[med-svn] r309 - in trunk/packages: . bioperl bioperl/branches bioperl/branches/upstream bioperl/branches/upstream/current bioperl/branches/upstream/current/Bio bioperl/branches/upstream/current/Bio/Align bioperl/branches/upstream/current/Bio/AlignIO bioperl/branches/upstream/current/Bio/Annotation bioperl/branches/upstream/current/Bio/Assembly bioperl/branches/upstream/current/Bio/Assembly/IO bioperl/branches/upstream/current/Bio/Biblio bioperl/branches/upstream/current/Bio/Biblio/IO bioperl/branches/upstream/current/Bio/Cluster bioperl/branches/upstream/current/Bio/ClusterIO bioperl/branches/upstream/current/Bio/CodonUsage bioperl/branches/upstream/current/Bio/Coordinate bioperl/branches/upstream/current/Bio/Coordinate/Result bioperl/branches/upstream/current/Bio/DB bioperl/branches/upstream/current/Bio/DB/Biblio bioperl/branches/upstream/current/Bio/DB/EUtilities bioperl/branches/upstream/current/Bio/DB/Expression bioperl/branches/upstream/current/Bio/DB/Flat bioperl/branches/upstream/current/Bio/DB/Flat/BDB bioperl/branches/upstream/current/Bio/DB/GFF bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator bioperl/branches/upstream/current/Bio/DB/GFF/Util bioperl/branches/upstream/current/Bio/DB/Query bioperl/branches/upstream/current/Bio/DB/SeqFeature bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI bioperl/branches/upstream/current/Bio/DB/SeqVersion bioperl/branches/upstream/current/Bio/DB/Taxonomy bioperl/branches/upstream/current/Bio/Das bioperl/branches/upstream/current/Bio/Event bioperl/branches/upstream/current/Bio/Expression bioperl/branches/upstream/current/Bio/Expression/FeatureGroup bioperl/branches/upstream/current/Bio/Expression/FeatureSet 
bioperl/branches/upstream/current/Bio/Factory bioperl/branches/upstream/current/Bio/FeatureIO bioperl/branches/upstream/current/Bio/Graph bioperl/branches/upstream/current/Bio/Graph/IO bioperl/branches/upstream/current/Bio/Graph/SimpleGraph bioperl/branches/upstream/current/Bio/Graphics bioperl/branches/upstream/current/Bio/Graphics/FeatureFile bioperl/branches/upstream/current/Bio/Graphics/Glyph bioperl/branches/upstream/current/Bio/Index bioperl/branches/upstream/current/Bio/LiveSeq bioperl/branches/upstream/current/Bio/LiveSeq/IO bioperl/branches/upstream/current/Bio/Location bioperl/branches/upstream/current/Bio/Map bioperl/branches/upstream/current/Bio/MapIO bioperl/branches/upstream/current/Bio/Matrix bioperl/branches/upstream/current/Bio/Matrix/IO bioperl/branches/upstream/current/Bio/Matrix/PSM bioperl/branches/upstream/current/Bio/Matrix/PSM/IO bioperl/branches/upstream/current/Bio/Ontology bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine bioperl/branches/upstream/current/Bio/OntologyIO bioperl/branches/upstream/current/Bio/OntologyIO/Handlers bioperl/branches/upstream/current/Bio/Phenotype bioperl/branches/upstream/current/Bio/Phenotype/MeSH bioperl/branches/upstream/current/Bio/Phenotype/OMIM bioperl/branches/upstream/current/Bio/PopGen bioperl/branches/upstream/current/Bio/PopGen/IO bioperl/branches/upstream/current/Bio/PopGen/Simulation bioperl/branches/upstream/current/Bio/Restriction bioperl/branches/upstream/current/Bio/Restriction/Enzyme bioperl/branches/upstream/current/Bio/Restriction/IO bioperl/branches/upstream/current/Bio/Root bioperl/branches/upstream/current/Bio/Search bioperl/branches/upstream/current/Bio/Search/HSP bioperl/branches/upstream/current/Bio/Search/Hit bioperl/branches/upstream/current/Bio/Search/Iteration bioperl/branches/upstream/current/Bio/Search/Result bioperl/branches/upstream/current/Bio/SearchIO bioperl/branches/upstream/current/Bio/SearchIO/Writer bioperl/branches/upstream/current/Bio/Seq 
bioperl/branches/upstream/current/Bio/Seq/Meta bioperl/branches/upstream/current/Bio/SeqFeature bioperl/branches/upstream/current/Bio/SeqFeature/Gene bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA bioperl/branches/upstream/current/Bio/SeqFeature/Tools bioperl/branches/upstream/current/Bio/SeqIO bioperl/branches/upstream/current/Bio/SeqIO/game bioperl/branches/upstream/current/Bio/SeqIO/tinyseq bioperl/branches/upstream/current/Bio/Structure bioperl/branches/upstream/current/Bio/Structure/IO bioperl/branches/upstream/current/Bio/Structure/SecStr bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE bioperl/branches/upstream/current/Bio/Symbol bioperl/branches/upstream/current/Bio/Taxonomy bioperl/branches/upstream/current/Bio/Tools bioperl/branches/upstream/current/Bio/Tools/Alignment bioperl/branches/upstream/current/Bio/Tools/Analysis bioperl/branches/upstream/current/Bio/Tools/Analysis/DNA bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein bioperl/branches/upstream/current/Bio/Tools/BPlite bioperl/branches/upstream/current/Bio/Tools/EMBOSS bioperl/branches/upstream/current/Bio/Tools/HMMER bioperl/branches/upstream/current/Bio/Tools/Phylo bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML bioperl/branches/upstream/current/Bio/Tools/Phylo/Phylip bioperl/branches/upstream/current/Bio/Tools/Prediction bioperl/branches/upstream/current/Bio/Tools/Primer bioperl/branches/upstream/current/Bio/Tools/Primer/Assessor bioperl/branches/upstream/current/Bio/Tools/Run bioperl/branches/upstream/current/Bio/Tools/SiRNA bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset bioperl/branches/upstream/current/Bio/Tools/Sim4 bioperl/branches/upstream/current/Bio/Tools/Spidey bioperl/branches/upstream/current/Bio/Tree bioperl/branches/upstream/current/Bio/Tree/Draw bioperl/branches/upstream/current/Bio/TreeIO 
bioperl/branches/upstream/current/Bio/Variation bioperl/branches/upstream/current/Bio/Variation/IO bioperl/branches/upstream/current/doc bioperl/branches/upstream/current/doc/Deobfuscator bioperl/branches/upstream/current/doc/Deobfuscator/bin bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin bioperl/branches/upstream/current/doc/Deobfuscator/lib bioperl/branches/upstream/current/doc/Deobfuscator/t bioperl/branches/upstream/current/examples bioperl/branches/upstream/current/examples/Bio-DB-GFF bioperl/branches/upstream/current/examples/align bioperl/branches/upstream/current/examples/biblio bioperl/branches/upstream/current/examples/biographics bioperl/branches/upstream/current/examples/cluster bioperl/branches/upstream/current/examples/contributed bioperl/branches/upstream/current/examples/db bioperl/branches/upstream/current/examples/liveseq bioperl/branches/upstream/current/examples/popgen bioperl/branches/upstream/current/examples/root bioperl/branches/upstream/current/examples/root/lib bioperl/branches/upstream/current/examples/root/lib/Bio bioperl/branches/upstream/current/examples/searchio bioperl/branches/upstream/current/examples/sirna bioperl/branches/upstream/current/examples/structure bioperl/branches/upstream/current/examples/tk bioperl/branches/upstream/current/examples/tools bioperl/branches/upstream/current/examples/tree bioperl/branches/upstream/current/maintenance bioperl/branches/upstream/current/models bioperl/branches/upstream/current/scripts bioperl/branches/upstream/current/scripts/Bio-DB-GFF bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store bioperl/branches/upstream/current/scripts/DB bioperl/branches/upstream/current/scripts/biblio bioperl/branches/upstream/current/scripts/biographics bioperl/branches/upstream/current/scripts/das bioperl/branches/upstream/current/scripts/graphics bioperl/branches/upstream/current/scripts/index bioperl/branches/upstream/current/scripts/popgen 
bioperl/branches/upstream/current/scripts/searchio bioperl/branches/upstream/current/scripts/seq bioperl/branches/upstream/current/scripts/seqstats bioperl/branches/upstream/current/scripts/taxa bioperl/branches/upstream/current/scripts/tree bioperl/branches/upstream/current/scripts/utilities bioperl/branches/upstream/current/t bioperl/branches/upstream/current/t/data bioperl/branches/upstream/current/t/data/biodbgff bioperl/branches/upstream/current/t/data/biographics bioperl/branches/upstream/current/t/data/biographics/t1 bioperl/branches/upstream/current/t/data/biographics/t2 bioperl/branches/upstream/current/t/data/biographics/t3 bioperl/branches/upstream/current/t/data/codeml_lysozyme bioperl/branches/upstream/current/t/data/consed_project bioperl/branches/upstream/current/t/data/consed_project/edit_dir bioperl/branches/upstream/current/t/data/consed_project/phd_dir bioperl/branches/upstream/current/t/data/dbfa bioperl/branches/upstream/current/t/data/registry bioperl/branches/upstream/current/t/data/registry/bdb bioperl/branches/upstream/current/t/data/registry/flat bioperl/branches/upstream/current/t/data/seqfeaturedb bioperl/branches/upstream/current/t/data/taxdump bioperl/branches/upstream/current/t/lib bioperl/branches/upstream/current/t/lib/Test bioperl/branches/upstream/current/t/lib/Test/Builder bioperl/branches/upstream/current/t/lib/Test/Builder/Tester

charles-guest at alioth.debian.org charles-guest at alioth.debian.org
Thu Jun 14 23:43:59 UTC 2007


Author: charles-guest
Date: 2007-06-14 23:43:54 +0000 (Thu, 14 Jun 2007)
New Revision: 309

Added:
   trunk/packages/bioperl/
   trunk/packages/bioperl/branches/
   trunk/packages/bioperl/branches/upstream/
   trunk/packages/bioperl/branches/upstream/current/
   trunk/packages/bioperl/branches/upstream/current/AUTHORS
   trunk/packages/bioperl/branches/upstream/current/BUGS
   trunk/packages/bioperl/branches/upstream/current/Bio/
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/AlignI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/DNAStatistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/PairwiseStatistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/ProteinStatistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/StatisticsI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Align/Utilities.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/bl2seq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/clustalw.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/emboss.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/largemultifasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/maf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mega.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/meme.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/metafasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/msf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/nexus.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/pfam.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/phylip.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/po.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/prodom.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/psi.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/selex.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/stockholm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisParserI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisResultI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AnnotatableI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/AnnotationFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Collection.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Comment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/DBLink.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/OntologyTerm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Reference.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/SimpleValue.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/StructuredValue.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Target.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/TypeManager.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationCollectionI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Contig.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ContigAnalysis.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/ace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/phrap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Scaffold.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ScaffoldI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Singlet.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Article.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BiblioBase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Book.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BookArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medline2ref.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medlinexml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmed2ref.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmedxml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Journal.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/JournalArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBook.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBookArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournal.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournalArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Organisation.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Patent.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Person.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Proceeding.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Provider.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedBookArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedJournalArticle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Ref.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Service.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/TechReport.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Thesis.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/WebResource.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/
   trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/ClusterFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/FamilyI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/SequenceFamily.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGeneI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/ClusterI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/dbsnp.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/unigene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/
   trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/Table.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Chain.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Collection.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ExtrapolatingPair.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/GeneMapper.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Graph.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/MapperI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Pair.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Gap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Match.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ResultI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Utils.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Ace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/biofetch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/eutils.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/pdf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/soap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/BiblioI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/BioFetch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/CUTG.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/DBFetch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EMBL.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/Cookie.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/ElinkData.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/efetch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/egquery.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/einfo.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/elink.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/epost.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esearch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esummary.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/EntrezGene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression/geo.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Failover.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/FileCache.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/embl.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/genbank.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swiss.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swissprot.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BinarySearch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GDB.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/ace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb/iterator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch_oracle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/iterator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysql.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracleace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg_fts.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/feature_serializer.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/iterator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/alignment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/clone.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/coding.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/match.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/none.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/processed_transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/so_transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_acembly.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_ensgene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_genscan.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_refgene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22pseudo.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_softberry.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_twinscan.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_unigene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Featname.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Feature.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Homol.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/RelSegment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Segment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Typename.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Binning.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Rearrange.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenBank.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenPept.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenericWebDBI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/InMemoryCache.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/LocationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/MeSH.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/NCBIHelper.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/GenBank.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/WebQuery.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/QueryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/RandomAccessI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/RefSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/ReferenceI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Registry.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeature.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeatureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedTableFeatureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Segment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/Iterator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/mysql.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/GFF3Loader.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/bdb.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/berkeleydb.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/memory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqHound.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion/gi.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/SwissProt.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/entrez.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/flatfile.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/list.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/Universal.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/UpdateableSeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/WebDBSeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBL.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBLService.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DBLinkContainerI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Das/
   trunk/packages/bioperl/branches/upstream/current/Bio/Das/FeatureTypeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Das/SegmentI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DasI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/DescribableI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Event/
   trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventGeneratorI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventHandlerI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Contact.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/DataSet.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup/
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup/FeatureGroupMas50.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureSet/
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureSet/FeatureSetMas50.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Platform.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/ProbeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Sample.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/AnalysisI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ApplicationFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/DriverFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/FTLocationFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/HitFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/LocationFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/MapFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectBuilderI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ResultFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceProcessorI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceStreamI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Factory/TreeFactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureHolderI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/bed.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gff.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gtf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/interpro.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/ptt.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/Edge.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/dip.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/psi_xml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/ProteinGraph.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph/
   trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph/Traversal.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/ConfiguratorI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Feature.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureBase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile/
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile/Iterator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/Factory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/alignment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/anchored_arrow.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/arrow.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/box.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/broken_line.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/cds.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/christmas_arrow.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/crossbox.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dashed_line.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/diamond.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dna.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dot.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dumbbell.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ellipse.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ex.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/extending_arrow.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/flag.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/gene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/generic.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/graded_segments.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/group.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/heterogeneous_segments.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/image.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/lightning.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/line.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merge_parts.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merged_alignment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/minmax.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/oval.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pentagram.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pinsertion.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/primers.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/processed_transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/protein.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ragged_ends.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_box.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_segment.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/repeating_shape.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/rndrect.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ruler_arrow.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/saw_teeth.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segmented_keyglyph.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segments.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/so_transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/span.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/splice_site.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/text_in_box.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/three_letters.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/tic_tac_toe.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/toomany.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/track.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript2.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/translation.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/triangle.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/two_bolts.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/wave.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/weighted_arrow.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/whiskerplot.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/xyplot.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Panel.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Pictogram.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/RendererI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Util.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/IdCollectionI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/IdentifiableI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Abstract.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/AbstractSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Blast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/EMBL.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fastq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/GenBank.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Hmmer.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Qual.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/SwissPfam.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Index/Swissprot.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/AARange.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Chain.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/ChainI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/DNA.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Exon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Gene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/BioPerl.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/Loader.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/README
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Intron.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutation.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Prim_Transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Range.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Region.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Unit.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/SeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Translation.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LocatableSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/Atomic.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/AvWithinCoordPolicy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/CoordinatePolicyI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/Fuzzy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/FuzzyLocationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/NarrowestCoordPolicy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/Simple.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/Split.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/SplitLocationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Location/WidestCoordPolicy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/LocationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Clone.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Contig.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMarker.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoPosition.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/EntityI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/FPCMarker.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkageMap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkagePosition.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/MapI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Mappable.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/MappableI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Marker.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/MarkerI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Microsatellite.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPosition.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPositionWithDistance.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Physical.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Position.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandlerI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/Relative.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/RelativeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Map/SimpleMap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/MapIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/fpc.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/mapmaker.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Generic.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/phylip.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/scoring.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/MatrixI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/mast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/masta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/meme.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/psiblast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/transfac.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSite.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSiteI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtMatrix.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtPsm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/Psm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeader.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeaderI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrix.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrixI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PhylipDist.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Scoring.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/DocumentRegistry.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/GOterm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/InterProTerm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOEngine.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOterm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Ontology.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyEngineI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyStore.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Path.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/PathI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Relationship.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipType.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor02.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleOntologyEngine.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Term.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/BaseSAXHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterProHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterPro_BioSQL_Handler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/InterProParser.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/dagflat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/goflat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/obo.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/simplehierarchy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/soflat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Perl.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Correlate.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Term.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Twig.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Measure.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/MiniMIMentry.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentry.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentryAllelicVariant.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMparser.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Phenotype.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/PhenotypeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Genotype.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/GenotypeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/HtSNP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/csv.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/hapmap.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/phase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/prettybase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Individual.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IndividualI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Marker.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/MarkerI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopStats.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Population.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopulationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/Coalescent.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/GeneticDrift.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Statistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/TagHaplotype.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Utilities.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/PullParserI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Range.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/RangeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Analysis.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiCut.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiSite.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeCollection.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/bairoch.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/base.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/itype2.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/withrefm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/Exception.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/HTTPget.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/Root.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/RootI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/Storable.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Root/Version.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastStatistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastUtils.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/DatabaseI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericDatabase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericStatistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/BlastHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/FastaHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/GenericHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HMMERHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HmmpfamHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PSLHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PsiBlastHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PullHSPI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/WABAHSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/BlastHit.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/Fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/GenericHit.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HMMERHit.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HmmpfamHit.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PsiBlastHit.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PullHitI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/GenericIteration.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/IterationI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Processor.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/BlastResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/GenericResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HMMERResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HmmpfamResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/PullResultI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/WABAResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/SearchUtils.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Search/StatisticsI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchDist.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/EventHandlerI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/FastHitEventBuilder.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/IteratedSearchResultEventBuilder.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchResultEventBuilder.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchWriterI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/BSMLResultWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/GbrowseGFF.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HSPTableWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HTMLResultWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HitTableWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/ResultTableWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/TextResultWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/axt.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blasttable.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blastxml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/exonerate.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer_pull.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/megablast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/psl.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/sim4.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/waba.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/wise.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/BaseSeqProcessor.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/EncodedSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeLocatableSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargePrimarySeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta/
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta/Array.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/MetaI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimaryQual.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimedSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/QualI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Quality.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqBuilder.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFastaSpeedFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqWithQuality.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SequenceTrace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Seq/TraceI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqAnalysisParserI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Annotated.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/AnnotationAdaptor.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Collection.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/CollectionI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Computation.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/FeaturePair.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Exon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/ExonI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructure.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Intron.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/NC_Feature.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Poly_A_site.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Promoter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Transcript.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/TranscriptI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/UTR.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Generic.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/PositionProxy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Primer.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Oligo.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Pair.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Similarity.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SimilarityPair.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/FeatureNamer.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/IDHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/TypeMapper.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/Unflattener.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/TypedSeqFeatureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeatureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/FTHelper.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/MultiFile.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/abi.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ace.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/agave.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/alf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/asciitree.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml_sax.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chadoxml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaos.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaosxml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ctf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/embl.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/entrezgene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/excel.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/exp.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fastq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/featHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameSubs.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameWriter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/seqHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/gcg.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/genbank.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/interpro.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/kegg.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/largefasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/lasergene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/locuslink.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/metafasta.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/phd.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pir.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pln.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/qual.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/raw.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/scf.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/strider.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/swiss.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tab.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/table.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigr.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigrxml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq/
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq/tinyseqHandler.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ztr.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SeqUtils.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAlign.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAnalysisI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Species.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Atom.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Chain.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Entry.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO/pdb.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Model.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Residue.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP/
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP/Res.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE/
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE/Res.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Structure/StructureI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Alphabet.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/AlphabetI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/DNAAlphabet.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/ProteinAlphabet.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/README.Symbol
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Symbol.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/SymbolI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/FactoryI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Node.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Taxon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Tree.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AlignFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Consed.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Trim.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/DNA/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/DNA/ESEfinder.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Domcut.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/ELM.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/GOR4.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/HNN.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Mitoprot.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/NetPhos.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Scansite.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Sopma.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/SimpleAnalysisBase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AnalysisResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPbl2seq.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/HSP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Iteration.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Sbjct.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPpsilite.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Blat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/CodonTable.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Coil.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ECnumber.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EMBOSS/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EMBOSS/Palindrome.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EPCR.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ERPIN.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ESTScan.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Eponine.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Est2Genome.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Fgenesh.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/FootPrinter.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GFF.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Gel.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Geneid.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genemark.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genewise.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genomewise.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genscan.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Glimmer.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Grail.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GuessSeqFormat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMM.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Domain.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Results.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Set.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Hmmpfam.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/IUPAC.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Lucy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/MZEF.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/OddCodes.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy/Result.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/ModelResult.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/Result.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Phylip/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Phylip/ProtDist.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Exon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Gene.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Assessor/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Assessor/Base.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/AssessorI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Feature.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Pair.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer3.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prints.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Profile.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Promoterwise.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/PrositeScan.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Pseudowise.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/QRNA.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RNAMotif.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RandomDistFunctions.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RepeatMasker.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RestrictionEnzyme.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/GenericParameters.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/ParametersI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/README
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/RemoteBlast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/StandAloneBlast.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/WrapperBase.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Seg.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqPattern.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqStats.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqWords.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/saigo.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/tuschl.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sigcleave.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Signalp.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Exon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Results.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Exon.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Results.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Tmhmm.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/dpAlign.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ipcress.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/isPcr.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pICalculator.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pSW.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tools/tRNAscanSE.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/AlleleNode.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Compatible.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/DistanceFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Draw/
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Draw/Cladogram.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Node.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeNHX.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/RandomFactory.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Statistics.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Tree.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeFunctionsI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/TreeEventBuilder.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/cluster.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/lintree.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/newick.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nexus.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nhx.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/pag.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/svggraph.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/tabtree.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/UpdateableSeqI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAChange.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAReverseMutate.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/Allele.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/DNAMutation.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/flat.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/xml.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/README
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/RNAChange.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SNP.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SeqDiff.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/Variation/VariantI.pm
   trunk/packages/bioperl/branches/upstream/current/Bio/WebAgent.pm
   trunk/packages/bioperl/branches/upstream/current/Build.PL
   trunk/packages/bioperl/branches/upstream/current/Changes
   trunk/packages/bioperl/branches/upstream/current/DEPENDENCIES
   trunk/packages/bioperl/branches/upstream/current/DEPRECATED
   trunk/packages/bioperl/branches/upstream/current/INSTALL
   trunk/packages/bioperl/branches/upstream/current/INSTALL.WIN
   trunk/packages/bioperl/branches/upstream/current/LICENSE
   trunk/packages/bioperl/branches/upstream/current/MANIFEST
   trunk/packages/bioperl/branches/upstream/current/META.yml
   trunk/packages/bioperl/branches/upstream/current/Makefile.PL
   trunk/packages/bioperl/branches/upstream/current/ModuleBuildBioperl.pm
   trunk/packages/bioperl/branches/upstream/current/PLATFORMS
   trunk/packages/bioperl/branches/upstream/current/README
   trunk/packages/bioperl/branches/upstream/current/doc/
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Build.PL
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Changes
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/LICENSE
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/MANIFEST
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/META.yml
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Makefile.PL
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/README
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/bin/
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/bin/deob_index.pl
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_detail.cgi
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_flowchart.png
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_help.html
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_interface.cgi
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/lib/
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/lib/Deobfuscator.pm
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/00.load.t
   trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/pod.t
   trunk/packages/bioperl/branches/upstream/current/doc/README
   trunk/packages/bioperl/branches/upstream/current/doc/makedoc.PL
   trunk/packages/bioperl/branches/upstream/current/examples/
   trunk/packages/bioperl/branches/upstream/current/examples/Bio-DB-GFF/
   trunk/packages/bioperl/branches/upstream/current/examples/Bio-DB-GFF/load_ucsc.pl
   trunk/packages/bioperl/branches/upstream/current/examples/align/
   trunk/packages/bioperl/branches/upstream/current/examples/align/align_on_codons.pl
   trunk/packages/bioperl/branches/upstream/current/examples/align/aligntutorial.pl
   trunk/packages/bioperl/branches/upstream/current/examples/align/clustalw.pl
   trunk/packages/bioperl/branches/upstream/current/examples/align/simplealign.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biblio/
   trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-eutils-example.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-soap-example.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio_soap.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/all_glyphs.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/dynamic_glyphs.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.gff
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.txt
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/lots_of_glyphs.pl
   trunk/packages/bioperl/branches/upstream/current/examples/biographics/render_sequence.pl
   trunk/packages/bioperl/branches/upstream/current/examples/bioperl.pl
   trunk/packages/bioperl/branches/upstream/current/examples/cluster/
   trunk/packages/bioperl/branches/upstream/current/examples/cluster/dbsnp.pl
   trunk/packages/bioperl/branches/upstream/current/examples/contributed/
   trunk/packages/bioperl/branches/upstream/current/examples/contributed/nmrpdb_parse.pl
   trunk/packages/bioperl/branches/upstream/current/examples/contributed/prosite2perl.pl
   trunk/packages/bioperl/branches/upstream/current/examples/contributed/rebase2list.pl
   trunk/packages/bioperl/branches/upstream/current/examples/db/
   trunk/packages/bioperl/branches/upstream/current/examples/db/dbfetch
   trunk/packages/bioperl/branches/upstream/current/examples/db/est_tissue_query.pl
   trunk/packages/bioperl/branches/upstream/current/examples/db/gb2features.pl
   trunk/packages/bioperl/branches/upstream/current/examples/db/getGenBank.pl
   trunk/packages/bioperl/branches/upstream/current/examples/db/get_seqs.pl
   trunk/packages/bioperl/branches/upstream/current/examples/db/rfetch.pl
   trunk/packages/bioperl/branches/upstream/current/examples/db/use_registry.pl
   trunk/packages/bioperl/branches/upstream/current/examples/generate_random_seq.pl
   trunk/packages/bioperl/branches/upstream/current/examples/liveseq/
   trunk/packages/bioperl/branches/upstream/current/examples/liveseq/change_gene.pl
   trunk/packages/bioperl/branches/upstream/current/examples/longorf.pl
   trunk/packages/bioperl/branches/upstream/current/examples/make_primers.pl
   trunk/packages/bioperl/branches/upstream/current/examples/popgen/
   trunk/packages/bioperl/branches/upstream/current/examples/popgen/parse_calc_stats.pl
   trunk/packages/bioperl/branches/upstream/current/examples/rev_and_trans.pl
   trunk/packages/bioperl/branches/upstream/current/examples/revcom_dir.pl
   trunk/packages/bioperl/branches/upstream/current/examples/root/
   trunk/packages/bioperl/branches/upstream/current/examples/root/README
   trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions1.pl
   trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions2.pl
   trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions3.pl
   trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions4.pl
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeq.pm
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeqI.pm
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/Seq.pm
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/SeqI.pm
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestInterface.pm
   trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestObject.pm
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/blast_example.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/custom_writer.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/hitwriter.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/hspwriter.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/htmlwriter.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_features.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_iterations.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/rawwriter.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/resultwriter.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff.pl
   trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff3.pl
   trunk/packages/bioperl/branches/upstream/current/examples/sirna/
   trunk/packages/bioperl/branches/upstream/current/examples/sirna/TAG
   trunk/packages/bioperl/branches/upstream/current/examples/sirna/rnai_finder.cgi
   trunk/packages/bioperl/branches/upstream/current/examples/structure/
   trunk/packages/bioperl/branches/upstream/current/examples/structure/structure-io.pl
   trunk/packages/bioperl/branches/upstream/current/examples/subsequence.cgi
   trunk/packages/bioperl/branches/upstream/current/examples/tk/
   trunk/packages/bioperl/branches/upstream/current/examples/tk/gsequence.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tk/hitdisplay.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/
   trunk/packages/bioperl/branches/upstream/current/examples/tools/extract_genes.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/gb_to_gff.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/gff2ps.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/parse_codeml.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/psw.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/run_genscan.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/run_primer3.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/seq_pattern.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tools/standaloneblast.pl
   trunk/packages/bioperl/branches/upstream/current/examples/tree/
   trunk/packages/bioperl/branches/upstream/current/examples/tree/paup2phylip.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/
   trunk/packages/bioperl/branches/upstream/current/maintenance/README
   trunk/packages/bioperl/branches/upstream/current/maintenance/authors.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/check_NAME.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/check_URLs.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/modules.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/ncbi_blast_switches.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/pod.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_script.pl
   trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_scripts.PLS
   trunk/packages/bioperl/branches/upstream/current/maintenance/version.pl
   trunk/packages/bioperl/branches/upstream/current/models/
   trunk/packages/bioperl/branches/upstream/current/models/README
   trunk/packages/bioperl/branches/upstream/current/models/biblio.dia
   trunk/packages/bioperl/branches/upstream/current/models/bio_liveseq_variation.dia
   trunk/packages/bioperl/branches/upstream/current/models/bio_map.dia
   trunk/packages/bioperl/branches/upstream/current/models/bio_restriction.dia
   trunk/packages/bioperl/branches/upstream/current/models/bioperl.dia
   trunk/packages/bioperl/branches/upstream/current/models/coordinatemapper.dia
   trunk/packages/bioperl/branches/upstream/current/models/map_proposal.txt
   trunk/packages/bioperl/branches/upstream/current/models/maps_and_markers.dia
   trunk/packages/bioperl/branches/upstream/current/models/popgen.dia
   trunk/packages/bioperl/branches/upstream/current/models/population_proposal.txt
   trunk/packages/bioperl/branches/upstream/current/scripts/
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/README
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bp_genbank2gff.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bulk_load_gff.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/fast_load_gff.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/genbank2gff3.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/generate_histogram.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/load_gff.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/meta_gff.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_gadfly.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_sgd.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_wormbase.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store/
   trunk/packages/bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store/bp_seqfeature_load.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/DB/
   trunk/packages/bioperl/branches/upstream/current/scripts/DB/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/DB/biofetch_genbank_proxy.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/DB/bioflat_index.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/DB/biogetseq.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/DB/flanks.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/README
   trunk/packages/bioperl/branches/upstream/current/scripts/biblio/
   trunk/packages/bioperl/branches/upstream/current/scripts/biblio/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/biblio/biblio.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/biographics/
   trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_embl2picture.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs1-demo.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs2-demo.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/das/
   trunk/packages/bioperl/branches/upstream/current/scripts/das/README
   trunk/packages/bioperl/branches/upstream/current/scripts/das/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/README
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/contig_draw.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/feature_draw.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/frend.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/graphics/search_overview.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/index/
   trunk/packages/bioperl/branches/upstream/current/scripts/index/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_fetch.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_index.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_seqret.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/popgen/
   trunk/packages/bioperl/branches/upstream/current/scripts/popgen/composite_LD.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/popgen/heterogeneity_test.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/README
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/fastam9_to_table.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/filter_search.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/hmmer_to_table.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/parse_hmmsearch.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/searchio/search2table.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/extract_feature_seq.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/make_mrna_protein.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/seqconvert.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/split_seq.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/translate_seq.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seq/unflatten_seq.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/
   trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/aacomp.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/chaos_plot.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/gccalc.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/oligo_count.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/classify_hits_kingdom.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/local_taxonomydb_query.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/query_entrez_taxa.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxid4species.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxonomy2tree.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/tree/
   trunk/packages/bioperl/branches/upstream/current/scripts/tree/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/tree/blast2tree.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/tree/nexus2nh.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/tree/tree2pag.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/README
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/TAG
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_mrtrans.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_nrdb.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_sreformat.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/dbsplit.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mask_by_search.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mutate.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/pairwise_kaks.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/remote_blast.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2BSML.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2alnblocks.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2gff.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2tribe.PLS
   trunk/packages/bioperl/branches/upstream/current/scripts/utilities/seq_length.PLS
   trunk/packages/bioperl/branches/upstream/current/t/
   trunk/packages/bioperl/branches/upstream/current/t/AAChange.t
   trunk/packages/bioperl/branches/upstream/current/t/AAReverseMutate.t
   trunk/packages/bioperl/branches/upstream/current/t/AlignIO.t
   trunk/packages/bioperl/branches/upstream/current/t/AlignStats.t
   trunk/packages/bioperl/branches/upstream/current/t/AlignUtil.t
   trunk/packages/bioperl/branches/upstream/current/t/Allele.t
   trunk/packages/bioperl/branches/upstream/current/t/Alphabet.t
   trunk/packages/bioperl/branches/upstream/current/t/Annotation.t
   trunk/packages/bioperl/branches/upstream/current/t/AnnotationAdaptor.t
   trunk/packages/bioperl/branches/upstream/current/t/Assembly.t
   trunk/packages/bioperl/branches/upstream/current/t/Biblio.t
   trunk/packages/bioperl/branches/upstream/current/t/BiblioReferences.t
   trunk/packages/bioperl/branches/upstream/current/t/Biblio_biofetch.t
   trunk/packages/bioperl/branches/upstream/current/t/Biblio_eutils.t
   trunk/packages/bioperl/branches/upstream/current/t/BioDBGFF.t
   trunk/packages/bioperl/branches/upstream/current/t/BioDBSeqFeature.t
   trunk/packages/bioperl/branches/upstream/current/t/BioFetch_DB.t
   trunk/packages/bioperl/branches/upstream/current/t/BioGraphics.t
   trunk/packages/bioperl/branches/upstream/current/t/BlastIndex.t
   trunk/packages/bioperl/branches/upstream/current/t/Chain.t
   trunk/packages/bioperl/branches/upstream/current/t/ClusterIO.t
   trunk/packages/bioperl/branches/upstream/current/t/Coalescent.t
   trunk/packages/bioperl/branches/upstream/current/t/CodonTable.t
   trunk/packages/bioperl/branches/upstream/current/t/Compatible.t
   trunk/packages/bioperl/branches/upstream/current/t/CoordinateGraph.t
   trunk/packages/bioperl/branches/upstream/current/t/CoordinateMapper.t
   trunk/packages/bioperl/branches/upstream/current/t/Correlate.t
   trunk/packages/bioperl/branches/upstream/current/t/CytoMap.t
   trunk/packages/bioperl/branches/upstream/current/t/DB.t
   trunk/packages/bioperl/branches/upstream/current/t/DBCUTG.t
   trunk/packages/bioperl/branches/upstream/current/t/DBFasta.t
   trunk/packages/bioperl/branches/upstream/current/t/DNAMutation.t
   trunk/packages/bioperl/branches/upstream/current/t/Domcut.t
   trunk/packages/bioperl/branches/upstream/current/t/ECnumber.t
   trunk/packages/bioperl/branches/upstream/current/t/ELM.t
   trunk/packages/bioperl/branches/upstream/current/t/EMBL_DB.t
   trunk/packages/bioperl/branches/upstream/current/t/EMBOSS_Tools.t
   trunk/packages/bioperl/branches/upstream/current/t/ESEfinder.t
   trunk/packages/bioperl/branches/upstream/current/t/EUtilities.t
   trunk/packages/bioperl/branches/upstream/current/t/EncodedSeq.t
   trunk/packages/bioperl/branches/upstream/current/t/Exception.t
   trunk/packages/bioperl/branches/upstream/current/t/Exonerate.t
   trunk/packages/bioperl/branches/upstream/current/t/FeatureHolder.x
   trunk/packages/bioperl/branches/upstream/current/t/FeatureIO.t
   trunk/packages/bioperl/branches/upstream/current/t/FootPrinter.t
   trunk/packages/bioperl/branches/upstream/current/t/GDB.t
   trunk/packages/bioperl/branches/upstream/current/t/GFF.t
   trunk/packages/bioperl/branches/upstream/current/t/GOR4.t
   trunk/packages/bioperl/branches/upstream/current/t/GOterm.t
   trunk/packages/bioperl/branches/upstream/current/t/GbrowseGFF.t
   trunk/packages/bioperl/branches/upstream/current/t/Gel.t
   trunk/packages/bioperl/branches/upstream/current/t/GeneCoordinateMapper.t
   trunk/packages/bioperl/branches/upstream/current/t/Geneid.t
   trunk/packages/bioperl/branches/upstream/current/t/Genewise.t
   trunk/packages/bioperl/branches/upstream/current/t/Genomewise.t
   trunk/packages/bioperl/branches/upstream/current/t/Genpred.t
   trunk/packages/bioperl/branches/upstream/current/t/GraphAdaptor.t
   trunk/packages/bioperl/branches/upstream/current/t/GuessSeqFormat.t
   trunk/packages/bioperl/branches/upstream/current/t/HNN.t
   trunk/packages/bioperl/branches/upstream/current/t/HtSNP.t
   trunk/packages/bioperl/branches/upstream/current/t/IUPAC.t
   trunk/packages/bioperl/branches/upstream/current/t/Index.t
   trunk/packages/bioperl/branches/upstream/current/t/InstanceSite.t
   trunk/packages/bioperl/branches/upstream/current/t/InterProParser.t
   trunk/packages/bioperl/branches/upstream/current/t/LargeLocatableSeq.t
   trunk/packages/bioperl/branches/upstream/current/t/LinkageMap.t
   trunk/packages/bioperl/branches/upstream/current/t/LiveSeq.t
   trunk/packages/bioperl/branches/upstream/current/t/LocatableSeq.t
   trunk/packages/bioperl/branches/upstream/current/t/Location.t
   trunk/packages/bioperl/branches/upstream/current/t/LocationFactory.t
   trunk/packages/bioperl/branches/upstream/current/t/LocusLink.t
   trunk/packages/bioperl/branches/upstream/current/t/Map.t
   trunk/packages/bioperl/branches/upstream/current/t/MapIO.t
   trunk/packages/bioperl/branches/upstream/current/t/Matrix.t
   trunk/packages/bioperl/branches/upstream/current/t/MeSH.t
   trunk/packages/bioperl/branches/upstream/current/t/Measure.t
   trunk/packages/bioperl/branches/upstream/current/t/MetaSeq.t
   trunk/packages/bioperl/branches/upstream/current/t/MicrosatelliteMarker.t
   trunk/packages/bioperl/branches/upstream/current/t/MiniMIMentry.t
   trunk/packages/bioperl/branches/upstream/current/t/MitoProt.t
   trunk/packages/bioperl/branches/upstream/current/t/Molphy.t
   trunk/packages/bioperl/branches/upstream/current/t/MultiFile.t
   trunk/packages/bioperl/branches/upstream/current/t/Mutation.t
   trunk/packages/bioperl/branches/upstream/current/t/Mutator.t
   trunk/packages/bioperl/branches/upstream/current/t/NetPhos.t
   trunk/packages/bioperl/branches/upstream/current/t/Node.t
   trunk/packages/bioperl/branches/upstream/current/t/OMIMentry.t
   trunk/packages/bioperl/branches/upstream/current/t/OMIMentryAllelicVariant.t
   trunk/packages/bioperl/branches/upstream/current/t/OMIMparser.t
   trunk/packages/bioperl/branches/upstream/current/t/OddCodes.t
   trunk/packages/bioperl/branches/upstream/current/t/Ontology.t
   trunk/packages/bioperl/branches/upstream/current/t/OntologyEngine.t
   trunk/packages/bioperl/branches/upstream/current/t/OntologyStore.t
   trunk/packages/bioperl/branches/upstream/current/t/PAML.t
   trunk/packages/bioperl/branches/upstream/current/t/Perl.t
   trunk/packages/bioperl/branches/upstream/current/t/Phenotype.t
   trunk/packages/bioperl/branches/upstream/current/t/PhylipDist.t
   trunk/packages/bioperl/branches/upstream/current/t/PhysicalMap.t
   trunk/packages/bioperl/branches/upstream/current/t/Pictogram.t
   trunk/packages/bioperl/branches/upstream/current/t/PopGen.t
   trunk/packages/bioperl/branches/upstream/current/t/PopGenSims.t
   trunk/packages/bioperl/branches/upstream/current/t/PrimarySeq.t
   trunk/packages/bioperl/branches/upstream/current/t/Primer.t
   trunk/packages/bioperl/branches/upstream/current/t/Promoterwise.t
   trunk/packages/bioperl/branches/upstream/current/t/ProtDist.t
   trunk/packages/bioperl/branches/upstream/current/t/ProtMatrix.t
   trunk/packages/bioperl/branches/upstream/current/t/ProtPsm.t
   trunk/packages/bioperl/branches/upstream/current/t/Pseudowise.t
   trunk/packages/bioperl/branches/upstream/current/t/QRNA.t
   trunk/packages/bioperl/branches/upstream/current/t/RNAChange.t
   trunk/packages/bioperl/branches/upstream/current/t/RandDistFunctions.t
   trunk/packages/bioperl/branches/upstream/current/t/RandomTreeFactory.t
   trunk/packages/bioperl/branches/upstream/current/t/Range.t
   trunk/packages/bioperl/branches/upstream/current/t/RangeI.t
   trunk/packages/bioperl/branches/upstream/current/t/RefSeq.t
   trunk/packages/bioperl/branches/upstream/current/t/Registry.t
   trunk/packages/bioperl/branches/upstream/current/t/Relationship.t
   trunk/packages/bioperl/branches/upstream/current/t/RelationshipType.t
   trunk/packages/bioperl/branches/upstream/current/t/RemoteBlast.t
   trunk/packages/bioperl/branches/upstream/current/t/RepeatMasker.t
   trunk/packages/bioperl/branches/upstream/current/t/RestrictionAnalysis.t
   trunk/packages/bioperl/branches/upstream/current/t/RestrictionIO.t
   trunk/packages/bioperl/branches/upstream/current/t/RootI.t
   trunk/packages/bioperl/branches/upstream/current/t/RootIO.t
   trunk/packages/bioperl/branches/upstream/current/t/RootStorable.t
   trunk/packages/bioperl/branches/upstream/current/t/SNP.t
   trunk/packages/bioperl/branches/upstream/current/t/Scansite.t
   trunk/packages/bioperl/branches/upstream/current/t/SearchDist.t
   trunk/packages/bioperl/branches/upstream/current/t/SearchIO.t
   trunk/packages/bioperl/branches/upstream/current/t/Seg.t
   trunk/packages/bioperl/branches/upstream/current/t/Seq.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqAnalysisParser.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqBuilder.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqDiff.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqFeatCollection.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqFeature.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqHound_DB.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqIO.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqPattern.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqStats.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqUtils.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqVersion.t
   trunk/packages/bioperl/branches/upstream/current/t/SeqWords.t
   trunk/packages/bioperl/branches/upstream/current/t/SequenceFamily.t
   trunk/packages/bioperl/branches/upstream/current/t/Sigcleave.t
   trunk/packages/bioperl/branches/upstream/current/t/Signalp.t
   trunk/packages/bioperl/branches/upstream/current/t/Sim4.t
   trunk/packages/bioperl/branches/upstream/current/t/SimilarityPair.t
   trunk/packages/bioperl/branches/upstream/current/t/SimpleAlign.t
   trunk/packages/bioperl/branches/upstream/current/t/SiteMatrix.t
   trunk/packages/bioperl/branches/upstream/current/t/Sopma.t
   trunk/packages/bioperl/branches/upstream/current/t/Species.t
   trunk/packages/bioperl/branches/upstream/current/t/Spidey.t
   trunk/packages/bioperl/branches/upstream/current/t/StandAloneBlast.t
   trunk/packages/bioperl/branches/upstream/current/t/StructIO.t
   trunk/packages/bioperl/branches/upstream/current/t/Structure.t
   trunk/packages/bioperl/branches/upstream/current/t/Symbol.t
   trunk/packages/bioperl/branches/upstream/current/t/TagHaplotype.t
   trunk/packages/bioperl/branches/upstream/current/t/TaxonTree.t
   trunk/packages/bioperl/branches/upstream/current/t/Taxonomy.t
   trunk/packages/bioperl/branches/upstream/current/t/Tempfile.t
   trunk/packages/bioperl/branches/upstream/current/t/Term.t
   trunk/packages/bioperl/branches/upstream/current/t/Test.pm
   trunk/packages/bioperl/branches/upstream/current/t/Tmhmm.t
   trunk/packages/bioperl/branches/upstream/current/t/Tools.t
   trunk/packages/bioperl/branches/upstream/current/t/Tree.t
   trunk/packages/bioperl/branches/upstream/current/t/TreeBuild.t
   trunk/packages/bioperl/branches/upstream/current/t/TreeIO.t
   trunk/packages/bioperl/branches/upstream/current/t/UCSCParsers.t
   trunk/packages/bioperl/branches/upstream/current/t/Unflattener.t
   trunk/packages/bioperl/branches/upstream/current/t/Unflattener2.t
   trunk/packages/bioperl/branches/upstream/current/t/UniGene.t
   trunk/packages/bioperl/branches/upstream/current/t/Variation_IO.t
   trunk/packages/bioperl/branches/upstream/current/t/WABA.t
   trunk/packages/bioperl/branches/upstream/current/t/XEMBL_DB.t
   trunk/packages/bioperl/branches/upstream/current/t/abi.t
   trunk/packages/bioperl/branches/upstream/current/t/ace.t
   trunk/packages/bioperl/branches/upstream/current/t/alignUtilities.t
   trunk/packages/bioperl/branches/upstream/current/t/asciitree.t
   trunk/packages/bioperl/branches/upstream/current/t/bsml_sax.t
   trunk/packages/bioperl/branches/upstream/current/t/chaosxml.t
   trunk/packages/bioperl/branches/upstream/current/t/cigarstring.t
   trunk/packages/bioperl/branches/upstream/current/t/consed.t
   trunk/packages/bioperl/branches/upstream/current/t/ctf.t
   trunk/packages/bioperl/branches/upstream/current/t/data/
   trunk/packages/bioperl/branches/upstream/current/t/data/1A11.pdb
   trunk/packages/bioperl/branches/upstream/current/t/data/1A3I.pdb
   trunk/packages/bioperl/branches/upstream/current/t/data/1BPT.pdb
   trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.0
   trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.2
   trunk/packages/bioperl/branches/upstream/current/t/data/5X_1895.FASTXY
   trunk/packages/bioperl/branches/upstream/current/t/data/8HVP.pdb
   trunk/packages/bioperl/branches/upstream/current/t/data/AAC12660.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/AB077698.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/AE003528_ecoli.bls
   trunk/packages/bioperl/branches/upstream/current/t/data/AE003644_Adh-genomic.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/AF032047.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/AF165282.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/AHCYL1.kegg
   trunk/packages/bioperl/branches/upstream/current/t/data/ATF14F8.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/AY095303S1.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/AY763288.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/AnnIX-v003.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/BAB68554.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/BC000007.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/BEL16-LTR_AG.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/BK000016-tpa.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/BLOSUM50
   trunk/packages/bioperl/branches/upstream/current/t/data/BN000066-tpa.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/Bird_Ovomucoids.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/D10483.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/D12555.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/DQ018368.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/ECAPAH02.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test
   trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test2
   trunk/packages/bioperl/branches/upstream/current/t/data/Genscan.FastA
   trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer2.out
   trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.detail
   trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.predict
   trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerHMM.out
   trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerM.out
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.FASTA
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.gff
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grail
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grailexp
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.mzef
   trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.tblastx
   trunk/packages/bioperl/branches/upstream/current/t/data/Kingdoms_DNA.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/L77119.hmmer
   trunk/packages/bioperl/branches/upstream/current/t/data/LL-sample.seq
   trunk/packages/bioperl/branches/upstream/current/t/data/LOAD_Ccd1.dnd
   trunk/packages/bioperl/branches/upstream/current/t/data/LittleChrY.dbsnp.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/M0.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/MSGEFTUA.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/Mcjanrna_rdbII.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/MmCT
   trunk/packages/bioperl/branches/upstream/current/t/data/NC_001284.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/NC_006346.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/NC_006511-short.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/NM_002253.tseq
   trunk/packages/bioperl/branches/upstream/current/t/data/NM_002254.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/NT_021877.gbk
   trunk/packages/bioperl/branches/upstream/current/t/data/O_sat.wgs
   trunk/packages/bioperl/branches/upstream/current/t/data/P33897
   trunk/packages/bioperl/branches/upstream/current/t/data/P35527.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/PAM250
   trunk/packages/bioperl/branches/upstream/current/t/data/Primate_mtDNA.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/Rab1.chaos-xml
   trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family4nl.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family7n.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family8a.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/SwissProt.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/T7.aln
   trunk/packages/bioperl/branches/upstream/current/t/data/Treebase-chlamy-dna.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/U58726.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/U71225.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/U83300.bsml
   trunk/packages/bioperl/branches/upstream/current/t/data/UnaSmithHIV-both.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/X98338_Adh-mRNA.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/a_thaliana.blastn
   trunk/packages/bioperl/branches/upstream/current/t/data/aaml.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/aaml_pairwise.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/acefile.ace.1
   trunk/packages/bioperl/branches/upstream/current/t/data/acefile.singlets
   trunk/packages/bioperl/branches/upstream/current/t/data/adh.mb_tree.nexus
   trunk/packages/bioperl/branches/upstream/current/t/data/alnfile.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/amino.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/ar.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/atp1.matrix
   trunk/packages/bioperl/branches/upstream/current/t/data/ay007676.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/ay116458.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/ay149291.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/barns-combined.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/baseml.pairwise
   trunk/packages/bioperl/branches/upstream/current/t/data/baseml.usertree
   trunk/packages/bioperl/branches/upstream/current/t/data/basic-bush.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/basic-ladder.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/
   trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff
   trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff3
   trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.cor
   trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.fpc
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/feature_data.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version1.gif
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version1.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version2.gif
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version2.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version3.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version4.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version5.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version6.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version7.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version8.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version1.gif
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version1.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version10.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version11.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version12.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version13.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version14.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version2.gif
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version2.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version3.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version4.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version5.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version6.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version7.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version8.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version9.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version1.gif
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version1.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version2.gif
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version2.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version3.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version4.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version5.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version6.png
   trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version7.png
   trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn
   trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn.rev
   trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastx.out
   trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.bug940.out
   trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.out
   trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.tblastx.out
   trunk/packages/bioperl/branches/upstream/current/t/data/blast.report
   trunk/packages/bioperl/branches/upstream/current/t/data/blat.psLayout3
   trunk/packages/bioperl/branches/upstream/current/t/data/blosum62.bla
   trunk/packages/bioperl/branches/upstream/current/t/data/branchSite.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/brassica_ATH.WUBLASTN
   trunk/packages/bioperl/branches/upstream/current/t/data/bug2120.phd
   trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN
   trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN.m8
   trunk/packages/bioperl/branches/upstream/current/t/data/calm.swiss
   trunk/packages/bioperl/branches/upstream/current/t/data/catalase-webblast.BLASTP
   trunk/packages/bioperl/branches/upstream/current/t/data/cds-266.fas
   trunk/packages/bioperl/branches/upstream/current/t/data/chad100.scf
   trunk/packages/bioperl/branches/upstream/current/t/data/char-interleave.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/char-matrix-spaces.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml315.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dN
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dS
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.t
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/4fold.nuc
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lnf
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.ctl
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.trees
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst1
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rub
   trunk/packages/bioperl/branches/upstream/current/t/data/codeml_nssites.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/compLD_missingtest.prettybase
   trunk/packages/bioperl/branches/upstream/current/t/data/compLD_test.prettybase
   trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test
   trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test2
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.contigs
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.log
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs.qual
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.log
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.problems
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.problems.qual
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.qual
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.singlets
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.view
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.newtags
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.phrap.out
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.screen.out
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_projectNewChromats.fof
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project_to_alu.cross
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4922R.phd.1
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924F.phd.1
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924R.phd.1
   trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4947F.phd.1
   trunk/packages/bioperl/branches/upstream/current/t/data/crab.dat.cn
   trunk/packages/bioperl/branches/upstream/current/t/data/crab.nj
   trunk/packages/bioperl/branches/upstream/current/t/data/crab.njb
   trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-0
   trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-3
   trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-4
   trunk/packages/bioperl/branches/upstream/current/t/data/ctgdemo.fpc
   trunk/packages/bioperl/branches/upstream/current/t/data/cys1_dicdi.water
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.msf
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.needle
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.tblastn
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.water
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.FASTA
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.msf
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.hmmsearch
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.msf
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.newick
   trunk/packages/bioperl/branches/upstream/current/t/data/cysprot_vs_gadfly.FASTA
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/1.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/2.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/3.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/4.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/5.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/6.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/7.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/directives.gff3
   trunk/packages/bioperl/branches/upstream/current/t/data/dmel_2Lchunk.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/dna1.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dna2.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub-prot.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wublastx
   trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastn
   trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastx
   trunk/packages/bioperl/branches/upstream/current/t/data/dq519393.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/ecoli-trna-qrna.out
   trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rps.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rpsblast
   trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.bls
   trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.noseqs.wublastp
   trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.wublastp
   trunk/packages/bioperl/branches/upstream/current/t/data/empty.bl2seq
   trunk/packages/bioperl/branches/upstream/current/t/data/entrezgene.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/example.hap
   trunk/packages/bioperl/branches/upstream/current/t/data/example.phase
   trunk/packages/bioperl/branches/upstream/current/t/data/expected.blast.out
   trunk/packages/bioperl/branches/upstream/current/t/data/factor7.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/footprinter.out
   trunk/packages/bioperl/branches/upstream/current/t/data/frac_problems.blast
   trunk/packages/bioperl/branches/upstream/current/t/data/geneid_1.0.out
   trunk/packages/bioperl/branches/upstream/current/t/data/genemark.out
   trunk/packages/bioperl/branches/upstream/current/t/data/genewise.out
   trunk/packages/bioperl/branches/upstream/current/t/data/genewise_output.paracel_btk
   trunk/packages/bioperl/branches/upstream/current/t/data/genomewise.out
   trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.epcr
   trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.genscan
   trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.mzef
   trunk/packages/bioperl/branches/upstream/current/t/data/gf-s71.needle
   trunk/packages/bioperl/branches/upstream/current/t/data/glimmer.out
   trunk/packages/bioperl/branches/upstream/current/t/data/hemoglobinA.meg
   trunk/packages/bioperl/branches/upstream/current/t/data/hg16_chroms.gff
   trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam.out
   trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam_fake.out
   trunk/packages/bioperl/branches/upstream/current/t/data/hmmsearch.out
   trunk/packages/bioperl/branches/upstream/current/t/data/hs_est.est2genome
   trunk/packages/bioperl/branches/upstream/current/t/data/hs_fugu.newick
   trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.aln
   trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fas
   trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/hsinsulin.blastcl3.blastn
   trunk/packages/bioperl/branches/upstream/current/t/data/humor.maf
   trunk/packages/bioperl/branches/upstream/current/t/data/humts1.pal
   trunk/packages/bioperl/branches/upstream/current/t/data/hybrid1.gff3
   trunk/packages/bioperl/branches/upstream/current/t/data/hybrid2.gff3
   trunk/packages/bioperl/branches/upstream/current/t/data/insulin.water
   trunk/packages/bioperl/branches/upstream/current/t/data/interpro_ebi.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/interpro_short.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/intrablock-comment.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/kinases.tsv
   trunk/packages/bioperl/branches/upstream/current/t/data/kinases.xls
   trunk/packages/bioperl/branches/upstream/current/t/data/knownGene.gff3
   trunk/packages/bioperl/branches/upstream/current/t/data/little.largemultifasta
   trunk/packages/bioperl/branches/upstream/current/t/data/long-names.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/lucy.info
   trunk/packages/bioperl/branches/upstream/current/t/data/lucy.qual
   trunk/packages/bioperl/branches/upstream/current/t/data/lucy.seq
   trunk/packages/bioperl/branches/upstream/current/t/data/lucy.stderr
   trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.protml
   trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.simple.protml
   trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.out
   trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/mast.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/masta.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/megablast_output.paracel_btk
   trunk/packages/bioperl/branches/upstream/current/t/data/meme.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/mini-AE001405.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/mini-align.aln
   trunk/packages/bioperl/branches/upstream/current/t/data/mixedmast.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/mpath.ontology.test
   trunk/packages/bioperl/branches/upstream/current/t/data/multi_1.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/multi_2.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/multi_blast.bls
   trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq
   trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq.qual
   trunk/packages/bioperl/branches/upstream/current/t/data/multiline-intrablock-comment.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/multiseq.bls
   trunk/packages/bioperl/branches/upstream/current/t/data/mus.bls.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/mutations.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/mutations.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/myco_sites.gff
   trunk/packages/bioperl/branches/upstream/current/t/data/nei_gojobori_test.aln
   trunk/packages/bioperl/branches/upstream/current/t/data/neighbor.dist
   trunk/packages/bioperl/branches/upstream/current/t/data/new_blastn.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/newformat.swiss
   trunk/packages/bioperl/branches/upstream/current/t/data/no-genes.genscan
   trunk/packages/bioperl/branches/upstream/current/t/data/no_FH.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/no_cds_example.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/no_hsps.blastp
   trunk/packages/bioperl/branches/upstream/current/t/data/noninterleaved.phy
   trunk/packages/bioperl/branches/upstream/current/t/data/omim_genemap_test
   trunk/packages/bioperl/branches/upstream/current/t/data/omim_text_test
   trunk/packages/bioperl/branches/upstream/current/t/data/pep-266.aln
   trunk/packages/bioperl/branches/upstream/current/t/data/pfam_tests.stk
   trunk/packages/bioperl/branches/upstream/current/t/data/phi.out
   trunk/packages/bioperl/branches/upstream/current/t/data/phipsi.out
   trunk/packages/bioperl/branches/upstream/current/t/data/phredfile.phd
   trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist-36.out
   trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist.out
   trunk/packages/bioperl/branches/upstream/current/t/data/pictogram.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/plague_yeast.bls.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.old.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.multidat
   trunk/packages/bioperl/branches/upstream/current/t/data/popstats.prettybase
   trunk/packages/bioperl/branches/upstream/current/t/data/pre_rel9.swiss
   trunk/packages/bioperl/branches/upstream/current/t/data/primedseq.fa
   trunk/packages/bioperl/branches/upstream/current/t/data/primer3_infile.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/primer3_outfile.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/primer3_output.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/prints.out
   trunk/packages/bioperl/branches/upstream/current/t/data/promoterwise.out
   trunk/packages/bioperl/branches/upstream/current/t/data/protpars.phy
   trunk/packages/bioperl/branches/upstream/current/t/data/pseudowise.out
   trunk/packages/bioperl/branches/upstream/current/t/data/psi_xml.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/psiblastreport.out
   trunk/packages/bioperl/branches/upstream/current/t/data/puzzle.tre
   trunk/packages/bioperl/branches/upstream/current/t/data/qrna-relloc.out
   trunk/packages/bioperl/branches/upstream/current/t/data/qualfile.qual
   trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings1.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings2.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace_02.nex
   trunk/packages/bioperl/branches/upstream/current/t/data/readtest.abi
   trunk/packages/bioperl/branches/upstream/current/t/data/readtest.ctf
   trunk/packages/bioperl/branches/upstream/current/t/data/readtest.exp
   trunk/packages/bioperl/branches/upstream/current/t/data/readtest.pln
   trunk/packages/bioperl/branches/upstream/current/t/data/readtest.ztr
   trunk/packages/bioperl/branches/upstream/current/t/data/rebase.itype2
   trunk/packages/bioperl/branches/upstream/current/t/data/rebase.withrefm
   trunk/packages/bioperl/branches/upstream/current/t/data/registry/
   trunk/packages/bioperl/branches/upstream/current/t/data/registry/bdb/
   trunk/packages/bioperl/branches/upstream/current/t/data/registry/bdb/seqdatabase.ini
   trunk/packages/bioperl/branches/upstream/current/t/data/registry/flat/
   trunk/packages/bioperl/branches/upstream/current/t/data/registry/flat/seqdatabase.ini
   trunk/packages/bioperl/branches/upstream/current/t/data/rel9.swiss
   trunk/packages/bioperl/branches/upstream/current/t/data/repeatmasker.fa.out
   trunk/packages/bioperl/branches/upstream/current/t/data/revcomp_mrna.gb
   trunk/packages/bioperl/branches/upstream/current/t/data/rfam_tests.stk
   trunk/packages/bioperl/branches/upstream/current/t/data/roa1.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/roa1.genbank
   trunk/packages/bioperl/branches/upstream/current/t/data/roa1.swiss
   trunk/packages/bioperl/branches/upstream/current/t/data/roa1_v2.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/sbay_c545-yeast.BLASTZ.PSL
   trunk/packages/bioperl/branches/upstream/current/t/data/seg.out
   trunk/packages/bioperl/branches/upstream/current/t/data/seqdatabase.ini
   trunk/packages/bioperl/branches/upstream/current/t/data/seqfeaturedb/
   trunk/packages/bioperl/branches/upstream/current/t/data/seqfeaturedb/test.gff3
   trunk/packages/bioperl/branches/upstream/current/t/data/seqfile.pir
   trunk/packages/bioperl/branches/upstream/current/t/data/seqs.fas
   trunk/packages/bioperl/branches/upstream/current/t/data/sequencefamily.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/short.blx
   trunk/packages/bioperl/branches/upstream/current/t/data/signalp.negative.out
   trunk/packages/bioperl/branches/upstream/current/t/data/signalp.positive.out
   trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.for
   trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.rev
   trunk/packages/bioperl/branches/upstream/current/t/data/sim4.rev
   trunk/packages/bioperl/branches/upstream/current/t/data/singleNSsite.mlc
   trunk/packages/bioperl/branches/upstream/current/t/data/so.obo
   trunk/packages/bioperl/branches/upstream/current/t/data/sofa.ontology
   trunk/packages/bioperl/branches/upstream/current/t/data/sparsealn.needle
   trunk/packages/bioperl/branches/upstream/current/t/data/spidey.noalignment
   trunk/packages/bioperl/branches/upstream/current/t/data/spidey.test1
   trunk/packages/bioperl/branches/upstream/current/t/data/sprintf.rnamotif
   trunk/packages/bioperl/branches/upstream/current/t/data/ssp160.embl.1
   trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_medline.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_pubmed.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/sv40_small.xml
   trunk/packages/bioperl/branches/upstream/current/t/data/swiss.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/swisspfam.data
   trunk/packages/bioperl/branches/upstream/current/t/data/tab1part.mif
   trunk/packages/bioperl/branches/upstream/current/t/data/tab2part.mif
   trunk/packages/bioperl/branches/upstream/current/t/data/tab3part.mif
   trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/
   trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/names.dmp
   trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/nodes.dmp
   trunk/packages/bioperl/branches/upstream/current/t/data/tblastn.out
   trunk/packages/bioperl/branches/upstream/current/t/data/test.ace
   trunk/packages/bioperl/branches/upstream/current/t/data/test.embl
   trunk/packages/bioperl/branches/upstream/current/t/data/test.embl2sq
   trunk/packages/bioperl/branches/upstream/current/t/data/test.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/test.game
   trunk/packages/bioperl/branches/upstream/current/t/data/test.gcg
   trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgblast
   trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgfasta
   trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank
   trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank.noseq
   trunk/packages/bioperl/branches/upstream/current/t/data/test.interpro
   trunk/packages/bioperl/branches/upstream/current/t/data/test.lasergene
   trunk/packages/bioperl/branches/upstream/current/t/data/test.mase
   trunk/packages/bioperl/branches/upstream/current/t/data/test.meme
   trunk/packages/bioperl/branches/upstream/current/t/data/test.meme2
   trunk/packages/bioperl/branches/upstream/current/t/data/test.metafasta
   trunk/packages/bioperl/branches/upstream/current/t/data/test.nh
   trunk/packages/bioperl/branches/upstream/current/t/data/test.nhx
   trunk/packages/bioperl/branches/upstream/current/t/data/test.pfam
   trunk/packages/bioperl/branches/upstream/current/t/data/test.pir
   trunk/packages/bioperl/branches/upstream/current/t/data/test.ptt
   trunk/packages/bioperl/branches/upstream/current/t/data/test.raw
   trunk/packages/bioperl/branches/upstream/current/t/data/test.swiss
   trunk/packages/bioperl/branches/upstream/current/t/data/test.tab
   trunk/packages/bioperl/branches/upstream/current/t/data/test.tigrxml
   trunk/packages/bioperl/branches/upstream/current/t/data/test.txt
   trunk/packages/bioperl/branches/upstream/current/t/data/test.waba
   trunk/packages/bioperl/branches/upstream/current/t/data/test_badlf.gcg
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.aln
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.mase
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.metafasta
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.msf
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.nexus
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.pfam
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.phylip
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.po
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.prodom
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.psi
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.selex
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln.stockholm
   trunk/packages/bioperl/branches/upstream/current/t/data/testaln2.fasta
   trunk/packages/bioperl/branches/upstream/current/t/data/testdat.exonerate
   trunk/packages/bioperl/branches/upstream/current/t/data/testdbaccnums.out
   trunk/packages/bioperl/branches/upstream/current/t/data/testfile.erpin
   trunk/packages/bioperl/branches/upstream/current/t/data/testfuzzy.genbank
   trunk/packages/bioperl/branches/upstream/current/t/data/tmhmm.out
   trunk/packages/bioperl/branches/upstream/current/t/data/transfac.dat
   trunk/packages/bioperl/branches/upstream/current/t/data/tree_nonewline.nexus
   trunk/packages/bioperl/branches/upstream/current/t/data/tricky.wublast
   trunk/packages/bioperl/branches/upstream/current/t/data/trna.strict.rnamotif
   trunk/packages/bioperl/branches/upstream/current/t/data/unigene.data
   trunk/packages/bioperl/branches/upstream/current/t/data/urease.tre.nexus
   trunk/packages/bioperl/branches/upstream/current/t/data/version2.scf
   trunk/packages/bioperl/branches/upstream/current/t/data/version3.scf
   trunk/packages/bioperl/branches/upstream/current/t/data/worm_fam_2785.cdna
   trunk/packages/bioperl/branches/upstream/current/t/data/yeast.tRNAscanSE
   trunk/packages/bioperl/branches/upstream/current/t/data/yn00.mlc
   trunk/packages/bioperl/branches/upstream/current/t/ePCR.t
   trunk/packages/bioperl/branches/upstream/current/t/embl.t
   trunk/packages/bioperl/branches/upstream/current/t/entrezgene.t
   trunk/packages/bioperl/branches/upstream/current/t/est2genome.t
   trunk/packages/bioperl/branches/upstream/current/t/exp.t
   trunk/packages/bioperl/branches/upstream/current/t/fasta.t
   trunk/packages/bioperl/branches/upstream/current/t/flat.t
   trunk/packages/bioperl/branches/upstream/current/t/game.t
   trunk/packages/bioperl/branches/upstream/current/t/gcg.t
   trunk/packages/bioperl/branches/upstream/current/t/genbank.t
   trunk/packages/bioperl/branches/upstream/current/t/hmmer.t
   trunk/packages/bioperl/branches/upstream/current/t/hmmer_pull.t
   trunk/packages/bioperl/branches/upstream/current/t/interpro.t
   trunk/packages/bioperl/branches/upstream/current/t/kegg.t
   trunk/packages/bioperl/branches/upstream/current/t/largefasta.t
   trunk/packages/bioperl/branches/upstream/current/t/largepseq.t
   trunk/packages/bioperl/branches/upstream/current/t/lasergene.t
   trunk/packages/bioperl/branches/upstream/current/t/lib/
   trunk/packages/bioperl/branches/upstream/current/t/lib/Error.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Module.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester/
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester/Color.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/More.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Simple.pm
   trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Tutorial.pod
   trunk/packages/bioperl/branches/upstream/current/t/lucy.t
   trunk/packages/bioperl/branches/upstream/current/t/masta.t
   trunk/packages/bioperl/branches/upstream/current/t/metafasta.t
   trunk/packages/bioperl/branches/upstream/current/t/multiple_fasta.t
   trunk/packages/bioperl/branches/upstream/current/t/obo_parser.t
   trunk/packages/bioperl/branches/upstream/current/t/pICalculator.t
   trunk/packages/bioperl/branches/upstream/current/t/phd.t
   trunk/packages/bioperl/branches/upstream/current/t/pir.t
   trunk/packages/bioperl/branches/upstream/current/t/pln.t
   trunk/packages/bioperl/branches/upstream/current/t/primaryqual.t
   trunk/packages/bioperl/branches/upstream/current/t/primedseq.t
   trunk/packages/bioperl/branches/upstream/current/t/primer3.t
   trunk/packages/bioperl/branches/upstream/current/t/protgraph.t
   trunk/packages/bioperl/branches/upstream/current/t/psm.t
   trunk/packages/bioperl/branches/upstream/current/t/qual.t
   trunk/packages/bioperl/branches/upstream/current/t/raw.t
   trunk/packages/bioperl/branches/upstream/current/t/rnamotif.t
   trunk/packages/bioperl/branches/upstream/current/t/scf.t
   trunk/packages/bioperl/branches/upstream/current/t/seq_quality.t
   trunk/packages/bioperl/branches/upstream/current/t/seqfeaturePrimer.t
   trunk/packages/bioperl/branches/upstream/current/t/seqread_fail.t
   trunk/packages/bioperl/branches/upstream/current/t/sequencetrace.t
   trunk/packages/bioperl/branches/upstream/current/t/seqwithquality.t
   trunk/packages/bioperl/branches/upstream/current/t/simpleGOparser.t
   trunk/packages/bioperl/branches/upstream/current/t/singlet.t
   trunk/packages/bioperl/branches/upstream/current/t/sirna.t
   trunk/packages/bioperl/branches/upstream/current/t/splicedseq.t
   trunk/packages/bioperl/branches/upstream/current/t/swiss.t
   trunk/packages/bioperl/branches/upstream/current/t/tRNAscanSE.t
   trunk/packages/bioperl/branches/upstream/current/t/tab.t
   trunk/packages/bioperl/branches/upstream/current/t/table.t
   trunk/packages/bioperl/branches/upstream/current/t/testformats.pl
   trunk/packages/bioperl/branches/upstream/current/t/tigrxml.t
   trunk/packages/bioperl/branches/upstream/current/t/tinyseq.t
   trunk/packages/bioperl/branches/upstream/current/t/trim.t
   trunk/packages/bioperl/branches/upstream/current/t/ztr.t
   trunk/packages/bioperl/tags/
Log:
[svn-inject] Installing original source of bioperl

Added: trunk/packages/bioperl/branches/upstream/current/AUTHORS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/AUTHORS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/AUTHORS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,232 @@
+=head1 PRIMARY AUTHORS AND MAJOR CONTRIBUTORS TO BIOPERL
+
+=head2 Releases co-ordinated and submitted by bioperl core devs.
+
+=over
+
+=item * Ewan Birney <birney at ebi.ac.uk>
+
+=item * Chris Dagdigian <dag at sonsorol.org>
+
+=item * Hilmar Lapp <hlapp at gmx.net>
+
+=item * Heikki Lehväslaiho <heikki at ebi.ac.uk>
+
+=item * Jason Stajich <jason at bioperl.org>
+
+=item * Lincoln Stein <lstein at cshl.org>
+
+=back
+
+=head2 Previous Bioperl Coordinators:
+
+=over
+
+=item * Steven Brenner <brenner at compbio.berkeley.edu>
+
+=item * Georg Fuellen <fuellen at alum.mit.edu>
+
+=item * Steve Chervitz <sac at bioperl.org>
+
+=back
+
+=head2 Major Contributors
+
+(Feel free to add descriptions of which modules you are responsible
+for if you see fit)
+
+=over
+
+=item * Richard Adams <Richard.Adams at ed.ac.uk>
+
+=item * Shuly Avraham <avraham at cshl.org> - Bio::Graphics::Glyph
+
+=item * Sendu Bala <bix at sendu.me.uk>
+
+=item * Peter Blaiklock <pblaiklo at restrictionmapper.org>
+
+=item * Benjamin Berman <benb at fruitfly.berkeley.edu>
+
+=item * Matthew Betts <Matthew.Betts at ii.uib.no>
+
+=item * David Block <dblock at gnf.org>
+
+=item * Kris Boulez <kris.boulez at algonomics.com>
+
+=item * Tim Bunce <Tim.Bunce at pobox.com> - code optimizations
+
+=item * Scott Cain <cain at cshl.org> - Bio::Graphics::Glyph
+
+=item * Yee Man Chan <ymc at yahoo.com> - Bio::Tools::dpAlign
+
+=item * Brad Chapman <chapmanb at arches.uga.edu>
+
+=item * Michele Clamp <michele at sanger.ac.uk>
+
+=item * Tony Cox <avc at sanger.ac.uk>
+
+=item * James Cuff <james at sanger.ac.uk>
+
+=item * Andrew Dalke <dalke at acm.org>
+
+=item * Allen Day <allenday at ucla.edu>
+
+=item * Jared Fox <jaredfox at ucla.edu> - Bio::SeqIO::interpro
+
+=item * Brian O'Connor <boconnor at ucla.edu> - Bio::TreeIO::svggraph
+
+=item * James Diggans <JDiggans at genelogic.com>
+
+=item * Peter Dimitrov <dimitrov at gnf.org> - Bio::Ontology
+
+=item * Rich Dobson <r.j.dobson at qmul.ac.uk> - Bio::PopGen::IO::hapmap,phase
+
+=item * Paul Edlefsen <pedlefsen at systemsbiology.org>
+
+=item * Rob Edwards <redwards at utmem.edu> - Bio::Restriction
+
+=item * Arne Elofsson <arne at sbc.su.se>
+
+=item * David Evans <David.Evans at vir.gla.ac.uk>
+
+=item * Christopher Fields <cjfields at uiuc.edu>
+
+=item * Mark Fiers <M.W.E.J.Fiers at plant.wag-ur.nl>
+
+=item * The Fugu Team <fuguteam at fugu-sg.org>
+
+=item * Luc Gauthier <lgauthie at hotmail.com>
+
+=item * James Gilbert <jgrg at sanger.ac.uk>
+
+=item * Nat Goodman
+
+=item * Ed Green <ed at compbio.berkeley.edu>
+
+=item * Matthew Hahn <matthew.hahn at duke.edu>
+
+=item * Roger Hall <roger at iosea.com>
+
+=item * Todd Harris <harris at cshl.org> - SVG support in Bio::Graphics
+
+=item * Mauricio Herrera Cuadra <arareko at campus.iztacala.unam.mx>
+
+=item * Ian Holmes <ihn at fruitfly.org>
+
+=item * Shawn Hoon <shawnh at fugu-sg.org>
+
+=item * Robert Hubley <rhubley at systemsbiology.org>
+
+=item * Joseph Insana <insana at ebi.ac.uk> - Bio::LiveSeq
+
+=item * Donald Jackson <donald.jackson at bms.com> - SiRNA
+
+=item * Keith James <kdj at sanger.ac.uk> - Bio::Tools::Geneid
+
+=item * Nicolas Joly <njoly at pasteur.fr> 
+
+=item * Ian Korf <ikorf at sapiens.wustl.edu>
+
+=item * Dan Kortschak <kortschak at rsbs.anu.edu.au>
+
+=item * Arek Kasprzyk <arek at ebi.ac.uk>
+
+=item * Andreas Kähäri <andreas.kahari at ebi.ac.uk>
+
+=item * Charles C. Kim <cckim at stanford.edu>
+
+=item * Stefan Kirov <skirov at utk.edu> - Bio::Matrix::PSM
+
+=item * Balamurugan Kumarasamy <savikalpa at fugu-sg.org>
+
+=item * Josh Lauricha <laurichj at cs.ucr.edu> - Bio::SeqIO::tigr
+
+=item * Eckhard Lehmann <ecky at e-lehmann.de>
+
+=item * Catherine Letondal <letondal at pasteur.fr>
+
+=item * Philip Lijnzaad <p.lijnzaad at med.uu.nl>
+
+=item * Aaron Mackey <amackey at pcbi.upenn.edu>
+
+=item * Brad Marshall <bradmars at yahoo.com>
+
+=item * Chad Matsalla <chad at dieselwurks.com>
+
+=item * Andrew Macgregor <andrew at anatomy.otago.ac.nz>
+
+=item * Sheldon McKay <mckays at cshl.edu>
+
+=item * Juha Muilu <muilu at ebi.ac.uk>
+
+=item * Chris Mungall <cjm at fruitfly.org>
+
+=item * Giri Narasimhan <giri at cs.fiu.edu>
+
+=item * Brian Osborne <bosborne at alum.mit.edu>
+
+=item * Xiaokang Pan <pan at cshl.org> - Bio::Graphics::Glyph
+
+=item * Jong Park
+
+=item * Matthew Pocock <matthew_pocock at yahoo.co.uk>
+
+=item * Lorenz Pollack <lorenz at ist.org> -- BPlite porting
+
+=item * Richard Resnick -- original Bio::Seq
+
+=item * Todd Richmond <todd at andrew2.stanford.edu>
+
+=item * Peter Schattner <schattner at alum.mit.edu>
+
+=item * Torsten Seemann <torsten.seemann at infotech.monash.edu.au> -- Bio::Tools::Run::StandaloneBlast
+
+=item * Martin Senger <senger at ebi.ac.uk> -- Bio::Biblio
+
+=item * Nigam Shah <nigam at psu.edu>
+
+=item * Shengqiang Shu <sshu at bdgp.lbl.gov> - Bio::Graphics::Glyph
+
+=item * Allen Smith <allens at cpan.org> -- Bio::Matrix and Bio::SimpleAlign fixes
+
+=item * Marc Sohrmann <ms2 at sanger.ac.uk>
+
+=item * Robson Francisco de Souza <rfsouza at citri.iq.usp.br> - Bio::Assembly
+
+=item * Mark Southern <mark_southern at merck.com>
+
+=item * Will Spooner <whs at sanger.ac.uk>
+
+=item * Arne Stabenau <stabenau at ebi.ac.uk>
+
+=item * Elia Stupka <elia at fugu-sg.org>
+
+=item * Gert Thijs <gert.thijs at esat.kuleuven.ac.be> 
+
+=item * James Thompson <tex at biosysadmin.com> - Bio::Matrix::PSM protein-related modules.
+
+=item * Charles Tilford <tilfordc at bms.com>
+
+=item * Anthony Underwood <aunderwood at phls.org.uk>
+
+=item * Paul-Christophe Varoutas 
+
+=item * Andrew G. Walsh <paeruginosa at hotmail.com>
+
+=item * Kai Wang <tumorimmunology at yahoo.com>
+
+=item * Gary Williams <G.Williams at hgmp.mrc.ac.uk>
+
+=item * Mark Wilkinson <mwilkinson at gene.pbi.nrc.ca>
+
+=item * Helge Weissig <helgew at sdsc.edu>
+
+=item * Juguang Xiao <juguang at tll.org.sg>
+
+=item * Alex Zelensky <alex_zelensky at mac.com> - Bioperl-DB 
+
+=item * Peili Zhang <peili at morgan.harvard.edu>
+
+=item * Christian M. Zmasek <czmasek at gnf.org> - Bio::Phenotype & Bio::Ontology
+
+=back

Added: trunk/packages/bioperl/branches/upstream/current/BUGS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/BUGS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/BUGS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,95 @@
+# $Id: BUGS,v 1.6.6.1 2006/11/17 09:32:43 sendu Exp $
+
+Known Bugs
+
+Bugs are tracked at this URL:
+http://bugzilla.bioperl.org/
+
+
+Bioperl 1.5.2
+=============
+
+There are no known installation bugs in 1.5.2 per se, but issues with
+external programs may cause problems. See the following URL for details:
+http://www.bioperl.org/wiki/Release_1.5.2#Notes
+
+
+Bioperl 1.2
+===========
+
+ * The StandAloneBlast.t test is failing on cygwin installations (and
+   nowhere else). We suspect something to do with temporary file
+   opening. Fixed in 1.4 (set TMPDIR).
+
+
+Bioperl 0.9.0 
+=============
+
+ * Bio::Tools::Blast continues to cause problems for some people.  As
+   it is not actively maintained there are a slew of reported bugs for 
+   it that have not been fixed.  
+
+ * Bio::Tools::Run::Alignment::TCoffee - t_coffee binary does not get 
+   all parameters it needs when aligning two DNA sequences
+   (jitterbug #966).
+
+ * Bio::Tools::Run::ClustalW and t/ClustalW will report errors for
+   clustalw versions 1.8x due to a bug in clustalw.
+
+ * Bio::DB::GenBank continues to have intermittent errors.  Bio::DB::GDB 
+   is also unreliable at times and one can safely ignore errors from
+   these during a make test.  
+   Bio::DB::GenBank is unable to download whole contig files as well
+   as NCBI ref seqs like NT_* numbers unless the -format flag is
+   passed in and specified as 'fasta' in the constructor.
+   get_Stream_by_batch() also has intermittent errors which are being
+   tracked down.
+
+
+Bioperl 0.7.2
+=============
+
+ * NCBI has changed some of the cgi scripts for retrieving sequences
+   online which has resulted in some of the DB methods not working
+   consistently.  We are addressing these in the 0.9.x and 1.0 series
+   of releases.  We recommend using the Bio::DB::EMBL object that is
+   part of the later releases. 
+ 
+   Additionally RefSeq Contigs are not properly downloaded, please see
+   the bioperl list archives for information about potential
+   workarounds and ongoing development effort to address these.
+
+
+Bioperl 0.7.1
+=============
+
+ * Bio::Tools::BPlite does not parse and set frame properly for
+   tblastx reports (Jitterbug bug # 978).
+
+ * Bio::Tools::BPlite interface needs to be updated to fix parsing
+   more than one bl2seq report (Jitterbug bug #940); this has been
+   fixed on the main code trunk and will be part of the next major
+   bioperl release.
+ 
+ * If File::Temp is not installed, tempdirs are not cleaned up
+   properly.  This is fixed on main code trunk with the introduction
+   of rmtree method in Bio::Root::IO, however, it is best to install
+   File::Temp when running 0.7 branch code.
+ 
+ * Bio::Tools::Blast does not allow users to run blast, instead use
+   Bio::Tools::Run::StandAloneBlast to run local blasts.  To submit
+   jobs to a remote blast server like NCBI a module
+   Bio::Tools::Run::RemoteBlast has been written but is part of the
+   main trunk code and must be obtained through CVS until the next
+   major bioperl release.
+
+
+Bioperl 0.7
+===========
+
+ * Bio::Tools::BPlite doc error: the synopsis
+   lists the code as
+     my $parser = new BPlite(\*FH);  
+   should be 
+     my $parser = new Bio::Tools::BPlite(\*FH);
+  

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Align/AlignI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Align/AlignI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Align/AlignI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,821 @@
+# $Id: AlignI.pm,v 1.16.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Align::AlignI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::AlignI - An interface for describing sequence alignments.
+
+=head1 SYNOPSIS
+
+  # get a Bio::Align::AlignI somehow - typically using Bio::AlignIO system
+  # some descriptors
+  print $aln->length, "\n";
+  print $aln->no_residues, "\n";
+  print $aln->is_flush, "\n";
+  print $aln->no_sequences, "\n";
+  print $aln->percentage_identity, "\n";
+  print $aln->consensus_string(50), "\n";
+
+  # find the position in the alignment for a sequence location
+  $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
+
+  # extract sequences and check values for the alignment column $pos
+  foreach $seq ($aln->each_seq) {
+      $res = $seq->subseq($pos, $pos);
+      $count{$res}++;
+  }
+  foreach $res (keys %count) {
+      printf "Res: %s  Count: %2d\n", $res, $count{$res};
+  }
+
+=head1 DESCRIPTION
+
+This interface describes the basis for alignment objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Ewan Birney, birney at ebi.ac.uk
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Align::AlignI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head1 Modifier methods
+
+These methods modify the MSE by adding, removing or shuffling complete
+sequences.
+
+=head2 add_seq
+
+ Title     : add_seq
+ Usage     : $myalign->add_seq($newseq);
+ Function  : Adds another sequence to the alignment. *Does not* align
+             it - just adds it to the hashes.
+ Returns   : nothing
+ Argument  : a Bio::LocatableSeq object
+             order (optional)
+
+See L<Bio::LocatableSeq> for more information.
+
+=cut
+
+sub add_seq {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 remove_seq
+
+ Title     : remove_seq
+ Usage     : $aln->remove_seq($seq);
+ Function  : Removes a single sequence from an alignment
+ Returns   :
+ Argument  : a Bio::LocatableSeq object
+
+=cut
+
+sub remove_seq {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 purge
+
+ Title   : purge
+ Usage   : $aln->purge(0.7);
+ Function:
+
+           Removes sequences above whatever %id.
+
+           This function will grind on large alignments. Beware!
+           (perhaps not ideally implemented)
+
+ Example :
+ Returns : An array of the removed sequences
+ Argument:
+
+
+=cut
+
+sub purge {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 sort_alphabetically
+
+ Title     : sort_alphabetically
+ Usage     : $ali->sort_alphabetically
+ Function  : 
+
+             Changes the order of the alignment to alphabetical on name 
+             followed by numerical by number.
+
+ Returns   : 
+ Argument  : 
+
+=cut
+
+sub sort_alphabetically {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head1 Sequence selection methods
+
+Methods returning one or more sequence objects.
+
+=head2 each_seq
+
+ Title     : each_seq
+ Usage     : foreach $seq ( $align->each_seq() ) 
+ Function  : Gets an array of Seq objects from the alignment
+ Returns   : an array
+ Argument  : 
+
+=cut
+
+sub each_seq {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 each_alphabetically
+
+ Title     : each_alphabetically
+ Usage     : foreach $seq ( $ali->each_alphabetically() )
+ Function  :
+
+             Returns an array of sequence objects sorted alphabetically
+             by name and then by start point.
+             Does not change the order of the alignment
+
+ Returns   : 
+ Argument  : 
+
+=cut
+
+sub each_alphabetically {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 each_seq_with_id
+
+ Title     : each_seq_with_id
+ Usage     : foreach $seq ( $align->each_seq_with_id() ) 
+ Function  : 
+
+             Gets an array of Seq objects from the
+             alignment, the contents being those sequences
+             with the given name (there may be more than one)
+
+ Returns   : an array
+ Argument  : a seq name
+
+=cut
+
+sub each_seq_with_id {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 get_seq_by_pos
+
+ Title     : get_seq_by_pos
+ Usage     : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment
+ Function  : 
+
+             Gets a sequence based on its position in the alignment.
+             Numbering starts from 1.  Sequence positions larger than
+             no_sequences() will throw an error.
+
+ Returns   : a Bio::LocatableSeq object
+ Argument  : positive integer for the sequence position
+
+=cut
+
+sub get_seq_by_pos {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head1 Create new alignments
+
+The results of these methods are horizontal or vertical subsets of the
+current MSE.
+
+=head2 select
+
+ Title     : select
+ Usage     : $aln2 = $aln->select(1, 3) # three first sequences
+ Function  : 
+
+             Creates a new alignment from a continuous subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than no_sequences() will throw an error.
+
+ Returns   : a Bio::SimpleAlign object
+ Argument  : positive integer for the first sequence
+             positive integer for the last sequence to include (optional)
+
+=cut
+
+sub select {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+
+=head2 select_noncont
+
+ Title     : select_noncont
+ Usage     : $aln2 = $aln->select_noncont(1, 3) # first and 3rd sequences
+ Function  : 
+
+             Creates a new alignment from a subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than no_sequences() will throw an error.
+
+ Returns   : a Bio::SimpleAlign object
+ Args      : array of integers for the sequences
+
+=cut
+
+sub select_noncont {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 slice
+
+ Title     : slice
+ Usage     : $aln2 = $aln->slice(20, 30)
+ Function  : 
+
+             Creates a slice from the alignment inclusive of start and
+             end columns.  Sequences with no residues in the slice are
+             excluded from the new alignment and a warning is printed.
+             Slice beyond the length of the sequence does not do
+             padding.
+
+ Returns   : a Bio::SimpleAlign object
+ Argument  : positive integer for start column 
+             positive integer for end column 
+
+=cut
+
+sub slice {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head1 Change sequences within the MSE
+
+These methods affect characters in all sequences without changing the
+alignment.
+
+
+=head2 map_chars
+
+ Title     : map_chars
+ Usage     : $ali->map_chars('\.','-')
+ Function  : 
+
+             Does a s/$arg1/$arg2/ on the sequences. Useful for gap
+             characters
+
+             Notice that the from (arg1) is interpreted as a regex,
+             so be careful about quoting meta characters (eg
+             $ali->map_chars('.','-') won't do what you want)
+
+ Returns   : 
+ Argument  : 'from' regexp
+             'to' string
+
+=cut
+
+sub map_chars {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 uppercase
+
+ Title     : uppercase()
+ Usage     : $ali->uppercase()
+ Function  : Sets all the sequences to uppercase
+ Returns   : 
+ Argument  : 
+
+=cut
+
+sub uppercase {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 match_line
+
+ Title    : match_line()
+ Usage    : $align->match_line()
+ Function : Generates a match line - much like a consensus string,
+            except that the line shows a '*' for each match.
+ Argument : (optional) Match line characters ('*' by default)
+            (optional) Strong match char (':' by default)
+            (optional) Weak match char ('.' by default)
+
+=cut
+
+sub match_line {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 match
+
+ Title     : match()
+ Usage     : $ali->match()
+ Function  : 
+
+             Goes through all columns and changes residues that are
+             identical to residue in first sequence to match '.'
+             character. Sets match_char.
+
+             USE WITH CARE: Most MSE formats do not support match
+             characters in sequences, so this is mostly for output
+             only. NEXUS format (Bio::AlignIO::nexus) can handle
+             it.
+
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+=cut
+
+sub match {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 unmatch
+
+ Title     : unmatch()
+ Usage     : $ali->unmatch()
+ Function  : 
+
+             Undoes the effect of method match. Unsets match_char.
+
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+=cut
+
+sub unmatch {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+
+=head1 MSE attributes
+
+Methods for setting and reading the MSE attributes. 
+
+Note that the methods defining character semantics depend on the user
+to set them sensibly.  They are needed only by certain input/output
+methods. Unset them by setting to an empty string ('').
+
+=head2 id
+
+ Title     : id
+ Usage     : $myalign->id("Ig")
+ Function  : Gets/sets the id field of the alignment
+ Returns   : An id string
+ Argument  : An id string (optional)
+
+=cut
+
+sub id {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 missing_char
+
+ Title     : missing_char
+ Usage     : $myalign->missing_char("?")
+ Function  : Gets/sets the missing_char attribute of the alignment
+             It is generally recommended to set it to 'n' or 'N' 
+             for nucleotides and to 'X' for protein. 
+ Returns   : A missing_char string
+ Argument  : A missing_char string (optional)
+
+=cut
+
+sub missing_char {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 match_char
+
+ Title     : match_char
+ Usage     : $myalign->match_char('.')
+ Function  : Gets/sets the match_char attribute of the alignment
+ Returns   : A match_char string
+ Argument  : A match_char string (optional)
+
+=cut
+
+sub match_char {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 gap_char
+
+ Title     : gap_char
+ Usage     : $myalign->gap_char('-')
+ Function  : Gets/sets the gap_char attribute of the alignment
+ Returns   : A gap_char string, defaults to '-'
+ Argument  : A gap_char string (optional)
+
+=cut
+
+sub gap_char {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 symbol_chars
+
+ Title   : symbol_chars
+ Usage   : my @symbolchars = $aln->symbol_chars;
+ Function: Returns all the seen symbols (other than gaps)
+ Returns : array of characters that are the seen symbols
+ Argument: boolean to include the gap/missing/match characters
+
+=cut
+
+sub symbol_chars {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head1 Alignment descriptors
+
+These read only methods describe the MSE in various ways. 
+
+
+=head2 consensus_string
+
+ Title     : consensus_string
+ Usage     : $str = $ali->consensus_string($threshold_percent)
+ Function  : Makes a strict consensus 
+ Returns   : 
+ Argument  : Optional threshold ranging from 0 to 100.
+             The consensus residue has to appear at least threshold %
+             of the sequences at a given location, otherwise a '?'
+             character will be placed at that location.
+             (Default value = 0%)
+
+=cut
+
+sub consensus_string {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 consensus_iupac
+
+ Title     : consensus_iupac
+ Usage     : $str = $ali->consensus_iupac()
+ Function  : 
+
+             Makes a consensus using IUPAC ambiguity codes from DNA
+             and RNA. The output is in upper case except when gaps in
+             a column force output to be in lower case.
+
+             Note that if your alignment sequences contain a lot of
+             IUPAC ambiguity codes you often have to manually set
+             alphabet.  Bio::PrimarySeq::_guess_type thinks they
+             indicate a protein sequence.
+
+ Returns   : consensus string
+ Argument  : none
+ Throws    : on protein sequences
+
+
+=cut
+
+sub consensus_iupac {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 is_flush
+
+ Title     : is_flush
+ Usage     : if( $ali->is_flush() )
+           : 
+           :
+ Function  : Tells you whether the alignment 
+           : is flush, ie all of the same length
+           : 
+           :
+ Returns   : 1 or 0
+ Argument  : 
+
+=cut
+
+sub is_flush {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 length
+
+ Title     : length()
+ Usage     : $len = $ali->length() 
+ Function  : Returns the maximum length of the alignment.
+             To be sure the alignment is a block, use is_flush
+ Returns   : 
+ Argument  : 
+
+=cut
+
+sub length {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 maxdisplayname_length
+
+ Title     : maxdisplayname_length
+ Usage     : $ali->maxdisplayname_length()
+ Function  : Gets the maximum length of the displayname in the
+             alignment. Used in writing out various MSE formats.
+             The older name maxname_length() is still accepted and
+             simply forwards to this method.
+ Returns   : integer
+ Argument  : none
+
+=cut
+
+sub maxdisplayname_length {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+sub maxname_length { my $self = shift; return $self->maxdisplayname_length(@_); }
+
+=head2 no_residues
+
+ Title     : no_residues
+ Usage     : $no = $ali->no_residues
+ Function  : number of residues in total in the alignment
+ Returns   : integer
+ Argument  : 
+
+=cut
+
+sub no_residues {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 no_sequences
+
+ Title     : no_sequences
+ Usage     : $depth = $ali->no_sequences
+ Function  : number of sequences in the sequence alignment
+ Returns   : integer
+ Argument  : None
+
+=cut
+
+sub no_sequences {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 percentage_identity
+
+ Title   : percentage_identity
+ Usage   : $id = $align->percentage_identity
+ Function: The function calculates the percentage identity of the alignment
+ Returns : The percentage identity of the alignment (as defined by the 
+						     implementation)
+ Argument: None
+
+=cut
+
+sub percentage_identity {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 overall_percentage_identity
+
+ Title   : overall_percentage_identity
+ Usage   : $id = $align->overall_percentage_identity
+ Function: The function calculates the percentage identity of 
+           the conserved columns
+ Returns : The percentage identity of the conserved columns
+ Args    : None
+
+=cut
+
+sub overall_percentage_identity {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+
+=head2 average_percentage_identity
+
+ Title   : average_percentage_identity
+ Usage   : $id = $align->average_percentage_identity
+ Function: The function uses a fast method to calculate the average 
+           percentage identity of the alignment
+ Returns : The average percentage identity of the alignment
+ Args    : None
+
+=cut
+
+sub average_percentage_identity {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head1 Alignment positions
+
+Methods to map a sequence position into an alignment column and back.
+column_from_residue_number() does the former. The latter is really a
+property of the sequence object and can be done using
+L<Bio::LocatableSeq::location_from_column>:
+
+    # select somehow a sequence from the alignment, e.g.
+    my $seq = $aln->get_seq_by_pos(1);
+    #$loc is undef or Bio::LocationI object
+    my $loc = $seq->location_from_column(5);
+
+
+=head2 column_from_residue_number
+
+ Title   : column_from_residue_number
+ Usage   : $col = $ali->column_from_residue_number( $seqname, $resnumber)
+ Function:
+
+           This function gives the position in the alignment
+           (i.e. column number) of the given residue number in the
+           sequence with the given name. For example, for the
+           alignment
+
+  	     Seq1/91-97 AC..DEF.GH
+  	     Seq2/24-30 ACGG.RTY..
+  	     Seq3/43-51 AC.DDEFGHI
+
+           column_from_residue_number( "Seq1", 94 ) returns 6.
+           column_from_residue_number( "Seq2", 25 ) returns 2.
+           column_from_residue_number( "Seq3", 50 ) returns 9.
+
+           An exception is thrown if the residue number would lie
+           outside the length of the alignment
+           (e.g. column_from_residue_number( "Seq2", 22 )).
+
+	  Note: If the parent sequence is represented by more than one
+	  alignment sequence and the residue number is present in
+	  them, this method finds only the first one.
+
+ Returns : A column number for the position in the alignment of the
+           given residue in the given sequence (1 = first column)
+ Args    : A sequence id/name (not a name/start-end)
+           A residue number in the whole sequence (not just that
+           segment of it in the alignment)
+
+=cut
+
+sub column_from_residue_number {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head1 Sequence names
+
+Methods to manipulate the display name. The default name based on the
+sequence id and subsequence positions can be overridden in various
+ways.
+
+=head2 displayname
+
+ Title     : displayname
+ Usage     : $myalign->displayname("Ig", "IgA")
+ Function  : Gets/sets the display name of a sequence in the alignment
+           :
+ Returns   : A display name string
+ Argument  : name of the sequence
+             displayname of the sequence (optional)
+
+=cut
+
+sub displayname {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 set_displayname_count
+
+ Title     : set_displayname_count
+ Usage     : $ali->set_displayname_count
+ Function  : 
+
+             Sets the names to be name_# where # is the number of
+             times this name has been used.
+
+ Returns   : None 
+ Argument  : None
+
+=cut
+
+sub set_displayname_count {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 set_displayname_flat
+
+ Title     : set_displayname_flat
+ Usage     : $ali->set_displayname_flat()
+ Function  : Makes all the sequences be displayed as just their name,
+             not name/start-end
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+sub set_displayname_flat {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 set_displayname_normal
+
+ Title     : set_displayname_normal
+ Usage     : $ali->set_displayname_normal() 
+ Function  : Makes all the sequences be displayed as name/start-end
+ Returns   : None
+ Argument  : None
+
+=cut
+
+sub set_displayname_normal {
+    my $self = shift;    # interface stub -- implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Align/DNAStatistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Align/DNAStatistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Align/DNAStatistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1723 @@
+# $Id: DNAStatistics.pm,v 1.32.4.2 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Align::DNAStatistics
+#
+# Cared for by Jason Stajich <jason-AT-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::DNAStatistics - Calculate some statistics for a DNA alignment
+
+=head1 SYNOPSIS
+
+  use Bio::AlignIO;
+  use Bio::Align::DNAStatistics;
+
+  my $stats = new Bio::Align::DNAStatistics;
+  my $alignin = new Bio::AlignIO(-format => 'emboss',
+                                 -file   => 't/data/insulin.water');
+  my $aln = $alignin->next_aln;
+  my $jcmatrix = $stats->distance(-align => $aln, 
+                                  -method => 'Jukes-Cantor');
+
+  print $jcmatrix->print_matrix;
+  ## and for measurements of synonymous /nonsynonymous substitutions ##
+
+  my $in = new Bio::AlignIO(-format => 'fasta',
+                            -file   => 't/data/nei_gojobori_test.aln');
+  my $alnobj = $in->next_aln;
+  my ($seq1id,$seq2id) = map { $_->display_id } $alnobj->each_seq;
+  my $results = $stats->calc_KaKs_pair($alnobj, $seq1id, $seq2id);
+  print "comparing ".$results->[0]{'Seq1'}." and ".$results->[0]{'Seq2'}."\n";
+  for (sort keys %{$results->[0]} ){
+      next if /Seq/;
+      printf("%-9s %.4f \n",$_ , $results->[0]{$_});
+  }
+
+  my $results2 = $stats->calc_all_KaKs_pairs($alnobj);
+  for my $an (@$results2){
+      print "comparing ". $an->{'Seq1'}." and ". $an->{'Seq2'}. " \n";
+      for (sort keys %$an ){
+	  next if /Seq/;
+	  printf("%-9s %.4f \n",$_ , $an->{$_});
+      }
+      print "\n\n";
+  }
+
+  my $result3 = $stats->calc_average_KaKs($alnobj, 1000);
+  for (sort keys %$result3 ){
+      next if /Seq/;
+      printf("%-9s %.4f \n",$_ , $result3->{$_});
+  }
+
+=head1 DESCRIPTION
+
+This object contains routines for calculating various statistics and
+distances for DNA alignments.  The routines are not well tested and do
+contain errors at this point.  Work is underway to correct them, but
+do not expect this code to give you the right answer currently!  Use
+dnadist/distmat in the PHYLIP or EMBOSS packages to calculate the
+distances.
+
+
+Several different distance method calculations are supported.  Listed
+in brackets are the pattern which will match
+
+=over 3
+
+=item JukesCantor [jc|jukes|jukescantor|jukes-cantor]
+
+=item Uncorrected [jcuncor|uncorrected]
+
+=item F81 [f81|felsenstein81]
+
+=item Kimura [k2|k2p|k80|kimura]
+
+=item Tamura [t92|tamura|tamura92]
+
+=item F84 [f84|felsenstein84]
+
+=item TajimaNei [tajimanei|tajima\-nei]
+
+=item JinNei [jinnei|jin\-nei] (not implemented)
+
+=back
+
+There are also three methods to calculate the ratio of synonymous to
+non-synonymous mutations.  All are implementations of the Nei-Gojobori
+evolutionary pathway method and use the Jukes-Cantor method of
+nucleotide substitution. This method works well so long as the
+nucleotide frequencies are roughly equal and there is no significant
+transition/transversion bias.  In order to use these methods there are
+several pre-requisites for the alignment.
+
+=over 3
+
+=item 1
+
+DNA alignment must be based on protein alignment. Use the subroutine
+L<aa_to_dna_aln> in Bio::Align::Utilities to achieve this.
+
+=item 2
+
+Therefore alignment gaps must be in multiples of 3 (representing an aa
+deletion/insertion) and at present must be indicated by a '-' symbol.
+
+=item 3
+
+Alignment must be solely of coding region and be in reading frame 0 to
+achieve meaningful results
+
+=item 4
+
+Alignment must therefore be a multiple of 3 nucleotides long.
+
+=item 5
+
+All sequences must be the same length (including gaps). This should be
+the case anyway if the sequences have been automatically aligned using
+a program like Clustal.
+
+=item 6
+
+Only the standard codon alphabet is supported at present.
+
+=back
+
+calc_KaKs_pair() calculates a number of statistics for a named pair of
+sequences in the alignment.
+
+calc_all_KaKs_pairs() calculates these statistics for all pairwise
+comparisons in an MSA.  The statistics returned are:
+
+=over 3
+
+=item S_d
+
+Number of synonymous mutations between the 2 sequences.
+
+=item N_d
+
+Number of non-synonymous mutations between the 2 sequences.
+
+=item S
+
+Mean number of  synonymous sites in both sequences.
+
+=item N
+
+Mean number of non-synonymous sites in both sequences.
+
+=item P_s
+
+proportion of synonymous differences in both sequences given by P_s = S_d/S.
+
+=item P_n
+
+proportion of non-synonymous differences in both sequences given by P_n = N_d/N.
+
+=item D_s
+
+estimation of synonymous mutations per synonymous site (by Jukes-Cantor).
+
+=item D_n
+
+estimation of non-synonymous mutations per non-synonymous site (by Jukes-Cantor).
+
+=item D_n_var
+
+estimation of variance of D_n .
+
+=item D_s_var
+
+estimation of variance of D_s.
+
+=item z_value
+
+calculation of z value. Positive value indicates D_n E<gt> D_s,
+negative value indicates D_s E<gt> D_n.
+
+=back
+
+The statistics returned by calc_average_KaKs are:
+
+=over 3
+
+=item D_s
+
+Average number of synonymous mutations/synonymous site.
+
+=item D_n
+
+Average number of non-synonymous mutations/non-synonymous site.
+
+=item D_s_var
+
+Estimated variance of Ds from bootstrapped alignments.
+
+=item D_n_var
+
+Estimated variance of Dn from bootstrapped alignments.
+
+=item z_score
+
+calculation of z value. Positive value indicates D_n E<gt>D_s,
+negative values vice versa.
+
+=back
+
+The design of the code is based around the explanation of the
+Nei-Gojobori algorithm in the excellent book "Molecular Evolution and
+Phylogenetics" by Nei and Kumar, published by Oxford University
+Press. The methods have been tested using the worked example 4.1 in
+the book, and reproduce those results. If people like having this sort
+of analysis in BioPerl other methods for estimating Ds and Dn can be
+provided later.
+
+
+Much of the DNA distance code is based on implementations in EMBOSS
+(Rice et al, www.emboss.org) [distmat.c] and PHYLIP (J. Felsenstein et
+al) [dnadist.c].  Insight also gained from Eddy, Durbin, Krogh, &
+Mitchison.
+
+=head1 REFERENCES
+
+=over 3
+
+=item D_JukesCantor 
+
+"Phylogenetic Inference", Swofford, Olsen, Waddell and Hillis, in
+Mol. Systematics, 2nd ed, 1996, Ch 11.  Derived from "Evolution of
+Protein Molecules", Jukes & Cantor, in Mammalian Prot. Metab., III,
+1969, pp. 21-132.
+
+=item D_Tamura
+
+K Tamura, Mol. Biol. Evol. 1992, 9, 678.
+
+=item D_Kimura 
+
+M Kimura, J. Mol. Evol., 1980, 16, 111.
+
+=item JinNei 
+
+Jin and Nei, Mol. Biol. Evol. 1990, 7, 82.
+
+=item D_TajimaNei
+
+Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-AT-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Richard Adams, richard.adams at ed.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Align::DNAStatistics;
+use vars qw(%DNAChanges @Nucleotides %NucleotideIndexes
+	    $GapChars $SeqCount $DefaultGapPenalty %DistanceMethods
+            $CODONS %synchanges $synsites $Precision $GCChhars);
+use strict;
+use Bio::Align::PairwiseStatistics;
+use Bio::Matrix::PhylipDist;
+use Bio::Tools::IUPAC;
+
+# Initialise the package-level lookup tables and defaults at compile time.
+BEGIN {
+    # character class (used inside anchored regexes) matching a gap symbol
+    $GapChars = '[\.\-]';
+    # character class matching G, C or S; used by D_Tamura to count GC content
+    $GCChhars = '[GCS]';
+    @Nucleotides = qw(A G T C);
+    $SeqCount = 2;
+    # number of decimal places used when formatting distance values
+    $Precision = 5;
+    
+    # these values come from EMBOSS distmat implementation
+    # Single-letter keys give the row/column of a base in the 4x4 count
+    # matrix built by _build_nt_matrix; two-letter keys (letters in sorted
+    # order) give the slot of an unordered base pair in its pair-frequency
+    # array.
+    %NucleotideIndexes = ( 'A' => 0,
+			   'T' => 1,
+			   'C' => 2,
+			   'G' => 3,
+
+			   'AT' => 0,
+			   'AC' => 1,
+			   'AG' => 2,
+			   'CT' => 3,
+			   'GT' => 4,
+			   'CG' => 5,
+
+# these are wrong now
+#			   'S' => [ 1, 3],
+#			   'W' => [ 0, 4],
+#			   'Y' => [ 2, 3],
+#			   'R' => [ 0, 1],
+#			   'M' => [ 0, 3],
+#			   'K' => [ 1, 2],
+#			   'B' => [ 1, 2, 3],
+#			   'H' => [ 0, 2, 3],
+#			   'V' => [ 0, 1, 3],
+#			   'D' => [ 0, 1, 2],
+			   );
+
+    # gap penalty applied by the distance methods when the caller gives none
+    $DefaultGapPenalty = 0;
+    # could put ambiguities here?
+    # For each kind of change: source base => list of bases it can change to.
+    %DNAChanges = ( 'Transversions' => { 'A' => [ 'T', 'C'],
+					 'T' => [ 'A', 'G'],
+					 'C' => [ 'A', 'G'],
+					 'G' => [ 'C', 'T'],
+				     },
+		    'Transitions'   => { 'A' => [ 'G' ],
+					 'G' => [ 'A' ],
+					 'C' => [ 'T' ],
+					 'T' => [ 'C' ],
+				     },
+		    );
+    # Regex alternation of accepted -method names => suffix of the D_* method
+    # that distance() dispatches to.
+    %DistanceMethods = ( 'jc|jukes|jukescantor|jukes\-cantor' => 'JukesCantor',
+			 'jcuncor|uncorrected'   => 'Uncorrected',
+			 'f81|felsenstein81'     => 'F81',
+			 'k2|k2p|k80|kimura'     => 'Kimura',
+			 't92|tamura|tamura92'   => 'Tamura',
+			 'f84|felsenstein84'     => 'F84',
+			 'tajimanei|tajima\-nei' => 'TajimaNei',
+			 'jinnei|jin\-nei'       => 'JinNei');
+
+}
+use base qw(Bio::Root::Root Bio::Align::StatisticsI);
+
+## generate look up hashes for Nei_Gojobori methods##
+# NOTE(review): get_codons, get_syn_sites and get_syn_changes are defined
+# further down the file; presumably they depend on this statement order and
+# on @t -- confirm before reordering.
+$CODONS = get_codons();
+# 64 one-letter symbols; presumably the amino acid ('*' = stop) for each codon
+# in the order used by get_codons() -- TODO confirm against that sub.
+my @t = split '', "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
+#create look up hash of number of possible synonymous mutations per codon
+$synsites = get_syn_sites();
+#create reference look up hash of single basechanges in codons
+%synchanges = get_syn_changes();
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Align::DNAStatistics();
+ Function: Builds a new Bio::Align::DNAStatistics object 
+ Returns : Bio::Align::DNAStatistics
+ Args    : none
+
+
+=cut
+
+# Constructor: builds the object through the parent class and attaches a
+# Bio::Align::PairwiseStatistics helper (available via pairwise_stats()).
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # direct method call rather than the ambiguous indirect-object form
+    $self->pairwise_stats( Bio::Align::PairwiseStatistics->new() );
+
+    return $self;
+}
+
+
+=head2 distance
+
+ Title   : distance
+ Usage   : my $distance_mat = $stats->distance(-align  => $aln, 
+		 			       -method => $method);
+ Function: Calculates a distance matrix for all pairwise distances of
+           sequences in an alignment.
+ Returns : L<Bio::Matrix::PhylipDist> object
+ Args    : -align  => Bio::Align::AlignI object
+           -method => String specifying specific distance method 
+                      (implementing class may assume a default)
+See also: L<Bio::Matrix::PhylipDist>
+
+=cut
+
+# Dispatch to the D_* method selected by -method (default JukesCantor).
+# Throws if -align is not a Bio::Align::AlignI object; warns and returns
+# nothing if the method name is not recognised.
+sub distance {
+    my ($self, @args) = @_;
+    my ($aln, $method) = $self->_rearrange([qw(ALIGN METHOD)], @args);
+    if ( !defined $aln || !ref($aln) || !$aln->isa('Bio::Align::AlignI') ) {
+        $self->throw("Must supply a valid Bio::Align::AlignI for the -align parameter in distance");
+    }
+    $method ||= 'JukesCantor';
+
+    # Sorted so that the lookup does not depend on hash ordering.
+    my @patterns = sort keys %DistanceMethods;
+
+    # Prefer a whole-name match: with a plain substring test 'jcuncor' also
+    # matches the 'jc' alternative of the Jukes-Cantor pattern.
+    my ($hit) = grep { $method =~ /^(?:$_)$/i } @patterns;
+
+    # Fall back to the historical substring match for other spellings.
+    ($hit) = grep { $method =~ /$_/i } @patterns unless defined $hit;
+
+    if ( defined $hit ) {
+        my $mtd = "D_$DistanceMethods{$hit}";
+        return $self->$mtd($aln);
+    }
+    $self->warn("Unrecognized distance method $method must be one of [".
+                join(',', $self->available_distance_methods())."]");
+    return;
+}
+
+=head2 available_distance_methods
+
+ Title   : available_distance_methods
+ Usage   : my @methods = $stats->available_distance_methods();
+ Function: Enumerates the possible distance methods
+ Returns : Array of strings
+ Args    : none
+
+
+=cut
+
+# Returns the canonical method names (the values of %DistanceMethods),
+# in no particular order.
+sub available_distance_methods {
+    my ($self, @args) = @_;
+    return values %DistanceMethods;
+}
+
+=head2 D - distance methods
+
+
+=cut
+
+
+=head2 D_JukesCantor
+
+ Title   : D_JukesCantor
+ Usage   : my $d = $stat->D_JukesCantor($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Jukes-Cantor 1 parameter model. 
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+           double - gap penalty
+
+
+=cut
+
+# Jukes-Cantor distance for every pair of sequences in $aln.
+# Returns 0 if $aln fails _check_arg, otherwise a Bio::Matrix::PhylipDist.
+# A pair whose correction is undefined (1 - 4D/3 <= 0) is reported as -1.
+sub D_JukesCantor {
+    my ($self, $aln, $gappenalty) = @_;
+    return 0 unless $self->_check_arg($aln);
+    $gappenalty = $DefaultGapPenalty unless defined $gappenalty;
+    # ambiguities ignored at this point
+    my (@seqs, @names, @values, %dist);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        push @seqs,  uc $seq->seq();
+        $seqct++;
+    }
+    my $precisionstr = "%.$Precision"."f";
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],
+                                                                $seqs[$j]);
+            # just want diagonals (identical bases)
+            my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] +
+                      $matrix->[2]->[2] + $matrix->[3]->[3] );
+            my $D = 1 - ( $m / ($aln->length - $gaps + ( $gaps * $gappenalty)));
+            # log() dies on a non-positive argument; report -1 instead, the
+            # same sentinel D_Kimura uses for an undefined distance
+            my $logarg = 1 - (4 * $D / 3);
+            my $d = $logarg > 0 ? (- 3 / 4) * log($logarg) : -1;
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j][$j] = sprintf($precisionstr,0);
+        }
+    }
+    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                        -matrix  => \%dist,
+                                        -names   => \@names,
+                                        -values  => \@values);
+}
+
+=head2 D_F81
+
+ Title   : D_F81
+ Usage   : my $d = $stat->D_F81($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Felsenstein 1981 distance model. 
+           Relaxes the assumption of equal base frequencies that is
+           in JC.
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+
+
+=cut
+
+# Pairwise distance matrix advertised as Felsenstein 1981.
+# NOTE(review): the arithmetic below is the same as D_JukesCantor; no base
+# frequencies enter the formula. Results are kept as they were.
+# Returns 0 if $aln fails _check_arg, otherwise a Bio::Matrix::PhylipDist.
+# A pair whose correction is undefined (1 - 4D/3 <= 0) is reported as -1.
+sub D_F81 {
+    my ($self, $aln, $gappenalty) = @_;
+    return 0 unless $self->_check_arg($aln);
+    $gappenalty = $DefaultGapPenalty unless defined $gappenalty;
+    # ambiguities ignored at this point
+    my (@seqs, @names, @values, %dist);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        push @seqs,  uc $seq->seq();
+        $seqct++;
+    }
+    my $precisionstr = "%.$Precision"."f";
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],
+                                                                $seqs[$j]);
+            # just want diagonals (identical bases)
+            my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] +
+                      $matrix->[2]->[2] + $matrix->[3]->[3] );
+            my $D = 1 - ( $m / ($aln->length - $gaps + ( $gaps * $gappenalty)));
+            # log() dies on a non-positive argument; report -1 instead
+            my $logarg = 1 - (4 * $D / 3);
+            my $d = $logarg > 0 ? (- 3 / 4) * log($logarg) : -1;
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j][$j] = sprintf($precisionstr,0);
+        }
+    }
+    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                        -matrix  => \%dist,
+                                        -names   => \@names,
+                                        -values  => \@values);
+}
+
+=head2 D_Uncorrected
+
+ Title   : D_Uncorrected
+ Usage   : my $d = $stats->D_Uncorrected($aln)
+ Function: Calculate a distance D, no correction for multiple substitutions 
+           is used.
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> (DNA Alignment)
+           [optional] gap penalty
+
+=cut
+
+# Uncorrected distance (1 - fraction of identical positions) for every pair
+# of sequences in $aln.
+# Returns 0 if $aln fails _check_arg, otherwise a Bio::Matrix::PhylipDist.
+sub D_Uncorrected {
+    my ($self, $aln, $gappenalty) = @_;
+    $gappenalty = $DefaultGapPenalty unless defined $gappenalty;
+    return 0 unless $self->_check_arg($aln);
+    # ambiguities ignored at this point
+    my (@seqs, @names, @values, %dist);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        push @seqs,  uc $seq->seq();
+        $seqct++;
+    }
+
+    my $precisionstr = "%.$Precision"."f";
+    my $len = $aln->length;
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],
+                                                                $seqs[$j]);
+            # matrix diagonal = positions where both sequences have the same base
+            my $m = ( $matrix->[0]->[0] +
+                      $matrix->[1]->[1] +
+                      $matrix->[2]->[2] +
+                      $matrix->[3]->[3] );
+            my $D = 1 - ( $m / ( $len - $gaps + ( $gaps * $gappenalty)));
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$D);
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j][$j] = sprintf($precisionstr,0);
+        }
+    }
+    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                        -matrix  => \%dist,
+                                        -names   => \@names,
+                                        -values  => \@values);
+}
+
+
+# M Kimura, J. Mol. Evol., 1980, 16, 111.
+
+=head2 D_Kimura
+
+ Title   : D_Kimura
+ Usage   : my $d = $stat->D_Kimura($aln)
+ Function: Calculates D (pairwise distance) between all pairs of sequences 
+           in an alignment using the Kimura 2 parameter model.
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+
+
+=cut
+
+# Kimura 2-parameter distance for every pair of sequences in $aln.
+# Returns 0 if $aln fails _check_arg, otherwise a Bio::Matrix::PhylipDist.
+# A pair for which the formula is undefined is reported as -1.
+sub D_Kimura {
+    my ($self, $aln) = @_;
+    return 0 unless $self->_check_arg($aln);
+    # ambiguities ignored at this point
+    my (@names, @values, %dist);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        $seqct++;
+    }
+
+    my $precisionstr = "%.$Precision"."f";
+
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my $pairwise = $aln->select_noncont($i+1,$j+1);
+            my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+            unless ( $L ) {
+                $L = 1;    # avoid dividing by zero below
+            }
+            my $P = $self->transitions($pairwise) / $L;
+            my $Q = $self->transversions($pairwise) / $L;
+
+            # Test the denominators before inverting them: a value of exactly
+            # zero used to die with a division-by-zero error, and negative
+            # values already produced the -1 sentinel.
+            my $a_denom = 1 - (2 * $P) - $Q;
+            my $b_denom = 1 - 2 * $Q;
+            my $K = -1;
+            if ( $a_denom > 0 && $b_denom > 0 ) {
+                $K = (1/2) * log( 1 / $a_denom ) + (1/4) * log( 1 / $b_denom );
+            }
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$K);
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j][$j] = sprintf($precisionstr,0);
+        }
+    }
+    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                        -matrix  => \%dist,
+                                        -names   => \@names,
+                                        -values  => \@values);
+}
+
+
+=head2 D_Kimura_variance
+
+ Title   : D_Kimura_variance
+ Usage   : my ($d,$var) = $stat->D_Kimura_variance($aln)
+ Function: Calculates D (pairwise distance) and its variance between all
+           pairs of sequences in an alignment using the Kimura 2 parameter model.
+ Returns : array of 2 L<Bio::Matrix::PhylipDist>,
+           the first is the Kimura distance and the second is
+           a matrix of variance V(K)
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+
+
+=cut
+
+# Kimura 2-parameter distance and its variance for every pair of sequences.
+# Returns 0 if $aln fails _check_arg, otherwise a list of two
+# Bio::Matrix::PhylipDist objects: the distances, then the variances V(K).
+# A pair for which the formula is undefined gets -1 in both matrices.
+sub D_Kimura_variance {
+    my ($self, $aln) = @_;
+    return 0 unless $self->_check_arg($aln);
+    # ambiguities ignored at this point
+    my (@names, @values, %dist, @var);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        $seqct++;
+    }
+
+    my $precisionstr = "%.$Precision"."f";
+
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+        # the variance diagonal was previously never filled for the first
+        # sequence, leaving $var[0][0] undefined
+        $var[$i][$i] = $values[$i][$i];
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my $pairwise = $aln->select_noncont($i+1,$j+1);
+            my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+            unless ( $L ) {
+                $L = 1;    # avoid dividing by zero below
+            }
+            my $P = $self->transitions($pairwise) / $L;
+            my $Q = $self->transversions($pairwise) / $L;
+            my ($K, $var_k) = (-1, -1);
+            my $a_denom = ( 1 - (2 * $P) - $Q);
+            my $b_denom = 1 - 2 * $Q;
+            if ( $a_denom > 0 && $b_denom > 0 ) {
+                my $a = 1 / $a_denom;
+                my $b = 1 / $b_denom;
+                $K = (1/2) * log ( $a ) + (1/4) * log($b);
+                # from Wu and Li 1985 which in turn is from Kimura 1980
+                my $d = ( $a + $b ) / 2;
+                $var_k = ( $a**2 * $P + $d**2 * $Q - ( $a * $P + $d * $Q)**2 ) / $L;
+            }
+
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$K);
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j]->[$j] = sprintf($precisionstr,0);
+
+            $var[$j]->[$i] = $var[$i]->[$j] = sprintf($precisionstr,$var_k);
+            $var[$j]->[$j] = $values[$j]->[$j];
+        }
+    }
+    return ( Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                          -matrix  => \%dist,
+                                          -names   => \@names,
+                                          -values  => \@values),
+             Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                          -matrix  => \%dist,
+                                          -names   => \@names,
+                                          -values  => \@var)
+             );
+}
+
+
+#  K Tamura, Mol. Biol. Evol. 1992, 9, 678.
+
+=head2 D_Tamura
+
+ Title   : D_Tamura
+ Usage   : my $d = $stat->D_Tamura($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using Tamura 1992 distance model. 
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+
+
+=cut
+
+# Tamura 1992 distance for every pair of sequences in $aln.
+# Returns 0 if $aln fails _check_arg, otherwise a Bio::Matrix::PhylipDist.
+# A pair for which the formula is undefined (zero GC term with transitions
+# present, or a non-positive log argument) is reported as -1 rather than
+# dying.
+sub D_Tamura {
+    my ($self, $aln) = @_;
+    return 0 unless $self->_check_arg($aln);
+    # ambiguities ignored at this point
+    my (@seqs, @names, @values, %dist);
+    my $seqct = 0;
+    my $length = $aln->length;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        push @seqs,  uc $seq->seq();
+        $seqct++;
+    }
+
+    my $precisionstr = "%.$Precision"."f";
+
+    # $gc[$i][$j]: fraction of columns, among those where neither sequence
+    # $i nor $j has a gap, in which sequence $j carries G, C or S.
+    my (@gap, @gc);
+    for my $i ( 0 .. $#seqs ) {
+        for my $j ( 0 .. $#seqs ) {
+            $gap[$i][$j] = 0;
+            $gc[$i][$j]  = 0;    # start from 0 instead of undef
+            for ( my $k = 0; $k < $length; $k++ ) {
+                my ($c1,$c2) = ( substr($seqs[$i],$k,1),
+                                 substr($seqs[$j],$k,1) );
+                if ( $c1 =~ /^$GapChars$/ ||
+                     $c2 =~ /^$GapChars$/ ) {
+                    $gap[$i][$j]++;
+                } elsif ( $c2 =~ /^$GCChhars$/i ) {
+                    $gc[$i][$j]++;
+                }
+            }
+            my $ungapped = $length - $gap[$i][$j];
+            # no ungapped column: leave the fraction at 0 instead of dying
+            $gc[$i][$j] = $ungapped ? $gc[$i][$j] / $ungapped : 0;
+        }
+    }
+
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my $pairwise = $aln->select_noncont($i+1,$j+1);
+            my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+            $L = 1 unless $L;    # same zero-length guard as D_Kimura
+            my $P = $self->transitions($pairwise) / $L;
+            my $Q = $self->transversions($pairwise) / $L;
+            my $C = $gc[$i][$j] + $gc[$j][$i] -
+                ( 2 * $gc[$i][$j] * $gc[$j][$i] );
+
+            my $d = -1;
+            unless ( $P && !$C ) {    # P/C is undefined when C is 0
+                $P = $P / $C if $P;
+                my $ts_arg = 1 - $P - $Q;
+                my $tv_arg = 1 - 2 * $Q;
+                if ( $ts_arg > 0 && $tv_arg > 0 ) {
+                    $d = -($C * log($ts_arg)) - (0.5 * ( 1 - $C) * log($tv_arg));
+                }
+            }
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j][$j] = sprintf($precisionstr,0);
+        }
+    }
+    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                        -matrix  => \%dist,
+                                        -names   => \@names,
+                                        -values  => \@values);
+
+}
+
+=head2 D_F84
+
+ Title   : D_F84
+ Usage   : my $d = $stat->D_F84($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Felsenstein 1984 distance model. 
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+           [optional] double - gap penalty
+
+=cut
+
+# Felsenstein 1984 distance: not implemented yet.
+# Returns 0 if $aln fails _check_arg; otherwise raises the not-implemented
+# error. Everything after the throw is unreachable scaffolding kept for a
+# future implementation.
+sub D_F84 {
+    my ($self, $aln, $gappenalty) = @_;
+    return 0 unless $self->_check_arg($aln);
+    $self->throw_not_implemented();
+    # ambiguities ignored at this point
+    my (@seqs, @names, @values, %dist);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        # if there is no name, fall back to the 1-based sequence position
+        my $id = $seq->display_id;
+        if ( ! length($id) ||       # deal with empty names
+             $id =~ /^\s+$/ ) {
+            $id = $seqct+1;
+        }
+        push @names, $id;
+        push @seqs,  uc $seq->seq();
+        $seqct++;
+    }
+
+    my $precisionstr = "%.$Precision"."f";
+
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        # (diagonals) distance is 0 for same sequence
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+        }
+    }
+}
+
+# Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.
+#  Tajima-Nei correction used for multiple substitutions in the calc
+# of the distance matrix. Nucleic acids only.
+#
+#  D = p-distance = 1 - (matches/(posns_scored + gaps))
+#
+#  distance = -b * ln(1-D/b)
+#
+
+=head2 D_TajimaNei
+
+ Title   : D_TajimaNei
+ Usage   : my $d = $stat->D_TajimaNei($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the TajimaNei 1984 distance model. 
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : Bio::Align::AlignI of DNA sequences
+
+
+=cut
+
+# Tajima-Nei 1984 distance for every pair of sequences in $aln.
+# Returns 0 if $aln fails _check_arg, otherwise a Bio::Matrix::PhylipDist.
+# A pair for which the formula is undefined (no comparable bases, h == 0, or
+# a non-positive log argument) is reported as -1.
+sub D_TajimaNei {
+    my ($self, $aln) = @_;
+    return 0 unless $self->_check_arg($aln);
+    # ambiguities ignored at this point
+    my (@seqs, @names, @values, %dist);
+    my $seqct = 0;
+    foreach my $seq ( $aln->each_seq ) {
+        push @names, $seq->display_id;
+        push @seqs,  uc $seq->seq();
+        $seqct++;
+    }
+    my $precisionstr = "%.$Precision"."f";
+    # pairwise
+    for ( my $i = 0; $i < $seqct-1; $i++ ) {
+        $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+        $values[$i][$i] = sprintf($precisionstr,0);
+
+        for ( my $j = $i+1; $j < $seqct; $j++ ) {
+            my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],
+                                                                $seqs[$j]);
+            my $pairwise = $aln->select_noncont($i+1,$j+1);
+            my $slen = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+
+            my $d = -1;
+            if ( $slen ) {    # nothing comparable: keep the -1 sentinel
+                # Row and column totals of the 4x4 count matrix, computed once.
+                my (@row, @col);
+                for my $r ( 0 .. 3 ) {
+                    for my $c ( 0 .. 3 ) {
+                        $row[$r] += $matrix->[$r]->[$c];
+                        $col[$c] += $matrix->[$r]->[$c];
+                    }
+                }
+
+                # sum of squared average base frequencies
+                my $fij2 = 0;
+                for my $bs ( 0 .. 3 ) {
+                    my ($fi, $fj) = ( $row[$bs], $col[$bs] );
+                    my $fij = ( $fi && $fj ) ? ($fi + $fj) / ( 2 * $slen ) : 0;
+                    $fij2 += $fij**2;
+                }
+
+                # $pair walks the pair-frequency array in the order
+                # (0,1) (0,2) (0,3) (1,2) (1,3) (2,3)
+                my ($pair, $h) = (0, 0);
+                for my $bs ( 0 .. 2 ) {
+                    for my $bs1 ( $bs+1 .. 3 ) {
+                        my $fij = $pfreq->[$pair++] / $slen;
+                        next unless $fij;
+
+                        my ($ci1,$cj1) = ( $col[$bs],  $row[$bs]  );
+                        my ($ci2,$cj2) = ( $col[$bs1], $row[$bs1] );
+
+                        $h += ( ($fij**2) / 2 ) /
+                            (  ( ( $ci1 + $cj1 ) / (2 * $slen) ) *
+                               ( ( $ci2 + $cj2 ) / (2 * $slen) )
+                               );
+                        $self->debug( "slen is $slen h is $h fij = $fij ci1 =$ci1 cj1=$cj1 ci2=$ci2 cj2=$cj2\n");
+                    }
+                }
+
+                # just want diagonals which are matches (A matched A, C -> C)
+                my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] +
+                          $matrix->[2]->[2] + $matrix->[3]->[3] );
+                my $D = 1 - ( $m / $slen );
+                if ( $h != 0 ) {
+                    my $b = (1 - $fij2 + (($D**2)/$h)) / 2;
+                    my $c = 1 - $D / $b;
+                    # log(0) dies as well, so the test must exclude zero
+                    $d = (-1 * $b) * log($c) if $c > 0;
+                }
+            }
+            # fwd and rev lookup
+            $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+            $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+            $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);
+
+            # (diagonals) distance is 0 for same sequence
+            $dist{$names[$j]}->{$names[$j]} = [$j,$j];
+            $values[$j][$j] = sprintf($precisionstr,0);
+        }
+    }
+    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',
+                                        -matrix  => \%dist,
+                                        -names   => \@names,
+                                        -values  => \@values);
+
+}
+
+# Jin and Nei, Mol. Biol. Evol. 82, 7, 1990.
+
+=head2 D_JinNei
+
+ Title   : D_JinNei
+ Usage   : my $d = $stat->D_JinNei($aln)
+ Function: Calculates D (pairwise distance) between 2 sequences in an 
+           alignment using the Jin-Nei 1990 distance model. 
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI> of DNA sequences
+
+
+=cut
+
+# Jin-Nei distance: not implemented. Emits a warning and returns nothing.
+sub D_JinNei {
+    my ($self, @args) = @_;
+    $self->warn("JinNei implementation not completed");
+    return;
+}
+
+=head2 transversions
+
+ Title   : transversions
+ Usage   : my $transversions = $stats->transversions($aln);
+ Function: Calculates the number of transversions between two sequences in 
+           an alignment
+ Returns : integer
+ Args    : Bio::Align::AlignI
+
+
+=cut
+
+sub transversions {
+    my ($self, $aln) = @_;
+
+    # count the changes listed in the transversion table
+    my $change_table = $DNAChanges{'Transversions'};
+    return $self->_trans_count_helper($aln, $change_table);
+}
+
+=head2 transitions
+
+ Title   : transitions
+ Usage   : my $transitions = Bio::Align::DNAStatistics->transitions($aln);
+ Function: Calculates the number of transitions in a given DNA alignment
+ Returns : integer representing the number of transitions
+ Args    : Bio::Align::AlignI object
+
+
+=cut
+
+sub transitions {
+    my ($self, $aln) = @_;
+
+    # count the changes listed in the transition table
+    my $change_table = $DNAChanges{'Transitions'};
+    return $self->_trans_count_helper($aln, $change_table);
+}
+
+
+# Count the columns in which the first two sequences of $aln differ by one
+# of the changes listed in $type (a hash ref: base => array ref of bases it
+# may change to, e.g. $DNAChanges{'Transitions'}).
+# Returns 0 if $aln fails _check_arg; throws if the alignment is not flush.
+sub _trans_count_helper {
+    my ($self, $aln, $type) = @_;
+    return 0 unless ( $self->_check_arg($aln) );
+    if ( ! $aln->is_flush ) { $self->throw("must be flush") }
+    my ($first,$second) = ( uc $aln->get_seq_by_pos(1)->seq(),
+                            uc $aln->get_seq_by_pos(2)->seq() );
+    my $alen  = $aln->length;
+    my $count = 0;
+    for ( my $i = 0; $i < $alen; $i++ ) {
+        my ($c1,$c2) = ( substr($first,$i,1),
+                         substr($second,$i,1) );
+        next if $c1 eq $c2;
+        # Look the entry up before dereferencing: iterating directly over
+        # @{$type->{$c1}} autovivifies an empty entry in the shared table
+        # for every gap or ambiguity symbol encountered.
+        my $targets = $type->{$c1} or next;
+        $count += grep { $_ eq $c2 } @{$targets};
+    }
+    return $count;
+}
+
+# this will generate a matrix which records across the row, the number
+# of DNA subst 
+# 
+# Compare two aligned sequence strings column by column.
+# Returns a list of three items:
+#   - 4x4 array ref of counts, indexed [base in $seqa][base in $seqb] using
+#     the single-letter indexes of %NucleotideIndexes
+#   - array ref of 6 counts of mismatched unordered base pairs, indexed by
+#     the two-letter keys of %NucleotideIndexes
+#   - the number of columns in which either sequence has a gap
+# U is treated as T. Columns holding a symbol without an index (e.g. an
+# ambiguity code) are reported with a warning and skipped.
+sub _build_nt_matrix {
+    my ($self, $seqa, $seqb) = @_;
+
+    my $basect_matrix = [ [ qw(0 0 0 0) ],  # number of bases that match
+                          [ qw(0 0 0 0) ],
+                          [ qw(0 0 0 0) ],
+                          [ qw(0 0 0 0) ] ];
+    my $gaps = 0;                           # number of gaps
+    my $pfreq = [ qw( 0 0 0 0 0 0) ];       # matrix for pair frequency
+    my $len_a = length($seqa);
+    for ( my $i = 0; $i < $len_a; $i++ ) {
+        my ($ti,$tj) = (substr($seqa,$i,1),substr($seqb,$i,1));
+        $ti =~ tr/U/T/;
+        $tj =~ tr/U/T/;
+
+        if ( $ti =~ /^$GapChars$/ ) { $gaps++; next; }
+        if ( $tj =~ /^$GapChars$/ ) { $gaps++; next; }
+
+        my $ti_index = $NucleotideIndexes{$ti};
+        my $tj_index = $NucleotideIndexes{$tj};
+
+        # Report through the object's warning mechanism instead of printing
+        # to STDOUT.
+        if ( ! defined $ti_index ) {
+            $self->warn("ti_index not defined for $ti\n");
+            next;
+        }
+        # An unknown base in the second sequence used to be counted in
+        # column 0 because the undefined index was used as 0.
+        if ( ! defined $tj_index ) {
+            $self->warn("tj_index not defined for $tj\n");
+            next;
+        }
+
+        $basect_matrix->[$ti_index]->[$tj_index]++;
+
+        if ( $ti ne $tj ) {
+            $pfreq->[$NucleotideIndexes{join('',sort ($ti,$tj))}]++;
+        }
+    }
+    return ($basect_matrix,$pfreq,$gaps);
+}
+
+# Score how well two (possibly ambiguous) nucleotide symbols agree.
+# Each symbol is expanded through Bio::Tools::IUPAC->iupac_iub(); when the
+# two expansions share at least one base the score is
+# (1/size of first expansion) * (1/size of second expansion), otherwise 0.
+sub _check_ambiguity_nucleotide {
+    my ($base1,$base2) = @_;
+    my %iub = Bio::Tools::IUPAC->iupac_iub();
+    my @expansion1 = @{ $iub{uc($base1)} };
+    my @expansion2 = @{ $iub{uc($base2)} };
+    my %in_second  = map { $_ => 1 } @expansion2;
+    foreach my $nt ( @expansion1 ) {
+        next unless $in_second{$nt};
+        return (1 / scalar @expansion1) * (1 / scalar @expansion2);
+    }
+    return 0;
+}
+
+
+# Validate the alignment argument of the statistics methods.
+# Returns 1 when $aln is a Bio::Align::AlignI whose first sequence reports
+# the alphabet 'dna'; otherwise warns and returns 0.
+sub _check_arg {
+    my($self,$aln ) = @_;
+    unless( defined $aln && $aln->isa('Bio::Align::AlignI') ) {
+        $self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::DNAStatistics");
+        return 0;
+    }
+    my $alphabet = $aln->get_seq_by_pos(1)->alphabet;
+    return 1 if $alphabet eq 'dna';
+    $self->warn("Must provide a DNA alignment to Bio::Align::DNAStatistics, you provided a " . $alphabet);
+    return 0;
+}
+
+=head2 Data Methods
+
+=cut
+
+=head2 pairwise_stats
+
+ Title   : pairwise_stats
+ Usage   : $obj->pairwise_stats($newval)
+ Function: 
+ Returns : value of pairwise_stats
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the object stored under '_pairwise_stats'.
+# A defined argument replaces the stored value; the current value is
+# returned either way.
+sub pairwise_stats{
+    my $self = shift;
+    my ($value) = @_;
+    $self->{'_pairwise_stats'} = $value if defined $value;
+    return $self->{'_pairwise_stats'};
+}
+
+=head2 calc_KaKs_pair
+
+ Title    : calc_KaKs_pair
+ Usage    : my $results = $stats->calc_KaKs_pair($alnobj,
+            $name1, $name2).
+ Function : calculates Nei-Gojobori statistics for pairwise 
+            comparison.
+ Args     : A Bio::Align::AlignI compliant object such as a 
+            Bio::SimpleAlign object, and 2 sequence name strings.
+ Returns  : a reference to an array holding one hash of statistics
+            with keys as listed in Description.
+
+=cut
+
+# Nei-Gojobori statistics for one pair of sequences picked from $aln by id.
+#   $aln               - Bio::Align::AlignI compliant alignment
+#   $seq1_id, $seq2_id - ids of the two sequences to compare
+# Returns an array reference that receives the statistics hash pushed by
+# _get_av_ds_dn (it stays empty if that routine skips the pair).
+# Throws on a wrong argument count, a non-alignment object, an unknown
+# sequence id or sequences of unequal length.
+# NOTE: _get_av_ds_dn inspects the name of its caller, so it must be called
+# directly from this sub.
+sub calc_KaKs_pair {
+    my ( $self, $aln, $seq1_id, $seq2_id) = @_;
+    $self->throw("Needs 3 arguments - an alignment object, and 2 sequence ids") 
+        if @_!= 4;
+    $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
+    my @seqs;
+    for my $id ($seq1_id, $seq2_id) {
+        # only the first sequence carrying this id is used
+        my ($seq) = $aln->each_seq_with_id($id);
+        # previously an unknown id died with "Can't call method seq on an
+        # undefined value"
+        $self->throw("No sequence with id [$id] found in the alignment")
+            unless defined $seq;
+        push @seqs, { id => $id, seq => uc $seq->seq };
+    }
+    if (length($seqs[0]{'seq'}) != length($seqs[1]{'seq'})) {
+        $self->throw(" aligned sequences must be of equal length!");
+    }
+    my $results = [];
+    $self->_get_av_ds_dn(\@seqs, $results);
+    return $results;
+}
+
+=head2 calc_all_KaKs_pairs
+
+ Title    : calc_all_KaKs_pairs
+ Usage    : my $results2 = $stats->calc_all_KaKs_pairs($alnobj).
+ Function : Calculates Nei_gojobori statistics for all pairwise
+            combinations in sequence.
+ Arguments: A Bio::Align::ALignI compliant object such as
+            a Bio::SimpleAlign object.
+ Returns  : A reference to an array of hashes of statistics of
+            all pairwise comparisons in the alignment.
+
+=cut
+
+
+
+# Nei-Gojobori statistics for every pairwise combination in $aln.
+# Returns whatever _get_av_ds_dn hands back for a pairwise caller: a
+# reference to an array with one statistics hash per successfully compared
+# pair (undef when no pair produced a result).
+# Sequences are upper-cased first, as calc_KaKs_pair does.
+# NOTE: _get_av_ds_dn inspects the name of its caller, so it must be called
+# directly from this sub.
+sub calc_all_KaKs_pairs {
+    my ($self,$aln) = @_;
+    $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
+    my @seqs;
+    for my $seq ($aln->each_seq) {
+        push @seqs, {id => $seq->display_id, seq => uc $seq->seq};
+    }
+    my $results ;
+    $results = $self->_get_av_ds_dn(\@seqs, $results);
+    return $results;
+}
+
+=head2 calc_average_KaKs
+
+ Title    : calc_average_KaKs.  
+ Usage    : my $res= $stats->calc_average_KaKs($alnobj, 1000).
+ Function : calculates Nei_Gojobori stats for average of all 
+            sequences in the alignment.
+ Args     : A Bio::Align::AlignI compliant object such as a
+            Bio::SimpleAlign object, number of bootstrap iterations
+            (default 1000).
+ Returns  : A reference to a hash of statistics as listed in Description.
+
+=cut
+
+# Average Nei-Gojobori D_s / D_n over all sequence pairs of $aln, with
+# bootstrap estimates of their variances.
+#   $aln           - Bio::Align::AlignI compliant alignment
+#   $bootstrap_rpt - number of bootstrap replicates (default 1000)
+# Returns a hash reference holding D_s and D_n plus the keys added by
+# _run_bootstrap (D_s_var, D_n_var, z_score).
+# Sequences are upper-cased first, as calc_KaKs_pair does.
+# this is quite slow (~10 seconds per  3 X 200nt seqs);
+sub calc_average_KaKs {
+    my ($self, $aln, $bootstrap_rpt) = @_;
+    $bootstrap_rpt ||= 1000;
+    $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');
+    my @seqs;
+    for my $seq ($aln->each_seq) {
+        push @seqs, {id => $seq->display_id, seq => uc $seq->seq};
+    }
+    # called without a result array: _get_av_ds_dn returns the two means
+    my ($ds_orig, $dn_orig) = $self->_get_av_ds_dn(\@seqs);
+    my $results = {D_s => $ds_orig, D_n => $dn_orig};
+    $self->_run_bootstrap(\@seqs, $results, $bootstrap_rpt);
+    return $results;
+}
+
+############## primary internal subs for alignment comparisons ########################
+
+# Bootstrap the variances of the average D_s and D_n.
+#   $seq_ref       - arrayref of { id => ..., seq => ... } hashes
+#   $results       - hashref already holding D_s and D_n; D_s_var, D_n_var
+#                    and z_score are added to it
+#   $bootstrap_rpt - number of resampled data sets to evaluate
+# Codon columns are resampled with replacement by _resample, each sample is
+# scored by _get_av_ds_dn and the sampling variance of the scores is stored.
+# z_score is 0 when both variances are 0, mirroring the pairwise code path.
+sub _run_bootstrap {
+    my ($self,$seq_ref, $results, $bootstrap_rpt) = @_;
+    # a single replicate would divide by zero in sampling_variance
+    $self->throw("need at least 2 bootstrap replicates to estimate a variance")
+        if $bootstrap_rpt == 1;
+    my @seqs = @$seq_ref;
+    my @btstrp_aoa; # to hold array of array of nucleotides for resampling
+    my %bootstrap_values = (ds => [], dn =>[]);	# to hold list of av values 
+
+    #1st make alternative array of codons;
+    my $seq_len = length $seqs[0]{'seq'};
+    for (my $c = 0; $c < $seq_len; $c += 3) {
+        for my $s (0..$#seqs) {
+            push @{$btstrp_aoa[$s]}, substr ($seqs[$s]{'seq'}, $c, 3);
+        }
+    }
+
+    for (1..$bootstrap_rpt) {
+        my $sampled = _resample (\@btstrp_aoa);
+        my ($ds, $dn) = $self->_get_av_ds_dn ($sampled) ; # is array ref
+        push @{$bootstrap_values{'ds'}}, $ds;
+        push @{$bootstrap_values{'dn'}}, $dn;
+    }
+
+    $results->{'D_s_var'} = sampling_variance($bootstrap_values{'ds'});
+    $results->{'D_n_var'} = sampling_variance($bootstrap_values{'dn'});
+    my $var_sum = $results->{'D_s_var'} + $results->{'D_n_var'};
+    # identical replicates give zero variance; avoid dividing by sqrt(0)
+    $results->{'z_score'} = $var_sum
+        ? ($results->{'D_n'} - $results->{'D_s'}) / sqrt($var_sum)
+        : 0;
+}
+
+# Build one bootstrap sample from per-sequence codon lists.
+#   $ref - arrayref with one arrayref of codon strings per sequence
+# As many codon columns as the first sequence holds are drawn at random
+# with replacement; the same columns are taken from every sequence and
+# joined back into strings.  Returns an arrayref of { id => '1', seq => ... }
+# hashes (an empty arrayref when there are no codons).
+sub _resample {
+    my $ref = shift;
+    my $codon_num = scalar @{ $ref->[0] };
+    # one random draw per codon column
+    my @picks = map { int( rand($codon_num) ) } 1 .. $codon_num;
+    return [] unless @picks;
+    my @resampled;
+    #now out in random name to keep other subs happy
+    for my $codons (@$ref) {
+        push @resampled, { id => '1', seq => join('', @{$codons}[@picks]) };
+    }
+    return \@resampled;
+}
+
+# Core Nei-Gojobori computation over all pairs of the given sequences.
+#   $seq_ref - arrayref of { id => ..., seq => ... } hashes
+#   $result  - optional arrayref that receives one statistics hash per pair
+# The behaviour depends on the NAME of the calling sub (caller(1)):
+#   * called from calc_KaKs_pair / calc_all_KaKs_pairs: per-pair variances
+#     and z-scores are computed and $result is returned;
+#   * any other caller (averaging / bootstrap): returns the list
+#     (mean D_s, mean D_n) over all pairs.
+# Pairs of unequal length, pairs without comparable sites and pairs for
+# which the Jukes-Cantor correction fails are skipped.
+sub _get_av_ds_dn {
+    # takes array of hashes of sequence strings and ids   #
+    # plain unpacking: "my $x = shift if @_" has undefined behaviour (perlsyn)
+    my ($self, $seq_ref, $result) = @_;
+    my @caller = caller(1);
+    my @seqarray = @$seq_ref;
+    my $want_pair_stats = $caller[3] =~ /calc_KaKs_pair/ || $caller[3] =~ /calc_all_KaKs_pairs/;
+    my $return_pairs    = $caller[3] =~ /calc_all_KaKs/  || $caller[3] =~ /calc_KaKs_pair/;
+    #for a multiple alignment considers all pairwise combinations#
+    my %dsfor_average = (ds => [], dn => []);
+    for (my $i = 0; $i < scalar @seqarray; $i++) {
+        for (my $j = $i +1; $j<scalar @seqarray; $j++ ){
+            if (length($seqarray[$i]{'seq'}) != length($seqarray[$j]{'seq'})) {
+                $self->warn(" aligned sequences must be of equal length!");
+                next;
+            }
+
+            my $syn_site_count = count_syn_sites($seqarray[$i]{'seq'}, $synsites);
+            my $syn_site_count2 = count_syn_sites($seqarray[$j]{'seq'}, $synsites);
+            my ($syn_count, $non_syn_count, $gap_cnt) = analyse_mutations($seqarray[$i]{'seq'}, $seqarray[$j]{'seq'});
+            #get averages
+            my $av_s_site = ($syn_site_count + $syn_site_count2)/2;
+            my $av_ns_syn_site = length($seqarray[$i]{'seq'}) - $gap_cnt- $av_s_site ;
+
+            # e.g. all-gap sequences: nothing to compare, and the
+            # proportions below would divide by zero
+            if ($av_s_site == 0 || $av_ns_syn_site == 0) {
+                $self->warn(" no comparable sites for $seqarray[$i]{'id'} and $seqarray[$j]{'id'}");
+                next;
+            }
+
+            #calculate ps and pn  (p54)
+            my $syn_prop = $syn_count / $av_s_site;
+            my $nc_prop = $non_syn_count / $av_ns_syn_site;
+
+            #now use jukes/cantor to calculate D_s and D_n, would alter here if needed a different method
+            my $d_syn = $self->jk($syn_prop);
+            my $d_nc = $self->jk($nc_prop);
+
+            #JK calculation must succeed for continuation of calculation
+            #ret_value = -1 if error
+            next unless $d_nc >=0 && $d_syn >=0;
+
+            push @{$dsfor_average{'ds'}}, $d_syn;
+            push @{$dsfor_average{'dn'}}, $d_nc;
+
+            #if not doing bootstrap, calculate the pairwise comparisin stats
+            next unless $want_pair_stats;
+
+            #now calculate variances assuming large sample
+            my $compared_len = length($seqarray[$i]{'seq'}) - $gap_cnt;
+            my $d_syn_var = jk_var($syn_prop, $compared_len);
+            my $d_nc_var  = jk_var($nc_prop, $compared_len);
+            #now calculate z_value
+            my $z = ($d_syn_var + $d_nc_var) ?
+              ($d_nc - $d_syn) / sqrt($d_syn_var + $d_nc_var) : 0;
+            push @$result , {S => $av_s_site, N=>$av_ns_syn_site,
+                             S_d => $syn_count, N_d =>$non_syn_count,
+                             P_s => $syn_prop, P_n=>$nc_prop,
+                             D_s => $d_syn,
+                             D_n => $d_nc,
+                             D_n_var =>$d_nc_var, D_s_var => $d_syn_var,
+                             Seq1 => $seqarray[$i]{'id'},
+                             Seq2 => $seqarray[$j]{'id'},
+                             z_score => $z,
+                         };
+            $self->warn (" number of mutations too small to justify normal test for  $seqarray[$i]{'id'} and $seqarray[$j]{'id'}\n- use Fisher's exact, or bootstrap a MSA")
+                if ($syn_count < 10 || $non_syn_count < 10 ) && $self->verbose > -1 ;
+        }
+    }
+
+    #warn of failure if no results hashes are present
+    #will fail if Jukes Cantor has failed for all pairwise combinations
+    #$self->warn("calculation failed!") if scalar @$result ==0;
+
+    #return results unless bootstrapping
+    return $result if $return_pairs;
+    #else if getting average for bootstrap
+    return( mean ($dsfor_average{'ds'}),mean ($dsfor_average{'dn'})) ;
+}
+
+
+# Jukes-Cantor correction: d = -(3/4) * ln(1 - (4/3) * p).
+#   $p - observed proportion of differences
+# Returns the corrected distance, or -1 (after a warning) when $p is too
+# large for the correction.  The limit includes p == 0.75 itself, where the
+# logarithm's argument is 0 and log() would die.
+sub jk {
+    my ($self, $p) = @_;
+    if ($p >= 0.75) {
+        $self->warn( " Jukes Cantor won't  work -too divergent!");
+        return -1;
+    }
+    return -1 * (3/4) * (log(1 - (4/3) * $p));
+}
+
+#works for large value of n (50?100?)
+# Large-sample variance of the Jukes-Cantor distance:
+#   9 p (1 - p) / ( (3 - 4p)^2 * n )
+#   $p - proportion of differences, $n - number of sites compared
+sub jk_var {
+    my ($p, $n) = @_;
+    my $numerator   = 9 * $p * (1 - $p);
+    my $denominator = ((3 - 4 * $p) ** 2) * $n;
+    return $numerator / $denominator;
+}
+
+
+# compares 2 sequences to find the number of synonymous/non
+# synonymous mutations between them
+
+sub analyse_mutations {  # args: two aligned sequence strings read codon by codon; returns (synonymous total, non-synonymous total, gap count)
+    my ($seq1, $seq2) = @_;
+    my %mutator = ( 2=> {0=>[[1,2],  # two positions differ: key = index of the unchanged position, value = the two orders in which to mutate the other two
+			     [2,1]], # depend on which is the same
+			 1=>[[0,2],
+			     [2,0]],
+			 2=>[[0,1],
+			     [1,0]],	
+		     },
+		    3=> [ [0,1,2],  # all three positions differ: every one of the 6 mutation orders
+			  [1,0,2],
+			  [0,2,1],
+			  [1,2,0],
+			  [2,0,1],
+			  [2,1,0] ],
+		    );
+    my $TOTAL   = 0;    # total synonymous changes
+    my $TOTAL_n = 0;	# total non-synonymous changes
+    my $gap_cnt = 0;    # nucleotides skipped because their codon contained '-' (3 per skipped codon)
+
+    my %input;          # current codon pair under keys 'cod1' and 'cod2'
+    my $seqlen = length($seq1);
+    for (my $j=0; $j< $seqlen; $j+=3) {
+	$input{'cod1'} = substr($seq1, $j,3);
+	$input{'cod2'} = substr($seq2, $j,3);
+
+	#ignore the codon pair if either codon contains a gap character
+	if ($input{'cod1'} =~ /\-/ || $input{'cod2'} =~ /\-/){
+	    $gap_cnt += 3; #just increments once if there is a pair of gaps
+	    next;
+	}
+
+	my ($diff_cnt, $same) = count_diffs(\%input);  # $same = index of the last matching position, undef when none match
+
+	#ignore if codons are identical
+	next if $diff_cnt == 0 ;
+	if ($diff_cnt == 1) {
+	    $TOTAL += $synchanges{$input{'cod1'}}{$input{'cod2'}};  # %synchanges is defined outside this block
+	    $TOTAL_n += 1 - $synchanges{$input{'cod1'}}{$input{'cod2'}};
+	    #print " \nfordiff is 1 , total now $TOTAL, total n now $TOTAL_n\n\n"
+	}
+	elsif ($diff_cnt ==2) {
+	    my $s_cnt = 0;      # synonymous steps summed over all pathways
+	    my $n_cnt = 0;      # never used below
+	    my $tot_muts = 4;   # 2 pathways x 2 steps
+	    #will stay 4 unless a pathway passes through a stop codon. NOTE(review): synonymous steps counted before the stop is met stay in $s_cnt - confirm this is intended
+	  OUTER:for my $perm (@{$mutator{'2'}{$same}}) {
+	      my $altered = $input{'cod1'};
+	      my $prev= $altered;
+	      #		print "$prev -> (", $t[$CODONS->{$altered}], ")";
+	      for 	my $mut_i (@$perm) { #index of codon mutated
+		  substr($altered, $mut_i,1) = substr($input{'cod2'}, $mut_i, 1);
+		  if ($t[$CODONS->{$altered}] eq '*') {
+		      $tot_muts -=2;  # drop both steps of this pathway
+		      #print "changes to stop codon!!\n";
+		      next OUTER;
+		  }
+		  else {
+		      $s_cnt += $synchanges{$prev}{$altered};
+		      #					print "$altered ->(", $t[$CODONS->{$altered}], ") ";
+		  }
+		  $prev = $altered;
+	      }
+	      #		print "\n";
+	  }
+	    if ($tot_muts != 0) {
+		$TOTAL += ($s_cnt/($tot_muts/2));  # $tot_muts/2 = number of pathways kept
+		$TOTAL_n += ($tot_muts - $s_cnt)/ ($tot_muts / 2);
+	    }
+
+	}
+	elsif ($diff_cnt ==3 ) {
+	    my $s_cnt = 0;      # synonymous steps summed over all pathways
+	    my $n_cnt = 0;      # never used below
+	    my $tot_muts = 18;	#potential number of mutations: 6 pathways x 3 steps
+	  OUTER: for my $perm (@{$mutator{'3'}}) {
+	      my $altered = $input{'cod1'};
+	      my $prev= $altered;
+	      #	print "$prev -> (", $t[$CODONS->{$altered}], ")";
+	      for my $mut_i (@$perm) { #index of codon mutated
+		  substr($altered, $mut_i,1) = substr($input{'cod2'}, $mut_i, 1);
+		  if ($t[$CODONS->{$altered}] eq '*') {
+		      $tot_muts -=3;  # drop all three steps of this pathway
+		      #	print "changes to stop codon!!\n";
+		      next OUTER;
+
+		  }
+		  else {
+		      $s_cnt += $synchanges{$prev}{$altered};
+		      #			print "$altered ->(", $t[$CODONS->{$altered}], ") ";
+		  }
+		  $prev = $altered;
+	      }
+	      #	print "\n";
+
+	  }#end OUTER loop
+	      #calculate number of synonymous/non synonymous mutations for that codon
+	      # and add to total ($tot_muts/3 = number of pathways kept; the two parts sum to 3)
+	      if ($tot_muts != 0) {
+		  $TOTAL += ($s_cnt / ($tot_muts /3));
+		  $TOTAL_n += 3 - ($s_cnt / ($tot_muts /3));
+	      }
+	}			#endif $diffcnt = 3
+    }				#end of sequencetraversal
+    return ($TOTAL, $TOTAL_n, $gap_cnt);
+}
+
+
+# Compare the two codons stored under 'cod1' and 'cod2' of the given
+# hashref, position by position.
+# Returns (number of differing positions, index of the last position that
+# matches).  The index is undef when no position matches; analyse_mutations
+# uses it to know which nucleotide stays fixed when two positions differ.
+sub count_diffs {
+    my $ref = shift;
+    my $cnt  = 0;
+    my $same = undef;
+    for my $pos (0 .. 2) {
+        my $nt1 = substr($ref->{'cod1'}, $pos, 1);
+        my $nt2 = substr($ref->{'cod2'}, $pos, 1);
+        if ($nt1 eq $nt2) {
+            $same = $pos;
+        }
+        else {
+            $cnt++;
+        }
+    }
+    return ($cnt, $same);
+}
+
+=head2 get_syn_changes
+
+ Title   : get_syn_changes
+ Usage   : Bio::Align::DNAStatistics->get_syn_changes
+ Function: Generate a hash of all pairwise combinations of codons
+           differing by 1
+ Returns : Symmetric matrix using hashes
+           First key is codon
+           and each codon points to a hashref of codons
+           the values of which describe type of change.
+           my $type = $hash{$codon1}->{$codon2};
+           values are :
+             1   synonymous
+             0   non-syn
+            -1   either codon is a stop codon
+ Args    : none
+
+=cut
+
+# Build the lookup of all codon pairs that differ at exactly one position.
+# Returns a hash (as a list) such that $hash{$codon1}{$codon2} is
+#    1  when both codons translate to the same residue (synonymous),
+#   -1  when the residues differ and either codon is a stop codon,
+#    0  otherwise (non-synonymous).
+# Both directions of every pair are filled in.
+sub get_syn_changes {
+    my @codons = _make_codons ();
+    my %results;
+    for my $i (0 .. $#codons - 1) {
+        my $first = $codons[$i];
+        for my $j ($i + 1 .. $#codons) {
+            my $second = $codons[$j];
+            my $mismatches = grep { substr($first, $_, 1) ne substr($second, $_, 1) } 0 .. 2;
+            next unless $mismatches == 1;
+
+            my $aa1 = $t[$CODONS->{$first}];
+            my $aa2 = $t[$CODONS->{$second}];
+            my $kind = $aa1 eq $aa2                  ?  1   # synon change
+                     : ($aa1 eq '*' || $aa2 eq '*')  ? -1   # stop codon
+                     :                                  0;  # nc change
+            $results{$first}{$second} = $kind;
+            $results{$second}{$first} = $kind;
+        }
+    }
+    return %results;
+}
+
+=head2 dnds_pattern_number
+
+ Title   : dnds_pattern_number
+ Usage   : my $patterns = $stats->dnds_pattern_number($alnobj);
+ Function: Counts the number of codons with no gaps in the MSA
+ Returns : Number of codons with no gaps ('patterns' in PAML notation)
+ Args    : A Bio::Align::AlignI compliant object such as a
+            Bio::SimpleAlign object.
+
+=cut
+
+# Number of codon columns left once gapped columns are removed from $aln:
+# the length of the gap-free alignment divided by 3.
+sub dnds_pattern_number{
+    my ($self, $aln) = @_;
+    my $ungapped = $aln->remove_gaps;
+    return $ungapped->length / 3;
+}
+
+# Sum the number of potential synonymous sites over the codons of $seq.
+#   $seq     - sequence string whose length must be a multiple of 3
+#   $synsite - hashref: codon => { 's' => synonymous sites of that codon }
+# Codons containing '-' are skipped.  Throws when the length is not a
+# multiple of 3.
+sub count_syn_sites {
+    my ($seq, $synsite) = @_;
+    __PACKAGE__->throw("not integral number of codons") if length($seq) % 3 != 0;
+    my $S = 0;
+    my $codon_total = length($seq) / 3;
+    for my $codon_idx (0 .. $codon_total - 1) {
+        my $cod = substr($seq, 3 * $codon_idx, 3);
+        next if $cod =~ /\-/;	#deal with alignment gaps
+        $S += $synsite->{$cod}{'s'};
+    }
+    return $S;
+}
+
+	
+
+# Build the per-codon table of potential synonymous / non-synonymous sites.
+# For each of the 64 codons and each of its 3 positions, the three possible
+# single-base replacements are translated: $syn counts those giving the
+# same residue, $non_stop starts at 3 and drops by one per replacement that
+# yields a stop codon.  The codon's 's' value is the sum over positions of
+# $syn / $non_stop, and 'n' is 3 minus that.
+# Returns a hashref: codon => { 's' => ..., 'n' => ... }.
+sub get_syn_sites {
+    my @bases = qw(T C A G);
+    my %sites_for;
+    for my $first (@bases) {
+        for my $second (@bases) {
+            for my $third (@bases) {
+                my $codon   = $first . $second . $third;
+                my $residue = $t[$CODONS->{$codon}];
+                my $syn_total = 0;
+                for my $pos (0 .. 2) {
+                    my ($syn, $non_stop) = (0, 3);
+                    for my $replacement (grep { $_ ne substr($codon, $pos, 1) } qw(A T C G)) {
+                        my $mutant = $codon;
+                        substr($mutant, $pos, 1) = $replacement;
+                        my $mutant_residue = $t[$CODONS->{$mutant}];
+                        $syn++      if $mutant_residue eq $residue;
+                        $non_stop-- if $mutant_residue eq '*';
+                    }
+                    $syn_total += $syn / $non_stop;
+                }
+                $sites_for{$codon} = { 's' => $syn_total, 'n' => 3 - $syn_total };
+            }
+        }
+    }
+    return \%sites_for;
+}
+
+# Return the list of all 64 codons.  Each position runs through T, C, A, G
+# with the last position varying fastest (TTT, TTC, TTA, TTG, TCT, ...).
+sub _make_codons {
+    my @nucs = qw(T C A G);
+    my @codons;
+    for my $first (@nucs) {
+        for my $second (@nucs) {
+            push @codons, map { $first . $second . $_ } @nucs;
+        }
+    }
+    return @codons;
+}
+
+# Generate the codon lookup table: a hashref mapping every codon to its
+# 0-based position in the list returned by _make_codons.
+sub get_codons {
+    my %index_of;
+    my $next_index = 0;
+    $index_of{$_} = $next_index++ for _make_codons();
+    return \%index_of;
+}
+
+#########stats subs, can go in another module? Here for speed. ###
+# Arithmetic mean of the numbers in the given arrayref.
+# Dies with a division-by-zero error when the array is empty.
+sub mean {
+    my $ref = shift;
+    my $count = scalar @$ref;
+    my $total = 0;
+    $total += $_ for @$ref;
+    return $total / $count;
+}
+
+# Sum of squared deviations from the mean of the numbers in the arrayref.
+# Despite the name, the sum is not divided by the number of elements;
+# sampling_variance does that division.
+sub variance {
+    my $ref = shift;
+    my $mean = mean($ref);
+    my $sum_of_squares = 0;
+    for my $value (@$ref) {
+        $sum_of_squares += ($value - $mean) ** 2;
+    }
+    return $sum_of_squares;
+}
+
+# Sample variance: the sum of squared deviations (from variance()) divided
+# by (number of elements - 1).
+sub sampling_variance {
+    my $ref = shift;
+    my $degrees_of_freedom = scalar(@$ref) - 1;
+    return variance($ref) / $degrees_of_freedom;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Align/PairwiseStatistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Align/PairwiseStatistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Align/PairwiseStatistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+# $Id: PairwiseStatistics.pm,v 1.10.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Align::PairwiseStatistics
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments
+
+=head1 SYNOPSIS
+
+  use strict;
+  my $stats = Bio::Align::PairwiseStatistics->new();
+
+  # get alignment object of two sequences somehow
+  my $pwaln;
+  print $stats->number_of_comparable_bases($pwaln);
+
+=head1 DESCRIPTION
+
+Calculate pairwise statistics.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Align::PairwiseStatistics;
+use vars qw($GapChars);
+use strict;
+
+
+BEGIN { $GapChars = '(\.|\-)'; }
+
+use base qw(Bio::Root::Root Bio::Align::StatisticsI);
+
+=head2 number_of_comparable_bases
+
+ Title   : number_of_comparable_bases
+ Usage   : my $bases = $stat->number_of_comparable_bases($aln);
+ Function: Returns the count of the number of bases that can be
+           compared (L) in this alignment ( length - gaps)
+ Returns : integer
+ Args    : L<Bio::Align::AlignI>
+
+
+=cut
+
+# Number of alignment columns that can be compared: the alignment length
+# minus the count reported by number_of_gaps.
+# Warns and returns 0 when $aln is not a Bio::Align::AlignI; warns (but
+# still computes) when the alignment does not hold exactly two sequences.
+sub number_of_comparable_bases{
+    my ($self,$aln) = @_;
+    unless( defined $aln && $aln->isa('Bio::Align::AlignI') ) {
+        $self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::PairwiseStatistics");
+        return 0;
+    }
+    if( $aln->no_sequences != 2 ) {
+        $self->warn("only pairwise calculations currently supported ". $aln->no_sequences."\n");
+    }
+    return $aln->length - $self->number_of_gaps($aln);
+}
+
+=head2 number_of_differences
+
+ Title   : number_of_differences
+ Usage   : my $nd = $stat->number_of_differences($aln);
+ Function: Returns the number of differences between two sequences
+ Returns : integer
+ Args    : L<Bio::Align::AlignI>
+
+
+=cut
+
+# Count the columns in which the first sequence differs from each other
+# sequence of the alignment.  A column is ignored for a comparison when the
+# first sequence or the compared sequence holds a gap character there.
+# Warns and returns 0 when $aln is not a Bio::Align::AlignI; warns (but
+# still counts) when the alignment does not hold exactly two sequences.
+sub number_of_differences{
+    my ($self,$aln) = @_;
+    unless( defined $aln && $aln->isa('Bio::Align::AlignI') ) {
+        $self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::PairwiseStatistics");
+        return 0;
+    }
+    if( $aln->no_sequences != 2 ) {
+        $self->warn("only pairwise calculations currently supported");
+    }
+    # one array of single characters per sequence
+    my ($reference, @others) = map { [ split(//, $_->seq()) ] } $aln->each_seq;
+    my $diffcount = 0;
+    for my $col ( 0 .. $aln->length - 1 ) {
+        my $ref_char = $reference->[$col];
+        next if $ref_char =~ /^$GapChars$/;
+        foreach my $other ( @others ) {
+            my $other_char = $other->[$col];
+            next if $other_char =~ /^$GapChars$/;
+            $diffcount++ if $ref_char ne $other_char;
+        }
+    }
+    return $diffcount;
+}
+
+=head2 number_of_gaps
+
+ Title   : number_of_gaps
+ Usage   : my $nd = $stat->number_of_gaps($aln);
+ Function: Returns the number of gapped positions among sequences in alignment
+ Returns : integer
+ Args    : L<Bio::Align::AlignI>
+
+
+=cut
+
+# Number of '-' characters in the alignment's gap line ($aln->gap_line).
+# Warns and returns 0 when $aln is not a Bio::Align::AlignI.
+sub number_of_gaps{
+    my ($self,$aln) = @_;
+    unless( defined $aln && $aln->isa('Bio::Align::AlignI') ) {
+        $self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::PairwiseStatistics");
+        return 0;
+    }
+    my $gapline = $aln->gap_line;
+    # tr with an empty replacement list only counts, it changes nothing
+    my $dash_count = ($gapline =~ tr/-//);
+    return $dash_count;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Align/ProteinStatistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Align/ProteinStatistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Align/ProteinStatistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,304 @@
+# $Id: ProteinStatistics.pm,v 1.7.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Align::ProteinStatistics
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::ProteinStatistics - Calculate Protein Alignment statistics (mostly distances)
+
+=head1 SYNOPSIS
+
+  use Bio::Align::ProteinStatistics;
+  use Bio::AlignIO;
+  my $in = new Bio::AlignIO(-format => 'fasta',
+			    -file   => 'pep-104.fasaln');
+  my $aln = $in->next_aln;
+
+  my $pepstats = Bio::Align::ProteinStatistics->new();
+  my $kimura = $pepstats->distance(-align => $aln,
+                                   -method => 'Kimura');
+  print $kimura->print_matrix;
+
+
+=head1 DESCRIPTION
+
+This object is for generating various statistics from a protein
+alignment.  Mostly it is where pairwise protein distances can be
+calculated.
+
+=head1 REFERENCES 
+
+D_Kimura - Kimura, M. 1983. The Neutral Theory of Molecular Evolution. CUP, 
+           Cambridge.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Align::ProteinStatistics;
+use vars qw(%DistanceMethods $Precision $DefaultGapPenalty);
+use strict;
+
+use Bio::Align::PairwiseStatistics;
+use Bio::Matrix::PhylipDist;
+
+%DistanceMethods = ('kimura|k' => 'Kimura',
+		    );
+$Precision = 5;
+$DefaultGapPenalty = 0;
+
+use base qw(Bio::Root::Root Bio::Align::StatisticsI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Align::ProteinStatistics();
+ Function: Builds a new Bio::Align::ProteinStatistics object 
+ Returns : an instance of Bio::Align::ProteinStatistics
+ Args    :
+
+
+=cut
+
+# Constructor: builds the object through the parent class and attaches a
+# fresh Bio::Align::PairwiseStatistics helper via pairwise_stats().
+# The argument list had been mangled to "at args" by the list archive.
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  # direct method call instead of the ambiguous indirect-object syntax
+  $self->pairwise_stats( Bio::Align::PairwiseStatistics->new() );
+
+  return $self;
+}
+
+=head2 distance
+
+ Title   : distance
+ Usage   : my $distance_mat = $stats->distance(-align  => $aln, 
+		 			       -method => $method);
+ Function: Calculates a distance matrix for all pairwise distances of
+           sequences in an alignment.
+ Returns : L<Bio::Matrix::PhylipDist> object
+ Args    : -align  => Bio::Align::AlignI object
+           -method => String specifying specific distance method 
+                      (implementing class may assume a default)
+
+=cut
+
+# Dispatch to the D_<Method> routine selected by -method (default
+# 'Kimura') and return its result for the alignment given as -align.
+# Throws when -align is not a Bio::Align::AlignI object; warns and returns
+# nothing when no entry of %DistanceMethods matches the method name.
+# The argument lists had been mangled to "at args" by the list archive.
+sub distance{
+   my ($self,@args) = @_;
+   my ($aln,$method) = $self->_rearrange([qw(ALIGN METHOD)],@args);
+   if( ! defined $aln || ! ref ($aln) || ! $aln->isa('Bio::Align::AlignI') ) { 
+       $self->throw("Must supply a valid Bio::Align::AlignI for the -align parameter in distance");
+   }
+   $method ||= 'Kimura';
+   # NOTE(review): the keys are used as unanchored, case-insensitive
+   # patterns, so 'kimura|k' matches any method name containing a "k".
+   foreach my $m ( keys %DistanceMethods ) {
+       if( $method =~ /$m/i ) {
+	   my $mtd = "D_$DistanceMethods{$m}";
+	   return $self->$mtd($aln);
+       }
+   }
+   $self->warn("Unrecognized distance method $method must be one of [".
+	       join(',',$self->available_distance_methods())."]");
+   return;
+}
+
+=head2 available_distance_methods
+
+ Title   : available_distance_methods
+ Usage   : my @methods = $stats->available_distance_methods();
+ Function: Enumerates the possible distance methods
+ Returns : Array of strings
+ Args    : none
+
+
+=cut
+
+# List the names of the distance methods registered in %DistanceMethods.
+# The argument list had been mangled to "at args" by the list archive.
+sub available_distance_methods{
+   my ($self,@args) = @_;
+   return values %DistanceMethods;
+}
+
+=head2 D - distance methods
+
+
+=cut
+
+
+=head2 D_Kimura
+
+ Title   : D_Kimura
+ Usage   : my $matrix = $pepstats->D_Kimura($aln);
+ Function: Calculate Kimura protein distance (Kimura 1983) which 
+           approximates PAM distance
+           D = -ln ( 1 - p - 0.2 * p^2 )
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : L<Bio::Align::AlignI>
+
+
+=cut
+
+# Kimura, M. 1983. The Neutral Theory of Molecular Evolution. CUP, Cambridge.
+
+# Kimura (1983) protein distance for every pair of sequences in $aln:
+#   D = -ln( 1 - p - 0.2 * p^2 ),  p = 1 - matches / compared columns
+# Columns where either sequence holds '-' are not compared; matches are
+# weighted by _check_ambiguity_protein.  A pair is reported as NaN when p
+# reaches 0.8541 or when the pair has no comparable column at all.
+# Returns 0 when _check_arg rejects $aln, otherwise a
+# Bio::Matrix::PhylipDist built from the names, values and lookup table.
+# The variable list had been mangled to "at names, at values" by the list
+# archive.
+sub D_Kimura{
+   my ($self,$aln) = @_;
+   return 0 unless $self->_check_arg($aln);
+   # ambiguities ignored at this point
+   my (@seqs,@names,@values,%dist);
+   foreach my $seq ( $aln->each_seq) {
+       push @names, $seq->display_id;
+       push @seqs, uc $seq->seq();
+   }
+   my $seqct = scalar @seqs;
+   my $len = $aln->length;
+   my $precisionstr = "%.$Precision"."f";
+
+   # (diagonals) distance is 0 for same sequence; done up front so that an
+   # alignment with a single sequence gets its diagonal entry too
+   for my $i ( 0 .. $seqct-1 ) {
+       $dist{$names[$i]}->{$names[$i]} = [$i,$i];
+       $values[$i][$i] = sprintf($precisionstr,0);
+   }
+
+   for( my $i = 0; $i < $seqct-1; $i++ ) {
+       for( my $j = $i+1; $j < $seqct; $j++ ) {
+	   my ($scored,$match) = (0,0);
+	   for( my $k=0; $k < $len; $k++ ) {
+	       my $m1 = substr($seqs[$i],$k,1);
+	       my $m2 = substr($seqs[$j],$k,1);
+	       if( $m1 ne '-' && $m2 ne '-' ) {
+		   # score is number of scored bases (alignable bases)
+		   # it could have also come from 
+		   # my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);
+		   # match is number of matches weighting ambiguity bases
+		   # as well
+		   $match += _check_ambiguity_protein($m1,$m2);
+		   $scored++;
+	       }
+	   }
+	   # From Felsenstein's PHYLIP documentation:
+	   # This is very quick to do but has some obvious
+	   # limitations. It does not take into account which amino
+	   # acids differ or to what amino acids they change, so some
+	   # information is lost. The units of the distance measure
+	   # are fraction of amino acids differing, as also in the
+	   # case of the PAM distance. If the fraction of amino acids
+	   # differing gets larger than 0.8541 the distance becomes
+	   # infinite.
+
+	   # no comparable column: leave $D undefined instead of dividing by 0
+	   my $D = $scored ? 1 - ( $match / $scored ) : undef;
+	   if( defined $D && $D < 0.8541 ) {
+	       $D = - log ( 1 - $D - (0.2 * ($D ** 2)));
+	       $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$D);
+	   } else { 
+	       $values[$j][$i] = $values[$i][$j] = '    NaN';
+	   }
+	   # fwd and rev lookup
+	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];
+	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];
+       }
+   }
+   return Bio::Matrix::PhylipDist->new(-program => 'bioperl_PEPstats',
+				       -matrix  => \%dist,
+				       -names   => \@names,
+				       -values  => \@values); 
+   
+}
+
+# some methods from EMBOSS distmat
+# Match weight for two aligned amino-acid symbols (scheme taken from
+# EMBOSS distmat):
+#   identical and not 'X'              -> 1.0
+#   'B' against 'D' or 'N' (any order) -> 0.5
+#   'Z' against 'E' or 'Q' (any order) -> 0.5
+#   'X' against 'X'                    -> 0.0025
+#   'X' against anything else          -> 0.05
+#   everything else                    -> 0
+# The B/Z branches used to compare a single character with the strings
+# 'DN' / 'EQ' (and one test checked $t2 twice), so they never matched.
+sub _check_ambiguity_protein
+{
+    my ($t1,$t2) = @_;
+    my $n = 0;
+
+    if( $t1 ne 'X' && $t1 eq $t2 ) { 
+        $n = 1.0;
+    } elsif(  ($t1 eq 'B' && $t2 =~ /\A[DN]\z/) ||
+	      ($t2 eq 'B' && $t1 =~ /\A[DN]\z/) ||
+	      ($t1 eq 'Z' && $t2 =~ /\A[EQ]\z/) ||
+	      ($t2 eq 'Z' && $t1 =~ /\A[EQ]\z/) ) {
+        $n = 0.5;
+    } elsif ( $t1 eq 'X' && $t2 eq 'X' ) {
+        $n = 0.0025;
+    } elsif(  $t1 eq 'X' || $t2 eq 'X' ) {
+        $n = 0.05;
+    }
+    return $n;
+}
+
+=head2 Data Methods
+
+=cut
+
+=head2 pairwise_stats
+
+ Title   : pairwise_stats
+ Usage   : $obj->pairwise_stats($newval)
+ Function: 
+ Returns : value of pairwise_stats
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the object stored under '_pairwise_stats'.
+# A defined argument replaces the stored value; the current value is
+# returned either way.
+sub pairwise_stats{
+    my $self = shift;
+    my ($value) = @_;
+    $self->{'_pairwise_stats'} = $value if defined $value;
+    return $self->{'_pairwise_stats'};
+}
+
+# Validate the alignment argument of the statistics methods.
+# Returns 1 when $aln is a Bio::Align::AlignI whose first sequence reports
+# the alphabet 'protein'; otherwise warns and returns 0.
+# The first warning used to name Bio::Align::DNAStatistics.
+sub _check_arg {
+    my($self,$aln ) = @_;
+    if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
+	$self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::ProteinStatistics");
+	return 0;
+    } elsif( $aln->get_seq_by_pos(1)->alphabet ne 'protein' ) { 
+	$self->warn("Must provide a protein alignment to Bio::Align::ProteinStatistics, you provided a " . $aln->get_seq_by_pos(1)->alphabet);
+	return 0;
+    }
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Align/StatisticsI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Align/StatisticsI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Align/StatisticsI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+# $Id: StatisticsI.pm,v 1.7.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Align::StatisticsI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::StatisticsI - Calculate some statistics for an alignment
+
+=head1 SYNOPSIS
+
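+  # $stats is any object implementing Bio::Align::StatisticsI
+  my @methods      = $stats->available_distance_methods();
+  my $distance_mat = $stats->distance(-align  => $aln,
+                                      -method => $methods[0]);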
+
+=head1 DESCRIPTION
+
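+This interface describes objects that compute pairwise distances from
+a sequence alignment.  Implementing classes must provide the
+distance() and available_distance_methods() methods documented below.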
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Align::StatisticsI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 distance
+
+ Title   : distance
+ Usage   : my $distance_mat = $stats->distance(-align  => $aln, 
+		 			       -method => $method);
+ Function: Calculates a distance matrix for all pairwise distances of
+           sequences in an alignment.
+ Returns : Array ref
+ Args    : -align  => Bio::Align::AlignI object
+           -method => String specifying specific distance method 
+                      (implementing class may assume a default)
+
+=cut
+
+sub distance{
+   # Interface stub: the arguments (-align / -method, see the POD above)
+   # are accepted but unused here; the call always ends in
+   # throw_not_implemented(), so implementing classes must override it.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 available_distance_methods
+
+ Title   : available_distance_methods
+ Usage   : my @methods = $stats->available_distance_methods();
+ Function: Enumerates the possible distance methods
+ Returns : Array of strings
+ Args    : none
+
+
+=cut
+
+sub available_distance_methods{
+   # Interface stub: any arguments are ignored and the call always ends
+   # in throw_not_implemented(); implementing classes must override it.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Align/Utilities.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Align/Utilities.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Align/Utilities.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,217 @@
+# $Id: Utilities.pm,v 1.20.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Align::Utilities
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Align::Utilities - A collection of utilities regarding converting
+and manipulating alignment objects
+
+=head1 SYNOPSIS
+
+  use Bio::Align::Utilities qw(:all);
+  # %dnaseqs is a hash of CDS sequences (spliced)
+
+
+  # Even if the protein alignments are local make sure the start/end
+  # stored in the LocatableSeq objects are to the full length protein.
+  # The CoDing Sequence that is passed in should still be the full 
+  # length CDS as the nt alignment will be generated.
+  #
+  my $dna_aln = &aa_to_dna_aln($aa_aln,\%dnaseqs);
+
+
+  # generate bootstraps
+  my $replicates = &bootstrap_replicates($aln,$count);
+
+
+=head1 DESCRIPTION
+
+This module contains utility methods for manipulating sequence
+alignment (L<Bio::Align::AlignI>) objects.
+
+The B<aa_to_dna_aln> utility is essentially the same as the B<mrtrans>
+program by Bill Pearson available at
+ftp://ftp.virginia.edu/pub/fasta/other/mrtrans.shar.  Of course this
+is a pure-perl implementation, but just to mention that if anything
+seems odd you can check the alignments generated against Bill's
+program.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+#' keep my emacs happy
+# Let the code begin...
+
+
+package Bio::Align::Utilities;
+use vars qw(@EXPORT @EXPORT_OK $GAP $CODONGAP %EXPORT_TAGS);
+use strict;
+use Carp;
+use Bio::Root::Version;
+require Exporter;
+
+use base qw(Exporter);
+
+# Nothing is exported by default; both utilities are available on request,
+# individually or together through the ':all' tag.
+@EXPORT = qw();
+@EXPORT_OK = qw(aa_to_dna_aln bootstrap_replicates);
+%EXPORT_TAGS = (all =>[@EXPORT, @EXPORT_OK]);
+BEGIN {
+    # One codon is CODONSIZE nucleotides; $CODONGAP is the gap string
+    # written in place of a codon.
+    use constant CODONSIZE => 3;
+    $GAP = '-';
+    $CODONGAP = $GAP x CODONSIZE;
+}
+
+=head2 aa_to_dna_aln
+
+ Title   : aa_to_dna_aln
+ Usage   : my $dnaaln = aa_to_dna_aln($aa_aln, \%seqs);
+ Function: Will convert an AA alignment to DNA space given the 
+           corresponding DNA sequences.  Note that this method expects 
+           the DNA sequences to be in frame +1 (GFF frame 0) as it will
+           start to project into coordinates starting at the first base of 
+           the DNA sequence, if this alignment represents a different 
+           frame for the cDNA you will need to edit the DNA sequences
+           to remove the 1st or 2nd bases (and revcom if things should be).
+ Returns : Bio::Align::AlignI object 
+ Args    : 2 arguments, the alignment and a hashref.
+           Alignment is a Bio::Align::AlignI of amino acid sequences. 
+           The hash reference should have keys which are 
+           the display_ids for the aa 
+           sequences in the alignment and the values are a 
+           Bio::PrimarySeqI object for the corresponding 
+           spliced cDNA sequence. 
+
+See also: L<Bio::Align::AlignI>, L<Bio::SimpleAlign>, L<Bio::PrimarySeq>
+
+=cut
+
+sub aa_to_dna_aln {
+    # Build a nucleotide alignment from an amino-acid alignment: every
+    # aligned residue is replaced by the next CODONSIZE bases of that
+    # sequence's coding DNA, and every gap by $CODONGAP.
+    my ($aln,$dnaseqs) = @_;
+    if( ! defined $aln || ! ref($aln) || ! $aln->isa('Bio::Align::AlignI') ) {
+        croak('Must provide a valid Bio::Align::AlignI object as the first argument to aa_to_dna_aln, see the documentation for proper usage and the method signature');
+    }
+    my $alnlen   = $aln->length;
+    my $dnaalign = Bio::SimpleAlign->new;
+
+    # Normalise '.' gap characters so only $GAP has to be tested below.
+    $aln->map_chars('\.',$GAP);
+
+    foreach my $seq ( $aln->each_seq ) {
+        my $aa_seqstr = $seq->seq();
+        my $id        = $seq->display_id;
+        my $cds       = $dnaseqs->{$id}
+            || $aln->throw("cannot find ". $seq->display_id);
+        my $start_offset = ($seq->start - 1) * CODONSIZE;
+
+        my $cds_str = $cds->seq();
+        my $dnalen  = $dnaseqs->{$id}->length;
+
+        # Walk the alignment columns, consuming one codon of DNA per
+        # non-gap column until the DNA runs out.
+        my $nt_seqstr;
+        my $used = 0;
+        for my $col ( 0 .. $alnlen - 1 ) {
+            my $char = substr($aa_seqstr,$col + $start_offset,1);
+            if ( $char ne $GAP && $used < $dnalen ) {
+                $nt_seqstr .= substr($cds_str,$used,CODONSIZE);
+                $used += CODONSIZE;
+            }
+            else {
+                $nt_seqstr .= $CODONGAP;
+            }
+        }
+
+        # Pad with gaps up to three characters per alignment column.
+        $nt_seqstr .= $GAP x (($alnlen * 3) - length($nt_seqstr));
+
+        $dnaalign->add_seq(
+            Bio::LocatableSeq->new(-display_id  => $id,
+                                   -alphabet    => 'dna',
+                                   -start       => $start_offset+1,
+                                   -end         => ($seq->end * CODONSIZE),
+                                   -strand      => 1,
+                                   -seq         => $nt_seqstr)
+        );
+    }
+    return $dnaalign;
+}
+
+=head2 bootstrap_replicates
+
+ Title   : bootstrap_replicates
+ Usage   : my $alns = &bootstrap_replicates($aln,100);
+ Function: Generate a pseudo-replicate of the data by randomly
+           sampling, with replacement, the columns from an alignment for
+           the non-parametric bootstrap.
+ Returns : Arrayref of L<Bio::SimpleAlign> objects
+ Args    : L<Bio::SimpleAlign> object
+           Number of replicates to generate
+
+=cut
+
+sub bootstrap_replicates {
+   # Build $count (default 1) bootstrap pseudo-replicates of $aln.  Each
+   # replicate has as many columns as the input, drawn at random with
+   # replacement; every sequence keeps its id.  Returns an array
+   # reference of Bio::SimpleAlign objects.
+   my ($aln,$count) = @_;
+   $count ||= 1;
+   my $alen = $aln->length;
+   my (@seqs,@nm);
+   $aln->set_displayname_flat(1);
+   for my $s ( $aln->each_seq ) {
+       push @seqs, $s->seq();
+       push @nm, $s->id;
+   }
+   my @alns;
+   while( $count-- > 0 ) {
+       my @newseqs;
+       # Draw one source column per output column and append that column's
+       # character to every sequence, so rows stay aligned.
+       for my $col ( 1 .. $alen ) {
+           my $index = int(rand($alen));
+           my $row = 0;
+           for my $seqstr ( @seqs ) {
+               $newseqs[$row++] .= substr($seqstr,$index,1);
+           }
+       }
+       my $newaln = Bio::SimpleAlign->new();
+       for my $row ( 0 .. $#newseqs ) {
+           $newaln->add_seq( Bio::LocatableSeq->new
+                             (-start         => 1,
+                              -end           => $alen,
+                              -display_id    => $nm[$row],
+                              -seq           => $newseqs[$row]));
+       }
+       push @alns, $newaln;
+   }
+   return \@alns;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/bl2seq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/bl2seq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/bl2seq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+# $Id: bl2seq.pm,v 1.23.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::bl2seq
+
+#   based on the Bio::SeqIO modules
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#   the Bio::Tools::BPlite modules by
+#   Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
+#   Lorenz Pollak (lorenz at ist.org, bioperl port)
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::bl2seq - bl2seq sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class, as in:
+
+    use Bio::AlignIO;
+
+    $in  = Bio::AlignIO->new(-file   => "inputfilename" ,
+                             -format => "bl2seq",
+                             -report_type => "blastn");
+    $aln = $in->next_aln();
+
+
+=head1 DESCRIPTION
+
+This object can create L<Bio::SimpleAlign> sequence alignment objects (of
+two sequences) from C<bl2seq> BLAST reports.
+
+A nice feature of this module is that - in combination with
+L<Bio::Tools::Run::StandAloneBlast> or a remote BLAST - it can be used to
+align two sequences and make a L<Bio::SimpleAlign> object from them which
+can then be manipulated using any L<Bio::SimpleAlign> methods, eg:
+
+   # Get two sequences
+   $str = Bio::SeqIO->new(-file=>'t/amino.fa' , '-format' => 'Fasta', );
+   my $seq3 = $str->next_seq();
+   my $seq4 = $str->next_seq();
+
+   # Run bl2seq on them
+   $factory = Bio::Tools::Run::StandAloneBlast->new('program' => 'blastp',
+                                                    'outfile' => 'bl2seq.out');
+   my $bl2seq_report = $factory->bl2seq($seq3, $seq4);
+   # Note that report is a Bio::SearchIO object
+
+   # Use AlignIO.pm to create a SimpleAlign object from the bl2seq report
+   $str = Bio::AlignIO->new(-file=> 'bl2seq.out','-format' => 'bl2seq');
+   $aln = $str->next_aln();
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::bl2seq;
+use strict;
+
+use Bio::SearchIO;
+
+use base qw(Bio::AlignIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $alignio = Bio::AlignIO->new(-format => 'bl2seq',
+                                           -file   => 'filename',
+                                           -report_type => 'blastx');
+ Function: Create an alignment stream that reads bl2seq reports
+ Returns : L<Bio::AlignIO::bl2seq> object
+ Args    : -report_type => report type (blastn,blastx,tblastx,tblastn,blastp)
+
+
+=cut
+
+
+sub new {
+    # Construct the stream through the parent class, then record the
+    # optional -report_type argument via report_type().
+    my ($class) = shift;
+    my $self = $class->SUPER::new(@_);
+    my ($rt) = $self->_rearrange([qw(REPORT_TYPE)],@_);
+    defined $rt && $self->report_type($rt);
+    return $self;
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object on success,
+           undef on error or end of file
+ Args    : none
+
+=cut
+
+sub next_aln {
+    # Return the alignment of one HSP from the bl2seq report, or nothing
+    # when the report has no further result, hit or HSP.
+    #
+    # NOTE(review): nothing in this method ever stores a result or hit in
+    # $self->{'_result'} / $self->{'_hit'}, so each call asks the parser
+    # for a new result and uses only the first HSP of its first hit -
+    # confirm whether iterating over all HSPs was intended.
+    my $self = shift;
+
+    # Create the BLAST report parser on first use and keep it on the object.
+    $self->{'bl2seqobj'} ||= Bio::SearchIO->new(-fh     => $self->_fh,
+                                                -format => 'blast');
+    my $bl2seqobj = $self->{'bl2seqobj'};
+
+    my $result = $self->{'_result'} || $bl2seqobj->next_result;
+    unless( defined $result ) {
+        # Reset the slot that is read above (this used to write 'result').
+        $self->{'_result'} = undef;
+        return;
+    }
+
+    my $hit = $self->{'_hit'} || $result->next_hit;
+    unless( defined $hit ) {
+        $self->{'_hit'} = undef;
+        return;
+    }
+
+    my $hsp  = $hit->next_hsp;
+    return unless defined $hsp;
+    return $hsp->get_aln;
+
+# much easier above, eh?
+# (legacy hand-built version kept for reference; it expects an $aln
+#  Bio::SimpleAlign object to be created first)
+#     my ($start,$end,$name,$seqname,$seq,$seqchar,$strand);
+#     $seqchar = $hsp->query_string;
+#     $start   = $hsp->query->start;
+#     $end     = $hsp->query->end;
+#      # Query name typically not present in bl2seq report
+#     $seqname = $hsp->query->seq_id || 'Query-sequence';
+#     $strand  = $hsp->query->strand;
+
+#     #    unless ($seqchar && $start && $end  && $seqname) {return 0} ;
+#     unless ($seqchar && $start && $end ) {return 0} ;
+
+#     $seq = new Bio::LocatableSeq('-seq'   =>$seqchar,
+#                                  '-id'    =>$seqname,
+#                                  '-start' =>$start,
+#                                  '-end'   =>$end,
+#                                  '-strand'=>$strand,
+#                                  );
+
+#     $aln->add_seq($seq);
+
+#     $seqchar  = $hsp->hit_string;
+#     $start    = $hsp->hit->start;
+#     $end      = $hsp->hit->end;
+#     $seqname  = $hsp->hit->seq_id;
+#     $strand   = $hsp->hit->strand;
+
+#     unless ($seqchar && $start && $end  && $seqname) {return 0} ;
+
+#     $seq = new Bio::LocatableSeq('-seq'   =>$seqchar,
+#                                  '-id'    =>$seqname,
+#                                  '-start' =>$start,
+#                                  '-end'   =>$end,
+#                                  '-strand'=>$strand,
+#                                  );
+#     $aln->add_seq($seq);
+#     return $aln;
+}
+
+
+=head2 write_aln (NOT IMPLEMENTED)
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in bl2seq format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    # Writing bl2seq reports is not supported: the alignments passed in
+    # are ignored and the call always ends in throw_not_implemented().
+    my ($self,@aln) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 report_type
+
+ Title   : report_type
+ Usage   : $obj->report_type($newval)
+ Function: Sets the report type (blastn, blastp...)
+ Returns : value of report_type (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub report_type{
+    # Getter/setter for the 'report_type' slot.  Any argument, including
+    # undef, replaces the stored value; the current value is returned.
+    my ($self, @new_value) = @_;
+    if( @new_value ) {
+        $self->{'report_type'} = $new_value[0];
+    }
+    return $self->{'report_type'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/clustalw.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/clustalw.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/clustalw.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,345 @@
+# $Id: clustalw.pm,v 1.37.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::clustalw
+#
+#   based on the Bio::SeqIO modules
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#       and the Bio::SimpleAlign module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# History
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::clustalw - clustalw sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::AlignIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Align::AlignI objects to and from clustalw
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::clustalw;
+use vars qw($LINELENGTH $CLUSTALPRINTVERSION);
+use strict;
+
+
+$LINELENGTH          = 60;
+$CLUSTALPRINTVERSION = '1.81';
+use base qw(Bio::AlignIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $alignio = new Bio::AlignIO(-format => 'clustalw',
+                       -file => 'filename');
+ Function: returns a new Bio::AlignIO object to handle clustalw files
+ Returns : Bio::AlignIO::clustalw object
+ Args    : -verbose => verbosity setting (-1, 0, 1, 2)
+           -file    => name of file to read in or to write, with ">"
+           -fh      => alternative to -file param - provide a filehandle
+                       to read from or write to
+           -format  => alignment format to process or produce
+           -percentages => display a percentage of identity
+                           in each line of the alignment (clustalw only)
+           -linelength=> alignment output line length (default 60)
+
+=cut
+
+sub _initialize {
+    # Run the parent initialisation, then apply the clustalw-specific
+    # options: -percentages (only when given) and -linelength, which
+    # falls back to $LINELENGTH when absent or false.
+    my $self = shift;
+    my @args = @_;
+    $self->SUPER::_initialize(@args);
+
+    my @options     = $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
+    my $percentages = $options[0];
+    my $ll          = $options[1];
+
+    if ( defined $percentages ) {
+        $self->percentages($percentages);
+    }
+    return $self->line_length( $ll ? $ll : $LINELENGTH );
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream
+ Returns : Bio::Align::AlignI object
+ Args    : NONE
+
+See L<Bio::Align::AlignI> for details
+
+=cut
+
+sub next_aln {  # parse one CLUSTAL alignment from the stream into a Bio::SimpleAlign
+    my ($self) = @_;
+    my $first_line;
+
+    while ( $first_line = $self->_readline ) {  # skip empty lines before the header
+        last if $first_line !~ /^$/;
+    }
+    $self->_pushback($first_line);  # hand the first non-empty line back to the reader
+    if ( defined( $first_line = $self->_readline )
+        && $first_line !~ /CLUSTAL/ )
+    {
+        $self->throw(
+            "trying to parse a file which does not start with a CLUSTAL header"
+        );
+    }
+    my %alignments;  # sequence name => concatenated aligned string
+    my $aln = Bio::SimpleAlign->new(
+        -source  => 'clustalw',
+        -verbose => $self->verbose
+    );
+    my $order = 0;  # running counter used to number names as they first appear
+    my %order;  # sequence name => position of first appearance
+    $self->{_lastline} = '';
+    my ($first_block, $seen_block) = (0,0);
+    while ( defined( $_ = $self->_readline ) ) {
+        next if (/^\s+$/ && !$first_block);  # whitespace-only lines before the first block
+        if (/^\s$/) {  # line contains no description
+            $seen_block = 1;  # from here on, names are no longer added to %order
+            next;
+        }
+        $first_block = 1;
+        # break the loop if we come to the end of the current alignment
+        # and push back the CLUSTAL header
+        if (/CLUSTAL/) {
+            $self->_pushback($_);
+            last;
+        }
+
+        my ( $seqname, $aln_line ) = ( '', '' );
+        if (/^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ox) {
+
+            # clustal 1.4 format
+            ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );  # name/start-end is stored as "name:start-end"
+
+            # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
+        }
+        elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) {    # with numbers
+            ( $seqname, $aln_line ) = ( $1, $2 );
+            if ( $seqname =~ /^[\*\.\+\:]+$/ ) {  # first field is only * . + : symbols, so not a sequence row
+                $self->{_lastline} = $_;
+                next;
+            }
+        }
+        else {
+            $self->{_lastline} = $_;  # unrecognised line: remember it and move on
+            next;
+        }
+
+        if ( !$seen_block ) {  # still inside the first block
+            if (exists $order{$seqname}) {
+                $self->warn("Duplicate sequence : $seqname\n".
+                            "Can't guarantee alignment quality");
+            }
+            else {
+                $order{$seqname} = $order++;
+            }
+        }
+
+        $alignments{$seqname} .= $aln_line;  # append this block's piece to the sequence
+    }
+
+    my ( $sname, $start, $end );
+    foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {  # first-appearance order
+        if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {  # coordinates were carried in the name
+            ( $sname, $start, $end ) = ( $1, $2, $3 );
+        }
+        else {
+            ( $sname, $start ) = ( $name, 1 );
+            my $str = $alignments{$name};
+            $str =~ s/[^A-Za-z]//g;  # drop everything except letters before counting
+            $end = length($str);  # end = number of letters in the aligned string
+        }
+        my $seq = new Bio::LocatableSeq(
+            -seq   => $alignments{$name},
+            -id    => $sname,
+            -start => $start,
+            -end   => $end
+        );
+        $aln->add_seq($seq);
+    }
+
+    # not sure if this should be a default option - or we can pass in
+    # an option to do this in the future? --jason stajich
+    # $aln->map_chars('\.','-');
+    undef $aln if ( !defined $end || $end <= 0 );  # $end holds the last sequence's end; unset when nothing was read
+    return $aln;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the clustalw-format object (.aln) into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Align::AlignI object
+
+=cut
+
+sub write_aln {
+    # Write each alignment in CLUSTAL W layout: a header line, then blocks
+    # of at most line_length columns, each block listing every sequence
+    # followed by the matching slice of the alignment's match line.
+    #
+    # Args    : list of Bio::Align::AlignI objects; anything else is
+    #           skipped with a warning
+    # Returns : 1 on success, an empty value as soon as a write fails
+    my ( $self, @aln ) = @_;
+    my $line_len = $self->line_length || $LINELENGTH;
+    foreach my $aln (@aln) {
+        if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
+            $self->warn(
+"Must provide a Bio::Align::AlignI object when calling write_aln"
+            );
+            next;
+        }
+        my $matchline = $aln->match_line;
+        if ( $self->force_displayname_flat ) {
+            $aln->set_displayname_flat(1);
+        }
+        $self->_print(
+            sprintf( "CLUSTAL W(%s) multiple sequence alignment\n\n\n",
+                $CLUSTALPRINTVERSION )
+        ) or return;
+        my $length = $aln->length();
+        my @seq    = $aln->each_seq();
+
+        # The name column is as wide as the longest display name, with a
+        # minimum of 22 characters.
+        my $max = 22;
+        foreach my $seq (@seq) {
+            my $name_len = length( $aln->displayname( $seq->get_nse() ) );
+            $max = $name_len if $name_len > $max;
+        }
+
+        my $count = 0;
+        while ( $count < $length ) {
+            my ( $linesubstr, $first ) = ( '', 1 );
+            foreach my $seq (@seq) {
+
+                # Take only what the sequence actually has, so that
+                # sequences much shorter than others do not trigger
+                # substr warnings.
+                my $seqchars  = $seq->seq();
+                my $substring = "";
+                if ( length($seqchars) >= ( $count + $line_len ) ) {
+                    $substring = substr( $seqchars, $count, $line_len );
+                    if ($first) {
+                        $linesubstr = substr( $matchline, $count, $line_len );
+                        $first      = 0;
+                    }
+                }
+                elsif ( length($seqchars) >= $count ) {
+                    $substring = substr( $seqchars, $count );
+                    if ($first) {
+                        $linesubstr = substr( $matchline, $count );
+                        $first      = 0;
+                    }
+                }
+                $self->_print(
+                    sprintf(
+                        "%-" . $max . "s %s\n",
+                        $aln->displayname( $seq->get_nse() ), $substring
+                    )
+                ) or return;
+            }
+
+            my $percentages = '';
+            if ( $self->percentages ) {
+                # tr with an empty replacement only counts the '*' columns.
+                my $identical = ( $linesubstr =~ tr/\*// );
+                my $columns   = length($linesubstr);
+
+                # An empty match-line slice used to divide by zero.
+                my $percent = $columns ? 100 * ( $identical / $columns ) : 0;
+                $percentages = sprintf( "\t%d%%", $percent );
+            }
+
+            # This write was the only unchecked one; fail like the others.
+            $self->_print(
+                sprintf(
+                    "%-" . $max . "s %s%s\n",
+                    '', $linesubstr, $percentages
+                )
+            ) or return;
+            $self->_print( sprintf("\n\n") ) or return;
+            $count += $line_len;
+        }
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 percentages
+
+ Title   : percentages
+ Usage   : $obj->percentages($newval)
+ Function: Set the percentages flag - whether or not to show percentages in
+           each output line
+ Returns : value of percentages
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub percentages {
+    # Getter/setter for the '_percentages' flag.  A defined argument is
+    # stored; the current value is returned either way.
+    my $self  = shift;
+    my $value = shift;
+    $self->{'_percentages'} = $value if defined $value;
+    return $self->{'_percentages'};
+}
+
+=head2 line_length
+
+ Title   : line_length
+ Usage   : $obj->line_length($newval)
+ Function: Set the alignment output line length
+ Returns : value of line_length
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub line_length {
+    # Getter/setter for the '_line_length' slot: a defined argument
+    # replaces the stored value, and the current value is returned.
+    my ( $self, @rest ) = @_;
+    my $value = $rest[0];
+    defined $value and $self->{'_line_length'} = $value;
+    return $self->{'_line_length'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/emboss.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/emboss.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/emboss.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,244 @@
+# $Id: emboss.pm,v 1.17.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::emboss
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::emboss - Parse EMBOSS alignment output (from applications water and needle)
+
+=head1 SYNOPSIS
+
+    # do not use the object directly
+    use Bio::AlignIO;
+    # read in an alignment from the EMBOSS program water
+    my $in = new Bio::AlignIO(-format => 'emboss',
+                              -file   => 'seq.water');
+    while( my $aln = $in->next_aln ) {
+    # do something with the alignment
+    }
+
+=head1 DESCRIPTION
+
+This object handles parsing and writing pairwise sequence alignments
+from the EMBOSS suite.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::AlignIO::emboss;
+use vars qw($EMBOSSTitleLen $EMBOSSLineLen);
+use strict;
+
+use Bio::LocatableSeq;
+
+use base qw(Bio::AlignIO);
+
+BEGIN {
+    $EMBOSSTitleLen    = 13;
+    $EMBOSSLineLen     = 50;
+}
+
+sub _initialize {
+    # Run the parent initialisation, then clear the remembered alignment
+    # type; next_aln stores the program type there while parsing.
+    my($self,@args) = @_;
+    $self->SUPER::_initialize(@args);
+    $self->{'_type'} = undef;
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object - returns undef on end of file
+	    or on error
+ Args    : NONE
+
+=cut
+
+sub next_aln {  # parse the next EMBOSS pairwise alignment record into a Bio::SimpleAlign
+    my ($self) = @_;
+    my $seenbegin = 0;  # set once the first alignment row has been read
+    my %data = ( 'seq1' => {
+		     'start'=> undef,
+		     'end'=> undef,
+		     'name' => '',
+		     'data' => '' },
+		 'seq2' => {
+		     'start'=> undef,
+		     'end'=> undef,
+		     'name' => '',
+		     'data' => '' },
+		 'align' => '',
+		 'type'  => $self->{'_type'},  # to restore type from
+		                                     # previous aln if possible
+		 );
+    my %names;  # how often each (truncated) sequence name has been seen
+    while( defined($_ = $self->_readline) ) {
+	next if( /^\#?\s+$/ || /^\#+\s*$/ );  # skip blank lines and lines holding only '#'
+	if( /^\#(\=|\-)+\s*$/) {  # ruler line made of '=' or '-'
+	    last if( $seenbegin);  # a ruler after alignment rows ends this record
+	} elsif( /(Local|Global):\s*(\S+)\s+vs\s+(\S+)/ ||
+		 /^\#\s+Program:\s+(\S+)/ )
+	{
+	    my ($name1,$name2) = ($2,$3);  # undef when only the "# Program:" pattern matched
+	    if( ! defined $name1 ) { # Handle EMBOSS 2.2.X
+		$data{'type'} = $1;  # program name taken from the header line
+		$name1 = $name2 = '';
+	    } else {
+		$data{'type'} = $1 eq 'Local' ? 'water' : 'needle';
+	    }
+	    $data{'seq1'}->{'name'} = $name1;
+	    $data{'seq2'}->{'name'} = $name2;
+
+	    $self->{'_type'} = $data{'type'};  # remembered for later records on this stream
+
+	} elsif( /Score:\s+(\S+)/ ) {
+	    $data{'score'} = $1;
+	} elsif( /^\#\s+(1|2):\s+(\S+)/ && !  $data{"seq$1"}->{'name'} ) {  # "# 1: name" / "# 2: name", used only while no name is set
+	    my $nm = $2;
+	    $nm = substr($nm,0,$EMBOSSTitleLen); # cut the name to $EMBOSSTitleLen chars (presumably how EMBOSS prints it in the rows - confirm)
+	    if( $names{$nm} ) {
+		$nm .= "-". $names{$nm};  # a repeated name gets "-<times seen>" appended
+	    }
+	    $names{$nm}++;
+	    $data{"seq$1"}->{'name'} = $nm;
+	} elsif( $data{'seq1'}->{'name'} &&
+		 /^\Q$data{'seq1'}->{'name'}/ ) {  # a row starting with seq1's name opens a block
+	    my $count = 0;  # row index within the block: 0 = seq1, 1 = match line, 2 = seq2
+	    $seenbegin = 1;
+	    my @current;  # per row: [ start, residues ] for this block only
+	    while( defined ($_) ) {
+		my $align_other = '';  # not used in this method
+		my $delayed;  # not used in this method
+		if($count == 0 || $count == 2 ) {
+		    my @l = split;  # whitespace-separated fields of the row
+		    my ($seq,$align,$start,$end);
+		    if( $count == 2 && $data{'seq2'}->{'name'} eq '' ) {
+			# weird boundary condition
+			($start,$align,$end) = @l;
+		    } elsif( @l == 3 ) {  # name, start and end but no residues
+			$align = '';
+			($seq,$start,$end) = @l
+		    } else {
+			($seq,$start,$align,$end) = @l;
+ 		    }
+
+		    my $seqname = sprintf("seq%d", ($count == 0) ? '1' : '2');
+		    $data{$seqname}->{'data'} .= $align;
+		    $data{$seqname}->{'start'} ||= $start;  # keep the first start seen
+		    $data{$seqname}->{'end'} = $end;  # overwritten by every block, so the last one wins
+		    $current[$count] = [ $start,$align || ''];
+		} else {
+		    s/^\s+//;
+		    s/\s+$//;
+		    $data{'align'} .= $_;  # middle row: the match line, trimmed
+		}
+
+	      BOTTOM:  # label is not referenced anywhere in this method
+		last if( $count++ == 2);  # stop after the third row
+		$_ = $self->_readline();
+	    }
+
+	    if( $data{'type'} eq 'needle' ) {
+		# which ever one is shorter we want to bring it up to
+		# length.  Man this stinks.
+		my ($s1,$s2) =  ($data{'seq1'}, $data{'seq2'});
+
+		my $d = length($current[0]->[1]) - length($current[2]->[1]);  # seq1 row length minus seq2 row length
+		if( $d < 0 ) { # s1 is smaller, need to add some
+		    # compare the starting points for this alignment line
+		    if( $current[0]->[0] <= 1 ) {
+			$s1->{'data'} = ('-' x abs($d)) . $s1->{'data'};  # pad in front
+			$data{'align'} = (' 'x abs($d)).$data{'align'};
+		    } else {
+			$s1->{'data'} .= '-' x abs($d);  # pad at the end
+			$data{'align'} .= ' 'x abs($d);
+		    }
+		} elsif( $d > 0) { # s2 is smaller, need to add some
+		    if( $current[2]->[0] <= 1 ) {
+			$s2->{'data'} = ('-' x abs($d)) . $s2->{'data'};
+			$data{'align'} = (' 'x abs($d)).$data{'align'};
+		    } else {
+			$s2->{'data'} .= '-' x abs($d);
+			$data{'align'} .= ' 'x abs($d);
+		    }
+		}
+	    }
+
+	}
+    }
+    return unless $seenbegin;  # no alignment rows were read
+    my $aln =  Bio::SimpleAlign->new(-verbose => $self->verbose(),
+				     -score   => $data{'score'},
+				     -source => "EMBOSS-".$data{'type'});
+
+    foreach my $seqname ( qw(seq1 seq2) ) {
+	return unless ( defined $data{$seqname} );
+	$data{$seqname}->{'name'} ||= $seqname;  # fall back to 'seq1' / 'seq2' when unnamed
+	my $seq = new Bio::LocatableSeq('-seq' => $data{$seqname}->{'data'},
+					'-id'  => $data{$seqname}->{'name'},
+					'-start'=> $data{$seqname}->{'start'},
+					'-end' => $data{$seqname}->{'end'},
+					);
+	$aln->add_seq($seq);
+    }
+    return $aln;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in emboss format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    my ($self, at aln) = @_;
+
+    $self->throw("Sorry: writing emboss output is not currently available! \n");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,242 @@
+# $Id: fasta.pm,v 1.27.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::fasta
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> 
+class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::SimpleAlign> objects to and from
+fasta flat file databases.  This is for the fasta alignment format, not
+for the FastA sequence analysis program.  To process the alignments from
+FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Peter Schattner
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::fasta;
+use vars qw($WIDTH);
+use strict;
+
+
+use base qw(Bio::AlignIO);
+$WIDTH = 60;
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln
+ Function: returns the next alignment in the stream.
+ Returns : Bio::Align::AlignI object - returns undef on end of file
+	        or on error
+ Args    : -width => optional argument to specify the width sequence
+           will be written (60 chars by default)
+
+See L<Bio::Align::AlignI>
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    # An optional -width argument sets the line width stored on the stream.
+    my ($width) = $self->_rearrange([qw(WIDTH)], @_);
+    $self->width($width || $WIDTH);
+
+    my ($name, $desc, $seqchar);
+    my $aln = Bio::SimpleAlign->new();
+
+    # Work out (display id, start, end) for one record.  A header name of
+    # the form "id/start-end" carries its own coordinates; otherwise the
+    # record runs from 1 to the number of alphabetic characters it holds.
+    my $coords_for = sub {
+        my ($header_name, $residues) = @_;
+        if ( $header_name =~ /(\S+)\/(\d+)-(\d+)/ ) {
+            return ($1, $2, $3);
+        }
+        return ($header_name, 1, $self->_get_len($residues));
+    };
+
+    while ( defined( my $entry = $self->_readline ) ) {
+        chomp $entry;
+        if ( $entry =~ s/^>\s*(\S+)\s*// ) {
+            my $next_name = $1;
+            if ( defined $name ) {
+                # A new header closes the previous record: put it away.
+                my ($seqname, $start, $end) = $coords_for->($name, $seqchar);
+                my $seq = Bio::LocatableSeq->new(
+                    -seq         => $seqchar,
+                    -display_id  => $seqname,
+                    -description => $desc,
+                    -start       => $start,
+                    -end         => $end,
+                );
+                $aln->add_seq($seq);
+                $self->debug("Reading $seqname\n");
+            }
+            # What is left of the header line after the name is the
+            # description of the record that starts here.
+            $name    = $next_name;
+            $desc    = $entry;
+            $seqchar = "";
+            next;
+        }
+        # No symbol validation here; that is already done in Bio::PrimarySeq.
+        $seqchar .= $entry;
+    }
+
+    # Nothing read at all (e.g. at end of file): avoid undef warnings below.
+    $name    = "" if !defined $name;
+    $seqchar = "" if !defined $seqchar;
+
+    # Coordinates of the last record in the stream.
+    my ($seqname, $start, $end) = $coords_for->($name, $seqchar);
+
+    # If $end <= 0, we have either reached the end of the file or
+    # encountered some other error.  An explicit undef is returned, as
+    # before, so list-context callers still receive one element.
+    if ( $end <= 0 ) {
+        return undef;
+    }
+
+    # Empty lines at the end of the file also reach this point; skip the
+    # record when both the sequence and the name are empty.
+    unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
+        my $seq = Bio::LocatableSeq->new(
+            -seq         => $seqchar,
+            -display_id  => $seqname,
+            -description => $desc,
+            -start       => $start,
+            -end         => $end,
+        );
+        $aln->add_seq($seq);
+        $self->debug("Reading $seqname\n");
+    }
+
+    # Pad short rows with trailing gaps so every sequence has the
+    # alignment length.
+    my $alnlen = $aln->length;
+    foreach my $seq ( $aln->each_seq ) {
+        if ( $seq->length < $alnlen ) {
+            my ($diff) = ($alnlen - $seq->length);
+            $seq->seq( $seq->seq() . "-" x $diff);
+        }
+    }
+    return $aln;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in fasta format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+See L<Bio::Align::AlignI>
+
+=cut
+
+sub write_aln {
+    my ($self,@aln) = @_;
+    my $width = $self->width;
+
+    foreach my $aln (@aln) {
+        # Anything that is not an alignment object is reported and skipped.
+        if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+            next;
+        }
+        if( $self->force_displayname_flat ) {
+            $aln->set_displayname_flat(1);
+        }
+        foreach my $rseq ( $aln->each_seq() ) {
+            my $name = $aln->displayname($rseq->get_nse());
+            my $seq  = $rseq->seq();
+            my $desc = $rseq->description || '';
+            $self->_print (">$name $desc\n") or return ;
+            if( defined $seq && length($seq) > 0 ) {
+                # Wrap the residues at $width characters per line.
+                $seq =~ s/(.{1,$width})/$1\n/g;
+            } else {
+                $seq = "\n";
+            }
+            # Stop on a failed write, exactly as for the header line above.
+            $self->_print($seq) or return;
+        }
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 _get_len
+
+ Title   : _get_len
+ Usage   : 
+ Function: determine number of alphabetic chars
+ Returns : integer
+ Args    : sequence string
+
+=cut
+
+sub _get_len {
+    my ($self, $seq) = @_;
+    # Work on a copy with every character that is not a letter A-Z (in
+    # either case) removed, then report how many characters are left.
+    (my $letters_only = $seq) =~ s/[^A-Z]//gi;
+    return CORE::length($letters_only);
+}
+
+=head2 width
+
+ Title   : width
+ Usage   : $obj->width($newwidth)
+           $width = $obj->width;
+ Function: Get/set width of alignment
+ Returns : integer value of width 
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub width {
+    my ($self, @new_value) = @_;
+
+    # Setter: store the first argument (undef included) and hand it back.
+    if (@new_value) {
+        $self->{'_width'} = $new_value[0];
+        return $self->{'_width'};
+    }
+
+    # Getter: fall back to the package default when nothing usable is stored.
+    return $self->{'_width'} || $WIDTH;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/largemultifasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/largemultifasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/largemultifasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,191 @@
+#
+# BioPerl module for Bio::AlignIO::largemultifasta
+
+#   based on the Bio::SeqIO::largefasta module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Albert Vilella
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# January 20, 2004
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::largemultifasta - Largemultifasta MSA Sequence
+input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::SimpleAlign> objects to and from
+largemultifasta flat file databases.  This is for the fasta sequence
+format NOT FastA analysis program.  To process the pairwise alignments
+from a FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO
+module.
+
+This is a reimplementation of the Bio::AlignIO::fasta module that creates
+temporary files instead of keeping the whole sequences in memory.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Albert Vilella, Heikki Lehvaslaiho
+
+Email: avilella-at-gmail-dot-com, heikki-at-bioperl-dot-org
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::largemultifasta;
+use strict;
+
+use Bio::Seq::LargeLocatableSeq;
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::AlignIO Bio::SeqIO Bio::SimpleAlign);
+
+
+# Set up the stream, then make sure a sequence factory exists.  When the
+# caller supplied none, one that builds Bio::Seq::LargeLocatableSeq
+# objects is installed.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new
+			      (-verbose => $self->verbose(),
+			       -type => 'Bio::Seq::LargeLocatableSeq'));
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream while taking care
+           of the length
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    my ($self) = @_;
+    my $largeseq    = $self->sequence_factory->create();
+    my $lines_read  = 0;
+    my $seen_header = 0;
+
+    while ( defined( my $entry = $self->_readline ) ) {
+        # A further header line belongs to the next record: push it back
+        # and hand over what has been collected so far.
+        if ( $seen_header == 1 && $entry =~ /^\s*>/ ) {
+            $self->_pushback($entry);
+            return $largeseq;
+        }
+
+        # An entry that is exactly '>' counts as a header, but carries no
+        # id or description and is not counted as a processed line.
+        if ( $entry eq '>' ) {
+            $seen_header = 1;
+            next;
+        }
+
+        if ( $entry =~ /\s*>(.+?)$/ ) {
+            # Header line: first token is the id, the rest the description.
+            $seen_header = 1;
+            my ($id, $fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
+                or $self->warn("Can't parse fasta header");
+            $largeseq->display_id($id);
+            $largeseq->primary_id($id);
+            $largeseq->desc($fulldesc);
+        }
+        else {
+            # Sequence line: drop all whitespace and append the residues.
+            $entry =~ s/\s+//g;
+            $largeseq->add_sequence_as_string($entry);
+        }
+
+        # Progress report every 1000 lines when running verbosely.
+        $lines_read++;
+        if ( $lines_read % 1000 == 0 && $self->verbose() > 0 ) {
+            print "line $lines_read\n";
+        }
+    }
+
+    # No header was seen at all: there is no record to return.
+    return unless $seen_header;
+    return $largeseq;
+}
+
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
+	    or on error
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self  = shift;
+    my $aln   = Bio::SimpleAlign->new();
+    my $nseqs = 0;
+
+    # Every remaining record in the stream becomes one row of the alignment.
+    while ( defined( my $largeseq = $self->next_seq ) ) {
+        $aln->add_seq($largeseq);
+        $nseqs++;
+        $self->debug("sequence readed\n");
+    }
+
+    # Nothing left in the stream: return nothing, so that callers looping
+    # with "while (my $aln = $stream->next_aln)" terminate instead of
+    # receiving an empty alignment forever.
+    return unless $nseqs;
+
+    # Pad short rows with trailing gaps up to the alignment length.  The
+    # gaps are appended with add_sequence_as_string(), the same call
+    # next_seq() uses to build the sequence; passing only the gap string
+    # to seq() would not append to the existing residues.
+    my $alnlen = $aln->length;
+    foreach my $largeseq ( $aln->each_seq ) {
+        if( $largeseq->length < $alnlen ) {
+            my ($diff) = ($alnlen - $largeseq->length);
+            $largeseq->add_sequence_as_string("-" x $diff);
+        }
+    }
+
+    return $aln;
+
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in largemultifasta format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    my ($self,@aln) = @_;
+
+    foreach my $aln (@aln) {
+        # Anything that is not an alignment object is reported and skipped.
+        if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+            next;
+        }
+        foreach my $rseq ( $aln->each_seq() ) {
+            my $name = $aln->displayname($rseq->get_nse());
+            my $seq  = $rseq->seq();
+            my $desc = $rseq->description || '';
+            $self->_print (">$name $desc\n") or return ;
+
+            # Emit the residues in lines of 60 characters.
+            my $length = length($seq);
+            for ( my $offset = 0; $offset < $length; $offset += 60 ) {
+                my $seqsub = substr($seq,$offset,60);
+                $self->_print ("$seqsub\n") or return ;
+            }
+        }
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/maf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/maf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/maf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+# $Id: maf.pm,v 1.10.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::maf
+#
+# Copyright Allen Day
+#
+
+=head1 NAME
+
+Bio::AlignIO::maf - Multiple Alignment Format sequence input stream
+
+=head1 SYNOPSIS
+
+ Do not use this module directly.  Use it via the Bio::AlignIO class.
+
+ use Bio::AlignIO;
+
+ my $alignio = Bio::AlignIO->new(-fh => \*STDIN, -format => 'maf');
+
+ while(my $aln = $alignio->next_aln()){
+   my $match_line = $aln->match_line;
+
+   print $aln, "\n";
+
+   print $aln->length, "\n";
+   print $aln->no_residues, "\n";
+   print $aln->is_flush, "\n";
+   print $aln->no_sequences, "\n";
+
+   $aln->splice_by_seq_pos(1);
+
+   print $aln->consensus_string(60), "\n";
+   print $aln->get_seq_by_pos(1)->seq, "\n";
+   print $aln->match_line(), "\n";
+
+   print "\n";
+ }
+
+=head1 DESCRIPTION
+
+This class constructs Bio::SimpleAlign objects from an MAF-format
+multiple alignment file.
+
+Writing in MAF format is currently unimplemented.
+
+Spec of MAF format is here:
+  http://hgwdev-sugnet.cse.ucsc.edu/cgi-bin/hgGateway?org=human
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Allen Day
+
+Email: allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::maf;
+use vars qw($seen_header);
+use strict;
+
+use Bio::SimpleAlign;
+
+$seen_header = 0;
+
+use base qw(Bio::AlignIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $alignio = new Bio::AlignIO(-format => 'maf',
+					  -file   => '>file',
+					  -idlength => 10,
+					  -idlinebreak => 1);
+ Function: Initialize a new L<Bio::AlignIO::maf> reader
+ Returns : L<Bio::AlignIO> object
+ Args    :
+
+=cut
+
+# Nothing MAF-specific to set up: delegate to the parent class and
+# return a true value.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+
+  1;
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+           Throws an exception if the first line read does not
+           start with ##maf.
+ Returns : L<Bio::SimpleAlign> object
+ Args    : 
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+
+    # The first line read must be the ##maf header.
+    # NOTE(review): $seen_header is a package variable, so this check is
+    # made once per process rather than once per stream - confirm intended.
+    if(!$seen_header){
+	my $line = $self->_readline;
+	# An exhausted stream yields undef; treat it as a missing header
+	# without tripping "uninitialized" warnings.
+	$self->throw("This doesn't look like a MAF file.  First line should start with ##maf, but it was: ".(defined $line ? $line : ''))
+	    unless defined $line && $line =~ /^##maf/;
+	$seen_header = 1;
+    }
+
+    my $aln =  Bio::SimpleAlign->new(-source => 'maf');
+
+    # Collect one alignment block: its "a" line and its "s" lines.
+    my($aline, @slines);
+    while(my $line = $self->_readline()){
+	$aline = $line if $line =~ /^a/;
+	push @slines, $line if $line =~ /^s /;
+	if( $line !~ /\S/ ) {
+	    # A blank line ends the block; blank lines met before the block
+	    # has started are skipped instead of ending the stream early.
+	    next unless $aline;
+	    last;
+	}
+    }
+
+    return unless $aline;
+
+    # Parse the key=value pairs of the "a" line.  Declaration and
+    # assignment are not combined with a statement modifier: the behaviour
+    # of "my ... if ..." is undefined.
+    my($kvs) = $aline =~ /^a\s+(.+)$/;
+    my @kvs  = $kvs ? split(/\s+/, $kvs) : ();
+    my %kv;
+    foreach my $kv (@kvs){
+	my($k,$v) = $kv =~ /(.+)=(.+)/;
+	next unless defined $k;    # token without key=value: ignore it
+	$kv{$k} = $v;
+    }
+
+    $aln->score($kv{score});
+
+    foreach my $sline (@slines){
+	my($s,$src,$start,$size,$strand,$srcsize,$text) =
+	    split /\s+/, $sline;
+	# adjust coordinates to be one-based inclusive
+        $start = $start + 1;
+	my $seq = Bio::LocatableSeq->new('-seq'    => $text,
+					 '-id'     => $src,
+					 '-start'  => $start,
+					 '-end'    => $start + $size - 1,
+					 '-strand' => $strand,
+					 );
+	$aln->add_seq($seq);
+    }
+
+    return $aln;
+}
+
+# Writing MAF is not implemented: delegate to the stock
+# "not implemented" error of the invoking object.
+sub write_aln {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/maf.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+# $Id: mase.pm,v 1.11.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::mase
+
+#   based on the Bio::SeqIO::mase module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::mase - mase sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from mase flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::mase;
+use strict;
+
+
+use base qw(Bio::AlignIO);
+
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self  = shift;
+    my $aln   = Bio::SimpleAlign->new(-source => 'mase');
+    my $nseqs = 0;
+
+    while( my $entry = $self->_readline) {
+        # Lines starting with ';' are skipped here.
+        next if $entry =~ /^;/;
+
+        # The first other line names the sequence, optionally as
+        # "name/start-end".  An end of -1 marks "coordinates not given".
+        my ($name, $start, $end);
+        if(  $entry =~ /^(\S+)\/(\d+)-(\d+)/ ) {
+            ($name, $start, $end) = ($1, $2, $3);
+        } else {
+            $entry =~ s/\s//g;
+            $name = $entry;
+            $end  = -1;
+        }
+
+        # The lines that follow, up to the next ';' line, are sequence
+        # data; only letters, '.' and '-' are kept.
+        my $seq = "";
+        while( $entry = $self->_readline) {
+            last if $entry =~ /^;/;
+            $entry =~ s/[^A-Za-z\.\-]//g;
+            $seq .= $entry;
+        }
+
+        # Without explicit coordinates the sequence runs from 1 to its
+        # length with '.' and '-' removed.
+        if( $end == -1) {
+            $start = 1;
+            (my $seq_residues = $seq) =~ s/\W//g;
+            $end = length($seq_residues);
+        }
+
+        my $add = Bio::LocatableSeq->new('-seq'   => $seq,
+                                         '-id'    => $name,
+                                         '-start' => $start,
+                                         '-end'   => $end,
+                                         );
+        $aln->add_seq($add);
+        $nseqs++;
+
+        # If $end <= 0, we have either reached the end of the file or
+        # encountered some other error.  Give up at once: carrying on with
+        # a discarded alignment would die on the next add_seq call.
+        return if $end <= 0;
+    }
+
+    # Nothing read at all: signal end of stream instead of handing back an
+    # empty alignment, which is a true value and would keep callers looping.
+    return unless $nseqs;
+    return $aln;
+}
+
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in mase format  ###Not yet implemented!###
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+# Writing mase is not implemented; the alignment arguments are accepted
+# only to match the Bio::AlignIO write_aln interface.
+sub write_aln {
+    my ($self,@aln) = @_;
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mega.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mega.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/mega.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,221 @@
+# $Id: mega.pm,v 1.11.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::mega
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::mega - Parse and Create MEGA format data files
+
+=head1 SYNOPSIS
+
+    use Bio::AlignIO;
+    my $alignio = new Bio::AlignIO(-format => 'mega',
+                                   -file   => 't/data/hemoglobinA.meg');
+
+    while( my $aln = $alignio->next_aln ) {
+    # process each alignment or convert to another format like NEXUS
+    }
+
+=head1 DESCRIPTION
+
+This object handles reading and writing data streams in the MEGA
+format (Kumar and Nei).
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::AlignIO::mega;
+use vars qw($MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN);
+use strict;
+
+use Bio::SimpleAlign;
+use Bio::LocatableSeq;
+
+BEGIN {
+  $MEGANAMELEN = 10;
+  $LINELEN = 60;
+  $BLOCKLEN = 10;
+  %VALID_TYPES =  map {$_, 1} qw( dna rna protein standard);
+}
+use base qw(Bio::AlignIO);
+
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+           Supports the following MEGA format features:
+           - The file has to start with '#mega'
+           - Reads in the name of the alignment from a comment
+             (anything after '!TITLE: ') .
+           - Reads in the format parameters datatype
+
+ Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
+	    or on error
+ Args    : NONE
+
+
+=cut
+
+sub next_aln{
+   my ($self) = @_;
+   my $entry;
+   my ($alphabet,%seqs);
+
+   my $aln = Bio::SimpleAlign->new(-source => 'mega');
+
+   # Skip leading blank lines.
+   while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {}
+
+   # Stream exhausted: signal end of input instead of complaining about a
+   # missing '#mega' header (and matching against undef).
+   return unless defined $entry;
+
+   $self->throw("Not a valid MEGA file! [#mega] not starting the file!")
+       unless $entry =~ /^#mega/i;
+
+   # Header section: title and format parameters, up to the first line
+   # that starts with '#' (the first sequence line).
+   while( defined($entry = $self->_readline() ) ) {
+       local($_) = $entry;
+       if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)}
+       elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) {
+           my (@fields) = split(/\s+/,$1);
+           foreach my $f ( @fields ) {
+               my ($name,$value) = split(/\=/,$f);
+               if( $name eq 'datatype' ) {
+                   $alphabet = $value;
+               } elsif( $name eq 'identical' ) {
+                   $aln->match_char($value);
+               } elsif( $name eq 'indel' ) {
+                   $aln->gap_char($value);
+               }
+           }
+       } elsif( /^\#/ ) {
+           last;
+       }
+   }
+
+   # Sequence section: $entry still holds the line that ended the header
+   # loop.  Each non-blank line carries a name in its first $MEGANAMELEN
+   # columns, followed by a piece of that sequence.
+   my @order;
+   while( defined($entry) ) {
+       if( $entry !~ /^\s+$/ ) {
+           # this is to skip the leading '#'
+           my $seqname = substr($entry,1,$MEGANAMELEN-1);
+           $seqname =~ s/(\S+)\s+$/$1/g;
+           my $line = substr($entry,$MEGANAMELEN);
+           $line =~ s/\s+//g;
+           # Remember first-seen order so rows keep the file's order.
+           if( ! defined $seqs{$seqname} ) {push @order, $seqname; }
+           $seqs{$seqname} .= $line;
+       }
+       $entry = $self->_readline();
+   }
+
+   foreach my $seqname ( @order ) {
+       # The end coordinate is the length with '-' characters removed.
+       my $s = $seqs{$seqname};
+       $s =~ s/\-//g;
+       my $end = length($s);
+       my $seq = Bio::LocatableSeq->new(-alphabet => $alphabet,
+                                        -id => $seqname,
+                                        -seq => $seqs{$seqname},
+                                        -start => 1,
+                                        -end   => $end);
+
+       $aln->add_seq($seq);
+   }
+   $aln->unmatch;
+   return $aln;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in MEGA format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+=cut
+
+sub write_aln{
+   my ($self,@aln) = @_;
+
+   foreach my $aln ( @aln ) {
+       if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+           $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+           return 0;
+       } elsif( ! $aln->is_flush($self->verbose) ) {
+           $self->warn("All Sequences in the alignment must be the same length");
+           return 0;
+       }
+       # NOTE(review): match() is called on the caller's alignment object
+       # and is not undone afterwards - confirm this is intended.
+       $aln->match();
+       my $format = sprintf('datatype=%s identical=%s indel=%s;',
+                            $aln->get_seq_by_pos(1)->alphabet(),
+                            $aln->match_char, $aln->gap_char);
+
+       $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n",
+                             $aln->id, $format));
+
+       my $length = $aln->length();
+       $aln->set_displayname_flat();
+
+       # Interleaved output: each pass writes the next $LINELEN columns of
+       # every sequence, split into space-separated groups of $BLOCKLEN.
+       for ( my $count = 0; $count < $length; $count += $LINELEN ) {
+           foreach my $seq ( $aln->each_seq ) {
+               my $substring = substr($seq->seq(), $count, $LINELEN);
+               my @blocks;
+               for ( my $blockcount = 0;
+                     $blockcount < length($substring);
+                     $blockcount += $BLOCKLEN ) {
+                   push @blocks, substr($substring, $blockcount,$BLOCKLEN);
+               }
+               # '#' plus the name, left-justified in the name column.
+               $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n",
+                                     substr($aln->displayname($seq->get_nse()),
+                                            0,$MEGANAMELEN-2),
+                                     join(' ', @blocks)));
+           }
+           $self->_print("\n");
+       }
+   }
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return 1;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/meme.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/meme.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/meme.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,193 @@
+# $Id: meme.pm,v 1.10.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+#  BioPerl module for Bio::AlignIO::meme
+#   Based on the Bio::SeqIO modules
+#  by Ewan Birney <birney at ebi.ac.uk>
+#  and Lincoln Stein  <lstein at cshl.org>
+#  and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Benjamin Berman
+#
+# You may distribute this module under the same terms as perl itself
+
+=head1 NAME
+
+Bio::AlignIO::meme - meme sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::AlignIO class.
+
+  use Bio::AlignIO;
+  # read in an alignment from meme
+  my $in = Bio::AlignIO->new(-format => 'meme',
+                             -file   => 'meme.out');
+  while( my $aln = $in->next_aln ) {
+     # do something with the alignment
+  }
+
+=head1 DESCRIPTION
+
+This object transforms the "sites sorted by position p-value" sections
+of a meme (text) output file into a series of Bio::SimpleAlign
+objects.  Each SimpleAlign object contains Bio::LocatableSeq
+objects which represent the individual aligned sites as defined by
+the central portion of the "site" field in the meme file.  The start
+and end coordinates are derived from the "Start" field. See
+L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for more information.
+
+This module can only parse MEME version 3.0 and greater.  Previous
+versions have output formats that are more difficult to parse
+correctly.  If the meme output file is not version 3.0 or greater,
+we signal an error.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Benjamin Berman
+
+ Based on the Bio::SeqIO modules by Ewan Birney and others
+ Email: benb at fruitfly.berkeley.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with an
+underscore.
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::meme;
+use strict;
+use Bio::LocatableSeq;
+
+use base qw(Bio::AlignIO);
+
+# Constants
+my $MEME_VERS_ERR =
+"MEME output file must be generated by version 3.0 or higher";
+my $MEME_NO_HEADER_ERR =
+"MEME output file contains no header line (ex: MEME version 3.0)";
+my $HTML_VERS_ERR =
+"MEME output file must be generated with the -text option";
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream
+ Returns : Bio::SimpleAlign object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+	my ($self) = @_;
+	my $aln = Bio::SimpleAlign->new(-source => 'meme');
+	my $line;
+	my $good_align_sec = 0;
+	my $in_align_sec = 0;
+	# Read until one complete "sites sorted by position" section has been
+	# consumed or the input runs out.
+	while (!$good_align_sec && defined($line = $self->_readline())) {
+		if (!$in_align_sec) {
+			# Check for the meme header
+			if ($line =~ /^\s*MEME\s+version\s+(\S+)/ ){
+				$self->{'meme_vers'} = $1;
+				# Compare the whole leading number: taking only its
+				# first digit would reject a version such as "10.0".
+				my ($vers) = $self->{'meme_vers'} =~ /^(\d+)/;
+				$self->throw($MEME_VERS_ERR)
+					unless (defined $vers && $vers >= 3);
+				$self->{'seen_header'} = 1;
+			}
+			# Check if they've output the HTML version
+			if ($line =~ /\<TITLE\>/i){
+				$self->throw($HTML_VERS_ERR);
+			}
+			# Check if we're going into an alignment section
+			if ($line =~ /sites sorted by position/) {
+				$self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
+				$in_align_sec = 1;
+			}
+		} elsif ($line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
+                       (\S+)\s+([.ACTGX\-]*)\s+([ACTGX\-]+)\s+
+                       ([.ACTGX\-]*)/xi ) {
+			# Got a sequence line.  Captures in use: $1 name, $2 strand
+			# sign, $3 start, $6 the central site.  $4, $5 and $7 are
+			# matched but not used here.
+			my $seq_name = $1;
+			my $strand = ($2 eq '-') ? -1 : 1;
+			my $start_pos = $3;
+			my $central = uc($6);
+
+			# Make the sequence.  Meme gives the start coordinate at the left
+			# hand side of the motif relative to the INPUT sequence.
+			my $end_pos = $start_pos + length($central) - 1;
+			my $seq = Bio::LocatableSeq->new(-seq    => $central,
+											 -id     => $seq_name,
+											 -start  => $start_pos,
+											 -end    => $end_pos,
+											 -strand => $strand
+											);
+			# Add the sequence motif to the alignment
+			$aln->add_seq($seq);
+		} elsif (($line =~ /^\-/) || ($line =~ /Sequence name/)){
+			# These are acceptable things to be in the site section
+		} elsif ($line =~ /^\s*$/){
+			# This ends the site section
+			$in_align_sec = 0;
+			$good_align_sec = 1;
+		} else{
+			$self->warn("Unrecognized format:\n$line");
+			return 0;
+		}
+	}
+	# Signal an error if we didn't find a header section
+	$self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
+
+	# 0 (false) means that no complete site section was read.
+	return ($good_align_sec ? $aln : 0);
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: Not implemented
+ Returns : 1 for success and 0 for error
+ Args    : Bio::SimpleAlign object
+
+=cut
+
+# Writing MEME output is not implemented; the alignment arguments are
+# accepted only to match the Bio::AlignIO write_aln interface.
+sub write_aln {
+   my ($self,@aln) = @_;
+   $self->throw_not_implemented();
+}
+
+# ----------------------------------------
+# -   Private methods
+# ----------------------------------------
+
+# Initialise the stream through the parent class, then reset the flag
+# that records whether a "MEME version" header line has been seen.
+sub _initialize {
+  my($self,@args) = @_;
+
+  # Call into our base version
+  $self->SUPER::_initialize(@args);
+
+  # Then initialize our data variables
+  $self->{'seen_header'} = 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/metafasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/metafasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/metafasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,222 @@
+# $Id: metafasta.pm,v 1.6.4.2 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::metafasta
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::metafasta - Metafasta MSA Sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::SimpleAlign> objects to and from
+metafasta flat file databases.
+
+The format of a metafasta file is
+
+  >test/1-25
+  ABCDEFHIJKLMNOPQRSTUVWXYZ
+  &charge
+  NBNAANCNJCNNNONNCNNUNNXNZ
+  &chemical
+  LBSAARCLJCLSMOIMCHHULRXRZ
+
+where the sequence block is followed by one or several meta blocks.
+Each meta block starts with the ampersand character '&' in the first
+column and is immediately followed by the name of the meta data which
+continues until the new line. The meta data follows it. All
+characters, except new line, are important in meta data.
+
+=head1 SEE ALSO
+
+L<Bio::SeqIO::metafasta>
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::metafasta;
+use vars qw($WIDTH);
+use strict;
+
+use Bio::SimpleAlign;
+use Bio::Seq::Meta;
+use Bio::Seq::SeqFactory;
+use Bio::Seq::SeqFastaSpeedFactory;
+
+use base qw(Bio::AlignIO);
+
+BEGIN { $WIDTH = 60}  # line width that width() falls back to when none is set
+
+sub _initialize {
+  my($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+  my ($width) = $self->_rearrange([qw(WIDTH)], @args);   # optional -width argument
+  $width && $self->width($width);   # a false/absent width keeps the default
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
+	    or on error
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my( $self ) = @_;
+    local $/ = "\n>";    # each _readline call returns one whole '>' record
+
+    my $aln =  Bio::SimpleAlign->new();
+
+    while(defined (my $entry = $self->_readline)) {
+        chomp($entry);
+        if ($entry =~ m/\A\s*\Z/s) { # very first one
+            return unless $entry = $self->_readline;
+            chomp($entry);
+        }
+        $entry =~ s/^>//;
+
+        my ($top,$sequence) = split(/\n/,$entry,2);
+        $sequence = '' unless defined $sequence; # record had a header line only
+        $sequence =~ s/>//g;
+        my @metas;
+        ($sequence, @metas) = split /\n&/, $sequence; # "\n&" starts a meta block
+        $sequence = '' unless defined $sequence; # splitting '' returns nothing
+        $sequence =~ s/\s//g; # Remove whitespace before the length is taken
+
+        my ($id, $start, $end);
+        if ( $top =~ /(\S+)\/(\d+)-(\d+)/ ) {
+            $id = $1;
+            $start = $2;
+            $end = $3;
+        }
+        elsif ($top =~ /(\S+)/) {
+            $id = $1;
+            $start = 1;
+            $end = length($sequence);
+        }
+
+        my $seq = Bio::Seq::Meta->new('-seq'=>$sequence,
+				   '-id'=>$id,
+				   '-start'=>$start,
+				   '-end'=>$end
+				  );
+
+        foreach my $meta (@metas) {
+            my ($name,$string) = split /\n/, $meta, 2; # limit 2 keeps wrapped lines
+            $string = '' unless defined $string;
+            $string =~ s/\n//g;	# Remove newlines, spaces are important
+            $seq->named_meta($name, $string);
+        }
+	$aln->add_seq($seq);
+    }
+
+    # nothing was read: report end of stream instead of an empty alignment,
+    # otherwise "while (my $aln = $in->next_aln)" would never terminate
+    return unless $aln->no_sequences;
+    # alignment needs seqs all the same length, pad with gaps (done once,
+    # after the final alignment length is known)
+    my $alnlen = $aln->length;
+    foreach my $member ( $aln->each_seq ) {
+	next unless $member->length < $alnlen;
+	my $diff = $alnlen - $member->length;
+	$member->seq( $member->seq() . "-" x $diff);
+    }
+    return $aln;
+}
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in fasta format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+=cut
+
+sub write_aln {
+    my ($self, @aln) = @_;
+    my $width = $self->width;   # maximum characters per output line
+
+    foreach my $aln (@aln) {
+	if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+	    $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+	    next;
+	}
+	foreach my $seq ( $aln->each_seq() ) {
+	    my $name = $aln->displayname($seq->get_nse);
+
+	    my $str  = $seq->seq();
+            if(length($str) > 0) {
+                $str =~ s/(.{1,$width})/$1\n/g;   # wrap at $width columns
+            } else {
+                $str = "\n";
+            }
+            $self->_print (">",$name,"\n",$str) or return;
+            if ($seq->isa('Bio::Seq::MetaI')) {
+                foreach my $meta ($seq->meta_names) {
+                    my $str = $seq->named_meta($meta);
+                    $str =~ s/(.{1,$width})/$1\n/g;
+                    $self->_print ("&",$meta,"\n",$str) or return; # fail like the sequence line
+                }
+            }
+	}
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+
+=head2 width
+
+ Title   : width
+ Usage   : $obj->width($newval)
+ Function: Get/Set the line width for METAFASTA output
+ Returns : value of width
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub width{
+   my $self = shift;
+   # store a new width only when a defined value was handed in
+   $self->{'width'} = $_[0] if defined $_[0];
+   my $current = $self->{'width'};
+   return $current ? $current : $WIDTH;   # false/unset width: package default
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/msf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/msf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/msf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,236 @@
+# $Id: msf.pm,v 1.22.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::msf
+#   based on the Bio::SeqIO::msf module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::msf - msf sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from msf
+flat file databases.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::msf;
+use vars qw(%valid_type);
+use strict;
+
+use Bio::SeqIO::gcg; # for GCG_checksum()
+use Bio::SimpleAlign;
+
+use base qw(Bio::AlignIO);
+
+BEGIN {
+	%valid_type = qw( dna N rna N protein P );  # alphabet => code printed after "Type:" by write_aln
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream. Tries to read *all* MSF
+           It reads all non whitespace characters in the alignment
+           area. For MSFs with weird gaps (eg ~~~) map them by using
+           $aln->map_chars('~','-')
+ Returns : Bio::Align::AlignI object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+	my $self = shift;
+	my $entry;
+	my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
+
+	my $aln =  Bio::SimpleAlign->new(-source => 'gcg' );
+
+	while( $entry = $self->_readline) {
+		$entry =~ m{//} && last; # move to alignment section
+		$entry =~ /Name:\s+(\S+)/ && do { $name = $1;
+					  $hash{$name} = ""; # blank line
+					  push(@names,$name); # we need it ordered!
+				       };
+		# otherwise - skip
+	}
+
+   # alignment section: append every block's residues to its sequence
+
+   while( $entry = $self->_readline) {
+		next if ( $entry =~ /^\s+(\d+)/ ) ; # indented line starting with a number
+		$entry =~ /^\s*(\S+)\s+(.*)$/ && do {
+			$name = $1;
+			$str = $2;
+			if( ! exists $hash{$name} ) {
+				$self->throw("$name exists as an alignment line but not in the header. Not confident of what is going on!");
+			}
+			$str =~ s/\s//g;
+			$str =~ s/~/-/g;
+			$hash{$name} .= $str;
+		};
+   }
+
+   # no "Name:" line seen at all: there is no alignment to return
+	if (scalar(@names) < 1) {
+		undef $aln;
+		return $aln;
+	}
+
+   # now got this as a name - sequence hash. Let's make some sequences!
+
+   foreach $name ( @names ) {
+		if( $name =~ m{(\S+)/(\d+)-(\d+)} ) {
+			$seqname = $1;
+			$start = $2;
+			$end = $3;
+		} else {
+			$seqname = $name;
+			$start = 1;
+			$str = $hash{$name};
+			$str =~ s/[^A-Za-z]//g;   # only letters count towards the end
+			$end = length($str);
+		}
+
+		$seq = Bio::LocatableSeq->new(-seq   => $hash{$name},
+											  -id    => $seqname,
+											  -start => $start,
+											  -end   => $end,
+											 );
+		$aln->add_seq($seq);
+
+#  If $end <= 0, we have either reached the end of
+#  file in <> or we have encountered some other error
+#
+#   if ($end <= 0) { undef $aln;}
+
+   }
+   return $aln;
+}
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in MSF format
+           Sequence type of the alignment is determined by the first sequence.
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Align::AlignI object
+
+
+=cut
+
+sub write_aln {
+	my ($self, @aln) = @_;
+	my $type;
+	my $maxname;
+	my ($length,$date,$name,$seq,$miss,$pad,$tempcount,$index);
+	foreach my $aln (@aln) {
+		if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+			$self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+			next;
+		}
+		# per-alignment state, so a second alignment starts at column 0
+		# and does not repeat the names of the previous one
+		my ($count, %hash, @arr) = (0);
+		$date = localtime(time);
+		$type = $valid_type{$aln->get_seq_by_pos(1)->alphabet};
+		$maxname = $aln->maxdisplayname_length();
+		$length  = $aln->length();
+		$name = $aln->id();
+		if( !defined $name ) {
+			$name = "Align";
+		}
+
+		$self->_print (sprintf("\n%s   MSF: %d  Type: %s  %s  Check: 00 ..\n\n",
+			       $name,  $aln->no_sequences, $type, $date));
+
+		foreach $seq ( $aln->each_seq() ) {
+			$name = $aln->displayname($seq->get_nse());
+			$miss = $maxname - length ($name);
+			$miss += 2;
+			$pad  = " " x $miss;
+
+			$self->_print (sprintf(" Name: %s%sLen:    %d  Check:  %d  Weight:  1.00\n",$name,$pad,length $seq->seq(), Bio::SeqIO::gcg->GCG_checksum($seq)));
+
+			$hash{$name} = $seq->seq();
+			push(@arr,$name);
+		}
+    	# ok - heavy handed, but there you go.
+    	#
+    	$self->_print ("\n//\n\n\n");
+
+    	while( $count < $length ) {
+			# there is another block to go!
+			$self->_print (sprintf("%22s%-27d%27d\n",' ',$count+1,$count+50));
+			foreach $name  ( @arr ) {
+				$self->_print (sprintf("%-20s  ",$name));
+
+				$tempcount = $count;
+				$index = 0;
+				while( ($tempcount + 10 < $length) && ($index < 5)  ) {
+
+					$self->_print (sprintf("%s ",substr($hash{$name},
+																	$tempcount,10)));
+
+					$tempcount += 10;
+					$index++;
+				}	    	#
+				# ok, could be the very last guy ;)
+				#
+				if( $index < 5) {
+					# space to print!
+					#
+					$self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
+					$tempcount += 10;
+				}
+				$self->_print ("\n");
+			}
+			$self->_print ("\n\n");
+			$count = $tempcount;
+    	}
+	}
+	$self->flush if $self->_flush_on_write && defined $self->_fh;
+	return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/nexus.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/nexus.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/nexus.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,462 @@
+# $Id: nexus.pm,v 1.27.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::nexus
+#
+# Copyright Heikki Lehvaslaiho
+#
+
+=head1 NAME
+
+Bio::AlignIO::nexus - NEXUS format sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+    use Bio::AlignIO;
+
+    my $in = new Bio::AlignIO(-format => 'nexus',
+                              -file   => 'aln.nexus');
+    while( my $aln = $in->next_aln ) {
+        # do something with the alignment
+    }
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from NEXUS
+data blocks. See method documentation for supported NEXUS features.
+
+=head1 ACKNOWLEDGEMENTS
+
+Will Fisher has written an excellent standalone NEXUS format parser in
+Perl, readnexus. A number of tricks were adapted from it.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Heikki Lehvaslaiho
+
+Email: heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::nexus;
+use vars qw(%valid_type);
+use strict;
+no strict "refs";
+
+
+use base qw(Bio::AlignIO);
+
+BEGIN {
+    %valid_type = map {$_, 1} qw( dna rna protein standard ); # datatypes next_aln accepts
+    # standard throws error: inherited from Bio::PrimarySeq
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $alignio = new Bio::AlignIO(-format => 'nexus',
+													-file   => 'filename');
+ Function: returns a new Bio::AlignIO object to handle NEXUS files
+ Returns : Bio::AlignIO::nexus object
+ Args    : -verbose => verbosity setting (-1,0,1,2)
+           -file    => name of file to read in or with ">" - writeout
+           -fh      => alternative to -file param - provide a filehandle 
+                       to read from/write to 
+           -format  => type of Alignment Format to process or produce
+
+           Customization of nexus flavor output
+
+           -show_symbols => print the symbols="ATGC" in the data definition
+                            (MrBayes does not like this)
+                            boolean [default is 1] 
+           -show_endblock => print an 'endblock;' at the end of the data
+                            (MrBayes does not like this)
+                            boolean [default is 1] 
+
+=cut
+
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    # collect the two output switches under the names flag() stores them by
+    my %switch;
+    @switch{qw(symbols endblock)} =
+	$self->_rearrange([qw(SHOW_SYMBOLS SHOW_ENDBLOCK)], @args);
+    foreach my $flagname (qw(symbols endblock)) {
+	my $setting = defined $switch{$flagname} ? $switch{$flagname} : 1; # on unless given
+	$self->flag($flagname, $setting);
+    }
+}
+
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: Returns the next alignment in the stream.
+
+           Supports the following NEXUS format features:
+           - The file has to start with '#NEXUS'
+           - Reads in the name of the alignment from a comment
+             (anything after 'TITLE: ') .
+           - Sequence names can be given in a taxa block, too.
+           - If matchchar notation is used, converts
+             them back to sequence characters.
+           - Does character conversions specified in the
+             NEXUS equate command.
+           - Sequence names of type 'Homo sapiens' and
+             Homo_sapiens are treated identically.
+
+ Returns : L<Bio::Align::AlignI> object
+ Args    :
+
+
+=cut
+
+
+sub next_aln {
+    my $self = shift;
+    my $entry;
+    my ($aln_name, $seqcount, $residuecount, %hash, $alphabet,
+	$match, $gap, $missing, $equate, $interleave,
+	$name,$str,@names,$seqname,$start,$end,$count,$seq);
+
+    my $aln =  Bio::SimpleAlign->new(-source => 'nexus');
+
+    # file starts with '#NEXUS' but we allow white space only lines before it
+    $entry = $self->_readline;
+    $entry = $self->_readline while defined $entry && $entry =~ /^\s+$/;
+
+    return unless $entry;
+    $self->throw("Not a valid interleaved NEXUS file! [#NEXUS] not starting the file\n$entry")
+	unless $entry =~ /^#NEXUS/i;
+
+    # skip anything before either the taxa or data block
+    # but read in the optional title in a comment
+    while (defined($entry = $self->_readline)) {
+	local ($_) = $entry;
+	/\[TITLE. *([^\]]+)]\s+/i and $aln_name = $1;
+	last if /^begin +data/i || /^begin +taxa/i;
+    }
+    if ($aln_name) { $aln_name =~ s/\s/_/g; $aln->id($aln_name) } # also for titles without blanks
+
+    # data and taxa blocks
+    my $incomment;
+    while ($entry = $self->_readline) {
+	local ($_) =  $entry;
+	s/\[[^\]]+\]//g and /^\s*$/ and next; # remove comments, skip line only if now empty
+	if( s/\[[^\]]+$// ) {
+	    $incomment = 1;
+		 # skip line if it is now empty or contains only whitespace
+	    next if /^\s*$/;
+	} elsif($incomment) {
+	    if( s/^[^\]]*\]// ) {
+			 $incomment = 0;
+	    } else {
+			 next;
+	    }
+	} elsif( /taxlabels/i ) {
+	    # doesn't deal with taxlabels adequately and can mess things up!
+	    # @names = $self->_read_taxlabels;
+	} else {
+
+	    /ntax\s*=\s*(\d+)/i        and $seqcount = $1;
+	    /nchar\s*=\s*(\d+)/i       and $residuecount = $1;
+	    /matchchar\s*=\s*(.)/i     and $match = $1;
+	    /gap\s*=\s*(.)/i           and $gap = $1;
+	    /missing\s*=\s*(.)/i       and $missing = $1;
+	    /equate\s*=\s*\"([^\"]+)/i and $equate = $1;  # "e.g. equate="T=C G=A";
+	    /datatype\s*=\s*(\w+)/i    and $alphabet = lc $1;
+	    /interleave/i              and $interleave = 1 ;
+	    last if /matrix/io;
+	}
+    }
+    $self->throw("Not a valid NEXUS sequence file. Datatype not specified.")
+	unless $alphabet;
+    $self->throw("Not a valid NEXUS sequence file. Datatype should not be [$alphabet]")
+	unless $valid_type{$alphabet};
+    $self->throw("\"$gap\" is not a valid gap character. For compatability, gap char can not be one of: ()[]{}/\,;:=*'`\"<>^")
+    	if $gap && $gap =~ /[\(\)\[\]\{\}\/\\\,\;\:\=\*\'\`\<\>\^]/;
+    $self->throw("\"$missing\" is not a valid missing character. For compatability, missing char can not be one of: ()[]{}/\,;:=*'`\"<>^")
+    	if $missing && $missing =~ /[\(\)\[\]\{\}\/\\\,\;\:\=\*\'\`\<\>\^]/;
+
+    $aln->gap_char($gap);
+    $aln->missing_char($missing);
+
+    #
+    # if data is not right after the matrix line
+    #  read the empty lines out
+    #
+    while ($entry = $self->_readline) {
+	unless ($entry =~ /^\s+$/) {
+	    $self->_pushback($entry);
+	    last;
+	}
+    }
+
+    #
+    # matrix command
+    #
+    # first alignment section
+    if (@names == 0) {		# taxa block did not exist
+	while ($entry = $self->_readline) {
+		local ($_) =  $entry;
+		if( s/\[[^\]]+\]//g ) { #]  remove comments
+			next if /^\s*$/; 
+			# skip line if it is now empty or contains only whitespace
+		}
+		if ($interleave && defined$count && ($count <= $seqcount)) {
+			/^\s+$/ and last;
+		} else {
+			/^\s+$/ and next;
+		}
+		/^\s*;/ and last;	# stop if semicolon at end of matrix is on its own line
+		#/^\s*;\s*$/ and last;
+		if ( /^\s*([\"\'](.+?)[\"\']|(\S+))\s+(.*)\s*$/ ) {	
+			# get single and double quoted names, or all the first 
+         # nonwhite word as the name, and remained is seq
+			#if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)$/) { #'
+			$name = ($2 || $3);
+			if  ($4) {
+				# seq is on same line as name
+				# this is the usual NEXUS format
+				$str = $4;
+			} else {
+				# otherwise get seq from following lines. No comments allowed
+				# a less common matrix format, usually used for very long seqs
+				$str='';
+				# defined() test: the former list assignment stayed true at EOF
+				while (defined(my $line = $self->_readline)) {
+					(my $str_tmp = $line) =~ s/[\s;]//g;
+					$str .= $str_tmp;
+					last if length($str) == $residuecount;
+				}
+			}
+			$name =~ s/ /_/g;
+			push @names, $name;
+
+			$str =~ s/[\s;]//g;
+			$count =  @names;
+			$hash{$count} = $str;
+		}
+		$self->throw("Not a valid interleaved NEXUS file! seqcount [$count] > predeclared [$seqcount] in the first section") if $count > $seqcount;
+		/;/ and last;	# stop if semicolon at end of matrix is on the same line as the last seq
+	}
+}
+
+    # interleaved sections
+    $count = 0;
+    if ( $interleave ) {	# only read next section if file is interleaved
+	while( $entry = $self->_readline) {
+	    local ($_) =  $entry;
+	    if( s/\[[^\]]+\]//g ) { #]  remove comments
+		next if /^\s*$/; # skip line if it is now empty or contains only whitespace
+	    }
+	    /^\s*;/ and last;		# stop if semicolon at end of matrix is on its own line
+	    $count = 0, next if $entry =~ /^\s*$/;
+	    if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)$/) { #'
+		$str = $4;
+		$str =~ s/[\s;]//g;
+		$count++;
+		$hash{$count} .= $str;
+	    };
+	    $self->throw("Not a valid interleaved NEXUS file!
+    		seqcount [$count] > predeclared [$seqcount] ") if $count > $seqcount;
+	    /;/ and last;	# stop if semicolon at end of matrix is on the same line as the last seq
+	}
+    }
+
+    return 0 if @names < 1;
+
+    # sequence creation
+    $count = 0;
+    foreach $name ( @names ) {
+	$count++;
+	if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
+	    $seqname = $1;
+	    $start = $2;
+	    $end = $3;
+	} else {
+	    $seqname=$name;
+	    $start = 1;
+	    $str = $hash{$count};
+	    $str =~ s/[^A-Za-z]//g;
+	    $end = length($str);
+	}
+
+	# consistency test
+	$self->throw("Length of sequence [$seqname] is not [$residuecount]! ")
+	    unless CORE::length($hash{$count}) == $residuecount;
+
+	$seq = Bio::LocatableSeq->new('-seq'=>$hash{$count},
+				     '-id'=>$seqname,
+				     '-start'=>$start,
+				     '-end'=>$end,
+				     'alphabet'=>$alphabet
+				     );
+	$aln->add_seq($seq);
+    }
+
+    # if matchchar is used
+    $aln->unmatch($match) if $match;
+
+    # if equate ( e.g. equate="T=C G=A") is used
+    if ($equate) {
+	$aln->map_chars($1, $2) while $equate =~ /(\S)=(\S)/g;
+    }
+
+    while  (defined $entry &&
+	    $entry !~ /endblock/i) {
+        $entry = $self->_readline;
+    }
+
+    return $aln;
+}
+
+sub _read_taxlabels {
+    my $self = shift;
+    my @labels;
+    # one label per line, until a ';' / 'END;' line or the end of the input
+    while (my $line = $self->_readline) {
+	last if $line =~ m/^\s*(END)?;/i;
+	next unless $line =~ m/\s*(\S+)\s+/;
+	my $label = $1;
+	$label =~ s/\[[^\[]+\]//g;   # drop a bracketed comment inside the word
+	$label =~ s/\W/_/g;          # every other non-word character becomes '_'
+	push @labels, $label;
+    }
+    return @labels;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: Writes the $aln object into the stream in interleaved NEXUS
+           format. Everything is written into a data block.
+           SimpleAlign methods match_char, missing_char and gap_char must be set
+           if you want to see them in the output.
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+=cut
+
+sub write_aln {
+    my ($self, @aln) = @_;
+    my $wrapped = 0;
+    my ($length,$name,$seq,$tempcount,$index );
+
+    foreach my $aln (@aln) {
+	if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+	    $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+	    next;
+	}
+	# per-alignment state: block offset, names and format options must not leak
+	my ($count, %hash, @arr) = (0);
+	my ($match, $missing, $gap,$symbols) = ('', '', '','');
+	$self->throw("All sequences in the alignment must be the same length")
+	    unless $aln->is_flush($self->verbose);
+
+	$length  = $aln->length();
+
+	$self->_print (sprintf("#NEXUS\n[TITLE: %s]\n\nbegin data;\ndimensions ntax=%s nchar=%s;\n",
+			       $aln->id, $aln->no_sequences, $length));
+	$match = "match=". $aln->match_char if $aln->match_char;
+	$missing = "missing=". $aln->missing_char if $aln->missing_char;
+	$gap = "gap=". $aln->gap_char if $aln->gap_char;
+
+	$symbols = 'symbols="'.join('',$aln->symbol_chars). '"' 
+	    if( $self->flag('symbols') && $aln->symbol_chars);
+	$self->_print 
+	    (sprintf("format interleave datatype=%s %s %s %s %s;\n\nmatrix\n",
+		     $aln->get_seq_by_pos(1)->alphabet, $match, 
+		     $missing, $gap, $symbols));
+
+                     # account for single quotes round names
+	my $indent = $aln->maxdisplayname_length+2;
+
+	$aln->set_displayname_flat();
+	foreach $seq ( $aln->each_seq() ) {
+	    my $nmid = $aln->displayname($seq->get_nse());
+	    if( $nmid =~ /[^\w\d]/ ) {
+              # put name in single quotes incase it contains any of
+              # the following chars: ()[]{}/\,;:=*'"`+-<> that are not
+              # allowed in PAUP* and possible other software
+
+		$name = sprintf("%-${indent}s", "\'" . $nmid . "\'");
+	    } else { 
+		$name = sprintf("%-${indent}s", $nmid);
+	    }
+	    $hash{$name} = $seq->seq;
+	    push(@arr,$name);
+	}
+
+	while( $count < $length ) {
+	    # there is another block to go!
+	    foreach $name ( @arr ) {
+		my $dispname = $name;
+#		 $dispname = '' if $wrapped;
+		$self->_print (sprintf("%${indent}s  ",$dispname));
+		$tempcount = $count;
+		$index = 0;
+		while( ($tempcount + 10 < $length) && ($index < 5)  ) {
+		    $self->_print (sprintf("%s ",substr($hash{$name},$tempcount,10)));
+		    $tempcount += 10;
+		    $index++;
+		}
+		# last
+		if( $index < 5) {
+		    # space to print!
+		    $self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
+		    $tempcount += 10;
+		}
+		$self->_print ("\n");
+	    }
+	    $self->_print ("\n\n");
+	    $count = $tempcount;
+	    $wrapped = 1;
+	}
+	if( $self->flag('endblock') ) {
+	    $self->_print (";\n\nendblock;\n");
+	} else { 
+	    $self->_print (";\n\nend;\n");
+	}
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 flag
+
+ Title   : flag
+ Usage   : $obj->flag($name,$value)
+ Function: Get/Set a flag value
+ Returns : value of flag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub flag{
+    my ($self, $flagname, $newval) = @_;
+    # acts as a setter only when a defined value is supplied
+    $self->{'flag'}->{$flagname} = $newval if defined $newval;
+    return $self->{'flag'}->{$flagname};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/pfam.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/pfam.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/pfam.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+# $Id: pfam.pm,v 1.12.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::pfam
+
+#   based on the Bio::SeqIO:: modules
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::pfam - pfam sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::SimpleAlign objects to and from pfam flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::pfam;
+use strict;
+
+use Bio::SimpleAlign;
+use base qw(Bio::AlignIO);
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream
+ Returns : L<Bio::Align::AlignI> object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my $entry;
+    my $name;
+    my $start;
+    my $end;
+    my $seq;
+    my $add;
+
+    my $aln =  Bio::SimpleAlign->new(-source => 'pfam');
+
+    # one "name/start-end  sequence" record per line; "//" ends the alignment
+    while( $entry = $self->_readline) {
+	chomp $entry;
+	$entry =~ m{^//} && last;
+	if($entry !~ m{^(\S+)/(\d+)-(\d+)\s+(\S+)\s*} ) {
+	    # report the offending line itself (the message used to interpolate $_)
+	    $self->throw("Found a bad line [$entry] in the pfam format alignment");
+	}
+
+	$name = $1;
+	$start = $2;
+	$end = $3;
+	$seq = $4;
+
+	# each record becomes one located sequence of the alignment
+	$add = Bio::LocatableSeq->new('-seq'=>$seq,
+			    '-id'=>$name,
+			    '-start'=>$start,
+			    '-end'=>$end,
+			    );
+
+	$aln->add_seq($add);
+
+    }
+
+    # $end is still undefined when no record was read (end of file) and is
+    # <= 0 for a bogus last record: in both cases no alignment is returned.
+    # Testing definedness first avoids an "uninitialized value" warning.
+    undef $aln if !defined $end || $end <= 0;
+
+    return $aln;
+}
+
+
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+   my ($self, @aln) = @_;
+   if( @aln > 1 ) { $self->warn("Only the 1st pfam alignment will be output since the format does not support multiple alignments in the same file"); }
+   my $aln = shift @aln;
+   if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+       $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+       return 0;   # was 'next', which is a runtime error outside of a loop
+   }
+   my ($namestr,$seq,$add);
+   my ($maxn);
+   $maxn = $aln->maxdisplayname_length();
+
+   foreach $seq ( $aln->each_seq() ) {
+       $namestr = $aln->displayname($seq->get_nse());
+       $add = $maxn - length($namestr) + 2;   # pad names to a common column
+       $namestr .= " " x $add;
+	  $self->_print (sprintf("%s  %s\n",$namestr,$seq->seq())) or return;
+   }
+   $self->flush() if $self->_flush_on_write && defined $self->_fh;
+   return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/phylip.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/phylip.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/phylip.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,523 @@
+# $Id: phylip.pm,v 1.36.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::phylip
+#
+# Copyright Heikki Lehvaslaiho
+#
+
+=head1 NAME
+
+Bio::AlignIO::phylip - PHYLIP format sequence input/output stream
+
+=head1 SYNOPSIS
+
+# Do not use this module directly.  Use it via the Bio::AlignIO class.
+
+    use Bio::AlignIO;
+    use Bio::SimpleAlign;
+    #you can set the name length to something other than the default 10
+    #if you use a version of phylip (hacked) that accepts ids > 10
+    my $phylipstream = new Bio::AlignIO(-format  => 'phylip',
+                                        -fh      => \*STDOUT,
+                                        -idlength=>30);
+    # convert data from one format to another
+    my $gcgstream     =  new Bio::AlignIO(-format => 'msf',
+                                          -file   => 't/data/cysprot1a.msf');
+
+    while( my $aln = $gcgstream->next_aln ) {
+        $phylipstream->write_aln($aln);
+    }
+
+    # do it again with phylip sequential format
+    $phylipstream->interleaved(0);
+    # can also initialize the object like this
+    $phylipstream = new Bio::AlignIO(-interleaved => 0,
+                                     -format => 'phylip',
+                                     -fh   => \*STDOUT,
+                                     -idlength=>10);
+    $gcgstream     =  new Bio::AlignIO(-format => 'msf',
+                                       -file   => 't/data/cysprot1a.msf');
+
+    while( my $aln = $gcgstream->next_aln ) {
+        $phylipstream->write_aln($aln);
+    }
+
+=head1 DESCRIPTION
+
+This object can transform Bio::SimpleAlign objects to and from PHYLIP
+interleaved format. It will not work with PHYLIP sequential format.
+
+By default this module will output PHYLIP interleaved format.  By
+specifying the flag -interleaved =E<gt> 0 in the initialization the
+module can output data in sequential format.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Heikki Lehvaslaiho and Jason Stajich
+
+Email: heikki at ebi.ac.uk
+Email: jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::phylip;
+use vars qw($DEFAULTIDLENGTH $DEFAULTLINELEN $DEFAULTTAGLEN);
+use strict;
+
+use Bio::SimpleAlign;
+use POSIX; # for the rounding call
+
+use base qw(Bio::AlignIO);
+
+BEGIN {
+    $DEFAULTIDLENGTH = 10;
+    $DEFAULTLINELEN = 60;
+    $DEFAULTTAGLEN = 10;
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $alignio = new Bio::AlignIO(-format => 'phylip',
+					  -file   => '>file',
+					  -idlength => 10,
+					  -idlinebreak => 1);
+ Function: Initialize a new L<Bio::AlignIO::phylip> reader or writer
+ Returns : L<Bio::AlignIO> object
+ Args    : [specific for writing of phylip format files]
+           -idlength => integer - length of the id (will pad w/
+						    spaces if needed)
+           -interleaved => boolean - whether or not write as interleaved
+                                     or sequential format
+           -line_length  => integer of how long a sequence lines should be
+           -idlinebreak => insert a line break after the sequence id
+                           so that sequence starts on the next line
+           -flag_SI => whether or not write a "S" or "I" just after
+                       the num.seq. and line len., in the first line
+           -tag_length => integer of how long the tags have to be in
+                         each line between the space separator. set it
+                         to 0 to have 1 tag only.
+           -wrap_sequential => boolean for whether or not sequential
+                                   format should be broken up or a single line
+                                   default is false (single line)
+
+=cut
+
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+
+  # Pick the phylip-specific named arguments out of the constructor args.
+  my ($interleave,$linelen,$idlinebreak,
+      $idlength, $flag_SI, $tag_length,$ws) =
+          $self->_rearrange([qw(INTERLEAVED
+                                LINE_LENGTH
+                                IDLINEBREAK
+                                IDLENGTH
+                                FLAG_SI
+                                TAG_LENGTH
+                                WRAP_SEQUENTIAL)],@args);
+
+  # Interleaved output is the default; only an explicit false value for
+  # -interleaved leaves it switched off.
+  $self->interleaved(1) if( $interleave || ! defined $interleave);
+  $self->idlength($idlength || $DEFAULTIDLENGTH);
+  $self->id_linebreak(1) if( $idlinebreak );
+  $self->line_length($linelen) if defined $linelen && $linelen > 0;
+  $self->flag_SI(1) if ( $flag_SI );
+  # tag_length() ignores undef and supplies $DEFAULTTAGLEN itself when
+  # nothing is stored, so the value only needs forwarding when one was given.
+  $self->tag_length($tag_length) if defined $tag_length;
+  $self->wrap_sequential($ws ? 1 : 0);
+  1;
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+           Throws an exception if trying to read in PHYLIP
+           sequential format.
+ Returns : L<Bio::SimpleAlign> object
+ Args    :
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my $entry;
+    my ($seqcount, $residuecount, %hash, $name,$str,
+	@names,$seqname,$start,$end,$count,$seq);
+    # %hash: 1-based sequence index => accumulated residue string; @names: ids in input order
+    my $aln =  Bio::SimpleAlign->new(-source => 'phylip');
+    $entry = $self->_readline and
+        ($seqcount, $residuecount) = $entry =~ /\s*(\d+)\s+(\d+)/; # header line: sequence count, residues per sequence
+    return 0 unless $seqcount and $residuecount; # no usable header: nothing to return
+
+    # first alignment section
+    my $idlen = $self->idlength;
+    $count = 0;
+    my $iter = 1; # not referenced again in this sub
+    my $interleaved = $self->interleaved;
+    while( $entry = $self->_readline) {
+	last if( $entry =~ /^\s?$/ && $interleaved ); # a blank line closes the first block when reading interleaved
+    # leading whitespace followed by just two integers: header of the next alignment, leave it for the next call
+	if( $entry =~ /^\s+(\d+)\s+(\d+)\s*$/) {
+	    $self->_pushback($entry);
+	    last;
+	}
+	if( $entry =~ /^\s+(.+)$/ ) { # indented line: more residues for the most recently named sequence
+	    $interleaved = 0; # from here on the interleaved pass below is skipped
+	    $str = $1;
+	    $str =~ s/\s//g;
+	    $count = scalar @names;
+	    $hash{$count} .= $str;
+
+       	} elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ ||
+		 $entry =~ /^(.{$idlen})(\S{$idlen}\s+.+)\s$/ # Handle weirdness when id is too long
+		 ) {
+	    $name = $1;
+	    $str = $2;
+	    $name =~ s/[\s\/]/_/g; # whitespace and '/' inside the id become '_'
+	    $name =~ s/_+$//; # remove any trailing _'s
+
+	    push @names, $name;
+	    $str =~ s/\s//g;
+	    $count = scalar @names;
+	    $hash{$count} = $str;
+	} elsif( $interleaved ) { # fallback patterns, tried only while still reading as interleaved
+	    if( $entry =~ /^(\S+)\s+(.+)/ ||
+		$entry =~ /^(.{$idlen})(.*)\s$/ ) {
+		$name = $1;
+		$str = $2;
+		$name =~ s/[\s\/]/_/g;
+		$name =~ s/_+$//; # remove any trailing _'s
+		push @names, $name;
+		$str =~ s/\s//g;
+		$count = scalar @names;
+		$hash{$count} = $str;
+	    } else {
+		$self->debug("unmatched line: $entry");
+	    }
+	}
+	$self->throw("Not a valid interleaved PHYLIP file!") if $count > $seqcount; # more names than the header announced
+    }
+
+    if( $interleaved ) {
+	# interleaved sections
+	$count = 0;
+	while( $entry = $self->_readline) {
+            # finish current entry
+	    if($entry =~/\s*\d+\s+\d+/){ # NOTE(review): unanchored, so any line containing "digits whitespace digits" ends this alignment - confirm intended
+		$self->_pushback($entry);
+		last;
+	    }
+	    $count = 0, next if $entry =~ /^\s$/; # blank line: the next block starts again at the first sequence
+	    $entry =~ /\s*(.*)$/ && do {
+		$str = $1;
+		$str =~ s/\s//g;
+		$count++; # position within the block selects which sequence receives the residues
+		$hash{$count} .= $str;
+	    };
+	    $self->throw("Not a valid interleaved PHYLIP file! [$count,$seqcount] ($entry)") if $count > $seqcount;
+	}
+    }
+    return 0 if scalar @names < 1;
+
+    # sequence creation
+    $count = 0;
+    foreach $name ( @names ) {
+	$count++;
+	if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) { # NOTE(review): '/' was replaced by '_' when the ids were read above, so this branch looks unreachable - confirm
+	    $seqname = $1;
+	    $start = $2;
+	    $end = $3;
+	} else {
+	    $seqname=$name;
+	    $start = 1;
+	    $str = $hash{$count};
+	    $str =~ s/[^A-Za-z]//g;
+	    $end = length($str); # end coordinate = number of letters; gaps and other symbols are not counted
+	}
+	# consistency test
+	$self->throw("Length of sequence [$seqname] is not [$residuecount] it is ".CORE::length($hash{$count})."! ")
+	    unless CORE::length($hash{$count}) == $residuecount;
+
+       $seq = new Bio::LocatableSeq('-seq'=>$hash{$count},
+				    '-id'=>$seqname,
+				    '-start'=>$start,
+				    '-end'=>$end,
+				   );
+	$aln->add_seq($seq);
+
+   }
+   return $aln;
+}
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in PHYLIP format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+=cut
+
+sub write_aln {
+    my ($self,@aln) = @_;
+    my $width = $self->line_length();   # wrap width for sequential output
+
+    foreach my $aln (@aln) {
+        if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) {
+            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+            next;
+        }
+        $self->throw("All sequences in the alignment must be the same length")
+            unless $aln->is_flush(1) ;
+
+        # Bookkeeping is kept per alignment so that a second alignment in
+        # the same call does not inherit names or offsets from the first.
+        my $count   = 0;    # residues already written for every sequence
+        my $wrapped = 0;    # true once the first interleaved block is out
+        my (%hash,@arr);    # padded name => residues, and names in order
+
+        my $flag_SI = $self->flag_SI();
+        $aln->set_displayname_flat(); # plain
+        my $length  = $aln->length();
+
+        # header line: number of sequences, alignment length, optional I/S flag
+        if ($flag_SI) {
+            if ($self->interleaved() ) {
+                $self->_print (sprintf(" %s %s I\n", $aln->no_sequences, $aln->length));
+            } else {
+                $self->_print (sprintf(" %s %s S\n", $aln->no_sequences, $aln->length));
+            }
+        } else {
+            $self->_print (sprintf(" %s %s\n", $aln->no_sequences, $aln->length));
+        }
+
+        my $idlength    = $self->idlength();
+        my $line_length = $self->line_length();
+        my $tag_length  = $self->tag_length();
+        foreach my $seq ( $aln->each_seq() ) {
+            # ids are cut or blank-padded to exactly $idlength characters
+            my $name = $aln->displayname($seq->get_nse);
+            $name = substr($name, 0, $idlength) if length($name) > $idlength;
+            $name = sprintf("%-".$idlength."s",$name);
+            if( $self->interleaved() ) {
+                $name .= '   ' ;
+            } elsif( $self->id_linebreak) {
+                $name .= "\n";
+            }
+
+            #phylip needs dashes not dots
+            my $residues = $seq->seq();
+            $residues =~ s/\./-/g;
+            $hash{$name} = $residues;
+            push(@arr,$name);
+        }
+
+        if( $self->interleaved() ) {
+            # number of space-separated tags that fit on one line
+            my $numtags = ($tag_length <= $line_length)
+                ? floor($line_length/$tag_length)
+                : 1;
+
+            while( $count < $length ) {
+
+                # there is another block to go!
+                my $tempcount = $count;
+                foreach my $name ( @arr ) {
+                    # names are printed in the first block only
+                    my $dispname = $wrapped ? '' : $name;
+                    $self->_print (sprintf("%".($idlength+3)."s",$dispname));
+                    $tempcount = $count;
+                    my $index = 0;
+                    $self->debug("residue count: $count\n") if ($count%100000 == 0);
+                    while( ($tempcount + $tag_length < $length) &&
+                           ($index < $numtags)  ) {
+                        $self->_print (sprintf("%s ",substr($hash{$name},
+                                                            $tempcount,
+                                                            $tag_length)));
+                        $tempcount += $tag_length;
+                        $index++;
+                    }
+                    # last
+                    if( $index < $numtags) {
+                        # space to print!
+                        $self->_print (sprintf("%s ",substr($hash{$name},
+                                                            $tempcount)));
+                        $tempcount += $tag_length;
+                    }
+                    $self->_print ("\n");
+                }
+                $self->_print ("\n");
+                $count = $tempcount;
+                $wrapped = 1;
+            }
+        } else {
+            # sequential: one record per sequence, optionally wrapped
+            foreach my $name ( @arr ) {
+                my $line = sprintf("%s%s\n",$name,$hash{$name});
+                if( $self->wrap_sequential ) {
+                    $line =~ s/(.{1,$width})/$1\n/g;
+                }
+                $self->_print ($line);
+            }
+        }
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 interleaved
+
+ Title   : interleaved
+ Usage   : my $interleaved = $obj->interleaved
+ Function: Get/Set Interleaved status
+ Returns : boolean
+ Args    : boolean
+
+
+=cut
+
+sub interleaved{
+   my $self = shift;
+   my $new  = shift;
+   # The value held *before* this call is what gets returned, so a plain
+   # get sees the current flag and a set hands back the one it replaced.
+   my $old = $self->{'_interleaved'};
+   $self->{'_interleaved'} = $new if defined $new;
+   return $old;
+}
+
+=head2 flag_SI
+
+ Title   : flag_SI
+ Usage   : my $flag = $obj->flag_SI
+ Function: Get/Set if the Sequential/Interleaved flag has to be shown
+           after the number of sequences and sequence length
+ Example :
+ Returns : boolean
+ Args    : boolean
+
+
+=cut
+
+sub flag_SI{
+   my $self = shift;
+   my $new  = shift;
+   # Returns the flag as it was on entry; a defined argument replaces it.
+   my $old = $self->{'_flag_SI'};
+   $self->{'_flag_SI'} = $new if defined $new;
+   return $old;
+}
+
+=head2 idlength
+
+ Title   : idlength
+ Usage   : my $idlength = $obj->idlength
+ Function: Get/Set value of id length
+ Returns : string
+ Args    : string
+
+
+=cut
+
+sub idlength {
+	my $self = shift;
+	my $new  = shift;
+	# store a defined argument, then report whatever is stored now
+	$self->{'_idlength'} = $new if defined $new;
+	return $self->{'_idlength'};
+}
+
+=head2 line_length
+
+ Title   : line_length
+ Usage   : $obj->line_length($newval)
+ Function:
+ Returns : value of line_length
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub line_length{
+   my $self = shift;
+   my $new  = shift;
+   $self->{'_line_length'} = $new if defined $new;
+
+   # an unset (or otherwise false) stored value yields the package default
+   my $stored = $self->{'_line_length'};
+   return $stored ? $stored : $DEFAULTLINELEN;
+}
+
+=head2 tag_length
+
+ Title   : tag_length
+ Usage   : $obj->tag_length($newval)
+ Function:
+ Example : my $tag_length = $obj->tag_length
+ Returns : value of the length for each space-separated tag in a line
+ Args    : newvalue (optional) - set to zero to have one tag per line
+
+
+=cut
+
+sub tag_length{
+   my $self = shift;
+   my $new  = shift;
+   $self->{'_tag_length'} = $new if defined $new;
+
+   # Any false stored value, 0 included, yields the package default.
+   my $stored = $self->{'_tag_length'};
+   return $stored ? $stored : $DEFAULTTAGLEN;
+}
+
+
+=head2 id_linebreak
+
+ Title   : id_linebreak
+ Usage   : $obj->id_linebreak($newval)
+ Function:
+ Returns : value of id_linebreak
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub id_linebreak{
+   my $self = shift;
+   my $new  = shift;
+   $self->{'_id_linebreak'} = $new if defined $new;
+
+   # report 0 rather than undef/'' when the option is off
+   my $stored = $self->{'_id_linebreak'};
+   return $stored ? $stored : 0;
+}
+
+
+=head2 wrap_sequential
+
+ Title   : wrap_sequential
+ Usage   : $obj->wrap_sequential($newval)
+ Function:
+ Returns : value of wrap_sequential
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub wrap_sequential{
+   my $self = shift;
+   my $new  = shift;
+   $self->{'_wrap_sequential'} = $new if defined $new;
+
+   # report 0 rather than undef/'' when wrapping is off
+   my $stored = $self->{'_wrap_sequential'};
+   return $stored ? $stored : 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/po.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/po.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/po.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,340 @@
+# $Id: po.pm
+#
+# BioPerl module for Bio::AlignIO::po
+
+#   based on the Bio::AlignIO::fasta module
+#       by Peter Schattner (and others?)
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::po - po MSA Sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::SimpleAlign> objects to and from
+'po' format flat file databases. 'po' format is the native format of
+the POA alignment program (Lee C, Grasso C, Sharlow MF, 'Multiple
+sequence alignment using partial order graphs', Bioinformatics (2002),
+18(3):452-64).
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Matthew Betts
+
+Email: matthew.betts at ii.uib.no
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::po;
+use strict;
+
+use Bio::SimpleAlign;
+
+use base qw(Bio::AlignIO);
+
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object - returns undef on end of file
+	    or on error
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    # Reads one po block (VERSION line up to the next VERSION line or EOF) and rebuilds the alignment column by column.
+    my $aln;
+    my $entry;
+    my $name;
+    my $seqs;
+    my $seq;
+    my $nodes;
+    my $list;
+    my $node;
+    my @chars;
+    my $s;
+    my $a;
+
+    $aln =  Bio::SimpleAlign->new();
+
+    # get to the first 'VERSION' line
+    while(defined($entry = $self->_readline)) {
+	if($entry =~ /^VERSION=(\S+)/) {
+	    $aln->source($1);
+    # the alignment id is only taken from a NAME line that directly follows VERSION
+	    if(defined($entry = $self->_readline) and $entry =~ /^NAME=(\S+)/) {
+		$aln->id($1);
+	    }
+
+	    last;
+	}
+    }
+
+    # read in the sequence names and node data, up to the end of
+    # the file or the next 'VERSION' line, whichever comes first
+    $seqs = [];
+    $nodes = [];
+    while(defined($entry = $self->_readline)) {
+	if($entry =~ /^VERSION/) {
+	    # start of a new alignment, so...
+	    $self->_pushback($entry);
+	    last;
+	}
+	elsif($entry =~ /^SOURCENAME=(\S+)/) {
+	    $name = $1;
+    # a name of the form id/start-end also supplies the coordinates
+	    if($name =~ /(\S+)\/(\d+)-(\d+)/) {
+		$seq = new Bio::LocatableSeq(
+					     '-display_id' => $1,
+					     '-start'      => $2,
+					     '-end'        => $3,
+					    );
+
+	    } else {
+		$seq = new Bio::LocatableSeq('-display_id' => $name);
+	    }
+
+	    # store sequences in a list initially, because can't guarantee
+	    # that will get them back from SimpleAlign object in the order
+	    # they were read, and also can't add them to the SimpleAlign
+	    # object here because their sequences are currently unknown
+	    push @{$seqs}, {
+			    'seq' => $seq,
+			    'str' => '',
+			   };
+	}
+	elsif($entry =~ /^SOURCEINFO=(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.*)/) {
+	    $seq->desc($5); # only the fifth field (free text) is kept. NOTE(review): $seq is undef if no SOURCENAME line came first - confirm input always has one
+	}
+	elsif($entry =~ /^(\S):(\S+)/) { # node line: one residue character, ':', then the L/S/A index lists
+	    $node = {
+		     'aa'     => $1,
+		     'L'      => [],
+		     'S'      => [],
+		     'A'      => [],
+		     'status' => 'unvisited',
+		    };
+
+	    $list = $2;
+	    if($list =~ /^([L\d]*)([S\d]*)([A\d]*)/) {
+		push(@{$node->{'L'}}, split(/L/, $1));
+		push(@{$node->{'S'}}, split(/S/, $2));
+		push(@{$node->{'A'}}, split(/A/, $3));
+    # each list starts with its letter, so split yields an empty first field: drop it
+		(@{$node->{'L'}} > 0) and shift @{$node->{'L'}};
+		(@{$node->{'S'}} > 0) and shift @{$node->{'S'}};
+		(@{$node->{'A'}} > 0) and shift @{$node->{'A'}};
+	    }
+
+	    push @{$nodes}, $node;
+	}
+    }
+
+    # process the nodes
+    foreach $node (@{$nodes}) {
+	($node->{'status'} ne 'unvisited') and next;
+    # each node that is still unvisited here opens one new alignment column
+	@chars = ($aln->gap_char) x @{$seqs}; # char for each seq defaults to a gap
+
+	# set the character for each sequence represented by this node
+	foreach $s (@{$node->{'S'}}) {
+	    $chars[$s] = $node->{'aa'};
+	}
+	$node->{'status'} = 'visited';
+
+	# do the same for each node in the same align ring
+	while(defined($a = $node->{'A'}->[0])) {
+	    $node = $nodes->[$a]; # NOTE(review): $node is the foreach alias, so this also overwrites the current slot of @{$nodes} - confirm harmless
+	    ($node->{'status'} ne 'unvisited') and last; # reached an already visited node: stop following the ring
+
+	    foreach $s (@{$node->{'S'}}) {
+		$chars[$s] = $node->{'aa'};
+	    }
+
+	    $node->{'status'} = 'visited';
+	}
+
+	# update the sequences
+	foreach $seq (@{$seqs}) {
+	    $seq->{'str'} .= shift @chars; # one character (residue or gap) per sequence per column
+	}
+    }
+
+    # set the sequences of the bioperl objects
+    # and add them to the alignment
+    foreach $seq (@{$seqs}) {
+	$seq->{'seq'}->seq($seq->{'str'});
+	$aln->add_seq($seq->{'seq'});
+    }
+
+    # has an alignment been read?...
+    ($aln->no_sequences == 0) and ($aln = undef);
+
+    return $aln;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in po format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+=cut
+
+sub write_aln {
+    my $self = shift;
+    my @alns = @_;
+    # For each alignment: build the node list column by column, then print the header, the sequence records and one line per node.
+    my $aln;
+    my $seqs;
+    my $nodes;
+    my $seq;
+    my $node;
+    my $col;
+    my $ring;
+    my $i;
+    my $char;
+
+    foreach $aln (@alns) {
+	if(!$aln or !$aln->isa('Bio::Align::AlignI')) {
+	    $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
+	    next;
+	}
+
+	# store the seqs on a list, because po format
+	# refers to them by position on this list
+	$seqs  = [];
+	foreach $seq ($aln->each_seq()) {
+	    push @{$seqs}, {
+			    'seq'      => $seq,
+			    'n_nodes'  => 0,
+			    'first'    => undef,
+			    'previous' => undef,
+			   };
+	}
+
+	# go through each column in the alignment and construct
+	# the nodes for the equivalent poa alignment ring
+	$nodes = [];
+	for($col = 0; $col < $aln->length; $col++) {
+	    $ring = {
+		     'nodes' => {},
+		     'first' => scalar @{$nodes},
+		     'last'  => scalar @{$nodes},
+		    };
+    # 'first'/'last' start at the index the next new node will get; 'nodes' maps residue character => node for this column
+	    for($i = 0; $i < @{$seqs}; $i++) {
+		$seq = $seqs->[$i];
+
+		$char = $seq->{'seq'}->subseq($col + 1, $col + 1); # subseq is called with 1-based positions
+
+		($char eq $aln->gap_char) and next; # gaps produce no node
+
+		if(!defined($node = $ring->{'nodes'}->{$char})) { # first time this character is seen in this column
+		    $node = {
+			     'n'  => scalar @{$nodes},
+			     'aa' => $char,
+			     'L'  => {},
+			     'S'  => [],
+			     'A'  => [],
+			    };
+
+		    # update the ring
+		    $ring->{'nodes'}->{$char} = $node;
+		    $ring->{'last'} = $node->{'n'};
+
+		    # add the node to the node list
+		    push @{$nodes}, $node;
+		}
+
+		# add the sequence to the node
+		push @{$node->{'S'}}, $i;
+
+		# add the node to the sequence
+		defined($seq->{'first'}) or ($seq->{'first'} = $node);
+		$seq->{'n_nodes'}++;
+
+		# add an edge from the previous node in the sequence to this one.
+		# Then set the previous node to the current one, ready for the next
+		# residue in this sequence
+		defined($seq->{'previous'}) and ($node->{'L'}->{$seq->{'previous'}->{'n'}} = $seq->{'previous'}); # keyed by predecessor index, so repeated edges collapse
+		$seq->{'previous'} = $node;
+	    }
+
+	    # set the 'next node in ring' field for each node in the ring
+	    if($ring->{'first'} < $ring->{'last'}) { # only columns with more than one node get A entries
+		for($i = $ring->{'first'}; $i < $ring->{'last'}; $i++) {
+		    push @{$nodes->[$i]->{'A'}}, $i + 1;
+		}
+		push @{$nodes->[$ring->{'last'}]->{'A'}}, $ring->{'first'}; # the last node points back to the first
+	    }
+	}
+
+	# print header information
+	$self->_print(
+		      'VERSION=', ($aln->source and ($aln->source !~ /\A\s*\Z/)) ? $aln->source : 'bioperl', "\n",
+		      'NAME=', $aln->id, "\n",
+		      'TITLE=', ($seqs->[0]->{'seq'}->description or $aln->id), "\n", # NOTE(review): assumes the alignment holds at least one sequence - confirm
+		      'LENGTH=', scalar @{$nodes}, "\n",
+		      'SOURCECOUNT=', scalar @{$seqs}, "\n",
+		     );
+
+	# print sequence information
+	foreach $seq (@{$seqs}) {
+	    $self->_print(
+			  'SOURCENAME=', $seq->{'seq'}->display_id, "\n",
+			  'SOURCEINFO=',
+			  $seq->{'n_nodes'},      ' ', # number of nodes in the sequence
+			  $seq->{'first'}->{'n'}, ' ', # index of first node containing the sequence (NOTE(review): 'first' stays undef for an all-gap sequence)
+			  0,                      ' ', # FIXME - sequence weight?
+			  -1,                     ' ', # FIXME - index of bundle containing sequence?
+			  ($seq->{'seq'}->description or 'untitled'), "\n",
+			 );
+	}
+
+	# print node information
+	foreach $node (@{$nodes}) {
+	    $self->_print($node->{'aa'}, ':');
+	    (keys %{$node->{'L'}} > 0) and $self->_print('L', join('L', sort {$a <=> $b} keys %{$node->{'L'}})); # predecessor indices in ascending order
+	    (@{$node->{'S'}} > 0) and $self->_print('S', join('S', @{$node->{'S'}}));
+	    (@{$node->{'A'}} > 0) and $self->_print('A', join('A', @{$node->{'A'}}));
+	    $self->_print("\n");
+	}
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/prodom.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/prodom.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/prodom.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,133 @@
+# $Id: prodom.pm,v 1.10.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::prodom
+
+#   based on the Bio::SeqIO::prodom module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::prodom - prodom sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from prodom flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::prodom;
+use strict;
+
+
+use base qw(Bio::AlignIO);
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my $end;    # end coordinate of the most recent AL line; undef if none was parsed
+
+    my $aln =  Bio::SimpleAlign->new(-source => 'prodom');
+
+    while( my $entry = $self->_readline) {
+
+       if ($entry =~ /^AC\s+(\S+)\s*$/) {         #ps 9/12/00
+           # the accession line names the alignment
+           $aln->id( $1 );
+       }
+       elsif ($entry =~ /^AL\s+(\S+)\|(\S+)\s+(\d+)\s+(\d+)\s+\S+\s+(\S+)\s*$/){    #ps 9/12/00
+           # $2 (the id with _species appended) is matched but not used
+           my ($acc, $start, $seq) = ($1, $3, $5);
+           $end = $4;
+
+           my $add = Bio::LocatableSeq->new('-seq'=>$seq,
+                                            '-id'=>$acc,
+                                            '-start'=>$start,
+                                            '-end'=>$end,
+                                            );
+
+           $aln->add_seq($add);
+       }
+       elsif ($entry =~ /^CO/) {
+           # the consensus line marks the end of the alignment part of the entry
+           last;
+       }
+   }
+
+#  If no AL line was parsed (end of input or some other problem), or the
+#  last one carried a non-positive end, there is no alignment to return.
+#  The defined() test keeps undef out of the numeric comparison.
+   if (!defined $end || $end <= 0) { undef $aln;}
+
+   return $aln;
+}
+
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in prodom format  ###Not yet implemented!###
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    # Writing prodom format is not supported: this always throws.
+    my ($self,@aln) = @_;
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/psi.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/psi.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/psi.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,161 @@
+# $Id: psi.pm,v 1.11.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::psi
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::psi - Read/Write PSI-BLAST profile alignment files
+
+=head1 SYNOPSIS
+
+This module will parse PSI-BLAST output of the format seqid XXXX  
+
+=head1 DESCRIPTION
+
+This is a parser for psi-blast blocks.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::AlignIO::psi;
+use vars qw($BlockLen $IdLength);
+use strict;
+
+$BlockLen = 100; 
+$IdLength = 13;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::SimpleAlign;
+use Bio::LocatableSeq;
+
+use base qw(Bio::AlignIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::AlignIO::psi();
+ Function: Builds a new Bio::AlignIO::psi object 
+ Returns : Bio::AlignIO::psi
+ Args    :
+
+=cut
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream
+ Returns : Bio::Align::AlignI object
+ Args    : NONE
+
+See L<Bio::Align::AlignI>
+
+=cut
+
+sub next_aln {
+    my ($self) = @_;
+    my $aln;
+    my %seqs;     # id => concatenated sequence string
+    my @order;    # ids in order of first appearance
+
+    # A lexical line variable is used so the caller's $_ is left alone.
+    while( defined (my $line = $self->_readline ) ) {
+        next if( $line =~ /^\s+$/);
+        if( !defined $aln ) {
+            $aln = Bio::SimpleAlign->new();
+        }
+        # first two whitespace-separated fields: id and sequence chunk;
+        # chunks for the same id in later blocks are appended
+        my ($id,$s) = split ' ', $line;
+        push @order, $id if( ! defined $seqs{$id});
+        $seqs{$id} .= $s;
+    }
+    foreach my $id ( @order) {
+        my $seq = Bio::LocatableSeq->new(-seq => $seqs{$id},
+                                         -id  => $id,
+                                         -start => 1,
+                                         -end   => length($seqs{$id}));
+        $aln->add_seq($seq);
+    }
+    # still undef when the stream held no non-blank lines
+    return $aln;
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the NCBI psi-format object (.aln) into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Align::AlignI object
+
+L<Bio::Align::AlignI>
+
+=cut
+
+sub write_aln {
+	my ($self,$aln) = @_;
+
+	# guard: anything that is not an AlignI object is refused with a warning
+	if( !( defined $aln && ref($aln) &&
+			  $aln->isa('Bio::Align::AlignI') ) ) {
+		$self->warn("Must provide a valid Bio::Align::AlignI to write_aln");
+		return 0;
+	}
+
+	my @members  = $aln->each_seq;
+	my $total    = $aln->length;
+	my $namelen  = $IdLength;
+	my $rowfmt   = "%-".$namelen."s %s\n";
+	# ids are cut to at most $IdLength characters once, up front
+	my @labels   = map { substr($_->display_id,0,$namelen) } @members;
+
+	# one block per pass: columns $from .. $from+$BlockLen, clipped to the
+	# alignment length, written for every sequence and closed by a blank line
+	for( my $from = 1; $from < ($total + 1); $from += $BlockLen+1 ) {
+		my $to = $from + $BlockLen;
+		if ( $to > $total ) {
+			$to = $total;
+		}
+		for my $row ( 0 .. $#members ) {
+			$self->_print(sprintf($rowfmt,
+										 $labels[$row],
+										 $members[$row]->subseq($from,$to)));
+		}
+		$self->_print("\n");
+	}
+	$self->flush if $self->_flush_on_write && defined $self->_fh;
+	return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/selex.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/selex.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/selex.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+# $Id: selex.pm,v 1.14.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO::selex
+
+#   based on the Bio::SeqIO::selex module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::selex - selex sequence input/output stream
+
+=head1 SYNOPSIS
+
+  # Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+  use Bio::AlignIO;
+  use strict;
+
+  my $in = Bio::AlignIO->new(-format => 'selex',
+                             -file   => 't/data/testaln.selex');
+  while( my $aln = $in->next_aln ) {
+
+  }
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from selex flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::selex;
+use strict;
+
+use base qw(Bio::AlignIO);
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream. Tries to read *all*
+          of the selex input: it reads all non-whitespace characters in
+          the alignment area. For selex files with weird gaps (eg ~~~)
+          map them by using $al->map_chars('~','-')
+ Returns : L<Bio::Align::AlignI> object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my ($start, $end, $seqname);
+    my (%align, @c2name, %accession, %desc);
+    my $aln = Bio::SimpleAlign->new(-source => 'selex');
+
+    # In selex format, every non-blank line that does not start
+    # with '#=' is an alignment segment; the '#=' lines are mark up lines.
+    # Of particular interest are the '#=GS <name/st-ed> AC <accession>'
+    # lines, which give accession numbers for each segment, and the
+    # '#=GS <name/st-ed> DE <text>' lines, which give descriptions.
+    #
+    # The defined() test keeps a false-but-valid line (such as a final
+    # "0" without a newline) from ending the loop early.
+    while ( defined( my $entry = $self->_readline ) ) {
+        if ( $entry =~ /^\#=GS\s+(\S+)\s+AC\s+(\S+)/ ) {
+            $accession{$1} = $2;
+        } elsif ( $entry =~ /^\#=GS\s+(\S+)\s+DE\s+(.+)\s*$/ ) {
+            $desc{$1} .= $2;
+        } elsif ( $entry =~ /^([^\#]\S+)\s+([A-Za-z\.\-\*]+)\s*/ ) {
+            my ($name, $seq) = ($1, $2);
+
+            # @c2name remembers the order in which names first appear
+            push @c2name, $name unless defined $align{$name};
+            $align{$name} .= $seq;
+        }
+    }
+
+    # ok... now we can make the sequences
+    foreach my $name (@c2name) {
+
+        # a name of the form id/start-end carries its own coordinates;
+        # otherwise number the sequence from 1 to its length
+        if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
+            ($seqname, $start, $end) = ($1, $2, $3);
+        } else {
+            $seqname = $name;
+            $start   = 1;
+            $end     = length($align{$name});
+        }
+        my $seq = Bio::LocatableSeq->new
+            ('-seq'              => $align{$name},
+             '-display_id'       => $seqname,
+             '-start'            => $start,
+             '-end'              => $end,
+             '-description'      => $desc{$name},
+             '-accession_number' => $accession{$name},
+             );
+
+        $aln->add_seq($seq);
+    }
+
+    # $end is still undefined when no sequence line was read (for example
+    # at end of file); $end <= 0 covers a bad end coordinate on the last
+    # sequence.  Either way there is no alignment to return.
+    return if !defined $end || $end <= 0;
+    return $aln;
+}
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in selex format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+
+=cut
+
+sub write_aln {
+    my ($self, @aln) = @_;
+
+    foreach my $aln (@aln) {
+        my $maxn = $aln->maxdisplayname_length();
+        foreach my $seq ( $aln->each_seq() ) {
+            my $namestr = $aln->displayname($seq->get_nse());
+
+            # pad every name to the longest display name plus two spaces
+            # so that the sequence columns line up
+            my $add = $maxn - length($namestr) + 2;
+            $namestr .= " " x $add;
+
+            # a failed print aborts with a bare return (false in scalar
+            # context, empty list in list context)
+            $self->_print(sprintf("%s  %s\n", $namestr, $seq->seq()))
+                or return;
+        }
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/stockholm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/stockholm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO/stockholm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,593 @@
+# $Id: stockholm.pm,v 1.15.4.6 2006/11/14 15:07:01 cjfields Exp $
+#
+# BioPerl module for Bio::AlignIO::stockholm
+
+#   Based on the Bio::SeqIO::stockholm module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+#       and the SimpleAlign.pm module of Ewan Birney
+#
+# Copyright Peter Schattner, Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# September 5, 2000
+# November 6, 2006 - completely refactor read_aln(), add write_aln()
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO::stockholm - stockholm sequence input/output stream
+
+=head1 SYNOPSIS
+
+  # Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
+
+  use Bio::AlignIO;
+  use strict;
+
+  my $in = Bio::AlignIO->new(-format => 'stockholm',
+                             -file   => 't/data/testaln.stockholm');
+  while( my $aln = $in->next_aln ) {
+
+  }
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Align::AlignI> objects to and from
+stockholm flat file databases.  This has been completely refactored
+from the original stockholm parser to handle annotation data and now
+includes a write_aln() method for (almost) complete stockholm
+format output.
+
+Stockholm alignment records normally contain additional sequence-based
+and alignment-based annotation
+
+  GF Lines (alignment feature/annotation):
+  #=GF <featurename> <Generic per-file annotation, free text>
+  Placed above the alignment
+  
+  GC Lines (Alignment consensus)
+  #=GC <featurename> <Generic per-column annotation, exactly 1
+       character per column>
+  Placed below the alignment
+
+  GS Lines (Sequence annotations)
+  #=GS <seqname> <featurename> <Generic per-sequence annotation, free
+       text>
+
+  GR Lines (Sequence meta data)
+  #=GR <seqname> <featurename> <Generic per-sequence AND per-column
+       mark up, exactly 1 character per column>
+
+Currently, sequence annotations (those designated with GS tags) are
+parsed only for accession numbers and descriptions.  It is intended that
+full parsing will be added at some point in the near future along with
+a builder option for optionally parsing alignment annotation and meta data.
+
+The following methods/tags are currently used for storing and writing
+the alignment annotation data.
+
+    Tag        SimpleAlign
+                 Method  
+    ----------------------------------------------------------------------
+     AC        accession  
+     ID        id  
+     DE        description
+    ----------------------------------------------------------------------
+    
+    Tag        Bio::Annotation   TagName                    Parameters
+               Class
+    ----------------------------------------------------------------------
+     AU        SimpleValue       record_authors             value
+     SE        SimpleValue       seed_source                value
+     GA        SimpleValue       gathering_threshold        value
+     NC        SimpleValue       noise_cutoff               value
+     TC        SimpleValue       trusted_cutoff             value
+     TP        SimpleValue       entry_type                 value
+     SQ        SimpleValue       num_sequences              value
+     PI        SimpleValue       previous_ids               value
+     DC        Comment           database_comment           comment
+     CC        Comment           alignment_comment          comment
+     DR        DBLink            aln_dblink                 database
+                                                            primary_id
+                                                            comment
+     AM        SimpleValue       build_method               value
+     NE        SimpleValue       pfam_family_accession      value
+     NL        SimpleValue       sequence_start_stop        value
+     SS        SimpleValue       sec_structure_source       value
+     BM        SimpleValue       build_model                value
+     RN        Reference         reference                  *
+     RC        Reference         reference                  comment
+     RM        Reference         reference                  pubmed
+     RT        Reference         reference                  title
+     RA        Reference         reference                  authors
+     RL        Reference         reference                  location
+    ----------------------------------------------------------------------
+  * RN is generated based on the number of Bio::Annotation::Reference objects
+
+=head1 FEEDBACK
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Chris Fields, Peter Schattner
+
+Email: cjfields-at-uiuc-dot-edu, schattner at alum.mit.edu 
+
+=head1 CONTRIBUTORS
+
+Andreas Kahari, ak-at-ebi.ac.uk
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::AlignIO::stockholm;
+use strict;
+
+use Bio::Seq::Meta;
+use Bio::Annotation::AnnotationFactory;
+use Data::Dumper;
+use Text::Wrap qw(wrap);
+
+use base qw(Bio::AlignIO);
+
+# Format/version string; write_aln() prints it after "# " as the first
+# line of every record.
+our $STKVERSION = 'STOCKHOLM 1.0';
+
+# This maps the two-letter #=GF annotation key to an
+# Annotation/parameter/tagname combination.
+#
+#  * 'Method/<name>'             : next_aln() calls the accessor <name> on
+#                                  the alignment with the parsed text.
+#  * '<Type>/<-param>/<tagname>' : next_aln() splits the value on '/',
+#                                  builds a Bio::Annotation::<Type> object
+#                                  with <-param> set to the parsed text and
+#                                  stores it under <tagname>.
+#  * a bare '-param'             : Reference (R*) and build-model (BM) data;
+#                                  next_aln() collects these per reference /
+#                                  per build command and uses the value as
+#                                  the constructor parameter name.
+
+our %READMAP = (
+            'AC'   => 'Method/accession', 
+            'ID'   => 'Method/id', 
+            'DE'   => 'Method/description',
+            'AU'   => 'SimpleValue/-value/record_authors',
+            'SE'   => 'SimpleValue/-value/seed_source', 
+            'GA'   => 'SimpleValue/-value/gathering_threshold',
+            'NC'   => 'SimpleValue/-value/noise_cutoff', 
+            'TC'   => 'SimpleValue/-value/trusted_cutoff', 
+            'TP'   => 'SimpleValue/-value/entry_type', 
+            'SQ'   => 'SimpleValue/-value/num_sequences', 
+            'PI'   => 'SimpleValue/-value/previous_ids', 
+            'DC'   => 'Comment/-text/database_comment',
+            'CC'   => 'Comment/-text/alignment_comment',
+            # DBLink; next_aln() has a dedicated 'DR' branch, so this value
+            # is only used for the exists() check
+            'DR'   => 'DBLink/-value/aln_dblink',
+            # Pfam-specific
+            'AM'   => 'SimpleValue/-value/build_method', 
+            'NE'   => 'SimpleValue/-value/pfam_family_accession',
+            'NL'   => 'SimpleValue/-value/sequence_start_stop',
+            # Rfam-specific GF lines
+            'SS'   => 'SimpleValue/-value/sec_structure_source',
+            # Reference objects mapped differently
+            'RN'   => '-number',  # reference number is dumped
+            'RC'   => '-comment',
+            'RM'   => '-pubmed', 
+            'RT'   => '-title', 
+            'RA'   => '-authors',
+            'RL'   => '-location',
+            # Build model mapped differently
+            'BM'   => '-value',            
+            );
+
+# This is the order in which annotations are written.  Every entry must
+# also be a key of %WRITEMAP, otherwise write_aln() throws.
+#
+# NL data is stored by next_aln() under the tagname 'sequence_start_stop'
+# (see %READMAP), so that name is listed here; the older 'seq_start_stop'
+# spelling is kept as well so annotations added under it are still written.
+our @WRITEORDER = qw(accession
+  id
+  description
+  previous_ids
+  record_authors
+  seed_source
+  sec_structure_source
+  gathering_threshold
+  trusted_cutoff
+  noise_cutoff
+  entry_type
+  build_command
+  build_method
+  pfam_family_accession
+  sequence_start_stop
+  seq_start_stop
+  reference
+  database_comment
+  custom
+  aln_dblink
+  alignment_comment
+  num_sequences
+  );
+
+# This maps the tagname back to a two-letter tag / annotation kind
+# combination ('<TAG>/<kind>').  write_aln() splits the value on '/':
+# a kind of 'Method' means the data comes from the alignment accessor of
+# the same name, everything else is fetched from the annotation
+# collection.  The ref_* entries map a tag to the Reference method that
+# supplies its data.
+
+our %WRITEMAP = (
+            'accession'             =>  'AC/Method',
+            'id'                    =>  'ID/Method',
+            'description'           =>  'DE/Method',
+            'record_authors'        =>  'AU/SimpleValue',
+            'seed_source'           =>  'SE/SimpleValue',
+            'build_command'         =>  'BM/SimpleValue',
+            'gathering_threshold'   =>  'GA/SimpleValue',
+            'noise_cutoff'          =>  'NC/SimpleValue',
+            'trusted_cutoff'        =>  'TC/SimpleValue',
+            'entry_type'            =>  'TP/SimpleValue',
+            'num_sequences'         =>  'SQ/SimpleValue',
+            'previous_ids'          =>  'PI/SimpleValue',
+            'database_comment'      =>  'DC/SimpleValue',
+            'aln_dblink'            =>  'DR/DBLink',
+            'reference'             =>  'RX/Reference',
+            'ref_number'            =>  'RN/number',
+            'ref_comment'           =>  'RC/comment',
+            'ref_pubmed'            =>  'RM/pubmed',
+            'ref_title'             =>  'RT/title',
+            'ref_authors'           =>  'RA/authors',
+            'ref_location'          =>  'RL/location',
+            'alignment_comment'     =>  'CC/Comment',
+            #Pfam-specific
+            'build_method'          =>  'AM/SimpleValue',
+            'pfam_family_accession' =>  'NE/SimpleValue',
+            # tagname used by next_aln() for NL lines
+            'sequence_start_stop'   =>  'NL/SimpleValue',
+            # older spelling, kept for backward compatibility
+            'seq_start_stop'        =>  'NL/SimpleValue',
+            # Rfam-specific GF lines
+            'sec_structure_source'  =>  'SS/SimpleValue',
+            # custom
+            'custom'                =>  'XX/SimpleValue'
+            );
+
+sub _initialize {
+    my $self = shift;
+    my @args = @_;
+
+    # Nothing format-specific is set up yet; initialization is delegated
+    # entirely to the parent class.
+    # TODO: add arguments to handle build object, interleaved format
+    return $self->SUPER::_initialize(@args);
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln()
+ Function: returns the next alignment in the stream.
+ Returns : L<Bio::Align::AlignI> object
+ Args    : NONE
+
+=cut
+
+sub next_aln {
+    my $self = shift;
+    my $line;
+
+    my ($start, $end, $id, $name, $seqname, $seq, $tag, $data);
+    my $seen_rc;
+    my ($refct, $bct, $lnkct) = (0,0,0);
+    my @c2name;
+    my (%align, %accession, %desc, %seq_meta, %aln_meta, %annotation);
+
+    # In stockholm format, every non-blank line that does not start
+    # with '#=' is an alignment segment; the '#=' lines are mark up lines.
+    # Of particular interest are the '#=GS <name/st-ed> AC <accession>'
+    # lines, which give accession numbers for each segment.
+
+    my $aln =  Bio::SimpleAlign->new(-source => 'stockholm');
+
+    # The first line containing any word character must be the
+    # "# STOCKHOLM" header; anything else is rejected.
+    while( defined($line = $self->_readline) ) {
+        next unless $line =~ /\w+/;
+        if ($line =~ /^#\s*STOCKHOLM\s+/) {
+            last;
+        } else {
+            # Report the line that was actually read ($_ is not set by
+            # this loop), without its trailing newline.
+            (my $found = $line) =~ s{\s+$}{};
+            $self->throw("Not Stockholm format: Expecting \"# STOCKHOLM 1.0\"; Found \"$found\"");
+        }
+    }
+
+    READLINE:
+    while( defined($line = $self->_readline) ) {
+        #skip empty lines
+        next if $line =~ /^\s+$/;
+
+        # Double slash (//) signals the end of this alignment record.
+        last if $line =~ m{^//};
+
+        # GF/GS lines, by convention, should be at the top of the alignment
+        if ($line =~ m{^\#=GF\s+(\S+?)\s+([^\n]*)$}xms) {
+
+            # alignment annotation
+            ($tag, $data) = ($1, $2);
+            if (exists $READMAP{$tag}) {
+
+                # reference data (multi line)
+                if (index($tag, 'R') == 0) {
+                    # comments come before numbering, tricky: a new
+                    # reference starts at RC, or at RN when no RC preceded it
+                    $refct++ if ( ($tag eq 'RN' && !$seen_rc) || $tag eq 'RC');
+                    $seen_rc = 1 if $tag eq 'RC';
+                    # the reference number itself is not stored
+                    next READLINE if $tag eq 'RN';
+                    #                           # of ref       parameter
+                    $annotation{ 'reference' }->[$refct-1]->{ $READMAP{$tag} } .= $data.' ';
+
+                # Build commands (single line)
+                } elsif ($tag eq 'BM') {
+                    #                            # build cmd    parameter
+                    $annotation{ 'build_command' }->[$bct]->{ $READMAP{$tag} } = $data;
+                    $bct++;
+
+                # DBLinks (single line): "database; id; optional comment"
+                } elsif ($tag eq 'DR') {
+                    my ($dbase, $uid, $extra) = split /\s*;\s*/ , $data, 3;
+                    my $ref;
+                    $ref->{'-database'} = $dbase;
+                    # ids are upper-cased except for URLs
+                    $ref->{'-primary_id'} = ($dbase eq 'URL') ? $uid : uc $uid;
+                    $ref->{'-comment'} = $extra if $extra;
+                    #                       # dblink       parameter list
+                    $annotation{ 'aln_dblink' }->[$lnkct] = $ref;
+                    $lnkct++;
+
+                # Everything else (single and multi line)
+                } else {
+                    #       # param/-value/tagname
+                    $annotation{ $READMAP{$tag} } .= $data.' ';
+                }
+
+            } else {
+                # unknown or custom data treated with simplevalue objects
+                #$self->debug("Unknown tag: $tag:\t$data\n");
+                $annotation{ 'custom' }->{ $tag } .= $data.' ';
+            }
+
+        } elsif( $line =~ m{^\#=GS\s+(\S+)\s+(\w{2})\s+([^\n]+)}xms ) {
+            # sequence annotation and data; the data is the rest of the
+            # line, so that multi-word descriptions are kept whole
+            ($id, $tag, $data) = ($1, $2, $3);
+            $data =~ s{\s+$}{};
+            if ($tag eq 'AC') {
+                $accession{$id} .= $data;
+            } elsif ($tag eq 'DE') {
+                $desc{$id} .= $data;
+            }
+            # Bio::Seq::Meta is not AnnotationI, so can't add seq-based
+            # Annotations yet; uncomment to see what is passed by
+            #else {
+            #    $self->debug("Missed data: $line");
+            #}
+        } elsif( $line =~ m{^\#=GR\s+(\S+)\s+(\S+)\s+([^\n]+)} ) {
+            # meta strings per sequence
+            ($name, $tag, $data) = ($1, $2, $3);
+            $seq_meta{$name}->{$tag} .= $data;
+        } elsif( $line =~ m{^\#=GC\s+(\S+)\s+([^\n]+)}xms ) {
+            # meta strings per alignment
+            ($tag, $data) = ($1, $2);
+            $aln_meta{$tag} .= $data;
+        } elsif( $line =~ m{^([^\#]\S+)\s+([A-Za-z.\-\*]+)\s*}xms ) {
+            # alignment segment; @c2name keeps the order of first appearance
+            ($name,$seq) = ($1,$2);
+            if( ! exists $align{$name}  ) {
+                push @c2name, $name;
+            }
+            $align{$name} .= $seq;
+        } else {
+            # debugging to catch missed data; uncomment to turn on
+            #$self->debug("Missed Data: $line");
+        }
+    }
+
+    # ok... now we can make the sequences
+
+    for my $name ( @c2name ) {
+        # a name of the form id/start-end carries its own coordinates;
+        # otherwise number the sequence from 1 to its length
+        if( $name =~ m{(\S+)/(\d+)-(\d+)}xms ) {
+            ($seqname, $start, $end) = ($1, $2, $3);
+        } else {
+            $seqname=$name;
+            $start = 1;
+            $end = length($align{$name});
+        }
+        $seq = Bio::Seq::Meta->new
+            ('-seq'              => $align{$name},
+             '-display_id'       => $seqname,
+             '-start'            => $start,
+             '-end'              => $end,
+             '-description'      => $desc{$name},
+             '-accession_number' => $accession{$name}
+             );
+        if (exists $seq_meta{$name}) {
+            for my $tag (sort keys %{ $seq_meta{$name} }) {
+                $seq->named_meta($tag, $seq_meta{$name}->{$tag});
+            }
+        }
+        $aln->add_seq($seq);
+    }
+
+    # add meta strings w/o sequence for consensus meta data
+    my $ameta = Bio::Seq::Meta->new();
+    for my $tag (sort keys %aln_meta) {
+        $ameta->named_meta($tag, $aln_meta{$tag});
+    }
+
+    $aln->consensus_meta($ameta);
+
+    # Make the annotation collection...
+
+    my $coll = Bio::Annotation::Collection->new();
+
+    for my $tag (sort keys %annotation) {
+
+        # most annotations: plain strings keyed by the %READMAP value
+        if (!ref($annotation{$tag})) {
+            # split the %READMAP value to determine Annotation type,
+            # parameter and tagname
+            my ($atype, $aparam, $tagname) = split q(/), $tag;
+            # remove the trailing whitespace left by concatenation
+            $annotation{$tag} =~ s{\s+$}{}g;
+            if ($atype eq 'Method') {
+                $aln->$aparam($annotation{$tag});
+            } else {
+                my $factory = Bio::Annotation::AnnotationFactory->new(
+                    -type => "Bio::Annotation::$atype");
+                $coll->add_Annotation
+                ($tagname, $factory->create_object($aparam  => $annotation{$tag}));
+            }
+
+        } elsif ($tag eq 'custom') {
+            my $factory = Bio::Annotation::AnnotationFactory->new(
+                        -type => "Bio::Annotation::SimpleValue");
+            for my $key (sort keys %{ $annotation{$tag} }) {
+                $coll->add_Annotation(
+                    $tag, $factory->create_object(-tagname => $key,
+                                                  -value => $annotation{$tag}->{$key}));
+            }
+
+        # more complex annotations (lists of parameter hashes)
+
+        } else {
+            my $atype = #($tag eq 'custom')          ? 'SimpleValue'   :
+                        ($tag eq 'reference')       ? 'Reference'   :
+                        ($tag eq 'aln_dblink')      ? 'DBLink'   :
+                        ($tag eq 'build_command')   ? 'SimpleValue' :
+                        'BadValue'; # this will cause the factory to choke
+            $self->throw("Bad tag value : $tag.") if $atype eq 'BadValue';
+            my $factory = Bio::Annotation::AnnotationFactory->new(
+                -type => "Bio::Annotation::$atype");
+            while (my $data = shift @{ $annotation{$tag} }) {
+                next unless $data;
+                # remove trailing spaces for concatenated data
+                my %clean_data = map {
+                    $data->{$_} =~ s{\s+$}{}g;
+                    $_ => $data->{$_};
+                    } keys %{ $data };
+                my $ann = $factory->create_object(%clean_data);
+                $coll->add_Annotation($tag, $ann);
+            }
+        }
+    }
+
+    #$self->debug(Dumper($coll));
+
+    # add annotations
+    $aln->annotation($coll);
+
+    # $end is still undefined when no sequence line was read (for example
+    # at end of file); $end <= 0 covers a bad end coordinate on the last
+    # sequence.  Either way there is no alignment to return.
+    return if !defined $end || $end <= 0;
+    return $aln;
+}
+
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln(@aln)
+ Function: writes the $aln object into the stream in stockholm format
+ Returns : 1 for success and 0 for error
+ Args    : L<Bio::Align::AlignI> object
+
+=cut
+
+sub write_aln {
+    # Writes every alignment in @aln as one Stockholm record: the version
+    # header, the #=GF annotation lines in @WRITEORDER order, the sequence
+    # lines (each followed by its #=GR meta lines), the #=GC consensus meta
+    # lines and the closing "//".  Returns 1 on success and 0 as soon as a
+    # checked _print fails; throws if an argument is not a
+    # Bio::Align::AlignI.
+    #
+    # enable array of SimpleAlign objects as well (see clustalw write_aln())
+    my ($self, @aln) = @_;
+    for my $aln (@aln) {
+    $self->throw('Need Bio::Align::AlignI object')
+          if (!$aln || !($aln->isa('Bio::Align::AlignI')));
+
+    my @anns;
+    my $coll = $aln->annotation;
+    # line prefixes; NOTE(review): $seq_ann is not used in this method
+    my ($aln_ann, $seq_ann, $aln_meta, $seq_meta) =
+       ('#=GF ', '#=GS ', '#=GC ', '#=GR' );
+    $self->_print("# $STKVERSION\n\n") or return 0;
+    
+    # annotations first
+    
+    for my $param (@WRITEORDER) {
+        # no point in going through this if there is no annotation!
+        last if !$coll;
+        # alignment annotations
+        # NOTE(review): $ct is not used in this method
+        my $ct = 1;
+        $self->throw("Bad parameter: $param") if !exists $WRITEMAP{$param};
+        # get the data, act on it based on the tag
+        my ($tag, $key) = split q(/), $WRITEMAP{$param};
+        if ($key eq 'Method') {
+            # data comes from the alignment accessor named after $param
+            push @anns, $aln->$param;
+        } else {
+            @anns = $coll->get_Annotations($param);
+        }
+        # running reference number, printed as "[n]" on RN lines
+        my $rn = 1;
+        ANNOTATIONS:
+        # the loop ends at the first false value shifted off @anns
+        while (my $ann = shift @anns) {
+            # using Text::Wrap::wrap() for word wrap
+            my ($text, $alntag, $data);
+            if ($tag eq 'RX') {
+                # a reference is written as several lines, one per ref_* key
+                # that yields data
+                REFS:
+                for my $rkey (qw(ref_comment ref_number ref_pubmed
+                              ref_title ref_authors ref_location)) {
+                    my ($newtag, $method) = split q(/), $WRITEMAP{$rkey};
+                    $alntag = sprintf('%-10s',$aln_ann.$newtag);
+                    if ($rkey eq 'ref_number') {
+                        $data = "[$rn]";
+                    } else {
+                        $data = $ann->$method;
+                    }
+                    next REFS unless $data;
+                    $text = wrap($alntag, $alntag, $data);
+                    $self->_print("$text\n") or return 0;
+                }
+                $rn++;
+                next ANNOTATIONS;
+            } elsif ($tag eq 'XX') { # custom
+                # custom annotations are written under their own tagname
+                my $newtag = $ann->tagname;
+                $alntag = sprintf('%-10s',$aln_ann.$newtag);
+                $data = $ann;
+            } elsif ($tag eq 'SQ') {
+                # use the actual number, not the stored Annotation data
+                $alntag = sprintf('%-10s',$aln_ann.$tag);
+                $data = $aln->no_sequences;
+            } else {
+                $alntag = sprintf('%-10s',$aln_ann.$tag);
+                $data = $ann;
+            }
+            # the tag is used as prefix for the first and all wrapped lines
+            $text = wrap($alntag, $alntag, $data);
+            $self->_print("$text\n") or return 0;
+        }
+    }
+    
+    # NOTE(review): unlike the other _print calls, this result is not checked
+    $self->_print("\n");
+    
+    # now the sequences...
+    
+    # modified (significantly) from AlignIO::pfam
+    
+    # NOTE(review): $add is not used in this method
+    my ($namestr,$seq,$add);
+    
+    # pad extra for meta lines (presumably room for the '#=GR ' prefix)
+    my $maxlen = $aln->maxdisplayname_length() + 5;
+    my $metalen = $aln->max_metaname_length() || 0;
+    
+    for $seq ( $aln->each_seq() ) {
+        $namestr = $aln->displayname($seq->get_nse());
+        # name field is $maxlen+$metalen wide, matching the combined width
+        # of the label and meta-name fields on the meta lines below
+        $self->_print(sprintf("%-*s  %s\n",$maxlen+$metalen, $namestr, $seq->seq())) or return 0;
+        if ($seq->isa('Bio::Seq::MetaI')) {
+            for my $mname ($seq->meta_names) {
+                 # '#=GR <name>' left-justified in $maxlen, meta name
+                 # right-justified in $metalen
+                 $self->_print(sprintf("%-*s%*s  %s\n",$maxlen, $seq_meta.' '.$namestr, $metalen,
+                                       $mname, $seq->named_meta($mname))) or return 0;
+            }
+        }
+    }
+    # alignment consensus
+    my $ameta = $aln->consensus_meta;
+    if ($ameta) {
+        for my $mname ($ameta->meta_names) {
+            $self->_print(sprintf("%-*s%*s  %s\n",$maxlen, $aln_meta, $metalen,
+                                  $mname, $ameta->named_meta($mname))) or return 0; 
+        }
+    }
+    # record terminator
+    $self->_print("//\n") or return 0;
+    }
+    $self->flush() if $self->_flush_on_write && defined $self->_fh;
+    
+    return 1;
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AlignIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,512 @@
+# $Id: AlignIO.pm,v 1.46.4.4 2006/10/02 23:10:11 sendu Exp $
+#
+# BioPerl module for Bio::AlignIO
+#
+#	based on the Bio::SeqIO module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+#
+# History
+# September, 2000 AlignIO written by Peter Schattner
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AlignIO - Handler for AlignIO Formats
+
+=head1 SYNOPSIS
+
+    use Bio::AlignIO;
+
+    $inputfilename = "testaln.fasta";
+    $in  = Bio::AlignIO->new(-file   => $inputfilename ,
+                             -format => 'fasta');
+    $out = Bio::AlignIO->new(-file   => ">out.aln.pfam" ,
+                             -format => 'pfam');
+
+    while ( my $aln = $in->next_aln() ) {
+        $out->write_aln($aln);
+    }
+
+    # OR
+
+    use Bio::AlignIO;
+
+    open MYIN,"testaln.fasta";
+    $in  = Bio::AlignIO->newFh(-fh     => \*MYIN,
+                               -format => 'fasta');
+    open my $MYOUT, '>', 'testaln.pfam';
+    $out = Bio::AlignIO->newFh(-fh     =>  $MYOUT,
+                               -format => 'pfam');
+
+    # World's smallest Fasta<->pfam format converter:
+    print $out $_ while <$in>;
+
+=head1 DESCRIPTION
+
+L<Bio::AlignIO> is a handler module for the formats in the AlignIO set,
+for example, L<Bio::AlignIO::fasta>. It is the officially sanctioned way 
+of getting at the alignment objects. The resulting alignment is a
+L<Bio::Align::AlignI>-compliant object. 
+
+The idea is that you request an object for a particular format.
+All the objects have a notion of an internal file that is read
+from or written to. A particular AlignIO object instance is configured
+for either input or output, you can think of it as a stream object.
+
+Each object has functions:
+
+   $stream->next_aln();
+
+And:
+
+   $stream->write_aln($aln);
+
+Also:
+
+   $stream->type() # returns 'INPUT' or 'OUTPUT'
+
+As an added bonus, you can recover a filehandle that is tied to the
+AlignIO object, allowing you to use the standard E<lt>E<gt> and print
+operations to read and write alignment objects:
+
+    use Bio::AlignIO;
+
+    # read from standard input
+    $stream = Bio::AlignIO->newFh(-format => 'Fasta');
+
+    while ( $aln = <$stream> ) {
+	     # do something with $aln
+    }
+
+And:
+
+    print $stream $aln; # when stream is in output mode
+
+L<Bio::AlignIO> is patterned on the L<Bio::SeqIO> module and shares
+most of its features.  One significant difference is that
+L<Bio::AlignIO> usually handles IO for only a single alignment at a time,
+whereas L<Bio::SeqIO> handles IO for multiple sequences in a single stream.  
+The principal reason for this is that whereas simultaneously handling
+multiple sequences is a common requirement, simultaneous handling of
+multiple alignments is not. The only current exception is format
+C<bl2seq> which parses results of the BLAST C<bl2seq> program and which
+may produce several alignment pairs.  This set of alignment pairs can
+be read using multiple calls to L<next_aln()>.
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::AlignIO-E<gt>new()
+
+   $seqIO = Bio::AlignIO->new(-file => 'filename',   -format=>$format);
+   $seqIO = Bio::AlignIO->new(-fh   => \*FILEHANDLE, -format=>$format);
+   $seqIO = Bio::AlignIO->new(-format => $format);
+   $seqIO = Bio::AlignIO->new(-fh => \*STDOUT, -format => $format);
+
+The L<new()> class method constructs a new L<Bio::AlignIO> object.  
+The returned object can be used to retrieve or print alignment
+objects. L<new()> accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading or writing.  The usual Perl
+conventions apply:
+
+   'file'       # open file for reading
+   '>file'      # open file for writing
+   '>>file'     # open file for appending
+   '+<file'     # open file read/write
+   'command |'  # open a pipe from the command
+   '| command'  # open a pipe to the command
+
+=item -fh
+
+You may provide new() with a previously-opened filehandle.  For
+example, to read from STDIN:
+
+   $seqIO = Bio::AlignIO->new(-fh => \*STDIN);
+
+Note that you must pass filehandles as references to globs.
+
+If neither a filehandle nor a filename is specified, then the module
+will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
+semantics.
+
+=item -format
+
+Specify the format of the file.  Supported formats include:
+
+   bl2seq      Bl2seq Blast output
+   clustalw    clustalw (.aln) format
+   emboss      EMBOSS water and needle format
+   fasta       FASTA format
+   maf         Multiple Alignment Format
+   mase        mase (seaview) format
+   mega        MEGA format
+   meme        MEME format
+   msf         msf (GCG) format
+   nexus       Swofford et al NEXUS format
+   pfam        Pfam sequence alignment format
+   phylip      Felsenstein PHYLIP format
+   prodom      prodom (protein domain) format
+   psi         PSI-BLAST format
+   selex       selex (hmmer) format
+   stockholm   stockholm format
+
+Currently only those formats which were implemented in L<Bio::SimpleAlign>
+have been incorporated into L<Bio::AlignIO>.  Specifically, C<mase>, C<stockholm>
+and C<prodom> have only been implemented for input. See the specific module
+(e.g. L<Bio::AlignIO::prodom>) for notes on supported versions.
+
+If no format is specified and a filename is given, then the module
+will attempt to deduce it from the filename suffix.  If this is unsuccessful,
+C<fasta> format is assumed.
+
+The format name is case insensitive; C<FASTA>, C<Fasta> and C<fasta> are
+all treated equivalently.
+
+=back
+
+=head2 Bio::AlignIO-E<gt>newFh()
+
+   $fh = Bio::AlignIO->newFh(-fh   => \*FILEHANDLE, -format=>$format);
+   # read from STDIN or use @ARGV:
+   $fh = Bio::AlignIO->newFh(-format => $format);
+
+This constructor behaves like L<new()>, but returns a tied filehandle
+rather than a L<Bio::AlignIO> object.  You can read sequences from this
+object using the familiar E<lt>E<gt> operator, and write to it using
+L<print()>. The usual array and $_ semantics work.  For example, you can
+read all sequence objects into an array like this:
+
+  @sequences = <$fh>;
+
+Other operations, such as read(), sysread(), write(), close(), and printf()
+are not supported.
+
+=over 1
+
+=item -flush
+
+By default, all files (or filehandles) opened for writing alignments
+will be flushed after each write_aln() making the file immediately
+usable.  If you do not need this facility and would like to marginally
+improve the efficiency of writing multiple sequences to the same file
+(or filehandle), pass the -flush option '0' or any other value that
+evaluates as defined but false:
+
+  my $clustal = Bio::AlignIO->new( -file   => "<prot.aln",
+                                   -format => "clustalw" );
+  my $msf = Bio::AlignIO->new(-file   => ">prot.msf",
+                              -format => "msf",
+                              -flush  => 0 ); # go as fast as we can!
+  while($seq = $clustal->next_aln) { $msf->write_aln($seq) }
+
+=back
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $alignment = $AlignIO-E<gt>next_aln()
+
+Fetch an alignment from a formatted file.
+
+=head2 $AlignIO-E<gt>write_aln($aln)
+
+Write the specified alignment to a file.
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These provide the tie interface.  See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# 'Let the code begin...
+
+package Bio::AlignIO;
+
+use strict;
+
+use Bio::Seq;
+use Bio::LocatableSeq;
+use Bio::SimpleAlign;
+use Bio::Tools::GuessSeqFormat;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::AlignIO->new(-file => $filename,
+                                       -format => 'Format')
+ Function: Returns a new alignment stream
+ Returns : A Bio::AlignIO::Handler initialised with
+           the appropriate format
+ Args    : -file => $filename
+           -format => format
+           -fh => filehandle to attach to
+           -displayname_flat => 1 [optional]
+                                to force the displayname to not show start/end
+                                information
+
+=cut
+
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::AlignIO::(\S+)/ ) {
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    } else {
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+        my $format = $param{'-format'} ||
+            $class->_guess_format( $param{-file} || $ARGV[0] );
+        unless ($format) {
+            # No explicit format and no recognized file suffix: ask
+            # Bio::Tools::GuessSeqFormat to guess from the file or handle.
+            if ($param{-file}) {
+                $format = Bio::Tools::GuessSeqFormat->new(-file => $param{-file})->guess;
+            } elsif ($param{-fh}) {
+                $format = Bio::Tools::GuessSeqFormat->new(-fh => $param{-fh})->guess;
+            }
+        }
+        # normalize capitalization to lower case; an undetermined format
+        # becomes '' so the throw below fires without undef warnings
+        $format = defined $format ? lc $format : '';
+        $class->throw("Unknown format given or could not determine it [$format]")
+            unless $format;
+        return unless( $class->_load_format_module($format) );
+        return "Bio::AlignIO::$format"->new(@args);
+    }
+}
+
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format')
+           $sequence = <$fh>;   # read a sequence object
+           print $fh $sequence; # write a sequence object
+ Returns : filehandle tied to the Bio::AlignIO::Fh class
+ Args    :
+
+=cut
+
+sub newFh {
+  my $class  = shift;
+  my $stream = $class->new(@_) or return;   # nothing to tie if construction failed
+  return $stream->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $sequence = <$fh>;   # read a sequence object
+           print $fh $sequence; # write a sequence object
+ Returns : filehandle tied to the Bio::AlignIO::Fh class
+ Args    :
+
+=cut
+
+
+sub fh {
+  my ($self) = @_;
+  my $tie_class = ref($self) || $self;
+  my $glob = Symbol::gensym;        # fresh anonymous glob that carries the tie
+  tie $$glob, $tie_class, $self;    # the glob dispatches to TIEHANDLE of $tie_class
+  return $glob;
+}
+
+# _initialize is where the heavy stuff will happen when new is called
+
+sub _initialize {
+  my ($self, @args) = @_;
+  # Extract -displayname_flat; the full argument list still goes to _initialize_io.
+  my ($flat) = $self->_rearrange([qw(DISPLAYNAME_FLAT)], @args);
+  $self->force_displayname_flat($flat) if defined $flat;
+  $self->_initialize_io(@args);
+  return 1;
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL AlignIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+  my ($self, $format) = @_;
+  my $module = "Bio::AlignIO::$format";
+
+  # The value returned by _load_module is not consulted; success is
+  # judged solely by whether the call raised an exception.
+  eval { scalar $self->_load_module($module) };
+  return 1 unless $@;
+
+  # Loading failed: describe the problem on STDERR, then return
+  # nothing (undef / empty list) so the caller can see the format
+  # module is unavailable.
+  print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the AlignIO system please see the AlignIO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+  return;
+}
+
+=head2 next_aln
+
+ Title   : next_aln
+ Usage   : $aln = $stream->next_aln
+ Function: reads the next $aln object from the stream
+ Returns : a Bio::Align::AlignI compliant object
+ Args    :
+
+=cut
+
+sub next_aln {
+   my $self = shift;   # a generic object cannot read: this always throws
+   $self->throw("Sorry, you cannot read from a generic Bio::AlignIO object.");
+}
+
+=head2 write_aln
+
+ Title   : write_aln
+ Usage   : $stream->write_aln($aln)
+ Function: writes the $aln object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : a Bio::Align::AlignI compliant object
+
+=cut
+
+sub write_aln {
+    my $self = shift;   # a generic object cannot write: this always throws
+    $self->throw("Sorry, you cannot write to a generic Bio::AlignIO object.");
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+sub _guess_format {
+   my ($class, $file) = @_;  # lexical copy, so the caller's $_ is never clobbered
+   return unless $file;
+   return 'clustalw' if $file =~ /\.aln$/i;
+   return 'emboss'   if $file =~ /\.(water|needle)$/i;
+   return 'metafasta'if $file =~ /\.metafasta$/;
+   return 'fasta'    if $file =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
+   return 'maf'      if $file =~ /\.maf/i;
+   return 'mega'     if $file =~ /\.(meg|mega)$/i;
+   return 'meme'     if $file =~ /\.meme$/i;
+   return 'msf'      if $file =~ /\.(msf|pileup|gcg)$/i;
+   return 'nexus'    if $file =~ /\.(nexus|nex)$/i;
+   return 'pfam'     if $file =~ /\.(pfam|pfm)$/i;
+   return 'phylip'   if $file =~ /\.(phylip|phlp|phyl|phy|ph)$/i;
+   return 'psi'      if $file =~ /\.psi$/i;
+   return 'selex'    if $file =~ /\.(selex|slx|selx|slex|sx)$/i;
+}
+
+sub DESTROY {
+    my ($self) = @_;   # on destruction, delegate cleanup to close()
+    $self->close();
+}
+
+sub TIEHANDLE {
+  my ($class, $alignio) = @_;   # $alignio: the object stored under key 'alignio'
+  return bless { 'alignio' => $alignio }, $class;
+}
+
+sub READLINE {
+  my ($self) = @_;
+  my $stream = $self->{'alignio'};
+  return $stream->next_aln() unless wantarray;   # scalar context: one alignment
+  my @alignments;                                # list context: drain the stream
+  while ( my $aln = $stream->next_aln() ) { push @alignments, $aln }
+  return @alignments;
+}
+
+sub PRINT {
+  my $stream = shift->{'alignio'};   # the wrapped stream object
+  return $stream->write_aln(@_);     # everything printed is forwarded to write_aln
+}
+
+
+=head2 force_displayname_flat
+
+ Title   : force_displayname_flat
+ Usage   : $obj->force_displayname_flat($newval)
+ Function:
+ Example :
+ Returns : value of force_displayname_flat (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub force_displayname_flat {
+    my ($self, @new) = @_;
+    return $self->{'_force_displayname_flat'} = $new[0] if @new;   # setter
+    return $self->{'_force_displayname_flat'} || 0;                # getter, 0 when unset
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,811 @@
+# $Id: AnalysisI.pm,v 1.10.4.1 2006/10/02 23:10:11 sendu Exp $
+#
+# BioPerl module for Bio::AnalysisI
+#
+# Cared for by Martin Senger <martin.senger at gmail.com>
+# For copyright and disclaimer see below.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AnalysisI - An interface to any (local or remote) analysis tool
+
+=head1 SYNOPSIS
+
+This is an interface module - you do not instantiate it.
+Use C<Bio::Tools::Run::Analysis> module:
+
+  use Bio::Tools::Run::Analysis;
+  my $tool = new Bio::Tools::Run::Analysis (@args);
+
+=head1 DESCRIPTION
+
+This interface contains all public methods for accessing and
+controlling local and remote analysis tools. It is meant to be used on
+the client side.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (martin.senger at gmail.com)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003, Martin Senger and EMBL-EBI.
+All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+L<http://www.ebi.ac.uk/soaplab/Perl_Client.html>.
+
+=head1 APPENDIX
+
+This is actually the main documentation...
+
+If you try to call any of these methods directly on this
+C<Bio::AnalysisI> object you will get a I<not implemented> error
+message. You need to call them on a C<Bio::Tools::Run::Analysis> object instead.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::AnalysisI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+# -----------------------------------------------------------------------------
+
+=head2 analysis_name
+
+ Usage   : $tool->analysis_name;
+ Returns : a name of this analysis
+ Args    : none
+
+=cut
+
+sub analysis_name { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 analysis_spec
+
+ Usage   : $tool->analysis_spec;
+ Returns : a hash reference describing this analysis
+ Args    : none
+
+The returned hash reference uses the following keys (not all of them always
+present, perhaps others present as well): C<name>, C<type>, C<version>,
+C<supplier>, C<installation>, C<description>.
+
+Here is an example output:
+
+  Analysis 'edit.seqret':
+        installation => EMBL-EBI
+        description => Reads and writes (returns) sequences
+        supplier => EMBOSS
+        version => 2.6.0
+        type => edit
+        name => seqret
+
+=cut
+
+sub analysis_spec { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 describe
+
+ Usage   : $tool->describe;
+ Returns : an XML detailed description of this analysis
+ Args    : none
+
+The returned XML string contains metadata describing this analysis
+service. It includes also metadata returned (and easier used) by
+method C<analysis_spec>, C<input_spec> and C<result_spec>.
+
+The DTD used for returned metadata is based on the adopted standard
+(BSA specification for analysis engine):
+
+  <!ELEMENT DsLSRAnalysis (analysis)+>
+
+  <!ELEMENT analysis (description?, input*, output*, extension?)>
+
+  <!ATTLIST analysis
+      type          CDATA #REQUIRED
+      name          CDATA #IMPLIED
+      version       CDATA #IMPLIED
+      supplier      CDATA #IMPLIED
+      installation  CDATA #IMPLIED>
+
+  <!ELEMENT description ANY>
+  <!ELEMENT extension ANY>
+
+  <!ELEMENT input (default?, allowed*, extension?)>
+
+  <!ATTLIST input
+      type          CDATA #REQUIRED
+      name          CDATA #REQUIRED
+      mandatory     (true|false) "false">
+
+  <!ELEMENT default (#PCDATA)>
+  <!ELEMENT allowed (#PCDATA)>
+
+  <!ELEMENT output (extension?)>
+
+  <!ATTLIST output
+      type          CDATA #REQUIRED
+      name          CDATA #REQUIRED>
+
+But the DTD may be extended by provider-specific metadata. For
+example, the EBI experimental SOAP-based service on top of EMBOSS uses
+DTD explained at C<http://www.ebi.ac.uk/~senger/applab>.
+
+=cut
+
+sub describe { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 input_spec
+
+ Usage   : $tool->input_spec;
+ Returns : an array reference with hashes as elements
+ Args    : none
+
+The analysis input data are named, and can be also associated with a
+default value, with allowed values and with few other attributes. The
+names are important for feeding the service with the input data (the
+inputs are given to methods C<create_job>, C<run>, and/or C<wait_for>
+as name/value pairs).
+
+Here is a (slightly shortened) example of an input specification:
+
+ $input_spec = [
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'sequence_usa'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'sequence_direct_data'
+          },
+          {
+            'mandatory' => 'false',
+            'allowed_values' => [
+                                  'gcg',
+                                  'gcg8',
+                                  ...
+                                  'raw'
+                                ],
+            'type' => 'String',
+            'name' => 'sformat'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'sbegin'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'send'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'sprotein'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'snucleotide'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'sreverse'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'slower'
+          },
+          {
+            'mandatory' => 'false',
+            'type' => 'String',
+            'name' => 'supper'
+          },
+          {
+            'mandatory' => 'false',
+            'default' => 'false',
+            'type' => 'String',
+            'name' => 'firstonly'
+          },
+          {
+            'mandatory' => 'false',
+            'default' => 'fasta',
+            'allowed_values' => [
+                                  'gcg',
+                                  'gcg8',
+                                  'embl',
+                                  ...
+                                  'raw'
+                                ],
+            'type' => 'String',
+            'name' => 'osformat'
+          }
+        ];
+
+=cut
+
+sub input_spec { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 result_spec
+
+ Usage   : $tool->result_spec;
+ Returns : a hash reference with result names as keys
+           and result types as values
+ Args    : none
+
+The analysis results are named and can be retrieved using their names
+by methods C<results> and C<result>.
+
+Here is an example of the result specification (again for the service
+I<edit.seqret>):
+
+  $result_spec = {
+          'outseq' => 'String',
+          'report' => 'String',
+          'detailed_status' => 'String'
+        };
+
+=cut
+
+sub result_spec { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 create_job
+
+ Usage   : $tool->create_job ( {'sequence'=>'tatat'} )
+ Returns : Bio::Tools::Run::Analysis::Job
+ Args    : data and parameters for this execution
+           (in various formats)
+
+Create an object representing a single execution of this analysis
+tool.
+
+Call this method if you wish to "stage the scene" - to create a job
+with all input data but without actually running it. This method is
+called automatically from other methods (C<run> and C<wait_for>) so
+usually you do not need to call it directly.
+
+The input data and parameters for this execution can be specified in
+various ways:
+
+=over
+
+=item array reference
+
+The array has scalar elements of the form
+
+   name = [[@]value]
+
+where C<name> is the name of an input data or input parameter (see
+method C<input_spec> for finding what names are recognized by this
+analysis) and C<value> is a value for this data/parameter. If C<value>
+is missing a 1 is assumed (which is convenient for the boolean
+options). If C<value> starts with C<@> it is treated as a local
+filename, and its contents are used as the data/parameter value.
+
+=item hash reference
+
+The same as with the array reference but now there is no need to use
+an equal sign. The hash keys are input names and hash values their
+data. The values can again start with a C<@> sign indicating a local
+filename.
+
+=item scalar
+
+In this case, the parameter represents a job ID obtained in some
+previous invocation - such job already exists on the server side, and
+we are just re-creating it here using the same job ID.
+
+I<TBD: here we should allow the same by using a reference to the
+Bio::Tools::Run::Analysis::Job object.>
+
+=item undef
+
+Finally, if the parameter is undefined, ask server to create an empty
+job. The input data may be added later using C<set_data...>
+method(s) - see scripts/papplmaker.PLS for details.
+
+=back
+
+=cut
+
+sub create_job { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 run
+
+ Usage   : $tool->run ( ['sequence=@my.seq', 'osformat=embl'] )
+ Returns : Bio::Tools::Run::Analysis::Job,
+           representing started job (an execution)
+ Args    : the same as for create_job
+
+Create a job and start it, but do not wait for its completion.
+
+=cut
+
+sub run { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 wait_for
+
+ Usage   : $tool->wait_for ( { 'sequence' => '@my.file' } )
+ Returns : Bio::Tools::Run::Analysis::Job,
+           representing finished job
+ Args    : the same as for create_job
+
+Create a job, start it and wait for its completion.
+
+Note that this is a blocking method. It returns only after the
+executed job finishes, either normally or by an error.
+
+Usually, after this call, you ask for results of the finished job:
+
+    $analysis->wait_for (...)->results;
+
+=cut
+
+sub wait_for { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+#
+#   Bio::AnalysisI::JobI
+#
+# -----------------------------------------------------------------------------
+
+package Bio::AnalysisI::JobI;
+
+=head1 Module Bio::AnalysisI::JobI
+
+An interface to the public methods provided by C<Bio::Tools::Run::Analysis::Job>
+objects.
+
+The C<Bio::Tools::Run::Analysis::Job> objects represent a created,
+running, or finished execution of an analysis tool.
+
+The factory for these objects is module C<Bio::Tools::Run::Analysis>
+where the following methods return an
+C<Bio::Tools::Run::Analysis::Job> object:
+
+    create_job   (returning a prepared job)
+    run          (returning a running job)
+    wait_for     (returning a finished job)
+
+=cut
+
+use strict;
+use base qw(Bio::Root::RootI);
+
+# -----------------------------------------------------------------------------
+
+=head2 id
+
+ Usage   : $job->id;
+ Returns : this job ID
+ Args    : none
+
+Each job (an execution) is identifiable by this unique ID which can be
+used later to re-create the same job (in other words: to re-connect to
+the same job). It is useful in cases when a job takes long time to
+finish and your client program does not want to wait for it within the
+same session.
+
+=cut
+
+sub id { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 run
+
+ Usage   : $job->run
+ Returns : itself
+ Args    : none
+
+It starts previously created job.  The job already must have all input
+data filled-in. This differs from the method of the same name of the
+C<Bio::Tools::Run::Analysis> object where the C<run> method creates
+also a new job allowing to set input data.
+
+=cut
+
+sub run { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 wait_for
+
+ Usage   : $job->wait_for
+ Returns : itself
+ Args    : none
+
+It waits until a previously started execution of this job finishes.
+
+=cut
+
+sub wait_for { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 terminate
+
+ Usage   : $job->terminate
+ Returns : itself
+ Args    : none
+
+Stop the currently running job (represented by this object). This is a
+definitive stop, there is no way to resume it later.
+
+=cut
+
+sub terminate { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 last_event
+
+ Usage   : $job->last_event
+ Returns : an XML string
+ Args    : none
+
+It returns a short XML document showing what happened last with this
+job. This is the used DTD:
+
+   <!-- place for extensions -->
+   <!ENTITY % event_body_template "(state_changed | heartbeat_progress | percent_progress | time_progress | step_progress)">
+
+   <!ELEMENT analysis_event (message?, (%event_body_template;)?)>
+
+   <!ATTLIST analysis_event
+       timestamp  CDATA #IMPLIED>
+
+   <!ELEMENT message (#PCDATA)>
+
+   <!ELEMENT state_changed EMPTY>
+   <!ENTITY % analysis_state "created | running | completed | terminated_by_request | terminated_by_error">
+   <!ATTLIST state_changed
+       previous_state  (%analysis_state;) "created"
+       new_state       (%analysis_state;) "created">
+
+   <!ELEMENT heartbeat_progress EMPTY>
+
+   <!ELEMENT percent_progress EMPTY>
+   <!ATTLIST percent_progress
+       percentage CDATA #REQUIRED>
+
+   <!ELEMENT time_progress EMPTY>
+   <!ATTLIST time_progress
+       remaining CDATA #REQUIRED>
+
+   <!ELEMENT step_progress EMPTY>
+   <!ATTLIST step_progress
+       total_steps      CDATA #IMPLIED
+       steps_completed CDATA #REQUIRED>
+
+Here is an example what is returned after a job was created and
+started, but before it finishes (note that the example uses an
+analysis 'showdb' which does not need any input data):
+
+   use Bio::Tools::Run::Analysis;
+   print new Bio::Tools::Run::Analysis (-name => 'display.showdb')
+             ->run
+	     ->last_event;
+
+It prints:
+
+   <?xml version = "1.0"?>
+   <analysis_event>
+     <message>Mar 3, 2003 5:14:46 PM (Europe/London)</message>
+     <state_changed previous_state="created" new_state="running"/>
+   </analysis_event>
+
+The same example but now after it finishes:
+
+   use Bio::Tools::Run::Analysis;
+   print new Bio::Tools::Run::Analysis (-name => 'display.showdb')
+             ->wait_for
+	     ->last_event;
+
+   <?xml version = "1.0"?>
+   <analysis_event>
+     <message>Mar 3, 2003 5:17:14 PM (Europe/London)</message>
+     <state_changed previous_state="running" new_state="completed"/>
+   </analysis_event>
+
+=cut
+
+sub last_event { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 status
+
+ Usage   : $job->status
+ Returns : string describing the job status
+ Args    : none
+
+It returns one of the following strings (and perhaps more if a server
+implementation extended possible job states):
+
+   CREATED
+   RUNNING
+   COMPLETED
+   TERMINATED_BY_REQUEST
+   TERMINATED_BY_ERROR
+
+=cut
+
+sub status { my ($self) = @_; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 created
+
+ Usage   : $job->created (1)
+ Returns : time when this job was created
+ Args    : optional
+
+Without any argument it returns a time of creation of this job in
+seconds, counting from the beginning of the UNIX epoch
+(1.1.1970). With a true argument it returns a formatted time, using
+rules described in C<Bio::Tools::Run::Analysis::Utils::format_time>.
+
+=cut
+
+sub created { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 started
+
+ Usage   : $job->started (1)
+ Returns : time when this job was started
+ Args    : optional
+
+See C<created>.
+
+=cut
+
+sub started { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 ended
+
+ Usage   : $job->ended (1)
+ Returns : time when this job was terminated
+ Args    : optional
+
+See C<created>.
+
+=cut
+
+sub ended { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 elapsed
+
+ Usage   : $job->elapsed
+ Returns : elapsed time of the execution of the given job
+           (in milliseconds), or 0 if the job was not yet started
+ Args    : none
+
+Note that some server implementations cannot count in millisecond - so
+the returned time may be rounded to seconds.
+
+=cut
+
+sub elapsed { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 times
+
+ Usage   : $job->times ('formatted')
+ Returns : a hash reference with all time characteristics
+ Args    : optional
+
+It is a convenient method returning a hash reference with the following
+keys:
+
+   created
+   started
+   ended
+   elapsed
+
+See C<created> for remarks on time formatting.
+
+An example - both for unformatted and formatted times:
+
+   use Data::Dumper;
+   use Bio::Tools::Run::Analysis;
+   my $rh = new Bio::Tools::Run::Analysis (-name => 'nucleic_cpg_islands.cpgplot')
+             ->wait_for ( { 'sequence_usa' => 'embl:hsu52852' } )
+	     ->times (1);
+   print Data::Dumper->Dump ( [$rh], ['Times']);
+   $rh = new Bio::Tools::Run::Analysis (-name => 'nucleic_cpg_islands.cpgplot')
+             ->wait_for ( { 'sequence_usa' => 'embl:AL499624' } )
+	     ->times;
+   print Data::Dumper->Dump ( [$rh], ['Times']);
+
+   $Times = {
+           'ended'   => 'Mon Mar  3 17:52:06 2003',
+           'started' => 'Mon Mar  3 17:52:05 2003',
+           'elapsed' => '1000',
+           'created' => 'Mon Mar  3 17:52:05 2003'
+         };
+   $Times = {
+           'ended'   => '1046713961',
+           'started' => '1046713926',
+           'elapsed' => '35000',
+           'created' => '1046713926'
+         };
+
+=cut
+
+sub times { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 results
+
+ Usage   : $job->results (...)
+ Returns : one or more results created by this job
+ Args    : various, see below
+
+This is a complex method trying to make sense for all kinds of
+results. Especially it tries to help to put binary results (such as
+images) into local files. Generally it deals with the following facts:
+
+=over
+
+=item *
+
+Each analysis tool may produce more results.
+
+=item *
+
+Some results may contain binary data not suitable for printing into a
+terminal window.
+
+=item *
+
+Some results may be split into variable number of parts (this is
+mainly true for the image results that can consist of more *.png
+files).
+
+=back
+
+Note also that results have names to distinguish if there are more of
+them. The names can be obtained by method C<result_spec>.
+
+Here are the rules how the method works:
+
+    Retrieving NAMED results:
+    -------------------------
+     results ('name1', ...)   => return results as they are, no storing into files
+
+     results ( { 'name1' => 'filename', ... } )  => store into 'filename', return 'filename'
+     results ( 'name1=filename', ...)            => ditto
+
+     results ( { 'name1' => '-', ... } )         => send result to the STDOUT, do not return anything
+     results ( 'name1=-', ...)                   => ditto
+
+     results ( { 'name1' => '@', ... } )  => store into file whose name is invented by
+                                             this method, perhaps using RESULT_NAME_TEMPLATE env
+     results ( 'name1=@', ...)            => ditto
+
+     results ( { 'name1' => '?', ... } )  => find of what type is this result and then use
+                                             {'name1'=>'@'} for binary files, and a regular
+                                             return for non-binary files
+     results ( 'name=?', ...)             => ditto
+
+    Retrieving ALL results:
+    -----------------------
+     results()     => return all results as they are, no storing into files
+
+     results ('@') => return all results, as if each of them given
+                      as {'name' => '@'} (see above)
+
+     results ('?') => return all results, as if each of them given
+                      as {'name' => '?'} (see above)
+
+    Misc:
+    -----
+     * any result can be returned as a scalar value, or as an array reference
+       (the latter is used for results consisting of more parts, such images);
+       this applies regardless whether the returned result is the result itself
+       or a filename created for the result
+
+     * look in the documentation of the C<panalysis[.PLS]> script for examples
+       (especially how to use various templates for inventing file names)
+
+=cut
+
+sub results { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 result
+
+ Usage   : $job->result (...)
+ Returns : the first result
+ Args    : see 'results'
+
+=cut
+
+sub result { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+=head2 remove
+
+ Usage   : $job->remove
+ Returns : 1
+ Args    : none
+
+The job object is not actually removed in this time but it is marked
+(setting 1 to C<_destroy_on_exit> attribute) as ready for deletion when
+the client program ends (including a request to server to forget the job
+mirror object on the server side).
+
+=cut
+
+sub remove { shift->throw_not_implemented(); } # interface stub: no default implementation
+
+# -----------------------------------------------------------------------------
+
+1;
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisParserI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisParserI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisParserI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,154 @@
+#---------------------------------------------------------------
+# $Id: AnalysisParserI.pm,v 1.12.4.1 2006/10/02 23:10:11 sendu Exp $
+#
+# BioPerl module Bio::AnalysisParserI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# Derived from Bio::SeqAnalysisParserI by Jason Stajich, Hilmar Lapp.
+#
+# You may distribute this module under the same terms as perl itself
+#---------------------------------------------------------------
+
+=head1 NAME
+
+Bio::AnalysisParserI - Generic analysis output parser interface
+
+=head1 SYNOPSIS
+
+    # get a AnalysisParserI somehow.
+    # Eventually, there may be an Bio::Factory::AnalysisParserFactory.
+    # For now a SearchIO object, an implementation of AnalysisParserI, can be created 
+    # directly, as in the following:
+    my $parser = Bio::SearchIO->new(
+                                    '-file'   => 'inputfile',
+                                    '-format' => 'blast'); 
+
+    while( my $result = $parser->next_result() ) {
+        print "Result:  ", $result->analysis_method, 
+              ", Query:  ", $result->query_name, "\n";
+
+          while( my $feature = $result->next_feature() ) {
+              print "Feature from ", $feature->start, " to ", 
+                    $feature->end, "\n";
+          }
+    }
+
+=head1 DESCRIPTION
+
+AnalysisParserI is an interface for describing generic analysis
+result parsers. This module makes no assumption about the nature of
+analysis being parsed, only that zero or more result sets can be
+obtained from the input source.
+
+This module was derived from Bio::SeqAnalysisParserI, the differences being
+
+=over 4
+
+=item 1. next_feature() was replaced with next_result().
+
+Instead of flattening a stream containing potentially multiple
+analysis results into a single set of features, AnalysisParserI
+segments the stream in terms of analysis result sets
+(Bio::AnalysisResultI objects). Each AnalysisResultI can then be
+queried for its features (if any) as well as other information
+about the result
+
+=item 2. AnalysisParserI is a pure interface.
+
+It inherits only from Bio::Root::RootI and does not provide a new()
+method. Implementations are free to choose how to implement it.
+
+=back
+
+=head2 Rationale (copied from Bio::SeqAnalysisParserI)
+
+The concept behind this interface is to have a generic interface in sequence
+annotation pipelines (as used e.g. in high-throughput automated
+sequence annotation). This interface enables plug-and-play for new analysis
+methods and their corresponding parsers without the necessity for modifying
+the core of the annotation pipeline. In this concept the annotation pipeline
+has to rely on only a list of methods for which to process the results, and a
+factory from which it can obtain the corresponding parser implementing this
+interface.
+
+=head2 TODO
+
+Create Bio::Factory::AnalysisParserFactoryI and
+Bio::Factory::AnalysisParserFactory for interface and an implementation.
+Note that this factory could return Bio::SearchIO-derived objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Steve Chervitz, Jason Stajich, Hilmar Lapp
+
+Email sac at bioperl.org
+
+Authors of Bio::SeqAnalysisParserI on which this module is based:
+Email jason at bioperl.org 
+Email hlapp at gmx.net
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::AnalysisParserI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : $result = $obj->next_result();
+ Function: Returns the next result available from the input, or
+           undef if there are no more results.
+ Example :
+ Returns : A Bio::Search::Result::ResultI implementing object, 
+           or undef if there are no more results.
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;    # the invocant must be unpacked from @_ for the call below to work
+    $self->throw_not_implemented;    # interface method: no default implementation
+}
+
+
+1;
+__END__
+
+NOTE (sac): My ten-month old son Russell added the following line.
+It doesn't look like it will compile so I'm putting it here:
+mt6 j7qa

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisResultI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisResultI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AnalysisResultI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+#-----------------------------------------------------------------
+# $Id: AnalysisResultI.pm,v 1.10.4.1 2006/10/02 23:10:11 sendu Exp $
+#
+# BioPerl module Bio::AnalysisResultI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# Derived from Bio::Tools::AnalysisResult by Hilmar Lapp <hlapp at gmx.net>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AnalysisResultI - Interface for analysis result objects
+
+=head1 SYNOPSIS
+
+Bio::AnalysisResultI defines an interface that must be implemented by
+a subclass. So you cannot create Bio::AnalysisResultI objects,
+only objects that inherit from Bio::AnalysisResultI. 
+
+=head1 DESCRIPTION
+
+The AnalysisResultI module provides an interface for modules
+encapsulating the result of an analysis that was carried out with a
+query sequence and an optional subject dataset.
+
+The notion of an analysis represented by this base class is that of a unary or
+binary operator, taking either one query or a query and a subject and producing
+a result. The query is e.g. a sequence, and a subject is either a sequence,
+too, or a database of sequences. 
+
+This interface defines methods to access analysis result data and does
+not impose any constraints on how the analysis result data is acquired.
+
+Note that this module does not provide support for B<running> an analysis.
+Rather, it is positioned in the subsequent parsing step (concerned with
+turning raw results into BioPerl objects).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Steve Chervitz, Hilmar Lapp
+
+Email sac at bioperl.org
+Email hlapp at gmx.net (author of Bio::Tools::AnalysisResult on which this module is based)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::AnalysisResultI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 analysis_query
+
+ Usage     : $query_obj = $result->analysis_query();
+ Purpose   : Get a Bio::PrimarySeqI-compatible object representing the entity 
+             on which the analysis was performed. Lacks sequence information.
+ Argument  : n/a
+ Returns   : A Bio::PrimarySeqI-compatible object without sequence information.
+             The sequence will have display_id, description, moltype, and length data.
+
+=cut
+
+#---------------------
+sub analysis_query {
+    # Interface method: this class provides no default implementation.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+
+=head2 analysis_subject
+
+ Usage     : $obj = $result->analysis_subject();
+ Purpose   : Get the subject of the analysis against which it was
+             performed. For similarity searches it will probably be a database,
+             and for sequence feature predictions (exons, promoters, etc) it
+             may be a collection of models or homologous sequences that were
+             used, or undefined.
+ Returns   : An object of a type that depends on the implementation
+             May also return undef for analyses that don't involve subjects.
+ Argument  : n/a
+ Comments  : Implementation of this method is optional.
+             AnalysisResultI provides a default behavior of returning undef.
+
+=cut
+
+#---------------
+sub analysis_subject {
+    # Optional method. The default reports "no subject": a bare return
+    # gives undef in scalar context and the empty list in list context.
+    return;
+}
+
+=head2 analysis_subject_version
+
+ Usage     : $vers = $result->analysis_subject_version();
+ Purpose   : Get the version string of the subject of the analysis.
+ Returns   : String or undef for analyses that don't involve subjects.
+ Argument  : n/a
+ Comments  : Implementation of this method is optional.
+             AnalysisResultI provides a default behavior of returning undef.
+
+=cut
+
+#---------------
+sub analysis_subject_version {
+    # Optional method. The default reports "no version": a bare return
+    # gives undef in scalar context and the empty list in list context.
+    return;
+}
+
+
+=head2 analysis_date
+
+ Usage     : $date = $result->analysis_date();
+ Purpose   : Get the date on which the analysis was performed.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#---------------------
+sub analysis_date {
+    # Interface method: this class provides no default implementation.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 analysis_method
+
+ Usage     : $meth = $result->analysis_method();
+ Purpose   : Get the name of the sequence analysis method that was used
+             to produce this result (BLASTP, FASTA, etc.). May also be the
+             actual name of a program.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+    # Interface method: this class provides no default implementation.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 analysis_method_version
+
+ Usage     : $vers = $result->analysis_method_version();
+ Purpose   : Get the version string of the analysis program.
+           : (e.g., 1.4.9MP, 2.0a19MP-WashU).
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#---------------------
+sub analysis_method_version {
+    # Interface method: this class provides no default implementation.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none
+
+=cut
+
+#---------------------
+sub next_feature {
+    # Interface method: this class provides no default implementation.
+    my ($self) = @_;    # the invocant must be unpacked from @_ for the call below to work
+    $self->throw_not_implemented;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AnnotatableI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AnnotatableI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AnnotatableI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,329 @@
+# $Id: AnnotatableI.pm,v 1.12.4.1 2006/10/02 23:10:11 sendu Exp $
+#
+# BioPerl module for Bio::AnnotatableI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AnnotatableI - the base interface an annotatable object must implement
+
+=head1 SYNOPSIS
+
+    use Bio::SeqIO;
+    # get an annotatable object somehow: for example, Bio::SeqI objects
+    # are annotatable
+    my $seqio = Bio::SeqIO->new(-fh => \*STDIN, -format => 'genbank');
+    while (my $seq = $seqio->next_seq()) {
+        # $seq is-a Bio::AnnotatableI, hence:
+        my $ann_coll = $seq->annotation();
+        # $ann_coll is-a Bio::AnnotationCollectionI, hence:
+        my @all_anns = $ann_coll->get_Annotations();
+        # do something with the annotation objects
+    }
+
+=head1 DESCRIPTION
+
+This is the base interface that all annotatable objects must implement. A 
+good example is Bio::Seq which is an AnnotatableI object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Hilmar Lapp E<lt>hlapp at gmx.netE<gt>
+ Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::AnnotatableI;
+use strict;
+use Carp;
+
+use Bio::Annotation::Comment;
+use Bio::Annotation::DBLink;
+#use Bio::Annotation::OntologyTerm;
+use Bio::Annotation::Reference;
+use Bio::Annotation::SimpleValue;
+
+our %tagclass = (    # tag name => annotation class used to hold its values
+  comment        => 'Bio::Annotation::Comment',
+  dblink         => 'Bio::Annotation::DBLink',
+  description    => 'Bio::Annotation::SimpleValue',
+  gene_name      => 'Bio::Annotation::SimpleValue',
+  ontology_term  => 'Bio::Annotation::OntologyTerm',
+  reference      => 'Bio::Annotation::Reference',
+  __DEFAULT__    => 'Bio::Annotation::SimpleValue',
+);
+
+our %tag2text = (    # annotation class => name of the accessor for its text value
+  'Bio::Annotation::Comment'        => 'text',
+  'Bio::Annotation::DBLink'         => 'primary_id',
+  'Bio::Annotation::SimpleValue'    => 'value',
+  # NOTE(review): the 'use' line for OntologyTerm is commented out above -- confirm it is loaded elsewhere
+  'Bio::Annotation::OntologyTerm'   => 'name',
+  'Bio::Annotation::Reference'      => 'title',
+  __DEFAULT__                       => 'value',
+
+);
+
+use base qw(Bio::Root::RootI);
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($newval)
+ Function: Get the annotation collection for this annotatable object.
+ Example : 
+ Returns : a Bio::AnnotationCollectionI implementing object, or undef
+ Args    : on set, new value (a Bio::AnnotationCollectionI
+           implementing object, optional) (an implementation may not
+           support changing the annotation collection)
+
+See L<Bio::AnnotationCollectionI>
+
+=cut
+
+sub annotation{
+  shift->throw_not_implemented();   # interface method: no default implementation here
+}
+
+
+=head1 "*_tag_*" METHODS
+
+The methods below allow mapping of the old "get_tag_values()"-style
+annotation access to Bio::AnnotationCollectionI.  These need not be
+implemented in a Bio::AnnotationCollectionI compliant class, as they
+are built on top of the methods.
+
+B<DEPRECATED>: DO NOT USE THESE FOR FUTURE DEVELOPMENT.
+
+=cut
+
+=head2 has_tag
+
+ Usage   : $count = $obj->has_tag($tag)
+ Function: returns the number of annotations corresponding to $tag
+ Returns : an integer
+ Args    : tag name
+ Note    : DEPRECATED
+
+Use L</get_Annotations> instead.
+
+=cut
+
+sub has_tag {
+  my ($self,$tag) = @_;
+  #uncomment in 1.6
+  #$self->deprecated('has_tag() is deprecated, use get_Annotations()');
+  # NOTE(review): assumes the collection's get_Annotations() yields a count in scalar context -- confirm
+  return scalar($self->annotation->get_Annotations($tag));
+}
+
+=head2 add_tag_value
+
+ Usage   : See add_Annotation
+ Function:
+ Returns : 
+ Args    : DEPRECATED
+
+See L<Bio::AnnotationCollectionI::add_Annotation>
+
+=cut
+
+sub add_tag_value {
+  my ($self,$tag,@vals) = @_;
+
+  #uncomment in 1.6
+  #$self->deprecated('add_tag_value() is deprecated, use add_Annotation()');
+
+  # The annotation class and its value accessor depend only on the tag,
+  # so resolve them once rather than on every iteration.
+  my $class = $tagclass{$tag}   || $tagclass{__DEFAULT__};
+  my $slot  = $tag2text{$class};
+
+  foreach my $val (@vals){
+    # wrap the raw value in an annotation object ($a is left alone: sort() uses it)
+    my $ann = $class->new();
+    $ann->$slot($val);
+    $self->annotation->add_Annotation($tag,$ann);
+  }
+  return 1;   # always true, however many values were added
+}
+
+
+=head2 get_Annotations
+
+ Usage   : my $parent   = $obj->get_Annotations('Parent');
+           my @parents = $obj->get_Annotations('Parent');
+ Function: a wrapper around Bio::Annotation::Collection::get_Annotations().
+ Returns : returns annotations as
+           Bio::Annotation::Collection::get_Annotations() does, but
+           additionally returns a single scalar in scalar context
+           instead of list context so that if an annotation tag
+           contains only a single value, you can do:
+
+           $parent = $feature->get_Annotations('Parent');
+
+           instead of:
+
+           ($parent) = ($feature->get_Annotations('Parent'))[0];
+
+           if the 'Parent' tag has multiple values and is called in a
+           scalar context, the number of annotations is returned.
+
+ Args    : an annotation tag name.
+
+=cut
+
+sub get_Annotations {
+    my ($self, @query) = @_;
+
+    my @found = $self->annotation->get_Annotations(@query);
+
+    # list context: hand back every matching annotation
+    return @found if wantarray;
+
+    # scalar context: the lone annotation itself when exactly one
+    # matched, otherwise the number of matches
+    return $found[0] if scalar(@found) == 1;
+    return scalar(@found);
+}
+
+=head2 get_tag_values
+
+ Usage   : @annotations = $obj->get_tag_values($tag)
+ Function: returns annotations corresponding to $tag
+ Returns : a list of scalars
+ Args    : tag name
+ Note    : DEPRECATED
+
+This method is essentially L</get_Annotations>, use it instead.
+
+=cut
+
+sub get_tag_values {
+    my ($self,$tag) = @_;
+    
+    #uncomment in 1.6
+    #$self->deprecated('get_tag_values() is deprecated, use get_Annotations()');
+
+    if(!$tagclass{$tag} && $self->annotation->get_Annotations($tag)){
+        # unregistered tag that nevertheless has annotations: add it to the registry
+        my($proto) = $self->annotation->get_Annotations($tag);
+        # we can only register if there's a method known for obtaining the value
+        if (exists($tag2text{ref($proto)})) {
+            $tagclass{$tag} = ref($proto);
+        }
+    }
+    # accessor name for this tag's class, falling back to the default class
+    my $slot  = $tag2text{ $tagclass{$tag} || $tagclass{__DEFAULT__} };
+    # call that accessor on every annotation stored under the tag
+    return map { $_->$slot } $self->annotation->get_Annotations($tag);
+}
+
+=head2 get_tagset_values
+
+ Usage   : @annotations = $obj->get_tagset_values($tag1,$tag2)
+ Function: returns annotations corresponding to a list of tags.
+           this is a convenience method equivalent to multiple calls
+           to get_tag_values with each tag in the list.
+ Returns : a list of scalars (the text value of each annotation).
+ Args    : a list of tag names
+ Note    : DEPRECATED
+
+See L<Bio::AnnotationCollectionI::get_Annotations>
+
+=cut
+
+sub get_tagset_values {
+  my ($self,@tags) = @_;
+
+  #uncomment in 1.6
+  #$self->deprecated('get_tagset_values() is deprecated, use get_Annotations()');
+  # collect the text value of every annotation, tag by tag, in argument order
+  my @r = ();
+  foreach my $tag (@tags){
+    my $slot  = $tag2text{ $tagclass{$tag} || $tagclass{__DEFAULT__} };   # accessor for this tag's class
+    push @r, map { $_->$slot } $self->annotation->get_Annotations($tag);
+  }
+  return @r;
+}
+
+=head2 get_all_tags
+
+ Usage   : @tags = $obj->get_all_tags()
+ Function: returns a list of annotation tag names.
+ Returns : a list of tag names
+ Args    : none
+ Note    : DEPRECATED
+
+See L<Bio::AnnotationCollectionI::get_all_annotation_keys>
+
+=cut
+
+sub get_all_tags {
+  my ($self,@args) = @_;
+
+  #uncomment in 1.6
+  #$self->deprecated('get_all_tags() is deprecated, use get_all_annotation_keys()');
+  # thin wrapper: forwards any arguments to the annotation collection
+  return $self->annotation->get_all_annotation_keys(@args);
+}
+
+=head2 remove_tag
+
+ Usage   : See remove_Annotations().
+ Function:
+ Returns : 
+ Args    : DEPRECATED
+ Note    : Contrary to what the name suggests, this method removes
+           all annotations corresponding to $tag, not just a
+           single annotation.
+
+See L<Bio::AnnotationCollectionI::remove_Annotations>
+
+=cut
+
+sub remove_tag {
+  my ($self,@args) = @_;
+
+  #uncomment in 1.6
+  #$self->deprecated('remove_tag() is deprecated, use remove_Annotations()');
+  # thin wrapper: forwards the tag name(s) to the annotation collection
+  return $self->annotation->remove_Annotations(@args);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/AnnotationFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/AnnotationFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/AnnotationFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,249 @@
+# $Id: AnnotationFactory.pm,v 1.6.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::AnnotationFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::AnnotationFactory - Instantiates a new 
+Bio::AnnotationI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Annotation::AnnotationFactory;
+    # 
+    my $factory = new Bio::Annotation::AnnotationFactory(
+                    -type => 'Bio::Annotation::SimpleValue');
+    my $ann = $factory->create_object(-value => 'peroxisome',
+                                      -tagname => 'cellular component');
+
+
+=head1 DESCRIPTION
+
+This object will build L<Bio::AnnotationI> objects generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+
+=head1 CONTRIBUTORS
+
+This is mostly copy-and-paste with subsequent adaptation from
+Bio::Seq::SeqFactory by Jason Stajich. Most credits should in fact go
+to him.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Annotation::AnnotationFactory;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Annotation::AnnotationFactory();
+ Function: Builds a new Bio::Annotation::AnnotationFactory object 
+ Returns : Bio::Annotation::AnnotationFactory
+ Args    : -type => string, name of a L<Bio::AnnotationI> derived class.
+
+If type is not set the module guesses it based on arguments passed to
+method L<create_object>.
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    # pull the optional -type argument out of the named parameters
+    my ($type) = $self->_rearrange([qw(TYPE)], @args);
+
+    $self->{'_loaded_types'} = {};   # classes already loaded; filled by type() and create_object()
+    $self->type($type) if $type;     # the setter loads and validates the class
+
+    return $self;
+}
+
+
+=head2 create_object
+
+ Title   : create_object
+ Usage   : my $seq = $factory->create_object(<named parameters>);
+ Function: Instantiates new Bio::AnnotationI (or one of its child classes)
+
+           This object allows us to genericize the instantiation of
+           annotation objects.
+
+ Returns : L<Bio::AnnotationI> compliant object
+           The return type is configurable using new(-type =>"...").
+ Args    : initialization parameters specific to the type of annotation
+           object we want.
+
+=cut
+
+sub create_object {
+   my ($self,@args) = @_;
+   # an explicitly configured type wins; type() has already loaded it
+   my $type = $self->type;
+   if(! $type) {
+       # no type configured: guess one from the constructor arguments
+       $type = $self->_guess_type(@args);
+       if(! $type) {
+           $self->throw("No annotation type set and unable to guess.");
+       }
+       # load dynamically if it hasn't been loaded yet
+       if(! $self->{'_loaded_types'}->{$type}) {
+           eval {
+               $self->_load_module($type);
+               $self->{'_loaded_types'}->{$type} = 1;   # remember only successful loads
+           };
+           if($@) {
+               $self->throw("Bio::AnnotationI implementation $type ".
+                            "failed to load: ".$@);
+           }
+       }
+   }
+   return $type->new(-verbose => $self->verbose, @args);   # the new object gets the factory's verbosity
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $obj->type($newval)
+ Function: Get/set the type of L<Bio::AnnotationI> object to be created.
+
+           This may be changed at any time during the lifetime of this
+           factory.
+
+ Returns : value of type
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub type{
+    my $self = shift;
+    # with an argument: setter (loads and validates the class first); without: plain getter
+    if(@_) {
+	my $type = shift;
+	if($type && (! $self->{'_loaded_types'}->{$type})) {   # skip classes already validated
+	    eval {
+		$self->_load_module($type);
+	    };
+	    if( $@ ) {
+		$self->throw("Annotation class '$type' failed to load: ".
+			     $@);
+	    }
+	    my $a = bless {},$type;   # throwaway instance, used only for the isa() check below
+	    if( ! $a->isa('Bio::AnnotationI') ) {
+		$self->throw("'$type' does not implement Bio::AnnotationI. ".
+			     "Too bad.");
+	    }
+	    $self->{'_loaded_types'}->{$type} = 1;   # cache so the load/validate step runs once per class
+	}
+	return $self->{'type'} = $type;   # a false value is stored as-is, without loading anything
+    }
+    return $self->{'type'};
+}
+
+=head2 _guess_type
+
+ Title   : _guess_type
+ Usage   :
+ Function: Guesses the right type of L<Bio::AnnotationI> implementation
+           based on initialization parameters for the prospective
+           object.
+ Example :
+ Returns : the type (a string, the module name)
+ Args    : initialization parameters to be passed to the prospective
+           annotation object
+
+
+=cut
+
+sub _guess_type{
+    my ($self,@args) = @_;
+    my $type;
+
+    # we can only guess from a certain number of arguments
+    my ($val,$db,$text,$name,$authors, $start) =
+        $self->_rearrange([qw(VALUE
+                              DATABASE
+                              TEXT
+                              NAME
+                              AUTHORS
+                              START
+                              )], @args);
+  SWITCH: {   # the first true argument wins, in this order
+      $val        && do { $type = "SimpleValue"; last SWITCH; };
+      $authors    && do { $type = "Reference"; last SWITCH; };
+      $db         && do { $type = "DBLink"; last SWITCH; };
+      $text       && do { $type = "Comment"; last SWITCH; };
+      $name       && do { $type = "OntologyTerm"; last SWITCH; };
+      $start      && do { $type = "Target"; last SWITCH; };
+  }
+    # Nothing recognisable: return nothing, so create_object() can report the
+    # failure instead of receiving the truthy bare prefix "Bio::Annotation::".
+    return unless defined $type;
+    return "Bio::Annotation::".$type;
+}
+
+#####################################################################
+# aliases for naming consistency or other reasons                   #
+#####################################################################
+
+*create = \&create_object;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Collection.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Collection.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Collection.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,683 @@
+# $Id: Collection.pm,v 1.23.4.1 2006/10/02 23:10:12 sendu Exp $
+
+#
+# BioPerl module for Bio::Annotation::Collection.pm
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::Collection - Default Perl implementation of 
+AnnotationCollectionI
+
+=head1 SYNOPSIS
+
+   # get an AnnotationCollectionI somehow, eg
+
+   $ac = $seq->annotation();
+
+   foreach $key ( $ac->get_all_annotation_keys() ) {
+       @values = $ac->get_Annotations($key);
+       foreach $value ( @values ) {
+          # value is a Bio::AnnotationI, and defines an "as_text" method
+          print "Annotation ",$key," stringified value ",$value->as_text,"\n";
+
+          # it also defines a hash_tree method, which allows data-oriented
+          # access into this object
+          $hash = $value->hash_tree();
+       }
+   }
+
+=head1 DESCRIPTION
+
+Bioperl implementation for Bio::AnnotationCollectionI 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Annotation::Collection;
+
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Annotation::TypeManager;
+use Bio::Annotation::SimpleValue;
+
+
+use base qw(Bio::Root::Root Bio::AnnotationCollectionI Bio::AnnotationI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $coll = Bio::Annotation::Collection->new()
+ Function: Makes a new Annotation::Collection object. 
+ Returns : Bio::Annotation::Collection
+ Args    : none
+
+=cut
+
+sub new{
+   # Constructor: builds an empty collection with its own type manager.
+   my ($class,@args) = @_;
+
+   # the parent class constructs the object from the given arguments
+   my $self = $class->SUPER::new(@args);
+
+   # annotation store, keyed by annotation key (tag name)
+   $self->{'_annotation'} = {};
+   # a fresh type manager for this collection
+   $self->_typemap(Bio::Annotation::TypeManager->new());
+
+   return $self;
+}
+
+
+=head1 L<Bio::AnnotationCollectionI> implementing methods
+
+=cut
+
+=head2 get_all_annotation_keys
+
+ Title   : get_all_annotation_keys
+ Usage   : $ac->get_all_annotation_keys()
+ Function: gives back a list of annotation keys, which are simple text strings
+ Returns : list of strings
+ Args    : none
+
+=cut
+
+sub get_all_annotation_keys{
+   my $self = shift;
+
+   # the keys of the internal annotation store are the annotation keys
+   return keys %{ $self->{'_annotation'} };
+}
+
+=head2 get_Annotations
+
+ Title   : get_Annotations
+ Usage   : my @annotations = $collection->get_Annotations('key')
+ Function: Retrieves all the Bio::AnnotationI objects for one or more
+           specific key(s).
+
+           If no key is given, returns all annotation objects.
+
+           The returned objects will have their tagname() attribute set to
+           the key under which they were attached, unless the tagname was
+           already set.
+
+ Returns : list of Bio::AnnotationI - empty if no objects stored for a key
+ Args    : keys (list of strings) for annotations (optional)
+
+=cut
+
+sub get_Annotations{
+    # Return the annotation objects stored under the given keys, or under
+    # all keys if none are given.
+    my ($self,@keys) = @_;
+
+    my @anns = ();
+    # no keys given means all keys
+    @keys = $self->get_all_annotation_keys() unless @keys;
+    foreach my $key (@keys) {
+	# keys that have nothing stored are silently skipped
+	next unless exists($self->{'_annotation'}->{$key});
+	foreach my $ann (@{$self->{'_annotation'}->{$key}}) {
+	    # stamp the key on the object only if it has no tagname yet
+	    $ann->tagname($key) if ! $ann->tagname();
+	    push(@anns, $ann);
+	}
+    }
+    return @anns;
+}
+
+=head2 get_all_Annotations
+
+ Title   : get_all_Annotations
+ Usage   :
+ Function: Similar to get_Annotations, but traverses and flattens nested
+           annotation collections. This means that collections in the
+           tree will be replaced by their components.
+
+           Keys will not be passed on to nested collections. I.e., if the
+           tag name of a nested collection matches the key, it will be
+           flattened in its entirety.
+
+           Hence, for un-nested annotation collections this will be identical
+           to get_Annotations.
+ Example :
+ Returns : an array of L<Bio::AnnotationI> compliant objects
+ Args    : keys (list of strings) for annotations (optional)
+
+
+=cut
+
+sub get_all_Annotations{
+    # Like get_Annotations(), but nested collections are replaced by their
+    # (recursively flattened) members.
+    my ($self,@keys) = @_;
+
+    # The keys are deliberately not passed on to nested collections; a
+    # nested collection is flattened in its entirety.
+    return map {
+	$_->isa("Bio::AnnotationCollectionI") ?
+	    $_->get_all_Annotations() : $_;
+    } $self->get_Annotations(@keys);
+}
+
+=head2 get_num_of_annotations
+
+ Title   : get_num_of_annotations
+ Usage   : my $count = $collection->get_num_of_annotations()
+ Function: Returns the count of all annotations stored in this collection 
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub get_num_of_annotations{
+   # Count the annotation objects stored under all keys.
+   my ($self) = @_;
+   my $count = 0;
+   # every value of the store is an array reference; add up their sizes
+   foreach my $anns (values %{$self->{'_annotation'}}) {
+       $count += scalar @$anns;
+   }
+   return $count;
+}
+
+=head1 Implementation specific functions - mainly for adding
+
+=cut
+
+=head2 add_Annotation
+
+ Title   : add_Annotation
+ Usage   : $self->add_Annotation('reference',$object);
+           $self->add_Annotation($object,'Bio::MyInterface::DiseaseI');
+           $self->add_Annotation($object);
+           $self->add_Annotation('disease',$object,'Bio::MyInterface::DiseaseI');
+ Function: Adds an annotation for a specific key.
+
+           If the key is omitted, the object to be added must provide a value
+           via its tagname().
+
+           If the archetype is provided, this and future objects added under
+           that tag have to comply with the archetype and will be rejected
+           otherwise.
+
+ Returns : 1 (true) on success
+ Args    : annotation key ('disease', 'dblink', ...)
+           object to store (must be Bio::AnnotationI compliant)
+           [optional] object archetype to map future storage of object 
+                      of these types to
+
+=cut
+
+sub add_Annotation{
+   my ($self,$key,$object,$archetype) = @_;
+
+   # Key omitted: the first argument is the annotation object itself and an
+   # optional second, non-reference argument is the archetype. The key is
+   # then taken from the tagname() of the object.
+   if(ref($key) && $key->isa("Bio::AnnotationI") && (!ref($object))) {
+       $archetype = $object if defined($object);
+       $object = $key;
+       $key = $object->tagname();
+       $key = $key->name() if ref($key); # OntologyTermI
+       $key or
+	   $self->throw("Annotation object must have a tagname if key omitted");
+   }
+
+   # Argument checks; their order decides which message is reported.
+   defined($object) or
+       $self->throw("Must have at least key and object in add_Annotation");
+
+   ref($object) or
+       $self->throw("Must add an object. Use Bio::Annotation::{Comment,SimpleValue,OntologyTerm} for simple text additions");
+
+   $object->isa("Bio::AnnotationI") or
+       $self->throw("object must be AnnotationI compliant, otherwise we wont add it!");
+
+   # without an explicit archetype the class of the object is used
+   $archetype = ref($object) unless defined($archetype);
+
+   # A key seen for the first time gets registered with the archetype. For
+   # a key that is already registered the object is validated against the
+   # stored type, regardless of the archetype passed in.
+   my $typemap = $self->_typemap;
+   if( !defined($typemap->type_for_key($key)) ) {
+       $typemap->_add_type_map($key,$archetype);
+   } elsif( !$typemap->is_valid($key,$object) ) {
+       $self->throw("Object $object was not valid with key $key. If you were adding new keys in, perhaps you want to make use of the archetype method to allow registration to a more basic type");
+   }
+
+   # we are ok to store; the array for a new key springs into existence
+   # on the first push
+   push(@{$self->{'_annotation'}->{$key}},$object);
+
+   return 1;
+}
+
+=head2 remove_Annotations
+
+ Title   : remove_Annotations
+ Usage   :
+ Function: Remove the annotations for the specified key from this collection.
+ Example :
+ Returns : an array Bio::AnnotationI compliant objects which were stored
+           under the given key(s)
+ Args    : the key(s) (tag name(s), one or more strings) for which to
+           remove annotations (optional; if none given, flushes all
+           annotations)
+
+
+=cut
+
+sub remove_Annotations{
+    my $self = shift;
+    my @keys = @_;
+
+    # no keys given means all keys, i.e. the collection is flushed
+    @keys = $self->get_all_annotation_keys() unless @keys;
+
+    # collect what is about to be removed so it can be handed back
+    my @removed = $self->get_Annotations(@keys);
+
+    for my $key (@keys) {
+	# drop the stored objects and the type registered for the key
+	delete $self->{'_annotation'}->{$key};
+	delete $self->{'_typemap'}->{'_type'}->{$key};
+    }
+    return @removed;
+}
+
+=head2 flatten_Annotations
+
+ Title   : flatten_Annotations
+ Usage   :
+ Function: Flattens part or all of the annotations in this collection.
+
+           This is a convenience method for getting the flattened
+           annotation for the given keys, removing the annotation for
+           those keys, and adding back the flattened array.
+
+           This should not change anything for un-nested collections.
+ Example :
+ Returns : an array Bio::AnnotationI compliant objects which were stored
+           under the given key(s)
+ Args    : list of keys (strings) the annotation for which to flatten,
+           defaults to all keys if not given
+
+
+=cut
+
+sub flatten_Annotations{
+    # Replace the annotations under the given keys (default: all keys) by
+    # their flattened equivalent and return the original objects.
+    my ($self,@keys) = @_;
+
+    # fetch the flattened view first, while the originals are still stored
+    my @anns = $self->get_all_Annotations(@keys);
+    my @origanns = $self->remove_Annotations(@keys);
+    # add back without an explicit key, so each object is stored under
+    # its own tagname()
+    foreach my $ann (@anns) {
+	$self->add_Annotation($ann);
+    }
+    return @origanns;
+}
+
+=head1 Bio::AnnotationI methods implementations
+
+   This is to allow nested annotation: you can add a collection as an
+   annotation object to an annotation collection.
+
+=cut
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   :
+ Function: See L<Bio::AnnotationI>
+ Example :
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub as_text{
+    my ($self) = @_;
+
+    # textual form of every annotation in the collection
+    my @texts = map { $_->as_text() } $self->get_Annotations();
+
+    # each member is wrapped in brackets; an empty collection says so
+    my $members = @texts
+	? join(", ", map { '['.$_.']'; } @texts)
+	: "no elements";
+
+    return "Collection consisting of ".$members;
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   :
+ Function: See L<Bio::AnnotationI>
+ Example :
+ Returns : a hash reference
+ Args    : none
+
+
+=cut
+
+sub hash_tree{
+    my ($self) = @_;
+
+    # One array reference per key. The annotation objects themselves are
+    # stored; hash_tree() is not called on them here.
+    my %tree;
+    for my $tag ($self->get_all_annotation_keys()) {
+	$tree{$tag} = [$self->get_Annotations($tag)];
+    }
+    return \%tree;
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to
+           provide a tag to Bio::AnnotationCollectionI when adding
+           this object. When obtaining an AnnotationI object from the
+           collection, the collection will set the value to the tag
+           under which it was stored unless the object has a tag
+           stored already.
+
+ Example : 
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    if(@newval) {
+	$self->{'tagname'} = $newval[0];
+    }
+    return $self->{'tagname'};
+}
+
+
+=head1 Backward compatible functions
+
+Functions put in for backward compatibility with old
+Bio::Annotation.pm stuff
+
+=cut
+
+=head2 description
+
+ Title   : description
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub description{
+   my $self = shift;
+   my $value = shift;
+
+   $self->deprecated("Using old style annotation call on new Annotation::Collection object");
+
+   # setter part: wrap the text in a SimpleValue stored under 'description'
+   if( defined $value ) {
+       my $sv = Bio::Annotation::SimpleValue->new();
+       $sv->value($value);
+       $self->add_Annotation('description',$sv);
+   }
+
+   # getter part: only the first stored description is looked at
+   my ($first) = $self->get_Annotations('description');
+
+   # nothing stored: do not attempt to call value on undef
+   return undef unless $first;
+   return $first->value;
+}
+
+
+=head2 add_gene_name
+
+ Title   : add_gene_name
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub add_gene_name{
+   my $self = shift;
+   my $value = shift;
+
+   $self->deprecated("Old style add_gene_name called on new style Annotation::Collection");
+
+   # the name is wrapped in a SimpleValue and stored under 'gene_name'
+   my $sv = Bio::Annotation::SimpleValue->new();
+   $sv->value($value);
+   return $self->add_Annotation('gene_name',$sv);
+}
+
+=head2 each_gene_name
+
+ Title   : each_gene_name
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub each_gene_name{
+   my $self = shift;
+
+   $self->deprecated("Old style each_gene_name called on new style Annotation::Collection");
+
+   # unwrap the value of everything stored under 'gene_name'
+   my @names = map { $_->value } $self->get_Annotations('gene_name');
+
+   return @names;
+}
+
+=head2 add_Reference
+
+ Title   : add_Reference
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub add_Reference{
+   my $self = shift;
+
+   $self->deprecated("add_Reference (old style Annotation) on new style Annotation::Collection");
+
+   # Every remaining argument is stored under 'reference'; passing no
+   # reference at all is allowed, as per old method
+   foreach my $ref (@_) {
+       $self->add_Annotation('reference',$ref);
+   }
+}
+
+=head2 each_Reference
+
+ Title   : each_Reference
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub each_Reference{
+   my $self = shift;
+
+   $self->deprecated("each_Reference (old style Annotation) on new style Annotation::Collection");
+
+   # old-style name for fetching everything stored under 'reference'
+   return $self->get_Annotations('reference');
+}
+
+
+=head2 add_Comment
+
+ Title   : add_Comment
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub add_Comment{
+   my $self = shift;
+   my $value = shift;
+
+   $self->deprecated("add_Comment (old style Annotation) on new style Annotation::Collection");
+
+   # old-style name for storing an object under 'comment'
+   return $self->add_Annotation('comment',$value);
+}
+
+=head2 each_Comment
+
+ Title   : each_Comment
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub each_Comment{
+   my $self = shift;
+
+   $self->deprecated("each_Comment (old style Annotation) on new style Annotation::Collection");
+
+   # old-style name for fetching everything stored under 'comment'
+   return $self->get_Annotations('comment');
+}
+
+
+
+=head2 add_DBLink
+
+ Title   : add_DBLink
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub add_DBLink{
+   my $self = shift;
+   my $value = shift;
+
+   $self->deprecated("add_DBLink (old style Annotation) on new style Annotation::Collection");
+
+   # old-style name for storing an object under 'dblink'
+   return $self->add_Annotation('dblink',$value);
+}
+
+=head2 each_DBLink
+
+ Title   : each_DBLink
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub each_DBLink{
+   my $self = shift;
+
+   $self->deprecated("each_DBLink (old style Annotation) on new style Annotation::Collection - use get_Annotations('dblink')");
+
+   # old-style name for fetching everything stored under 'dblink'
+   return $self->get_Annotations('dblink');
+}
+
+
+
+=head1 Implementation management functions
+
+=cut
+
+=head2 _typemap
+
+ Title   : _typemap
+ Usage   : $obj->_typemap($newval)
+ Function: 
+ Example : 
+ Returns : value of _typemap
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _typemap{
+   my $self = shift;
+   my $value = shift;
+
+   # only a defined argument replaces the stored type manager
+   $self->{'_typemap'} = $value if defined $value;
+
+   return $self->{'_typemap'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Comment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Comment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Comment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,178 @@
+# $Id: Comment.pm,v 1.12.6.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::Comment
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::Comment - A comment object, holding text
+
+=head1 SYNOPSIS
+
+
+    $comment = Bio::Annotation::Comment->new();
+    $comment->text("This is the text of this comment");
+    $annotation->add_Annotation('comment', $comment);
+
+
+=head1 DESCRIPTION
+
+A holder for comments in annotations, just plain text. This is a very simple
+object, and justifiably so.
+
+=head1 AUTHOR - Ewan Birney 
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Annotation::Comment;
+use strict;
+# In string context a comment object yields its text, or the empty string
+# if the text is false.
+use overload '""' => sub { $_[0]->text || ''};
+# 'eq' compares the stringified forms of both operands.
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+
+use base qw(Bio::Root::Root Bio::AnnotationI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $comment = Bio::Annotation::Comment->new( '-text' => 'some text for this comment');
+ Function: This returns a new comment object, optionally with
+           the text filled in
+ Example :
+ Returns : a Bio::Annotation::Comment object
+ Args    : a hash with -text optionally set
+
+
+=cut
+
+
+sub new {
+  # Constructor: accepts the optional named parameters -text and -tagname.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($text,$tag) = $self->_rearrange([qw(TEXT TAGNAME)], @args);
+
+  # only defined values are set, so an empty string or 0 is kept
+  defined $text && $self->text($text);
+  defined $tag && $self->tagname($tag);
+
+  return $self;
+}
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub as_text{
+   my $self = shift;
+
+   # the comment text with a fixed label in front
+   my $text = $self->text;
+   return "Comment: ".$text;
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub hash_tree{
+    my ($self) = @_;
+
+    # a comment has a single data member, its text
+    my $text = $self->text;
+    return { 'text' => $text };
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to
+           provide a tag to Bio::AnnotationCollectionI when adding
+           this object. When obtaining an AnnotationI object from the
+           collection, the collection will set the value to the tag
+           under which it was stored unless the object has a tag
+           stored already.
+
+ Example : 
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my $self = shift;
+    my $value = shift;
+
+    # only a defined argument replaces the stored tag name
+    $self->{'tagname'} = $value if defined $value;
+
+    return $self->{'tagname'};
+}
+
+=head1 Specific accessors for Comments
+
+=cut
+
+
+=head2 text
+
+ Title   : text
+ Usage   : $value = $self->text($newval)
+ Function: get/set for the text field. A comment object
+           just holds a single string which is accessible through
+           this method
+ Example : 
+ Returns : value of text
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub text{
+   my $self = shift;
+   my $value = shift;
+
+   # only a defined argument replaces the stored text
+   $self->{'text'} = $value if defined $value;
+
+   return $self->{'text'};
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/DBLink.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/DBLink.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/DBLink.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,389 @@
+# $Id: DBLink.pm,v 1.18.6.2 2006/10/19 17:52:51 jason Exp $
+#
+# BioPerl module for Bio::Annotation::DBLink
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::DBLink - untyped links between databases
+
+=head1 SYNOPSIS
+
+   $link1 = new Bio::Annotation::DBLink(-database => 'TSC',
+                                        -primary_id => 'TSC0000030'
+					);
+
+   #or 
+
+   $link2 = new Bio::Annotation::DBLink();
+   $link2->database('dbSNP');
+   $link2->primary_id('2367');
+
+   # DBLink is-a Bio::AnnotationI object, can be added to annotation
+   # collections, e.g. the one on features or seqs
+   $feat->annotation->add_Annotation('dblink', $link2);
+
+
+=head1 DESCRIPTION
+
+Provides an object which represents a link from one object to something
+in another database without prescribing what is in the other database.
+
+Aside from L<Bio::AnnotationI>, this class also implements
+L<Bio::IdentifiableI>.
+
+=head1 AUTHOR - Ewan Birney
+
+Ewan Birney - birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Annotation::DBLink;
+use strict;
+# In string context a link yields "database:primary_id.version"; every part
+# whose value is false is left out together with its separator, and the
+# result is the empty string if nothing remains.
+use overload '""' => sub { (($_[0]->database ? $_[0]->database . ':' : '' ) . ($_[0]->primary_id ? $_[0]->primary_id : '') . ($_[0]->version ? '.' . $_[0]->version : '')) || '' };
+# 'eq' compares the stringified forms of both operands.
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+
+use base qw(Bio::Root::Root Bio::AnnotationI Bio::IdentifiableI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $dblink = Bio::Annotation::DBLink->new(-database =>"GenBank",
+                                                  -primary_id => "M123456");
+ Function: Creates a new instance of this class.
+ Example :
+ Returns : A new instance of Bio::Annotation::DBLink.
+ Args    : Named parameters. At present, the following parameters are
+           recognized.
+
+             -database    the name of the database referenced by the xref
+             -primary_id  the primary (main) id of the referenced entry
+                          (usually this will be an accession number)
+             -optional_id a secondary ID under which the referenced entry
+                          is known in the same database
+             -comment     comment text for the dbxref
+             -tagname     the name of the tag under which to add this
+                          instance to an annotation bundle (usually 'dblink')
+             -namespace   synonymous with -database (also overrides)
+             -version     version of the referenced entry
+             -authority   attribute of the Bio::IdentifiableI interface
+             -url         attribute of the Bio::IdentifiableI interface
+
+=cut
+
+sub new {
+  # Constructor: accepts the named parameters listed in the POD above.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  my ($database,$primary_id,$optional_id,$comment,$tag,$ns,$auth,$v,$url) =
+      $self->_rearrange([qw(DATABASE
+			    PRIMARY_ID
+			    OPTIONAL_ID
+			    COMMENT
+			    TAGNAME
+			    NAMESPACE
+			    AUTHORITY
+			    VERSION
+			    URL
+			    )], @args);
+  
+  # These are set only if true, so a value of '0' or '' is not stored.
+  $database    && $self->database($database);
+  $primary_id  && $self->primary_id($primary_id);
+  $optional_id && $self->optional_id($optional_id);
+  $comment     && $self->comment($comment);
+  $tag         && $self->tagname($tag);
+  # Bio::IdentifiableI parameters:
+  $ns          && $self->namespace($ns); # this will override $database
+  $auth        && $self->authority($auth);
+  # version and url are set whenever they are defined
+  defined($v)  && $self->version($v);
+  defined($url)  && $self->url($url);
+
+  return $self;
+}
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub as_text{
+   my $self = shift;
+
+   my $version  = $self->version;
+   my $optional = $self->optional_id;
+
+   # version and optional id appear only if they are true
+   my $txt = "Direct database link to ".$self->primary_id;
+   $txt .= ".".$version if $version;
+   $txt .= " (".$optional.")" if $optional;
+   $txt .= " in database ".$self->database;
+
+   return $txt;
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub hash_tree{
+   my $self = shift;
+
+   # database and primary id are always present in the tree
+   my %tree;
+   $tree{'database'}   = $self->database;
+   $tree{'primary_id'} = $self->primary_id;
+
+   # optional id and comment are added only if defined; the comment is
+   # stored as returned by comment()
+   foreach my $field (qw(optional_id comment)) {
+       my $val = $self->$field();
+       $tree{$field} = $val if defined $val;
+   }
+
+   return \%tree;
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to
+           provide a tag to Bio::AnnotationCollectionI when adding
+           this object. When obtaining an AnnotationI object from the
+           collection, the collection will set the value to the tag
+           under which it was stored unless the object has a tag
+           stored already.
+
+ Example : 
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'tagname'} = $newval[0] if @newval;
+    return $self->{'tagname'};
+}
+
+=head1 Specific accessors for DBLinks
+
+=cut
+
+=head2 database
+
+ Title   : database
+ Usage   : $self->database($newval)
+ Function: set/get on the database string. Databases are just
+           a string here which can then be interpreted elsewhere
+ Example : 
+ Returns : value of database
+ Args    : newvalue (optional)
+
+=cut
+
+sub database{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'database'} = $newval[0] if @newval;
+    return $self->{'database'};
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $self->primary_id($newval)
+ Function: set/get on the primary id (a string)
+           The primary id is the main identifier used for this object in 
+           the database. Good examples would be accession numbers. The id
+           is meant to be the main, stable identifier for this object
+ Example : 
+ Returns : value of primary_id
+ Args    : newvalue (optional)
+
+=cut
+
+sub primary_id{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'primary_id'} = $newval[0] if @newval;
+    return $self->{'primary_id'};
+}
+
+=head2 optional_id
+
+ Title   : optional_id
+ Usage   : $self->optional_id($newval)
+ Function: get/set for the optional_id (a string)
+
+           optional id is a slot for people to use as they wish. The
+           main issue is that some databases do not have a clean
+           single string identifier scheme. It is hoped that the
+           primary_id can behave like a reasonably sane "single string
+           identifier" of objects, and people can use/abuse optional
+           ids to their heart's content to provide precise mappings.
+
+ Example : 
+ Returns : value of optional_id
+ Args    : newvalue (optional)
+
+=cut
+
+#'
+
+sub optional_id{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'optional_id'} = $newval[0] if @newval;
+    return $self->{'optional_id'};
+}
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $self->comment($newval)
+ Function: get/set of comments (comment object)
+           Sets or gets comments of this dblink, which is sometimes relevant
+ Example : 
+ Returns : value of comment (Bio::Annotation::Comment)
+ Args    : newvalue (optional)
+
+=cut
+
+sub comment{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'comment'} = $newval[0] if @newval;
+    return $self->{'comment'};
+}
+
+=head1 Methods for Bio::IdentifiableI compliance
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+
+           This is aliased to primary_id().
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    my $self = shift;
+
+    # plain alias: all remaining arguments go to primary_id()
+    return $self->primary_id(@_);
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described
+           by the same identifier should represent the same concept
+
+ Returns : A number
+
+=cut
+
+sub version{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'version'} = $newval[0] if @newval;
+    return $self->{'version'};
+}
+
+
+=head2 url
+
+ Title   : url
+ Usage   : $url    = $obj->url()
+ Function: URL which is associated with this DB link
+ Returns : string, full URL descriptor
+
+=cut
+
+sub url {
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'url'} = $newval[0] if @newval;
+    return $self->{'url'};
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name of the
+           organisation (eg, wormbase.org)
+
+ Returns : A scalar
+
+=cut
+
+sub authority{
+    my ($self,@newval) = @_;
+
+    # any argument, even an undefined one, replaces the stored value
+    $self->{'authority'} = $newval[0] if @newval;
+    return $self->{'authority'};
+}
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection 
+
+           For DBLink this is the same as database().
+ Returns : A scalar
+
+
+=cut
+
+sub namespace{
+    my $self = shift;
+
+    # plain alias: all remaining arguments go to database()
+    return $self->database(@_);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/OntologyTerm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/OntologyTerm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/OntologyTerm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,490 @@
+# $Id: OntologyTerm.pm,v 1.15.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::OntologyTerm
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::OntologyTerm - An ontology term adapted to AnnotationI
+
+=head1 SYNOPSIS
+
+   use Bio::Annotation::OntologyTerm;
+   use Bio::Annotation::Collection;
+   use Bio::Ontology::Term;
+
+   my $coll = new Bio::Annotation::Collection;
+
+   # this also implements a tag/value pair, where tag _and_ value are treated
+   # as ontology terms
+   my $annterm = new Bio::Annotation::OntologyTerm(-label => 'ABC1',
+                                                   -tagname => 'Gene Name');
+   # ontology terms can be added directly - they implicitly have a tag
+   $coll->add_Annotation($annterm);
+
+   # implementation is by composition - you can get/set the term object
+   # e.g.
+   my $term = $annterm->term(); # term is-a Bio::Ontology::TermI
+   print "ontology term ",$term->name()," (ID ",$term->identifier(),
+         "), ontology ",$term->ontology()->name(),"\n";
+   $term = Bio::Ontology::Term->new(-name => 'ABC2',
+                                    -ontology => 'Gene Name');
+   $annterm->term($term);
+
+=head1 DESCRIPTION
+
+Ontology term annotation object
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Annotation::OntologyTerm;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Ontology::Term;
+use overload '""' => sub { $_[0]->identifier || ''};
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+use base qw(Bio::Root::Root Bio::AnnotationI Bio::Ontology::TermI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $sv = new Bio::Annotation::OntologyTerm;
+ Function: Instantiate a new OntologyTerm object
+ Returns : Bio::Annotation::OntologyTerm object
+ Args    : -term => $term to initialize the term data field [optional]
+           Most named arguments that Bio::Ontology::Term accepts will work
+           here too. -label is a synonym for -name, -tagname is a synonym for
+           -ontology.
+
+=cut
+
+sub new{
+    # The remaining arguments must be captured as the array @args: they are
+    # passed on both to the parent constructor and to _rearrange() below.
+    my ($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # Pick the named parameters this class understands out of @args.
+    my ($term,$name,$label,$identifier,$definition,$ont,$tag) =
+        $self->_rearrange([qw(TERM
+                              NAME
+                              LABEL
+                              IDENTIFIER
+                              DEFINITION
+                              ONTOLOGY
+                              TAGNAME)],
+                          @args);
+    if($term) {
+        # A ready-made term object takes precedence; the individual
+        # -name/-label/-identifier/-definition arguments are then ignored.
+        $self->term($term);
+    } else {
+        # -label is accepted as a synonym for -name.
+        $self->name($name || $label) if $name || $label;
+        $self->identifier($identifier) if $identifier;
+        $self->definition($definition) if $definition;
+    }
+    # -tagname is accepted as a synonym for -ontology; this is applied
+    # whether or not a term object was supplied.
+    $self->ontology($ont || $tag) if $ont || $tag;
+    return $self;
+}
+
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   : my $text = $obj->as_text
+ Function: Returns a textual representation of the annotation that
+           this object holds. Presently, it is tag name, name of the
+           term, and the is_obsolete attribute concatenated together
+           with a delimiter (|).
+
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub as_text{
+   my $self = shift;
+
+   # Three pipe-delimited fields: tag name, term name, and the obsolete
+   # flag (rendered as an empty string when it is false).
+   my $tag      = $self->tagname();
+   my $name     = $self->name();
+   my $obsolete = $self->is_obsolete() || '';
+
+   return "$tag|$name|$obsolete";
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   : my $hashtree = $value->hash_tree
+ Function: For supporting the AnnotationI interface, collects the term data
+           in a hashref with the keys 'name', 'identifier', 'definition'
+           and 'synonyms' (the latter pointing to an array reference)
+ Returns : hashref
+ Args    : none
+
+
+=cut
+
+sub hash_tree{
+   my ($self) = @_;
+
+   # Collect the term data under fixed keys. scalar() keeps each getter in
+   # scalar context, exactly as a plain scalar assignment would.
+   my $h = {
+       'name'       => scalar($self->name()),
+       'identifier' => scalar($self->identifier()),
+       'definition' => scalar($self->definition()),
+       'synonyms'   => [$self->get_synonyms()],
+   };
+
+   # Return the hashref explicitly. Without this the sub handed back the
+   # value of its last assignment, i.e. the synonyms array reference.
+   return $h;
+}
+
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to provide
+           a tag to AnnotationCollection when adding this object.
+
+           This is aliased to ontology() here.
+ Example :
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my ($self, @args) = @_;
+
+    # Set mode: hand the new value straight to ontology() and return
+    # whatever it returns.
+    if (@args) {
+        return $self->ontology(@args);
+    }
+
+    # Get mode: ontology() may yield a reference (an object) or a plain
+    # scalar; only a reference is asked for its name.
+    my $ont = $self->ontology();
+    if (ref $ont) {
+        return $ont->name();
+    }
+    return $ont;
+}
+
+=head1 Methods for Bio::Ontology::TermI compliance
+
+=cut
+
+=head2 term
+
+ Title   : term
+ Usage   : $obj->term($newval)
+ Function: Get/set the Bio::Ontology::TermI implementing object.
+
+           We implement TermI by composition, and this method sets/gets the
+           object we delegate to.
+ Example :
+ Returns : value of term (a Bio::Ontology::TermI compliant object)
+ Args    : new value (a Bio::Ontology::TermI compliant object, optional)
+
+
+=cut
+
+sub term{
+    my $self     = shift;
+    my $new_term = shift;
+
+    # Only a defined argument replaces the object we delegate to.
+    $self->{'term'} = $new_term if defined $new_term;
+
+    # Lazily create an empty term the first time one is needed, so the
+    # delegating accessors always have something to call.
+    $self->{'term'} = Bio::Ontology::Term->new() unless exists $self->{'term'};
+
+    return $self->{'term'};
+}
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $term->identifier( "0003947" );
+           or
+           print $term->identifier();
+ Function: Set/get for the identifier of this Term.
+ Returns : The identifier [scalar].
+ Args    : The identifier [scalar] (optional).
+
+=cut
+
+sub identifier {
+    my ($self, @args) = @_;
+
+    # Get/set is handled entirely by the wrapped term object.
+    return $self->term()->identifier(@args);
+} # identifier
+
+=head2 name
+
+ Title   : name
+ Usage   : $term->name( "N-acetylgalactosaminyltransferase" );
+           or
+           print $term->name();
+ Function: Set/get for the name of this Term.
+ Returns : The name [scalar].
+ Args    : The name [scalar] (optional).
+
+=cut
+
+sub name {
+    my $self = shift;
+
+    # The name is owned by the wrapped term; remaining arguments pass through.
+    my $term = $self->term();
+    return $term->name(@_);
+} # name
+
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $term->definition( "Catalysis of ..." );
+           or
+           print $term->definition();
+ Function: Set/get for the definition of this Term.
+ Returns : The definition [scalar].
+ Args    : The definition [scalar] (optional).
+
+=cut
+
+sub definition {
+    my ($self, @args) = @_;
+
+    # Forward the get/set request to the term we are composed of.
+    return $self->term()->definition(@args);
+} # definition
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $term->ontology( $top );
+           or
+           $top = $term->ontology();
+ Function: Set/get for a relationship between this Term and
+           another Term (e.g. the top level of the ontology).
+ Returns : The ontology of this Term [TermI].
+ Args    : The ontology of this Term [TermI or scalar -- which
+           becomes the name of the category term] (optional).
+
+=cut
+
+sub ontology {
+    my $self = shift;
+
+    # The ontology association is stored on the wrapped term, not here.
+    my $term = $self->term();
+    return $term->ontology(@_);
+}
+
+=head2 is_obsolete
+
+ Title   : is_obsolete
+ Usage   : $term->is_obsolete( 1 );
+           or
+           if ( $term->is_obsolete() )
+ Function: Set/get for the obsoleteness of this Term.
+ Returns : the obsoleteness [0 or 1].
+ Args    : the obsoleteness [0 or 1] (optional).
+
+=cut
+
+sub is_obsolete {
+    my ($self, @args) = @_;
+
+    # Obsoleteness is a property of the wrapped term.
+    return $self->term()->is_obsolete(@args);
+} # is_obsolete
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $term->comment( "Consider the term ..." );
+           or
+           print $term->comment();
+ Function: Set/get for an arbitrary comment about this Term.
+ Returns : A comment.
+ Args    : A comment (optional).
+
+=cut
+
+sub comment {
+    my $self = shift;
+
+    # Comments are kept on the wrapped term; pass the request along.
+    my $term = $self->term();
+    return $term->comment(@_);
+} # comment
+
+=head2 get_synonyms
+
+ Title   : get_synonyms()
+ Usage   : @aliases = $term->get_synonyms();
+ Function: Returns a list of aliases of this Term.
+ Returns : A list of aliases [array of [scalar]].
+ Args    :
+
+=cut
+
+sub get_synonyms {
+    my ($self, @args) = @_;
+
+    # The synonym list belongs to the wrapped term.
+    return $self->term()->get_synonyms(@args);
+} # get_synonyms
+
+=head2 add_synonym
+
+ Title   : add_synonym
+ Usage   : $term->add_synonym( @asynonyms );
+           or
+           $term->add_synonym( $synonym );
+ Function: Pushes one or more synonyms into the list of synonyms.
+ Returns :
+ Args    : One synonym [scalar] or a list of synonyms [array of [scalar]].
+
+=cut
+
+sub add_synonym {
+    my $self = shift;
+
+    # All supplied synonyms are handed to the wrapped term in one call.
+    my $term = $self->term();
+    return $term->add_synonym(@_);
+} # add_synonym
+
+
+=head2 remove_synonyms
+
+ Title   : remove_synonyms()
+ Usage   : $term->remove_synonyms();
+ Function: Deletes (and returns) the synonyms of this Term.
+ Returns : A list of synonyms [array of [scalar]].
+ Args    :
+
+=cut
+
+sub remove_synonyms {
+    my ($self, @args) = @_;
+
+    # Removal happens on the wrapped term; its return value is passed back.
+    return $self->term()->remove_synonyms(@args);
+} # remove_synonyms
+
+=head2 get_dblinks
+
+ Title   : get_dblinks()
+ Usage   : @ds = $term->get_dblinks();
+ Function: Returns a list of each dblinks of this GO term.
+ Returns : A list of dblinks [array of [scalars]].
+ Args    :
+
+=cut
+
+sub get_dblinks {
+    my $self = shift;
+
+    # The dblinks are stored on the wrapped term.
+    my $term = $self->term;
+    return $term->get_dblinks(@_);
+} # get_dblinks
+
+
+=head2 add_dblink
+
+ Title   : add_dblink
+ Usage   : $term->add_dblink( @dbls );
+           or
+           $term->add_dblink( $dbl );
+ Function: Pushes one or more dblinks
+           into the list of dblinks.
+ Returns :
+ Args    : One  dblink [scalar] or a list of
+            dblinks [array of [scalars]].
+
+=cut
+
+sub add_dblink {
+    my ($self, @args) = @_;
+
+    # One or more dblinks are forwarded to the wrapped term unchanged.
+    return $self->term->add_dblink(@args);
+} # add_dblink
+
+
+=head2 remove_dblinks
+
+ Title   : remove_dblinks()
+ Usage   : $term->remove_dblinks();
+ Function: Deletes (and returns) the definition references of this GO term.
+ Returns : A list of definition references [array of [scalars]].
+ Args    :
+
+=cut
+
+sub remove_dblinks {
+    my $self = shift;
+
+    # Let the wrapped term drop its dblinks and pass back what it returns.
+    my $term = $self->term;
+    return $term->remove_dblinks(@_);
+} # remove_dblinks
+
+=head2 get_secondary_ids
+
+ Title   : get_secondary_ids
+ Usage   : @ids = $term->get_secondary_ids();
+ Function: Returns a list of secondary identifiers of this Term.
+
+           Secondary identifiers mostly originate from merging terms,
+           or possibly also from splitting terms.
+
+ Returns : A list of secondary identifiers [array of [scalar]]
+ Args    :
+
+=cut
+
+sub get_secondary_ids {
+    my ($self, @args) = @_;
+
+    # Secondary identifiers are kept by the wrapped term.
+    return $self->term->get_secondary_ids(@args);
+} # get_secondary_ids
+
+
+=head2 add_secondary_id
+
+ Title   : add_secondary_id
+ Usage   : $term->add_secondary_id( @ids );
+           or
+           $term->add_secondary_id( $id );
+ Function: Adds one or more secondary identifiers to this term.
+ Returns :
+ Args    : One or more secondary identifiers [scalars]
+
+=cut
+
+sub add_secondary_id {
+    my $self = shift;
+
+    # Every supplied identifier goes to the wrapped term in a single call.
+    my $term = $self->term;
+    return $term->add_secondary_id(@_);
+} # add_secondary_id
+
+
+=head2 remove_secondary_ids
+
+ Title   : remove_secondary_ids
+ Usage   : $term->remove_secondary_ids();
+ Function: Deletes (and returns) the secondary identifiers of this Term.
+ Returns : The previous list of secondary identifiers [array of [scalars]]
+ Args    :
+
+=cut
+
+sub remove_secondary_ids {
+    my ($self, @args) = @_;
+
+    # The wrapped term performs the removal; its return value is passed back.
+    return $self->term->remove_secondary_ids(@args);
+} # remove_secondary_ids
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Reference.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Reference.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Reference.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,556 @@
+# $Id: Reference.pm,v 1.24.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::Reference
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::Reference - Specialised DBLink object for Literature References
+
+=head1 SYNOPSIS
+
+    $reg = Bio::Annotation::Reference->new( -title    => 'title line',
+                                            -location => 'location line',
+                                            -authors  => 'author line',
+                                            -medline  => 998122 );
+
+=head1 DESCRIPTION
+
+Object which presents a literature reference. This is considered to be
+a specialised form of database link. The additional methods provided
+are all set/get methods to store strings commonly associated with
+references, in particular title, location (ie, journal page) and
+authors line.
+
+There is no attempt to do anything more than store these things as
+strings for processing elsewhere. This is mainly because parsing these
+things sucks and is generally specific to the particular format one is
+using. To provide an easy route to go format --E<gt> object --E<gt> format
+without losing data, we keep them as strings. Feel free to post to the
+list for a better solution, but in general this gets very messy very
+fast...
+
+=head1 AUTHOR - Ewan Birney 
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Annotation::Reference;
+use strict;
+use overload '""' => sub { $_[0]->title || ''};
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+use Bio::AnnotationI;
+
+use base qw(Bio::Annotation::DBLink);
+
+=head2 new
+
+ Title   : new
+ Usage   : $ref = Bio::Annotation::Reference->new( -title => 'title line',
+						   -authors => 'author line',
+						   -location => 'location line',
+						   -medline => 9988812);
+ Function:
+ Example :
+ Returns : a new Bio::Annotation::Reference object
+ Args    : a hash with optional title, authors, location, medline, pubmed,
+           start, end, consortium, rp and rg attributes
+
+
+=cut
+
+sub new{
+    # The remaining arguments must be captured as the array @args: they are
+    # passed on both to the parent constructor and to _rearrange() below.
+    my ($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # Pick the named parameters this class understands out of @args.
+    my ($start,$end,$authors,$consortium,$location,$title,$medline,
+        $pubmed,$rp,$rg) =
+        $self->_rearrange([qw(START
+                              END
+                              AUTHORS
+                              CONSORTIUM
+                              LOCATION
+                              TITLE
+                              MEDLINE
+                              PUBMED
+                              RP
+                              RG
+                              )],@args);
+
+    # An attribute is only set when a defined value was supplied for it,
+    # so omitted arguments leave the corresponding slot untouched.
+    defined $start      && $self->start($start);
+    defined $end        && $self->end($end);
+    defined $authors    && $self->authors($authors);
+    defined $consortium && $self->consortium($consortium);
+    defined $location   && $self->location($location);
+    defined $title      && $self->title($title);
+    defined $medline    && $self->medline($medline);
+    defined $pubmed     && $self->pubmed($pubmed);
+    defined $rp         && $self->rp($rp);
+    defined $rg         && $self->rg($rg);
+    return $self;
+}
+
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub as_text{
+   my $self = shift;
+
+   # Deliberately terse: only the title is shown, since rendering a full
+   # citation could get out of hand.
+   my $title = $self->title;
+   return "Reference: $title";
+}
+
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub hash_tree{
+    my $self = shift;
+
+    my %tree;
+
+    # These keys are always present, even when their value is undef.
+    for my $field (qw(title authors location medline)) {
+        $tree{$field} = $self->$field();
+    }
+
+    # These keys only appear once the corresponding attribute is defined.
+    for my $field (qw(start end pubmed)) {
+        my $val = $self->$field();
+        $tree{$field} = $val if defined $val;
+    }
+
+    return \%tree;
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to provide
+           a tag to Bio::AnnotationCollectionI when adding this object. When
+           obtaining an AnnotationI object from the collection, the collection
+           will set the value to the tag under which it was stored unless the
+           object has a tag stored already.
+ Example :
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+
+=head1 Specific accessors for References
+
+=cut
+
+
+=head2 start
+
+ Title   : start
+ Usage   : $self->start($newval)
+ Function: Gives the reference start base
+ Example :
+ Returns : value of start
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub start {
+    my $self      = shift;
+    my $new_start = shift;
+
+    # Only a defined argument updates the stored value; undef means "read".
+    $self->{'start'} = $new_start if defined $new_start;
+
+    return $self->{'start'};
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $self->end($newval)
+ Function: Gives the reference end base
+ Example :
+ Returns : value of end
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub end {
+    my ($self, $new_end) = @_;
+
+    # No argument, or an undefined one, is a plain read.
+    return $self->{'end'} unless defined $new_end;
+
+    return $self->{'end'} = $new_end;
+}
+
+=head2 rp
+
+ Title   : rp
+ Usage   : $self->rp($newval)
+ Function: Gives the RP line. No attempt is made to parse this line.
+ Example :
+ Returns : value of rp
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub rp{
+    my $self   = shift;
+    my $new_rp = shift;
+
+    # The RP line is stored verbatim; undef leaves the current value alone.
+    if (defined $new_rp) {
+        $self->{'rp'} = $new_rp;
+    }
+    return $self->{'rp'};
+}
+
+=head2 rg
+
+ Title   : rg
+ Usage   : $obj->rg($newval)
+ Function: Gives the RG line. This is Swissprot/Uniprot specific, and
+           if set will usually be identical to the authors attribute,
+           but the swissprot manual does allow both RG and RA (author)
+           to be present for the same reference.
+
+ Example :
+ Returns : value of rg (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub rg{
+    my ($self, @args) = @_;
+
+    # Unlike most accessors in this class, set mode is keyed on the presence
+    # of an argument, so an explicit undef resets the RG line.
+    if (@args) {
+        $self->{'rg'} = $args[0];
+    }
+    return $self->{'rg'};
+}
+
+=head2 authors
+
+ Title   : authors
+ Usage   : $self->authors($newval)
+ Function: Gives the author line. No attempt is made to parse the author line
+ Example :
+ Returns : value of authors
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub authors{
+    my ($self, $new_authors) = @_;
+
+    # The author line is kept as an unparsed string.
+    return $self->{'authors'} unless defined $new_authors;
+
+    return $self->{'authors'} = $new_authors;
+}
+
+=head2 location
+
+ Title   : location
+ Usage   : $self->location($newval)
+ Function: Gives the location line. No attempt is made to parse the location line
+ Example :
+ Returns : value of location
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub location{
+    my $self         = shift;
+    my $new_location = shift;
+
+    # The location line is kept as an unparsed string; undef means "read".
+    $self->{'location'} = $new_location if defined $new_location;
+
+    return $self->{'location'};
+}
+
+=head2 title
+
+ Title   : title
+ Usage   : $self->title($newval)
+ Function: Gives the title line (if exists)
+ Example :
+ Returns : value of title
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub title{
+    my ($self, $new_title) = @_;
+
+    # Store only defined values; a bare call simply reads the title.
+    defined $new_title and $self->{'title'} = $new_title;
+
+    return $self->{'title'};
+}
+
+=head2 medline
+
+ Title   : medline
+ Usage   : $self->medline($newval)
+ Function: Gives the medline number
+ Example :
+ Returns : value of medline
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub medline{
+    my $self        = shift;
+    my $new_medline = shift;
+
+    # A defined argument replaces the MEDLINE number; undef is a plain read.
+    $self->{'medline'} = $new_medline if defined $new_medline;
+
+    return $self->{'medline'};
+}
+
+=head2 pubmed
+
+ Title   : pubmed
+ Usage   : $refobj->pubmed($newval)
+ Function: Get/Set the PubMed number, if it is different from the MedLine
+           number.
+ Example :
+ Returns : value of pubmed
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub pubmed {
+    my ($self, $new_pubmed) = @_;
+
+    # No argument, or an undefined one, is a plain read of the PubMed number.
+    return $self->{'pubmed'} unless defined $new_pubmed;
+
+    return $self->{'pubmed'} = $new_pubmed;
+}
+
+=head2 database
+
+ Title   : database
+ Usage   :
+ Function: Overrides DBLink database to be hard coded to 'MEDLINE' (or 'PUBMED'
+           if only pubmed id has been supplied), unless the database has been
+           set explicitly before.
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub database{
+    my $self = shift;
+
+    # Fallback label: 'PUBMED' only when a PubMed id is the sole id known,
+    # 'MEDLINE' in every other case.
+    my $only_pubmed = !defined($self->medline) && defined($self->pubmed);
+    my $fallback    = $only_pubmed ? 'PUBMED' : 'MEDLINE';
+
+    # Whatever the parent accessor returns wins as long as it is true.
+    return $self->SUPER::database(@_) || $fallback;
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   :
+ Function: Overrides DBLink primary_id to provide medline number, or pubmed
+           number if only that has been defined
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub primary_id{
+    my $self = shift;
+
+    # A supplied id is always stored as the MEDLINE number.
+    $self->medline(@_) if @_;
+
+    # Prefer MEDLINE; fall back to PubMed only when MEDLINE is unset and a
+    # PubMed number exists.
+    my $medline = $self->medline;
+    return $self->pubmed if !defined $medline && defined $self->pubmed;
+    return $medline;
+}
+
+=head2 optional_id
+
+ Title   : optional_id
+ Usage   :
+ Function: Overrides DBLink optional_id to provide the PubMed number.
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub optional_id{
+   my $self = shift;
+
+   # The optional id of a reference is its PubMed number; get and set
+   # requests are handed to pubmed() unchanged.
+   return $self->pubmed(@_);
+}
+
+=head2 publisher
+
+ Title   : publisher
+ Usage   : $self->publisher($newval)
+ Function: Gives the publisher line. No attempt is made to parse the publisher line
+ Example :
+ Returns : value of publisher
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub publisher {
+   my $self          = shift;
+   my $new_publisher = shift;
+
+   # The publisher line is kept as an unparsed string; undef means "read".
+   $self->{'publisher'} = $new_publisher if defined $new_publisher;
+
+   return $self->{'publisher'};
+}
+
+
+=head2 editors
+
+ Title   : editors
+ Usage   : $self->editors($newval)
+ Function: Gives the editors line. No attempt is made to parse the editors line
+ Example :
+ Returns : value of editors
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub editors {
+   my ($self, $new_editors) = @_;
+
+   # No argument, or an undefined one, is a plain read of the editors line.
+   return $self->{'editors'} unless defined $new_editors;
+
+   return $self->{'editors'} = $new_editors;
+}
+
+
+=head2 encoded_ref
+
+ Title   : encoded_ref
+ Usage   : $self->encoded_ref($newval)
+ Function: Gives the encoded_ref line. No attempt is made to parse the encoded_ref line
+ 	(this is added for reading PDB records (REFN record), where this contains
+	 ISBN/ISSN/ASTM code)
+ Example :
+ Returns : value of encoded_ref
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub encoded_ref {
+   my $self    = shift;
+   my $new_ref = shift;
+
+   # The encoded reference is stored verbatim; undef leaves it untouched.
+   if (defined $new_ref) {
+       $self->{'encoded_ref'} = $new_ref;
+   }
+   return $self->{'encoded_ref'};
+}
+
+=head2 consortium
+
+ Title   : consortium
+ Usage   : $self->consortium($newval)
+ Function: Gives the consortium line. No attempt is made to parse the consortium line
+ Example :
+ Returns : value of consortium
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub consortium{
+    my ($self, $new_consortium) = @_;
+
+    # Store only defined values; a bare call simply reads the consortium line.
+    defined $new_consortium and $self->{'consortium'} = $new_consortium;
+
+    return $self->{'consortium'};
+}
+
+=head2 gb_reference
+
+ Title   : gb_reference
+ Usage   : $obj->gb_reference($newval)
+ Function: Gives the generic GenBank REFERENCE line. This is GenBank-specific.
+           If set, this includes everything on the reference line except
+	   the REFERENCE tag and the reference count.  This is mainly a
+	   fallback for the few instances when REFERENCE lines have unusual
+	   additional information such as split sequence locations, feature
+	   references, etc.  See Bug 2020 in Bugzilla for more information.
+ Example :
+ Returns : value of gb_reference (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub gb_reference{
+    my $self       = shift;
+    my $new_gb_ref = shift;
+
+    # The GenBank REFERENCE remainder is kept verbatim; undef means "read".
+    $self->{'gb_reference'} = $new_gb_ref if defined $new_gb_ref;
+
+    return $self->{'gb_reference'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/SimpleValue.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/SimpleValue.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/SimpleValue.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,234 @@
+# $Id: SimpleValue.pm,v 1.18.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::SimpleValue
+#
+# Cared for by bioperl <bioperl-l at bioperl.org>
+#
+# Copyright bioperl
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::SimpleValue - A simple scalar
+
+=head1 SYNOPSIS
+
+   use Bio::Annotation::SimpleValue;
+   use Bio::Annotation::Collection;
+
+   my $col = new Bio::Annotation::Collection;
+   my $sv = new Bio::Annotation::SimpleValue(-value => 'someval');
+   $col->add_Annotation('tagname', $sv);
+
+=head1 DESCRIPTION
+
+Scalar value annotation object
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR  - Ewan Birney 
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Annotation::SimpleValue;
+use strict;
+use overload '""' => sub { $_[0]->value};
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+# Object preamble - inherits from Bio::Root::Root
+
+#use Bio::Ontology::TermI;
+
+use base qw(Bio::Root::Root Bio::AnnotationI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $sv = new Bio::Annotation::SimpleValue;
+ Function: Instantiate a new SimpleValue object
+ Returns : Bio::Annotation::SimpleValue object
+ Args    : -value    => $value to initialize the object data field [optional]
+           -tagname  => $tag to initialize the tagname [optional]
+           -tag_term => ontology term representation of the tag [optional]
+
+=cut
+
+sub new{
+   # The remaining arguments must be captured as the array @args: they are
+   # passed on both to the parent constructor and to _rearrange() below.
+   my ($class,@args) = @_;
+
+   my $self = $class->SUPER::new(@args);
+
+   # Pick the named parameters this class understands out of @args.
+   my ($value,$tag,$term) =
+       $self->_rearrange([qw(VALUE TAGNAME TAG_TERM)], @args);
+
+   # Set the term first: once a tag term is present, tagname() forwards
+   # the tag to it, so the term has to be in place before the tag is set.
+   defined $term   && $self->tag_term($term);
+   defined $value  && $self->value($value);
+   defined $tag    && $self->tagname($tag);
+
+   return $self;
+}
+
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   : my $text = $obj->as_text
+ Function: return the string "Value: $v" where $v is the value
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub as_text{
+   my $self = shift;
+
+   # Human-readable form: the literal label followed by the stored value.
+   my $value = $self->value;
+   return "Value: $value";
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   : my $hashtree = $value->hash_tree
+ Function: For supporting the AnnotationI interface just returns the value
+           as a hashref with the key 'value' pointing to the value
+ Returns : hashref
+ Args    : none
+
+
+=cut
+
+sub hash_tree{
+    my ($self) = @_;
+
+    # A one-entry hash: the key 'value' points at the stored value.
+    # scalar() keeps value() in scalar context inside the hash constructor.
+    return { 'value' => scalar($self->value) };
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to
+           provide a tag to AnnotationCollection when adding this
+           object.
+
+ Example :
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my ($self, @args) = @_;
+
+    # Set mode always records the plain-string tag locally, so a copy
+    # survives even if an attached ontology term is removed later.
+    $self->{'tagname'} = $args[0] if @args;
+
+    # Without an ontology term the locally stored string is the answer.
+    return $self->{'tagname'} unless $self->{'_tag_term'};
+
+    # With a term attached, its name() handles both the get and the set.
+    return $self->tag_term->name(@args);
+}
+
+
+=head1 Specific accessors for SimpleValue
+
+=cut
+
+=head2 value
+
+ Title   : value
+ Usage   : $obj->value($newval)
+ Function: Get/Set the value for simplevalue
+ Returns : value of value
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub value{
+   my $self      = shift;
+   my $new_value = shift;
+
+   # Only a defined argument replaces the stored value; undef means "read".
+   $self->{'value'} = $new_value if defined $new_value;
+
+   return $self->{'value'};
+}
+
+=head2 tag_term
+
+ Title   : tag_term
+ Usage   : $obj->tag_term($newval)
+ Function: Get/set the L<Bio::Ontology::TermI> object representing
+           the tag name.
+
+           This is so you can specifically relate the tag of this
+           annotation to an entry in an ontology. You may want to do
+           this to associate an identifier with the tag, or a
+           particular category, such that you can better match the tag
+           against a controlled vocabulary.
+
+           This accessor will return undef if it has never been set
+           before in order to allow this annotation to stay
+           light-weight if an ontology term representation of the tag
+           is not needed. Once it is set to a valid value, tagname()
+           will actually delegate to the name() of this term.
+
+ Example :
+ Returns : a L<Bio::Ontology::TermI> compliant object, or undef
+ Args    : on set, new value (a L<Bio::Ontology::TermI> compliant
+           object or undef, optional)
+
+
+=cut
+
+sub tag_term{
+    my ($self, @args) = @_;
+
+    # Set mode is keyed on the presence of an argument, so passing undef
+    # detaches a previously stored term.
+    if (@args) {
+        $self->{'_tag_term'} = $args[0];
+    }
+    return $self->{'_tag_term'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/StructuredValue.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/StructuredValue.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/StructuredValue.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,349 @@
+# $Id: StructuredValue.pm,v 1.7.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::StructuredValue
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::StructuredValue - A scalar with embedded structured
+information
+
+=head1 SYNOPSIS
+
+   use Bio::Annotation::StructuredValue;
+   use Bio::Annotation::Collection;
+
+   my $col = new Bio::Annotation::Collection;
+   my $sv = new Bio::Annotation::StructuredValue(-value => 'someval');
+   $col->add_Annotation('tagname', $sv);
+
+=head1 DESCRIPTION
+
+Scalar value annotation object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp-at-gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Annotation::StructuredValue;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::AnnotationI;
+use overload '""' => sub { $_[0]->value || ''};
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+use base qw(Bio::Annotation::SimpleValue);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $sv = new Bio::Annotation::StructuredValue;
+ Function: Instantiate a new StructuredValue object
+ Returns : Bio::Annotation::StructuredValue object
+ Args    : -value => $value to initialize the object data field [optional]
+           -tagname => $tag to initialize the tagname [optional]
+
+=cut
+
+sub new{
+   # Constructor. Accepts the named arguments -value and -tagname (both
+   # optional) and returns the new object.
+   my ($class,@args) = @_;
+
+   # the parent constructor sees the full, unmodified argument list
+   my $self = $class->SUPER::new(@args);
+
+   my ($value,$tag) = $self->_rearrange([qw(VALUE TAGNAME)], @args);
+
+   # reset the value tree to empty, then apply -value and -tagname
+   # if (and only if) they were supplied
+   $self->{'values'} = [];
+   defined $value  && $self->value($value);
+   defined $tag    && $self->tagname($tag);
+
+   return $self;
+}
+
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   : my $text = $obj->as_text
+ Function: return the string "Value: $v" where $v is the value
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub as_text{
+   my $self = shift;
+
+   # value() is called without arguments, i.e. in get mode
+   my $text = $self->value;
+   return "Value: ".$text;
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   : my $hashtree = $value->hash_tree
+ Function: For supporting the AnnotationI interface just returns the value
+           as a hashref with the key 'value' pointing to the value
+ Returns : hashref
+ Args    : none
+
+
+=cut
+
+sub hash_tree{
+   # Returns a hash reference with the single key 'value' pointing to the
+   # result of value() called in get mode.
+   my ($self) = @_;
+
+   my $h = {};
+   $h->{'value'} = $self->value;
+   # Return the hashref itself: without an explicit return the sub would
+   # yield the result of the assignment above, i.e. the bare value.
+   return $h;
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to provide
+           a tag to AnnotationCollection when adding this object.
+ Example :
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my $self = shift;
+    my $tag  = shift;
+
+    # an undefined (or missing) argument means plain get
+    return $self->{'tagname'} unless defined $tag;
+
+    $self->{'tagname'} = $tag;
+    return $self->{'tagname'};
+}
+
+
+=head1 Specific accessors for StructuredValue
+
+=cut
+
+=head2 value
+
+ Title   : value
+ Usage   : $obj->value($newval)
+ Function: Get/set the value for this annotation.
+
+           Set mode is here only to retain compatibility with
+           SimpleValue. It is equivalent to calling
+           add_value([0], $newval).
+
+           In get mode, this implementation allows to pass additional
+           parameters that control how the structured annotation
+           components will be joined together to form a
+           string. Recognized are presently
+               -joins     a reference to an array of join strings, the
+                          elements at index i applying to joining
+                          annotations at dimension i. The last element
+                          will be re-used for dimensions higher than i.
+                          Defaults to ['; '].
+               -brackets  a reference to an array of two strings
+                          denoting the opening and closing brackets for
+                          the elements of one dimension, if there is
+                          more than one element in the dimension.
+                          Defaults to ['(',')'].
+
+ Returns : value of value
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub value{
+    # Set mode: called with exactly one defined argument; stores it at
+    #   index [0] via add_value() and returns whatever add_value() returns.
+    # Get mode: called with no argument or with -joins / -brackets options;
+    #   returns the value tree rendered as a single string.
+    my ($self,$value,@args) = @_;
+
+    # set mode?
+    return $self->add_value([0], $value) if defined($value) && (@args == 0);
+    # no, get mode: $value is really the first element of the option list
+    # (or undef), so put it back before parsing the options
+    unshift(@args, $value);
+    my ($joins, $brackets) =
+        $self->_rearrange([qw(JOINS BRACKETS)], @args);
+    $joins = ['; '] unless $joins;
+    $brackets = ['(', ')'] unless $brackets;
+    my $txt = _to_text($self->{'values'}, $joins, $brackets);
+    # if there's only brackets at the start and end, remove them
+    if((@{$self->{'values'}} == 1) &&
+       (length($brackets->[0]) == 1) && (length($brackets->[1]) == 1)) {
+        # quotemeta escapes only what needs escaping; blindly prefixing a
+        # backslash would turn a bracket such as 'b' or 'd' into a regex
+        # escape sequence instead of a literal character
+        my $open  = quotemeta($brackets->[0]);
+        my $close = quotemeta($brackets->[1]);
+        my $re = $open.'([^'.$close.']*)'.$close;
+        $txt =~ s/^$re$/$1/;
+    }
+    return $txt;
+}
+
+sub _to_text{
+    # Plain function (not a method): renders the array referenced by $arr
+    # as one string, recursing into nested array references.
+    my ($arr, $joins, $brackets, $rec_n) = @_;
+
+    my $depth = defined($rec_n) ? $rec_n : 0;
+    # join string for this depth; depths beyond the end of @$joins
+    # re-use the last join string
+    my $join_idx = $depth < @$joins ? $depth : $#{$joins};
+
+    my @parts;
+    foreach my $elem (@$arr) {
+        if(! ref($elem)) {
+            # plain scalar: taken as is
+            push(@parts, $elem);
+        } elsif(ref($elem) eq "ARRAY") {
+            # nested dimension: render recursively, one level deeper
+            push(@parts, _to_text($elem, $joins, $brackets, $depth+1));
+        } else {
+            # any other reference is asked for its value()
+            push(@parts, $elem->value());
+        }
+    }
+    my $txt = join($joins->[$join_idx], @parts);
+
+    # below the top level, multi-element dimensions get wrapped in brackets
+    if($depth && (@$arr > 1)) {
+        $txt = $brackets->[0] . $txt . $brackets->[1];
+    }
+    return $txt;
+}
+
+=head2 get_values
+
+ Title   : get_values
+ Usage   :
+ Function: Get the top-level array of values. Each of the elements will
+           recursively be a reference to an array or a scalar, depending
+           on the depth of this structured value annotation.
+ Example :
+ Returns : an array
+ Args    : none
+
+
+=cut
+
+sub get_values{
+    my ($self) = @_;
+
+    # dereference the top-level array; nested array references are
+    # handed out as they are, not copied
+    my $top = $self->{'values'};
+    return @$top;
+}
+
+=head2 get_all_values
+
+ Title   : get_all_values
+ Usage   :
+ Function: Flattens all values in this structured annotation and
+           returns them as an array.
+ Example :
+ Returns : the (flat) array of values
+ Args    : none
+
+
+=cut
+
+sub get_all_values{
+    my $self = shift;
+
+    # Lazy implementation on top of value(): render everything with a
+    # marker string as the only join and with empty brackets, then split
+    # on the marker again.
+    my $joins    = ['@!@'];
+    my $brackets = ['',''];
+    my $flat = $self->value(-joins => $joins, -brackets => $brackets);
+    return split(/\@!\@/, $flat);
+}
+
+=head2 add_value
+
+ Title   : add_value
+ Usage   :
+ Function: Adds the given value to the structured annotation at the
+           given index.
+
+           The index is multi-dimensional, with the first dimension
+           applying to the first level, and so forth. If a particular
+           dimension or a particular index does not exist yet, it will
+           be created. If it does exist and adding the value would
+           mean replacing a scalar with an array reference, we throw
+           an exception to prevent unintended damage. An index of -1
+           at any dimension means append.
+
+           If an array of values is to be added, it will create an
+           additional dimension at the index specified, unless the
+           last index value is -1, in which case they will all be
+           appended to the last dimension.
+
+ Example :
+ Returns : none
+ Args    : the index at which to add (a reference to an array)
+           the value(s) to add
+
+
+=cut
+
+sub add_value{
+    # Args: $index  - reference to an array of indexes, one per dimension
+    #       @values - the value(s) to store at that position
+    # Throws if an intermediate index addresses an existing scalar.
+    my ($self,$index,@values) = @_;
+
+    my $tree = $self->{'values'};
+    # walk a copy of the index so the caller's array is not modified
+    my @path = @$index;
+    my $lastidx = pop(@path);
+    # descend through all dimensions but the last one
+    foreach my $i (@path) {
+        if($i < 0) {
+            # negative index: append a fresh sub-array and descend into it
+            my $subtree = [];
+            push(@$tree, $subtree);
+            $tree = $subtree;
+        } elsif((! $tree->[$i]) || (ref($tree->[$i]) eq "ARRAY")) {
+            # empty slot or existing sub-array: create if needed, descend
+            $tree->[$i] = [] unless ref($tree->[$i]) eq "ARRAY";
+            $tree = $tree->[$i];
+        } else {
+            $self->throw("element $i is a scalar but not in last dimension");
+        }
+    }
+    if($lastidx < 0) {
+        # negative last index: append all values to this dimension
+        push(@$tree, @values);
+    } elsif(@values < 2) {
+        # at most one value: store it directly as a scalar
+        $tree->[$lastidx] = shift(@values);
+    } else {
+        # several values: store them as a new dimension
+        $tree->[$lastidx] = [@values];
+    }
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Target.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Target.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/Target.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,177 @@
+# $Id: Target.pm,v 1.7.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::Target
+#
+# Cared for by Scott Cain <cain at cshl.org>
+#
+# Copyright Scott Cain
+#
+# Based on the Bio::Annotation::DBLink by Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::Target - Provides an object which represents a target (ie, a  similarity hit) from one object to something in another database
+
+=head1 SYNOPSIS
+
+   $target1 = new Bio::Annotation::Target(-target_id  => 'F321966.1',
+                                          -start      => 1,
+                                          -end        => 200,
+                                          -strand     => 1,   # or -1
+                                         );
+
+   # or
+
+   $target2 = new Bio::Annotation::Target();
+   $target2->target_id('Q75IM5');
+   $target2->start(7);
+   # ... etc ...
+
+   # Target is-a Bio::AnnotationI object, can be added to annotation
+   # collections, e.g. the one on features or seqs
+   $feat->annotation->add_Annotation('Target', $target2);
+
+
+=head1 DESCRIPTION
+
+Provides an object which represents a target (ie, a similarity hit) from
+one object to something in another database without prescribing what is
+in the other database
+
+=head1 AUTHOR - Scott Cain
+
+Scott Cain - cain at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Annotation::Target;
+use strict;
+use overload '""' => sub { $_[0]->as_text || ''};
+use overload 'eq' => sub { "$_[0]" eq "$_[1]" };
+
+
+use base qw(Bio::Root::Root Bio::AnnotationI Bio::Range);
+
+
+sub new {
+  # Constructor. Accepts the optional named arguments -target_id, -start,
+  # -end and -strand and returns the new object.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  my ($target_id, $tstart, $tend, $tstrand) =
+      $self->_rearrange([ qw(
+                              TARGET_ID
+                              START
+                              END
+                              STRAND ) ], @args);
+
+  # Only true values are applied, so an argument of 0 or '' is treated
+  # like an omitted one.
+  $target_id    && $self->target_id($target_id);
+  $tstart       && $self->start($tstart);
+  $tend         && $self->end($tend);
+  $tstrand      && $self->strand($tstrand);
+
+  return $self;
+}
+
+=head1 AnnotationI implementing functions
+
+=cut
+
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub as_text{
+  my $self = shift;
+
+  # Unset or otherwise false fields (e.g. 0) are rendered as empty
+  # strings; the separating blanks are always emitted.
+  my @fields = (
+      $self->target_id || '',
+      $self->start     || '',
+      $self->end       || '',
+      $self->strand    || '',
+  );
+
+  return "Target=" . join(" ", @fields);
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to
+           provide a tag to Bio::AnnotationCollectionI when adding
+           this object. When obtaining an AnnotationI object from the
+           collection, the collection will set the value to the tag
+           under which it was stored unless the object has a tag
+           stored already.
+
+ Example :
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my ($self, @rest) = @_;
+
+    # only a defined first argument overwrites the stored tag name
+    my $newtag = $rest[0];
+    $self->{'tagname'} = $newtag if defined $newtag;
+
+    return $self->{'tagname'};
+}
+
+=head1 Specific accessors for Targets
+
+=cut
+
+=head2 target_id
+
+=over
+
+=item Usage
+
+  $obj->target_id()        #get existing value
+  $obj->target_id($newval) #set new value
+
+=item Function
+
+=item Returns
+
+value of target_id (a scalar)
+
+=item Arguments
+
+new value of target_id (to set)
+
+=back
+
+=cut
+
+sub target_id {
+    my ($self, $id) = @_;
+
+    # set mode only for a defined argument; undef never clears the id
+    if (defined $id) {
+        $self->{'target_id'} = $id;
+    }
+    return $self->{'target_id'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/TypeManager.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/TypeManager.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Annotation/TypeManager.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,170 @@
+# $Id: TypeManager.pm,v 1.6.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Annotation::TypeManager
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Annotation::TypeManager - Manages types for annotation collections
+
+=head1 SYNOPSIS
+
+    # default type manager
+
+    $tm = Bio::Annotation::TypeManager->new();
+
+    # $key is a string or a Bio::Ontology::TermI compliant object
+    print "The type for $key is ",$tm->type_for_key($key),"\n";
+
+    if( !$tm->is_valid($key,$object) ) {
+        $self->throw("Invalid object for key $key");
+    }
+
+=head1 DESCRIPTION
+
+Manages types for annotation collections.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Annotation::TypeManager;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+
+use base qw(Bio::Root::Root);
+# new() can be inherited from Bio::Root::Root
+
+=head2 new
+
+ Title   : new
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub new{
+   # Constructor. Returns a type manager preloaded with the default
+   # key-to-class mappings registered below.
+   my ($class,@args) = @_;
+
+   my $self = $class->SUPER::new(@args);
+
+   # key => class name table, filled through _add_type_map()
+   $self->{'_type'} = {};
+
+   $self->_add_type_map('reference',"Bio::Annotation::Reference");
+   $self->_add_type_map('comment',"Bio::Annotation::Comment");
+   $self->_add_type_map('dblink',"Bio::Annotation::DBLink");
+
+   return $self;
+}
+
+
+=head2 type_for_key
+
+ Title   : type_for_key
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub type_for_key{
+   my $self = shift;
+   my $key  = shift;
+
+   # a Bio::Ontology::TermI object is looked up under its name()
+   if( ref($key) && $key->isa("Bio::Ontology::TermI") ) {
+       $key = $key->name();
+   }
+
+   # undef if nothing has been registered for this key
+   return $self->{'_type'}->{$key};
+}
+
+
+=head2 is_valid
+
+ Title   : is_valid
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub is_valid{
+   my ($self,$key,$object) = @_;
+
+   # anything that is not a reference cannot be type-checked at all
+   unless( defined $object && ref $object ) {
+       $self->throw("Cannot type an object [$object]!");
+   }
+
+   # 1 if the object is-a the class registered for the key, 0 otherwise
+   return $object->isa($self->type_for_key($key)) ? 1 : 0;
+}
+
+
+=head2 _add_type_map
+
+ Title   : _add_type_map
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _add_type_map{
+   my $self = shift;
+   my ($key, $type) = @_;
+
+   # a Bio::Ontology::TermI object is registered under its name()
+   if( ref($key) && $key->isa("Bio::Ontology::TermI") ) {
+       $key = $key->name();
+   }
+
+   # the stored type doubles as the return value
+   return $self->{'_type'}->{$key} = $type;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationCollectionI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationCollectionI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationCollectionI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,216 @@
+# $Id: AnnotationCollectionI.pm,v 1.17.4.4 2006/10/02 23:10:11 sendu Exp $
+#
+# BioPerl module for Bio::AnnotationCollectionI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AnnotationCollectionI - Interface for annotation collections
+
+=head1 SYNOPSIS
+
+   # get an AnnotationCollectionI somehow, eg
+
+   $ac = $seq->annotation();
+
+   foreach $key ( $ac->get_all_annotation_keys() ) {
+       @values = $ac->get_Annotations($key);
+       foreach $value ( @values ) {
+          # value is an Bio::AnnotationI, and defines a "as_text" method
+          print "Annotation ",$key," stringified value ",$value->as_text,"\n";
+
+          # also defined hash_tree method, which allows data orientated
+          # access into this object
+          $hash = $value->hash_tree();
+       }
+   }
+
+=head1 DESCRIPTION
+
+Annotation Collections are a way of storing a series of "interesting
+facts" about something. We call an "interesting fact" in Bioperl an
+Annotation (this differs from a Sequence Feature, which is called
+a Sequence Feature and may or may not have an Annotation Collection).
+
+A benefit of this approach is that all sorts of simple, interesting
+observations can be collected, the possibility is endless.
+
+The Bioperl approach is that the "interesting facts" are represented by
+Bio::AnnotationI objects. The interface Bio::AnnotationI guarantees
+two methods
+
+   $obj->as_text(); # string formatted to display to users
+
+and
+
+   $obj->hash_tree(); # hash with defined rules for data-orientated discovery
+
+The hash_tree method is designed to play well with XML output and
+other "nested-tag-of-data-values", think BoulderIO and/or Ace stuff. For more
+information see L<Bio::AnnotationI>.
+
+Annotations are stored in AnnotationCollections, each Annotation under a
+different "tag". The tags allow simple discovery of the available annotations,
+and in some cases (like the tag "gene_name") indicate how to interpret the
+data underneath the tag. The tag is only one tag deep and each tag can have an
+array of values.
+
+In addition, AnnotationCollections are guaranteed to maintain consistent
+types of objects under each tag - at least that each object complies to one
+interface. The "standard" AnnotationCollection insists the following rules
+are set up:
+
+  Tag            Object
+  ---            ------
+  comment        Bio::Annotation::Comment
+  dblink         Bio::Annotation::DBLink
+  description    Bio::Annotation::SimpleValue
+  gene_name      Bio::Annotation::SimpleValue
+  ontology_term  Bio::Annotation::OntologyTerm
+  reference      Bio::Annotation::Reference
+
+These tags are the implicit tags that the SeqIO system needs to round-trip
+GenBank/EMBL/Swissprot.
+
+However, you as a user and us collectively as a community can grow the
+"standard" tag mapping over time and specifically for a particular
+area.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods
+are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::AnnotationCollectionI;
+use strict;
+
+# Interface preamble - inherits from Bio::Root::RootI
+
+
+use base qw(Bio::Root::RootI);
+
+=head1 ACCESSOR METHODS
+
+Use these for Bio::AnnotationI object access.
+
+=cut
+
+=head2 get_all_annotation_keys()
+
+ Usage   : $ac->get_all_annotation_keys()
+ Function: gives back a list of annotation keys, which are simple text strings
+ Returns : list of strings
+ Args    : none
+
+=cut
+
+sub get_all_annotation_keys{
+    my $self = shift;
+
+    # interface stub: no implementation here
+    return $self->throw_not_implemented();
+}
+
+
+=head2 get_Annotations()
+
+ Usage   : my @annotations = $collection->get_Annotations('key')
+ Function: Retrieves all the Bio::AnnotationI objects for a specific key
+ Returns : list of Bio::AnnotationI - empty if no objects stored for a key
+ Args    : string which is key for annotations
+
+=cut
+
+sub get_Annotations{
+    my $self = shift;
+
+    # interface stub: no implementation here
+    return $self->throw_not_implemented();
+}
+
+=head2 add_Annotation()
+
+ Usage   : $self->add_Annotation('reference',$object);
+           $self->add_Annotation($object,'Bio::MyInterface::DiseaseI');
+           $self->add_Annotation($object);
+           $self->add_Annotation('disease',$object,'Bio::MyInterface::DiseaseI');
+ Function: Adds an annotation for a specific key.
+
+           If the key is omitted, the object to be added must provide a value
+           via its tagname().
+
+           If the archetype is provided, this and future objects added under
+           that tag have to comply with the archetype and will be rejected
+           otherwise.
+
+ Returns : none
+ Args    : annotation key ('disease', 'dblink', ...)
+           object to store (must be Bio::AnnotationI compliant)
+           [optional] object archetype to map future storage of object
+           of these types to
+
+=cut
+
+sub add_Annotation {
+  my $self = shift;
+
+  # interface stub: no implementation here
+  return $self->throw_not_implemented();
+}
+
+=head2 remove_Annotations()
+
+ Usage   :
+ Function: Remove the annotations for the specified key from this collection.
+ Returns : a list of Bio::AnnotationI compliant objects which were stored
+           under the given key(s)
+ Args    : the key(s) (tag name(s), one or more strings) for which to
+           remove annotations (optional; if none given, flushes all
+           annotations)
+
+=cut
+
+sub remove_Annotations{
+  my $self = shift;
+
+  # interface stub: no implementation here
+  return $self->throw_not_implemented();
+}
+
+=head2 get_num_of_annotations()
+
+ Usage   : my $count = $collection->get_num_of_annotations()
+ Function: Returns the count of all annotations stored in this collection
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub get_num_of_annotations{
+    my $self = shift;
+
+    # interface stub: no implementation here
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/AnnotationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,175 @@
+# $Id: AnnotationI.pm,v 1.10.4.4 2006/10/02 23:10:12 sendu Exp $
+
+#
+# BioPerl module for Bio::AnnotationI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::AnnotationI - Annotation interface
+
+=head1 SYNOPSIS
+
+  # generally you get AnnotationI's from AnnotationCollectionI's
+
+   foreach $key ( $ac->get_all_annotation_keys() ) {
+       @values = $ac->get_Annotations($key);
+       foreach $value ( @values ) {
+          # value is an Bio::AnnotationI, and defines a "as_text" method
+          print "Annotation ",$key," stringified value ",$value->as_text,"\n";
+          # you can also use a generic hash_tree method for getting
+          # stuff out say into XML format
+          $hash_tree = $value->hash_tree();
+       }
+   }
+
+
+=head1 DESCRIPTION
+
+Interface all annotations must support. There are two things that each
+annotation has to support.
+
+  $annotation->as_text()
+
+Annotations have to support an "as_text" method. This should be a
+single text string, without newlines representing the annotation,
+mainly for human readability. It is not aimed at being able to
+store/represent the annotation.
+
+The second method allows annotations to at least attempt to represent
+themselves as pure data for storage/display/whatever. The method
+hash_tree
+
+   $hash = $annotation->hash_tree();
+
+should return an anonymous hash with "XML-like" formatting. The
+formatting is as follows.
+
+  (1) For each key in the hash, if the value is a reference'd array -
+
+      (2) For each element of the array if the value is a object -
+          Assume the object has the method "hash_tree";
+      (3) else if the value is a reference to a hash
+          Recurse again from point (1)
+      (4) else
+          Assume the value is a scalar, and handle it directly as text
+
+   (5) else (if not an array) apply rules 2,3 and 4 to value
+
+The XML path in tags is represented by the keys taken in the
+hashes. When arrays are encountered they are all present in the path
+level of this tag
+
+This is a pretty "natural" representation of an object tree in an XML
+style, without forcing everything to inherit off some super-generic
+interface for representing things in the hash.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::AnnotationI;
+use strict;
+
+# Interface preamble - inherits from Bio::Root::RootI
+
+
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 as_text
+
+ Title   : as_text
+ Usage   :
+ Function: single text string, without newlines representing the
+           annotation, mainly for human readability. It is not aimed
+           at being able to store/represent the annotation.
+ Example :
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub as_text{
+    my $self = shift;
+
+    # interface stub: no implementation here
+    return $self->throw_not_implemented();
+}
+
+=head2 hash_tree
+
+ Title   : hash_tree
+ Usage   :
+ Function: should return an anonymous hash with "XML-like" formatting
+ Example :
+ Returns : a hash reference
+ Args    : none
+
+
+=cut
+
+sub hash_tree{
+    my $self = shift;
+
+    # interface stub: no implementation here
+    return $self->throw_not_implemented();
+}
+
+=head2 tagname
+
+ Title   : tagname
+ Usage   : $obj->tagname($newval)
+ Function: Get/set the tagname for this annotation value.
+
+           Setting this is optional. If set, it obviates the need to
+           provide a tag to Bio::AnnotationCollectionI when adding
+           this object. When obtaining an AnnotationI object from the
+           collection, the collection will set the value to the tag
+           under which it was stored unless the object has a tag
+           stored already.
+
+ Example :
+ Returns : value of tagname (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub tagname{
+    my $self = shift;
+
+    # interface stub: no implementation here
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Contig.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Contig.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Contig.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2089 @@
+# $Id: Contig.pm,v 1.11.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Assembly::Contig
+#   Mostly based on Bio::SimpleAlign by Ewan Birney
+#
+# Cared for by Robson francisco de Souza <rfsouza at citri.iq.usp.br>
+#
+# Copyright Robson Francisco de Souza
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::Contig - Perl module to hold and manipulate
+                     sequence assembly contigs.
+
+=head1 SYNOPSIS
+
+    # Module loading
+    use Bio::Assembly::IO;
+
+    # Assembly loading methods
+    $aio = new Bio::Assembly::IO(-file=>"test.ace.1",
+                               -format=>'phrap');
+
+    $assembly = $aio->next_assembly;
+    foreach $contig ($assembly->all_contigs) {
+      # do something
+    }
+
+    # OR, if you want to build the contig yourself,
+
+    use Bio::Assembly::Contig;
+    $c = Bio::Assembly::Contig->new(-id=>"1");
+
+    $ls  = Bio::LocatableSeq->new(-seq=>"ACCG-T",
+                                  -id=>"r1",
+                                  -alphabet=>'dna');
+    $ls2 = Bio::LocatableSeq->new(-seq=>"ACA-CG-T",
+                                  -id=>"r2",
+                                  -alphabet=>'dna');
+
+    $ls_coord = Bio::SeqFeature::Generic->new(-start=>3,
+                                              -end=>8,
+                                              -strand=>1);
+    $ls2_coord = Bio::SeqFeature::Generic->new(-start=>1,
+                                               -end=>8,
+                                               -strand=>1);
+    $c->add_seq($ls);
+    $c->add_seq($ls2);
+    $c->set_seq_coord($ls_coord,$ls);
+    $c->set_seq_coord($ls2_coord,$ls2);
+
+    $con = Bio::LocatableSeq->new(-seq=>"ACACCG-T",
+                                  -alphabet=>'dna');
+    $c->set_consensus_sequence($con);
+
+    $l = $c->change_coord('unaligned r2','ungapped consensus',6);
+    print "6 in unaligned r2 => $l in ungapped consensus\n";
+
+
+=head1 DESCRIPTION
+
+A contig is a set of sequences, locally aligned to each other, so
+that every sequence has overlapping regions with at least one sequence
+in the contig, such that a continuous stretch of overlapping sequences is
+formed, allowing the deduction of a consensus sequence which may be
+longer than any of the sequences from which it was deduced.
+
+In this documentation we refer to the overlapping sequences used to
+build the contig as "aligned sequences" and to the sequence deduced
+from the overlap of aligned sequences as the "consensus". Methods to
+deduce the consensus sequence from aligned sequences were not yet
+implemented in this module, but it's possible to add a consensus
+sequence deduced by other means, e.g, by the assembly program used to
+build the alignment.
+
+All aligned sequences in a Bio::Assembly::Contig must be Bio::LocatableSeq
+objects and have a unique ID. The unique ID restriction is due to the
+nature of the module's internal data structures and is also a request
+of some assembly programs. If two sequences with the same ID are added
+to a contig, the first sequence added is replaced by the second one.
+
+=head2 Coordinate_systems
+
+There are four base coordinate systems in Bio::Assembly::Contig.  When
+you need to access contig elements or data that exists on a certain
+range or location, you may be specifying coordinates in relation to
+different sequences, which may be either the contig consensus or one
+of the aligned sequences that were used to do the assembly.
+
+ =========================================================
+          Name           | Referenced sequence
+ ---------------------------------------------------------
+   "gapped consensus"    | Contig (with gaps)
+   "ungapped consensus"  | Contig (without gaps)
+   "aligned $seqID"      | sequence $seqID (with gaps)
+   "unaligned $seqID"    | sequence $seqID (without gaps)
+ =========================================================
+
+"gapped consensus" refers to positions in the aligned consensus
+sequence, which is the consensus sequence including the gaps inserted
+to align it against the aligned sequences that were used to assemble
+the contig. So, its limits are [ 1, (consensus length + number of gaps
+in consensus) ]
+
+"ungapped consensus" is a coordinate system based on the consensus
+sequence, but excluding consensus gaps. This is just the coordinate
+system that you have when considering the consensus sequence alone,
+instead of aligned to other sequences.
+
+"aligned $seqID" refers to locations in the sequence $seqID after
+alignment of $seqID against the consensus sequence (reverse
+complementing the original sequence, if needed).  Coordinate 1 in
+"aligned $seqID" is equivalent to the start location (first base) of
+$seqID in the consensus sequence, just like if the aligned sequence
+$seqID was a feature of the consensus sequence.
+
+"unaligned $seqID" is equivalent to a location in the isolated
+sequence, just like you would have when considering the sequence
+alone, out of an alignment.  When changing coordinates from "aligned
+$seq2" to "unaligned $seq2", if $seq2 was reverse complemented when
+included in the alignment, the output coordinates will be reversed to
+fit that fact, i.e. 1 will be changed to length($seq2), 2 will be
+length($seq2)-1 and so on.
+
+An important note: when you change gap coordinates from a gapped
+system ("gapped consensus" or "aligned $seqID") to a system that does
+not include gaps ("ungapped consensus" or "unaligned $seqID"), the
+position returned will be the first location before all gaps
+neighboring the input location.
+
+=head2 Feature_collection
+
+Bio::Assembly::Contig stores much information about a contig in a
+Bio::SeqFeature::Collection object. Relevant information on the
+alignment is accessed by selecting features based on their primary
+tags (e.g. all features which have a primary tag of the form
+'_aligned_coord:$seqID', where $seqID is an aligned sequence ID, are
+coordinates for sequences in the contig alignment) and, by using
+methods from Bio::SeqFeature::Collection, it's possible to select
+features by overlap with other features.
+
+We suggest that you use the primary tags of features as identifiers
+for feature classes. By convention, features with primary tags
+starting with a '_' are generated by modules that populate the contig
+data structure and return the contig object, maybe as part of an
+assembly object, e.g.  drivers from the Bio::Assembly::IO set.
+
+Features in the features collection may be associated with particular
+aligned sequences. To obtain this, you must attach the sequence to the
+feature, using attach_seq() from Bio::SeqFeatureI, before you add the
+feature to the feature collection. We also suggest adding the sequence
+id to the primary tag, so that it is easy to select features for a
+particular sequence.
+
+There is only one feature class that some methods in
+Bio::Assembly::Contig expect to find in the feature collection: features
+with primary tags of the form '_aligned_coord:$seqID', where $seqID is
+the aligned sequence id (like returned by $seq-E<gt>id()). These features
+describe the position (in "gapped consensus" coordinates) of aligned
+sequences, and the method set_seq_coord() automatically changes a
+feature's primary tag to this form whenever the feature is added to
+the collection by this method. Only two methods in Bio::Assembly::Contig
+will not work unless there are features from this class:
+change_coord() and get_seq_coord().
+
+Other feature classes will be automatically available only when
+Bio::Assembly::Contig objects are created by a specific module. Such
+feature classes are (or should be) documented in the documentation of
+the module which create them, to which the user should refer.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Robson Francisco de Souza
+
+rfsouza at citri.iq.usp.br
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+package Bio::Assembly::Contig;
+
+use strict;
+
+use Bio::SeqFeature::Collection;
+use Bio::Seq::PrimaryQual;
+
+use base qw(Bio::Root::Root Bio::Align::AlignI);
+
+=head1 Object creator
+
+=head2 new
+
+ Title     : new
+ Usage     : my $contig = new Bio::Assembly::Contig();
+ Function  : Creates a new contig object
+ Returns   : Bio::Assembly::Contig
+ Args      : -source => string representing the source
+                        program where this contig came
+                        from
+             -id => contig unique ID
+
+=cut
+
+#-----------
+sub new {
+#-----------
+    # Constructor. Recognised named arguments:
+    #   -source => program that produced the contig (default 'Unknown')
+    #   -id     => contig identifier               (default 'NoName')
+    # Returns the new Bio::Assembly::Contig object.
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    my ($src, $id) = $self->_rearrange([qw(SOURCE ID)], @args);
+
+    # Program used to build the contig. The default is applied only when
+    # the caller gave no -source; it must not depend on whether -id was
+    # supplied.
+    if ($src) {
+        $self->source($src);
+    }
+    else {
+        $self->{'_source'} = 'Unknown';
+    }
+
+    # Alignment (contig) name
+    if ($id) {
+        $self->id($id);
+    }
+    else {
+        $self->{'_id'} = 'NoName';
+    }
+
+    # Bio::SimpleAlign derived fields (check which ones are needed for AlignI compatibility)
+    $self->{'_elem'} = {}; # contig elements: aligned sequence objects (keyed by ID)
+    $self->{'_order'} = {}; # store sequence order
+#    $self->{'start_end_lists'} = {}; # References to entries in {'_seq'}. Keyed by seq ids.
+#    $self->{'_dis_name'} = {}; # Display names for each sequence
+    $self->{'_symbols'} = {}; # List of symbols
+
+    # Contig specific slots
+    $self->{'_consensus_sequence'} = undef;
+    $self->{'_consensus_quality'} = undef;
+    $self->{'_nof_residues'} = 0;
+    $self->{'_nof_seqs'} = 0;
+#    $self->{'_nof_segments'} = 0; # Let's not make it heavier than needed by now...
+    $self->{'_sfc'} = Bio::SeqFeature::Collection->new();
+
+    # Assembly specifics
+    $self->{'_assembly'} = undef; # Reference to a Bio::Assembly::Scaffold object, if contig belongs to one.
+    $self->{'_strand'} = 0; # Reverse (-1) or forward (1), if contig is in a scaffold. 0 otherwise
+    $self->{'_neighbor_start'} = undef; # Will hold a reference to another contig
+    $self->{'_neighbor_end'} = undef; # Will hold a reference to another contig
+
+    return $self; # success - we hope!
+}
+
+=head1 Assembly related methods
+
+These methods exist to enable adding information about possible
+relations among contigs, e.g. when you already have a scaffold for
+your assembly, describing the ordering of contigs in the final
+assembly, but no sequences covering the gaps between neighboring
+contigs.
+
+=head2 source
+
+ Title     : source
+ Usage     : $contig->source($program);
+ Function  : Get/Set program used to build this contig
+ Returns   : string
+ Argument  : [optional] string
+
+=cut
+
+sub source {
+    my ($self, $source) = @_;
+
+    # Acts as a setter only when a defined value is passed in.
+    if (defined $source) {
+        $self->{'_source'} = $source;
+    }
+
+    return $self->{'_source'};
+}
+
+=head2 assembly
+
+ Title     : assembly
+ Usage     : $contig->assembly($assembly);
+ Function  : Get/Set assembly object for this contig
+ Returns   : a Bio::Assembly::Scaffold object
+ Argument  : a Bio::Assembly::Scaffold object
+
+=cut
+
+sub assembly {
+    my ($self, $assembly) = @_;
+
+    # Setter path: only a Bio::Assembly::Scaffold may be stored.
+    if (defined $assembly) {
+        $self->throw("Using non Bio::Assembly::Scaffold object when assign contig to assembly")
+            unless $assembly->isa("Bio::Assembly::Scaffold");
+        $self->{'_assembly'} = $assembly;
+    }
+
+    return $self->{'_assembly'};
+}
+
+=head2 strand
+
+ Title     : strand
+ Usage     : $contig->strand($num);
+ Function  : Get/Set contig orientation in a scaffold/assembly.
+             It's equivalent to the strand property of sequence
+             objects and sets whether the contig consensus should
+             be reversed and complemented before being added to a
+             scaffold or assembly.
+ Returns   : integer
+ Argument  : 1 if orientation is forward, -1 if reverse and
+             0 if none
+
+=cut
+
+sub strand {
+    my ($self, $ori) = @_;
+
+    # Getter path: nothing to validate or store.
+    return $self->{'_strand'} unless defined $ori;
+
+    # Only the three orientation codes are accepted.
+    my $is_valid = ($ori == 1 || $ori == 0 || $ori == -1);
+    $self->throw("Contig strand must be either 1, -1 or 0") if !$is_valid;
+
+    $self->{'_strand'} = $ori;
+    return $self->{'_strand'};
+}
+
+=head2 upstream_neighbor
+
+ Title     : upstream_neighbor
+ Usage     : $contig->upstream_neighbor($contig);
+ Function  : Get/Set a contig neighbor for the current contig when
+             building a scaffold. The upstream neighbor is
+             located before $contig first base
+ Returns   : the current upstream neighbor (Bio::Assembly::Contig) or undef
+ Argument  : Bio::Assembly::Contig
+
+=cut
+
+sub upstream_neighbor {
+    my ($self, $ref) = @_;
+
+    # Setter path: the neighbor has to be another contig object.
+    if (defined $ref) {
+        $self->throw("Trying to assign a non Bio::Assembly::Contig object to upstream contig")
+            unless $ref->isa("Bio::Assembly::Contig");
+        $self->{'_neighbor_start'} = $ref;
+    }
+
+    return $self->{'_neighbor_start'};
+}
+
+=head2 downstream_neighbor
+
+ Title     : downstream_neighbor
+ Usage     : $contig->downstream_neighbor($contig);
+ Function  : Get/Set a contig neighbor for the current contig when
+             building a scaffold. The downstream neighbor is
+             located after $contig last base
+ Returns   : the current downstream neighbor (Bio::Assembly::Contig) or undef
+ Argument  : Bio::Assembly::Contig
+
+=cut
+
+sub downstream_neighbor {
+    my ($self, $ref) = @_;
+
+    # Setter path: the neighbor has to be another contig object.
+    if (defined $ref) {
+        $self->throw("Trying to assign a non Bio::Assembly::Contig object to downstream contig")
+            unless $ref->isa("Bio::Assembly::Contig");
+        $self->{'_neighbor_end'} = $ref;
+    }
+
+    return $self->{'_neighbor_end'};
+}
+
+=head1 Contig feature collection methods
+
+=head2 add_features
+
+ Title     : add_features
+ Usage     : $contig->add_features($feat,$flag)
+ Function  :
+
+             Add an array of features to the contig feature
+             collection. The consensus sequence may be attached to the
+             added feature, if $flag is set to 1. If $flag is 0 and
+             the feature is attached to one of the contig aligned
+             sequences, the feature is registered as an aligned
+             sequence feature. If $flag is 0 and the feature is not
+             attached to any sequence in the contig, the feature is
+             simply added to the feature collection and no attachment
+             or registration is made.
+
+             Note: You must attach aligned sequences to their features
+             prior to calling add_features, otherwise you won't be
+             able to access the feature through get_seq_feat_by_tag()
+             method.
+
+ Returns   : number of features added.
+ Argument  :
+             $feat : A reference to an array of Bio::SeqFeatureI
+             $flag : boolean - true if consensus sequence object
+                     should be attached to this feature, false if
+                     no consensus attachment should be made.
+                     Default: false.
+
+=cut
+
+sub add_features {
+    my ($self, $args, $flag) = @_;
+
+    if ($flag) {
+        # Consensus attachment requested: give the consensus sequence to
+        # every feature that has no sequence yet. Nothing is attached when
+        # no consensus has been set.
+        my $consensus = $self->{'_consensus_sequence'};
+        if (defined $consensus) {
+            for my $feature (@{$args}) {
+                next if (defined $feature->seq);
+                $feature->attach_seq($consensus);
+            }
+        }
+    }
+    else {
+        # No consensus attachment: register shortcuts for features that
+        # already carry a sequence, keyed by sequence ID and primary tag.
+        for my $feature (@{$args}) {
+            my $seq = $feature->entire_seq();
+            next unless $seq;
+
+            my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+            unless (exists $self->{'_elem'}{$seqID}) {
+                $self->warn("Adding contig feature attached to unknown sequence $seqID!");
+            }
+
+            my $tag = $feature->primary_tag;
+            $self->{'_elem'}{$seqID}{'_feat'}{$tag} = $feature;
+        }
+    }
+
+    # In every case the features end up in the feature collection.
+    my $nof_added = $self->{'_sfc'}->add_features($args);
+
+    return $nof_added;
+}
+
+=head2 remove_features
+
+ Title     : remove_features
+ Usage     : $contig->remove_features(@feat)
+ Function  : Remove an array of contig features
+ Returns   : number of features removed.
+ Argument  : An array of Bio::SeqFeatureI
+
+=cut
+
+sub remove_features {
+    # Remove the given features from the contig.
+    #   Args    : a list of Bio::SeqFeatureI objects
+    #   Returns : whatever the feature collection's remove_features()
+    #             returns for the same list
+    my ($self, @args) = @_;
+
+    # Removing shortcuts for aligned sequence features
+    foreach my $feat (@args) {
+        if (my $seq = $feat->entire_seq()) {
+            my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+            my $tag = $feat->primary_tag;
+
+            # Strip a trailing ":$seqID" from the tag. The ID is quoted with
+            # \Q...\E because sequence IDs routinely contain regex
+            # metacharacters ('.', '|', '+', ...), and the replacement is an
+            # explicit empty string rather than an unset $1.
+            # NOTE(review): add_features() and set_seq_coord() store the
+            # shortcut under the full primary tag - confirm that looking it
+            # up under the stripped tag is the intended behaviour.
+            $tag =~ s/:\Q$seqID\E$//;
+
+            # Only drop the shortcut when it points at this very feature.
+            delete( $self->{'_elem'}{$seqID}{'_feat'}{$tag} )
+                if (exists $self->{'_elem'}{$seqID}{'_feat'}{$tag} &&
+                    $self->{'_elem'}{$seqID}{'_feat'}{$tag} eq $feat);
+        }
+    }
+
+    # Removing the features from the feature collection itself
+    return $self->{'_sfc'}->remove_features(\@args);
+}
+
+=head2 get_features_collection
+
+ Title     : get_features_collection
+ Usage     : $contig->get_features_collection()
+ Function  : Get the collection of all contig features
+ Returns   : Bio::SeqFeature::Collection
+ Argument  : none
+
+=cut
+
+sub get_features_collection {
+    my ($self) = @_;
+
+    # The collection object created by the constructor is handed back as is.
+    return $self->{'_sfc'};
+}
+
+=head1 Coordinate system's related methods
+
+See L</Coordinate_systems> above.
+
+=head2 change_coord
+
+ Title     : change_coord
+ Usage     : $contig->change_coord($in,$out,$query)
+ Function  :
+
+             Change coordinate system for $query.  This method
+             transforms locations between coordinate systems described
+             in section "Coordinate Systems" of this document.
+
+             Note: this method will throw an exception when changing
+             coordinates between "ungapped consensus" and other
+             systems if consensus sequence was not set. It will also
+             throw exceptions when changing coordinates among aligned
+             sequence, either with or without gaps, and other systems
+             if sequence locations were not set with set_seq_coord().
+
+ Returns   : integer
+ Argument  :
+             $in    : [string]  input coordinate system
+             $out   : [string]  output coordinate system
+             $query : [integer] a position in a sequence
+
+=cut
+
+sub change_coord {
+    # Convert position $query from coordinate system $type_in to $type_out.
+    # A few conversions are computed directly; all the others are composed
+    # from them through recursive calls to this method.
+    my $self     = shift;
+    my $type_in  = shift;
+    my $type_out = shift;
+    my $query    = shift;
+
+    # Parsing arguments
+    # The second word of a coordinate system name is either 'consensus' or a
+    # sequence ID. For sequence IDs the coordinate feature of the read is
+    # loaded; an exception is thrown below when no location is available.
+    my ($read_in,$read_out) = (undef,undef);
+    my $in_ID  = ( split(' ',$type_in)  )[1];
+    my $out_ID = ( split(' ',$type_out) )[1];
+
+    if ($in_ID  ne 'consensus') {
+	$read_in  = $self->get_seq_coord( $self->get_seq_by_name($in_ID)  );
+	$self->throw("Can't change coordinates without sequence location for $in_ID")
+	    unless (defined $read_in);
+    }
+    if ($out_ID ne 'consensus') {
+	$read_out = $self->get_seq_coord( $self->get_seq_by_name($out_ID) );
+	$self->throw("Can't change coordinates without sequence location for $out_ID")
+	    unless (defined $read_out);
+    }
+
+    # Performing transformation between coordinates
+    # The bare block below is used as a switch: the first case whose
+    # condition holds converts $query and leaves the block with "last". If
+    # no case matches, control reaches the throw at the end of the block.
+  SWITCH1: {
+
+      # Transformations between contig padded and contig unpadded
+      # (uses the consensus gap registry stored in '_consensus_gaps')
+      (($type_in eq 'gapped consensus') && ($type_out eq 'ungapped consensus')) && do {
+	  $self->throw("Can't use ungapped consensus coordinates without a consensus sequence")
+	      unless (defined $self->{'_consensus_sequence'});
+	  $query = &_padded_unpadded($self->{'_consensus_gaps'}, $query);
+	  last SWITCH1;
+      };
+      (($type_in eq 'ungapped consensus') && ($type_out eq 'gapped consensus')) && do {
+	  $self->throw("Can't use ungapped consensus coordinates without a consensus sequence")
+	      unless (defined $self->{'_consensus_sequence'});
+	  $query = &_unpadded_padded($self->{'_consensus_gaps'},$query);
+	  last SWITCH1;
+      };
+
+      # Transformations between contig (padded) and read (padded)
+      # (a plain offset by the start of the read's coordinate feature)
+      (($type_in  eq 'gapped consensus') &&
+       ($type_out =~ /^aligned /) && defined($read_out)) && do {
+	   $query = $query - $read_out->start() + 1;
+	   last SWITCH1;
+       };
+      (($type_in =~ /^aligned /) && defined($read_in) &&
+       ($type_out  eq 'gapped consensus')) && do {
+	   $query = $query + $read_in->start() - 1;
+	   last SWITCH1;
+       };
+
+      # Transformations between contig (unpadded) and read (padded)
+      # (composed: go through 'gapped consensus')
+      (($type_in eq 'ungapped consensus') &&
+       ($type_out =~ /^aligned /) && defined($read_out)) && do {
+	   $query = $self->change_coord('ungapped consensus','gapped consensus',$query);
+	   $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
+	   last SWITCH1;
+       };
+      (($type_in =~ /^aligned /) && defined($read_in) &&
+       ($type_out eq 'ungapped consensus')) && do {
+	   $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
+	   $query = $self->change_coord('gapped consensus','ungapped consensus',$query);
+	   last SWITCH1;
+       };
+
+      # Transformations between seq $read_in padded and seq $read_out padded
+      (defined($read_in)  && ($type_in  =~ /^aligned /)  &&
+       defined($read_out) && ($type_out =~ /^aligned /)) && do {
+	   $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
+	   $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
+	   last SWITCH1;
+       };
+
+      # Transformations between seq $read_in padded and seq $read_out unpadded
+      (defined($read_in)  && ($type_in  =~ /^aligned /)    &&
+       defined($read_out) && ($type_out =~ /^unaligned /)) && do {
+	   # Different reads: first move the position onto the output read
+	   if ($read_in ne $read_out) {
+	       $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
+	       $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
+	   }
+	   my $list_out = $self->{'_elem'}{$out_ID}{'_gaps'};
+	   $query = &_padded_unpadded($list_out,$query);
+	   # Changing read orientation if read was reverse complemented when aligned
+	   # (position is mirrored within the read length minus its gaps)
+	   if ($read_out->strand == -1) {
+	       my ($length) = $read_out->length();
+	       $length = $length - &_nof_gaps($list_out,$length);
+	       $query  = $length - $query + 1;
+	   }
+	   last SWITCH1;
+       };
+      (defined($read_in)  && ($type_in  =~ /^unaligned /) &&
+       defined($read_out) && ($type_out =~ /^aligned /))  && do {
+	   my $list_in = $self->{'_elem'}{$in_ID}{'_gaps'};
+	   # Changing read orientation if read was reverse complemented when aligned
+	   # (position is mirrored within the read length minus its gaps)
+	   if ($read_in->strand == -1) {
+	       my ($length) = $read_in->length();
+	       $length = $length - &_nof_gaps($list_in,$length);
+	       $query  = $length - $query + 1;
+	   }
+	   $query = &_unpadded_padded($list_in,$query);
+	   # Different reads: move the padded position onto the output read
+	   if ($read_in ne $read_out) {
+	       $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
+	       $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
+	   }
+	   last SWITCH1;
+       };
+
+      # Transformations between seq $read_in unpadded and seq $read_out unpadded
+      # (composed: go through "aligned $out_ID")
+      (defined($read_in)  && ($type_in  =~ /^unaligned /)    &&
+       defined($read_out) && ($type_out =~ /^unaligned /)) && do {
+	   $query = $self->change_coord("unaligned $in_ID","aligned $out_ID",$query);
+	   $query = $self->change_coord("aligned $out_ID","unaligned $out_ID",$query);
+	   last SWITCH1;
+       };
+
+      # Transformations between contig (padded) and read (unpadded)
+      (($type_in eq 'gapped consensus') &&
+       ($type_out =~ /^unaligned /) && defined($read_out)) && do {
+	   $query = $self->change_coord('gapped consensus',"aligned $out_ID",$query);
+	   $query = $self->change_coord("aligned $out_ID","unaligned $out_ID",$query);
+	   last SWITCH1;
+       };
+      (($type_in =~ /^unaligned /) && defined($read_in) &&
+       ($type_out eq 'gapped consensus')) && do {
+	   $query = $self->change_coord("unaligned $in_ID","aligned $in_ID",$query);
+	   $query = $self->change_coord("aligned $in_ID",'gapped consensus',$query);
+	   last SWITCH1;
+       };
+
+      # Transformations between contig (unpadded) and read (unpadded)
+      (($type_in eq 'ungapped consensus') &&
+       ($type_out =~ /^unaligned /) && defined($read_out)) && do {
+	   $query = $self->change_coord('ungapped consensus','gapped consensus',$query);
+	   $query = $self->change_coord('gapped consensus',"unaligned $out_ID",$query);
+	   last SWITCH1;
+       };
+      (($type_in =~ /^unaligned /) && defined($read_in) &&
+       ($type_out eq 'ungapped consensus')) && do {
+	   $query = $self->change_coord("unaligned $in_ID",'gapped consensus',$query);
+	   $query = $self->change_coord('gapped consensus','ungapped consensus',$query);
+	   last SWITCH1;
+       };
+
+      # No case matched (this includes converting a system to itself).
+      $self->throw("Unknow coordinate system. Args: $type_in, $type_out.");
+      # NOTE(review): presumably unreachable because throw() raises an
+      # exception - confirm before relying on an undef return value.
+      $query = undef; # If a coordinate systems just requested is unknown
+  }
+
+    return $query;
+}
+
+=head2 get_seq_coord
+
+ Title     : get_seq_coord
+ Usage     : $contig->get_seq_coord($seq);
+ Function  : Get "gapped consensus" location for aligned sequence
+ Returns   : Bio::SeqFeature::Generic for coordinates or undef.
+             A warning is printed if the sequence is not in the contig.
+ Argument  : Bio::LocatableSeq object
+
+=cut
+
+sub get_seq_coord {
+    my ($self, $seq) = @_;
+
+    # Only locatable sequence objects can be looked up.
+    $self->throw("$seq is not a Bio::LocatableSeq")
+        unless (ref $seq && $seq->isa('Bio::LocatableSeq'));
+
+    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+
+    # Unknown sequence: warn and return nothing.
+    if (!exists( $self->{'_elem'}{$seqID} )) {
+        $self->warn("No such sequence ($seqID) in contig ".$self->id);
+        return;
+    }
+
+    # Known sequence without coordinates: return nothing, silently (the
+    # warning that used to be issued here is disabled).
+    my $coord_tag = "_aligned_coord:$seqID";
+    return unless exists( $self->{'_elem'}{$seqID}{'_feat'}{$coord_tag} );
+
+    return $self->{'_elem'}{$seqID}{'_feat'}{$coord_tag};
+}
+
+=head2 set_seq_coord
+
+ Title     : set_seq_coord
+ Usage     : $contig->set_seq_coord($feat,$seq);
+ Function  :
+
+             Set "gapped consensus" location for an aligned
+             sequence. If the sequence was previously added using
+             add_seq, its coordinates are changed/set.  Otherwise,
+             add_seq is called and the sequence is added to the
+             contig.
+
+ Returns   : Bio::SeqFeature::Generic for old coordinates or undef.
+ Argument  :
+             $feat  : a Bio::SeqFeature::Generic object
+                      representing a location for the
+                      aligned sequence, in "gapped
+                      consensus" coordinates.
+
+             Note: the original feature primary tag will
+                   be lost.
+
+             $seq   : a Bio::LocatableSeq object
+
+=cut
+
+sub set_seq_coord {
+    my ($self, $feat, $seq) = @_;
+
+    unless (ref $seq && $seq->isa('Bio::LocatableSeq')) {
+        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
+    }
+
+    # The coordinate feature must be of the right class and fully bounded.
+    if (!$feat->isa("Bio::SeqFeature::Generic")) {
+        $self->throw("Coordinates must be a Bio::SeqFeature::Generic object!");
+    }
+    if (!defined $feat->end) {
+        $self->throw("Sequence coordinates must have an end!");
+    }
+    if (!defined $feat->start) {
+        $self->throw("Sequence coordinates must have a start!");
+    }
+
+    my $seqID     = $seq->id() || $seq->display_id || $seq->primary_id;
+    my $coord_tag = "_aligned_coord:$seqID";
+
+    # A different sequence object registered under the same ID is removed
+    # before the new one is added.
+    my $registered = exists( $self->{'_elem'}{$seqID} )
+        ? $self->{'_elem'}{$seqID}{'_seq'}
+        : undef;
+    if (defined($registered) && ($seq ne $registered)) {
+        $self->warn("Replacing sequence $seqID\n");
+        $self->remove_seq($registered);
+    }
+    $self->add_seq($seq);
+
+    # Remove previous coordinates, if any
+    $self->remove_features($feat);
+
+    # Tag the feature as the aligned coordinates of this sequence and
+    # register it, both in the per-sequence shortcut table and in the
+    # feature collection.
+    unless ( $feat->has_tag('contig') ) {
+        $feat->add_tag_value('contig',$self->id);
+    }
+    $feat->primary_tag($coord_tag);
+    $feat->attach_seq($seq);
+    $self->{'_elem'}{$seqID}{'_feat'}{$coord_tag} = $feat;
+
+    # NOTE(review): the value returned is the result of add_features(), not
+    # the old coordinates mentioned in the POD - confirm which is intended.
+    return $self->add_features([ $feat ]);
+}
+
+=head1 Bio::Assembly::Contig consensus methods
+
+=head2 set_consensus_sequence
+
+ Title     : set_consensus_sequence
+ Usage     : $contig->set_consensus_sequence($seq)
+ Function  : Set the consensus sequence object for this contig
+ Returns   : consensus length
+ Argument  : Bio::LocatableSeq
+
+=cut
+
+sub set_consensus_sequence {
+    my ($self, $seq) = @_;
+
+    unless ($seq->isa("Bio::LocatableSeq")) {
+        $self->throw("Consensus sequence must be a Bio::LocatableSeq!");
+    }
+
+    # The consensus always spans 1..length in its own coordinates.
+    my $con_len = $seq->length;
+    $seq->start(1);
+    $seq->end($con_len);
+
+    # Start a fresh consensus gap registry and fill it from the sequence
+    # string before storing the consensus object itself.
+    my $gap_registry = [];
+    $self->{'_consensus_gaps'} = $gap_registry;
+    $self->_register_gaps($seq->seq, $gap_registry);
+    $self->{'_consensus_sequence'} = $seq;
+
+    return $con_len;
+}
+
+=head2 set_consensus_quality
+
+ Title     : set_consensus_quality
+ Usage     : $contig->set_consensus_quality($qual)
+ Function  : Set the quality object for consensus sequence
+ Returns   : nothing
+ Argument  : Bio::Seq::Quality object
+
+=cut
+
+sub set_consensus_quality {
+    my ($self, $qual) = @_;
+
+    # The quality object is type-checked first ...
+    if (!$qual->isa("Bio::Seq::Quality")) {
+        $self->throw("Consensus quality must be a Bio::Seq::Quality object!");
+    }
+
+    # ... and it can only be stored once a consensus sequence exists.
+    if (!defined $self->{'_consensus_sequence'}) {
+        $self->throw("Consensus quality can't be added before you set the consensus sequence!");
+    }
+
+    $self->{'_consensus_quality'} = $qual;
+}
+
+=head2 get_consensus_length
+
+ Title     : get_consensus_length
+ Usage     : $contig->get_consensus_length()
+ Function  : Get consensus sequence length
+ Returns   : integer
+ Argument  : none
+
+=cut
+
+sub get_consensus_length {
+    my ($self) = @_;
+
+    # The length is asked of the stored consensus sequence object.
+    my $consensus = $self->{'_consensus_sequence'};
+    return $consensus->length();
+}
+
+=head2 get_consensus_sequence
+
+ Title     : get_consensus_sequence
+ Usage     : $contig->get_consensus_sequence()
+ Function  : Get a reference to the consensus sequence object
+             for this contig
+ Returns   : Bio::SeqI object
+ Argument  : none
+
+=cut
+
+sub get_consensus_sequence {
+    my ($self) = @_;
+
+    # Plain accessor; any extra arguments are ignored.
+    return $self->{'_consensus_sequence'};
+}
+
+=head2 get_consensus_quality
+
+ Title     : get_consensus_quality
+ Usage     : $contig->get_consensus_quality()
+ Function  : Get a reference to the consensus quality object
+             for this contig.
+ Returns   : A Bio::QualI object
+ Argument  : none
+
+=cut
+
+sub get_consensus_quality {
+    my ($self) = @_;
+
+    # Plain accessor; any extra arguments are ignored.
+    return $self->{'_consensus_quality'};
+}
+
+=head1 Bio::Assembly::Contig aligned sequences methods
+
+=head2 set_seq_qual
+
+ Title     : set_seq_qual
+ Usage     : $contig->set_seq_qual($seq,$qual);
+ Function  : Adds quality to an aligned sequence.
+ Returns   : nothing
+ Argument  : a Bio::LocatableSeq object and
+             a Bio::Seq::QualI object
+
+See L<Bio::LocatableSeq> for more information.
+
+=cut
+
+sub set_seq_qual {
+    # Attach quality values to an aligned sequence of the contig.
+    #   Args   : $seq  - a Bio::LocatableSeq already added to the contig
+    #                    and given coordinates with set_seq_coord()
+    #            $qual - a Bio::Seq::QualI object
+    #   Throws : if either argument has the wrong type, if the sequence
+    #            was not added, or if it has no coordinates
+    # The stored value is a new Bio::Seq::PrimaryQual whose quality list is
+    # padded to follow the gaps of the aligned sequence.
+    my ($self,$seq,$qual) = @_;
+
+    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
+        # One concatenated message, as in set_seq_coord()
+        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
+    }
+    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+
+    $self->throw("Aligned sequence quality must be a Bio::Seq::QualI object!")
+        unless ( $qual->isa("Bio::Seq::QualI") );
+    $self->throw("Use add_seq first: aligned sequence qualities can't be added before you load the sequence!")
+        unless (exists $self->{'_elem'}{$seqID}{'_seq'});
+
+    my $coord = $self->get_seq_coord($seq);
+    $self->throw("Use set_seq_coord first: aligned sequence qualities can't be added before you add coordinates for the sequence!")
+        unless (defined $coord);
+
+    # Adding gaps to quality object
+    my $sequence = $self->{'_elem'}{$seqID}{'_seq'}->seq();
+
+    # Work on a copy of the quality values, so that reversing them for a
+    # reverse-strand read does not modify the array behind the reference
+    # returned by the caller's quality object.
+    my @scores = @{ $qual->qual() };
+    @scores = reverse(@scores) if ($coord->strand() == -1);
+
+    my @quality  = ();
+    my $previous = 0;
+    my $next     = 0;
+    my $i = 0; # index into the ungapped quality values
+    my $j = 0; # index into the gapped sequence string
+    while ($i <= $#scores) {
+        # IF base is a gap, quality is the average for neighbouring sites
+        if (substr($sequence,$j,1) eq '-') {
+            $previous = $scores[$i-1] unless ($i == 0);
+            if ($i < $#scores) {
+                $next = $scores[$i+1];
+            } else {
+                $next = 0;
+            }
+            push(@quality,int( ($previous+$next)/2 ));
+        } else {
+            push(@quality,$scores[$i]);
+            $i++;
+        }
+        $j++;
+    }
+
+    $self->{'_elem'}{$seqID}{'_qual'} = Bio::Seq::PrimaryQual->new(-qual=>join(" ",@quality),
+                                                                   -id=>$seqID);
+}
+
+=head2 get_seq_ids
+
+ Title     : get_seq_ids
+ Usage     : $contig->get_seq_ids(-start=>$start,
+				  -end=>$end,
+				  -type=>"gapped A0QR67B08.b");
+ Function  : Get list of sequence IDs overlapping interval [$start, $end]
+             The default interval is [1,$contig->length]
+             Default coordinate system is "gapped consensus"
+ Returns   : An array
+ Argument  : A hash with optional elements:
+             -start : consensus subsequence start
+             -end   : consensus subsequence end
+             -type  : the coordinate system type for $start and $end arguments
+                      Coordinate systems available are:
+                      "gapped consensus"   : consensus coordinates with gaps
+                      "ungapped consensus" : consensus coordinates without gaps
+                      "aligned $ReadID"    : read $ReadID coordinates with gaps
+                      "unaligned $ReadID"  : read $ReadID coordinates without gaps
+
+
+=cut
+
+sub get_seq_ids {
+    # Return IDs of aligned sequences. With both -start and -end, only the
+    # sequences whose "_aligned_coord:" feature overlaps that range are
+    # listed; otherwise every sequence ID is returned in insertion order.
+    my ($self, @args) = @_;
+
+    my ($type,$start,$end) =
+        $self->_rearrange([qw(TYPE START END)], @args);
+
+    if (defined($start) && defined($end)) {
+        # Range boundaries are always looked up in gapped consensus coordinates
+        if (defined($type) && ($type ne 'gapped consensus')) {
+            $start = $self->change_coord($type,'gapped consensus',$start);
+            $end   = $self->change_coord($type,'gapped consensus',$end);
+        }
+
+        my @list = grep { $_->isa("Bio::SeqFeature::Generic") &&
+                              ($_->primary_tag =~ /^_aligned_coord:/) }
+        $self->{'_sfc'}->features_in_range(-start=>$start,
+                                           -end=>$end,
+                                           -contain=>0,
+                                           -strandmatch=>'ignore');
+        @list = map { $_->entire_seq->id } @list;
+        return @list;
+    }
+
+    # Entire aligned sequences list. Keys of _order are sequence numbers, so
+    # they are sorted numerically (as each_seq does); a string sort would put
+    # 10 before 2
+    return map { $self->{'_order'}{$_} } sort { $a <=> $b } keys %{ $self->{'_order'} };
+}
+
+=head2 get_seq_feat_by_tag
+
+ Title     : get_seq_feat_by_tag
+ Usage     : $seq = $contig->get_seq_feat_by_tag($seq,"_aligned_coord:$seqID")
+ Function  :
+
+             Get a sequence feature based on its primary_tag.
+             When you add
+
+ Returns   : a Bio::SeqFeature object
+ Argument  : a Bio::LocatableSeq and a string (feature primary tag)
+
+=cut
+
+sub get_seq_feat_by_tag {
+    # Look up the feature stored for $seq under primary tag $tag.
+    # Returns undef when nothing is stored; throws when $seq is not a
+    # Bio::LocatableSeq.
+    my ($self,$seq,$tag) = @_;
+
+    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
+        # Build one message string: extra list items would be taken by
+        # throw() as further positional arguments, not as message text
+        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
+    }
+    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+
+    return $self->{'_elem'}{$seqID}{'_feat'}{$tag};
+}
+
+=head2 get_seq_by_name
+
+ Title     : get_seq_by_name
+ Usage     : $seq = $contig->get_seq_by_name('Seq1')
+ Function  : Gets a sequence based on its id.
+ Returns   : a Bio::LocatableSeq object
+             (an exception is thrown if name is not found)
+ Argument  : string
+
+=cut
+
+sub get_seq_by_name {
+    # Return the aligned sequence stored under $seqID; an unknown ID is
+    # reported through throw().
+    my $self = shift;
+    my ($seqID) = @_;
+
+    # Check the outer key before the inner one: exists() on the nested key
+    # alone would autovivify $self->{'_elem'}{$seqID} for unknown IDs, and
+    # no_sequences() counts the keys of _elem
+    unless (exists $self->{'_elem'}{$seqID} &&
+            exists $self->{'_elem'}{$seqID}{'_seq'}) {
+        $self->throw("Could not find sequence $seqID in contig ".$self->id);
+        return;
+    }
+
+    return $self->{'_elem'}{$seqID}{'_seq'};
+}
+
+=head2 get_qual_by_name
+
+ Title     : get_qual_by_name
+ Usage     : $seq = $contig->get_qual_by_name('Seq1')
+ Function  :
+
+             Gets Bio::Seq::QualI object for a sequence
+             through its id ( as given by $qual->id() ).
+
+ Returns   : a Bio::Seq::QualI object.
+             undef if name is not found
+ Argument  : string
+
+=cut
+
+sub get_qual_by_name {
+    # Return the quality object stored under $seqID, or warn and return
+    # nothing when there is none.
+    my $self = shift;
+    my ($seqID) = @_;
+
+    # Check the outer key before the inner one: exists() on the nested key
+    # alone would autovivify $self->{'_elem'}{$seqID} for unknown IDs, and
+    # no_sequences() counts the keys of _elem
+    unless (exists $self->{'_elem'}{$seqID} &&
+            exists $self->{'_elem'}{$seqID}{'_qual'}) {
+        $self->warn("Could not find quality for $seqID in contig!");
+        return;
+    }
+
+    return $self->{'_elem'}{$seqID}{'_qual'};
+}
+
+=head1 Bio::Align::AlignI compatible methods
+
+=head2 Modifier methods
+
+These methods modify the MSE by adding, removing or shuffling complete
+sequences.
+
+=head2 add_seq
+
+ Title     : add_seq
+ Usage     : $contig->add_seq($newseq);
+ Function  :
+
+             Adds a sequence to the contig. *Does*
+             *not* align it - just adds it to the
+             hashes.
+
+ Returns   : nothing
+ Argument  : a Bio::LocatableSeq object
+
+See L<Bio::LocatableSeq> for more information.
+
+=cut
+
+sub add_seq {
+    # Register a Bio::LocatableSeq in the contig hashes (no alignment is
+    # done). Adding an ID that is already present replaces the stored
+    # sequence: its order slot is reused and the residue count is adjusted,
+    # while other per-sequence entries (e.g. '_qual') are left in place.
+    # Always returns 1; throws when $seq is not a Bio::LocatableSeq.
+    my $self = shift;
+    my $seq = shift;
+
+    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
+        # Build one message string: extra list items would be taken by
+        # throw() as further positional arguments, not as message text
+        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
+    }
+
+    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+    # Only create the entry when it is really missing
+    $self->{'_elem'}{$seqID} = {} unless (exists $self->{'_elem'}{$seqID});
+    my $elem    = $self->{'_elem'}{$seqID};
+    my $old_seq = $elem->{'_seq'};
+
+    if (defined($old_seq) && ($seq eq $old_seq)) {
+        $self->warn("Adding sequence $seqID, which has already been added");
+    }
+
+    # When replacing, take the previous sequence's residues out of the total
+    # first, so the update at the end of this sub does not count them twice
+    if (ref($old_seq)) {
+        $self->{'_nof_residues'} -=
+            $old_seq->length - scalar(@{ $elem->{'_gaps'} || [] });
+    }
+
+    # Our locatable sequences are always considered to be complete sequences
+    $seq->start(1); $seq->end($seq->length());
+
+    $self->warn("Adding non-nucleotidic sequence ".$seqID)
+        if (lc($seq->alphabet) ne 'dna' && lc($seq->alphabet) ne 'rna');
+
+    # build the symbol list for this sequence,
+    # will prune out the gap and missing/match chars
+    # when actually asked for the symbol list in the
+    # symbol_chars
+    if (defined $seq->seq) {
+        $self->{'_symbols'}->{$_} = 1 for split(//,$seq->seq);
+    } else {
+        $self->{'_symbols'} = {};
+    }
+
+    if (ref($old_seq)) {
+        $self->warn("Replacing one sequence [$seqID]\n");
+    } else {
+        # New sequence: give it the next sequence number
+        my $seq_no = ++$self->{'_nof_seqs'};
+        $self->{'_order'}->{$seq_no} = $seqID;
+    }
+
+    $elem->{'_seq'}  = $seq;
+    $elem->{'_feat'} = {};
+    $elem->{'_gaps'} = [];
+    my $dbref = $elem->{'_gaps'};
+    my $nofgaps = $self->_register_gaps($seq->seq,$dbref);
+
+    # Updating residue count
+    $self->{'_nof_residues'} += $seq->length - $nofgaps;
+
+    return 1;
+}
+
+=head2 remove_seq
+
+ Title     : remove_seq
+ Usage     : $contig->remove_seq($seq);
+ Function  : Removes a single sequence from an alignment
+ Returns   : 1 on success, 0 otherwise
+ Argument  : a Bio::LocatableSeq object
+
+=cut
+
+sub remove_seq {
+    # Remove a sequence, its features and its order slot from the contig.
+    # Returns 1 on success and 0 (after a warning) when no sequence with
+    # that ID is stored; throws when $seq is not a Bio::LocatableSeq.
+    my ($self,$seq) = @_;
+
+    if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
+        # Build one message string: extra list items would be taken by
+        # throw() as further positional arguments, not as message text
+        $self->throw("Unable to process non locatable sequences [".ref($seq)."]");
+    }
+
+    my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
+    unless (exists $self->{'_elem'}{$seqID} ) {
+        $self->warn("No sequence named $seqID  [$seq]");
+        return 0;
+    }
+    my $elem = $self->{'_elem'}{$seqID};
+
+    # Updating residue count: add_seq added (length - number of gaps) for
+    # the stored sequence, so exactly that amount is taken out again
+    if (ref($elem->{'_seq'})) {
+        $self->{'_nof_residues'} -=
+            $elem->{'_seq'}->length() - scalar(@{ $elem->{'_gaps'} || [] });
+    }
+
+    # Remove all references to features of this sequence
+    my @feats = values %{ $elem->{'_feat'} || {} };
+    $self->{'_sfc'}->remove_features(\@feats);
+
+    # Drop the order slot(s) too, so each_seq and get_seq_ids stop listing it
+    my @slots = grep { $self->{'_order'}{$_} eq $seqID }
+        keys %{ $self->{'_order'} || {} };
+    delete $self->{'_order'}{$_} for @slots;
+
+    delete $self->{'_elem'}{$seqID};
+
+    return 1;
+}
+
+=head2 purge
+
+ Title   : purge
+ Usage   : $contig->purge(0.7);
+ Function:
+
+           Removes sequences above whatever %id.
+
+           This function will grind on large alignments. Beware!
+           (perhaps not ideally implemented)
+
+ Example :
+ Returns : An array of the removed sequences
+ Argument:
+
+
+=cut
+
+sub purge {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 sort_alphabetically
+
+ Title     : sort_alphabetically
+ Usage     : $contig->sort_alphabetically
+ Function  :
+
+             Changes the order of the alignment to alphabetical on name
+             followed by numerical by number.
+
+ Returns   :
+ Argument  :
+
+=cut
+
+sub sort_alphabetically {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 Sequence selection methods
+
+Methods returning one or more sequences objects.
+
+=head2 each_seq
+
+ Title     : each_seq
+ Usage     : foreach $seq ( $contig->each_seq() )
+ Function  : Gets an array of Seq objects from the alignment
+ Returns   : an array
+ Argument  :
+
+=cut
+
+sub each_seq {
+    # Collect the stored sequence objects, ordered by sequence number.
+    my $self = shift;
+
+    my @seqs;
+    for my $number (sort { $a <=> $b } keys %{$self->{'_order'}}) {
+        my $seqID = $self->{'_order'}{$number};
+        push @seqs, $self->{'_elem'}{$seqID}{'_seq'};
+    }
+
+    return @seqs;
+}
+
+=head2 each_alphabetically
+
+ Title     : each_alphabetically
+ Usage     : foreach $seq ( $contig->each_alphabetically() )
+ Function  :
+
+             Returns an array of sequence object sorted alphabetically
+             by name and then by start point.
+             Does not change the order of the alignment
+
+ Returns   :
+ Argument  :
+
+=cut
+
+sub each_alphabetically {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 each_seq_with_id
+
+ Title     : each_seq_with_id
+ Usage     : foreach $seq ( $contig->each_seq_with_id() )
+ Function  :
+
+             Gets an array of Seq objects from the
+             alignment, the contents being those sequences
+             with the given name (there may be more than one)
+
+ Returns   : an array
+ Argument  : a seq name
+
+=cut
+
+sub each_seq_with_id {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 get_seq_by_pos
+
+ Title     : get_seq_by_pos
+ Usage     : $seq = $contig->get_seq_by_pos(3)
+ Function  :
+
+             Gets a sequence based on its position in the alignment.
+             Numbering starts from 1.  Sequence positions larger than
+             no_sequences() will throw an error.
+
+ Returns   : a Bio::LocatableSeq object
+ Argument  : positive integer for the sequence position
+
+=cut
+
+sub get_seq_by_pos {
+    # Return the sequence at 1-based position $pos in insertion order.
+    # Throws when $pos is not a positive integer or exceeds the number of
+    # sequences.
+    my $self = shift;
+    my ($pos) = @_;
+
+    $self->throw("Sequence position has to be a positive integer, not [$pos]")
+        unless $pos =~ /^\d+$/ and $pos > 0;
+    $self->throw("No sequence at position [$pos]")
+        unless $pos <= $self->no_sequences ;
+
+    # add_seq numbers sequences with a pre-incremented counter, so the keys
+    # of _order are not 0-based positions (and may have holes); take the
+    # $pos-th sequence number in sorted order instead of indexing by $pos-1
+    my @numbers = sort { $a <=> $b } keys %{ $self->{'_order'} };
+    $self->throw("No sequence at position [$pos]")
+        unless $pos <= scalar(@numbers);
+
+    my $seqID = $self->{'_order'}->{ $numbers[$pos - 1] };
+    return $self->{'_elem'}{$seqID}{'_seq'};
+}
+
+=head2 Create new alignments
+
+The result of these methods are horizontal or vertical subsets of the
+current MSE.
+
+=head2 select
+
+ Title     : select
+ Usage     : $contig2 = $contig->select(1, 3) # three first sequences
+ Function  :
+
+             Creates a new alignment from a continuous subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than no_sequences() will throw an error.
+
+ Returns   : a Bio::Assembly::Contig object
+ Argument  : positive integer for the first sequence
+             positive integer for the last sequence to include (optional)
+
+=cut
+
+sub select {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+
+=head2 select_noncont
+
+ Title     : select_noncont
+ Usage     : $contig2 = $contig->select_noncont(1, 3) # first and 3rd sequences
+ Function  :
+
+             Creates a new alignment from a subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than no_sequences() will throw an error.
+
+ Returns   : a Bio::Assembly::Contig object
+ Args      : array of integers for the sequences
+
+=cut
+
+sub select_noncont {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 slice
+
+ Title     : slice
+ Usage     : $contig2 = $contig->slice(20, 30)
+ Function  :
+
+             Creates a slice from the alignment inclusive of start and
+             end columns.  Sequences with no residues in the slice are
+             excluded from the new alignment and a warning is printed.
+             Slice beyond the length of the sequence does not do
+             padding.
+
+ Returns   : a Bio::Assembly::Contig object
+ Argument  : positive integer for start column
+             positive integer for end column
+
+=cut
+
+sub slice {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 Change sequences within the MSE
+
+These methods affect characters in all sequences without changing the
+alignment.
+
+
+=head2 map_chars
+
+ Title     : map_chars
+ Usage     : $contig->map_chars('\.','-')
+ Function  :
+
+             Does a s/$arg1/$arg2/ on the sequences. Useful for gap
+             characters
+
+             Notice that the from (arg1) is interpreted as a regex,
+             so be careful about quoting meta characters (eg
+             $contig->map_chars('.','-') wont do what you want)
+
+ Returns   :
+ Argument  : 'from' regexp
+             'to' string
+
+=cut
+
+sub map_chars {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 uppercase
+
+ Title     : uppercase()
+ Usage     : $contig->uppercase()
+ Function  : Sets all the sequences to uppercase
+ Returns   :
+ Argument  :
+
+=cut
+
+sub uppercase {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 match_line
+
+ Title    : match_line()
+ Usage    : $contig->match_line()
+ Function : Generates a match line - much like consensus string
+            except that a line indicating the '*' for a match.
+ Argument : (optional) Match line characters ('*' by default)
+            (optional) Strong match char (':' by default)
+            (optional) Weak match char ('.' by default)
+
+=cut
+
+sub match_line {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 match
+
+ Title     : match()
+ Usage     : $contig->match()
+ Function  :
+
+             Goes through all columns and changes residues that are
+             identical to residue in first sequence to match '.'
+             character. Sets match_char.
+
+             USE WITH CARE: Most MSE formats do not support match
+             characters in sequences, so this is mostly for output
+             only. NEXUS format (Bio::AlignIO::nexus) can handle
+             it.
+
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+=cut
+
+sub match {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 unmatch
+
+ Title     : unmatch()
+ Usage     : $contig->unmatch()
+ Function  :
+
+             Undoes the effect of method match. Unsets match_char.
+
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+=cut
+
+sub unmatch {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+
+=head2 MSE attributes
+
+Methods for setting and reading the MSE attributes.
+
+Note that the methods defining character semantics depend on the user
+to set them sensibly.  They are needed only by certain input/output
+methods. Unset them by setting to an empty string ('').
+
+=head2 id
+
+ Title     : id
+ Usage     : $contig->id("Ig")
+ Function  : Gets/sets the id field of the alignment
+ Returns   : An id string
+ Argument  : An id string (optional)
+
+=cut
+
+sub id {
+    # Combined getter/setter: a defined argument replaces the stored id.
+    my $self = shift;
+    my ($contig_name) = @_;
+
+    $self->{'_id'} = $contig_name if defined $contig_name;
+
+    return $self->{'_id'};
+}
+
+=head2 missing_char
+
+ Title     : missing_char
+ Usage     : $contig->missing_char("?")
+ Function  : Gets/sets the missing_char attribute of the alignment
+             It is generally recommended to set it to 'n' or 'N'
+             for nucleotides and to 'X' for protein.
+ Returns   : An missing_char string,
+ Argument  : An missing_char string (optional)
+
+=cut
+
+sub missing_char {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 match_char
+
+ Title     : match_char
+ Usage     : $contig->match_char('.')
+ Function  : Gets/sets the match_char attribute of the alignment
+ Returns   : An match_char string,
+ Argument  : An match_char string (optional)
+
+=cut
+
+sub match_char {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 gap_char
+
+ Title     : gap_char
+ Usage     : $contig->gap_char('-')
+ Function  : Gets/sets the gap_char attribute of the alignment
+ Returns   : An gap_char string, defaults to '-'
+ Argument  : An gap_char string (optional)
+
+=cut
+
+sub gap_char {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 symbol_chars
+
+ Title   : symbol_chars
+ Usage   : my @symbolchars = $contig->symbol_chars;
+ Function: Returns all the seen symbols (other than gaps)
+ Returns : array of characters that are the seen symbols
+ Argument: boolean to include the gap/missing/match characters
+
+=cut
+
+sub symbol_chars {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 Alignment descriptors
+
+These read only methods describe the MSE in various ways.
+
+
+=head2 consensus_string
+
+ Title     : consensus_string
+ Usage     : $str = $contig->consensus_string($threshold_percent)
+ Function  : Makes a strict consensus
+ Returns   :
+ Argument  : Optional threshold ranging from 0 to 100.
+             The consensus residue has to appear at least threshold %
+             of the sequences at a given location, otherwise a '?'
+             character will be placed at that location.
+             (Default value = 0%)
+
+=cut
+
+sub consensus_string {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 consensus_iupac
+
+ Title     : consensus_iupac
+ Usage     : $str = $contig->consensus_iupac()
+ Function  :
+
+             Makes a consensus using IUPAC ambiguity codes from DNA
+             and RNA. The output is in upper case except when gaps in
+             a column force output to be in lower case.
+
+             Note that if your alignment sequences contain a lot of
+             IUPAC ambiguity codes you often have to manually set
+             alphabet.  Bio::PrimarySeq::_guess_type thinks they
+             indicate a protein sequence.
+
+ Returns   : consensus string
+ Argument  : none
+ Throws    : on protein sequences
+
+
+=cut
+
+sub consensus_iupac {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 is_flush
+
+ Title     : is_flush
+ Usage     : if( $contig->is_flush() )
+           :
+           :
+ Function  : Tells you whether the alignment
+           : is flush, ie all of the same length
+           :
+           :
+ Returns   : 1 or 0
+ Argument  :
+
+=cut
+
+sub is_flush {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 length
+
+ Title     : length()
+ Usage     : $len = $contig->length()
+ Function  : Returns the maximum length of the alignment.
+             To be sure the alignment is a block, use is_flush
+ Returns   :
+ Argument  :
+
+=cut
+
+sub length {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+
+    return $self->throw_not_implemented();
+}
+
+=head2 maxdisplayname_length
+
+ Title     : maxdisplayname_length
+ Usage     : $contig->maxdisplayname_length()
+ Function  :
+
+             Gets the maximum length of the displayname in the
+             alignment. Used in writing out various MSE formats.
+
+ Returns   : integer
+ Argument  :
+
+=cut
+
+sub maxname_length {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 no_residues
+
+ Title     : no_residues
+ Usage     : $no = $contig->no_residues
+ Function  : number of residues in total in the alignment
+ Returns   : integer
+ Argument  :
+
+=cut
+
+sub no_residues {
+    # Reports the running residue total kept in '_nof_residues'.
+    my $self = shift;
+
+    return $self->{'_nof_residues'};
+}
+
+=head2 no_sequences
+
+ Title     : no_sequences
+ Usage     : $depth = $contig->no_sequences
+ Function  : number of sequence in the sequence alignment
+ Returns   : integer
+ Argument  : None
+
+=cut
+
+sub no_sequences {
+    # Counts the entries of the '_elem' hash (one per sequence ID).
+    my $self = shift;
+
+    my $count = keys %{ $self->{'_elem'} };
+    return $count;
+}
+
+=head2 percentage_identity
+
+ Title   : percentage_identity
+ Usage   : $id = $contig->percentage_identity
+ Function: The function calculates the percentage identity of the alignment
+ Returns : The percentage identity of the alignment (as defined by the
+						     implementation)
+ Argument: None
+
+=cut
+
+sub percentage_identity {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 overall_percentage_identity
+
+ Title   : overall_percentage_identity
+ Usage   : $id = $contig->overall_percentage_identity
+ Function: The function calculates the percentage identity of
+           the conserved columns
+ Returns : The percentage identity of the conserved columns
+ Args    : None
+
+=cut
+
+sub overall_percentage_identity {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+
+=head2 average_percentage_identity
+
+ Title   : average_percentage_identity
+ Usage   : $id = $contig->average_percentage_identity
+ Function: The function uses a fast method to calculate the average
+           percentage identity of the alignment
+ Returns : The average percentage identity of the alignment
+ Args    : None
+
+=cut
+
+sub average_percentage_identity {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 Alignment positions
+
+Methods to map a sequence position into an alignment column and back.
+column_from_residue_number() does the former. The latter is really a
+property of the sequence object and can be done using
+L<Bio::LocatableSeq::location_from_column>:
+
+    # select somehow a sequence from the alignment, e.g.
+    my $seq = $contig->get_seq_by_pos(1);
+    #$loc is undef or Bio::LocationI object
+    my $loc = $seq->location_from_column(5);
+
+
+=head2 column_from_residue_number
+
+ Title   : column_from_residue_number
+ Usage   : $col = $contig->column_from_residue_number( $seqname, $resnumber)
+ Function:
+
+           This function gives the position in the alignment
+           (i.e. column number) of the given residue number in the
+           sequence with the given name. For example, for the
+           alignment
+
+  	     Seq1/91-97 AC..DEF.GH
+  	     Seq2/24-30 ACGG.RTY..
+  	     Seq3/43-51 AC.DDEFGHI
+
+           column_from_residue_number( "Seq1", 94 ) returns 5.
+           column_from_residue_number( "Seq2", 25 ) returns 2.
+           column_from_residue_number( "Seq3", 50 ) returns 9.
+
+           An exception is thrown if the residue number would lie
+           outside the length of the alignment
+           (e.g. column_from_residue_number( "Seq2", 22 )
+
+	  Note: If the the parent sequence is represented by more than
+	  one alignment sequence and the residue number is present in
+	  them, this method finds only the first one.
+
+ Returns : A column number for the position in the alignment of the
+           given residue in the given sequence (1 = first column)
+ Args    : A sequence id/name (not a name/start-end)
+           A residue number in the whole sequence (not just that
+           segment of it in the alignment)
+
+=cut
+
+sub column_from_residue_number {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 Sequence names
+
+Methods to manipulate the display name. The default name based on the
+sequence id and subsequence positions can be overridden in various
+ways.
+
+=head2 displayname
+
+ Title     : displayname
+ Usage     : $contig->displayname("Ig", "IgA")
+ Function  : Gets/sets the display name of a sequence in the alignment
+           :
+ Returns   : A display name string
+ Argument  : name of the sequence
+             displayname of the sequence (optional)
+
+=cut
+
+sub displayname {
+    # Intentionally a no-op: arguments are ignored and nothing is returned.
+    return;
+}
+
+=head2 set_displayname_count
+
+ Title     : set_displayname_count
+ Usage     : $contig->set_displayname_count
+ Function  :
+
+             Sets the names to be name_# where # is the number of
+             times this name has been used.
+
+ Returns   : None
+ Argument  : None
+
+=cut
+
+sub set_displayname_count {
+    # Placeholder: simply delegates to throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 set_displayname_flat
+
+ Title     : set_displayname_flat
+ Usage     : $contig->set_displayname_flat()
+ Function  : Makes all the sequences be displayed as just their name,
+             not name/start-end
+ Returns   : 1
+ Argument  : None
+
+=cut
+
+sub set_displayname_flat {
+    # Intentionally a no-op: nothing is changed and nothing is returned.
+    return;
+}
+
+=head2 set_displayname_normal
+
+ Title     : set_displayname_normal
+ Usage     : $contig->set_displayname_normal()
+ Function  : Makes all the sequences be displayed as name/start-end
+ Returns   : None
+ Argument  : None
+
+=cut
+
+sub set_displayname_normal {
+    # Intentionally a no-op: nothing is changed and nothing is returned.
+    return;
+}
+
+=head1 Internal Methods
+
+=head2 _binary_search
+
+ Title     : _binary_search
+ Usage     : _binary_search($list,$query)
+ Function  :
+
+             Find a number in a sorted list of numbers.  Return values
+             may be one or two integers. One positive integer or zero
+             (>=0) is the index of the element that stores the queried
+             value.  Two positive integers (or zero and another
+             number) are the indexes of elements among which the
+             queried value should be placed. Negative single values
+             mean:
+
+             -1: $query is smaller than smallest element in list
+             -2: $query is greater than greatest element in list
+
+ Returns   : array of integers
+ Argument  :
+             $list  : array reference
+             $query : integer
+
+=cut
+
+sub _binary_search {
+    # Locate $query in the sorted array referenced by $list.
+    # Returns (index) on an exact hit, (left, right) for the two adjacent
+    # indexes the query falls between, (-1) when the query is smaller than
+    # the first element and (-2) when it is greater than the last one (or
+    # when the list is empty).
+    my $list   = shift;
+    my $query  = shift;
+
+    # Nothing to search
+    return (-2) unless (defined($list) && scalar(@{$list}));
+
+    my $start = 0;
+    my $end   = $#{$list};
+
+    # Settle both boundaries first; this also covers one-element lists
+    my $low  = _compare($query,$list->[$start]);
+    my $high = _compare($query,$list->[$end]);
+    return ($start) if ($low  == 0);
+    return ($end)   if ($high == 0);
+    return (-1)     if ($low  <  0);
+    return (-2)     if ($high >  0);
+
+    # From here on list[start] < query < list[end]. Bisect between the
+    # current bounds, so the window never moves back below $start
+    while ($end - $start > 1) {
+        my $middle = int(($start+$end)/2);
+        my $order  = _compare($query,$list->[$middle]);
+        return ($middle) if ($order == 0);
+        if ($order < 0) {
+            $end = $middle;
+        } else {
+            $start = $middle; # query is greater: move region beginning
+        }
+    }
+    return ($start,$end);
+}
+
+=head2 _compare
+
+    Title   : _compare
+    Usage   : _compare($arg1,$arg2)
+    Function: Perform numeric or string comparisons
+    Returns : integer (0, 1 or -1)
+    Args    : values to be compared
+
+=cut
+
+sub _compare {
+    my ($left, $right) = @_;
+
+    # Only values made entirely of digits are compared as numbers;
+    # everything else is compared as strings
+    my $both_numeric = ($left =~ /^\d+$/) && ($right =~ /^\d+$/);
+
+    return $both_numeric ? ($left <=> $right) : ($left cmp $right);
+}
+
+=head2 _nof_gaps
+
+    Title   : _nof_gaps
+    Usage   : _nof_gaps($array_ref, $query)
+    Function: number of gaps found before position $query
+    Returns : integer
+    Args    :
+              $array_ref : gap registry reference
+              $query     : [integer] a position in a sequence
+
+=cut
+
+#' emacs...
+sub _nof_gaps {
+    my ($gaps, $position) = @_;
+
+    # No registry, or an empty one, means no gaps at all
+    return 0 unless (defined($gaps) && scalar(@{$gaps}));
+
+    # Where does the position fall among the registered gaps?
+    my @hit = _binary_search($gaps, $position);
+    my $first = $hit[0];
+
+    # Past the last gap: every registered gap precedes the position
+    return scalar(@{$gaps}) if ($first == -2);
+    # Before the first gap
+    return 0 if ($first == -1);
+    # Any other negative code leaves the position untouched
+    return $position if ($first < 0);
+
+    # Two indexes: the position lies between two gaps.
+    # One index: the position sits exactly on a gap.
+    return (scalar(@hit) > 1) ? $first + 1 : $first;
+}
+
+=head2 _padded_unpadded
+
+    Title   : _padded_unpadded
+    Usage   : _padded_unpadded($array_ref, $query)
+    Function:
+
+              Returns a coordinate corresponding to
+              position $query after gaps were
+              removed from a sequence.
+
+    Returns : integer
+    Args    :
+              $array_ref : reference to this gap registry
+              $query     : [integer] coordinate to change
+
+=cut
+
+sub _padded_unpadded {
+    my ($list, $query) = @_;
+
+    # Number of gaps registered before the queried position
+    my $gaps_before = _nof_gaps($list, $query);
+
+    # A position that is itself a gap is first moved one column back
+    if (defined($list->[$gaps_before]) && ($list->[$gaps_before] == $query)) {
+        $query -= 1;
+    }
+
+    return $query - $gaps_before;
+}
+
+=head2 _unpadded_padded
+
+    Title   : _unpadded_padded
+    Usage   : _unpadded_padded($array_ref, $query)
+    Function:
+
+              Returns the value corresponding to
+              ungapped position $query when gaps are
+              counted as valid sites in a sequence
+
+    Returns :
+    Args    : $array_ref = a reference to this sequence's gap registry
+              $query = [integer] location to change
+
+=cut
+
+#'
+sub _unpadded_padded {
+    my ($list, $query) = @_;
+
+    # Shift the position by the gaps found before it, then keep shifting
+    # for as long as the move uncovers further gaps
+    my $counted = _nof_gaps($list, $query);
+    $query += $counted;
+    while ((my $found = _nof_gaps($list, $query)) > $counted) {
+        $query  += $found - $counted;
+        $counted = $found;
+    }
+
+    # If the position landed on a gap, step forward to the first base after it
+    while (defined($list->[$counted]) && ($list->[$counted] == $query)) {
+        $query   += 1;
+        $counted += 1;
+    }
+
+    return $query;
+}
+
+=head2 _register_gaps
+
+    Title   : _register_gaps
+    Usage   : $self->_register_gaps($seq, $array_ref)
+    Function: stores gap locations for a sequence
+    Returns : number of gaps found
+    Args    :
+              $seq       : sequence string
+              $array_ref : a reference to an array,
+                           where gap locations will
+                           be stored
+
+=cut
+
+sub _register_gaps {
+    my ($self, $sequence, $dbref) = @_;
+
+    if (ref($sequence)) {
+        $self->throw("Not an aligned sequence string to register gaps");
+    }
+    unless (ref($dbref) eq 'ARRAY') {
+        $self->throw("Not an array reference for gap registry");
+    }
+
+    # Start from an empty registry
+    @{$dbref} = ();
+
+    # Nothing to register for an undefined sequence
+    return 0 unless defined $sequence;
+
+    # Store the 1-based column of every '-' character
+    while ($sequence =~ /-/g) {
+        push(@{$dbref}, pos($sequence));
+    }
+
+    return scalar(@{$dbref});
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ContigAnalysis.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ContigAnalysis.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ContigAnalysis.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,497 @@
+# $Id: ContigAnalysis.pm,v 1.6.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Assembly::ContigAnalysis
+#
+# Cared for by Robson francisco de Souza <rfsouza at citri.iq.usp.br>
+#
+# Copyright Robson Francisco de Souza
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::ContigAnalysis - 
+    Perform analysis on sequence assembly contigs.
+
+=head1 SYNOPSIS
+
+    # Module loading
+    use Bio::Assembly::ContigAnalysis;
+
+    # Assembly loading methods
+    my $ca = new Bio::Assembly::ContigAnalysis( -contig=>$contigOBJ );
+
+    my @lcq = $ca->low_consensus_quality;
+    my @hqd = $ca->high_quality_discrepancies;
+    my @ss  = $ca->single_strand;
+
+=head1 DESCRIPTION
+
+A contig is a set of sequences, locally aligned to each other, when
+the sequences in a pair may be aligned. It may also include a
+consensus sequence. Bio::Assembly::ContigAnalysis is a module
+holding a collection of methods to analyze contig objects. It was
+developed around the Bio::Assembly::Contig implementation of contigs and
+can not work with another contig interface.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Robson Francisco de Souza
+
+Email: rfsouza at citri.iq.usp.br
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Assembly::ContigAnalysis;
+
+use strict;
+
+use base qw(Bio::Root::Root);
+
+=head1 Object creator
+
+=head2 new
+
+ Title     : new
+ Usage     : my $contig = Bio::Assembly::ContigAnalysis->new(-contig=>$contigOBJ);
+ Function  : Creates a new contig analysis object
+ Returns   : Bio::Assembly::ContigAnalysis
+ Args      :
+             -contig : a Bio::Assembly::Contig object
+
+=cut
+
+sub new {
+    # Constructor: builds the analysis object and stores the contig passed
+    # as -contig under the '_objref' key used by the analysis methods.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($contigOBJ) = $self->_rearrange([qw(CONTIG)], @args);
+
+    # Check the argument without calling a method on it, so that a missing
+    # or unblessed -contig value reaches the throw() below instead of dying
+    # with a generic "Can't call method" error.
+    unless (ref($contigOBJ)
+            && UNIVERSAL::isa($contigOBJ, "Bio::Assembly::Contig")) {
+        $self->throw("ContigAnal works only on Bio::Assembly::Contig objects\n");
+    }
+
+    $self->{'_objref'} = $contigOBJ;
+    return $self;
+}
+
+=head1 Analysis methods
+
+=head2 high_quality_discrepancies
+
+ Title     : high_quality_discrepancies
+ Usage     : my $sfc = $ContigAnal->high_quality_discrepancies();
+ Function  : 
+
+             Locates all high quality discrepancies among aligned
+             sequences and the consensus sequence.
+
+             Note: see Bio::Assembly::Contig POD documentation,
+             section "Coordinate System", for a definition of
+             available types. Default coordinate system type is
+             "gapped consensus", i.e. consensus sequence (with gaps)
+             coordinates. If limits are not specified, the entire
+             alignment is analyzed.
+
+ Returns   : an array of Bio::SeqFeature::Generic objects
+ Args      : optional arguments are
+             -threshold : cutoff value for low quality (minimum high quality)
+                          Default: 40
+             -ignore    : number of bases that will not be analysed at
+                          both ends of contig aligned elements
+                          Default: 5
+             -start     : start of interval that will be analyzed
+             -end       : end of interval that will be analyzed
+             -type      : coordinate system type for interval
+
+=cut
+
+sub high_quality_discrepancies {
+    # Scans the aligned region of every read and returns one
+    # Bio::SeqFeature::Generic per run of consecutive positions where the
+    # read base differs from the consensus base and the read quality is at
+    # least -threshold. Feature limits are in 'ungapped consensus'
+    # coordinates.
+    #
+    # Take the whole argument list: "= shift" kept only the object and
+    # silently dropped every option.
+    my ($self, @args) = @_;
+
+    my ($threshold,$ignore,$start,$end,$type) =
+        $self->_rearrange([qw(THRESHOLD IGNORE START END TYPE)], @args);
+
+    # Defining default threshold, HQD_ignore and coordinate system
+    $threshold = 40 unless (defined($threshold));
+    $ignore    = 5  unless (defined($ignore));
+    $type      = 'gapped consensus' unless (defined($type));
+
+    my $contig = $self->{'_objref'};
+
+    # Changing input coordinates system (if needed)
+    if (defined $start) {
+        $start = $contig->change_coord($type,'gapped consensus',$start)
+            if ($type ne 'gapped consensus');
+    } else {
+        $start = 1;
+    }
+    if (defined $end) {
+        $end = $contig->change_coord($type,'gapped consensus',$end)
+            if ($type ne 'gapped consensus');
+    } else {
+        $end = $contig->get_consensus_length();
+    }
+
+    # NOTE(review): $start/$end are in 'gapped consensus' coordinates at this
+    # point, yet the caller's original $type is still passed along -- confirm
+    # how get_seq_ids() interprets -type.
+    my @seqIDs = $contig->get_seq_ids(-start=>$start,
+                                      -end=>$end,
+                                      -type=>$type);
+    my $consensus = $contig->get_consensus_sequence()->seq;
+
+    # Scanning each read sequence and the contig sequence and
+    # collecting discrepancies
+    my @HQD = ();
+    foreach my $seqID (@seqIDs) {
+        # Loading read sequence and qualities
+        my $seq  = $contig->get_seq_by_name($seqID);
+        my $qual = $contig->get_qual_by_name($seqID);
+        unless (defined $qual) {
+            $self->warn("Can't correctly evaluate HQD without aligned sequence qualities for $seqID");
+            next;
+        }
+        my $sequence = $seq->seq;
+        my @quality  = @{ $qual->qual };
+
+        # Aligned region of the read, trimmed by $ignore bases at both ends
+        my $coord = $contig->get_seq_feat_by_tag($seq,"_align_clipping:$seqID");
+        unless (defined $coord) {
+            $self->warn("No alignment clipping feature found for $seqID, skipping it");
+            next;
+        }
+        my $astart = $coord->start + $ignore; # jump $ignore at start
+        my $aend   = $coord->end   - $ignore; # stop $ignore before end
+
+        # $i is declared outside the loop because its final value is needed
+        # to close a discrepancy that is still open when the loop ends
+        my ($d_start,$d_end,$i);
+        for ($i=$astart-1; $i<=$aend-1; $i++) {
+            # $i is a 0-based index; $pos is the matching 1-based
+            # 'gapped consensus' coordinate
+            my $pos = $i + 1;
+
+            # Changing coordinate $pos from 'gapped consensus' mode to "aligned $seqID"
+            my $seq_ix = $contig->change_coord('gapped consensus',"aligned $seqID",$pos);
+
+            # $start and $end are 1-based, so they must be compared with
+            # $pos: comparing them with $i skipped the first position of the
+            # interval and analysed one position past its end
+            next unless (($pos >= $start) && ($pos <= $end));
+
+            my $r_base = uc(substr($sequence,$seq_ix-1,1));
+            my $c_base = uc(substr($consensus,$i,1));
+            my $high_quality = ($quality[$seq_ix-1] >= $threshold);
+
+            if (!defined($d_start)) {
+                # Discrepant region start: store $d_start
+                if (($r_base ne $c_base) && $high_quality) {
+                    $d_start = $contig->change_coord('gapped consensus','ungapped consensus',$pos);
+                }
+            } elsif (!$high_quality || ($r_base eq $c_base)) {
+                # Quality change or end of discrepant region: the region
+                # ended at the previous position; store limits and reset
+                $d_end = $contig->change_coord('gapped consensus','ungapped consensus',$i);
+                push(@HQD, Bio::SeqFeature::Generic->new(-primary=>"high_quality_discrepancy:$seqID",
+                                                         -start=>$d_start,
+                                                         -end=>$d_end,
+                                                         -strand=>$seq->strand()) );
+                $d_start = undef;
+            }
+        } # for ($i=$astart-1; $i<=$aend-1; $i++)
+
+        # Loading discrepancies located at sub-sequence end, if any.
+        if (defined($d_start)) {
+            $d_end = $contig->change_coord('gapped consensus','ungapped consensus',$i);
+            push(@HQD, Bio::SeqFeature::Generic->new(-primary=>"high_quality_discrepancy:$seqID",
+                                                     -start=>$d_start,
+                                                     -end=>$d_end,
+                                                     -strand=>$seq->strand()) );
+        }
+    } # foreach my $seqID (@seqIDs)
+
+    return @HQD;
+}
+
+=head2 low_consensus_quality
+
+ Title     : low_consensus_quality
+ Usage     : my $sfc = $ContigAnal->low_consensus_quality();
+ Function  : Locates all low quality regions in the consensus
+ Returns   : an array of Bio::SeqFeature::Generic objects
+ Args      : optional arguments are
+             -threshold : cutoff value for low quality (minimum high quality)
+                          Default: 25
+             -start     : start of interval that will be analyzed
+             -end       : end of interval that will be analyzed
+             -type      : coordinate system type for interval
+
+=cut
+
+sub low_consensus_quality {
+    # Returns one Bio::SeqFeature::Generic ('low_consensus_quality') per run
+    # of consensus positions whose quality is at or below -threshold (or
+    # equal to 98). Feature limits are in 'ungapped consensus' coordinates.
+    #
+    # Take the whole argument list: "= shift" kept only the object and
+    # silently dropped every option.
+    my ($self, @args) = @_;
+
+    my ($threshold,$start,$end,$type) =
+        $self->_rearrange([qw(THRESHOLD START END TYPE)], @args);
+
+    # Setting default value for threshold
+    $threshold = 25 unless (defined($threshold));
+
+    my $contig = $self->{'_objref'};
+
+    # Loading qualities
+    my @quality = @{ $contig->get_consensus_quality()->qual };
+
+    # Changing interval limits to 'gapped consensus' coordinates.
+    # The contig is stored under '_objref' (see new()); the former 'objref'
+    # key does not exist, so these calls were made on undef.
+    $start = 1 unless (defined($start));
+    if (defined $type && ($type ne 'gapped consensus')) {
+        $start = $contig->change_coord($type,'gapped consensus',$start);
+        $end   = $contig->change_coord($type,'gapped consensus',$end) if (defined($end));
+    }
+    $end = $contig->get_consensus_length unless (defined $end);
+
+    # Scanning @quality vector and storing interval limits with base
+    # qualities less than or equal to the threshold value.
+    # $i is a 0-based index into @quality and is read after the loop to
+    # close a region that is still open at the end of the interval.
+    my ($lcq_start);
+    my ($i,@LCQ);
+    for ($i=$start-1; $i<=$end-1; $i++) {
+        if (!defined($lcq_start) && (($quality[$i] <= $threshold) || ($quality[$i] == 98))) {
+            # Region start, kept as a 1-based 'gapped consensus' coordinate
+            $lcq_start = $i+1;
+        } elsif (defined($lcq_start) && ($quality[$i] > $threshold)) {
+            # Region ended at the previous position (1-based coordinate $i)
+            $lcq_start  = $contig->change_coord('gapped consensus','ungapped consensus',$lcq_start);
+            my $lcq_end = $contig->change_coord('gapped consensus','ungapped consensus',$i);
+            push(@LCQ, Bio::SeqFeature::Generic->new(-start=>$lcq_start,
+                                                     -end=>$lcq_end,
+                                                     -primary=>'low_consensus_quality') );
+            $lcq_start = undef;
+        }
+    }
+
+    # Region still open at the end of the analysed interval
+    if (defined $lcq_start) {
+        $lcq_start  = $contig->change_coord('gapped consensus','ungapped consensus',$lcq_start);
+        my $lcq_end = $contig->change_coord('gapped consensus','ungapped consensus',$i);
+        push(@LCQ, Bio::SeqFeature::Generic->new(-start=>$lcq_start,
+                                                 -end=>$lcq_end,
+                                                 -primary=>'low_consensus_quality') );
+    }
+
+    return @LCQ;
+}
+
+=head2 not_confirmed_on_both_strands
+
+ Title     : not_confirmed_on_both_strands
+ Usage     : my @ncbs = $ContigAnal->not_confirmed_on_both_strands();
+ Function  : 
+
+             Locates all regions whose consensus bases were not
+             confirmed by bases from sequences aligned in both
+             orientations, i.e., in such regions, no bases in aligned
+             sequences of either +1 or -1 strand agree with the
+             consensus bases.
+
+ Returns   : an array of Bio::SeqFeature::Generic objects
+ Args      : optional arguments are
+             -start : start of interval that will be analyzed
+             -end   : end of interval that will be analyzed
+             -type  : coordinate system type for interval
+
+=cut
+
+sub not_confirmed_on_both_strands {
+    # For each read orientation seen in the contig, returns one
+    # Bio::SeqFeature::Generic per run of consensus positions where no read
+    # of that orientation agrees with the consensus base. Feature limits
+    # are in 'ungapped consensus' coordinates.
+    #
+    # Take the whole argument list: "= shift" kept only the object and
+    # silently dropped every option.
+    my ($self, @args) = @_;
+
+    my ($start,$end,$type) =
+        $self->_rearrange([qw(START END TYPE)], @args);
+
+    my $contig = $self->{'_objref'};
+
+    # Changing interval limits to 'gapped consensus' coordinates
+    $start = 1 unless (defined($start));
+    if (defined($type) && ($type ne 'gapped consensus')) {
+        $start = $contig->change_coord($type,'gapped consensus',$start);
+        $end   = $contig->change_coord($type,'gapped consensus',$end) if (defined($end));
+    }
+    $end = $contig->get_consensus_length unless (defined($end));
+
+    # Scanning alignment.
+    # $confirmed{$orientation}[$pos] > 0 : position confirmed on that strand
+    # $confirmed{$orientation}[$pos] = -1: consensus has a gap at $pos
+    my %confirmed = ();
+    my $consensus = $contig->get_consensus_sequence()->seq;
+    foreach my $seqID ($contig->get_seq_ids) {
+        # Loading read data
+        my $seq = $contig->get_seq_by_name($seqID);
+        my $sequence = $seq->seq;
+
+        # Aligned region of the read, converted to the read's own coordinates
+        my $coord = $contig->get_seq_feat_by_tag($seq,"_align_clipping:$seqID");
+        unless (defined $coord) {
+            $self->warn("No alignment clipping feature found for $seqID, skipping it");
+            next;
+        }
+        my $orientation = $coord->strand;
+        my $astart = $contig->change_coord('gapped consensus',"aligned $seqID",$coord->start);
+        my $aend   = $contig->change_coord('gapped consensus',"aligned $seqID",$coord->end);
+
+        # Scanning the aligned region and registering confirmed sites
+        for (my $i=$astart-1; $i<=$aend-1; $i++) {
+            # Read position $i+1 corresponds to consensus position $contig_ix
+            my $contig_ix = $contig->change_coord("aligned $seqID",'gapped consensus',$i+1);
+            next unless (($contig_ix >= $start) && ($contig_ix <= $end));
+
+            my $r_base = uc(substr($sequence,$i,1));
+            my $c_base = uc(substr($consensus,$contig_ix-1,1));
+            if ($c_base eq '-') {
+                $confirmed{$orientation}[$contig_ix] = -1;
+            } elsif ($r_base eq $c_base) { # Non discrepant site found
+                $confirmed{$orientation}[$contig_ix]++;
+            }
+        }
+    } # foreach my $seqID
+
+    # Locating non-confirmed regions for each orientation in %confirmed
+    my @NCBS = ();
+    foreach my $orientation (keys %confirmed) {
+        my ($ncbs_start,$ncbs_end);
+
+        for (my $i=$start; $i<=$end; $i++) {
+            my $hits = $confirmed{$orientation}[$i];
+            if (!defined($ncbs_start) && (!defined($hits) || ($hits == 0))) {
+                # Region start: position never confirmed on this strand
+                $ncbs_start = $contig->change_coord('gapped consensus','ungapped consensus',$i);
+            } elsif (defined($ncbs_start) && defined($hits) && ($hits > 0)) {
+                # Region ended at the previous position
+                $ncbs_end = $contig->change_coord('gapped consensus','ungapped consensus',$i-1);
+                push(@NCBS, Bio::SeqFeature::Generic->new(-start=>$ncbs_start,
+                                                          -end=>$ncbs_end,
+                                                          -strand=>$orientation,
+                                                          -primary=>"not_confirmed_on_both_strands") );
+                $ncbs_start = undef;
+            }
+        }
+
+        if (defined($ncbs_start)) { # NCBS at the end of the interval
+            $ncbs_end = $contig->change_coord('gapped consensus','ungapped consensus',$end);
+            push(@NCBS, Bio::SeqFeature::Generic->new(-start=>$ncbs_start,
+                                                      -end=>$ncbs_end,
+                                                      -strand=>$orientation,
+                                                      -primary=>'not_confirmed_on_both_strands') );
+        }
+    }
+
+    return @NCBS;
+}
+
+=head2 single_strand
+
+ Title     : single_strand
+ Usage     : my $sfc = $ContigAnal->single_strand();
+ Function  : 
+
+             Locates all regions covered by aligned sequences only in
+             one of the two strands, i.e., regions for which aligned
+             sequence's strand() method returns +1 or -1 for all
+             sequences.
+
+ Returns   : an array of Bio::SeqFeature::Generic objects
+ Args      : optional arguments are
+             -start : start of interval that will be analyzed
+             -end   : end of interval that will be analyzed
+             -type  : coordinate system type for interval
+
+=cut
+
+#'
+sub single_strand {
+    # Collects the "_aligned_coord:" features of each strand in the
+    # requested interval, merges them, and returns the complementary
+    # (uncovered) regions tagged 'single_strand_region'.
+    # Both helper methods currently call throw_not_implemented(), so this
+    # method throws once it reaches them.
+    #
+    # Take the whole argument list: "= shift" kept only the object and
+    # silently dropped every option.
+    my ($self, @args) = @_;
+
+    my ($start,$end,$type) =
+        $self->_rearrange([qw(START END TYPE)], @args);
+
+    my $contig = $self->{'_objref'};
+
+    # Changing interval limits to 'gapped consensus' coordinates.
+    # The contig is stored under '_objref' (see new()); the former 'objref'
+    # key does not exist, so these calls were made on undef.
+    $type  = 'gapped consensus' unless (defined($type));
+    $start = 1 unless (defined($start));
+    if ($type ne 'gapped consensus') {
+        $start = $contig->change_coord($type,'gapped consensus',$start);
+        $end   = $contig->change_coord($type,'gapped consensus',$end) if (defined($end));
+    }
+    ($end) = $contig->get_consensus_length unless (defined($end));
+
+    # Loading complete list of coordinates for aligned sequences
+    my $sfc = $contig->get_features_collection();
+    my @forward = grep { $_->primary_tag =~ /^_aligned_coord:/ }
+        $sfc->features_in_range(-start=>$start,
+                                -end=>$end,
+                                -contain=>0,
+                                -strand=>1,
+                                -strandmatch=>'strong');
+    my @reverse = grep { $_->primary_tag =~ /^_aligned_coord:/ }
+        $sfc->features_in_range(-start=>$start,
+                                -end=>$end,
+                                -contain=>0,
+                                -strand=>-1,
+                                -strandmatch=>'strong');
+
+    # Merging overlapping features
+    @forward = $self->_merge_overlapping_features(@forward);
+    @reverse = $self->_merge_overlapping_features(@reverse);
+
+    # Finding single stranded regions
+    my ($length) = $contig->get_consensus_length;
+    $length  = $contig->change_coord('gapped consensus','ungapped consensus',$length);
+    @forward = $self->_complementary_features_list(1,$length,@forward);
+    @reverse = $self->_complementary_features_list(1,$length,@reverse);
+
+    my @SS = ();
+    foreach my $feat (@forward, @reverse) {
+        $feat->primary_tag('single_strand_region');
+        push(@SS,$feat);
+    }
+
+    return @SS;
+}
+
+=head1 Internal Methods
+
+=head2 _merge_overlapping_features
+
+ Title     : _merge_overlapping_features
+ Usage     : my @feat = $ContigAnal->_merge_overlapping_features(@features);
+ Function  : Merge all overlapping features into features
+             that hold original features as sub-features
+ Returns   : array of Bio::SeqFeature::Generic objects
+ Args      : array of Bio::SeqFeature::Generic objects
+
+=cut
+
+sub _merge_overlapping_features {
+    # Placeholder: merging is not implemented yet, so this always throws
+    # through throw_not_implemented().
+    my ($self, @feat) = @_;
+
+    $self->throw_not_implemented();
+}
+
+=head2 _complementary_features_list
+
+ Title     : _complementary_features_list
+ Usage     : @feat = $ContigAnal->_complementary_features_list($start,$end,@features);
+ Function  : Build a list of features for regions
+             not covered by features in @features array
+ Returns   : array of Bio::SeqFeature::Generic objects
+ Args      : 
+             $start    : [integer] start of first output feature
+             $end      : [integer] end of last output feature
+             @features : array of Bio::SeqFeature::Generic objects
+
+=cut
+
+sub _complementary_features_list {
+    # Placeholder: building the complementary list is not implemented yet,
+    # so this always throws through throw_not_implemented().
+    my ($self, $start, $end, @feat) = @_;
+
+    $self->throw_not_implemented();
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/ace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/ace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/ace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,439 @@
+# $Id: ace.pm,v 1.13.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+## BioPerl module for Bio::Assembly::IO::ace
+#
+# Copyright by Robson F. de Souza
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::IO::ace -  module to load phrap ACE files.
+
+=head1 SYNOPSIS
+
+    # Building an input stream
+    use Bio::Assembly::IO;
+
+    # Assembly loading methods
+    $io = new Bio::Assembly::IO(-file=>"SGC0-424.fasta.screen.ace.1",
+                         -format=>"ace");
+
+    $assembly = $io->next_assembly;
+
+=head1 DESCRIPTION
+
+This package loads the ACE files from the (phred/phrap/consed) package
+by Phil Green.  It was written to be used as a driver module for
+Bio::Assembly::IO input/output.
+
+=head2 Implementation
+
+Assemblies are loaded into Bio::Assembly::Scaffold objects composed by
+Bio::Assembly::Contig objects. In addition to default
+"_aligned_coord:$seqID" feature class from Bio::Assembly::Contig, contig
+objects loaded by this module will have the following special feature
+classes in their feature collection:
+
+"_align_clipping:$seqID" : location of subsequence in sequence $seqID
+                           which is aligned to the contig
+
+"_quality_clipping:$seqID" : location of good quality subsequence for
+                             sequence $seqID
+
+"consensus tags", as they are called in Consed's documentation, is
+equivalent to a bioperl sequence feature and, therefore, are added to
+the feature collection using their type field (see Consed's README.txt
+file) as primary tag.
+
+"read tags" are also sequence features and are stored as
+sub_SeqFeatures of the sequence's coordinate feature (the
+corresponding "_aligned_coord:$seqID" feature, easily accessed through
+get_seq_coord() method).
+
+"whole assembly tags" have no start and end, as they are not
+associated to any particular sequence in the assembly, and are added
+to the assembly's annotation collection using phrap as tag.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Robson Francisco de Souza
+
+Email rfsouza at citri.iq.usp.br
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Assembly::IO::ace;
+
+use strict;
+
+use Bio::Assembly::Scaffold;
+use Bio::Assembly::Contig;
+use Bio::Assembly::Singlet;
+use Bio::LocatableSeq;
+use Bio::Annotation::SimpleValue;
+use Bio::Seq::Quality;
+use Bio::SeqIO;
+use Bio::SeqFeature::Generic;
+use Dumpvalue();
+my $dumper = new Dumpvalue();
+$dumper->veryCompact(1);
+
+use base qw(Bio::Assembly::IO);
+
+=head1 Parser methods
+
+=head2 next_assembly
+
+ Title   : next_assembly
+ Usage   : $unigene = $stream->next_assembly()
+ Function: returns the next assembly in the stream
+ Returns : Bio::Assembly::Scaffold object
+ Args    : NONE
+
+=cut
+
+sub next_assembly {
+    # Reads the whole ACE stream line by line, dispatching on the record
+    # tag at the start of each line (CO, BQ, AF, RD, QA, DS, CT, RT, WA),
+    # and returns a Bio::Assembly::Scaffold. Afterwards, if a matching
+    # ".singlets" FASTA file exists next to the ACE file, its sequences are
+    # added to the scaffold as singlets.
+    my $self = shift; # Object reference
+    my $lingering_read;   # last read loaded by an RD record (used by DS)
+    local $/="\n";
+
+    # Resetting assembly data structure
+    my $assembly = Bio::Assembly::Scaffold->new(-source=>'phrap');
+
+    # Looping over all ACE file lines.
+    # All read loops test definedness, so that a false-valued line (e.g. a
+    # final "0" without newline) does not end parsing early.
+    my ($contigOBJ,$read_name);
+    my $read_data = {}; # Temporary holder for read data
+    while (defined($_ = $self->_readline)) { # By now, ACE files hold a single assembly
+        chomp;
+
+        # Loading assembly information (ASsembly field)
+#       (/^AS (\d+) (\d+)/) && do {
+#           $assembly->_set_nof_contigs($1);
+#           $assembly->_set_nof_sequences_in_contigs($2);
+#       };
+
+        # Loading contig sequence (COntig sequence field)
+        (/^CO Contig(\d+) (\d+) (\d+) (\d+) (\w+)/) && do { # New contig found!
+            # Copy the captures before any other code can run a regex
+            my ($contigID,$ori_flag) = ($1,$5);
+            $contigOBJ = Bio::Assembly::Contig->new(-source=>'phrap', -id=>$contigID);
+#           $contigOBJ->set_nof_bases($2); # Contig length in base pairs
+#           $contigOBJ->set_nof_reads($3); # Number of reads in this contig
+#           $contigOBJ->set_nof_segments($4); # Number of read segments selected for consensus assembly
+            my $ori = ($ori_flag eq 'U' ? 1 : -1); # 'C' if contig was complemented (using consed) or U if not (default)
+            $contigOBJ->strand($ori);
+            my $consensus_sequence = undef;
+            while (defined($_ = $self->_readline)) { # Looping over contig lines
+                chomp;                   # Drop <ENTER> (\n) on current line
+                last if (/^$/);          # Stop if empty line (contig end) is found
+                s/\*/-/g; # Forcing '-' as gap symbol
+                $consensus_sequence .= $_;
+            }
+
+            my $consensus_length = length($consensus_sequence);
+            $consensus_sequence = Bio::LocatableSeq->new(-seq=>$consensus_sequence,
+                                                         -start=>1,
+                                                         -end=>$consensus_length,
+                                                         -id=>"Consensus sequence for $contigID");
+            $contigOBJ->set_consensus_sequence($consensus_sequence);
+            $assembly->add_contig($contigOBJ);
+        };
+
+        # Loading contig qualities... (Base Quality field)
+        /^BQ/ && do {
+            my $consensus = $contigOBJ->get_consensus_sequence()->seq();
+            my ($i,$j,@tmp);
+            my @quality = ();
+            $j = 0;   # index into the (gapped) consensus string
+            while (defined($_ = $self->_readline)) {
+                chomp;
+                last if (/^$/);
+                @tmp = grep { /^\d+$/ } split(/\s+/);
+                $i = 0;   # index into the quality values of this line
+                my $previous = 0;
+                my $next     = 0;
+                while ($i<=$#tmp) {
+                    # IF base is a gap, quality is the average for neighbouring sites
+                    # NOTE(review): at a gap $tmp[$i] has not been consumed yet,
+                    # so the following base's value looks like $tmp[$i] rather
+                    # than $tmp[$i+1] -- left unchanged, please confirm.
+                    if (substr($consensus,$j,1) eq '-') {
+                        $previous = $tmp[$i-1] unless ($i == 0);
+                        if ($i < $#tmp) {
+                            $next = $tmp[$i+1];
+                        } else {
+                            $next = 0;
+                        }
+                        push(@quality,int(($previous+$next)/2));
+                    } else {
+                        push(@quality,$tmp[$i]);
+                        $i++;
+                    }
+                    $j++;
+                }
+            }
+
+            my $qual = Bio::Seq::Quality->new(-qual=>join(" ",@quality),
+                                              -id=>$contigOBJ->id());
+            $contigOBJ->set_consensus_quality($qual);
+        };
+
+        # Loading read info... (Assembled From field)
+        /^AF (\S+) (C|U) (-*\d+)/ && do {
+            $read_name = $1; my $ori = $2;
+            $read_data->{$read_name}{'padded_start'} = $3; # aligned start
+            $ori = ( $ori eq 'U' ? 1 : -1);
+            $read_data->{$read_name}{'strand'}  = $ori;
+        };
+
+        # Loading base segments definitions (Base Segment field)
+#       /^BS (\d+) (\d+) (\S+)/ && do {
+#           if (exists($self->{'contigs'}[$contig]{'reads'}{$3}{'segments'})) {
+#               $self->{'contigs'}[$contig]{'reads'}{$3}{'segments'} .= " " . $1 . " " . $2;
+#           } else { $self->{'contigs'}[$contig]{'reads'}{$3}{'segments'} = $1 . " " . $2 }
+#       };
+
+        # Loading reads... (ReaD sequence field)
+        /^RD (\S+) (-*\d+) (\d+) (\d+)/ && do {
+            $read_name = $1;
+            $read_data->{$read_name}{'length'} = $2; # number_of_padded_bases
+            $read_data->{$read_name}{'contig'} = $contigOBJ;
+#           $read_data->{$read_name}{'number_of_read_info_items'} = $3;
+#           $read_data->{$read_name}{'number_of_tags'}            = $4;
+            my $read_sequence;
+            while (defined($_ = $self->_readline)) {
+                chomp;
+                last if (/^$/);
+                s/\*/-/g; # Forcing '-' as gap symbol
+                $read_sequence .= $_; # aligned read sequence
+            }
+            my $read = Bio::LocatableSeq->new(-seq=>$read_sequence,
+                                              -start=>1,
+                                              -end=>$read_data->{$read_name}{'length'},
+                                              -strand=>$read_data->{$read_name}{'strand'},
+                                              -id=>$read_name,
+                                              -primary_id=>$read_name,
+                                              -alphabet=>'dna');
+            $lingering_read = $read;
+            # Adding read location and sequence to contig ("gapped consensus" coordinates)
+            my $padded_start = $read_data->{$read_name}{'padded_start'};
+            my $padded_end   = $padded_start + $read_data->{$read_name}{'length'} - 1;
+            my $coord = Bio::SeqFeature::Generic->new(-start=>$padded_start,
+                                                      -end=>$padded_end,
+                                                      -strand=>$read_data->{$read_name}{'strand'},
+                                                      -tag => { 'contig' => $contigOBJ->id }
+                                                      );
+            $contigOBJ->set_seq_coord($coord,$read);
+        };
+
+        # Loading read trimming and alignment ranges...
+        /^QA (-?\d+) (-?\d+) (-?\d+) (-?\d+)/ && do {
+            my $qual_start  = $1; my $qual_end  = $2;
+            my $align_start = $3; my $align_end = $4;
+
+            # -1 -1 means "no range": no feature is created in that case
+            unless ($align_start == -1 && $align_end == -1) {
+                $align_start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$align_start);
+                $align_end   = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$align_end);
+                my $align_feat = Bio::SeqFeature::Generic->new(-start=>$align_start,
+                                                               -end=>$align_end,
+                                                               -strand=>$read_data->{$read_name}{'strand'},
+                                                               -primary=>"_align_clipping:$read_name");
+                $align_feat->attach_seq( $contigOBJ->get_seq_by_name($read_name) );
+                $contigOBJ->add_features([ $align_feat ], 0);
+            }
+
+            unless ($qual_start == -1 && $qual_end == -1) {
+                $qual_start  = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$qual_start);
+                $qual_end    = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$qual_end);
+                my $qual_feat = Bio::SeqFeature::Generic->new(-start=>$qual_start,
+                                                              -end=>$qual_end,
+                                                              -strand=>$read_data->{$read_name}{'strand'},
+                                                              -primary=>"_quality_clipping:$read_name");
+                $qual_feat->attach_seq( $contigOBJ->get_seq_by_name($read_name) );
+                $contigOBJ->add_features([ $qual_feat ], 0);
+            }
+        };
+
+        # Loading read description (DeScription fields)
+        # chad was here! easter 2004.
+        # lingering read is a locatableseq. is there a better way to do this?
+        # i am simply adding more keys to the locatableseq
+        /^DS / && do {
+            /CHEM: (\S+)/ && do {
+                $lingering_read->{'chemistry'} = $1;
+            };
+            /CHROMAT_FILE: (\S+)/ && do {
+                $lingering_read->{'chromatfilename'} = $1;
+            };
+            /DIRECTION: (\w+)/ && do {
+                my $ori = $1;
+                if    ($ori eq 'rev') { $ori = 'C' }
+                elsif ($ori eq 'fwd') { $ori = 'U' }
+                $lingering_read->{'strand'} = $ori;
+            };
+            /DYE: (\S+)/ && do {
+                $lingering_read->{'dye'} = $1;
+            };
+            /PHD_FILE: (\S+)/ && do {
+                $lingering_read->{'phdfilename'} = $1;
+            };
+            /TEMPLATE: (\S+)/ && do {
+                $lingering_read->{'template'} = $1;
+            };
+            /TIME: (\S+ \S+ \d+ \d+\:\d+\:\d+ \d+)/ && do {
+                $lingering_read->{'phd_time'} = $1;
+            };
+        };
+
+        # Loading contig tags ('tags' in phrap terminology, but Bioperl calls them features)
+        /^CT\s*\{/ && do {
+            my ($contigID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
+            my %tags = (source => $source, creation_date => $date);
+            $contigID =~ s/^Contig//i;
+            # Lines inside COMMENT{ ... C} go to 'comment', the rest to 'extra_info'
+            my $tag_type = 'extra_info';
+            while (defined($_ = $self->_readline)) {
+                if (/COMMENT\s*\{/) {
+                    $tag_type = 'comment';
+                }
+                elsif (/C\}/) {
+                    $tag_type = 'extra_info';
+                }
+                elsif (/\}/) {
+                    last;
+                }
+                else {
+                    $tags{$tag_type} .= "$_";
+                }
+            }
+            my $contig_tag = Bio::SeqFeature::Generic->new(-start=>$start,
+                                                           -end=>$end,
+                                                           -primary=>$type,
+                                                           -tag=>\%tags,
+                                                           );
+            $assembly->get_contig_by_id($contigID)->add_features([ $contig_tag ],1);
+        };
+
+        # Loading read tags
+        /^RT\s*\{/ && do {
+            my ($readID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
+            my $extra_info = undef;
+            while (defined($_ = $self->_readline)) {
+                last if (/\}/);
+                $extra_info .= $_;
+            }
+
+            # The tag names its own read: use that read and the contig it
+            # was loaded into, not whichever read/contig was parsed last.
+            my $contig = exists($read_data->{$readID})
+                       ? $read_data->{$readID}{'contig'}
+                       : undef;
+            unless (defined $contig) {
+                $self->warn("Read tag refers to unknown read '$readID', skipping it");
+                next;
+            }
+            $start  = $contig->change_coord("aligned $readID",'gapped consensus',$start);
+            $end    = $contig->change_coord("aligned $readID",'gapped consensus',$end);
+            my $read_tag = Bio::SeqFeature::Generic->new(-start=>$start,
+                                                         -end=>$end,
+                                                         -primary=>$type,
+                                                         -tag=>{ 'source' => $source,
+                                                                 'creation_date' => $date,
+                                                                 'extra_info' => $extra_info
+                                                                 });
+            my $coord  = $contig->get_seq_coord( $contig->get_seq_by_name($readID) );
+            $coord->add_sub_SeqFeature($read_tag);
+        };
+
+        # Loading whole assembly tags
+        /^WA\s*\{/ && do {
+            my ($type,$source,$date) = split(' ',$self->_readline);
+            my $extra_info = '';
+            while (defined($_ = $self->_readline)) {
+                last if (/\}/);
+                $extra_info .= $_; # append: keep every data line, not just the last
+            }
+            my $assembly_tag = join(" ","TYPE: ",$type,"PROGRAM:",$source,
+                                    "DATE:",$date,"DATA:",$extra_info);
+            $assembly_tag = Bio::Annotation::SimpleValue->new(-value=>$assembly_tag);
+            $assembly->annotation->add_Annotation('phrap',$assembly_tag);
+        };
+
+    } # while (defined($_ = $self->_readline))
+
+    # hmm. what about singlets?
+    # The singlets file name is derived from the ACE file name
+    my $singletsfilename = $self->file();
+    $singletsfilename =~ s/\.ace.*$/.singlets/;
+    $singletsfilename =~ s/\<//;
+    if (!-f $singletsfilename) {
+        # oh deario, no singlets here
+        # NOTE(review): this early return skips update_seq_list(), which is
+        # only called on the path below -- confirm that is intended.
+        return $assembly;
+    }
+    # print("Opening the singletsfile (".$singletsfilename.")\n");
+    my $singlets_fh = Bio::SeqIO->new(-file   => "<$singletsfilename",
+                                      -format => 'fasta');
+    while (my $seq = $singlets_fh->next_seq()) {
+        # find the name of this singlet and attempt to get the phd from phd_dir instead
+        my ($phdfilename,$chromatfilename) = qw(unset unset);
+        if ($seq->desc() =~ /PHD_FILE: (\S+)/) {
+            $phdfilename = $1;
+        }
+        if ($seq->desc() =~ /CHROMAT_FILE: (\S+)/)  {
+            $chromatfilename = $1;
+        }
+        (my $phdfile = $singletsfilename) =~ s/edit_dir.*//;
+        $phdfile .= "phd_dir/$phdfilename";
+        my $singlet = Bio::Assembly::Singlet->new();
+
+        # Prefer the sequence from the phd file when it exists,
+        # otherwise fall back to the FASTA entry
+        my $adder;
+        if (-f $phdfile) {
+            # print STDERR ("Reading singlet data from this phdfile ($phdfile)\n");
+            my $phd_fh = Bio::SeqIO->new( -file =>   "<$phdfile", -format     =>   'phd');
+            $adder = $phd_fh->next_seq();
+        }
+        else {
+            $adder = $seq;
+        }
+        $adder->{phdfilename} = $phdfilename;
+        $adder->{chromatfilename} = $chromatfilename;
+        $singlet->seq_to_singlet($adder);
+        $assembly->add_singlet($singlet);
+    }
+    $assembly->update_seq_list();
+    return $assembly;
+}
+
+=head2 write_assembly
+
+    Title   : write_assembly
+    Usage   : $ass_io->write_assembly($assembly)
+    Function: Write the assembly object in Phrap compatible ACE format
+    Returns : 1 on success, 0 for error
+    Args    : A Bio::Assembly::Scaffold object
+
+=cut
+
+sub write_assembly {
+    # Writing ACE files is not supported yet: always raises through
+    # throw_not_implemented().
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/phrap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/phrap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO/phrap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,309 @@
+# $Id: phrap.pm,v 1.5.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl driver for phrap.out file
+#
+# Copyright by Robson F. de Souza
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::IO::phrap - driver to load phrap.out files.
+
+=head1 SYNOPSIS
+
+    # Building an input stream
+    use Bio::Assembly::IO;
+
+    # Assembly loading methods
+    $io = new Bio::Assembly::IO(-file=>"SGC0-424.phrap.out",
+                                -format=>"phrap");
+
+    $assembly = $io->next_assembly;
+
+=head1 DESCRIPTION
+
+This package was developed to load the phrap.out files from the
+(phred/phrap/consed) package by Phil Green. These files contain just
+the messages printed to standard out by phrap when building an
+assembly.  This output is redirected by the phredPhrap perl-script to a
+file in the project's directory and holds some information
+regarding assembly quality, connections between contigs and clone's
+position inside contigs.  It should be noted that such files have no
+data about the sequence, neither for contig consensus nor for any
+aligned sequence. Anyway, such information may be loaded from Fasta
+files in the project's directory and added to the assembly object
+later.
+
+Note that, because no sequence is loaded for the contig consensus and
+locations for aligned sequences are only given in "ungapped consensus"
+coordinates in a phrap.out file, you can't make coordinate changes in
+assemblies loaded by phrap.pm, unless you add aligned
+coordinates for each sequence to each contig's features collection
+yourself. See L<Bio::Assembly::Contig::Coordinate_Systems> and
+L<Bio::Assembly::Contig::Feature_collection>.
+
+This driver also loads singlets into the assembly contigs as Bio::Seq
+objects, although without their sequence strings. It also adds a
+feature for the entire sequence, thus storing the singlet length in
+its end position, and adds a tag '_nof_trimmed_nonX' to the feature,
+which stores the number of non-vector bases in the singlet.
+
+=head2 Implementation
+
+Assemblies are loaded into Bio::Assembly::Scaffold objects composed by
+Bio::Assembly::Contig objects. No features are added to Bio::Assembly::Contig
+"_aligned_coord:$seqID" feature class, therefore you can't make
+coordinate changes in contigs loaded by this module. Contig objects
+created by this module will have the following special feature
+classes, identified by their primary tags, in their features
+collection:
+
+"_main_contig_feature:$ID" : main feature for contig $ID.  This
+                              feature is used to store information
+                              about the entire consensus
+                              sequence. This feature always start at
+                              base 1 and its end position is the
+                              consensus sequence length. A tag,
+                              '_trimmed_length' holds the length of the
+                              trimmed good quality region inside the
+                              consensus sequence.
+
+"_covered_region:$index" : coordinates for valid clones inside the
+                              contig. $index is the covered region
+                              number, starting at 1 for the covered
+                              region closest to the consensus sequence
+                              first base.
+
+"_unalign_coord:$seqID" : location of a sequence in "ungapped
+                              consensus" coordinates (consensus
+                              sequence without gaps).  Primary and
+                              secondary scores, indel and
+                              substitutions statistics are stored as
+                              feature tags.
+
+"_internal_clone:$cloneID" : clones inside contigs. $cloneID should be
+                              used as the unique id for each
+                              clone. These features have six tags:
+                              '_1st_name', which is the id of the
+                              upstream (5') aligned sequence
+                              delimiting the clone; '_1st_strand', the
+                              upstream sequence strand in the
+                              alignment; '_2nd_name', downstream (3')
+                              sequence id; '_2nd_strand', the
+                              downstream sequence strand in the
+                              alignment; '_length', unaligned clone
+                              length; '_rejected', a boolean flag,
+                              which is false if the clone is valid and
+                              true if it was rejected.
+
+All coordinates for the features above are expressed as "ungapped
+consensus" coordinates (see L<Bio::Assembly::Contig::Coordinate_Systems>).
+
+=head2 Feature collection
+
+#
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+
+=head1 AUTHOR - Robson Francisco de Souza
+
+Email rfsouza at citri.iq.usp.br
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Assembly::IO::phrap;
+
+use strict;
+
+use Bio::Assembly::Scaffold;
+use Bio::Assembly::Contig;
+use Bio::LocatableSeq;
+use Bio::Seq;
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Assembly::IO);
+
+=head2 next_assembly
+
+ Title   : next_assembly
+ Usage   : $unigene = $stream->next_assembly()
+ Function: returns the next assembly in the stream
+ Returns : Bio::Assembly::Scaffold object
+ Args    : NONE
+
+=cut
+
+sub next_assembly {
+    my $self = shift; # Bio::Assembly::IO::phrap driver object
+
+    # Resetting assembly data structure
+    my $Assembly = Bio::Assembly::Scaffold->new(-source=>'phrap');
+
+    # Contig announced by the most recent "Contig N." header line; the
+    # read lines that follow it are attached to this object.
+    my ($contigOBJ);
+
+    # Looping over all phrap out file lines. Definedness is tested
+    # explicitly so that a false-but-defined line cannot end the loop early.
+    while (defined($_ = $self->_readline)) {
+        chomp;
+
+        # Loading exact duplicated reads list
+#       /Exact duplicate reads:/ && do {
+#           my @exact_dupl;
+#           while (<FILE>) {
+#               last if (/^\s*$/);
+#               /(\S+)\s+(\S+)/ && do {
+#                   push(@exact_dupl,[$1,$2]);
+#               };
+#               $self->{'assembly'}{'exact_dupl_reads'} =
+#                   new Data::Table(\@exact_dupl,['included','excluded'],0);
+#           }
+#       };
+
+        # Loading singlets reads data: one line per singlet, up to the
+        # first empty line
+        /^(\d+) isolated singletons/ && do {
+            while (defined($_ = $self->_readline)) {
+                chomp;
+                last if (/^$/);
+                if (/^\s+(\S+)\s+(\d+)\s+\((\d+)\)/) {
+                    my ($seqID, $length, $nof_trimmed_nonX) = ($1, $2, $3);
+                    my $seq = Bio::Seq->new(-strand=>1,
+                                            -primary_id=>$seqID);
+                    # No sequence string is set on $seq, so the feature
+                    # end is taken from the length reported by phrap.
+                    my $f = Bio::SeqFeature::Generic->new
+                        (-start=>1, -end=>$length,
+                         -primary=>$seq->primary_id(),
+                         -tag=>{ '_nof_trimmed_nonX' => $nof_trimmed_nonX }
+                         );
+                    $seq->add_SeqFeature($f);
+                    # NOTE(review): Bio::Assembly::Scaffold::add_singlet
+                    # only accepts Bio::Assembly::Singlet objects, so this
+                    # Bio::Seq is presumably rejected with a warning --
+                    # confirm, and wrap it in a Singlet if so.
+                    $Assembly->add_singlet($seq);
+                }
+            }
+        };
+
+        # Loading contig information
+        /^Contig (\d+)\.\s+(\d+) reads?; (\d+) bp \(untrimmed\), (\d+) \(trimmed\)\./ && do {
+            my ($contigID, $length, $trimmed_length) = ($1, $3, $4);
+            $contigOBJ = Bio::Assembly::Contig->new(-id=>$contigID, -source=>'phrap');
+            my $feat   = Bio::SeqFeature::Generic->new(-start=>1,
+                                                       -end=>$length,
+                                                       -primary=>"_main_contig_feature:".$contigOBJ->id(),
+                                                       -tag=>{ '_trimmed_length' => $trimmed_length }
+                                                       );
+            $contigOBJ->add_features([ $feat ],1);
+            $Assembly->add_contig($contigOBJ);
+        };
+
+        # Loading read information
+        /^(C?)\s+(-?\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+\(\s*(\d+)\)\s+(\d+\.\d*)\s+(\d+\.\d*)\s+(\d+\.\d*)/ && do {
+            my $strand = ($1 eq 'C' ? -1 : 1);   # leading 'C' marks the reverse strand
+            my ($start, $end, $readID) = ($2, $3, $4);
+            my ($primary_score, $secondary_score) = ($5, $6);
+            my ($substitutions, $deletions, $insertions) = ($7, $8, $9);
+
+            # A read can only be attached to a contig that was announced
+            # before it; fail with a clear message instead of calling a
+            # method on an undefined value.
+            $self->throw("Read $readID found before any contig header line")
+                unless (defined $contigOBJ);
+
+            my $seq = Bio::LocatableSeq->new(-start=>$start,
+                                             -end=>$end,
+                                             -strand=>$strand,
+                                             -id=>$readID,
+                                             -primary_id=>$readID,
+                                             -alphabet=>'dna');
+            my $unalign_coord = Bio::SeqFeature::Generic->new(-start=>$start,
+                                                              -end=>$end,
+                                                              -primary=>"_unalign_coord:$readID",
+                                                              -tag=>{'_primary_score'=>$primary_score,
+                                                                     '_secondary_score'=>$secondary_score,
+                                                                     '_substitutions'=>$substitutions,
+                                                                     '_insertions'=>$insertions,
+                                                                     '_deletions'=>$deletions }
+                                                              );
+            $unalign_coord->attach_seq($seq);
+            $contigOBJ->add_seq($seq);
+            $contigOBJ->add_features([ $unalign_coord ]);
+        };
+
+        # Loading INTERNAL clones description
+        /INTERNAL\s+Contig\s+(\d+)\s+opp\s+sense/ && do {
+            my $contigID = $1;
+            my $contig = $Assembly->get_contig_by_id($contigID);
+            $self->throw("INTERNAL clones section refers to unknown contig $contigID")
+                unless (defined $contig);
+
+            # Clone lines follow until the "Covered regions:" line, which
+            # closes the section
+            while (defined($_ = $self->_readline)) {
+                my (@data,$rejected,$c1_strand,$c2_strand);
+
+                (@data = /\s+(\*?)\s+(C?)\s+(\S+)\s+(C?)\s+(\S+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)/) && do {
+                    # a leading '*' flags a rejected clone
+                    $rejected  = ($data[0] eq '*' ? 1 : 0);
+                    $c1_strand = ($data[1] eq 'C' ? -1 : 1);
+                    $c2_strand = ($data[3] eq 'C' ? -1 : 1);
+                    # clone name = first read name up to its last ".<suffix>"
+                    (my $clone_name = $data[2]) =~ s/^(\S+)\.\w.*/$1/;
+                    my $clone = Bio::SeqFeature::Generic->new(-start=>$data[6],
+                                                              -end=>$data[7],
+                                                              -strand=>0,
+                                                              -primary=>"_internal_clone:$clone_name",
+                                                              -tag=>{'_1st_strand'=>$c1_strand,
+                                                                     '_2nd_strand'=>$c2_strand,
+                                                                     '_1st_name'=>$data[2],
+                                                                     '_2nd_name'=>$data[4],
+                                                                     '_length'=>$data[5],
+                                                                     '_rejected'=>$rejected
+                                                                 }
+                                                              );
+                    $contig->add_features([ $clone ]);
+                };
+
+                /Covered regions:/ && do {
+                    # the numbers on this line are consumed as start/end pairs
+                    my %coord  = /(\d+)/g; my $i = 0;
+                    foreach my $start (sort { $a <=> $b } keys %coord) {
+                        my $cov = Bio::SeqFeature::Generic->new(-start=>$start,
+                                                                -end=>$coord{$start},
+                                                                -primary=>'_covered_region:'.++$i
+                                                                );
+                        # 1: attach feature to contig consensus, if any
+                        $contig->add_features([ $cov ],1);
+                    }
+                    last; # exit while loop
+                }; # /Covered regions:/
+
+            } # while (defined($_ = $self->_readline))
+        }; # /INTERNAL\s+Contig\s+(\d+)\s+opp\s+sense/
+
+    } # while (defined($_ = $self->_readline))
+    return $Assembly;
+}
+
+=head2 write_assembly (NOT IMPLEMENTED)
+
+    Title   : write_assembly
+    Usage   : $ass_io->write_assembly($assembly)
+    Function: Write the assembly object in Phrap compatible ACE format
+    Returns : 1 on success, 0 for error
+    Args    : A Bio::Assembly::Scaffold object
+
+=cut
+
+sub write_assembly {
+    # Writing phrap.out files is not supported; always throws the
+    # inherited "not implemented" exception.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+# Backward-compatible alias: this method was originally published under
+# the misspelled name "write_assemebly".
+sub write_assemebly {
+    my $self = shift;
+    return $self->write_assembly(@_);
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+# $Id: IO.pm,v 1.6.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Assembly::IO
+#
+#   based on the Bio::SeqIO module
+#       by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+# Copyright Robson Francisco de Souza
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::IO - Handler for Assembly::IO Formats
+
+=head1 SYNOPSIS
+
+    use Bio::Assembly::IO;
+
+    $in  = Bio::Assembly::IO->new(-file=>"<inputfilename",
+                                  -format=>'phrap');
+    $out = Bio::Assembly::IO->new(-file=>">outputfilename",
+                                  -format=>'phrap');
+
+    while ( my $assembly = $in->next_assembly() ) {
+       $out->write_assembly($assembly);
+    }
+
+=head1 DESCRIPTION
+
+Bio::Assembly::IO is a handler module for formats in the Assembly::IO set
+(e.g. Bio::Assembly::IO::phrap).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Robson Francisco de Souza
+
+E-mail: rfsouza at citri.iq.usp.br
+
+=head1 CONTRIBUTORS
+
+#
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Assembly::IO;
+
+
+use strict;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : Bio::Assembly::IO->new(-file =>$filename,-format=>'format')
+ Function: Returns a new assembly stream
+ Returns : A Bio::Assembly::IO::Handler initialised
+           with the appropriate format
+ Args    : -file => $filename
+           -format => format
+
+=cut
+
+sub new {
+    my ($caller,@args) = @_;
+    my $class = ref($caller) || $caller;
+
+    # Called on a concrete driver class (Bio::Assembly::IO::<format>):
+    # build the object through the parent constructor and initialise it.
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::Assembly::IO::(\S+)/ ) {
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    } else {
+        # Called on the generic handler: work out the format, load the
+        # matching driver module and delegate construction to it.
+
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+
+        $class->throw("Need at least a file name to proceed!")
+            unless (defined $param{'-file'} || defined $ARGV[0]);
+
+        # explicit -format wins; otherwise guess from the file name
+        my $format = $param{'-format'} ||
+            $class->_guess_format( $param{-file} || $ARGV[0] );
+        $format = "\L$format";  # normalize capitalization to lower case
+
+        # returns nothing (undef / empty list) when the driver cannot be loaded
+        return unless( $class->_load_format_module($format) );
+        return "Bio::Assembly::IO::$format"->new(@args);
+    }
+}
+
+# _initialize is chained for all SeqIO classes
+
+sub _initialize {
+    my $self    = shift;
+    my @io_args = @_;
+    # forward every constructor argument to the inherited I/O initialiser
+    $self->_initialize_io(@io_args);
+}
+
+=head2 next_assembly
+
+ Title   : next_assembly
+ Usage   : $cluster = $stream->next_assembly()
+ Function: Reads the next assembly object from the stream and returns it.
+ Returns : a Bio::Assembly::ScaffoldI compliant object
+ Args    : none
+
+=cut
+
+sub next_assembly {
+   my $self = shift;
+   # The generic handler cannot parse anything itself; format drivers
+   # provide their own next_assembly.
+   $self->throw("Sorry, you cannot read from a generic Bio::Assembly::IO object.");
+}
+
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL Assembly::IO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+  my $self   = shift;
+  my $format = shift;
+  my $module = "Bio::Assembly::IO::" . $format;
+
+  # Trap any exception from _load_module so the caller only ever sees
+  # its result, or undef on failure.
+  my $ok = eval { $self->_load_module($module) };
+  if ( $@ ) {
+    # report the failure on STDERR instead of propagating it
+    print STDERR <<END;
+$self: could not load $format - for more details on supported formats please see the Assembly::IO docs
+Exception $@
+END
+  ;
+  }
+  return $ok;
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function: guess format based on file suffix
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+ Notes   : formats that _guess_format() will guess include
+           only ace and phrap, for now.
+
+=cut
+
+sub _guess_format {
+   my ($class, $arg) = @_;
+
+   # without a file name there is nothing to inspect
+   return unless defined($arg);
+
+   # case-insensitive match on the file name suffix
+   return 'ace' if ($arg =~ /\.ace\.\d+$/i);
+   return 'phrap' if ($arg =~ /\.phrap\.out$/i);
+}
+
+sub DESTROY {
+    my ($self) = @_;
+
+    # call close() when the object is destroyed
+    $self->close();
+}
+
+# I need some direction on these!! The module works so I haven't fiddled with them!
+# Me neither! (rfsouza)
+
+sub TIEHANDLE {
+    my $class = shift;
+    my $val   = shift;
+    # the tied object is a hash holding the wrapped stream under 'seqio'
+    my %tied  = ('seqio' => $val);
+    return bless \%tied, $class;
+}
+
+sub READLINE {
+  my $self   = shift;
+  my $stream = $self->{'seqio'};
+
+  # scalar/void context: hand back a single next_seq() result
+  return $stream->next_seq() unless wantarray;
+
+  # list context: collect results until next_seq() returns a false value
+  my @list;
+  while (my $obj = $stream->next_seq()) {
+      push @list, $obj;
+  }
+  return @list;
+}
+
+sub PRINT {
+  my $self   = shift;
+  my $stream = $self->{'seqio'};
+  # everything printed to the tied handle goes to the stream's write_seq()
+  return $stream->write_seq(@_);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Scaffold.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Scaffold.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Scaffold.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,611 @@
+# $Id: Scaffold.pm,v 1.12.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+#  BioPerl module for Bio::Assembly::Scaffold
+#
+# Copyright by Robson F. de Souza
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::Scaffold - Perl module to hold and manipulate sequence assembly data.
+
+=head1 SYNOPSIS
+
+    # Module loading
+    use Bio::Assembly::IO;
+
+    # Assembly loading methods
+    my $aio = new Bio::Assembly::IO(-file=>"test.ace.1", -format=>'phrap');
+    my $assembly = $aio->next_assembly;
+
+    foreach my $contig ($assembly->all_contigs) {
+        # do something... (see Bio::Assembly::Contig)
+    }
+
+=head1 DESCRIPTION
+
+Bio::Assembly::Scaffold was developed to store and manipulate data
+from sequence assembly programs like Phrap. It implements the
+ScaffoldI interface and intends to be generic enough to be used by
+Bio::Assembly::IO drivers written to programs other than Phrap.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Robson Francisco de Souza
+
+rfsouza at citri.iq.usp.br
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Assembly::Scaffold;
+
+use strict;
+
+use Bio::Annotation::Collection;
+
+use base qw(Bio::Root::Root Bio::Assembly::ScaffoldI);
+
+=head2 new ()
+
+    Title   : new
+    Usage   : $assembly = new (-source=>'program_name',
+			       -contigs=>\@contigs,
+			       -id=>"assembly 1");
+    Function: creates a new assembly object
+    Returns : 
+    Args    : 
+              -source  : [string] sequence assembly program
+              -contigs : reference to array of 
+                         Bio::Assembly::Contig objects
+              -id      : [string] assembly name
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # NOTE(review): -source is parsed here but never stored on the object
+  my ($src,$contigs,$id) = $self->_rearrange([qw(SOURCE CONTIGS ID)], @args);
+
+  # Internal registries:
+  #   _contigs  : contig ID   => contig object
+  #   _singlets : singlet ID  => singlet object
+  #   _seqs     : sequence ID => contig object holding that sequence
+  $self->{'_contigs'} = {};
+  $self->{'_singlets'} = {};
+  $self->{'_seqs'} = {};
+  $self->{'_annotation'} = Bio::Annotation::Collection->new();
+  $self->{'_id'} = 'NoName';
+
+  # -contigs must be an array reference; compare the reference type
+  # instead of assigning to $contigs inside ref().
+  if (defined $contigs && ref($contigs) eq 'ARRAY') {
+      foreach my $contig (@{$contigs}) {
+          $self->add_contig($contig);
+      }
+  }
+
+  $self->{'_id'} = $id if (defined $id);
+
+  return $self;
+}
+
+=head1 Accessing general assembly data
+
+=cut
+
+=head2 id
+
+    Title   : id
+    Usage   : $assembly->id()
+    Function: Get/Set assembly ID
+    Returns : string or undef
+    Args    : string
+
+=cut
+
+sub id {
+    my ($self, $id) = @_;
+
+    # act as a setter only when a defined value is supplied
+    if (defined $id) {
+        $self->{'_id'} = $id;
+    }
+
+    return $self->{'_id'};
+}
+
+=head2 annotation
+
+    Title   : annotation
+    Usage   : $assembly->annotation()
+    Function: Get/Set assembly annotation object
+    Returns : Bio::Annotation::Collection
+    Args    : none
+
+=cut
+
+sub annotation {
+    # Unpack both arguments from @_: a bare "shift" fills only $self and
+    # leaves $ref undefined, which would make the setter a no-op.
+    my ($self,$ref) = @_;
+
+    # store a new annotation object when one is supplied
+    $self->{'_annotation'} = $ref if (defined $ref);
+    return $self->{'_annotation'};
+}
+
+=head2 get_nof_contigs
+
+    Title   : get_nof_contigs
+    Usage   : $assembly->get_nof_contigs()
+    Function: Get the number of contigs included in the assembly
+    Returns : integer
+    Args    : none 
+
+=cut
+
+sub get_nof_contigs {
+    my ($self) = @_;
+    # in scalar context get_contig_ids() yields the number of contigs
+    my $count = $self->get_contig_ids();
+    return $count;
+}
+
+=head2 get_nof_sequences_in_contigs
+
+    Title   : get_nof_sequences_in_contigs
+    Usage   : $assembly->get_nof_sequences_in_contigs()
+    Function: 
+
+              Get the number of sequences included in the
+              assembly. This number refers only to the sequences used
+              to build contigs in this assembly. It does not include
+              contig consensus sequences or singlets.
+
+    Returns : integer
+    Args    : none
+
+=cut
+
+sub get_nof_sequences_in_contigs {
+    my ($self) = @_;
+
+    # add up the scalar-context result of get_seq_ids() for every contig
+    my $total = 0;
+    for my $contig ($self->all_contigs) {
+        my $in_contig = $contig->get_seq_ids();
+        $total += $in_contig;
+    }
+
+    return $total;
+}
+
+=head2 get_nof_singlets
+
+    Title   : get_nof_singlets
+    Usage   : $assembly->get_nof_singlets()
+    Function: Get the number of singlets included in the assembly
+    Returns : integer
+    Args    : none
+
+=cut
+
+sub get_nof_singlets {
+    my ($self) = @_;
+
+    # in scalar context get_singlet_ids() yields the number of singlets
+    my $count = $self->get_singlet_ids();
+    return $count;
+}
+
+=head2 get_seq_ids
+
+    Title   : get_seq_ids
+    Usage   : $assembly->get_seq_ids()
+    Function: 
+
+              Get the ID of sequences from all contigs.  This list
+              refers only to the aligned sequences in contigs. It does
+              not include contig consensus sequences or singlets.
+
+    Returns : array of strings
+    Args    : none
+
+=cut
+
+sub get_seq_ids {
+    my ($self) = @_;
+
+    # the keys of the '_seqs' registry are the sequence IDs
+    my @ids = keys %{ $self->{'_seqs'} };
+    return @ids;
+}
+
+=head2 get_contig_ids
+
+    Title   : get_contig_ids
+    Usage   : $assembly->get_contig_ids()
+    Function: Access list of contig IDs from assembly
+    Returns : an array, if there are any contigs in the
+              assembly. An empty array otherwise
+    Args    : none
+
+=cut
+
+sub get_contig_ids {
+    my ($self) = @_;
+
+    # list context: sorted IDs; otherwise: the number of contigs
+    if (wantarray) {
+        return sort keys %{$self->{'_contigs'}};
+    }
+    return scalar keys %{$self->{'_contigs'}};
+}
+
+=head2 get_singlet_ids
+
+    Title   : get_singlet_ids
+    Usage   : $assembly->get_singlet_ids()
+    Function: Access list of singlet IDs from assembly
+    Returns : array of strings if there are any singlets
+              otherwise an empty array
+    Args    : none
+
+=cut
+
+sub get_singlet_ids {
+    my ($self) = @_;
+
+    # list context: sorted IDs; otherwise: the number of singlets
+    if (wantarray) {
+        return sort keys %{$self->{'_singlets'}};
+    }
+    return scalar keys %{$self->{'_singlets'}};
+}
+
+=head2 get_seq_by_id
+
+    Title   : get_seq_by_id
+    Usage   : $assembly->get_seq_by_id($id)
+    Function: 
+
+              Get a reference for an aligned sequence
+              This sequence must be part of a contig
+              in the assembly.
+
+    Returns : a Bio::LocatableSeq object
+              undef if sequence $id is not found
+              in any contig
+    Args    : [string] sequence identifier (id)
+
+=cut
+
+sub get_seq_by_id {
+    my ($self, $seqID) = @_;
+
+    # unknown sequence: return nothing
+    return unless (exists $self->{'_seqs'}{$seqID});
+
+    # '_seqs' maps the sequence ID to its contig; ask that contig for it
+    my $contig = $self->{'_seqs'}{$seqID};
+    return $contig->get_seq_by_name($seqID);
+}
+
+=head2 get_contig_by_id
+
+    Title   : get_contig_by_id
+    Usage   : $assembly->get_contig_by_id($id)
+    Function: Get a reference for a contig
+    Returns : a Bio::Assembly::Contig object or undef
+    Args    : [string] contig unique identifier (ID)
+
+=cut
+
+sub get_contig_by_id {
+    my ($self, $contigID) = @_;
+
+    # hand back the registered contig, or nothing when the ID is unknown
+    if (exists $self->{'_contigs'}{$contigID}) {
+        return $self->{'_contigs'}{$contigID};
+    }
+    return;
+}
+
+=head2 get_singlet_by_id
+
+    Title   : get_singlet_by_id
+    Usage   : $assembly->get_singlet_by_id()
+    Function: Get a reference for a singlet
+    Returns : Bio::PrimarySeqI object or undef
+    Args    : [string] a singlet ID
+
+=cut
+
+sub get_singlet_by_id {
+    my ($self, $singletID) = @_;
+
+    # hand back the registered singlet, or nothing when the ID is unknown
+    if (exists $self->{'_singlets'}{$singletID}) {
+        return $self->{'_singlets'}{$singletID};
+    }
+    return;
+}
+
+=head1 Modifier methods
+
+=cut
+
+=head2 add_contig
+
+    Title   : add_contig
+    Usage   : $assembly->add_contig($contig)
+    Function: Add a contig to the assembly
+    Returns : 1 on success
+    Args    : a Bio::Assembly::Contig object
+	      order (optional)
+
+=cut
+
+sub add_contig {
+    my $self = shift;
+    my $contig = shift;
+
+    # The eval keeps ->isa from dying on an unblessed reference. The
+    # message is built as ONE string, so the pieces are not passed to
+    # throw() as separate arguments.
+    if( !ref $contig || ! eval { $contig->isa('Bio::Assembly::Contig') } ) {
+        $self->throw("Scaffold::add_contig is unable to process non Bio::Assembly::Contig object [" . ref($contig) . "]");
+    }
+
+    # contigs without an ID get a generated one
+    my $contigID  = $contig->id();
+    if( !defined $contigID ) {
+        $contigID = 'Unknown_' . ($self->get_nof_contigs() + 1);
+        $contig->id($contigID);
+        $self->warn("Attributing ID $contigID to unidentified Bio::Assembly::Contig object.");
+    }
+
+    $self->warn("Replacing contig $contigID with a new contig object")
+        if (exists $self->{'_contigs'}{$contigID});
+    $self->{'_contigs'}{$contigID} = $contig;
+
+    # register every sequence of the contig, warning when a sequence was
+    # previously registered under a different contig object
+    foreach my $seqID ($contig->get_seq_ids()) {
+        if (exists $self->{'_seqs'}{$seqID}) {
+            $self->warn( "Sequence $seqID already assigned to contig ".
+                         $self->{'_seqs'}{$seqID}->id().". Moving to contig $contigID")
+                unless ($self->{'_seqs'}{$seqID} eq $contig);
+        }
+        $self->{'_seqs'}{$seqID} = $contig;
+    }
+
+    return 1;
+}
+
+=head2 add_singlet
+
+    Title   : add_singlet
+    Usage   : $assembly->add_singlet($seq)
+    Function: Add a singlet to the assembly
+    Returns : 1 on success, 0 otherwise
+    Args    : a Bio::Assembly::Singlet object
+		  order (optional)
+
+=cut
+
+sub add_singlet {
+    my ($self,$singlet) = @_;
+
+    # Only Bio::Assembly::Singlet objects are accepted; anything else is
+    # refused with a warning. The eval keeps ->isa from dying on an
+    # unblessed reference.
+    if ( !ref $singlet || ! eval { $singlet->isa('Bio::Assembly::Singlet') } ) {
+        $self->warn("Scaffold::add_singlet is unable to add a singlet ($singlet) because it was not a Bio::Assembly::Singlet object.");
+        return 0;
+    }
+    my $singletID = $singlet->id();
+    # look in the singlet registry (not the contig one) for a previous entry
+    $self->warn("Replacing singlet $singletID wih a new sequence object")
+        if (exists $self->{'_singlets'}{$singletID});
+    $self->{'_singlets'}{$singletID} = $singlet;
+
+    return 1;
+}
+
+=head2 update_seq_list
+
+    Title   : update_seq_list
+    Usage   : $assembly->update_seq_list()
+    Function: 
+
+              Synchronizes the assembly registry for sequences in
+              contigs and contig actual aligned sequences content. You
+              probably want to run this after you remove/add a
+              sequence from/to a contig in the assembly.
+
+    Returns : 1
+    Args    : none 
+
+=cut
+
+sub update_seq_list {
+    my ($self) = @_;
+
+    # start from an empty registry, then map each sequence ID found in
+    # a contig to that contig
+    $self->{'_seqs'} = {};
+    my $registry = $self->{'_seqs'};
+    for my $contig ($self->all_contigs) {
+        for my $seqID ($contig->get_seq_ids) {
+            $registry->{$seqID} = $contig;
+        }
+    }
+
+    return 1;
+}
+
+=head2 remove_contigs
+
+    Title   : remove_contigs
+    Usage   : $assembly->remove_contigs(1..4)
+    Function: Remove contig from assembly object
+    Returns : an array of removed Bio::Assembly::Contig
+              objects
+    Args    : an array of contig IDs 
+
+    See function get_contig_ids() above
+
+=cut
+
+#---------------------
+sub remove_contigs {
+#---------------------
+    my ($self,@args) = @_;
+
+    my @ret = ();
+    foreach my $contigID (@args) {
+        # skip unknown IDs instead of calling a method on undef
+        my $contig = $self->get_contig_by_id($contigID);
+        unless (defined $contig) {
+            $self->warn("$contigID contig not found. Ignoring...");
+            next;
+        }
+        # drop the contig's sequences from the sequence registry first
+        foreach my $seqID ($contig->get_seq_ids()) {
+            delete $self->{'_seqs'}{$seqID};
+        }
+        # delete returns the removed contig object
+        push(@ret, delete $self->{'_contigs'}{$contigID});
+    }
+
+    return @ret;
+}
+
+=head2 remove_singlets
+
+    Title   : remove_singlets
+    Usage   : $assembly->remove_singlets(@singlet_ids)
+    Function: Remove singlet from assembly object
+    Returns : the Bio::SeqI objects removed
+    Args    : a list of singlet IDs
+
+    See function get_singlet_ids() above
+
+=cut
+
+#---------------------
+sub remove_singlets {
+#---------------------
+    my ($self,@args) = @_;
+
+    my @ret = ();
+    foreach my $singletID (@args) {
+        # unknown IDs are reported and skipped, so the returned list
+        # holds only objects that were really removed
+        unless (exists $self->{'_singlets'}{$singletID}) {
+            $self->warn("$singletID singlet not found. Ignoring...");
+            next;
+        }
+        # delete returns the removed singlet object
+        push(@ret, delete $self->{'_singlets'}{$singletID});
+    }
+
+    return @ret;
+}
+
+=head1 Contig and singlet selection methods
+
+=cut
+
+=head2 select_contigs
+
+    Title   : select_contigs
+    Usage   : $assembly->select_contigs(@list)
+    Function: Select an array of contigs from the assembly
+    Returns : an array of Bio::Assembly::Contig objects
+    Args    : an array of contig ids
+
+    See function get_contig_ids() above
+
+=cut
+
+#---------------------
+sub select_contigs {
+#---------------------
+    my ($self,@args) = @_;
+
+    # collect the requested contigs in the order given; unknown IDs are
+    # reported with a warning and skipped
+    my @contigs = ();
+    foreach my $contig (@args) {
+        unless (exists $self->{'_contigs'}{$contig}) {
+            $self->warn("$contig contig not found. Ignoring...");
+            next;
+        }
+        push(@contigs, $self->{'_contigs'}{$contig});
+    }
+
+    return @contigs;
+}
+
+=head2 select_singlets
+
+    Title   : select_singlets
+    Usage   : $assembly->select_singlets(@list)
+    Function: Selects an array of singlets from the assembly
+    Returns : an array of Bio::SeqI objects
+    Args    : an array of singlet ids
+
+    See function get_singlet_ids() above
+
+=cut
+
+#---------------------
+sub select_singlets {
+#---------------------
+    my ($self,@args) = @_;
+
+    # collect the requested singlets in the order given; unknown IDs are
+    # reported with a warning and skipped
+    my @singlets = ();
+    foreach my $singlet (@args) {
+        unless (exists $self->{'_singlets'}{$singlet}) {
+            $self->warn("$singlet singlet not found. Ignoring...");
+            next;
+        }
+        push(@singlets, $self->{'_singlets'}{$singlet});
+    }
+
+    return @singlets;
+}
+
+=head2 all_contigs
+
+    Title   : all_contigs
+    Usage   : my @contigs = $assembly->all_contigs
+    Function: 
+
+              Returns a list of all contigs in this assembly.  Contigs
+              are both clusters and alignments of one or more reads,
+              with an associated consensus sequence.
+
+    Returns : array of Bio::Assembly::Contig (in lexical id order)
+    Args    : none
+
+=cut
+
+#---------------------
+sub all_contigs {
+#---------------------
+    my $self = shift;
+
+    # contig objects ordered by the string sort of their IDs
+    my $registry = $self->{'_contigs'};
+    my @contigs  = map { $registry->{$_} } sort keys %{ $self->{'_contigs'} };
+
+    return @contigs;
+}
+
+=head2 all_singlets
+
+    Title   : all_singlets
+    Usage   : my @singlets = $assembly->all_singlets
+    Function: 
+
+              Returns a list of all singlets in this assembly.
+	      Singlets are isolated reads, without non-vector
+	      matches to any other read in the assembly.
+
+    Returns : array of Bio::SeqI (in lexical order by id)
+    Args    : none
+
+=cut
+
+#---------------------
+sub all_singlets {
+#---------------------
+    my $self = shift;
+
+    # singlet objects ordered by the string sort of their IDs
+    my $registry = $self->{'_singlets'};
+    my @singlets = map { $registry->{$_} } sort keys %{ $self->{'_singlets'} };
+
+    return @singlets;
+}
+
+# =head1 Internal Methods
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ScaffoldI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ScaffoldI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/ScaffoldI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,337 @@
+# $Id: ScaffoldI.pm,v 1.6.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+#  BioPerl module for Bio::Assembly::ScaffoldI
+#
+# Copyright by Robson F. de Souza
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::ScaffoldI - Abstract Interface of Sequence Assemblies
+
+=head1 SYNOPSIS
+
+    # get a Bio::Assembly::ScaffoldI object somehow
+
+    foreach my $contig ($assembly->all_contigs) {
+       # do something (see Bio::Assembly::Contig)
+    }
+
+=head1 DESCRIPTION
+
+This interface defines the basic set of methods an object should have
+to manipulate assembly data.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Robson Francisco de Souza
+
+Email: rfsouza at citri.iq.usp.br
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#
+# Now, let's code!
+
+
+package Bio::Assembly::ScaffoldI;
+
+use strict;
+use Carp;
+
+# Inheritance
+
+use base qw(Bio::Root::RootI);
+
+#
+# Methods
+
+=head1 Accessing general assembly data
+
+=cut
+
+=head2 get_nof_contigs
+
+	Title   : get_nof_contigs
+	Usage   : $assembly->get_nof_contigs()
+	Function: Get the number of contigs included in the assembly
+	Returns : integer
+	Args    : none
+
+=cut
+
+sub get_nof_contigs {
+    my ($self) = @_;
+    # Interface stub: implementing classes are expected to override it.
+    return $self->throw_not_implemented();
+}
+
+=head2 get_nof_singlets
+
+	Title   : get_nof_singlets
+	Usage   : $assembly->get_nof_singlets()
+	Function: Get the number of singlets included in the assembly
+	Returns : integer
+	Args    : none
+
+=cut
+
+sub get_nof_singlets {
+    my ($self) = @_;
+    # Interface stub: implementing classes are expected to override it.
+    return $self->throw_not_implemented();
+}
+
+=head2 get_contig_ids
+
+	Title   : get_contig_ids
+	Usage   : $assembly->get_contig_ids()
+	Function: Access list of contig IDs from assembly
+	Returns : an array if there are any contigs in the assembly.
+                  undef otherwise
+	Args    : an array of contig IDs
+
+=cut
+
+sub get_contig_ids {
+    my ($self) = @_;
+    # Interface stub: implementing classes are expected to override it.
+    return $self->throw_not_implemented();
+}
+
+=head2 get_singlet_ids
+
+	Title   : get_singlet_ids
+	Usage   : $assembly->get_singlet_ids()
+	Function: Access list of singlet IDs from assembly
+	Returns : an array if there are any singlets in the assembly.
+                  undef otherwise
+	Args    : an array of singlet IDs
+
+=cut
+
+sub get_singlet_ids {
+    my ($self) = @_;
+    # Interface stub: implementing classes are expected to override it.
+    return $self->throw_not_implemented();
+}
+
+=head2 get_contig_by_id
+
+    Title   : get_contig_by_id
+    Usage   : $assembly->get_contig_by_id($id)
+    Function: Get a reference for a contig from the assembly
+    Returns : a Bio::Assembly::Contig object or undef
+    Args    : [string] contig unique identifier (ID)
+
+=cut
+
+sub get_contig_by_id {
+    my ($self) = @_;    # interface stub: any id argument is ignored here
+    return $self->throw_not_implemented();
+}
+
+=head2 get_singlet_by_id
+
+    Title   : get_singlet_by_id
+    Usage   : $assembly->get_singlet_by_id()
+    Function: Get a reference for a singlet from the assembly
+    Returns : Bio::PrimarySeqI object or undef
+    Args    : [string] a singlet ID
+
+=cut
+
+sub get_singlet_by_id {
+    my ($self) = @_;    # interface stub: any id argument is ignored here
+    return $self->throw_not_implemented();
+}
+
+=head1 Modifier methods
+
+Implementation of these methods is optional in the sense that
+read-only implementations may not have these. If an object implements
+one of them, it should however implement all.
+
+=cut
+
+=head2 add_contig
+
+	Title   : add_contig
+	Usage   : $assembly->add_contig($contig)
+	Function: Add another contig to the Bio::Assembly::ScaffoldI object
+	Returns : 1 on success, 0 otherwise
+	Args    : a Bio::Assembly::Contig object
+
+    See Bio::Assembly::Contig for more information
+
+=cut
+
+#---------------------
+sub add_contig {
+#---------------------
+	my $self = shift;    # interface stub: remaining arguments are ignored
+	return $self->throw_not_implemented();
+}
+
+=head2 add_singlet
+
+	Title   : add_singlet
+	Usage   : $assembly->add_singlet($seq)
+	Function: Add another singlet to the Bio::Assembly::ScaffoldI object
+	Returns : 1 on success, 0 otherwise
+	Args    : a Bio::Assembly::Singlet object
+
+=cut
+
+#---------------------
+sub add_singlet {
+#---------------------
+	my $self = shift;    # interface stub: remaining arguments are ignored
+	return $self->throw_not_implemented();
+}
+
+=head2 remove_contigs
+
+        Title   : remove_contigs
+	Usage   : $assembly->remove_contigs(1..4)
+	Function: Remove contig from assembly object
+	Returns : a Bio::Assembly::Contig object
+	Args    : a list of contig IDs
+
+    See function get_contig_ids() above
+
+=cut
+
+#---------------------
+sub remove_contigs {
+#---------------------
+	my $self = shift;    # interface stub: remaining arguments are ignored
+	return $self->throw_not_implemented();
+}
+
+=head2 remove_singlets
+
+        Title   : remove_singlets
+	Usage   : $assembly->remove_singlets(1..4)
+	Function: Remove singlet from assembly object
+	Returns : a Bio::SeqI object
+	Args    : a list of singlet IDs 
+
+    See function get_singlet_ids() above
+
+=cut
+
+#---------------------
+sub remove_singlets {
+#---------------------
+	my $self = shift;    # interface stub: remaining arguments are ignored
+	return $self->throw_not_implemented();
+}
+
+=head1 Contig and singlet selection methods
+
+=cut
+
+=head2 select_contigs
+
+	Title   : select_contigs
+	Usage   : $assembly->select_contigs(@list)
+	Function: Selects an array of contigs from the assembly
+	Returns : an array of Bio::Assembly::Contig objects
+	Args    : an array of contig ids
+
+    See function get_contig_ids() above
+
+=cut
+
+#---------------------
+sub select_contigs {
+#---------------------
+	my $self = shift;    # interface stub: remaining arguments are ignored
+	return $self->throw_not_implemented();
+}
+
+=head2 select_singlets
+
+	Title   : select_singlets
+	Usage   : $assembly->select_singlets(@list)
+	Function: Selects an array of singlets from the assembly
+	Returns : an array of Bio::SeqI objects
+	Args    : an array of singlet ids
+
+    See function get_singlet_ids() above
+
+=cut
+
+#---------------------
+sub select_singlets {
+#---------------------
+	my $self = shift;    # interface stub: remaining arguments are ignored
+	return $self->throw_not_implemented();
+}
+
+=head2 all_contigs
+
+	Title   : all_contigs
+	Usage   : my @contigs = $assembly->all_contigs
+	Function: Returns a list of all contigs in this assembly.
+		  Contigs are both clusters and alignments of one
+		  or more reads, with an associated consensus
+		  sequence.
+	Returns : array of Bio::Assembly::Contig
+	Args    : none
+
+=cut
+
+#---------------------
+sub all_contigs {
+#---------------------
+	my $self = shift;    # interface stub: implementing classes override it
+	return $self->throw_not_implemented();
+}
+
+=head2 all_singlets
+
+    Title   : all_singlets
+    Usage   : my @singlets = $assembly->all_singlets
+    Function: Returns a list of all singlets in this assembly.
+	      Singlets are isolated reads, without non-vector
+	      matches to any other read in the assembly.
+    Returns : array of Bio::Assembly::Contig
+    Args    : none
+
+=cut
+
+#---------------------
+sub all_singlets {
+#---------------------
+	my $self = shift;    # interface stub: implementing classes override it
+	return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Singlet.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Singlet.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Assembly/Singlet.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,191 @@
+# $Id: Singlet.pm,v 1.9.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Assembly::Singlet
+# 
+# Cared for by Chad Matsalla <bioinformatics1 at dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Assembly::Singlet - Perl module to hold and manipulate
+                     singlets from sequence assembly contigs.
+
+=head1 SYNOPSIS
+
+    # Module loading
+    use Bio::Assembly::IO;
+
+    # Assembly loading methods
+    $aio = new Bio::Assembly::IO(-file=>"test.ace.1",
+                               -format=>'phrap');
+
+    $assembly = $aio->next_assembly;
+    foreach $singlet ($assembly->all_singlets) {
+      # do something
+    }
+
+=head1 DESCRIPTION
+
+A singlet is a sequence that phrap was unable to align to any other sequences.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad S. Matsalla
+
+bioinformatics1 at dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+package Bio::Assembly::Singlet;
+
+use strict;
+
+use Bio::SeqFeature::Collection;
+use Bio::Seq::PrimaryQual;
+use Dumpvalue();
+my $dumper = Dumpvalue->new();    # debugging helper; no live code in this file calls it
+$dumper->veryCompact(1);
+use base qw(Bio::Assembly::Contig Bio::Root::Root Bio::Align::AlignI);
+
+
+sub new {
+     my ($class, %ARG) = @_;
+     my $self = $class->SUPER::new(%ARG);
+     bless $self, $class;
+
+     # A -seq argument, when true, is wrapped into the singlet right away.
+     my $seq = $ARG{'-seq'};
+     $self->seq_to_singlet($seq) if $seq;
+     return $self;
+}
+
+=head2 seq_to_singlet
+
+    Title   : seq_to_singlet
+    Usage   : my $singlet = $io->seq_to_singlet($seq)
+    Function: Wrap the information for a singlet as a Bio::Assembly::Singlet
+    Returns : A Bio::Assembly::Singlet object
+    Args    : A Bio::Seq-compliant object
+
+=cut
+
+sub seq_to_singlet {
+    my ($self,$seq) = @_;
+    $self->seqref($seq);
+    $self->strand(1);
+    # Class->new form: the indirect "new Class(...)" can be misparsed next to a local new()
+    my $lseq = Bio::LocatableSeq->new(
+               -seq   => $seq->seq(),
+               -start => 1,
+               -end   => $seq->length(),
+               -id    => $seq->display_id());
+    # Carry the chromat/phd file names (plain hash entries on $seq) along.
+    $lseq->{chromatfilename} = $seq->{'chromatfilename'};
+    $lseq->{phdfilename} = $seq->{'phdfilename'};
+    $self->set_consensus_sequence($lseq);
+    # Only a Bio::Seq::Quality (or subclass) is handed on as consensus quality.
+    if ($seq->isa("Bio::Seq::Quality")) {
+        $self->set_consensus_quality($seq);
+    }
+    $self->add_seq($lseq);
+}
+
+
+=head2 id
+
+    Title   : id
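+    Usage   : my $id = $singlet->id()
+    Function: Get the id of the sequence object this singlet refers to (see seqref)
+    Returns : The value of id() called on that sequence object
+    Args    : None; any argument is ignored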
+
+=cut
+
+sub id {
+     my ($self) = @_;
+     # The singlet keeps no id of its own: it reports the id of the
+     # sequence object stored through seqref().
+     my $wrapped_seq = $self->seqref();
+     return $wrapped_seq->id();
+}
+
+
+=head2 seqref
+
+    Title   : seqref
+    Usage   : my $seqref = $singlet->seqref($seq);
+    Function: Get/set the sequence to which this Singlet refers
+    Returns : A Bio::Seq-compliant object
+    Args    : Optional: a Bio::Seq-compliant object to store (ignored if false)
+
+=cut
+
+sub seqref {
+     my ($self, $seq) = @_;
+     $self->{'seqref'} = $seq if $seq;    # a false argument means "just read"
+     return $self->{'seqref'};
+}
+
+
+=head2 chromatfilename
+
+    Title   : chromatfilename
+    Usage   : my $chromatfilename = $singlet->chromatfilename($newfilename);
+    Function: Get the name of the chromatfile for this singlet
+    Returns : A string.
+    Args    : If a string is provided, the chromatfilename will be set to that value.
+
+=cut
+
+sub chromatfilename {
+     my ($self, $name) = @_;
+     $self->{'chromatfilename'} = $name if $name;    # a false argument means "just read"
+     return $self->{'chromatfilename'};
+}
+
+=head2 phdfilename
+
+    Title   : phdfilename
+    Usage   : my $phdfilename = $singlet->phdfilename($newfilename);
+    Function: Get the name of the phdfile for this singlet
+    Returns : A string.
+    Args    : If a string is provided, the phdfilename will be set to that value.
+
+=cut
+
+sub phdfilename {
+     my ($self, $name) = @_;
+     $self->{'phdfilename'} = $name if $name;    # a false argument means "just read"
+     return $self->{'phdfilename'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Article.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Article.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Article.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,134 @@
+# $Id: Article.pm,v 1.14.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Article
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Article - Representation of a general article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Article (-identifier => '123abc',
+                                     -first_page => 23,
+                                     -last_page  => 68);
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Article;
+    $obj->identifier ('123abc');
+    $obj->first_page (23);
+    $obj->last_page (68);
+
+=head1 DESCRIPTION
+
+A storage object for a general article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  first_page
+  last_page
+
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Article;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);
+
+#
+# a closure with a list of allowed attribute names (these names
+# correspond with the allowed 'get' and 'set' methods); each name also
+# records what type the attribute should be (use 'undef' if it is a
+# simple scalar)
+#
+{
+    # Attributes introduced by this class, each mapped to the type its
+    # value must have (undef stands for a simple scalar).
+    my %_allowed = (
+        _first_page => undef,
+        _last_page  => undef,
+    );
+
+    # true when $attr is listed above or accepted by a parent class
+    sub _accessible {
+        my ($self, $attr) = @_;
+        return 1 if exists $_allowed{$attr};
+        return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr: a local entry wins, otherwise the parent is asked
+    sub _attr_type {
+        my ($self, $attr) = @_;
+        return exists $_allowed{$attr}
+            ? $_allowed{$attr}
+            : $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BiblioBase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BiblioBase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BiblioBase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,185 @@
+# $Id: BiblioBase.pm,v 1.13.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::BiblioBase
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::BiblioBase - An abstract base for other biblio classes
+
+=head1 SYNOPSIS
+
+ # do not instantiate this class directly
+
+=head1 DESCRIPTION
+
+It is a base class where all other biblio data storage classes inherit
+from. It does not reflect any real-world object, it exists only for
+convenience, in order to have a place for shared code.
+
+=head2 new()
+
+The I<new()> class method constructs a new biblio storage object.  It
+accepts list of named arguments - the same names as attribute names
+prefixed with a minus sign. Available attribute names are listed in
+the documentation of the individual biblio storage objects.
+
+=head2 Accessors
+
+All attribute names can be used as method names. When used without any
+parameter the method returns current value of the attribute (or
+undef), when used with a value the method sets the attribute to this
+value and also returns it back. The set method also checks if the type
+of the new value is correct.
+
+=head2 Custom classes
+
+If there is a need for new attributes, create your own class which
+usually inherits from I<Bio::Biblio::Ref>. For new types of providers
+and journals, let your class inherit directly from this
+I<Bio::Biblio::BiblioBase> class.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::BiblioBase;
+use strict;
+use vars qw($AUTOLOAD);
+
+
+use base qw(Bio::Root::Root);
+
+# these methods should not be called here;
+# they should be implemented by a subclass
+sub _accessible { my ($self) = @_; return $self->throw_not_implemented(); }
+sub _attr_type { my ($self) = @_; return $self->throw_not_implemented(); }
+
+#
+# deal with 'set_' and 'get_' methods
+#
+sub AUTOLOAD {
+    my ($self, $newval) = @_;
+    # Copy the global now: the accessor generated below outlives this call,
+    # and $AUTOLOAD may name a different method by the time it reports errors.
+    my $method = $AUTOLOAD;
+    if ($method =~ /.*::(\w+)/ && $self->_accessible ("_$1")) {
+	my $attr_name = "_$1";
+	my $attr_type = $self->_attr_type ($attr_name);
+	my $ref_sub =
+	    sub {
+		my ($this, $new_value) = @_;
+		return $this->{$attr_name} unless defined $new_value;   # 'get'
+		# 'set': the value's type must equal, or inherit from, the expected one
+		my ($newval_type) = ref ($new_value) || 'string';
+		my ($expected_type) = $attr_type || 'string';
+		$this->throw ($this->_wrong_type_msg ($newval_type, $expected_type, $method))
+		    unless ($newval_type eq $expected_type) or
+		      UNIVERSAL::isa ($new_value, $expected_type);
+
+		$this->{$attr_name} = $new_value;
+		return $new_value;
+	    };
+
+        # install the accessor so later calls reach it without AUTOLOAD
+        { no strict 'refs'; *{$method} = $ref_sub; }
+        return $ref_sub->($self, $newval);
+    }
+
+    $self->throw ("No such method: $method");
+}
+
+# 
+
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref ($caller) || $caller;
+
+    # create and bless a new instance
+    my ($self) = $class->SUPER::new (@args);
+
+    # Map each '-Some-Key' argument to its accessor name 'some_key'; keying
+    # by accessor name runs every setter once even for mixed-case arguments.
+    my %param = @args;
+    my %value_for;
+    foreach my $key (keys %param) {
+        (my $method = lc (substr ($key, 1))) =~ s/-/_/g;
+        $value_for{$method} = $param{$key};
+    }
+
+    # Real method calls, so accessors overridden in a subclass are honoured
+    # (a symbolic function call would only look in this package).
+    foreach my $method (keys %value_for) {
+        $self->$method ($value_for{$method});
+    }
+
+    return $self;
+}
+
+#
+# set methods test whether incoming value is of a correct type;
+# here we return message explaining it
+#
+sub _wrong_type_msg {
+    my ($self, $given_type, $expected_type, $method) = @_;
+
+    # Without an explicit method name, report the subroutine that
+    # called this one.
+    my $where = defined $method ? $method : (caller(1))[3];
+
+    return "In method $where"
+         . ": Trying to set a value of type '$given_type' but '$expected_type' is expected.";
+}
+
+#
+# probably just for debugging
+# TBD: to decide...
+#
+sub print_me {
+    my $self = shift;
+    require Data::Dumper;    # loaded at call time, not when this module loads
+    my $dumper = Data::Dumper->new ([$self], ['Citation']);
+    return $dumper->Dump;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Book.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Book.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Book.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: Book.pm,v 1.14.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Book
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Book - Representation of a book
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Book (-identifier => '123abc',
+                                  -editor => new Bio::Biblio::Person
+                                            (-lastname => 'Loukides'),
+                                  -isbn  => '0-596-00068-5');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Book;
+    $obj->isbn ('0-596-00068-5');
+
+=head1 DESCRIPTION
+
+A storage object for a book.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  edition
+  editor    type: Bio::Biblio::Provider
+  isbn
+  series
+  title
+  volume
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Book;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);
+
+#
+# a closure with a list of allowed attribute names (these names
+# correspond with the allowed 'get' and 'set' methods); each name also
+# records what type the attribute should be (use 'undef' if it is a
+# simple scalar)
+#
+{
+    # Attributes introduced by this class, each mapped to the type its
+    # value must have (undef stands for a simple scalar).
+    my %_allowed = (
+        _edition => undef,
+        _editor  => 'Bio::Biblio::Provider',
+        _isbn    => undef,
+        _series  => undef,
+        _title   => undef,
+        _volume  => undef,
+    );
+
+    # true when $attr is listed above or accepted by a parent class
+    sub _accessible {
+        my ($self, $attr) = @_;
+        return 1 if exists $_allowed{$attr};
+        return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr: a local entry wins, otherwise the parent is asked
+    sub _attr_type {
+        my ($self, $attr) = @_;
+        return exists $_allowed{$attr}
+            ? $_allowed{$attr}
+            : $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BookArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BookArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/BookArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,129 @@
+# $Id: BookArticle.pm,v 1.13.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::BookArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::BookArticle - Representation of a book article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::BookArticle (-identifier => '123abc',
+                                         -book => new Bio::Biblio::Book);
+  #--- OR ---
+
+    $obj = new Bio::Biblio::BookArticle;
+    $obj->book (new Bio::Biblio::Book);
+
+
+=head1 DESCRIPTION
+
+A storage object for a book article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  book      type: Bio::Biblio::Book
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::BookArticle;
+use strict;
+
+
+use base qw(Bio::Biblio::Article);
+
+#
+# a closure with a list of allowed attribute names (these names
+# correspond with the allowed 'get' and 'set' methods); each name also
+# keep what type the attribute should be (use 'undef' if it is a
+# simple scalar)
+#
+{
+    # Attributes introduced by this class, each mapped to the type its
+    # value must have (undef stands for a simple scalar).
+    my %_allowed = (
+        _book => 'Bio::Biblio::Book',
+    );
+
+    # true when $attr is listed above or accepted by a parent class
+    sub _accessible {
+        my ($self, $attr) = @_;
+        return 1 if exists $_allowed{$attr};
+        return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr: a local entry wins, otherwise the parent is asked
+    sub _attr_type {
+        my ($self, $attr) = @_;
+        return exists $_allowed{$attr}
+            ? $_allowed{$attr}
+            : $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medline2ref.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medline2ref.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medline2ref.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,533 @@
+# $Id: medline2ref.pm,v 1.15.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module Bio::Biblio::IO::medline2ref.pm
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::IO::medline2ref - A converter of a raw hash to MEDLINE citations
+
+=head1 SYNOPSIS
+
+ # to be written
+
+=head1 DESCRIPTION
+
+ # to be written
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::IO::medline2ref;
+
+use strict;
+
+use Bio::Biblio::MedlineJournal;
+use Bio::Biblio::MedlineBook;
+use Bio::Biblio::Provider;
+use Bio::Biblio::Person;
+use Bio::Biblio::Organisation;
+
+use base qw(Bio::Root::Root);
+
+# -----------------------------------------------------------------------------
+# Constructor. The instance itself is created by the parent class;
+# afterwards every named argument is copied into it (overwriting what
+# may already be there) under a lowercased key whose leading '-' is
+# replaced by '_'.
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref ($caller) || $caller;
+
+    # let the parent class create and bless the object
+    my ($self) = $class->SUPER::new (@args);
+
+    # view @args as a hashtable and add a lowercased twin of each key
+    my %param = @args;
+    @param { map { lc $_ } keys %param } = values %param;
+
+    # '-key' becomes '_key' inside the object
+    for my $key (keys %param) {
+        (my $attr = $key) =~ s/^-/_/;
+        $self->{ lc $attr } = $param { $key };
+    }
+
+    return $self;
+}
+
+# ---------------------------------------------------------------------
+#
+#   Here is the core...
+#
+# ---------------------------------------------------------------------
+
+# Creates and returns an empty citation object whose class matches
+# the raw citation data in $source (a hash reference):
+#
+#   'article' with a 'journal'  - Bio::Biblio::MedlineJournalArticle,
+#                                 type 'JournalArticle'
+#   'article' with a 'book'     - Bio::Biblio::MedlineBookArticle,
+#                                 type 'BookArticle'
+#   any other 'article'         - Bio::Biblio::MedlineArticle,
+#                                 type 'MedlineArticle'
+#   no 'article' at all         - Bio::Biblio::Ref (no type is set)
+#
+# MEDLINE has only JournalArticles and BookArticles, the last two
+# cases are here to cope with incomplete data.
+sub _load_instance {
+    my ($self, $source) = @_;
+
+    my $article = $$source{'article'};
+    return $self->_new_instance ('Bio::Biblio::Ref') unless defined $article;
+
+    my ($module, $type);
+    if (defined $$article{'journal'}) {
+        ($module, $type) = ('Bio::Biblio::MedlineJournalArticle', 'JournalArticle');
+    } elsif (defined $$article{'book'}) {
+        ($module, $type) = ('Bio::Biblio::MedlineBookArticle', 'BookArticle');
+    } else {
+        # an article without a journal and without a book: the object
+        # has to be created before its type can be set (it used to be
+        # still undefined at this point, so setting the type died)
+        # NOTE(review): assumes Bio::Biblio::MedlineArticle is the
+        # right class for such citations - confirm
+        ($module, $type) = ('Bio::Biblio::MedlineArticle', 'MedlineArticle');
+    }
+
+    my $result = $self->_new_instance ($module);
+    $result->type ($type);
+    return $result;
+}
+
+# Converts one raw MEDLINE citation into a citation object.
+#
+#   $source - a hash reference with the raw data of the citation
+#
+# Returns the object created by _load_instance for this $source,
+# filled first with the attributes specific to its type and then with
+# the attributes which are the same for all types.
+sub convert {
+    my ($self, $source) = @_;
+    my $result = $self->_load_instance ($source);
+
+    # type-specific attributes; _load_instance names the type of a
+    # generic article 'MedlineArticle', so this name must be
+    # recognized here (checking only for 'Article' never matched)
+    my $type = $result->type;
+    if (defined $type) {
+        if ($type eq 'JournalArticle') {
+            _convert_journal_article ($result, $source);
+        } elsif ($type eq 'BookArticle') {
+            _convert_book_article ($result, $source);
+        } elsif ($type eq 'MedlineArticle' or $type eq 'Article') {
+            _convert_article ($result, $source);
+        }
+    }
+
+    #
+    # now do the attributes which are the same for all resource types
+    #
+
+    # ...identification is now by MedlineID but the trend is to replace
+    # it by PMID (I have heard) therefore we keep both also separately
+    # from the 'identifier'
+    if (defined $$source{'medlineID'}) {
+        $result->identifier ($$source{'medlineID'});
+    } else {
+        $result->identifier ($$source{'PMID'});
+    }
+    $result->pmid ($$source{'PMID'}) if defined $$source{'PMID'};
+    $result->medline_id ($$source{'medlineID'}) if defined $$source{'medlineID'};
+
+    # ...few others (each pair is a setter and the key it is fed from)
+    foreach my $pair (['citation_owner',       'owner'],
+                      ['status',               'status'],
+                      ['number_of_references', 'numberOfReferences']) {
+        my ($setter, $key) = @$pair;
+        $result->$setter ($$source{$key}) if defined $$source{$key};
+    }
+
+    # ...entry status of the citation in the repository: the most
+    # recent of the known dates (revised, completed, created) becomes
+    # the last modification date
+    my $date;
+    if (defined $$source{'dateRevised'}) {
+        $result->last_modified_date (_convert_date ($$source{'dateRevised'}));
+        $date = _convert_date ($$source{'dateCreated'});
+        $result->date_created ($date) if defined $date;
+        $date = _convert_date ($$source{'dateCompleted'});
+        $result->date_completed ($date) if defined $date;
+    } elsif (defined $$source{'dateCompleted'}) {
+        $result->last_modified_date (_convert_date ($$source{'dateCompleted'}));
+        $date = _convert_date ($$source{'dateCreated'});
+        $result->date_created ($date) if defined $date;
+    } elsif (defined $$source{'dateCreated'}) {
+        $result->last_modified_date (_convert_date ($$source{'dateCreated'}));
+    }
+
+    # ...put citation subsets in a comma-separated string
+    if (defined $$source{'citationSubsets'}) {
+        $result->repository_subset (join (',', @{ $$source{'citationSubsets'} }));
+    }
+
+    # ...MEDLINE's Comments & Corrections will be arrays of hashes
+    # (each pair is a setter and the key it is fed from)
+    if (defined $$source{'commentsCorrections'}) {
+        my $corr = $$source{'commentsCorrections'};
+        foreach my $pair (['comment_ons',              'commentOns'],
+                          ['comment_ins',              'commentIns'],
+                          ['erratum_ins',              'erratumIns'],
+                          ['erratum_fors',             'erratumFors'],
+                          ['original_report_ins',      'originalReportIns'],
+                          ['republished_froms',        'republishedFroms'],
+                          ['republished_ins',          'republishedIns'],
+                          ['retraction_ofs',           'retractionOfs'],
+                          ['retraction_ins',           'retractionIns'],
+                          ['summary_for_patients_ins', 'summaryForPatientsIns'],
+                          ['update_ins',               'updateIns'],
+                          ['update_ofs',               'updateOfs']) {
+            my ($setter, $key) = @$pair;
+            $result->$setter ($$corr{$key}) if defined $$corr{$key};
+        }
+    }
+
+    # ...MEDLINE's GeneSymbols are put in a comma-separated string
+    if (defined $$source{'geneSymbols'}) {
+        $result->gene_symbols (join (',', @{ $$source{'geneSymbols'} }));
+    }
+
+    # ...MEDLINE's GeneralNotes into an array of hashtables, each one
+    # having keys for the 'owner' and the 'note'
+    $result->general_notes ($$source{'generalNotes'}) if defined $$source{'generalNotes'};
+
+    # ...MEDLINE's PersonalNameSubjects into contributors (TBD: is that correct?)
+    if (defined $$source{'personalNameSubjects'}) {
+        my @contributors =
+            map { _convert_personal_name ($_) } @{ $$source{'personalNameSubjects'} };
+        $result->contributors (\@contributors);
+    }
+
+    # ...MEDLINE's OtherAbstract into an array of hashtables, each one
+    # having keys for the 'type', 'AbstractText' and the 'copyright'
+    $result->other_abstracts ($$source{'otherAbstracts'}) if defined $$source{'otherAbstracts'};
+#   if (defined $$source{'otherAbstracts'}) {
+#	my @other_abstracts = ();
+#	foreach my $oa ( @{ $$source{'otherAbstracts'} } ) {
+#	    if (defined $$oa{'abstractText'}) {
+#		my $abstract = $$oa{'abstractText'};
+#		delete $$oa{'abstractText'};
+#		$$oa{'abstract'} = $$abstract{'abstractText'};
+#		$$oa{'rights'} = $$abstract{'copyrightInformation'} if defined $$abstract{'copyrightInformation'};
+#		push (@other_abstracts, $oa);
+#	    }
+#	}
+#	$result->other_abstracts (\@other_abstracts);
+#    }
+
+    # ...MEDLINE's OtherIDs into an array of hashtables, each one
+    # having keys for the 'id', and 'source'
+    $result->other_ids ($$source{'otherIDs'}) if defined $$source{'otherIDs'};
+
+    # ...MEDLINE's Chemicals - store them as an array of hashtables
+    # (each one for each Chemical)
+    $result->chemicals ($$source{'chemicals'}) if defined $$source{'chemicals'};
+
+    # MeshHeadings are put on two places:
+    # - a complete information in a property called "MeshHeadings", and
+    # - only descriptors in the hashtable "subject_headings", together
+    #   with the word 'Mesh' in "subject_headings_source"
+    if (defined $$source{'meshHeadings'}) {
+        $result->mesh_headings ($$source{'meshHeadings'});
+        my %subject_headings;
+        foreach my $mesh ( @{ $$source{'meshHeadings'} } ) {
+            $subject_headings{ $$mesh{'descriptorName'} } = 1 if defined $$mesh{'descriptorName'};
+        }
+        if (%subject_headings) {
+            $result->subject_headings (\%subject_headings);
+            $result->subject_headings_source ('Mesh');
+        }
+    }
+
+    # ...MEDLINE's keyword lists are merged all together (this may not
+    # be a good idea - but again the keywords are better accessible
+    # -TBD?)
+    if (defined $$source{'keywordLists'}) {
+        my %keywords;
+        foreach my $list ( @{ $$source{'keywordLists'} } ) {
+            next unless $$list{'keywords'};
+            $keywords{$_} = 1 foreach @{ $$list{'keywords'} };
+        }
+        $result->keywords (\%keywords) if %keywords;
+    }
+
+    # Done!
+    return $result;
+}
+
+# Loads a module (given as a real module name, e.g.
+# 'Bio::Biblio::MedlineJournalArticle'), calls the new() method on it
+# and returns the instance returned by that new() method. Throws an
+# exception when the module cannot be loaded.
+sub _new_instance {
+    my ($self, $module) = @_;
+
+    # module 'A::B::C' is expected in the file 'A/B/C.pm'
+    my $filename = $module . '.pm';
+    $filename =~ s|\:\:|/|g;
+
+    eval { require $filename; };
+    if ($@) {
+        $self->throw ("Loading error when trying '$filename'. $@\n");
+    }
+
+    return $module->new;
+}
+
+#
+# Converts $date - a hash reference which may have the keys 'year',
+# 'month', 'day', 'hour', 'minute' and 'second' - into a string
+# 'year-month-day' or 'year-month-dayThour:minute:secondZ' and returns
+# it. The string ends with the last known part ('minute' and 'second'
+# are used only together with an 'hour'); a missing part preceding a
+# known one is replaced by zeros. The values are used as they are.
+# Returns undef if $date is undefined or has none of these keys.
+#
+# see OpenBQS specification (http://www.ebi.ac.uk/~senger/openbqs/) how
+# a date should be coded;
+# TBD: this can be improved - checking is missing, timezones,
+#      converting to UTC...
+# Also note that this routine does not convert 'medline_date' - it
+# is stored in a separate attribute without any conversion.
+#
+sub _convert_date {
+    my ($date) = @_;
+
+    # an explicit undef (not an empty list) is returned on purpose:
+    # callers put the result directly into argument lists
+    return undef unless defined $date;
+    return undef
+        unless grep { exists $$date{$_} } qw(year month day hour minute second);
+
+    my $converted = (exists $$date{'year'} ? $$date{'year'} : '0000');
+
+    # the month needs a placeholder whenever anything after it is
+    # going to be written - a day, or a time (which brings its own
+    # day placeholder); without it a year followed directly by an
+    # hour would lose the month field
+    if (exists $$date{'month'}) {
+        $converted .= '-' . $$date{'month'};
+    } elsif (exists $$date{'day'} or exists $$date{'hour'}) {
+        $converted .= '-00';
+    }
+
+    if (exists $$date{'day'}) {
+        $converted .= '-' . $$date{'day'};
+    } elsif (exists $$date{'hour'}) {
+        $converted .= '-00';
+    }
+
+    if (exists $$date{'hour'}) {
+        my $minute = exists $$date{'minute'} ? $$date{'minute'} : '00';
+        my $second = exists $$date{'second'} ? $$date{'second'} : '00';
+        $converted .= 'T' . $$date{'hour'} . ':' . $minute . ':' . $second . 'Z';
+    }
+    return $converted;
+}
+
+# $person is a hash reference with person's attributes - we need to
+# create and return a Bio::Biblio::Person object. Each attribute is
+# given to the constructor under its name prefixed with an underscore.
+# The attributes are copied: the caller's hash is left untouched (its
+# keys used to be renamed in place, so converting the same data twice
+# produced names with two underscores).
+sub _convert_personal_name {
+    my ($person) = @_;
+    my %attrs = map { ("_$_" => $$person{$_}) } keys %$person;
+    return Bio::Biblio::Person->new (%attrs);
+}
+
+#
+# Takes the journal article related attributes from $source (a hash
+# reference with the raw citation) and puts them into $result (the
+# citation object being filled); at the end it calls _convert_article
+# (which is shared with book articles).
+#
+sub _convert_journal_article {
+    my ($result, $source) = @_;
+    my $article = $$source{'article'};
+    my $from_journal = $$article{'journal'};
+
+    # the journal itself (each pair is a setter and the key it is fed from)
+    my $journal = Bio::Biblio::MedlineJournal->new;
+    foreach my $pair (['name',         'title'],
+                      ['issn',         'iSSN'],
+                      ['abbreviation', 'iSOAbbreviation'],
+                      ['coden',        'coden']) {
+        my ($setter, $key) = @$pair;
+        next unless defined $$from_journal{$key};
+        $journal->$setter ($$from_journal{$key});
+    }
+
+    # the issue details belong to the article, not to the journal
+    my $issue = $$from_journal{'journalIssue'};
+    if (defined $issue) {
+        $result->volume ($$issue{'volume'}) if defined $$issue{'volume'};
+        $result->issue ($$issue{'issue'}) if defined $$issue{'issue'};
+
+        my $pub_date = $$issue{'pubDate'};
+        if (defined $pub_date) {
+            my $converted = _convert_date ($pub_date);
+            $result->date ($converted) if defined $converted;
+
+            # Some parts of a MEDLINE date are stored just as properties
+            # because they have almost non-parseable format :-).
+            foreach my $pair (['medline_date', 'medlineDate'],
+                              ['season',       'season']) {
+                my ($setter, $key) = @$pair;
+                next unless defined $$pub_date{$key};
+                $result->$setter ($$pub_date{$key});
+            }
+        }
+    }
+
+    # ...some attributes are in journalInfo (which is outside of the article)
+    my $journal_info = $$source{'journalInfo'};
+    if (defined $journal_info) {
+        foreach my $pair (['country',       'country'],
+                          ['medline_ta',    'medlineTA'],
+                          ['medline_code',  'medlineCode'],
+                          ['nlm_unique_id', 'nlmUniqueID']) {
+            my ($setter, $key) = @$pair;
+            next unless defined $$journal_info{$key};
+            $journal->$setter ($$journal_info{$key});
+        }
+    }
+
+    $result->journal ($journal);
+    _convert_article ($result, $source);
+}
+
+#
+# Takes the book article related attributes from $source (a hash
+# reference with the raw citation) and puts them into $result (the
+# citation object being filled); at the end it calls _convert_article
+# (which is shared with journal articles).
+#
+sub _convert_book_article {
+    my ($result, $source) = @_;
+    my $article = $$source{'article'};
+    my $from_book = $$article{'book'};
+
+    # the book itself (each pair is a setter and the key it is fed from)
+    my $book = Bio::Biblio::MedlineBook->new;
+    foreach my $pair (['title',  'title'],
+                      ['volume', 'volume'],
+                      ['series', 'collectionTitle']) {
+        my ($setter, $key) = @$pair;
+        next unless defined $$from_book{$key};
+        $book->$setter ($$from_book{$key});
+    }
+
+    # the publication date is stored in the article
+    my $pub_date = $$from_book{'pubDate'};
+    if (defined $pub_date) {
+        my $converted = _convert_date ($pub_date);
+        $result->pub_date ($converted) if defined $converted;
+
+        # Some parts of a MEDLINE date are stored just as properties
+        # because they have almost non-parseable format :-).
+        foreach my $pair (['medline_date', 'medlineDate'],
+                          ['season',       'season']) {
+            my ($setter, $key) = @$pair;
+            next unless defined $$pub_date{$key};
+            $result->$setter ($$pub_date{$key});
+        }
+    }
+
+    # the publisher is known only by its name
+    if (defined $$from_book{'publisher'}) {
+        my $publisher = Bio::Biblio::Organisation->new;
+        $publisher->name ($$from_book{'publisher'});
+        $book->publisher ($publisher);
+    }
+
+    my @authors = _convert_providers ($$from_book{'authors'});
+    $book->authors (\@authors) if @authors;
+
+    $result->book ($book);
+    _convert_article ($result, $source);
+}
+
+#
+# Takes from $source (a hash reference with the raw citation) the
+# article related attributes and puts them into $article, the citation
+# object being filled (these attributes are the same both for journal
+# and book articles).
+#
+sub _convert_article {
+    my ($article, $source) = @_;
+    my $from_article = $$source{'article'};
+
+    # simple attributes (each pair is a setter and the key it is fed from)
+    foreach my $pair (['title',                          'articleTitle'],
+                      ['affiliation',                    'affiliation'],
+                      ['vernacular_title',               'vernacularTitle'],
+                      ['date_of_electronic_publication', 'dateOfElectronicPublication']) {
+        my ($setter, $key) = @$pair;
+        $article->$setter ($$from_article{$key}) if defined $$from_article{$key};
+    }
+
+    if (defined $$from_article{'pagination'}) {
+        my $pagination = $$from_article{'pagination'};
+        $article->first_page ($$pagination{'startPage'}) if defined $$pagination{'startPage'};
+        $article->last_page ($$pagination{'endPage'}) if defined $$pagination{'endPage'};
+        $article->medline_page ($$pagination{'medlinePgn'}) if defined $$pagination{'medlinePgn'};
+    }
+
+    if (defined $$from_article{'abstract'}) {
+        my $abstract = $$from_article{'abstract'};
+        $article->abstract ($$abstract{'abstractText'}) if defined $$abstract{'abstractText'};
+        $article->abstract_type ('text/plain');
+        $article->rights ($$abstract{'copyrightInformation'}) if defined $$abstract{'copyrightInformation'};
+    }
+
+    # the first language is the language of the article; if there are
+    # more of them, the whole list is kept as a comma-separated string
+    if (defined $$from_article{'languages'}) {
+        my $languages = $$from_article{'languages'};  # ref-array
+        if ( @{ $languages } > 0) {
+            $article->language ( $$languages[0] );
+        }
+        if ( @{ $languages } > 1) {
+            $article->other_languages (join (',', @{ $languages }));
+        }
+    }
+
+    my @authors = _convert_providers ($$from_article{'authors'});
+    if (@authors) {
+        $article->authors (\@authors);
+        $article->author_list_complete
+            ($$from_article{'authorListComplete'}) if defined $$from_article{'authorListComplete'};
+    }
+
+    # references to database entries: one DBLink per accession number,
+    # with the name of the data bank as its database
+    use Bio::Annotation::DBLink;
+    if (defined $$from_article{'dataBanks'}) {
+        my $databanks = $$from_article{'dataBanks'};  # a ref-array
+        my @references;
+        foreach my $bank ( @{ $databanks } ) {
+            my $db_name = $$bank{'dataBankName'};
+            next unless defined $$bank{'accessionNumbers'};
+            foreach my $accn ( @{ $$bank{'accessionNumbers'} } ) {
+                my $dblink = Bio::Annotation::DBLink->new (-primary_id => $accn);
+                $dblink->database ($db_name);   # it does not matter if it is undef
+                push (@references, $dblink);
+            }
+        }
+        if (@references) {
+            $article->cross_references (\@references);
+            $article->cross_references_list_complete
+                ($$from_article{'dataBankListComplete'}) if defined $$from_article{'dataBankListComplete'};
+        }
+    }
+
+    # grants are stored in an array of hashtables (each of the
+    # hashtables has keys agency, grantID and acronym)
+    $article->grants ($$from_article{'grants'}) if defined $$from_article{'grants'};
+
+    # the tested key must be the one which is read (it was misspelled
+    # as 'grandListComplete', so this attribute was never set)
+    $article->grant_list_complete
+        ($$from_article{'grantListComplete'}) if defined $$from_article{'grantListComplete'};
+
+}
+
+#
+# Takes a ref-array of providers - each of them a hash reference with
+# either a 'personalName' (a person) or a 'collectiveName' (an
+# organisation) - and returns an array of converted providers (an
+# empty list if there is nothing to return).
+#
+sub _convert_providers {
+    my ($providers) = @_;
+    return () unless defined $providers;
+
+    my @converted;
+    for my $provider (@$providers) {
+        if (defined $$provider{'personalName'}) {
+            my $person = _convert_personal_name ($$provider{'personalName'});
+            push @converted, $person if defined $person;
+        } elsif (defined $$provider{'collectiveName'}) {
+            push @converted,
+                Bio::Biblio::Organisation->new (-name => $$provider{'collectiveName'});
+        } else {
+            # NOTE(review): this generic provider is created but not
+            # added to the result - confirm whether it should be
+            Bio::Biblio::Provider->new;
+        }
+    }
+
+    return () unless @converted;
+    return @converted;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medlinexml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medlinexml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/medlinexml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,735 @@
+# $Id: medlinexml.pm,v 1.9.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module Bio::Biblio::IO::medlinexml.pm
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::IO::medlinexml - A converter of XML files with MEDLINE citations
+
+=head1 SYNOPSIS
+
+Do not use this object directly, it is recommended to access it and use
+it through the I<Bio::Biblio::IO> module:
+
+  use Bio::Biblio::IO;
+  my $io = new Bio::Biblio::IO (-format => 'medlinexml');
+
+=head1 DESCRIPTION
+
+This object reads bibliographic citations in XML/MEDLINE format and
+converts them into I<Bio::Biblio::RefI> objects. It is an
+implementation of methods defined in I<Bio::Biblio::IO>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::Biblio::IO>.
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::IO::medlinexml;
+use vars qw(@Citations $Callback $Convert @ObjectStack @PCDataStack);
+use vars qw(%PCDATA_NAMES %SIMPLE_TREATMENT %POP_DATA_AND_PEEK_OBJ %POP_OBJ_AND_PEEK_OBJ);
+use vars qw(%POP_AND_ADD_ELEMENT %POP_AND_ADD_DATA_ELEMENT);
+
+use strict;
+
+use XML::Parser;
+
+use base qw(Bio::Biblio::IO);
+
+# -----------------------------------------------------------------------------
+
+# Initializes this object from the named arguments in @args:
+#  - copies all arguments into the object (see below),
+#  - loads the converter named by the '-result' argument (default
+#    'medline2ref') and keeps its instance in the global $Convert,
+#  - creates an XML parser with the event handlers of this module,
+#  - and, if there is a '-callback' argument, parses the input at once.
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # make a hashtable from @args and add a lowercased twin of each key
+    my %param = @args;
+    @param { map { lc $_ } keys %param } = values %param;
+
+    # copy all @args into this object (overwriting what may already be
+    # there) - changing '-key' into '_key', and making keys lowercase
+    for my $key (keys %param) {
+        (my $attr = $key) =~ s/^-/_/;
+        $self->{ lc $attr } = $param { $key };
+    }
+
+    # find the format for output (its name is used in lower case); the
+    # converter goes into a global $Convert because it will be used by
+    # the event handlers which know nothing about this object
+    my $result = lc ($self->{'_result'} || 'medline2ref');
+
+    # a special case is 'raw' when no converting module is loaded
+    # and citations will be returned as a hashtable (the one which
+    # is created during parsing XML file/stream)
+    if ($result ne 'raw') {
+
+        # load module with output converter - as defined in $result
+        if (defined &Bio::Biblio::IO::_load_format_module ($result)) {
+            $Convert = "Bio::Biblio::IO::$result"->new (@args);
+        }
+    }
+
+    # create an instance of the XML parser
+    # (unless it is already there...)
+    $self->{'_xml_parser'} ||=
+        XML::Parser->new (Handlers => {Init  => \&handle_doc_start,
+                                       Start => \&handle_start,
+                                       End   => \&handle_end,
+                                       Char  => \&handle_char,
+                                       Final => \&handle_doc_end});
+
+    # if there is an argument '-callback' then start parsing at once -
+    # the registered event handlers will use 'callback' to report
+    # back after each citation
+    #
+    # we need to remember this situation also in a global variable
+    # because the event handler subroutines know nothing about this
+    # object (unfortunately)
+    if ($Callback = $self->{'_callback'}) {
+        $self->_parse;
+    }
+}
+
+# -----------------------------------------------------------------------------
+
+# Parses the XML input of this object. The input is taken from the
+# first one available of: a file name ('-file' argument), a filehandle
+# ('-fh') or a string with the XML data ('-data'); an exception is
+# thrown if none of them was given. Citations which the event handlers
+# have left in the global @Citations are moved into this object.
+sub _parse {
+    my ($self) = shift;
+    my $parser = $self->{'_xml_parser'};
+
+    if (defined $self->{'_file'}) {
+        $parser->parsefile ($self->{'_file'});
+    } elsif (defined $self->{'_fh'}) {
+        my $fh = $self->{'_fh'};
+        if (ref ($fh) and UNIVERSAL::isa ($fh, 'IO::Handle')) {
+            # an IO::Handle object can be given to the parser as it is
+            # (the class name was misspelled as 'IO::Handler' here,
+            # so this branch was never taken)
+            $parser->parse ($fh);
+        } else {
+            # any other kind of filehandle is read into memory first
+            # (line by line into a lexical, leaving the global $_ alone)
+            my $data = '';
+            while (defined (my $line = <$fh>)) {
+                $data .= $line;
+            }
+            $parser->parse ($data);
+        }
+    } elsif ($self->{'_data'}) {
+        $parser->parse ($self->{'_data'});
+    } else {
+        $self->throw ("XML source to be parsed is unknown. Should be given in the new().");
+    }
+
+    # when parsing is done either all citations have already been
+    # delivered to the caller using her callback - and there is nothing
+    # to be stored here - or the parser has put all citations into the
+    # global @Citations, from where we want to move them into this
+    # instance - so any caller can start parsing other XML input
+    # without overwriting the citations already read by this parser
+    if (@Citations) {
+        $self->{'_citations'} = [ @Citations ];
+        @Citations = ();
+    }
+}
+
+# ---------------------------------------------------------------------
+#
+#   Here is an implementation of Bio::Biblio::IO methods
+#
+# ---------------------------------------------------------------------
+
+# global variables used by the XML event handlers
+# (the '@' sigils of the three arrays were mangled into ' at ' in this
+# copy of the file, which is not valid Perl)
+# TBD: make them accessible at least ONLY from this module...
+@Citations = ();     # citations found by the parser; _parse moves them into the instance
+$Callback = undef;   # the '-callback' argument, set by _initialize
+$Convert = undef;    # the converter instance, set by _initialize
+@ObjectStack = ();   # it has Hash-Ref elements
+@PCDataStack = ();   # it has String elements
+
+# Returns the next citation and removes it from this object. The
+# whole input is parsed at the first call. It must not be used
+# together with a '-callback' argument - an exception is thrown then.
+sub next_bibref {
+    my ($self) = @_;
+
+    if ($self->{'_callback'}) {
+        $self->throw ("Method 'next_bibref' should not be called when a '-callback' argument given.");
+    }
+
+    # parse the whole input into memory (global @Citations)
+    # and then copy it into this object
+    unless ($self->{'_citations'}) {
+        $self->_parse;
+    }
+
+    # hand over the next citation (and forget it here)
+    return shift @{ $self->{'_citations'} };
+}
+
+# ---------------------------------------------------------------------
+#
+#   Here are the event handlers (they do the real job!)
+#
+# Note that these methods do not know anything about the object they
+# are part of - they are called as subroutines, not as methods.
+# It also means that they need to use global variables to store and
+# exchange intermediate results.
+#
+# ---------------------------------------------------------------------
+
+#
+# This is a list of #PCDATA elements (a set: each element name is a
+# key with a true value).
+#
+%PCDATA_NAMES = map { $_ => 1 } qw(
+    AbstractText
+    AccessionNumber
+    Acronym
+    Affiliation
+    Agency
+    ArticleTitle
+    CASRegistryNumber
+    CitationSubset
+    Coden
+    CollectionTitle
+    CollectiveName
+    CopyrightInformation
+    Country
+    DataBankName
+    DateOfElectronicPublication
+    Day
+    Descriptor
+    DescriptorName
+    EndPage
+    FirstName
+    ForeName
+    GeneralNote
+    GeneSymbol
+    GrantID
+    Hour
+    ISOAbbreviation
+    ISSN
+    Initials
+    Issue
+    Keyword
+    Language
+    LastName
+    MedlineCode
+    MedlineDate
+    MedlineID
+    MedlinePgn
+    MedlineTA
+    MiddleName
+    Minute
+    Month
+    NameOfSubstance
+    NlmUniqueID
+    Note
+    NumberOfReferences
+    OtherID
+    PMID
+    PublicationType
+    Publisher
+    QualifierName
+    RefSource
+    RegistryNumber
+    Season
+    Second
+    SpaceFlightMission
+    StartPage
+    SubHeading
+    Suffix
+    Title
+    VernacularTitle
+    Volume
+    Year
+);
+
+# A set of element names (each name is a key with a true value).
+# NOTE(review): presumably the elements which the start-element
+# handler treats in the simplest way - confirm against handle_start
+%SIMPLE_TREATMENT = map { $_ => 1 } qw(
+    MeshHeading
+    Author
+    Article
+    Book
+    Investigator
+    Chemical
+    Pagination
+    MedlineJournalInfo
+    JournalIssue
+    Journal
+    DateCreated
+    DateCompleted
+    DateRevised
+    PubDate
+    Abstract
+    Grant
+    CommentsCorrections
+    CommentOn
+    CommentIn
+    ErratumFor
+    ErratumIn
+    OriginalReportIn
+    RepublishedFrom
+    RepublishedIn
+    RetractionOf
+    RetractionIn
+    SummaryForPatientsIn
+    UpdateIn
+    UpdateOf
+    DataBank
+    KeywordList
+    DeleteCitation
+);
+
+# A set of element names (each name is a key with a true value).
+# NOTE(review): judging by its name, for these elements the
+# end-element handler pops the collected character data and stores it
+# in the object on the top of the object stack - confirm against
+# handle_end
+%POP_DATA_AND_PEEK_OBJ = map { $_ => 1 } qw(
+    Descriptor
+    DescriptorName
+    Year
+    Month
+    Day
+    LastName
+    Initials
+    FirstName
+    ForeName
+    NameOfSubstance
+    RegistryNumber
+    CASRegistryNumber
+    MiddleName
+    NlmUniqueID
+    MedlineTA
+    MedlinePgn
+    MedlineCode
+    Country
+    ISSN
+    ArticleTitle
+    Issue
+    AbstractText
+    VernacularTitle
+    GrantID
+    Agency
+    Acronym
+    MedlineDate
+    NumberOfReferences
+    RefSource
+    DataBankName
+    CopyrightInformation
+    Suffix
+    Note
+    CollectiveName
+    Hour
+    Minute
+    Second
+    Season
+    Coden
+    ISOAbbreviation
+    Publisher
+    CollectionTitle
+    DateOfElectronicPublication
+    StartPage
+    EndPage
+    Volume
+    Title
+);
+
+# A set of element names (each name is a key with a true value).
+# NOTE(review): judging by its name, for these elements the
+# end-element handler pops the finished object and stores it in the
+# object below it on the object stack - confirm against handle_end
+%POP_OBJ_AND_PEEK_OBJ = map { $_ => 1 } qw(
+    Pagination
+    JournalIssue
+    Journal
+    DateCreated
+    Article
+    DateCompleted
+    DateRevised
+    CommentsCorrections
+    Book
+    PubDate
+    Abstract
+);
+
+# Maps an element name (the first word of each pair) to the name of a
+# list (the second word of the pair).
+# NOTE(review): presumably the character data of such an element is
+# added to the list of that name in the enclosing object - confirm
+# against handle_end
+%POP_AND_ADD_DATA_ELEMENT = qw(
+    Keyword             keywords
+    PublicationType     publicationTypes
+    CitationSubset      citationSubsets
+    Language            languages
+    AccessionNumber     accessionNumbers
+    GeneSymbol          geneSymbols
+    SpaceFlightMission  spaceFlightMissions
+);
+
+
+# Maps an element name (the first word of each pair) to the name of a
+# list (the second word of the pair).
+# NOTE(review): presumably the object built for such an element is
+# added to the list of that name in the enclosing object - confirm
+# against handle_end
+%POP_AND_ADD_ELEMENT = qw(
+    OtherAbstract         otherAbstracts
+    Chemical              chemicals
+    KeywordList           keywordLists
+    Grant                 grants
+    UpdateIn              updateIns
+    CommentOn             commentOns
+    CommentIn             commentIns
+    DataBank              dataBanks
+    PersonalNameSubject   personalNameSubjects
+    ErratumFor            erratumFors
+    ErratumIn             erratumIns
+    RepublishedFrom       republishedFroms
+    RepublishedIn         republishedIns
+    RetractionOf          retractionOfs
+    RetractionIn          retractionIns
+    UpdateOf              updateOfs
+    OriginalReportIn      originalReportIns
+    SummaryForPatientsIn  summaryForPatientsIns
+    MeshHeading           meshHeadings
+);
+
+# 'Init' handler: empties the collected citations and both working
+# stacks before a document is parsed.
+sub handle_doc_start {
+    (@Citations, @ObjectStack, @PCDataStack) = ();
+}
+
+# 'Final' handler: releases both working stacks once the document has
+# been parsed. @Citations is deliberately left alone. The last
+# statement stays an 'undef ARRAY' so the value handed back to the
+# parser is unchanged.
+sub handle_doc_end {
+    undef (@PCDataStack);
+    undef (@ObjectStack);
+}
+
+# 'Char' handler: appends a piece of character data to the entry on top
+# of @PCDataStack.
+#
+#   $expat - parser object passed in by the XML parser (unused)
+#   $str   - the character data
+sub handle_char {
+    my ($expat, $str) = @_;
+
+    # No entry on the stack means no #PCDATA element is open (for
+    # instance whitespace between tags); such data is dropped.
+    return unless @PCDataStack;
+
+    $PCDataStack[-1] .= $str;
+}
+
+
+
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::Biblio::IO::medlinexml::VERSION;
+           print $Bio::Biblio::IO::medlinexml::Revision;
+
+=cut
+
+
+# 'Start' handler, called as a plain subroutine for every opening tag.
+#
+#   $expat - parser object passed in by the XML parser (unused)
+#   $e     - name of the element being opened
+#   %attrs - attributes of that element
+#
+# Depending on the element it pushes an empty string on @PCDataStack
+# (to collect character data), pushes a new hash on @ObjectStack, or
+# records an attribute in the object currently on top of @ObjectStack.
+sub handle_start {
+    my ($expat, $e, %attrs) = @_;
+#    &_debug_object_stack ("START", $e);
+
+    #
+    # The #PCDATA elements which have an attribute list must
+    # be first here - because for them I create entries both on
+    # the @PCDataStack _and_ on @ObjectStack.
+    #
+    if ($e eq 'QualifierName' or
+        $e eq 'SubHeading') {
+        my %p = ();
+        # Only an explicit "Y" marks a major topic. Testing the attribute
+        # for plain truth also accepted MajorTopicYN="N".
+        $p{'majorTopic'} = "Y" if &_eq_hash_elem (\%attrs, 'MajorTopicYN', "Y");
+        push (@ObjectStack, \%p);
+    }
+
+    if ($e eq 'GeneralNote') {
+        my %p = ();
+        $p{'owner'} = $attrs{'Owner'} if $attrs{'Owner'};
+        push (@ObjectStack, \%p);
+    }
+
+    if ($e eq 'OtherID') {
+        my %p = ();
+        $p{'source'} = $attrs{'Source'};
+        push (@ObjectStack, \%p);
+    }
+
+    #
+    # A special treatment is for attributes for personal name.
+    # Because there is no XML element 'PersonalName' I need to
+    # to put yet another object on @ObjectStack unless there is
+    # already one.
+    #
+    # 'MidleName' is the historical misspelling; it is kept next to the
+    # correct 'MiddleName' so that nothing relying on it changes.
+    #
+    if ($e eq 'LastName' or
+        $e eq 'FirstName' or
+        $e eq 'MiddleName' or
+        $e eq 'MidleName' or
+        $e eq 'Initials' or
+        $e eq 'ForeName' or
+        $e eq 'Suffix') {
+        my $peek = $ObjectStack[$#ObjectStack];
+        push (@ObjectStack, {'type' => 'PersonalName'})
+            unless (ref $peek and &_eq_hash_elem ($peek, 'type', 'PersonalName'));
+    }
+
+    #
+    # Then we have #PCDATA elements without an attribute list.
+    # For them I create an entry on @PCDataStack.
+    #
+    if (exists $PCDATA_NAMES{$e}) {
+        push (@PCDataStack, '');
+
+    #
+    # And finally, all non-PCDATA elements go to the objectStack
+    #
+    } elsif (exists $SIMPLE_TREATMENT{$e}) {
+        push (@ObjectStack, {});
+
+    } elsif ($e eq 'PersonalNameSubject') {
+        push (@ObjectStack, {'type' => 'PersonalName'});
+
+    } elsif ($e eq 'DescriptorName' or
+             $e eq 'Descriptor') {
+        if (&_eq_hash_elem (\%attrs, 'MajorTopicYN', "Y")) {
+            my $peek = $ObjectStack[$#ObjectStack];
+            $$peek{'descriptorMajorTopic'} = "Y";
+        }
+
+    } elsif ($e eq 'MedlineCitation' ||
+             $e eq 'NCBIArticle') {
+        my %p = ( 'type' => 'MedlineCitation' );
+        $p{'owner'} = $attrs{'Owner'} if $attrs{'Owner'};
+        $p{'status'} = $attrs{'Status'} if $attrs{'Status'};
+        push (@ObjectStack, \%p);
+
+    } elsif ($e eq 'GrantList') {
+        if (&_eq_hash_elem (\%attrs, 'CompleteYN', "N")) {
+            my $peek = $ObjectStack[$#ObjectStack];
+            $$peek{'grantListComplete'} = "N";
+        }
+
+    } elsif ($e eq 'DataBankList') {
+        if (&_eq_hash_elem (\%attrs, 'CompleteYN', "N")) {
+            my $peek = $ObjectStack[$#ObjectStack];
+            $$peek{'dataBankListComplete'} = "N";
+        }
+
+    } elsif ($e eq 'AuthorList') {
+        if (&_eq_hash_elem (\%attrs, 'CompleteYN', "N")) {
+            my $peek = $ObjectStack[$#ObjectStack];
+            $$peek{'authorListComplete'} = "N";
+        }
+
+    } elsif ($e eq 'OtherAbstract') {
+        my %p = ();
+        $p{'type'} = $attrs{'Type'} if $attrs{'Type'};
+        push (@ObjectStack, \%p);
+#       push (@ObjectStack, { 'type' => 'Abstract' });
+
+    }
+}
+
+# 'End' handler, called as a plain subroutine for every closing tag.
+#
+#   $expat - parser object passed in by the XML parser (unused)
+#   $e     - name of the element being closed
+#
+# It moves what was collected for the element (character data from
+# @PCDataStack and/or an object from @ObjectStack) into the enclosing
+# object, and hands a finished citation to _process_citation().
+sub handle_end {
+    my ($expat, $e) = @_;
+    #
+    # First I have to deal with those elements which are both PCDATA
+    # (and therefore they are on the pcdataStack) and which have an
+    # attribute list (therefore they are also known as a separate
+    # p-object on the objectStack.
+    #
+    if ($e eq 'QualifierName' or
+        $e eq 'SubHeading') {
+        my $p = pop @ObjectStack;   # pSubHeading
+        $$p{'subHeading'} = pop @PCDataStack;
+        &_add_element ('subHeadings', $p);  # adding to pMeshHeadings
+#       &_debug_object_stack ("END", $e);
+        return;
+
+    } elsif ($e eq 'GeneralNote') {
+        my $p = pop @ObjectStack;  # pGeneralNote
+        $$p{'generalNote'} = pop @PCDataStack;
+        &_add_element ('generalNotes', $p);  # adding to pMedlineCitation
+#       &_debug_object_stack ("END", $e);
+        return;
+
+    } elsif ($e eq 'OtherID') {
+        my $p = pop @ObjectStack;  # pOtherID
+        $$p{'otherID'} = pop @PCDataStack;
+        &_add_element ('otherIDs', $p);  # adding to pMedlineCitation
+#       &_debug_object_stack ("END", $e);
+        return;
+    }
+
+    #
+    # both object and pcdata stacks elements mixed here together
+    # (the element names appear in the order of frequency in the
+    # medline data set)
+    #
+
+    if (exists $POP_DATA_AND_PEEK_OBJ{$e}) {
+        &_data2obj ("\l$e");
+
+    } elsif (exists $POP_OBJ_AND_PEEK_OBJ{$e}) {
+        &_obj2obj ("\l$e");
+
+    } elsif (exists $POP_AND_ADD_ELEMENT{$e}) {
+        &_add_element ($POP_AND_ADD_ELEMENT{$e}, pop @ObjectStack);
+
+    } elsif (exists $POP_AND_ADD_DATA_ELEMENT{$e}) {
+        &_add_element ($POP_AND_ADD_DATA_ELEMENT{$e});
+
+    } elsif ($e eq 'Author' or
+             $e eq 'Investigator') {
+        my $pAuthor;
+        my $p = pop @ObjectStack;  # pPersonalName or pAuthor
+        if (&_eq_hash_elem ($p, 'type', 'PersonalName')) {
+            $pAuthor = pop @ObjectStack;
+            $$pAuthor{'personalName'} = $p;
+        } else {
+            $pAuthor = $p;
+        }
+        my $peek = $ObjectStack[$#ObjectStack];   # pMedlineCitation, pArticle or pBook
+        if (&_eq_hash_elem ($peek, 'type', 'MedlineCitation')) {
+            &_add_element ('investigators', $pAuthor);
+        } else {
+            &_add_element ('authors', $pAuthor);
+        }
+
+    } elsif ($e eq 'MedlineJournalInfo') {
+        &_obj2obj ('journalInfo');
+
+    } elsif ($e eq 'PMID') {
+        my $peek = $ObjectStack[$#ObjectStack];   # pMedlineCitation, pReference or pDeleteCitation
+        if (&_eq_hash_elem ($peek, 'type', 'DeleteCitation')) {
+            &_add_element ('PMIDs');
+        } else {
+            $$peek{'PMID'} = pop @PCDataStack;
+        }
+
+    } elsif ($e eq 'MedlineID') {
+        my $peek = $ObjectStack[$#ObjectStack];   # pMedlineCitation, pReference or pDeleteCitation
+        if (&_eq_hash_elem ($peek, 'type', 'DeleteCitation')) {
+            &_add_element ('MedlineIDs');
+        } else {
+            $$peek{'medlineID'} = pop @PCDataStack;
+        }
+
+#    } elsif ($e eq 'OtherAbstract') {
+#       my $pAbstract = pop @ObjectStack;
+#       my $pOtherAbstract = pop @ObjectStack;
+#       $$pOtherAbstract{'abstract'} = $pAbstract
+#           &_add_element ('otherAbstracts', $pOtherAbstract);
+
+    } elsif ($e eq 'Affiliation') {
+        # an affiliation belongs to the author, not to the personal
+        # name object that may sit on top of the author
+        my $peek = $ObjectStack[$#ObjectStack];
+        if (&_eq_hash_elem ($peek, 'type', 'PersonalName')) {
+            my $peek2 = $ObjectStack[$#ObjectStack - 1];
+            $$peek2{'affiliation'} = pop @PCDataStack;
+        } else {
+            $$peek{'affiliation'} = pop @PCDataStack;
+        }
+
+    } elsif ($e eq 'DeleteCitation') {
+        pop @ObjectStack;
+###     warn ("'DeleteCitation' tag found. Not known what to do with it.");   # silently ignored
+
+    } elsif ($e eq 'MedlineCitation' or
+             $e eq 'NCBIArticle') {
+
+        #
+        # Here we finally have the whole citation ready.
+        #
+        # handle_start() pushes a citation object for both element
+        # names, so both must pop it here. Otherwise an NCBIArticle
+        # would stay on @ObjectStack and never be reported.
+        #
+        &_process_citation (pop @ObjectStack);
+
+    #
+    # ERROR: if we are here, there was an unexpected element
+    #
+    } elsif (exists $PCDATA_NAMES{$e}) {
+        pop @PCDataStack;
+        warn ("An unexpected element found: $e");
+    }
+#    &_debug_object_stack ("END", $e);
+
+}
+
+# Called with every completed citation. The citation is first passed
+# through $Convert (when one is defined). It is then given to
+# $Callback if that is set, or else appended to @Citations.
+sub _process_citation {
+    my ($citation) = @_;
+
+    if (defined $Convert) {
+        $citation = $Convert->convert ($citation);
+    }
+
+    return $Callback->($citation) if $Callback;
+    push (@Citations, $citation);
+}
+
+# Appends $element to the array stored under $key in the object on top
+# of @ObjectStack, creating the array on first use. When $element is
+# not defined, the value is popped from @PCDataStack instead.
+sub _add_element {
+    my ($key, $element) = @_;
+    my $top = $ObjectStack[-1];
+    $top->{$key} ||= [];
+    my $item = defined $element ? $element : pop @PCDataStack;
+    push (@{ $top->{$key} }, $item);
+}
+
+# Pops the top of @PCDataStack and stores it under $key in the object
+# on top of @ObjectStack.
+sub _data2obj {
+    my ($key) = @_;
+    my $top = $ObjectStack[-1];
+    $top->{$key} = pop @PCDataStack;
+}
+
+# Pops the top object from @ObjectStack and stores it under $key in the
+# object that is on top afterwards.
+sub _obj2obj {
+    my ($key) = @_;
+    my $child  = pop @ObjectStack;
+    my $parent = $ObjectStack[-1];
+    $parent->{$key} = $child;
+}
+
+# True when the hash referenced by $rh holds a defined value under $key
+# and that value is string-equal to $value.
+sub _eq_hash_elem {
+    my ($rh, $key, $value) = @_;
+    my $found = $rh->{$key};
+    return (defined $found and $found eq $value);
+}
+
+#
+# --- only for debugging
+#
+use vars qw(%DEBUGSTACK);
+%DEBUGSTACK = ();
+# Debugging aid. With an $action starting with 'START' it remembers the
+# current depth of @ObjectStack for $element. With any other $action it
+# prints a line when the depth now differs from the remembered one
+# (the element 'LastName' is never reported).
+sub _debug_object_stack {
+    my ($action, $element) = @_;
+    if ($action =~ /^START/o) {
+        $DEBUGSTACK{$element} = (@ObjectStack+0);
+    } else {
+        return if $element eq 'LastName';
+        # the message used to run the number and 'and' together
+        print "Element $element starts on " .
+            $DEBUGSTACK{$element} . ' and ends on ' . (@ObjectStack+0) . "\n"
+                if $DEBUGSTACK{$element} != (@ObjectStack+0);
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmed2ref.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmed2ref.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmed2ref.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,136 @@
+# $Id: pubmed2ref.pm,v 1.6.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module Bio::Biblio::IO::pubmed2ref.pm
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::IO::pubmed2ref - A converter of a raw hash to PUBMED citations
+
+=head1 SYNOPSIS
+
+ # to be written
+
+=head1 DESCRIPTION
+
+ # to be written
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::IO::pubmed2ref;
+
+use strict;
+
+use base qw(Bio::Biblio::IO::medline2ref);
+
+
+# ---------------------------------------------------------------------
+#
+#   Here is the core...
+#
+# ---------------------------------------------------------------------
+
+# Creates the (still empty) citation object matching the kind of
+# article found in $source.
+#
+#   $source - hash reference with the raw citation
+#
+# Returns a new instance obtained from $self->_new_instance():
+#   article with a 'journal'  -> Bio::Biblio::PubmedJournalArticle
+#   article with a 'book'     -> Bio::Biblio::PubmedBookArticle
+#   any other article         -> Bio::Biblio::PubmedArticle
+#   no article at all         -> Bio::Biblio::Ref
+sub _load_instance {
+    my ($self, $source) = @_;
+
+    my $result;
+    my $article = $$source{'article'};
+    if (defined $article) {
+        if (defined $$article{'journal'}) {
+            $result = $self->_new_instance ('Bio::Biblio::PubmedJournalArticle');
+            $result->type ('JournalArticle');
+        } elsif (defined $$article{'book'}) {
+            $result = $self->_new_instance ('Bio::Biblio::PubmedBookArticle');
+            $result->type ('BookArticle');
+        } else {
+            # This branch used to call type() on a still undefined
+            # $result, which is a fatal error.
+            # NOTE(review): assumes the class Bio::Biblio::PubmedArticle
+            # can be loaded by _new_instance() - confirm.
+            $result = $self->_new_instance ('Bio::Biblio::PubmedArticle');
+            $result->type ('PubmedArticle');
+        }
+    }
+    $result = $self->_new_instance ('Bio::Biblio::Ref') unless defined $result;
+    return $result;
+}
+
+# Converts a raw PUBMED hash into a citation object.
+#
+#   $source - hash reference; its 'Citation' part is converted by the
+#             parent class, its 'PubmedData' part is handled here
+#
+# Returns the citation object produced by the parent class, with the
+# PUBMED specific attributes filled in where the data is present.
+sub convert {
+    my ($self, $source) = @_;
+    my $result = $self->SUPER::convert ($source->{'Citation'});
+
+    # here we do PUBMED's specific stuff
+    my $pubmed_data = $source->{'PubmedData'};
+    return $result unless defined $pubmed_data;
+
+    # ... values which are just taken over (perhaps renamed)
+    my @taken_as_is = (
+        [ 'publicationStatus' => 'pubmed_status' ],
+        [ 'providerId'        => 'pubmed_provider_id' ],
+        [ 'pubmedArticleIds'  => 'pubmed_article_id_list' ],
+        [ 'pubmedURLs'        => 'pubmed_url_list' ],
+    );
+    foreach my $pair (@taken_as_is) {
+        my ($key, $setter) = @$pair;
+        my $value = $pubmed_data->{$key};
+        $result->$setter ($value) if defined $value;
+    }
+
+    # ... all dates from all 'histories' end up in one array
+    my $histories = $pubmed_data->{'histories'};
+    if (defined $histories) {
+        my @history_list;
+        foreach my $history (@$histories) {
+            my $pub_dates = $history->{'pubDates'};
+            foreach my $pub_date (@$pub_dates) {
+                my %entry = ();
+                my $date = Bio::Biblio::IO::medline2ref::_convert_date ($pub_date);
+                $entry{'date'} = $date if defined $date;
+                my $status = $pub_date->{'pubStatus'};
+                $entry{'pub_status'} = $status if defined $status;
+                push (@history_list, \%entry);
+            }
+        }
+        $result->pubmed_history_list (\@history_list);
+    }
+
+    # Done!
+    return $result;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmedxml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmedxml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO/pubmedxml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,300 @@
+# $Id: pubmedxml.pm,v 1.8.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module Bio::Biblio::IO::pubmedxml.pm
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::IO::pubmedxml - A converter of XML files with PUBMED citations
+
+=head1 SYNOPSIS
+
+Do not use this object directly, it is recommended to access it and use
+it through the I<Bio::Biblio::IO> module:
+
+  use Bio::Biblio::IO;
+  my $io = new Bio::Biblio::IO (-format => 'pubmedxml');
+
+=head1 DESCRIPTION
+
+This object reads bibliographic citations in XML/MEDLINE format and
+converts them into I<Bio::Biblio::RefI> objects. It is an
+implementation of methods defined in I<Bio::Biblio::IO>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::Biblio::IO>.
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::IO::pubmedxml;
+use vars qw(%PCDATA_NAMES %SIMPLE_TREATMENT %POP_DATA_AND_PEEK_OBJ %POP_AND_ADD_DATA_ELEMENT);
+
+use strict;
+
+use base qw(Bio::Biblio::IO::medlinexml);
+
+
+# Initializes a PUBMED reader.
+#
+#   @args - key/value pairs as given to new(); each '-key' is stored in
+#           the object as '_key' (lower-cased)
+#
+# It loads the result converter named by '-result' (default
+# 'pubmed2ref', none for 'raw'), creates the XML parser unless one is
+# already there, and starts parsing at once when a '-callback' is given.
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # make a hashtable from @args
+    my %param = @args;
+    @param { map { lc $_ } keys %param } = values %param; # lowercase keys
+
+    # copy all @args into this object (overwriting what may already be
+    # there) - changing '-key' into '_key', and making keys lowercase
+    my $new_key;
+    foreach my $key (keys %param) {
+        ($new_key = $key) =~ s/^-/_/;
+        $self->{ lc $new_key } = $param { $key };
+    }
+
+    # find the format for output - and put it into a global $Convert
+    # because it will be used by the event handler who knows nothing
+    # about this object
+    my $result = $self->{'_result'} || 'pubmed2ref';
+    $result = "\L$result";      # normalize capitalization to lower case
+
+    # a special case is 'raw' when no converting module is loaded
+    # and citations will be returned as a hashtable (the one which
+    # is created during parsing XML file/stream)
+    unless ($result eq 'raw') {
+
+        # load module with output converter - as defined in $result
+        if (defined &Bio::Biblio::IO::_load_format_module ($result)) {
+            $Bio::Biblio::IO::medlinexml::Convert = "Bio::Biblio::IO::$result"->new (@args);
+        }
+    }
+
+    # create an instance of the XML parser
+    # (unless it is already there...)
+    $self->{'_xml_parser'} = XML::Parser->new (Handlers => {Init  => \&Bio::Biblio::IO::medlinexml::handle_doc_start,
+                                                            Start => \&handle_start,
+                                                            End   => \&handle_end,
+                                                            Char  => \&Bio::Biblio::IO::medlinexml::handle_char,
+                                                            Final => \&Bio::Biblio::IO::medlinexml::handle_doc_end})
+        unless $self->{'_xml_parser'};
+
+    # if there is an argument '-callback' then start parsing at once -
+    # the registered event handlers will use 'callback' to report
+    # back after each citation
+    #
+    # we need to remember this situation also in a global variable
+    # because the event handler subroutines know nothing about this
+    # object (unfortunately)
+    #
+    # The variable must be the one in the medlinexml package, where the
+    # code reporting citations lives. '$SUPER::Callback' names a
+    # variable in a package called 'SUPER', which nothing ever reads.
+    if ($Bio::Biblio::IO::medlinexml::Callback = $self->{'_callback'}) {
+        $self->_parse;
+    }
+}
+
+# ---------------------------------------------------------------------
+#
+#   Here are the event handlers (they do the real job!)
+#
+# Note that these methods do not know anything about the object they
+# are part of - they are called as subroutines, not as methods.
+# It also means that they need to use global variables to store and
+# exchange intermediate results.
+#
+# ---------------------------------------------------------------------
+
+#
+# This is a list of #PCDATA elements.
+#
+# PUBMED specific elements for which handle_start() opens an entry on
+# the character data stack.
+%PCDATA_NAMES = map { ($_ => 1) } qw(
+    PublicationStatus
+    ProviderId
+    ArticleId
+    URL
+);
+
+# Elements for which handle_start() pushes an empty object.
+%SIMPLE_TREATMENT = map { ($_ => 1) } qw(
+    History
+    PubMedArticle
+    PubmedArticle
+    PubmedData
+);
+
+# Elements whose character data handle_end() stores in the top object
+# under the element name with a lower-cased first letter.
+%POP_DATA_AND_PEEK_OBJ = map { ($_ => 1) } qw(
+    Year
+    Month
+    Day
+    Hour
+    Minute
+    Second
+    ProviderId
+    PublicationStatus
+);
+
+# Elements whose finished object handle_end() pops and appends to the
+# named array of the enclosing object.
+%POP_AND_ADD_DATA_ELEMENT = (
+    'PubMedPubDate' => 'pubDates',
+    'History'       => 'histories',
+);
+
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::Biblio::IO::pubmedxml::VERSION;
+           print $Bio::Biblio::IO::pubmedxml::Revision;
+
+=cut
+
+
+# 'Start' handler for PUBMED documents, called as a plain subroutine.
+#
+#   $expat - parser object passed in by the XML parser
+#   $e     - name of the element being opened
+#   %attrs - attributes of that element
+#
+# The PUBMED specific elements are dealt with here, using the stacks of
+# the medlinexml package; every other element is passed on to
+# Bio::Biblio::IO::medlinexml::handle_start().
+sub handle_start {
+    my ($expat, $e, %attrs) = @_;
+#    &Bio::Biblio::IO::medlinexml::_debug_object_stack ("START", $e);
+
+    my $objects = \@Bio::Biblio::IO::medlinexml::ObjectStack;
+    my $pcdata  = \@Bio::Biblio::IO::medlinexml::PCDataStack;
+
+    #
+    # #PCDATA elements with an attribute list come first: they get an
+    # object here and, being listed in %PCDATA_NAMES, also a character
+    # data entry below.
+    #
+    if ($e eq 'ArticleId') {
+        my $id_type = defined $attrs{'IdType'} ? $attrs{'IdType'} : 'pubmed';
+        push (@$objects, { 'idType' => $id_type });
+    }
+
+    if ($e eq 'URL') {
+        my %url = ();
+        foreach my $name ('type', 'lang') {
+            $url{$name} = $attrs{$name} if $attrs{$name};
+        }
+        push (@$objects, \%url);
+    }
+
+    if (exists $PCDATA_NAMES{$e}) {
+        # a #PCDATA element: open an entry for its character data
+        push (@$pcdata, '');
+
+    } elsif (exists $SIMPLE_TREATMENT{$e}) {
+        # a plain non-PCDATA element: open an empty object
+        push (@$objects, {});
+
+    } elsif ($e eq 'ArticleIdList') {
+        # nothing to do for the list itself
+
+    } elsif ($e eq 'PubMedPubDate') {
+        my %date = ();
+        $date{'pubStatus'} = $attrs{'PubStatus'} if $attrs{'PubStatus'};
+        push (@$objects, \%date);
+
+    } else {
+        Bio::Biblio::IO::medlinexml::handle_start ($expat, $e, %attrs);
+    }
+}
+
+# 'End' handler for PUBMED documents, called as a plain subroutine.
+#
+#   $expat - parser object passed in by the XML parser
+#   $e     - name of the element being closed
+#
+# The PUBMED specific elements are dealt with here, using the helpers
+# and stacks of the medlinexml package; every other element is passed
+# on to Bio::Biblio::IO::medlinexml::handle_end().
+sub handle_end {
+    my ($expat, $e) = @_;
+
+    my $objects = \@Bio::Biblio::IO::medlinexml::ObjectStack;
+
+    #
+    # Elements which are both PCDATA and have an attribute list: the
+    # text goes into their own object first, then that object is
+    # appended to a list in the enclosing object.
+    #
+    if ($e eq 'ArticleId') {
+        Bio::Biblio::IO::medlinexml::_data2obj ('id');
+        my $article_id = pop @$objects;
+        Bio::Biblio::IO::medlinexml::_add_element ('pubmedArticleIds', $article_id);
+#       &Bio::Biblio::IO::medlinexml::_debug_object_stack ("END", $e);
+        return;
+    }
+
+    if ($e eq 'URL') {
+        Bio::Biblio::IO::medlinexml::_data2obj ('URL');
+        my $url = pop @$objects;
+        Bio::Biblio::IO::medlinexml::_add_element ('pubmedURLs', $url);
+#       &Bio::Biblio::IO::medlinexml::_debug_object_stack ("END", $e);
+        return;
+    }
+
+    #
+    # both object and pcdata stacks elements mixed here together
+    #
+    if (exists $POP_DATA_AND_PEEK_OBJ{$e}) {
+        Bio::Biblio::IO::medlinexml::_data2obj ("\l$e");
+
+    } elsif (exists $POP_AND_ADD_DATA_ELEMENT{$e}) {
+        my $finished = pop @$objects;
+        Bio::Biblio::IO::medlinexml::_add_element ($POP_AND_ADD_DATA_ELEMENT{$e}, $finished);
+
+    } elsif ($e eq 'MedlineCitation' || $e eq 'NCBIArticle') {
+        Bio::Biblio::IO::medlinexml::_obj2obj ('Citation');
+
+    } elsif ($e eq 'PubmedData') {
+        Bio::Biblio::IO::medlinexml::_obj2obj ('PubmedData');
+
+    } elsif ($e eq 'PubMedArticle' || $e eq 'PubmedArticle') {
+        # the whole citation is ready now
+        my $citation = pop @$objects;
+        Bio::Biblio::IO::medlinexml::_process_citation ($citation);
+
+    } else {
+        Bio::Biblio::IO::medlinexml::handle_end ($expat, $e);
+    }
+
+#    &Bio::Biblio::IO::medlinexml::_debug_object_stack ("END", $e);
+
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,370 @@
+# $Id: IO.pm,v 1.17.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::IO
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::IO - Handling the bibliographic references
+
+=head1 SYNOPSIS
+
+    use Bio::Biblio::IO;
+
+    # getting citations from a file
+    $in = Bio::Biblio::IO->new ('-file' => 'myfile.xml' ,
+                                '-format' => 'medlinexml');
+  # --- OR ---
+
+    # getting citations from a string
+    $in = Bio::Biblio::IO->new ('-data' => '<MedlineCitation>...</MedlineCitation>' ,
+                                '-format' => 'medlinexml');
+  #--- OR ---
+
+    # getting citations from a string if IO::String is installed
+    use IO::String;
+    $in = Bio::Biblio::IO->new ('-fh' => IO::String->new ($citation),
+                                '-format' => 'medlinexml');
+
+    $in = Bio::Biblio::IO->new(-fh => $io_handle , '-format' => 'medlinexml');
+
+  #--- OR ---
+
+    # getting citations from any IO handler
+    $in = Bio::Biblio::IO->new('-fh' => $io_handle ,
+                               '-format' => 'medlinexml');
+
+
+    # now, having $in, we can read all citations
+    while ( my $citation = $in->next_bibref() ) {
+        &do_something_with_citation ($citation);
+    }
+
+  #--- OR ---
+
+    # again reading all citation but now a callback defined in your
+    # code is used (note that the reading starts already when new()
+    # is called)
+    $io = new Bio::Biblio::IO ('-format'   => 'medlinexml',
+                               '-file'     => $testfile,
+                               '-callback' => \&callback);
+    sub callback {
+        my $citation = shift;
+        print $citation->{'_identifier'} . "\n";
+    }
+
+  #Now, to actually get a citation in an XML format,
+  #use I<Bio::Biblio> module which returns an XML string:
+
+    use Bio::Biblio;
+    use Bio::Biblio::IO;
+    my $xml = new Bio::Biblio->get_by_id ('12368254');
+    my $reader = Bio::Biblio::IO->new ('-data' => $xml,
+                                       '-format' => 'medlinexml');
+
+    while (my $citation = $reader->next_bibref()) {
+       #... do something here with $citation
+       }
+
+  #And, finally, the resulting citation can be received in different
+  #output formats:
+
+    $io = new Bio::Biblio::IO ('-format' => 'medlinexml',
+                               '-result' => 'raw');
+  #--- OR ---
+
+    $io = new Bio::Biblio::IO ('-format' => 'medlinexml',
+                               '-result' => 'medline2ref');
+
+  #--- OR ---
+
+    $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+                               '-result' => 'pubmed2ref');
+
+=head1 DESCRIPTION
+
+Bio::Biblio::IO is a handler module for accessing bibliographic
+citations. The citations can be in different formats - assuming that
+there is a corresponding module knowing that format in Bio::Biblio::IO
+directory (e.g. Bio::Biblio::IO::medlinexml). The format (and the
+module name) is given by the argument I<-format>.
+
+Once an instance of C<Bio::Biblio::IO> class is available, the
+citations can be read by calling repeatedly method I<next_bibref>:
+
+    while (my $citation = $reader->next_bibref()) {
+       ... do something here with $citation
+       }
+
+However, this may imply that all citations were already read into the
+memory. If you expect a huge amount of citations to be read, you may
+choose a I<callback> option. Your subroutine is specified in the
+C<new()> method and is called everytime a new citation is available
+(see an example above in SYNOPSIS).
+
+The citations returned by I<next_bibref> or given to your callback
+routine can be of different formats depending on the argument
+I<-result>. One result type is I<raw> and it is represented by a
+simple, not blessed hash table:
+
+    $io = new Bio::Biblio::IO ('-result' => 'raw');
+
+What other result formats are available depends on the module who
+reads the citations in the first place. At the moment, the following
+ones are available:
+
+    $io = new Bio::Biblio::IO ('-result' => 'medline2ref');
+
+This is a default result format for reading citations by the
+I<medlinexml> module. The C<medlinexml> module is again the default
+one. Which means that you can almost omit arguments (you still need to
+say where the citations come from):
+
+    $io = new Bio::Biblio::IO ('-file' => 'data/medline_data.xml');
+
+Another result format available is for PUBMED citations (which is a
+super-set of the MEDLINE citations having few more tags):
+
+    $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+                               '-result' => 'pubmed2ref',
+                               '-data'   => $citation);
+
+Or, because C<pubmed2ref> is a default one for PUBMED citations, you can say just:
+
+    $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+                               '-data'   => $citation);
+
+Both C<medline2ref> and C<pubmed2ref> results are objects defined in
+the directory C<Bio::Biblio>.
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+An example script I<examples/biblio.pl>. It has many options and its
+own help.  The relevant options to this IO module are I<-f>
+(specifying what file to read) and I<-O> (specifying what result
+format to achieve).
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Biblio::IO;
+
+use strict;
+
+use Symbol();
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+my $entry = 0;
+
+# Constructor, working in two modes depending on the class it is
+# called for.
+#
+# Called for Bio::Biblio::IO itself it acts as a factory: it picks the
+# format ('-format' argument, else a guess from '-file' or $ARGV[0],
+# else 'medlinexml'), loads the module implementing that format and
+# returns the result of calling new() on it with the same arguments.
+# Nothing is returned when the module cannot be loaded.
+#
+# Called for a class below Bio::Biblio::IO:: (or for an object of such
+# a class) it creates the object through the parent constructor and
+# initializes it.
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref ($caller) || $caller;
+
+    unless ($class =~ /Bio::Biblio::IO::(\S+)/) {
+        # the factory mode: find out which 'real-work-doing' module
+        # is wanted
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+        my $format = $param{'-format'}
+            || $class->_guess_format ($param{-file} || $ARGV[0])
+            || 'medlinexml';
+        $format = lc $format;   # normalize capitalization to lower case
+
+        # load module with the real implementation - as defined in $format
+        return unless _load_format_module ($format);
+
+        # this calls this same method again - but it then takes the
+        # other (object creating) branch below
+        return "Bio::Biblio::IO::$format"->new (@args);
+    }
+
+    # the object creating mode
+    my ($self) = $class->SUPER::new (@args);
+    $self->_initialize (@args);
+    return $self;
+}
+
+# Creates a reader with new() and returns a tied handle for it (see
+# fh()). Returns nothing when new() did not produce a reader.
+sub newFh {
+    my ($class, @args) = @_;
+    my $self = $class->new (@args);
+    return unless $self;
+    return $self->fh;
+}
+
+
+# Returns a fresh glob reference tied to this object's class, with the
+# object itself passed on as the argument of the tie.
+sub fh {
+    my ($self) = @_;
+    my $class  = ref ($self) || $self;
+    my $handle = Symbol::gensym ();
+    tie ${$handle}, $class, $self;
+    return $handle;
+}
+
+# _initialize is chained for all Bio::Biblio::IO classes
+
+# Passes the constructor arguments on to _initialize_io().
+sub _initialize {
+    my ($self, @args) = @_;
+    # initialize the IO part
+    return $self->_initialize_io (@args);
+}
+
+=head2 next_bibref
+
+ Usage   : $citation = stream->next_bibref
+ Function: Reads the next citation object from the stream and returns it.
+ Returns : a Bio::Biblio::Ref citation object, or something else
+           (depending on the '-result' argument given in the 'new()'
+	    method).
+ Args    : none
+
+=cut
+
+sub next_bibref {
+    my $self = $_[0];
+    # this generic version only reports, through throw(), that it
+    # cannot read anything
+    $self->throw ("Sorry, you cannot read from a generic Bio::Biblio::IO object.");
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 _load_format_module
+
+ Usage   : $class->_load_format_module ($format)
+ Returns : 1 on success, undef on failure
+ Args    : 'format' should contain the last part of the
+           name of a module who does the real implementation
+
+It does (in run-time) a similar thing as
+
+   require Bio::Biblio::IO::$format
+
+It throws an exception if it fails to find and load the module
+(for example, because of the compilation errors in the module).
+
+=cut
+
+sub _load_format_module {
+    my ($format) = @_;
+
+    my $load   = "Bio/Biblio/IO/$format.pm";
+    my $module = "_<$load";
+
+    # Nothing to do when the main symbol table already has an entry for
+    # this file (presumably left there by perl once the file has been
+    # compiled - TODO confirm).
+    return 1 if $main::{$module};
+
+    eval { require $load; };
+    return 1 unless $@;
+
+    Bio::Root::Root->throw (<<END);
+$load: $format cannot be found or loaded
+Exception $@
+For more information about the Biblio system please see the Bio::Biblio::IO docs.
+END
+    return;
+}
+
+=head2 _guess_format
+
+ Usage   : $class->_guess_format ($filename)
+ Returns : string with a guessed format of the input data (e.g. 'medlinexml')
+ Args    : a file name whose extension can help to guess its format
+
+It makes an expert guess what kind of data are in the given file
+(but be prepared for $filename to be empty).
+
+=cut
+
+sub _guess_format {
+   my ($class, $filename) = @_;
+
+   # A lexical is used on purpose. Assigning the name to the global $_
+   # overwrote the caller's $_ and died whenever $_ was aliased to a
+   # read-only value.
+   return unless $filename;
+   return 'medlinexml'   if ($filename =~ /\.(xml|medlinexml)$/i);
+   return;
+}
+
+# Destructor: closes the object through its close() method.
+sub DESTROY {
+    my ($self) = @_;
+    $self->close ();
+}
+
+# Tie constructor: wraps $val (the reader) in a new object of $class,
+# where READLINE finds it under the key 'biblio'.
+sub TIEHANDLE {
+    my $class = shift;
+    my $val   = shift;
+    my %wrapper = ('biblio' => $val);
+    return bless \%wrapper, $class;
+}
+
+# Reading from the tied handle: one citation when a single value is
+# wanted, otherwise the list of all citations read until next_bibref()
+# returns a false value.
+sub READLINE {
+    my ($self) = @_;
+    my $reader = $self->{'biblio'};
+    return $reader->next_bibref () unless wantarray;
+
+    my @citations;
+    while (my $citation = $reader->next_bibref ()) {
+        push (@citations, $citation);
+    }
+    return @citations;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Journal.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Journal.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Journal.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,128 @@
+# $Id: Journal.pm,v 1.13.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Journal
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Journal - Representation of a journal
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Journal (-name => 'The Perl Journal',
+                                     -issn  => '1087-903X');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Journal;
+    $obj->issn ('1087-903X');
+
+=head1 DESCRIPTION
+
+A storage object for a journal.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  abbreviation
+  issn
+  name
+  provider       type: Bio::Biblio::Provider
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Biblio::Journal;
+use strict;
+
+
+use base qw(Bio::Biblio::BiblioBase);
+
+#
+# A closure holding the attribute names allowed in this class (these
+# names correspond to the allowed 'get' and 'set' methods); the value
+# kept under each name is the type the attribute should have ('undef'
+# stands for a simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _abbreviation => undef,
+	 _issn => undef,
+	 _name => undef,
+	 _provider => 'Bio::Biblio::Provider',
+	 );
+
+    # return true if $attr (e.g. '_issn') is one of the names listed above
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr};
+    }
+
+    # return the expected type of $attr (undef for a scalar or unlisted name)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	$_allowed{$attr};
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/JournalArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/JournalArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/JournalArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,135 @@
+# $Id: JournalArticle.pm,v 1.14.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::JournalArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::JournalArticle - Representation of a journal article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::JournalArticle (-title => 'Come to grief',
+                                            -journal => new Bio::Biblio::Journal);
+  #--- OR ---
+
+    $obj = new Bio::Biblio::JournalArticle;
+    $obj->title ('Come to grief');
+    $obj->journal (new Bio::Biblio::Journal (-name => 'English Mysteries'));
+
+=head1 DESCRIPTION
+
+A storage object for a journal article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  issue
+  issue_supplement
+  journal           type: Bio::Biblio::Journal
+  volume
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::JournalArticle;
+use strict;
+
+
+use base qw(Bio::Biblio::Article);
+
+#
+# A closure holding the attribute names allowed in this class (these
+# names correspond to the allowed 'get' and 'set' methods); the value
+# kept under each name is the type the attribute should have ('undef'
+# stands for a simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _issue => undef,
+	 _issue_supplement => undef,
+	 _journal => 'Bio::Biblio::Journal',
+	 _volume => undef,
+	 );
+
+    # return true if $attr is listed above or accepted by the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);
+    }
+
+    # return the expected type of $attr (asks SUPER for unlisted names)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    return $self->SUPER::_attr_type ($attr);
+	}
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,204 @@
+# $Id: MedlineArticle.pm,v 1.12.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::MedlineArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::MedlineArticle - Representation of a MEDLINE article
+
+=head1 SYNOPSIS
+
+  $obj = new Bio::Biblio::MedlineArticle (-mesh_headings =>
+                                            #array ref of hashes
+                                         );
+
+  # how are Mesh terms stored:
+  use Data::Dumper;
+  print Data::Dumper->Dump ( [$obj->mesh_headings], ['MeshHeadings']);
+
+  #It produces (something like) this:
+  #'MeshHeadings' => [
+  #       { 'descriptorName' => 'Adult' },
+  #       { 'descriptorName' => 'Cardiovascular Diseases',
+  #         'subHeadings'    => [ { 'subHeading' => 'etiology' },
+  #                               { 'majorTopic' => 'Y',
+  #                                 'subHeading' => 'mortality' } ] },
+  #       { 'descriptorName' => 'Child Development',
+  #         'subHeadings'    => [ { 'majorTopic' => 'Y',
+  #                                 'subHeading' => 'physiology' } ] },
+  #       { 'descriptorName' => 'Human' },
+  #      ]
+
+=head1 DESCRIPTION
+
+A storage object for a MEDLINE article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  affiliation
+  chemicals                      type: array ref of hashes
+  citation_owner
+  comment_ins                    type: array ref of hashes
+  comment_ons                    type: array ref of hashes
+  date_of_electronic_publication
+  erratum_fors                   type: array ref of hashes
+  erratum_ins                    type: array ref of hashes
+  gene_symbols
+  general_notes                  type: array ref of hashes
+  grant_list_complete
+  grants                         type: array ref of hashes
+  medline_date
+  medline_id
+  medline_page
+  mesh_headings                  type: array ref of hashes
+  number_of_references
+  original_report_ins            type: array ref of hashes
+  other_abstracts                type: array ref of hashes
+  other_ids                      type: array ref of hashes
+  other_languages
+  pmid
+  republished_froms              type: array ref of hashes
+  republished_ins                type: array ref of hashes
+  retraction_ins                 type: array ref of hashes
+  retraction_ofs                 type: array ref of hashes
+  season
+  status
+  summary_for_patients_ins       type: array ref of hashes
+  update_ins                     type: array ref of hashes
+  update_ofs                     type: array ref of hashes
+  vernacular_title
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::MedlineArticle;
+use strict;
+
+
+use base qw(Bio::Biblio::Article);
+
+#
+# A closure holding the attribute names allowed in this class (these
+# names correspond to the allowed 'get' and 'set' methods); the value
+# kept under each name is the type the attribute should have ('undef'
+# stands for a simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _affiliation => undef,
+	 _chemicals => 'ARRAY',
+	 _citation_owner => undef,
+	 _comment_ins => 'ARRAY',
+	 _comment_ons => 'ARRAY',
+	 _date_of_electronic_publication => undef,
+	 _erratum_fors => 'ARRAY',
+	 _erratum_ins => 'ARRAY',
+	 _gene_symbols => undef,
+	 _general_notes => 'ARRAY',
+	 _grant_list_complete => undef,
+	 _grants => 'ARRAY',
+	 _medline_date => undef,
+	 _medline_id => undef,
+	 _medline_page => undef,
+	 _mesh_headings => 'ARRAY',
+	 _number_of_references => undef,
+	 _original_report_ins => 'ARRAY',
+	 _other_abstracts => 'ARRAY',
+	 _other_ids => 'ARRAY',
+	 _other_languages => undef,
+	 _pmid => undef,
+	 _republished_froms => 'ARRAY',
+	 _republished_ins => 'ARRAY',
+	 _retraction_ins => 'ARRAY',
+	 _retraction_ofs => 'ARRAY',
+	 _season => undef,
+	 _status => undef,
+	 _summary_for_patients_ins => 'ARRAY',
+	 _update_ins => 'ARRAY',
+	 _update_ofs => 'ARRAY',
+	 _vernacular_title => undef,
+	 );
+
+    # return true if $attr is listed above or accepted by the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);
+    }
+
+    # return the expected type of $attr (asks SUPER for unlisted names)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    return $self->SUPER::_attr_type ($attr);
+	}
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBook.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBook.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBook.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,128 @@
+# $Id: MedlineBook.pm,v 1.10.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::MedlineBook
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::MedlineBook - Representation of a MEDLINE book
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::MedlineBook
+                  (-editor => new Bio::Biblio::Person
+                             (-lastname => 'Loukides'),
+                   -isbn  => '0-596-00068-5');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::MedlineBook;
+    $obj->isbn ('0-596-00068-5');
+
+=head1 DESCRIPTION
+
+A storage object for a MEDLINE book.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+There are no specific attributes in this class
+(however, you can set and get all attributes defined in the parent classes).
+The main raison d'etre of this class is to be associated with MEDLINE book articles.
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::MedlineBook;
+use strict;
+
+
+use base qw(Bio::Biblio::Book);
+
+#
+# A closure holding the attribute names allowed in this class (none of
+# its own here; these names correspond to the allowed 'get' and 'set'
+# methods); the value kept under each name is the type the attribute
+# should have ('undef' stands for a simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 );
+
+    # return true if $attr is listed above or accepted by the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);
+    }
+
+    # return the expected type of $attr (asks SUPER for unlisted names)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    return $self->SUPER::_attr_type ($attr);
+	}
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBookArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBookArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineBookArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: MedlineBookArticle.pm,v 1.10.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::MedlineBookArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::MedlineBookArticle - Representation of a MEDLINE book article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::MedlineBookArticle
+                  (-title => 'Getting started',
+                   -book => new Bio::Biblio::MedlineBook);
+  #--- OR ---
+
+    $obj = new Bio::Biblio::MedlineBookArticle;
+    $obj->title ('Getting started');
+
+=head1 DESCRIPTION
+
+A storage object for a MEDLINE book article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  book           type: Bio::Biblio::MedlineBook
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::MedlineBookArticle;
+use strict;
+use vars qw(@ISA);
+
+
+use base qw(Bio::Biblio::BookArticle Bio::Biblio::MedlineArticle);
+
+#
+# A closure holding the attribute names allowed in this class (each name
+# corresponds to a 'get'/'set' method); the value stored under a name is
+# the type the attribute must have ('undef' stands for a simple scalar).
+# Every class in @ISA is consulted for names that are not listed here.
+#
+{
+    my %_allowed =
+	(
+	 _book => 'Bio::Biblio::MedlineBook',
+	 );
+
+    # return 1 if $attr is allowed here or in any parent class, 0 otherwise
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+	# a SUPER:: call resolves to a single inherited method, so ask each parent
+	foreach my $parent (@ISA) {
+	    return 1 if $parent->_accessible ($attr);
+	}
+	return 0;    # explicit: a sub ending in a loop has no defined value
+    }
+
+    # return the expected type of $attr ('unknown' if no class accepts it)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    foreach my $parent (@ISA) {
+		if ($parent->_accessible ($attr)) {
+		    return $parent->_attr_type ($attr);
+		}
+	    }
+	}
+	return 'unknown';
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournal.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournal.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournal.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,134 @@
+# $Id: MedlineJournal.pm,v 1.10.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::MedlineJournal
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::MedlineJournal - Representation of a MEDLINE journal
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::MedlineJournal
+               (-medline_ta => 'J Vasc Interv Radiol');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::MedlineJournal;
+    $obj->medline_ta ('J Vasc Interv Radiol');
+
+=head1 DESCRIPTION
+
+A storage object for a MEDLINE journal.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  coden
+  country
+  medline_code
+  medline_ta
+  nlm_unique_id
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Biblio::MedlineJournal;
+use strict;
+
+
+use base qw(Bio::Biblio::Journal);
+
+#
+# A closure holding the attribute names allowed in this class (these
+# names correspond to the allowed 'get' and 'set' methods); the value
+# kept under each name is the type the attribute should have ('undef'
+# stands for a simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _coden => undef,
+	 _country => undef,
+	 _medline_code => undef,
+	 _medline_ta => undef,
+	 _nlm_unique_id => undef,
+	 );
+
+    # return true if $attr is listed above or accepted by the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);
+    }
+
+    # return the expected type of $attr (asks SUPER for unlisted names)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    return $self->SUPER::_attr_type ($attr);
+	}
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournalArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournalArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/MedlineJournalArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,141 @@
+# $Id: MedlineJournalArticle.pm,v 1.10.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::MedlineJournalArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::MedlineJournalArticle - Representation of a MEDLINE journal article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::MedlineJournalArticle (
+                  -title => 'Thermal adaptation analyzed by comparison of protein sequences from mesophilic and extremely thermophilic Methanococcus species.',
+                  -journal => new Bio::Biblio::MedlineJournal (-issn => '0027-8424'),
+                  -volume => 96,
+                  -issue => 7);
+  #--- OR ---
+
+    $obj = new Bio::Biblio::MedlineJournalArticle;
+    $obj->title ('...');
+    $obj->journal (new Bio::Biblio::MedlineJournal (-issn => '0027-8424'));
+
+=head1 DESCRIPTION
+
+A storage object for a MEDLINE journal article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  journal           type: Bio::Biblio::MedlineJournal
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::MedlineJournalArticle;
+use strict;
+use vars qw(@ISA);
+
+
+use base qw(Bio::Biblio::MedlineArticle Bio::Biblio::JournalArticle);
+
+#
+# A closure holding the attribute names allowed in this class (each name
+# corresponds to a 'get'/'set' method); the value stored under a name is
+# the type the attribute must have ('undef' stands for a simple scalar).
+{
+    my %_allowed =
+	(
+	 _journal => 'Bio::Biblio::MedlineJournal',
+	 );
+
+    # return 1 if $attr is allowed here or in any parent class, 0 otherwise
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+	foreach my $parent (@ISA) {
+	    return 1 if $parent->_accessible ($attr);
+	}
+	return 0;    # explicit: a sub ending in a loop has no defined value
+    }
+
+    # return the expected type of $attr ('unknown' if no class accepts it)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    foreach my $parent (@ISA) {
+		if ($parent->_accessible ($attr)) {
+		    return $parent->_attr_type ($attr);
+		}
+	    }
+	}
+	return 'unknown';
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Organisation.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Organisation.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Organisation.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+# $Id: Organisation.pm,v 1.14.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Organisation
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Organisation - Representation of an organisation
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Organisation (-name => 'O\'Reilly');
+
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Organisation;
+    $obj->name ('O\'Reilly');
+
+=head1 DESCRIPTION
+
+A storage object for an organisation related to a bibliographic resource.
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  name
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Organisation;
+use strict;
+
+
+use base qw(Bio::Biblio::Provider);
+
+#
+# A closure holding the attribute names allowed in this class (these
+# names correspond to the allowed 'get' and 'set' methods); the value
+# kept under each name is the type the attribute should have ('undef'
+# stands for a simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _name => undef,
+	 );
+
+    # return true if $attr is listed above or accepted by the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);
+    }
+
+    # return the expected type of $attr (asks SUPER for unlisted names)
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};
+	} else {
+	    return $self->SUPER::_attr_type ($attr);
+	}
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Patent.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Patent.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Patent.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+# $Id: Patent.pm,v 1.14.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Patent
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Patent - Representation of a patent
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Patent (-doc_number => '1-2-3-4-5');
+
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Patent;
+    $obj->doc_number ('1-2-3-4-5');
+
+=head1 DESCRIPTION
+
+A storage object for a patent.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  doc_number
+  doc_office
+  doc_type
+  applicants       type: array ref of Bio::Biblio::Providers
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Patent;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);   # parent class reached through SUPER:: below
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed = (
+		    _doc_number => undef,
+		    _doc_office => undef,
+		    _doc_type => undef,
+		    _applicants => 'ARRAY',   # the POD describes this as an array ref of Bio::Biblio::Providers
+    );
+
+    # Return a true value if $attr may be set/get here: it is listed in %_allowed or the parent class accepts it.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);   # last expression is the return value
+    }
+
+    # Return the expected type of $attr: the %_allowed entry if there is one, otherwise the parent's answer.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};   # undef means "simple scalar"
+	} else {
+	    return $self->SUPER::_attr_type ($attr);   # not ours: defer to the parent class
+	}
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Person.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Person.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Person.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,143 @@
+# $Id: Person.pm,v 1.15.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Person
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Person - Representation of a person
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Person (-lastname => 'Capek',
+                                    -firstname => 'Karel');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Person;
+    $obj->firstname ('Karel');
+    $obj->lastname ('Capek');
+
+=head1 DESCRIPTION
+
+A storage object for a person related to a bibliographic resource.
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  affiliation
+  email
+  firstname
+  forename
+  initials
+  lastname
+  middlename
+  postal_address
+  suffix
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Person;
+use strict;
+
+
+use base qw(Bio::Biblio::Provider);   # parent class reached through SUPER:: below
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed =
+	(
+ 	 _affiliation => undef,   # every attribute of this class is a simple scalar
+	 _email => undef,
+	 _firstname => undef,
+	 _forename => undef,
+	 _initials => undef,
+	 _lastname => undef,
+	 _middlename => undef,
+	 _postal_address => undef,
+	 _suffix => undef,
+	 );
+
+    # Return a true value if $attr may be set/get here: it is listed in %_allowed or the parent class accepts it.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);   # last expression is the return value
+    }
+
+    # Return the expected type of $attr: the %_allowed entry if there is one, otherwise the parent's answer.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};   # undef means "simple scalar"
+	} else {
+	    return $self->SUPER::_attr_type ($attr);   # not ours: defer to the parent class
+	}
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Proceeding.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Proceeding.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Proceeding.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,130 @@
+# $Id: Proceeding.pm,v 1.13.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Proceeding
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Proceeding - Representation of a conference proceeding
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Proceeding (-title => 'JavaONE');
+
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Proceeding;
+    $obj->title ('JavaONE');
+
+=head1 DESCRIPTION
+
+A storage object for a conference proceeding.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+There are no specific attributes in this class
+(however, you can set and get all attributes defined in the parent classes).
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 BUGS AND LIMITATIONS
+
+This class should probably be somewhere else in the class hierarchy
+because a proceeding is actually a collection of resources. Perhaps
+this will be changed in the future.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Proceeding;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);   # parent class reached through SUPER:: below
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed = (   # empty: this class adds no attributes of its own
+    );
+
+    # Return a true value if $attr may be set/get here: it is listed in %_allowed or the parent class accepts it.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr} or $self->SUPER::_accessible ($attr);   # with an empty table this always defers to the parent
+    }
+
+    # Return the expected type of $attr: the %_allowed entry if there is one, otherwise the parent's answer.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};   # undef means "simple scalar"
+	} else {
+	    return $self->SUPER::_attr_type ($attr);   # not ours: defer to the parent class
+	}
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Provider.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Provider.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Provider.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,124 @@
+# $Id: Provider.pm,v 1.11.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Provider
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Provider - Representation of a general provider
+
+=head1 SYNOPSIS
+
+    # usually this class is not instantiated but can be...
+    $obj = new Bio::Biblio::Provider (-type => 'Department');
+
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Provider;
+    $obj->type ('Department');
+
+=head1 DESCRIPTION
+
+A storage object for a general bibliographic resource provider
+(a provider can be a person, an organisation, or even a program).
+
+=head2 Attributes
+
+The following attributes are specific to this class, 
+and they are inherited by all provider types.
+
+  type
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Provider;
+use strict;
+use vars qw($AUTOLOAD);   # NOTE(review): $AUTOLOAD is declared but not referenced in this module's own code
+
+
+use base qw(Bio::Biblio::BiblioBase);
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _type => undef,   # simple scalar
+	 );
+
+    # Return a true value if $attr is listed in %_allowed; unlike the subclasses, no parent class is consulted.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr};   # last expression is the return value
+    }
+
+    # Return the expected type of $attr as recorded in %_allowed.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	$_allowed{$attr};   # undef both for simple scalars and for names that are not listed
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,147 @@
+# $Id: PubmedArticle.pm,v 1.9.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::PubmedArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::PubmedArticle - Representation of a PUBMED article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::PubmedArticle
+                  (-pubmed_history_list =>
+                       [ { 'pub_status' => 'pubmed',
+                           'date' => '2001-12-1T10:0:00Z' },
+                         { 'pub_status' => 'medline',
+                           'date' => '2002-1-5T10:1:00Z' } ],
+                   -pubmed_status => 'ppublish');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::PubmedArticle;
+    $obj->pubmed_status ('ppublish');
+
+=head1 DESCRIPTION
+
+A storage object for a general PUBMED article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  pubmed_status
+  pubmed_provider_id
+  pubmed_history_list       type: array ref of hashes
+  pubmed_article_id_list    type: array ref of hashes
+  pubmed_url_list           type: array ref of hashes
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::PubmedArticle;
+use strict;
+use vars qw(@ISA);   # @ISA is walked explicitly by _accessible/_attr_type below
+
+use base qw(Bio::Biblio::MedlineArticle);
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _pubmed_status => undef,
+	 _pubmed_provider_id => undef,
+	 _pubmed_history_list => 'ARRAY',   # the POD describes the list attributes as array refs of hashes
+	 _pubmed_article_id_list => 'ARRAY',
+	 _pubmed_url_list => 'ARRAY',
+	 );
+
+    # Return 1 if $attr is listed in %_allowed or if any class in @ISA reports it as accessible.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+        foreach my $parent (@ISA) {   # $parent is a package name, so the call below is a class-method call
+	    return 1 if $parent->_accessible ($attr);
+	}   # no explicit return when nothing matches: the sub simply falls off the end of the loop
+    }
+
+    # Return the expected type of $attr: ours if listed, else that of the first parent accepting it, else 'unknown'.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};   # undef means "simple scalar"
+	} else {
+	    foreach my $parent (@ISA) {   # parents are tried in @ISA order
+		if ($parent->_accessible ($attr)) {
+		    return $parent->_attr_type ($attr);   # the first parent that accepts $attr decides
+		}
+	    }
+	}
+	return 'unknown';   # neither this class nor any parent knows $attr
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedBookArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedBookArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedBookArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,135 @@
+# $Id: PubmedBookArticle.pm,v 1.9.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::PubmedBookArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::PubmedBookArticle - Representation of a PUBMED book article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::PubmedBookArticle
+                  (-title => 'Still getting started',
+                   -book => new Bio::Biblio::MedlineBook);
+    # note that there is no specialised class PubmedBook
+
+  #--- OR ---
+
+    $obj = new Bio::Biblio::PubmedBookArticle;
+    $obj->title ('Still getting started');
+
+=head1 DESCRIPTION
+
+A storage object for a PUBMED book article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+There are no specific attributes in this class
+(however, you can set and get all attributes defined in the parent classes).
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::PubmedBookArticle;
+use strict;
+use vars qw(@ISA);   # @ISA is walked explicitly by _accessible/_attr_type below
+
+use base qw(Bio::Biblio::PubmedArticle Bio::Biblio::MedlineBookArticle);   # two parent classes
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed =
+	(   # empty: this class adds no attributes of its own
+	 );
+
+    # Return 1 if $attr is listed in %_allowed or if any class in @ISA reports it as accessible.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+        foreach my $parent (@ISA) {   # $parent is a package name, so the call below is a class-method call
+	    return 1 if $parent->_accessible ($attr);
+	}   # no explicit return when nothing matches: the sub simply falls off the end of the loop
+    }
+
+    # Return the expected type of $attr: ours if listed, else that of the first parent accepting it, else 'unknown'.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};   # undef means "simple scalar"
+	} else {
+	    foreach my $parent (@ISA) {   # parents are tried in @ISA order
+		if ($parent->_accessible ($attr)) {
+		    return $parent->_attr_type ($attr);   # the first parent that accepts $attr decides
+		}
+	    }
+	}
+	return 'unknown';   # neither this class nor any parent knows $attr
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedJournalArticle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedJournalArticle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/PubmedJournalArticle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,148 @@
+# $Id: PubmedJournalArticle.pm,v 1.10.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::PubmedJournalArticle
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::PubmedJournalArticle - Representation of a PUBMED journal article
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::PubmedJournalArticle (
+
+                  # some attributes from MedlineJournalArticle
+                  -title => 'Thermal adaptation analyzed by comparison of protein sequences from mesophilic and extremely thermophilic Methanococcus species.',
+                  -journal => new Bio::Biblio::MedlineJournal (-issn => '0027-8424'),
+                  -volume => 96,
+                  -issue => 7,
+
+                  # and some from PubmedArticle
+                  -pubmed_history_list =>
+                       [ { 'pub_status' => 'pubmed',
+                           'date' => '2001-12-1T10:0:00Z' },
+                         { 'pub_status' => 'medline',
+                           'date' => '2002-1-5T10:1:00Z' } ],
+                   -pubmed_status => 'ppublish');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::PubmedJournalArticle;
+    $obj->title ('...');
+    $obj->journal (new Bio::Biblio::MedlineJournal (-issn => '0027-8424'));
+    $obj->pubmed_status ('ppublish');
+
+
+=head1 DESCRIPTION
+
+A storage object for a PUBMED journal article.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+There are no specific attributes in this class
+(however, you can set and get all attributes defined in the parent classes).
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::PubmedJournalArticle;
+use strict;
+use vars qw(@ISA);   # @ISA is walked explicitly by _accessible/_attr_type below
+
+use base qw(Bio::Biblio::PubmedArticle Bio::Biblio::MedlineJournalArticle);   # two parent classes
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed =
+	(   # empty: this class adds no attributes of its own
+	 );
+
+    # Return 1 if $attr is listed in %_allowed or if any class in @ISA reports it as accessible.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+        foreach my $parent (@ISA) {   # $parent is a package name, so the call below is a class-method call
+	    return 1 if $parent->_accessible ($attr);
+	}   # no explicit return when nothing matches: the sub simply falls off the end of the loop
+    }
+
+    # Return the expected type of $attr: ours if listed, else that of the first parent accepting it, else 'unknown'.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	if (exists $_allowed{$attr}) {
+	    return $_allowed{$attr};   # undef means "simple scalar"
+	} else {
+	    foreach my $parent (@ISA) {   # parents are tried in @ISA order
+		if ($parent->_accessible ($attr)) {
+		    return $parent->_attr_type ($attr);   # the first parent that accepts $attr decides
+		}
+	    }
+	}
+	return 'unknown';   # neither this class nor any parent knows $attr
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Ref.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Ref.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Ref.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,249 @@
+# $Id: Ref.pm,v 1.12.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Ref
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Ref - Representation of a bibliographic reference
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Ref (-type  => 'Letter',
+                                 -title => 'Onegin to Tatiana');
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Ref;
+    $obj->type ('Letter');
+
+=head1 DESCRIPTION
+
+A storage object for a general bibliographic reference (a citation).
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class,
+and they are inherited by all citation types.
+
+  author_list_complete            values: 'Y'  (default) or 'N'
+  authors                         type:   array ref of Bio::Biblio::Provider's
+  cross_references                type:   array ref of Bio::Annotation::DBLink's
+  cross_references_list_complete  values: 'Y' (default) or 'N'
+  abstract
+  abstract_language
+  abstract_type
+  codes                           type:   hash ref
+  contributors                    type:   array ref of Bio::Biblio::Provider's
+  date
+  date_completed
+  date_created
+  date_revised
+  format
+  identifier
+  keywords                        type:   hash ref
+  language
+  last_modified_date
+  publisher                       type:   Bio::Biblio::Provider
+  repository_subset
+  rights
+  spatial_location
+  subject_headings                type:   hash ref
+  subject_headings_source
+  temporal_period
+  title
+  toc
+  toc_type
+  type
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Ref;
+use strict;
+use vars qw($AUTOLOAD);   # NOTE(review): $AUTOLOAD is declared but not referenced in this module's own code
+
+use Bio::Annotation::DBLink;   # the type add_cross_reference() checks its argument against
+
+use base qw(Bio::Biblio::BiblioBase);
+
+#
+# A closure holding the attribute names allowed in this class (these names
+# correspond to the allowed 'get' and 'set' methods); each name also
+# keeps the type its value should have (use 'undef' if it is a
+# simple scalar).
+#
+{
+    my %_allowed =
+	(
+	 _author_list_complete => undef,
+	 _authors => 'ARRAY',  # of Bio::Biblio::Provider
+	 _cross_references => 'ARRAY',   # of Bio::Annotation::DBLink
+	 _cross_references_list_complete => undef,
+	 _abstract => undef,
+	 _abstract_language => undef,
+	 _abstract_type => undef,
+	 _codes => 'HASH',
+	 _contributors => 'ARRAY',  # of Bio::Biblio::Provider
+	 _date => undef,
+	 _date_completed => undef,
+	 _date_created => undef,
+	 _date_revised => undef,
+	 _format => undef,
+	 _identifier => undef,
+	 _keywords => 'HASH',
+	 _language => undef,
+	 _last_modified_date => undef,
+	 _publisher => 'Bio::Biblio::Provider',   # a class name rather than a plain reference type
+	 _repository_subset => undef,
+	 _rights => undef,
+	 _spatial_location => undef,
+	 _subject_headings => 'HASH',
+	 _subject_headings_source => undef,
+	 _temporal_period => undef,
+	 _title => undef,
+	 _toc => undef,
+	 _toc_type => undef,
+	 _type => undef,
+	 );
+
+    # Return a true value if $attr is listed in %_allowed; no parent class is consulted here.
+    sub _accessible {
+	my ($self, $attr) = @_;
+	exists $_allowed{$attr};   # last expression is the return value
+    }
+
+    # Return the expected type of $attr as recorded in %_allowed.
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	$_allowed{$attr};   # undef both for simple scalars and for names that are not listed
+    }
+}
+
+
+=head2 add_cross_reference
+
+ Usage   : $self->add_cross_reference
+               (new Bio::Annotation::DBLink (-database   => 'EMBL',
+					     -primary_id => 'V00808'));
+ Function: adding a link to a database entry
+ Returns : new value of 'cross_references'
+ Args    : an object of type Bio::Annotation::DBLink
+
+=cut
+
+sub add_cross_reference {
+    my ($self, $value) = @_;
+    $self->throw ($self->_wrong_type_msg (ref $value, 'Bio::Annotation::DBLink'))   # reject anything that is not a DBLink
+	unless (UNIVERSAL::isa ($value, 'Bio::Annotation::DBLink'));
+    (defined $self->cross_references) ?   # the ternary is used purely for control flow
+	push (@{ $self->cross_references }, $value) :   # a list is already set: append to the array the accessor returns
+	    return $self->cross_references ( [$value] );   # nothing set yet: store a one-element list, return the setter's result
+    return $self->cross_references;   # reached only after the push branch
+}
+
+
+=head2 add_author
+
+ Usage   : $self->add_author (new Bio::Biblio::Person (-lastname => 'Novak'));
+ Function: adding an author to a list of authors
+ Returns : new value of 'authors' (a full list)
+ Args    : an object of type Bio::Biblio::Provider
+
+=cut
+
+
+sub add_author {
+    my ($self, $value) = @_;
+    $self->throw ($self->_wrong_type_msg (ref $value, 'Bio::Biblio::Provider'))   # reject anything that is not a Provider
+	unless (UNIVERSAL::isa ($value, 'Bio::Biblio::Provider'));
+    (defined $self->authors) ?   # the ternary is used purely for control flow
+	push (@{ $self->authors }, $value) :   # a list is already set: append to the array the accessor returns
+	    return $self->authors ( [$value] );   # nothing set yet: store a one-element list, return the setter's result
+    return $self->authors;   # reached only after the push branch
+}
+
+=head2 add_contributor
+
+ Usage   : $self->add_contributor (new Bio::Biblio::Person (-lastname => 'Novak'));
+ Function: adding a contributor to a list of contributors
+ Returns : new value of 'contributors' (a full list)
+ Args    : an object of type Bio::Biblio::Provider
+
+=cut
+
+sub add_contributor {
+    my ($self, $value) = @_;
+    $self->throw ($self->_wrong_type_msg (ref $value, 'Bio::Biblio::Provider'))   # reject anything that is not a Provider
+	unless (UNIVERSAL::isa ($value, 'Bio::Biblio::Provider'));
+    (defined $self->contributors) ?   # the ternary is used purely for control flow
+	push (@{ $self->contributors }, $value) :   # a list is already set: append to the array the accessor returns
+	    return $self->contributors ( [$value] );   # nothing set yet: store a one-element list, return the setter's result
+    return $self->contributors;   # reached only after the push branch
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Service.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Service.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Service.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,125 @@
+# $Id: Service.pm,v 1.13.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Service
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Service - Representation of a provider of type service
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Service (-name => 'Report generator');
+
+  #--- OR ---
+
+    $obj = new Bio::Biblio::Service;
+    $obj->name ('Report generator');
+
+=head1 DESCRIPTION
+
+A storage object for a service (such as a computer service) related to a bibliographic resource.
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  name
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Service;
+use strict;
+
+
+use base qw(Bio::Biblio::Provider);
+
+#
+# The block below is a closure around the table of attribute names
+# this class adds (each name matches a 'get'/'set' method); the value
+# stored for a name is the type the attribute must have, or 'undef'
+# when it is a simple scalar.
+#
+{
+    my %_allowed = (
+	_name => undef,
+    );
+
+    # true if $attr can be set/get in this class; names not listed
+    # above are passed on to the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+	return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr as recorded above; names not listed
+    # there are passed on to the parent class
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	return $_allowed{$attr} if exists $_allowed{$attr};
+	return $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/TechReport.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/TechReport.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/TechReport.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,125 @@
+# $Id: TechReport.pm,v 1.12.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::TechReport
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::TechReport - Representation of a technical report
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::TechReport
+                  (-authors => [
+                       new Bio::Biblio::Person (-lastname => 'Hasek'),
+                       new Bio::Biblio::Person (-lastname => 'Jagr'),
+                       new Bio::Biblio::Organisation (-name => 'NHL'),
+                               ]
+                   -title => 'Pinned in the corner');
+
+=head1 DESCRIPTION
+
+A storage object for a technical report.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+There are no specific attributes in this class
+(however, you can set and get all attributes defined in the parent classes).
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::TechReport;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);
+
+#
+# The block below is a closure around the table of attribute names
+# this class adds (each name matches a 'get'/'set' method); the value
+# stored for a name is the type the attribute must have, or 'undef'
+# when it is a simple scalar.
+#
+{
+    my %_allowed = (
+    );
+
+    # true if $attr can be set/get in this class; names not listed
+    # above are passed on to the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+	return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr as recorded above; names not listed
+    # there are passed on to the parent class
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	return $_allowed{$attr} if exists $_allowed{$attr};
+	return $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Thesis.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Thesis.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/Thesis.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,124 @@
+# $Id: Thesis.pm,v 1.12.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::Thesis
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::Thesis - Representation of thesis
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::Thesis (-title => 'Perl on the edge');
+
+ #--- OR ---
+
+    $obj = new Bio::Biblio::Thesis;
+    $obj->title ('Perl on the edge');
+
+=head1 DESCRIPTION
+
+A storage object for thesis.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+There are no specific attributes in this class
+(however, you can set and get all attributes defined in the parent classes).
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::Thesis;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);
+
+#
+# The block below is a closure around the table of attribute names
+# this class adds (each name matches a 'get'/'set' method); the value
+# stored for a name is the type the attribute must have, or 'undef'
+# when it is a simple scalar.
+#
+{
+    my %_allowed = (
+    );
+
+    # true if $attr can be set/get in this class; names not listed
+    # above are passed on to the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+	return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr as recorded above; names not listed
+    # there are passed on to the parent class
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	return $_allowed{$attr} if exists $_allowed{$attr};
+	return $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/WebResource.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/WebResource.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio/WebResource.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+# $Id: WebResource.pm,v 1.13.4.3 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Biblio::WebResource
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio::WebResource - Representation of a web resource
+
+=head1 SYNOPSIS
+
+    $obj = new Bio::Biblio::WebResource
+                  (-url  => 'http://resources/best.html',
+                   -estimated_size => 45000);
+  # --- OR ---
+
+    $obj = new Bio::Biblio::WebResource;
+    $obj->cost ('0.3 EURO');
+
+=head1 DESCRIPTION
+
+A storage object for a citation quoting a web resource.
+See its place in the class hierarchy in
+http://www.ebi.ac.uk/~senger/openbqs/images/bibobjects_perl.gif
+
+=head2 Attributes
+
+The following attributes are specific to this class
+(however, you can also set and get all attributes defined in the parent classes):
+
+  url
+  estimated_size
+  cost
+
+=head1 SEE ALSO
+
+=over 4
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org),
+Martin Senger (senger at ebi.ac.uk)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio::WebResource;
+use strict;
+
+
+use base qw(Bio::Biblio::Ref);
+
+#
+# The block below is a closure around the table of attribute names
+# this class adds (each name matches a 'get'/'set' method); the value
+# stored for a name is the type the attribute must have, or 'undef'
+# when it is a simple scalar.
+#
+{
+    my %_allowed = (
+		    _url => undef,
+		    _estimated_size => undef,
+		    _cost => undef,
+		    );
+
+    # true if $attr can be set/get in this class; names not listed
+    # above are passed on to the parent class
+    sub _accessible {
+	my ($self, $attr) = @_;
+	return 1 if exists $_allowed{$attr};
+	return $self->SUPER::_accessible ($attr);
+    }
+
+    # expected type of $attr as recorded above; names not listed
+    # there are passed on to the parent class
+    sub _attr_type {
+	my ($self, $attr) = @_;
+	return $_allowed{$attr} if exists $_allowed{$attr};
+	return $self->SUPER::_attr_type ($attr);
+    }
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Biblio.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Biblio.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Biblio.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,364 @@
+# $Id: Biblio.pm,v 1.16.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module Bio::Biblio
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Biblio - A Bibliographic Query Service module
+
+=head1 SYNOPSIS
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio;
+
+  print $biblio->find ('perl')->get_count . "\n";
+
+  my $collection = $biblio->find ('brazma', 'authors');
+  while ( $collection->has_next ) {
+      print $collection->get_next;
+  }
+
+  # The new() method can accept parameters, for example:
+
+  $biblio = new Bio::Biblio
+    (-access          => 'soap',
+     -location        => 'http://www.ebi.ac.uk/openbqs/services/MedlineSRS',
+     -destroy_on_exit => '0');
+
+  # See below for some one-liners
+
+=head1 DESCRIPTION
+
+This is a class whose instances can access bibliographic
+repositories. It allows you to query a bibliographic database (such as
+MEDLINE) and then to retrieve the resulting citations from it. The
+citations are returned in an XML format which is native to the
+repository but there are also supporting modules for converting them
+into Perl objects.
+
+The detailed descriptions of all query and retrieval methods are in
+L<Bio::DB::BiblioI> (an interface). All those methods should be
+called on instances of this (Bio::Biblio) module.
+
+The module complies (with some simplifications) with the specification
+described in the B<OpenBQS> project. Its home page is at
+L<http://www.ebi.ac.uk/~senger/openbqs>.
+
+The module also gives access to a set of controlled vocabularies
+and their values. It allows you to introspect bibliographic repositories
+and to find what citation resource types (such as journal and book
+articles, patents or technical reports) are provided, what
+attributes they have, and possibly what attribute values are allowed.
+
+Here are some one-liners:
+
+  perl -MBio::Biblio -e 'print new Bio::Biblio->get_by_id ("12368254")'
+  perl -MBio::Biblio \
+       -e 'print join ("\n", @{ new Bio::Biblio->find ("brazma")->get_all_ids })'
+  perl -MBio::Biblio \
+       -e 'print new Bio::Biblio->find ("Java")->find ("perl")->get_count'
+
+
+=head1 OVERVIEW OF CLASSES AND PACKAGES
+
+=over
+
+=item L<Bio::Biblio>
+
+This is the main class to be used by the end users. It
+loads a real implementation for a particular access protocol according
+to the argument I<-access>. At the time of writing this documentation
+there is only one available access module implementing all query and
+retrieval methods:
+
+   -access => soap
+
+This module implements all methods defined in the interface
+I<Bio::DB::BiblioI> (see L<Bio::DB::BiblioI>) by delegating
+calls to a loaded low-level module (e.g. see
+L<Bio::DB::Biblio::soap>).
+
+Note that there are other modules which do not use the SOAP protocol 
+and do not implement all query methods - nevertheless they have retrieval 
+methods and can be used in the same way:
+
+   -access => biofetch
+
+Lacking documentation:
+
+   -access => eutils
+
+=item Bio::DB::BiblioI
+
+This is an interface defining all methods that can be called on
+I<Bio::Biblio> instances.
+
+=item Bio::DB::Biblio::soap
+
+This is a real implementation of all methods defined in
+Bio::DB::BiblioI using SOAP protocol (calling a WebService
+based on SOAP). This class should not be instantiated directly (use
+I<Bio::Biblio> instead). See L<Bio::DB::BiblioI> for details.
+
+=item Bio::Biblio::IO
+
+This module instantiates and uses a converter of the citations read by
+any of the access methods mentioned above. See L<Bio::Biblio::IO> for
+details.
+
+=item Bio::Biblio::IO::medlinexml and Bio::Biblio::IO::medline2ref
+
+A converter of MEDLINE citations in XML into Perl objects.
+
+=item Bio::Biblio::IO::pubmedxml and Bio::Biblio::IO::pubmed2ref
+
+A converter of PUBMED citations in XML into Perl objects.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (martin.senger at gmail.com)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+=over
+
+=item *
+
+OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs/
+
+=item *
+
+Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
+
+=back
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::DB::BiblioI>.
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Biblio;
+use strict;
+
+use base qw(Bio::Root::Root Bio::DB::BiblioI);
+
+# -----------------------------------------------------------------------------
+
+=head2 new
+
+ Usage   : my $obj = new Bio::Biblio (@args);
+ Returns : Bio::Biblio object on success, or undef on failure
+ Args    : This module recognizes and uses:
+
+             -access => 'soap'
+               It indicates what lower-level module to load.
+               Default is 'soap'.
+
+             -location => 'http://...'
+                It says where to find a bibliographic query service.
+                The format and contents of this argument is dependent
+                on the '-access' argument.
+
+                For 'soap' access it is a URL of a WebService.
+                Default is http://www.ebi.ac.uk/openbqs/services/MedlineSRS
+
+           Other arguments can be given here but they are
+           recognized by the lower-level module
+           (e.g. see Bio::DB::Biblio::soap).
+
+It builds, populates and returns a new I<Bio::Biblio> object. This is
+how it is seen from the outside. But in fact, it builds, populates and
+returns a more specific lower-level object, for example
+I<Bio::DB::Biblio::soap> object - which one it is depends on the
+parameter I<-access>.
+
+The real initialization is done in the method I<_initialize> of the
+lower-level object.
+
+This method can also be used for I<cloning> an existing object and
+changing or adding new attributes to it at the same time. This is,
+however, not particularly useful for the casual users of this module,
+because the query methods (see L<Bio::DB::BiblioI>) themselves
+already return cloned objects with more refined query
+collections. Anyway this is how the cloning can be done:
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio;
+
+  # this will create a new object which will NOT send a 'destroy'
+  # message to the remote server when its life ends
+  my $clone = $biblio->new (-destroy_on_exit => '0');
+
+=cut
+
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+
+    # if $caller is an object, or if $class is an underlying
+    # 'real-work-doing' class (e.g. Bio::DB::Biblio::soap), then
+    # we want to call SUPER to create and bless an object
+
+    if ($class =~ /Bio::DB::Biblio::(\S+)/) {
+	my ($self) = $class->SUPER::new (@args);
+
+	# now $self is an empty object - we populate it from
+	# $caller if $caller is an object (this is the cloning case)
+
+	if (ref ($caller)) {
+	    %{ $self } = %{ $caller };
+	}
+
+	# and finally add values from '@args' into the newly created
+	# object (the values will overwrite the values copied above)
+
+	$self->_initialize (@args);
+	return $self;
+
+    # this branch is taken when somebody calls 'new Bio::Biblio (...)':
+    # it loads a 'real-work-doing' module and calls new() on it, which
+    # ends up in this method again (unless the loaded module has its
+    # own new() method)
+
+    } else {
+	my %param = @args;
+	@param { map { lc $_ } keys %param } = values %param; # lowercase keys
+	my $access =
+	    $param {'-access'} ||
+	    $class->_guess_access ( $param {'-location'} ) ||
+	    'soap';
+	$access = "\L$access";	# normalize capitalization to lower case
+
+	# load module with the real implementation - as defined in $access
+	return unless (_load_access_module ($access));
+
+	# this calls this same new() method again - but this time it
+	# takes the upper (object) branch
+	return "Bio::DB::Biblio::$access"->new (@args);
+    }
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 _load_access_module
+
+ Usage   : _load_access_module ($access)
+ Returns : 1 on success; an exception is thrown on failure
+ Args    : 'access' should contain the last part of the
+           name of a module that does the real implementation
+
+It does (at run-time) a similar thing as
+
+   require Bio::DB::Biblio::$access
+
+It throws an exception if it fails to find and load the module
+(for example, because of compilation errors in the module).
+
+=cut
+
+sub _load_access_module {
+  my ($access) = @_;
+
+  # NOTE(review): looks like a shortcut for an already compiled file ("_<file" entry in main::) - confirm
+  my $module = "_<Bio/DB/Biblio/$access.pm";
+  my $load = "Bio/DB/Biblio/$access.pm";
+
+  return 1 if $main::{$module};
+  eval {
+    require $load;
+  };
+
+  if ( $@ ) {
+    Bio::Root::Root->throw (<<END);
+$load: $access cannot be found or loaded
+Exception $@
+For more information about the Biblio system please see the Bio::Biblio docs.
+END
+    # throw() is expected to raise an exception; this return is only a safety net
+    return;
+  }
+  return 1;
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 _guess_access
+
+ Usage   : $class->_guess_access ($location)
+ Returns : string with a guessed access protocol (e.g. 'soap')
+ Args    : 'location' defines where to find a bibliographic service
+           in a protocol-dependent manner (e.g. for SOAP it is
+           a URL of a bibliographic WebService)
+
+It makes an expert guess what kind of access/transport protocol should
+be used based on the I<location> of the service (e.g. if the
+I<location> looks like an IOR then the access protocol is probably
+CORBA).
+
+=cut
+
+# this is kept here for the future when more access protocols
+# (e.g. CORBA) may be available for accessing bibliographic query
+# services
+
+sub _guess_access {
+#   my ($class, $location) = @_;   # arguments are not looked at yet
+   return 'soap';
+}
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::Biblio::VERSION;
+           print $Bio::Biblio::Revision;
+
+=cut
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/ClusterFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/ClusterFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/ClusterFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,179 @@
+# $Id: ClusterFactory.pm,v 1.6.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module for Bio::Cluster::ClusterFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Cluster::ClusterFactory - Instantiates a new Bio::ClusterI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Cluster::ClusterFactory;
+    # if you don't provide a default type, the factory will try
+    # some guesswork based on display_id and namespace
+    my $factory = new Bio::Cluster::ClusterFactory(-type => 'Bio::Cluster::UniGene');
+    my $clu = $factory->create_object(-description => 'NAT',
+                                      -display_id  => 'Hs.2');
+
+
+=head1 DESCRIPTION
+
+This object will build L<Bio::ClusterI> objects generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Cluster::ClusterFactory;
+use strict;
+
+use Bio::Root::Root;
+
+use base qw(Bio::Factory::ObjectFactory);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Cluster::ClusterFactory();
+ Function: Builds a new Bio::Cluster::ClusterFactory object 
+ Returns : Bio::Cluster::ClusterFactory
+ Args    : -type => string, name of a ClusterI derived class.
+                    If not provided, the factory will have to guess
+                    from ID and namespace, which may or may not be
+                    successful.
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # register Bio::ClusterI as the interface of this factory
+    $self->interface("Bio::ClusterI");
+    # re-apply an already set type (presumably to re-check it against the interface)
+    $self->type($self->type) if $self->type;
+    return $self;
+}
+
+
+=head2 create_object
+
+ Title   : create_object
+ Usage   : my $seq = $factory->create_object(<named parameters>);
+ Function: Instantiates new Bio::ClusterI (or one of its child classes)
+
+           This object allows us to genericize the instantiation of
+           cluster objects.
+
+ Returns : L<Bio::ClusterI> compliant object
+           The return type is configurable using new(-type =>"...").
+ Args    : initialization parameters specific to the type of cluster
+           object we want.  Typically 
+           -display_id  => $name
+           -description => description of the cluster
+           -members     => arrayref, members of the cluster
+
+=cut
+
+sub create_object {
+   my ($self, @args) = @_;
+
+   my $type = $self->type();
+   if(! $type) {
+       # no type configured: guess it from @args and remember it
+       $type = $self->_guess_type(@args);
+       $self->throw("No cluster type set and unable to guess.") unless $type;
+       $self->type($type);
+   }
+   return $type->new(-verbose => $self->verbose, @args);
+}
+
+=head2 _guess_type
+
+ Title   : _guess_type
+ Usage   :
+ Function: Guesses the right type of L<Bio::ClusterI> implementation
+           based on initialization parameters for the prospective
+           object.
+ Example :
+ Returns : the type (a string, the module name)
+ Args    : initialization parameters to be passed to the prospective
+           cluster object
+
+
+=cut
+
+sub _guess_type{
+    my ($self, @args) = @_;
+    my $type;
+
+    # only these named arguments are extracted; MEMBERS is not used yet
+    my ($dispid, $ns, $members) =
+	$self->_rearrange([qw(DISPLAY_ID
+			      NAMESPACE
+			      MEMBERS
+			      )], @args);
+    # UniGene namespace, or a display ID starting with 'Hs.' and a digit?
+    if($ns && (lc($ns) eq "unigene")) {
+	$type = 'Bio::Cluster::UniGene';
+    } elsif($dispid && ($dispid =~ /^Hs\.[0-9]/)) {
+	$type = 'Bio::Cluster::UniGene';
+    }
+    # nothing else is recognized; $type stays undef in that case
+    return $type;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/FamilyI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/FamilyI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/FamilyI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,172 @@
+#
+# BioPerl module for Bio::Cluster::FamilyI
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Cluster::FamilyI - Family Interface
+
+=head1 SYNOPSIS
+
+    # see the implementations of this interface for details
+
+    my $cluster= $cluster->new(-description=>"POLYUBIQUITIN",
+                               -members    =>[$seq1,$seq2]);
+    my @members = $cluster->get_members();
+    my @sub_members = $cluster->get_members(-species=>"homo sapiens");
+
+=head1 DESCRIPTION
+
+This interface is for a Family object representing a family of
+biological objects. A generic implementation for this may be
+found in L<Bio::Cluster::Family>.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::Cluster::FamilyI;
+use strict;
+
+
+use base qw(Bio::ClusterI);
+
+=head2 new
+
+  We don't mandate but encourage implementors to support at least the
+  following named parameters upon object initialization.
+
+ Arguments          Description
+ ---------          -----------
+ -family_id         the name of the family
+ -description       the consensus description of the family
+ -annotation_score  the confidence by which the consensus description is 
+                    representative of the family
+ -members           the members belonging to the family
+ -alignment         the multiple alignment of the members
+
+=cut
+
+
+=head2 family_id
+
+ Title   : family_id
+ Usage   : Bio::Cluster::FamilyI->family_id("znfp");
+ Function: get/set for the family id 
+ Returns : the family id 
+ Args    : the family id
+
+=cut
+
+sub family_id{
+    # Interface stub: no implementation here, the call is forwarded to
+    # throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 family_score
+
+ Title   : family_score
+ Usage   : Bio::Cluster::FamilyI->family_score(95);
+ Function: get/set for the score of algorithm used to generate
+           the family if present
+ Returns : the score
+ Args    : the score
+
+=cut
+
+sub family_score {
+    # Interface stub: no implementation here, the call is forwarded to
+    # throw_not_implemented().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+
+=head1 Methods inherited from L<Bio::ClusterI>
+
+=cut
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : 
+ Function: Get the display name or identifier for the cluster
+ Returns : a string
+ Args    : 
+
+=cut
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : Bio::Cluster::FamilyI->get_members();
+ Function: get the members of the family
+ Returns : the array of members
+ Args    : the array of members
+
+=cut
+
+=head2 description
+
+ Title   : description
+ Usage   : Bio::Cluster::FamilyI->description("Zinc Finger Protein");
+ Function: get/set for the description of the family
+ Returns : the description 
+ Args    : the description
+
+=cut
+
+
+=head2 size
+
+ Title   : size
+ Usage   : Bio::Cluster::FamilyI->size();
+ Function: get the size of the family
+ Returns : size 
+ Args    : 
+
+=cut
+
+=head2 cluster_score
+
+ Title   : cluster_score
+ Usage   : $cluster ->cluster_score(100);
+ Function: get/set for cluster_score which
+           represent the score in which the clustering
+           algorithm assigns to this cluster.
+ Returns : a number
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/SequenceFamily.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/SequenceFamily.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/SequenceFamily.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,447 @@
+# $Id: SequenceFamily.pm,v 1.10.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module for Bio::Cluster::SequenceFamily
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Cluster::SequenceFamily - Sequence Family object
+
+=head1 SYNOPSIS
+
+  use Bio::Cluster::SequenceFamily;
+
+  use Bio::SeqIO;
+  use Bio::Cluster::SequenceFamily;
+
+  my $file =  Bio::Root::IO->catfile('t','data','swiss.dat');
+  my $seqio= new Bio::SeqIO(-format => 'swiss',
+                            -file => $file);
+  my @mem;
+  while(my $seq = $seqio->next_seq){
+    push @mem, $seq;
+  }
+
+  #create the family
+  my $family = Bio::Cluster::SequenceFamily->new(
+          -family_id=>"Family_1",
+          -description=>"Family Description Here",
+          -annotation_score=>"100",
+          -members=>\@mem);
+
+  #access the family
+
+  foreach my $mem ($family->get_members){
+    print $mem->display_id."\t".$mem->desc."\n";
+  }
+
+  #select members if members have a Bio::Species Object
+
+  my @mem = $family->get_members(-binomial=>"Homo sapiens");
+  @mem = $family->get_members(-ncbi_taxid => 9606);
+  @mem = $family->get_members(-common_name=>"Human");
+  @mem = $family->get_members(-species=>"sapiens");
+  @mem = $family->get_members(-genus=>"Homo");
+
+=head1 DESCRIPTION
+
+This is a simple Family object that may hold any group of objects. For more
+specific families, one should derive from FamilyI.
+
+=head1 FEEDBACK
+
+Email bioperl-l at bioperl.org for support and feedback.
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Cluster::SequenceFamily;
+
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Cluster::FamilyI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $family = Bio::Cluster::SequenceFamily->new(
+                             -family_id=>"Family_1",
+                             -description=>"Family Description Here",
+                             -annotation_score=>"100",
+                             -members=>\@mem);
+ Function: Constructor for SequenceFamily object
+ Returns : Bio::Cluster::SequenceFamily object
+
+See L<Bio::Cluster::SequenceFamily>.
+
+=cut
+
+sub new {
+  # Constructor: builds the object through the parent constructor, starts
+  # with an empty member list, then applies whichever named parameters
+  # were supplied through the corresponding accessors.
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my ($id,$description,$version,$annot_score,
+  $family_score,$members) = $self->_rearrange([qw(FAMILY_ID DESCRIPTION VERSION 
+                                                   ANNOTATION_SCORE 
+                                                   FAMILY_SCORE MEMBERS)], @args);
+  $self->{'_members'} = [];
+  $id && $self->family_id($id);
+  $description && $self->description($description);
+  $version && $self->version($version);
+  # scores are tested for definedness so that a score of 0 still reaches
+  # its setter
+  defined($annot_score) && $self->annotation_score($annot_score);
+  defined($family_score) && $self->family_score($family_score);
+  $members && $self->add_members($members);
+
+  return $self;
+
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $family->version("1.0");
+ Function: get/set for version
+ Returns : a string version of the family generated. 
+
+=cut
+
+sub version{
+  # Get/set the version; only a true value replaces the stored one.
+  my $self = shift;
+  my $new_version = shift;
+  $self->{'_version'} = $new_version if $new_version;
+  return $self->{'_version'};
+}
+
+=head2 annotation_score
+
+ Title   : annotation_score
+ Usage   : $family->annotation_score(100);
+ Function: get/set for annotation_score which
+           represent the confidence in which the 
+           consensus description has been assigned
+           to the family.
+ Returns : the annotation score
+
+See L<Bio::SimpleAlign>
+
+=cut
+
+sub annotation_score{
+  # Get/set the annotation score. Returns the stored score.
+  my ($self,$score) = @_;
+  # test definedness rather than truth so that a score of 0 can be stored
+  if(defined $score){
+    $self->{'_annotation_score'} = $score;
+  }
+  return $self->{'_annotation_score'};
+}
+
+=head2 alignment
+
+ Title   : alignment
+ Usage   : $family->alignment($align);
+ Function: get/set for an alignment object representing
+           the multiple alignment of the members of the family.
+ Returns : Bio::SimpleAlign
+
+See L<Bio::SimpleAlign>
+
+=cut
+
+sub alignment {
+  # Get/set the alignment; only a true value replaces the stored one.
+  my $self = shift;
+  my $new_align = shift;
+  $self->{'_alignment'} = $new_align if $new_align;
+  return $self->{'_alignment'};
+}
+
+=head2 tree
+
+ Title   : tree
+ Usage   : $family->tree($tree);
+ Function: get/set for a tree object representing
+           the phylogenetic tree of the family. 
+ Returns : Bio::Tree
+
+See L<Bio::Tree>
+
+=cut
+
+sub tree {
+  # Get/set the tree; only a true value replaces the stored one.
+  my $self = shift;
+  my $new_tree = shift;
+  $self->{'_tree'} = $new_tree if $new_tree;
+  return $self->{'_tree'};
+}
+
+=head1 L<Bio::Cluster::FamilyI> methods
+
+=cut
+
+=head2 family_score
+
+ Title   : family_score
+ Usage   : Bio::Cluster::FamilyI->family_score(95);
+ Function: get/set for the score of algorithm used to generate
+           the family if present
+
+           This is aliased to cluster_score().
+
+ Returns : the score
+ Args    : the score
+
+=cut
+
+sub family_score {
+    # Alias: hands every argument to cluster_score() and returns its result.
+    my ($self, @args) = @_;
+    return $self->cluster_score(@args);
+}
+
+
+=head2 family_id
+
+ Title   : family_id
+ Usage   : $family->family_id("Family_1"); 
+ Function: get/set for family id 
+
+           This is aliased to display_id().
+
+ Returns : a string specifying identifier of the family 
+
+=cut
+
+sub family_id{
+    # Alias: hands every argument to display_id() and returns its result.
+    my ($self, @args) = @_;
+    return $self->display_id(@args);
+}
+
+=head1 L<Bio::ClusterI> methods
+
+=cut
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : 
+ Function: Get/set the display name or identifier for the cluster
+ Returns : a string
+ Args    : optional, on set the display ID ( a string)
+
+=cut
+
+sub display_id{
+    # Get/set the cluster identifier; only a true value replaces the
+    # stored one.
+    my $self = shift;
+    my $new_id = shift;
+    $self->{'_cluster_id'} = $new_id if $new_id;
+    return $self->{'_cluster_id'};
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $fam->description("POLYUBIQUITIN")
+ Function: get/set for the consensus description of the cluster
+ Returns : the description string 
+ Args    : Optional the description string 
+
+=cut
+
+sub description{
+    # Get/set the consensus description; only a true value replaces the
+    # stored one.
+    my $self = shift;
+    my $new_desc = shift;
+    $self->{'_description'} = $new_desc if $new_desc;
+    return $self->{'_description'};
+}
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : Valid criteria:
+           -common_name
+           -binomial
+           -ncbi_taxid
+           -organelle
+           -genus
+           $family->get_members(-common_name =>"human");
+           $family->get_members(-species     =>"homo sapiens");
+           $family->get_members(-ncbi_taxid  => 9606);
+           For now, multiple criteria are ORed.
+
+           Will return all members if no criteria are provided.
+
+ Function: get members using methods from L<Bio::Species>
+           the phylogenetic tree of the family.
+ Returns : an array of objects that are member of this family. 
+
+=cut
+
+sub get_members {
+    # Without arguments returns every member. With key/value criteria,
+    # returns the members whose species object answers at least one
+    # criterion method with the requested value (criteria are ORed).
+    # Throws if a criterion is not a method of the species object.
+    my $self = shift;
+
+    return @{$self->{'_members'}} unless @_;
+
+    my %hash = @_;
+    my @ret;
+    foreach my $mem ( @{$self->{'_members'}} ) {
+        next unless $mem->can('species');
+        my $species = $mem->species;
+        # a member may have no species set; it can match no criterion
+        next unless defined $species;
+
+        my $matched = 0;
+        foreach my $key ( keys %hash ) {
+            my $method = $key;
+            $method =~ s/-//g;
+            $species->can($method) ||
+              $self->throw("$method is an invalid criteria");
+            my $value = $species->$method();
+            $matched = 1 if defined($value) && $value eq $hash{$key};
+        }
+        # push once per member even when several criteria match
+        push @ret, $mem if $matched;
+    }
+    return @ret;
+}
+
+=head2 size
+
+ Title   : size
+ Usage   : $fam->size();
+ Function: get/set for the size of the family, 
+           calculated from the number of members
+ Returns : the size of the family 
+ Args    : 
+
+=cut
+
+sub size {
+  # Number of members currently held by the family.
+  my $self = shift;
+  my $member_list = $self->{'_members'};
+  return scalar @{$member_list};
+}
+
+=head2 cluster_score
+
+ Title   : cluster_score
+ Usage   : $fam->cluster_score(100);
+ Function: get/set for cluster_score which
+           represent the score in which the clustering
+           algorithm assigns to this cluster.
+ Returns : a number
+
+=cut
+
+sub cluster_score{
+  # Get/set the cluster score. Returns the stored score.
+  my ($self,$score) = @_;
+  # test definedness rather than truth so that a score of 0 can be stored
+  if(defined $score){
+    $self->{'_cluster_score'} = $score;
+  }
+  return $self->{'_cluster_score'};
+}
+
+
+=head1 Implementation specific methods
+
+  These are mostly for adding/removing/changing.
+
+=cut
+
+=head2 add_members
+
+ Title   : add_members
+ Usage   : $fam->add_members([$seq1,$seq2]);
+ Function: add members to a family
+ Returns : 
+ Args    : the member(s) to add, as an array or arrayref
+
+=cut
+
+sub add_members{
+    # Appends members to the family. The first argument may be an array
+    # reference, whose elements are appended; any further arguments are
+    # appended as they are. Always returns 1.
+    my ($self, @mems) = @_;
+
+    # nothing to add: do not push a spurious undef member
+    return 1 unless @mems;
+
+    my $mem = shift(@mems);
+    if(ref($mem) eq "ARRAY"){
+        push @{$self->{'_members'}}, @{$mem};
+    } else {
+        push @{$self->{'_members'}}, $mem;
+    }
+    push @{$self->{'_members'}}, @mems;
+
+    return 1;
+}
+
+=head2 remove_members
+
+ Title   : remove_members
+ Usage   : $fam->remove_members();
+ Function: remove all members from a family 
+ Returns : the previous array of members
+ Args    : none
+
+=cut
+
+sub remove_members{
+    # Empties the member list and returns the members it held before.
+    my $self = shift;
+    my @previous = @{ $self->{'_members'} };
+    $self->{'_members'} = [];
+    return @previous;
+}
+
+#####################################################################
+# aliases for naming consistency or other reasons                   #
+#####################################################################
+
+# flush_members() is another name for remove_members(), and add_member()
+# another name for add_members(); each alias points at the same code
+# reference as the sub it names.
+*flush_members = \&remove_members;
+*add_member = \&add_members;
+
+sub members{
+    # Deprecated combined accessor: warns on every call, then delegates to
+    # add_members() when arguments are given and to get_members() otherwise.
+    my ($self, @args) = @_;
+    my $class = ref($self);
+
+    unless (@args) {
+        # get mode
+        $self->warn("members() in ".$class." is deprecated.\n".
+                    "Use get_members() instead.");
+        return $self->get_members();
+    }
+
+    # set mode
+    $self->warn("setting members() in ".$class." is deprecated.\n".
+                "Use add_members() instead.");
+    return $self->add_members(@args);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1446 @@
+# $Id: UniGene.pm,v 1.38.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module for Bio::Cluster::UniGene.pm
+#
+# Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
+#
+# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
+# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
+# http://meg.otago.ac.nz/
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# April 17, 2002 - Initial implementation by Andrew Macgregor
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Cluster::UniGene - UniGene object
+
+=head1 SYNOPSIS
+
+	use Bio::Cluster::UniGene;
+	use Bio::ClusterIO;
+
+	$stream  = Bio::ClusterIO->new('-file' => "Hs.data", 
+                                       '-format' => "unigene");
+	# note: we quote -format to keep older perl's from complaining.
+
+	while ( my $in = $stream->next_cluster() ) {
+		print $in->unigene_id() . "\n";
+		while ( my $sequence = $in->next_seq() ) {
+			print $sequence->accession_number() . "\n";
+		}
+       }
+
+=head1 DESCRIPTION
+
+This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
+for the representation of UniGene clusters in Bioperl. It is returned
+by the L<Bio::ClusterIO> parser for unigene format and contains all
+the data associated with one UniGene record.
+
+This class implements several interfaces and hence can be used
+wherever instances of such interfaces are expected. In particular, the
+interfaces are L<Bio::ClusterI> as the base interface for all cluster
+representations, and in addition L<Bio::IdentifiableI> and
+L<Bio::DescribableI>.
+
+The following lists the UniGene specific methods that are available
+(see below for details). Be aware next_XXX iterators take a snapshot
+of the array property when they are first called, and this snapshot is
+not reset until the iterator is exhausted. Hence, once called you need
+to exhaust the iterator to see any changes that have been made to the
+property in the meantime. You will usually want to use the
+non-iterator equivalents and loop over the elements yourself.
+
+new() - standard new call
+
+unigene_id() - set/get unigene_id
+
+title() - set/get title (description)
+
+gene() - set/get gene
+
+cytoband() - set/get cytoband
+
+mgi() - set/get mgi
+
+locuslink() - set/get locuslink
+
+homol() - set/get homologene
+
+gnm_terminus() - set/get gnm_terminus
+
+scount() - set/get scount
+
+express() - set/get express, currently takes/returns a reference to an
+array of expressed tissues
+
+next_express() - returns the next tissue expression from the expressed
+tissue array
+
+chromosome() - set/get chromosome, currently takes/returns a reference
+to an array of chromosome lines
+
+next_chromosome() - returns the next chromosome line from the array of
+chromosome lines
+
+sts() - set/get sts, currently takes/returns a reference to an array
+of sts lines
+
+next_sts() - returns the next sts line from the array of sts lines
+
+txmap() - set/get txmap, currently takes/returns a reference to an
+array of txmap lines
+
+next_txmap() - returns the next txmap line from the array of txmap
+lines
+
+protsim() - set/get protsim, currently takes/returns a reference to an
+array of protsim lines
+
+next_protsim() - returns the next protsim line from the array of
+protsim lines
+
+sequences() - set/get sequence, currently takes/returns a reference to
+an array of references to seq info
+
+next_seq() - returns a Seq object that currently only contains an
+accession number
+
+
+=head1 Implemented Interfaces
+
+This class implements the following interfaces.
+
+=over 4
+
+=item Bio::Cluster::UniGeneI
+
+This includes implementing Bio::ClusterI.
+
+=item Bio::IdentifiableI
+
+=item Bio::DescribableI
+
+=item Bio::AnnotatableI
+
+=item Bio::Factory::SequenceStreamI
+
+=back
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Andrew Macgregor
+
+Email andrew at cbbc.murdoch.edu.au
+
+=head1 CONTRIBUTORS
+
+Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Cluster::UniGene;
+use strict;
+
+use Bio::Annotation::Collection;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::SimpleValue;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
+
+# Maps the alphabetic prefix of a UniGene cluster ID (the part before the
+# dot, e.g. "Hs" in "Hs.2") to a species name; new() uses it to pick a
+# default species when none is supplied.
+my %species_map = (
+                   'Aga' => "Anopheles gambiae",
+                   'Ame' => "Apis mellifera",
+                   'At'  => "Arabidopsis thaliana",
+                   'Bmo' => "Bombyx mori",
+                   'Bt'  => "Bos taurus",
+                   'Cel' => "Caenorhabditis elegans",
+                   'Cfa' => "Canis familiaris",
+                   'Cin' => "Ciona intestinalis",
+                   'Cre' => "Chlamydomonas reinhardtii",
+                   'Csa' => "Ciona savignyi",
+                   'Csi' => "Citrus sinensis",
+                   'Ddi' => "Dictyostelium discoideum",
+                   'Dr'  => "Danio rerio",
+                   'Dm'  => "Drosophila melanogaster",
+                   'Gga' => "Gallus gallus",
+                   'Gma' => "Glycine max",
+                   'Han' => "Helianthus annuus",
+                   'Hs'  => "Homo sapiens",
+                   'Hma' => "Hydra magnipapillata",
+                   'Hv'  => "Hordeum vulgare",
+                   'Lco' => "Lotus corniculatus",
+                   'Les' => "Lycopersicon esculentum",
+                   'Lsa' => "Lactuca sativa",
+                   'Mdo' => "Malus x domestica",
+                   'Mgr' => "Magnaporthe grisea",
+                   'Mm'  => "Mus musculus",
+                   'Mtr' => "Medicago truncatula",
+                   'Ncr' => "Neurospora crassa",
+                   'Oar' => "Ovis aries",
+                   'Omy' => "Oncorhynchus mykiss",
+                   'Os'  => "Oryza sativa",
+                   'Ola' => "Oryzias latipes",
+                   'Ppa' => "Physcomitrella patens",
+                   'Pta' => "Pinus taeda",
+                   'Ptp' => "Populus tremula x Populus tremuloides",
+                   'Rn'  => "Rattus norvegicus",
+                   'Sbi' => "Sorghum bicolor",
+                   'Sma' => "Schistosoma mansoni",
+                   'Sof' => "Saccharum officinarum",
+                   'Spu' => "Strongylocentrotus purpuratus",
+                   'Ssa' => "Salmo salar",
+                   'Ssc' => "Sus scrofa",
+                   'Str' => "Xenopus tropicalis",
+                   'Stu' => "Solanum tuberosum",
+                   'Ta'  => "Triticum aestivum",
+                   'Tgo' => "Toxoplasma gondii",
+                   'Tru' => "Takifugu rubripes",
+                   'Vvi' => "Vitis vinifera",
+                   'Xl'  => "Xenopus laevis",
+                   'Zm'  => "Zea mays",
+                   );
+
+
+=head2 new
+
+ Title   : new
+ Usage   : used by ClusterIO
+ Returns : a new Bio::Cluster::UniGene object
+
+=cut
+
+sub new {
+    # Constructor: builds the object through the parent constructor, then
+    # applies the supplied named parameters through the accessors, falling
+    # back to defaults for namespace, authority, sequence factory and
+    # species.
+    my ($caller, @args) = @_;
+    my $self = $caller->SUPER::new(@args);
+
+    my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
+        $self->_rearrange([qw(UNIGENE_ID
+                              DESCRIPTION
+                              MEMBERS
+                              SIZE
+                              SPECIES
+                              DISPLAY_ID
+                              OBJECT_ID
+                              NAMESPACE
+                              AUTHORITY
+                              VERSION
+                              SEQFACTORY
+                              )], @args);
+
+    $self->{'_alphabet'} = 'dna';
+
+    $self->unigene_id($ugid) if $ugid;
+    $self->description($desc) if $desc;
+    $self->sequences($mems) if $mems;
+    $self->size($size) if defined($size);
+    $self->display_id($dispid) if $dispid; # overwrites ugid
+    $self->object_id($id) if $id;          # overwrites dispid
+    $self->namespace($ns || 'UniGene');
+    $self->authority($auth || 'NCBI');
+    $self->version($v) if defined($v);
+    if( ! defined $seqfact ) {
+        # direct method call instead of indirect object syntax
+        $seqfact = Bio::Seq::SeqFactory->new(
+            -verbose => $self->verbose(),
+            -type    => 'Bio::Seq::RichSeq');
+    }
+    $self->sequence_factory($seqfact);
+    if( (! $species) && (defined $self->unigene_id() &&
+                         $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
+        # try set a default one depending on the ID
+        $species = $species_map{$1};
+    }
+    $self->species($species);
+    return $self;
+}
+
+
+=head1 L<Bio::Cluster::UniGeneI> methods
+
+=cut
+
+=head2 unigene_id
+
+ Title   : unigene_id
+ Usage   : unigene_id();
+ Function: Returns the unigene_id associated with the object.
+ Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
+ Returns : A string
+ Args    : None or an id
+
+
+=cut
+
+sub unigene_id {
+    # Get/set the UniGene ID; any defined value replaces the stored one.
+    my $self = shift;
+    my $new_id = shift;
+    $self->{'unigene_id'} = $new_id if defined $new_id;
+    return $self->{'unigene_id'};
+}
+
+
+
+=head2 title
+
+ Title   : title
+ Usage   : title();
+ Function: Returns the title associated with the object.
+ Example : $title = $unigene->title or $unigene->title($title)
+ Returns : A string
+ Args    : None or a title
+
+
+=cut
+
+sub title {
+    # Get/set the title; any defined value replaces the stored one.
+    my $self = shift;
+    my $new_title = shift;
+    $self->{'title'} = $new_title if defined $new_title;
+    return $self->{'title'};
+}
+
+
+=head2 gene
+
+ Title   : gene
+ Usage   : gene();
+ Function: Returns the gene associated with the object.
+ Example : $gene = $unigene->gene or $unigene->gene($gene)
+ Returns : A string
+ Args    : None or a gene
+
+
+=cut
+
+sub gene {
+    # Get/set the gene through _annotation_value() under 'gene_name'.
+    my ($self, @args) = @_;
+    return $self->_annotation_value('gene_name', @args);
+}
+
+
+=head2 cytoband
+
+ Title   : cytoband
+ Usage   : cytoband();
+ Function: Returns the cytoband associated with the object.
+ Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
+ Returns : A string
+ Args    : None or a cytoband
+
+
+=cut
+
+sub cytoband {
+    # Get/set the cytoband through _annotation_value() under 'cyto_band'.
+    my ($self, @args) = @_;
+    return $self->_annotation_value('cyto_band', @args);
+}
+
+=head2 mgi
+
+ Title   : mgi
+ Usage   : mgi();
+ Function: Returns the mgi associated with the object.
+ Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
+ Returns : A string
+ Args    : None or a mgi
+
+
+=cut
+
+sub mgi {
+    # Get/set the MGI accession kept as a 'dblink' annotation. Any
+    # argument, even a false one, first removes the existing MGI links;
+    # a true value is then added back.
+    my ($self, @args) = @_;
+
+    unless (@args) {
+        # get mode: first accession reported by the annotation lookup
+        my ($current) = $self->_annotation_dblink('dblink','MGI');
+        return $current;
+    }
+
+    my $acc = $args[0];
+    # purge first
+    $self->_remove_dblink('dblink','MGI');
+    # then add if a valid value is present
+    if($acc) {
+        $self->_annotation_dblink('dblink','MGI',$acc);
+    }
+    return $acc;
+}
+
+
+=head2 locuslink
+
+ Title   : locuslink
+ Usage   : locuslink();
+ Function: Returns or stores a reference to an array containing locuslink data.
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub locuslink {
+    # Get/set the LocusLink accessions kept as 'dblink' annotations.
+    # With a true argument (an array reference) the existing links are
+    # replaced and the argument is returned; otherwise a new array
+    # reference holding the current accessions is returned.
+    my ($self, $ll) = @_;
+
+    if(! $ll) {
+        return [ $self->_annotation_dblink('dblink','LocusLink') ];
+    }
+
+    # purge first
+    $self->_remove_dblink('dblink','LocusLink');
+    # then add as many accessions as are present
+    foreach my $accession (@{$ll}) {
+        $self->_annotation_dblink('dblink','LocusLink',$accession);
+    }
+    return $ll;
+}
+
+
+=head2 homol
+
+ Title   : homol
+ Usage   : homol();
+ Function: Returns the homol entry associated with the object.
+ Example : $homol = $unigene->homol or $unigene->homol($homol)
+ Returns : A string
+ Args    : None or a homol entry
+
+=cut
+
+sub homol {
+    # Get/set the homol entry through _annotation_value() under 'homol'.
+    my ($self, @args) = @_;
+    return $self->_annotation_value('homol', @args);
+}
+
+
+=head2 restr_expr
+
+ Title   : restr_expr
+ Usage   : restr_expr();
+ Function: Returns the restr_expr entry associated with the object.
+ Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
+ Returns : A string
+ Args    : None or a restr_expr entry
+
+=cut
+
+sub restr_expr {
+    # Get/set the restr_expr entry through _annotation_value() under
+    # 'restr_expr'.
+    my ($self, @args) = @_;
+    return $self->_annotation_value('restr_expr', @args);
+}
+
+
+=head2 gnm_terminus
+
+ Title   : gnm_terminus
+ Usage   : gnm_terminus();
+ Function: Returns the gnm_terminus associated with the object.
+ Example : $gnm_terminus = $unigene->gnm_terminus or 
+           $unigene->gnm_terminus($gnm_terminus)
+ Returns : A string
+ Args    : None or a gnm_terminus
+
+=cut
+
+sub gnm_terminus {
+    # Get/set the gnm_terminus through _annotation_value() under
+    # 'gnm_terminus'.
+    my ($self, @args) = @_;
+    return $self->_annotation_value('gnm_terminus', @args);
+}
+
+=head2 scount
+
+ Title   : scount
+ Usage   : scount();
+ Function: Returns the scount associated with the object.
+ Example : $scount = $unigene->scount or $unigene->scount($scount)
+ Returns : A string
+ Args    : None or a scount
+
+=cut
+
+sub scount {
+    # Get/set the sequence count. A defined argument is stored as given.
+    # On get, when nothing is stored yet and sequences() is defined, the
+    # count is taken from size() and stored.
+    my ($obj, $value) = @_;
+
+    if (defined $value) {
+        $obj->{'scount'} = $value;
+        return $obj->{'scount'};
+    }
+
+    if ((! defined($obj->{'scount'})) && defined($obj->sequences())) {
+        $obj->{'scount'} = $obj->size();
+    }
+    return $obj->{'scount'};
+}
+
+
+=head2 express
+
+ Title   : express
+ Usage   : express();
+ Function: Returns or stores a reference to an array containing 
+           tissue expression data
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub express {
+    # Get/set the tissue expression data through _annotation_value_ary()
+    # under 'expressed'.
+    my $self = shift;
+
+    return $self->_annotation_value_ary('expressed', @_);
+}
+
+
+=head2 chromosome
+
+ Title   : chromosome
+ Usage   : chromosome();
+ Function: Returns or stores a reference to an array containing
+           chromosome lines
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub chromosome {
+    # Get/set the chromosome lines through _annotation_value_ary() under
+    # 'chromosome'.
+    my $self = shift;
+
+    return $self->_annotation_value_ary('chromosome', @_);
+}
+
+
+=head2 sts
+
+ Title   : sts
+ Usage   : sts();
+ Function: Returns or stores a reference to an array containing sts lines
+
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub sts {
+    # Get/set the sts lines through _annotation_value_ary() under 'sts'.
+    my $self = shift;
+
+    return $self->_annotation_value_ary('sts', @_);
+}
+
+
+=head2 txmap
+
+ Title   : txmap
+ Usage   : txmap();
+ Function: Returns or stores a reference to an array containing txmap lines
+
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub txmap {
+    # Get/set the txmap lines through _annotation_value_ary() under 'txmap'.
+    my $self = shift;
+
+    return $self->_annotation_value_ary('txmap', @_);
+}
+
+
+=head2 protsim
+
+ Title   : protsim
+ Usage   : protsim();
+ Function: Returns or stores a reference to an array containing protsim lines
+	   This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub protsim {
+    # Get/set the protsim lines through _annotation_value_ary() under
+    # 'protsim'.
+    my $self = shift;
+
+    return $self->_annotation_value_ary('protsim', @_);
+}
+
+
+=head2 sequences
+
+ Title   : sequences
+ Usage   : sequences();
+ Function: Returns or stores a reference to an array containing
+           sequence data.
+
+           This is mostly reserved for ClusterIO parsers. You should
+           use get_members() for get and add_member()/remove_members()
+           for set.
+
+ Returns : An array reference, or undef
+ Args    : None or an array reference or undef
+
+=cut
+
+sub sequences {
+    # Get/set the member list. Any argument, including undef, replaces
+    # the stored value.
+    my ($self, @args) = @_;
+
+    return $self->{'members'} unless @args;
+    return $self->{'members'} = $args[0];
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $obj->species($newval)
+ Function: Get/set the species object for this Unigene cluster.
+ Example : 
+ Returns : value of species (a L<Bio::Species> object)
+ Args    : on set, new value (a L<Bio::Species> object or 
+           the binomial name, or undef, optional)
+
+
+=cut
+
+sub species{
+    # Get/set the species. On set, a true non-reference value is split on
+    # whitespace, reversed, and used as the classification of a new
+    # Bio::Species object; a reference or a false value is stored as given.
+    my ($self, @args) = @_;
+
+    return $self->{'species'} unless @args;
+
+    my $species = $args[0];
+    if($species && (! ref($species))) {
+        my @class = reverse(split(' ',$species));
+        $species = Bio::Species->new(-classification => \@class);
+    }
+    $self->{'species'} = $species;
+    return $species;
+}
+
+
+=head1 L<Bio::ClusterI> methods
+
+=cut
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : 
+ Function: Get/set the display name or identifier for the cluster
+
+           This is aliased to unigene_id().
+
+ Returns : a string
+ Args    : optional, on set the display ID ( a string)
+
+=cut
+
+sub display_id{
+    # Alias: hands every argument to unigene_id() and returns its result.
+    my ($self, @args) = @_;
+    return $self->unigene_id(@args);
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : Bio::ClusterI->description("POLYUBIQUITIN")
+ Function: get/set for the consensus description of the cluster
+
+           This is aliased to title().
+
+ Returns : the description string 
+ Args    : Optional the description string 
+
+=cut
+
+sub description{
+    # Alias: hands every argument to title() and returns its result.
+    my ($self, @args) = @_;
+    return $self->title(@args);
+}
+
+=head2 size
+
+ Title   : size
+ Usage   : Bio::ClusterI->size();
+ Function: get for the size of the family, 
+           calculated from the number of members
+
+           This is aliased to scount().
+
+ Returns : the size of the cluster
+ Args    : 
+
+=cut
+
+sub size {
+    # Size of the cluster. While sequences() is undefined the call is
+    # delegated to scount(), so the size may be hard-wired. Otherwise the
+    # size is the number of sequences, and an argument that differs from
+    # it raises an exception.
+    my ($self, @args) = @_;
+
+    if (! defined($self->sequences())) {
+        # hard-wiring the size is allowed if there are no sequences
+        return $self->scount(@args);
+    }
+
+    # but we can't change the number of members through this method
+    my $n = scalar(@{$self->sequences()});
+    my $wants_change = @args && ($n != $args[0]);
+    if($wants_change) {
+        $self->throw("Cannot change cluster size using size() from $n to ".
+                     $args[0]);
+    }
+    return $n;
+}
+
+=head2 cluster_score
+
+ Title   : cluster_score
+ Usage   : $cluster ->cluster_score(100);
+ Function: get/set for cluster_score, which
+           represents the score that the clustering
+           algorithm assigns to this cluster.
+
+           For UniGene clusters, there really is no cluster score that
+           would come with the data. However, we provide an
+           implementation here so that you can score UniGene clusters
+           if you want to.
+
+ Returns : a number
+ Args    : optionally, on set a number
+
+=cut
+
+sub cluster_score{
+    # Combined getter/setter; any argument, even undef, counts as a set.
+    my ($self, @args) = @_;
+
+    $self->{'cluster_score'} = $args[0] if @args;
+    return $self->{'cluster_score'};
+}
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : Bio::ClusterI->get_members(($seq1, $seq2));
+ Function: retrieve the members of the family by some criteria
+
+           Will return all members if no criteria are provided.
+
+           At this time this implementation does not support
+           specifying criteria and will always return all members.
+
+ Returns : the array of members
+ Args    : 
+
+=cut
+
+sub get_members {
+    # Return all members as a list. If the stored list still starts with a
+    # raw hash, it is converted via next_seq() and the result is cached.
+    my ($self) = @_;
+
+    my $members = $self->sequences() || [];
+
+    # nothing to convert if the list is empty or already holds objects
+    my $is_raw = @$members && (ref($members->[0]) eq "HASH");
+    return @$members unless $is_raw;
+
+    # build the object list from scratch by draining the iterator
+    my @objects = ();
+    while(my $seq = $self->next_seq()) {
+        push(@objects, $seq);
+    }
+    # cache the array of objects as the new member list
+    $self->sequences(\@objects);
+    return @objects;
+}
+
+
+=head1 Annotatable view at the object properties
+
+=cut
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($newval)
+ Function: Get/set the L<Bio::AnnotationCollectionI> object for
+           this UniGene cluster.
+
+           Many attributes of this class are actually stored within
+           the annotation collection object as L<Bio::AnnotationI>
+           compliant objects, so you can conveniently access them
+           through the same interface as you would e.g. access
+           L<Bio::SeqI> annotation properties.
+
+           If you call this method in set mode and replace the
+           annotation collection with another one you should know
+           exactly what you are doing.
+
+ Example : 
+ Returns : a L<Bio::AnnotationCollectionI> compliant object
+ Args    : on set, new value (a L<Bio::AnnotationCollectionI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub annotation{
+    # Get/set the annotation collection; an empty collection is created
+    # lazily the first time the getter finds no 'annotation' key at all.
+    my ($self, @args) = @_;
+
+    # setter: store whatever was passed, including undef
+    return $self->{'annotation'} = $args[0] if @args;
+
+    # getter: create only if the key has never been set
+    $self->{'annotation'} = Bio::Annotation::Collection->new()
+        unless exists($self->{'annotation'});
+    return $self->{'annotation'};
+}
+
+
+=head1 Implementation specific methods
+
+ These are mostly for adding/removing to array properties, and for
+ methods with special functionality.
+
+=cut
+
+=head2 add_member
+
+ Title   : add_member
+ Usage   :
+ Function: Adds a member object to the list of members.
+ Example :
+ Returns : TRUE if the new member was successfully added, and FALSE
+           otherwise.
+ Args    : The member to add.
+
+
+=cut
+
+sub add_member{
+    # Append one or more member objects to the cluster's member list.
+    # Args   : the member object(s) to add
+    # Returns: 1
+    my ($self, @mems) = @_;
+
+    my $memlist = $self->{'members'} || [];
+    # this is an object interface; is the member list already objects?
+    if(@$memlist && (ref($memlist->[0]) eq "HASH")) {
+        # nope, convert the raw hashes to objects first
+        $memlist = [$self->get_members()];
+    }
+    # add new member(s)
+    push(@$memlist, @mems);
+    # always store: $memlist may be an array ref created just above
+    $self->sequences($memlist);
+    # done
+    return 1;
+}
+
+=head2 remove_members
+
+ Title   : remove_members
+ Usage   :
+ Function: Remove the list of members for this cluster such that the
+           member list is undefined afterwards (as opposed to zero members).
+ Example :
+ Returns : the previous list of members
+ Args    : none
+
+
+=cut
+
+sub remove_members{
+    my ($self) = @_;
+
+    # capture the current members before dropping the list
+    my @previous = $self->get_members();
+    # passing undef leaves the member list undefined rather than empty
+    $self->sequences(undef);
+    return @previous;
+}
+
+
+=head2 next_locuslink
+
+ Title   : next_locuslink
+ Usage   : next_locuslink();
+ Function: Returns the next locuslink from an array referred 
+           to using $obj->{'locuslink'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $locuslink = $in->next_locuslink() ) {
+				print "$locuslink\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_locuslink {
+    # Iterator over locuslink(); the queue is kept under the "ll" prefix.
+    my $self = shift;
+    return $self->_next_element("ll", "locuslink");
+}
+
+=head2 next_express
+
+ Title   : next_express
+ Usage   : next_express();
+ Function: Returns the next tissue from an array referred 
+           to using $obj->{'express'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $express = $in->next_express() ) {
+				print "$express\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_express {
+    # Iterator over express(); the queue is kept under the "express" prefix.
+    my $self = shift;
+    return $self->_next_element("express", "express");
+}
+
+
+=head2 next_chromosome
+
+ Title   : next_chromosome
+ Usage   : next_chromosome();
+ Function: Returns the next chromosome line from an array referred
+           to using $obj->{'chromosome'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $chromosome = $in->next_chromosome() ) {
+				print "$chromosome\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_chromosome {
+    # Iterator over chromosome(); the queue is kept under the "chr" prefix.
+    my $self = shift;
+    return $self->_next_element("chr", "chromosome");
+}
+
+
+=head2 next_protsim
+
+ Title   : next_protsim
+ Usage   : next_protsim();
+ Function: Returns the next protsim line from an array referred 
+           to using $obj->{'protsim'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $protsim = $in->next_protsim() ) {
+				print "$protsim\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_protsim {
+    # Iterator over protsim(); the queue is kept under the "protsim" prefix.
+    my $self = shift;
+    return $self->_next_element("protsim", "protsim");
+}
+
+
+=head2 next_sts
+
+ Title   : next_sts
+ Usage   : next_sts();
+ Function: Returns the next sts line from an array referred 
+           to using $obj->{'sts'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $sts = $in->next_sts() ) {
+				print "$sts\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_sts {
+    # Iterator over sts(); the queue is kept under the "sts" prefix.
+    my $self = shift;
+    return $self->_next_element("sts", "sts");
+}
+
+
+=head2 next_txmap
+
+ Title   : next_txmap
+ Usage   : next_txmap();
+ Function: Returns the next txmap line from an array 
+           referred to using $obj->{'txmap'}
+
+           If you call this iterator again after it returned undef, it
+           will re-cycle through the list of elements. Changes in the
+           underlying array property while you loop over this iterator
+           will not be reflected until you exhaust the iterator.
+
+ Example : 	while ( my $txmap = $in->next_txmap() ) {
+				print "$txmap\n";
+			}
+ Returns : String
+ Args    : None
+
+=cut
+
+sub next_txmap {
+    # Iterator over txmap(); the queue is kept under the "txmap" prefix.
+    my $self = shift;
+    return $self->_next_element("txmap", "txmap");
+}
+
+###############################
+# private method
+#
+# args: prefix name for the queue
+#       name of the method from which to re-fill
+# returns: the next element from that queue, or undef if the queue is empty
+###############################
+sub _next_element{
+    # Generic queue-backed iterator used by the next_*() methods.
+    # Args   : prefix name for the queue,
+    #          name of the accessor method from which to (re-)fill it
+    # Returns: the next element, or nothing once the queue is exhausted
+    #          (the following call then starts over)
+    my ($self,$queuename,$meth) = @_;
+
+    $queuename = "_".$queuename."_queue";
+    if(! exists($self->{$queuename})) {
+        # (re-)fill from a copy, so shifting never consumes the accessor's
+        # own array; an accessor that returns undef is treated as an empty
+        # list instead of dying on the array dereference
+        $self->{$queuename} = [ @{ $self->$meth() || [] } ];
+    }
+    my $queue = $self->{$queuename};
+    # is queue exhausted (equivalent to end of stream)?
+    if(! @$queue) {
+        # yes, remove queue and signal to the caller
+        delete $self->{$queuename};
+        return;
+    }
+    return shift(@$queue);
+}
+
+=head1 L<Bio::IdentifiableI> methods
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+
+           This is aliased to unigene_id().
+
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    # Alias: every remaining argument is handed on to unigene_id().
+    my $self = shift;
+    return $self->unigene_id(@_);
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described by
+           the same identifier should represent the same concept
+
+           Unigene clusters usually won't have a version, so this
+           will be mostly undefined.
+
+ Returns : A number
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub version {
+    # Combined getter/setter; any argument, even undef, counts as a set.
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->{'version'} = $args[0];
+    }
+    return $self->{'version'};
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for  
+           organisation (eg, wormbase.org)
+
+ Returns : A scalar
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub authority {
+    # Combined getter/setter; any argument, even undef, counts as a set.
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->{'authority'} = $args[0];
+    }
+    return $self->{'authority'};
+}
+
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection 
+
+ Returns : A scalar
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub namespace {
+    # Combined getter/setter; any argument, even undef, counts as a set.
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->{'namespace'} = $args[0];
+    }
+    return $self->{'namespace'};
+}
+
+=head1 L<Bio::DescribableI> methods
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user
+           the string should have no spaces (ideally, though a cautious
+           user of this interface would not assume this) and should be
+           less than thirty characters (though again, double checking 
+           this is a good idea)
+
+           This is aliased to unigene_id().
+
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub display_name {
+    # Alias: every remaining argument is handed on to unigene_id().
+    my $self = shift;
+    return $self->unigene_id(@_);
+}
+
+
+=head2 description()
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a 
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display
+
+           This is already demanded by Bio::ClusterI and hence is
+           present anyway.
+
+ Returns : A scalar
+
+
+=cut
+
+
+=head1 L<Bio::Factory::SequenceStreamI> methods
+
+=cut
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : next_seq();
+ Function: Returns the next seq as a Seq object as defined by 
+           $seq->sequence_factory(), 
+           at present an empty Bio::Seq::RichSeq object with 
+           just the accession_number() and pid() set
+
+           This iterator will not exhaust the array of member
+           sequences. If you call next_seq() again after it returned
+           undef, it will re-cycle through the list of member
+           sequences.
+
+ Example :  while ( my $sequence = $in->next_seq() ) {
+             print $sequence->accession_number() . "\n";
+	    }
+ Returns : Bio::PrimarySeqI object
+ Args    : None
+
+=cut
+
+sub next_seq {
+    # Return the next member as a sequence object, or nothing once all
+    # members have been handed out (the following call then starts over).
+    # Members that are already objects are returned unchanged; raw hash
+    # records are turned into objects through sequence_factory().
+    my ($obj) = @_;
+
+    if(! exists($obj->{'_seq_queue'})) {
+        # (re-)initialize from a copy of the member array; an undefined
+        # member list is treated as an empty stream instead of dying on
+        # the array dereference
+        $obj->{'_seq_queue'} = [ @{ $obj->sequences() || [] } ];
+    }
+    my $queue = $obj->{'_seq_queue'};
+    # is queue exhausted (equivalent to end of stream)?
+    if(! @$queue) {
+        # yes, remove queue and signal to the caller
+        delete $obj->{'_seq_queue'};
+        return;
+    }
+    # no, still data in the queue: get the next one from the queue
+    my $seq_h = shift(@$queue);
+    # if this is not a simple hash ref, it's an object already, and we'll
+    # return just that
+    return $seq_h if(ref($seq_h) ne 'HASH');
+    # nope, we need to assemble this object from scratch
+    #
+    # assemble the annotation collection; keys matching acc, pid, nid or
+    # version are skipped because they are passed to the factory below
+    my $ac = Bio::Annotation::Collection->new();
+    foreach my $k (keys %$seq_h) {
+        next if $k =~ /acc|pid|nid|version/;
+        my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k,
+                                                    -value   => $seq_h->{$k});
+        $ac->add_Annotation($ann);
+    }
+    # NCBI prepends a 'g' to its own identifiers; strip it if present
+    my $nid = $seq_h->{nid};
+    $nid = substr($nid, 1) if $nid && $nid =~ /^g\d+$/;
+    # a record without an accession must not trigger an
+    # uninitialized-value warning in the namespace test
+    my $acc = $seq_h->{acc};
+    my $namespace = (defined($acc) && $acc =~ /^NM_/) ? 'RefSeq' : 'GenBank';
+    # assemble the initialization parameters and create object
+    my $seqobj = $obj->sequence_factory->create(
+          -accession_number => $acc,
+          -pid              => $seq_h->{pid},
+          -primary_id       => $nid,
+          -display_id       => $acc,
+          -seq_version      => $seq_h->{version},
+          -alphabet         => $obj->{'_alphabet'},
+          -namespace        => $namespace,
+          -authority        => $obj->authority(), # default is NCBI
+          -species          => $obj->species(),
+          -annotation       => $ac
+          );
+    return $seqobj;
+}
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $seqio->sequence_factory($seqfactory)
+ Function: Get/Set the Bio::Factory::SequenceFactoryI
+ Returns : Bio::Factory::SequenceFactoryI
+ Args    : [optional] Bio::Factory::SequenceFactoryI
+
+
+=cut
+
+sub sequence_factory {
+    # Get/set the factory used by next_seq() to build sequence objects.
+    my ($self, $factory) = @_;
+
+    # getter (also taken when undef is passed explicitly)
+    return $self->{'_seqfactory'} unless defined $factory;
+
+    # setter: only references that are Bio::Factory::SequenceFactoryI pass
+    my $valid = ref($factory)
+        && $factory->isa('Bio::Factory::SequenceFactoryI');
+    $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()")
+        unless $valid;
+    $self->{'_seqfactory'} = $factory;
+    return $self->{'_seqfactory'};
+}
+
+=head1 Private methods
+
+=cut
+
+=head2 _annotation_value
+
+ Title   : _annotation_value
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : the value (a string)
+ Args    : annotation key (a string)
+           on set, annotation value (a string)
+
+
+=cut
+
+sub _annotation_value{
+    # Get, set or remove the single simple-value annotation stored under
+    # a key.
+    # Args   : annotation key;
+    #          optionally the new value (undef removes the annotation)
+    # Returns: the current/new value, or on removal the value of the first
+    #          removed annotation (undef if there was none)
+    my $self = shift;
+    my $key = shift;
+
+    # Set mode is decided by the presence of an argument, not by its
+    # truthiness, so that defined-but-false values such as 0 or ''
+    # are stored instead of being mistaken for a get.
+    my $set_mode = @_ ? 1 : 0;
+    my $val;
+    if($set_mode) {
+        $val = shift;
+        if(! defined($val)) {
+            # explicit undef: remove and report what was there
+            my ($old) = $self->annotation->remove_Annotations($key);
+            return $old ? $old->value() : undef;
+        }
+    }
+    my ($ann) = $self->annotation->get_Annotations($key);
+    if(! $set_mode) {
+        # get mode: undef if nothing is stored under this key
+        my $current = defined($ann) ? $ann->value() : undef;
+        return $current;
+    }
+    # set mode: create the annotation on first use, then (over)write it
+    if(! defined($ann)) {
+        $ann = Bio::Annotation::SimpleValue->new(-tagname => $key);
+        $self->annotation->add_Annotation($ann);
+    }
+    $ann->value($val);
+    return $val;
+}
+
+
+=head2 _annotation_value_ary
+
+ Title   : _annotation_value_ary
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : reference to the array of values
+ Args    : annotation key (a string)
+           on set, reference to an array holding the values
+
+
+=cut
+
+sub _annotation_value_ary{
+    # Get or replace the list of simple-value annotations under a key.
+    # Returns an array reference in both modes.
+    my ($self, $key, $values) = @_;
+
+    my $collection = $self->annotation;
+
+    # get mode: collect the values of all annotations under this key
+    if(! $values) {
+        return [ map { $_->value(); } $collection->get_Annotations($key) ];
+    }
+
+    # set mode: purge first, then add one annotation per supplied value
+    $collection->remove_Annotations($key);
+    foreach my $value (@$values) {
+        $collection->add_Annotation(
+            Bio::Annotation::SimpleValue->new(-value   => $value,
+                                              -tagname => $key));
+    }
+    return $values;
+}
+
+
+=head2 _annotation_dblink
+
+ Title   : _annotation_dblink
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : array of accessions for the given database (namespace)
+ Args    : annotation key (a string)
+           dbname (a string) (optional on get, mandatory on set)
+           on set, accession or ID (a string), and version
+
+
+=cut
+
+sub _annotation_dblink{
+    # With a true accession: add a DBLink annotation and return 1.
+    # Otherwise: return the primary IDs stored under the key, restricted
+    # to one database if a database name is given.
+    my ($self, $key, $dbname, $acc, $version) = @_;
+
+    if(! $acc) {
+        # get mode
+        my @links = $self->annotation->get_Annotations($key);
+        # filter out those that don't match the requested database
+        @links = grep { $_->database() eq $dbname; } @links if $dbname;
+        return map { $_->primary_id(); } @links;
+    }
+
+    # set mode -- this adds a link, existing ones are left in place
+    $self->annotation->add_Annotation(
+        Bio::Annotation::DBLink->new(-tagname    => $key,
+                                     -primary_id => $acc,
+                                     -database   => $dbname,
+                                     -version    => $version));
+    return 1;
+}
+
+=head2 _remove_dblink
+
+ Title   : _remove_dblink
+ Usage   :
+ Function: Private method.
+ Example :
+ Returns : array of accessions for the given database (namespace)
+ Args    : annotation key (a string)
+           dbname (a string) (optional)
+
+
+=cut
+
+sub _remove_dblink{
+    # Remove the DBLink annotations under a key (only those of one
+    # database if a database name is given) and return their primary IDs.
+    my ($self, $key, $dbname) = @_;
+
+    my $collection = $self->annotation();
+    # take everything out first ...
+    my @removed = $collection->remove_Annotations($key);
+    if($dbname) {
+        # ... then put back the links that belong to other databases
+        my @matching = ();
+        foreach my $link (@removed) {
+            if($link->database() eq $dbname) {
+                push(@matching, $link);
+            } else {
+                $collection->add_Annotation($link);
+            }
+        }
+        @removed = @matching;
+    }
+    return map { $_->primary_id(); } @removed;
+}
+
+
+#####################################################################
+# aliases for naming consistency or other reasons                   #
+#####################################################################
+
+*sequence = \&sequences;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGeneI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGeneI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Cluster/UniGeneI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,437 @@
+# $Id: UniGeneI.pm,v 1.16.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module for Bio::Cluster::UniGeneI.pm
+#
+# Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
+#
+# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
+# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
+# http://anatomy.otago.ac.nz/meg
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# April 31, 2002 - Initial implementation by Andrew Macgregor
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Cluster::UniGeneI - abstract interface of UniGene object
+
+=head1 SYNOPSIS
+
+  #
+
+=head1 DESCRIPTION
+
+This is the general interface for a UniGene cluster representation in
+Bioperl. You cannot use this module directly, use an implementation
+instead.
+
+You can create UniGene cluster objects yourself by instantiating
+L<Bio::Cluster::UniGene>. If you read UniGene clusters from a
+ClusterIO parser, you will get objects implementing this interface,
+most likely instances of said UniGene class.
+
+L<Bio::Cluster::UniGeneI> inherits from L<Bio::ClusterI>, so you can
+use it wherever a cluster object is expected.
+
+=head1 FEEDBACK
+
+  #
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Andrew Macgregor
+
+Email andrew at cbbc.murdoch.edu.au
+
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Cluster::UniGeneI;
+use strict;
+
+
+use base qw(Bio::ClusterI);
+
+
+=head2 unigene_id
+
+ Title   : unigene_id
+ Usage   : unigene_id();
+ Function: Returns the unigene_id associated with the object.
+ Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
+ Returns : A string
+ Args    : None or an id
+
+
+=cut
+
+sub unigene_id {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+
+=head2 title
+
+ Title   : title
+ Usage   : title();
+ Function: Returns the title associated with the object.
+ Example : $title = $unigene->title or $unigene->title($title)
+ Returns : A string
+ Args    : None or a title
+
+
+=cut
+
+sub title {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 gene
+
+ Title   : gene
+ Usage   : gene();
+ Function: Returns the gene associated with the object.
+ Example : $gene = $unigene->gene or $unigene->gene($gene)
+ Returns : A string
+ Args    : None or a gene
+
+
+=cut
+
+sub gene {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 cytoband
+
+ Title   : cytoband
+ Usage   : cytoband();
+ Function: Returns the cytoband associated with the object.
+ Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
+ Returns : A string
+ Args    : None or a cytoband
+
+
+=cut
+
+sub cytoband {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 mgi
+
+ Title   : mgi
+ Usage   : mgi();
+ Function: Returns the mgi associated with the object.
+ Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
+ Returns : A string
+ Args    : None or a mgi
+
+
+=cut
+
+sub mgi {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 locuslink
+
+ Title   : locuslink
+ Usage   : locuslink();
+ Function: Returns or stores a reference to an array containing locuslink data.
+           This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub locuslink {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 homol
+
+ Title   : homol
+ Usage   : homol();
+ Function: Returns the homol entry associated with the object.
+ Example : $homol = $unigene->homol or $unigene->homol($homol)
+ Returns : A string
+ Args    : None or a homol entry
+
+=cut
+
+sub homol {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 restr_expr
+
+ Title   : restr_expr
+ Usage   : restr_expr();
+ Function: Returns the restr_expr entry associated with the object.
+ Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
+ Returns : A string
+ Args    : None or a restr_expr entry
+
+=cut
+
+sub restr_expr {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 gnm_terminus
+
+ Title   : gnm_terminus
+ Usage   : gnm_terminus();
+ Function: Returns the gnm_terminus associated with the object.
+ Example : $gnm_terminus = $unigene->gnm_terminus or $unigene->gnm_terminus($gnm_terminus)
+ Returns : A string
+ Args    : None or a gnm_terminus
+
+=cut
+
+sub gnm_terminus {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 scount
+
+ Title   : scount
+ Usage   : scount();
+ Function: Returns the scount associated with the object.
+ Example : $scount = $unigene->scount or $unigene->scount($scount)
+ Returns : A string
+ Args    : None or a scount
+
+=cut
+
+sub scount {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+
+=head2 express
+
+ Title   : express
+ Usage   : express();
+ Function: Returns or stores a reference to an array containing tissue expression data.
+           This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub express {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 chromosome
+
+ Title   : chromosome
+ Usage   : chromosome();
+ Function: Returns or stores a reference to an array containing chromosome lines
+           This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub chromosome {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 sts
+
+ Title   : sts
+ Usage   : sts();
+ Function: Returns or stores a reference to an array containing sts lines
+           This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub sts {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 txmap
+
+ Title   : txmap
+ Usage   : txmap();
+ Function: Returns or stores a reference to an array containing txmap lines
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub txmap {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 protsim
+
+ Title   : protsim
+ Usage   : protsim();
+ Function: Returns or stores a reference to an array containing protsim lines
+           This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub protsim {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 sequence
+
+ Title   : sequence
+ Usage   : sequence();
+ Function: Returns or stores a reference to an array containing sequence data
+           This should really only be used by ClusterIO, not directly
+ Returns : An array reference
+ Args    : None or an array reference
+
+=cut
+
+sub sequence {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $obj->species($newval)
+ Function: Get the species object for this Unigene cluster.
+ Example : 
+ Returns : value of species (a L<Bio::Species> object)
+ Args    : 
+
+
+=cut
+
+sub species{
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head1 Methods inherited from L<Bio::ClusterI>
+
+=cut
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : 
+ Function: Get/set the display name or identifier for the cluster
+ Returns : a string
+ Args    : optional, on set the display ID ( a string)
+
+=cut
+
+=head2 description
+
+ Title   : description
+ Usage   : Bio::ClusterI->description("POLYUBIQUITIN")
+ Function: get/set for the consensus description of the cluster
+ Returns : the description string 
+ Args    : Optional the description string 
+
+=cut
+
+=head2 size
+
+ Title   : size
+ Usage   : Bio::ClusterI->size();
+ Function: get/set for the size of the family, 
+           calculated from the number of members
+ Returns : the size of the family 
+ Args    : 
+
+=cut
+
+=head2 cluster_score
+
+ Title   : cluster_score
+ Usage   : $cluster ->cluster_score(100);
+ Function: get/set for cluster_score, which
+           represents the score that the clustering
+           algorithm assigns to this cluster.
+ Returns : a number
+
+=cut
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : Bio::ClusterI->get_members(($seq1, $seq2));
+ Function: retrieve the members of the family by some criteria, for
+           example :
+           $cluster->get_members(-species => 'homo sapiens'); 
+
+           Will return all members if no criteria are provided.
+
+ Returns : the array of members
+ Args    : 
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/ClusterI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/ClusterI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/ClusterI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,168 @@
+# $Id: ClusterI.pm,v 1.6.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::ClusterI
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::ClusterI - Cluster Interface 
+
+=head1 SYNOPSIS
+
+    # see the implementations of this interface for details
+
+    my $cluster= $cluster->new(-description=>"POLYUBIQUITIN",
+                               -members    =>[$seq1,$seq2]);
+    my @members = $cluster->get_members();
+    my @sub_members = $cluster->get_members(-species=>"homo sapiens");
+
+
+=head1 DESCRIPTION
+
+This interface is the basic structure for a cluster of bioperl objects.
+In this case it is up to the implementer to check arguments
+and initialize whatever new object the implementing class is designed for.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::ClusterI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 new
+
+  We don't mandate but encourage implementors to support at least the
+  following named parameters upon object initialization.
+
+  Argument        Description
+  --------        -----------
+  -display_id     the display ID or name for the cluster
+  -description    the consensus description or name of the cluster
+  -members        the array of objects belonging to the family
+
+=cut
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : 
+ Function: Get the display name or identifier for the cluster
+ Returns : a string
+ Args    : 
+
+=cut
+
+sub display_id{
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+
+=head2 description
+
+ Title   : description
+ Usage   : Bio::ClusterI->description("POLYUBIQUITIN")
+ Function: get/set for the consensus description of the cluster
+ Returns : the description string 
+ Args    : Optional the description string 
+
+=cut
+
+sub description{
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 size
+
+ Title   : size
+ Usage   : Bio::ClusterI->size();
+ Function: get/set for the size of the family, 
+           calculated from the number of members
+ Returns : the size of the family 
+ Args    : 
+
+=cut
+
+sub size {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 cluster_score
+
+ Title   : cluster_score
+ Usage   : $cluster ->cluster_score(100);
+ Function: get/set for cluster_score, which
+           represents the score that the clustering
+           algorithm assigns to this cluster.
+ Returns : a number
+
+=cut
+
+sub cluster_score{
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : Bio::ClusterI->get_members(($seq1, $seq2));
+ Function: retrieve the members of the family by some criteria, for
+           example :
+           $cluster->get_members(-species => 'homo sapiens'); 
+
+           Will return all members if no criteria are provided.
+
+ Returns : the array of members
+ Args    : 
+
+=cut
+
+sub get_members {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/dbsnp.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/dbsnp.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/dbsnp.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,366 @@
+# $Id: dbsnp.pm,v 1.16.4.1 2006/10/02 23:10:13 sendu Exp $
+# BioPerl module for Bio::ClusterIO::dbsnp
+#
+# Copyright Allen Day <allenday at ucla.edu>, Stan Nelson <snelson at ucla.edu>
+# Human Genetics, UCLA Medical School, University of California, Los Angeles
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::ClusterIO::dbsnp - dbSNP input stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::ClusterIO class.
+
+=head1 DESCRIPTION
+
+Parse dbSNP XML files, one refSNP entry at a time.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+package Bio::ClusterIO::dbsnp;
+
+use strict;
+use Bio::Root::Root;
+use Bio::Variation::SNP;
+use XML::Parser::PerlSAX;
+use XML::Handler::Subs;
+use Data::Dumper;
+use IO::File;
+
+use vars qw($DTD $DEBUG %MODEMAP %MAPPING);
+$DTD = 'ftp://ftp.ncbi.nih.gov/snp/specs/NSE.dtd';
+use base qw(Bio::ClusterIO);
+
+# Fill %MAPPING at compile time.  Keys are dbSNP XML element names.
+# start_element() looks an element up here: a plain string value is stored
+# as the method to call with the element's character data, while a one-pair
+# hash { method => attribute } makes it take the data from the named XML
+# attribute instead.  end_element() then invokes that method.
+BEGIN {
+  %MAPPING = (
+#the ones commented out i haven't written methods for yet... -Allen
+			  'Rs_rsId'               => 'id',
+#			  'Rs_taxId'                   => 'tax_id',
+#			  'Rs_organism'                => 'organism',
+			  'Rs_snpType'                => {'type' => 'value'},
+			  'Rs_sequence_observed'                => 'observed',
+			  'Rs_sequence_seq5'                 => 'seq_5',
+			  'Rs_sequence_seq3'                 => 'seq_3',
+#			  'Rs_sequence_exemplarSs'         => 'exemplar_subsnp',
+			  'Rs_create_build'           => 'ncbi_build',
+#??			  'Rs_update_build'           => 'ncbi_build',
+#			  'NSE-rs_ncbi-num-chr-hits'       => 'ncbi_chr_hits',
+#			  'NSE-rs_ncbi-num-ctg-hits'       => 'ncbi_ctg_hits',
+#			  'NSE-rs_ncbi-num-seq-loc'        => 'ncbi_seq_loc',
+#			  'NSE-rs_ncbi-mapweight'          => 'ncbi_mapweight',
+#			  'NSE-rs_ucsc-build-id'           => 'ucsc_build',
+#			  'NSE-rs_ucsc-num-chr-hits'       => 'ucsc_chr_hits',
+#			  'NSE-rs_ucsc-num-seq-loc'        => 'ucsc_ctg_hits',
+#			  'NSE-rs_ucsc-mapweight'          => 'ucsc_mapweight',
+
+			  'Rs_het_value'                     => 'heterozygous',
+# NOTE(review): the next key has a hyphen where its sibling keys use an
+# underscore - confirm the element name against the dbSNP XML schema.
+			  'Rs_het-stdError'                  => 'heterozygous_SE',
+			  'Rs_validation'               => {'validated' => 'value'}, #??
+#			  'NSE-rs_genotype'                => {'genotype' => 'value'},
+
+			  'Ss_handle'                  => 'handle',
+			  'Ss_batchId'                => 'batch_id',
+			  'Ss_locSnpId'               => 'id',
+#			  'Ss_locSnpId'              => 'loc_id',
+#			  'Ss_orient'                  => {'orient' => 'value'},
+#			  'Ss_buildId'                => 'build',
+			  'Ss_methodClass'            => {'method' => 'value'},
+#			  'NSE-ss_accession_E'             => 'accession',
+#			  'NSE-ss_comment_E'               => 'comment',
+#			  'NSE-ss_genename'                => 'gene_name',
+#			  'NSE-ss_assay-5_E'               => 'seq_5',
+#			  'NSE-ss_assay-3_E'               => 'seq_3',
+#			  'NSE-ss_observed'                => 'observed',
+
+#			  'NSE-ss-popinfo_type'            => 'pop_type',
+#			  'NSE-ss-popinfo_batch-id'        => 'pop_batch_id',
+#			  'NSE-ss-popinfo_pop-name'        => 'pop_name',
+#			  'NSE-ss-popinfo_samplesize'      => 'pop_samplesize',
+#			  'NSE-ss_popinfo_est-het'         => 'pop_est_heterozygous',
+#			  'NSE-ss_popinfo_est-het-se-sq'   => 'pop_est_heterozygous_se_sq',
+
+#			  'NSE-ss-alleleinfo_type'         => 'allele_type',
+#			  'NSE-ss-alleleinfo_batch-id'     => 'allele_batch_id',
+#			  'NSE-ss-alleleinfo_pop-id'       => 'allele_pop_id',
+#			  'NSE-ss-alleleinfo_snp-allele'   => 'allele_snp',
+#			  'NSE-ss-alleleinfo_other-allele' => 'allele_other',
+#			  'NSE-ss-alleleinfo_freq'         => 'allele_freq',
+#			  'NSE-ss-alleleinfo_count'        => 'allele_count',
+
+#			  'NSE-rsContigHit_contig-id'      => 'contig_hit',
+#			  'NSE-rsContigHit_accession'      => 'accession_hit',
+#			  'NSE-rsContigHit_version'        => 'version',
+#			  'NSE-rsContigHit_chromosome'     => 'chromosome_hit',
+
+#			  'NSE-rsMaploc_asn-from'          => 'asn_from',
+#			  'NSE-rsMaploc_asn-to'            => 'asn_to',
+#			  'NSE-rsMaploc_loc-type'          => {'loc_type' => 'value'},
+#			  'NSE-rsMaploc_hit-quality'       => {'hit_quality' => 'value'},
+#			  'NSE-rsMaploc_orient'            => {'orient' => 'value'},
+#			  'NSE-rsMaploc_physmap-str'       => 'phys_from',
+#			  'NSE-rsMaploc_physmap-int'       => 'phys_to',
+
+			  'FxnSet_geneId'             => 'locus_id',  # does the code realise that there can be multiple of these
+			  'FxnSet_symbol'              => 'symbol',
+			  'FxnSet_mrnaAcc'            => 'mrna',
+			  'FxnSet_protAcc'            => 'protein',
+			  'FxnSet_fxnClass'    => {'functional_class' => 'value'},
+
+			  #...
+			  #...
+			  #there are lots more, but i don't need them at the moment... -Allen
+			  );
+}
+
+sub _initialize {
+   # Chain to the parent initializer, apply the optional TEMPFILE named
+   # argument and create the SAX parser that next_cluster() reuses.
+   # Args: the constructor's argument list.
+   my ($self, @args) = @_;
+   $self->SUPER::_initialize(@args);
+   my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], @args);
+   $self->use_tempfile($usetempfile) if defined $usetempfile;
+   # Direct class-method call rather than indirect-object "new Class()".
+   $self->{'_xmlparser'} = XML::Parser::PerlSAX->new();
+   # Turn debugging on for verbose streams unless $DEBUG was set already.
+   $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0);
+}
+
+=head2 next_cluster
+
+ Title   : next_cluster
+ Usage   : $dbsnp = $stream->next_cluster()
+ Function: returns the next refSNP in the stream
+ Returns : Bio::Variation::SNP object representing composite refSNP
+           and its component subSNP(s).
+ Args    : NONE
+
+=cut
+
+###
+#Adapted from Jason's blastxml.pm
+###
+sub next_cluster {
+  # Read the next <Rs>...</Rs> entry from the stream, run it through the SAX
+  # parser (this object is the handler) and return the SNP object built by
+  # the callbacks.  Returns nothing when no further <Rs> start tag is found.
+  # A parse failure is reported through warn() rather than thrown.
+  my $self = shift;
+  my $data = '';
+  my $tfh;
+
+  if( $self->use_tempfile ) {
+    $tfh = IO::File->new_tmpfile or $self->throw("Unable to open temp file: $!");
+    $tfh->autoflush(1);
+  }
+
+  # Buffer exactly one entry.  The line lives in a lexical so that the
+  # caller's global $_ is left untouched.
+  my $in_entry = 0;
+  while( defined( my $line = $self->_readline ) ){
+    if( !$in_entry ){
+      #skip to beginning of refSNP entry
+      next unless $line =~ m!<Rs>!;
+      $in_entry = 1;
+    }
+
+    #slurp up the data
+    if( defined $tfh ) {
+      print $tfh $line;
+    } else {
+      $data .= $line;
+    }
+
+    #and stop at the end of the refSNP entry
+    last if $line =~ m!</Rs>!;
+  }
+
+  #if we didn't find a start tag
+  return unless $in_entry;
+
+  my %parser_args = ('Handler' => $self);
+  if( defined $tfh ) {
+    seek($tfh,0,0);
+    $parser_args{'Source'} = { 'ByteStream' => $tfh };
+  } else {
+    $parser_args{'Source'} = { 'String' => $data };
+  }
+
+  my $starttime;
+  if( $DEBUG ) {
+    # Time::HiRes is not loaded at file level; load it on demand so the
+    # timing calls below resolve.
+    require Time::HiRes;
+    $starttime = [ Time::HiRes::gettimeofday() ];
+  }
+
+  eval {
+    $self->{'_xmlparser'}->parse(%parser_args);
+  };
+  if( $@ ) {
+    $self->warn("error in parsing a report:\n $@");
+  }
+
+  # $starttime is only set on the debug path, after Time::HiRes was loaded.
+  if( defined $starttime ) {
+    $self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
+  }
+
+  return $self->refsnp;
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $parser->start_document;
+ Function: SAX method to indicate starting to parse a new document.
+           Creates a Bio::Variation::SNP
+ Returns : none
+ Args    : none
+
+=cut
+
+sub start_document {
+  # SAX callback: start every parsed document with a fresh SNP object.
+  my $self = shift;
+  return $self->{refsnp} = Bio::Variation::SNP->new;
+}
+
+sub refsnp {
+  # Read accessor for the SNP object stored by start_document().
+  my ($self) = @_;
+  return $self->{refsnp};
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $parser->end_document;
+ Function: SAX method to indicate finishing parsing a new document
+ Returns : none
+ Args    : none
+
+=cut
+
+sub end_document{
+  # SAX callback for the end of a document; currently performs no work
+  # beyond unpacking its arguments.
+  my ($self, @args) = @_;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $parser->start_element($data)
+ Function: SAX method to indicate starting a new element
+ Returns : none
+ Args    : hash ref for data
+
+=cut
+
+sub start_element {
+  # SAX callback for an opening tag.  Records in {will_handle} which method
+  # end_element() should call for this element, and primes {last_data}.
+  # Args: hash ref with the keys 'Name' and 'Attributes'.
+  my ($self, $element) = @_;
+  my $name  = $element->{'Name'};
+  my $attrs = $element->{'Attributes'};
+
+  # Every <Ss> opens a new subSNP on the current refSNP.
+  if ($name eq 'Ss') {
+    $self->refsnp->add_subsnp;
+    return;
+  }
+
+  my $target = $MAPPING{$name};
+  if (!$target) {
+    # Unmapped element: nothing to dispatch when it closes.
+    undef $self->{will_handle};
+  }
+  elsif (ref $target eq 'HASH') {
+    # { method => attribute }: the value is read from an XML attribute.
+    my ($method)    = keys %$target;
+    my ($attr_name) = values %$target;
+    $self->{will_handle} = $method;
+    $self->{last_data}   = $attrs->{$attr_name};
+  }
+  else {
+    # Plain method name: the value is supplied later by characters().
+    $self->{will_handle} = $target;
+    $self->{last_data}   = undef;
+  }
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $parser->end_element($data)
+ Function: Signals finishing an element
+ Returns : none
+ Args    : hash ref for data
+
+=cut
+
+sub end_element {
+  # SAX callback for a closing tag.  When start_element() selected a method,
+  # call it with the collected data: on the refSNP for Rs*, NSE-SeqLoc* and
+  # FxnSet* elements, on the current subSNP for Ss* elements.
+  # Args: hash ref with the key 'Name'.
+  my ($self, $element) = @_;
+  my $name    = $element->{'Name'};
+  my $handler = $self->{will_handle};
+  return unless $handler;
+
+  if ($name =~ /^Rs/ || $name =~ /^NSE-SeqLoc/ || $name =~ /^FxnSet/) {
+    $self->refsnp->$handler($self->{last_data});
+  }
+  elsif ($name =~ /^Ss/) {
+    $self->refsnp->subsnp->$handler($self->{last_data});
+  }
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $parser->characters($data)
+ Function: Signals new characters to be processed
+ Returns : characters read
+ Args    : hash ref with the key 'Data'
+
+=cut
+
+sub characters {
+  # SAX callback for character data: remember the chunk in {last_data}
+  # unless it consists of whitespace only.
+  my ($self, $chunk) = @_;
+  if ($chunk->{Data} =~ /\S/) {
+    $self->{last_data} = $chunk->{Data};
+  }
+}
+
+=head2 use_tempfile
+
+ Title   : use_tempfile
+ Usage   : $obj->use_tempfile($newval)
+ Function: Get/Set boolean flag on whether or not use a tempfile
+ Example : 
+ Returns : value of use_tempfile
+ Args    : newvalue (optional)
+
+=cut
+
+sub use_tempfile {
+  # Get/set the flag read by next_cluster() to decide whether an entry is
+  # buffered in a temporary file.  An undefined argument leaves it unchanged.
+  my $self = shift;
+  my ($flag) = @_;
+  $self->{'_use_tempfile'} = $flag if defined $flag;
+  return $self->{'_use_tempfile'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/unigene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/unigene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO/unigene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,264 @@
+# $Id: unigene.pm,v 1.29.4.1 2006/10/02 23:10:13 sendu Exp $
+# BioPerl module for Bio::ClusterIO::unigene
+#
+# Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
+#
+# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
+# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
+# http://meg.otago.ac.nz
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# April 17, 2002 - Initial implementation by Andrew Macgregor
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::ClusterIO::unigene - UniGene input stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::ClusterIO class.
+
+=head1 DESCRIPTION
+
+This object reads from Unigene *.data files downloaded from
+ftp://ftp.ncbi.nih.gov/repository/UniGene/.  It does not download and
+decompress the file, you have to do that yourself.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Andrew Macgregor
+
+Email: andrew at cbbc.murdoch.edu.au
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::ClusterIO::unigene;
+use strict;
+
+use Bio::Cluster::UniGene;
+use Bio::Cluster::ClusterFactory;
+
+use base qw(Bio::ClusterIO);
+
+# Regex source strings, keyed by tag.  The upper group recognises the line
+# types of a UniGene record, the KEY= group the ';'-separated items of a
+# SEQUENCE line, and DELIMITER the "//" line that closes a record.
+# next_cluster() interpolates them into matches that use the /x modifier.
+# Only DELIMITER is anchored, so the order in which next_cluster() tries the
+# patterns decides which one wins for a given line.
+my %line_is = (
+		ID			=>	q/ID\s+(\w{2,3}\.\d+)/,
+		TITLE			=>	q/TITLE\s+(\S.*)/,
+		GENE			=>	q/GENE\s+(\S.*)/,
+		CYTOBAND		=>	q/CYTOBAND\s+(\S.*)/,
+		MGI			=>	q/MGI\s+(\S.*)/,
+		LOCUSLINK		=>	q/LOCUSLINK\s+(\S.*)/,
+		HOMOL		=>	q/HOMOL\s+(\S.*)/,
+		EXPRESS			=>	q/EXPRESS\s+(\S.*)/,
+		RESTR_EXPR		=>	q/RESTR_EXPR\s+(\S.*)/,
+		GNM_TERMINUS		=>	q/GNM_TERMINUS\s+(\S.*)/,
+		CHROMOSOME		=>	q/CHROMOSOME\s+(\S.*)/,
+		STS			=>	q/STS\s+(\S.*)/,
+		TXMAP			=>	q/TXMAP\s+(\S.*)/,
+		PROTSIM			=>	q/PROTSIM\s+(\S.*)/,
+		SCOUNT			=>	q/SCOUNT\s+(\S.*)/,
+		SEQUENCE		=>	q/SEQUENCE\s+(\S.*)/,
+		ACC			=>	q/ACC=(\w+)(\.\d+)?/,
+		NID			=>	q/NID=\s*(\S.*)/,
+		PID			=>	q/PID=\s*(\S.*)/,
+		CLONE			=>	q/CLONE=\s*(\S.*)/,
+		END			=>	q/END=\s*(\S.*)/,
+		LID			=>	q/LID=\s*(\S.*)/,
+		MGC			=>	q/MGC=\s*(\S.*)/,
+		SEQTYPE		=>	q/SEQTYPE=\s*(\S.*)/,
+		TRACE			=>	q/TRACE=\s*(\S.*)/,
+		PERIPHERAL		=>	q/PERIPHERAL=\s*(\S.*)/,
+		DELIMITER		=>	q{^//},
+);
+
+# we set the right factory here
+sub _initialize {
+	# Chain to the parent initializer, then install a UniGene-producing
+	# cluster factory unless a factory is already set.
+	my ($self, @args) = @_;
+	$self->SUPER::_initialize(@args);
+
+	return if $self->cluster_factory();
+	my $factory = Bio::Cluster::ClusterFactory->new(
+						-type => 'Bio::Cluster::UniGene');
+	$self->cluster_factory($factory);
+}
+
+=head2 next_cluster
+
+ Title	 : next_cluster
+ Usage	 : $unigene = $stream->next_cluster()
+ Function: returns the next unigene in the stream
+ Returns : Bio::Cluster::UniGene object
+ Args	 : NONE
+
+=cut
+
+sub next_cluster {
+	# Read one UniGene record (terminated by a "//" line) from the stream and
+	# build a cluster object from it through the cluster factory.
+	# Returns: the object created when the record's "//" line is reached;
+	#          nothing when the stream is exhausted; undef when the text read
+	#          contains no "//" line.
+	my ($self) = @_;
+	local $/ = "\n//";
+	return unless my $entry = $self->_readline;
+
+	# Per-record accumulators.
+	my (%unigene, @express, @locuslink, @sequence);
+	my %list_of = (CHROMOSOME => [], TXMAP => [], STS => [], PROTSIM => []);
+	my $UGobj;
+
+	# Tags are tried in this fixed order; most patterns in %line_is are
+	# unanchored, so the first one that matches a line claims it.
+	my @line_tags = qw(ID TITLE GENE CYTOBAND MGI LOCUSLINK HOMOL EXPRESS
+			   RESTR_EXPR GNM_TERMINUS CHROMOSOME TXMAP STS PROTSIM
+			   SCOUNT SEQUENCE DELIMITER);
+	my @seq_tags  = qw(ACC NID PID CLONE END LID MGC SEQTYPE TRACE PERIPHERAL);
+
+	LINE: foreach my $line (split /\n/, $entry) {
+		foreach my $tag (@line_tags) {
+			my $line_re = $line_is{$tag};
+			next unless $line =~ /$line_re/x;
+			my $value = $1;    # copy before any later match resets $1
+
+			if ($tag eq 'LOCUSLINK') {
+				@locuslink = split /;/, $value;
+			}
+			elsif ($tag eq 'EXPRESS') {
+				# remove initial semicolon if present
+				$value =~ s/^;//;
+				@express = split /\s*;/, $value;
+			}
+			elsif (exists $list_of{$tag}) {
+				# tags that may occur on several lines of one record
+				push @{ $list_of{$tag} }, $value;
+			}
+			elsif ($tag eq 'SEQUENCE') {
+				# one hash per SEQUENCE line, filled from its KEY=value items
+				my $seq = {};
+				ITEM: foreach my $item (split /;/, $value) {
+					foreach my $key (@seq_tags) {
+						my $item_re = $line_is{$key};
+						next unless $item =~ /$item_re/x;
+						$seq->{ lc $key } = $1;
+						# remove leading dot if version pattern matched
+						$seq->{version} = substr($2, 1)
+							if $key eq 'ACC' && defined $2;
+						next ITEM;
+					}
+				}
+				push @sequence, $seq;
+			}
+			elsif ($tag eq 'DELIMITER') {
+				# at the end of the record, add data to the object
+				$UGobj = $self->cluster_factory->create_object(
+					  -display_id  => $unigene{ID},
+					  -description => $unigene{TITLE},
+					  -size        => $unigene{SCOUNT},
+					  -members     => \@sequence);
+				$UGobj->gene($unigene{GENE}) if defined $unigene{GENE};
+				$UGobj->cytoband($unigene{CYTOBAND}) if defined $unigene{CYTOBAND};
+				$UGobj->mgi($unigene{MGI}) if defined $unigene{MGI};
+				$UGobj->locuslink(\@locuslink);
+				$UGobj->homol($unigene{HOMOL}) if defined $unigene{HOMOL};
+				$UGobj->express(\@express);
+				$UGobj->restr_expr($unigene{RESTR_EXPR}) if defined $unigene{RESTR_EXPR};
+				$UGobj->gnm_terminus($unigene{GNM_TERMINUS}) if defined $unigene{GNM_TERMINUS};
+				$UGobj->chromosome($list_of{CHROMOSOME});
+				$UGobj->sts($list_of{STS});
+				$UGobj->txmap($list_of{TXMAP});
+				$UGobj->protsim($list_of{PROTSIM});
+			}
+			else {
+				# single-valued tags are kept under their tag name
+				$unigene{$tag} = $value;
+			}
+			next LINE;
+		}
+	}
+	return $UGobj;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/ClusterIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,315 @@
+# $Id: ClusterIO.pm,v 1.18.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::ClusterIO.pm
+#
+# Cared for by Andrew Macgregor <andrew at anatomy.otago.ac.nz>
+#
+# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
+# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
+# http://anatomy.otago.ac.nz/meg
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+#
+# May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
+# by Andrew Macgregor
+#
+# April 17, 2002 - Initial implementation by Andrew Macgregor
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::ClusterIO - Handler for Cluster Formats
+
+=head1 SYNOPSIS
+
+  #NB: This example is unigene specific
+
+  use Bio::ClusterIO;
+
+  $stream  = Bio::ClusterIO->new('-file' => "Hs.data", 
+                                 '-format' => "unigene");
+  # note: we quote -format to keep older perl's from complaining.
+
+  while ( my $in = $stream->next_cluster() ) {
+      print $in->unigene_id() . "\n";
+      while ( my $sequence = $in->next_seq() ) {
+          print $sequence->accession_number() . "\n";
+      }
+  }
+  # Parsing errors are printed to STDERR.
+
+=head1 DESCRIPTION
+
+The ClusterIO module works with the ClusterIO format module to read
+various cluster formats such as NCBI UniGene.
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::ClusterIO-E<gt>new()
+
+   $str = Bio::ClusterIO->new(-file => 'filename',
+                              -format=>$format);
+
+The new() class method constructs a new Bio::ClusterIO object.  The
+returned object can be used to retrieve or print cluster
+objects. new() accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading.
+
+=item -format
+
+Specify the format of the file.  Supported formats include:
+
+   unigene		*.data	UniGene build files.
+   dbsnp		*.xml	dbSNP XML files
+
+If no format is specified and a filename is given, then the module
+will attempt to deduce it from the filename.  If this is unsuccessful,
+the main UniGene build format is assumed.
+
+The format name is case insensitive.  'UNIGENE', 'UniGene' and
+'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
+
+=back
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $cluster = $str-E<gt>next_cluster()
+
+Fetch the next cluster from the stream.
+
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These I've left in here because they were in the SeqIO
+module. Feedback appreciated. There they provide the tie interface.
+See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Andrew Macgregor
+
+Email andrew at anatomy.otago.ac.nz
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::ClusterIO;
+
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : Bio::ClusterIO->new(-file => $filename, -format => 'format')
+ Function: Returns a new cluster stream
+ Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
+ Args    : -file => $filename
+           -format => format
+
+=cut
+
+
+my $entry = 0;
+
+sub new {
+    # Constructor and format dispatcher.
+    # Called on a format subclass (Bio::ClusterIO::<format>) it builds and
+    # initializes that object.  Called on Bio::ClusterIO itself it picks the
+    # format from -format, else from the file name, else falls back to
+    # 'unigene', loads the format module and delegates to its constructor.
+    # Returns: the new stream object, or nothing if the format module
+    #          cannot be loaded.
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+    
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
+	my ($self) = $class->SUPER::new(@args);	
+	$self->_initialize(@args);
+	return $self;
+    }
+
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    # The module's POD states that the UniGene format is assumed when the
+    # format can be neither read from -format nor guessed from the file name.
+    my $format = $param{'-format'}
+	|| $class->_guess_format( $param{-file} || $ARGV[0] )
+	|| 'unigene';
+    $format = "\L$format";	# normalize capitalization to lower case
+
+    return unless( $class->_load_format_module($format) );
+    return "Bio::ClusterIO::$format"->new(@args);
+}
+
+
+# _initialize is chained for all ClusterIO classes
+
+sub _initialize {
+    # Hand every constructor argument on to _initialize_io().
+    my $self = shift;
+    $self->_initialize_io(@_);
+}
+
+=head2 next_cluster
+
+ Title   : next_cluster
+ Usage   : $cluster = $stream->next_cluster()
+ Function: Reads the next cluster object from the stream and returns it.
+ Returns : a L<Bio::ClusterI> compliant object
+ Args    : none
+
+
+=cut
+
+sub next_cluster {
+   # The generic class cannot parse anything, so it always throws.
+   my $self = shift;
+   $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
+}
+
+=head2 cluster_factory
+
+ Title   : cluster_factory
+ Usage   : $obj->cluster_factory($newval)
+ Function: Get/set the object factory to use for creating the cluster
+           objects.
+ Example : 
+ Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
+ Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub cluster_factory {
+    # Get/set the factory stored under 'cluster_factory'.  Any argument,
+    # including undef, replaces the stored value.
+    my ($self, @new_value) = @_;
+    $self->{'cluster_factory'} = $new_value[0] if @new_value;
+    return $self->{'cluster_factory'};
+}
+
+=head2 object_factory
+
+ Title   : object_factory
+ Usage   : $obj->object_factory($newval)
+ Function: This is an alias to cluster_factory with a more generic name.
+ Example : 
+ Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
+ Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub object_factory {
+    # Alias: forwards all arguments to cluster_factory().
+    my ($self, @args) = @_;
+    return $self->cluster_factory(@args);
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL ClusterIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+  # Load Bio::ClusterIO::<format> through _load_module().  A load failure is
+  # reported on STDERR instead of being rethrown.
+  # Returns: the result of _load_module(), or undef if it threw.
+  my ($self, $format) = @_;
+  my $module = "Bio::ClusterIO::$format";
+
+  my $loaded = eval { $self->_load_module($module) };
+  if ( $@ ) {
+    print STDERR "$self: could not load $format - for more details on supported formats please see the ClusterIO docs\n"
+               . "Exception $@\n";
+  }
+  return $loaded;
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function: guess format based on file suffix
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+ Notes   : formats that _guess_format() will guess include unigene and dbsnp
+
+=cut
+
+sub _guess_format {
+   # Guess the cluster format from a file name suffix.
+   # Args:    file name (may be undef or empty)
+   # Returns: 'unigene' for *.data, 'dbsnp' for *.xml (case-insensitive),
+   #          otherwise nothing.
+   # The name is kept in a lexical; assigning it to the global $_ would
+   # overwrite whatever the caller currently has in $_.
+   my ($class, $filename) = @_;
+   return unless $filename;
+   return 'unigene'   if $filename =~ /\.(data)$/i;
+   return 'dbsnp'     if $filename =~ /\.(xml)$/i;
+   return;
+}
+
+sub DESTROY {
+    # Close the stream when the object is destroyed.
+    my ($self) = @_;
+    $self->close();
+}
+
+# I need some direction on these!! The module works so I haven't fiddled with them!
+
+sub TIEHANDLE {
+    # tie() constructor: wrap the given stream object under the 'seqio' key.
+    my $class  = shift;
+    my $stream = shift;
+    my %state  = ('seqio' => $stream);
+    return bless \%state, $class;
+}
+
+sub READLINE {
+  # tie() read hook: one object in scalar context, all remaining objects in
+  # list context.
+  # NOTE(review): this calls next_seq(), while the ClusterIO classes in this
+  # file define next_cluster() - confirm the wrapped object provides next_seq.
+  my ($self) = @_;
+  my $stream = $self->{'seqio'};
+  return $stream->next_seq() unless wantarray;
+
+  my @objects;
+  while (my $obj = $stream->next_seq()) {
+    push @objects, $obj;
+  }
+  return @objects;
+}
+
+sub PRINT {
+  # tie() write hook: forward everything printed to write_seq() on the
+  # wrapped stream.
+  my ($self, @items) = @_;
+  $self->{'seqio'}->write_seq(@items);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+# $Id: IO.pm,v 1.11.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module for Bio::CodonUsage::IO
+#
+# Cared for by Richard Adams (richard.adams at ed.ac.uk)
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::CodonUsage::IO - for reading and writing codon usage tables to file
+
+=head1 SYNOPSIS
+
+  use Bio::CodonUsage::IO;
+
+  ## read in a codon usage file
+  my $io = Bio::CodonUsage::IO->new(-file => "in");
+  my $cut = $io->next_data();
+
+  ## write it out again
+  my $out = Bio::CodonUsage::IO->new(-file => ">out");
+  $out->write_data($cut);
+
+=head1 DESCRIPTION
+
+This class provides standard IO methods for reading and writing text files
+of codon usage tables. These tables can initially be retrieved using
+Bio::DB::CUTG. At present only this format is supported for read/write. 
+
+Reading a CUTG will return a Bio::CodonUsage::Table object. 
+
+=head1 SEE ALSO
+
+L<Bio::Tools::CodonTable>, 
+L<Bio::WebAgent>,
+L<Bio::CodonUsage::Table>,
+L<Bio::CodonUsage::IO>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin
+
+package Bio::CodonUsage::IO;
+use Bio::CodonUsage::Table;
+
+use base qw(Bio::Root::IO);
+
+=head2  new
+
+ Title  : new
+ Usage  : my $io = Bio::CodonUsage::IO->new(-file => "CUTfile");
+ Purpose: To  read/write a Bio::CodonUsage::Table object  
+ Returns: A  Bio::CodonUsage::IO object
+ Args   : a file or file handle 
+
+=cut
+
+sub new {
+	# Build the object with the parent constructor, passing all arguments
+	# through unchanged.
+	my $class = shift;
+	my $self  = $class->SUPER::new(@_);
+	return $self;
+}
+
+
+=head2  next_data
+
+ Title  : next_data
+ Usage  : my $cut = $io->next_data();
+ Purpose: To  obtain a Bio::CodonUsage::Table object 
+ Returns: A  Bio::CodonUsage::Table object
+ Args   : none
+
+=cut
+
+sub next_data {
+	# Parse the stream and return the resulting table object.
+	my ($self) = @_;
+	return scalar $self->_parse;
+}
+
+=head2  write_data
+
+ Title  : write_data
+ Usage  : $io->write_data($cut);
+ Purpose: To  write a CUT to file
+ Returns: void
+ Args   : a Bio::CodonUsage::Table object reference 
+
+=cut
+
+
+sub write_data {
+	# Write a codon usage table to the stream as text: title, species line,
+	# column headings, one row per codon (64 rows, a blank line after every
+	# four), then the GC percentages and the genetic code.
+	# Args:   a Bio::CodonUsage::Table object
+	# Throws: if the argument is false or not a Bio::CodonUsage::Table
+	my ($self, $cut) = @_;
+	# The class name is quoted; as a bareword it only compiles while
+	# strictures are off.
+	if (!$cut || ! $cut->isa('Bio::CodonUsage::Table')) {
+		$self->throw("must supply a Bio::CodonUsage::Table object for writing\n");
+	}
+	my $outstring = "Codon usage table\n\n";
+
+	$outstring .= $cut->species . "[" . $cut->_gb_db . "]  " .
+					$cut->cds_count . "  CDS's\n\n";
+	$outstring .= sprintf("%-9s%-9s%15s%12s%12s\n\n", "AmAcid",
+							 "Codon", "Number", "/1000", "Fraction");
+
+	### now write bulk of codon data  ##
+	my $ctable = Bio::Tools::CodonTable->new;
+	my @bases  = qw(G A T C);
+
+	for my $f (@bases) {
+		for my $s (@bases) {
+			for my $t (@bases) {
+				# lexical codon; previously an undeclared package global
+				my $cod = $f . $s . $t;
+				my $aa  = $Bio::SeqUtils::THREECODE{ $ctable->translate($cod) };
+				$outstring .= sprintf("%-9s%-9s%15.2f%12.2f%12.2f\n",
+						$aa, $cod,
+						$cut->codon_count($cod) || 0.00,
+						$cut->{'_table'}{$aa}{$cod}{'per1000'} || 0.00,
+						$cut->codon_rel_frequency($cod) || 0.00);
+			}
+			$outstring .= "\n";
+		}
+	}
+	$outstring .= "\n\n";
+
+	## now append GC data
+	$outstring .= "Coding GC ". $cut->get_coding_gc('all'). "%\n";
+	$outstring .= "1st letter GC ". $cut->get_coding_gc(1). "%\n";
+	$outstring .= "2nd letter GC ". $cut->get_coding_gc(2). "%\n";
+	$outstring .= "3rd letter GC ". $cut->get_coding_gc(3). "%\n";
+	$outstring .= "Genetic code " . $cut->genetic_code() ."\n\n\n";
+
+	$self->_print($outstring);
+	$self->flush();
+}
+
+sub _parse {
+	# Read the stream line by line and fill a new Bio::CodonUsage::Table:
+	# the species header, the codon rows, the GC percentages and the genetic
+	# code.  Warns when the table does not end up with 21 amino-acid keys.
+	# Returns: the table object.
+	my ($self) = @_;
+	my $table = Bio::CodonUsage::Table->new();
+
+	# Line prefix pattern => slot under {'_coding_gc'}.
+	my @gc_rules = (
+		[ qr/^Cod.+?(\d\d\.\d\d)/, 'all' ],
+		[ qr/^1st.+?(\d\d\.\d\d)/, '1' ],
+		[ qr/^2nd.+?(\d\d\.\d\d)/, '2' ],
+		[ qr/^3rd.+?(\d\d\.\d\d)/, '3' ],
+	);
+
+	while ( my $line = $self->_readline() ) {
+		next if $line =~ /^$/;
+		$line =~ s/End/Ter/;
+
+		## species name, database tag and CDS count
+		if ( $line =~ /^(.+?)\s*\[(\w+)\].+?(\d+)/ ) {
+			$table->species($1);
+			$table->{'_gb_db'} = $2;
+			$table->cds_count($3);
+		}
+		## one codon row: amino acid, codon, count, per-1000, fraction
+		elsif ( $line =~ /^(\w\w\w)\s+(\w+)\s+(\d+\.\d+)
+					\s+(\d+\.\d+)\s+(\d+\.\d+)/x ) {
+			$table->{'_table'}{$1}{$2} = {
+				'abs_count' => $3,
+				'per1000'   => $4,
+				'rel_freq'  => $5,
+			};
+		}
+
+		## GC data and genetic code; tested for every non-empty line
+		my $gc_found = 0;
+		for my $rule (@gc_rules) {
+			my ($pattern, $slot) = @$rule;
+			next unless $line =~ $pattern;
+			$table->{'_coding_gc'}{$slot} = $1;
+			$gc_found = 1;
+			last;
+		}
+		if ( !$gc_found && $line =~ /^Gen.+?(\d+)/ ) {
+			$table->{'_genetic_code'} = $1;
+		}
+	}
+
+	## check has been parsed ok ##
+	if (scalar keys %{$table->{'_table'}} != 21) {
+		$table->warn("probable parsing error - should be 21 entries for 20aa + stop codon");
+	}
+	return $table;
+}
+
+1;
+
+__END__
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/IO.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/Table.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/Table.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/Table.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,633 @@
+# $Id: Table.pm,v 1.14.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# BioPerl module for Bio::CodonUsage::Table
+#
+# Cared for by Richard Adams (richard.adams at ed.ac.uk)
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::CodonUsage::Table - for access to the Codon usage Database
+at http://www.kazusa.or.jp/codon.
+
+=head1 SYNOPSIS
+
+  use Bio::CodonUsage::Table;
+  use Bio::DB::CUTG;
+
+  ## get  a codon usage table from web database ##
+  my $cdtable = Bio::DB::CUTG->new(-sp => 'Mus musculus',
+                                   -gc => 1);
+
+  ## or from local file
+
+  my $io      = Bio::CodonUsage::IO->new(-file=>"file");
+  my $cdtable = $io->next_data();
+
+
+  ## or create your own from your own sequences 
+
+  ## get a Bio::PrimarySeq compliant object ##
+  # $codonstats is a ref to a hash of codon name /count key-value pairs.
+
+  my $codonstats = Bio::Tools::SeqStats->count_codons($my_1ary_Seq_objct);
+
+  ### the '-data' field must be specified ##
+  ### the '-species' and 'genetic_code' fields are optional
+  my $CUT = Bio::CodonUsage::Table->new(-data    => $codonstats,
+                                        -species => 'Hsapiens_kinase');
+
+  print "leu frequency is ", $cdtable->aa_frequency('LEU'), "\n";
+  print "rel freq of TTC is ", $cdtable->codon_rel_frequency('ttc'), "\n";
+  print "abs freq of ATG is ", $cdtable->codon_abs_frequency('ATG'), "\n";
+  print "number of ATG codons is ", $cdtable->codon_count('ATG'), "\n";
+  print "gc content at position 1 is ", $cdtable->get_coding_gc('1'), "\n";
+  print "total CDSs for Mus musculus  is ", $cdtable->cds_count(), "\n";
+
+=head1 DESCRIPTION
+
+
+This class provides methods for accessing codon usage table data.
+
+All of the methods at present are simple look-ups of the table or are
+derived from simple calculations from the table. Future methods could
+include measuring the codon usage of a sequence , for example, or
+provide methods for examining codon usage in alignments.
+
+=head1 SEE ALSO
+
+L<Bio::Tools::CodonTable>, 
+L<Bio::WebAgent>,
+L<Bio::CodonUsage::IO>,
+L<Bio::DB::CUTG>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::CodonUsage::Table;
+use strict;
+use vars qw(%STRICTAA @AA);
+use Bio::SeqUtils;
+use Bio::Tools::CodonTable;
+
+use base qw(Bio::Root::Root);
+
+BEGIN{
+ ## one-letter amino acid codes plus '*'
+ @AA = qw(A C D E F G H I K L M N P Q R S T V W Y *);
+ ## lookup set: only the keys matter, every value is undef
+ @STRICTAA{@AA} = ();
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $cut = Bio::CodonUsage::Table->new(-data => $cut_hash_ref,
+                                                 -species => 'H.sapiens_kinase',
+                                                 -genetic_code =>1);
+ Returns : a reference to a new  Bio::CodonUsage::Table object
+ Args    : none or a reference to a hash of codon counts. This constructor is
+           designed to be compatible with the output of
+           Bio::Tools::SeqStats::count_codons()
+           Species and genetic code parameters can be entered here or via the 
+           species() and genetic_code() methods separately.
+
+=cut
+
+# Constructor. Without arguments an empty table object is returned.
+# Otherwise the second argument must be a hash reference of counts; it is
+# loaded through _init_from_aa() or _init_from_cod() depending on its
+# keys, and any further -name => value pairs are passed to the accessor
+# method of that name.
+sub new {
+	my ($class, @args) = @_;
+	my $self = $class->SUPER::new(@args);
+	return $self unless @args;
+
+	$self->_rearrange([qw(DATA)], @args);
+	## the data hash is taken positionally: discard the first argument
+	## (its key) and use the one that follows it
+	shift @args;
+	my $data = shift @args;
+	if (ref($data) ne 'HASH') {
+		$self->throw("need a hash reference, not a [" . ref($data). "] reference");
+	}
+
+	## both interpretations stay possible until a key rules one out
+	my ($is_codon_hash, $is_Aa_hash) = (1, 1);
+	for my $name (keys %$data) {
+		## work on an upper-cased copy; the hash itself is not modified
+		(my $uc_name = $name) =~ s/(\w+)/\U$1/;
+		if (!exists $STRICTAA{$uc_name}) {
+			$is_Aa_hash = 0;
+		}
+		elsif ($uc_name =~ /[^ATCGatcg]/) {
+			## only reached for keys that are valid one-letter aa codes
+			$is_codon_hash = 0;
+		}
+	}
+
+	if (!($is_codon_hash || $is_Aa_hash)) {
+		$self->throw(" invalid key values in CUT hash - must be unique aa or nucleotide identifiers");
+	}
+	elsif ($is_Aa_hash) {
+		$self->_init_from_aa($data);
+	}
+	else {
+		$self->_init_from_cod($data);
+	}
+
+	## remaining pairs: '-species' => $x becomes $self->species($x)
+	while (@args) {
+		my $method = shift @args;
+		$method =~ s/\-(\w+)/\L$1/;
+		$self->$method(shift @args);
+	}
+
+	return $self;
+}
+
+=head2 all_aa_frequencies
+
+ Title   : all_aa_frequencies
+ Usage   : my $freq = $cdtable->all_aa_frequencies();
+ Returns : a reference to a hash where each key is an amino acid
+           and each value is its frequency in all proteins in that
+           species.
+ Args    : none
+
+=cut
+
+# Build a hash of aa_frequency() results keyed by every code in %STRICTAA
+# and return a reference to it.
+sub all_aa_frequencies {
+	my ($self) = @_;
+	my @codes = keys %STRICTAA;
+	my %freq_of;
+	## scalar() guarantees exactly one value per code
+	@freq_of{@codes} = map { scalar $self->aa_frequency($_) } @codes;
+	return \%freq_of;
+}
+
+=head2 codon_abs_frequency
+
+ Title   : codon_abs_frequency
+ Usage   : my $freq = $cdtable->codon_abs_frequency('CTG');
+ Purpose : To return the frequency of that codon as a percentage
+           of all codons in the organism. 
+ Returns : a percentage frequency
+ Args    : a non-ambiguous codon string
+
+=cut
+
+# Return the stored 'per1000' value of a codon divided by 10, or 0 if the
+# codon fails _check_codon().
+sub codon_abs_frequency {
+	my ($self, $codon_in) = @_;
+	my $codon = uc $codon_in;
+	return 0 unless $self->_check_codon($codon);
+
+	## find the three-letter amino acid the codon is filed under, using
+	## the codon table selected by this object's genetic code
+	my $translator = Bio::Tools::CodonTable->new;
+	$translator->id($self->genetic_code());
+	my $aa3 = $Bio::SeqUtils::THREECODE{ $translator->translate($codon) };
+
+	return $self->{'_table'}{$aa3}{$codon}{'per1000'} / 10;
+}
+
+=head2 codon_rel_frequency
+
+ Title   : codon_rel_frequency
+ Usage   : my $freq = $cdtable->codon_rel_frequency('CTG');
+ Purpose : To return the frequency of that codon as a percentage
+           of codons coding for the same amino acid. E.g., ATG and TGG
+           would return 100 as those codons are unique.
+ Returns : a percentage frequency
+ Args    : a non-ambiguous codon string
+
+=cut
+
+
+# Return the stored 'rel_freq' value of a codon, or 0 if the codon fails
+# _check_codon().
+sub codon_rel_frequency {
+	my ($self, $codon_in) = @_;
+	my $codon = uc $codon_in;
+	return 0 unless $self->_check_codon($codon);
+
+	## find the three-letter amino acid the codon is filed under, using
+	## the codon table selected by this object's genetic code
+	my $translator = Bio::Tools::CodonTable->new;
+	$translator->id($self->genetic_code());
+	my $aa3 = $Bio::SeqUtils::THREECODE{ $translator->translate($codon) };
+
+	return $self->{'_table'}{$aa3}{$codon}{'rel_freq'};
+}
+
+=head2 probable_codons
+
+ Title    : probable_codons
+ Usage    : my $prob_codons = $cd_table->probable_codons(10);
+ Purpose  : to obtain a list of codons for the amino acid above a given
+            threshold % relative frequency
+ Returns  : A reference to a hash where keys are 1 letter amino acid  codes
+            and values are references to arrays of codons whose frequency
+            is above the threshold.
+ Arguments: a minimum threshold frequency
+
+=cut
+
+# For every code in %STRICTAA collect the codons whose 'rel_freq' is
+# greater than $threshold/100. Returns a reference to a hash of array
+# references keyed by one-letter code. Throws unless the threshold is a
+# true value between 0 and 100; warns if any list comes out empty.
+sub probable_codons {
+	my ($self, $threshold) = @_;
+	$self->throw(" I need a threshold percentage ")
+		if !$threshold || $threshold < 0 || $threshold > 100;
+
+	my $cutoff = $threshold/100;
+	my %codons_for;
+	for my $aa1 (keys %STRICTAA) {
+		my $aa3 = $Bio::SeqUtils::THREECODE{$aa1};
+		my @above = grep { $self->{'_table'}{$aa3}{$_}{'rel_freq'} > $cutoff }
+					keys %{ $self->{'_table'}{$aa3}};
+		$codons_for{$aa1} = \@above;
+	}
+
+	## check to make sure that all codons are populated ##
+	my $empty_lists = grep { !@{ $codons_for{$_} } } keys %codons_for;
+	if ($empty_lists) {
+		$self->warn("Threshold is too high, some amino acids do not" .
+					" have any codon above the threshold!");
+	}
+	return \%codons_for;
+}
+		
+
+
+=head2 codon_count
+
+ Title   : codon_count
+ Usage   : my $count = $cdtable->codon_count('CTG');
+ Purpose : To obtain the absolute number of the codons in the
+           organism. 
+ Returns : an integer
+ Args    : a non-ambiguous codon string
+
+=cut
+
+# Return the stored 'abs_count' value of a codon. Returns 0 (with a
+# warning) when called without a codon, and 0 when the codon fails
+# _check_codon().
+sub codon_count {
+	my ($self, @rest) = @_;
+	unless (@rest) {
+		$self->warn(" need to give a codon sequence as a parameter ");
+		return 0;
+	}
+
+	my $codon = uc $rest[0];
+	return 0 unless $self->_check_codon($codon);
+
+	## find the three-letter amino acid the codon is filed under, using
+	## the codon table selected by this object's genetic code
+	my $translator = Bio::Tools::CodonTable->new;
+	$translator->id($self->genetic_code());
+	my $aa3 = $Bio::SeqUtils::THREECODE{ $translator->translate($codon) };
+
+	return $self->{'_table'}{$aa3}{$codon}{'abs_count'};
+}
+
+=head2 get_coding_gc
+
+ Title   : get_coding_gc
+ Usage   : my $count = $cdtable->get_coding_gc(1);
+ Purpose : To return the percentage GC composition for the organism at
+           codon positions 1,2 or 3, or an average for all coding sequence
+          ('all').
+ Returns : a number (%-age GC content) or 0 if these fields are undefined
+ Args    : 1,2,3 or 'all'.
+
+=cut
+
+# Return the stored coding GC value for the requested key, formatted to
+# two decimals. Falls back to the 'all' value (with a warning) when the
+# key has no entry, and to 0 (with a warning) when neither exists or no
+# key was given.
+sub get_coding_gc {
+	my ($self, @query) = @_;
+	unless (@query) {
+		$self->warn(" no parameters supplied must be  a codon position (1,2,3) or 'all'");
+		return 0;
+	}
+
+	my $n = shift @query;
+
+	## return request if valid ##
+	if (exists $self->{'_coding_gc'}{$n}) {
+		return sprintf("%.2f", $self->{'_coding_gc'}{$n});
+	}
+
+	## else return 'all' value if exists
+	if (exists $self->{'_coding_gc'}{'all'}) {
+		$self->warn("coding gc doesn't have value for [$n], returning gc content for all CDSs");
+		return sprintf("%.2f", $self->{'_coding_gc'}{'all'});
+	}
+
+	## nothing stored at all
+	$self->warn("coding gc values aren't defined, returning 0");
+	return 0;
+}
+
+=head2 set_coding_gc
+
+ Title   : set_coding_gc
+ Usage   : my $count = $cdtable->set_coding_gc(-1=>55.78);
+ Purpose : To set the percentage GC composition for the organism at
+           codon positions 1,2 or 3, or an average for all coding sequence
+           ('all').  
+ Returns : void
+ Args    : a hash where the key must be 1,2,3 or 'all' and the value the %age GC
+           at that codon position..
+
+=cut
+
+# Store a coding GC value under one of the keys 1, 2, 3 or 'all'. The
+# first '-' in the key is removed, so '-1' is treated as '1'. Any other
+# key produces a warning and nothing is stored.
+sub set_coding_gc {
+	my ($self, $key, $value) = @_;
+	my @allowed = qw(1 2 3 all);
+	my %is_allowed = map { $_ => 1 } @allowed;
+
+	$key =~ s/\-//;
+	unless ($is_allowed{$key}) {
+		$self->warn ("invalid key! - must be one of [ ". (join " ", @allowed) . "]");
+		return;
+	}
+	$self->{'_coding_gc'}{$key} = $value;
+}
+
+=head2 species
+
+ Title     : species
+ Usage     : my $sp = $cut->species();
+ Purpose   : Get/setter for species name of codon table
+ Returns   : Void or species name string
+ Args      : None or species name string
+
+=cut
+
+# Get/set the species name. Returns "unknown" while no true value is
+# stored.
+sub species {
+	my ($self, @new) = @_;
+	$self->{'_species'} = $new[0] if @new;
+	return $self->{'_species'} || "unknown";
+}
+
+=head2 genetic_code
+
+ Title     : genetic_code
+ Usage     : my $sp = $cut->genetic_code();
+ Purpose   : Get/setter for genetic_code name of codon table
+ Returns   : Void or genetic_code id, 1 by default
+ Args      : None or genetic_code id, 1 by default if invalid argument.
+
+=cut
+
+# Get/set the genetic code id. A value that is negative, above 16,
+# contains a non-digit, or equals 7 or 8 is rejected with a warning and
+# replaced by 1. Returns the stored id, or 1 when none (or a false one)
+# is stored.
+sub genetic_code {
+	my $self = shift;
+	if (@_ ){
+		my $val = shift;
+		if ($val < 0 || $val >16 || $val =~ /[^\d]/ 
+				|| $val ==7 || $val ==8) {
+			$self->warn ("invalid genetic code - must be 1-16 but not 7 or 8,setting to default [1]");
+			$self->{'_genetic_code'} = 1;
+			}
+		else {
+			## store the value already taken off @_; shifting again here
+			## would store undef because @_ is empty at this point
+			$self->{'_genetic_code'} = $val;
+			}
+		}
+	return $self->{'_genetic_code'} || 1;
+}
+
+=head2 cds_count
+
+ Title   : cds_count
+ Usage   : my $count = $cdtable->cds_count();
+ Purpose : To retrieve the total number of CDSs used to generate the Codon Table
+           for that organism. 
+ Returns : an integer
+ Args    : none (if retrieving the value) or an integer( if setting ). 
+
+=cut
+
+# Get/set the number of CDSs behind the table. A negative value is
+# rejected with a warning and stored as 0. Warns when read before any
+# value has been stored; returns 0 in place of a missing or false value.
+sub cds_count {
+	my $self= shift;
+	if (@_) {
+		my $val = shift;
+		if ($val < 0) {
+			## message now states the value that is actually stored
+			$self->warn("can't have negative count initializing to 0");
+			$self->{'_cds_count'} = 0.00;
+			}
+		else{
+			$self->{'_cds_count'} = $val;
+		}
+	}
+	$self->warn("cds_count value is undefined, returning 0") 
+		if !exists($self->{'_cds_count'});
+
+	return $self->{'_cds_count'} || 0.00;
+	}
+
+=head2 aa_frequency
+
+ Title   : aa_frequency
+ Usage   : my $freq = $cdtable->aa_frequency('Leu');
+ Purpose : To retrieve the frequency of an amino acid in the organism
+ Returns : a percentage
+ Args    : a 1 letter or 3 letter string representing the amino acid
+
+=cut
+
+	
+
+# Sum the 'per1000' values of all codons stored for an amino acid and
+# return the sum divided by 10, formatted to two decimals. Accepts a one
+# or three letter identifier in any case; warns and returns nothing for
+# an unrecognised one.
+sub aa_frequency {
+	my ($self, $a) = @_;
+
+	## normalise case: lower-case everything, then capitalise the first
+	## character ('LEU' -> 'Leu')
+	(my $aa = lc $a) =~ s/^(\w)/\U$1/;
+
+	my $known = exists($STRICTAA{$aa}) || exists($Bio::SeqUtils::ONECODE{$aa});
+	unless ($known) {
+		$self->warn("Invalid amino acid! must be a unique 1 letter or 3 letter identifier");
+		return;
+	}
+
+	## the table is keyed by three-letter code
+	my $aa3 = $Bio::SeqUtils::THREECODE{$aa} || $aa;
+
+	my $per1000_total = 0;
+	for my $codon (keys %{$self->{'_table'}{$aa3}}) {
+		$per1000_total += $self->{'_table'}{$aa3}{$codon}{'per1000'};
+	}
+	return sprintf("%.2f", $per1000_total/10);
+}
+
+=head2 common_codon
+
+ Title   : common_codon
+ Usage   : my $freq = $cdtable->common_codon('Leu');
+ Purpose : To retrieve the frequency of the most common codon of that aa
+ Returns : a percentage
+ Args    : a 1 letter or 3 letter string representing the amino acid
+
+=cut
+
+# Return the largest 'rel_freq' value among the codons stored for an
+# amino acid (0 if none exceeds 0), or 0 if the identifier fails
+# _check_aa().
+sub common_codon{
+	my ($self, $a) = @_;
+
+	## normalise case ('LEU' -> 'Leu')
+	(my $aa = lc $a) =~ s/^(\w)/\U$1/;
+	return 0 unless $self->_check_aa($aa);
+
+	## the table is keyed by three-letter code
+	my $aa3 = $Bio::SeqUtils::THREECODE{$aa} || $aa;
+
+	my $max = 0;
+	for my $codon (keys %{$self->{'_table'}{$aa3}}) {
+		my $rel = $self->{'_table'}{$aa3}{$codon}{'rel_freq'};
+		$max = $rel if $rel > $max;
+	}
+	return $max;
+}
+
+=head2 rare_codon
+
+ Title   : rare_codon
+ Usage   : my $freq = $cdtable->rare_codon('Leu');
+ Purpose : To retrieve the frequency of the least common codon of that aa
+ Returns : a percentage
+ Args    : a 1 letter or 3 letter string representing the amino acid
+
+=cut
+
+# Return the smallest 'rel_freq' value among the codons stored for an
+# amino acid (1 if none is below 1), or 0 if the identifier fails
+# _check_aa().
+sub rare_codon {
+	my ($self, $a) = @_;
+
+	## normalise case ('LEU' -> 'Leu')
+	(my $aa = lc $a) =~ s/^(\w)/\U$1/;
+	return 0 unless $self->_check_aa($aa);
+
+	## the table is keyed by three-letter code
+	my $aa3 = $Bio::SeqUtils::THREECODE{$aa} || $aa;
+
+	my $min = 1;
+	for my $codon (keys %{$self->{'_table'}{$aa3}}) {
+		my $rel = $self->{'_table'}{$aa3}{$codon}{'rel_freq'};
+		$min = $rel if $rel < $min;
+	}
+	return $min;
+}
+
+
+## internal sub: returns 1 if $aa is a key of %STRICTAA or of
+## %Bio::SeqUtils::ONECODE, otherwise warns and returns 0
+sub _check_aa {
+	my ($self, $aa ) = @_;
+	return 1 if exists($STRICTAA{$aa}) || exists($Bio::SeqUtils::ONECODE{$aa});
+
+	$self->warn("Invalid amino acid! must be a unique 1 letter or 3 letter identifier");
+	return 0;
+}
+
+	
+
+
+## internal sub: returns 1 if $cod consists of exactly three of the
+## upper-case letters A, T, C, G; otherwise warns and returns 0
+sub _check_codon {
+	my ($self, $cod) = @_;
+	## anchored at both ends so that longer all-ATCG strings such as
+	## 'ATGC' are rejected as well as shorter or non-ATCG ones
+	if (!defined $cod || $cod !~ /\A[ATCG]{3}\z/) {
+		$self->warn(" impossible codon - must be 3 letters and just containing ATCG");
+		return 0;
+	}
+	return 1;
+}
+## internal sub: regroup a codon => count hash by the amino acid each
+## codon translates to (using a default Bio::Tools::CodonTable), then
+## hand the grouped hash to _init_from_aa
+sub _init_from_cod {
+	my ($self, $ref) = @_;
+	my $translator = Bio::Tools::CodonTable->new();
+
+	my %by_aa;
+	for my $codon (keys %$ref) {
+		$by_aa{ $translator->translate($codon) }{$codon} = $ref->{$codon};
+	}
+	return $self->_init_from_aa(\%by_aa);
+}
+
+
+## internal sub: fill '_table' and the coding GC fields from a hash of
+## the form { one-letter aa => { codon => count } }. Returns $self.
+sub _init_from_aa {
+	my ($self, $ref) = @_;
+
+	## re-key the input by three-letter amino acid code
+	my %codons_of;
+	for my $aa1 (keys %$ref) {
+		$codons_of{ $Bio::SeqUtils::THREECODE{$aa1} } = $ref->{$aa1};
+	}
+
+	## abs counts and count codons
+	my $total_codons = 0;
+	for my $aa (keys %codons_of) {
+		my $counts = $codons_of{$aa};
+		for my $cod (keys %$counts) {
+			$self->{'_table'}{$aa}{$cod}{'abs_count'} = $counts->{$cod};
+			$total_codons += $counts->{$cod};
+		}
+	}
+
+	## now calculate abs codon frequencies (per thousand codons)
+	for my $aa (keys %codons_of) {
+		my $counts = $codons_of{$aa};
+		for my $cod (keys %$counts) {
+			$self->{'_table'}{$aa}{$cod}{'per1000'} =
+				sprintf("%.2f", $counts->{$cod} /$total_codons * 1000);
+		}
+	}
+
+	## now calculate rel codon_frequencies (share within one amino acid)
+	for my $aa (keys %codons_of) {
+		my $counts = $codons_of{$aa};
+		my $aa_total = 0;
+		$aa_total += $_ for values %$counts;
+		for my $cod (keys %$counts) {
+			$self->{'_table'}{$aa}{$cod}{'rel_freq'} =
+				sprintf("%.2f", $counts->{$cod}/ $aa_total);
+		}
+	}
+
+	## now calculate gc fields: per codon position, add up the counts of
+	## all codons that have G or C (either case) at that position
+	my %GC;
+	for my $aa (keys %codons_of) {
+		my $counts = $codons_of{$aa};
+		for my $cod (keys %$counts) {
+			for my $index (1 .. 3) {
+				next unless substr($cod, $index - 1, 1) =~ /[gc]/i;
+				$GC{$index} += $counts->{$cod};
+			}
+		}
+	}
+
+	my $tot = 0;
+	$tot += $GC{$_} for (1 .. 3);
+	$self->set_coding_gc('all',  $tot/(3 *$total_codons) * 100);
+	$self->set_coding_gc($_, $GC{$_}/$total_codons * 100) for (1 .. 3);
+
+	return $self;
+}
+
+## internal sub: return the value stored under '_gb_db', or "unknown"
+## when it is missing or false
+sub _gb_db {
+	my $self = shift;
+	## key spelled to match the accessor name and the '_gb_db' slot that
+	## Bio::CodonUsage::IO::_parse fills
+	return $self->{'_gb_db'} || "unknown";
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/CodonUsage/Table.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Chain.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Chain.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Chain.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,205 @@
+# $Id: Chain.pm,v 1.7.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::Chain
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::Chain - Mapping locations through a chain of  coordinate mappers
+
+=head1 SYNOPSIS
+
+  # create Bio::Coordinate::Pairs, or any MapperIs, somehow
+  $pair1; $pair2;
+
+  # add them into a Chain
+  $collection = Bio::Coordinate::Chain->new;
+  $collection->add_mapper($pair1);
+  $collection->add_mapper($pair2);
+
+  # create a position and map it
+  $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
+  $match = $collection->map($pos);
+  if ($match) {
+      printf "Matches at %d-%d\n", $match->start, $match->end;
+  } else {
+      print "No match\n";
+  }
+
+=head1 DESCRIPTION
+
+This class assumes that you have built several mappers and want to
+link them together so that output from the previous mapper is the next
+mappers input. This way you can build arbitrarily complex mappers from
+simpler components.
+
+Note that Chain does not do any sanity checking on its mappers. You
+are solely responsible that input and output coordinate systems,
+direction of mapping and parameters internal to mappers make sense
+when chained together.
+
+To put it bluntly, the present class is just a glorified foreach loop
+over an array of mappers calling the map method.
+
+It would be neat to have an internal function that would generate a new
+single step mapper from those included in the chain. It should speed
+things up considerably. Any volunteers?
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Ewan Birney, birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::Chain;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Root::Root;
+use Bio::Coordinate::Result;
+
+use base qw(Bio::Coordinate::Collection Bio::Coordinate::MapperI);
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map($pos);
+ Function: Map the location through all the mappers in the chain.
+ Example :
+ Returns : new Location in the output coordinate system
+ Args    : a Bio::Location::Simple object
+
+=cut
+
+# Pass a location through every mapper of the chain in order, feeding
+# the match of one mapper to the next. Returns the final match, or
+# nothing as soon as one mapper yields no match. Throws if no value is
+# given, if it is not a Bio::LocationI, or if the chain has no mappers.
+sub map {
+    my ($self,$value) = @_;
+
+    $self->throw("Need to pass me a value.")
+	unless defined $value;
+    $self->throw("I need a Bio::Location, not [$value]")
+	unless $value->isa('Bio::LocationI');
+    $self->throw("No coordinate mappers!")
+	unless $self->each_mapper;
+
+    foreach my $mapper ($self->each_mapper) {
+
+	my $res = $mapper->map($value);
+	# a step without a match ends the whole chain
+	return unless $res->each_match;
+	# the match becomes the input of the next mapper
+	$value = $res->match;
+    }
+
+   return $value;
+}
+
+
+=head2 Inherited methods
+
+=cut
+
+=head2 add_mapper
+
+ Title   : add_mapper
+ Usage   : $obj->add_mapper($mapper)
+ Function: Pushes one Bio::Coordinate::MapperI into the list of mappers.
+           Sets _is_sorted() to false.
+ Example : 
+ Returns : 1 when succeeds, 0 for failure.
+ Args    : mapper object
+
+=cut
+
+=head2 mappers
+
+ Title   : mappers
+ Usage   : $obj->mappers();
+ Function: Returns or sets a list of mappers.
+ Example : 
+ Returns : array of mappers
+ Args    : array of mappers
+
+=cut
+
+=head2 each_mapper
+
+ Title   : each_mapper
+ Usage   : $obj->each_mapper();
+ Function: Returns a list of mappers.
+ Example : 
+ Returns : array of mappers
+ Args    : none
+
+=cut
+
+=head2 swap
+
+ Title   : swap
+ Usage   : $obj->swap;
+ Function: Swap the direction of mapping;input <-> output
+ Example :
+ Returns : 1
+ Args    : 
+
+=cut
+
+=head2 test
+
+ Title   : test
+ Usage   : $obj->test;
+ Function: test that both components of all pairs are of the same length.
+           Ran automatically.
+ Example :
+ Returns : boolean
+ Args    :
+
+=cut
+
+
+
+# Overrides the inherited sort: only emits a warning and leaves the
+# mappers untouched.
+sub sort{
+   my $self = shift;
+   return $self->warn("You do not really want to sort your chain, do you!\nDoing nothing.");
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Collection.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Collection.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Collection.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,416 @@
+# $Id: Collection.pm,v 1.24.4.1 2006/10/02 23:10:13 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::Collection
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::Collection - Noncontinuous match between two coordinate sets
+
+=head1 SYNOPSIS
+
+  # create Bio::Coordinate::Pairs or other Bio::Coordinate::MapperIs somehow
+  $pair1; $pair2;
+
+  # add them into a Collection
+  $collection = Bio::Coordinate::Collection->new;
+  $collection->add_mapper($pair1);
+  $collection->add_mapper($pair2);
+
+  # create a position and map it
+  $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
+  $res = $collection->map($pos);
+  $res->match->start == 1;
+  $res->match->end == 5;
+
+  # if mapping is many to one (*>1) or many-to-many (*>*)
+  # you have to give seq_id to avoid getting irrelevant entries
+  $pos = Bio::Location::Simple->new
+      (-start => 5, -end => 9, -seq_id=>'clone1');
+
+=head1 DESCRIPTION
+
+Generic, context neutral mapper to provide coordinate transforms
+between two B<disjoint> coordinate systems. It brings into Bioperl the
+functionality from Ewan Birney's Bio::EnsEMBL::Mapper ported into
+current bioperl.
+
+This class is aimed for representing mapping between whole chromosomes
+and contigs, or between contigs and clones, or between sequencing
+reads and assembly. The submaps are automatically sorted, so they can
+be added in any order.
+
+To map coordinates to the other direction, you have to swap() the
+collection. Keeping track of the direction and ID restrictions
+are left to the calling code.
+
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Ewan Birney, birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::Collection;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Coordinate::Result;
+use Bio::Coordinate::Result::Gap;
+
+use base qw(Bio::Root::Root Bio::Coordinate::MapperI);
+
+
+# Constructor. Starts with an empty mapper list and then applies the
+# -in, -out, -mappers and -return_match arguments when they are true.
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    $self->{'_mappers'} = [];
+
+    # -strict is accepted here but not used by this constructor
+    my($in, $out, $strict, $mappers, $return_match) =
+	$self->_rearrange([qw(IN
+                              OUT
+                              STRICT
+                              MAPPERS
+                              RETURN_MATCH
+			     )],
+			 @args);
+
+    $in  && $self->in($in);
+    $out  && $self->out($out);
+    $mappers && $self->mappers($mappers);
+    # NOTE(review): passes the literal string 'return_match', not the
+    # supplied value - confirm this is what return_match() expects
+    $return_match && $self->return_match('return_match');
+    return $self; # success - we hope!
+}
+
+
+=head2 add_mapper
+
+ Title   : add_mapper
+ Usage   : $obj->add_mapper($mapper)
+ Function: Pushes one Bio::Coordinate::MapperI into the list of mappers.
+           Sets _is_sorted() to false.
+ Example : 
+ Returns : 1 when succeeds, 0 for failure.
+ Args    : mapper object
+
+=cut
+
+# Append one mapper to the collection and mark the collection as
+# unsorted. Throws unless the argument is a Bio::Coordinate::MapperI;
+# warns (but still adds it) when the mapper's own test() fails.
+# Returns the result of the push, i.e. the new number of mappers.
+sub add_mapper {
+  my ($self,$value) = @_;
+
+  # report the rejected argument itself, not the collection
+  $self->throw("Is not a Bio::Coordinate::MapperI but a [" .
+	       (defined $value ? $value : 'undef') . "]")
+      unless defined $value && $value->isa('Bio::Coordinate::MapperI');
+  
+  # test pair range lengths
+  $self->warn("Coordinates in pair [". $value . ":" .
+	      $value->in->seq_id . "/". $value->out->seq_id .
+	      "] are not right.")
+      unless $value->test;
+
+  $self->_is_sorted(0);
+  push(@{$self->{'_mappers'}},$value);
+}
+
+=head2 mappers
+
+ Title   : mappers
+ Usage   : $obj->mappers();
+ Function: Returns or sets a list of mappers.
+ Example : 
+ Returns : array of mappers
+ Args    : array of mappers
+
+=cut
+
+# Return the list of mappers. When arguments are given they are appended
+# to the list first; only the first argument is type-checked, and the
+# call throws if it is not a Bio::Coordinate::MapperI.
+sub mappers{
+   my ($self,@args) = @_;
+
+   if (@args) {
+
+       # report the rejected argument itself, not the collection
+       $self->throw("Is not a Bio::Coordinate::MapperI but a [" .
+		    (defined $args[0] ? $args[0] : 'undef') . "]")
+	   unless defined $args[0] && $args[0]->isa('Bio::Coordinate::MapperI');
+       push(@{$self->{'_mappers'}}, @args);
+   }
+
+   return @{$self->{'_mappers'}};
+}
+
+
+=head2 each_mapper
+
+ Title   : each_mapper
+ Usage   : $obj->each_mapper();
+ Function: Returns a list of mappers.
+ Example : 
+ Returns : list of mappers
+ Args    : none
+
+=cut
+
+# Return the stored mappers as a list.
+sub each_mapper{
+   my $self = shift;
+   return @{ $self->{'_mappers'} };
+}
+
+=head2 mapper_count
+
+ Title   : mapper_count
+ Usage   : my $count = $collection->mapper_count;
+ Function: Get the count of the number of mappers stored 
+           in this collection
+ Example :
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+# Return the number of stored mappers; 0 when the list is missing.
+sub mapper_count{
+   my ($self) = @_;
+   my $mappers = $self->{'_mappers'} || [];
+   return scalar @$mappers;
+}
+
+
+=head2 swap
+
+ Title   : swap
+ Usage   : $obj->swap;
+ Function: Swap the direction of mapping;input <-> output
+ Example :
+ Returns : 1
+ Args    : 
+
+=cut
+
+# Reverse the direction of mapping: sort first if needed, swap every
+# stored mapper, and exchange the input and output id lookups.
+# Always returns 1.
+sub swap {
+   my ($self) = @_;
+
+   $self->sort unless $self->_is_sorted;
+   foreach my $mapper (@{$self->{'_mappers'}}) {
+       $mapper->swap;
+   }
+   ($self->{'_in_ids'}, $self->{'_out_ids'}) =
+       ($self->{'_out_ids'}, $self->{'_in_ids'});
+   return 1;
+}
+
+=head2 test
+
+ Title   : test
+ Usage   : $obj->test;
+ Function: test that both components of all pairs are of the same length.
+           Ran automatically.
+ Example :
+ Returns : boolean
+ Args    :
+
+=cut
+
+# Run test() on every stored mapper, warning about each one that fails.
+# Returns 1 if all passed (or there are none), 0 otherwise.
+sub test {
+   my $self = shift;
+
+   my $all_ok = 1;
+   foreach my $mapper ($self->each_mapper) {
+       next if $mapper->test;
+
+       $self->warn("Coordinates in pair [". $mapper . ":" .
+		   $mapper->in->seq_id . "/". $mapper->out->seq_id .
+		   "] are not right."); 
+       $all_ok = 0;
+   }
+   return $all_ok;
+}
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map($pos);
+ Function: Map the location from the input coordinate system
+           to a new value in the output coordinate system.
+ Example :
+ Returns : new value in the output coordinate system
+ Args    : integer
+
+=cut
+
+# Map a location through the collection. A split location is mapped one
+# sub-location at a time and the pieces are gathered into one result;
+# anything else goes straight to _map(). Throws if no value is given, if
+# it is not a Bio::LocationI, or if the collection has no mappers.
+sub map {
+   my ($self,$value) = @_;
+
+   defined $value
+       or $self->throw("Need to pass me a value.");
+   $value->isa('Bio::LocationI')
+       or $self->throw("I need a Bio::Location, not [$value]");
+   $self->each_mapper
+       or $self->throw("No coordinate mappers!");
+
+   $self->sort unless $self->_is_sorted;
+
+   return $self->_map($value)
+       unless $value->isa("Bio::Location::SplitLocationI");
+
+   my $result = Bio::Coordinate::Result->new;
+   foreach my $loc ( $value->sub_Location(1) ) {
+       my $res = $self->_map($loc);
+       $result->add_sub_Location($_) foreach $res->each_Location;
+   }
+   return $result;
+}
+
+
+=head2 _map
+
+ Title   : _map
+ Usage   : $newpos = $obj->_map($simpleloc);
+ Function: Internal method that does the actual mapping. Called multiple times
+           by map() if the location  to be mapped is a split location
+
+ Example :
+ Returns : new location in the output coordinate system or undef
+ Args    : Bio::Location::Simple
+
+=cut
+
+# Map one non-split location. Walks the mappers in their stored order
+# and collects the sub-results; if nothing was collected, the result
+# holds a single Bio::Coordinate::Result::Gap that covers the input
+# location.
+sub _map {
+   my ($self,$value) = @_;
+
+   my $result = Bio::Coordinate::Result->new(-is_remote=>1);
+
+   # when the location names a sequence id that is not among the known
+   # input ids, no mapper is consulted at all
+   my $unknown_id = defined $value->seq_id &&
+       ! $self->{'_in_ids'}->{$value->seq_id};
+
+   unless ($unknown_id) {
+       foreach my $pair ($self->each_mapper) {
+
+           # if we are limiting input to a certain ID
+           next if defined $value->seq_id && $value->seq_id ne $pair->in->seq_id;
+
+           # this mapper ends before the location starts: try the next one
+           next if $pair->in->end < $value->start;
+           # this mapper starts after the location ends: stop looking
+           last if $pair->in->start > $value->end;
+
+           my $subres = $pair->map($value);
+           $result->add_result($subres);
+       }
+   }
+
+   $result->seq_id($result->match->seq_id) if $result->match;
+   return $result if $result->each_Location;
+
+   # nothing was mapped: build one gap with the coordinates of the input
+   my $gap = Bio::Location::Simple->new(-start => $value->start,
+                                        -end => $value->end,
+                                        -strand => $value->strand,
+                                        -location_type => $value->location_type
+                                       );
+   $gap->seq_id($value->seq_id) if defined $value->seq_id;
+   bless $gap, 'Bio::Coordinate::Result::Gap';
+   $result->seq_id($value->seq_id) if defined $value->seq_id;
+   $result->add_sub_Location($gap);
+
+   return $result;
+}
+
+
+=head2 sort
+
+ Title   : sort
+ Usage   : $obj->sort;
+ Function: Sort function so that all mappings are sorted by
+           input coordinate start
+ Example :
+ Returns : 1
+ Args    : 
+
+=cut
+
+# Order the mappers by the start of their input location, rebuild the
+# lookups of input and output sequence ids, and mark the collection as
+# sorted.
+sub sort{
+   my ($self) = @_;
+
+   # decorate each mapper with its input start, sort numerically on
+   # that, then strip the decoration again
+   @{$self->{'_mappers'}} = map { $_->[0] }
+                            sort { $a->[1] <=> $b->[1] }
+                            map { [ $_, $_->in->start] } 
+                            @{$self->{'_mappers'}};
+
+   #create hashes for sequence ids
+   # start from empty hash references; assigning an empty list to a
+   # scalar slot would store undef instead
+   $self->{'_in_ids'} = {};
+   $self->{'_out_ids'} = {};
+   foreach ($self->each_mapper) {
+       $self->{'_in_ids'}->{$_->in->seq_id} = 1;
+       $self->{'_out_ids'}->{$_->out->seq_id} = 1;
+   }
+
+   $self->_is_sorted(1);
+}
+
+=head2 _is_sorted
+
+ Title   : _is_sorted
+ Usage   : $newpos = $obj->_is_sorted;
+ Function: toggle for whether the (internal) coordinate mapper data are sorted
+ Example :
+ Returns : boolean
+ Args    : boolean
+
+=cut
+
+# Get/set the flag that records whether the mappers are sorted. Called
+# without an argument (or with undef) it only reads the flag.
+sub _is_sorted{
+   my ($self,$value) = @_;
+
+   # any defined argument sets the flag, so a false value (as passed by
+   # add_mapper) clears it and the next map() or swap() sorts again
+   $self->{'_is_sorted'} = ($value ? 1 : 0) if defined $value;
+   return $self->{'_is_sorted'};
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ExtrapolatingPair.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ExtrapolatingPair.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ExtrapolatingPair.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,232 @@
+# $Id: ExtrapolatingPair.pm,v 1.16.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::ExtrapolatingPair
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::ExtrapolatingPair - Continuous match between two coordinate sets
+
+=head1 SYNOPSIS
+
+
+  use Bio::Location::Simple;
+  use Bio::Coordinate::ExtrapolatingPair;
+
+
+  $match1 = Bio::Location::Simple->new 
+    (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
+  $match2 = Bio::Location::Simple->new
+    (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
+
+  $pair = Bio::Coordinate::ExtrapolatingPair->
+    new(-in => $match1,
+    	-out => $match2,
+    	-strict => 1
+       );
+
+  $pos = Bio::Location::Simple->new 
+      (-start => 40, -end => 60, -strand=> 1 );
+  $res = $pair->map($pos);
+  $res->start eq 20;
+  $res->end eq 20;
+
+=head1 DESCRIPTION
+
+This class represents one continuous match between two coordinate
+systems represented by Bio::Location::Simple objects. The relationship
+is directed and reversible. It implements methods to ensure internal
+consistency, and map continuous and split locations from one
+coordinate system to another.
+
+This class is an elaboration of Bio::Coordinate::Pair. The map
+function returns only matches, which is the mode needed most of
+the time. By default the matching regions between coordinate systems
+are boundless, so that you can say e.g. that a gene starts from here in
+the chromosomal coordinate system and extends indefinitely in both
+directions. If you want to define the matching regions exactly, you
+can do that and set strict() to true.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::ExtrapolatingPair;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Root::Root;
+use Bio::LocationI;
+
+use base qw(Bio::Coordinate::Pair);
+
+
+# Constructor. Every argument is handed to the parent class constructor;
+# in addition the named argument -strict is read here and, when true,
+# switches on strict mode through strict().
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my($strict) =
+	$self->_rearrange([qw(STRICT
+			     )],
+			 @args);
+
+    # a false or missing -strict leaves the flag unset
+    $strict  && $self->strict($strict);
+    return $self;
+}
+
+
+=head2 strict
+
+ Title   : strict
+ Usage   : $obj->strict(1);
+ Function: Set and read the strictness of the coordinate system.
+ Example :
+ Returns : 1 if strict mode has been switched on, otherwise undef
+ Args    : boolean
+
+=cut
+
+sub strict {
+   my ($self,$value) = @_;
+   # Only a true argument has an effect: it raises the flag. An undefined
+   # or false argument leaves the stored value untouched.
+   $self->{'_strict'} = 1 if $value;
+   return $self->{'_strict'};
+}
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map($loc);
+ Function: Map the location from the input coordinate system
+           to a new value in the output coordinate system.
+
+           In an extrapolating coordinate system there is no location zero.
+           Locations are...
+ Example :
+ Returns : new location in the output coordinate system or undef
+ Args    : Bio::Location::Simple
+
+=cut
+
+sub map {
+   my ($self,$value) = @_;
+
+   # argument and state validation
+   defined $value
+       or $self->throw("Need to pass me a value.");
+   $value->isa('Bio::LocationI')
+       or $self->throw("I need a Bio::Location, not [$value]");
+   $self->in
+       or $self->throw("Input coordinate system not set");
+   $self->out
+       or $self->throw("Output coordinate system not set");
+
+   # anything that is not a split location is mapped in a single step
+   return $self->_map($value)
+       unless $value->isa("Bio::Location::SplitLocationI");
+
+   # split location: map the sub locations one by one in order of their
+   # start coordinate and collect the successful matches
+   my $split = Bio::Coordinate::Result->new(-seq_id=>$self->out->seq_id);
+   my @sub_locs = sort { $a->start <=> $b->start } $value->sub_Location;
+   foreach my $loc (@sub_locs) {
+       my $match = $self->_map($loc);
+       $split->add_sub_Location($match) if $match;
+   }
+
+   # nothing (undef or the empty list) is returned when no part matched
+   return $split->each_Location ? $split : ();
+}
+
+
+=head2 _map
+
+ Title   : _map
+ Usage   : $newpos = $obj->_map($simpleloc);
+ Function: Internal method that does the actual mapping. Called
+           multiple times by map() if the location to be mapped is a
+           split location
+
+ Example :
+ Returns : new location in the output coordinate system or undef
+ Args    : Bio::Location::Simple
+
+=cut
+
+sub _map {
+   my ($self,$value) = @_;
+
+   my ($start, $end);
+
+   if ($self->strand == -1) {
+       # opposite strands: coordinates are mirrored, so the end of the
+       # input location yields the start of the result and vice versa
+       my $sum = $self->in->end + $self->out->start;
+       ($start, $end) = ($sum - $value->end, $sum - $value->start);
+   } else { # undef, 0 or 1
+       # same strand: a plain shift by the distance between the two starts
+       my $shift = $self->in->start - $self->out->start;
+       ($start, $end) = ($value->start - $shift, $value->end - $shift);
+   }
+
+   # strict prevents matches outside stated range
+   if ($self->strict) {
+       return if $start < 0 and $end < 0;
+       return if $start > $self->out->end;
+       # clip what is left to the stated output range
+       if ($start < 0) {
+           $start = 1;
+       }
+       if ($end > $self->out->end) {
+           $end = $self->out->end;
+       }
+   }
+
+   my $match = Bio::Location::Simple->new
+       (-start         => $start,
+        -end           => $end,
+        -strand        => $self->strand,
+        -seq_id        => $self->out->seq_id,
+        -location_type => $value->location_type
+       );
+
+   # combine the strand of the pair with the strand of the mapped location
+   if ($value->strand) {
+       $match->strand($match->strand * $value->strand);
+   }
+
+   # the result is handed back reblessed as a Match
+   bless $match, 'Bio::Coordinate::Result::Match';
+   return $match;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/GeneMapper.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/GeneMapper.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/GeneMapper.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1339 @@
+# $Id: GeneMapper.pm,v 1.27.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::GeneMapper
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::GeneMapper - transformations between gene related coordinate systems
+
+=head1 SYNOPSIS
+
+  use Bio::Coordinate::GeneMapper;
+
+  # get a Bio::RangeI representing the start, end and strand of the CDS
+  # in chromosomal (or entry) coordinates
+  my $cds;
+
+  # get a Bio::Location::Split or an array of Bio::LocationI objects
+  # holding the start, end and strand of all the exons in chromosomal
+  # (or entry) coordinates
+  my $exons;
+
+  # create a gene mapper and set it to map from chromosomal to cds coordinates
+  my $gene = Bio::Coordinate::GeneMapper->new(-in   =>'chr',
+                                              -out  =>'cds',
+                                              -cds  =>$cds,
+                                              -exons=>$exons
+                                             );
+
+  # get a Bio::Location or sequence feature in input (chr) coordinates
+  my $loc;
+
+  # map the location into output coordinates and get a new location object
+  $newloc = $gene->map($loc);
+
+
+=head1 DESCRIPTION
+
+Bio::Coordinate::GeneMapper is a module for simplifying the mappings
+of coordinate locations between various gene related locations in human
+genetics. It also adds a special human genetics twist to coordinate
+systems by making it possible to disable the use of zero
+(0). Locations before position one start from -1. See method
+L<nozero>.
+
+It understands by name the following coordinate systems and mapping
+between them:
+
+                          peptide (peptide length)
+                             ^
+                             | -peptide_offset
+                             |
+                    frame  propeptide (propeptide length)
+                        ^    ^
+                         \   |
+             translate    \  |
+                           \ |
+                            cds  (transcript start and end)
+                             ^
+      negative_intron        | \
+              ^              |  \  transcribe
+               \             |   \
+              intron        exon  \
+               ^   ^         ^     /
+      splice    \   \      / |    /
+                 \   \    /  |   /
+                  \   inex   |  /
+                   \    ^    | /
+                    \    \   |/
+                     ----- gene (gene_length)
+                             ^
+                             | - gene_offset
+                             |
+                            chr (or entry)
+
+
+This structure is kept in the global variable $DAG which is a
+representation of a Directed Acyclic Graph. The path calculations
+traversing this graph are done in a helper class. See
+L<Bio::Coordinate::Graph>.
+
+Of these, two operations are special cases, translate and splice.
+Translating and reverse translating are implemented as internal
+methods that do the simple 1E<lt>-E<gt>3 conversion. Splicing needs
+additional information that is provided by method L<exons> which takes
+in an array of Bio::LocationI objects.
+
+Most of the coordinate system names should be self-explanatory to
+anyone familiar with genes. The negative intron coordinate system
+starts counting backwards from -1 as the last nucleotide in the
+intron. This is used when only the exon and a few flanking intron
+nucleotides are known.
+
+
+This class models coordinates within one transcript of a gene, so to
+tackle multiple transcripts you need several instances of the
+class. It is therefore valid to argue that the name of the class
+should be TranscriptMapper. GeneMapper is a catchier name, so it
+stuck.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::GeneMapper;
+use vars qw(%COORDINATE_SYSTEMS %COORDINATE_INTS $TRANSLATION $DAG
+            $NOZERO_VALUES $NOZERO_KEYS);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Coordinate::Result;
+use Bio::Location::Simple;
+use Bio::Coordinate::Graph;
+use Bio::Coordinate::Collection;
+use Bio::Coordinate::Pair;
+use Bio::Coordinate::ExtrapolatingPair;
+
+use base qw(Bio::Root::Root Bio::Coordinate::MapperI);
+
+# first set internal values for all translation tables
+
+# Integer code of every coordinate system name known to this module.
+%COORDINATE_SYSTEMS = (
+    chr             => 1,
+    gene            => 2,
+    inex            => 3,
+    exon            => 4,
+    intron          => 5,
+    negative_intron => 6,
+    cds             => 7,
+    frame           => 8,
+    propeptide      => 9,
+    peptide         => 10,
+);
+
+# Reverse lookup from integer code to name. The codes above are unique,
+# so inverting the table loses nothing.
+%COORDINATE_INTS = reverse %COORDINATE_SYSTEMS;
+
+# Key of the cds -> propeptide step ("7-9"), which map() performs by
+# translation rather than through a stored mapper.
+$TRANSLATION = join "-", @COORDINATE_SYSTEMS{qw(cds propeptide)};
+
+# Graph of the coordinate systems as a hash of arrays: each code points to
+# the list of codes it is directly connected to. This structure is handed
+# to Bio::Coordinate::Graph in new().
+$DAG = {
+    1  => [2],
+    2  => [3, 4, 5, 7],
+    3  => [4, 5],
+    4  => [7],
+    5  => [6],
+    6  => [],
+    7  => [8, 9],
+    8  => [],
+    9  => [10],
+    10 => [],
+};
+
+# Accepted arguments of nozero() with their internal codes, and the table
+# that turns a code back into the public value.
+$NOZERO_VALUES = {
+    0        => 0,
+    'in'     => 1,
+    'out'    => 2,
+    'in&out' => 3,
+};
+$NOZERO_KEYS = {
+    0 => 0,
+    1 => 'in',
+    2 => 'out',
+    3 => 'in&out',
+};
+
+
+
+
+# Constructor. All arguments are first passed to the parent constructor.
+# Recognised named arguments: -in, -out, -peptide_offset, -exons (array
+# reference), -cds, -nozero and -strict. Each one that is true is applied
+# through the accessor of the same name.
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # prime the graph
+    my $graph = Bio::Coordinate::Graph->new;
+    $graph->hash_of_arrays($DAG);
+    $self->graph($graph);
+
+    my($in, $out, $peptide_offset, $exons,
+       $cds, $nozero, $strict) =
+	$self->_rearrange([qw(IN
+                              OUT
+                              PEPTIDE_OFFSET
+                              EXONS
+                              CDS
+                              NOZERO
+                              STRICT
+			     )],
+			 @args);
+
+    # direction of mapping when going chr to protein
+    $self->{_direction} = 1;
+
+    $in  && $self->in($in);
+    $out  && $self->out($out);
+    # the CDS is applied before the exons: exons() only derives a CDS from
+    # the exon range when none has been set
+    $cds && $self->cds($cds);
+    # exons are accepted here only as an array reference
+    $exons  && ref($exons) =~ /ARRAY/i && $self->exons(@$exons);
+    $peptide_offset && $self->peptide_offset($peptide_offset);
+    $nozero && $self->nozero($nozero);
+    $strict && $self->strict($strict);
+
+    return $self; # success - we hope!
+}
+
+=head2 in
+
+ Title   : in
+ Usage   : $obj->in('peptide');
+ Function: Set and read the input coordinate system.
+ Example :
+ Returns : value of input system
+ Args    : new value (optional)
+
+=cut
+
+sub in {
+   my ($self,$value) = @_;
+   if (defined $value) {
+       # translate the name into its integer code; unknown names are fatal
+       my $code = $COORDINATE_SYSTEMS{$value};
+       $self->throw("Not a valid input coordinate system name [$value]\n".
+                    "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
+           unless defined $code;
+
+       $self->{'_in'} = $code;
+   }
+   # the code is stored internally, the name is what callers get back
+   return $COORDINATE_INTS{ $self->{'_in'} };
+}
+
+
+=head2 out
+
+ Title   : out
+ Usage   : $obj->out('peptide');
+ Function: Set and read the output coordinate system.
+ Example :
+ Returns : value of output system
+ Args    : new value (optional)
+
+=cut
+
+sub out {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       # The message used to say "input coordinate system" (copied from
+       # in()), which pointed users at the wrong argument.
+       $self->throw("Not a valid output coordinate system name [$value]\n".
+		    "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
+	   unless defined $COORDINATE_SYSTEMS{$value};
+
+       # the system is stored as its integer code
+       $self->{'_out'} = $COORDINATE_SYSTEMS{$value};
+   }
+   # translate the stored code back to the name for the caller
+   return $COORDINATE_INTS{ $self->{'_out'} };
+}
+
+=head2 strict
+
+ Title   : strict
+ Usage   : $obj->strict(1);
+ Function: Set and read whether strict boundaries of coordinate
+           systems are enforced.
+           When strict is on, the end of the coordinate range must be defined.
+ Example :
+ Returns : boolean
+ Args    : boolean (optional)
+
+=cut
+
+sub strict {
+   my ($self,$value) = @_;
+   # any defined argument is normalised to a plain 1 or 0
+   $self->{'_strict'} = $value ? 1 : 0 if defined $value;
+   ## update in each mapper !!
+   return $self->{'_strict'} || 0 ;
+}
+
+
+=head2 nozero
+
+ Title   : nozero
+ Usage   : $obj->nozero(1);
+ Function: Flag to disable the use of zero in the input,
+           output or both coordinate systems. Use of coordinate
+           systems without zero is a peculiarity  common in
+           human genetics community.
+ Example :
+ Returns : 0 (default), or 'in', 'out', 'in&out'
+ Args    : 0 (default), or 'in', 'out', 'in&out'
+
+=cut
+
+sub nozero {
+   my ($self,$value) = @_;
+
+   if (defined $value) {
+       # look up the internal code of the requested mode; unknown modes
+       # are fatal
+       my $code = $NOZERO_VALUES->{$value};
+       $self->throw("Not a valid value for nozero [$value]\n".
+                    "Valid values are ". join(", ", keys %{$NOZERO_VALUES} ))
+           unless defined $code;
+       $self->{'_nozero'} = $code;
+   }
+
+   # report the public name of the stored code, 0 when nothing was set
+   return $NOZERO_KEYS->{ $self->{'_nozero'} || 0 };
+}
+
+=head2 graph
+
+ Title   : graph
+ Usage   : $obj->graph($new_graph);
+ Function: Set and read the graph object representing relationships
+           between coordinate systems
+ Example :
+ Returns : Bio::Coordinate::Graph object
+ Args    : new Bio::Coordinate::Graph object (optional)
+
+=cut
+
+sub graph {
+   my ($self,$value) = @_;
+
+   # plain read access
+   return $self->{'_graph'} unless defined $value;
+
+   # write access: only Bio::Coordinate::Graph objects are accepted
+   $value->isa('Bio::Coordinate::Graph')
+       or $self->throw("Not a valid graph [$value]\n");
+   $self->{'_graph'} = $value;
+   return $self->{'_graph'};
+}
+
+=head2 peptide
+
+ Title   : peptide
+ Usage   : $obj->peptide($peptide_coord);
+ Function: Read and write the offset of peptide from the start of propeptide
+           and peptide length
+ Returns : a Bio::Location::Simple object
+ Args    : a Bio::LocationI object
+
+=cut
+
+sub peptide {
+   my ($self, $value) = @_;
+
+   if (defined $value) {
+       $value->isa('Bio::LocationI')
+           or $self->throw("I need a Bio::LocationI, not  [". $value. "]");
+
+       # offset: number of propeptide residues in front of the peptide
+       defined $value->start
+           or $self->throw("Peptide start not defined");
+       $self->{'_peptide_offset'} = $value->start - 1;
+
+       # length: from the peptide start up to and including its end
+       defined $value->end
+           or $self->throw("Peptide end not defined");
+       $self->{'_peptide_length'} = $value->end - $self->{'_peptide_offset'};
+
+       # (re)build the propeptide -> peptide mapper from the new values
+       my $key = join "-", $COORDINATE_SYSTEMS{'propeptide'},
+                           $COORDINATE_SYSTEMS{'peptide'};
+       $self->{'_mappers'}->{$key} = $self->_create_pair
+           ('propeptide', 'peptide', $self->strict,
+            $self->{'_peptide_offset'}, $self->{'_peptide_length'} );
+   }
+
+   # the peptide is always reported as a fresh location in propeptide
+   # coordinates
+   my $offset = $self->{'_peptide_offset'};
+   return  Bio::Location::Simple->new
+       (-seq_id  => 'propeptide',
+        -start   => $offset + 1 ,
+        -end     => $self->{'_peptide_length'} + $offset,
+        -strand  => 1,
+        -verbose => $self->verbose,
+       );
+}
+
+=head2 peptide_offset
+
+ Title   : peptide_offset
+ Usage   : $obj->peptide_offset(20);
+ Function: Set and read the offset of peptide from the start of propeptide
+ Returns : set value or 0
+ Args    : new value (optional)
+
+=cut
+
+sub peptide_offset {
+   my ($self,$offset, $len) = @_;
+
+   if (defined $offset) {
+       $offset =~ /^[+-]?\d+$/
+           or $self->throw("I need an integer, not [$offset]");
+       $self->{'_peptide_offset'} = $offset;
+
+       # the optional second argument replaces the stored peptide length
+       if (defined $len) {
+           $len =~ /^[+-]?\d+$/
+               or $self->throw("I need an integer, not [$len]");
+           $self->{'_peptide_length'} = $len;
+       }
+
+       # (re)build the propeptide -> peptide mapper with the new offset
+       my $key = join "-", $COORDINATE_SYSTEMS{'propeptide'},
+                           $COORDINATE_SYSTEMS{'peptide'};
+       $self->{'_mappers'}->{$key} = $self->_create_pair
+           ('propeptide', 'peptide', $self->strict, $offset, $self->{'_peptide_length'} );
+   }
+
+   return $self->{'_peptide_offset'} || 0;
+}
+
+=head2 peptide_length
+
+ Title   : peptide_length
+ Usage   : $obj->peptide_length(20);
+ Function: Set and read the length of the peptide
+ Returns : set value or 0
+ Args    : new value (optional)
+
+=cut
+
+
+sub peptide_length {
+   my ($self, $len) = @_;
+
+   # plain read access
+   return $self->{'_peptide_length'} unless defined $len;
+
+   # write access: only (optionally signed) integers are accepted
+   $self->throw("I need an integer, not [$len]")
+       unless $len =~ /^[+-]?\d+$/;
+   $self->{'_peptide_length'} = $len;
+   return $self->{'_peptide_length'};
+}
+
+
+=head2 exons
+
+ Title   : exons
+ Usage   : $obj->exons(@exons);
+ Function: Set and read the exons of the transcript.
+           You do not have to sort the exons before calling this method as
+           they will be sorted automatically.
+           If you have not defined the CDS, it will be set to span all
+           exons here.
+ Returns : array of Bio::LocationI exons in genome coordinates or 0
+ Args    : array of Bio::LocationI exons in genome (or entry) coordinates
+
+=cut
+
+# Set and read the exons of the transcript.
+#
+# Args    : list of Bio::LocationI exons, or a single Bio::SeqFeatureI whose
+#           location is a split location (its sub locations are used)
+# Returns : the stored exons in list context, their number in scalar
+#           context (0 when none have been set)
+# Effects : sorts the exons by start, sets the CDS to the full exon range
+#           when no CDS is defined, and rebuilds the gene-cds, gene-inex,
+#           gene-exon, gene-intron, intron-negative_intron and exon-cds
+#           mappers.
+sub exons {
+   my ($self,@value) = @_;
+
+   # keys under which the mappers built here are stored in $self->{'_mappers'}
+   my $cds_mapper =  $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'cds'};
+   my $inex_mapper =
+       $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'inex'};
+   my $exon_mapper =
+       $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'exon'};
+   my $intron_mapper =
+       $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'intron'};
+   my $negative_intron_mapper =
+       $COORDINATE_SYSTEMS{'intron'}. "-". $COORDINATE_SYSTEMS{'negative_intron'};
+   my $exon_cds_mapper =  $COORDINATE_SYSTEMS{'exon'}. "-". $COORDINATE_SYSTEMS{'cds'};
+
+   if(@value) {
+       if (ref($value[0]) &&
+           $value[0]->isa('Bio::SeqFeatureI') and
+           $value[0]->location->isa('Bio::Location::SplitLocationI')) {
+           # a feature with a split location stands for all of its parts
+           @value = $value[0]->location->each_Location;
+       } else {
+           $self->throw("I need a reference to an array of Bio::LocationIs, not to [".
+                        $value[0]. "]")
+               unless ref $value[0] and $value[0]->isa('Bio::LocationI');
+       }
+
+       #
+       # sort the input array
+       #
+       # and if the user has not defined CDS assume it is the complete exonic range
+       if (defined $value[0]->strand &&
+           $value[0]->strand == - 1) {  #reverse strand
+           # descending start order, so the first element is the first exon
+           # in transcript direction
+           @value = map  { $_->[0] }
+                    sort { $b->[1] <=> $a->[1] }
+                    map  { [ $_, $_->start] }
+                    @value;
+
+           unless ($self->cds) {
+               $self->cds(Bio::Location::Simple->new
+                          (-start   => $value[-1]->start,
+                           -end     => $value[0]->end,
+                           -strand  => $value[0]->strand,
+                           -seq_id  => $value[0]->seq_id,
+                           -verbose => $self->verbose,
+                          )
+                         );
+           }
+       } else {               # undef or forward strand
+           @value = map  { $_->[0] }
+                    sort { $a->[1] <=> $b->[1] }
+                    map  { [ $_, $_->start] }
+                    @value;
+
+           unless ($self->cds) {
+               $self->cds(Bio::Location::Simple->new
+                          (-start   => $value[0]->start,
+                           -end     => $value[-1]->end,
+                           -strand  => $value[0]->strand,
+                           -seq_id  => $value[0]->seq_id,
+                           -verbose => $self->verbose,
+                          )
+                         );
+           }
+       }
+
+       $self->{'_chr_exons'} = \@value;
+
+       # transform exons from chromosome to gene coordinates
+       # but only if gene coordinate system has been set
+       my @exons ;
+       my $gene_mapper =
+           $COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
+       if (defined $self->{'_mappers'}->{$gene_mapper} ) {
+
+           # temporarily switch this object to silent chr -> gene mapping,
+           # then put the caller's settings back
+           my $tmp_in = $self->{'_in'};
+           my $tmp_out = $self->{'_out'};
+           my $tmp_verb = $self->verbose;
+           $self->verbose(0);
+
+           $self->in('chr');
+           $self->out('gene');
+           @exons = map {$self->map($_) } @value;
+
+           $self->{'_in'} = ($tmp_in);
+           $self->{'_out'} = ($tmp_out);
+           $self->verbose($tmp_verb);
+       } else {
+           @exons = @value;
+       }
+
+       my $cds_map = Bio::Coordinate::Collection->new;
+       my $inex_map = Bio::Coordinate::Collection->new;
+       my $exon_map = Bio::Coordinate::Collection->new;
+       my $exon_cds_map = Bio::Coordinate::Collection->new;
+       my $intron_map = Bio::Coordinate::Collection->new;
+       my $negative_intron_map = Bio::Coordinate::Collection->new;
+
+       my $tr_end = 0;          # transcript length accumulated so far
+       my $coffset;             # shift of the cds origin, see below
+       my $exon_counter;
+       my $prev_exon_end;
+
+       for my $exon ( @exons ) {
+           $exon_counter++;
+
+           #
+           # gene -> cds
+           #
+
+           my $match1 = Bio::Location::Simple->new
+               (-seq_id =>'gene' ,
+                -start  => $exon->start,
+                -end    => $exon->end,
+                -strand => 1,
+                -verbose=> $self->verbose);
+
+           my $match2 = Bio::Location::Simple->new
+               (-seq_id => 'cds',
+                -start => $tr_end + 1,
+                -end => $tr_end + $exon->end - $exon->start +1,
+                -strand=>$exon->strand,
+                -verbose=>$self->verbose);
+
+           $cds_map->add_mapper(Bio::Coordinate::Pair->new
+                                (-in => $match1,
+                                 -out => $match2,
+                                )
+                               );
+
+           # the exon holding gene position 1 fixes how far the cds
+           # coordinates have to be shifted afterwards
+           if ($exon->start <= 1 and $exon->end >= 1) {
+               $coffset = $tr_end - $exon->start + 1;
+           }
+           $tr_end = $tr_end  + $exon->end - $exon->start + 1;
+
+           #
+           # gene -> intron
+           #
+
+           if (defined $prev_exon_end) {
+               my $match3 = Bio::Location::Simple->new
+                   (-seq_id  => 'gene',
+                    -start   => $prev_exon_end + 1,
+                    -end     => $exon->start -1,
+                    -strand  => $exon->strand,
+                    -verbose => $self->verbose);
+
+               my $match4 = Bio::Location::Simple->new
+                   (-seq_id  => 'intron'. ($exon_counter -1),
+                    -start   => 1,
+                    -end     => $exon->start - 1 - $prev_exon_end,
+                    -strand  =>$exon->strand,
+                    -verbose => $self->verbose,);
+
+               # negative intron coordinates
+               my $match5 = Bio::Location::Simple->new
+                   (-seq_id  => 'intron'. ($exon_counter -1),
+                    -start   => -1 * ($exon->start - 2 - $prev_exon_end) -1,
+                    -end     => -1,
+                    -strand  => $exon->strand,
+                    -verbose => $self->verbose);
+
+               # every pair gets its own location objects (clones), so the
+               # collections do not share state
+               $inex_map->add_mapper(Bio::Coordinate::Pair->new
+                                     (-in => $match3,
+                                      -out => $match4
+                                     )
+                                    );
+               $intron_map->add_mapper(Bio::Coordinate::Pair->new
+                                       (-in => $self->_clone_loc($match3),
+                                        -out => $self->_clone_loc($match4)
+                                       )
+                                      );
+               $negative_intron_map->add_mapper(Bio::Coordinate::Pair->new
+                                                (-in => $self->_clone_loc($match4),
+                                                 -out => $match5
+                                                ));
+
+           }
+
+           # store the value
+           $prev_exon_end = $exon->end;
+
+           #
+           # gene -> exon
+           #
+           my $match6 = Bio::Location::Simple->new
+               (-seq_id => 'exon'. $exon_counter,
+                -start  => 1,
+                -end    => $exon->end - $exon->start +1,
+                -strand => $exon->strand,
+                -verbose=> $self->verbose,);
+
+           $inex_map->add_mapper(Bio::Coordinate::Pair->new
+                                 (-in => $self->_clone_loc($match1),
+                                  -out => $match6
+                                 )
+                                );
+           $exon_map->add_mapper(Bio::Coordinate::Pair->new
+                                 (-in => $self->_clone_loc($match1),
+                                  -out => $self->_clone_loc($match6)
+                                 )
+                                );
+           $exon_cds_map->add_mapper(Bio::Coordinate::Pair->new
+                                     (-in => $self->_clone_loc($match6),
+                                      -out => $self->_clone_loc($match2)
+                                     )
+                                    );
+
+       }
+
+       # move coordinate start if exons have negative values
+       if ($coffset) {
+           foreach my $m ($cds_map->each_mapper) {
+               $m->out->start($m->out->start - $coffset);
+               $m->out->end($m->out->end - $coffset);
+           }
+
+       }
+
+       $self->{'_mappers'}->{$cds_mapper} = $cds_map;
+       $self->{'_mappers'}->{$exon_cds_mapper} = $exon_cds_map;
+       $self->{'_mappers'}->{$inex_mapper} = $inex_map;
+       $self->{'_mappers'}->{$exon_mapper} = $exon_map;
+       $self->{'_mappers'}->{$intron_mapper} = $intron_map;
+       $self->{'_mappers'}->{$negative_intron_mapper} = $negative_intron_map;
+   }
+
+   # "@{...} || 0" forced scalar context, so callers only ever received a
+   # count, and it died under strict refs when no exons had been set.
+   # Falling back to an empty array reference keeps the scalar-context
+   # result (count, or 0) and returns the real exon list in list context.
+   return @{ $self->{'_chr_exons'} || [] };
+}
+
+=head2 _clone_loc
+
+ Title   : _clone_loc
+ Usage   : $copy_of_loc = $obj->_clone_loc($loc);
+ Function: Make a deep copy of a simple location
+ Returns : a Bio::Location::Simple object
+ Args    : a Bio::Location::Simple object to be cloned
+
+=cut
+
+
+sub _clone_loc { # clone a simple location
+   my ($self,$loc) = @_;
+
+   # ref() needs explicit parentheses here: "ref $loc . ']'" parses as
+   # ref($loc . ']') and always yields the empty string, so the message
+   # lost both the class name and the closing bracket. Testing ref($loc)
+   # first also turns a non-reference argument into a proper throw().
+   $self->throw("I need a Bio::Location::Simple , not [". ref($loc). "]")
+       unless ref($loc) && $loc->isa('Bio::Location::Simple');
+
+   # copy every attribute into a brand new, independent object
+   return  Bio::Location::Simple->new
+       (-verbose       => $self->verbose,
+	-seq_id        => $loc->seq_id,
+        -start         => $loc->start,
+        -end           => $loc->end,
+        -strand        => $loc->strand,
+        -location_type => $loc->location_type
+       );
+}
+
+
+=head2 cds
+
+ Title   : cds
+ Usage   : $obj->cds(20);
+ Function: Set and read the offset of CDS from the start of transcript
+
+           Simple input can be an integer which gives the start of the
+           coding region in genomic coordinate. If you want to provide
+           the end of the coding region or indicate the use of the
+           opposite strand, you have to pass a Bio::RangeI
+           (e.g. Bio::Location::Simple or Bio::SeqFeature::Generic)
+           object to this method.
+
+ Returns : set value or 0
+ Args    : new value (optional)
+
+=cut
+
+sub cds {
+   my ($self,$value) = @_;
+
+   # plain read access
+   return $self->{'_cds'} || 0 unless defined $value;
+
+   if ($value =~ /^[+-]?\d+$/ ) {
+       # a bare integer is the start of the coding region
+       $self->{'_cds'} = Bio::Location::Simple->new(-start=>$value,
+                                                    -verbose=>$self->verbose);
+   }
+   elsif (ref $value &&  $value->isa('Bio::RangeI') ) {
+       $self->{'_cds'} = $value;
+   }
+   else {
+       $self->throw("I need an integer or Bio::RangeI, not [$value]");
+   }
+
+   # strand !!
+   my $cds = $self->{'_cds'};
+
+   # the length is only known when the range has an end
+   my $len;
+   if (defined $cds->end) {
+       $len = $cds->end - $cds->start +1;
+   }
+
+   # (re)build the chr -> gene mapper from the new CDS
+   my $key = join "-", $COORDINATE_SYSTEMS{'chr'}, $COORDINATE_SYSTEMS{'gene'};
+   $self->{'_mappers'}->{$key} = $self->_create_pair
+       ('chr', 'gene', 0,
+        $cds->start-1,
+        $len,
+        $cds->strand);
+
+   # recalculate exon-based mappers
+   if ( defined $self->{'_chr_exons'} ) {
+       $self->exons(@{$self->{'_chr_exons'}});
+   }
+
+   return $self->{'_cds'} || 0;
+}
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map(5);
+ Function: Map the location from the input coordinate system
+           to a new value in the output coordinate system.
+ Example :
+ Returns : new value in the output coordinate system
+ Args    : a Bio::Location::Simple
+
+=cut
+
+sub map {
+   my ($self,$value) = @_;
+
+   # only locations and sequence features can be mapped
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI or Bio::SeqFeatureI, not [".
+                ref($value). "]")
+       unless ref($value) && ($value->isa('Bio::Location::Simple') or
+                              $value->isa('Bio::Location::SplitLocationI') or
+                              $value->isa('Bio::SeqFeatureI'));
+   $self->throw("Input coordinate system not set")
+       unless $self->{'_in'};
+   $self->throw("Output coordinate system not set")
+       unless $self->{'_out'};
+   $self->throw("Do not be silly. Input and output coordinate ".
+                "systems are the same!")
+       unless $self->{'_in'} != $self->{'_out'};
+
+   # make sure the stored mappers point from the input to the output system
+   $self->_check_direction();
+
+   # a sequence feature is mapped through its location
+   $value = $value->location if $value->isa('Bio::SeqFeatureI');
+   $self->debug( "=== Start location: ". $value->start. ",".
+                 $value->end. " (". ($value->strand || ''). ")\n");
+
+   # if nozero coordinate system is used in the input values
+   # (note: this adjusts the location object that was passed in)
+   if ( defined $self->{'_nozero'} &&
+        ( $self->{'_nozero'} == 1 || $self->{'_nozero'} == 3 ) ) {
+       $value->start($value->start + 1)
+           if defined $value->start && $value->start < 1;
+       $value->end($value->end + 1)
+           if defined $value->end && $value->end < 1;
+   }
+
+   # names ("<from>-<to>") of the mappers to apply, in order
+   my @steps = $self->_get_path();
+   $self->debug( "mapping ". $self->{'_in'}. "->". $self->{'_out'}.
+                 "  Mappers: ". join(", ", @steps). "\n");
+
+   foreach my $mapper (@steps) {
+       if ($mapper eq $TRANSLATION) {
+           # cds <-> propeptide is computed, not looked up in a mapper
+           if ($self->direction == 1) {
+               $value = $self->_translate($value);
+               $self->debug( "+   $TRANSLATION cds -> propeptide (translate) \n");
+           } else {
+               $value = $self->_reverse_translate($value);
+               $self->debug("+   $TRANSLATION propeptide -> cds (reverse translate) \n");
+           }
+       }
+       # keep the start and end values, and go on to next iteration
+       #  if this mapper is not set
+       elsif ( ! defined $self->{'_mappers'}->{$mapper} ) {
+           # only relabel the location with the name of the step's target
+           # system; take the capture from this match, never a stale $1
+           my ($counter) = $mapper =~ /\d+-(\d+)/;
+           $value->seq_id($COORDINATE_INTS{$counter}) if defined $counter;
+           $self->debug( "-   $mapper\n");
+       } else {
+           #
+           # the DEFAULT : generic mapping
+           #
+           $value = $self->{'_mappers'}->{$mapper}->map($value);
+
+           $value->purge_gaps
+               if ($value && $value->isa('Bio::Location::SplitLocationI') &&
+                   $value->can('gap'));
+
+           $self->debug( "+  $mapper (". $self->direction. "):  start ".
+                         $value->start. " end ". $value->end. "\n")
+               if $value && $self->verbose > 0;
+       }
+   }
+
+   # if nozero coordinate system is asked to be used in the output values
+   if ( defined $value && defined $self->{'_nozero'} &&
+        ( $self->{'_nozero'} == 2 || $self->{'_nozero'} == 3 ) ) {
+
+       $value->start($value->start - 1)
+           if defined $value->start && $value->start < 1;
+       $value->end($value->end - 1)
+           if defined $value->end && $value->end < 1;
+   }
+
+   # handle merging of adjacent split locations!
+
+   if (ref $value eq "Bio::Coordinate::Result" && $value->each_match > 1 ) {
+       my $prevloc;
+       my $merging = 0;
+       my $newvalue;
+       my @matches;
+       foreach my $loc ( $value->each_Location(1) ) {
+           unless ($prevloc) {
+               $prevloc = $loc;
+               push @matches, $prevloc;
+               next;
+           }
+           # a location starting right after the previous one on the same
+           # sequence is absorbed by extending the previous one
+           if ($prevloc->end == ($loc->start - 1) &&
+               $prevloc->seq_id eq $loc->seq_id) {
+               $prevloc->end($loc->end);
+               $merging = 1;
+           } else {
+               push @matches, $loc;
+               $prevloc = $loc;
+           }
+       }
+       if ($merging) {
+           if (@matches > 1 ) {
+               # several locations remain: collect them in a new result
+               $newvalue = Bio::Coordinate::Result->new;
+               $newvalue->add_sub_Location($_) foreach @matches;
+           } else {
+               # everything collapsed into one continuous match
+               $newvalue = Bio::Coordinate::Result::Match->new
+                   (-seq_id   => $matches[0]->seq_id,
+                    -start    => $matches[0]->start,
+                    -end      => $matches[0]->end,
+                    -strand   => $matches[0]->strand,
+                    -verbose  => $self->verbose,);
+           }
+           $value = $newvalue;
+       }
+   }
+   elsif (ref $value eq "Bio::Coordinate::Result" &&
+          $value->each_match == 1 ){
+       # a result holding a single match is returned as that match
+       $value = $value->match;
+   }
+
+   return $value;
+}
+
+=head2 direction
+
+ Title   : direction
+ Usage   : $dir = $obj->direction;
+ Function: Read-only method for the direction of mapping deduced from
+           predefined input and output coordinate names.
+ Example :
+ Returns : 1 or -1, mapping direction
+ Args    : none
+
+=cut
+
+sub direction {
+   # plain reader; the stored value is changed by swap() and _check_direction()
+   my $self = shift;
+   return $self->{'_direction'};
+}
+
+
+=head2 swap
+
+ Title   : swap
+ Usage   : $obj->swap;
+ Function: Swap the direction of transformation
+           (input <-> output)
+ Example :
+ Returns : 1
+ Args    :
+
+=cut
+
+sub swap {
+   my ($self) = @_;
+
+   # exchange the input and output coordinate systems in one slice assignment
+   @{$self}{'_in', '_out'} = @{$self}{'_out', '_in'};
+
+   # every stored mapper has to be turned around as well
+   foreach my $name (keys %{$self->{'_mappers'}}) {
+       $self->{'_mappers'}->{$name}->swap;
+   }
+
+   # record the changed direction;
+   $self->{'_direction'} *= -1;
+
+   return 1;
+}
+
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : $obj->to_string;
+ Function: Print the internal mapper values in a human readable format
+ Example :
+ Returns : 1
+ Args    : none
+
+=cut
+
+sub to_string {
+   my ($self) = shift;
+
+   # Prints a dump of the mapper set-up and returns 1; nothing is returned
+   # as a string.
+
+   # print the heading of one mapper section and return the mapper's
+   # numeric code (e.g. "1-2" for 'chr-gene')
+   my $print_header = sub {
+       my ($mapper_str) = @_;
+       my $mapper = $self->_mapper_string2code($mapper_str);
+       printf "\n     %-12s (%s)\n", $mapper_str, $mapper ;
+       return $mapper;
+   };
+
+   # print start and end of every pair held by the named mapper;
+   # a mapper that has not been set is skipped instead of dying on undef
+   my $print_pairs = sub {
+       my ($mapper) = @_;
+       my $pairs = $self->{'_mappers'}->{$mapper};
+       return unless defined $pairs;
+       my $i = 1;
+       foreach my $pair ( $pairs->each_mapper ) {
+           printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
+           printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
+           $i++;
+       }
+       return;
+   };
+
+   print "-" x 40, "\n";
+
+   # chr-gene
+   $print_header->('chr-gene');
+   # cds() returns 0 when nothing is set, so test for truth, not definedness
+   if (my $cds = $self->cds) {
+       my $end = defined $cds->end ? $cds->end - 1 : undef;
+       printf "%16s%s: %s (%s)\n", ' ', 'gene offset', $cds->start - 1 , $end || '';
+       printf "%16s%s: %s\n", ' ', 'gene strand', $cds->strand || 0;
+   }
+
+   # the sections that are backed by a list of pairs
+   foreach my $mapper_str ('gene-intron', 'intron-negative_intron',
+                           'gene-exon', 'gene-cds') {
+       $print_pairs->( $print_header->($mapper_str) );
+   }
+
+   # cds-propeptide
+   $print_header->('cds-propeptide');
+   printf "%9s%-12s\n", "", '"translate"';
+
+   # propeptide-peptide
+   $print_header->('propeptide-peptide');
+   printf "%16s%s: %s\n", ' ', "peptide offset", $self->peptide_offset;
+
+   print "\nin : ", $self->in, "\n";
+   print "out: ", $self->out, "\n";
+
+   # direction is 1 or -1; both are true, so compare instead of testing truth
+   my $direction = $self->direction;
+   my $dir = ($direction && $direction > 0) ? 'forward' : 'reverse';
+   printf "direction: %-8s(%s)\n",  $dir, $self->direction;
+   print "\n", "-" x 40, "\n";
+
+   1;
+}
+
+sub _mapper_code2string {
+    my ($self, $code) = @_;
+
+    # split a code like "1-2" into its two numeric ids and look up
+    # the name of each in %COORDINATE_INTS
+    my ($from, $to) = $code =~ /(\d+)-(\d+)/;
+    return "$COORDINATE_INTS{$from}-$COORDINATE_INTS{$to}";
+}
+
+sub _mapper_string2code {
+    my ($self, $string) =@_;
+
+    # everything before the first dash is the first name, the rest the
+    # second one; each name is looked up in %COORDINATE_SYSTEMS
+    my ($from, $to) = $string =~ /([^-]+)-(.*)/;
+    return "$COORDINATE_SYSTEMS{$from}-$COORDINATE_SYSTEMS{$to}";
+}
+
+
+=head2 _create_pair
+
+ Title   : _create_pair
+ Usage   : $mapper = $obj->_create_pair('chr', 'gene', 0, 2555, 10000, -1);
+ Function: Internal helper method to create a mapper between
+           two coordinate systems
+ Returns : a Bio::Coordinate::Pair object
+ Args    : string, input coordinate system name,
+           string, output coordinate system name,
+           boolean, strict mapping
+           positive integer, offset
+           positive integer, length
+           1 || -1 , strand
+
+=cut
+
+sub _create_pair {
+   my ($self, $in, $out, $strict, $offset, $length, $strand ) = @_;
+
+   # any false value (including 0) is replaced by the default
+   $strict = 0  unless $strict;
+   $strand = 1  unless $strand;
+   $length = 20 unless $length;
+
+   # input side: $length positions starting right after the offset
+   my $input_loc = Bio::Location::Simple->new(
+       -seq_id  => $in,
+       -start   => $offset+1,
+       -end     => $offset+$length,
+       -strand  => 1,
+       -verbose => $self->verbose,
+   );
+
+   # output side: the same number of positions counted from 1,
+   # carrying the requested strand
+   my $output_loc = Bio::Location::Simple->new(
+       -seq_id  => $out,
+       -start   => 1,
+       -end     => $length,
+       -strand  => $strand,
+       -verbose => $self->verbose,
+   );
+
+   my $pair = Bio::Coordinate::ExtrapolatingPair->new(
+       -in      => $input_loc,
+       -out     => $output_loc,
+       -strict  => $strict,
+       -verbose => $self->verbose,
+   );
+   return $pair;
+}
+
+
+=head2 _translate
+
+ Title   : _translate
+ Usage   : $newpos = $obj->_translate($loc);
+ Function: Translate the location from the CDS coordinate system
+           to a new value in the propeptide coordinate system.
+ Example :
+ Returns : new location
+ Args    : a Bio::Location::Simple or Bio::Location::SplitLocationI
+
+=cut
+
+sub _translate {
+   my ($self,$value) = @_;
+
+   my $is_location = defined $value &&
+       ( $value->isa('Bio::Location::Simple') ||
+         $value->isa('Bio::Location::SplitLocationI') );
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI, not [". ref($value). "]")
+       unless $is_location;
+
+   my $seqid = 'propeptide';
+
+   # turn one continuous location into its propeptide counterpart:
+   # both ends are divided by three, truncated and shifted by one
+   # NOTE(review): position 3 lands on residue 2 with this formula - confirm
+   # that this matches the coordinate convention the callers expect
+   my $to_propeptide = sub {
+       my ($loc) = @_;
+       return Bio::Location::Simple->new(
+           -start   => int($loc->start / 3) + 1,
+           -end     => int($loc->end / 3) + 1,
+           -seq_id  => $seqid,
+           -strand  => 1,
+           -verbose => $self->verbose,
+       );
+   };
+
+   # simple case: a single continuous location
+   return $to_propeptide->($value)
+       unless $value->isa("Bio::Location::SplitLocationI");
+
+   # split location: convert every sublocation and collect the results
+   my $split = Bio::Location::Split->new(-seq_id=>$seqid);
+   foreach my $loc ( $value->each_Location(1) ) {
+       $split->add_sub_Location( $to_propeptide->($loc) );
+   }
+   return $split;
+}
+
+# Internal helper: convert a location into the 'frame' coordinate system,
+# where every position is replaced by its place (1, 2 or 3) within a triplet.
+# Takes a Bio::Location::Simple or Bio::Location::SplitLocationI and returns
+# a new location of the corresponding kind; throws on anything else.
+sub _frame {
+   my ($self,$value) = @_;
+
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI, not [". ref($value). "]")
+       unless defined $value &&
+           ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
+
+   # NOTE(review): the container of a split result is labelled 'propeptide'
+   # while its parts are labelled 'frame' - confirm whether that is intended
+   my $seqid = 'propeptide';
+
+   if ($value->isa("Bio::Location::SplitLocationI")) {
+       my $split = Bio::Location::Split->new(-seq_id=>$seqid);
+       foreach my $loc ( $value->each_Location(1) ) {
+
+           # each part is converted from its own start and end, not from
+           # the extent of the whole split location
+           my $match = Bio::Location::Simple->new
+               (-start  => ($loc->start-1) % 3 +1,
+                -end    => ($loc->end-1) % 3 +1,
+                -seq_id => 'frame',
+                -strand => 1,
+                -verbose=> $self->verbose);
+           $split->add_sub_Location($match);
+       }
+       return $split;
+   } else {
+       return Bio::Location::Simple->new(-start   => ($value->start-1) % 3 +1,
+                                         -end     => ($value->end-1) % 3 +1,
+                                         -seq_id  => 'frame',
+                                         -strand  => 1,
+                                         -verbose => $self->verbose,
+                                        );
+   }
+}
+
+
+=head2 _reverse_translate
+
+ Title   : _reverse_translate
+ Usage   : $newpos = $obj->_reverse_translate(5);
+ Function: Reverse translate the location from the propeptide
+           coordinate system to a new value in the CDS.
+           Note that a single peptide location expands to cover
+           the codon triplet
+ Example :
+ Returns : new location in the CDS coordinate system
+ Args    : a Bio::Location::Simple or Bio::Location::SplitLocationI
+
+=cut
+
+sub _reverse_translate {
+   my ($self,$value) = @_;
+
+   $self->throw("Need to pass me a Bio::Location::Simple or ".
+                "Bio::Location::SplitLocationI, not [". ref($value). "]")
+       unless defined $value &&
+           ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
+
+   my $seqid = 'cds';
+
+   # A residue n covers the triplet 3n-2 .. 3n, so the start is mapped to
+   # the first base of its codon and the end to the last base of its codon.
+
+   if ($value->isa("Bio::Location::SplitLocationI")) {
+       my $split = Bio::Location::Split->new(-seq_id=>$seqid);
+       foreach my $loc ( $value->each_Location(1) ) {
+
+           # each part is converted from its own start and end, not from
+           # the extent of the whole split location
+           my $match = Bio::Location::Simple->new
+               (-start   => $loc->start * 3 - 2,
+                -end     => $loc->end * 3,
+                -seq_id  => $seqid,
+                -strand  => 1,
+                -verbose => $self->verbose,
+                );
+           $split->add_sub_Location($match);
+       }
+       return $split;
+
+   } else {
+       return Bio::Location::Simple->new(-start   => $value->start * 3 - 2,
+                                         -end     => $value->end * 3,
+                                         -seq_id  => $seqid,
+                                         -strand  => 1,
+                                         -verbose => $self->verbose,
+                                        );
+   }
+}
+
+
+=head2 _check_direction
+
+ Title   : _check_direction
+ Usage   : $obj->_check_direction();
+ Function: Check and, when needed, swap the direction of the location
+           mapping Pairs based on the input and output values
+ Example :
+ Returns : 1
+ Args    : none
+
+=cut
+
+sub _check_direction {
+   my $self = shift;
+
+   # the wanted direction follows from the numeric order of the two systems
+   my $wanted = $self->{'_in'} > $self->{'_out'} ? -1 : 1;
+   return 1 if $wanted == $self->{'_direction'};
+
+   # the stored direction is the other way round: turn every mapper around
+   foreach my $name (keys %{$self->{'_mappers'}}) {
+       $self->{'_mappers'}->{$name}->swap;
+   }
+   # record the changed direction;
+   $self->{'_direction'} *= -1;
+
+   return 1;
+}
+
+
+=head2 _get_path
+
+ Title   : _get_path
+ Usage   : @mappers = $obj->_get_path();
+ Function: internal method for finding the shortest path between
+           input and output coordinate systems.
+           Calculations and caching are handled by the graph class.
+           See L<Bio::Coordinate::Graph>.
+ Example :
+ Returns : array of the mappers
+ Args    : none
+
+=cut
+
+sub _get_path {
+   my $self = shift;
+
+   # note the order
+   # the path is always looked up from the smaller to the bigger system and
+   # turned around at the end, so both directions use the same cache entry
+   my ($start, $end) = ($self->{'_in'} || 0, $self->{'_out'} || 0);
+   my $reverse = $start > $end;
+   ($start, $end) = ($end, $start) if $reverse;
+
+   my $cache_key = "$start$end";
+   my @mappers;
+   if (exists $self->{'_previous_path'} and
+       $self->{'_previous_path'} eq $cache_key ) {
+       # same end points as last time: reuse the stored list
+       @mappers = @{$self->{'_mapper_path'}};
+   } else {
+       # every pair of consecutive nodes on the path names one mapper
+       my @nodes = $self->{'_graph'}->shortest_path($start, $end);
+       @mappers = map { "$nodes[$_ - 1]-$nodes[$_]" } 1 .. $#nodes;
+
+       $self->{'_previous_path'} = $cache_key;
+       $self->{'_mapper_path'} = \@mappers;
+   }
+
+   return $reverse ? reverse(@mappers) : @mappers;
+}
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Graph.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Graph.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Graph.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,397 @@
+# $Id: Graph.pm,v 1.10.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::Graph
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::Graph - Finds shortest path between nodes in a graph
+
+=head1 SYNOPSIS
+
+  # get a hash of hashes representing the graph. E.g.:
+  my $hash= {
+	     '1' => {
+		     '2' => 1
+		    },
+	     '2' => {
+		     '4' => 1,
+		     '3' => 1
+		    },
+	     '3' => undef,
+	     '4' => {
+		     '5' => 1
+		    },
+	     '5' => undef
+	    };
+
+  # create the object;
+  my $graph = Bio::Coordinate::Graph->new(-graph => $hash);
+
+  # find the shortest path between two nodes
+  my $a = 1;
+  my $b = 5;
+  my @path = $graph->shortest_path($a, $b);
+  print join (", ", @path), "\n";
+
+
+=head1 DESCRIPTION
+
+This class calculates the shortest path between input and output
+coordinate systems in a graph that defines the relationships between
+them. This class is primarily designed to analyze gene-related
+coordinate systems. See L<Bio::Coordinate::GeneMapper>.
+
+Note that this module can not be used to manage graphs.
+
+Technically the graph implemented here is known as Directed Acyclic
+Graph (DAG). DAG is composed of vertices (nodes) and edges (with
+optional weights) linking them. Nodes of the graph are the coordinate
+systems in gene mapper.
+
+The shortest path is found using Dijkstra's algorithm. This
+algorithm is fast and greedy and requires all weights to be
+positive. All weights in the gene coordinate system graph are
+currently equal (1), making the graph unweighted. That makes the use of
+Dijkstra's algorithm overkill. A simpler and faster breadth-first
+search would be enough. Luckily the difference for small graphs is not
+significant and the implementation is capable of taking weights into
+account if needed at some later time.
+
+=head2 Input format
+
+The graph needs to be primed using a hash of hashes where there is a
+key for each node. The second keys are the names of the downstream
+neighboring nodes and values are the weights for reaching them. Here
+is part of the gene coordinate system graph:
+
+
+    $hash = {
+	     '6' => undef,
+	     '3' => {
+		     '6' => 1
+		    },
+	     '2' => {
+		     '6' => 1,
+		     '4' => 1,
+		     '3' => 1
+		    },
+	     '1' => {
+		     '2' => 1
+		    },
+	     '4' => {
+		     '5' => 1
+		    },
+	     '5' => undef
+	    };
+
+
+Note that the names need to be positive integers. Root should be '1'
+and the directedness of the graph is taken advantage of to speed up
+calculations by assuming that downstream nodes always have a larger
+number as their name.
+
+An alternative (shorter) way of describing input is to use hash of
+arrays. See L<Bio::Coordinate::Graph::hash_of_arrays>.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::Graph;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use base qw(Bio::Root::Root);
+
+
+# Constructor. Recognised named arguments:
+#   -graph     => reference to a hash of hashes, handed to graph()
+#   -hasharray => reference to a hash of arrays, handed to hash_of_arrays()
+# Returns the new object.
+sub new {
+    my($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my($graph, $hasharray) =
+        $self->_rearrange([qw(
+                              GRAPH
+                              HASHARRAY
+                             )],
+                         @args);
+
+    $graph  && $self->graph($graph);
+    # the hash-of-arrays reader of this class is named hash_of_arrays()
+    $hasharray  && $self->hash_of_arrays($hasharray);
+
+    # no start node has been cached yet
+    $self->{'_root'} = undef;
+
+    return $self; # success - we hope!
+
+}
+
+=head2 Graph structure input methods
+
+=cut
+
+=head2 graph
+
+ Title   : graph
+ Usage   : $obj->graph($my_graph)
+ Function: Read/write method for the graph structure
+ Example : 
+ Returns : hash of hashes graph structure
+ Args    : reference to a hash of hashes
+
+=cut
+
+sub graph {
+  my ($self, $dag) = @_;
+
+  # used as a getter when nothing (or a false value) is passed in
+  return $self->{'_dag'} unless $dag;
+
+  $self->throw("Need a hash of hashes")
+      if ref($dag) ne 'HASH';
+
+  # a new structure makes the cached start node meaningless
+  $self->{'_root'} = undef;
+  $self->{'_dag'} = $dag;
+
+  return $self->{'_dag'};
+}
+
+
+=head2 hash_of_arrays
+
+ Title   : hash_of_arrays
+ Usage   : $obj->hash_of_arrays($hasharray)
+ Function: An alternative method to read in the graph structure.
+           Hash arrays are easier to type. This method converts
+           arrays into hashes and assigns equal values "1" to
+           weights.
+
+ Example : Here is an example of simple structure containing a graph.
+
+           my $DAG = {
+	              6  => [],
+	              5  => [],
+	              4  => [5],
+	              3  => [6],
+	              2  => [3, 4, 6],
+	              1  => [2]
+	             };
+
+ Returns : hash of hashes graph structure
+ Args    : reference to a hash of arrays
+
+=cut
+
+sub hash_of_arrays {
+
+  my ($self,$value) = @_;
+
+  # empty the cache (this happens on plain reads as well)
+  $self->{'_root'} = undef;
+
+  if ($value) {
+
+      $self->throw("Need a hash of arrays")
+          unless  ref($value) eq 'HASH' ;
+
+      # copy the hash of arrays into a hash of hashes, every edge getting
+      # the weight 1; a node without neighbours keeps an undefined value
+      my %hash;
+      foreach my $start ( keys %{$value}){
+          $hash{$start} = undef;
+          $hash{$start}{$_} = 1 foreach @{$value->{$start}};
+      }
+
+      $self->{'_dag'} = \%hash;
+  }
+
+  return $self->{'_dag'};
+
+}
+
+=head2 Methods for determining the shortest path in the graph
+
+=cut
+
+=head2 shortest_path
+
+ Title   : shortest_path
+ Usage   : $obj->shortest_path($a, $b);
+ Function: Method for retrieving the shortest path between nodes.
+           If the start node remains the same, the method is sometimes
+           able to use cached results, otherwise it will recalculate
+           the paths.
+ Example : 
+ Returns : array of node names, only the start node name if no path
+ Args    : name of the start node
+         : name of the end node
+
+=cut
+
+
+sub shortest_path {
+    my ($self, $root, $end) = @_;
+
+    $self->throw("Two arguments needed") if @_ != 3;
+    foreach my $name ($root, $end) {
+        $self->throw("No node name [$name]")
+            unless exists $self->{'_dag'}->{$name};
+    }
+
+    my @path;     # results
+
+    # paths are always calculated from the smaller node name downwards;
+    # remember whether the answer has to be turned around
+    my $reverse = $root > $end;
+    ($root, $end) = ($end, $root) if $reverse;
+
+    # recalculate unless the cached paths start from the same node
+    my $cached = defined $self->{'_root'} && $self->{'_root'} eq $root;
+    $self->dijkstra($root) unless $cached;
+
+    return @path unless $self->{'_paths'} ;
+
+    # walk the 'prev' links back from the end node until a node
+    # without a predecessor is reached
+    @path = ($end);
+    while ( my $parent = $self->{'_paths'}->{$path[0]}{'prev'} ) {
+        unshift @path, $parent;
+    }
+
+    return $reverse ? reverse(@path) : @path;
+}
+
+
+=head2 dijkstra
+
+ Title   : dijkstra
+ Usage   : $graph->dijkstra(1);
+ Function: Implements Dijkstra's algorithm.
+           Returns or sets a list of mappers. The returned path
+           description is always directed down from the root.
+           Called from shortest_path().
+ Example : 
+ Returns : Reference to a hash of hashes representing a linked list
+           which contains shortest path down to all nodes from the start
+           node. E.g.:
+
+            $res = {
+                      '2' => {
+                               'prev' => '1',
+                               'dist' => 1
+                             },
+                      '1' => {
+                               'prev' => undef,
+                               'dist' => 0
+                             },
+                    };
+
+ Args    : name of the start node
+
+=cut
+
+#' keep emacs happy
+
+sub dijkstra {
+    my ($self,$root) = @_;
+
+    $self->throw("I need the name of the root node input") unless $root;
+    $self->throw("No node name [$root]")
+	unless exists $self->{'_dag'}->{$root};
+
+    my %est = ();          # estimate hash: nodes not yet finished
+    my %res = ();          # result hash: nodes with their final values
+    my $maxdist = 1000000; # stands for "not reached"
+
+    # cache the root value
+    $self->{'_root'} = $root;
+
+    # the root starts at distance 0, everything else is unreached
+    foreach my $node ( keys %{$self->{'_dag'}} ){
+	$est{$node}{'prev'} = undef;
+	$est{$node}{'dist'} = $node eq $root ? 0 : $maxdist;
+    }
+
+    # remove nodes from %est until it is empty
+    while (keys %est) {
+
+	# select the remaining node with the smallest estimate; nodes that
+	# are still at $maxdist are never selected
+	my $min_node;
+	my $min = $maxdist;
+	foreach my $node (reverse sort keys %est) {
+	    if ( $est{$node}{'dist'} < $min ) {
+		$min = $est{$node}{'dist'};
+		$min_node = $node;
+	    }
+	}
+
+	# no more links between nodes
+	last unless defined $min_node;
+
+	# move the node from %est into %res;
+	$res{$min_node} = delete $est{$min_node};
+
+	# recompute distances to the neighbours
+	my $dist = $res{$min_node}{'dist'};
+	foreach my $neighbour ( keys %{$self->{'_dag'}->{$min_node}} ){
+	    next unless $est{$neighbour}; # might not be there any more
+
+	    # distance and predecessor are only ever changed together, and
+	    # the comparison uses the real weight of the edge; on a tie the
+	    # node processed last becomes the predecessor
+	    my $candidate = $dist + $self->{'_dag'}{$min_node}{$neighbour};
+	    if ( $est{$neighbour}{'dist'} >= $candidate ) {
+		$est{$neighbour}{'dist'} = $candidate;
+		$est{$neighbour}{'prev'} = $min_node;
+	    }
+	}
+    }
+    return $self->{'_paths'} = \%res;
+}
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/MapperI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/MapperI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/MapperI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,181 @@
+# $Id: MapperI.pm,v 1.10.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::MapperI
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::MapperI - Interface describing coordinate mappers
+
+=head1 SYNOPSIS
+
+  # not to be used directly
+
+=head1 DESCRIPTION
+
+MapperI defines methods for classes capable for mapping locations
+between coordinate systems.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::MapperI;
+use strict;
+
+# Object preamble - inherits from Bio::Root::RootI
+
+use base qw(Bio::Root::RootI);
+
+
+
+=head2 in
+
+ Title   : in
+ Usage   : $obj->in('peptide');
+ Function: Set and read the input coordinate system.
+ Example :
+ Returns : value of input system
+ Args    : new value (optional), Bio::LocationI
+
+=cut
+
+sub in {
+   # interface stub: does nothing but call throw_not_implemented()
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 out
+
+ Title   : out
+ Usage   : $obj->out('peptide');
+ Function: Set and read the output coordinate system.
+ Example :
+ Returns : value of output system
+ Args    : new value (optional), Bio::LocationI
+
+=cut
+
+sub out {
+   # interface stub: does nothing but call throw_not_implemented()
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 swap
+
+ Title   : swap
+ Usage   : $obj->swap;
+ Function: Swap the direction of mapping (input <-> output)
+ Example :
+ Returns : 1
+ Args    : 
+
+=cut
+
+sub swap {
+   # interface stub: does nothing but call throw_not_implemented()
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 test
+
+ Title   : test
+ Usage   : $obj->test;
+ Function: test that both components are of same length
+ Example :
+ Returns : ( 1 | undef )
+ Args    :
+
+=cut
+
+sub test {
+   # interface stub: does nothing but call throw_not_implemented()
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map($loc);
+ Function: Map the location from the input coordinate system
+           to a new value in the output coordinate system.
+ Example :
+ Returns : new value in the output coordinate system
+ Args    : Bio::LocationI
+
+=cut
+
+sub map {
+   # interface stub: does nothing but call throw_not_implemented()
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 return_match
+
+ Title   : return_match
+ Usage   : $obj->return_match(1);
+ Function: A flag to turn on the simplified mode of
+           returning only one joined Match object or undef
+ Example :
+ Returns : boolean
+ Args    : boolean (optional)
+
+=cut
+
+sub return_match {
+   my ($self, $flag) = @_;
+
+   # any defined argument is stored normalised to 1 or 0
+   if ( defined $flag ) {
+       $self->{'_return_match'} = $flag ? 1 : 0;
+   }
+
+   # a flag that was never set reads as 0
+   return $self->{'_return_match'} || 0 ;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Pair.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Pair.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Pair.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,430 @@
+# $Id: Pair.pm,v 1.19.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::Pair
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::Pair - Continuous match between two coordinate sets
+
+=head1 SYNOPSIS
+
+  use Bio::Location::Simple;
+  use Bio::Coordinate::Pair;
+
+  my $match1 = Bio::Location::Simple->new 
+      (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
+  my $match2 = Bio::Location::Simple->new
+      (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
+  my $pair = Bio::Coordinate::Pair->new(-in => $match1,
+  					-out => $match2
+                                        );
+  # location to match
+  $pos = Bio::Location::Simple->new 
+      (-start => 25, -end => 25, -strand=> -1 );
+
+  # results are in a Bio::Coordinate::Result
+  # they can be Matches and Gaps; are  Bio::LocationIs
+  $res = $pair->map($pos);
+  $res->isa('Bio::Coordinate::Result');
+  $res->each_match == 1;
+  $res->each_gap == 0;
+  $res->each_Location == 1;
+  $res->match->start == 5;
+  $res->match->end == 5;
+  $res->match->strand == -1;
+  $res->match->seq_id eq 'peptide';
+
+
+=head1 DESCRIPTION
+
+This class represents one continuous match between two coordinate
+systems represented by Bio::Location::Simple objects. The relationship
+is directed and reversible. It implements methods to ensure internal
+consistency, and map continuous and split locations from one
+coordinate system to another.
+
+The map() method returns Bio::Coordinate::Results that may contain
+Bio::Coordinate::Result::Gaps. The calling code has to deal with them
+(process or ignore them).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::Pair;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Coordinate::Result;
+use Bio::Coordinate::Result::Match;
+use Bio::Coordinate::Result::Gap;
+
+use base qw(Bio::Root::Root Bio::Coordinate::MapperI);
+
+
+sub new {
+    # Constructor: accepts optional -in and -out Bio::LocationI arguments.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # _rearrange() picks the named IN / OUT values out of @args.
+    my ($in, $out) = $self->_rearrange([qw(IN OUT)], @args);
+
+    # Go through the accessors so their isa() validation is applied;
+    # false or missing values are simply left unset.
+    $self->in($in)   if $in;
+    $self->out($out) if $out;
+    return $self;
+}
+
+=head2 in
+
+ Title   : in
+ Usage   : $obj->in($location);
+ Function: Set and read the input coordinate system.
+ Example :
+ Returns : value of input system
+ Args    : new value (optional), Bio::LocationI
+
+=cut
+
+sub in {
+   my ($self,$value) = @_;
+   # Getter call: nothing to validate or store.
+   return $self->{'_in'} unless defined $value;
+   # Setter call: only Bio::LocationI objects are accepted.
+   $value->isa('Bio::LocationI')
+       or $self->throw("Not a valid input Bio::Location [$value] ");
+   return $self->{'_in'} = $value;
+}
+
+
+=head2 out
+
+ Title   : out
+ Usage   : $obj->out($location);
+ Function: Set and read the output coordinate system.
+ Example :
+ Returns : value of output system
+ Args    : new value (optional), Bio::LocationI
+
+=cut
+
+sub out {
+   my ($self,$value) = @_;
+   # Getter call: nothing to validate or store.
+   return $self->{'_out'} unless defined $value;
+   # Setter call: only Bio::LocationI objects are accepted.
+   $value->isa('Bio::LocationI')
+       or $self->throw("Not a valid output coordinate Bio::Location [$value] ");
+   return $self->{'_out'} = $value;
+}
+
+
+=head2 swap
+
+ Title   : swap
+ Usage   : $obj->swap;
+ Function: Swap the direction of mapping; input <-> output
+ Example :
+ Returns : 1
+ Args    : 
+
+=cut
+
+sub swap {
+   my $self = shift;
+   @{$self}{qw(_in _out)} = @{$self}{qw(_out _in)};   # exchange via hash slice
+   return 1;
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $obj->strand;
+ Function: Get strand value for the pair
+ Example :
+ Returns : ( 1 | 0 | -1 )
+ Args    :
+
+=cut
+
+sub strand {
+   my $self = shift;
+   $self->out or $self->warn("Outgoing coordinates are not defined");
+   $self->in  or $self->warn("Incoming coordinates are not defined");
+   # A false strand counts as 0; a missing in/out still fails below after the warning.
+   my $in_strand  = $self->in->strand  || 0;
+   my $out_strand = $self->out->strand || 0;
+   return $in_strand * $out_strand;
+}
+
+=head2 test
+
+ Title   : test
+ Usage   : $obj->test;
+ Function: test that both components are of the same length
+ Example :
+ Returns : ( 1 | undef )
+ Args    :
+
+=cut
+
+sub test {
+   my $self = shift;
+   $self->out or $self->warn("Outgoing coordinates are not defined");
+   $self->in  or $self->warn("Incoming coordinates are not defined");
+   my $in_span  = $self->in->end  - $self->in->start;
+   my $out_span = $self->out->end - $self->out->start;
+   return $in_span == $out_span;   # true only when both spans are equal
+}
+
+
+=head2 map
+
+ Title   : map
+ Usage   : $newpos = $obj->map($pos);
+ Function: Map the location from the input coordinate system
+           to a new value in the output coordinate system.
+ Example :
+ Returns : new Bio::LocationI in the output coordinate system or undef
+ Args    : Bio::LocationI object
+
+=cut
+
+sub map {
+   my ($self,$value) = @_;
+
+   # Validate the argument and make sure both coordinate systems are set.
+   $self->throw("Need to pass me a value.")
+       unless defined $value;
+   $self->throw("I need a Bio::Location, not [$value]")
+       unless $value->isa('Bio::LocationI');
+   $self->throw("Input coordinate system not set")
+       unless $self->in;
+   $self->throw("Output coordinate system not set")
+       unless $self->out;
+
+   # A non-split location is mapped in a single step.
+   return $self->_map($value)
+       unless $value->isa("Bio::Location::SplitLocationI");
+
+   # Split location: map each sub-location on its own and pool every
+   # resulting Match/Gap into one Result object.
+   my $result = Bio::Coordinate::Result->new;
+   foreach my $loc ( $value->sub_Location(1) ) {
+       my $res = $self->_map($loc);
+       $result->add_sub_Location($_) foreach $res->each_Location;
+   }
+   return $result;
+}
+
+
+=head2 _map
+
+ Title   : _map
+ Usage   : $newpos = $obj->_map($simpleloc);
+ Function: Internal method that does the actual mapping. Called
+           multiple times by map() if the location to be mapped is a
+           split location
+ Example :
+ Returns : new location in the output coordinate system or undef
+ Args    : Bio::Location::Simple
+
+=cut
+
+sub _map {
+   my ($self,$value) = @_;
+
+   my $result = Bio::Coordinate::Result->new;
+   # $start/$end: the query shifted into output coordinates (forward sense).
+   my $offset = $self->in->start - $self->out->start;
+   my $start  = $value->start - $offset;
+   my $end    = $value->end - $offset;
+   # Matches are built as Simple locations and re-blessed once filled in.
+   my $match = Bio::Location::Simple->new;
+   $match->location_type($value->location_type);
+   $match->strand($self->strand);
+
+   #within
+   #       |-------------------------|
+   #            |-|
+   if ($start >= $self->out->start and $end <= $self->out->end) {
+
+       $match->seq_id($self->out->seq_id);
+       $result->seq_id($self->out->seq_id);
+
+       if ($self->strand >= 0) {
+          $match->start($start);
+          $match->end($end);
+       } else {   # opposite strands: mirror the range inside the output location
+          $match->start($self->out->end - $end + $self->out->start);
+          $match->end($self->out->end - $start + $self->out->start);
+       }
+       if ($value->strand) {   # combine pair strand with the query's strand
+          $match->strand($match->strand * $value->strand);
+          $result->strand($match->strand);
+       }
+       bless $match, 'Bio::Coordinate::Result::Match';
+       $result->add_sub_Location($match);
+   }
+   #out
+   #       |-------------------------|
+   #   |-|              or              |-|
+   elsif ( ($end < $self->out->start or $start > $self->out->end ) or
+          #insertions just outside the range need special settings
+          ($value->location_type eq 'IN-BETWEEN' and
+           ($end == $self->out->start or $start == $self->out->end)))  {
+       # nothing maps: the whole query comes back as one Gap in input coordinates
+       $match->seq_id($self->in->seq_id);
+       $result->seq_id($self->in->seq_id);
+       $match->start($value->start);
+       $match->end($value->end);
+       $match->strand($value->strand);
+
+       bless $match, 'Bio::Coordinate::Result::Gap';
+       $result->add_sub_Location($match);
+   }
+   #partial I
+   #       |-------------------------|
+   #   |-----|
+   elsif ($start < $self->out->start and $end <= $self->out->end ) {
+
+       $result->seq_id($self->out->seq_id);
+       if ($value->strand) {
+          $match->strand($match->strand * $value->strand);
+          $result->strand($match->strand);
+       }
+       my $gap = Bio::Location::Simple->new;   # overhang before the input range
+       $gap->start($value->start);
+       $gap->end($self->in->start - 1);
+       $gap->strand($value->strand);
+       $gap->seq_id($self->in->seq_id);
+
+       bless $gap, 'Bio::Coordinate::Result::Gap';
+       $result->add_sub_Location($gap);
+
+       # match
+       $match->seq_id($self->out->seq_id);
+
+       if ($self->strand >= 0) {
+          $match->start($self->out->start);
+          $match->end($end);
+       } else {   # mirrored: the overlap sits at the far end of the output location
+          $match->start($self->out->end - $end + $self->out->start);
+          $match->end($self->out->end);
+       }
+       bless $match, 'Bio::Coordinate::Result::Match';
+       $result->add_sub_Location($match);
+   }
+   #partial II
+   #       |-------------------------|
+   #                             |------|
+   elsif ($start >= $self->out->start and $end > $self->out->end ) {
+
+       $match->seq_id($self->out->seq_id);
+       $result->seq_id($self->out->seq_id);
+       if ($value->strand) {
+          $match->strand($match->strand * $value->strand);
+          $result->strand($match->strand);
+       }
+       if ($self->strand >= 0) {
+          $match->start($start);
+          $match->end($self->out->end);
+       } else {   # mirrored: the overlap sits at the near end of the output location
+          $match->start($self->out->start);
+          $match->end($self->out->end - $start + $self->out->start);
+       }
+       bless $match, 'Bio::Coordinate::Result::Match';
+       $result->add_sub_Location($match);
+
+       my $gap = Bio::Location::Simple->new;   # overhang after the input range
+       $gap->start($self->in->end + 1);
+       $gap->end($value->end);
+       $gap->strand($value->strand);
+       $gap->seq_id($self->in->seq_id);
+       bless $gap, 'Bio::Coordinate::Result::Gap';
+       $result->add_sub_Location($gap);
+
+   }
+   #enveloping
+   #       |-------------------------|
+   #   |---------------------------------|
+   elsif ($start < $self->out->start and $end > $self->out->end ) {
+
+       $result->seq_id($self->out->seq_id);
+       if ($value->strand) {
+          $match->strand($match->strand * $value->strand);
+          $result->strand($match->strand);
+       }
+       # gap1
+       my $gap1 = Bio::Location::Simple->new;
+       $gap1->start($value->start);
+       $gap1->end($self->in->start - 1);
+       $gap1->strand($value->strand);
+       $gap1->seq_id($self->in->seq_id);
+       bless $gap1, 'Bio::Coordinate::Result::Gap';
+       $result->add_sub_Location($gap1);
+
+       # match
+       $match->seq_id($self->out->seq_id);
+
+       $match->start($self->out->start);   # the whole output location is matched
+       $match->end($self->out->end);
+       bless $match, 'Bio::Coordinate::Result::Match';
+       $result->add_sub_Location($match);
+
+       # gap2
+       my $gap2 = Bio::Location::Simple->new;
+       $gap2->start($self->in->end + 1);
+       $gap2->end($value->end);
+       $gap2->strand($value->strand);
+       $gap2->seq_id($self->in->seq_id);
+       bless $gap2, 'Bio::Coordinate::Result::Gap';
+       $result->add_sub_Location($gap2);
+
+   } else {
+       $self->throw("Should not be here!");
+   }
+   return $result;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Gap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Gap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Gap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,65 @@
+# $Id: Gap.pm,v 1.9.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::Coordinate::Result::Gap
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::Coordinate::Result::Gap - Another name for Bio::Location::Simple
+
+=head1 SYNOPSIS
+
+  $loc = new Bio::Coordinate::Result::Gap(-start=>10,
+                                          -end=>30,
+                                          -strand=>1);
+
+=head1 DESCRIPTION
+
+This is a location object for coordinate mapping results.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Coordinate::Result::Gap;
+use strict;
+
+
+use base qw(Bio::Location::Simple Bio::Coordinate::ResultI);
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Match.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Match.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result/Match.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,65 @@
+# $Id: Match.pm,v 1.9.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::Coordinate::Result::Match
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::Coordinate::Result::Match - Another name for Bio::Location::Simple
+
+=head1 SYNOPSIS
+
+  $loc = new Bio::Coordinate::Result::Match(-start=>10,
+                                            -end=>30,
+                                            -strand=>+1);
+
+=head1 DESCRIPTION
+
+This is a location class for coordinate mapping results.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Coordinate::Result::Match;
+use strict;
+
+
+use base qw(Bio::Location::Simple Bio::Coordinate::ResultI);
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Result.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,284 @@
+# $Id: Result.pm,v 1.15.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::Result
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::Result - Results from coordinate transformation
+
+=head1 SYNOPSIS
+
+  use Bio::Coordinate::Result;
+
+  #get results from a Bio::Coordinate::MapperI
+  $matched = $result->each_match;
+
+=head1 DESCRIPTION
+
+The results from Bio::Coordinate::MapperI are kept in an object which
+itself is a split location, See L<Bio::Location::Split>. The results
+are either Matches or Gaps.  See L<Bio::Coordinate::Result::Match> and
+L<Bio::Coordinate::Result::Gap>.
+
+If only one Match is returned, there is a convenience method of
+retrieving it or accessing its methods. Same holds true for a Gap.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::Result;
+use strict;
+
+
+use base qw(Bio::Location::Split Bio::Coordinate::ResultI);
+
+
+=head2 add_sub_Location
+
+ Title   : add_sub_Location
+ Usage   : $obj->add_sub_Location($variant)
+ Function: 
+
+           Pushes one Bio::LocationI into the list of variants.
+
+ Example : 
+ Returns : 1 when succeeds
+ Args    : Location object
+
+=cut
+
+sub add_sub_Location {
+  my ($self,$value) = @_;
+
+  # A false argument is reported and otherwise ignored.
+  unless ($value) {
+      $self->warn("provding an empty value for location\n");
+      return;
+  }
+  $value->isa('Bio::LocationI')
+      or $self->throw("Is not a Bio::LocationI but [$value]");
+
+  # Remember the most recently added Match and Gap; match() and gap()
+  # read these cached entries.
+  if ($value->isa('Bio::Coordinate::Result::Match')) { $self->{'_match'} = $value }
+  if ($value->isa('Bio::Coordinate::Result::Gap'))   { $self->{'_gap'}   = $value }
+
+  return $self->SUPER::add_sub_Location($value);
+}
+
+=head2 add_result
+
+ Title   : add_result
+ Usage   : $obj->add_result($result)
+ Function: Adds the contents of one Bio::Coordinate::Result
+ Example : 
+ Returns : 1 when succeeds
+ Args    : Result object
+
+=cut
+
+sub add_result {
+  my ($self,$value) = @_;
+  # Only another Bio::Coordinate::Result may be merged in.
+  $value->isa('Bio::Coordinate::Result')
+      or $self->throw("Is not a Bio::Coordinate::Result but [$value]");
+  # Re-add each of its locations here; the caller receives whatever the
+  # individual add_sub_Location() calls returned.
+  return map { $self->add_sub_Location($_) } $value->each_Location;
+}
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+
+            We override this here in order to propagate to all sublocations
+            which are not remote (provided this root is not remote either)
+
+  Returns : seq_id
+  Args    : [optional] seq_id value to set
+
+
+=cut
+
+sub seq_id {
+    my ($self, $seqid) = @_;   # NOTE: $seqid is accepted but never used
+
+    # The answer is always taken from the first sub-location; an empty
+    # Result yields an empty list (undef in scalar context).
+    my @locations = $self->each_Location;
+
+    return unless @locations;
+    return $locations[0]->seq_id;
+}
+
+
+=head2 Convenience methods
+
+These methods are shortcuts to Match and Gap locations.
+
+=cut
+
+=head2 each_gap
+
+ Title   : each_gap
+ Usage   : $obj->each_gap();
+ Function: 
+
+            Returns a list of Bio::Coordinate::Result::Gap objects.
+
+ Returns : list of gaps
+ Args    : none
+
+=cut
+
+sub each_gap {
+   my $self = shift;
+
+   # Filter the sub-locations down to the Gap objects, keeping their order.
+   my @gaps =
+       grep { $_->isa('Bio::Coordinate::Result::Gap') }
+       $self->each_Location;
+
+   return @gaps;
+}
+
+
+=head2 each_match
+
+ Title   : each_match
+ Usage   : $obj->each_match();
+ Function: 
+
+            Returns a list of Bio::Coordinate::Result::Match objects.
+
+ Returns : list of Matches
+ Args    : none
+
+=cut
+
+sub each_match {
+   my $self = shift;
+
+   # Filter the sub-locations down to the Match objects, keeping their order.
+   my @matches =
+       grep { $_->isa('Bio::Coordinate::Result::Match') }
+       $self->each_Location;
+   return @matches;
+}
+
+=head2 match
+
+ Title   : match
+ Usage   : $match_object = $obj->match(); #or
+           $mstart = $obj->match->start;
+ Function: Read only method for retrieving or accessing the match object.
+ Returns : one Bio::Coordinate::Result::Match
+ Args    : 
+
+=cut
+
+sub match {
+   my $self = shift;
+   if ($self->each_match > 1 and $self->verbose > 0) {
+       $self->warn("More than one match in results");
+   }
+   if (not defined $self->{'_match'}) {   # nothing cached: use the last Match
+       my @found = $self->each_match;
+       $self->{'_match'} = $found[-1];
+   }
+   return $self->{'_match'};
+}
+
+=head2 gap
+
+ Title   : gap
+ Usage   : $gap_object = $obj->gap(); #or
+           $gstart = $obj->gap->start;
+ Function: Read only method for retrieving or accessing the gap object.
+ Returns : one Bio::Coordinate::Result::Gap
+ Args    : 
+
+=cut
+
+sub gap {
+   my $self = shift;
+   if ($self->each_gap > 1 and $self->verbose > 0) {
+       $self->warn("More than one gap in results");
+   }
+   if (not defined $self->{'_gap'}) {   # nothing cached: use the last Gap
+       my @found = $self->each_gap;
+       $self->{'_gap'} = $found[-1];
+   }
+   return $self->{'_gap'};
+}
+
+
+=head2 purge_gaps
+
+ Title   : purge_gaps
+ Usage   : $gap_count = $obj->purge_gaps;
+ Function: remove all gaps from the Result
+ Returns : count of removed gaps
+ Args    : 
+
+=cut
+
+sub purge_gaps {
+    my $self = shift;
+
+    # Split the current sub-locations into "keep" (Matches) and "drop"
+    # (everything else); the number dropped is the return value.
+    my @all     = $self->each_Location;
+    my @matches = grep { $_->isa('Bio::Coordinate::Result::Match') } @all;
+    my $count   = @all - @matches;
+
+    # Refill the existing sub-location array with the Matches only ...
+    @{$self->{'_sublocations'}} = @matches;
+
+    # ... and drop the cached entry that gap() would otherwise return.
+    delete $self->{'_gap'};
+
+    return $count;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ResultI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ResultI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/ResultI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+# $Id: ResultI.pm,v 1.6.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# bioperl module for Bio::Coordinate::ResultI
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::ResultI - Interface to identify coordinate mapper results
+
+=head1 SYNOPSIS
+
+  # not to be used directly
+
+=head1 DESCRIPTION
+
+ResultI identifies Bio::LocationIs returned by
+Bio::Coordinate::MapperI implementing classes from other locations.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Coordinate::ResultI;
+use strict;
+
+# Object preamble
+
+use base qw(Bio::LocationI);
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Utils.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Utils.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Coordinate/Utils.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,231 @@
+# $Id: Utils.pm,v 1.16.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::Coordinate::Utils
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Coordinate::Utils - Additional methods to create Bio::Coordinate objects
+
+=head1 SYNOPSIS
+
+    use Bio::Coordinate::Utils;
+    # get a Bio::Align::AlignI compliant object, $aln, somehow
+    # it could be a Bio::SimpleAlign
+
+    $mapper = Bio::Coordinate::Utils->from_align($aln, 1);
+
+    # Build a set of mappers which will map, for each sequence,
+    # that sequence position in the alignment (exon position to alignment 
+    # position) 
+    my @mappers = Bio::Coordinate::Utils->from_seq_to_alignmentpos($aln);
+
+
+=head1 DESCRIPTION
+
+This class is a holder of methods that work on or create
+Bio::Coordinate::MapperI-compliant objects. These methods are not
+part of the Bio::Coordinate::MapperI interface and should in general
+not be essential to the primary function of sequence objects. If you
+are thinking of adding essential functions, it might be better to
+create your own sequence class.  See L<Bio::PrimarySeqI>,
+L<Bio::PrimarySeq>, and L<Bio::Seq> for more.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+Jason Stajich jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Coordinate::Utils;
+
+use Bio::Location::Simple;
+use Bio::Coordinate::Pair;
+use Bio::Coordinate::Collection;
+
+use strict;
+
+use base qw(Bio::Root::Root);
+# new inherited from Root
+
+=head2 from_align
+
+ Title   : from_align
+ Usage   : $mapper = Bio::Coordinate::Utils->from_align($aln, 1);
+ Function:
+           Create a mapper out of an alignment.
+           The mapper will return a value only when both ends of
+           the input range find a match.
+
+           Note: This implementation works only on pairwise alignments
+           and is not yet well tested!
+
+ Returns : A Bio::Coordinate::MapperI
+ Args    : Bio::Align::AlignI object
+           Id for the reference sequence, optional
+
+=cut
+
+sub from_align {
+   my ($self, $aln, $ref ) = @_;
+
+   $aln->isa('Bio::Align::AlignI') ||
+       $self->throw('Not a Bio::Align::AlignI object but ['. ref($aln). ']');
+
+   # default reference sequence to the first sequence (NOTE: $ref is never read after this)
+   $ref ||= 1;
+   
+   my $collection = Bio::Coordinate::Collection->new(-return_match=>1);
+
+   # Only sequences 1 and 2 of the alignment are used (pairwise only).
+   # map_chars presumably rewrites '.' gap symbols to '-' in $aln itself - TODO confirm.
+   # Every run of non-'-' characters in the gap line then becomes one Pair.
+   $aln->map_chars('\.','-');
+   my $cs = $aln->gap_line;   # string scanned below for runs of non-'-' characters
+   my $seq1 = $aln->get_seq_by_pos(1);
+   my $seq2 = $aln->get_seq_by_pos(2);   
+   while ( $cs =~ /([^\-]+)/g) {
+       # alignment coordinates (1-based columns of the run just matched)
+       my $lenmatch = length($1);
+       my $start = pos($cs) - $lenmatch +1;
+       my $end   = $start + $lenmatch -1;   # equals pos($cs)
+       my $match1 = Bio::Location::Simple->new
+	   (-seq_id => $seq1->id,
+	    -start  => $seq1->location_from_column($start)->start,
+	    -end    => $seq1->location_from_column($end)->start,
+	    -strand => $seq1->strand );
+
+       my $match2 = Bio::Location::Simple->new
+	   (-seq_id => $seq2->id,
+	    -start  => $seq2->location_from_column($start)->start,
+	    -end    => $seq2->location_from_column($end)->start,
+	    -strand => $seq2->strand );       
+       
+       my $pair = Bio::Coordinate::Pair->new
+	   (-in  => $match1,
+	    -out => $match2
+	    );
+       unless( $pair->test ) {   # spans differ in length: warn, but the pair is still added
+	   $self->warn(join("",
+			    "pair align did not pass test ($start..$end):\n",
+			    "\tm1=",$match1->to_FTstring(), " len=",
+			    $match1->length, 
+			    " m2=", $match2->to_FTstring()," len=", 
+			    $match2->length,"\n"));
+       }
+       $collection->add_mapper($pair);
+   }
+   return ($collection->each_mapper)[0] if $collection->mapper_count == 1;   # lone Pair: return it directly
+   return $collection;
+
+}
+
+=head2 from_seq_to_alignmentpos
+
+ Title   : from_seq_to_alignmentpos
+ Usage   : @mappers = Bio::Coordinate::Utils->from_seq_to_alignmentpos($aln);
+ Function:
+           Create a mapper out of an alignment.
+           The mapper will map the position of a sequence into that position
+           in the alignment.
+
+           Will work on alignments of >= 2 sequences 
+ Returns : An array of Bio::Coordinate::MapperI
+ Args    : Bio::Align::AlignI object
+
+=cut
+
+
+sub from_seq_to_alignmentpos {
+    my ($self, $aln ) = @_;
+
+    $aln->isa('Bio::Align::AlignI') ||
+	$self->throw('Not a Bio::Align::AlignI object but ['. ref($aln). ']');
+
+    # one mapper is built per sequence of the alignment and returned as a list
+    my @mappers;
+    $aln->map_chars('\.','-');   # presumably rewrites '.' gap symbols to '-' - TODO confirm
+    for my $seq ( $aln->each_seq ) { 
+	my $collection = Bio::Coordinate::Collection->new(-return_match=>1);
+	my $cs = $seq->seq();   # scanned below for runs of non-'-' characters
+	# do we change this over to use index and substr for speed?
+	while ( $cs =~ /([^\-]+)/g) {
+	    # alignment coordinates (1-based columns of the run just matched)
+	    my $lenmatch = length($1);
+	    my $start = pos($cs) - $lenmatch +1;
+	    my $end   = $start + $lenmatch -1;   # equals pos($cs)
+
+	    my $match1 = Bio::Location::Simple->new
+		(-seq_id => $seq->id,
+		 -start  => $seq->location_from_column($start)->start,
+		 -end    => $seq->location_from_column($end)->start,
+		 -strand => $seq->strand );
+
+	    my $match2 = Bio::Location::Simple->new
+		(-seq_id => 'alignment',
+		 -start  => $start,
+		 -end    => $end,
+		 -strand => 0 );
+
+	    my $pair = Bio::Coordinate::Pair->new
+		(-in  => $match1,
+		 -out => $match2
+		 );
+	    unless ( $pair->test ) {   # spans differ in length: warn, but the pair is still added
+		$self->warn(join("",
+				 "pair align did not pass test ($start..$end):\n",
+				 "\tm1=",$match1->to_FTstring(), " len=",
+				 $match1->length, 
+				 " m2=", $match2->to_FTstring()," len=", 
+				 $match2->length,"\n"));
+	    }
+	    $collection->add_mapper($pair);
+	}
+	if( $collection->mapper_count == 1) {   # lone Pair: store it directly
+	    push @mappers, ($collection->each_mapper)[0];
+	} else { 
+	    push @mappers, $collection;
+	}
+    }
+    return @mappers;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Ace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Ace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Ace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,204 @@
+
+# $Id: Ace.pm,v 1.15.4.2 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::Ace
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Ace - Database object interface to ACeDB servers
+
+=head1 SYNOPSIS
+
+    $db = Bio::DB::Ace->new( -host => 'myace.server.com', -port => '120000');
+
+    $seq = $db->get_Seq_by_id('MUSIGHBA1'); # Unique ID
+
+    # or ...
+
+    $seq = $db->get_Seq_by_acc('J00522'); # Accession Number
+
+=head1 DESCRIPTION
+
+This provides a standard BioPerl database access to Ace, using Lincoln Stein's
+excellent AcePerl module. You need to download and install the AcePerl module from
+
+  http://stein.cshl.org/AcePerl/
+
+before this interface will work.
+
+At the moment this interface is designed to work through an
+aceclient/aceserver type mechanism.
+
+=head1 INSTALLING ACEPERL
+
+Download the latest aceperl tar file, gunzip/untar and cd into the directory.
+This is a standard CPAN-style directory, so if you go
+
+  perl Makefile.PL
+  make
+  <become root>
+  make install
+
+Then you will have installed Aceperl. Use the PREFIX mechanism to install elsewhere.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Ace;
+use strict;
+
+# Object preamble - inherits from Bio::DB::RandomAccessI
+
+use Bio::Seq;
+
+BEGIN {
+  # AcePerl is loaded at compile time; if that fails, only a notice is
+  # printed (with the loader's error) and compilation carries on.
+  eval { require Ace };
+  print STDERR "You have not installed Ace.pm.\n Read the docs in Bio::DB::Ace for more information about how to do this.\n It is very easy\n\nError message $@" if $@;
+}
+
+
+use base qw(Bio::DB::RandomAccessI);
+
+# new() below calls the inherited constructor for basic object creation.
+
+# The heavy work, connecting to the ACeDB server, also happens in new().
+
+sub new {
+  # Constructor: connects to an ACeDB server.
+  #   Args   : -host => server name, -port => server port (both required)
+  #   Returns: a Bio::DB::Ace object holding the Ace connection
+  #   Throws : if host or port is missing, or the connection cannot be made
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my ($host, $port) = $self->_rearrange([qw(HOST PORT)], @args);
+
+  if( !$host || !$port ) {
+    $self->throw("Must have a host and port for an acedb server to work");
+  }
+
+  # A false return value from connect() is treated as a failed connection
+  # and reported as an exception instead of being stored.
+  my $aceobj = Ace->connect(-host => $host,
+			    -port => $port) ||
+			      $self->throw("Could not make acedb object to $host:$port");
+
+  $self->_aceobj($aceobj);
+
+  return $self;
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id($uid);
+ Function: Gets a Bio::Seq object by its unique identifier/name
+ Returns : a Bio::Seq object
+ Args    : $id : the id (as a string) of the desired sequence entry
+
+=cut
+
+sub get_Seq_by_id {
+  # Fetches the named Sequence object from the server and wraps its DNA
+  # in a Bio::Seq.  Throws if no id is given or the id is unknown.
+  my $self = shift;
+  my $id = shift or $self->throw("Must supply an identifier!\n");
+  my $ace = $self->_aceobj();
+
+  # Without this check an unknown id would only surface as a confusing
+  # "can't call method on undefined value" error at asDNA() below.
+  my $seq = $ace->fetch( 'Sequence' , $id);
+  $self->throw("id [$id] does not exist") unless defined $seq;
+
+  # The asDNA() text is handled as FASTA-like output: a leading '>' header
+  # line and all line breaks are removed so only the residues remain.
+  my $dna = $seq->asDNA();
+  $dna =~ s/^>.*\n//;
+  $dna =~ s/\n//g;
+  return Bio::Seq->new( -id => $id, -alphabet => 'Dna', -seq => $dna, -name => "Sequence from Bio::DB::Ace $id");
+}
+
+=head2 get_Seq_by_acc
+
+  Title   : get_Seq_by_acc
+  Usage   : $seq = $db->get_Seq_by_acc($acc);
+  Function: Gets a Bio::Seq object by its accession number
+  Returns : a Bio::Seq object
+  Args    : $acc : the accession number of the desired sequence entry
+
+
+=cut
+
+sub get_Seq_by_acc {
+  my ($self, $acc) = @_;
+
+  # An accession is looked up exactly like an id: validate, then delegate.
+  $acc or $self->throw("Must supply an accesion number!\n");
+  return $self->get_Seq_by_id($acc);
+}
+
+=head2 _aceobj
+
+  Title   : _aceobj
+  Usage   : $ace = $db->_aceobj();
+  Function: Get/Set on the acedb object
+  Returns : Ace object
+  Args    : New value of the ace object
+
+=cut
+
+sub _aceobj {
+  my $self   = shift;
+  my $handle = shift;
+
+  # Setter only for true values: a false or absent argument never
+  # clears a connection that is already stored.
+  $self->{'_aceobj'} = $handle if $handle;
+  return $self->{'_aceobj'};
+}
+
+1;
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/biofetch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/biofetch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/biofetch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,294 @@
+# $Id: biofetch.pm,v 1.10.4.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module Bio::DB::Biblio::biofetch.pm
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic 
+  citation retrieval
+
+=head1 SYNOPSIS
+
+Do not use this object directly, only access it through the
+I<Bio::Biblio> module:
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio (-access => 'biofetch');
+  my $ref = $biblio->get_by_id('20063307');
+
+  my $ids = ['20063307', '98276153'];
+  my $refio = $biblio->get_all($ids);
+  while ($ref = $refio->next_bibref) { 
+    print $ref->identifier, "\n";
+  }
+
+=head1 DESCRIPTION
+
+This class uses BioFetch protocol based service to retrieve Medline
+references by their ID.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho (heikki-at-bioperl-dot-org)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 BUGS AND LIMITATIONS
+
+=over 1
+
+=item *
+
+Only method get_by_id() is supported.
+
+=back
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::DB::BiblioI>.
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::Biblio::biofetch;
+use vars qw(%HOSTS %FORMATMAP  $DEFAULTFORMAT $DEFAULTRETRIEVAL_TYPE
+	    $DEFAULT_SERVICE $DEFAULT_NAMESPACE);
+use strict;
+
+use Bio::Biblio::IO;
+
+use base qw(Bio::DB::DBFetch Bio::Biblio);
+
+BEGIN {
+    # Package defaults, set at compile time.  More services can in theory
+    # be added to %HOSTS next to 'dbfetch'.
+    $DEFAULT_SERVICE       = 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
+    $DEFAULTRETRIEVAL_TYPE = 'tempfile';
+    $DEFAULTFORMAT         = 'medlinexml';
+    %FORMATMAP             = ( 'default' => 'medlinexml' );
+
+    # baseurl is a sprintf-style template; %s presumably receives one of
+    # the host names listed under 'hosts' (TODO confirm in Bio::DB::DBFetch).
+    %HOSTS = (
+        'dbfetch' => {
+            baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw',
+            hosts   => { 'ebi' => 'www.ebi.ac.uk' },
+        },
+    );
+
+}
+
+sub new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+
+    # Start from empty tables, then install the package-level defaults
+    # through the regular accessors.
+    @{$self}{ '_hosts', '_formatmap' } = ( {}, {} );
+    $self->hosts(\%HOSTS);
+    $self->formatmap(\%FORMATMAP);
+    $self->retrieval_type($DEFAULTRETRIEVAL_TYPE);
+    $self->{'_default_format'} = $DEFAULTFORMAT;
+
+    return $self;
+}
+
+=head2 get_by_id
+
+ Title   : get_by_id
+ Usage   : $entry = $db->get_by_id('20063307')
+ Function: Gets a Bio::Biblio::RefI object by its name
+ Returns : a Bio::Biblio::Medline object
+ Args    : the id (as a string) of the reference
+
+=cut
+
+sub get_by_id {
+    my $self   = shift;
+    my $stream = $self->get_Stream_by_id([ shift ]);   # one-element id list
+    defined $stream or $self->throw("id does not exist");
+    return $stream->next_bibref();
+}
+
+
+=head2 get_all
+
+  Title   : get_all
+  Usage   : $seq = $db->get_all($ref);
+  Function: Retrieves reference objects from the server 'en masse',
+            rather than one at a time.  For large numbers of references,
+            this is far superior to calling get_by_id() repeatedly.
+  Example :
+  Returns : a stream of Bio::Biblio::Medline objects
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+=cut
+
+sub get_all {
+    my $self = shift;
+    return $self->get_seq_stream( -uids => $_[0], -mode => 'single' );   # one request for all ids
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : my $seqio = $self->get_seq_stream(%qualifiers)
+ Function: builds a url and queries a web db
+ Returns : a Bio::Biblio::IO stream capable of producing references
+ Args    : %qualifiers = a hash qualifiers that the implementing class 
+           will process to make a url suitable for web querying 
+
+=cut
+
+sub get_seq_stream {
+    my ($self, %qualifiers) = @_;
+    my ($rformat, $ioformat) = $self->request_format();
+    # An explicit format qualifier (any key containing "format") overrides
+    # the current request format; otherwise the current one is sent along.
+    my $seen = 0;
+    foreach my $key ( keys %qualifiers ) {
+        if( $key =~ /format/i ) {
+            $rformat = $qualifiers{$key};
+            $seen = 1;
+        }
+    }
+    $qualifiers{'-format'} = $rformat if( !$seen);
+    ($rformat, $ioformat) = $self->request_format($rformat);
+
+    my $request = $self->get_request(%qualifiers);
+    my $stream;
+    if ( $self->retrieval_type =~ /temp/i ) {
+        # Temp-file mode: the response is expected in $tmpfile after _request().
+        my $dir = $self->io()->tempdir( CLEANUP => 1);
+        my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
+        close $fh;
+        my ($resp) = $self->_request($request, $tmpfile);
+        if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
+            $self->throw("WebDBSeqI Error - check query sequences!\n");
+        }
+        $self->postprocess_data('type' => 'file', 'location' => $tmpfile);
+        # this may get reset when requesting batch mode
+        ($rformat,$ioformat) = $self->request_format();
+        if ( $self->verbose > 0 ) {
+            # Debug echo only: a failed open is reported, not fatal.
+            if ( open(my $ERR, "<", $tmpfile) ) {
+                while(<$ERR>) { $self->debug($_);}
+                close $ERR;
+            } else { $self->warn("could not reopen $tmpfile for debugging: $!"); }
+        }
+        $stream = Bio::Biblio::IO->new('-format' => $ioformat, '-file' => $tmpfile);
+    } elsif ( $self->retrieval_type =~ /io_string/i ) {
+        # String mode: the response body is parsed straight from memory.
+        my ($resp) = $self->_request($request);
+        my $content = $resp->content_ref;
+        $self->debug( "content is $$content\n");
+        if( ! $resp->is_success() || length($$content) == 0 ) {
+            $self->throw("WebDBSeqI Error - check query sequences!\n");
+        }
+        ($rformat,$ioformat) = $self->request_format();
+        $self->postprocess_data('type'=> 'string', 'location' => $content);
+        $stream = Bio::Biblio::IO->new('-format' => $ioformat, '-data' => $$content);
+    } else {
+        $self->throw("retrieval type " . $self->retrieval_type . " unsupported\n");
+    }
+    return $stream;
+}
+
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::Biblio::IO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string 
+                                           reference containing data
+
+=cut
+
+# the default method, works for genbank/genpept, other classes should
+# override it with their own method.
+
+sub postprocess_data {
+    my ($self, %args) = @_;
+    my $type     = defined $args{'type'} ? uc $args{'type'} : '';
+    my $location = $args{'location'};
+    return if $type eq '' || !defined $location;
+
+    # Load the downloaded data.  This default implementation does not alter
+    # it; a subclass can override the method to massage the data instead.
+    my $data;
+    if( $type eq 'STRING' ) {
+        $data = $$location;
+    } elsif ( $type eq 'FILE' ) {
+        open(my $in, "<", $location) or $self->throw("could not open file $location");
+        $data = join("", <$in>);
+        close $in;
+        # Write the data back; close() is checked because buffered write
+        # errors only surface there.
+        open(my $out, ">", $location) or $self->throw("could not overwrite file $location");
+        print $out $data;
+        close $out or $self->throw("could not finish writing file $location: $!");
+    } else { return; }   # unknown type: nothing to process
+    $$location = $data if $type eq 'STRING';
+    $self->debug("format is ". $self->request_format(). " data is $data\n");
+}
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::DB::Biblio::biofetch::VERSION;
+           print $Bio::DB::Biblio::biofetch::Revision;
+
+=cut
+
+=head2 Defaults
+
+ Usage   : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE;
+
+=cut
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/eutils.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/eutils.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/eutils.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,633 @@
+# $Id: eutils.pm,v 1.11.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module Bio::DB::Biblio::eutils.pm
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Biblio::eutils - Access to PubMed's bibliographic query service
+
+=head1 SYNOPSIS
+
+Do not use this object directly, it is recommended to access it and use
+it through the I<Bio::Biblio> module:
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio (-access => 'eutils');
+  $biblio->db('PMC'); #optional, default is PubMed.
+
+=head1 DESCRIPTION
+
+This object contains the real implementation of a Bibliographic Query
+Service as defined in L<Bio::DB::BiblioI>.
+
+L<Bio::DB::BiblioI> is not implemented as documented in the interface,
+particularly the find() method, which is not compatible with PubMed's
+query language.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (c) 2004 Allen Day, University of California, Los Angeles.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 BUGS AND LIMITATIONS
+
+=over
+
+=item *
+
+More testing and debugging needed to ensure that returned citations
+are properly transferred even if they contain foreign characters.
+
+=item *
+
+Maximum record count (MAX_RECORDS) returned currently hard coded to
+100K.
+
+=item *
+
+Biblio retrieval methods should be more tightly integrated with
+L<Bio::Biblio::Ref> and L<Bio::DB::MeSH>.
+
+=back
+
+=head1 SEE ALSO
+
+ Pub Med Help:
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
+
+ Entrez Utilities:
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+
+ Example code:
+ examples/biblio/biblio-eutils-example.pl
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::DB::BiblioI>.
+
+Here is the rest of the object methods.  Interface methods first,
+followed by internal methods.
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::DB::Biblio::eutils;
+use vars qw($DEFAULT_URN);
+use strict;
+
+use LWP::Simple;
+use XML::Twig;
+use URI::Escape;
+use base qw(Bio::Biblio Bio::DB::BiblioI);
+
+our $EFETCH      = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
+our $ESEARCH     = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi';
+our $MAX_RECORDS = 100_000;
+
+# -----------------------------------------------------------------------------
+
+=head2 _initialize
+
+ Usage   : my $obj = new Bio::Biblio (-access => 'eutils' ...);
+           (_initialize is internally called from this constructor)
+ Returns : 1 on success
+ Args    : none
+
+This is an actual new() method (except for the real object creation
+and its blessing which is done in the parent class Bio::Root::Root in
+method _create_object).
+
+Note that this method is called always as an I<object> method (never as
+a I<class> method) - and that the object who calls this method may
+already be partly initiated (from Bio::Biblio::new method); so if you
+need to do some tricks with the 'class invocation' you need to change
+Bio::Biblio::new method, not this one.
+
+=cut
+
+sub _initialize {
+    my $self  = shift;
+    my %param = @_;
+
+    # Not needed by eutils itself, but harmless to keep (original note by -ad).
+
+    # Give every argument a lowercase alias; the key as originally spelled
+    # stays in the table as well.
+    my @given_keys   = keys %param;
+    my @given_values = values %param;
+    @param{ map { lc $_ } @given_keys } = @given_values;
+
+    # Copy everything into the object, overwriting what may already be
+    # there, with the leading '-' of each key turned into '_'.
+    foreach my $key (keys %param) {
+        my $slot = $key;
+        $slot =~ s/^-/_/;
+        $self->{ $slot } = $param{ $key };
+    }
+
+    # Store a fresh XML parser through the twig() accessor.
+    $self->twig(XML::Twig->new());
+
+    # No default values are filled in at present.
+
+    return 1;
+}
+
+=head2 db
+
+ Title   : db
+ Usage   : $obj->db($newval)
+ Function: specifies the database to search.  valid values are:
+
+           pubmed, pmc, journals
+
+           it is also possible to add the following, and i will do
+           so on request:
+
+           genome, nucleotide, protein, popset, snp, sequence, taxonomy
+
+           pubmed is default.
+
+ Returns : value of db (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub db{
+    my($self,$arg) = @_;
+    if($arg){
+      my %ok = map {$_=>1} qw(pubmed pmc journals);
+      if($ok{lc($arg)}){
+        $self->{'db'} = lc($arg);
+      } else {
+        # '.' binds tighter than '||': parenthesise so the fallback name is used
+        $self->warn("invalid db $arg, keeping value as ".($self->{'db'} || 'pubmed'));
+      }
+    }
+    return $self->{'db'};
+}
+
+
+=head1 Methods implementing Bio::DB::BiblioI interface
+
+=head2 get_collection_id
+
+  Title   : get_collection_id
+  Usage   : $id = $biblio->get_collection_id();
+  Function: returns WebEnv value from ESearch
+  Returns : ESearch WebEnv value as a string
+  Args    : none
+
+
+=cut
+
+sub get_collection_id {
+   return $_[0]->collection_id();   # read-only alias for the collection_id accessor
+}
+
+sub get_count {
+  return $_[0]->count();   # read-only alias for the count accessor
+}
+
+sub get_by_id {
+  my ($self, $id) = @_;
+  my $db = $self->db || 'pubmed';
+  defined($id) or $self->throw("must provide valid ID, not undef");
+  # One EFetch request; whatever get() returns is handed back unparsed.
+  my $record = get("$EFETCH?rettype=abstract&retmode=xml&db=$db&id=$id");
+  return $record;
+}
+
+=head2 reset_retrieval
+
+  Title   : reset_retrieval
+  Usage   : $biblio->reset_retrieval();
+  Function: reset cursor in id list, see cursor()
+  Returns : 1
+  Args    : none
+
+
+=cut
+
+sub reset_retrieval {
+  $_[0]->cursor(0);   # rewind to the first stored id
+  return 1;
+}
+
+=head2 get_next
+
+  Title   : get_next
+  Usage   : $xml = $biblio->get_next();
+  Function: return next record as xml
+  Returns : an xml string
+  Args    : none
+
+
+=cut
+
+sub get_next {
+  my ($self) = @_;
+  $self->has_next or return;
+  # Fetch the record under the cursor, then step past it.
+  my $position = $self->cursor;
+  my ($id)     = @{ $self->ids }[$position];
+  my $record   = $self->get_by_id($id);
+  $self->cursor( $position + 1 );
+  return $record;
+}
+
+=head2 get_more
+
+  Title   : get_more
+  Usage   : $records = $biblio->get_more($more);
+  Function: returns up to $more of the next records
+  Returns : a reference to an array of xml strings, one per record
+  Args    : an integer representing how many records to retrieve
+
+
+=cut
+
+sub get_more {
+  my ($self,$more) = @_;
+  my @records;
+
+  # Pull up to $more records; a false result from get_next() (no more
+  # records, or an empty response) ends the batch early.
+  foreach my $n (1 .. $more) {
+    my $record = $self->get_next();
+    if ($record) { push @records, $record; }
+    else         { last; }
+  }
+  return \@records;
+}
+
+=head2 has_next
+
+  Title   : has_next
+  Usage   : $has_next = $biblio->has_next();
+  Function: check to see if there are more items to be retrieved
+  Returns : 1 on true, undef on false
+  Args    : none
+
+
+=cut
+
+sub has_next {
+  my ($position, $total) = ($_[0]->cursor, $_[0]->count);
+  return $position < $total ? 1 : undef;   # explicit undef, as documented
+}
+
+
+
+=head2 find
+
+  Title   : find
+  Usage   : $biblio = $biblio->find($pubmed_query_phrase);
+  Function: perform a PubMed query using Entrez ESearch
+  Returns : a reference to the object on which the method was called
+  Args    : a PubMed query phrase.  See
+            http://eutils.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
+            for help on how to construct a query.
+
+=cut
+
+sub find {
+  my ($self,$query) = @_;
+  # Runs an ESearch query and stores its outcome on the object:
+  #   ids, count    - the returned ids and how many there are
+  #   query_key,
+  #   collection_id - the reply's QueryKey and WebEnv values
+  #   cursor        - reset to 0
+  # Throws if the request fails or the reply lacks QueryKey/WebEnv.
+
+  # Build the ESearch request.  usehistory=y makes the server keep the
+  # result set, identified by the QueryKey/WebEnv pair read below.
+  $query = uri_escape($query);
+  my $db  = $self->db || 'pubmed';
+  my $url = $ESEARCH."?usehistory=y&db=$db&retmax=$MAX_RECORDS&term=$query";
+
+  my $xml = get($url) or $self->throw("couldn't retrieve results from $ESEARCH: $!");
+  $self->twig->parse($xml);
+
+  # count() is the number of ids actually returned (at most retmax),
+  # not the total hit count reported in the reply's <Count> element.
+  my @ids = map {$_->text} $self->twig->get_xpath('//IdList//Id');
+  $self->ids(\@ids);
+  $self->count(scalar(@ids));
+
+  # A reply without these elements cannot be used for later retrieval; say
+  # so explicitly instead of failing on a method call on undef.
+  my($querykey_element) = $self->twig->get_xpath('//QueryKey');
+  my($webenv_element)   = $self->twig->get_xpath('//WebEnv');
+  unless( defined $querykey_element && defined $webenv_element ) {
+    $self->throw("ESearch reply from $ESEARCH lacks QueryKey/WebEnv");
+  }
+  $self->query_key($querykey_element->text());
+  $self->collection_id($webenv_element->text());
+
+  #initialize/reset cursor
+  $self->cursor(0);
+
+  return $self;
+}
+
+
+=head2 get_all_ids
+
+  Title   : get_all_ids
+  Usage   : $ids = $biblio->get_all_ids();
+  Function: return the PubMed ids resulting from call to find()
+  Returns : a reference to an array of PubMed ids, or an empty list
+  Args    : none
+
+
+=cut
+
+sub get_all_ids {
+  my ($self) = @_;
+  my $stored = $self->ids();
+  return $stored ? $stored : ();   # stored id array ref, else empty list
+}
+
+=head2 get_all
+
+  Title   : get_all
+  Usage   : $xml = $biblio->get_all();
+  Function: retrieve all records from query
+  Returns : return a large concatenated string of PubMed xml documents
+  Args    : none
+
+
+=cut
+
+sub get_all {
+  my ($self) = @_;
+  my $db = $self->db || 'pubmed';
+
+  # Fetch the stored result set of the last find() from the history
+  # server.  The database must be the one that was searched, and retstart
+  # is a zero-based index, so 0 is needed to include the first record.
+  my $xml = get($EFETCH.'?rettype=abstract&retmode=xml&db='.$db.
+                '&query_key='.$self->query_key.'&WebEnv='.$self->collection_id.
+                '&retstart=0&retmax='.$MAX_RECORDS);
+  return $xml;
+}
+
+=head2 exists
+
+  Title   : exists
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub exists {
+  return;   # deliberately does nothing: interface-compatibility stub
+
+}
+
+=head2 destroy
+
+  Title   : destroy
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub destroy {
+  return;   # deliberately does nothing: interface-compatibility stub
+
+}
+
+=head2 get_vocabulary_names
+
+  Title   : get_vocabulary_names
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : empty arrayref
+  Args    : none
+
+
+=cut
+
+sub get_vocabulary_names {
+  return [];   # interface-compatibility stub: always an empty array ref
+}
+
+=head2 contains
+
+  Title   : contains
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub contains {
+  return;   # deliberately does nothing: interface-compatibility stub
+}
+
+=head2 get_entry_description
+
+  Title   : get_entry_description
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub get_entry_description {
+  return;   # deliberately does nothing: interface-compatibility stub
+}
+
+=head2 get_all_values
+
+  Title   : get_all_values
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub get_all_values {
+  return;   # deliberately does nothing: interface-compatibility stub
+}
+
+=head2 get_all_entries
+
+  Title   : get_all_entries
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub get_all_entries {
+  return;   # deliberately does nothing: interface-compatibility stub
+}
+
+=head1 Internal methods unrelated to Bio::DB::BiblioI
+
+=head2 cursor
+
+  Title   : cursor
+  Usage   : $obj->cursor($newval)
+  Function: holds position in reference collection
+  Returns : value of cursor (a scalar)
+  Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub cursor {
+  my ($self, $position) = @_;
+  # Only a defined argument updates the cursor; it cannot be reset to
+  # undef through this accessor.
+  $self->{'cursor'} = $position if defined $position;
+  return $self->{'cursor'};
+}
+
+=head2 twig
+
+  Title   : twig
+  Usage   : $obj->twig($newval)
+  Function: holds an XML::Twig instance.
+  Returns : value of twig (a scalar)
+  Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub twig {
+  my ($self, @value) = @_;
+  # Called with an argument (even undef): store it before returning.
+  $self->{'twig'} = $value[0] if @value;
+  return $self->{'twig'};
+}
+
+=head2 ids
+
+  Title   : ids
+  Usage   : $obj->ids($newval)
+  Function: store pubmed ids resulting from find() query
+  Returns : value of ids (a scalar)
+  Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub ids {
+  my ($self, @args) = @_;
+  # Any argument, undef included, replaces the stored id list.
+  $self->{'ids'} = $args[0] if @args;
+  return $self->{'ids'};
+}
+
+=head2 collection_id
+
+  Title   : collection_id
+  Usage   : $obj->collection_id($newval)
+  Function:
+  Returns : value of collection_id (a scalar)
+  Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub collection_id {
+  my ($self, @args) = @_;
+  # Get/set accessor; find() stores the ESearch WebEnv value here.
+  if (@args) { $self->{'collection_id'} = $args[0]; }
+  return $self->{'collection_id'};
+}
+
+=head2 count
+
+  Title   : count
+  Usage   : $obj->count($newval)
+  Function:
+  Returns : value of count (a scalar)
+  Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub count {
+  my ($self, @args) = @_;
+  # Get/set accessor; find() stores the number of returned ids here.
+  if (@args) { $self->{'count'} = $args[0]; }
+  return $self->{'count'};
+}
+
+=head2 query_key
+
+  Title   : query_key
+  Usage   : $obj->query_key($newval)
+  Function: holds query_key from ESearch document
+  Returns : value of query_key (a scalar)
+  Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub query_key {
+  my ($self, @args) = @_;
+  # Get/set accessor; find() stores the ESearch QueryKey value here.
+  if (@args) { $self->{'query_key'} = $args[0]; }
+  return $self->{'query_key'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/pdf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/pdf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/pdf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,530 @@
+# $Id: pdf.pm,v 1.6.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module Bio::DB::Biblio::pdf.pm
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Biblio::pdf - Fetch PDF for a PubMed ID
+
+=head1 SYNOPSIS
+
+Do not use this object directly, it is recommended to access it and use
+it through the I<Bio::Biblio> module:
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio (-access => 'pdf');
+
+=head1 DESCRIPTION
+
+This object contains the real implementation of a Bibliographic Query
+Service as defined in L<Bio::DB::BiblioI>.
+
+L<Bio::DB::BiblioI> is not implemented as documented in the interface,
+particularly the find() method, which is not compatible with PubMed's
+query language.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (c) 2004 Allen Day, University of California, Los Angeles.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 BUGS AND LIMITATIONS
+
+=over
+
+=item *
+
+Of course, you'll need access to the sites hosting the PDFs to download
+them.
+
+=item *
+
+If you're having problems retrieving a PDF from a site you have access to,
+you might try adjusting the max_depth() attribute.  It is set to 3 by
+default and controls how many links deep page fetches will recursively
+follow while trying to find your PDF.
+
+=back
+
+=head1 SEE ALSO
+
+ Pub Med Help
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
+
+ Entrez Utilities
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::DB::BiblioI>.
+
+Here is the rest of the object methods.  Interface methods first,
+followed by internal methods.
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::DB::Biblio::pdf;
+use vars qw($DEFAULT_URN);
+use strict;
+
+use Data::Dumper;
+use WWW::Mechanize;
+use base qw(Bio::Biblio Bio::DB::BiblioI);
+
+use constant DEBUG         => 1;
+use constant NCBI_BASE     => 'http://www.ncbi.nlm.nih.gov';
+use constant ABSTRACT_BASE => NCBI_BASE . '/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=';
+
+my %visit = (); #for spidering
+
+# -----------------------------------------------------------------------------
+
+=head2 _initialize
+
+ Usage   : my $obj = new Bio::Biblio (-access => 'pdf' ...);
+           (_initialize is internally called from this constructor)
+ Returns : 1 on success
+ Args    : none
+
+This is an actual new() method (except for the real object creation
+and its blessing which is done in the parent class Bio::Root::Root in
+method _create_object).
+
+Note that this method is called always as an I<object> method (never as
+a I<class> method) - and that the object who calls this method may
+already be partly initiated (from Bio::Biblio::new method); so if you
+need to do some tricks with the 'class invocation' you need to change
+Bio::Biblio::new method, not this one.
+
+=cut
+
+sub _initialize {
+    my ($self, @args) = @_;
+
+    #pdf doesn't need this code, but it doesn't hurt to leave it here... -ad
+
+    # turn @args into a hash and add a lowercased alias for every key
+    my %param = @args;
+    @param{ map { lc } keys %param } = values %param;
+
+    # defaults: recursion limit and counter, plus a browser-like user agent
+    $self->max_depth(3);
+    $self->depth(0);
+    $self->ua( WWW::Mechanize->new() );
+    $self->ua->agent_alias('Linux Mozilla');
+
+    #now override with passed hash
+
+    # copy every parameter into the object (overwriting what may
+    # already be there), stored under '_key' when it was given as '-key'
+    for my $name (keys %param) {
+        (my $slot = $name) =~ s/^-/_/;
+        $self->{$slot} = $param{$name};
+    }
+
+    #AOK
+    return 1;
+}
+
+=head2 get_next
+
+  Title   : get_next
+  Usage   : $pdf = $biblio->get_next();
+  Function: return the PDF fetched by the last find()
+  Returns : the PDF data (a scalar), undef if no PDF has been stored
+  Args    : none
+
+
+=cut
+
+sub get_next {
+  my $self = shift;
+
+  # There is only one "record" to hand out: whatever pdf() currently
+  # holds, i.e. the data stored by the most recent find().
+  return $self->pdf();
+}
+
+=head2 find
+
+  Title   : find
+  Usage   : $biblio = $biblio->find(1234);
+  Function: perform a PubMed query by PubMed ID
+  Returns : a reference to the object on which the method was called
+  Args    : a PubMed ID
+
+=cut
+
+sub find {
+  my ($self,$id) = @_;
+  $self->{pdf} = undef;   # forget any PDF left over from an earlier query
+  $self->_process_pubmed_html($id);
+  return $self;           # documented contract: $biblio = $biblio->find($id)
+}
+
+=head2 exists
+
+  Title   : exists
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub exists {
+  return;   # no-op, present only for Bio::DB::BiblioI interface compatibility
+
+}
+
+=head2 destroy
+
+  Title   : destroy
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub destroy {
+  return;   # no-op, present only for Bio::DB::BiblioI interface compatibility
+
+}
+
+=head2 get_vocabulary_names
+
+  Title   : get_vocabulary_names
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : empty arrayref
+  Args    : none
+
+
+=cut
+
+sub get_vocabulary_names {
+  return [];   # no-op for interface compatibility: always an empty arrayref
+}
+
+=head2 contains
+
+  Title   : contains
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub contains {
+  return;   # no-op, present only for Bio::DB::BiblioI interface compatibility
+}
+
+=head2 get_entry_description
+
+  Title   : get_entry_description
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub get_entry_description {
+  return;   # no-op, present only for Bio::DB::BiblioI interface compatibility
+}
+
+=head2 get_all_values
+
+  Title   : get_all_values
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub get_all_values {
+  return;   # no-op, present only for Bio::DB::BiblioI interface compatibility
+}
+
+=head2 get_all_entries
+
+  Title   : get_all_entries
+  Usage   : do not use
+  Function: no-op.  this is here only for interface compatibility
+  Returns : undef
+  Args    : none
+
+
+=cut
+
+sub get_all_entries {
+  return;   # no-op, present only for Bio::DB::BiblioI interface compatibility
+}
+
+=head1 Internal methods unrelated to Bio::DB::BiblioI
+
+=cut
+
+=head2 depth()
+
+ Usage   : $obj->depth($newval)
+ Function: track link recursion depth
+ Example : 
+ Returns : value of depth (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub depth {
+  my($self,$val) = @_;
+  $self->{'depth'} = $val if @_ > 1;   # any argument sets, so undef can reset it
+  return $self->{'depth'};
+}
+
+=head2 max_depth()
+
+ Usage   : $obj->max_depth($newval)
+ Function: how far should link recursion go?
+ Example : 
+ Returns : value of max_depth (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub max_depth {
+  my($self,$val) = @_;
+  $self->{'max_depth'} = $val if @_ > 1;   # any argument sets, so undef can reset it
+  return $self->{'max_depth'};
+}
+
+
+=head2 ua()
+
+ Usage   : $obj->ua($newval)
+ Function: holds an LWP::UserAgent instance
+ Example : 
+ Returns : value of ua (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub ua {
+  my($self,$val) = @_;
+  $self->{'ua'} = $val if @_ > 1;   # any argument sets, so undef can clear it
+  return $self->{'ua'};
+}
+
+sub _process_pubmed_html {
+  # Load the PubMed abstract page for $id, follow its full-text link and
+  my ($self,$id) = @_;   # store the PDF found there; returns nothing if no link
+
+  $self->ua->get( ABSTRACT_BASE . $id );
+  my $page = $self->ua->content();
+
+  #here is the treasure: the publisher link that follows the pager header.
+  #Capture through list assignment; after a failed match $1 still holds
+  #the caller's last capture, so testing defined($1) is unreliable.
+  my ($fulltext_url) = $page =~ m|<!---- Pager -- \(page header\) -- end ------>.+?<SPAN><a href="(.+?)" onClick="window.open|s;
+  return unless defined $fulltext_url;
+
+  $self->ua->follow_link( url => $fulltext_url );
+
+  #uncomment this to do site crawl -- old style
+  #$self->_crawl();
+
+  my $pdf_url = $self->guess_pdf_url($self->ua->uri);
+  $self->throw( "didn't recognize pattern in '".$self->ua->uri."', please patch module" ) unless $pdf_url;
+  $self->ua->get( $pdf_url );
+  my $content = $self->ua->content();
+  $self->pdf( $content );
+}
+
+sub guess_pdf_url {
+  my($self,$url) = @_;
+  # Work out the PDF URL for the article page at $url (some sites need the ua).
+  #cancer research
+  if( $url =~ m!^(.+?)/cgi/content/full/(\d+)/(\d+)/(\d+)/?$! ) {
+    return qq($1/cgi/reprint/$2/$3/$4.pdf);
+  }
+  #nature
+  #http://www.nature.com/cgi-taf/DynaPage.taf?file=/onc/journal/v18/n27/abs/1202776a.html&dynoptions=doi1108513968
+  #http://www.nature.com/cgi-taf/DynaPage.taf?file=/onc/journal/v18/n27/full/1202776a.html&filetype=pdf
+  elsif( $url =~ m!^(.+?cgi-taf/DynaPage\.taf.+?)/journal/(.+?)/abs/(.+?\.html)! ) {
+    return qq($1/journal/$2/full/$3\&filetype=pdf);
+  }
+  #science direct
+  #these pages contain some unpredictable md5 bullshit, so we need to parse the page
+  #http://www.sciencedirect.com/science?_ob=ArticleURL&_udi=B6VPM-480CTTS-5&_coverDate=04%2F30%2F2003&_alid=247076467&_rdoc=1&_fmt=&_orig=search&_qd=1&_cdi=6210&_sort=d&view=c&_acct=C000059605&_version=1&_urlVersion=0&_userid=4423&md5=8054dea49e32e98a6b30b206ea47fbfe
+  #http://www.sciencedirect.com/science?_ob=MImg&_imagekey=B6VPM-480CTTS-5-5&_cdi=6210&_user=4423&_orig=search&_coverDate=04%2F30%2F2003&_qd=1&_sk=999779997&view=c&wchp=dGLbVtz-zSkzV&md5=5b04979d84dab066be5cde52fd2affa7&ie=/sdarticle.pdf
+  elsif( $url =~ m!^(.+?science\?_ob=)ArticleURL(.+?)$! ) {
+    my $link = $self->ua->find_link( text_regex => qr/PDF \(.+?\)/s );
+    return unless $link;
+    return $link->url_abs();
+  }
+  #genome biology
+  #http://genomebiology.com/2003/4/7/R43
+  #http://genomebiology.com/content/pdf/gb-2003-4-7-r43.pdf
+  elsif( $url =~ m!^(.+?genomebiology\.com)/(\d+)/(\d+)/(\d+)/(.+?)/?$! ) {
+    my $file = lc(sprintf("gb-%d-%d-%d-%s.pdf",$2,$3,$4,$5));
+    return qq($1/content/pdf/$file);
+  }
+  #wiley interscience
+  #http://www3.interscience.wiley.com/cgi-bin/abstract/91013753/ABSTRACT
+  #http://www3.interscience.wiley.com/cgi-bin/fulltext/91013753/PDFSTART
+  #http://download.interscience.wiley.com/cgi-bin/fulltext?ID=96515300&PLACEBO=IE.pdf&mode=pdf
+  elsif( $url =~ m!^(.+?/cgi-bin)/abstract/(\d+?)/ABSTRACT$! ) {
+    $self->ua->get( qq($1/fulltext/$2/PDFSTART) );
+    my $link = $self->ua->find_link( url_regex => qr/fulltext/ );
+    return unless $link;
+    return $link->url_abs();
+  }
+  #nar, bioinformatics
+  #http://nar.oupjournals.org/cgi/content/full/32/suppl_1/D258
+  #http://nar.oupjournals.org/cgi/reprint/32/suppl_1/D258.pdf
+  elsif( $url =~ m!^(.+?oupjournals\.org/cgi)/reprint/(.+?)$! ) {
+    return qq($1/reprint/$2.pdf);
+  }
+  elsif( $url =~ m!^(.+?oupjournals\.org/cgi)/content/full/(.+?)$! ) {
+    return qq($1/reprint/$2.pdf);
+  }
+
+  #plos
+  #http://biology.plosjournals.org/plosonline/?request=get-document&doi=10.1371/journal.pbio.0020009
+  #http://www.plosbiology.org/archive/1545-7885/2/1/pdf/10.1371_journal.pbio.0020009-S.pdf
+  elsif( $url =~ m!^http://[^.]+?\.plos! ) {
+    my $link = $self->ua->find_link( text_regex => qr/^Screen/s );
+    return unless $link;
+    return $link->url_abs();
+  }
+  #bmc bioinformatics
+  #http://www.biomedcentral.com/1471-2105/2/7
+  #http://www.biomedcentral.com/content/pdf/1471-2105-2-7.pdf
+  elsif( $url =~ m!^(.+?biomedcentral.+?)/(\d+\-\d+)/(\d+)/(\d+)/?$! ) {
+    my $file = lc(sprintf("%s-%d-%d.pdf",$2,$3,$4));
+    return qq($1/content/pdf/$file);
+  }
+
+  # unknown site: the caller reports the failure, so only mention it when debugging
+  warn $url if DEBUG;
+  return;
+}
+
+
+# sub _crawl {
+#   my( $self ) = @_;
+
+#   return if $self->depth() == $self->max_depth();
+#   return if $self->pdf();
+
+#   $self->depth( $self->depth + 1 );
+
+#   #try to find "PDF" link first
+#   my ( $link ) = $self->ua->find_link( text_regex => qr/PDF|View article/ );
+#   if ( $link ) {
+#     $self->_fetch_pdf( $link );
+#   }
+#   else {
+#     foreach my $link ( $self->ua->find_all_links ) {
+#       $self->_fetch_pdf( $link );
+#     }
+#   }
+
+#   $self->depth( $self->depth - 1 );
+
+#   return;
+# }
+
+# sub _fetch_pdf {
+#   my $self = shift;
+#   my $link = shift;
+
+#   return if $visit{ $link->url_abs };
+#   $visit{ $link->url_abs }++;
+
+#   $self->ua->get( $link->url_abs );
+#   print "[" . $self->depth() . "] fetching: " . $link->url_abs . " " . $self->ua->ct() . "\n" if DEBUG;
+
+#   #test for a likely string "href", because some misconfigured webservers will send pdf
+#   #as text/html
+#   if ( $self->ua->ct() eq 'application/pdf' or
+#        ( $self->ua->ct() =~ /text/ and $self->ua->content !~ /href|src/is )
+#      ) {
+#     print "*****FOUND IT (" . $link->url_abs . ") *****\n" if DEBUG;
+
+#     $self->ua->get( $link->url_abs );
+#     my $content = $self->ua()->content();
+#     $self->pdf( $content );
+#   }
+#   else {
+#     $self->_crawl();
+#   }
+# }
+
+=head2 pdf()
+
+ Usage   : $obj->pdf($newval)
+ Function: holds pdf data
+ Example : 
+ Returns : value of pdf (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub pdf {
+  my($self,$val) = @_;
+  $self->{'pdf'} = $val if @_ > 1;   # any argument sets, so undef can clear it
+  return $self->{'pdf'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/soap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/soap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Biblio/soap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,543 @@
+# $Id: soap.pm,v 1.10.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module Bio::DB::Biblio::soap.pm
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Biblio::soap - A SOAP-based access to a bibliographic query service
+
+=head1 SYNOPSIS
+
+Do not use this object directly, it is recommended to access it and use
+it through the I<Bio::Biblio> module:
+
+  use Bio::Biblio;
+  my $biblio = Bio::Biblio->new (-access => 'soap');
+
+=head1 DESCRIPTION
+
+This object contains the real implementation of a Bibliographic Query
+Service as defined in L<Bio::DB::BiblioI> - using a SOAP protocol
+to access a WebService (a remote server) that represents a
+bibliographic repository.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (martin.senger at gmail.com)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 BUGS AND LIMITATIONS
+
+=over
+
+=item *
+
+Methods returning a boolean value (I<has_next>, I<exists> and
+I<contains>) can be used only with SOAP::Lite version 0.52 and newer
+(probably due to a bug in the older SOAP::Lite).
+
+=item *
+
+It does not use WSDL.
+
+=item *
+
+More testing and debugging needed to ensure that returned citations
+are properly transferred even if they contain foreign characters.
+
+=back
+
+=head1 APPENDIX
+
+The main documentation details are to be found in
+L<Bio::DB::BiblioI>.
+
+Here is the rest of the object methods.  Internal methods are preceded
+with an underscore _.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::Biblio::soap;
+use vars qw($DEFAULT_SERVICE $DEFAULT_NAMESPACE);
+use strict;
+
+use SOAP::Lite
+    on_fault => sub {
+	my $soap = shift;
+	my $res = shift;
+	my $msg =
+	    ref $res ? "--- SOAP FAULT ---\n" . $res->faultcode . " " . $res->faultstring
+		     : "--- TRANSPORT ERROR ---\n" . $soap->transport->status . "\n$res\n";
+        Bio::DB::Biblio::soap->throw ( -text => $msg );
+    }
+;
+
+use base qw(Bio::Biblio);
+
+BEGIN {
+    # where to go...
+    $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/openbqs/services/MedlineSRS';
+
+    # ...and what to find there
+    $DEFAULT_NAMESPACE = 'http://industry.ebi.ac.uk/openBQS';
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 _initialize
+
+ Usage   : my $obj = new Bio::Biblio (-access => 'soap' ...);
+           (_initialize is internally called from this constructor)
+ Returns : nothing interesting
+ Args    : This module recognises and uses following arguments:
+
+             -namespace => 'urn'
+               The namespace used by the WebService that is being
+               accessed. It is a string which guarantees its world-wide
+               uniqueness - therefore it often has a style of a URL -
+               but it does not mean that such pseudo-URL really exists.
+
+               Default is 'http://industry.ebi.ac.uk/openBQS'.
+
+             -destroy_on_exit => '0'
+                Default value is '1' which means that all Bio::Biblio
+                objects - when being finalised - will send a request
+                to the remote WebService to forget the query collections
+                they represent.
+
+                If you change it to '0' make sure that you know the
+                query collection identification - otherwise you will
+                not be able to re-establish the connection with it.
+                This can be done by calling method get_collection_id.
+
+              -collection_id => '...'
+                It defines what query collection will this object work
+                with. Use this argument when you know a collection ID
+                of an existing query collection and when you wish to
+                re-establish the connection with it.
+
+                By default, the collection IDs are set automatically
+                by the query methods - they return Bio::Biblio objects
+                already having a collection ID.
+
+                A missing or undefined collection ID means that the
+                object represents the whole bibliographic repository
+                (which again means that some methods, like get_all,
+                will be probably refused).
+
+              -soap => a SOAP::Lite object
+                Usually all Bio::Biblio objects share an instance of
+                the underlying SOAP::Lite module. But you are free
+                to have more - perhaps with different characteristics.
+
+                See the code for attributes of the default SOAP::Lite
+                object.
+
+              -httpproxy => 'http://server:port'
+                 In addition to the 'location' parameter, you may need
+                 to specify also a location/URL of a HTTP proxy server
+                 (if your site requires one).
+
+	   Additionally, the main module Bio::Biblio recognises
+	   also:
+             -access => '...'
+             -location => '...'
+
+It populates calling object with the given arguments, and then - for
+some attributes and only if they are not yet populated - it assigns
+some default values.
+
+This is an actual new() method (except for the real object creation
+and its blessing which is done in the parent class Bio::Root::Root in
+method _create_object).
+
+Note that this method is called always as an I<object> method (never as
+a I<class> method) - and that the object who calls this method may
+already be partly initiated (from Bio::Biblio::new method); so if you
+need to do some tricks with the 'class invocation' you need to change
+Bio::Biblio::new method, not this one.
+
+=cut
+
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # turn @args into a hash and add a lowercased alias for every key
+    my %param = @args;
+    @param{ map { lc } keys %param } = values %param;
+
+    # copy every parameter into this object (overwriting what may
+    # already be there), stored under '_key' when it was given as '-key'
+    for my $name (keys %param) {
+        (my $slot = $name) =~ s/^-/_/;
+        $self->{$slot} = $param{$name};
+    }
+
+    # fill in defaults for whatever the caller did not provide; note that
+    # a false location or namespace is replaced, while destroy_on_exit is
+    # only defaulted when undefined, so an explicit 0 survives
+    $self->{'_location'}  ||= $DEFAULT_SERVICE;
+    $self->{'_namespace'} ||= $DEFAULT_NAMESPACE;
+    $self->{'_destroy_on_exit'} = 1 unless defined $self->{'_destroy_on_exit'};
+
+    # reuse a SOAP::Lite object passed in by the caller, otherwise build one
+    unless ($self->{'_soap'}) {
+        # the endpoint URL, plus the HTTP proxy setting when one was given
+        my @proxy_args = ($self->{'_location'});
+        push @proxy_args, proxy => ['http' => $self->{'_httpproxy'}]
+            if defined $self->{'_httpproxy'};
+
+        $self->{'_soap'} = SOAP::Lite
+                              -> uri ($self->{'_namespace'})
+                              -> proxy (@proxy_args);
+#       $self->{'_soap'}->soapversion (1.2);
+    }
+}
+
+# -----------------------------------------------------------------------------
+
+#
+# objects representing query collections are being destroyed if they
+# have attribute '_destroy_on_exit' set to true - which is a default
+# value
+#
+sub DESTROY {
+    my $self = shift;
+    return unless $self->{'_destroy_on_exit'};
+    my $collection_id = $self->{'_collection_id'};
+    return unless $collection_id;
+    my $soap = $self->{'_soap'};
+
+    # Best effort: ignore all errors here.  $@ is localized because a
+    # destructor can run while the caller is still inspecting its own $@.
+    local $@;
+    eval {
+	$soap->destroy (SOAP::Data->type (string => $collection_id));
+    };
+}
+
+#
+# some methods must be called with an argument containing a collection
+# ID; here we return a proper error message explaining it
+#
+sub _no_id_msg {
+    my ($self) = @_;
+    my $class  = ref $self;
+    # fully qualified name of the method that asked for this message
+    my $method = (caller(1))[3];
+    (my $strip_method = $method) =~ s/^$class\:\://;
+
+    return <<"END_OF_MSG";
+Method '$method' works only if its object has a query collection ID.
+Perhaps you need to use:
+\tnew Bio::Biblio (-collection_id => '1234567')->$strip_method;
+or to obtain a collection ID indirectly from a query method:
+\tnew Bio::Biblio->find ('keyword')->$strip_method;
+END_OF_MSG
+}
+    
+#
+# some methods do not work with older SOAP::Lite version; here we
+#return message explaining it
+#
+sub _old_version_msg {
+    # fully qualified name of the method that asked for this message
+    my $method = (caller(1))[3];
+
+    return <<"END_OF_MSG";
+Method '$method' works only with SOAP::Lite
+version 0.52 and newer (the problem is with returning a boolean value from the server).
+END_OF_MSG
+}
+
+#
+# some controlled vocabulary methods needs two parameters; here we
+# return message explaining it
+#
+sub _two_params_msg {
+    # fully qualified name of the method that asked for this message
+    my $method = (caller(1))[3];
+
+    return <<"END_OF_MSG";
+Method '$method' expects two parameters: vocabulary name and a value.
+END_OF_MSG
+}
+
+#
+# some controlled vocabulary methods needs a vocabulary name; here we
+# return message explaining it
+#
+sub _missing_name_msg {
+    # fully qualified name of the method that asked for this message
+    my $method = (caller(1))[3];
+
+    return <<"END_OF_MSG";
+Method '$method' expects vocabulary name as parameter.
+END_OF_MSG
+}
+
+# 
+# return a copy of a given array, with all its elements replaced
+# with the SOAP-Data objects defining elements type as 'string'
+#
+sub _as_strings {
+    my ($ref_input_array) = @_;
+    # wrap each element so that it is sent with an explicit 'string' type
+    return [ map { SOAP::Data->new (type => 'string', value => $_) } @{ $ref_input_array } ];
+}
+    
+# ---------------------------------------------------------------------
+#
+#   Here are the methods implementing Bio::DB::BiblioI interface
+#   (documentation is in Bio::DB::BiblioI)
+#
+# ---------------------------------------------------------------------
+
+sub get_collection_id {
+   my $self = shift;
+   return $self->{'_collection_id'};   # as set by the constructor or a query method
+}
+
+sub get_count {
+   my $self = shift;
+   my $soap = $self->{'_soap'};
+   my $id   = $self->{'_collection_id'};
+
+   # without a collection ID the count is requested with no argument
+   return $soap->getBibRefCount->result unless $id;
+   # otherwise the count of that particular query collection is requested
+   return $soap->getBibRefCountOfCollection (SOAP::Data->type (string => $id))->result;
+}
+
+# try: 12368254 (it's a Bioperl article)
+sub get_by_id {
+   my ($self, $citation_id) = @_;
+   if (! $citation_id) {
+       $self->throw ("Citation ID is expected as a parameter of method 'get_by_id'.");
+   }
+   return $self->{'_soap'}->getById (SOAP::Data->type (string => $citation_id))->result;
+}
+
+sub find {
+   my ($self, $keywords, $attrs) = @_;
+
+   # both $keywords and $attrs can be a comma-delimited scalar or a
+   # reference to an array; anything else yields an empty list
+   my $to_list = sub {
+       my ($input) = @_;
+       return () unless $input;
+       my $type = ref $input;
+       return split (/,/, $input) unless $type;
+       return $type =~ /ARRAY/ ? @$input : ();
+   };
+   my @keywords = $to_list->($keywords);
+   my @attrs    = $to_list->($attrs);
+   $self->throw ("No keywords given in 'find' method.\n")
+       unless (@keywords);
+
+   my $soap = $self->{'_soap'};
+   my $collection_id = $self->{'_collection_id'};
+
+   # The remote method depends on what is being searched:
+   #   find / findInAttrs      - no collection ID is known yet
+   #   reFind / reFindInAttrs  - an existing query collection
+   # and its positional arguments are named arg0, arg1, ... in the order
+   # [collection ID,] keywords [, attributes].
+   my $remote_method = ($collection_id ? 'reFind' : 'find')
+                     . (@attrs         ? 'InAttrs' : '');
+
+   # build the argument list, numbering the arguments as they are added
+   my $position = 0;
+   my @soap_args;
+   if ($collection_id) {
+       push @soap_args,
+           SOAP::Data->name ('arg' . $position++)->type (string => $collection_id);
+   }
+   push @soap_args,
+       SOAP::Data->name ('arg' . $position++)->value (_as_strings (\@keywords));
+   if (@attrs) {
+       push @soap_args,
+           SOAP::Data->name ('arg' . $position++)->value (_as_strings (\@attrs));
+   }
+
+   # run the query; the result is the ID of the new collection
+   my $new_id = $soap->$remote_method (@soap_args)->result;
+
+   # clone itself but change the collection ID to a new one
+   return $self->new (-collection_id        => $new_id,
+                      -parent_collection_id => $collection_id);
+}
+
+sub get_all_ids {
+   my $self = shift;
+   # this call needs a query collection, so an ID is mandatory
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   return $self->{'_soap'}->getAllIDs (SOAP::Data->type (string => $id))->result;
+}
+
+sub get_all {
+   my $self = shift;
+   # this call needs a query collection, so an ID is mandatory
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   return $self->{'_soap'}->getAllBibRefs (SOAP::Data->type (string => $id))->result;
+}
+
+sub has_next {
+   my $self = shift;
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   # boolean results are refused for SOAP::Lite versions older than 0.52
+   $self->throw ($self->_old_version_msg) if $SOAP::Lite::VERSION lt '0.52';
+   return $self->{'_soap'}->hasNext (SOAP::Data->type (string => $id))->result;
+}
+
+sub get_next {
+   my $self = shift;
+   # this call needs a query collection, so an ID is mandatory
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   return $self->{'_soap'}->getNext (SOAP::Data->type (string => $id))->result;
+}
+
+sub get_more {
+   my ($self, $how_many) = @_;
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+
+   if (! defined $how_many or $how_many !~ /^\d+$/) {
+       $self->warn ("Method 'get_more' expects a numeric argument. Changing to 1.\n");
+       $how_many = 1;
+   }
+   if ($how_many <= 0) {
+       $self->warn ("Method 'get_more' expects a positive argument. Changing to 1.\n");
+       $how_many = 1;
+   }
+
+   # the first element of the answer becomes this object's collection ID
+   my $ra = $self->{'_soap'}->getMore (SOAP::Data->type (string => $id),
+                                       SOAP::Data->type (int    => $how_many))->result;
+   $self->{'_collection_id'} = shift @{ $ra };
+   return $ra;
+}
+
+sub reset_retrieval {
+   my $self = shift;
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   return $self->{'_collection_id'} =   # the answer replaces the stored ID
+       $self->{'_soap'}->resetRetrieval (SOAP::Data->type (string => $id))->result;
+}
+
+sub exists {
+   my $self = shift;
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   # boolean results are refused for SOAP::Lite versions older than 0.52
+   $self->throw ($self->_old_version_msg) if $SOAP::Lite::VERSION lt '0.52';
+   return $self->{'_soap'}->exists (SOAP::Data->type (string => $id))->result;
+}
+
+sub destroy {
+   my $self = shift;
+   # explicit request: no eval here, so a failing remote call is not hidden
+   my $id = $self->{'_collection_id'}
+       or $self->throw ($self->_no_id_msg);
+   return $self->{'_soap'}->destroy (SOAP::Data->type (string => $id));
+}
+
+sub get_vocabulary_names {
+   my $self = shift;
+   # neither arguments nor a collection ID are involved in this call
+   return $self->{'_soap'}->getAllVocabularyNames->result;
+}
+
+sub contains {
+   my ($self, $vocabulary_name, $value) = @_;
+   $self->throw ($self->_old_version_msg) if $SOAP::Lite::VERSION lt '0.52';
+   if (! defined $vocabulary_name or ! defined $value) {
+       $self->throw ($self->_two_params_msg);
+   }
+   return $self->{'_soap'}->contains (SOAP::Data->type (string => $vocabulary_name),
+                                      SOAP::Data->type (string => $value))->result;
+}
+
+sub get_entry_description {
+   my ($self, $vocabulary_name, $value) = @_;
+   if (! defined $vocabulary_name or ! defined $value) {
+       $self->throw ($self->_two_params_msg);
+   }
+   return $self->{'_soap'}->getEntryDescription (SOAP::Data->type (string => $vocabulary_name),
+                                                 SOAP::Data->type (string => $value))->result;
+}
+
+sub get_all_values {
+   my ($self, $vocabulary_name) = @_;
+   if (! defined $vocabulary_name) {
+       $self->throw ($self->_missing_name_msg);
+   }
+   return $self->{'_soap'}->getAllValues (SOAP::Data->type (string => $vocabulary_name))->result;
+}
+
+sub get_all_entries {
+   my ($self, $vocabulary_name) = @_;
+   if (! defined $vocabulary_name) {
+       $self->throw ($self->_missing_name_msg);
+   }
+   return $self->{'_soap'}->getAllEntries (SOAP::Data->type (string => $vocabulary_name))->result;
+}
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::DB::Biblio::soap::VERSION;
+           print $Bio::DB::Biblio::soap::Revision;
+
+=cut
+
+=head2 Defaults
+
+ Usage   : print $Bio::DB::Biblio::soap::DEFAULT_SERVICE;
+           print $Bio::DB::Biblio::soap::DEFAULT_NAMESPACE;
+
+=cut
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/BiblioI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/BiblioI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/BiblioI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,483 @@
+# $Id: BiblioI.pm,v 1.10.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::BiblioI
+#
+# Cared for by Martin Senger <senger at ebi.ac.uk>
+# For copyright and disclaimer see below.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::BiblioI - An interface to a Bibliographic Query Service
+
+=head1 SYNOPSIS
+
+This is an interface module - you do not instantiate it.
+Use I<Bio::Biblio> module:
+
+  use Bio::Biblio;
+  my $biblio = new Bio::Biblio (@args);
+
+=head1 DESCRIPTION
+
+This interface describes the methods for accessing a bibliographic
+repository, for querying it and for retrieving citations from it. The
+retrieved citations are in XML format and can be converted to perl
+objects using I<Bio::Biblio::IO>.
+
+The interface complies (with some simplifications) with the
+specification described in the B<OpenBQS> project. Its home page is at
+http://www.ebi.ac.uk/~senger/openbqs/.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (martin.senger at gmail.com)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+This is actually the main documentation...
+
+If you try to call any of these methods directly on this
+Bio::DB::BiblioI object you will get a I<not implemented> error
+message. You need to call them on a Bio::Biblio object.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::DB::BiblioI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+# -----------------------------------------------------------------------------
+
+=head2 get_collection_id
+
+ Usage   : my $collection_id = $biblio->get_collection_id;
+ Returns : string identifying a query collection
+           represented by the $biblio object
+ Args    : none
+
+Every query collection is uniquely identifiable by its collection
+ID. The returned value can be used to populate another $biblio object
+and then to access that collection.
+
+=cut
+
+sub get_collection_id {
+   my ($self, @args) = @_;    # interface stub: arguments are accepted but not used
+   $self->throw_not_implemented();
+}
+
+
+# -----------------------------------------------------------------------------
+
+=head2 get_count
+
+ Usage   : my $count = $biblio->get_count;
+ Returns : integer
+ Args    : none, or a string identifying a query collection
+
+It returns a number of citations in the query collection represented
+by the calling $biblio object, or in the collection whose ID is given
+as an argument.
+
+=cut
+
+sub get_count { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 find
+
+ Usage   : my $new_biblio = $biblio->find ($keywords, $attrs);
+           my $new_biblio = $biblio->find ('perl', 'abstract');
+           my $new_biblio = $biblio->find ( [ 'perl', 'Java' ] );
+ Returns : new Bio::Biblio object representing a new query
+           collection
+ Args    : $keywords - what to look for (mandatory)
+            - a comma-delimited list of keywords, or
+            - an array reference with keywords as elements
+           $attrs - where to look in (optional)
+            - a comma-delimited list of attribute names, or
+            - an array reference with attribute names as elements
+
+This is the main query method. It looks for the $keywords in a default
+set of attributes, or - if $attrs given - only in the given
+attributes.
+
+Because it returns a new Bio::Biblio object which can be again queried
+it is possible to chain together several invocations:
+
+    $biblio->find ('Brazma')->find ('Robinson')->get_collection_id;
+
+=cut
+
+sub find { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+# TBD: AFAIK this method is not implemented on the server side.
+#      Let's comment it out for the time being...
+#sub query { shift->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 reset_retrieval
+
+ Usage   : $biblio->reset_retrieval;
+ Returns : nothing
+ Args    : none
+
+It sets an iterator stored in the $biblio object back to its
+beginning. After this, the retrieval methods I<has_next>, I<get_next>
+and I<get_more> start to iterate the underlying query collection
+again from its start.
+
+It throws an exception if this object does not represent any query
+result (i.e. it does not contain a collection ID). Note that a
+collection ID is created automatically when this object was returned
+by a I<find> method, or it can be assigned in a constructor using
+argument I<-collection_id>.
+
+=cut
+
+sub reset_retrieval { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_next
+
+ Usage   : my $citation = $biblio->get_next;
+ Returns : a citation in an XML format
+ Args    : none
+
+It returns the next available citation from the underlying query
+collection. It throws an exception if there are no more citations. In
+order to avoid this, use it together with the I<has_next> method:
+
+  my $result = $biblio->find ('brazma', 'authors');
+  while ( $result->has_next ) {
+      print $result->get_next;
+  }
+
+It also throws an exception if this object does not represent any
+query result - see explanation in the I<reset_retrieval> elsewhere in
+this document.
+
+=cut
+
+sub get_next { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_more
+
+ Usage   : my $r_citations = $biblio->get_more (5);
+ Returns : an array reference - each element has a citation
+           in an XML format
+ Args    : an integer 'how_many' citations to return;
+           default is 1 - but it is assigned with warning
+
+It returns the next I<how_many> available citations from the
+underlying query collection. It does not throw any exception if
+'how_many' is more than currently available - it simply returns
+less. However, it throws an exception if used again without calling
+first I<reset_retrieval>.
+
+It also throws an exception if this object does not represent any
+query result - see explanation in method I<reset_retrieval> elsewhere
+in this document.
+
+=cut
+
+sub get_more { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 has_next
+
+ Usage   : my $is = $biblio->has_next;
+ Returns : 1 or undef
+ Args    : none
+
+It returns 1 if there is a next citation available in the underlying
+query collection. Otherwise it returns undef.
+
+It throws an exception if this object does not represent any query
+result - see explanation in method I<reset_retrieval> elsewhere in
+this document.
+
+=cut
+
+sub has_next { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_all_ids
+
+ Usage   : my $r_ids = $biblio->get_all_ids;
+ Returns : an array reference - each element has
+           a citation identifier
+ Args    : none
+
+The identifiers of all citations in the underlying query collection
+are returned. A usual pattern is to use them then in the I<get_by_id>
+method:
+
+    my $biblio = $repository->find ('brazma')->find ('robinson');
+    foreach my $id ( @{ $biblio->get_all_ids } ) {
+        print $biblio->get_by_id ($id);
+    }
+
+It throws an exception if this object does not represent any query
+result - see explanation in method I<reset_retrieval> elsewhere in
+this document.
+
+=cut
+
+sub get_all_ids { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_by_id
+
+ Usage   : my $citation = $biblio->get_by_id ('12368254');
+ Returns : a citation in an XML format
+ Args    : a citation identifier (PMID for Medline)
+
+It returns a citation - disregarding if the citation is or is not in
+the underlying query collection (of course, it must be in the
+repository).
+
+=cut
+
+sub get_by_id { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_all
+
+ Usage   : my $all = $biblio->get_all;
+ Returns : a (big) string with all citations in an XML format
+ Args    : none
+
+It returns an XML valid string (which means that individual citations
+are also surrounded by a "set" XML tag) representing all citations
+from the underlying query collection.
+
+Note that some servers may limit the number of citations which can be
+returned by this method. In such case you need either to refine
+further your query collection (using I<find> method) or to retrieve
+results by iteration (methods I<has_next>, I<get_next>, I<get_more>).
+
+It throws an exception if this object does not represent any query
+result - see explanation in method I<reset_retrieval> elsewhere in
+this document.
+
+=cut
+
+sub get_all { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 exists
+
+ Usage   : my $exists = $biblio->exists;
+ Returns : 1 or undef
+ Args    : none
+
+It returns 1 if the underlying query collection represented by the
+$biblio object still exists (on the server side).
+
+If you have a collection ID (e.g. stored or printed in a previous
+session) but you do not have anymore a C<Bio::Biblio> object representing
+it this is how you can check the collection existence:
+
+    use Bio::Biblio;
+    print
+      new Bio::Biblio (-collection_id => '1014324148861')->exists;
+
+It throws an exception if this object does not represent any query
+result - see explanation in method I<reset_retrieval> elsewhere in
+this document.
+
+=cut
+
+sub exists { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 destroy
+
+ Usage   : $biblio->destroy;
+ Returns : nothing
+ Args    : none
+
+It sends a message to the remote server to forget (or free, or destroy
+- whatever server choose to do) the query collection represented by
+this object.
+
+It throws an exception if this object does not represent any query
+collection.
+
+=cut
+
+sub destroy { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_vocabulary_names
+
+ Usage   : print join ("\n", @{ $biblio->get_vocabulary_names });
+ Returns : an array reference - each element has a name
+           of a controlled vocabulary
+ Args    : none
+
+The controlled vocabularies allow to introspect bibliographic
+repositories and to find what citation resource types (such as journal
+and book articles, patents or technical reports) are provided by the
+repository, what attributes they have, eventually what attribute
+values are allowed.
+
+This method returns names of all available controlled
+vocabularies. The names can then be used in other methods dealing with
+vocabularies: I<contains>, I<get_entry_description>,
+I<get_all_values>, and I<get_all_entries>.
+
+=cut
+
+sub get_vocabulary_names { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 contains
+
+ Usage   : my $yes = $biblio->contains ($vocabulary_name, $value);
+ Returns : 1 or undef
+ Args    : $vocabulary_name defines a vocabulary where to look,
+           and a $value defines what to look for
+
+It returns 1 if the given controlled vocabulary contains the given
+value.
+
+For example, when you know, that a vocabulary
+C<MEDLINE/JournalArticle/properties> contains value C<COUNTRY> you can
+use it in the I<find> method:
+
+    $biblio->find ('United States', 'COUNTRY');
+
+=cut
+
+sub contains { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_entry_description
+
+ Usage   : $biblio->get_entry_description ($voc_name, $value);
+ Returns : a string with a description
+ Args    : $voc_name defines a vocabulary where to look,
+           and a $value defines whose description to return
+
+Each vocabulary entry has its value (mandatory attribute), and can
+have a description (optional attribute). The description may be just a
+human readable explanation of an attribute, or it can have more exact
+meaning. For example, the server implementation of the bibliographic
+query service provided by the EBI puts into attribute descriptions
+words I<queryable> and/or I<retrievable> to distinguish the role of
+the attributes.
+
+It throws an exception if either vocabulary or value do not exist.
+
+=cut
+
+sub get_entry_description { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_all_values
+
+ Usage   : $biblio->get_all_values ($vocabulary_name);
+ Returns : an array reference - each element has a value (scalar)
+           from the given controlled vocabulary
+ Args    : $vocabulary_name defines a vocabulary whose values
+           are being returned
+
+It returns all values of the given vocabulary.  It throws an exception
+if the vocabulary does not exist.
+
+=cut
+
+sub get_all_values { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 get_all_entries
+
+ Usage   : $biblio->get_all_entries ($vocabulary_name);
+ Returns : a hash reference - keys are vocabulary values
+           and values are their descriptions
+ Args    : $vocabulary_name defines a vocabulary whose entries
+           are being returned
+
+It returns pairs of values and their descriptions of the whole
+vocabulary. It throws an exception if the vocabulary does not exist.
+
+This is one way how to get it and print it:
+
+    my $name = 'MEDLINE2005/JournalArticle/properties';
+    use Data::Dumper;
+    print Data::Dumper->Dump ( [$biblio->get_all_entries ($name)],
+			       ['All entries']);
+
+=cut
+
+sub get_all_entries { my $self = shift; return $self->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 VERSION and Revision
+
+ Usage   : print $Bio::DB::BiblioI::VERSION;
+           print $Bio::DB::BiblioI::Revision;
+
+=cut
+
+1;
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/BioFetch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/BioFetch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/BioFetch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,518 @@
+# $Id: BioFetch.pm,v 1.29.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::BioFetch
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+package Bio::DB::BioFetch;
+use strict;
+use HTTP::Request::Common 'POST';
+
+=head1 NAME
+
+Bio::DB::BioFetch - Database object interface to BioFetch retrieval
+
+=head1 SYNOPSIS
+
+ use Bio::DB::BioFetch;
+
+ $bf = new Bio::DB::BioFetch;
+
+ $seq = $bf->get_Seq_by_id('BUM');  # EMBL or SWALL ID
+
+ # change formats, storage procedures
+ $bf = new Bio::DB::BioFetch(-format        => 'fasta',
+ 			     -retrievaltype => 'tempfile',
+  			     -db            => 'EMBL');
+
+ $stream = $bf->get_Stream_by_id(['BUM','J00231']);
+ while (my $s = $stream->next_seq) {
+    print $s->seq,"\n";
+ }
+ # get a RefSeq entry
+ $bf->db('refseq');
+ eval {
+     $seq = $bf->get_Seq_by_version('NM_006732.1'); # RefSeq VERSION
+ };
+ print "accession is ", $seq->accession_number, "\n" unless $@;
+
+
+=head1 DESCRIPTION
+
+Bio::DB::BioFetch is a guaranteed best effort sequence entry fetching
+method.  It goes to the Web-based dbfetch server located at the EBI
+(http://www.ebi.ac.uk/cgi-bin/dbfetch) to retrieve sequences in the
+EMBL or GenBank sequence repositories.
+
+This module implements all the Bio::DB::RandomAccessI interface, plus
+the get_Stream_by_id() and get_Stream_by_acc() methods that are found
+in the Bio::DB::SwissProt interface.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email Lincoln Stein  E<lt>lstein at cshl.orgE<gt>
+
+Also thanks to Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt> for the
+BioFetch server and interface specification.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+use vars qw(%FORMATMAP);
+use base qw(Bio::DB::WebDBSeqI Bio::Root::Root);
+
+# warning: names used here must map into Bio::SeqIO::* space
+use constant DEFAULT_LOCATION => 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
+
+BEGIN {
+    # %FORMATMAP is keyed by database name as accepted by db(). Each entry maps
+    # a BioFetch format name to the Bio::SeqIO format that parses it, plus:
+    #   default   - SeqIO format used when the 'default' format is requested
+    #   namespace - database name sent to the server by get_request()
+    #
+    # The UniProt-style and GenBank-style databases share their format
+    # pairs, so those are spelled out once and copied into each entry;
+    # every entry still gets a hash of its own.
+    my @uniprot_entry = (
+        default   => 'swiss',
+        swissprot => 'swiss',
+        fasta     => 'fasta',
+        namespace => 'uniprot',
+    );
+    my @genbank_formats = (
+        default   => 'genbank',
+        genbank   => 'genbank',
+    );
+
+    %FORMATMAP = (
+        'embl' => {
+            default   => 'embl',
+            embl      => 'embl',
+            fasta     => 'fasta',
+            namespace => 'embl',
+        },
+        'swissprot' => { @uniprot_entry },
+        'swall'     => { @uniprot_entry },
+        'uniprot'   => { @uniprot_entry },
+        'refseq'    => {
+            @genbank_formats,
+            fasta     => 'fasta',
+            namespace => 'RefSeq',
+        },
+        'genbank'   => {
+            @genbank_formats,
+            namespace => 'genbank',
+        },
+        'genpep'    => {
+            @genbank_formats,
+            namespace => 'genpep',
+        },
+    );
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $bf = Bio::DB::BioFetch->new(@args)
+ Function: Construct a new Bio::DB::BioFetch object
+ Returns : a Bio::DB::BioFetch object
+ Args    : see below
+ Throws  :
+
+@args are standard -name=E<gt>value options as listed in the following
+table. If you do not provide any options, the module assumes reasonable
+defaults.
+
+  Option         Value                            Default
+  ------         -----                            -------
+
+  -baseaddress   location of dbfetch server       http://www.ebi.ac.uk/cgi-bin/dbfetch
+  -retrievaltype "tempfile" or "io_string"        io_string
+  -format        "embl", "fasta", "swissprot",    embl
+                  or "genbank"
+  -db            "embl", "genbank" or "swissprot" embl
+
+=cut
+
+#'
+sub new {
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my ($db) = $self->_rearrange([qw(DB)], @args);
+  $db    ||= $self->default_db;    # no -db option given: use the class default
+  $self->db($db);                  # db() throws on a name missing from %FORMATMAP
+  $self->url_base_address(DEFAULT_LOCATION) unless $self->url_base_address;
+  return $self;
+}
+
+=head2 new_from_registry
+
+ Title   : new_from_registry
+ Usage   : $biofetch = $db->new_from_registry(%config)
+ Function: Creates a BioFetch object from the registry config hash
+ Returns : itself
+ Args    : A configuration hash (see Registry.pm)
+ Throws  : 
+
+
+=cut
+
+sub new_from_registry {
+    my ($class, %config) = @_;
+    # Only the server location goes to the parent constructor; the database
+    # is switched afterwards, and only when the registry config names one.
+    my $self = $class->SUPER::new(-BASEADDRESS => $config{'location'});
+    my $dbname = $config{'dbname'};
+    $self->db($dbname) if $dbname;
+    return $self;
+}
+
+# from Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+=cut
+
+=head2 get_Seq_by_gi
+
+ Title   : get_Seq_by_gi
+ Usage   : $seq = $db->get_Seq_by_gi('405830');
+ Function: Gets a Bio::Seq object by gi number
+ Returns : A Bio::Seq object
+ Args    : gi number (as a string)
+ Throws  : "gi does not exist" exception
+
+=cut
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+sub get_Seq_by_version {
+   my $self = shift;    # the next argument is the accession.version string
+   return $self->get_Seq_by_acc(shift);
+}
+
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
+  Function: Gets a series of Seq objects by unique identifiers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of unique identifiers for
+                   the desired sequence entries
+
+=cut
+
+=head2 get_Stream_by_gi
+
+  Title   : get_Stream_by_gi
+  Usage   : $stream = $db->get_Stream_by_gi([$gi1, $gi2]);
+  Function: Gets a series of Seq objects by gi numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of gi numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+=head2 get_Stream_by_batch
+
+  Title   : get_Stream_by_batch
+  Usage   : $seq = $db->get_Stream_by_batch($ref);
+  Function: Get a series of Seq objects by their IDs
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : an array reference containing a list of unique
+            ids/accession numbers.
+
+In some of the Bio::DB::* modules, get_Stream_by_id() is called
+get_Stream_by_batch().  Since there seems to be no consensus, this
+is provided as an alias.
+
+=cut
+
+*get_Stream_by_batch = \&Bio::DB::WebDBSeqI::get_Stream_by_id;  # glob assignment: installs Bio::DB::WebDBSeqI's get_Stream_by_id in this package under a second name
+
+=head1 The remainder of these methods are for internal use
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: returns a HTTP::Request object
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+
+sub get_request {
+    my ($self, @qualifiers) = @_;
+    my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], @qualifiers);
+    # The server is addressed by namespace, which may differ from the db name.
+    my $namespace = $self->_namespace;
+
+    $self->throw("Must specify a value for UIDs to fetch")
+        unless defined $uids;
+
+    # request_format() checks the format against the current db and returns
+    # (fetch format, SeqIO format); only the fetch format goes in the request.
+    $format ||= $self->default_format;
+    ($format) = $self->request_format($format);
+
+    # $uids is a single id or an array ref of ids; ids are sent '+'-joined.
+    my $base = $self->url_base_address;
+    my $uid  = join('+', ref $uids ? @$uids : $uids);
+    $self->debug("\n$base&id=$uid\n");
+    return POST($base,
+                [ db     => $namespace,
+                  id     => $uid,
+                  format => $format,
+                  style  => 'raw',
+                ]);
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : $format = $self->default_format
+ Function: return the default format
+ Returns : a string
+ Args    : 
+
+=cut
+
+sub default_format {
+    'default';    # request_format() resolves this through the 'default' key of %FORMATMAP
+}
+
+=head2 default_db
+
+ Title   : default_db
+ Usage   : $db = $self->default_db
+ Function: return the default database
+ Returns : a string
+ Args    :
+
+=cut
+
+sub default_db { return 'embl'; }
+
+=head2 db
+
+ Title   : db
+ Usage   : $db = $self->db([$db])
+ Function: get/set the database
+ Returns : a string
+ Args    : new database
+
+=cut
+
+sub db {
+  my ($self, @new) = @_;
+
+  if (@new) {
+      # database names are lower-cased before being looked up in %FORMATMAP
+      my $db = lc $new[0];
+      $self->throw("invalid db [$db], must be one of [".
+                   join(' ',keys %FORMATMAP).  "]") unless $FORMATMAP{$db};
+      $self->{_db} = $db;
+  }
+  return $self->{_db} || $self->default_db ;
+}
+
+sub _namespace {
+  my $db = shift->db;
+  # '||' (not 'or', which binds looser than 'return') so the db name is the fallback
+  return $FORMATMAP{$db}{namespace} || $db;
+}
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string 
+                                 reference containing data
+
+=cut
+
+sub postprocess_data {
+  my ($self,%args) = @_;
+
+  # check for errors in the stream
+  if ($args{'type'} eq 'string') {
+    my $stringref = $args{'location'};
+    if ($$stringref =~ /^ERROR (\d+) (.+)/m) {
+      $self->throw("BioFetch Error $1: $2");
+    }
+  }
+
+  elsif ($args{'type'} eq 'file') {
+    open (my $fh, '<', $args{'location'}) or $self->throw("Couldn't open $args{location}: $!");
+    # this is dumb, but the error may be anywhere on the first three lines because the
+    # CGI headers are sometimes printed out by the server...
+    my @data = grep { defined $_ } (scalar <$fh>, scalar <$fh>, scalar <$fh>);  # short files yield undef
+    close $fh;    # finished reading; close before a possible throw so the handle is never left open
+    if (join('',@data) =~ /^ERROR (\d+) (.+)/m) {
+      $self->throw("BioFetch Error $1: $2");
+    }
+  }
+
+  else {
+    $self->throw("Don't know how to postprocess data of type $args{'type'}");
+  }
+}
+
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my ($req_format, $ioformat) = $self->request_format;
+           $self->request_format("genbank");
+           $self->request_format("fasta");
+ Function: Get/Set sequence format retrieval. The get-form will normally not
+           be used outside of this and derived modules.
+ Returns : Array of two strings, the first representing the format for
+           retrieval, and the second specifying the corresponding SeqIO format.
+ Args    : $format = sequence format
+
+=cut
+
+sub request_format {
+    my ($self, $value) = @_;
+    # getter form: hand back the stored (fetch format, SeqIO format) pair
+    return @{$self->{'_format'}} unless defined $value;
+    my $db        = $self->db;
+    my $namespace = $self->_namespace;
+    my $format    = lc $value;
+    my $module    = $FORMATMAP{$db}->{$format};    # SeqIO format listed for this db/format
+    print "format:", $format, " module:", $module, " ($namespace)\n"
+        if $self->verbose > 0;
+    # 'default' is always accepted; anything else must be listed for this db
+    $self->throw("Invalid format [$format], must be one of [".
+                 join(' ',keys %{$FORMATMAP{$db}}). "]") unless $format eq 'default' || $module;
+    $self->{'_format'} = [ $format, $module ];
+    return @{$self->{'_format'}};
+}
+
+
+=head2 Bio::DB::WebDBSeqI methods
+
+Overriding WebDBSeqI method to help newbies to retrieve sequences.
+EMBL database is all too often passed RefSeq accessions. This
+redirects those calls. See L<Bio::DB::RefSeq>.
+
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $stream = $db->get_Stream_by_acc([$acc1, $acc2]);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+
+=cut
+
+sub get_Stream_by_acc {
+    my ($self, $ids) = (shift, shift);
+    $self->_check_id($ids);    # may throw, or switch this object to the refseq db
+    return $self->get_seq_stream(-uids => $ids, -mode => 'single');
+}
+
+
+=head2 _check_id
+
+  Title   : _check_id
+  Usage   : 
+  Function: Throw on whole chromosome NCBI sequences not in sequence databases
+            and redirect RefSeq accession requests sent to EMBL.
+  Returns : 
+  Args    : $id(s), $string
+  Throws  : if accession number indicates whole chromosome NCBI sequence
+
+=cut
+
+sub _check_id {
+    my ($self, $id) = @_;
+    # $id is one accession or, as get_Stream_by_acc may pass, an array ref of
+    # them; matching the ref itself would only test its "ARRAY(0x...)" string.
+    for my $acc (ref($id) eq 'ARRAY' ? @$id : $id) {
+        # NT contigs can not be retrieved
+        $self->throw("NT_ contigs are whole chromosome files which are not part of regular ".
+                     "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.")
+            if $acc =~ /NT_/;
+
+        # Asking for a RefSeq from EMBL/GenBank: warn unless silenced, then redirect
+        next unless $acc =~ /N._/ && $self->db ne 'refseq';
+        $self->warn("[$acc] is not a normal sequence entry but a RefSeq entry.".
+                    " Redirecting the request.\n") if $self->verbose >= 0;
+        $self->db('RefSeq');
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/CUTG.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/CUTG.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/CUTG.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,306 @@
+# $Id: CUTG.pm,v 1.11.4.2 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::CUTG
+#
+# Cared for by Richard Adams (richard.adams at ed.ac.uk)
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::CUTG - for access to the Codon usage Database
+at http://www.kazusa.or.jp/codon.
+
+=head1 SYNOPSIS
+
+       use Bio::CodonUsage::Table; 
+       use Bio::DB::CUTG;
+
+       my $db = Bio::DB::CUTG->new(-sp =>'Pan troglodytes');
+       my $CUT = $db->get_request();
+
+
+=head1 DESCRIPTION
+
+This class retrieves and objectifies codon usage tables from the
+CUTG web database. The idea is that you can initially retrieve a CUT from
+the web database, and write it to file in a way that can be read in
+later, using the Bio::CodonUsage::IO module.
+
+For a web query, two parameters need to be specified: species(sp) and
+genetic code id (gc). The database is searched using regular
+expressions, therefore the full latin name must be given to specify
+the organism. If the species name is ambiguous the first CUT in the
+list is retrieved.  Defaults are Homo sapiens and 1(standard genetic
+code).  If you are retrieving CUTs from organisms using other genetic
+codes this needs to be put in as a parameter. Parameters can be
+entered in the constructor or in the get_request() method.
+Allowable parameters are listed in the $QUERY_KEYS hash
+reference variable.
+
+I intend at a later date to allow retrieval of multiple codon tables
+e.g., from a wildcard search.
+
+=head1 SEE ALSO
+
+L<Bio::Tools::CodonTable>, 
+L<Bio::WebAgent>, 
+L<Bio::CodonUsage::Table>, 
+L<Bio::CodonUsage::IO>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+
+package Bio::DB::CUTG;
+use strict;
+use Bio::CodonUsage::IO;
+use IO::String;
+use vars qw($URL $QUERY_KEYS);
+
+use base qw(Bio::WebAgent);
+
+# Parameter names accepted by new() and get_request(), each mapped to a
+# short human-readable description. _check_args() validates against the
+# keys of this hash.
+$QUERY_KEYS = { 
+				sp => 'full Latin species name',	
+				gc => 'genetic code id'
+			 };
+
+# Base URL of the codon usage database server; get_request() appends the
+# CGI paths to it.
+BEGIN {
+		 $URL = "http://www.kazusa.or.jp"
+	}
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $db = Bio::DB::CUTG->new()
+ Returns : a reference to a new Bio::DB::CUTG 
+ Args    : hash of optional values for db query
+
+=cut
+
+sub new {
+	my $class  = shift;
+	my @params = @_;
+
+	# Reject unknown parameter names before the parent constructor runs.
+	_check_args(@params);
+
+	my $agent = $class->SUPER::new(@params);
+	return $agent;
+}
+
+=head2 query_keys
+
+ Title   : query_keys
+ Usage   : $db->query_keys()
+ Purpose : To determine valid keys for parameters for db query.
+ Returns : a reference to a hash describing valid query keys
+ Args    : none
+
+=cut
+
+sub query_keys {
+	# Hands back the package-level hash reference itself, not a copy.
+	my $valid_keys = $QUERY_KEYS;
+	return $valid_keys;
+}
+
+=head2  sp
+
+ Title  : sp
+ Usage  : my $sp = $db->sp();
+ Purpose: Get/set method for species name
+ Returns: void or species name string
+ Args   : None or species name string
+
+=cut
+
+sub sp {
+	my $self = shift;
+	if (@_) {
+		my $name = shift;
+		# Only word characters and whitespace are accepted; anything else
+		# is rejected with a warning and replaced by the default species.
+		if (defined $name && $name =~ /[^\w\s]/) {
+			$self->warn("species name [$name] contains non-word characters, ".
+				    "setting to default of Homo sapiens");
+			$self->{'_sp'} = "Homo sapiens";
+		}
+		else {
+			$self->{'_sp'} = $name;
+		}
+	}
+	# An unset (or empty) species falls back to the default.
+	return $self->{'_sp'} || "Homo sapiens";
+}
+
+=head2  gc
+
+ Title  : gc
+ Usage  : my $gc = $db->gc();
+ Purpose: Get/set method for genetic code id
+ Returns: void or genetic code  integer
+ Args   : None or genetic code integer
+
+=cut
+
+sub gc {
+	#### genetic code id for translations ####
+	my ($self, @rest) = @_;
+
+	if (@rest) {
+		my $code = $rest[0];
+		# Accepted ids are the integers 1..15 with 7 and 8 excluded.
+		my $is_valid = $code =~ /^\d+$/
+			&& $code >= 1
+			&& $code <= 15
+			&& $code != 7
+			&& $code != 8;
+		if ($is_valid) {
+			$self->{'_gc'} = $code;
+		}
+		else {
+			$self->warn("invalid genetic code index - setting to standard default (1)");
+			$self->{'_gc'} = 1;
+		}
+	}
+
+	# fall back to the standard code when nothing has been stored
+	return $self->{'_gc'} || 1;
+}
+
+
+=head2  get_request
+
+ Title  : get_request
+ Usage  : my $cut = $db->get_request();
+ Purpose: To query remote CUT with a species name
+ Returns: a new codon usage table object 
+ Args   : species  name(mandatory), genetic code id(optional)
+
+=cut
+
+sub get_request {
+	my ($self, @args) = @_;
+	_check_args(@args);
+
+	### Parameters can be put in here as well as in the constructor.
+	### Keys are normalised the way _check_args() treats them (lower-cased,
+	### leading dash removed) so that e.g. -SP, which passes validation,
+	### is dispatched to the sp() accessor instead of a missing method.
+	while (@args) {
+		my $key = lc shift @args;
+		$key =~ s/^-//;
+		$self->$key(shift @args);
+	}
+	$self->url($URL);
+
+	###1st of all search DB to check species exists and is unique
+	my $nameparts =  join "+", $self->sp =~ /(\w+)/g;
+	my $search_url = $self->url . "/codon/cgi-bin/spsearch.cgi?species=" 
+					. $nameparts . "&c=s";
+	my $rq = HTTP::Request->new(GET=>$search_url);
+	my $reply = $self->request($rq);
+	if ($reply->is_error) {
+		$self->throw($reply->as_string()."\nError getting for url $search_url!\n");
+	}
+	my $content = $reply->content;
+	# an empty reply body is reported to the caller as 0
+	return 0 unless $content;
+	$self->debug (" reply from query is \n  $content");
+
+	#####  if no matches, assign defaults - or can throw here?  ######
+	if ($content =~ /not found/i) {
+		$self->warn ("organism not found -selecting human as default");
+		$self->sp("Homo sapiens");
+		$self->_db("gbpri");
+	}
+	else {
+		my @names = $content =~ /(species)/g;
+		### get 1st species data from report ####
+		my ($sp, $db)  = $content =~ /species=(.*)\+\[(\w+)\]"/;
+
+		# Without both values the table URL below cannot be built;
+		# fail loudly rather than query with an empty database tag.
+		$self->throw("Could not extract species and database from the reply to $search_url")
+			unless defined $sp && defined $db;
+
+		$sp =~ s/\+/ /g;
+		## warn if  more than 1 matching species ##
+		## if multiple species retrieved, choose first one by default ##
+		if (@names >1 ){
+			$self->warn ("too many species - not a unique species id - selecting $sp  ");
+		}
+		### now assign species and database value
+		$self->sp($sp);
+		$self->_db($db);
+	}
+
+	######## now get codon table , all defaults established now
+
+	##construct URL##
+	$nameparts =  join "+", $self->sp =~ /(\w+)/g;
+	my $CT_url = $self->url . "/codon/cgi-bin/showcodon.cgi?species="
+				. $nameparts . "+%5B" . $self->_db . "%5D&aa=" . $self->gc . "&style=GCG";
+
+	## retrieve data in html##
+	my $rq2 = HTTP::Request->new(GET=>$CT_url);
+	$reply = $self->request($rq2);
+	if ($reply->is_error) {
+		$self->throw($reply->as_string()."\nError getting for url $CT_url!\n");
+	}
+	my $content2 = $reply->content;
+
+	## strip html tags, basic but works here
+	$content2 =~ s/<[^>]+>//sg;
+	## drop everything from the first "Format" onwards
+	$content2 =~ s/Format.*//sg;
+	$self->debug ("raw DDB table is :\n $content2");
+
+	### and pass to Bio::CodonUsage::IO for parsing
+	my $iostr = IO::String->new($content2);
+	my $io = Bio::CodonUsage::IO->new (-fh=>$iostr);
+
+	##return object ##
+	return $io->next_data;
+}
+
+
+
+sub _check_args {
+
+	### validates parameter names against the keys of $QUERY_KEYS
+	my @pairs = @_;
+
+	while (1) {
+		my $name = lc(shift @pairs);
+		# an empty, "0" or missing key ends the scan
+		last unless $name;
+		$name =~ s/\-//;
+
+		unless (exists $QUERY_KEYS->{$name}) {
+			my $allowed = join "] [", keys %$QUERY_KEYS;
+			Bio::Root::Root->throw("invalid parameter - must be one of [" .
+						$allowed . "]");
+		}
+		# discard the value that belongs to this key
+		shift @pairs;
+	}
+}
+
+#### internal URL parameter not specifiable ######
+sub _db {
+	# get/set for the database tag used when building the table URL
+	my ($self, @value) = @_;
+	$self->{'_db'} = $value[0] if @value;
+	return $self->{'_db'};
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/DB/CUTG.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/DBFetch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/DBFetch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/DBFetch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,368 @@
+# $Id: DBFetch.pm,v 1.13.4.2 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::DBFetch
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
+
+=head1 SYNOPSIS
+
+  #do not use this module directly
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of entries from databases using the
+dbfetch script at EBI:
+L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
+
+In order to make changes transparent we have host type (currently only
+ebi) and location (defaults to ebi) separated out.  This allows later
+additions of more servers in different geographical locations.
+
+This is a superclass which is called by instantiable subclasses with
+correct parameters.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::DBFetch;
+use strict;
+use vars qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
+	         $DEFAULTSERVERTYPE);
+
+$MODVERSION = '0.1';
+use HTTP::Request::Common;
+
+use base qw(Bio::DB::WebDBSeqI);
+
+# the new way to make modules a little more lightweight
+
+BEGIN { 	
+    # global vars
+    # Defaults that servertype() and hostlocation() fall back to when no
+    # value has been stored on the object.
+    $DEFAULTSERVERTYPE = 'dbfetch';
+    $DEFAULTLOCATION = 'ebi';
+}
+
+
+=head1 Routines from Bio::DB::WebDBSeqI
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: returns a HTTP::Request object
+ Returns :
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+sub get_request {
+	my $self = shift;
+	my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], @_);
+
+	defined $uids
+	  or $self->throw("Must specify a value for UIDs to fetch");
+
+	# Fall back to the module default, then let request_format() record
+	# the choice; only the first value it returns is needed here.
+	$format = $self->default_format unless $format;
+	($format) = $self->request_format($format);
+
+	my $base = $self->location_url();
+
+	# A list of ids is sent as one comma-separated value.
+	my $id_list;
+	if( ref($uids) =~ /ARRAY/i ) {
+		$self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
+		  if @$uids > 50;
+		$id_list = join(',', @$uids);
+	}
+	else {
+		$id_list = $uids;
+	}
+
+	return GET($base . "&format=$format" . '&id=' . $id_list);
+}
+
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string
+                                           reference containing data
+
+=cut
+
+# remove occasional blank lines at top of web output
+sub postprocess_data {
+  my ($self, %args) = @_;
+  if ($args{type} eq 'string') {
+    ${$args{location}} =~ s/^\s+//;  # get rid of leading whitespace
+  }
+  elsif ($args{type} eq 'file') {
+    open my $in, "<", $args{location}
+      or $self->throw("Cannot open $args{location}: $!");
+    my @data = <$in>;
+    close $in;
+
+    # Drop every leading whitespace-only line. The array is consumed from
+    # the front in a while loop; shifting inside a foreach over the same
+    # array skips elements and left about half of the blank lines in place.
+    shift @data while @data && $data[0] =~ /^\s+$/;
+
+    # Rewrite the file in place with the trimmed content.
+    open my $out, ">", $args{location}
+      or $self->throw("Cannot write to $args{location}: $!");
+    print $out @data;
+    # buffered write errors only surface at close
+    close $out
+      or $self->throw("Cannot write to $args{location}: $!");
+  }
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_format {
+    my $self = shift;
+    # read-only view of the value stored under '_default_format'
+    return $self->{'_default_format'};
+}
+
+=head1 Bio::DB::DBFetch specific routines
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $seq = $db->get_Stream_by_id($ref);
+  Function: Retrieves Seq objects from the server 'en masse', rather than one
+            at a time.  For large numbers of sequences, this is far superior
+            than get_Stream_by_[id/acc]().
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+NOTE: for backward compatibility, this method is also called
+get_Stream_by_batch.
+
+=cut
+
+sub get_Stream_by_id {
+    my $self = shift;
+    my $ids  = shift;
+
+    # Same entry point as the single-record fetch, but in 'batch' mode.
+    my @stream_args = ('-uids' => $ids, '-mode' => 'batch');
+    return $self->get_seq_stream(@stream_args);
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : version number (as a string)
+ Throws  : "version does not exist" exception
+
+=cut
+
+sub get_Seq_by_version {
+   my $self    = shift;
+   my $version = shift;
+
+   # The versioned id is fetched through the accession stream, wrapped in
+   # a one-element list.
+   my $stream = $self->get_Stream_by_acc([$version]);
+   unless (defined $stream) {
+       $self->throw("version does not exist");
+   }
+   return $stream->next_seq();
+}
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my ($req_format, $ioformat) = $self->request_format;
+           $self->request_format("genbank");
+           $self->request_format("fasta");
+ Function: Get/Set sequence format retrieval. The get-form will normally not
+           be used outside of this and derived modules.
+ Returns : Array of two strings, the first representing the format for
+           retrieval, and the second specifying the corresponding SeqIO format.
+ Args    : $format = sequence format
+
+=cut
+
+sub request_format {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Setter form: remember the lower-cased format and echo it back for
+    # both the retrieval format and the SeqIO format.
+    if (defined $value) {
+	my $format = lc $value;
+	$self->{'_format'} = $format;
+	return ($format, $format);
+    }
+
+    # Getter form: translate the stored format through formatmap() when a
+    # mapping exists for it.
+    my $stored = $self->{'_format'};
+    return ($stored, $self->formatmap->{$stored})
+	if $stored and defined $self->formatmap->{$stored};
+
+    # Try to fall back to a default.
+    return ($self->default_format, $self->default_format );
+}
+
+
+=head2 servertype
+
+ Title   : servertype
+ Usage   : my $servertype = $self->servertype
+	    $self->servertype($servertype);
+ Function: Get/Set server type
+ Returns : string
+ Args    : server type string [optional]
+
+=cut
+
+sub servertype {
+    my ($self, $servertype) = @_;
+    if( defined $servertype && $servertype ne '') {		
+	 # join() is required here: concatenating keys() directly evaluates
+	 # it in scalar context and prints the number of types, not their names.
+	 $self->throw("You gave an invalid server type ($servertype)".
+			  " - available types are ".
+			  join(", ", sort keys %{$self->hosts}))
+	     unless( $self->hosts->{$servertype} );
+	 $self->{'_servertype'} = $servertype;
+    }
+    # fall back to the package default when nothing has been stored
+    $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
+    return $self->{'_servertype'};
+}
+
+=head2 hostlocation
+
+ Title   : hostlocation
+ Usage   : my $location = $self->hostlocation()
+          $self->hostlocation($location)
+ Function: Set/Get Hostlocation
+ Returns : string representing hostlocation
+ Args    : string specifying hostlocation [optional]
+
+=cut
+
+sub hostlocation {
+    my ($self, $location ) = @_;
+    # Normalise case only when a location was actually passed; lc() on
+    # undef would turn it into '' and hide the getter form from the
+    # defined() test below.
+    $location = lc $location if defined $location;
+    my $servertype = $self->servertype;
+    $self->throw("Must have a valid servertype defined not $servertype")
+	unless defined $servertype; 
+    my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
+    if( defined $location && $location ne '' ) {
+	if( ! $hosts{$location} ) {
+	    $self->throw("Must specify a known host, not $location,".
+			 " possible values (".
+			 join(",", sort keys %hosts ). ")");
+	}
+	$self->{'_hostlocation'} = $location;
+    }
+    # fall back to the package default when nothing has been stored
+    $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
+    return $self->{'_hostlocation'};
+}
+
+=head2 location_url
+
+ Title   : location_url
+ Usage   : my $url = $self->location_url()
+ Function: Get host url
+ Returns : string representing url
+ Args    : none
+
+=cut
+
+sub location_url {
+    my $self = shift;
+    my $servertype = $self->servertype();
+    my $location   = $self->hostlocation();
+
+    $self->throw("must have a valid hostlocation and servertype set before calling location_url")
+	unless defined $location && defined $servertype;
+
+    # 'baseurl' is a sprintf template whose placeholder receives the host
+    # name registered for the chosen location.
+    my $server = $self->hosts->{$servertype};
+    return sprintf($server->{'baseurl'}, $server->{'hosts'}->{$location});
+}
+
+=head1 Bio::DB::DBFetch routines
+
+These methods allow subclasses to pass parameters.
+
+=head2 hosts
+
+ Title   : hosts
+ Usage   : 
+ Function: get/set for host hash 
+ Returns : 
+ Args    : optional hash
+
+=cut
+
+sub hosts {
+    my $self  = shift;
+    my $value = shift;
+
+    $self->{'_hosts'} = $value if defined $value;
+
+    # an empty string is returned while no host table has been stored
+    return exists $self->{'_hosts'} ? $self->{'_hosts'} : ('');
+}
+
+=head2 formatmap
+
+ Title   : formatmap
+ Usage   : 
+ Function: get/set for format hash
+ Returns : 
+ Args    : optional hash
+
+=cut
+
+sub formatmap {
+    my $self  = shift;
+    my $value = shift;
+
+    $self->{'_formatmap'} = $value if defined $value;
+
+    # an empty string is returned while no format table has been stored
+    return exists $self->{'_formatmap'} ? $self->{'_formatmap'} : ('');
+}
+
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EMBL.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EMBL.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EMBL.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,208 @@
+#
+# $Id: EMBL.pm,v 1.21.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::EMBL
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::EMBL - Database object interface for EMBL entry retrieval
+
+=head1 SYNOPSIS
+
+  use Bio::DB::EMBL;
+
+  $embl = new Bio::DB::EMBL;
+
+  # remember that EMBL_ID does not equal GenBank_ID!
+  $seq = $embl->get_Seq_by_id('BUM'); # EMBL ID
+  print "cloneid is ", $seq->id, "\n";
+
+  # or changing to accession number and Fasta format ...
+  $embl->request_format('fasta');
+  $seq = $embl->get_Seq_by_acc('J02231'); # EMBL ACC
+  print "cloneid is ", $seq->id, "\n";
+
+  # especially when using versions, you better be prepared
+  # for not getting what you want
+  eval {
+      $seq = $embl->get_Seq_by_version('J02231.1'); # EMBL VERSION
+  };
+  print "cloneid is ", $seq->id, "\n" unless $@;
+
+  # or ... best when downloading very large files, prevents
+  # keeping all of the file in memory
+
+  # also don't want features, just sequence so let's save bandwidth
+  # and request Fasta sequence
+  $embl = new Bio::DB::EMBL(-retrievaltype => 'tempfile' ,
+ 			    -format => 'fasta');
+  my $seqio = $embl->get_Stream_by_id(['AC013798', 'AC021953'] );
+  while( my $clone =  $seqio->next_seq ) {
+ 	print "cloneid is ", $clone->id, "\n";
+  }
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of sequence objects L<Bio::Seq> from the
+EMBL database using the dbfetch script at EBI:
+L<http://www.ebi.ac.uk/cgi-bin/dbfetch>.
+
+In order to make changes transparent we have host type (currently only
+ebi) and location (defaults to ebi) separated out.  This allows later
+additions of more servers in different geographical locations.
+
+The functionality of this module is inherited from L<Bio::DB::DBFetch>
+which implements L<Bio::DB::WebDBSeqI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EMBL;
+use strict;
+use vars qw($MODVERSION %HOSTS %FORMATMAP  $DEFAULTFORMAT);
+
+$MODVERSION = '0.2';
+use Bio::DB::RefSeq;
+
+use base qw(Bio::DB::DBFetch);
+
+BEGIN {
+    # you can add your own here theoretically.
+    # Server table keyed by server type: 'baseurl' holds a URL with a %s
+    # placeholder and 'hosts' maps a location name to a host name.
+    %HOSTS = (
+	       'dbfetch' => {
+		   baseurl => 'http://%s/cgi-bin/dbfetch?db=embl&style=raw',
+		   hosts   => {
+		       'ebi'  => 'www.ebi.ac.uk'
+		       }
+	       }
+	      );
+    # Format names known to this module; new() hands this table to
+    # formatmap().
+    %FORMATMAP = ( 'embl' => 'embl',
+		   'fasta' => 'fasta'
+		   );
+    # Stored by new() as the object's '_default_format'.
+    $DEFAULTFORMAT = 'embl';
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $embl = Bio::DB::EMBL->new(@options)
+ Function: Creates a new EMBL handle
+ Returns : New EMBL handle
+ Args    : -delay   number of seconds to delay between fetches (3s)
+
+NOTE:  There are other options that are used internally.
+
+=cut
+
+sub new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+
+    # Start from empty tables, then install this module's package-level
+    # host and format tables through the accessors.
+    @{$self}{ '_hosts', '_formatmap' } = ( {}, {} );
+    $self->hosts(\%HOSTS);
+    $self->formatmap(\%FORMATMAP);
+
+    $self->{'_default_format'} = $DEFAULTFORMAT;
+
+    return $self;
+}
+
+
+=head2 Bio::DB::WebDBSeqI methods
+
+Overriding WebDBSeqI method to help newbies to retrieve sequences.
+EMBL database is all too often passed RefSeq accessions. This
+redirects those calls. See L<Bio::DB::RefSeq>.
+
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $stream = $db->get_Stream_by_acc([$acc1, $acc2]);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+sub get_Stream_by_acc {
+    my ($self, $ids ) = @_;
+
+    # _check_id() returns a Bio::DB::RefSeq handle when the request should
+    # be redirected; otherwise this object serves the request itself.
+    my $refseq = $self->_check_id($ids);
+    my $source = ($refseq && $refseq->isa('Bio::DB::RefSeq')) ? $refseq : $self;
+
+    return $source->get_seq_stream('-uids' => $ids, '-mode' => 'single');
+}
+
+
+=head2 _check_id
+
+  Title   : _check_id
+  Usage   : 
+  Function: 
+  Returns : A Bio::DB::RefSeq reference or throws
+  Args    : $id(s), $string
+
+=cut
+
+sub _check_id {
+    my ($self, $ids) = @_;
+
+    # get_Stream_by_acc() is documented to take an array reference. Flatten
+    # it so the pattern checks below look at the accessions themselves
+    # rather than at the stringified reference ("ARRAY(0x...)").
+    my $acc = ref($ids) eq 'ARRAY' ? join(',', grep { defined } @$ids) : $ids;
+    $acc = '' unless defined $acc;
+
+    # NT contigs can not be retrieved
+    $self->throw("NT_ contigs are whole chromosome files which are not part of regular ".
+		 "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.")
+	if $acc =~ /NT_/;
+
+    # Asking for a RefSeq from EMBL/GenBank: warn (unless silenced through
+    # a negative verbosity) and hand back a RefSeq handle for the caller
+    # to use instead of this object.
+    if ($acc =~ /N._/) {
+	$self->warn("[$acc] is not a normal sequence entry but a RefSeq entry.".
+		   " Redirecting the request.\n")
+	    if $self->verbose >= 0;
+	return Bio::DB::RefSeq->new(-verbose => $self->verbose);
+    }
+
+    # nothing to redirect
+    return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/Cookie.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/Cookie.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/Cookie.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,138 @@
+# $Id: Cookie.pm,v 1.3.4.2 2006/10/02 23:10:15 sendu Exp $
+
+# simple object to hold NCBI cookie information and descriptions
+# POD to come...
+
+=head1 NAME
+
+Bio::DB::EUtilities::Cookie - simple object to hold NCBI cookie information and descriptions
+
+=head1 DESCRIPTION
+
+Some EUtilities (C<epost>, C<esearch>, or C<elink>) are able to retain information on
+the NCBI server under certain settings.  This information can be retrieved by
+using a B<cookie>.  Here, the idea of the 'cookie' is similar to the 'cookie' set
+on a user's computer when browsing the Web.  XML data returned by these
+EUtilities, when applicable, is parsed for the cookie information (the 'WebEnv'
+and 'query_key' tags to be specific)  The information along with other identifying
+data, such as the calling eutility, description of query, etc.) is stored as a
+L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie> object in an internal queue.  These can be retrieved
+one at a time by using the next_cookie method or all at once in an array using
+get_all_cookies.  Each cookie can then be 'fed', one at a time, to another
+EUtility object, thus enabling chained queries as demonstrated in the synopsis.
+
+By default, a EUtilities object will retrieve records using a cookie if the
+cookie parameter is set.  Also, the object will use the database parameter
+stored in the L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie> object when the parameter isn't set
+upon instantiation:
+
+  my $efetch = Bio::DB::EUtilities->new(-cookie       => $elink->next_cookie,
+                                        -rettype      => 'fasta');
+
+ELink, in particular, is capable of returning multiple cookies based on the
+setting for the database; if C<db> is set to C<'all'>, you will retrieve a cookie for
+each database with related records.
+
+=cut
+
+package Bio::DB::EUtilities::Cookie;
+use strict;
+use warnings;
+use URI::Escape qw(uri_unescape);
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($webenv, $querykey, $database, $dbfrom, $query_id, $eutil,
+      $total, $term, $linkname) = $self->_rearrange ([qw(WEBENV QUERYKEY
+      DATABASE DBFROM QUERY_ID EUTIL TOTAL TERM LINKNAME)], @args);
+
+    # Both halves of the cookie are mandatory; report the first one that
+    # is missing. (The former "Abnormal cookie" branch could never run.)
+    $self->throw("Missing WebEnv")    unless $webenv;
+    $self->throw("Missing query_key") unless $querykey;
+
+    $self->cookie(uri_unescape($webenv), $querykey);
+    # holds originating eutil
+    $eutil      && $self->eutil($eutil);
+    # holds descriptions of database being queried
+    $database   && $self->database($database);
+    
+    # for elink only, originating database
+    $dbfrom     && $self->elink_dbfrom($dbfrom);
+    # holds elink dbfrom ID's used for querys
+    $query_id   && $self->elink_queryids($query_id);
+    # holds elink linkname; information can be found using einfo
+    $linkname   && $self->elink_linkname($linkname);    
+
+    # for esearch, to hold original search query
+    $term       && $self->esearch_query($term);
+    # for esearch, holds total hits if present
+    $total      && $self->esearch_total($total);
+
+    return $self;
+}
+
+sub cookie {
+    my ($self, @parts) = @_;
+
+    # getter form
+    return $self->{'_cookie'} unless @parts;
+
+    # setter form: both the WebEnv and the query key must be true values
+    my ($webenv, $querykey) = @parts;
+    $self->throw("Missing part of cookie!") unless $webenv && $querykey;
+    return $self->{'_cookie'} = [$webenv, $querykey];
+}
+
+sub eutil {
+    # get/set for the name of the originating eutil
+    my ($self, @value) = @_;
+    $self->{'_eutil'} = $value[0] if @value;
+    return $self->{'_eutil'};
+}
+
+sub database {
+    # get/set for the database being queried
+    my ($self, @value) = @_;
+    $self->{'_database'} = $value[0] if @value;
+    return $self->{'_database'};
+}
+
+sub esearch_total {
+    # get/set for the total hit count reported by esearch
+    my ($self, @value) = @_;
+    $self->{'_esearch_total'} = $value[0] if @value;
+    return $self->{'_esearch_total'};
+}
+
+sub esearch_query {
+    # get/set for the original esearch query term
+    my ($self, @value) = @_;
+    $self->{'_esearch_query'} = $value[0] if @value;
+    return $self->{'_esearch_query'};
+}
+
+sub elink_dbfrom {
+    # get/set for the originating database of an elink request
+    my ($self, @value) = @_;
+    $self->{'_elink_dbfrom'} = $value[0] if @value;
+    return $self->{'_elink_dbfrom'};
+}
+
+sub elink_queryids {
+    my $self = shift;
+    # setter form: store the array reference and return it
+    return $self->{'_query_ids'} = shift if @_;
+    # List context gets the ids themselves. When none have been stored an
+    # empty list is returned; dereferencing the missing slot directly
+    # would die under strict refs.
+    return @{ $self->{'_query_ids'} || [] } if wantarray;
+    # scalar context gets the stored reference (undef when unset)
+    return $self->{'_query_ids'};
+}
+
+sub elink_linkname {
+    # get/set for the elink link name
+    my ($self, @value) = @_;
+    $self->{'_elink_linkname'} = $value[0] if @value;
+    return $self->{'_elink_linkname'};
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/ElinkData.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/ElinkData.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/ElinkData.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,398 @@
+# $Id: ElinkData.pm,v 1.12.4.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::ElinkData
+#
+# Cared for by Chris Fields
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::ElinkData 
+
+=head1 SYNOPSIS
+
+*** Give standard usage here
+
+=head1 DESCRIPTION
+
+*** Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::ElinkData;
+use strict;
+use warnings;
+
+#use Data::Dumper;
+
+use base qw(Bio::Root::Root);
+
+# Constructor.  Builds the object through the parent class, stores the
+# optional -command argument via elink_command, and resets the iterator
+# positions, the link counter, and the database and linkset lists.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($command) = $self->_rearrange([qw(COMMAND)], @args);
+    if ($command) {
+        $self->elink_command($command);
+    }
+    # iterator positions and the link counter all start at zero
+    @{$self}{qw(_dbindex _scoreindex _linkcount)} = (0, 0, 0);
+    $self->{'_scoredb_key'} = '';
+    # one fresh, empty list per slot
+    $self->{$_} = [] for qw(_databases _linksetdb);
+    return $self;
+}
+
+# this should make a deep copy of the XML data for each ElinkData Linkset
+
+# Load one parsed ELink LinkSet structure into this object.
+#
+# Args    : hash reference describing a single LinkSet
+# Returns : 1 when the linkset had a LinkSetDb section, 0 when it had none
+# Throws  : if called without linkset data
+sub _add_set {
+    my ($self, $ls) = @_;
+    if (!$ls) {
+        $self->throw('No linkset data!');
+    }
+    # is there any data returned
+    return 0 unless exists $ls->{LinkSetDb};
+    $self->elink_dbfrom($ls->{DbFrom});
+
+    # a lone query ID arrives as a plain scalar; always store an array ref
+    my $query_ids = $ls->{IdList}->{Id};
+    $query_ids = [$query_ids] if !ref $query_ids;
+    $self->elink_queryids($query_ids);
+
+    # accept a single LinkSetDb hash as well as an array of them
+    my $linksetdbs = $ls->{LinkSetDb};
+    $linksetdbs = [$linksetdbs] if ref($linksetdbs) eq 'HASH';
+
+    # each linkset database
+    for my $ls_db (@{$linksetdbs}) {
+        my $dbto = $ls_db->{DbTo};
+        push @{ $self->{'_databases'} }, $dbto;
+        my $linkname = $ls_db->{LinkName};
+        if (exists $ls_db->{Info} || exists $ls->{ERROR} || !exists $ls_db->{Link}) {
+            my $err_msg = $ls_db->{Info} || $ls->{ERROR} || 'No Links!';
+            my $ids = (ref($query_ids) eq 'ARRAY')
+                    ? join(q(,), @{$query_ids})
+                    : $query_ids;
+            $self->warn("ELink Error for $dbto and ids $ids: $err_msg");
+            next;
+        }
+
+        # accept a single Link hash as well as an array of them
+        my $links = $ls_db->{Link};
+        $links = [$links] if ref($links) eq 'HASH';
+
+        my @ids;
+        my $db_has_scores = 0;
+        for my $id_ref (@{$links}) {
+            my $id = $id_ref->{Id};
+            push @ids, $id;
+            my $score = exists $id_ref->{Score} ? $id_ref->{Score} : undef;
+            next if !$score;
+            $self->{'_scores'}->{$dbto}->{$id} = $score;
+            $db_has_scores = 1;
+        }
+
+        # Register every database that returned scores, once each.  The
+        # previous test (!$self->{'_has_scores'}) stopped recording after
+        # the first scoring database, so later ones were never iterable.
+        if ($db_has_scores
+            && !grep { $_ eq $dbto } @{ $self->{'_has_scores'} || [] }) {
+            push @{ $self->{'_has_scores'} }, $dbto;
+        }
+
+        my $linkset = {
+                       'LinkName' => $linkname,
+                       'DbTo'     => $dbto,
+                       'Id'       => \@ids,
+                      };
+        #$self->debug('Linkset:',Dumper($linkset));
+        push @{ $self->{'_linksetdb'} }, $linkset;
+    }
+    return 1; # good linkset
+}
+
+=head2 elink_dbfrom
+
+ Title   : elink_dbfrom
+ Usage   : $dbfrom = $linkset->elink_dbfrom;
+ Function: gets/sets dbfrom value
+ Returns : originating database
+ Args    : originating database
+
+=cut
+
+sub elink_dbfrom {
+    my ($self, @args) = @_;
+    # setter form: store the first argument and hand it back
+    if (@args) {
+        return $self->{'_elink_dbfrom'} = $args[0];
+    }
+    # getter form
+    return $self->{'_elink_dbfrom'};
+}
+
+=head2 elink_queryids
+
+ Title   : elink_queryids
+ Usage   : @ids = $linkset->elink_queryids;
+ Function: gets/sets original query ID values (ref to array)
+ Returns : array or array ref of IDs (based on wantarray)
+ Args    : array ref of IDs
+
+=cut
+
+sub elink_queryids {
+    my ($self, @args) = @_;
+    # setter form: store the array reference and hand it back
+    return $self->{'_elink_queryids'} = $args[0] if @args;
+    # Nothing stored yet: hand back an empty list instead of dying on an
+    # undefined array dereference under strict refs.
+    return @{ $self->{'_elink_queryids'} || [] } if wantarray;
+    return $self->{'_elink_queryids'};
+}
+
+=head2 elink_command
+
+ Title   : elink_command
+ Usage   : $cmd = $linkset->elink_command;
+ Function: gets/sets cmd used for elink query
+ Returns : string (cmd parameter)
+ Args    : string (cmd parameter)
+
+=cut
+
+sub elink_command {
+    my ($self, @args) = @_;
+    # setter form: store the first argument and hand it back
+    if (@args) {
+        return $self->{'_elink_command'} = $args[0];
+    }
+    # getter form
+    return $self->{'_elink_command'};
+}
+
+=head2 get_LinkIds_by_db
+
+ Title   : get_LinkIds_by_db
+ Usage   : @ids = $linkset->get_LinkIds_by_db('protein');
+ Function: retrieves primary ID list based on the database for the object
+ Returns : array or array ref of IDs (based on wantarray)
+ Args    : name of the linked database (required), e.g. 'protein'
+
+=cut
+
+sub get_LinkIds_by_db {
+    # Plain unpacking: 'my $db = shift if @_' has undefined behaviour and
+    # can leak a value from a previous call.
+    my ($self, $db) = @_;
+    $self->throw("Must use database to access IDs") if !$db;
+    my $linksets = $self->{'_linksetdb'} || [];
+    # no linksets: empty list, or a blank anon array in scalar context
+    # (list context used to return a one-element list holding an array ref)
+    if (!@{$linksets}) {
+        return wantarray ? () : [];
+    }
+    for my $linkset (@{$linksets}) {
+        next if $linkset->{DbTo} ne $db;
+        return wantarray ? @{ $linkset->{Id} } : $linkset->{Id};
+    }
+    $self->warn("Couldn't find ids for database $db");
+    # explicit empty return rather than leaking warn()'s return value
+    return;
+}
+
+=head2 next_linkdb
+
+ Title   : next_linkdb
+ Usage   : while (my $db = $linkset->next_linkdb) {
+ Function: iterates through list of database names in internal queue
+ Returns : String (name of database)
+ Args    : None
+
+=cut
+
+sub next_linkdb {
+    my $self = shift;
+    my $index = $self->_next_db_index;
+    my $databases = $self->{'_databases'} || [];
+    # Compare against the element count.  scalar() applied to the array
+    # reference itself yields the reference, so the old bound never fired.
+    return if $index >= scalar @{$databases};
+    return $databases->[$index];
+}
+
+=head2 get_all_linkdbs
+
+ Title   : get_all_linkdbs
+ Usage   : @dbs = $linkset->get_all_linkdbs;
+ Function: returns all database names which contain IDs
+ Returns : array or array ref of databases (based on wantarray)
+ Args    : None
+
+=cut
+
+sub get_all_linkdbs {
+    my ($self) = @_;
+    my $databases = $self->{'_databases'};
+    # flatten for list callers, hand back the reference otherwise
+    return wantarray ? @{$databases} : $databases;
+}
+
+=head2 next_scoredb
+
+ Title   : next_scoredb
+ Usage   : while (my $db = $linkset->next_scoredb) {
+ Function: iterates through list of database with score values
+ Returns : String (name of database)
+ Args    : None
+
+=cut
+
+sub next_scoredb {
+    my $self = shift;
+    my $index = $self->_next_scoredb_index;
+    # '_has_scores' is only created once a score has been seen
+    my $scoredbs = $self->{'_has_scores'} || [];
+    # Compare against the element count.  scalar() applied to the array
+    # reference itself yields the reference, so the old bound never fired.
+    return if $index >= scalar @{$scoredbs};
+    my $db = $scoredbs->[$index];
+    $self->set_scoredb($db);
+    return $db;
+}
+
+=head2 get_all_scoredbs
+
+ Title   : get_all_scoredbs
+ Usage   : @dbs = $linkset->get_all_scoredbs;
+ Function: returns database names which contain scores
+ Returns : array of database names in list context; the name of the first
+           score database in scalar context
+ Args    : None
+
+=cut
+
+sub get_all_scoredbs {
+    my $self = shift;
+    # Read through a local default so that (a) list context no longer dies
+    # when no scores were recorded and (b) scalar context no longer
+    # autovivifies '_has_scores' as a side effect.
+    my $scoredbs = $self->{'_has_scores'} || [];
+    return @{$scoredbs} if wantarray;
+    # scalar context yields the first score database name (undef if none)
+    return $scoredbs->[0];
+}
+
+=head2 get_score
+
+ Title   : get_score
+ Usage   : $score = $linkset->get_score($id);
+ Function: returns score value for ID
+ Returns : score value
+ Args    : ID
+ Note    : if multiple databases are returned with scores (rare but possible),
+         : you must set the default score database using set_scoredb.  If you
+         : use next_scoredb to iterate through the databases, this is done for you
+
+=cut
+
+sub get_score {
+    # Plain unpacking: 'my $id = shift if @_' has undefined behaviour and
+    # can leak a value from a previous call.
+    my ($self, $id) = @_;
+    # scalar context: name of the first database that returned scores
+    my $default_db = $self->get_all_scoredbs;
+    if (!$default_db) {
+        $self->warn("No scores!");
+        return;
+    }
+    $self->throw("Must use ID to access scores") if !$id;
+    # Use the explicitly selected score database when one is set; an empty
+    # or missing selection falls back to the first score database.
+    my $db = $self->{'_scoredb'} || $default_db;
+    # look up without autovivifying entries for unknown databases
+    my $scores_for_db = ($self->{'_scores'} || {})->{$db};
+    return unless $scores_for_db && exists $scores_for_db->{$id};
+    return $scores_for_db->{$id};
+}
+
+=head2 get_score_hash
+
+ Title   : get_score_hash
+ Usage   : %scores = $linkset->get_score_hash($database);
+ Function: returns ID(key)-score(value) hash based on database name
+ Returns : score value
+ Args    : OPTIONAL : database name.  If there is only one score hash, returns
+         : that hash, otherwise throws an exception
+
+=cut
+
+sub get_score_hash {
+    # $db is the optional database name described in the POD
+    my ($self, $db) = @_;
+    # The old code called has_scores, which this package never defines;
+    # get_all_scoredbs in scalar context gives the first score database.
+    my $default_db = $self->get_all_scoredbs;
+    if (!$default_db) {
+        $self->warn("No scores!");
+        return;
+    }
+    # precedence: explicit argument, then selected score db, then the first
+    $db ||= $self->{'_scoredb'} || $default_db;
+    # look up without autovivifying entries for unknown databases
+    my $scores_for_db = ($self->{'_scores'} || {})->{$db};
+    return %{$scores_for_db} if $scores_for_db;
+    return;
+}
+
+=head2 set_scoredb
+
+ Title   : set_scoredb
+ Usage   : $linkset->set_scoredb('protein');
+ Function: sets the database to retrieve scores from
+ Returns : None
+ Args    : database name
+
+=cut
+
+sub set_scoredb {
+    # Unpack from @_: 'my ($self, $key) = shift' only ever filled $self,
+    # and the old body never assigned anything.
+    my ($self, $key) = @_;
+    $self->{'_scoredb'} = $key if $key;
+    return;
+}
+
+=head2 rewind_linkdbs
+
+ Title   : rewind_linkdbs
+ Usage   : $linkset->rewind_linkdbs;
+ Function: resets the iterator for next_database
+ Returns : None
+ Args    : None
+
+=cut
+
+sub rewind_linkdbs {
+    my ($self) = @_;
+    # restart the database iterator at the first entry
+    return $self->{'_dbindex'} = 0;
+}
+
+=head2 rewind_scoredbs
+
+ Title   : rewind_scoredbs
+ Usage   : $linkset->rewind_scoredbs;
+ Function: resets the iterator, current database for next_scoredb
+ Returns : None
+ Args    : None
+
+=cut
+
+sub rewind_scoredbs {
+    my $self = shift;
+    $self->{'_scoreindex'} = 0;
+    # Remove the selection outright.  Leaving an empty string behind makes
+    # "exists $self->{'_scoredb'}" true, so score lookups would then search
+    # a database named '' instead of falling back to the default one.
+    delete $self->{'_scoredb'};
+    return;
+}
+
+# private methods
+
+# Iterator for the full database list: hands back the current position
+# and advances the stored position by one.
+sub _next_db_index {
+    my ($self) = @_;
+    my $current = $self->{'_dbindex'}++;
+    return $current;
+}
+
+# Iterator for the score database list: hands back the current position
+# and advances the stored position by one.
+sub _next_scoredb_index {
+    my ($self) = @_;
+    my $current = $self->{'_scoreindex'}++;
+    return $current;
+}
+
+1;
+
+__END__
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/efetch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/efetch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/efetch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,236 @@
+# $Id: efetch.pm,v 1.8.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::efetch
+#
+# Cared for by Chris Fields
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::efetch - retrieval of records from a list of IDs or the
+user's environment.
+
+=head1 SYNOPSIS
+
+  my $efetch = Bio::DB::EUtilities->new(
+                                       -verbose => 1,
+                                       -cookie   => $esearch->next_cookie,
+                                       -retmax   => $retmax,
+                                       -rettype  => 'fasta'
+                                        );
+
+  print $efetch->get_response->content;
+
+=head1 DESCRIPTION
+
+L<EFetch|Bio::DB::EUtilities::efetch> retrieves data records from a list of
+ID's.  This can be accomplished directly (using C<id>) or indirectly
+(by using a L<Cookie|Bio::DB::EUtilities::Cookie>).
+
+=head2 NCBI Efetch Parameters
+
+The following are a general list of parameters that can be used to take
+advantage of Efetch.  Up-to-date help for Efetch is available at this URL
+(the information below is a summary of the options found there):
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html
+
+=over 3
+
+=item C<db>
+
+One or more database available through EUtilities.  EFetch currently only
+supports database retrieval from the following databases:
+
+B<pubmed>, B<pmc> (PubMed Central), B<journals>, B<omim>, B<nucleotide>,
+B<protein>, B<genome>, B<gene>, B<snp> (dbSNP), B<popset>, and B<taxonomy>.
+
+Also supported are B<sequences> (nucleotide, protein, popset and genome), and
+the three subsets of nucleotide: B<nuccore>, B<nucest>, B<nucgss>
+
+=item C<id>
+
+a list of primary ID's
+
+Below are a list of IDs which can be used with EFetch:
+
+For sequence databases:
+
+B<NCBI sequence number> (GI), B<accession>, B<accession.version>, B<fasta>,
+B<GeneID>, B<genome ID>, B<seqid>
+
+All other databases:
+
+B<PMID> (pubmed), B<MIM number> (omim), B<GI number> (nucleotide, protein),
+B<Genome ID> (genome), B<Popset ID> (popset), B<SNP cluster ID> (snp),
+B<UniSTS ID> (unists), B<UniGene cluster ID> (unigene), B<MMDB-ID> (structure),
+B<PSSM-ID> (cdd), B<3D SDI> (domains), B<TAXID> (taxonomy), B<GEO ID> (geo)
+
+=item C<mindate>, C<maxdate>
+
+limits results by dates (C<yyyy/mm/dd> format, or by year)
+
+=item C<rettype>
+
+Output type based on the database.  Not all return types are compatible with
+all return modes (-C<retmode>).  For more information, see the specific
+literature or sequence database links at URL above.
+
+Literature databases have the below return types:
+
+B<uilist> (all databases),
+B<abstract>, B<citation>, B<medline> (not omim),
+B<full> (journals and omim)
+
+Sequence databases have the below return types:
+
+B<native> (full record, all databases),
+B<fasta>, B<seqid>, B<acc> (nucleotide or protein),
+B<gb>, B<gbc>, B<gbwithparts> (nucleotide only),
+B<est> (dbEST only),
+B<gss> (dbGSS only),
+B<gp>, B<gpc> (protein only),
+B<chr>, B<flt>, B<rsr>, B<brief>, B<docset> (dbSNP only)
+
+=item C<retmode>
+
+EFetch is set, by default, to return a specific format for each Entrez database;
+this is set in the %DATABASE hash in L<Bio::DB::EUtilities>.  To override this
+format, you can set -C<retmode>.  The normal return modes are text, HTML, XML,
+and ASN1.  Error checking for the set return mode is currently not
+implemented.
+
+=item C<report>
+
+Used for the output format for Taxonomy; set to B<uilist>, B<brief>, B<docsum>,
+B<xml>
+
+=item C<strand> - I<sequence only>
+
+The strand of DNA to show: 1=plus, 2=minus
+
+=item C<seq_start>, C<seq_stop> - I<sequence only>
+
+the start and end coordinates of the sequence to display
+
+=item C<complexity> - I<sequence only>
+
+The GI is often part of a biological blob containing other GIs
+
+    * 0 - get the whole blob
+    * 1 - get the bioseq for gi of interest (default in Entrez)
+    * 2 - get the minimal bioseq-set containing the gi of interest
+    * 3 - get the minimal nuc-prot containing the gi of interest
+    * 4 - get the minimal pub-set containing the gi of interest
+
+=back
+
+=head2 Additional (Bioperl-related) Parameters
+
+These are Bioperl-related settings and are not used as CGI parameters when
+the request is sent to NCBI.
+
+=over 3
+
+=item C<eutil>
+
+The relevant EUtility to be used (efetch).  
+
+=item C<cookie>
+
+Uses a L<Cookie|Bio::DB::EUtilities::Cookie>-based search (see below)
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::efetch;
+use strict;
+use warnings;
+
+use vars qw($EUTIL);
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'efetch';
+
+# Initialise an efetch object: run the parent initialiser, mark the object
+# as an 'efetch' eutil, and store the efetch-specific named parameters.
+#
+# Args : the named parameters listed in the _rearrange call below
+sub _initialize {
+    my ($self, @args ) = @_;
+    $self->SUPER::_initialize(@args);
+    my ($retmode, $reldate, $mindate, $maxdate, $datetype, $rettype, $retstart, 
+        $retmax, $report, $seq_start, $seq_stop, $strand, $complexity) = 
+      $self->_rearrange([qw(RETMODE RELDATE MINDATE MAXDATE DATETYPE RETTYPE
+        RETSTART RETMAX REPORT SEQ_START SEQ_STOP STRAND COMPLEXITY)], @args);    
+    # set by default
+    $self->_eutil($EUTIL);
+    # datetype falls back to 'mdat' when not supplied
+    $self->datetype($datetype || 'mdat');
+    # RELDATE/MINDATE/MAXDATE used to be parsed and then silently dropped.
+    # Store them, but only when the object actually provides the accessor.
+    # NOTE(review): the accessors are expected to come from the parent
+    # class, which is not visible here -- confirm.
+    my %date_limit = (
+        reldate => $reldate,
+        mindate => $mindate,
+        maxdate => $maxdate,
+    );
+    for my $setter (qw(reldate mindate maxdate)) {
+        next unless $date_limit{$setter} && $self->can($setter);
+        $self->$setter($date_limit{$setter});
+    }
+    # 0 is a meaningful value for retstart and complexity, so test definedness
+    defined($retstart)       && $self->retstart($retstart);
+    $retmode        && $self->retmode($retmode);
+    $retmax         && $self->retmax($retmax);
+    $rettype        && $self->rettype($rettype);
+    $seq_start      && $self->seq_start($seq_start);
+    $seq_stop       && $self->seq_stop($seq_stop);
+    $strand         && $self->strand($strand);
+    defined($complexity) && $self->complexity($complexity);
+    $report         && $self->report($report);    
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->_parse_response($content)
+ Function: parse out response for cookie
+ Returns : empty
+ Args    : none
+ Throws  : 'unparseable output exception'
+
+=cut
+
+# this is NOOP b/c efetch returns raw data to be processed or saved;
+# HTTP errors caught in get_response 
+
+sub parse_response {
+    # deliberately does nothing; any arguments are ignored
+    return;
+}
+
+1;
+__END__
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/egquery.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/egquery.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/egquery.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,123 @@
+# $Id: egquery.pm,v 1.6.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::egquery
+#
+# Cared for by Chris Fields
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::egquery - counts for a global query of Entrez databases
+
+=head1 SYNOPSIS
+
+    my $egquery = Bio::DB::EUtilities->new(
+                                     -eutil    => 'egquery',
+                                     -term     => 'dihydroorotase'
+                                      );
+
+    print $egquery->get_response->content;
+
+=head1 DESCRIPTION
+
+L<EGQuery|Bio::DB::EUtilities::egquery> provides Entrez database counts
+in XML for a single search using NCBI's Global Query.  No further parsing of
+the XML data is processed at this time.
+
+=head2 NCBI EGQuery Parameters
+
+The following are a general list of parameters that can be used to take
+advantage of EGQuery.  Up-to-date help for EGQuery is available at this URL
+(the information below is a summary of the options found there):
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html
+
+=over 3
+
+=item C<term>
+
+Search term or phrase with or without Boolean operators.  This can use search
+field descriptions and tags (Note: these may be database specific and are
+better used with L<ESearch|Bio::DB::EUtilities::esearch>).
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::egquery;
+use strict;
+use warnings;
+
+use vars qw($EUTIL);
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'egquery';
+
+# Initialise an egquery object: run the parent initialiser, mark the object
+# as an 'egquery' eutil, and store the optional -term parameter.
+sub _initialize {
+    my ($self, @args ) = @_;
+    $self->SUPER::_initialize(@args);
+    # '@args' had been mangled to 'at args' by the list archive's address
+    # obfuscation, which is a syntax error
+    my ($term) = $self->_rearrange([qw(TERM)], @args);
+    # set by default
+    $self->_eutil($EUTIL);
+    $term           && $self->term($term);
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->_parse_response($content)
+ Function: parse out response for cookie
+ Returns : empty
+ Args    : none
+ Throws  : 'unparseable output exception'
+
+=cut
+
+# EGQuery doesn't have error checking, so this is NOOP for now
+
+sub parse_response {
+    # deliberately does nothing; any arguments are ignored
+    return;
+}
+
+1;
+__END__
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/einfo.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/einfo.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/einfo.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,253 @@
+# $Id: einfo.pm,v 1.8.4.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::einfo
+#
+# Cared for by Chris Fields
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::einfo - NCBI database information
+
+=head1 SYNOPSIS
+
+  my $einfo = Bio::DB::EUtilities->new(
+                                     -eutil    => 'einfo',
+                                     -db       => 'pubmed'
+                                      );
+
+  print $einfo->get_response->content;
+
+=head1 DESCRIPTION
+
+L<EInfo|Bio::DB::EUtilities::einfo> queries provide
+information about NCBI databases.  At this time, data is postprocessed
+for a complete list of Entrez databases (when '-C<db>' is not set) or for
+specific database information, number of entries, date of the last update, or
+Field or Link information.  Using the base URL with no
+parameters returns a list of all Entrez databases.
+
+=head2 Parameters
+
+The following are a general list of parameters that can be used to take
+advantage of EInfo.  Up-to-date help for EInfo is available at this URL
+(the information below is a summary of the options found there):
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html
+
+=over 3
+
+=item C<db>
+
+Database parameter.  This is optional; not setting this will return a list of
+all the available Entrez databases.  If a database is specified, returned XML
+data will provide the following information : field names, index term counts,
+last update, and available links for each Entrez database.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::einfo;
+use strict;
+use warnings;
+use XML::Simple;
+#use Data::Dumper;
+
+use vars qw($EUTIL);
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'einfo';
+
+# Initialise an einfo object: run the parent initialiser with the same
+# arguments, then mark the object as an 'einfo' eutil.
+sub _initialize {
+    my $self = shift;
+    $self->SUPER::_initialize(@_);
+    # set by default
+    return $self->_eutil($EUTIL);
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->_parse_response($content)
+ Function: parse out response for cookie
+ Returns : empty
+ Args    : none
+ Throws  : 'unparseable output exception'
+
+=cut
+
+sub parse_response {
+    # Plain unpacking: 'my $response = shift if @_' has undefined behaviour
+    # and can leak the response from a previous call.
+    my ($self, $response) = @_;
+    # Guard the isa() call so a plain string or unblessed reference gives
+    # the intended exception instead of a "can't call method" error.
+    my $is_http_response = do {
+        local $@;
+        ref($response) && eval { $response->isa("HTTP::Response") };
+    };
+    if (!$is_http_response) {
+        $self->throw("Need HTTP::Response object");
+    }
+    my $xs = XML::Simple->new();
+    my $simple = $xs->XMLin($response->content,
+                            forcearray => [qw(DbName Field Link)]);
+    #$self->debug("Response dumper:\n".Dumper($simple));
+    # check for errors
+    if ($simple->{ERROR}) {
+        $self->throw("NCBI einfo nonrecoverable error: ".$simple->{ERROR});
+    }
+    # a bare database listing: keep the names and stop here
+    if (exists $simple->{DbList}->{DbName}) {
+        $self->{'_einfo_dbname'} = $simple->{DbList}->{DbName};
+        return;
+    }
+    # start setting internal variables
+    my $dbinfo = $simple->{DbInfo} || {};
+    for my $key (sort keys %{$dbinfo}) {
+        # FieldList and LinkList are unwrapped to their element arrays
+        my $data =
+        ($key eq 'FieldList') ? $dbinfo->{$key}->{Field} :
+        ($key eq 'LinkList' ) ? $dbinfo->{$key}->{Link}  :
+        $dbinfo->{$key};
+        $self->_set_einfo_data($key, $data);
+    }
+    return;
+}
+
+# Return the stored database names ('_einfo_dbname').  List context
+# flattens an array reference into a list; in every other case the stored
+# value is returned as is.
+sub einfo_dbs {
+    my ($self) = @_;
+    my $dbnames = $self->{'_einfo_dbname'};
+    my $is_array = ref($dbnames) =~ m{ARRAY}i;
+    return @{$dbnames} if wantarray && $is_array;
+    return $dbnames;
+}
+
+=head2 einfo_dbfield_info
+
+ Title   : einfo_dbfield_info
+ Usage   : @fields = $info->einfo_dbfield_info;
+ Function: gets array of hashes with field information
+ Returns : An array or array reference (based on wantarray) of hashes
+           with information about each field 
+ Args    : None (this is set using the _set_einfo_data method)
+
+=cut
+
+sub einfo_dbfield_info {
+    my $self = shift;
+    # No field list stored: hand back an empty list instead of dying on
+    # an undefined array dereference under strict refs.
+    return @{ $self->{'_einfo_fieldlist'} || [] } if wantarray;
+    return $self->{'_einfo_fieldlist'};
+}
+
+=head2 einfo_dblink_info
+
+ Title   : einfo_dblink_info
+ Usage   : @links = $info->einfo_dblink_info;
+ Function: gets array of hashes with link information
+ Returns : An array or array reference (based on wantarray) of hashes
+           with information about each link 
+ Args    : None (this is set using the _set_einfo_data method)
+
+=cut
+
+sub einfo_dblink_info {
+    my $self = shift;
+    # No link list stored: hand back an empty list instead of dying on
+    # an undefined array dereference under strict refs.
+    return @{ $self->{'_einfo_linklist'} || [] } if wantarray;
+    return $self->{'_einfo_linklist'};
+}
+
+=head2 einfo_db_lastupdate
+
+ Title   : einfo_db_lastupdate
+ Usage   : $date = $info->einfo_db_lastupdate;
+ Function: returns last date database was updated
+ Returns : String containing date
+ Args    : None (this is set using the _set_einfo_data method)
+
+=cut
+
+sub einfo_db_lastupdate {
+    my ($self) = @_;
+    # read-only: the slot is filled by _set_einfo_data
+    return $self->{'_einfo_lastupdate'};
+}
+
+=head2 einfo_db_desc
+
+ Title   : einfo_db_desc
+ Usage   : $desc = $info->einfo_db_desc;
+ Function: returns database description
+ Returns : String containing descriptions
+ Args    : None (this is set using the _set_einfo_data method)
+
+=cut
+
+sub einfo_db_desc {
+    my ($self) = @_;
+    # read-only: the slot is filled by _set_einfo_data
+    return $self->{'_einfo_description'};
+}
+
+=head2 einfo_db_count
+
+ Title   : einfo_db_count
+ Usage   : $count = $info->einfo_db_count;
+ Function: returns database record count
+ Returns : Integer (number of database records)
+ Args    : None (this is set using the _set_einfo_data method)
+
+=cut
+
+sub einfo_db_count {
+    my ($self) = @_;
+    # read-only: the slot is filled by _set_einfo_data
+    return $self->{'_einfo_count'};
+}
+
+# no methods for MenuName
+
+# set method
+
+# Store one piece of parsed einfo data under '_einfo_' . lc($key).
+#
+# Args    : key name, data (scalar or reference)
+# Returns : nothing
+# Throws  : 'No data' when the key is missing/empty or the data is undefined
+sub _set_einfo_data {
+    my ($self, $key, $data) = @_;
+    # Test definedness, not truth: a legitimate value of 0 (for example a
+    # record count of an empty database) must be stored, not rejected.
+    if (!defined $data || !defined $key || $key eq '') {
+        $self->throw('No data');
+    }
+    my $info_key = '_einfo_'.lc($key);
+    # TODO: make a deep copy of data; the reference is currently shared
+    $self->{$info_key} = $data;
+    return;
+}    
+
+1;
+__END__
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/elink.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/elink.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/elink.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,662 @@
+# $Id: elink.pm,v 1.24.4.3 2006/11/10 16:48:09 cjfields Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::elink
+#
+# Cared for by Chris Fields <cjfields at uiuc dot edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::elink - check for and retrieve external or related ID's
+from a list of one or more primary ID's, including relevancy scores.
+
+=head1 SYNOPSIS
+
+B<Do not use this module directly.>  Use it via the
+L<Bio::DB::EUtilities|Bio::DB::EUtilities> class.
+
+  # chain EUtilities for complex queries
+
+  use Bio::DB::EUtilities;
+
+  my $esearch = Bio::DB::EUtilities->new(-eutil      => 'esearch',
+                                         -db         => 'pubmed',
+                                         -term       => 'hutP',
+                                         -usehistory => 'y');
+
+  $esearch->get_response; # parse the response, fetch a cookie
+
+  my $elink = Bio::DB::EUtilities->new(-eutil        => 'elink',
+                                       -db           => 'protein,taxonomy',
+                                       -dbfrom       => 'pubmed',
+                                       -cookie       => $esearch->next_cookie,
+                                       -cmd          => 'neighbor');
+
+  # this retrieves the Bio::DB::EUtilities::ElinkData object
+
+  my ($linkset) = $elink->next_linkset;
+  my @ids;
+
+  # step through IDs for each linked database in the ElinkData object
+
+  for my $db ($linkset->get_databases) {   
+    @ids = $linkset->get_LinkIds_by_db($db); #returns primary ID's
+    # do something here
+  }
+
+  # multiple ID groups (for one-to-one-correspondence of IDs)
+
+  my $elink = Bio::DB::EUtilities->new(-eutil        => 'elink',
+                                       -db           => 'all',
+                                       -dbfrom       => 'protein',
+                                       -id           => [\@id1, @ids2],
+                                       -multi_id     => 1,
+                                       -cmd          => 'neighbor');
+
+  while (my $linkset = $elink->next_linkset) {
+    for my $db ($linkset->get_databases) {
+      my @ids = $linkset->get_LinkIds_by_db($db); #returns primary ID's
+      # do something here
+    }
+  }
+
+  # to retrieve scores for a linkset
+
+  while (my $linkset = $elink->next_linkset) {
+    my @score_dbs = $linkset->has_scores; # retrieve databases with score values
+    for my $db (@score_dbs) {
+      my @ids = $linkset->get_LinkIds_by_db($db); #returns primary ID's
+      $linkset->set_score_db($db);  # to current database containing scores
+      for my $id (@ids) {
+         my $score = $linkset->get_score($id);
+         # do something here, like screen for IDs based on score
+      }
+    }
+  }
+
+  # or just receive a hash containing ID-score key-value pairs
+
+  while (my $linkset = $elink->next_linkset) {
+    my @score_dbs = $linkset->has_scores; 
+    for my $db (@score_dbs) {
+      $linkset->set_score_db($db);
+      %scores = $linkset->get_score_hash;
+    }
+  }
+
+=head1 DESCRIPTION
+
+B<WARNING>: Please do B<NOT> spam the Entrez web server with multiple requests.
+
+The EUtility Elink is used to check for and retrieve external or related ID's
+from a list of one or more primary ID's.  Using the C<cmd> parameter, one can
+vary the returned data.  See the below command options for explanations on
+returned XML output.  For certain command options one can retrieve one or more
+L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie> objects to be used in
+other EUtility searches or efetch primary IDs.  Others will return the ID
+information and relevancy scores in one or more
+L<Bio::DB::EUtilities::ElinkData|Bio::DB::EUtilities::ElinkData> objects.
+
+=head2 NCBI ELink Parameters
+
+The following are a general list of parameters that can be used to take
+advantage of ELink.  Up-to-date help for ELink is available at this URL
+(the information below is a summary of the options found there):
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html
+
+=over 3
+
+=item C<db>
+
+One or more database available through EUtilities. If set to 'all', will
+retrieve all relevant information from each database based on the C<cmd>
+parameter (the default setting is to retrieve related primary ID's).  One
+interesting behaviour is when C<db> and C<dbfrom> are set to the same database;
+related IDs from database are retrieved along with a relevancy score.  This
+score differs from database to database; if protein-protein elinks are sought,
+the scores are generated from BLASTP
+
+=item C<dbfrom>
+
+originating database; useful only if using directly when querying with ID's
+
+=item C<id>
+
+a list of primary ID's
+
+Below are a list of IDs which can be used with ELink:
+
+B<PMID> (pubmed), B<MIM number> (omim), B<GI number> (nucleotide, protein),
+B<Genome ID> (genome), B<Popset ID> (popset), B<SNP cluster ID> (snp),
+B<UniSTS ID> (unists), B<UniGene cluster ID> (unigene), B<MMDB-ID> (structure),
+B<PSSM-ID> (cdd), B<3D SDI> (domains), B<TAXID> (taxonomy), B<GEO ID> (geo)
+
+=item C<reldate>
+
+limits results to the number of days preceding today's date
+
+=item C<mindate>, C<maxdate>
+
+limits results by dates (C<yyyy/mm/dd> format, or by year)
+
+=item C<term>
+
+limits results by Entrez query (only valid when C<cmd=neighbor> within a single
+database)
+
+=item C<retmode>
+
+set to XML, but can be changed to ref when needed
+
+=item C<cmd>
+
+command values (see below)
+
+=item C<holding>
+
+list LinkOut URLs for specified holding provider; used with C<cmd=llinks>
+or C<cmd=llinkslib> (rarely used)
+
+=back
+
+=head2 Additional (Bioperl-related) Parameters
+
+The following are a general list of parameters that can be used to take
+advantage of ELink.  Up-to-date help for ELink is available at this URL
+(the information below is a summary of the options found there):
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html
+
+=over 3
+
+=item C<eutil>
+
+The relevant EUtility to be used (elink).  
+
+=item C<cookie>
+
+Uses a L<Cookie|Bio::DB::EUtilities::Cookie>-based search (see below)
+
+=item C<multi_id>
+
+Sets a flag to treat the ID data (C<id> parameter) as multiple ID groups (see
+below).
+
+=item C<keep_cookies>
+
+Sets a flag to retain the cookie queue (this is normally cleared
+before 
+
+=back
+
+=head2 Command Values
+
+Command values are set using the C<cmd> parameter. 
+
+=over 3
+
+=item C<prlinks>
+
+List the hyperlink to the primary LinkOut provider for multiple IDs and
+database. Each ID is processed separately.
+
+=item C<prlinks&retmode=ref>
+
+Create a hyperlink to the primary LinkOut provider for a single ID and database.
+
+=item C<llinks>
+
+List LinkOut URLs and Attributes, except PubMed libraries, for multiple IDs
+and database. Each ID is processed separately.
+
+=item C<llinkslib>
+
+List LinkOut URLs and Attributes for multiple IDs and database.  Each ID is
+processed separately.
+
+=item C<lcheck>
+
+Check for the existence (Y or N) of an external link for multiple IDs and
+database.
+
+=item C<ncheck>
+
+Check for the existence of a neighbor link for each ID within a database,
+e.g., Related Articles in PubMed.
+
+=item C<neighbor>
+
+The default setting. Display neighbors and their scores within a database.
+This module will parse XML output from an ELink query and will return a
+L<Bio::DB::EUtilities::ElinkData> object, which contains IDs for every database
+linked to using C<db> (see C<id> and C<db> for more details).  
+
+=item C<neighbor_history>
+
+Create history (WebEnv & query_key) for use in other EUtilities.
+
+=item C<acheck>
+
+Lists Entrez databases links for multiple IDs from a single database.
+
+=back
+
+=head2 Cookies
+
+Some EUtilities (C<epost>, C<esearch>, or C<elink>) are able to retain information on
+the NCBI server under certain settings.  This information can be retrieved by
+using a B<cookie>.  Here, the idea of the 'cookie' is similar to the 'cookie' set
+on a user's computer when browsing the Web.  XML data returned by these
+EUtilities, when applicable, is parsed for the cookie information (the 'WebEnv'
+and 'query_key' tags to be specific).  The information (along with other identifying
+data, such as the calling eutility, description of query, etc.) is stored as a
+L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie> object in an internal
+queue.  These can be retrieved one at a time by using the next_cookie method or
+all at once in an array using get_all_cookies.  Each cookie can then be 'fed',
+one at a time, to another EUtility object, thus enabling chained queries as
+demonstrated in the synopsis.
+
+For more information, see the POD documentation for
+L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie>.
+
+=head2 ElinkData Objects
+
+Due to the diversity of information that can be returned via elink, a special
+object (ElinkData) has been created to hold data parsed from the XML output. This
+object holds returned IDs, scores, and potentially additional data as the need
+arises.  ElinkData objects are stored in an internal queue much like for Cookie
+objects; similarly, they can be accessed using L<next_linkset> and
+L<get_all_linksets>.  If a simple search is initiated, where one database is
+queried using one set of IDs, the default EUtilities method C<get_ids> can be
+used to retrieve the IDs.  If more than one database is specified for a single
+set of IDs, (such as when C<db> is set to 'all' or a comma-separated list, like
+'protein,taxonomy'), the database must be passed explicitly to C<get_ids> as an
+argument to retrieve the relevant IDs.
+
+The most complicated situation comes when using multiple ID groups (see below).
+This requires that each ID group have a separate set of data (a linkset), each
+with potential multiple databases, multiple IDs, and so on.  Linkset data is
+stored in a special object
+(L<Bio::DB::EUtilities::ElinkData|Bio::DB::EUtilities::ElinkData>).
+
+For more information, see the POD documentation for
+L<Bio::DB::EUtilities::ElinkData|Bio::DB::EUtilities::ElinkData>.
+
+=head1 CURRENT USES
+
+=head2 Complex queries
+
+Chaining queries for retrieving related data using elink and other EUtilities is
+now possible (see the L</"SYNOPSIS"> for an example).  For instance, one can
+grab a large number of taxon IDs using protein/nucleotide IDs; these can be
+retrieved directly or saved on the server (setting C<cmd> to 'neighbor_history'),
+and the cookie passed on to efetch.
+
+=head2 Retrieving relevancy scores
+
+When the C<db> and C<dbfrom> parameters are set to the same database, one can
+retrieve relevancy scores for a single ID.  These are based on several different
+factors.  For proteins, they are precomputed BLASTP scores, so this is actually
+a quick way to get the best hits without having to run BLASTP directly!
+Similarly, scores returned for nucleotide-nucleotide are based on BLASTN scores.
+
+=head2 Multiple ID groups
+
+When C<multi_id> flag is set to a TRUE value, the id list is built based on
+different set of factors.  The default method for submitting an ID list for
+a query request for any EUtility is by having the C<id> parameter set to
+an array reference (multiple IDs) or pass a single ID as a scalar, like this:
+
+  -id  => \@ids,
+  -id  => '1621261',
+
+L<Bio::DB::EUtilities::elink|Bio::DB::EUtilities::elink> has the additional
+capability to submit ID groups where searches are performed on each ID group
+independently.  This is accomplished by setting the C<multi_id> flag to true,
+which indicates that the ID list will be evaluated as an array reference, with
+each ID group represented by another array reference or a single ID.  So, with
+C<multi_id> set to TRUE:
+
+  -id  => \@ids,  # evaluates each ID in the array independently
+  ...
+  -id  => [@ids], # same as above
+  ...
+  -id  => [\@ids, $id], # IDs in @ids are grouped together for one search
+                        # while single ID in scalar is searched independently
+
+It can get tricky:
+
+  -id  => [\@ids, $id1, @ids2], # IDs in @ids are grouped together; $id1 and the
+                                # IDs in @ids2 are flattened and evaluated independently
+
+This enables one-to-one correspondence with the returned data, so that one
+can determine, per ID, what the matching ELink ID is.  The default is to
+return them all as a group (no one-to-one correspondence).  Using a small ID
+array, C<multi_id> set to TRUE, '-id =E<gt> \@ids', and this loop:
+
+  while (my $linkset = $elink->next_linkset) {
+    print "Query ID : ",join q(,), $linkset->query_id,"\n";
+    print "\tTax ID : ",join q(,), $linkset->get_LinkIds_by_db('taxonomy'),"\n";
+  }
+
+gets this result:
+
+    Query ID : 1621261,
+            Tax ID : 83332,
+    Query ID : 31618162,
+            Tax ID : 233413,
+    Query ID : 31792573,
+            Tax ID : 233413,
+
+Setting C<multi_id> to FALSE or not setting, using all other conditions above,
+gets this result:
+
+    Query ID : 31792573,31618162,1621261,
+            Tax ID : 233413,83332,
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the 
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::elink;
+
+use strict;
+use warnings;
+
+use Bio::DB::EUtilities::Cookie;
+use Bio::DB::EUtilities::ElinkData;
+use XML::Simple;
+#use Data::Dumper;
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'elink';      # handed to _eutil() in _initialize
+our $DTDVERSION = '1';     # fallback for the -version argument in _initialize
+    # accepted -cmd values (not all are parsed yet); _initialize ignores anything else
+
+our %CMD = ('prlinks'   => 1,
+            'llinks'    => 1,
+            'llinkslib' => 1,
+            'lcheck'    => 1,
+            'ncheck'    => 1,
+            'neighbor'  => 1,
+            'neighbor_history'  => 1,
+            'acheck'    => 1,
+           );
+
+sub _initialize {
+    my ($self, @args ) = @_;
+    $self->SUPER::_initialize(@args);
+    # the variables must line up one-for-one with the keys given to _rearrange
+    my ($term, $field, $reldate, $mindate, $maxdate, $datetype, $multi_id, $retstart,
+        $retmax, $report, $dbfrom, $cmd, $holding, $version, $linkname) =
+      $self->_rearrange([qw(TERM FIELD RELDATE MINDATE MAXDATE DATETYPE MULTI_ID
+        RETSTART RETMAX REPORT DBFROM CMD HOLDING VERSION LINKNAME)], @args);
+    $self->_eutil($EUTIL);  # set by default
+    # defaults which can be overridden
+    # Note : retmode should be 'xml' for all elink queries except when cmd=prlinks
+    $datetype ||= 'mdat';
+    $self->datetype($datetype);
+    $version ||= $DTDVERSION; # DTD to use, should leave alone
+    $self->version($version);
+    # normal settings: each is applied only when a true value was supplied
+    $term       && $self->term($term);
+    $field      && $self->field($field);
+    $reldate    && $self->reldate($reldate);
+    $mindate    && $self->mindate($mindate);
+    $maxdate    && $self->maxdate($maxdate);
+    $retstart   && $self->retstart($retstart);
+    $retmax     && $self->retmax($retmax);
+    $report     && $self->report($report);
+    $dbfrom     && $self->dbfrom($dbfrom);
+    # validate cmd against %CMD, otherwise don't set
+    $cmd        && exists $CMD{$cmd}  &&  $self->cmd($cmd);
+    $holding    && $self->holding($holding);
+    $linkname   && $self->linkname($linkname);
+    $multi_id   && $self->multi_id($multi_id);
+    $self->{'_linksetindex'} = 0;   # cursor used by next_linkset
+    $self->{'_linksets'} = [];      # cache of ElinkData objects
+    $self->{'_ls_ct'} = 0;          # number of cached linksets
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $elink->parse_response($content)
+ Function: parse out response for cookie and/or id's
+ Returns : none
+ Args    : HTTP::Response object
+ Throws  : 'NCBI elink nonrecoverable error'
+           'No links' error
+
+=cut
+
+sub parse_response {
+    # to add: parsing for dbfrom/dbto ids, tagging cookies with databases
+    my $self    = shift;
+    my $response = shift;   # plain 'my'; a 'my ... if' declaration has undefined behaviour
+    if (!$response || !$response->isa("HTTP::Response")) {
+        $self->throw("Need HTTP::Response object");
+    }
+    my $xs = XML::Simple->new();
+    my $simple = $xs->XMLin($response->content,
+            forcearray => [qw(LinkSet LinkSetDb LinkSetDbHistory Link)]);
+    # check for errors
+    if (exists $simple->{ERROR}) {
+        $self->throw("NCBI elink nonrecoverable error: ".$simple->{ERROR});
+    }
+    #$self->debug("Response dumper:\n".Dumper($simple));
+    my $cmd = $self->cmd ? $self->cmd : 'neighbor'; # set default cmd
+    # process possible cookies first
+    if (defined($cmd) && $cmd eq 'neighbor_history') {
+        # one LinkSet at a time; no scores when using history (only ids)
+        if (!exists $simple->{LinkSet} ) {
+            $self->warn('No link history');
+        }
+        for my $linkset (@{ $simple->{LinkSet} }) {
+            my $webenv = $linkset->{WebEnv};
+            my $dbfrom =  $linkset->{DbFrom};
+            my $from_ids = $linkset->{IdList}->{Id};
+            # Id is not in forcearray, so wrap a non-reference value in an array ref
+            $from_ids = [$from_ids] if !ref($from_ids);
+            for my $history (@{ $linkset->{LinkSetDbHistory} }) {
+                my $query_key = $history->{QueryKey};
+                # skip entries without a key or whose Info reads 'Empty result';
+                # 'exists' binds tighter than 'eq', so the value is tested separately
+                next if !$query_key
+                    || (exists $history->{Info} && $history->{Info} eq 'Empty result');
+                my $lname = $history->{LinkName};
+                my $db = $history->{DbTo};
+                my $cookie = Bio::DB::EUtilities::Cookie->new(
+                                        -verbose   => $self->verbose,
+                                        -webenv    => $webenv,
+                                        -querykey  => $query_key,
+                                        -eutil     => 'elink',
+                                        -database  => $db,
+                                        -dbfrom    => $dbfrom,
+                                        -query_id  => $from_ids,
+                                        -linkname  => $lname,
+                                        );
+                $self->add_cookie($cookie);
+            }
+        }
+        return;
+    }
+    elsif ($cmd eq 'neighbor' || !$cmd) {
+        if (!exists $simple->{LinkSet}) {
+            $self->warn('No returned links.');
+            return;
+        }
+        for my $linkset (@{ $simple->{LinkSet} }) {
+            my $linkobj = Bio::DB::EUtilities::ElinkData->new
+                                (-verbose => $self->verbose,
+                                 -command =>$cmd);
+            my $status = $linkobj->_add_set($linkset);
+            $self->_add_linkset($linkobj) if $status;  # cache only sets that parsed
+        }
+    } else {
+        $self->debug("$cmd not yet supported; no parsing occurred");
+        return;
+        # need to add a few things for cmd=llinks
+    }
+}
+
+=head2 multi_id
+
+ Title   : multi_id
+ Usage   : $elink->multi_id(1);
+ Function: gets/sets value (switch for using multiple ids)
+ Returns : Boolean (value evaluating to true or false)
+ Args    : Boolean (value evaluating to true or false)
+
+=cut
+
+sub multi_id {
+	my $self = shift;
+	return $self->{'_multi_id'} = shift if @_;
+	return $self->{'_multi_id'};
+}
+
+=head2 next_linkset
+
+ Title   : next_linkset
+ Usage   : $ls = $elink->next_linkset;
+ Function: returns next linkset in internal cache of 
+         : Bio::DB::EUtilities::ElinkData objects
+ Returns : next Bio::DB::EUtilities::ElinkData object; undef when none are left
+ Args    : None
+
+=cut
+
+sub next_linkset {
+    my $self = shift;
+    my $index = $self->_next_linkset_index;          # cursor advances on every call
+    return if $index >= @{ $self->{'_linksets'} };   # count elements, not the ref's address
+    return $self->{'_linksets'}->[$index];
+}
+
+=head2 get_all_linksets
+
+ Title   : get_all_linksets
+ Usage   : @ls = $elink->get_all_linksets;
+ Function: returns array of Bio::DB::EUtilities::ElinkData objects
+ Returns : array or array ref of Bio::DB::EUtilities::ElinkData objects
+           based on wantarray
+ Args    : None
+
+=cut
+
+sub get_all_linksets {
+    my ($self) = @_;
+    my $cache = $self->{'_linksets'};
+    return wantarray ? @{ $cache } : $cache;   # list of objects, or the array ref itself
+}
+
+=head2 reset_linksets
+
+ Title   : reset_linksets
+ Usage   : $elink->reset_linksets;
+ Function: resets (empties) internal cache of Linkset objects
+ Returns : None
+ Args    : None
+
+=cut
+
+sub reset_linksets {
+    my ($self) = @_;
+    $self->rewind_linksets;          # cursor back to the first position
+    $self->{'_linksets'} = [];       # drop every cached linkset
+    $self->{'_ls_ct'} = 0;
+}
+
+=head2 rewind_linksets
+
+ Title   : rewind_linksets
+ Usage   : $elink->rewind_linksets;
+ Function: resets linkset index to 0 (starts over)
+ Returns : None
+ Args    : None
+
+=cut
+
+sub rewind_linksets {
+    my ($self) = @_;
+    $self->{'_linksetindex'} = 0;    # next_linkset starts again from the first linkset
+}
+
+=head2 get_linkset_count
+
+ Title   : get_linkset_count
+ Usage   : $ct = $elink->get_linkset_count;
+ Function: returns total # of linksets in Elink object
+ Returns : Integer (# linksets)
+ Args    : None
+
+=cut
+
+sub get_linkset_count {
+    my ($self) = @_;
+    return $self->{'_ls_ct'};        # counter maintained by _add_linkset / reset_linksets
+}
+
+# holds and changes linkset index for next_linkset
+
+sub _next_linkset_index {
+    my ($self) = @_;
+    return $self->{'_linksetindex'}++;   # hand out the current index, then advance it
+}
+
+# private method : parse linkset data and add ElinkData objects to linkset cache
+
+sub _add_linkset {
+    my ($self, @queued) = @_;
+    if (@queued) {
+        my $linkset = $queued[0];    # only the first argument is used
+        $linkset->isa("Bio::DB::EUtilities::ElinkData") or $self->throw(
+            "Expecting a Bio::DB::EUtilities::ElinkData, got $linkset.");
+        push @{ $self->{'_linksets'} }, $linkset;
+        $self->{'_ls_ct'}++;         # keeps get_linkset_count in step with the cache
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/epost.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/epost.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/epost.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,172 @@
+# $Id: epost.pm,v 1.7.4.3 2006/11/10 16:48:09 cjfields Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::epost
+#
+# Cared for by Chris Fields
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::epost - posting IDs on the remote NCBI server for batch
+retrieval and chained queries
+
+=head1 SYNOPSIS
+
+    my $epost = Bio::DB::EUtilities->new(
+                                          -eutil    => 'epost',
+                                          -id       => \@ids,
+                                          -db       => 'protein',
+                                          );
+
+    $epost->get_response;
+
+=head1 DESCRIPTION
+
+B<WARNING>: Please do B<NOT> spam the Entrez web server with multiple requests.
+
+The EUtility EPost is used to post a list of primary IDs to the NCBI EUtilities
+server for retrieval by L<EFetch|Bio::DB::EUtilities::efetch> or for using in
+further searches using L<ELink|Bio::DB::EUtilities::elink> or
+L<ESearch|Bio::DB::EUtilities::esearch>.  The data is posted using:
+
+    $epost->get_response;
+
+When not used in void context, this will also return a
+L<HTTP::Response|HTTP::Response> object for further processing.  This is not
+necessary, as any posts made will automatically generate a
+L<Cookie|Bio::DB::EUtilities::Cookie>,
+which can be used to retrieve the posted information using
+L<EFetch|Bio::DB::EUtilities::efetch>.
+
+Using EPost is recommended for retrieving large lists of primary IDs and is
+capable, when used repeatedly and in combination with EFetch, of retrieving
+thousands of database entries.  
+
+=head2 Parameters
+
+The following are a general list of parameters that can be used to take
+advantage of EPost.  Up-to-date help for EPost is available at this URL
+(the information below is a summary of the options found there):
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html
+
+=over 3
+
+=item C<db>
+
+The name of an Entrez database available through EUtilities. 
+
+=item C<id>
+
+a list of primary ID's
+
+Below are a list of IDs which can be used with EPost:
+
+B<PMID> (pubmed), B<MEDLINE UI> (NIH MedLine), B<MIM number> (omim),
+B<GI number> (nucleotide, protein), B<MMDB-ID> (structure), B<TAXID> (taxonomy)
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::epost;
+use strict;
+use warnings;
+use Bio::DB::EUtilities::Cookie;
+use XML::Simple;
+#use Data::Dumper;
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'epost';     # handed to _eutil() in _initialize
+our $RETMODE = 'xml';     # handed to retmode() in _initialize
+
+sub _initialize {
+    my $self = shift;
+    my @args = @_;
+    $self->SUPER::_initialize(@args);
+    $self->_eutil($EUTIL);        # package default for this class
+    $self->retmode($RETMODE);     # package default for this class
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->parse_response($response)
+ Function: parse out response for cookie
+ Returns : the HTTP::Response object that was passed in
+ Args    : HTTP::Response object
+ Throws  : 'NCBI epost nonrecoverable error'
+
+=cut
+
+sub parse_response {
+    my $self    = shift;
+    my $response = shift;   # plain 'my'; a 'my ... if' declaration has undefined behaviour
+    if (!$response || !($response->isa("HTTP::Response"))) {
+        $self->throw("Need HTTP::Response object");
+    }
+    my $xs = XML::Simple->new();
+    my $simple = $xs->XMLin($response->content);
+    #$self->debug("Response dumper:\n".Dumper($simple));
+    # check for errors
+    if ($simple->{ERROR}) {
+        $self->throw("NCBI epost nonrecoverable error: ".$simple->{ERROR});
+    }
+    if ($simple->{InvalidIdList}) {   # reported as a warning; the cookie is still created
+        $self->warn("NCBI epost error: Invalid ID List: ".$simple->{InvalidIdList});
+    }
+    my $db = $self->db;
+    my $webenv    = $simple->{WebEnv};
+    my $querykey  = $simple->{QueryKey};
+    my $cookie = Bio::DB::EUtilities::Cookie->new(-webenv   => $webenv,
+                                                  -querykey => $querykey,
+                                                  -eutil    => 'epost',
+                                                  -database => $db,
+                                                  );
+    $self->add_cookie($cookie);
+    return $response;   # the object passed in, handed back unchanged
+}
+
+1;
+__END__
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esearch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esearch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esearch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,176 @@
+# $Id: esearch.pm,v 1.11.4.2 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::esearch
+#
+# Cared for by Chris Fields
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Part of the EUtilities BioPerl package
+
+=head1 NAME
+
+Bio::DB::EUtilities::esearch - search an Entrez database at NCBI and
+retrieve the matching primary IDs, record count and (optionally) a Cookie.
+You shouldn't use this class directly.
+
+=head1 SYNOPSIS
+
+*** Give standard usage here
+
+=head1 DESCRIPTION
+
+*** Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::esearch;
+use strict;
+use warnings;
+use Bio::DB::EUtilities::Cookie;
+use XML::Simple;
+#use Data::Dumper;
+
+use vars qw($EUTIL);
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'esearch';   # handed to _eutil() in _initialize
+
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    # pull the esearch-specific named arguments out of the constructor list
+    my ($term, $field, $reldate, $mindate, $maxdate, $datetype, $rettype,
+        $retstart, $retmax, $sort, $usehistory)
+      = $self->_rearrange([qw(TERM FIELD RELDATE MINDATE MAXDATE DATETYPE
+                              RETTYPE RETSTART RETMAX SORT USEHISTORY)],
+                          @args);
+    $self->_eutil($EUTIL);
+    # datetype falls back to 'mdat'; everything else is only set when true
+    $self->datetype($datetype || 'mdat');
+    $self->term($term)              if $term;
+    $self->field($field)            if $field;
+    $self->reldate($reldate)        if $reldate;
+    $self->mindate($mindate)        if $mindate;
+    $self->maxdate($maxdate)        if $maxdate;
+    $self->retstart($retstart)      if $retstart;
+    $self->retmax($retmax)          if $retmax;
+    $self->rettype($rettype)        if $rettype;
+    $self->sort_results($sort)      if $sort;
+    $self->usehistory($usehistory)  if $usehistory;
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->parse_response($response)
+ Function: parse out response for count, IDs and (with usehistory) cookie
+ Returns : none
+ Args    : HTTP::Response object
+ Throws  : 'NCBI esearch nonrecoverable error'
+
+=cut
+
+sub parse_response {
+    my $self    = shift;
+    my $response = shift;   # plain 'my'; a 'my ... if' declaration has undefined behaviour
+    if (!$response || !$response->isa("HTTP::Response")) {
+        $self->throw("Need HTTP::Response object");
+    }
+    my $history = $self->usehistory;
+    my $db = $self->db;
+    my $xs = XML::Simple->new();
+    my $simple = $xs->XMLin($response->content);
+    #$self->debug("Response dumper:\n".Dumper($simple));
+    # check for major and minor errors and warnings
+    if ($simple->{ERROR}) {
+        $self->throw("NCBI esearch nonrecoverable error: ".$simple->{ERROR});
+    }
+    if ($simple->{ErrorList} || $simple->{WarningList}) {
+        my %errorlist = %{ $simple->{ErrorList} || {} };      # either list may be absent
+        my %warninglist = %{ $simple->{WarningList} || {} };
+        my $err_warn = '';   # start defined so chomp never sees undef
+        for my $key (sort keys %errorlist) {
+            $err_warn .= "Error : $key = $errorlist{$key}\n";
+        }    
+        for my $key (sort keys %warninglist) {
+            $err_warn .= "Warning : $key = $warninglist{$key}\n";
+        }
+        chomp($err_warn);
+        $self->warn("NCBI esearch Errors/Warnings:\n".$err_warn);
+    }
+    my $count = $simple->{Count};
+    $self->esearch_count($count);
+    my $id_ref = $simple->{IdList}->{Id};
+    $self->_add_db_ids($id_ref) if ($id_ref);
+    if ($history && $history eq 'y') {   # a cookie is only built when usehistory is 'y'
+        my $webenv = $simple->{WebEnv};
+        my $querykey = $simple->{QueryKey};
+        my $cookie = Bio::DB::EUtilities::Cookie->new(
+                                         -term      => $self->term,
+                                         -webenv    => $webenv,
+                                         -querykey  => $querykey,
+                                         -eutil     => 'esearch',
+                                         -database  => $db,
+                                         -total     => $count
+                                        );
+        $self->add_cookie($cookie);
+    }
+}
+
+=head2 esearch_count
+
+ Title   : esearch_count
+ Usage   : $count = $db->esearch_count;
+ Function: return count of number of entries retrieved by query
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub esearch_count {
+    my ($self, @value) = @_;
+    $self->{'_esearch_count'} = $value[0] if @value;   # setter only when an argument is given
+    return $self->{'_esearch_count'};
+}
+
+1;
+__END__
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esummary.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esummary.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities/esummary.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,336 @@
+# $Id: esummary.pm,v 1.8.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities::esummary
+#
+# Cared for by Chris Fields <cjfields at uiuc dot edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+
+=head1 NAME
+
+Bio::DB::EUtilities::esummary - retrieval of NCBI DocSum data from a list
+of primary IDs or a Cookie
+
+=head1 SYNOPSIS
+
+B<Do not use this module directly.>
+Use it via the L<Bio::DB::EUtilities|Bio::DB::EUtilities> class.
+
+  use Bio::DB::EUtilities;
+
+  my $esearch = Bio::DB::EUtilities->new(-eutil      => 'esearch',
+                                         -db         => 'pubmed',
+                                         -term       => 'hutP',
+                                         -usehistory => 'y');
+
+  $esearch->get_response; # parse the response, fetch a cookie
+
+  my $esummary = Bio::DB::EUtilities->new(-eutil        => 'esummary',
+                                       -cookie       => $esearch->next_cookie);
+
+  print $esummary->get_response->content; # prints XML output
+
+=head1 DESCRIPTION
+
+B<WARNING>: Please do B<NOT> spam the Entrez web server with multiple requests.
+
+The EUtility ESummary is used to retrieve document summaries from a list of
+primary IDs or the user's history (stored on the remote server and accessible
+using a L<Cookie|Bio::DB::EUtilities::Cookie>).  The returned data is processed
+for errors, but no further processing is done at this time.
+
+=over 3
+
+=item C<db>
+
+one or more database available through EUtilities if set to 'all', will retrieve
+all related ID's from each database (see method get_db_ids to retrieve these)
+
+=item C<id>
+
+a list of primary ID's (see below)
+
+=item C<reldate>
+
+limits results to the number of days preceding today's date
+
+=item C<mindate>, C<maxdate>
+
+limits results by dates (C<yyyy/mm/dd> format, or by year)
+
+=item C<term>
+
+limits results by Entrez query (only valid when C<cmd=neighbor> within a single
+database)
+
+=item C<retmode>
+
+set to XML, but can be changed to ref when needed
+
+=item C<cookie>
+
+a Bio::DB::EUtilities::Cookie object (see below)
+
+=item C<cmd>
+
+command values (see below)
+
+=item C<holding>
+
+list LinkOut URLs for specified holding provider; used with C<cmd=llinks>
+or C<cmd=llinkslib> (rarely used)
+
+=back
+
+=head2 Command Values
+
+Command values are set using the C<cmd> parameter.  
+
+=over 3
+
+=item C<prlinks>
+
+List the hyperlink to the primary LinkOut provider for multiple IDs and
+database. Each ID is processed separately.
+
+=item C<prlinks&retmode=ref>
+
+Create a hyperlink to the primary LinkOut provider for a single ID and database.
+
+=item C<llinks>
+
+List LinkOut URLs and Attributes, except PubMed libraries, for multiple IDs
+and database. Each ID is processed separately.
+
+=item C<llinkslib>
+
+List LinkOut URLs and Attributes for multiple IDs and database.  Each ID is
+processed separately.
+
+=item C<lcheck>
+
+Check for the existence (Y or N) of an external link in for multiple IDs and
+database.
+
+=item C<ncheck>
+
+Check for the existence of a neighbor link for each ID within a database,
+e.g., Related Articles in PubMed.
+
+=item C<neighbor>
+
+The default setting. Display neighbors and their scores within a database.
+
+=item C<neighbor_history>
+
+Create history (WebEnv & query_key) for use in other EUtilities.
+
+=item C<acheck>
+
+Lists Entrez databases links for multiple IDs from a single database.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the 
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities::esummary;
+use strict;
+use warnings;
+use XML::Simple;
+#use Data::Dumper;
+
+use base qw(Bio::DB::EUtilities);
+
+our $EUTIL = 'esummary';
+
+# Plugin-specific initialisation for the esummary EUtility.
+# Runs the base-class initialisation, marks this object as an 'esummary'
+# request, and stores the optional -retstart / -retmax arguments.
+# Args : the constructor's argument list (named parameters).
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    # set by default
+    $self->_eutil($EUTIL);
+    # the parameter list must be passed through as @args (the archived text
+    # had this sigil mangled into the bareword "at args")
+    my ($retstart, $retmax) = $self->_rearrange([qw(RETSTART RETMAX)], @args);
+    # only call the setters for values that were actually supplied
+    $retstart       && $self->retstart($retstart);
+    $retmax         && $self->retmax($retmax);
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->parse_response($content)
+ Function: parse out response for cookie and/or id's
+ Returns : none
+ Args    : HTTP::Response object
+ Throws  : 'NCBI esummary nonrecoverable error'
+
+=cut
+
+# Check an esummary response for an NCBI-reported error.
+# Args   : an HTTP::Response object
+# Throws : if the argument is missing or not an HTTP::Response, or if the
+#          parsed XML carries a top-level ERROR element.
+# No further processing of the document summaries is done here.
+sub parse_response {
+    # Unpack both arguments in one list assignment; the previous
+    # "my $response = shift if @_" form has undefined behaviour in Perl.
+    my ($self, $response) = @_;
+    if (!$response || !$response->isa("HTTP::Response")) {
+        $self->throw("Need HTTP::Response object");
+    }
+    my $xs = XML::Simple->new();
+    my $simple = $xs->XMLin($response->content);
+    #$self->debug("Response dumper:\n".Dumper($simple));
+    # check for errors
+    if ($simple->{ERROR}) {
+        $self->throw("NCBI esummary nonrecoverable error: ".$simple->{ERROR});
+    }
+    return;
+}
+
+=head2 Methods inherited from L<Bio::DB::EUtilities|Bio::DB::EUtilities>
+
+=head3 add_cookie
+
+ Title   : add_cookie
+ Usage   : $db->add_cookie($cookie)
+ Function: adds an NCBI query cookie to the internal cookie queue
+ Returns : none
+ Args    : a Bio::DB::EUtilities::Cookie object
+
+=cut
+
+=head3 next_cookie
+
+ Title   : next_cookie
+ Usage   : $cookie = $db->next_cookie
+ Function: return a cookie from the internal cookie queue
+ Returns : a Bio::DB::EUtilities::Cookie object
+ Args    : none
+
+=cut
+
+=head3 reset_cookies
+
+ Title   : reset_cookies
+ Usage   : $db->reset_cookies
+ Function: resets the internal cookie queue
+ Returns : none
+ Args    : none
+
+=cut
+
+=head3 get_all_cookies
+
+ Title   : get_all_cookies
+ Usage   : @cookies = $db->get_all_cookies
+ Function: retrieves all cookies from the internal cookie queue; this leaves
+           the cookies in the queue intact 
+ Returns : none
+ Args    : none
+
+=cut
+
+=head3 get_response
+
+ Title   : get_response
+ Usage   : $db->get_response($content)
+ Function: main method to retrieve data stream; parses out response for cookie
+ Returns : HTTP::Response object
+ Args    : optional : Bio::DB::EUtilities::Cookie from a previous search
+ Throws  : 'not a cookie' exception, response errors (via HTTP::Response)
+
+=cut
+
+=head3 reset_parameters 
+
+ Title   : reset_parameters
+ Usage   : $db->reset_parameters(@args);
+ Function: resets the parameters for a EUtility with args (in @args)
+ Returns : none
+ Args    : array of arguments (arg1 => value, arg2 => value)
+
+B<Experimental method at this time>
+
+=cut
+
+=head3 count
+
+ Title   : count
+ Usage   : $count = $db->count;
+ Function: return count of number of entries retrieved by query
+ Returns : integer
+ Args    : none
+
+=cut
+
+=head3 get_db_ids
+
+ Title   : get_db_ids
+ Usage   : $count = $elink->get_db_ids($db); # gets array ref of IDs
+           @count = $elink->get_db_ids($db); # gets array of IDs
+           %hash  = $elink->get_db_ids(); # hash of databases (keys) and array_refs(value)
+ Function: returns an array or array ref if a database is the argument,
+           otherwise returns a hash of the database (keys) and id_refs (values)
+ Returns : array or array ref of ids (arg=database) or hash of
+           database-array_refs (no args)
+ Args    : database string;
+
+=cut
+
+=head3 get_score
+
+ Title   : get_score
+ Usage   : $score = $db->get_score($id);
+ Function: gets score for ID (if present)
+ Returns : integer (score) 
+ Args    : ID values
+
+=cut
+
+=head3 get_ids_by_score
+
+ Title   : get_ids_by_score
+ Usage   : @ids = $db->get_ids_by_score;  # returns IDs
+           @ids = $db->get_ids_by_score($score); # get IDs by score
+ Function: returns ref of array of ids based on relevancy score from elink;
+           To return all ID's above a score, use the normal score value;
+           to return all ID's below a score, append the score with '-';
+ Returns : ref of array of ID's; if array, an array of IDs
+ Args    : integer (score value); returns all if no arg provided
+
+=cut
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EUtilities.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,731 @@
+# $Id: EUtilities.pm,v 1.24.4.3 2006/11/23 12:36:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::EUtilities
+#
+# Cared for by Chris Fields <cjfields at uiuc dot edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Interfaces with new GenericWebDBI interface 
+
+=head1 NAME
+
+Bio::DB::EUtilities - interface for handling web queries and data
+retrieval from Entrez Utilities at NCBI.
+
+=head1 SYNOPSIS
+
+use Bio::DB::EUtilities;
+
+  my $esearch = Bio::DB::EUtilities->new(-eutil      => 'esearch',
+                                         -db         => 'pubmed',
+                                         -term       => 'hutP',
+                                         -usehistory => 'y');
+
+  $esearch->get_response; # parse the response, fetch a cookie
+
+  my $elink = Bio::DB::EUtilities->new(-eutil        => 'elink',
+                                       -db           => 'protein',
+                                       -dbfrom       => 'pubmed',
+                                       -cookie       => $esearch->next_cookie,
+                                       -cmd          => 'neighbor_history');
+
+  $elink->get_response; # parse the response, fetch the next cookie
+
+  my $efetch = Bio::DB::EUtilities->new(-cookie       => $elink->next_cookie,
+                                        -retmax       => 10,
+                                        -rettype      => 'fasta');
+
+  print $efetch->get_response->content;
+
+=head1 DESCRIPTION
+
+WARNING: Please do B<NOT> spam the Entrez web server with multiple requests.
+NCBI offers Batch Entrez for this purpose, now accessible here via epost!
+
+This is a test interface to the Entrez Utilities at NCBI.  The main purpose of this
+is to enable access to all of the NCBI databases available through Entrez and
+allow for more complex queries.  It is likely that the API for this module as
+well as the documentation will change dramatically over time. So, novice users
+and neophytes beware!
+
+The experimental base class is L<Bio::DB::GenericWebDBI|Bio::DB::GenericWebDBI>,
+which as the name implies enables access to any web database which will accept
+parameters.  This was originally born from an idea to replace
+WebDBSeqI/NCBIHelper with a more general web database accession tool so one
+could access sequence information, taxonomy, SNP, PubMed, and so on.
+However, this may ultimately prove to be better used as a replacement for
+L<LWP::UserAgent|LWP::UserAgent> when accessing NCBI-related web tools
+(Entrez Utilities, or EUtilities).  Using the base class GenericWebDBI,
+one could also build web interfaces to other databases to access anything
+via CGI parameters.
+
+Currently, you can access any database available through the NCBI interface:
+
+  http://eutils.ncbi.nlm.nih.gov/
+
+At this point, Bio::DB::EUtilities uses the EUtilities plugin modules somewhat
+like Bio::SeqIO.  So, one would call the particular EUtility (epost, efetch,
+and so forth) upon instantiating the object using a set of parameters:
+
+  my $esearch = Bio::DB::EUtilities->new(-eutil      => 'esearch',
+                                         -db         => 'pubmed',
+                                         -term       => 'dihydroorotase',
+                                         -usehistory => 'y');
+
+The default EUtility (when C<eutil> is left out) is 'efetch'.  For specifics on
+each EUtility, see their respective POD (**these are incomplete**) or
+the NCBI Entrez Utilities page:
+
+  http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+
+At this time, retrieving the response is accomplished by using the method
+get_response (which also parses for cookies and other information, see below).
+This method returns an HTTP::Response object.  The raw data is accessed by using
+the object method C<content>, like so:
+
+  my $efetch = Bio::DB::EUtilities->new(-cookie       => $elink->next_cookie,
+                                        -retmax       => 10,
+                                        -rettype      => 'fasta');
+
+  print $efetch->get_response->content;
+
+Based on this, if one wanted to retrieve sequences or other raw data
+but was not interested in directly using Bio* objects (such as if
+genome sequences were to be retrieved) one could do so by using the
+proper EUtility object(s) and query(ies) and get the raw response back
+from NCBI through 'efetch'.  
+
+A great deal of the documentation here will likely end up in the form
+of a HOWTO at some future point, focusing on getting data into Bioperl
+objects.
+
+=head2 Cookies
+
+Some EUtilities (C<epost>, C<esearch>, or C<elink>) retain information on
+the NCBI server under certain settings.  This information can be retrieved by
+using a B<cookie>.  Here, the idea of the 'cookie' is similar to the
+'cookie' set on your computer when browsing the Web.  XML data returned
+by these EUtilities, when applicable, is parsed for the cookie information
+(the 'WebEnv' and 'query_key' tags to be specific).  The information (along
+with other identifying data, such as the calling eutility, description
+of query, etc.) is stored as a
+L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie> object
+in an internal queue.  These can be retrieved one at a time by using
+the next_cookie method or all at once in an array using get_all_cookies.
+Each cookie can then be 'fed', one at a time, to another EUtility object,
+thus enabling chained queries as demonstrated in the synopsis.
+
+For more information, see the POD documentation for
+L<Bio::DB::EUtilities::Cookie|Bio::DB::EUtilities::Cookie>.
+
+=head1 TODO
+
+Resetting internal parameters is planned so one could feasibly reuse
+the objects once instantiated, such as if one were to use this as a
+replacement for LWP::UserAgent when retrieving responses i.e. when
+using many of the Bio::DB* NCBI-related modules.
+
+File and filehandle support to be added.
+
+Switch over XML parsing in most EUtilities to XML::SAX (currently
+use XML::Simple)
+
+Any feedback is welcome.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the 
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EUtilities;
+use strict;
+
+use vars qw($HOSTBASE %CGILOCATION $MAX_ENTRIES %DATABASE @PARAMS
+            $DEFAULT_TOOL @COOKIE_PARAMS @METHODS);
+use URI;
+#use Data::Dumper;
+
+use base qw(Bio::DB::GenericWebDBI);
+
+# Value used for the 'tool' parameter when the caller does not supply -tool.
+our $DEFAULT_TOOL = 'bioperl';
+    # default host base
+our $HOSTBASE = 'http://eutils.ncbi.nlm.nih.gov';
+    # map eutility to location
+    # Each entry is [ HTTP method, CGI path appended to $HOSTBASE,
+    # default retmode ]; a retmode of 'dbspec' means the default is looked
+    # up per database in %DATABASE.
+our %CGILOCATION = (
+            'einfo'     => ['get'  => '/entrez/eutils/einfo.fcgi', 'xml'],
+            'epost'     => ['post' => '/entrez/eutils/epost.fcgi', 'xml'],
+            'efetch'    => ['get'  => '/entrez/eutils/efetch.fcgi', 'dbspec'],
+            'esearch'   => ['get'  => '/entrez/eutils/esearch.fcgi', 'xml'],
+            'esummary'  => ['get'  => '/entrez/eutils/esummary.fcgi', 'xml'],
+            'elink'     => ['get'  => '/entrez/eutils/elink.fcgi', 'xml'],
+            'egquery'   => ['get'  => '/entrez/eutils/egquery.fcgi', 'xml']
+             );
+    # map database to return mode
+    # (consulted only for 'dbspec' eutilities; databases not listed here
+    # fall back to 'xml' in _get_params)
+our %DATABASE = ('pubmed'           => 'xml',
+                 'protein'          => 'text',
+                 'nucleotide'       => 'text',
+                 'nuccore'          => 'text',
+                 'nucgss'           => 'text',
+                 'nucest'           => 'text',
+                 'structure'        => 'text',
+                 'genome'           => 'text',
+                 'books'            => 'xml',
+                 'cancerchromosomes'=> 'xml',
+                 'cdd'              => 'xml',
+                 'domains'          => 'xml',
+                 'gene'             => 'asn1',
+                 'genomeprj'        => 'xml',
+                 'gensat'           => 'xml',
+                 'geo'              => 'xml',
+                 'gds'              => 'xml',
+                 'homologene'       => 'xml',
+                 'journals'         => 'text',
+                 'mesh'             => 'xml',
+                 'ncbisearch'       => 'xml',
+                 'nlmcatalog'       => 'xml',
+                 'omia'             => 'xml',
+                 'omim'             => 'xml',
+                 'pmc'              => 'xml',
+                 'popset'           => 'xml',
+                 'probe'            => 'xml',
+                 'pcassay'          => 'xml',
+                 'pccompound'       => 'xml',
+                 'pcsubstance'      => 'xml',
+                 'snp'              => 'xml',
+                 'taxonomy'         => 'xml',
+                 'unigene'          => 'xml',
+                 'unists'           => 'xml',
+                 );
+
+    # Accessor names whose values become request parameters when no cookie
+    # is queued (see _get_params).
+    # NOTE(review): 'retmode' is listed here but is not among the accessors
+    # generated from @METHODS; presumably it is inherited -- confirm.
+    our @PARAMS = qw(rettype usehistory term field tool reldate mindate
+            maxdate datetype retstart retmax sort seq_start seq_stop strand
+            complexity report dbfrom cmd holding version linkname retmode);
+    # Reduced accessor list used instead of @PARAMS when a cookie is queued.
+    our @COOKIE_PARAMS = qw(db sort seq_start seq_stop strand complexity rettype
+            retstart retmax cmd linkname retmode);
+# Generate one combined getter/setter per name in @METHODS at compile time.
+# Each accessor stores its value in $self->{"_<name>"}: called with an
+# argument it sets and returns the value, otherwise it returns the stored
+# value.
+BEGIN {
+    our @METHODS = qw(rettype usehistory term field tool reldate mindate
+        maxdate datetype retstart retmax sort seq_start seq_stop strand
+        complexity report dbfrom cmd holding version linkname);
+    for my $method (@METHODS) {
+        my $slot = "_$method";
+        # Install a closure under the method's name instead of compiling
+        # source text with string eval, whose failures went unchecked.
+        no strict 'refs';
+        *{ __PACKAGE__ . "::$method" } = sub {
+            my $self = shift;
+            return $self->{$slot} = shift if @_;
+            return $self->{$slot};
+        };
+    }
+}
+
+# Constructor / factory.
+# When invoked on a concrete plugin class (Bio::DB::EUtilities::<eutil>) it
+# builds the object through the parent constructor and runs _initialize.
+# When invoked on Bio::DB::EUtilities itself it loads the plugin named by
+# -eutil (default 'efetch') and delegates construction to that class.
+# Returns : the new object, or nothing if the plugin module cannot be loaded.
+sub new {
+    # the archived text had the @args sigil mangled into "at args"
+    my ($class, @args) = @_;
+    if( $class =~ /Bio::DB::EUtilities::(\S+)/ ) {
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    } else { 
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+        my $eutil = $param{'-eutil'} || 'efetch';
+        return unless ($class->_load_eutil_module($eutil));
+        return "Bio::DB::EUtilities::$eutil"->new(@args);
+    }
+}
+
+# Base initialisation shared by all EUtilities plugins.
+# Reads -tool, -id, -retmode, -verbose, -cookie and -keep_cookies from the
+# argument list, sets the base URL and tool name, and queues the supplied
+# cookie (if any).
+sub _initialize {
+    my ($self, @args) = @_;
+    my ( $tool, $ids, $retmode, $verbose, $cookie, $keep_cookies) =
+      $self->_rearrange([qw(TOOL ID RETMODE VERBOSE COOKIE KEEP_COOKIES)],  @args);
+    # Zero the cookie bookkeeping *before* a constructor-supplied cookie is
+    # queued; resetting it afterwards made get_cookie_count report 0 even
+    # though a cookie was present.
+    $self->{'_cookieindex'} = 0;
+    $self->{'_cookiecount'} = 0;
+        # hard code the base address
+    $self->url_base_address($HOSTBASE);
+    $tool ||= $DEFAULT_TOOL;
+    $self->tool($tool);
+    $ids            && $self->id($ids);
+    $verbose        && $self->verbose($verbose);
+    $retmode        && $self->retmode($retmode);
+    $keep_cookies   && $self->keep_cookies($keep_cookies);
+    if ($cookie && ref($cookie) =~ m{cookie}i) {
+        # a cookie carries its own database; use it unless one was given
+        $self->db($cookie->database) if !($self->db);
+        $self->add_cookie($cookie);
+    }
+    $self->{'_authentication'} = [];
+}
+
+=head2 add_cookie
+
+ Title   : add_cookie
+ Usage   : $db->add_cookie($cookie)
+ Function: adds an NCBI query cookie to the internal cookie queue
+ Returns : none
+ Args    : a Bio::DB::EUtilities::Cookie object
+
+=cut
+
+# Append a cookie to the internal queue and bump the cookie count.
+# Args   : a Bio::DB::EUtilities::Cookie object
+# Throws : if the argument is not a Bio::DB::EUtilities::Cookie.
+# Calling it without an argument is a no-op (it used to inflate the count
+# without queueing anything).
+sub add_cookie {
+    my $self = shift;
+    if (@_) {
+        my $cookie = shift;
+        # eval guards the ->isa call so that undef or a plain reference
+        # produces the intended exception rather than a method-call error
+        my $is_cookie = do {
+            local $@;
+            eval { $cookie->isa("Bio::DB::EUtilities::Cookie") };
+        };
+        $self->throw("Expecting a Bio::DB::EUtilities::Cookie, got "
+                     . (defined $cookie ? $cookie : 'undef') . ".")
+          unless $is_cookie;
+        push @{$self->{'_cookie'}}, $cookie;
+        $self->{'_cookiecount'}++;
+    }
+}
+
+=head2 next_cookie
+
+ Title   : next_cookie
+ Usage   : $cookie = $db->next_cookie
+ Function: return a cookie from the internal cookie queue
+ Returns : a Bio::DB::EUtilities::Cookie object
+ Args    : none
+
+=cut
+
+# Return the next cookie from the internal queue and advance the queue
+# position.  When the queue is missing, empty or exhausted, a warning is
+# issued and undef is returned; the position is not advanced in that case.
+sub next_cookie {
+    my $self = shift;
+    my $queue = $self->{'_cookie'};
+    my $index = $self->{'_cookieindex'} || 0;
+    if ($queue && $index < @{$queue}) {
+        return $queue->[ $self->_next_cookie_index ];
+    }
+    $self->warn("No cookies left in the jar!");
+    # Deliberately a single undef rather than a bare return: this method is
+    # used inline in named-argument lists (-cookie => $obj->next_cookie),
+    # where an empty list would shift the following arguments.
+    return undef;
+}
+
+=head2 reset_cookies
+
+ Title   : reset_cookies
+ Usage   : $db->reset_cookies
+ Function: resets (empties) the internal cookie queue
+ Returns : none
+ Args    : none
+
+=cut
+
+# Empty the cookie queue and put the count and read position back to zero.
+sub reset_cookies {
+    my ($self) = @_;
+    $self->{'_cookie'}      = [];
+    $self->{'_cookiecount'} = 0;
+    $self->{'_cookieindex'} = 0;
+}
+
+=head2 get_all_cookies
+
+ Title   : get_all_cookies
+ Usage   : @cookies = $db->get_all_cookies
+ Function: retrieves all cookies from the internal cookie queue; this leaves
+           the cookies in the queue intact 
+ Returns : array of cookies (in list context) or the first cookie (in scalar context)
+ Args    : none
+
+=cut
+
+# Context-sensitive view of the cookie queue; the queue itself is left
+# untouched.  If no queue has been created yet, neither return statement
+# fires and the sub falls off the end without an explicit return value.
+sub get_all_cookies {
+    my $self = shift;
+    # list context: every queued cookie
+    return @{ $self->{'_cookie'} } if $self->{'_cookie'} && wantarray;
+    # otherwise: only the first queued cookie (undef when the queue is empty)
+    return $self->{'_cookie'}->[0] if $self->{'_cookie'} 
+}
+
+=head2 get_cookie_count
+
+ Title   : get_cookie_count
+ Usage   : $ct = $db->get_cookie_count
+ Function: returns # cookies in internal queue
+ Returns : integer 
+ Args    : none
+
+=cut
+
+# Report the number of cookies recorded as queued (the '_cookiecount' slot).
+sub get_cookie_count {
+    my ($self) = @_;
+    return $self->{'_cookiecount'};
+}
+
+=head2 rewind_cookies
+
+ Title   : rewind_cookies
+ Usage   : $elink->rewind_cookies;
+ Function: resets cookie index to 0 (starts over)
+ Returns : None
+ Args    : None
+
+=cut
+
+# Move the queue read position back to the first cookie; the queued
+# cookies themselves are kept.
+sub rewind_cookies {
+    my ($self) = @_;
+    $self->{'_cookieindex'} = 0;
+}
+
+
+=head2 keep_cookies
+
+ Title   : keep_cookies
+ Usage   : $db->keep_cookies(1)
+ Function: Flag to retain the internal cookie queue;
+           this is normally emptied upon using get_response
+ Returns : none
+ Args    : Boolean - value that evaluates to TRUE or FALSE
+
+=cut
+
+# Getter/setter for the flag that tells get_response not to empty the
+# cookie queue before parsing a new response.
+sub keep_cookies {
+    my ($self, @flag) = @_;
+    if (@flag) {
+        $self->{'_keep_cookies'} = $flag[0];
+    }
+    return $self->{'_keep_cookies'};
+}
+
+=head2 parse_response
+
+ Title   : parse_response
+ Usage   : $db->parse_response($content)
+ Function: parse out response for cookies and other goodies
+ Returns : empty
+ Args    : none
+ Throws  : Not implemented (implemented in plugin classes)
+
+=cut
+
+# Placeholder in the base class: always raises the not-implemented
+# exception.  Each plugin class supplies its own parse_response.
+sub parse_response {
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 get_response
+
+ Title   : get_response
+ Usage   : $db->get_response($content)
+ Function: main method to submit request and retrieves a response
+ Returns : HTTP::Response object
+ Args    : None
+
+=cut
+
+# Submit the request and hand back the response.
+# Steps: honour the delay policy, submit the request, throw on an
+# unsuccessful response, empty the cookie queue unless keep_cookies is set,
+# then let the plugin's parse_response extract cookies and other data.
+# Returns : the object returned by _submit_request.
+sub get_response {
+    my ($self) = @_;
+    $self->_sleep; # institute delay policy
+    my $response = $self->_submit_request;
+    # NOTE(review): the credentials are attached to the object that
+    # _submit_request returned, i.e. after submission -- confirm intended.
+    $response->proxy_authorization_basic($self->authentication)
+        if $self->authentication;
+    $self->throw(ref($self)." Request Error:".$response->as_string)
+        unless $response->is_success;
+    $self->reset_cookies unless $self->keep_cookies;
+    $self->parse_response($response);  # grab cookies and what not
+    return $response;
+}
+
+# not implemented yet
+#=head2 reset_parameters
+#
+# Title   : reset_parameters
+# Usage   : $db->reset_parameters(@args);
+# Function: resets the parameters for a EUtility with args (in @args)
+# Returns : none
+# Args    : array of arguments (arg1 => value, arg2 => value)
+#
+#=cut
+
+#sub reset_parameters {
+#    my $self = shift;
+#    my @args = @_;
+#    $self->reset_cookies; # no baggage allowed
+#    if ($self->can('next_linkset')) {
+#        $self->reset_linksets;
+#    }
+#    # resetting the EUtility will not occur even if added as a parameter;
+#    $self->_initialize(@args); 
+#}
+
+=head2 get_ids
+
+ Title   : get_ids
+ Usage   : $count = $elink->get_ids($db); # array ref of specific db ids
+           @ids   = $esearch->get_ids(); # array
+           $ids   = $esearch->get_ids(); # array ref
+ Function: returns an array or array ref of unique IDs.
+ Returns : array or array ref of ids 
+ Args    : Optional : database string if elink used (required arg if searching
+           multiple databases for related IDs)
+           Currently implemented only for elink object with single linksets
+
+=cut
+
+# Return the IDs collected by this object.
+# For objects that provide get_all_linksets (elink), the IDs are first taken
+# from the single linkset present: for the database given as argument, or
+# otherwise for the first database of that linkset.
+# Args    : optional database name
+# Returns : list of IDs in list context, array ref in scalar context;
+#           nothing if no IDs have been stored.
+# Throws  : if several databases were queried and none was specified, if
+#           there are no linksets, or if there is more than one linkset.
+sub get_ids {
+    # Unpack the optional argument directly; "my $user_db = shift if @_"
+    # has undefined behaviour in Perl.
+    my ($self, $user_db) = @_;
+    if ($self->can('get_all_linksets')) {
+        my $querydb = $self->db;
+        if (!$user_db && ($querydb eq 'all' || $querydb =~ m{,}) ) {
+            $self->throw(q(Multiple databases searched; must use a specific ).
+                         q(database as an argument.) );
+        }
+        
+        my $count = $self->get_linkset_count;
+        if ($count == 0) {
+            $self->throw( q(No linksets!) );
+        }
+        elsif ($count == 1) {
+            my ($linkset) = $self->get_all_linksets;
+            my ($db) = $user_db ? $user_db : $linkset->get_all_linkdbs;
+            $self->_add_db_ids( scalar( $linkset->get_LinkIds_by_db($db) ) );
+        }
+        else {
+            $self->throw( q(Multiple linkset objects present; can't use get_ids.).
+                 qq(\nUse get_all_linksets/get_databases/get_LinkIds_by_db ).
+                 qq(\n$count total linksets ));
+        }
+    }
+    if ($self->{'_db_ids'}) {
+        return @{$self->{'_db_ids'}} if wantarray;
+        return $self->{'_db_ids'};
+    }
+    # no IDs stored: empty list / undef instead of an implicit value
+    return;
+}
+
+# carried over from NCBIHelper/WebDBSeqI
+
+=head2 delay_policy
+
+  Title   : delay_policy
+  Usage   : $secs = $self->delay_policy
+  Function: return number of seconds to delay between calls to remote db
+  Returns : number of seconds to delay
+  Args    : none
+
+  NOTE: NCBI requests a delay of 3 seconds between requests.  This method
+        implements that policy.
+
+=cut
+
+# Number of seconds to wait between calls to the remote database (fixed
+# at 3, per the NCBI request noted in the POD above).
+sub delay_policy {
+    my ($self) = @_;
+    return 3;
+}
+
+=head2 get_entrezdbs
+
+  Title   : get_entrezdbs
+  Usage   : @dbs = $self->get_entrezdbs;
+  Function: return list of all Entrez databases; convenience method
+  Returns : array of database names (in scalar context, the number of databases)
+  Args    : none
+
+=cut
+
+# Convenience method: run a fresh einfo query and return the database
+# names it reports.
+sub get_entrezdbs {
+    my ($self) = @_;
+    my $einfo = Bio::DB::EUtilities->new(-eutil => 'einfo');
+    $einfo->get_response;
+    # take a copy of the list so that nothing keeps the einfo object alive
+    my @db_names = $einfo->einfo_dbs;
+    return @db_names;
+}
+
+=head1 Private methods
+
+=cut
+
+#=head2 _add_db_ids
+#
+# Title   : _add_db_ids
+# Usage   : $self->_add_db_ids($ids);
+# Function: stores a copy of the array of IDs internally
+# Returns : none
+# Args    : ref to array of ID's
+#
+#=cut
+
+# used by esearch and elink, hence here
+
+# Replace the stored ID list with a copy of the given array.
+# Args   : array reference of IDs
+# Throws : if the argument is not an array reference.
+sub _add_db_ids {
+    my ($self, $id_list) = @_;
+    if (ref($id_list) !~ m{ARRAY}i) {
+        $self->throw ("IDs must be an ARRAY reference");
+    }
+    # one-level copy, so later changes to the caller's array do not alter
+    # the stored list
+    $self->{'_db_ids'} = [ @{ $id_list } ];
+}
+
+=head2 _eutil
+
+ Title   : _eutil
+ Usage   : $db->_eutil;
+ Function: sets eutil 
+ Returns : eutil
+ Args    : eutil
+
+=cut
+
+# Getter/setter for the name of the EUtility this object talks to.
+sub _eutil {
+    my ($self, @name) = @_;
+    if (@name) {
+        $self->{'_eutil'} = $name[0];
+    }
+    return $self->{'_eutil'};
+}
+
+# _submit_request
+
+ #Title   : _submit_request
+ #Usage   : my $url = $self->_submit_request
+ #Function: builds request object based on set parameters
+ #Returns : HTTP::Request
+ #Args    : None
+
+#
+# as the name implies....
+
+# Build the request URL from the current parameters and IDs and send it,
+# using POST or GET as configured for the eutility in %CGILOCATION.
+# Returns : whatever the post/get call returns.
+# Throws  : if a multi-ID group is a reference other than an array ref.
+sub _submit_request {
+    my ($self) = @_;
+    my %query = $self->_get_params;
+    my $eutil = $self->_eutil;
+    if ($self->id) {
+        if ($self->can('multi_id') && $self->multi_id) {
+            # several ID groups: each array ref becomes one comma-joined
+            # 'id' value, each plain scalar becomes an 'id' value as is
+            for my $group (@{ $self->id }) {
+                my $kind = ref($group);
+                if ($kind eq 'ARRAY') {
+                    push @{ $query{'id'} }, (join q(,), @{ $group });
+                }
+                elsif (!$kind) {
+                    push @{ $query{'id'} }, $group;
+                }
+                else {
+                    $self->throw("Unknown ID type: $group");
+                }
+            }
+        }
+        else {
+            # single group: all IDs go into one comma-separated value
+            $query{'id'} = join ',', @{ $self->id };
+        }
+    }
+    my $url = URI->new($HOSTBASE . $CGILOCATION{$eutil}[1]);
+    $url->query_form(%query);
+    $self->debug("The web address:\n".$url->as_string."\n");
+    # only epost is configured as 'post'; everything else is a GET
+    return $CGILOCATION{$eutil}[0] eq 'post' ? $self->post($url)
+                                             : $self->get($url);
+}
+
+# _get_params
+
+# Title   : _get_params
+# Usage   : my $url = $self->_get_params
+# Function: builds parameter list for web request
+# Returns : hash of parameter-value pairs
+# Args    : None
+
+# these get sorted out in a hash originally but end up in an array to
+# deal with multiple id parameters (hash values would kill that)
+
+# Assemble the parameter => value hash for the web request.
+# The set of accessors consulted depends on whether a cookie is queued
+# (@COOKIE_PARAMS) or not (@PARAMS); a cookie additionally contributes
+# WebEnv / query_key.  'db' and 'retmode' are then filled in with defaults.
+# Returns : hash (as a list) of parameter names and values.
+sub _get_params {
+    my $self = shift;
+    # scalar context: get_all_cookies yields the first queued cookie; 0 if none
+    my $cookie = $self->get_all_cookies ? $self->get_all_cookies : 0;
+    my @final;  # final parameter list; this changes dep. on presence of cookie
+    my $eutil = $self->_eutil;
+    my %params;
+    @final =  ($cookie && $cookie->isa("Bio::DB::EUtilities::Cookie")) ?
+              @COOKIE_PARAMS : @PARAMS;
+              
+    # build parameter hash based on final parameter list
+    # (accessors returning a false value -- including 0 -- are left out)
+    for my $method (@final) {
+        if ($self->$method) {
+            $params{$method} = $self->$method;
+        }
+    }
+    
+    if ($cookie) {
+        # the cookie's cookie() method returns a reference to [WebEnv, query_key]
+        my ($webenv, $qkey) = @{$cookie->cookie};
+        $self->debug("WebEnv:$webenv\tQKey:$qkey\n");
+        ($params{'WebEnv'}, $params{'query_key'}) = ($webenv, $qkey);
+        # for elink the cookie's database is the source database
+        $params{'dbfrom'} = $cookie->database if $eutil eq 'elink';
+    }
+    
+    my $db = $self->db;
+    
+    # elink cannot set the db from a cookie (it is actually dbfrom)
+    # precedence: explicit db, then the cookie's database (not for elink),
+    # then 'nucleotide'
+    $params{'db'} = $db                                   ? $db               : 
+                    ($cookie && $eutil ne 'elink') ? $cookie->database :
+                    'nucleotide';
+    # einfo db exception (db is optional)
+    # (the same applies to egquery: drop the defaulted db again)
+    if (!$db && ($eutil eq 'einfo' || $eutil eq 'egquery')) {
+        delete $params{'db'};
+    }
+    unless (exists $params{'retmode'}) { # set by user
+        my $format = $CGILOCATION{ $eutil }[2];  # set by eutil 
+        if ($format eq 'dbspec') {  # database-specific
+            $format = $DATABASE{$params{'db'}} ?
+                      $DATABASE{$params{'db'}} : 'xml'; # have xml as a fallback
+        }
+        $params{'retmode'} = $format;
+    }
+    $self->debug("Param: $_\tValue: $params{$_}\n") for keys %params;
+    return %params;
+}
+
+# enable dynamic loading of proper module at run time
+
+# Load the plugin module Bio::DB::EUtilities::<eutil> at run time.
+# Returns : the result of _load_module; if loading raised an exception, a
+#           diagnostic is printed to STDERR and the result stays undefined.
+sub _load_eutil_module {
+    my ($self, $eutil) = @_;
+    my $loaded;
+    eval {
+        $loaded = $self->_load_module("Bio::DB::EUtilities::" . $eutil);
+    };
+    if ($@) {
+        print STDERR <<END;
+$self: $eutil cannot be found
+Exception $@
+For more information about the EUtilities system please see the EUtilities docs.
+This includes ways of checking for formats at compile time, not run time
+END
+    }
+    return $loaded;
+}
+
+# Return the current cookie queue position, then advance it by one
+# (post-increment).
+sub _next_cookie_index {
+    my ($self) = @_;
+    return $self->{'_cookieindex'}++;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/EntrezGene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/EntrezGene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/EntrezGene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,200 @@
+# $Id: EntrezGene.pm,v 1.8.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::EntrezGene
+#
+# Cared for by Brian Osborne bosborne at alum.mit.edu
+#
+# Copyright Brian Osborne
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::EntrezGene - Database object interface to Entrez Gene
+
+=head1 SYNOPSIS
+
+    use Bio::DB::EntrezGene;
+
+    my $db = Bio::DB::EntrezGene->new;
+
+    my $seq = $db->get_Seq_by_id(2); # Gene id
+
+    # or ...
+
+    my $seqio = $db->get_Stream_by_id([2, 4693, 3064]); # Gene ids
+    while ( my $seq = $seqio->next_seq ) {
+	    print "id is ", $seq->display_id, "\n";
+    }
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of Sequence objects from the 
+Entrez Gene database at NCBI, via an Entrez query using Gene ids.
+
+This module requires the CPAN Bio::ASN1 module.
+
+WARNING: Please do NOT spam the Entrez web server with multiple requests.
+NCBI offers Batch Entrez for this purpose.
+
+=head1 NOTES
+
+The Entrez eutils API does not allow Entrez Gene queries by name as
+of this writing, therefore there are only get_Seq_by_id and 
+get_Stream_by_id methods in this module, and these expect Gene ids. 
+There are no get_Seq_by_acc or get_Stream_by_acc methods.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Brian Osborne
+
+Email bosborne at alum.mit.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::EntrezGene;
+use strict;
+use vars qw($DEFAULTFORMAT $DEFAULTMODE %PARAMSTRING);
+
+use base qw(Bio::DB::NCBIHelper);
+BEGIN {
+    $DEFAULTMODE   = 'single';
+    $DEFAULTFORMAT = 'asn.1';
+
+    # Every retrieval mode currently sends the same query parameters;
+    # each mode still receives its own private hash.
+    my @shared_params = (
+        'db'         => 'gene',
+        'usehistory' => 'y',
+        'tool'       => 'bioperl',
+        'retmode'    => 'asn.1',
+    );
+    %PARAMSTRING = map { ( $_ => { @shared_params } ) }
+                   qw(batch gi version single);
+}
+
+# the new way to make modules a little more lightweight
+# Constructor: builds the object through the parent class, then applies
+# the EntrezGene-specific retrieval settings.
+sub new {
+  my $class = shift;
+  my $self  = $class->SUPER::new(@_);
+
+  # Seems that Bio::SeqIO::entrezgene requires this:
+  $self->{_retrieval_type} = "tempfile";
+
+  my $format = $self->default_format;
+  $self->request_format($format);
+
+  return $self;
+}
+
+=head2 get_params
+
+ Title   : get_params
+ Usage   : my %params = $self->get_params($mode)
+ Function: Returns key,value pairs to be passed to NCBI database
+           for either 'batch' or 'single' sequence retrieval method
+ Returns : A key,value pair hash
+ Args    : 'single' or 'batch' mode for retrieval
+
+=cut
+
+# Look up the query parameters registered for $mode in %PARAMSTRING.
+# A missing or unrecognised mode falls back to the $DEFAULTMODE entry.
+# Returns the parameters as a flat key/value list.
+sub get_params {
+    my ($self, $mode) = @_;
+
+    # Guard the lookup so that calling get_params() without a mode does
+    # not index the hash with undef (an "uninitialized value" warning
+    # when warnings are enabled).
+    $mode = $DEFAULTMODE
+        unless defined $mode && defined $PARAMSTRING{$mode};
+
+    return %{ $PARAMSTRING{$mode} };
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+# Accessor for the module-wide default format held in $DEFAULTFORMAT.
+sub default_format {
+    my $format = $DEFAULTFORMAT;
+    return $format;
+}
+
+# from Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
+
+=head1 Routines from Bio::DB::WebDBSeqI and Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id(2)
+ Function: Gets a Bio::Seq object by its Gene id
+ Returns : A Bio::Seq object
+ Args    : Gene id
+ Throws  : "id does not exist" exception
+
+=head1 Routines implemented by Bio::DB::NCBIHelper
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: HTTP::Request
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id( [$gid1, $gid2] );
+  Function: Gets a series of Seq objects using Gene ids
+  Returns : A Bio::SeqIO stream object
+  Args    : A reference to an array of Gene ids
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my $format = $self->request_format;
+           $self->request_format($format);
+ Function: Get or set sequence format retrieval
+ Returns : String representing format
+ Args    : $format = sequence format
+
+=cut
+
+# override to force format
+sub request_format {
+    my $self = shift;
+
+    # Any format passed by the caller is deliberately ignored: the
+    # module's default format is always handed to the parent accessor.
+    my $forced_format = $self->default_format();
+    return $self->SUPER::request_format($forced_format);
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression/geo.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression/geo.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression/geo.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,281 @@
+=head1 NAME
+
+Bio::DB::Expression::geo - Bio::DB::Expression implementation for NCBI GEO
+
+=head1 SYNOPSIS
+
+*** Give standard usage here
+
+=head1 DESCRIPTION
+
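+Queries the NCBI GEO web site and builds Bio::Expression::Platform,
+Bio::Expression::DataSet and Bio::Expression::Sample objects from the
+results.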
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a '_'.  Methods are
+in alphabetical order for the most part.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::DB::Expression::geo;
+use strict;
+use base qw(Bio::DB::Expression);
+
+use Bio::Expression::Contact;
+use Bio::Expression::DataSet;
+use Bio::Expression::Platform;
+use Bio::Expression::Sample;
+
+use constant URL_PLATFORMS => 'http://www.ncbi.nlm.nih.gov/geo/query/browse.cgi?pgsize=100000&mode=platforms&submitter=-1&filteron=0&filtervalue=-1&private=1&sorton=pub_date&sortdir=1&start=1';
+use constant URL_PLATFORM => 'http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?form=text&view=full&acc=';
+use constant URL_DATASET => 'http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?form=text&view=full&acc=';
+use constant URL_SAMPLE => 'http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?form=text&view=full&acc=';
+
+=head2 _initialize()
+
+ Usage   : $obj->_initialize(%arg);
+ Function: Internal method to initialize a new Bio::DB::Expression::geo object
+ Returns : true on success
+ Args    : Arguments passed to new()
+
+=cut
+
+sub _initialize {
+  my ($self, %arg) = @_;
+
+  # Treat every "-name => value" pair as a call to the accessor "name";
+  # pairs without a matching method are silently skipped.
+  while ( my ($key, $value) = each %arg ) {
+    ( my $method = $key ) =~ s/^-//;
+    next unless $self->can($method);
+    $self->$method($value);
+  }
+
+  return 1;
+}
+
+=head2 get_platforms()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::Platform objects
+ Args    :
+
+=cut
+
+# Fetch the GEO platform listing (URL_PLATFORMS) and turn each table row
+# into a Bio::Expression::Platform object with an attached
+# Bio::Expression::Contact.  Returns the list of platforms, which is
+# empty when the listing could not be retrieved.
+sub get_platforms {
+  my ($self, @args) = @_;
+
+  my @platforms = ();
+
+  # _get_url() warns and returns nothing when the request fails; stop
+  # here instead of running the parser on an undefined document.
+  my $doc = $self->_get_url( URL_PLATFORMS );
+  return @platforms unless defined $doc;
+
+  # keep only the table rows that follow the "Release date" header row
+  $doc =~ s!^.+?>Release date<.+?</tr>(.+)</table>!$1!gs;
+
+  my @records = split m!</tr>\s+<tr>!, $doc;
+
+  foreach my $record ( @records ) {
+    # captures, in order: platform accession, platform name, taxon id,
+    # submitter (contact) id, submitter name
+    my ($platform_acc,$name,$tax_acc,$contact_acc,$contact_name) =
+      $record =~ m!acc\.cgi\?acc=(.+?)".+?<td.+?>(.+?)<.+?<td.+?>.+?<.+?<td.+?>.+?href=".+?id=(.+?)".+?<td.+?OpenSubmitter\((\d+?)\).+?>(.+?)<!s;
+    next unless $platform_acc;
+
+    my $platform = Bio::Expression::Platform->new(
+                                                  -accession => $platform_acc,
+                                                  -name => $name,
+                                                  -_taxon_id => $tax_acc,
+                                                  -contact => Bio::Expression::Contact->new(
+                                                                                            -source => 'geo',
+                                                                                            -accession => $contact_acc,
+                                                                                            -name => $contact_name,
+                                                                                            -db => $self
+                                                                                           ),
+                                                  -db => $self,
+                                                 );
+    push @platforms, $platform;
+  }
+
+  return @platforms;
+}
+
+=head2 get_samples()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::Sample objects
+ Args    :
+
+=cut
+
+# Placeholder: no sample listing is implemented here; the call is handed
+# straight to throw_not_implemented().
+sub get_samples {
+  my ($self, @args) = @_;
+  $self->throw_not_implemented();
+}
+
+=head2 get_contacts()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::Contact objects
+ Args    :
+
+=cut
+
+# Placeholder: no contact listing is implemented here; the call is handed
+# straight to throw_not_implemented().
+sub get_contacts {
+  my ($self, @args) = @_;
+  $self->throw_not_implemented();
+}
+
+=head2 get_datasets()
+
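+ Usage   : my @datasets = $db->get_datasets($platform);
+ Function: Lists the data sets (series) referenced by a platform record
+ Example :
+ Returns : a list of Bio::Expression::DataSet objects
+ Args    : a platform object providing an accession() method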
+
+=cut
+
+sub get_datasets {
+  my ($self, $platform) = @_;
+
+  # fetch the record of the platform identified by its accession
+  my $record = $self->_get_url( URL_PLATFORM . $platform->accession );
+
+  my @datasets;
+  for my $line ( split /\n/, $record ) {
+    # only "!Platform_series_id = <accession>" lines name a data set
+    next unless $line =~ /^\!Platform_series_id = (\S+?)\s*$/;
+    my $dataset_acc = $1;
+    next unless $dataset_acc;
+
+    my $dataset = Bio::Expression::DataSet->new(
+      -accession => $dataset_acc,
+      -platform  => $platform,
+      -db        => $self,
+    );
+    push @datasets, $dataset;
+  }
+
+  return @datasets;
+}
+
+# Populate $sample from its GEO record: each recognised "!Sample_..."
+# line is passed to the matching accessor of $sample.  Returns 1.
+sub fill_sample {
+  my ( $self, $sample ) = @_;
+
+  # [ line pattern, accessor that receives the captured value ];
+  # tried in order, the first matching rule wins
+  my @rules = (
+    [ qr/^\!Sample_title = (.+?)\s*$/,                 'name' ],
+    [ qr/^\!Sample_characteristics.*? = (.+?)\s*$/,    'description' ],
+    [ qr/^\!Sample_source_name.*? = (.+?)\s*$/,        'source_name' ],
+    [ qr/^\!Sample_treatment_protocol.*? = (.+?)\s*$/, 'treatment_description' ],
+  );
+
+  my $record = $self->_get_url( URL_SAMPLE. $sample->accession );
+
+  LINE:
+  for my $line ( split /\n/, $record ) {
+    for my $rule ( @rules ) {
+      my ( $pattern, $setter ) = @{$rule};
+      my @captured = $line =~ $pattern;
+      next unless @captured;
+
+      $sample->$setter( $captured[0] );
+      next LINE;
+    }
+  }
+
+  return 1;
+}
+
+# Populate $dataset from its GEO record: "!Series_sample_id" lines become
+# Bio::Expression::Sample objects, the other recognised "!Series_..."
+# lines are passed to the matching accessor of $dataset.  The collected
+# samples are finally stored through $dataset->samples().
+sub fill_dataset {
+  my ( $self, $dataset ) = @_;
+
+  # [ line pattern, accessor that receives the captured value ];
+  # tried in order, the first matching rule wins
+  my @rules = (
+    [ qr/^\!Series_pubmed_id = (.+?)\s*$/,      'pubmed_id' ],
+    [ qr/^\!Series_web_link = (.+?)\s*$/,       'web_link' ],
+    [ qr/^\!Series_contact_name = (.+?)\s*$/,   'contact' ],
+    [ qr/^\!Series_title = (.+?)\s*$/,          'name' ],
+    [ qr/^\!Series_summary = (.+?)\s*$/,        'description' ],
+    [ qr/^\!Series_type = (.+?)\s*$/,           'design' ],
+    [ qr/^\!Series_overall_design = (.+?)\s*$/, 'design_description' ],
+  );
+
+  my $record = $self->_get_url( URL_DATASET . $dataset->accession );
+
+  my @samples;
+
+  LINE:
+  for my $line ( split /\n/, $record ) {
+    # sample references are checked before any of the simple fields
+    if ( $line =~ /^\!Series_sample_id = (\S+?)\s*$/ ) {
+      my $sample_acc = $1;
+      my $sample = Bio::Expression::Sample->new(
+        -accession => $sample_acc,
+        -dataset   => $dataset,
+        -db        => $self,
+      );
+      push @samples, $sample;
+      next LINE;
+    }
+
+    for my $rule ( @rules ) {
+      my ( $pattern, $setter ) = @{$rule};
+      my @captured = $line =~ $pattern;
+      next unless @captured;
+
+      $dataset->$setter( $captured[0] );
+      next LINE;
+    }
+  }
+
+  return $dataset->samples(\@samples);
+}
+
+#################################################
+
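+=head2 _get_url()
+
+ Usage   : my $doc = $self->_get_url($url);
+ Function: Fetches $url with get(); a failed request is reported with
+           warn() instead of being rethrown
+ Example :
+ Returns : the response returned by get(), or nothing on failure
+ Args    : the URL to fetch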
+
+=cut
+
+sub _get_url {
+  my $self  = shift;
+  my ($url) = @_;
+
+  # a failing get() is downgraded to a warning; the caller gets nothing back
+  my $response = eval { $self->get( $url ) };
+  if ( my $error = $@ ) {
+    $self->warn("Can't query website: $error");
+    return;
+  }
+
+  $self->debug( "resp is $response\n");
+  return $response;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Expression.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,201 @@
+# $Id: Expression.pm,v 1.4.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::Expression
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Expression - Factory and base class for expression database access
+
+=head1 SYNOPSIS
+
+  use Bio::DB::Expression;
+  my $db = Bio::DB::Expression->new( -source => 'geo' );
+  my @platforms = $db->get_platforms();
+  foreach my $platform ( @platforms ) {
+    my @datasets = $platform->get_datasets();
+    foreach my $dataset ( @datasets ) {
+      my @samples = $dataset->get_samples();
+      foreach my $sample ( @samples ) {
+        #...
+      }
+    }
+  }
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::Expression;
+use strict;
+use base qw(Bio::Root::HTTPget Bio::Root::Root);
+use Bio::Root::HTTPget;
+our $DefaultSource = 'geo';
+
+=head2 new()
+
+ Usage   : my $obj = Bio::DB::Expression->new( -source => 'geo' );
+ Function: Builds a new Bio::DB::Expression object 
+ Returns : an instance of the Bio::DB::Expression::<source> class
+ Args    : -source => name of the data source (defaults to 'geo')
+
+
+=cut
+
+# Constructor and factory.
+# Called on a Bio::DB::Expression::<source> class it builds and
+# initializes that object.  Called on any other class it reads the
+# -source argument (case-insensitive key, default $DefaultSource), loads
+# Bio::DB::Expression::<source> and delegates construction to it;
+# nothing is returned when that module cannot be loaded.
+sub new {
+  my ($class, @args) = @_;
+
+  if( $class =~ /Bio::DB::Expression::(\S+)/ ) {
+    # concrete source class: construct and initialize it
+    my ($self) = $class->SUPER::new(@args);
+    $self->_initialize(@args);
+    return $self;
+  } else {
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    my $source = $param{'-source'} || $DefaultSource;
+
+    $source = "\L$source";	# normalize capitalization to lower case
+
+    return unless( $class->_load_expression_module($source) );
+    return "Bio::DB::Expression::$source"->new(@args);
+  }
+}
+
+=head2 get_platforms()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::Platform objects
+ Args    :
+
+=cut
+
+# Base-class stub: hands the call to throw_not_implemented().
+sub get_platforms {
+  my ($self, @args) = @_;
+  $self->throw_not_implemented();
+}
+
+=head2 get_samples()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::Sample objects
+ Args    :
+
+=cut
+
+# Base-class stub: hands the call to throw_not_implemented().
+sub get_samples {
+  my ($self, @args) = @_;
+  $self->throw_not_implemented();
+}
+
+=head2 get_contacts()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::Contact objects
+ Args    :
+
+=cut
+
+# Base-class stub: hands the call to throw_not_implemented().
+sub get_contacts {
+  my ($self, @args) = @_;
+  $self->throw_not_implemented();
+}
+
+=head2 get_datasets()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : a list of Bio::Expression::DataSet objects
+ Args    :
+
+=cut
+
+# Base-class stub: hands the call to throw_not_implemented().
+sub get_datasets {
+  my ($self, @args) = @_;
+  $self->throw_not_implemented();
+}
+
+
+
+
+=head2 _load_expression_module
+
+ Title   : _load_expression_module
+ Usage   : *INTERNAL Bio::DB::Expression stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+# Load Bio::DB::Expression::<source> at run time.
+# Returns the value handed back by _load_module(); when loading throws,
+# a diagnostic is printed to STDERR and undef is returned.
+sub _load_expression_module {
+    my ($self, $source) = @_;
+    my $module = "Bio::DB::Expression::" . $source;
+    my $ok;
+
+    eval { $ok = $self->_load_module($module) };
+    if ( $@ ) {
+        # the exception text is part of the message below, so it is
+        # reported exactly once
+        print STDERR <<END;
+$self: $source cannot be found
+Exception $@
+For more information about the Bio::DB::Expression system please see
+the Bio::DB::Expression docs.  This includes ways of checking for 
+formats at compile time, not run time.
+END
+    }
+    return $ok;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Failover.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Failover.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Failover.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,194 @@
+# $Id: Failover.pm,v 1.10.4.2 2006/10/02 23:10:14 sendu Exp $
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Failover - A Bio::DB::RandomAccessI compliant class which
+wraps a prioritized list of DBs
+
+=head1 SYNOPSIS
+
+    $failover = Bio::DB::Failover->new();
+
+    $failover->add_database($db);
+
+    # fail over Bio::DB::RandomAccessI.pm
+
+    # this will check each database in priority, returning when
+    # the first one succeeds
+
+    $seq = $failover->get_Seq_by_id($id);
+
+=head1 DESCRIPTION
+
+This module provides fail over access to a set of Bio::DB::RandomAccessI
+objects.
+
+=head1 CONTACT
+
+Ewan Birney E<lt>birney at ebi.ac.ukE<gt> originally wrote this class.
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Failover;
+
+use strict;
+
+use base qw(Bio::Root::Root Bio::DB::RandomAccessI);
+
+# Constructor: builds the object through the parent class and starts
+# with an empty list of databases.
+sub new {
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    $self->{'_database'} = [];
+    return $self;
+}
+
+=head2 add_database
+
+ Title   : add_database
+ Usage   : $failover->add_database(@db)
+ Function: Adds a database to the Failover object
+ Returns : Count of number of databases
+ Args    : Array of db resources
+ Throws  : Not a RandomAccessI exception
+
+=cut
+
+# Append one or more databases to the fail-over list, in the order given.
+# Each argument must be an object that isa Bio::DB::RandomAccessI;
+# anything else is reported through throw() and never stored.
+# Returns the number of databases now registered.
+sub add_database {
+	my ($self, @db) = @_;
+	for my $db ( @db ) {
+		# ->isa is wrapped in eval so that an unblessed reference is
+		# reported through throw() instead of dying on the method call
+		if ( ref $db && eval { $db->isa('Bio::DB::RandomAccessI') } ) {
+			push(@{$self->{'_database'}},$db);
+		} else {
+			$self->throw("Database object $db is not a Bio::DB::RandomAccessI");
+		}
+	}
+	return scalar @{$self->{'_database'}};
+}
+
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "no id" exception
+
+=cut
+
+sub get_Seq_by_id {
+	my ($self, $id) = @_;
+
+	$self->throw("no id is given!") unless defined $id;
+
+	# ask each database in the order it was added; the first hit wins
+	for my $db ( @{ $self->{'_database'} } ) {
+		my $seq = eval { $db->get_Seq_by_id($id) };
+		$self->warn($@) if $@;
+		return $seq if defined $seq;
+
+		$self->warn("No sequence retrieved by database " . ref($db));
+	}
+
+	return;
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "no id" exception
+
+=cut
+
+sub get_Seq_by_acc {
+	my ($self, $id) = @_;
+
+	$self->throw("no id is given!") unless defined $id;
+
+	# ask each database in the order it was added; the first hit wins
+	for my $db ( @{ $self->{'_database'} } ) {
+		my $seq = eval { $db->get_Seq_by_acc($id) };
+		$self->warn($@) if $@;
+		return $seq if defined $seq;
+
+		$self->warn("No sequence retrieved by database " . ref($db));
+	}
+
+	return;
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.2');
+ Function: Gets a Bio::Seq object by versioned accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+=cut
+
+sub get_Seq_by_version {
+	my ($self, $id) = @_;
+
+	$self->throw("no acc is given!") unless defined $id;
+
+	# ask each database in the order it was added; the first hit wins
+	for my $db ( @{ $self->{'_database'} } ) {
+		my $seq = eval { $db->get_Seq_by_version($id) };
+		$self->warn($@) if $@;
+		return $seq if defined $seq;
+
+		$self->warn("No sequence retrieved by database " . ref($db));
+	}
+
+	return;
+}
+
+## End of Package
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1169 @@
+# $Id: Fasta.pm,v 1.44.4.3 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::Fasta
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Fasta -- Fast indexed access to a directory of fasta files
+
+=head1 SYNOPSIS
+
+  use Bio::DB::Fasta;
+
+  # create database from directory of fasta files
+  my $db      = Bio::DB::Fasta->new('/path/to/fasta/files');
+
+  # simple access (for those without Bioperl)
+  my $seq      = $db->seq('CHROMOSOME_I',4_000_000 => 4_100_000);
+  my $revseq   = $db->seq('CHROMOSOME_I',4_100_000 => 4_000_000);
+  my @ids     = $db->ids;
+  my $length   = $db->length('CHROMOSOME_I');
+  my $alphabet = $db->alphabet('CHROMOSOME_I');
+  my $header   = $db->header('CHROMOSOME_I');
+
+  # Bioperl-style access
+  my $db      = Bio::DB::Fasta->new('/path/to/fasta/files');
+
+  my $obj     = $db->get_Seq_by_id('CHROMOSOME_I');
+  my $seq     = $obj->seq;
+  my $subseq  = $obj->subseq(4_000_000 => 4_100_000);
+  my $length  = $obj->length;
+  # (etc)
+
+  # Bio::SeqIO-style access
+  my $stream  = Bio::DB::Fasta->new('/path/to/files')->get_PrimarySeq_stream;
+  while (my $seq = $stream->next_seq) {
+    # Bio::PrimarySeqI stuff
+  }
+
+  my $fh = Bio::DB::Fasta->newFh('/path/to/fasta/files');
+  while (my $seq = <$fh>) {
+    # Bio::PrimarySeqI stuff
+  }
+
+  # tied hash access
+  tie %sequences,'Bio::DB::Fasta','/path/to/fasta/files';
+  print $sequences{'CHROMOSOME_I:1,20000'};
+
+=head1 DESCRIPTION
+
+Bio::DB::Fasta provides indexed access to one or more Fasta files.  It
+provides random access to each sequence entry, and to subsequences
+within each entry, allowing you to retrieve portions of very large
+sequences without bringing the entire sequence into memory.
+
+When you initialize the module, you point it at a single fasta file or
+a directory of multiple such files.  The first time it is run, the
+module generates an index of the contents of the file or directory
+using the AnyDBM module (Berkeley DB* preferred, followed by GDBM_File,
+NDBM_File, and SDBM_File).  Thereafter it uses the index file to find
+the file and offset for any requested sequence.  If one of the source
+fasta files is updated, the module reindexes just that one file.  (You
+can also force reindexing manually).  For improved performance, the
+module keeps a cache of open filehandles, closing less-recently used
+ones when the cache is full.
+
+The fasta files may contain any combination of nucleotide and protein
+sequences; during indexing the module guesses the molecular type.
+Entries may have any line length up to 65,536 characters, and
+different line lengths are allowed in the same file.  However, within
+a sequence entry, all lines must be the same length except for the
+last.
+
+The module uses /^E<gt>(\S+)/ to extract the primary ID of each sequence 
+from the Fasta header.  During indexing, you may pass a callback routine to
+modify this primary ID.  For example, you may wish to extract a
+portion of the gi|gb|abc|xyz nonsense that GenBank Fasta files use.
+The original header line can be recovered later.
+
+This module was developed for use with the C. elegans and human
+genomes, and has been tested with sequence segments as large as 20
+megabases.  Indexing the C. elegans genome (100 megabases of genomic
+sequence plus 100,000 ESTs) takes ~5 minutes on my 300 MHz pentium
+laptop. On the same system, average access time for any 200-mer within
+the C. elegans genome was E<lt>0.02s.
+
+*Berkeley DB can be obtained free from www.sleepycat.com. After it is 
+installed you will need to install the BerkeleyDB Perl module.
+
+=head1 DATABASE CREATION AND INDEXING
+
+The two constructors for this class are new() and newFh().  The former
+creates a Bio::DB::Fasta object which is accessed via method calls.
+The latter creates a tied filehandle which can be used Bio::SeqIO
+style to fetch sequence objects in a stream fashion.  There is also a
+tied hash interface.
+
+=over 2
+
+=item $db = Bio::DB::Fasta-E<gt>new($fasta_path [,%options])
+
+Create a new Bio::DB::Fasta object from the Fasta file or files
+indicated by $fasta_path.  Indexing will be performed automatically if
+needed.  If successful, new() will return the database accessor
+object.  Otherwise it will return undef.
+
+$fasta_path may be an individual Fasta file, or may refer to a
+directory containing one or more of such files.  Following the path,
+you may pass a series of name=E<gt>value options or a hash with these
+same name=E<gt>value pairs.  Valid options are:
+
+ Option Name   Description               Default
+ -----------   -----------               -------
+
+ -glob         Glob expression to use    *.{fa,fasta,fast,FA,FASTA,FAST,dna}
+               for searching for Fasta
+	            files in directories. 
+
+ -makeid       A code subroutine for     None
+	            transforming Fasta IDs.
+
+ -maxopen      Maximum size of		     32
+	            filehandle cache.
+
+ -debug        Turn on status		        0
+	            messages.
+
+ -reindex      Force the index to be     0
+               rebuilt.
+
+ -dbmargs      Additional arguments      none
+               to pass to the DBM
+               routines when tied
+               (scalar or array ref).
+
+-dbmargs can be used to control the format of the index.  For example,
+you can pass $DB_BTREE to this argument so as to force the IDs to be
+sorted and retrieved alphabetically.  Note that you must use the same
+arguments every time you open the index!
+
+-reindex can be used to force the index to be recreated from scratch.
+
+=item $fh = Bio::DB::Fasta-E<gt>newFh($fasta_path [,%options])
+
+Create a tied filehandle opened on a Bio::DB::Fasta object.  Reading
+from this filehandle with E<lt>E<gt> will return a stream of sequence objects,
+Bio::SeqIO style.
+
+=back
+
+The -makeid option gives you a chance to modify sequence IDs during
+indexing.  The option value should be a code reference that will
+take a scalar argument and return a scalar result, like this:
+
+  $db = Bio::DB::Fasta->new("file.fa",-makeid=>\&make_my_id);
+
+  sub make_my_id {
+    my $description_line = shift;
+    # get a different id from the fasta header, e.g.
+	 $description_line =~ /(\S+)$/;
+    return $1;
+  }
+
+make_my_id() will be called with the full fasta id line (including the
+"E<gt>" symbol!).  For example:
+
+ >A12345.3 Predicted C. elegans protein egl-2
+
+By default, this module will use the regular expression /^E<gt>(\S+)/
+to extract "A12345.3" for use as the ID.  If you pass a -makeid
+callback, you can extract any portion of this, such as the "egl-2"
+symbol.
+
+The -makeid option is ignored after the index is constructed.
+
+=head1 OBJECT METHODS
+
+The following object methods are provided.
+
+=over 10
+
+=item $raw_seq = $db-E<gt>seq($id [,$start, $stop])
+
+Return the raw sequence (a string) given an ID and optionally a start
+and stop position in the sequence.  In the case of DNA sequence, if
+$stop is less than $start, then the reverse complement of the sequence
+is returned (this violates Bio::Seq conventions).
+
+For your convenience, subsequences can be indicated with any of the
+following compound IDs:
+
+   $db->seq("$id:$start,$stop")
+
+   $db->seq("$id:$start..$stop")
+
+   $db->seq("$id:$start-$stop")
+
+=item $length = $db-E<gt>length($id)
+
+Return the length of the indicated sequence.
+
+=item $header = $db-E<gt>header($id)
+
+Return the header line for the ID, including the initial "E<gt>".
+
+=item $type  = $db-E<gt>alphabet($id)
+
+Return the molecular type of the indicated sequence.  One of "dna",
+"rna" or "protein".
+
+=item $filename  = $db-E<gt>file($id)
+
+Return the name of the file in which the indicated sequence can be
+found.
+
+=item $offset    = $db-E<gt>offset($id)
+
+Return the offset of the indicated sequence from the beginning of the
+file in which it is located.  The offset points to the beginning of
+the sequence, not the beginning of the header line.
+
+=item $header_length = $db-E<gt>headerlen($id)
+
+Return the length of the header line for the indicated sequence.
+
+=item $header_offset = $db-E<gt>header_offset($id)
+
+Return the offset of the header line for the indicated sequence from
+the beginning of the file in which it is located.
+
+=item $index_name  = $db-E<gt>index_name
+
+Return the path to the index file.
+
+=item $path = $db-E<gt>path
+
+Return the path to the Fasta file(s).
+
+=back
+
+For BioPerl-style access, the following methods are provided:
+
+=over 4
+
+=item $seq = $db-E<gt>get_Seq_by_id($id)
+
+Return a Bio::PrimarySeq::Fasta object, which obeys the
+Bio::PrimarySeqI conventions.  For example, to recover the raw DNA or
+protein sequence, call $seq-E<gt>seq().
+
+Note that get_Seq_by_id() does not bring the entire sequence into
+memory until requested.  Internally, the returned object uses the
+accessor to generate subsequences as needed.
+
+=item $seq = $db-E<gt>get_Seq_by_acc($id)
+
+=item $seq = $db-E<gt>get_Seq_by_primary_id($id)
+
+These methods all do the same thing as get_Seq_by_id().
+
+=item $stream = $db-E<gt>get_PrimarySeq_stream()
+
+Return a Bio::DB::Fasta::Stream object, which supports a single method
+next_seq(). Each call to next_seq() returns a new
+Bio::PrimarySeq::Fasta object, until no more sequences remain.
+
+=back
+
+See L<Bio::PrimarySeqI> for methods provided by the sequence objects
+returned from get_Seq_by_id() and get_PrimarySeq_stream().
+
+=head1 TIED INTERFACES
+
+This module provides two tied interfaces, one which allows you to
+treat the sequence database as a hash, and the other which allows you
+to treat the database as an I/O stream.
+
+=head2 Creating a Tied Hash
+
+The tied hash interface is very straightforward
+
+=over 1
+
+=item $obj = tie %db,'Bio::DB::Fasta','/path/to/fasta/files' [,@args]
+
+Tie %db to Bio::DB::Fasta using the indicated path to the Fasta files.
+The optional @args list is the same set of named argument/value pairs
+used by Bio::DB::Fasta-E<gt>new().
+
+If successful, tie() will return the tied object.  Otherwise it will
+return undef.
+
+=back
+
+Once tied, you can use the hash to retrieve an individual sequence by
+its ID, like this:
+
+  my $seq = $db{CHROMOSOME_I};
+
+You may select a subsequence by appending the comma-separated range to 
+the sequence ID in the format "$id:$start,$stop".  For example, here
+is the first 1000 bp of the sequence with the ID "CHROMOSOME_I":
+
+  my $seq = $db{'CHROMOSOME_I:1,1000'};
+
+(The regular expression used to parse this format allows sequence IDs
+to contain colons.)
+
+When selecting subsequences, if $start E<gt> $stop, then the reverse
+complement will be returned for DNA sequences.
+
+The keys() and values() functions will return the sequence IDs and
+their sequences, respectively.  In addition, each() can be used to
+iterate over the entire data set:
+
+ while (my ($id,$sequence) = each %db) {
+    print "$id => $sequence\n";
+ }
+
+When dealing with very large sequences, you can avoid bringing them
+into memory by calling each() in a scalar context.  This returns the
+key only.  You can then use tied(%db) to recover the Bio::DB::Fasta
+object and call its methods.
+
+ while (my $id = each %db) {
+    print "$id => ",$db{"$id:1,100"},"\n";
+    print "$id => ",tied(%db)->length($id),"\n";
+ }
+
+You may, in addition, invoke the FIRSTKEY and NEXTKEY tied hash methods
+of Bio::DB::Fasta directly.
+
+=over 2
+
+=item $id = $db-E<gt>FIRSTKEY
+
+Return the first ID in the database.
+
+=item $id = $db-E<gt>NEXTKEY($id)
+
+Given an ID, return the next ID in sequence.
+
+=back
+
+This allows you to write the following iterative loop using just the
+object-oriented interface:
+
+ my $db = Bio::DB::Fasta->new('/path/to/fasta/files');
+ for (my $id=$db->FIRSTKEY; $id; $id=$db->NEXTKEY($id)) {
+    # do something with sequence
+ }
+
+=head2 Creating a Tied Filehandle
+
+The Bio::DB::Fasta-E<gt>newFh() method creates a tied filehandle from
+which you can read Bio::PrimarySeq::Fasta sequence objects
+sequentially.  The following bit of code will iterate sequentially
+over all sequences in the database:
+
+ my $fh = Bio::DB::Fasta->newFh('/path/to/fasta/files');
+ while (my $seq = <$fh>) {
+   print $seq->id,' => ',$seq->length,"\n";
+ }
+
+When no more sequences remain to be retrieved, the stream will return
+undef.
+
+=head1 BUGS
+
+When a sequence is deleted from one of the Fasta files, this deletion
+is not detected by the module and removed from the index.  As a
+result, a "ghost" entry will remain in the index and will return
+garbage results if accessed.
+
+Currently, the only way to accommodate deletions is to rebuild the
+entire index, either by deleting it manually, or by passing
+-reindex=E<gt>1 to new() when initializing the module.
+
+=head1 SEE ALSO
+
+L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.  
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+#'
+package Bio::DB::Fasta;
+
+BEGIN {
+  @AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File)
+}
+
+use strict;
+use IO::File;
+use AnyDBM_File;
+use Fcntl;
+use File::Basename qw(basename dirname);
+
+use base qw(Bio::DB::SeqI Bio::Root::Root);
+
+# Method aliases.  get_Seq_by_primary_id is the spelling used in the POD;
+# the lower-case get_seq_by_primary_id is kept for existing callers.
+*seq = *sequence = \&subseq;
+*ids = \&get_all_ids;
+*get_Seq_by_primary_id = *get_seq_by_primary_id = *get_Seq_by_acc = \&get_Seq_by_id;
+
+# pack() templates for one index record.  Fields, in the order they are
+# handed to the pack method by calculate_offsets(): sequence offset,
+# sequence length, line length, header-line length, type code, file number.
+use constant STRUCT =>'NNnnCa*';
+use constant STRUCTBIG =>'QQnnCa*'; # 64-bit file offset and seq length
+# Type codes stored in the record (produced by _type(), read by alphabet()).
+use constant DNA     => 1;
+use constant RNA     => 2;
+use constant PROTEIN => 3;
+
+# Bio::DB-like object
+# providing fast random access to a directory of FASTA files
+
+=head2 new
+
+ Title   : new
+ Usage   : my $db = new Bio::DB::Fasta( $path, @options);
+ Function: initialize a new Bio::DB::Fasta object
+ Returns : new Bio::DB::Fasta object
+ Args    : path to dir of fasta files or a single filename
+
+These are optional arguments to pass in as well.
+
+ -glob         Glob expression to use    *.{fa,fasta,fast,FA,FASTA,FAST}
+               for searching for Fasta
+	            files in directories. 
+
+ -makeid       A code subroutine for     None
+	            transforming Fasta IDs.
+
+ -maxopen      Maximum size of		     32
+	            filehandle cache.
+
+ -debug        Turn on status		        0
+	            messages.
+
+ -reindex      Force the index to be     0
+               rebuilt.
+
+ -dbmargs      Additional arguments      none
+               to pass to the DBM
+               routines when tied
+               (scalar or array ref).
+
+=cut
+
+# Constructor.  $path is either a directory of Fasta files or a single
+# Fasta file; the remaining arguments are -name => value options (see the
+# POD above).  Builds or opens the index and throws if $path is neither a
+# directory nor a plain file.
+sub new {
+  my $class = shift;
+  my $path  = shift;
+  my %opts  = @_;
+
+  # The POD documents this option as -maxopen, but only -maxfh used to be
+  # read, so the documented spelling was silently ignored.  Accept both.
+  my $maxopen = $opts{-maxopen} || $opts{-maxfh} || 32;
+
+  my $self = bless {
+    debug    => $opts{-debug},
+    makeid   => $opts{-makeid},
+    glob     => $opts{-glob} || '*.{fa,fasta,FA,FASTA,fast,FAST,dna,FNA,fna,FAA,faa,FSA,fsa}',
+    maxopen  => $maxopen,
+    dbmargs  => $opts{-dbmargs} || undef,
+    fhcache  => {},      # path => open IO::File handle
+    cacheseq => {},      # path => number of times the handle was requested
+    curopen  => 0,       # number of handles currently cached
+    openseq  => 1,
+    dirname  => undef,
+    offsets  => undef,
+  }, $class;
+  my ($offsets,$dirname);
+
+  if (-d $path) {
+    # because Win32 glob() is broken with respect to long file names
+    # that contain whitespace.
+    $path = Win32::GetShortPathName($path)
+      if $^O =~ /^MSWin/i && eval 'use Win32; 1';
+    $offsets = $self->index_dir($path,$opts{-reindex});
+    $dirname = $path;
+  } elsif (-f _) {
+    # "_" reuses the stat buffer filled by the -d test above
+    $offsets = $self->index_file($path,$opts{-reindex});
+    $dirname = dirname($path);
+  } else {
+    $self->throw( "$path: Invalid file or dirname");
+  }
+  @{$self}{qw(dirname offsets)} = ($dirname,$offsets);
+
+  $self;
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Function: gets a new Fh for a file
+ Example : internal method
+ Returns : GLOB 
+ Args    :
+
+=cut
+
+# Class method: build a database object from the given arguments and wrap
+# it in a filehandle tied to Bio::DB::Fasta::Stream.  Returns the handle,
+# or nothing if the glob cannot be created or tied.
+sub newFh {
+  my ($class, @args) = @_;
+  my $db = $class->new(@args);
+  require Symbol;
+  my $fh = Symbol::gensym();
+  return unless $fh;
+  tie($$fh, 'Bio::DB::Fasta::Stream', $db) or return;
+  return $fh;
+}
+
+# Tie the DBM index file $index and return a reference to the tied hash.
+# The file is opened read-write (and created if missing) when $write is
+# true, read-only otherwise.  Any extra arguments from dbmargs() are passed
+# through to the DBM tie.  Throws if the tie fails.
+sub _open_index {
+  my $self = shift;
+  my ($index,$write) = @_;
+  my %offsets;
+  my $flags = $write ? O_CREAT|O_RDWR : O_RDONLY;
+  my @dbmargs = $self->dbmargs;
+  tie %offsets,'AnyDBM_File',$index,$flags,0644,@dbmargs
+    or $self->throw( "Can't open cache file $index: $!");
+  return \%offsets;
+}
+
+# Untie the index hash referenced by $index.
+sub _close_index {
+  my ($self, $index) = @_;
+  untie %{$index};
+}
+
+=head2 index_dir
+
+ Title   : index_dir
+ Usage   : $db->index_dir($dir)
+ Function: set the index dir and load all files in the dir
+ Returns : hashref of seq offsets in each file
+ Args    : dirname, boolean to force a reload of all files
+
+=cut
+
+# Index every file in $dir that matches the configured glob and return the
+# tied offsets hash.  Files newer than the index (or all files, when
+# $force_reindex is true) are re-scanned.  Throws if no file matches.
+sub index_dir {
+  my $self = shift;
+  my $dir  = shift;
+  my $force_reindex = shift;
+
+  # find all fasta files
+  my @files = glob("$dir/$self->{glob}");
+  $self->throw( "no fasta files in $dir") unless @files;
+
+  # get name of index
+  my $index = $self->index_name($dir,1);
+
+  # Depending on the DBM backend the index is stored in "$index" itself or
+  # in "$index.pag"/"$index.dir".  When reindexing is forced, remove every
+  # variant: removing only "$index" left the .pag/.dir timestamp in place,
+  # so unchanged files were skipped below even though a rebuild was asked for.
+  my @suffixes = ('','.pag','.dir');
+  if ($force_reindex) {
+    unlink "${index}$_" for @suffixes;
+  }
+
+  # get the modification time of the index
+  my $indextime   = 0;
+  for my $suffix (@suffixes) {
+    $indextime ||= (stat("${index}${suffix}"))[9];
+  }
+  $indextime ||= 0;  # prevent some uninit variable warnings
+
+  # get the most recent modification time of any of the contents
+  my $modtime = 0;
+  my %modtime;
+  $self->set_pack_method( @files );
+  foreach (@files) {
+    my $m = (stat($_))[9];
+    $modtime{$_} = $m;
+    $modtime = $m if defined $m && $modtime < $m;
+  }
+
+  my $reindex      = $force_reindex || $indextime < $modtime;
+  $self->{offsets} = $self->_open_index($index,$reindex) or return;
+
+  # no indexing needed
+  return $self->{offsets} unless $reindex;
+
+  # otherwise reindex contents of changed files; {indexing} marks the index
+  # as incomplete so that DESTROY can remove it if we are interrupted
+  $self->{indexing} = $index;
+  foreach (@files) {
+    next if( defined $indextime && $modtime{$_} <= $indextime);
+    $self->calculate_offsets($_,$self->{offsets});
+  }
+  delete $self->{indexing};
+
+  # we've been having troubles with corrupted index files on Windows systems,
+  # so possibly closing and reopening will help
+  $self->_close_index($self->{offsets});
+
+  return $self->{offsets}  = $self->_open_index($index);
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : my $seq = $db->get_Seq_by_id($id)
+ Function: Bio::DB::RandomAccessI method implemented
+ Returns : Bio::PrimarySeqI object
+ Args    : id
+
+=cut
+
+sub get_Seq_by_id {
+  my ($self, $id) = @_;
+  # Only IDs present in the index yield a sequence object.
+  if (exists $self->{offsets}{$id}) {
+    return Bio::PrimarySeq::Fasta->new($self,$id);
+  }
+  return;
+}
+
+=head2 set_pack_method
+
+ Title   : set_pack_method
+ Usage   : $db->set_pack_method( @files )
+ Function: Determines whether data packing uses 32 or 64 bit integers
+ Returns :
+ Args    : one or more file paths
+
+=cut
+
+sub set_pack_method {
+  my ($self, @paths) = @_;
+  my $limit = (2 ** 32) - 1;
+
+  # Size of the largest file among those given.
+  my $largest;
+  for my $path (@paths) {
+    my $bytes = -s $path;
+    $largest = $bytes if !defined $largest || $bytes > $largest;
+  }
+
+  # Anything above 4Gb needs the 64-bit record layout.
+  my ($packer, $unpacker) = $largest > $limit
+    ? (\&_packBig, \&_unpackBig)
+    : (\&_pack,    \&_unpack);
+  $self->{packmeth}   = $packer;
+  $self->{unpackmeth} = $unpacker;
+}
+
+=head2 index_file
+
+ Title   : index_file
+ Usage   : $db->index_file($filename)
+ Function: (re)loads a sequence file and indexes sequences offsets in the file
+ Returns : seq offsets in the file
+ Args    : filename, 
+           boolean to force reloading a file
+
+=cut
+
+
+sub index_file {
+  my ($self, $file, $force_reindex) = @_;
+
+  $self->set_pack_method($file);
+  my $index = $self->index_name($file);
+
+  # a forced rebuild starts from a fresh index file
+  if ($force_reindex) {
+    unlink $index;
+  }
+
+  # the index is stale when it is older than the sequence file
+  my $index_mtime = (stat($index))[9] || 0;
+  my $file_mtime  = (stat($file))[9]  || 0;
+  my $stale       = $force_reindex || $index_mtime < $file_mtime;
+
+  my $offsets = $self->_open_index($index,$stale);
+  return unless $offsets;
+  $self->{offsets} = $offsets;
+
+  if ($stale) {
+    # {indexing} flags the index as incomplete while the scan is running
+    $self->{indexing} = $index;
+    $self->calculate_offsets($file,$offsets);
+    delete $self->{indexing};
+  }
+  return $self->{offsets};
+}
+
+=head2 dbmargs
+
+ Title   : dbmargs
+ Usage   : my @args = $db->dbmargs;
+ Function: gets stored dbm arguments
+ Returns : array
+ Args    : none
+
+=cut
+
+sub dbmargs {
+  my ($self) = @_;
+  my $stored = $self->{dbmargs};
+  return unless $stored;
+  # an array reference is flattened; anything else is passed through as is
+  if (ref($stored) eq 'ARRAY') {
+    return @$stored;
+  }
+  return $stored;
+}
+
+=head2 index_name
+
+ Title   : index_name
+ Usage   : my $indexname = $db->index_name($path,$isdir);
+ Function: returns the name of the index for a specific path 
+ Returns : string
+ Args    : path to check, 
+           boolean if it is a dir
+
+=cut
+
+sub index_name {
+  my ($self, $path, $isdir) = @_;
+  if (!$path) {
+    # no path given: answer for the directory this object was opened on
+    my $dir = $self->{dirname};
+    return unless $dir;
+    return $self->index_name($dir,-d $dir);
+  }
+  return $isdir ? "$path/directory.index" : "$path.index";
+}
+
+=head2 calculate_offsets
+
+ Title   : calculate_offsets
+ Usage   : $db->calculate_offsets($filename,$offsets);
+ Function: calculates the sequence offsets in a file based on id
+ Returns : offset hash for each file
+ Args    : file to process
+           $offsets - hashref of id to offset storage
+
+=cut
+
+# Scan $file and store one packed index record per sequence in the hash
+# referenced by $offsets (keyed by ID), plus the "__termination_length"
+# entry.  Returns $offsets.
+#
+# Changes from the earlier version:
+#  * the line length is validated when it is measured; the old check ran
+#    right after the value had been reset to 0, so it never fired for any
+#    record but the last;
+#  * the sequence type is determined per record instead of once per file;
+#  * an ID of "0" is no longer discarded;
+#  * the populated $offsets is returned rather than an unused empty hash.
+sub calculate_offsets {
+  my $self = shift;
+  my ($file,$offsets) = @_;
+  my $base = $self->path2fileno(basename($file));
+
+  my $fh = IO::File->new($file) or $self->throw( "Can't open $file: $!");
+  binmode $fh;
+  warn "indexing $file\n" if $self->{debug};
+  my ($offset,$id,$linelength,$type,$firstline,$count,$termination_length,$seq_lines,$last_line);
+  while (<$fh>) {		# don't try this at home
+    $termination_length ||= /\r\n$/ ? 2 : 1;  # account for crlf-terminated Windows files
+    if (/^>(\S+)/) {
+      print STDERR "indexed $count sequences...\n"
+        if $self->{debug} && (++$count%1000) == 0;
+      my $pos = tell($fh);      # first byte after the header line
+      # close the previous record, if there is one
+      if (defined $id && $id ne '') {
+        my $seqlength    = $pos - $offset - length($_);
+        $seqlength      -= $termination_length * $seq_lines;
+        $offsets->{$id}  = &{$self->{packmeth}}($offset,$seqlength,
+                                                $linelength,$firstline,
+                                                $type,$base);
+      }
+      $id = ref($self->{makeid}) eq 'CODE' ? $self->{makeid}->($_) : $1;
+      ($offset,$firstline,$linelength,$type) = ($pos,length($_),0,undef);
+      $seq_lines = 0;
+    } else {
+      # the first sequence line of a record fixes its line length
+      unless ($linelength) {
+        $linelength = length($_);
+        $self->_check_linelength($linelength);
+      }
+      $type       ||= $self->_type($_);
+      $seq_lines++;
+    }
+    $last_line = $_;
+  }
+
+  # deal with last entry
+  if (defined $id && $id ne '') {
+    my $pos = tell($fh);
+    my $seqlength   = $pos - $offset;
+
+    if ($linelength == 0) { # yet another pesky empty chr_random.fa file
+      $seqlength = 0;
+    } else {
+      # a final line without a terminator has no line ending to subtract
+      if ($last_line !~ /\s$/) {
+        $seq_lines--;
+      }
+      $seqlength -= $termination_length * $seq_lines;
+    };
+    $offsets->{$id} = &{$self->{packmeth}}($offset,$seqlength,
+                                           $linelength,$firstline,
+                                           $type,$base);
+  }
+  $offsets->{__termination_length} = $termination_length;
+  return $offsets;
+}
+
+=head2 get_all_ids
+
+ Title   : get_all_ids
+ Usage   : my @ids = $db->get_all_ids
+ Function: gets all the stored ids in all indexes
+ Returns : list of ids
+ Args    : none
+
+=cut
+
+sub get_all_ids {
+  my $self = shift;
+  # keys starting with "__" are the index's own bookkeeping entries
+  return grep { $_ !~ /^__/ } keys %{ $self->{offsets} };
+}
+
+# Each accessor below unpacks the index record stored for $id and returns
+# one field of it; nothing is returned when $id has no record.
+
+# field 0: offset of the sequence data within its file
+sub offset {
+  my ($self, $id) = @_;
+  my $packed = $self->{offsets}{$id};
+  return unless $packed;
+  return ($self->{unpackmeth}->($packed))[0];
+}
+
+# field 1: sequence length
+sub length {
+  my ($self, $id) = @_;
+  my $packed = $self->{offsets}{$id};
+  return unless $packed;
+  return ($self->{unpackmeth}->($packed))[1];
+}
+
+# field 2: length of a sequence line, terminator included
+sub linelen {
+  my ($self, $id) = @_;
+  my $packed = $self->{offsets}{$id};
+  return unless $packed;
+  return ($self->{unpackmeth}->($packed))[2];
+}
+
+# field 3: length of the header line
+sub headerlen {
+  my ($self, $id) = @_;
+  my $packed = $self->{offsets}{$id};
+  return unless $packed;
+  return ($self->{unpackmeth}->($packed))[3];
+}
+
+# Translate the type code stored for $id into 'dna', 'rna' or 'protein'.
+sub alphabet {
+  my ($self, $id) = @_;
+  my $packed = $self->{offsets}{$id};
+  return unless $packed;
+  my $code = ($self->{unpackmeth}->($packed))[4];
+  return 'dna' if $code == DNA;
+  return 'rna' if $code == RNA;
+  return 'protein';
+}
+
+# Directory this database was opened on.
+sub path {
+  my $self = shift;
+  return $self->{dirname};
+}
+
+# The header line sits immediately before the sequence data, so its offset
+# is the sequence offset minus the header length.
+sub header_offset {
+    my ($self, $id) = @_;
+    return unless $self->{offsets}{$id};
+    my $seq_start  = $self->offset($id);
+    my $header_len = $self->headerlen($id);
+    return $seq_start - $header_len;
+}
+
+# Name of the file that holds sequence $id (field 5 of its index record is
+# a file number, resolved through fileno2path).
+sub file {
+  my ($self, $id) = @_;
+  my $packed = $self->{offsets}{$id};
+  return unless $packed;
+  my $fileno = ($self->{unpackmeth}->($packed))[5];
+  return $self->fileno2path($fileno);
+}
+
+# Look up the file name recorded in the index for file number $no.
+sub fileno2path {
+  my ($self, $no) = @_;
+  my $key = "__file_$no";
+  return $self->{offsets}{$key};
+}
+
+# Return the file number recorded for $path, allocating the next number
+# from this object's counter (and storing both directions of the mapping
+# in the index) the first time a path is seen.
+sub path2fileno {
+  my ($self, $path) = @_;
+  my $key = "__path_$path";
+  unless (defined $self->{offsets}{$key}) {
+    my $fileno = 0 + $self->{fileno}++;
+    $self->{offsets}{$key} = $fileno;
+    $self->{offsets}{"__file_$fileno"} = $path;
+  }
+  return $self->{offsets}{$key};
+}
+
+# Throw if $linelength does not fit the 16-bit "n" field that the index
+# record uses for the line length.  An undefined length is accepted.
+# (The statement used to end in a stray "." instead of ";".)
+sub _check_linelength {
+  my $self       = shift;
+  my $linelength = shift;
+  return unless defined $linelength;
+  $self->throw("Each line of the fasta file must be less than 65,536 characters.  Line $. is $linelength chars.")
+    if $linelength > 65535;
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $seqdb->subseq($id,$start,$stop);
+ Function: returns a subseq of a sequence in the db
+ Returns : subsequence data
+ Args    : id of sequence, starting point, ending point
+
+=cut
+
+# Return the raw sequence for $id between $start and $stop (1-based,
+# inclusive).  $start defaults to 1 and $stop to the sequence length.  When
+# $start > $stop the range is read forwards, then reversed and the letters
+# g/a/t/c complemented.  Returns nothing for an unknown ID and the empty
+# string for an empty sequence.
+sub subseq {
+  my ($self,$id,$start,$stop) = @_;
+  # compound IDs: "id:start,stop", "id:start-stop" or "id:start..stop";
+  # underscores may be used as digit separators
+  if ($id =~ /^(.+):([\d_]+)(?:,|-|\.\.)([\d_]+)$/) {
+    ($id,$start,$stop) = ($1,$2,$3);
+    $start =~ s/_//g;
+    $stop =~ s/_//g;
+  }
+
+  my $fh = $self->fh($id) or return;
+
+  # A zero-length sequence used to fall through as start=1 > stop=0, was
+  # treated as a reversed range, and one unrelated byte was read from the
+  # file.  There is nothing to read, so say so.
+  my $seqlength = $self->length($id);
+  return '' unless $seqlength;
+
+  $start ||= 1;
+  $stop  ||= $seqlength;
+
+  my $reversed;
+  if (defined $stop && $start > $stop) {
+    ($start,$stop) = ($stop,$start);
+    $reversed++;
+  }
+
+  my $data;
+
+  # translate sequence coordinates into byte positions in the file
+  my $filestart = $self->caloffset($id,$start);
+  my $filestop  = $self->caloffset($id,$stop);
+
+  seek($fh,$filestart,0);
+  read($fh,$data,$filestop-$filestart+1);
+  # the byte range spans line terminators; drop them
+  $data =~ s/\n//g;
+  $data =~ s/\r//g;
+  if ($reversed) {
+    $data = reverse $data;
+    $data =~ tr/gatcGATC/ctagCTAG/;
+  }
+  $data;
+}
+
+# Return an open handle on the file containing sequence $id, taken from
+# the handle cache.  Returns nothing for an unknown ID; throws if the file
+# cannot be opened.
+sub fh {
+  my ($self, $id) = @_;
+  my $file = $self->file($id);
+  return unless $file;
+  my $handle = $self->fhcache("$self->{dirname}/$file");
+  $self->throw( "Can't open file $file") unless $handle;
+  return $handle;
+}
+
+# Return the header line of sequence $id with the leading ">" and the line
+# terminator removed, or nothing if $id is not in the index.
+sub header {
+  my $self = shift;
+  my $id   = shift;
+  # Test the stored record itself: "or return" applied to the list
+  # assignment below never triggered for a missing ID.
+  my $packed = $self->{offsets}{$id} or return;
+  my ($offset,$seqlength,$linelength,$firstline,$type,$file)
+    = &{$self->{unpackmeth}}($packed);
+  $offset -= $firstline;        # the header immediately precedes the sequence
+  my $data;
+  my $fh = $self->fh($id) or return;
+  seek($fh,$offset,0);
+  read($fh,$data,$firstline);
+  # files are read in binmode, so strip CRLF as well as LF (chomp left "\r")
+  $data =~ s/\r?\n\z//;
+  substr($data,0,1) = '';       # drop the ">"
+  $data;
+}
+
+# Convert a 1-based sequence position into a byte position in the file.
+# The position is clamped to the sequence, then whole lines (terminator
+# included) are skipped and the remainder is added within the line.
+sub caloffset {
+  my ($self, $id, $position) = @_;
+  my $index0 = $position - 1;
+  my ($offset,$seqlength,$linelength) = $self->{unpackmeth}->($self->{offsets}{$id});
+  $index0 = 0              if $index0 < 0;
+  $index0 = $seqlength - 1 if $index0 >= $seqlength;
+  # residues per line = stored line length minus the line terminator
+  my $per_line = $linelength - $self->{offsets}{__termination_length};
+  return $offset + $linelength * int($index0 / $per_line) + $index0 % $per_line;
+}
+
+# Return a cached read handle for $path, opening the file (in binmode) if
+# it is not cached yet.  Returns nothing if the file cannot be opened.
+# {cacheseq}{$path} counts how often each path has been requested and
+# {curopen} tracks the number of cached handles.
+sub fhcache {
+  my $self = shift;
+  my $path = shift;
+  if (!$self->{fhcache}{$path}) {
+    if ($self->{curopen} >= $self->{maxopen}) {
+      # cache full: order the cached paths by request count, lowest first
+      my @lru = sort {$self->{cacheseq}{$a} <=> $self->{cacheseq}{$b};} keys %{$self->{fhcache}};
+      # keep only the first maxopen/3 entries of that list in @lru ...
+      splice(@lru, $self->{maxopen} / 3);
+      $self->{curopen} -= @lru;
+      # ... and drop their handles from the cache
+      for (@lru) { delete $self->{fhcache}{$_} }
+    }
+    $self->{fhcache}{$path} = IO::File->new($path) or return;
+    binmode $self->{fhcache}{$path};
+    $self->{curopen}++;
+  }
+  $self->{cacheseq}{$path}++;
+  $self->{fhcache}{$path}
+}
+
+# Pack/unpack one index record.  The plain variants use the 32-bit STRUCT
+# template, the "Big" variants the 64-bit STRUCTBIG template; which pair is
+# used is decided by set_pack_method().
+sub _pack {
+  pack STRUCT,@_;
+}
+
+sub _packBig {
+  pack STRUCTBIG,@_;
+}
+
+sub _unpack {
+  unpack STRUCT,shift;
+}
+
+sub _unpackBig {
+  unpack STRUCTBIG,shift;
+}
+
+# Guess the type code (DNA, RNA or PROTEIN) of one line of sequence.
+# Files are read in binmode, so a CRLF-terminated line still carries its
+# "\r" here; without stripping it no DNA/RNA line from such a file could
+# match and everything was reported as protein.
+sub _type {
+  shift;
+  local $_ = shift;
+  s/\s+\z//;
+  return /^[gatcnGATCN*-]+$/   ? DNA
+         : /^[gaucnGAUCN*-]+$/ ? RNA
+         : PROTEIN;
+}
+
+=head2 get_PrimarySeq_stream
+
+ Title   : get_PrimarySeq_stream
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+# Return a Bio::DB::Fasta::Stream positioned on this database.
+sub get_PrimarySeq_stream {
+  my ($self) = @_;
+  my $stream = Bio::DB::Fasta::Stream->new($self);
+  return $stream;
+}
+
+# Tied-hash interface.  The database is read-only: FETCH returns sequence
+# data via subseq(), the mutating operations throw, and key iteration is
+# delegated to the object tied to the offsets hash.
+sub TIEHASH {
+  my ($class, @args) = @_;
+  return $class->new(@args);
+}
+
+sub FETCH {
+  my ($self, @key) = @_;
+  return $self->subseq(@key);
+}
+
+sub STORE {
+    my $self = shift;
+    $self->throw("Read-only database");
+}
+
+sub DELETE {
+    my $self = shift;
+    $self->throw("Read-only database");
+}
+
+sub CLEAR {
+    my $self = shift;
+    $self->throw("Read-only database");
+}
+
+sub EXISTS {
+  my ($self, @key) = @_;
+  return defined $self->offset(@key);
+}
+
+sub FIRSTKEY {
+  my ($self, @args) = @_;
+  return tied(%{ $self->{offsets} })->FIRSTKEY(@args);
+}
+
+sub NEXTKEY {
+  my ($self, @args) = @_;
+  return tied(%{ $self->{offsets} })->NEXTKEY(@args);
+}
+
+# If the object dies while {indexing} is still set, the index on disk is
+# incomplete: warn and remove it.
+sub DESTROY {
+  my $self = shift;
+  my $partial = $self->{indexing};
+  return unless $partial;
+  warn "indexing was interrupted, so unlinking $self->{indexing}";
+  unlink $partial;
+}
+
+#-------------------------------------------------------------
+# Bio::PrimarySeqI compatibility
+#
+package Bio::PrimarySeq::Fasta;
+use overload '""' => 'display_id';
+
+use base qw(Bio::Root::Root Bio::PrimarySeqI);
+
+# Build a lightweight sequence object for $id in database $db.  $start
+# defaults to 1 and $stop to the length the database reports for $id.  May
+# be called on a class name or on an existing object.
+sub new {
+  my ($class, $db, $id, $start, $stop) = @_;
+  $class = ref($class) if ref $class;
+  my %self = (
+    db    => $db,
+    id    => $id,
+    start => $start || 1,
+    stop  => $stop  || $db->length($id),
+  );
+  return bless \%self, $class;
+}
+
+# Fetch this object's range of sequence data from the database.
+sub seq {
+  my $self = shift;
+  my ($id, $start, $stop) = @{$self}{qw(id start stop)};
+  return $self->{db}->seq($id, $start, $stop);
+}
+
+# Sequence data of the sub-range given by the arguments (see trunc).
+sub subseq {
+  my ($self, @range) = @_;
+  my $piece = $self->trunc(@range);
+  return $piece->seq();
+}
+
+# Return a new object covering positions $start..$stop of this one.  The
+# coordinates are relative to this object's own start; for an object whose
+# start lies after its stop they are counted downwards from that start.
+sub trunc {
+  my ($self, $start, $stop) = @_;
+  $self->throw("Stop cannot be smaller than start")  unless $start <= $stop;
+  my ($db, $id, $origin) = @{$self}{qw(db id start)};
+  if ($origin <= $self->{stop}) {
+    return $self->new($db, $id, $origin + $start - 1, $origin + $stop - 1);
+  }
+  return $self->new($db, $id, $origin - ($start - 1), $origin - ($stop - 1));
+}
+
+# Value of the object's is_circular slot.
+sub is_circular {
+  my ($self) = @_;
+  return $self->{is_circular};
+}
+
+# The ID this object was created with.
+sub display_id {
+  my ($self) = @_;
+  return $self->{id};
+}
+
+# Always the literal string "unknown".
+sub accession_number {
+  return "unknown";
+}
+
+# The plain (non-overloaded) string form of the object reference.
+sub primary_id {
+  my ($self) = @_;
+  return overload::StrVal($self);
+}
+
+sub can_call_new { 0 }
+
+# Alphabet the database reports for this object's ID.
+sub alphabet {
+  my ($self) = @_;
+  my $db = $self->{db};
+  return $db->alphabet($self->{id});
+}
+
+# New object over the same range with start and stop exchanged.
+sub revcom {
+  my ($self) = @_;
+  my ($db, $id, $start, $stop) = @{$self}{qw(db id start stop)};
+  return $self->new($db, $id, $stop, $start);
+}
+
+# Length of the sequence this object represents.  The previous version
+# always returned the length of the full database entry, which is wrong
+# for objects produced by trunc().  An object spanning the whole entry is
+# answered from the index without reading the file; any other range is
+# measured on the data actually returned by seq().
+sub length {
+  my $self = shift;
+  my $full = $self->{db}->length($self->{id});
+  return $full
+    if !defined $full
+    || ($self->{start} == 1 && $self->{stop} == $full);
+  # CORE:: because this package defines its own length()
+  return CORE::length($self->seq);
+}
+
+# Return the description part of the header line, i.e. everything after
+# the ID and the whitespace that follows it.  Returns nothing when there is
+# no header or no description.
+sub description  {
+    my $self = shift;
+    my $header = $self->{'db'}->header($self->{id});
+    return unless defined $header;
+    # split with a limit of 2 yields (id, rest): the description is element
+    # 1.  Element 2, which used to be returned, can never exist.
+    return (split(/\s+/,$header,2))[1];
+}
+
+*desc = \&description;
+
+#-------------------------------------------------------------
+# stream-based access to the database
+#
+package Bio::DB::Fasta::Stream;
+use base qw(Tie::Handle Bio::DB::SeqI);
+
+
+# Create a stream over $db, positioned on the database's first key.
+sub new {
+  my ($class, $db) = @_;
+  my %state = (db => $db);
+  $state{key} = $db->FIRSTKEY;
+  return bless \%state, $class;
+}
+
+# Return the next sequence object from the database, or nothing once the
+# keys are exhausted.  Index-internal keys (those starting with "__") are
+# skipped.
+sub next_seq {
+  my $self = shift;
+  my ($key,$db) = @{$self}{'key','db'};
+  # Exhausted: previously an undefined key was still matched against the
+  # pattern and handed on to get_Seq_by_id() and NEXTKEY().
+  return unless defined $key;
+  while ($key =~ /^__/) {
+    $key = $db->NEXTKEY($key);
+    unless (defined $key) {
+      $self->{key} = undef;     # remember that the stream has ended
+      return;
+    }
+  }
+  my $value = $db->get_Seq_by_id($key);
+  $self->{key} = $db->NEXTKEY($key);
+  $value;
+}
+
+# Tied-handle interface: reading from the handle yields the next sequence.
+sub TIEHANDLE {
+  my ($class, $db) = @_;
+  return $class->new($db);
+}
+
+sub READLINE {
+  my ($self) = @_;
+  return $self->next_seq;
+}
+
+1;
+
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/FileCache.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/FileCache.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/FileCache.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,330 @@
+# $Id: FileCache.pm,v 1.11.4.2 2006/10/02 23:10:14 sendu Exp $
+#
+# POD documentation - main docs before the code
+#
+#
+
+=head1 NAME
+
+Bio::DB::FileCache - In file cache for BioSeq objects
+
+=head1 SYNOPSIS
+
+
+
+  $cachedb = Bio::DB::FileCache->new($real_db);
+
+  #
+  # $real_db is a Bio::DB::RandomAccessI database
+  #
+
+  $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
+
+  #
+  # $seq is a Bio::Seq object
+  #
+
+  # more control provided with named-parameter form
+
+  $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db,
+				      -file  => $path,
+				      -keep  => $flag,
+				    );
+
+=head1 DESCRIPTION
+
+This is a disk cache system which saves the objects returned by
+Bio::DB::RandomAccessI on disk.  The disk cache grows without limit,
+while the process is running, but is automatically unlinked at process
+termination unless the -keep flag is set.
+
+This module requires DB_File and Storable.
+
+=head1 CONTACT
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::FileCache;
+
+use DB_File;
+use Storable qw(freeze thaw);
+use Fcntl qw(O_CREAT O_RDWR O_RDONLY);
+use File::Temp 'tmpnam';
+
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::DB::SeqI);
+
+use Bio::Seq::RichSeq;
+use Bio::Location::Split;
+use Bio::Location::Fuzzy;
+use Bio::Seq;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Annotation::Collection;
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::FileCache->new(
+                 -seqdb => $db,   # Bio::DB::RandomAccessI database
+                 -file  => $path, # path to index file
+                 -keep  => $flag, # don't unlink index file
+          )
+ Function: creates a new on-disk cache
+ Returns : a Bio::DB::RandomAccessI database
+ Args    : as above
+ Throws  : "Must be a randomaccess database" exception
+           "Could not open primary index file" exception
+
+If no index file is specified, will create a temporary file in your
+system's temporary file directory.  The name of this temporary file
+can be retrieved using file_name().
+
+=cut
+
+#'
+# Constructor.  Takes -seqdb (required, a Bio::DB::RandomAccessI object),
+# -file (cache file; a temporary name is generated when omitted) and -keep
+# (do not unlink the cache file).  Throws if -seqdb is missing or of the
+# wrong class, or if the cache file cannot be opened.
+sub new {
+    my ($class,@args) = @_;
+
+    my $self = Bio::Root::Root->new();
+    bless $self,$class;
+
+    my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE
+                                                         KEEP)],@args);
+
+    if( !defined $seqdb || !ref $seqdb ||
+        ! $seqdb->isa('Bio::DB::RandomAccessI') ) {
+       $self->throw("Must be a randomaccess database not a [$seqdb]");
+    }
+
+    $self->seqdb($seqdb);
+    $file_name ||= tmpnam();
+    $self->file_name($file_name);
+    # keep must be set before the database is opened: _open_database()
+    # consults it to decide whether to unlink the file straight away
+    $self->keep($keep);
+
+    $self->_open_database($file_name);
+    return $self;
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_id{
+   my ($self,$id) = @_;
+
+   # cache hit: hand back the thawed copy
+   my $cached = $self->_get('id' => $id);
+   if (defined $cached) {
+       return $cached;
+   }
+
+   # cache miss: ask the wrapped database, then remember the answer
+   my $fetched = $self->seqdb->get_Seq_by_id($id);
+   $self->_store('id' => $id, $fetched);
+   return $fetched;
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_acc{
+   my ($self,$acc) = @_;
+
+   # cache hit: hand back the thawed copy
+   my $cached = $self->_get('acc' => $acc);
+   if (defined $cached) {
+       return $cached;
+   }
+
+   # cache miss: ask the wrapped database, then remember the answer
+   my $fetched = $self->seqdb->get_Seq_by_acc($acc);
+   $self->_store('acc' => $acc, $fetched);
+   return $fetched;
+}
+
+=head2 seqdb
+
+ Title   : seqdb
+ Usage   : $seqdb = $db->seqdb([$seqdb])
+ Function: gets/sets the Bio::DB::RandomAccessI database
+ Returns : a Bio::DB::RandomAccessI database
+ Args    : new sequence database (optional)
+ Throws  : nothing
+
+=cut
+
+# Combined getter/setter: with a true argument the wrapped database is
+# replaced, otherwise the current one is returned.
+sub seqdb {
+    my ($self, $seqdb) = @_;
+    return $self->{'seqdb'} unless $seqdb;
+    $self->{'seqdb'} = $seqdb;
+}
+
+=head2 file_name
+
+ Title   : file_name
+ Usage   : $path = $db->file_name([$file_name])
+ Function: gets/sets the name of the cache file
+ Returns : a path
+ Args    : new cache file name (optional)
+ Throws  : nothing
+
+It probably isn't useful to set the cache file name after you've
+opened it.
+
+=cut
+
+#'
+
+# Getter/setter for the cache file name; always returns the value that was
+# in effect before the call.
+sub file_name {
+  my ($self, @new) = @_;
+  my $previous = $self->{file_name};
+  if (@new) {
+    $self->{file_name} = $new[0];
+  }
+  return $previous;
+}
+
+=head2 keep
+
+ Title   : keep
+ Usage   : $keep = $db->keep([$flag])
+ Function: gets/sets the value of the "keep" flag
+ Returns : current value
+ Args    : new value (optional)
+ Throws  : nothing
+
+Unless the keep flag is set, the index file will be unlinked when the
+process exits.  Since on some operating systems (Unix, OS/2) the
+unlinking occurs during the new() call immediately after opening the
+file, it probably isn't safe to change this value.
+
+=cut
+
+#'
+# Getter/setter for the keep flag; always returns the value that was in
+# effect before the call.
+sub keep {
+  my ($self, @new) = @_;
+  my $previous = $self->{keep};
+  $self->{keep} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 db
+
+ Title   : db
+ Usage   : $db->db
+ Function: returns tied hash to index database
+ Returns : a Berkeley DB tied hashref
+ Args    : none
+ Throws  : nothing
+
+=cut
+
+sub db {
+  my $self = shift;
+  return $self->{db};
+}
+
+=head2 flush
+
+ Title   : flush
+ Usage   : $db->flush
+ Function: flushes the cache
+ Returns : nothing
+ Args    : none
+ Throws  : nothing
+
+=cut
+
+# Empty the cache by clearing the tied hash; a no-op when no database is
+# open.
+sub flush {
+  my $self = shift;
+  my $db = $self->db;
+  return unless $db;
+  %{$db} = ();
+}
+
+# Cache entries are stored under "<type>_<id>" as Storable-frozen strings.
+
+# Thaw and return the entry for ($type,$id).
+sub _get {
+  my ($self, $type, $id) = @_;
+  my $key = "${type}_${id}";
+  my $obj = thaw($self->db->{$key});
+  return $obj;
+}
+
+# Freeze $obj and store it under ($type,$id); undefined values are skipped.
+sub _store {
+  my ($self, $type, $id, $obj) = @_;
+  unless (defined $obj) {
+      # bug #1628
+      $self->debug("tried to store an undefined value for $id, skipping");
+      return;
+  }
+  $self->db->{"${type}_${id}"} = freeze($obj);
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+# Not supported by the cache: always throws.
+sub get_Seq_by_version{
+   my ($self,@args) = @_;
+   $self->throw("Not implemented it");
+}
+
+# Remove the cache file on destruction unless the keep flag is set.
+sub DESTROY {
+  my $self = shift;
+  return if $self->keep;
+  unlink $self->file_name;
+}
+
+
+# Open (creating if necessary) the BTREE cache file $file and keep the
+# tied hash in $self->{db}.  Unless the keep flag is set the file is
+# unlinked right away.  Throws if the file cannot be tied.
+sub _open_database {
+  my ($self, $file) = @_;
+  my %db;
+  tie(%db,'DB_File',$file,O_CREAT|O_RDWR,0666,$DB_BTREE)
+    or $self->throw("Could not open primary index file");
+  $self->{db} = \%db;
+  if (!$self->keep) {
+    unlink $file;
+  }
+}
+
+## End of Package
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/embl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/embl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/embl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,87 @@
+#
+# $Id: embl.pm,v 1.13.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BDB::embl
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BDB::embl - embl adaptor for Open-bio standard BDB-indexed flat file
+
+=head1 SYNOPSIS
+
+See Bio::DB::Flat.
+
+=head1 DESCRIPTION
+
+This module allows embl files to be stored in Berkeley DB flat files
+using the Open-Bio standard BDB-indexed flat file scheme.  You should
+not be using this directly, but instead use it via Bio::DB::Flat.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=head1 SEE ALSO
+
+L<Bio::DB::Flat>,
+
+=cut
+
+package Bio::DB::Flat::BDB::embl;
+
+use strict;
+
+use base qw(Bio::DB::Flat::BDB);
+
+sub seq_to_ids {
+  my ($self, $seq) = @_;
+  my $id  = $seq->display_id;
+  my $acc = $seq->accession_number;
+  my $ver = $seq->seq_version;
+
+  # ID is always recorded; ACC and VERSION only when the record supplies them
+  my %ids = (ID => $id);
+  if (defined $acc) {
+    $ids{ACC}     = $acc;
+    $ids{VERSION} = "$acc.$ver" if defined $ver;
+  }
+  return \%ids;
+}
+
+sub default_primary_namespace {
+  'ID';  # this adaptor's primary index namespace
+}
+
+sub default_secondary_namespaces {
+  return ('ACC', 'VERSION');  # the extra namespaces seq_to_ids can fill in
+}
+
+sub default_file_format { return 'embl'; }  # default sequence format name for this adaptor
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,72 @@
+#
+# $Id: fasta.pm,v 1.9.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BDB::fasta
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BDB::fasta - fasta adaptor for Open-bio standard BDB-indexed flat file
+
+=head1 SYNOPSIS
+
+See Bio::DB::Flat.
+
+=head1 DESCRIPTION
+
+This module allows fasta files to be stored in Berkeley DB flat files
+using the Open-Bio standard BDB-indexed flat file scheme.  You should
+not be using this directly, but instead use it via Bio::DB::Flat.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 SEE ALSO
+
+L<Bio::DB::Flat>,
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=cut
+
+package Bio::DB::Flat::BDB::fasta;
+
+use strict;
+
+use base qw(Bio::DB::Flat::BDB);
+
+sub default_file_format { return 'fasta'; }  # default sequence format name for this adaptor
+
+sub seq_to_ids {
+  my ($self, $seq) = @_;
+  # a FASTA record gets a single identifier, filed under the primary namespace
+  my $id        = $seq->primary_id;
+  my $namespace = $self->primary_namespace;
+  return { $namespace => $id };
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/genbank.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/genbank.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/genbank.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,88 @@
+#
+# $Id: genbank.pm,v 1.8.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BDB::genbank
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BDB::genbank - genbank adaptor for Open-bio standard BDB-indexed flat file
+
+=head1 SYNOPSIS
+
+See Bio::DB::Flat.
+
+=head1 DESCRIPTION
+
+This module allows genbank files to be stored in Berkeley DB flat files
+using the Open-Bio standard BDB-indexed flat file scheme.  You should
+not be using this directly, but instead use it via Bio::DB::Flat.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=head1 SEE ALSO
+
+L<Bio::DB::Flat>,
+
+=cut
+
+package Bio::DB::Flat::BDB::genbank;
+
+use strict;
+
+use base qw(Bio::DB::Flat::BDB);
+
+sub seq_to_ids {
+  my ($self, $seq) = @_;
+  my $id  = $seq->display_id;
+  my $acc = $seq->accession_number;
+  my $ver = $seq->seq_version;
+  my $gi  = $seq->primary_id;
+  # ID is unconditional; every other namespace needs a usable value
+  my %ids = (ID => $id);
+  if (defined $acc) {
+    $ids{ACC}     = $acc;
+    $ids{VERSION} = "$acc.$ver" if defined $ver;
+  }
+  $ids{GI} = $gi if defined $gi && $gi =~ /^\d+$/;  # only an all-digit primary id is filed as a GI
+  return \%ids;
+}
+
+sub default_primary_namespace {
+  'ID';  # this adaptor's primary index namespace
+}
+
+sub default_secondary_namespaces {
+  return ('ACC', 'GI', 'VERSION');  # the extra namespaces seq_to_ids can fill in
+}
+
+sub default_file_format { return 'genbank'; }  # default sequence format name for this adaptor
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swiss.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swiss.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swiss.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,87 @@
+#
+# $Id: swiss.pm,v 1.7.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BDB::swiss
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BDB::swiss - swissprot adaptor for Open-bio standard BDB-indexed flat file
+
+=head1 SYNOPSIS
+
+See Bio::DB::Flat.
+
+=head1 DESCRIPTION
+
+This module allows swissprot files to be stored in Berkeley DB flat files
+using the Open-Bio standard BDB-indexed flat file scheme.  You should
+not be using this directly, but instead use it via Bio::DB::Flat.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 SEE ALSO
+
+L<Bio::DB::Flat>,
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=cut
+
+package Bio::DB::Flat::BDB::swiss;
+
+use strict;
+
+use base qw(Bio::DB::Flat::BDB);
+
+sub default_file_format { return 'swiss'; }  # default sequence format name for this adaptor
+
+sub default_primary_namespace {
+  'ID';  # this adaptor's primary index namespace
+}
+
+sub default_secondary_namespaces {
+  return ('ACC', 'VERSION');  # the extra namespaces seq_to_ids can fill in
+}
+
+sub seq_to_ids {
+  my ($self, $seq) = @_;
+
+  my $name      = $seq->display_id;
+  my $acc       = $seq->accession_number;
+  my $ver       = $seq->seq_version;
+  my $unused_id = $seq->primary_id;  # still fetched, but no namespace is derived from it
+
+  my $ids = { ID => $name };
+  return $ids unless defined $acc;   # without an accession only ID can be filed
+  $ids->{ACC}     = $acc;
+  $ids->{VERSION} = "$acc.$ver" if defined $ver;
+  return $ids;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swissprot.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swissprot.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB/swissprot.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,87 @@
+#
+# $Id: swissprot.pm,v 1.7.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BDB::swissprot
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BDB::swissprot - swissprot adaptor for Open-bio standard BDB-indexed flat file
+
+=head1 SYNOPSIS
+
+See Bio::DB::Flat.
+
+=head1 DESCRIPTION
+
+This module allows swissprot files to be stored in Berkeley DB flat files
+using the Open-Bio standard BDB-indexed flat file scheme.  You should
+not be using this directly, but instead use it via Bio::DB::Flat.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 SEE ALSO
+
+L<Bio::DB::Flat>,
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=cut
+
+package Bio::DB::Flat::BDB::swissprot;
+
+use strict;
+
+use base qw(Bio::DB::Flat::BDB);
+
+sub default_file_format { return 'swiss'; }  # default sequence format name for this adaptor
+
+sub default_primary_namespace {
+  'ID';  # this adaptor's primary index namespace
+}
+
+sub default_secondary_namespaces {
+  return ('ACC', 'VERSION');  # the extra namespaces seq_to_ids can fill in
+}
+
+sub seq_to_ids {
+  my $self = shift;
+  my $seq  = shift;
+  # Map a sequence object to the identifiers it is indexed under: ID always,
+  # ACC when an accession exists, VERSION as "<accession>.<version>".
+  my $display_id = $seq->display_id;
+  my $accession  = $seq->accession_number;
+  my $version    = $seq->seq_version;
+  my %ids;
+  $ids{ID}       = $display_id;
+  $ids{ACC}      = $accession            if defined $accession;
+  $ids{VERSION}  = "$accession.$version" if defined $accession && defined $version;
+  return \%ids;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BDB.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,473 @@
+#
+# $Id: BDB.pm,v 1.17.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BDB
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BDB - Interface for BioHackathon standard BDB-indexed flat file
+
+=head1 SYNOPSIS
+
+  #You should not be using this module directly.
+
+See L<Bio::DB::Flat>.
+
+=head1 DESCRIPTION
+
+This object provides the basic mechanism to associate positions in
+files with primary and secondary name spaces. Unlike
+Bio::Index::Abstract (see L<Bio::Index::Abstract>), this is specialized
+to work with the BerkeleyDB-indexed "common" flat file format worked
+out at the 2002 BioHackathon.
+
+This object is the guts to the mechanism, which will be used by the
+specific objects inheriting from it.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=head1 SEE ALSO
+
+L<Bio::DB::Flat>,
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal
+methods are usually preceded with an "_" (underscore).
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::DB::Flat::BDB;
+
+use strict;
+use DB_File;
+use IO::File;
+use Fcntl qw(O_CREAT O_RDWR O_RDONLY);
+use File::Spec;
+use Bio::SeqIO;
+use Bio::DB::RandomAccessI;
+use Bio::Root::Root;
+use Bio::Root::IO;
+
+use base qw(Bio::DB::Flat);
+
+sub _initialize {
+  my $self = shift;
+  my ($max_open) = $self->_rearrange(['MAXOPEN'],@_);  # presumably the -maxopen constructor option; verify against _rearrange
+  $self->{bdb_maxopen} = $max_open || 32;              # limit consulted by _fhcache; defaults to 32
+}
+
+# Look up $id in the primary index and return a cached filehandle that is
+# positioned at the start of its record (primary namespace only).
+sub _get_stream {
+  my ($self, $id) = @_;
+  my @where = $self->_lookup_primary($id)
+    or $self->throw("Unable to find a record for $id in the flat file index");
+  my ($path, $start) = @where;   # the record length is not needed for seeking
+  my $handle = $self->_fhcache($path) or $self->throw("couldn't open $path: $!");
+  seek($handle,$start,0) or $self->throw("can't seek on $path: $!");
+  return $handle;
+}
+
+# Return the raw text of the record(s) filed under $id.  A secondary
+# lookup may match several records: all are returned in list context,
+# while scalar context yields the single match or throws on ambiguity.
+sub fetch_raw {
+  my ($self,$id,$namespace) = @_;
+  # secondary lookup: several records may share one secondary id
+  if (defined $namespace && $namespace ne $self->primary_namespace) {
+    my @hits = $self->_lookup_secondary($namespace,$id);
+    $self->throw("Multiple records correspond to $namespace=>$id but function called in a scalar context")
+      if @hits > 1 && !wantarray;
+    my @records = map {$self->_read_record(@$_)} @hits;
+    return wantarray ? @records : $records[0];
+  }
+
+  # primary lookup
+  my @args = $self->_lookup_primary($id)
+    or $self->throw("Unable to find a record for $id in the flat file index");
+  return $self->_read_record(@args);
+}
+
+# Parse and return the sequence stored under primary id $id; returns
+# nothing when the id cannot be located.
+sub get_Seq_by_id {
+  my ($self, $id) = @_;
+  my $fh = eval { $self->_get_stream($id) } or return;  # a failed lookup is treated as "not found"
+  # one parser per file descriptor, created on first use and reused afterwards
+  my $parsers = $self->{bdb_cached_parsers} ||= {};
+  my $seqio   = $parsers->{fileno $fh} ||= Bio::SeqIO->new(-Format => $self->file_format, -fh => $fh);
+  return $seqio->next_seq;
+}
+
+# Fetch the sequence(s) filed under an accession.  Call with ($acc) or with
+# ($namespace, $key); a lone argument is looked up in the ACC namespace.
+sub get_Seq_by_acc {
+  my ($self, @query) = @_;
+  my ($ns, $key) = @query == 1 ? ('ACC', $query[0]) : @query;
+  my @primary_ids = $self->expand_ids($ns => $key);
+  $self->throw("more than one sequences correspond to this accession")
+    if !wantarray && @primary_ids > 1;
+  my @seqs = map { $self->get_Seq_by_id($_) } @primary_ids;
+  return wantarray ? @seqs : $seqs[0];
+}
+
+# Fetch the sequence(s) filed under an accession.version string.  With one
+# argument the VERSION namespace is assumed; otherwise pass ($namespace, $key).
+sub get_Seq_by_version {
+  my $self = shift;
+  my ($ns, $key) = @_ == 1 ? ('VERSION', @_) : @_;
+  my @primary_ids = $self->expand_ids($ns => $key);
+  $self->throw("more than one sequences correspond to this accession")
+    unless wantarray || @primary_ids <= 1;
+  my @found = map { $self->get_Seq_by_id($_) } @primary_ids;
+  return wantarray ? @found : $found[0];
+}
+
+=head2 get_PrimarySeq_stream
+
+ Title   : get_PrimarySeq_stream
+ Usage   : $stream = get_PrimarySeq_stream
+ Function: Makes a Bio::DB::SeqStreamI compliant object
+           which provides a single method, next_primary_seq
+ Returns : Bio::DB::SeqStreamI
+ Args    : none
+
+
+=cut
+
+sub get_PrimarySeq_stream {
+  my ($self) = @_;
+  # NOTE(review): `||` calls files() in scalar context, leaving one element in @files -- confirm intent
+  my @files   = $self->files || 0;
+  my @options = (-format => $self->file_format, -files => \@files);
+  return Bio::SeqIO::MultiFile->new(@options);
+}
+
+sub get_all_primary_ids {
+  my ($self) = @_;
+  my $index = $self->primary_db;   # hash ref whose keys are the primary ids
+  return keys %{$index};
+}
+
+=head2 get_all_primary_ids
+
+ Title   : get_all_primary_ids
+ Usage   : @ids = $seqdb->get_all_primary_ids()
+ Function: gives an array of all the primary_ids of the
+           sequence objects in the database.
+ Example :
+ Returns : an array of strings
+ Args    : none
+
+=cut
+
+# Translate an id in namespace $ns into the primary id(s) it refers to.
+# An undefined or primary namespace means $key already is a primary id.
+sub expand_ids {
+  my ($self, $ns, $key) = @_;
+  if (!defined $ns || $ns eq $self->primary_namespace) {
+    return $key;
+  }
+  my $index = $self->secondary_db($ns)
+    or $self->throw("invalid secondary namespace $ns");
+  my $packed = $index->{$key} or return;   # id not indexed: nothing to return
+  return $self->unpack_secondary($packed);
+}
+
+# Index every file named in the argument list, write out the configuration,
+# and return the total number of records indexed.
+sub build_index {
+  my ($self, @files) = @_;
+  my $total = 0;
+  foreach my $path (@files) {
+    # relative paths are made absolute before they are indexed
+    my $abs = File::Spec->file_name_is_absolute($path) ? $path : File::Spec->rel2abs($path);
+    $total += $self->_index_file($abs);
+  }
+  $self->write_config;
+  return $total;
+}
+
+sub _index_file {
+  my ($self, $file) = @_;
+
+  # make sure the file can be given a number in the index before reading it
+  my $fileno = $self->_path2fileno($file);
+  $self->throw("could not create a file number for $file") unless defined $fileno;
+  my $fh = $self->_fhcache($file)
+    or $self->throw("could not open $file for indexing: $!");
+  # walk the file one record at a time, remembering where each record starts
+  my ($start, $indexed) = (0, 0);
+  until (eof($fh)) {
+    my ($ids, $adjust) = $self->parse_one_record($fh) or next;
+    # the parser's optional second value is added to the current read position
+    my $end = tell($fh) + ($adjust || 0);
+    $self->_store_index($ids, $file, $start, $end - $start);
+    $start = $end;
+    $indexed++;
+  }
+  return $indexed;
+}
+
+=head2 To Be Implemented in Subclasses
+
+The following methods MUST be implemented by subclasses.
+
+=cut
+
+=head2 May Be Overridden in Subclasses
+
+The following methods MAY be overridden by subclasses.
+
+=cut
+
+sub default_primary_namespace {
+  'ACC';  # base-class default for the primary index namespace
+}
+
+sub default_secondary_namespaces {
+  return ();  # the base class defines no secondary namespaces
+}
+
+sub _read_record {
+  my ($self, $path, $start, $bytes) = @_;
+  # read $bytes bytes of $path beginning at byte $start, via the handle cache
+  my $fh = $self->_fhcache($path);
+  $self->throw("couldn't open $path: $!") unless $fh;
+  seek($fh,$start,0)
+    or $self->throw("can't seek on $path: $!");
+  read($fh, my $text, $bytes) or $self->throw("can't read $path: $!");
+  return $text;
+}
+
+# Resolve a primary id to ($filepath,$offset,$length); returns nothing
+# when the id is not in the index.
+sub _lookup_primary {
+  my ($self, $id) = @_;
+  my $index = $self->primary_db;
+  $self->throw("no primary namespace database is open") unless $index;
+
+  my $packed = $index->{$id} or return;
+  # the stored record names the file by number; map it back to a path
+  my ($fileno, $start, $bytes) = $self->unpack_primary($packed);
+  my $path = $self->_fileno2path($fileno)
+    or $self->throw("no file path entry for fileid $fileno");
+  return ($path, $start, $bytes);
+}
+
+# return one [$filepath,$offset,$length] array ref per primary id filed under a secondary id
+sub _lookup_secondary {
+  my ($self, $ns, $id) = @_;
+  my @locations;
+  push @locations, [ $self->_lookup_primary($_) ] for $self->expand_ids($ns => $id);
+  return @locations;
+}
+
+# Store indexing information for one record in the primary index and in
+# every applicable secondary index.  $keys is one of:
+#     1. a scalar holding the primary id
+#     2. a hashref of namespace=>id pairs; a secondary id may also be an
+#        arrayref of ids, each of which is indexed separately
+sub _store_index {
+  my $self = shift;
+  my ($keys,$filepath,$offset,$length) = @_;
+  my ($primary,%secondary);
+
+  if (ref $keys eq 'HASH') {
+    my %valid_secondary = map {$_=>1} $self->secondary_namespaces;
+    while (my($ns,$value) = each %$keys) {
+      if ($ns eq $self->primary_namespace) {
+        $primary = $value;
+      } else {
+        $valid_secondary{$ns} or $self->throw("invalid secondary namespace $ns");
+        # flatten arrayrefs so each id gets its own entry, not one "ARRAY(0x..)" key
+        push @{$secondary{$ns}}, ref $value eq 'ARRAY' ? @$value : $value;
+      }
+    }
+    $primary or $self->throw("no primary namespace ID provided");
+  } else {
+    $primary = $keys;
+  }
+
+  $self->throw("invalid primary ID; must be a scalar")
+    if ref($primary) =~ /^(ARRAY|HASH)$/;  # but allow stringified objects
+
+  $self->_store_primary($primary,$filepath,$offset,$length);
+  for my $ns (keys %secondary) {
+    $self->_store_secondary($ns,$_,$primary) foreach @{$secondary{$ns}};
+  }
+
+  1;
+}
+
+# Record where the entry with primary id $id lives: file, offset, length.
+sub _store_primary {
+  my ($self, $id, $path, $start, $bytes) = @_;
+
+  my $index = $self->primary_db;
+  $self->throw("no primary namespace database is open") unless $index;
+  # the index refers to files by number rather than by path
+  my $fileno = $self->_path2fileno($path);
+  $self->throw("could not create a file number for $path") unless defined $fileno;
+
+  my $packed = $self->pack_primary($fileno, $start, $bytes);
+  ($index->{$id} = $packed) or return;   # a false record is reported as failure
+  return 1;
+}
+
+# File $primary_id under $secondary_id in the index for $secondary_ns,
+# merging it with any primary ids already recorded there.
+sub _store_secondary {
+  my ($self, $secondary_ns, $secondary_id, $primary_id) = @_;
+
+  my $index = $self->secondary_db($secondary_ns);
+  $self->throw("invalid secondary namespace $secondary_ns") unless $index;
+
+  # hash keys collapse duplicates among the existing ids and the new one
+  my %seen;
+  $seen{$_} = undef for $self->unpack_secondary($index->{$secondary_id}), $primary_id;
+
+  # write the merged list back; key order, and so id order, is arbitrary
+  my $packed = $self->pack_secondary(keys %seen);
+  $index->{$secondary_id} = $packed;
+}
+
+# get output file handle (placeholder: the body below was never written)
+sub _outfh {
+  my $self = shift;  # NOTE(review): unfinished stub -- no handle is opened or returned here
+#### XXXXX FINISH #####
+#  my $
+}
+
+# split a tab-delimited primary record back into (fileid, offset, length)
+sub unpack_primary {
+  my ($self, $packed) = @_;
+  my @fields = split "\t", $packed;
+  return @fields;
+}
+
+# split a tab-delimited secondary record into the primary ids it lists
+sub unpack_secondary {
+  my ($self, $packed) = @_;
+  return unless $packed;   # a false record (undef, '' or '0') yields no ids
+  return split "\t", $packed;
+}
+
+# join fileid, offset and length (in that order) into one tab-delimited
+# primary index record
+sub pack_primary {
+  my (undef, @fields) = @_;
+  return join "\t", @fields[0 .. 2];
+}
+
+# pack a list of primary ids into a tab-delimited secondary id record
+sub pack_secondary {
+  my $self = shift;
+  my @secondaries = @_;   # despite the name, these are the primary ids being recorded
+  return join "\t",@secondaries;
+}
+
+sub primary_db {
+  my ($self) = @_;
+  # open the index databases on first access only
+  exists $self->{bdb_primary_db} or $self->_open_bdb;
+  return $self->{bdb_primary_db};
+}
+
+sub secondary_db {
+  my ($self, $ns) = @_;
+  $self->throw("usage: secondary_db(\$secondary_namespace)") unless $ns;
+  # the primary handle doubles as the "databases are open" marker
+  $self->_open_bdb if !exists $self->{bdb_primary_db};
+  return $self->{bdb_secondary_db}{$ns};   # undef when no database exists for $ns
+}
+
+sub _open_bdb {
+  my $self = shift;
+  # open read/write (creating files as needed) only when write_flag is set
+  my $flags = $self->write_flag ? O_CREAT|O_RDWR : O_RDONLY;
+
+  my $primary_db = {};
+  tie(%$primary_db,'DB_File',$self->_catfile($self->_primary_db_name),$flags,0666,$DB_BTREE)
+    or $self->throw("Could not open primary index file: $! (did you remember to use -write_flag=>1?)");
+  $self->{bdb_primary_db} = $primary_db;
+
+  for my $secondary ($self->secondary_namespaces) {   # one B-tree file per secondary namespace
+    my $secondary_db = {};
+    tie(%$secondary_db,'DB_File',$self->_catfile($self->_secondary_db_name($secondary)),$flags,0666,$DB_BTREE)
+      or $self->throw("Could not open secondary index file for namespace $secondary: $!");
+    $self->{bdb_secondary_db}{$secondary} = $secondary_db;
+  }
+
+  1;
+}
+
+sub _primary_db_name {
+  my ($self) = @_;
+  my $namespace = $self->primary_namespace or $self->throw('no primary namespace defined');
+  return 'key_' . $namespace;   # file name of the primary index: "key_<namespace>"
+}
+
+sub _secondary_db_name {
+  my ($self, $namespace) = @_;
+  # file name of a secondary index: "id_<namespace>"
+  return 'id_' . $namespace;
+}
+
+sub _fhcache {   # return a cached filehandle for $path, opening (and caching) it if needed
+  my $self  = shift;
+  my $path  = shift;   # file whose handle is wanted
+  my $write = shift;   # true to open for writing instead of reading
+
+  if (!$self->{bdb_fhcache}{$path}) {   # cache miss: a handle has to be opened
+    $self->{bdb_curopen} ||= 0;
+    if ($self->{bdb_curopen} >= $self->{bdb_maxopen}) {   # at the limit: evict before opening another
+      my @lru = sort {$self->{bdb_cacheseq}{$a} <=> $self->{bdb_cacheseq}{$b};} keys %{$self->{bdb_fhcache}};   # ascending by per-path request count
+      splice(@lru, $self->{bdb_maxopen} / 3);   # keep only the first bdb_maxopen/3 entries (lowest counts) as victims
+      $self->{bdb_curopen} -= @lru;
+      for (@lru) { delete $self->{bdb_fhcache}{$_} }   # forget the victims' cached handles
+    }
+    if ($write) {
+      my $modifier = $self->{bdb_fhcache_seenit}{$path}++ ? '>' : '>>';   # NOTE(review): first open appends, later reopens truncate -- confirm this is not inverted
+      $self->{bdb_fhcache}{$path} = IO::File->new("${modifier}${path}") or return;   # mode is prefixed to the path
+    } else {
+      $self->{bdb_fhcache}{$path} = IO::File->new($path) or return;   # read-only open
+    }
+    $self->{bdb_curopen}++;
+  }
+  $self->{bdb_cacheseq}{$path}++;   # count this request; the counts drive eviction order above
+  $self->{bdb_fhcache}{$path}   # the cached handle is the return value
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BinarySearch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BinarySearch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat/BinarySearch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1790 @@
+# $Id: BinarySearch.pm,v 1.23.4.1 2006/10/02 23:10:16 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat::BinarySearch
+#
+# Cared for by Michele Clamp <michele at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat::BinarySearch - BinarySearch search indexing system for sequence files
+
+=head1 SYNOPSIS
+
+BinarySearch search indexing system for sequence files
+
+=head1 DESCRIPTION
+
+This module can be used both to index sequence files and also to
+retrieve sequences from existing sequence files.
+
+This object allows indexing of sequence files both by a primary key
+(say accession) and multiple secondary keys (say ids).  This is
+different from the Bio::Index::Abstract (see L<Bio::Index::Abstract>)
+which uses DBM files as storage.  This module uses a binary search to
+retrieve sequences which is more efficient for large datasets.
+
+=head2 Index creation
+
+    my $sequencefile;  # Some fasta sequence file
+
+Patterns have to be entered to define where the keys are to be indexed
+and also where each record starts.  E.g. for fasta
+
+    my $start_pattern   = '^>';
+    my $primary_pattern = '^>(\S+)';
+
+So the start of a record is a line starting with a E<gt> and the
+primary key is all characters up to the first space after the E<gt>
+
+A string also has to be entered to define what the primary key
+(primary_namespace) is called.
+
+The index can now be created using 
+
+    my $index = new Bio::DB::Flat::BinarySearch(
+             -directory         => "/home/max/",
+             -dbname            => "mydb",
+	          -start_pattern     => $start_pattern,
+	          -primary_pattern   => $primary_pattern,
+             -primary_namespace => "ID",
+	          -format            => "fasta" );
+
+    my @files = ("file1","file2","file3");
+
+    $index->build_index(@files);
+
+The index is now ready to use.  For large sequence files the perl way
+of indexing takes a *long* time and a *huge* amount of memory.  For
+indexing things like dbEST I recommend using the DB_File indexer, BDB.
+
+The formats currently supported by this module are fasta, Swissprot,
+and EMBL.
+
+=head2 Creating indices with secondary keys
+
+Sometimes just indexing files with one id per entry is not enough.  For
+instance you may want to retrieve sequences from swissprot using
+their accessions as well as their ids.
+
+To be able to do this when creating your index you need to pass in 
+a hash of secondary_patterns which have their namespaces as the keys
+to the hash.
+
+e.g. For Indexing something like
+
+ID   1433_CAEEL     STANDARD;      PRT;   248 AA.
+AC   P41932;
+DT   01-NOV-1995 (Rel. 32, Created)
+DT   01-NOV-1995 (Rel. 32, Last sequence update)
+DT   15-DEC-1998 (Rel. 37, Last annotation update)
+DE   14-3-3-LIKE PROTEIN 1.
+GN   FTT-1 OR M117.2.
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+
+where we want to index the accession (P41932) as the primary key and the
+id (1433_CAEEL) as the secondary id.  The index is created as follows
+
+    my %secondary_patterns;
+
+    my $start_pattern   = '^ID   (\S+)';
+    my $primary_pattern = '^AC   (\S+)\;';
+
+    $secondary_patterns{"ID"} = '^ID   (\S+)';
+
+    my $index = new Bio::DB::Flat::BinarySearch(
+                -directory          => $index_directory,
+		          -dbname             => "ppp",
+		          -write_flag         => 1,
+                -verbose            => 1,
+                -start_pattern      => $start_pattern,
+                -primary_pattern    => $primary_pattern,
+                -primary_namespace  => 'AC',
+                -secondary_patterns => \%secondary_patterns);
+
+    $index->build_index($seqfile);
+
+Of course having secondary indices makes indexing slower and use more
+memory.
+
+=head2 Index reading
+
+To fetch sequences using an existing index, first of all create your index
+object
+
+    my $index = new Bio::DB::Flat::BinarySearch(
+                  -directory => $index_directory);
+
+Now you can happily fetch sequences either by the primary key or
+by the secondary keys.
+
+    my $entry = $index->get_entry_by_id('HBA_HUMAN');
+
+This returns just a string containing the whole entry.  This is
+useful if you just want to print the sequence to screen or write it to a file.
+
+Other ways of getting sequences are
+
+    my $fh = $index->get_stream_by_id('HBA_HUMAN');
+
+This can then be passed to a seqio object for output or converting
+into objects.
+
+    my $seq = new Bio::SeqIO(-fh     => $fh,
+			                    -format => 'fasta');
+
+The last way is to retrieve a sequence directly.  This is the
+slowest way of extracting as the sequence objects need to be made.
+
+    my $seq = $index->get_Seq_by_id('HBA_HUMAN');
+
+To access the secondary indices the secondary namespace needs to be known
+
+    $index->secondary_namespaces("ID");
+
+Then the following call can be used
+
+    my $seq   = $index->get_Seq_by_secondary('ID','1433_CAEEL');
+
+These calls are not yet implemented
+
+    my $fh    = $index->get_stream_by_secondary('ID','1433_CAEEL');
+    my $entry = $index->get_entry_by_secondary('ID','1433_CAEEL');
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Michele Clamp
+
+Email - michele at sanger.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal
+methods are usually preceded with an "_" (underscore).
+
+=cut
+
+package Bio::DB::Flat::BinarySearch;
+
+use strict;
+
+use Fcntl qw(SEEK_END SEEK_CUR);
+# Unbuffered position helpers built on sysseek(), used instead of tell(),
+# which might be buffered.
+sub systell {
+    my ($fh) = @_;
+    return sysseek($fh, 0, SEEK_CUR);    # offset of the current position
+}
+sub syseof {
+    my ($fh) = @_;
+    return sysseek($fh, 0, SEEK_END);    # moves to the end, returns that offset
+}
+
+use Bio::Root::RootI;
+use Bio::SeqIO;
+use Bio::Seq;
+
+use base qw(Bio::DB::RandomAccessI);
+
+# Name of the per-database configuration file (see _config_file).
+use constant CONFIG_FILE_NAME => 'config.dat';
+# Number of bytes read from the top of an index file as its record width
+# (see read_header).
+use constant HEADER_SIZE      => 4;
+# Format used by new() when no -format argument is given.
+use constant DEFAULT_FORMAT   => 'fasta';
+# Format names as a plain list.  The previous [ ... ] initialiser stored a
+# single array reference in @formats instead of the three names.
+my @formats = ('FASTA','SWISSPROT','EMBL');
+
+=head2 new
+
+ Title   : new
+ Usage   : For reading 
+             my $index = new Bio::DB::Flat::BinarySearch(
+                     -directory => '/Users/michele/indices/dbest',
+		     -dbname    => 'mydb',
+                     -format    => 'fasta');
+
+           For writing 
+
+             my %secondary_patterns = ("ACC" => "^>\\S+ +(\\S+)");
+             my $index = new Bio::DB::Flat::BinarySearch(
+		     -directory          => '/Users/michele/indices',
+                     -dbname             => 'mydb',
+		     -write_flag         => 1,
+		     -primary_pattern    => "^>(\\S+)",
+                     -secondary_patterns => \%secondary_patterns,
+		     -primary_namespace  => "ID");
+
+             my @files = ('file1','file2','file3');
+
+             $index->build_index(@files);
+
+
+ Function: create a new Bio::DB::Flat::BinarySearch object
+ Returns : new Bio::DB::Flat::BinarySearch
+ Args    : -directory          Root directory for index files
+           -dbname             Name of subdirectory containing indices 
+                               for named database
+           -write_flag         Allow building index
+           -primary_pattern    Regexp defining the primary id
+           -secondary_patterns A hash ref containing the secondary
+                               patterns with the namespaces as keys
+           -primary_namespace  A string defining what the primary key
+                               is
+
+ Status  : Public
+
+=cut
+
+# Constructor.  Parses the named arguments, loads an existing index
+# configuration when read_config_file() finds one under -directory/-dbname,
+# and records the indexing patterns.  In write mode without a
+# -primary_namespace the patterns are obtained from _guess_patterns().
+# Explicit arguments always win; values loaded from the config file are
+# kept when the corresponding argument is not supplied.
+sub new {
+    my($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    bless $self, $class;
+
+    my ($index_dir,$dbname,$format,$write_flag,$primary_pattern,
+        $primary_namespace,$start_pattern,$secondary_patterns) =
+            $self->_rearrange([qw(DIRECTORY
+                                  DBNAME
+                                  FORMAT
+                                  WRITE_FLAG
+                                  PRIMARY_PATTERN
+                                  PRIMARY_NAMESPACE
+                                  START_PATTERN
+                                  SECONDARY_PATTERNS)], @args);
+
+    $self->index_directory($index_dir);
+    $self->dbname($dbname);
+
+    # read_config_file() returns nothing when no config file exists yet,
+    # in which case there is no index header to read either.
+    if ($self->index_directory && $self->read_config_file) {
+        my $fh = $self->primary_index_filehandle;
+        my $record_width = $self->read_header($fh);
+        $self->record_size($record_width);
+    }
+
+    # Do not let the default clobber a format loaded from the config file.
+    $format ||= $self->format || DEFAULT_FORMAT;
+    $self->format            ($format);
+    $self->write_flag        ($write_flag);
+
+    if ($self->write_flag && ! $primary_namespace) {
+        ($primary_namespace,$primary_pattern,
+         $start_pattern,$secondary_patterns) =
+            $self->_guess_patterns($self->format);
+    }
+
+    # These accessors store whatever argument they receive, undef included,
+    # so only forward values that were actually provided; otherwise the
+    # primary namespace read from the config file would be wiped.
+    $self->primary_pattern   ($primary_pattern)   if defined $primary_pattern;
+    $self->primary_namespace ($primary_namespace) if defined $primary_namespace;
+    $self->start_pattern     ($start_pattern)     if defined $start_pattern;
+    $self->secondary_patterns($secondary_patterns);
+
+    return $self;
+}
+
+# Builds an index object from a configuration hash, using its 'dbname' and
+# 'location' entries as the database name and index directory.
+# Returns the new Bio::DB::Flat::BinarySearch object.
+sub new_from_registry {
+    my ($self,%config) =  @_;
+
+    my $dbname   = $config{'dbname'};
+    my $location = $config{'location'};
+
+    # new() only recognises -directory; the former -index_dir key was
+    # silently ignored, leaving the object without an index directory.
+    return Bio::DB::Flat::BinarySearch->new(-dbname    => $dbname,
+                                            -directory => $location,
+                                            );
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
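+ Usage   : $obj->get_Seq_by_id($id)
+ Function: Looks up an entry by its primary id and parses it with Bio::SeqIO
+ Example : 
+ Returns : the next sequence read from the entry, or nothing if the id is not found
+ Args    : primary id (string)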
+
+=cut
+
+# Fetches the entry for $id as a parsed sequence: positions a stream on the
+# entry via get_stream_by_id() and reads one sequence from it with a
+# Bio::SeqIO object that is cached in $self->{_seqio} and re-pointed at
+# each new stream.  Returns nothing when the id is not found; throws when
+# no format is defined.
+sub get_Seq_by_id {
+    my ($self,$id) = @_;
+
+    # too many uninit variables...
+    local $^W = 0;
+
+    my ($stream) = $self->get_stream_by_id($id);
+
+    $self->throw("Can't create sequence - format is not defined")
+        unless defined($self->format);
+
+    return unless $stream;
+
+    if ( defined($self->{_seqio}) ) {
+        $self->{_seqio}->fh($stream);
+    }
+    else {
+        $self->{_seqio} = Bio::SeqIO->new(-fh     => $stream,
+                                          -format => $self->format);
+    }
+
+    return $self->{_seqio}->next_seq;
+}
+
+=head2 get_entry_by_id
+
+ Title   : get_entry_by_id
+ Usage   : $obj->get_entry_by_id($id)
+ Function: Get the raw text of the entry for a unique ID
+ Returns : string containing the whole entry
+ Args    : string
+
+
+=cut
+
+# Returns the raw text of the entry stored under $id, read straight from
+# the flat file, or nothing when the id is not in the index.
+sub get_entry_by_id {
+    my ($self,$id) = @_;
+
+    my ($fh,$length) = $self->get_stream_by_id($id);
+
+    # get_stream_by_id() returns an empty list for an unknown id; reading
+    # from the resulting undefined handle would only produce an error.
+    return unless $fh;
+
+    my $entry;
+
+    defined(sysread($fh,$entry,$length))
+        or $self->throw("Can't read entry for [$id]: $!");
+
+    # The handle was opened for this lookup only, so release it here.
+    close($fh);
+
+    return $entry;
+}
+
+
+=head2 get_stream_by_id
+
+ Title   : get_stream_by_id
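+ Usage   : ($fh,$length) = $obj->get_stream_by_id($id)
+ Function: Opens the flat file holding an id and seeks to the start of its entry
+ Returns : list of (filehandle positioned at the entry, entry length in bytes);
+           nothing if the id is not found
+ Args    : Id to lookup by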
+
+
+=cut
+
+# Locates $id in the primary index and returns ($fh,$length): a freshly
+# opened handle on the flat file, positioned at the start of the entry,
+# and the entry length recorded in the index.  Returns nothing when the id
+# is not found.  Throws when the record size is unknown, the file id is
+# not registered, or the flat file cannot be opened.
+sub get_stream_by_id {
+    my ($self,$id) = @_;
+
+    # Lazily load the config and record width if new() could not.
+    unless( $self->record_size ) {
+        if ($self->index_directory && $self->read_config_file) {
+            my $fh = $self->primary_index_filehandle;
+            my $record_width = $self->read_header($fh);
+            $self->record_size($record_width);
+        }
+    }
+    my $indexfh = $self->primary_index_filehandle;
+    syseof ($indexfh);
+
+    my $filesize = systell($indexfh);
+
+    $self->throw("file was not parsed properly, record size is empty")
+        unless $self->record_size;
+
+    # Number of fixed-width records following the header.
+    my $end = ($filesize - $self->{'_start_pos'}) / $self->record_size;
+    my ($newid,$rest) = $self->find_entry($indexfh,0,$end,$id,$self->record_size);
+
+    # Check for a miss before touching $rest, which is undefined then.
+    return unless defined($newid);
+
+    my ($fileid,$pos,$length) = split(/\t/,$rest);
+
+    # Called for its validation only: it throws on an unknown file id.
+    $self->get_filehandle_by_fileid($fileid);
+    my $file = $self->{_file}{$fileid};
+
+    open (my $IN,"<",$file)
+        || $self->throw("Can't open flat file [$file]: $!");
+
+    sysseek($IN,$pos,0);
+
+    return ($IN,$length);
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $obj->get_Seq_by_acc($acc)
+ Function: Gets a Bio::SeqI object by accession number
+ Returns : Bio::SeqI object
+ Args    : string representing accession number
+
+
+=cut
+
+# Looks a sequence up by accession: through the primary index when the
+# primary namespace is "ACC", otherwise through the "ACC" secondary index.
+sub get_Seq_by_acc {
+    my ($self,$acc) = @_;
+
+    # too many uninit variables...
+    local $^W = 0;
+
+    return $self->get_Seq_by_id($acc)
+        if $self->primary_namespace eq "ACC";
+
+    return $self->get_Seq_by_secondary("ACC",$acc);
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $obj->get_Seq_by_version($version)
+ Function: Gets a Bio::SeqI object by accession.version number
+ Returns : Bio::SeqI object
+ Args    : string representing accession.version number
+
+
+=cut
+
+# Looks a sequence up by accession.version: through the primary index when
+# the primary namespace is "VERSION", otherwise through the "VERSION"
+# secondary index.
+sub get_Seq_by_version {
+    my ($self,$acc) = @_;
+
+    # too many uninit variables...
+    local $^W = 0;
+
+    return $self->get_Seq_by_id($acc)
+        if $self->primary_namespace eq "VERSION";
+
+    return $self->get_Seq_by_secondary("VERSION",$acc);
+}
+
+=head2 get_Seq_by_secondary
+
+ Title   : get_Seq_by_secondary
+ Usage   : $obj->get_Seq_by_secondary($namespace,$acc)
+ Function: Gets a Bio::SeqI object looking up secondary accessions
+ Returns : Bio::SeqI object
+ Args    : namespace name to check secondary namespace and an id
+
+
+=cut
+
+# Looks up $id in the secondary index for namespace $name and fetches the
+# sequence of every primary id filed under it.
+# Returns the list of sequences in list context, the first one in scalar
+# context, and nothing when $id is not in the index.
+# Throws when $name is not a known secondary namespace or its index has no
+# usable record size.
+sub get_Seq_by_secondary {
+    my ($self,$name,$id) = @_;
+
+    unless (grep { $name eq $_ } $self->secondary_namespaces) {
+        $self->throw("Secondary index for $name doesn't exist\n");
+    }
+
+    my $fh = $self->open_secondary_index($name);
+
+    syseof ($fh);
+
+    my $filesize = systell($fh);
+
+    my $recsize = $self->{'_secondary_record_size'}{$name};
+    $self->throw("Secondary index for $name has no record size")
+        unless $recsize;
+
+    my $first_record = $self->{'_start_pos'};
+    my $end = ($filesize - $first_record)/$recsize;
+
+    my ($newid,$primary_id,$pos) = $self->find_entry($fh,0,$end,$id,$recsize);
+
+    # Bail out on a miss before any of the returned values are used.
+    return unless defined($newid);
+
+    # find_entry() reports the offset one record before the record it
+    # matched, so step forward to the hit itself.
+    my $top = $pos + $recsize;
+
+    # Several records can share a secondary id and the binary search may
+    # land on any of them: walk up to the first one.  Ids are compared
+    # with eq, so regex metacharacters in an id are harmless.
+    while ($top - $recsize >= $first_record) {
+        my ($secid) =
+            split(/\t/,$self->read_record($fh,$top - $recsize,$recsize),2);
+        last unless defined($secid) && $secid eq $newid;
+        $top -= $recsize;
+    }
+
+    # Then walk down collecting the primary id of every matching record.
+    my %primary_id = ($primary_id => 1);
+
+    for (my $at = $top; $at + $recsize <= $filesize; $at += $recsize) {
+        my ($secid,$prim) =
+            split(/\t/,$self->read_record($fh,$at,$recsize),2);
+        last unless defined($secid) && $secid eq $newid;
+        next unless defined($prim);
+        $prim =~ s/ //g;    # strip the fixed-width padding
+        $primary_id{$prim} = 1;
+    }
+
+    my @entry = map { $self->get_Seq_by_id($_) } keys %primary_id;
+
+    return wantarray ? @entry : $entry[0];
+}
+
+=head2 read_header
+
+ Title   : read_header
+ Usage   : $obj->read_header($fhl)
+ Function: Reads the header from the db file
+ Returns : width of a record
+ Args    : filehandle
+
+
+=cut
+
+# Reads the HEADER_SIZE-byte header from $fh and returns it as a number
+# (the record width).  Also records HEADER_SIZE in $self->{'_start_pos'},
+# the offset at which the records begin.
+sub read_header {
+    my ($self,$fh) = @_;
+
+    my $header;
+    sysread($fh,$header,HEADER_SIZE);
+
+    $self->{'_start_pos'} = HEADER_SIZE;
+
+    # The width is space padded; drop the padding and force numeric context.
+    $header =~ s/ //g;
+
+    return $header * 1;
+}
+
+=head2 read_record
+
+ Title   : read_record
+ Usage   : $obj->read_record($fh,$pos,$len)
+ Function: Reads a record from a filehandle
+ Returns : String
+ Args    : filehandle, offset, and length
+
+
+=cut
+
+# Seeks $fh to absolute offset $pos and returns the $len bytes read from
+# there as a string.
+sub read_record {
+  my ($self,$fh,$pos,$len) = @_;
+
+  my $buffer;
+
+  sysseek($fh,$pos,0);
+  sysread($fh,$buffer,$len);
+
+  return $buffer;
+}
+
+=head2 get_all_primary_ids
+
+ Title   : get_all_primary_ids
+ Usage   : @ids = $seqdb->get_all_primary_ids()
+ Function: gives an array of all the primary_ids of the
+           sequence objects in the database.
+ Returns : an array of strings
+ Args    : none
+
+=cut
+
+# Walks the primary index record by record, from the end of the header to
+# the end of the file, and returns the first tab-separated field (the id)
+# of every record.
+sub get_all_primary_ids {
+  my $self = shift;
+
+  my $fh = $self->primary_index_filehandle;
+  syseof($fh);
+  my $end     = systell($fh);
+  my $recsize = $self->record_size;
+
+  my @ids;
+  my $pos = $self->{'_start_pos'};
+  while ($pos < $end) {
+    my ($entryid) = split(/\t/,$self->read_record($fh,$pos,$recsize));
+    push @ids,$entryid;
+    $pos += $recsize;
+  }
+  return @ids;
+}
+
+
+=head2 find_entry
+
+ Title   : find_entry
+ Usage   : $obj->find_entry($fh,$start,$end,$id,$recsize)
+ Function: Extract an entry based on the start,end,id and record size
+ Returns : string
+ Args    : filehandle, start, end, id, recordsize
+
+
+=cut
+
+# Binary search for $id among the fixed-width records of $fh, looking at
+# record numbers $start+1 .. $end (1-based, counted from the end of the
+# header).
+# On a hit returns ($id, $rest, $offset), where $rest is the remainder of
+# the record after the first tab with trailing whitespace removed, and
+# $offset is the byte offset of the matched record minus one record
+# length.  Returns nothing when the id is absent.
+sub find_entry {
+    my ($self,$fh,$start,$end,$id,$recsize) = @_;
+
+    while (1) {
+        my $mid = int( ($end+1+$start) / 2);
+        my $pos = ($mid-1)*$recsize + $self->{'_start_pos'};
+
+        my ($record) = $self->read_record($fh,$pos,$recsize);
+        my ($entryid,$rest)  = split(/\t/,$record,2);
+        $rest =~ s/\s+$//;
+
+        return ($id,$rest,$pos-$recsize) if $id eq $entryid;
+
+        # The window cannot shrink any further: the id is not there.
+        return if $end-$start <= 1;
+
+        if ($id le $entryid) {
+            $end = $mid;      # wanted id sorts before this record
+        }
+        else {
+            $start = $mid;    # wanted id sorts after this record
+        }
+    }
+}
+
+
+=head2 build_index
+
+ Title   : build_index
+ Usage   : $obj->build_index(@files)
+ Function: Build the index based on a set of files
+ Returns : count of the number of entries
+ Args    : List of filenames
+
+
+=cut
+
+# Builds the primary and secondary indices for the given flat files plus
+# any files already registered in $self->{_dbfile}.
+# Requires write_flag to be true and the index directory to exist; the
+# database subdirectory is created when missing.
+# Returns the number of entries indexed.
+sub build_index {
+    my ($self,@files) = @_;
+    $self->write_flag or 
+        $self->throw('Cannot build index unless -write_flag is true');
+
+    my $rootdir = $self->index_directory;
+
+    if (!defined($rootdir)) {
+        $self->throw("No index directory set - can't build indices");
+    }
+
+    if (! -d $rootdir) {
+        $self->throw("Index directory [$rootdir] is not a directory. Cant' build indices");
+    }
+
+    my $dbpath = Bio::Root::IO->catfile($rootdir,$self->dbname);
+    if (! -d $dbpath) {
+        warn "Creating directory $dbpath\n";
+        mkdir $dbpath,0777 or $self->throw("Couldn't create $dbpath: $!");
+    }
+
+    unless (@files ) {
+        $self->throw("Must enter an array of filenames to index");
+    }
+
+    # Store absolute paths so the index does not depend on the current
+    # directory ($file aliases the element of @files).
+    foreach my $file (@files) {
+        $file = File::Spec->rel2abs($file)
+            unless File::Spec->file_name_is_absolute($file);
+        unless ( -e $file) {
+            $self->throw("Can't index file [$file] as it doesn't exist");
+        }
+    }
+
+    # Re-index the files that are already part of this database as well.
+    if (my $filehash = $self->{_dbfile}) {
+        push @files,keys %$filehash;
+    }
+
+    my %seen;
+    @files = grep {!$seen{$_}++} @files;
+
+    # Lets index
+    $self->make_config_file(\@files);
+    my $entries = 0;
+    foreach my $file (@files) {
+        $entries += $self->_index_file($file);
+    }
+
+    # update alphabet if necessary
+    $self->make_config_file(\@files);
+
+    # And finally write out the indices
+    $self->write_primary_index;
+    $self->write_secondary_indices;
+
+    return $entries;
+}
+
+=head2 _index_file
+
+ Title   : _index_file
+ Usage   : $obj->_index_file($newval)
+ Function: 
+ Example : 
+ Returns : value of _index_file
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Scans one flat file and registers every entry with _add_id_position().
+# An entry starts at a line matching start_pattern; its primary id is the
+# first capture of primary_pattern and its secondary ids are the first
+# captures of the secondary patterns.
+# Returns the number of entries registered.  Throws when the file cannot
+# be opened or an entry has no id or file id.
+sub _index_file {
+    my ($self,$file) = @_;
+    my $v = $self->verbose;
+    open(my $fh,"<", $file) || $self->throw("Can't open file [$file]");
+
+    my $fileid = $self->get_fileid_by_filename($file);
+
+    my $primary            = $self->primary_pattern;
+    my $start_pattern      = $self->start_pattern;
+    my $secondary_patterns = $self->secondary_patterns;
+    my @secondary_names    = $self->secondary_namespaces;
+
+    my $pos        = 0;    # byte offset at which the current entry starts
+    my $count      = 0;
+    my $seen_start = 0;    # true once the first start line has been read
+    my $new_primary_entry;
+    my %secondary_id;
+
+    while (<$fh>) {
+        $self->{alphabet} ||= $self->guess_alphabet($_);
+
+        if ($_ =~ /$start_pattern/) {
+            if ($seen_start) {
+                # A new entry begins on this line, so the previous entry
+                # ends at the offset where this line starts.
+                my $entry_end = (tell $fh) - length($_);
+                my $id        = $new_primary_entry;
+
+                unless( defined($id)) {
+                    $self->throw("No id defined for sequence");
+                }
+                unless( defined($fileid)) {
+                    $self->throw("No fileid defined for file $file");
+                }
+                $self->_add_id_position($id,$pos,$fileid,
+                                        $entry_end - $pos,\%secondary_id);
+
+                $pos = $entry_end;
+
+                if ($count > 0 && $count%1000 == 0) {
+                    $self->debug( "Indexed $count ids\n") if $v > 0;
+                }
+
+                $count++;
+            } else {
+                $seen_start = 1;
+            }
+        }
+
+        if ($_ =~ /$primary/) {
+            $new_primary_entry = $1;
+        }
+
+        foreach my $sec (@secondary_names) {
+            my $pattern = $secondary_patterns->{$sec};
+
+            if ($_ =~ /$pattern/) {
+                $secondary_id{$sec} = $1;
+            }
+        }
+    }
+
+    # Remember to add in the last one.  It runs to the end of the file, so
+    # its length is measured from the final read position; subtracting the
+    # last line, as was done before, cut that line off the entry.
+    my $id     = $new_primary_entry;
+    my $length = (tell $fh) - $pos;
+
+    if (!defined($id)) {
+        $self->throw("No id defined for sequence");
+    }
+    if (!defined($fileid)) {
+        $self->throw("No fileid defined for file $file");
+    }
+
+    $self->_add_id_position($id,$pos,$fileid,$length,\%secondary_id);
+    $count++;
+
+    # Close the lexical handle that was actually opened above.
+    close($fh);
+    return $count;
+}
+
+=head2 write_primary_index
+
+ Title   : write_primary_index
+ Usage   : $obj->write_primary_index($newval)
+ Function: 
+ Example : 
+ Returns : value of write_primary_index
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Writes the primary index file: a 4-character record width followed by
+# one fixed-width record "id<TAB>fileid<TAB>pos<TAB>length" per id, sorted
+# by id so that find_entry() can binary-search it.
+# Throws when the file cannot be written or an id lacks a field.
+sub write_primary_index {
+    my ($self) = @_;
+
+    my @ids = sort {$a cmp $b} keys %{$self->{_id}};
+
+    my $indexfile = $self->primary_index_file;
+
+    # Three-argument open keeps the file name from being parsed as a mode.
+    open (my $INDEX,">",$indexfile) ||
+        $self->throw("Can't open primary index file [" .
+                     $indexfile . "]");
+
+    # Widest value of each field plus the three tab separators.
+    my $recordlength = $self->{_maxidlength} +
+                       $self->{_maxfileidlength} +
+                       $self->{_maxposlength} +
+                       $self->{_maxlengthlength} + 3;
+
+    print $INDEX sprintf("%4d",$recordlength);
+
+    foreach my $id (@ids) {
+
+        if (!defined($self->{_id}{$id}{_fileid})) {
+            $self->throw("No fileid for $id\n");
+        }
+        if (!defined($self->{_id}{$id}{_pos})) {
+            $self->throw("No position for $id\n");
+        }
+        if (!defined($self->{_id}{$id}{_length})) {
+            $self->throw("No length for $id");
+        }
+
+        my $record =  $id              . "\t" .
+            $self->{_id}{$id}{_fileid} . "\t" .
+            $self->{_id}{$id}{_pos}    . "\t" .
+            $self->{_id}{$id}{_length};
+
+        # Left-justify and pad with spaces to the fixed record width.
+        print $INDEX sprintf("%-${recordlength}s",$record);
+
+    }
+
+    # Buffered write errors only surface when the handle is closed.
+    close($INDEX) ||
+        $self->throw("Can't write primary index file [$indexfile]: $!");
+}
+
+=head2 write_secondary_indices
+
+ Title   : write_secondary_indices
+ Usage   : $obj->write_secondary_indices($newval)
+ Function: 
+ Example : 
+ Returns : value of write_secondary_indices
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Writes one index file per secondary namespace: a 4-character record
+# width followed by one fixed-width record
+# "secondary_id<TAB>primary_id" per (secondary id, primary id) pair,
+# sorted by secondary id.
+sub write_secondary_indices {
+    my ($self) = @_;
+
+    # These are the different secondary namespaces
+    my @names = keys (%{$self->{_secondary_id}});
+
+    foreach my $name (@names) {
+
+        my @seconds = keys %{$self->{_secondary_id}{$name}};
+
+        # First we need to loop over to get the longest record.
+        my $length = 0;
+
+        foreach my $second (@seconds) {
+            my $tmplen = length($second) + 1;    # id plus the tab
+            my @prims = keys %{$self->{_secondary_id}{$name}{$second}};
+
+            foreach my $prim (@prims) {
+                my $recordlen = $tmplen + length($prim);
+
+                if ($recordlen > $length) {
+                    $length = $recordlen;
+                }
+            }
+        }
+
+        # Now we can print the index
+
+        my $fh = $self->new_secondary_filehandle($name);
+
+        print $fh sprintf("%4d",$length);
+        @seconds = sort @seconds;
+
+        foreach my $second (@seconds) {
+
+            my @prims = keys %{$self->{_secondary_id}{$name}{$second}};
+
+            foreach my $prim (@prims) {
+                my $record = $second . "\t" . $prim;
+                if (length($record) > $length) {
+                    $self->throw("Something has gone horribly wrong - length of record is more than we thought [$length]\n");
+                } else {
+                    # Exactly one record per pair; it used to be printed
+                    # twice, doubling the size of the index.
+                    print $fh sprintf("%-${length}s",$record);
+                }
+            }
+        }
+
+        close($fh);
+    }
+}
+
+=head2 new_secondary_filehandle
+
+ Title   : new_secondary_filehandle
+ Usage   : $obj->new_secondary_filehandle($newval)
+ Function: 
+ Example : 
+ Returns : value of new_secondary_filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Opens the secondary index file "id_$name.index" in the database
+# directory for writing and returns the handle.  Throws with $! when the
+# file cannot be opened.
+sub new_secondary_filehandle {
+    my ($self,$name) = @_;
+
+    my $secindex =
+        Bio::Root::IO->catfile($self->_config_path,"id_$name.index");
+
+    open(my $out,">", $secindex) || $self->throw($!);
+    return $out;
+}
+
+=head2 open_secondary_index
+
+ Title   : open_secondary_index
+ Usage   : $obj->open_secondary_index($newval)
+ Function: 
+ Example : 
+ Returns : value of open_secondary_index
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Returns a read handle on the secondary index for namespace $name.  The
+# first call opens the file, reads its header and caches both the handle
+# and the record size; later calls return the cached handle.
+# Throws when the index file does not exist or cannot be opened.
+sub open_secondary_index {
+    my ($self,$name) = @_;
+
+    return $self->{_secondary_filehandle}{$name}
+        if defined($self->{_secondary_filehandle}{$name});
+
+    my $secindex = $self->_config_path . "/id_$name.index";
+
+    $self->throw("Index is not present for namespace [$name]\n")
+        unless -e $secindex;
+
+    open(my $newfh,"<", $secindex) || $self->throw($!);
+
+    $self->{_secondary_record_size}{$name} = $self->read_header($newfh);
+    $self->{_secondary_filehandle} {$name} = $newfh;
+
+    return $self->{_secondary_filehandle}{$name};
+}
+
+=head2 _add_id_position
+
+ Title   : _add_id_position
+ Usage   : $obj->_add_id_position($newval)
+ Function: 
+ Example : 
+ Returns : value of _add_id_position
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _add_id_position {
+	my ($self,$id,$pos,$fileid,$length,$secondary_id) = @_;
+
+	if (!defined($id)) {
+		$self->throw("No id defined. Can't add id position");
+	}
+	if (!defined($pos)) {
+		$self->throw("No position defined. Can't add id position");
+	}
+	if ( ! defined($fileid)) {
+		$self->throw("No fileid defined. Can't add id position");
+	}
+	if (! defined($length) || $length <= 0) {
+		$self->throw("No length defined or <= 0 [$length]. Can't add id position");
+	}
+
+	$self->{_id}{$id}{_pos}    = $pos;
+	$self->{_id}{$id}{_length} = $length;
+	$self->{_id}{$id}{_fileid} = $fileid;
+
+	# Now the secondary ids
+
+	foreach my $sec (keys (%$secondary_id)) {
+		my $value = $secondary_id->{$sec};
+		$self->{_secondary_id}{$sec}{$value}{$id} = 1;
+	}
+
+	$self->{_maxidlength} = length($id)
+	  if !exists $self->{_maxidlength} or 
+		 length($id) >= $self->{_maxidlength};
+
+	$self->{_maxfileidlength} = length($fileid)
+	  if !exists $self->{_maxfileidlength} or 
+		 length($fileid) >= $self->{_maxfileidlength};
+
+	$self->{_maxposlength} = length($pos)
+	  if !exists $self->{_maxposlength} or 
+		 length($pos) >= $self->{_maxposlength};
+
+	$self->{_maxlengthlength} = length($length)
+	  if !exists $self->{_maxlengthlength} or 
+		 length($length) >= $self->{_maxlengthlength};
+}
+
+=head2 make_config_file
+
+ Title   : make_config_file
+ Usage   : $obj->make_config_file($newval)
+ Function: 
+ Example : 
+ Returns : value of make_config_file
+ Args    : newvalue (optional)
+
+=cut
+
+# Writes the database config file: the index type line, one
+# "fileid_N<TAB>path<TAB>size" line per flat file, the primary and
+# secondary namespaces, and the format line.  Also (re)registers every
+# file in the object's file-id tables.
+# Args: reference to an array of flat-file paths.
+# Throws when the config file or a flat file cannot be opened, or when no
+# format is defined.
+sub make_config_file {
+    my ($self,$files) = @_;
+
+    my @files = @$files;
+
+    my $configfile = $self->_config_file;
+
+    open(my $CON,">", $configfile) || $self->throw("Can't create config file [$configfile]");
+
+    # First line must be the type of index - in this case flat
+    print $CON "index\tflat/1\n";
+
+    # Now the fileids
+
+    my $count = 0;
+
+    foreach my $file (@files) {
+
+        my $size = -s $file;
+
+        print $CON "fileid_$count\t$file\t$size\n";
+
+        my $fh;
+        open($fh,"<", $file) || $self->throw($!);
+        $self->{_fileid}{$count}   = $fh;
+        $self->{_file}  {$count}   = $file;
+        $self->{_dbfile}{$file}    = $count;
+        $self->{_size}{$count}     = $size;
+
+        $count++;
+    }
+
+    # Now the namespaces
+
+    print $CON "primary_namespace\t" .$self->primary_namespace. "\n";
+
+    # Needs fixing for the secondary stuff
+
+    my $second_patterns = $self->secondary_patterns;
+
+    my @second = keys %$second_patterns;
+
+    if ((@second))  {
+        print $CON "secondary_namespaces";
+
+        foreach my $second (@second) {
+            print $CON "\t$second";
+        }
+        print $CON "\n";
+    }
+
+    # Now the config format
+
+    if (defined ($self->format) ) {
+        my $format   = $self->format;
+        my $alphabet = $self->alphabet;
+        my $alpha    = $alphabet ? "/$alphabet" : '';
+        # Written in the LSID form that read_config_file() parses, so the
+        # alphabet is stored too; it used to be computed and then dropped.
+        print $CON "format\t" . "URN:LSID:open-bio.org:$format$alpha\n";
+    } else {
+        $self->throw("Format does not exist in module - can't write config file");
+    }
+    close($CON);
+}
+
+=head2 read_config_file
+
+ Title   : read_config_file
+ Usage   : $obj->read_config_file($newval)
+ Function: 
+ Example : 
+ Returns : value of read_config_file
+ Args    : newvalue (optional)
+
+=cut
+
+# Loads the database config file into the object: index type and version,
+# the flat files with their ids and sizes, the primary and secondary
+# namespaces, and the format (plus alphabet when the format line is in
+# LSID form).
+# Returns nothing when the config file does not exist, 1 on success.
+# Throws when the file is malformed, a flat file is missing or has changed
+# size, or the primary namespace or primary index file is absent.
+sub read_config_file {
+    my ($self) = @_;
+    my $configfile = $self->_config_file;
+    return unless -e $configfile;
+
+    open(my $CON,"<", $configfile) || $self->throw("Can't open configfile [$configfile]");
+
+    # First line must be type
+    my $line = <$CON>;
+    # An empty file yields undef; let it fail the type check below.
+    $line = '' unless defined($line);
+    chomp($line);
+    my $version;
+
+    # This is hard coded as we only index flatfiles here
+    if ($line =~ m{index\tflat/(\d+)}) {
+        $version = $1;
+    } else {
+        $self->throw("First line not compatible with flat file index.  Should be something like\n\nindex\tflat/1");
+    }
+
+    $self->index_type("flat");
+    $self->index_version($version);
+
+    while (<$CON>) {
+        chomp;
+
+        # Look for fileid lines
+        if ($_ =~ /^fileid_(\d+)\t(\S+)\t(\d+)/) {
+            my $fileid   = $1;
+            my $filename = $2;
+            my $filesize = $3;
+
+            if (! -e $filename) {
+                $self->throw("File [$filename] does not exist!");
+            }
+            if (-s $filename != $filesize) {
+                $self->throw("Flatfile size for $filename differs from what the index thinks it is. Real size [" . (-s $filename) . "] Index thinks it is [" . $filesize  . "]");
+            }
+
+            # Opened only to prove the file is readable.  The closed
+            # handle is still stored: get_filehandle_by_fileid() checks
+            # that an entry is defined for the file id.
+            my $fh;
+            open($fh,"<", $filename) || $self->throw($!);
+            close $fh;
+
+            $self->{_fileid}{$fileid}   = $fh;
+            $self->{_file}  {$fileid}   = $filename;
+            $self->{_dbfile}{$filename} = $fileid;
+            $self->{_size}  {$fileid}   = $filesize;
+        }
+
+        # Look for namespace lines
+        if ( /(.*)_namespaces?\t(.+)/ ) {
+            my ($kind,$names) = ($1,$2);
+            if ($kind eq "primary") {
+                $self->primary_namespace($names);
+            } elsif ($kind eq "secondary") {
+                $self->secondary_namespaces(split "\t",$names);
+            } else {
+                $self->throw("Unknown namespace name in config file [$kind]");
+            }
+        }
+
+        # Look for format lines
+        if ($_ =~ /format\t(\S+)/) {
+            # Check the format here?
+            my $format = $1;
+
+            # handle LSID format
+            if ($format =~ /^URN:LSID:open-bio\.org:(\w+)(?:\/(\w+))?/) {
+                $self->format($1);
+                $self->alphabet($2);
+            } else {  # compatibility with older versions
+                # Use the saved value instead of relying on $1 surviving
+                # the failed match above.
+                $self->format($format);
+            }
+        }
+    }
+
+    close($CON);
+
+    # Now check we have all that we need
+
+    my @fileid_keys = keys (%{$self->{_fileid}});
+
+    if (!(@fileid_keys)) {
+        $self->throw("No flatfile fileid files in config - check the index has been made correctly");
+    }
+
+    if (!defined($self->primary_namespace)) {
+        $self->throw("No primary namespace exists");
+    }
+
+    if (! -e $self->primary_index_file) {
+        $self->throw("Primary index file [" . $self->primary_index_file . "] doesn't exist");
+    }
+
+    return 1;
+}
+
+=head2 get_fileid_by_filename
+
+ Title   : get_fileid_by_filename
+ Usage   : $obj->get_fileid_by_filename($newval)
+ Function: 
+ Example : 
+ Returns : value of get_fileid_by_filename
+ Args    : newvalue (optional)
+
+=cut
+
+# Returns the file id registered for $file in $self->{_dbfile} (undef when
+# the file is unknown).  Throws when no file table has been loaded at all.
+sub get_fileid_by_filename {
+    my ($self,$file) = @_;
+
+    $self->throw("No file to fileid mapping present.  Has the fileid file been read?")
+        unless defined($self->{_dbfile});
+
+    return $self->{_dbfile}{$file};
+}
+
+=head2 get_filehandle_by_fileid
+
+ Title   : get_filehandle_by_fileid
+ Usage   : $obj->get_filehandle_by_fileid($newval)
+ Function: 
+ Example : 
+ Returns : value of get_filehandle_by_fileid
+ Args    : newvalue (optional)
+
+=cut
+
+# Returns the handle stored for $fileid in $self->{_fileid}; throws when
+# nothing is registered under that file id.
+sub get_filehandle_by_fileid {
+    my ($self,$fileid) = @_;
+
+    $self->throw("ERROR: undefined fileid in index [$fileid]")
+        unless defined($self->{_fileid}{$fileid});
+
+    return $self->{_fileid}{$fileid};
+}
+
+=head2 primary_index_file
+
+ Title   : primary_index_file
+ Usage   : $obj->primary_index_file($newval)
+ Function: 
+ Example : 
+ Returns : value of primary_index_file
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Returns the path of the primary index file,
+# "key_<primary namespace>.key" inside the database directory.
+sub primary_index_file {
+    my ($self) = @_;
+
+    my $filename = "key_" . $self->primary_namespace . ".key";
+
+    return Bio::Root::IO->catfile($self->_config_path,$filename);
+}
+
+=head2 primary_index_filehandle
+
+ Title   : primary_index_filehandle
+ Usage   : $obj->primary_index_filehandle($newval)
+ Function: 
+ Example : 
+ Returns : value of primary_index_filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Returns a read handle on the primary index file, opening it on first use
+# and caching it in $self->{'_primary_index_handle'}.
+# Throws when the file cannot be opened.
+sub primary_index_filehandle {
+    my ($self) = @_;
+
+    unless (defined ($self->{'_primary_index_handle'})) {
+        my $indexfile = $self->primary_index_file;
+        # The failure branch used to call throw on the bareword "self"
+        # and reported $@ instead of the system error in $!.
+        open(my $fh, "<", $indexfile)
+            || $self->throw("Can't open primary index file [$indexfile]: $!");
+        $self->{'_primary_index_handle'} = $fh;
+    }
+    return $self->{'_primary_index_handle'};
+}
+
+=head2 format
+
+ Title   : format
+ Usage   : $obj->format($newval)
+ Function: 
+ Example : 
+ Returns : value of format
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the sequence format name.  Only a defined argument changes the
+# stored value; the current value is returned either way.
+sub format{
+    my ($obj,$value) = @_;
+
+    $obj->{'format'} = $value if defined $value;
+
+    return $obj->{'format'};
+}
+
+# Get/set the alphabet.  Only a defined argument changes the stored value;
+# the current value is returned either way.
+sub alphabet{
+    my ($obj,$value) = @_;
+
+    $obj->{alphabet} = $value if defined $value;
+
+    return $obj->{alphabet};
+}
+
+=head2 write_flag
+
+ Title   : write_flag
+ Usage   : $obj->write_flag($newval)
+ Function: 
+ Example : 
+ Returns : value of write_flag
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the flag that build_index() requires to be true.  Only a defined
+# argument changes the stored value; the current value is returned.
+sub write_flag{
+    my ($obj,$value) = @_;
+
+    $obj->{'write_flag'} = $value if defined $value;
+
+    return $obj->{'write_flag'};
+}
+
+=head2 dbname
+
+ Title   : dbname
+ Usage   : $obj->dbname($newval)
+ Function: get/set database name
+ Example : 
+ Returns : value of dbname
+ Args    : newvalue (optional)
+
+=cut
+
+# Get/set the database name.  Note that when called with an argument it
+# stores the new name but returns the name that was stored BEFORE the call.
+sub dbname {
+  my ($self,@new) = @_;
+
+  my $previous = $self->{flat_dbname};
+  $self->{flat_dbname} = $new[0] if @new;
+
+  return $previous;
+}
+
+=head2 index_directory
+
+ Title   : index_directory
+ Usage   : $obj->index_directory($newval)
+ Function: 
+ Example : 
+ Returns : value of index_directory
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the root directory of the indices.  A defined argument is stored
+# with a trailing "/" appended when it lacks one; the stored value is
+# returned.
+sub index_directory {
+    my ($self,$arg) = @_;
+
+    return $self->{_index_directory} unless defined($arg);
+
+    $arg .= "/" unless $arg =~ m{/$};
+
+    return $self->{_index_directory} = $arg;
+}
+
+# Returns the database directory: the index directory joined with the
+# database name.
+sub _config_path {
+  my ($self) = @_;
+
+  return Bio::Root::IO->catfile($self->index_directory,$self->dbname);
+}
+
+# Returns the path of the config file (CONFIG_FILE_NAME) inside the
+# database directory.
+sub _config_file {
+  my ($self) = @_;
+
+  return Bio::Root::IO->catfile($self->_config_path,CONFIG_FILE_NAME);
+}
+
+=head2 record_size
+
+ Title   : record_size
+ Usage   : $obj->record_size($newval)
+ Function: 
+ Example : 
+ Returns : value of record_size
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the width of a primary index record.  Any argument, even undef,
+# replaces the stored value.
+sub record_size {
+    my ($self,@new) = @_;
+
+    $self->{_record_size} = $new[0] if @new;
+
+    return $self->{_record_size};
+}
+
+=head2 primary_namespace
+
+ Title   : primary_namespace
+ Usage   : $obj->primary_namespace($newval)
+ Function: 
+ Example : 
+ Returns : value of primary_namespace
+ Args    : newvalue (optional)
+
+=cut
+
+# Get/set the name of the primary namespace.  Any argument, even undef,
+# replaces the stored value.
+sub primary_namespace {
+    my ($self,@new) = @_;
+
+    $self->{_primary_namespace} = $new[0] if @new;
+
+    return $self->{_primary_namespace};
+}
+
+=head2 index_type
+
+ Title   : index_type
+ Usage   : $obj->index_type($newval)
+ Function: 
+ Example : 
+ Returns : value of index_type
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the index type.  Any argument, even undef, replaces the stored
+# value.
+sub index_type {
+    my ($self,@new) = @_;
+
+    $self->{_index_type} = $new[0] if @new;
+
+    return $self->{_index_type};
+}
+
+=head2 index_version
+
+ Title   : index_version
+ Usage   : $obj->index_version($newval)
+ Function: 
+ Example : 
+ Returns : value of index_version
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the index version.  Any argument, even undef, replaces the
+# stored value.
+sub index_version {
+    my ($self,@new) = @_;
+
+    $self->{_index_version} = $new[0] if @new;
+
+    return $self->{_index_version};
+}
+
+=head2 primary_pattern
+
+ Title   : primary_pattern
+ Usage   : $obj->primary_pattern($newval)
+ Function: 
+ Example : 
+ Returns : value of primary_pattern
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the regular expression whose first capture is the primary id.
+# Any argument, even undef, replaces the stored value.
+sub primary_pattern{
+    my ($obj,@new) = @_;
+
+    $obj->{'primary_pattern'} = $new[0] if @new;
+
+    return $obj->{'primary_pattern'};
+}
+
+=head2 start_pattern
+
+ Title   : start_pattern
+ Usage   : $obj->start_pattern($newval)
+ Function: 
+ Example : 
+ Returns : value of start_pattern
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the start_pattern attribute (stored under
+# the 'start_pattern' key).  Any argument replaces the stored value.
+sub start_pattern{
+    my ($obj, @new_value) = @_;
+    if (@new_value) {
+        $obj->{'start_pattern'} = $new_value[0];
+    }
+    return $obj->{'start_pattern'};
+}
+
+=head2 secondary_patterns
+
+ Title   : secondary_patterns
+ Usage   : $obj->secondary_patterns($newval)
+ Function: 
+ Example : 
+ Returns : value of secondary_patterns
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Getter/setter for the hash reference of secondary patterns.  When a
+# defined value is supplied it is stored and each of its keys is passed
+# to secondary_namespaces(); the stored reference is returned either way.
+sub secondary_patterns{
+   my $obj   = shift;
+   my $value = shift;
+
+   # Getter: nothing (or undef) supplied.
+   return $obj->{'secondary_patterns'} unless defined $value;
+
+   $obj->{'secondary_patterns'} = $value;
+   for my $namespace (keys %$value) {
+       $obj->secondary_namespaces($namespace);
+   }
+   return $obj->{'secondary_patterns'};
+}
+
+=head2 secondary_namespaces
+
+ Title   : secondary_namespaces
+ Usage   : $obj->secondary_namespaces($newval)
+ Function: 
+ Example : 
+ Returns : value of secondary_namespaces
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Append any supplied names to the list of secondary namespaces and
+# return the complete list (empty when none have been registered).
+# Restores the "@values" sigils that the archive had rewritten as
+# " at values", which left the sub unparsable.
+sub secondary_namespaces {
+    my ($obj, @values) = @_;
+
+    if (@values) {
+        push @{ $obj->{'secondary_namespaces'} }, @values;
+    }
+    return @{ $obj->{'secondary_namespaces'} || [] };
+}
+
+
+
+## These are indexing routines to index commonly used format - fasta
+## swissprot and embl
+
+# Build a SWISS-PROT index in $index_dir from @files.  "AC" lines supply
+# the primary namespace "ACC"; "ID" lines mark the start of a record and
+# supply the secondary namespace "ID".
+# Restores the "@files" sigil mangled to " at files" by the archive and
+# uses Class->new instead of indirect-object syntax.
+# NOTE(review): unlike new_EMBL_index and new_FASTA_index this returns
+# whatever build_index() returns rather than the index object; left as
+# is for backward compatibility.
+sub new_SWISSPROT_index {
+    my ($self, $index_dir, @files) = @_;
+
+    my $start_pattern   = "^ID   (\\S+)";
+    my $primary_pattern = "^AC   (\\S+)\\;";
+
+    my %secondary_patterns = (ID => $start_pattern);
+
+    my $index = Bio::DB::Flat::BinarySearch->new
+        (-index_dir          => $index_dir,
+         -format             => 'swissprot',
+         -primary_pattern    => $primary_pattern,
+         -primary_namespace  => "ACC",
+         -start_pattern      => $start_pattern,
+         -secondary_patterns => \%secondary_patterns);
+
+    $index->build_index(@files);
+}
+
+# Build an EMBL index in $index_dir from @files and return the index
+# object.  "AC" lines supply the primary namespace "ACC"; "ID" lines mark
+# the start of a record and supply the secondary namespace "ID".
+# Restores the "@files" sigil mangled to " at files" by the archive, uses
+# Class->new instead of indirect-object syntax, and passes the previously
+# unused $primary_namespace local to the constructor.
+sub new_EMBL_index {
+   my ($self, $index_dir, @files) = @_;
+
+   my $start_pattern     = "^ID   (\\S+)";
+   my $primary_pattern   = "^AC   (\\S+)\\;";
+   my $primary_namespace = "ACC";
+
+   my %secondary_patterns = (ID => $start_pattern);
+
+   my $index = Bio::DB::Flat::BinarySearch->new
+       (-index_dir          => $index_dir,
+        -format             => 'embl',
+        -primary_pattern    => $primary_pattern,
+        -primary_namespace  => $primary_namespace,
+        -start_pattern      => $start_pattern,
+        -secondary_patterns => \%secondary_patterns);
+
+   $index->build_index(@files);
+
+   return $index;
+}
+
+# Build a FASTA index in $index_dir from @files and return the index
+# object.  The first word after ">" is the primary key (namespace "ACC");
+# the second word on the header line, if any, is the secondary "ID".
+# Restores the "@files" sigil mangled to " at files" by the archive, uses
+# Class->new instead of indirect-object syntax, and passes the previously
+# unused $primary_namespace local to the constructor.
+sub new_FASTA_index {
+   my ($self, $index_dir, @files) = @_;
+
+   my $start_pattern     = "^>";
+   my $primary_pattern   = "^>(\\S+)";
+   my $primary_namespace = "ACC";
+
+   my %secondary_patterns = (ID => "^>\\S+ +(\\S+)");
+
+   my $index = Bio::DB::Flat::BinarySearch->new
+       (-index_dir          => $index_dir,
+        -format             => 'fasta',
+        -primary_pattern    => $primary_pattern,
+        -primary_namespace  => $primary_namespace,
+        -start_pattern      => $start_pattern,
+        -secondary_patterns => \%secondary_patterns);
+
+   $index->build_index(@files);
+
+   return $index;
+}
+
+# EVERYTHING THAT FOLLOWS THIS
+# is an awful hack - in reality Michele's code needs to be rewritten
+# to use Bio::SeqIO, but I have too little time to do this -- LS
+# Guess the alphabet ('dna', 'rna' or 'protein') of a record from one
+# line of it, based on the object's format:
+#   swissprot - always 'protein'
+#   genbank   - decided on the LOCUS line: 'dna' if it carries a "<n> bp"
+#               length, otherwise 'protein'
+#   embl      - decided on the ID line: ' DNA;' => 'dna', ' RNA;' => 'rna',
+#               otherwise 'protein'
+# Returns nothing for other lines and for other formats.
+sub guess_alphabet {
+  my ($self, $line) = @_;
+
+  my $format = $self->format;
+
+  if ($format eq 'swissprot') {
+    return 'protein';
+  }
+  elsif ($format eq 'genbank') {
+    return if $line !~ /^LOCUS/;
+    return $line =~ /\s+\d+\s+bp/i ? 'dna' : 'protein';
+  }
+  elsif ($format eq 'embl') {
+    return if $line !~ /^ID/;
+    return 'dna' if $line =~ / DNA;/i;
+    return 'rna' if $line =~ / RNA;/i;
+    return 'protein';
+  }
+
+  return;
+}
+
+# Default indexing patterns for a named file format.  Returns the list
+#   (primary namespace, primary pattern, start pattern, secondary patterns)
+# where the last element is a hash reference keyed by namespace; it is
+# omitted for fasta.  Format names are matched case-insensitively and in
+# the order swissprot, embl, genbank, fasta.  Throws for any other format.
+sub _guess_patterns {
+  my ($self, $format) = @_;
+
+  return ('ID',
+          "^ID   (\\S+)",
+          "^ID   (\\S+)",
+          { ACC => "^AC   (\\S+);" })
+    if $format =~ /swiss(prot)?/i;
+
+  return ('ID',
+          "^ID   (\\S+)",
+          "^ID   (\\S+)",
+          { ACC     => q/^AC   (\S+);/,
+            VERSION => q/^SV\s+(\S+)/ })
+    if $format =~ /embl/i;
+
+  return ('ID',
+          q/^LOCUS\s+(\S+)/,
+          q/^LOCUS/,
+          { ACC     => q/^ACCESSION\s+(\S+)/,
+            VERSION => q/^VERSION\s+(\S+)/ })
+    if $format =~ /genbank/i;
+
+  return ('ACC',
+          '^>(\S+)',
+          '^>(\S+)')
+    if $format =~ /fasta/i;
+
+  $self->throw("I can't handle format $format");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Flat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,615 @@
+#
+# $Id: Flat.pm,v 1.24.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::Flat
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Flat - Interface for indexed flat files
+
+=head1 SYNOPSIS
+
+  $db = Bio::DB::Flat->new(-directory  => '/usr/share/embl',
+			   -dbname     => 'mydb',
+                           -format     => 'embl',
+                           -index      => 'bdb',
+                           -write_flag => 1);
+  $db->build_index('/usr/share/embl/primate.embl',
+                   '/usr/share/embl/protists.embl');
+  $seq       = $db->get_Seq_by_id('BUM');
+  @sequences = $db->get_Seq_by_acc('DIV' => 'primate');
+  $raw       = $db->fetch_raw('BUM');
+
+=head1 DESCRIPTION
+
+This object provides the basic mechanism to associate positions in
+files with primary and secondary name spaces. Unlike
+Bio::Index::Abstract (see L<Bio::Index::Abstract>), this is specialized
+to work with the "flat index" and BerkeleyDB indexed flat file formats
+worked out at the 2002 BioHackathon.
+
+This object is a general front end to the underlying databases.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal
+methods are usually preceded with an "_" (underscore).
+
+=cut
+
+
+# Let the code begin...
+package Bio::DB::Flat;
+
+use Bio::Root::IO;
+
+use base qw(Bio::Root::Root Bio::DB::RandomAccessI);
+
+use constant CONFIG_FILE_NAME => 'config.dat';
+
+=head2 new
+
+ Title   : new
+ Usage   : my $db = Bio::DB::Flat->new(
+                     -directory  => $root_directory,
+		     -dbname     => 'mydb',
+		     -write_flag => 1,
+                     -index      => 'bdb',
+                     -verbose    => 0,
+		     -out        => 'outputfile',
+                     -format     => 'genbank');
+ Function: create a new Bio::DB::Flat object
+ Returns : new Bio::DB::Flat object
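+ Args    : -directory    Root directory under which the per-database
+                         subdirectories (each holding a "config.dat") live
+           -dbname       Name of the database index (subdirectory name)
+           -write_flag   If true, allows creation/updating.
+           -verbose      Verbose messages
+           -out          File to write to when write_seq invoked
+           -index        'bdb' or 'binarysearch'
+           -format       Format of the flat files (e.g. 'embl', 'genbank')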
+ Status  : Public
+
+The required -directory argument indicates where the flat file indexes
+will be stored.  The build_index() and write_seq() methods will
+automatically create subdirectories of this root directory.  Each
+subdirectory will contain a human-readable configuration file named
+"config.dat" that specifies where the individual indexes are stored.
+
+The required -dbname argument gives a name to the database index.  The
+index files will actually be stored in a like-named subdirectory
+underneath the root directory.
+
+The -write_flag enables writing new entries into the database as well
+as the creation of the indexes.  By default the indexes will be opened
+read only.
+
+-index is one of "bdb" or "binarysearch" and indicates the type of
+index to generate.  "bdb" corresponds to Berkeley DB.  You *must* be
+using BerkeleyDB version 2 or higher, and have the Perl BerkeleyDB
+extension installed (DB_File will *not* work). "binarysearch"
+corresponds to the OBDA "flat" indexed file.
+
+The -out argument specifies the output file for writing objects created
+with write_seq().
+
+The -format argument specifies the format of the input file or files. If
+the file suffix is one that Bioperl can already associate with a format
+then this is optional.
+
+=cut
+
+# Constructor.  Validates -directory and -dbname, creates the database
+# subdirectory if needed, loads any existing configuration file, applies
+# the constructor arguments on top of it, and finally hands over to the
+# implementation class selected by the indexing scheme:
+#   flat/1       => a fresh Bio::DB::Flat::BinarySearch object
+#   BerkeleyDB/1 => $self reblessed into Bio::DB::Flat::BDB::<format>
+# Restores the ",@_" argument that the archive had mangled to ", at _".
+sub new {
+  my $class = shift;
+  $class  = ref($class) if ref($class);
+  my $self = $class->SUPER::new(@_);
+
+  # first we initialize ourselves
+  my ($flat_directory,$dbname,$format) =
+    $self->_rearrange([qw(DIRECTORY DBNAME FORMAT)],@_);
+
+  defined $flat_directory
+    or $self->throw('Please supply a -directory argument');
+  defined $dbname
+    or $self->throw('Please supply a -dbname argument');
+
+  # remember where the database lives; _read_config needs both values
+  $self->directory($flat_directory);
+  $self->dbname($dbname);
+
+  $self->throw("Base directory $flat_directory doesn't exist")
+    unless -e $flat_directory;
+  # "_" reuses the stat buffer filled by the -e test above
+  $self->throw("$flat_directory isn't a directory")
+    unless -d _;
+  my $dbpath = Bio::Root::IO->catfile($flat_directory,$dbname);
+  unless (-d $dbpath) {
+    $self->debug("creating db directory $dbpath\n");
+    mkdir $dbpath,0777 or $self->throw("Can't create $dbpath: $!");
+  }
+
+  # set values from configuration file (if one exists)
+  $self->_read_config();
+
+  # but override with initialization values
+  $self->_initialize(@_);
+
+  $self->throw('you must specify an indexing scheme')
+    unless $self->indexing_scheme;
+
+  # now we figure out what subclass to instantiate
+  my $index_type = $self->indexing_scheme eq 'BerkeleyDB/1' ? 'BDB'
+                  :$self->indexing_scheme eq 'flat/1'       ? 'Binary'
+                  :$self->throw("unknown indexing scheme: " .
+                                $self->indexing_scheme);
+  # the effective format is the configured one overridden by -format
+  $format = $self->file_format;
+
+  # because Michele and Lincoln did it differently
+  # Michele's way is via a standalone concrete class
+  if ($index_type eq 'Binary') {
+    my $child_class = 'Bio::DB::Flat::BinarySearch';
+    eval "use $child_class";
+    $self->throw($@) if $@;
+    push @_, ('-format', $format);
+    return $child_class->new(@_);
+  }
+
+  # Lincoln uses Bio::SeqIO style delegation.
+  my $child_class = "Bio::DB::Flat::${index_type}::\L$format";
+  eval "use $child_class";
+  $self->throw($@) if $@;
+
+  # rebless & reinitialize with the new class
+  # (this prevents subclasses from forgetting to call our own initialization)
+  bless $self,$child_class;
+  $self->_initialize(@_);
+  $self->_set_namespaces(@_);
+
+  $self;
+}
+
+# Apply constructor-style named arguments (-write_flag, -dbname, -index,
+# -verbose, -out, -format) to the object.  Only arguments that are
+# actually defined overwrite existing settings, so values loaded from the
+# configuration file survive unless explicitly overridden.
+# Restores the ",@_" argument that the archive had mangled to ", at _".
+sub _initialize {
+  my $self = shift;
+
+  my ($flat_write_flag,$dbname,$flat_indexing,$flat_verbose,$flat_outfile,$flat_format)
+    = $self->_rearrange([qw(WRITE_FLAG DBNAME INDEX VERBOSE OUT FORMAT)],@_);
+
+  $self->write_flag($flat_write_flag) if defined $flat_write_flag;
+
+  if (defined $flat_indexing) {
+    # very permissive: map the user-facing names onto the scheme names
+    # stored in the configuration file
+    $flat_indexing = 'BerkeleyDB/1' if $flat_indexing =~ /bdb/;
+    $flat_indexing = 'flat/1'       if $flat_indexing =~ /^(flat|binary)/;
+    $self->indexing_scheme($flat_indexing);
+  }
+
+  $self->verbose($flat_verbose)    if defined $flat_verbose;
+  $self->dbname($dbname)           if defined $dbname;
+  $self->out_file($flat_outfile)   if defined $flat_outfile;
+  $self->file_format($flat_format) if defined $flat_format;
+}
+
+# Fill in the primary namespace, the secondary namespaces and the file
+# format from the class defaults wherever the object has no value yet.
+sub _set_namespaces {
+  my ($self) = @_;
+
+  if (!defined $self->{flat_primary_namespace}) {
+    $self->primary_namespace($self->default_primary_namespace);
+  }
+
+  if (!defined $self->{flat_secondary_namespaces}) {
+    $self->secondary_namespaces($self->default_secondary_namespaces);
+  }
+
+  if (!defined $self->{flat_format}) {
+    $self->file_format($self->default_file_format);
+  }
+}
+
+=head2 new_from_registry
+
+ Title   : new_from_registry
+ Usage   : $db = Bio::DB::Flat->new_from_registry(%config)
+ Function: creates a new Bio::DB::Flat object in a Bio::DB::Registry-
+           compatible fashion
+ Returns : new Bio::DB::Flat
+ Args    : provided by the registry, see below
+ Status  : Public
+
+The following registry-configuration tags are recognized:
+
+  location     Root of the indexed flat file; corresponds to the new() method's
+               -directory argument.
+
+=cut
+
+# Registry-style constructor: requires true 'location' and 'dbname'
+# entries in %config and forwards them to new() as -directory / -dbname.
+sub new_from_registry {
+   my $self   = shift;
+   my %config = @_;
+
+   my $location = $config{'location'};
+   $self->throw('location tag must be specified.') unless $location;
+
+   my $dbname = $config{'dbname'};
+   $self->throw('dbname tag must be specified.') unless $dbname;
+
+   my $db = $self->new(-directory => $location,
+                       -dbname    => $dbname);
+   return $db;
+}
+
+# accessors
+# The six accessors below share one contract: called without an argument
+# they return the current value; called with an argument they store it
+# and return the PREVIOUS value.
+
+# root directory of the flat-file databases
+sub directory {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_directory};
+  $self->{flat_directory} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# true if the database may be created or updated
+sub write_flag {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_write_flag};
+  $self->{flat_write_flag} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# verbosity setting
+sub verbose {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_verbose};
+  $self->{flat_verbose} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# file that write_seq() writes to
+sub out_file {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_outfile};
+  $self->{flat_outfile} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# name of the database (subdirectory of the root directory)
+sub dbname {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_dbname};
+  $self->{flat_dbname} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# name of the primary namespace
+sub primary_namespace {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_primary_namespace};
+  $self->{flat_primary_namespace} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# get/set secondary namespace(s)
+# pass an array ref.
+# get an array ref in scalar context, list in list context.
+# Accepts either one array reference or a plain list of namespace names
+# and stores it.  Returns the value held BEFORE the call: nothing if it
+# was unset/false, otherwise the list in list context or an array
+# reference in scalar context.
+sub secondary_namespaces {
+  my ($self, @args) = @_;
+  my $previous = $self->{flat_secondary_namespaces};
+
+  if (@args) {
+    $self->{flat_secondary_namespaces} =
+      ref($args[0]) eq 'ARRAY' ? $args[0] : [@args];
+  }
+
+  return unless $previous;
+  # just paranoia: tolerate a bare scalar stored in the slot
+  $previous = [$previous] unless ref($previous) eq 'ARRAY';
+  return wantarray ? @$previous : $previous;
+}
+
+# return the file format
+# Getter/setter for the flat-file format name.  With an argument the new
+# value is stored and the PREVIOUS value is returned.
+sub file_format {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_format};
+  $self->{flat_format} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# return the alphabet
+# Getter/setter for the sequence alphabet.  With an argument the new
+# value is stored and the PREVIOUS value is returned.
+sub alphabet {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_alphabet};
+  $self->{flat_alphabet} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# Read the next sequence from filehandle $fh and return its ids as
+# produced by seq_to_ids(); returns nothing when the parser yields no
+# further sequence.  One Bio::SeqIO parser is cached per file descriptor,
+# and the alphabet of the first parsed sequence is remembered.
+sub parse_one_record {
+  my ($self, $fh) = @_;
+
+  my $descriptor = fileno($fh);
+  my $parser     = $self->{cached_parsers}{$descriptor};
+  if (!$parser) {
+    $parser = Bio::SeqIO->new(-fh=>$fh,-format=>$self->default_file_format);
+    $self->{cached_parsers}{$descriptor} = $parser;
+  }
+
+  my $seq = $parser->next_seq;
+  return unless $seq;
+
+  $self->{flat_alphabet} = $seq->alphabet unless $self->{flat_alphabet};
+  my $ids = $self->seq_to_ids($seq);
+  return $ids;
+}
+
+
+# return the indexing scheme
+# Getter/setter for the indexing scheme name.  With an argument the new
+# value is stored and the PREVIOUS value is returned.
+sub indexing_scheme {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{flat_indexing};
+  $self->{flat_indexing} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+# Register a flat file with the database and return its file number.
+#   $file_path   - path of the file; made absolute if it is relative
+#   $file_length - optional expected size in bytes; a mismatch with the
+#                  size on disk throws
+#   $nf          - optional file number; when omitted the next value of
+#                  the internal counter is used
+# Throws if the file is not readable.
+sub add_flat_file {
+  my ($self, $file_path, $file_length, $nf) = @_;
+
+  # check that file_path is absolute
+  $file_path = File::Spec->rel2abs($file_path)
+    unless File::Spec->file_name_is_absolute($file_path);
+
+  $self->throw("flat file $file_path cannot be read: $!")
+    unless -r $file_path;
+
+  # "_" reuses the stat buffer filled by the -r test above
+  my $current_size = -s _;
+  if (!defined $file_length) {
+    $file_length = $current_size;
+  }
+  elsif ($current_size != $file_length) {
+    $self->throw("flat file $file_path has changed size.  Was $file_length bytes; now $current_size");
+  }
+
+  if (!defined $nf) {
+    $self->{flat_file_index} = 0 if !exists $self->{flat_file_index};
+    $nf = $self->{flat_file_index}++;
+  }
+
+  # maintain the mapping in both directions
+  $self->{flat_flat_file_path}{$nf}      = $file_path;
+  $self->{flat_flat_file_no}{$file_path} = $nf;
+  return $nf;
+}
+
+# Write the tab-separated configuration file for this database: one line
+# each for the indexing scheme, the format (as an open-bio LSID URN,
+# with "/alphabet" appended when the alphabet is known), every registered
+# flat file (path and current size), the primary namespace and the
+# secondary namespaces.
+# Throws unless write_flag is set, at least one flat file is registered
+# and a primary namespace is defined, and on any open/close failure.
+#
+# Fixes: restores the "@secondary" sigil mangled to " at secondary" by the
+# archive; opens with the three-argument form so odd characters in the
+# path cannot alter the open mode; calls CORE::close explicitly (as
+# _read_config does) because this package defines its own close().
+sub write_config {
+  my $self = shift;
+  $self->write_flag or $self->throw("cannot write configuration file because write_flag is not set");
+  my $path = $self->_config_path;
+
+  open (my $F,'>',$path) or $self->throw("open error on $path: $!");
+
+  my $index_type = $self->indexing_scheme;
+  print $F "index\t$index_type\n";
+
+  my $format     = $self->file_format;
+  my $alphabet   = $self->alphabet;
+  my $alpha      = $alphabet ? "/$alphabet" : '';
+  print $F "format\tURN:LSID:open-bio.org:${format}${alpha}\n";
+
+  my @filenos = $self->_filenos or $self->throw("cannot write config file because no flat files defined");
+  for my $nf (@filenos) {
+    # named differently from $path so the close error below still reports
+    # the configuration file
+    my $flat_file = $self->{flat_flat_file_path}{$nf};
+    my $size = -s $flat_file;
+    print $F join("\t","fileid_$nf",$flat_file,$size),"\n";
+  }
+
+  # write primary namespace
+  my $primary_ns = $self->primary_namespace
+    or $self->throw('cannot write config file because no primary namespace defined');
+
+  print $F join("\t",'primary_namespace',$primary_ns),"\n";
+
+  # write secondary namespaces
+  my @secondary = $self->secondary_namespaces;
+  print $F join("\t",'secondary_namespaces',@secondary),"\n";
+
+  CORE::close $F or $self->throw("close error on $path: $!");
+}
+
+# Paths of all registered flat files (the keys of the path => number
+# map); returns nothing when no file has been registered.
+sub files {
+  my ($self) = @_;
+  my $number_of = $self->{flat_flat_file_no} or return;
+  return keys %$number_of;
+}
+
+# Append $seq to the output file and index it.  The Bio::SeqIO writer is
+# cached per output file; the record's byte offset and length are taken
+# from the writer's filehandle position before and after the write, and
+# are passed together with the ids from seq_to_ids() to _store_index().
+# Marks the database as dirty so that close() rewrites the configuration.
+# Throws unless write_flag is set and an output file is defined.
+sub write_seq {
+  my ($self, $seq) = @_;
+
+  $self->throw("cannot write sequences because write_flag is not set")
+    unless $self->write_flag;
+
+  my $file = $self->out_file;
+  $self->throw('no outfile defined; use the -out argument to new()')
+    unless $file;
+
+  my $seqio = $self->{flat_cached_parsers}{$file}
+    ||= Bio::SeqIO->new(-Format => $self->file_format,
+                        -file   => ">$file");
+  $self->throw("couldn't create Bio::SeqIO object") unless $seqio;
+
+  my $fh = $seqio->_fh;
+  $self->throw("couldn't get filehandle from Bio::SeqIO object") unless $fh;
+
+  my $offset = tell($fh);
+  $seqio->write_seq($seq);
+  my $length = tell($fh) - $offset;
+
+  my $ids = $self->seq_to_ids($seq);
+  $self->_store_index($ids,$file,$offset,$length);
+
+  $self->{flat_outfile_dirty}++;
+}
+
+# Flush pending state: if sequences were written since the last call,
+# rewrite the configuration file, clear the dirty flag and drop the
+# cached writer for the output file.  Does nothing otherwise.
+sub close {
+  my ($self) = @_;
+  return if !$self->{flat_outfile_dirty};
+
+  $self->write_config;
+  delete $self->{flat_outfile_dirty};
+
+  my $out_file = $self->out_file;
+  delete $self->{flat_cached_parsers}{$out_file};
+}
+
+
+# File numbers of all registered flat files (the keys of the number =>
+# path map); returns nothing when no file has been registered.
+sub _filenos {
+  my ($self) = @_;
+  my $path_of = $self->{flat_flat_file_path} or return;
+  return keys %$path_of;
+}
+
+# read the configuration file
+# Load the configuration file, if it exists, and apply its contents:
+# indexing scheme, file format (and alphabet), primary and secondary
+# namespaces, and the registered flat files.  Returns nothing when there
+# is no configuration file, 1 on success; throws on I/O errors, on a
+# missing/unknown "index" line and on a missing primary namespace.
+#
+# Fixes:
+#  * restores the "@values" sigil mangled to " at values" by the archive;
+#  * the LSID format pattern now treats the "/alphabet" suffix as
+#    optional.  write_config() omits the suffix when no alphabet is known,
+#    and such a line used to fall through to the legacy branch, storing
+#    the whole "URN:LSID:..." string as the file format;
+#  * three-argument open, and a lexical line variable instead of the
+#    global $_.
+sub _read_config {
+  my $self   = shift;
+  my $path = $self->_config_path;
+  return unless -e $path;
+
+  open (my $F,'<',$path) or $self->throw("open error on $path: $!");
+  my %config;
+  while (my $line = <$F>) {
+    chomp $line;
+    my ($tag,@values) = split "\t", $line;
+    next unless defined $tag && length $tag;   # skip blank lines
+    $config{$tag} = \@values;
+  }
+  CORE::close $F or $self->throw("close error on $path: $!");
+
+  my $index_line = $config{index} ? $config{index}[0] : undef;
+  (defined $index_line && $index_line =~ m~(flat/1|BerkeleyDB/1)~)
+    or $self->throw("invalid configuration file $path: no index line");
+
+  $self->indexing_scheme($1);
+
+  if ($config{format}) {
+    # handle LSID format; the "/alphabet" part is optional
+    if ($config{format}[0] =~ /^URN:LSID:open-bio\.org:(\w+)(?:\/(\w+))?/) {
+      my ($format,$alphabet) = ($1,$2);
+      $self->file_format($format);
+      $self->alphabet($alphabet) if defined $alphabet;
+    } else {  # compatibility with older versions
+      $self->file_format($config{format}[0]);
+    }
+  }
+
+  # set up primary namespace
+  my $primary_namespace = $config{primary_namespace}[0]
+    or $self->throw("invalid configuration file $path: no primary namespace defined");
+  $self->primary_namespace($primary_namespace);
+
+  # set up secondary namespaces (may be empty)
+  $self->secondary_namespaces($config{secondary_namespaces});
+
+  # get file paths and their normalization information
+  my @normalized_files = grep {$_ ne ''} map {/^fileid_(\S+)/ && $1} keys %config;
+  for my $nf (@normalized_files) {
+    my ($file_path,$file_length) = @{$config{"fileid_${nf}"}};
+    $self->add_flat_file($file_path,$file_length,$nf);
+  }
+  1;
+}
+
+
+# Full path of this database's configuration file.
+sub _config_path {
+  my ($self) = @_;
+  my $config_name = $self->_config_name;
+  return $self->_catfile($config_name);
+}
+
+# Path of $component inside this database's directory, i.e.
+# <directory>/<dbname>/<component>, built with Bio::Root::IO->catfile.
+sub _catfile {
+  my ($self, $component) = @_;
+  my @parts = ($self->directory, $self->dbname, $component);
+  return Bio::Root::IO->catfile(@parts);
+}
+
+# Name of the configuration file (the CONFIG_FILE_NAME constant).
+sub _config_name {
+  return CONFIG_FILE_NAME;
+}
+
+# File number for $path; a path not seen before is registered through
+# add_flat_file() first.
+sub _path2fileno {
+  my ($self, $path) = @_;
+  if (exists $self->{flat_flat_file_no}{$path}) {
+    return $self->{flat_flat_file_no}{$path};
+  }
+  return $self->add_flat_file($path);
+}
+
+# Path registered for file number $fileno (undef if there is none).
+sub _fileno2path {
+  my ($self, $fileno) = @_;
+  return $self->{flat_flat_file_path}{$fileno};
+}
+
+# Paths of all registered flat files.
+sub _files {
+  my ($self) = @_;
+  my $number_of = $self->{flat_flat_file_no};
+  return keys %$number_of;
+}
+
+=head2 fetch
+
+  Title   : fetch
+  Usage   : $index->fetch( $id )
+  Function: Returns a Bio::Seq object from the index
+  Example : $seq = $index->fetch( 'dJ67B12' )
+  Returns : Bio::Seq object
+  Args    : ID
+
+Deprecated.  Use get_Seq_by_id instead.
+
+=cut
+
+# Deprecated alias: forwards all arguments to get_Seq_by_id().
+sub fetch {
+  my ($self, @args) = @_;
+  return $self->get_Seq_by_id(@args);
+}
+
+
+=head2 To Be Implemented in Subclasses
+
+The following methods MUST be implemented by subclasses.
+
+=cut
+
+# Abstract: return a Bio::Seq object for $id.
+sub get_Seq_by_id {
+  my ($self, $id) = @_;
+  $self->throw_not_implemented;
+}
+
+
+# Abstract when called with (namespace, key).  With a single argument
+# the call is simply forwarded to get_Seq_by_id().
+sub get_Seq_by_acc {
+  my ($self, @args) = @_;
+  if (@args == 1) {
+    return $self->get_Seq_by_id($args[0]);
+  }
+  my ($ns, $key) = @args;
+
+  $self->throw_not_implemented;
+}
+
+# Abstract: return the raw text of a record.
+sub fetch_raw {
+  my $self      = shift;
+  my $id        = shift;
+  my $namespace = shift;
+  $self->throw_not_implemented;
+}
+
+# Abstract: name of the file format handled by the subclass.
+sub default_file_format {
+  my ($self) = @_;
+  $self->throw_not_implemented;
+}
+
+# Abstract: record $ids for the record found at $offset/$length in $file.
+sub _store_index {
+   my ($self, $ids, $file, $offset, $length) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 May Be Overridden in Subclasses
+
+The following methods MAY be overridden by subclasses.
+
+=cut
+
+# Primary namespace used when none is configured.
+sub default_primary_namespace {
+  return 'ACC';
+}
+
+# Secondary namespaces used when none are configured: none at all.
+sub default_secondary_namespaces {
+  return ();
+}
+
+# Hash reference of the ids to index for $seq: the sequence's accession
+# number, keyed by the primary namespace.
+sub seq_to_ids {
+  my ($self, $seq) = @_;
+  my $accession = $seq->accession_number;
+  my $namespace = $self->primary_namespace;
+  return { $namespace => $accession };
+}
+
+# Destructor: flush pending writes through close().
+# A destructor can run at any point, including while the caller is
+# unwinding from an exception, so the global status variables are
+# localized to keep whatever close() does from clobbering the caller's
+# $@, $! or $?.
+sub DESTROY {
+  my $self = shift;
+  local($., $@, $!, $^E, $?);
+  $self->close;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GDB.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GDB.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GDB.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,314 @@
+# $Id: GDB.pm,v 1.20.4.2 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::GDB
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+
+=head1 NAME
+
+Bio::DB::GDB - Database object interface to GDB HTTP query
+
+=head1 SYNOPSIS
+
+    use Bio::DB::GDB;
+
+    $gdb = new Bio::DB::GDB;
+
+    $info = $gdb->get_info(-type => 'marker',
+			                  -id => 'D1S243'); # Marker name
+
+   print "genbank id is ", $info->{'gdbid'},
+    "\nprimers are (fwd, rev) ", join(",", @{$info->{'primers'}}), 
+    "\nproduct length is ", $info->{'length'}, "\n";
+
+=head1 DESCRIPTION
+
+This class allows connections to the Genome Database (GDB) and queries
+to retrieve any database objects. See http://www.gdb.org/ or any
+mirror for details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::GDB;
+use strict;
+use LWP::UserAgent;
+use HTTP::Request::Common;
+use HTML::Parser;
+
+use vars qw($BASEADDRESS %PARAMSTRING $MODVERSION);
+
+use base qw(Bio::Root::Root);
+
+# Module version; used by new() to build the user-agent string.
+$MODVERSION = '0.01';
+# Base URL that get_request() appends the query string to.
+$BASEADDRESS = 'http://www.gdb.org/gdb-bin/genera/genera/hgd/GenomicSegment';
+# Fixed query parameters per query type, as returned by get_params().
+%PARAMSTRING = ( 
+		 gene   => { '!action' => 'query' }, 
+		 marker => { '!action' => 'query' },
+		 );
+
+# the new way to make modules a little more lightweight
+# Constructor.  Builds the object through the parent class and attaches
+# an LWP::UserAgent that honours the proxy environment variables and
+# identifies itself as "<class>/<MODVERSION>".
+# Restores the "@args" sigil mangled to " at args" by the archive and uses
+# Class->new instead of indirect-object syntax.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my $ua = LWP::UserAgent->new(env_proxy => 1);
+    $ua->agent(ref($self) . "/$MODVERSION");
+    $self->ua($ua);
+
+    return $self;
+}
+
+=head2 ua
+
+ Title   : ua
+ Usage   : my $ua = $self->ua or 
+           $self->ua($ua)
+ Function: Get/Set a LWP::UserAgent for use
+ Returns : reference to LWP::UserAgent Object
+ Args    : $ua - must be a LWP::UserAgent
+
+=cut
+
+# Getter/setter for the user agent.  A new value is stored only if it is
+# defined and isa LWP::UserAgent; anything else is silently ignored.
+# Always returns the currently stored agent.
+sub ua {
+    my $self = shift;
+    my $ua   = shift;
+    $self->{_ua} = $ua if defined $ua && $ua->isa("LWP::UserAgent");
+    return $self->{_ua};
+}
+
+# helper method to get specific options
+
+=head2 get_params
+
+ Title   : get_params
+ Usage   : my %params = $self->get_params($mode)
+ Function: Returns key,value pairs to be passed to query
+            for mode ('marker', 'gene')
+ Returns : a key,value pair hash
+ Args    : 'marker' or 'gene' mode for retrieval
+
+=cut
+
+# Return the fixed query parameters for $mode as a key/value list.
+# An unknown (or undefined) mode used to die with Perl's generic
+# "Can't use an undefined value as a HASH reference"; it now throws a
+# message naming the offending mode and the supported ones.
+sub get_params {
+    my ($self, $mode) = @_;
+    my $params = defined $mode ? $PARAMSTRING{$mode} : undef;
+    if( !$params ) {
+        my $label = defined $mode ? $mode : '(undef)';
+        $self->throw("Unknown query type '$label'; expected one of: " .
+                     join(", ", sort keys %PARAMSTRING));
+    }
+    return %{$params};
+}
+
+=head2 get_info
+
+ Title   : get_info
+ Usage   : my $info = $self->get_info(-type => 'marker',
+				      -id   => 'D1S234'); 
+ Function: Queries GDB for a marker or gene and extracts its details
+ Returns : hash reference with the keys 'gdbid', 'length' and 'primers'
+           (nothing if the query fails)
+ Args    : -type => 'marker' or 'gene' mode for retrieval
+           -id   => unique id to query for
+
+=cut
+
+# Query GDB for the object of the given -type ('marker' or 'gene') and
+# -id and scrape the answer.  Returns a hash reference with the keys
+# 'gdbid', 'length' and 'primers' (array reference), or nothing when no
+# usable response was obtained.
+#
+# The scraping runs in two passes over the returned HTML:
+#   1. find the link to the "Amplimer" page (or notice that the page
+#      returned already is that page);
+#   2. pull the primer sequences, the product length and the GDB id out
+#      of the Amplimer page.
+#
+# Fixes:
+#  * the href pattern used a greedy ".+", so the captured URL ended with
+#    the closing quote (and swallowed any attributes after href); the URL
+#    now stops at the first quote, whitespace or ">";
+#  * the length pattern had been split across two lines by mail wrapping,
+#    leaving a diff line without its "+" marker;
+#  * dropped an if/elsif on $type whose branches were empty.
+sub get_info {
+    my ($self, @args) = @_;
+    my ( $type, $id) = $self->_rearrange([qw(TYPE ID)], @args);
+    if( !defined $type ) {
+        $self->throw("Must specify a type you are querying for");
+    } elsif( !defined $id ) {
+        $self->throw("Must specify a id to query for");
+    }
+    my %params = $self->get_params($type);
+
+    $params{'displayName'} = $id;
+
+    my $url = $self->get_request(%params);
+
+    my ($resp) = $self->_request($url);
+    if( ! defined $resp || ! ref($resp) ) {
+        $self->warn("Did not get any data for url ". $url->uri);
+        return;
+    }
+    my $content = $resp->content;
+    if( $content =~ /ERROR/ || length($content) == 0 ) {
+        $self->warn("Error getting for url " . $url->uri . "!\n");
+        return;
+    }
+    my (@primers, $length, $markerurl, $realname);
+
+    # Pass 1 state:
+    #   $state: 0 = start, 1 = inside a table, 2 = "Amplimer" text seen in
+    #           a table, 3 = link following that text found (done)
+    #   $title: 0 = outside <title>, 1 = inside <title>, 2 = the title
+    #           mentions "Amplimer", i.e. this page is the one we want
+    my $state = 0;
+    my $title = 0;
+    my $p;
+    $p = HTML::Parser->new( api_version => 3,
+                            start_h => [ sub {
+                                return if( $title == 2 || $state == 3);
+                                my($tag,$attr,$text) = @_;
+                                return if( !defined $tag);
+                                if( $tag eq 'table' ) {
+                                    $state = 1;
+                                } elsif( $tag eq 'title' ) {
+                                    $title = 1;
+                                } elsif( $state == 2 &&
+                                         $tag eq 'a' &&
+                                         $attr->{'href'} ) {
+                                    $state = 3;
+                                    # only absolute http links are followed
+                                    if( $text =~ m(href="?(http://[^"\s>]+)) ) {
+                                        $markerurl = $1;
+                                    }
+                                }
+                            }, "tagname, attr, text" ],
+                            end_h   => [ sub {
+                                return if ($title == 2 || $state == 3);
+                                my ( $tag ) = @_;
+                                $title = 0 if( $tag eq 'title' );
+                            }, "tagname" ],
+                            text_h  => [ sub {
+                                return if( $title == 2 || $state == 3);
+                                my($text) = @_;
+                                if( $title && $text =~ /Amplimer/ ) {
+                                    $markerurl = 'this';
+                                    $title = 2;
+                                }
+                                $state = 2 if( $state == 1 && $text =~ /Amplimer/);
+                            }, "text" ],
+                            marked_sections =>1);
+    $p->parse($content) or $self->throw("Can't open: $!");
+    if( ! defined $markerurl ) {
+        @primers = ('notfound','notfound', '?');
+    } elsif( $markerurl ne 'this' ) {
+        # the Amplimer details live on a separate page: fetch it
+        my $resp = $self->_request(GET($markerurl));
+        return if ( !defined $resp );
+        $content = $resp->content();
+    }
+
+    # Pass 2 state:
+    #   0 = start, 1 = "Primer Sequence" heading seen, 2 = primers read,
+    #   3 = "Seq Min/Max Len" label seen, 4 = inside the following <dd>
+    $state = 0;
+    $realname = 'unknown';
+    my $lasttag = '';
+    $p = HTML::Parser->new(api_version => 3,
+                           start_h => [ sub {
+                               my ($tag) = @_;
+                               $tag = lc $tag;
+                               $lasttag = $tag;
+                               if( $state == 3 && $tag eq 'dd' ) {
+                                   $state = 4;
+                               }
+                           } , 'tagname'],
+                           text_h  => [ sub {
+                               my($text) = @_;
+                               if( $text =~ /Primer Sequence/ ) {
+                                   $state =1;
+                               } elsif( $state == 1 ) {
+                                   foreach my $l ( split(/\n+/,$text) ) {
+                                       # NOTE(review): this removes the first
+                                       # whitespace run anywhere in the line,
+                                       # not only leading whitespace - confirm
+                                       # against real GDB output
+                                       $l =~ s/\s+(\S+)/$1/;
+                                       my ($name,$primer) = split(/\s+/,$l);
+                                       next if( !defined $name);
+                                       push @primers, $primer;
+                                       $state = 2;
+                                   }
+                               } elsif( $state == 2 &&
+                                        ($text =~ /Seq Min Len/i ||
+                                         $text =~ /Seq Max Len/i) ) {
+                                   $state = 3;
+                               }  elsif ( $state == 4 ) {
+                                   my ($len) = ( $text =~ /(\d+\.\d+)/ );
+                                   $length = $len;
+                                   # values below 1 are scaled by 1000
+                                   $length *= 1000 if( $len < 1 );
+                                   $state = 0;
+                               } elsif( $lasttag eq 'dd' &&
+                                        $text =~ /(GDB:\d+)/i ) {
+                                   $realname = $1;
+                               }
+                           }  , "text" ],
+                           marked_sections =>1,
+                           );
+    $p->parse($content) || $self->throw("Can't open: $!");
+
+    return { 'gdbid' => $realname, 'length' => $length, 'primers' => \@primers };
+}
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: Builds the GET request for a GDB query
+ Returns : HTTP::Request object
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+# Build the GET request for a query: $BASEADDRESS followed by
+# "?key=value&key=value...".  Throws when no parameters are given.
+# Parameters are emitted in sorted key order so the same query always
+# yields the same URL (hash order is otherwise arbitrary).
+# NOTE(review): keys and values are inserted verbatim, without URI
+# escaping - as before; callers must pass URL-safe values.
+sub get_request {
+    my ($self, %params) = @_;
+    if( ! %params ) {
+        $self->throw("must provide parameters with which to query");
+    }
+    my $url = $BASEADDRESS;
+    my $querystr = '?' . join("&", map { "$_=$params{$_}" } sort keys %params);
+    return GET($url . $querystr);
+}
+
+# private methods
+# Send request $url through the user agent and return the response.
+# When $tmpfile is a non-empty string it is passed to the agent as the
+# second argument of request().  Throws if the response is an error;
+# the unreachable "return" that followed the throw has been removed.
+sub _request {
+    my ($self, $url, $tmpfile) = @_;
+
+    my $resp;
+    if( defined $tmpfile && $tmpfile ne '' ) {
+        $resp = $self->ua->request($url, $tmpfile);
+    } else {
+        $resp = $self->ua->request($url);
+    }
+
+    if( $resp->is_error ) {
+        $self->throw($resp->as_string() . "\nError getting for url " .
+                     $url->uri . "!\n");
+    }
+    return $resp;
+}
+
+# Empty placeholder: the body contains no statements.
+sub _gdb_search_tag_start {
+
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/ace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/ace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/ace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+package Bio::DB::GFF::Adaptor::ace;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::ace -- ace interface (for multiple inheritance)
+
+=head1 SYNOPSIS
+
+Pending
+
+See L<Bio::DB::GFF> and L<Bio::DB::GFF::Adaptor::dbi::mysql>
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+use Ace;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+
+# Get/set accessor for the 'dna_db' slot.
+# Returns the value held *before* any assignment made by this call.
+sub dna_db {
+  my ($self, @new) = @_;
+  my $previous = $self->{dna_db};
+  if (@new) {
+    $self->{dna_db} = $new[0];
+  }
+  return $previous;
+}
+# Get/set accessor for the 'acedb' slot.
+# Returns the value held *before* any assignment made by this call.
+sub acedb {
+  my ($self, @new) = @_;
+  my $previous = $self->{acedb};
+  if (@new) {
+    $self->{acedb} = $new[0];
+  }
+  return $previous;
+}
+
+=head2 freshen_ace
+
+ Title   : freshen_ace
+ Usage   : $flag = Bio::DB::GFF->freshen_ace;
+ Function: Refresh internal acedb handle
+ Returns : flag if correctly freshened
+ Args    : none
+ Status  : Public
+
+ACeDB has an annoying way of timing out, leaving dangling database
+handles.  This method will invoke the ACeDB reopen() method, which
+causes dangling handles to be refreshed.  It has no effect if you are
+not using ACeDB to create ACeDB objects.
+
+=cut
+
+# Reopen the stored acedb handle.  Returns nothing when no handle is set,
+# otherwise whatever the handle's reopen() returns.
+sub freshen_ace {
+  my $self  = shift;
+  my $acedb = $self->acedb;
+  return unless $acedb;
+  return $acedb->reopen();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb/iterator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb/iterator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb/iterator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,84 @@
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::berkeleydb::iterator - iterator for Bio::DB::GFF::Adaptor::berkeleydb
+
+=head1 SYNOPSIS
+
+For internal use only
+
+=head1 DESCRIPTION
+
+This is an internal module that is used by the Bio::DB::GFF berkeleydb
+adaptor to return an iterator across a sequence feature query.  The
+object has a single method, next_feature(), that returns the next
+feature from the query.  The method next_seq() is an alias for
+next_feature().
+
+=head1 BUGS
+
+None known yet.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+package Bio::DB::GFF::Adaptor::berkeleydb::iterator;
+use strict;
+# $Id: iterator.pm,v 1.3 2005/07/30 01:26:56 lstein Exp $
+use DB_File qw(R_FIRST R_NEXT);
+
+# this module needs to be cleaned up and documented
+use Bio::Root::Version;
+*next_seq = \&next_feature;
+
+# Constructor.
+#   $data     - stored under 'data'; next_feature() treats it as a
+#               reference to a tied hash
+#   $callback - code ref invoked by next_feature()
+#   $tmpfile  - path that next_feature() unlinks once the data runs out
+# The feature cache starts out as an empty array.
+sub new {
+  my ($class, $data, $callback, $tmpfile) = @_;
+  my %self = (
+    data     => $data,
+    callback => $callback,
+    tmpfile  => $tmpfile,
+    cache    => [],
+  );
+  return bless \%self, $class;
+}
+
+# Return the next feature of the query, or nothing once it is exhausted.
+#
+# Records are read from the tied DB_File behind $self->{data}; each value is
+# split on $; and handed to the callback until the callback returns a true
+# value, which is then used as the new cache (an array ref of features).
+# When the records run out the callback is invoked once more without
+# arguments, the data handle is dropped and the temporary file is unlinked.
+sub next_feature {
+  my $self = shift;
+
+  # The cache is kept as an array ref at all times: dereferencing an undef
+  # cache would die under "use strict" if the method is called again after
+  # the iterator has been exhausted.
+  my $cache = $self->{cache} ||= [];
+  return shift @$cache if @$cache;
+
+  my $data     = $self->{data} or return;
+  my $callback = $self->{callback};
+
+  my $features;
+  my $db = tied(%$data);
+  my ($key,$value);
+
+  # The first call positions the cursor at the first record; later calls
+  # continue from where the previous one stopped.
+  for (my $status = $db->seq($key,$value,$self->{iter}++ ? R_NEXT : R_FIRST);
+       $status == 0;
+       $status = $db->seq($key,$value,R_NEXT)) {
+    my @feature = split ($;,$value);
+    $features   = $callback->(@feature);
+    last if $features;
+  }
+
+  unless ($features) {
+    $features = $callback->();   # final call with no arguments
+    undef $self->{data};
+    unlink $self->{tmpfile};
+  }
+
+  $self->{cache} = $features || [];
+  return unless $features;
+  return shift @{$self->{cache}};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/berkeleydb.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1127 @@
+package Bio::DB::GFF::Adaptor::berkeleydb;
+
+# $Id: berkeleydb.pm,v 1.24.4.1 2006/10/02 23:10:16 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::berkeleydb -- Bio::DB::GFF database adaptor for berkeleydb-indexed databases
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+  my $db = Bio::DB::GFF->new(-adaptor=> 'berkeleydb',
+                             -create => 1, # on initial build you need this
+			     -dsn    => '/usr/local/share/gff/dmel');
+
+  # initialize an empty database, then load GFF and FASTA files
+  $db->initialize(1);
+  $db->load_gff('/home/drosophila_R3.2.gff');
+  $db->load_fasta('/home/drosophila_R3.2.fa');
+
+  # do queries
+  my $segment  = $db->segment(Chromosome => '1R');
+  my $subseg   = $segment->subseq(5000,6000);
+  my @features = $subseg->features('gene');
+
+See L<Bio::DB::GFF> for other methods.
+
+=head1 DESCRIPTION
+
+This adaptor implements a berkeleydb-indexed version of Bio::DB::GFF.
+It requires the DB_File and Storable modules. It can be used to store
+and retrieve short to medium-length GFF files of several million
+features in length.
+
+=head1 CONSTRUCTOR
+
+Use Bio::DB::GFF-E<gt>new() to construct new instances of this class.
+The following named arguments are recognized:
+
+ Argument    Description
+ --------    -----------
+
+ -adaptor    Set to "berkeleydb" to create an instance of this class.
+
+ -dsn        Path to directory where the database index files will be stored (alias -db)
+
+ -autoindex  Monitor the indicated directory path for FASTA and GFF files, and update the
+               indexes automatically if they change (alias -dir)
+
+ -write      Set to a true value in order to update the database.
+
+ -create     Set to a true value to create the database the first time
+               (implies -write)
+
+ -tmp        Location of temporary directory for storing intermediate files
+               during certain queries.
+
+ -preferred_groups  Specify the grouping tag. See L<Bio::DB::GFF>
+
+The -dsn argument selects the directory in which to store the database
+index files. If the directory does not exist it will be created
+automatically, provided that the current process has sufficient
+privileges. If no -dsn argument is specified, a database named "test"
+will be created in your system's temporary files directory.
+
+The -tmp argument specifies the temporary directory to use for storing
+intermediate search results. If not specified, your system's temporary
+files directory will be used. On Unix systems, the TMPDIR environment
+variable is honored. Note that some queries can require a lot of
+space.
+
+The -autoindex argument, if present, selects a directory to be
+monitored for GFF and FASTA files (which can be compressed with the
+gzip program if desired). Whenever any file in this directory is
+changed, the index files will be updated. Note that the indexing can
+take a long time to run: anywhere from 5 to 10 minutes for a million
+features. An alias for this argument is -dir, which gives this adaptor
+a similar flavor to the "memory" adaptor.
+
+-dsn and -dir can point to the same directory. If -dir is given but
+-dsn is absent the index files will be stored into the directory
+containing the source files.  For autoindexing to work, you must
+specify the same -dir path each time you open the database.
+
+If you do not choose autoindexing, then you will want to load the
+database using the bp_load_gff.pl command-line tool. For example:
+
+ bp_load_gff.pl -a berkeleydb -c -d /usr/local/share/gff/dmel dna1.fa dna2.fa features.gff
+
+=head1 METHODS
+
+See L<Bio::DB::GFF> for inherited methods
+
+=head1 BUGS
+
+The various get_Stream_* methods and the features() method with the
+-iterator argument only return an iterator after the query runs
+completely and the module has been able to generate a temporary
+results file on disk. This means that iteration is not as big a win as
+it is for the relational-database adaptors.
+
+Like the dbi::mysqlopt adaptor, this module uses a binning scheme to
+speed up range-based searches. The binning scheme used here imposes a
+hard-coded 1 gigabase (1000 Mbase) limit on the size of the largest
+chromosome or other reference sequence.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHORS
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>
+Lincoln Stein E<lt>lstein at cshl.eduE<gt>
+
+Copyright (c) 2005 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+
+use DB_File;
+use File::Path 'mkpath';
+use File::Spec;
+use File::Temp 'tempfile';
+
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::GFF::Util::Binning;
+use Bio::DB::Fasta;
+use Bio::DB::GFF::Adaptor::berkeleydb::iterator;
+use Bio::DB::GFF::Adaptor::memory::feature_serializer; # qw(feature2string string2feature @hash2array_map);
+
+# this is the smallest bin (1 K)
+use constant MIN_BIN    => 1000;
+# this is the largest that any reference sequence can be (1000 megabases)
+use constant MAX_BIN     => 1_000_000_000;
+use constant MAX_SEGMENT => 1_000_000_000;  # the largest a segment can get
+
+# We have to define a limit because Berkeley DB sorts keys in lexicographic
+# order, so all the numbers have to have the same length.
+use constant MAX_NUM_LENGTH => length(MAX_BIN);
+
+use base 'Bio::DB::GFF::Adaptor::memory';
+
+# Constructor.
+#
+# Named arguments, as unpacked by rearrange():
+#   -dsn / -db          directory holding the index files
+#   -preferred_groups   passed to preferred_groups() when defined
+#   -dir / -autoindex   directory handed to _autoindex()
+#   -write / -writable  open the databases for writing
+#   -create             create the databases (implies -write)
+#   -tmp                directory for temporary files
+sub new {
+  my $class = shift ;
+  my ($dbdir,$preferred_groups,$autoindex,$write,$create,$tmpdir) =
+    rearrange([
+               [qw(DSN DB)],
+               'PREFERRED_GROUPS',
+               [qw(DIR AUTOINDEX)],
+               [qw(WRITE WRITABLE)],
+               'CREATE',
+               'TMP',
+              ],@_);
+
+  # Defaults: system temp directory; the autoindex directory doubles as the
+  # DSN when none is given; otherwise a "test" database in the temp directory.
+  $tmpdir ||= File::Spec->tmpdir;
+  $dbdir  ||= $autoindex;
+  $dbdir  ||= "$tmpdir/test";
+  $write  ||= $create;
+
+  my $self = bless {},$class;
+  $self->dsn($dbdir);
+  $self->tmpdir($tmpdir);
+  $self->preferred_groups($preferred_groups) if defined $preferred_groups;
+  $self->_autoindex($autoindex)              if $autoindex;
+  $self->_open_databases($write,$create);
+  return $self;
+}
+
+# Rebuild the indexes when the files in $autodir are newer than the
+# timestamp file (or when an index file is missing); otherwise just open
+# the existing databases.
+#
+#   $autodir - directory that is scanned for source files
+sub _autoindex {
+  my $self    = shift;
+  my $autodir = shift;
+
+  my $dir    = $self->dsn;
+  my %ignore = map {$_=>1} ($self->_index_file,$self->_data_file,
+                            $self->_fasta_file,$self->_temp_file,
+                            $self->_notes_file,
+                            $self->_timestamp_file);
+
+  my $maxtime   = 0;
+  my $maxfatime = 0;
+
+  opendir (my $D,$autodir) or $self->throw("Couldn't open directory $autodir for reading: $!");
+
+  while (defined (my $node = readdir($D))) {
+    next if $node =~ /^\./;
+    next if $ignore{"$dir/$node"};    # our own index files
+    # The names come from $autodir, so that is where they must be stat'ed;
+    # it can differ from the DSN directory.
+    my $path = "$autodir/$node";
+    next unless -f $path;
+    my $mtime = _mtime(\*_);  # not a typo: reuses the stat made by -f above
+    $maxtime   = $mtime if $mtime > $maxtime;
+    $maxfatime = $mtime if $mtime > $maxfatime && $node =~ /\.(?:fa|fasta|dna)(?:\.gz)?$/;
+  }
+
+  closedir $D;
+
+  my $timestamp_time  = _mtime($self->_timestamp_file) || 0;
+  my $all_files_exist = -e $self->_index_file && -e $self->_data_file && (-e $self->_fasta_file || !$maxfatime);
+
+  # to avoid rebuilding FASTA files if not changed
+  my $spare_fasta     = $maxfatime > 0 && $maxfatime < $timestamp_time && -e $self->_fasta_file;
+
+  if ($maxtime > $timestamp_time || !$all_files_exist) {
+    print STDERR __PACKAGE__,": Reindexing files in $dir. This may take a while....\n";
+    $self->do_initialize(1,$spare_fasta);
+    $self->load_gff($autodir,1);
+    $self->load_fasta($autodir,1) unless $spare_fasta;
+    print STDERR __PACKAGE__,": Reindexing done\n";
+  }
+
+  else {
+    $self->_open_databases();
+  }
+
+}
+
+# Open (and optionally create) the on-disk stores under the DSN directory:
+# the B-tree index, the feature data store, the FASTA database when its
+# file exists, and the notes file.
+#
+#   $write  - true to open read/write
+#   $create - true to create missing files and the directory itself
+#
+# Throws when the directory is missing without $create, or when any of the
+# files cannot be opened.
+sub _open_databases {
+  my $self   = shift;
+  my ($write,$create) = @_;
+
+  my $dsn  = $self->dsn;
+  unless (-d $dsn) {  # directory does not exist
+    $create or $self->throw("Directory $dsn does not exist and you did not specify the -create flag");
+    mkpath($dsn) or $self->throw("Couldn't create database directory $dsn: $!");
+  }
+
+  my %db;
+  # Duplicate keys are enabled only for the duration of this call (local).
+  # NOTE(review): the case-insensitive compare below is assigned without
+  # local, so it stays on the shared $DB_BTREE after this sub returns.
+  local $DB_BTREE->{flags} = R_DUP;
+  $DB_BTREE->{compare}     = sub { lc($_[0]) cmp lc($_[1]) };
+  my $flags = O_RDONLY;
+  $flags   |= O_RDWR  if $write;
+  $flags   |= O_CREAT if $create;
+
+  tie(%db,'DB_File',$self->_index_file,$flags,0666,$DB_BTREE)
+    or $self->throw("Couldn't tie ".$self->_index_file.": $!");
+  $self->{db}   = \%db;
+  $self->{data} = FeatureStore->new($self->_data_file,$write,$create);
+
+  if (-e $self->_fasta_file) {
+    my $dna_db = Bio::DB::Fasta->new($self->_fasta_file) or $self->throw("Can't reindex sequence file: $@");
+    $self->dna_db($dna_db);
+  }
+
+  # $write takes precedence over $create, so a writable open appends to the
+  # notes file; only create-without-write would truncate it.
+  my $mode =  $write  ? "+>>"
+            : $create ? "+>"
+            : "<";
+
+  open (my $F,$mode,$self->_notes_file) or $self->throw($self->_notes_file.": $!");
+  $self->{notes} = $F;
+}
+
+# Drop the references to the index, the feature store and the notes handle,
+# in that order.  Returns the removed notes handle.
+sub _close_databases {
+  my ($self) = @_;
+  for my $slot (qw(db data)) {
+    delete $self->{$slot};
+  }
+  return delete $self->{notes};
+}
+
+# Delete the features with the given ids from the data store and from every
+# secondary index.  Ids outside 0 .. last_id-1, and ids for which the store
+# returns nothing, are skipped.  Returns the number of features removed.
+sub _delete_features {
+  my ($self, @feature_ids) = @_;
+  my $last_id = $self->{data}->last_id;
+  my $removed = 0;
+
+  ID:
+  foreach my $id (@feature_ids) {
+    next ID if $id < 0 || $id >= $last_id;
+    my $feat = $self->{data}->get($id);
+    next ID unless $feat;
+    $self->{data}->remove($id);
+    $self->_bump_class_count($feat->{gclass},-1);
+    for my $skey ($self->_secondary_keys($feat)) {
+      $self->db->del_dup($skey,$id);
+    }
+    $removed++;
+  }
+  return $removed;
+}
+
+# Build the index keys under which a feature is filed:
+#   __name__<gclass>:<gname>    (lower-cased)
+#   __bin__<ref>$;<bin>         (lower-cased)
+#   __type__<method>:<source>   (case preserved)
+#   __attr__<tag>:<value>       (lower-cased, one per attribute pair)
+sub _secondary_keys {
+  my ($self, $feat) = @_;
+  my $name_key  = "__name__".lc(join ":",$feat->{gclass},$feat->{gname});
+  my $bin_key   = "__bin__".lc("$feat->{ref}$;$feat->{bin}");
+  my $type_key  = "__type__".join(':',$feat->{method},$feat->{source});
+  my @attr_keys = map {"__attr__".lc(join(':',$_->[0],$_->[1]))} @{$feat->{attributes}};
+  return ($name_key, $bin_key, $type_key, @attr_keys);
+}
+
+# Delete features according to $delete_spec.  Requests limited by segments
+# or types go to the superclass; a request for everything needs the 'force'
+# flag, then recreates the data store and empties the index.  In that case
+# the previously stored feature count is returned.
+sub _delete {
+  my ($self, $delete_spec) = @_;
+  if (@{$delete_spec->{segments}} or @{$delete_spec->{types}}) {
+    return $self->SUPER::_delete($delete_spec);
+  }
+  unless ($delete_spec->{force}) {
+    $self->throw("This operation would delete all feature data and -force not specified");
+  }
+  my $deleted = $self->{db}{__count__};
+  $self->{data} = FeatureStore->new($self->_data_file,1,1);
+  %{$self->{db}} = ();
+  return $deleted;
+}
+
+# With duplicates enabled we cannot simply do $db->{__count__}++; the old
+# record is deleted before the new value is stored.
+#
+# With an argument: set the counter to that value and return it.
+# Without: increment the counter and return its value before the increment.
+sub _bump_feature_count {
+  my ($self, @args) = @_;
+  my $db = $self->{db};
+
+  unless (@args) {
+    my $current = $db->{__count__};
+    delete $db->{__count__};
+    $db->{__count__} = $current + 1;
+    return $current;
+  }
+
+  delete $db->{__count__};
+  return $db->{__count__} = $args[0];
+}
+
+# Add $count (default 1, may be negative) to the per-class counter stored
+# under "__class__<class>".  The old record is deleted before the new one
+# is written.  Returns the new count.
+sub _bump_class_count {
+  my ($self, $class, $count) = @_;
+  $count = 1 unless $count;
+  my $key      = "__class__$class";
+  my $db       = $self->{db};
+  my $newcount = $db->{$key} + $count;
+  delete $db->{$key};
+  return $db->{$key} = $newcount;
+}
+
+# Return the sorted names of all classes whose stored count is above zero.
+# The cursor starts at the first key at or after "__class__" and stops at
+# the first key that no longer carries that prefix.
+sub classes {
+  my $self = shift;
+  my $db   = $self->db;
+  my %seen;
+  my $key = '__class__';
+  my $value;
+  my $status = $db->seq($key,$value,R_CURSOR);
+  while ($status == 0) {
+    last unless $key =~ /^__class__(.+)/;
+    my $class = $1;
+    $seen{$class}++ if $value > 0;
+    $status = $db->seq($key,$value,R_NEXT);
+  }
+  my @sorted = sort keys %seen;
+  return @sorted;
+}
+
+# Initialize the database.  With a true $erase the databases are closed,
+# their files removed and fresh ones created; the FASTA file and its index
+# are kept when $spare_fasta (internal use only) is true.  Always returns 1.
+sub do_initialize {
+  my ($self, $erase, $spare_fasta) = @_;
+  return 1 unless $erase;
+
+  $self->_close_databases;
+  my @stale = ($self->_index_file, $self->_data_file, $self->_notes_file);
+  push @stale, $self->_fasta_file, $self->_fasta_file.'.index'
+    unless $spare_fasta;
+  push @stale, $self->_timestamp_file;
+  unlink $_ foreach @stale;
+  $self->_open_databases(1,1);
+  return 1;
+}
+
+# load_sequence($fasta_filehandle,$first_sequence_id)
+#
+# Append FASTA data read from $io_handle to the adaptor's sequence file and
+# reindex it.  When $id is defined, a ">$id" header line is written first.
+# Returns the number of sequences appended (header lines seen, plus one for
+# $id).
+sub load_sequence {
+  my $self = shift;
+  my ($io_handle,$id) = @_;
+  my $file = $self->_fasta_file;
+  my $loaded = 0;
+
+  open (my $F,'>>',$file) or $self->throw("Couldn't open $file for writing: $!");
+
+  if (defined $id) {
+    print $F ">$id\n";
+    $loaded++;
+  }
+
+  # Write to the lexical handle opened above; a bareword F here would refer
+  # to a different, unopened handle and the sequence lines would be lost.
+  while (my $line = <$io_handle>) {
+    $loaded++ if $line =~ /^>/;
+    print $F $line;
+  }
+  close $F or $self->throw("Couldn't write to $file: $!");
+
+  my $dna_db = Bio::DB::Fasta->new($file) or $self->throw("Can't reindex sequence file: $@");
+  $self->dna_db($dna_db);
+  $self->_touch_timestamp;
+  return $loaded;
+}
+
+# Return the modification time (element 9 of stat) of a file name or file
+# handle, or undef when stat fails.  Plain function, not a method.
+sub _mtime {
+  my ($file) = @_;
+  my $mtime = (stat($file))[9];
+  return $mtime;
+}
+
+# Path of the B-tree index file inside the DSN directory.
+sub _index_file {
+  my ($self) = @_;
+  my $dir = $self->dsn;
+  return $dir . "/bdb_features.btree";
+}
+
+# Path of the serialized feature data file inside the DSN directory.
+sub _data_file {
+  my ($self) = @_;
+  my $dir = $self->dsn;
+  return $dir . "/bdb_features.data";
+}
+
+# Path of the FASTA sequence file inside the DSN directory.
+sub _fasta_file {
+  my ($self) = @_;
+  my $dir = $self->dsn;
+  return $dir . "/bdb_sequence.fa";
+}
+
+# Path of the searchable notes index inside the DSN directory.
+sub _notes_file {
+  my ($self) = @_;
+  my $dir = $self->dsn;
+  return $dir . "/bdb_notes.idx";
+}
+
+# Return a fresh temporary file name inside the adaptor's tmpdir.  The file
+# is not opened (OPEN=>0); only the name is generated.  Warnings are
+# switched off for the duration of the call.
+sub _temp_file {
+  my ($self) = @_;
+  local $^W=0;
+  my @made = tempfile("bdb_temp_XXXXXX",DIR=>$self->tmpdir,OPEN=>0);
+  return $made[1];
+}
+
+# Path of the timestamp file inside the DSN directory.
+sub _timestamp_file {
+  my ($self) = @_;
+  my $dir = $self->dsn;
+  return $dir ."/bdb_timestamp";
+}
+
+# Return the object tied to the index hash, or nothing when no index hash
+# is currently stored.
+sub db {
+  my $self = shift;
+  my $db   = $self->{db};
+  return unless $db;
+  return tied(%$db);
+}
+
+# Get/set accessor for the database directory.
+# Returns the value held *before* any assignment made by this call.
+sub dsn {
+  my ($self, @new) = @_;
+  my $previous = $self->{dsn};
+  if (@new) {
+    $self->{dsn} = $new[0];
+  }
+  return $previous;
+}
+
+# Get/set accessor for the temporary directory.
+# Returns the value held *before* any assignment made by this call.
+sub tmpdir {
+  my ($self, @new) = @_;
+  my $previous = $self->{tmpdir};
+  if (@new) {
+    $self->{tmpdir} = $new[0];
+  }
+  return $previous;
+}
+
+# Store one parsed GFF feature.
+#
+#   $feat - feature hash; 'strand' and 'phase' values of '.' are replaced by
+#           '' and a 'bin' entry is added before the feature is stored
+#
+# The feature goes into the data store, is filed under each of its
+# secondary keys, and its Note attributes are appended to the notes file.
+sub load_gff_line {
+
+  my ($self, $feat) = @_;
+
+  $feat->{strand} = '' if $feat->{strand} && $feat->{strand} eq '.';
+  $feat->{phase}  = '' if $feat->{phase}  && $feat->{phase}  eq '.';
+
+  $feat->{bin} = bin($feat->{start},$feat->{stop},MIN_BIN);
+
+  my $id = $self->{data}->put($feat);
+
+  my $db = $self->{db};
+  for my $skey ($self->_secondary_keys($feat)) {
+    $db->{$skey} = $id;
+  }
+
+  # save searchable notes to separate index: "<note>\t<uuencoded id>"
+  my $fh = $self->{notes};
+  my @notes = map {$_->[1]} grep {lc $_->[0] eq 'note'} @{$feat->{attributes}};
+  for my $note (@notes) {
+    print $fh $note,"\t",pack("u*",$id)
+      or $self->throw("An error occurred while updating indexes: $!");
+  }
+
+  $self->{records_loaded}++;
+  $self->_bump_feature_count();
+  $self->_bump_class_count($feat->{gclass});
+
+}
+
+# Reset the per-load record counter.  Always returns 1.
+sub setup_load {
+  my ($self) = @_;
+  $self->{records_loaded} = 0;
+  return 1;
+}
+
+# Flush the index to disk, refresh the timestamp file and return the number
+# of records loaded.  Throws when sync() returns a true (failure) status.
+sub finish_load {
+  my ($self) = @_;
+  if ($self->db->sync) {
+    $self->throw("An error occurred while updating indexes: $!");
+  }
+  $self->_touch_timestamp;
+  return $self->{records_loaded};
+}
+
+# Rewrite the timestamp file with the current local time.  _autoindex
+# compares this file's modification time with that of the source files.
+# Throws when the file cannot be opened.
+sub _touch_timestamp {
+  my $self = shift;
+  my $tsf = $self->_timestamp_file;
+  # three-argument open: the path is never interpreted as part of the mode
+  open (my $F,'>',$tsf) or $self->throw("Couldn't open $tsf: $!");
+  print $F scalar(localtime);
+  close $F;
+  return 1;
+}
+
+
+# Given a sequence name and class, locate it in absolute coordinates.
+#
+#   $name   - feature name
+#   $class  - feature class
+#   $refseq - optional; restricts the answer to this reference sequence
+#
+# Returns an array ref with one [ref,class,start,stop,strand,name] entry per
+# reference sequence, where start/stop span all matching features on it.
+# When nothing matches, error() is called and nothing is returned.
+sub get_abscoords {
+  my $self = shift;
+  my ($name,$class,$refseq) = @_;
+
+  # NOTE(review): wildcards are rewritten into regexp syntax here, but the
+  # result is only used as an exact index key and in the error message.
+  if ($name =~ /[*?]/) {
+    $name = quotemeta($name);
+    $name =~ s/\\\*/.*/g;
+    $name =~ s/\\\?/.?/g;
+  }
+
+  # Find all features that have the requested name and class.
+  my @features = @{$self->retrieve_features(-table => 'name', -key=>"$class:$name")};
+  if (!@features) {  # nothing matched exactly, so try aliases
+    @features = @{$self->retrieve_features(-table=>'attr',-key=>"Alias:$name")};
+  }
+
+  # Group them by reference sequence.
+  my %refs;
+  foreach my $feature (@features){
+    push @{$refs{$feature->{ref}}},$feature;
+  }
+
+  if (! %refs) {
+    $self->error("$name not found in database");
+    return;
+  }
+
+  # compute min and max per reference sequence
+  my @found_segments;
+  foreach my $ref (keys %refs) {
+    next if defined($refseq) and $ref ne $refseq;
+    my ($strand,$start,$stop,$gname);
+    foreach my $feature (@{$refs{$ref}}) {
+      $strand ||= $feature->{strand};
+      $strand = '+' if $strand && $strand eq '.';
+      $start  = $feature->{start} if !defined($start) || $start > $feature->{start};
+      $stop   = $feature->{stop}  if !defined($stop)  || $stop  < $feature->{stop};
+      $gname ||= $feature->{gname};
+    }
+    push @found_segments,[$ref,$class,$start,$stop,$strand,$gname];
+
+  }
+
+  return \@found_segments;
+}
+
+# Return the feature types present in the database, or in one region of it.
+#
+#   $srcseq, $class, $start, $stop - region to restrict the search to; when
+#                                    $srcseq is false the whole type index
+#                                    is scanned instead
+#   $want_count                    - true to return a type => count hash
+#   $typelist                      - accepted but not used in this sub
+#
+# Returns a flat (type => count) list when $want_count is true, otherwise a
+# list of Bio::DB::GFF::Typename objects.
+sub get_types {
+  my $self = shift;
+  my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
+  my (%obj,%result,$key,$value);
+  $key = "__type__";
+
+  if (!$srcseq) { # optimized full type list
+    my $db = $self->db;
+    my $status = $db->seq($key,$value,R_CURSOR);
+
+    while ($status == 0 && $key =~ /^__type__(.+)/) {
+      my $type = $1;
+      my ($method,$source) = split ':',$type;
+      $obj{$type} = Bio::DB::GFF::Typename->new($method,$source);
+      $result{$type}++;
+
+      if ($want_count) {
+	$status = $db->seq($key,$value,R_NEXT);
+      } else { # skip to next key set
+	# appending "\0" gives a key that sorts right after the current one,
+	# so the cursor jumps past all of its duplicates
+	$key .= "\0";
+	$status = $db->seq($key,$value,R_CURSOR)
+      }
+
+    }
+  }
+
+  else { # range search
+    for my $feature (@{$self->_get_features_by_search_options(
+							      {rangetype => 'overlaps',
+							       refseq    => $srcseq,
+							       refclass  => ($class || undef),
+							       start     => ($start || undef),
+							       stop      => ($stop  || undef),
+							      },
+							      {}
+							     )}
+		    ) {
+      my $type = Bio::DB::GFF::Typename->new($feature->{method},$feature->{source});
+      # here the hash keys are the stringified Typename objects
+      $obj{$type} = $type;
+      $result{$type}++;
+    }
+  }
+
+  return $want_count ? %result : values %obj;
+}
+
+
+# Low level implementation of fetching a named feature.
+# GFF annotations are named using the group class and name fields.
+# May return zero, one, or several Bio::DB::GFF::Feature objects.
+
+=head2 _feature_by_name
+
+ Title   : _feature_by_name
+ Usage   : $db->_feature_by_name($class,$name,$location,$callback)
+ Function: get a list of features by name and class
+ Returns : count of number of features retrieved
+ Args    : class of feature, name of feature, optional location, and a callback
+ Status  : protected
+
+This method is used internally.  The callback arguments are those used
+by make_feature().
+
+=cut
+
+# Look up features by class and name and pass each hit to $callback.
+#
+#   $class    - feature class
+#   $name     - feature name; may contain the wildcards * and ?
+#   $location - optional array ref [ref, start, stop]; hits on another
+#               reference, or entirely outside start..stop, are skipped
+#   $callback - code ref, required; receives the feature fields in
+#               @hash2array_map order followed by 0
+#
+# When the name search finds nothing, the name is tried as an Alias
+# attribute.  Returns the number of features passed to the callback.
+sub _feature_by_name {
+  my $self = shift;
+  my ($class,$name,$location,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+
+  my $count = 0;
+  my ($use_regexp, $use_glob,$using_alias_search);
+
+  if ($name =~ /[*?]/) {  # uh oh regexp time
+
+    # A literal prefix followed by a single trailing * can be answered with
+    # a range search over the name index.
+    if ($name =~ /^([^*?]+)\*$/) {
+      $name = $1;
+      $use_glob++;
+    }
+
+    else {
+      $name = quotemeta($name);
+      $name =~ s/\\\*/.*/g;
+      $name =~ s/\\\?/.?/g;
+      $use_regexp++;
+    }
+  }
+
+  my @features;
+  if ($use_glob) {
+    # \Q..\E: the prefix is literal text, so characters such as "." or "("
+    # in a feature name must not be read as regexp syntax.
+    my $prefix_matches = sub {my $feat = shift; $feat->{gname} =~ /^\Q$name\E/i};
+    @features = @{$self->retrieve_features_range (-table => 'name',
+                                                  -start => "$class:$name",
+                                                  -do_while => $prefix_matches)
+                };
+  }
+  elsif ($use_regexp) {
+    my $filter = sub {my $feat = shift; $feat->{gname} =~ /$name/i};
+    @features = @{$self->filter_features(-table =>'name', -filter => $filter)};
+  }
+
+  else {
+    @features = @{$self->retrieve_features(-table=>'name',   -key => "$class:$name")};
+  }
+
+  unless (@features) {
+    $using_alias_search++;
+    @features = @{$self->retrieve_features(-table=>'attr',   -key=>"Alias:$name")};
+  }
+
+  foreach my $feature (@features){
+    next unless $using_alias_search || $feature->{gclass} eq $class;
+
+    if ($location) {
+      next if $location->[0] ne $feature->{ref};
+      next if $location->[1] && $location->[1] > $feature->{stop};
+      next if $location->[2] && $location->[2] < $feature->{start};
+    }
+    $count++;
+
+    $callback->(@{$feature}{@hash2array_map},0);
+  }
+  return $count;
+}
+
+#sub get_feature_by_attribute{
+# Look up features by attribute and pass each hit to $callback.
+#
+#   $attributes - hash ref of attribute name => value; every pair is looked
+#                 up separately in the attribute index
+#   $callback   - code ref, required; receives the feature fields in
+#                 @hash2array_map order followed by undef
+#
+# Returns the number of features passed to the callback, in line with
+# _feature_by_name.
+sub _feature_by_attribute{
+  my $self = shift;
+  my ($attributes,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+  my $count = 0;
+  my $feature_group_id = undef;
+
+  #there could be more than one set of attributes......
+  while (my ($key, $value) = each %$attributes) {
+
+    my @features = @{$self->retrieve_features
+                       (-table => "attr", -key => "$key:$value")};
+
+    for my $feature (@features) {
+      $callback->(@{$feature}{@hash2array_map},$feature_group_id);
+      $count++;
+    }
+  }
+
+  return $count;
+}
+
+# Search the notes index for features whose notes contain any of the words
+# in $search_string.
+#
+#   $search_string - free text; * and ? are removed, then every run of word
+#                    characters becomes one search term
+#   $limit         - optional cap on the number of matching index lines
+#
+# Returns a list of [featname, note text, relevance] array refs, where
+# relevance is ten times the number of term hits in the feature's notes.
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  $search_string =~ tr/*?//d;
+
+  my @results;
+
+  my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
+  # Without any term the pattern would be empty, and Perl treats an empty
+  # pattern as "reuse the last successful regexp".
+  return @results unless @words;
+  my $search = join '|',@words;
+
+  my (%found,$found);
+  my $note_index = $self->{notes};
+  seek($note_index,0,0);  # back to start
+  while (<$note_index>) {
+    next unless /$search/;
+    chomp;
+    my ($note,$uu) = split "\t";
+    $found{unpack("u*",$uu)}++;
+    last if $limit && ++$found >= $limit;
+  }
+
+  my (@features, @matches);
+  for my $idx (keys %found) {
+    my $feature = $self->{data}->get($idx) or next;
+    my @attributes = @{$feature->{attributes}};
+    my @values     = map {lc $_->[0] eq 'note' ? $_->[1] : ()} @attributes;
+    my $value      = "@values";
+
+    my $hits;
+    $hits++ while $value =~ /($search)/ig;  # count the number of times we were hit
+    push @matches,$hits;
+    push @features,$feature;
+  }
+
+  for (my $i=0; $i<@matches; $i++)  {
+    my $feature = $features[$i];
+    my $matches = $matches[$i];
+
+    my $relevance = 10 * $matches;
+    my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
+    my $note;
+    $note   = join ' ',map {$_->[1]} grep {$_->[0] eq 'Note'}                @{$feature->{attributes}};
+    push @results,[$featname,$note,$relevance];
+  }
+
+  return @results;
+}
+
+# Core query routine: collect the features that satisfy a search.
+#
+#   $search  - hash ref; keys read here: rangetype, refseq, refclass,
+#              start, stop, types
+#   $options - hash ref; keys read here: sparse, sort_by_group, ATTRIBUTES,
+#              temp_file
+#
+# Returns an array ref of feature hashes or, when $options->{temp_file} is
+# set, a reference to a hash tied to a DB_File B-tree stored in that file.
+#
+# One index is chosen to produce candidates (bins when a reference sequence
+# is given and the search is not sparse, else types, else one attribute,
+# else a full scan); the $filter closure then applies all the criteria.
+sub _get_features_by_search_options {
+
+  my ($self, $search,$options) = @_;
+
+  my ($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes,$temp_file) =
+    (@{$search}{qw(rangetype refseq refclass start stop types)},
+     @{$options}{qw(sparse sort_by_group ATTRIBUTES temp_file)}) ;
+
+  $start = 0               unless defined($start);
+  $stop  = MAX_BIN         unless defined($stop);
+
+  my ($results,@features,%found,%results_table);
+
+  if ($temp_file) {
+    local $DB_BTREE->{flags} = R_DUP;
+    # note: there is a race condition possible here, if someone reuses the
+    # same name between the time we get the tmpfile name and the time we
+    # ask DB_File to open it.
+    tie(%results_table,'DB_File',$temp_file,O_RDWR|O_CREAT,0666,$DB_BTREE)
+      or $self->throw("Couldn't tie temporary file ".$temp_file." for writing: $!");
+    $results = \%results_table;
+  } else {
+    $results = \@features;
+  }
+
+  # Returns 1 to keep a candidate feature, 0 to drop it.
+  my $filter = sub {
+    my $feature = shift;
+
+    my $ref           = $feature->{ref};
+    my $feature_start = $feature->{start};
+    my $feature_stop  = $feature->{stop};
+    my $feature_id    = $feature->{feature_id};
+
+    # the same feature can be reached through more than one index entry
+    return 0 if $found{$feature_id}++;
+
+    if (defined $refseq) {
+      return 0 unless lc $refseq eq lc $ref;
+      $start = 0               unless defined($start);
+      $stop  = MAX_SEGMENT     unless defined($stop);
+
+      if ($rangetype eq 'overlaps') {
+        return 0 unless $feature_stop >= $start && $feature_start <= $stop;
+      } elsif ($rangetype eq 'contains') {
+        return 0 unless $feature_start >= $start && $feature_stop <= $stop;
+      } elsif ($rangetype eq 'contained_in') {
+        return 0 unless $feature_start <= $start && $feature_stop >= $stop;
+      } else {
+        return 0 unless $feature_start == $start && $feature_stop == $stop;
+      }
+    }
+
+    my $feature_source = $feature->{source};
+    my $feature_method = $feature->{method};
+
+    if (defined $types && @$types){
+      return 0 unless $self->_matching_typelist($feature_method,$feature_source,$types);
+    }
+
+    my $feature_attributes = $feature->{attributes};
+    if (defined $attributes){
+      return 0 unless $self->_matching_attributes($feature_attributes,$attributes);
+    }
+
+    return 1;
+  };
+
+  if (defined $refseq && !$sparse) {
+    # walk the bin tiers from the largest to the smallest
+    my $tier = MAX_BIN;
+    while ($tier >= MIN_BIN) {
+      my ($tier_start,$tier_stop) = (bin_bot($tier,$start),bin_top($tier,$stop));
+      # warn "Using $tier_start $tier_stop\n";
+      if ($tier_start == $tier_stop) {
+        $self->retrieve_features(-table => "bin",
+                                 -key => "$refseq$;$tier_start",
+                                 -filter => $filter,
+                                 -result => $results);
+      } else {
+        my $callback = sub {my $feat = shift; $feat->{bin} <= $tier_stop};
+        $self->retrieve_features_range(-table => "bin",
+                                       -start => "$refseq$;$tier_start",
+                                       -do_while => $callback,
+                                       -filter => $filter,
+                                       -result => $results);
+      }
+      $tier /= 10;
+    }
+  }
+
+  # $types may be absent from the search spec; dereferencing undef would die
+  # under "use strict".
+  elsif ($types && @$types) {
+    foreach my $wanted (@$types) {
+      my $type = join ':',@$wanted;
+      $self->retrieve_features_range(-table    => 'type',
+                                     -start    => $type,
+                                     -filter   => $filter,
+                                     -do_while => sub { my $f = shift;
+                                                        lc($f->{method}) eq lc($wanted->[0])
+                                                          &&
+                                                        lc($f->{source}||$wanted->[1]||'') eq lc($wanted->[1]||'')
+                                                      },
+                                     -result => $results);
+    }
+  }
+
+  elsif (defined $attributes) {
+    my ($attribute_name,$attribute_value) = each %$attributes; # pick first one
+    $self->retrieve_features(-table => 'attr',
+                             -key   => "${attribute_name}:${attribute_value}",
+                             -filter => $filter,
+                             -result  => $results);
+  }
+
+  else {
+    $self->filter_features(-filter => $filter,-result=>$results);
+  }
+
+  return $results;
+}
+
+# Fetch all features filed under one exact index key.
+#
+# Named arguments:
+#   -table   index name ('name', 'bin', 'type' or 'attr')
+#   -key     key inside that index; table and key are lower-cased
+#   -filter  optional code ref called with each feature; a false return
+#            skips the feature, -1 keeps it and ends the scan
+#   -result  optional array ref or hash ref to add to; with a hash ref each
+#            feature is stored as a $;-joined string under "gclass:gname"
+#
+# Returns the result container.
+sub retrieve_features {
+  my $self = shift;
+  my ($table, $key, $filter, $result) = rearrange(['TABLE','KEY','FILTER', 'RESULT'],@_);
+
+  my @result;
+  $result ||= \@result;
+
+  my @ids  = $self->db->get_dup("__".lc($table)."__".lc($key));
+  my $data = $self->{data};
+  local $^W = 0;   # because _hash_to_array() will generate lots of uninit values
+
+  foreach my $id (@ids) {
+    my $feat = $data->get($id);
+    my $filter_result = $filter ? $filter->($feat) : 1;
+    next unless $filter_result;
+    if (ref $result eq 'HASH') {
+      $result->{"$feat->{gclass}:$feat->{gname}"} = join ($;,$self->_hash_to_array($feat));
+    } else {
+      push @$result, $feat;
+    }
+    last if $filter_result == -1;
+  }
+  return $result;
+}
+
+# Scan an index forward from a start key for as long as a condition holds.
+#
+# Named arguments:
+#   -table     index name
+#   -start     key at which the cursor is positioned (first key >= start)
+#   -do_while  code ref called with (feature, key); the scan stops at the
+#              first false return
+#   -filter    optional code ref called with each feature; a false return
+#              skips the feature, -1 keeps it and ends the scan
+#   -result    optional array ref or hash ref to add to; with a hash ref
+#              each feature is stored as a $;-joined string under
+#              "gclass:gname"
+#
+# Returns the result container.
+sub retrieve_features_range {
+  my ($self) = shift;
+  my ($table, $start, $do_while, $filter, $result) = rearrange(['TABLE','START','DO_WHILE', 'FILTER', 'RESULT'],@_);
+  local $^W = 0;  # because _hash_to_array will generate lots of uninit warnings
+
+  my @result;
+  $result ||= \@result;
+  my ($key, $value);
+
+  $key = "__".$table."__".$start;
+  my $db   = $self->db;
+
+  for (my $status = $db->seq($key,$value,R_CURSOR);
+       $status == 0;
+       $status = $db->seq($key,$value,R_NEXT)) {
+
+    my $feat = $self->{data}->get($value);
+    last unless $do_while->($feat,$key);
+
+    my $filter_result = $filter ? $filter->($feat) : 1;
+    next unless $filter_result;
+
+    if (ref $result eq 'HASH') {
+      $result->{"$feat->{gclass}:$feat->{gname}"} = join($;,$self->_hash_to_array($feat));
+    } else {
+      push @$result,$feat;
+    }
+    last if $filter_result == -1;
+  }
+
+  return $result;
+}
+
+
+# Walk every feature in the data store, keeping those the filter accepts.
+#
+# Named arguments:
+#   -filter  optional code ref called with each feature; a false return
+#            skips the feature, -1 keeps it and ends the scan
+#   -result  optional array ref or hash ref to add to; with a hash ref each
+#            feature is stored as a $;-joined string under "gclass:gname"
+#
+# Returns the result container.
+sub filter_features {
+  my ($self) = shift;
+
+  my ($filter,$result) = rearrange(['FILTER','RESULT'],@_);
+
+  my @result;
+  $result ||= \@result;
+
+  my $data = $self->{data};
+  $data->reset;
+  while (my $feat = $data->next) {
+
+    my $filter_result = $filter ? $filter->($feat) : 1;
+    next unless $filter_result;
+
+    if (ref($result) eq 'HASH') {
+      $result->{"$feat->{gclass}:$feat->{gname}"} = join($;,$self->_hash_to_array($feat));
+    } else {
+      push @$result,$feat;
+    }
+    last if $filter_result == -1;
+  }
+
+  return $result;
+}
+
+
+# Fetch features from the data store by id.
+#
+# Accepts either a single id or a reference to an array of ids.  In list
+# context returns one entry per id; in scalar context returns the first.
+sub _basic_features_by_id{
+  my $self = shift;
+  my ($ids) = @_;
+
+  # ref() must be parenthesised: "ref $ids =~ /ARRAY/" parses as
+  # ref($ids =~ /ARRAY/), which is always false and re-wrapped array refs.
+  $ids = [$ids] unless ref($ids) eq 'ARRAY';
+
+  my @result;
+  my $data = $self->{data};
+  for my $feature_id (@$ids){
+    push @result, $data->get($feature_id);
+  }
+
+  return wantarray() ? @result : $result[0];
+}
+
+# Left-pad $num with "0" characters until it is MAX_NUM_LENGTH characters
+# long.  Values that are already long enough are returned untouched.
+sub normalizeNumber {
+  my ($self, $num) = @_;
+  my $missing = (MAX_NUM_LENGTH) - length($num);
+  $num = ("0" x $missing) . $num if $missing > 0;
+  return $num;
+}
+
+# Run a feature search and wrap the hits in a berkeleydb iterator.
+#
+# Arguments: ($search, $options, $callback).  $callback is mandatory; the
+# adaptor's temp file path is recorded in $options->{temp_file} before the
+# search runs and is handed to the iterator afterwards.
+sub get_features_iterator {
+  my ($self, $search, $options, $callback) = @_;
+  $self->throw('must provide a callback argument') unless $callback;
+
+  $options->{temp_file} = $self->_temp_file;
+  my $hits = $self->_get_features_by_search_options($search,$options);
+
+  return Bio::DB::GFF::Adaptor::berkeleydb::iterator->new($hits,$callback,$options->{temp_file});
+}
+
+#--------------------------------------------------------------------------#
+
+package FeatureStore;
+
+# This is a very specialized package that stores serialized features onto a file-based
+# array. The array is indexed by the physical offset to the beginning of each serialized
+# feature.
+
+use strict;
+use Fcntl qw(SEEK_SET SEEK_END);
+use base 'Bio::Root::Root';
+use Bio::DB::GFF::Adaptor::memory::feature_serializer; # qw(feature2string string2feature @hash2array_map);
+
+# Open (or create) the feature file and return a FeatureStore on it.
+#
+# Arguments: ($dbname, $write, $create).
+#   $create true -> "+>"   (read/write, truncating)
+#   $write  true -> "+>>"  (read/append)
+#   otherwise    -> "<"    (read only)
+# Throws if no path is given or the file cannot be opened.
+sub new {
+  my ($class, $dbname, $write, $create) = @_;
+  $class->throw("must provide a filepath argument") unless $dbname;
+
+  my $mode;
+  if    ($create) { $mode = "+>";  }
+  elsif ($write)  { $mode = "+>>"; }
+  else            { $mode = "<";   }
+
+  open (my $handle,$mode,$dbname) or $class->throw("$dbname: $!");
+
+  return bless {
+    fh        => $handle,
+    next_idx  => 0,
+    last_id   => 0,
+  },$class;
+}
+
+# Append a serialized feature to the end of the file.
+#
+# The feature's id is its byte offset in the file, packed with "L"; the id is
+# written into $feature->{feature_id} before serialization and returned.
+# Each record is stored as a 16-bit length followed by the data ("n/a*").
+sub put {
+  my ($self, $feature) = @_;
+  my $handle = $self->{fh};
+
+  seek($handle,0,SEEK_END);
+  my $position = tell($handle) || 0;
+  $self->{last_id} = $position;
+
+  my $packed_id = pack("L",$position);
+  $feature->{feature_id} = $packed_id;
+
+  my $record = feature2string($feature);
+  print $handle pack("n/a*",$record) or $self->throw("An error occurred while updating the data file: $!");
+
+  return $packed_id;
+}
+
+# Return the file offset recorded by the most recent put() (0 initially).
+sub last_id {
+  my $self = shift;
+  return $self->{last_id};
+}
+
+# Read the record stored at the offset packed in $idx and return it as a
+# feature.  Also remembers, in {next_idx}, where the following record starts.
+#
+# Returns nothing when the record cannot be read, or when its data begins
+# with a NUL byte.
+sub get {
+  my ($self, $idx) = @_;
+  my $handle   = $self->{fh};
+  my $position = unpack("L",$idx) || 0;
+
+  seek($handle,$position,SEEK_SET);
+
+  my ($size_field, $record);
+  read($handle,$size_field,2)                  or return;
+  read($handle,$record,unpack("n",$size_field)) or return;
+
+  $self->{next_idx} = tell($handle);
+  return if substr($record,0,1) eq "\0";
+  return string2feature($record);
+}
+
+# Return the next readable feature after the current position, retrying
+# get() until it yields a feature or the file is at EOF.  When nothing is
+# found the position is rewound to 0 and a false value is returned.
+sub next {
+  my $self   = shift;
+  my $handle = $self->{fh};
+
+  my $feature;
+  while (1) {
+    $feature = $self->get(pack("L",$self->{next_idx}));
+    last if $feature || eof($handle);
+  }
+
+  $self->{next_idx} = 0 unless $feature;
+  return $feature;
+}
+
+# Mark the record whose packed offset is $id as deleted by overwriting its
+# data with NUL bytes; the 2-byte length prefix is left intact so later
+# records can still be located.  get() treats a record starting with "\0"
+# as absent.
+#
+# Returns 1 on success, nothing if the length prefix cannot be read.
+#
+# NOTE(review): when the store was opened in "+>>" mode the handle is in
+# append mode, so the write below may land at end-of-file rather than at
+# the record -- confirm how remove() is used on non-freshly-created stores.
+sub remove {
+  my $self   = shift;
+  my $id     = shift;
+  my $offset = unpack("L",$id);
+  my $fh     = $self->{fh};
+  my $length;
+  seek($fh,$offset,SEEK_SET);
+  return unless (read($fh,$length,2) || 0) == 2;
+
+  # $length holds the packed 16-bit size; decode it before using it as a count
+  my $bytes = unpack("n",$length);
+
+  # an explicit seek is required when switching from reading to writing
+  seek($fh,$offset+2,SEEK_SET);
+  print $fh "\0"x$bytes;  # null it out
+  1;
+}
+
+# Move the file position to the offset packed in $idx and record the
+# resulting position in {next_idx}.
+sub _seek {
+  my ($self, $idx) = @_;
+  my $handle = $self->{fh};
+  seek($handle,unpack("L",$idx),SEEK_SET);
+  $self->{next_idx} = tell($handle);
+}
+
+# Rewind iteration to the start of the file.
+sub reset {
+  my ($self) = @_;
+  return $self->_seek(pack("L",0));
+}
+
+# Flatten a feature hash into a single string: the fields named by
+# @hash2array_map, followed by every attribute pair, joined with $;.
+sub _feature2string {
+  my $feature = shift;
+  my @a = @{$feature}{@hash2array_map};
+  push @a,map {@$_} @{$feature->{attributes}} if $feature->{attributes};
+  return join $;,@a;
+}
+
+# Inverse of _feature2string: split on $;, assign the leading fields to the
+# keys named by @hash2array_map and turn the remainder into [key,value]
+# attribute pairs.  group_id is always reset to undef.  Returns a hash ref.
+sub _string2feature {
+  my $string  = shift;
+  my (%feature,@attributes);
+
+  (@feature{@hash2array_map},@attributes) = split $;,$string;
+  while (@attributes) {
+    my ($key,$value) = splice(@attributes,0,2);
+    push @{$feature{attributes}},[$key,$value];
+  }
+  $feature{group_id} = undef;
+  \%feature;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,355 @@
+package Bio::DB::GFF::Adaptor::biofetch;
+#$Id: biofetch.pm,v 1.17.8.1 2006/10/02 23:10:16 sendu Exp $
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::biofetch -- Cache BioFetch objects in a Bio::DB::GFF database
+
+=head1 SYNOPSIS
+
+Proof of principle.  Not for production use.
+
+=head1 DESCRIPTION
+
+This adaptor is a proof-of-principle.  It is used to fetch BioFetch
+sequences into a Bio::DB::GFF database (currently uses a hard-coded
+EMBL database) as needed.  This allows the Generic Genome Browser to
+be used as a Genbank/EMBL browser.
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::BioFetch;
+use Bio::SeqIO;
+
+use vars qw(%preferred_tags);
+
+# THIS IS WRONG: biofetch should delegate to an underlying
+# database adaptor, and not inherit from one.
+use base qw(Bio::DB::GFF::Adaptor::dbi::mysql);
+
+# priority for choosing names of CDS tags, higher is higher priority
+%preferred_tags = (
+		      strain        => 10,
+		      organism      => 20,
+		      protein_id    => 40,
+		      locus_tag     => 50,
+		      locus         => 60,
+		      gene          => 70,
+		      standard_name => 80,
+		      );
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(-adaptor=>'biofetch',@args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    :   -adaptor : required.  Which adaptor to use; biofetch for mysql, biofetch_oracle for Oracle
+             -preferred_tags : optional.  A hash of {classname => weight,...}
+                               used to determine the class and name of the feature
+                               when a choice of possible feature classes is available
+                               (e.g. a feature has both a 'gene' and a 'locus' tag).
+                               Common defaults are provided that work well for eukaryotic
+                               features (but not well for viral/prokaryotic)
+              see below for additional arguments.
+ Status  : Public
+
+This is the constructor for the adaptor.  It is called automatically
+by Bio::DB::GFF-E<gt>new.  In addition to arguments that are common among
+all adaptors, the following class-specific arguments are recognized:
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:mysql:ens0040'
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+  -proxy         [['http','ftp'],'http://proxy:8080']
+
+  -source        source to use for loaded features ('EMBL')
+
+-dsn,-user and -pass indicate the local database to cache results in,
+and are as per Bio::DB::GFF::Adaptor::dbi.  The -proxy argument allows
+you to set the biofetch web proxy, and uses the same syntax described
+for the proxy() method of L<Bio::DB::WebDBSeqI>, except that the
+argument must be passed as an array reference.
+
+=cut
+
+# Construct the adaptor via the parent class, then record the biofetch
+# specific options: -preferred_tags (defaults to %preferred_tags), -source
+# (defaults to 'EMBL') and -proxy.
+sub new {
+  my $class = shift;
+  my $self  = $class->SUPER::new(@_);
+  my ($preferred,$proxy,$source) = rearrange(['PREFERRED_TAGS','PROXY','SOURCE'],@_);
+
+  # if the caller sent their own preferences, then use these, otherwise use defaults.
+  $self->_preferred_tags($preferred ? $preferred : \%preferred_tags);
+  $self->_source($source || 'EMBL');
+
+  if ($proxy) {
+    # NOTE(review): a non-reference -proxy value is run through string eval.
+    # The POD says the argument must be an array reference; never pass
+    # untrusted text here.
+    my @args = ref($proxy) ? @$proxy : eval $proxy;
+    $self->{_proxy} = \@args if @args;
+  }
+  $self;
+}
+
+# Look the segment up in the local database first; if nothing is found and
+# the requested class looks like a reference class (or refseq/swall/embl),
+# fetch the entry through load_from_embl() and look it up again.
+sub segment {
+  my $self = shift;
+  my @found = $self->SUPER::segment(@_);
+
+  unless (@found) {
+    my $refclass = $self->refclass;
+    my %args     = $self->setup_segment_args(@_);
+
+    if ($args{-class} && $args{-class} =~ /$refclass/oi) {
+      $self->load_from_embl('embl'=>$args{-name}) or return;
+      @found = $self->SUPER::segment(@_);
+    }
+    elsif ($args{-class} && $args{-class} =~ /refseq|swall|embl/i) { #hack to get refseq names
+      $self->load_from_embl(lc($args{-class})=>$args{-name}) or return;
+      $args{-class} = $self->refclass;
+      @found = $self->SUPER::segment(%args);
+    }
+  }
+
+  $self->_multiple_return_args(@found);
+}
+
+# The class of every reference sequence is 'Sequence'; the optional
+# reference name argument is accepted but ignored.
+sub refclass {
+  my ($self, $refname) = @_;
+  return 'Sequence';
+}
+
+# Fetch accession $acc from BioFetch database $db and load it into the
+# local GFF database.  One BioFetch handle per $db is created on demand and
+# cached in $self->{_biofetch}.
+#
+# Throws when $acc is missing.  Returns 1 on success and nothing when the
+# fetch fails (errors from get_Seq_by_id are swallowed by eval).
+sub load_from_embl {
+  my ($self, $db, $acc) = @_;
+  $self->throw('Must provide an accession ID') unless $acc;
+
+  my $fetcher = $self->{_biofetch}{$db};
+  unless ($fetcher) {
+    $fetcher = Bio::DB::BioFetch->new(-db=>$db);
+    $self->{_biofetch}{$db} = $fetcher;
+    $fetcher->retrieval_type('tempfile');
+    $fetcher->proxy(@{$self->{_proxy}}) if $self->{_proxy};
+  }
+
+  my $seq = eval {$fetcher->get_Seq_by_id($acc)};
+  return unless $seq;
+
+  $self->_load_embl($acc,$seq);
+  return 1;
+}
+
+# Load the first sequence of a flat file into the database.  Files ending
+# in .gb, .genbank or .gbk (any case) are parsed as GenBank, everything
+# else as EMBL.  Returns 1.
+sub load_from_file {
+  my ($self, $file) = @_;
+
+  my $format = 'embl';
+  $format = 'genbank' if $file =~ /\.(gb|genbank|gbk)$/i;
+
+  my $reader = Bio::SeqIO->new( '-format' => $format, -file => $file);
+  my $entry  = $reader->next_seq;
+
+  $self->_load_embl($entry->accession,$entry);
+  return 1;
+}
+
+# Convert a sequence object into GFF lines and load them, followed by the
+# DNA itself.
+#
+# Arguments: ($acc, $seq) -- the accession to file everything under and the
+# sequence object.  A top-level 'region' line spanning the whole sequence is
+# written first (with a Note and Alias attributes for the other accessions),
+# then one line per feature and, for multi-part locations, one line per
+# sub-location.  Returns 1.
+sub _load_embl {
+  my $self = shift;
+  my $acc  = shift;
+  my $seq  = shift;
+  my $refclass = $self->refclass;
+  my $locus    = $seq->id;
+  my $source   = $self->_source;
+
+  # begin loading
+  $self->setup_load();
+
+  # first synthesize the entry for the top-level feature
+  my @aliases;
+  foreach ($seq->accession,$seq->get_secondary_accessions) {
+    next if lc($_) eq lc($acc);
+    push @aliases,[Alias => $_];
+  }
+  $self->load_gff_line(
+    {
+      ref    => $acc,
+      class  => $refclass,
+      source => $source,
+#     method => 'origin',
+      method => 'region',
+      start  => 1,
+      stop   => $seq->length,
+      score  => undef,
+      strand => '.',
+      phase  => '.',
+      gclass => $self->refclass,
+      gname  => $acc,
+      tstart => undef,
+      tstop  => undef,
+      attributes  => [[Note => $seq->desc],@aliases],
+    }
+  );
+
+  # now load each feature in turn
+  my ($transcript_version,$mRNA_version) = (0,0);
+  for my $feat ($seq->all_SeqFeatures) {
+    my $attributes = $self->get_attributes($feat);
+    my $name       = $self->guess_name($attributes);
+
+    my $location = $feat->location;
+    my @segments = map {[$_->start,$_->end,$_->seq_id]}
+      $location->can('sub_Location') ? $location->sub_Location : $location;
+
+# this changed CDS to coding, but that is the wrong thing to do, since
+# CDS is in SOFA and coding is not
+#    my $type     =   $feat->primary_tag eq 'CDS'   ? 'coding'
+#                   : $feat->primary_tag;
+    my $type=  $feat->primary_tag;
+    next if (lc($type) eq 'contig');
+#    next if (lc($type) eq 'variation');
+
+    if (lc($type) eq 'variation' and $feat->length == 1) {
+      $type = 'SNP';
+    } elsif (lc($type) eq 'variation' ) {
+      $type = 'chromosome_variation';
+    }
+
+    if ($type  eq 'source') {
+      $type = 'region';
+    }
+
+    if ($type =~ /misc.*RNA/i) {
+      $type = 'RNA';
+    }
+
+    if ($type eq 'misc_feature' and $name->[1] =~ /similar/i) {
+      $type = 'computed_feature_by_similarity';
+    } elsif ($type eq 'misc_feature') {
+      warn "skipping a misc_feature\n";
+      next;
+    }
+
+    my $parttype =  $feat->primary_tag eq 'mRNA'   ? 'exon' : $feat->primary_tag;
+
+    # transcripts are numbered per gene; a CDS takes the number of the
+    # most recent mRNA
+    if ($type eq 'gene') {
+      $transcript_version = 0;
+      $mRNA_version       = 0;
+    } elsif ($type eq 'mRNA') {
+      $name->[1] = sprintf("%s.t%02d",$name->[1],++$transcript_version);
+    } elsif ($type eq 'CDS') {
+      $name->[0] = 'mRNA';
+      $name->[1] = sprintf("%s.t%02d",$name->[1],$transcript_version);
+    }
+
+    # An undefined or zero strand means "unknown" and is written as '.',
+    # matching the biofetch_oracle adaptor; only a negative strand is '-'.
+    my $strand = $feat->strand;
+    my $str    = !$strand      ? '.'
+               : $strand > 0   ? '+'
+               :                 '-';
+
+    # a multi-part CDS is represented only by its parts, written below
+    $self->load_gff_line( {
+      ref    => $acc,
+      class  => $refclass,
+      source => $source,
+      method => $type,
+      start  => $location->start,
+      stop   => $location->end,
+      score  => $feat->score || undef,
+      strand => $str,
+      phase  => $feat->frame || '.',
+      gclass => $name->[0],
+      gname  => $name->[1],
+      tstart => undef,
+      tstop  => undef,
+      attributes  => $attributes,
+    }
+    ) if ($type && ($type ne 'CDS' || @segments == 1));
+
+    # the attributes go on the parent line only
+    @$attributes = ();
+
+    next if @segments == 1;
+    for my $segment (@segments) {
+      $self->load_gff_line( {
+        ref    => $segment->[2] eq $locus ? $acc : $segment->[2],
+        class  => $refclass,
+        source => $source,
+        method => $parttype,
+        start  => $segment->[0],
+        stop   => $segment->[1],
+        score  => $feat->score || undef,
+        strand => $str,
+        phase  => $feat->frame || '.',
+        gclass => $name->[0],
+        gname  => $name->[1],
+        tstart => undef,
+        tstop  => undef,
+        attributes  => $attributes,
+      }
+      );
+    }
+
+  }
+
+  # finish loading
+  $self->finish_load();
+
+  # now load the DNA
+  $self->load_sequence_string($acc,$seq->seq);
+
+  1;
+}
+
+# Collect every tag/value pair of a feature as a reference to an array of
+# [tag => value] pairs, in tag order.  Returns nothing when the feature has
+# no tags.
+sub get_attributes {
+  my ($self, $seq) = @_;
+
+  my @tags = $seq->all_tags;
+  return unless @tags;
+
+  my @pairs = map {
+    my $tag = $_;
+    map { [$tag => $_] } $seq->each_tag_value($tag);
+  } @tags;
+
+  return \@pairs;
+}
+
+# Pick the [tag => value] pair whose tag has the highest weight in
+# _preferred_tags (unlisted tags weigh 0) and return it as the feature's
+# [class => name].  The chosen pair is removed from @$attributes, which is
+# left sorted by ascending weight.
+sub guess_name {
+  my ($self, $attributes) = @_;
+# remove this fix when Lincoln fixes it properly
+  return ["Misc" => "Misc"] unless ($attributes);  # these are arbitrary, and possibly destructive defaults
+
+  my $weight_of = sub { $self->_preferred_tags->{$_[0]->[0]} || 0 };
+  my @ranked    = sort { $weight_of->($a) <=> $weight_of->($b) } @$attributes;
+
+  my $winner = pop @ranked;
+  @$attributes = @ranked;
+  return $winner;
+}
+
+
+# Get, or with an argument set, the hash ref of tag weights used by
+# guess_name().  Always returns the value now stored.
+sub _preferred_tags {
+  my ($self, @new) = @_;
+  if (@new) {
+    $self->{preferred_tags} = $new[0];
+  }
+  return $self->{preferred_tags};
+}
+
+# Get, or with an argument set, the source label written on loaded features.
+sub _source {
+  my ($self, @new) = @_;
+  if (@new) {
+    $self->{source} = $new[0];
+  }
+  return $self->{source};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch_oracle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch_oracle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/biofetch_oracle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,301 @@
+package Bio::DB::GFF::Adaptor::biofetch_oracle;
+
+#$Id: biofetch_oracle.pm,v 1.4.8.1 2006/10/02 23:10:16 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::biofetch_oracle -- Cache BioFetch objects in a Bio::DB::GFF database
+
+=head1 SYNOPSIS
+
+Proof of principle.  Not for production use.
+
+=head1 DESCRIPTION
+
+This adaptor is a proof-of-principle.  It is used to fetch BioFetch
+sequences into a Bio::DB::GFF database (currently uses a hard-coded
+EMBL database) as needed.  This allows the Generic Genome Browser to
+be used as a Genbank/EMBL browser.
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::BioFetch;
+use Bio::SeqIO;
+
+use vars qw(%default_preferred_tags);
+use base qw(Bio::DB::GFF::Adaptor::dbi::oracle);
+
+# priority for choosing names of CDS tags, higher is higher priority
+%default_preferred_tags = (
+		      strain        => 10,
+		      organism      => 20,
+		      protein_id    => 40,
+		      locus_tag     => 50,
+		      locus         => 60,
+		      gene          => 70,
+		      standard_name => 80,
+		      );
+
+# Get the hash ref of tag weights used by guess_name().  A new value is
+# stored only when it is true and its ref() type contains "HASH"; anything
+# else is ignored.
+sub _preferred_tags {
+    my $self = shift;
+    my $tags = shift;
+    $self->{preferred_tags} = $tags
+        if $tags && (ref($tags) =~ /HASH/);
+    return $self->{preferred_tags};
+}
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(-adaptor=>'biofetch_oracle', -preferred_tags => \%preferred, @args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    :   -adaptor : required.  Which adaptor to use; biofetch for mysql, biofetch_oracle for Oracle
+             -preferred_tags : optional.  A hash of {classname => weight,...}
+                               used to determine the class and name of the feature
+                               when a choice of possible feature classes is available
+                               (e.g. a feature has both a 'gene' and a 'locus' tag).
+                               Common defaults are provided that work well for eukaryotic
+                               features (but not well for viral/prokaryotic)
+              see below for additional arguments.                             
+ Status  : Public
+
+This is the constructor for the adaptor.  It is called automatically
+by Bio::DB::GFF-E<gt>new.  In addition to arguments that are common among
+all adaptors, the following class-specific arguments are recognized:
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:mysql:ens0040'
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+  -proxy         [['http','ftp'],'http://proxy:8080']
+
+  -create        initialize the database
+
+-dsn,-user and -pass indicate the local database to cache results in,
+and are as per Bio::DB::GFF::Adaptor::dbi.  The -proxy argument allows
+you to set the biofetch web proxy, and uses the same syntax described
+for the proxy() method of L<Bio::DB::WebDBSeqI>, except that the
+argument must be passed as an array reference.
+
+=cut
+
+# Construct the adaptor via the parent class from a single argument
+# structure, then record -preferred_tags (defaulting to
+# %default_preferred_tags) and -proxy.
+sub new {
+  my ($class, $args) = @_;
+  my $self = $class->SUPER::new($args);
+
+  # if the caller sent their own preferences, then use these, otherwise use defaults.
+  my ($preferred) = rearrange(['PREFERRED_TAGS'],$args);
+  $self->_preferred_tags($preferred || \%default_preferred_tags);
+
+  my ($proxy) = rearrange(['PROXY'],$args);
+  if ($proxy) {
+    # NOTE(review): a non-reference -proxy value goes through string eval;
+    # the POD says the argument must be an array reference.
+    my @proxy_args = ref($proxy) ? @$proxy : eval $proxy;
+    $self->{_proxy} = \@proxy_args if @proxy_args;
+  }
+
+  return $self;
+}
+
+# Look the segment up in the local database first; if nothing is found and
+# the requested class looks like a reference class (or refseq/swall/embl),
+# fetch the entry through load_from_embl() and look it up again.
+sub segment {
+  my $self = shift;
+  my @found = $self->SUPER::segment(@_);
+
+  unless (@found) {
+    my $refclass = $self->refclass;
+    my %args     = $self->setup_segment_args(@_);
+
+    if ($args{-class} && $args{-class} =~ /$refclass/oi) {
+      $self->load_from_embl('embl'=>$args{-name}) or return;
+      @found = $self->SUPER::segment(@_);
+    }
+    elsif ($args{-class} && $args{-class} =~ /refseq|swall|embl/i) { #hack to get refseq names
+      $self->load_from_embl(lc($args{-class})=>$args{-name}) or return;
+      $args{-class} = $self->refclass;
+      @found = $self->SUPER::segment(%args);
+    }
+  }
+
+  $self->_multiple_return_args(@found);
+}
+
+# The class of every reference sequence is 'Accession'; the optional
+# reference name argument is accepted but ignored.
+sub refclass {
+  my ($self, $refname) = @_;
+  return 'Accession';
+}
+
+# Fetch accession $acc from BioFetch database $db and load it into the
+# local GFF database.  One BioFetch handle per $db is created on demand and
+# cached in $self->{_biofetch}.
+#
+# Throws when $acc is missing.  Returns 1 on success and nothing when the
+# fetch fails (errors from get_Seq_by_id are swallowed by eval).
+sub load_from_embl {
+  my ($self, $db, $acc) = @_;
+  $self->throw('Must provide an accession ID') unless $acc;
+
+  my $fetcher = $self->{_biofetch}{$db};
+  unless ($fetcher) {
+    $fetcher = Bio::DB::BioFetch->new(-db=>$db);
+    $self->{_biofetch}{$db} = $fetcher;
+    $fetcher->retrieval_type('tempfile');
+    $fetcher->proxy(@{$self->{_proxy}}) if $self->{_proxy};
+  }
+
+  my $seq = eval {$fetcher->get_Seq_by_id($acc)};
+  return unless $seq;
+
+  $self->_load_embl($acc,$seq);
+  return 1;
+}
+
+# Load the first sequence of a flat file into the database.  Files ending
+# in .gb, .genbank or .gbk (any case) are parsed as GenBank, everything
+# else as EMBL.  Returns 1.
+sub load_from_file {
+  my ($self, $file) = @_;
+
+  my $format = 'embl';
+  $format = 'genbank' if $file =~ /\.(gb|genbank|gbk)$/i;
+
+  my $reader = Bio::SeqIO->new( '-format' => $format, -file => $file);
+  my $entry  = $reader->next_seq;
+
+  $self->_load_embl($entry->accession,$entry);
+  return 1;
+}
+
+# Convert a sequence object into GFF lines (source 'EMBL') and load them,
+# followed by the DNA itself.
+#
+# Arguments: ($acc, $seq).  A top-level 'origin' line spanning the whole
+# sequence is written first (with a Note and Alias attributes for the other
+# accessions).  Every gene or CDS feature then gets a parent line (a CDS is
+# written as method 'mRNA'), and every feature gets one line per location
+# part (the parts of a gene are written as 'exon').  Returns 1.
+sub _load_embl {
+  my $self = shift;
+  my $acc  = shift;
+  my $seq  = shift;
+  my $refclass = $self->refclass;
+  my $locus    = $seq->id;
+
+  # begin loading
+  $self->setup_load();
+
+  # first synthesize the entry for the top-level feature
+  my @aliases;
+  foreach ($seq->accession,$seq->get_secondary_accessions) {
+    next if lc($_) eq lc($acc);
+    push @aliases,[Alias => $_];
+  }
+  $self->load_gff_line(
+    {
+      ref    => $acc,
+      class  => $refclass,
+      source => 'EMBL',
+      method => 'origin',
+      start  => 1,
+      stop   => $seq->length,
+      score  => undef,
+      strand => '.',
+      phase  => '.',
+      gclass => $self->refclass,
+      gname  => $acc,
+      tstart => undef,
+      tstop  => undef,
+      attributes  => [[Note => $seq->desc],@aliases],
+    }
+  );
+
+  # now load each feature in turn
+  for my $feat ($seq->all_SeqFeatures) {
+    my $attributes = $self->get_attributes($feat);
+    my $name       = $self->guess_name($attributes);
+
+    my $location = $feat->location;
+    my @segments = map {[$_->start,$_->end,$_->seq_id]}
+      $location->can('sub_Location') ? $location->sub_Location : $location;
+
+    my $type     =  $feat->primary_tag eq 'CDS' ? 'mRNA'  : $feat->primary_tag;
+    my $parttype =  $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag;
+
+    # the strand symbol is the same for the parent line and all its parts
+    my $str = $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.');
+
+    if ($feat->primary_tag =~ /^(gene|CDS)$/) {
+      $self->load_gff_line( {
+        ref    => $acc,
+        class  => $refclass,
+        source => 'EMBL',
+        method => $type,
+        start  => $location->start,
+        stop   => $location->end,
+        score  => $feat->score || undef,
+        strand => $str,
+        phase  => $feat->frame || '.',
+        gclass => $name->[0],
+        gname  => $name->[1],
+        tstart => undef,
+        tstop  => undef,
+        attributes  => $attributes,
+      }
+      );
+      # the attributes go on the parent line only
+      @$attributes = ();
+    }
+
+    for my $segment (@segments) {
+
+      $self->load_gff_line( {
+        ref    => $segment->[2] eq $locus ? $acc : $segment->[2],
+        class  => $refclass,
+        source => 'EMBL',
+        method => $parttype,
+        start  => $segment->[0],
+        stop   => $segment->[1],
+        score  => $feat->score || undef,
+        strand => $str,
+        phase  => $feat->frame || '.',
+        gclass => $name->[0],
+        gname  => $name->[1],
+        tstart => undef,
+        tstop  => undef,
+        attributes  => $attributes,
+      }
+      );
+    }
+
+  }
+
+  # finish loading
+  $self->finish_load();
+
+  # now load the DNA
+  $self->load_sequence_string($acc,$seq->seq);
+
+  1;
+}
+
+# Collect every tag/value pair of a feature as a reference to an array of
+# [tag => value] pairs, in tag order.  Returns nothing when the feature has
+# no tags.
+sub get_attributes {
+  my ($self, $seq) = @_;
+
+  my @tags = $seq->all_tags;
+  return unless @tags;
+
+  my @pairs = map {
+    my $tag = $_;
+    map { [$tag => $_] } $seq->each_tag_value($tag);
+  } @tags;
+
+  return \@pairs;
+}
+
+# Pick the [tag => value] pair whose tag has the highest weight in
+# _preferred_tags (unlisted tags weigh 0) and return it as the feature's
+# [class => name].  The chosen pair is removed from @$attributes, which is
+# left sorted by ascending weight.
+sub guess_name {
+  my ($self, $attributes) = @_;
+# remove this fix when Lincoln fixes it properly
+  return ["Misc" => "Misc"] unless ($attributes);  # these are arbitrary, and possibly destructive defaults
+
+  my $weight_of = sub { $self->_preferred_tags->{$_[0]->[0]} || 0 };
+  my @ranked    = sort { $weight_of->($a) <=> $weight_of->($b) } @$attributes;
+
+  my $winner = pop @ranked;
+  @$attributes = @ranked;
+  return $winner;
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,253 @@
+package Bio::DB::GFF::Adaptor::dbi::caching_handle;
+
+use strict;
+use DBI;
+use vars '$AUTOLOAD';
+use base qw(Bio::Root::Root);
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::caching_handle -- Cache for database handles
+
+=head1 SYNOPSIS
+
+ use Bio::DB::GFF::Adaptor::dbi::caching_handle;
+ $db  = Bio::DB::GFF::Adaptor::dbi::caching_handle->new('dbi:mysql:test');
+ $sth = $db->prepare('select * from foo');
+ @h   = $sth->fetchrow_array;
+ $sth->finish;
+
+=head1 DESCRIPTION
+
+This module handles a pool of database handles.  It was motivated by
+the MYSQL driver's {mysql_use_result} attribute, which dramatically
+improves query speed and memory usage, but forbids additional query
+statements from being evaluated while an existing one is in use.
+
+This module is a plug-in replacement for vanilla DBI.  It
+automatically activates the {mysql_use_result} attribute for the mysql
+driver, but avoids problems with multiple active statement handlers by
+creating new database handles as needed.
+
+=head1 USAGE
+
+The object constructor is
+Bio::DB::GFF::Adaptor::dbi::caching_handle-E<gt>new().  This is called
+like DBI-E<gt>connect() and takes the same arguments.  The returned object
+looks and acts like a conventional database handle.
+
+In addition to all the standard DBI handle methods, this package adds
+the following:
+
+=head2 dbi_quote
+
+ Title   : dbi_quote
+ Usage   : $string = $db->dbi_quote($sql,@args)
+ Function: perform bind variable substitution
+ Returns : query string
+ Args    : the query string and bind arguments
+ Status  : public
+
+This method replaces the bind variable "?" in a SQL statement with
+appropriately quoted bind arguments.  It is used internally to handle
+drivers that don't support argument binding.
+
+=head2 do_query
+
+ Title   : do_query
+ Usage   : $sth = $db->do_query($query,@args)
+ Function: perform a DBI query
+ Returns : a statement handler
+ Args    : query string and list of bind arguments
+ Status  : Public
+
+This method performs a DBI prepare() and execute(), returning a
+statement handle.  You will typically call fetch() or fetchrow_array()
+on the statement handle.  The parsed statement handle is cached for
+later use.
+
+=head2 debug
+
+ Title   : debug
+ Usage   : $debug = $db->debug([$debug])
+ Function: activate debugging messages
+ Returns : current state of flag
+ Args    : optional new setting of flag
+ Status  : public
+
+=cut
+
+# Create the handle pool.  The arguments are kept verbatim and passed to
+# DBI->connect whenever a new connection is needed; one connection is
+# opened immediately, and failure to do so throws.
+sub new {
+  my ($class, @dbi_args) = @_;
+
+  my %state = (
+    dbh   => [],
+    args  => \@dbi_args,
+    debug => 0,
+  );
+  my $self = bless \%state,$class;
+
+  unless ($self->dbh) {
+    $self->throw("Can't connect to database: " . DBI->errstr);
+  }
+  return $self;
+}
+
+# Forward any method not defined here to a free pooled handle, or to the
+# DBI class itself when there is no invocant.  DESTROY is never forwarded.
+sub AUTOLOAD {
+  my ($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
+  return if $func_name eq 'DESTROY';
+
+  my $self = shift;
+  return DBI->$func_name(@_) unless $self;
+  return $self->dbh->$func_name(@_);
+}
+
+# Get or set the debug flag.  The value returned is the one in effect
+# before any new value was stored.
+sub debug {
+  my ($self, @new) = @_;
+  my $previous = $self->{debug};
+  $self->{debug} = $new[0] if @new;
+  return $previous;
+}
+
+# Return a statement handle for $query on a free pooled connection.
+# Handles are cached per connection and query string, so a repeated query
+# on the same connection reuses its handle.  Throws when no connection is
+# available or the query cannot be prepared.
+sub prepare {
+  my ($self, $query) = @_;
+
+  # find a non-busy dbh
+  my $dbh = $self->dbh || $self->throw("Can't connect to database: " . DBI->errstr);
+
+  my $cached = $self->{$dbh}{$query};
+  if ($cached) {
+    warn "Using cached statement handler\n" if $self->debug;
+    return $cached;
+  }
+
+  warn "Creating new statement handler\n" if $self->debug;
+  my $sth = $dbh->prepare($query) || $self->throw("Couldn't prepare query $query:\n ".DBI->errstr."\n");
+  $self->{$dbh}{$query} = $sth;
+  return $sth;
+}
+
+# Prepare (or reuse) the statement for $query, execute it with @args as
+# bind values and return the statement handle.  Throws if execution fails.
+# With debugging on, the query is printed with its arguments substituted.
+sub do_query {
+  my $self = shift;
+  my ($query,@args) = @_;
+  warn $self->dbi_quote($query,@args),"\n" if $self->debug;
+  my $sth = $self->prepare($query);
+  $sth->execute(@args) || $self->throw("Couldn't execute query $query:\n ".DBI->errstr."\n");
+  $sth;
+}
+
+# Return a pooled connection wrapper that reports itself as not in use,
+# opening and pooling a new connection when every existing one is busy.
+# Returns nothing if the new connection cannot be made.
+sub dbh {
+  my $self = shift;
+
+  for my $pooled (@{$self->{dbh}}) {
+    return $pooled if $pooled->inuse == 0;
+  }
+
+  # if we get here, we must create a new one
+  warn "(Re)connecting to database\n" if $self->debug;
+  my $dbh = DBI->connect(@{$self->{args}});
+  return unless $dbh;
+
+  $dbh->{PrintError} = 0;
+
+  # for Oracle - to retrieve LOBs, need to define the length (Jul 15, 2002)
+  $dbh->{LongReadLen} = 100*65535;
+  $dbh->{LongTruncOk} = 0;
+
+  my $wrapper = Bio::DB::GFF::Adaptor::dbi::faux_dbh->new($dbh);
+  push @{$self->{dbh}},$wrapper;
+  return $wrapper;
+}
+
+=head2 attribute
+
+ Title   : attribute
+ Usage   : $value = $db->attribute(AttributeName , [$newvalue])
+ Function: get/set DBI::db handle attribute
+ Returns : current state of the attribute
+ Args    : name of the attribute and optional new setting of attribute
+ Status  : public
+
+  Under Bio::DB::GFF::Adaptor::dbi::caching_handle the DBI::db
+  attributes that are usually set using hashref calls are unavailable.
+  Use attribute() instead.  For example, instead of:
+
+    $dbh->{AutoCommit} = 0;
+
+  use
+
+    $dbh->attribute(AutoCommit=>0);
+
+=cut
+
+# Get (one argument) or set (two arguments) an attribute on the real
+# database handle inside a free pooled wrapper.  Any other argument count
+# returns nothing.
+sub attribute {
+  my ($self, @pair) = @_;
+  my $dbh = $self->dbh->{dbh};
+
+  if (@pair == 2) {
+    my ($name, $value) = @pair;
+    return $dbh->{$name} = $value;
+  }
+  if (@pair == 1) {
+    return $dbh->{$pair[0]};
+  }
+  return;
+}
+
+# Disconnect every pooled connection and empty the pool.
+sub disconnect {
+  my $self = shift;
+  for my $pooled (@{$self->{dbh}}) {
+    $pooled->disconnect if $pooled;
+  }
+  $self->{dbh} = [];
+}
+
+# Return $query with each "?" placeholder replaced, left to right, by the
+# next bind argument quoted through the database handle's quote() method.
+sub dbi_quote {
+  my $self = shift;
+  my ($query,@args) = @_;
+  my $dbh = $self->dbh;
+  $query =~ s/\?/$dbh->quote(shift @args)/eg;
+  $query;
+}
+
+package Bio::DB::GFF::Adaptor::dbi::faux_dbh;
+use vars '$AUTOLOAD';
+
+# Wrap a real database handle; it is kept under the {dbh} key.
+sub new {
+  my ($class, $dbh) = @_;
+  my %wrapper = (dbh => $dbh);
+  return bless \%wrapper,$class;
+}
+
+# Prepare a statement on the wrapped handle, switching on
+# {mysql_use_result} when the driver is named 'mysql'.  Returns nothing if
+# the prepare fails.
+sub prepare {
+  my $self = shift;
+  my $dbh  = $self->{dbh};
+
+  my $sth = $dbh->prepare(@_);
+  return unless $sth;
+
+  if ($self->{dbh}->{Driver}{Name} eq 'mysql') {
+    $sth->{mysql_use_result} = 1;
+  }
+  return $sth;
+}
+
+# Prepare a statement on the wrapped handle without touching any driver
+# attributes.  Returns nothing if the prepare fails.
+sub prepare_delayed {
+  my $self = shift;
+  my $sth  = $self->{dbh}->prepare(@_);
+  return unless $sth;
+  return $sth;
+}
+
+# Report the wrapped handle's ActiveKids attribute; the pool treats 0 as
+# "free".
+sub inuse {
+    my $self = shift;
+    return $self->{dbh}->{ActiveKids};
+}
+
+# Explicit empty destructor, so that object destruction is not routed
+# through AUTOLOAD.
+sub DESTROY { }
+
+# Forward any method not defined in this package to the wrapped database
+# handle, passing the remaining arguments through unchanged.
+sub AUTOLOAD {
+  # split "Package::method" into its two parts; only the method name is used
+  my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
+  return if $func_name eq 'DESTROY';
+  my $self = shift;
+  # nothing is forwarded when no handle is stored
+  if( defined $self->{dbh} ) {
+      $self->{dbh}->$func_name(@_);
+  }
+}
+
+1;
+
+__END__
+
+=head1 BUGS
+
+Report to the author.
+
+=head1 SEE ALSO
+
+L<DBI>, L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/iterator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/iterator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/iterator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,74 @@
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::iterator - iterator for Bio::DB::GFF::Adaptor::dbi
+
+=head1 SYNOPSIS
+
+For internal use only
+
+=head1 DESCRIPTION
+
+This is an internal module that is used by the Bio::DB::GFF DBI
+adaptor to return an iterator across a sequence feature query.  The
+object has a single method, next_feature(), that returns the next
+feature from the query.  The method next_seq() is an alias for
+next_feature().
+
+=head1 BUGS
+
+None known yet.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+package Bio::DB::GFF::Adaptor::dbi::iterator;
+use strict;
+use Bio::Root::Version;
+
+# Indices into the array that backs each iterator object (see new()).
+use constant STH         => 0;   # statement handle that rows are fetched from
+use constant CALLBACK    => 1;   # code ref called with each fetched row
+use constant CACHE       => 2;   # array ref of features not yet handed out
+
+# next_seq() is an alias for next_feature()
+*next_seq = \&next_feature;
+
+# Build an iterator from a statement handle and a row callback.  The
+# feature cache starts out empty.
+sub new {
+  my ($class,$sth,$callback) = @_;
+  my @slots;
+  $slots[STH]      = $sth;
+  $slots[CALLBACK] = $callback;
+  $slots[CACHE]    = [];
+  return bless \@slots,$class;
+}
+
+# Return the next feature, or nothing when the query is exhausted.
+#
+# Cached features are handed out first.  Otherwise rows are fetched and
+# passed to the callback until it returns a true value (expected to be an
+# array ref of features).  When no rows remain the statement handle is
+# finished and dropped, and the callback is called once more with no
+# arguments so it can return whatever it still holds.
+sub next_feature {
+  my $self = shift;
+  return shift @{$self->[CACHE]} if @{$self->[CACHE]};
+  my $sth = $self->[STH] or return;
+  my $callback = $self->[CALLBACK];
+
+  my $features;
+  while (1) {
+    if (my @row = $sth->fetchrow_array) {
+      $features = $callback->(@row);
+      last if $features;
+    } else {
+      $sth->finish;
+      undef $self->[STH];
+      $features = $callback->();
+      last;
+    }
+  }
+
+  unless ($features) {
+    # Keep the cache slot a valid array ref: leaving it undefined makes the
+    # "@{$self->[CACHE]}" test above die under strict refs if next_feature()
+    # is called again after the iterator is exhausted.
+    $self->[CACHE] = [];
+    return;
+  }
+  $self->[CACHE] = $features;
+  shift @{$self->[CACHE]};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysql.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysql.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysql.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,864 @@
+package Bio::DB::GFF::Adaptor::dbi::mysql;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::mysql -- Database adaptor for a specific mysql schema
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>
+
+=cut
+
+# a simple mysql adaptor
+use strict;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::GFF::Util::Binning;
+use base qw(Bio::DB::GFF::Adaptor::dbi);
+
+use constant MAX_SEGMENT => 100_000_000;  # the largest a segment can get
+
+# Extent of a group: placeholders are (gname, gclass).  Yields one row per
+# reference sequence and strand with the smallest start and largest stop.
+use constant GETSEQCOORDS =><<END;
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup
+  WHERE fgroup.gname=?
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    GROUP BY fref,fstrand,gname
+END
+;
+
+# Same columns as GETSEQCOORDS, but the first placeholder is compared for
+# equality with the value of an 'Alias' attribute; the second is gclass.
+use constant GETALIASCOORDS =><<END;
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup,fattribute,fattribute_to_feature
+  WHERE fattribute_to_feature.fattribute_value=?
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,gname
+END
+;
+
+# Variant of GETALIASCOORDS that matches the 'Alias' attribute value with
+# LIKE, so the first placeholder may contain SQL wildcards.
+use constant GETALIASLIKE =><<END;
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup,fattribute,fattribute_to_feature
+  WHERE fattribute_to_feature.fattribute_value LIKE ?
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,gname
+END
+;
+
+# Extent of a group restricted to one reference sequence: placeholders are
+# (gname, gclass, fref).  Unlike GETSEQCOORDS, gname is not selected.
+use constant GETFORCEDSEQCOORDS =><<END;
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand
+  FROM fdata,fgroup
+  WHERE fgroup.gname=?
+    AND fgroup.gclass=?
+    AND fdata.fref=?
+    AND fgroup.gid=fdata.gid
+    GROUP BY fref,fstrand
+END
+;
+
+# Full-text search over attribute values.  The search string is bound twice:
+# once to compute the score column and once in the WHERE clause.
+use constant FULLTEXTSEARCH => <<END;
+SELECT distinct gclass,gname,fattribute_value,MATCH(fattribute_value) AGAINST (?) as score
+  FROM fgroup,fattribute_to_feature,fdata
+  WHERE fgroup.gid=fdata.gid
+    AND fdata.fid=fattribute_to_feature.fid
+    AND MATCH(fattribute_value) AGAINST (?)
+END
+;
+
+=head1 DESCRIPTION
+
+This adaptor implements a specific mysql database schema that is
+compatible with Bio::DB::GFF.  It inherits from
+Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
+
+The schema uses several tables:
+
+=over 4
+
+=item fdata
+
+This is the feature data table.  Its columns are:
+-
+    fid	           feature ID (integer)
+    fref           reference sequence name (string)
+    fstart         start position relative to reference (integer)
+    fstop          stop position relative to reference (integer)
+    ftypeid        feature type ID (integer)
+    fscore         feature score (float); may be null
+    fstrand        strand; one of "+" or "-"; may be null
+    fphase         phase; one of 0, 1 or 2; may be null
+    gid            group ID (integer)
+    ftarget_start  for similarity features, the target start position (integer)
+    ftarget_stop   for similarity features, the target stop position (integer)
+
+Note that it would be desirable to normalize the reference sequence
+name, since there are usually many features that share the same
+reference feature.  However, in the current schema, query performance
+suffers dramatically when this additional join is added.
+
+=item fgroup
+
+This is the group table. There is one row for each group.  Columns:
+
+    gid	      the group ID (integer)
+    gclass    the class of the group (string)
+    gname     the name of the group (string)
+
+The group table serves multiple purposes.  As you might expect, it is
+used to cluster features that logically belong together, such as the
+multiple exons of the same transcript.  It is also used to assign a
+name and class to a singleton feature.  Finally, the group table is
+used to identify the target of a similarity hit.  This is consistent
+with the way in which the group field is used in the GFF version 2
+format.
+
+The fgroup.gid field joins with the fdata.gid field. 
+
+Examples:
+
+  mysql> select * from fgroup where gname='sjj_2L52.1';
+  +-------+-------------+------------+
+  | gid   | gclass      | gname      |
+  +-------+-------------+------------+
+  | 69736 | PCR_product | sjj_2L52.1 |
+  +-------+-------------+------------+
+  1 row in set (0.70 sec)
+
+  mysql> select fref,fstart,fstop from fdata,fgroup 
+            where gclass='PCR_product' and gname = 'sjj_2L52.1' 
+                  and fdata.gid=fgroup.gid;
+  +---------------+--------+-------+
+  | fref          | fstart | fstop |
+  +---------------+--------+-------+
+  | CHROMOSOME_II |   1586 |  2355 |
+  +---------------+--------+-------+
+  1 row in set (0.03 sec)
+
+=item ftype
+
+This table contains the feature types, one per row.  Columns are:
+
+    ftypeid      the feature type ID (integer)
+    fmethod      the feature type method name (string)
+    fsource      the feature type source name (string)
+
+The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:
+
+  mysql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype 
+         where gclass='PCR_product' 
+               and gname = 'sjj_2L52.1'
+               and fdata.gid=fgroup.gid
+               and fdata.ftypeid=ftype.ftypeid;
+  +---------------+--------+-------+-------------+-----------+
+  | fref          | fstart | fstop | fmethod     | fsource   |
+  +---------------+--------+-------+-------------+-----------+
+  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
+  +---------------+--------+-------+-------------+-----------+
+  1 row in set (0.08 sec)
+
+=item fdna
+
+This table holds the raw DNA of the reference sequences.  It has three
+columns:
+
+    fref          reference sequence name (string)
+    foffset       offset of this sequence
+    fdna          the DNA sequence (longblob)
+
+To overcome problems loading large blobs, DNA is automatically
+fragmented into multiple segments when loading, and the position of
+each segment is stored in foffset.  The fragment size is controlled by
+the -clump_size argument during initialization.
+
+=item fattribute_to_feature
+
+This table holds "attributes", which are tag/value pairs stuffed into
+the GFF line.  The first tag/value pair is treated as the group, and
+anything else is treated as an attribute (weird, huh?).
+
+ CHR_I assembly_tag Finished     2032 2036 . + . Note "Right: cTel33B"
+ CHR_I assembly_tag Polymorphism 668  668  . + . Note "A->C in cTel33B"
+
+The columns of this table are:
+
+    fid                 feature ID (integer)
+    fattribute_id       ID of the attribute (integer)
+    fattribute_value    text of the attribute (text)
+
+The fdata.fid column joins with fattribute_to_feature.fid.
+
+=item fattribute
+
+This table holds the normalized names of the attributes.  Fields are:
+
+  fattribute_id      ID of the attribute (integer)
+  fattribute_name    Name of the attribute (varchar)
+
+=back
+
+=head2 Data Loading Methods
+
+In addition to implementing the abstract SQL-generating methods of
+Bio::DB::GFF::Adaptor::dbi, this module also implements the data
+loading functionality of Bio::DB::GFF.
+
+=cut
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(@args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    : see below
+ Status  : Public
+
+The new constructor is identical to the "dbi" adaptor's new() method,
+except that the prefix "dbi:mysql" is added to the database DSN identifier
+automatically if it is not there already.
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:mysql:ens0040' or "ens0040"
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+=cut
+
+#'
+
+# Constructor.  Pulls the data source out of the argument list (accepted
+# under the names FEATUREDB, DB or DSN), prefixes a plain database name
+# with "dbi:mysql:", and hands everything to the parent constructor.
+# A reference passed as the data source is left untouched.
+sub new {
+  my $class = shift;
+  my ($dsn,$other) = rearrange([
+                                [qw(FEATUREDB DB DSN)],
+                               ],@_);
+  $dsn = "dbi:mysql:$dsn" if !ref($dsn) && $dsn !~ /^(?:dbi|DBI):/;
+  my $self = $class->SUPER::new(-dsn=>$dsn,%$other);
+  $self;
+}
+
+=head2 get_dna
+
+ Title   : get_dna
+ Usage   : $string = $db->get_dna($name,$start,$stop,$class)
+ Function: get DNA string
+ Returns : a string
+ Args    : name, class, start and stop of desired segment
+ Status  : Public
+
+This method performs the low-level fetch of a DNA substring given its
+name, class and the desired range.  This should probably be moved to
+the parent class.
+
+=cut
+
+# SQL text used to look up the coordinates of a named group
+sub getseqcoords_query {
+   return GETSEQCOORDS;
+}
+
+# SQL text used to look up coordinates by exact alias
+sub getaliascoords_query {
+  return GETALIASCOORDS;
+}
+
+
+# SQL text used to look up coordinates on one given reference sequence
+sub getforcedseqcoords_query {
+  return GETFORCEDSEQCOORDS;
+}
+
+
+# SQL text used to look up coordinates by alias pattern (LIKE)
+sub getaliaslike_query {
+  return GETALIASLIKE;
+}
+
+
+# override parent
+# Look up absolute coordinates for ($name,$class,$refseq).  The parent's
+# get_abscoords() is tried first; if it returns nothing the name is looked
+# up as an alias, using LIKE when the name contains "*" wildcards.
+# Returns an array ref of rows, or nothing (after calling error()) when no
+# row matched.
+sub get_abscoords_bkup {
+  my ($self,@args) = @_;
+  my ($name,$class) = @args;
+
+  my $found = $self->SUPER::get_abscoords(@args);
+  return $found if $found;
+
+  # "*" wildcards become SQL "%"; only then is the LIKE query needed
+  my $query = ($name =~ s/\*/%/g) ? GETALIASLIKE : GETALIASCOORDS;
+  my $sth   = $self->dbh->do_query($query,$name,$class);
+
+  my @rows;
+  while (my @row = $sth->fetchrow_array) {
+    push @rows,[@row];
+  }
+  $sth->finish;
+
+  unless (@rows) {
+    $self->error("$name not found in database");
+    return;
+  }
+  return \@rows;
+}
+
+
+
+# Build the column list of the features SELECT.
+#   Args    : optional hashref of options; reads {bin_width} and {attributes}
+#   Returns : SQL fragment
+# With a true bin_width the fragment selects binned pseudo-features with a
+# count(*) score; otherwise it selects the plain feature columns.  A
+# count(fdata.fid) column is appended when more than one attribute is given.
+sub make_features_select_part {
+  my $self = shift;
+  my $options = shift || {};
+  my $width  = $options->{bin_width};
+  my $select;
+  if ($width) {
+    $select = <<END;
+fref,
+  1+$width*floor(fstart/$width)   as fstart,
+  $width*(1+floor(fstart/$width)) as fstop,
+  IF(ISNULL(fsource),fmethod,concat(fmethod,':',fsource)),'bin',
+  count(*) as fscore,
+  '.','.','bin',
+  IF(ISNULL(fsource),concat(fref,':',fmethod),concat(fref,':',fmethod,':',fsource)),
+  NULL,NULL,NULL,NULL
+END
+  }
+  else {
+    $select = <<END;
+fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
+END
+  }
+  if ($options->{attributes} && keys %{$options->{attributes}}>1) {
+    $select .= ",count(fdata.fid)";
+  }
+  return $select;
+}
+
+
+# IMPORTANT NOTE:
+# WHETHER OR NOT THIS WORKS IS CRITICALLY DEPENDENT ON THE RELATIVE MAGNITUDE OF THE
+# Build the table list of the features SELECT.
+#   Args    : sparse-types flag, optional hashref of options
+#   Returns : SQL fragment ending in a newline
+# An index hint is attached to fdata: gid when {sparse_groups} is set,
+# otherwise ftypeid when the sparse-types flag is set.  The attribute
+# tables are joined in only when {attributes} is set.
+sub make_features_from_part {
+  my ($self,$sparse_types,$options) = @_;
+  $options ||= {};
+
+  my $index = '';
+  if ($options->{sparse_groups}) {
+    $index = ' USE INDEX(gid)';
+  }
+  elsif ($sparse_types) {
+    $index = ' USE INDEX(ftypeid)';
+  }
+
+  my $tables = "fdata${index},ftype,fgroup";
+  $tables   .= ",fattribute,fattribute_to_feature" if $options->{attributes};
+  return "$tables\n";
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text search string",$limit)
+ Function: Search the notes for a text string, using mysql full-text search
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+This is a mysql-specific method.  Given a search string, it performs a
+full-text search of the notes table and returns an array of results.
+Each row of the returned array is an arrayref containing the following fields:
+
+  column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
+  column 2     The text of the note
+  column 3     A relevance score.
+
+=cut
+
+# Full-text search of attribute values.
+#   Args    : search string, optional row limit
+#   Returns : list of [Featname object, note text, relevance] array refs
+# "*" and "?" are stripped from the search string before it is used.  Rows
+# with an empty class or name are ignored, and only notes that contain
+# every whitespace-separated word of the search string (case-insensitive,
+# literal match) are returned.
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  $search_string =~ tr/*?//d;
+
+  my $query = FULLTEXTSEARCH;
+  # NOTE(review): $limit is interpolated into the SQL text unchecked, so
+  # callers must pass a trusted integer.
+  $query .= " limit $limit" if defined $limit;
+  my $sth = $self->dbh->do_query($query,$search_string,$search_string);
+  my @results;
+  while (my ($class,$name,$note,$relevance) = $sth->fetchrow_array) {
+     next unless $class && $name;    # sorry, ignore NULL objects
+     $relevance = sprintf("%.2f",$relevance);  # trim long floats
+     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+     push @results,[$featname,$note,$relevance];
+  }
+
+  #added result filtering so that this method returns the expected results
+  #this section of code used to be in GBrowse's do_keyword_search method
+
+  # Compile one literal, case-insensitive pattern per word instead of
+  # building Perl source and eval'ing it.  split ' ' skips leading
+  # whitespace, so no empty pattern is ever produced.
+  my @words = map { qr/\Q$_\E/i } split ' ',$search_string;
+
+  # a note is kept only if no word fails to match; an empty word list
+  # keeps nothing
+  my @matches = grep {
+    my $note = $_->[1];
+    @words && !grep { $note !~ $_ } @words;
+  } @results;
+
+  return @matches;
+}
+
+
+
+################################ loading and initialization ##################################
+
+=head2 schema
+
+ Title   : schema
+ Usage   : $schema = $db->schema
+ Function: return the CREATE script for the schema
+ Returns : a hash reference of CREATE statements, keyed by table name
+ Args    : none
+ Status  : protected
+
+This method returns a hash reference keyed by table name.  Each value is
+a hash whose "table" entry holds the CREATE statement needed to
+initialize that database table.
+
+=cut
+
+# Return the table definitions as a hash reference of the form
+#   { table_name => { table => "create table ..." }, ... }
+#
+# The storage engine is selected with ENGINE=MyISAM.  The older TYPE=
+# spelling of this table option is rejected by MySQL 5.5 and later.
+sub schema {
+  my %schema = (
+    fdata => {
+      table => q{
+ create table fdata (
+    fid                 int not null  auto_increment,
+    fref                varchar(100) not null,
+    fstart              int not null,
+    fstop               int not null,
+    fbin                double precision,
+    ftypeid             int not null,
+    fscore              float,
+    fstrand             enum('+','-'),
+    fphase              enum('0','1','2'),
+    gid                 int not null,
+    ftarget_start       int,
+    ftarget_stop        int,
+    primary key(fid),
+    unique index(fref,fbin,fstart,fstop,ftypeid,gid),
+    index(ftypeid),
+    index(gid)
+ ) ENGINE=MyISAM
+},
+    }, # fdata
+
+    fgroup => {
+      table => q{
+create table fgroup (
+    gid     int not null  auto_increment,
+    gclass  varchar(100),
+    gname   varchar(100),
+    primary key(gid),
+    unique(gclass,gname)
+) ENGINE=MyISAM
+},
+    }, # fgroup
+
+    ftype => {
+      table => q{
+create table ftype (
+    ftypeid      int not null   auto_increment,
+    fmethod       varchar(100) not null,
+    fsource       varchar(100),
+    primary key(ftypeid),
+    index(fmethod),
+    index(fsource),
+    unique ftype (fmethod,fsource)
+) ENGINE=MyISAM
+},
+    }, # ftype
+
+    fdna => {
+      table => q{
+create table fdna (
+    fref    varchar(100) not null,
+    foffset int(10) unsigned not null,
+    fdna    longblob,
+    primary key(fref,foffset)
+) ENGINE=MyISAM
+},
+    }, # fdna
+
+    fmeta => {
+      table => q{
+create table fmeta (
+    fname   varchar(255) not null,
+    fvalue  varchar(255) not null,
+    primary key(fname)
+) ENGINE=MyISAM
+},
+    }, # fmeta
+
+    fattribute => {
+      table => q{
+create table fattribute (
+    fattribute_id     int(10)         unsigned not null auto_increment,
+    fattribute_name   varchar(255)    not null,
+    primary key(fattribute_id)
+) ENGINE=MyISAM
+},
+    }, # fattribute
+
+    fattribute_to_feature => {
+      table => q{
+create table fattribute_to_feature (
+    fid              int(10) not null,
+    fattribute_id    int(10) not null,
+    fattribute_value text,
+    key(fid,fattribute_id),
+    key(fattribute_value(48)),
+    fulltext(fattribute_value)
+) ENGINE=MyISAM
+},
+    }, # fattribute_to_feature
+  );
+  return \%schema;
+}
+
+
+
+=head2 make_classes_query
+
+ Title   : make_classes_query
+ Usage   : ($query,@args) = $db->make_classes_query
+ Function: return query fragment for generating list of reference classes
+ Returns : a query and args
+ Args    : none
+ Status  : public
+
+=cut
+
+# SQL that lists every distinct, non-NULL group class
+sub make_classes_query {
+  my $sql = 'SELECT DISTINCT gclass FROM fgroup WHERE NOT ISNULL(gclass)';
+  return $sql;
+}
+
+
+=head2 make_meta_set_query
+
+ Title   : make_meta_set_query
+ Usage   : $sql = $db->make_meta_set_query
+ Function: return SQL fragment for setting a meta parameter
+ Returns : SQL fragment
+ Args    : none
+ Status  : public
+
+This adaptor stores meta parameters in the fmeta table.  The returned
+REPLACE statement takes the parameter name and value as placeholders.
+
+=cut
+
+# SQL that inserts or overwrites one (name,value) row of the fmeta table
+sub make_meta_set_query {
+   my $sql = 'REPLACE INTO fmeta VALUES (?,?)';
+   return $sql;
+}
+
+=head2 setup_load
+
+ Title   : setup_load
+ Usage   : $db->setup_load
+ Function: called before load_gff_line()
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This method performs schema-specific initialization prior to loading a
+set of GFF records.  It prepares a set of DBI statement handlers to be 
+used in loading the data.
+
+=cut
+
+# Prepare for a bulk load: optionally write-lock every table, prepare the
+# lookup and insert statements used by load_gff_line(), and reset the
+# per-load caches and record counter kept in $self->{load_stuff}.
+sub setup_load {
+  my $self      = shift;
+
+  my $dbh = $self->features_db;
+
+  if ($self->lock_on_load) {
+    my @tables = map { "$_ WRITE"} $self->tables;
+    my $tables = join ', ',@tables;
+    $dbh->do("LOCK TABLES $tables");
+  }
+#  for my $table (qw(fdata)) {
+#    $dbh->do("alter table $table disable keys");
+#  }
+
+  my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
+  my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
+
+  my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
+  my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
+
+  my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
+  my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
+  my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
+
+  my $insert_data  = $dbh->prepare_delayed(<<END);
+INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
+                   fstrand,fphase,gid,ftarget_start,ftarget_stop)
+       VALUES(?,?,?,?,?,?,?,?,?,?,?)
+END
+;
+
+  # statement handles are stored as "lookup_<table>" / "insert_<table>",
+  # the names get_table_id() builds from its table argument
+  $self->{load_stuff}{sth}{lookup_ftype}     = $lookup_type;
+  $self->{load_stuff}{sth}{insert_ftype}     = $insert_type;
+  $self->{load_stuff}{sth}{lookup_fgroup}    = $lookup_group;
+  $self->{load_stuff}{sth}{insert_fgroup}    = $insert_group;
+  $self->{load_stuff}{sth}{insert_fdata}     = $insert_data;
+  $self->{load_stuff}{sth}{lookup_fattribute} = $lookup_attribute;
+  $self->{load_stuff}{sth}{insert_fattribute} = $insert_attribute;
+  $self->{load_stuff}{sth}{insert_fattribute_value} = $insert_attribute_value;
+  $self->{load_stuff}{types}  = {};
+  $self->{load_stuff}{groups} = {};
+  $self->{load_stuff}{counter} = 0;
+}
+
+=head2 load_gff_line
+
+ Title   : load_gff_line
+ Usage   : $db->load_gff_line($fields)
+ Function: called to load one parsed line of GFF
+ Returns : true if successfully inserted
+ Args    : hashref containing GFF fields
+ Status  : protected
+
+This method is called once per line of the GFF and passed a series of
+parsed data items that are stored into the hashref $fields.  The keys are:
+
+ ref          reference sequence
+ source       annotation source
+ method       annotation method
+ start        annotation start
+ stop         annotation stop
+ score        annotation score (may be undef)
+ strand       annotation strand (may be undef)
+ phase        annotation phase (may be undef)
+ group_class  class of annotation's group (may be undef)
+ group_name   ID of annotation's group (may be undef)
+ target_start start of target of a similarity hit
+ target_stop  stop of target of a similarity hit
+ attributes   array reference of attributes, each of which is a [tag=>value] array ref
+
+=cut
+
+# Insert one parsed GFF record.
+#   Args    : hashref; this code reads the keys ref, source, method, start,
+#             stop, score, strand, phase, gclass, gname, tstart, tstop and
+#             attributes
+#   Returns : the feature ID of the inserted row, or nothing on failure
+# Relies on the statement handles prepared by setup_load().
+sub load_gff_line {
+  my $self = shift;
+  my $gff = shift;
+
+  my $s    = $self->{load_stuff};
+  my $dbh  = $self->features_db;
+  local $dbh->{PrintError} = 0;
+
+  defined(my $typeid  = $self->get_table_id('ftype', $gff->{method} => $gff->{source})) or return;
+  defined(my $groupid = $self->get_table_id('fgroup',$gff->{gname}  => $gff->{gclass})) or return;
+
+  if ($gff->{stop}-$gff->{start}+1 > $self->max_bin) {
+    warn "$gff->{gclass}:$gff->{gname} is ",$gff->{stop}-$gff->{start}+1,
+      " bp long, but the maximum indexable feature is set to ",$self->max_bin," bp.\n";
+    warn "Please set the maxbin value to a length at least as large as the largest feature you wish to store.\n";
+    warn "\n* You will need to reinitialize the database from scratch.\n";
+    warn "* With the Perl API you do this using the -max_bin argument to \$db->initialize().\n";
+    warn "* With the command-line tools you do with this with --maxfeature option.\n";
+  }
+
+  my $bin =  bin($gff->{start},$gff->{stop},$self->min_bin);
+  my $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
+                                                $gff->{start},$gff->{stop},$bin,
+                                                $typeid,
+                                                $gff->{score},$gff->{strand},$gff->{phase},
+                                                $groupid,
+                                                $gff->{tstart},$gff->{tstop});
+
+  # Report the failure, then give up on this record.  Written as a block
+  # because 'warn LIST,"\n" && return' evaluates the return while building
+  # warn's argument list, so the message was never printed.
+  unless ($result) {
+    warn $dbh->errstr,"\n";
+    return;
+  }
+
+  # fall back to a lookup when the driver reports no insert id
+  my $fid = $dbh->{mysql_insertid}
+    || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
+
+
+  # insert attributes
+  foreach (@{$gff->{attributes}}) {
+    defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
+    $s->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$_->[1]);
+  }
+
+  # progress report every 1000 records
+  if ( (++$s->{counter} % 1000) == 0) {
+    print STDERR "$s->{counter} records loaded...";
+    print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
+  }
+
+  $fid;
+}
+
+# Finish a bulk load by delegating to the parent class, with PrintError
+# switched off on the features handle for the duration of the call.
+sub finish_load {
+  my ($self) = @_;
+  my $features_dbh = $self->features_db;
+  local $features_dbh->{PrintError} = 0;
+  # re-enabling keys is currently disabled:
+  #  for my $table (qw(fdata)) {
+  #    $dbh->do("alter table $table enable keys");
+  #  }
+  return $self->SUPER::finish_load;
+}
+
+
+# Store one DNA fragment ($id,$offset,$seq) in the fdna table, replacing
+# any existing row.  The statement handle is prepared on first use and
+# cached on the object.  Throws with the statement's error on failure.
+sub insert_sequence {
+  my ($self,$id,$offset,$seq) = @_;
+  unless ($self->{_insert_sequence}) {
+    $self->{_insert_sequence} =
+      $self->dbh->prepare_delayed('replace into fdna values (?,?,?)');
+  }
+  my $sth = $self->{_insert_sequence};
+  my $ok  = $sth->execute($id,$offset,$seq);
+  return $ok if $ok;
+  $self->throw($sth->errstr);
+}
+
+
+=head2 get_table_id
+
+ Title   : get_table_id
+ Usage   : $integer = $db->get_table_id($table,@ids)
+ Function: get the ID of a group or type
+ Returns : an integer ID or undef
+ Args    : none
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature type or group.  The arguments are the name
+of the table, and two string identifiers.  For feature types, the
+identifiers are the method and source.  For groups, the identifiers
+are group name and class.
+
+This method requires that a statement handler named I<lookup_$table>,
+have been created previously by setup_load().  It is here to overcome
+deficiencies in mysql's INSERT syntax.
+
+=cut
+
+#'
+# get the object ID from a named table
+# Return the integer ID of the row of $table identified by @ids, inserting
+# the row if the lookup finds nothing.  IDs are cached per table in
+# $self->{load_stuff} under the key join(':',@ids).  Warns and returns
+# nothing when no ID could be obtained.
+sub get_table_id {
+  my $self   = shift;
+  my $table  = shift;
+  my @ids    = @_;
+
+  # irritating warning for null id
+  my $id_key;
+  {
+    local $^W=0;
+    $id_key = join ':',@ids;
+  }
+
+  my $s   = $self->{load_stuff};
+  my $sth = $s->{sth};
+  my $dbh = $self->features_db;
+
+  unless (defined($s->{$table}{$id_key})) {
+
+    #########################################
+    # retrieval of the last inserted id is now located at the adaptor and not in caching_handle
+    #######################################
+    if ( (my $result = $sth->{"lookup_$table"}->execute(@ids)) > 0) {
+      $s->{$table}{$id_key} = ($sth->{"lookup_$table"}->fetchrow_array)[0];
+    } else {
+      # not found: insert the row and remember the ID reported by insertid()
+      $sth->{"insert_$table"}->execute(@ids)
+        && ($s->{$table}{$id_key} = $self->insertid($sth->{"insert_$table"}));
+        #&& ($s->{$table}{$id_key} = $sth->{"insert_$table"}{sth}{mysql_insertid});
+        #&& ($s->{$table}{$id_key} = $sth->{"insert_$table"}->insertid);
+    }
+  }
+
+  my $id = $s->{$table}{$id_key};
+  unless (defined $id) {
+    warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
+    return;
+  }
+  $id;
+}
+
+# Return the mysql_insertid attribute of the given statement handle.
+sub insertid {
+  my ($self,$sth) = @_;
+  return $sth->{mysql_insertid};
+}
+
+
+=head2 get_feature_id
+
+ Title   : get_feature_id
+ Usage   : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
+ Function: get the ID of a feature
+ Returns : an integer ID or undef
+ Args    : none
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature.  It is only needed when replacing a feature
+with new information.
+
+=cut
+
+# this method is called when needed to look up a feature's ID
+# Look up the fid of the fdata row matching the given reference, start,
+# stop, type ID and group ID.  The statement handle is prepared on first
+# use and cached in $self->{load_stuff}.  Returns nothing when the
+# statement is unavailable or fails to execute.
+sub get_feature_id {
+  my ($self,$ref,$start,$stop,$typeid,$groupid) = @_;
+  my $load_state = $self->{load_stuff};
+  $load_state->{get_feature_id} ||=
+    $self->features_db->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
+
+  my $sth = $load_state->{get_feature_id};
+  return unless $sth;
+  return unless $sth->execute($ref,$start,$stop,$typeid,$groupid);
+
+  my @row = $sth->fetchrow_array;
+  return $row[0];
+}
+
+1;
+
+
+__END__
+
+=head1 BUGS
+
+none ;-)
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,111 @@
+package Bio::DB::GFF::Adaptor::dbi::mysqlace;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::mysqlace -- Unholy union between mysql GFF database and acedb database
+
+=head1 SYNOPSIS
+
+Pending
+
+See L<Bio::DB::GFF> and L<Bio::DB::GFF::Adaptor::dbi::mysql>
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+
+use base qw(Bio::DB::GFF::Adaptor::dbi::mysql Bio::DB::GFF::Adaptor::ace);
+
+# Create a new Bio::DB::GFF::Adaptor::dbi object
+# Create a new adaptor object.  In addition to the parent's arguments:
+#   DNADB (aliases DNA, FASTA, FASTA_DIR)
+#           a Bio::DB::Fasta object, or a plain string that is passed to
+#           Bio::DB::Fasta->new to create one
+#   ACEDB   an Ace accessor object
+# Throws when either argument is given but is not of the expected class.
+sub new {
+  my $class = shift;
+  my $self  = $class->SUPER::new(@_);
+  my ($dna_db,$acedb) = rearrange([[qw(DNADB DNA FASTA FASTA_DIR)],'ACEDB'],@_);
+  if ($dna_db) {
+    if (!ref($dna_db)) {
+      require Bio::DB::Fasta;
+      my $fasta_dir = $dna_db;
+      $dna_db = Bio::DB::Fasta->new($fasta_dir);
+      $dna_db or $class->throw("new(): Failed to create new Bio::DB::Fasta from files in $fasta_dir");
+    } else {
+      $dna_db->isa('Bio::DB::Fasta') or $class->throw("new(): $dna_db is not a Bio::DB::Fasta object");
+    }
+    $self->dna_db($dna_db);
+  }
+
+  if ($acedb) {
+    $acedb->isa('Ace') or $class->throw("$acedb is not an acedb accessor object");
+    $self->acedb($acedb);
+  }
+  $self;
+}
+
+=head2 freshen_ace
+
+ Title   : freshen
+ Usage   : $flag = Bio::DB::GFF->freshen_ace;
+ Function: Refresh internal acedb handle
+ Returns : flag if correctly freshened
+ Args    : none
+ Status  : Public
+
+ACeDB has an annoying way of timing out, leaving dangling database
+handles.  This method will invoke the ACeDB reopen() method, which
+causes dangling handles to be refreshed.  It has no effect if you are
+not using ACeDB to create ACeDB objects.
+
+=cut
+
+#########################
+# Moved from mysqlopt.pm
+#########################
+# Build the object for a feature's group.  Without an acedb handle the
+# parent's make_object() is used.  With one:
+#   - class 'Note' yields $class->new(Text=>$name), no database involved
+#   - a non-empty $start yields an Ace::Sequence::Homol object
+#   - otherwise the object is created through the acedb handle's class,
+#     falling back to $class->new(Text=>$name) if that returns undef
+sub make_object {
+  my ($self,$class,$name,$start,$stop) = @_;
+
+  my $db = $self->acedb;
+  unless ($db) {
+    return $self->SUPER::make_object($class,$name,$start,$stop);
+  }
+
+  # for Notes we just return a text, no database associated
+  if ($class eq 'Note') {
+    return $class->new(Text=>$name);
+  }
+
+  # for homols, we create the indicated Protein or Sequence object
+  # then generate a bogus Homology object (for future compatibility??)
+  if ($start ne '') {
+    require Ace::Sequence::Homol;
+    return Ace::Sequence::Homol->new_homol($class,$name,$db,$start,$stop);
+  }
+
+  # General case, with a Text object as the last resort
+  my $obj = $db->class->new($class=>$name,$self->acedb);
+  return defined($obj) ? $obj : $class->new(Text=>$name);
+}
+
+# Fetch DNA for ($ref,$start,$stop,$class) from the attached DNA database,
+# or from the parent's get_dna() when none is attached.
+sub get_dna {
+  my ($self,@args) = @_;
+  my ($ref,$start,$stop,$class) = @args;
+  my $dna_db = $self->dna_db;
+  return $self->SUPER::get_dna(@args) unless $dna_db;
+  return $dna_db->seq($ref,$start,$stop,$class);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1175 @@
+package Bio::DB::GFF::Adaptor::dbi::mysqlcmap;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::mysqlcmap -- Database adaptor for an integrated
+CMap/GBrowse mysql schema
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>
+
+=cut
+
+# a simple mysql adaptor
+use strict;
+use Data::Dumper;
+use Bio::DB::GFF::Adaptor::dbi;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::GFF::Util::Binning;
+use base qw(Bio::DB::GFF::Adaptor::dbi::mysql);
+require Bio::DB::GFF::Adaptor::dbi::mysql;
+
+use constant GETSEQCOORDS =><<END; # extent (min fstart, max fstop) of a group selected by feature_name and gclass; returned by getseqcoords_query
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       feature_name as gname
+  FROM fdata,cmap_feature
+  WHERE cmap_feature.feature_name=?
+    AND cmap_feature.gclass=?
+    AND cmap_feature.feature_id=fdata.feature_id
+    GROUP BY fref,fstrand,feature_name
+END
+;
+
+use constant GETALIASCOORDS =><<END; # same extent lookup, matching an 'Alias' attribute value exactly; returned by getaliascoords_query
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       feature_name as gname
+  FROM fdata,cmap_feature,fattribute,fattribute_to_feature
+  WHERE fattribute_to_feature.fattribute_value=?
+    AND cmap_feature.gclass=?
+    AND cmap_feature.feature_id=fdata.feature_id
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,feature_name
+END
+;
+
+use constant GETALIASLIKE =><<END; # as GETALIASCOORDS, but the alias value is matched with LIKE; returned by getaliaslike_query
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       feature_name as gname
+  FROM fdata,cmap_feature,fattribute,fattribute_to_feature
+  WHERE fattribute_to_feature.fattribute_value LIKE ?
+    AND cmap_feature.gclass=?
+    AND cmap_feature.feature_id=fdata.feature_id
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,feature_name
+END
+;
+
+use constant GETFORCEDSEQCOORDS =><<END; # as GETSEQCOORDS, additionally restricted to one fref (third bind value); returned by getforcedseqcoords_query
+SELECT fref,
+       IF(ISNULL(gclass),'Sequence',gclass),
+       min(fstart),
+       max(fstop),
+       fstrand
+  FROM fdata,cmap_feature
+  WHERE cmap_feature.feature_name=?
+    AND cmap_feature.gclass=?
+    AND fdata.fref=?
+    AND cmap_feature.feature_id=fdata.feature_id
+    GROUP BY fref,fstrand
+END
+;
+
+use constant FULLTEXTSEARCH => <<END; # MATCH ... AGAINST search over attribute values; the search string is bound twice (score and filter)
+SELECT distinct gclass,feature_name,fattribute_value,MATCH(fattribute_value) AGAINST (?) as score
+  FROM cmap_feature,fattribute_to_feature,fdata
+  WHERE cmap_feature.feature_id=fdata.feature_id
+    AND fdata.fid=fattribute_to_feature.fid
+    AND MATCH(fattribute_value) AGAINST (?)
+END
+;
+
+=head1 DESCRIPTION
+
+This adaptor implements a specific mysql database schema that is
+compatible with Bio::DB::GFF.  It inherits from
+Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
+
+The schema uses several tables:
+
+=over 4
+
+=item fdata
+
+This is the feature data table.  Its columns are:
+
+    fid	           feature ID (integer)
+    fref           reference sequence name (string)
+    fstart         start position relative to reference (integer)
+    fstop          stop position relative to reference (integer)
+    ftypeid        feature type ID (integer)
+    fscore         feature score (float); may be null
+    fstrand        strand; one of "+" or "-"; may be null
+    fphase         phase; one of 0, 1 or 2; may be null
+    feature_id     group ID used to be 'gid' (integer)
+    ftarget_start  for similarity features, the target start position (integer)
+    ftarget_stop   for similarity features, the target stop position (integer)
+
+Note that it would be desirable to normalize the reference sequence
+name, since there are usually many features that share the same
+reference feature.  However, in the current schema, query performance
+suffers dramatically when this additional join is added.
+
+=item cmap_feature (replaces fgroup)
+
+This is the group table. There is one row for each group.  This is the 
+shared table between CMap and GBrowse.  There are many CMap related 
+columns but only a few that GBrowse uses.  
+
+GBrowse Columns:
+
+    feature_id     the group ID (integer)
+    gclass         the class of the group (string)
+    feature_name   the name of the group (string)
+
+The group table serves multiple purposes.  As you might expect, it is
+used to cluster features that logically belong together, such as the
+multiple exons of the same transcript.  It is also used to assign a
+name and class to a singleton feature.  Finally, the group table is
+used to identify the target of a similarity hit.  This is consistent
+with the way in which the group field is used in the GFF version 2
+format.
+
+The cmap_feature.feature_id field joins with the fdata.feature_id field. 
+
+Examples:
+
+  mysql> select * from cmap_feature where feature_name='sjj_2L52.1';
+  +--------------+-------------+--------------+
+  | feature_id   | gclass      | feature_name |
+  +--------------+-------------+--------------+
+  | 69736        | PCR_product | sjj_2L52.1   |
+  +--------------+-------------+--------------+
+  1 row in set (0.70 sec)
+
+  mysql> select fref,fstart,fstop from fdata,cmap_feature 
+            where gclass='PCR_product' and feature_name = 'sjj_2L52.1' 
+                  and fdata.feature_id=cmap_feature.feature_id;
+  +---------------+--------+-------+
+  | fref          | fstart | fstop |
+  +---------------+--------+-------+
+  | CHROMOSOME_II |   1586 |  2355 |
+  +---------------+--------+-------+
+  1 row in set (0.03 sec)
+
+=item ftype
+
+This table contains the feature types, one per row.  Columns are:
+
+    ftypeid      the feature type ID (integer)
+    fmethod      the feature type method name (string)
+    fsource      the feature type source name (string)
+
+The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:
+
+  mysql> select fref,fstart,fstop,fmethod,fsource from fdata,cmap_feature,ftype 
+         where gclass='PCR_product' 
+               and feature_name = 'sjj_2L52.1'
+               and fdata.feature_id=cmap_feature.feature_id
+               and fdata.ftypeid=ftype.ftypeid;
+  +---------------+--------+-------+-------------+-----------+
+  | fref          | fstart | fstop | fmethod     | fsource   |
+  +---------------+--------+-------+-------------+-----------+
+  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
+  +---------------+--------+-------+-------------+-----------+
+  1 row in set (0.08 sec)
+
+=item fdna
+
+This table holds the raw DNA of the reference sequences.  It has three
+columns:
+
+    fref          reference sequence name (string)
+    foffset       offset of this sequence
+    fdna          the DNA sequence (longblob)
+
+To overcome problems loading large blobs, DNA is automatically
+fragmented into multiple segments when loading, and the position of
+each segment is stored in foffset.  The fragment size is controlled by
+the -clump_size argument during initialization.
+
+=item fattribute_to_feature
+
+This table holds "attributes", which are tag/value pairs stuffed into
+the GFF line.  The first tag/value pair is treated as the group, and
+anything else is treated as an attribute (weird, huh?).
+
+ CHR_I assembly_tag Finished     2032 2036 . + . Note "Right: cTel33B"
+ CHR_I assembly_tag Polymorphism 668  668  . + . Note "A->C in cTel33B"
+
+The columns of this table are:
+
+    fid                 feature ID (integer)
+    fattribute_id       ID of the attribute (integer)
+    fattribute_value    text of the attribute (text)
+
+The fdata.fid column joins with fattribute_to_feature.fid.
+
+=item fattribute
+
+This table holds the normalized names of the attributes.  Fields are:
+
+  fattribute_id      ID of the attribute (integer)
+  fattribute_name    Name of the attribute (varchar)
+
+=back
+
+=head2 Data Loading Methods
+
+In addition to implementing the abstract SQL-generating methods of
+Bio::DB::GFF::Adaptor::dbi, this module also implements the data
+loading functionality of Bio::DB::GFF.
+
+=cut
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(@args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    : see below
+ Status  : Public
+
+The new constructor is identical to the "dbi" adaptor's new() method,
+except that the prefix "dbi:mysql" is added to the database DSN identifier
+automatically if it is not there already.
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:mysql:ens0040' or "ens0040"
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+=cut
+
+#'
+
+#Defined in mysql.pm
+
+=head2 get_dna
+
+ Title   : get_dna
+ Usage   : $string = $db->get_dna($name,$start,$stop,$class)
+ Function: get DNA string
+ Returns : a string
+ Args    : name, class, start and stop of desired segment
+ Status  : Public
+
+This method performs the low-level fetch of a DNA substring given its
+name, class and the desired range.  This should probably be moved to
+the parent class.
+
+=cut
+
+# Build the column list of the features SELECT.  With a true
+# $options->{bin_width} the features are summarised into fixed-width
+# bins; otherwise the individual feature columns are selected.  When
+# more than one attribute is being filtered on, a count of matching
+# rows is appended as an extra column.
+sub make_features_select_part {
+  my ($self,$options) = @_;
+  $options ||= {};
+
+  my $bin_width = $options->{bin_width};
+  my $columns   = $bin_width ? <<BINNED : <<PLAIN;
+fref,
+  1+${bin_width}*floor(fstart/${bin_width})   as fstart,
+  ${bin_width}*(1+floor(fstart/${bin_width})) as fstop,
+  IF(ISNULL(fsource),fmethod,concat(fmethod,':',fsource)),'bin',
+  count(*) as fscore,
+  '.','.','bin',
+  IF(ISNULL(fsource),concat(fref,':',fmethod),concat(fref,':',fmethod,':',fsource)),
+  NULL,NULL,NULL,NULL
+BINNED
+fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,feature_name as gname,ftarget_start,ftarget_stop,fdata.fid,fdata.feature_id
+PLAIN
+
+  my $attributes = $options->{attributes};
+  $columns .= ",count(fdata.fid)" if $attributes && keys(%$attributes) > 1;
+  return $columns;
+}
+
+
+# IMPORTANT NOTE:
+# WHETHER OR NOT THIS WORKS IS CRITICALLY DEPENDENT ON THE RELATIVE MAGNITUDE OF THE
+# Build the FROM list of the features query.  An index hint is attached
+# to fdata: the feature_id index when $options->{sparse_groups} is set,
+# otherwise the ftypeid index when $sparse_types is set.  The two
+# attribute tables are joined in only when attributes are being queried.
+sub make_features_from_part {
+  my ($self,$sparse_types,$options) = @_;
+  $options ||= {};
+
+  my $index_hint = '';
+  if ($options->{sparse_groups}) {
+    $index_hint = ' USE INDEX(feature_id)';
+  }
+  elsif ($sparse_types) {
+    $index_hint = ' USE INDEX(ftypeid)';
+  }
+
+  my @tables = ("fdata${index_hint}",'ftype','cmap_feature');
+  push @tables,'fattribute','fattribute_to_feature' if $options->{attributes};
+  return join(',',@tables)."\n";
+}
+
+################################ loading and initialization ##################################
+
+=head2 schema
+
+ Title   : schema
+ Usage   : $schema = $db->schema
+ Function: return the CREATE script for the schema
+ Returns : a hash reference mapping table names to their CREATE statements
+ Args    : none
+ Status  : protected
+
+This method returns a hash reference, keyed by table name, holding the
+CREATE statements needed to initialize the database tables.
+
+=cut
+
+sub schema { # returns a hashref: table name => { table => CREATE statement [, insert => { name => INSERT statement }] }
+  my %schema = (
+		fdata =>{ # NOTE: the '#'-prefixed lines inside q{} below are part of the SQL string, not Perl comments
+table=> q{
+#create table fdata (
+#    fid	         int not null  auto_increment,
+#    fref         varchar(100)    not null,
+#    fstart       int unsigned   not null,
+#    fstop        int unsigned   not null,
+#    ftypeid      int not null,
+#    fscore        float,
+#    fstrand       enum('+','-'),
+#    fphase        enum('0','1','2'),
+#    feature_id          int not null,
+#    ftarget_start int unsigned,
+#    ftarget_stop  int unsigned,
+#    primary key(fid),
+#    unique index(fref,fstart,fstop,ftypeid,feature_id),
+#    index(ftypeid),
+#    index(feature_id)
+#) type=MyISAM
+
+
+ create table fdata (
+    fid	                int not null  auto_increment,
+    fref                varchar(100) not null,
+    fstart              int unsigned   not null,
+    fstop               int unsigned   not null,
+    fbin                double(20,6)  not null,
+    ftypeid             int not null,
+    fscore              float,
+    fstrand             enum('+','-'),
+    fphase              enum('0','1','2'),
+    feature_id                 int not null,
+    ftarget_start       int unsigned,
+    ftarget_stop        int unsigned,
+    primary key(fid),
+    unique index(fref,fbin,fstart,fstop,ftypeid,feature_id),
+    index(ftypeid),
+    index(feature_id)
+		   ) type=MyISAM
+}  # fdata table
+}, # fdata
+
+          ftype => {
+table=> q{
+create table ftype (
+    ftypeid      int not null   auto_increment,
+    fmethod       varchar(100) not null,
+    fsource       varchar(100),
+    primary key(ftypeid),
+    index(fmethod),
+    index(fsource),
+    unique ftype (fmethod,fsource)
+)type=MyISAM
+}  #ftype table
+}, #ftype
+
+         fdna => {
+table=> q{
+create table fdna (
+		fref    varchar(100) not null,
+	        foffset int(10) unsigned not null,
+	        fdna    longblob,
+		primary key(fref,foffset)
+)type=MyISAM
+} # fdna table
+},#fdna
+
+        fmeta => {
+table=> q{
+create table fmeta (
+		fname   varchar(255) not null,
+	        fvalue  varchar(255) not null,
+		primary key(fname)
+)type=MyISAM
+} # fmeta table
+},#fmeta
+
+       fattribute => {
+table=> q{
+create table fattribute (
+	fattribute_id     int(10)         unsigned not null auto_increment,
+        fattribute_name   varchar(255)    not null,
+	primary key(fattribute_id)
+)type=MyISAM
+} #fattribute table
+},#fattribute
+
+       fattribute_to_feature => {
+table=> q{
+create table fattribute_to_feature (
+        fid              int(10) not null,
+        fattribute_id    int(10) not null,
+	fattribute_value text,
+        key(fid,fattribute_id),
+	key(fattribute_value(48)),
+        fulltext(fattribute_value)
+)type=MyISAM
+} # fattribute_to_feature table
+    }, # fattribute_to_feature
+
+
+cmap_attribute => {
+table=>q{
+create table cmap_attribute (
+  attribute_id int(11) NOT NULL default '0',
+  table_name varchar(30) NOT NULL default '',
+  object_id int(11) NOT NULL default '0',
+  display_order int(11) NOT NULL default '1',
+  is_public tinyint(4) NOT NULL default '1',
+  attribute_name varchar(200) NOT NULL default '',
+  attribute_value text NOT NULL,
+  PRIMARY KEY  (attribute_id),
+  KEY table_name (table_name,object_id,display_order,attribute_name)
+) TYPE=MyISAM;
+} # table
+}, # cmap_attribute
+
+cmap_correspondence_evidence => {
+table=>q{
+create table cmap_correspondence_evidence (
+  correspondence_evidence_id int(11) NOT NULL default '0',
+  accession_id varchar(20) NOT NULL default '',
+  feature_correspondence_id int(11) NOT NULL default '0',
+  evidence_type_accession varchar(20) NOT NULL default '0',
+  score double(8,2) default NULL,
+  rank int(11) NOT NULL default '0',
+  PRIMARY KEY  (correspondence_evidence_id),
+  UNIQUE KEY accession_id (accession_id),
+  KEY feature_correspondence_id (feature_correspondence_id)
+) TYPE=MyISAM;
+} # table
+}, # cmap_correspondence_evidence
+
+
+cmap_correspondence_lookup => {
+table=>q{
+create table cmap_correspondence_lookup (
+  feature_id1 int(11) default NULL,
+  feature_id2 int(11) default NULL,
+  feature_correspondence_id int(11) default NULL,
+  start_position1 double(11,2) default NULL,
+  start_position2 double(11,2) default NULL,
+  stop_position1 double(11,2) default NULL,
+  stop_position2 double(11,2) default NULL,
+  map_id1 int(11) default NULL,
+  map_id2 int(11) default NULL,
+  feature_type_accession1 varchar(20) default NULL,
+  feature_type_accession2 varchar(20) default NULL,
+  KEY feature_id1 (feature_id1),
+  KEY corr_id (feature_correspondence_id),
+  KEY cl_map_id1 (map_id1),
+  KEY cl_map_id2 (map_id2),
+  KEY cl_map_id1_map_id2 (map_id1,map_id2),
+  KEY cl_map_id2_map_id1 (map_id2,map_id1)
+) TYPE=MyISAM;
+} # table
+}, # cmap_correspondence_lookup
+
+
+cmap_correspondence_matrix => {
+table=>q{
+create table cmap_correspondence_matrix (
+  reference_map_aid varchar(20) NOT NULL default '0',
+  reference_map_name varchar(32) NOT NULL default '',
+  reference_map_set_aid varchar(20) NOT NULL default '0',
+  reference_species_aid varchar(20) NOT NULL default '0',
+  link_map_aid varchar(20) default NULL,
+  link_map_name varchar(32) default NULL,
+  link_map_set_aid varchar(20) NOT NULL default '0',
+  link_species_aid varchar(20) NOT NULL default '0',
+  no_correspondences int(11) NOT NULL default '0'
+) TYPE=MyISAM;
+} # table
+}, # cmap_correspondence_matrix
+
+
+cmap_feature => {
+table=>q{
+create table cmap_feature (
+  feature_id int(11) NOT NULL default '0',
+  accession_id varchar(20) NOT NULL default '',
+  map_id int(11) default NULL,
+  feature_type_accession varchar(20) NOT NULL default '0',
+  feature_name varchar(32) NOT NULL default '',
+  is_landmark tinyint(4) NOT NULL default '0',
+  start_position double(11,2) NOT NULL default '0.00',
+  stop_position double(11,2) default NULL,
+  default_rank int(11) NOT NULL default '1',
+  direction tinyint(4) NOT NULL default '1',
+  gclass varchar(100) default NULL,
+  PRIMARY KEY  (feature_id),
+  UNIQUE KEY gclass (gclass,feature_name),
+  UNIQUE KEY accession_id (accession_id),
+  KEY feature_name (feature_name),
+  KEY feature_id_map_id (feature_id,map_id),
+  KEY feature_id_map_id_start (feature_id,map_id,start_position),
+  KEY map_id (map_id),
+  KEY map_id_feature_id (map_id,feature_id)
+) TYPE=MyISAM;
+} # table
+}, # cmap_feature
+
+
+cmap_feature_alias => {
+table=>q{
+create table cmap_feature_alias (
+  feature_alias_id int(11) NOT NULL default '0',
+  feature_id int(11) NOT NULL default '0',
+  alias varchar(255) default NULL,
+  PRIMARY KEY  (feature_alias_id),
+  UNIQUE KEY feature_id_2 (feature_id,alias),
+  KEY feature_id (feature_id),
+  KEY alias (alias)
+) TYPE=MyISAM;
+} # table
+}, # cmap_feature_alias
+
+
+cmap_feature_correspondence => {
+table=>q{
+create table cmap_feature_correspondence (
+  feature_correspondence_id int(11) NOT NULL default '0',
+  accession_id varchar(20) NOT NULL default '',
+  feature_id1 int(11) NOT NULL default '0',
+  feature_id2 int(11) NOT NULL default '0',
+  is_enabled tinyint(4) NOT NULL default '1',
+  PRIMARY KEY  (feature_correspondence_id),
+  UNIQUE KEY accession_id (accession_id),
+  KEY feature_id1 (feature_id1),
+  KEY cmap_feature_corresp_idx (is_enabled,feature_correspondence_id)
+) TYPE=MyISAM;
+} # table
+}, # cmap_feature_correspondence
+
+
+cmap_map => {
+table=>q{
+create table cmap_map (
+  map_id int(11) NOT NULL default '0',
+  accession_id varchar(20) NOT NULL default '',
+  map_set_id int(11) NOT NULL default '0',
+  map_name varchar(32) NOT NULL default '',
+  display_order int(11) NOT NULL default '1',
+  start_position double(11,2) default NULL,
+  stop_position double(11,2) default NULL,
+  PRIMARY KEY  (map_id),
+  UNIQUE KEY accession_id (accession_id),
+  UNIQUE KEY map_id (map_id,map_set_id,map_name,accession_id),
+  KEY map_set_id_index (map_set_id)
+) TYPE=MyISAM;
+} # table
+}, # cmap_map
+
+
+cmap_map_set => {
+table=>q{
+create table cmap_map_set (
+  map_set_id int(11) NOT NULL default '0',
+  accession_id varchar(20) NOT NULL default '',
+  map_set_name varchar(64) NOT NULL default '',
+  short_name varchar(30) NOT NULL default '',
+  map_type_accession varchar(20) NOT NULL default '0',
+  species_id int(11) NOT NULL default '0',
+  published_on date default NULL,
+  can_be_reference_map tinyint(4) NOT NULL default '1',
+  display_order int(11) NOT NULL default '1',
+  is_enabled tinyint(4) NOT NULL default '1',
+  shape varchar(12) default NULL,
+  color varchar(20) default NULL,
+  width int(11) default NULL,
+  map_units varchar(12) NOT NULL default '',
+  is_relational_map tinyint(11) NOT NULL default '0',
+  PRIMARY KEY  (map_set_id),
+  UNIQUE KEY accession_id (accession_id),
+  UNIQUE KEY map_set_id (map_set_id,species_id,short_name,accession_id),
+  KEY cmap_map_set_idx (can_be_reference_map,is_enabled,species_id,display_order,published_on,short_name)
+) TYPE=MyISAM;
+} # table
+}, # cmap_map_set
+
+
+cmap_next_number => {
+table=>q{
+create table cmap_next_number (
+  table_name varchar(40) NOT NULL default '',
+  next_number int(11) NOT NULL default '0',
+  PRIMARY KEY  (table_name)
+) TYPE=MyISAM;
+}, # table
+insert=>{next_num=>q[ insert into cmap_next_number (table_name,next_number) VALUES ('cmap_feature',82);]} # seed row for the cmap_feature id counter
+}, # cmap_next_number
+
+
+cmap_species => {
+table=>q{
+create table cmap_species (
+  species_id int(11) NOT NULL default '0',
+  accession_id varchar(20) NOT NULL default '',
+  common_name varchar(64) NOT NULL default '',
+  full_name varchar(64) NOT NULL default '',
+  display_order int(11) NOT NULL default '1',
+  PRIMARY KEY  (species_id),
+  KEY acc_id_species_id (accession_id,species_id)
+) TYPE=MyISAM;
+} # table
+}, # cmap_species
+
+
+cmap_xref => {
+table=>q{
+create table cmap_xref (
+  xref_id int(11) NOT NULL default '0',
+  table_name varchar(30) NOT NULL default '',
+  object_id int(11) default NULL,
+  display_order int(11) NOT NULL default '1',
+  xref_name varchar(200) NOT NULL default '',
+  xref_url text NOT NULL,
+  PRIMARY KEY  (xref_id),
+  KEY table_name (table_name,object_id,display_order)
+) TYPE=MyISAM;
+} # table
+}, # cmap_xref
+
+
+);
+  return \%schema;
+}
+
+
+
+=head2 make_classes_query
+
+ Title   : make_classes_query
+ Usage   : ($query,@args) = $db->make_classes_query
+ Function: return query fragment for generating list of reference classes
+ Returns : a query and args
+ Args    : none
+ Status  : public
+
+=cut
+
+sub make_classes_query {
+  my ($self) = @_;
+
+  # Every distinct non-NULL group class; no bind arguments are needed.
+  my $query = 'SELECT DISTINCT gclass FROM cmap_feature WHERE NOT ISNULL(gclass)';
+  return $query;
+}
+
+
+=head2 setup_load
+
+ Title   : setup_load
+ Usage   : $db->setup_load
+ Function: called before load_gff_line()
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This method performs schema-specific initialization prior to loading a
+set of GFF records.  It prepares a set of DBI statement handlers to be 
+used in loading the data.
+
+=cut
+
+sub setup_load {
+  my $self      = shift;
+
+  my $dbh = $self->features_db;
+
+  # Optionally hold a write lock on every table for the duration of the load.
+  if ($self->lock_on_load) {
+    my @tables = map { "$_ WRITE"} $self->tables;
+    my $tables = join ', ',@tables;
+    $dbh->do("LOCK TABLES $tables");
+  }
+
+  # Statement handles are stored under "<action>_<table>" keys because
+  # get_table_id() builds the key from the table name it is given.
+  my $sth = $self->{load_stuff}{sth} ||= {};
+
+  # Feature types.
+  $sth->{lookup_ftype} = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
+  $sth->{insert_ftype} = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
+
+  # Groups live in cmap_feature.  Its feature_id is not auto-incremented:
+  # the id is read from cmap_next_number (next_id_*) and the counter is
+  # advanced afterwards (aux_insert_*).
+  $sth->{lookup_cmap_feature}     = $dbh->prepare_delayed('SELECT feature_id FROM cmap_feature WHERE feature_name=? AND gclass=?');
+  $sth->{insert_cmap_feature}     = $dbh->prepare_delayed(' INSERT into cmap_feature (feature_id, accession_id,feature_name, gclass ) VALUES (?,feature_id,?,?)');
+  $sth->{aux_insert_cmap_feature} = $dbh->prepare_delayed(' update cmap_next_number set next_number = next_number +1 where table_name=\'cmap_feature\'');
+  $sth->{next_id_cmap_feature}    = $dbh->prepare_delayed('select next_number from cmap_next_number where table_name=\'cmap_feature\'');
+
+  # Attribute names and per-feature attribute values.
+  $sth->{lookup_fattribute}       = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
+  $sth->{insert_fattribute}       = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
+  $sth->{insert_fattribute_value} = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
+
+  # The feature rows themselves.
+  $sth->{insert_fdata} = $dbh->prepare_delayed(<<END);
+INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
+		   fstrand,fphase,feature_id,ftarget_start,ftarget_stop)
+       VALUES(?,?,?,?,?,?,?,?,?,?,?)
+END
+
+  # Reset the remaining load state; counter is the number of records
+  # loaded so far.
+  $self->{load_stuff}{types}  = {};
+  $self->{load_stuff}{groups} = {};
+  $self->{load_stuff}{counter} = 0;
+}
+
+=head2 load_gff_line
+
+ Title   : load_gff_line
+ Usage   : $db->load_gff_line($fields)
+ Function: called to load one parsed line of GFF
+ Returns : true if successfully inserted
+ Args    : hashref containing GFF fields
+ Status  : protected
+
+This method is called once per line of the GFF and passed a series of
+parsed data items that are stored into the hashref $fields.  The keys are:
+
+ ref          reference sequence
+ source       annotation source
+ method       annotation method
+ start        annotation start
+ stop         annotation stop
+ score        annotation score (may be undef)
+ strand       annotation strand (may be undef)
+ phase        annotation phase (may be undef)
+ group_class  class of annotation's group (may be undef)
+ group_name   ID of annotation's group (may be undef)
+ target_start start of target of a similarity hit
+ target_stop  stop of target of a similarity hit
+ attributes   array reference of attributes, each of which is a [tag=>value] array ref
+
+=cut
+
+sub load_gff_line {
+  my $self = shift;
+  my $gff = shift;
+
+  my $s    = $self->{load_stuff};
+  my $dbh  = $self->features_db;
+  # DBI's automatic error printing is switched off for this call; the
+  # insert failure below is reported explicitly instead.
+  local $dbh->{PrintError} = 0;
+
+  # Resolve (creating if necessary) the type and group rows this feature refers to.
+  defined(my $typeid  = $self->get_table_id('ftype', $gff->{method} => $gff->{source})) or return;
+  defined(my $groupid = $self->get_table_id('cmap_feature',$gff->{gname}  => $gff->{gclass})) or return;
+
+  # Features longer than max_bin are still loaded, but the user is warned.
+  if ($gff->{stop}-$gff->{start}+1 > $self->max_bin) {
+    warn "$gff->{gclass}:$gff->{gname} is longer than ",$self->max_bin,".\n";
+    warn "Please set the maxbin value to a larger length than the largest feature you wish to store.\n";
+    warn "With the command-line tools you do with this with --maxfeature option.\n";
+  }
+
+  my $bin =  bin($gff->{start},$gff->{stop},$self->min_bin);
+  my $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
+					       $gff->{start},$gff->{stop},$bin,
+					       $typeid,
+					       $gff->{score},$gff->{strand},$gff->{phase},
+					       $groupid,
+					       $gff->{tstart},$gff->{tstop});
+
+  # Written as a block on purpose: "warn LIST && return" would return
+  # before warn is ever called.
+  unless ($result) {
+    warn $dbh->errstr,"\n";
+    return;
+  }
+
+  # Use the id reported by the driver when it is set, otherwise look the
+  # row up by its identifying columns.
+  my $fid = $dbh->{mysql_insertid}
+    || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
+
+
+  # insert attributes
+  foreach (@{$gff->{attributes}}) {
+    defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
+    $s->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$_->[1]);
+  }
+
+  # Progress report every 1000 records.
+  if ( (++$s->{counter} % 1000) == 0) {
+    print STDERR "$s->{counter} records loaded...";
+    print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
+  }
+
+  $fid;
+}
+
+=head2 get_feature_id
+
+ Title   : get_feature_id
+ Usage   : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
+ Function: get the ID of a feature
+ Returns : an integer ID or undef
+ Args    : none
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature.  It is only needed when replacing a feature
+with new information.
+
+=cut
+
+# this method is called when needed to look up a feature's ID
+sub get_feature_id {
+  my ($self,$ref,$start,$stop,$typeid,$groupid) = @_;
+
+  # The lookup statement is prepared on first use and then kept with
+  # the rest of the load state.
+  my $load_state = $self->{load_stuff};
+  my $lookup = $load_state->{get_feature_id}
+    ||= $self->features_db->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND feature_id=?');
+  return unless $lookup;
+  return unless $lookup->execute($ref,$start,$stop,$typeid,$groupid);
+
+  # First column of the first matching row.
+  my ($fid) = $lookup->fetchrow_array;
+  return $fid;
+}
+
+=head2 get_table_id
+
+ Title   : get_table_id
+ Usage   : $integer = $db->get_table_id($table,@ids)
+ Function: get the ID of a group or type
+ Returns : an integer ID or undef
+ Args    : none
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature type or group.  The arguments are the name
+of the table, and two string identifiers.  For feature types, the
+identifiers are the method and source.  For groups, the identifiers
+are group name and class.
+
+This method requires that a statement handler named I<lookup_$table>,
+have been created previously by setup_load().  It is here to overcome
+deficiencies in mysql's INSERT syntax.
+
+=cut
+
+#'
+# get the object ID from a named table
+sub get_table_id {
+  my $self   = shift;
+  my $table  = shift;
+  my @ids    = @_;
+
+  # The identifiers double as a cache key.  Warnings are silenced while
+  # joining because an identifier may be undef.
+  my $id_key;
+  {
+    local $^W=0;
+    $id_key = join ':',@ids;
+  }
+
+  my $s   = $self->{load_stuff};
+  my $sth = $s->{sth};
+  my $dbh = $self->features_db;
+
+  unless (defined($s->{$table}{$id_key})) {
+
+    if ($sth->{"lookup_$table"}->execute(@ids) > 0) {
+      # The row already exists: remember its id.
+      $s->{$table}{$id_key} = ($sth->{"lookup_$table"}->fetchrow_array)[0];
+    }
+    elsif (defined($sth->{"next_id_$table"})) {
+      # This table cannot use auto-increment: take the new id from the
+      # next_id statement, insert the row under that id, then run the
+      # auxiliary statement (when one is configured).
+      $sth->{"next_id_$table"}->execute();
+      $s->{$table}{$id_key} = ($sth->{"next_id_$table"}->fetchrow_array)[0];
+      if ($s->{$table}{$id_key}) {
+        $sth->{"insert_$table"}->execute($s->{$table}{$id_key},@ids);
+        $sth->{"aux_insert_$table"}->execute() if $sth->{"aux_insert_$table"};
+      }
+    }
+    else {
+      # Auto-increment table: insert, then ask the adaptor for the id
+      # of the row just inserted.
+      $sth->{"insert_$table"}->execute(@ids);
+      $s->{$table}{$id_key} = $self->insertid($sth->{"insert_$table"}) unless $s->{$table}{$id_key};
+      $sth->{"aux_insert_$table"}->execute() if $sth->{"aux_insert_$table"};
+    }
+  }
+
+  my $id = $s->{$table}{$id_key};
+  unless (defined $id) {
+    warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
+    return;
+  }
+  $id;
+}
+
+
+
+#-----------------------------------
+=head2 make_features_by_name_where_part
+                                                                                                                             
+ Title   : make_features_by_name_where_part
+ Usage   : $db->make_features_by_name_where_part
+ Function: create the SQL fragment needed to select a feature by its group name & class
+ Returns : a SQL fragment and bind arguments
+ Args    : see below
+ Status  : Protected
+
+=cut
+
+sub make_features_by_name_where_part {
+  my ($self,$class,$name) = @_;
+
+  # '*' wildcards are translated to SQL '%'; tr returns how many were
+  # replaced, which tells us whether a LIKE comparison is needed.
+  my $wildcards = ($name =~ tr/*/%/);
+  my $where = $wildcards
+    ? "cmap_feature.gclass=? AND cmap_feature.feature_name LIKE ?"
+    : "cmap_feature.gclass=? AND cmap_feature.feature_name=?";
+
+  return ($where,$class,$name);
+}
+
+=head2 make_features_join_part
+
+ Title   : make_features_join_part
+ Usage   : $string = $db->make_features_join_part()
+ Function: make join part of the features query
+ Returns : a string
+ Args    : none
+ Status  : protected
+
+This method creates the part of the features query that immediately
+follows the WHERE keyword.
+
+=cut
+
+sub make_features_join_part {
+  my ($self,$options) = @_;
+  $options ||= {};
+
+  # Joins that are always needed: group and type against the feature row.
+  my $join = <<BASE;
+  cmap_feature.feature_id = fdata.feature_id
+  AND ftype.ftypeid = fdata.ftypeid
+BASE
+
+  # The attribute tables are joined in only for attribute queries.
+  $join .= <<ATTRIBUTES if $options->{attributes};
+  AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
+  AND fdata.fid=fattribute_to_feature.fid
+ATTRIBUTES
+
+  return $join;
+}
+
+sub getseqcoords_query {
+   # SQL text for locating a group by feature name and class.
+   my ($self) = @_;
+   my $sql = GETSEQCOORDS;
+   return $sql;
+}
+
+sub getaliascoords_query{
+  # SQL text for locating a group through an exact 'Alias' attribute value.
+  my ($self) = @_;
+  my $sql = GETALIASCOORDS;
+  return $sql;
+}
+
+sub getforcedseqcoords_query{
+  # SQL text for locating a group on one given reference sequence.
+  my ($self) = @_;
+  my $sql = GETFORCEDSEQCOORDS;
+  return $sql;
+}
+
+# Hand back the value of the GETALIASLIKE constant (presumably an SQL
+# template declared earlier in this module -- not visible from here).
+sub getaliaslike_query {
+  my ($self) = @_;
+  return GETALIASLIKE();
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text search string",$limit)
+ Function: Search the notes (attribute values) for the words of a text string, using SQL LIKE matching
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+Given a search string, this method splits it into words, selects the
+attribute values that contain any of those words, and returns an array of results.
+Each row of the returned array is a arrayref containing the following fields:
+
+  column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
+  column 2     The text of the note
+  column 3     A relevance score.
+
+=cut
+
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  # '*' and '?' are deleted rather than treated as wildcards
+  $search_string =~ tr/*?//d;
+
+  # Only runs of word characters survive, so the terms contain no quotes or
+  # regex metacharacters and can be interpolated into the SQL and the regex.
+  my @words    = $search_string =~ /(\w+)/g;
+  my $regex    = join '|',@words;
+  my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
+  my $search   = join(' OR ',@searches);
+
+  my $query = <<END;
+SELECT distinct gclass,feature_name as gname,fattribute_value
+  FROM cmap_feature,fattribute_to_feature,fdata
+  WHERE cmap_feature.feature_id=fdata.feature_id
+     AND fdata.fid=fattribute_to_feature.fid
+END
+;
+  # without any search words the query is left unfiltered
+  $query .= " AND ($search) " if ($search);
+
+  my $sth = $self->dbh->do_query($query);
+  my @results;
+  while (my ($class,$name,$note) = $sth->fetchrow_array) {
+     next unless $class && $name;    # sorry, ignore NULL objects
+
+     # Relevance is 10 points per occurrence of any search word.  With no
+     # words the pattern would be "()", which matches the empty string at
+     # every position, so such rows (and NULL notes) are scored 0 instead.
+     my @matches = (@words && defined $note) ? ($note =~ /($regex)/g) : ();
+     my $relevance = 10*@matches;
+     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+     push @results,[$featname,$note,$relevance];
+     last if $limit && @results >= $limit;
+  }
+  @results;
+}
+
+# sub search_notes {
+#   my $self = shift;
+#   my ($search_string,$limit) = @_;
+#   my $query = FULLTEXTSEARCH;
+#   $query .= " limit $limit" if defined $limit;
+#   my $sth = $self->dbh->do_query($query,$search_string,$search_string);
+#   my @results;
+#   while (my ($class,$name,$note,$relevance) = $sth->fetchrow_array) {
+#      next unless $class && $name;    # sorry, ignore NULL objects
+#      $relevance = sprintf("%.2f",$relevance);  # trim long floats
+#      my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+#      push @results,[$featname,$note,$relevance];
+#   }
+#   @results;
+# }
+
+=head2 make_features_order_by_part
+
+ Title   : make_features_order_by_part
+ Usage   : ($query,@args) = $db->make_features_order_by_part()
+ Function: make the ORDER BY part of the features() query
+ Returns : a SQL fragment and bind arguments, if any
+ Args    : none
+ Status  : protected
+
+This method creates the part of the features query that immediately
+follows the ORDER BY part of the query issued by features() and
+related methods.
+
+=cut
+
+sub make_features_order_by_part {
+  # An options hashref may be passed, but it has no influence on the result:
+  # features are always ordered by name.
+  my ($self,$options) = @_;
+  my $order_by = 'cmap_feature.feature_name';
+  return $order_by;
+}
+
+=head2 create_cmap_viewer_link
+
+ Title   : create_cmap_viewer_link
+ Usage   : $link_str = $db->create_cmap_viewer_link(data_source=>$ds,group_id=>$gid)
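+ Function: build the URL of a CMap viewer page that highlights the given feature
+ Returns : a relative URL string, or undef if the feature is not found
+ Args    : data_source => CMap data source name, group_id => feature_id of the feature
+ Status  : 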
+
+
+=cut
+
+# Build the CMap viewer URL for one feature.
+#   data_source => value copied into the data_source= URL parameter
+#   group_id    => feature_id looked up in cmap_feature
+# Returns the URL string, or undef when no group_id is given or no row matches.
+sub create_cmap_viewer_link {
+  my $self = shift;
+  my %args = @_;
+  my $data_source = $args{'data_source'};
+  my $gid         = $args{'group_id'};
+  my $link_str    = undef;
+
+  # nothing can be looked up without a feature id
+  return $link_str unless defined $gid;
+
+  my $db = $self->features_db;
+
+  # The feature id is supplied as a bind value instead of being pasted
+  # into the statement text, so it can never alter the SQL.
+  my $sql_str = q[
+    select f.feature_name,
+        f.feature_type_accession feature_type_aid,
+        m.accession_id as map_aid,
+        ms.accession_id as map_set_aid
+    from cmap_feature f,
+        cmap_map m,
+        cmap_map_set ms
+    where f.map_id=m.map_id
+        and ms.map_set_id=m.map_set_id
+        and f.feature_id=?
+    ];
+
+  my $result_ref = $db->selectrow_hashref($sql_str,{ Columns => {} },$gid);
+
+  if ( $result_ref ) {
+    $link_str='/cgi-bin/cmap/viewer?ref_map_set_aid='
+      . $result_ref->{'map_set_aid'}
+      . '&ref_map_aids='
+      . $result_ref->{'map_aid'}
+      . '&data_source='
+      . $data_source
+      . '&highlight='
+      .$result_ref->{'feature_name'}
+      . '&feature_type_'
+      .$result_ref->{'feature_type_aid'}
+      . '=2';
+  }
+
+  return $link_str;
+}
+
+1;
+
+
+__END__
+
+=head1 BUGS
+
+none ;-)
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Ben Faga E<lt>faga at cshl.orgE<gt>.
+
+Modified from mysql.pm by:
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+package Bio::DB::GFF::Adaptor::dbi::mysqlopt;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::mysqlopt -- Deprecated database adaptor
+
+=head1 SYNOPSIS
+
+This adaptor has been superseded by Bio::DB::GFF::Adaptor::dbi::mysql.
+
+See L<Bio::DB::GFF> and L<Bio::DB::GFF::Adaptor::dbi::mysql>
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+use base qw(Bio::DB::GFF::Adaptor::dbi::mysql);
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1015 @@
+package Bio::DB::GFF::Adaptor::dbi::oracle;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::oracle -- Database adaptor for a specific oracle schema
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>
+
+=cut
+
+# a simple oracle adaptor
+use strict;
+#use Bio::DB::GFF::Adaptor::dbi::mysql;
+#use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
+use Bio::DB::GFF::Util::Binning;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use base qw(Bio::DB::GFF::Adaptor::dbi);
+
+use constant MAX_SEGMENT => 100_000_000;  # the largest a segment can get
+# fallback returned by chunk_size() when no 'chunk_size' meta value is set
+use constant DEFAULT_CHUNK => 2000;
+
+# SQL returned by getseqcoords_query(): the extent (min start, max stop) of a
+# group on each reference/strand.  Placeholders: group name, then group class.
+use constant GETSEQCOORDS =><<END;
+SELECT fref,
+       NVL(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup
+  WHERE fgroup.gname=?
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    GROUP BY fref,fstrand,gclass,gname
+END
+;
+
+# SQL returned by getaliascoords_query(): same columns as GETSEQCOORDS, but the
+# group is found through an exact match on one of its 'Alias' attribute values.
+# Placeholders: alias value, then group class.
+use constant GETALIASCOORDS =><<END;
+SELECT fref,
+       NVL(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup,fattribute,fattribute_to_feature
+  WHERE fattribute_to_feature.fattribute_value=?
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,gclass,gname
+END
+;
+
+# SQL returned by getaliaslike_query(): like GETALIASCOORDS, but the 'Alias'
+# attribute value is matched with LIKE.  Placeholders: alias pattern, then
+# group class.
+# gclass is selected without an aggregate, so it has to appear in the
+# GROUP BY list (as it does in GETALIASCOORDS); the class is fixed by the
+# WHERE clause, so this does not change how rows are grouped.
+use constant GETALIASLIKE =><<END;
+SELECT fref,
+       NVL(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup,fattribute,fattribute_to_feature
+  WHERE fattribute_to_feature.fattribute_value LIKE ?
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,gclass,gname
+END
+;
+
+
+# SQL returned by getforcedseqcoords_query(): the extent of a group restricted
+# to one reference sequence.  Placeholders: group name, group class, then the
+# reference sequence name.  Unlike GETSEQCOORDS, gname is not selected.
+use constant GETFORCEDSEQCOORDS =><<END;
+SELECT fref,
+       NVL(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand
+  FROM fdata,fgroup
+  WHERE fgroup.gname=?
+    AND fgroup.gclass=?
+    AND fdata.fref=?
+    AND fgroup.gid=fdata.gid
+    GROUP BY fref,fstrand,gclass
+END
+;
+
+########################
+# moved from mysqlopt.pm
+########################
+
+# this is the largest that any reference sequence can be (100 megabases);
+# max_bin() falls back to it when no 'max_bin' meta value is set
+use constant MAX_BIN    => 100_000_000;
+
+# this is the smallest bin (1 K);
+# min_bin() falls back to it when no 'min_bin' meta value is set
+use constant MIN_BIN    => 1000;
+
+# size of range over which it is faster to force mysql to use the range for indexing
+# (wording inherited from mysqlopt.pm); straight_join_limit() falls back to it
+# when no 'straight_join_limit' meta value is set
+use constant STRAIGHT_JOIN_LIMIT => 200_000;
+
+##############################################################################
+
+=head1 DESCRIPTION
+
+This adaptor implements a specific oracle database schema that is
+compatible with Bio::DB::GFF.  It inherits from
+Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
+
+The schema uses several tables:
+
+=over 4
+
+=item fdata
+
+This is the feature data table.  Its columns are:
+
+    fid	           feature ID (integer)
+    fref           reference sequence name (string)
+    fstart         start position relative to reference (integer)
+    fstop          stop position relative to reference (integer)
+    ftypeid        feature type ID (integer)
+    fscore         feature score (float); may be null
+    fstrand        strand; one of "+" or "-"; may be null
+    fphase         phase; one of 0, 1 or 2; may be null
+    gid            group ID (integer)
+    ftarget_start  for similarity features, the target start position (integer)
+    ftarget_stop   for similarity features, the target stop position (integer)
+
+Note that it would be desirable to normalize the reference sequence
+name, since there are usually many features that share the same
+reference feature.  However, in the current schema, query performance
+suffers dramatically when this additional join is added.
+
+=item fgroup
+
+This is the group table. There is one row for each group.  Columns:
+
+    gid	      the group ID (integer)
+    gclass    the class of the group (string)
+    gname     the name of the group (string)
+
+The group table serves multiple purposes.  As you might expect, it is
+used to cluster features that logically belong together, such as the
+multiple exons of the same transcript.  It is also used to assign a
+name and class to a singleton feature.  Finally, the group table is
+used to identify the target of a similarity hit.  This is consistent
+with the way in which the group field is used in the GFF version 2
+format.
+
+The fgroup.gid field joins with the fdata.gid field. 
+
+Examples:
+
+  sql> select * from fgroup where gname='sjj_2L52.1';
+  +-------+-------------+------------+
+  | gid   | gclass      | gname      |
+  +-------+-------------+------------+
+  | 69736 | PCR_product | sjj_2L52.1 |
+  +-------+-------------+------------+
+  1 row in set (0.70 sec)
+
+  sql> select fref,fstart,fstop from fdata,fgroup 
+            where gclass='PCR_product' and gname = 'sjj_2L52.1' 
+                  and fdata.gid=fgroup.gid;
+  +---------------+--------+-------+
+  | fref          | fstart | fstop |
+  +---------------+--------+-------+
+  | CHROMOSOME_II |   1586 |  2355 |
+  +---------------+--------+-------+
+  1 row in set (0.03 sec)
+
+=item ftype
+
+This table contains the feature types, one per row.  Columns are:
+
+    ftypeid      the feature type ID (integer)
+    fmethod      the feature type method name (string)
+    fsource      the feature type source name (string)
+
+The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:
+
+  sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype 
+         where gclass='PCR_product' 
+               and gname = 'sjj_2L52.1'
+               and fdata.gid=fgroup.gid
+               and fdata.ftypeid=ftype.ftypeid;
+  +---------------+--------+-------+-------------+-----------+
+  | fref          | fstart | fstop | fmethod     | fsource   |
+  +---------------+--------+-------+-------------+-----------+
+  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
+  +---------------+--------+-------+-------------+-----------+
+  1 row in set (0.08 sec)
+
+=item fdna
+
+This table holds the raw DNA of the reference sequences.  It has three
+columns:
+
+    fref          reference sequence name (string)
+    foffset       offset of this sequence
+    fdna          the DNA sequence (longblob)
+
+To overcome problems loading large blobs, DNA is automatically
+fragmented into multiple segments when loading, and the position of
+each segment is stored in foffset.  The fragment size is controlled by
+the -clump_size argument during initialization.
+
+=item fattribute_to_feature
+
+This table holds "attributes", which are tag/value pairs stuffed into
+the GFF line.  The first tag/value pair is treated as the group, and
+anything else is treated as an attribute (weird, huh?).
+
+ CHR_I assembly_tag Finished     2032 2036 . + . Note "Right: cTel33B"
+ CHR_I assembly_tag Polymorphism 668  668  . + . Note "A->C in cTel33B"
+
+The columns of this table are:
+
+    fid                 feature ID (integer)
+    fattribute_id       ID of the attribute (integer)
+    fattribute_value    text of the attribute (text)
+
+The fdata.fid column joins with fattribute_to_feature.fid.
+
+=item fattribute
+
+This table holds the normalized names of the attributes.  Fields are:
+
+  fattribute_id      ID of the attribute (integer)
+  fattribute_name    Name of the attribute (varchar)
+
+=back
+
+=head2 Data Loading Methods
+
+In addition to implementing the abstract SQL-generating methods of
+Bio::DB::GFF::Adaptor::dbi, this module also implements the data
+loading functionality of Bio::DB::GFF.
+
+=cut
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(@args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    : see below
+ Status  : Public
+
+The new constructor is identical to the "dbi" adaptor's new() method,
+except that the prefix "dbi:Oracle:" is added to the database DSN identifier
+automatically if it is not there already.
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:Oracle:ens0040' or "ens0040"
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+=cut
+
+#'
+
+# Constructor: accepts the data source under -featuredb, -db or -dsn, adds the
+# "dbi:Oracle:" prefix to a plain string DSN that does not already start with
+# "dbi:"/"DBI:", and passes it with all remaining arguments to the parent
+# class constructor.
+sub new {
+  my $class = shift;
+  my ($dsn,$other) = rearrange([
+                                [qw(FEATUREDB DB DSN)],
+                               ],@_);
+  # a reference (e.g. an already-built handle) is passed through untouched
+  $dsn = "dbi:Oracle:$dsn" if !ref($dsn) && $dsn !~ /^(dbi|DBI):/;
+  my $self = $class->SUPER::new(-dsn=>$dsn,%$other);
+  $self;
+}
+
+=head2 schema
+
+ Title   : schema
+ Usage   : $schema = $db->schema
+ Function: return the CREATE script for the schema
+ Returns : a hash reference of CREATE statements, keyed by table name
+ Args    : none
+ Status  : protected
+
+This method returns a hash reference containing the various CREATE statements
+needed to initialize the database tables.
+
+=cut
+
+# Build and return (as a hash reference) the Oracle DDL for this adaptor.
+# The hash is keyed by table name; each entry holds
+#   table    => the CREATE TABLE statement
+#   index    => { index name    => CREATE INDEX statement }     (optional)
+#   sequence => { sequence name => CREATE SEQUENCE statement }  (optional)
+#   trigger  => { trigger name  => CREATE TRIGGER statement }   (optional)
+# Each trigger fills the table's integer key from its sequence when a row is
+# inserted with a NULL or 0 key.  setup_load() reads the sequence names from
+# this structure to build its "SELECT <sequence>.CURRVAL" statements.
+# NOTE(review): several columns are declared DEFAULT '' NOT NULL; Oracle is
+# generally understood to treat '' as NULL -- confirm these defaults behave
+# as intended.
+sub schema {
+  my %schema = (
+		fdata =>{
+table=> q{
+create table fdata (
+  fid INTEGER  NOT NULL,
+  fref VARCHAR(100) DEFAULT '' NOT NULL,
+  fstart INTEGER DEFAULT '0' NOT NULL,
+  fstop INTEGER DEFAULT '0' NOT NULL,
+  fbin NUMBER DEFAULT '0.000000' NOT NULL,
+  ftypeid INTEGER DEFAULT '0' NOT NULL,
+  fscore NUMBER  ,
+  fstrand VARCHAR2(3)   CHECK (fstrand IN ('+','-')),
+  fphase VARCHAR2(3)   CHECK (fphase IN ('0','1','2')),
+  gid INTEGER DEFAULT '0' NOT NULL,
+  ftarget_start INTEGER  ,
+  ftarget_stop INTEGER  ,
+  CONSTRAINT fdata_pk PRIMARY KEY (fid)
+)
+}, # fdata table
+
+index=>{
+		fdata_fref_idx => q{
+CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
+},
+	
+		fdata_ftypeid_idx => q{
+CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
+},
+
+		fdata_gid_idx => q{
+CREATE  INDEX fdata_gid_idx ON fdata (gid)
+}
+	 }, # fdata indexes
+
+sequence=> {
+		fdata_fid_sq => q{
+CREATE SEQUENCE fdata_fid_sq START WITH 1
+}
+	    }, # fdata sequences
+
+trigger=> {
+		fdata_fid_ai => q{
+CREATE OR REPLACE TRIGGER fdata_fid_ai
+BEFORE INSERT ON fdata
+FOR EACH ROW WHEN (new.fid IS NULL OR new.fid = 0)
+BEGIN
+   SELECT fdata_fid_sq.nextval INTO :new.fid FROM dual;
+END;
+}
+	   }# fdata triggers
+			
+}, # fdata
+
+
+
+		fgroup => { 
+table => q{
+CREATE TABLE fgroup (
+  gid INTEGER  NOT NULL,
+  gclass VARCHAR(100)  ,
+  gname VARCHAR(100)  ,
+  CONSTRAINT fgroup_pk PRIMARY KEY (gid)
+)
+}, # fgroup table
+
+index => {
+		fgroup_gclass_idx => q{
+CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
+}
+	   }, # fgroup indexes
+
+sequence => {
+
+		fgroup_gid_sq => q{
+CREATE SEQUENCE fgroup_gid_sq START WITH 1
+}
+	     }, # fgroup sequences
+
+
+trigger => {
+		fgroup_gid_ai => q{
+CREATE OR REPLACE TRIGGER fgroup_gid_ai
+BEFORE INSERT ON fgroup
+FOR EACH ROW WHEN (new.gid IS NULL OR new.gid = 0)
+BEGIN
+   SELECT fgroup_gid_sq.nextval INTO :new.gid FROM dual;
+END;
+}
+	    } # fgroup triggers
+
+}, # fgroup
+
+		ftype => { 
+table => q{
+CREATE TABLE ftype (
+  ftypeid INTEGER  NOT NULL,
+  fmethod VARCHAR(100) DEFAULT '' NOT NULL,
+  fsource VARCHAR(100),
+  CONSTRAINT ftype_pk PRIMARY KEY (ftypeid)
+)
+}, # ftype table
+
+index => {
+		ftype_fmethod_idx => q{
+CREATE  INDEX ftype_fmethod_idx ON ftype (fmethod)
+},
+
+		ftype_fsource_idx => q{
+CREATE  INDEX ftype_fsource_idx ON ftype (fsource)
+},
+	
+		ftype_ftype_idx => q{
+CREATE UNIQUE INDEX ftype_ftype_idx ON ftype (fmethod,fsource)
+}
+	   }, # ftype indexes
+
+sequence => {
+		ftype_ftypeid_sq => q{
+CREATE SEQUENCE ftype_ftypeid_sq START WITH 1
+}
+	     }, #ftype sequences
+
+trigger => {
+		ftype_ftypeid_ai => q{
+CREATE OR REPLACE TRIGGER ftype_ftypeid_ai
+BEFORE INSERT ON ftype
+FOR EACH ROW WHEN (new.ftypeid IS NULL OR new.ftypeid = 0)
+BEGIN
+   SELECT ftype_ftypeid_sq.nextval INTO :new.ftypeid FROM dual;
+END;
+}
+	    } #ftype triggers
+}, # ftype
+
+
+         fdna => {
+table => q{
+CREATE TABLE fdna (
+  fref VARCHAR(100) DEFAULT '' NOT NULL,
+  foffset INTEGER DEFAULT '0' NOT NULL,
+  fdna LONG         /* LONGBLOB */  ,
+  CONSTRAINT fdna_pk PRIMARY KEY (fref,foffset)
+)
+} #fdna table
+		 }, #fdna 
+
+        fmeta => {
+table => q{
+CREATE TABLE fmeta (
+  fname VARCHAR(255) DEFAULT '' NOT NULL,
+  fvalue VARCHAR(255) DEFAULT '' NOT NULL,
+  CONSTRAINT fmeta_pk PRIMARY KEY (fname)
+)
+} # fmeta table
+		 }, # fmeta
+
+
+       fattribute => {
+table => q{
+CREATE TABLE fattribute (
+  fattribute_id INTEGER  NOT NULL,
+  fattribute_name VARCHAR(255) DEFAULT '' NOT NULL,
+  CONSTRAINT fattribute_pk PRIMARY KEY (fattribute_id)
+)
+}, # fattribute table
+
+sequence=> {
+       fattribute_fattribute_id_sq => q{
+CREATE SEQUENCE fattribute_fattribute_id_sq START WITH 1
+}
+	    }, # fattribute sequences
+
+trigger => {
+       fattribute_fattribute_id_ai => q{
+CREATE OR REPLACE TRIGGER fattribute_fattribute_id_ai
+BEFORE INSERT ON fattribute
+FOR EACH ROW WHEN (new.fattribute_id IS NULL OR new.fattribute_id = 0)
+BEGIN
+   SELECT fattribute_fattribute_id_sq.nextval INTO :new.fattribute_id FROM dual;
+END;
+}
+	    } # fattribute triggers
+}, # fattribute
+
+       fattribute_to_feature => {
+table => q{
+CREATE TABLE fattribute_to_feature (
+  fid INTEGER DEFAULT '0' NOT NULL,
+  fattribute_id INTEGER DEFAULT '0' NOT NULL,
+  fattribute_value VARCHAR2(255) /* TEXT */  
+)
+}, # fattribute_to_feature table
+
+index => {
+       fattribute_to_feature_fid => q{
+CREATE  INDEX fattribute_to_feature_fid ON fattribute_to_feature (fid,fattribute_id)
+}
+	   } # fattribute_to_feature indexes
+} # fattribute_to_feature  
+
+
+);
+  return \%schema;
+}
+
+
+=head2 do_initialize
+
+ Title   : do_initialize
+ Usage   : $success = $db->do_initialize($drop_all)
+ Function: initialize the database
+ Returns : a boolean indicating the success of the operation
+ Args    : a boolean indicating whether to delete existing data
+ Status  : protected
+
+This method will load the schema into the database.  If $drop_all is
+true, then any existing data in the tables known to the schema will be
+deleted.
+
+Internally, this method calls schema() to get the schema data.
+
+=cut
+
+# Create the schema from scratch.
+# You will need create privileges for this.
+#sub do_initialize {
+#  my $self = shift;
+#  my $erase = shift;
+#  $self->drop_all if $erase;
+
+#  my $dbh = $self->features_db;
+#  my $schema = $self->schema;
+ 
+#  foreach my $table_name(keys %$schema) {
+#    my $create_table_stmt = $$schema{$table_name}{table} ;
+#    $dbh->do($create_table_stmt) ||  warn $dbh->errstr;    
+#  }
+#  1;
+#}
+
+
+
+=head2 drop_all
+
+ Title   : drop_all
+ Usage   : $db->drop_all
+ Function: empty the database
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This method drops the tables known to this module.  Internally it
+calls the abstract tables() method.
+
+=cut
+
+# Drop all the GFF tables -- dangerous!
+#sub drop_all {
+#  my $self = shift;
+#  my $dbh = $self->features_db;
+#  local $dbh->{PrintError} = 0;
+#  foreach ($self->tables) {
+#    $dbh->do("drop table $_");
+#  }
+#}
+
+
+
+
+
+
+=head2 setup_load
+
+ Title   : setup_load
+ Usage   : $db->setup_load
+ Function: called before load_gff_line()
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This method performs schema-specific initialization prior to loading a
+set of GFF records.  It prepares a set of DBI statement handlers to be 
+used in loading the data.
+
+=cut
+
+sub setup_load {
+  my $self      = shift;
+  my $schema = $self->schema;
+
+  my $dbh = $self->features_db;
+
+  if ($self->lock_on_load) {
+    # NOTE(review): "LOCK TABLES ... WRITE" looks like MySQL syntax carried
+    # over from the mysql adaptor -- confirm that it is accepted by Oracle.
+    my @tables = map { "$_ WRITE"} $self->tables;
+    my $tables = join ', ',@tables;
+    $dbh->do("LOCK TABLES $tables");
+  }
+
+  # For each auto-numbered table three statements are prepared: a lookup by
+  # natural key, an insert, and a "SELECT <sequence>.CURRVAL" that fetches the
+  # id just assigned by the table's insert trigger.  The sequence name is the
+  # first (only) key of the table's {sequence} entry in schema().
+  my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
+  my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
+  my $sequence_type = (keys %{$schema->{ftype}{sequence}})[0];
+  my $insertid_type = $dbh->prepare_delayed("SELECT $sequence_type.CURRVAL FROM dual");
+
+  my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
+  my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
+  my $sequence_group = (keys %{$schema->{fgroup}{sequence}})[0];
+  my $insertid_group = $dbh->prepare_delayed("SELECT $sequence_group.CURRVAL FROM dual");
+
+  my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
+  my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
+  my $sequence_attribute = (keys %{$schema->{fattribute}{sequence}})[0];
+  my $insertid_attribute = $dbh->prepare_delayed("SELECT $sequence_attribute.CURRVAL FROM dual");
+
+  my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
+
+  my $insert_data  = $dbh->prepare_delayed(<<END);
+INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
+		   fstrand,fphase,gid,ftarget_start,ftarget_stop)
+       VALUES(?,?,?,?,?,?,?,?,?,?,?)
+END
+;
+  # used by load_gff_line() to replace a row that collides with the unique index
+  my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
+  my $sequence_data =  (keys %{$schema->{fdata}{sequence}})[0];
+  my $insertid_data = $dbh->prepare_delayed("SELECT $sequence_data.CURRVAL FROM dual");
+
+
+  # Statement handles are stored under "<action>_<table>" so that
+  # get_table_id() and insertid() can find them by table name.
+  $self->{load_stuff}{sth}{lookup_ftype}     = $lookup_type;
+  $self->{load_stuff}{sth}{insert_ftype}     = $insert_type;
+  $self->{load_stuff}{sth}{insertid_ftype}   = $insertid_type;
+  $self->{load_stuff}{sth}{lookup_fgroup}    = $lookup_group;
+  $self->{load_stuff}{sth}{insert_fgroup}    = $insert_group;
+  $self->{load_stuff}{sth}{insertid_fgroup}  = $insertid_group;
+  $self->{load_stuff}{sth}{insert_fdata}     = $insert_data;
+  $self->{load_stuff}{sth}{insertid_fdata}   = $insertid_data;
+  $self->{load_stuff}{sth}{delete_existing_fdata} = $delete_existing_data;
+  $self->{load_stuff}{sth}{lookup_fattribute} = $lookup_attribute;
+  $self->{load_stuff}{sth}{insert_fattribute} = $insert_attribute;
+  $self->{load_stuff}{sth}{insertid_fattribute} = $insertid_attribute;
+  $self->{load_stuff}{sth}{insert_fattribute_value} = $insert_attribute_value;
+  $self->{load_stuff}{types}  = {};
+  $self->{load_stuff}{groups} = {};
+  $self->{load_stuff}{counter} = 0;
+}
+
+=head2 load_gff_line
+
+ Title   : load_gff_line
+ Usage   : $db->load_gff_line($fields)
+ Function: called to load one parsed line of GFF
+ Returns : true if successfully inserted
+ Args    : hashref containing GFF fields
+ Status  : protected
+
+This method is called once per line of the GFF and passed a series of
+parsed data items that are stored into the hashref $fields.  The keys are:
+
+ ref          reference sequence
+ source       annotation source
+ method       annotation method
+ start        annotation start
+ stop         annotation stop
+ score        annotation score (may be undef)
+ strand       annotation strand (may be undef)
+ phase        annotation phase (may be undef)
+ group_class  class of annotation's group (may be undef)
+ group_name   ID of annotation's group (may be undef)
+ target_start start of target of a similarity hit
+ target_stop  stop of target of a similarity hit
+ attributes   array reference of attributes, each of which is a [tag=>value] array ref
+
+=cut
+
+sub load_gff_line {
+  my ($self,$gff) = @_;
+
+  # GFF writes "." for a missing value; such fields are reset to undef
+  if (defined $gff->{phase}) {
+    chomp($gff->{phase});
+    $gff->{phase} = undef if $gff->{phase} eq '.';
+  }
+  for my $field (qw(strand score)) {
+    $gff->{$field} = undef if defined $gff->{$field} && $gff->{$field} eq '.';
+  }
+
+  my $load = $self->{load_stuff};
+  my $dbh  = $self->features_db;
+  local $dbh->{PrintError} = 0;
+
+  # resolve (or create) the type and group rows; give up on the line if either fails
+  my $typeid = $self->get_table_id('ftype', $gff->{method}, $gff->{source});
+  return unless defined $typeid;
+  my $groupid = $self->get_table_id('fgroup', $gff->{gname}, $gff->{gclass});
+  return unless defined $groupid;
+
+  my $bin = bin($gff->{start},$gff->{stop},$self->min_bin);
+
+  # bind values for the fdata insert, in column order
+  my @row_values = ($gff->{ref},
+                    $gff->{start},$gff->{stop},$bin,
+                    $typeid,
+                    $gff->{score},$gff->{strand},$gff->{phase},
+                    $groupid,
+                    $gff->{tstart},$gff->{tstop});
+  # bind values identifying an existing row with the same key
+  my @key_values = ($gff->{ref},
+                    $gff->{start},$gff->{stop},$bin,
+                    $typeid,
+                    $groupid);
+
+  my $insert = $load->{sth}{insert_fdata};
+  my $result = $insert->execute(@row_values);
+
+  if (defined ($dbh->errstr)) {
+    print  $dbh->errstr,"\n" ,%$gff,"\n";
+
+    print "PHASE=$gff->{phase}"."===","\n"
+      if $dbh->errstr =~ /ORA-02290: check constraint/;
+
+    # a duplicate row is replaced: delete the old one and insert again
+    if ($dbh->errstr =~ /ORA-00001: unique constraint/) {
+      $result = $load->{sth}{delete_existing_fdata}->execute(@key_values);
+      print "delete row result=$result\n";
+
+      $result = $insert->execute(@row_values);
+      print "insert row result=$result\n";
+    }
+  }
+
+  unless ($result) {
+    # the "and" chain means we only return once both diagnostics were emitted
+    warn($dbh->errstr,"\n")
+      and print("ref=",$gff->{ref}," start=",$gff->{start}," stop=",$gff->{stop}," bin=",$bin," typeid=",$typeid," groupid=",$groupid,"\n")
+      and return;
+  }
+
+  # id of the new row: the sequence's CURRVAL, else look the feature up again
+  my $fid = $self->insertid($load->{sth},'fdata')
+    || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
+
+  # insert attributes, each a [tag => value] pair
+  foreach my $attribute (@{$gff->{attributes}}) {
+    my $attribute_id = $self->get_table_id('fattribute',$attribute->[0]);
+    return unless defined $attribute_id;
+    $load->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$attribute->[1]);
+  }
+
+  # progress report on STDERR every 1000 records
+  my $loaded = ++$load->{counter};
+  if (($loaded % 1000) == 0) {
+    print STDERR "$loaded records loaded...";
+    my $line_end = (-t STDOUT && !$ENV{EMACS}) ? "\r" : "\n";
+    print STDERR $line_end;
+  }
+
+  return $fid;
+}
+
+
+
+
+=head2 get_table_id
+
+ Title   : get_table_id
+ Usage   : $integer = $db->get_table_id($table,@ids)
+ Function: get the ID of a group or type
+ Returns : an integer ID or undef
+ Args    : none
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature type or group.  The arguments are the name
+of the table, and two string identifiers.  For feature types, the
+identifiers are the method and source.  For groups, the identifiers
+are group name and class.
+
+This method requires that a statement handler named I<lookup_$table>,
+have been created previously by setup_load().  It is here to overcome
+deficiencies in mysql's INSERT syntax.
+
+=cut
+
+#'
+# get the object ID from a named table
+# Return the integer id of the row in $table identified by @ids, inserting the
+# row first if it does not exist.  Ids are cached in
+# $self->{load_stuff}{$table} under the key join(':',@ids).
+# Returns undef (after a warning) if no id could be obtained.
+sub get_table_id {
+  my $self   = shift;
+  my $table  = shift;
+  my @ids    = @_;
+
+  # irritating warning for null id
+  my $id_key;
+  {
+    local $^W=0;
+    $id_key = join ':',@ids;
+  }
+
+  my $s   = $self->{load_stuff};
+  my $sth = $s->{sth};
+  my $dbh = $self->features_db;
+
+  unless (defined($s->{$table}{$id_key})) {
+    # not cached yet: try the database first
+    $sth->{"lookup_$table"}->execute(@ids);
+    my @result = $sth->{"lookup_$table"}->fetchrow_array;
+    if (@result > 0) {
+      $s->{$table}{$id_key} = $result[0];
+    } else {
+      # no such row: insert it, then read the id assigned from the sequence
+      $sth->{"insert_$table"}->execute(@ids)
+	&& ($s->{$table}{$id_key} = $self->insertid($sth,$table));
+	#&& ($s->{$table}{$id_key} = $self->insertid($sth->{"insertid_$table"}));
+	#&& ($s->{$table}{$id_key} = $sth->{"insert_$table"}->insertid);
+    }
+  }
+
+  my $id = $s->{$table}{$id_key};
+  unless (defined $id) {
+    warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
+    return;
+  }
+  $id;
+}
+
+# Return the id most recently generated for $table by running the prepared
+# "insertid_$table" statement (a SELECT of the sequence's CURRVAL).
+#   $sth   - hash reference of statement handles built by setup_load()
+#   $table - table name used to pick the handle
+# Returns the id, or nothing (after a warning) if the statement fails.
+sub insertid {
+  my $self = shift;
+  my $sth = shift ;
+  my $table = shift;
+
+  my $currval_sth = $sth->{"insertid_$table"};
+
+  my $insert_id;
+  if ($currval_sth->execute()){
+     $insert_id = ($currval_sth->fetchrow_array)[0];
+  }
+  else{
+    # $sth is a plain hash of handles, so the error text has to come from the
+    # statement handle itself
+    warn "No CURRVAL for SEQUENCE of table $table ",$currval_sth->errstr,"\n";
+    return;
+  }
+  return $insert_id;
+}
+
+
+#sub insertid {
+#  my $self = shift;
+#  my $insertid_sth = shift ;
+#  my $insert_id;
+#  if ($insertid_sth->execute){
+#     $insert_id = ($insertid_sth->fetchrow_array)[0];
+#  }
+#  else{
+#    warn "No CURRVAL for SEQUENCE  ",$insertid_sth->errstr,"\n";
+#    return;
+#  }
+#  return $insert_id;
+#}
+
+# Store one chunk of DNA: a row of (id, offset, sequence) in the fdna table.
+# The insert statement is prepared on first use and kept in
+# $self->{_insert_sequence}; a failed execute is reported through throw().
+sub insert_sequence {
+  my ($self,$id,$offset,$seq) = @_;
+
+  unless ($self->{_insert_sequence}) {
+    $self->{_insert_sequence} = $self->dbh->prepare_delayed('insert into fdna values (?,?,?)');
+  }
+  my $insert = $self->{_insert_sequence};
+
+  $insert->execute($id,$offset,$seq) or $self->throw($insert->errstr);
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text search string",$limit)
+ Function: Search the notes (attribute values) for the words of a text string, using SQL LIKE matching
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+Given a search string, this method splits it into words, selects the
+attribute values that contain any of those words, and returns an array of results.
+Each row of the returned array is a arrayref containing the following fields:
+
+  column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
+  column 2     The text of the note
+  column 3     A relevance score.
+
+=cut
+
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  # '*' and '?' are deleted rather than treated as wildcards
+  $search_string =~ tr/*?//d;
+
+  # Only runs of word characters survive, so the terms contain no quotes or
+  # regex metacharacters and can be interpolated into the SQL and the regex.
+  my @words  = $search_string =~ /(\w+)/g;
+  my $regex  = join '|',@words;
+  my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
+  my $search   = join(' OR ',@searches);
+
+  my $query = <<END;
+SELECT distinct gclass,gname,fattribute_value 
+  FROM fgroup,fattribute_to_feature,fdata
+  WHERE fgroup.gid=fdata.gid
+     AND fdata.fid=fattribute_to_feature.fid
+END
+;
+  # an empty "AND ()" is not valid SQL, so the word filter is appended only
+  # when at least one search word was given
+  $query .= "     AND ($search)\n" if @words;
+
+  my $sth = $self->dbh->do_query($query);
+  my @results;
+  while (my ($class,$name,$note) = $sth->fetchrow_array) {
+     next unless $class && $name;    # sorry, ignore NULL objects
+
+     # Relevance is 10 points per occurrence of any search word.  With no
+     # words the pattern would be "()", which matches the empty string at
+     # every position, so such rows (and NULL notes) are scored 0 instead.
+     my @matches = (@words && defined $note) ? ($note =~ /($regex)/g) : ();
+     my $relevance = 10*@matches;
+     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+     push @results,[$featname,$note,$relevance];
+     last if $limit && @results >= $limit;
+  }
+  @results;
+}
+
+=head2 make_meta_set_query
+
+ Title   : make_meta_set_query
+ Usage   : $sql = $db->make_meta_set_query
+ Function: return SQL fragment for setting a meta parameter
+ Returns : SQL fragment
+ Args    : none
+ Status  : public
+
+In this adaptor meta parameters are stored in the fmeta table, so the
+fragment returned is an INSERT statement with two placeholders.
+
+=cut
+
+sub make_meta_set_query {
+   # two placeholders: one per column of the fmeta table
+   my $insert_meta = 'INSERT INTO fmeta VALUES (?,?)';
+   return $insert_meta;
+}
+
+# SQL listing every distinct, non-NULL group class in fgroup.
+sub make_classes_query {
+  my ($self) = @_;
+  my $classes_sql = 'SELECT DISTINCT gclass FROM fgroup WHERE NOT gclass IS NULL';
+  return $classes_sql;
+}
+
+
+# The 'chunk_size' meta value when it is set and true, otherwise DEFAULT_CHUNK.
+sub chunk_size {
+  my ($self) = @_;
+  my $configured = $self->meta('chunk_size');
+  return $configured ? $configured : DEFAULT_CHUNK;
+}
+
+# Hand back the GETSEQCOORDS SQL template.
+sub getseqcoords_query {
+   my ($self) = @_;
+   return GETSEQCOORDS();
+}
+
+# Hand back the GETALIASCOORDS SQL template.
+sub getaliascoords_query {
+  my ($self) = @_;
+  return GETALIASCOORDS();
+}
+
+
+# Hand back the GETFORCEDSEQCOORDS SQL template.
+sub getforcedseqcoords_query {
+  my ($self) = @_;
+  return GETFORCEDSEQCOORDS();
+}
+
+
+# Hand back the GETALIASLIKE SQL template.
+sub getaliaslike_query {
+  my ($self) = @_;
+  return GETALIASLIKE();
+}
+
+
+# Build the column list of the features query.
+#   bin_width  => N        select per-bin feature counts instead of features
+#   attributes => hashref  with more than one key, a count(fdata.fid) column
+#                          is appended
+sub make_features_select_part {
+  my ($self,$options) = @_;
+  $options ||= {};
+
+  my $columns;
+  my $width = $options->{bin_width};
+  if ($width) {
+    # histogram mode: one row per bin of $width bases, fscore is the count
+    $columns = <<END;
+fref,
+  1+$width*floor(fstart/$width)   as fstart,
+  $width*(1+floor(fstart/$width)) as fstop,
+  NVL2(fsource,fmethod||':'||fsource,fmethod),'bin',
+  count(*) as fscore,
+  '.','.','bin',
+  NVL2(fsource , fref||':'||fmethod||':'||fsource , fref||':'||fmethod),
+  NULL,NULL,NULL,NULL
+END
+  }
+  else {
+    $columns = <<END;
+fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
+END
+  }
+
+  my $attributes = $options->{attributes};
+  if ($attributes && keys(%$attributes) > 1) {
+    $columns .= ",count(fdata.fid)";
+  }
+  return $columns;
+}
+
+# Build the table list of the features query; the attribute tables are
+# included only when attributes are being searched.  $sparse is accepted but
+# unused: the index hint it once selected is disabled.
+sub make_features_from_part_bkup {
+  my ($self,$sparse,$options) = @_;
+  $options ||= {};
+
+  #my $index = $sparse ? ' USE INDEX(ftypeid)': '';
+  my $index = '';
+
+  my @tables = ("fdata${index}",'ftype','fgroup');
+  push @tables,'fattribute','fattribute_to_feature' if $options->{attributes};
+
+  return join(',',@tables)."\n";
+}
+
+
+####################################
+# moved from mysqlopt.pm
+###################################
+# meta values
+# The parent class's default meta values followed by this adaptor's binning
+# defaults (max_bin, min_bin, straight_join_limit).
+sub default_meta_values {
+  my ($self) = @_;
+  my @inherited = $self->SUPER::default_meta_values;
+  return (@inherited,
+          max_bin             => MAX_BIN,
+          min_bin             => MIN_BIN,
+          straight_join_limit => STRAIGHT_JOIN_LIMIT);
+}
+
+# The 'min_bin' meta value when it is set and true, otherwise MIN_BIN.
+sub min_bin {
+  my ($self) = @_;
+  my $configured = $self->meta('min_bin');
+  return $configured ? $configured : MIN_BIN;
+}
+# The 'max_bin' meta value when it is set and true, otherwise MAX_BIN.
+sub max_bin {
+  my ($self) = @_;
+  my $configured = $self->meta('max_bin');
+  return $configured ? $configured : MAX_BIN;
+}
+# The 'straight_join_limit' meta value when it is set and true, otherwise
+# STRAIGHT_JOIN_LIMIT.
+sub straight_join_limit {
+  my ($self) = @_;
+  my $configured = $self->meta('straight_join_limit');
+  return $configured ? $configured : STRAIGHT_JOIN_LIMIT;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracleace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracleace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/oracleace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+package Bio::DB::GFF::Adaptor::dbi::oracleace;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::oracleace -- Unholy union between oracle GFF database and acedb database
+
+=head1 SYNOPSIS
+
+Pending
+
+See L<Bio::DB::GFF> and L<Bio::DB::GFF::Adaptor::dbi::oracle>
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+
+use base qw(Bio::DB::GFF::Adaptor::ace Bio::DB::GFF::Adaptor::dbi::oracle);
+
+# Create a new Bio::DB::GFF::Adaptor::dbi object
+# Constructor.  Builds the adaptor through the parent class with the full
+# argument list, then picks two optional named arguments out of the same
+# list with rearrange():
+#   DNADB / DNA / FASTA / FASTA_DIR  either a Bio::DB::Fasta object, or a
+#                                    plain string that is handed to
+#                                    Bio::DB::Fasta->new
+#   ACEDB                            an object for which isa('Ace') is true
+# Throws when the DNA source cannot be created or is of the wrong class, or
+# when the ACeDB argument is not an Ace object.  Returns the new adaptor.
+sub new {
+  my $class = shift;
+  my $self  = $class->SUPER::new(@_);
+  my ($dna_db,$acedb) = rearrange([[qw(DNADB DNA FASTA FASTA_DIR)],'ACEDB'],@_);
+  if ($dna_db) {
+    if (!ref($dna_db)) {
+      # a plain string is treated as the location of the FASTA files
+      require Bio::DB::Fasta;
+      my $fasta_dir = $dna_db;
+      $dna_db = Bio::DB::Fasta->new($fasta_dir);
+      $dna_db or $class->throw("new(): Failed to create new Bio::DB::Fasta from files in $fasta_dir");
+    } else {
+      $dna_db->isa('Bio::DB::Fasta') or $class->throw("new(): $dna_db is not a Bio::DB::Fasta object");
+    }
+    $self->dna_db($dna_db);
+  }
+
+  if ($acedb) {
+    $acedb->isa('Ace') or $class->throw("$acedb is not an acedb accessor object");
+    $self->acedb($acedb);
+  }
+  $self;
+}
+
+# Build the object that represents a feature group.  When an ACeDB handle
+# is attached the object comes from ACeDB (or is a plain Text wrapper);
+# otherwise the request is forwarded unchanged to the parent class.
+sub make_object {
+  my ($self,$class,$name,$start,$stop) = @_;
+
+  my $db = $self->acedb;
+  return $self->SUPER::make_object($class,$name,$start,$stop) unless $db;
+
+  # Notes have no database object behind them: just wrap the text
+  if ($class eq 'Note') {
+    return $class->new(Text=>$name);
+  }
+
+  # homols: build the indicated Protein or Sequence object, wrapped in a
+  # placeholder Homology object (possibly for future compatibility)
+  unless ($start eq '') {
+    require Ace::Sequence::Homol;
+    return Ace::Sequence::Homol->new_homol($class,$name,$db,$start,$stop);
+  }
+
+  # general case, falling back to a Text object when nothing comes back
+  my $obj = $db->class->new($class=>$name,$self->acedb);
+  return defined $obj ? $obj : $class->new(Text=>$name);
+}
+
+# Fetch a stretch of sequence.  Uses the attached DNA database's seq()
+# method when one is set; otherwise defers to the parent class with the
+# same arguments.
+sub get_dna {
+  my ($self,@region) = @_;
+
+  my $fasta = $self->dna_db;
+  return $self->SUPER::get_dna(@region) unless $fasta;
+
+  my ($ref,$start,$stop,$class) = @region;
+  return $fasta->seq($ref,$start,$stop,$class);
+}
+
+=head2 freshen_ace
+
+ Title   : freshen_ace
+ Usage   : $flag = Bio::DB::GFF->freshen_ace;
+ Function: Refresh internal acedb handle
+ Returns : flag if correctly freshened
+ Args    : none
+ Status  : Public
+
+ACeDB has an annoying way of timing out, leaving dangling database
+handles.  This method will invoke the ACeDB reopen() method, which
+causes dangling handles to be refreshed.  It has no effect if you are
+not using ACeDB to create ACeDB objects.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1376 @@
+package Bio::DB::GFF::Adaptor::dbi::pg;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::pg -- Database adaptor for a specific postgres schema
+
+=head1 NOTES 
+
+SQL commands that need to be executed before this adaptor will work:
+
+  CREATE DATABASE <dbname>;
+
+Also, select permission needs to be granted for each table in the
+database to the owner of the httpd process (usually 'nobody', but
+for some RedHat systems it is 'apache') if this adaptor is to be used
+with the Generic Genome Browser (gbrowse):
+
+  CREATE USER nobody;
+  GRANT SELECT ON TABLE fmeta                 TO nobody;
+  GRANT SELECT ON TABLE fgroup                TO nobody;
+  GRANT SELECT ON TABLE fdata                 TO nobody;
+  GRANT SELECT ON TABLE fattribute_to_feature TO nobody;
+  GRANT SELECT ON TABLE fdna                  TO nobody;
+  GRANT SELECT ON TABLE fattribute            TO nobody;
+  GRANT SELECT ON TABLE ftype                 TO nobody;
+
+=head2 Optimizing the database
+
+PostgreSQL generally requires some tuning before you get very good
+performance for large databases.  For general information on tuning
+a PostgreSQL server, see http://www.varlena.com/GeneralBits/Tidbits/perf.html
+Of particular importance is executing VACUUM FULL ANALYZE whenever
+you change the database.
+
+Additionally, for a GFF database, there are a few items you can tune.
+For each automatic class in your GBrowse conf file, there will be one
+or two searches done when searching for a feature.  If there are lots
+of features, these searches can take several seconds.  To speed them up,
+do two things:
+
+=over
+
+=item 1
+
+Set 'enable_seqscan = false' in your postgresql.conf file (and restart
+your server).
+
+=item 2
+
+Create 'partial' indexes for each automatic class, doing this for the
+example class 'Allele':
+
+  CREATE INDEX partial_allele_gclass ON
+    fgroup (lower(gname)) WHERE gclass='Allele';
+
+And be sure to run VACUUM FULL ANALYZE after creating the indexes.
+
+=back
+
+=cut
+
+# a simple postgres adaptor
+use strict;
+use Bio::DB::GFF::Util::Binning; 
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use base qw(Bio::DB::GFF::Adaptor::dbi);
+
+use constant MAX_SEGMENT => 100_000_000;  # the largest a segment can get
+# NOTE(review): DEFAULT_CHUNK is not used in the visible part of this file -- confirm its purpose
+use constant DEFAULT_CHUNK => 2000;
+
+# SQL: min/max coordinates of a group, per reference and strand.  The group
+# is matched case-insensitively on gname (first placeholder) and exactly on
+# gclass (second placeholder).
+use constant GETSEQCOORDS =><<END;
+SELECT fref,
+       COALESCE(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup
+  WHERE lower(fgroup.gname) = lower(?)
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    GROUP BY fref,fstrand,gclass,gname
+END
+;
+
+# SQL: same result columns as GETSEQCOORDS, but the group is found through
+# an 'Alias' attribute whose value equals the first placeholder
+# (case-insensitive); the second placeholder is the gclass.
+use constant GETALIASCOORDS =><<END;
+SELECT fref,
+       COALESCE(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup,fattribute,fattribute_to_feature
+  WHERE lower(fattribute_to_feature.fattribute_value)=lower(?)
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,gclass,gname
+END
+;
+
+# SQL: like GETALIASCOORDS, but the 'Alias' attribute value is matched with
+# LIKE (case-insensitive) against the first placeholder; the second
+# placeholder is the gclass.  gclass is listed in GROUP BY, as in
+# GETALIASCOORDS, because it appears un-aggregated in the select list.
+use constant GETALIASLIKE =><<END;
+SELECT fref,
+       COALESCE(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand,
+       gname
+  FROM fdata,fgroup,fattribute,fattribute_to_feature
+  WHERE lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
+    AND fgroup.gclass=?
+    AND fgroup.gid=fdata.gid
+    AND fattribute.fattribute_name='Alias'
+    AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
+    AND fattribute_to_feature.fid=fdata.fid
+    GROUP BY fref,fstrand,gclass,gname
+END
+;
+
+
+# SQL: min/max coordinates of a group restricted to one reference sequence.
+# Placeholders, in order: gname (case-insensitive), gclass, fref
+# (case-insensitive).
+use constant GETFORCEDSEQCOORDS =><<END;
+SELECT fref,
+       COALESCE(gclass,'Sequence'),
+       min(fstart),
+       max(fstop),
+       fstrand
+  FROM fdata,fgroup
+  WHERE lower(fgroup.gname) = lower(?)
+    AND fgroup.gclass=?
+    AND lower(fdata.fref) = lower(?)
+    AND fgroup.gid=fdata.gid
+    GROUP BY fref,fstrand,gclass
+END
+;
+
+# SQL: distinct (gclass, gname, attribute value) rows whose attribute value
+# matches the placeholder with a case-insensitive LIKE.
+use constant FULLTEXTWILDCARD => <<END;
+SELECT distinct gclass,gname,fattribute_value
+    FROM fgroup,fattribute_to_feature,fdata
+     WHERE fgroup.gid=fdata.gid
+       AND fdata.fid=fattribute_to_feature.fid
+       AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
+END
+;
+
+########################
+# moved from mysqlopt.pm
+########################
+
+# this is the largest that any reference sequence can be (100 megabases)
+use constant MAX_BIN    => 100_000_000;
+
+# this is the smallest bin (1 K)
+use constant MIN_BIN    => 1000;
+
+# size of range over which it is faster to force mysql to use the range for indexing
+# NOTE(review): wording inherited from mysqlopt.pm; confirm it is still meaningful for PostgreSQL
+use constant STRAIGHT_JOIN_LIMIT => 200_000;
+
+##############################################################################
+
+=head1 DESCRIPTION
+
+This adaptor implements a specific postgres database schema that is
+compatible with Bio::DB::GFF.  It inherits from
+Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
+
+The schema uses several tables:
+
+=over 4
+
+=item fdata
+
+This is the feature data table.  Its columns are:
+
+    fid	           feature ID (integer)
+    fref           reference sequence name (string)
+    fstart         start position relative to reference (integer)
+    fstop          stop position relative to reference (integer)
+    ftypeid        feature type ID (integer)
+    fscore         feature score (float); may be null
+    fstrand        strand; one of "+" or "-"; may be null
+    fphase         phase; one of 0, 1 or 2; may be null
+    gid            group ID (integer)
+    ftarget_start  for similarity features, the target start position (integer)
+    ftarget_stop   for similarity features, the target stop position (integer)
+
+Note that it would be desirable to normalize the reference sequence
+name, since there are usually many features that share the same
+reference feature.  However, in the current schema, query performance
+suffers dramatically when this additional join is added.
+
+=item fgroup
+
+This is the group table. There is one row for each group.  Columns:
+
+    gid	      the group ID (integer)
+    gclass    the class of the group (string)
+    gname     the name of the group (string)
+
+The group table serves multiple purposes.  As you might expect, it is
+used to cluster features that logically belong together, such as the
+multiple exons of the same transcript.  It is also used to assign a
+name and class to a singleton feature.  Finally, the group table is
+used to identify the target of a similarity hit.  This is consistent
+with the way in which the group field is used in the GFF version 2
+format.
+
+The fgroup.gid field joins with the fdata.gid field. 
+
+Examples:
+
+  sql> select * from fgroup where gname='sjj_2L52.1';
+  +-------+-------------+------------+
+  | gid   | gclass      | gname      |
+  +-------+-------------+------------+
+  | 69736 | PCR_product | sjj_2L52.1 |
+  +-------+-------------+------------+
+  1 row in set (0.70 sec)
+
+  sql> select fref,fstart,fstop from fdata,fgroup 
+            where gclass='PCR_product' and gname = 'sjj_2L52.1' 
+                  and fdata.gid=fgroup.gid;
+  +---------------+--------+-------+
+  | fref          | fstart | fstop |
+  +---------------+--------+-------+
+  | CHROMOSOME_II |   1586 |  2355 |
+  +---------------+--------+-------+
+  1 row in set (0.03 sec)
+
+=item ftype
+
+This table contains the feature types, one per row.  Columns are:
+
+    ftypeid      the feature type ID (integer)
+    fmethod      the feature type method name (string)
+    fsource      the feature type source name (string)
+
+The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:
+
+  sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype 
+         where gclass='PCR_product' 
+               and gname = 'sjj_2L52.1'
+               and fdata.gid=fgroup.gid
+               and fdata.ftypeid=ftype.ftypeid;
+  +---------------+--------+-------+-------------+-----------+
+  | fref          | fstart | fstop | fmethod     | fsource   |
+  +---------------+--------+-------+-------------+-----------+
+  | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
+  +---------------+--------+-------+-------------+-----------+
+  1 row in set (0.08 sec)
+
+=item fdna
+
+This table holds the raw DNA of the reference sequences.  It has three
+columns:
+
+    fref          reference sequence name (string)
+    foffset       offset of this sequence
+    fdna          the DNA sequence (longblob)
+
+To overcome problems loading large blobs, DNA is automatically
+fragmented into multiple segments when loading, and the position of
+each segment is stored in foffset.  The fragment size is controlled by
+the -clump_size argument during initialization.
+
+=item fattribute_to_feature
+
+This table holds "attributes", which are tag/value pairs stuffed into
+the GFF line.  The first tag/value pair is treated as the group, and
+anything else is treated as an attribute (weird, huh?).
+
+ CHR_I assembly_tag Finished     2032 2036 . + . Note "Right: cTel33B"
+ CHR_I assembly_tag Polymorphism 668  668  . + . Note "A->C in cTel33B"
+
+The columns of this table are:
+
+    fid                 feature ID (integer)
+    fattribute_id       ID of the attribute (integer)
+    fattribute_value    text of the attribute (text)
+
+The fdata.fid column joins with fattribute_to_feature.fid.
+
+=item fattribute
+
+This table holds the normalized names of the attributes.  Fields are:
+
+  fattribute_id      ID of the attribute (integer)
+  fattribute_name    Name of the attribute (varchar)
+
+=back
+
+=head2 Data Loading Methods
+
+In addition to implementing the abstract SQL-generating methods of
+Bio::DB::GFF::Adaptor::dbi, this module also implements the data
+loading functionality of Bio::DB::GFF.
+
+=cut
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(@args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    : see below
+ Status  : Public
+
+The new constructor is identical to the "dbi" adaptor's new() method,
+except that the prefix "dbi:Pg:dbname=" is added to the database DSN
+identifier automatically if it does not already begin with "dbi:" or "DBI:".
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:Pg:dbname=:ens0040' or "ens0040"
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+=cut
+
+#'
+
+# Constructor.  Extracts the data source (named FEATUREDB, DB or DSN) with
+# rearrange(), prefixes a bare database name with "dbi:Pg:dbname=", and
+# passes the DSN plus any remaining arguments to the parent constructor.
+# Returns the new adaptor object.
+sub new {
+  my $class = shift;
+  my ($dsn,$other) = rearrange([
+				[qw(FEATUREDB DB DSN)],
+			       ],@_);
+  # a DSN that is a reference, or already starts with dbi:/DBI:, is left alone
+  $dsn = "dbi:Pg:dbname=$dsn" if !ref($dsn) && $dsn !~ /^(dbi|DBI):/;
+
+  # $other is presumably a hashref of the arguments rearrange() did not
+  # consume; tolerate it being undefined so that dereferencing cannot die
+  my %rest = $other ? %$other : ();
+  my $self = $class->SUPER::new(-dsn=>$dsn,%rest);
+  $self;
+}
+
+=head2 schema
+
+ Title   : schema
+ Usage   : $schema = $db->schema
+ Function: return the CREATE script for the schema
+ Returns : a list of CREATE statements
+ Args    : none
+ Status  : protected
+
+This method returns a list containing the various CREATE statements
+needed to initialize the database tables.
+
+=cut
+
+# Returns a reference to a hash describing the database schema.  Each key
+# is a table name; each value is a hash with a "table" entry (the CREATE
+# TABLE statement) and, for some tables, an "index" entry mapping index
+# names to their CREATE INDEX statements.
+sub schema {
+  my %schema = (
+		fdata =>{
+table=> q{
+CREATE TABLE "fdata" (
+  "fid" serial NOT NULL,
+  "fref" character varying(100) DEFAULT '' NOT NULL,
+  "fstart" integer DEFAULT '0' NOT NULL,
+  "fstop" integer DEFAULT '0' NOT NULL,
+  "fbin" double precision DEFAULT '0.000000' NOT NULL,
+  "ftypeid" integer DEFAULT '0' NOT NULL,
+  "fscore" double precision DEFAULT NULL,
+  "fstrand" character varying(3) DEFAULT NULL,
+  "fphase" character varying(3) DEFAULT NULL,
+  "gid" integer DEFAULT '0' NOT NULL,
+  "ftarget_start" integer DEFAULT NULL,
+  "ftarget_stop" integer DEFAULT NULL,
+  CONSTRAINT chk_fdata_fstrand CHECK (fstrand IN ('+','-')),
+  CONSTRAINT chk_fdata_fphase CHECK (fphase IN ('0','1','2')),
+  CONSTRAINT pk_fdata PRIMARY KEY (fid)
+)
+}, # fdata table
+
+# the unique variants of the fref index below are disabled
+#CONSTRAINT fref_fdata UNIQUE (fref, fbin, fstart, fstop, ftypeid, gid)
+# fdata_fref_idx => q{ CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)}, 
+
+index=>{
+                fdata_fref_idx => q{
+CREATE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
+},
+
+		fdata_ftypeid_idx => q{
+CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
+},
+
+		fdata_gid_idx => q{
+CREATE INDEX fdata_gid_idx ON fdata (gid)
+}
+	 }, # fdata indexes
+
+}, # fdata
+
+
+
+		fgroup => { 
+table => q{
+CREATE TABLE "fgroup" (
+  "gid" serial NOT NULL,
+  "gclass" character varying(100) DEFAULT NULL,
+  "gname" character varying(100) DEFAULT NULL,
+  CONSTRAINT pk_fgroup PRIMARY KEY (gid)
+)
+}, # fgroup table
+
+index => {
+		fgroup_gclass_idx => q{
+CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
+},
+                fgroup_gname_idx => q{
+CREATE INDEX fgroup_gname_idx ON fgroup(gname)
+},
+                fgroup_lower_gname_idx => q{
+CREATE INDEX fgroup_lower_gname_idx ON fgroup (lower(gname))
+},
+	   }, # fgroup indexes
+
+}, # fgroup
+
+		ftype => { 
+table => q{
+CREATE TABLE "ftype" (
+  "ftypeid" serial NOT NULL,
+  "fmethod" character varying(100) DEFAULT '' NOT NULL,
+  "fsource" character varying(100) DEFAULT NULL,
+  CONSTRAINT pk_ftype PRIMARY KEY (ftypeid),
+  CONSTRAINT ftype_ftype UNIQUE (fmethod, fsource)
+)
+}, # ftype table
+
+index => {
+		ftype_fmethod_idx => q{
+CREATE  INDEX ftype_fmethod_idx ON ftype (fmethod)
+},
+
+		ftype_fsource_idx => q{
+CREATE  INDEX ftype_fsource_idx ON ftype (fsource)
+},
+	
+		ftype_ftype_idx => q{
+CREATE UNIQUE INDEX ftype_ftype_idx ON ftype (fmethod,fsource)
+}
+	   }, # ftype indexes
+
+}, # ftype
+
+
+         fdna => {
+table => q{
+CREATE TABLE "fdna" (
+  "fref" character varying(100) DEFAULT '' NOT NULL,
+  "foffset" integer DEFAULT '0' NOT NULL,
+  "fdna" bytea,
+  CONSTRAINT pk_fdna PRIMARY KEY (fref, foffset)
+)
+} #fdna table
+		 }, #fdna 
+
+        fmeta => {
+table => q{
+CREATE TABLE "fmeta" (
+  "fname" character varying(255) DEFAULT '' NOT NULL,
+  "fvalue" character varying(255) DEFAULT '' NOT NULL,
+  CONSTRAINT pk_fmeta PRIMARY KEY (fname)
+)
+} # fmeta table
+		 }, # fmeta
+
+
+       fattribute => {
+table => q{
+CREATE TABLE "fattribute" (
+  "fattribute_id" serial NOT NULL,
+  "fattribute_name" character varying(255) DEFAULT '' NOT NULL,
+  CONSTRAINT pk_fattribute PRIMARY KEY (fattribute_id)
+)
+}, # fattribute table
+
+}, # fattribute
+
+       fattribute_to_feature => {
+table => q{
+CREATE TABLE "fattribute_to_feature" (
+  "fid" integer DEFAULT '0' NOT NULL,
+  "fattribute_id" integer DEFAULT '0' NOT NULL,
+  "fattribute_value" text
+)
+}, # fattribute_to_feature table
+
+index => {
+       fattribute_to_feature_fid => q{
+CREATE  INDEX fattribute_to_feature_fid ON fattribute_to_feature (fid,fattribute_id)
+},
+       fattribute_txt_idx => q{
+CREATE INDEX fattribute_txt_idx ON fattribute_to_feature (fattribute_value)
+},
+       fattribute_lower_idx => q{
+CREATE INDEX fattribute_lower_idx ON fattribute_to_feature (lower(fattribute_value))
+},
+	   } # fattribute_to_feature indexes
+} # fattribute_to_feature  
+
+
+);
+  # callers receive a reference to the hash, not a flat list
+  return \%schema;
+}
+
+
+=head2 setup_load
+
+ Title   : setup_load
+ Usage   : $db->setup_load
+ Function: called before load_gff_line()
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This method performs schema-specific initialization prior to loading a
+set of GFF records.  It prepares a set of DBI statement handlers to be 
+used in loading the data.
+
+=cut
+
+# Prepare the adaptor for loading GFF records.  Optionally locks the
+# tables, prepares every statement the loader needs and stores the handles
+# under $self->{load_stuff}{sth}{NAME}, then resets the type/group caches
+# and the record counter.  The NAME keys are the ones get_table_id(),
+# insertid() and load_gff_line() look up: lookup_/insert_/insertid_ followed
+# by the table name.
+sub setup_load {
+  my $self = shift;
+  my $dbh  = $self->features_db;
+
+  if ($self->lock_on_load) {
+    # NOTE(review): "LOCK TABLES ... WRITE" looks like MySQL syntax; confirm
+    # it does what is intended on PostgreSQL before relying on lock_on_load
+    my @tables = map { "$_ WRITE"} $self->tables;
+    my $tables = join ', ',@tables;
+    $dbh->do("LOCK TABLES $tables");
+  }
+
+  my $insert_fdata_sql = <<END;
+INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
+		   fstrand,fphase,gid,ftarget_start,ftarget_stop)
+       VALUES(?,?,?,?,?,?,?,?,?,?,?)
+END
+
+  # handle name => SQL, kept as an ordered list of pairs
+  my @statements = (
+    lookup_ftype     => 'SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?',
+    insert_ftype     => 'INSERT INTO ftype (fmethod,fsource) VALUES (?,?)',
+    insertid_ftype   => "SELECT currval('ftype_ftypeid_seq')",
+
+    lookup_fgroup    => 'SELECT gid FROM fgroup WHERE lower(gname)=lower(?) AND gclass=?',
+    insert_fgroup    => 'INSERT INTO fgroup (gname,gclass) VALUES (?,?)',
+    insertid_fgroup  => "SELECT currval('fgroup_gid_seq')",
+
+    lookup_fattribute   => 'SELECT fattribute_id FROM fattribute WHERE fattribute_name=?',
+    insert_fattribute   => 'INSERT INTO fattribute (fattribute_name) VALUES (?)',
+    insertid_fattribute => "SELECT currval('fattribute_fattribute_id_seq')",
+
+    insert_fattribute_value => 'INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)',
+
+    insert_fdata          => $insert_fdata_sql,
+    delete_existing_fdata => 'DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?',
+    insertid_fdata        => "SELECT currval('fdata_fid_seq')",
+  );
+
+  while (my ($name,$sql) = splice @statements,0,2) {
+    $self->{load_stuff}{sth}{$name} = $dbh->prepare_delayed($sql);
+  }
+
+  $self->{load_stuff}{types}  = {};
+  $self->{load_stuff}{groups} = {};
+  $self->{load_stuff}{counter} = 0;
+}
+
+=head2 load_gff_line
+
+ Title   : load_gff_line
+ Usage   : $db->load_gff_line($fields)
+ Function: called to load one parsed line of GFF
+ Returns : true if successfully inserted
+ Args    : hashref containing GFF fields
+ Status  : protected
+
+This method is called once per line of the GFF and passed a series of
+parsed data items that are stored into the hashref $fields.  The keys are:
+
+ ref          reference sequence
+ source       annotation source
+ method       annotation method
+ start        annotation start
+ stop         annotation stop
+ score        annotation score (may be undef)
+ strand       annotation strand (may be undef)
+ phase        annotation phase (may be undef)
+ gclass       class of annotation's group (may be undef)
+ gname        ID of annotation's group (may be undef)
+ tstart       start of target of a similarity hit
+ tstop        stop of target of a similarity hit
+ attributes   array reference of attributes, each of which is a [tag=>value] array ref
+
+=cut
+
+# Load one parsed GFF record.  Looks up (or creates) the type and group
+# rows, inserts the feature row, then inserts one attribute row per entry
+# in $gff->{attributes}.  Returns the new feature id, or nothing when the
+# type, group or an attribute id cannot be obtained or the feature insert
+# fails.  Statement handles come from $self->{load_stuff}, which
+# setup_load() fills in.
+sub load_gff_line {
+  my $self = shift;
+  my $gff = shift;
+
+  # a '.' placeholder means "no value": turn it into undef
+  if (defined $gff->{phase}){
+     chomp($gff->{phase});
+     undef($gff->{phase}) if $gff->{phase} eq '.';
+   }
+
+  if (defined $gff->{strand} && $gff->{strand} eq '.'){undef($gff->{strand})};
+  if (defined $gff->{score}  && $gff->{score} eq '.'){undef($gff->{score})};
+
+  my $s    = $self->{load_stuff};
+  my $dbh  = $self->features_db;
+  local $dbh->{PrintError} = 0;
+
+  defined(my $typeid  = $self->get_table_id('ftype', $gff->{method} => $gff->{source})) or return;
+  defined(my $groupid = $self->get_table_id('fgroup',$gff->{gname}  => $gff->{gclass})) or return;
+
+  my $bin =  bin($gff->{start},$gff->{stop},$self->min_bin);
+  my $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
+					       $gff->{start},$gff->{stop},$bin,
+					       $typeid,
+					       $gff->{score},$gff->{strand},$gff->{phase},
+					       $groupid,
+					       $gff->{tstart},$gff->{tstop});
+
+  unless ($result) {
+    # report the failure, then always give up on this record; the return
+    # must not depend on whether the diagnostic print itself succeeded
+    warn $dbh->errstr,"\n";
+    print "ref=",$gff->{ref}," start=",$gff->{start}," stop=",$gff->{stop}," bin=",$bin," typeid=",$typeid," groupid=",$groupid,"\n";
+    return;
+  }
+
+  # prefer the id reported for the insert; otherwise look the row up again
+  my $fid = $self->insertid($s->{sth},'fdata')
+    || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
+
+  # insert attributes; each entry is a [tag, value] pair
+  foreach (@{$gff->{attributes}}) {
+    defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
+    $s->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$_->[1]);
+  }
+
+  # progress report every 1000 records
+  if ( (++$s->{counter} % 1000) == 0) {
+    print STDERR "$s->{counter} records loaded...";
+    print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
+  }
+
+  $fid;
+}
+
+
+# Return the id generated by the most recent insert into $table.
+#   $sth    hashref of statement handles; the handle stored under
+#           "insertid_$table" is executed and its first column returned
+#   $table  table name used to pick that handle
+# Returns the id, or nothing (after a warning) when the statement fails.
+sub insertid {
+  my ($self,$sth,$table) = @_;
+
+  my $handle = $sth->{"insertid_$table"};
+  unless ($handle->execute()) {
+    # $sth is a plain hash of handles, so the error text has to come from
+    # the handle that was executed
+    warn "No CURRVAL for SEQUENCE of table $table ",$handle->errstr,"\n";
+    return;
+  }
+
+  my $insert_id = ($handle->fetchrow_array)[0];
+  return $insert_id;
+}
+
+
+=head2 get_table_id
+
+ Title   : get_table_id
+ Usage   : $integer = $db->get_table_id($table,@ids)
+ Function: get the ID of a group or type
+ Returns : an integer ID or undef
+ Args    : the table name, followed by the values that identify the row
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature type or group.  The arguments are the name
+of the table, and two string identifiers.  For feature types, the
+identifiers are the method and source.  For groups, the identifiers
+are group name and class.
+
+This method requires that a statement handler named I<lookup_$table>,
+have been created previously by setup_load().  It is here to overcome
+deficiencies in mysql's INSERT syntax.
+
+=cut
+
+#'
+# get the object ID from a named table
+# Look up the integer id of a row in $table, creating the row if needed.
+#   $table  table name; selects the lookup_/insert_/insertid_ handles in
+#           $self->{load_stuff}{sth}
+#   @ids    values bound to the lookup and insert statements
+# Ids are cached in $self->{load_stuff}{$table} under the @ids joined with
+# ':'.  Returns the id, or nothing (after a warning) when none is found.
+sub get_table_id {
+  my $self   = shift;
+  my $table  = shift;
+  my @ids    = @_;
+
+  # irritating warning for null id
+  my $id_key;
+  {
+    local $^W=0;
+    $id_key = join ':',@ids;
+  }
+
+  my $s   = $self->{load_stuff};
+  my $sth = $s->{sth};
+  my $dbh = $self->features_db;
+
+  unless (defined($s->{$table}{$id_key})) {
+    $sth->{"lookup_$table"}->execute(@ids);
+    my @result = $sth->{"lookup_$table"}->fetchrow_array;
+    if (@result > 0) {
+      $s->{$table}{$id_key} = $result[0];
+    } else {
+      # not there yet: insert the row and remember the id it was given
+      $sth->{"insert_$table"}->execute(@ids)
+	&& ($s->{$table}{$id_key} = $self->insertid($sth,$table));
+    }
+  }
+
+  my $id = $s->{$table}{$id_key};
+  unless (defined $id) {
+    warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
+    return;
+  }
+  $id;
+}
+
+
+#sub insertid {
+#  my $self = shift;
+#  my $insertid_sth = shift ;
+#  my $insert_id;
+#  if ($insertid_sth->execute){
+#     $insert_id = ($insertid_sth->fetchrow_array)[0];
+#  }
+#  else{
+#    warn "No CURRVAL for SEQUENCE  ",$insertid_sth->errstr,"\n";
+#    return;
+#  }
+#  return $insert_id;
+#}
+
+# Insert one fragment of DNA into the fdna table.
+#   $id      first column value (reference sequence name)
+#   $offset  second column value (offset of this fragment)
+#   $seq     third column value (the sequence itself)
+# The statement handle is prepared on first use and cached in
+# $self->{_insert_sequence}.  Throws with the handle's error text when the
+# insert fails.
+sub insert_sequence {
+  my ($self,$id,$offset,$seq) = @_;
+
+  unless ($self->{_insert_sequence}) {
+    $self->{_insert_sequence} = $self->dbh->prepare_delayed('insert into fdna values (?,?,?)');
+  }
+  my $insert = $self->{_insert_sequence};
+
+  my $ok = $insert->execute($id,$offset,$seq);
+  return $ok || $self->throw($insert->errstr);
+}
+
+=head2 range_query
+
+ Title   : range_query
+ Usage   : $db->range_query($range_type,$refseq,$refclass,$start,$stop,$types,$sparse,$order_by_group,$attributes,$binsize)
+ Function: create statement handle for range/overlap queries
+ Returns : a DBI statement handle
+ Args    : see below
+ Status  : Protected
+
+This method constructs the statement handle for this module's central
+query: given a range and/or a list of feature types, fetch their GFF
+records.  It overrides a method in dbi.pm so that the overlaps query
+can write SQL optimized for Postgres.  Specifically, instead of writing
+the bin related section as a set of ORs, each bin piece is place in 
+a separate select and then they are UNIONed together.  This subroutine
+requires several replacements for other subroutines in dbi.pm.  In this 
+module, they are named the same as those in dbi.pm but prefixed with 
+"pg_".
+
+The positional arguments are as follows:
+
+  Argument               Description
+
+  $isrange               A flag indicating that this is a range.
+                         query.  Otherwise an overlap query is
+                         assumed.
+
+  $refseq                The reference sequence name (undef if no range).
+
+  $refclass              The reference sequence class (undef if no range).
+
+  $start                 The start of the range (undef if none).
+
+  $stop                  The stop of the range (undef if none).
+
+  $types                 Array ref containing zero or feature types in the
+                         format [method,source].
+
+  $order_by_group        A flag indicating that statement handler should group
+                         the features by group id (handy for iterative fetches)
+
+  $attributes            A hash containing select attributes.
+
+  $binsize               A bin size for generating tables of feature density.
+
+=cut
+
+# Build and run the central feature query.
+# Positional arguments: $rangetype, $refseq, $class, $start, $stop, $types,
+# $sparse, $order_by_group, $attributes, $bin.
+# For any $rangetype other than 'overlaps' a single SELECT is built.  For
+# 'overlaps' the bin query is split on its ORs and one SELECT per bin
+# clause is built; the SELECTs are then joined with UNION.
+# Returns the statement handle produced by do_query().
+sub range_query {
+  my $self = shift;
+  my($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes,$bin) = @_;
+
+  my %a = (refseq=>$refseq,class=>$class,start=>$start,stop=>$stop,types=>$types,attributes=>$attributes,bin_width=>$bin);
+
+  # pieces that are built the same way for both query shapes
+  my $select = $self->make_features_select_part(\%a);
+  my $from   = $self->make_features_from_part($sparse,\%a);
+  my $join   = $self->make_features_join_part(\%a);
+  my ($group_by,@more_args) = $self->make_features_group_by_part(\%a);
+
+  my ($query,@args,$order_by);
+
+  if ($rangetype ne 'overlaps') {
+
+    my $where;
+    ($where,@args) = $self->make_features_by_range_where_part($rangetype,\%a);
+    $order_by      = $self->make_features_order_by_part(\%a) if $order_by_group;
+
+    $query  = "SELECT $select FROM $from WHERE $join";
+    $query .= " AND $where" if $where;
+
+    if ($group_by) {
+      $query .= " GROUP BY $group_by";
+      push @args,@more_args;
+    }
+
+  } else {  # most common case: overlaps query
+
+    my @bin_parts = split /\s*OR/, $self->bin_query($start,$stop);
+    my ($where,@where_args) = $self->pg_make_features_by_range_where_part($rangetype,\%a);
+    $order_by = $self->pg_make_features_order_by_part(\%a) if $order_by_group;
+
+    my @query_pieces;
+    foreach my $bin_clause (@bin_parts) {
+      my $piece = "SELECT $select FROM $from WHERE $join";
+      $piece   .= " AND $where" if $where;   # skip an empty WHERE part
+      $piece   .= " AND $bin_clause\n";
+
+      # every SELECT repeats the placeholders, so repeat the bind values too
+      push @args,@where_args;
+
+      if ($group_by) {
+        $piece .= " GROUP BY $group_by";
+        push @args,@more_args;
+      }
+
+      push @query_pieces,$piece;
+    }
+
+    # the leading newline keeps UNION apart from a preceding GROUP BY list
+    $query = join("\nUNION\n", @query_pieces);
+
+  }
+
+  $query .= " ORDER BY $order_by" if $order_by;
+
+  $self->dbh->do('set enable_seqscan=off');
+  my $sth = $self->dbh->do_query($query,@args);
+  $sth;
+}
+
+# Build the WHERE fragment for an overlaps query, leaving out the bin
+# clause (range_query adds one bin clause per SELECT).
+#   $rangetype  must be 'overlaps'; anything else returns nothing
+#   $options    hashref; the keys refseq, class, start, stop, types and
+#               attributes are read
+# In list context returns the fragment followed by its bind values; in
+# scalar context returns the result of dbi_quote() on them.
+sub pg_make_features_by_range_where_part {
+  my $self = shift;
+  my ($rangetype,$options) = @_;
+
+  return unless $rangetype eq 'overlaps';
+
+  $options ||= {};
+  my ($refseq,$class,$start,$stop,$types,$attributes) =
+    @{$options}{qw(refseq class start stop types attributes)};
+
+  my (@query,@args);
+
+  if ($refseq) {
+    my ($q,@a) = $self->refseq_query($refseq,$class);
+    push @query,$q;
+    push @args,@a;
+  }
+
+  if (defined $start or defined $stop) {
+    # an open end defaults to the start or to the largest possible segment
+    $start = 0               unless defined($start);
+    $stop  = MAX_SEGMENT     unless defined($stop);
+
+    my ($range_query,@range_args) = $self->pg_overlap_query($start,$stop);
+
+    push @query,$range_query;
+    push @args,@range_args;
+  }
+
+  if (defined $types && @$types) {
+    my ($type_query,@type_args) = $self->types_query($types);
+    push @query,$type_query;
+    push @args,@type_args;
+  }
+
+  if ($attributes) {
+    my ($attribute_query,@attribute_args) = $self->make_features_by_attribute_where_part($attributes);
+    push @query,"($attribute_query)";
+    push @args,@attribute_args;
+  }
+
+  # spaces around AND so that neighbouring clauses can never run together
+  my $query = join " AND ",@query;
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+# Overlap condition for a range, without any bin restriction.
+#
+# Delegates to overlap_query_nobin() and pads its SQL fragment with a
+# newline on each side.
+#
+# Returns : list context   - ($sql_fragment, @bind_values)
+#           scalar context - the fragment with the bind values quoted
+#                            into it by dbh->dbi_quote
+sub pg_overlap_query {
+  my $self = shift;
+  my ($start,$stop) = @_;
+
+  my ($iq,@args) = $self->overlap_query_nobin($start,$stop);
+  my $query = "\n$iq\n";
+
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+# ORDER BY column for feature queries.  The result is always the group
+# name column; the options hashref, if one is passed, is not consulted.
+sub pg_make_features_order_by_part {
+  my ($self, $options) = @_;
+  return "gname";
+}
+
+=head2 search_notes
+
+This PostgreSQL adaptor does not implement the search notes method
+because it can be very slow (although the code for the method is
+contained in this method but commented out).
+There is, however, a PostgreSQL adaptor that does implement it in
+a more efficient way: L<Bio::DB::GFF::Adaptor::dbi::pg_fts>,
+which inherits from this adaptor and uses the optional PostgreSQL
+module TSearch2 for full text indexing.  See that adaptor's
+documentation for more information.
+
+See also L<Bio::DB::GFF>
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text search string",$limit)
+ Function: Search the notes for a text string (disabled in this adaptor; see above)
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+This is a replacement for the mysql-specific method.  Given a search string, it
+performs an ILIKE search of the notes table and returns an array of results.
+Each row of the returned array is an arrayref containing the following fields:
+
+  column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
+  column 2     The text of the note
+  column 3     A relevance score.
+
+Note that for large databases this can be very slow and may result in
+time out or 500-cgi errors.  If this is happening on a regular basis,
+you should look into using L<Bio::DB::GFF::Adaptor::dbi::pg_fts> which
+implements the TSearch2 full text indexing scheme.
+
+=cut
+
+# Intentionally empty: every statement of the implementation below is
+# commented out, so calling this method runs no query and builds no
+# result rows.
+sub search_notes{
+#  my $self = shift;
+#  my ($search_string,$limit) = @_;
+#
+#  $search_string =~ tr/*/%/s;
+#  $search_string =  '%'.$search_string unless $search_string =~ /^\%/;
+#  $search_string =  $search_string.'%' unless $search_string =~ /\%$/;
+#  warn "search_string:$search_string";
+#  my $query = FULLTEXTWILDCARD;
+#  $query   .= " limit $limit" if defined $limit;
+#  my $sth   = $self->dbh->do_query($query,$search_string);
+#
+#  my @results;
+#  while (my ($class,$name,$note) = $sth->fetchrow_array) {
+#
+#     next unless $class && $name;    # sorry, ignore NULL objects
+#     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+#
+#     push @results,[$featname,$note,0]; #gbrowse expects a score, but
+#                                        #pg doesn't give one, thus the 0
+#  }
+#  warn @results;
+#
+#  return @results;
+}
+
+
+=head2 make_meta_set_query
+
+ Title   : make_meta_set_query
+ Usage   : $sql = $db->make_meta_set_query
+ Function: return SQL fragment for setting a meta parameter
+ Returns : SQL fragment
+ Args    : none
+ Status  : public
+
+In this adaptor the fragment is an INSERT into the fmeta table with
+two placeholders.
+
+=cut
+
+# SQL for storing one meta parameter: a two-placeholder INSERT into
+# the fmeta table.
+sub make_meta_set_query {
+   my $insert_sql = 'INSERT INTO fmeta VALUES (?,?)';
+   return $insert_sql;
+}
+
+# SQL listing each distinct, non-NULL gclass value found in fgroup.
+sub make_classes_query {
+  my ($self) = @_;
+  my $sql = 'SELECT DISTINCT gclass FROM fgroup WHERE NOT gclass IS NULL';
+  return $sql;
+}
+
+
+# Chunk size: the 'chunk_size' meta value when it is set and true,
+# otherwise DEFAULT_CHUNK.
+sub chunk_size {
+  my ($self) = @_;
+  my $stored = $self->meta('chunk_size');
+  return $stored ? $stored : DEFAULT_CHUNK;
+}
+
+# Accessor for the GETSEQCOORDS query text.
+sub getseqcoords_query {
+   my ($self) = @_;
+   return GETSEQCOORDS;
+}
+
+# Accessor for the GETALIASCOORDS query text.
+sub getaliascoords_query {
+  my ($self) = @_;
+  return GETALIASCOORDS;
+}
+
+
+# Accessor for the GETFORCEDSEQCOORDS query text.
+sub getforcedseqcoords_query {
+  my ($self) = @_;
+  return GETFORCEDSEQCOORDS;
+}
+
+
+# Accessor for the GETALIASLIKE query text.
+sub getaliaslike_query {
+  my ($self) = @_;
+  return GETALIASLIKE;
+}
+
+
+# Build the column list of a feature SELECT.
+#
+# Args    : $options - optional hashref.
+#             bin_width  : when true, the column list describes
+#                          per-bin feature counts instead of
+#                          individual features
+#             attributes : when it holds more than one key, a
+#                          count(fdata.fid) column is appended
+# Returns : SQL fragment (string)
+sub make_features_select_part {
+  my $self = shift;
+  my $options = shift || {};
+  my $s;
+  # called $width rather than $b so sort's $b is not shadowed
+  if (my $width = $options->{bin_width}) {
+
+    # each CASE expression is closed with END; without it the
+    # statement is not valid SQL
+    $s = <<END_SQL;
+fref,
+  1+$width*floor(fstart/$width)   as fstart,
+  $width*(1+floor(fstart/$width)) as fstop,
+  CASE WHEN fsource IS NULL THEN fmethod
+       ELSE fmethod||':'||fsource END,
+  'bin',
+  count(*) as fscore,
+  '.','.','bin',
+  CASE WHEN fsource IS NULL THEN fref||':'||fmethod
+       ELSE fref||':'||fmethod||':'||fsource END,
+  NULL,NULL,NULL,NULL
+END_SQL
+  } else {
+    $s = <<END_SQL;
+fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,fgroup.gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
+END_SQL
+  }
+  $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
+  $s;
+}
+
+# Comma-separated table list for the FROM clause, terminated by a
+# newline.  The attribute tables are added only when the options
+# carry an 'attributes' entry; $sparse is accepted but not used.
+sub make_features_from_part_bkup {
+  my ($self, $sparse, $options) = @_;
+  $options ||= {};
+
+  my @tables = ('fdata', 'ftype', 'fgroup');
+  push @tables, 'fattribute', 'fattribute_to_feature'
+    if $options->{attributes};
+
+  return join(',', @tables) . "\n";
+}
+
+
+####################################
+# moved from mysqlopt.pm
+###################################
+# meta values
+# Default meta values: whatever the parent class supplies, followed by
+# this adaptor's binning and join-limit defaults.
+sub default_meta_values {
+  my ($self) = @_;
+  my @inherited = $self->SUPER::default_meta_values;
+  return (@inherited,
+          max_bin             => MAX_BIN,
+          min_bin             => MIN_BIN,
+          straight_join_limit => STRAIGHT_JOIN_LIMIT);
+}
+
+# 'min_bin' meta value when set and true, otherwise MIN_BIN.
+sub min_bin {
+  my ($self) = @_;
+  my $configured = $self->meta('min_bin');
+  return $configured if $configured;
+  return MIN_BIN;
+}
+# 'max_bin' meta value when set and true, otherwise MAX_BIN.
+sub max_bin {
+  my ($self) = @_;
+  my $configured = $self->meta('max_bin');
+  return $configured if $configured;
+  return MAX_BIN;
+}
+# 'straight_join_limit' meta value when set and true, otherwise
+# STRAIGHT_JOIN_LIMIT.
+sub straight_join_limit {
+  my ($self) = @_;
+  my $configured = $self->meta('straight_join_limit');
+  return $configured if $configured;
+  return STRAIGHT_JOIN_LIMIT;
+}
+
+
+# Fetch the features of a named group and pass each row to a callback.
+#
+# Args    : $class, $name - group class and name
+#           $location     - optional arrayref [refseq, start, stop];
+#                           when given, only features overlapping that
+#                           range are selected
+#           $callback     - code ref, called with the columns of each
+#                           row; required
+# Returns : number of rows handed to the callback
+# Throws  : when no callback is supplied
+sub _feature_by_name {
+  my $self = shift;
+  my ($class,$name,$location,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+
+  # Declared unconditionally and assigned conditionally: the behavior
+  # of "my ... if COND" is undefined when COND is false.
+  my @bin_parts;
+  @bin_parts = split /\s*OR/, $self->bin_query($location->[1],$location->[2]) if $location;
+
+  my $select         = $self->make_features_select_part;
+  my $from           = $self->make_features_from_part(undef,{sparse_groups=>1});
+  my ($where,@args)  = $self->make_features_by_name_where_part($class,$name);
+  my $join           = $self->make_features_join_part;
+
+  # scalar context: the range bind values are quoted into the fragment,
+  # so only the name bind values remain in @args
+  my $range;
+  $range = $self->pg_make_features_by_range_where_part('overlaps',
+                                                       {refseq=>$location->[0],
+                                                        class =>'',
+                                                        start=>$location->[1],
+                                                        stop =>$location->[2]}) if $location;
+
+  my $query;
+  if (@bin_parts) {
+    # one SELECT per bin condition, combined with UNION; every SELECT
+    # needs its own copy of the bind values
+    my (@temp_args,@query_pieces);
+    foreach my $bin (@bin_parts) {
+      my $temp_query = "SELECT $select FROM $from WHERE $join AND $where AND $range AND $bin\n";
+      push @temp_args, @args;
+      push @query_pieces, $temp_query;
+    }
+
+    @args  = @temp_args;
+    $query = join("UNION\n", @query_pieces);
+
+  } else {
+    $query  = "SELECT $select FROM $from WHERE $where AND $join";
+  }
+
+  my $sth    = $self->dbh->do_query($query,@args);
+
+  my $count = 0;
+  while (my @row = $sth->fetchrow_array) {
+    $callback->(@row);
+    $count++;
+  }
+  $sth->finish;
+  return $count;
+}
+
+# Set each id sequence to one more than the largest id currently in
+# its table.  Always returns 1; the result of each statement is not
+# examined.
+sub update_sequences {
+  my ($self) = @_;
+  my $dbh    = $self->features_db;
+
+  # [ sequence, id column, table ]
+  my @serials = (
+    [ 'fdata_fid_seq',                'fid',           'fdata'      ],
+    [ 'fattribute_fattribute_id_seq', 'fattribute_id', 'fattribute' ],
+    [ 'fgroup_gid_seq',               'gid',           'fgroup'     ],
+    [ 'ftype_ftypeid_seq',            'ftypeid',       'ftype'      ],
+  );
+
+  for my $serial (@serials) {
+    my ($sequence,$column,$table) = @$serial;
+    $dbh->do("SELECT setval('public.${sequence}', max(${column})+1) FROM ${table}");
+  }
+
+  1;
+}
+
+=head2 make_features_by_name_where_part
+
+ Title   : make_features_by_name_where_part
+ Usage   : $db->make_features_by_name_where_part
+ Function: Overrides a function in Bio::DB::GFF::Adaptor::dbi to ensure
+           that searches will be case insensitive. It creates the SQL
+           fragment needed to select a feature by its group name & class
+ Returns : a SQL fragment and bind arguments
+ Args    : see below
+ Status  : Protected
+
+=cut
+
+# Case-insensitive WHERE fragment selecting a group by class and name.
+# Returns ($sql_fragment, $class, $name).
+sub make_features_by_name_where_part {
+  my ($self, $class, $name) = @_;
+
+  # exact comparison by default: allows utilization of an index on
+  # lower(gname)
+  my $comparison = '=';
+
+  # a '*' in the name asks for a pattern match; each '*' becomes '%'
+  if ($name =~ /\*/) {
+    $name =~ tr/*/%/;
+    $comparison = 'LIKE';
+  }
+
+  return ("fgroup.gclass=? AND lower(fgroup.gname) $comparison lower(?)",$class,$name);
+}
+
+#
+# Methods from dbi.pm that need to be overridden to make
+# searching for fref case insensitive
+#
+#
+# Fetch a DNA string from the fdna table, matching the reference name
+# case-insensitively.
+#
+# Args    : $ref          - reference sequence name
+#           $start, $stop - optional 1-based inclusive coordinates; if
+#                           start > stop the reverse complement is
+#                           returned; a stop without a start means
+#                           "from the first base"
+#           $class        - accepted but not consulted
+# Returns : the DNA string ('' when no row matches)
+sub get_dna {
+  my $self = shift;
+  my ($ref,$start,$stop,$class) = @_;
+
+  my $has_start = defined $start;
+  my $has_stop  = defined $stop;
+
+  my $reversed;
+  if ($has_start && $has_stop && $start > $stop) {
+    $reversed++;
+    ($start,$stop) = ($stop,$start);
+  }
+
+  # Without this default the arithmetic below ran on an undefined
+  # start, which made the final trim keep one base too many.
+  $start = 1 if $has_stop && !$has_start;
+
+  # turn start and stop into 0-based offsets, and work out the offsets
+  # of the chunks that contain them; undefined coordinates are left alone
+  my $cs = $self->dna_chunk_size;
+  my ($offset_start,$offset_stop);
+  if (defined $start) {
+    $start       -= 1;
+    $offset_start = int($start/$cs)*$cs;
+  }
+  if ($has_stop) {
+    $stop        -= 1;
+    $offset_stop  = int($stop/$cs)*$cs;
+  }
+
+  my $sth;
+  # special case, get it all
+  if (!($has_start || $has_stop)) {
+    $sth = $self->dbh->do_query('select fdna,foffset from fdna where lower(fref)=lower(?) order by foffset',$ref);
+  }
+
+  elsif (!$has_stop) {
+    $sth = $self->dbh->do_query('select fdna,foffset from fdna where lower(fref)=lower(?) and foffset>=? order by foffset',
+                                $ref,$offset_start);
+  }
+
+  else {  # stop defined; start defined or defaulted above
+    $sth = $self->dbh->do_query('select fdna,foffset from fdna where lower(fref)=lower(?) and foffset>=? and foffset<=? order by foffset',
+                                $ref,$offset_start,$offset_stop);
+  }
+
+  my $dna = '';
+  while (my($frag,$offset) = $sth->fetchrow_array) {
+      # drop the part of the first chunk that precedes the start
+      substr($frag,0,$start-$offset) = '' if $has_start && $start > $offset;
+      $dna .= $frag;
+  }
+  # drop whatever the last chunk holds beyond the stop
+  substr($dna,$stop-$start+1) = '' if $has_stop && $stop-$start+1 < length($dna);
+  if ($reversed) {
+    $dna = reverse $dna;
+    $dna =~ tr/gatcGATC/ctagCTAG/;
+  }
+
+  $sth->finish;
+  $dna;
+}
+
+
+# Case-insensitive condition on the reference sequence name.
+# List context returns ($sql_fragment, $refseq); otherwise the fragment
+# is returned with $refseq quoted into it by dbh->dbi_quote.
+# $refclass is accepted but not used.
+sub refseq_query {
+  my ($self, $refseq, $refclass) = @_;
+  my $query = "lower(fdata.fref)=lower(?)";
+  return ($query,$refseq) if wantarray;
+  return $self->dbh->dbi_quote($query,$refseq);
+}
+
+# Build the WHERE fragment for a feature-types query.
+#
+# Args    : $srcseq        - optional reference sequence name, matched
+#                            case-insensitively
+#           $start, $stop  - optional range; consulted only when
+#                            $srcseq is defined, with a missing end
+#                            defaulting to 1 or MAX_SEGMENT
+#           $want_count    - accepted but not used here
+#           $typelist      - optional arrayref passed to types_query()
+# Returns : list context   - ($sql_fragment, @bind_values)
+#           scalar context - the fragment with the bind values quoted
+#                            into it by dbh->dbi_quote
+#           The fragment is '1=1' when no condition applies.
+sub make_types_where_part {
+  my $self = shift;
+  my ($srcseq,$start,$stop,$want_count,$typelist) = @_;
+  my (@query,@args);
+  if (defined($srcseq)) {
+    push @query,'lower(fdata.fref)=lower(?)';
+    push @args,$srcseq;
+    if (defined $start or defined $stop) {
+      $start = 1           unless defined $start;
+      $stop  = MAX_SEGMENT unless defined $stop;
+      my ($q,@a) = $self->overlap_query($start,$stop);
+      push @query,"($q)";
+      push @args,@a;
+    }
+  }
+  if (defined $typelist && @$typelist) {
+    my ($q,@a) = $self->types_query($typelist);
+    push @query,($q);
+    push @args,@a;
+  }
+  my $query = @query ? join(' AND ',@query) : '1=1';
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+# Look up the fid of the fdata row matching the given reference
+# (case-insensitive), start, stop, type id and group id.  The prepared
+# statement is kept in $self->{load_stuff} and reused between calls.
+# Returns the fid, or nothing when the statement is unavailable or
+# fails to execute.
+sub get_feature_id {
+  my ($self, $ref, $start, $stop, $typeid, $groupid) = @_;
+
+  my $cache = $self->{load_stuff};
+  $cache->{get_feature_id} ||= $self->features_db->prepare_delayed(
+      'SELECT fid FROM fdata WHERE lower(fref)=lower(?) AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
+
+  my $sth = $cache->{get_feature_id};
+  return unless $sth;
+  return unless $sth->execute($ref,$start,$stop,$typeid,$groupid);
+
+  my @row = $sth->fetchrow_array;
+  return $row[0];
+}
+
+# Delete rows from fdata.
+#
+# Args    : $delete_spec - hashref with the keys
+#             segments   : arrayref of segment objects (abs_ref,
+#                          abs_start and abs_stop are called on each)
+#             types      : arrayref passed to types_query()
+#             force      : allow a delete with no restriction at all
+#             range_type : 'overlaps', 'contains' or 'contained_in'
+# Returns : the result of the DELETE statement
+# Throws  : on an invalid range type, on an unrestricted delete
+#           without force, and when the statement fails
+sub _delete {
+  my $self = shift;
+  my $delete_spec = shift;
+  my $ranges      = $delete_spec->{segments} || [];
+  my $types       = $delete_spec->{types}    || [];
+  my $force       = $delete_spec->{force};
+  my $range_type  = $delete_spec->{range_type};
+  my $dbh         = $self->features_db;
+
+  my $query = 'delete from fdata';
+  my @where;
+
+  my @range_part;
+  for my $segment (@$ranges) {
+    my $ref   = $dbh->quote($segment->abs_ref);
+    my $start = $segment->abs_start;
+    my $stop  = $segment->abs_stop;
+    # scalar context: each *_query method returns a fully quoted fragment
+    my $range =  $range_type eq 'overlaps'     ? $self->overlap_query($start,$stop)
+               : $range_type eq 'contains'     ? $self->contains_query($start,$stop)
+               : $range_type eq 'contained_in' ? $self->contained_in_query($start,$stop)
+               : $self->throw("Invalid range type '$range_type'");
+    push @range_part,"(lower(fref)=lower($ref) AND $range)";
+  }
+  push @where,'('. join(' OR ',@range_part).')' if @range_part;
+
+  # get all the types
+  if (@$types) {
+    my $types_where = $self->types_query($types);
+    my $types_query = "select ftypeid from ftype where $types_where";
+    my $result      = $dbh->selectall_arrayref($types_query);
+    my @typeids     = map {$_->[0]} @$result;
+    my $typelist    = join ',',map{$dbh->quote($_)} @typeids;
+    $typelist ||= "0"; # don't cause DBI to die with invalid SQL when
+                       # unknown feature types were requested.
+    push @where,"(ftypeid in ($typelist))";
+  }
+  $self->throw("This operation would delete all feature data and -force not specified")
+    unless @where || $force;
+  $query .= " where ".join(' and ',@where) if @where;
+  warn "$query\n" if $self->debug;
+  my $result = $dbh->do($query);
+  defined $result or $self->throw($dbh->errstr);
+  $result;
+}
+
+# Run the query that resolves a landmark name and class to absolute
+# coordinates, and return whatever dbh->do_query returns.  When
+# $refseq is defined the "forced" query is run with it as a third
+# bind value.
+sub make_abscoord_query {
+  my ($self, $name, $class, $refseq) = @_;
+
+  my $query              = $self->getseqcoords_query();
+  my $getforcedseqcoords = $self->getforcedseqcoords_query();
+
+  if ($name =~ /\*/) {
+    # escape literal LIKE metacharacters, then turn '*' into '%'
+    $name =~ s/([%_])/\\$1/g;
+    $name =~ tr/*/%/;
+    # NOTE(review): only $query is switched to LIKE; the forced query
+    # is left untouched -- confirm that this is intended
+    $query =~ s/gname\) = lower/gname) LIKE lower/;
+  }
+
+  return $self->dbh->do_query($getforcedseqcoords,$name,$class,$refseq)
+    if defined $refseq;
+  return $self->dbh->do_query($query,$name,$class);
+}
+
+# Run the alias variant of the coordinate lookup for a name and class,
+# and return whatever dbh->do_query returns.
+sub make_aliasabscoord_query {
+  my ($self, $name, $class) = @_;
+
+  my $query = $self->getaliascoords_query();
+
+  if ($name =~ /\*/) {
+    # escape literal LIKE metacharacters, then turn '*' into '%'
+    $name =~ s/([%_])/\\$1/g;
+    $name =~ tr/*/%/;
+    $query =~ s/gname\) = lower/gname) LIKE lower/;
+  }
+
+  return $self->dbh->do_query($query,$name,$class);
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg_fts.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg_fts.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi/pg_fts.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,362 @@
+package Bio::DB::GFF::Adaptor::dbi::pg_fts;
+
+# $Id: pg_fts.pm,v 1.2.4.1 2006/10/02 23:10:16 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi::pg_fts -- Database adaptor for a specific postgres schema with a TSearch2 implementation
+
+=head1 SYNOPSIS
+
+    #create new GFF database connection
+    my $db      = Bio::DB::GFF->new( -adaptor => 'dbi::pg_fts',
+                                     -dsn     => 'dbi:Pg:dbname=worm');
+
+    #add full text indexing 'stuff'
+    #assumes that TSearch2 is available to PostgreSQL
+    #this will take a VERY long time for a reasonably large database
+    $db->install_TSearch2();
+
+    ...some time later...
+    #we don't like full text searching...
+    $db->remove_TSearch2();
+
+=head1 DESCRIPTION
+
+This adaptor is based on Bio::DB::GFF::Adaptor::dbi::pg but it implements
+the TSearch2 PostgreSQL contrib module for fast full text searching.  To
+use this module with your PostgreSQL GFF database, you need to make
+TSearch2 available in the database. 
+
+To use this adaptor, follow these steps:
+
+=over
+
+=item Install TSearch2 contrib module for Pg
+
+Can be as easy as `sudo yum install postgresql-contrib`, or you may
+need to recompile PostgreSQL to include it.  See
+L<http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html>
+for more details
+
+=item Load the TSearch2 functions into your database
+
+  % cat tsearch2.sql | psql <your database>
+
+=item Load your data using the pg adaptor:
+
+ % bp_pg_bulk_load_gff.pl -c -d yeast saccharomyces_cerevisiae.gff
+
+or
+
+ % bp_load_gff.pl -c -d yeast -a dbi::pg saccharomyces_cerevisiae.gff
+
+=item Add GFF/TSearch2 specific modifications
+
+Execute a perl script like this one:
+
+  #!/usr/bin/perl -w
+  use strict;
+
+  use Bio::DB::GFF;
+
+  my $db = Bio::DB::GFF->new(
+      -adaptor   => 'dbi::pg_fts',
+      -dsn       => 'dbi:Pg:dbname=yeast',
+      -user      => 'scott',
+    );
+
+  print "Installing TSearch2 columns...\n";
+
+  $db->install_TSearch2();
+
+  print "Done\n";
+
+=back
+
+Note that this last step will take a long time.  For a S. cerevisiae
+database with 15K rows, it took over an hour on my laptop, and
+with a C. elegans database (~10 million rows) it took well over a day.
+
+If at some point you add more data to your database, you need to run
+a similar script to the one above, only executing the update_TSearch2()
+method.  Finally, if you want to remove the TSearch2 columns from your
+database and go back to using the pg adaptor, you can execute a script
+like the one above, only executing the remove_TSearch2() method.
+
+=head1 NOTES ABOUT TSearch2 SEARCHING
+
+You should know a few things about how searching with TSearch2 works in
+the GBrowse environment:
+
+=over
+
+=item 1
+
+TSearch2 does not do wild cards, so you should encourage your users not
+to use them.  If wild cards are used, the adaptor will fall back on 
+an ILIKE search, which will be much slower.
+
+=item 2
+
+However, TSearch2 does do 'word stemming'.  That is, if you search
+for 'copy', it will find 'copy', 'copies', and 'copied'.
+
+=item 3
+
+TSearch2 does not do phrase searching; all of the terms in the
+search string are ANDed together.
+
+=back
+
+=head1 ACKNOWLEDGEMENTS
+
+Special thanks to Russell Smithies and Paul Smale at AgResearch in
+New Zealand for giving me their recipe for doing full text indexing
+in a GFF database.
+
+=head1 BUGS
+
+Please report bugs to the BioPerl and/or GBrowse mailing lists
+(L<mailto:bioperl-l@lists.open-bio.org> and L<mailto:gmod-gbrowse@lists.sourceforge.net>
+respectively).
+
+=head1 SEE ALSO
+
+Please see L<Bio::DB::GFF::Adaptor::dbi::pg> for more information
+about tuning your PostgreSQL server for GFF data, and for general
+information about GFF database access, see L<Bio::DB::GFF>.
+
+=head1 AUTHOR
+
+Scott Cain, cain at cshl.edu
+
+=head1 APPENDIX
+
+=cut
+
+# a simple postgres adaptor
+use strict;
+use Bio::DB::GFF::Adaptor::dbi;
+use base qw(Bio::DB::GFF::Adaptor::dbi::pg);
+
+# Full-text query: selects distinct (gclass, gname, fattribute_value)
+# rows whose idxfti column matches a tsquery.  The single placeholder
+# receives the tsquery text.
+use constant FULLTEXTSEARCH => <<END;
+SELECT distinct gclass,gname,fattribute_value
+    FROM fgroup,fattribute_to_feature,fdata
+     WHERE fgroup.gid=fdata.gid
+       AND fdata.fid=fattribute_to_feature.fid
+       AND (fattribute_to_feature.idxfti @@ to_tsquery('default', ?))
+END
+;
+
+# Wildcard fallback: same columns and joins, but a case-insensitive
+# LIKE comparison on fattribute_value.  The single placeholder
+# receives the LIKE pattern.
+use constant FULLTEXTWILDCARD => <<END;
+SELECT distinct gclass,gname,fattribute_value
+    FROM fgroup,fattribute_to_feature,fdata
+     WHERE fgroup.gid=fdata.gid
+       AND fdata.fid=fattribute_to_feature.fid
+       AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
+END
+;
+
+# Constructor.  Nothing adaptor-specific is set up here; the arguments
+# are handed to the parent class constructor and its result returned.
+sub new {
+  my ($class, @args) = @_;
+  my $adaptor = $class->SUPER::new(@args);
+  return $adaptor;
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text string",$limit)
+ Function: Search the notes for a text string, using PostgreSQL TSearch2
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+This is based on the mysql-specific method, but makes use of the TSearch2
+functionality in PostgreSQL's contrib directory. Given a search string,
+it performs a full-text search of the notes table and returns an array
+of results.  Each row of the returned array is an arrayref containing
+the following fields:
+
+  column 1   A Bio::DB::GFF::Featname object, for passing to segment()
+  column 2   The text of the note
+  column 3   A relevance score.
+
+=cut
+
+# Search the attribute values for a text string.
+#
+# Args    : $search_string - search text.  If it contains '*', a LIKE
+#                            search is run with each run of '*' turned
+#                            into '%'; otherwise its whitespace-
+#                            separated terms are ANDed into a tsquery.
+#           $limit         - optional row limit; must be a
+#                            non-negative integer
+# Returns : list of [Featname object, note text, 0] arrayrefs
+# Throws  : when $limit is defined but is not a non-negative integer
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  # split ' ' discards leading whitespace, unlike split /\s+/, which
+  # yields an empty first term for a padded string and so produced a
+  # malformed tsquery such as " & foo"
+  my @terms = split ' ', $search_string;
+
+  # the limit is interpolated into the SQL text, so accept digits only
+  if (defined $limit) {
+    $limit =~ /\A\s*\d+\s*\z/
+      or $self->throw("search_notes(): row limit must be a non-negative integer, not '$limit'");
+  }
+
+  my ($query,$bind);
+  if ($search_string =~ /\*/) {
+    $search_string =~ tr/*/%/s;
+    ($query,$bind) = (FULLTEXTWILDCARD,$search_string);
+  }
+  else {
+    # a single term joins to itself; several terms are ANDed
+    ($query,$bind) = (FULLTEXTSEARCH,join(' & ', @terms));
+  }
+  $query .= " limit $limit" if defined $limit;
+
+  my $sth = $self->dbh->do_query($query,$bind);
+
+  my @results;
+  while (my ($class,$name,$note) = $sth->fetchrow_array) {
+
+     next unless $class && $name;    # sorry, ignore NULL objects
+     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+
+     push @results,[$featname,$note,0]; #gbrowse expects a score, but
+                                        #pg doesn't give one, thus the 0
+  }
+
+  return @results;
+}
+
+=head2 make_features_by_name_where_part
+
+ Title   : make_features_by_name_where_part
+ Function: constructs a TSearch2-compliant WHERE clause for a name search
+ Status  : protected
+
+=cut
+
+#need a make_features_by_name_where_part method to override pg
+# WHERE fragment selecting a group by class and name.
+#
+# A name containing '*' is matched with a case-insensitive LIKE, each
+# run of '*' becoming '%'.  Any other name is split on whitespace and
+# its terms are ANDed into a tsquery matched against fgroup.idxfti.
+#
+# Returns : ($sql_fragment, $class, $pattern_or_tsquery)
+sub make_features_by_name_where_part {
+  my $self = shift;
+  my ($class,$name) = @_;
+
+  if ($name =~ /\*/) {
+    $name =~ tr/*/%/s;
+    return ("fgroup.gclass=? AND lower(fgroup.gname) LIKE lower(?)",$class,$name);
+  }
+
+  # split ' ' discards leading whitespace, unlike split /\s+/, which
+  # yields an empty first term for a padded name and so produced a
+  # malformed tsquery such as " & foo"
+  my $andstring = join ' & ', split(' ', $name);
+  my $where_str = "fgroup.gclass=? AND (fgroup.idxfti @@ to_tsquery('default', ?)) ";
+  return ($where_str,$class,$andstring);
+}
+
+=head2 install_TSearch2
+
+ Title   : install_TSearch2
+ Function: installs schema modifications for use with TSearch2
+ Usage   : $db->install_TSearch2
+ Status  : public
+
+=cut
+
+
+#needs method for installing TSearch2 (does that mean that the SQL for
+#creating the tables and functions should go in here?  That would be
+#the safest and easiest thing to do
+# Add an idxFTI tsvector column to fattribute_to_feature and to
+# fgroup, then call update_TSearch2().  Throws if either ALTER TABLE
+# fails.
+sub install_TSearch2 {
+  my ($self) = @_;
+  my $dbh = $self->features_db;
+
+  # [ statement, message thrown when it fails ]
+  my @new_columns = (
+    [ 'ALTER TABLE fattribute_to_feature ADD COLUMN idxFTI tsvector',
+      'adding FTI column to f_to_f failed' ],
+    [ 'ALTER TABLE fgroup ADD COLUMN idxFTI tsvector',
+      'adding FTI column to fgroup failed' ],
+  );
+
+  foreach my $step (@new_columns) {
+    my ($sql,$complaint) = @$step;
+    $self->throw($complaint) unless $dbh->do($sql);
+  }
+
+  $self->update_TSearch2();
+
+  return;
+}
+
+=head2 update_TSearch2
+
+ Title   : update_TSearch2
+ Function: Updates TSearch2 columns
+ Usage   : $db->update_TSearch2
+ Status  : public
+
+=cut
+
+
+# Fill the idxFTI columns for rows that have none, vacuum, create the
+# gist indexes, and vacuum again.  A failed UPDATE or first VACUUM
+# throws; a failed CREATE INDEX only warns; the final VACUUM and the
+# set_curcfg call are not checked.
+sub update_TSearch2 {
+  my ($self) = @_;
+  my $dbh = $self->features_db;
+
+  $self->warn('updating full text column; this may take a very long time...');
+  $self->throw('updating fti column failed')
+    unless $dbh->do(q{UPDATE fattribute_to_feature SET idxFTI= to_tsvector('default', fattribute_value) WHERE idxFTI IS NULL});
+  $self->throw('updating fgroup fti column failed')
+    unless $dbh->do(q{UPDATE fgroup SET idxFTI= to_tsvector('default', gname) WHERE idxFTI IS NULL});
+
+  $self->warn('Preliminary optimization of database; this may also take a long time...');
+  $self->throw('vacuum failed')
+    unless $dbh->do('VACUUM FULL ANALYZE');
+
+  $self->warn('Updating full text index; again, this may take a long time');
+  $self->warn('creating full text index failed')
+    unless $dbh->do('CREATE INDEX idxFTI_idx ON fattribute_to_feature USING gist(idxFTI)');
+  $self->warn('creating fgroup full text index failed')
+    unless $dbh->do('CREATE INDEX fgroup_idxFTI_idx ON fgroup USING gist(idxFTI)');
+
+  $self->warn('Optimizing database; hopefully, this will not take as long as other steps');
+  $dbh->do('VACUUM FULL ANALYZE');
+  $dbh->do("SELECT set_curcfg('default')");
+
+  return;
+}
+
+=head2 remove_TSearch2
+
+ Title   : remove_TSearch2
+ Function: Removes TSearch2 columns
+ Usage   : $db->remove_TSearch2
+ Status  : public
+
+=cut
+
+# Drop the two full-text indexes and then the two idxFTI columns.
+# Throws at the first statement that fails.
+sub remove_TSearch2 {
+  my ($self) = @_;
+  my $dbh = $self->features_db;
+
+  $self->warn('Removing full text search capabilities');
+
+  # [ statement, message thrown when it fails ], in execution order
+  my @teardown = (
+    [ 'DROP INDEX idxFTI_idx',        'dropping full text index failed' ],
+    [ 'DROP INDEX fgroup_idxFTI_idx', 'dropping full text index failed' ],
+    [ 'ALTER TABLE fattribute_to_feature DROP COLUMN idxFTI',
+      'dropping full text column failed' ],
+    [ 'ALTER TABLE fgroup DROP COLUMN idxFTI',
+      'dropping full text column failed' ],
+  );
+
+  foreach my $step (@teardown) {
+    my ($sql,$complaint) = @$step;
+    $self->throw($complaint) unless $dbh->do($sql);
+  }
+
+  return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/dbi.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2117 @@
+# $Id: dbi.pm,v 1.60.4.1 2006/10/02 23:10:16 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::dbi -- Database adaptor for DBI (SQL) databases
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>
+
+=head1 DESCRIPTION
+
+This is the base class for DBI-based adaptors.  It does everything
+except generating the text of the queries to be used.  See the section
+QUERIES TO IMPLEMENT for the list of methods that must be implemented.
+
+=cut
+
+package Bio::DB::GFF::Adaptor::dbi;
+
+# base class for dbi-based implementations
+use strict;
+
+use DBI;
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::GFF::Util::Binning;
+use Bio::DB::GFF::Adaptor::dbi::iterator;
+use Bio::DB::GFF::Adaptor::dbi::caching_handle;
+
+use base qw(Bio::DB::GFF);
+
+# Tuning constants for segment size, binning, join strategy and DNA
+# storage.
+
+use constant MAX_SEGMENT => 100_000_000;  # the largest a segment can get
+
+# this is the largest that any reference sequence can be (100 megabases)
+use constant MAX_BIN    => 100_000_000;
+
+# this is the smallest bin (1 K)
+use constant MIN_BIN    => 1000;
+
+# size of range over which it is faster to force the database to use the range for indexing
+use constant STRAIGHT_JOIN_LIMIT => 200_000;
+
+# this is the size to which DNA should be shredded
+use constant DNA_CHUNK_SIZE  => 2000;
+
+# for debugging fbin optimization
+use constant EPSILON  => 1e-7;  # set to zero if you trust mysql's floating point comparisons
+use constant OPTIMIZE => 1;     # set to zero to turn off optimization completely
+
+##############################################################################
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::GFF->new(@args)
+ Function: create a new adaptor
+ Returns : a Bio::DB::GFF object
+ Args    : see below
+ Status  : Public
+
+This is the constructor for the adaptor.  It is called automatically
+by Bio::DB::GFF-E<gt>new.  In addition to arguments that are common among
+all adaptors, the following class-specific arguments are recognized:
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:mysql:ens0040'
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+=cut
+
+# Create a new Bio::DB::GFF::Adaptor::dbi object
+# Create a new Bio::DB::GFF::Adaptor::dbi object.
+#
+# Args    : -dsn (or -db / -featuredb) : a DBI data source string, or an
+#                                        existing DBI::db handle
+#           -user (or -username)       : user name, for a DSN string
+#           -pass (or -passwd/-password): password, for a DSN string
+# Returns : the new adaptor object
+# Throws  : when no data source is given, when connecting fails, or
+#           when a reference is passed that is not a DBI handle
+sub new {
+  my $class = shift;
+  my ($features_db,$username,$auth,$other) = rearrange([
+							[qw(FEATUREDB DB DSN)],
+							[qw(USERNAME USER)],
+							[qw(PASSWORD PASSWD PASS)],
+						       ],@_);
+
+  $features_db  || $class->throw("new(): Provide a data source or DBI database");
+
+  if (!ref($features_db)) {
+    # a plain string is a DSN: connect through the caching handle
+    my $dsn = $features_db;
+    my @args;
+    push @args,$username if defined $username;
+    push @args,$auth     if defined $auth;
+    $features_db = Bio::DB::GFF::Adaptor::dbi::caching_handle->new($dsn,@args)
+      || $class->throw("new(): Failed to connect to $dsn: "
+		       . Bio::DB::GFF::Adaptor::dbi::caching_handle->errstr);
+  } else {
+    $features_db->isa('DBI::db') 
+      || $class->throw("new(): $features_db is not a DBI handle");
+  }
+
+  # fill in object
+  return bless {
+		features_db => $features_db
+	       },$class;
+}
+
+# Pass the debug arguments to the database handle first, then to the
+# parent class, whose result is returned.
+sub debug {
+  my ($self, @flag) = @_;
+  $self->features_db->debug(@flag);
+  return $self->SUPER::debug(@flag);
+}
+
+=head2 features_db
+
+ Title   : features_db
+ Usage   : $dbh = $db->features_db
+ Function: get database handle
+ Returns : a DBI handle
+ Args    : none
+ Status  : Public
+
+ Note: what is returned is not really a DBI::db handle, but a
+ subclass of one.  This means that you cannot manipulate the
+ handle's attributes directly.  Instead call the attribute
+ method:
+
+ my $dbh = $db->features_db;
+ $dbh->attribute(AutoCommit=>0);
+
+=cut
+
+sub features_db {
+  my ($self) = @_;
+  return $self->{features_db};   # the handle stored by new()
+}
+sub dbh {
+  my ($self) = @_;
+  return $self->{features_db};   # same handle that features_db() returns
+}
+
+=head2 get_dna
+
+ Title   : get_dna
+ Usage   : $string = $db->get_dna($name,$start,$stop,$class)
+ Function: get DNA string
+ Returns : a string
+ Args    : name, class, start and stop of desired segment
+ Status  : Public
+
+This method performs the low-level fetch of a DNA substring given its
+name, class and the desired range.  After some argument consistency
+checking it reads the stored chunks from the fdna table and trims
+them to the requested range.
+
+=cut
+
+# Fetch a DNA string from the fdna table.
+#
+# Args    : $ref          - reference sequence name
+#           $start, $stop - optional 1-based inclusive coordinates; if
+#                           start > stop the reverse complement is
+#                           returned; a stop without a start means
+#                           "from the first base"
+#           $class        - accepted but not consulted
+# Returns : the DNA string ('' when no row matches)
+sub get_dna {
+  my $self = shift;
+  my ($ref,$start,$stop,$class) = @_;
+
+  my $has_start = defined $start;
+  my $has_stop  = defined $stop;
+
+  my $reversed;
+  if ($has_start && $has_stop && $start > $stop) {
+    $reversed++;
+    ($start,$stop) = ($stop,$start);
+  }
+
+  # Without this default the arithmetic below ran on an undefined
+  # start, which made the final trim keep one base too many.
+  $start = 1 if $has_stop && !$has_start;
+
+  # turn start and stop into 0-based offsets, and work out the offsets
+  # of the chunks that contain them; undefined coordinates are left alone
+  my $cs = $self->dna_chunk_size;
+  my ($offset_start,$offset_stop);
+  if (defined $start) {
+    $start       -= 1;
+    $offset_start = int($start/$cs)*$cs;
+  }
+  if ($has_stop) {
+    $stop        -= 1;
+    $offset_stop  = int($stop/$cs)*$cs;
+  }
+
+  my $sth;
+  # special case, get it all
+  if (!($has_start || $has_stop)) {
+    $sth = $self->dbh->do_query('select fdna,foffset from fdna where fref=? order by foffset',$ref);
+  }
+
+  elsif (!$has_stop) {
+    $sth = $self->dbh->do_query('select fdna,foffset from fdna where fref=? and foffset>=? order by foffset',
+				$ref,$offset_start);
+  }
+
+  else {  # stop defined; start defined or defaulted above
+    $sth = $self->dbh->do_query('select fdna,foffset from fdna where fref=? and foffset>=? and foffset<=? order by foffset',
+				$ref,$offset_start,$offset_stop);
+  }
+
+  my $dna = '';
+  while (my($frag,$offset) = $sth->fetchrow_array) {
+      # drop the part of the first chunk that precedes the start
+      substr($frag,0,$start-$offset) = '' if $has_start && $start > $offset;
+      $dna .= $frag;
+  }
+  # drop whatever the last chunk holds beyond the stop
+  substr($dna,$stop-$start+1) = '' if $has_stop && $stop-$start+1 < length($dna);
+  if ($reversed) {
+    $dna = reverse $dna;
+    $dna =~ tr/gatcGATC/ctagCTAG/;
+  }
+
+  $sth->finish;
+  $dna;
+}
+
+
+=head2 get_abscoords
+
+ Title   : get_abscoords
+ Usage   : ($refseq,$refclass,$start,$stop,$strand) = $db->get_abscoords($name,$class)
+ Function: get absolute coordinates for landmark
+ Returns : an array ref -- see below
+ Args    : name and class of desired landmark
+ Status  : Public
+
+This method performs the low-level resolution of a landmark into a
+reference sequence and position.
+
+The result is an array ref, each element of which is a five-element
+list containing reference sequence name, class, start, stop and strand.
+
+=cut
+
+# Resolve a landmark to absolute coordinates.  The primary query is tried
+# first; if it yields nothing, the alias query is tried.  Returns an array
+# ref of rows, or reports an error and returns nothing when both are empty.
+sub get_abscoords {
+  my $self = shift;
+  my ($name,$class,$refseq)  = @_;
+
+  my $sth = $self->make_abscoord_query($name,$class,$refseq);
+
+  my @result;
+  while (my @row = $sth->fetchrow_array) {
+    push @result,\@row
+  }
+  $sth->finish;
+
+  if (@result == 0) {
+    # nothing under that name: fall back to looking it up as an alias
+    my $sth2 = $self->make_aliasabscoord_query($name,$class);
+
+    while (my @row2 = $sth2->fetchrow_array) {
+        push @result,\@row2
+    }
+    $sth2->finish;   # finish the alias handle, not the first one again
+
+    if (@result == 0){
+        $self->error("$name not found in database");
+        return;
+    }
+  }
+  return \@result;
+}
+
+
+=head2 get_features
+
+ Title   : get_features
+ Usage   : $db->get_features($search,$options,$callback)
+ Function: retrieve features from the database
+ Returns : number of features retrieved
+ Args    : see below
+ Status  : Public
+
+This is the low-level method that is called to retrieve GFF lines from
+the database.  It is responsible for retrieving features that satisfy
+range and feature type criteria, and passing the GFF fields to a
+callback subroutine.
+
+See the manual page for Bio::DB::GFF for the interpretation of the
+arguments and how the information retrieved by get_features is passed
+to the callback for processing.
+
+Internally, get_features() is a front end for range_query().  The
+latter method constructs the query and executes it.  get_features()
+calls fetchrow_array() to recover the fields and passes them to the
+callback.
+
+=cut
+
+# Given sequence name, range, and optional filter, retrieve list of
+# all features.  Passes features through callback.
+# Run the range query described by $search and $options and hand every
+# returned row to $callback.  Returns the number of rows processed, or
+# nothing when range_query() yields no statement handle.
+sub get_features {
+  my ($self,$search,$options,$callback) = @_;
+  $self->throw('must provide a callback argument') unless $callback;
+
+  my $sth = $self->range_query(
+    @{$search}{qw(rangetype refseq refclass start stop types)},
+    @{$options}{qw(sparse sort_by_group ATTRIBUTES BINSIZE)},
+  ) or return;
+
+  my $seen = 0;
+  while (my @fields = $sth->fetchrow_array) {
+    $callback->(@fields);
+    $seen++;
+  }
+  $sth->finish;
+
+  return $seen;
+}
+
+=head2 classes
+
+ Title   : classes
+ Usage   : $db->classes
+ Function: return list of landmark classes in database
+ Returns : a list of classes
+ Args    : none
+ Status  : public
+
+This routine returns the list of reference classes known to the
+database, or empty if classes are not used by the database.  Classes
+are distinct from types, being essentially qualifiers on the reference
+namespaces.
+
+NOTE: In the current mysql-based schema, this query takes a while to
+run due to the classes not being normalized.
+
+=cut
+
+# Return the list of reference classes produced by the query that
+# make_classes_query() supplies, or nothing when no such query exists.
+sub classes {
+  my $self = shift;
+  my ($query,@args) = $self->make_classes_query or return;
+  my $sth           = $self->dbh->do_query($query,@args);
+  my @classes;
+  while (my ($c) = $sth->fetchrow_array) {
+     push @classes,$c;
+  }
+  $sth->finish;   # release the handle, as the other fetch loops here do
+  @classes;
+}
+
+=head2 make_classes_query
+
+ Title   : make_classes_query
+ Usage   : ($query,@args) = $db->make_classes_query
+ Function: return query fragment for generating list of reference classes
+ Returns : a query and args
+ Args    : none
+ Status  : public
+
+=cut
+
+# Base implementation: there is no classes query, so return nothing.
+sub make_classes_query {
+  return;
+}
+
+=head2 _feature_by_name
+
+ Title   : _feature_by_name
+ Usage   : $db->_feature_by_name($class,$name,$location,$callback)
+ Function: get a list of features by name and class
+ Returns : count of number of features retrieved
+ Args    : class of feature, name of feature, optional location
+           ([refseq,start,stop] array ref) and a callback
+ Status  : protected
+
+This method is used internally.  The callback arguments are those used
+by make_feature().  Internally, it invokes the following abstract procedures:
+
+ make_features_select_part
+ make_features_from_part
+ make_features_by_name_where_part
+ make_features_by_alias_where_part  (for aliases)
+ make_features_join_part
+
+=cut
+
+# Look a feature up by group name and then by alias, passing every row of
+# both queries to $callback.  $location, if given, is an array ref of
+# [refseq,start,stop] restricting the search to overlapping features.
+# Returns the total number of rows passed to the callback.
+sub _feature_by_name {
+  my $self = shift;
+  my ($class,$name,$location,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+
+  my $select           = $self->make_features_select_part;
+  my $from1            = $self->make_features_from_part(undef,{sparse_groups=>1});
+  my ($where1,@args1)  = $self->make_features_by_name_where_part($class,$name);
+  my $join             = $self->make_features_join_part;
+
+  # Called in scalar context on purpose: the range fragment then comes
+  # back with its arguments already quoted in, so it adds no bind values.
+  # Declared separately because "my ... if" leaves the variable in an
+  # unspecified state when the condition is false.
+  my $range;
+  $range = $self->make_features_by_range_where_part('overlaps',
+                                                    {refseq=>$location->[0],
+                                                     class =>'',
+                                                     start=>$location->[1],
+                                                     stop =>$location->[2]}) if $location;
+  # group query
+  my $query1  = "SELECT $select FROM $from1 WHERE $where1 AND $join";
+  $query1    .= " AND $range" if $range;
+
+  # alias query
+  my $from2            = $self->make_features_from_part(undef,{attributes=>1});
+  my ($where2,@args2)  = $self->make_features_by_alias_where_part($class,$name);
+
+  my $query2  = "SELECT $select FROM $from2 WHERE $where2 AND $join";
+  $query2    .= " AND $range" if $range;
+
+  my $count = 0;
+
+  # Each query runs with the bind arguments generated for it; the two
+  # where-part builders do not escape the name identically.
+  for my $pair ([$query1,\@args1],[$query2,\@args2]) {
+    my ($query,$args) = @$pair;
+    my $sth    = $self->dbh->do_query($query,@$args);
+    while (my @row = $sth->fetchrow_array) {
+      $callback->(@row);
+      $count++;
+    }
+    $sth->finish;
+  }
+
+  return $count;
+}
+
+=head2 _feature_by_id
+
+ Title   : _feature_by_id
+ Usage   : $db->_feature_by_id($ids,$type,$callback)
+ Function: get a list of features by ID
+ Returns : count of number of features retrieved
+ Args    : arrayref containing list of IDs to fetch and a callback
+ Status  : protected
+
+This method is used internally.  The $type selector is one of
+"feature" or "group".  The callback arguments are those used by
+make_feature().  Internally, it invokes the following abstract
+procedures:
+
+ make_features_select_part
+ make_features_from_part
+ make_features_by_id_where_part
+ make_features_join_part
+
+=cut
+
+# Fetch features by ID and pass each row to $callback.  $type selects the
+# where-part builder: 'feature' uses make_features_by_id_where_part(), any
+# other value uses make_features_by_gid_where_part().  Returns the number
+# of rows passed to the callback.
+sub _feature_by_id {
+  my $self = shift;
+  my ($ids,$type,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+
+  my $select         = $self->make_features_select_part;
+  my $from           = $self->make_features_from_part;
+  my ($where,@args)  = $type eq 'feature' ? $self->make_features_by_id_where_part($ids)
+                                          : $self->make_features_by_gid_where_part($ids);
+  my $join           = $self->make_features_join_part;
+  my $query          = "SELECT $select FROM $from WHERE $where AND $join";
+  my $sth            = $self->dbh->do_query($query,@args);
+
+  my $count = 0;
+  while (my @row = $sth->fetchrow_array) {
+    $callback->(@row);
+    $count++;
+  }
+  $sth->finish;
+  return $count;
+}
+
+# Fetch the features matching the given attributes hash ref and pass each
+# row to $callback.  Returns the number of rows passed to the callback.
+sub _feature_by_attribute {
+  my $self = shift;
+  my ($attributes,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+
+  my $select         = $self->make_features_select_part;
+  my $from           = $self->make_features_from_part(undef,{attributes=>$attributes});
+  # empty range type: only the attribute conditions end up in the where part
+  my ($where,@args)  = $self->make_features_by_range_where_part('',{attributes=>$attributes});
+  my $join           = $self->make_features_join_part({attributes=>$attributes});
+  my $query          = "SELECT $select FROM $from WHERE $where AND $join";
+  my $sth            = $self->dbh->do_query($query,@args);
+
+  my $count = 0;
+  while (my @row = $sth->fetchrow_array) {
+    $callback->(@row);
+    $count++;
+  }
+  $sth->finish;
+  return $count;
+}
+
+=head2 get_types
+
+ Title   : get_types
+ Usage   : $db->get_types($refseq,$refclass,$start,$stop,$count)
+ Function: get list of types
+ Returns : a list of Bio::DB::GFF::Typename objects
+ Args    : see below
+ Status  : Public
+
+This method is responsible for fetching the list of feature type names
+from the database.  The query may be limited to a particular range, in
+which case the range is indicated by a landmark sequence name and
+class and its subrange, if any.  These arguments may be undef if it is
+desired to retrieve all feature types in the database (which may be a
+slow operation in some implementations).
+
+If the $count flag is false, the method returns a simple list of
+Bio::DB::GFF::Typename objects.  If $count is true, the method returns
+a list of $name=E<gt>$count pairs, where $count indicates the number of
+times this feature occurs in the range.
+
+Internally, this method calls upon the following functions to generate
+the SQL and its bind variables:
+
+  ($q1,@args) = make_types_select_part(@args);
+  ($q2,@args) = make_types_from_part(@args);
+  ($q3,@args) = make_types_where_part(@args);
+  ($q4,@args) = make_types_join_part(@args);
+  ($q5,@args) = make_types_group_part(@args);
+
+The components are then combined as follows:
+
+  $query = "SELECT $q1 FROM $q2 WHERE $q3 AND $q4 GROUP BY $q5";
+
+If any of the query fragments contain the ? bind variable, then the
+same number of bind arguments must be provided in @args.  The
+fragment-generating functions are described below.
+
+=cut
+
+# Build and run the feature-type query from the make_types_*_part()
+# fragments.  Returns $type=>$count pairs when $want_count is true,
+# otherwise the list of Bio::DB::GFF::Typename objects.
+sub get_types {
+  my $self = shift;
+  my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
+  my $straight      = $self->do_straight_join($srcseq,$start,$stop,[]) ? 'straight_join' : '';
+  my ($select,@args1) = $self->make_types_select_part($srcseq,$start,$stop,$want_count,$typelist);
+  my ($from,@args2)   = $self->make_types_from_part($srcseq,$start,$stop,$want_count,$typelist);
+  my ($join,@args3)   = $self->make_types_join_part($srcseq,$start,$stop,$want_count,$typelist);
+  my ($where,@args4)  = $self->make_types_where_part($srcseq,$start,$stop,$want_count,$typelist);
+  my ($group,@args5)  = $self->make_types_group_part($srcseq,$start,$stop,$want_count,$typelist);
+
+  my $query = "SELECT $straight $select FROM $from WHERE $join AND $where";
+  $query   .= " GROUP BY $group" if $group;
+  # bind arguments follow the order in which the fragments appear in $query
+  my @args  = (@args1,@args2,@args3,@args4,@args5);
+  my $sth = $self->dbh->do_query($query,@args) or return;
+
+  # both hashes are keyed by the stringified type; %obj keeps the objects
+  my (%result,%obj);
+  while (my ($method,$source,$count) = $sth->fetchrow_array) {
+    my $type = Bio::DB::GFF::Typename->new($method,$source);
+    $result{$type} = $count;
+    $obj{$type} = $type;
+  }
+  $sth->finish;
+  return $want_count ? %result : values %obj;
+}
+
+=head2 range_query
+
+ Title   : range_query
+ Usage   : $db->range_query($range_type,$refseq,$refclass,$start,$stop,$types,$order_by_group,$attributes,$binsize)
+ Function: create statement handle for range/overlap queries
+ Returns : a DBI statement handle
+ Args    : see below
+ Status  : Protected
+
+This method constructs the statement handle for this module's central
+query: given a range and/or a list of feature types, fetch their GFF
+records.
+
+The positional arguments are as follows:
+
+  Argument               Description
+
+  $isrange               A flag indicating that this is a range.
+			 query.  Otherwise an overlap query is
+			 assumed.
+
+  $refseq		 The reference sequence name (undef if no range).
+
+  $refclass		 The reference sequence class (undef if no range).
+
+  $start		 The start of the range (undef if none).
+
+  $stop                  The stop of the range (undef if none).
+
+  $types                 Array ref containing zero or more feature types in the
+			 format [method,source].
+
+  $order_by_group        A flag indicating that statement handler should group
+                         the features by group id (handy for iterative fetches)
+
+  $attributes            A hash containing select attributes.
+
+  $binsize               A bin size for generating tables of feature density.
+
+If successful, this method returns a statement handle.  The handle is
+expected to return the fields described for get_features().
+
+Internally, range_query() makes calls to the following methods,
+each of which is expected to be overridden in subclasses:
+
+  $select        = $self->make_features_select_part;
+  $from          = $self->make_features_from_part;
+  $join          = $self->make_features_join_part;
+  ($where,@args) = $self->make_features_by_range_where_part($isrange,$srcseq,$class,
+						           $start,$stop,$types,$class);
+
+The query that is constructed looks like this:
+
+  SELECT $select FROM $from WHERE $join AND $where
+
+The arguments that are returned from make_features_by_range_where_part() are
+passed to the statement handler's execute() method.
+
+range_query() also calls a do_straight_join() method, described
+below.  If this method returns true, then the keyword "straight_join"
+is inserted right after SELECT.
+
+=cut
+
+# Assemble the central feature query from the make_features_*_part()
+# fragments and execute it.  Returns whatever do_query() returns (a
+# statement handle on success).
+sub range_query {
+  my $self = shift;
+  my($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes,$bin) = @_;
+
+  # NOTE: straight_join is necessary in some database to force the right index to be used.
+  my %a             = (refseq=>$refseq,class=>$class,start=>$start,stop=>$stop,types=>$types,attributes=>$attributes,bin_width=>$bin);
+  my $straight      = $self->do_straight_join(\%a) ? 'straight_join' : '';
+  my $select        = $self->make_features_select_part(\%a);
+  my $from          = $self->make_features_from_part($sparse,\%a);
+  my $join          = $self->make_features_join_part(\%a);
+  my ($where,@args) = $self->make_features_by_range_where_part($rangetype,\%a);
+  my ($group_by,@more_args) = $self->make_features_group_by_part(\%a);
+
+  # declared separately: "my ... if" leaves the variable in an
+  # unspecified state when the condition is false
+  my $order_by;
+  $order_by = $self->make_features_order_by_part(\%a) if $order_by_group;
+
+  my $query         = "SELECT $straight $select FROM $from WHERE $join";
+  $query           .= " AND $where" if $where;
+  if ($group_by) {
+    $query           .= " GROUP BY $group_by";
+    push @args,@more_args;
+  }
+  $query           .= " ORDER BY $order_by" if $order_by;
+
+  my $sth = $self->dbh->do_query($query,@args);
+  return $sth;
+}
+
+=head2 make_features_by_range_where_part
+
+ Title   : make_features_by_range_where_part
+ Usage   : ($string,@args) =
+     $db->make_features_by_range_where_part($rangetype,$options)
+ Function: make where part of the features query
+ Returns : the list ($query,@bind_args)
+ Args    : see below
+ Status  : Protected
+
+This method creates the part of the features query that immediately
+follows the WHERE keyword and is ANDed with the string returned by
+make_features_join_part().
+
+The two positional arguments are the range type ('overlaps',
+'contains' or 'contained_in') and a hash reference of options.  The
+options refseq, class, start and stop define an optional range to
+search in, types is an array reference containing a list of
+[$method,$source] pairs, and attributes is a hash of attributes to
+match.
+
+The method result is a multi-element list containing the query string
+and the list of runtime arguments to bind to it with the execute()
+method.
+
+This method's job is to clean up arguments and perform consistency
+checking.  The real work is done by the following abstract methods:
+
+  Method             Description
+
+  refseq_query()     Return the query string needed to match the reference
+		     sequence.
+
+  range_query()	     Return the query string needed to find all features contained
+		     within a range.
+
+  overlap_query()    Return the query string needed to find all features that overlap
+		     a range.
+
+See Bio::DB::GFF::Adaptor::dbi::mysql for an example of how this works.
+
+=cut
+
+#'
+
+# Build the WHERE fragment for a features query from the range type and
+# an options hash ref (keys refseq, class, start, stop, types and
+# attributes).  In list context returns the fragment followed by its bind
+# arguments; in scalar context returns the result of passing the fragment
+# and its arguments to the handle's dbi_quote().
+sub make_features_by_range_where_part {
+  my $self = shift;
+  my ($rangetype,$options) = @_;
+  $options ||= {};
+  my ($refseq,$class,$start,$stop,$types,$attributes) =
+    @{$options}{qw(refseq class start stop types attributes)};
+
+  my (@query,@args);
+
+  if ($refseq) {
+    my ($q,@a) = $self->refseq_query($refseq,$class);
+    push @query,$q;
+    push @args,@a;
+  }
+
+  if (defined $start or defined $stop) {
+    # an open-ended range is closed with the widest possible bound
+    $start = 0               unless defined($start);
+    $stop  = MAX_SEGMENT     unless defined($stop);
+
+    my ($range_query,@range_args) =
+           $rangetype eq 'overlaps'     ? $self->overlap_query($start,$stop)
+         : $rangetype eq 'contains'     ? $self->contains_query($start,$stop)
+         : $rangetype eq 'contained_in' ? $self->contained_in_query($start,$stop)
+         : ();
+
+    push @query,$range_query;
+    push @args,@range_args;
+  }
+
+  if (defined $types && @$types) {
+    my ($type_query,@type_args) = $self->types_query($types);
+    push @query,$type_query;
+    push @args,@type_args;
+  }
+
+  if ($attributes) {
+    my ($attribute_query,@attribute_args) = $self->make_features_by_attribute_where_part($attributes);
+    # parenthesised because the attribute conditions are ORed together
+    push @query,"($attribute_query)";
+    push @args,@attribute_args;
+  }
+
+  my $query = join "\n\tAND ",@query;
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+=head2 do_straight_join
+
+ Title   : do_straight_join
+ Usage   : $boolean = $db->do_straight_join($refseq,$class,$start,$stop,$types)
+ Function: optimization flag
+ Returns : a flag
+ Args    : see range_query()
+ Status  : Protected
+
+This subroutine, called by range_query() returns a boolean flag.
+If true, range_query() will perform a straight join, which can be
+used to optimize certain SQL queries.  The four arguments correspond
+to similarly-named arguments passed to range_query().
+
+=cut
+
+# Optimization flag consulted by the query builders; false by default.
+sub do_straight_join {
+  return 0;
+}
+
+=head2 string_match
+
+ Title   : string_match
+ Usage   : $string = $db->string_match($field,$value)
+ Function: create a SQL fragment for performing exact or regexp string matching
+ Returns : query string
+ Args    : the table field and match value
+ Status  : public
+
+This method examines the passed value for meta characters.  If so it
+produces a SQL fragment that performs a regular expression match.
+Otherwise, it produces a fragment that performs an exact string match.
+
+This method is not used in the module, but is available for use by
+subclasses.
+
+=cut
+
+# Pick the SQL comparison for $field: an exact "=" test when $value is
+# made up only of the plain characters listed in the pattern, a REGEXP
+# test otherwise.  The value itself is left to a bind placeholder.
+sub string_match {
+  my ($self,$field,$value) = @_;
+  my $operator = $value =~ /^[!@%&a-zA-Z0-9_\'\" ~-]+$/ ? '=' : 'REGEXP';
+  return qq($field $operator ?);
+}
+
+=head2 exact_match
+
+ Title   : exact_match
+ Usage   : $string = $db->exact_match($field,$value)
+ Function: create a SQL fragment for performing exact string matching
+ Returns : query string
+ Args    : the table field and match value
+ Status  : public
+
+This method produces the SQL fragment for matching a field name to a
+constant string value.
+
+=cut
+
+# SQL fragment comparing $field for equality with a bind placeholder.
+# $value is accepted for symmetry with string_match() but is not used.
+sub exact_match {
+  my ($self,$field,$value) = @_;
+  return "$field = ?";
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text search string",$limit)
+ Function: Search the notes for a text string, using mysql full-text search
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+This is a mysql-specific method.  Given a search string, it performs a
+full-text search of the notes table and returns an array of results.
+Each row of the returned array is an arrayref containing the following fields:
+
+  column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
+  column 2     The text of the note
+  column 3     A relevance score.
+
+=cut
+
+# Search attribute values for any of the words in $search_string and
+# return a list of [Featname, note text, relevance] array refs, at most
+# $limit of them when $limit is true.  Relevance is ten times the number
+# of word matches found in the note.
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  my @results;
+  return @results unless defined $search_string;
+
+  $search_string =~ tr/*?//d;
+
+  # Without at least one word the WHERE clause would end in "AND ()".
+  my @words  = $search_string =~ /(\w+)/g;
+  return @results unless @words;
+
+  my $regex    = join '|',@words;
+  # one placeholder per word; the patterns travel as bind arguments
+  my @searches = map {'fattribute_value LIKE ?'} @words;
+  my @patterns = map {"%${_}%"} @words;
+  my $search   = join(' OR ',@searches);
+
+  my $query = <<END;
+SELECT distinct gclass,gname,fattribute_value 
+  FROM fgroup,fattribute_to_feature,fdata
+  WHERE fgroup.gid=fdata.gid
+     AND fdata.fid=fattribute_to_feature.fid
+     AND ($search)
+END
+;
+
+  my $sth = $self->dbh->do_query($query,@patterns);
+  while (my ($class,$name,$note) = $sth->fetchrow_array) {
+     next unless $class && $name;    # sorry, ignore NULL objects
+     my @matches = $note =~ /($regex)/g;
+     my $relevance = 10*@matches;
+     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+     push @results,[$featname,$note,$relevance];
+     last if $limit && @results >= $limit;
+  }
+  $sth->finish;   # the loop may stop early once the limit is reached
+  @results;
+}
+
+
+=head2 meta
+
+ Title   : meta
+ Usage   : $value = $db->meta($name [,$newval])
+ Function: get or set a meta variable
+ Returns : a string
+ Args    : meta variable name and optionally value
+ Status  : public
+
+Get or set a named metavariable for the database.  Metavariables can
+be used for database-specific settings.  This method calls two
+class-specific methods which must be implemented:
+
+  make_meta_get_query()   Returns a sql fragment which given a meta
+                          parameter name, returns its value.  One bind
+                          variable.
+  make_meta_set_query()   Returns a sql fragment which takes two bind
+                          arguments, the parameter name and its value
+
+
+Don't make changes unless you know what you're doing!  It will affect the
+persistent database.
+
+=cut
+
+# Get or set a named meta variable.  The name is upper-cased before use.
+# With a value argument the variable is written through the SQL from
+# make_meta_set_query() and cached in $self->{meta}; without one, the
+# cached value is returned if present, otherwise it is read through the
+# SQL from make_meta_get_query() and cached.  Returns the value, or
+# nothing when the relevant query is unavailable or a statement fails.
+sub meta {
+  my $self = shift;
+  my $param_name = uc shift;
+
+  # setting
+  if (@_) {
+    my $value = shift;
+    my $sql = $self->make_meta_set_query() or return;
+    # "A or B, return" parses as "A or (B, return)": on failure the error
+    # is reported and the sub returns
+    my $sth = $self->dbh->prepare_delayed($sql) 
+              or $self->error("Can't prepare $sql: ",$self->dbh->errstr), return;
+    $sth->execute($param_name,$value)
+              or $self->error("Can't execute $sql: ",$self->dbh->errstr), return;
+    $sth->finish;
+    return $self->{meta}{$param_name} = $value;
+  }
+
+  # getting, value already cached (possibly as undef)
+  elsif (exists $self->{meta}{$param_name}) {
+    return $self->{meta}{$param_name};
+  }
+
+  # getting, first lookup
+  else {
+    undef $self->{meta}{$param_name};  # so that we don't check again
+    my $sql = $self->make_meta_get_query() or return;
+    my $sth  = $self->dbh->prepare_delayed($sql)
+            or $self->error("Can't prepare $sql: ",$self->dbh->errstr), return;
+    $sth->execute($param_name)
+            or $self->error("Can't execute $sql: ",$sth->errstr),return;
+    my ($value) = $sth->fetchrow_array;
+    $sth->finish;
+    return $self->{meta}{$param_name} = $value;
+  }
+
+}
+
+=head2 make_meta_get_query
+
+ Title   : make_meta_get_query
+ Usage   : $sql = $db->make_meta_get_query
+ Function: return SQL fragment for getting a meta parameter
+ Returns : SQL fragment
+ Args    : none
+ Status  : public
+
+By default this returns a query that selects fvalue from the fmeta
+table for the row whose fname matches the single bind variable.
+
+=cut
+
+# SQL that looks a meta value up by name; takes one bind variable.
+sub make_meta_get_query {
+   my $sql = 'SELECT fvalue FROM fmeta WHERE fname=?';
+   return $sql;
+}
+
+
+# Chunk size used for stored DNA: the 'chunk_size' meta value when it is
+# true, otherwise the DNA_CHUNK_SIZE constant.
+sub dna_chunk_size {
+  my $self   = shift;
+  my $stored = $self->meta('chunk_size');
+  $stored ? $stored : DNA_CHUNK_SIZE;
+}
+
+=head2 make_meta_set_query
+
+ Title   : make_meta_set_query
+ Usage   : $sql = $db->make_meta_set_query
+ Function: return SQL fragment for setting a meta parameter
+ Returns : SQL fragment
+ Args    : none
+ Status  : public
+
+By default this does nothing; meta parameters are not stored or
+retrieved.
+
+=cut
+
+# Base implementation: no SQL for storing meta values, so return nothing.
+sub make_meta_set_query {
+  my $self = shift;
+  return;
+}
+
+=head2 default_meta_values
+
+ Title   : default_meta_values
+ Usage   : %values = $db->default_meta_values
+ Function: return the default meta values for the database
+ Returns : a list of tag=>value pairs
+ Args    : none
+ Status  : protected
+
+This method returns a list of tag=E<gt>value pairs that contain default
+meta information about the database.  It is invoked by initialize() to
+write out the default meta values.  The base class version returns an
+empty list.
+
+For things to work properly, meta value names must be UPPERCASE.
+
+=cut
+
+# Default meta tag=>value pairs: the parent class's defaults followed by
+# the bin limits, straight-join limit and DNA chunk size of this module.
+sub default_meta_values {
+  my $self      = shift;
+  my @inherited = $self->SUPER::default_meta_values;
+  return (
+          @inherited,
+          max_bin             => MAX_BIN,
+          min_bin             => MIN_BIN,
+          straight_join_limit => STRAIGHT_JOIN_LIMIT,
+          chunk_size          => DNA_CHUNK_SIZE,
+         );
+}
+
+# Smallest bin size: the 'min_bin' meta value when true, else MIN_BIN.
+sub min_bin {
+  my $self   = shift;
+  my $stored = $self->meta('min_bin');
+  return $stored ? $stored : MIN_BIN;
+}
+# Largest bin size: the 'max_bin' meta value when true, else MAX_BIN.
+sub max_bin {
+  my $self   = shift;
+  my $stored = $self->meta('max_bin');
+  return $stored ? $stored : MAX_BIN;
+}
+
+# Straight-join limit: the 'straight_join_limit' meta value when true,
+# else the STRAIGHT_JOIN_LIMIT constant.
+sub straight_join_limit {
+  my $self   = shift;
+  my $stored = $self->meta('straight_join_limit');
+  return $stored ? $stored : STRAIGHT_JOIN_LIMIT;
+}
+
+=head2 get_features_iterator
+
+ Title   : get_features_iterator
+ Usage   : $iterator = $db->get_features_iterator($search,$options,$callback)
+ Function: create an iterator on a features() query
+ Returns : A Bio::DB::GFF::Adaptor::dbi::iterator object
+ Args    : see get_features()
+ Status  : public
+
+This method is similar to get_features(), except that it returns an
+iterator across the query.  See
+L<Bio::DB::GFF::Adaptor::dbi::iterator>.
+
+=cut
+
+# Like get_features(), but instead of draining the statement handle it
+# wraps the handle and the callback in an iterator object.  Returns
+# nothing when range_query() yields no handle.
+sub get_features_iterator {
+  my ($self,$search,$options,$callback) = @_;
+  $self->throw('must provide a callback argument') unless $callback;
+
+  my $sth = $self->range_query(
+    @{$search}{qw(rangetype refseq refclass start stop types)},
+    @{$options}{qw(sparse sort_by_group ATTRIBUTES BINSIZE)},
+  ) or return;
+
+  return Bio::DB::GFF::Adaptor::dbi::iterator->new($sth,$callback);
+}
+
+########################## loading and initialization  #####################
+
+=head2 do_initialize
+
+ Title   : do_initialize
+ Usage   : $success = $db->do_initialize($drop_all)
+ Function: initialize the database
+ Returns : a boolean indicating the success of the operation
+ Args    : a boolean indicating whether to delete existing data
+ Status  : protected
+
+This method will load the schema into the database.  If $drop_all is
+true, then any existing data in the tables known to the schema will be
+deleted.
+
+Internally, this method calls schema() to get the schema data.
+
+=cut
+
+# Create the schema from scratch.
+# You will need create privileges for this.
+# Create every table named by tables() from the statements in schema(),
+# then create that table's other schema objects.  When $erase is true the
+# existing tables are dropped first.  A failing CREATE only warns.
+# Always returns 1.
+sub do_initialize {
+  my ($self,$erase) = @_;
+  $self->drop_all if $erase;
+
+  my $dbh    = $self->features_db;
+  my $schema = $self->schema;
+  for my $table_name ($self->tables) {
+    $dbh->do($schema->{$table_name}{table}) or warn $dbh->errstr;
+    $self->create_other_schema_objects(\%{$schema->{$table_name}});
+  }
+
+  return 1;
+}
+
+=head2 finish_load
+
+ Title   : finish_load
+ Usage   : $db->finish_load
+ Function: called after load_gff_line()
+ Returns : number of records loaded
+ Args    : none
+ Status  : protected
+
+This method performs schema-specific cleanup after loading a set of
+GFF records.  It finishes each of the statement handlers prepared by
+setup_load().
+
+=cut
+
+# Clean up after a load: unlock the tables if they were locked, finish
+# every statement handle kept in {load_stuff}{sth}, discard the load
+# state and return its record counter.
+sub finish_load {
+  my $self = shift;
+
+  my $dbh = $self->features_db or return;
+  if ($self->lock_on_load) {
+    $dbh->do('UNLOCK TABLES');
+  }
+
+  my $handles = $self->{load_stuff}{sth};
+  for my $key (keys %{$handles}) {
+    $handles->{$key}->finish;
+  }
+
+  my $loaded = $self->{load_stuff}{counter};
+  delete $self->{load_stuff};
+  return $loaded;
+}
+
+
+=head2 create_other_schema_objects
+
+ Title   : create_other_schema_objects
+ Usage   : $self->create_other_schema_objects($table_name)
+ Function: create other schema objects like : indexes, sequences, triggers
+ Returns : 
+ Args    : 
+ Status  : Abstract
+
+=cut
+
+# Execute the creation statement of every non-table object in one
+# table's schema entry.  $table_schema maps an object type to a hash of
+# object name => statement; types whose name contains "table" are
+# skipped.  A failing statement only warns.  Always returns 1.
+sub create_other_schema_objects{
+  my ($self,$table_schema) = @_;
+  my $dbh = $self->features_db;
+
+  for my $object_type (keys %$table_schema) {
+    next if $object_type =~ /table/;
+    my $objects = $table_schema->{$object_type};
+    for my $object_name (keys %{$objects}) {
+      $dbh->do($objects->{$object_name}) or warn $dbh->errstr;
+    }
+  }
+  return 1;
+}
+
+=head2 drop_all
+
+ Title   : drop_all
+ Usage   : $db->drop_all
+ Function: empty the database
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This method drops the tables known to this module.  Internally it
+calls the abstract tables() method.
+
+=cut
+
+# Drop all the GFF tables -- dangerous!
+# Drop every table listed by tables(), plus any sequences the schema
+# declares for that table.  A failing DROP TABLE only warns.
+sub drop_all {
+  my $self   = shift;
+  my $dbh    = $self->features_db;
+  my $schema = $self->schema;
+
+  local $dbh->{PrintError} = 0;
+  for my $table ($self->tables) {
+    $dbh->do("drop table $table") or warn $dbh->errstr;
+
+    # indexes and triggers go away with the table; sequences (Oracle,
+    # PostgreSQL) have to be dropped explicitly
+    my $sequences = $schema->{$table}{sequence} or next;
+    for my $sequence_name (keys %{$sequences}) {
+      $dbh->do("drop sequence $sequence_name");
+    }
+  }
+}
+
+
+=head1 QUERIES TO IMPLEMENT
+
+The following abstract methods either return DBI statement handles or
+fragments of SQL.  They must be implemented by subclasses of this
+module.  See Bio::DB::GFF::Adaptor::dbi::mysql for examples.
+
+
+
+
+=head2 drop_other_schema_objects
+
+ Title   : drop_other_schema_objects
+ Usage   : $self->drop_other_schema_objects($table_name)
+ Function: drop other schema objects like : indexes, sequences, triggers
+ Returns : 
+ Args    : 
+ Status  : Abstract
+
+
+=cut
+
+# Placeholder: the base implementation does nothing.
+sub drop_other_schema_objects{
+  return;
+}
+
+=head2 make_features_select_part
+
+ Title   : make_features_select_part
+ Usage   : $string = $db->make_features_select_part()
+ Function: make select part of the features query
+ Returns : a string
+ Args    : none
+ Status  : Abstract
+
+This abstract method creates the part of the features query that
+immediately follows the SELECT keyword.
+
+=cut
+
+# Abstract: subclasses must supply the SELECT column list.
+sub make_features_select_part {
+  my $self = shift;
+  $self->throw("make_features_select_part(): must be implemented by subclass");
+}
+
+=head2 tables
+
+ Title   : tables
+ Usage   : @tables = $db->tables
+ Function: return list of tables that belong to this module
+ Returns : list of tables
+ Args    : none
+ Status  : protected
+
+This method lists the tables known to the module.
+
+=cut
+
+# return list of tables that "belong" to us. 
+# The tables that belong to this module: the keys of the schema hash.
+sub tables {
+  my $self = shift;
+  return keys %{ $self->schema };
+}
+
+=head2 schema
+
+ Title   : schema
+ Usage   : $schema = $db->schema
+ Function: return the CREATE script for the schema
+ Returns : a hashref
+ Args    : none
+ Status  : abstract
+
+This method returns a hash ref describing the objects needed to
+initialize the database.  The keys are the table names, and each value
+is a hash whose "table" entry holds the appropriate CREATE statement
+and whose other entries hold the statements for further schema objects.
+
+=cut
+
+# Abstract: subclasses must supply the schema description.
+sub schema {
+  my $self = shift;
+  $self->throw("The schema() method must be implemented by subclass");
+}
+
+=head2 DESTROY
+
+ Title   : DESTROY
+ Usage   : $db->DESTROY
+ Function: disconnect database at destruct time
+ Returns : void
+ Args    : none
+ Status  : protected
+
+This is the destructor for the class.
+
+=cut
+
+# Destructor: disconnect the database handle if one is defined.
+sub DESTROY {
+  my $self = shift;
+  return unless defined $self->features_db;
+  $self->features_db->disconnect;
+}
+
+################## query cache ##################
+
+
+#########################################  
+## Moved from mysql.pm and mysqlopt.pm ##
+#########################################
+
+=head2 make_features_by_name_where_part
+
+ Title   : make_features_by_name_where_part
+ Usage   : $db->make_features_by_name_where_part
+ Function: create the SQL fragment needed to select a feature by its group name & class
+ Returns : a SQL fragment and bind arguments
+ Args    : see below
+ Status  : Protected
+
+=cut
+
+# Returns (SQL fragment, $class, $name).  A "*" anywhere in $name makes
+# the group-name comparison a LIKE; otherwise it is an exact match.
+sub make_features_by_name_where_part {
+  my ($self,$class,$name) = @_;
+
+  # no wildcard requested: plain equality
+  return ("fgroup.gclass=? AND fgroup.gname=?",$class,$name)
+    unless $name =~ /\*/;
+
+  # backslash-escape literal LIKE metacharacters, then map "*" to "%"
+  $name =~ s/([%_])/\\$1/g;
+  $name =~ tr/*/%/;
+  return ("fgroup.gclass=? AND fgroup.gname LIKE ?",$class,$name);
+}
+
+# Build the WHERE fragment that selects features through an attribute
+# named 'Alias' or 'Name' whose value matches $name, restricted to group
+# class $class.  A "*" in $name acts as a wildcard and switches the value
+# comparison from "=" to LIKE.
+# Returns the SQL fragment followed by its bind values ($class,$name).
+sub make_features_by_alias_where_part {
+  my $self = shift;
+  my ($class,$name) = @_;
+  my $compare = '=';
+  if ($name =~ /\*/) {
+    # Escape literal LIKE metacharacters *before* mapping "*" to "%",
+    # the same order make_features_by_name_where_part() uses.  Previously
+    # "%" was never escaped here, so a literal "%" in the alias acted as
+    # an additional wildcard.
+    $name =~ s/%/\\%/g;
+    $name =~ s/_/\\_/g;
+    $name =~ tr/*/%/;
+    $compare = ' LIKE ';
+  }
+  return ("fgroup.gclass=? AND fattribute_to_feature.fattribute_value${compare}? AND fgroup.gid=fdata.gid AND fattribute.fattribute_name in ('Alias','Name') AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id AND fattribute_to_feature.fid=fdata.fid AND ftype.ftypeid=fdata.ftypeid",$class,$name);
+}
+
+# Build the WHERE fragment that selects features by attribute.
+# $attributes is a hash ref of attribute name => value.  Each pair yields
+# one "(name=? AND value=?)" clause; the clauses are joined with OR.
+# Returns the SQL fragment followed by the bind values, in clause order.
+sub make_features_by_attribute_where_part {
+  my $self = shift;
+  my $attributes = shift;
+  my @args;
+  my @sql;
+  foreach my $name (keys %$attributes) {
+     push @sql,"(fattribute.fattribute_name=? AND fattribute_to_feature.fattribute_value=?)";
+     push @args,($name,$attributes->{$name});
+  }
+  return (join(' OR ',@sql),@args);
+}
+
+=head2 make_features_by_id_where_part
+
+ Title   : make_features_by_id_where_part
+ Usage   : $db->make_features_by_id_where_part($ids)
+ Function: create the SQL fragment needed to select a set of features by their ids
+ Returns : a SQL fragment and bind arguments
+ Args    : arrayref of IDs
+ Status  : Protected
+
+=cut
+
+# $ids is an array ref of feature IDs.  The IDs are interpolated directly
+# into the IN list, so the returned list holds the fragment only and no
+# bind values.
+sub make_features_by_id_where_part {
+  my ($self,$ids) = @_;
+  my $id_list = join(",", @{$ids});
+  return ("fdata.fid IN ($id_list)");
+}
+
+=head2 make_features_by_gid_where_part
+
+ Title   : make_features_by_gid_where_part
+ Usage   : $db->make_features_by_gid_where_part($ids)
+ Function: create the SQL fragment needed to select a set of features by their group ids
+ Returns : a SQL fragment and bind arguments
+ Args    : arrayref of IDs
+ Status  : Protected
+
+=cut
+
+# $ids is an array ref of group IDs.  The IDs are interpolated directly
+# into the IN list, so the returned list holds the fragment only and no
+# bind values.
+sub make_features_by_gid_where_part {
+  my ($self,$ids) = @_;
+  my $gid_list = join(",", @{$ids});
+  return ("fgroup.gid IN ($gid_list)");
+}
+
+
+=head2 make_features_from_part
+
+ Title   : make_features_from_part
+ Usage   : $string = $db->make_features_from_part()
+ Function: make from part of the features query
+ Returns : a string
+ Args    : none
+ Status  : protected
+
+This method creates the part of the features query that immediately
+follows the FROM keyword.
+
+=cut
+
+# Returns the table list for the FROM clause.  When $options->{attributes}
+# is true the two attribute tables are added.  $sparse is accepted but
+# not consulted here.
+sub make_features_from_part {
+  my ($self,$sparse,$options) = @_;
+  $options ||= {};
+  if ($options->{attributes}) {
+    return "fdata,ftype,fgroup,fattribute,fattribute_to_feature\n";
+  }
+  return "fdata,ftype,fgroup\n";
+}
+
+
+=head2 make_features_join_part
+
+ Title   : make_features_join_part
+ Usage   : $string = $db->make_features_join_part()
+ Function: make join part of the features query
+ Returns : a string
+ Args    : none
+ Status  : protected
+
+This method creates the part of the features query that immediately
+follows the WHERE keyword.
+
+=cut
+
+# Returns the join conditions that tie fdata to fgroup and ftype.
+# $options is an optional hash ref; when $options->{attributes} is true
+# the conditions linking fattribute and fattribute_to_feature are added.
+sub make_features_join_part {
+  my $self = shift;
+  my $options = shift || {};
+  # Two heredocs on one line: END1 (no attributes) is read first, END2
+  # (with attributes) second; the ternary picks which text is returned.
+  return !$options->{attributes} ? <<END1 : <<END2;
+  fgroup.gid = fdata.gid 
+  AND ftype.ftypeid = fdata.ftypeid
+END1
+  fgroup.gid = fdata.gid 
+  AND ftype.ftypeid = fdata.ftypeid
+  AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
+  AND fdata.fid=fattribute_to_feature.fid
+END2
+}
+
+=head2 make_features_order_by_part
+
+ Title   : make_features_order_by_part
+ Usage   : ($query,@args) = $db->make_features_order_by_part()
+ Function: make the ORDER BY part of the features() query
+ Returns : a SQL fragment and bind arguments, if any
+ Args    : none
+ Status  : protected
+
+This method creates the part of the features query that immediately
+follows the ORDER BY part of the query issued by features() and
+related methods.
+
+=cut
+
+# Returns the ORDER BY expression; features are always ordered by group
+# name.  $options is accepted but not consulted by this implementation.
+sub make_features_order_by_part {
+  my ($self,$options) = @_;
+  $options ||= {};
+  return "fgroup.gname";
+}
+
+=head2 make_features_group_by_part
+
+ Title   : make_features_group_by_part
+ Usage   : ($query,@args) = $db->make_features_group_by_part()
+ Function: make the GROUP BY part of the features() query
+ Returns : a SQL fragment and bind arguments, if any
+ Args    : none
+ Status  : protected
+
+This method creates the part of the features query that immediately
+follows the GROUP BY part of the query issued by features() and
+related methods.
+
+=cut
+
+# Returns the GROUP BY expression (plus bind values, if any) selected by
+# the $options hash ref:
+#   attributes with more than one key : group by the feature columns and
+#                                       add a HAVING count(fdata.fid) > ?
+#                                       bound to (number of keys - 1)
+#   attributes with at most one key   : nothing (bare return)
+#   bin_width                         : "fref,fstart,fdata.ftypeid"
+sub make_features_group_by_part {
+  my $self = shift;
+  my $options = shift || {};
+  if (my $att = $options->{attributes}) {
+    # keys() in scalar context: the number of attributes being matched
+    my $key_count = keys %$att;
+    return unless $key_count > 1;
+    return ("fdata.fid,fref,fstart,fstop,fsource,
+           fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,
+           ftarget_stop,fdata.gid
+     HAVING count(fdata.fid) > ?",$key_count-1);
+  }
+  elsif (my $b = $options->{bin_width}) {
+    return "fref,fstart,fdata.ftypeid";
+  }
+  # NOTE(review): with neither option set there is no explicit return, so
+  # the sub yields the value of the last condition evaluated above --
+  # presumably callers treat any false result as "no GROUP BY"; confirm.
+
+}
+
+=head2 refseq_query
+
+ Title   : refseq_query
+ Usage   : ($query,@args) = $db->refseq_query($name,$class)
+ Function: create SQL fragment that selects the desired reference sequence
+ Returns : a list containing the query and bind arguments
+ Args    : reference sequence name and class
+ Status  : protected
+
+This method is called by make_features_by_range_where_part() to
+construct the part of the select WHERE section that selects a
+particular reference sequence.  It returns a multi-element list in
+which the first element is the SQL fragment and subsequent elements
+are bind values.
+
+For example:
+
+  sub refseq_query {
+     my ($name,$class) = @_;
+     return ('gff.refseq=? AND gff.refclass=?',
+	     $name,$class);
+  }
+
+The current schema does not distinguish among different classes of
+reference sequence.
+
+=cut
+
+# IMPORTANT NOTE: THE MYSQL SCHEMA IGNORES THE SEQUENCE CLASS
+# THIS SHOULD BE FIXED
+#
+# List context: returns the fragment and its bind value.  Otherwise both
+# are handed to the handle's dbi_quote() and its result is returned.
+# $refclass is accepted but unused.
+sub refseq_query {
+  my ($self,$refseq,$refclass) = @_;
+  my $query = "fdata.fref=?";
+  return ($query,$refseq) if wantarray;
+  return $self->dbh->dbi_quote($query,$refseq);
+}
+
+=head2 attributes
+
+ Title   : attributes
+ Usage   : @attributes = $db->attributes($id,$name)
+ Function: get the attributes on a particular feature
+ Returns : an array of string
+ Args    : feature ID
+ Status  : public
+
+Some GFF version 2 files use the groups column to store a series of
+attribute/value pairs.  In this interpretation of GFF, the first such
+pair is treated as the primary group for the feature; subsequent pairs
+are treated as attributes.  Two attributes have special meaning:
+"Note" is for backward compatibility and is used for unstructured text
+remarks.  "Alias" is considered as a synonym for the feature name.
+
+If no name is provided, then attributes() returns a flattened hash, of
+attribute=E<gt>value pairs.  This lets you do:
+
+  %attributes = $db->attributes($id);
+
+Normally, attributes() will be called by the feature:
+
+  @notes = $feature->attributes('Note');
+
+=cut
+
+# Run the attribute lookup and return the fetched columns as a flat list.
+#   $id and $tag given : fattribute_value column for attribute $tag on
+#                        feature $id
+#   only $id given     : (name,value,name,value,...) for feature $id
+#   $id false          : fattribute_name column of the whole fattribute
+#                        table
+sub do_attributes {
+  my $self        = shift;
+  my ($id,$tag)   = @_;
+  my $sth;
+  if ($id) {
+    my $from   = 'fattribute_to_feature,fattribute';
+    my $join   = 'fattribute.fattribute_id=fattribute_to_feature.fattribute_id';
+    my $where1 = 'fid=? AND fattribute_name=?';
+    my $where2 = 'fid=?';
+    $sth = defined($tag) ? $self->dbh->do_query("SELECT fattribute_value FROM $from WHERE $where1 AND $join",$id,$tag)
+                         : $self->dbh->do_query("SELECT fattribute_name,fattribute_value FROM $from WHERE $where2 AND $join",$id);
+  }
+  else {
+    $sth = $self->dbh->do_query("SELECT fattribute_name FROM fattribute");
+  }
+  # flatten every row into one list
+  my @result;
+  while (my @stuff = $sth->fetchrow_array) {
+    push @result,@stuff;
+  }
+  $sth->finish;
+  return @result;
+}
+
+
+
+=head2 overlap_query_nobin
+
+ Title   : overlap_query_nobin
+ Usage   : ($query,@args) = $db->overlap_query_nobin($start,$stop)
+ Function: create SQL fragment that selects the desired features by range
+ Returns : a list containing the query and bind arguments
+ Args    : the start and stop of a range, inclusive
+ Status  : protected
+
+This method is called by make_features_byrange_where_part() to construct the
+part of the select WHERE section that selects a set of features that
+overlap a range. It returns a multi-element list in which the first
+element is the SQL fragment and subsequent elements are bind values.
+For example:
+
+  sub overlap_query_nobin {
+     my ($start,$stop) = @_;
+     return ('gff.stop>=? AND gff.start<=?',
+	     $start,$stop);
+  }
+
+=cut
+
+# Range test for features that overlap [$start,$stop].
+# List context: fragment plus bind values; otherwise the result of the
+# handle's dbi_quote() on them.
+sub overlap_query_nobin {
+  my ($self,$start,$stop) = @_;
+  my $query = 'fdata.fstop>=? AND fdata.fstart<=?';
+  return ($query,$start,$stop) if wantarray;
+  return $self->dbh->dbi_quote($query,$start,$stop);
+}
+
+=head2 contains_query_nobin
+
+ Title   : contains_query_nobin
+ Usage   : ($query,@args) = $db->contains_query_nobin($start,$stop)
+ Function: create SQL fragment that selects the desired features by range
+ Returns : a list containing the query and bind arguments
+ Args    : the start and stop of a range, inclusive
+ Status  : protected
+
+This method is called by make_features_byrange_where_part() to construct the
+part of the select WHERE section that selects a set of features
+entirely enclosed by a range. It returns a multi-element list in which
+the first element is the SQL fragment and subsequent elements are bind
+values. For example:
+
+  sub contains_query_nobin {
+     my ($start,$stop) = @_;
+     return ('gff.start>=? AND gff.stop<=?',
+	     $start,$stop);
+  }
+
+=cut
+
+# Range test for features that lie entirely inside [$start,$stop].
+# List context: fragment plus bind values; otherwise the result of the
+# handle's dbi_quote() on them.
+sub contains_query_nobin {
+  my ($self,$start,$stop) = @_;
+  my $query = 'fdata.fstart>=? AND fdata.fstop<=?';
+  return ($query,$start,$stop) if wantarray;
+  return $self->dbh->dbi_quote($query,$start,$stop);
+}
+
+=head2 contained_in_query_nobin
+
+ Title   : contained_in_query_nobin
+ Usage   : ($query,@args) = $db->contained_in_query_nobin($start,$stop)
+ Function: create SQL fragment that selects the desired features by range
+ Returns : a list containing the query and bind arguments
+ Args    : the start and stop of a range, inclusive
+ Status  : protected
+
+This method is called by make_features_byrange_where_part() to construct the
+part of the select WHERE section that selects a set of features
+that entirely enclose a range. It returns a multi-element list in which
+the first element is the SQL fragment and subsequent elements are bind
+values. For example:
+
+  sub contained_in_query_nobin {
+     my ($start,$stop) = @_;
+     return ('gff.start<=? AND gff.stop>=?',
+	     $start,$stop);
+  }
+
+=cut
+
+# Range test for features that completely enclose [$start,$stop]
+# (feature start <= $start and feature stop >= $stop).
+# List context: fragment plus bind values; otherwise the result of the
+# handle's dbi_quote() on them.
+sub contained_in_query_nobin {
+  my ($self,$start,$stop) = @_;
+  my $query = 'fdata.fstart<=? AND fdata.fstop>=?';
+  return ($query,$start,$stop) if wantarray;
+  return $self->dbh->dbi_quote($query,$start,$stop);
+}
+
+=head2 types_query
+
+ Title   : types_query
+ Usage   : ($query,@args) = $db->types_query($types)
+ Function: create SQL fragment that selects the desired features by type
+ Returns : a list containing the query and bind arguments
+ Args    : an array reference containing the types
+ Status  : protected
+
+This method is called by make_features_byrange_where_part() to construct the
+part of the select WHERE section that selects a set of features based
+on their type. It returns a multi-element list in which the first
+element is the SQL fragment and subsequent elements are bind values.
+The argument is an array reference containing zero or more
+[$method,$source] pairs.
+
+=cut
+
+# generate the fragment of SQL responsible for searching for
+# features with particular types and methods
+#
+# $types is an array ref of [$method,$source] pairs.  Within a pair the
+# non-empty parts are ANDed; the pairs are ORed.  A part containing ".*"
+# is matched with LIKE (".*" and ".*?" become "%", literal "%" and "_"
+# are backslash-escaped); anything else is matched with "=".
+# List context: ($query,@args), where $query is undef when no pair
+# contributed a condition.  Otherwise both are handed to the handle's
+# dbi_quote().
+sub types_query {
+  my $self = shift;
+  my $types = shift;
+
+  my @method_queries;
+  my @args;
+  for my $type (@$types) {
+    my ($method,$source) = @$type;
+    my ($mlike, $slike) = (0, 0);
+    if ($method && $method =~ m/\.\*/) {
+      $method =~ s/%/\\%/g;
+      $method =~ s/_/\\_/g;
+      $method =~ s/\.\*\??/%/g;
+      $mlike++;
+    }
+    if ($source && $source =~ m/\.\*/) {
+      $source =~ s/%/\\%/g;
+      $source =~ s/_/\\_/g;
+      $source =~ s/\.\*\??/%/g;
+      $slike++;
+    }
+    my @pair;
+    if (defined $method && length $method) {
+      push @pair, $mlike ? qq(fmethod LIKE ?) : qq(fmethod = ?);
+      push @args, $method;
+    }
+    if (defined $source && length $source) {
+      push @pair, $slike ? qq(fsource LIKE ?) : qq(fsource = ?);
+      push @args, $source;
+    }
+    push @method_queries,"(" . join(' AND ',@pair) .")" if @pair;
+  }
+  # Declare first, assign conditionally: "my $x = ... if COND" has
+  # undefined behaviour in Perl.
+  my $query;
+  $query = " (".join(' OR ',@method_queries).")\n" if @method_queries;
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+=head2 make_types_select_part
+
+ Title   : make_types_select_part
+ Usage   : ($string,@args) = $db->make_types_select_part(@args)
+ Function: create the select portion of the SQL for fetching features type list
+ Returns : query string and bind arguments
+ Args    : see below
+ Status  : protected
+
+This method is called by get_types() to generate the query fragment
+and bind arguments for the SELECT part of the query that retrieves
+lists of feature types.  The four positional arguments are as follows:
+
+ $refseq      reference sequence name
+ $start       start of region
+ $stop        end of region
+ $want_count  true to return the count of this feature type
+
+If $want_count is false, the SQL fragment returned must produce a list
+of feature types in the format (method, source).
+
+If $want_count is true, the returned fragment must produce a list of
+feature types in the format (method, source, count).
+
+=cut
+
+#------------------------- support for the types() query ------------------------
+# Column list for the types query: (method, source), plus a count of
+# matching fdata rows when $want_count is true.  The other arguments are
+# accepted but not consulted here.
+sub make_types_select_part {
+  my ($self,$srcseq,$start,$stop,$want_count) = @_;
+  return 'ftype.fmethod,ftype.fsource,count(fdata.ftypeid)' if $want_count;
+  return 'fmethod,fsource';
+}
+
+=head2 make_types_from_part
+
+ Title   : make_types_from_part
+ Usage   : ($string,@args) = $db->make_types_from_part(@args)
+ Function: create the FROM portion of the SQL for fetching features type lists
+ Returns : query string and bind arguments
+ Args    : see below
+ Status  : protected
+
+This method is called by get_types() to generate the query fragment
+and bind arguments for the FROM part of the query that retrieves lists
+of feature types.  The four positional arguments are as follows:
+
+ $refseq      reference sequence name
+ $start       start of region
+ $stop        end of region
+ $want_count  true to return the count of this feature type
+
+If $want_count is false, the SQL fragment returned must produce a list
+of feature types in the format (method, source).
+
+If $want_count is true, the returned fragment must produce a list of
+feature types in the format (method, source, count).
+
+=cut
+
+# Table list for the types query: fdata is joined in only when a
+# reference sequence is given or a count is wanted.
+sub make_types_from_part {
+  my ($self,$srcseq,$start,$stop,$want_count) = @_;
+  if (defined($srcseq) || $want_count) {
+    return 'fdata,ftype';
+  }
+  return 'ftype';
+}
+
+=head2 make_types_join_part
+
+ Title   : make_types_join_part
+ Usage   : ($string,@args) = $db->make_types_join_part(@args)
+ Function: create the JOIN portion of the SQL for fetching features type lists
+ Returns : query string and bind arguments
+ Args    : see below
+ Status  : protected
+
+This method is called by get_types() to generate the query fragment
+and bind arguments for the JOIN part of the query that retrieves lists
+of feature types.  The four positional arguments are as follows:
+
+ $refseq      reference sequence name
+ $start       start of region
+ $stop        end of region
+ $want_count  true to return the count of this feature type
+
+=cut
+
+# Join condition for the types query.  When fdata is not part of the
+# query (no reference sequence and no count wanted) the always-true
+# condition '1=1' is returned instead.
+sub make_types_join_part {
+  my ($self,$srcseq,$start,$stop,$want_count) = @_;
+  return 'fdata.ftypeid=ftype.ftypeid' if defined($srcseq) || $want_count;
+  return '1=1';
+}
+
+=head2 make_types_where_part
+
+ Title   : make_types_where_part
+ Usage   : ($string,@args) = $db->make_types_where_part(@args)
+ Function: create the WHERE portion of the SQL for fetching features type lists
+ Returns : query string and bind arguments
+ Args    : see below
+ Status  : protected
+
+This method is called by get_types() to generate the query fragment
+and bind arguments for the WHERE part of the query that retrieves
+lists of feature types.  The five positional arguments are as follows:
+
+ $refseq      reference sequence name
+ $start       start of region
+ $stop        end of region
+ $want_count  true to return the count of this feature type
+ $typelist    optional array ref of types, passed to types_query()
+
+=cut
+
+# Build the WHERE conditions for the types query.
+#   $srcseq   : if defined, restrict to that reference sequence; if
+#               $start or $stop is also defined, add an overlap test
+#               (a missing $start defaults to 1, a missing $stop to
+#               MAX_SEGMENT)
+#   $typelist : if a non-empty array ref, add the types_query() fragment
+# The conditions are ANDed; with none the result is '1=1'.
+# List context: ($query,@args); otherwise both are handed to the
+# handle's dbi_quote().  $want_count is accepted but not consulted here.
+sub make_types_where_part {
+  my $self = shift;
+  my ($srcseq,$start,$stop,$want_count,$typelist) = @_;
+  my (@query,@args);
+  if (defined($srcseq)) {
+    push @query,'fdata.fref=?';
+    push @args,$srcseq;
+    if (defined $start or defined $stop) {
+      $start = 1           unless defined $start;
+      $stop  = MAX_SEGMENT unless defined $stop;
+      my ($q,@a) = $self->overlap_query($start,$stop);
+      push @query,"($q)";
+      push @args,@a;
+    }
+  }
+  if (defined $typelist && @$typelist) {
+    my ($q,@a) = $self->types_query($typelist);
+    push @query,($q);
+    push @args,@a;
+  }
+  my $query = @query ? join(' AND ',@query) : '1=1';
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+=head2 make_types_group_part
+
+ Title   : make_types_group_part
+ Usage   : ($string,@args) = $db->make_types_group_part(@args)
+ Function: create the GROUP BY portion of the SQL for fetching features type lists
+ Returns : query string and bind arguments
+ Args    : see below
+ Status  : protected
+
+This method is called by get_types() to generate the query fragment
+and bind arguments for the GROUP BY part of the query that retrieves
+lists of feature types.  The four positional arguments are as follows:
+
+ $refseq      reference sequence name
+ $start       start of region
+ $stop        end of region
+ $want_count  true to return the count of this feature type
+
+=cut
+
+# GROUP BY columns for the types query; nothing is returned unless a
+# reference sequence is given or a count is wanted.
+sub make_types_group_part {
+  my ($self,$srcseq,$start,$stop,$want_count) = @_;
+  if ($srcseq || $want_count) {
+    return 'ftype.ftypeid,ftype.fmethod,ftype.fsource';
+  }
+  return;
+}
+
+
+=head2 get_feature_id
+
+ Title   : get_feature_id
+ Usage   : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
+ Function: get the ID of a feature
+ Returns : an integer ID or undef
+ Args    : none
+ Status  : private
+
+This internal method is called by load_gff_line to look up the integer
+ID of an existing feature.  It is only needed when replacing a feature
+with new information.
+
+=cut
+
+# Look up a feature's ID from its reference, coordinates, type ID and
+# group ID.  The statement handle is prepared on first use and kept in
+# the load_stuff hash.  Returns the fid of the first row fetched, or
+# nothing when no handle is available or execute() fails.
+sub get_feature_id {
+  my ($self,$ref,$start,$stop,$typeid,$groupid) = @_;
+  my $cache = $self->{load_stuff};
+  $cache->{get_feature_id} ||=
+    $self->features_db->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
+  my $sth = $cache->{get_feature_id};
+  return unless $sth;
+  return unless $sth->execute($ref,$start,$stop,$typeid,$groupid);
+  my ($fid) = $sth->fetchrow_array;
+  return $fid;
+}
+
+
+
+=head2 make_abscoord_query
+
+ Title   : make_abscoord_query
+ Usage   : $sth = $db->make_abscoord_query($name,$class);
+ Function: create query that finds the reference sequence coordinates given a landmark & class
+ Returns : a DBI statement handle
+ Args    : name and class of landmark
+ Status  : protected
+
+The statement handler should return rows containing five fields:
+
+  1. reference sequence name
+  2. reference sequence class
+  3. start position
+  4. stop position
+  5. strand ("+" or "-")
+
+This query always returns "Sequence" as the class of the reference
+sequence.
+
+=cut
+
+# Given a landmark name and class, run the coordinate lookup and return
+# whatever the handle's do_query() returns.  A "*" in $name is a
+# wildcard: LIKE metacharacters are escaped, "*" becomes "%", and
+# "gname=?" in the regular query is rewritten to "gname LIKE ?".  That
+# rewrite is applied to the regular query only; when $refseq is defined
+# the forced-refseq query is run as the subclass returned it.
+sub make_abscoord_query {
+  my ($self,$name,$class,$refseq) = @_;
+  my $query              = $self->getseqcoords_query();
+  my $getforcedseqcoords = $self->getforcedseqcoords_query();
+  if ($name =~ /\*/) {
+    $name  =~ s/([%_])/\\$1/g;
+    $name  =~ tr/*/%/;
+    $query =~ s/gname=\?/gname LIKE ?/;
+  }
+  return $self->dbh->do_query($getforcedseqcoords,$name,$class,$refseq)
+    if defined $refseq;
+  return $self->dbh->do_query($query,$name,$class);
+}
+
+# Alias counterpart of make_abscoord_query(): runs the query supplied by
+# getaliascoords_query() with ($name,$class).  A "*" in $name is a
+# wildcard: LIKE metacharacters are escaped, "*" becomes "%", and
+# "gname=?" in the query is rewritten to "gname LIKE ?".
+sub make_aliasabscoord_query {
+  my ($self,$name,$class) = @_;
+  my $query = $self->getaliascoords_query();
+  if ($name =~ /\*/) {
+    $name  =~ s/([%_])/\\$1/g;
+    $name  =~ tr/*/%/;
+    $query =~ s/gname=\?/gname LIKE ?/;
+  }
+  return $self->dbh->do_query($query,$name,$class);
+}
+
+# Abstract hook: a concrete adaptor must supply the coordinate query
+# used by make_abscoord_query().  This base version always throws.
+sub getseqcoords_query {
+  my $self = shift;
+  $self->throw("getseqcoords_query(): must be implemented by a subclass");
+}
+
+# Abstract hook: a concrete adaptor must supply the alias coordinate
+# query used by make_aliasabscoord_query().  This base version always
+# throws.
+sub getaliascoords_query {
+  my $self = shift;
+  $self->throw("getaliascoords_query(): must be implemented by a subclass");
+}
+
+# Build the "fbin" conditions for a range.  Tiers are walked from $maxbin
+# down to $minbin, dividing by 10 at each step; each tier contributes
+# either "fbin=?" or "fbin between ? and ?", and the tier conditions are
+# ORed together.
+#   $start           : defaults to 0
+#   $stop            : defaults to $self->meta('max_bin')
+#   $minbin, $maxbin : default to $self->min_bin and $self->max_bin
+# List context: ($query,@args); otherwise both are handed to the
+# handle's dbi_quote().
+sub bin_query {
+  my $self = shift;
+  my ($start,$stop,$minbin,$maxbin) = @_;
+  my ($query,@args);
+
+  $start = 0               unless defined($start);
+  $stop  = $self->meta('max_bin') unless defined($stop);
+
+  my @bins;
+  $minbin = defined $minbin ? $minbin : $self->min_bin;
+  $maxbin = defined $maxbin ? $maxbin : $self->max_bin;
+  my $tier = $maxbin;
+  while ($tier >= $minbin) {
+    # bin values for this tier, widened by EPSILON on both sides
+    my ($tier_start,$tier_stop) = (bin_bot($tier,$start)-EPSILON(),bin_top($tier,$stop)+EPSILON());
+    if ($tier_start == $tier_stop) {
+      push @bins,'fbin=?';
+      push @args,$tier_start;
+    } else {
+      push @bins,'fbin between ? and ?';
+      push @args,($tier_start,$tier_stop);
+    }
+    $tier /= 10;
+  }
+  $query = join("\n\t OR ",@bins);
+  return wantarray ? ($query,@args)
+                   : $self->dbh->dbi_quote($query,@args);
+}
+
+# find features that overlap a given range
+#
+# Uses the plain coordinate test from overlap_query_nobin().  When the
+# OPTIMIZE constant is true it is prefixed with the bin conditions from
+# bin_query(), whose bind values come first.
+# List context: ($query,@args); otherwise both are handed to the
+# handle's dbi_quote().
+sub overlap_query {
+  my $self = shift;
+  my ($start,$stop) = @_;
+
+  my ($query,@args);
+  my ($iq,@iargs)   = $self->overlap_query_nobin($start,$stop);
+  if (OPTIMIZE) {
+    my ($bq,@bargs)   = $self->bin_query($start,$stop);
+    $query = "($bq)\n\tAND $iq";
+    @args  = (@bargs,@iargs);
+  }
+  else {
+    $query = $iq;
+    @args  = @iargs;
+  }
+
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+# find features that are completely contained within a range
+#
+# Combines the bin conditions from bin_query() -- with the upper bin
+# limited to bin($start,$stop,$self->min_bin) -- with the coordinate test
+# from contains_query_nobin().  Bin bind values come first.
+# List context: ($query,@args); otherwise both are handed to the
+# handle's dbi_quote().
+sub contains_query {
+  my $self = shift;
+  my ($start,$stop) = @_;
+  my ($bq,@bargs)   = $self->bin_query($start,$stop,undef,bin($start,$stop,$self->min_bin));
+  my ($iq,@iargs)   = $self->contains_query_nobin($start,$stop);
+  my $query = "($bq)\n\tAND $iq";
+  my @args  = (@bargs,@iargs);
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+# find features that completely enclose a range
+#
+# Combines the bin conditions from bin_query() -- with the lower bin
+# limited to the length of the range, abs($stop-$start)+1 -- with the
+# coordinate test from contained_in_query_nobin().  Bin bind values come
+# first.
+# List context: ($query,@args); otherwise both are handed to the
+# handle's dbi_quote().
+sub contained_in_query {
+  my $self = shift;
+  my ($start,$stop) = @_;
+  my ($bq,@bargs)   = $self->bin_query($start,$stop,abs($stop-$start)+1,undef);
+  my ($iq,@iargs)   = $self->contained_in_query_nobin($start,$stop);
+  my $query = "($bq)\n\tAND $iq";
+  my @args  = (@bargs,@iargs);
+  return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
+}
+
+# implement the _delete_features() method
+#
+# Deletes the fdata rows whose fid is among the given IDs.  Each ID goes
+# through the handle's quote().  Throws the handle's errstr if do()
+# returns undef; otherwise returns do()'s result.
+sub _delete_features {
+  my ($self,@feature_ids) = @_;
+  my $dbh     = $self->features_db;
+  my $id_list = join(',', map { $dbh->quote($_) } @feature_ids);
+  my $query   = "delete from fdata where fid in ($id_list)";
+  warn "$query\n" if $self->debug;
+  my $result  = $dbh->do($query);
+  $self->throw($dbh->errstr) unless defined $result;
+  return $result;
+}
+
+# implement the _delete_groups() method
+#
+# Deletes the fdata rows and then the fgroup rows whose gid is among the
+# given IDs.  Each ID goes through the handle's quote().  Throws the
+# handle's errstr as soon as a do() returns undef; otherwise returns the
+# result of the second (fgroup) delete.
+sub _delete_groups {
+  my ($self,@group_ids) = @_;
+  my $dbh      = $self->features_db;
+  my $gid_list = join(',', map { $dbh->quote($_) } @group_ids);
+
+  my $result;
+  for my $query ("delete from fdata  where gid in ($gid_list)",
+                 "delete from fgroup where gid in ($gid_list)") {
+    warn "$query\n" if $self->debug;
+    $result = $dbh->do($query);
+    $self->throw($dbh->errstr) unless defined $result;
+  }
+  return $result;
+}
+
+# implement the _delete() method
+#
+# $delete_spec is a hash ref with the keys:
+#   segments   : array ref of segment objects (abs_ref, abs_start and
+#                abs_stop are read); the segments are ORed
+#   types      : array ref passed to types_query(); resolved to ftypeids
+#   range_type : 'overlaps', 'contains' or 'contained_in'
+#   force      : allow a delete with no restriction at all
+# The segment and type restrictions are ANDed.  Throws on an invalid
+# range type, on an unrestricted delete without force, or when do()
+# returns undef; otherwise returns do()'s result.
+sub _delete {
+  my $self = shift;
+  my $delete_spec = shift;
+  my $ranges      = $delete_spec->{segments} || [];
+  my $types       = $delete_spec->{types}    || [];
+  my $force       = $delete_spec->{force};
+  my $range_type  = $delete_spec->{range_type};
+  my $dbh         = $self->features_db;
+
+  my $query = 'delete from fdata';
+  my @where;
+
+  my @range_part;
+  for my $segment (@$ranges) {
+    my $ref   = $dbh->quote($segment->abs_ref);
+    my $start = $segment->abs_start;
+    my $stop  = $segment->abs_stop;
+    # the *_query methods are called in scalar context here, so each
+    # returns the result of dbi_quote() rather than a list
+    my $range =  $range_type eq 'overlaps'     ? $self->overlap_query($start,$stop)
+               : $range_type eq 'contains'     ? $self->contains_query($start,$stop)
+               : $range_type eq 'contained_in' ? $self->contained_in_query($start,$stop)
+               : $self->throw("Invalid range type '$range_type'");
+    push @range_part,"(fref=$ref AND $range)";
+  }
+  push @where,'('. join(' OR ',@range_part).')' if @range_part;
+
+  # get all the types
+  if (@$types) {
+    my $types_where = $self->types_query($types);
+    my $types_query = "select ftypeid from ftype where $types_where";
+    my $result      = $dbh->selectall_arrayref($types_query);
+    my @typeids     = map {$_->[0]} @$result;
+    my $typelist    = join ',',map{$dbh->quote($_)} @typeids;
+    $typelist ||= "0"; # don't cause DBI to die with invalid SQL when
+                       # unknown feature types were requested.
+    push @where,"(ftypeid in ($typelist))";
+  }
+  $self->throw("This operation would delete all feature data and -force not specified")
+    unless @where || $force;
+  $query .= " where ".join(' and ',@where) if @where;
+  warn "$query\n" if $self->debug;
+  my $result = $dbh->do($query);
+
+  defined $result or $self->throw($dbh->errstr);
+  $result;
+}
+
+
+1;
+
+__END__
+
+=head1 BUGS
+
+Schemas need work to support multiple hierarchical groups.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/feature_serializer.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/feature_serializer.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/feature_serializer.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,38 @@
+package Bio::DB::GFF::Adaptor::memory::feature_serializer;
+
+# $Id: feature_serializer.pm,v 1.2.6.1 2006/10/02 23:10:16 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::memory::feature_serializer
+
+=cut
+
+use strict;
+
+require Exporter;
+use vars qw(@EXPORT @EXPORT_OK @hash2array_map);
+use base qw(Exporter);
+# everything exportable is also exported by default
+@EXPORT_OK = qw(feature2string string2feature @hash2array_map);
+@EXPORT = @EXPORT_OK;
+
+# order in which a feature hash's fields are laid out in the serialized string
+@hash2array_map = qw(ref start stop source method score strand phase gclass gname tstart tstop feature_id group_id bin);
+
+# Serialize a feature hash ref to a single string.  The fields named in
+# @hash2array_map come first, in that order.  Each entry of
+# $feature->{attributes} (an array ref) follows, its elements joined
+# with "\0".  All pieces are joined with $; .
+sub feature2string {
+  my $feature = shift;
+  local $^W = 0;   # warnings are switched off for the rest of this sub
+  my @a = @{$feature}{@hash2array_map};
+  push @a,map {join "\0",@$_} @{$feature->{attributes}} if $feature->{attributes};
+  return join $;,@a;
+}
+
+# Rebuild a feature hash ref from a string made by feature2string().
+# The string is split on $; : the leading pieces fill the
+# @hash2array_map fields, and each remaining piece is split on "\0" into
+# one attribute array ref.  group_id is reset to undef in the result.
+sub string2feature {
+  my $string  = shift;
+  my (@attributes,%feature);
+  (@feature{@hash2array_map},@attributes) = split $;,$string;
+  $feature{attributes} = [map {[split "\0",$_]} @attributes];
+  undef $feature{group_id};
+  \%feature;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/iterator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/iterator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory/iterator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,80 @@
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::memory::iterator - iterator for Bio::DB::GFF::Adaptor::memory
+
+=head1 SYNOPSIS
+
+For internal use only
+
+=head1 DESCRIPTION
+
+This is an internal module that is used by the Bio::DB::GFF in-memory
+adaptor to return an iterator across a sequence feature query.  The
+object has a single method, next_feature(), that returns the next
+feature from the query.  The method next_seq() is an alias for
+next_feature().
+
+=head1 BUGS
+
+None known yet.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+package Bio::DB::GFF::Adaptor::memory::iterator;
+use strict;
+# $Id: iterator.pm,v 1.1 2005/07/27 22:15:15 lstein Exp $
+# this module needs to be cleaned up and documented
+use Bio::Root::Version;
+
+*next_seq = \&next_feature;
+
+# Constructor.  $data is the array ref next_feature() walks through and
+# $callback the code ref it calls for each entry.  The read position
+# starts at 0 and the cache of pending features starts empty.
+sub new {
+  my ($class,$data,$callback) = @_;
+  my %state = (
+    data     => $data,
+    pos      => 0,
+    callback => $callback,
+    cache    => [],
+  );
+  return bless \%state,$class;
+}
+
+# Return the next feature, or nothing when the iterator is exhausted.
+# Features already produced by the callback are served from the cache
+# first.  Otherwise entries of the data array are fed to the callback
+# until it returns a true value (an array ref of features).  At the
+# first false entry the callback is called once more with no arguments,
+# and the data and position are dropped.
+sub next_feature {
+  my $self = shift;
+
+  # The cache is always kept as an array ref.  It used to be left undef
+  # once the data ran out, so another call after exhaustion died under
+  # strict refs when dereferencing it.
+  my $cache = $self->{cache} ||= [];
+  return shift @$cache if @$cache;
+
+  my $data     = $self->{data} or return;
+  my $callback = $self->{callback};
+
+  my $features;
+  while (1) {
+    my $feature = $data->[$self->{pos}++];
+    if ($feature) {
+      $features   = $callback->(@{$feature});
+      last if $features;
+    } else {
+      # end of data: final callback with no arguments
+      $features = $callback->();
+      undef $self->{pos};
+      undef $self->{data};
+      last;
+    }
+  }
+  unless ($features) {
+    $self->{cache} = [];
+    return;
+  }
+  $self->{cache} = $features;
+  return shift @{$self->{cache}};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Adaptor/memory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,706 @@
+package Bio::DB::GFF::Adaptor::memory;
+
+=head1 NAME
+
+Bio::DB::GFF::Adaptor::memory -- Bio::DB::GFF database adaptor for in-memory databases
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+  my $db = Bio::DB::GFF->new(-adaptor=> 'memory',
+                             -gff    => 'my_features.gff',
+                             -fasta  => 'my_dna.fa'
+                            );
+
+See L<Bio::DB::GFF> for other methods.
+
+=head1 DESCRIPTION
+
+This adaptor implements an in-memory version of Bio::DB::GFF.  It can be used to
+store and retrieve SHORT GFF files. It inherits from Bio::DB::GFF.
+
+=head1 CONSTRUCTOR
+
+Use Bio::DB::GFF-E<gt>new() to construct new instances of this class.
+The following named arguments are recommended:
+
+   Argument         Description
+
+   -adaptor         Set to "memory" to create an instance of this class.
+   -gff             Read the indicated file or directory of .gff files.
+   -fasta           Read the indicated file or directory of fasta files.
+   -dir             Indicates a directory containing .gff and .fa files
+
+If you use the -dir option (also accepted as -dsn, -db or -directory)
+and the indicated directory is writable by the current process, then
+this library will create a FASTA file index that greatly diminishes
+the memory usage of this module.
+
+=head1 METHODS
+
+See L<Bio::DB::GFF> for inherited methods.
+
+=head1 BUGS
+
+none ;-)
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bioperl>
+
+=head1 AUTHOR
+
+Shuly Avraham E<lt>avraham at cshl.orgE<gt>.
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use strict;
+# $Id: memory.pm,v 1.46.4.1 2006/10/02 23:10:16 sendu Exp $
+# AUTHOR: Shulamit Avraham
+# This module needs to be cleaned up and documented
+
+# Bio::DB::GFF::Adaptor::memory --  in-memory db adaptor
+# implements the low level handling of data which stored in memory.
+# This adaptor implements a specific in memory schema that is compatible with Bio::DB::GFF.
+# Inherits from Bio::DB::GFF.
+
+
+use Bio::DB::GFF::Util::Rearrange; # for rearrange()
+use Bio::DB::GFF::Adaptor::memory::iterator;
+use File::Basename 'dirname';
+use Bio::DB::GFF::Adaptor::memory::feature_serializer qw(@hash2array_map);
+
+
+use constant MAX_SEGMENT => 1_000_000_000;  # the largest a segment can get
+
+use base qw(Bio::DB::GFF);
+
+# Constructor (the POD above says to reach it through Bio::DB::GFF->new).
+#
+# Named arguments, with the aliases accepted by rearrange():
+#   -gff   (-file)                  GFF file or directory to load
+#   -fasta                          FASTA file or directory to load
+#   -dir   (-dsn, -db, -directory)  fallback used for both -gff and -fasta
+#   -preferred_groups               passed to preferred_groups() when defined
+sub new {
+  my $class = shift ;
+  my ($file,$fasta,$dbdir,$preferred_groups) = rearrange([
+                                                          [qw(GFF FILE)],
+                                                          'FASTA',
+                                                          [qw(DSN DB DIR DIRECTORY)],
+                                                          'PREFERRED_GROUPS',
+                                                         ],@_);
+
+  # fill in object
+  my $self = bless{ data => [] },$class;
+  $self->preferred_groups($preferred_groups) if defined $preferred_groups;
+
+  # -dir supplies both the features and the sequence unless overridden
+  $file  ||= $dbdir;
+  $fasta ||= $dbdir;
+  $self->load_gff($file)             if $file;
+  $self->load_or_store_fasta($fasta) if $fasta;
+  return $self;
+}
+
+# Attach sequence data.  If $fasta is a file in a writable directory, or is
+# itself a writable directory, open it through Bio::DB::Fasta and keep that
+# handle as the DNA database; otherwise fall back to load_fasta().
+sub load_or_store_fasta {
+  my ($self,$fasta) = @_;
+
+  my $indexable = (-f $fasta && -w dirname($fasta))
+               || (-d $fasta && -w $fasta);
+  return $self->load_fasta($fasta) unless $indexable;
+
+  require Bio::DB::Fasta;
+  my $dna_db = eval {Bio::DB::Fasta->new($fasta);};
+  warn "$@\nCan't open sequence file(s). Use -gff instead of -dir if you wish to load features without sequence.\n"
+    unless $dna_db;
+  return $dna_db && $self->dna_db($dna_db);
+}
+
+# Combined getter/setter for the DNA database handle.  Always returns the
+# value held *before* the call; when an argument is supplied (even undef) it
+# becomes the new value.
+sub dna_db {
+  my ($self,@new_value) = @_;
+  my $previous = $self->{dna_db};
+  if (@new_value) {
+    $self->{dna_db} = $new_value[0];
+  }
+  return $previous;
+}
+
+# Append a chunk of sequence to the in-memory DNA stored under $id.
+# The offset argument is accepted but not used: chunks are simply
+# concatenated in the order they arrive.
+sub insert_sequence {
+  my ($self,$id,$offset,$seq) = @_;
+  return $self->{dna}{$id} .= $seq;
+}
+
+# Low-level fetch of a DNA substring given its name and the desired range.
+#
+# Args   : $id    sequence name
+#          $start 1-based start (defaults to 1 when only $stop is given)
+#          $stop  1-based stop  (defaults to the sequence end when only
+#                 $start is given)
+#          $class accepted for interface compatibility, not used here
+# Returns: the requested bases; the reverse complement when $start > $stop;
+#          the whole sequence when neither bound is given; '' when no DNA
+#          has been loaded at all; undef when $id is unknown.
+# When a dna_db() handle is set the request is delegated to it unchanged.
+sub get_dna {
+  my $self = shift;
+  my ($id,$start,$stop,$class) = @_;
+  if (my $dna_db = $self->dna_db) {
+    return $dna_db->seq($id,$start=>$stop);
+  }
+  return '' unless $self->{dna};
+
+  my $sequence = $self->{dna}{$id};
+  return $sequence unless defined $sequence;
+  return $sequence unless defined $start || defined $stop;
+
+  # Open-ended ranges: an undefined $stop used to be compared numerically
+  # (as 0), which wrongly triggered the reverse-complement branch.
+  $start = 1                if !defined $start;
+  $stop  = length $sequence if !defined $stop;
+
+  my $reversed = 0;
+  if ($start > $stop) {
+    $reversed++;
+    ($start,$stop) = ($stop,$start);
+  }
+  my $dna = substr($sequence,$start-1,$stop-$start+1);
+  if ($reversed) {
+    $dna =~ tr/gatcGATC/ctagCTAG/;
+    $dna = reverse $dna;
+  }
+
+  $dna;
+}
+
+# Prepare for a load: reset the per-group staging hash and the feature list.
+# Always returns 1.
+sub setup_load {
+  my ($self) = @_;
+  @{$self}{qw(tmp data)} = ({}, []);
+  return 1;
+}
+
+# Finish a load: move the features staged per group in {tmp} onto {data},
+# numbering them consecutively from 0 via their feature_id field.
+# Always returns 1.
+sub finish_load {
+  my ($self) = @_;
+  my $next_id = 0;
+  for my $group (values %{$self->{tmp}}) {
+    for my $feature (@$group) {
+      $feature->{feature_id} = $next_id;
+      $next_id++;
+    }
+    push @{$self->{data}},@$group;
+  }
+  return 1;
+}
+
+# Stage one parsed GFF line (a feature hash) for loading.
+# A strand or phase of '.' is normalised to '', an empty group class
+# defaults to 'Sequence', and the feature is filed in {tmp} under its
+# (gclass,gname) pair so that members of a group end up adjacent.
+sub load_gff_line {
+  my ($self,$feature_hash) = @_;
+
+  for my $field (qw(strand phase)) {
+    my $value = $feature_hash->{$field};
+    $feature_hash->{$field} = '' if $value && $value eq '.';
+  }
+  $feature_hash->{gclass} = 'Sequence' unless length($feature_hash->{gclass}) > 0;
+
+  # same key that $hash{$gclass,$gname} would build
+  my $group_key = join $;,@{$feature_hash}{qw(gclass gname)};
+  return push @{$self->{tmp}{$group_key}},$feature_hash;
+}
+
+# Given a landmark name and class, locate it in absolute coordinates.
+#
+# Args   : $name   landmark name; '*' and '?' act as wildcards
+#          $class  landmark (group) class
+#          $refseq optional; restrict the answer to this reference sequence
+# Returns: an array ref with one [ref,class,start,stop,strand,name] entry
+#          per reference sequence the landmark was found on, spanning the
+#          min start / max stop of the matching features.  When nothing
+#          matches, error() is called and nothing is returned.
+sub get_abscoords {
+  my $self = shift;
+  my ($name,$class,$refseq) = @_;
+  my %refs;
+  my $regexp;
+
+  # keep the caller's spelling: $name is rewritten into a regexp below
+  my $requested_name = $name;
+
+  if ($name =~ /[*?]/) {  # uh oh regexp time
+    $name = quotemeta($name);
+    $name =~ s/\\\*/.*/g;
+    $name =~ s/\\\?/.?/g;
+    $regexp++;
+  }
+
+  # Find all features that have the requested name and class.
+  # Sort them by reference point.
+  for my $feature (@{$self->{data}}) {
+
+    my $no_match_class_name;
+    my $class_matches = !defined($feature->{gclass}) ||
+      length($feature->{gclass}) == 0 ||
+        $feature->{gclass} eq $class;
+
+    # NOTE(review): a feature with no gname is never rejected by the checks
+    # below, so it is filed under every lookup -- confirm this is intended.
+    if (defined $feature->{gname}) {
+      my $matches = $class_matches
+        && ($regexp ? $feature->{gname} =~ /$name/i : lc($feature->{gname}) eq lc($name));
+      $no_match_class_name = !$matches;  # to accommodate Shuly's interesting logic
+    }
+
+    # name/class did not match: still accept the feature if it carries a
+    # matching Alias attribute
+    if ($no_match_class_name){
+      my $feature_attributes = $feature->{attributes};
+      my $attributes = {Alias => $name};
+      if (!$self->_matching_attributes($feature_attributes,$attributes)){
+        next;
+      }
+    }
+
+    push @{$refs{$feature->{ref}}},$feature;
+  }
+
+  # find out how many reference points we recovered
+  if (! %refs) {
+    $self->error("$requested_name not found in database");
+    return;
+  }
+
+  # compute min and max per reference sequence
+  my @found_segments;
+  foreach my $ref (keys %refs) {
+    next if defined($refseq) and lc($ref) ne lc($refseq);
+    my @found = @{$refs{$ref}};
+    my ($strand,$start,$stop,$name);
+    foreach (@found) {
+      $strand ||= $_->{strand};
+      $strand = '+' if $strand && $strand eq '.';
+      $start  = $_->{start} if !defined($start) || $start > $_->{start};
+      $stop   = $_->{stop}  if !defined($stop)  || $stop  < $_->{stop};
+      $name ||= $_->{gname};
+    }
+    push @found_segments,[$ref,$class,$start,$stop,$strand,$name];
+
+  }
+
+  return \@found_segments;
+}
+
+# Keyword search over feature attribute values.
+#
+# Args   : $search_string  free text; '*' and '?' are stripped
+#          $limit          optional cap on the number of candidates collected
+# Returns: a list of [featname, note text, relevance] triples.  Relevance is
+#          10 x the number of case-insensitive word hits across all of the
+#          feature's attribute values.  A candidate is kept only if every
+#          whitespace-separated search term occurs in its note text.
+# Note that $limit is applied while collecting candidates, before the final
+# filter, so fewer than $limit results may come back.
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  $search_string =~ tr/*?//d;
+
+  my @results;
+  my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
+  my $search = join '|',@words;
+
+  for my $feature (@{$self->{data}}) {
+    next unless defined $feature->{gclass} && defined $feature->{gname}; # ignore NULL objects
+    next unless $feature->{attributes};
+    my @attributes = @{$feature->{attributes}};
+    my @values     = map {$_->[1]} @attributes;
+    my $value      = "@values";
+    my $matches    = 0;
+    for my $w (@words) {
+      my @hits = $value =~ /($w)/ig;
+      $matches += @hits;
+    }
+    next unless $matches;
+
+    my $relevance = 10 * $matches;
+    my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
+    # note text = all Note values, followed by any other attribute value
+    # that contains one of the search words
+    my $note;
+    $note   = join ' ',map {$_->[1]} grep {$_->[0] eq 'Note'}                @{$feature->{attributes}};
+    $note  .= join ' ',grep /$search/,map {$_->[1]} grep {$_->[0] ne 'Note'} @{$feature->{attributes}};
+    push @results,[$featname,$note,$relevance];
+    last if defined $limit && @results >= $limit;
+  }
+
+  #added result filtering so that this method returns the expected results
+  #this section of code used to be in GBrowse's do_keyword_search method
+
+  # One precompiled, fully quoted pattern per search term.  This replaces a
+  # filter that was assembled as source text and run through string eval.
+  # Empty terms (from leading whitespace) are skipped.
+  my @required = map { qr/\Q$_\E/i } grep { length $_ } split /\s+/,$search_string;
+
+  my @matches = grep {
+    my $note = $_->[1];
+    @required && !grep { $note !~ $_ } @required;
+  } @results;
+
+  return @matches;
+}
+
+# Remove the features at the given positions of {data}.
+# Positions are processed from highest to lowest so that earlier removals do
+# not shift the ones still to come; out-of-range positions are ignored.
+# Returns the number of features removed.
+sub _delete_features {
+  my ($self,@feature_ids) = @_;
+  my $removed = 0;
+  for my $index (sort {$b<=>$a} @feature_ids) {
+    next if $index < 0 || $index >= @{$self->{data}};
+    my $gone = splice(@{$self->{data}},$index,1);
+    $removed++ if defined $gone;
+  }
+  return $removed;
+}
+
+# Delete features according to a specification hash with the keys
+# segments, types, force and range_type.
+#   - segments given: delete the features those segments report (optionally
+#     narrowed by types), via delete_features()
+#   - only types given: delete every feature of those types
+#   - neither: wipe all feature data, which requires force to be true
+# Returns the number of features deleted.
+sub _delete {
+  my ($self,$delete_spec) = @_;
+  my $ranges = $delete_spec->{segments} || [];
+  my $types  = $delete_spec->{types}    || [];
+
+  if (@$ranges) {
+    my @args = @$types ? (-type=>$types) : ();
+    push @args,(-range_type => $delete_spec->{range_type});
+    my %ids_to_remove = map {$_->id => 1} map {$_->features(@args)} @$ranges;
+    my $deleted = $self->delete_features(keys %ids_to_remove);
+    return $deleted;
+  }
+
+  if (@$types) {
+    my %ids_to_remove = map {$_->id => 1} $self->features(-type=>$types);
+    my $deleted = $self->delete_features(keys %ids_to_remove);
+    return $deleted;
+  }
+
+  $self->throw("This operation would delete all feature data and -force not specified")
+    unless $delete_spec->{force};
+  my $deleted = @{$self->{data}};
+  @{$self->{data}} = ();
+  return $deleted;
+}
+
+# attributes -
+
+# Some GFF version 2 files use the groups column to store a series of
+# attribute/value pairs.  In this interpretation of GFF, the first such
+# pair is treated as the primary group for the feature; subsequent pairs
+# are treated as attributes.  Two attributes have special meaning:
+# "Note" is for backward compatibility and is used for unstructured text
+# remarks.  "Alias" is considered as a synonym for the feature name.
+# If no name is provided, then attributes() returns a flattened hash, of
+# attribute=>value pairs.
+
+# Return the attributes of the feature with the given id.
+# With $tag: the values of every attribute whose name equals $tag, compared
+# case-insensitively.  Without $tag: a flat (name,value,name,value,...) list.
+sub do_attributes{
+  my ($self,$feature_id,$tag) = @_;
+
+  my $feature = $self->_basic_features_by_id($feature_id);
+
+  my @result;
+  for my $pair (@{$feature->{attributes}}) {
+    my ($key,$value) = @$pair;
+    if (!defined($tag)) {
+      push @result,($key,$value);
+    }
+    elsif (lc($key) eq lc($tag)) {
+      push @result,$value;
+    }
+  }
+  return @result;
+}
+
+
+#sub get_feature_by_attribute{
+# Invoke $callback for every feature carrying one of the requested
+# attribute name/value pairs (both compared case-insensitively).
+#
+# Args   : $attributes  hash ref of attribute name => value
+#          $callback    code ref; receives the feature as a flat field list
+# Returns: the number of callback invocations.  A feature that matches on
+#          several attributes is reported once per match.
+sub _feature_by_attribute{
+  my $self = shift;
+  my ($attributes,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+  my $count = 0;
+
+  for my $feature (@{$self->{data}}) {
+
+    for my $attr (@{$feature->{attributes}}) {
+      my ($attr_name,$attr_value) = @$attr ;
+      #there could be more than one set of attributes......
+      foreach (keys %$attributes) {
+        if (lc($_) eq lc($attr_name) && lc($attributes->{$_}) eq lc($attr_value)) {
+          $callback->($self->_hash_to_array($feature));
+          $count++;
+        }
+      }
+    }
+  }
+
+  # the count used to be computed but never handed back to the caller
+  return $count;
+}
+
+
+# Low-level retrieval of GFF lines.  Selects the features that satisfy the
+# range and type criteria in $search/$options, optionally orders them by
+# "gclass:gname" (case-insensitively) when the sort_by_group option is set,
+# and passes the fields of each one to the callback.
+# Returns the number of features passed to the callback.
+
+sub get_features{
+  my ($self,$search,$options,$callback) = @_;
+
+  my $hits = $self->_get_features_by_search_options($search,$options);
+
+  if ($options->{sort_by_group}) {
+    my @by_group = sort {lc("$a->{gclass}:$a->{gname}") cmp lc("$b->{gclass}:$b->{gname}")}
+      @{$hits};
+    @{$hits} = @by_group;
+  }
+
+  my $delivered = 0;
+  for my $feature (@{$hits}) {
+    $delivered++;
+    $callback->($self->_hash_to_array($feature));
+  }
+
+  return $delivered;
+}
+
+
+# Low level implementation of fetching a named feature.
+# GFF annotations are named using the group class and name fields.
+# Zero, one, or several features may match; each is passed to the callback
+# and the number of matches is returned.
+
+=head2 _feature_by_name
+
+ Title   : _feature_by_name
+ Usage   : $db->_feature_by_name($class,$name,$location,$callback)
+ Function: get a list of features by name and class
+ Returns : count of number of features retrieved
+ Args    : class of feature, name of feature, an optional location
+           (array ref of reference sequence, start, stop), and a callback
+ Status  : protected
+
+This method is used internally.  The name may contain the wildcards
+"*" and "?".  The callback is invoked once per matching feature with the
+feature's fields followed by a trailing 0.
+
+=cut
+
+sub _feature_by_name {
+  my $self = shift;
+  my ($class,$name,$location,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+  my $count = 0;
+  my $regexp;
+
+  # translate shell-style wildcards into a regular expression
+  if ($name =~ /[*?]/) {  # uh oh regexp time
+    $name = quotemeta($name);
+    $name =~ s/\\\*/.*/g;
+    $name =~ s/\\\?/.?/g;
+    $regexp++;
+  }
+
+  for my $feature (@{$self->{data}}) {
+    # name must match: by (unanchored, case-insensitive) regexp in wildcard
+    # mode, or by case-insensitive equality
+    next unless ($regexp && $feature->{gname} =~ /$name/i) || lc($feature->{gname})  eq lc($name);
+    # a feature without a group class matches any requested class
+    next if defined($feature->{gclass}) && length($feature->{gclass}) > 0 && $feature->{gclass} ne $class;
+
+    # optional location filter: same reference sequence and overlapping range
+    if ($location) {
+      next if $location->[0] ne $feature->{ref};
+      next if $location->[1] && $location->[1] > $feature->{stop};
+      next if $location->[2] && $location->[2] < $feature->{start};
+    }
+    $count++;
+    $callback->($self->_hash_to_array($feature),0);
+  }
+  return $count;
+}
+
+# Low level implementation of fetching a feature by its id.
+# The id of a feature in the in-memory db is its position in the {data}
+# array of feature hashes.
+#
+# Args   : $ids       array ref of feature ids
+#          $type      only 'feature' is handled; any other value matches nothing
+#          $callback  code ref; receives each feature as a flat field list
+# Returns: the number of features passed to the callback
+sub _feature_by_id{
+  my $self = shift;
+  my ($ids,$type,$callback) = @_;
+  $callback || $self->throw('must provide a callback argument');
+
+  my $count = 0;
+  if ($type eq 'feature'){
+    for my $feature_id (@$ids){
+      my $feature = $self->_basic_features_by_id($feature_id);
+      # an id with no feature behind it is skipped rather than handed to
+      # _hash_to_array(), which cannot dereference undef
+      next unless $feature;
+      $callback->($self->_hash_to_array($feature));
+      $count++;
+    }
+  }
+
+  # the count used to be computed but never handed back to the caller
+  return $count;
+}
+
+# Fetch raw feature hashes by position in {data}.
+#
+# Args   : a single feature id, or an array ref of ids
+# Returns: in list context the features for all ids (undef for unknown
+#          ids); in scalar context only the first of them
+sub _basic_features_by_id{
+  my $self = shift;
+  my ($ids) = @_;
+
+  # "ref $ids =~ /ARRAY/" parsed as ref($ids =~ /ARRAY/), which is always
+  # false, so an array ref argument was wrapped a second time and then used
+  # as an index.  Test the reference type explicitly.
+  $ids = [$ids] unless ref($ids) eq 'ARRAY';
+
+  my @result;
+  for my $feature_id (@$ids){
+    push @result, ${$self->{data}}[$feature_id];
+  }
+  return wantarray() ? @result : $result[0];
+}
+
+# Like get_features(), but instead of invoking the callback immediately it
+# returns a Bio::DB::GFF::Adaptor::memory::iterator over the matching
+# features, each already converted to an array of fields.
+
+sub get_features_iterator {
+  my ($self,$search,$options,$callback) = @_;
+  $self->throw('must provide a callback argument') unless $callback;
+
+  my $hits = $self->_get_features_by_search_options($search,$options);
+  my $rows = $self->_convert_feature_hash_to_array($hits);
+
+  return Bio::DB::GFF::Adaptor::memory::iterator->new($rows,$callback);
+}
+
+
+# Fetch the list of feature types (method:source pairs).
+#
+# The query may be limited to a reference sequence ($srcseq), its class
+# ($class), a range ($start,$stop; features must OVERLAP it) and a list of
+# [method,source] pairs ($typelist).  Any of these may be undef.
+#
+# If $want_count is false the method returns a simple list of
+# Bio::DB::GFF::Typename objects.  If it is true the method returns a list
+# of $type=>$count pairs, where $count is the number of matching features
+# of that type.
+
+sub get_types {
+  my ($self,$srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
+
+  # an open-ended range is closed with 1 / MAX_SEGMENT
+  my $ranged = defined($start) || defined($stop);
+  if ($ranged) {
+    $start = 1           unless defined $start;
+    $stop  = MAX_SEGMENT unless defined $stop;
+  }
+
+  my (%count_of,%type_of);
+
+ FEATURE:
+  for my $feature (@{$self->{data}}) {
+
+    next FEATURE if defined($srcseq) && lc($feature->{ref}) ne lc($srcseq);
+
+    if (defined $class) {
+      # NOTE(review): the loader code visible in this file stores the group
+      # class under 'gclass' and never sets a 'class' key, so this filter may
+      # reject every feature whenever $class is given -- confirm which key is
+      # intended.
+      my $feature_class = $feature->{class};
+      next FEATURE unless defined $feature_class && $feature_class eq $class ;
+    }
+
+    # the requested range should OVERLAP the retrieved features
+    if ($ranged) {
+      next FEATURE unless $feature->{stop} >= $start && $feature->{start} <= $stop;
+    }
+
+    my ($feature_method,$feature_source) = @{$feature}{qw(method source)};
+    if (defined $typelist && @$typelist) {
+      next FEATURE unless $self->_matching_typelist($feature_method,$feature_source,$typelist);
+    }
+
+    my $type = Bio::DB::GFF::Typename->new($feature_method,$feature_source);
+    $count_of{$type}++;
+    $type_of{$type} = $type;
+  }
+
+  return $want_count ? %count_of : values %type_of;
+}
+
+# Return the sorted list of distinct group classes (gclass) present in the
+# loaded features.
+sub classes {
+  my ($self) = @_;
+  my %seen;
+  foreach my $feature (@{$self->{data}}) {
+    $seen{$feature->{gclass}}++;
+  }
+  my @sorted = sort keys %seen;
+  return @sorted;
+}
+
+# Internal method: scan the feature array sequentially and keep every
+# feature that satisfies the search options.
+#
+# $search  supplies rangetype, refseq, start, stop and types;
+# $options supplies ATTRIBUTES.  (refclass, sparse and sort_by_group are
+# read from the hashes but not used by this method.)
+#
+# Each kept feature has its feature_id set to its position in {data} and its
+# group_id set to undef.  Returns an array ref of the kept feature hashes.
+sub _get_features_by_search_options{
+  my ($self, $search,$options) = @_;
+  my ($rangetype,$refseq,undef,$start,$stop,$types,undef,undef,$attributes) =
+    (@{$search}{qw(rangetype refseq refclass start stop types)},
+     @{$options}{qw(sparse sort_by_group ATTRIBUTES)}) ;
+
+  # an open-ended range is closed with 0 / MAX_SEGMENT
+  my $ranged = defined($start) || defined($stop);
+  if ($ranged) {
+    $start = 0               unless defined($start);
+    $stop  = MAX_SEGMENT     unless defined($stop);
+  }
+
+  my @found_features;
+  my $position = -1 ;
+
+  for my $feature (@{$self->{data}}) {
+
+    $position++;
+
+    if (defined $refseq){
+      next unless lc($feature->{ref}) eq lc($refseq);
+    }
+
+    if ($ranged) {
+      my ($f_start,$f_stop) = @{$feature}{qw(start stop)};
+      # any rangetype other than the three named ones means "exact match"
+      my $in_range =
+          $rangetype eq 'overlaps'     ? ($f_stop  >= $start && $f_start <= $stop)
+        : $rangetype eq 'contains'     ? ($f_start >= $start && $f_stop  <= $stop)
+        : $rangetype eq 'contained_in' ? ($f_start <= $start && $f_stop  >= $stop)
+        :                                ($f_start == $start && $f_stop  == $stop);
+      next unless $in_range;
+    }
+
+    if (defined $types && @$types){
+      next unless $self->_matching_typelist($feature->{method},$feature->{source},$types);
+    }
+
+    if (defined $attributes){
+      next unless $self->_matching_attributes($feature->{attributes},$attributes);
+    }
+
+    # the feature meets every criterion: stamp it and keep it
+    $feature->{feature_id} = $position;
+    $feature->{group_id}   = undef;
+    push @found_features,$feature;
+  }
+
+  return \@found_features;
+}
+
+
+# Flatten a feature hash into an array of field values, in the order given
+# by @hash2array_map.  Returns the list in list context and an array ref in
+# scalar context.
+sub _hash_to_array {
+  my $self   = shift;
+  my $record = shift;
+  my @fields = @{$record}{@hash2array_map};
+  return @fields if wantarray;
+  return \@fields;
+}
+
+# Convert an array ref of feature hashes into an array ref of field arrays
+# (one array ref per feature), the form in which features are later passed
+# on to callbacks.
+sub _convert_feature_hash_to_array{
+  my $self     = shift;
+  my $features = shift;
+  # scalar context makes _hash_to_array() hand back an array ref
+  my @rows = map {scalar $self->_hash_to_array($_)} @$features;
+  return \@rows;
+}
+
+# True (1) if the feature's method/source matches any [method,source] pair
+# in $typelist, false (0) otherwise.  Comparison is case-insensitive; a pair
+# with an undefined source matches every source.
+sub _matching_typelist{
+  my ($self, $feature_method,$feature_source,$typelist) = @_;
+  for my $wanted (@$typelist) {
+    my ($want_method,$want_source) = @$wanted;
+    return 1
+      if lc($want_method) eq lc($feature_method)
+      && (!defined($want_source) || lc($want_source) eq lc($feature_source));
+  }
+  return 0;
+}
+
+# True (1) only if every name => value pair in $attributes is present among
+# the feature's attributes; an empty $attributes hash therefore matches.
+sub _matching_attributes {
+  my ($self, $feature_attributes,$attributes) = @_ ;
+  for my $wanted_name (keys %$attributes) {
+    _match_all_attr_in_feature($wanted_name,$attributes->{$wanted_name},$feature_attributes)
+      or return 0;
+  }
+  return 1;
+}
+
+# Plain function (not a method): true (1) if the list of [name,value] pairs
+# in $feature_attributes contains exactly the given name and value
+# (case-sensitive string comparison), false (0) otherwise.
+sub _match_all_attr_in_feature{
+  my ($attr_name,$attr_value,$feature_attributes) = @_;
+  for my $pair (@$feature_attributes) {
+    my ($name,$value) = @$pair;
+    return 1 if $attr_name eq $name && $attr_value eq $value;
+  }
+  return 0;
+}
+
+
+# No-op implementations: both simply return 1.
+sub do_initialize           { return 1 }
+sub get_feature_by_group_id { return 1 }
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/alignment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/alignment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/alignment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,138 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::alignment -- Alignment aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['alignment'],
+				 );
+
+ -----------------------------
+ Aggregator method: alignment
+ Main method:       (none)
+ Sub methods:       nucleotide_match,EST_match,cDNA_match,expressed_sequence_match,
+                    translated_nucleotide_match,protein_match,HSP
+ -----------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::alignment is one of the default aggregators,
+and was written to be compatible with the C elegans GFF files.  It
+aggregates raw "similarity" features into composite features of type
+"alignment".  A better name for this class might be
+"gapped_alignment."
+
+This aggregator does not insist that there be a single top-level
+feature that spans one end of the alignment to the other.  As a
+result, it can produce truncated alignments if the entire alignment is
+not contained within the segment of interest.
+
+=cut
+
+package Bio::DB::GFF::Aggregator::alignment;
+
+use strict;
+
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 aggregate
+
+ Title   : aggregate
+ Usage   : $features = $a->aggregate($features,$factory)
+ Function: aggregate a feature list into composite features
+ Returns : an array reference containing modified features
+ Args    : see L<Bio::DB::GFF::Aggregator>
+ Status  : Public
+
+Because of the large number of similarity features, the aggregate()
+method is overridden in order to perform some optimizations.
+
+=cut
+
+# Collect every feature accepted by match_sub() into one composite feature
+# per (group,source) pair.  The composite is a clone of the first member
+# seen, retyped to this aggregator's method, and every member is attached to
+# it as a subfeature.  Features that do not match are passed through
+# untouched; matching ones are passed through only if passthru_sub() says so.
+# The feature list is modified in place.
+sub aggregate {
+  my $self = shift;
+  my $features = shift;
+  my $factory  = shift;
+
+  my $matchsub = $self->match_sub($factory) or return;
+  my $passthru = $self->passthru_sub($factory);
+  my $method   = $self->get_method;
+
+  my (%alignments,@result);
+
+  warn "running alignment aggregator" if $factory->debug;
+  for my $feature (@$features) {
+
+    if ($matchsub->($feature)) {
+
+      my $group  = $feature->{group};
+      my $source = $feature->source;
+      unless (exists $alignments{$group,$source}) {
+        my $type = Bio::DB::GFF::Typename->new($method,$source);
+
+        # Check the clone before touching it: assigning to a hash slice of
+        # an undefined $f would autovivify it and defeat the guard.
+        my $f = $feature->clone or next;
+        # this is a violation of OO encapsulation, but need to do it this way
+        # to achieve desired performance
+        @{$f}{qw(type score phase)} = ($type,undef,undef);
+
+        $alignments{$group,$source} = $f;
+      }
+
+      my $main = $alignments{$group,$source};
+      $main->add_subfeature($feature);
+      push @result,$feature if $passthru && $passthru->($feature);
+    } else {
+      push @result,$feature;
+    }
+  }
+
+  warn "running aligner adjuster" if $factory->debug;
+  for my $alignment (values %alignments) {
+    $alignment->adjust_bounds;
+    $alignment->compound(1);
+    push @result,$alignment;
+  }
+  warn "aligner done" if $factory->debug;
+  @$features = @result;
+}
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "alignment"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method {
+  return 'alignment';   # method name given to the composite feature
+}
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : the full list of aggregated methods
+ Args    : none
+ Status  : Public
+
+=cut
+
+# Methods of the features that are folded into an alignment.
+sub part_names {
+  return qw(
+    nucleotide_match
+    EST_match
+    cDNA_match
+    expressed_sequence_match
+    translated_nucleotide_match
+    protein_match
+    HSP
+  );
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/clone.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/clone.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/clone.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,160 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::clone -- Clone aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['transcript','clone'],
+				 );
+
+ ----------------------------------------------------------------------------
+ Aggregator method: clone
+ Main method:       -none-
+ Sub methods:       Clone_left_end Clone_right_end region:Genomic_canonical
+ ----------------------------------------------------------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::clone is one of the default aggregators, and
+was written to be compatible with the C elegans GFF files.  It
+aggregates raw "Clone_left_end", "Clone_right_end", and
+"region:Genomic_canonical" features into composite features of type
+"clone".
+
+=cut
+
+package Bio::DB::GFF::Aggregator::clone;
+
+use strict;
+
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 aggregate
+
+ Title   : aggregate
+ Usage   : $features = $a->aggregate($features,$factory)
+ Function: aggregate a feature list into composite features
+ Returns : an array reference containing modified features
+ Args    : see L<Bio::DB::GFF::Aggregator>
+ Status  : Public
+
+The WormBase GFF model is unusual in that clones aren't identified as
+a single feature with start and stop positions, but as two features, a
+"left end" and a "right end".  One or both of these features may be
+absent.  In order to accommodate this, the aggregator will return undef
+for the start and/or stop if one or both of the ends are missing.
+
+=cut
+
+#'
+
+# Build one composite "clone" feature per group from its
+# region/Sequence:Genomic_canonical feature plus its optional
+# Clone_left_end and Clone_right_end features.  The composite is a copy of
+# the canonical feature whose start and stop are taken from the left and
+# right end features, and are left undef for an end that is missing.
+# Groups without a canonical feature produce no composite.
+# The feature list is modified in place.
+sub aggregate {
+  my $self = shift;
+  my $features = shift;
+  my $factory  = shift;
+
+  my $matchsub    = $self->match_sub($factory) or return;
+  my $passthru    = $self->passthru_sub($factory);
+  my $method      = $self->get_method;
+
+  my (%clones,%types,@result);
+  for my $feature (@$features) {
+
+    if ($feature->group && $matchsub->($feature)) {
+
+      # The alternation is grouped so that both anchors apply to both
+      # names; /^region|Sequence$/ matched "starts with region" OR
+      # "ends with Sequence".
+      if ($feature->method =~ /^(?:region|Sequence)$/ && $feature->source eq 'Genomic_canonical') {
+        $clones{$feature->group}{canonical} = $feature;
+      } elsif ($feature->method eq 'Clone_left_end') {
+        $clones{$feature->group}{left} = $feature;
+      } elsif ($feature->method eq 'Clone_right_end') {
+        $clones{$feature->group}{right} = $feature;
+      }
+      push @result,$feature if $passthru && $passthru->($feature);
+    } else {
+      push @result,$feature;
+    }
+  }
+
+  for my $clone (keys %clones) {
+    my $canonical = $clones{$clone}{canonical} or next;
+
+    # the genomic_canonical doesn't tell us where the clone starts and stops
+    # so don't assume it
+    my $duplicate = $canonical->clone;   # make a duplicate of the feature
+    # munge the method and source fields
+    my $source    = $duplicate->source;
+    my $type = $types{$method,$source} ||= Bio::DB::GFF::Typename->new($method,$source);
+    $duplicate->type($type);
+
+    # on the minus strand the roles of the start and stop fields are swapped
+    my ($start,$stop) = $duplicate->strand > 0 ? ('start','stop') : ('stop','start');
+    @{$duplicate}{$start,$stop} =(undef,undef);
+
+    $duplicate->{$start} = $clones{$clone}{left}{$start}  if exists $clones{$clone}{left};
+    $duplicate->{$stop}  = $clones{$clone}{right}{$stop}  if exists $clones{$clone}{right};
+    $duplicate->method($self->method);
+    push @result,$duplicate;
+  }
+
+  @$features = @result;
+}
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "clone"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method {
+  return 'clone';   # method name given to the composite feature
+}
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : the list ("Clone_left_end", "Clone_right_end", "region:Genomic_canonical")
+ Args    : none
+ Status  : Public
+
+=cut
+
+# method[:source] names of the features that make up a clone.
+sub part_names {
+  return qw(
+    Clone_left_end
+    Clone_right_end
+    region:Genomic_canonical
+    Sequence:Genomic_canonical
+  );
+}
+
+1;
+
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/coding.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/coding.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/coding.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,102 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::coding -- The Coding Region Aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['coding'],
+				 );
+
+ ------------------------------------------------------------------------
+ Aggregator method: coding
+ Main method:       mRNA
+ Sub methods:       CDS
+ ------------------------------------------------------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::coding aggregates "CDS" features into a
+feature called "coding" and was written to be compatible with the
+Sequence Ontology canonical gene.  The CDS features are expected to
+belong to a parent of type "mRNA," but the aggregator will work even
+if this isn't the case.
+
+=cut
+
+package Bio::DB::GFF::Aggregator::coding;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "coding"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'coding'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : the list (CDS cds)
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return ('CDS', 'cds');  # both spellings of the sub-part method
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "mRNA"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'mRNA';  # method name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/match.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/match.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/match.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,105 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::match -- Match aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['match'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: match
+ Main method:       match
+ Sub methods:       similarity HSP
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+This aggregator is used for Sequence Ontology-compatible gapped
+alignments, in which there is a single top-level alignment called
+"match" and a series of subalignments called either "similarity" or
+"HSP".
+
+Also see the "alignment" aggregator.
+
+=cut
+
+package Bio::DB::GFF::Aggregator::match;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "match"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'match'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : the list "similarity", "HSP"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return ('similarity', 'HSP');  # the two accepted sub-alignment methods
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "match"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'match';  # method name of the aggregate's main component
+}
+
+sub require_whole_object { return 1; }  # always answers true for this aggregator
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/none.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/none.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/none.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,43 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::none -- No aggregation
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => 'none'
+				 );
+
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::none can be used to indicate that you do not
+want any aggregation performed.  It is equivalent to providing undef
+to the B<-aggregator> argument.  It overrides disaggregate() and
+aggregate() so that they do exactly nothing.
+
+=cut
+
+package Bio::DB::GFF::Aggregator::none;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+sub disaggregate {
+  my ($self, $types) = @_;
+  # Deliberate no-op: the requested types are left exactly as supplied.
+  return $types;  # same value the final statement previously evaluated to
+}
+
+sub aggregate {
+  my ($self, $features) = @_;
+  # Deliberate no-op: the feature list is left exactly as supplied.
+  return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/processed_transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/processed_transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/processed_transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,107 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::processed_transcript -- Sequence Ontology Transcript
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['processed_transcript'],
+				 );
+
+ ------------------------------------------------------------------------
+ Aggregator method: processed_transcript
+ Main method:       mRNA
+ Sub methods:       CDS exon five_prime_UTR three_prime_UTR transcription_start_site polyA_site 5'-UTR 3'-UTR
+ ------------------------------------------------------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::processed_transcript is one of the default
+aggregators, and was written to be compatible with the Sequence
+Ontology canonical gene.  It aggregates raw "exon," "CDS",
+"five_prime_UTR", "three_prime_UTR", "transcription_start_site" and
+"polyA_site" features into "mRNA" features.  The UTRs may also be
+named "UTR", "five_prime_untranslated_region",
+"three_prime_untranslated_region", "5'-UTR" or "3'-UTR".
+
+=cut
+
+package Bio::DB::GFF::Aggregator::processed_transcript;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "processed_transcript"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'processed_transcript'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
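+ Returns : the list CDS 5'-UTR 3'-UTR transcription_start_site polyA_site UTR five_prime_untranslated_region three_prime_untranslated_region five_prime_UTR three_prime_UTR exon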
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return ('CDS', q{5'-UTR}, q{3'-UTR}, 'transcription_start_site',
+          'polyA_site', 'UTR', 'five_prime_untranslated_region',
+          'three_prime_untranslated_region',
+          'five_prime_UTR', 'three_prime_UTR', 'exon');
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "mRNA"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'mRNA';  # method name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/so_transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/so_transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/so_transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,110 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::so_transcript -- Sequence Ontology Transcript
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['so_transcript'],
+				 );
+
+ ------------------------------------------------------------------------
+ Aggregator method: so_transcript
+ Main method:       mRNA
+ Sub methods:       CDS exon five_prime_UTR three_prime_UTR transcription_start_site polyA_site 5'-UTR 3'-UTR
+ ------------------------------------------------------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::so_transcript is identical to the
+processed_transcript aggregator, which was designed to be compatible
+with the Sequence Ontology canonical gene.  It aggregates raw "exon,"
+"CDS", "five_prime_UTR", "three_prime_UTR", "transcription_start_site"
+and "polyA_site" features into "mRNA" features.  The UTRs may also be
+named "UTR", "five_prime_untranslated_region",
+"three_prime_untranslated_region", "5'-UTR" or "3'-UTR".
+
+The processed_transcript aggregator is loaded by default, so this is
+only needed for backward compatibility.
+
+=cut
+
+package Bio::DB::GFF::Aggregator::so_transcript;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "so_transcript"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'so_transcript'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
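+ Returns : the list CDS 5'-UTR 3'-UTR transcription_start_site polyA_site UTR five_prime_untranslated_region three_prime_untranslated_region five_prime_UTR three_prime_UTR exon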
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return ('CDS', q{5'-UTR}, q{3'-UTR}, 'transcription_start_site',
+          'polyA_site', 'UTR', 'five_prime_untranslated_region',
+          'three_prime_untranslated_region',
+          'five_prime_UTR', 'three_prime_UTR', 'exon');
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "mRNA"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'mRNA';  # method name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,114 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::transcript -- Transcript aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['transcript','clone'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: transcript
+ Main method:       transcript
+ Sub methods:       exon CDS 5'UTR 3'UTR TSS PolyA
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::transcript is one of the default
+aggregators, and was written to be compatible with the C elegans GFF
+files.  It aggregates raw "exon", "CDS", "5'UTR", "3'UTR", "PolyA"
+and "TSS" features into "transcript" features.  For compatibility with
+the idiosyncrasies of the Sanger GFF format, it expects that the full
+range of the transcript is contained in a main feature of type
+"Transcript" (notice the capital "T").
+
+Internally this module is very simple.  To override it with one that
+recognizes a main feature named "gene", simply follow this
+template:
+
+ my $db = Bio::DB::GFF->new(...etc...)
+ my $aggregator = Bio::DB::GFF::Aggregator->new(-method => 'transcript',
+ 					        -main_method => 'gene',
+					        -sub_parts => ['exon','CDS']);
+ $db->add_aggregator($aggregator);
+
+=cut
+
+package Bio::DB::GFF::Aggregator::transcript;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "transcript"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'transcript'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : the list "exon", "CDS", "5'UTR", "3'UTR", "TSS" and "PolyA"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return ('exon', 'CDS', q{5'UTR}, q{3'UTR}, 'TSS', 'PolyA');
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript';  # method name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_acembly.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_acembly.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_acembly.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_acembly -- UCSC acembly aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_acembly'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: acembly
+ Main method:       transcript:acembly
+ Sub methods:       (none)
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_acembly;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "acembly"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'acembly'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:acembly"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:acembly';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_ensgene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_ensgene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_ensgene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_ensgene -- UCSC ensGene aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_ensgene'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: ensgene
+ Main method:       transcript
+ Sub methods:       ensGene
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_ensgene;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "ensgene"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'ensgene'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:ensGene"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:ensGene';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_genscan.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_genscan.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_genscan.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_genscan -- UCSC genscan aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_genscan'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: genscan
+ Main method:       transcript
+ Sub methods:       genscan
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_genscan;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "genscan"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'genscan'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:genscan"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:genscan';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_refgene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_refgene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_refgene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_refgene -- UCSC refGene aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_refgene'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: refgene
+ Main method:       transcript
+ Sub methods:       refGene
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_refgene;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "refgene"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'refgene'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:refGene"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:refGene';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_sanger22 -- UCSC sanger22 aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_sanger22'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: sanger22
+ Main method:       transcript
+ Sub methods:       sanger22
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_sanger22;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "sanger22"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'sanger22'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:sanger22"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:sanger22';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22pseudo.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22pseudo.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_sanger22pseudo.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_sanger22pseudo -- UCSC sanger22pseudo aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_sanger22pseudo'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: sanger22pseudo
+ Main method:       transcript
+ Sub methods:       sanger22pseudo
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_sanger22pseudo;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "sanger22pseudo"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'sanger22pseudo'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:sanger22pseudo"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:sanger22pseudo';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_softberry.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_softberry.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_softberry.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_softberry -- UCSC softberry aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['ucsc_softberry'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: softberry
+ Main method:       transcript
+ Sub methods:       softberryGene
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_softberry;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "softberry"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'softberry'; }  # method name of the composite object
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # empty list: this aggregator declares no sub-parts
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:softberryGene"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  return 'transcript:softberryGene';  # name of the aggregate's main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_twinscan.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_twinscan.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_twinscan.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_twinscan -- UCSC twinscan aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['transcript','clone'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: twinscan
+ Main method:       transcript
+ Sub methods:       twinscan
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_twinscan;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "twinscan"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'twinscan'; }  # method name given to the composite feature
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # no sub-part methods: empty list (undef in scalar context)
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:twinscan"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  'transcript:twinscan';  # type name of the main component
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_unigene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_unigene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator/ucsc_unigene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::ucsc_unigene -- UCSC UniGene aggregator
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42',
+				   -aggregator => ['transcript','clone'],
+				 );
+
+ -------------------------------------------------
+ Aggregator method: unigene
+ Main method:       transcript
+ Sub methods:       uniGene_2
+ -------------------------------------------------
+
+=head1 DESCRIPTION
+
+L<Bio::DB::GFF::Aggregator::transcript>
+
+=cut
+
+package Bio::DB::GFF::Aggregator::ucsc_unigene;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "unigene"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub method { return 'unigene'; }  # method name given to the composite feature
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : empty list
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub part_names {
+  return;  # no sub-part methods: empty list (undef in scalar context)
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "transcript:uniGene_2"
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub main_name {
+  # type name of the main component: "transcript" with the uniGene_2 suffix
+  'transcript:uniGene_2';
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<Bio::DB::GFF::Aggregator>
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>.
+
+Copyright (c) 2002 Allen Day, University of California, Los Angeles.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Aggregator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,626 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator -- Aggregate GFF groups into composite features
+
+=head1 SYNOPSIS
+
+ use Bio::DB::GFF;
+
+ my $agg1 = Bio::DB::GFF::Aggregator->new(-method       => 'cistron',
+                                          -main_method  => 'locus',
+                                          -sub_parts    => ['allele','variant']
+                                         );
+
+ my $agg2 = Bio::DB::GFF::Aggregator->new(-method       => 'splice_group',
+                                          -sub_parts    => 'transcript');
+
+ my $db      = Bio::DB::GFF->new( -adaptor    => 'dbi:mysql',
+			          -aggregator => [$agg1,$agg2],
+                                  -dsn        => 'dbi:mysql:elegans42',
+				 );
+
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator is used to aggregate GFF groups into
+composite features.  Each composite feature has a "main part", the
+top-level feature, and a series of zero or more subparts, retrieved
+with the sub_SeqFeature() method.  The aggregator class is designed to
+be subclassable, allowing a variety of GFF feature types to be
+supported.
+
+The base Bio::DB::GFF::Aggregator class is generic, and can be used to
+create specific instances to be passed to the -aggregator argument of
+Bio::DB::GFF-E<gt>new() call.  The various subclasses of
+Bio::DB::GFF::Aggregator are tuned for specific common feature types
+such as clones, gapped alignments and transcripts.
+
+Instances of Bio::DB::GFF::Aggregator have three attributes:
+
+=over 3
+
+=item method
+
+This is the GFF method field of the composite feature as a whole.  For
+example, "transcript" may be used for a composite feature created by
+aggregating individual intron, exon and UTR features.
+
+=item main method
+
+Sometimes GFF groups are organized hierarchically, with one feature
+logically containing another.  For example, in the C. elegans schema,
+methods of type "Sequence:curated" correspond to regions covered by
+curated genes.  There can be zero or one main methods.
+
+=item subparts
+
+This is a list of one or more methods that correspond to the component
+features of the aggregates.  For example, in the C. elegans database,
+the subparts of transcript are "intron", "exon" and "CDS".
+
+=back
+
+Aggregators have two main methods that can be overridden in
+subclasses:
+
+=over 4
+
+=item disaggregate()
+
+This method is called by the Adaptor object prior to fetching a list
+of features.  The method is passed a reference to an array of the
+[method,source] pairs that the user has requested, and it adds to
+that array the raw feature types it would like the adaptor to fetch.
+
+=item aggregate()
+
+This method is called by the Adaptor object after it has fetched 
+features.  The method is passed a list of raw features and is expected 
+to add its composite features to the list.
+
+=back
+
+The disaggregate() and aggregate() methods provided by the base
+Aggregator class should be sufficient for many applications.  In this
+case, it suffices for subclasses to override the following methods:
+
+=over 4
+
+=item method()
+
+Return the default method for the composite feature as a whole.
+
+=item main_name()
+
+Return the default main method name.
+
+=item part_names()
+
+Return a list of subpart method names.
+
+=back
+
+Provided that method() and part_names() are overridden (and optionally
+main_name() as well), then the bare name of the aggregator subclass
+can be passed to the -aggregator of Bio::DB::GFF-E<gt>new().  For example,
+this is a small subclass that will aggregate features of type "allele"
+and "polymorphism" into an aggregate named "mutant":
+
+  package Bio::DB::GFF::Aggregator::mutant;
+
+  use strict;
+  use Bio::DB::GFF::Aggregator;
+
+  use base qw(Bio::DB::GFF::Aggregator);
+
+  sub method { 'mutant' }
+
+  sub part_names {
+    return qw(allele polymorphism);
+  }
+
+  1;
+
+Once installed, this aggregator can be passed to Bio::DB::GFF-E<gt>new()
+by name like so:
+
+ my $db      = Bio::DB::GFF->new( -adaptor    => 'dbi:mysql',
+			          -aggregator => 'mutant',
+                                  -dsn        => 'dbi:mysql:elegans42',
+				 );
+
+=head1 API
+
+The remainder of this document describes the public and private
+methods implemented by this module.
+
+=cut
+
+package Bio::DB::GFF::Aggregator;
+
+use strict;
+use Bio::DB::GFF::Util::Rearrange;  # for rearrange()
+use Bio::DB::GFF::Feature;
+
+use base qw(Bio::Root::Root);
+
+my $ALWAYS_TRUE   = sub { 1 };
+
+=head2 new
+
+ Title   : new
+ Usage   : $a = Bio::DB::GFF::Aggregator->new(@args)
+ Function: create a new aggregator
+ Returns : a Bio::DB::GFF::Aggregator object
+ Args    : see below
+ Status  : Public
+
+This is the constructor for Bio::DB::GFF::Aggregator.  Named arguments 
+are as follows:
+
+  -method           the method for the composite feature
+
+  -main_method      the top-level raw feature, if any
+
+  -sub_parts        the list of raw features that will form the subparts
+		    of the composite feature (array reference or scalar)
+
+=cut
+
+sub new {
+  my $class = shift;
+  # rearrange() maps the named arguments (and their aliases) to positions
+  my ($method,$main,$sub_parts,$whole_object) = rearrange(['METHOD',
+                                                           ['MAIN_PART','MAIN_METHOD'],
+                                                           ['SUB_METHODS','SUB_PARTS'],
+                                                           'WHOLE_OBJECT'],@_);
+  return bless {
+                method               => $method,
+                main_method          => $main,
+                sub_parts            => $sub_parts,
+                require_whole_object => $whole_object,
+               },$class;
+}
+
+=head2 disaggregate
+
+ Title   : disaggregate
+ Usage   : $a->disaggregate($types,$factory)
+ Function: disaggregate type list into components
+ Returns : a true value if this aggregator should be called to reaggregate
+ Args    : see below
+ Status  : Public
+
+This method is called to disaggregate a list of types into the set of
+low-level features to be retrieved from the GFF database.  The list of
+types is passed as an array reference containing a series of
+[method,source] pairs.  This method synthesizes a new set of
+[method,source] pairs, and appends them to the list of requested
+types, changing the list in situ.
+
+Arguments:
+
+  $types           reference to an array of [method,source] pairs
+
+  $factory         reference to the Adaptor object that is calling
+		   this method
+
+Note that the API allows disaggregate() to remove types from the type
+list.  This feature is probably not desirable and may be deprecated in 
+the future.
+
+=cut
+
+# this is called at the beginning to turn the pseudo-type 
+# into its component feature types
+sub disaggregate {
+  my $self  = shift;
+  my $types = shift;    # array ref of [method,source] pairs, changed in place
+  my $factory = shift;  # adaptor object; supplies parse_types()
+
+  my $sub_features = $factory->parse_types($self->get_part_names);
+  my $main_feature = $factory->parse_types($self->get_main_name);
+
+  if (@$types) {
+    my (@synthetic_types,@unchanged);
+    foreach (@$types) {
+      my ($method,$source) = @$_;
+      if (lc $method eq lc $self->get_method) { # e.g. "transcript"
+        # a component with no source of its own takes the requested source
+        push @synthetic_types,map { [$_->[0],$_->[1] || $source] } @$sub_features,@$main_feature;
+      } else {
+        push @unchanged,$_;
+      }
+    }
+    # remember what we're searching for
+    $self->components(\@synthetic_types);
+    $self->passthru(\@unchanged);
+    @$types = (@unchanged,@synthetic_types);
+  }
+
+  # we get here when no search types are listed
+  else {
+    my @stypes = map { [$_->[0],$_->[1]] }  @$sub_features,@$main_feature;
+    $self->components(\@stypes);
+    $self->passthru(undef);
+  }
+
+  return $self->component_count > 0;
+}
+
+
+=head2 aggregate
+
+ Title   : aggregate
+ Usage   : $features = $a->aggregate($features,$factory)
+ Function: aggregate a feature list into composite features
+ Returns : an array reference containing modified features
+ Args    : see below
+ Status  : Public
+
+This method is called to aggregate a list of raw GFF features into the
+set of composite features.  The method is called with an array reference to
+a set of Bio::DB::GFF::Feature objects.  It runs through the list,
+creating new composite features when appropriate.  The method result
+is an array reference containing the composite features.
+
+Arguments:
+
+  $features        reference to an array of Bio::DB::GFF::Feature objects
+
+  $factory         reference to the Adaptor object that is calling
+		   this method
+
+NOTE: The reason that the function result contains the raw features as
+well as the aggregated ones is to allow queries like this one:
+
+  @features =  $segment->features('exon','transcript:curated');
+
+Assuming that "transcript" is the name of an aggregated feature and
+that "exon" is one of its components, we do not want the transcript
+aggregator to remove features of type "exon" because the user asked
+for them explicitly.
+
+=cut
+
+sub aggregate {
+  my $self = shift;
+  my $features = shift;
+  my $factory  = shift;
+
+  my $main_method = $self->get_main_name;
+  my $matchsub    = $self->match_sub($factory) or return;   # no components: nothing to do
+  my $strictmatch = $self->strict_match();
+  my $passthru    = $self->passthru_sub($factory);
+
+  my (%aggregates,@result);
+  for my $feature (@$features) {
+    # grouped features of a component type are collected; the rest pass through
+    if ($feature->group && $matchsub->($feature)) {
+      my $key = $strictmatch->{lc $feature->method,lc $feature->source}
+          ? join ($;,$feature->group,$feature->refseq,$feature->source)
+          : join ($;,$feature->group,$feature->refseq);
+      if ($main_method && lc $feature->method eq lc $main_method) {
+        $aggregates{$key}{base} ||= $feature->clone;
+      } else {
+        push @{$aggregates{$key}{subparts}},$feature;
+      }
+      push @result,$feature if $passthru && $passthru->($feature);
+
+    } else {
+      push @result,$feature;
+    }
+  }
+
+  # aggregate components
+  my $pseudo_method        = $self->get_method;
+  my $require_whole_object = $self->require_whole_object;
+  foreach (keys %aggregates) {
+    if ($require_whole_object && $self->components) {
+      next unless $aggregates{$_}{base}; # && $aggregates{$_}{subparts};
+    }
+    my $base = $aggregates{$_}{base};
+    unless ($base) { # no base, so create one
+      my $first = $aggregates{$_}{subparts}[0];
+      $base = $first->clone;     # to inherit parent coordinate system, etc
+      $base->score(undef);
+      $base->phase(undef);
+    }
+    $base->method($pseudo_method);
+    $base->add_subfeature($_) foreach @{$aggregates{$_}{subparts}};
+    $base->adjust_bounds;
+    $base->compound(1);  # set the compound flag
+    push @result,$base;
+  }
+  @$features = @result;
+}
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $string = $a->method
+ Function: get the method type for the composite feature
+ Returns : a string
+ Args    : none
+ Status  : Protected
+
+This method is called to get the method to be assigned to the
+composite feature once it is aggregated.  It is called if the user did
+not explicitly supply a -method argument when the aggregator was
+created.
+
+This is the method that should be overridden in aggregator subclasses.
+
+=cut
+
+# default method - override in subclasses
+sub method {
+  my ($aggregator) = @_;
+  return $aggregator->{method};  # value of the -method constructor argument
+}
+
+=head2 main_name
+
+ Title   : main_name
+ Usage   : $string = $a->main_name
+ Function: get the method type for the "main" component of the feature
+ Returns : a string
+ Args    : none
+ Status  : Protected
+
+This method is called to get the method of the "main component" of the
+composite feature.  It is called if the user did not explicitly supply
+a -main_method argument when the aggregator was created.
+
+This is the method that should be overridden in aggregator subclasses.
+
+=cut
+
+# no default main method
+sub main_name {
+  my ($self) = @_;  # not used by the base class implementation
+  return;           # no main method by default
+}
+
+=head2 part_names
+
+ Title   : part_names
+ Usage   : @methods = $a->part_names
+ Function: get the methods for the non-main various components of the feature
+ Returns : a list of strings
+ Args    : none
+ Status  : Protected
+
+This method is called to get the list of methods of the subparts of the
+composite feature.  It is called if the user did not explicitly supply
+a -sub_parts argument when the aggregator was created.
+
+This is the method that should be overridden in aggregator subclasses.
+
+=cut
+
+# no default part names
+sub part_names {
+  my ($self) = @_;  # not used by the base class implementation
+  return;           # no part names by default
+}
+
+=head2 require_whole_object
+
+ Title   : require_whole_object
+ Usage   : $bool = $a->require_whole_object
+ Function: see below
+ Returns : a boolean flag
+ Args    : none
+ Status  : Internal
+
+This method returns true if the aggregator should refuse to aggregate
+an object unless both its main part and its subparts are present.
+
+=cut
+
+sub require_whole_object {
+  my ($self,@new_value) = @_;
+  my $previous = $self->{require_whole_object};
+  $self->{require_whole_object} = $new_value[0] if @new_value;
+  return $previous;  # the value held before any update made by this call
+}
+
+=head2 match_sub
+
+ Title   : match_sub
+ Usage   : $coderef = $a->match_sub($factory)
+ Function: generate a code reference that will match desired features
+ Returns : a code reference
+ Args    : see below
+ Status  : Internal
+
+This method is used internally to generate a code sub that will
+quickly filter out the raw features that we're interested in
+aggregating.  The returned sub accepts a Feature and returns true if
+we should aggregate it, false otherwise.
+
+=cut
+
+#' make emacs happy
+
+sub match_sub {
+  my ($self,$factory) = @_;
+  # component types saved by the disaggregate call; without them there is no matcher
+  my $wanted = $self->components();
+  return unless $wanted && @$wanted;
+  return $factory->make_match_sub($wanted);
+}
+
+=head2 strict_match
+
+ Title   : strict_match
+ Usage   : $strict = $a->strict_match
+ Function: generate a hashref that indicates which subfeatures
+           need to be tested strictly for matching sources before
+           aggregating
+ Returns : a hash ref
+ Status  : Internal
+
+=cut
+
+sub strict_match {
+  my $components = shift->components();
+  my %needs_source;  # keys: lowercased method and source, joined by $;
+  foreach my $type (@$components) {
+    next unless defined $type->[1];   # no source given for this component
+    $needs_source{lc $type->[0],lc $type->[1]}++;
+  }
+  return \%needs_source;
+}
+
+sub passthru_sub {
+  my ($self,$factory) = @_;
+  # the types that disaggregate() left unchanged, if any were recorded
+  my $direct = $self->passthru();
+  return unless $direct && @$direct;
+  return $factory->make_match_sub($direct);
+}
+
+=head2 components
+
+ Title   : components
+ Usage   : @array= $a->components([$components])
+ Function: get/set stored list of parsed raw feature types
+ Returns : an array in list context, an array ref in scalar context
+ Args    : new arrayref of feature types
+ Status  : Internal
+
+This method is used internally to remember the parsed list of raw
+features that we will aggregate.  The need for this subroutine is
+seen when a user requests a composite feature of type
+"clone:cosmid".  This generates a list of components in which the
+source is appended to the method, like "clone_left_end:cosmid" and
+"clone_right_end:cosmid".  components() stores this information for
+later use.
+
+=cut
+
+sub components {
+  my ($self,@new_value) = @_;
+  my $previous = $self->{components};
+  $self->{components} = $new_value[0] if @new_value;
+  return unless ref $previous;   # nothing was stored before this call
+  return wantarray ? @$previous : $previous;
+}
+
+sub component_count {
+  my @stored = $_[0]->components;  # list context: the stored entries themselves
+  return scalar @stored;
+}
+
+sub passthru {
+  my ($self,@new_value) = @_;
+  my $previous = $self->{passthru};
+  $self->{passthru} = $new_value[0] if @new_value;
+  return unless ref $previous;   # nothing was stored before this call
+  return wantarray ? @$previous : $previous;
+}
+
+sub clone {
+  my ($original) = @_;
+  # shallow copy: any references held in the hash are shared with the original
+  return bless { %$original },ref $original;
+}
+
+=head2 get_part_names
+
+ Title   : get_part_names
+ Usage   : @array = $a->get_part_names
+ Function: get list of sub-parts for this type of feature
+ Returns : an array
+ Args    : none
+ Status  : Internal
+
+This method is used internally to fetch the list of feature types that
+form the components of the composite feature.  Type names in the
+format "method:source" are recognized, as are "method" and
+Bio::DB::GFF::Typename objects as well.  It checks instance variables
+first, and if not defined calls the part_names() method.
+
+=cut
+
+sub get_part_names {
+  my ($self) = @_;
+  my $parts = $self->{sub_parts};
+  # fall back on part_names() when no sub_parts value was stored
+  return $self->part_names unless $parts;
+  return @$parts if ref $parts;   # reference: return the elements it holds
+  return $parts;                  # plain scalar: a single part name
+}
+
+=head2 get_main_name
+
+ Title   : get_main_name
+ Usage   : $string = $a->get_main_name
+ Function: get the "main" method type for this feature
+ Returns : a string
+ Args    : none
+ Status  : Internal
+
+This method is used internally to fetch the type of the "main part" of
+the feature.  It checks instance variables first, and if not defined
+calls the main_name() method.
+
+=cut
+
+sub get_main_name {
+  my ($self) = @_;
+  my $explicit = $self->{main_method};   # stored by new()
+  return defined $explicit ? $explicit : $self->main_name;
+}
+
+=head2 get_method
+
+ Title   : get_method
+ Usage   : $string = $a->get_method
+ Function: get the method type for the composite feature
+ Returns : a string
+ Args    : none
+ Status  : Internal
+
+This method is used internally to fetch the type of the method that
+will be assigned to the composite feature once it is synthesized.
+
+=cut
+
+sub get_method {
+  my ($self) = @_;
+  my $explicit = $self->{method};   # stored by new()
+  return defined $explicit ? $explicit : $self->method;
+}
+
+1;
+
+=head1 BUGS
+
+None known yet.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>,
+L<Bio::DB::GFF::Aggregator::alignment>,
+L<Bio::DB::GFF::Aggregator::clone>,
+L<Bio::DB::GFF::Aggregator::coding>,
+L<Bio::DB::GFF::Aggregator::match>,
+L<Bio::DB::GFF::Aggregator::processed_transcript>,
+L<Bio::DB::GFF::Aggregator::transcript>,
+L<Bio::DB::GFF::Aggregator::none>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Featname.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Featname.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Featname.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,153 @@
+=head1 NAME
+
+Bio::DB::GFF::Featname -- The name of a feature
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+                                   -dsn     => 'dbi:mysql:elegans42');
+
+  my $feature = Bio::DB::GFF::Featname->new(Locus => 'unc-19');
+  my $segment = $db->segment($feature);
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Featname is the name of a feature.  It contains two
+fields: name and class.  It is typically used by the Bio::DB::GFF
+module to denote a group, and is accepted by
+Bio::DB::GFF::RelSegment-E<gt>new() and Bio::DB::GFF-E<gt>segment() as a
+replacement for the -name and -class arguments.
+
+=head1 METHODS
+
+=cut
+
+package Bio::DB::GFF::Featname;
+use strict;
+use base qw(Bio::Root::RootI);
+
+use overload 
+  '""' => 'asString',
+  fallback => 1;
+
+=head2 new
+
+ Title   : new
+ Usage   : $name = Bio::DB::GFF::Featname->new($class,$name)
+ Function: create a new Bio::DB::GFF::Featname object
+ Returns : a new Bio::DB::GFF::Featname object
+ Args    : class and ID
+ Status  : Public
+
+=cut
+
+sub new {
+  my ($pack,@fields) = @_;
+  # a blessed array is used for speed; slot 0 is the class, slot 1 the name
+  return bless [@fields],$pack;
+}
+
+sub _cleanup_methods { return }   # always returns nothing (empty list / undef)
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $name->id
+ Function: return a unique ID for the combination of class and name
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This method returns a unique combination of the name and class in the
+form "class:name".  Coincidentally, this is the same format used
+by AceDB.
+
+=cut
+
+sub id {
+  my ($self) = @_;
+  return join(':',@{$self});   # the stored fields joined as "class:name"
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $name = $name->name
+ Function: return the name of the Featname
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub name   { return $_[0][1] }   # slot 1 of the blessed array
+
+=head2 class
+
+ Title   : class
+ Usage   : $class = $name->class
+ Function: return the class of the Featname
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub class  { return $_[0][0] }   # slot 0 of the blessed array
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $string = $name->asString
+ Function: same as name()
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This method is used to overload the "" operator.  It is equivalent to
+calling name().
+
+=cut
+
+sub asString { return $_[0]->name }   # stringification yields the name only
+
+=head2 clone
+
+ Title   : clone
+ Usage   : $new_clone = $type->clone;
+ Function: clone this object
+ Returns : a new Bio::DB::GFF::Featname object
+ Args    : none
+ Status  : Public
+
+This method creates an exact copy of the object.
+
+=cut
+
+sub clone {
+  my @fields = @{$_[0]};   # copy of the stored class and name
+  return bless \@fields,ref $_[0];
+}
+
+=head1 BUGS
+
+This module is still under development.
+
+=head1 SEE ALSO
+
+L<bioperl>, L<Bio::DB::GFF>, L<Bio::DB::RelSegment>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Feature.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Feature.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Feature.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1373 @@
+=head1 NAME
+
+Bio::DB::GFF::Feature -- A relative segment identified by a feature type
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>.
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Feature is a stretch of sequence that corresponds to a
+single annotation in a GFF database.  It inherits from
+Bio::DB::GFF::RelSegment, and so has all the support for relative
+addressing of this class and its ancestors.  It also inherits from
+Bio::SeqFeatureI and so has the familiar start(), stop(),
+primary_tag() and location() methods (it implements Bio::LocationI
+too, if needed).
+
+Bio::DB::GFF::Feature adds new methods to retrieve the annotation
+type, group, and other GFF attributes.  Annotation types are
+represented by Bio::DB::GFF::Typename objects, a simple class that has 
+two methods called method() and source().  These correspond to the
+method and source fields of a GFF file.
+
+Annotation groups serve the dual purpose of giving the annotation a
+human-readable name, and providing higher-order groupings of
+subfeatures into features.  The groups returned by this module are
+objects of the Bio::DB::GFF::Featname class.
+
+Bio::DB::GFF::Feature inherits from and implements the abstract
+methods of Bio::SeqFeatureI, allowing it to interoperate with other
+Bioperl modules.
+
+Generally, you will not create or manipulate Bio::DB::GFF::Feature
+objects directly, but use those that are returned by the
+Bio::DB::GFF::RelSegment-E<gt>features() method.
+
+=head2 Important note about start() vs end()
+
+If features are derived from segments that use relative addressing
+(which is the default), then start() will be greater than end() if the
+feature is on the opposite strand from the reference sequence.  This
+breaks Bio::SeqI compliance, but is necessary to avoid having the real
+genomic locations designated by start() and end() swap places when
+changing reference points.
+
+To avoid this behavior, call $segment-E<gt>absolute(1) before fetching
+features from it.  This will force everything into absolute
+coordinates.
+
+For example:
+
+ my $segment = $db->segment('CHROMOSOME_I');
+ $segment->absolute(1);
+ my @features = $segment->features('transcript');
+
+=head1 API
+
+The remainder of this document describes the public and private
+methods implemented by this module.
+
+=cut
+
+package Bio::DB::GFF::Feature;
+
+use strict;
+
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::DB::GFF::Featname;
+use Bio::DB::GFF::Typename;
+use Bio::DB::GFF::Homol;
+use Bio::LocationI;
+use Data::Dumper;
+
+use vars qw($AUTOLOAD);
+use base qw(Bio::DB::GFF::RelSegment Bio::SeqFeatureI Bio::Root::Root);
+
+#' 
+
+# segments() and get_SeqFeatures() are additional names for sub_SeqFeature().
+*segments = *get_SeqFeatures = \&sub_SeqFeature;
+
+# Tag names that get_tag_values() answers by calling the accessor method of
+# the same name instead of consulting the feature's attributes.
+my %CONSTANT_TAGS = (method=>1, source=>1, score=>1, phase=>1, notes=>1, id=>1, group=>1);
+
+=head2 new_from_parent
+
+ Title   : new_from_parent
+ Usage   : $f = Bio::DB::GFF::Feature->new_from_parent(@args);
+ Function: create a new feature object
+ Returns : new Bio::DB::GFF::Feature object
+ Args    : see below
+ Status  : Internal
+
+This method is called by Bio::DB::GFF to create a new feature using
+information obtained from the GFF database.  It is one of two similar
+constructors.  This one is called when the feature is generated from a
+RelSegment object, and should inherit the coordinate system of that 
+object.
+
+The 13 arguments are positional (sorry):
+
+  $parent       a Bio::DB::GFF::RelSegment object (or descendent)
+  $start        start of this feature
+  $stop         stop of this feature
+  $method       this feature's GFF method
+  $source       this feature's GFF source
+  $score	       this feature's score
+  $fstrand      this feature's strand (relative to the source
+                      sequence, which has its own strandedness!)
+  $phase        this feature's phase
+  $group        this feature's group (a Bio::DB::GFF::Featname object)
+  $db_id        this feature's internal database ID
+  $group_id     this feature's internal group database ID
+  $tstart       this feature's target start
+  $tstop        this feature's target stop
+
+tstart and tstop are not used for anything at the moment, since the
+information is embedded in the group object.
+
+=cut
+
+# this is called for a feature that is attached to a parent sequence,
+# in which case it inherits its coordinate reference system and strandedness
+sub new_from_parent {
+  my $package = shift;
+  # $tstart and $tstop are accepted positionally but are not used below.
+  my ($parent,$start,$stop,$method,$source,$score,
+      $fstrand,$phase,$group,$db_id,$group_id,$tstart,$tstop) = @_;
+
+  # Minus-strand features are stored with their endpoints exchanged.
+  if (defined $fstrand and $fstrand eq '-') {
+    ($start,$stop) = ($stop,$start);
+  }
+
+  # The class comes from the group when there is one, else from the parent.
+  my $class = $group ? $group->class : $parent->class;
+
+  # Fields that describe this particular feature.
+  my %fields = (
+                start    => $start,
+                stop     => $stop,
+                type     => Bio::DB::GFF::Typename->new($method,$source),
+                fstrand  => $fstrand,
+                score    => $score,
+                phase    => $phase,
+                group    => $group,
+                db_id    => $db_id,
+                group_id => $group_id,
+                class    => $class,
+               );
+
+  # The coordinate system is copied field-by-field from the parent segment.
+  my @inherited = qw(factory sourceseq strand ref refstart refstrand absolute);
+  @fields{@inherited} = @{$parent}{@inherited};
+
+  return bless \%fields, $package;
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $f = Bio::DB::GFF::Feature->new(@args);
+ Function: create a new feature object
+ Returns : new Bio::DB::GFF::Feature object
+ Args    : see below
+ Status  : Internal
+
+This method is called by Bio::DB::GFF to create a new feature using
+information obtained from the GFF database.  It is one of two similar
+constructors.  This one is called when the feature is generated
+without reference to a RelSegment object, and should therefore use its
+default coordinate system (relative to itself).
+
+The 14 arguments are positional:
+
+  $factory      a Bio::DB::GFF adaptor object (or descendent)
+  $srcseq       the source sequence
+  $start        start of this feature
+  $stop         stop of this feature
+  $method       this feature's GFF method
+  $source       this feature's GFF source
+  $score	       this feature's score
+  $fstrand      this feature's strand (relative to the source
+                      sequence, which has its own strandedness!)
+  $phase        this feature's phase
+  $group        this feature's group
+  $db_id        this feature's internal database ID
+  $group_id     this feature's internal group database ID
+  $tstart       this feature's target start
+  $tstop        this feature's target stop
+
+=cut
+
+# 'This is called when creating a feature from scratch.  It does not have
+# an inherited coordinate system.
+
+sub new {
+  my $package = shift;
+  # Fourteen positional values are unpacked; $tstart and $tstop are only
+  # referenced by the disabled branch further down.
+  my ($factory,
+      $srcseq,
+      $start,$stop,
+      $method,$source,
+      $score,$fstrand,$phase,
+      $group,$db_id,$group_id,
+      $tstart,$tstop) = @_;
+
+  my $self = bless { },$package;
+  # minus-strand features are stored with start and stop exchanged
+  ($start,$stop) = ($stop,$start) if defined($fstrand) and $fstrand eq '-';
+
+  # the class comes from the group when one is supplied, else it is 'Sequence'
+  my $class =  $group ? $group->class : 'Sequence';
+
+  @{$self}{qw(factory sourceseq start stop strand class)} =
+    ($factory,$srcseq,$start,$stop,$fstrand,$class);
+
+  # if the target start and stop are defined, then we use this information to create
+  # the reference sequence
+  # THIS SHOULD BE BUILT INTO RELSEGMENT
+  # (the leading "0 &&" switches this branch off, so the else branch always runs)
+  if (0 && $tstart ne '' && $tstop ne '') {
+    if ($tstart < $tstop) {
+      @{$self}{qw(ref refstart refstrand)} = ($group,$start - $tstart + 1,'+');
+    } else {
+      @{$self}{'start','stop'} = @{$self}{'stop','start'};
+      @{$self}{qw(ref refstart refstrand)} = ($group,$tstop + $stop - 1,'-');
+    }
+
+  } else {
+    # the feature is its own frame of reference: the source sequence,
+    # starting at 1, on the plus strand
+    @{$self}{qw(ref refstart refstrand)} = ($srcseq,1,'+');
+  }
+
+  # remaining per-feature fields; "absolute" is read directly from the
+  # factory's hash
+  @{$self}{qw(type fstrand score phase group db_id group_id absolute)} =
+    (Bio::DB::GFF::Typename->new($method,$source),$fstrand,$score,$phase,
+     $group,$db_id,$group_id,$factory->{absolute});
+
+  $self;
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $type = $f->type([$newtype])
+ Function: get or set the feature type
+ Returns : a Bio::DB::GFF::Typename object
+ Args    : a new Typename object (optional)
+ Status  : Public
+
+This method gets or sets the type of the feature.  The type is a
+Bio::DB::GFF::Typename object, which encapsulates the feature method
+and source.  
+
+The method() and source() methods described next provide shortcuts to
+the individual fields of the type.
+
+=cut
+
+sub type {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{type};
+  # With an argument, store it; the value handed back is always the one
+  # that was in place before the call.
+  if (@new_value) {
+    $self->{type} = $new_value[0];
+  }
+  return $previous;
+}
+
+=head2 method
+
+ Title   : method
+ Usage   : $method = $f->method([$newmethod])
+ Function: get or set the feature method
+ Returns : a string
+ Args    : a new method (optional)
+ Status  : Public
+
+This method gets or sets the feature method.  It is a convenience
+feature that delegates the task to the feature's type object.
+
+=cut
+
+sub method {
+  my ($self, @new_value) = @_;
+  # The method lives on the type object; this is a thin delegate that
+  # returns the value held before any update.
+  my $previous = $self->{type}->method;
+  if (@new_value) {
+    $self->{type}->method($new_value[0]);
+  }
+  return $previous;
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $source = $f->source([$newsource])
+ Function: get or set the feature source
+ Returns : a string
+ Args    : a new source (optional)
+ Status  : Public
+
+This method gets or sets the feature source.  It is a convenience
+feature that delegates the task to the feature's type object.
+
+=cut
+
+sub source {
+  my ($self, @new_value) = @_;
+  # The source lives on the type object; this is a thin delegate that
+  # returns the value held before any update.
+  my $previous = $self->{type}->source;
+  if (@new_value) {
+    $self->{type}->source($new_value[0]);
+  }
+  return $previous;
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $f->score([$newscore])
+ Function: get or set the feature score
+ Returns : a string
+ Args    : a new score (optional)
+ Status  : Public
+
+This method gets or sets the feature score.
+
+=cut
+
+sub score {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{score};
+  # An optional argument replaces the stored score; the prior value is
+  # what gets returned.
+  if (@new_value) {
+    $self->{score} = $new_value[0];
+  }
+  return $previous;
+}
+
+=head2 phase
+
+ Title   : phase
+ Usage   : $phase = $f->phase([$phase])
+ Function: get or set the feature phase
+ Returns : a string
+ Args    : a new phase (optional)
+ Status  : Public
+
+This method gets or sets the feature phase.
+
+=cut
+
+sub phase {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{phase};
+  # An optional argument replaces the stored phase; the prior value is
+  # what gets returned.
+  if (@new_value) {
+    $self->{phase} = $new_value[0];
+  }
+  return $previous;
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $strand = $f->strand
+ Function: get the feature strand
+ Returns : +1, 0, -1
+ Args    : none
+ Status  : Public
+
+Returns the strand of the feature.  Unlike the other methods, the
+strand cannot be changed once the object is created (due to coordinate
+considerations).
+
+=cut
+
+sub strand {
+  my ($self) = @_;
+
+  # A feature with no recorded strand is reported as 0.
+  if (!$self->{fstrand}) {
+    return 0;
+  }
+
+  # In absolute mode the feature's own strand is converted directly.
+  if ($self->absolute) {
+    return Bio::DB::GFF::RelSegment::_to_strand($self->{fstrand});
+  }
+
+  # Otherwise prefer the parent class's answer, falling back to the
+  # feature's own strand when that answer is false.
+  return $self->SUPER::strand || Bio::DB::GFF::RelSegment::_to_strand($self->{fstrand});
+}
+
+=head2 group
+
+ Title   : group
+ Usage   : $group = $f->group([$new_group])
+ Function: get or set the feature group
+ Returns : a Bio::DB::GFF::Featname object
+ Args    : a new group (optional)
+ Status  : Public
+
+This method gets or sets the feature group.  The group is a
+Bio::DB::GFF::Featname object, which has an ID and a class.
+
+=cut
+
+sub group {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{group};
+  # An optional argument replaces the stored group; the prior value is
+  # what gets returned.
+  if (@new_value) {
+    $self->{group} = $new_value[0];
+  }
+  return $previous;
+}
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : $display_id = $f->display_id([$display_id])
+ Function: get or set the feature display id
+ Returns : a Bio::DB::GFF::Featname object
+ Args    : a new display_id (optional)
+ Status  : Public
+
+This method is an alias for group().  It is provided for
+Bio::SeqFeatureI compatibility.
+
+=cut
+
+=head2 info
+
+ Title   : info
+ Usage   : $info = $f->info([$new_info])
+ Function: get or set the feature group
+ Returns : a Bio::DB::GFF::Featname object
+ Args    : a new group (optional)
+ Status  : Public
+
+This method is an alias for group().  It is provided for AcePerl
+compatibility.
+
+=cut
+
+# info(), display_id() and display_name() all share group()'s implementation.
+*info         = \&group;
+*display_id   = \&group;
+*display_name = \&group;
+
+=head2 target
+
+ Title   : target
+ Usage   : $target = $f->target([$new_target])
+ Function: get or set the feature target
+ Returns : a Bio::DB::GFF::Homol object
+ Args    : a new group (optional)
+ Status  : Public
+
+This method works like group(), but only returns the group if it
+implements the start() method.  This is typical for
+similarity/assembly features, where the target encodes the start and
+stop location of the alignment.
+
+The returned object is of type Bio::DB::GFF::Homol, which is a
+subclass of Bio::DB::GFF::Segment.
+
+=cut
+
+
+sub target {
+    my ($self) = @_;
+    my $group = $self->group;
+    # Only a group that exists and implements start() counts as a target.
+    if ($group && $group->can('start')) {
+        return $group;
+    }
+    return;
+}
+
+=head2 flatten_target
+
+ Title   : flatten_target
+ Usage   : $target = $f->flatten_target($f->target)
+ Function: flatten a target object
+ Returns : a string (GFF2), an array [GFF2.5] or an array ref [GFF3]
+ Args    : a target object (required), GFF version (optional) 
+ Status  : Public
+
+This method flattens a target object into text for
+GFF dumping.  If a second argument is provided, version-specific
+vocabulary is used for the flattened target.
+
+=cut
+
+sub flatten_target {
+    my $self = shift;
+    my $t    = shift || return;   # no target supplied: nothing to flatten
+    my $v    = shift;             # GFF version: 2 (default), 2.5 or 3
+
+    # Only objects that carry coordinates can be flattened.
+    return 0 unless $t->can('start');
+    my $class = $t->class;
+    my $name  = $t->name;
+    my $start = $t->start;
+    my $stop  = $t->stop;
+
+    $v ||= 2;
+    if ( $v == 2.5 ) {
+        # GFF2.5: three separate entries for the group field.
+        # (A leftover debugging print to STDERR used to sit here; it is
+        # gone so that dumping GFF no longer writes to the error stream.)
+        return (qq(Target "$class:$name"), "tstart $start", "tstop $stop");
+    }
+    elsif ( $v == 3 ) {
+        # GFF3: a [tag, value] pair; the class is not included.
+        return [Target=>"$name $start $stop"];
+    }
+    else {
+        # GFF2: a single string.
+        return qq(Target "$class:$name" $start $stop);
+    }
+}
+
+# Override the parent's refseq() so that setting the reference sequence on a
+# top-level feature also sets it on every subfeature.  The value returned is
+# whatever the parent's refseq() returned for this feature.
+sub refseq {
+  my $self   = shift;
+  my $result = $self->SUPER::refseq(@_);
+  if (@_) {
+    # NOTE(review): $newref is never read afterwards.  The call is left in
+    # place because the parent's refseq() is not visible here -- confirm it
+    # has no side effects before removing it.
+    my $newref = $self->SUPER::refseq;
+    for my $sub ($self->get_SeqFeatures) {
+      $sub->refseq(@_);   # hand the same arguments down to each subfeature
+    }
+  }
+  $result;
+}
+
+
+=head2 hit
+
+ Title   : hit
+ Usage   : $hit = $f->hit([$new_hit])
+ Function: get or set the feature hit
+ Returns : a Bio::DB::GFF::Featname object
+ Args    : a new group (optional)
+ Status  : Public
+
+This is the same as target(), for compatibility with
+Bio::SeqFeature::SimilarityPair.
+
+=cut
+
+# hit() is simply another name for target().
+*hit = \&target;
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $f->id
+ Function: get the feature ID
+ Returns : a database identifier
+ Args    : none
+ Status  : Public
+
+This method retrieves the database identifier for the feature.  It
+cannot be changed.
+
+=cut
+
+sub id {
+  my ($self) = @_;
+  return $self->{db_id};   # read-only: the stored database identifier
+}
+
+=head2 group_id
+
+ Title   : group_id
+ Usage   : $id = $f->group_id
+ Function: get the feature ID
+ Returns : a database identifier
+ Args    : none
+ Status  : Public
+
+This method retrieves the database group identifier for the feature.
+It cannot be changed.  Often the group identifier is more useful than
+the feature identifier, since it is used to refer to a complex object
+containing subparts.
+
+=cut
+
+sub group_id {
+  my ($self) = @_;
+  return $self->{group_id};   # read-only: the stored group identifier
+}
+
+=head2 clone
+
+ Title   : clone
+ Usage   : $feature = $f->clone
+ Function: make a copy of the feature
+ Returns : a new Bio::DB::GFF::Feature object
+ Args    : none
+ Status  : Public
+
+This method returns a copy of the feature.
+
+=cut
+
+sub clone {
+  my $self = shift;
+  my $copy = $self->SUPER::clone;
+
+  # The type and group are objects in their own right, so each gets its own
+  # copy: via its clone() method when it has one, otherwise by copying it as
+  # a hash and reblessing.
+  for my $accessor (qw(type group)) {
+    my $member = $copy->$accessor();
+    next unless ref $member;
+    my $duplicate = $member->can('clone')
+                  ? $member->clone
+                  : bless({%$member}, ref $member);
+    $copy->$accessor($duplicate);
+  }
+
+  # The merged-segment cache gets a fresh top-level hash in the copy.
+  my $merged = $self->{merged_segs};
+  $copy->{merged_segs} = { %$merged } if $merged;
+
+  return $copy;
+}
+
+=head2 compound
+
+ Title   : compound
+ Usage   : $flag = $f->compound([$newflag])
+ Function: get or set the compound flag
+ Returns : a boolean
+ Args    : a new flag (optional)
+ Status  : Public
+
+This method gets or sets a flag indicating that the feature is not a
+primary one from the database, but the result of aggregation.
+
+=cut
+
+sub compound {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{compound};
+  # An optional argument replaces the stored flag; the prior value is
+  # what gets returned.
+  if (@new_value) {
+    $self->{compound} = $new_value[0];
+  }
+  return $previous;
+}
+
+=head2 sub_SeqFeature
+
+ Title   : sub_SeqFeature
+ Usage   : @feat = $feature->sub_SeqFeature([$method])
+ Function: get subfeatures
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : a feature method (optional)
+ Status  : Public
+
+This method returns a list of any subfeatures that belong to the main
+feature.  For those features that contain heterogeneous subfeatures,
+you can retrieve a subset of the subfeatures by providing a method
+name to filter on.
+
+This method may also be called as segments() or get_SeqFeatures().
+
+=cut
+
+sub sub_SeqFeature {
+  my ($self, $type) = @_;
+
+  # No subfeatures recorded at all: nothing to return.
+  my $subfeat = $self->{subfeatures};
+  return unless $subfeat;
+
+  $self->sort_features;
+
+  my @found;
+  if (!$type) {
+    # No filter: flatten every per-method list into a single list.
+    @found = map { @{$_} } values %{$subfeat};
+  }
+  else {
+    # Subfeatures are stored under the lower-cased method name.
+    my $list = $subfeat->{lc $type};
+    return unless $list;
+    @found = @{$list};
+  }
+  return @found;
+}
+
+=head2 add_subfeature
+
+ Title   : add_subfeature
+ Usage   : $feature->add_subfeature($feature)
+ Function: add a subfeature to the feature
+ Returns : nothing
+ Args    : a Bio::DB::GFF::Feature object
+ Status  : Public
+
+This method adds a new subfeature to the object.  It is used
+internally by aggregators, but is available for public use as well.
+
+=cut
+
+sub add_subfeature {
+  my ($self, $feature) = @_;
+  # Subfeatures are bucketed by their lower-cased method name.
+  my $bucket = lc $feature->method;
+  $self->{subfeatures}{$bucket} ||= [];
+  return push @{ $self->{subfeatures}{$bucket} }, $feature;
+}
+
+=head2 attach_seq
+
+ Title   : attach_seq
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+           Bio::Seq object is for the *entire* sequence: ie
+           from 1 to 10000
+ Example :
+ Returns : TRUE on success
+ Args    : a Bio::PrimarySeqI compliant object
+
+=cut
+
+sub attach_seq {
+  # Deliberately does nothing and returns nothing.
+  return;
+}
+
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location 
+	   of feature on sequence or parent feature  
+ Returns : Bio::LocationI object
+ Args    : none
+
+=cut
+
+sub location {
+   my ($self) = @_;
+
+   # Load the location classes on demand, only when not already available.
+   Bio::Location::Split->can('new')  or require Bio::Location::Split;
+   Bio::Location::Simple->can('new') or require Bio::Location::Simple;
+
+   my @segments = $self->segments;
+
+   # Without subfeatures, the feature is one simple span.
+   unless (@segments) {
+       my $simple = Bio::Location::Simple->new(-start  => $self->start,
+                                               -end    => $self->stop,
+                                               -strand => $self->strand,
+                                               -seq_id => $self->seq_id);
+       return $simple;
+   }
+
+   # With subfeatures, build a split location out of each one's location.
+   my $split = Bio::Location::Split->new(-seq_id => $self->seq_id);
+   for my $segment (@segments) {
+       $split->add_sub_Location($segment->location);
+   }
+   return $split;
+}
+
+=head2 entire_seq
+
+ Title   : entire_seq
+ Usage   : $whole_seq = $sf->entire_seq()
+ Function: gives the entire sequence that this seqfeature is attached to
+ Example :
+ Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
+           sequence attached
+ Args    : none
+
+
+=cut
+
+sub entire_seq {
+    my ($self) = @_;
+    # Ask the factory for a segment covering this feature's source sequence.
+    my $factory = $self->factory;
+    return $factory->segment($self->sourceseq);
+}
+
+=head2 merged_segments
+
+ Title   : merged_segments
+ Usage   : @segs = $feature->merged_segments([$method])
+ Function: get merged subfeatures
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : a feature method (optional)
+ Status  : Public
+
+This method acts like sub_SeqFeature, except that it merges
+overlapping segments of the same type into contiguous features.  For
+those features that contain heterogeneous subfeatures, you can
+retrieve a subset of the subfeatures by providing a method name to
+filter on.
+
+A side-effect of this method is that the features are returned in
+sorted order by their start position.
+
+=cut
+
+#'
+
+sub merged_segments {
+  my $self = shift;
+  my $type = shift;
+  $type ||= '';    # prevent uninitialized variable warnings
+
+  # Repeat requests for the same method are served from the per-feature cache.
+  return @{$self->{merged_segs}{$type}} if exists $self->{merged_segs}{$type};
+
+  # Order the subfeatures by start position, breaking ties on the type.
+  my @segs = map  { $_->[0] }
+             sort { $a->[1] <=> $b->[1] ||
+                    $a->[2] cmp $b->[2] }
+             map  { [$_, $_->start, $_->type] } $self->sub_SeqFeature($type);
+
+  # attempt to merge overlapping segments
+  my @merged = ();
+  for my $s (@segs) {
+    # An explicit ternary is used here: "my $x = ... if COND" has behaviour
+    # that Perl leaves undefined.
+    my $previous = @merged ? $merged[-1] : undef;
+
+    # Scores default to 0 when missing; the eval also covers the first pass,
+    # when there is no previous segment to ask.
+    my ($pscore,$score) = (eval{$previous->score}||0,eval{$s->score}||0);
+
+    if (defined($previous)
+        && $previous->stop+1 >= $s->start
+        && $pscore == $score
+        && $previous->method eq $s->method
+       ) {
+      # Abutting or overlapping, same score and method: stretch the previous
+      # (already copied) segment to cover this one.
+      if ($self->absolute && $self->strand < 0) {
+        $previous->{start} = $s->{start};
+      } else {
+        $previous->{stop} = $s->{stop};
+      }
+      # fix up the target too
+      my $g = $previous->{group};
+      if ( ref($g) &&  $g->isa('Bio::DB::GFF::Homol')) {
+        my $cg = $s->{group};
+        $g->{stop} = $cg->{stop};
+      }
+    }
+    elsif (defined($previous)
+           && $previous->start == $s->start
+           && $previous->stop  == $s->stop
+           && $previous->method eq $s->method
+          ) {
+      # Same span and method as the previous segment: drop it.
+      next;
+    }
+    else {
+      # Start a new merged segment from a copy, so that the original
+      # subfeature is never modified by a later merge.
+      my $copy = $s->clone;
+      push @merged,$copy;
+    }
+  }
+  $self->{merged_segs}{$type} = \@merged;
+  return @merged;
+}
+
+=head2 sub_types
+
+ Title   : sub_types
+ Usage   : @methods = $feature->sub_types
+ Function: get methods of all sub-seqfeatures
+ Returns : a list of method names
+ Args    : none
+ Status  : Public
+
+For those features that contain subfeatures, this method will return a
+unique list of method names of those subfeatures, suitable for use
+with sub_SeqFeature().
+
+=cut
+
+sub sub_types {
+  my ($self) = @_;
+  # Subfeatures are stored in a hash keyed by method name, so the keys
+  # are already the unique list of methods.
+  my $subfeat = $self->{subfeatures};
+  return unless $subfeat;
+  return keys %{$subfeat};
+}
+
+=head2 attributes
+
+ Title   : attributes
+ Usage   : @attributes = $feature->attributes($name)
+ Function: get the "attributes" on a particular feature
+ Returns : an array of string
+ Args    : attribute name (optional)
+ Status  : public
+
+Some GFF version 2 files use the groups column to store a series of
+attribute/value pairs.  In this interpretation of GFF, the first such
+pair is treated as the primary group for the feature; subsequent pairs
+are treated as attributes.  Two attributes have special meaning:
+"Note" is for backward compatibility and is used for unstructured text
+remarks.  "Alias" is considered as a synonym for the feature name.
+
+ @gene_names = $feature->attributes('Gene');
+ @aliases    = $feature->attributes('Alias');
+
+If no name is provided, then attributes() returns a flattened hash of
+attribute=E<gt>value pairs.  This lets you do:
+
+  %attributes = $feature->attributes;
+
+=cut
+
+sub attributes {
+  my $self = shift;
+  my $factory = $self->factory;
+  # A feature without a database ID has nothing to look up.
+  defined(my $id = $self->id) or return;
+  # Any remaining arguments (such as an attribute name) are passed through
+  # to the factory unchanged.
+  return $factory->attributes($id,@_);
+}
+
+
+=head2 notes
+
+ Title   : notes
+ Usage   : @notes = $feature->notes
+ Function: get the "notes" on a particular feature
+ Returns : an array of string
+ Args    : none
+ Status  : public
+
+Some GFF version 2 files use the groups column to store various notes
+and remarks.  Adaptors can elect to store the notes in the database,
+or just ignore them.  For those adaptors that store the notes, the
+notes() method will return them as a list.
+
+=cut
+
+sub notes {
+  my ($self) = @_;
+  # Notes are the attributes stored under the 'Note' tag.
+  return $self->attributes('Note');
+}
+
+=head2 aliases
+
+ Title   : aliases
+ Usage   : @aliases = $feature->aliases
+ Function: get the "aliases" on a particular feature
+ Returns : an array of string
+ Args    : none
+ Status  : public
+
+This method will return a list of attributes of type 'Alias'.
+
+=cut
+
+sub aliases {
+  my ($self) = @_;
+  # Aliases are the attributes stored under the 'Alias' tag.
+  return $self->attributes('Alias');
+}
+
+
+
+=head2 Autogenerated Methods
+
+ Title   : AUTOLOAD
+ Usage   : @subfeat = $feature->Method
+ Function: Return subfeatures using autogenerated methods
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : none
+ Status  : Public
+
+Any method that begins with an initial capital letter will be passed
+to AUTOLOAD and treated as a call to sub_SeqFeature with the method
+name used as the method argument.  For instance, this call:
+
+  @exons = $feature->Exon;
+
+is equivalent to this call:
+
+  @exons = $feature->sub_SeqFeature('exon');
+
+=cut
+
+=head2 SeqFeatureI methods
+
+The following Bio::SeqFeatureI methods are implemented:
+
+primary_tag(), source_tag(), all_tags(), has_tag(), each_tag_value() [renamed get_tag_values()].
+
+=cut
+
+*primary_tag = \&method;
+*source_tag  = \&source;
+sub all_tags {
+  my ($self) = @_;
+  # The tag names are the keys of the feature's attribute list.  Names of
+  # autogenerated subfeature methods are deliberately not included.
+  my %value_of  = $self->attributes;
+  my @tag_names = keys %value_of;
+  return @tag_names;
+}
+*get_all_tags = \&all_tags;
+
+sub has_tag {
+  my $self = shift;
+  my $tag  = shift;
+  # all_tags() already consults the attributes, so no separate attribute
+  # lookup is made here.
+  my %tags = map {$_=>1} ( $self->all_tags );
+
+  # True (1) when the tag is present, undef otherwise.
+  return $tags{$tag};
+}
+
+*each_tag_value = \&get_tag_values;
+
+sub get_tag_values {
+  my ($self, $tag) = @_;
+
+  # Built-in tags are answered by the accessor method of the same name.
+  if ($CONSTANT_TAGS{$tag}) {
+    return $self->$tag();
+  }
+
+  # Next, look the tag up among the feature's attributes.
+  my $atts = $self->attributes;
+  if ($atts && $atts->{$tag}) {
+    return @{$atts->{$tag}};
+  }
+
+  # Last resort: call the capitalised tag as a method, which lands in
+  # AUTOLOAD and is treated as a subfeature request.
+  my $autogenerated = ucfirst $tag;
+  return $self->$autogenerated();
+}
+
+sub AUTOLOAD {
+  my $self = $_[0];
+  # Split the fully qualified name into its package and bare method name.
+  my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
+
+  # Object destruction lands here too; there is nothing to do for it.
+  if ($func_name eq 'DESTROY') {
+    return;
+  }
+
+  # A leading capital letter means "give me the subfeatures of this method".
+  if ($func_name =~ /^[A-Z]/) {
+    return $self->sub_SeqFeature($func_name);
+  }
+
+  # Anything else is a genuinely missing method.
+  $self->throw(qq(Can't locate object method "$func_name" via package "$pack"));
+}#'
+
+=head2 adjust_bounds
+
+ Title   : adjust_bounds
+ Usage   : $feature->adjust_bounds
+ Function: adjust the bounds of a feature
+ Returns : ($start,$stop,$strand)
+ Args    : none
+ Status  : Public
+
+This method adjusts the boundaries of the feature to enclose all its
+subfeatures.  It returns the new start, stop and strand of the
+enclosing feature.
+
+=cut
+
+# Adjust a feature so that its boundaries are synched with its subparts'
+# boundaries.  This works recursively, so subfeatures can contain other
+# features.
+#
+# Args    : an optional true "shrink" flag.  When set, the first subfeature
+#           visited overwrites the current start/stop outright instead of
+#           only widening them.
+# Returns : the list (start, stop, strand) of this feature after adjustment,
+#           where strand is the value of $self->strand.
+sub adjust_bounds {
+  my $self = shift;
+  my $shrink = shift;
+  my $g = $self->{group};
+
+  # counters used to recognise the first subfeature (and the first
+  # homology target) when shrinking
+  my $first = 0;
+  my $tfirst = 0;
+  if (my $subfeat = $self->{subfeatures}) {
+    for my $list (values %$subfeat) {
+      for my $feat (@$list) {
+	# fix up our bounds to hold largest subfeature
+	my($start,$stop,$strand) = $feat->adjust_bounds($shrink);
+
+	# NOTE(review): $strand is the third value returned by the subfeature's
+	# adjust_bounds(), i.e. its strand() result, whereas fstrand is tested
+	# against the string '-' below -- confirm the two use the same
+	# representation.
+	if (defined($self->{fstrand})) {
+	  $self->debug("Subfeature's strand ($strand) doesn't match parent strand ($self->{fstrand})\n") if $self->{fstrand} ne $strand;
+	} else {
+	  # a parent without a strand adopts the subfeature's
+	  $self->{fstrand} = $strand;
+	}
+
+	# normalise the subfeature's endpoints to low <= high
+	my ($low,$high)  = $start < $stop ? ($start,$stop) : ($stop,$start);
+	if ($shrink && !$first++) {
+	  # first subfeature resets start & stop:
+	  $self->{start} = $self->{fstrand} ne '-' ? $low : $high;
+	  $self->{stop}  = $self->{fstrand} ne '-' ? $high : $low;
+	} else {
+	  # otherwise only ever widen; for '-' features start holds the
+	  # high coordinate and stop the low one
+	  if ($self->{fstrand} ne '-') {
+	    $self->{start} = $low
+	      if (!defined($self->{start})) || $low < $self->{start};
+	    $self->{stop}  = $high
+	      if (!defined($self->{stop}))  || $high  > $self->{stop};
+	  } else {
+	    $self->{start} = $high
+	      if (!defined($self->{start})) || $high > $self->{start};
+	    $self->{stop}  = $low
+	      if (!defined($self->{stop}))  || $low  < $self->{stop};
+	  }
+	}
+
+	# fix up endpoints of targets too (for homologies only)
+	my $h = $feat->group;
+	next unless $h && $h->isa('Bio::DB::GFF::Homol');
+	next unless $g && $g->isa('Bio::DB::GFF::Homol');
+
+	# from here on $start/$stop are the subfeature's target coordinates
+	($start,$stop) = ($h->{start},$h->{stop});
+	if ($shrink && !$tfirst++) {
+	    $g->{start} = $start;
+	    $g->{stop}  = $stop;
+	} else {
+	  # widen our own target in whichever direction the subfeature's
+	  # target runs
+	  if ($start <= $stop) {
+	    $g->{start} = $start if (!defined($g->{start})) || $start < $g->{start};
+	    $g->{stop}  = $stop  if (!defined($g->{stop}))  || $stop  > $g->{stop};
+	  } else {
+	    $g->{start} = $start if (!defined($g->{start})) || $start > $g->{start};
+	    $g->{stop}  = $stop  if (!defined($g->{stop}))  || $stop  < $g->{stop};
+	  }
+	}
+      }
+    }
+  }
+
+  ($self->{start},$self->{stop},$self->strand);
+}
+
+=head2 sort_features
+
+ Title   : sort_features
+ Usage   : $feature->sort_features
+ Function: sort features
+ Returns : nothing
+ Args    : none
+ Status  : Public
+
+This method sorts subfeatures in ascending order by their start
+position.  For reverse strand features, it sorts subfeatures in
+descending order.  After this is called sub_SeqFeature will return the
+features in order.
+
+This method is called internally by merged_segments().
+
+=cut
+
+# sort features
+sub sort_features {
+  my ($self) = @_;
+
+  # Sorting is attempted only once per object; the flag is raised on the
+  # first call whether or not anything ends up being sorted.
+  return if $self->{sorted}++;
+
+  my $strand  = $self->strand        or return;
+  my $subfeat = $self->{subfeatures} or return;
+
+  for my $type (keys %$subfeat) {
+    if ($strand > 0) {
+      # Forward strand: ascending start position.
+      my @keyed = map { [$_, $_->start] } @{$subfeat->{$type}};
+      my @ordered = sort { $a->[1] <=> $b->[1] } @keyed;
+      $subfeat->{$type} = [ map { $_->[0] } @ordered ];
+    }
+    if ($strand < 0) {
+      # Reverse strand: descending start position.
+      my @keyed = map { [$_, $_->start] } @{$subfeat->{$type}};
+      my @ordered = sort { $b->[1] <=> $a->[1] } @keyed;
+      $subfeat->{$type} = [ map { $_->[0] } @ordered ];
+    }
+  }
+}
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $string = $feature->asString
+ Function: return human-readable representation of feature
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This method returns a human-readable representation of the feature and
+is called by the overloaded "" operator.
+
+=cut
+
+sub asString {
+  my ($self) = @_;
+  my $type  = $self->type;
+  my $group = $self->group;
+  # "type(group)" when the feature has a group, otherwise just the type.
+  return $group ? "$type($group)" : $type;
+}
+
+sub name {
+  my ($self) = @_;
+  # The group doubles as the feature's name; without one, defer to the
+  # parent class.
+  my $group = $self->group;
+  return $group if $group;
+  return $self->SUPER::name;
+}
+
+=head2 gff_string
+
+ Title   : gff_string
+ Usage   : $string = $feature->gff_string
+ Function: return GFF2 or GFF2.5 representation of feature
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub gff_string {
+  my $self = shift;
+  my $version = $self->version;
+
+  # gff3_string and gff_string are synonymous if the version is set to 3
+  return $self->gff3_string(@_) if $version == 3;
+
+  my ($start,$stop) = ($self->start,$self->stop);
+
+  # the defined() tests prevent uninitialized variable warnings, when dealing with clone objects
+  # whose endpoints may be undefined
+  ($start,$stop) = ($stop,$start) if defined($start) && defined($stop) && $start > $stop;
+
+  my ($class,$name) = ('','');
+  # map a strand of -1/0/+1 onto the GFF strand symbols
+  my $strand = ('-','.','+')[$self->strand+1];
+
+  my @group;
+
+  if (my $t = $self->target) {
+    # features with a target lead the group field with the flattened target
+    push @group, $version == 2.5 ? $self->flatten_target($t,2.5)
+                                 : $self->flatten_target($t);
+  }
+  elsif (my $g = $self->group) {
+    # otherwise lead with "class name", single-quoting names that contain
+    # embedded whitespace
+    $class = $g->class || '';
+    $name  = $g->name  || '';
+    push @group, ($name =~ /\S\s\S/) ? "$class '$name'" : "$class $name";
+  }
+
+  # add exhaustive list of attributes
+  my $att = $self->attributes;
+  for my $tag ( keys %$att ) {
+      for my $v ( @{$att->{$tag}} ) {
+          # quote values with embedded whitespace; work on a copy so that the
+          # attribute values themselves are left unmodified
+          my $shown = $v =~ /\S\s+\S/ ? qq("$v") : $v;
+          push @group, qq($tag $shown);
+      }
+  }
+
+  my $group_field = join ' ; ',@group;
+  my $ref = $self->refseq;
+  my $n   = ref($ref) ? $ref->name : $ref;
+  my $phase = $self->phase;
+  $phase = '.' unless defined $phase;
+  # nine tab-separated columns; undefined values are written as '.'
+  return join("\t",
+              $n,
+              $self->source,$self->method,
+              (defined $start ? $start : '.'),
+              (defined $stop  ? $stop  : '.'),
+              (defined $self->score ? $self->score : '.'),
+              (defined $strand ? $strand : '.'),
+              $phase,
+              $group_field);
+}
+
+=head2 gff3_string
+
+ Title   : gff3_string
+ Usage   : $string = $feature->gff3_string([$recurse])
+ Function: return GFF3 representation of feature
+ Returns : a string
+ Args    : An optional flag, which if true, will cause the feature to recurse over
+           subfeatures.
+ Status  : Public
+
+=cut
+
+# Render this feature as one newline-terminated GFF3 line, optionally
+# followed by the lines of its subfeatures.
+#   $recurse - if true, append the gff3_string of every sub_SeqFeature,
+#              passing this feature's ID as their parent
+#   $parent  - ID of the enclosing feature (supplied by the recursion);
+#              emitted as a Parent attribute when defined and non-empty
+sub gff3_string {
+  my $self = shift;
+  my ($recurse,$parent) = @_;
+  my ($start,$stop) = ($self->start,$self->stop);
+
+  # the defined() tests prevent uninitialized variable warnings, when dealing with clone objects
+  # whose endpoints may be undefined
+  ($start,$stop) = ($stop,$start) if defined($start) && defined($stop) && $start > $stop;
+
+  my $strand = ('-','.','+')[$self->strand+1];
+  my $ref = $self->refseq;
+  my $n   = ref($ref) ? $ref->name : $ref;
+  my $phase = $self->phase;
+  $phase = '.' unless defined $phase;
+
+  my ($class,$name) = ('','');
+  my @group;
+  if (my $g = $self->group) {
+    $class = $g->class || '';
+    $name  = $g->name  || '';
+    # $class is always defined here, so the ID is always "class:name"
+    $name  = "$class:$name";
+    push @group,[ID =>  $name] if !defined($parent) || $name ne $parent;
+  }
+
+  push @group,[Parent => $parent] if defined $parent && $parent ne '';
+
+  if (my $t = $self->target) {
+    $strand = '-' if $t->stop < $t->start;
+    push @group, $self->flatten_target($t,3);
+  }
+
+  # attributes() is consumed here as a flat list of tag/value pairs
+  my @attributes = $self->attributes;
+  while (@attributes) {
+    push @group,[shift(@attributes),shift(@attributes)]
+  }
+  my $group_field = join ';',map {join '=',_escape($_->[0]),_escape($_->[1])} @group;
+
+  # Substitute the '.' placeholder only for missing (undef or empty) values.
+  # A plain truth test would also swallow a legitimate 0, such as a score of
+  # 0 or a relative coordinate of 0.
+  my $or_dot = sub { my $v = shift; (defined $v && $v ne '') ? $v : '.' };
+  my $score  = $self->score;
+  my $string = join("\t",$n,$self->source,$self->method,
+                         $or_dot->($start),$or_dot->($stop),
+                         $or_dot->($score),$or_dot->($strand),$phase,$group_field);
+  $string .= "\n";
+  if ($recurse) {
+    foreach ($self->sub_SeqFeature) {
+      $string .= $_->gff3_string(1,$name);
+    }
+  }
+  $string;
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $feature->version()
+ Function: get/set the GFF version to be returned by gff_string
+ Returns : the GFF version (default is 2)
+ Args    : the GFF version (2, 2.5 or 3)
+ Status  : Public
+
+=cut
+
+# Get/set the GFF version used by gff_string().  A true argument is stored;
+# the stored value is returned, or 2 when nothing true has been stored.
+sub version {
+  my $self        = shift;
+  my $new_version = shift;
+  if ($new_version) {
+    $self->{version} = $new_version;
+  }
+  my $current = $self->{version};
+  return $current ? $current : 2;
+}
+
+
+# Percent-encode (upper-case hex) every character outside the allowed set,
+# then turn spaces into '+'.  Used for GFF3 attribute tags and values.
+sub _escape {
+  my ($text) = @_;
+  $text =~ s/([^a-zA-Z0-9_. :?^*\(\)\[\]@!-])/uc sprintf("%%%02x",ord($1))/eg;
+  $text =~ tr/ /+/;
+  return $text;
+}
+
+=head2 cmap_viewer_link()
+
+ Title   : cmap_viewer_link
+ Usage   : $link = $feature->cmap_viewer_link($data_source)
+ Function: returns a URL link to the corresponding feature in cmap
+ Returns : a string
+ Args    : the name of the cmap data source
+ Status  : Public
+
+If the adaptor supports it (an integrated cmap/gbrowse installation), this
+returns a link to the map; otherwise it returns a link to a feature search
+on the feature name.  See the cmap documentation for more information.
+
+This function is intended primarily to be used in gbrowse conf files. 
+For example:
+
+  link       = sub {my $self = shift; return $self->cmap_viewer_link(data_source);}
+
+=cut
+
+
+# Build a CMap URL for this feature.  If the adaptor provides
+# create_cmap_viewer_link() its result is used; when that yields nothing
+# true, fall back to a feature-name search URL.
+sub cmap_viewer_link {
+  # Use ONLY if CMap is installed 
+  my ($self, $data_source) = @_;
+  my $group_id = $self->group_id;
+  my $factory  = $self->factory; # aka adaptor
+
+  my $link_str;
+  $link_str = $factory->create_cmap_viewer_link(
+      data_source => $data_source,
+      group_id    => $group_id,
+  ) if $factory->can("create_cmap_viewer_link");
+
+  my $name = $self->name();
+  return $link_str if $link_str;
+
+  # no viewer link available: search CMap by feature name instead
+  return '/cgi-bin/cmap/feature_search?features='
+    . $name
+    . '&search_field=feature_name&order_by=&data_source='
+    . $data_source
+    . '&submit=Submit';
+}
+
+=head1 A Note About Similarities
+
+The current default aggregator for GFF "similarity" features creates a
+composite Bio::DB::GFF::Feature object of type "gapped_alignment".
+The target() method for the feature as a whole will return a
+RelSegment object that is as long as the extremes of the similarity
+hit target, but will not necessarily be the same length as the query
+sequence.  The length of each "similarity" subfeature will be exactly
+the same length as its target().  These subfeatures are essentially
+the HSPs of the match.
+
+The following illustrates this:
+
+  @similarities = $segment->feature('similarity:BLASTN');
+  $sim          = $similarities[0];
+
+  print $sim->type;        # yields "gapped_similarity:BLASTN"
+
+  $query_length  = $sim->length;
+  $target_length = $sim->target->length;  # $query_length != $target_length
+
+  @matches = $sim->Similarity;   # use autogenerated method
+  $query1_length  = $matches[0]->length;
+  $target1_length = $matches[0]->target->length; # $query1_length == $target1_length
+
+If you merge segments by calling merged_segments(), then the length of
+the query sequence segments will no longer necessarily equal the
+length of the targets, because the alignment information will have
+been lost.  Nevertheless, the targets are adjusted so that the first
+and last base pairs of the query match the first and last base pairs
+of the target.
+
+=cut
+
+1;
+
+=head1 BUGS
+
+This module is still under development.
+
+=head1 SEE ALSO
+
+L<bioperl>, L<Bio::DB::GFF>, L<Bio::DB::RelSegment>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Homol.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Homol.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Homol.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+=head1 NAME
+
+Bio::DB::GFF::Homol -- A segment of DNA that is homologous to another
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>.
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Homol is a named subtype of Bio::DB::GFF::Segment.  It
+inherits all the methods of its parent, and was created primarily to
+allow for isa() queries and for compatibility with
+Ace::Sequence::Homol.  
+
+A Homol object is typically returned as the method result of the
+Bio::DB::GFF::Feature-E<gt>target() method.
+
+=head1 METHODS
+
+=cut
+
+package Bio::DB::GFF::Homol;
+use strict;
+
+use base qw(Bio::DB::GFF::Segment);
+
+=head2 name
+
+ Title   : name
+ Usage   : $name = $homol->name
+ Function: get the ID of the homology object
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+# The name of a homology segment is its reference sequence.
+sub name {
+  my $self = shift;
+  return $self->refseq;
+}
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $name = $homol->asString
+ Function: same as name(), for operator overloading
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+# String form of the object: identical to name().
+sub asString {
+  my $self = shift;
+  return $self->name;
+}
+
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $homol->id
+ Function: get database ID in class:id format
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+# Database ID in "class:name" form, read straight from the object's
+# 'class' and 'name' slots.
+sub id {
+  my ($self) = @_;
+  my ($class, $name) = @{$self}{qw(class name)};
+  return "$class:$name";
+}
+
+# Build a Homol from an existing segment by copying its positional and
+# reference slots.  The invocant is ignored: the result is always blessed
+# into this package.
+sub new_from_segment {
+  my (undef, $segment) = @_;
+  my @copied = qw(factory sourceseq start stop strand class ref refstart refstrand);
+  my %new;
+  @new{@copied} = @{$segment}{@copied};
+  return bless \%new,__PACKAGE__;
+}
+
+=head1 BUGS
+
+This module is still under development.
+
+=head1 SEE ALSO
+
+L<bioperl>, L<Bio::DB::GFF>, L<Bio::DB::RelSegment>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/RelSegment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/RelSegment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/RelSegment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1162 @@
+=head1 NAME
+
+Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>.
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::RelSegment is a stretch of sequence that can handle
+relative coordinate addressing.  It inherits from
+Bio::DB::GFF::Segment, and is the base class for
+Bio::DB::GFF::Feature.
+
+In addition to the source sequence, a relative segment has a
+"reference sequence", which is used as the basis for its coordinate
+system.  The reference sequence can be changed at will, allowing you
+freedom to change the "frame of reference" for features contained
+within the segment.  For example, by setting a segment's reference
+sequence to the beginning of a gene, you can view all other features
+in gene-relative coordinates.
+
+The reference sequence and the source sequence must be on the same
+physical stretch of DNA, naturally.  However, they do not have to be
+on the same strand.  The strandedness of the reference sequence
+determines whether coordinates increase to the right or the left.
+
+Generally, you will not create or manipulate Bio::DB::GFF::RelSegment
+objects directly, but use those that are returned by the Bio::DB::GFF
+module.
+
+=head2 An Example
+
+To understand how relative coordinates work, consider the following
+example from the C. elegans database.  First we create the appropriate
+GFF accessor object (the factory):
+
+   my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans',
+                              -adaptor=>'dbi:mysqlopt');
+
+Now we fetch out a segment based on cosmid clone ZK909:
+
+  my $seg = $db->segment('ZK909');
+
+If we call the segment's refseq() method, we see that the base of the
+coordinate system is the sequence "ZK909", and that its start and
+stop positions are 1 and the length of the cosmid:
+
+  print $seg->refseq;
+  => ZK909
+
+  print $seg->start,' - ',$seg->stop;
+  => 1 - 33782
+
+As a convenience, the "" operator is overloaded in this class, to give
+the reference sequence, and start and stop positions:
+
+  print $seg;
+  => ZK909:1,33782
+
+Internally, Bio::DB::GFF::RelSegment has looked up the absolute
+coordinates of this segment and maintains the source sequence and the
+absolute coordinates relative to the source sequence.  We can see this 
+information using sourceseq() (inherited from Bio::DB::GFF::Segment)
+and the abs_start() and abs_end() methods:
+
+  print $seg->sourceseq;
+  => CHROMOSOME_I
+
+  print $seg->abs_start,' - ',$seg->abs_end;
+  => 14839545 - 14873326
+
+We can also put the segment into absolute mode, so that it behaves
+like Bio::DB::GFF::Segment, and always represents coordinates on the source
+sequence.  This is done by passing a true value to the absolute()
+method:
+
+  $seg->absolute(1);
+  print $seg;
+  => CHROMOSOME_I:14839545,14873326
+
+We can change the reference sequence at any time.  One way is to call
+the segment's refseq() method, giving it the ID (and optionally the
+class) of another landmark on the genome.  For example, if we know
+that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in
+ZK337-relative coordinates:
+
+  $seg->refseq('ZK337');
+  print $seg;
+  => ZK337:-33670,111
+
+We can call the segment's features() method in order to get the list
+of contigs that overlap this segment (in the C. elegans database,
+contigs have feature type "Sequence:Link"):
+
+  @links = $seg->features('Sequence:Link');
+
+We can now set the reference sequence to the first of these contigs like so:
+
+  $seg->refseq($links[0]);
+  print $seg;
+  => Sequence:Link(LINK_Y95D11A):3997326,4031107
+
+=cut
+
+package Bio::DB::GFF::RelSegment;
+
+use strict;
+
+use Bio::DB::GFF::Feature;
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::RangeI;
+
+use base qw(Bio::DB::GFF::Segment);
+
+use overload '""' => 'asString',
+             'bool' => sub { overload::StrVal(shift) },
+             fallback=>1;
+
+=head1 API
+
+The remainder of this document describes the API for
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+=head2 new
+
+ Title   : new
+ Usage   : $s = Bio::DB::GFF::RelSegment->new(@args)
+ Function: create a new relative segment
+ Returns : a new Bio::DB::GFF::RelSegment object
+ Args    : see below
+ Status  : Public
+
+This method creates a new Bio::DB::GFF::RelSegment object.  Generally
+this is called automatically by the Bio::DB::GFF module and
+derivatives.
+
+This function uses a named-argument style:
+
+ -factory      a Bio::DB::GFF::Adaptor to use for database access
+ -seq          ID of the source sequence
+ -class        class of the source sequence
+ -start        start of the desired segment relative to source sequence
+ -stop         stop of the desired segment relative to source sequence
+ -ref          ID of the reference sequence
+ -refclass     class of the reference sequence
+ -offset       0-based offset from source sequence to start of segment
+ -length       length of desired segment
+ -absolute, -force_absolute
+               use absolute coordinates, rather than coordinates relative
+               to the start of self or the reference sequence
+
+The -seq argument accepts the ID of any landmark in the database.  The
+stored source sequence becomes whatever the GFF file indicates is the
+proper sequence for this landmark.  A class of "Sequence" is assumed
+unless otherwise specified in the -class argument.
+
+If the argument to -seq is a Bio::DB::GFF::Featname object (such as
+returned by the group() method), then the class is taken from that.
+
+The optional -start and -stop arguments specify the end points for the
+retrieved segment.  For those who do not like 1-based indexing,
+-offset and -length are provided.  If both -start/-stop and
+-offset/-length are provided, the latter overrides the former.
+Generally it is not a good idea to mix metaphors.
+
+-ref and -refclass together indicate a sequence to be used for
+relative coordinates.  If not provided, the source sequence indicated
+by -seq is used as the reference sequence.  If the argument to -ref is
+a Bio::DB::GFF::Featname object (such as returned by the group() method),
+then the class is taken from that.
+
+-force_absolute should be used if you wish to skip the lookup of the
+absolute position of the source sequence that ordinarily occurs when
+you create a relative segment.  In this case, the source sequence must
+be a sequence that has been specified as the "source" in the GFF file.
+
+=cut
+
+# Create a new Bio::DB::GFF::RelSegment Object
+# arguments are:
+#      -factory    => factory and DBI interface
+#      -seq        => $sequence_name
+#      -start      => $start_relative_to_sequence
+#      -stop       => $stop_relative_to_sequence
+#      -ref        => $sequence which establishes coordinate system
+#      -offset     => 0-based offset relative to sequence
+#      -length     => length of segment
+#      -nocheck    => turn off checking, force segment to be constructed
+#      -absolute   => use absolute coordinate addressing
+
+# Constructor.  Resolves the named landmark to absolute coordinates through
+# the factory's abscoords() (unless an absolute position is supplied) and
+# builds one RelSegment per coordinate set returned.
+# Returns every segment built in list context, the first one in scalar
+# context, and nothing if the landmark lookup fails.  Throws when no
+# -factory is given.
+sub new {
+  my $package = shift;
+  my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) =
+    rearrange([
+	       'FACTORY',
+	       [qw(NAME SEQ SEQUENCE SOURCESEQ)],
+	       [qw(START BEGIN)],
+	       [qw(STOP END)],
+	       [qw(REFSEQ REF REFNAME)],
+	       [qw(CLASS SEQCLASS)],
+	       qw(REFCLASS),
+	       [qw(OFFSET OFF)],
+	       [qw(LENGTH LEN)],
+	       [qw(ABSOLUTE)],
+	       [qw(NOCHECK FORCE)],
+	     ],@_);
+
+  $package = ref $package if ref $package;
+  $factory or $package->throw("new(): provide a -factory argument");
+
+  # to allow people to use segments as sources
+  if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) {
+    $start = 1              unless defined $start;
+    $stop  = $name->length  unless defined $stop;
+    return $name->subseq($start,$stop);
+  }
+
+  my @object_results;
+
+  # support for Featname objects
+  if (ref($name) && $name->can('class')) {
+    $class = $name->class;
+    $name  = $name->name;
+  }
+
+  # if the class of the landmark is not specified then default to 'Sequence'
+  $class ||= eval{$factory->default_class} || 'Sequence';
+
+  # confirm that indicated sequence is actually in the database!
+  my @abscoords;
+
+  # abscoords() will now return an array ref, each element of which is
+  # ($absref,$absclass,$absstart,$absstop,$absstrand)
+
+  # -nocheck implies absolute addressing starting at position 1
+  if ($nocheck) {
+    $force_absolute++;
+    $start = 1;
+  }
+
+  if ($force_absolute && defined($start)) { # absolute position is given to us
+    @abscoords = ([$name,$class,$start,$stop,'+']);
+  } else {
+    my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return;
+    @abscoords = @$result;
+  }
+
+  foreach (@abscoords) {
+    my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_;
+    $sname = $name unless defined $sname;
+    # per-result working copies, so one result cannot affect the next
+    my ($this_start,$this_stop,$this_length) = ($start,$stop,$length);
+
+    # partially fill in object
+    my $self = bless { factory => $factory },$package;
+
+    $absstrand ||= '+';
+
+    if ($absstart > $absstop) { # AAARGH!  DATA FORMAT ERROR!  FIX.
+	($absstart,$absstop) = ($absstop,$absstart);
+	$absstrand = $absstrand eq '+' ? '-' : '+';
+    }
+
+    # an explicit offset/length overrides start and stop
+    if (defined $offset) {
+      warn "new(): bad idea to call new() with both a start and an offset"
+	if defined $this_start;
+      $this_start = $offset+1;
+    }
+    if (defined $this_length) {
+      warn "new(): bad idea to call new() with both a stop and a length"
+	if defined $this_stop;
+      $this_stop = $this_start + $length - 1;
+    }
+
+    # this allows a SQL optimization way down deep
+    $self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop);
+
+    $this_start     = 1                    if !defined $this_start;
+    $this_stop      = $absstop-$absstart+1 if !defined $this_stop;
+    $this_length = $this_stop - $this_start + 1;
+
+    # now offset to correct subsegment based on desired start and stop
+    if ($force_absolute) {
+      ($this_start,$this_stop) = ($absstart,$absstop);
+      $self->absolute(1);
+    } elsif ($absstrand eq '+') {
+      $this_start =  $absstart   + $this_start - 1;
+      $this_stop  =  $this_start + $this_length - 1;
+    } else {
+      $this_start =  $absstop - ($this_start - 1);
+      $this_stop  =  $absstop - ($this_stop - 1);
+    }
+
+    # handle truncation in either direction
+    # This only happens if the segment runs off the end of
+    # the reference sequence
+    if ($factory->strict_bounds_checking &&
+	(($this_start < $absstart) || ($this_stop > $absstop))) {
+      # return empty if we are completely off the end of the ref seq
+      next unless $this_start<=$absstop && $this_stop>=$absstart;
+      if (my $a = $factory->abscoords($absref,'Sequence')) {
+	my $refstart = $a->[0][2];
+	my $refstop  = $a->[0][3];
+	if ($this_start < $refstart) {
+	  $this_start = $refstart;
+	  $self->{truncated}{start}++;
+	}
+	if ($this_stop > $refstop) {
+	  # NOTE(review): clamps to $absstop although the test is against
+	  # $refstop -- confirm this asymmetry is intended
+	  $this_stop = $absstop;
+	  $self->{truncated}{stop}++;
+	}
+      }
+    }
+
+    @{$self}{qw(sourceseq start stop strand class)}
+      = ($absref,$this_start,$this_stop,$absstrand,$absclass);
+
+    # handle reference sequence
+    if (defined $refseq) {
+      # only ask objects for their class; a plain sequence ID is not a
+      # safe invocant for can() (same guard as used for $name above)
+      $refclass = $refseq->class if ref($refseq) && $refseq->can('class');
+      $refclass ||= 'Sequence';
+      # NOTE(review): the result is unpacked here as a flat list, whereas
+      # the comment above says abscoords() returns an array ref -- confirm
+      my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass);
+      unless ($refref eq $absref) {
+	$self->error("reference sequence is on $refref but source sequence is on $absref");
+	return;
+      }
+      $refstart = $refstop if $refstrand eq '-';
+      @{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand);
+    } else {
+      # no explicit reference: the landmark itself is the reference, anchored
+      # at its high end when it lies on the minus strand
+      $absstart = $absstop if $absstrand eq '-';
+      @{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand);
+    }
+    push @object_results,$self;
+  }
+
+  return wantarray ? @object_results : $object_results[0];
+}
+
+# overridden methods
+# start, stop, length
+# Start coordinate.  In absolute mode this is the stored stop for a
+# minus-strand segment and the stored start otherwise; in relative mode the
+# stored start is converted with _abs2rel().
+sub start {
+  my ($self) = @_;
+  if ($self->absolute) {
+    return $self->{stop} if $self->strand < 0;
+    return $self->{start};
+  }
+  return $self->_abs2rel($self->{start});
+}
+# End coordinate (also available as stop()).  In absolute mode this is the
+# stored start for a minus-strand segment and the stored stop otherwise; in
+# relative mode the stored stop is converted with _abs2rel().
+sub end {
+  my ($self) = @_;
+  if ($self->absolute) {
+    return $self->{start} if $self->strand < 0;
+    return $self->{stop};
+  }
+  return $self->_abs2rel($self->{stop});
+}
+*stop = \&end;
+
+# Number of positions spanned by the segment in absolute coordinates;
+# returns nothing when abs_end is undefined.
+sub length {
+  my ($self) = @_;
+  defined $self->abs_end or return;
+  my $span = $self->abs_end - $self->abs_start;
+  return abs($span) + 1;
+}
+
+# Absolute start.  In absolute mode this is the smaller of the parent
+# class's abs_start/abs_end; otherwise it defers to the parent's abs_start.
+sub abs_start {
+  my $self = shift;
+  return $self->SUPER::abs_start(@_) unless $self->absolute;
+
+  my ($first,$second) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
+  return $first < $second ? $first : $second;
+}
+# Absolute end (also available as abs_stop()).  In absolute mode this is
+# the larger of the parent class's abs_start/abs_end; otherwise it defers
+# to the parent's abs_end.
+sub abs_end {
+  my $self = shift;
+  return $self->SUPER::abs_end(@_) unless $self->absolute;
+
+  my ($first,$second) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
+  return $first > $second ? $first : $second;
+}
+
+*abs_stop = \&abs_end;
+
+=head2 refseq
+
+ Title   : refseq
+ Usage   : $ref = $s->refseq([$newseq] [,$newseqclass])
+ Function: get/set reference sequence
+ Returns : current reference sequence
+ Args    : new reference sequence and class (optional)
+ Status  : Public
+
+This method will get or set the reference sequence.  Called with no
+arguments, it returns the current reference sequence.  Called with
+either a sequence ID and class, a Bio::DB::GFF::Segment object (or
+subclass) or a Bio::DB::GFF::Featname object, it will set the current
+reference sequence and return the previous one.
+
+The method will generate an exception if you attempt to set the
+reference sequence to a sequence that isn't contained in the database,
+or one that has a different source sequence from the segment.
+
+=cut
+
+#'
+# Get or set the reference sequence used for relative coordinates.
+#   refseq()                 return the current reference (the source
+#                            sequence while in absolute mode)
+#   refseq($newref)          set the reference; class defaults to 'Sequence'
+#   refseq($class,$newref)   set the reference with an explicit class
+# NOTE(review): with two arguments the class is read FIRST, the reverse of
+# the order shown in the POD usage line -- confirm which is intended.
+# Setting a reference switches the segment to relative mode and returns the
+# reference that was in effect before the call.  Throws if the new reference
+# is undef or is not on this segment's source sequence.
+sub refseq {
+  my $self = shift;
+  my $g    = $self->{ref};
+  if (@_) {
+    my ($newref,$newclass);
+    if (@_ == 2) {
+      $newclass = shift;
+      $newref   = shift;
+    } else {
+      $newref   = shift;
+      $newclass = 'Sequence';
+    }
+
+    defined $newref or $self->throw('refseq() called with an undef reference sequence');
+
+    # support for Featname objects
+    $newclass = $newref->class if ref($newref) && $newref->can('class');
+
+    # $self->throw("Cannot define a segment's reference sequence in terms of itself!")
+    # if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self);
+
+    my ($refsource,undef,$refstart,$refstop,$refstrand);
+    # Only objects can be segments.  Calling isa() on a plain sequence ID
+    # treats the string as a package name, which older perls reject for
+    # names that do not begin with an identifier character (e.g. "2L").
+    if (ref($newref) && $newref->isa('Bio::DB::GFF::RelSegment')) {
+      ($refsource,undef,$refstart,$refstop,$refstrand) =
+	($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-');
+    } else {
+      my $coords = $self->factory->abscoords($newref,$newclass);
+      foreach (@$coords) { # find the appropriate one
+	($refsource,undef,$refstart,$refstop,$refstrand) = @$_;
+	last if $refsource eq $self->{sourceseq};
+      }
+	
+    }
+    $self->throw("can't set reference sequence: $newref and $self are on different sequence segments")
+      unless $refsource eq $self->{sourceseq};
+
+    @{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand);
+    $self->absolute(0);
+  }
+  return $self->absolute ? $self->sourceseq : $g;
+}
+
+
+=head2 abs_low
+
+ Title   : abs_low
+ Usage   : $s->abs_low
+ Function: the absolute lowest coordinate of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is for GadFly compatibility, and returns the low coordinate in
+absolute coordinates;
+
+=cut
+
+# Smaller of abs_start and abs_end.
+sub abs_low {
+  my ($self) = @_;
+  my ($first,$second) = ($self->abs_start,$self->abs_end);
+  return $first if $first < $second;
+  return $second;
+}
+
+=head2 abs_high
+
+ Title   : abs_high
+ Usage   : $s->abs_high
+ Function: the absolute highest coordinate of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is for GadFly compatibility, and returns the high coordinate in
+absolute coordinates;
+
+=cut
+
+# Larger of abs_start and abs_end.
+sub abs_high {
+  my ($self) = @_;
+  my ($first,$second) = ($self->abs_start,$self->abs_end);
+  return $first if $first > $second;
+  return $second;
+}
+
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $s->asString
+ Function: human-readable representation of the segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This method will return a human-readable representation of the
+segment.  It is the overloaded method call for the "" operator.
+
+Currently the format is:
+
+  refseq:start,stop
+
+=cut
+
+# Human-readable "label:start,stop" form of the segment.  In absolute mode
+# the parent class's asString is used.  When the reference is an object
+# whose own ref() is this very segment, the absolute reference and
+# coordinates are shown instead of the relative ones.
+sub asString {
+  my ($self) = @_;
+  if ($self->absolute) {
+    return $self->SUPER::asString;
+  }
+
+  my $label = $self->{ref};
+  my $start = $self->start || '';
+  my $stop  = $self->stop  || '';
+
+  my $self_referential = ref($label)
+    && overload::StrVal($self) eq overload::StrVal($label->ref);
+  if ($self_referential) {
+    $label = $self->abs_ref;
+    $start = $self->abs_start;
+    $stop  = $self->abs_end;
+  }
+  return "$label:$start,$stop";
+}
+
+# The name of a relative segment is its string form.
+sub name {
+  my $self = shift;
+  return $self->asString;
+}
+
+=head2 absolute
+
+ Title   : absolute
+ Usage   : $abs = $s->absolute([$abs])
+ Function: get/set absolute coordinates
+ Returns : a boolean flag
+ Args    : new setting for flag (optional)
+ Status  : Public
+
+Called with a boolean flag, this method controls whether to display
+relative coordinates (relative to the reference sequence) or absolute
+coordinates (relative to the source sequence).  It will return the
+previous value of the setting.
+
+=cut
+
+# Get/set the absolute-coordinates flag.  Always returns the value the flag
+# had before the call; any argument, even a false one, replaces it.
+sub absolute {
+  my ($self, @new_value) = @_;
+  my $previous = $self->{absolute};
+  $self->{absolute} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+=head2 features
+
+ Title   : features
+ Usage   : @features = $s->features(@args)
+ Function: get features that overlap this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see below
+ Status  : Public
+
+This method will find all features that overlap the segment and return
+a list of Bio::DB::GFF::Feature objects.  The features will use
+coordinates relative to the reference sequence in effect at the time
+that features() was called.
+
+The returned list can be limited to certain types of feature by
+filtering on their method and/or source.  In addition, it is possible
+to obtain an iterator that will step through a large number of
+features sequentially.
+
+Arguments can be provided positionally or using the named arguments
+format.  In the former case, the arguments are a list of feature types
+in the format "method:source".  Either method or source can be
+omitted, in which case the missing component is treated as a wildcard.
+If no colon is present, then the type is treated as a method name.
+Multiple arguments are ORed together.
+
+Examples:
+
+ @f = $s->features('exon:curated');           # all curated exons
+ @f = $s->features('exon:curated','intron');  # curated exons and all introns
+ @f = $s->features('similarity:.*EST.*');     # all similarities
+                                              # having something to do
+                                              # with ESTs
+
+The named parameter form gives you control over a few options:
+
+  -types      an array reference to type names in the format
+	      "method:source"
+
+  -merge     Whether to apply aggregators to the generated features (default yes)
+
+  -rare      Turn on an optimization suitable for a relatively rare feature type,
+             where it will be faster to filter by feature type first
+             and then by position, rather than vice versa.
+
+  -attributes a hashref containing a set of attributes to match
+
+  -range_type One of 'overlapping', 'contains', or 'contained_in'
+
+  -iterator  Whether to return an iterator across the features.
+
+  -binsize   A true value will create a set of artificial features whose
+             start and stop positions indicate bins of the given size, and
+             whose scores are the number of features in the bin.  The
+             class and method of the feature will be set to "bin",
+             its source to "method:source", and its group to "bin:method:source".
+             This is a handy way of generating histograms of feature density.
+
+-merge is a boolean flag that controls whether the adaptor's
+aggregators will be applied to the features returned by this method.
+
+If -iterator is true, then the method returns a single scalar value
+consisting of a Bio::SeqIO object.  You can call next_seq() repeatedly
+on this object to fetch each of the features in turn.  If iterator is
+false or absent, then all the features are returned as a list.
+
+The -attributes argument is a hashref containing one or more
+attributes to match against:
+
+  -attributes => { Gene => 'abc-1',
+                   Note => 'confirmed' }
+
+Attribute matching is simple string matching, and multiple attributes
+are ANDed together.
+
+=cut
+
+#'
+
+# return all features that overlap with this segment;
+# optionally modified by a list of types to filter on
+# Features overlapping this segment: the caller's arguments are normalised
+# by _process_feature_args() and passed to the factory's
+# overlapping_features().
+sub features {
+  my ($self, @request) = @_;
+  my @query = $self->_process_feature_args(@request);
+  my $factory = $self->factory;
+  return $factory->overlapping_features(@query);
+}
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   :
+ Function: returns the top level sequence features
+ Returns : L<Bio::SeqFeatureI> objects
+ Args    : none
+
+Segments do not ordinarily return any subfeatures.
+
+=cut
+
+# A SEGMENT DOES NOT HAVE SUBFEATURES!
+# Always returns nothing (empty list / undef).
+sub get_SeqFeatures {
+  return;
+}
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : $seq->feature_count()
+ Function: Return the number of SeqFeatures attached to a sequence
+ Returns : integer representing the number of SeqFeatures
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> for more information.
+
+=cut
+
+# Total number of features: the sum of the per-type counts returned by
+# types(-enumerate=>1).
+sub feature_count {
+    my ($self) = @_;
+    my %count_of = $self->types(-enumerate=>1);
+    my $total = 0;
+    $total += $_ for values %count_of;
+    return $total;
+}
+
+=head2 get_feature_stream
+
+ Title   : get_feature_stream
+ Usage   : $stream = $s->get_feature_stream(@args)
+ Function: get a stream of features that overlap this segment
+ Returns : a Bio::SeqIO::Stream-compliant stream
+ Args    : see below
+ Status  : Public
+
+This is the same as features(), but returns a stream.  Use like this:
+
+ $stream = $s->get_feature_stream('exon');
+ while (my $exon = $stream->next_seq) {
+    print $exon->start,"\n";
+ }
+
+=cut
+
+# Same as features(), but always requests an iterator.  Named arguments
+# (first argument starts with '-') are passed through with -iterator=>1
+# appended; a positional list is treated as the list of types.
+sub get_feature_stream {
+  my $self = shift;
+  my @args;
+  if (defined($_[0]) && $_[0] =~ /^-/) {
+    @args = (@_,-iterator=>1);
+  }
+  else {
+    @args = (-types=>\@_,-iterator=>1);
+  }
+  return $self->features(@args);
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : $stream = $s->get_seq_stream(@args)
+ Function: get a stream of features that overlap this segment
+ Returns : a Bio::SeqIO::Stream-compliant stream
+ Args    : see below
+ Status  : Public
+
+This is the same as get_feature_stream(), and is provided for Bioperl
+compatibility.  Use like this:
+
+ $stream = $s->get_seq_stream('exon');
+ while (my $exon = $stream->next_seq) {
+    print $exon->start,"\n";
+ }
+
+=cut
+
+# Alias: get_seq_stream() runs the same code as get_feature_stream().
+*get_seq_stream = \&get_feature_stream;
+
+
+=head2 overlapping_features
+
+ Title   : overlapping_features
+ Usage   : @features = $s->overlapping_features(@args)
+ Function: get features that overlap this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is an alias for the features() method, and takes the same
+arguments.
+
+=cut
+
+# Alias: overlapping_features() runs the same code as features().
+*overlapping_features = \&features;
+
+=head2 contained_features
+
+ Title   : contained_features
+ Usage   : @features = $s->contained_features(@args)
+ Function: get features that are contained by this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is identical in behavior to features() except that it returns
+only those features that are completely contained within the segment,
+rather than any that overlap.
+
+=cut 
+
+# return all features completely contained within this segment
+sub contained_features {
+  my $self = shift;
+
+  # Temporarily clear the "whole" flag so that _process_feature_args()
+  # includes this segment's -start/-stop in the query; local() restores the
+  # previous value when this sub returns.
+  local $self->{whole} = 0;
+
+  my @query   = $self->_process_feature_args(@_);
+  my $factory = $self->factory;
+  return $factory->contained_features(@query);
+}
+
+# *contains = \&contained_features;
+
+=head2 contained_in
+
+ Title   : contained_in
+ Usage   : @features = $s->contained_in(@args)
+ Function: get features that contain this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is identical in behavior to features() except that it returns
+only those features that completely contain the segment.
+
+=cut
+
+# return all features that completely contain this segment
+sub contained_in {
+  my $self = shift;
+
+  # Temporarily clear the "whole" flag so that _process_feature_args()
+  # includes this segment's -start/-stop in the query; local() restores the
+  # previous value when this sub returns.
+  local $self->{whole} = 0;
+
+  my @query   = $self->_process_feature_args(@_);
+  my $factory = $self->factory;
+  return $factory->contained_in(@query);
+}
+
+=head2 delete
+
+ Title   : delete
+ Usage   : $db->delete(@args)
+ Function: delete features
+ Returns : count of features deleted -- if available
+ Args    : numerous, see below
+ Status  : public
+
+This method deletes all features that overlap the specified region or
+are of a particular type.  If no arguments are provided and the -force
+argument is true, then deletes ALL features.
+
+Arguments:
+
+ -type,-types  Either a single scalar type to be deleted, or an
+               reference to an array of types.
+
+ -range_type   Control the range type of the deletion.  One of "overlaps" (default)
+               "contains" or "contained_in"
+
+Examples:
+
+  $segment->delete(-type=>['intron','repeat:repeatMasker']);  # remove all introns & repeats
+  $segment->delete(-type=>['intron','repeat:repeatMasker'],
+		   -range_type => 'contains');                # remove all introns & repeats
+                                                              # strictly contained in segment
+
+IMPORTANT NOTE: This method only deletes features.  It does *NOT*
+delete the names of groups that contain the deleted features.  Group
+IDs will be reused if you later load a feature with the same group
+name as one that was previously deleted.
+
+NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the
+result code from the SQL DELETE operation.  Some dbd drivers return the
+count of rows deleted, while others return 0E0.  Caveat emptor.
+
+=cut
+
+# delete the features on this segment, optionally filtered by type and range type
+sub delete {
+  my $self = shift;
+
+  # Named arguments: -type/-types (a scalar or array ref of types) and
+  # -range_type ('overlaps', 'contains' or 'contained_in').
+  my ($type,$range_type) =
+    rearrange([[qw(TYPE TYPES)],'RANGE_TYPE'],@_);
+  my $types = $self->factory->parse_types($type);  # parse out list of types
+
+  # Features that merely overlap the segment are deleted unless the caller
+  # asks for a stricter relationship.
+  $range_type ||= 'overlaps';
+
+  # The return value is whatever the factory's _delete() reports.
+  return $self->factory->_delete({
+                                  segments   => [$self],
+                                  types      => $types,
+                                  range_type => $range_type
+                                  });
+}
+
+=head2 _process_feature_args
+
+ Title   : _process_feature_args
+ Usage   : @args = $s->_process_feature_args(@args)
+ Function: preprocess arguments passed to features, 
+           contained_features, and overlapping_features
+ Returns : a list of parsed arguments
+ Args    : see features()
+ Status  : Internal
+
+This is an internal method that is used to check and format the
+arguments to features() before passing them on to the adaptor.
+
+=cut 
+
+sub _process_feature_args {
+  my $self       = shift;
+
+  my ($ref,$class,$start,$stop,$strand,$whole)
+    = @{$self}{qw(sourceseq class start stop strand whole)};
+
+  # Swap the endpoints for minus-strand segments; strand may be undefined.
+  ($start,$stop) = ($stop,$start) if defined $strand && $strand eq '-';
+
+  my @args = (-ref=>$ref,-class=>$class);
+
+  # indicating that we are fetching the whole segment allows certain
+  # SQL optimizations.
+  push @args,(-start=>$start,-stop=>$stop) unless $whole;
+
+  if (@_) {
+    if (defined $_[0] && $_[0] =~ /^-/) {
+      # named arguments: pass them through unchanged
+      push @args,@_;
+    } else {
+      # positional arguments: treat them as a list of feature types
+      my @types = @_;
+      push @args,-types=>\@types;
+    }
+  }
+
+  # The segment itself is always passed along as -parent.
+  push @args,-parent=>$self;
+  @args;
+}
+
+=head2 types
+
+ Title   : types
+ Usage   : @types = $s->types([-enumerate=>1])
+ Function: list feature types that overlap this segment
+ Returns : a list of Bio::DB::GFF::Typename objects or a hash
+ Args    : see below
+ Status  : Public
+
+The types() method will return a list of Bio::DB::GFF::Typename
+objects, each corresponding to a feature that overlaps the segment.
+If the optional -enumerate parameter is set to a true value, then the
+method will return a hash in which the keys are the type names and the 
+values are the number of times a feature of that type is present on
+the segment.  For example:
+
+  %count = $s->types(-enumerate=>1);
+
+=cut 
+
+# wrapper for lower-level types() call.
+sub types {
+  my $self = shift;
+  my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)};
+
+  # Swap the endpoints for minus-strand segments.  The strand slot may be
+  # undefined, so test definedness first (as _process_feature_args() does)
+  # to avoid comparing an undefined value.
+  ($start,$stop) = ($stop,$start) if defined $strand && $strand eq '-';
+
+  # A first argument that does not look like "-name" means the caller passed
+  # a plain list of types; otherwise forward the named arguments unchanged.
+  my @args;
+  if (@_ && $_[0] !~ /^-/) {
+    @args = (-type => \@_)
+  } else {
+    @args = @_;
+  }
+  $self->factory->types(-ref  => $ref,
+			-class => $class,
+			-start=> $start,
+			-stop => $stop,
+			@args);
+}
+
+=head1 Internal Methods
+
+The following are internal methods and should not be called directly.
+
+=head2 new_from_segment
+
+ Title   : new_from_segment
+ Usage   : $s = $segment->new_from_segment(@args)
+ Function: create a new relative segment
+ Returns : a new Bio::DB::GFF::RelSegment object
+ Args    : see below
+ Status  : Internal
+
+This constructor is used internally by the subseq() method.  It forces
+the new segment into the Bio::DB::GFF::RelSegment package, regardless
+of the package that it is called from.  This causes subclass-specific
+information, such as feature types, to be dropped when a subsequence
+is created.
+
+=cut
+
+sub new_from_segment {
+  my ($invocant, $segment) = @_;
+
+  # Only these slots are carried over from the source segment; anything
+  # else it holds is left behind.
+  my @copied_slots = qw(factory sourceseq start stop strand class ref refstart refstrand);
+
+  my %fields;
+  @fields{@copied_slots} = @{$segment}{@copied_slots};
+
+  # The invocant is ignored on purpose: the result is always blessed into
+  # this package.
+  return bless \%fields, __PACKAGE__;
+}
+
+=head2 _abs2rel
+
+ Title   : _abs2rel
+ Usage   : @coords = $s->_abs2rel(@coords)
+ Function: convert absolute coordinates into relative coordinates
+ Returns : a list of relative coordinates
+ Args    : a list of absolute coordinates
+ Status  : Internal
+
+This is used internally to map from absolute to relative
+coordinates. It does not take the offset of the reference sequence
+into account, so please use abs2rel() instead.
+
+=cut
+
+sub _abs2rel {
+  my $self = shift;
+  return unless defined $_[0];
+
+  my @relative;
+  if ($self->absolute) {
+    # absolute mode: coordinates are handed back unchanged
+    @relative = @_;
+  } else {
+    my ($origin, $ref_strand) = @{$self}{qw(refstart refstrand)};
+    my $minus_strand = defined($ref_strand) && $ref_strand eq '-';
+    for my $abs (@_) {
+      push @relative, $minus_strand ? $origin - $abs + 1
+                                    : $abs - $origin + 1;
+    }
+  }
+
+  # a lone coordinate requested in scalar context is returned as a value,
+  # not as the element count
+  return $relative[0] if !wantarray and @relative == 1;
+  return @relative;
+}
+
+=head2 rel2abs
+
+ Title   : rel2abs
+ Usage   : @coords = $s->rel2abs(@coords)
+ Function: convert relative coordinates into absolute coordinates
+ Returns : a list of absolute coordinates
+ Args    : a list of relative coordinates
+ Status  : Public
+
+This function takes a list of positions in relative coordinates to the
+segment, and converts them into absolute coordinates.
+
+=cut
+
+sub rel2abs {
+  my $self = shift;
+
+  my @absolute;
+  if ($self->absolute) {
+    # absolute mode: coordinates are handed back unchanged
+    @absolute = @_;
+  } else {
+    my ($origin, $direction) = ($self->abs_start, $self->abs_strand);
+    my $reverse = $direction < 0;
+    for my $rel (@_) {
+      push @absolute, $reverse ? $origin - $rel + 1
+                               : $rel + $origin - 1;
+    }
+  }
+
+  # a lone coordinate requested in scalar context is returned as a value,
+  # not as the element count
+  return $absolute[0] if !wantarray and @absolute == 1;
+  return @absolute;
+}
+
+=head2 abs2rel
+
+ Title   : abs2rel
+ Usage   : @rel_coords = $s->abs2rel(@abs_coords)
+ Function: convert absolute coordinates into relative coordinates
+ Returns : a list of relative coordinates
+ Args    : a list of absolute coordinates
+ Status  : Public
+
+This function takes a list of positions in absolute coordinates
+and returns a list expressed in relative coordinates.
+
+=cut
+
+sub abs2rel {
+  my $self = shift;
+
+  my @relative;
+  if ($self->absolute) {
+    # absolute mode: coordinates are handed back unchanged
+    @relative = @_;
+  } else {
+    my ($origin, $direction) = ($self->abs_start, $self->abs_strand);
+    my $reverse = $direction < 0;
+    for my $abs (@_) {
+      push @relative, $reverse ? $origin - $abs + 1
+                               : $abs - $origin + 1;
+    }
+  }
+
+  # a lone coordinate requested in scalar context is returned as a value,
+  # not as the element count
+  return $relative[0] if !wantarray and @relative == 1;
+  return @relative;
+}
+
+sub subseq {
+  my ($self, @range) = @_;
+  my $segment = $self->SUPER::subseq(@range);
+
+  # whatever class the parent produced, hand back a plain RelSegment
+  return bless $segment, __PACKAGE__;
+}
+
+sub strand {
+  my ($self) = @_;
+
+  # In absolute mode the stored strand symbol is translated directly.
+  return _to_strand($self->{strand}) if $self->absolute;
+
+  # Otherwise the strand is the direction from start to stop.
+  my $from = $self->start;
+  my $to   = $self->stop;
+  if (defined $from and defined $to) {
+    return $to <=> $from;
+  }
+  return 0;
+}
+
+# Translate a strand symbol into a number: '-' gives -1, '+' gives +1 and
+# anything else, including an undefined value, gives 0.
+sub _to_strand {
+  my $s = shift;
+  return 0 unless defined $s;   # no strand recorded: avoid comparing undef
+  return -1 if $s eq '-';
+  return +1 if $s eq '+';
+  return 0;
+}
+
+=head2 Bio::RangeI Methods
+
+The following Bio::RangeI methods are supported:
+
+overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
+
+=cut
+
+sub intersection {
+  my $self   = shift;
+  my @ranges = @_;
+  unshift @ranges, $self if ref $self;
+
+  # Ranges that are not RelSegments are left to the parent class.
+  unless ($ranges[0]->isa('Bio::DB::GFF::RelSegment')) {
+    return $self->SUPER::intersection(@_);
+  }
+
+  # Every range must sit on the same reference sequence; the result runs
+  # from the largest low coordinate to the smallest high coordinate.
+  my $shared_ref = $ranges[0]->abs_ref;
+  my ($lo, $hi);
+  for my $range (@ranges) {
+    return unless $range->can('abs_ref');
+    return unless $shared_ref eq $range->abs_ref;
+    $lo = $range->abs_low  if !defined($lo) || $lo < $range->abs_low;
+    $hi = $range->abs_high if !defined($hi) || $hi > $range->abs_high;
+  }
+
+  return unless $lo < $hi;
+  return Bio::DB::GFF::RelSegment->new(
+    -factory => $self->factory,
+    -seq     => $shared_ref,
+    -start   => $lo,
+    -stop    => $hi,
+  );
+}
+
+sub overlaps {
+  my $self    = shift;
+  my ($other) = @_;
+
+  # Ranges that are not RelSegments are left to the parent class.
+  unless ($other->isa('Bio::DB::GFF::RelSegment')) {
+    return $self->SUPER::overlaps(@_);
+  }
+
+  # No overlap when the references differ or one range ends before the
+  # other begins.
+  return if $self->abs_ref ne $other->abs_ref
+         or $self->abs_low  > $other->abs_high
+         or $self->abs_high < $other->abs_low;
+  return 1;
+}
+
+sub contains {
+  my $self = shift;
+  my($other,$so) = @_;
+
+  # Ranges that are not RelSegments are left to the parent class's
+  # contains() test (not its overlaps() test, which answers a different
+  # question).
+  return $self->SUPER::contains(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
+
+  # Containment requires the same reference sequence and that this
+  # segment's absolute range spans the other's on both sides.
+  return if $self->abs_ref ne $other->abs_ref;
+  return unless $self->abs_low <= $other->abs_low;
+  return unless $self->abs_high >= $other->abs_high;
+  1;
+}
+
+sub union {
+  my $self   = shift;
+  my @ranges = @_;
+  unshift @ranges, $self if ref $self;
+
+  # Ranges that are not RelSegments are left to the parent class.
+  unless ($ranges[0]->isa('Bio::DB::GFF::RelSegment')) {
+    return $self->SUPER::union(@_);
+  }
+
+  # Every range must sit on the same reference sequence; the result runs
+  # from the smallest low coordinate to the largest high coordinate.
+  my $shared_ref = $ranges[0]->abs_ref;
+  my ($lo, $hi);
+  for my $range (@ranges) {
+    return unless $range->can('abs_ref');
+    return unless $shared_ref eq $range->abs_ref;
+    $lo = $range->abs_low  if !defined($lo) || $lo > $range->abs_low;
+    $hi = $range->abs_high if !defined($hi) || $hi < $range->abs_high;
+  }
+
+  return $self->new(
+    -factory => $self->factory,
+    -seq     => $shared_ref,
+    -start   => $lo,
+    -stop    => $hi,
+  );
+}
+
+# Constant version number: always 0.
+sub version {
+  return 0;
+}
+
+
+1;
+
+__END__
+
+=head1 BUGS
+
+Schemas need some work.
+
+=head1 SEE ALSO
+
+L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.  
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Segment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Segment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Segment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,883 @@
+=head1 NAME
+
+Bio::DB::GFF::Segment -- Simple DNA segment object
+
+=head1 SYNOPSIS
+
+See L<Bio::DB::GFF>.
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Segment provides the basic representation of a range of
+DNA contained in a GFF database.  It is the base class from which the
+Bio::DB::GFF::RelSegment and Bio::DB::GFF::Feature classes are
+derived.
+
+Generally, you will not create or manipulate Bio::DB::GFF::Segment
+objects directly, but use those that are returned by the Bio::DB::GFF
+module.
+
+=cut
+
+package Bio::DB::GFF::Segment;
+
+use strict;
+use Bio::Annotation::Collection;
+
+use base qw(Bio::Root::Root Bio::RangeI Bio::SeqI Bio::Das::SegmentI);
+
+use overload 
+  '""'     => 'asString',
+  eq       => 'equals',
+  fallback => 1;
+
+=head1 API
+
+The remainder of this document describes the API for
+Bio::DB::GFF::Segment.
+
+=cut
+
+=head2 new
+
+ Title   : new
+ Usage   : $s = Bio::DB::GFF::Segment->new(@args)
+ Function: create a new segment
+ Returns : a new Bio::DB::GFF::Segment object
+ Args    : see below
+ Status  : Public
+
+This method creates a new Bio::DB::GFF::Segment object.  Generally
+this is called automatically by the Bio::DB::GFF module and
+derivatives.
+
+There are five positional arguments:
+
+ $factory      a Bio::DB::GFF::Adaptor to use for database access
+ $sourceclass  class of the source sequence
+ $sourceseq    ID of the source sequence
+ $start        start of the desired segment relative to source sequence
+ $stop         stop of the desired segment relative to source sequence
+
+=cut
+
+sub new {
+  my ($invocant, $factory, $segclass, $segname, $start, $stop) = @_;
+
+  # When the name is an object that knows its own class, that class wins
+  # over the one passed in; with neither, fall back to 'Sequence'.
+  if (ref($segname) && $segname->can('class')) {
+    $segclass = $segname->class;
+  }
+  $segclass = 'Sequence' unless $segclass;
+
+  $invocant->throw("->new(): provide a factory argument") unless $factory;
+
+  # Accept either a class name or an existing object as the invocant.
+  my $class = ref($invocant) || $invocant;
+
+  my %segment = (
+    factory   => $factory,
+    sourceseq => $segname,
+    class     => $segclass,
+    start     => $start,
+    stop      => $stop,
+    strand    => 0,
+  );
+  return bless \%segment, $class;
+}
+
+# read-only accessors
+
+=head2 factory
+
+ Title   : factory
+ Usage   : $s->factory
+ Function: get the factory object
+ Returns : a Bio::DB::GFF::Adaptor
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the Bio::DB::GFF::Adaptor object used 
+to create the segment.
+
+=cut
+
+# Read-only accessor for the "factory" slot.
+sub factory {
+  my ($self) = @_;
+  return $self->{factory};
+}
+
+# start, stop, length
+
+=head2 start
+
+ Title   : start
+ Usage   : $s->start
+ Function: start of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the start of the segment.
+
+=cut
+
+# Read-only accessor for the "start" slot.
+sub start {
+  my ($self) = @_;
+  return $self->{start};
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $s->end
+ Function: end of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the end of the segment.
+
+=cut
+
+# Read-only accessor; the end coordinate lives in the "stop" slot.
+sub end {
+  my ($self) = @_;
+  return $self->{stop};
+}
+
+=head2 stop
+
+ Title   : stop
+ Usage   : $s->stop
+ Function: stop of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is an alias for end(), provided for AcePerl compatibility.
+
+=cut
+
+# stop() shares the code reference of end().
+*stop = \&end;
+
+=head2 length
+
+ Title   : length
+ Usage   : $s->length
+ Function: length of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+Returns the length of the segment.  Always a positive number.
+
+=cut
+
+# Number of positions spanned, counting both endpoints; the absolute value
+# makes the result independent of which endpoint is larger.
+sub length {
+  my ($self) = @_;
+  my $span = $self->{start} - $self->{stop};
+  return abs($span) + 1;
+}
+
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $s->strand
+ Function: strand of segment
+ Returns : +1,0,-1
+ Args    : none
+ Status  : Public
+
+Returns the strand on which the segment resides, either +1, 0 or -1.
+
+=cut
+
+# This implementation always reports strand 0, whatever the object holds.
+sub strand {
+  return 0;
+}
+
+=head2 low
+
+ Title   : low
+ Usage   : $s->low
+ Function: return lower coordinate
+ Returns : lower coordinate
+ Args    : none
+ Status  : Public
+
+Returns the lower coordinate, either start or end.
+
+=cut
+
+sub low {
+  my ($self) = @_;
+  my ($first, $second) = ($self->start, $self->stop);
+
+  # whichever endpoint is numerically smaller
+  if ($first < $second) {
+    return $first;
+  }
+  return $second;
+}
+# abs_low() shares the code reference of low().
+*abs_low = \&low;
+
+=head2 high
+
+ Title   : high
+ Usage   : $s->high
+ Function: return higher coordinate
+ Returns : higher coordinate
+ Args    : none
+ Status  : Public
+
+Returns the higher coordinate, either start or end.
+
+=cut
+
+sub high {
+  my ($self) = @_;
+  my ($first, $second) = ($self->start, $self->stop);
+
+  # whichever endpoint is numerically larger
+  if ($first > $second) {
+    return $first;
+  }
+  return $second;
+}
+# abs_high() shares the code reference of high().
+*abs_high = \&high;
+
+=head2 sourceseq
+
+ Title   : sourceseq
+ Usage   : $s->sourceseq
+ Function: get the segment source
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns the name of the source sequence for this segment.
+
+=cut
+
+# Read-only accessor for the "sourceseq" slot.
+sub sourceseq {
+  my ($self) = @_;
+  return $self->{sourceseq};
+}
+
+=head2 class
+
+ Title   : class
+ Usage   : $s->class([$newclass])
+ Function: get the source sequence class
+ Returns : a string
+ Args    : new class (optional)
+ Status  : Public
+
+Gets or sets the class for the source sequence for this segment.
+
+=cut
+
+sub class {
+  my ($self, @new_value) = @_;
+
+  # The value in force before the call is what gets returned, even when a
+  # replacement is supplied.
+  my $previous = $self->{class};
+  $self->{class} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $s->subseq($start,$stop)
+ Function: generate a subsequence
+ Returns : a Bio::DB::GFF::Segment object
+ Args    : start and end of subsequence
+ Status  : Public
+
+This method generates a new segment from the start and end positions
+given in the arguments.  If stop E<lt> start, then the strand is reversed.
+
+=cut
+
+sub subseq {
+  my $self = shift;
+  my ($newstart,$newstop) = @_;
+  my ($start,$stop) = @{$self}{qw(start stop)};
+
+  # We deliberately force subseq to return objects of type RelSegment
+  # Otherwise, when we get a subsequence from a Feature object,
+  # its method and source go along for the ride, which is incorrect.
+  my $new = $self->new_from_segment($self);
+  if ($start <= $stop) {
+    # ascending segment: the new coordinates count upward from start
+    @{$new}{qw(start stop)} = ($start + $newstart - 1, $start + $newstop  - 1);
+  } else {
+    # descending segment: the new coordinates count downward from start
+    @{$new}{qw(start stop)} = ($start - ($newstart - 1), $start - ($newstop  - 1));
+  }
+
+  $new;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $s->seq
+ Function: get the sequence string for this segment
+ Returns : a Bio::PrimarySeq
+ Args    : none
+ Status  : Public
+
+Returns the sequence for this segment as a Bio::PrimarySeq.  (-)
+strand segments are automatically reverse complemented
+
+Call dna() instead to get the data back as a simple sequence
+string.
+
+=cut
+
+sub seq {
+  my ($self) = @_;
+  my $sequence = $self->dna;
+
+  # Load Bio::PrimarySeq on demand, only if it is not already available.
+  unless (Bio::PrimarySeq->can('new')) {
+    require Bio::PrimarySeq;
+  }
+
+  return Bio::PrimarySeq->new(
+    -seq => $sequence,
+    -id  => $self->display_name,
+  );
+}
+
+=head2 dna
+
+ Title   : dna
+ Usage   : $s->dna
+ Function: get the DNA string for this segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns the sequence for this segment as a simple string. (-) strand
+segments are automatically reverse complemented
+
+The method is also called protein().
+
+=cut
+
+sub dna {
+  my ($self) = @_;
+  my ($source, $source_class, $from, $to)
+    = @{$self}{qw(sourceseq class start stop)};
+
+  # The factory does the actual sequence retrieval.
+  return $self->factory->dna($source, $from, $to, $source_class);
+}
+
+# protein() shares the code reference of dna().
+*protein = \&dna;
+
+
+=head2 primary_seq
+
+ Title   : primary_seq
+ Usage   : $s->primary_seq
+ Function: returns a Bio::PrimarySeqI compatible object
+ Returns : a Bio::PrimarySeqI object
+ Args    : none
+ Status  : Public
+
+This is for compatibility with BioPerl's separation of SeqI
+from PrimarySeqI.  It just returns itself.
+
+=cut
+
+#'
+
+# The segment acts as its own primary sequence object.
+sub primary_seq {
+  my ($self) = @_;
+  return $self;
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $s->type
+ Function: return the string "feature"
+ Returns : the string "feature"
+ Args    : none
+ Status  : Public
+
+This is for future sequence ontology-compatibility and
+represents the default type of a feature on the genome
+
+=cut
+
+# Constant type label.
+sub type {
+  return 'feature';
+}
+
+=head2 equals
+
+ Title   : equals
+ Usage   : $s->equals($d)
+ Function: segment equality
+ Returns : true, if two segments are equal
+ Args    : another segment
+ Status  : Public
+
+Returns true if the two segments have the same source sequence, start and stop.
+
+=cut
+
+sub equals {
+  my ($self, $peer) = @_;
+  return unless defined $peer;
+
+  # Anything that is not a Segment object is compared against this
+  # segment's string form.
+  my $peer_is_segment = ref($peer) && $peer->isa('Bio::DB::GFF::Segment');
+  unless ($peer_is_segment) {
+    return $self->asString eq $peer;
+  }
+
+  # Two segments are equal when start, stop and source sequence all match.
+  return $self->{start}     eq $peer->{start}
+      && $self->{stop}      eq $peer->{stop}
+      && $self->{sourceseq} eq $peer->{sourceseq};
+}
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $s->asString
+ Function: human-readable string for segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns a human-readable string representing this sequence.  Format
+is:
+
+   sourceseq:start,stop
+
+=cut
+
+sub asString {
+  my ($self) = @_;
+  my $seq_label = $self->refseq;
+  my $from      = $self->start;
+  my $to        = $self->stop;
+
+  # rendered as "label:start,stop"
+  return sprintf '%s:%s,%s', $seq_label, $from, $to;
+}
+
+=head2 clone
+
+ Title   : clone
+ Usage   : $copy = $s->clone
+ Function: make a copy of this segment
+ Returns : a Bio::DB::GFF::Segment object
+ Args    : none
+ Status  : Public
+
+This method creates a copy of the segment and returns it.
+
+=cut
+
+# shallow copy: the top-level hash is duplicated, nested references are shared
+sub clone {
+  my ($self) = @_;
+
+  # Copy the key/value pairs into a fresh hash and bless it into the same
+  # class as the original.
+  return bless { %$self }, ref($self);
+}
+
+=head2 error
+
+ Title   : error
+ Usage   : $error = $s->error([$new_error])
+ Function: get or set the last error
+ Returns : a string
+ Args    : an error message (optional)
+ Status  : Public
+
+In case of a fault, this method can be used to obtain the last error
+message.  Internally it is called to set the error message.
+
+=cut
+
+sub error {
+  my ($self, @new_value) = @_;
+
+  # The message stored before the call is what gets returned, even when a
+  # replacement is supplied.
+  my $previous = $self->{error};
+  $self->{error} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+=head1 Relative Addressing Methods
+
+The following methods are provided for compatibility with
+Bio::DB::GFF::RelSegment, which provides relative addressing
+functions.
+
+=head2 abs_start
+
+ Title   : abs_start
+ Usage   : $s->abs_start
+ Function: the absolute start of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is an alias to start(), and provided for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+# abs_start() shares the code reference of start().
+*abs_start  = \&start;
+
+=head2 abs_end
+
+ Title   : abs_end
+ Usage   : $s->abs_end
+ Function: the absolute stop of the segment
+ Returns : an integer
+ Args    : none
+ Status  : Public
+
+This is an alias to stop(), and provided for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+# abs_stop() and abs_end() both share the code reference of stop().
+*abs_stop   = \&stop;
+*abs_end    = \&stop;
+
+=head2 abs_strand
+
+ Title   : abs_strand
+ Usage   : $s->abs_strand
+ Function: the absolute strand of the segment
+ Returns : +1,0,-1
+ Args    : none
+ Status  : Public
+
+This is an alias to strand(), and provided for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+sub abs_strand {
+  my ($self) = @_;
+
+  # Direction from absolute start to absolute end: +1, 0 or -1.
+  my $end   = $self->abs_end;
+  my $start = $self->abs_start;
+  return $end <=> $start;
+}
+
+=head2 abs_ref
+
+ Title   : abs_ref
+ Usage   : $s->abs_ref
+ Function: the reference sequence for this segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This is an alias to sourceseq(), and is here to provide API
+compatibility with Bio::DB::GFF::RelSegment.
+
+=cut
+
+# abs_ref() shares the code reference of sourceseq().
+*abs_ref    = \&sourceseq;
+
+=head2 refseq
+
+ Title   : refseq
+ Usage   : $s->refseq
+ Function: get or set the reference sequence
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Examine or change the reference sequence. This is an alias to
+sourceseq(), provided here for API compatibility with
+Bio::DB::GFF::RelSegment.
+
+=cut
+
+# refseq() shares the code reference of sourceseq(), which only reads the
+# "sourceseq" slot; any argument passed to refseq() here is ignored.
+*refseq     = \&sourceseq;
+
+=head2 ref
+
+ Title   : ref
+ Usage   : $s->ref
+ Function: get or set the reference sequence
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+An alias for refseq()
+
+=cut
+
+# Forward to refseq(), passing along any arguments.
+sub ref {
+  my $self = shift;
+  return $self->refseq(@_);
+}
+
+=head2 seq_id
+
+ Title   : seq_id
+ Usage   : $ref = $s->seq_id
+ Function: get the reference sequence in a LocationI-compatible way
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+An alias for refseq() but only allows reading.
+
+=cut
+
+# Calls refseq() with no arguments, so it can only read the value.
+sub seq_id {
+  my ($self) = @_;
+  return $self->refseq;
+}
+# seqname() shares the code reference of seq_id().
+*seqname = \&seq_id;
+
+=head2 truncated
+
+ Title   : truncated
+ Usage   : $truncated = $s->truncated
+ Function: Flag indicating that the segment was truncated during creation
+ Returns : A boolean flag
+ Args    : none
+ Status  : Public
+
+This indicates that the sequence was truncated during creation.  The
+returned flag is undef if no truncation occurred.  If truncation did
+occur, the flag is actually an array ref in which the first element is
+true if truncation occurred on the left, and the second element is
+true if truncation occurred on the right.
+
+=cut
+
+sub truncated {
+  my ($self) = @_;
+
+  my $flags = $self->{truncated};
+  return unless $flags;
+
+  # A true value that is not a hash ref is reported as truncated on both
+  # sides.  CORE::ref is spelled out because this package defines its own
+  # ref() method.
+  return [1, 1] unless CORE::ref($flags) eq 'HASH';
+
+  return [ @{$flags}{qw(start stop)} ];
+}
+
+=head2 Bio::RangeI Methods
+
+The following Bio::RangeI methods are supported:
+
+overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
+
+=cut
+
+sub overlaps {
+  my $self    = shift;
+  my ($other) = @_;
+
+  # RelSegments on a different reference sequence never match; everything
+  # else is decided by the parent class.
+  return if $other->isa('Bio::DB::GFF::RelSegment')
+        and $self->abs_ref ne $other->abs_ref;
+  return $self->SUPER::overlaps(@_);
+}
+
+sub contains {
+  my $self    = shift;
+  my ($other) = @_;
+
+  # RelSegments on a different reference sequence never match; everything
+  # else is decided by the parent class.
+  return if $other->isa('Bio::DB::GFF::RelSegment')
+        and $self->abs_ref ne $other->abs_ref;
+  return $self->SUPER::contains(@_);
+}
+#sub equals {
+#  my $self  = shift;
+#  my($other,$so) = @_;
+#  if ($other->isa('Bio::DB::GFF::RelSegment')) {
+#    return if $self->abs_ref ne $other->abs_ref;
+#  }
+#  $self->SUPER::equals(@_);
+#}
+sub intersection {
+  my $self    = shift;
+  my ($other) = @_;
+
+  # RelSegments on a different reference sequence have no intersection;
+  # everything else is computed by the parent class.
+  return if $other->isa('Bio::DB::GFF::RelSegment')
+        and $self->abs_ref ne $other->abs_ref;
+  return $self->SUPER::intersection(@_);
+}
+sub union {
+  my $self    = shift;
+  my ($other) = @_;
+
+  # RelSegments on a different reference sequence cannot be merged;
+  # everything else is computed by the parent class.
+  return if $other->isa('Bio::DB::GFF::RelSegment')
+        and $self->abs_ref ne $other->abs_ref;
+  return $self->SUPER::union(@_);
+}
+
+sub overlap_extent {
+  my $self    = shift;
+  my ($other) = @_;
+
+  # RelSegments on a different reference sequence yield nothing;
+  # everything else is computed by the parent class.
+  return if $other->isa('Bio::DB::GFF::RelSegment')
+        and $self->abs_ref ne $other->abs_ref;
+  return $self->SUPER::overlap_extent(@_);
+}
+
+
+=head2 Bio::SeqI implementation
+
+=cut
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their
+           own object ids in a way the implementation can control
+           clients can expect one id to map to one object.
+
+           For sequences with no accession number, this method should
+           return a stringified memory location.
+
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub primary_id {
+   my ($self, $new_id) = @_;
+
+   # a defined argument replaces the stored id
+   $self->{primary_id} = $new_id if defined $new_id;
+
+   # with no id ever stored, fall back to the object's string form
+   return exists $self->{primary_id} ? $self->{primary_id} : "$self";
+}
+
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $id = $obj->display_name or $obj->display_name($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+Note, this used to be called display_id(), and this name is preserved for
+backward compatibility.  The default is to return the seq_id().
+
+=cut
+
+# The display name is whatever seq_id() returns; arguments are ignored.
+sub display_name {
+  my ($self) = @_;
+  return $self->seq_id;
+}
+# display_id() shares the code reference of display_name().
+*display_id = \&display_name;
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+# Constant placeholder: always the string 'unknown'.
+sub accession_number { 'unknown' }
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : none
+ Status  : Virtual
+
+
+=cut
+
+# Constant: this class always reports 'dna'.
+sub alphabet { 'dna' }
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+
+=cut
+
+# The description is the segment's asString() form; arguments are ignored.
+sub desc {
+  my ($self) = @_;
+  return $self->asString;
+}
+
+# description() shares the code reference of desc().
+*description = \&desc;
+
+=head2 species
+
+ Title   : species
+ Usage   : $species = $seq->species() or $seq->species($species)
+ Function: Gets or sets the species
+ Example :
+ Returns : Bio::Species object
+ Args    : None or Bio::Species object
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    my ($self, $species) = @_;
+
+    # getter: no (or a false) argument returns the stored value
+    return $self->{'species'} unless $species;
+
+    # setter: store the new value and return it
+    return $self->{'species'} = $species;
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $ann = $seq->annotation or $seq->annotation($annotation)
+ Function: Gets or sets the annotation
+ Example :
+ Returns : Bio::Annotation object
+ Args    : None or Bio::Annotation object
+
+See L<Bio::Annotation> for more information
+
+=cut
+
+sub annotation {
+   my ($obj,$value) = @_;
+
+   # Store a caller-supplied collection, or create an empty one the first
+   # time the annotation is requested and none has been stored yet.
+   if( defined $value || ! defined $obj->{'annotation'} ) {
+       # direct method call rather than indirect-object "new Class()" syntax
+       $value = Bio::Annotation::Collection->new() unless defined $value;
+       $obj->{'annotation'} = $value;
+   }
+   return $obj->{'annotation'};
+
+}
+
+=head2 is_circular
+
+ Title   : is_circular
+ Usage   : if( $obj->is_circular) { /Do Something/ }
+ Function: Returns true if the molecule is circular
+ Returns : Boolean value
+ Args    : none
+
+=cut
+
+# Constant: this class always reports 0 (not circular).
+sub is_circular { 0 }
+
+
+1;
+__END__
+
+=head1 BUGS
+
+Report them please.
+
+=head1 SEE ALSO
+
+L<bioperl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.  
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 CONTRIBUTORS
+
+Jason Stajich E<lt>jason at bioperl.orgE<gt>.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Typename.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Typename.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Typename.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,185 @@
+=head1 NAME
+
+Bio::DB::GFF::Typename -- The name of a feature type
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  my $type = Bio::DB::GFF::Typename->new(similarity => 'BLAT_EST_GENOME');
+  my @features = $segment->features($type);
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Typename objects encapsulate the combination of feature
+method and source used by the GFF flat file format.  They can be used
+in the Bio::DB::GFF modules wherever a feature type is called for.
+
+Since there are relatively few types and many features, this module
+maintains a memory cache of unique types so that two features of the
+same type will share the same Bio::DB::GFF::Typename object.
+
+=head1 METHODS
+
+=cut
+
+package Bio::DB::GFF::Typename;
+
+use strict;
+use overload    # using a Typename object as a string calls asString()
+  '""'     => 'asString',
+  fallback => 1;
+
+
+use base qw(Bio::Root::Root Bio::Das::FeatureTypeI);
+
+# cache of Typename objects keyed by "method:source"; lets new() reuse equivalent objects
+my %OBJECT_CACHE;
+
+=head2 new
+
+ Title   : new
+ Usage   : $type = Bio::DB::GFF::Typename->new($method,$source)
+ Function: create a new Bio::DB::GFF::Typename object
+ Returns : a new Bio::DB::GFF::Typename object
+ Args    : method and source
+ Status  : Public
+
+=cut
+
+sub new {
+  my ($package,$method,$source) = @_;
+  $method = '' unless $method;
+  $source = '' unless $source;
+  # a single "method:source" argument is split into its two parts
+  ($method,$source) = ($1,$2) if $source eq '' && $method =~ /^(\w+):(\w+)$/;
+  # hand back the cached object for this type, creating it on first use
+  my $key = "$method:$source";
+  $OBJECT_CACHE{$key} = bless([$method,$source],$package) unless $OBJECT_CACHE{$key};
+  return $OBJECT_CACHE{$key};
+}
+
+=head2 method
+
+ Title   : method
+ Usage   : $method = $type->method([$newmethod])
+ Function: get or set the method
+ Returns : a method name
+ Args    : new method name (optional)
+ Status  : Public
+
+=cut
+
+sub method {
+  my ($self, @new) = @_;
+  my $previous = $self->[0];         # value in effect before any update
+  $self->[0] = $new[0] if @new;      # slot 0 of the object holds the method
+  return $previous;
+}
+
+
+=head2 source
+
+ Title   : source
+ Usage   : $source = $type->source([$newsource])
+ Function: get or set the source
+ Returns : a source name
+ Args    : new source name (optional)
+ Status  : Public
+
+=cut
+
+sub source {
+  my ($self, @new) = @_;
+  my $previous = $self->[1];         # value in effect before any update
+  $self->[1] = $new[0] if @new;      # slot 1 of the object holds the source
+  return $previous;
+}
+
+=head2 asString
+
+ Title   : asString
+ Usage   : $string = $type->asString
+ Function: get the method and source as a string
+ Returns : a string in "method:source" format
+ Args    : none
+ Status  : Public
+
+This method is used by operator overloading to overload the '""'
+operator.
+
+=cut
+
+sub asString {
+  return $_[0][1] ? join(':', @{ $_[0] }) : $_[0][0];   # bare method name when the source is empty
+}
+
+=head2 clone
+
+ Title   : clone
+ Usage   : $new_clone = $type->clone;
+ Function: clone this object
+ Returns : a new Bio::DB::GFF::Typename object
+ Args    : none
+ Status  : Public
+
+This method creates an exact copy of the object.
+
+=cut
+
+sub clone {
+  my @fields = @{ $_[0] };                # shallow copy of the object's array
+  return bless \@fields, ref $_[0];       # same class as the original
+}
+
+=head2 match
+
+ Title   : match
+ Usage   : $boolean = $type->match($type_or_string)
+ Function: fuzzy match on types
+ Returns : a flag indicating that the argument matches the object
+ Args    : a Bio::DB::GFF::Typename object, or a string in method:source format
+ Status  : Public
+
+This match allows Sequence:Link and Sequence: to match, but not
+Sequence:Link and Sequence:Genomic_canonical.
+
+=cut
+
+sub match {
+  my ($self,$target) = @_;
+  my ($method,$source);
+  if (UNIVERSAL::isa($target,'Bio::DB::GFF::Typename')) {
+    ($method,$source) = ($target->method,$target->source);
+  } else {
+    ($method,$source) = split /:/,(defined $target ? $target : '');
+  }
+  # A missing component ("Sequence", "Sequence:", '') is an empty wildcard, not undef.
+  for ($method,$source) { $_ = '' unless defined $_ }
+  return if $method ne '' && $self->method ne '' && $method ne $self->method;
+  return if $source ne '' && $self->source ne '' && $source ne $self->source;
+  1;
+}
+
+1;
+
+=head1 BUGS
+
+This module is still under development.
+
+=head1 SEE ALSO
+
+L<bioperl>, L<Bio::DB::GFF>, L<Bio::DB::RelSegment>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Binning.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Binning.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Binning.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+=head1 NAME
+
+Bio::DB::GFF::Util::Binning - binning utility for Bio::DB::GFF index
+
+=head1 SYNOPSIS
+
+ use Bio::DB::GFF::Util::Binning qw(bin bin_bot bin_top);
+ my $tier = bin($start,$stop,$min);
+
+=head1 DESCRIPTION
+
+This is a utility module that exports the functions bin(), bin_bot()
+and bin_top().  These functions translate a range on the genome into a
+named bin that is used as an index in the Bio::DB::GFF schema.  The
+index makes certain range retrieval queries much faster.
+
+=head1 API
+
+The remainder of the document describes the function calls.  All three
+functions are exported by default and may also be imported by name.
+
+=over 4
+
+=cut
+
+package Bio::DB::GFF::Util::Binning;
+
+use strict;
+require Exporter;
+use vars qw(@EXPORT @EXPORT_OK);
+use base qw(Exporter);
+@EXPORT_OK = qw(bin bin_bot bin_top);   # functions importable by name ...
+@EXPORT = @EXPORT_OK;                   # ... and all of them exported by default
+use Bio::Root::Version;
+
+=item $bin_name = bin($start,$stop,$bin_size)
+
+Given a start, stop and bin size on the genome, translate this
+location into a bin name.  In a list context, returns the bin tier
+name and the position that the bin begins.
+
+=cut
+
+sub bin {
+  my ($start,$stop,$min) = @_;
+  my $tier      = $min;
+  my $bin_start = int($start/$tier);
+  # widen the tier tenfold until both ends of the range share one bin
+  until ($bin_start == int($stop/$tier)) {
+    $tier     *= 10;
+    $bin_start = int($start/$tier);
+  }
+  return ($tier,$bin_start) if wantarray;   # list context: tier and bin index
+  return bin_name($tier,$bin_start);        # scalar context: formatted bin name
+}
+
+=item $bottom = bin_bot($tier,$start)
+
+Given a tier name and a range start position, returns the lower end of
+the bin range.
+
+=cut
+
+sub bin_bot {
+  my ($tier,$pos) = @_;
+  my $index = int($pos/$tier);    # truncated quotient: which bin of this tier holds $pos
+  return bin_name($tier,$index);
+}
+
+=item $top = bin_top($tier,$end)
+
+Given a tier name and the end of a range, returns the upper end of the
+bin range.
+
+=cut
+
+sub bin_top {
+  my ($tier,$pos) = @_;
+  # disabled variant kept from the original: bin_name($tier,int($pos/$tier),+1);
+  return bin_name($tier,int($pos/$tier));
+}
+
+sub bin_name {
+  my ($tier, $int, $fudge) = @_;
+  my $offset = $fudge ? $fudge : 0;          # optional adjustment; defaults to 0
+  my $pos    = abs($int) + $offset;
+  return sprintf("%d.%06d", $tier, $pos < 0 ? 0 : $pos);   # negative results clamp to 0
+}
+
+sub log10 {
+  my ($value) = @_;
+  return log($value) / log(10);   # change of base from the natural logarithm
+}
+
+1;
+
+=back
+
+=head1 BUGS
+
+None known yet.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Rearrange.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Rearrange.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF/Util/Rearrange.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+=head1 NAME
+
+Bio::DB::GFF::Util::Rearrange - rearrange utility
+
+=head1 SYNOPSIS
+
+ use Bio::DB::GFF::Util::Rearrange 'rearrange';
+
+ my ($arg1,$arg2,$arg3,$others) = rearrange(['ARG1','ARG2','ARG3'],@args);
+
+=head1 DESCRIPTION
+
+This is a different version of the _rearrange() method from
+Bio::Root::Root.  It runs as a function call, rather than as a method
+call, and it handles unidentified parameters slightly differently.
+
+It exports a single function call:
+
+=over 4
+
+=item @rearranged_args = rearrange(\@parameter_names,@parameters);
+
+The first argument is an array reference containing a list of parameter
+names in the desired order.  The second and subsequent arguments are a
+list of parameters in the format:
+
+  (-arg1=>$arg1,-arg2=>$arg2,-arg3=>$arg3...)
+
+The function call returns the parameter values in the order in which
+they were specified in @parameter_names.  Any parameters that were not
+found in @parameter_names are returned in the form of a hash reference
+in which the keys are the uppercased forms of the parameter names, and
+the values are the parameter values.
+
+=back
+
+=head1 BUGS
+
+None known yet.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+package Bio::DB::GFF::Util::Rearrange;
+
+use strict;
+require Exporter;
+use vars qw(@EXPORT @EXPORT_OK);
+use base qw(Exporter);
+@EXPORT_OK = qw(rearrange);   # importable by name ...
+@EXPORT = qw(rearrange);      # ... and exported by default
+use Bio::Root::Version;
+
+# default export
+sub rearrange {
+    # Reorder (-name => value, ...) arguments to follow @$order; parameters
+    # not named in @$order are returned in a trailing hash reference.
+    my($order,@param) = @_;
+    return unless @param;
+    my %param;
+    if (ref $param[0] eq 'HASH') {
+      %param = %{$param[0]};          # hash ref: copied as given, keys untouched
+    } else {
+      # positional call (no leading dash): hand the arguments straight back
+      return @param unless (defined($param[0]) && substr($param[0],0,1) eq '-');
+
+      for (my $i=0;$i<@param;$i+=2) { # names occupy the even slots
+        $param[$i]=~s/^\-//;     # get rid of initial - if present
+        $param[$i]=~tr/a-z/A-Z/; # parameters are upper case
+      }
+
+      %param = @param;                # convert into associative array
+    }
+
+    my(@return_array);
+    local($^W) = 0;
+    # each entry of @$order is a name or an array ref of alternative names
+    foreach my $key (@$order) {
+        my($value);
+        if (ref($key) eq 'ARRAY') {
+            foreach (@$key) {
+                last if defined($value);
+                $value = $param{$_};
+                delete $param{$_};
+            }
+        } else {
+            $value = $param{$key};
+            delete $param{$key};
+        }
+        push(@return_array,$value);
+    }
+    push (@return_array,\%param) if %param;
+    return @return_array;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GFF.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3693 @@
+# $Id: GFF.pm,v 1.139.4.1 2006/10/02 23:10:14 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::GFF -- Storage and retrieval of sequence annotation data
+
+=head1 SYNOPSIS
+
+  use Bio::DB::GFF;
+
+  # Open the sequence database
+  my $db      = Bio::DB::GFF->new( -adaptor => 'dbi::mysqlopt',
+                                   -dsn     => 'dbi:mysql:elegans');
+
+  # fetch a 1 megabase segment of sequence starting at landmark "ZK909"
+  my $segment = $db->segment('ZK909', 1 => 1000000);
+
+  # pull out all transcript features
+  my @transcripts = $segment->features('transcript');
+
+  # for each transcript, total the length of the introns
+  my %totals;
+  for my $t (@transcripts) {
+    my @introns = $t->Intron;
+    $totals{$t->name} += $_->length foreach @introns;
+  }
+
+  # Sort the exons of the first transcript by position
+  my @exons = sort {$a->start <=> $b->start} $transcripts[0]->Exon;
+
+  # Get a region 1000 bp upstream of first exon
+  my $upstream = $exons[0]->subseq(-1000,0);
+
+  # get its DNA
+  my $dna = $upstream->seq;
+
+  # and get all curated polymorphisms inside it
+  @polymorphisms = $upstream->contained_features('polymorphism:curated');
+
+  # get all feature types in the database
+  my @types = $db->types;
+
+  # count all feature types in the segment
+  my %type_counts = $segment->types(-enumerate=>1);
+
+  # get an iterator on all curated features of type 'exon' or 'intron'
+  my $iterator = $db->get_seq_stream(-type     => ['exon:curated','intron:curated']);
+
+  while (my $s = $iterator->next_seq) {
+      print $s,"\n";
+  }
+
+  # find all transcripts annotated as having function 'kinase'
+  my $iterator = $db->get_seq_stream(-type=>'transcript',
+			             -attributes=>{Function=>'kinase'});
+  while (my $s = $iterator->next_seq) {
+      print $s,"\n";
+  }
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF provides fast indexed access to a sequence annotation
+database.  It supports multiple database types (ACeDB, relational),
+and multiple schemas through a system of adaptors and aggregators.
+
+The following operations are supported by this module:
+
+  - retrieving a segment of sequence based on the ID of a landmark
+  - retrieving the DNA from that segment
+  - finding all annotations that overlap with the segment
+  - finding all annotations that are completely contained within the
+    segment
+  - retrieving all annotations of a particular type, either within a
+    segment, or globally
+  - conversion from absolute to relative coordinates and back again,
+    using any arbitrary landmark for the relative coordinates
+  - using a sequence segment to create new segments based on relative 
+    offsets
+
+The data model used by Bio::DB::GFF is compatible with the GFF flat
+file format (http://www.sanger.ac.uk/software/GFF).  The module can
+load a set of GFF files into the database, and serves objects that
+have methods corresponding to GFF fields.
+
+The objects returned by Bio::DB::GFF are compatible with the
+SeqFeatureI interface, allowing their use by the Bio::Graphics and
+Bio::DAS modules.
+
+=head2 Auxiliary Scripts
+
+The bioperl distribution includes several scripts that make it easier
+to work with Bio::DB::GFF databases.  They are located in the scripts
+directory under a subdirectory named Bio::DB::GFF:
+
+=over 4
+
+=item bp_load_gff.pl
+
+This script will load a Bio::DB::GFF database from a flat GFF file of
+sequence annotations.  Only the relational database version of
+Bio::DB::GFF is supported.  It can be used to create the database from
+scratch, as well as to incrementally load new data.
+
+This script takes a --fasta argument to load raw DNA into the database
+as well.  However, GFF databases do not require access to the raw DNA
+for most of their functionality.
+
+bp_load_gff.pl also has a --upgrade option, which will perform a
+non-destructive upgrade of older schemas to newer ones.
+
+=item bp_bulk_load_gff.pl
+
+This script will populate a Bio::DB::GFF database from a flat GFF file
+of sequence annotations.  Only the MySQL database version of
+Bio::DB::GFF is supported.  It uses the "LOAD DATA INFILE" query in
+order to accelerate loading considerably; however, it can only be used
+for the initial load, and not for updates.
+
+This script takes a --fasta argument to load raw DNA into the database
+as well.  However, GFF databases do not require access to the raw DNA
+for most of their functionality.
+
+=item bp_fast_load_gff.pl
+
+This script is as fast as bp_bulk_load_gff.pl but uses Unix pipe
+tricks to allow for incremental updates.  It only supports the MySQL
+database version of Bio::DB::GFF and is guaranteed not to work on
+non-Unix platforms.
+
+Arguments are the same as bp_load_gff.pl
+
+=item gadfly_to_gff.pl
+
+This script will convert the GFF-like format used by the Berkeley
+Drosophila Sequencing project into a format suitable for use with this
+module.
+
+=item sgd_to_gff.pl
+
+This script will convert the tab-delimited feature files used by the
+Saccharomyces Genome Database into a format suitable for use with this
+module.
+
+=back
+
+=head2 GFF Fundamentals
+
+The GFF format is a flat tab-delimited file, each line of which
+corresponds to an annotation, or feature.  Each line has nine columns
+and looks like this:
+
+ Chr1  curated  CDS 365647  365963  .  +  1  Transcript "R119.7"
+
+The 9 columns are as follows:
+
+=over 4
+
+=item 1. reference sequence
+
+This is the ID of the sequence that is used to establish the
+coordinate system of the annotation.  In the example above, the
+reference sequence is "Chr1".
+
+=item 2. source
+
+The source of the annotation.  This field describes how the annotation
+was derived.  In the example above, the source is "curated" to
+indicate that the feature is the result of human curation.  The names
+and versions of software programs are often used for the source field,
+as in "tRNAScan-SE/1.2".
+
+=item 3. method
+
+The annotation method.  This field describes the type of the
+annotation, such as "CDS".  Together the method and source describe
+the annotation type.
+
+=item 4. start position
+
+The start of the annotation relative to the reference sequence. 
+
+=item 5. stop position
+
+The stop of the annotation relative to the reference sequence.  Start
+is always less than or equal to stop.
+
+=item 6. score
+
+For annotations that are associated with a numeric score (for example,
+a sequence similarity), this field describes the score.  The score
+units are completely unspecified, but for sequence similarities, it is
+typically percent identity.  Annotations that don't have a score can
+use "."
+
+=item 7. strand
+
+For those annotations which are strand-specific, this field is the
+strand on which the annotation resides.  It is "+" for the forward
+strand, "-" for the reverse strand, or "." for annotations that are
+not stranded.
+
+=item 8. phase
+
+For annotations that are linked to proteins, this field describes the
+phase of the annotation on the codons.  It is a number from 0 to 2, or
+"." for features that have no phase.
+
+=item 9. group
+
+GFF provides a simple way of generating annotation hierarchies ("is
+composed of" relationships) by providing a group field.  The group
+field contains the class and ID of an annotation which is the logical
+parent of the current one.  In the example given above, the group is
+the Transcript named "R119.7".
+
+The group field is also used to store information about the target of
+sequence similarity hits, and miscellaneous notes.  See the next
+section for a description of how to describe similarity targets.
+
+The format of the group fields is "Class ID" with a single space (not
+a tab) separating the class from the ID. It is VERY IMPORTANT to
+follow this format, or grouping will not work properly.
+
+=back
+
+The sequences used to establish the coordinate system for annotations
+can correspond to sequenced clones, clone fragments, contigs or
+super-contigs.  Thus, this module can be used throughout the lifecycle
+of a sequencing project.
+
+In addition to a group ID, the GFF format allows annotations to have a
+group class.  For example, in the ACeDB representation, RNA
+interference experiments have a class of "RNAi" and an ID that is
+unique among the RNAi experiments.  Since not all databases support
+this notion, the class is optional in all calls to this module, and
+defaults to "Sequence" when not provided.
+
+Double-quotes are sometimes used in GFF files around components of the
+group field.  Strictly, this is only necessary if the group name or
+class contains whitespace.
+
+=head2 Making GFF files work with this module
+
+Some annotations do not need to be individually named.  For example,
+it is probably not useful to assign a unique name to each ALU repeat
+in a vertebrate genome.  Others, such as predicted genes, correspond
+to named biological objects; you probably want to be able to fetch the
+positions of these objects by referring to them by name.
+
+To accommodate named annotations, the GFF format places the object
+class and name in the group field.  The name identifies the object,
+and the class prevents similarly-named objects, for example clones and
+sequences, from colliding.
+
+A named object is shown in the following excerpt from a GFF file:
+
+ Chr1  curated transcript  939627 942410 . +  . Transcript Y95B8A.2
+
+This object is a predicted transcript named Y95B8A.2.  In this case,
+the group field is used to identify the class and name of the object,
+even though no other annotation belongs to that group.
+
+It now becomes possible to retrieve the region of the genome covered
+by transcript Y95B8A.2 using the segment() method:
+
+  $segment = $db->segment(-class=>'Transcript',-name=>'Y95B8A.2');
+
+It is not necessary for the annotation's method to correspond to the
+object class, although this is commonly the case.
+
+As explained above, each annotation in a GFF file refers to a
+reference sequence.  It is important that each reference sequence also
+be identified by a line in the GFF file.  This allows the Bio::DB::GFF
+module to determine the length and class of the reference sequence,
+and makes it possible to do relative arithmetic.
+
+For example, if "Chr1" is used as a reference sequence, then it should
+have an entry in the GFF file similar to this one:
+
+ Chr1 assembly chromosome 1 14972282 . + . Sequence Chr1
+
+This indicates that the reference sequence named "Chr1" has length
+14972282 bp, method "chromosome" and source "assembly".  In addition,
+as indicated by the group field, Chr1 has class "Sequence" and name
+"Chr1".
+
+The object class "Sequence" is used by default when the class is not
+specified in the segment() call.  This allows you to use a shortcut
+form of the segment() method:
+
+ $segment = $db->segment('Chr1');          # whole chromosome
+ $segment = $db->segment('Chr1',1=>1000);  # first 1000 bp
+
+For your convenience, if, during loading a GFF file, Bio::DB::GFF
+encounters a line like the following:
+
+  ##sequence-region Chr1 1 14972282
+
+It will automatically generate the following entry:
+
+ Chr1 reference Component 1 14972282 . + . Sequence Chr1
+
+This is sufficient to use Chr1 as a reference point.
+The ##sequence-region line is frequently found in the GFF files
+distributed by annotation groups.
+
+=head2 Specifying the group tag
+
+A frequent problem with GFF files is the difficulty of distinguishing
+which of the several tag/value pairs in the 9th column is the grouping
+pair.  Ordinarily the first tag will be used for grouping, but some
+GFF manipulating tools do not preserve the order of attributes.  To
+eliminate this ambiguity, this module provides two ways of explicitly
+specifying which tag to group on:
+
+=over 4
+
+=item Using -preferred_groups
+
+When you create a Bio::DB::GFF object, pass it a -preferred_groups=E<gt>
+argument.  This specifies a tag that will be used for grouping.  You
+can pass an array reference to specify a list of such tags.
+
+=item In the GFF header
+
+The GFF file itself can specify which tags are to be used for
+grouping.  Insert a comment like the following:
+
+ ##group-tags Accession Locus
+
+This says to use the Accession tag for grouping.  If it is not
+available, use the Locus tag.  If neither tag is available, use the
+first pair to appear.
+
+=back
+
+These options only apply when B<loading> a GFF file into the database,
+and have no effect on existing databases.
+
+The group-tags comment in the GFF file will *override* the preferred
+groups set when you create the Bio::DB::GFF object.
+
+For backward compatibility, the tags Sequence and Transcript are
+always treated as grouping tags unless preferred_groups are specified.
+The "Target" tag is always used for grouping regardless of the
+preferred_groups() setting, and the tags "tstart", "tend" and "Note"
+cannot be used for grouping.  These are historical artefacts coming
+from various interpretations of GFF2, and cannot be changed.
+
+=head2 Sequence alignments
+
+There are two cases in which an annotation indicates the relationship
+between two sequences.  The first case is a similarity hit, where the
+annotation indicates an alignment.  The second case is a map assembly,
+in which the annotation indicates that a portion of a larger sequence
+is built up from one or more smaller ones.
+
+Both cases are indicated by using the B<Target> tag in the group
+field.  For example, a typical similarity hit will look like this:
+
+ Chr1 BLASTX similarity 76953 77108 132 + 0 Target Protein:SW:ABL_DROME 493 544
+
+The group field contains the Target tag, followed by an identifier for
+the biological object referred to.  The GFF format uses the notation
+I<Class>:I<Name> for the biological object, and even though this is
+stylistically inconsistent, that's the way it's done.  The object
+identifier is followed by two integers indicating the start and stop
+of the alignment on the target sequence.
+
+Unlike the main start and stop columns, it is possible for the target
+start to be greater than the target end.  The previous example
+indicates that the section of Chr1 from 76,953 to 77,108 aligns to
+the protein SW:ABL_DROME starting at position 493 and extending to
+position 544.
+
+A similar notation is used for sequence assembly information as shown
+in this example:
+
+ Chr1        assembly Link   10922906 11177731 . . . Target Sequence:LINK_H06O01 1 254826
+ LINK_H06O01 assembly Cosmid 32386    64122    . . . Target Sequence:F49B2       6 31742
+
+This indicates that the region between bases 10922906 and 11177731 of
+Chr1 is composed of LINK_H06O01 from bp 1 to bp 254826.  The region
+of LINK_H06O01 between 32386 and 64122 is, in turn, composed of the
+bases 6 to 31742 of cosmid F49B2.
+
+=head2 Attributes
+
+While not intended to serve as a general-purpose sequence database
+(see bioperl-db for that), GFF allows you to tag features with
+arbitrary attributes.  Attributes appear in the Group field following
+the initial class/name pair.  For example:
+
+ Chr1  cur trans  939 942 . +  . Transcript Y95B8A.2 ; Gene sma-3 ; Alias sma3
+
+This line tags the feature named Transcript Y95B8A.2 as being "Gene"
+named sma-3 and having the Alias "sma3".  Features having these
+attributes can be looked up using the fetch_feature_by_attribute() method.
+
+Two attributes have special meaning: "Note" is for backward
+compatibility and is used for unstructured text remarks.  "Alias" is
+considered as a synonym for the feature name and will be consulted
+when looking up a feature by its name.
+
+=head2 Adaptors and Aggregators
+
+This module uses a system of adaptors and aggregators in order to make
+it adaptable to use with a variety of databases.
+
+=over 4
+
+=item Adaptors
+
+The core of the module handles the user API, annotation coordinate
+arithmetic, and other common issues.  The details of fetching
+information from databases is handled by an adaptor, which is
+specified during Bio::DB::GFF construction.  The adaptor encapsulates
+database-specific information such as the schema, user authentication
+and access methods.
+
+There are currently five adaptors recommended for general use:
+
+  Adaptor Name             Description
+  ------------             -----------
+
+  memory                   A simple in-memory database suitable for testing
+                            and small data sets.
+
+  berkeleydb               An indexed file database based on the DB_File module,
+                            suitable for medium-sized read-only data sets.
+
+  dbi::mysql               An interface to a schema implemented in the Mysql
+                            relational database management system.
+
+  dbi::oracle              An interface to a schema implemented in the Oracle
+                            relational database management system.
+
+  dbi::pg                  An interface to a schema implemented in the PostgreSQL
+                            relational database management system.
+
+Check the Bio/DB/GFF/Adaptor directory and subdirectories for other,
+more specialized adaptors, as well as experimental ones.
+
+=item Aggregators
+
+The GFF format uses a "group" field to indicate aggregation properties
+of individual features.  For example, a set of exons and introns may
+share a common transcript group, and multiple transcripts may share
+the same gene group.
+
+Aggregators are small modules that use the group information to
+rebuild the hierarchy.  When a Bio::DB::GFF object is created, you
+indicate that it use a set of one or more aggregators.  Each
+aggregator provides a new composite annotation type.  Before the
+database query is generated each aggregator is called to
+"disaggregate" its annotation type into list of component types
+contained in the database.  After the query is generated, each
+aggregator is called again in order to build composite annotations
+from the returned components.
+
+For example, during disaggregation, the standard
+"processed_transcript" aggregator generates a list of component
+feature types including "UTR", "CDS", and "polyA_site".  Later, it
+aggregates these features into a set of annotations of type
+"processed_transcript".
+
+During aggregation, the list of aggregators is called in reverse
+order.  This allows aggregators to collaborate to create multi-level
+structures: the transcript aggregator assembles transcripts from
+introns and exons; the gene aggregator then assembles genes from sets
+of transcripts.
+
+Three default aggregators are provided:
+
+      transcript   assembles transcripts from features of type
+                   exon, CDS, 5'UTR, 3'UTR, TSS, and PolyA
+      clone        assembles clones from Clone_left_end, Clone_right_end
+                   and Sequence features.
+      alignment    assembles gapped alignments from features of type
+                   "similarity".
+
+In addition, this module provides the optional "wormbase_gene"
+aggregator, which accommodates the WormBase representation of genes.
+This aggregator aggregates features of method "exon", "CDS", "5'UTR",
+"3'UTR", "polyA" and "TSS" into a single object.  It also expects to
+find a single feature of type "Sequence" that spans the entire gene.
+
+The existing aggregators are easily customized.
+
+Note that aggregation will not occur unless you specifically request
+the aggregation type.  For example, this call:
+
+  @features = $segment->features('alignment');
+
+will generate an array of aggregated alignment features.  However,
+this call:
+
+  @features = $segment->features();
+
+will return a list of unaggregated similarity segments.
+
+For more information, see the manual pages for
+Bio::DB::GFF::Aggregator::processed_transcript, Bio::DB::GFF::Aggregator::clone,
+etc.
+
+=back
+
+=head2 Loading GFF3 Files
+
+This module will accept GFF3 files, as described at
+http://song.sourceforge.net/gff3.shtml. However, the implementation
+has some limitations.
+
+=over 4
+
+=item 1. GFF version string is required
+
+The GFF file B<must> contain the version comment:
+
+ ##gff-version 3
+
+Unless this version string is present at the top of the GFF file, the
+loader will attempt to parse the file in GFF2 format, with
+less-than-desirable results.
+
+=item 2. Only one level of nesting allowed
+
+A major restriction is that Bio::DB::GFF only allows one level of
+nesting of features.  For nesting, the Target tag will be used
+preferentially followed by the ID tag, followed by the Parent tag.
+This means that if genes are represented like this:
+
+  XXXX XXXX gene XXXX XXXX XXXX ID=myGene
+  XXXX XXXX mRNA XXXX XXXX XXXX ID=myTranscript;Parent=myGene
+  XXXX XXXX exon XXXX XXXX XXXX Parent=myTranscript
+  XXXX XXXX exon XXXX XXXX XXXX Parent=myTranscript
+
+Then there will be one group called myGene containing the "gene"
+feature and one group called myTranscript containing the mRNA, and two
+exons.
+
+You can work around this restriction to some extent by using the Alias
+attribute literally:
+
+  XXXX XXXX gene XXXX XXXX XXXX ID=myGene
+  XXXX XXXX mRNA XXXX XXXX XXXX ID=myTranscript;Parent=myGene;Alias=myGene
+  XXXX XXXX exon XXXX XXXX XXXX Parent=myTranscript;Alias=myGene
+  XXXX XXXX exon XXXX XXXX XXXX Parent=myTranscript;Alias=myGene
+
+This limitation will be corrected in the next version of Bio::DB::GFF.
+
+=back
+
+=head1 API
+
+The following is the API for Bio::DB::GFF.
+
+=cut
+
+package Bio::DB::GFF;
+
+use strict;
+
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::DB::GFF::RelSegment;
+use Bio::DB::GFF::Feature;
+use Bio::DB::GFF::Aggregator;
+
+use base qw(Bio::Root::Root Bio::DasI);
+
+# Lookup set of the range-type keywords; delete() consults it when
+# validating its -range_type argument.
+my %valid_range_types = map { $_ => 1 } qw(overlaps contains contained_in);
+
+=head1 Querying GFF Databases
+
+=head2 new
+
+ Title   : new
+ Usage   : my $db = new Bio::DB::GFF(@args);
+ Function: create a new Bio::DB::GFF object
+ Returns : new Bio::DB::GFF object
+ Args    : lists of adaptors and aggregators
+ Status  : Public
+
+These are the arguments:
+
+ -adaptor      Name of the adaptor module to use.  If none
+               provided, defaults to "dbi::mysqlopt".
+
+ -aggregator   Array reference to a list of aggregators
+               to apply to the database.  If none provided,
+	       defaults to ['processed_transcript','alignment'].
+
+  -preferred_groups  When interpreting the 9th column of a GFF2 file,
+                 the indicated group names will have preference over
+                 other attributes, even if they do not come first in
+                 the list of attributes.  This can be a scalar value
+                 or an array reference.
+
+  <other>      Any other named argument pairs are passed to
+               the adaptor for processing.
+
+The adaptor argument must correspond to a module contained within the
+Bio::DB::GFF::Adaptor namespace.  For example, the
+Bio::DB::GFF::Adaptor::dbi::mysql adaptor is loaded by specifying
+'dbi::mysql'.  By Perl convention, the adaptor names are lower case
+because they are loaded at run time.
+
+The aggregator array may contain a list of aggregator names, a list of
+initialized aggregator objects, or a string in the form
+"aggregator_name{subpart1,subpart2,subpart3/main_method}" (the
+"/main_method" part is optional, but if present a feature with the
+main_method must be present in order for aggregation to occur).  For
+example, if you wish to change the components aggregated by the
+transcript aggregator, you could pass it to the GFF constructor this
+way:
+
+  my $transcript = 
+     Bio::DB::GFF::Aggregator::transcript->new(-sub_parts=>[qw(exon intron utr
+                                                               polyA spliced_leader)]);
+
+  my $db = Bio::DB::GFF->new(-aggregator=>[$transcript,'clone','alignment'],
+                             -adaptor   => 'dbi::mysql',
+                             -dsn      => 'dbi:mysql:elegans42');
+
+Alternatively, you could create an entirely new transcript aggregator
+this way:
+
+  my $new_agg = 'transcript{exon,intron,utr,polyA,spliced_leader}';
+  my $db      = Bio::DB::GFF->new(-aggregator=>[$new_agg,'clone','alignment'],
+                                  -adaptor   => 'dbi::mysql',
+                                  -dsn       => 'dbi:mysql:elegans42');
+
+See L<Bio::DB::GFF::Aggregator> for more details.
+
+The B<-preferred_groups> argument is used to change the default
+processing of the 9th column of GFF version 2 files.  By default, the
+first tag/value pair is used to establish the group class and name.
+If you pass -preferred_groups a scalar, the parser will look for a tag
+of the indicated type and use it as the group even if it is not first
+in the file.  If you pass this argument a list of group classes as an
+array ref, then the list will establish the precedence for searching.
+
+The commonly used 'dbi::mysql' adaptor recognizes the following
+adaptor-specific arguments:
+
+  Argument       Description
+  --------       -----------
+
+  -dsn           the DBI data source, e.g. 'dbi:mysql:ens0040'
+                 If a partial name is given, such as "ens0040", the
+                 "dbi:mysql:" prefix will be added automatically.
+
+  -user          username for authentication
+
+  -pass          the password for authentication
+
+  -refclass      landmark Class; defaults to "Sequence"
+
+
+The commonly used 'dbi::mysqlopt' adaptor also recognizes the following
+arguments.
+
+  Argument       Description
+  --------       -----------
+
+  -fasta         path to a directory containing FASTA files for the DNA
+                 contained in this database (e.g. "/usr/local/share/fasta")
+
+  -acedb         an acedb URL to use when converting features into ACEDB
+                    objects (e.g. sace://localhost:2005)
+
+=cut
+
+#'
+
+# Constructor.  Resolves the adaptor name to a class under
+# Bio::DB::GFF::Adaptor::, loads that class on demand, instantiates it with
+# the remaining arguments, and then configures preferred groups, the default
+# class, the aggregators and automerge.
+#
+# Called with exactly one argument, that argument is treated as the DSN and
+# the dbi::mysqlopt adaptor is used.
+sub new {
+  my $package   = shift;
+  my ($adaptor,$aggregators,$args,$refclass,$preferred_groups);
+
+  if (@_ == 1) {  # special case, default to dbi::mysqlopt
+    $adaptor = 'dbi::mysqlopt';
+    $args = {DSN => shift};
+  } else {
+    # the caller's argument list (@_) is what gets rearranged here
+    ($adaptor,$aggregators,$refclass,$preferred_groups,$args) =
+      rearrange([
+                 [qw(ADAPTOR FACTORY)],
+                 [qw(AGGREGATOR AGGREGATORS)],
+                 'REFCLASS',
+                 'PREFERRED_GROUPS'
+                ],@_);
+  }
+
+  $adaptor    ||= 'dbi::mysqlopt';
+  # adaptor names are lower-cased before being appended to the namespace
+  my $class = "Bio::DB::GFF::Adaptor::\L${adaptor}\E";
+  unless ($class->can('new')) {
+    eval "require $class;1;" or $package->throw("Unable to load $adaptor adaptor: $@");
+  }
+
+  # this hack saves the memory adaptor, which loads the GFF file in new()
+  $args->{PREFERRED_GROUPS} = $preferred_groups if defined $preferred_groups;
+
+  my $self = $class->new($args);
+
+  # handle preferred groups
+  $self->preferred_groups($preferred_groups) if defined $preferred_groups;
+  $self->default_class($refclass || 'Sequence');
+
+  # handle the aggregators.
+  # aggregators are responsible for creating complex multi-part features
+  # from the GFF "group" field.  If none are provided, then we provide a
+  # list of the two used in WormBase.
+  # Each aggregator can be a scalar or a ref.  In the former case
+  # it is treated as a class name to call new() on.  In the latter
+  # the aggregator is treated as a ready made object.
+  $aggregators = $self->default_aggregators unless defined $aggregators;
+  my @a = ref($aggregators) eq 'ARRAY' ? @$aggregators : $aggregators;
+  for my $a (@a) {
+    $self->add_aggregator($a);
+  }
+
+  # default settings go here.....
+  $self->automerge(1);  # set automerge to true
+
+  $self;
+}
+
+
+=head2 types
+
+ Title   : types
+ Usage   : $db->types(@args)
+ Function: return list of feature types in range or database
+ Returns : a list of Bio::DB::GFF::Typename objects
+ Args    : see below
+ Status  : public
+
+This routine returns a list of feature types known to the database.
+The list can be database-wide or restricted to a region.  It is also
+possible to find out how many times each feature occurs.
+
+For range queries, it is usually more convenient to create a
+Bio::DB::GFF::Segment object, and then invoke its types() method.
+
+Arguments are as follows:
+
+  -ref        ID of reference sequence
+  -class      class of reference sequence
+  -start      start of segment
+  -stop       stop of segment
+  -enumerate  if true, count the features
+
+The returned value will be a list of Bio::DB::GFF::Typename objects,
+which if evaluated in a string context will return the feature type in 
+"method:source" format.  This object class also has method() and
+source() methods for retrieving the like-named fields.
+
+If -enumerate is true, then the function returns a hash (not a hash
+reference) in which the keys are type names in "method:source" format
+and the values are the number of times each feature appears in the
+database or segment.
+
+The argument -end is a synonym for -stop, and -count is a synonym for
+-enumerate.
+
+=cut
+
+# Parses the named arguments, normalizes the optional type filter with
+# parse_types() and hands the query to get_types().
+sub types {
+  my $self = shift;
+  # the caller's argument list (@_) is what gets rearranged here
+  my ($refseq,$start,$stop,$enumerate,$refclass,$types) =
+    rearrange([
+               [qw(REF REFSEQ)],
+               qw(START),
+               [qw(STOP END)],
+               [qw(ENUMERATE COUNT)],
+               [qw(CLASS SEQCLASS)],
+               [qw(TYPE TYPES)],
+              ],@_);
+  $types = $self->parse_types($types) if defined $types;
+  $self->get_types($refseq,$refclass,$start,$stop,$enumerate,$types);
+}
+
+=head2 classes
+
+ Title   : classes
+ Usage   : $db->classes
+ Function: return list of landmark classes in database
+ Returns : a list of classes
+ Args    : none
+ Status  : public
+
+This routine returns the list of reference classes known to the
+database, or empty if classes are not used by the database.  Classes
+are distinct from types, being essentially qualifiers on the reference
+namespaces.
+
+=cut
+
+# Default implementation: always reports an empty list of classes.
+sub classes {
+  my ($self) = @_;
+  return;
+}
+
+=head2 segment
+
+ Title   : segment
+ Usage   : $db->segment(@args);
+ Function: create a segment object
+ Returns : segment object(s)
+ Args    : numerous, see below
+ Status  : public
+
+This method generates a segment object, which is a Perl object
+subclassed from Bio::DB::GFF::Segment.  The segment can be used to
+find overlapping features and the raw DNA.
+
+When making the segment() call, you specify the ID of a sequence
+landmark (e.g. an accession number, a clone or contig), and a
+positional range relative to the landmark.  If no range is specified,
+then the entire extent of the landmark is used to generate the
+segment.
+
+You may also provide the ID of a "reference" sequence, which will set
+the coordinate system and orientation used for all features contained
+within the segment.  The reference sequence can be changed later.  If
+no reference sequence is provided, then the coordinate system is based
+on the landmark.
+
+Arguments:
+
+ -name         ID of the landmark sequence.
+
+ -class        Database object class for the landmark sequence.
+               "Sequence" assumed if not specified.  This is
+               irrelevant for databases which do not recognize
+               object classes.
+
+ -start        Start of the segment relative to landmark.  Positions
+               follow standard 1-based sequence rules.  If not specified,
+               defaults to the beginning of the landmark.
+
+ -end          Stop of the segment relative to the landmark.  If not specified,
+               defaults to the end of the landmark.
+
+ -stop         Same as -end.
+
+ -offset       For those who prefer 0-based indexing, the offset specifies the
+               position of the new segment relative to the start of the landmark.
+
+ -length       For those who prefer 0-based indexing, the length specifies the
+               length of the new segment.
+
+ -refseq       Specifies the ID of the reference landmark used to establish the
+               coordinate system for the newly-created segment.
+
+ -refclass     Specifies the class of the reference landmark, for those databases
+               that distinguish different object classes.  Defaults to "Sequence".
+
+ -absolute
+               Return features in absolute coordinates rather than relative to the
+               parent segment.
+
+ -nocheck      Don't check the database for the coordinates and length of this
+               feature.  Construct a segment using the indicated name as the
+               reference, a start coordinate of 1, an undefined end coordinate,
+               and a strand of +1.
+
+ -force        Same as -nocheck.
+
+ -seq,-sequence,-sourceseq   Aliases for -name.
+
+ -begin,-end   Aliases for -start and -stop
+
+ -off,-len     Aliases for -offset and -length
+
+ -seqclass     Alias for -class
+
+Here's an example to explain how this works:
+
+  my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:human',-adaptor=>'dbi::mysql');
+
+If successful, $db will now hold the database accessor object.  We now
+try to fetch the fragment of sequence whose ID is A0000182 and class
+is "Accession."
+
+  my $segment = $db->segment(-name=>'A0000182',-class=>'Accession');
+
+If successful, $segment now holds the entire segment corresponding to
+this accession number.  By default, the sequence is used as its own
+reference sequence, so its first base will be 1 and its last base will
+be the length of the accession.
+
+Assuming that this sequence belongs to a longer stretch of DNA, say a
+contig, we can fetch this information like so:
+
+  my $sourceseq = $segment->sourceseq;
+
+and find the start and stop on the source like this:
+
+  my $start = $segment->abs_start;
+  my $stop = $segment->abs_stop;
+
+If we had another segment, say $s2, which is on the same contiguous
+piece of DNA, we can pass that to the refseq() method in order to
+establish it as the coordinate reference point:
+
+  $segment->refseq($s2);
+
+Now calling start() will return the start of the segment relative to
+the beginning of $s2, accounting for differences in strandedness:
+
+  my $rel_start = $segment->start;
+
+IMPORTANT NOTE: This method can be used to return the segment spanned
+by an arbitrary named annotation.  However, if the annotation appears
+at multiple locations on the genome, for example an EST that maps to
+multiple locations, then, provided that all locations reside on the
+same physical segment, the method will return a segment that spans the
+minimum and maximum positions.  If the reference sequence occupies
+ranges on different physical segments, then it returns them all in an
+array context, and raises a "multiple segment exception" exception in
+a scalar context.
+
+=cut
+
+#'
+
+# Builds RelSegment object(s) for the requested landmark, switches each to
+# absolute coordinates when the database is in absolute mode, and lets
+# _multiple_return_args() decide how to hand them back.
+sub segment {
+  my ($self, @spec) = @_;
+  my @found = Bio::DB::GFF::RelSegment->new(-factory => $self,
+                                            $self->setup_segment_args(@spec));
+  for my $seg (@found) {
+    next unless $self->absolute;
+    $seg->absolute(1);
+  }
+
+  return $self->_multiple_return_args(@found);
+}
+
+# Context-sensitive return helper:
+#   no items             -> empty return
+#   one item             -> that item
+#   several, list ctx    -> all items
+#   several, scalar ctx  -> records an error and throws
+#                           'multiple segment exception'
+sub _multiple_return_args {
+  my ($self, @args) = @_;
+
+  return          unless @args;
+  return $args[0] if @args == 1;
+  return @args    if wantarray;   # more than one reference sequence
+
+  $self->error($args[0]->name,
+               " has more than one reference sequence in database.  Please call in a list context to retrieve them all.");
+  $self->throw('multiple segment exception');
+  return;
+}
+
+# backward compatibility -- don't use!
+# (deliberately undocumented too)
+# Same as segment(), with -absolute=>1 appended to the normalized arguments.
+sub abs_segment {
+  my ($self, @spec) = @_;
+  my @normalized = $self->setup_segment_args(@spec);
+  return $self->segment(@normalized, -absolute => 1);
+}
+
+# Normalizes the positional short forms of a segment request into named
+# arguments:
+#   (-foo => ...)          returned unchanged
+#   ($name,$start,$stop)   -> (-name, -start, -stop)
+#   ($class,$name)         -> (-class, -name)
+#   ($name)                -> (-name)
+# Any other positional form (no arguments, or more than three) yields an
+# empty list.
+sub setup_segment_args {
+  my $self = shift;
+  return @_ if defined $_[0] && $_[0] =~ /^-/;
+  return (-name=>$_[0],-start=>$_[1],-stop=>$_[2]) if @_ == 3;
+  return (-class=>$_[0],-name=>$_[1])              if @_ == 2;
+  return (-name=>$_[0])                            if @_ == 1;
+  # Explicit empty return: without it the sub falls off the end and returns
+  # the false value of the last condition as a bogus one-element list.
+  return;
+}
+
+=head2 features
+
+ Title   : features
+ Usage   : $db->features(@args)
+ Function: get all features, possibly filtered by type
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see below
+ Status  : public
+
+This routine will retrieve features in the database regardless of
+position.  It can be used to return all features, or a subset based on
+their method and source.
+
+Arguments are as follows:
+
+  -types     List of feature types to return.  Argument is an array
+	     reference containing strings of the format "method:source"
+
+  -merge     Whether to apply aggregators to the generated features.
+
+  -rare      Turn on optimizations suitable for a relatively rare feature type,
+             where it makes more sense to filter by feature type first,
+             and then by position.
+
+  -attributes A hash reference containing attributes to match.
+
+  -iterator  Whether to return an iterator across the features.
+
+  -binsize   A true value will create a set of artificial features whose
+             start and stop positions indicate bins of the given size, and
+             whose scores are the number of features in the bin.  The
+             class and method of the feature will be set to "bin",
+             its source to "method:source", and its group to "bin:method:source".
+             This is a handy way of generating histograms of feature density.
+
+If -iterator is true, then the method returns a single scalar value
+consisting of a Bio::SeqIO object.  You can call next_seq() repeatedly
+on this object to fetch each of the features in turn.  If iterator is
+false or absent, then all the features are returned as a list.
+
+Currently aggregation is disabled when iterating over a series of
+features.
+
+Types are indicated using the nomenclature "method:source".  Either of
+these fields can be omitted, in which case a wildcard is used for the
+missing field.  Type names without the colon (e.g. "exon") are
+interpreted as the method name and a source wild card.  Regular
+expressions are allowed in either field, as in: "similarity:BLAST.*".
+
+The -attributes argument is a hashref containing one or more attributes
+to match against:
+
+  -attributes => { Gene => 'abc-1',
+                   Note => 'confirmed' }
+
+Attribute matching is simple string matching, and multiple attributes
+are ANDed together.
+
+=cut
+
+# Retrieves features database-wide.  Accepts either named arguments (first
+# argument starts with "-") or a plain list of type names, then delegates to
+# _features() with a 'contains' range type.
+sub features {
+  my $self = shift;
+  my ($types,$automerge,$sparse,$iterator,$other);
+  if (defined $_[0] &&
+      $_[0] =~ /^-/) {
+    # the caller's argument list (@_) is what gets rearranged here
+    ($types,$automerge,$sparse,$iterator,$other) =
+      rearrange([
+                 [qw(TYPE TYPES)],
+                 [qw(MERGE AUTOMERGE)],
+                 [qw(RARE SPARSE)],
+                 'ITERATOR'
+                ],@_);
+  } else {
+    $types = \@_;
+  }
+
+  # for whole database retrievals, we probably don't want to automerge!
+  $automerge = $self->automerge unless defined $automerge;
+  $other ||= {};
+  $self->_features({
+		    rangetype => 'contains',
+		    types     => $types,
+		   },
+		   { sparse    => $sparse,
+		     automerge => $automerge,
+		     iterator  =>$iterator,
+		     %$other,     # later keys override the three above
+		   }
+		   );
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : my $seqio = $self->get_seq_stream(@args)
+ Function: Performs a query and returns an iterator over it
+ Returns : a Bio::SeqIO stream capable of producing sequence
+ Args    : As in features()
+ Status  : public
+
+This routine takes the same arguments as features(), but returns a
+Bio::SeqIO::Stream-compliant object.  Use it like this:
+
+  $stream = $db->get_seq_stream('exon');
+  while (my $exon = $stream->next_seq) {
+     print $exon,"\n";
+  }
+
+NOTE: This is also called get_feature_stream(), since that's what it
+really does.
+
+=cut
+
+# Calls features() with -iterator=>1 added.  A plain list of type names is
+# first wrapped as -types; named arguments (or an undefined first argument)
+# are passed through as they are.
+sub get_seq_stream {
+  my $self = shift;
+  my @args;
+  if (!defined($_[0]) || $_[0] =~ /^-/) {
+    @args = (@_, -iterator => 1);
+  } else {
+    @args = (-types => \@_, -iterator => 1);
+  }
+  return $self->features(@args);
+}
+
+*get_feature_stream = \&get_seq_stream;
+
+=head2 get_feature_by_name
+
+ Title   : get_feature_by_name
+ Usage   : $db->get_feature_by_name($class => $name)
+ Function: fetch features by their name
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : the class and name of the desired feature
+ Status  : public
+
+This method can be used to fetch a named feature from the database.
+GFF annotations are named using the group class and name fields, so
+for features that belong to a group of size one, this method can be
+used to retrieve that group (and is equivalent to the segment()
+method).  Any Alias attributes are also searched for matching names.
+
+An alternative syntax allows you to search for features by name within
+a circumscribed region:
+
+  @f = $db->get_feature_by_name(-class => $class,-name=>$name,
+                                -ref   => $sequence_name,
+                                -start => $start,
+                                -end   => $end);
+
+This method may return zero, one, or several Bio::DB::GFF::Feature
+objects.
+
+Aggregation is performed on features as usual.
+
+NOTE: At various times, this function was called fetch_group(),
+fetch_feature(), fetch_feature_by_name() and segments().  These names
+are preserved for backward compatibility.
+
+=cut
+
+# Fetches features by group class and name, optionally restricted to a
+# region, and runs the applicable aggregators over the result.
+# A single positional argument is taken as the name, with the default class.
+sub get_feature_by_name {
+  my $self = shift;
+  my ($gclass,$gname,$automerge,$ref,$start,$end);
+  if (@_ == 1) {
+    $gclass = $self->default_class;
+    $gname  = shift;
+  } else  {
+    # the caller's argument list (@_) is what gets rearranged here
+    ($gclass,$gname,$automerge,$ref,$start,$end) =
+      rearrange(['CLASS','NAME','AUTOMERGE',
+                 ['REF','REFSEQ'],
+                 'START',['STOP','END']
+                ],@_);
+    $gclass ||= $self->default_class;
+  }
+  $automerge = $self->automerge unless defined $automerge;
+
+  # we need to refactor this... It's repeated code (see below)...
+  # NOTE(review): get_feature_by_attribute() builds this list with unshift,
+  # reversing the aggregator order; push keeps it.  Confirm which order is
+  # intended before unifying the two.
+  my @aggregators;
+  if ($automerge) {
+    for my $a ($self->aggregators) {
+      push @aggregators,$a if $a->disaggregate([],$self);
+    }
+  }
+
+  my %groups;         # cache the groups we create to avoid consuming too much unnecessary memory
+  my $features = [];
+  my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
+  # Plain conditional expression rather than "my ... if COND", whose
+  # behaviour is undefined and can leave a stale value from an earlier call.
+  my $location = defined $ref ? [$ref,$start,$end] : undef;
+  $self->_feature_by_name($gclass,$gname,$location,$callback);
+
+  warn "aggregating...\n" if $self->debug;
+  foreach my $a (@aggregators) {
+      $a->aggregate($features,$self) or next;
+  }
+
+  @$features;
+}
+
+# horrible indecision regarding proper names!
+*fetch_group   = *fetch_feature = *fetch_feature_by_name = \&get_feature_by_name;
+*segments      = \&segment;
+
+=head2 get_feature_by_target
+
+ Title   : get_feature_by_target
+ Usage   : $db->get_feature_by_target($class => $name)
+ Function: fetch features by their similarity target
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : the class and name of the desired feature
+ Status  : public
+
+This method can be used to fetch a named feature from the database
+based on its similarity hit.
+
+=cut
+
+# Thin wrapper: forwards all arguments to get_feature_by_name().
+sub get_feature_by_target {
+  my ($self, @query) = @_;
+  return $self->get_feature_by_name(@query);
+}
+
+=head2 get_feature_by_attribute
+
+ Title   : get_feature_by_attribute
+ Usage   : $db->get_feature_by_attribute(attribute1=>value1,attribute2=>value2)
+ Function: fetch segments by combinations of attribute values
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : attribute name/value pairs, as a list or a hash reference
+ Status  : public
+
+This method can be used to fetch a set of features from the database.
+Attributes are a list of name=E<gt>value pairs.  They will be logically
+ANDED together.
+
+=cut
+
+# Fetches features matching the given attribute name/value pairs (a flat
+# list or a hash reference) and runs the applicable aggregators over them.
+sub get_feature_by_attribute {
+  my $self = shift;
+  my %attributes = ref($_[0]) ? %{$_[0]} : @_;
+
+  # we need to refactor this... It's repeated code (see above)...
+  my @aggregators;
+  if ($self->automerge) {
+    for my $a ($self->aggregators) {
+      # unshift reverses the order, so the last aggregator ends up first
+      unshift @aggregators,$a if $a->disaggregate([],$self);
+    }
+  }
+
+  my %groups;         # cache the groups we create to avoid consuming too much unnecessary memory
+  my $features = [];
+  my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
+  $self->_feature_by_attribute(\%attributes,$callback);
+
+  warn "aggregating...\n" if $self->debug;
+  foreach my $a (@aggregators) {  # last aggregator gets first shot
+      $a->aggregate($features,$self) or next;
+  }
+
+  @$features;
+}
+
+# more indecision...
+*fetch_feature_by_attribute = \&get_feature_by_attribute;
+
+=head2 get_feature_by_id
+
+ Title   : get_feature_by_id
+ Usage   : $db->get_feature_by_id($id)
+ Function: fetch segments by feature ID
+ Returns : a Bio::DB::GFF::Feature object
+ Args    : the feature ID
+ Status  : public
+
+This method can be used to fetch a feature from the database using its
+ID.  Not all GFF databases support IDs, so be careful with this.
+
+=cut
+
+# Fetches features by feature ID.  Accepts a list of IDs or one array
+# reference.  Returns every match in list context and the first match in
+# scalar context.
+sub get_feature_by_id {
+  my $self = shift;
+  my $id   = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;
+  my %groups;         # cache the groups we create to avoid consuming too much unnecessary memory
+  my $features = [];
+  my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
+  $self->_feature_by_id($id,'feature',$callback);
+  return wantarray ? @$features : $features->[0];
+}
+*fetch_feature_by_id = \&get_feature_by_id;
+
+=head2 get_feature_by_gid
+
+ Title   : get_feature_by_gid
+ Usage   : $db->get_feature_by_gid($id)
+ Function: fetch segments by group ID
+ Returns : a Bio::DB::GFF::Feature object
+ Args    : the group ID
+ Status  : public
+
+This method can be used to fetch a feature from the database using its
+group ID.  Not all GFF databases support IDs, so be careful with this.
+
+The group ID is often more interesting than the feature ID, since
+groups can be complex objects containing subobjects.
+
+=cut
+
+# Fetches features by group ID.  Accepts a list of IDs or one array
+# reference.  Returns every match in list context and the first match in
+# scalar context.
+sub get_feature_by_gid {
+  my $self = shift;
+  my $id   = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;
+  my %groups;         # cache the groups we create to avoid consuming too much unnecessary memory
+  my $features = [];
+  my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
+  $self->_feature_by_id($id,'group',$callback);
+  return wantarray ? @$features : $features->[0];
+}
+*fetch_feature_by_gid = \&get_feature_by_gid;
+
+=head2 delete_features
+
+ Title   : delete_features
+ Usage   : $db->delete_features(@ids_or_features)
+ Function: delete one or more features
+ Returns : count of features deleted
+ Args    : list of features or feature ids
+ Status  : public
+
+Pass this method a list of numeric feature ids or a set of features.
+It will attempt to remove the features from the database and return a
+count of the features removed.  
+
+NOTE: This method is also called delete_feature().  Also see
+delete_groups().
+
+=cut
+
+*delete_feature = \&delete_features;
+
+# Maps each argument to a feature id (feature objects via ->id, anything else
+# taken as an id already) and passes the ids to _delete_features().
+# Returns nothing when called without ids.
+sub delete_features {
+  my ($self, @features_or_ids) = @_;
+  my @ids;
+  for my $item (@features_or_ids) {
+    push @ids, UNIVERSAL::isa($item,'Bio::DB::GFF::Feature') ? $item->id : $item;
+  }
+  return unless @ids;
+  return $self->_delete_features(@ids);
+}
+
+=head2 delete_groups
+
+ Title   : delete_groups
+ Usage   : $db->delete_groups(@ids_or_features)
+ Function: delete one or more feature groups
+ Returns : count of features deleted
+ Args    : list of features or feature group ids
+ Status  : public
+
+Pass this method a list of numeric group ids or a set of features.  It
+will attempt to recursively remove the features and ALL members of
+their group from the database.  It returns a count of the number of
+features (not groups) removed.
+
+NOTE: This method is also called delete_group().  Also see
+delete_features().
+
+=cut
+
+# delete_group() is an alias for delete_groups().
+*delete_group = \&delete_groups;
+
+# Maps each argument to a group id (feature objects via ->group_id, anything
+# else taken as a group id already) and passes the ids to _delete_groups().
+# Returns nothing when called without ids.
+sub delete_groups {
+  my $self = shift;
+  my @features_or_ids = @_;
+  my @ids = map {UNIVERSAL::isa($_,'Bio::DB::GFF::Feature') ? $_->group_id : $_} @features_or_ids;
+  return unless @ids;
+  $self->_delete_groups(@ids);
+}
+
+=head2 delete
+
+ Title   : delete
+ Usage   : $db->delete(@args)
+ Function: delete features
+ Returns : count of features deleted -- if available
+ Args    : numerous, see below
+ Status  : public
+
+This method deletes all features that overlap the specified region or
+are of a particular type.  If no arguments are provided and the -force
+argument is true, then deletes ALL features.
+
+Arguments:
+
+ -name         ID of the landmark sequence.
+
+ -ref          ID of the landmark sequence (synonym for -name).
+
+ -class        Database object class for the landmark sequence.
+               "Sequence" assumed if not specified.  This is
+               irrelevant for databases which do not recognize
+               object classes.
+
+ -start        Start of the segment relative to landmark.  Positions
+               follow standard 1-based sequence rules.  If not specified,
+               defaults to the beginning of the landmark.
+
+ -end          Stop of the segment relative to the landmark.  If not specified,
+               defaults to the end of the landmark.
+
+ -offset       Zero-based addressing
+
+ -length       Length of region
+
+ -type,-types  Either a single scalar type to be deleted, or a
+               reference to an array of types.
+
+ -force        Force operation to be performed even if it would delete
+               entire feature table.
+
+ -range_type   Control the range type of the deletion.  One of "overlaps" (default)
+               "contains" or "contained_in"
+
+Examples:
+
+  $db->delete(-type=>['intron','repeat:repeatMasker']);  # remove all introns & repeats
+  $db->delete(-name=>'chr3',-start=>1,-end=>1000);       # remove annotations on chr3 from 1 to 1000
+  $db->delete(-name=>'chr3',-type=>'exon');              # remove all exons on chr3
+
+The short form of this call, as described in segment() is also allowed:
+
+  $db->delete("chr3",1=>1000);
+  $db->delete("chr3");
+
+IMPORTANT NOTE: This method only deletes features.  It does *NOT*
+delete the names of groups that contain the deleted features.  Group
+IDs will be reused if you later load a feature with the same group
+name as one that was previously deleted.
+
+NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the
+result code from the SQL DELETE operation.  Some dbd drivers return the
+count of rows deleted, while others return 0E0.  Caveat emptor.
+
+=cut
+
+# Deletes features by region and/or type.  Positional short forms are first
+# normalized by setup_segment_args(); the range type is validated against
+# %valid_range_types; when a landmark name is given the matching segments
+# are looked up, and the request is then handed to _delete().
+sub delete {
+  my $self = shift;
+  my @args = $self->setup_segment_args(@_);
+  my ($name,$class,$start,$end,$offset,$length,$type,$force,$range_type) =
+    rearrange([['NAME','REF'],'CLASS','START',[qw(END STOP)],'OFFSET',
+	       'LENGTH',[qw(TYPE TYPES)],'FORCE','RANGE_TYPE'],@args);
+  $offset = 0 unless defined $offset;
+  $start = $offset+1 unless defined $start;
+  $end   = $start+$length-1 if !defined $end and $length;
+  $class ||= $self->default_class;
+
+  my $types = $self->parse_types($type);  # parse out list of types
+
+  $range_type ||= 'overlaps';
+  # validation is case-insensitive; the value is passed on as given
+  $self->throw("range type must be one of {".
+	       join(',',keys %valid_range_types).
+	       "}\n")
+    unless $valid_range_types{lc $range_type};
+
+
+  my @segments;
+  if (defined $name && $name ne '') {
+    my @segment_args = (-name=>$name,-class=>$class);
+    push @segment_args,(-start=>$start) if defined $start;
+    push @segment_args,(-end  =>$end)   if defined $end;
+    @segments = $self->segment(@segment_args);
+    # nothing to delete if the landmark cannot be found
+    return unless @segments;
+  }
+  $self->_delete({segments   => \@segments,
+		  types      => $types,
+		  range_type => $range_type,
+		  force      => $force}
+		);
+}
+
+=head2 absolute
+
+ Title   : absolute
+ Usage   : $abs = $db->absolute([$abs]);
+ Function: gets/sets absolute mode
+ Returns : current setting of absolute mode boolean
+ Args    : new setting for absolute mode boolean
+ Status  : public
+
+$db-E<gt>absolute(1) will turn on absolute mode for the entire database.
+All segments retrieved will use absolute coordinates by default,
+rather than relative coordinates.  You can still set them to use
+relative coordinates by calling $segment-E<gt>absolute(0).
+
+Note that this is not the same as calling abs_segment(); it continues
+to allow you to look up groups that are not used directly as reference
+sequences.
+
+=cut
+
+# Accessor for the {absolute} flag.  Returns the value held before the call;
+# when an argument is supplied it is stored as the new value.
+sub absolute {
+  my ($self, @new) = @_;
+  my $previous = $self->{absolute};
+  $self->{absolute} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 strict_bounds_checking
+
+ Title   : strict_bounds_checking
+ Usage   : $flag = $db->strict_bounds_checking([$flag])
+ Function: gets/sets strict bounds checking
+ Returns : current setting of bounds checking flag
+ Args    : new setting for bounds checking flag
+ Status  : public
+
+This flag enables extra checks for segment requests that go beyond the
+ends of their reference sequences.  If bounds checking is enabled,
+then retrieved segments will be truncated to their physical length,
+and their truncated() methods will return true.
+
+If the flag is off (the default), then the module will return segments
+that appear to extend beyond their physical boundaries.  Requests for
+features beyond the end of the segment will, however, return empty.
+
+=cut
+
+# Accessor for the {strict} flag.  Returns the value held before the call;
+# when an argument is supplied it is stored as the new value.
+sub strict_bounds_checking {
+  my ($self, @new) = @_;
+  my $previous = $self->{strict};
+  $self->{strict} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+NOTE: Bio::DB::RandomAccessI compliant method
+
+=cut
+
+# Forwards all arguments to get_feature_by_name().
+sub get_Seq_by_id {
+  my ($self, @query) = @_;
+  return $self->get_feature_by_name(@query);
+}
+
+
+=head2 get_Seq_by_accession
+
+ Title   : get_Seq_by_accession
+ Usage   : $seq = $db->get_Seq_by_accession('AL12234')
+ Function: Gets a Bio::Seq object by its accession
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+NOTE: Bio::DB::RandomAccessI compliant method
+
+=cut
+
+sub  get_Seq_by_accession {
+  # Accession lookup is delegated to get_feature_by_name(); the arguments
+  # and the return value pass straight through.
+  my ($self,@lookup_args) = @_;
+  return $self->get_feature_by_name(@lookup_args);
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+NOTE: Bio::DB::RandomAccessI compliant method
+
+=cut
+
+sub  get_Seq_by_acc {
+  # Same delegation as the other get_Seq_by_* methods: hand everything to
+  # get_feature_by_name() and return whatever it returns.
+  my ($self,@lookup_args) = @_;
+  return $self->get_feature_by_name(@lookup_args);
+}
+
+=head2 get_Stream_by_name
+
+  Title   : get_Stream_by_name
+  Usage   : $seq = $db->get_Stream_by_name(@ids);
+  Function: Retrieves a stream of Seq objects given their names
+  Returns : a Bio::SeqIO stream object
+  Args    : an array of unique ids/accession numbers, or 
+            an array reference
+
+NOTE: get_Stream_by_batch() is an alias for get_Stream_by_id(), not for this method.
+
+=cut
+
+sub get_Stream_by_name {
+  my ($self,@ids) = @_;
+  # The ids may arrive as a flat list, or as a reference in the first
+  # argument, in which case that reference is used as the id list as-is.
+  my $id_list = \@ids;
+  $id_list    = $ids[0] if ref $ids[0];
+  # 'name' selects the lookup type used by the iterator
+  return Bio::DB::GFF::ID_Iterator->new($self,$id_list,'name');
+}
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $seq = $db->get_Stream_by_id(@ids);
+  Function: Retrieves a stream of Seq objects given their ids
+  Returns : a Bio::SeqIO stream object
+  Args    : an array of unique ids/accession numbers, or 
+            an array reference
+
+NOTE: This is also called get_Stream_by_batch()
+
+=cut
+
+sub get_Stream_by_id {
+  my ($self,@ids) = @_;
+  # Accept a flat list of ids, or a reference in the first argument that
+  # is then used directly as the id list.
+  my $id_list = \@ids;
+  $id_list    = $ids[0] if ref $ids[0];
+  # 'feature' selects the lookup type used by the iterator
+  return Bio::DB::GFF::ID_Iterator->new($self,$id_list,'feature');
+}
+
+=head2 get_Stream_by_batch ()
+
+  Title   : get_Stream_by_batch
+  Usage   : $seq = $db->get_Stream_by_batch(@ids);
+  Function: Retrieves a stream of Seq objects given their ids
+  Returns : a Bio::SeqIO stream object
+  Args    : an array of unique ids/accession numbers, or 
+            an array reference
+
+NOTE: This is the same as get_Stream_by_id().
+
+=cut
+
+*get_Stream_by_batch = \&get_Stream_by_id;  # typeglob alias: both names run the same sub
+
+
+=head2 get_Stream_by_group ()
+
+Bioperl compatibility.
+
+=cut
+
+sub get_Stream_by_group {
+  my ($self,@ids) = @_;
+  # Accept a flat list of ids, or a reference in the first argument that
+  # is then used directly as the id list.
+  my $id_list = \@ids;
+  $id_list    = $ids[0] if ref $ids[0];
+  # 'group' selects the lookup type used by the iterator
+  return Bio::DB::GFF::ID_Iterator->new($self,$id_list,'group');
+}
+
+=head2 all_seqfeatures
+
+ Title   : all_seqfeatures
+ Usage   : @features = $db->all_seqfeatures(@args)
+ Function: fetch all the features in the database
+ Returns : an array of features, or an iterator
+ Args    : See below
+ Status  : public
+
+This is equivalent to calling $db-E<gt>features() without any types, and
+will return all the features in the database.  The -merge and
+-iterator arguments are recognized, and behave the same as described
+for features().
+
+=cut
+
+sub all_seqfeatures {
+  # Fetch every feature by calling features() with no type restriction.
+  # Only -merge/-automerge and -iterator are picked out of the arguments.
+  my $self = shift;
+  my ($automerge,$iterator) = rearrange([
+                                         [qw(MERGE AUTOMERGE)],
+                                         'ITERATOR'
+                                        ],@_);
+
+  # Forward only the options the caller actually supplied, so that
+  # features() can apply its own defaults for the rest.
+  my @args;
+  push @args,(-merge=>$automerge)   if defined $automerge;
+  push @args,(-iterator=>$iterator) if defined $iterator;
+  return $self->features(@args);
+}
+
+=head1 Creating and Loading GFF Databases
+
+=head2 initialize
+
+ Title   : initialize
+ Usage   : $db->initialize(-erase=>$erase,-option1=>value1,-option2=>value2);
+ Function: initialize a GFF database
+ Returns : true if initialization successful
+ Args    : a set of named parameters
+ Status  : Public
+
+This method can be used to initialize an empty database.  It takes the following
+named arguments:
+
+  -erase     A boolean value.  If true the database will be wiped clean if it
+             already contains data.
+
+Other named arguments may be recognized by subclasses.  They become database
+meta values that control various settable options.
+
+As a shortcut (and for backward compatibility) a single true argument
+is the same as initialize(-erase=E<gt>1).
+
+=cut
+
+sub initialize {
+  # Initialize (optionally erasing) the database, then write the meta
+  # values.  Returns 1 on success, or nothing if do_initialize() fails.
+  my $self = shift;
+
+  # -erase is extracted; the second value is dereferenced as a hash of
+  # extra meta values below (presumably the remaining named arguments).
+  my ($erase,$meta) = rearrange(['ERASE'],@_);
+  $meta ||= {};
+
+  # initialize (possibly erasing)
+  return unless $self->do_initialize($erase);
+  my @default = $self->default_meta_values;
+
+  # Uppercase the tag names, i.e. the even-indexed elements of the
+  # tag=>value list (necessary for case-insensitive SQL databases).
+  for (my $i=0; $i<@default; $i+=2) {
+    $default[$i] = uc $default[$i];
+  }
+
+  # caller-supplied values override the defaults
+  my %values = (@default,%$meta);
+  foreach my $tag (keys %values) {
+    $self->meta($tag => $values{$tag});
+  }
+  return 1;
+}
+
+
+=head2 load_gff
+
+ Title   : load_gff
+ Usage   : $db->load_gff($file|$directory|$filehandle [,$verbose]);
+ Function: load GFF data into database
+ Returns : count of records loaded
+ Args    : a directory, a file, a list of files, 
+           or a filehandle
+ Status  : Public
+
+This method takes a single overloaded argument, which can be any of:
+
+=over 4
+
+=item 1. a scalar corresponding to a GFF file on the system
+
+A pathname to a local GFF file.  Any files ending with the .gz, .Z, or
+.bz2 suffixes will be transparently decompressed with the appropriate
+command-line utility.
+
+=item 2. an array reference containing a list of GFF files on the system
+
+For example ['/home/gff/gff1.gz','/home/gff/gff2.gz']
+
+=item 3. directory path
+
+The indicated directory will be searched for all files ending in the
+suffixes .gff, .gff.gz, .gff.Z or .gff.bz2.
+
+=item 4. filehandle
+
+An open filehandle from which to read the GFF data.  Tied filehandles
+now work as well.
+
+=item 5. a pipe expression
+
+A pipe expression will also work. For example, a GFF file on a remote
+web server can be loaded with an expression like this:
+
+  $db->load_gff("lynx -dump -source http://stein.cshl.org/gff_test |");
+
+=back
+
+The optional second argument, if true, will turn on verbose status
+reports that indicate the progress.
+
+If successful, the method will return the number of GFF lines
+successfully loaded.
+
+NOTE:this method used to be called load(), but has been changed.  The
+old method name is also recognized.
+
+=cut
+
+sub load_gff {
+  # Load GFF data from a file, directory, list of files, pipe expression
+  # or filehandle.  Returns whatever do_load_gff() returns, or nothing if
+  # setup_argv() produces no input sources.
+  my $self              = shift;
+  my $file_or_directory = shift || '.';
+  my $verbose           = shift;
+
+  local $self->{__verbose__} = $verbose;
+
+  # Tied handles cannot be routed through ARGV, so read them directly.
+  return $self->do_load_gff($file_or_directory) if ref($file_or_directory)
+                                                   && tied *$file_or_directory;
+
+  # setup_argv() may re-open STDIN onto the caller's handle, so keep a
+  # duplicate of the real STDIN to put back afterwards.
+  my $tied_stdin = tied(*STDIN);
+  my $SAVEIN;
+  unless ($tied_stdin) {
+    open($SAVEIN,"<&STDIN") or undef $SAVEIN;
+  }
+
+  local @ARGV = $self->setup_argv($file_or_directory,'gff','gff3') or return;  # to play tricks with reader
+  my $result = $self->do_load_gff('ARGV');
+
+  # Restore STDIN by dup'ing the saved handle.  The "<&" mode is required:
+  # a plain "<" would treat the stringified handle as a file name and fail.
+  open(STDIN,"<&",$SAVEIN) if $SAVEIN;
+  return $result;
+}
+
+*load = \&load_gff;  # typeglob alias keeping the old method name callable
+
+=head2 load_fasta
+
+ Title   : load_fasta
+ Usage   : $db->load_fasta($file|$directory|$filehandle);
+ Function: load FASTA data into database
+ Returns : count of records loaded
+ Args    : a directory, a file, a list of files, 
+           or a filehandle
+ Status  : Public
+
+This method takes a single overloaded argument, which can be any of:
+
+=over 4
+
+=item 1. scalar corresponding to a FASTA file on the system
+
+A pathname to a local FASTA file.  Any files ending with the .gz, .Z, or
+.bz2 suffixes will be transparently decompressed with the appropriate
+command-line utility.
+
+=item 2. array reference containing a list of FASTA files on the
+system
+
+For example ['/home/fasta/genomic.fa.gz','/home/fasta/genomic.fa.gz']
+
+=item 3. path to a directory
+
+The indicated directory will be searched for all files ending in the
+suffixes .fa, .fa.gz, .fa.Z or .fa.bz2.
+
+=item 4. filehandle
+
+An open filehandle from which to read the FASTA data.
+
+=item 5. pipe expression
+
+A pipe expression will also work. For example, a FASTA file on a remote
+web server can be loaded with an expression like this:
+
+  $db->load_fasta("lynx -dump -source http://stein.cshl.org/fasta_test.fa |");
+
+=back
+
+=cut
+
+sub load_fasta {
+  # Load FASTA data from a file, directory, list of files, pipe expression
+  # or filehandle.  Returns whatever load_sequence() returns, or nothing if
+  # setup_argv() produces no input sources.
+  my $self              = shift;
+  my $file_or_directory = shift || '.';
+  my $verbose           = shift;
+
+  local $self->{__verbose__} = $verbose;
+
+  # Tied handles cannot be routed through ARGV, so read them directly.
+  return $self->load_sequence($file_or_directory) if ref($file_or_directory)
+                                                     && tied *$file_or_directory;
+
+  # setup_argv() may re-open STDIN onto the caller's handle, so keep a
+  # duplicate of the real STDIN to put back afterwards.
+  my $tied = tied(*STDIN);
+  my $SAVEIN;
+  unless ($tied) {
+    open($SAVEIN,"<&STDIN") or undef $SAVEIN;
+  }
+
+  local @ARGV = $self->setup_argv($file_or_directory,'fa','dna','fasta') or return;  # to play tricks with reader
+  my $result = $self->load_sequence('ARGV');
+
+  # Restore STDIN by dup'ing the saved handle.  The "<&" mode is required:
+  # a plain "<" would treat the stringified handle as a file name and fail.
+  open(STDIN,"<&",$SAVEIN) if $SAVEIN;
+  return $result;
+}
+
+=head2 load_sequence_string
+
+ Title   : load_sequence_string
+ Usage   : $db->load_sequence_string($id,$dna)
+ Function: load a single DNA entry
+ Returns : true if successfully loaded
+ Args    : the sequence id and a raw sequence string (DNA, RNA, protein)
+ Status  : Public
+
+=cut
+
+sub load_sequence_string {
+  # Store one sequence given as ($acc,$seq).  Full-size chunks are written
+  # first by insert_sequence_chunk(), which advances $offset and trims $seq
+  # through the references; whatever remains is then written in one call.
+  # Returns 1 on success, nothing if either step reports failure.
+  my ($self,$acc,$seq) = @_;
+  my $offset = 0;
+  return unless $self->insert_sequence_chunk($acc,\$offset,\$seq);
+  return unless $self->insert_sequence($acc,$offset,$seq);
+  return 1;
+}
+
+sub setup_argv {
+  # Turn the overloaded "file or directory" argument into a list of input
+  # sources suitable for assignment to @ARGV.  @suffixes are the file
+  # extensions searched for when the argument is a directory.
+  my ($self,$file_or_directory,@suffixes) = @_;
+  no strict 'refs';  # so that we can call fileno() on the argument
+
+  my @sources;
+
+  if (-d $file_or_directory) {
+    # Because glob() is broken with long file names that contain spaces
+    if ($^O =~ /^MSWin/i && eval 'use Win32; 1') {
+      $file_or_directory = Win32::GetShortPathName($file_or_directory);
+    }
+    # each suffix is matched plain and in its .gz/.Z/.bz2 compressed forms
+    foreach my $suffix (@suffixes) {
+      push @sources,glob("$file_or_directory/*.{${suffix},${suffix}.gz,${suffix}.Z,${suffix}.bz2}");
+    }
+  }
+  elsif (my $fd = fileno($file_or_directory)) {
+    # an open filehandle: alias STDIN to its descriptor and read from '-'
+    open STDIN,"<&=$fd" or $self->throw("Can't dup STDIN");
+    @sources = '-';
+  }
+  elsif (ref $file_or_directory) {
+    # a reference is dereferenced as an array of file names
+    @sources = @$file_or_directory;
+  }
+  else {
+    # a single file name or pipe expression
+    @sources = $file_or_directory;
+  }
+
+  # Compressed files become pipe expressions that decompress on the fly.
+  foreach my $source (@sources) {
+    if    ($source =~ /\.gz$/)  { $source = "gunzip -c $source |" }
+    elsif ($source =~ /\.Z$/)   { $source = "uncompress -c $source |" }
+    elsif ($source =~ /\.bz2$/) { $source = "bunzip2 -c $source |" }
+  }
+  return @sources;
+}
+
+=head2 lock_on_load
+
+ Title   : lock_on_load
+ Usage   : $lock = $db->lock_on_load([$lock])
+ Function: set write locking during load
+ Returns : current value of lock-on-load flag
+ Args    : new value of lock-on-load-flag
+ Status  : Public
+
+This method is honored by some of the adaptors.  If the value is true,
+the tables used by the GFF modules will be locked for writing during
+loads and inaccessible to other processes.
+
+=cut
+
+sub lock_on_load {
+  # Getter/setter for the {lock} flag; returns the value held before the
+  # call and stores the first argument, if any, as the new value.
+  my ($self,@new_flag) = @_;
+  my $previous = $self->{lock};
+  $self->{lock} = $new_flag[0] if @new_flag;
+  return $previous;
+}
+
+=head2 meta
+
+ Title   : meta
+ Usage   : $value = $db->meta($name [,$newval])
+ Function: get or set a meta variable
+ Returns : a string
+ Args    : meta variable name and optionally value
+ Status  : abstract
+
+Get or set a named metavalue for the database.  Metavalues can be
+used for database-specific settings.
+
+By default, this method does nothing!
+
+=cut
+
+sub meta {
+  # Base-class placeholder: accepts ($name,$value) but neither stores nor
+  # looks up anything, and returns nothing.
+  my ($self,$name,$value) = @_;
+  return;
+}
+
+=head2 default_meta_values
+
+ Title   : default_meta_values
+ Usage   : %values = $db->default_meta_values
+ Function: return the default meta values for the database
+ Returns : a list of tag=>value pairs
+ Args    : none
+ Status  : protected
+
+This method returns a list of tag=E<gt>value pairs that contain default
+meta information about the database.  It is invoked by initialize() to
+write out the default meta values.  The base class version returns an
+empty list.
+
+For things to work properly, meta value names must be UPPERCASE.
+
+=cut
+
+sub default_meta_values {
+  # Base class has no default meta values: empty list (undef in scalar
+  # context).
+  my ($self) = @_;
+  return;
+}
+
+
+=head2 error
+
+ Title   : error
+ Usage   : $db->error( [$new error] );
+ Function: read or set error message
+ Returns : error message
+ Args    : an optional argument to set the error message
+ Status  : Public
+
+This method can be used to retrieve the last error message.  Errors
+are not reset to empty by successful calls, so contents are only valid
+immediately after an error condition has been detected.
+
+=cut
+
+sub error {
+  # Getter/setter for the last error message.  Returns the message held
+  # before the call; when arguments are given they are concatenated
+  # (no separator) into the new message.
+  my ($self,@message_parts) = @_;
+  my $previous = $self->{error};
+  $self->{error} = join('',@message_parts) if @message_parts;
+  return $previous;
+}
+
+=head2 debug
+
+ Title   : debug
+ Usage   : $db->debug( [$flag] );
+ Function: read or set debug flag
+ Returns : current value of debug flag
+ Args    : new debug flag (optional)
+ Status  : Public
+
+This method can be used to turn on debug messages.  The exact nature
+of those messages depends on the adaptor in use.
+
+=cut
+
+sub debug {
+  # Getter/setter for the {debug} flag; returns the prior value and stores
+  # the first argument, if one is passed, as the new value.
+  my ($self,@new_flag) = @_;
+  my $previous = $self->{debug};
+  $self->{debug} = $new_flag[0] if @new_flag;
+  return $previous;
+}
+
+
+=head2 automerge
+
+ Title   : automerge
+ Usage   : $db->automerge( [$new automerge] );
+ Function: get or set automerge value
+ Returns : current value (boolean)
+ Args    : an optional argument to set the automerge value
+ Status  : Public
+
+By default, this module will use the aggregators to merge groups into
+single composite objects.  This default can be changed to false by
+calling automerge(0).
+
+=cut
+
+sub automerge {
+  # Getter/setter for the {automerge} flag; returns the prior value and
+  # stores the first argument, if one is passed, as the new value.
+  my ($self,@new_flag) = @_;
+  my $previous = $self->{automerge};
+  $self->{automerge} = $new_flag[0] if @new_flag;
+  return $previous;
+}
+
+=head2 attributes
+
+ Title   : attributes
+ Usage   : @attributes = $db->attributes($id,$name)
+ Function: get the "attributes" on a particular feature
+ Returns : an array of string
+ Args    : feature ID
+ Status  : public
+
+Some GFF version 2 files use the groups column to store a series of
+attribute/value pairs.  In this interpretation of GFF, the first such
+pair is treated as the primary group for the feature; subsequent pairs
+are treated as attributes.  Two attributes have special meaning:
+"Note" is for backward compatibility and is used for unstructured text
+remarks.  "Alias" is considered as a synonym for the feature name.
+
+If no name is provided, then attributes() returns a flattened hash, of
+attribute=E<gt>value pairs.  This lets you do:
+
+  %attributes = $db->attributes($id);
+
+If no arguments are provided, attributes() will return the list of
+all attribute names:
+
+  @attribute_names = $db->attributes();
+
+Normally, however, attributes() will be called by the feature:
+
+  @notes = $feature->attributes('Note');
+
+In a scalar context, attributes() returns the first value of the
+attribute if a tag is present, otherwise a hash reference in which the
+keys are attribute names and the values are anonymous arrays
+containing the values.
+
+=cut
+
+sub attributes {
+  # Fetch attributes via do_attributes(), which receives the caller's
+  # arguments unchanged.  Returns nothing when that call yields no data.
+  my ($self,@query) = @_;
+  my $tag = $query[1];
+
+  my @found = $self->do_attributes(@query);
+  return unless @found;
+
+  # list context: hand back the raw result list
+  return @found if wantarray;
+
+  # scalar context: first value when a tag was requested ...
+  return $found[0] if $tag;
+
+  # ... otherwise fold the key/value pairs into { key => [values...] }
+  my %values_of;
+  while (@found) {
+    my ($key,$value) = splice(@found,0,2);
+    push @{$values_of{$key}},$value;
+  }
+  return \%values_of;
+}
+
+=head2 fast_queries
+
+ Title   : fast_queries
+ Usage   : $flag = $db->fast_queries([$flag])
+ Function: turn on and off the "fast queries" option
+ Returns : a boolean
+ Args    : a boolean flag (optional)
+ Status  : public
+
+The mysql database driver (and possibly others) support a "fast" query
+mode that caches results on the server side.  This makes queries come
+back faster, particularly when creating iterators.  The downside is
+that while iterating, new queries will die with a "command synch"
+error.  This method turns the feature on and off.
+
+For databases that do not support a fast query, this method has no
+effect.
+
+=cut
+
+# override this method in order to set the mysql_use_result attribute, which is an obscure
+# but extremely powerful optimization for both performance and memory.
+sub fast_queries {
+  # Getter/setter for the {fast_queries} flag.  This version only records
+  # the flag; it returns the value held before the call.
+  my ($self,@new_flag) = @_;
+  my $previous = $self->{fast_queries};
+  $self->{fast_queries} = $new_flag[0] if @new_flag;
+  return $previous;
+}
+
+=head2 add_aggregator
+
+ Title   : add_aggregator
+ Usage   : $db->add_aggregator($aggregator)
+ Function: add an aggregator to the list
+ Returns : nothing
+ Args    : an aggregator
+ Status  : public
+
+This method will append an aggregator to the end of the list of
+registered aggregators.  Three different argument types are accepted:
+
+  1) a Bio::DB::GFF::Aggregator object -- will be added
+  2) a string in the form "aggregator_name{subpart1,subpart2,subpart3/main_method}"
+         -- will be turned into a Bio::DB::GFF::Aggregator object (the /main_method
+        part is optional).
+  3) a valid Perl token -- will be turned into a Bio::DB::GFF::Aggregator
+        subclass, where the token corresponds to the subclass name.
+
+=cut
+
+sub add_aggregator {
+  # Append an aggregator to $self->{aggregators}.  The argument may be an
+  # aggregator object, a "name{part1,part2/main}" specification, or a bare
+  # name that is mapped onto a Bio::DB::GFF::Aggregator:: subclass.
+  my ($self,$aggregator) = @_;
+  my $list = $self->{aggregators} ||= [];
+
+  if (ref $aggregator) { # an object
+    # drop any registered aggregator with the same method, then append
+    my @kept = grep { $_->get_method ne $aggregator->get_method } @$list;
+    @$list = @kept;
+    push @$list,$aggregator;
+  }
+
+  elsif ($aggregator =~ /^(\w+)\{([^\/\}]+)\/?(.*)\}$/) {
+    # specification string: method name, comma-separated sub parts and an
+    # optional "/main_method" suffix
+    my ($agg_name,$subparts,$mainpart) = ($1,$2,$3);
+    my @subparts = split /,\s*/,$subparts;
+    my @args = (
+                -method    => $agg_name,
+                -sub_parts => \@subparts,
+               );
+    push @args,(-main_method => $mainpart, -whole_object => 1) if $mainpart;
+    warn "making an aggregator with (@args), subparts = @subparts" if $self->debug;
+    push @$list,Bio::DB::GFF::Aggregator->new(@args);
+  }
+
+  else {
+    # bare token: lowercase it to form the subclass name and load it
+    my $class = 'Bio::DB::GFF::Aggregator::' . lc($aggregator);
+    eval "require $class; 1" or  $self->throw("Unable to load $aggregator aggregator: $@");
+    push @$list,$class->new();
+  }
+}
+
+=head2 aggregators
+
+ Title   : aggregators
+ Usage   : $db->aggregators([@new_aggregators]);
+ Function: retrieve list of aggregators
+ Returns : list of aggregators
+ Args    : a list of aggregators to set (optional)
+ Status  : public
+
+This method will get or set the list of aggregators assigned to
+the database.  If 1 or more arguments are passed, the existing
+set will be cleared.
+
+=cut
+
+sub aggregators {
+  # Get or replace the aggregator list.  The list returned is the one that
+  # was in place BEFORE any replacement made by this call.
+  my ($self,@replacements) = @_;
+  my $previous = $self->{aggregators};
+
+  if (@replacements) {
+    $self->clear_aggregators;
+    foreach my $aggregator (@replacements) {
+      $self->add_aggregator($aggregator);
+    }
+  }
+
+  return @$previous if $previous;
+  return;
+}
+
+=head2 clear_aggregators
+
+ Title   : clear_aggregators
+ Usage   : $db->clear_aggregators
+ Function: clears list of aggregators
+ Returns : nothing
+ Args    : none
+ Status  : public
+
+This method will clear the aggregators stored in the database object.
+Use aggregators() or add_aggregator() to add some back.
+
+=cut
+
+sub clear_aggregators {
+  # Replace the aggregator list with a fresh empty array (and return it).
+  my $self = shift;
+  return $self->{aggregators} = [];
+}
+
+=head2 preferred_groups
+
+ Title   : preferred_groups
+ Usage   : $db->preferred_groups([$group_name_or_arrayref])
+ Function: get/set list of groups for altering GFF2 parsing
+ Returns : a list of classes
+ Args    : new list (scalar or array ref)
+ Status  : public
+
+=cut
+
+sub preferred_groups {
+  # Get or set the preferred group names.  Returns the list that was set
+  # before the call (nothing if none was set).
+  my ($self,@new_groups) = @_;
+  my $previous = $self->{preferred_groups};
+
+  if (@new_groups) {
+    # array references among the arguments are flattened into the list
+    my @flattened;
+    foreach my $item (@new_groups) {
+      push @flattened,(ref($item) eq 'ARRAY' ? @$item : $item);
+    }
+    $self->{preferred_groups} = \@flattened;
+    # invalidate the lookup cached by _preferred_groups_hash()
+    delete $self->{preferred_groups_hash};
+  }
+
+  return @$previous if $previous;
+  return;
+}
+
+sub _preferred_groups_hash {
+  # Build (and cache) a hash mapping each lowercased preferred group name
+  # to a rank: the first name gets the highest number, the last gets 1.
+  # Once cached, the hash is returned without consulting $gff3 again.
+  my ($self,$gff3) = @_;
+  if (exists $self->{preferred_groups_hash}) {
+    return $self->{preferred_groups_hash};
+  }
+
+  my @preferred = $self->preferred_groups;
+
+  # defaults
+  unless (@preferred) {
+    @preferred = $gff3 || $self->{load_data}{gff3_flag} ? qw(Target Parent ID) : qw(Target Sequence Transcript);
+  }
+
+  my %rank_of;
+  my $total = @preferred;
+  foreach my $position (0..$#preferred) {
+    $rank_of{lc $preferred[$position]} = $total - $position;
+  }
+  return $self->{preferred_groups_hash} = \%rank_of;
+}
+
+=head1 Methods for use by Subclasses
+
+The following methods are chiefly of interest to subclasses and are
+not intended for use by end programmers.
+
+=head2 abscoords
+
+ Title   : abscoords
+ Usage   : $db->abscoords($name,$class,$refseq)
+ Function: finds position of a landmark in reference coordinates
+ Returns : ($ref,$class,$start,$stop,$strand)
+ Args    : name and class of landmark
+ Status  : public
+
+This method is called by Bio::DB::GFF::RelSegment to obtain the
+absolute coordinates of a sequence landmark.  The arguments are the
+name and class of the landmark.  If successful, abscoords() returns
+the ID of the reference sequence, its class, its start and stop
+positions, and the orientation of the reference sequence's coordinate
+system ("+" for forward strand, "-" for reverse strand).
+
+If $refseq is present in the argument list, it forces the query to
+search for the landmark in a particular reference sequence.
+
+=cut
+
+sub abscoords {
+  # Resolve a landmark via get_abscoords(), substituting the object's
+  # {default_class} entry when no (true) class is given.
+  my ($self,$name,$class,$refseq) = @_;
+  $class = $self->{default_class} unless $class;
+  return $self->get_abscoords($name,$class,$refseq);
+}
+
+=head1 Protected API
+
+The following methods are not intended for public consumption, but are
+intended to be overridden/implemented by adaptors.
+
+=head2 default_aggregators
+
+ Title   : default_aggregators
+ Usage   : $db->default_aggregators;
+ Function: retrieve list of aggregators
+ Returns : array reference containing list of aggregator names
+ Args    : none
+ Status  : protected
+
+This method (which is intended to be overridden by adaptors) returns a
+list of standard aggregators to be applied when no aggregators are
+specified in the constructor.
+
+=cut
+
+sub default_aggregators {
+  # Names of the standard aggregators, returned as a new array reference.
+  my ($self) = @_;
+  my @names = qw(processed_transcript alignment);
+  return \@names;
+}
+
+=head2 do_load_gff
+
+ Title   : do_load_gff
+ Usage   : $db->do_load_gff($handle)
+ Function: load a GFF input stream
+ Returns : number of features loaded
+ Args    : A filehandle.
+ Status  : protected
+
+This method is called to load a GFF data stream.  The method will read
+GFF features from E<lt>E<gt> and load them into the database.  On exit the
+method must return the number of features loaded.
+
+Note that the method is responsible for parsing the GFF lines.  This
+is to allow for differences in the interpretation of the "group"
+field, which are legion.
+
+You probably want to use load_gff() instead.  It is more flexible
+about the arguments it accepts.
+
+=cut
+
+sub do_load_gff {
+  # Read a GFF stream, which may contain embedded FASTA sections, from
+  # $io_handle and dispatch each line to the GFF or sequence line loader.
+  # Returns the record count accumulated in {load_data}{count}.
+  my ($self,$io_handle) = @_;
+
+  # Progress messages end in "\r" when STDERR is a terminal (and we are
+  # not running under EMACS), otherwise in "\n".
+  my $progress_eol = (-t STDERR && !$ENV{EMACS}) ? "\r" : "\n";
+  local $self->{load_data} = {
+                              lineend => $progress_eol,
+                              count   => 0,
+                             };
+
+  $self->setup_load();
+  my $in_fasta = 0;   # false while reading GFF lines, true inside FASTA
+
+  # NOTE: the loop variable has to stay $_ because _load_gff_line() and
+  # _load_sequence_line() consult $_ in addition to their argument.
+  while (<$io_handle>) {
+    chomp;
+    if ($in_fasta) {
+      if (/^##|\t/) {    # Back to GFF mode
+        $self->_load_sequence_finish;
+        $in_fasta = 0;
+        $self->_load_gff_line($_);
+      }
+      else {
+        $self->_load_sequence_line($_);
+      }
+    }
+    elsif (/^>/) {       # Sequence coming
+      $in_fasta = 1;
+      $self->_load_sequence_start;
+      $self->_load_sequence_line($_);
+    }
+    else {
+      $self->_load_gff_line($_);
+    }
+  }
+  $self->finish_load();
+  $self->_load_sequence_finish;
+
+  return $self->{load_data}{count};
+}
+
+sub _load_gff_line {
+  # Parse one (already chomped) GFF line and pass the parsed record(s) to
+  # load_gff_line().  Handles the ##gff-version, ##group-tags and
+  # ##sequence-region directives; other comment lines are skipped.
+  my $self = shift;
+  my $line = shift;
+  my $lineend = $self->{load_data}{lineend};
+
+  # directives that influence how later lines are parsed
+  $self->{load_data}{gff3_flag}++           if $line =~ /^\#\#\s*gff-version\s+3/;
+  $self->preferred_groups(split(/\s+/,$1))  if $line =~ /^\#\#\s*group-tags?\s+(.+)/;
+
+  if ($line =~ /^\#\#\s*sequence-region\s+(\S+)\s+(\d+)\s+(\d+)/i) { # header line
+    # a sequence-region header is loaded as a reference "Component" feature
+    my ($region,$region_start,$region_stop) = ($1,$2,$3);
+    $self->load_gff_line(
+                         {
+                          ref    => $region,
+                          class  => 'Sequence',
+                          source => 'reference',
+                          method => 'Component',
+                          start  => $region_start,
+                          stop   => $region_stop,
+                          score  => undef,
+                          strand => undef,
+                          phase  => undef,
+                          gclass => 'Sequence',
+                          gname  => $region,
+                          tstart => undef,
+                          tstop  => undef,
+                          attributes  => [],
+                         }
+                        );
+    return $self->{load_data}{count}++;
+  }
+
+  # Test the argument itself rather than $_, so the method no longer
+  # depends on the caller having left the line in $_.
+  return if $line =~ /^#/;
+
+  my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split "\t",$line;
+  return unless defined($ref) && defined($method) && defined($start) && defined($stop);
+
+  # "." means "no value"; short lines leave these columns undefined, so
+  # guard the comparison.
+  foreach my $field (\$score,\$strand,\$phase) {
+    undef $$field if defined $$field && $$field eq '.';
+  }
+
+  print STDERR $self->{load_data}{count}," records$lineend"
+    if $self->{__verbose__} && $self->{load_data}{count} % 1000 == 0;
+
+  my ($gclass,$gname,$tstart,$tstop,$attributes) = $self->split_group($group,$self->{load_data}{gff3_flag});
+
+  # no standard way in the GFF file to denote the class of the reference sequence -- drat!
+  # so we invoke the factory to do it
+  my $class = $self->refclass($ref);
+
+  # Normalize reversed coordinates: swap them and flip a known strand.
+  if ($start > $stop) {
+    ($start,$stop) = ($stop,$start);
+    if (defined $strand) {
+      if    ($strand eq '+') { $strand = '-' }
+      elsif ($strand eq '-') { $strand = '+' }
+    }
+  }
+
+  # GFF2/3 transition stuff: split_group() may return either scalars or
+  # parallel array references; one record is loaded per group name.
+  $gclass = [$gclass] unless ref $gclass;
+  $gname  = [$gname]  unless ref $gname;
+  for (my $i=0; $i<@$gname;$i++) {
+    # call subclass to do the dirty work
+    $self->load_gff_line({ref    => $ref,
+                          class  => $class,
+                          source => $source,
+                          method => $method,
+                          start  => $start,
+                          stop   => $stop,
+                          score  => $score,
+                          strand => $strand,
+                          phase  => $phase,
+                          gclass => $gclass->[$i],
+                          gname  => $gname->[$i],
+                          tstart => $tstart,
+                          tstop  => $tstop,
+                          attributes  => $attributes}
+                        );
+    $self->{load_data}{count}++;
+  }
+}
+
+sub _load_sequence_start {
+  # Reset the sequence-related fields of the load state: no current id,
+  # offset 0, empty sequence buffer.
+  my ($self) = @_;
+  my $state = $self->{load_data};
+  $state->{id}     = undef;
+  $state->{offset} = 0;
+  $state->{seq}    = '';
+}
+sub _load_sequence_finish {
+  # Write out whatever is buffered for the current sequence, provided a
+  # sequence id has been seen.
+  my ($self) = @_;
+  my $state = $self->{load_data};
+  $self->insert_sequence($state->{id},$state->{offset},$state->{seq})
+    if defined $state->{id};
+}
+
+sub _load_sequence_line {
+  # Process one (already chomped) line of FASTA input: a ">" header starts
+  # a new sequence, any other line is appended to the current one.
+  # Works on the $line argument only, not on the caller's $_.
+  my $self = shift;
+  my $line = shift;
+  my $ld   = $self->{load_data};
+  my $lineend = $ld->{lineend};
+
+  if ($line =~ /^>(\S+)/) {
+    my $new_id = $1;
+    # flush what is left of the previous sequence before starting anew
+    $self->insert_sequence($ld->{id},$ld->{offset},$ld->{seq}) if defined $ld->{id};
+    $ld->{id}     = $new_id;
+    $ld->{offset} = 0;
+    $ld->{seq}    = '';
+    $ld->{count}++;
+    print STDERR $ld->{count}," sequences loaded$lineend" if $self->{__verbose__} && $ld->{count} % 1000 == 0;
+  } else {
+    $ld->{seq} .= $line;
+    # write out any complete chunks; offset and buffer are updated in place
+    $self->insert_sequence_chunk($ld->{id},\$ld->{offset},\$ld->{seq});
+  }
+
+}
+
+=head2 load_sequence
+
+ Title   : load_sequence
+ Usage   : $db->load_sequence($handle)
+ Function: load a FASTA data stream
+ Returns : number of sequences
+ Args    : a filehandle to the FASTA file
+ Status  : protected
+
+You probably want to use load_fasta() instead.
+
+=cut
+
+# note - there is some repeated code here
+sub load_sequence {
+  # Read a FASTA stream from $io_handle line by line and return the count
+  # accumulated in {load_data}{count}.
+  my ($self,$io_handle) = @_;
+
+  # Progress messages end in "\r" when STDERR is a terminal (and we are
+  # not running under EMACS), otherwise in "\n".
+  my $progress_eol = (-t STDERR && !$ENV{EMACS}) ? "\r" : "\n";
+  local $self->{load_data} = {
+                              lineend => $progress_eol,
+                              count   => 0,
+                             };
+
+  $self->_load_sequence_start;
+  # NOTE: the loop variable has to stay $_ because _load_sequence_line()
+  # consults $_ in addition to its argument.
+  while (<$io_handle>) {
+    chomp;
+    $self->_load_sequence_line($_);
+  }
+  $self->_load_sequence_finish;
+  return $self->{load_data}{count};
+}
+
+sub insert_sequence_chunk {
+  # Write as many full-size chunks as the buffer holds.  $offsetp and $seqp
+  # are references: the offset is advanced and the written prefix removed
+  # from the buffer in place.  Does nothing when dna_chunk_size() is false.
+  my ($self,$id,$offsetp,$seqp) = @_;
+  my $chunk_size = $self->dna_chunk_size;
+  if ($chunk_size) {
+    until (length($$seqp) < $chunk_size) {
+      my $piece = substr($$seqp,0,$chunk_size);
+      $self->insert_sequence($id,$$offsetp,$piece);
+      $$offsetp += length($piece);
+      substr($$seqp,0,$chunk_size,'');   # drop the part just written
+    }
+  }
+  return 1;  # the calling routine may expect success or failure
+}
+
+# used to store big pieces of DNA in itty bitty pieces
+sub dna_chunk_size {
+  # Base class value is 0, which makes insert_sequence_chunk() skip
+  # chunking altogether.
+  return 0;
+}
+
+sub insert_sequence {
+  # Placeholder taking ($id,$offset,$seq): the base class only throws.
+  my ($self,$id,$offset,$seq) = @_;
+  $self->throw('insert_sequence(): must be defined in subclass');
+}
+
+# This is the default class for reference points.  Defaults to Sequence.
+sub default_class {
+   # Getter/setter for the default reference class.  Called on a class name
+   # (non-reference invocant) it simply answers 'Sequence'; on an object it
+   # returns the previous {default_class} and stores a new one if given.
+   my ($self,@new_class) = @_;
+   ref $self or return 'Sequence';
+   my $previous = $self->{default_class};
+   $self->{default_class} = $new_class[0] if @new_class;
+   return $previous;
+}
+
+# gets name of the reference sequence, and returns its class
+# currently just calls default_class
+sub refclass {
+  # $name is accepted but not consulted; the answer is default_class().
+  my ($self,$name) = @_;
+  return $self->default_class;
+}
+
+=head2 setup_load
+
+ Title   : setup_load
+ Usage   : $db->setup_load
+ Function: called before load_gff_line()
+ Returns : void
+ Args    : none
+ Status  : abstract
+
+This method gives subclasses a chance to do any
+schema-specific initialization prior to loading a set of GFF records.
+The base class implementation does nothing; subclasses may override it.
+
+=cut
+
+sub setup_load {
+  # Hook invoked by do_load_gff() before any line is read.
+  # The base class version deliberately does nothing.
+  return;
+}
+
+=head2 finish_load
+
+ Title   : finish_load
+ Usage   : $db->finish_load
+ Function: called after load_gff_line()
+ Returns : number of records loaded
+ Args    : none
+ Status  : abstract
+
+This method gives subclasses a chance to do any schema-specific
+cleanup after loading a set of GFF records.
+
+=cut
+
+sub finish_load {
+  # Hook invoked by do_load_gff() after the last line has been read.
+  # The base class version deliberately does nothing.
+  return;
+}
+
+=head2 load_gff_line
+
+ Title   : load_gff_line
+ Usage   : $db->load_gff_line(@args)
+ Function: called to load one parsed line of GFF
+ Returns : true if successfully inserted
+ Args    : see below
+ Status  : abstract
+
+This abstract method is called once per line of the GFF and passed a
+hashref containing parsed GFF fields.  The fields are:
+
+ {ref    => $ref,
+  class  => $class,
+  source => $source,
+  method => $method,
+  start  => $start,
+  stop   => $stop,
+  score  => $score,
+  strand => $strand,
+  phase  => $phase,
+  gclass => $gclass,
+  gname  => $gname,
+  tstart => $tstart,
+  tstop  => $tstop,
+  attributes  => $attributes}
+
+=cut
+
+sub load_gff_line {
+  # Placeholder: the base class only throws.
+  my $self = shift;
+  $self->throw("load_gff_line(): must be implemented by an adaptor");
+}
+
+
+=head2 do_initialize
+
+ Title   : do_initialize
+ Usage   : $db->do_initialize([$erase])
+ Function: initialize and possibly erase database
+ Returns : true if successful
+ Args    : optional erase flag
+ Status  : protected
+
+This method implements the initialize() method described above, and
+takes the same arguments.
+
+=cut
+
+sub do_initialize {
+    # Placeholder: the base class only throws.
+    my $self = shift;
+    $self->throw('do_initialize(): must be implemented by an adaptor');
+}
+
+=head2 dna
+
+ Title   : dna
+ Usage   : $db->dna($id,$start,$stop,$class)
+ Function: return the raw DNA string for a segment
+ Returns : a raw DNA string
+ Args    : id of the sequence, start and stop positions, and its class
+ Status  : public
+
+This method is invoked by Bio::DB::GFF::Segment to fetch the raw DNA
+sequence.
+
+Arguments: -name          sequence name
+           -start         start position
+           -stop          stop position
+           -class         sequence class
+
+If start and stop are both undef, then the entire DNA is retrieved.
+So to fetch the whole dna, call like this:
+
+  $db->dna($name_of_sequence);
+
+or like this:
+
+   $db->dna(-name=>$name_of_sequence,-class=>$class_of_sequence);
+
+NOTE: you will probably prefer to create a Segment and then invoke its
+dna() method.
+
+=cut
+
+# call to return the DNA string for the indicated region
+# real work is done by get_dna()
+sub dna {
+  # Return the DNA string for the indicated region; the real work is done
+  # by get_dna().  Accepts positional ($id,$start,$stop,$class) or the
+  # named arguments listed in the rearrange() specification below.
+  my $self = shift;
+  my ($id,$start,$stop,$class)  = rearrange([
+                                             [qw(NAME ID REF REFSEQ)],
+                                             qw(START),
+                                             [qw(STOP END)],
+                                             'CLASS',
+                                            ],@_);
+  # Undefined start/stop values are passed through to get_dna() on
+  # purpose; an earlier guard that rejected them was disabled:
+  # return unless defined $start && defined $stop;
+  return $self->get_dna($id,$start,$stop,$class);
+}
+
+sub features_in_range {
+  # Validate the range type, then hand the search criteria and options to
+  # _features().  Throws if the range type is not a key of
+  # %valid_range_types.
+  my $self = shift;
+  my ($range_type,$refseq,$class,$start,$stop,$types,$parent,$sparse,$automerge,$iterator,$other) =
+    rearrange([
+               [qw(RANGE_TYPE)],
+               [qw(REF REFSEQ)],
+               qw(CLASS),
+               qw(START),
+               [qw(STOP END)],
+               [qw(TYPE TYPES)],
+               qw(PARENT),
+               [qw(RARE SPARSE)],
+               [qw(MERGE AUTOMERGE)],
+               'ITERATOR'
+              ],@_);
+  # $other is dereferenced as a hash of additional options below
+  $other ||= {};
+
+  # fall back to the database-wide automerge setting
+  $automerge = $self->automerge unless defined $automerge;
+
+  $self->throw("range type must be one of {".
+               join(',',keys %valid_range_types).
+               "}\n")
+    unless $valid_range_types{lc $range_type};
+
+  # first hash: search criteria; second hash: retrieval options
+  return $self->_features({
+                           rangetype => lc $range_type,
+                           refseq    => $refseq,
+                           refclass  => $class,
+                           start     => $start,
+                           stop      => $stop,
+                           types     => $types },
+                          {
+                           sparse    => $sparse,
+                           automerge => $automerge,
+                           iterator  => $iterator,
+                           %$other,
+                          },
+                          $parent);
+}
+
+=head2 get_dna
+
+ Title   : get_dna
+ Usage   : $db->get_dna($id,$start,$stop,$class)
+ Function: get DNA for indicated segment
+ Returns : the dna string
+ Args    : sequence ID, start, stop and class
+ Status  : protected
+
+If start E<gt> stop and the sequence is nucleotide, then this method
+should return the reverse complement.  The sequence class may be
+ignored by those databases that do not recognize different object
+types.
+
+=cut
+
+sub get_dna {
+  # Abstract: the base class has no storage, so it can only complain.
+  my ($self,$id,$start,$stop,$class) = @_;
+  return $self->throw("get_dna() must be implemented by an adaptor");
+}
+
+=head2 get_features
+
+ Title   : get_features
+ Usage   : $db->get_features($search,$options,$callback)
+ Function: get list of features for a region
+ Returns : count of number of features retrieved
+ Args    : see below
+ Status  : protected
+
+The first argument is a hash reference containing search criteria for
+retrieving features.  It contains the following keys:
+
+   rangetype One of "overlaps", "contains" or "contained_in".  Indicates
+              the type of range query requested.
+
+   refseq    ID of the landmark that establishes the absolute 
+              coordinate system.
+
+   refclass  Class of this landmark.  Can be ignored by implementations
+              that don't recognize such distinctions.
+
+   start     Start of the range, inclusive.
+
+   stop      Stop of the range, inclusive.
+
+   types     Array reference containing the list of annotation types
+              to fetch from the database.  Each annotation type is an
+              array reference consisting of [source,method].
+
+The second argument is a hash reference containing certain options
+that affect the way information is retrieved:
+
+   sort_by_group
+             A flag.  If true, means that the returned features should be
+             sorted by the group that they're in.
+
+   sparse    A flag.  If true, means that the expected density of the 
+             features is such that it will be more efficient to search
+             by type rather than by range.  If it is taking a long
+             time to fetch features, give this a try.
+
+   binsize   A true value will create a set of artificial features whose
+             start and stop positions indicate bins of the given size, and
+             whose scores are the number of features in the bin.  The
+             class of the feature will be set to "bin", and its name to
+             "method:source".  This is a handy way of generating histograms
+             of feature density.
+
+The third argument, the $callback, is a code reference to which
+retrieved features are passed.  It is described in more detail below.
+
+This routine is responsible for getting arrays of GFF data out of the
+database and passing them to the callback subroutine.  The callback
+does the work of constructing a Bio::DB::GFF::Feature object out of
+that data.  The callback expects a list of 13 fields:
+
+  $refseq      The reference sequence
+  $start       feature start
+  $stop        feature stop
+  $source      feature source
+  $method      feature method
+  $score       feature score
+  $strand      feature strand
+  $phase       feature phase
+  $groupclass  group class (may be undef)
+  $groupname   group ID (may be undef)
+  $tstart      target start for similarity hits (may be undef)
+  $tstop       target stop for similarity hits (may be undef)
+  $feature_id  A unique feature ID (may be undef)
+
+These fields are in the same order as the raw GFF file, with the
+exception that the group column has been parsed into group class and
+group name fields.
+
+The feature ID, if provided, is a unique identifier of the feature
+line.  The module does not depend on this ID in any way, but it is
+available via Bio::DB::GFF-E<gt>id() if wanted.  In the dbi::mysql and
+dbi::mysqlopt adaptor, the ID is a unique row ID.  In the acedb
+adaptor it is not used.
+
+=cut
+
+sub get_features {
+  # Abstract: adaptors override this to run the range query and feed each
+  # row of GFF data to $callback.
+  my ($self,$search,$options,$callback) = @_;
+  return $self->throw("get_features() must be implemented by an adaptor");
+}
+
+
+=head2 _feature_by_name
+
+ Title   : _feature_by_name
+ Usage   : $db->_feature_by_name($class,$name,$location,$callback)
+ Function: get a list of features by name and class
+ Returns : count of number of features retrieved
+ Args    : class of feature, name of feature, a location, and a callback
+ Status  : abstract
+
+This method is used internally.  The callback arguments are the same
+as those used by make_feature().  This method must be overridden by
+subclasses.
+
+=cut
+
+sub _feature_by_name {
+  # Abstract: must be supplied by the concrete adaptor.
+  my ($self,$class,$name,$location,$callback) = @_;
+  return $self->throw("_feature_by_name() must be implemented by an adaptor");
+}
+
+# Abstract: look up features by attribute.  Takes the attributes to match
+# and a callback; must be supplied by the concrete adaptor.
+sub _feature_by_attribute {
+  my $self = shift;
+  my ($attributes,$callback) = @_;
+  # Report this method's own name (the message used to name _feature_by_name).
+  $self->throw("_feature_by_attribute() must be implemented by an adaptor");
+}
+
+=head2 _feature_by_id
+
+ Title   : _feature_by_id
+ Usage   : $db->_feature_by_id($ids,$type,$callback)
+ Function: get features based on their feature IDs or group IDs
+ Returns : count of number of features retrieved
+ Args    : arrayref to feature IDs to fetch
+ Status  : abstract
+
+This method is used internally to fetch features either by their ID or
+their group ID.  $ids is an arrayref containing a list of IDs, $type is
+one of "feature" or "group", and $callback is a callback.  The
+callback arguments are the same as those used by make_feature().  This
+method must be overridden by subclasses.
+
+=cut
+
+sub _feature_by_id {
+  # Abstract: must be supplied by the concrete adaptor.
+  my ($self,$ids,$type,$callback) = @_;
+  return $self->throw("_feature_by_id() must be implemented by an adaptor");
+}
+
+=head2 overlapping_features
+
+ Title   : overlapping_features
+ Usage   : $db->overlapping_features(@args)
+ Function: get features that overlap the indicated range
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see below
+ Status  : public
+
+This method is invoked by Bio::DB::GFF::Segment-E<gt>features() to find
+the list of features that overlap a given range.  It is generally
+preferable to create the Segment first, and then fetch the features.
+
+This method takes a set of named arguments:
+
+  -refseq    ID of the reference sequence
+  -class     Class of the reference sequence
+  -start     Start of the desired range in refseq coordinates
+  -stop      Stop of the desired range in refseq coordinates
+  -types     List of feature types to return.  Argument is an array
+	     reference containing strings of the format "method:source"
+  -parent    A parent Bio::DB::GFF::Segment object, used to create
+	     relative coordinates in the generated features.
+  -rare      Turn on an optimization suitable for a relatively rare feature type,
+             where it will be faster to filter by feature type first
+             and then by position, rather than vice versa.
+  -merge     Whether to apply aggregators to the generated features.
+  -iterator  Whether to return an iterator across the features.
+
+If -iterator is true, then the method returns a single scalar value
+consisting of a Bio::SeqIO object.  You can call next_seq() repeatedly
+on this object to fetch each of the features in turn.  If iterator is
+false or absent, then all the features are returned as a list.
+
+Currently aggregation is disabled when iterating over a series of
+features.
+
+Types are indicated using the nomenclature "method:source".  Either of
+these fields can be omitted, in which case a wildcard is used for the
+missing field.  Type names without the colon (e.g. "exon") are
+interpreted as the method name and a source wild card.  Regular
+expressions are allowed in either field, as in: "similarity:BLAST.*".
+
+=cut
+
+# call to return the features that overlap the named region
+# real work is done by get_features
+sub overlapping_features {
+  my $self = shift;
+  # Delegate to the generic range query, pinning the range type to "overlaps"
+  # and passing the caller's remaining arguments through untouched.
+  $self->features_in_range(-range_type=>'overlaps',@_);
+}
+
+=head2 contained_features
+
+ Title   : contained_features
+ Usage   : $db->contained_features(@args)
+ Function: get features that are contained within the indicated range
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see overlapping_features()
+ Status  : public
+
+This call is similar to overlapping_features(), except that it only
+retrieves features whose end points are completely contained within
+the specified range.
+
+Generally you will want to fetch a Bio::DB::GFF::Segment object and
+call its contained_features() method rather than call this directly.
+
+=cut
+
+# The same, except that it only returns features that are completely contained within the
+# range (much faster usually)
+sub contained_features {
+  my $self = shift;
+  # Delegate to the generic range query, pinning the range type to "contains"
+  # and passing the caller's remaining arguments through untouched.
+  $self->features_in_range(-range_type=>'contains',@_);
+}
+
+=head2 contained_in
+
+ Title   : contained_in
+ Usage   : @features = $s->contained_in(@args)
+ Function: get features that contain this segment
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see features()
+ Status  : Public
+
+This is identical in behavior to features() except that it returns
+only those features that completely contain the segment.
+
+=cut
+
+sub contained_in {
+  my $self = shift;
+  # Delegate to the generic range query, pinning the range type to
+  # "contained_in" and passing the caller's remaining arguments through.
+  $self->features_in_range(-range_type=>'contained_in',@_);
+}
+
+=head2 get_abscoords
+
+ Title   : get_abscoords
+ Usage   : $db->get_abscoords($name,$class,$refseq)
+ Function: get the absolute coordinates of sequence with name & class
+ Returns : ($absref,$absstart,$absstop,$absstrand)
+ Args    : name and class of the landmark
+ Status  : protected
+
+Given the name and class of a genomic landmark, this function returns
+a four-element array consisting of:
+
+  $absref      the ID of the reference sequence that contains this landmark
+  $absstart    the position at which the landmark starts
+  $absstop     the position at which the landmark stops
+  $absstrand   the strand of the landmark, relative to the reference sequence
+
+If $refseq is provided, the function searches only within the
+specified reference sequence.
+
+=cut
+
+sub get_abscoords {
+  # Abstract: must be supplied by the concrete adaptor.
+  my ($self,$name,$class,$refseq) = @_;
+  return $self->throw("get_abscoords() must be implemented by an adaptor");
+}
+
+=head2 get_types
+
+ Title   : get_types
+ Usage   : $db->get_types($absref,$class,$start,$stop,$count)
+ Function: get list of all feature types on the indicated segment
+ Returns : list or hash of Bio::DB::GFF::Typename objects
+ Args    : see below
+ Status  : protected
+
+Arguments are:
+
+  $absref      the ID of the reference sequence
+  $class       the class of the reference sequence
+  $start       the position to start counting
+  $stop        the position to end counting
+  $count       a boolean indicating whether to count the number
+	       of occurrences of each feature type
+
+If $count is true, then a hash is returned.  The keys of the hash are
+feature type names in the format "method:source" and the values are
+the number of times a feature of this type overlaps the indicated
+segment.  Otherwise, the call returns a set of Bio::DB::GFF::Typename
+objects.  If $start or $stop are undef, then all features on the
+indicated segment are enumerated.  If $absref is undef, then the call
+returns all feature types in the database.
+
+=cut
+
+sub get_types {
+  # Abstract: must be supplied by the concrete adaptor.
+  my ($self,$refseq,$class,$start,$stop,$count,$types) = @_;
+  return $self->throw("get_types() must be implemented by an adaptor");
+}
+
+=head2 make_feature
+
+ Title   : make_feature
+ Usage   : $db->make_feature(@args)
+ Function: Create a Bio::DB::GFF::Feature object from string data
+ Returns : a Bio::DB::GFF::Feature object
+ Args    : see below
+ Status  : internal
+
+ This takes up to 16 arguments (really!):
+
+  $parent                A Bio::DB::GFF::RelSegment object
+  $group_hash            A hashref containing unique list of GFF groups
+  $refname               The name of the reference sequence for this feature
+  $start                 Start of feature
+  $stop                  Stop of feature
+  $source                Feature source field
+  $method                Feature method field
+  $score                 Feature score field
+  $strand                Feature strand
+  $phase                 Feature phase
+  $group_class           Class of feature group
+  $group_name            Name of feature group
+  $tstart                For homologies, start of hit on target
+  $tstop                 Stop of hit on target
+  $db_id                 Unique ID of the feature, if the adaptor provides one
+  $group_id              ID of the feature's group, if the adaptor provides one
+
+The $parent argument, if present, is used to establish relative
+coordinates in the resulting Bio::DB::GFF::Feature object.  This allows one
+feature to generate a list of other features that are relative to its
+coordinate system (for example, finding the coordinates of the second
+exon relative to the coordinates of the first).
+
+The $group_hash allows the group_class/group_name strings to be turned
+into rich database objects via the make_object() method (see below).
+Because these objects may be expensive to create, $group_hash is used
+to uniquefy them.  The index of this hash is the composite key
+{$group_class,$group_name,$tstart,$tstop}.  Values are whatever object
+is returned by the make_object() method.
+
+The remainder of the fields are taken from the GFF line, with the
+exception that "Target" features, which contain information about the
+target of a homology search, are parsed into their components.
+
+=cut
+
+# This call is responsible for turning a line of GFF into a
+# feature object.
+# The $parent argument is a Bio::DB::GFF::Segment object and is used
+# to establish the coordinate system for the new feature.
+# The $group_hash argument is an hash ref that holds previously-
+# generated group objects.
+# Other arguments are taken right out of the GFF table.
+sub make_feature {
+  my ($self,
+      $parent,$group_hash,          # supplied by the generic machinery
+      $srcseq,                      # everything from here on comes from the adaptor
+      $start,$stop,
+      $source,$method,
+      $score,$strand,$phase,
+      $group_class,$group_name,
+      $tstart,$tstop,
+      $db_id,$group_id) = @_;
+
+  # Called with no feature data: return nothing.  On-the-fly aggregation
+  # relies on this to signal the end of the stream.
+  return unless $srcseq;
+
+  # Build (or reuse) the group object when the feature belongs to a group.
+  my $group;
+  if (defined $group_class && defined $group_name) {
+    $tstart ||= '';
+    $tstop  ||= '';
+    my @group_args = ($group_class,$group_name,$tstart,$tstop);
+    if ($group_hash) {
+      # Same composite key Perl builds for $hash->{$a,$b,$c,$d}.
+      my $key = join($;, @group_args);
+      $group  = ($group_hash->{$key} ||= $self->make_object(@group_args));
+    }
+    else {
+      $group = $self->make_object(@group_args);
+    }
+  }
+
+  # Note: strand is deliberately NOT inferred from the target coordinates;
+  # that workaround for broken GFF files had undesired side effects.
+
+  # Note that method precedes source in the constructor argument order.
+  my @feature_args = ($start,$stop,
+                      $method,$source,
+                      $score,$strand,$phase,
+                      $group,$db_id,$group_id,
+                      $tstart,$tstop);
+
+  # With a parent segment the source sequence is ignored and coordinates
+  # are made relative to the parent.
+  return ref $parent
+    ? Bio::DB::GFF::Feature->new_from_parent($parent,@feature_args)
+    : Bio::DB::GFF::Feature->new($self,$srcseq,@feature_args);
+}
+
+# Callback used when iterating with aggregation switched on.  Builds a
+# feature from the remaining arguments and buffers it in
+# $accumulated_features until the group changes or the stream ends.
+# Returns an arrayref of finished features when a batch is ready, and
+# nothing while features are still being accumulated.
+sub make_aggregated_feature {
+  my $self                 = shift;
+  my ($accumulated_features,$parent,$aggregators) = splice(@_,0,3);
+  my $feature = $self->make_feature($parent,undef,@_);
+  # Ungrouped features cannot be aggregated; hand them back immediately.
+  return [$feature] if $feature && !$feature->group;
+
+  # if we have accumulated features and either: 
+  # (1) make_feature() returned undef, indicated very end or
+  # (2) the current group is different from the previous one
+
+  local $^W = 0;  # irritating uninitialized value warning in next statement
+  if (@$accumulated_features &&
+      (!defined($feature) || ($accumulated_features->[-1]->group ne $feature->group))) {
+    foreach my $a (@$aggregators) {  # last aggregator gets first shot
+      $a->aggregate($accumulated_features,$self) or next;
+    }
+    my @result = @$accumulated_features;
+    # Start the next batch with the feature that triggered the flush, if any.
+    @$accumulated_features = $feature ? ($feature) : ();
+    return unless @result;
+    return \@result ;
+  }
+  push @$accumulated_features,$feature;
+  return;
+}
+
+=head2 make_match_sub
+
+ Title   : make_match_sub
+ Usage   : $db->make_match_sub($types)
+ Function: creates a subroutine used for filtering features
+ Returns : a code reference
+ Args    : a list of parsed type names
+ Status  : protected
+
+This method is used internally to generate a code subroutine that will
+accept or reject a feature based on its method and source.  It takes
+an array of parsed type names in the format returned by parse_types(),
+and generates an anonymous subroutine.  The subroutine takes a single
+Bio::DB::GFF::Feature object and returns true if the feature matches
+one of the desired feature types, and false otherwise.
+
+=cut
+
+# a subroutine that matches features indicated by list of types
+sub make_match_sub {
+  my $self = shift;
+  my $types = shift;
+
+  # No type filter requested: accept everything.
+  return sub { 1 } unless ref $types && @$types;
+
+  # Turn each [method,source] pair into a "method:source" pattern; a missing
+  # method matches anything, a missing source makes the ":source" part optional.
+  my @expr;
+  for my $type (@$types) {
+    my ($method,$source) = @$type;
+    $method ||= '.*';
+    $source  = $source ? ":$source" : "(?::.+)?";
+    push @expr,"${method}${source}";
+  }
+  my $expr = join '|',@expr;
+  # Compiled matchers are cached on the object, keyed by the pattern text.
+  return $self->{match_subs}{$expr} if $self->{match_subs}{$expr};
+
+  # The matcher is built as source text and compiled with a string eval, so
+  # the type names are treated as regular expressions, matched case-insensitively.
+  my $sub =<<END;
+sub {
+  my \$feature = shift or return;
+  return \$feature->type =~ /^($expr)\$/i;
+}
+END
+  warn "match sub: $sub\n" if $self->debug;
+  undef $@;
+  my $compiled_sub = eval $sub;
+  $self->throw($@) if $@;
+  return $self->{match_subs}{$expr} = $compiled_sub;
+}
+
+=head2 make_object
+
+ Title   : make_object
+ Usage   : $db->make_object($class,$name,$start,$stop)
+ Function: creates a feature object
+ Returns : a feature object
+ Args    : see below
+ Status  : protected
+
+This method is called to make an object from the GFF "group" field.
+By default, all Target groups are turned into Bio::DB::GFF::Homol
+objects, and everything else becomes a Bio::DB::GFF::Featname.
+However, adaptors are free to override this method to generate more
+interesting objects, such as true BioPerl objects, or Acedb objects.
+
+Arguments are:
+
+  $class     class of object
+  $name      database ID for object
+  $start     for similarities, start of match inside object
+  $stop      for similarities, stop of match inside object
+
+=cut
+
+# abstract call to turn a feature into an object, given its class and name
+sub make_object {
+  my ($self,$class,$name,$start,$stop) = @_;
+  # A non-empty start position marks a similarity target.
+  if (defined $start and length $start) {
+    return Bio::DB::GFF::Homol->new($self,$class,$name,$start,$stop);
+  }
+  # Everything else is a plain named group.
+  return Bio::DB::GFF::Featname->new($class,$name);
+}
+
+
+=head2 do_attributes
+
+ Title   : do_attributes
+ Usage   : $db->do_attributes($id [,$tag]);
+ Function: internal method to retrieve attributes given an id and tag
+ Returns : a list of attribute values, or a flattened list of tag/value pairs
+ Args    : a feature id and an attribute tag (optional)
+ Status  : protected
+
+This method is overridden by subclasses in order to return a list of
+attributes.  If called with a tag, returns the value of attributes of
+that tag type.  If called without a tag, returns a flattened array of
+(tag=E<gt>value) pairs.  A particular tag can be present multiple times.
+
+=cut
+
+sub do_attributes {
+  # Default implementation: no attributes are known, so return an empty list.
+  my ($self,$id,$tag) = @_;
+  return ();
+}
+
+
+
+=head1 Internal Methods
+
+The following methods are internal to Bio::DB::GFF and are not
+guaranteed to remain the same.
+
+=head2 _features
+
+ Title   : _features
+ Usage   : $db->_features($search,$options,$parent)
+ Function: internal method
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : see below
+ Status  : internal
+
+This is an internal method that is called by overlapping_features(),
+contained_features() and features() to create features based on a
+parent segment's coordinate system.  It takes three arguments, a
+search options hashref, an options hashref, and a parent segment.
+
+The search hashref contains the following keys:
+
+  rangetype     One of "overlaps", "contains" or "contained_in".  Indicates
+                the type of range query requested.
+  refseq        reference sequence ID
+  refclass      reference sequence class
+  start	        start of range
+  stop		stop of range
+  types	        arrayref containing list of types in "method:source" form
+
+The options hashref contains zero or more of the following keys:
+
+  sparse	turn on optimizations for a rare feature
+  automerge	if true, invoke aggregators to merge features
+  iterator	if true, return an iterator
+
+The $parent argument is a scalar object containing a
+Bio::DB::GFF::RelSegment object or descendent.
+
+=cut
+
+#'
+
+sub _features {
+  my $self = shift;
+  my ($search,$options,$parent) = @_;
+  # Normalize the range so that start <= stop before querying.
+  (@{$search}{qw(start stop)}) = (@{$search}{qw(stop start)})
+    if defined($search->{start}) && $search->{start} > $search->{stop};
+
+  my $types = $self->parse_types($search->{types});  # parse out list of types
+  my @aggregated_types = @$types;         # keep a copy
+
+  # allow the aggregators to operate on the original
+  # Only aggregators whose disaggregate() returns true are kept; they are
+  # collected in reverse order so the last one gets first shot later.
+  my @aggregators;
+  if ($options->{automerge}) {
+    for my $a ($self->aggregators) {
+      $a = $a->clone if $options->{iterator};
+      unshift @aggregators,$a
+        if $a->disaggregate(\@aggregated_types,$self);
+    }
+  }
+
+  # Iterator mode: return the adaptor's iterator instead of a feature list.
+  # Each callback returns an arrayref of features (or nothing while the
+  # aggregating callback is still accumulating a group).
+  if ($options->{iterator}) {
+    my @accumulated_features;
+    my $callback = $options->{automerge} ? sub { $self->make_aggregated_feature(\@accumulated_features,$parent,\@aggregators,@_) }
+                                         : sub { [$self->make_feature($parent,undef,@_)] };
+    return $self->get_features_iterator({ %$search, 
+                                          types => \@aggregated_types  },
+                                        { %$options,
+                                         sort_by_group => $options->{automerge}  },
+                                        $callback
+                                       );
+  }
+
+  my %groups;         # cache the groups we create to avoid consuming too much unnecessary memory
+  my $features = [];
+
+  my $callback = sub { push @$features,$self->make_feature($parent,\%groups,@_) };
+  $self->get_features({ %$search,
+                        types  => \@aggregated_types },
+                        $options,
+                        $callback);
+
+  if ($options->{automerge}) {
+    warn "aggregating...\n" if $self->debug;
+    foreach my $a (@aggregators) {  # last aggregator gets first shot
+      warn "Aggregator $a:\n" if $self->debug;
+      $a->aggregate($features,$self);
+    }
+  }
+
+  @$features;
+}
+
+=head2 get_features_iterator
+
+ Title   : get_features_iterator
+ Usage   : $db->get_features_iterator($search,$options,$callback)
+ Function: get an iterator on a features query
+ Returns : a Bio::SeqIO object
+ Args    : as per get_features()
+ Status  : Public
+
+This method takes the same arguments as get_features(), but returns an
+iterator that can be used to fetch features sequentially, as per
+Bio::SeqIO.
+
+Internally, this method is simply a front end to range_query().
+The latter method constructs and executes the query, returning a
+statement handle. This routine passes the statement handle to the
+constructor for the iterator, along with the callback.
+
+=cut
+
+sub get_features_iterator {
+  # Base class has no iterator support; adaptors that do must override this.
+  my ($self,$search,$options,$callback) = @_;
+  return $self->throw('feature iteration is not implemented in this adaptor');
+}
+
+=head2 split_group
+
+ Title   : split_group
+ Usage   : $db->split_group($group_field,$gff3_flag)
+ Function: parse GFF group field
+ Returns : ($gclass,$gname,$tstart,$tstop,$attributes)
+ Args    : the gff group column and a flag indicating gff3 compatibility
+ Status  : internal
+
+This is a method that is called by load_gff_line to parse out the
+contents of one or more group fields.  It returns the class of the
+group, its name, the start and stop of the target, if any, and an
+array reference containing any attributes that were stuck into the
+group field, in [attribute_name,attribute_value] format.
+
+=cut
+
+# Split the raw GFF group column into its individual "tag value" pieces and
+# pass them to the GFF2 or GFF3 parser, depending on the $gff3 flag.
+# Returns whatever the chosen parser returns.
+sub split_group {
+  my $self = shift;
+  my ($group,$gff3) = @_;
+  if ($gff3) {
+    # GFF3: fields are separated by ';' or '&'
+    my @groups = split /[;&]/,$group;  # so easy!
+    return $self->_split_gff3_group(@groups);
+  } else {
+    # handle group parsing
+    # protect embedded semicolons in the group; there must be faster/more elegant way
+    # to do this.
+    # Backslash-escaped semicolons are swapped for $; (Perl's subscript
+    # separator) so that they survive the split below.
+    $group =~ s/\\;/$;/g;
+    # Likewise for semicolons inside a double-quoted value; each pass of the
+    # loop replaces one of them until none are left.
+    while ($group =~ s/( \"[^\"]*);([^\"]*\")/$1$;$2/) { 1 }
+    my @groups = split(/\s*;\s*/,$group);
+    # Put the protected semicolons back.
+    foreach (@groups) { s/$;/;/g }
+    return $self->_split_gff2_group(@groups);
+  }
+}
+
+=head2 _split_gff2_group
+
+This is an internal method called by split_group().
+
+=cut
+
+# this has gotten quite nasty due to transition from GFF2 to GFF2.5
+# (artemis) to GFF3.
+
+# Parse the already-split pieces of a GFF2 group column.
+# Returns ($gclass,$gname,$tstart,$tstop,\@attributes), where @attributes
+# holds [tag,value] pairs with any Notes appended at the end.
+sub _split_gff2_group {
+  my $self = shift;
+  my @groups = @_;
+  my $target_found;
+
+  my ($gclass,$gname,$tstart,$tstop,@attributes,@notes);
+
+  for (@groups) {
+
+    my ($tag,$value) = /^(\S+)(?:\s+(.+))?/;
+    $value = '' unless defined $value;
+    if ($value =~ /^\"(.+)\"$/) {  #remove quotes
+      $value = $1;
+    }
+    # expand escaped tabs/carriage returns and drop trailing whitespace
+    $value =~ s/\\t/\t/g;
+    $value =~ s/\\r/\r/g;
+    $value =~ s/\s+$//;
+
+    # Any additional groups become part of the attributes hash
+    # For historical reasons, the tag "Note" is treated as an
+    # attribute, even if it is the only group.
+    $tag ||= '';
+    if ($tag eq 'tstart' && $target_found) {
+      $tstart = $value;
+    }
+
+    elsif ($tag eq 'tend' && $target_found) {
+      $tstop = $value;
+    }
+
+    elsif (ucfirst $tag eq 'Note') {
+      push @notes, [$tag => $value];
+    }
+
+    elsif ($tag eq 'Target' && /([^:\"\s]+):([^\"\s]+)/) { # major disagreement in implementors of GFF2 here
+      $target_found++;
+      ($gclass,$gname) = ($1,$2);
+      ($tstart,$tstop) = / (\d+) (\d+)/;
+    }
+
+    # A bare tag with no value becomes a Note.  $value has already been
+    # defaulted to '' above, so test for emptiness rather than definedness
+    # (the old !defined test could never succeed).
+    elsif (length $tag && $value eq '') {
+      push @notes, [Note => $tag];  # e.g. "Confirmed_by_EST"
+    }
+
+    else {
+      push @attributes, [$tag => $value];
+    }
+  }
+
+  # group assignment
+  # No Target found: promote one of the attributes to be the group.
+  if (@attributes && !($gclass && $gname) ) {
+
+    my $preferred = ref($self) ? $self->_preferred_groups_hash : {};
+
+    for my $pair (@attributes) {
+      my ($c,$n) = @$pair;
+      ($gclass,$gname) = ($c,$n) 
+        if !$gclass # pick up first one
+          ||
+            ($preferred->{lc $gclass}||0) < ($preferred->{lc $c}||0); # pick up higher priority one
+    }
+
+    # the tag chosen as the group is no longer reported as an attribute
+    @attributes = grep {$gclass ne $_->[0]} @attributes;
+  }
+
+  push @attributes, @notes;
+
+  return ($gclass,$gname,$tstart,$tstop,\@attributes);
+}
+
+
+=head2 gff3_name_munging
+
+ Title   : gff3_name_munging
+ Usage   : $db->gff3_name_munging($boolean)
+ Function: get/set gff3_name_munging flag
+ Returns : $current value of flag
+ Args    : new value of flag (optional)
+ Status  : utility
+
+If this is set to true (default false), then features identified in
+gff3 files with an ID in the format foo:bar will be parsed so that
+"foo" is the class and "bar" is the name.  This is mostly for backward
+compatibility with GFF2.
+
+=cut
+
+sub gff3_name_munging {
+  # Combined getter/setter: always hands back the value the flag had on
+  # entry, storing the new value only when one was supplied.
+  my ($self,@new_value) = @_;
+  my $previous = $self->{gff3_name_munging};
+  if (@new_value) {
+    $self->{gff3_name_munging} = $new_value[0];
+  }
+  return $previous;
+}
+
+=head2 _split_gff3_group
+
+This is called internally from split_group().
+
+=cut
+
+# Parse the already-split "tag=value" pieces of a GFF3 attribute column.
+# Returns ($gclass,$gname,$tstart,$tstop,\@attributes), where @attributes
+# holds one [tag,value] pair per (comma-separated) value.
+sub _split_gff3_group {
+  my $self   = shift;
+  my @groups = @_;
+  my $dc     = $self->default_class;
+  my (%id,@attributes);
+
+  for my $group (@groups) {
+    # Split on the first '=' only, so a value containing '=' stays intact.
+    my ($tag,$value) = split /=/,$group,2;
+    $value           = '' unless defined $value;  # tag given without "=value"
+    $tag             = unescape($tag);
+    my @values       = map {unescape($_)} split /,/,$value;
+
+    # GFF2 traditionally did not distinguish between a feature's name
+    # and the group it belonged to.  This code is a transition between
+    # gff2 and the new parent/ID dichotomy in gff3.
+    if ($tag eq 'Parent') {
+      my (@names,@classes);
+      for (@values) {
+        my ($name,$class) = $self->_gff3_name_munging($_,$dc);
+        push @names,$name;
+        push @classes,$class;
+      }
+      # several parents are stored as parallel arrayrefs of names and classes
+      $id{$tag} = @names > 1 ? [\@names,\@classes] : [$names[0],$classes[0]];
+    }
+    elsif ($tag eq 'ID') {
+      # shift() consumes the ID so it is not repeated among the attributes
+      $id{$tag} = [$self->_gff3_name_munging(shift(@values),$dc)];
+    }
+    elsif ($tag eq 'Target') {
+      my ($gname,$tstart,$tstop) = split /\s+/,shift @values;
+      $id{$tag} = [$self->_gff3_name_munging($gname,$dc),$tstart,$tstop];
+    }
+    elsif ($tag =~ /synonym/i) {
+      $tag = 'Alias';
+    }
+    push @attributes,[$tag=>$_] foreach @values;
+  }
+
+  # Of the Parent/ID/Target entries seen, take the name from the highest
+  # priority one that is defined.
+  my $priorities = $self->_preferred_groups_hash(1);
+  my ($gclass,$gname,$tstart,$tstop);
+  for my $preferred (sort {$priorities->{lc $b}<=>$priorities->{lc $a}}
+                     keys %id) {
+    unless (defined $gname) {
+      ($gname,$gclass,$tstart,$tstop) = @{$id{$preferred}};
+    }
+  }
+
+  # set null gclass to empty string to preserve compatibility with
+  # programs that expect a defined gclass if no gname
+  $gclass ||= '' if defined $gname;
+
+  return ($gclass,$gname,$tstart,$tstop,\@attributes);
+}
+
+# Accommodation for the wormbase style of class:name naming.
+# Returns (name, class).  "foo:bar" is split into name "bar" and class "foo"
+# only when the gff3_name_munging flag is on; otherwise the name is
+# returned unchanged together with the default class.
+sub _gff3_name_munging {
+  my ($self,$name,$default_class) = @_;
+
+  if ($self->gff3_name_munging && $name =~ /^(\w+):(.+)/) {
+    return ($2,$1);
+  }
+
+  return ($name,$default_class);
+}
+
+=head2 _delete_features(), _delete_groups(),_delete()
+
+ Title   : _delete_features(), _delete_groups(),_delete()
+ Usage   : $count = $db->_delete_features(@feature_ids)
+           $count = $db->_delete_groups(@group_ids)
+           $count = $db->_delete(\%delete_spec)
+ Function: low-level feature/group deleter
+ Returns : count of groups removed
+ Args    : list of feature or group ids removed
+ Status  : for implementation by subclasses
+
+These methods need to be implemented in adaptors.  For
+_delete_features and _delete_groups, the arguments are a list of
+feature or group IDs to remove.  For _delete(), the argument is a
+hashref with the three keys 'segments', 'types' and 'force'.  The
+first contains an arrayref of Bio::DB::GFF::RelSegment objects to
+delete (all FEATURES within the segment are deleted).  The second
+contains an arrayref of [method,source] feature types to delete.  The
+two are ANDed together.  If 'force' has a true value, this forces the
+operation to continue even if it would delete all features.
+
+=cut
+
+sub _delete_features {
+  # Abstract: deletion by feature ID is left to the adaptor.
+  my ($self,@feature_ids) = @_;
+  return $self->throw('_delete_features is not implemented in this adaptor');
+}
+
+sub _delete_groups {
+  # Abstract: deletion by group ID is left to the adaptor.
+  my ($self,@group_ids) = @_;
+  return $self->throw('_delete_groups is not implemented in this adaptor');
+}
+
+sub _delete {
+  # Abstract: deletion driven by a delete-spec hashref is left to the adaptor.
+  my ($self,$delete_options) = @_;
+  return $self->throw('_delete is not implemented in this adaptor');
+}
+
+# URL-style decoding: '+' becomes a space and each %XX hex escape becomes
+# the corresponding character.  Plain function, not a method.
+sub unescape {
+  my ($text) = @_;
+  $text =~ tr/+/ /;
+  $text =~ s/%([0-9a-fA-F]{2})/chr hex($1)/ge;
+  return $text;
+}
+
+
+package Bio::DB::GFF::ID_Iterator;
+use strict;
+
+use base qw(Bio::Root::Root);
+
+sub new {
+  # Stores the database handle, the list of pending IDs and the kind of ID
+  # ($type) for later use by next_seq().
+  my ($class,$db,$ids,$type) = @_;
+  my $self = { ids => $ids, db => $db, type => $type };
+  return bless $self,$class;
+}
+
+sub next_seq {
+  my $self = shift;
+
+  # Take the next pending ID; a false value ends the iteration.
+  my $next = shift @{$self->{ids}};
+  return unless $next;
+
+  # An array reference is turned into a Featname; anything else is used as is.
+  my $name = $next;
+  $name = Bio::DB::GFF::Featname->new(@$next) if ref($next) eq 'ARRAY';
+
+  # Dispatch on the kind of identifier this iterator was built for.
+  my $db   = $self->{db};
+  my $type = $self->{type};
+  my $segment;
+  if ($type eq 'name') {
+    $segment = $db->segment($name);
+  }
+  elsif ($type eq 'feature') {
+    $segment = $db->fetch_feature_by_id($name);
+  }
+  elsif ($type eq 'group') {
+    $segment = $db->fetch_feature_by_gid($name);
+  }
+  else {
+    $segment = $self->throw("Bio::DB::GFF::ID_Iterator called to fetch an unknown type of identifier");
+  }
+
+  $self->throw("id does not exist") unless $segment;
+  return $segment;
+}
+
+1;
+
+__END__
+
+=head1 BUGS
+
+Features can only belong to a single group at a time.  This must be
+addressed soon.
+
+Start coordinate can be greater than stop coordinate for relative
+addressing.  This breaks strict BioPerl compatibility and must be
+fixed.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::GFF::RelSegment>,
+L<Bio::DB::GFF::Aggregator>,
+L<Bio::DB::GFF::Feature>,
+L<Bio::DB::GFF::Adaptor::dbi::mysqlopt>,
+L<Bio::DB::GFF::Adaptor::dbi::oracle>,
+L<Bio::DB::GFF::Adaptor::memory>,
+L<Bio::DB::GFF::Adaptor::berkeleydb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenBank.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenBank.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenBank.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,349 @@
+# $Id: GenBank.pm,v 1.59.4.1 2006/10/02 23:10:14 sendu Exp $
+#
+# BioPerl module for Bio::DB::GenBank
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Added LWP support - Jason Stajich 2000-11-6
+# completely reworked by Jason Stajich 2000-12-8
+# to use WebDBSeqI
+
+# Added batch entrez back when determined that new entrez cgi will
+# essentially work (there is a limit to the number of characters in a
+# GET request so I am not sure how we can get around this).  The NCBI
+# Batch Entrez form has changed some and it does not support retrieval
+# of text only data.  Still should investigate POST-ing (tried and
+# failed) a message to the entrez cgi to get around the GET
+# limitations.
+
+=head1 NAME
+
+Bio::DB::GenBank - Database object interface to GenBank
+
+=head1 SYNOPSIS
+
+    use Bio::DB::GenBank;
+    $gb = new Bio::DB::GenBank;
+
+    $seq = $gb->get_Seq_by_id('MUSIGHBA1'); # Unique ID
+
+    # or ...
+
+    $seq = $gb->get_Seq_by_acc('J00522'); # Accession Number
+    $seq = $gb->get_Seq_by_version('J00522.1'); # Accession.version
+    $seq = $gb->get_Seq_by_gi('405830'); # GI Number
+
+    # get a stream via a query string
+    my $query = Bio::DB::Query::GenBank->new
+        (-query   =>'Oryza sativa[Organism] AND EST',
+         -reldate => '30',
+	 -db      => 'nucleotide');
+    my $seqio = $gb->get_Stream_by_query($query);
+
+    while( my $seq =  $seqio->next_seq ) {
+      print "seq length is ", $seq->length,"\n";
+    }
+
+    # or ... best when downloading very large files, prevents
+    # keeping all of the file in memory
+
+    # also don't want features, just sequence so let's save bandwidth
+    # and request Fasta sequence
+    $gb = new Bio::DB::GenBank(-retrievaltype => 'tempfile' , 
+			                      -format => 'Fasta');
+    my $seqio = $gb->get_Stream_by_acc(['AC013798', 'AC021953'] );
+    while( my $clone =  $seqio->next_seq ) {
+      print "cloneid is ", $clone->display_id, " ", 
+             $clone->accession_number, "\n";
+    }
+    # note that get_Stream_by_version is not implemented
+
+    # don't want the entire sequence or more options
+    my $gb = Bio::DB::GenBank->new(-format     => 'Fasta',
+                                   -seq_start  => 100,
+                                   -seq_stop   => 200,
+                                   -strand     => 1,
+                                   -complexity => 4);
+    my $seqi = $gb->get_Stream_by_query($query);
+
+These alternate methods are described at
+L<http://www.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html>
+
+NOTE: strand should be 1 for plus or 2 for minus.
+
+Complexity: gi is often a part of a biological blob, containing other gis
+
+complexity regulates the display:
+0 - get the whole blob
+1 - get the bioseq for gi of interest (default in Entrez)
+2 - get the minimal bioseq-set containing the gi of interest
+3 - get the minimal nuc-prot containing the gi of interest
+4 - get the minimal pub-set containing the gi of interest
+
+'seq_start' and 'seq_stop' will not work when setting complexity to any value
+other than 1.  'strand' works for any setting other than a complexity of 0
+(whole blob); when you try this with a GenBank return format nothing happens,
+whereas using FASTA works but causes display problems with the other sequences
+in the blob.  As Tao Tao says from NCBI, "Better left it out or set it to 1."
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of L<Bio::Seq> sequence objects from the
+GenBank database at NCBI, via an Entrez query.
+
+WARNING: Please do B<NOT> spam the Entrez web server with multiple
+requests.  NCBI offers Batch Entrez for this purpose.
+
+Note that when querying for GenBank accessions starting with 'NT_' you
+will need to call $gb-E<gt>request_format('fasta') beforehand, because
+in GenBank format (the default) the sequence part will be left out
+(the reason is that NT contigs are rather annotation with references
+to clones).
+
+Some work has been done to automatically detect and retrieve whole NT_
+clones when the data is in that format (NCBI RefSeq clones).  More
+testing and feedback from users is needed to achieve a good fit of
+functionality and ease of use.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey, Jason Stajich
+
+Email amackey at virginia.edu
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::GenBank;
+use strict;
+use vars qw(%PARAMSTRING $DEFAULTFORMAT $DEFAULTMODE);
+
+use base qw(Bio::DB::NCBIHelper);
+# Populate the package defaults at compile time: the default retrieval mode,
+# the default sequence format, and the table of request parameters for each
+# retrieval mode.
+BEGIN {
+    $DEFAULTMODE   = 'single';
+    $DEFAULTFORMAT = 'gbwithparts';
+    # Settings shared by every mode that names the nucleotide database.
+    my %shared = (
+        'db'         => 'nucleotide',
+        'usehistory' => 'n',
+        'tool'       => 'bioperl',
+    );
+    %PARAMSTRING = (
+        'batch'   => { %shared },
+        'query'   => {
+            'usehistory' => 'y',
+            'tool'       => 'bioperl',
+            'retmode'    => 'text',
+        },
+        'gi'      => { %shared, 'retmode' => 'text' },
+        'version' => { %shared, 'retmode' => 'text' },
+        'single'  => { %shared, 'retmode' => 'text' },
+        'webenv'  => {
+            'query_key' => 'querykey',
+            'WebEnv'    => 'cookie',
+            %shared,
+            'retmode'   => 'text',
+        },
+    );
+}
+
+# new is in NCBIHelper
+
+# helper method to get db specific options
+
+=head2 new
+
+ Title   : new
+ Usage   : $gb = Bio::DB::GenBank->new(@options)
+ Function: Creates a new genbank handle
+ Returns : New genbank handle
+ Args    : -delay   number of seconds to delay between fetches (3s)
+
+NOTE:  There are other options that are used internally.  By NCBI policy, this
+module introduces a 3s delay between fetches.  If you are fetching multiple genbank
+ids, it is a good idea to use get_Stream_by_id() (or get_Stream_by_acc())
+rather than calling get_Seq_by_id() once per id.
+
+=cut
+
+=head2 get_params
+
+ Title   : get_params
+ Usage   : my %params = $self->get_params($mode)
+ Function: Returns key,value pairs to be passed to NCBI database
+           for either 'batch' or 'single' sequence retrieval method
+ Returns : a key,value pair hash
+ Args    : 'single' or 'batch' mode for retrieval
+
+=cut
+
+# Return the request parameters for $mode as a flat key/value list.
+# A mode with no defined entry in %PARAMSTRING falls back to the entry for
+# $DEFAULTMODE.
+sub get_params {
+    my ($self, $mode) = @_;
+    my $table = defined $PARAMSTRING{$mode}
+        ? $PARAMSTRING{$mode}
+        : $PARAMSTRING{$DEFAULTMODE};
+    return %{$table};
+}
+
+# from Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
+
+=head1 Routines Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+=head2 get_Seq_by_acc
+
+  Title   : get_Seq_by_acc
+  Usage   : $seq = $db->get_Seq_by_acc($acc);
+  Function: Gets a Seq object by accession numbers
+  Returns : a Bio::Seq object
+  Args    : the accession number as a string
+  Note    : For GenBank, this just calls the same code for get_Seq_by_id()
+  Throws  : "id does not exist" exception
+
+=head2 get_Seq_by_gi
+
+ Title   : get_Seq_by_gi
+ Usage   : $seq = $db->get_Seq_by_gi('405830');
+ Function: Gets a Bio::Seq object by gi number
+ Returns : A Bio::Seq object
+ Args    : gi number (as a string)
+ Throws  : "gi does not exist" exception
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=head1 Routines implemented by Bio::DB::NCBIHelper
+
+=head2 get_Stream_by_query
+
+  Title   : get_Stream_by_query
+  Usage   : $seq = $db->get_Stream_by_query($query);
+  Function: Retrieves Seq objects from Entrez 'en masse', rather than one
+            at a time.  For large numbers of sequences, this is far superior
+            to get_Stream_by_[id/acc]().
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $query :   An Entrez query string or a
+            Bio::DB::Query::GenBank object.  It is suggested that you
+            create a Bio::DB::Query::GenBank object and get the entry
+            count before you fetch a potentially large stream.
+
+=cut
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
+  Function: Gets a series of Seq objects by unique identifiers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of unique identifiers for
+                   the desired sequence entries
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $seq = $db->get_Stream_by_acc([$acc1, $acc2]);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+=head2 get_Stream_by_gi
+
+  Title   : get_Stream_by_gi
+  Usage   : $seq = $db->get_Stream_by_gi([$gi1, $gi2]);
+  Function: Gets a series of Seq objects by gi numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of gi numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=head2 get_Stream_by_batch
+
+  Title   : get_Stream_by_batch
+  Usage   : $seq = $db->get_Stream_by_batch($ref);
+  Function: Retrieves Seq objects from Entrez 'en masse', rather than one
+            at a time.
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+NOTE: This method is redundant and deprecated.  Use get_Stream_by_id()
+instead.
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: HTTP::Request
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_format {
+    # Package-wide default; the invocant is not consulted.
+    my $format = $DEFAULTFORMAT;
+    return $format;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenPept.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenPept.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenPept.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,240 @@
+# $Id: GenPept.pm,v 1.31.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::GenPept
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+# completely reworked by Jason Stajich to use Bio::DB::WebDBSeqI 2000-12-12
+
+=head1 NAME
+
+Bio::DB::GenPept - Database object interface to GenPept
+
+=head1 SYNOPSIS
+
+    $gb = new Bio::DB::GenPept;
+
+    $seq = $gb->get_Seq_by_id('195055'); # Unique ID
+
+    # or ...
+
+    $seq = $gb->get_Seq_by_acc('DEECTH'); # Accession Number
+
+    my $seqio = $gb->get_Stream_by_id(['195055', 'DEECTH']);
+    while( my $seq = $seqio->next_seq ) {
+	    print "seq id is ", $seq->display_id, "\n";
+    }
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenPept
+database at NCBI, via an Entrez query.
+
+WARNING: Please do NOT spam the Entrez web server with multiple requests.
+NCBI offers Batch Entrez for this purpose.  Batch Entrez will likely
+be supported in a future version of DB::GenPept.
+
+Currently the only return format supported by NCBI Entrez for the GenPept
+database is GenPept format, so any format specification passed to
+GenPept will be ignored and the format will still be forced to GenPept
+(which is just GenBank format).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey, Jason Stajich
+
+Email amackey at virginia.edu
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::GenPept;
+use strict;
+use vars qw($DEFAULTFORMAT $DEFAULTMODE %PARAMSTRING);
+
+use base qw(Bio::DB::NCBIHelper);
+# Populate the package defaults at compile time: the default retrieval mode,
+# the default sequence format, and the table of request parameters for each
+# retrieval mode.
+BEGIN {
+    $DEFAULTMODE   = 'single';
+    $DEFAULTFORMAT = 'gp';
+    # Settings shared by every protein retrieval mode.
+    my %shared = (
+        'db'         => 'protein',
+        'usehistory' => 'n',
+        'tool'       => 'bioperl',
+    );
+    %PARAMSTRING = (
+        'batch'   => { %shared },
+        # no query?
+        'gi'      => { %shared, 'retmode' => 'text' },
+        'version' => { %shared, 'retmode' => 'text' },
+        'single'  => { %shared, 'retmode' => 'text' },
+        'webenv'  => {
+            'query_key' => 'querykey',
+            'WebEnv'    => 'cookie',
+            %shared,
+            'retmode'   => 'text',
+        },
+    );
+}
+
+# the new way to make modules a little more lightweight
+# Construct via the parent class, then set the request format to this
+# module's default format.
+sub new {
+  my ($class, @ctor_args) = @_;
+  my $self   = $class->SUPER::new(@ctor_args);
+  my $format = $self->default_format;
+  $self->request_format($format);
+  return $self;
+}
+
+=head2 get_params
+
+ Title   : get_params
+ Usage   : my %params = $self->get_params($mode)
+ Function: Returns key,value pairs to be passed to NCBI database
+           for either 'batch' or 'single' sequence retrieval method
+ Returns : a key,value pair hash
+ Args    : 'single' or 'batch' mode for retrieval
+
+=cut
+
+# Return the request parameters for $mode as a flat key/value list, using
+# the $DEFAULTMODE entry when $mode has no defined entry in %PARAMSTRING.
+sub get_params {
+    my ($self, $mode) = @_;
+    if (defined $PARAMSTRING{$mode}) {
+        return %{$PARAMSTRING{$mode}};
+    }
+    return %{$PARAMSTRING{$DEFAULTMODE}};
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_format {
+    # Package-wide default; the invocant is not consulted.
+    my $format = $DEFAULTFORMAT;
+    return $format;
+}
+
+# from Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
+
+=head1 Routines from Bio::DB::WebDBSeqI and Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+=head2 get_Seq_by_acc
+
+  Title   : get_Seq_by_acc
+  Usage   : $seq = $db->get_Seq_by_acc('AAC73346');
+  Function: Gets a Seq objects by accession number
+  Returns : Bio::Seq object
+  Args    : accession number to retrieve by
+
+=head1 Routines implemented by Bio::DB::NCBIHelper
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: HTTP::Request
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
+  Function: Gets a series of Seq objects by unique identifiers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of unique identifiers for
+                   the desired sequence entries
+
+=head2 get_Stream_by_acc (2)
+
+  Title   : get_Stream_by_acc
+  Usage   : $seq = $db->get_Stream_by_acc($acc);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my $format = $self->request_format;
+           $self->request_format($format);
+ Function: Get/Set sequence format retrieval
+ Returns : string representing format
+ Args    : $format = sequence format
+
+=cut
+
+# Override that pins the format: any argument supplied by the caller is
+# discarded and the parent accessor is always handed this module's
+# default format.
+sub request_format {
+    my $self   = shift;
+    my $forced = $self->default_format();
+    return $self->SUPER::request_format($forced);
+}
+
+1;
+__END__
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenericWebDBI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenericWebDBI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/GenericWebDBI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,351 @@
+# $Id: GenericWebDBI.pm,v 1.3.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::GenericWebDBI
+#
+# Cared for by Chris Fields <cjfields at uiuc dot edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+# Interfaces with new GenericWebDBI interface
+
+=head1 NAME
+
+Bio::DB::GenericWebDBI - abstract interface for parameter-based remote
+database access
+
+=head1 SYNOPSIS
+
+  #
+  # grab data from HTTP::Response object using concrete class
+  #
+
+  $data = $db->get_response->content;
+
+  #
+  # $data is the raw data output from the HTTP::Response object;
+  # this data may be preparsed using the private method _parse_response
+
+=head1 DESCRIPTION
+
+WARNING: Please do B<NOT> spam the web servers with multiple requests.
+
+This class acts as a user agent interface for any generic web database, but
+is specifically geared towards CGI-based databases which accept parameters.
+
+=head1 TODO
+
+File and filehandle support to be added
+
+Any feedback is welcome.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::GenericWebDBI;
+use strict;
+use warnings;
+use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE
+         $DEFAULT_RETURN_FORMAT $LAST_INVOCATION_TIME);
+
+use base qw(Bio::Root::Root LWP::UserAgent);
+
+# Compile-time initialisation of the package-level defaults.
+BEGIN {
+    $MODVERSION             = '0.8';
+    $DEFAULT_RETRIEVAL_TYPE = 'pipeline';
+    $DEFAULT_RETURN_FORMAT  = 'text';
+    $LAST_INVOCATION_TIME   = 0;
+    # Set of recognised retrieval types (name => 1).
+    %RETRIEVAL_TYPES = map { $_ => 1 } qw(io_string tempfile pipeline);
+}
+
+# Constructor.  Recognised named arguments: -url_base, -retmode, -delay, -db.
+# Sets the user-agent string and environment proxies, stores any true
+# db/retmode/url_base values, and sets the delay to -delay when given,
+# otherwise to delay_policy().
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($url_base, $retmode, $delay, $db) =
+        $self->_rearrange([qw(URL_BASE RETMODE DELAY DB)], @args);
+
+    # from LWP::UserAgent; set agent and env proxy
+    $self->agent(ref($self)."/$Bio::Root::Root::VERSION");
+    $self->env_proxy;
+
+    $self->db($db) if $db;
+    # these will likely be overridden in base classes
+    $self->retmode($retmode)            if $retmode;
+    $self->url_base_address($url_base)  if $url_base;
+
+    # delay policy needs to be worked out; not set up correctly
+    $delay = $self->delay_policy unless defined $delay;
+    $self->delay($delay);
+    return $self;
+}
+
+=head2 url_base_address
+
+ Title   : url_base_address
+ Usage   : my $address = $self->url_base_address or
+           $self->url_base_address($address)
+ Function: Get/Set the base URL for the Web Database
+ Returns : Base URL for the Web Database
+ Args    : $address - URL for the WebDatabase
+
+=cut
+
+# Combined getter/setter: with an argument, store it (even if undef) and
+# return it; without one, return the stored base address.
+sub url_base_address {
+    my ($self, @new_value) = @_;
+    if (@new_value) {
+        $self->{'_baseaddress'} = $new_value[0];
+    }
+    return $self->{'_baseaddress'};
+}
+
+=head2 proxy
+
+ Title   : proxy
+ Usage   : $httpproxy = $db->proxy('http')  or
+           $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy
+ Returns : a string indicating the proxy
+ Args    : $protocol : an array ref of the protocol(s) to set/get
+           $proxyurl : url of the proxy to use for the specified protocol
+           $username : username (if proxy requires authentication)
+           $password : password (if proxy requires authentication)
+
+=cut
+
+# Get or set the proxy for one or more protocols.
+#   $protocol : protocol name, or array ref of names (required)
+#   $proxy    : proxy URL; when omitted the call is a getter
+#   $username,
+#   $password : optional credentials, stored via authentication() when both
+#               are true
+# Returns whatever the parent class's proxy() returns, or undef when no
+# protocol is given.
+sub proxy {
+    my ($self,$protocol,$proxy,$username,$password) = @_;
+    # Explicit undef (not a bare return) is kept so existing callers see the
+    # same value in list context as before.
+    return undef if !defined $protocol;
+    # Getter form, e.g. $db->proxy('http').  Previously this always returned
+    # undef, contradicting the documented usage.  Only the protocol is passed
+    # on so that the parent does not treat an undef URL as a new setting.
+    return $self->SUPER::proxy($protocol) if !defined $proxy;
+    $self->authentication($username, $password)
+        if ($username && $password);
+    return $self->SUPER::proxy($protocol,$proxy);
+}
+
+=head2 authentication
+
+ Title   : authentication
+ Usage   : $db->authentication($user,$pass)
+ Function: Get/Set authentication credentials
+ Returns : Array of user/pass
+ Args    : Array of user/pass
+
+=cut
+
+# Get or set the authentication credentials.
+#   $u, $p : user name and password; stored only when both are defined.
+# Returns the stored (user, password) pair as a list, or an empty list when
+# no credentials have been stored yet.
+sub authentication{
+   my ($self,$u,$p) = @_;
+   if( defined $u && defined $p ) {
+       $self->{'_authentication'} = [ $u,$p];
+   }
+   # Fall back to an empty list: dereferencing the unset slot directly is a
+   # fatal "undefined value as an ARRAY reference" error under strict.
+   return @{ $self->{'_authentication'} || [] };
+}
+
+=head2 db
+
+ Title   : db
+ Usage   : $db->db
+ Function: Get/Set database parameter
+ Returns : string
+ Args    : optional string
+
+=cut
+
+# Combined getter/setter for the database parameter: any argument (even
+# undef) is stored; the stored value is returned either way.
+sub db {
+    my ($self, @new_value) = @_;
+    $self->{'_db'} = $new_value[0] if @new_value;
+    return $self->{'_db'};
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $agent->id($id)
+           $agent->id(\@id)
+ Function: Get/Set id(s)
+ Returns : reference to id(s)
+ Args    : a single id or reference to array of id(s)
+
+=cut
+
+# Get or set the id list.  A value whose ref() type matches ARRAY is stored
+# as given; anything else is wrapped in a one-element array ref.  Always
+# returns the stored array ref (undef if nothing has been set).
+sub id {
+    my ($self, @args) = @_;
+    if (@args) {
+        my $id = $args[0];
+        $self->{'_ids'} = (ref($id) =~ /ARRAY/) ? $id : [$id];
+    }
+    return $self->{'_ids'};
+}
+
+=head2 retmode
+
+ Title   : retmode
+ Usage   : $agent->retmode($mode)
+ Function: Get/Set return mode for query (text, xml, html, asn.1, etc)
+ Returns : string for return mode
+ Args    : optional string
+
+=cut
+
+# Combined getter/setter for the return mode: any argument (even undef) is
+# stored; the stored value is returned either way.
+sub retmode {
+    my ($self, @new_value) = @_;
+    if (@new_value) {
+        $self->{'_retmode'} = $new_value[0];
+    }
+    return $self->{'_retmode'};
+}
+
+=head2 get_response
+
+ Title   : get_response
+ Usage   : $agent->get_response;
+ Function: get the request based on set object parameters, retrieved using
+           the private method _get_params
+ Returns : HTTP::Response object
+ Args    : none
+
+ This is implemented by the derived class
+
+=cut
+
+# Abstract method: always throws, so concrete subclasses must override it.
+sub get_response {
+    my $self = shift;
+    $self->throw("Implementing class must define method get_response in class GenericWebDBI");
+}
+
+=head2 delay
+
+ Title   : delay
+ Usage   : $secs = $self->delay([$secs])
+ Function: get/set number of seconds to delay between fetches
+ Returns : number of seconds to delay
+ Args    : new value
+
+NOTE: the default is to use the value specified by delay_policy().
+This can be overridden by calling this method, or by passing the
+-delay argument to new().
+
+=cut
+
+# Combined getter/setter for the number of seconds to wait between fetches:
+# any argument (even undef) is stored; the stored value is returned.
+sub delay {
+   my ($self, @new_value) = @_;
+   $self->{'_delay'} = $new_value[0] if @new_value;
+   return $self->{'_delay'};
+}
+
+=head2 delay_policy
+
+ Title   : delay_policy
+ Usage   : $secs = $self->delay_policy
+ Function: return number of seconds to delay between calls to remote db
+ Returns : number of seconds to delay
+ Args    : none
+
+NOTE: The default delay policy is 0s.  Override in subclasses to
+implement delays.  The timer has only second resolution, so the delay
+will actually be +/- 1s.
+
+=cut
+
+sub delay_policy {
+   # This class imposes no delay; the invocant is not consulted.
+   return 0;
+}
+
+=head2 _submit_request
+
+  Title   : _submit_request
+  Usage   : my $url = $self->_submit_request
+  Function: builds request object based on set parameters
+  Returns : HTTP::Request
+  Args    : optional : Bio::DB::EUtilities cookie
+
+=cut
+
+# Abstract method: always throws, so concrete subclasses must override it.
+sub _submit_request {
+    my $self = shift;
+    $self->throw("Implementing class must define method _submit_request in class GenericWebDBI");
+}
+
+=head2 _get_params
+
+  Title   : _get_params
+  Usage   : my $url = $self->_get_params
+  Function: builds parameter list for web request
+  Returns : hash of parameter-value pairs
+  Args    : optional : Bio::DB::EUtilities cookie
+
+=cut
+
+# these get sorted out in a hash originally but end up in an array to
+# deal with multiple id parameters (hash values would kill that)
+
+# Abstract method: always throws, so concrete subclasses must override it.
+sub _get_params {
+    my $self = shift;
+    $self->throw("Implementing class must define method _get_params in class GenericWebDBI");
+}
+
+=head2 _sleep
+
+ Title   : _sleep
+ Usage   : $self->_sleep
+ Function: sleep for a number of seconds indicated by the delay policy
+ Returns : none
+ Args    : none
+
+NOTE: This method keeps track of the last time it was called and only
+imposes a sleep if it was called more recently than the delay_policy()
+allows.
+
+=cut
+
+# Sleep just long enough that at least delay() seconds separate this call
+# from the previous one, then record the current time in the package-wide
+# $LAST_INVOCATION_TIME.
+sub _sleep {
+   my $self = shift;
+   # Sample the clock once: the original read time() separately for the test
+   # and for the sleep length, so a tick in between could yield a zero or
+   # negative sleep argument.
+   my $elapsed = time - $LAST_INVOCATION_TIME;
+   my $delay   = $self->delay - $elapsed;
+   if ($delay > 0) {
+      $self->debug("sleeping for $delay seconds\n");
+      sleep $delay;
+   }
+   $LAST_INVOCATION_TIME = time;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/InMemoryCache.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/InMemoryCache.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/InMemoryCache.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,262 @@
+# $Id: InMemoryCache.pm,v 1.7.4.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::InMemoryCache
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::InMemoryCache - In-memory cache for a sequence database
+
+=head1 SYNOPSIS
+
+  $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
+                                          -number => 1000);
+  #
+  # get a database object somehow using a concrete class
+  #
+
+  $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
+
+  #
+  # $seq is a Bio::Seq object
+  #
+
+=head1 DESCRIPTION
+
+This is a memory cache system which saves the objects returned by
+Bio::DB::RandomAccessI in memory to a hard limit of sequences.
+
+=head1 CONTACT
+
+Ewan Birney E<lt>birney at ebi.ac.ukE<gt>
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::DB::InMemoryCache;
+
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Root::Root Bio::DB::SeqI);
+
+# Constructor.
+#   -seqdb     : backing database; must be a Bio::DB::RandomAccessI
+#                (required, otherwise an exception is thrown)
+#   -number    : maximum number of cached sequences (default 1000)
+#   -agression : passed to agr(); when true, _load_Seq() caches a copy of
+#                each sequence rather than the object returned by the
+#                backing database
+sub new {
+    # "@args" had been mangled to " at args" by the mail archive, which is
+    # not valid Perl; restored here and in the _rearrange() call below.
+    my ($class,@args) = @_;
+
+    my $self = Bio::Root::Root->new();
+    bless $self,$class;
+
+    my ($seqdb,$number,$agr) =
+		$self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args);
+
+    if( !defined $seqdb || !ref $seqdb ||
+		  !$seqdb->isa('Bio::DB::RandomAccessI') ) {
+       $self->throw("Must be a RandomAccess database not a [$seqdb]");
+    }
+
+    if( !defined $number ) {
+        $number = 1000;
+    }
+
+    $self->seqdb($seqdb);
+    $self->number($number);
+    $self->agr($agr);
+
+    # we consider acc as the primary id here
+    $self->{'_cache_number_hash'} = {};   # accession => last-access stamp
+    $self->{'_cache_id_hash'}     = {};   # id => accession
+    $self->{'_cache_acc_hash'}    = {};   # accession => sequence object
+    $self->{'_cache_number'}      = 1;    # next access stamp to hand out
+
+    return $self;
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+=cut
+
+# Look the id up in the cache; on a hit, refresh the entry's access stamp
+# and return the cached sequence, otherwise load it through _load_Seq().
+sub get_Seq_by_id{
+   my ($self,$id) = @_;
+
+   my $acc = $self->{'_cache_id_hash'}->{$id};
+   return $self->_load_Seq('id',$id) unless defined $acc;
+
+   my $seq   = $self->{'_cache_acc_hash'}->{$acc};
+   my $stamp = $self->{'_cache_number'}++;
+   $self->{'_cache_number_hash'}->{$seq->accession} = $stamp;
+   return $seq;
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+=cut
+
+# Look the accession up in the cache; on a hit, refresh the entry's access
+# stamp and return the cached sequence, otherwise load it through
+# _load_Seq().
+sub get_Seq_by_acc{
+   my ($self,$acc) = @_;
+
+   my $seq = $self->{'_cache_acc_hash'}->{$acc};
+   if( !defined $seq ) {
+       return $self->_load_Seq('acc',$acc);
+   }
+
+   my $stamp = $self->{'_cache_number'}++;
+   $self->{'_cache_number_hash'}->{$seq->accession} = $stamp;
+   return $seq;
+}
+
+
+
+# Getter/setter for the cache size limit.  A false argument (including 0)
+# is treated as "no argument", so the call acts as a getter.
+sub number {
+    my ($self, $number) = @_;
+    return $self->{'number'} unless $number;
+    return $self->{'number'} = $number;
+}
+
+# Getter/setter for the backing database.  A false argument is treated as
+# "no argument", so the call acts as a getter.
+sub seqdb {
+    my ($self, $seqdb) = @_;
+    return $self->{'seqdb'} unless $seqdb;
+    return $self->{'seqdb'} = $seqdb;
+}
+
+# Getter/setter for the aggression level.  A false argument (including 0)
+# is treated as "no argument", so the call acts as a getter.
+sub agr {
+    my ($self, $agr) = @_;
+    return $self->{'agr'} unless $agr;
+    return $self->{'agr'} = $agr;
+}
+
+
+# Fetch a sequence from the backing database on a cache miss and register it
+# in the cache, evicting one entry first if the cache is full.
+#   $type : 'id' or 'acc' - selects get_Seq_by_id() or get_Seq_by_acc() on
+#           the backing database; any other value throws.
+#   $id   : the identifier passed to that lookup.
+# Returns the sequence that was cached (a copy when agr() is true), or
+# nothing if the lookup returned a false value.
+sub _load_Seq {
+  my ($self,$type,$id) = @_;
+
+  my $seq;
+
+  if( $type eq 'id') {
+    $seq = $self->seqdb->get_Seq_by_id($id);
+  }elsif ( $type eq 'acc' ) {
+    $seq = $self->seqdb->get_Seq_by_acc($id);
+  } else {
+    $self->throw("Bad internal error. Don't understand $type");
+  }
+  if( ! $seq ) {
+      # warding off bug #1628
+      $self->debug("could not find seq $id in seqdb\n");
+      return;
+  }
+
+  # With a true aggression setting, cache a freshly built Bio::Seq carrying
+  # the display id, accession, sequence string and description instead of
+  # the object the backing database returned.
+  if( $self->agr() ) {
+      #print STDERR "Pulling out into memory\n";
+      my $newseq = Bio::Seq->new( -display_id => $seq->display_id,
+				  -accession_number  => $seq->accession,
+				  -seq        => $seq->seq,
+				  -desc       => $seq->desc,
+				  );
+      # Aggression level exactly 1 additionally carries over the top-level
+      # features and the annotation.
+      if( $self->agr() == 1 ) {
+	  foreach my $sf ( $seq->top_SeqFeatures() ) {
+	      $newseq->add_SeqFeature($sf);
+	  }
+
+	  $newseq->annotation($seq->annotation);
+      }
+      $seq = $newseq;
+  }
+
+  if( $self->_number_free < 1 ) {
+    # Cache is full: evict the entry with the smallest access stamp, i.e.
+    # the least recently used one (stamps grow with every access).
+    my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=>
+								$self->{'_cache_number_hash'}->{$b} }
+                          keys %{$self->{'_cache_number_hash'}};
+
+    my $acc = shift @accs;
+    # Drop the evicted entry from all three index hashes.  This inner $seq
+    # shadows the newly loaded sequence only within this block.
+    my $seq = $self->{'_cache_acc_hash'}->{$acc};
+
+    delete $self->{'_cache_number_hash'}->{$acc};
+    delete $self->{'_cache_id_hash'}->{$seq->id};
+    delete $self->{'_cache_acc_hash'}->{$acc};
+  }
+
+  # Register the new sequence under its id and accession and give it the
+  # next access stamp.
+  $self->{'_cache_id_hash'}->{$seq->id} = $seq->accession;
+  $self->{'_cache_acc_hash'}->{$seq->accession} = $seq;
+  $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
+
+  return $seq;
+}
+
+
+# Number of free cache slots: the configured limit minus the number of
+# entries currently tracked in the access-stamp hash.
+sub _number_free {
+  my ($self) = @_;
+  my $limit  = $self->number;
+  my $in_use = keys %{$self->{'_cache_number_hash'}};
+  return $limit - $in_use;
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+# Not supported by the cache: always throws.
+sub get_Seq_by_version{
+   # "@args" had been mangled to " at args" by the mail archive, which is
+   # not valid Perl; restored here.
+   my ($self,@args) = @_;
+   $self->throw("Not implemented it");
+}
+
+## End of Package
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/LocationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/LocationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/LocationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,144 @@
+# $Id: LocationI.pm,v 1.1.2.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::LocationI
+#
+# Cared for by Chris Fields <cjfields at uiuc dot edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::LocationI - A RandomAccessI-like abstract interface for
+retrieving location data from a sequence database and returning
+Bio::LocationI objects 
+
+=head1 SYNOPSIS
+
+  #
+  # get a database object somehow using a concrete class
+  #
+
+  $loc = $db->get_Location_by_id('123456');
+
+  #
+  # $loc is a Bio::LocationI object
+  #
+
+=head1 DESCRIPTION
+
+This is a pure interface class - in other words, all this does is define
+methods which other (concrete) classes will actually implement. 
+
+The Bio::DB::LocationI class defines methods used to retrieve location data
+from a sequence.  This is returned in the form of Bio::LocationI objects,
+which can include:
+
+Bio::Location::Simple
+Bio::Location::Fuzzy
+Bio::Location::Split
+
+At the moment it is just the ability to make Bio::LocationI objects
+from a name or unique id (id), an accession number (acc), and so on.
+
+=head1 CONTACT
+
+Ewan Birney originally wrote Bio::DB::RandomAccessI, on which this class
+is based.
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the 
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::LocationI;
+
+use strict;
+
+use Bio::Root::RootI;
+
+use base qw(Bio::Root::Root);
+
+=head2 get_Location_by_id
+
+ Title   : get_Location_by_id
+ Usage   : $loc = $db->get_Location_by_id('123456')
+ Function: Gets a Bio::LocationI-implementing object by its name (id)
+ Returns : a Bio::LocationI object or undef if not found
+ Args    : the id (as a string) of a sequence
+
+=cut
+
+sub get_Location_by_id{
+   my ($self, @args) = @_;
+   # interface method: implementing classes must override this
+   $self->throw_not_implemented();
+}
+
+=head2 get_Location_by_acc
+
+ Title   : get_Location_by_acc
+ Usage   : $loc = $db->get_Location_by_acc('X77802');
+ Function: Gets a Bio::LocationI object by accession number
+ Returns : A Bio::LocationI object or undef if not found
+ Args    : accession number (as a string)
+ Throws  : "more than one sequences correspond to this accession"
+            if the accession maps to multiple primary ids and
+            method is called in a scalar context
+
+=cut
+
+sub get_Location_by_acc{
+   my ($self, @args) = @_;
+   # interface method: implementing classes must override this
+   $self->throw_not_implemented();
+}
+
+=head2 get_Location_by_version
+
+ Title   : get_Location_by_version
+ Usage   : $loc = $db->get_Location_by_version('X77802.1');
+ Function: Gets a Bio::LocationI object by sequence version
+ Returns : A Bio::LocationI object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+sub get_Location_by_version{
+   my ($self, @args) = @_;
+   # interface method: implementing classes must override this
+   $self->throw_not_implemented();
+}
+
+## End of Package
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/MeSH.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/MeSH.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/MeSH.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,309 @@
+# $Id: MeSH.pm,v 1.8.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::MeSH
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::MeSH - Term retrieval from a Web MeSH database
+
+=head1 SYNOPSIS
+
+ my $mesh = new Bio::DB::MeSH();
+ my $term = $mesh->get_exact_term('Butter');
+ print $term->description;
+
+=head1 DESCRIPTION
+
+This class retrieves a term from the Medical Subject Headings database
+by the National Library of Medicine of USA. 
+See L<http://www.nlm.nih.gov/mesh/meshhome.html>.
+
+This class implements L<Bio::SimpleAnalysisI> and wraps its methods under
+L<get_exact_term()>.
+
+By default, web access uses L<WWW::Mechanize>, but in its absence
+falls back to bioperl module L<Bio::WebAgent> which is a subclass of
+L<LWP::UserAgent>. If even that is not installed, it uses
+L<Bio::Root::HTTPget>.
+
+=head1 SEE ALSO
+
+L<Bio::Phenotype::MeSH::Term>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::MeSH;
+use strict;
+
+use Bio::Phenotype::MeSH::Term;
+use Bio::Phenotype::MeSH::Twig;
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+
+# Address installed as the object's url() by _init().
+my $URL = 'http://www.nlm.nih.gov/mesh/MBrowser.html';
+
+# Analysis metadata; _init() stores it and takes the analysis name from it.
+my $ANALYSIS_SPEC = {
+    name => 'MeSH term retrival',
+    type => 'Entry retrieval',
+};
+
+# Input description: a single mandatory scalar called 'value'.
+my $INPUT_SPEC = [
+    {
+        mandatory => 'true',
+        type      => 'scalar',
+        name      => 'value',
+    },
+];
+
+# Result description: '' maps to the term class name, 'raw' to the
+# unprocessed output.
+my $RESULT_SPEC = {
+    ''  => 'Bio::Phenotype::MeSH::Term',
+    raw => 'raw output',
+};
+
+
+# Set up object state: the remote URL, the analysis/input/result
+# specifications, the analysis name and the choice of web client.
+# Returns the object itself.
+sub _init {
+    my ($self) = @_;
+
+    $self->url($URL);
+    @{$self}{qw(_ANALYSIS_SPEC _INPUT_SPEC _RESULT_SPEC)}
+        = ($ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC);
+    $self->{'_ANALYSIS_NAME'} = $ANALYSIS_SPEC->{'name'};
+
+    # work out which HTTP client _run() is going to use
+    $self->_webmodule;
+
+    return $self;
+}
+
+# Record in $self->{'_webmodule'} which web client _run() should use.
+# Order of preference: WWW::Mechanize, then Bio::WebAgent (selected when
+# LWP::UserAgent can be loaded), and finally Bio::Root::HTTPget.
+sub _webmodule {
+    my ($self) = @_;
+    $self->{'_webmodule'} = '';
+
+    eval { require WWW::Mechanize };
+    if (not $@) {
+        $self->{'_webmodule'} = 'WWW::Mechanize';
+        return;
+    }
+
+    eval { require LWP::UserAgent };
+    if (not $@) {
+        $self->{'_webmodule'} = 'Bio::WebAgent';
+        return;
+    }
+
+    # last resort; this require is not guarded, so a failure is fatal
+    require Bio::Root::HTTPget;
+    $self->{'_webmodule'} = 'Bio::Root::HTTPget';
+    return 1;
+}
+
+=head2 get_exact_term
+
+  Title   : get_exact_term
+  Usage   : $s = $db->get_exact_term($value);
+  Function: Retrieve a single MeSH term using a unique ID or exact name.
+  Example :
+  Returns : a Bio::Phenotype::MeSH::Term object
+  Args    : scalar, UID or name of a MeSH term
+
+The returned term object contains information about the immediate
+vicinity of the term in the terminology hierarchy. See
+L<Bio::Phenotype::MeSH::Twig>.
+
+=cut
+
+
+sub get_exact_term {
+    my ($self, $value) = @_;
+
+    # reset the '_term' slot before doing anything else
+    $self->{'_term'} = undef;
+
+    # a false $value skips the lookup altogether
+    if ($value) {
+        $self->run($value);
+    }
+
+    if ($self->status ne 'COMPLETED') {
+        $self->throw("Could not connect to the server");
+    }
+    return $self->result;
+}
+
+
+# Validate the input and pass it on to _run().
+#   $value - MeSH name or ID; references are rejected with an exception
+sub run {
+    my ($self, $value) = @_;
+
+    # check input
+    if (ref $value) {
+        $self->throw("Need a MeSH name or ID  as an input [$value]");
+    }
+
+    # internal run()
+    return $self->_run($value);
+}
+
+
+# Build the MB_cgi lookup URL for a field/term pair.
+#   $field - name of the field to search (_run passes 'uid' or 'entry')
+#   $term  - the value to look for
+# Returns the complete URL as a string.
+sub _cgi_url {
+  my($self, $field, $term) = @_;
+  # $term is whatever the caller handed to run(), so it can hold spaces,
+  # '&', '#' and similar characters that would corrupt the query string.
+  # Everything outside the RFC 3986 unreserved set is percent-encoded by
+  # hand because URI::Escape is not loaded by this module.  The loop
+  # variable aliases the lexical copies above, not the caller's values.
+  foreach my $part ($field, $term) {
+    $part = '' unless defined $part;
+    utf8::encode($part) if utf8::is_utf8($part);
+    $part =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;
+  }
+  return 'http://www.nlm.nih.gov/cgi/mesh/2003/MB_cgi?field='.$field.'&term='.$term;
+}
+
+
+# Fetch the page for $value with the web client chosen by _webmodule()
+# and keep it in $self->{'_content'}.
+#   $value - MeSH name or unique ID; must be a true value
+# The status is set to 'TERMINATED_BY_ERROR' first and changed to
+# 'COMPLETED' only after content has been obtained.  A failed request
+# leaves the error status in place (a warning is issued when the client
+# reports a connection failure).
+# Throws if $value is false.
+sub  _run {
+    my ($self, $value)  = @_;
+    $self->throw('Need a value [' . (defined $value ? $value : '') . ']')
+        unless $value;
+    # delay repeated calls by default by 3 sec, set delay() to change
+#    $self->sleep;
+
+    $self->status('TERMINATED_BY_ERROR');
+
+    # a word character followed by six digits is looked up as a unique
+    # ID, anything else as an entry term
+    my $field = $value =~ /\w\d{6}/ ? 'uid' : 'entry';
+
+    if ($self->{'_webmodule'} eq 'WWW::Mechanize') {
+        $self->debug("using WWW::Mechanize...\n");
+        my $agent = WWW::Mechanize->new();
+        $agent->get($self->url);
+        # return explicitly instead of relying on the truth of warn()'s
+        # return value
+        unless ($agent->status == 200) {
+            $self->warn("Could not connect to the server\n");
+            return;
+        }
+
+        $agent->form_name('MB');
+
+        $agent->field("term", $value);
+        $agent->field("field", $field);
+        $agent->click("exact");
+
+        $self->{'_content'} = $agent->content();
+        $self->status('COMPLETED');
+        return;
+    }
+    elsif ($self->{'_webmodule'} eq 'Bio::WebAgent') {
+        $self->debug("using LWP::UserAgent...\n");
+        my $response = eval {
+            $self->get( $self->_cgi_url($field, $value) )
+        };
+        # as before, the slot holds the response until the body replaces it
+        $self->{'_content'} = $response;
+        if ($@) {
+            $self->warn("Could not connect to the server\n");
+            return;
+        }
+        if ($response->is_success) {
+            $self->{'_content'} =  $response->content;
+            $self->status('COMPLETED');
+        }
+        return;
+    } else {
+        $self->debug("using Bio::Root::HTTPget...\n");
+        my $agent = Bio::Root::HTTPget->new();
+        $self->{'_content'} = eval {
+            $agent->get( $self->_cgi_url($field, $value) )
+        };
+        # both lookups now warn and stop here; the entry lookup used to
+        # call debug() and could go on to report success after a failure
+        if ($@) {
+            $self->warn("Could not connect to the server\n");
+            return;
+        }
+        $self->status('COMPLETED');
+    }
+}
+
+# Turn the page fetched by _run() into a term object.
+#   $value - optional; the string 'raw' returns the unparsed content
+# Returns a Bio::Phenotype::MeSH::Term with synonyms and twigs added, or
+# the raw content when that was asked for.
+# Throws unless the current status is 'COMPLETED'.
+sub result {
+    my ($self,$value) = @_;
+
+    # read the status here: status('COMPLETED') is the setter form used
+    # by _run(), so testing its return value could never detect a failure
+    my $status = $self->status;
+    $self->throw("Could not retrive results")
+        unless defined $status && $status eq 'COMPLETED';
+
+    # no processing
+    return $self->{'_content'} if $value && $value eq 'raw';
+
+
+    # create a MeSH::Term object; a lexical copy is parsed so that the
+    # caller's $_ is left untouched
+    my $content = defined $self->{'_content'} ? $self->{'_content'} : '';
+    $self->debug( substr($content, 0, 100) . "\n");
+    my ($id)   = $content =~ m|Unique ID</TH><TD>(.*?)</TD>|i;
+    my ($name) = $content =~ m|MeSH Heading</TH><TD>([^<]+)|i;
+    my ($desc) = $content =~ m|Scope Note</TH><TD>(.*?)</TD>|is;
+    if (defined $desc) {
+        # strip markup and flatten to one line
+        $desc =~ s/<.*?>//sg;
+        $desc =~ s/\n/ /g;
+    }
+
+    my $term = Bio::Phenotype::MeSH::Term->new(-id => $id,
+                                               -name => $name,
+                                               -description => $desc
+                                              );
+    my ($trees) = $content =~ /MeSH Tree Structures(.*)/s;
+
+    while ($content =~ m|Entry Term</TH><TD>([^<]+)|ig) {
+        $term->add_synonym($1);
+        $self->debug("Synonym: |$1|\n");
+    }
+
+    # without a heading or a tree section there is nothing to attach
+    return $term unless defined $name && defined $trees;
+
+    # the heading and the tree numbers are data, not patterns: quote them
+    # so that '.', '(' and friends match literally
+    my $name_re = quotemeta $name;
+
+    foreach my $section (split /<HR>/i, $trees ) {
+        next unless $section =~ /$name_re/;
+        $section =~ s/<TD.*?>/ /sgi;
+        $section =~ s/<.*?>//sg;
+        $section =~ s/&nbsp;/ /sg;
+        #print "|$section|";
+        my ($treeno) = $section =~ /$name_re \[([^]]+)]/;
+        $treeno = '' unless defined $treeno;
+        # a tree number with no '.ddd' suffix yields no parent number
+        my ($parent_treeno) = $treeno =~ /(.*)\.\d{3}/;
+        $parent_treeno = '' unless defined $parent_treeno;
+        my $treeno_re = quotemeta $treeno;
+        my $parent_re = quotemeta $parent_treeno;
+        my ($parent) = $section =~ /\n +(\w.+) \[$parent_re\]/;
+
+        my $twig = Bio::Phenotype::MeSH::Twig->new(-parent => $parent);
+        $term->add_twig($twig);
+
+        $self->debug("Parent: |" . (defined $parent ? $parent : '') . "|\n");
+        while ($section =~ /\n +(\w.+) \[$treeno_re\./g ) {
+            $twig->add_child($1);
+            $self->debug("Child: |$1|\n");
+        }
+
+        while ($section =~ /\n +(\w.+) \[$parent_re\./g ) {
+            next if $name eq $1;
+            $twig->add_sister($1);
+            $self->debug("Sister: |$1|\n");
+        }
+    }
+    return $term;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/NCBIHelper.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/NCBIHelper.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/NCBIHelper.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,466 @@
+# $Id: NCBIHelper.pm,v 1.54.2.2 2006/11/10 16:45:19 cjfields Exp $
+#
+# BioPerl module for Bio::DB::NCBIHelper
+#
+# Cared for by Jason Stajich
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+# 
+# Interfaces with new WebDBSeqI interface 
+
+=head1 NAME
+
+Bio::DB::NCBIHelper - A collection of routines useful for queries to
+NCBI databases.
+
+=head1 SYNOPSIS
+
+ # Do not use this module directly.
+
+ # get a Bio::DB::NCBIHelper object somehow
+ my $seqio = $db->get_Stream_by_acc(['MUSIGHBA1']);
+ while ( my $seq = $seqio->next_seq ) {
+     # process seq
+ }
+
+=head1 DESCRIPTION
+
+Provides a single place to setup some common methods for querying NCBI
+web databases.  This module just centralizes the methods for
+constructing a URL for querying NCBI GenBank and NCBI GenPept and the
+common HTML stripping done in L<postprocess_data>().
+
+The base NCBI query URL used is:
+http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::NCBIHelper;
+use strict;
+use vars qw($HOSTBASE %CGILOCATION %FORMATMAP 	    $DEFAULTFORMAT $MAX_ENTRIES $VERSION @ATTRIBUTES);
+
+use Bio::DB::Query::GenBank;
+use HTTP::Request::Common;
+use URI;
+use Bio::Root::IO;
+use Bio::DB::RefSeq;
+use URI::Escape qw(uri_unescape);
+
+use base qw(Bio::DB::WebDBSeqI Bio::Root::Root);
+
+# Compile-time setup of the package-wide tables and of the simple
+# attribute accessors named in @ATTRIBUTES.
+BEGIN {
+    $MAX_ENTRIES = 19000;
+    $HOSTBASE = 'http://eutils.ncbi.nlm.nih.gov';
+
+    # retrieval mode => [ HTTP method, path below $HOSTBASE ]
+    %CGILOCATION = (
+        'batch'   => ['post' => '/entrez/eutils/epost.fcgi'],
+        'query'   => ['get'  => '/entrez/eutils/efetch.fcgi'],
+        'single'  => ['get'  => '/entrez/eutils/efetch.fcgi'],
+        'version' => ['get'  => '/entrez/eutils/efetch.fcgi'],
+        'gi'      => ['get'  => '/entrez/eutils/efetch.fcgi'],
+        'webenv'  => ['get'  => '/entrez/eutils/efetch.fcgi'],
+    );
+
+    # request format => second element of request_format()'s return list
+    %FORMATMAP = (
+        'gb'          => 'genbank',
+        'gp'          => 'genbank',
+        'fasta'       => 'fasta',
+        'asn.1'       => 'entrezgene',
+        'gbwithparts' => 'genbank',
+    );
+    $DEFAULTFORMAT = 'gb';
+
+    @ATTRIBUTES = qw(complexity strand seq_start seq_stop no_redirect);
+    for my $method (@ATTRIBUTES) {
+        my $slot = "_$method";
+        # Each accessor stores a new value when one is passed and always
+        # returns the value held BEFORE the call.  Closures are used in
+        # place of string eval so that a mistake here surfaces as a
+        # compile error rather than a silently missing method.
+        my $accessor = sub {
+            my $self     = shift;
+            my $previous = $self->{$slot};
+            $self->{$slot} = shift if @_;
+            return $previous;
+        };
+        {
+            no strict 'refs';
+            *{ __PACKAGE__ . "::$method" } = $accessor;
+        }
+    }
+}
+
+# the new way to make modules a little more lightweight
+
+# Constructor: builds the object through the parent class and then
+# records the optional -seq_start, -seq_stop, -no_redirect, -complexity
+# and -strand arguments.
+sub new {
+    my ($class, @args ) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($seq_start, $seq_stop, $no_redirect, $complexity, $strand) =
+        $self->_rearrange([qw(SEQ_START SEQ_STOP NO_REDIRECT COMPLEXITY STRAND)],
+                          @args);
+
+    # these four are stored only when given a true value
+    $self->seq_start($seq_start)     if $seq_start;
+    $self->seq_stop($seq_stop)       if $seq_stop;
+    $self->no_redirect($no_redirect) if $no_redirect;
+    $self->strand($strand)           if $strand;
+
+    # zero is an acceptable complexity, so test definedness and range
+    # instead of truth
+    if (defined $complexity && $complexity >= 0 && $complexity <= 4) {
+        $self->complexity($complexity);
+    }
+
+    return $self;
+}
+
+
+=head2 get_params
+
+ Title   : get_params
+ Usage   : my %params = $self->get_params($mode)
+ Function: Returns key,value pairs to be passed to NCBI database
+           for either 'batch' or 'single' sequence retrieval method
+ Returns : a key,value pair hash
+ Args    : 'single' or 'batch' mode for retrieval
+
+=cut
+
+sub get_params {
+    my $self = shift;
+    # no parameters are defined at this level; this always throws
+    return $self->throw("subclass did not implement get_params");
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+# Hands back the package-wide $DEFAULTFORMAT assigned in the BEGIN block.
+sub default_format {
+    return $DEFAULTFORMAT;
+}
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $request = $self->get_request(%qualifiers)
+ Function: Builds the request used to fetch the given entries
+ Returns : an HTTP::Request object
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+sub get_request {
+    my ($self, @qualifiers) = @_;
+    my ($mode, $uids, $format, $query, $seq_start, $seq_stop, $strand, $complexity) =
+        $self->_rearrange([qw(MODE UIDS FORMAT QUERY SEQ_START SEQ_STOP STRAND COMPLEXITY)],
+                          @qualifiers);
+
+    # settle the default mode first so that an absent mode never reaches lc()
+    $mode = (defined $mode && $mode ne '') ? lc $mode : 'single';
+    ($format) = $self->request_format() unless ( defined $format);
+
+    my %params = $self->get_params($mode);
+    if( ! %params ) {
+        $self->throw("must specify a valid retrieval mode 'single' or 'batch' not '$mode'")
+    }
+    my $url = URI->new($HOSTBASE . $CGILOCATION{$mode}[1]);
+    unless( $mode eq 'webenv' || defined $uids || defined $query) {
+        $self->throw("Must specify a query or list of uids to fetch");
+    }
+
+    # where the ids come from, in order of precedence: a query object with
+    # a cookie, a plain query object, this object's own cookie (webenv
+    # mode), and finally the uid list
+    if ($query && $query->can('cookie')) {
+        @params{'WebEnv','query_key'} = $query->cookie;
+        $params{'db'}                 = $query->db;
+    }
+    elsif ($query) {
+        $params{'id'} = join ',',$query->ids;
+    }
+    # for batch retrieval, non-query style
+    elsif ($mode eq 'webenv' && $self->can('cookie')) {
+        @params{'WebEnv','query_key'} = $self->cookie;
+    }
+    elsif ($uids) {
+        if( ref($uids) =~ /array/i ) {
+            $uids = join(",", @$uids);
+        }
+        $params{'id'}      = $uids;
+    }
+
+    $params{'seq_start'} = $seq_start if $seq_start;
+    $params{'seq_stop'}  = $seq_stop  if $seq_stop;
+    $params{'strand'}    = $strand    if $strand;
+    if (defined $complexity && ($seq_start || $seq_stop || $strand)) {
+        $self->warn("Complexity set to $complexity; seq_start and seq_stop may not work!")
+            if ($complexity != 1 && ($seq_start || $seq_stop));
+        $self->warn("Complexity set to 0; expect strange results with strand set to 2")
+            if ($complexity == 0 && $strand == 2 && $format eq 'fasta');
+    }
+    $params{'complexity'} = $complexity if defined $complexity;
+    $params{'rettype'} = $format unless $mode eq 'batch';
+
+    # for now, 'post' is batch retrieval
+    if ($CGILOCATION{$mode}[0] eq 'post') {
+        # proxy credentials belong on the outgoing request; attaching them
+        # to the response after the exchange has no effect
+        my $request = POST($url, [%params]);
+        $request->proxy_authorization_basic($self->authentication)
+            if ( $self->authentication);
+        my $response = $self->ua->request($request);
+
+        # stores the cookie and query key that the 'webenv' pass picks up
+        $self->_parse_response($response->content);
+        my %qualifiers = ('-mode'       => 'webenv',
+                          '-seq_start'  => $seq_start,
+                          '-seq_stop'   => $seq_stop,
+                          '-strand'     => $strand,
+                          '-complexity' => $complexity,
+                          '-format'     => $format);
+        return $self->get_request(%qualifiers);
+    } else {
+        $url->query_form(%params);
+        return GET $url;
+    }
+}
+
+=head2 get_Stream_by_batch
+
+  Title   : get_Stream_by_batch
+  Usage   : $seq = $db->get_Stream_by_batch($ref);
+  Function: Retrieves Seq objects from Entrez 'en masse', rather than one
+            at a time.  For large numbers of sequences, this is far superior
+            to get_Stream_by_[id/acc]().
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+NOTE: deprecated API.  Use get_Stream_by_id() instead.
+
+=cut
+
+*get_Stream_by_batch = sub {
+   my ($self, @args) = @_;
+   # kept for old callers only: announce the deprecation, then delegate
+   $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
+   return $self->get_Stream_by_id(@args);
+};
+
+=head2 get_Stream_by_query
+
+  Title   : get_Stream_by_query
+  Usage   : $seq = $db->get_Stream_by_query($query);
+  Function: Retrieves Seq objects from Entrez 'en masse', rather than one
+            at a time.  For large numbers of sequences, this is far superior
+            to get_Stream_by_[id/acc]().
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $query :   An Entrez query string or a
+            Bio::DB::Query::GenBank object.  It is suggested that you
+            create a Bio::DB::Query::GenBank object and get the entry
+            count before you fetch a potentially large stream.
+
+=cut
+
+sub get_Stream_by_query {
+    my ($self, $query) = @_;
+
+    # anything that is not already an object with a query() method is
+    # wrapped in a Bio::DB::Query::GenBank object
+    my $is_query_object = ref($query) && $query->can('query');
+    if (!$is_query_object) {
+        $query = Bio::DB::Query::GenBank->new($query);
+    }
+
+    return $self->get_seq_stream('-query' => $query, '-mode'=>'query');
+}
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				                         'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string 
+                                           reference containing data
+
+=cut
+
+# the default method, works for genbank/genpept, other classes should
+# override it with their own method.
+
+# Deliberately empty: the body performs no processing of its arguments.
+sub postprocess_data {
+	# retain this in case postprocessing is needed at a future date
+}
+
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my ($req_format, $ioformat) = $self->request_format;
+           $self->request_format("genbank");
+           $self->request_format("fasta");
+ Function: Get/Set sequence format retrieval. The get-form will normally not
+           be used outside of this and derived modules.
+ Returns : Array of two strings, the first representing the format for
+           retrieval, and the second specifying the corresponding SeqIO format.
+ Args    : $format = sequence format
+
+=cut
+
+sub request_format {
+    my ($self, $value) = @_;
+
+    if (defined $value) {
+        my $requested = lc $value;
+        # formats missing from %FORMATMAP are passed through unchanged;
+        # alternatively, we could throw an exception
+        my $ioformat = defined $FORMATMAP{$requested}
+            ? $FORMATMAP{$requested}
+            : $requested;
+        $self->{'_format'} = [ $requested, $ioformat ];
+    }
+
+    # (retrieval format, SeqIO format)
+    return @{ $self->{'_format'} };
+}
+
+=head2 Bio::DB::WebDBSeqI methods
+
+Overriding WebDBSeqI method to help newbies to retrieve sequences
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $seq = $db->get_Stream_by_acc([$acc1, $acc2]);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+sub get_Stream_by_acc {
+    my ($self, $ids ) = @_;
+
+    # _check_id() may hand back a RefSeq database to send the request to
+    my $newdb    = $self->_check_id($ids);
+    my $redirect = defined $newdb && ref($newdb) && $newdb->isa('Bio::DB::RefSeq');
+    my $db       = $redirect ? $newdb : $self;
+
+    return $db->get_seq_stream('-uids' => $ids, '-mode' => 'single');
+}
+
+
+=head2 _check_id
+
+  Title   : _check_id
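+  Usage   : my $newdb = $self->_check_id($ids);
+  Function: Rejects NT_ contig ids and, unless no_redirect is set,
+            redirects ids matching N._ to a Bio::DB::RefSeq object
+  Returns : A Bio::DB::RefSeq reference or throws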
+  Args    : $id(s), $string
+
+=cut
+
+sub _check_id {
+    my ($self, $ids) = @_;
+
+    # NOTE(review): get_Stream_by_acc() documents its argument as an array
+    # reference; such a value is matched below in its stringified form, so
+    # these checks look effective for plain string ids only - confirm.
+
+    # NT contigs can not be retrieved
+    $self->throw("NT_ contigs are whole chromosome files which are not part of regular ".
+                 "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.")
+      if $ids =~ /NT_/;
+
+    # Asking for a RefSeq from EMBL/GenBank
+    unless ($self->no_redirect) {
+        if ($ids =~ /N._/) {
+            $self->warn("[$ids] is not a normal sequence database but a RefSeq entry.".
+                        " Redirecting the request.\n")
+              if $self->verbose >= 0;
+            return Bio::DB::RefSeq->new;
+        }
+    }
+
+    # no redirection: return nothing, rather than whatever the last
+    # condition happened to evaluate to
+    return;
+}
+
+=head2 delay_policy
+
+  Title   : delay_policy
+  Usage   : $secs = $self->delay_policy
+  Function: return number of seconds to delay between calls to remote db
+  Returns : number of seconds to delay
+  Args    : none
+
+  NOTE: NCBI requests a delay of 3 seconds between requests.  This method
+        implements that policy.
+
+=cut
+
+# Always answers 3, a fixed number of seconds that ignores object state.
+sub delay_policy {
+  my $self = shift;
+  return 3;
+}
+
+=head2 cookie
+
+ Title   : cookie
+ Usage   : ($cookie,$querynum) = $db->cookie
+ Function: return the NCBI query cookie
+ Returns : list of (cookie,querynum)
+ Args    : none
+
+NOTE: this information is used by Bio::DB::GenBank in
+conjunction with efetch.
+
+=cut
+
+# ripped from Bio::DB::Query::GenBank
+sub cookie {
+  my ($self, @new) = @_;
+
+  # getter: no arguments given
+  return @{$self}{qw(_cookie _querynum)} unless @new;
+
+  # setter: first argument is the cookie, second the query number
+  $self->{'_cookie'}   = shift @new;
+  $self->{'_querynum'} = shift @new;
+}
+
+=head2 _parse_response
+
+ Title   : _parse_response
+ Usage   : $db->_parse_response($content)
+ Function: parse out response for cookie
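+ Returns : nothing of significance; the cookie and query key found in
+           the response are stored on the object (see cookie())
+ Args    : $content - the response body to parse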
+ Throws  : 'unparseable output exception'
+
+=cut
+
+# trimmed-down version of _parse_response from Bio::DB::Query::GenBank
+sub _parse_response {
+  my ($self, $content) = @_;
+
+  # an error list is passed on as a warning and is not fatal
+  if ($content =~ m!<ErrorList>(.+)</ErrorList>!s) {
+    my $warning = $1;
+    $self->warn("Warning(s) from GenBank: $warning\n");
+  }
+
+  # an output message is treated as a hard error
+  if ($content =~ /<OutputMessage>([^<]+)/) {
+    my $error = $1;
+    $self->throw("Error from Genbank: $error");
+  }
+
+  # remember the cookie and query key found in the response
+  my ($cookie)   = $content =~ m!<WebEnv>(\S+)</WebEnv>!;
+  my ($querykey) = $content =~ m!<QueryKey>(\d+)!;
+  $self->cookie(uri_unescape($cookie), $querykey);
+}
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/GenBank.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/GenBank.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/GenBank.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,300 @@
+# $Id: GenBank.pm,v 1.18.2.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::DB::Query::GenBank.pm
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::DB::Query::GenBank - Build a GenBank Entrez Query
+
+=head1 SYNOPSIS
+
+   my $query_string = 'Oryza[Organism] AND EST[Keyword]';
+   my $query = Bio::DB::Query::GenBank->new(-db=>'nucleotide',
+                                            -query=>$query_string,
+					    -mindate => '2001',
+					    -maxdate => '2002');
+   my $count = $query->count;
+   my @ids   = $query->ids;
+
+   # get a genbank database handle
+   my $gb = new Bio::DB::GenBank;
+   my $stream = $gb->get_Stream_by_query($query);
+   while (my $seq = $stream->next_seq) {
+      # do something with the sequence object
+   }
+
+   # initialize the list yourself
+   my $query = Bio::DB::Query::GenBank->new(-ids=>[195052,2981014,11127914]);
+
+
+=head1 DESCRIPTION
+
+This class encapsulates NCBI Entrez queries.  It can be used to store
+a list of GI numbers, to translate an Entrez query expression into a
+list of GI numbers, or to count the number of terms that would be
+returned by a query.  Once created, the query object can be passed to
+a Bio::DB::GenBank object in order to retrieve the entries
+corresponding to the query.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Query::GenBank;
+use strict;
+use URI::Escape 'uri_unescape';
+
+use constant EPOST               => 'http://www.ncbi.nih.gov/entrez/eutils/epost.fcgi';
+use constant ESEARCH             => 'http://www.ncbi.nih.gov/entrez/eutils/esearch.fcgi';
+use constant DEFAULT_DB          => 'protein';
+use constant MAXENTRY            => 100;
+
+use vars qw(@ATTRIBUTES);
+
+use base qw(Bio::DB::Query::WebQuery);
+
+BEGIN {
+  @ATTRIBUTES = qw(db reldate mindate maxdate datetype maxids);  # each name becomes a method and a '_name' hash key
+  for my $method (@ATTRIBUTES) {  # string-eval one accessor per name; it sets when given an argument and returns the prior value
+    eval <<END;
+sub $method {
+   my \$self = shift;
+   my \$d    = \$self->{'_$method'};
+   \$self->{'_$method'} = shift if \@_;
+   \$d;
+}
+END
+  }
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::Query::GenBank->new(@args)
+ Function: create new query object
+ Returns : new query object
+ Args    : -db       database ('protein' or 'nucleotide')
+           -query    query string
+           -mindate  minimum date to retrieve from
+           -maxdate  maximum date to retrieve from
+           -reldate  relative date to retrieve from (days)
+           -datetype date field to use ('edat' or 'mdat')
+           -ids      array ref of gids (overrides query)
+           -maxids   the maximum number of IDs you wish to collect (defaults to 100)
+
+This method creates a new query object.  Typically you will specify a
+-db and a -query argument, possibly modified by -mindate, -maxdate, or
+-reldate.  -mindate and -maxdate specify minimum and maximum dates for
+entries you are interested in retrieving, expressed in the form
+YYYY/MM/DD.  -reldate is used to fetch entries that are more recent
+than the indicated number of days.
+
+If you provide an array reference of IDs in -ids, the query will be
+ignored and the list of IDs will be used when the query is passed to a
+Bio::DB::GenBank object's get_Stream_by_query() method.  A variety of
+IDs are automatically recognized, including GI numbers, Accession
+numbers, Accession.version numbers and locus names.
+
+By default, the query will collect only the first 100 IDs and will
+generate an exception if you call the ids() method and the query
+returned more than that number.  To increase this maximum, set -maxids
+to a number larger than the number of IDs you expect to obtain.  This
+only affects the list of IDs you obtain when you call the ids()
+method, and does not affect in any way the number of entries you
+receive when you generate a SeqIO stream from the query.
+
+=cut
+
+sub new {
+  my $class = shift;
+  my $self  = $class->SUPER::new(@_);  # Bio::DB::Query::WebQuery::new consumes -query, -ids and -verbose
+  my ($query,$db,$reldate,$mindate,$maxdate,$datetype,$ids,$maxids)
+    = $self->_rearrange([qw(QUERY DB RELDATE MINDATE MAXDATE DATETYPE IDS MAXIDS)],@_);
+  $self->db($db || DEFAULT_DB);  # DEFAULT_DB is 'protein'
+  $reldate  && $self->reldate($reldate);  # optional attributes are stored only when true
+  $mindate  && $self->mindate($mindate);
+  $maxdate  && $self->maxdate($maxdate);
+  $maxids   && $self->maxids($maxids);
+  $datetype ||= 'mdat';  # default date field
+  $datetype && $self->datetype($datetype);
+  $self;
+}
+
+=head2 cookie
+
+ Title   : cookie
+ Usage   : ($cookie,$querynum) = $db->cookie
+ Function: return the NCBI query cookie
+ Returns : list of (cookie,querynum)
+ Args    : none
+
+NOTE: this information is used by Bio::DB::GenBank in
+conjunction with efetch.
+
+=cut
+
+sub cookie {
+  my $self = shift;
+  if (@_) {  # setter form: _parse_response calls this with (cookie, querykey)
+    $self->{'_cookie'}   = shift;
+    $self->{'_querynum'} = shift;
+  }
+
+  else {  # getter form
+    $self->_run_query;  # make sure the query has been submitted before reading the cached pair
+    @{$self}{qw(_cookie _querynum)};
+  }
+}
+
+=head2 _request_parameters
+
+ Title   : _request_parameters
+ Usage   : ($method,$base,@params) = $db->_request_parameters
+ Function: return information needed to construct the request
+ Returns : list of method, url base and key=>value pairs
+ Args    : none
+
+=cut
+
+sub _request_parameters {
+  my $self = shift;
+  # Call each attribute accessor once, by name (no string eval); only true values are sent.
+  my @params = map { my $attr = $_; my $val = $self->$attr(); $val ? ($attr => $val) : () } @ATTRIBUTES;
+  push @params,('usehistory'=>'y','tool'=>'bioperl');
+  my $method = 'get';
+  my $base   = ESEARCH;
+  push @params,('term'   => $self->query);
+
+  # Providing 'retmax' limits queries to 500 sequences  ?? I don't think so LS
+  push @params,('retmax' => $self->maxids || MAXENTRY);
+
+  # And actually, it seems that we need 'retstart' equal to 0 ?? I don't think so LS
+  # push @params, ('retstart' => 0);
+
+  ($method,$base,@params);  # list of: HTTP method, base URL, then key => value pairs
+}
+
+
+=head2 count
+
+ Title   : count
+ Usage   : $count = $db->count;
+ Function: return count of number of entries retrieved by query
+ Returns : integer
+ Args    : none
+
+Returns the number of entries that are matched by the query.
+
+=cut
+
+sub count   {
+  my $self = shift;
+  if (@_) {  # setter form: store the new count and hand back the previous one
+    my $d = $self->{'_count'};
+    $self->{'_count'}   = shift;
+    return $d;
+  }
+  else {  # getter form: run the query if it has not run yet, then read the cached count
+    $self->_run_query;
+    return $self->{'_count'};
+  }
+}
+
+=head2 ids
+
+ Title   : ids
+ Usage   : @ids = $db->ids([@ids])
+ Function: get/set matching ids
+ Returns : array of sequence ids
+ Args    : (optional) array ref with new set of ids
+
+=cut
+
+=head2 query
+
+ Title   : query
+ Usage   : $query = $db->query([$query])
+ Function: get/set query string
+ Returns : string
+ Args    : (optional) new query string
+
+=cut
+
+=head2 _parse_response
+
+ Title   : _parse_response
+ Usage   : $db->_parse_response($content)
+ Function: parse out response
+ Returns : empty
+ Args    : $content - response text to parse
+ Throws  : 'unparseable output exception'
+
+=cut
+
+sub _parse_response {
+  my $self    = shift;
+  my $content = shift;
+  if (my ($warning) = $content =~ m!<ErrorList>(.+)</ErrorList>!s) {  # /s lets the list span lines
+    $self->warn("Warning(s) from GenBank: $warning\n");
+  }
+  if (my ($error) = $content =~ /<OutputMessage>([^<]+)/) {  # text up to the next tag
+    $self->throw("Error from Genbank: $error");
+  }
+
+  my ($count) = $content =~  /<Count>(\d+)/;  # first <Count> value; undef if absent
+  my ($max)   = $content =~  /<RetMax>(\d+)/;  # first <RetMax> value; undef if absent
+  my $truncated = $count > $max;  # true when the reported count exceeds the reported RetMax
+  $self->count($count);
+  if (!$truncated) {  # keep the id list only when it was not cut short
+    my @ids = $content =~ /<Id>(\d+)/g;
+    $self->ids(\@ids);
+  } else {  # truncated: ids are left as they were, only a debug note is emitted
+    $self->debug("ids truncated at $max\n");
+  }
+  $self->_truncated($truncated);  # flag read by _fetch_ids to decide on a second run
+  my ($cookie)    = $content =~ m!<WebEnv>(\S+)</WebEnv>!;
+  my ($querykey)  = $content =~ m!<QueryKey>(\d+)!;
+  $self->cookie(uri_unescape($cookie),$querykey);  # store both through the cookie() setter
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/WebQuery.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/WebQuery.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Query/WebQuery.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,350 @@
+# $Id: WebQuery.pm,v 1.15.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::DB::WebQuery.pm
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::DB::Query::WebQuery - Helper class for web-based sequence queries
+
+=head1 SYNOPSIS
+
+  # Do not use this class directly.  See Bio::DB::QueryI and one of
+  # the implementor classes (such as Bio::DB::Query::GenBank) for
+  # information.
+
+See L<Bio::DB::QueryI>, L<Bio::DB::Query::GenBank>
+
+
+=head1 DESCRIPTION
+
+Do not use this class directly.  See Bio::DB::QueryI and one of the
+implementor classes (such as Bio::DB::Query::GenBank) for information.
+
+Those writing subclasses must define _request_parameters() and
+_parse_response(), and possibly override _get_request().
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Query::WebQuery;
+use strict;
+use URI;
+use LWP::UserAgent;
+use HTTP::Request::Common;
+
+
+use base qw(Bio::Root::Root Bio::DB::QueryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::Query::WebQuery->new(@args)
+ Function: create new query object
+ Returns : new query object
+ Args    : -db       database (e.g. 'protein')
+           -ids      array ref of ids (overrides query)
+           -verbose  turn on verbose debugging
+
+This method creates a new query object.  Typically you will specify a
+-db and a -query argument.  The value of -query is a database-specific
+string.
+
+If you provide an array reference of IDs in -ids, the query will be
+ignored and the list of IDs will be used when the query is passed to
+the database.
+
+=cut
+
+# Borrowed shamelessly from WebDBSeqI.  Some of this code should be
+# refactored.
+sub new {
+  my $class = shift;
+  my $self  = $class->SUPER::new(@_);
+
+  my ($query,$ids,$verbose) = $self->_rearrange(['QUERY','IDS','VERBOSE'],@_);
+  $self->throw('must provide one of the the -query or -ids arguments')
+    unless defined($query) || defined($ids);
+  $query ||= join ',',ref($ids) ? @$ids : $ids;  # NOTE(review): a true -query wins here, although the POD says -ids overrides it
+  $query && $self->query($query);
+  $verbose && $self->verbose($verbose);
+
+  my $ua = LWP::UserAgent->new(env_proxy => 1);  # direct method call instead of indirect-object "new Class"
+  $ua->agent(ref($self) ."/".($Bio::DB::Query::WebQuery::VERSION || '0.1'));
+  $self->ua($ua);
+  $self->{'_authentication'} = [];  # stays empty until authentication() stores a user/password pair
+  $self;
+}
+
+=head2 ua
+
+ Title   : ua
+ Usage   : my $ua = $self->ua or 
+           $self->ua($ua)
+ Function: Get/Set a LWP::UserAgent for use
+ Returns : reference to LWP::UserAgent Object
+ Args    : $ua - must be a LWP::UserAgent
+
+=cut
+
+sub ua {
+   my ($self, $ua) = @_;
+   my $d = $self->{'_ua'};  # value held before this call; this is what gets returned
+   if( defined $ua && $ua->isa("LWP::UserAgent") ) {  # any other argument is ignored without complaint
+      $self->{'_ua'} = $ua;
+   }
+   $d;
+}
+
+=head2 proxy
+
+ Title   : proxy
+ Usage   : $httpproxy = $db->proxy('http')  or 
+           $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy
+ Returns : a string indicating the proxy
+ Args    : $protocol : an array ref of the protocol(s) to set/get
+           $proxyurl : url of the proxy to use for the specified protocol
+           $username : username (if proxy requires authentication)
+           $password : password (if proxy requires authentication)
+
+=cut
+
+sub proxy {
+    my ($self,$protocol,$proxy,$username,$password) = @_;
+    return undef if !defined $self->ua || !defined $protocol;
+    # Getter form from the POD, proxy('http'): report the current proxy for one scheme.
+    return (ref $protocol ? undef : $self->ua->proxy($protocol)) unless defined $proxy;
+    $self->authentication($username, $password) if $username && $password;
+    return $self->ua->proxy($protocol,$proxy);  # setter form: delegate to the user agent
+}
+
+=head2 authentication
+
+ Title   : authentication
+ Usage   : $db->authentication($user,$pass)
+ Function: Get/Set authentication credentials
+ Returns : Array of user/pass 
+ Args    : Array of user/pass
+
+
+=cut
+
+sub authentication{
+   my ($self,$u,$p) = @_;
+
+   if( defined $u && defined $p ) {  # stored only when both values are supplied
+       $self->{'_authentication'} = [ $u,$p];
+   }
+   return @{$self->{'_authentication'}};  # list context: the stored elements; scalar context: how many there are
+}
+
+=head2 ids
+
+ Title   : ids
+ Usage   : @ids = $db->ids([@ids])
+ Function: get/set matching ids
+ Returns : array of sequence ids
+ Args    : (optional) array ref with new set of ids
+
+=cut
+
+sub ids     {
+  my $self = shift;
+  if (@_) {  # setter form: returns the previously stored list
+    my $d = $self->{'_ids'};
+    my $arg = shift;
+    $self->{'_ids'} = ref $arg ? $arg : [$arg];  # a non-reference argument is wrapped in an array ref
+    return $d ? @$d : ();
+  } else {  # getter form: let _fetch_ids run the query first, then return what is cached
+    $self->_fetch_ids;
+    return @{$self->{'_ids'} || []};
+  }
+}
+
+=head2 query
+
+ Title   : query
+ Usage   : $query = $db->query([$query])
+ Function: get/set query string
+ Returns : string
+ Args    : (optional) new query string
+
+=cut
+
+sub query   {
+  my $self = shift;
+  my $d    = $self->{'_query'};  # value held before this call
+  $self->{'_query'} = shift if @_;  # overwrite only when an argument is passed
+  $d;  # always the earlier value, even when setting
+}
+
+=head2 _fetch_ids
+
+ Title   : _fetch_ids
+ Usage   : @ids = $db->_fetch_ids
+ Function: run query, get ids
+ Returns : array of sequence ids
+ Args    : none
+
+=cut
+
+sub _fetch_ids     {
+  my $self = shift;
+  $self->_run_query;
+  $self->_run_query(1) if $self->_truncated;  # forced second run when the first id list was cut short
+  $self->throw('Id list has been truncated even after maxids requested')
+    if $self->_truncated;
+  return @{ $self->{'_ids'} || [] };  # empty list when nothing is stored, not the false condition value
+}
+
+=head2 _run_query
+
+ Title   : _run_query
+ Usage   : $success = $db->_run_query
+ Function: run query, parse results
+ Returns : true if successful
+ Args    : none
+
+=cut
+
+sub _run_query {
+  my $self   = shift;
+  my $force  = shift;  # true value bypasses the already-ran guard below
+
+  # allow the query to be run one extra time if truncated
+  return $self->{'_ran_query'} if $self->{'_ran_query'}++ && !$force;  # counter is bumped on every call; unforced repeat calls stop here
+
+  my $request = $self->_get_request;
+  $self->debug("request is ".$request->url."\n");
+  my $response = $self->ua->request($request);
+  return unless $response->is_success;  # failed request: nothing is parsed, and the counter above stays bumped
+  $self->debug("response is ".$response->content."\n");
+  $self->_parse_response($response->content);  # subclass hook fills in the results
+  1;
+}
+
+=head2 _truncated
+
+ Title   : _truncated
+ Usage   : $flag = $db->_truncated([$newflag])
+ Function: get/set truncation flag
+ Returns : boolean
+ Args    : new flag
+
+Some databases will truncate output unless explicitly asked
+not to.  This flag allows a "two probe" attempt.
+
+=cut
+
+sub _truncated {
+  my $self = shift;
+  my $d = $self->{'_truncated'};  # value held before this call
+  $self->{'_truncated'} = shift if @_;  # overwrite only when an argument is passed
+  $d;  # always the earlier value, even when setting
+}
+
+=head2 _get_request
+
+ Title   : _get_request
+ Usage   : $http_request = $db->_get_request(@params)
+ Function: create an HTTP::Request with indicated parameters
+ Returns : HTTP::Request object
+ Args    : CGI parameter list
+
+=cut
+
+sub _get_request {
+  my $self   = shift;
+  my ($method,$base,@params) = $self->_request_parameters;
+  my $uri = URI->new($base);
+  my $request;
+  if ($method eq 'get') {  # GET: parameters go into the query string
+    $uri->query_form(@params);
+    $request = GET $uri;
+  } else {  # any other method value: parameters are sent as POST form content
+    $request = POST $uri,\@params;
+  }
+
+  $request->proxy_authorization_basic($self->authentication)
+    if $self->authentication;  # scalar context: true once a user/password pair is stored
+  $request;
+}
+
+=head2 _parse_response
+
+ Title   : _parse_response
+ Usage   : $db->_parse_response($content)
+ Function: parse out response
+ Returns : empty
+ Args    : $content - response text to parse
+ Throws  : 'unparseable output exception'
+
+NOTE: This method must be implemented by subclass.
+
+=cut
+
+sub _parse_response {
+  my $self    = shift;
+  my $content = shift;  # taken for signature parity; this base stub does not use it
+  $self->throw_not_implemented;  # subclasses supply the parser
+}
+
+=head2 _request_parameters
+
+ Title   : _request_parameters
+ Usage   : ($method,$base,@params) = $db->_request_parameters
+ Function: return information needed to construct the request
+ Returns : list of method, url base and key=>value pairs
+ Args    : none
+
+NOTE: This method must be implemented by subclass.
+
+=cut
+
+sub _request_parameters {
+  my $self = shift;
+  $self->throw_not_implemented;  # subclasses return (method, base URL, key => value pairs)
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/QueryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/QueryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/QueryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,153 @@
+# $Id: QueryI.pm,v 1.5.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::QueryI.pm
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::DB::QueryI - Object Interface to queryable sequence databases
+
+=head1 SYNOPSIS
+
+   # using Bio::DB::Query::GenBank as an example
+   my $query_string = 'Oryza[Organism] AND EST[Keyword]';
+   my $query = Bio::DB::Query::GenBank->new(-db=>'nucleotide',
+                                            -query=>$query_string);
+   my $count = $query->count;
+   my @ids   = $query->ids;
+
+   # get a genbank database handle
+   my $gb = new Bio::DB::GenBank;
+   my $stream = $gb->get_Stream_by_query($query);
+   while (my $seq = $stream->next_seq) {
+      ...
+   }
+
+   # initialize the list yourself
+   my $query = Bio::DB::Query::GenBank->new(-ids=>['X1012','CA12345']);
+
+=head1 DESCRIPTION
+
+This interface provides facilities for managing sequence queries such
+as those offered by Entrez.  A query object is created by calling
+new() with a database-specific argument list. From the query object
+you can either obtain the list of IDs returned by the query, or a
+count of entries that would be returned.  You can pass the query
+object to a Bio::DB::RandomAccessI object to return the entries
+themselves as a list or a stream.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::QueryI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::QueryI->new(@args);
+ Function: constructor
+ Returns : QueryI object
+ Args    : -query       a query string
+           -ids         a list of ids as an arrayref
+
+Create new QueryI object.  You may initialize with either a query
+string or with a list of ids.  If both ids and a query are provided,
+the former takes precedence.
+
+Subclasses may recognize additional arguments.
+
+=cut
+
+=head2 count
+
+ Title   : count
+ Usage   : $count = $db->count;
+ Function: return count of number of entries retrieved by query
+ Returns : integer
+ Args    : none
+
+Returns the number of entries that are matched by the query.
+
+=cut
+
+sub count   {
+  my $self = shift;
+  my @ids  = $self->ids;  # default implementation: fetch the full id list
+  scalar @ids;  # and report how many elements it holds
+}
+
+=head2 ids
+
+ Title   : ids
+ Usage   : @ids = $db->ids([@ids])
+ Function: get/set matching ids
+ Returns : array of sequence ids
+ Args    : (optional) array ref with new set of ids
+
+=cut
+
+sub ids     {
+  my $self = shift;
+  $self->throw_not_implemented;  # interface stub: implementing classes provide the id list
+}
+
+=head2 query
+
+ Title   : query
+ Usage   : $query = $db->query([$query])
+ Function: get/set query string
+ Returns : string
+ Args    : (optional) new query string
+
+=cut
+
+sub query   {
+  my $self = shift;
+  $self->throw_not_implemented;  # interface stub: implementing classes provide the accessor
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/RandomAccessI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/RandomAccessI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/RandomAccessI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+# POD documentation - main docs before the code
+#
+# $Id: RandomAccessI.pm,v 1.14.4.2 2006/10/02 23:10:15 sendu Exp $
+#
+
+=head1 NAME
+
+Bio::DB::RandomAccessI - Abstract interface for a sequence database
+
+=head1 SYNOPSIS
+
+  #
+  # get a database object somehow using a concrete class
+  #
+
+  $seq = $db->get_Seq_by_id('ROA1_HUMAN');
+
+  #
+  # $seq is a Bio::Seq object
+  #
+
+=head1 DESCRIPTION
+
+This is a pure interface class - in other words, all this does is define
+methods which other (concrete) classes will actually implement.
+
+The Bio::DB::RandomAccessI class defines what methods a generic database class
+should have. At the moment it is just the ability to make Bio::Seq objects
+from a name (id) or an accession number.
+
+=head1 CONTACT
+
+Ewan Birney E<lt>birney at ebi.ac.ukE<gt> originally wrote this class.
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::DB::RandomAccessI;
+
+use strict;
+
+use Bio::Root::RootI;
+
+use base qw(Bio::Root::Root);
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object or undef if not found
+ Args    : the id (as a string) of a sequence,
+
+=cut
+
+sub get_Seq_by_id{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();  # interface stub: concrete classes implement the lookup
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+           $seq = $db->get_Seq_by_acc(Locus => 'X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object or undef if not found
+ Args    : accession number (as a string), or a two
+               element list consisting of namespace=>accession
+ Throws  : "more than one sequences correspond to this accession"
+            if the accession maps to multiple primary ids and
+            method is called in a scalar context
+
+NOTE: The two-element form allows you to choose the namespace for the
+accession number.
+
+=cut
+
+sub get_Seq_by_acc{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();  # interface stub: concrete classes implement the lookup
+}
+
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+
+sub get_Seq_by_version{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();  # interface stub: concrete classes implement the lookup
+}
+
+## End of Package
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/RefSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/RefSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/RefSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,156 @@
+#
+# $Id: RefSeq.pm,v 1.13.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::RefSeq
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::RefSeq - Database object interface for RefSeq retrieval
+
+=head1 SYNOPSIS
+
+  use Bio::DB::RefSeq;
+
+  $db = new Bio::DB::RefSeq;
+
+  # most of the time RefSeq_ID eq RefSeq acc
+  $seq = $db->get_Seq_by_id('NM_006732'); # RefSeq ID
+  print "accession is ", $seq->accession_number, "\n";
+
+  # or changing to accession number and Fasta format ...
+  $db->request_format('fasta');
+  $seq = $db->get_Seq_by_acc('NM_006732'); # RefSeq ACC
+  print "seq is ", $seq->seq, "\n";
+
+  # especially when using versions, you had better be prepared
+  # for not getting what you want
+  eval {
+      $seq = $db->get_Seq_by_version('NM_006732.1'); # RefSeq VERSION
+  };
+  print "accession is ", $seq->accession_number, "\n" unless $@;
+
+  # or ... best when downloading very large files, prevents
+  # keeping all of the file in memory
+
+  # also don't want features, just sequence so let's save bandwidth
+  # and request Fasta sequence
+  $db = new Bio::DB::RefSeq(-retrievaltype => 'tempfile' ,
+ 			       -format => 'fasta');
+  my $seqio = $db->get_Stream_by_id(['NM_006732', 'NM_005252'] );
+  while( my $seq  =  $seqio->next_seq ) {
+ 	print "seqid is ", $seq->id, "\n";
+  }
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of sequence objects L<Bio::Seq> from the
+RefSeq database using the dbfetch script at EBI:
+L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
+
+In order to make changes transparent we have host type (currently only
+ebi) and location (defaults to ebi) separated out.  This allows later
+additions of more servers in different geographical locations.
+
+The functionality of this module is inherited from L<Bio::DB::DBFetch>
+which implements L<Bio::DB::WebDBSeqI>.
+
+This module retrieves entries from EBI, although the
+database entries themselves are produced at NCBI. When read into bioperl
+objects, the parser for GenBank format is used. RefSeq is a
+NONSTANDARD GenBank file so be ready for surprises.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::RefSeq;
+use strict;
+use vars qw($MODVERSION %HOSTS %FORMATMAP $DEFAULTFORMAT);
+
+$MODVERSION = '0.1';
+
+use base qw(Bio::DB::DBFetch);
+
+BEGIN {
+    $DEFAULTFORMAT = 'genbank';
+    # Every supported format name maps to itself.
+    %FORMATMAP = (
+        'embl'    => 'embl',
+        'genbank' => 'genbank',
+        'fasta'   => 'fasta',
+    );
+    # you can add your own here theoretically.
+    %HOSTS = (
+        'dbfetch' => {
+            baseurl => 'http://%s/cgi-bin/dbfetch?db=refseq&style=raw',
+            hosts   => { 'ebi' => 'www.ebi.ac.uk' },
+        },
+    );
+}
+
+sub new {
+    my ($class, @args ) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    $self->{ '_hosts' } = {};  # reset both tables to empty before the setters below run
+    $self->{ '_formatmap' } = {};
+
+    $self->hosts(\%HOSTS);  # install the package-level tables built in BEGIN
+    $self->formatmap(\%FORMATMAP);
+    $self->{'_default_format'} = $DEFAULTFORMAT;  # 'genbank'
+
+    return $self;
+}
+
+
+sub get_seq_stream {
+    my ($self, %qualifiers) = @_;
+    # Strip a leading "<non-space characters>|" prefix from every requested uid.
+    if (exists $qualifiers{'-uids'}) {
+        if (ref($qualifiers{'-uids'}) =~ /ARRAY/i) {
+            # Elements are aliased by the loop, so the referenced array is edited in place.
+            s/^(\S+)\|// for @{ $qualifiers{'-uids'} };
+        } else {
+            $qualifiers{'-uids'} =~ s/^(\S+)\|//;
+        }
+    }
+    $self->SUPER::get_seq_stream(%qualifiers);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/ReferenceI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/ReferenceI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/ReferenceI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,136 @@
+# $Id: ReferenceI.pm,v 1.1.2.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::ReferenceI
+#
+# Cared for by Chris Fields <cjfields at uiuc dot edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::ReferenceI - A RandomAccessI-like abstract interface for
+retrieving Reference data from a sequence database and returning
+Bio::Annotation::Reference objects 
+
+=head1 SYNOPSIS
+
+  #
+  # get a database object somehow using a concrete class
+  #
+
+  $ref = $db->get_Reference_by_id('123456');
+
+  #
+  # $ref is a Bio::Annotation::Reference object
+  #
+
+=head1 DESCRIPTION
+
+This is a pure interface class - in other words, all this does is define
+methods which other (concrete) classes will actually implement. 
+
+The Bio::DB::ReferenceI class defines methods used to retrieve reference data
+from a sequence.  This is returned in the form of Bio::Annotation::Reference
+objects.
+
+At the moment it is just the ability to make Bio::Annotation::Reference
+objects from a name or unique id (id), an accession number (acc), and so on.
+
+=head1 CONTACT
+
+Ewan Birney originally wrote Bio::DB::RandomAccessI, from which this class
+is based.
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the 
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at lists.open-bio.org               - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Email cjfields at uiuc dot edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::ReferenceI;
+
+use strict;
+
+=head2 get_Reference_by_id
+
+ Title   : get_Reference_by_id
+ Usage   : $ref = $db->get_Reference_by_id('123456')
+ Function: Gets a Bio::Annotation::Reference-implementing object by its name (id)
+ Returns : a Bio::Annotation::Reference object or undef if not found
+ Args    : the id (as a string) of a sequence
+
+=cut
+
+sub get_Reference_by_id{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();  # NOTE(review): this package declares no base class; the method presumably comes from the implementor
+}
+
+=head2 get_Reference_by_acc
+
+ Title   : get_Reference_by_acc
+ Usage   : $ref = $db->get_Reference_by_acc('X77802');
+ Function: Gets a Bio::Annotation::Reference object by accession number
+ Returns : A Bio::Annotation::Reference object or undef if not found
+ Args    : accession number (as a string)
+ Throws  : "more than one sequences correspond to this accession"
+            if the accession maps to multiple primary ids and
+            method is called in a scalar context
+
+=cut
+
+sub get_Reference_by_acc{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();  # NOTE(review): this package declares no base class; the method presumably comes from the implementor
+}
+
+=head2 get_Reference_by_version
+
+ Title   : get_Reference_by_version
+ Usage   : $ref = $db->get_Reference_by_version('X77802.1');
+ Function: Gets a Bio::Annotation::Reference object by sequence version
+ Returns : A Bio::Annotation::Reference object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+sub get_Reference_by_version{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();  # NOTE(review): this package declares no base class; the method presumably comes from the implementor
+}
+
+## End of Package
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Registry.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Registry.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Registry.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,289 @@
+# $Id: Registry.pm,v 1.32.4.4 2006/11/30 09:24:00 sendu Exp $
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Registry - Access to the Open Bio Database Access registry scheme
+
+=head1 SYNOPSIS
+
+    use Bio::DB::Registry();
+
+    $registry = new Bio::DB::Registry();
+
+    @available_services = $registry->services;
+
+    $db = $registry->get_database('embl');
+    # $db is a Bio::DB::SeqI implementing class
+
+    $seq = $db->get_Seq_by_acc("J02231");
+
+=head1 DESCRIPTION
+
+This module provides access to the Open Bio Database Access (OBDA)
+scheme, which provides a single cross-language and cross-platform 
+specification of how to get to databases. These databases may be 
+accessible through the Web, they may be BioSQL databases, or
+they may be local, indexed flatfile databases.
+
+If the user or system administrator has not installed the default init 
+file, seqdatabase.ini, in /etc/bioinformatics or ${HOME}/.bioinformatics 
+then creating the first Registry object copies the default settings from 
+www.open-bio.org. The Registry object will attempt to store these 
+settings in a new file, ${HOME}/.bioinformatics/seqdatabase.ini.
+
+Users can specify one or more custom locations for the init file by 
+setting $OBDA_SEARCH_PATH to those directories, where multiple 
+directories should be separated by ';'.
+
+Please see the OBDA Access HOWTO for more information
+(L<http://bioperl.open-bio.org/wiki/HOWTO:OBDA>).
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Registry;
+
+use vars qw($OBDA_SPEC_VERSION $OBDA_SEARCH_PATH
+			   $HOME $PRIVATE_DIR $PUBLIC_DIR $REGISTRY 
+			   $FALLBACK_REGISTRY);
+use strict;
+
+use Bio::DB::Failover;
+use Bio::Root::HTTPget;
+use base qw(Bio::Root::Root);
+
+BEGIN {
+    # Highest OBDA specification version that _load_registry() accepts.
+    $OBDA_SPEC_VERSION = 1.0;
+
+    # Seed the package globals from the environment when it provides them.
+    if (defined $ENV{HOME}) {
+        $HOME = $ENV{HOME};
+    }
+    $OBDA_SEARCH_PATH = $ENV{OBDA_SEARCH_PATH} || ''
+        if defined $ENV{OBDA_SEARCH_PATH};
+}
+
+# Maps an OBDA protocol name to the class that implements it.
+my %implement = (
+    'flat'     => 'Bio::DB::Flat',
+    'biosql'   => 'Bio::DB::BioSQL::OBDA',
+    'biofetch' => 'Bio::DB::BioFetch',
+    # 'biocorba' => 'Bio::CorbaClient::SeqDB',
+);
+
+# Registry file name, the directories searched for it, and the URL used
+# when no local copy can be found.
+$REGISTRY          = 'seqdatabase.ini';
+$PRIVATE_DIR       = '.bioinformatics';
+$PUBLIC_DIR        = '/etc/bioinformatics';
+$FALLBACK_REGISTRY = 'http://www.open-bio.org/registry/seqdatabase.ini';
+
+sub new {
+    # Constructor: builds the object via the parent class, then loads every
+    # registry file that can be found (see _load_registry).
+    #
+    # Unpack from @_ rather than a single shift() so that constructor
+    # arguments are actually forwarded to the parent constructor.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # database name => Bio::DB::Failover object, filled by _load_registry
+    $self->{'_dbs'} = {};
+    $self->_load_registry();
+    return $self;
+}
+
+=head2 _load_registry
+
+ Title   : _load_registry
+ Usage   :
+ Function: Looks for seqdatabase.ini files in the expected locations and
+           in the directories specified by $OBDA_SEARCH_PATH. If no files
+           are found, download a default file from www.open-bio.org
+ Returns : nothing
+ Args    : none
+
+=cut
+
+sub _load_registry {
+    my $self = shift;
+
+    # $HOME may already have been taken from the environment; otherwise ask
+    # the password database. The eval guards platforms where getpwuid() is
+    # not implemented. $@ is inspected only when the eval actually ran, so a
+    # stale error left by unrelated code is never reported here.
+    unless ($HOME) {
+        eval { $HOME = (getpwuid($>))[7]; };
+        if ($@) {
+            $self->warn("This Perl doesn't implement function getpwuid(), no \$HOME");
+        }
+    }
+
+    my @ini_files = $self->_get_ini_files();
+    @ini_files = $self->_make_private_registry() unless (@ini_files);
+
+    # Pass 1: parse each ini file into $hash->{section}->{lower-cased tag}.
+    my $hash = {};
+    for my $file (@ini_files) {
+        my $FH;
+        unless ( open $FH, '<', $file ) {
+            $self->warn("Could not open registry file $file: $!");
+            next;
+        }
+        my $db;    # name of the current [section]; reset for every file
+        while ( defined( my $line = <$FH> ) ) {
+            if ( $line =~ /^VERSION=([\d\.]+)/ ) {
+                my $version = $1;
+                # throw() does not return, so nothing follows it
+                if ( !$version or $version > $OBDA_SPEC_VERSION ) {
+                    $self->throw("Do not know about this version [$version] > $OBDA_SPEC_VERSION");
+                }
+                next;
+            }
+            next if $line =~ /^#/;     # comment line
+            next if $line =~ /^\s/;    # blank or indented line
+            if ( $line =~ /^\[(\S+)\]/ ) {
+                $db = $1;
+                next;
+            }
+            # tag=value; the split limit keeps '=' characters inside the value
+            my ( $tag, $value ) = split /=/, $line, 2;
+            # ignore lines outside any section and lines without a '='
+            next unless defined $db && defined $value;
+            $value =~ s/\s//g;
+            $tag   =~ s/\s//g;
+            $hash->{$db}->{"\L$tag"} = $value;
+        }
+        close $FH;
+    }
+
+    # Pass 2: instantiate one database per section and register it under a
+    # Bio::DB::Failover object keyed by the section name.
+    for my $name ( keys %{$hash} ) {
+        if ( !exists $self->{'_dbs'}->{$name} ) {
+            $self->{'_dbs'}->{$name} = Bio::DB::Failover->new();
+        }
+
+        my $protocol = $hash->{$name}->{'protocol'};
+        my $class    = defined $protocol ? $implement{$protocol} : undef;
+        if ( !defined $class ) {
+            $self->warn( "Registry does not support protocol "
+                  . ( defined $protocol ? $protocol : '' ) );
+            next;
+        }
+
+        # $class always comes from the fixed %implement table above
+        eval "require $class";
+        if ($@) {
+            $self->warn("Couldn't load $class");
+            next;
+        }
+
+        eval {
+            my $randi = $class->new_from_registry( %{ $hash->{$name} } );
+            $self->{'_dbs'}->{$name}->add_database($randi);
+        };
+        if ($@) {
+            $self->warn("Couldn't call new_from_registry() on [$class]\n$@");
+        }
+    }
+}
+
+
+=head2 get_database
+
+ Title   : get_database
+ Usage   : my $db = $registry->get_database($dbname);
+ Function: Retrieve a Database object which implements Bio::DB::SeqI interface
+ Returns : Bio::DB::SeqI object
+ Args    : string describing the name of the database
+
+=cut
+
+sub get_database {
+	my ($self,$dbname) = @_;
+
+	# Validate before lower-casing: lc(undef) yields '' (and a warning),
+	# which would make this check unreachable.
+	if( !defined $dbname ) {
+		$self->warn("must get_database with a database name");
+		return;
+	}
+
+	# lookups are done on the lower-cased name
+	$dbname = lc $dbname;
+	if( !exists $self->{'_dbs'}->{$dbname} ) {
+		$self->warn("No database with name $dbname in Registry");
+		return;
+	}
+	return $self->{'_dbs'}->{$dbname};
+}
+
+=head2 services
+
+ Title   : services
+ Usage   : my @available = $registry->services();
+ Function: returns list of possible services
+ Returns : list of strings
+ Args    : none
+
+=cut
+
+sub services {
+    my $self = shift;
+
+    # The registered databases live in a hash keyed by service name;
+    # report nothing if that table is missing or is not a hash.
+    my $dbs = $self->{'_dbs'};
+    if ( defined $dbs && ref($dbs) =~ /HASH/i ) {
+        return keys %{$dbs};
+    }
+    return ();
+}
+
+=head2 _get_ini_files
+
+ Title   : _get_ini_files
+ Usage   : my @files = $self->_get_ini_files
+ Function: To find all the seqdatabase.ini files
+ Returns : list of seqdatabase.ini paths
+ Args    : None
+
+=cut
+
+sub _get_ini_files {
+    my $self = shift;
+    my @found;
+
+    # 1. every directory listed in the ';'-separated search path
+    if ($OBDA_SEARCH_PATH) {
+        push @found,
+          grep { -e $_ }
+          map  { $_ . "/" . $REGISTRY }
+          split /;/, $OBDA_SEARCH_PATH;
+    }
+
+    # 2. the user's private registry, when a home directory is known
+    if ($HOME) {
+        my $private = "$HOME/$PRIVATE_DIR/$REGISTRY";
+        push @found, $private if -e $private;
+    }
+
+    # 3. the system-wide registry
+    my $public = "$PUBLIC_DIR/$REGISTRY";
+    push @found, $public if -e $public;
+
+    return @found;
+}
+
+=head2 _make_private_registry
+
+ Title   : _make_private_registry
+ Usage   :
+ Function: Make private registry in file in $HOME
+ Returns : Path to private registry file
+ Args    : None
+
+=cut
+
+sub _make_private_registry {
+    my $self = shift;
+
+    my $nor_in = $OBDA_SEARCH_PATH ?
+      "nor in directory specified by\n$OBDA_SEARCH_PATH" :
+      "and environment variable OBDA_SEARCH_PATH wasn't set";
+
+    $self->warn("No $REGISTRY file found in $HOME/$PRIVATE_DIR/\n" .
+                "nor in $PUBLIC_DIR $nor_in.\n" .
+                "Using web to get registry from\n$FALLBACK_REGISTRY");
+
+    # Last gasp. Try to use HTTPget module to retrieve the registry from
+    # the web...
+    my $f = Bio::Root::HTTPget::getFH($FALLBACK_REGISTRY);
+    defined $f
+      or $self->throw("Could not retrieve $FALLBACK_REGISTRY");
+
+    # Store the default registry file. mkdir() reports failure through its
+    # return value (it does not die), so it is tested directly.
+    my $dir = "$HOME/$PRIVATE_DIR";
+    unless ( -e $dir ) {
+        mkdir $dir
+          or $self->throw("Could not make directory $HOME/$PRIVATE_DIR, " .
+                          "no $REGISTRY file available");
+    }
+
+    my $path = "$dir/$REGISTRY";
+    open( my $F, '>', $path )
+      or $self->throw("Could not write $path: $!");
+
+    # Copy from the download handle ($f) into the new file ($F). The braces
+    # make $F the output handle rather than the thing being printed.
+    while ( defined( my $line = <$f> ) ) {
+        print {$F} $line;
+    }
+    close $F
+      or $self->throw("Could not close $path: $!");
+
+    $self->warn("Stored $REGISTRY file in $HOME/$PRIVATE_DIR");
+
+    my @ini_file = ($path);
+    return @ini_file;
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeature.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeature.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeature.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,762 @@
+package Bio::DB::SeqFeature::NormalizedFeature;
+
+# $Id: NormalizedFeature.pm,v 1.13.4.5 2006/11/08 17:25:54 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::NormalizedFeature -- Normalized feature for use with Bio::DB::SeqFeature::Store
+
+=head1 SYNOPSIS
+
+ use Bio::DB::SeqFeature::Store;
+ # Open the sequence database
+ my $db      = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
+                                                -dsn     => 'dbi:mysql:test');
+ my ($feature)   = $db->get_features_by_name('ZK909');
+ my @subfeatures = $feature->get_SeqFeatures();
+ my @exons_only  = $feature->get_SeqFeatures('exon');
+
+ # create a new object
+ $db->seqfeature_class('Bio::DB::SeqFeature::NormalizedFeature');
+ my $new = $db->new_feature(-primary_tag=>'gene',
+                            -seq_id     => 'chr3',
+                            -start      => 10000,
+                            -end        => 11000);
+
+ # add a new exon
+ $feature->add_SeqFeature($db->new_feature(-primary_tag=>'exon',
+                                           -seq_id     => 'chr3',
+                                           -start      => 5000,
+                                           -end        => 5551));
+
+=head1 DESCRIPTION
+
+The Bio::DB::SeqFeature::NormalizedFeature object is an alternative
+representation of SeqFeatures for use with Bio::DB::SeqFeature::Store
+database system. It is identical to Bio::DB::SeqFeature, except that
+instead of storing feature/subfeature relationships in a database
+table, the information is stored in the object itself. This actually
+makes the objects somewhat inconvenient to work with from SQL, but
+does speed up access somewhat.
+
+To use this class, pass the name of the class to the
+Bio::DB::SeqFeature::Store object's seqfeature_class() method. After
+this, $db-E<gt>new_feature() will create objects of type
+Bio::DB::SeqFeature::NormalizedFeature. If you are using the GFF3
+loader, pass Bio::DB::SeqFeature::Store::GFF3Loader-E<gt>new() the
+-seqfeature_class argument:
+
+  use Bio::DB::SeqFeature::Store::GFF3Loader;
+
+  my $store  = connect_to_db_somehow();
+  my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(
+                -store=>$store,
+                -seqfeature_class => 'Bio::DB::SeqFeature::NormalizedFeature'
+               );
+
+=cut
+
+use strict;
+use Carp 'croak';
+use base 'Bio::Graphics::FeatureBase';
+use base 'Bio::DB::SeqFeature::NormalizedFeatureI';
+use overload '""' => \&as_string,
+              eq  => \&eq,
+              ne  => \&ne,
+              fallback => 1;
+
+use vars '$AUTOLOAD';
+
+# Class-wide switch read and written through overloaded_names().
+my $USE_OVERLOADED_NAMES     = 1;
+
+# some of this is my fault and some of it is changing bioperl API
+# (older method names, all ending up at segments())
+*merged_segments     = \&segments;
+*sub_SeqFeature      = *merged_segments;
+*get_all_SeqFeatures = *sub_SeqFeature;
+
+##### CLASS METHODS ####
+
+=head2 new
+
+ Title   : new
+ Usage   : $feature = Bio::DB::SeqFeature::NormalizedFeature->new(@args)
+ Function: create a new feature
+ Returns : the new seqfeature
+ Args    : see below
+ Status  : public
+
+This method creates and, if possible stores into a database, a new
+Bio::DB::SeqFeature::NormalizedFeature object using the specialized
+Bio::DB::SeqFeature class.
+
+The arguments are the same as for Bio::SeqFeature::Generic-E<gt>new() and
+Bio::Graphics::Feature-E<gt>new(). The most important difference is the
+B<-store> option, which if present creates the object in a
+Bio::DB::SeqFeature::Store database, and the B<-index> option, which
+controls whether the feature will be indexed for retrieval (default is
+true). Ordinarily, you would only want to turn indexing on when
+creating top level features, and off only when storing
+subfeatures. The default is on.
+
+Arguments are as follows:
+
+  -seq_id       the reference sequence
+  -start        the start position of the feature
+  -end          the stop position of the feature
+  -display_name the feature name (returned by seqname)
+  -primary_tag  the feature type (returned by primary_tag)
+  -source       the source tag
+  -score        the feature score (for GFF compatibility)
+  -desc         a description of the feature
+  -segments     a list of subfeatures (see Bio::Graphics::Feature)
+  -subtype      the type to use when creating subfeatures
+  -strand       the strand of the feature (one of -1, 0 or +1)
+  -phase        the phase of the feature (0..2)
+  -url          a URL to link to when rendered with Bio::Graphics
+  -attributes   a hashref of tag value attributes, in which the key is the tag
+                  and the value is an array reference of values
+  -store        a previously-opened Bio::DB::SeqFeature::Store object
+  -index        index this feature if true
+
+Aliases:
+
+  -id           an alias for -display_name
+  -seqname      an alias for -display_name
+  -display_id   an alias for -display_name
+  -name         an alias for -display_name
+  -stop         an alias for end
+  -type         an alias for primary_tag
+
+=cut
+
+sub new {
+  my ($class, @params) = @_;
+  my %opt = @params;
+
+  # -store (or its synonym -factory) names the database to save into;
+  # -index defaults to true when it is not given at all.
+  my $store = $opt{-store} || $opt{-factory};
+  my $index = exists $opt{-index} ? $opt{-index} : 1;
+
+  my $feature = $class->SUPER::new(@params);
+  return $feature unless $store;
+
+  # storing the feature is what assigns its primary_id
+  if ($index) {
+    $store->store($feature);
+  }
+  else {
+    $store->store_noindex($feature);
+  }
+  $feature->object_store($store);
+  return $feature;
+}
+
+=head2 Bio::SeqFeatureI methods
+
+The following Bio::SeqFeatureI methods are supported:
+
+ seq_id(), start(), end(), strand(), get_SeqFeatures(),
+ display_name(), primary_tag(), source_tag(), seq(),
+ location(), primary_id(), overlaps(), contains(), equals(),
+ intersection(), union(), has_tag(), remove_tag(),
+ add_tag_value(), get_tag_values(), get_all_tags()
+
+Some methods that do not make sense in the context of a genome
+annotation database system, such as attach_seq(), are not supported.
+
+Please see L<Bio::SeqFeatureI> for more details.
+
+=cut
+
+sub seq {
+  my $self = shift;
+
+  # load Bio::PrimarySeq lazily, only if it is not already available
+  Bio::PrimarySeq->can('new') or require Bio::PrimarySeq;
+
+  # minus-strand features are requested with the coordinates swapped
+  my @range = ($self->start, $self->end);
+  @range = reverse @range if $self->strand < 0;
+
+  my $store = $self->object_store;
+  unless ($store) {
+    # no database attached: defer to the parent class
+    return $self->SUPER::seq($self->seq_id, @range);
+  }
+
+  my $residues = $store->fetch_sequence($self->seq_id, @range) || '';
+  return Bio::PrimarySeq->new(-seq => $residues,
+                              -id  => $self->display_name);
+}
+
+# Fetch part of the feature's sequence from the attached store.
+# Args   : start and stop offsets
+# Returns: a Bio::PrimarySeq, or nothing when no store is attached
+sub subseq {
+  my $self = shift;
+  my ($newstart,$newstop) = @_;
+  my $store = $self->object_store or return;
+
+  # seq() loads Bio::PrimarySeq on demand; do the same here so subseq()
+  # also works when it is the first sequence method to be called.
+  require Bio::PrimarySeq unless Bio::PrimarySeq->can('new');
+
+  # NOTE(review): the stop coordinate is offset from end(), not start(), so
+  # subseq(1,5) reaches 4 bases past the feature's end -- confirm intent.
+  my ($start,$stop) = ($self->start+$newstart-1,$self->end+$newstop-1);
+  if ($self->strand < 0) {
+    ($start,$stop) = ($stop,$start);
+  }
+  my $seq = $store->fetch_sequence($self->seq_id,$start,$stop);
+
+  # named argument, matching the constructor call made in seq()
+  return Bio::PrimarySeq->new(-seq => $seq);
+}
+
+=head2 add_SeqFeature
+
+ Title   : add_SeqFeature
+ Usage   : $flag = $feature->add_SeqFeature(@features)
+ Function: Add subfeatures to the feature
+ Returns : true if successful
+ Args    : list of Bio::SeqFeatureI objects
+ Status  : public
+
+Add one or more subfeatures to the feature. For best results,
+subfeatures should be of the same class as the parent feature
+(i.e. don't try mixing Bio::DB::SeqFeature::NormalizedFeature with
+other feature types).
+
+An alias for this method is add_segment().
+
+=cut
+
+sub add_SeqFeature {
+  my $self = shift;
+  # 1 => "normalized": the parent keeps only the subfeatures' primary IDs
+  $self->_add_segment(1,@_);
+}
+
+=head2 update
+
+ Title   : update
+ Usage   : $flag = $feature->update()
+ Function: Update feature in the database
+ Returns : true if successful
+ Args    : none
+ Status  : public
+
+After changing any fields in the feature, call update() to write it to
+the database. This is not needed for add_SeqFeature() as update() is
+invoked automatically.
+
+=cut
+
+sub update {
+  my ($self) = @_;
+
+  # without an attached database there is nothing to write to
+  my $store = $self->object_store;
+  return unless $store;
+
+  return $store->store($self);
+}
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   : @subfeatures = $feature->get_SeqFeatures([@types])
+ Function: return subfeatures of this feature
+ Returns : list of subfeatures
+ Args    : list of subfeature primary_tags (optional)
+ Status  : public
+
+This method extends the Bio::SeqFeatureI get_SeqFeatures() slightly by
+allowing you to pass a list of primary_tags, in which case only
+subfeatures whose primary_tag is contained on the list will be
+returned. Without any types passed all subfeatures are returned.
+
+=cut
+
+
+# segments can be either normalized IDs or ordinary feature objects
+sub get_SeqFeatures {
+  my $self = shift;
+  my @types        = @_;
+
+  my $s     = $self->{segments} or return;
+  my $store = $self->object_store;
+
+  # split the stored list into in-memory objects and database IDs
+  my (@ordinary,@ids);
+  for my $seg (@$s) {
+    if (ref $seg) {
+      push @ordinary,$seg;
+    } else {
+      push @ids,$seg;
+    }
+  }
+
+  # IDs can only be resolved through the database
+  my @candidates = @ordinary;
+  if (@ids) {
+    $store
+      or $self->throw("Feature has normalized subfeatures but no object_store to fetch them from");
+    push @candidates,$store->fetch_many(\@ids);
+  }
+
+  # type_match() is false for an empty type list, so filter only when the
+  # caller asked for specific types; otherwise return every subfeature.
+  my @r = @types ? grep {$_->type_match(@types)} @candidates
+                 : @candidates;
+
+  # best effort: not every subfeature class has an object_store() method
+  for my $r (@r) {
+    eval {$r->object_store($store) };
+  }
+  return @r;
+}
+
+=head2 object_store
+
+ Title   : object_store
+ Usage   : $store = $feature->object_store([$new_store])
+ Function: get or set the database handle
+ Returns : current database handle
+ Args    : new database handle (optional)
+ Status  : public
+
+This method will get or set the Bio::DB::SeqFeature::Store object that
+is associated with the feature. After changing the store, you should
+probably unset the feature's primary_id() and call update() to ensure
+that the object is written into the database as a new feature.
+
+=cut
+
+sub object_store {
+  my ($self, @new_store) = @_;
+
+  # the value returned is the one in effect *before* any assignment
+  my $previous = $self->{store};
+  if (@new_store) {
+    $self->{store} = $new_store[0];
+  }
+  return $previous;
+}
+
+
+=head2 overloaded_names
+
+ Title   : overloaded_names
+ Usage   : $overload = $feature->overloaded_names([$new_overload])
+ Function: get or set overloading of object strings
+ Returns : current flag
+ Args    : new flag (optional)
+ Status  : public
+
+For convenience, when objects of this class are stringified, they are
+represented in the form "primary_tag(display_name)". To turn this
+feature off, call overloaded_names() with a false value. You can
+invoke this on an individual feature object or on the class:
+
+  Bio::DB::SeqFeature::NormalizedFeature->overloaded_names(0);
+
+=cut
+
+
+sub overloaded_names {
+  my ($class, @flag) = @_;
+
+  # the flag is shared by the whole class; the previous setting is returned
+  my $previous = $USE_OVERLOADED_NAMES;
+  if (@flag) {
+    $USE_OVERLOADED_NAMES = $flag[0];
+  }
+  return $previous;
+}
+
+=head2 segment
+
+ Title   : segment
+ Usage   : $segment = $feature->segment
+ Function: return a Segment object corresponding to feature
+ Returns : a Bio::DB::SeqFeature::Segment
+ Args    : none
+ Status  : public
+
+This turns the feature into a Bio::DB::SeqFeature::Segment object,
+which you can then use to query for overlapping features. See
+L<Bio::DB::SeqFeature::Segment>.
+
+=cut
+
+sub segment  {
+  my ($feature) = @_;
+  # wrap this feature in a Segment object
+  my $segment_class = 'Bio::DB::SeqFeature::Segment';
+  return $segment_class->new($feature);
+}
+
+### instance methods
+
+=head2 AUTOLOADED methods
+
+ @subfeatures = $feature->Exon;
+
+If you use an unknown method that begins with a capital letter, then
+the feature autogenerates a call to get_SeqFeatures() using the
+lower-cased method name as the primary_tag. In other words
+$feature-E<gt>Exon is equivalent to:
+
+ @subfeatures = $feature->get_SeqFeatures('exon')
+
+If you use an unknown method that begins with Tag_(tagname),
+Att_(tagname) or Is_(tagname), then it will be the same as calling the
+each_tag_value() method with the tagname. In a list context, these
+autogenerated procedures return the list of results. In scalar
+context, they return the first item in the list!!
+
+=cut
+
+
+sub AUTOLOAD {
+  my $self = $_[0];
+  my ($pack,$func_name) = $AUTOLOAD =~ /(.+)::([^:]+)$/;
+
+  # object destruction is never autoloaded
+  return if $func_name eq 'DESTROY';
+
+  # Tag_foo / Att_foo / Is_foo: the values of tag "foo"
+  # (the whole list in list context, otherwise just the first value)
+  if ($func_name =~ /^(?:Tag|Att|Is)_(\w+)/) {
+    my $tagname = $1;
+    my @values  = $self->each_tag_value($tagname);
+    return @values if wantarray;
+    return $values[0];
+  }
+
+  # a name with an initial capital: subfeatures of the lower-cased type
+  return $self->get_SeqFeatures(lc $func_name) if $func_name =~ /^[A-Z]/;
+
+  # error message of last resort
+  $self->throw(qq(Can't locate object method "$func_name" via package "$pack"));
+}#'
+
+
+sub add_segment {
+  my $self = shift;
+  # 0 => not normalized: the subfeature objects themselves are kept
+  $self->_add_segment(0,@_);
+}
+
+# This adds subfeatures. It has the property of converting the
+# provided features into an object like itself and storing them
+# into the database. If the feature already has a primary id and
+# an object_store() method, then it is not stored into the database,
+# but its primary id is reused.
+sub _add_segment {
+  my $self       = shift;
+  my $normalized = shift;    # true: keep primary IDs; false: keep objects
+
+  # turn the remaining arguments into feature objects
+  my @segments   = $self->_create_subfeatures($normalized,@_);
+
+  # fix boundaries
+  $self->_fix_boundaries(\@segments,$normalized);
+
+  # freakish fixing of our non-standard Target attribute
+  $self->_fix_target(\@segments);
+
+  $self->update if $self->primary_id; # write us back to disk
+}
+
+# Record the new subfeatures on this feature and widen its start/end so that
+# it spans all of them.
+# Args: arrayref of subfeature objects; flag, true when only the primary IDs
+#       (rather than the objects) are to be kept in $self->{segments}
+sub _fix_boundaries {
+  my $self     = shift;
+  my ($segments,$normalized) = @_;
+
+  # nothing was added: leave the feature alone instead of dereferencing
+  # a first segment that does not exist
+  return unless $segments && @$segments;
+
+  my $min_start = $self->start ||  999_999_999_999;
+  my $max_stop  = $self->end   || -999_999_999_999;
+
+  for my $seg (@$segments) {
+    $min_start     = $seg->start if $seg->start < $min_start;
+    $max_stop      = $seg->end   if $seg->end   > $max_stop;
+    my $id_or_seg  = $normalized ? $seg->primary_id : $seg;
+    defined $id_or_seg or $self->throw("No primary ID when there should be");
+    push @{$self->{segments}},$id_or_seg;
+  }
+
+  # adjust our boundaries, etc. An undefined start/end is always replaced;
+  # comparing numerically against undef would treat it as 0 and leave an
+  # unset start unset.
+  my $cur_start = $self->start;
+  my $cur_end   = $self->end;
+  $self->start($min_start) if !defined $cur_start || $min_start < $cur_start;
+  $self->end($max_stop)    if !defined $cur_end   || $max_stop  > $cur_end;
+  $self->{ref}           ||= $segments->[0]->seq_id;
+  $self->{strand}        ||= $segments->[0]->strand;
+}
+
+# Widen this feature's Target attribute ("seqid start end [strand]") so that
+# it covers the Target ranges of the given subfeatures.
+sub _fix_target {
+  my $self = shift;
+  my $segs = shift;
+
+  # freakish fixing of our non-standard Target attribute
+  if (my $t = ($self->attributes('Target'))[0]) {
+    my ($seqid,$tstart,$tend,$strand) = split /\s+/,$t;
+    my $min_tstart = $tstart;
+    my $max_tend   = $tend;
+    for my $seg (@$segs) {
+      my $st = ($seg->attributes('Target'))[0] or next;
+      # Separate variables: $tstart/$tend must keep the parent's original
+      # range for the comparison after the loop.
+      my (undef,$seg_tstart,$seg_tend) = split /\s+/,$st;
+      next unless defined $seg_tstart && defined $seg_tend;
+      $min_tstart     = $seg_tstart if $seg_tstart < $min_tstart;
+      $max_tend       = $seg_tend   if $seg_tend   > $max_tend;
+    }
+    # rewrite the attribute only if the range actually grew
+    if ($min_tstart < $tstart or $max_tend > $tend) {
+      $self->{attributes}{Target}[0] = join ' ',($seqid,$min_tstart,$max_tend,$strand||'');
+    }
+  }
+}
+
+# undo the load_id and Target hacks on the way out
+# Build the ';'-separated attribute column for this feature.
+# Args   : optional parent feature, emitted as Parent=<its primary_id>
+# Returns: string of escaped tag=value pairs followed by ID/Parent/Name
+sub format_attributes {
+  my $self   = shift;
+  my $parent = shift;
+  my $load_id   = $self->load_id || '';
+
+  # name part (first field) of the Target attribute, if there is one;
+  # guarded so that a missing Target does not split an undefined value
+  my $target_attr = ($self->attributes('Target'))[0];
+  my ($target)  = defined $target_attr ? split(/\s+/,$target_attr) : ();
+  $target ||= '';
+
+  my @tags = $self->all_tags;
+  my @result;
+  for my $t (@tags) {
+    # these are hacks, which we don't want to appear in the file
+    next if $t eq 'load_id';
+    next if $t eq 'parent_id';
+
+    my @values = $self->each_tag_value($t);
+    # aliases equal to the load ID or the Target name are left out
+    @values = grep {$_ ne $load_id && $_ ne $target} @values if $t eq 'Alias';
+
+    for my $value (@values) {
+      $value =~ s/\s+$//;   # get rid of trailing whitespace
+      push @result,join '=',$self->escape($t),$self->escape($value);
+    }
+  }
+  my $id   = $self->primary_id;
+  my $name = $self->display_name;
+  push @result,"ID=".$self->escape($id)                     if defined $id;
+  push @result,"Parent=".$self->escape($parent->primary_id) if defined $parent;
+  push @result,"Name=".$self->escape($name)                   if defined $name;
+  return join ';',@result;
+}
+
+# Convert the arguments into subfeature objects.
+# Args   : $normalized flag, then any mix of
+#            - objects of this feature's own class (used as they are),
+#            - [start,stop] array references,
+#            - other Bio::SeqFeatureI objects (copied into new features).
+# Returns: list of feature objects (nothing if no usable argument is given)
+# Throws : if the feature has no object_store, or an argument is of an
+#          unsupported kind, or the database refuses to store a subfeature
+sub _create_subfeatures {
+  my $self = shift;
+  my $normalized = shift;
+
+  my $type = $self->{subtype} || $self->{type};
+  my $ref   = $self->seq_id;
+  my $name  = $self->name;
+  my $class = $self->class;
+  my $store = $self->object_store
+    or $self->throw("Feature must be associated with a Bio::DB::SeqFeature::Store database before attempting to add subfeatures");
+
+  my $index_subfeatures_policy = $store->index_subfeatures;
+
+  my @segments;
+
+  for my $seg (@_) {
+
+    if (UNIVERSAL::isa($seg,ref $self)) {
+
+      if (!$normalized) {  # make sure the object has no lazy behavior
+	$seg->primary_id(undef);
+	$seg->object_store(undef);
+      }
+      push @segments,$seg;
+    }
+
+    elsif (ref($seg) eq 'ARRAY') {
+      my ($start,$stop) = @{$seg};
+      next unless defined $start && defined $stop;  # fixes an obscure bug somewhere above us
+      my $strand = $self->{strand};
+
+      # a reversed pair denotes the minus strand
+      if ($start > $stop) {
+	($start,$stop) = ($stop,$start);
+	$strand = -1;
+      }
+      push @segments,$self->new(-start  => $start,
+				-stop   => $stop,
+				-strand => $strand,
+				-ref    => $ref,
+				-type   => $type,
+			        -name   => $name,
+			        -class  => $class,
+			       );
+    }
+
+
+    elsif (UNIVERSAL::isa($seg,'Bio::SeqFeatureI')) {
+      # Plain conditional expression: "my $x = ... if COND" leaves the
+      # variable in an unspecified state when COND is false.
+      my $score = $seg->can('score') ? $seg->score : undef;
+      my $f = $self->new(-start  => $seg->start,
+			 -end    => $seg->end,
+			 -strand => $seg->strand,
+			 -seq_id => $seg->seq_id,
+			 -name   => $seg->display_name,
+			 -primary_tag => $seg->primary_tag,
+			 -source_tag  => $seg->source,
+			 -score       => $score,
+			);
+      # carry all tag values over to the copy
+      for my $tag ($seg->get_all_tags) {
+	my @values = $seg->get_tag_values($tag);
+	$f->{attributes}{$tag} = \@values;
+      }
+      push @segments,$f;
+    }
+
+    else {
+      croak "$seg is neither a Bio::SeqFeatureI object nor an arrayref";
+    }
+  }
+
+  return unless @segments;
+
+  if ($normalized && $store) {  # parent/child data is going to be stored in the database
+
+    # anything without an ID, or living in a different store, must be saved;
+    # the "|| ''" avoids comparing an undefined object_store
+    my @need_loading = grep {!defined $_->primary_id
+			       || ($_->object_store || '') ne $store} @segments;
+    if (@need_loading) {
+      my $result;
+      if ($index_subfeatures_policy) {
+	$result = $store->store(@need_loading);
+      } else {
+	$result = $store->store_noindex(@need_loading);
+      }
+      $result or croak "Couldn't store one or more subseqfeatures";
+    }
+  }
+
+  return @segments;
+}
+
+=head2 load_id
+
+ Title   : load_id
+ Usage   : $id = $feature->load_id
+ Function: get the GFF3 load ID
+ Returns : the GFF3 load ID (string)
+ Args    : none
+ Status  : public
+
+For features that were originally loaded by the GFF3 loader, this
+method returns the GFF3 load ID. This method may not be supported in
+future versions of the module.
+
+=cut
+
+sub load_id {
+  my $self = shift;
+  # first value of the load_id attribute; nothing at all if it is absent
+  my @load_ids = $self->attributes('load_id');
+  return @load_ids ? $load_ids[0] : ();
+}
+
+
+=head2 notes
+
+ Title   : notes
+ Usage   : @notes = $feature->notes
+ Function: get contents of the GFF3 Note tag
+ Returns : List of GFF3 Note tags
+ Args    : none
+ Status  : public
+
+For features that were originally loaded by the GFF3 loader, this
+method returns the contents of the Note tag as a list. This is a
+convenience for Bio::Graphics, which looks for notes() when it
+constructs a default description line.
+
+=cut
+
+sub notes {
+  my ($self) = @_;
+  # simply the values of the Note attribute
+  return $self->attributes('Note');
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $id = $feature->primary_id([$new_id])
+ Function: get/set the feature's database ID
+ Returns : the current primary ID
+ Args    : new primary ID (optional)
+ Status  : public
+
+This method gets or sets the primary ID of the feature in the
+underlying Bio::DB::SeqFeature::Store database. If you change this
+field and then call update(), it will have the effect of making a copy
+of the feature in the database under a new ID.
+
+=cut
+
+sub primary_id {
+  my ($self, @new_id) = @_;
+
+  # the value returned is the one in effect *before* any assignment
+  my $previous = $self->{primary_id};
+  if (@new_id) {
+    $self->{primary_id} = $new_id[0];
+  }
+  return $previous;
+}
+
+=head2 target
+
+ Title   : target
+ Usage   : $segment = $feature->target
+ Function: return the segment corresponding to the "Target" attribute
+ Returns : a Bio::DB::SeqFeature::Segment object
+ Args    : none
+ Status  : public
+
+For features that are aligned with others via the GFF3 Target
+attribute, this returns a segment corresponding to the aligned
+region. The CIGAR gap string is not yet supported.
+
+=cut
+
+sub target {
+  my $self = shift;
+
+  # "+" and "-" become 1 and -1; anything else (or nothing) becomes 0
+  my %numeric_strand = ('+' => 1, '-' => -1);
+
+  my @segments;
+  foreach my $spec ($self->attributes('Target')) {
+    # a Target value reads "seqid start end [strand]"
+    my ($seqid,$start,$end,$sign) = split /\s+/,$spec;
+    my $strand = $numeric_strand{ $sign || '' } || 0;
+    push @segments,Bio::DB::SeqFeature::Segment->new($self->object_store,
+						     $seqid,
+						     $start,
+						     $end,
+						     $strand);
+  }
+
+  return $segments[0] unless wantarray;
+  return @segments;
+}
+
+=head2 Internal methods
+
+=over 4
+
+=item $feature-E<gt>as_string()
+
+Internal method used to implement overloaded stringification.
+
+=item $boolean = $feature-E<gt>type_match(@list_of_types)
+
+Internal method that will return true if the feature's primary_tag and
+source_tag match any of the list of types (in primary_tag:source_tag
+format) provided.
+
+=back
+
+=cut
+
+sub as_string {
+  my $self = shift;
+
+  # with overloaded names switched off, fall back to the plain reference string
+  unless ($self->overloaded_names) {
+    return overload::StrVal($self);
+  }
+
+  # best available label: display name, then load ID, then the database ID
+  my $label = $self->display_name
+           || $self->load_id
+           || "id=".$self->primary_id;
+
+  my $tag    = $self->primary_tag;
+  my $source = $self->source_tag;
+  my $type;
+  if ($source) {
+    $type = "$tag:$source";
+  }
+  else {
+    $type = $tag;
+  }
+  return "$type($label)";
+}
+
+# Implements the overloaded "eq": two features are equal when they live in
+# the same store and carry the same primary ID.
+sub eq {
+  my $self  = shift;
+  my $other = shift;   # not named $b, which would shadow sort's $b
+
+  # Anything that is not an object with a primary_id() method (a plain
+  # string, undef, an unblessed reference) is simply unequal; calling
+  # methods on it would die instead of giving an answer.
+  return 0 unless ref $other && eval { $other->can('primary_id') };
+
+  # missing stores and IDs compare as empty strings
+  my $store1 = $self->object_store         || '';
+  my $store2 = eval {$other->object_store} || '';
+  my $id1    = $self->primary_id;
+  my $id2    = $other->primary_id;
+  $id1 = '' unless defined $id1;
+  $id2 = '' unless defined $id2;
+
+  return $store1 eq $store2 && $id1 eq $id2;
+}
+
+sub ne {
+  my ($self, $other) = @_;
+  # the exact negation of eq()
+  my $same = $self->eq($other);
+  return !$same;
+}
+
+# completely case insensitive
+sub type_match {
+  my ($self, @types) = @_;
+
+  my $method = lc $self->primary_tag;
+  my $source = lc $self->source_tag;
+
+  # each type reads "primary_tag" or "primary_tag:source_tag"
+  foreach my $spec (@types) {
+    my ($want_method,$want_source) = map {lc $_} split /:/,$spec;
+    next unless $method eq $want_method;
+    return 1 unless defined $want_source;     # no source part to check
+    return 1 if $source eq $want_source;
+  }
+
+  # no type matched (also the case when the list is empty)
+  return;
+}
+
+sub segments {
+  # older name for get_SeqFeatures(); arguments are passed straight through
+  my ($self, @types) = @_;
+  return $self->get_SeqFeatures(@types);
+}
+
+1;
+
+
+__END__
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeatureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeatureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedFeatureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+package Bio::DB::SeqFeature::NormalizedFeatureI;
+
+# $Id: NormalizedFeatureI.pm,v 1.2 2006/05/18 20:11:34 lstein Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::NormalizedFeatureI -- Interface for normalized features
+
+=head1 SYNOPSIS
+
+none
+
+=head1 DESCRIPTION
+
+This is an extremely simple interface that contains a single method,
+subfeatures_are_normalized(). This method returns a true value.
+
+Bio::DB::SeqFeature::Store feature classes will inherit this interface
+to flag that they are able to store subfeatures in a normalized way
+such that the subfeature is actually contained in the
+Bio::DB::SeqFeature::Store database and the parent feature contains
+only the subfeatures primary ID.
+
+=head1 BUGS
+
+None, but the whole class design might be flawed.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+
+
+# Marker method: always true for classes inheriting this interface.
+sub subfeatures_are_normalized {
+  return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedTableFeatureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedTableFeatureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/NormalizedTableFeatureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,54 @@
+package Bio::DB::SeqFeature::NormalizedTableFeatureI;
+
+# $Id: NormalizedTableFeatureI.pm,v 1.2 2006/05/18 20:11:34 lstein Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::NormalizedTableFeatureI -- Interface for normalized features whose hierarchy is stored in a table
+
+=head1 SYNOPSIS
+
+none
+
+=head1 DESCRIPTION
+
+This is an extremely simple interface that contains a single method,
+subfeatures_are_stored_in_a_table(). This method returns a true value.
+
+Bio::DB::SeqFeature::Store feature classes will inherit this interface
+to flag that in addition to being able to store features in a
+normalized way, they will use the Bio::DB::SeqFeature::Store database
+to record their parent/child relationships. A class that inherits from
+NormalizedTableFeatureI will also inherit from NormalizedFeatureI, as
+the first is a subclass of the second.
+
+=head1 BUGS
+
+None, but the whole class design might be flawed.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+use base 'Bio::DB::SeqFeature::NormalizedFeatureI';
+
+# Marker method: always returns a true value.
+sub subfeatures_are_stored_in_a_table {
+  return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Segment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Segment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Segment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,501 @@
+package Bio::DB::SeqFeature::Segment;
+
+# $Id: Segment.pm,v 1.13.4.2 2006/11/01 17:24:30 lstein Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Segment -- Location-based access to genome annotation data
+
+=head1 SYNOPSIS
+
+ use Bio::DB::SeqFeature::Store;
+ # Open the sequence database
+ my $db      = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
+                                                -dsn     => 'dbi:mysql:test');
+ my $segment  = $db->segment('Chr1',5000=>6000);
+ my @features = $segment->features('mRNA','match');
+
+=head1 DESCRIPTION
+
+The segment object simplifies access to Bio::DB::SeqFeature store by
+acting as a placeholder for a region of the genome. You can replace
+this statement:
+
+ @features = $db->features(-seq_id=>'Chr1',
+                           -start=>5000,
+                           -end=>6000,
+                           -types=>['mRNA','match','repeat_region']);
+
+with these statements:
+
+ $segment = $db->segment('Chr1',5000=>6000);
+ @features = $segment->features('mRNA','match','repeat_region');
+
+You can also initialize a segment from an existing SeqFeature
+object. The range will be picked up from the SeqFeature boundaries:
+
+ $segment = Bio::DB::SeqFeature::Segment->new($feature);        # for Bio::DB::SeqFeature
+ $segment = Bio::DB::SeqFeature::Segment->new($feature,$store); # for other Bio::SeqFeatureI objects
+
+The segment object implements the full Bio::SeqFeature::CollectionI
+interface, thereby allowing you to iterate over all features in the
+range.
+
+=cut
+
+use strict;
+
+use base 'Bio::SeqFeature::CollectionI','Bio::RangeI';
+use Bio::DB::GFF::Util::Rearrange;
+use overload '""' => \&as_string,
+  fallback => 1;
+
+=head1 PUBLIC METHODS
+
+The following are public methods intended for external use.
+
+=head2 new
+
+ Title   : new
+ Usage   : $segment = Bio::DB::SeqFeature::Segment->new(@options)
+ Function: create a new Segment object
+ Returns : A Bio::DB::SeqFeature::Segment object
+ Args    : several - see below
+ Status  : public
+
+This class method creates a Bio::DB::SeqFeature::Segment object. You
+must provide a Bio::DB::SeqFeature::Store as well as the coordinates
+of the segment. These arguments can be provided explicitly or
+indirectly.
+
+First form:
+
+ $segment = Bio::DB::SeqFeature::Segment->new($store,$seqid,$start,$end,$strand)
+
+In this form a segment is defined by a Bio::DB::SeqFeature::Store, the
+sequence ID, the start, end and strand. This is the form that is
+invoked internally by Bio::DB::SeqFeature::Store when you call its
+segment() method.
+
+Second form:
+
+ $segment = Bio::DB::SeqFeature::Segment->new($seqfeature [,$store]);
+
+In this form, you pass new() a Bio::SeqFeatureI object. The segment's
+seq_id and coordinates are taken from the
+object. If you pass a store-aware seqfeature object
+(e.g. Bio::DB::SeqFeature) then the store database is also derived
+from the feature. Otherwise you will have to pass the store as a
+second argument.
+
+=cut
+
+###
+# new()
+#
+# Call as Bio::DB::SeqFeature::Segment->new($seqfeature,$store)
+#
+# or
+# Bio::DB::SeqFeature::Segment->new($store,$seqid,$start,$end,$strand)
+#
+sub new {
+  my ($class,@args) = @_;
+  my %segment;
+
+  if (ref($args[0]) && UNIVERSAL::isa($args[0],'Bio::SeqFeatureI')) {
+    # Second form: new($seqfeature [,$store])
+    my ($seqfeature,$store) = @args;
+    # no explicit store: ask the feature for one, tolerating features
+    # on which the object_store() call dies
+    $store ||= eval {$seqfeature->object_store};
+    $class->throw("I could not derive the Bio::DB::SeqFeature::Store object from the arguments passed to Bio::DB::SeqFeature::Segment->new(). Please pass the Store object as the second argument") unless $store;
+    $segment{store}  = $store;
+    $segment{seqid}  = $seqfeature->seq_id;
+    $segment{start}  = $seqfeature->start;
+    $segment{end}    = $seqfeature->end;
+    $segment{strand} = $seqfeature->strand;
+  }
+  else {
+    # First form: new($store,$seqid,$start,$end,$strand)
+    @segment{qw(store seqid start end strand)} = @args;
+  }
+
+  return bless \%segment,ref($class) || $class;
+}
+
+=head2 features
+
+ Title   : features
+ Usage   : @features = $segment->features(@args)
+ Function: fetch seqfeatures that overlap the segment
+ Returns : list of features
+ Args    : see below
+ Status  : Public
+
+This is the workhorse for feature query and retrieval. It takes a
+series of -name=E<gt>$value filter arguments. Features that
+match all the filters are returned.
+
+  Argument       Value
+  --------       -----
+
+ Location filters:
+  -strand        Strand
+  -range_type    Type of range match ('overlaps','contains','contained_in')
+
+ Name filters:
+  -name          Name of feature (may be a glob expression)
+  -aliases       If true, match aliases as well as display names
+  -class         Archaic argument for backward compatibility.
+                  (-class=>'Clone',-name=>'ABC123') is equivalent
+                  to (-name=>'Clone:ABC123')
+
+ Type filters:
+  -types         List of feature types (array reference) or one type (scalar)
+  -type          Synonym for the above
+  -primary_tag   Synonym for the above
+
+  -attributes    Hashref of attribute=>value pairs as per
+                    get_features_by_attribute(). Multiple alternative values
+                    can be matched by providing an array reference.
+  -attribute     synonym for -attributes
+
+This is identical to the Bio::DB::SeqFeature::Store-E<gt>features()
+method, except that the -seq_id, -start, and -end arguments are
+provided by the segment object. If a simple list of arguments is
+provided, then the list is taken to be the set of feature types
+(primary tags) to filter on.
+
+Examples:
+
+All features that overlap the current segment:
+
+ @features = $segment->features;
+
+All features of type mRNA that overlap the current segment:
+
+ @features = $segment->features('mRNA');
+
+All features that are completely contained within the current segment:
+
+ @features = $segment->features(-range_type=>'contains');
+
+All "confirmed" mRNAs that overlap the current segment:
+
+ @features = $segment->features(-attributes=>{confirmed=>1},-type=>'mRNA');
+
+=cut
+
+sub features {
+  my $self = shift;
+
+  # A non-empty argument list whose first element lacks a leading dash is
+  # taken to be a plain list of feature types; anything else is passed
+  # through as -name=>value filters.
+  my @filters = (@_ && $_[0] !~ /^-/) ? (-type => [@_]) : @_;
+
+  # the segment always supplies the location filter itself
+  return $self->{store}->features(@filters,
+                                  -seqid => $self->{seqid},
+                                  -start => $self->{start},
+                                  -end   => $self->{end});
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : $iterator = $segment->get_seq_stream(@args)
+ Function: return an iterator across all features in the database
+ Returns : a Bio::DB::SeqFeature::Store::Iterator object
+ Args    : (optional) the same filter arguments as the features() method
+ Status  : public
+
+This is identical to Bio::DB::SeqFeature::Store-E<gt>get_seq_stream()
+except that the location filter is always automatically applied so
+that the iterator you receive returns features that overlap the segment's region.
+
+When called without any arguments this method will return an iterator
+object that will traverse all indexed features in the database that
+overlap the segment's region. Call the iterator's next_seq() method to
+step through them (in no particular order):
+
+  my $iterator = $segment->get_seq_stream;
+  while (my $feature = $iterator->next_seq) {
+    print $feature->primary_tag,' ',$feature->display_name,"\n";
+  }
+
+You can select a subset of features by passing a series of filter
+arguments. The arguments are identical to those accepted by
+$segment-E<gt>features().
+
+get_feature_stream() can be used as a synonym for this method.
+
+=cut
+
+sub get_seq_stream {
+  my ($self,@filters) = @_;
+  my $store = $self->{store};
+  # caller's filters first, then the segment's own location filter
+  return $store->get_seq_stream(@filters,
+                                -seqid => $self->{seqid},
+                                -start => $self->{start},
+                                -end   => $self->{end});
+}
+
+# Synonym for get_seq_stream(); all arguments are passed through.
+sub get_feature_stream {
+  my $self = shift;
+  return $self->get_seq_stream(@_);
+}
+
+=head2 store
+
+ Title   : store
+ Usage   : $store = $segment->store
+ Function: return the Bio::DB::SeqFeature::Store object associated with the segment
+ Returns : a Bio::DB::SeqFeature::Store object
+ Args    : none
+ Status  : public
+
+=cut
+
+# factory() and store() both return the store object held by the segment.
+sub factory {
+  my $self = shift;
+  return $self->{store};
+}
+sub store {
+  my $self = shift;
+  return $self->{store};
+}
+
+=head2 primary_tag, type
+
+ Title   : primary_tag,type
+ Usage   : $primary_tag = $segment->primary_tag
+ Function: returns the string "region"
+ Returns : "region"
+ Args    : none
+ Status  : public
+
+The primary_tag method returns the constant tag "region". type() is a
+synonym for this method.
+
+=cut
+
+# Synonym for primary_tag().
+sub type {
+  my $self = shift;
+  return $self->primary_tag;
+}
+
+=head2 as_string
+
+ Title   : as_string
+ Usage   : $name = $segment->as_string
+ Function: expands the object into a human-readable string
+ Returns : "seq_id:start..end"
+ Args    : none
+ Status  : public
+
+The as_string() method is overloaded into the "" operator so that the
+object is represented as a human readable string in the form
+"seq_id:start..end" when used in a string context.
+
+=cut
+
+sub as_string {
+  my $self  = shift;
+  my $seqid = $self->seq_id;
+  # a false start or end (undef, 0, '') is shown as an empty string
+  my $from  = $self->start || '';
+  my $to    = $self->end   || '';
+  return $seqid . ':' . $from . '..' . $to;
+}
+
+=head2 rel2abs
+
+ Title   : rel2abs
+ Usage   : @coords = $s->rel2abs(@coords)
+ Function: convert relative coordinates into absolute coordinates
+ Returns : a list of absolute coordinates
+ Args    : a list of relative coordinates
+ Status  : Public
+
+This function takes a list of positions in relative coordinates to the
+segment, and converts them into absolute coordinates.
+
+=cut
+
+sub rel2abs {
+  my ($self,@relative) = @_;
+
+  my ($origin,$strand) = ($self->start,$self->strand);
+  my @absolute;
+  if ($strand < 0) {
+    # minus strand: positions count downwards from the segment start
+    @absolute = map { $origin - $_ + 1 } @relative;
+  }
+  else {
+    @absolute = map { $_ + $origin - 1 } @relative;
+  }
+
+  # A single coordinate requested in scalar context is returned as that
+  # coordinate, not as the element count of the list.
+  return $absolute[0] if @absolute == 1 and !wantarray;
+  return @absolute;
+}
+
+=head2 abs2rel
+
+ Title   : abs2rel
+ Usage   : @rel_coords = $s->abs2rel(@abs_coords)
+ Function: convert absolute coordinates into relative coordinates
+ Returns : a list of relative coordinates
+ Args    : a list of absolute coordinates
+ Status  : Public
+
+This function takes a list of positions in absolute coordinates
+and returns a list expressed in relative coordinates.
+
+=cut
+
+sub abs2rel {
+  my ($self,@absolute) = @_;
+
+  my ($origin,$strand) = ($self->start,$self->abs_strand);
+  my @relative;
+  if ($strand < 0) {
+    # minus strand: relative positions grow as absolute positions shrink
+    @relative = map { $origin - $_ + 1 } @absolute;
+  }
+  else {
+    @relative = map { $_ - $origin + 1 } @absolute;
+  }
+
+  # A single coordinate requested in scalar context is returned as that
+  # coordinate, not as the element count of the list.
+  return $relative[0] if @relative == 1 and !wantarray;
+  return @relative;
+}
+
+
+
+=head2 Bio::SeqFeatureI compatibility methods
+
+For convenience, segments are interchangeable with Bio::SeqFeature
+objects in many cases. This means that segments can be passed to
+BioPerl modules that expect Bio::SeqFeature objects and they should
+work as expected. The primary tag of segment objects is "region"
+(SO:0000001 "Continuous sequence E<gt>=1 base pair").
+
+All these methods are read-only except for the primary_id, which can
+be get or set.
+
+The following Bio::SeqFeatureI methods are supported:
+
+=over 4
+
+=item start
+
+=item end
+
+=item seq_id
+
+=item strand
+
+=item length
+
+=item display_name
+
+=item primary_id
+
+=item primary_tag (always returns "region")
+
+=item source_tag (always returns "Bio::DB::SeqFeature::Segment")
+
+=item get_SeqFeatures (always returns an empty list)
+
+=item seq
+
+=item entire_seq
+
+=item location
+
+=item All Bio::RangeI methods
+
+=back
+
+=cut
+
+# Read-only accessors for the fields stored in the segment hash.
+sub start   { my $self = shift; return $self->{start};  }
+sub end     { my $self = shift; return $self->{end};    }
+sub seq_id  { my $self = shift; return $self->{seqid};  }
+sub strand  { my $self = shift; return $self->{strand}; }
+sub ref     { my $self = shift; return $self->seq_id;   }
+sub length  {
+  my $self = shift;
+  return $self->end - $self->start + 1;
+}
+# Constant and derived values.
+sub primary_tag  { return 'region' }
+sub source_tag   { return __PACKAGE__ }
+sub display_name { my $self = shift; return $self->as_string }
+sub name         { my $self = shift; return $self->display_name }
+sub class        { return 'region' }
+# The abs_* methods simply return the corresponding plain value.
+sub abs_ref      { my $self = shift; return $self->ref }
+sub abs_start    { my $self = shift; return $self->start }
+sub abs_end      { my $self = shift; return $self->end }
+sub abs_strand   { my $self = shift; return $self->strand }
+# Subfeature and tag methods are stubs: they return an empty list
+# (undef in scalar context) and ignore their arguments.
+sub get_SeqFeatures { return }
+sub get_all_tags    { return }
+sub get_tag_values  { return }
+sub add_tag_value   { return }
+sub remove_tag      { return }
+sub has_tag         { return }
+# Return the segment's sequence as a Bio::PrimarySeq whose id is the
+# segment's display name.
+sub seq {
+  my $self = shift;
+  Bio::PrimarySeq->can('new') or require Bio::PrimarySeq;
+
+  my ($from,$to) = ($self->start,$self->end);
+  # on the minus strand the coordinates are handed to the store reversed
+  ($from,$to) = ($to,$from) if $self->strand < 0;
+
+  return Bio::PrimarySeq->new(
+    -seq => $self->store->fetch_sequence($self->seq_id,$from,$to),
+    -id  => $self->display_name);
+}
+# Return a subsequence of the segment as a Bio::PrimarySeq object.
+#
+# Args    : $newstart, $newstop -- 1-based positions relative to the
+#           start of the segment
+# Returns : a Bio::PrimarySeq, or nothing if the segment has no store
+#
+# Both positions are offset from the segment start. The previous code
+# offset the stop from the segment end, so the fetched range ran past
+# the requested one.
+sub subseq {
+  my $self = shift;
+  my ($newstart,$newstop) = @_;
+  my $store = $self->store or return;
+  require Bio::PrimarySeq unless Bio::PrimarySeq->can('new');
+  my $offset = $self->start - 1;
+  my $seq    = $store->fetch_sequence($self->seq_id,$offset+$newstart,$offset+$newstop);
+  return Bio::PrimarySeq->new(-seq=>$seq);
+}
+# Return the segment's sequence as a plain string.
+sub dna {
+  my $self = shift;
+  my $seq  = $self->seq;
+  # seq() normally returns an object; unwrap it to its string
+  return scalar $seq->seq if CORE::ref($seq);
+  return $seq;
+}
+
+# Return the whole sequence named by the segment's seq_id (not just the
+# segment's range) as a Bio::PrimarySeq whose id is that seq_id.
+sub entire_seq {
+  my $self = shift;
+  Bio::PrimarySeq->can('new') or require Bio::PrimarySeq;
+  return Bio::PrimarySeq->new(
+    -seq => $self->store->fetch_sequence($self->seq_id),
+    -id  => $self->seq_id);
+}
+# Return a Bio::Location::Simple built from the segment's start and end.
+sub location {
+  my $self = shift;
+  Bio::Location::Simple->can('new') or require Bio::Location::Simple;
+  return Bio::Location::Simple->new(-start => $self->start,
+                                    -end   => $self->end);
+}
+# Get or set the primary ID. When setting, the PREVIOUS value is returned.
+sub primary_id {
+  my ($self,@new) = @_;
+  my $previous = $self->{primary_id};
+  $self->{primary_id} = $new[0] if @new;
+  return $previous;
+}
+
+# Remaining compatibility methods: fixed values, or aliases for other
+# methods of the segment.
+sub target   { return; }
+sub score    { return; }
+sub stop     { my $self = shift; return $self->end; }
+sub absolute { return 1; }
+sub desc        { my $self = shift; return $self->as_string; }
+sub display_id  { my $self = shift; return $self->display_name; }
+sub primary_seq { my $self = shift; return $self->seq; }
+sub accession_number { return undef; }  # intended return undef
+sub alphabet         { return undef; }  # intended return undef
+
+1;
+
+__END__
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/Iterator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/Iterator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/Iterator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,32 @@
+package Bio::DB::SeqFeature::Store::DBI::Iterator;
+
+# $Id: Iterator.pm,v 1.1.4.1 2006/10/02 23:10:17 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store::DBI::Iterator -- Iterator over the features returned by a DBI statement handle
+
+=cut
+
+# Create an iterator from a statement handle and the store that will
+# turn its rows into objects.
+sub new {
+  my ($class,$sth,$store) = @_;
+  my %iterator = (sth   => $sth,
+                  store => $store);
+  return bless \%iterator,ref($class) || $class;
+}
+
+# Return the next object from the statement handle, or nothing once the
+# handle is exhausted.
+sub next_seq {
+  my $self = shift;
+  my ($sth,$store) = @{$self}{qw(sth store)};
+  return unless $sth;
+  return unless $store;
+
+  if (my $obj = $store->_sth2obj($sth)) {
+    return $obj;
+  }
+
+  # exhausted: drop both references so later calls return at once
+  undef $self->{sth};
+  undef $self->{store};
+  return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/mysql.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/mysql.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/DBI/mysql.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1575 @@
+package Bio::DB::SeqFeature::Store::DBI::mysql;
+# $Id: mysql.pm,v 1.20.4.4 2006/11/07 20:12:20 lstein Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store::DBI::mysql -- Mysql implementation of Bio::DB::SeqFeature::Store
+
+=head1 SYNOPSIS
+
+  use Bio::DB::SeqFeature::Store;
+
+  # Open the sequence database
+  my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::mysql',
+                                          -dsn     => 'dbi:mysql:test');
+
+  # get a feature from somewhere
+  my $feature = Bio::SeqFeature::Generic->new(...);
+
+  # store it
+  $db->store($feature) or die "Couldn't store!";
+
+  # primary ID of the feature is changed to indicate its primary ID
+  # in the database...
+  my $id = $feature->primary_id;
+
+  # get the feature back out
+  my $f  = $db->fetch($id);
+
+  # change the feature and update it
+  $f->start(100);
+  $db->update($f) or die "Couldn't update!";
+
+  # searching...
+  # ...by id
+  my @features = $db->fetch_many(@list_of_ids);
+
+  # ...by name
+  @features = $db->get_features_by_name('ZK909');
+
+  # ...by alias
+  @features = $db->get_features_by_alias('sma-3');
+
+  # ...by type
+  @features = $db->get_features_by_type('gene');
+
+  # ...by location
+  @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
+
+  # ...by attribute
+  @features = $db->get_features_by_attribute({description => 'protein kinase'});
+
+  # ...by the GFF "Note" field
+  @result_list = $db->search_notes('kinase');
+
+  # ...by arbitrary combinations of selectors
+  @features = $db->features(-name => $name,
+                            -type => $types,
+                            -seq_id => $seqid,
+                            -start  => $start,
+                            -end    => $end,
+                            -attributes => $attributes);
+
+  # ...using an iterator
+  my $iterator = $db->get_seq_stream(-name => $name,
+                                     -type => $types,
+                                     -seq_id => $seqid,
+                                     -start  => $start,
+                                     -end    => $end,
+                                     -attributes => $attributes);
+
+  while (my $feature = $iterator->next_seq) {
+    # do something with the feature
+  }
+
+  # ...limiting the search to a particular region
+  my $segment  = $db->segment('Chr1',5000=>6000);
+  my @features = $segment->features(-type=>['mRNA','match']);
+
+  # getting & storing sequence information
+  # Warning: this returns a string, and not a PrimarySeq object
+  $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
+  my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
+
+  # create a new feature in the database
+  my $feature = $db->new_feature(-primary_tag => 'mRNA',
+                                 -seq_id      => 'chr3',
+                                 -start      => 10000,
+                                 -end        => 11000);
+
+=head1 DESCRIPTION
+
+Bio::DB::SeqFeature::Store::DBI::mysql is the Mysql adaptor for
+Bio::DB::SeqFeature::Store. You will not create it directly, but
+instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
+
+See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
+
+=head2 Using the Mysql adaptor
+
+Before you can use the adaptor, you must use the mysqladmin tool to
+create a database and establish a user account with write
+permission. In order to use "fast" loading, the user account must have
+"file" privileges.
+
+To establish a connection to the database, call
+Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::mysql',@more_args). The
+additional arguments are as follows:
+
+  Argument name       Description
+  -------------       -----------
+
+ -dsn              The database name. You can abbreviate 
+                   "dbi:mysql:foo" as "foo" if you wish.
+
+ -user             Username for authentication.
+
+ -pass             Password for authentication.
+
+ -namespace        A prefix to attach to each table. This allows you
+                   to have several virtual databases in the same
+                   physical database.
+
+ -temp             Boolean flag. If true, a temporary database
+                   will be created and destroyed as soon as
+                   the Store object goes out of scope. (synonym -temporary)
+
+ -autoindex        Boolean flag. If true, features in the database will be
+                   reindexed every time they change. This is the default.
+
+
+ -tmpdir           Directory in which to place temporary files during "fast" loading.
+                   Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
+
+ -dbi_options      A hashref to pass to DBI->connect's 4th argument, the "attributes."
+                   (synonyms -options, -dbi_attr)
+
+ -write            Pass true to open database for writing or updating.
+
+If successful, a new instance of
+Bio::DB::SeqFeature::Store::DBI::mysql will be returned.
+
+In addition to the standard methods supported by all well-behaved
+Bio::DB::SeqFeature::Store databases, the following
+adaptor-specific methods are provided. These are described in the next
+sections.
+
+=cut
+
+use strict;
+
+use base 'Bio::DB::SeqFeature::Store';
+use Bio::DB::SeqFeature::Store::DBI::Iterator;
+use DBI;
+use Memoize;
+use Cwd 'abs_path';
+use Bio::DB::GFF::Util::Rearrange 'rearrange';
+use File::Spec;
+use constant DEBUG=>0;
+
+# from the MySQL documentation...
+# WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
+use constant MAX_INT =>  2_147_483_647;
+use constant MIN_INT => -2_147_483_648;
+use constant MAX_BIN =>  1_000_000_000;  # size of largest feature = 1 Gb
+use constant MIN_BIN =>  1000;           # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
+
+# Wrap these methods with Memoize so that repeated calls with the same
+# arguments return a cached result instead of running the method again.
+memoize('_typeid');
+memoize('_locationid');
+memoize('_attributeid');
+memoize('dump_path');
+
+###
+# object initialization
+#
+# Initialize the adaptor from the -name=>value arguments.
+#
+# Args   : -dsn (required; a DSN string, a bare database name, or an
+#          already-open database handle), -temp, -autoindex, -namespace,
+#          -tmpdir, -user, -pass, -dbi_options, -write, -create, plus the
+#          synonyms listed in the rearrange() call below.
+# Throws : if -dsn is missing or the database connection fails.
+sub init {
+  my $self          = shift;
+  my ($dsn,
+      $is_temporary,
+      $autoindex,
+      $namespace,
+      $dump_dir,
+      $user,
+      $pass,
+      $dbi_options,
+      $writeable,
+      $create,
+     ) = rearrange(['DSN',
+                    ['TEMP','TEMPORARY'],
+                    'AUTOINDEX',
+                    'NAMESPACE',
+                    ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
+                    'USER',
+                    ['PASS','PASSWD','PASSWORD'],
+                    ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
+                    ['WRITE','WRITEABLE'],
+                    'CREATE',
+                   ],@_);
+  $dbi_options  ||= {};
+  # a temporary database or a dump directory implies write access
+  $writeable    = 1 if $is_temporary or $dump_dir;
+
+  $dsn or $self->throw("Usage: ".__PACKAGE__."->init(-dsn => \$dbh || \$dsn)");
+
+  my $dbh;
+  if (ref $dsn) {
+    # caller handed us a ready-made database handle
+    $dbh = $dsn;
+  } else {
+    $dsn = "dbi:mysql:$dsn" unless $dsn =~ /^dbi:/;
+    # fail here with DBI's message instead of later, on the first
+    # method call made on an undefined handle
+    $dbh = DBI->connect($dsn,$user,$pass,$dbi_options)
+      or $self->throw($DBI::errstr);
+  }
+  $self->{dbh}       = $dbh;
+  $self->{is_temp}   = $is_temporary;
+  $self->{namespace} = $namespace;
+  $self->{writeable} = $writeable;
+
+  $self->default_settings;
+  $self->autoindex($autoindex)                   if defined $autoindex;
+  $self->dumpdir($dump_dir)                      if $dump_dir;
+  if ($self->is_temp) {
+    $self->init_tmp_database();
+  } elsif ($create) {
+    $self->init_database('erase');
+  }
+}
+
+# Return the writeable flag recorded by init().
+sub writeable {
+  my $self = shift;
+  return $self->{writeable};
+}
+
+# Always true for this adaptor.
+sub can_store_parentage {
+  return 1;
+}
+
+# Return a hashref that maps each unqualified table name to the
+# parenthesized column and index definition that is appended to
+# "CREATE TABLE <name>" when the table is created.
+sub table_definitions {
+  my $self = shift;
+  return {
+	  feature => <<END,
+(
+  id       int(10) auto_increment primary key,
+  typeid   int(10)      not null,
+  seqid    int(10),
+  start    int,
+  end      int,
+  strand   tinyint      default 0,
+  tier     tinyint,
+  bin      int,
+  indexed  tinyint default 1,
+  object     MEDIUMBLOB not null,
+  index(seqid,tier,bin,typeid),
+  index(typeid)
+)
+END
+
+	  locationlist => <<END,
+(
+  id         int(10)       auto_increment primary key,
+  seqname    varchar(50)   not null,
+  index(seqname)
+)
+END
+
+	  typelist => <<END,
+(
+  id       int(10) auto_increment primary key,
+  tag      varchar(40)  not null,
+  index(tag)
+)
+END
+	  name => <<END,
+(
+  id           int(10)       not null,
+  name         varchar(128)  not null,
+  display_name tinyint       default 0,
+  index(id),
+  index(name)
+)
+END
+
+	  attribute => <<END,
+(
+  id               int(10)       not null,
+  attribute_id     int(10)   not null,
+  attribute_value  text,
+  index(id),
+  index(attribute_id,attribute_value(10))
+)
+END
+
+	  attributelist => <<END,
+(
+  id       int(10) auto_increment primary key,
+  tag      varchar(50)  not null,
+  index(tag)
+)
+END
+	  parent2child => <<END,
+(
+  id               int(10)       not null,
+  child            int(10)       not null,
+  index(id,child)
+)
+END
+
+	  meta => <<END,
+(
+  name      varchar(128) primary key,
+  value     varchar(128) not null
+)
+END
+	  sequence => <<END,
+(
+  id       int(10) not null,
+  offset   int(10) unsigned not null,
+  sequence longblob,
+  primary key(id,offset)
+)
+END
+	 };
+}
+
+###
+# default settings -- will create and populate meta table if needed
+#
+sub default_settings {
+  my ($self) = @_;
+  # create the meta table first (skipped unless the store is writeable)
+  $self->maybe_create_meta();
+  $self->SUPER::default_settings;
+  # adaptor defaults: autoindexing on, dump files in the system tmpdir
+  $self->autoindex(1);
+  $self->dumpdir(File::Spec->tmpdir);
+}
+
+
+###
+# retrieve database handle
+#
+# Get or set the database handle. When setting, the PREVIOUS handle is
+# returned.
+sub dbh {
+  my ($self,@new) = @_;
+  my $previous = $self->{dbh};
+  $self->{dbh} = $new[0] if @new;
+  return $previous;
+}
+
+###
+# get/set directory for bulk load tables
+#
+# Get or set the dump directory. A new value is stored as an absolute
+# path; when setting, the PREVIOUS value is returned.
+sub dumpdir {
+  my ($self,@new) = @_;
+  my $previous = $self->{dumpdir};
+  $self->{dumpdir} = abs_path($new[0]) if @new;
+  return $previous;
+}
+
+###
+# table namespace (multiple dbs in one mysql db)
+#
+# Get or set the table namespace. When setting, the PREVIOUS value is
+# returned.
+sub namespace {
+  my ($self,@new) = @_;
+  my $previous = $self->{namespace};
+  $self->{namespace} = $new[0] if @new;
+  return $previous;
+}
+
+###
+# find a path that corresponds to a dump table
+#
+# Build the dump file path for a table: the dump directory, the qualified
+# table name, and the current process ID as suffix.
+sub dump_path {
+  my ($self,$name) = @_;
+  my $qualified = $self->_qualify($name);
+  return $self->{dumpdir} . '/' . $qualified . '.' . $$;
+}
+
+###
+# make a filehandle (writeable) that corresponds to a dump table
+#
+# Return a writeable IO::File handle for the dump file of a table.
+#
+# Args    : $table -- unqualified table name
+# Returns : the handle; one handle is kept per path and reused
+# Throws  : if the dump file cannot be opened for writing
+sub dump_filehandle {
+  my $self  = shift;
+  my $table = shift;
+  require IO::File;
+  my $path  = $self->dump_path($table);
+  my $fh    = $self->{filehandles}{$path};
+  unless ($fh) {
+    # explicit mode argument, so the path is never parsed for mode
+    # characters; report failure here rather than on the first write
+    $fh = IO::File->new($path,'w')
+      or $self->throw("Could not open dump file $path for writing: $!");
+    $self->{filehandles}{$path} = $fh;
+  }
+  return $fh;
+}
+
+###
+# find the next ID for a feature (used only during bulk loading)
+#
+# Return the next feature ID. The counter is seeded from max_id() while
+# it holds no true value, then incremented on every call.
+sub next_id {
+  my $self = shift;
+  unless ($self->{max_id}) {
+    $self->{max_id} = $self->max_id;
+  }
+  $self->{max_id}++;
+  return $self->{max_id};
+}
+
+###
+# find the maximum ID for a feature (used only during bulk loading)
+#
+# Return the largest feature ID currently in the feature table (undef
+# if the table is empty).
+#
+# The table name comes from _feature_table(), as in the other feature
+# queries, rather than being the bare name "feature".
+sub max_id {
+  my $self     = shift;
+  my $features = $self->_feature_table;
+  my $sth      = $self->_prepare("SELECT max(id) from $features");
+  $sth->execute or $self->throw($sth->errstr);
+  my ($id) = $sth->fetchrow_array;
+  return $id;
+}
+
+###
+# wipe database clean and reinstall schema
+#
+# Create every table except meta if it does not yet exist. With a true
+# $erase argument each table is dropped first and
+# subfeatures_are_indexed is set to 1 afterwards.
+sub _init_database {
+  my ($self,$erase) = @_;
+
+  my $dbh         = $self->dbh;
+  my $definitions = $self->table_definitions;
+  for my $name (keys %$definitions) {
+    next if $name eq 'meta';      # don't get rid of meta data!
+    my $table = $self->_qualify($name);
+    if ($erase) {
+      $dbh->do("DROP table IF EXISTS $table");
+    }
+    $dbh->do("CREATE TABLE IF NOT EXISTS $table $definitions->{$name}")
+      or $self->throw($dbh->errstr);
+  }
+  $self->subfeatures_are_indexed(1) if $erase;
+  return 1;
+}
+
+# Create the meta table if it does not exist. Does nothing unless the
+# store is writeable.
+sub maybe_create_meta {
+  my $self = shift;
+  $self->writeable or return;
+  my $table       = $self->_qualify('meta');
+  my $definitions = $self->table_definitions;
+  return $self->dbh->do("CREATE TABLE IF NOT EXISTS $table $definitions->{meta}");
+}
+
+# Create every table, meta included, as a TEMPORARY table.
+sub init_tmp_database {
+  my $self        = shift;
+  my $dbh         = $self->dbh;
+  my $definitions = $self->table_definitions;
+  foreach my $name (keys %$definitions) {
+    my $table = $self->_qualify($name);
+    $dbh->do("CREATE TEMPORARY TABLE $table $definitions->{$name}")
+      or $self->throw($dbh->errstr);
+  }
+  return 1;
+}
+
+###
+# use temporary tables
+#
+sub is_temp {
+  my $self = shift;
+  return $self->{is_temp};
+}
+
+# Store one or more feature objects inside a single transaction.
+#
+# Args    : $indexed -- true if the features are to be indexed
+#           @objects -- the features to store
+# Returns : the number of features stored; 0 if the transaction was
+#           rolled back, so that "store(...) or die" detects the failure
+#
+# While a bulk update is in progress, all arguments are handed to
+# _dump_store() instead.
+sub _store {
+  my $self    = shift;
+
+  # special case for bulk updates
+  return $self->_dump_store(@_) if $self->{bulk_update_in_progress};
+
+  my $indexed = shift;
+  my $count = 0;
+
+  my $autoindex = $self->autoindex;
+
+  my $dbh = $self->dbh;
+  local $dbh->{RaiseError} = 1;
+  $dbh->begin_work;
+  eval {
+    for my $obj (@_) {
+      $self->replace($obj,$indexed);
+      $self->_update_indexes($obj) if $indexed && $autoindex;
+      $count++;
+    }
+  };
+
+  if ($@) {
+    warn "Transaction aborted because $@";
+    $dbh->rollback;
+    $count = 0;   # the rollback undid every replace() counted so far
+  }
+  else {
+    $dbh->commit;
+  }
+
+  # remember whether we have ever stored a non-indexed feature
+  unless ($indexed or $self->{indexed_flag}++) {
+    $self->subfeatures_are_indexed(0);
+  }
+  $count;
+}
+
+# Get or set the persistent "autoindex" setting. When setting, the
+# PREVIOUS value is returned.
+sub autoindex {
+  my ($self,@new) = @_;
+
+  # special case for bulk update -- need to build the indexes
+  # at the same time we build the main feature table
+  if ($self->{bulk_update_in_progress}) {
+    return 1;
+  }
+
+  my $previous = $self->setting('autoindex');
+  $self->setting(autoindex => $new[0]) if @new;
+  return $previous;
+}
+
+# Mark a bulk update as being in progress. Other methods check this flag
+# and switch to their dump-file code paths while it is set.
+sub _start_bulk_update {
+  my $self = shift;
+  $self->{bulk_update_in_progress}++;
+}
+
+# Finish a bulk update: close each dump file, load it into its table
+# with LOAD DATA INFILE, delete the file, then clear the bulk-update
+# state.
+#
+# Throws : if a dump file cannot be closed cleanly or a load fails
+sub _finish_bulk_update {
+  my $self = shift;
+  my $dbh  = $self->dbh;
+  for my $table ('feature',$self->index_tables) {
+    my $fh   = $self->dump_filehandle($table);
+    my $path = $self->dump_path($table);
+    # buffered write errors only surface at close time
+    $fh->close or $self->throw("Error closing dump file $path: $!");
+    my $qualified_table = $self->_qualify($table);
+    # let the driver quote the path so quotes or backslashes in the
+    # dump directory name cannot break the statement
+    my $quoted_path     = $dbh->quote($path);
+    $dbh->do("LOAD DATA INFILE $quoted_path REPLACE INTO TABLE $qualified_table FIELDS OPTIONALLY ENCLOSED BY '\\''")
+      or $self->throw($dbh->errstr);
+    unlink $path;
+  }
+  delete $self->{bulk_update_in_progress};
+  delete $self->{filehandles};
+}
+
+
+###
+# Add subparts to a feature. Both feature and all subparts must already be in database.
+#
+# Record parent/child relationships in the parent2child table inside a
+# single transaction.
+#
+# Args    : $parent   -- a feature object or a primary ID
+#           @children -- feature objects or primary IDs
+# Returns : the number of children added; 0 if the transaction was
+#           rolled back
+# Throws  : if the parent has no primary ID
+#
+# While a bulk update is in progress, all arguments are handed to
+# _dump_add_SeqFeature() instead.
+sub _add_SeqFeature {
+  my $self     = shift;
+
+  # special purpose method for case when we are doing a bulk update
+  return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress};
+
+  my $parent   = shift;
+  my @children = @_;
+
+  my $dbh = $self->dbh;
+  local $dbh->{RaiseError} = 1;
+
+  my $child_table = $self->_parent2child_table();
+  my $count = 0;
+
+  my $sth = $self->_prepare(<<END);
+REPLACE INTO $child_table (id,child) VALUES (?,?)
+END
+
+  my $parent_id = (ref $parent ? $parent->primary_id : $parent) 
+    or $self->throw("$parent should have a primary_id");
+
+  $dbh->begin_work or $self->throw($dbh->errstr);
+  eval {
+    for my $child (@children) {
+      my $child_id = ref $child ? $child->primary_id : $child;
+      defined $child_id or die "no primary ID known for $child";
+      $sth->execute($parent_id,$child_id);
+      $count++;
+    }
+  };
+
+  if ($@) {
+    warn "Transaction aborted because $@";
+    $dbh->rollback;
+    $count = 0;   # the rollback undid every row counted so far
+  }
+  else {
+    $dbh->commit;
+  }
+  $sth->finish;
+  $count;
+}
+
+# Fetch the subfeatures of $parent, optionally restricted to the given types.
+#
+# Arguments: the parent feature object (it must have a primary_id, otherwise
+# an exception is thrown), followed by zero or more type specifiers that are
+# passed to _types_sql().
+# Returns the list of objects produced by _sth2objs().
+sub _fetch_SeqFeatures {
+  my $self   = shift;
+  my $parent = shift;
+  my @types  = @_;
+
+  my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
+  my $feature_table = $self->_feature_table;
+  my $child_table   = $self->_parent2child_table();
+
+  my @from  = ("$feature_table as f","$child_table as c");
+  my @where = ('f.id=c.child','c.id=?');
+  my @args  = $parent_id;
+
+  if (@types) {
+    # _types_sql returns (from, where, group, bind values...); group is unused here
+    my ($from,$where,undef,@a) = $self->_types_sql(\@types,'f');
+    push @from,$from   if $from;
+    push @where,$where if $where;
+    push @args,@a;
+  }
+
+  my $from  = join ', ',@from;
+  my $where = join ' AND ',@where;
+
+  my $query = <<END;
+SELECT f.id,f.object
+  FROM $from
+  WHERE $where
+END
+
+  $self->_print_query($query,@args) if DEBUG || $self->debug;
+
+  my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
+
+  $sth->execute(@args) or $self->throw($sth->errstr);
+  return $self->_sth2objs($sth);
+}
+
+###
+# get primary sequence between start and end
+#
+# Arguments: sequence ID, start and end. Either coordinate may be undef, in
+# which case the leftmost/rightmost stored offset is used for that side.
+# Coordinates are decremented by one before being compared with the stored
+# fragment offsets.
+# Returns the assembled sequence string ('' when no fragment matches).
+#
+sub _fetch_sequence {
+  my $self = shift;
+  my ($seqid,$start,$end) = @_;
+
+  # backward compatibility to the old days when I liked reverse complementing
+  # dna by specifying $start > $end
+  my $reversed;
+  if (defined $start && defined $end && $start > $end) {
+    $reversed++;
+    ($start,$end) = ($end,$start);
+  }
+  $start-- if defined $start;
+  $end--   if defined $end;
+
+  # a false coordinate (undef, or 0 after the decrement) selects the outermost
+  # stored offset on that side
+  my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
+  my $offset2 = $self->_offset_boundary($seqid,$end   || 'right');
+  my $sequence_table = $self->_sequence_table;
+  my $locationlist_table = $self->_locationlist_table;
+
+  my $sth     = $self->_prepare(<<END);
+SELECT sequence,offset
+   FROM $sequence_table as s,$locationlist_table as ll
+   WHERE s.id=ll.id
+     AND ll.seqname= ?
+     AND offset >= ?
+     AND offset <= ?
+   ORDER BY offset
+END
+
+  my $seq = '';
+  $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
+
+  # concatenate the fragments in offset order, trimming the part of a
+  # fragment that lies before the requested start
+  while (my($frag,$offset) = $sth->fetchrow_array) {
+    substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
+    $seq .= $frag;
+  }  
+  # cut off anything that extends past the requested end
+  substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
+  if ($reversed) {
+    # reverse complement; only the letters g,a,t,c (either case) are translated
+    $seq = reverse $seq;
+    $seq =~ tr/gatcGATC/ctagCTAG/;
+  }
+  $sth->finish;
+  $seq;
+}
+
+# Look up a fragment offset for $seqid in the sequence table.
+#
+# $position is 'left' (smallest stored offset), 'right' (largest stored
+# offset) or anything else, in which case the largest offset <= $position is
+# requested; $position is bound to that query only when it is an integer.
+# Returns the first column of the first result row.
+sub _offset_boundary {
+  my ($self, $seqid, $position) = @_;
+
+  my $sequence_table     = $self->_sequence_table;
+  my $locationlist_table = $self->_locationlist_table;
+  my $tables = "$sequence_table as s,$locationlist_table as ll";
+
+  my $sql;
+  if ($position eq 'left') {
+    $sql = "SELECT min(offset) FROM $tables WHERE s.id=ll.id AND ll.seqname=?";
+  }
+  elsif ($position eq 'right') {
+    $sql = "SELECT max(offset) FROM $tables WHERE s.id=ll.id AND ll.seqname=?";
+  }
+  else {
+    $sql = "SELECT max(offset) FROM $tables WHERE s.id=ll.id AND ll.seqname=? AND offset<=?";
+  }
+
+  my $sth  = $self->_prepare($sql);
+  my @bind = ($seqid);
+  push @bind,$position if $position =~ /^-?\d+$/;
+  $sth->execute(@bind) or $self->throw($sth->errstr);
+
+  my $rows     = $sth->fetchall_arrayref;
+  my $boundary = $rows->[0][0];
+  $sth->finish;
+  return $boundary;
+}
+
+
+###
+# Add the store's namespace to a table name.
+# Returns "<namespace>_<table>" when a namespace is defined, otherwise the
+# table name unchanged.
+#
+sub _qualify {
+  my ($self, $table_name) = @_;
+  my $namespace = $self->namespace;
+  return defined $namespace ? "${namespace}_${table_name}" : $table_name;
+}
+
+###
+# Fetch a single feature from the database by primary ID.
+# Throws a usage error when called without an ID. Returns whatever
+# _sth2obj() produces for the matching row.
+#
+sub _fetch {
+  my ($self, @ids) = @_;
+  $self->throw("usage: fetch(\$primary_id)") unless @ids;
+
+  my $primary_id = $ids[0];
+  my $features   = $self->_feature_table;
+  my $sth = $self->_prepare(<<END);
+SELECT id,object FROM $features WHERE id=?
+END
+  $sth->execute($primary_id) or $self->throw($sth->errstr);
+  my $obj = $self->_sth2obj($sth);
+  $sth->finish;
+  return $obj;
+}
+
+###
+# Efficiently fetch a series of IDs from the database.
+# Accepts a list of IDs, array references of IDs, or a mixture of both.
+#
+# The IDs are bound through placeholders rather than pasted into the SQL
+# text, so odd ID values cannot alter the statement and the cached statement
+# depends only on how many IDs are requested.
+# Throws a usage error when called without arguments; returns nothing when
+# the arguments contain no IDs. Otherwise returns the _sth2objs() list.
+#
+sub _fetch_many {
+  my $self       = shift;
+  @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
+  my @ids = map {ref($_) ? @$_ : $_} @_;
+  return unless @ids;
+  my $features     = $self->_feature_table;
+  my $placeholders = join ',',('?') x @ids;
+
+  my $sth = $self->_prepare(<<END);
+SELECT id,object FROM $features WHERE id IN ($placeholders)
+END
+  $sth->execute(@ids) or $self->throw($sth->errstr);
+  return $self->_sth2objs($sth);
+}
+
+# General feature query. Named arguments (parsed with rearrange):
+#   -seq_id/-seqid/-ref, -start, -end/-stop, -strand   location filter
+#   -name, -class, -aliases                            name filter
+#   -type/-types/-primary_tag                          type filter
+#   -attribute/-attributes                             attribute filter
+#   -range_type     'overlaps' (default), 'contains' or 'contained_in'
+#   -from_table     restrict to IDs listed in the named table
+#   -iterator       if true, return an iterator instead of a list
+#
+# Each filter contributes FROM/WHERE/GROUP BY fragments plus bind values.
+# With no filter at all, only rows with indexed=1 are returned.
+sub _features {
+  my $self = shift;
+  my ($seq_id,$start,$end,$strand,
+      $name,$class,$allow_aliases,
+      $types,
+      $attributes,
+      $range_type,
+      $fromtable,
+      $iterator
+     ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
+                    'NAME','CLASS','ALIASES',
+                    ['TYPES','TYPE','PRIMARY_TAG'],
+                    ['ATTRIBUTES','ATTRIBUTE'],
+                    'RANGE_TYPE',
+                    'FROM_TABLE',
+                    'ITERATOR',
+                   ],@_);
+
+  my (@from,@where,@args,@group);
+  $range_type ||= 'overlaps';
+
+  my $feature_table         = $self->_feature_table;
+  @from = "$feature_table as f";
+
+  # every fragment generator returns (from, where, group, bind values...)
+  my $add_clause = sub {
+    my ($from,$where,$group,@a) = @_;
+    push @from,$from   if $from;
+    push @where,$where if $where;
+    push @group,$group if $group;
+    push @args,@a;
+  };
+
+  if (defined $name) {
+    # hacky backward compatibility workaround
+    undef $class if $class && $class eq 'Sequence';
+    $name = "$class:$name" if defined $class && length $class > 0;
+    # last argument is the join field
+    $add_clause->($self->_name_sql($name,$allow_aliases,'f.id'));
+  }
+
+  if (defined $seq_id) {
+    # last argument is the name of the features table
+    $add_clause->($self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f'));
+  }
+
+  if (defined($types)) {
+    # last argument is the name of the features table
+    $add_clause->($self->_types_sql($types,'f'));
+  }
+
+  if (defined $attributes) {
+    # last argument is the join field
+    $add_clause->($self->_attributes_sql($attributes,'f.id'));
+  }
+
+  if (defined $fromtable) {
+    # last argument is the join field
+    $add_clause->($self->_from_table_sql($fromtable,'f.id'));
+  }
+
+  # if no other criteria are specified, then
+  # only fetch indexed (i.e. top level objects)
+  @where = 'indexed=1' unless @where;
+
+  my $from  = join ', ',@from;
+  my $where = join ' AND ',map {"($_)"} @where;
+  my $group = join ', ',@group;
+  $group    = "GROUP BY $group" if @group;
+
+  my $query = <<END;
+SELECT f.id,f.object
+  FROM $from
+  WHERE $where
+  $group
+END
+
+  $self->_print_query($query,@args) if DEBUG || $self->debug;
+
+  my $sth = $self->_prepare($query);
+  $sth->execute(@args) or $self->throw($sth->errstr);
+  return $iterator ? Bio::DB::SeqFeature::Store::DBI::Iterator->new($sth,$self) : $self->_sth2objs($sth);
+}
+
+# Build the SQL fragments that select features by name.
+#
+# Arguments: the name (wildcards are handled by _match_sql), a flag that
+# allows aliases to match, and the column to join the name table's id to.
+# Returns (from, where, group, bind value); the group fragment is always ''.
+# Unless aliases are allowed, only rows with display_name>0 match.
+sub _name_sql {
+  my ($self, $name, $allow_aliases, $join) = @_;
+  my $name_table = $self->_name_table;
+
+  my ($match,$string) = $self->_match_sql($name);
+
+  my @conditions = ("n.id=$join","n.name $match");
+  push @conditions,"n.display_name>0" unless $allow_aliases;
+
+  return ("$name_table as n",join(' AND ',@conditions),'',$string);
+}
+
+# Search attribute values for the words in a search string.
+#
+# Arguments: the search string (split on whitespace into words), an array
+# reference of attribute tag names to search, and an optional row limit.
+# A row must match every word (SQL REGEXP) under one of the given tags.
+#
+# Returns a list of [name, attribute value, score] array references sorted
+# by descending score, where the score is derived from the number of word
+# hits relative to the number of search words and the length of the value.
+# Returns an empty list when the search string contains no words.
+sub _search_attributes {
+  my $self = shift;
+  my ($search_string,$attribute_names,$limit) = @_;
+  # split ' ' skips leading whitespace, so no empty word can be produced
+  # (an empty word would match every row and distort the scores)
+  my @words               = map {quotemeta($_)} split ' ',$search_string;
+  return unless @words;   # nothing to search for; avoids building "AND ()"
+  my $name_table          = $self->_name_table;
+  my $attribute_table     = $self->_attribute_table;
+  my $attributelist_table = $self->_attributelist_table;
+
+  my @tags    = @$attribute_names;
+  my $tag_sql = join ' OR ',("al.tag=?") x @tags;
+
+  my $perl_regexp = join '|',@words;
+
+  my $sql_regexp = join ' AND ',("a.attribute_value REGEXP ?")  x @words;
+  my $sql = <<END;
+SELECT name,attribute_value
+  FROM $name_table as n,$attribute_table as a,$attributelist_table as al
+  WHERE n.id=a.id
+    AND al.id=a.attribute_id
+    AND n.display_name=1
+    AND ($tag_sql)
+    AND ($sql_regexp)
+END
+  $sql .= "LIMIT $limit" if defined $limit;
+  $self->_print_query($sql,@tags,@words) if DEBUG || $self->debug;
+  my $sth = $self->_prepare($sql);
+  $sth->execute(@tags,@words) or $self->throw($sth->errstr);
+
+  my @results;
+  while (my($name,$value) = $sth->fetchrow_array) {
+    my (@hits) = $value =~ /$perl_regexp/ig;
+    my @words_in_row = split /\b/,$value;
+    my $score  = int(@hits*100/@words/@words_in_row);
+    push @results,[$name,$value,$score];
+  }
+  $sth->finish;
+  @results = sort {$b->[2]<=>$a->[2]} @results;
+  return @results;
+}
+
+# Translate a name that may contain shell-style wildcards into an SQL
+# comparison.
+#
+# If the name contains an unescaped '*' or '?', literal '%' and '_' are
+# backslash-escaped, '*' becomes '%' and '?' becomes '_', and the comparison
+# is "LIKE ?". Otherwise the comparison is "= ?" and the name is unchanged.
+# Characters preceded by a backslash are left alone.
+# Returns (comparison fragment, bind value).
+sub _match_sql {
+  my $self = shift;
+  my $name = shift;
+
+  my ($match,$string);
+  if ($name =~ /(?:^|[^\\])[*?]/) {
+    # Lookbehinds do not consume the preceding character, so runs such as
+    # "**", "??" or "__" are converted completely.
+    $name =~ s/(?<!\\)([%_])/\\$1/g;
+    $name =~ s/(?<!\\)\*/%/g;
+    $name =~ s/(?<!\\)\?/_/g;
+    $match = "LIKE ?";
+    $string  = $name;
+  } else {
+    $match = "= ?";
+    $string  = $name;
+  }
+  return ($match,$string);
+}
+
+# Build the SQL fragments that restrict a query to the IDs listed in another
+# table. Arguments: the table name and the column its id is joined to.
+# Returns (from, where, group); the group fragment is always ''.
+sub _from_table_sql {
+  my ($self, $from_table, $join) = @_;
+  return ("$from_table as ft","ft.id=$join",'');
+}
+
+# Build the SQL fragments that select features by attribute.
+#
+# Arguments: a hash reference of attribute tag => value(s), and the column
+# the attribute table's id is joined to.
+# Returns (from, where, group, bind values...), combining the output of
+# _make_attribute_where() and _make_attribute_group().
+sub _attributes_sql {
+  my $self = shift;
+  my ($attributes,$join) = @_;
+
+  my ($wf,@bind_args)       = $self->_make_attribute_where('a','al',$attributes);
+  my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
+
+  my $attribute_table       = $self->_attribute_table;
+  my $attributelist_table   = $self->_attributelist_table;
+
+  my $from = "$attribute_table as a, $attributelist_table as al";
+
+  my $where = <<END;
+  a.id=$join
+  AND   a.attribute_id=al.id
+  AND ($wf)
+END
+
+  my $group = $group_by;
+
+  # WHERE placeholders come before the HAVING placeholder in the final query
+  my @args  = (@bind_args,@group_args);
+  return ($from,$where,$group,@args);
+}
+
+# Both flags are hard-wired to true in this adaptor.
+sub subfeature_types_are_indexed     { return 1 }
+sub subfeature_locations_are_indexed { return 1 }
+
+# Build the SQL fragments that select features by type.
+#
+# Arguments: a single type or an array reference of types, and the alias of
+# the table whose typeid column is joined to the type list. Each type is
+# either a Bio::DB::GFF::Typename object or a "primary_tag:source_tag"
+# string. When no source tag is given, any source matches.
+# Returns (from, where, group, bind values...); the group fragment is ''.
+sub _types_sql {
+  my $self  = shift;
+  my ($types,$type_table) = @_;
+  my ($primary_tag,$source_tag);
+
+  my @types = ref $types eq 'ARRAY' ?  @$types : $types;
+
+  my $typelist      = $self->_typelist_table;
+  my $from = "$typelist AS tl";
+
+  my (@matches,@args);
+
+  for my $type (@types) {
+
+    if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
+      $primary_tag = $type->method;
+      $source_tag  = $type->source;
+    } else {
+      ($primary_tag,$source_tag) = split ':',$type,2;
+    }
+
+    if (defined $source_tag) {
+      push @matches,"tl.tag=?";
+      push @args,"$primary_tag:$source_tag";
+    } else {
+      # no source given: match the primary tag with any source
+      push @matches,"tl.tag LIKE ?";
+      push @args,"$primary_tag:%";
+    }
+  }
+  my $matches = join ' OR ',@matches;
+
+  my $where = <<END;
+   tl.id=$type_table.typeid
+   AND   ($matches)
+END
+
+  return ($from,$where,'',@args);
+}
+
+# Build the SQL fragments that select features by location.
+#
+# Arguments: sequence ID, start, end, range type ('overlaps', 'contains' or
+# 'contained_in'; anything else throws), strand (optional) and the alias of
+# the table holding the location columns. Missing start/end default to
+# MIN_INT/MAX_INT.
+# Returns (from, where, group, bind values...); from and group are ''.
+sub _location_sql {
+  my $self = shift;
+  my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
+
+  # the additional join on the location_list table badly impacts performance
+  # so we build a copy of the table in memory
+  my $seqid = $self->_locationid($seq_id) || 0; # zero is an invalid primary ID, so will return empty
+
+  $start = MIN_INT unless defined $start;
+  $end   = MAX_INT unless defined $end;
+
+  my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
+
+  my ($range,@range_args);
+  if ($range_type eq 'overlaps') {
+    $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
+    @range_args = ($start,$end,@bin_args);
+  } elsif ($range_type eq 'contains') {
+    $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
+    @range_args = ($start,$end,@bin_args);
+  } elsif ($range_type eq 'contained_in') {
+    # the bin restriction is not applied for this range type
+    $range = "$location.start<=? AND $location.end>=?";
+    @range_args = ($start,$end);
+  } else {
+    $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
+  }
+
+  if (defined $strand) {
+    $range .= " AND strand=?";
+    push @range_args,$strand;
+  }
+
+  my $where = <<END;
+   $location.seqid=?
+   AND   $range
+END
+
+  my $from  = '';
+  my $group = '';
+
+  my @args  = ($seqid,@range_args);
+  return ($from,$where,$group,@args);
+}
+
+###
+# force reindexing
+#
+# Empties the index tables (or, when $from_update_table is given, only the
+# rows whose ids are listed in that table) and rebuilds them by passing every
+# selected feature to _update_indexes(). Progress is reported on STDERR
+# every 1000 features.
+#
+# Returns 1 after committing. If the rebuild dies, a warning is issued, the
+# transaction is rolled back and nothing is returned.
+#
+sub reindex {
+  my $self = shift;
+  my $from_update_table = shift;  # if present, will take ids from "update_table"
+
+  my $dbh  = $self->dbh;
+  my $count = 0;
+  my $now;
+
+  # try to bring in highres time() function; failing to load it is not an error
+  eval { require Time::HiRes };
+
+  my $last_time = $self->time();
+
+  # tell _delete_index() not to bother removing the index rows corresponding
+  # to each individual feature
+  local $self->{reindexing} = 1;
+
+  $dbh->begin_work;
+  eval {
+    my $update = $from_update_table;
+    for my $table ($self->index_tables) {
+      my $query = $from_update_table ? "DELETE $table FROM $table,$update WHERE $table.id=$update.id"
+                                     : "DELETE FROM $table";
+      $dbh->do($query);
+      $dbh->do("ALTER TABLE $table DISABLE KEYS");
+    }
+    my $iterator = $self->get_seq_stream(-from_table=>$from_update_table ? $update : undef);
+    while (my $f = $iterator->next_seq) {
+      if (++$count %1000 == 0) {
+        $now = $self->time();
+        my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
+        $last_time = $now;
+        print STDERR "$count features indexed$elapsed...",' 'x60;
+        print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
+      }
+      $self->_update_indexes($f);
+    }
+  };
+  # save the error at once: the statements below could reset $@
+  my $error = $@;
+
+  # keys are re-enabled whether or not the rebuild succeeded
+  for my $table ($self->index_tables) {
+    $dbh->do("ALTER TABLE $table ENABLE KEYS");
+  }
+  if ($error) {
+    warn "Couldn't complete transaction: $error";
+    $dbh->rollback;
+    return;
+  } else {
+    $dbh->commit;
+    return 1;
+  }
+}
+
+# Run ANALYZE TABLE on each of the index tables.
+sub optimize {
+  my ($self) = @_;
+  for my $table ($self->index_tables) {
+    $self->dbh->do("ANALYZE TABLE $table");
+  }
+}
+
+# Return the qualified feature table name followed by the index table names.
+sub all_tables {
+  my $self = shift;
+  my @index_tables = $self->index_tables;
+  my $feature_table = $self->_feature_table;
+  return ($feature_table,@index_tables);
+}
+
+# Return the namespace-qualified names of the name, attribute and
+# parent2child tables, in that order.
+sub index_tables {
+  my ($self) = @_;
+  my @qualified;
+  push @qualified,$self->_qualify($_) for qw(name attribute parent2child);
+  return @qualified;
+}
+
+# Return the smallest id in the feature table (undef if the table is empty).
+sub _firstid {
+  my ($self) = @_;
+  my $features = $self->_feature_table;
+  my $sth = $self->_prepare(<<END);
+SELECT min(id) FROM $features
+END
+  $sth->execute();
+  my @row = $sth->fetchrow_array;
+  $sth->finish;
+  return $row[0];
+}
+
+# Return the smallest feature id greater than $lastkey (undef if none).
+sub _nextid {
+  my ($self, $lastkey) = @_;
+  my $features = $self->_feature_table;
+  my $sth = $self->_prepare(<<END);
+SELECT min(id) FROM $features WHERE id>?
+END
+  $sth->execute($lastkey);
+  my @row = $sth->fetchrow_array;
+  $sth->finish;
+  return $row[0];
+}
+
+# Return true if a feature with the given id is present in the feature table.
+sub _existsid {
+  my ($self, $key) = @_;
+  my $features = $self->_feature_table;
+  my $sth = $self->_prepare(<<END);
+SELECT count(*) FROM $features WHERE id=?
+END
+  $sth->execute($key);
+  my ($matches) = $sth->fetchrow_array;
+  $sth->finish;
+  return $matches > 0;
+}
+
+# Delete the rows with the given id from the feature table and from every
+# index table. The id is bound through a placeholder rather than pasted into
+# the SQL text, so it cannot alter the statement.
+sub _deleteid {
+  my $self = shift;
+  my $key  = shift;
+  my $dbh = $self->dbh;
+  for my $table ($self->all_tables) {
+    $dbh->do("DELETE FROM $table WHERE id=?",undef,$key);
+  }
+}
+
+# Delete every row from the feature table and from every index table.
+sub _clearall {
+  my ($self) = @_;
+  my $dbh = $self->dbh;
+  foreach my $table ($self->all_tables) {
+    $dbh->do("DELETE FROM $table");
+  }
+}
+
+# Return the number of rows in the feature table.
+sub _featurecount {
+  my ($self) = @_;
+  my $dbh = $self->dbh;   # handle is fetched but not otherwise used here
+  my $features = $self->_feature_table;
+  my $sth = $self->_prepare(<<END);
+SELECT count(*) FROM $features
+END
+  $sth->execute();
+  my @row = $sth->fetchrow_array;
+  $sth->finish;
+  return $row[0];
+}
+
+# Return the distinct sequence names stored in the locationlist table.
+sub _seq_ids {
+  my ($self) = @_;
+  my $dbh = $self->dbh;   # handle is fetched but not otherwise used here
+  my $location = $self->_locationlist_table;
+  my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
+  $sth->execute() or $self->throw($sth->errstr);
+
+  my @seq_ids;
+  while (my @row = $sth->fetchrow_array) {
+    push @seq_ids,$row[0];
+  }
+  return @seq_ids;
+}
+
+# Get or set a named value in the meta table, with a per-object cache.
+#
+# With a defined value and a writeable store, the value is written with
+# REPLACE INTO, cached and returned. Otherwise the cached value is returned
+# if there is one; failing that the value is read from the meta table,
+# cached and returned.
+sub setting {
+  my ($self, $variable_name, $value) = @_;
+  my $meta = $self->_meta_table;
+
+  my $storing = defined $value && $self->writeable;
+
+  if (!$storing) {
+    return $self->{settings_cache}{$variable_name} if exists $self->{settings_cache}{$variable_name};
+    my $sth = $self->_prepare(<<END);
+SELECT value FROM $meta as m WHERE m.name=?
+END
+    $sth->execute($variable_name) or $self->throw($sth->errstr);
+    my ($stored) = $sth->fetchrow_array;
+    $sth->finish;
+    return $self->{settings_cache}{$variable_name} = $stored;
+  }
+
+  my $sth = $self->_prepare(<<END);
+REPLACE INTO $meta (name,value) VALUES (?,?)
+END
+  $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
+  $sth->finish;
+  return $self->{settings_cache}{$variable_name} = $value;
+}
+
+###
+# Replace Bio::SeqFeatureI into database.
+#
+# Arguments: the feature object and an optional index flag. When the flag is
+# true the feature's location and bin are stored alongside the frozen
+# object; otherwise those six columns are NULL. The type id is looked up
+# from "primary_tag:source_tag".
+#
+# If the object had no primary_id it receives the id generated by the
+# insert. During a bulk update the id is also flagged for later indexing.
+#
+sub replace {
+  my $self       = shift;
+  my $object     = shift;
+  my $index_flag = shift || undef;
+
+  # ?? shouldn't need to do this
+  # $self->_load_class($object);
+  my $id = $object->primary_id;
+  my $features = $self->_feature_table;
+
+  my $sth = $self->_prepare(<<END);
+REPLACE INTO $features (id,object,indexed,seqid,start,end,strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)
+END
+
+  # (seqid,start,end,strand,tier,bin), or six NULLs for unindexed features
+  my @location = $index_flag ? $self->_get_location_and_bin($object) : (undef)x6;
+
+  my $primary_tag = $object->primary_tag;
+  my $source_tag  = $object->source_tag || '';
+  $primary_tag    .= ":$source_tag";
+  my $typeid   = $self->_typeid($primary_tag,1);
+
+  $sth->execute($id,$self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
+
+  my $dbh = $self->dbh;
+  $object->primary_id($dbh->{mysql_insertid}) unless defined $id;
+
+  $self->flag_for_indexing($dbh->{mysql_insertid}) if $self->{bulk_update_in_progress};
+}
+
+###
+# Insert one Bio::SeqFeatureI into the database. Its primary_id must be
+# undef, otherwise an exception is thrown. After the insert the object's
+# primary_id is set to the generated id; during a bulk update the id is also
+# flagged for later indexing.
+#
+sub insert {
+  my ($self, $object, $index_flag) = @_;
+  $index_flag ||= 0;
+
+  $self->_load_class($object);
+  $self->throw("$object already has a primary id") if defined $object->primary_id;
+
+  my $features = $self->_feature_table;
+  my $sth = $self->_prepare(<<END);
+INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
+END
+  # an undef id lets the database assign one
+  $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
+
+  my $dbh = $self->dbh;
+  $object->primary_id($dbh->{mysql_insertid});
+  $self->flag_for_indexing($dbh->{mysql_insertid}) if $self->{bulk_update_in_progress};
+}
+
+###
+# Store a bit of DNA or protein in the sequence table.
+# Arguments: sequence ID, the sequence string and its offset. The sequence
+# ID is translated to a locationlist id first.
+#
+sub _insert_sequence {
+  my ($self, $seqid, $seq, $offset) = @_;
+  my $location_id = $self->_locationid($seqid);
+  my $seqtable    = $self->_sequence_table;
+  my $sth = $self->_prepare(<<END);
+REPLACE INTO $seqtable (id,offset,sequence) VALUES (?,?,?)
+END
+  $sth->execute($location_id,$offset,$seq) or $self->throw($sth->errstr);
+}
+
+###
+# Record the given primary ID in the update table so that it can be
+# reindexed later.
+#
+sub flag_for_indexing {
+  my ($self, $id) = @_;
+  my $needs_updating = $self->_update_table;
+  my $sth = $self->_prepare("REPLACE INTO $needs_updating VALUES (?)");
+  $sth->execute($id) or $self->throw($self->dbh->errstr);
+}
+
+###
+# Update the name and attribute indexes for the given object.
+# Does nothing if the object has no primary_id. During a bulk update the
+# index rows are written to dump files instead of to the database.
+#
+sub _update_indexes {
+  my ($self, $obj) = @_;
+  my $id = $obj->primary_id;
+  return unless defined $id;
+
+  my ($name_updater,$attribute_updater) = $self->{bulk_update_in_progress}
+    ? ('_dump_update_name_index','_dump_update_attribute_index')
+    : ('_update_name_index','_update_attribute_index');
+
+  $self->$name_updater($obj,$id);
+  return $self->$attribute_updater($obj,$id);
+}
+
+# Rewrite the name-index rows for one feature: delete its existing rows
+# (skipped by _delete_index while reindexing), then insert one row per name
+# with display_name=1 and one row per alias with display_name=0.
+sub _update_name_index {
+  my ($self, $obj, $id) = @_;
+  my $name       = $self->_name_table;
+  my $primary_id = $obj->primary_id;   # looked up but not used below
+
+  $self->_delete_index($name,$id);
+  my ($names,$aliases) = $self->feature_names($obj);
+
+  my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
+
+  for my $label (@$names) {
+    $sth->execute($id,$label,1) or $self->throw($sth->errstr);
+  }
+  for my $label (@$aliases) {
+    $sth->execute($id,$label,0) or $self->throw($sth->errstr);
+  }
+  $sth->finish;
+}
+
+# Rewrite the attribute-index rows for one feature: delete its existing rows
+# (skipped by _delete_index while reindexing), then insert one row per
+# tag/value pair, with the tag translated to its attributelist id.
+sub _update_attribute_index {
+  my ($self, $obj, $id) = @_;
+  my $attribute = $self->_attribute_table;
+  $self->_delete_index($attribute,$id);
+
+  my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
+  foreach my $tag ($obj->all_tags) {
+    my $tagid = $self->_attributeid($tag);
+    foreach my $tag_value ($obj->each_tag_value($tag)) {
+      $sth->execute($id,$tagid,$tag_value) or $self->throw($sth->errstr);
+    }
+  }
+  $sth->finish;
+}
+
+# Translate a name into its numeric id using a lookup table.
+#
+# Arguments: unqualified table name, the name column, the name to look up,
+# and a flag. Returns the existing id if the name is present. Otherwise
+# returns nothing unless the flag is true, in which case the name is
+# inserted and the newly generated id is returned.
+# Dies with the statement's error string if a statement fails.
+sub _genericid {
+  my ($self, $table, $namefield, $name, $add_if_missing) = @_;
+  my $qualified_table = $self->_qualify($table);
+
+  my $lookup = $self->_prepare(<<END);
+SELECT id FROM $qualified_table WHERE $namefield=?
+END
+  $lookup->execute($name) or die $lookup->errstr;
+  my ($id) = $lookup->fetchrow_array;
+  $lookup->finish;
+  return $id if defined $id;
+  return     unless $add_if_missing;
+
+  my $insert = $self->_prepare(<<END);
+INSERT INTO $qualified_table ($namefield) VALUES (?)
+END
+  $insert->execute($name) or die $insert->errstr;
+  my $dbh = $self->dbh;
+  return $dbh->{mysql_insertid};
+}
+
+# Name-to-id lookups for the typelist, locationlist and attributelist
+# tables. Each one asks _genericid to add the name if it is missing; only
+# the first argument after the invocant is used.
+sub _typeid {
+  my ($self, $tag) = @_;
+  return $self->_genericid('typelist','tag',$tag,1);
+}
+sub _locationid {
+  my ($self, $seqname) = @_;
+  return $self->_genericid('locationlist','seqname',$seqname,1);
+}
+sub _attributeid {
+  my ($self, $tag) = @_;
+  return $self->_genericid('attributelist','tag',$tag,1);
+}
+
+# Return (seqid, start, end, strand, tier, bin) for a feature, where seqid
+# is the locationlist id of the feature's seq_id, an unset strand becomes 0,
+# and tier/bin come from get_bin().
+sub _get_location_and_bin {
+  my ($self, $feature) = @_;
+  my $seqid  = $self->_locationid($feature->seq_id);
+  my $start  = $feature->start;
+  my $end    = $feature->end;
+  my $strand = $feature->strand;
+  $strand  ||= 0;
+  my @tier_and_bin = $self->get_bin($start,$end);
+  return ($seqid,$start,$end,$strand,@tier_and_bin[0,1]);
+}
+
+# Find the smallest bin that holds the whole range $start..$end.
+# Bin sizes start at MIN_BIN and grow tenfold per tier until start and end
+# fall into the same bin. Returns (tier, bin number).
+sub get_bin {
+  my ($self, $start, $end) = @_;
+  my ($tier,$binsize) = (0,MIN_BIN);
+  until (int($start/$binsize) == int($end/$binsize)) {
+    $binsize *= 10;
+    $tier++;
+  }
+  return ($tier,int($start/$binsize));
+}
+
+# Build the SQL condition that restricts a range query to the bins that can
+# hold features touching $start..$end, one clause per tier from MIN_BIN up
+# to MAX_BIN. $f is the alias of the table holding the tier and bin columns.
+#
+# In list context returns (condition with placeholders, bind values...);
+# in scalar context returns the result of substitute() on the same values.
+sub bin_where {
+  my $self = shift;
+  my ($start,$end,$f) = @_;
+  my (@bins,@args);
+
+  my $tier         = 0;
+  my $binsize      = MIN_BIN;
+  while ($binsize <= MAX_BIN) {
+    my $bin_start = int($start/$binsize);
+    my $bin_end   = int($end/$binsize);
+    push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
+    push @args,($tier,$bin_start,$bin_end);
+    $binsize *= 10;
+    $tier++;
+  }
+  my $query = join ("\n\t OR ",@bins);
+  return wantarray ? ($query,@args) : substitute($query,@args);
+}
+
+
+# Delete the rows for one feature id from an index table. Does nothing while
+# a reindex is in progress.
+sub _delete_index {
+  my ($self, $table_name, $id) = @_;
+  return if $self->{reindexing};
+  my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?");
+  $sth or $self->throw($self->dbh->errstr);
+  $sth->execute($id);
+}
+
+# Given an executed statement handle that yields rows of (id,object), thaw
+# every object and return them as a list. The handle is finished afterwards.
+sub _sth2objs {
+  my ($self, $sth) = @_;
+  my @objects;
+  while (my @row = $sth->fetchrow_array) {
+    my ($id,$frozen) = @row;
+    push @objects,scalar $self->thaw($frozen,$id);
+  }
+  $sth->finish;
+  return @objects;
+}
+
+# Given an executed statement handle that yields rows of (id,object), thaw
+# and return the object from the next row only. Returns nothing when there
+# is no row or its object column is false. The handle is not finished here.
+sub _sth2obj {
+  my ($self, $sth) = @_;
+  my ($id,$frozen) = $sth->fetchrow_array;
+  return unless $frozen;
+  return scalar $self->thaw($frozen,$id);
+}
+
+# Prepare a statement through the database handle's statement cache.
+# Throws the handle's error string if preparation fails.
+sub _prepare {
+  my ($self, $query) = @_;
+  my $dbh = $self->dbh;
+  my $sth = $dbh->prepare_cached($query);
+  $self->throw($dbh->errstr) unless $sth;
+  return $sth;
+}
+
+
+####################################################################################################
+# SQL Fragment generators
+####################################################################################################
+
+# Namespace-qualified table names. Note that _type_table resolves to the
+# feature table.
+sub _feature_table       { return $_[0]->_qualify('feature') }
+sub _location_table      { return $_[0]->_qualify('location') }
+sub _locationlist_table  { return $_[0]->_qualify('locationlist') }
+sub _type_table          { return $_[0]->_qualify('feature') }
+sub _typelist_table      { return $_[0]->_qualify('typelist') }
+sub _name_table          { return $_[0]->_qualify('name') }
+sub _attribute_table     { return $_[0]->_qualify('attribute') }
+sub _attributelist_table { return $_[0]->_qualify('attributelist') }
+sub _parent2child_table  { return $_[0]->_qualify('parent2child') }
+sub _meta_table          { return $_[0]->_qualify('meta') }
+sub _update_table        { return $_[0]->_qualify('update_table') }
+sub _sequence_table      { return $_[0]->_qualify('sequence') }
+
+# Build the WHERE condition for an attribute query.
+#
+# Arguments: alias of the attribute table, alias of the attribute-name
+# table, and a hash reference of tag => value or tag => [values]. A '*' in a
+# value is a wildcard: it is turned into '%' and matched with LIKE.
+# Within one tag the values are ORed; the tags are ORed with each other.
+# Tags are processed in sorted order so that the same request always yields
+# the same SQL text.
+# Returns (condition with placeholders, bind values...).
+sub _make_attribute_where {
+  my $self                     = shift;
+  my ($attributetable,$attributenametable,$attributes) = @_;
+  my @args;
+  my @sql;
+  foreach my $tag (sort keys %$attributes) {
+    my $wanted = $attributes->{$tag};
+    my @values = ref($wanted) && ref($wanted) eq 'ARRAY' ? @$wanted : $wanted;
+    foreach (@values) {  # convert * into % for wildcard matches (on our copy)
+      s/\*/%/g;
+    }
+    my $match  = join ' OR ',map {
+      /%/ ? "$attributetable.attribute_value LIKE ?"
+          : "$attributetable.attribute_value=?"
+    } @values;
+    push @sql,"($attributenametable.tag=? AND ($match))";
+    push @args,($tag,@values);
+  }
+  return (join(' OR ',@sql),@args);
+}
+
+# Build the GROUP BY fragment for an attribute query.
+# Returns nothing when $attributes has no keys; otherwise returns the
+# fragment "f.id HAVING count(f.id)>?" and its bind value, the number of
+# keys minus one.
+sub _make_attribute_group {
+  my ($self, $table_name, $attributes) = @_;
+  my $key_count = keys %$attributes;
+  return unless $key_count;
+  return ("f.id HAVING count(f.id)>?",$key_count-1);
+}
+
+# Debugging aid: warn the query with each '?' placeholder replaced, in
+# order, by the quoted form of the corresponding bind value.
+#
+# The replacement is done in a single pass, so a '?' that appears inside an
+# already substituted value is not mistaken for another placeholder.
+sub _print_query {
+  my $self = shift;
+  my ($query,@args) = @_;
+  my $dbh = $self->dbh;
+  $query =~ s/\?/$dbh->quote(shift @args)/ge;
+  warn $query,"\n";
+}
+
+###
+# Special-purpose store for bulk loading: features are written to the
+# 'feature' dump file rather than to the database.
+#
+# Arguments: the indexed flag followed by the feature objects. Each object
+# gets a fresh id from next_id(), is written as one tab-separated line, and
+# has its primary_id set. Index rows are produced as well when the feature
+# is indexed and autoindex is on.
+# Returns the number of features written.
+#
+sub _dump_store {
+  my ($self, $indexed, @objects) = @_;
+
+  my $store_fh  = $self->dump_filehandle('feature');
+  my $dbh       = $self->dbh;
+  my $autoindex = $self->autoindex;
+
+  my $stored = 0;
+  foreach my $feature (@objects) {
+    my $id       = $self->next_id;
+    # (seqid,start,end,strand,tier,bin), or six undefs for unindexed features
+    my @location = $indexed ? $self->_get_location_and_bin($feature) : (undef)x6;
+    my $type     = $feature->primary_tag;
+    my $source   = $feature->source_tag || '';
+    my $typeid   = $self->_typeid("$type:$source",1);
+
+    my @fields = ($id,$typeid,@location[0..5],$indexed,$dbh->quote($self->freeze($feature)));
+    print $store_fh join("\t",@fields),"\n";
+    $feature->primary_id($id);
+    $self->_update_indexes($feature) if $indexed && $autoindex;
+    $stored++;
+  }
+
+  # remember whether we have ever stored a non-indexed feature
+  if (!$indexed && !$self->{indexed_flag}++) {
+    $self->subfeatures_are_indexed(0);
+  }
+  return $stored;
+}
+
+# Bulk-update counterpart of _add_SeqFeature: write parent/child id pairs to
+# the 'parent2child' dump file instead of to the database.
+#
+# Arguments: the parent (feature object or primary ID) followed by the
+# children (feature objects or primary IDs). Objects are resolved to their
+# primary_id, as _add_SeqFeature does, so that an id rather than a
+# stringified reference is written. Throws if the parent or a child has no
+# primary ID.
+# Returns the number of pairs written.
+sub _dump_add_SeqFeature {
+  my $self     = shift;
+  my $parent   = shift;
+  my @children = @_;
+
+  my $fh = $self->dump_filehandle('parent2child');
+  my $parent_id = (ref $parent ? $parent->primary_id : $parent) 
+    or $self->throw("$parent should have a primary_id");
+  my $count = 0;
+
+  for my $child (@children) {
+    my $child_id = ref $child ? $child->primary_id : $child;
+    defined $child_id or $self->throw("no primary ID known for $child");
+    print $fh join("\t",$parent_id,$child_id),"\n";
+    $count++;
+  }
+  $count;
+}
+
+# Bulk-update counterpart of _update_name_index: write one tab-separated
+# line per name (flag 1) and per alias (flag 0) to the 'name' dump file.
+sub _dump_update_name_index {
+  my ($self, $obj, $id) = @_;
+  my $fh  = $self->dump_filehandle('name');
+  my $dbh = $self->dbh;
+  my ($names,$aliases) = $self->feature_names($obj);
+  for my $name (@$names) {
+    print $fh join("\t",$id,$dbh->quote($name),1),"\n";
+  }
+  for my $alias (@$aliases) {
+    print $fh join("\t",$id,$dbh->quote($alias),0),"\n";
+  }
+}
+
+# Bulk-update counterpart of _update_attribute_index: write one
+# tab-separated line (feature id, attribute id, quoted value) per tag/value
+# pair to the 'attribute' dump file.
+sub _dump_update_attribute_index {
+  my ($self, $obj, $id) = @_;
+  my $fh  = $self->dump_filehandle('attribute');
+  my $dbh = $self->dbh;
+  foreach my $tag ($obj->all_tags) {
+    my $tagid = $self->_attributeid($tag);
+    foreach my $tag_value ($obj->each_tag_value($tag)) {
+      print $fh join("\t",$id,$tagid,$dbh->quote($tag_value)),"\n";
+    }
+  }
+}
+
+# Return the current time, using Time::HiRes::time() when that module has
+# been loaded and the built-in time otherwise.
+sub time {
+  return Time::HiRes::time() if Time::HiRes->can('time');
+  # CORE:: makes it explicit that the built-in is meant, not this sub
+  return CORE::time();
+}
+
+# Destructor: if the object goes away in the middle of a bulk update, remove
+# the temporary dump files.
+sub DESTROY {
+  my ($self) = @_;
+  return unless $self->{bulk_update_in_progress};
+
+  foreach my $table ('feature',$self->index_tables) {
+    my $path = $self->dump_path($table);
+    unlink $path;
+  }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/GFF3Loader.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/GFF3Loader.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/GFF3Loader.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,983 @@
+package Bio::DB::SeqFeature::Store::GFF3Loader;
+
+# $Id: GFF3Loader.pm,v 1.15.4.3 2006/10/02 23:10:17 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store::GFF3Loader -- GFF3 file loader for Bio::DB::SeqFeature::Store
+
+=head1 SYNOPSIS
+
+  use Bio::DB::SeqFeature::Store;
+
+  # Open the sequence database
+  my $db      = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
+                                                 -dsn     => 'dbi:mysql:test',
+                                                 -write   => 1 );
+
+  my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store    => $db,
+							   -verbose  => 1,
+							   -fast     => 1);
+
+  $loader->load('./my_genome.gff3');
+
+
+=head1 DESCRIPTION
+
+The Bio::DB::SeqFeature::Store::GFF3Loader object parses GFF3-format
+sequence annotation files and loads Bio::DB::SeqFeature::Store
+databases. For certain combinations of SeqFeature classes and
+SeqFeature::Store databases it features a "fast load" mode which will
+greatly accelerate the loading of GFF3 databases by a factor of 5-10.
+
+The GFF3 file format has been extended very slightly to accommodate
+Bio::DB::SeqFeature::Store. First, the loader recognizes a new
+directive:
+
+  # #index-subfeatures [0|1]
+
+Note that you can place a space between the two #'s in order to
+prevent GFF3 validators from complaining.
+
+If this is true, then subfeatures are indexed (the default) so that
+they can be retrieved with a query. See L<Bio::DB::SeqFeature::Store>
+for an explanation of this. If false, then subfeatures can only be
+accessed through their parent feature. The default is to index all
+subfeatures.
+
+Second, the loader recognizes a new attribute tag called index, which
+if present, controls indexing of the current feature. Example:
+
+ ctg123	. TF_binding_site 1000 1012 . + . ID=tfbs00001;index=1
+
+You can use this to turn indexing on and off, overriding the default
+for a particular feature.
+
+=cut
+
+
+# load utility - incrementally load the store based on GFF3 file
+#
+# two modes:
+#   slow mode -- features can occur in any order in the GFF3 file
+#   fast mode -- all features with same ID must be contiguous in GFF3 file
+
+use strict;
+use Carp 'croak';
+use IO::File;
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::DB::SeqFeature::Store;
+use File::Spec;
+use base 'Bio::Root::Root';
+
+use constant DEFAULT_SEQ_CHUNK_SIZE => 2000;
+
+# Attribute tags that parse_attributes() returns in the "reserved" hash
+# instead of the "unreserved" one.
+my %Special_attributes = map { $_ => 1 }
+  qw(Gap Target Parent Name Alias ID index Index);
+
+# Lookup table used by handle_feature() to turn the strand column into
+# 1, -1 or 0.
+my %Strandedness = (
+  '+'     => 1,
+  '-'     => -1,
+  '.'     => 0,
+  ''      => 0,
+  '0'     => 0,
+  '1'     => 1,    # also covers a numeric +1
+  '-1'    => -1,
+  'undef' => 0,    # a bare undef to the left of => is the string "undef"
+);
+
+=head2 new
+
+ Title   : new
+ Usage   : $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(@options)
+ Function: create a new parser
+ Returns : a Bio::DB::SeqFeature::Store::GFF3Loader gff3 parser and loader
+ Args    : several - see below
+ Status  : public
+
+This method creates a new GFF3 loader and establishes its connection
+with a Bio::DB::SeqFeature::Store database. Arguments are -name=E<gt>$value
+pairs as described in this table:
+
+ Name               Value
+ ----               -----
+
+ -store             A writeable Bio::DB::SeqFeature::Store database handle.
+
+ -seqfeature_class  The name of the type of Bio::SeqFeatureI object to create
+                      and store in the database (Bio::DB::SeqFeature by default)
+
+ -sf_class          A shorter alias for -seqfeature_class
+
+ -verbose           Send progress information to standard error.
+
+ -fast              If true, activate fast loading (see below)
+
+ -chunk_size        Set the storage chunk size for nucleotide/protein sequences
+                       (default 2000 bytes)
+
+ -tmp               Indicate a temporary directory to use when loading non-normalized
+                       features.
+
+When you call new(), a connection to a Bio::DB::SeqFeature::Store
+database should already have been established and the database
+initialized (if appropriate).
+
+Some combinations of Bio::SeqFeatures and Bio::DB::SeqFeature::Store
+databases support a fast loading mode. Currently the only reliable
+implementation of fast loading is the combination of DBI::mysql with
+Bio::DB::SeqFeature. The other important restriction on fast loading
+is the requirement that a feature that contains subfeatures must occur
+in the GFF3 file before any of its subfeatures. Otherwise the
+subfeatures that occurred before the parent feature will not be
+attached to the parent correctly. This restriction does not apply to
+normal (slow) loading.
+
+If you use an unnormalized feature class, such as
+Bio::SeqFeature::Generic, then the loader needs to create a temporary
+database in which to cache features until all their parts and subparts
+have been seen. This temporary database uses the "berkeleydb" adaptor. The
+-tmp option specifies the directory in which that database will be
+created. If not present, it defaults to the system default tmp
+directory specified by File::Spec-E<gt>tmpdir().
+
+The -chunk_size option allows you to tune the representation of
+DNA/Protein sequence in the Store database. By default, sequences are
+split into 2000 base/residue chunks and then reassembled as
+needed. This avoids the problem of pulling a whole chromosome into
+memory in order to fetch a short subsequence from somewhere in the
+middle. Depending on your usage patterns, you may wish to tune this
+parameter using a chunk size that is larger or smaller than the
+default.
+
+=cut
+
+# Constructor. Accepts the -store, -seqfeature_class/-sf_class, -tmp/-tmpdir,
+# -verbose, -fast and -chunk_size arguments and returns a blessed hash.
+# A temporary 'berkeleydb' store is created only when the feature class does
+# not report normalized subfeatures.
+sub new {
+  my $self = shift;
+  my ($store,$seqfeature_class,$tmpdir,$verbose,$fast,$seq_chunk_size)
+    = rearrange(['STORE',
+                 ['SF_CLASS','SEQFEATURE_CLASS'],
+                 ['TMP','TMPDIR'],
+                 'VERBOSE',
+                 'FAST',
+                 'CHUNK_SIZE',
+                ],@_);
+
+  $seqfeature_class ||= $self->default_seqfeature_class;
+
+  # Only look at $@ when the require was actually attempted; otherwise an
+  # unrelated, older error could be thrown here.
+  unless ($seqfeature_class->can('new')) {
+    eval "require $seqfeature_class";
+    $self->throw($@) if $@;
+  }
+
+  my $normalized = $seqfeature_class->can('subfeatures_are_normalized')
+    && $seqfeature_class->subfeatures_are_normalized;
+
+  my $in_table = $seqfeature_class->can('subfeatures_are_stored_in_a_table')
+    && $seqfeature_class->subfeatures_are_stored_in_a_table;
+
+  if ($fast) {
+    my $canfast = $normalized && $in_table;
+    warn <<END unless $canfast;
+Only features that support the Bio::DB::SeqFeature::NormalizedTableFeature interface
+can be loaded using the -fast method. Reverting to slower feature-by-feature method.
+END
+    $fast &&= $canfast;
+  }
+
+  # try to bring in highres time() function
+  eval "require Time::HiRes";
+
+  $tmpdir ||= File::Spec->tmpdir();
+
+  # Declare first, assign conditionally: "my $x = ... unless COND" has
+  # undefined behaviour according to perlsyn.
+  my $tmp_store;
+  $tmp_store = Bio::DB::SeqFeature::Store->new(-adaptor  => 'berkeleydb',
+                                               -temporary=> 1,
+                                               -dsn      => $tmpdir,
+                                               -cache    => 1,
+                                               -write    => 1)
+    unless $normalized;
+
+  return bless {
+                store            => $store,
+                tmp_store        => $tmp_store,
+                seqfeature_class => $seqfeature_class,
+                fast             => $fast,
+                seq_chunk_size   => $seq_chunk_size || DEFAULT_SEQ_CHUNK_SIZE,
+                verbose          => $verbose,
+                load_data        => {},
+                subfeatures_normalized => $normalized,
+                subfeatures_in_table   => $in_table,
+               },ref($self) || $self;
+}
+
+=head2 load
+
+ Title   : load
+ Usage   : $count = $loader->load(@ARGV)
+ Function: load the indicated files or filehandles
+ Returns : number of feature lines loaded
+ Args    : list of files or filehandles
+ Status  : public
+
+Once the loader is created, invoke its load() method with a list of
+GFF3 or FASTA file paths or previously-opened filehandles in order to
+load them into the database. Compressed files ending with .gz, .Z and
+.bz2 are automatically recognized and uncompressed on the fly. Paths
+beginning with http: or ftp: are treated as URLs and opened using the
+LWP GET program (which must be on your path).
+
+FASTA files are recognized by their initial "E<gt>" character. Do not feed
+the loader a file that is neither GFF3 nor FASTA; I don't know what
+will happen, but it will probably not be what you expect.
+
+=cut
+
+# Load each file, URL or filehandle given as an argument and return the sum
+# of the counts returned by load_fh(). The "load time" reported after each
+# source is measured from the start of the whole load() call.
+sub load {
+  my $self   = shift;
+  my $start  = $self->time();
+  my $loaded = 0;
+
+  foreach my $file_or_fh (@_) {
+    $self->msg("loading $file_or_fh...\n");
+    my $fh = $self->open_fh($file_or_fh);
+    $fh or $self->throw("Couldn't open $file_or_fh: $!");
+    $loaded += $self->load_fh($fh);
+    my $elapsed = $self->time() - $start;
+    $self->msg(sprintf "load time: %5.2fs\n",$elapsed);
+  }
+  return $loaded;
+}
+
+=head2 accessors
+
+The following read-only accessors return values passed or created during new():
+
+ store()          the long-term Bio::DB::SeqFeature::Store object
+
+ tmp_store()      the temporary Bio::DB::SeqFeature::Store object used
+                    during loading
+
+ sfclass()        the Bio::SeqFeatureI class
+
+ fast()           whether fast loading is active
+
+ seq_chunk_size() the sequence chunk size
+
+ verbose()        verbose progress messages
+
+=cut
+
+# Read-only accessors: each returns the like-named slot of the object hash
+# (sfclass() reads the 'seqfeature_class' slot).
+sub store          { my $self = shift; return $self->{store};            }
+sub tmp_store      { my $self = shift; return $self->{tmp_store};        }
+sub sfclass        { my $self = shift; return $self->{seqfeature_class}; }
+sub fast           { my $self = shift; return $self->{fast};             }
+sub seq_chunk_size { my $self = shift; return $self->{seq_chunk_size};   }
+sub verbose        { my $self = shift; return $self->{verbose};          }
+
+=head2 Internal Methods
+
+The following methods are used internally and may be overridden by
+subclasses.
+
+=over 4
+
+=item default_seqfeature_class
+
+  $class = $loader->default_seqfeature_class
+
+Return the default SeqFeatureI class (Bio::DB::SeqFeature).
+
+=cut
+
+# Name of the feature class used when the caller does not supply one.
+sub default_seqfeature_class { return 'Bio::DB::SeqFeature' }
+
+=item subfeatures_normalized
+
+  $flag = $loader->subfeatures_normalized([$new_flag])
+
+Get or set a flag that indicates that the subfeatures are
+normalized. This is deduced from the SeqFeature class information.
+
+=cut
+
+# Combined getter/setter for the 'subfeatures_normalized' slot. When a new
+# value is passed it is stored, but the value returned is the previous one.
+sub subfeatures_normalized {
+  my ($self,@new) = @_;
+  my $previous = $self->{subfeatures_normalized};
+  $self->{subfeatures_normalized} = $new[0] if @new;
+  return $previous;
+}
+
+=item subfeatures_in_table
+
+  $flag = $loader->subfeatures_in_table([$new_flag])
+
+Get or set a flag that indicates that feature/subfeature relationships
+are stored in a table. This is deduced from the SeqFeature class and
+Store information.
+
+=cut
+
+# Combined getter/setter for the 'subfeatures_in_table' slot. When a new
+# value is passed it is stored, but the value returned is the previous one.
+sub subfeatures_in_table {
+  my ($self,@new) = @_;
+  my $previous = $self->{subfeatures_in_table};
+  $self->{subfeatures_in_table} = $new[0] if @new;
+  return $previous;
+}
+
+=item load_fh
+
+  $count = $loader->load_fh($filehandle)
+
+Load the GFF3 data at the other end of the filehandle and return the
+number of feature lines loaded. Internally, load_fh() invokes:
+
+  start_load();
+  do_load($filehandle);
+  finish_load();
+
+=cut
+
+# Run one complete load cycle on an open filehandle: start_load(), do_load()
+# and finish_load(). Returns whatever do_load() returned.
+sub load_fh {
+  my ($self,$fh) = @_;
+  $self->start_load();
+  my $loaded = $self->do_load($fh);
+  $self->finish_load();
+  return $loaded;
+}
+
+
+=item start_load, finish_load
+
+These methods are called at the start and end of a filehandle load.
+
+=cut
+
+# Reset the per-load bookkeeping kept in $self->{load_data} and, in fast
+# mode, ask the store to begin a bulk update.
+sub start_load {
+  my $self = shift;
+  my $ld   = $self->{load_data} ||= {};
+
+  $ld->{Parent2Child}     = {};
+  $ld->{Local2GlobalID}   = {};
+  $ld->{TemporaryID}      = "GFFLoad0000000";  # incremented for features lacking an ID
+  $ld->{IndexSubfeatures} = 1;
+  $ld->{CurrentFeature}   = undef;
+  $ld->{CurrentID}        = undef;
+
+  $self->store->start_bulk_update() if $self->fast;
+}
+
+# Finish a load: build the object tree, complete the bulk update when in fast
+# mode, attempt a commit on the store, and drop the load_data bookkeeping.
+# Each timed step reports its label before and its duration after running.
+sub finish_load {
+  my $self = shift;
+
+  my $timed = sub {
+    my ($label,$step) = @_;
+    $self->msg($label);
+    my $began = $self->time();
+    $step->();
+    $self->msg(sprintf "%5.2fs\n",$self->time()-$began);
+  };
+
+  $timed->("Building object tree...",
+           sub { $self->build_object_tree });
+
+  $timed->("Loading bulk data into database...",
+           sub { $self->store->finish_bulk_update })
+    if $self->fast;
+
+  eval { $self->store->commit };   # any error raised by commit is discarded
+  delete $self->{load_data};
+}
+
+=item do_load
+
+  $count = $loader->do_load($fh)
+
+This is called by load_fh() to load the GFF3 file's filehandle and
+return the number of lines loaded.
+
+=cut
+
+# Read the filehandle line by line and dispatch each line:
+#   "##..." (or "# #...")  -> handle_meta()
+#   "#..."                 -> ignored comment
+#   ">name"                -> start of a FASTA record
+#   anything else          -> load_sequence() in fasta mode,
+#                             handle_feature() in gff mode
+# Returns the number of lines passed to handle_feature().
+sub do_load {
+  my $self = shift;
+  my $fh   = shift;
+
+  my $start = $self->time();
+  my $count = 0;
+  my $mode  = 'gff';  # or 'fasta'
+
+  while (<$fh>) {
+    chomp;
+
+    next unless /^\S/;      # skip blank lines and lines starting with whitespace
+    $mode = 'gff' if /\t/;  # if it has a tab in it, switch to gff mode
+
+    if (/^\#\s?\#\s*(.+)/) {  ## meta instruction
+      $mode = 'gff';
+      $self->handle_meta($1);
+
+    } elsif (/^\#/) {
+      $mode = 'gff';  # just to be safe
+      next;  # comment
+    }
+
+    elsif (/^>\s*(\S+)/) { # FASTA lines are coming
+      $mode = 'fasta';
+      $self->start_or_finish_sequence($1);
+    }
+
+    elsif ($mode eq 'fasta') {
+      $self->load_sequence($_);
+    }
+
+    elsif ($mode eq 'gff') {
+      $self->handle_feature($_);
+      if (++$count % 1000 == 0) {
+        my $now = $self->time();
+        # msg() prints to STDERR, so that is the handle whose tty-ness decides
+        # between an overwriting "\r" and a plain newline.
+        my $nl = -t STDERR && !$ENV{EMACS} ? "\r" : "\n";
+        $self->msg(sprintf("%d features loaded in %5.2fs...$nl",$count,$now - $start));
+        $start = $now;  # the reported time covers the latest 1000 features only
+      }
+    }
+
+    else {
+      $self->throw("I don't know what to do with this line:\n$_");
+    }
+  }
+  $self->store_current_feature();      # during fast loading, we will have a feature left at the very end
+  $self->start_or_finish_sequence();   # finish any half-loaded sequences
+  $self->msg(' 'x80,"\n"); #clear screen
+  $count;
+}
+
+=item handle_meta
+
+  $loader->handle_meta($meta_directive)
+
+This method is called to handle meta-directives such as
+##sequence-region. The method will receive the directive with the
+initial ## stripped off.
+
+=cut
+
+# Act on a meta-directive (text after the leading "##"):
+#   sequence-region NAME START END -> store a 'region' feature
+#   index-subfeatures FLAG         -> record FLAG in load_data and pass it to
+#                                     the store
+# Any other directive is ignored.
+sub handle_meta {
+  my ($self,$instruction) = @_;
+
+  if (my ($seqid,$start,$end)
+        = $instruction =~ /sequence-region\s+(.+)\s+(-?\d+)\s+(-?\d+)/i) {
+    my $region = $self->sfclass->new(-name        => $seqid,
+                                     -seq_id      => $seqid,
+                                     -start       => $start,
+                                     -end         => $end,
+                                     -primary_tag => 'region');
+    $self->store->store($region);
+    return;
+  }
+
+  if (my ($flag) = $instruction =~ /index-subfeatures\s+(\S+)/i) {
+    $self->{load_data}{IndexSubfeatures} = $flag;
+    $self->store->index_subfeatures($flag);
+    return;
+  }
+}
+
+=item handle_feature
+
+  $loader->handle_feature($gff3_line)
+
+This method is called to process a single GFF3 line. It manipulates
+information stored in a data structure called $self-E<gt>{load_data}.
+
+=cut
+
+# Process one tab-separated GFF3 feature line. Lines with fewer than eight
+# columns are ignored. A line whose ID matches the feature currently being
+# built, or one stored earlier in this load, is added to that feature as an
+# extra segment; otherwise the pending feature is stored and a new one is
+# started. Parent/child links and indexing decisions are recorded in
+# $self->{load_data}.
+sub handle_feature {
+  my $self     = shift;
+  my $gff_line = shift;
+  my $ld       = $self->{load_data};
+
+  # a lone "." in any column means "no value"
+  my @columns = map {$_ eq '.' ? undef : $_ } split /\t/,$gff_line;
+  return unless @columns >= 8;
+  my ($refname,$source,$method,$start,$end, $score,$strand,$phase,$attributes)      = @columns;
+  $strand = $Strandedness{$strand||0};
+
+  my ($reserved,$unreserved) = $self->parse_attributes($attributes);
+
+  my $name        = ($reserved->{Name}   && $reserved->{Name}[0]);
+
+  my $has_loadid  = defined $reserved->{ID}[0];
+
+  # features without an ID get a generated temporary one
+  my $feature_id  = $reserved->{ID}[0] || $ld->{TemporaryID}++;
+
+  # Written as a ternary because "my @x = ... if COND" has undefined
+  # behaviour according to perlsyn.
+  my @parent_ids  = $reserved->{Parent} ? @{$reserved->{Parent}} : ();
+
+  # a per-feature Index/index attribute overrides the file-wide default
+  my $index_it = $ld->{IndexSubfeatures};
+  if (exists $reserved->{Index} || exists $reserved->{index}) {
+    $index_it = $reserved->{Index}[0] || $reserved->{index}[0];
+  }
+
+  # Everything in the unreserved hash becomes an attribute, so we copy
+  # some attributes over
+  $unreserved->{Note}   = $reserved->{Note}   if exists $reserved->{Note};
+  $unreserved->{Alias}  = $reserved->{Alias}  if exists $reserved->{Alias};
+  $unreserved->{Target} = $reserved->{Target} if exists $reserved->{Target};
+  $unreserved->{Gap}    = $reserved->{Gap}    if exists $reserved->{Gap};
+  $unreserved->{load_id}= $reserved->{ID}     if exists $reserved->{ID};
+
+  # TEMPORARY HACKS TO SIMPLIFY DEBUGGING
+  push @{$unreserved->{Alias}},$feature_id  if $has_loadid;
+  $unreserved->{parent_id} = \@parent_ids   if @parent_ids;
+
+  # POSSIBLY A PERMANENT HACK -- TARGETS BECOME ALIASES
+  # THIS IS TO ALLOW FOR TARGET-BASED LOOKUPS
+  if (exists $reserved->{Target}) {
+    my %aliases = map {$_=>1} @{$unreserved->{Alias}};
+    for my $t (@{$reserved->{Target}}) {
+      (my $tc = $t) =~ s/\s+.*$//;  # get rid of coordinates
+      $name ||= $tc;
+      push @{$unreserved->{Alias}},$tc unless $name eq $tc || $aliases{$tc};
+    }
+  }
+
+  my @args = (-display_name => $name,
+              -seq_id       => $refname,
+              -start        => $start,
+              -end          => $end,
+              -strand       => $strand || 0,
+              -score        => $score,
+              -phase        => $phase,
+              -primary_tag  => $method || 'feature',
+              -source       => $source,
+              -tag          => $unreserved,
+              -attributes   => $unreserved,
+             );
+
+  # Here's where we handle feature lines that have the same ID (multiple locations, not
+  # parent/child relationships)
+
+  my $old_feat;
+
+  # Current feature is the same as the previous feature, which hasn't yet been loaded
+  if (defined $ld->{CurrentID} && $ld->{CurrentID} eq $feature_id) {
+    $old_feat = $ld->{CurrentFeature};
+  }
+
+  # Current feature is the same as a feature that was loaded earlier
+  elsif ($self->{load_data}{Local2GlobalID}{$feature_id}) {
+    $old_feat = $self->fetch($feature_id)
+      or $self->warn(<<END);
+ID=$feature_id has been used more than once, but it cannot be found in the database.
+This can happen if you have specified fast loading, but features sharing the same ID
+are not contiguous in the GFF file. This will be loaded as a separate feature.
+Line $.: "$_"
+END
+  }
+
+  # contiguous feature, so add a segment
+  if (defined $old_feat) {
+    $self->add_segment($old_feat,$self->sfclass->new(@args));
+    return;
+  }
+
+  # we get here if this is a new feature
+  # first of all, store the current feature if it is there
+  $self->store_current_feature() if defined $ld->{CurrentID};
+
+  # now create the new feature
+  # (index top-level features only if policy asks us to)
+  my $feature = $self->sfclass->new(@args);
+  $feature->object_store($self->store) if $feature->can('object_store');  # for lazy table features
+  $ld->{CurrentFeature} = $feature;
+  $ld->{CurrentID}      = $feature_id;
+
+  my $top_level = !@parent_ids;
+  $index_it   ||= $top_level;
+
+  $ld->{IndexIt}{$feature_id}++    if $index_it;
+  $ld->{TopLevel}{$feature_id}++   if !$self->{fast} && $top_level;  # need to track top level features
+
+  # remember parentage
+  for my $parent (@parent_ids) {
+    push @{$ld->{Parent2Child}{$parent}},$feature_id;
+  }
+
+}
+
+=item store_current_feature
+
+  $loader->store_current_feature()
+
+This method is called to store the currently active feature in the
+database. It uses a data structure stored in $self-E<gt>{load_data}.
+
+=cut
+
+
+# Store the feature currently held in load_data (if any), record the primary
+# id it was given under its load id, and clear the "current" slots.
+sub store_current_feature {
+  my $self = shift;
+
+  my $ld      = $self->{load_data};
+  my $feature = $ld->{CurrentFeature};
+  return unless defined $feature;
+
+  my $load_id    = $ld->{CurrentID};
+  my $normalized = $self->subfeatures_normalized;
+  my $indexed    = $ld->{IndexIt}{$load_id};
+
+  # Where the feature goes:
+  #   indexed                 -> main store, indexed, so it can be searched
+  #                              (top-level or subfeature alike)
+  #   normalized, not indexed -> main store via store_noindex()
+  #   neither                 -> temporary store via store_noindex(), to be
+  #                              picked up again when the object tree is built
+  if    ($indexed)    { $self->store->store($feature)             }
+  elsif ($normalized) { $self->store->store_noindex($feature)     }
+  else                { $self->tmp_store->store_noindex($feature) }
+
+  # the primary id is read back from the feature after it has been stored
+  $ld->{Local2GlobalID}{$load_id} = $feature->primary_id;
+
+  undef $ld->{IndexIt}{$load_id} if $normalized;  # no need to remember this
+  undef $ld->{CurrentID};
+  undef $ld->{CurrentFeature};
+}
+
+=item build_object_tree
+
+ $loader->build_object_tree()
+
+This method gathers together features and subfeatures and builds the graph that connects them.
+
+=cut
+
+###
+# put objects together
+#
+# Dispatch to the table-based or the in-feature tree builder, depending on
+# the subfeatures_in_table flag.
+sub build_object_tree {
+  my $self = shift;
+  return $self->build_object_tree_in_tables if $self->subfeatures_in_table;
+  return $self->build_object_tree_in_features;
+}
+
+=item build_object_tree_in_tables
+
+ $loader->build_object_tree_in_tables()
+
+This method gathers together features and subfeatures and builds the
+graph that connects them, assuming that parent/child relationships
+will be stored in a database table.
+
+=cut
+
+# For every parent recorded in Parent2Child, translate the parent and child
+# load ids into primary ids and register the relationship with the store.
+# Throws if a parent has no primary id.
+sub build_object_tree_in_tables {
+  my $self = shift;
+  my $store = $self->store;
+  my $ld    = $self->{load_data};
+
+  while (my ($load_id,$children) = each %{$ld->{Parent2Child}}) {
+    my $parent_id = $ld->{Local2GlobalID}{$load_id} or $self->throw("$load_id doesn't have a primary id");
+    my @children  = map {$ld->{Local2GlobalID}{$_}} @$children;
+
+    # this updates the table that keeps track of parent/child relationships,
+    # but does not update the parent object -- so (start,end) had better be right!!!
+    $store->add_SeqFeature($parent_id,@children);
+  }
+
+}
+
+=item build_object_tree_in_features
+
+ $loader->build_object_tree_in_features()
+
+This method gathers together features and subfeatures and builds the
+graph that connects them, assuming that parent/child relationships are
+stored in the seqfeature objects themselves.
+
+=cut
+
+# For every load id recorded in TopLevel: fetch the feature, attach its
+# children recursively, and store it in the main store. Features that were
+# not indexed have their primary id cleared before being stored.
+sub build_object_tree_in_features {
+  my $self       = shift;
+  my $store      = $self->store;
+  my $tmp        = $self->tmp_store;
+  my $ld         = $self->{load_data};
+  my $normalized = $self->subfeatures_normalized;
+
+  while (my ($load_id) = each %{$ld->{TopLevel}}) {
+    my $feature = $self->fetch($load_id);
+    unless ($feature) {
+      $self->throw("$load_id (id=$ld->{Local2GlobalID}{$load_id}) should have a database entry, but doesn't");
+    }
+
+    $self->attach_children($store,$ld,$load_id,$feature);
+
+    # Indexed objects are updated, not created anew
+    unless ($ld->{IndexIt}{$load_id}) {
+      $feature->primary_id(undef);
+    }
+    $store->store($feature);
+  }
+
+}
+
+=item attach_children
+
+ $loader->attach_children($store,$load_data,$load_id,$feature)
+
+This recursively adds children to features and their subfeatures. It
+is called when subfeatures are directly contained within other
+features, rather than stored in a relational table.
+
+=cut
+
+# Depth-first: fetch each child listed for $load_id in Parent2Child, attach
+# that child's own children, then add the child to $feature. Returns
+# immediately when $load_id has no recorded children.
+sub attach_children {
+  my ($self,$store,$ld,$load_id,$feature) = @_;
+
+  my $child_ids = $ld->{Parent2Child}{$load_id};
+  return unless $child_ids;
+
+  foreach my $child_id (@$child_ids) {
+    my $child = $self->fetch($child_id);
+    $child or $self->throw("$child_id should have a database entry, but doesn't");
+    $self->attach_children($store,$ld,$child_id,$child);   # recursive call
+    $feature->add_SeqFeature($child);
+  }
+}
+
+=item fetch
+
+ my $feature = $loader->fetch($load_id)
+
+Given a load ID (from the ID= attribute) this method returns the
+feature from the temporary database or the permanent one, depending on
+where it is stored.
+
+=cut
+
+# Look up the primary id recorded for $load_id and fetch the feature from
+# the main store (normalized feature classes, or features marked in IndexIt)
+# or else from the temporary store.
+sub fetch {
+  my ($self,$load_id) = @_;
+  my $ld = $self->{load_data};
+  my $id = $ld->{Local2GlobalID}{$load_id};
+
+  my $in_main_store = $self->subfeatures_normalized || $ld->{IndexIt}{$load_id};
+  return $in_main_store ? $self->store->fetch($id)
+                        : $self->tmp_store->fetch($id);
+}
+
+=item add_segment
+
+ $loader->add_segment($parent,$child)
+
+This method is used to add a split location to the parent.
+
+=cut
+
+# Add $child to $parent as an additional location.
+# If $parent has an add_segment() method and no segments yet, a copy of the
+# parent itself is added first as its initial segment, followed by $child.
+# Otherwise the parent's location is extended with, or replaced by, a split
+# location that contains the child's location.
+sub add_segment {
+  my $self = shift;
+  my ($parent,$child) = @_;
+
+  if ($parent->can('add_segment')) { # probably a lazy table feature
+    # The method names passed to can() must match exactly; a stray trailing
+    # space would make the lookup fail every time.
+    my $segment_count =  $parent->can('denormalized_segment_count') ? $parent->denormalized_segment_count
+                       : $parent->can('denormalized_segments')      ? $parent->denormalized_segments
+                       : $parent->can('segments')                   ? $parent->segments
+                       : 0;
+    unless ($segment_count) {  # convert into a segmented object
+      my $segment;
+      if ($parent->can('clone')) {
+        $segment = $parent->clone;
+      } else {
+        # shallow copy of the parent's hash, blessed into the same class
+        my %clone   = %$parent;
+        $segment = bless \%clone,ref $parent;
+      }
+      delete $segment->{segments};
+      eval {$segment->object_store(undef) };
+      $segment->primary_id(undef);
+
+      # this updates the object and expands its start and end positions without writing
+      # the segments into the database as individual objects
+      $parent->add_segment($segment);
+    }
+    $parent->add_segment($child);
+    1; # for debugging
+  }
+
+  # a conventional Bio::SeqFeature::Generic object - create a split location
+  else {
+    my $current_location = $parent->location;
+    if ($current_location->can('add_sub_Location')) {
+      $current_location->add_sub_Location($child->location);
+    } else {
+      eval "require Bio::Location::Split" unless Bio::Location::Split->can('add_sub_Location');
+      my $new_location = Bio::Location::Split->new();
+      $new_location->add_sub_Location($current_location);
+      $new_location->add_sub_Location($child->location);
+      $parent->location($new_location);
+    }
+  }
+}
+
+=item parse_attributes
+
+ ($reserved,$unreserved) = $loader->parse_attributes($attribute_line)
+
+This method parses the information contained in the $attribute_line
+into two hashrefs, one containing the values of reserved attribute
+tags (e.g. ID) and the other containing the values of unreserved ones.
+
+=cut
+
+# Split a GFF3 attribute column ("tag=v1,v2;tag2=v3") into two hashrefs of
+# tag => [values]: the first for tags listed in %Special_attributes, the
+# second for all other tags. An undefined column yields two empty hashes.
+sub parse_attributes {
+  my $self = shift;
+  my $att  = shift;
+  $att = '' unless defined $att;
+
+  my (%reserved,%unreserved);
+  foreach my $pair (split /;/,$att) {
+    # limit of 2: an "=" inside the value belongs to the value
+    my ($name,$value) = split /=/,$pair,2;
+    next unless defined $name && length $name;
+    my $tag = unescape($name);
+
+    # Split on the literal commas first and unescape afterwards, so that an
+    # escaped comma (%2C) stays inside its value instead of splitting it.
+    my @values = map { unescape($_) } split /,/,(defined $value ? $value : '');
+
+    if ($Special_attributes{$tag}) {  # reserved attribute
+      push @{$reserved{$tag}},@values;
+    } else {
+      push @{$unreserved{$tag}},@values;
+    }
+  }
+  return (\%reserved,\%unreserved);
+}
+
+=item start_or_finish_sequence
+
+  $loader->start_or_finish_sequence('Chr9')
+
+This method is called at the beginning and end of a fasta section.
+
+=cut
+
+
+# this gets called at the beginning and end of a fasta section
+# Flush the pending FASTA record (if one with a defined seqid exists) to the
+# store, then start a new, empty record when $seqid is given.
+sub start_or_finish_sequence {
+  my ($self,$seqid) = @_;
+
+  my $pending = $self->{fasta_load};
+  if ($pending && defined $pending->{seqid}) {
+    $self->store->insert_sequence($pending->{seqid},$pending->{sequence},$pending->{offset});
+    delete $self->{fasta_load};
+  }
+
+  return unless defined $seqid;
+  $self->{fasta_load} = { seqid    => $seqid,
+                          offset   => 0,
+                          sequence => '' };
+}
+
+=item load_sequence
+
+  $loader->load_sequence('gatttcccaaa')
+
+This method is called to load some amount of sequence after
+start_or_finish_sequence() is first called.
+
+=cut
+
+# Append $seq to the pending FASTA record and write out every complete chunk
+# of seq_chunk_size characters, advancing the record's offset each time.
+# Does nothing when no FASTA record is pending.
+sub load_sequence {
+  my ($self,$seq) = @_;
+  my $pending = $self->{fasta_load} or return;
+  my $chunk_size = $self->seq_chunk_size;
+
+  $pending->{sequence} .= $seq;
+  while (length $pending->{sequence} >= $chunk_size) {
+    my $chunk = substr($pending->{sequence},0,$chunk_size);
+    $self->store->insert_sequence($pending->{seqid},$chunk,$pending->{offset});
+    $pending->{offset} += length $chunk;
+    substr($pending->{sequence},0,$chunk_size,'');   # drop what was just written
+  }
+}
+
+=item open_fh
+
+ my $io_file = $loader->open_fh($filehandle_or_path)
+
+This method opens up the indicated file or pipe, using some intelligence to recognize compressed files and URLs and doing 
+the right thing.
+
+=cut
+
+
+# Turn a filehandle, path or URL into a readable handle.
+#   - something that already has a fileno is returned unchanged
+#   - names ending in .gz, .Z or .bz2 are read through gunzip, uncompress or
+#     bunzip2
+#   - names starting with http: or ftp: are read through the GET program
+#   - anything else is opened with IO::File
+# Returns undef when the open fails.
+sub open_fh {
+  my $self  = shift;
+  my $thing = shift;
+
+  no strict 'refs';   # fileno() may be handed a plain string here
+
+  return $thing if defined fileno($thing);
+
+  my @command = $thing =~ /\.gz$/        ? ('gunzip','-c',$thing)
+              : $thing =~ /\.Z$/         ? ('uncompress','-c',$thing)
+              : $thing =~ /\.bz2$/       ? ('bunzip2','-c',$thing)
+              : $thing =~ /^(http|ftp):/ ? ('GET',$thing)
+              :                            ();
+  return IO::File->new($thing) unless @command;
+
+  # List-form pipe open: the name is passed to the program as one argument
+  # and never goes through a shell, so spaces and shell metacharacters in it
+  # are harmless.
+  my $fh = IO::File->new;
+  open($fh,'-|',@command) or return undef;
+  return $fh;
+}
+
+# Print the arguments to STDERR, but only when verbose() is true.
+sub msg {
+  my ($self,@msg) = @_;
+  print STDERR @msg if $self->verbose;
+}
+
+=item time
+
+ my $time = $loader->time
+
+This method returns the current time in seconds, using Time::HiRes if available.
+
+=cut
+
+# Return the current time in seconds: Time::HiRes::time() when that function
+# has been loaded, otherwise the builtin time().
+sub time {
+  return Time::HiRes::time() if Time::HiRes->can('time');
+  # Qualify the builtin explicitly; a bare time() inside a sub that is itself
+  # named "time" is reported by perl as an ambiguous call.
+  return CORE::time();
+}
+
+=item unescape
+
+ my $unescaped = GFF3Loader::unescape($escaped)
+
+This is an internal utility.  It is the same as CGI::Util::unescape,
+but it does not change pluses into spaces and it ignores unicode escapes.
+
+=cut
+
+# Decode %XX hex escapes in a string and return the result. Plus signs and
+# malformed escapes are left untouched.
+sub unescape {
+  my ($encoded) = @_;
+  (my $decoded = $encoded) =~ s/%([0-9a-fA-F]{2})/chr hex($1)/ge;
+  return $decoded;
+}
+
+1;
+__END__
+
+=back
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::NormalizedFeature>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/bdb.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/bdb.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/bdb.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,109 @@
+package Bio::DB::SeqFeature::Store::bdb;
+
+# $Id: bdb.pm,v 1.3.4.1 2006/10/02 23:10:17 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store::bdb - fetch and store objects from a BerkeleyDB
+
+=cut
+
+use strict;
+use base 'Bio::DB::SeqFeature::Store';
+use Bio::DB::GFF::Util::Rearrange 'rearrange';
+use DB_File;
+use Fcntl qw(O_RDWR O_CREAT);
+use File::Temp 'tempdir';
+use File::Path 'rmtree';
+
+# this is a partial implementation -- just enough has been implemented so that we can
+# fetch and store objects. It is used as a temporary failsafe store by the GFF3Loader module
+
+###
+# object initialization
+#
+# Initialise the store: choose (or, for temporary stores, create) the
+# directory, check that it is writable, tie the DB_File hash at path() and
+# seed the '.next_id' counter.
+sub init {
+  my $self          = shift;
+  my ($directory,
+      $is_temporary) = rearrange([['DSN','DIR','DIRECTORY'],
+                                 ['TMP','TEMP','TEMPORARY']
+                                 ],@_);
+  require File::Spec;   # used below; load it explicitly
+  $directory ||= $is_temporary ? File::Spec->tmpdir : '.';
+  $directory = tempdir(__PACKAGE__.'_XXXXXX',TMPDIR=>1,CLEANUP=>1,DIR=>$directory) if $is_temporary;
+  -d $directory && -w _ or $self->throw("Can't write into the directory $directory");
+  $self->default_settings;
+  $self->directory($directory);
+  $self->temporary($is_temporary);
+
+  my %h;
+  tie (%h,'DB_File',$self->path,O_RDWR|O_CREAT,0666,$DB_HASH) or $self->throw("Couldn't tie: $!");
+  $self->db(\%h);
+  $h{'.next_id'} ||= 1;   # first id handed out by _store()
+}
+
+# Freeze each object into the tied hash under its primary id, taking a new id
+# from the '.next_id' counter when the object has none, and return the number
+# of objects stored. The $indexed argument is accepted but not used.
+sub _store {
+  my ($self,$indexed,@objects) = @_;
+  my $db     = $self->db;
+  my $stored = 0;
+
+  foreach my $obj (@objects) {
+    my $primary_id = $obj->primary_id;
+    defined $primary_id or $primary_id = $db->{'.next_id'}++;
+    $db->{$primary_id} = $self->freeze($obj);
+    $obj->primary_id($primary_id);
+    $stored++;
+  }
+  return $stored;
+}
+
+# Replace the frozen copy stored under $primary_id; throws when that id is
+# not present in the database.
+sub _update {
+  my ($self,$object,$primary_id) = @_;
+  my $db = $self->db;
+  exists $db->{$primary_id} or $self->throw("$object is not in database");
+  $db->{$primary_id} = $self->freeze($object);
+}
+
+# Thaw and return the object stored under $id.
+sub _fetch {
+  my ($self,$id) = @_;
+  my $frozen = $self->db->{$id};
+  return scalar $self->thaw($frozen,$id);
+}
+
+# Getter/setter for the 'db' setting; returns the value held before any
+# update.
+sub db {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('db');
+  $self->setting(db => $new[0]) if @new;
+  return $previous;
+}
+
+# Getter/setter for the 'directory' setting; returns the value held before
+# any update.
+sub directory {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('directory');
+  $self->setting(directory => $new[0]) if @new;
+  return $previous;
+}
+
+# Getter/setter for the 'temporary' setting; returns the value held before
+# any update.
+sub temporary {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('temporary');
+  $self->setting(temporary => $new[0]) if @new;
+  return $previous;
+}
+
+# Path of the database file: "feature.bdb" inside the store's directory.
+sub path {
+  my $self = shift;
+  my $dir  = $self->directory;
+  return "$dir/feature.bdb";
+}
+
+# Destructor: untie the database hash and, for temporary stores, remove the
+# store's directory tree. Runs silently.
+sub DESTROY {
+  my $self = shift;
+  my $db   = $self->db;
+  untie %$db if $db;   # nothing to untie if no db was ever set
+  rmtree($self->directory,0,1) if $self->temporary;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/berkeleydb.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/berkeleydb.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/berkeleydb.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1197 @@
+package Bio::DB::SeqFeature::Store::berkeleydb;
+
+# $Id: berkeleydb.pm,v 1.5.4.5 2006/11/22 20:27:47 lstein Exp $
+
+
+use strict;
+use base 'Bio::DB::SeqFeature::Store';
+use Bio::DB::GFF::Util::Rearrange 'rearrange';
+use Bio::DB::Fasta;
+use DB_File;
+use Fcntl qw(O_RDWR O_CREAT);
+use File::Temp 'tempdir';
+use File::Path 'rmtree','mkpath';
+use constant BINSIZE => 10_000;
+use constant MININT  => -999_999_999_999;
+use constant MAXINT  => 999_999_999_999;
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store::berkeleydb -- Storage and retrieval of sequence annotation data in Berkeleydb files
+
+=head1 SYNOPSIS
+
+  use Bio::DB::SeqFeature::Store;
+
+  # Create a database from the feature files located in /home/fly4.3 and store
+  # the database index in the same directory:
+  $db =  Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                          -dir     => '/home/fly4.3');
+
+  # Create a database that will monitor the files in /home/fly4.3, but store
+  # the indexes in /var/databases/fly4.3
+  $db      = Bio::DB::SeqFeature::Store->new( -adaptor    => 'berkeleydb',
+                                              -dsn        => '/var/databases/fly4.3',
+                                              -dir        => '/home/fly4.3');
+
+  # Create a feature database from scratch
+  $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                             -dsn     => '/var/databases/fly4.3',
+                                             -create  => 1);
+
+  # get a feature from somewhere
+  my $feature = Bio::SeqFeature::Generic->new(...);
+
+  # store it
+  $db->store($feature) or die "Couldn't store!";
+
+  # primary ID of the feature is changed to indicate its primary ID
+  # in the database...
+  my $id = $feature->primary_id;
+
+  # get the feature back out
+  my $f  = $db->fetch($id);
+
+  # change the feature and update it
+  $f->start(100);
+  $db->update($f) or die "Couldn't update!";
+
+  # use the GFF3 loader to do a bulk write:
+  my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store   => $db,
+                                                           -verbose => 1,
+                                                           -fast    => 1);
+  $loader->load('/home/fly4.3/dmel-all.gff');
+
+
+  # searching...
+  # ...by id
+  my @features = $db->fetch_many(@list_of_ids);
+
+  # ...by name
+  @features = $db->get_features_by_name('ZK909');
+
+  # ...by alias
+  @features = $db->get_features_by_alias('sma-3');
+
+  # ...by type
+  @features = $db->get_features_by_type('gene');
+
+  # ...by location
+  @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
+
+  # ...by attribute
+  @features = $db->get_features_by_attribute({description => 'protein kinase'});
+
+  # ...by the GFF "Note" field
+  @result_list = $db->search_notes('kinase');
+
+  # ...by arbitrary combinations of selectors
+  @features = $db->features(-name => $name,
+                            -type => $types,
+                            -seq_id => $seqid,
+                            -start  => $start,
+                            -end    => $end,
+                            -attributes => $attributes);
+
+  # ...using an iterator
+  my $iterator = $db->get_seq_stream(-name => $name,
+                                     -type => $types,
+                                     -seq_id => $seqid,
+                                     -start  => $start,
+                                     -end    => $end,
+                                     -attributes => $attributes);
+
+  while (my $feature = $iterator->next_seq) {
+    # do something with the feature
+  }
+
+  # ...limiting the search to a particular region
+  my $segment  = $db->segment('Chr1',5000=>6000);
+  my @features = $segment->features(-type=>['mRNA','match']);
+
+  # getting & storing sequence information
+  # Warning: this returns a string, and not a PrimarySeq object
+  $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
+  my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
+
+  # create a new feature in the database
+  my $feature = $db->new_feature(-primary_tag => 'mRNA',
+                                 -seq_id      => 'chr3',
+                                 -start      => 10000,
+                                 -end        => 11000);
+
+=head1 DESCRIPTION
+
+Bio::DB::SeqFeature::Store::berkeleydb is the Berkeleydb adaptor for
+Bio::DB::SeqFeature::Store. You will not create it directly, but
+instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
+
+See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
+
+=head2 Using the berkeleydb adaptor
+
+The Berkeley database consists of a series of Berkeleydb index files,
+and a couple of special purpose indexes. You can create the index
+files from scratch by creating a new database and calling
+new_feature() repeatedly, you can create the database and then bulk
+populate it using the GFF3 loader, or you can monitor a directory of
+preexisting GFF3 and FASTA files and rebuild the indexes whenever one
+or more of the files changes. The last mode is probably the most
+convenient.
+
+=over 4
+
+=item The new() constructor
+
+The new() constructor method accepts all the arguments recognized by
+Bio::DB::SeqFeature::Store, and a few additional ones. 
+
+Standard arguments:
+
+ Name               Value
+ ----               -----
+
+ -adaptor           The name of the Adaptor class (default DBI::mysql)
+
+ -serializer        The name of the serializer class (default Storable)
+
+ -index_subfeatures Whether or not to make subfeatures searchable
+                    (default true)
+
+ -cache             Activate LRU caching feature -- size of cache
+
+ -compress          Compresses features before storing them in database
+                    using Compress::Zlib
+
+Adaptor-specific arguments
+
+ Name               Value
+ ----               -----
+
+ -dsn               Where the index files are stored
+
+ -dir               Where the source (GFF3, FASTA) files are stored
+
+ -autoindex         An alias for -dir.
+
+ -write             Pass true to open the index files for writing.
+
+ -create            Pass true to create the index files if they don't exist
+                    (implies -write=>1)
+
+ -temp              Pass true to create temporary index files that will
+                    be deleted once the script exits.
+
+Examples:
+
+To create an empty database which will be populated using calls to
+store() or new_feature(), or which will be bulk-loaded using the GFF3
+loader:
+
+  $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                             -dsn     => '/var/databases/fly4.3',
+                                             -create  => 1);
+
+To open a preexisting database in read-only mode:
+
+  $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                             -dsn     => '/var/databases/fly4.3');
+
+To open a preexisting database in read/write (update) mode:
+
+  $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                             -dsn     => '/var/databases/fly4.3',
+                                             -write   => 1);
+
+To monitor a set of GFF3 and FASTA files located in a directory and
+create/update the database indexes as needed. The indexes will be
+stored in a new subdirectory named "indexes":
+
+  $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                             -dir     => '/var/databases/fly4.3');
+
+As above, but store the source files and index files in separate directories:
+
+  $db     = Bio::DB::SeqFeature::Store->new( -adaptor => 'berkeleydb',
+                                             -dsn     => '/var/databases/fly4.3',
+                                             -dir     => '/home/gff3_files/fly4.3');
+
+B<-autoindex> is an alias for B<-dir>.
+
+=back
+
+See L<Bio::DB::SeqFeature::Store> for all the access methods supported
+by this adaptor. The various methods for storing and updating features
+and sequences into the database are supported, but there is no
+locking. If two processes try to update the same database
+simultaneously, the database will likely become corrupted.
+
+=cut
+
+###
+# object initialization
+#
+# Adaptor initialization, called with the named arguments that were given
+# to Bio::DB::SeqFeature::Store->new().
+#
+# Recognized arguments (aliases as listed in the rearrange() call):
+#   -dsn / -db                  directory holding the index files
+#   -dir / -autoindex           directory of source files to monitor
+#   -tmp / -temp / -temporary   build the indexes in a temporary directory
+#   -write / -writable          open the indexes for writing
+#   -create                     (re)create the indexes; implies -write
+#
+# Throws if a directory is invalid, or is not writable when write access
+# was requested. Returns $self.
+sub init {
+  my $self          = shift;
+  my ($directory,
+      $autoindex,
+      $is_temporary,
+      $write,
+      $create,
+     ) = rearrange([['DSN','DB'],
+                   [qw(DIR AUTOINDEX)],
+                   ['TMP','TEMP','TEMPORARY'],
+                   [qw(WRITE WRITABLE)],
+                   'CREATE',
+                  ],@_);
+  if ($autoindex) {
+    -d $autoindex or $self->throw("Invalid directory $autoindex");
+    # without an explicit -dsn the indexes live beside the source files
+    $directory ||= "$autoindex/indexes";
+  }
+  $directory ||= $is_temporary ? File::Spec->tmpdir : '.';
+
+  # The package name is used as the temp directory prefix; colons are
+  # replaced on Windows.
+  my $pacname = __PACKAGE__;
+  if ($^O =~ /mswin/i) {
+    $pacname =~ s/:+/_/g;
+  }
+  $directory = tempdir($pacname.'_XXXXXX',
+                       TMPDIR=>1,
+                       CLEANUP=>1,
+                       DIR=>$directory) if $is_temporary;
+  mkpath($directory);
+  -d $directory or $self->throw("Invalid directory $directory");
+
+  # a temporary database always starts empty, and creating implies writing
+  $create++ if $is_temporary;
+  $write ||= $create;
+  $self->throw("Can't write into the directory $directory") 
+    if $write && !-w $directory;
+
+
+  $self->default_settings;
+  $self->directory($directory);
+  $self->temporary($is_temporary);
+  $self->_delete_databases()    if $create;
+  # $autoindex is passed as the "ignore errors" flag: a missing feature
+  # database is tolerated when a source directory is being monitored
+  $self->_open_databases($write,$create,$autoindex);
+  $self->_permissions($write,$create);
+  return $self;
+}
+
+# This adaptor keeps parent/child relationships in its own parentage database.
+sub can_store_parentage {
+  return 1;
+}
+
+# Second-stage initialization for monitored (-dir/-autoindex) directories.
+#
+# Scans the directory for source files, compares their newest modification
+# time with the mtime stamp file, and rebuilds all indexes with the GFF3
+# loader when any source file is newer. If FASTA files are present, the
+# directory is attached as the sequence database.
+# Does nothing unless a valid -dir/-autoindex directory was given.
+sub post_init {
+  my $self = shift;
+  my ($autodir) = rearrange([['DIR','AUTOINDEX']],@_);
+  return unless $autodir && -d $autodir;
+
+  my $maxtime   = 0;
+
+  opendir (my $D,$autodir) or $self->throw("Couldn't open directory $autodir for reading: $!");
+  my @reindex;
+  my $fasta_files_present;
+
+  while (defined (my $node = readdir($D))) {
+    next if $node =~ /^\./;
+    my $path      = "$autodir/$node";
+    next unless -f $path;
+
+    # skip fasta files - the Bio::DB::Fasta module indexes them on its own
+    if ($node =~ /\.(?:fa|fasta|dna)(?:\.gz)?$/) {
+      $fasta_files_present++;
+      next;
+    }
+
+    # skip index files
+    next if $node =~ /\.(?:bdb|idx|index|stamp)/;
+
+    # skip autosave files, etc
+    next if $node =~ /^\#/;
+    next if $node =~ /~$/;
+
+    # \*_ is the special "_" handle: reuse the stat buffer filled by -f above
+    my $mtime = _mtime(\*_);  # not a typo
+    $maxtime   = $mtime if $mtime > $maxtime;
+    push @reindex,$path;
+  }
+
+  # $D is a directory handle, so it must be released with closedir
+  closedir $D;
+
+  my $timestamp_time  = _mtime($self->_mtime_path) || 0;
+
+  if ($maxtime > $timestamp_time) {
+    warn "Reindexing... this may take a while.";
+    $self->_permissions(1,1);
+    $self->_close_databases();
+    $self->_open_databases(1,1);
+    require Bio::DB::SeqFeature::Store::GFF3Loader
+      unless Bio::DB::SeqFeature::Store::GFF3Loader->can('new');
+    my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store    => $self,
+                                                             -sf_class => $self->seqfeature_class) 
+      or $self->throw("Couldn't create GFF3Loader");
+    $loader->load(@reindex);
+    $self->_touch_timestamp;
+  }
+
+  if ($fasta_files_present) {
+    my $dna_db = Bio::DB::Fasta->new($autodir);
+    $self->dna_db($dna_db);
+  }
+}
+
+# Open (and optionally create) every database file used by the adaptor:
+# the feature hash, the BTREE indexes, the parentage BTREE, the FASTA
+# sequence database if one exists, and the flat-file notes index.
+#
+#   $write          open the files read/write
+#   $create         create the files and empty any existing content
+#   $ignore_errors  return quietly if the feature database cannot be tied
+#                   (used when indexing is deferred to post_init)
+#
+# Throws on any other failure to open a file.
+sub _open_databases {
+  my $self = shift;
+  my ($write,$create,$ignore_errors) = @_;
+
+  my $directory  = $self->directory;
+  unless (-d $directory) {  # directory does not exist
+    $create or $self->throw("Directory $directory does not exist and you did not specify the -create flag");
+    mkpath($directory) or $self->throw("Couldn't create database directory $directory: $!");
+  }
+
+  my $flags = O_RDONLY;
+  $flags   |= O_RDWR  if $write;
+  $flags   |= O_CREAT if $create;
+
+  # Create the main database; this is a DB_HASH implementation
+  my %h;
+  my $result = tie (%h,'DB_File',$self->_features_path,$flags,0666,$DB_HASH);
+  unless ($result) {
+    return if $ignore_errors;  # autoindex set, so defer this
+    $self->throw("Couldn't tie: ".$self->_features_path . " $!");
+  }
+  if ($create) {
+    %h = ();
+    $h{'.next_id'} = 1;
+  }
+  $self->db(\%h);
+
+  # Create the index databases; these are DB_BTREE implementations with duplicates allowed.
+  # Keys are compared case-insensitively.
+  local $DB_BTREE->{flags} = R_DUP;
+  $DB_BTREE->{compare}     = sub { lc($_[0]) cmp lc($_[1]) };
+  for my $idx ($self->_index_files) {
+    my $path = $self->_qualify("$idx.idx");
+    my %db;
+    tie(%db,'DB_File',$path,$flags,0666,$DB_BTREE)
+      or $self->throw("Couldn't tie $path: $!");
+    %db = () if $create;
+    $self->index_db($idx=>\%db);
+  }
+
+  # Create the parentage database
+  my %p;
+  tie (%p,'DB_File',$self->_parentage_path,$flags,0666,$DB_BTREE)
+    or $self->throw("Couldn't tie: ".$self->_parentage_path . " $!");
+  %p = () if $create;
+  $self->parentage_db(\%p);
+
+  if (-e $self->_fasta_path) {
+    my $dna_db = Bio::DB::Fasta->new($self->_fasta_path) or $self->throw("Can't reindex sequence file: $@");
+    $self->dna_db($dna_db);
+  }
+
+  # $create must be tested first: creation implies writing, so testing
+  # $write first would always choose append mode and leave stale entries
+  # in the notes index whenever the other databases are emptied.
+  my $mode =  $create ? "+>"
+            : $write  ? "+>>"
+            : "<";
+
+  open (my $F,$mode,$self->_notes_path) or $self->throw($self->_notes_path.": $!");
+  $self->notes_db($F);
+}
+
+# Rebuild the sequence (FASTA) index. If a private FASTA file was opened
+# for writing, it is closed and indexed; otherwise the store directory
+# itself is indexed, provided it exists.
+sub commit {
+  my $self = shift;
+  if (my $fasta_handle = $self->{fasta_fh}) {
+    $fasta_handle->close;
+    $self->dna_db(Bio::DB::Fasta->new($self->{fasta_file}));
+  }
+  elsif (-d $self->directory) {
+    $self->dna_db(Bio::DB::Fasta->new($self->directory));
+  }
+}
+
+# Drop the references to every open database handle so that the
+# underlying files can be released: the feature hash, the parentage
+# BTREE, the sequence database, the notes index and each named index.
+sub _close_databases {
+  my $self = shift;
+  $self->db(undef);
+  # the parentage database is opened alongside the others in
+  # _open_databases, so it has to be released here as well
+  $self->parentage_db(undef);
+  $self->dna_db(undef);
+  $self->notes_db(undef);
+  $self->index_db($_=>undef) foreach $self->_index_files;
+}
+
+# Intentionally a no-op: constructing the store with -create=>1 already
+# initializes the database files.
+sub _init_database {
+  return;
+}
+
+# Remove every file that belongs to this store: the BTREE indexes, the
+# parentage and feature databases, the private FASTA file, the notes
+# index and the mtime stamp. Missing files are silently ignored.
+sub _delete_databases {
+  my $self = shift;
+  for my $idx ($self->_index_files) {
+    my $path = $self->_qualify("$idx.idx");
+    unlink $path;
+  }
+  unlink $self->_parentage_path;
+  unlink $self->_fasta_path;
+  unlink $self->_features_path;
+  # the notes index is a separate flat file; leaving it behind would keep
+  # notes that refer to features which no longer exist
+  unlink $self->_notes_path;
+  unlink $self->_mtime_path;
+}
+
+# Rewrite the mtime stamp file with the current local time. The file's
+# modification time is what post_init compares against the source files.
+# Throws if the stamp file cannot be opened or closed.
+sub _touch_timestamp {
+  my $self = shift;
+  my $tsf = $self->_mtime_path;
+  # three-argument open with a lexical handle: the path can never be
+  # mistaken for a mode, and no package-global handle is left open
+  open (my $fh,'>',$tsf) or $self->throw("Couldn't open $tsf: $!");
+  print $fh scalar(localtime);
+  # buffered write errors only surface when the handle is closed
+  close $fh or $self->throw("Couldn't close $tsf: $!");
+}
+
+# Write each feature to the main database, assigning a fresh id from the
+# '.next_id' counter where the feature has none. When $indexed is true,
+# the old index entries of an already-identified feature are removed first
+# and new entries are added after the write.
+# Returns the number of features stored.
+sub _store {
+  my ($self,$indexed,@features) = @_;
+  my $store  = $self->db;
+  my $stored = 0;
+  foreach my $feature (@features) {
+    my $id = $feature->primary_id;
+    if ($indexed && $id) {
+      $self->_delete_indexes($feature,$id);
+    }
+    if (!defined $id) {
+      $id = $store->{'.next_id'}++;
+    }
+    $store->{$id} = $self->freeze($feature);
+    $feature->primary_id($id);
+    if ($indexed) {
+      $self->_update_indexes($feature);
+    }
+    $stored += 1;
+  }
+  return $stored;
+}
+
+# Remove the index entries of $obj (stored under $id) from every index by
+# invoking each index updater in delete mode.
+sub _delete_indexes {
+  my ($self,$obj,$id) = @_;
+  my @updaters = qw(_update_name_index
+                    _update_type_index
+                    _update_location_index
+                    _update_attribute_index
+                    _update_note_index);
+  for my $updater (@updaters) {
+    # the trailing true argument asks the updater to delete, not to add
+    $self->$updater($obj,$id,1);
+  }
+}
+
+# Retrieve the serialized record stored under $id and thaw it.
+# Returns whatever thaw() produces for that record.
+sub _fetch {
+  my ($self,$id) = @_;
+  my $store   = $self->db;
+  my $feature = $self->thaw($store->{$id},$id);
+  return $feature;
+}
+
+# Record each child as a subfeature of $parent in the parentage database.
+# Parent and children may be given either as objects or as primary ids.
+# Throws if the parent has no (true) primary id or a child has none.
+sub _add_SeqFeature {
+  my ($self,$parent,@children) = @_;
+  my $parent_id = (ref $parent ? $parent->primary_id : $parent)
+    or $self->throw("$parent should have a primary_id");
+  my $parentage = $self->parentage_db;
+  foreach my $child (@children) {
+    my $child_id = ref $child ? $child->primary_id : $child;
+    $self->throw("no primary ID known for $child") unless defined $child_id;
+    $parentage->{$parent_id} = $child_id;
+  }
+}
+
+# Return the subfeatures recorded for $parent in the parentage database,
+# optionally restricted to the given types.
+#
+# Each child's "primary_tag:source_tag" string must match, in full and
+# case-insensitively, one of the types returned by find_types(@types).
+# Throws if the parent has no primary id.
+sub _fetch_SeqFeatures {
+  my $self   = shift;
+  my $parent = shift;
+  my @types  = @_;
+  my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
+  my $index     = $self->parentage_db;
+  my $db        = tied %$index;
+
+  # the parentage BTREE allows duplicate keys: one entry per child
+  my @children_ids  = $db->get_dup($parent_id);
+  my @children      = map {$self->fetch($_)} @children_ids;
+
+  return @children unless @types;
+
+  my $regexp = join '|',map {quotemeta($_)} $self->find_types(@types);
+  # The alternation has to be grouped: in /^a|b$/ the anchors bind only to
+  # the first and last alternative, which lets partial matches through.
+  return grep {($_->primary_tag.':'.$_->source_tag) =~ /^(?:$regexp)$/i} @children;
+}
+
+# Add index entries for $obj to every index. Does nothing when the
+# feature has no primary id yet.
+sub _update_indexes {
+  my ($self,$obj) = @_;
+  my $id = $obj->primary_id;
+  return unless defined $id;
+  my @updaters = qw(_update_name_index
+                    _update_type_index
+                    _update_location_index
+                    _update_attribute_index
+                    _update_note_index);
+  for my $updater (@updaters) {
+    $self->$updater($obj,$id);
+  }
+}
+
+# Add (or, when $delete is true, remove) the entries of $obj in the
+# 'names' index. Primary names are keyed by their lower-cased form;
+# aliases get the same key with a "_2" suffix marking a secondary id.
+sub _update_name_index {
+  my ($self,$obj,$id,$delete) = @_;
+  my $name_index = $self->index_db('names')
+    or $self->throw("Couldn't find 'names' index file");
+  my ($names,$aliases) = $self->feature_names($obj);
+
+  foreach my $name (@$names) {
+    $self->update_or_delete($delete,$name_index,lc($name),$id);
+  }
+
+  foreach my $alias (@$aliases) {
+    # the _2 indicates a secondary (alias) ID
+    $self->update_or_delete($delete,$name_index,lc($alias)."_2",$id);
+  }
+
+}
+
+# Add (or, when $delete is true, remove) the entry of $obj in the 'types'
+# index. The key is the lower-cased "primary_tag:source_tag"; a missing
+# source tag is treated as an empty string. Features without a primary
+# tag are not indexed.
+sub _update_type_index {
+  my ($self,$obj,$id,$delete) = @_;
+  my $type_index = $self->index_db('types')
+    or $self->throw("Couldn't find 'types' index file");
+  my $primary_tag = $obj->primary_tag;
+  my $source_tag  = $obj->source_tag || '';
+  if (!defined $primary_tag) {
+    return;
+  }
+
+  my $key = lc "$primary_tag:$source_tag";
+  $self->update_or_delete($delete,$type_index,$key,$id);
+}
+
+# Note: this indexing scheme is space-inefficient because it stores the
+# denormalized sequence ID followed by the bin in XXXXXX zero-leading
+# format. It should be replaced with a binary numeric encoding and the
+# BTREE {compare} attribute changed accordingly.
+# Add (or, when $delete is true, remove) the entries of $obj in the
+# 'locations' index. One entry is written for every BINSIZE-wide bin that
+# the feature touches; the key is the lower-cased sequence id followed by
+# the zero-padded bin number, and the value packs id, start, end and
+# strand as four native integers.
+sub _update_location_index {
+  my ($self,$obj,$id,$delete) = @_;
+  my $location_index = $self->index_db('locations')
+    or $self->throw("Couldn't find 'locations' index file");
+
+  my $seq_id = $obj->seq_id || '';
+  my $start  = $obj->start  || '';
+  my $end    = $obj->end    || '';
+  my $strand = $obj->strand;
+
+  my $first_bin = int($start / BINSIZE);
+  my $last_bin  = int($end   / BINSIZE);
+
+  my $bin = $first_bin;
+  while ($bin <= $last_bin) {
+    my $key = sprintf("%s%06d",lc($seq_id),$bin);
+    $self->update_or_delete($delete,$location_index,$key,pack("i4",$id,$start,$end,$strand));
+    $bin++;
+  }
+}
+
+# Add (or, when $delete is true, remove) the entries of $obj in the
+# 'attributes' index: one entry per tag value, keyed by the lower-cased
+# "tag:value" string.
+sub _update_attribute_index {
+  my ($self,$obj,$id,$delete) = @_;
+  my $attribute_index = $self->index_db('attributes')
+    or $self->throw("Couldn't find 'attributes' index file");
+
+  foreach my $tag ($obj->all_tags) {
+    foreach my $value ($obj->each_tag_value($tag)) {
+      my $key = lc("${tag}:${value}");
+      $self->update_or_delete($delete,$attribute_index,$key,$id);
+    }
+  }
+}
+
+# Append the 'Note' tag values of $obj to the flat-file notes index, one
+# record per note: the note text, a tab, and the uuencoded feature id
+# (the uuencoded form supplies the terminating newline).
+# Deletion is not supported for this index, so a true $delete is a no-op.
+# Throws if writing to the notes file fails.
+sub _update_note_index {
+  my $self = shift;
+  my ($obj,$id,$delete) = @_;
+  return if $delete; # we don't know how to do this
+
+  my $fh = $self->notes_db;
+  # "my @notes = ... if COND" has undefined behaviour in Perl, so the
+  # condition is folded into the initializer instead
+  my @notes = $obj->has_tag('Note') ? $obj->get_tag_values('Note') : ();
+
+  for my $note (@notes) {
+    print $fh $note,"\t",pack("u*",$id)
+      or $self->throw("An error occurred while updating note index: $!");
+  }
+}
+
+# Apply one change to a tied BTREE index: remove the ($key,$id) pair when
+# $delete is true, otherwise store $id under $key.
+sub update_or_delete {
+  my ($self,$delete,$db,$key,$id) = @_;
+  return tied(%$db)->del_dup($key,$id) if $delete;
+  $db->{$key} = $id;
+}
+
+# these methods return pointers to....
+# the database that stores serialized objects
+# Accessor for the 'db' setting (the tied hash of serialized features).
+# With an argument, stores it; always returns the value held before the call.
+sub db {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('db');
+  if (@new) {
+    $self->setting(db => $new[0]);
+  }
+  return $previous;
+}
+
+# Accessor for the 'parentage_db' setting (the tied parent/child BTREE).
+# With an argument, stores it; always returns the value held before the call.
+sub parentage_db {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('parentage_db');
+  if (@new) {
+    $self->setting(parentage_db => $new[0]);
+  }
+  return $previous;
+}
+
+# the Bio::DB::Fasta object
+# Accessor for the 'dna_db' setting (the Bio::DB::Fasta object).
+# With an argument, stores it; always returns the value held before the call.
+sub dna_db {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('dna_db');
+  if (@new) {
+    $self->setting(dna_db => $new[0]);
+  }
+  return $previous;
+}
+
+# the specialized notes database
+# Accessor for the 'notes_db' setting (the filehandle of the notes index).
+# With an argument, stores it; always returns the value held before the call.
+sub notes_db {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('notes_db');
+  if (@new) {
+    $self->setting(notes_db => $new[0]);
+  }
+  return $previous;
+}
+
+# The indicated index berkeley db
+# Accessor for a named index database; the index name doubles as the
+# setting key. With a second argument, stores it; always returns the
+# value held before the call.
+sub index_db {
+  my ($self,$index_name,@new) = @_;
+  my $previous = $self->setting($index_name);
+  if (@new) {
+    $self->setting($index_name => $new[0]);
+  }
+  return $previous;
+}
+
+
+# Plain function (not a method): return the modification time of $file,
+# which may be a path or a filehandle, or undef if it cannot be stat'ed.
+sub _mtime {
+  my ($file) = @_;
+  my ($mtime) = (stat($file))[9];   # field 9 of stat() is the mtime
+  return $mtime;
+}
+
+# return names of all the indexes
+# Base names (without the ".idx" extension) of the BTREE index files.
+sub _index_files {
+  return ('names','types','locations','attributes');
+}
+
+# the directory in which we store our indexes
+# Accessor for the 'directory' setting (where the index files are kept).
+# With an argument, stores it; always returns the value held before the call.
+sub directory {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('directory');
+  if (@new) {
+    $self->setting(directory => $new[0]);
+  }
+  return $previous;
+}
+
+# flag indicating that we are a temporary database
+# Accessor for the 'temporary' setting (true for a throw-away database).
+# With an argument, stores it; always returns the value held before the call.
+sub temporary {
+  my ($self,@new) = @_;
+  my $previous = $self->setting('temporary');
+  if (@new) {
+    $self->setting(temporary => $new[0]);
+  }
+  return $previous;
+}
+
+# Get or set the ($write,$create) flags the databases were opened with.
+#
+# Called with two arguments, records them. In every case returns the
+# flags that were recorded before the call, as a two-element list, or an
+# empty list if none had been recorded yet.
+sub _permissions {
+  my $self = shift;
+  my $d = $self->setting('permissions');
+  # The setter must run even when nothing has been stored yet; bailing
+  # out on an unset value would make the first (and every later) call a
+  # no-op, so the flags could never be recorded.
+  if (@_) {
+    my ($write,$create) = @_;
+    $self->setting(permissions=>[$write,$create]);
+  }
+  return $d ? @$d : ();
+}
+
+# file name utilities...
+# Prefix $file with the store directory to form its full path.
+sub _qualify {
+  my ($self,$file) = @_;
+  my $dir = $self->directory;
+  return "$dir/$file";
+}
+
+# Path of the main feature database file.
+sub _features_path {
+  my $self = shift;
+  return $self->_qualify('features.bdb');
+}
+
+# Path of the parent/child relationship database file.
+sub _parentage_path {
+  my $self = shift;
+  return $self->_qualify('parentage.bdb');
+}
+
+# Path of the 'types' index file.
+sub _type_path {
+  my $self = shift;
+  return $self->_qualify('types.idx');
+}
+
+# Path of the 'locations' index file.
+sub _location_path {
+  my $self = shift;
+  return $self->_qualify('locations.idx');
+}
+
+# Path of the 'attributes' index file.
+sub _attribute_path {
+  my $self = shift;
+  return $self->_qualify('attributes.idx');
+}
+
+# Path of the flat-file notes index.
+sub _notes_path {
+  my $self = shift;
+  return $self->_qualify('notes.idx');
+}
+
+# Path of the store's private FASTA sequence file.
+sub _fasta_path {
+  my $self = shift;
+  return $self->_qualify('sequence.fa');
+}
+
+# Path of the stamp file whose mtime records the last reindexing.
+sub _mtime_path {
+  my $self = shift;
+  return $self->_qualify('mtime.stamp');
+}
+
+###########################################
+# searching
+###########################################
+
+# Core search routine behind features() and get_seq_stream().
+#
+# Accepts the named selectors -seq_id/-start/-end/-strand, -name (with
+# -class and -aliases), -type, -attributes, -range_type (default
+# 'overlaps') and -iterator. Each selector that is given narrows the set
+# of matching ids; with no name, seq_id, type or attribute selector every
+# stored feature is returned.
+#
+# Returns a list of features, or an Iterator object over the matching ids
+# when -iterator is true.
+sub _features {
+  my $self = shift;
+  my ($seq_id,$start,$end,$strand,
+      $name,$class,$allow_aliases,
+      $types,
+      $attributes,
+      $range_type,
+      $iterator
+     ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
+                    'NAME','CLASS','ALIASES',
+                    ['TYPES','TYPE','PRIMARY_TAG'],
+                    ['ATTRIBUTES','ATTRIBUTE'],
+                    'RANGE_TYPE',
+                    'ITERATOR',
+                   ],@_);
+
+  $range_type ||= 'overlaps';
+
+  my @result;
+  unless (defined $name or defined $seq_id or defined $types or defined $attributes) {
+    # no selector at all: every id except the '.next_id' bookkeeping key
+    @result = grep {$_ ne '.next_id' } keys %{$self->db};
+  }
+
+  # %found accumulates the ids that survive every filter applied so far;
+  # $result turns false as soon as one filter matches nothing, which also
+  # short-circuits the remaining filters.
+  my %found = ();
+  my $result = 1;
+
+  if (defined($name)) {
+    # hacky backward compatibility workaround
+    undef $class if $class && $class eq 'Sequence';
+    $name     = "$class:$name" if defined $class && length $class > 0;
+    $result &&= $self->filter_by_name($name,$allow_aliases,\%found);
+  }
+
+  if (defined $seq_id) {
+    $result &&= $self->filter_by_location($seq_id,$start,$end,$strand,$range_type,\%found);
+  }
+
+  if (defined $types) {
+    $result &&= $self->filter_by_type($types,\%found);
+  }
+
+  if (defined $attributes) {
+    $result &&= $self->filter_by_attribute($attributes,\%found);
+  }
+
+  push @result,keys %found if $result;
+  return $iterator ? Bio::DB::SeqFeature::Store::berkeleydb::Iterator->new($self,\@result)
+                   : map {$self->fetch($_)} @result;
+}
+
+# Narrow $filter to the features whose name matches $name.
+#
+# $name may contain the glob wildcards '*' and '?'; otherwise it is
+# matched literally. Matching is case-insensitive. When $allow_aliases is
+# true, alias entries (keys carrying the "_2" suffix) match as well.
+# Returns the result of update_filter(): false when nothing matched.
+sub filter_by_name {
+  my $self = shift;
+  my ($name,$allow_aliases,$filter) = @_;
+
+  my $index = $self->index_db('names');
+  my $db    = tied(%$index);
+
+  my ($stem,$regexp) = $self->glob_match($name);
+  $stem   ||= $name;
+  # A name without wildcards is a literal string: escape it so that regex
+  # metacharacters in it ("." in "ZK909.1", brackets, ...) neither match
+  # unrelated keys nor make the pattern invalid.
+  $regexp ||= quotemeta($name);
+  $regexp .= "(?:_2)?" if $allow_aliases;
+
+  # position the cursor at the first key >= the literal stem, then walk
+  # forward for as long as the keys keep matching
+  my $key   = $stem;
+  my $value;
+  my @results;
+  for (my $status = $db->seq($key,$value,R_CURSOR);
+       $status == 0 and $key =~ /^$regexp$/i;
+       $status = $db->seq($key,$value,R_NEXT)) {
+    push @results,$value;
+  }
+  $self->update_filter($filter,\@results);
+}
+
+# Narrow $filter to the features of the given type(s).
+#
+# $types is a single type or an array reference of types. Each type is a
+# Bio::DB::GFF::Typename object or a "primary_tag[:source_tag]" string;
+# without a source tag every source of that primary tag matches.
+# Matching is case-insensitive.
+# Returns the result of update_filter(): false when nothing matched.
+sub filter_by_type {
+  my $self = shift;
+  my ($types,$filter) = @_;
+  my @types = ref $types eq 'ARRAY' ?  @$types : $types;
+
+  my $index = $self->index_db('types');
+  my $db    = tied(%$index);
+
+  my @results;
+
+  for my $type (@types) {
+    my ($primary_tag,$source_tag);
+    if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
+      $primary_tag = $type->method;
+      $source_tag  = $type->source;
+    } else {
+      ($primary_tag,$source_tag) = split ':',$type,2;
+    }
+    # Tags are literal strings, so they are escaped before being used in
+    # the pattern; otherwise a "." or "+" in a tag would act as a regex
+    # operator.
+    my $match = defined $source_tag ? '^'.quotemeta($primary_tag).':'.quotemeta($source_tag).'$'
+                                    : '^'.quotemeta($primary_tag).':';
+    $source_tag ||= '';
+    my $key   = lc "$primary_tag:$source_tag";
+    my $value;
+
+    # start at the first key >= "tag:source" and walk forward while the
+    # keys still match
+    for (my $status = $db->seq($key,$value,R_CURSOR);
+         $status == 0 && $key =~ /$match/i;
+         $status = $db->seq($key,$value,R_NEXT)) {
+      push @results,$value;
+    }
+
+  }
+  $self->update_filter($filter,\@results);
+}
+
+# Narrow $filter to the features located on $seq_id that satisfy the
+# requested relation to the range $start..$end.
+#
+#   $range_type 'overlaps'      feature overlaps the range
+#               'contains'      feature lies entirely inside the range
+#               'contained_in'  feature spans the entire range
+#
+# An undefined $start or $end leaves that side of the range open. A true
+# $strand restricts the result to features on that strand.
+# Returns the result of update_filter(): false when nothing matched.
+sub filter_by_location {
+  my $self = shift;
+  my ($seq_id,$start,$end,$strand,$range_type,$filter) = @_;
+  $strand ||= 0;
+
+  my $index    = $self->index_db('locations');
+  my $db       = tied(%$index);
+
+  # index keys are the lower-cased seq_id followed by a zero-padded bin number
+  my $binstart = defined $start ? sprintf("%06d",int $start/BINSIZE) : '';
+  my $binend   = defined $end   ? sprintf("%06d",int $end/BINSIZE)   : 'z';  # beyond a number
+
+  # a feature is indexed once per bin it touches, so ids are de-duplicated
+  my %seenit;
+  my @results;
+
+  $start = MININT  if !defined $start;
+  $end   = MAXINT  if !defined $end;
+
+  if ($range_type eq 'overlaps' or $range_type eq 'contains') {
+    my $key     = "\L$seq_id\E$binstart";
+    my $keystop = "\L$seq_id\E$binend";
+    my $value;
+    for (my $status = $db->seq($key,$value,R_CURSOR);
+         $status == 0 && $key le $keystop;
+         $status = $db->seq($key,$value,R_NEXT)) {
+      my ($id,$fstart,$fend,$fstrand) = unpack("i4",$value);
+      next if $seenit{$id}++;
+      next if $strand && $fstrand != $strand;
+      if ($range_type eq 'overlaps') {
+        next unless $fend >= $start && $fstart <= $end;
+      }
+      elsif ($range_type eq 'contains') {
+        next unless $fstart >= $start && $fend <= $end;
+      }
+      push @results,$id;
+    }
+  }
+
+  # for contained in, we look for features originating and terminating outside the specified range
+  # this is incredibly inefficient, but fortunately the query is rare (?)
+  elsif ($range_type eq 'contained_in') {
+    my $key     = "\L$seq_id";
+    my $keystop = "\L$seq_id\E$binstart";
+    my $value;
+
+    # do the left part of the range
+    for (my $status = $db->seq($key,$value,R_CURSOR);
+         $status == 0 && $key le $keystop;
+         $status = $db->seq($key,$value,R_NEXT)) {
+      my ($id,$fstart,$fend,$fstrand) = unpack("i4",$value);
+      next if $seenit{$id}++;
+      next if $strand && $fstrand != $strand;
+      next unless $fstart <= $start && $fend >= $end;
+      push @results,$id;
+    }
+
+    # do the right part of the range
+    # The scan stops at the first key that no longer starts with the
+    # requested seq_id: without that bound the cursor would run on to the
+    # end of the index and report features from other sequences whose
+    # coordinates happen to span the range.
+    $key = "\L$seq_id\E$binend";
+    for (my $status = $db->seq($key,$value,R_CURSOR);
+         $status == 0 && $key =~ /^\Q$seq_id\E/i;
+         $status = $db->seq($key,$value,R_NEXT)) {
+      my ($id,$fstart,$fend,$fstrand) = unpack("i4",$value);
+      next if $seenit{$id}++;
+      next if $strand && $fstrand != $strand;
+      next unless $fstart <= $start && $fend >= $end;
+      push @results,$id;
+    }
+
+  }
+
+  $self->update_filter($filter,\@results);
+}
+
+# Narrow $filter using attribute (tag => value) constraints.
+#
+# $attributes is a hash reference mapping a tag name to one search term or
+# an array reference of terms. Terms may contain the glob wildcards '*'
+# and '?'; otherwise they are matched literally. Matching is
+# case-insensitive.
+# Returns the first true result of update_filter(), or a false value.
+sub filter_by_attribute {
+  my $self = shift;
+  my ($attributes,$filter) = @_;
+
+  my $index = $self->index_db('attributes');
+  my $db    = tied(%$index);
+  my $result;
+
+  for my $att_name (keys %$attributes) {
+    my @result;
+    my @search_terms = ref($attributes->{$att_name}) && ref($attributes->{$att_name}) eq 'ARRAY'
+                           ? @{$attributes->{$att_name}} : $attributes->{$att_name};
+
+    for my $v (@search_terms) {
+      my ($stem,$regexp) = $self->glob_match($v);
+      $stem   ||= $v;
+      # a term without wildcards is a literal string and must be escaped
+      # before it is interpolated into the pattern
+      $regexp ||= quotemeta($v);
+      my $key = "\L${att_name}:${stem}\E";
+      my $value;
+      # the attribute name is literal as well, hence \Q...\E
+      for (my $status = $db->seq($key,$value,R_CURSOR);
+           $status == 0 && $key =~ /^\Q$att_name\E:$regexp$/i;
+           $status = $db->seq($key,$value,R_NEXT)) {
+        push @result,$value;
+      }
+    }
+    # NOTE(review): because of ||=, update_filter is no longer called once
+    # one attribute has produced a true result, so later attributes do not
+    # narrow the filter any further. Confirm whether an AND across all
+    # attributes was intended.
+    $result ||= $self->update_filter($filter,\@result);
+  }
+  $result;
+}
+
+# Free-text search over the values of the attributes named in
+# $attribute_array.
+#
+# The search string is split into words; wildcard characters are dropped.
+# Every indexed value of a listed attribute that contains at least one of
+# the words counts as a hit. $limit is accepted but not applied here.
+#
+# Returns a list of [display_name, matched_text, relevance] triples, where
+# relevance is ten times the number of word occurrences. Features that
+# cannot be fetched or have no display name are left out.
+sub _search_attributes {
+  my $self = shift;
+  my ($search_string,$attribute_array,$limit) = @_;
+  $search_string =~ tr/*?//d;
+  my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
+  # without any word the pattern would be empty, and Perl treats an empty
+  # pattern as "reuse the last successful pattern"
+  return unless @words;
+  my $search = join '|',@words;
+
+  my $index = $self->index_db('attributes');
+  my $db    = tied(%$index);
+
+  my (%results,%notes);
+
+  for my $tag (@$attribute_array) {
+    my $id;
+    my $key = "\L$tag:\E";
+    for (my $status = $db->seq($key,$id,R_CURSOR);
+         $status == 0 and $key =~ /^\Q$tag\E:(.*)/i;
+         $status = $db->seq($key,$id,R_NEXT)) {
+      my $text = $1;
+      # index keys are stored lower-cased, so this test has to ignore case
+      # just like the hit counting below
+      next unless $text =~ /$search/i;
+      for my $w (@words) {
+        my @hits = $text =~ /($w)/ig or next;
+        $results{$id} += @hits;
+      }
+      $notes{$id} .= "$text ";
+    }
+  }
+
+  my @results;
+  for my $id (keys %results) {
+    my $hits = $results{$id};
+    my $note = $notes{$id};
+    $note =~ s/\s+$//;
+    my $relevance = 10 * $hits;
+    my $feature   = $self->fetch($id) or next;
+    my $name      = $feature->display_name or next;
+    push @results,[$name,$note,$relevance];
+  }
+
+  return @results;
+}
+
+# Free-text search over the flat-file notes index.
+#
+# The search string is split into words; wildcard characters are dropped.
+# The notes file is scanned from the start for lines containing any of the
+# words, stopping after $limit matching lines when $limit is given.
+#
+# Returns a list of [display_name, note_text, relevance] triples, where
+# relevance is ten times the number of (case-insensitive) word
+# occurrences in the feature's Note values.
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  $search_string =~ tr/*?//d;
+
+  my @results;
+
+  my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
+  # without any word the pattern would be empty, and Perl treats an empty
+  # pattern as "reuse the last successful pattern"
+  return @results unless @words;
+  my $search = join '|',@words;
+
+  my (%found,$found);
+  my $note_index = $self->notes_db;
+  seek($note_index,0,0);  # back to start
+  # a lexical line variable keeps the caller's $_ intact
+  while (my $line = <$note_index>) {
+    # NOTE(review): this scan is case-sensitive while the hit counting
+    # below is not; confirm which behaviour is intended.
+    next unless $line =~ /$search/;
+    chomp $line;
+    # each record is "note text<TAB>uuencoded feature id"
+    my ($note,$uu) = split "\t",$line;
+    $found{unpack("u*",$uu)}++;
+    last if $limit && ++$found >= $limit;
+  }
+
+  my (@features, @matches);
+  for my $idx (keys %found) {
+    my $feature    = $self->fetch($idx) or next;
+    # "my @values = ... if COND" has undefined behaviour in Perl
+    my @values     = $feature->has_tag('Note') ? $feature->get_tag_values('Note') : ();
+    my $value      = "@values";
+
+    my $hits = 0;
+    $hits++ while $value =~ /($search)/ig;  # count the number of times we were hit
+    push @matches,$hits;
+    push @features,$feature;
+  }
+
+  for (my $i=0; $i<@matches; $i++)  {
+    my $feature = $features[$i];
+    my $matches = $matches[$i];
+
+    my $relevance = 10 * $matches;
+    my $note;
+    $note   = join ' ',$feature->get_tag_values('Note') if $feature->has_tag('Note');
+    push @results,[$feature->display_name,$note,$relevance];
+  }
+
+  return @results;
+}
+
+# Translate a glob-style search term into a regular expression.
+#
+# Returns an empty list when $term contains no '*' or '?' wildcard.
+# Otherwise returns ($stem,$regexp): $stem is the literal text before the
+# first wildcard, and $regexp is $term with regex metacharacters escaped,
+# '*' turned into '.*' and '?' into '.'. Characters that are already
+# preceded by a backslash are left alone.
+sub glob_match {
+  my $self = shift;
+  my $term = shift;
+  return unless $term =~ /([^*?]*)(?:^|[^\\])?[*?]/;
+  my $stem = $1;
+  # A look-behind is used rather than consuming the preceding character:
+  # consuming it makes every second one of a run of adjacent
+  # metacharacters ("..", "**", "((") escape translation.
+  $term =~ s/(?<!\\)([+\[\]^{}\$|\(\).])/\\$1/g;
+  $term =~ s/(?<!\\)\*/.*/g;
+  $term =~ s/(?<!\\)\?/./g;
+  return ($stem,$term);
+}
+
+
+# Combine a new list of matching ids with the running filter hash.
+#
+# An empty result list leaves the filter untouched and returns nothing.
+# If the filter already holds ids, it is reduced to those that also occur
+# in @$results; otherwise it is seeded with @$results. The return value
+# is that of the final hash assignment.
+sub update_filter {
+  my ($self,$filter,$results) = @_;
+  return unless @$results;
+
+  my @keep = %$filter ? grep { $filter->{$_} } @$results
+                      : @$results;
+  %$filter = map { $_ => 1 } @keep;
+}
+
+# this is ugly
+# Append a chunk of sequence to the store's private FASTA file. A chunk
+# at offset 0 starts a new record, so the ">seqid" header line is written
+# first. Does nothing if the FASTA file cannot be opened.
+sub _insert_sequence {
+  my ($self,$seqid,$seq,$offset) = @_;
+  my $out = $self->private_fasta_file or return;
+  print {$out} ">$seqid\n" if $offset == 0;   # start of the sequence
+  print {$out} $seq,"\n";
+}
+
+# Return the requested (sub)sequence from the attached sequence database,
+# or nothing when no sequence database is attached.
+sub _fetch_sequence {
+  my ($self,$seqid,$start,$end) = @_;
+  my $dna = $self->dna_db;
+  return unless $dna;
+  return $dna->seq($seqid,$start,$end);
+}
+
+# Return the write handle of the store's private FASTA file, opening the
+# file (and remembering its path in {fasta_file}) on first use. The
+# handle is cached in {fasta_fh}, even if opening failed.
+sub private_fasta_file {
+  my $self = shift;
+  unless (exists $self->{fasta_fh}) {
+    $self->{fasta_file} = $self->_qualify("sequence.fa");
+    $self->{fasta_fh}   = IO::File->new($self->{fasta_file},">");
+  }
+  return $self->{fasta_fh};
+}
+
+# Called at the end of a bulk load: if a private FASTA file was written,
+# close it and build a Bio::DB::Fasta index for it, kept in {fasta_db}.
+# NOTE(review): the index is stored under {fasta_db}, whereas
+# _fetch_sequence reads dna_db; confirm whether dna_db should be set here.
+sub finish_bulk_update {
+  my $self = shift;
+  my $fasta_handle = $self->{fasta_fh};
+  if ($fasta_handle) {
+    $fasta_handle->close;
+    $self->{fasta_db} = Bio::DB::Fasta->new($self->{fasta_file});
+  }
+}
+
+
+# Destructor: release all database handles and, for a temporary store,
+# remove its directory tree.
+sub DESTROY {
+  my $self = shift;
+  $self->_close_databases();
+  if ($self->temporary) {
+    rmtree($self->directory,0,1);
+  }
+}
+
+# TIE interface -- a little annoying because we are storing magic ".variable"
+# meta-variables in the same data structure as the IDs, so these variables
+# must be skipped.
+# Return the first feature id in the database, or undef if there is none.
+# Keys beginning with "." are bookkeeping entries (such as '.next_id')
+# and are skipped.
+sub _firstid {
+  my $self  = shift;
+  my $db    = $self->db;
+  # Restart the hash iterator: "each" resumes wherever a previous, possibly
+  # unfinished, iteration stopped, so without the reset the "first" id
+  # would depend on earlier calls.
+  keys %{$db};
+  my ($key,$value);
+  while ( ($key,$value) = each %{$db}) {
+    last unless $key =~ /^\./;
+  }
+  $key;
+}
+
+# Advance the database iterator and return the next feature id, or undef
+# once the iteration is exhausted. Keys beginning with "." are
+# bookkeeping entries and are skipped. The $id argument is not consulted;
+# the position is held by the hash iterator itself.
+sub _nextid {
+  my ($self,$id) = @_;
+  my $store = $self->db;
+  my ($next_key,$record);
+  while ( ($next_key,$record) = each %$store ) {
+    if ($next_key !~ /^\./) {
+      last;
+    }
+  }
+  return $next_key;
+}
+
+# True if a record is stored under $id in the feature database.
+sub _existsid {
+  my ($self,$id) = @_;
+  return exists $self->db->{$id};
+}
+
+# Remove the feature stored under $id: its index entries first, then the
+# record itself. Does nothing if the feature cannot be fetched.
+sub _deleteid {
+  my ($self,$id) = @_;
+  my $feature = $self->fetch($id);
+  return unless $feature;
+  $self->_delete_indexes($feature,$id);
+  delete $self->db->{$id};
+}
+
+# Empty the store: close every database, delete the files, and reopen
+# them with the write/create flags recorded by _permissions.
+sub _clearall {
+  my $self = shift;
+  $self->_close_databases();
+  $self->_delete_databases();
+  my ($may_write,$may_create) = $self->_permissions;
+  $self->_open_databases($may_write,$may_create);
+}
+
+# Return the number of features in the database. Keys beginning with "."
+# are bookkeeping entries (such as '.next_id') and are not counted.
+#
+# A hash evaluated in scalar context does not yield a dependable element
+# count, and it would include the bookkeeping key, so the keys are
+# counted explicitly. Note that this walks, and therefore resets, the
+# hash iterator.
+sub _featurecount {
+  my $self  = shift;
+  my $db    = $self->db;
+  my $count = 0;
+  keys %$db;   # restart the iterator so that every key is visited
+  while (defined(my $key = each %$db)) {
+    $count++ unless $key =~ /^\./;
+  }
+  return $count;
+}
+
+
+package Bio::DB::SeqFeature::Store::berkeleydb::Iterator;
+
+# Feature stream over a precomputed list of primary ids.
+
+# Build an iterator from the store that will fetch the features and an
+# array reference of ids. The id array is consumed as the stream is read.
+sub new {
+  my ($class,$store,$ids) = @_;
+  my $self = { store => $store,
+               ids   => $ids };
+  return bless $self, ref($class) || $class;
+}
+
+# Fetch and return the feature for the next id, or nothing once the ids
+# are exhausted (or if the iterator has no store).
+sub next_seq {
+  my $self  = shift;
+  my $store = $self->{store};
+  return unless $store;
+  my $next_id = shift @{ $self->{ids} };
+  return unless defined $next_id;
+  return $store->fetch($next_id);
+}
+
+1;
+
+__END__
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::Store::GFF3Loader>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::Store::memory>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/memory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/memory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store/memory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,632 @@
+package Bio::DB::SeqFeature::Store::memory;
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store::memory -- In-memory implementation of Bio::DB::SeqFeature::Store
+
+=head1 SYNOPSIS
+
+  use Bio::DB::SeqFeature::Store;
+
+  # Open the sequence database
+  my $db      = Bio::DB::SeqFeature::Store->new( -adaptor => 'memory',
+                                                 -dsn     => '/var/databases/test');
+
+  # search... by id
+  my @features = $db->fetch_many(@list_of_ids);
+
+  # ...by name
+  @features = $db->get_features_by_name('ZK909');
+
+  # ...by alias
+  @features = $db->get_features_by_alias('sma-3');
+
+  # ...by type
+  @features = $db->get_features_by_type('gene');
+
+  # ...by location
+  @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
+
+  # ...by attribute
+  @features = $db->get_features_by_attribute({description => 'protein kinase'});
+
+  # ...by the GFF "Note" field
+  @result_list = $db->search_notes('kinase');
+
+  # ...by arbitrary combinations of selectors
+  @features = $db->features(-name => $name,
+                            -type => $types,
+                            -seq_id => $seqid,
+                            -start  => $start,
+                            -end    => $end,
+                            -attributes => $attributes);
+
+  # ...using an iterator
+  my $iterator = $db->get_seq_stream(-name => $name,
+                                     -type => $types,
+                                     -seq_id => $seqid,
+                                     -start  => $start,
+                                     -end    => $end,
+                                     -attributes => $attributes);
+
+  while (my $feature = $iterator->next_seq) {
+    # do something with the feature
+  }
+
+  # ...limiting the search to a particular region
+  my $segment  = $db->segment('Chr1',5000=>6000);
+  my @features = $segment->features(-type=>['mRNA','match']);
+
+  # getting & storing sequence information
+  # Warning: this returns a string, and not a PrimarySeq object
+  $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
+  my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
+
+  # create a new feature in the database
+  my $feature = $db->new_feature(-primary_tag => 'mRNA',
+                                 -seq_id      => 'chr3',
+                                 -start      => 10000,
+                                 -end        => 11000);
+
+=head1 DESCRIPTION
+
+Bio::DB::SeqFeature::Store::memory is the in-memory adaptor for
+Bio::DB::SeqFeature::Store. You will not create it directly, but
+instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
+
+See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
+
+=head2 Using the memory adaptor
+
+Before using the memory adaptor, populate a readable directory on the
+file system with annotation and/or sequence files. The annotation
+files must be in GFF3 format, and should end in the extension .gff or
+.gff3. They may be compressed with "compress", "gzip" or "bzip2" (in
+which case the appropriate compression extension must be present as
+well).
+
+You may include sequence data inline in the GFF3 files, or put the
+sequence data in one or more separate FASTA-format files. These files
+must end with .fa or .fasta and may be compressed. Because of the way
+the adaptor works, you will get much better performance if you keep
+the sequence data in separate FASTA files.
+
+Initialize the database using the -dsn option. This should point to
+the directory containing the annotation and sequence files, or to a
+single GFF3 file. Examples:
+
+
+  # load all GFF3 and FASTA files located in /var/databases/test directory
+  $db  = Bio::DB::SeqFeature::Store->new( -adaptor => 'memory',
+                                          -dsn     => '/var/databases/test');
+
+
+  # load the data in a single compressed GFF3 file located at
+  # /usr/annotations/worm.gff3.gz
+  $db  = Bio::DB::SeqFeature::Store->new( -adaptor => 'memory',
+                                          -dsn     => '/usr/annotations/worm.gff3.gz');
+
+See L<Bio::DB::SeqFeature::Store> for all the access methods supported
+by this adaptor. The various methods for storing and updating features
+and sequences into the database are supported, including GFF3 loading
+support, but since this is an in-memory adaptor all changes you make
+will be lost when the script exits.
+
+=cut
+
+# $Id: memory.pm,v 1.5.4.3 2006/10/02 23:10:17 sendu Exp $
+use strict;
+use base 'Bio::DB::SeqFeature::Store';
+use Bio::DB::SeqFeature::Store::GFF3Loader;
+use Bio::DB::GFF::Util::Rearrange 'rearrange';
+use File::Temp 'tempdir';
+use IO::File;
+use Bio::DB::Fasta;
+
+use constant BINSIZE => 10_000;
+
+###
+# object initialization
+#
+# Run the parent class initialisation with $args, then reset the three
+# in-memory containers: the feature array, the parent->children map and
+# the search indexes. Returns the object itself.
+sub init {
+  my ($self,$args) = @_;
+  $self->SUPER::init($args);
+  @{$self}{qw(_data _children _index)} = ([], {}, {});
+  return $self;
+}
+
+# Optionally preload the store from disk once construction has finished.
+#
+# The path is taken from the DIR, DSN or FILE argument. A directory is
+# scanned for *.gff and *.gff3 files (optionally carrying a .gz, .Z or .bz2
+# suffix); anything else is handed to the loader as a single file. Does
+# nothing when no path is supplied. Throws if the GFF3 loader cannot be
+# created.
+sub post_init {
+  my $self = shift;
+  my ($file_or_dir) = rearrange([['DIR','DSN','FILE']],@_);
+  return unless $file_or_dir;
+
+  my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store    => $self,
+                                                           -sf_class => $self->seqfeature_class)
+    or $self->throw("Couldn't create GFF3Loader");
+  my @argv;
+  if (-d $file_or_dir) {
+    @argv = (
+             glob("$file_or_dir/*.gff"),            glob("$file_or_dir/*.gff3"),
+             glob("$file_or_dir/*.gff.{gz,Z,bz2}"), glob("$file_or_dir/*.gff3.{gz,Z,bz2}")
+             );
+  } else {
+    @argv = $file_or_dir;
+  }
+  # Visible only for the duration of the load; commit() reads this key to
+  # locate FASTA data (presumably the loader calls commit() -- confirm).
+  local $self->{file_or_dir} = $file_or_dir;
+  $loader->load(@argv);
+}
+
+sub commit { # reindex fasta files: (re)build $self->{fasta_db} from whichever FASTA source is known
+  my $self = shift;
+  # two possible sources, checked in order: the private FASTA file written by _insert_sequence, else the load path
+  if (my $fh = $self->{fasta_fh}) { # sequences were written through private_fasta_file()
+    $fh->close; # close the handle before Bio::DB::Fasta reads the file; NOTE(review): the closed handle stays in $self->{fasta_fh}
+    $self->{fasta_db} = Bio::DB::Fasta->new($self->{fasta_file});
+  } elsif (exists $self->{file_or_dir}) { # key is only set (via local) while post_init() is loading
+    $self->{fasta_db} = Bio::DB::Fasta->new($self->{file_or_dir});
+  }
+}
+
+# This adaptor always answers true.
+sub can_store_parentage {
+  return 1;
+}
+
+# Accessor for the array ref that holds every stored feature; the array
+# index of each feature is its primary id.
+sub data {
+  my $self = shift;
+  return $self->{_data};
+}
+
+# Initialising the in-memory "database" is simply a call to init() with no
+# arguments; any arguments passed to this method are ignored.
+sub _init_database {
+  my $self = shift;
+  return $self->init;
+}
+
+# Place each feature into the data array and return how many were stored.
+#
+# A feature that has no primary id yet is given the current length of the
+# array captured on entry. When $indexed is true the id is also recorded
+# in the ids index and the feature's search indexes are updated.
+sub _store {
+  my ($self,$indexed,@features) = @_;
+  my $slots  = $self->data;
+  my $stored = 0;
+  foreach my $feature (@features) {
+    my $id = $feature->primary_id;
+    $id = scalar @{$slots} unless defined $id;
+    $self->data->[$id] = $feature;
+    $feature->primary_id($id);
+    if ($indexed) {
+      $self->{_index}{ids}{$id} = undef;
+      $self->_update_indexes($feature);
+    }
+    $stored++;
+  }
+  return $stored;
+}
+
+# Look up a feature by primary id, i.e. by its position in the data array.
+sub _fetch {
+  my ($self,$id) = @_;
+  return $self->data->[$id];
+}
+
+# Record parent -> child relationships in the _children map.
+#
+# The parent and every child may be given either as an object (its
+# primary_id is used) or directly as an id. Throws when an id is undefined.
+sub _add_SeqFeature {
+  my ($self,$parent,@children) = @_;
+  my $parent_id = ref $parent ? $parent->primary_id : $parent;
+  $self->throw("$parent should have a primary_id") unless defined $parent_id;
+  foreach my $child (@children) {
+    my $child_id = ref $child ? $child->primary_id : $child;
+    $self->throw("no primary ID known for $child") unless defined $child_id;
+    $self->{_children}{$parent_id}{$child_id}++;
+  }
+}
+
+# Return the child features recorded for $parent, optionally restricted to
+# the given types.
+#
+# With no @types every child is returned. Otherwise @types is expanded via
+# find_types() and a child is kept only when its "primary_tag:source_tag"
+# string equals one of the expanded types (case-insensitively).
+# Throws when $parent has no primary id.
+sub _fetch_SeqFeatures {
+  my $self   = shift;
+  my $parent = shift;
+  my @types  = @_;
+  my $parent_id = $parent->primary_id;
+  defined $parent_id or $self->throw("$parent should have a primary_id");
+  my @children_ids  = keys %{$self->{_children}{$parent_id}};
+  my @children      = map {$self->fetch($_)} @children_ids;
+
+  return @children unless @types;
+
+  # The alternatives must be grouped: in /^a|b$/ the anchors bind only to
+  # the first and last alternative, so ungrouped matching would accept
+  # prefix and suffix matches instead of whole-string matches.
+  my $alternatives = join '|',map {quotemeta($_)} $self->find_types(@types);
+  my $regexp       = qr/^(?:$alternatives)$/i;
+  return grep {($_->primary_tag.':'.$_->source_tag) =~ $regexp} @children;
+}
+
+# Refresh the name, type, location and attribute indexes for one feature.
+# A feature without a primary id is silently skipped.
+sub _update_indexes {
+  my ($self,$obj) = @_;
+  my $id = $obj->primary_id;
+  return unless defined $id;
+  $self->_update_name_index($obj,$id);
+  $self->_update_type_index($obj,$id);
+  $self->_update_location_index($obj,$id);
+  $self->_update_attribute_index($obj,$id);
+}
+
+# Enter the feature's names and aliases, lower-cased, into the name index.
+# Names are tagged with 1 and aliases with 2, so an identifier that is both
+# ends up tagged as an alias.
+sub _update_name_index {
+  my ($self,$obj,$id) = @_;
+  my ($names,$aliases) = $self->feature_names($obj);
+  $self->{_index}{name}{lc $_}{$id} = 1 for @$names;
+  $self->{_index}{name}{lc $_}{$id} = 2 for @$aliases;
+}
+
+# Enter the feature into the type index under the lower-cased key
+# "primary_tag:source_tag" (a missing source tag becomes the empty string).
+# Features without a primary tag are not indexed.
+sub _update_type_index {
+  my ($self,$obj,$id) = @_;
+
+  my $method = $obj->primary_tag;
+  my $source = $obj->source_tag || '';
+  return unless defined $method;
+
+  my $type_key = lc "$method:$source";
+  $self->{_index}{type}{$type_key}{$id} = undef;
+}
+
+# Enter the feature into the location index.
+#
+# The index is keyed by lower-cased seq_id and then by bin number, where a
+# bin covers BINSIZE coordinates; the feature id is registered in every bin
+# from the one holding its start to the one holding its end.
+sub _update_location_index {
+  my ($self,$obj,$id) = @_;
+
+  my $seq_id = $obj->seq_id || '';
+  my $start  = $obj->start  || '';
+  my $end    = $obj->end    || '';
+  my $strand = $obj->strand;     # read but not used by the index
+
+  my $first_bin = int($start/BINSIZE);
+  my $last_bin  = int($end/BINSIZE);
+  my $seq_key   = lc $seq_id;
+
+  for (my $bin = $first_bin; $bin <= $last_bin; $bin++) {
+    $self->{_index}{location}{$seq_key}{$bin}{$id} = undef;
+  }
+
+}
+
+# Enter every tag/value pair of the feature into the attribute index, with
+# both the tag and the value lower-cased.
+sub _update_attribute_index {
+  my ($self,$obj,$id) = @_;
+
+  foreach my $tag ($obj->all_tags) {
+    my $tag_key = lc $tag;
+    foreach my $value ($obj->each_tag_value($tag)) {
+      $self->{_index}{attribute}{$tag_key}{lc $value}{$id} = undef;
+    }
+  }
+}
+
+# Core search routine: combine the supplied selectors and return the
+# matching features.
+#
+# Recognised arguments: SEQID/SEQ_ID/REF, START, STOP/END, STRAND, NAME,
+# CLASS, ALIASES, TYPES/TYPE/PRIMARY_TAG, ATTRIBUTES/ATTRIBUTE, RANGE_TYPE
+# (defaults to 'overlaps') and ITERATOR.
+#
+# Each selector that is present narrows a shared set of candidate ids; as
+# soon as one selector reports failure the remaining ones are skipped and
+# nothing is returned. With no name, seq_id, types or attributes selector,
+# every id in the ids index is returned.
+#
+# Returns a Bio::DB::SeqFeature::Store::memory::Iterator when ITERATOR is
+# true, otherwise the list of fetched features.
+sub _features {
+  my $self = shift;
+  my ($seq_id,$start,$end,$strand,
+      $name,$class,$allow_aliases,
+      $types,
+      $attributes,
+      $range_type,
+      $iterator
+     ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
+                    'NAME','CLASS','ALIASES',
+                    ['TYPES','TYPE','PRIMARY_TAG'],
+                    ['ATTRIBUTES','ATTRIBUTE'],
+                    'RANGE_TYPE',
+                    'ITERATOR',
+                   ],@_);
+
+  $range_type ||= 'overlaps';
+
+  my @result;
+  unless (defined $name or defined $seq_id or defined $types or defined $attributes) {
+    @result = keys %{$self->{_index}{ids}};
+  }
+
+  my %found  = ();
+  my $result = 1;
+
+  if (defined($name)) {
+    # hacky backward compatibility workaround
+    undef $class if $class && $class eq 'Sequence';
+    $name     = "$class:$name" if defined $class && length $class > 0;
+    $result &&= $self->filter_by_name($name,$allow_aliases,\%found);
+  }
+
+  if (defined $seq_id) {
+    $result &&= $self->filter_by_location($seq_id,$start,$end,$strand,$range_type,\%found);
+  }
+
+  if (defined $types) {
+    $result &&= $self->filter_by_type($types,\%found);
+  }
+
+  if (defined $attributes) {
+    $result &&= $self->filter_by_attribute($attributes,\%found);
+  }
+
+  push @result,keys %found if $result;
+  return $iterator ? Bio::DB::SeqFeature::Store::memory::Iterator->new($self,\@result)
+                   : map {$self->fetch($_)} @result;
+}
+
+
+# Narrow $filter to the ids of features whose type matches $types, which
+# may be a single type or an array ref of types. The types are expanded by
+# find_types() and looked up in the type index.
+sub filter_by_type {
+  my ($self,$types,$filter) = @_;
+  my @wanted = ref($types) eq 'ARRAY' ? @$types : $types;
+
+  my $index = $self->{_index}{type};
+
+  my @ids = map  { keys %{ $index->{$_} } }
+            grep { exists $index->{$_} }
+            $self->find_types(@wanted);
+
+  return $self->update_filter($filter,\@ids);
+}
+
+# Expand a list of type specifications into the lower-cased
+# "primary_tag:source_tag" keys used by the type index.
+#
+# Each element of @types may be a Bio::DB::GFF::Typename object or a string
+# of the form "primary_tag" or "primary_tag:source_tag". A fully qualified
+# type is returned lower-cased whether or not it is present in the index; a
+# bare primary tag expands to every indexed key that has that primary tag.
+sub find_types {
+  my $self = shift;
+  my @types = @_;
+
+  my @types_found;
+  my $index = $self->{_index}{type};
+
+  for my $type (@types) {
+
+    my ($primary_tag,$source_tag);
+    if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
+      $primary_tag = $type->method;
+      $source_tag  = $type->source;
+    } else {
+      ($primary_tag,$source_tag) = split ':',$type,2;
+    }
+    # \Q...\E: the tag is literal text, not a pattern, so characters such
+    # as "." or "+" in a type name must not be read as regex syntax.
+    push @types_found,defined $source_tag ? lc "$primary_tag:$source_tag"
+                                          : grep {/^\Q$primary_tag\E:/i} keys %{$index};
+  }
+  return @types_found;
+}
+
+# Narrow $filter to the ids of features carrying the requested attributes.
+#
+# $attributes is a hash ref of attribute name => value, or name => array
+# ref of values. A value containing an unescaped "*" or "?" is treated as a
+# glob and compared against every indexed value of that attribute; any
+# other value is looked up directly (lower-cased).
+#
+# NOTE(review): because of the "||=" below, update_filter() is only called
+# until one attribute has produced hits; attributes visited after that are
+# not applied to $filter. Confirm whether that is the intended semantics.
+sub filter_by_attribute {
+  my $self = shift;
+  my ($attributes,$filter) = @_;
+
+  my $index = $self->{_index}{attribute};
+  my $result;
+
+  for my $att_name (keys %$attributes) {
+    my @result;
+    my @search_terms = ref($attributes->{$att_name}) && ref($attributes->{$att_name}) eq 'ARRAY'
+                           ? @{$attributes->{$att_name}} : $attributes->{$att_name};
+    my @regexp_terms;
+    my @terms;
+
+    for my $v (@search_terms) {
+      if (my $regexp = $self->glob_match($v)) {
+        @regexp_terms      = keys %{$index->{lc $att_name}} unless @regexp_terms;
+        # Match with the translated pattern, not the raw glob: as a regex,
+        # "kin*" means "ki" followed by any number of "n", which is not
+        # what the glob "kin*" asks for.
+        push @terms,grep {/^$regexp$/i} @regexp_terms;
+      } else {
+        push @terms,lc $v;
+      }
+    }
+
+    for my $v (@terms) {
+      push @result,keys %{$index->{lc $att_name}{$v}};
+    }
+
+    $result ||= $self->update_filter($filter,\@result);
+  }
+
+  $result;
+}
+
+sub filter_by_location { # narrow $filter to ids of features on $seq_id that pass the $range_type test against [$start,$end]
+  my $self = shift;
+  my ($seq_id,$start,$end,$strand,$range_type,$filter) = @_;
+  $strand ||= 0; # a false strand disables the strand test below
+
+  my $index = $self->{_index}{location}{lc $seq_id}; # bin number => { feature id => undef }
+  my @bins;
+
+  if (!defined $start or !defined $end or $range_type eq 'contained_in') { # these cases need the full, sorted list of occupied bins
+    @bins = sort {$a<=>$b} keys %{$index};
+    # be suspicious of this -- possibly a fencepost error at $end
+    $start = $bins[0]  * BINSIZE  unless defined $start; # NOTE(review): @bins is empty when nothing is indexed for this seq_id
+    $end   = $bins[-1] * BINSIZE  unless defined $end;
+  }
+  my %seenit; # ids already examined; the same id can be registered in several bins
+  my $bin_min       = int $start/BINSIZE;
+  my $bin_max       = int $end/BINSIZE;
+  my @bins_in_range = $range_type eq 'contained_in' ? ($bins[0]..$bin_min,$bin_max..$bins[-1]) # scan from the first bin up to the start bin and from the end bin to the last
+                                                    : ($bin_min..$bin_max);
+
+  my @results;
+  for my $bin (@bins_in_range) {
+    next unless exists $index->{$bin};
+    my @found = keys %{$index->{$bin}};
+    for my $f (@found) {
+      next if $seenit{$f}++;
+      my $feature = $self->_fetch($f) or next;
+      next if $strand && $feature->strand != $strand;
+
+      if ($range_type eq 'overlaps') { # feature and query range intersect
+	next unless $feature->end >= $start && $feature->start <= $end;
+      }
+      elsif ($range_type eq 'contains') { # feature lies entirely inside the query range
+	next unless $feature->start >= $start && $feature->end <= $end;
+      }
+      elsif ($range_type eq 'contained_in') { # feature spans the entire query range
+	next unless $feature->start <= $start && $feature->end >= $end;
+      }
+
+      push @results,$f;
+    }
+  }
+  $self->update_filter($filter,\@results); # its value is this sub's return value
+}
+
+
+# Narrow $filter to the ids of features whose name matches $name.
+#
+# When glob_match() translates $name into a pattern, every indexed name is
+# tested against it; otherwise the lower-cased $name is looked up directly.
+# Index entries tagged 1 are names; other entries are returned only when
+# $allow_aliases is true.
+sub filter_by_name {
+  my ($self,$name,$allow_aliases,$filter) = @_;
+
+  my $index = $self->{_index}{name};
+
+  my $pattern    = $self->glob_match($name);
+  my @candidates = $pattern ? grep {/^$pattern$/i} keys %{$index}
+                            : (lc $name);
+
+  my @ids;
+  foreach my $key (@candidates) {
+    my @entry_ids = keys %{$index->{$key}};
+    @entry_ids    = grep {$index->{$key}{$_} == 1} @entry_ids unless $allow_aliases;
+    push @ids,@entry_ids;
+  }
+  return $self->update_filter($filter,\@ids);
+}
+
+# Translate a shell-style glob into regular expression source.
+#
+# Returns nothing unless $term contains a "*" or "?" that is not preceded
+# by a backslash. Otherwise regex metacharacters are backslash-escaped,
+# "*" becomes ".*" and "?" becomes "."; characters already preceded by a
+# backslash are left alone. The result is unanchored.
+#
+# Lookbehind is used so that the character in front of a metacharacter is
+# tested without being consumed. Consuming it meant that the second of two
+# adjacent metacharacters ("??", "**", "..") was never translated.
+sub glob_match {
+  my $self = shift;
+  my $term = shift;
+  return unless $term =~ /(?:^|[^\\])[*?]/;
+  $term =~ s/(?<!\\)([+\[\]^{}\$|\(\).])/\\$1/g;
+  $term =~ s/(?<!\\)\*/.*/g;
+  $term =~ s/(?<!\\)\?/./g;
+  return $term;
+}
+
+
+sub update_filter { # fold the ids in @$results into the running candidate set %$filter
+  my $self = shift;
+  my ($filter,$results) = @_;
+  return unless @$results; # no hits: %$filter is left untouched and nothing is returned
+
+  if (%$filter) { # candidates already exist: keep only the ids present in both sets
+    my @filtered = grep {$filter->{$_}} @$results;
+    %$filter     = map {$_=>1} @filtered;
+  } else { # empty set: seed it with these results
+    %$filter     = map {$_=>1} @$results;
+  }
+  # no explicit return: the value of the hash assignment above is what callers receive
+}
+
+# Word search over the indexed values of the given attributes.
+#
+# $search_string is stripped of "*" and "?" and split into words; every
+# indexed value of each tag in @$attribute_array is scanned for those words
+# (case-insensitively). Returns a list of [display_name, matching text,
+# relevance] triples, where relevance is 10 times the number of word hits.
+# Features without a display name are omitted.
+# NOTE(review): $limit is accepted but not used here.
+sub _search_attributes {
+  my $self = shift;
+  my ($search_string,$attribute_array,$limit) = @_;
+
+  $search_string =~ tr/*?//d;
+
+  my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
+  # Without words $search would be empty, and an empty pattern makes perl
+  # reuse the last successfully matched regex instead of matching nothing.
+  return unless @words;
+  my $search = join '|',@words;
+
+  my (%results,%notes);
+
+  my $index  = $self->{_index}{attribute};
+  for my $tag (@$attribute_array) {
+    my $attributes = $index->{lc $tag};
+    for my $value (keys %{$attributes}) {
+      next unless $value =~ /$search/i;
+      my @ids = keys %{$attributes->{$value}};
+      for my $w (@words) {
+        my @hits = $value =~ /($w)/ig or next;
+        $results{$_} += @hits foreach @ids;
+      }
+      $notes{$_} .= "$value " foreach @ids;
+    }
+  }
+
+  my @results;
+  for my $id (keys %results) {
+    my $hits = $results{$id};
+    my $note = $notes{$id};
+    $note =~ s/\s+$//;
+    my $relevance = 10 * $hits;
+    my $feature   = $self->fetch($id) or next;   # skip ids that no longer resolve
+    my $name      = $feature->display_name or next;
+    push @results,[$name,$note,$relevance];
+  }
+
+  return @results;
+}
+
+
+# this is ugly
+# Append a chunk of sequence to the private FASTA file. A FASTA header
+# line for $seqid is written first when $offset is numerically zero.
+# Returns nothing if the file handle is unavailable.
+sub _insert_sequence {
+  my ($self,$seqid,$seq,$offset) = @_;
+  my $dna_fh = $self->private_fasta_file or return;
+  print $dna_fh ">$seqid\n" if $offset == 0;
+  print $dna_fh $seq,"\n";
+}
+
+# Retrieve (part of) a sequence through the fasta_db object, if one has
+# been built; returns nothing otherwise.
+sub _fetch_sequence {
+  my ($self,$seqid,$start,$end) = @_;
+  my $fasta = $self->{fasta_db};
+  return unless $fasta;
+  return $fasta->seq($seqid,$start,$end);
+}
+
+# Return the write handle for this store's private FASTA file, creating the
+# file in a fresh temporary directory (removed at exit) on first use.
+sub private_fasta_file {
+  my $self = shift;
+  unless (exists $self->{fasta_fh}) {
+    my $dir = tempdir(CLEANUP => 1);
+    $self->{fasta_file} = "$dir/sequence.$$.fasta";
+    $self->{fasta_fh}   = IO::File->new($self->{fasta_file},">");
+  }
+  return $self->{fasta_fh};
+}
+
+package Bio::DB::SeqFeature::Store::memory::Iterator;
+
+# Create an iterator that walks the id list $ids (an array ref) and fetches
+# each feature from $store. Works as a class or an instance method.
+sub new {
+  my ($proto,$store,$ids) = @_;
+  my $package  = ref($proto) || $proto;
+  my $iterator = { store => $store, ids => $ids };
+  return bless $iterator,$package;
+}
+
+# Remove the next id from the list and return the corresponding feature.
+# Returns nothing when there is no store or no id left.
+sub next_seq {
+  my ($self) = @_;
+  my $store = $self->{store};
+  return unless $store;
+  my $current = shift @{ $self->{ids} };
+  return unless defined $current;
+  return $store->fetch($current);
+}
+
+1;
+
+__END__
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::Store::berkeleydb>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature/Store.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2353 @@
+package Bio::DB::SeqFeature::Store;
+
+# $Id: Store.pm,v 1.21.4.4 2006/10/07 12:54:33 lstein Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature::Store -- Storage and retrieval of sequence annotation data
+
+=head1 SYNOPSIS
+
+  use Bio::DB::SeqFeature::Store;
+
+  # Open the feature database
+  my $db      = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
+                                                 -dsn     => 'dbi:mysql:test',
+                                                 -write   => 1 );
+
+  # get a feature from somewhere
+  my $feature = Bio::SeqFeature::Generic->new(...);
+
+  # store it
+  $db->store($feature) or die "Couldn't store!";
+
+  # primary ID of the feature is changed to indicate its primary ID
+  # in the database...
+  my $id = $feature->primary_id;
+
+  # get the feature back out
+  my $f  = $db->fetch($id);
+
+  # change the feature and update it
+  $f->start(100);
+  $db->update($f) or die "Couldn't update!";
+
+  # searching...
+  # ...by id
+  my @features = $db->fetch_many(@list_of_ids);
+
+  # ...by name
+  @features = $db->get_features_by_name('ZK909');
+
+  # ...by alias
+  @features = $db->get_features_by_alias('sma-3');
+
+  # ...by type
+  @features = $db->get_features_by_type('gene');
+
+  # ...by location
+  @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
+
+  # ...by attribute
+  @features = $db->get_features_by_attribute({description => 'protein kinase'});
+
+  # ...by the GFF "Note" field
+  @result_list = $db->search_notes('kinase');
+
+  # ...by arbitrary combinations of selectors
+  @features = $db->features(-name => $name,
+                            -type => $types,
+                            -seq_id => $seqid,
+                            -start  => $start,
+                            -end    => $end,
+                            -attributes => $attributes);
+
+  # ...using an iterator
+  my $iterator = $db->get_seq_stream(-name => $name,
+                                     -type => $types,
+                                     -seq_id => $seqid,
+                                     -start  => $start,
+                                     -end    => $end,
+                                     -attributes => $attributes);
+
+  while (my $feature = $iterator->next_seq) {
+    # do something with the feature
+  }
+
+  # ...limiting the search to a particular region
+  my $segment  = $db->segment('Chr1',5000=>6000);
+  my @features = $segment->features(-type=>['mRNA','match']);
+
+  # getting & storing sequence information
+  # Warning: this returns a string, and not a PrimarySeq object
+  $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
+  my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
+
+  # create a new feature in the database
+  my $feature = $db->new_feature(-primary_tag => 'mRNA',
+                                 -seq_id      => 'chr3',
+                                 -start      => 10000,
+                                 -end        => 11000);
+
+  # load an entire GFF3 file, using the GFF3 loader...
+  my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store    => $db,
+							   -verbose  => 1,
+							   -fast     => 1);
+
+  $loader->load('./my_genome.gff3');
+
+
+
+=head1 DESCRIPTION
+
+Bio::DB::SeqFeature::Store implements the Bio::SeqFeature::CollectionI
+interface to allow you to persistently store Bio::SeqFeatureI objects
+in a database and later retrieve them by a variety of
+searches. This module is similar to the older Bio::DB::GFF module,
+with the following differences:
+
+=over 4
+
+=item 1. No limitation on Bio::SeqFeatureI implementations
+
+Unlike Bio::DB::GFF, Bio::DB::SeqFeature::Store works with
+any Bio::SeqFeatureI object.
+
+=item 2. No limitation on nesting of features & subfeatures
+
+Bio::DB::GFF is limited to features that have at most one
+level of subfeature. Bio::DB::SeqFeature::Store can work with features
+that have unlimited levels of nesting.
+
+=item 3. No aggregators
+
+The aggregator architecture, which was necessary to impose order on
+the GFF2 files that Bio::DB::GFF works with, does not apply to
+Bio::DB::SeqFeature::Store. It is intended to store features that obey
+well-defined ontologies, such as the Sequence Ontology
+(http://song.sourceforge.net).
+
+=item 4. No relative locations
+
+All locations defined by this module are relative to an absolute
+sequence ID, unlike Bio::DB::GFF which allows you to define the
+location of one feature relative to another.
+
+=back
+
+We'll discuss major concepts in Bio::DB::SeqFeature::Store and then
+describe how to use the module.
+
+=head2 Adaptors
+
+Bio::DB::SeqFeature::Store is designed to work with a variety of
+storage back ends called "adaptors." Adaptors are subclasses of
+Bio::DB::SeqFeature::Store and provide the interface between the
+store() and fetch() methods and the physical database. Currently the
+number of adaptors is quite limited, but the number will grow soon.
+
+=over 4
+
+=item DBI::mysql
+
+A full-featured implementation on top of the MySQL relational database
+system.
+
+=item bdb
+
+A partial implementation that runs on top of the BerkeleyDB
+database. The fetch() and store() methods are implemented, but the
+various search functions (e.g. get_features_by_name()) are not.
+
+=back
+
+If you do not explicitly specify the adaptor, then DBI::mysql will be
+used by default.
+
+=head2 Serializers
+
+When Bio::DB::SeqFeature::Store stores a Bio::SeqFeatureI object into
+the database, it serializes it into binary or text form. When it later
+fetches the feature from the database, it unserializes it. Two
+serializers are available:
+
+=over 4
+
+=item Storable
+
+This is a fast binary serializer. It is available in Perl versions
+5.8.7 and higher and is used when available.
+
+=item Data::Dumper
+
+This is a slow text serializer that is available in Perl 5.8.0 and
+higher. It is used when Storable is unavailable.
+
+=back
+
+If you do not specify the serializer, then Storable will be used if
+available; otherwise Data::Dumper.
+
+=head2 Loaders and Normalized Features
+
+The Bio::DB::SeqFeature::Store::GFF3Loader parses a GFF3-format file
+and loads the annotations and sequence data into the database of your
+choice. The script bp_seqfeature_load.pl (found in the
+scripts/Bio-SeqFeature-Store/ subdirectory) is a thin front end to the
+GFF3Loader. Other loaders may be written later.
+
+Although Bio::DB::SeqFeature::Store should work with any
+Bio::SeqFeatureI object, there are some disadvantages to using
+Bio::SeqFeature::Generic and other vanilla implementations. The major
+issue is that if two vanilla features share the same subfeature
+(e.g. two transcripts sharing an exon), the shared subfeature will be
+cloned when stored into the database.
+
+The special-purpose L<Bio::DB::SeqFeature> class is able to normalize
+its subfeatures in the database, so that shared subfeatures are stored
+only once. This minimizes wasted storage space. In addition, when
+in-memory caching is turned on, each shared subfeature will usually
+occupy only a single memory location upon restoration.
+
+=cut
+
+
+use strict;
+use warnings;
+
+use base 'Bio::SeqFeature::CollectionI';
+use Carp 'croak';
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::DB::SeqFeature::Segment;
+use Scalar::Util 'blessed';
+
+# this probably shouldn't be here
+use Bio::DB::SeqFeature;
+
+*dna = *get_dna = *get_sequence = \&fetch_sequence;
+*get_SeqFeatures = \&fetch_SeqFeatures;
+
+=head1 Methods for Connecting and Initializing a Database
+
+=head2 new
+
+ Title   : new
+ Usage   : $db = Bio::DB::SeqFeature::Store->new(@options)
+ Function: connect to a database
+ Returns : A descendent of Bio::DB::SeqFeature::Store
+ Args    : several - see below
+ Status  : public
+
+This class method creates a new database connection. The following
+-name=E<gt>$value arguments are accepted:
+
+ Name               Value
+ ----               -----
+
+ -adaptor           The name of the Adaptor class (default DBI::mysql)
+
+ -serializer        The name of the serializer class (default Storable)
+
+ -index_subfeatures Whether or not to make subfeatures searchable
+                    (default true)
+
+ -cache             Activate LRU caching feature -- size of cache
+
+ -compress          Compresses features before storing them in database
+                    using Compress::Zlib
+
+The B<-index_subfeatures> argument, if true, tells the module to
+create indexes for a feature and all its subfeatures (and its
+subfeatures' subfeatures). Indexing subfeatures means that you will be
+able to search for the gene, its mRNA subfeatures and the exons inside
+each mRNA. It also means when you search the database for all features
+contained within a particular location, you will get the gene, the
+mRNAs and all the exons as individual objects as well as subfeatures
+of each other. NOTE: this option is only honored when working with a
+normalized feature class such as Bio::DB::SeqFeature.
+
+The B<-cache> argument, if true, tells the module to try to create a
+LRU (least-recently-used) object cache using the Tie::Cacher
+module. Caching will cause two objects that share the same primary_id
+to (often, but not always) share the same memory location, and may
+improve performance modestly. The argument is taken as the desired
+size for the cache. If you pass "1" as the cache value, a reasonable
+default cache size will be chosen. Caching requires the Tie::Cacher
+module to be installed. If the module is not installed, then caching
+will silently be disabled.
+
+The B<-compress> argument, if true, will cause the feature data to be
+compressed before storing it. This will make the database somewhat
+smaller at the cost of decreasing performance.
+
+The new() method of individual adaptors recognizes additional
+arguments. The default DBI::mysql adaptor recognizes the following
+ones:
+
+ Name               Value
+ ----               -----
+
+ -dsn               DBI data source (default dbi:mysql:test)
+
+ -autoindex         A flag that controls whether or not to update
+                    all search indexes whenever a feature is stored
+                    or updated (default true).
+
+ -namespace         A string that will be used to qualify each table,
+                    thereby allowing you to store several independent
+                    sequence feature databases in a single Mysql
+                    database.
+
+ -dumpdir           The path to a temporary directory that will be
+                    used during "fast" loading. See
+		    L<Bio::DB::SeqFeature::Store::GFF3Loader> for a
+		    description of this. Default is the current
+                    directory.
+
+=cut
+
+### 
+# object constructor
+#
+# Construct and initialise an adaptor object.
+#
+# Called with a single argument, that argument is taken as the DSN and the
+# default adaptor is used. Otherwise the named arguments ADAPTOR,
+# SERIALIZER, INDEX_SUBFEATURES, CACHE, COMPRESS, DEBUG and CREATE are
+# extracted here and whatever remains is handed to the adaptor's init()
+# and post_init(). CREATE also sets the WRITE and CREATE entries of those
+# remaining arguments. Croaks if the adaptor class cannot be loaded;
+# caching is dropped silently when Tie::Cacher cannot be loaded.
+sub new {
+  my $self      = shift;
+  my ($adaptor,$serializer,$index_subfeatures,$cache,$compress,$debug,$create,$args);
+  if (@_ == 1) {
+    $args = {DSN => shift}
+  }
+  else {
+    ($adaptor,$serializer,$index_subfeatures,$cache,$compress,$debug,$create,$args) =
+      rearrange(['ADAPTOR',
+                 'SERIALIZER',
+                 'INDEX_SUBFEATURES',
+                 'CACHE',
+                 'COMPRESS',
+                 'DEBUG',
+                 'CREATE',
+                ],@_);
+  }
+  $adaptor ||= 'DBI::mysql';
+  $args->{WRITE}++  if $create;
+  $args->{CREATE}++ if $create;
+
+  my $class = "Bio::DB::SeqFeature::Store::$adaptor";
+  eval "require $class " or croak $@;
+  $cache &&= eval "require Tie::Cacher; 1";
+  my $obj = $class->new_instance();
+  $obj->debug($debug) if defined $debug;
+  $obj->init($args);
+  $obj->init_cache($cache) if $cache;
+  $obj->do_compress($compress);
+  $obj->serializer($serializer)               if defined $serializer;
+  $obj->index_subfeatures($index_subfeatures) if defined $index_subfeatures;
+  $obj->seqfeature_class('Bio::DB::SeqFeature');
+  $obj->post_init($args);
+  $obj;
+}
+
+=head2 init_database
+
+ Title   : init_database
+ Usage   : $db->init_database([$erase_flag])
+ Function: initialize a database
+ Returns : true
+ Args    : (optional) flag to erase current data
+ Status  : public
+
+Call this after Bio::DB::SeqFeature::Store-E<gt>new() to initialize a new
+database. In the case of a DBI database, this method installs the
+schema but does B<not> create the database. You have to do this
+offline using the appropriate command-line tool. In the case of the
+"bdb" BerkeleyDB adaptor, this creates an empty BTREE database.
+
+If there is any data already in the database, init_database() called
+with no arguments will have no effect. To permanently erase the data
+already there and prepare to receive a fresh set of data, pass a true
+argument.
+
+=cut
+
+###
+# wipe database clean and reinstall schema
+#
+# Public entry point: forwards all arguments to the adaptor-specific
+# _init_database() and returns whatever it returns.
+sub init_database {
+  my ($self,@options) = @_;
+  return $self->_init_database(@options);
+}
+
+=head2 post_init
+
+This method is invoked after init_database for use by certain adaptors
+(currently only the memory adaptor) to do automatic data loading after
+initialization. It is passed a copy of the init_database() args.
+
+=cut
+
+# Default hook: does nothing. Adaptors may override it.
+sub post_init {
+  return;
+}
+
+=head2 store
+
+ Title   : store
+ Usage   : $success = $db->store(@features)
+ Function: store one or more features into the database
+ Returns : true if successful
+ Args    : list of Bio::SeqFeatureI objects
+ Status  : public
+
+This method stores a list of features into the database. Each feature
+is updated so that its primary_id becomes the primary ID of the
+serialized feature stored in the database. If all features were
+successfully stored, the method returns true. In the DBI
+implementation, the store is performed as a single transaction and the
+transaction is rolled back if one or more store operations failed.
+
+You can find out what the primary ID of the feature has become by
+calling the feature's primary_id() method:
+
+  $db->store($my_feature) or die "Oh darn";
+  my $id = $my_feature->primary_id;
+
+If the feature contains subfeatures, they will all be stored
+recursively. In the case of Bio::DB::SeqFeature and
+Bio::DB::SeqFeature::Store::NormalizedFeature, the subfeatures will be
+stored in a normalized way so that each subfeature appears just once
+in the database.
+
+Subfeatures will be indexed for separate retrieval based on the
+current value of index_subfeatures().
+
+If you call store() with one or more features that already have valid
+primary_ids, then an existing object(s) will be B<replaced>. Note that
+when using normalized features such as Bio::DB::SeqFeature, the
+subfeatures are not recursively updated when you update the parent
+feature. You must manually update each subfeature that has changed.
+
+=cut
+
+###
+# store one or more Bio::SeqFeatureI objects
+#      if they already have a primary_id will replace into the database
+#      otherwise will insert and primary_id will be added
+#
+
+# this version stores the object and flags it to be indexed
+# for search via attributes, name, type or location
+
+sub store {
+  my $self = shift;
+  # The leading 1 is the "index this feature" flag understood by
+  # store_and_cache(); the remaining arguments are the features to store.
+  # The call is made in scalar context, exactly as before.
+  my $result = $self->store_and_cache(1,@_);
+  return $result;
+}
+
+=head2 store_noindex
+
+ Title   : store_noindex
+ Usage   : $success = $db->store_noindex(@features)
+ Function: store one or more features into the database without indexing
+ Returns : true if successful
+ Args    : list of Bio::SeqFeatureI objects
+ Status  : public
+
+This method stores a list of features into the database but does not
+make them searchable. The only way to access the features is via their
+primary IDs. This method is ordinarily only used internally to store
+subfeatures that are not indexed.
+
+=cut
+
+# this version stores the object and flags it so that it is
+# not searchable via attributes, name, type or location
+# (typically used only for subfeatures)
+sub store_noindex {
+  my $self = shift;
+  # The leading 0 tells store_and_cache() not to index the features.
+  $self->store_and_cache(0,@_);
+}
+
+=head2 new_feature
+
+ Title   : new_feature
+ Usage   : $feature = $db->new_feature(@args)
+ Function: create a new Bio::DB::SeqFeature object in the database
+ Returns : the new seqfeature
+ Args    : see below
+ Status  : public
+
+This method creates and stores a new Bio::SeqFeatureI object using the
+specialized Bio::DB::SeqFeature class. This class is able to store its
+subfeatures in a normalized fashion, allowing subfeatures to be shared
+among multiple parents (e.g. multiple exons shared among several
+mRNAs).
+
+The arguments are the same as for Bio::DB::SeqFeature-E<gt>new(), which in
+turn are similar to Bio::SeqFeature::Generic-E<gt>new() and
+Bio::Graphics::Feature-E<gt>new(). The most important difference is the
+B<-index> option, which controls whether the feature will be indexed
+for retrieval (default is true). Ordinarily, you would only want to
+turn indexing off when creating subfeatures, because features stored
+without indexes will only be reachable via their primary IDs or their
+parents.
+
+Arguments are as follows:
+
+  -seq_id       the reference sequence
+  -start        the start position of the feature
+  -end          the stop position of the feature
+  -display_name the feature name (returned by seqname)
+  -primary_tag  the feature type (returned by primary_tag)
+  -source       the source tag
+  -score        the feature score (for GFF compatibility)
+  -desc         a description of the feature
+  -segments     a list of subfeatures (see Bio::Graphics::Feature)
+  -subtype      the type to use when creating subfeatures
+  -strand       the strand of the feature (one of -1, 0 or +1)
+  -phase        the phase of the feature (0..2)
+  -url          a URL to link to when rendered with Bio::Graphics
+  -attributes   a hashref of tag value attributes, in which the key is the tag
+                  and the value is an array reference of values
+  -index        index this feature if true
+
+Aliases:
+
+  -id           an alias for -display_name
+  -seqname      an alias for -display_name
+  -display_id   an alias for -display_name
+  -name         an alias for -display_name
+  -stop         an alias for end
+  -type         an alias for primary_tag
+
+You can change the seqfeature implementation generated by new() by
+passing the name of the desired seqfeature class to
+$db-E<gt>seqfeature_class().
+
+=cut
+
+sub new_feature {
+  my $self = shift;
+  # Instantiate the configured seqfeature class, passing this store as
+  # -store ahead of the caller-supplied constructor arguments.
+  return $self->seqfeature_class->new(-store=>$self,@_);
+}
+
+=head2 delete
+
+ Title   : delete
+ Usage   : $success = $db->delete(@features)
+ Function: delete a list of features from the database
+ Returns : true if successful
+ Args    : list of features
+ Status  : public
+
+This method looks up the primary IDs from a list of features and
+deletes them from the database, returning true if all deletions are
+successful.
+
+WARNING: The current DBI::mysql implementation has some issues that
+need to be resolved, namely (1) normalized subfeatures are NOT
+recursively deleted; and (2) the deletions are not performed in a
+transaction.
+
+=cut
+
+sub delete {
+  my $self = shift;
+  my $ok   = 1;
+  foreach my $feature (@_) {
+    my $primary_id = $feature->primary_id;
+    # &&= short-circuits: once one deletion has failed, _deleteid() is no
+    # longer called for the remaining features.
+    $ok &&= $self->_deleteid($primary_id);
+  }
+  return $ok;
+}
+
+=head2 fetch
+
+ Title   : fetch
+ Usage   : $feature = $db->fetch($primary_id)
+ Function: fetch a feature from the database using its primary ID
+ Returns : a feature
+ Args    : primary ID of desired feature
+ Status  : public
+
+This method returns a previously-stored feature from the database
+using its primary ID. If the primary ID is invalid, it returns undef.
+
+=cut
+
+###
+# Fetch a Bio::SeqFeatureI from database using its primary_id
+#
+sub fetch {
+  my $self = shift;
+  @_ or croak "usage: fetch(\$primary_id)";
+  my $primary_id = shift;
+
+  # Without a cache, go straight to the adaptor.
+  my $cache = $self->cache();
+  return $self->_fetch($primary_id) unless $cache;
+
+  # Cache hit: serve the stored object.
+  return $cache->fetch($primary_id) if $cache->exists($primary_id);
+
+  # Cache miss: load from the adaptor and remember the result.
+  my $feature = $self->_fetch($primary_id);
+  $cache->store($primary_id,$feature);
+  return $feature;
+}
+
+=head2 fetch_many
+
+ Title   : fetch_many
+ Usage   : @features = $db->fetch_many($primary_id,$primary_id,$primary_id...)
+ Function: fetch many features from the database using their primary ID
+ Returns : list of features
+ Args    : a list of primary IDs or an array ref of primary IDs
+ Status  : public
+
+Same as fetch() except that you can pass a list of primary IDs or a
+ref to an array of IDs.
+
+=cut
+
+###
+# Efficiently fetch a series of IDs from the database
+# Can pass an array or an array ref
+#
+sub fetch_many {
+  my $self = shift;
+  @_ or croak 'usage: fetch_many($id1,$id2,$id3...)';
+
+  # Flatten any array references into one plain list of IDs.
+  my @ids = map {ref($_) ? @$_ : $_} @_;
+  return unless @ids;
+
+  return $self->_fetch_many(@ids);
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : $iterator = $db->get_seq_stream(@args)
+ Function: return an iterator across all features in the database
+ Returns : a Bio::DB::SeqFeature::Store::Iterator object
+ Args    : feature filters (optional)
+ Status  : public
+
+When called without any arguments this method will return an iterator
+object that will traverse all indexed features in the database. Call
+the iterator's next_seq() method to step through them (in no
+particular order):
+
+  my $iterator = $db->get_seq_stream;
+  while (my $feature = $iterator->next_seq) {
+    print $feature->primary_tag,' ',$feature->display_name,"\n";
+  }
+
+You can select a subset of features by passing a series of filter
+arguments. The arguments are identical to those accepted by
+$db-E<gt>features().
+
+=cut
+
+###
+# Return an iterator across all features that are indexable
+#
+sub get_seq_stream {
+  my $self = shift;
+  # Same query as features(), but -iterator=>1 asks _features() for an
+  # iterator; any caller-supplied filters are forwarded after it.
+  $self->_features(-iterator=>1,@_);
+}
+
+=head2 get_features_by_name
+
+ Title   : get_features_by_name
+ Usage   : @features = $db->get_features_by_name($name)
+ Function: looks up features by their display_name
+ Returns : a list of matching features
+ Args    : the desired name
+ Status  : public
+
+This method searches the display_name of all features for matches
+against the provided name. GLOB style wildcards ("*", "?") are
+accepted, but may be slow.
+
+The method returns the list of matches, which may be zero, 1 or more
+than one features. Be prepared to receive more than one result, as
+display names are not guaranteed to be unique.
+
+For backward compatibility with gbrowse, this method is also known as
+get_feature_by_name().
+
+=cut
+
+# backward compatibility for gbrowse
+sub get_feature_by_name {
+  # Singular-named alias that forwards everything to get_features_by_name().
+  my $self = shift;
+  return $self->get_features_by_name(@_);
+}
+
+###
+# get_feature_by_name() return 0 or more features using a name lookup
+# uses the Bio::DB::GFF API
+#
+sub get_features_by_name {
+  my $self   = shift;
+  my ($class,$name,$types,$allow_alias);
+
+  if (@_ == 1) {  # get_features_by_name('name');
+    $name = shift;
+  } else {        # get_features_by_name('class'=>'name'), get_feature_by_name(-name=>'name')
+    ($class,$name,$allow_alias,$types) = rearrange([qw(CLASS NAME ALIASES),[qw(TYPE TYPES)]],@_);
+  }
+
+  # hacky workaround for assumption in Bio::DB::GFF that unclassed reference points were of type "Sequence"
+  undef $class if $class && $class eq 'Sequence';
+
+  # All four filters are always passed; unset ones go through as undef.
+  $self->_features(-name=>$name,-class=>$class,-aliases=>$allow_alias,-type=>$types);
+}
+
+=head2 get_features_by_alias
+
+ Title   : get_features_by_alias
+ Usage   : @features = $db->get_features_by_alias($name)
+ Function: looks up features by their display_name or alias
+ Returns : a list of matching features
+ Args    : the desired name
+ Status  : public
+
+This method is similar to get_features_by_name() except that it will
+also search through the feature aliases.  Aliases can be created by
+storing features that contain one or more Alias tags. Wildcards are
+accepted.
+
+=cut
+
+sub get_features_by_alias {
+  my $self = shift;
+  # A lone positional argument is treated as the name; anything else is
+  # assumed to already be in -key=>value form.
+  my @args = @_ == 1 ? (-name=>$_[0]) : @_;
+  # Turn on alias matching and reuse the name lookup.
+  push @args,(-aliases=>1);
+  return $self->get_features_by_name(@args);
+}
+
+=head2 get_features_by_type
+
+ Title   : get_features_by_type
+ Usage   : @features = $db->get_features_by_type(@types)
+ Function: looks up features by their primary_tag
+ Returns : a list of matching features
+ Args    : list of primary tags
+ Status  : public
+
+This method will return a list of features that have any of the
+primary tags given in the argument list. For compatibility with
+gbrowse and Bio::DB::GFF, types can be qualified using a colon:
+
+  primary_tag:source_tag
+
+in which case only features that match both the primary_tag B<and> the
+indicated source_tag will be returned. If the database was loaded from
+a GFF3 file, this corresponds to the third and second columns of the
+row, in that order.
+
+For example, given the GFF3 lines:
+
+  ctg123 geneFinder exon 1300 1500 . + . ID=exon001
+  ctg123 fgenesH    exon 1300 1520 . + . ID=exon002
+
+exon001 and exon002 will be returned by searching for type "exon", but
+only exon002 will be returned by searching for type "exon:fgenesH".
+
+=cut
+
+sub get_features_by_type {
+  my $self = shift;
+  # Pass a fresh array reference holding a copy of the requested types.
+  return $self->_features(-type=>[@_]);
+}
+
+=head2 get_features_by_location
+
+ Title   : get_features_by_location
+ Usage   : @features = $db->get_features_by_location(@args)
+ Function: looks up features by their location
+ Returns : a list of matching features
+ Args    : see below
+ Status  : public
+
+This method fetches features based on a location range lookup. You
+call it using a positional list of arguments, or a list of
+(-argument=E<gt>$value) pairs.
+
+The positional form is as follows:
+
+ $db->get_features_by_location($seqid [[,$start,]$end])
+
+The $seqid is the name of the sequence on which the feature resides,
+and start and end are optional endpoints for the match. If the
+endpoints are missing then any feature on the indicated seqid is
+returned.
+
+Examples:
+
+ get_features_by_location('chr1');      # all features on chromosome 1
+ get_features_by_location('chr1',5000); # features between 5000 and the end
+ get_features_by_location('chr1',5000,8000); # features between 5000 and 8000
+
+Location lookups are overlapping. A feature will be returned if it
+partially or completely overlaps the indicated range.
+
+The named argument form gives you more control:
+
+  Argument       Value
+  --------       -----
+
+  -seq_id        The name of the sequence on which the feature resides
+  -start         Start of the range
+  -end           End of the range
+  -strand        Strand of the feature
+  -range_type    Type of range to search over
+
+The B<-strand> argument, if present, can be one of "0" to find
+features that are on both strands, "+1" to find only plus strand
+features, and "-1" to find only minus strand features. Specifying a
+strand of undef is the same as not specifying this argument at all,
+and retrieves all features regardless of their strandedness.
+
+The B<-range_type> argument, if present, can be one of "overlaps" (the
+default), to find features whose positions overlap the indicated
+range, "contains," to find features whose endpoints are completely
+contained within the indicated range, and "contained_in" to find
+features whose endpoints are both outside the indicated range.
+
+=cut
+
+sub get_features_by_location {
+  my $self = shift;
+  # Accepts either positional ($seqid,$start,$end,...) or -key=>value
+  # arguments; rearrange() normalizes both into this fixed order.
+  my ($seqid,$start,$end,$strand,$rangetype) =
+    rearrange([['SEQ_ID','SEQID','REF'],'START',['STOP','END'],'STRAND','RANGE_TYPE'],@_);
+  # "||undef" maps every false value (0, '', undef) to undef.
+  # NOTE(review): this means a strand of 0 is forwarded as undef, although
+  # the POD describes "0" as a distinct choice -- confirm this is intended.
+  $self->_features(-seqid=>$seqid,
+                   -start=>$start||undef,
+                   -end=>$end||undef,
+                   -strand=>$strand||undef,
+                   -range_type=>$rangetype);
+}
+
+=head2 get_features_by_attribute
+
+ Title   : get_features_by_attribute
+ Usage   : @features = $db->get_features_by_attribute(@args)
+ Function: looks up features by their attributes/tags
+ Returns : a list of matching features
+ Args    : see below
+ Status  : public
+
+This implements a simple tag filter. Pass a list of tag names and
+their values. The module will return a list of features whose tag
+names and values match. Tag names are case insensitive. If multiple
+tag name/value pairs are present, they will be ANDed together. To
+match any of a list of values, use an array reference for the value.
+
+Examples:
+
+ # return all features whose "function" tag is "GO:0000123"
+ @features = $db->get_features_by_attribute(function => 'GO:0000123');
+
+ # return all features whose "function" tag is "GO:0000123" or "GO:0000555"
+ @features = $db->get_features_by_attribute(function => ['GO:0000123','GO:0000555']);
+
+ # return all features whose "function" tag is "GO:0000123" or "GO:0000555"
+ # and whose "confirmed" tag is 1
+ @features = $db->get_features_by_attribute(function  => ['GO:0000123','GO:0000555'],
+                                            confirmed => 1);
+
+=cut
+
+sub get_features_by_attribute {
+  my $self       = shift;
+  # Accept either a single hash reference or a flat tag=>value list.
+  my %attributes = ref($_[0]) ? %{$_[0]} : @_;
+  # The usage message now names this method correctly (it previously said
+  # "get_feature_by_attribute", which does not exist).
+  %attributes  or $self->throw("Usage: get_features_by_attribute(attribute_name=>\$attribute_value...)");
+  $self->_features(-attributes=>\%attributes);
+}
+###
+# features() call -- main query interface
+#
+
+=head2 features
+
+ Title   : features
+ Usage   : @features = $db->features(@args)
+ Function: generalized query & retrieval interface
+ Returns : list of features
+ Args    : see below
+ Status  : Public
+
+This is the workhorse for feature query and retrieval. It takes a
+series of -name=E<gt>$value filter arguments. Features that
+match all the filters are returned.
+
+  Argument       Value
+  --------       -----
+
+ Location filters:
+  -seq_id        Chromosome, contig or other DNA segment
+  -seqid         Synonym for -seq_id
+  -ref           Synonym for -seq_id
+  -start         Start of range
+  -end           End of range
+  -stop          Synonym for -end
+  -strand        Strand
+  -range_type    Type of range match ('overlaps','contains','contained_in')
+
+ Name filters:
+  -name          Name of feature (may be a glob expression)
+  -aliases       If true, match aliases as well as display names
+  -class         Archaic argument for backward compatibility.
+                  (-class=>'Clone',-name=>'ABC123') is equivalent
+                  to (-name=>'Clone:ABC123')
+
+ Type filters:
+  -types         List of feature types (array reference) or one type (scalar)
+  -type          Synonym for the above
+  -primary_tag   Synonym for the above
+
+  -attributes    Hashref of attribute=>value pairs as per
+                    get_features_by_attribute(). Multiple alternative values
+                    can be matched by providing an array reference.
+  -attribute     synonym for -attributes
+
+You may also provide features() with a list of scalar values (the
+first element of which must B<not> begin with a dash), in which case
+it will treat the list as a feature type filter.
+
+Examples:
+
+All features on chromosome 1:
+
+ @features = $db->features(-seqid=>'Chr1');
+
+All features on chromosome 1 between 5000 and 6000:
+
+ @features = $db->features(-seqid=>'Chr1',-start=>5000,-end=>6000);
+
+All mRNAs on chromosome 1 between 5000 and 6000:
+
+ @features = $db->features(-seqid=>'Chr1',-start=>5000,-end=>6000,-types=>'mRNA');
+
+All confirmed mRNAs and repeats on chromosome 1 that overlap the range 5000..6000:
+
+ @features = $db->features(-seqid     => 'Chr1',-start=>5000,-end=>6000,
+                           -types     => ['mRNA','repeat'],
+                           -attributes=> {confirmed=>1}
+                          );
+
+All confirmed mRNAs and repeats on chromosome 1 strictly contained within the range 5000..6000:
+
+ @features = $db->features(-seqid     => 'Chr1',-start=>5000,-end=>6000,
+                           -types     => ['mRNA','repeat'],
+                           -attributes=> {confirmed=>1},
+                           -range_type => 'contains',
+                          );
+
+
+
+All genes and repeats:
+
+ @features = $db->features('gene','repeat_region');
+
+=cut
+
+# documentation of args
+#   my ($seq_id,$start,$end,$strand,
+#       $name,$class,$allow_aliases,
+#       $types,
+#       $attributes,
+#       $range_type,
+#       $iterator,
+#      ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
+# 		    'NAME','CLASS','ALIASES',
+# 		    ['TYPES','TYPE','PRIMARY_TAG'],
+# 		    ['ATTRIBUTES','ATTRIBUTE'],
+# 		    'RANGE_TYPE',
+# 		   ],@_);
+#   $range_type ||= 'overlaps';
+sub features {
+  my $self = shift;
+  my @args;
+  if (@_ && $_[0] !~/^-/) {
+    # A plain list whose first element has no leading dash is shorthand
+    # for a type filter.
+    @args = (-type=>[@_]);
+  }
+  else {
+    # No arguments at all, or -key=>value pairs: forward unchanged.
+    @args = @_;
+  }
+  return $self->_features(@args);
+}
+
+=head2 seq_ids
+
+ Title   : seq_ids
+ Usage   : @ids = $db->seq_ids()
+ Function: Return all sequence IDs contained in database
+ Returns : list of sequence Ids
+ Args    : none
+ Status  : public
+
+=cut
+
+sub seq_ids {
+  my ($self) = @_;
+  # Thin public wrapper over the adaptor's _seq_ids().
+  return $self->_seq_ids;
+}
+
+=head2 search_attributes
+
+ Title   : search_attributes
+ Usage   : @result_list = $db->search_attributes("text search string",[$tag1,$tag2...],$limit)
+ Function: Search attributes for keywords occurring in a text string
+ Returns : array of results
+ Args    : full text search string, array ref of attribute names, and an optional feature limit
+ Status  : public
+
+Given a search string, this method performs a full-text search of the
+specified attributes and returns an array of results.  You may pass a
+scalar attribute name to search the values of one attribute
+(e.g. "Note") or you may pass an array reference to search inside
+multiple attributes (['Note','Alias','Parent']). Each row of the
+returned array is an arrayref containing the following fields:
+
+  column 1     The display name of the feature
+  column 2     The text of the note
+  column 3     A relevance score.
+
+NOTE: This search will fail to find features that do not have a display name!
+
+=cut
+
+sub search_attributes {
+  my $self = shift;
+  my ($search_string,$attribute_names,$limit) = @_;
+  # Normalize the attribute spec: a plain ARRAY reference is used as-is,
+  # anything else is wrapped in a one-element array reference.
+  my $attribute_array = ref($attribute_names) eq 'ARRAY'
+                      ? $attribute_names
+                      : [$attribute_names];
+  return $self->_search_attributes($search_string,$attribute_array,$limit);
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @result_list = $db->search_notes("full text search string",$limit)
+ Function: Search the notes for a text string
+ Returns : array of results
+ Args    : full text search string, and an optional feature limit
+ Status  : public
+
+Given a search string, this method performs a full-text search of the
+"Note" attribute and returns an array of results.  Each row of the
+returned array is an arrayref containing the following fields:
+
+  column 1     The display_name of the feature, suitable for passing to get_feature_by_name()
+  column 2     The text of the note
+  column 3     A relevance score.
+
+NOTE: This is equivalent to $db-E<gt>search_attributes('full text search
+string','Note',$limit). This search will fail to find features that do
+not have a display name!
+
+=cut
+
+###
+# search_notes()
+#
+sub search_notes {
+  my ($self,$search_string,$limit) = @_;
+  # Full-text search restricted to the 'Note' attribute.
+  return $self->_search_attributes($search_string,['Note'],$limit);
+}
+
+=head2 insert_sequence
+
+ Title   : insert_sequence
+ Usage   : $success = $db->insert_sequence($seqid,$sequence_string,$offset)
+ Function: Inserts sequence data into the database at the indicated offset
+ Returns : true if successful
+ Args    : see below
+ Status  : public
+
+This method inserts the DNA or protein sequence fragment
+$sequence_string, identified by the ID $seq_id, into the database at
+the indicated offset $offset. It is used internally by the GFF3Loader
+to load sequence data from the files.
+
+=cut
+
+###
+# insert_sequence()
+#
+# insert a bit of primary sequence into the database
+#
+sub insert_sequence {
+  my ($self,$seqid,$seq,$offset) = @_;
+  # A missing (or otherwise false) offset is treated as 0.
+  $offset = 0 unless $offset;
+  return $self->_insert_sequence($seqid,$seq,$offset);
+}
+
+
+=head2 fetch_sequence
+
+ Title   : fetch_sequence
+ Usage   : $sequence = $db->fetch_sequence(-seq_id=>$seqid,-start=>$start,-end=>$end)
+ Function: Fetch the indicated subsequence from the database
+ Returns : The sequence string (not a Bio::PrimarySeq object!)
+ Args    : see below
+ Status  : public
+
+This method retrieves a portion of the indicated sequence. The arguments are:
+
+  Argument       Value
+  --------       -----
+  -seq_id        Chromosome, contig or other DNA segment
+  -seqid         Synonym for -seq_id
+  -name          Synonym for -seq_id
+  -start         Start of range
+  -end           End of range
+  -class         Obsolete argument used for Bio::DB::GFF compatibility. If
+                  specified will qualify the seq_id as "$class:$seq_id".
+  -bioseq        Boolean flag; if true, returns a Bio::PrimarySeq object instead
+                  of a sequence string.
+
+You can call fetch_sequence using the following shortcuts:
+
+ $seq = $db->fetch_sequence('chr3');  # entire chromosome
+ $seq = $db->fetch_sequence('chr3',1000);        # position 1000 to end of chromosome
+ $seq = $db->fetch_sequence('chr3',undef,5000);  # position 1 to 5000
+ $seq = $db->fetch_sequence('chr3',1000,5000);   # positions 1000 to 5000
+
+=cut
+
+###
+# fetch_sequence()
+#
+# equivalent to old Bio::DB::GFF->dna() method
+#
+sub fetch_sequence {
+  my $self = shift;
+  # Accepts positional ($seqid,$start,$end,...) or -key=>value arguments.
+  my ($seqid,$start,$end,$class,$bioseq) = rearrange([['NAME','SEQID','SEQ_ID'],'START',['END','STOP'],'CLASS','BIOSEQ'],@_);
+  # NOTE(review): the POD describes the qualified form as "$class:$seq_id",
+  # but this builds "$seqid:$class" -- confirm which order adaptors expect.
+  $seqid = "$seqid:$class" if defined $class;
+  my $seq = $self->_fetch_sequence($seqid,$start,$end);
+  # Default: return the raw sequence string.
+  return $seq unless $bioseq;
+
+  # -bioseq requested: wrap the string in a Bio::Seq, loading the class
+  # only if it is not already available.
+  require Bio::Seq unless Bio::Seq->can('new');
+  my $display_id = defined $start ? "$seqid:$start..$end" : $seqid;
+  return Bio::Seq->new(-display_id=>$display_id,-seq=>$seq);
+}
+
+=head2 segment
+
+ Title   : segment
+ Usage   : $segment = $db->segment($seq_id [,$start] [,$end])
+ Function: restrict the database to a sequence range
+ Returns : a Bio::DB::SeqFeature::Segment object
+ Args    : sequence id, start and end ranges (optional)
+ Status  : public
+
+This is a convenience method that can be used when you are interested
+in the contents of a particular sequence landmark, such as a
+contig. Specify the ID of a sequence or other landmark in the database
+and optionally a start and endpoint relative to that landmark. The
+method will look up the region and return a
+Bio::DB::SeqFeature::Segment object that spans it. You can then use
+this segment object to make location-restricted queries on the database.
+
+Example:
+
+ $segment  = $db->segment('contig23',1,1000);  # first 1000 bp of contig23
+ my @mRNAs = $segment->features('mRNA');       # all mRNAs that overlap segment
+
+Although you will usually want to fetch segments that correspond to
+physical sequences in the database, you can actually use any feature
+in the database as the sequence ID. The segment() method will perform
+a get_features_by_name() internally and then transform the feature
+into the appropriate coordinates.
+
+=cut
+
+###
+# Replacement for Bio::DB::GFF->segment() method
+#
+sub segment {
+  my $self = shift;
+  my (@features,@args);
+
+  if (@_ == 1 && blessed($_[0])) {
+    # A single object argument is used directly as the landmark feature.
+    @features = @_;
+    @args = ();
+  }
+  else {
+    # Otherwise normalize the arguments and look the landmark up by name.
+    @args     = $self->setup_segment_args(@_);
+    @features = $self->get_features_by_name(@args);
+  }
+  if (!wantarray && @features > 1) {
+    $self->throw(<<END);
+segment() called in a scalar context but multiple features match.
+Either call in a list context or narrow your search using the -types or -class arguments
+END
+  }
+  # Optional start/end relative to the landmark; start defaults to 1.
+  my ($rel_start,$rel_end) = rearrange(['START',['STOP','END']],@args);
+  $rel_start = 1 unless defined $rel_start;
+
+  my @segments;
+  for my $f (@features) {
+    my $seqid  = $f->seq_id;
+    my $strand = $f->strand;
+    my ($start,$end);
+    # Without an explicit relative end, span the full length of the feature.
+    my $re = defined $rel_end ? $rel_end : $f->end - $f->start + 1;
+
+    if ($strand >= 0) {
+      # Non-negative strand: offsets count up from the feature's start.
+      $start = $f->start + $rel_start - 1;
+      $end   = $f->start + $re   - 1;
+    }
+    else {
+      # Negative strand: offsets count down from the feature's end.
+      $start = $f->end - $re   + 1;
+      $end   = $f->end - $rel_start + 1;
+    }
+    push @segments,Bio::DB::SeqFeature::Segment->new($self,$seqid,$start,$end,$strand);
+  }
+  # List context gets every segment; scalar context gets the first (or undef).
+  return wantarray ? @segments : $segments[0];
+}
+
+=head2 seqfeature_class
+
+ Title   : seqfeature_class
+ Usage   : $classname = $db->seqfeature_class([$new_classname])
+ Function: get or set the name of the Bio::SeqFeatureI class generated by new_feature()
+ Returns : name of class
+ Args    : new classname (optional)
+ Status  : public
+
+=cut
+
+sub seqfeature_class {
+  my $self     = shift;
+  # The value returned is always the class name held *before* this call.
+  my $previous = $self->{seqfeatureclass};
+  return $previous unless @_;
+
+  my $class = shift;
+  # Best-effort load; a failure here is not reported directly but will
+  # normally surface through the isa() check below.
+  eval "require $class";
+  $self->throw("$class does not implement the Bio::SeqFeatureI interface")
+    unless $class->isa('Bio::SeqFeatureI');
+  $self->{seqfeatureclass} = $class;
+  return $previous;
+}
+
+=head2 reindex
+
+ Title   : reindex
+ Usage   : $db->reindex
+ Function: reindex the database
+ Returns : nothing
+ Args    : nothing
+ Status  : public
+
+This method will force the secondary indexes (name, location,
+attributes, feature types) to be recalculated. It may be useful to
+rebuild a corrupted database.
+
+=cut
+
+###
+# force reindexing
+#
+sub reindex {
+  my $self = shift;
+
+  my $indexed    = 0;
+  my $checkpoint = time();
+
+  $self->_start_reindexing;
+
+  my $stream = $self->get_seq_stream;
+  while (my $feature = $stream->next_seq) {
+    $indexed++;
+    unless ($indexed % 1000) {
+      # Progress report on STDERR every 1000 features, with the time
+      # taken since the previous report.
+      my $now     = time();
+      my $elapsed = sprintf(" in %5.2fs",$now - $checkpoint);
+      $checkpoint = $now;
+      print STDERR "$indexed features indexed$elapsed...",' 'x60;
+      # Overwrite the line in place on a terminal (outside Emacs);
+      # otherwise emit one line per report.
+      my $eol = (-t STDOUT && !$ENV{EMACS}) ? "\r" : "\n";
+      print STDERR $eol;
+    }
+    $self->_update_indexes($feature);
+  }
+
+  return $self->_end_reindexing;
+}
+
+=head2 start_bulk_update,finish_bulk_update
+
+ Title   : start_bulk_update,finish_bulk_update
+ Usage   : $db->start_bulk_update
+           $db->finish_bulk_update
+ Function: Activate optimizations for large number of insertions/updates
+ Returns : nothing
+ Args    : nothing
+ Status  : public
+
+With some adaptors (currently only the DBI::mysql adaptor), these
+methods signal the adaptor that a large number of insertions or
+updates are to be performed, and activate certain optimizations. These
+methods are called automatically by the
+Bio::DB::SeqFeature::Store::GFF3Loader module.
+
+Example:
+
+  $db->start_bulk_update;
+  for my $f (@features) {
+    $db->store($f);
+  }
+  $db->finish_bulk_update;
+
+=cut
+
+sub start_bulk_update {
+  # Forward to the adaptor hook with any arguments unchanged.
+  my $self = shift;
+  return $self->_start_bulk_update(@_);
+}
+sub finish_bulk_update {
+  # Forward to the adaptor hook with any arguments unchanged.
+  my $self = shift;
+  return $self->_finish_bulk_update(@_);
+}
+
+=head2 add_SeqFeature
+
+ Title   : add_SeqFeature
+ Usage   : $count = $db->add_SeqFeature($parent,@children)
+ Function: store a parent/child relationship between $parent and @children
+ Returns : number of children successfully stored
+ Args    : parent feature and one or more children
+ Status  : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
+
+If can_store_parentage() returns true, then some store-aware features
+(e.g. Bio::DB::SeqFeature) will invoke this method to store
+feature/subfeature relationships in a normalized table.
+
+=cut
+
+# these two are called only if _can_store_subFeatures() returns true
+# _add_SeqFeature ($parent,@children)
+sub add_SeqFeature {
+  # Forward the parent and its children to the adaptor's _add_SeqFeature().
+  my $self = shift;
+  return $self->_add_SeqFeature(@_);
+}
+
+=head2 fetch_SeqFeatures
+
+ Title   : fetch_SeqFeatures
+ Usage   : @children = $db->fetch_SeqFeatures($parent_feature)
+ Function: return the immediate subfeatures of the indicated feature
+ Returns : list of subfeatures
+ Args    : the parent feature
+ Status  : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
+
+If can_store_parentage() returns true, then some store-aware features
+(e.g. Bio::DB::SeqFeature) will invoke this method to retrieve
+feature/subfeature relationships from the database.
+
+=cut
+
+# _get_SeqFeatures($parent,@list_of_child_types)
+sub fetch_SeqFeatures {
+  my $self = shift;
+  my $obj  = shift;
+  # A parent without a primary ID yields nothing (empty list / undef).
+  return unless defined $obj->primary_id;
+  # Remaining arguments are forwarded to the adaptor after the parent.
+  $self->_fetch_SeqFeatures($obj,@_);
+}
+
+
+
+=head1 Changing the Behavior of the Database
+
+These methods allow you to modify the behavior of the database.
+
+=head2 debug
+
+ Title   : debug
+ Usage   : $debug_flag = $db->debug([$new_flag])
+ Function: set the debug flag
+ Returns : current debug flag
+ Args    : new debug flag
+ Status  : public
+
+This method gets/sets a flag that turns on verbose progress
+messages. Currently this will not do very much.
+
+=cut
+
+sub debug {
+  my ($self,@new_value) = @_;
+  # Classic get/set accessor: the value returned is the one held before
+  # any update performed by this call.
+  my $previous = $self->{debug};
+  $self->{debug} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+=head2 serializer
+
+ Title   : serializer
+ Usage   : $serializer = $db->serializer([$new_serializer])
+ Function: get/set the name of the serializer
+ Returns : the name of the current serializer class
+ Args    : (optional) the name of a new serializer
+ Status  : public
+
+You can use this method to set the serializer, but do not attempt to
+change the serializer once the database is initialized and populated.
+
+=cut
+
+###
+# serializer
+#
+sub serializer {
+  my $self     = shift;
+  # The value returned is the serializer name held before this call.
+  my $previous = $self->setting('serializer');
+  return $previous unless @_;
+
+  my $serializer = shift;
+  # Load the serializer module, reporting a failure from the caller's side.
+  eval "require $serializer; 1" or croak $@;
+  $self->setting(serializer=>$serializer);
+
+  if ($serializer eq 'Storable') {
+    # Switch on Storable's global options used by this module.
+    $Storable::forgive_me =1;
+    $Storable::Deparse = 1;
+    $Storable::Eval    = 1;
+  }
+  return $previous;
+}
+
+# Get/set the 'compress' setting. Unlike the other accessors here, the
+# value returned is the one in effect *after* any update. When compression
+# is on, Compress::Zlib is loaded on demand (croaking if that fails).
+sub do_compress {
+  my $self = shift;
+  $self->setting(compress => shift) if @_;
+
+  my $enabled = $self->setting('compress');
+  if ($enabled && !Compress::Zlib->can('compress')) {
+    eval "use Compress::Zlib; 1" or croak $@;
+  }
+  return $enabled;
+}
+
+=head2 index_subfeatures
+
+ Title   : index_subfeatures
+ Usage   : $flag = $db->index_subfeatures([$new_value])
+ Function: flag whether to index subfeatures
+ Returns : current value of the flag
+ Args    : (optional) new value of the flag
+ Status  : public
+
+If true, the store() method will add a searchable index to both the
+top-level feature and all its subfeatures, allowing the search
+functions to return features at any level of the containment
+hierarchy. If false, only the top level feature will be indexed,
+meaning that you will only be able to get at subfeatures by fetching
+the top-level feature and then traversing downward using
+get_SeqFeatures().
+
+You are free to change this setting at any point during the creation
+and population of a database. One database can contain both indexed
+and unindexed subfeatures.
+
+=cut
+
+###
+# whether to index subfeatures by default
+#
+# Get/set the 'index_subfeatures' setting through setting().
+# Returns the value that was stored before this call.
+sub index_subfeatures {
+  my ($self, @new_value) = @_;
+  my $previous = $self->setting('index_subfeatures');
+  $self->setting('index_subfeatures' => $new_value[0]) if @new_value;
+  return $previous;
+}
+
+################################# TIE interface ####################
+
+=head1 TIE Interface
+
+This module implements a full TIEHASH interface. The keys are the
+primary IDs of the features in the database. Example:
+
+ tie %h,'Bio::DB::SeqFeature::Store',-adaptor=>'DBI::mysql',-dsn=>'dbi:mysql:elegans';
+ $h{123} = $feature1;
+ $h{124} = $feature2;
+ print $h{123}->display_name;
+
+=cut
+
+# tie() hook: builds the tied object by passing every argument
+# after the class name to new().
+sub TIEHASH {
+  my ($class, @args) = @_;
+  return $class->new(@args);
+}
+
+# Tied-hash assignment ($h{$key} = $feature). The key must consist of
+# digits only and be greater than zero, otherwise croak() is called.
+# The key becomes the feature's primary_id before the feature is
+# handed to store(); the result of store() is returned.
+sub STORE {
+  my ($self, $key, $feature) = @_;
+  croak "keys must be positive integers"
+    unless $key =~ /^\d+$/ && $key > 0;
+  $self->load_class($feature);
+  $feature->primary_id($key);
+  return $self->store($feature);
+}
+
+# Tied-hash read ($h{$key}): forwards the key to fetch().
+sub FETCH {
+  my ($self, @args) = @_;
+  return $self->fetch(@args);
+}
+
+# Tied-hash iteration start: returns whatever _firstid() returns.
+sub FIRSTKEY {
+  my ($self) = @_;
+  return $self->_firstid;
+}
+
+# Tied-hash iteration step: hands the previously returned key to
+# _nextid() and returns its result.
+sub NEXTKEY {
+  my ($self, $lastkey) = @_;
+  return $self->_nextid($lastkey);
+}
+
+# Tied-hash exists($h{$key}): asks the adaptor hook _existsid()
+# whether the given primary ID is present and returns its answer.
+# (The hook is named _existsid, with a leading underscore, like the
+# other TIEHASH back-end methods.)
+sub EXISTS {
+  my $self = shift;
+  my $key  = shift;
+  $self->_existsid($key);
+}
+
+# Tied-hash delete($h{$key}): forwards the key to _deleteid().
+sub DELETE {
+  my ($self, $key) = @_;
+  return $self->_deleteid($key);
+}
+
+# Tied-hash clear (%h = ()): delegates to _clearall().
+sub CLEAR {
+  my ($self) = @_;
+  return $self->_clearall;
+}
+
+# Tied-hash scalar(%h): returns the result of _featurecount().
+sub SCALAR {
+  my ($self) = @_;
+  return $self->_featurecount;
+}
+
+
+###################### TO BE IMPLEMENTED BY ADAPTOR ##########
+
+=head2 _init_database
+
+ Title   : _init_database
+ Usage   : $success = $db->_init_database([$erase])
+ Function: initialize an empty database
+ Returns : true on success
+ Args    : optional boolean flag to erase contents of an existing database
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
+
+This method is the back end for init_database(). It must be
+implemented by an adaptor that inherits from
+Bio::DB::SeqFeature::Store. It returns true on success.
+
+=cut
+
+# Abstract back end for init_database(): the base class only calls
+# throw_not_implemented() on the invocant.
+sub _init_database {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _store
+
+ Title   : _store
+ Usage   : $success = $db->_store($indexed,@objects)
+ Function: store seqfeature objects into database
+ Returns : true on success
+ Args    : a boolean flag indicating whether objects are to be indexed,
+           and one or more objects
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
+
+This method is the back end for store() and store_noindex(). It should
+write the seqfeature objects into the database. If indexing is
+requested, the features should be indexed for query and
+retrieval. Otherwise the features should be stored without indexing
+(it is not required that adaptors respect this).
+
+If the object has no primary_id (undef), then the object is written
+into the database and assigned a new primary_id. If the object already
+has a primary_id, then the system will perform an update, replacing
+whatever was there before.
+
+In practice, the implementation will serialize each object using the
+freeze() method and then store it in the database under the
+corresponding primary_id. The object is then updated with the
+primary_id.
+
+=cut
+
+# _store($indexed,@objs)
+# Abstract back end for store()/store_noindex(). The arguments are
+# unpacked only to document the expected call shape; the base class
+# then calls throw_not_implemented().
+sub _store {
+  my ($self, $indexed, @objs) = @_;
+  return $self->throw_not_implemented;
+}
+
+=head2 _fetch
+
+ Title   : _fetch
+ Usage   : $feature = $db->_fetch($primary_id)
+ Function: fetch feature from database
+ Returns : feature
+ Args    : primary id
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
+
+This method is the back end for fetch(). It accepts a primary_id and
+returns a feature object. It must be implemented by the adaptor.
+
+In practice, the implementation will retrieve the serialized
+Bio::SeqFeatureI object from the database and pass it to the thaw()
+method to unserialize it and synchronize the primary_id.
+
+=cut
+
+# _fetch($id)
+# Abstract back end for fetch(): the base class only calls
+# throw_not_implemented().
+sub _fetch {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _fetch_many
+
+ Title   : _fetch_many
+ Usage   : @features = $db->_fetch_many(@primary_ids)
+ Function: fetch many features from database
+ Returns : list of features
+ Args    : list of primary ids
+ Status  : private -- does not need to be implemented
+
+This method fetches many features specified by a list of IDs. The
+default implementation simply calls _fetch() once for each
+primary_id. Implementors can override it if needed for efficiency.
+
+=cut
+
+# _fetch_many(@ids)
+# Default implementation: one _fetch() call per ID, results collected
+# in argument order. Adaptors may override this with a batched query.
+sub _fetch_many {
+  my ($self, @ids) = @_;
+  my @features;
+  push @features, $self->_fetch($_) for @ids;
+  return @features;
+}
+
+=head2 _update_indexes
+
+ Title   : _update_indexes
+ Usage   : $success = $db->_update_indexes($feature)
+ Function: update the indexes for a feature
+ Returns : true on success
+ Args    : A seqfeature object
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
+
+This method is called by reindex() to update the searchable indexes
+for a feature object that has changed.
+
+=cut
+
+# Called to (re)index a single feature. Abstract here: the base class
+# only calls throw_not_implemented().
+sub _update_indexes {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _start_reindexing, _end_reindexing
+
+ Title   : _start_reindexing, _end_reindexing
+ Usage   : $db->_start_reindexing()
+           $db->_end_reindexing
+ Function: flag that a series of reindexing operations is beginning/ending
+ Returns : true on success
+ Args    : none
+ Status  : MAY BE IMPLEMENTED BY AN ADAPTOR (optional)
+
+These methods are called by reindex() before and immediately after a
+series of reindexing operations. The default behavior is to do
+nothing, but these methods can be overridden by an adaptor in order to
+perform optimizations, turn off autocommits, etc.
+
+=cut
+
+# Optional hooks invoked at the beginning and end of the reindexing
+# process. The defaults do nothing and return nothing; adaptors
+# override them only if they need to.
+sub _start_reindexing { return }
+sub _end_reindexing   { return }
+
+=head2 _features
+
+ Title   : _features
+ Usage   : @features = $db->_features(@args)
+ Function: back end for all get_feature_by_*() queries
+ Returns : list of features
+ Args    : see below
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
+
+This is the backend for features(), get_features_by_name(),
+get_features_by_location(), etc. Arguments are as described for the
+features() method, except that only the named-argument form is
+recognized.
+
+=cut
+
+# Bottleneck query generator behind the feature-query methods.
+# Abstract here: the base class only calls throw_not_implemented().
+sub _features {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _search_attributes
+
+ Title   : _search_attributes
+ Usage   : @result_list = $db->_search_attributes("text search string",[$tag1,$tag2...],$limit)
+ Function: back end for the search_attributes() method
+ Returns : results list
+ Args    : as per search_attributes()
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
+
+See search_attributes() for the format of the results list. The only
+difference between this and the public method is that the tag list is
+guaranteed to be an array reference.
+
+=cut
+
+# Abstract back end for search_attributes(): the base class only
+# calls throw_not_implemented().
+sub _search_attributes {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 can_store_parentage
+
+ Title   : can_store_parentage
+ Usage   : $flag = $db->can_store_parentage
+ Function: return true if this adaptor can store parent/child relationships
+ Returns : boolean
+ Args    : none
+ Status  : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
+
+Override this method and return true if this adaptor supports the
+_add_SeqFeature() and _fetch_SeqFeatures() methods, which are used for
+storing feature parent/child relationships in a normalized
+fashion. Default is false (parent/child relationships are stored in
+denormalized form in each feature).
+
+=cut
+
+# Adaptors that store parent/child relationships via _add_SeqFeature()
+# and return them via _fetch_SeqFeatures() override this to return true.
+# The default is false (undef in scalar context, empty list in list context).
+sub can_store_parentage {
+  return;
+}
+
+=head2 _add_SeqFeature
+
+ Title   : _add_SeqFeature
+ Usage   : $count = $db->_add_SeqFeature($parent,@children)
+ Function: store a parent/child relationship between $parent and @children
+ Returns : number of children successfully stored
+ Args    : parent feature and one or more children
+ Status  : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
+
+If can_store_parentage() returns true, then some store-aware features
+(e.g. Bio::DB::SeqFeature) will invoke this method to store
+feature/subfeature relationships in a normalized table.
+
+=cut
+
+# Optional adaptor hook for storing parent/child relationships.
+# The base class only calls throw_not_implemented().
+sub _add_SeqFeature {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _fetch_SeqFeatures
+
+ Title   : _fetch_SeqFeatures
+ Usage   : @children = $db->_fetch_SeqFeatures($parent_feature)
+ Function: return the immediate subfeatures of the indicated feature
+ Returns : list of subfeatures
+ Args    : the parent feature
+ Status  : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
+
+If can_store_parentage() returns true, then some store-aware features
+(e.g. Bio::DB::SeqFeature) will invoke this method to retrieve
+feature/subfeature relationships from the database.
+
+=cut
+
+# _fetch_SeqFeatures($parent,@list_of_child_types)
+# Optional adaptor hook for retrieving subfeatures. The base class
+# only calls throw_not_implemented().
+sub _fetch_SeqFeatures {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _insert_sequence
+
+ Title   : _insert_sequence
+ Usage   : $success = $db->_insert_sequence($seqid,$sequence_string,$offset)
+ Function: Inserts sequence data into the database at the indicated offset
+ Returns : true if successful
+ Args    : see below
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
+
+This is the back end for insert_sequence(). Adaptors must implement
+this method in order to store and retrieve nucleotide or protein
+sequence.
+
+=cut
+
+# Abstract back end for insert_sequence(): the base class only calls
+# throw_not_implemented().
+sub _insert_sequence {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+# _fetch_sequence() is similar to old dna() method
+
+=head2 _fetch_sequence
+
+ Title   : _fetch_sequence
+ Usage   : $sequence = $db->_fetch_sequence(-seq_id=>$seqid,-start=>$start,-end=>$end)
+ Function: Fetch the indicated subsequence from the database
+ Returns : The sequence string (not a Bio::PrimarySeq object!)
+ Args    : see below
+ Status  : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
+
+This is the back end for fetch_sequence(). Adaptors must implement
+this method in order to store and retrieve nucleotide or protein
+sequence.
+
+=cut
+
+# Abstract back end for fetch_sequence(): the base class only calls
+# throw_not_implemented().
+sub _fetch_sequence {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _seq_ids
+
+ Title   : _seq_ids
+ Usage   : @ids = $db->_seq_ids()
+ Function: Return all sequence IDs contained in database
+ Returns : list of sequence Ids
+ Args    : none
+ Status  : TO BE IMPLEMENTED BY ADAPTOR
+
+This method is invoked by seq_ids() to return all sequence IDs
+(coordinate systems) known to the database.
+
+=cut
+
+# Abstract back end for seq_ids(): the base class only calls
+# throw_not_implemented().
+sub _seq_ids {
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 _start_bulk_update,_finish_bulk_update
+
+ Title   : _start_bulk_update, _finish_bulk_update
+ Usage   : $db->_start_bulk_update
+           $db->_finish_bulk_update
+ Function: Activate optimizations for large number of insertions/updates
+ Returns : nothing
+ Args    : nothing
+ Status  : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTOR
+
+These are the backends for start_bulk_update() and
+finish_bulk_update(). The default behavior of both methods is to do
+nothing.
+
+=cut
+
+# Optional hooks that let an adaptor optimize bulk updating.
+# The defaults do nothing and return nothing.
+sub _start_bulk_update  { return }
+sub _finish_bulk_update { return }
+
+
+# for full TIE() interface  - not necessary to implement in most cases
+
+=head2 Optional methods needed to implement full TIEHASH interface
+
+The core TIEHASH interface will work if just the _store() and _fetch()
+methods are implemented. To support the full TIEHASH interface,
+including support for keys(), each(), and exists(), the following
+methods should be implemented:
+
+=over 4
+
+=item $id = $db-E<gt>_firstid()
+
+Return the first primary ID in the database. Needed for the each()
+function.
+
+=item $next_id = $db-E<gt>_nextid($id)
+
+Given a primary ID, return the next primary ID in the series. Needed
+for the each() function.
+
+=item $boolean = $db-E<gt>_existsid($id)
+
+Returns true if the indicated primary ID is in the database. Needed
+for the exists() function.
+
+=item $db-E<gt>_deleteid($id)
+
+Delete the feature corresponding to the given primary ID. Needed for
+delete().
+
+=item $db-E<gt>_clearall()
+
+Empty the database. Needed for %tied_hash = ().
+
+=item $count = $db-E<gt>_featurecount()
+
+Return the number of features in the database. Needed for scalar
+%tied_hash.
+
+=back
+
+=cut
+
+# Back-end hooks for the full TIEHASH interface. Each default simply
+# calls throw_not_implemented() on the invocant.
+sub _firstid      { my $self = shift; return $self->throw_not_implemented }
+sub _nextid       { my $self = shift; return $self->throw_not_implemented }
+sub _existsid     { my $self = shift; return $self->throw_not_implemented }
+sub _deleteid     { my $self = shift; return $self->throw_not_implemented }
+sub _clearall     { my $self = shift; return $self->throw_not_implemented }
+sub _featurecount { my $self = shift; return $self->throw_not_implemented }
+
+
+=head1 Internal Methods
+
+These methods are internal to Bio::DB::SeqFeature::Store and adaptors.
+
+=head2 new_instance
+
+ Title   : new_instance
+ Usage   : $db = $db->new_instance()
+ Function: class constructor
+ Returns : A descendent of Bio::DB::SeqFeature::Store
+ Args    : none
+ Status  : internal
+
+This method is called internally by new() to create a new
+uninitialized instance of Bio::DB::SeqFeature::Store. It is used
+internally and should not be called by application software.
+
+=cut
+
+# Returns a new empty hash blessed into the invocant's class. Works
+# as a class method or as an instance method (the instance's class is
+# reused in the latter case).
+sub new_instance {
+  my $invocant = shift;
+  my $package  = ref($invocant) || $invocant;
+  return bless {}, $package;
+}
+
+=head2 init
+
+ Title   : init
+ Usage   : $db->init(@args)
+ Function: initialize object
+ Returns : none
+ Args    : Arguments passed to new()
+ Status  : private
+
+This method is called internally by new() to initialize a
+newly-created object using the arguments passed to new(). It is to be
+overridden by Bio::DB::SeqFeature::Store adaptors.
+
+=cut
+
+# Base-class initialization: ignores any extra arguments and just
+# applies default_settings().
+sub init {
+  my ($self) = @_;
+  return $self->default_settings();
+}
+
+=head2 default_settings
+
+ Title   : default_settings
+ Usage   : $db->default_settings()
+ Function: set up default settings for the adaptor
+ Returns : none
+ Args    : none
+ Status  : private
+
+This method may be overridden by adaptors. It is responsible for
+setting up object default settings.
+
+=cut
+
+###
+# default settings -- set up whatever are the proper default settings
+#
+# Applies the defaults: the serializer chosen by default_serializer()
+# and subfeature indexing switched on.
+sub default_settings {
+  my ($self) = @_;
+  $self->serializer($self->default_serializer);
+  return $self->index_subfeatures(1);
+}
+
+=head2 default_serializer
+
+ Title   : default_serializer
+ Usage   : $serializer = $db->default_serializer
+ Function: finds an available serializer
+ Returns : the name of an available serializer
+ Args    : none
+ Status  : private
+
+This method returns the name of an available serializer module.
+
+=cut
+
+###
+# choose a serializer
+#
+# Returns the name of the first serializer module that can be loaded,
+# trying Storable and then Data::Dumper; croaks if neither loads.
+sub default_serializer {
+  my $self = shift;
+  for my $candidate ('Storable', 'Data::Dumper') {
+    return $candidate if eval "require $candidate; 1";
+  }
+  croak "Unable to load either Storable or Data::Dumper. Please provide a serializer using -serializer";
+}
+
+=head2 setting
+
+ Title   : setting
+ Usage   : $value = $db->setting('setting_name' [=> $new_value])
+ Function: get/set the value of a setting
+ Returns : the value of the current setting
+ Args    : the name of the setting and optionally a new value for the setting
+ Status  : private
+
+This is a low-level procedure for persistently storing database
+settings. It can be overridden by adaptors.
+
+=cut
+
+# Named settings. This default keeps them in the object itself, under
+# $self->{setting}. Returns the value stored before the call; a
+# second argument, if present, becomes the new value.
+sub setting {
+  my ($self, $variable_name, @new_value) = @_;
+  my $previous = $self->{setting}{$variable_name};
+  $self->{setting}{$variable_name} = $new_value[0] if @new_value;
+  return $previous;
+}
+
+=head2 subfeatures_are_indexed
+
+ Title   : subfeatures_are_indexed
+ Usage   : $flag = $db->subfeatures_are_indexed([$new_value])
+ Function: flag whether subfeatures are indexed
+ Returns : a flag indicating that all subfeatures are indexed
+ Args    : (optional) new value of the flag
+ Status  : private
+
+This method is used internally by the Bio::DB::SeqFeature class to
+optimize some of its operations. It returns true if all of the
+subfeatures in the database are indexed; it returns false if at least
+one of the subfeatures is not indexed. Do not attempt to change the
+value of this setting unless you are writing an adaptor.
+
+=cut
+
+###
+# whether subfeatures are all indexed
+#
+# Get/set the 'subfeatures_are_indexed' setting through setting().
+# Returns the value that was stored before this call.
+sub subfeatures_are_indexed {
+  my ($self, @new_value) = @_;
+  my $previous = $self->setting('subfeatures_are_indexed');
+  $self->setting(subfeatures_are_indexed => $new_value[0]) if @new_value;
+  return $previous;
+}
+
+=head2 subfeature_types_are_indexed
+
+ Title   : subfeature_types_are_indexed
+ Usage   : $flag = $db->subfeature_types_are_indexed
+ Function: whether subfeatures are indexed by type
+ Returns : a flag indicating that all subfeatures are indexed
+ Args    : none
+ Status  : private
+
+This method returns true if subfeature types are indexed. Default is
+to return the value of subfeatures_are_indexed().
+
+=cut
+
+# Read-only: reports the current value of subfeatures_are_indexed().
+sub subfeature_types_are_indexed {
+  my ($self) = @_;
+  my $flag = $self->subfeatures_are_indexed;
+  return $flag;
+}
+
+=head2 subfeature_locations_are_indexed
+
+ Title   : subfeature_locations_are_indexed
+ Usage   : $flag = $db->subfeature_locations_are_indexed
+ Function: whether subfeatures are indexed by location
+ Returns : a flag indicating that all subfeatures are indexed
+ Args    : none
+ Status  : private
+
+This method returns true if subfeature locations are indexed. Default is
+to return the value of subfeatures_are_indexed().
+
+=cut
+
+# Read-only: reports the current value of subfeatures_are_indexed().
+sub subfeature_locations_are_indexed {
+  my ($self) = @_;
+  my $flag = $self->subfeatures_are_indexed;
+  return $flag;
+}
+
+=head2 setup_segment_args
+
+ Title   : setup_segment_args
+ Usage   : @args = $db->setup_segment_args(@args)
+ Function: munge the arguments to the segment() call
+ Returns : munged arguments
+ Args    : see below
+ Status  : private
+
+This method is used internally by segment() to translate positional
+arguments into named argument=E<gt>value pairs.
+
+=cut
+
+# Converts positional segment() arguments into -name/-start/-end or
+# -class/-name pairs. Arguments whose first element already starts
+# with "-" are returned untouched. Any other argument count (zero, or
+# more than three) yields nothing.
+sub setup_segment_args {
+  my ($self, @args) = @_;
+  return @args if defined $args[0] && $args[0] =~ /^-/;
+  my $count = @args;
+  if ($count == 3) {
+    return (-name=>$args[0],-start=>$args[1],-end=>$args[2]);
+  }
+  elsif ($count == 2) {
+    return (-class=>$args[0],-name=>$args[1]);
+  }
+  elsif ($count == 1) {
+    return (-name=>$args[0]);
+  }
+  return;
+}
+
+=head2 store_and_cache
+
+ Title   : store_and_cache
+ Usage   : $success = $db->store_and_cache($indexed,@features)
+ Function: store features into database and update cache
+ Returns : number of features stored
+ Args    : list of features
+ Status  : private
+
+This private method stores the list of Bio::SeqFeatureI objects into
+the database and caches them in memory for retrieval.
+
+=cut
+
+# store_and_cache($indexit,@features)
+# Passes the index flag and the features to _store(), then, if a cache
+# is configured, records every feature that reports a defined
+# primary_id. Returns whatever _store() returned.
+sub store_and_cache {
+  my $self = shift;
+  my $indexit = shift;
+  my $result = $self->_store($indexit,@_);
+  if (my $cache = $self->cache) {
+    for my $obj (@_) {
+      # eval: objects that cannot report a primary_id are skipped quietly
+      defined (my $id     = eval {$obj->primary_id}) or next;
+      $cache->store($id,$obj);
+    }
+  }
+  $result;
+}
+
+=head2 init_cache
+
+ Title   : init_cache
+ Usage   : $db->init_cache($size)
+ Function: initialize the in-memory feature cache
+ Returns : the Tie::Cacher object
+ Args    : desired size of the cache
+ Status  : private
+
+This method is used internally by new() to create the Tie::Cacher
+instance used for the in-memory feature cache.
+
+=cut
+
+# Creates the Tie::Cacher object for the requested size and keeps it
+# in $self->{cache}. A size of exactly 1 is read as "caching on" and
+# replaced by 5000. throw() is called if the constructor returns false.
+sub init_cache {
+  my ($self, $cache_size) = @_;
+  if ($cache_size == 1) {   # in case somebody treats it as a flag
+    $cache_size = 5000;
+  }
+  $self->{cache} = Tie::Cacher->new($cache_size);
+  return $self->{cache} || $self->throw("Couldn't tie cache: $!");
+}
+
+=head2 cache
+
+ Title   : cache
+ Usage   : $cache = $db->cache
+ Function: return the cache object
+ Returns : the Tie::Cacher object
+ Args    : none
+ Status  : private
+
+This method returns the Tie::Cacher object used for the in-memory
+feature cache.
+
+=cut
+
+# Accessor: returns whatever is stored in $self->{cache}.
+sub cache {
+  my ($self) = @_;
+  return $self->{cache};
+}
+
+=head2 load_class
+
+ Title   : load_class
+ Usage   : $db->load_class($blessed_object)
+ Function: loads the module corresponding to a blessed object
+ Returns : empty
+ Args    : a blessed object
+ Status  : private
+
+This method is used by thaw() to load the code for a blessed
+object. This ensures that all the object's methods are available.
+
+=cut
+
+# Makes sure the code behind a blessed object is loaded. Nothing is
+# done for undef, nor for a class already seen by this store (a
+# per-class counter lives in $self->{class_loaded}). Otherwise, if the
+# object cannot primary_id(), its class is require'd; a failed
+# require is ignored.
+sub load_class {
+  my ($self, $obj) = @_;
+  defined $obj or return;
+  my $class = ref $obj;
+  $self->{class_loaded}{$class}++ and return;
+  unless ($obj && $obj->can('primary_id')) {
+    eval "require $class";
+  }
+}
+
+
+#################################### Internal methods ####################
+
+=head2 freeze
+
+ Title   : freeze
+ Usage   : $serialized_object = $db->freeze($feature)
+ Function: serialize a feature object into a string
+ Returns : serialized feature object
+ Args    : a seqfeature object
+ Status  : private
+
+This method converts a Bio::SeqFeatureI object into a serialized form
+suitable for storage into a database. The feature's primary ID is set
+to undef before it is serialized. This avoids any potential mismatch
+between the primary ID used as the database key and the primary ID
+stored in the serialized object.
+
+=cut
+
+sub freeze {   # serialize $obj with the configured serializer; returns the (optionally compressed) data
+  my $self = shift;
+  my $obj  = shift;
+
+  # Bio::SeqFeature::Generic contains cleanup methods, so we need to
+  # localize the methods to undef temporarily so that we can serialize
+  local $obj->{'_root_cleanup_methods'} if exists $obj->{'_root_cleanup_methods'};
+
+  my ($id,$store);
+  $id    = $obj->primary_id();   # remembered so it can be put back below
+  $obj->primary_id(undef);     # don't want primary ID to be stored in object
+  eval {   # eval: object_store() may not exist on this object; failure is ignored
+    $store = $obj->object_store;
+    $obj->object_store(undef);   # don't want a copy of the store in the object
+  };
+  my $serializer = $self->serializer;
+  my $data;   # NOTE(review): stays undef if the serializer is neither of the two names tested below
+  if ($serializer eq 'Data::Dumper') {
+    my $d    = Data::Dumper->new([$obj]);
+    $d->Terse(1);
+    $d->Deepcopy(1);
+    $data = $d->Dump;
+  } elsif ($serializer eq 'Storable') {
+    $data = Storable::nfreeze($obj);
+  }
+
+  $obj->primary_id($id);       # restore to original state
+  eval {   # put the store reference back; ignored if object_store() is unsupported
+    $obj->object_store($store);
+  };
+
+  $data = compress($data) if $self->do_compress;   # unqualified compress(): presumably the Compress::Zlib import made by do_compress() -- confirm
+  return $data;
+}
+
+=head2 thaw
+
+ Title   : thaw
+ Usage   : $feature = $db->thaw($serialized_object,$primary_id)
+ Function: unserialize a string into a feature object
+ Returns : Bio::SeqFeatureI object
+ Args    : serialized form of object from freeze() and primary_id of object
+ Status  : private
+
+This method is the reverse of the freeze(). The supplied primary_id
+becomes the primary_id() of the returned Bio::SeqFeatureI object. This
+implementation checks for a deserialized object in the cache before it
+calls thaw_object() to do the actual deserialization.
+
+=cut
+
+# Cache-aware front end for thaw_object(). With no cache configured
+# the call goes straight to thaw_object(). With a cache, an entry for
+# $primary_id is returned if present; otherwise the object is thawed,
+# stored in the cache and returned. A false result from thaw_object()
+# is not cached and yields an empty return.
+sub thaw {
+  my ($self, $obj, $primary_id) = @_;
+
+  my $cache = $self->cache
+    or return $self->thaw_object($obj,$primary_id);
+
+  return $cache->fetch($primary_id) if $cache->exists($primary_id);
+
+  my $object = $self->thaw_object($obj,$primary_id) or return;
+  $cache->store($primary_id,$object);
+  return $object;
+}
+
+=head2 thaw_object
+
+ Title   : thaw_object
+ Usage   : $feature = $db->thaw_object($serialized_object,$primary_id)
+ Function: unserialize a string into a feature object
+ Returns : Bio::SeqFeatureI object
+ Args    : serialized form of object from freeze() and primary_id of object
+ Status  : private
+
+After thaw() checks the cache and comes up empty, this method is
+invoked to thaw the object.
+
+=cut
+
+sub thaw_object {   # rebuild an object from its serialized form and tag it with $primary_id and this store
+  my $self               = shift;
+  my ($obj,$primary_id)  = @_;
+
+  my $serializer = $self->serializer;
+  my $object;   # NOTE(review): stays undef if the serializer is neither of the two names tested below
+
+  $obj = uncompress($obj) if $self->do_compress;   # unqualified uncompress(): presumably the Compress::Zlib import made by do_compress() -- confirm
+
+  if ($serializer eq 'Data::Dumper') {
+    $object = eval $obj;   # SECURITY: executes the stored text as Perl code; only safe if the database contents are trusted
+  } elsif ($serializer eq 'Storable') {
+    $object = Storable::thaw($obj);
+  }
+
+  # remember the primary ID of this object as well as the
+  # identity of the store, so that we can do lazy loading;
+  # both of these are wrapped in an eval because not all
+  # bioseqfeatures support them (or want to)
+  $self->load_class($object);
+  eval {
+    $object->primary_id($primary_id);
+    $object->object_store($self);
+  };
+  $object;
+}
+
+=head2 feature_names
+
+ Title   : feature_names
+ Usage   : ($names,$aliases) = $db->feature_names($feature)
+ Function: get names and aliases for a feature
+ Returns : an array of names and an array of aliases
+ Args    : a Bio::SeqFeatureI object
+ Status  : private
+
+This is an internal utility function which, given a Bio::SeqFeatureI
+object, returns two array refs. The first is a list of official names
+for the feature, and the second is a list of aliases. This is slightly
+skewed towards GFF3 usage, so the official names are the
+display_name(), plus all tag values named 'Name', plus all tag values
+named 'ID'. The aliases are all tag values named 'Alias'.
+
+=cut
+
+# Returns two array references: (\@names, \@aliases).
+#   names   - display_name plus the values of the 'Name' and 'ID' tags,
+#             minus undefined entries and entries equal to the primary_id
+#   aliases - the defined values of the 'Alias' tag (empty if absent)
+sub feature_names {
+  my $self = shift;
+  my $obj  = shift;
+
+  my $primary_id = $obj->primary_id;
+  my @names = $obj->display_name;
+  push @names,$obj->get_tag_values('Name') if $obj->has_tag('Name');
+  push @names,$obj->get_tag_values('ID')   if $obj->has_tag('ID');
+  @names = grep {defined $_ && $_ ne $primary_id} @names;
+
+  # Declare first, fill second: "my @x = ... if COND" has undefined
+  # behaviour when COND is false (see perlsyn, Statement Modifiers).
+  my @aliases;
+  @aliases = grep {defined} $obj->get_tag_values('Alias') if $obj->has_tag('Alias');
+
+  return (\@names,\@aliases);
+}
+
+
+1;
+
+__END__
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature>,
+L<Bio::DB::SeqFeature::Store::GFF3Loader>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqFeature.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,414 @@
+package Bio::DB::SeqFeature;
+
+# $Id: SeqFeature.pm,v 1.7.4.2 2006/10/02 23:10:15 sendu Exp $
+
+=head1 NAME
+
+Bio::DB::SeqFeature -- Normalized feature for use with Bio::DB::SeqFeature::Store
+
+=head1 SYNOPSIS
+
+ use Bio::DB::SeqFeature::Store;
+ # Open the sequence database
+ my $db      = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
+                                                -dsn     => 'dbi:mysql:test');
+ my ($feature)   = $db->get_features_by_name('ZK909');
+ my @subfeatures = $feature->get_SeqFeatures();
+ my @exons_only  = $feature->get_SeqFeatures('exon');
+
+ # create a new object
+ my $new = $db->new_feature(-primary_tag=>'gene',
+                            -seq_id     => 'chr3',
+                            -start      => 10000,
+                            -end        => 11000);
+
+ # add a new exon
+ $feature->add_SeqFeature($db->new_feature(-primary_tag=>'exon',
+                                           -seq_id     => 'chr3',
+                                           -start      => 5000,
+                                           -end        => 5551));
+
+=head1 DESCRIPTION
+
+The Bio::DB::SeqFeature object is the default SeqFeature class stored
+in Bio::DB::SeqFeature databases. It implements both the
+Bio::DB::SeqFeature::NormalizedFeatureI and
+Bio::DB::SeqFeature::TableFeatureI interfaces, which means that its
+subfeatures, if any, are stored in the database in a normalized
+fashion, and that the parent/child hierarchy of features and
+subfeatures are also stored in the database as a set of tuples. This
+provides efficiencies in both storage and retrieval speed.
+
+Typically you will not create Bio::DB::SeqFeature directly, but will
+ask the database to do so on your behalf, as described in
+L<Bio::DB::SeqFeature::Store>.
+
+=cut
+
+# just like Bio::DB::SeqFeature::NormalizedFeature except that the parent/child relationships are
+# stored in a table in the Bio::DB::SeqFeature::Store
+
+use strict;
+use Carp 'croak';
+use Bio::DB::SeqFeature::Store;
+use base qw(Bio::DB::SeqFeature::NormalizedFeature Bio::DB::SeqFeature::NormalizedTableFeatureI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $feature = Bio::DB::SeqFeature::NormalizedFeature->new(@args)
+ Function: create a new feature
+ Returns : the new seqfeature
+ Args    : see below
+ Status  : public
+
+This method creates and, if possible stores into a database, a new
+Bio::DB::SeqFeature::NormalizedFeature object using the specialized
+Bio::DB::SeqFeature class.
+
+The arguments are the same as for Bio::SeqFeature::Generic-E<gt>new() and
+Bio::Graphics::Feature-E<gt>new(). The most important difference is the
+B<-store> option, which if present creates the object in a
+Bio::DB::SeqFeature::Store database, and the B<-index> option, which
+controls whether the feature will be indexed for retrieval (default is
+true). Ordinarily, you would only want to turn indexing on when
+creating top level features, and off only when storing
+subfeatures. The default is on.
+
+Arguments are as follows:
+
+  -seq_id       the reference sequence
+  -start        the start position of the feature
+  -end          the stop position of the feature
+  -display_name the feature name (returned by seqname)
+  -primary_tag  the feature type (returned by primary_tag)
+  -source       the source tag
+  -score        the feature score (for GFF compatibility)
+  -desc         a description of the feature
+  -segments     a list of subfeatures (see Bio::Graphics::Feature)
+  -subtype      the type to use when creating subfeatures
+  -strand       the strand of the feature (one of -1, 0 or +1)
+  -phase        the phase of the feature (0..2)
+  -url          a URL to link to when rendered with Bio::Graphics
+  -attributes   a hashref of tag value attributes, in which the key is the tag
+                  and the value is an array reference of values
+  -store        a previously-opened Bio::DB::SeqFeature::Store object
+  -index        index this feature if true
+
+Aliases:
+
+  -id           an alias for -display_name
+  -seqname      an alias for -display_name
+  -display_id   an alias for -display_name
+  -name         an alias for -display_name
+  -stop         an alias for end
+  -type         an alias for primary_tag
+
+=cut
+
+sub add_segment {
+  my $self = shift;
+  $self->_add_segment(0,@_);  # 0: the $normalized flag of _add_segment() is false
+}
+
+
+=head2 Bio::SeqFeatureI methods
+
+The following Bio::SeqFeatureI methods are supported:
+
+ seq_id(), start(), end(), strand(), get_SeqFeatures(),
+ display_name(), primary_tag(), source_tag(), seq(),
+ location(), primary_id(), overlaps(), contains(), equals(),
+ intersection(), union(), has_tag(), remove_tag(),
+ add_tag_value(), get_tag_values(), get_all_tags()
+
+Some methods that do not make sense in the context of a genome
+annotation database system, such as attach_seq(), are not supported.
+
+Please see L<Bio::SeqFeatureI> for more details.
+
+=cut
+
+=head2 add_SeqFeature
+
+ Title   : add_SeqFeature
+ Usage   : $flag = $feature->add_SeqFeature(@features)
+ Function: Add subfeatures to the feature
+ Returns : true if successful
+ Args    : list of Bio::SeqFeatureI objects
+ Status  : public
+
+Add one or more subfeatures to the feature. For best results,
+subfeatures should be of the same class as the parent feature
+(i.e. don't try mixing Bio::DB::SeqFeature::NormalizedFeature with
+other feature types).
+
+An alias for this method is add_segment().
+
+=cut
+
+=head2 update
+
+ Title   : update
+ Usage   : $flag = $feature->update()
+ Function: Update feature in the database
+ Returns : true if successful
+ Args    : none
+ Status  : public
+
+After changing any fields in the feature, call update() to write it to
+the database. This is not needed for add_SeqFeature() as update() is
+invoked automatically.
+
+=cut
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   : @subfeatures = $feature->get_SeqFeatures([@types])
+ Function: return subfeatures of this feature
+ Returns : list of subfeatures
+ Args    : list of subfeature primary_tags (optional)
+ Status  : public
+
+This method extends the Bio::SeqFeatureI get_SeqFeatures() slightly by
+allowing you to pass a list of primary_tags, in which case only
+subfeatures whose primary_tag is contained on the list will be
+returned. Without any types passed all subfeatures are returned.
+
+=cut
+
+=head2 object_store
+
+ Title   : object_store
+ Usage   : $store = $feature->object_store([$new_store])
+ Function: get or set the database handle
+ Returns : current database handle
+ Args    : new database handle (optional)
+ Status  : public
+
+This method will get or set the Bio::DB::SeqFeature::Store object that
+is associated with the feature. After changing the store, you should
+probably unset the feature's primary_id() and call update() to ensure
+that the object is written into the database as a new feature.
+
+=cut
+
+=head2 overloaded_names
+
+ Title   : overloaded_names
+ Usage   : $overload = $feature->overloaded_names([$new_overload])
+ Function: get or set overloading of object strings
+ Returns : current flag
+ Args    : new flag (optional)
+ Status  : public
+
+For convenience, when objects of this class are stringified, they are
+represented in the form "primary_tag(display_name)". To turn this
+feature off, call overloaded_names() with a false value. You can
+invoke this on an individual feature object or on the class:
+
+  Bio::DB::SeqFeature::NormalizedFeature->overloaded_names(0);
+
+=cut
+
+=head2 segment
+
+ Title   : segment
+ Usage   : $segment = $feature->segment
+ Function: return a Segment object corresponding to feature
+ Returns : a Bio::DB::SeqFeature::Segment
+ Args    : none
+ Status  : public
+
+This turns the feature into a Bio::DB::SeqFeature::Segment object,
+which you can then use to query for overlapping features. See
+L<Bio::DB::SeqFeature::Segment>.
+
+=cut
+
+=head2 AUTOLOADED methods
+
+ @subfeatures = $feature->Exon;
+
+If you use an unknown method that begins with a capital letter, then
+the feature autogenerates a call to get_SeqFeatures() using the
+lower-cased method name as the primary_tag. In other words
+$feature-E<gt>Exon is equivalent to:
+
+ @subfeatures = $feature->get_SeqFeatures('exon')
+
+=cut
+
+=head2 load_id
+
+ Title   : load_id
+ Usage   : $id = $feature->load_id
+ Function: get the GFF3 load ID
+ Returns : the GFF3 load ID (string)
+ Args    : none
+ Status  : public
+
+For features that were originally loaded by the GFF3 loader, this
+method returns the GFF3 load ID. This method may not be supported in
+future versions of the module.
+
+=cut
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $id = $feature->primary_id([$new_id])
+ Function: get/set the feature's database ID
+ Returns : the current primary ID
+ Args    : none
+ Status  : public
+
+This method gets or sets the primary ID of the feature in the
+underlying Bio::DB::SeqFeature::Store database. If you change this
+field and then call update(), it will have the effect of making a copy
+of the feature in the database under a new ID.
+
+=cut
+
+=head2 target
+
+ Title   : target
+ Usage   : $segment = $feature->target
+ Function: return the segment correspondent to the "Target" attribute
+ Returns : a Bio::DB::SeqFeature::Segment object
+ Args    : none
+ Status  : public
+
+For features that are aligned with others via the GFF3 Target
+attribute, this returns a segment corresponding to the aligned
+region. The CIGAR gap string is not yet supported.
+
+=cut
+
+=head2 Internal methods
+
+=over 4
+
+=item $feature-E<gt>as_string()
+
+Internal method used to implement overloaded stringification.
+
+=item $boolean = $feature-E<gt>type_match(@list_of_types)
+
+Internal method that will return true if the feature's primary_tag and
+source_tag match any of the list of types (in primary_tag:source_tag
+format) provided.
+
+=back
+
+=cut
+
+# This adds subfeatures. It has the property of converting the
+# provided features into an object like itself and storing them
+# into the database. If the feature already has a primary id and
+# an object_store() method, then it is not stored into the database,
+# but its primary id is reused.
+sub _add_segment {
+  my $self       = shift;
+  my $normalized = shift;
+
+  my $store      = $self->object_store;
+  return         $self->SUPER::_add_segment($normalized,@_)
+    unless $normalized && eval{$store->can_store_parentage};
+
+  my @segments   = $self->_create_subfeatures($normalized,@_);
+
+  my $pos = "@{$self}{'start','end','ref','strand'}";  # snapshot, compared again below
+
+  # fix boundaries
+  $self->_fix_boundaries(\@segments);
+
+  # freakish fixing of our non-standard Target attribute
+  $self->_fix_target(\@segments);
+
+  # write our children out
+  if ($normalized) {  # always true here: the guard above returned otherwise
+    $store->add_SeqFeature($self,@segments);
+  } else {
+    push @{$self->{segments}},@segments;
+  }
+
+  # write us back to disk, but only if the coordinates changed
+  $self->update if $self->primary_id && $pos ne "@{$self}{'start','end','ref','strand'}";
+}
+
+# segments can be stored directly in the object (legacy behavior)
+# or stored in the database
+# an optional list of types can be used to specify which types to return
+sub get_SeqFeatures {
+  my $self         = shift;
+  my @types        = @_;
+
+  my @inline_segs  = exists $self->{segments} ? @{$self->{segments}} : ();
+  @inline_segs     = grep {$_->type_match(@types)} @inline_segs if @types;
+  my $store        = $self->object_store;
+
+  my @db_segs;
+
+  if ($store && $store->can_store_parentage) {
+    if (!@types || $store->subfeature_types_are_indexed) {
+      @db_segs = $store->fetch_SeqFeatures($self,@types);  # the store filters by type
+    } else {
+      @db_segs     = grep {$_->type_match(@types)} $store->fetch_SeqFeatures($self);
+    }
+  }
+
+  my @segs         = (@inline_segs,@db_segs);
+  foreach (@segs) {
+    eval {$_->object_store($store)};  # best effort: failures are ignored
+  }
+  return @segs;
+}
+
+sub denormalized_segments {
+  return () unless exists $_[0]{segments};  # no inline segments recorded
+  return @{ $_[0]{segments} };
+}
+
+sub denormalized_segment_count {
+  my ($self) = @_;
+  my $inline = exists $self->{segments} ? $self->{segments} : [];  # absent key counts as empty
+  return scalar @$inline;
+}
+
+# for Bio::LocationI compatibility; always returns nothing (undef / empty list)
+sub is_remote { return }
+
+1;
+
+
+__END__
+
+=head1 BUGS
+
+This is an early version, so there are certainly some bugs. Please
+use the BioPerl bug tracking system to report bugs.
+
+=head1 SEE ALSO
+
+L<bioperl>,
+L<Bio::DB::SeqFeature::Store>,
+L<Bio::DB::SeqFeature::Segment>,
+L<Bio::DB::SeqFeature::NormalizedFeature>,
+L<Bio::DB::SeqFeature::GFF3Loader>,
+L<Bio::DB::SeqFeature::Store::DBI::mysql>,
+L<Bio::DB::SeqFeature::Store::bdb>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqHound.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqHound.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqHound.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,751 @@
+# BioPerl module for Bio::DB::SeqHound
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+# 
+
+=head1 NAME
+
+Bio::DB::SeqHound - Database object interface to SeqHound
+
+=head1 SYNOPSIS
+
+    use Bio::DB::SeqHound;
+    $sh = Bio::DB::SeqHound->new();
+
+    $seq = $sh->get_Seq_by_acc("CAA28783"); # Accession Number
+
+    # or ...
+
+    $seq = $sh->get_Seq_by_gi(4557225); # GI Number
+
+=head1 VERSION
+
+1.1
+
+=head1 DESCRIPTION
+
+SeqHound is a database of biological sequences and structures.  This
+script allows the retrieval of sequence objects (Bio::Seq) from the
+SeqHound database at the Blueprint Initiative.
+
+Bioperl extension permitting use of the SeqHound Database System
+developed by researchers at
+
+ The Blueprint Initiative
+ Samuel Lunenfeld Research Institute
+ Mount Sinai Hospital
+ Toronto, Canada
+
+
+=head1 FEEDBACK/BUGS
+
+known bugs: fail to get sequences for some RefSeq record with CONTIG,
+example GI = 34871762
+
+E<lt>seqhound at blueprint.orgE<gt>
+
+=head1 MAILING LISTS
+
+User feedback is an integral part of the evolution of this Bioperl module. Send
+your comments and suggestions preferably to seqhound.usergroup mailing lists.
+Your participation is much appreciated.
+
+E<lt>seqhound.usergroup at lists.blueprint.orgE<gt>
+
+=head1 WEBSITE
+
+For more information on SeqHound http://www.blueprint.org/seqhound/
+
+=head1 DISCLAIMER
+
+This software is provided 'as is' without warranty of any kind.
+
+=head1 AUTHOR
+
+Rong Yao, Hao Lieu, Ian Donaldson
+
+E<lt>seqhound at blueprint.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::SeqHound;
+use strict;
+use vars qw($HOSTBASE $CGILOCATION $LOGFILENAME);
+
+use Bio::Root::IO;
+use Bio::SeqIO;
+use IO::String;
+use POSIX qw(strftime);
+
+use base qw(Bio::DB::WebDBSeqI Bio::Root::Root);
+BEGIN {    
+    $HOSTBASE = 'http://seqhound.blueprint.org';
+    $CGILOCATION = '/cgi-bin/seqrem?fnct=';
+    $LOGFILENAME = 'shoundlog';
+}
+
+
+# helper method to get db specific options
+
+=head2 new
+
+ Title   : new
+ Usage   : $sh = Bio::DB::SeqHound->new(@options);
+ Function: Creates a new seqhound handle
+ Returns : New seqhound handle
+ Args    : 
+
+=cut
+
+sub new {
+	my ($class, @args) = @_;
+	my $self = $class->SUPER::new(@args);
+
+	# _init_SeqHound reports success with the literal string "TRUE";
+	# for any other value no handle is returned
+	my $initialised = $self->_init_SeqHound eq "TRUE";
+	return unless $initialised;
+	return $self;
+}
+
+=head1 Routines Bio::DB::WebDBSeqI from Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN'); 
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : nothing; warns "id does not exist" and returns nothing on failure
+ Example : Each of these calls retrieves the same sequence record
+ 	   $seq = $db->get_Seq_by_id(56);        #retrieval by GI
+	   $seq = $db->get_Seq_by_id("X02597");  #retrieval by NCBI accession
+	   $seq = $db->get_Seq_by_id("BTACHRE"); #retrieval by sequence "name"
+	   a sequence "name" is a secondary identifier (usually assigned by the
+	   submitting database external to the NCBI) that may not be visible in
+	   the GenBank flat file version of the record but is always present in
+	   the ASN.1 format.
+ Note    : Since in GenBank.pm, this function accepts a gi, an accession number
+           or a sequence name, SeqHound also satisfies these inputs.
+	   If the input uid is a number, it is treated as a gi, if the uid is a
+	   string, it is treated as an accession number first. If the search still
+	   fails, it is treated as a sequence name.
+	   Since SeqHound stores biological data from different source sequence
+	   databases like: GenBank, GenPept, SwissProt, EMBL, RefSeq,
+	   you can pass ids from the above databases to this function. 
+	   The Bio::Seq object returned by this function is identical to the
+	   Bio::Seq generated by the GenBank.pm and GenPept.pm.
+	   The Bio::Seq object returned by this function sometimes has minor
+	   difference in the SeqFeature from the Bio::Seq object generated 
+	   in RefSeq.pm. 
+	   The Bio::Seq objects created from this function will have the NCBI
+	   versions of the SwissProt and EMBL sequence data information.
+
+=cut
+
+sub get_Seq_by_id {
+	my ($self, $id)= @_;
+	if ($id =~ /^\d+$/){	# all digits: the id is used directly as a GI
+		my $seqio= $self-> _get_Seq_from_gbff ($id);
+		if (defined $seqio){
+			return $seqio->next_seq;
+		}
+	}
+	elsif ($id =~ /^\S+$/){	# any other id without whitespace
+	    # try the id as an accession first, then as a sequence name
+	    my $gi = $self ->_get_gi_from_acc ($id);
+	    if (!defined $gi){	# not a known accession
+			my $gi = $self->_get_gi_from_name($id);
+			if (defined $gi){
+				my $seqio = $self->_get_Seq_from_gbff($gi);
+				if (defined $seqio){
+					return $seqio->next_seq;
+				}
+			}
+		}
+		else{
+			my $seqio = $self->_get_Seq_from_gbff($gi);
+			if (defined $seqio){
+				return $seqio->next_seq;
+			}
+			else {	# accession resolved but nothing was fetched: retry by name
+				my $gi = $self->_get_gi_from_name($id);
+				if (defined $gi) {
+					my $seqio = $self->_get_Seq_from_gbff($gi);
+					if (defined $seqio){
+						return $seqio->next_seq;
+					}
+				}
+			}
+			
+		}
+	}
+    	else{	# empty id or id containing whitespace
+		$self->warn("[get_Seq_by_id]: invalid input id.");
+		return;
+	}
+	# every lookup above fell through without returning a sequence
+	$self->warn("[get_Seq_by_id]: id $id does not exist");
+	return;
+}
+						                    
+
+=head2 get_Seq_by_acc
+
+  Title   : get_Seq_by_acc
+  Usage   : $seq = $db->get_Seq_by_acc('M34830');
+  Function: Gets a Seq object by accession numbers
+  Returns : a Bio::Seq object
+  Args    : the accession number as a string
+  Throws  : nothing; warns "id does not exist" and returns nothing on failure
+  Note    : Since in GenBank.pm, this function accepts an accession number
+            or a sequence name, SeqHound also satisfies these inputs.
+	    If the input uid is a string, it is treated as an accession number first.
+	    If the search fails, it is treated as a sequence name.
+	    Since SeqHound stores biological data from different source sequence
+	    databases like: GenBank, GenPept, SwissProt, EMBL, RefSeq,
+	    you can pass ids from the above databases to this function. 
+	    The Bio::Seq object returned by this function is identical to the
+	    Bio::Seq generated by the GenBank.pm and GenPept.pm.
+	    The Bio::Seq object returned by this function sometimes has minor
+	    difference in the SeqFeature from the Bio::Seq object generated 
+	    in RefSeq.pm. 
+	    The Bio::Seq objects created from this function will have the NCBI
+	    versions of the SwissProt and EMBL sequence data information.
+
+=cut
+
+sub get_Seq_by_acc {
+	my ($self, $acc) = @_;
+	#exclude $acc is a number, since function does not accept gi as input
+	if ($acc =~ /^\d+$/) {
+		$self->warn ("[get_Seq_by_acc]: id $acc does not exist");
+		return;
+	}
+	# first interpretation: $acc is an accession number
+	my $gi = $self->_get_gi_from_acc($acc);
+	# a resolved accession whose record cannot be fetched is NOT retried by name
+	if (defined $gi) {
+		my $seqio = $self->_get_Seq_from_gbff($gi);
+		if (defined $seqio){
+			return $seqio->next_seq;
+		}
+	}
+	#else, treat input as sequence name
+	else {
+		$gi = $self->_get_gi_from_name($acc);
+		# same fetch as above, this time with the GI found by name
+		if (defined $gi){
+			my $seqio = $self->_get_Seq_from_gbff($gi);
+			if (defined $seqio){
+				return $seqio->next_seq;
+			}
+		}
+	}
+	$self->warn("[get_Seq_by_acc]: id $acc does not exist.");
+	return;
+}
+
+
+=head2 get_Seq_by_gi
+
+ Title   : get_Seq_by_gi
+ Usage   : $seq = $sh->get_Seq_by_gi('405830');
+ Function: Gets a Bio::Seq object by gi number
+ Returns : A Bio::Seq object
+ Args    : gi number (as a string)
+ Throws  : nothing; warns "id does not exist" and returns nothing on failure
+ Note    : call the same code get_Seq_by_id
+
+=cut
+
+sub get_Seq_by_gi
+{
+	my ($self, $gi) = @_;
+	return $self->get_Seq_by_id($gi);	# method call, so subclass overrides are honoured
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+ Note    : SeqHound only keeps the most up-to-date version of a sequence. So
+           for the above example, use 
+	   $seq = $db->get_Seq_by_acc('X77802'); 
+	   instead of X77802.1
+
+
+=head2 get_Stream_by_query
+
+  Title   : get_Stream_by_query
+  Usage   : $seq = $db->get_Stream_by_query($query);
+  Function: Retrieves Seq objects from Entrez 'en masse', rather than one
+            at a time.  For large numbers of sequences, this is far superior
+            to get_Stream_by_[id/acc]().
+  Example : $query_string = 'Candida maltosa 26S ribosomal RNA gene'; 
+  	    $query = Bio::DB::Query::GenBank->new(-db=>'nucleotide',
+                                        -query=>$query_string);
+            $stream = $sh->get_Stream_by_query($query);
+	    or
+	    $query = Bio::DB::Query::GenBank->new (-db=> 'nucleotide',
+	    				-ids=>['X02597', 'X63732', 11002, 4557284]);
+	    $stream = $sh->get_Stream_by_query($query);
+  Returns : a Bio::SeqIO stream object
+  Args    : $query :   A Bio::DB::Query::GenBank object. It is suggested that
+            you create a Bio::DB::Query::GenBank object and get the entry
+            count before you fetch a potentially large stream.
+
+=cut
+
+sub get_Stream_by_query{
+	my ($self, $query) = @_;
+	my @ids = $query->ids;
+	# called as a method so that subclass overrides of get_Stream_by_id are honoured
+	return $self->get_Stream_by_id(\@ids);
+}
+
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id(['J05128', 'S43442', 34996479]);
+  Function: Gets a series of Seq objects by unique identifiers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of unique identifiers for
+                   the desired sequence entries, according to genbank.pm
+		   this function accepts gi, accession number
+		   and sequence name
+  Note    : Since in GenBank.pm, this function accepts a gi, an accession number
+            or a sequence name, SeqHound also satisfies these inputs.
+	    If the input uid is a number, it is treated as a gi, if the uid is a
+	    string, it is treated as an accession number first. If the search still
+	    fails, it is treated as a sequence name.
+	    Since SeqHound stores biological data from different source sequence
+	    databases like: GenBank, GenPept, SwissProt, EMBL, RefSeq,
+	    you can pass ids from the above databases to this function. 
+	    The Bio::Seq object returned by this function is identical to the
+	    Bio::Seq generated by the GenBank.pm and GenPept.pm.
+	    The Bio::Seq object returned by this function sometimes has minor
+	    difference in the SeqFeature from the Bio::Seq object generated 
+	    in RefSeq.pm. 
+	    The Bio::Seq objects created from this function will have the NCBI
+	    versions of the SwissProt and EMBL sequence data information.   
+
+=cut
+
+sub get_Stream_by_id
+{
+	my ($self, $id) = @_;
+	my @gilist;	# GIs to fetch, in input order
+	if(!defined $id) {
+		$self->warn("[get_Stream_by_id]: undefined input id");
+		return;
+	}
+	if (ref($id)=~ /array/i){	# only an array reference of ids is accepted
+		foreach my $i (@$id){
+			if ($i =~ /^\d+$/){	# all digits: already a GI
+				push(@gilist, $i);
+			}
+			elsif ($i =~ /^\S+$/) {	# accession first, then sequence name
+				my $gi = $self->_get_gi_from_acc($i);
+				if (!defined $gi){
+					$gi = $self->_get_gi_from_name($i);
+					if (!defined $gi){
+						$self->warn("[get_Stream_by_id]: id $i does not exist.");
+						# the id is skipped; the remaining ids are still fetched
+					}
+					else {
+						push (@gilist, $gi);
+					}
+				}
+				else {
+					push(@gilist, $gi);
+				}
+			}
+			else {
+				$self->warn("[get_Stream_by_id]: id $i does not exist.");
+				# empty ids and ids containing whitespace are never looked up
+			}
+		}
+		my $seqio = $self->_get_Seq_from_gbff(\@gilist);
+		return $seqio;
+	}
+	else {
+		return;
+	}
+}
+
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $seq = $db->get_Stream_by_acc(['M98777', 'M34830']);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For SeqHound, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+sub get_Stream_by_acc
+{
+	my ($self, $acc) = @_;
+	return $self->get_Stream_by_id($acc);	# method call, so subclass overrides are honoured
+}
+
+=head2 get_Stream_by_gi
+
+  Title   : get_Stream_by_gi
+  Usage   : $stream = $db->get_Stream_by_gi([161966, 255064]);
+  Function: Gets a series of Seq objects by gi numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of gi numbers for
+                   the desired sequence entries
+  Note    : For SeqHound, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+sub get_Stream_by_gi{
+	my ($self, $gi) = @_;
+	return $self->get_Stream_by_id($gi);	# method call, so subclass overrides are honoured
+}
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $lcontent = $self->get_request;
+ Function: get the output from SeqHound API http call
+ Returns : the result of the remote call from SeqHound
+ Args    : %qualifiers = a hash of qualifiers 
+           (SeqHound function name, id, query etc)
+ Example : $lcontent = $self->get_request(-funcname=>'SeqHoundGetGenBankff',
+		 			-query=>'gi',
+					-uids=>555);
+ Note    : this function overrides the implementation in Bio::DB::WebDBSeqI.
+
+=cut
+
+sub get_request {
+	my ($self, @qualifiers) = @_;
+	my ($funcname, $query, $uids, $other) = $self->_rearrange([qw(FUNCNAME QUERY UIDS OTHER)],
+							@qualifiers);
+	# Every mandatory argument must be defined AND non-empty.  Note that
+	# "defined $x ne ''" parses as "(defined $x) ne ''" and would only test
+	# definedness, letting empty strings through.
+	unless (defined $funcname && $funcname ne '') {
+		$self->throw("please specify the SeqHound function for query");
+	}
+	unless (defined $uids && $uids ne '') {
+		$self->throw("please specify a uid or a list of uids to fetch");
+	}
+	unless (defined $query && $query ne '') {
+		$self->throw("please specify a valid query field");
+	}
+
+	# a list of uids is sent as one comma-separated value
+	if (ref($uids) =~ /array/i) {
+		$uids = join(",", @$uids);
+	}
+	my $url = $HOSTBASE . $CGILOCATION . $funcname . "&" . $query . "=" . $uids;
+	if (defined $other) {
+		$url = $url . "&" . $other;
+	}
+
+	my $ua  = LWP::UserAgent->new(env_proxy => 1);
+	my $req = HTTP::Request->new ('GET', $url);
+	my $res = $ua->request($req);
+	if ($res->is_success){
+		return $res->content;
+	}
+
+	# On failure the error text is returned (not thrown): postprocess_data()
+	# recognises the "HTTP::Request error" marker and logs it.
+	my $result = "HTTP::Request error: ".$res->status_line."\n";
+	$self->warn("$result");
+	return $result;
+}
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data (-funcname => $funcname,
+		                    -lcontent => $lcontent,
+				    -outtype  => $outtype);
+ Function: process return String from http seqrem call 
+           output type can be a string or a Bio::SeqIO object.
+ Returns : the payload string or a Bio::SeqIO object, depending on $outtype; nothing on error
+ Args    : $funcname is the API function name of SeqHound 
+           $lcontent is a string output from SeqHound server http call
+           $outtype is a string or a Bio::SeqIO object 
+ Example : $seqio = $self->postprocess_data ( -lcontent => $lcontent,
+                             		-funcname => 'SeqHoundGetGenBankffList',
+				      	-outtype => 'Bio::SeqIO');
+	   or
+	   $gi = $self->postprocess_data( -lcontent => $lcontent,
+			                -funcname => 'SeqHoundFindAcc',
+					-outtype => 'string');
+ Note    : this method overrides the method works for genbank/genpept,
+           this is for SeqHound
+
+=cut
+
+sub postprocess_data
+{
+    my ($self, @args) = @_;
+    my ($funcname, $lcontent, $outtype) = $self->_rearrange(
+                        [qw(FUNCNAME LCONTENT OUTTYPE)], @args);
+    # all three named arguments are mandatory
+    if (!defined $outtype){
+        $self->throw("please specify the output type, string, Bio::SeqIO etc");
+    }
+    if (!defined $lcontent){
+        $self->throw("please provide the result from SeqHound call");
+    }
+    if (!defined $funcname){
+        $self->throw("Please provide the function name");
+    }
+
+    #set up verbosity level if need record in the log file
+    my $log_msg = "Writing into '$LOGFILENAME' log file.\n";
+    my $now = strftime("%a %b %e %H:%M:%S %Y", localtime);
+
+    # Record a failed call as one timestamped line in the log file and
+    # return nothing, so the branches below can "return $log_failure->(...)".
+    # Logging is best-effort: a log file that cannot be opened is reported
+    # through warn() instead of printing to a handle that never opened.
+    my $log_failure = sub {
+        my ($entry) = @_;
+        $self->debug($log_msg);
+        if (open (my $LOG, '>>', $LOGFILENAME)) {
+            print $LOG "$now$entry\n";
+            close $LOG;
+        }
+        else {
+            $self->warn("Cannot append to log file '$LOGFILENAME': $!");
+        }
+        return;
+    };
+
+    # replies that signal a failed call
+    if ($lcontent eq "") {
+        return $log_failure->("\t\t$funcname. No reply.");
+    }
+    elsif ($lcontent =~ /HTTP::Request error/) {
+        return $log_failure->("\t\t$funcname. Http::Request error problem.");
+    }
+    elsif ($lcontent =~ /SEQHOUND_ERROR/) {
+        return $log_failure->("\t$funcname error. SEQHOUND_ERROR found.");
+    }
+    elsif ($lcontent =~ /SEQHOUND_NULL/) {
+        return $log_failure->("\t$funcname Value not found in the database. SEQHOUND_NULL found.");
+    }
+
+    # the first line is the status line, the remainder is the payload
+    chomp $lcontent;
+    my (undef, $result) = split(/\n/, $lcontent, 2);
+    # a reply made of the status line alone leaves $result undefined; it is
+    # passed on unchanged, exactly like a payload that matches neither pattern
+    if (defined $result && $result =~ /^-1/) {
+        return $log_failure->("\t$funcname Value not found in the database. -1 found.");
+    }
+    elsif (defined $result && $result =~ /^0/) {
+        return $log_failure->("\t$funcname failed.");
+    }
+
+    #a list of functions in SeqHound which can wrap into Bio::seqIO object
+    if ($outtype eq 'Bio::SeqIO'){
+        my $buf = IO::String->new($result);
+        my $io = Bio::SeqIO->new (-format => 'genbank', -fh => $buf);
+        if (defined $io && $io ne ''){
+            return $io;
+        }
+        else { return;}
+    }
+    #return a string if outtype is "string"
+    return $result;
+}
+
+
+=head2 _get_gi_from_name
+ 
+ Title   : _get_gi_from_name
+ Usage   : $self->_get_gi_from_name('J05128');
+ Function: get the gene identifier from a sequence name
+           in SeqHound database
+ Return  : gene identifier or undef
+ Args    : a string represented sequence name
+
+=cut
+
+sub _get_gi_from_name
+{
+	my ($self, $name) = @_;
+	# query the SeqHoundFindName function with the name as the uid
+	my $reply = $self->get_request(
+		-funcname => 'SeqHoundFindName',
+		-query    => 'name',
+		-uids     => $name);
+	my $gi = $self->postprocess_data(
+		-lcontent => $reply,
+		-funcname => 'SeqHoundFindName',
+		-outtype  => 'string');
+	return $gi;
+}
+
+=head2 _get_gi_from_acc
+ 
+ Title   : _get_gi_from_acc
+ Usage   : $self->_get_gi_from_acc('M34830')
+ Function: get the gene identifier from an accession number
+ 	  in SeqHound database
+ Return  : gene identifier or undef
+ Args    : a string represented accession number
+
+=cut
+
+sub _get_gi_from_acc
+{
+	my $self = shift;
+	my $acc  = shift;
+
+	# Step 1: send the SeqHoundFindAcc query for this accession number.
+	my $response = $self->get_request(
+		-funcname => 'SeqHoundFindAcc',
+		-query    => 'acc',
+		-uids     => $acc,
+	);
+
+	# Step 2: let postprocess_data() reduce the raw reply to a plain string.
+	# The call is made in scalar context, so a reply that yields nothing
+	# reaches the caller as undef.
+	my $gi = $self->postprocess_data(
+		-lcontent => $response,
+		-funcname => 'SeqHoundFindAcc',
+		-outtype  => 'string',
+	);
+	return $gi;
+}
+
+=head2 _get_Seq_from_gbff
+ 
+ Title   : _get_Seq_from_gbff
+ Usage   : $self->_get_Seq_from_gbff($str)
+ Function: get the Bio::SeqIO stream object from gi or a list of gi
+           in SeqHound database
+ Return  : Bio::SeqIO or undef
+ Args    : a string represented gene identifier or
+           a list of gene identifiers
+ Example : $seq = $self->_get_Seq_from_gbff(141740);
+           or
+	   $seq = $self->_get_Seq_from_gbff([141740, 255064, 45185482]);
+
+=cut
+
+sub _get_Seq_from_gbff
+{
+	my ($self, $gi) = @_;
+	if (!defined $gi) {
+		$self->warn("[_get_Seq_from_gbff]: undefined input gi");
+		return;
+	}
+	my $lcontent;
+	if (ref($gi) =~ /array/i) {
+		# Work on a copy: _MaxSizeArray() removes the elements it hands
+		# back from the array it is given.
+		my @pending = @$gi;
+		# The merged text starts with a status line, mirroring the layout
+		# of a single server reply (status line, then payload).
+		$lcontent = "SEQHOUND_OK\n";
+		while (@pending) {
+			my @batch = _MaxSizeArray(\@pending);
+			#in order to keep the correct output order as GenBank does
+			my $uids = join(",", reverse(@batch));
+			my $ret = $self->get_request(-funcname => 'SeqHoundGetGenBankffList',
+						     -query    => 'pgi',
+						     -uids     => $uids);
+			next unless defined $ret;
+			# first line is the status, the remainder is the payload
+			my ($status, $flatfiles) = split(/\n/, $ret, 2);
+			next unless defined $status;
+			next if $status =~ /SEQHOUND_ERROR/ || $status =~ /SEQHOUND_NULL/;
+			# Skip batches without a usable payload.  Appending an undefined
+			# value (as was done before) only produced "uninitialized" warnings.
+			next if !defined $flatfiles
+			     || $flatfiles eq ""
+			     || $flatfiles =~ /^(null)/;
+			#append genbank flat files for long list
+			$lcontent .= $flatfiles;
+		}
+	}
+	#else $gi is a single variable
+	else {
+		$lcontent = $self->get_request(-funcname => 'SeqHoundGetGenBankffList',
+					       -query    => 'pgi',
+					       -uids     => $gi);
+	}
+	# Wrap the collected flat files into a Bio::SeqIO stream (undef on failure).
+	my $seqio = $self->postprocess_data(-lcontent => $lcontent,
+					    -funcname => 'SeqHoundGetGenBankffList',
+					    -outtype  => 'Bio::SeqIO');
+
+	return $seqio;
+}
+
+
+=head2 _init_SeqHound
+
+ Title   : _init_SeqHound
+ Usage   : $self->_init_SeqHound();
+ Function: call SeqHoundInit at blueprint server 
+ Return  : $result (TRUE or FALSE)
+ Args    : 
+
+=cut
+
+sub _init_SeqHound
+{
+	my ($self) = @_;
+	my $funcname = 'SeqHoundInit';
+
+	# Issue the SeqHoundInit call, identifying this client as Bioperl.
+	my $reply = $self->get_request(-funcname => $funcname,
+				       -query    => 'NetEntrezOnToo',
+				       -uids     => 'true',
+				       -other    => 'appname=Bioperl');
+
+	# Reduce the raw reply to the plain answer string.
+	my $answer = $self->postprocess_data(-lcontent => $reply,
+					     -funcname => $funcname,
+					     -outtype  => 'string');
+
+	# An empty or missing answer is reported as the string 'FALSE'.
+	return $answer ? $answer : 'FALSE';
+}
+
+=head2 _MaxSizeArray
+
+ Title   : _MaxSizeArray
+ Usage   : $self->_MaxSizeArray(\@arr)
+ Function: get an array with the limit size
+ Return  : an array with the limit size
+ Args    : a reference to an array
+
+=cut
+
+sub _MaxSizeArray
+{
+  my $queue = shift;
+  my $MAXQ  = 5;    # largest batch handed back per call
+
+  # Take everything when fewer than $MAXQ elements are left.
+  my $available = scalar(@$queue);
+  my $take = $available < $MAXQ ? $available : $MAXQ;
+
+  # splice() removes the leading elements from the caller's array and
+  # returns them in their original order.
+  my @batch = splice(@$queue, 0, $take);
+  return @batch;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,173 @@
+
+#
+# $Id: SeqI.pm,v 1.10.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::SeqI.pm
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::SeqI - Abstract Interface for Sequence databases
+
+=head1 SYNOPSIS
+
+   # get a Bio::DB::SeqI somehow
+
+   $seq = $seqdb->get_Seq_by_id('some-id');
+   $seq = $seqdb->get_Seq_by_acc('some-accession-number');
+
+   @ids = $seqdb->get_all_ids();
+   $stream = $seqdb->get_PrimarySeq_stream();
+   while( my $seq = $stream->next_seq() ) {
+      # $seq is a PrimarySeqI compliant object
+   }
+
+
+=head1 DESCRIPTION
+
+Abstract interface for a sequence database
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::SeqI;
+use strict;
+
+
+use base qw(Bio::DB::RandomAccessI);
+
+=head1 Methods inherited from Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+=head1 Methods [that were] specific for Bio::DB::SeqI
+
+=head2 get_PrimarySeq_stream
+
+ Title   : get_PrimarySeq_stream
+ Usage   : $stream = get_PrimarySeq_stream
+ Function: Makes a Bio::SeqIO compliant object
+           which provides a single method, next_seq
+ Returns : Bio::SeqIO
+ Args    : none
+
+=cut
+
+sub get_PrimarySeq_stream{
+   # (the archived copy showed the parameter list as ", at args";
+   #  it is restored to ",@args" here)
+   my ($self,@args) = @_;
+
+   # Abstract method: this base implementation always throws, so a
+   # concrete database class has to provide its own version.
+   $self->throw("Object did not provide a PrimarySeq stream object");
+}
+
+=head2 get_all_primary_ids
+
+ Title   : get_all_primary_ids
+ Usage   : @ids = $seqdb->get_all_primary_ids()
+ Function: gives an array of all the primary_ids of the 
+           sequence objects in the database. These
+           maybe ids (display style) or accession numbers
+           or something else completely different - they
+           *are not* meaningful outside of this database
+           implementation.
+ Example :
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+sub get_all_primary_ids{
+   # (the archived copy showed the parameter list as ", at args";
+   #  it is restored to ",@args" here)
+   my ($self,@args) = @_;
+   # Abstract method: always throws.  The message names the method the
+   # implementing class actually has to supply.
+   $self->throw("Object did not provide a get_all_primary_ids method");
+}
+
+
+=head2 get_Seq_by_primary_id
+
+ Title   : get_Seq_by_primary_id
+ Usage   : $seq = $db->get_Seq_by_primary_id($primary_id_string);
+ Function: Gets a Bio::Seq object by the primary id. The primary
+           id in these cases has to come from $db->get_all_primary_ids.
+           There is no other way to get (or guess) the primary_ids
+           in a database.
+
+           The other possibility is to get Bio::PrimarySeqI objects
+           via the get_PrimarySeq_stream and the primary_id field
+           on these objects are specified as the ids to use here.
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_primary_id {
+   # (the archived copy showed the parameter list as ", at args";
+   #  it is restored to ",@args" here)
+   my ($self,@args) = @_;
+
+   # Abstract method: this base implementation always throws, so a
+   # concrete database class has to provide its own version.
+   $self->throw("Abstract database call of get_Seq_by_primary_id. Your database has not implemented this method!");
+
+}
+
+1;
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion/gi.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion/gi.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion/gi.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,248 @@
+# $Id: gi.pm,v 1.4.4.2 2006/10/05 12:17:55 sendu Exp $
+#
+# BioPerl module for Bio::DB::SeqVersion::gi
+#
+# Cared for by Brian Osborne
+#
+# Copyright Brian Osborne 2006
+#
+# You may distribute this module under the same terms as Perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::SeqVersion::gi - interface to NCBI Sequence Revision History page
+
+=head1 SYNOPSIS
+
+Do not use this module directly, use Bio::DB::SeqVersion.
+
+    use Bio::DB::SeqVersion;
+
+    my $query = Bio::DB::SeqVersion->new(-type => 'gi');
+
+    # all GIs, which will include the GI used to query
+    my @all_gis = $query->get_all(2);
+
+    # the most recent GI, which may or may not be the GI used to query
+    my $live_gi = $query->get_recent(2);
+
+    # get all the visible data on the Sequence Revision page
+    my $array_ref = $query->get_history(11111111);
+
+These methods can also take accession numbers as arguments, just like
+the Sequence Revision page itself.
+
+=head1 DESCRIPTION
+
+All sequence entries at GenBank are identified by a pair of 
+identifiers, an accession and a numeric identifier, and this number is 
+frequently called a GI number (B<G>enInfo B<I>dentifier). The accession
+is stable, but each new version of the sequence entry for the accession 
+receives a new GI number (see L<http://www.ncbi.nlm.nih.gov/Sitemap/sequenceIDs.html>
+for more information on GenBank identifiers). One accession
+can have one or more GI numbers and the highest of these is the most recent,
+or "live", GI.
+
+Information on an accession and its associated GI numbers is available at
+the Sequence Revision History page at NCBI, 
+L<http://www.ncbi.nlm.nih.gov/entrez/sutils/girevhist.cgi>, this information is
+not available in file format. This module queries the Web page and retrieves GI 
+numbers and related data given an accession (e.g. NP_111111, A11111, P12345) or 
+a GI number (e.g. 2, 11111111) as query.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Brian Osborne
+
+Email E<lt> osborne at optonline dot net E<gt>
+
+=head1 CONTRIBUTORS
+
+Torsten Seemann - torsten.seemann AT infotech.monash.edu.au
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::SeqVersion::gi;
+use strict;
+
+use base qw(Bio::DB::SeqVersion);
+
+# Private class variables
+
+my $CGIBASE = 'http://www.ncbi.nlm.nih.gov';
+my $CGIARGS = '/entrez/sutils/girevhist.cgi?val=';
+
+=head2 new
+
+ Title   : new
+ Usage   : $gb = Bio::DB::SeqVersion::gi->new
+ Function: Creates a new query object
+ Returns : New query object
+
+=cut
+
+sub new {
+	my ($class, @ctor_args) = @_;
+	# The parent constructor builds the object from the same arguments.
+	my $self = $class->SUPER::new(@ctor_args);
+	# Run the initializer once more, this time without arguments.
+	$self->_initialize();
+	return $self;
+}
+
+=head2 get_all
+
+ Title   : get_all
+ Usage   : my @gis = $q->get_all(2)
+ Function: Get all GI numbers given a GI number
+ Returns : An array of GI numbers, most recent GI number is the 0 element
+ Args    : A single GI number (string)
+
+=cut
+
+sub get_all {
+	my ($self,$id) = @_;
+	# Reuse the stored history only when it was fetched for this very id.
+	# The earlier form "COND ? $ref = A : $ref = B" parses as
+	# "(COND ? $ref = A : $ref) = B", so get_history() was called every
+	# time and the stored result was never used.  The defined() checks
+	# avoid comparing against an unset _last_id on first use.
+	my $cached = defined $id
+	  && defined $self->{_last_id}
+	  && $id eq $self->{_last_id};
+	my $ref = $cached ? $self->{_last_result} : $self->get_history($id);
+	# element 0 of every history row is the GI number
+	my @arr = map { $$_[0] } @{$ref};
+	return @arr;
+}
+
+=head2 get_recent
+
+ Title   : get_recent
+ Usage   : my $newest_gi = $q->get_recent(2)
+ Function: Get most recent GI given a single GI
+ Returns : String
+ Args    : A single GI number (string)
+
+=cut
+
+sub get_recent {
+	my ($self,$id) = @_;
+	# Reuse the stored history only when it was fetched for this very id.
+	# The earlier form "COND ? $ref = A : $ref = B" parses as
+	# "(COND ? $ref = A : $ref) = B", so get_history() was called every
+	# time and the stored result was never used.  The defined() checks
+	# avoid comparing against an unset _last_id on first use.
+	my $cached = defined $id
+	  && defined $self->{_last_id}
+	  && $id eq $self->{_last_id};
+	my $ref = $cached ? $self->{_last_result} : $self->get_history($id);
+	# row 0 is the most recent version; element 0 of a row is its GI number
+	return $ref->[0]->[0];
+}
+
+=head2 get_history
+
+ Title   : get_history
+ Usage   : my $ref = $query_obj->get_history($id)
+ Function: Queries the NCBI Revision page, gets the data from the HTML table
+ Returns : Reference to an array of arrays where element 0 refers to the most 
+           recent version and the last element refers to the oldest version. 
+           In the second dimension the elements are:
+
+           0      GI number
+           1      Version
+           2      Update Date
+           3      Status
+
+           For example, to get the GI number of the first version:
+
+           $ref->[$#{$ref}]->[0]
+
+           To get the Update Date of the latest version:
+
+           $ref->[0]->[2]
+
+ Args    : One identifier (string)
+
+=cut
+
+sub get_history {
+	my ($self,$id) = @_;
+	# fetch the revision page, then turn it into an array of rows
+	my $page    = $self->_get_request($id);
+	my $history = $self->_process_data($page);
+	# Remember the latest result together with the id it belongs to, so
+	# other methods called with the same identifier can look it up.
+	@{$self}{qw(_last_result _last_id)} = ($history, $id);
+	return $history;
+}
+
+
+=head2 _get_request
+
+ Title   : _get_request
+ Usage   : my $url = $self->_get_request
+ Function: GET using NCBI Revision page URL, uses Root::HTTPget
+ Returns : HTML
+ Args    : One identifier (string)
+
+=cut
+
+sub _get_request {
+  my ($self,$id) = @_;
+
+  # Exactly one plain identifier is accepted: references and false
+  # values (undef, '', 0) are rejected.
+  if ( !$id || ref($id) ) {
+    $self->throw("Must specify a single id to query");
+  }
+
+  my $url = join('', $CGIBASE, $CGIARGS, $id);
+  my $response = $self->get( $url );
+
+  # A failed request is reported as a warning and yields no HTML.
+  unless ( $response->is_success ) {
+    $self->warn("Can't query $url: ".$response->status_line."\n");
+    return;
+  }
+
+  $self->debug("Response is:\n",$response->content,"\n"); 
+  return $response->content;
+}
+
+=head2 _process_data
+
+ Title   : _process_data
+ Usage   : $self->_process_data($html)
+ Function: extract data from HTML
+ Args    : HTML from Revision History page
+ Returns : reference to an array of arrays
+
+=cut
+
+sub _process_data {
+	my ($self, $html) = @_;
+	# Everything from the first "<table" that follows the
+	# "Revision history for ..." heading up to the end of the page.
+	my ($table) = $html =~ /Revision \s+ history \s+ for \s+ .+? (<table.+)/sx;
+	if (not defined $table) {
+		$self->throw("Could not parse 'Revision history' HTML table");
+	}
+	# Contents of every <tr>...</tr> pair; the first one is the header
+	# row and is set aside.
+	my ($header, @rows) = $table =~ /<tr>(.+?)<\/tr>/g;
+	# One array per remaining row, holding the text found after each tag.
+	my @history = map { [ $_ =~ />([^<>]+)/g ] } @rows;
+	return \@history;
+}
+
+1;
+
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SeqVersion.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,183 @@
+# $Id: SeqVersion.pm,v 1.5.4.2 2006/10/05 12:17:55 sendu Exp $
+#
+# BioPerl module for Bio::DB::SeqVersion
+#
+# Cared for by Brian Osborne
+#
+# Copyright Brian Osborne 2006
+#
+# You may distribute this module under the same terms as Perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::SeqVersion - front end to querying databases for identifier 
+versions
+
+=head1 SYNOPSIS
+
+  use Bio::DB::SeqVersion;
+
+  my $query = Bio::DB::SeqVersion->new(-type => 'gi');
+
+  my @all_gis = $query->get_all(2);
+
+  my $live_gi = $query->get_recent(2);
+
+=head1 DESCRIPTION
+
+The default type is 'gi'.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Brian Osborne
+
+Email bosborne at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Torsten Seemann - torsten.seemann AT infotech.monash.edu.au
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::SeqVersion;
+use strict;
+
+use base qw(Bio::WebAgent Bio::Root::Root);
+
+# Private class variable
+
+my $DEFAULTIDTYPE = 'gi'; # sub default_id_type()
+
+=head2 new()
+
+ Usage   : my $obj = new Bio::DB::SeqVersion();
+ Function: Create a Bio::DB::SeqVersion object 
+ Returns : An instance of Bio::DB::SeqVersion
+ Args    : -type      Identifier namespace, default is 'gi' 
+
+=cut
+
+sub new {
+  # (the archived copy showed the parameter list as ", at args";
+  #  it is restored to ",@args" here)
+  my($class,@args) = @_;
+
+  if( $class =~ /Bio::DB::SeqVersion::\S+/ ) {
+    # Called on a concrete Bio::DB::SeqVersion::* class: build and
+    # initialize the object directly.
+    my ($self) = $class->SUPER::new(@args);
+    $self->_initialize(@args);
+    return $self;
+  } 
+  else {
+    # Called on the front-end class: work out which identifier type is
+    # wanted, load the matching module and delegate to its constructor.
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+
+    # we delete '-type' so it doesn't get passed to the sub-class constructor
+    # note: delete() returns the value of the item deleted (undef if non-existent)
+    my $type = lc( delete($param{'-type'}) || $DEFAULTIDTYPE );
+
+    # nothing is returned when the module for this type cannot be loaded
+    return unless( $class->_load_seqversion_module($type) );
+    
+    # we pass %param here, not @args, as we have filtered out -type
+    return "Bio::DB::SeqVersion::$type"->new(%param);
+  }
+}
+
+=head2 get_recent()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub get_recent {
+  # (the archived copy showed the parameter list as ", at args";
+  #  it is restored to ",@args" here)
+  my ($self,@args) = @_;
+  # Placeholder in the front-end class: always reports "not implemented".
+  $self->throw_not_implemented();
+}
+
+=head2 get_all()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub get_all {
+	# (the archived copy showed the parameter list as ", at args";
+	#  it is restored to ",@args" here)
+	my ($self,@args) = @_;
+	# Placeholder in the front-end class: always reports "not implemented".
+	$self->throw_not_implemented();
+}
+
+=head2 _load_seqversion_module
+
+ Title   : _load_seqversion_module
+ Usage   : Used internally
+ Function: Loads up a module at run time on demand
+ Example :
+ Returns :
+ Args    : Name of identifier type
+
+=cut
+
+sub _load_seqversion_module {
+	my ($self,$db) = @_;
+	my $module = "Bio::DB::SeqVersion::" . $db;
+
+	# Try to load the module; a failure inside _load_module() is trapped
+	# here and leaves $loaded undefined.
+	my $loaded = eval { $self->_load_module($module) };
+	if ( $@ ) {
+		# report the problem on STDERR instead of dying
+		print STDERR $@;
+		print STDERR <<END;
+$self: $module cannot be found
+Exception $@
+For more information about the Bio::DB::SeqVersion system please see
+the Bio::DB::SeqVersion docs.
+END
+	}
+	return $loaded;
+}
+
+=head2 default_id_type
+
+ Title   : default_id_type
+ Usage   : my $type = $self->default_id_type
+ Function: Returns default identifier type for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_id_type {
+    # the class-wide default kept in $DEFAULTIDTYPE
+    my $default_type = $DEFAULTIDTYPE;
+    return $default_type;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/SwissProt.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/SwissProt.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/SwissProt.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,448 @@
+#
+# $Id: SwissProt.pm,v 1.30.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::SwissProt
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+# Reworked to use Bio::DB::WebDBSeqI 2000-12-11
+
+=head1 NAME
+
+Bio::DB::SwissProt - Database object interface to SwissProt retrieval
+
+=head1 SYNOPSIS
+
+    use Bio::DB::SwissProt;
+
+    $sp = new Bio::DB::SwissProt;
+
+    $seq = $sp->get_Seq_by_id('KPY1_ECOLI'); # SwissProt ID
+    # <4-letter-identifier>_<species 5-letter code>
+    # or ...
+    $seq = $sp->get_Seq_by_acc('P43780'); # SwissProt AC      
+    # [OPQ]xxxxx
+
+
+    # In fact in this implementation 
+    # these methods call the same webscript so you can use 
+    # them interchangeably
+
+    # choose a different server to query
+    $sp = new Bio::DB::SwissProt('-servertype' => 'expasy',
+				 '-hostlocation' => 'us');
+
+    $seq = $sp->get_Seq_by_id('BOLA_HAEIN'); # SwissProtID
+
+=head1 DESCRIPTION
+
+SwissProt is a curated database of proteins managed by the Swiss
+Bioinformatics Institute. Additional tools for
+parsing and manipulating swissprot files can be found at
+ftp://ftp.ebi.ac.uk/pub/software/swissprot/Swissknife/.
+
+Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the
+SwissProt database via an Expasy retrieval.
+
+In order to make changes transparent we have host type (currently only
+expasy) and location (default to Switzerland) separated out.  This
+allows the user to pick the closest Expasy mirror for running their
+queries.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email Jason Stajich  E<lt>jason at bioperl.orgE<gt>
+
+Thanks go to Alexandre Gattiker E<lt>gattiker at isb-sib.chE<gt> of Swiss
+Institute of Bioinformatics for helping point us in the direction of
+the correct expasy scripts and for swissknife references.
+
+Also thanks to Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt> 
+for help with adding EBI swall server.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::SwissProt;
+use strict;
+use vars qw($MODVERSION %HOSTS $DEFAULTFORMAT $DEFAULTSERVERTYPE);
+
+$MODVERSION = '0.8.1';
+use HTTP::Request::Common;
+
+use base qw(Bio::DB::WebDBSeqI);
+
+# global vars
+# $DEFAULTSERVERTYPE is used by new() and servertype() when no server type
+# is given; $DEFAULTFORMAT is what default_format() returns.
+$DEFAULTSERVERTYPE = 'ebi';
+$DEFAULTFORMAT = 'swissprot';
+
+# you can add your own here theoretically.
+# Per-server settings, keyed by server type.  As used in this module:
+#   default  - host location selected whenever servertype() is set
+#   baseurl  - sprintf template; %s is replaced by the chosen host name
+#   hosts    - host location name => host name
+#   jointype - separator used to join several ids into one request value
+#   idvar    - name of the request variable that carries the id(s)
+#   basevars - extra name/value pairs sent along with every request
+%HOSTS = ( 
+	   'expasy' => { 
+	       'default' => 'us',
+	       'baseurl' => 'http://%s/cgi-bin/sprot-retrieve-list.pl',
+	       'hosts'   => 	       
+	       { 
+		   'switzerland'  => 'ch.expasy.org',
+		   'canada' => 'ca.expasy.org',
+		   'china'  => 'cn.expasy.org',
+		   'taiwan' => 'tw.expasy.org',
+		   'australia' => 'au.expasy.org',
+		   'korea'  => 'kr.expasy.org',
+		   'us'     => 'us.expasy.org',
+	       },
+	       # ick, CGI variables
+	       'jointype' => ' ',
+	       'idvar'    => 'list',
+	       'basevars' => [ ],	       
+	   },
+	   'ebi'    => {
+	       'default' => 'uk',
+	       'baseurl' => 'http://%s/cgi-bin/dbfetch',
+	       'hosts' => { 
+		   'uk'   => 'www.ebi.ac.uk',
+	       },
+	       'jointype' => ',',
+	       'idvar'    => 'id',
+	       'basevars' => [ 'db'    => 'UniProtKB',
+			       'style' => 'raw' ],
+	   }
+	   );
+
+# new modules should be a little more lightweight and
+# should use Bio::Root::Root
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($format, $hostlocation, $servertype) =
+	$self->_rearrange([qw(FORMAT HOSTLOCATION SERVERTYPE)], @args);
+
+    # Only swiss and fasta flavours are accepted; any other requested
+    # format is replaced by the module default after a warning.
+    if( $format ) {
+	unless( $format =~ /(swiss)|(fasta)/i ) {
+	    $self->warn("Requested Format $format is ignored because only SwissProt and Fasta formats are currently supported");
+	    $format = $self->default_format;
+	}
+    }
+
+    # servertype() also resets the host location to that server's default,
+    # so it has to run before an explicit host location is applied.
+    $self->servertype( lc( $servertype || $DEFAULTSERVERTYPE ) );
+    $self->hostlocation(lc $hostlocation) if $hostlocation;
+
+    # let's always override the format, as it must be swiss or fasta
+    $self->request_format($format);
+    return $self;
+}
+
+=head2 Routines from Bio::DB::RandomAccessI
+
+=cut
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+=cut
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+=cut
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
+  Function: Gets a series of Seq objects by unique identifiers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of unique identifiers for
+                   the desired sequence entries
+
+=cut
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $stream = $db->get_Stream_by_acc([$acc1, $acc2]);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+=head2 get_Stream_by_batch
+
+  Title   : get_Stream_by_batch
+  Usage   : $seq = $db->get_Stream_by_batch($ref);
+  Function: Retrieves Seq objects from SwissProt 'en masse', rather than one
+            at a time.  This is implemented the same way as get_Stream_by_id, 
+            but is provided here in keeping with access methods of NCBI 
+            modules.
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : either an array reference, a filename, or a filehandle
+            from which to get the list of unique ids/accession numbers.
+
+NOTE: deprecated API.  Use get_Stream_by_id() instead.
+
+=cut
+
+*get_Stream_by_batch = sub {
+   # Deprecated alias: emit the deprecation notice, then pass every
+   # remaining argument straight on to get_Stream_by_id().
+   my $self = shift;
+   my $notice = 'get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead';
+   $self->deprecated($notice);
+   return $self->get_Stream_by_id(@_);
+};
+
+=head2 Implemented Routines from Bio::DB::WebDBSeqI interface
+
+=cut
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: returns a HTTP::Request object
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+sub get_request {
+    my ($self, @qualifiers) = @_;
+    my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)], @qualifiers);
+
+    $self->throw("Must specify a value for uids to query")
+	unless defined $uids;
+
+    # request_format() returns (retrieval format, SeqIO format); only the
+    # first one goes into the request.  When $format is defined this call
+    # also stores it on the object.
+    my ($f) = $self->request_format($format);
+
+    # settings of the server type currently selected
+    my $server   = $HOSTS{$self->servertype};
+    my $url      = $self->location_url;
+    my $jointype = $server->{'jointype'} || ' ';
+    my $idvar    = $server->{'idvar'} || 'id';
+
+    # Several ids are sent as one joined value.
+    # HTTP::Request automagically converts the ' ' to %20
+    my $uid = ref($uids) =~ /ARRAY/i ? join($jointype, @$uids) : $uids;
+
+    # Later pairs win, so the id variable overrides a base variable of
+    # the same name.
+    my %vars = (
+		 @{$server->{'basevars'}},
+		 'format' => $f,
+		 $idvar   => $uid,
+		 );
+
+    return POST $url, \%vars;
+}
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string 
+                                           reference containing data
+
+=cut
+
+# don't need to do anything 
+
+sub postprocess_data {
+    # Nothing needs doing for this source: the arguments are unpacked
+    # and then ignored, and nothing is returned.
+    my $self = shift;
+    my %args = @_;
+    return;
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_format {
+    # the module-wide default kept in $DEFAULTFORMAT
+    my $default_format = $DEFAULTFORMAT;
+    return $default_format;
+}
+
+=head2 Bio::DB::SwissProt specific routines
+
+=cut
+
+=head2 servertype
+
+ Title   : servertype
+ Usage   : my $servertype = $self->servertype
+           $self->servertype($servertype);
+ Function: Get/Set server type
+ Returns : string
+ Args    : server type string [optional]
+
+=cut
+
+sub servertype {
+    my ($self, $servertype) = @_;
+    if( defined $servertype && $servertype ne '') {		
+	# The known types are joined explicitly: concatenating a bare
+	# "keys %HOSTS" evaluates it in scalar context and would put the
+	# number of types into the message instead of their names.
+	$self->throw("You gave an invalid server type ($servertype)".
+			 " - available types are ".  
+			 join(", ", sort keys %HOSTS)) unless( $HOSTS{$servertype} );
+	$self->{'_servertype'} = $servertype;
+	# switching server type also switches to that server's default host
+	$self->{'_hostlocation'} = $HOSTS{$servertype}->{'default'};
+	
+	# make sure format is reset properly in that different
+	# servers have different syntaxes
+	my ($existingformat,$seqioformat) = $self->request_format;
+	$self->request_format($existingformat);		
+    }
+    # getter: fall back to the module default when nothing has been set
+    return $self->{'_servertype'} || $DEFAULTSERVERTYPE;
+}
+
+
+=head2 hostlocation
+
+ Title   : hostlocation
+ Usage   : my $location = $self->hostlocation() 
+          $self->hostlocation($location) 
+ Function: Set/Get Hostlocation 
+ Returns : string representing hostlocation
+ Args    : string specifying hostlocation [optional]
+
+=cut
+
+sub hostlocation {
+    my ($self, $location ) = @_;
+    my $servertype = $self->servertype;
+    $self->throw("Must have a valid servertype defined not $servertype")
+	unless defined $servertype; 
+    # host names known for the current server type
+    my %hosts = %{$HOSTS{$servertype}->{'hosts'}};
+    if( defined $location && $location ne '' ) {
+	# Lower-case only a value that was actually supplied; applying lc
+	# before the defined check raised an "uninitialized" warning on
+	# every plain getter call.
+	$location = lc $location;
+	if( ! $hosts{$location} ) {
+	    $self->throw("Must specify a known host, not $location,".
+			 " possible values (".
+			 join(",", sort keys %hosts ). ")"); 
+	}
+	$self->{'_hostlocation'} = $location;
+    }
+    return $self->{'_hostlocation'};
+}
+
+=head2 location_url
+
+ Title   : location_url
+ Usage   : my $url = $self->location_url()
+ Function: Get host url
+ Returns : string representing url
+ Args    : none
+
+=cut
+
+sub location_url {
+    my $self = shift;
+    my $servertype = $self->servertype();
+    my $location   = $self->hostlocation();
+
+    # both pieces are needed to fill in the URL template
+    unless( defined $location && defined $servertype ) {
+	$self->throw("must have a valid hostlocation and servertype set before calling location_url");
+    }
+
+    # look up the host name for this location and insert it into the
+    # server's base URL template
+    my $server = $HOSTS{$servertype};
+    my $host   = $server->{'hosts'}->{$location};
+    return sprintf($server->{'baseurl'}, $host);
+}		   
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my ($req_format, $ioformat) = $self->request_format;
+           $self->request_format("genbank");
+           $self->request_format("fasta");
+ Function: Get/Set sequence format retrieval. The get-form will normally
+           not be used outside of this and derived modules.
+ Returns : Array of two strings, the first representing the format for
+           retrieval, and the second specifying the corresponding SeqIO 
+           format.
+ Args    : $format = sequence format
+
+=cut
+
+sub request_format {
+    my ($self, $value) = @_;
+    if( defined $value ) {
+	# Each server type has its own name for the swiss format and its
+	# own fallback for an unrecognized request.  Pairs are
+	# [ retrieval format, SeqIO format ].
+	my $servertype = $self->servertype;
+	my ($swiss, $fallback);
+	if( $servertype =~ /expasy/ ) {
+	    $swiss    = [ 'sprot', 'swiss' ];
+	    $fallback = [ 'fasta', 'fasta' ];
+	} elsif( $servertype =~ /ebi/ ) {
+	    $swiss    = [ 'swissprot', 'swiss' ];
+	    $fallback = [ 'swissprot', 'swiss' ];
+	}
+
+	# any other server type leaves the stored format untouched
+	if( $swiss ) {
+	    if( $value =~ /sprot/ || $value =~ /swiss/ ) {
+		$self->{'_format'} = $swiss;
+	    } elsif( $value =~ /^fa/ ) {
+		$self->{'_format'} = [ 'fasta', 'fasta' ];
+	    } else {
+		$self->warn("Unrecognized format $value requested");
+		$self->{'_format'} = $fallback;
+	    }
+	}
+    }
+    return @{$self->{'_format'}};
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/entrez.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/entrez.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/entrez.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,627 @@
+# $Id: entrez.pm,v 1.18.4.3 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::DB::Taxonomy::entrez
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Taxonomy::entrez - Taxonomy Entrez driver
+
+=head1 SYNOPSIS
+
+# Do not use this object directly, rather through the Bio::DB::Taxonomy
+# interface
+
+  use Bio::DB::Taxonomy;
+
+  my $db = new Bio::DB::Taxonomy(-source => 'entrez');
+
+  my $taxonid = $db->get_taxonid('Homo sapiens');
+  my $node   = $db->get_Taxonomy_Node(-taxonid => $taxonid);
+
+  my $gi = 71836523;
+  my $node = $db->get_Taxonomy_Node(-gi => $gi, -db => 'protein');
+  print $node->binomial, "\n";
+  my ($species,$genus,$family) =  $node->classification;
+  print "family is $family\n";
+
+  # Can also go up 4 levels
+  my $p = $node;  
+  for ( 1..4 ) { 
+    $p = $db->get_Taxonomy_Node(-taxonid => $p->parent_id);
+  }
+  print $p->rank, " ", ($p->classification)[0], "\n";
+
+  # could then classify a set of BLAST hits based on their GI numbers
+  # into taxonomic categories.
+
+
+It is not currently possible to query a node for its children so we
+cannot completely replace the advantage of the flatfile
+Bio::DB::Taxonomy::flatfile module.
+
+
+=head1 DESCRIPTION
+
+A driver for querying NCBI Entrez Taxonomy database.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala: bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Taxonomy::entrez;
+use vars qw($EntrezLocation $UrlParamSeparatorValue %EntrezParams
+	    $EntrezGet $EntrezSummary $EntrezFetch %SequenceParams
+	    $XMLTWIG $DATA_CACHE $RELATIONS);
+use strict;
+
+use Bio::Taxon;
+
+eval {
+    require XML::Twig;
+    $XMLTWIG = 1;
+};
+if( $@ ) {
+    $XMLTWIG = 0;
+}
+
+use base qw(Bio::WebAgent Bio::DB::Taxonomy);
+
+# Base URL of the Entrez eutils service and the names of the CGI endpoints
+# appended to it: esearch is used by get_taxonids(), efetch by get_taxon(),
+# and esummary by the -gi lookup in get_taxon().
+$EntrezLocation = 'http://www.ncbi.nih.gov/entrez/eutils/';
+$EntrezGet      = 'esearch.fcgi';
+$EntrezFetch    = 'efetch.fcgi';
+$EntrezSummary  = 'esummary.fcgi';
+
+# Package-level caches (shared by every object of this class).
+# $DATA_CACHE holds the keys gi_to_ids, name_to_id, minimal_info and full_info;
+# $RELATIONS holds the keys ancestors and children.
+$DATA_CACHE = {};
+$RELATIONS  = {};
+
+# Default URL parameters for taxonomy queries (used when -params is not given).
+%EntrezParams = ( 'db'     => 'taxonomy', 
+                  'report' => 'xml',
+                  'retmode'=> 'xml',
+                  'tool'   => 'Bioperl');
+
+# Default URL parameters for the GI -> taxid summary query.
+%SequenceParams = ( 'db'      => 'nucleotide', # or protein
+		            'retmode' => 'xml',
+		            'tool'    => 'Bioperl');
+
+# Separator placed between key=value pairs when building a query string.
+$UrlParamSeparatorValue = '&';
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::DB::Taxonomy::entrez();
+ Function: Builds a new Bio::DB::Taxonomy::entrez object
+ Returns : an instance of Bio::DB::Taxonomy::entrez
+ Args    : -location => URL to Entrez (if you want to override the default)
+           -params   => Hashref of URL params if you want to override the
+                        default
+
+=cut
+
+sub new {
+    my $class     = shift;
+    my @ctor_args = @_;
+
+    # The parent constructor sets up the Bio::WebAgent side of the object ...
+    my ($self) = $class->SUPER::new(@ctor_args);
+
+    # ... and _initialize() the Bio::DB::Taxonomy side (URL and parameters).
+    $self->_initialize(@ctor_args);
+
+    return $self;
+}
+
+# Finish construction: run the parent initialiser, then store the Entrez URL
+# and the URL parameters used for every query.
+#   -location => base URL of the Entrez service (default: $EntrezLocation)
+#   -params   => hashref of URL parameters (default: \%EntrezParams); a value
+#                that is not a hash reference is rejected with a warning and
+#                the defaults are used instead
+sub _initialize {
+    my ($self, @args) = @_;
+
+    $self->SUPER::_initialize(@args);
+
+    # The archived source read "], at _)" here: the "@_" had been mangled.
+    my ($location, $params) = $self->_rearrange([qw(LOCATION PARAMS)], @args);
+
+    if ($params && ref($params) !~ /HASH/i) {
+        $self->warn("Must have provided a valid HASHref for -params");
+        $params = undef;
+    }
+    $self->entrez_params($params || \%EntrezParams);
+    $self->entrez_url($location || $EntrezLocation );
+}
+
+=head2 get_taxon
+
+ Title   : get_taxon
+ Usage   : my $taxon = $db->get_taxon(-taxonid => $taxonid)
+ Function: Get a Bio::Taxon object from the database.
+ Returns : Bio::Taxon object
+ Args    : just a single value which is the database id, OR named args:
+           -taxonid => taxonomy id (to query by taxonid)
+            OR
+           -name    => string (to query by a taxonomy name: common name, 
+                               scientific name, etc)
+            OR
+           To retrieve a taxonomy node for a GI number provide the -gi option
+           with the gi number and -db with either 'nucleotide' or 'protein' to
+           define the db.
+            AND optionally,
+           -full    => 1 (to force retrieval of full information - sometimes
+                          minimal information about your taxon may have been
+                          cached, which is normally used to save database
+                          accesses)
+
+=cut
+
+# Fetch one or more Bio::Taxon objects by taxon id, by name or by GI number.
+# Answers come from the package-level cache when possible; everything else is
+# requested from Entrez efetch in a single query, parsed with XML::Twig and
+# cached, together with minimal information about every ancestor listed in
+# the record's lineage.
+# Returns the list of taxa in list context, the first of them in scalar context.
+# Throws if XML::Twig is not installed or if the web request fails.
+sub get_taxon {
+    my $self = shift;
+    if (! $XMLTWIG) {
+        $self->throw("Need to have installed XML::Twig");
+    }
+
+    my %p = $self->entrez_params;
+
+    # convert input request to one or more ids
+    my (@taxonids, $taxonid, $want_full);
+    if (@_ > 1) {
+        my %params = @_;
+        if ($params{'-taxonid'}) {
+            $taxonid = $params{'-taxonid'};
+        }
+        elsif ($params{'-gi'}) {
+            my $db = $params{'-db'};
+            # we're going to do all the work here and then redirect
+            # the call based on the TaxId
+            my %p = %SequenceParams;
+            if( ref($params{'-gi'}) =~ /ARRAY/i ) {
+                $p{'id'} = join(',', @{$params{'-gi'}});
+            } else {
+                $p{'id'} = $params{'-gi'};
+            }
+            $p{'db'} = $db if defined $db;
+            my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
+            my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezSummary,$params);
+            $self->debug("url is $url\n");
+
+            my @ids;
+            if (exists $DATA_CACHE->{gi_to_ids}->{$url}) {
+                @ids = @{$DATA_CACHE->{gi_to_ids}->{$url}};
+            }
+            else {
+                my $response = $self->get($url);
+                if ($response->is_success) {
+                    $response = $response->content;
+                }
+                else {
+                    $self->throw("Can't query website: ".$response->status_line);
+                }
+
+                $self->debug("resp is $response\n");
+                my $twig = XML::Twig->new;
+                $twig->parse($response);
+                my $root = $twig->root;
+
+                for my $topnode ( $root->children('DocSum') ) {
+                    for my $child ( $topnode->children('Item') ) {
+                        if( uc($child->{att}->{'Name'}) eq 'TAXID' ) {
+                            push @ids, $child->text;
+                        }
+                    }
+                }
+
+                $DATA_CACHE->{gi_to_ids}->{$url} = \@ids;
+            }
+
+            # forward -full as well, so a GI lookup honours it like the
+            # other lookup modes do
+            return $self->get_taxon(-taxonid => \@ids, -full => $params{'-full'});
+        }
+        elsif ($params{'-name'}) {
+            @taxonids = $self->get_taxonid($params{'-name'});
+        }
+        else {
+            $self->warn("Need to have provided either a -taxonid or -name value to get_taxon");
+        }
+
+        if ($params{'-full'}) {
+            $want_full = 1;
+        }
+    }
+    else {
+        $taxonid = shift;
+    }
+
+    if (ref($taxonid) =~ /ARRAY/i ) {
+        @taxonids = @{$taxonid};
+    }
+    else {
+        push(@taxonids, $taxonid) if $taxonid;
+    }
+
+    # return answer(s) from the cache if possible
+    my @results;
+    my @uncached;
+    foreach my $taxonid (@taxonids) {
+        $taxonid || $self->throw("In taxonids list one was undef! '@taxonids'\n");
+        if (defined $DATA_CACHE->{full_info}->{$taxonid}) {
+            push(@results, $self->_make_taxon($DATA_CACHE->{full_info}->{$taxonid}));
+        }
+        elsif (! $want_full && defined $DATA_CACHE->{minimal_info}->{$taxonid}) {
+            push(@results, $self->_make_taxon($DATA_CACHE->{minimal_info}->{$taxonid}));
+        }
+        else {
+            push(@uncached, $taxonid);
+        }
+    }
+
+    if (@uncached > 0) {
+        $taxonid = join(',', @uncached);
+
+        $p{'id'}      = $taxonid;
+        $self->debug("id is $taxonid\n");
+        my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
+
+        my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezFetch,$params);
+        $self->debug("url is $url\n");
+        my $response = $self->get($url);
+        if ($response->is_success) {
+            $response = $response->content;
+        }
+        else {
+            $self->throw("Can't query website: ".$response->status_line);
+        }
+        $self->debug("resp is $response\n");
+
+        my $twig = XML::Twig->new;
+        $twig->parse($response);
+
+        my $root = $twig->root;
+        for my $taxon ( $root->children('Taxon') ) {
+            my $taxid = $taxon->first_child_text('TaxId');
+            $self->throw("Got a result with no TaxId!") unless $taxid;
+
+            my $data = {};
+            if (exists $DATA_CACHE->{minimal_info}->{$taxid}) {
+                $data = $DATA_CACHE->{minimal_info}->{$taxid};
+            }
+
+            $data->{id} = $taxid;
+            $data->{rank} = $taxon->first_child_text('Rank');
+
+            # Always declare a fresh array: "my @x = ... if COND" has
+            # undefined behaviour and could carry names over from a
+            # previously processed taxon.
+            my $other_names = $taxon->first_child('OtherNames');
+            my @other_names = $other_names ? $other_names->children_text() : ();
+            my $sci_name = $taxon->first_child_text('ScientificName');
+            my $orig_sci_name = $sci_name;
+            $sci_name =~ s/ \(class\)$//;
+            push(@other_names, $orig_sci_name) if $orig_sci_name ne $sci_name;
+            $data->{scientific_name} = $sci_name;
+            $data->{common_names} = \@other_names;
+
+            $data->{division} = $taxon->first_child_text('Division');
+            $data->{genetic_code} = $taxon->first_child('GeneticCode')->first_child_text('GCId');
+            $data->{mitochondrial_genetic_code} = $taxon->first_child('MitoGeneticCode')->first_child_text('MGCId');
+            $data->{create_date} = $taxon->first_child_text('CreateDate');
+            $data->{update_date} = $taxon->first_child_text('UpdateDate');
+            $data->{pub_date} = $taxon->first_child_text('PubDate');
+
+            # since we have some information about all the ancestors of our
+            # requested node, we may as well cache data for the ancestors to
+            # reduce the number of accesses to website in future
+            my $lineage_ex = $taxon->first_child('LineageEx');
+            my ($ancestor, $lineage_data, @taxa);
+            # a record without a LineageEx element simply has no ancestors
+            # to remember
+            foreach my $lineage_taxon ($lineage_ex ? $lineage_ex->children : ()) {
+                my $lineage_taxid = $lineage_taxon->first_child_text('TaxId');
+
+                if (exists $DATA_CACHE->{minimal_info}->{$lineage_taxid} || exists $DATA_CACHE->{full_info}->{$lineage_taxid}) {
+                    $lineage_data = $DATA_CACHE->{minimal_info}->{$lineage_taxid} || $DATA_CACHE->{full_info}->{$lineage_taxid};
+                    next;
+                }
+                else {
+                    $lineage_data = {};
+                }
+
+                $lineage_data->{id} = $lineage_taxid;
+                $lineage_data->{scientific_name} = $lineage_taxon->first_child_text('ScientificName');
+                $lineage_data->{rank} = $lineage_taxon->first_child_text('Rank');
+
+                $RELATIONS->{ancestors}->{$lineage_taxid} = $ancestor->{id} if $ancestor;
+
+                $DATA_CACHE->{minimal_info}->{$lineage_taxid} = $lineage_data;
+            } continue { $ancestor = $lineage_data; unshift(@taxa, $lineage_data); }
+
+            $RELATIONS->{ancestors}->{$taxid} = $ancestor->{id} if $ancestor;
+
+            # go through the lineage in reverse so we can remember the children
+            my $child = $data;
+            foreach my $lineage_data (@taxa) {
+                $RELATIONS->{children}->{$lineage_data->{id}}->{$child->{id}} = 1;
+            } continue { $child = $lineage_data; }
+
+            # the full record supersedes any minimal one cached earlier
+            delete $DATA_CACHE->{minimal_info}->{$taxid};
+            $DATA_CACHE->{full_info}->{$taxid} = $data;
+            push(@results, $self->_make_taxon($data));
+        }
+    }
+
+    wantarray() ? @results : shift @results;
+}
+
+*get_Taxonomy_Node = \&get_taxon;
+
+=head2 get_taxonids
+
+ Title   : get_taxonids
+ Usage   : my $taxonid = $db->get_taxonids('Homo sapiens');
+ Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
+           string. Note that multiple taxonids can match to the same supplied
+           name.
+ Returns : array of integer ids in list context, one of these in scalar context
+ Args    : string representing taxon's name
+
+=cut
+
+# Look up the taxon ids matching a name via Entrez esearch, caching the
+# answer per (normalised) query string.
+# A query of the form "name <parent>" is resolved by searching for "name"
+# and keeping the single id that has an ancestor whose scientific or common
+# name equals "parent" (case-insensitively); the walk up the ancestors stops
+# at the first one that carries a named rank.
+# Returns a list of ids in list context, the first id in scalar context.
+# Throws if the web request fails.
+sub get_taxonids {
+    my ($self,$query) = @_;
+    my %p = $self->entrez_params;
+
+    # queries don't work correctly with special characters, so get rid of them.
+    if ($query =~ /<.+>/) {
+        # queries with <something> will fail, so workaround by removing, doing
+        # the query, getting multiple taxonids, then picking the one id that
+        # has a parent node with a scientific_name() or common_names()
+        # case-insensitive matching to the word(s) within <>
+        #
+        # The preceding space is optional: if it were required, a query such
+        # as "name<parent>" would be left unchanged and the recursive call
+        # below would never terminate.
+        $query =~ s/ ?<(.+?)>//;
+        my $desired_parent_name = lc($1);
+
+        ID: foreach my $start_id ($self->get_taxonids($query)) {
+            my $node = $self->get_taxon($start_id) || next ID;
+
+            # walk up the parents until we hit a node with a named rank
+            while (1) {
+                my $parent_node = $self->ancestor($node) || next ID;
+                my $parent_sci_name = $parent_node->scientific_name || next ID;
+                my @parent_common_names = $parent_node->common_names;
+
+                foreach my $name ($parent_sci_name, @parent_common_names) {
+                    if (lc($name) eq $desired_parent_name) {
+                        return wantarray() ? ($start_id) : $start_id;
+                    }
+                }
+
+                my $parent_rank = $parent_node->rank || 'no rank';
+                if ($parent_rank ne 'no rank') {
+                    last;
+                }
+                else {
+                    $node = $parent_node;
+                }
+            }
+        }
+        return;
+    }
+    $query =~ s/[\"\(\)]//g; # not an exhaustive list; these are just the ones I know cause problems
+    $query =~ s/\s/+/g;
+
+    my @data;
+    if (defined $DATA_CACHE->{name_to_id}->{$query}) {
+        @data = @{$DATA_CACHE->{name_to_id}->{$query}};
+    }
+    else {
+        $p{'term'} = $query;
+        my $params = join($UrlParamSeparatorValue, map { "$_=".$p{$_} } keys %p);
+        my $url = sprintf("%s%s?%s",$self->entrez_url,$EntrezGet,$params);
+        my $response = $self->get($url);
+        if ($response->is_success) {
+            $response = $response->content;
+        }
+        else {
+            $self->throw("Can't query website: ".$response->status_line);
+        }
+        $self->debug("response is $response\n");
+        my $twig = XML::Twig->new;
+        $twig->parse($response);
+        my $root = $twig->root;
+        # a reply without an IdList element is treated as "no matches"
+        # instead of dying on a method call on undef
+        my $list = $root->first_child('IdList');
+        @data = $list ? (map { $_->text } $list->children('Id')) : ();
+
+        $DATA_CACHE->{name_to_id}->{$query} = [@data];
+    }
+
+    wantarray() ? @data : shift @data;
+}
+
+*get_taxonid = \&get_taxonids;
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : my $ancestor_taxon = $db->ancestor($taxon)
+ Function: Retrieve the ancestor taxon of a supplied Taxon from the database.
+
+           Note that unless the ancestor has previously been directly
+           requested with get_taxon(), the returned Taxon object will only have
+           a minimal amount of information.
+
+ Returns : Bio::Taxon
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+sub ancestor {
+    my ($self, $taxon) = @_;
+
+    # Validate the argument: a Bio::Taxon that was produced by this database.
+    unless (ref($taxon) && $taxon->isa('Bio::Taxon')) {
+        $self->throw("Must supply a Bio::Taxon");
+    }
+    unless ($taxon->db_handle && $taxon->db_handle eq $self) {
+        $self->throw("The supplied Taxon must belong to this database");
+    }
+    my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!");
+
+    # Only relations recorded by an earlier get_taxon() call are known.
+    my $ancestor_id = $RELATIONS->{ancestors}->{$id};
+    return unless $ancestor_id;
+
+    # Prefer the full record, fall back to the minimal one.
+    my $cached = $DATA_CACHE->{full_info}->{$ancestor_id} || $DATA_CACHE->{minimal_info}->{$ancestor_id};
+    return $self->_make_taxon($cached);
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent
+ Usage   : my @taxa = $db->each_Descendent($taxon);
+ Function: Get all the descendents of the supplied Taxon (but not their
+           descendents, ie. not a recursive fetchall).
+
+           Note that this implementation is unable to return a taxon that
+           hasn't previously been directly fetched with get_taxon(), or wasn't
+           an ancestor of such a fetch.
+
+ Returns : Array of Bio::Taxon objects
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+sub each_Descendent {
+    my ($self, $taxon) = @_;
+
+    # Validate the argument: a Bio::Taxon that was produced by this database.
+    unless (ref($taxon) && $taxon->isa('Bio::Taxon')) {
+        $self->throw("Must supply a Bio::Taxon");
+    }
+    unless ($taxon->db_handle && $taxon->db_handle eq $self) {
+        $self->throw("The supplied Taxon must belong to this database");
+    }
+    my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!");
+
+    # Children are only known if an earlier get_taxon() recorded them.
+    my @children_ids = keys %{$RELATIONS->{children}->{$id} || {}};
+
+    # Build each child from its full record, or the minimal one otherwise.
+    my @children = map {
+        $self->_make_taxon($DATA_CACHE->{full_info}->{$_} || $DATA_CACHE->{minimal_info}->{$_})
+    } @children_ids;
+
+    return @children;
+}
+
+=head2 Some Get/Setter methods
+
+=cut
+
+=head2 entrez_url
+
+ Title   : entrez_url
+ Usage   : $obj->entrez_url($newval)
+ Function: Get/set entrez URL
+ Returns : value of entrez url (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub entrez_url{
+    my ($self, @new_value) = @_;
+
+    # Called with an argument (even undef): store it first.
+    $self->{'_entrez_url'} = $new_value[0] if @new_value;
+
+    return $self->{'_entrez_url'};
+}
+
+=head2 entrez_params
+
+ Title   : entrez_params
+ Usage   : $obj->entrez_params($newval)
+ Function: Get/set entrez params
+ Returns : the entrez params as a hash (flattened key/value list, not a hashref)
+ Args    : on set, new value Hashref
+
+=cut
+
+sub entrez_params{
+    my $self = shift;
+
+    # On set, remember the supplied hash reference.
+    $self->{'_entrez_params'} = shift if @_;
+
+    # The stored reference is dereferenced, so callers receive a copy of the
+    # key/value pairs rather than the reference itself.
+    return %{ $self->{'_entrez_params'} };
+}
+
+=head2 Bio::DB::WebBase methods
+
+=cut
+
+=head2 proxy_string
+
+ Title   : proxy_string
+ Usage   : my $proxy_string = $self->proxy_string($protocol)
+ Function: Get the proxy string (plus user/pass )
+ Returns : string
+ Args    : protocol ('http' or 'ftp'), default 'http'
+
+=head2 proxy
+
+ Title   : proxy
+ Usage   : $httpproxy = $db->proxy('http')  or
+           $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy
+ Returns : a string indicating the proxy
+ Args    : $protocol : an array ref of the protocol(s) to set/get
+           $proxyurl : url of the proxy to use for the specified protocol
+           $username : username (if proxy requires authentication)
+           $password : password (if proxy requires authentication)
+
+=head2 authentication
+
+ Title   : authentication
+ Usage   : $db->authentication($user,$pass)
+ Function: Get/Set authentication credentials
+ Returns : Array of user/pass
+ Args    : Array or user/pass
+
+=cut
+
+# Build a Bio::Taxon from a cached data hash ref: every key is used as the
+# name of a setter method on the taxon and called with the stored value.
+sub _make_taxon {
+    my ($self, $data) = @_;
+
+    my $taxon = Bio::Taxon->new();
+
+    while (my ($field, $value) = each %{$data}) {
+        # since 'id' is a real ncbi taxid, explicitly set it as one
+        my $setter = $field eq 'id' ? 'ncbi_taxid' : $field;
+
+        # array refs are expanded into a list of arguments
+        my @setter_args = ref($value) eq 'ARRAY' ? @{$value} : ($value);
+        $taxon->$setter(@setter_args);
+    }
+
+    # we can't use -dbh or the db_handle() method ourselves or we'll go
+    # infinite on the merge attempt
+    $taxon->{'db_handle'} = $self;
+
+    $self->_handle_internal_id($taxon);
+
+    return $taxon;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/flatfile.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/flatfile.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/flatfile.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,468 @@
+# $Id: flatfile.pm,v 1.15.4.2 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::DB::Taxonomy::flatfile
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Taxonomy::flatfile - An implementation of Bio::DB::Taxonomy
+which uses local flat files
+
+=head1 SYNOPSIS
+
+  use Bio::DB::Taxonomy;
+
+  my $db = new Bio::DB::Taxonomy(-source => 'flatfile',
+                                 -nodesfile => $nodesfile,
+                                 -namesfile => $namefile);
+
+=head1 DESCRIPTION
+
+This is an implementation which uses local flat files and the DB_File
+module RECNO data structures to manage a local copy of the NCBI
+Taxonomy database.
+
+Required database files can be obtained from
+ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala: bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Taxonomy::flatfile;
+use vars qw($DEFAULT_INDEX_DIR $DEFAULT_NODE_INDEX 	    $DEFAULT_NAME2ID_INDEX $DEFAULT_ID2NAME_INDEX
+	    $NCBI_TAXONOMY_HOSTNAME $DEFAULT_PARENT_INDEX
+	    $NCBI_TAXONOMY_FILE @DIVISIONS);
+use strict;
+use Bio::Taxon;
+use DB_File;
+
+use constant SEPARATOR => ':';
+
+# Default location and file names of the index files built by _build_index().
+$DEFAULT_INDEX_DIR = '/tmp';
+$DEFAULT_NODE_INDEX = 'nodes';
+$DEFAULT_NAME2ID_INDEX = 'names2id';
+$DEFAULT_ID2NAME_INDEX = 'id2names';
+$DEFAULT_PARENT_INDEX = 'parents';
+
+# Where the NCBI taxonomy dump can be downloaded from.
+$NCBI_TAXONOMY_HOSTNAME = 'ftp.ncbi.nih.gov';
+$NCBI_TAXONOMY_FILE = '/pub/taxonomy/taxdump.tar.gz';
+
+$DB_BTREE->{'flags'} = R_DUP; # allow duplicate values in DB_File BTREEs
+
+# [ code, name ] pairs; get_taxon() indexes this list with the division id
+# stored for a node and uses the name (element 1).
+# The last entry must not be built with qw(): qw() splits on whitespace, so
+# qw(ENV 'Environmental samples') yields three words with literal quotes.
+@DIVISIONS =   ([qw(BCT Bacteria)],
+                [qw(INV Invertebrates)],
+                [qw(MAM Mammals)],
+                [qw(PHG Phages)],
+                [qw(PLN Plants)], # (and fungi)
+                [qw(PRI Primates)],
+                [qw(ROD Rodents)],
+                [qw(SYN Synthetic)],
+                [qw(UNA Unassigned)],
+                [qw(VRL Viruses)],
+                [qw(VRT Vertebrates)],
+                ['ENV', 'Environmental samples']);
+
+use base qw(Bio::DB::Taxonomy);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::DB::Taxonomy::flatfile();
+ Function: Builds a new Bio::DB::Taxonomy::flatfile object 
+ Returns : an instance of Bio::DB::Taxonomy::flatfile
+ Args    : -directory => name of directory where index files should be created
+           -nodesfile => name of file containing nodes (nodes.dmp from NCBI)
+           -namesfile => name of the file containing names(names.dmp from NCBI)
+           -force     => 1 replace current indexes even if they exist
+
+=cut
+
+# Construct the object, remember the index directory, build the index files
+# when a nodes file is supplied, then connect to the indexes.
+sub new {
+    # The archived source read "my($class, at args)": the "@args" had been
+    # mangled.
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    my ($dir, $nodesfile, $namesfile, $force) =
+        $self->_rearrange([qw(DIRECTORY NODESFILE NAMESFILE FORCE)], @args);
+
+    $self->index_directory($dir || $DEFAULT_INDEX_DIR);
+
+    # indexes are only (re)built when a nodes file was given
+    if ( $nodesfile ) {
+        $self->_build_index($nodesfile,$namesfile,$force);
+    }
+
+    $self->_db_connect;
+    return $self;
+}
+
+=head2 Bio::DB::Taxonomy Interface implementation
+
+=cut
+
+=head2 get_taxon
+
+ Title   : get_taxon
+ Usage   : my $taxon = $db->get_taxon(-taxonid => $taxonid)
+ Function: Get a Bio::Taxon object from the database.
+ Returns : Bio::Taxon object
+ Args    : just a single value which is the database id, OR named args:
+           -taxonid => taxonomy id (to query by taxonid)
+            OR
+           -name    => string (to query by a taxonomy name: common name, 
+                               scientific name, etc)
+
+=cut
+
+# Look a taxon up in the local index, either by id (single positional
+# argument or -taxonid) or by -name, and build a Bio::Taxon from the stored
+# node and name records.
+# Returns nothing when the id is missing, is not a plain integer, or has no
+# node record.
+sub get_taxon {
+    my ($self) = shift;
+    my ($taxonid, $name);
+
+    if (@_ > 1) {
+        # The archived source read "], at _)": the "@_" had been mangled.
+        ($taxonid, $name) = $self->_rearrange([qw(TAXONID NAME)], @_);
+        if ($name) {
+            ($taxonid, my @others) = $self->get_taxonids($name);
+            $self->warn("There were multiple ids ($taxonid @others) matching '$name', using '$taxonid'") if @others > 0;
+        }
+    }
+    else {
+        $taxonid = shift;
+    }
+
+    # no id at all (e.g. the name matched nothing) is simply "not found"
+    return unless defined $taxonid && $taxonid =~ /^\d+$/;
+    my $node = $self->{'_nodes'}->[$taxonid] || return;
+    length($node) || return;
+    my ($taxid, undef, $rank, $code, $divid, $gen_code, $mito) = split(SEPARATOR,$node);
+    # was "last", which is a runtime error outside of a loop block
+    return unless defined $taxid;
+    my ($taxon_names) = $self->{'_id2name'}->[$taxid];
+    my ($sci_name, @common_names) = split(SEPARATOR, $taxon_names);
+
+    my $taxon = new Bio::Taxon(
+                        -name         => $sci_name,
+                        -common_names => [@common_names],
+                        -ncbi_taxid   => $taxid, # since this is a real ncbi taxid, explicitly set it as one
+                        -rank         => $rank,
+                        -division     => $DIVISIONS[$divid]->[1],
+                        -genetic_code => $gen_code,
+                        -mito_genetic_code => $mito );
+    # we can't use -dbh or the db_handle() method ourselves or we'll go
+    # infinite on the merge attempt
+    $taxon->{'db_handle'} = $self;
+
+    $self->_handle_internal_id($taxon);
+
+    return $taxon;
+}
+
+*get_Taxonomy_Node = \&get_taxon;
+
+=head2 get_taxonids
+
+ Title   : get_taxonids
+ Usage   : my @taxonids = $db->get_taxonids('Homo sapiens');
+ Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
+           string. Note that multiple taxonids can match to the same supplied
+           name.
+ Returns : array of integer ids in list context, one of these in scalar context
+ Args    : string representing taxon's name
+
+=cut
+
+sub get_taxonids {
+    my ($self, $query) = @_;
+
+    # Names are looked up lower-cased; the stored value is a
+    # SEPARATOR-joined string of ids.
+    my $joined_ids = $self->{'_name2id'}->{lc($query)};
+    return unless $joined_ids;
+
+    my @ids = split(SEPARATOR, $joined_ids);
+    return @ids if wantarray();
+    return $ids[0];
+}
+
+*get_taxonid = \&get_taxonids;
+
+=head2 get_Children_Taxids
+
+ Title   : get_Children_Taxids
+ Usage   : my @childrenids = $db->get_Children_Taxids 
+ Function: Get the ids of the children of a node in the taxonomy
+ Returns : Array of Ids
+ Args    : Bio::Taxon or a taxon_id
+ Status  : deprecated (use each_Descendent())
+
+=cut
+
+sub get_Children_Taxids {
+    my ($self, $node) = @_;
+    $self->warn("get_Children_Taxids is deprecated, use each_Descendent instead");
+
+    # A plain scalar is taken to be the id itself; an object is asked for its
+    # id through object_id() or, failing that, ncbi_taxid().
+    my $id = $node;
+    if (ref($node)) {
+        if ($node->can('object_id')) {
+            $id = $node->object_id;
+        }
+        elsif ($node->can('ncbi_taxid')) {
+            $id = $node->ncbi_taxid;
+        }
+        else {
+            $self->warn("Don't know how to extract a taxon id from the object of type ".ref($node)."\n");
+            return;
+        }
+    }
+
+    # Every duplicate stored under the parent id is one child id.
+    my @child_ids = $self->{'_parentbtree'}->get_dup($id);
+    return @child_ids;
+}
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : my $ancestor_taxon = $db->ancestor($taxon)
+ Function: Retrieve the full ancestor taxon of a supplied Taxon from the
+           database. 
+ Returns : Bio::Taxon
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+sub ancestor {
+    my ($self, $taxon) = @_;
+
+    # Validate the argument: a Bio::Taxon that was produced by this database.
+    unless (ref($taxon) && $taxon->isa('Bio::Taxon')) {
+        $self->throw("Must supply a Bio::Taxon");
+    }
+    unless ($taxon->db_handle && $taxon->db_handle eq $self) {
+        $self->throw("The supplied Taxon must belong to this database");
+    }
+    my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!");
+
+    my $node = $self->{'_nodes'}->[$id];
+    return unless length($node);
+
+    # The parent id is the second field of the stored node record.
+    my (undef, $parent_id) = split(SEPARATOR,$node);
+    return unless $parent_id;
+    return if $parent_id eq $id; # one of the roots
+
+    return $self->get_taxon($parent_id);
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent
+ Usage   : my @taxa = $db->each_Descendent($taxon);
+ Function: Get all the descendents of the supplied Taxon (but not their
+           descendents, ie. not a recursive fetchall).
+ Returns : Array of Bio::Taxon objects
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+sub each_Descendent {
+    my ($self, $taxon) = @_;
+
+    # Validate the argument: a Bio::Taxon that was produced by this database.
+    unless (ref($taxon) && $taxon->isa('Bio::Taxon')) {
+        $self->throw("Must supply a Bio::Taxon");
+    }
+    unless ($taxon->db_handle && $taxon->db_handle eq $self) {
+        $self->throw("The supplied Taxon must belong to this database");
+    }
+    my $id = $taxon->id || $self->throw("The supplied Taxon is missing its id!");
+
+    # Every duplicate stored under this id in the parent index is a child id.
+    my @desc_ids = $self->{'_parentbtree'}->get_dup($id);
+
+    my @descs;
+    foreach my $desc_id (@desc_ids) {
+        # ids that cannot be resolved to a taxon are silently skipped
+        my $desc = $self->get_taxon($desc_id) or next;
+        push(@descs, $desc);
+    }
+    return @descs;
+}
+
+=head2 Helper methods 
+
+=cut
+
+# internal method which does the indexing
+# Internal method which does the indexing.
+#
+# Reads the nodes and names files (fields separated by "\t|\t") and writes
+# four DB_File indexes into index_directory(): node records (RECNO),
+# parent id -> child id (BTREE), lower-cased name -> id(s) (HASH) and
+# id -> names (RECNO).
+#
+# Args   : path to the nodes file, path to the names file, and a flag which,
+#          when true, forces the indexes to be rebuilt even if they exist.
+# Throws : if an input file or an index file cannot be opened.
+sub _build_index {
+    my ($self,$nodesfile,$namesfile,$force) = @_;
+
+    my ($dir) = ($self->index_directory);
+    my $nodeindex = "$dir/$DEFAULT_NODE_INDEX";
+    my $name2idindex = "$dir/$DEFAULT_NAME2ID_INDEX";
+    my $id2nameindex = "$dir/$DEFAULT_ID2NAME_INDEX";
+    my $parent2childindex = "$dir/$DEFAULT_PARENT_INDEX";
+    $self->{'_nodes'}    = [];
+    $self->{'_id2name'} = [];
+    $self->{'_name2id'} = {};
+    $self->{'_parent2children'} = {};
+
+    # (re)build the node and parent->child indexes
+    if (! -e $nodeindex || $force) {
+        my (%parent2children, @nodes);
+        # three-argument open on a lexical handle, so the file name can never
+        # be interpreted as an open mode or a command
+        open(my $nodes_fh, '<', $nodesfile) ||
+            $self->throw("Cannot open node file '$nodesfile' for reading");
+
+        unlink $nodeindex;
+        unlink $parent2childindex;
+        my $nh = tie ( @nodes, 'DB_File', $nodeindex, O_RDWR|O_CREAT, 0644, $DB_RECNO) ||
+            $self->throw("Cannot open file '$nodeindex': $!");
+        my $btree = tie( %parent2children, 'DB_File', $parent2childindex, O_RDWR|O_CREAT, 0644, $DB_BTREE) ||
+            $self->throw("Cannot open file '$parent2childindex': $!");
+
+        while (<$nodes_fh>) {
+            chomp;
+            my ($taxid,$parent,$rank,$code,$divid,undef,$gen_code,undef,$mito) = split(/\t\|\t/,$_);
+            # don't include the fake root node 'root' with id 1; we essentially have multiple roots here
+            next if $taxid == 1;
+            # children of the fake root become their own parent
+            if ($parent == 1) {
+                $parent = $taxid;
+            }
+
+            # keep this stringified
+            $nodes[$taxid] = join(SEPARATOR, ($taxid,$parent,$rank,$code,$divid,$gen_code,$mito));
+            $btree->put($parent,$taxid);
+        }
+        close($nodes_fh);
+
+        # drop the tied-object references before untying
+        $nh = $btree = undef;
+        untie @nodes ;
+        untie %parent2children;
+    }
+
+    # (re)build the name indexes if either is missing or empty
+    if ((! -e $name2idindex || -z $name2idindex) || (! -e $id2nameindex || -z $id2nameindex) || $force) {
+        open(my $names_fh, '<', $namesfile) ||
+            $self->throw("Cannot open names file '$namesfile' for reading");
+
+        unlink $name2idindex;
+        unlink $id2nameindex;
+        my (@id2name,%name2id);
+        my $idh = tie (@id2name, 'DB_File', $id2nameindex, O_RDWR|O_CREAT, 0644, $DB_RECNO) ||
+            $self->throw("Cannot open file '$id2nameindex': $!");
+        my $nameh = tie ( %name2id, 'DB_File', $name2idindex, O_RDWR|O_CREAT, 0644, $DB_HASH) ||
+            $self->throw("Cannot open file '$name2idindex': $!");
+
+        while (<$names_fh>) {
+            chomp;
+            my ($taxid, $name, $unique_name, $class) = split(/\t\|\t/,$_);
+            # don't include the fake root node 'root' or 'all' with id 1
+            next if $taxid == 1;
+
+            # strip the trailing field terminator from the last column
+            $class =~ s/\s+\|\s*$//;
+            my $lc_name = lc($name);
+            my $orig_name = $name;
+
+            # unique names aren't always in the correct column, sometimes they
+            # are uniqued by adding bracketed rank names to the normal name;
+            # store the uniqued version then fix the name for normal use
+            if ($lc_name =~ /\(class\)$/) { # it seems that only rank of class is ever used in this situation
+                $name2id{$lc_name} = $taxid;
+                $name =~ s/\s+\(class\)$//;
+                $lc_name = lc($name);
+            }
+
+            # handle normal names which aren't necessarily unique
+            my $taxids = $name2id{$lc_name} || '';
+            my %taxids = map { $_ => 1 } split(SEPARATOR, $taxids);
+            unless (exists $taxids{$taxid}) {
+                $taxids{$taxid} = 1;
+                $name2id{$lc_name} = join(SEPARATOR, keys %taxids);
+            }
+
+            # store unique names in name2id
+            if ($unique_name) {
+                $name2id{lc($unique_name)} = $taxid;
+            }
+
+            # store all names in id2name array
+            my $names = $id2name[$taxid] || '';
+            my @names = split(SEPARATOR, $names);
+            if ($class && $class eq 'scientific name') {
+                # the scientific name should be the first name stored
+                unshift(@names, $name);
+                push(@names, $orig_name) if ($orig_name ne $name);
+                push(@names, $unique_name) if $unique_name;
+            }
+            else {
+                # all other ('common' in this simplification) names get added after
+                push(@names, $name);
+                push(@names, $orig_name) if ($orig_name ne $name);
+                push(@names, $unique_name) if $unique_name;
+            }
+            $id2name[$taxid] = join(SEPARATOR, @names);
+        }
+        close($names_fh);
+
+        # drop the tied-object references before untying
+        $idh = $nameh = undef;
+        untie( %name2id);
+        untie( @id2name);
+    }
+}
+
+# connect the internal db handle
+# Connect the internal db handle.
+#
+# Ties the node, id2name, name2id and parent2children index files found in
+# index_directory() to the object's internal structures. Does nothing if the
+# connection was already initialized. Warns and returns 0 if the node or name
+# index files do not exist; throws if any index file cannot be tied.
+sub _db_connect {
+    my $self = shift;
+    return if $self->{'_initialized'};
+
+    $self->{'_nodes'}   = [];
+    $self->{'_id2name'} = [];
+    $self->{'_name2id'} = {};
+
+    my ($dir) = ($self->index_directory);
+    my $nodeindex = "$dir/$DEFAULT_NODE_INDEX";
+    my $name2idindex = "$dir/$DEFAULT_NAME2ID_INDEX";
+    my $id2nameindex = "$dir/$DEFAULT_ID2NAME_INDEX";
+    my $parent2childindex = "$dir/$DEFAULT_PARENT_INDEX";
+
+    if( ! -e $nodeindex ||
+        ! -e $name2idindex ||
+        ! -e $id2nameindex ) {
+        $self->warn("Index files have not been created");
+        return 0;
+    }
+    tie ( @{$self->{'_nodes'}}, 'DB_File', $nodeindex, O_RDWR,undef, $DB_RECNO)
+        || $self->throw("$! $nodeindex");
+    tie (@{$self->{'_id2name'}}, 'DB_File', $id2nameindex,O_RDWR, undef,
+         $DB_RECNO) || $self->throw("$! $id2nameindex");
+
+    tie ( %{$self->{'_name2id'}}, 'DB_File', $name2idindex, O_RDWR,undef,
+          $DB_HASH) || $self->throw("$! $name2idindex");
+    # keep the tied object itself: each_Descendent() calls get_dup() on it, so
+    # a failed tie must be reported here rather than crash there later
+    $self->{'_parentbtree'} = tie( %{$self->{'_parent2children'}},
+                                   'DB_File', $parent2childindex,
+                                   O_RDWR, 0644, $DB_BTREE)
+        || $self->throw("$! $parent2childindex");
+    $self->{'_initialized'}  = 1;
+}
+
+
+=head2 index_directory
+
+ Title   : index_directory
+ Function: Get/set the location where index files are stored. (this module
+           will index the supplied database)
+ Usage   : $obj->index_directory($newval)
+ Returns : value of index_directory (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+# Combined getter/setter: when called with an argument (even undef) it is
+# stored first; the stored value is returned either way.
+sub index_directory {
+    my ($self, @new_value) = @_;
+    if (@new_value) {
+        $self->{'index_directory'} = $new_value[0];
+    }
+    return $self->{'index_directory'};
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/list.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/list.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/list.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,371 @@
+# $Id: list.pm,v 1.1.4.5 2006/12/05 20:54:38 sendu Exp $
+#
+# BioPerl module for Bio::DB::Taxonomy::list
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Taxonomy::list - An implementation of Bio::DB::Taxonomy
+that accepts lists of words to build a database
+
+=head1 SYNOPSIS
+
+  use Bio::DB::Taxonomy;
+
+  my @names = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
+  my @ranks = qw(superkingdom class genus species);
+  my $db = new Bio::DB::Taxonomy(-source => 'list', -names => \@names,
+                                                    -ranks => \@ranks);
+
+  @names = ('Eukaryota', 'Mammalia', 'Mus', 'Mus musculus');
+  $db->add_lineage(-names => \@names, -ranks => \@ranks);
+
+=head1 DESCRIPTION
+
+This is an implementation which uses supplied lists of words to create a
+database from which you can extract Bio::Taxon objects.
+
+=head1 TODO
+
+It is possible this module could do something like store the data it builds
+up to disc. Would that be useful?
+At any rate, this is why the module is called 'list' and not 'in_memory' or
+similar.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Taxonomy::list;
+use strict;
+use Bio::Taxon;
+
+use base qw(Bio::DB::Taxonomy);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::DB::Taxonomy::list();
+ Function: Builds a new Bio::DB::Taxonomy::list object 
+ Returns : an instance of Bio::DB::Taxonomy::list
+ Args    : optional, as per the add_lineage() method.
+
+=cut
+
+# Constructor: builds the object via the parent class, starts with an empty
+# in-memory database and, when arguments were given, feeds them straight to
+# add_lineage().
+sub new {
+    my $class  = shift;
+    my @params = @_;
+
+    my $self = $class->SUPER::new(@params);
+    $self->{db} = {};
+
+    if (@params) {
+        $self->add_lineage(@params);
+    }
+
+    return $self;
+}
+
+=head2 add_lineage
+
+ Title   : add_lineage
+ Usage   : $db->add_lineage(-names => \@names)
+ Function: Add a lineage to the database, where the lineage is described by
+           a list of scientific names in the order root->leaf. The rank of each
+           name can optionally be described by supplying an additional list
+           of rank names in the same order (eg. superkingdom->species).
+ Returns : n/a
+ Args    : -names => [] : array ref of scientific names, REQUIRED
+           -ranks => [] : array ref of rank names, same order as above, OPTIONAL
+
+=cut
+
+# Add one root->leaf lineage to the in-memory database.
+#
+# Fills four structures under $self->{db}:
+#   name_to_id : name (or "name_rank" for a name repeating its parent) -> node id
+#   node_data  : node id -> [name, rank]
+#   ancestors  : node id -> id of its direct ancestor
+#   children   : node id -> { id of direct child => 1 }
+#
+# Throws if -names is missing or not an array ref, if -ranks is not an array
+# ref or differs in length from -names, if a name occurs twice
+# non-consecutively in the lineage, or if a node would get a different
+# ancestor than the one recorded by a previously added lineage.
+sub add_lineage {
+    my $self = shift;
+    my ($names, $ranks) = $self->_rearrange([qw (NAMES RANKS)], @_);
+    $self->throw("-names must be supplied and its value must be an array reference") unless $names && ref($names) eq 'ARRAY';
+    my @names = @{$names};
+
+    my @ranks;
+    if ($ranks) {
+        $self->throw("-ranks must be an array reference") unless ref($ranks) eq 'ARRAY';
+        $self->throw("The -names and -ranks lists must be of equal length") unless @{$names} == @{$ranks};
+        @ranks = @{$ranks};
+    }
+    else {
+        # no ranks supplied: every name gets the placeholder rank
+        @ranks = ('no rank') x @names;
+    }
+
+    # This is non-trivial because names are not guaranteed unique in a taxonomy,
+    # and neither are name&rank combinations. Furthermore, different name&rank
+    # combinations can actually refer to the same taxon, eg. when one time
+    # 'Homo'&'genus' is supplied, while another time 'Homo'&'no rank'.
+    #
+    # name&rank&ancestor could well be unique (or good enough 99.9999% of the
+    # time), but we have the added complication that lineages could sometimes be
+    # supplied with differing numbers of taxa. Ideally we want to realise that
+    # the first of these two lineages shares all its nodes with the second:
+    # ('Mammalia', 'Homo', 'Homo sapiens')
+    # ('Mammalia', 'Hominidae', 'Homo', 'Homo sapiens')
+    #
+    # Clearly with limited information we can't do a perfect job, but we can try
+    # and do a reasonable one.
+
+
+    #...
+
+
+    # All that said, let's just do the trivial implementation now and see how
+    # bad it is! (assume names are unique, always have the same ancestor)
+
+    # a name may only repeat directly after itself
+    my %names;
+    foreach my $i (0..$#names) {
+        my $name = $names[$i];
+        $names{$name}++;
+        if ($names{$name} > 1 && $name ne $names[$i - 1]) {
+            $self->throw("The lineage '".join(', ', @names)."' had two non-consecutive nodes with the same name. Can't cope!");
+        }
+    }
+
+    my $ancestor_node_id;
+    my @node_ids;
+    for my $i (0..$#names) {
+        my $name = $names[$i];
+        my $rank = $ranks[$i];
+
+        # does this node repeat the name of its ancestor in this lineage?
+        # The first node has no ancestor: without the $i > 0 guard, index -1
+        # would compare it against the LAST name of the lineage.
+        my $repeats_ancestor = $i > 0 && $name eq $names[$i - 1];
+
+        # this is a new node with a new id if we haven't seen this name before,
+        # or if the ancestor of this node in this supplied lineage has the
+        # same name as this node (like '... Pinus, Pinus, Pinus densiflora').
+        my $db_name = $repeats_ancestor ? $name.'_'.$rank : $name;
+        if (! exists $self->{db}->{name_to_id}->{$db_name} || $repeats_ancestor) {
+            my $next_num = ++$self->{db}->{node_ids};
+            $self->{db}->{name_to_id}->{$db_name} = 'list'.$next_num; # so definitely not confused with ncbi taxonomy ids
+        }
+        my $node_id = $self->{db}->{name_to_id}->{$db_name};
+
+        unless (exists $self->{db}->{node_data}->{$node_id}) {
+            $self->{db}->{node_data}->{$node_id} = [($name, '')];
+        }
+        my $node_data = $self->{db}->{node_data}->{$node_id};
+
+        # a real rank replaces a missing or 'no rank' one, never the reverse
+        if (!$node_data->[1] || ($node_data->[1] eq 'no rank' && $rank ne 'no rank')) {
+            $node_data->[1] = $rank;
+        }
+
+        if ($ancestor_node_id) {
+            if ($self->{db}->{ancestors}->{$node_id} && $self->{db}->{ancestors}->{$node_id} ne $ancestor_node_id) {
+                $self->throw("This lineage (".join(', ', @names).") and a previously computed lineage share a node name but have different ancestries for that node. Can't cope!");
+            }
+            $self->{db}->{ancestors}->{$node_id} = $ancestor_node_id;
+        }
+
+        $ancestor_node_id = $node_id;
+        push(@node_ids, $node_id);
+    }
+
+    # go through the lineage in reverse so we can remember the children
+    my $child_id;
+    foreach my $node_id (reverse @node_ids) {
+        unless ($child_id) {
+            $child_id = $node_id;
+            next;
+        }
+
+        $self->{db}->{children}->{$node_id}->{$child_id} = 1;
+        # move up one level, so that each node records its DIRECT child and
+        # not the leaf of the lineage
+        $child_id = $node_id;
+    }
+
+    #*** would prefer to use Digest::MD5 or similar for the hash keys, but this
+    #    needs to work for everyone without hassle
+    #
+    #my $rank_list_id;
+    #if (exists $DATABASE->{rank_lists}->{"@ranks"}) {
+    #    $rank_list_id = ${$DATABASE->{rank_lists}->{"@ranks"}}[1];
+    #}
+    #else {
+    #    $DATABASE->{rank_lists}->{"@ranks"} = [\@ranks, ++$DATABASE->{rank_id}];
+    #    $DATABASE->{rank_id_to_list}->{$DATABASE->{rank_id}} = "@ranks";
+    #}
+    #
+    ## have we already added this lineage?
+    #if (exists $DATABASE->{name_lists}->{"@names"}) {
+    #    foreach my $this_rank_id (@{${$DATABASE->{name_lists}->{"@names"}}[1]}) {
+    #        return if $this_rank_id == $rank_list_id;
+    #    }
+    #}
+    #else {
+    #    $DATABASE->{name_lists}->{"@names"} = [\@names, []];
+    #}
+    #
+    #push(@{${$DATABASE->{name_lists}->{"@names"}}[1]}, $rank_list_id);
+    #
+    #*** ideally we would also avoid the next step if new lineage is a branch
+    #    of a longer existing lineage in the database
+    #
+    # compute the whole taxonomic tree from scratch, so that we aren't dependant
+    # on the order lineages are added
+    #$self->_compute_tree;
+}
+
+=head2 Bio::DB::Taxonomy Interface implementation
+
+=cut
+
+=head2 get_taxon
+
+ Title   : get_taxon
+ Usage   : my $taxon = $db->get_taxon(-taxonid => $taxonid)
+ Function: Get a Bio::Taxon object from the database.
+ Returns : Bio::Taxon object
+ Args    : just a single value which is the database id, OR named args:
+           -taxonid => taxonomy id (to query by taxonid; NB: these are not
+                       NCBI taxonomy ids but 'list' pre-fixed ids unique to the
+                       list database)
+            OR
+           -name    => string (to query by a taxonomy name)
+
+=cut
+
+# Look up a node by id (single positional argument or -taxonid) or by name
+# (-name) and wrap it in a fresh Bio::Taxon bound to this database.
+# Returns nothing when no node matches.
+sub get_taxon {
+    my $self = shift;
+    my ($taxonid, $name);
+
+    if (@_ > 1) {
+        ($taxonid, $name) = $self->_rearrange([qw(TAXONID NAME)], @_);
+        if ($name) {
+            # a name lookup overrides any -taxonid that was also supplied
+            ($taxonid, my @others) = $self->get_taxonids($name);
+            $self->warn("There were multiple ids ($taxonid @others) matching '$name', using '$taxonid'") if @others > 0;
+        }
+    }
+    else {
+        $taxonid = shift;
+    }
+
+    # an unknown name (or no id at all) cannot match any node; bail out
+    # before using undef as a hash key
+    return unless defined $taxonid;
+    my $node = $self->{db}->{node_data}->{$taxonid} || return;
+    my ($sci_name, $rank) = @{$node};
+
+    my $taxon = Bio::Taxon->new(
+                        -name         => $sci_name,
+                        -object_id    => $taxonid, # since this is NOT a real ncbi taxid, set it as simply the object id
+                        -rank         => $rank );
+    # we can't use -dbh or the db_handle() method ourselves or we'll go
+    # infinite on the merge attempt
+    $taxon->{'db_handle'} = $self;
+
+    # second argument true: match on scientific name & rank as well
+    $self->_handle_internal_id($taxon, 1);
+
+    return $taxon;
+}
+
+*get_Taxonomy_Node = \&get_taxon;
+
+=head2 get_taxonids
+
+ Title   : get_taxonids
+ Usage   : my @taxonids = $db->get_taxonids('Homo sapiens');
+ Function: Searches for a taxonid (generated by the list module) based on a
+           query string. Note that multiple taxonids can match to the same
+           supplied name.
+ Returns : array of integer ids in list context, one of these in scalar context
+ Args    : string representing taxon's name
+
+=cut
+
+# Exact-match lookup of the query string in the name_to_id table; returns the
+# stored id, or nothing when the name is unknown.
+sub get_taxonids {
+    my $self  = shift;
+    my $query = shift;
+
+    my $found = $self->{db}->{name_to_id}->{$query};
+    return unless $found;
+    return $found;
+}
+
+*get_taxonid = \&get_taxonids;
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : my $ancestor_taxon = $db->ancestor($taxon)
+ Function: Retrieve the full ancestor taxon of a supplied Taxon from the
+           database. 
+ Returns : Bio::Taxon
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+# Return the taxon recorded as the direct ancestor of the supplied taxon, or
+# nothing if no taxon was supplied or no ancestor is recorded.
+sub ancestor {
+    my ($self, $taxon) = @_;
+
+    # for bug 2092, or something similar to it at least: shouldn't need this!
+    return unless $taxon;
+
+    if (!(ref($taxon) && $taxon->isa('Bio::Taxon'))) {
+        $self->throw("Must supply a Bio::Taxon");
+    }
+    if (!($taxon->db_handle && $taxon->db_handle eq $self)) {
+        $self->throw("The supplied Taxon must belong to this database");
+    }
+    my $id = $taxon->id;
+    $self->throw("The supplied Taxon is missing its id!") unless $id;
+
+    my $parent_id = $self->{db}->{ancestors}->{$id};
+    return unless $parent_id;
+    return $self->get_taxon($parent_id);
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent
+ Usage   : my @taxa = $db->each_Descendent($taxon);
+ Function: Get all the descendents of the supplied Taxon (but not their
+           descendents, ie. not a recursive fetchall).
+ Returns : Array of Bio::Taxon objects
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+# Return a Bio::Taxon for every id recorded in the children table under the
+# supplied taxon's id; ids that get_taxon() cannot resolve are skipped.
+sub each_Descendent {
+    my ($self, $taxon) = @_;
+
+    if (!(ref($taxon) && $taxon->isa('Bio::Taxon'))) {
+        $self->throw("Must supply a Bio::Taxon");
+    }
+    if (!($taxon->db_handle && $taxon->db_handle eq $self)) {
+        $self->throw("The supplied Taxon must belong to this database");
+    }
+    my $id = $taxon->id;
+    $self->throw("The supplied Taxon is missing its id!") unless $id;
+
+    my @found;
+    for my $kid_id (keys %{$self->{db}->{children}->{$id} || {}}) {
+        my $kid = $self->get_taxon($kid_id) or next;
+        push @found, $kid;
+    }
+
+    return @found;
+}
+
+=head2 Helper methods 
+
+=cut
+
+# look at all the lineages we have and work out the overall tree
+#sub _compute_tree {
+#    my $self = shift;
+#    #tba
+#}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy/list.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Taxonomy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,267 @@
+# $Id: Taxonomy.pm,v 1.11.4.2 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::Taxonomy
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Taxonomy - Access to a taxonomy database
+
+=head1 SYNOPSIS
+
+  use Bio::DB::Taxonomy;
+  my $db = new Bio::DB::Taxonomy(-source => 'entrez');
+  # use NCBI Entrez over HTTP
+  my $taxonid = $db->get_taxonid('Homo sapiens');
+
+  # get a taxon
+  my $taxon = $db->get_taxon(-taxonid => $taxonid);
+
+=head1 DESCRIPTION
+
+This is a front end module for access to a taxonomy database.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala: bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::Taxonomy;
+use vars qw($DefaultSource $TAXON_IIDS);
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+$DefaultSource = 'entrez';
+$TAXON_IIDS = {};
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::DB::Taxonomy(-source => 'entrez');
+ Function: Builds a new Bio::DB::Taxonomy object.
+ Returns : an instance of Bio::DB::Taxonomy
+ Args    : -source => which database source 'entrez' or 'flatfile' or 'list'
+
+=cut
+
+# Factory constructor. Called on a concrete Bio::DB::Taxonomy::* class it
+# builds and initialises the object; called on Bio::DB::Taxonomy itself it
+# loads the module named by -source (default $DefaultSource) and re-dispatches
+# to that class, returning nothing if the module cannot be loaded.
+sub new {
+  my ($class, @args) = @_;
+
+  if( $class =~ /Bio::DB::Taxonomy::(\S+)/ ) {
+      my ($self) = $class->SUPER::new(@args);
+      $self->_initialize(@args);
+      return $self;
+  } else {
+      my %param = @args;
+      @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+      my $source = $param{'-source'} || $DefaultSource;
+
+      $source = "\L$source";	# normalize capitalization to lower case
+
+      # load the implementation module, then hand the original args over to it
+      return unless( $class->_load_tax_module($source) );
+      return "Bio::DB::Taxonomy::$source"->new(@args);
+  }
+}
+
+# Initialisation hook invoked by new() with the constructor arguments;
+# deliberately does nothing for now.
+sub _initialize {
+    return;
+}
+
+=head2 get_taxon
+
+ Title   : get_taxon
+ Usage   : my $taxon = $db->get_taxon(-taxonid => $taxonid)
+ Function: Get a Bio::Taxon object from the database.
+ Returns : Bio::Taxon object
+ Args    : just a single value which is the database id, OR named args:
+           -taxonid => taxonomy id (to query by taxonid)
+            OR
+           -name    => string (to query by a taxonomy name: common name, 
+                               scientific name, etc)
+
+=cut
+
+# Abstract: this base class only reports the method as not implemented.
+sub get_taxon {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+*get_Taxonomy_Node = \&get_taxon;
+
+=head2 get_taxonids
+
+ Title   : get_taxonids
+ Usage   : my @taxonids = $db->get_taxonids('Homo sapiens');
+ Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
+           string. Note that multiple taxonids can match to the same supplied
+           name.
+ Returns : array of integer ids in list context, one of these in scalar context
+ Args    : string representing taxon's name
+
+=cut
+
+# Abstract: this base class only reports the method as not implemented.
+sub get_taxonids {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+*get_taxonid = \&get_taxonids;
+*get_taxaid = \&get_taxonids;
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : my $ancestor_taxon = $db->ancestor($taxon)
+ Function: Retrieve the full ancestor taxon of a supplied Taxon from the
+           database. 
+ Returns : Bio::Taxon
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+# Abstract: this base class only reports the method as not implemented.
+sub ancestor {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent
+ Usage   : my @taxa = $db->each_Descendent($taxon);
+ Function: Get all the descendents of the supplied Taxon (but not their
+           descendents, ie. not a recursive fetchall).
+ Returns : Array of Bio::Taxon objects
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+# Abstract: this base class only reports the method as not implemented.
+sub each_Descendent {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 get_all_Descendents
+
+ Title   : get_all_Descendents
+ Usage   : my @taxa = $db->get_all_Descendents($taxon);
+ Function: Like each_Descendent(), but do a recursive fetchall
+ Returns : Array of Bio::Taxon objects
+ Args    : Bio::Taxon (that was retrieved from this database)
+
+=cut
+
+# Depth-first expansion: each direct descendent is followed immediately by
+# everything found by recursing into it.
+sub get_all_Descendents {
+    my ($self, $taxon) = @_;
+    return map {
+        my $child = $_;
+        ($child, $self->get_all_Descendents($child));
+    } $self->each_Descendent($taxon);
+}
+
+=head2 _load_tax_module
+
+ Title   : _load_tax_module
+ Usage   : *INTERNAL Bio::DB::Taxonomy stuff*
+ Function: Loads up (like use) a module at run time on demand
+
+=cut
+
+# Try to load Bio::DB::Taxonomy::<source> at run time. Returns whatever
+# _load_module() returned; if loading died, the exception and a help text are
+# printed to STDERR and the (unset) result is returned.
+sub _load_tax_module {
+    my ($self, $source) = @_;
+    my $package_name = "Bio::DB::Taxonomy::" . $source;
+    my $loaded;
+
+    eval { $loaded = $self->_load_module($package_name) };
+
+    # the common case: nothing went wrong
+    return $loaded unless $@;
+
+    print STDERR $@;
+    print STDERR <<END;
+$self: $source cannot be found
+Exception $@
+For more information about the Bio::DB::Taxonomy system please see
+the Bio::DB::Taxonomy docs.  This includes ways of checking for 
+formats at compile time, not run time.
+END
+    return $loaded;
+}
+
+=head2 _handle_internal_id
+
+ Title   : _handle_internal_id
+ Usage   : *INTERNAL Bio::DB::Taxonomy stuff*
+ Function: Tries to ensure that when a taxon is requested from any database,
+           the Taxon object returned will have the same internal id regardless
+           of database.
+ Args    : Bio::Taxon, and optionally true value to try and do the job using
+           scientific name & rank if your ids aren't comparable to other dbs.
+
+=cut
+
+# Keeps the package-level $TAXON_IIDS cache, which maps taxon ids (and
+# scientific name & rank pairs) to internal ids. If an internal id is already
+# cached for the supplied taxon it is applied to the taxon; otherwise the
+# taxon's own internal id is recorded in the cache.
+# Throws unless given a Bio::Taxon; returns early if the taxon has no id.
+sub _handle_internal_id {
+    my ($self, $taxon, $try_name) = @_;
+    $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon');
+    my $taxid = $taxon->id || return;
+    my $sci_name = $taxon->scientific_name || '';
+    my $rank = $taxon->rank || 'no rank';
+    
+    # optional name-based matching: if this scientific name was cached before,
+    # alias this taxon id to the internal id cached for the name
+    if ($try_name && $sci_name && defined $TAXON_IIDS->{names}->{$sci_name}) {
+        if (defined $TAXON_IIDS->{names}->{$sci_name}->{$rank}) {
+            # same name and same rank already cached
+            $TAXON_IIDS->{taxids}->{$taxid} = $TAXON_IIDS->{names}->{$sci_name}->{$rank};
+        }
+        elsif ($rank eq 'no rank') {
+            # pick the internal id of one named rank taxa at random
+            my ($iid) = values %{$TAXON_IIDS->{names}->{$sci_name}};
+            $TAXON_IIDS->{taxids}->{$taxid} = $iid;
+        }
+    }
+    
+    if (defined $TAXON_IIDS->{taxids}->{$taxid}) {
+        # a little dangerous to use this internal method of Bio::Tree::Node;
+        # but it is how internal_id() is set
+        $taxon->_creation_id($TAXON_IIDS->{taxids}->{$taxid});
+    }
+    else {
+        # first sighting: cache this taxon's internal id under its taxon id
+        # and, when it has a scientific name, under name & rank too
+        $TAXON_IIDS->{taxids}->{$taxid} = $taxon->internal_id;
+        $TAXON_IIDS->{names}->{$sci_name}->{$rank} = $taxon->internal_id if $sci_name;
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/Universal.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/Universal.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/Universal.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,242 @@
+
+#
+# BioPerl module for Bio::DB::Universal
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::Universal - Artificial database that delegates to specific databases
+
+=head1 SYNOPSIS
+
+    $uni = Bio::DB::Universal->new();
+
+    # by default connects to web databases. We can also
+    # substitute local databases
+
+    $embl = Bio::Index::EMBL->new( -filename => '/some/index/filename/locally/stored');
+    $uni->use_database('embl',$embl);
+
+    # treat it like a normal database. Recognises strings
+    # like gb|XXXXXX and embl:YYYYYY
+
+    $seq1 = $uni->get_Seq_by_id("embl:HSHNRNPA");
+    $seq2 = $uni->get_Seq_by_acc("gb|A000012");
+
+    # with no separator, tries to guess database. In this case the
+    # _ is considered to be indicative of swissprot
+    $seq3 = $uni->get_Seq_by_id('ROA1_HUMAN');
+
+=head1 DESCRIPTION
+
+Artificial database that delegates to specific databases, with a
+"smart" (well, smartish) guessing routine for what the ids are. No doubt
+the smart routine can be made smarter.
+
+The hope is that you can make this database and just throw ids at it -
+for most easy cases it will sort you out. Personally, I would be
+making sure I knew where each id came from and putting it into its own
+database first - but this is a quick and dirty solution.
+
+By default this connects to web orientated databases, with all the
+reliability and network bandwidth costs this implies. However you can
+substitute your own local databases - they could be Bio::Index
+databases (DBM file and flat file) or bioperl-db based (MySQL based)
+or biocorba-based (whatever you like behind the corba interface).
+
+Internally the tags for the databases are
+
+   genbank - ncbi dna database
+   embl    - ebi's dna database (these two share accession number space)
+   swiss   - swissprot + sptrembl (EBI's protein database)
+
+We should extend this for RefSeq and other sequence databases which 
+are out there... ;)
+
+Inspired by Lincoln Stein, written by Ewan Birney.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bio.perl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::DB::Universal;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use Bio::DB::GenBank;
+use Bio::DB::SwissProt;
+use Bio::DB::EMBL;
+
+
+use base qw(Bio::DB::RandomAccessI Bio::Root::Root);
+# new() can be inherited from Bio::Root::Root
+
+# Constructor: creates the object with an empty tag -> database table and
+# registers a default database object for each of the 'embl', 'genbank' and
+# 'swiss' tags. Any of them can be replaced later via use_database().
+sub new {
+    my ($class) = @_;
+
+    my $self = {};
+    bless $self,$class;
+
+    $self->{'db_hash'} = {};
+
+    # default databases
+
+    $self->use_database('embl',Bio::DB::EMBL->new);
+    $self->use_database('genbank',Bio::DB::GenBank->new);
+    # the 'swiss' tag must be served by the SwissProt database, not GenBank
+    $self->use_database('swiss',Bio::DB::SwissProt->new);
+
+    return $self;
+}
+
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Work out which registered database the id string belongs to via guess_id(),
+# then delegate the id lookup to that database.
+sub get_Seq_by_id{
+   my $self  = shift;
+   my $query = shift;
+
+   my ($db_tag, $bare_id) = $self->guess_id($query);
+   my $database = $self->{'db_hash'}->{$db_tag};
+
+   return $database->get_Seq_by_id($bare_id);
+}
+
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Work out which registered database the accession string belongs to via
+# guess_id(), then delegate the accession lookup to that database.
+sub get_Seq_by_acc {
+   my $self  = shift;
+   my $query = shift;
+
+   my ($db_tag, $bare_acc) = $self->guess_id($query);
+   my $database = $self->{'db_hash'}->{$db_tag};
+
+   return $database->get_Seq_by_acc($bare_acc);
+}
+
+
+
+=head2 guess_id
+
+ Title   : guess_id
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Split an id string into (database tag, id).
+#
+# "prefix<sep>id" strings (separator one of : | / ;) are classified by their
+# prefix; an unrecognised prefix throws. Strings without a separator are
+# classified by the shape of the id itself, falling back to 'genbank', and are
+# returned unchanged as the id.
+sub guess_id{
+   my ($self,$str) = @_;
+
+   if( my ($db, $id) = $str =~ /(\S+)[:|\/;](\w+)/ ) {
+       # first tag with any matching pattern wins; order is significant
+       my @prefix_rules = (
+           [ 'genbank', qr/gb/i,    qr/genbank/i, qr/ncbi/i ],
+           [ 'embl',    qr/embl/i,  qr/emblbank/, qr/^em/i  ],
+           [ 'swiss',   qr/swiss/i, qr/^sw/i,     qr/sptr/  ],
+       );
+       foreach my $rule (@prefix_rules) {
+           my ($tag, @patterns) = @{$rule};
+           return ($tag, $id) if grep { $db =~ $_ } @patterns;
+       }
+
+       # throw for the moment
+       $self->throw("Could not guess database type $db from $str");
+       return (undef, $id);
+   }
+
+   # auto-guess from just the id
+   my @shape_rules = (
+       [ 'swiss',   qr/_/              ],
+       [ 'swiss',   qr/^[QPR]\w+\d$/   ],
+       [ 'genbank', qr/[A-Z]\d+/       ],
+   );
+   foreach my $rule (@shape_rules) {
+       my ($tag, $pattern) = @{$rule};
+       return ($tag, $str) if $str =~ $pattern;
+   }
+
+   # default genbank...
+   return ('genbank', $str);
+}
+
+
+=head2 use_database
+
+ Title   : use_database
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Register (or replace) the database object that serves the given tag;
+# evaluates to the database that was stored.
+sub use_database{
+   my $self = shift;
+   my ($name, $database) = @_;
+
+   return $self->{'db_hash'}->{$name} = $database;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/UpdateableSeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/UpdateableSeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/UpdateableSeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,229 @@
+#
+# $Id: UpdateableSeqI.pm,v 1.8.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::UpdateableSeqI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# June 18, 2000 - module begun
+#
+# POD Doc - main docs before code
+
+=head1 NAME
+
+Bio::DB::UpdateableSeqI - An interface for writing to a database of sequences.
+
+=head1 SYNOPSIS 
+
+    # get a Bio::DB::UpdateableSeqI somehow
+    eval {
+	my ( @updatedseqs, @newseqs, @deadseqs);
+	my $seq = $db->get_Seq_by_id('ROA1_HUMAN');
+	$seq->desc('a new description');
+
+	push @updatedseqs, $seq;
+
+	$db->write_seq(\@updatedseqs, \@newseqs, \@deadseqs);
+    };
+    if( $@ ) {
+	print STDERR "an error when trying to write seq : $@\n";
+    }
+
+=head1 DESCRIPTION
+
+This module seeks to provide a simple method for pushing sequence changes 
+back to a Sequence Database - which can be an SQL compliant database, a file 
+based database, AceDB, etc.
+
+=head1 AUTHOR
+
+Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/           
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+#Lets start some code
+
+package Bio::DB::UpdateableSeqI;
+
+use strict;
+
+
+
+use base qw(Bio::DB::SeqI);
+
+=head2 write_seq
+
+  Title   : write_seq
+  Usage   : write_seq(\@updatedseqs, \@addedseqs, \@deadseqs)
+  Function: updates sequences in first array,
+            adds sequences in the second array,
+            and removes sequences in the third array.
+  Example :
+  Returns :
+  Args    : arrays of sequence objects that must be obtained from
+            Bio::DB::UpdateableSeqI.
+
+=cut
+
+sub write_seq {
+    my $self = shift;
+
+    # Interface stub: this body only runs when an implementing class
+    # did not provide its own write_seq.
+    $self->throw("Abstract database call of write_seq. Your database has not implemented this method!");
+}
+
+=head2 _add_seq
+
+ Title   : _add_seq
+ Usage   : _add_seq($seq)
+ Function: Adds a new sequence
+ Example : 
+ Returns : will throw an exception if
+           sequences accession number already exists
+ Args    : a new seq object - should have an accession number
+
+=cut
+
+sub _add_seq {
+    my $self = shift;
+
+    # Interface stub: this body only runs when an implementing class
+    # did not provide its own _add_seq.
+    $self->throw("Abstract database call of _add_seq. Your database has not implemented this method!");
+}
+
+=head2 _remove_seq
+
+ Title   : _remove_seq
+ Usage   : _remove_seq($seq)
+ Function: Removes an existing sequence
+ Example : 
+ Returns : will throw an exception if
+           sequence does not exists for the primary_id
+ Args    : a seq object that was retrieved from Bio::DB::UpdateableSeqI
+
+=cut
+
+sub _remove_seq {
+    my $self = shift;
+
+    # Interface stub: this body only runs when an implementing class
+    # did not provide its own _remove_seq.
+    $self->throw("Abstract database call of _remove_seq. Your database has not implemented this method!");
+}
+
+=head2 _update_seq
+
+ Title   : _update_seq
+ Usage   : _update_seq($seq)
+ Function: Updates a sequence
+ Example : 
+ Returns : will throw an exception if
+           sequence is out of sync from expected val.
+ Args    : a seq object that was retrieved from Bio::DB::UpdateableSeqI
+
+=cut
+
+sub _update_seq {
+    my $self = shift;
+
+    # Interface stub: this body only runs when an implementing class
+    # did not provide its own _update_seq.
+    $self->throw("Abstract database call of _update_seq. Your database has not implemented this method!");
+}
+
+
+=head1 Methods inherited from Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+=head1 Methods inherited from Bio::DB::SeqI
+
+=head2 get_PrimarySeq_stream
+
+ Title   : get_PrimarySeq_stream
+ Usage   : $stream = get_PrimarySeq_stream
+ Function: Makes a Bio::DB::SeqStreamI compliant object
+           which provides a single method, next_primary_seq
+ Returns : Bio::DB::SeqStreamI
+ Args    : none
+
+
+=cut
+
+=head2 get_all_primary_ids
+
+ Title   : get_all_primary_ids
+ Usage   : @ids = $seqdb->get_all_primary_ids()
+ Function: gives an array of all the primary_ids of the 
+           sequence objects in the database. These
+           maybe ids (display style) or accession numbers
+           or something else completely different - they
+           *are not* meaningful outside of this database
+           implementation.
+ Example :
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+=head2 get_Seq_by_primary_id
+
+ Title   : get_Seq_by_primary_id
+ Usage   : $seq = $db->get_Seq_by_primary_id($primary_id_string);
+ Function: Gets a Bio::Seq object by the primary id. The primary
+           id in these cases has to come from $db->get_all_primary_ids.
+           There is no other way to get (or guess) the primary_ids
+           in a database.
+
+           The other possibility is to get Bio::PrimarySeqI objects
+           via the get_PrimarySeq_stream and the primary_id field
+           on these objects are specified as the ids to use here.
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+1;
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/WebDBSeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/WebDBSeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/WebDBSeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,843 @@
+# $Id: WebDBSeqI.pm,v 1.57.2.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::WebDBSeqI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#  
+
+=head1 NAME
+
+Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases
+for retrieving sequences
+
+=head1 SYNOPSIS
+
+   # get a WebDBSeqI object somehow
+   # assuming it is a nucleotide db
+   my $seq = $db->get_Seq_by_id('ROA1_HUMAN');
+
+=head1 DESCRIPTION
+
+Provides a core set of functionality for connecting to a web based
+database for retrieving sequences.
+
+Users wishing to add another Web Based Sequence Database will need to
+extend this class (see L<Bio::DB::SwissProt> or L<Bio::DB::NCBIHelper> for
+examples) and implement the get_request method which returns a
+HTTP::Request for the specified uids (accessions, ids, etc depending
+on what query types the database accepts).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the
+evolution of this and other Bioperl modules. Send
+your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation
+is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to
+help us keep track of the bugs and their resolution.
+Bug reports can be submitted via the web.
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email E<lt> jason at bioperl.org E<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the
+object methods. Internal methods are usually
+preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::WebDBSeqI;
+use strict;
+use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE
+	    $DEFAULTFORMAT $LAST_INVOCATION_TIME @ATTRIBUTES);
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+use LWP::UserAgent;
+use POSIX 'setsid';
+use HTTP::Request::Common;
+use HTTP::Response;
+use File::Spec;
+use IO::Pipe;
+use IO::String;
+use Bio::Root::Root;
+
+use base qw(Bio::DB::RandomAccessI);
+
+BEGIN {
+	# Module-wide defaults, assigned at compile time.
+	$MODVERSION = '0.8';
+
+	# the retrieval strategies retrieval_type() accepts
+	%RETRIEVAL_TYPES = map { $_ => 1 } qw(io_string tempfile pipeline);
+	$DEFAULT_RETRIEVAL_TYPE = 'pipeline';
+
+	$DEFAULTFORMAT = 'fasta';
+
+	# epoch seconds of the most recent _sleep() call; 0 = never called
+	$LAST_INVOCATION_TIME = 0;
+}
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Pull the named constructor arguments out of @args.
+    my ($baseaddress, $params, $ret_type, $format,$delay,$db) =
+	 $self->_rearrange([qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)],
+							 @args);
+
+    $ret_type = $DEFAULT_RETRIEVAL_TYPE unless ( $ret_type);
+    $baseaddress   && $self->url_base_address($baseaddress);
+    $params        && $self->url_params($params);
+    $db            && $self->db($db);
+    $ret_type      && $self->retrieval_type($ret_type);
+
+    # An explicit -delay wins; otherwise ask the (possibly overridden)
+    # delay_policy for the default.
+    $delay          = $self->delay_policy unless defined $delay;
+    $self->delay($delay);
+
+    # ensure we always have a default format set for retrieval
+    # even though this will be immediately overwritten by most sub classes
+    $format = $self->default_format unless ( defined $format &&
+					     $format ne '' );
+
+    $self->request_format($format);
+
+    # Direct method call instead of the indirect-object form
+    # "new LWP::UserAgent(...)", which Perl can mis-parse.
+    my $ua = LWP::UserAgent->new(env_proxy => 1);
+    $ua->agent(ref($self) ."/$MODVERSION");
+    $self->ua($ua);
+
+    # no proxy credentials until authentication() is called
+    $self->{'_authentication'} = [];
+    return $self;
+}
+
+# from Bio::DB::RandomAccessI
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_id {
+    my ($self, $seqid) = @_;
+
+    # honour the configured pause between remote requests
+    $self->_sleep;
+
+    # note: the single id is wrapped in an array reference here
+    my $seqio = $self->get_Stream_by_id([$seqid]);
+    defined $seqio or $self->throw("id does not exist");
+
+    # Objects offering a complexity() setting get the raw stream back
+    # (after a warning) when that setting is 0.
+    if ( $self->can('complexity') ) {
+        if ( defined $self->complexity && $self->complexity==0 ) {
+            $self->warn("When complexity is set to 0, use get_Stream_by_id\n".
+                        "Returning Bio::SeqIO object");
+            return $seqio;
+        }
+    }
+
+    # drain the stream; the loop ends on the first false next_seq()
+    my @seqs;
+    while ( my $seq = $seqio->next_seq() ) {
+        push @seqs, $seq;
+    }
+    @seqs or $self->throw("id does not exist");
+
+    # list context: all sequences; scalar context: the first one
+    return wantarray ? @seqs : $seqs[0];
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+=cut
+
+sub get_Seq_by_acc {
+    my ($self, $seqid) = @_;
+
+    # honour the configured pause between remote requests
+    $self->_sleep;
+
+    # the accession is handed on unchanged (not wrapped in an array ref)
+    my $seqio = $self->get_Stream_by_acc($seqid);
+    defined $seqio or $self->throw("acc $seqid does not exist");
+
+    # Objects offering a complexity() setting get the raw stream back
+    # (after a warning) when that setting is 0.
+    if ( $self->can('complexity') ) {
+        if ( defined $self->complexity && $self->complexity==0 ) {
+            $self->warn("When complexity is set to 0, use get_Stream_by_acc\n".
+                        "Returning Bio::SeqIO object");
+            return $seqio;
+        }
+    }
+
+    # drain the stream; the loop ends on the first false next_seq()
+    my @seqs;
+    while ( my $seq = $seqio->next_seq() ) {
+        push @seqs, $seq;
+    }
+    @seqs or $self->throw("acc $seqid does not exist");
+
+    # list context: all sequences; scalar context: the first one
+    return wantarray ? @seqs : $seqs[0];
+}
+
+
+=head2 get_Seq_by_gi
+
+ Title   : get_Seq_by_gi
+ Usage   : $seq = $db->get_Seq_by_gi('405830');
+ Function: Gets a Bio::Seq object by gi number
+ Returns : A Bio::Seq object
+ Args    : gi number (as a string)
+ Throws  : "gi does not exist" exception
+
+=cut
+
+sub get_Seq_by_gi {
+    my ($self, $seqid) = @_;
+
+    # honour the configured pause between remote requests
+    $self->_sleep;
+
+    my $seqio = $self->get_Stream_by_gi($seqid);
+    defined $seqio or $self->throw("gi does not exist");
+
+    # Objects offering a complexity() setting get the raw stream back
+    # (after a warning) when that setting is 0.
+    if ( $self->can('complexity') ) {
+        if ( defined $self->complexity && $self->complexity==0 ) {
+            $self->warn("When complexity is set to 0, use get_Stream_by_gi\n".
+                        "Returning Bio::SeqIO object");
+            return $seqio;
+        }
+    }
+
+    # drain the stream; the loop ends on the first false next_seq()
+    my @seqs;
+    while ( my $seq = $seqio->next_seq() ) {
+        push @seqs, $seq;
+    }
+    @seqs or $self->throw("gi does not exist");
+
+    # list context: all sequences; scalar context: the first one
+    return wantarray ? @seqs : $seqs[0];
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+sub get_Seq_by_version {
+    my ($self, $seqid) = @_;
+
+    # honour the configured pause between remote requests
+    $self->_sleep;
+
+    my $seqio = $self->get_Stream_by_version($seqid);
+    defined $seqio or $self->throw("accession.version does not exist");
+
+    # Objects offering a complexity() setting get the raw stream back
+    # (after a warning) when that setting is 0.
+    if ( $self->can('complexity') ) {
+        if ( defined $self->complexity && $self->complexity==0 ) {
+            $self->warn("When complexity is set to 0, use get_Stream_by_version\n".
+                        "Returning Bio::SeqIO object");
+            return $seqio;
+        }
+    }
+
+    # drain the stream; the loop ends on the first false next_seq()
+    my @seqs;
+    while ( my $seq = $seqio->next_seq() ) {
+        push @seqs, $seq;
+    }
+    @seqs or $self->throw("accession.version does not exist");
+
+    # list context: all sequences; scalar context: the first one
+    return wantarray ? @seqs : $seqs[0];
+}
+
+# implementing class must define these
+
+=head2 get_request
+
+ Title   : get_request
+ Usage   : my $url = $self->get_request
+ Function: returns a HTTP::Request object
+ Returns : 
+ Args    : %qualifiers = a hash of qualifiers (ids, format, etc)
+
+=cut
+
+sub get_request {
+    my $self = shift;
+
+    # Abstract here: the message tells the implementing class what to add.
+    $self->throw("Implementing class must define method get_request in class WebDBSeqI");
+}
+
+# class methods
+
+=head2 get_Stream_by_id
+
+  Title   : get_Stream_by_id
+  Usage   : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
+  Function: Gets a series of Seq objects by unique identifiers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of unique identifiers for
+                   the desired sequence entries
+
+
+=cut
+
+sub get_Stream_by_id {
+    my ($self, $ids) = @_;
+
+    # request_format returns (web format, SeqIO format); only the first
+    # one is forwarded to get_seq_stream.
+    my ($webfmt) = $self->request_format;
+
+    my @qualifiers = ('-uids'   => $ids,
+		      '-mode'   => 'single',
+		      '-format' => $webfmt);
+    return $self->get_seq_stream(@qualifiers);
+}
+
+# Deprecated alias: emits a deprecation notice, then behaves exactly
+# like get_Stream_by_id().
+*get_Stream_by_batch = sub {
+  my ($self, @args) = @_;
+  $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
+  return $self->get_Stream_by_id(@args);
+};
+
+
+=head2 get_Stream_by_acc
+
+  Title   : get_Stream_by_acc
+  Usage   : $seq = $db->get_Stream_by_acc([$acc1, $acc2]);
+  Function: Gets a series of Seq objects by accession numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+sub get_Stream_by_acc {
+    my $self = shift;
+    my ($ids) = @_;
+
+    # same 'single' mode as get_Stream_by_id, but without forcing a format
+    my @qualifiers = ('-uids' => $ids, '-mode' => 'single');
+    return $self->get_seq_stream(@qualifiers);
+}
+
+
+=head2 get_Stream_by_gi
+
+  Title   : get_Stream_by_gi
+  Usage   : $seq = $db->get_Stream_by_gi([$gi1, $gi2]);
+  Function: Gets a series of Seq objects by gi numbers
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of gi numbers for
+                   the desired sequence entries
+  Note    : For GenBank, this just calls the same code for get_Stream_by_id()
+
+=cut
+
+sub get_Stream_by_gi {
+    my $self = shift;
+    my ($ids) = @_;
+
+    # 'gi' mode tells the request builder the uids are gi numbers
+    my @qualifiers = ('-uids' => $ids, '-mode' => 'gi');
+    return $self->get_seq_stream(@qualifiers);
+}
+
+=head2 get_Stream_by_version
+
+  Title   : get_Stream_by_version
+  Usage   : $seq = $db->get_Stream_by_version([$version1, $version2]);
+  Function: Gets a series of Seq objects by accession.versions
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : a reference to an array of accession.version strings for
+                   the desired sequence entries
+  Note    : For GenBank, this is implemented in NCBIHelper
+
+=cut
+
+sub get_Stream_by_version {
+    my $self = shift;
+    my ($ids) = @_;
+
+    # Generic behaviour ("how it should work"): forward the ids in
+    # 'version' mode rather than demanding an override.
+    my @qualifiers = ('-uids' => $ids, '-mode' => 'version');
+    return $self->get_seq_stream(@qualifiers);
+}
+
+=head2 get_Stream_by_query
+
+  Title   : get_Stream_by_query
+  Usage   : $stream = $db->get_Stream_by_query($query);
+  Function: Gets a series of Seq objects by way of a query string or object
+  Returns : a Bio::SeqIO stream object
+  Args    : $query :   A string that uses the appropriate query language
+            for the database or a Bio::DB::QueryI object.  It is suggested 
+            that you create the Bio::DB::Query object first and interrogate
+            it for the entry count before you fetch a potentially large stream.
+
+=cut
+
+sub get_Stream_by_query {
+    my $self = shift;
+    my ($query) = @_;
+
+    # the query (string or query object) travels under '-query'
+    my @qualifiers = ('-query' => $query, '-mode'=>'query');
+    return $self->get_seq_stream(@qualifiers);
+}
+
+=head2 default_format
+
+ Title   : default_format
+ Usage   : my $format = $self->default_format
+ Function: Returns default sequence format for this module
+ Returns : string
+ Args    : none
+
+=cut
+
+sub default_format {
+    # module-wide default; the invocant is not consulted
+    my $format = $DEFAULTFORMAT;
+    return $format;
+}
+
+# sorry, but this is hacked in because of BioFetch problems...
+# sorry, but this is hacked in because of BioFetch problems...
+# Get/set accessor for the database name.  When a new value is passed
+# in, the PREVIOUS value is what gets returned.
+sub db {
+  my ($self, @new) = @_;
+  my $previous = $self->{_db};
+  $self->{_db} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 request_format
+
+ Title   : request_format
+ Usage   : my ($req_format, $ioformat) = $self->request_format;
+           $self->request_format("genbank");
+           $self->request_format("fasta");
+ Function: Get/Set sequence format retrieval. The get-form will normally not
+           be used outside of this and derived modules.
+ Returns : Array of two strings, the first representing the format for
+           retrieval, and the second specifying the corresponding SeqIO format.
+ Args    : $format = sequence format
+
+=cut
+
+sub request_format {
+    my ($self, $value) = @_;
+
+    # Setting stores the same string twice: once as the web request
+    # format and once as the matching SeqIO format.
+    $self->{'_format'} = [ ($value) x 2 ] if defined $value;
+
+    return @{$self->{'_format'}};
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : my $seqio = $self->get_seq_stream(%qualifiers)
+ Function: builds a url and queries a web db
+ Returns : a Bio::SeqIO stream capable of producing sequence
+ Args    : %qualifiers = a hash qualifiers that the implementing class 
+           will process to make a url suitable for web querying 
+
+=cut
+
+sub get_seq_stream {
+	my ($self, %qualifiers) = @_;
+	my ($rformat, $ioformat) = $self->request_format();
+	my $seen = 0;
+	foreach my $key ( keys %qualifiers ) {
+		if( $key =~ /format/i ) {
+			$rformat = $qualifiers{$key};
+			$seen = 1;
+		}
+	}
+	$qualifiers{'-format'} = $rformat if( !$seen);
+	($rformat, $ioformat) = $self->request_format($rformat);
+	# These parameters are implemented for Bio::DB::GenBank objects only
+	if($self->isa('Bio::DB::GenBank')) {
+		$self->seq_start() &&  ($qualifiers{'-seq_start'} = $self->seq_start());
+		$self->seq_stop() && ($qualifiers{'-seq_stop'} = $self->seq_stop());
+		$self->strand() && ($qualifiers{'-strand'} = $self->strand());
+		defined $self->complexity() && ($qualifiers{'-complexity'} = $self->complexity());
+	}
+	my $request = $self->get_request(%qualifiers);
+	$request->proxy_authorization_basic($self->authentication)
+	  if ( $self->authentication);
+	$self->debug("request is ". $request->as_string(). "\n");
+
+	# workaround for MSWin systems
+	$self->retrieval_type('io_string') if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/;
+
+	if ($self->retrieval_type =~ /pipeline/) {
+		# Try to create a stream using POSIX fork-and-pipe facility.
+		# this is a *big* win when fetching thousands of sequences from
+		# a web database because we can return the first entry while 
+		# transmission is still in progress.
+		# Also, no need to keep sequence in memory or in a temporary file.
+		# If this fails (Windows, MacOS 9), we fall back to non-pipelined access.
+
+		# fork and pipe: _stream_request()=><STREAM>
+		my ($result,$stream) = $self->_open_pipe();
+
+		if (defined $result) {
+			$DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugge
+			if (!$result) { # in child process
+			        $self->_stream_request($request,$stream);
+			        POSIX::_exit(0); #prevent END blocks from executing in this forked child
+			}
+			else {
+				return Bio::SeqIO->new('-verbose' => $self->verbose,
+											  '-format'  => $ioformat,
+											  '-fh'      => $stream);
+			}
+		}
+		else {
+			$self->retrieval_type('io_string');
+		}
+	}
+
+	if ($self->retrieval_type =~ /temp/i) {
+		my $dir = $self->io->tempdir( CLEANUP => 1);
+		my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
+		close $fh;
+		my $resp = $self->_request($request, $tmpfile);		
+		if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
+			$self->throw("WebDBSeqI Error - check query sequences!\n");
+		}
+		$self->postprocess_data('type' => 'file',
+										'location' => $tmpfile);	
+		# this may get reset when requesting batch mode
+		($rformat,$ioformat) = $self->request_format();
+		if( $self->verbose > 0 ) {
+			open(my $ERR, "<", $tmpfile);
+			while(<$ERR>) { $self->debug($_);}
+		}
+
+		return Bio::SeqIO->new('-verbose' => $self->verbose,
+									  '-format' => $ioformat,
+									  '-file'   => $tmpfile);
+	}
+
+	if ($self->retrieval_type =~ /io_string/i ) {
+		my $resp = $self->_request($request);
+		my $content = $resp->content_ref;
+		$self->debug( "content is $$content\n");
+		if (!$resp->is_success() || length($$content) == 0) {
+			$self->throw("WebDBSeqI Error - check query sequences!\n");	
+		}
+		($rformat,$ioformat) = $self->request_format();
+		$self->postprocess_data('type'=> 'string',
+										'location' => $content);
+		$self->debug( "str is $$content\n");
+		return Bio::SeqIO->new('-verbose' => $self->verbose,
+									  '-format' => $ioformat,
+									  '-fh'   => new IO::String($$content));
+	}
+
+	# if we got here, we don't know how to handle the retrieval type
+	$self->throw("retrieval type " . $self->retrieval_type . 
+					 " unsupported\n");
+}
+
+=head2 url_base_address
+
+ Title   : url_base_address
+ Usage   : my $address = $self->url_base_address or 
+           $self->url_base_address($address)
+ Function: Get/Set the base URL for the Web Database
+ Returns : Base URL for the Web Database 
+ Args    : $address - URL for the WebDatabase 
+
+=cut
+
+sub url_base_address {
+    my ($self, @new) = @_;
+
+    # When a new address is passed in, the PREVIOUS one is returned.
+    my $previous = $self->{'_baseaddress'};
+    $self->{'_baseaddress'} = $new[0] if @new;
+    return $previous;
+}
+
+
+=head2 proxy
+
+ Title   : proxy
+ Usage   : $httpproxy = $db->proxy('http')  or 
+           $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy
+ Returns : a string indicating the proxy
+ Args    : $protocol : an array ref of the protocol(s) to set/get
+           $proxyurl : url of the proxy to use for the specified protocol
+           $username : username (if proxy requires authentication)
+           $password : password (if proxy requires authentication)
+
+=cut
+
+sub proxy {
+    my ($self,$protocol,$proxy,$username,$password) = @_;
+    return if ( !defined $self->ua || !defined $protocol );
+
+    if ( !defined $proxy ) {
+	# Get-form, as documented: $db->proxy('http').  This used to
+	# return nothing at all.  Only a single scheme name is supported
+	# here; an array ref without a URL keeps the old empty return.
+	return if ref $protocol;
+	return $self->ua->proxy($protocol);
+    }
+
+    # Set-form: remember the proxy credentials when both are given, then
+    # hand protocol(s) and URL to the user agent.
+    $self->authentication($username, $password)
+	if ($username && $password);
+    return $self->ua->proxy($protocol,$proxy);
+}
+
+=head2 authentication
+
+ Title   : authentication
+ Usage   : $db->authentication($user,$pass)
+ Function: Get/Set authentication credentials
+ Returns : Array of user/pass 
+ Args    : Array of user/pass
+
+
+=cut
+
+sub authentication{
+   my ($self, $user, $pass) = @_;
+
+   # credentials are replaced only when BOTH parts are supplied
+   $self->{'_authentication'} = [ $user, $pass ]
+       if defined $user && defined $pass;
+
+   return @{$self->{'_authentication'}};
+}
+
+
+=head2 retrieval_type
+
+ Title   : retrieval_type
+ Usage   : $self->retrieval_type($type);
+           my $type = $self->retrieval_type
+ Function: Get/Set the retrieval type (pipeline, io_string or tempfile)
+ Returns : string representing retrieval type
+ Args    : $value - the value to store
+
+This setting affects how the data stream from the remote web server is
+processed and passed to the Bio::SeqIO layer. Three types of retrieval
+types are currently allowed:
+
+   pipeline  Perform a fork in an attempt to begin streaming
+             while the data is still downloading from the remote
+             server.  Disk, memory and speed efficient, but will
+             not work on Windows or MacOS 9 platforms.
+
+   io_string Store downloaded database entry(s) in memory.  Can be
+             problematic for batch downloads because entire set
+             of entries must fit in memory.  All entries must be
+             downloaded before processing can begin.
+
+   tempfile  Store downloaded database entry(s) in a temporary file.
+             All entries must be downloaded before processing can
+             begin.
+
+The default is pipeline, with automatic fallback to io_string if
+pipelining is not available.
+
+=cut
+
+sub retrieval_type {
+    my ($self, $value) = @_;
+    if( defined $value ) {
+	# types are matched case-insensitively
+	$value = lc $value;
+	if( ! $RETRIEVAL_TYPES{$value} ) {
+	    # The closing ")" is concatenated onto the message; it used to
+	    # be passed as a separate argument to warn() and was lost.
+	    # Keys are sorted so the message is reproducible.
+	    $self->warn("invalid retrieval type $value must be one of (" .
+			join(",", sort keys %RETRIEVAL_TYPES) . ")");
+	    # unknown types fall back to the module default
+	    $value = $DEFAULT_RETRIEVAL_TYPE;
+	}
+	$self->{'_retrieval_type'} = $value;
+    }
+    return $self->{'_retrieval_type'};
+}
+
+=head2 url_params
+
+ Title   : url_params
+ Usage   : my $params = $self->url_params or 
+           $self->url_params($params)
+ Function: Get/Set the URL parameters for the Web Database
+ Returns : url parameters for Web Database
+ Args    : $params - parameters to be appended to the URL for the WebDatabase 
+
+=cut
+
+sub url_params {
+	my ($self, $value) = @_;
+	if( defined $value ) {
+		$self->{'_urlparams'} = $value;
+	}
+	# Always hand back the stored parameters.  Without this explicit
+	# return the get-form ($self->url_params) never returned the
+	# stored value.
+	return $self->{'_urlparams'};
+}
+
+=head2 ua
+
+ Title   : ua
+ Usage   : my $ua = $self->ua or 
+           $self->ua($ua)
+ Function: Get/Set a LWP::UserAgent for use
+ Returns : reference to LWP::UserAgent Object
+ Args    : $ua - must be a LWP::UserAgent
+
+=cut
+
+sub ua {
+	my ($self, $ua) = @_;
+
+	# anything that is not an LWP::UserAgent is silently ignored
+	$self->{'_ua'} = $ua
+	    if defined $ua && $ua->isa("LWP::UserAgent");
+
+	return $self->{'_ua'};
+}
+
+=head2 postprocess_data
+
+ Title   : postprocess_data
+ Usage   : $self->postprocess_data ( 'type' => 'string',
+				     'location' => \$datastr);
+ Function: process downloaded data before loading into a Bio::SeqIO
+ Returns : void
+ Args    : hash with two keys - 'type' can be 'string' or 'file'
+                              - 'location' either file location or string 
+                                           reference containing data
+
+=cut
+
+sub postprocess_data {
+	# Deliberate no-op in this class: the arguments ('type' and
+	# 'location') are unpacked but not used.
+	my ( $self, %args) = @_;
+
+	return;
+}
+
+# private methods
+# private methods
+# Sends $url (which is passed straight to the user agent's request())
+# and returns the response; a non-empty $tmpfile is forwarded as the
+# second argument of request().  Throws when the response reports an error.
+sub _request {
+	my ($self, $url,$tmpfile) = @_;
+
+	# the file argument is only passed on when it is a non-empty string
+	my @request_args = ($url);
+	push @request_args, $tmpfile
+	    if defined $tmpfile && $tmpfile ne '';
+
+	my $resp = $self->ua->request(@request_args);
+
+	$self->throw("WebDBSeqI Request Error:\n".$resp->as_string)
+	    if $resp->is_error;
+
+	return $resp;
+}
+
+#mod_perl-safe replacement for the open(BLEH,'-|') call.  if running
+#under mod_perl, detects it and closes the child's STDIN and STDOUT
+#handles
+sub _open_pipe {
+  my $self = shift;
+
+  # one-time, lazy load of Apache::SubProcess when running under mod_perl
+  our $loaded_apache_sp;
+  if($ENV{MOD_PERL} and ! $loaded_apache_sp) {
+    eval 'use Apache::SubProcess';
+    $@ and $self->throw("$@\nApache::SubProcess module required for running under mod_perl");
+    $loaded_apache_sp = 1;
+  }
+
+  my $pipe = IO::Pipe->new();
+
+  # NOTE: this changes the CHLD handler for the whole process and is
+  # not restored afterwards.
+  $SIG{CHLD} = 'IGNORE';
+
+  my $pid = fork;
+  defined $pid
+    or $self->throw("Couldn't fork: $!");
+
+  if($pid) {
+    #PARENT: keeps the read end
+    $pipe->reader();
+  } else {
+    #CHILD: keeps the write end
+    $pipe->writer();
+
+    #if we're running under mod_perl, clean up some things after this fork
+    if ($ENV{MOD_PERL} and my $r = eval{Apache->request} ) {
+      $r->cleanup_for_exec;
+      #don't read or write the mod_perl parent's tied filehandles
+      close STDIN; close STDOUT;
+      setsid() or $self->throw('Could not detach from parent');
+    }
+  }
+
+  # $pid is 0 in the child and the child's pid in the parent
+  return ( $pid, $pipe );
+}
+
+# send web request to specified filehandle, or stdout, for streaming purposes
+# send web request to specified filehandle, or stdout, for streaming purposes
+sub _stream_request {
+  my ($self, $request, $dest_fh) = @_;
+  $dest_fh ||= \*STDOUT;
+
+  # fork so as to pipe output of fetch process through to
+  # postprocess_data method call.
+  my ($child,$fetch) = $self->_open_pipe();
+
+  unless ($child) {
+    #CHILD: perform the request, writing each chunk into the pipe
+    $| = 1;
+    my $resp =  $self->ua->request($request,
+				   sub { print $fetch $_[0] }
+				   );
+    if( $resp->is_error  ) {
+      $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
+    }
+    close $fetch; #must explicitly close here, because the hard exits
+                  #don't close them for us
+    POSIX::_exit(0);
+  }
+
+  #PARENT: read the pipe one record at a time, post-process, pass on
+  local ($/) = "//\n";  # assume genbank/swiss format
+  $| = 1;
+  while (my $record = <$fetch>) {
+    $self->postprocess_data('type'     => 'string',
+			      'location' => \$record);
+    print $dest_fh $record;
+  }
+  $/ = "\n"; # reset to be safe;
+  close $dest_fh; #must explicitly close here, because the hard
+                  #exits don't close them for us
+}
+
+# Get/set the Bio::Root::IO helper.  A fresh one is created when none
+# has been stored yet, or when a defined-but-false value is passed in.
+sub io {
+    my ($self,$io) = @_;
+
+    if(defined($io) || (! exists($self->{'_io'}))) {
+	$self->{'_io'} = $io ? $io : Bio::Root::IO->new();
+    }
+    return $self->{'_io'};
+}
+
+
+=head2 delay
+
+ Title   : delay
+ Usage   : $secs = $self->delay([$secs])
+ Function: get/set number of seconds to delay between fetches
+ Returns : number of seconds to delay
+ Args    : new value
+
+NOTE: the default is to use the value specified by delay_policy().
+This can be overridden by calling this method, or by passing the
+-delay argument to new().
+
+=cut
+
+sub delay {
+   my ($self, @new) = @_;
+
+   # When a new delay is passed in, the PREVIOUS one is returned.
+   my $previous = $self->{'_delay'};
+   $self->{'_delay'} = $new[0] if @new;
+   return $previous;
+}
+
+=head2 delay_policy
+
+ Title   : delay_policy
+ Usage   : $secs = $self->delay_policy
+ Function: return number of seconds to delay between calls to remote db
+ Returns : number of seconds to delay
+ Args    : none
+
+NOTE: The default delay policy is 0s.  Override in subclasses to
+implement delays.  The timer has only second resolution, so the delay
+will actually be +/- 1s.
+
+=cut
+
+sub delay_policy {
+   # no delay by default; the invocant is not consulted
+   my ($self) = @_;
+   my $seconds = 0;
+   return $seconds;
+}
+
+=head2 _sleep
+
+ Title   : _sleep
+ Usage   : $self->_sleep
+ Function: sleep for a number of seconds indicated by the delay policy
+ Returns : none
+ Args    : none
+
+NOTE: This method keeps track of the last time it was called and only
+imposes a sleep if it was called more recently than the delay_policy()
+allows.
+
+=cut
+
+sub _sleep {
+   my $self = shift;
+
+   # Read the clock once, so the test and the computed sleep time use
+   # the same instant (it used to be read twice).  The previously unused
+   # copy of $LAST_INVOCATION_TIME is gone.
+   my $elapsed = time - $LAST_INVOCATION_TIME;
+   my $wanted  = $self->delay;
+
+   if ($elapsed < $wanted) {
+      my $delay = $wanted - $elapsed;
+      warn "sleeping for $delay seconds\n" if $self->verbose;
+      sleep $delay;
+   }
+
+   # $LAST_INVOCATION_TIME is shared by every object using this module;
+   # the clock is read afresh because we may have slept.
+   $LAST_INVOCATION_TIME = time;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBL.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBL.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBL.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,238 @@
+#
+# $Id: XEMBL.pm,v 1.10.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::XEMBL
+#
+# Cared for by Lincoln Stein
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::XEMBL - Database object interface for XEMBL entry retrieval
+
+=head1 SYNOPSIS
+
+  use Bio::DB::XEMBL;
+
+  $embl = new Bio::DB::XEMBL;
+
+  # remember that XEMBL_ID does not equal GenBank_ID!
+  $seq = $embl->get_Seq_by_id('BUM'); # EMBL ID
+ 	print "cloneid is ", $seq->id, "\n";
+
+  # or changing to accession number and Fasta format ...
+  $seq = $embl->get_Seq_by_acc('J02231'); # XEMBL ACC
+ 	print "cloneid is ", $seq->id, "\n";
+
+  # especially when using versions, you had better be prepared
+  # for not getting what you want
+  eval {
+      $seq = $embl->get_Seq_by_version('J02231.1'); # XEMBL VERSION
+  };
+  print "cloneid is ", $seq->id, "\n" unless $@;
+
+  my $seqio = $embl->get_Stream_by_batch(['U83300','U83301','U83302']);
+  while( my $clone =  $seqio->next_seq ) {
+ 	print "cloneid is ", $clone->id, "\n";
+  }
+
+=head1 DESCRIPTION
+
+Allows the dynamic retrieval of Bio::Seq objects from the XEMBL
+database. See L<Bio::Seq> for details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DB::XEMBL;
+use strict;
+use SOAP::Lite;
+# bsml parser appears broken...
+use Bio::SeqIO::bsml;
+use File::Temp 'tempfile';
+use vars qw($MODVERSION);
+
+use base qw(Bio::DB::RandomAccessI);
+$MODVERSION = '0.2';
+
+use constant DEFAULT_ENDPOINT => 'http://www.ebi.ac.uk:80/cgi-bin/xembl/XEMBL-SOAP.pl';
+
+sub new {
+    my ($class, @args ) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($endpoint) = $self->_rearrange([qw(ENDPOINT)], @args);   # needs @args and list context to see -endpoint
+    $endpoint ||= DEFAULT_ENDPOINT;                              # fall back when no -endpoint was supplied
+    $self->endpoint($endpoint);
+    return $self;
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
+ Function: Gets a Bio::Seq object by its name
+ Returns : a Bio::Seq object
+ Args    : the id (as a string) of a sequence
+ Throws  : "id does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_id {
+   my ($self,@args) = @_;                              # @args: the id(s) to look up
+   my $seqio = $self->get_Stream_by_batch([@args]);    # batch fetch, passing a copy of the id list
+   return $seqio->next_seq;                            # only the first sequence of the stream is returned
+}
+
+=head2 get_Stream_by_batch
+
+  Title   : get_Stream_by_batch
+  Usage   : $seq = $db->get_Stream_by_batch($ref);
+  Function: Retrieves Seq objects from XEMBL 'en masse', rather than one
+            at a time. Currently this is not particularly efficient, as
+            it loads the entire result into memory and parses it.
+  Example :
+  Returns : a Bio::SeqIO stream object
+  Args    : $ref : an array reference containing a list of unique 
+            ids/accession numbers.
+
+=cut
+
+sub get_Stream_by_batch {
+  my ($self, $ids) = @_;
+  $self->throw("expected an array ref, but got " . (defined $ids ? $ids : 'undef'))
+    unless ref($ids) eq 'ARRAY';
+  my @args = @$ids;
+
+  my $endpoint = $self->endpoint;
+  my $som = SOAP::Lite
+    ->uri('http://www.ebi.ac.uk/XEMBL')
+    ->proxy($endpoint)
+    ->getNucSeq(SOAP::Data->name(format=>'bsml'),
+		SOAP::Data->name(ids=>"@args"));   # ids travel as one space-separated string
+  if ($som->fault) {
+    $self->throw($som->faultstring);
+  }
+  my $result = $som->result;
+  my($fh,$filename) = tempfile(File::Spec->tmpdir . '/bsmlXXXXXX',SUFFIX=>'.bsml');
+  print $fh $result or $self->throw("could not write BSML to $filename: $!");
+  close $fh         or $self->throw("could not close $filename: $!");   # buffered write errors surface here
+  my $seqio = Bio::SeqIO->new(-file=>$filename,-format=>'bsml');
+  unlink $filename;   # NOTE(review): assumes the parser already holds the file open - confirm on non-Unix platforms
+  $seqio;
+}
+
+*get_Stream_by_id = \&get_Stream_by_batch;
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc('X77802');
+ Function: Gets a Bio::Seq object by accession number
+ Returns : A Bio::Seq object
+ Args    : accession number (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_acc{
+   my ($self,@args) = @_;                 # @args: the accession number(s)
+   return $self->get_Seq_by_id(@args);    # accessions go through the same lookup as ids
+}
+
+=head2 get_Seq_by_version
+
+ Title   : get_Seq_by_version
+ Usage   : $seq = $db->get_Seq_by_version('X77802.1');
+ Function: Gets a Bio::Seq object by sequence version
+ Returns : A Bio::Seq object
+ Args    : accession.version (as a string)
+ Throws  : "acc.version does not exist" exception
+
+=cut
+
+sub get_Seq_by_version{
+   my ($self,@args) = @_;                 # @args: accession.version string(s)
+   return $self->get_Seq_by_id(@args);    # versions go through the same lookup as ids
+}
+
+=head2 endpoint
+
+ Title   : endpoint
+ Usage   : $endpoint = $db->endpoint([$endpoint])
+ Function: Gets/sets endpoint for SOAP connection
+ Returns : old endpoint
+ Args    : new endpoint(optional)
+
+=cut
+
+sub endpoint {
+  my ($self, @new_endpoint) = @_;
+  my $previous = $self->{endpoint};                          # endpoint in effect before this call
+  $self->{endpoint} = $new_endpoint[0] if @new_endpoint;     # setter form: store the supplied endpoint
+  return $previous;                                          # always the pre-call ("old") endpoint
+}
+
+=head2 new_from_registry
+
+ Title   : new_from_registry
+ Usage   : $db = Bio::DB::XEMBL->new_from_registry(%config)
+ Function: creates a new Bio::DB::XEMBL object in a Bio::DB::Registry-
+           compatible fashion
+ Returns : new Bio::DB::XEMBL
+ Args    : provided by the registry, see below
+ Status  : Public
+
+The following registry-configuration tags are recognized:
+
+  location     Endpoint for the XEMBL service.  Currently the only
+               known valid endpoint is 
+               http://www.ebi.ac.uk:80/cgi-bin/xembl/XEMBL-SOAP.pl
+
+NOTE: Since this info is supposed to be coming from WSDL, the location
+is currently ignored.
+
+=cut
+
+sub new_from_registry {
+    my ($class, %config) = @_;             # invoked as a constructor; %config holds the registry tags
+    my $location = $config{'location'} or $class->throw('Location must be specified.');
+    return $class->new(-endpoint => $location);   # location is handed to new() as -endpoint
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBLService.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBLService.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DB/XEMBLService.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+#
+# $Id: XEMBLService.pm,v 1.8.4.1 2006/10/02 23:10:15 sendu Exp $
+#
+# BioPerl module for Bio::DB::XEMBLService
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DB::XEMBLService - SOAP service definition for XEMBL
+
+=head1 SYNOPSIS
+
+  #usage
+
+=head1 DESCRIPTION
+
+SOAP service definition for XEMBL.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+
+package Bio::DB::XEMBLService;
+
+# -- generated by SOAP::Lite (v0.51) for Perl -- soaplite.com -- Copyright (C) 2000-2001 Paul Kulchenko --
+# -- generated from http://www.ebi.ac.uk/xembl/XEMBL.wsdl [Sat Jan 26 14:47:29 2002]
+
+my %methods = (
+  getNucSeq => {
+    endpoint => 'http://www.ebi.ac.uk:80/cgi-bin/xembl/XEMBL-SOAP.pl',
+    soapaction => 'http://www.ebi.ac.uk/XEMBL#getNucSeq',
+    uri => 'http://www.ebi.ac.uk/XEMBL',
+    parameters => [
+      SOAP::Data->new(name => 'format', type => 'xsd:string', attr => {}),
+      SOAP::Data->new(name => 'ids', type => 'xsd:string', attr => {}),
+    ],
+  },
+);
+use Bio::Root::Version;
+use Carp ();
+
+use vars qw($AUTOLOAD @EXPORT_OK %EXPORT_TAGS);
+use base qw(Exporter SOAP::Lite);
+@EXPORT_OK = (keys %methods);             # every method listed in %methods is exportable
+%EXPORT_TAGS = ('all' => [@EXPORT_OK]);   # ':all' tag exports the whole set
+
+no strict 'refs';   # the *$method assignment below names a glob by string
+for my $method (@EXPORT_OK) {   # install one wrapper sub per exported method name
+  my %method = %{$methods{$method}};   # private copy of this method's entry in %methods
+  *$method = sub {
+    my $self = UNIVERSAL::isa($_[0] => __PACKAGE__) 
+      ? ref $_[0] ? shift # OBJECT
+                  # CLASS, either get self or create new and assign to self
+                  : (shift->self || __PACKAGE__->self(__PACKAGE__->new))
+      # function call, either get self or create new and assign to self
+      : (__PACKAGE__->self || __PACKAGE__->self(__PACKAGE__->new));
+    $self->proxy($method{endpoint} || Carp::croak "No server address (proxy) specified") unless $self->proxy;
+    my @templates = @{$method{parameters}};   # fresh copy; one template is consumed per argument below
+    my $som = $self
+      -> endpoint($method{endpoint})
+      -> uri($method{uri})
+      -> on_action(sub{qq!"$method{soapaction}"!})
+      -> call($method => map {shift(@templates)->value($_)} @_); 
+    UNIVERSAL::isa($som => 'SOAP::SOM') ? wantarray ? $som->paramsall : $som->result 
+                                        : $som;   # anything that is not a SOAP::SOM is returned unchanged
+  }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DBLinkContainerI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DBLinkContainerI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DBLinkContainerI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+# $Id: DBLinkContainerI.pm,v 1.12.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::DBLinkContainerI
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DBLinkContainerI - Abstract interface for any object wanting to use  
+                        database cross references
+
+=head1 SYNOPSIS
+
+    # get the database cross-references held by each object
+
+        foreach $obj ( @objs ) {
+                if( $obj->isa('Bio::DBLinkContainerI') ) {
+                        foreach $dblink ( $obj->each_DBLink() ) {
+                                # do stuff
+                        }
+                }
+        }
+
+=head1 DESCRIPTION
+
+This interface defines the functions one can expect for any object
+wanting to use database cross-references. This class does not actually
+provide any implementation; it just provides the definitions of what
+methods one can call.
+
+The database cross-references are implemented as L<Bio::Annotation::DBLink>
+objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::DBLinkContainerI;
+use strict;
+
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 each_DBLink
+
+ Title   : each_DBLink
+ Usage   : foreach $ref ( $self->each_DBLink() )
+ Function: gets an array of DBLink objects
+ Example :
+ Returns : an array of Bio::Annotation::DBLink objects
+ Args    : none
+
+
+=cut
+
+sub each_DBLink{
+   my $self  = shift;
+   my $class = ref $self ? ref $self : $self;   # class name whether invoked on an object or a class
+   $self->throw("Class $class did not define method 'each_DBLink' for interface DBLinkContainerI");
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Das/FeatureTypeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Das/FeatureTypeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Das/FeatureTypeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,393 @@
+# $Id: FeatureTypeI.pm,v 1.6.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Das::FeatureTypeI
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Das::FeatureTypeI - Simple interface to Sequence Ontology feature types
+
+=head1 SYNOPSIS
+
+  # Get a Bio::Das::FeatureTypeI object from somewhere
+  $term = $db->fetch....
+
+  # Get the name of the term
+  $name = $term->name;
+
+  # Get the accession of the term
+  $accession = $term->accession;
+
+  # Get the definition of the term
+  $definition = $term->definition;
+
+  # Get the parents of the term, optionally filtered by relationship
+  @parents = $term->parents($relationship);
+
+  # Get the children of the term, optionally filtered by relationship
+  @children = $term->children($relationship);
+
+  # Given a parent and child, returns their relationship, or undef if
+  # not directly related
+  $relationship = $parent->relationship($child);
+
+  # Return true if two terms are identical
+  $match = $term1->equals($term2);
+
+  # Return true if $term2 is a descendent of $term1, optionally
+  # filtering by relationship ("isa" assumed)
+  $match = $term1->is_descendent($term2,$relationship);
+
+  # Return true if $term2 is a parent of $term1, optionally
+  # filtering by relationship ("isa" assumed)
+  $match = $term1->is_parent($term2,$relationship);
+
+  # Return true if $term2 is equal to $term1 or if $term2 descends
+  # from term 1 via the "isa" relationship
+  $match = $term1->match($term2);
+
+  # Create a new term de novo
+  $term = Bio::Das::FeatureTypeI->new(-name       => $name,
+                                      -accession  => $accession,
+                                      -definition => $definition);
+
+  # Add a child to a term
+  $term1->add_child($term2,$relationship);
+
+  # Delete a child from a term
+  $term1->delete_child($term2);
+
+=head1 DESCRIPTION
+
+Bio::Das::FeatureTypeI is an interface to the Gene Ontology
+Consortium's Sequence Ontology (SO).  The SO, like other ontologies,
+is a directed acyclic graph in which a child node may have multiple
+parents.  The relationship between parent and child is one of a list
+of relationships.  The SO currently recognizes two relationships "isa"
+and "partof".
+
+The intent of this interface is to interoperate with older software
+that uses bare strings to represent feature types.  For this reason,
+the interface overloads the stringify ("") and string equals (eq)
+operations.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bio.perl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::Das::FeatureTypeI;
+use strict;
+
+use overload '""'     => 'name',
+             eq       => 'match',
+             fallback => 1;
+
+# Object preamble - inherits from Bio::Root::RootI;
+
+=pod
+
+this is somehow FUBAR, implementation classes cannot successfully inherit from Bio::Das::FeatureTypeI
+
+=cut
+
+use base qw(Bio::Root::RootI);
+
+=head2 name
+
+ Title   : name
+ Usage   : $string = $term->name
+ Function: return the term for the type
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub name { shift->throw_not_implemented }
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $string = $term->accession
+ Function: return the accession number for the term
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub accession  { shift->throw_not_implemented }
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $string = $term->definition
+ Function: return the human-readable definition for the term
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub definition  { shift->throw_not_implemented  }
+
+=head2 parents
+
+ Title   : parents
+ Usage   : @terms = $term->parents($relationship)
+ Function: return parent terms
+ Returns : list of Bio::Das::FeatureTypeI
+ Args    : relationship type, "isa" or "partof" (optional)
+ Status  : Public
+
+Returns the parents for the current term, empty if there are none.  An
+optional relationship argument will return those parents
+that are related via the specified relationship type.
+
+The relationship is one of "isa" or "partof".
+
+=cut
+
+sub parents { shift->throw_not_implemented; }
+
+=head2 children
+
+ Title   : children
+ Usage   : @terms = $term->children($relationship)
+ Function: return children terms
+ Returns : list of Bio::Das::FeatureTypeI
+ Args    : relationship type, "isa" or "partof" (optional)
+ Status  : Public
+
+Returns the children for the current term, empty if there are none.  An
+optional relationship argument will return those children
+that are related via the specified relationship type.
+
+The relationship is one of "isa" or "partof".
+
+=cut
+
+sub children { shift->throw_not_implemented; }
+
+=head2 relationship
+
+ Title   : relationship
+ Usage   : $relationship = $parent->relationship($child)
+ Function: return the relationship between a parent and a child
+ Returns : one of "isa" or "partof"
+ Args    : the child term
+ Status  : Public
+
+This method returns the relationship between a parent and one of its
+immediate descendents.  It can return "isa", "partof", or undef if
+there is not a direct parent/child relationship (kissing cousins are
+*not* recognized).
+
+=cut
+
+sub relationship { shift->throw_not_implemented }
+
+=head2 equals
+
+ Title   : equals
+ Usage   : $boolean = $term1->equals($term2)
+ Function: return true if $term1 and $term2 are the same
+ Returns : boolean
+ Args    : second term
+ Status  : Public
+
+The two terms must be identical.  In practice, this means that if
+term2 is a Bio::Das::FeatureTypeI object, then its accession number must
+match the first term's accession number.  Otherwise, if term2 is a
+bare string, then it must equal (in a case insensitive manner)
+the name of term1.
+
+NOTE TO IMPLEMENTORS: This method is defined in terms of other
+methods, so does not need to be implemented.
+
+=cut
+
+#'
+sub equals {
+  my $self = shift;
+  my $term2 = shift;
+  if (UNIVERSAL::isa($term2,'Bio::Das::FeatureTypeI')) {   # function form does not die on undef, '' or plain refs
+    return $self->accession eq $term2->accession;          # two terms: compare accession numbers
+  } else {
+    return lc($self->name) eq lc($term2);                  # bare string: case-insensitive name comparison
+  }
+}
+
+=head2 is_descendent
+
+ Title   : is_descendent
+ Usage   : $boolean = $term1->is_descendent($term2 [,$relationship])
+ Function: return true if $term2 is a descendent of $term1
+ Returns : boolean
+ Args    : second term
+ Status  : Public
+
+This method returns true if $term2 descends from $term1.  The
+operation traverses the tree.  The traversal can be limited to the
+relationship type ("isa" or "partof") if desired.  $term2 can be a
+bare string, in which case the term names will be used as the basis
+for term matching (see equals()).
+
+NOTE TO IMPLEMENTORS: this method is defined as the inverse of
+is_parent().  Do not implement it directly, but do implement
+is_parent().
+
+=cut
+
+sub is_descendent {
+  my $self = shift;
+  my ($term,$relationship) = @_;
+  $self->throw("$term is not a Bio::Das::FeatureTypeI")     # NOTE(review): POD says bare strings are accepted; they are rejected here
+    unless UNIVERSAL::isa($term,'Bio::Das::FeatureTypeI');  # function form: no "can't call method" death on non-objects
+  return $term->is_parent($self,$relationship);             # inverse question is delegated to the other term
+}
+
+=head2 is_parent
+
+ Title   : is_parent
+ Usage   : $boolean = $term1->is_parent($term2 [,$relationship])
+ Function: return true if $term2 is a parent of $term1
+ Returns : boolean
+ Args    : second term
+ Status  : Public
+
+This method returns true if $term2 is a parent of $term1.  The
+operation traverses the tree.  The traversal can be limited to the
+relationship type ("isa" or "partof") if desired.  $term2 can be a
+bare string, in which case the term names will be used as the basis
+for term matching (see equals()).
+
+NOTE TO IMPLEMENTORS: Implementing this method will also implement
+is_descendent().
+
+=cut
+
+sub is_parent { shift->throw_not_implemented }
+
+=head2 match
+
+ Title   : match
+ Usage   : $boolean = $term1->match($term2)
+ Function: return true if $term1 equals $term2 or if $term2 is an "isa" descendent
+ Returns : boolean
+ Args    : second term
+ Status  : Public
+
+This method combines equals() and is_descendent() in such a way that
+the two terms will match if they are the same or if the second term is
+an instance of the first one.  This is also the basis of the operator
+overloading of eq.
+
+NOTE TO IMPLEMENTORS: This method is defined in terms of other methods
+and does not need to be implemented.
+
+=cut
+
+sub match {
+  my ($self, $term2) = @_;
+  # an equal term matches outright; otherwise defer to an "isa"-restricted descent test
+  return $self->equals($term2) ? 1
+       : $self->is_descendent($term2,'isa');
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $term = Bio::Das::FeatureTypeI->new(@args)
+ Function: create a new term
+ Returns : new term
+ Args    : see below
+ Status  : Public
+
+This method creates a new Bio::Das::FeatureTypeI.  Arguments:
+
+  Argument    Description
+  --------   ------------
+
+  -name       Name of this term
+
+  -accession  Accession number for the term
+
+  -definition Definition of the term
+
+=cut
+
+sub new { shift->throw_not_implemented }
+
+=head2 add_child
+
+ Title   : add_child
+ Usage   : $boolean = $term->add_child($term2,$relationship)
+ Function: add a child to a term
+ Returns : a boolean indicating success
+ Args    : new child
+ Throws  : a "cycle detected" exception
+ Status  : Public
+
+This method adds a new child to the indicated node.  It may detect a
+cycle in the DAG and throw a "cycle detected" exception.
+
+=cut
+
+sub add_child { shift->throw_not_implemented }
+
+
+=head2 delete_child
+
+ Title   : delete_child
+ Usage   : $boolean = $term->delete_child($term2);
+ Function: delete a child of the term
+ Returns : a boolean indicating success
+ Args    : child to be deleted
+ Throws  : a "not a child" exception
+ Status  : Public
+
+This method deletes a child from the indicated node.  It will
+throw an exception if the indicated child is not a direct descendent.
+
+=cut
+
+sub delete_child { shift->throw_not_implemented }
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Das/SegmentI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Das/SegmentI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Das/SegmentI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,464 @@
+# $Id: SegmentI.pm,v 1.11.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Das::SegmentI
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Das::SegmentI - DAS-style access to a feature database
+
+=head1 SYNOPSIS
+
+  # Get a Bio::Das::SegmentI object from a Bio::DasI database...
+
+  $segment = $das->segment(-name=>'Landmark',
+                           -start=>$start,
+                           -end => $end);
+
+  @features = $segment->overlapping_features(-type=>['type1','type2']);
+  # each feature is a Bio::SeqFeatureI-compliant object
+
+  @features = $segment->contained_features(-type=>['type1','type2']);
+
+  @features = $segment->contained_in(-type=>['type1','type2']);
+
+  $stream = $segment->get_feature_stream(-type=>['type1','type2','type3']);
+  while (my $feature = $stream->next_seq) {
+     # do something with feature
+  }
+
+  $count = $segment->features_callback(-type=>['type1','type2','type3'],
+                                       -callback => sub { ... }
+                                       );
+
+=head1 DESCRIPTION
+
+Bio::Das::SegmentI is a simplified alternative interface to sequence
+annotation databases used by the distributed annotation system. In
+this scheme, the genome is represented as a series of landmarks.  Each
+Bio::Das::SegmentI object ("segment") corresponds to a genomic region
+defined by a landmark and a start and end position relative to that
+landmark.  A segment is created using the Bio::DasI segment() method.
+
+Features can be filtered by the following attributes:
+
+  1) their location relative to the segment (whether overlapping,
+          contained within, or completely containing)
+
+  2) their type
+
+  3) other attributes using tag/value semantics
+
+Access to the feature list uses three distinct APIs:
+
+  1) fetching entire list of features at a time
+
+  2) fetching an iterator across features
+
+  3) a callback
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bio.perl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::Das::SegmentI;
+use strict;
+
+
+# Object preamble - inherits from Bio::Root::RootI;
+use base qw(Bio::Root::RootI);
+
+=head2 seq_id
+
+ Title   : seq_id
+ Usage   : $ref = $s->seq_id
+ Function: return the ID of the landmark
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+=cut
+
+sub seq_id { shift->throw_not_implemented }
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $ref = $s->display_name
+ Function: return the human-readable name for the landmark
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This defaults to the same as seq_id.
+
+=cut
+
+sub display_name { shift->seq_id }
+
+=head2 start
+
+ Title   : start
+ Usage   : $s->start
+ Function: start of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the start of the segment.  Alias
+to low() for Gadfly compatibility.
+
+=cut
+
+sub start  { shift->throw_not_implemented }
+sub low    { shift->start }
+
+=head2 end
+
+ Title   : end
+ Usage   : $s->end
+ Function: end of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+This is a read-only accessor for the end of the segment. Alias to
+high() for Gadfly compatibility.
+
+=cut
+
+sub end   { shift->throw_not_implemented  }
+sub stop  { shift->end }
+sub high  { shift->end }
+
+=head2 length
+
+ Title   : length
+ Usage   : $s->length
+ Function: length of segment
+ Returns : integer
+ Args    : none
+ Status  : Public
+
+Returns the length of the segment.  Always a positive number.
+
+=cut
+
+sub length { shift->throw_not_implemented; }
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $s->seq
+ Function: get the sequence string for this segment
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+Returns the sequence for this segment as a simple string.
+
+=cut
+
+sub seq {shift->throw_not_implemented}
+
+=head2 ref
+
+ Title   : ref
+ Usage   : $ref = $s->ref([$newlandmark])
+ Function: get/set the reference landmark for addressing
+ Returns : a string
+ Args    : none
+ Status  : Public
+
+This method is used to examine/change the reference landmark used to
+establish the coordinate system.  By default, the landmark cannot be
+changed and therefore this has the same effect as seq_id().  The new
+landmark might be an ID, or another Das::SegmentI object.
+
+=cut
+
+sub ref    { shift->seq_id }
+*refseq = \&ref;
+
+=head2 absolute
+
+ Title   : absolute
+ Usage   : $s->absolute([$new_value])
+ Function: get/set absolute addressing mode
+ Returns : flag
+ Args    : new flag (optional)
+ Status  : Public
+
+Turn on and off absolute-addressing mode.  In absolute addressing
+mode, coordinates are relative to some underlying "top level"
+coordinate system (such as a chromosome). ref() returns the identity
+of the top level landmark, and start() and end() return locations
+relative to that landmark.  In relative addressing mode, coordinates
+are relative to the landmark sequence specified at the time of segment
+creation or later modified by the ref() method.
+
+The default is to return false and to do nothing in response to
+attempts to set absolute addressing mode.
+
+=cut
+
+sub absolute { return }
+
+=head2 features
+
+ Title   : features
+ Usage   : @features = $s->features(@args)
+ Function: get features that overlap this segment
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : see below
+ Status  : Public
+
+This method will find all features that intersect the segment in a
+variety of ways and return a list of Bio::SeqFeatureI objects.  The
+feature locations will use coordinates relative to the reference
+sequence in effect at the time that features() was called.
+
+The returned list can be limited to certain types, attributes or
+range intersection modes.  Types of range intersection are one of:
+
+   "overlaps"      the default
+   "contains"      return features completely contained within the segment
+   "contained_in"  return features that completely contain the segment
+
+Two types of argument lists are accepted.  In the positional argument
+form, the arguments are treated as a list of feature types.  In the
+named parameter form, the arguments are a series of -name=E<gt>value
+pairs.
+
+  Argument    Description
+  --------   ------------
+
+  -types      An array reference to type names in the format
+	      "method:source"
+
+  -attributes A hashref containing a set of attributes to match
+
+  -rangetype  One of "overlaps", "contains", or "contained_in".
+
+  -iterator   Return an iterator across the features.
+
+  -callback   A callback to invoke on each feature
+
+The -attributes argument is a hashref containing one or more
+attributes to match against:
+
+  -attributes => { Gene => 'abc-1',
+                   Note => 'confirmed' }
+
+Attribute matching is simple string matching, and multiple attributes
+are ANDed together.  More complex filtering can be performed using the
+-callback option (see below).
+
+If -iterator is true, then the method returns an object reference that
+implements the next_seq() method.  Each call to next_seq() returns a
+new Bio::SeqFeatureI object.
+
+If -callback is passed a code reference, the code reference will be
+invoked on each feature returned.  The code will be passed two
+arguments consisting of the current feature and the segment object
+itself, and must return a true value. If the code returns a false
+value, feature retrieval will be aborted.
+
+-callback and -iterator are mutually exclusive options.  If -iterator
+is defined, then -callback is ignored.
+
+NOTE: the following methods all build on top of features(), and do not
+need to be explicitly implemented.
+
+    overlapping_features()
+    contained_features()
+    contained_in()
+    get_feature_stream()
+
+=cut
+
+sub features {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 overlapping_features
+
+ Title   : overlapping_features
+ Usage   : @features = $s->overlapping_features(@args)
+ Function: get features that overlap this segment
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : see below
+ Status  : Public
+
+This method is identical to features() except that it defaults to
+finding overlapping features.
+
+=cut
+
+sub overlapping_features {
+  my $self = shift;
+  # A first argument starting with "-" selects the named-parameter form,
+  # which is passed through with -rangetype appended.  Anything else
+  # (including an empty argument list) is treated as a list of types.
+  # The defined() guard avoids an "uninitialized value" warning when the
+  # method is called without arguments, as get_feature_stream() already does.
+  my @args = defined $_[0] && $_[0] =~ /^-/ ? (@_,         -rangetype=>'overlaps')
+                                            : (-types=>\@_,-rangetype=>'overlaps');
+  $self->features(@args);
+}
+
+=head2 contained_features
+
+ Title   : contained_features
+ Usage   : @features = $s->contained_features(@args)
+ Function: get features that are contained in this segment
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : see below
+ Status  : Public
+
+This method is identical to features() except that it defaults to
+a range type of 'contained'.
+
+=cut
+
+sub contained_features {
+  my $self = shift;
+  # A first argument starting with "-" selects the named-parameter form,
+  # which is passed through with -rangetype appended.  Anything else
+  # (including an empty argument list) is treated as a list of types.
+  # The defined() guard avoids an "uninitialized value" warning when the
+  # method is called without arguments.
+  # NOTE(review): the features() documentation lists "contains", not
+  # "contained", as a range type -- confirm which spelling implementations
+  # expect before changing it.
+  my @args = defined $_[0] && $_[0] =~ /^-/ ? (@_,         -rangetype=>'contained')
+                                            : (-types=>\@_,-rangetype=>'contained');
+  $self->features(@args);
+}
+
+=head2 contained_in
+
+ Title   : contained_in
+ Usage   : @features = $s->contained_in(@args)
+ Function: get features that contain this segment
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : see below
+ Status  : Public
+
+This method is identical to features() except that it defaults to
+a range type of 'contained_in'.
+
+=cut
+
+sub contained_in {
+  my $self = shift;
+  # A first argument starting with "-" selects the named-parameter form,
+  # which is passed through with -rangetype appended.  Anything else
+  # (including an empty argument list) is treated as a list of types.
+  # The defined() guard avoids an "uninitialized value" warning when the
+  # method is called without arguments.
+  my @args = defined $_[0] && $_[0] =~ /^-/ ? (@_,         -rangetype=>'contained_in')
+                                            : (-types=>\@_,-rangetype=>'contained_in');
+  $self->features(@args);
+}
+
+=head2 get_feature_stream
+
+ Title   : get_feature_stream
+ Usage   : $iterator = $s->get_feature_stream(@args)
+ Function: get an iterator across the segment
+ Returns : an object that implements next_seq()
+ Args    : see below
+ Status  : Public
+
+This method is identical to features() except that it always generates
+an iterator.
+
+NOTE: This is defined in the interface in terms of features().  You do not
+have to implement it.
+
+=cut
+
+sub get_feature_stream {
+  my $self = shift;
+  # A defined first argument beginning with "-" means the caller used the
+  # named-parameter form.
+  my $named_form = defined $_[0] && $_[0] =~ /^-/;
+
+  # Named form: pass the arguments through and request an iterator.
+  return $self->features(@_, -iterator=>1) if $named_form;
+
+  # Positional form: every argument is a feature type.
+  return $self->features(-types=>\@_, -iterator=>1);
+}
+
+=head2 factory
+
+ Title   : factory
+ Usage   : $factory = $s->factory
+ Function: return the segment factory
+ Returns : a Bio::DasI object
+ Args    : see below
+ Status  : Public
+
+This method returns a Bio::DasI object that can be used to fetch
+more segments.  This is typically the Bio::DasI object from which
+the segment was originally generated.
+
+=cut
+
+#'
+
+sub factory {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $s->primary_tag
+ Function: identifies the segment as type "DasSegment"
+ Returns : a string named "DasSegment"
+ Args    : none
+ Status  : Public, but see below
+
+This method provides Bio::Das::Segment objects with a primary_tag()
+field that identifies them as being of type "DasSegment".  This allows
+the Bio::Graphics engine to render segments just like a feature, which
+is useful.
+
+This does not need to be implemented.  It is defined by the interface.
+
+=cut
+
+#'
+
+sub primary_tag {
+  # Constant tag; the invocant and any arguments are ignored.
+  return "DasSegment";
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $strand = $s->strand
+ Function: identifies the segment strand as 0
+ Returns : the number 0
+ Args    : none
+ Status  : Public, but see below
+
+This method provides Bio::Das::Segment objects with a strand() field
+that identifies it as being strandless.  This allows the Bio::Graphics
+engine to render segments just like a feature, which is useful.
+
+This does not need to be implemented.  It is defined by the interface.
+
+=cut
+
+sub strand {
+  # Constant 0; the invocant and any arguments are ignored.
+  return 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DasI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DasI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DasI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,436 @@
+# $Id: DasI.pm,v 1.18.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::DasI
+#
+# Cared for by Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::DasI - DAS-style access to a feature database
+
+=head1 SYNOPSIS
+
+  # Open up a feature database somehow...
+  $db = Bio::DasI->new(@args);
+
+  @segments = $db->segment(-name  => 'NT_29921.4',
+                           -start => 1,
+			   -end   => 1000000);
+
+  # segments are Bio::Das::SegmentI - compliant objects
+
+  # fetch a list of features
+  @features = $db->features(-type=>['type1','type2','type3']);
+
+  # invoke a callback over features
+  $db->features(-type=>['type1','type2','type3'],
+                -callback => sub { ... }
+		);
+
+  $stream   = $db->get_seq_stream(-type=>['type1','type2','type3']);
+  while (my $feature = $stream->next_seq) {
+     # each feature is a Bio::SeqFeatureI-compliant object
+  }
+
+  # get all feature types
+  @types   = $db->types;
+
+  # count types
+  %types   = $db->types(-enumerate=>1);
+
+  @feature = $db->get_feature_by_name($class=>$name);
+  @feature = $db->get_feature_by_target($target_name);
+  @feature = $db->get_feature_by_attribute($att1=>$value1,$att2=>$value2);
+  $feature = $db->get_feature_by_id($id);
+
+  $error = $db->error;
+
+=head1 DESCRIPTION
+
+Bio::DasI is a simplified alternative interface to sequence annotation
+databases used by the distributed annotation system (see
+L<Bio::Das>). In this scheme, the genome is represented as a series of
+features, a subset of which are named.  Named features can be used as
+reference points for retrieving "segments" (see L<Bio::Das::SegmentI>),
+and these can, in turn, be used as the basis for exploring the genome
+further.
+
+In addition to a name, each feature has a "class", which is
+essentially a namespace qualifier and a "type", which describes what
+type of feature it is.  Das uses the GO consortium's ontology of
+feature types, and so the type is actually an object of class
+Bio::Das::FeatureTypeI (see L<Bio::Das::FeatureTypeI>). Bio::DasI
+provides methods for interrogating the database for the types it
+contains and the counts of each type.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::DasI;
+use strict;
+
+use Bio::Das::SegmentI;
+# Object preamble - inherits from Bio::Root::RootI and Bio::SeqFeature::CollectionI
+use base qw(Bio::Root::RootI Bio::SeqFeature::CollectionI);
+
+=head2 new
+
+ Title   : new
+ Usage   : Bio::DasI->new(@args)
+ Function: Create new Bio::DasI object
+ Returns : a Bio::DasI object
+ Args    : see below
+
+The new() method creates a new object.  The argument list is either a
+single argument consisting of a connection string, or the following
+list of -name=E<gt>value arguments:
+
+   Argument        Description
+   --------        -----------
+
+   -dsn            Connection string for database
+   -adaptor        Name of an adaptor class to use when connecting
+   -aggregator     Array ref containing list of aggregators
+                     "semantic mappers" to apply to database
+   -user           Authentication username
+   -pass           Authentication password
+
+Implementors of DasI may add other arguments.
+
+=cut
+
+sub new {
+  # Abstract constructor: this default only reports that it is not
+  # implemented by delegating to throw_not_implemented() on the invocant.
+  my $class = shift;
+  return $class->throw_not_implemented;
+}
+
+=head2 types
+
+ Title   : types
+ Usage   : $db->types(@args)
+ Function: return list of feature types in database
+ Returns : a list of Bio::Das::FeatureTypeI objects
+ Args    : see below
+
+This routine returns a list of feature types known to the database. It
+is also possible to find out how many times each feature occurs.
+
+Arguments are -option=E<gt>value pairs as follows:
+
+  -enumerate  if true, count the features
+
+The returned value will be a list of Bio::Das::FeatureTypeI objects
+(see L<Bio::Das::FeatureTypeI>).
+
+If -enumerate is true, then the function returns a hash (not a hash
+reference) in which the keys are the stringified versions of
+Bio::Das::FeatureTypeI and the values are the number of times each
+feature appears in the database.
+
+=cut
+
+sub types {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 parse_types
+
+ Title   : parse_types
+ Usage   : $db->parse_types(@args)
+ Function: parses list of types
+ Returns : an array ref containing ['method','source'] pairs
+ Args    : a list of types in 'method:source' form
+ Status  : internal
+
+This method takes an array of type names in the format "method:source"
+and returns an array reference of ['method','source'] pairs.  It will
+also accept a single argument consisting of an array reference with
+the list of type names.
+
+=cut
+
+# turn feature types in the format "method:source" into a list of [method,source] refs
+sub parse_types {
+  my ($self, @requested) = @_;
+  my $first = $requested[0];
+
+  # No arguments, or an undefined first argument: nothing to parse.
+  return [] unless @requested && defined $first;
+
+  # An array ref whose first element is itself a reference is taken to be
+  # already parsed and is handed back untouched.
+  return $first if ref $first eq 'ARRAY' && ref $first->[0];
+
+  # Accept either a single reference holding the names or a flat list.
+  my @names = ref($first) ? @{$first} : @requested;
+
+  # Split each "method:source" name at the first colon only.
+  return [ map { [split(':',$_,2)] } @names ];
+}
+
+=head2 segment
+
+ Title   : segment
+ Usage   : $db->segment(@args);
+ Function: create a segment object
+ Returns : segment object(s)
+ Args    : see below
+
+This method generates a Bio::Das::SegmentI object (see
+L<Bio::Das::SegmentI>).  The segment can be used to find overlapping
+features and the raw sequence.
+
+When making the segment() call, you specify the ID of a sequence
+landmark (e.g. an accession number, a clone or contig), and a
+positional range relative to the landmark.  If no range is specified,
+then the entire region spanned by the landmark is used to generate the
+segment.
+
+Arguments are -option=E<gt>value pairs as follows:
+
+ -name         ID of the landmark sequence.
+
+ -class        A namespace qualifier.  It is not necessary for the
+               database to honor namespace qualifiers, but if it
+               does, this is where the qualifier is indicated.
+
+ -version      Version number of the landmark.  It is not necessary for
+               the database to honor versions, but if it does, this is
+               where the version is indicated.
+
+ -start        Start of the segment relative to landmark.  Positions
+               follow standard 1-based sequence rules.  If not specified,
+               defaults to the beginning of the landmark.
+
+ -end          End of the segment relative to the landmark.  If not specified,
+               defaults to the end of the landmark.
+
+The return value is a list of Bio::Das::SegmentI objects.  If the method
+is called in a scalar context and no more than one segment satisfies
+the request, then it is allowed to return the segment.
+Otherwise, the method must throw a "multiple segment exception".
+
+=cut
+
+#'
+
+sub segment {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 features
+
+ Title   : features
+ Usage   : $db->features(@args)
+ Function: get all features, possibly filtered by type
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : see below
+ Status  : public
+
+This routine will retrieve features in the database regardless of
+position.  It can be used to return all features, or a subset based on
+their type.
+
+Arguments are -option=E<gt>value pairs as follows:
+
+  -types     List of feature types to return.  Argument is an array
+             of Bio::Das::FeatureTypeI objects or a set of strings
+             that can be converted into FeatureTypeI objects.
+
+  -callback   A callback to invoke on each feature.  The subroutine
+              will be passed each Bio::SeqFeatureI object in turn.
+
+  -attributes A hash reference containing attributes to match.
+
+The -attributes argument is a hashref containing one or more attributes
+to match against:
+
+  -attributes => { Gene => 'abc-1',
+                   Note => 'confirmed' }
+
+Attribute matching is simple exact string matching, and multiple
+attributes are ANDed together.  See L<Bio::DB::ConstraintsI> for a
+more sophisticated take on this.
+
+If one provides a callback, it will be invoked on each feature in
+turn.  If the callback returns a false value, iteration will be
+interrupted.  When a callback is provided, the method returns undef.
+
+=cut
+
+sub features {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+
+=head2 get_feature_by_name
+
+ Title   : get_feature_by_name
+ Usage   : $db->get_feature_by_name(-class=>$class,-name=>$name)
+ Function: fetch features by their name
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : the class and name of the desired feature
+ Status  : public
+
+This method can be used to fetch named feature(s) from the database.
+The -class and -name arguments have the same meaning as in segment(),
+and the method also accepts the following short-cut forms:
+
+  1) one argument: the argument is treated as the feature name
+  2) two arguments: the arguments are treated as the class and name
+     (note: this uses _rearrange() so the first argument must not
+     begin with a hyphen or it will be interpreted as a named
+     argument).
+
+This method may return zero, one, or several Bio::SeqFeatureI objects.
+The implementor may allow the name to contain wildcards, in which case
+standard C-shell glob semantics are expected.
+
+=cut
+
+sub get_feature_by_name {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented();
+}
+
+=head2 get_feature_by_target
+
+ Title   : get_feature_by_target
+ Usage   : $db->get_feature_by_target($class => $name)
+ Function: fetch features by their similarity target
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : the class and name of the desired feature
+ Status  : public
+
+This method can be used to fetch a named feature from the database
+based on its similarity hit.  The arguments are the same as
+get_feature_by_name().  If this is not implemented, the interface
+defaults to using get_feature_by_name().
+
+=cut
+
+sub get_feature_by_target {
+  # Default: forward the remaining arguments unchanged to
+  # get_feature_by_name() and return whatever it returns.
+  my $self = shift;
+  return $self->get_feature_by_name(@_);
+}
+
+=head2 get_feature_by_id
+
+ Title   : get_feature_by_id
+ Usage   : $db->get_feature_by_id($id)
+ Function: fetch a feature by its ID
+ Returns : a Bio::SeqFeatureI object
+ Args    : the ID of the feature
+ Status  : public
+
+If the database provides unique feature IDs, this can be used to
+retrieve a single feature from the database.  If not overridden, this
+interface calls get_feature_by_name() and returns the first element.
+
+=cut
+
+sub get_feature_by_id {
+  my $self = shift;
+  # Look the feature up by name (always in list context) and keep only
+  # the first match.  The list slice is retained on purpose so that the
+  # return value behaves the same way in list and scalar context.
+  return ( $self->get_feature_by_name(@_) )[0];
+}
+
+=head2 get_feature_by_attribute
+
+ Title   : get_feature_by_attribute
+ Usage   : $db->get_feature_by_attribute(attribute1=>value1,attribute2=>value2)
+ Function: fetch features by combinations of attribute values
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : a list of attribute=>value pairs (see below)
+ Status  : public
+
+This method can be used to fetch a set of features from the database.
+Attributes are a list of name=E<gt>value pairs.  They will be
+logically ANDed together.  If an attribute value is an array
+reference, the list of values in the array is treated as an
+alternative set of values to be ORed together.
+
+=cut
+
+sub get_feature_by_attribute {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented();
+}
+
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : $db->search_notes($search_term,$max_results)
+ Function: full-text search on features, ENSEMBL-style
+ Returns : an array of [$name,$description,$score]
+ Args    : see below
+ Status  : public
+
+This routine performs a full-text search on feature attributes (which
+attributes depend on implementation) and returns a list of
+[$name,$description,$score], where $name is the feature ID,
+$description is a human-readable description such as a locus line, and
+$score is the match strength.
+
+Since this is a decidedly non-standard thing to do (but the generic
+genome browser uses it), the default method returns an empty list.
+You do not have to implement it.
+
+=cut
+
+sub search_notes {
+  # Default implementation: no search is performed.  The arguments are
+  # ignored and an empty list (undef in scalar context) is returned.
+  return;
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : $seqio = $db->get_seq_stream(@args)
+ Function: Performs a query and returns an iterator over it
+ Returns : a Bio::SeqIO stream capable of returning Bio::SeqFeatureI objects
+ Args    : As in features()
+ Status  : public
+
+This routine takes the same arguments as features(), but returns a
+Bio::SeqIO::Stream-compliant object.  Use it like this:
+
+  $stream = $db->get_seq_stream('exon');
+  while (my $exon = $stream->next_seq) {
+     print $exon,"\n";
+  }
+
+NOTE: In the interface this method is aliased to get_feature_stream(),
+as the name is more descriptive.
+
+=cut
+
+sub get_seq_stream {
+  # Abstract method: this default only reports that it is not implemented
+  # by delegating to throw_not_implemented() on the invocant.
+  my $self = shift;
+  return $self->throw_not_implemented;
+}
+sub get_feature_stream {
+  # Alias: forwards the remaining arguments unchanged to get_seq_stream().
+  my $self = shift;
+  return $self->get_seq_stream(@_);
+}
+
+=head2 refclass
+
+ Title   : refclass
+ Usage   : $class = $db->refclass
+ Function: returns the default class to use for segment() calls
+ Returns : a string
+ Args    : none
+ Status  : public
+
+For data sources which use namespaces to distinguish reference
+sequence accessions, this returns the default namespace (or "class")
+to use.  This interface defines a default of "Accession".
+
+=cut
+
+sub refclass {
+  # Constant default class; the invocant and any arguments are ignored.
+  return "Accession";
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/DescribableI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/DescribableI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/DescribableI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,107 @@
+# $Id: DescribableI.pm,v 1.10.4.1 2006/10/02 23:10:12 sendu Exp $
+
+#
+# This module is licensed under the same terms as Perl itself. You use,
+# modify, and redistribute it under the terms of the Perl Artistic License.
+#
+
+=head1 NAME
+
+Bio::DescribableI - interface for objects with human readable names and descriptions
+
+=head1 SYNOPSIS
+
+
+    # to test this is a describable object
+
+    $obj->isa("Bio::DescribableI") || 
+      $obj->throw("$obj does not implement the Bio::DescribableI interface");
+
+    # accessors
+
+    $name = $obj->display_name();
+    $desc = $obj->description();
+
+
+
+=head1 DESCRIPTION
+
+This interface describes methods expected on describable objects, ie
+ones which have human displayable names and descriptions
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at sanger.ac.uk
+
+=cut
+
+package Bio::DescribableI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user
+           the string should have no spaces (ideally, though a cautious
+           user of this interface would not assume this) and should be
+           less than thirty characters (though again, double checking 
+           this is a good idea)
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub display_name {
+   # Virtual method: this default only reports that it is not implemented
+   # by delegating to throw_not_implemented() on the invocant.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a 
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub description {
+   # Virtual method: this default only reports that it is not implemented
+   # by delegating to throw_not_implemented() on the invocant.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventGeneratorI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventGeneratorI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventGeneratorI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,82 @@
+# $Id: EventGeneratorI.pm,v 1.11.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Event::EventGeneratorI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Event::EventGeneratorI - This interface describes the basic event
+generator class.
+
+=head1 SYNOPSIS
+
+    # Do not use this object directly
+    # This object has the basic methods for describing an event generator
+
+=head1 DESCRIPTION
+
+This object describes the basic event generator system.  It basically
+allows one to attach one or many event handlers.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Event::EventGeneratorI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 attach_EventHandler
+
+ Title   : attach_EventHandler
+ Usage   : $parser->attach_EventHandler($handler)
+ Function: Adds an event handler to listen for events
+ Returns : none
+ Args    : Bio::Event::EventHandlerI
+
+=cut
+
+sub attach_EventHandler {
+    # Abstract method: this default does not look at the handler argument
+    # and only delegates to throw_not_implemented() on the invocant.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventHandlerI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventHandlerI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Event/EventHandlerI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,206 @@
+# $Id: EventHandlerI.pm,v 1.9.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Event::EventHandlerI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Event::EventHandlerI - An Event Handler Interface
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+    # See Bio::SearchIO::SearchResultEventHandler for an example of
+    # implementation.
+
+=head1 DESCRIPTION
+
+This interface describes the basic methods required for
+EventHandlers.  These are essentially SAX methods. 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Event::EventHandlerI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 will_handle
+
+ Title   : will_handle
+ Usage   : if( $handler->will_handle($event_type) ) { ... }
+ Function: Tests if this event builder knows how to process a specific event
+ Returns : boolean
+ Args    : event type name
+
+
+=cut
+
+sub will_handle {
+   # Abstract predicate: this default does not look at the event type
+   # and only delegates to throw_not_implemented() on the invocant.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document();
+ Function: Handle a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+   # Abstract SAX-style hook: concrete handlers must override it.  Extra
+   # arguments are accepted but not used by this default, which only
+   # delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document();
+ Function: Handle an end document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub end_document{
+   # Abstract SAX-style hook: concrete handlers must override it.  Extra
+   # arguments are accepted but not used by this default, which only
+   # delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+   # Abstract SAX-style hook: concrete handlers must override it.  Extra
+   # arguments are accepted but not used by this default, which only
+   # delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element{
+   # Abstract SAX-style hook: concrete handlers must override it.  Extra
+   # arguments are accepted but not used by this default, which only
+   # delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+}
+
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within' because 'in' tests only
+           if one has reached a specific element.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub in_element{
+   # Abstract SAX-style predicate: concrete handlers must override it.
+   # Extra arguments are accepted but not used by this default, which
+   # only delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub within_element{
+   # Abstract SAX-style predicate: concrete handlers must override it.
+   # Extra arguments are accepted but not used by this default, which
+   # only delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a characters event
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+   # Abstract SAX-style hook: concrete handlers must override it.  Extra
+   # arguments are accepted but not used by this default, which only
+   # delegates to throw_not_implemented().
+   # The parameter list is restored to "@args"; the archived text had it
+   # rewritten as " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Contact.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Contact.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Contact.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+# $Id: Contact.pm,v 1.4.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Expression::Contact
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::Contact - DESCRIPTION of Object
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Expression::Contact;
+use strict;
+use base qw(Bio::Root::Root);
+
+=head2 new()
+
+ Usage   : my $obj = Bio::Expression::Contact->new();
+ Function: Builds a new Bio::Expression::Contact object 
+ Returns : an instance of Bio::Expression::Contact
+ Args    :
+
+
+=cut
+
+sub new {
+  # Build the object through the parent class constructor, then hand the
+  # same argument list to _initialize().
+  # The parameter list is restored to "@args"; the archived text had it
+  # rewritten as " at args", which is not valid Perl.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize(@args);
+  return $self;
+}
+
+=head2 _initialize()
+
+ Usage   : $obj->_initialize(%arg);
+ Function: Internal method to initialize a new Bio::Expression::Contact object
+ Returns : true on success
+ Args    : passed through to new()
+
+=cut
+
+sub _initialize {
+  my ($self, %arg) = @_;
+
+  for my $key (keys %arg) {
+    # Map "-name" style keys onto method names by dropping one leading hyphen.
+    (my $method = $key) =~ s/^-//;
+
+    # Keys with no matching method on the object are silently skipped.
+    next unless $self->can($method);
+    $self->$method($arg{$key});
+  }
+
+  return 1;
+}
+
+=head2 source()
+
+ Usage   : $obj->source($newval)
+ Function: 
+ Example : 
+ Returns : value of source (a scalar)
+ Args    : on set, new value (a defined scalar, optional; undef is ignored)
+
+
+=cut
+
+sub source {
+  my ($self, $new_source) = @_;
+  # Only a defined argument updates the stored value; with no argument,
+  # or with undef, this is a plain read.
+  if (defined $new_source) {
+    $self->{'source'} = $new_source;
+  }
+  return $self->{'source'};
+}
+
+=head2 accession()
+
+ Usage   : $obj->accession($newval)
+ Function: 
+ Example : 
+ Returns : value of accession (a scalar)
+ Args    : on set, new value (a defined scalar, optional; undef is ignored)
+
+
+=cut
+
+sub accession {
+  my ($self, $new_accession) = @_;
+  # Only a defined argument updates the stored value; with no argument,
+  # or with undef, this is a plain read.
+  if (defined $new_accession) {
+    $self->{'accession'} = $new_accession;
+  }
+  return $self->{'accession'};
+}
+
+=head2 name()
+
+ Usage   : $obj->name($newval)
+ Function: 
+ Example : 
+ Returns : value of name (a scalar)
+ Args    : on set, new value (a defined scalar, optional; undef is ignored)
+
+
+=cut
+
+sub name {
+  my ($self, $new_name) = @_;
+  # Only a defined argument updates the stored value; with no argument,
+  # or with undef, this is a plain read.
+  if (defined $new_name) {
+    $self->{'name'} = $new_name;
+  }
+  return $self->{'name'};
+}
+
+=head2 db()
+
+ Usage   : $obj->db($newval)
+ Function: 
+ Example : 
+ Returns : value of db (a scalar)
+ Args    : on set, new value (a defined scalar, optional; undef is ignored)
+
+
+=cut
+
+sub db {
+  my ($self, $new_db) = @_;
+  # Only a defined argument updates the stored value; with no argument,
+  # or with undef, this is a plain read.
+  if (defined $new_db) {
+    $self->{'db'} = $new_db;
+  }
+  return $self->{'db'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/DataSet.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/DataSet.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/DataSet.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,329 @@
+# $Id: DataSet.pm,v 1.3.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Expression::DataSet
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::DataSet - DESCRIPTION of Object
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Expression::DataSet;
+use strict;
+use base qw(Bio::Root::Root);
+
+=head2 new()
+
+ Usage   : my $obj = new Bio::Expression::DataSet();
+ Function: Builds a new Bio::Expression::DataSet object 
+ Returns : an instance of Bio::Expression::DataSet
+ Args    :
+
+
+=cut
+
+sub new {
+  # All constructor arguments are forwarded unchanged, first to the parent
+  # constructor and then to _initialize().
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize(@args);
+  return $self;
+}
+
+=head2 _initialize()
+
+ Usage   : $obj->_initialize(%arg);
+ Function: Internal method to initialize a new Bio::Expression::DataSet object
+ Returns : true on success
+ Args    : passed through to new()
+
+=cut
+
+sub _initialize {
+  my $self = shift;
+  my %arg  = @_;
+
+  # Strip the leading dash from each key and call the accessor of that
+  # name with the key's value; keys with no such method are skipped.
+  foreach my $key (keys %arg) {
+    (my $method = $key) =~ s/^-//;
+    next unless $self->can($method);
+    $self->$method( $arg{$key} );
+  }
+
+  return 1;
+}
+
+=head2 accession()
+
+ Usage   : $obj->accession($newval)
+ Function: 
+ Example : 
+ Returns : value of accession (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub accession {
+  my $self  = shift;
+  my $value = shift;
+  # Unlike most accessors in this class, this one does not call _load().
+  if ( defined $value ) {
+    $self->{'accession'} = $value;
+  }
+  return $self->{'accession'};
+}
+
+=head2 name()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub name {
+  # Get/set the dataset name.
+  #   Args    : on set, new value (a defined scalar, optional)
+  #   Returns : value of name (a scalar)
+  my($self,$val) = @_;
+  $self->_load();
+  # The value lives in its own 'name' slot so that setting the name
+  # cannot replace the handle kept under 'db'.
+  $self->{'name'} = $val if defined($val);
+  return $self->{'name'};
+}
+
+=head2 db()
+
+ Usage   : $obj->db($newval)
+ Function: 
+ Example : 
+ Returns : value of db (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub db {
+  my $self  = shift;
+  my $value = shift;
+  # No _load() here: _load() itself reads the handle through this method.
+  if ( defined $value ) {
+    $self->{'db'} = $value;
+  }
+  return $self->{'db'};
+}
+
+=head2 pubmed_id()
+
+ Usage   : $obj->pubmed_id($newval)
+ Function: 
+ Example : 
+ Returns : value of pubmed_id (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub pubmed_id {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'pubmed_id'} = $value;
+  }
+  return $self->{'pubmed_id'};
+}
+
+=head2 web_link()
+
+ Usage   : $obj->web_link($newval)
+ Function: 
+ Example : 
+ Returns : value of web_link (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub web_link {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'web_link'} = $value;
+  }
+  return $self->{'web_link'};
+}
+
+=head2 contact()
+
+ Usage   : $obj->contact($newval)
+ Function: 
+ Example : 
+ Returns : value of contact (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub contact {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'contact'} = $value;
+  }
+  return $self->{'contact'};
+}
+
+=head2 samples()
+
+ Usage   : $obj->samples($newval)
+ Function: 
+ Example : 
+ Returns : value of samples (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub samples {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  # get_samples() dereferences the stored value as an array reference.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'samples'} = $value;
+  }
+  return $self->{'samples'};
+}
+
+=head2 description()
+
+ Usage   : $obj->description($newval)
+ Function: 
+ Example : 
+ Returns : value of description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub description {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'description'} = $value;
+  }
+  return $self->{'description'};
+}
+
+=head2 design()
+
+ Usage   : $obj->design($newval)
+ Function: 
+ Example : 
+ Returns : value of design (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub design {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'design'} = $value;
+  }
+  return $self->{'design'};
+}
+
+=head2 design_description()
+
+ Usage   : $obj->design_description($newval)
+ Function: 
+ Example : 
+ Returns : value of design_description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub design_description {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'design_description'} = $value;
+  }
+  return $self->{'design_description'};
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+=head2 get_samples()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub get_samples {
+  # Return the samples of this dataset as a list.
+  #   Args    : none are used; extra arguments are accepted and ignored
+  #   Returns : the elements of the array reference held by samples(),
+  #             or an empty list when samples() is false
+  my ($self,@args) = @_;
+  my $samples = $self->samples();
+  return $samples ? @{ $samples } : ();
+}
+
+
+
+
+
+
+
+# Internal: ask the database handle to fill in this dataset, at most once
+# per object.  Returns a true value.
+sub _load {
+  my $self = shift;
+  # Flag already raised (load finished or in progress): nothing to do.
+  if ( $self->{'_load'} ) {
+    return 1;
+  }
+  # The flag is raised before fill_dataset() runs, so an accessor that
+  # calls _load() while the fill is underway returns immediately
+  # (presumably fill_dataset() uses those accessors -- TODO confirm).
+  $self->{'_load'}++;
+  # NOTE(review): this dies if db() is still undef.  _initialize() walks
+  # its arguments in hash order, so -db may not be set yet when another
+  # accessor triggers this call -- confirm callers always set db first.
+  $self->db->fill_dataset( $self );
+  return $self->{'_load'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup/FeatureGroupMas50.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup/FeatureGroupMas50.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup/FeatureGroupMas50.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+# Let the code begin...
+package Bio::Expression::FeatureGroup::FeatureGroupMas50;
+
+# $Id: FeatureGroupMas50.pm,v 1.2.8.1 2006/10/02 23:10:17 sendu Exp $
+
+=head1 NAME
+
+Bio::Expression::FeatureGroup::FeatureGroupMas50
+
+=cut
+
+use strict;
+
+use base qw(Bio::Expression::FeatureGroup);
+use vars qw($DEBUG);
+
+# Declare one get_set accessor per field name listed below, via the
+# MethodMaker emulator.  The names are presumably the columns of
+# Affymetrix MAS 5.0 output (inferred from the package name -- confirm).
+# Do not add comments inside the qw() list: they would become list items.
+use Class::MakeMethods::Emulator::MethodMaker
+  get_set => [qw(
+  
+  probe_set_name stat_pairs stat_pairs_used
+  signal detection detection_p_value
+  stat_common_pairs signal_log_ratio
+  signal_log_ratio_low
+  signal_log_ratio_high change change_p_value
+  positive negative pairs pairs_used
+  pairs_inavg pos_fraction log_avg
+  pos_neg avg_diff abs_call inc dec
+  inc_ratio dec_ratio pos_change
+  neg_change inc_dec dpos_dneg_ratio
+  log_avg_ratio_change diff_call
+  avg_diff_change b_a fold_change
+  sort_score		 
+
+  )],
+;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureGroup.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,286 @@
+# $Id: FeatureGroup.pm,v 1.8 2006/07/04 22:23:15 mauricio Exp $
+# BioPerl module for Bio::Expression::FeatureGroup
+#
+# Copyright Allen Day <allenday at ucla.edu>, Stanley Nelson <snelson at ucla.edu>
+# Human Genetics, UCLA Medical School, University of California, Los Angeles
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::FeatureGroup - a set of DNA/RNA features.  ISA
+Bio::Expression::FeatureI
+
+=head1 SYNOPSIS
+
+#
+
+=head1 DESCRIPTION
+
+A set of DNA/RNA features.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+package Bio::Expression::FeatureGroup;
+
+use strict;
+
+use base qw(Bio::Root::Root Bio::Expression::FeatureI);
+use vars qw($DEBUG);
+
+=head2 new
+
+ Title   : new
+ Usage   : $featuregroup = Bio::Expression::FeatureGroup->new(%args);
+ Function: create a new featuregroup object
+ Returns : a Bio::Expression::FeatureGroup object
+ Args    : an optional hash of parameters to be used in initialization:
+           -id    --  the featuregroup ID
+           -type  --  the featuregroup type
+
+=cut
+
+sub new {
+  # Bless an empty hash directly into $class (no parent constructor is
+  # called) and hand every argument to _initialize().
+  my($class,@args) = @_;
+  my $self = bless {}, $class;
+  $self->_initialize(@args);
+  return $self;
+}
+
+=head2 _initialize
+
+ Title   : _initialize
+ Usage   : $featuregroup->_initialize(@args);
+ Function: initialize the featuregroup object
+ Returns : nothing
+ Args    : @args
+
+=cut
+
+sub _initialize{
+  # Set type and id from the -type / -id arguments, then delegate the
+  # full argument list to the parent class.
+  my ($self,@args) = @_;
+  my %param = @args;
+
+  # A missing -type or -id is passed on as undef, which type() and id()
+  # store as-is.
+  $self->type($param{-type});
+  $self->id($param{-id}    );
+
+  $self->SUPER::_initialize(@args);
+  # Turn on the package-wide $DEBUG flag the first time a verbose object
+  # is initialized; an already-defined $DEBUG is left alone.
+  $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0);
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $featuregroup->type($optional_arg);
+ Function: get/set the type of the featuregroup
+ Comments: this is probably going to be a string like
+           "quality control", "mismatch blah blah", etc.
+ Returns : the featuregroup type
+ Args    : a new value for the featuregroup type
+
+=cut
+
+sub type {
+  my ($self, @new) = @_;
+  # Any argument, even undef, replaces the stored type.
+  if (@new) {
+    $self->{type} = $new[0];
+  }
+  return $self->{type};
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $featuregroup->id($optional_arg);
+ Function: get/set the id of the featuregroup
+ Returns : the featuregroup id
+ Args    : a new value for the featuregroup id
+
+=cut
+
+sub id {
+  my ($self, @new) = @_;
+  # Any argument, even undef, replaces the stored id.
+  if (@new) {
+    $self->{id} = $new[0];
+  }
+  return $self->{id};
+}
+
+
+=head2 standard_deviation
+
+ Title   : standard_deviation
+ Usage   : $featuregroup->standard_deviation($optional_arg);
+ Function: get/set the standard deviation of the featuregroup value
+ Returns : the featuregroup standard deviation
+ Args    : a new value for the featuregroup standard deviation
+ Notes   : this method does no calculation, it merely holds a value
+
+=cut
+
+sub standard_deviation {
+  my ($self, @new) = @_;
+  # Pure storage: the value is kept as given, nothing is computed.
+  if (@new) {
+    $self->{standard_deviation} = $new[0];
+  }
+  return $self->{standard_deviation};
+}
+
+=head2 quantitation
+
+ Title   : quantitation
+ Usage   : $featuregroup->quantitation($optional_arg);
+ Function: get/set the quantitation of the featuregroup
+ Returns : the featuregroup's quantitated value
+ Args    : a new value for the featuregroup's quantitated value
+ Notes   : this method does no calculation, it merely holds a value
+
+=cut
+
+sub quantitation {
+  my ($self, @new) = @_;
+  # Pure storage: the value is kept as given, nothing is computed.
+  if (@new) {
+    $self->{quantitation} = $new[0];
+  }
+  return $self->{quantitation};
+}
+
+=head2 quantitation_units
+
+ Title   : quantitation_units
+ Usage   : $featuregroup->quantitation_units($optional_arg);
+ Function: get/set the quantitation units of the featuregroup
+ Returns : the featuregroup's quantitated value units
+ Args    : a new value for the featuregroup's quantitated value units
+
+=cut
+
+sub quantitation_units {
+  my ($self, @new) = @_;
+  # Any argument, even undef, replaces the stored units.
+  if (@new) {
+    $self->{quantitation_units} = $new[0];
+  }
+  return $self->{quantitation_units};
+}
+
+=head2 presence
+
+ Title   : presence
+ Usage   : $featuregroup->presence($optional_arg);
+ Function: get/set the presence call of the featuregroup
+ Returns : the featuregroup's presence call
+ Args    : a new value for the featuregroup's presence call
+
+=cut
+
+sub presence {
+  my ($self, @new) = @_;
+  # Any argument, even undef, replaces the stored presence call.
+  if (@new) {
+    $self->{presence} = $new[0];
+  }
+  return $self->{presence};
+}
+
+=head2 add_feature
+
+ Title   : add_feature
+ Usage   : $feature_copy = $featuregroup->add_feature($feature);
+ Function: add a feature to the featuregroup
+ Returns : see this_feature()
+ Args    : a Bio::Expression::FeatureI compliant object
+
+=cut
+
+sub add_feature {
+  # Append one or more features to the group.
+  #   Args    : Bio::Expression::FeatureI compliant objects
+  #   Returns : the last feature held by the group, or undef if it holds none
+  #   Throws  : if an argument is not a Bio::Expression::FeatureI
+  my($self,@args) = @_;
+  foreach my $feature (@args){
+    # Features ahead of an offending argument have already been stored
+    # by the time the exception is thrown.
+    $self->throw('Features must be Bio::Expression::FeatureI compliant') unless $feature->isa('Bio::Expression::FeatureI');
+    push @{$self->{features}}, $feature;
+  }
+
+  return $self->{features} ? $self->{features}->[-1] : undef;
+}
+
+=head2 this_feature
+
+ Title   : this_feature
+ Usage   : $feature = $featuregroup->this_feature
+ Function: access the last feature added to the featuregroup
+ Returns : the last feature added to the featuregroup
+ Args    : none
+
+=cut
+
+sub this_feature {
+  my ($self) = @_;
+  my $features = $self->{features};
+  # Explicit undef (not a bare return), so list-context callers still
+  # receive a single undef element when no feature was ever added.
+  return undef unless $features;
+  return $features->[-1];
+}
+
+=head2 each_feature
+
+ Title   : each_feature
+ Usage   : @features = $featuregroup->each_feature
+ Function: returns a list of Bio::Expression::FeatureI compliant
+           objects
+ Returns : a list of objects
+ Args    : none
+
+=cut
+
+sub each_feature {
+  my ($self) = @_;
+  # Flatten the stored array reference; empty list if nothing was added.
+  return defined( $self->{features} ) ? @{ $self->{features} } : ();
+}
+
+=head2 each_feature_quantitation
+
+ Title   : each_feature_quantitation
+ Usage   : @featurequantitions = $featuregroup->each_feature_quantitation;
+ Function: returns a list of quantitations of the features in the featuregroup
+ Returns : a list of numeric values
+ Args    : none
+
+=cut
+
+sub each_feature_quantitation {
+  my ($self) = @_;
+  # NOTE(review): this calls value() on each feature, whereas
+  # Bio::Expression::FeatureI declares quantitation() -- confirm which
+  # method the features are expected to provide.
+  my @values = map { $_->value } $self->each_feature;
+  return @values;
+}
+
+=head2 is_qc
+
+ Title   : is_qc
+ Usage   : $is_quality_control = $featuregroup->is_qc
+ Function: get/set whether or not the featuregroup is used for quality control purposes
+ Returns : a boolean (equivalent)
+ Args    : a new value
+
+=cut
+
+sub is_qc {
+  # Get/set whether the featuregroup is used for quality control.
+  #   Args    : a new value (optional)
+  #   Returns : the stored value
+  my $self = shift;
+  # Test the argument count directly: defined(@array) is rejected by
+  # perl 5.22 and later.
+  $self->{is_qc} = shift if @_;
+  return $self->{is_qc};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+# $Id: FeatureI.pm,v 1.9 2006/07/04 22:23:15 mauricio Exp $
+# BioPerl module for Bio::Expression::FeatureI
+#
+# Copyright Allen Day <allenday at ucla.edu>, Stan Nelson <snelson at ucla.edu>
+# Human Genetics, UCLA Medical School, University of California, Los Angeles
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::FeatureI - an interface class for DNA/RNA features
+
+=head1 SYNOPSIS
+
+Do not use this module directly
+
+=head1 DESCRIPTION
+
+This provides a standard bioperl interface class for representing
+DNA and RNA features.  It cannot be instantiated directly, but serves
+as an abstract base class for implementors.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+package Bio::Expression::FeatureI;
+
+use strict;
+use Bio::Root::RootI;
+
+use base qw(Bio::Root::RootI Bio::PrimarySeqI);
+use vars qw($DEBUG);
+
+=head2 quantitation()
+
+  Title   : quantitation
+  Usage   : $val = $ftr->quantitation()
+  Function: get/set the feature's quantitation
+  Returns : A numeric value
+  Args    : a new numeric value (optional)
+
+=cut
+
+sub quantitation {
+  my $self = shift;
+  # Interface stub: nothing is implemented here.
+  $self->throw_not_implemented();
+}
+
+=head2 quantitation_units()
+
+  Title   : quantitation_units
+  Usage   : $units = $ftr->quantitation_units()
+  Function: get/set the units of the feature's quantitation
+  Returns : A string or undef
+  Args    : a new string (optional)
+
+=cut
+
+sub quantitation_units {
+  my $self = shift;
+  # Interface stub: nothing is implemented here.
+  $self->throw_not_implemented();
+}
+
+=head2 standard_deviation()
+
+  Title   : standard_deviation
+  Usage   : $std_dev = $ftr->standard_deviation()
+  Function: get/set the feature's standard deviation of quantitation()
+  Returns : A numeric value
+  Args    : a new numeric value (optional)
+  Comments: no calculation is done here
+
+=cut
+
+sub standard_deviation {
+  my $self = shift;
+  # Interface stub: nothing is implemented here.
+  $self->throw_not_implemented();
+}
+
+=head2 sample_count()
+
+  Title   : sample_count
+  Usage   : $sample_count = $ftr->sample_count()
+  Function: get/set the number of samples used to calculate
+            quantitation()
+  Returns : An integer
+  Args    : a new integer (optional)
+
+=cut
+
+sub sample_count {
+  my $self = shift;
+  # Interface stub: nothing is implemented here.
+  $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureSet/FeatureSetMas50.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureSet/FeatureSetMas50.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureSet/FeatureSetMas50.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+# Let the code begin...
+package Bio::Expression::FeatureSet::FeatureSetMas50;
+
+# $Id: FeatureSetMas50.pm,v 1.2.6.1 2006/10/02 23:10:18 sendu Exp $
+
+=head1 NAME
+
+Bio::Expression::FeatureSet::FeatureSetMas50
+
+=cut
+
+use strict;
+
+use base qw(Bio::Expression::FeatureSet);
+use vars qw($DEBUG);
+
+# Declare one get_set accessor per field name listed below, via the
+# MethodMaker emulator.
+# NOTE(review): detection_p-value, change_p-value and dpos-dneg_ratio
+# contain a hyphen, so they cannot be called as $obj->name and are only
+# reachable through a method name held in a variable.
+# Bio::Expression::FeatureGroup::FeatureGroupMas50 spells the same fields
+# with underscores -- confirm whether the hyphens are intentional.
+# Do not add comments inside the qw() list: they would become list items.
+use Class::MakeMethods::Emulator::MethodMaker
+  get_set => [qw(
+  
+  probe_set_name stat_pairs stat_pairs_used
+  signal detection detection_p-value
+  stat_common_pairs signal_log_ratio
+  signal_log_ratio_low
+  signal_log_ratio_high change change_p-value
+  positive negative pairs pairs_used
+  pairs_inavg pos_fraction log_avg
+  pos_neg avg_diff abs_call inc dec
+  inc_ratio dec_ratio pos_change
+  neg_change inc_dec dpos-dneg_ratio
+  log_avg_ratio_change diff_call
+  avg_diff_change b_a fold_change
+  sort_score		 
+
+  )],
+;
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/FeatureSet/FeatureSetMas50.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Platform.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Platform.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Platform.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,247 @@
+# $Id: Platform.pm,v 1.4.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Expression::Platform
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::Platform - DESCRIPTION of Object
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Expression::Platform;
+use strict;
+use base qw(Bio::Root::Root);
+use Bio::DB::Taxonomy;
+
+=head2 new()
+
+ Usage   : my $obj = new Bio::Expression::Platform();
+ Function: Builds a new Bio::Expression::Platform object 
+ Returns : an instance of Bio::Expression::Platform
+ Args    :
+
+=cut
+
+sub new {
+  # All constructor arguments are forwarded unchanged, first to the parent
+  # constructor and then to _initialize().
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize(@args);
+  return $self;
+}
+
+=head2 _initialize()
+
+ Usage   : $obj->_initialize(%arg);
+ Function: Internal method to initialize a new Bio::Expression::Platform object
+ Returns : true on success
+ Args    : passed through to new()
+
+=cut
+
+sub _initialize {
+  my($self,%arg) = @_;
+
+  # Route every "-key => value" pair to the accessor called "key";
+  # pairs without a matching method are ignored.
+  foreach my $arg (keys %arg){
+    my $marg = $arg;
+    $marg =~ s/^-//;
+    $self->$marg($arg{$arg}) if $self->can($marg);
+  }
+
+  # Create the default 'entrez' taxonomy handle only when the loop above
+  # did not already store one, so a caller-supplied -taxdb is kept.
+  $self->taxdb( Bio::DB::Taxonomy->new(-source => 'entrez') )
+    unless $self->taxdb;
+  return 1;
+}
+
+
+
+=head2 get_datasets()
+
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub get_datasets {
+  # Ask the database handle for the datasets of this platform.
+  #   Args    : none are used; extra arguments are accepted and ignored
+  #   Returns : the list returned by the handle's get_datasets()
+  my ($self,@args) = @_;
+  my $db = $self->db();
+
+  # Collected into an array first, so a scalar-context caller gets the
+  # number of datasets.
+  my @datasets = $db->get_datasets( $self );
+
+  return @datasets;
+}
+
+=head2 accession()
+
+ Usage   : $obj->accession($newval)
+ Function: 
+ Example : 
+ Returns : value of accession (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub accession {
+  my $self  = shift;
+  my $value = shift;
+  # Store only defined values; undef leaves the accession untouched.
+  if ( defined $value ) {
+    $self->{'accession'} = $value;
+  }
+  return $self->{'accession'};
+}
+
+=head2 name()
+
+ Usage   : $obj->name($newval)
+ Function: 
+ Example : 
+ Returns : value of name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub name {
+  my $self  = shift;
+  my $value = shift;
+  # Store only defined values; undef leaves the name untouched.
+  if ( defined $value ) {
+    $self->{'name'} = $value;
+  }
+  return $self->{'name'};
+}
+
+=head2 taxon()
+
+ Usage   : $obj->taxon()
+ Function: 
+ Example : 
+ Returns : A Bio::Taxonomy::Node object
+ Args    : none
+
+
+=cut
+
+sub taxon {
+  my $self = shift;
+  # Look the node up through taxdb() only while no true value is cached;
+  # later calls return the cached result.
+  $self->{'taxon'} ||= $self->taxdb->get_Taxonomy_Node( $self->_taxon_id() );
+  return $self->{'taxon'};
+}
+
+=head2 contact()
+
+ Usage   : $obj->contact($newval)
+ Function: 
+ Example : 
+ Returns : a Bio::Expression::Contact object
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub contact {
+  my $self  = shift;
+  my $value = shift;
+  # Store only defined values; undef leaves the contact untouched.
+  if ( defined $value ) {
+    $self->{'contact'} = $value;
+  }
+  return $self->{'contact'};
+}
+
+=head2 db()
+
+ Usage   : $obj->db($newval)
+ Function: 
+ Example : 
+ Returns : value of db (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub db {
+  my $self  = shift;
+  my $value = shift;
+  # Store only defined values; undef leaves the db untouched.
+  if ( defined $value ) {
+    $self->{'db'} = $value;
+  }
+  return $self->{'db'};
+}
+
+=head2 _taxon_id()
+
+ Usage   : $obj->_taxon_id($newval)
+ Function: 
+ Example : 
+ Returns : value of _taxon_id (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub _taxon_id {
+  my $self  = shift;
+  my $value = shift;
+  # Stored id is what taxon() passes to the taxonomy database lookup.
+  if ( defined $value ) {
+    $self->{'_taxon_id'} = $value;
+  }
+  return $self->{'_taxon_id'};
+}
+
+=head2 taxdb()
+
+ Usage   : $obj->taxdb($newval)
+ Function: 
+ Example : 
+ Returns : a Bio::DB::Taxonomy object
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub taxdb {
+  my $self  = shift;
+  my $value = shift;
+  # Store only defined values; undef leaves the handle untouched.
+  if ( defined $value ) {
+    $self->{'taxdb'} = $value;
+  }
+  return $self->{'taxdb'};
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/ProbeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/ProbeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/ProbeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+# $Id: ProbeI.pm,v 1.8.6.1 2006/08/20 22:03:29 sendu Exp $
+# BioPerl module for Bio::Expression::ProbeI
+#
+# Copyright Allen Day <allenday at ucla.edu>, Stan Nelson <snelson at ucla.edu>
+# Human Genetics, UCLA Medical School, University of California, Los Angeles
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::ProbeI - an interface class for DNA/RNA probes
+
+=head1 SYNOPSIS
+
+Do not use this module directly
+
+=head1 DESCRIPTION
+
+This class ISA Bio::Expression::FeatureI, nothing more.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org            - General discussion
+  http://bioperl.org/MailList.shtml - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via email or the web:
+
+  bioperl-bugs at bio.perl.org
+  http://bio.perl.org/bioperl-bugs/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+package Bio::Expression::ProbeI;
+
+use strict;
+use Bio::Root::Root;
+
+use base qw(Bio::Expression::FeatureI);
+use vars qw($DEBUG);
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Sample.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Sample.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Expression/Sample.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,240 @@
+# $Id: Sample.pm,v 1.3.4.1 2006/10/02 23:10:17 sendu Exp $
+#
+# BioPerl module for Bio::Expression::Sample
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Expression::Sample - DESCRIPTION of Object
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Expression::Sample;
+use strict;
+use base qw(Bio::Root::Root);
+
+=head2 new()
+
+ Usage   : my $obj = new Bio::Expression::Sample();
+ Function: Builds a new Bio::Expression::Sample object 
+ Returns : an instance of Bio::Expression::Sample
+ Args    :
+
+
+=cut
+
+sub new {
+  # All constructor arguments are forwarded unchanged, first to the parent
+  # constructor and then to _initialize().
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize(@args);
+  return $self;
+}
+
+=head2 _initialize()
+
+ Usage   : $obj->_initialize(%arg);
+ Function: Internal method to initialize a new Bio::Expression::Sample object
+ Returns : true on success
+ Args    : passed through to new()
+
+=cut
+
+sub _initialize {
+  my $self = shift;
+  my %arg  = @_;
+
+  # Strip the leading dash from each key and call the accessor of that
+  # name with the key's value; keys with no such method are skipped.
+  foreach my $key (keys %arg) {
+    (my $method = $key) =~ s/^-//;
+    next unless $self->can($method);
+    $self->$method( $arg{$key} );
+  }
+
+  return 1;
+}
+
+=head2 accession()
+
+ Usage   : $obj->accession($newval)
+ Function: 
+ Example : 
+ Returns : value of accession (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub accession {
+  my $self  = shift;
+  my $value = shift;
+  # Unlike the lazily loaded fields, this accessor does not call _load().
+  if ( defined $value ) {
+    $self->{'accession'} = $value;
+  }
+  return $self->{'accession'};
+}
+
+=head2 dataset()
+
+ Usage   : $obj->dataset($newval)
+ Function: 
+ Example : 
+ Returns : value of dataset (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub dataset {
+  my $self  = shift;
+  my $value = shift;
+  # Store only defined values; undef leaves the dataset untouched.
+  if ( defined $value ) {
+    $self->{'dataset'} = $value;
+  }
+  return $self->{'dataset'};
+}
+
+=head2 db()
+
+ Usage   : $obj->db($newval)
+ Function: 
+ Example : 
+ Returns : value of db (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub db {
+  my $self  = shift;
+  my $value = shift;
+  # No _load() here: _load() itself reads the handle through this method.
+  if ( defined $value ) {
+    $self->{'db'} = $value;
+  }
+  return $self->{'db'};
+}
+
+
+
+
+
+
+=head2 name()
+
+ Usage   : $obj->name($newval)
+ Function: 
+ Example : 
+ Returns : value of name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub name {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'name'} = $value;
+  }
+  return $self->{'name'};
+}
+
+=head2 source_name()
+
+ Usage   : $obj->source_name($newval)
+ Function: 
+ Example : 
+ Returns : value of source_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source_name {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'source_name'} = $value;
+  }
+  return $self->{'source_name'};
+}
+
+=head2 description()
+
+ Usage   : $obj->description($newval)
+ Function: 
+ Example : 
+ Returns : value of description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub description {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'description'} = $value;
+  }
+  return $self->{'description'};
+}
+
+=head2 treatment_description()
+
+ Usage   : $obj->treatment_description($newval)
+ Function: 
+ Example : 
+ Returns : value of treatment_description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub treatment_description {
+  my $self  = shift;
+  my $value = shift;
+  # Run the one-time _load() before the field is read or written.
+  $self->_load();
+  if ( defined $value ) {
+    $self->{'treatment_description'} = $value;
+  }
+  return $self->{'treatment_description'};
+}
+
+# Internal: ask the database handle to fill in this sample, at most once
+# per object.  Returns a true value.
+sub _load {
+  my $self = shift;
+  # Flag already raised (load finished or in progress): nothing to do.
+  if ( $self->{'_load'} ) {
+    return 1;
+  }
+  # The flag is raised before fill_sample() runs, so an accessor that
+  # calls _load() while the fill is underway returns immediately
+  # (presumably fill_sample() uses those accessors -- TODO confirm).
+  $self->{'_load'}++;
+  # NOTE(review): this dies if db() is still undef.  _initialize() walks
+  # its arguments in hash order, so -db may not be set yet when another
+  # accessor triggers this call -- confirm callers always set db first.
+  $self->db->fill_sample( $self );
+  return $self->{'_load'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/AnalysisI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/AnalysisI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/AnalysisI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,149 @@
+# $Id: AnalysisI.pm,v 1.7.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::AnalysisI
+#
+# Cared for by Martin Senger <martin.senger at gmail.com>
+# For copyright and disclaimer see below.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::AnalysisI - An interface to analysis tool factory
+
+=head1 SYNOPSIS
+
+This is an interface module - you do not instantiate it.
+Use I<Bio::Tools::Run::AnalysisFactory> module:
+
+  use Bio::Tools::Run::AnalysisFactory;
+  my $list = Bio::Tools::Run::AnalysisFactory->new->available_analyses;
+
+=head1 DESCRIPTION
+
+This interface contains all public methods for showing available
+analyses and for creating objects representing them.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (martin.senger at gmail.com)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003, Martin Senger and EMBL-EBI.
+All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+=over
+
+=item *
+
+http://www.ebi.ac.uk/soaplab/Perl_Client.html
+
+=back
+
+=head1 APPENDIX
+
+This is actually the main documentation...
+
+If you try to call any of these methods directly on this
+C<Bio::Factory::AnalysisI> object you will get a I<not implemented>
+error message. You need to call them on a
+C<Bio::Tools::Run::AnalysisFactory> object instead.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Factory::AnalysisI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+
+# -----------------------------------------------------------------------------
+
+=head2 available_categories
+
+ Usage   : $factory->available_categories;
+ Returns : an array reference with the names of
+           available categories
+ Args    : none
+
+The analysis tools may be grouped into categories by their functional
+similarity, or by the similar data types they deal with. This method
+shows all available such categories.
+
+=cut
+
+sub available_categories { shift->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 available_analyses
+
+ Usage   : $factory->available_analyses;
+           $factory->available_analyses ($category);
+ Returns : an array reference with the names of
+           all available analyses, or the analyses
+           available in the given '$category'
+ Args    : none || category_name
+
+Show available analyses. Their names usually consist of category
+analysis names, separated by C<::>.
+
+=cut
+
+sub available_analyses { shift->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+=head2 create_analysis
+
+ Usage   : $factory->create_analysis ($name);
+ Returns : a Bio::Tools::Run::Analysis object
+ Args    : analysis name
+
+A real I<factory> method creating an analysis object. The created
+object gets all access and location information from the factory
+object.
+
+=cut
+
+sub create_analysis { shift->throw_not_implemented(); }
+
+# -----------------------------------------------------------------------------
+
+
+1;
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ApplicationFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ApplicationFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ApplicationFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,77 @@
+# $Id: ApplicationFactoryI.pm,v 1.10.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::ApplicationFactoryI
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::ApplicationFactoryI - Interface class for Application Factories
+
+=head1 SYNOPSIS
+
+You won't be using this as an object; use a derived class instead.
+
+=head1 DESCRIPTION
+
+Holds common Application Factory attributes in place.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Factory::ApplicationFactoryI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2  version
+
+ Title   : version
+ Usage   : exit if $prog->version() < 1.8
+ Function: Determine the version number of the program
+ Example :
+ Returns : float or undef
+ Args    : none
+
+=cut
+
+sub version {
+    shift->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/DriverFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/DriverFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/DriverFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,184 @@
+# $Id: DriverFactory.pm,v 1.15.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::DriverFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org> and
+#              Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Jason Stajich, Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::DriverFactory - Base class for factory classes loading drivers
+
+=head1 SYNOPSIS
+
+ #this class is not instantiable
+
+=head1 DESCRIPTION
+
+This is a base class for factory classes that load drivers. Normally, you don't
+instantiate this class directly.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+package Bio::Factory::DriverFactory;
+use strict;
+use Bio::Root::IO;
+
+use vars qw(%DRIVERS);
+
+use base qw(Bio::Root::Root);
+
+BEGIN {
+    %DRIVERS = ();
+}
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    return $self;
+}
+
+=head2 register_driver
+
+ Title   : register_driver
+ Usage   : $factory->register_driver("genscan", "Bio::Tools::Genscan");
+ Function: Registers a driver a factory class should be able to instantiate.
+
+           This method can be called both as an instance and as a class
+           method.
+
+ Returns : 
+ Args    : Key of the driver (string) and the module implementing the driver
+           (string).
+
+=cut
+
+sub register_driver {
+    my ($self, @args) = @_;
+    my %drivers = @args;
+
+    foreach my $drv (keys(%drivers)) {
+	# note that this doesn't care whether $self is the class or the object
+	$self->driver_table()->{$drv} = $drivers{$drv};
+    }
+}
+
+=head2 driver_table
+
+ Title   : driver_table
+ Usage   : $table = $factory->driver_table();
+ Function: Returns a reference to the hash table storing associations of
+           methods with drivers.
+
+           You use this table to look up registered methods (keys) and
+           drivers (values).
+
+           In this implementation the table is class-specific and therefore
+           shared by all instances. You can override this in a derived class,
+           but note that this method can be called both as an instance and a
+           class method.
+
+           This will be the table used by the object internally. You should
+           definitely know what you're doing if you modify the table's
+           contents. Modifications are shared by _all_ instances, those present
+           and those yet to be created.
+
+ Returns : A reference to a hash table.
+ Args    : 
+
+
+=cut
+
+sub driver_table {
+    my ($self, @args) = @_;
+
+    return \%DRIVERS;
+}
+
+=head2 get_driver
+
+ Title   : get_driver
+ Usage   : $module = $factory->get_driver("genscan");
+ Function: Returns the module implementing a driver registered under the
+           given key.
+ Example : 
+ Returns : A string.
+ Args    : Key of the driver (string).
+
+=cut
+
+sub get_driver {
+    my ($self, $key) = @_;
+
+    if(exists($self->driver_table()->{$key})) {
+	return $self->driver_table()->{$key};
+    }
+    return;
+}
+
+=head2 _load_module
+
+ Title   : _load_module
+ Usage   : $self->_load_module("Bio::Tools::Genscan");
+ Function: Loads up (like use) a module at run time on demand.
+ Example : 
+ Returns : TRUE on success
+ Args    :
+
+=cut
+
+sub _load_module {
+    my ($self, $name) = @_;
+    my ($module, $load, $m);
+    $module = "_<$name.pm";
+    return 1 if $main::{$module};
+    $load = "$name.pm";
+
+    my $io = new Bio::Root::IO();
+    # catfile comes from IO
+    $load = $io->catfile((split(/::/,$load)));
+    eval {
+	require $load;
+    };
+    if ( $@ ) {
+	$self->throw("$load: $name cannot be found: ".$@);
+    }
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/FTLocationFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/FTLocationFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/FTLocationFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,303 @@
+# $Id: FTLocationFactory.pm,v 1.21.4.2 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::FTLocationFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+#
+# (c) Hilmar Lapp, hlapp at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::FTLocationFactory - A FeatureTable Location Parser
+
+=head1 SYNOPSIS
+
+    # parse a string into a location object
+    $loc = Bio::Factory::FTLocationFactory->from_string("join(100..200,
+                                                         400..500)");
+
+=head1 DESCRIPTION
+
+Implementation of string-encoded location parsing for the Genbank feature
+table encoding of locations.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl-dot-org
+Chris Fields, cjfields-at-uiuc-dot-edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Factory::FTLocationFactory;
+use vars qw($LOCREG);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Location::Simple;
+use Bio::Location::Split;
+use Bio::Location::Fuzzy;
+
+
+use base qw(Bio::Root::Root Bio::Factory::LocationFactoryI);
+
+BEGIN {
+    # the below is an optimized regex obj. from J. Friedl's Mastering Regular Expressions
+    $LOCREG = qr{
+                (?>
+                [^()]+
+                |
+                \(
+                (??{$LOCREG})
+                \)
+                )*
+                }x;     
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Factory::FTLocationFactory();
+ Function: Builds a new Bio::Factory::FTLocationFactory object 
+ Returns : an instance of Bio::Factory::FTLocationFactory
+ Args    :
+
+=cut
+
+=head2 from_string
+
+ Title   : from_string
+ Usage   : $loc = $locfactory->from_string("100..200");
+ Function: Parses the given string and returns a Bio::LocationI implementing
+           object representing the location encoded by the string.
+
+           This implementation parses the Genbank feature table
+           encoding of locations.
+ Example :
+ Returns : A Bio::LocationI implementing object.
+ Args    : A string.
+
+=cut
+
+sub from_string {
+    my ($self,$locstr,$op) = @_;
+    my $loc;
+    
+    #$self->debug("$locstr\n");
+    
+    # $op for operator (error handling)
+    
+    # run on first pass only
+    # Note : These location types are now deprecated in GenBank (Oct. 2006)
+    if (!defined($op)) {
+        # convert all (X.Y) to [X.Y]
+        $locstr =~ s{\((\d+\.\d+)\)}{\[$1\]}g;
+        # convert ABC123:(X..Y) to ABC123:[X..Y]
+        # we should never see the above
+        $locstr =~ s{:\((\d+\.{2}\d+)\)}{:\[$1\]}g;
+    }
+    
+    if ($locstr =~ m{(.*?)\(($LOCREG)\)(.*)}o) { # any matching parentheses?
+
+        my ($beg, $mid, $end) = ($1, $2, $3);
+        my (@sublocs) = (split(q(,),$beg), $mid, split(q(,),$end));
+        
+        my @loc_objs;
+        my $loc_obj;
+        
+        SUBLOCS:
+        while (@sublocs) {
+            my $subloc = shift @sublocs;
+            next if !$subloc;
+            my $oparg = ($subloc eq 'join'   || $subloc eq 'bond' ||
+                         $subloc eq 'order'  || $subloc eq 'complement') ? $subloc : undef;
+
+            # has operator, requires further work (recurse)
+            if ($oparg) {
+                my $sub = shift @sublocs;
+                if (($oparg eq 'join' || $oparg eq 'order' || $oparg eq 'bond' )
+                     && $sub !~ m{$oparg}) {
+                    my @splitlocs = split(q(,), $sub);
+                    $loc_obj = Bio::Location::Split->new();
+                    while (my $splitloc = shift @splitlocs) {
+                        next unless $splitloc;
+                        #$loc_obj->add_sub_Location($self->from_string($splitloc, 1));
+                        # this should work but doesn't
+                        my $sobj;
+                        if ($splitloc =~ m{\(($LOCREG)\)}) {
+                            my $comploc = $1;
+                            $sobj = $self->_parse_location($comploc);
+                            $sobj->strand(-1);
+                        } else {
+                            $sobj = $self->_parse_location($splitloc);
+                        }
+                        $loc_obj->add_sub_Location($sobj);
+                    }
+                } else {
+                    $loc_obj = $self->from_string($sub, $oparg);
+                }
+            }
+            # no operator, simple or fuzzy 
+            else {
+                $loc_obj = $self->from_string($subloc,1);
+            }
+            $loc_obj->strand(-1) if ($op && $op eq 'complement');
+            push @loc_objs, $loc_obj;
+        }
+        my $ct = @loc_objs;
+        if ($op && !($op eq 'join' || $op eq 'order' || $op eq 'bond')
+                && $ct > 1 ) {
+            $self->throw("Bad operator $op: had multiple locations ".
+                         scalar(@loc_objs).", should be SplitLocationI");
+        }
+        if ($ct > 1) {
+            $loc = Bio::Location::Split->new();
+            $loc->add_sub_Location(shift @loc_objs) while (@loc_objs);
+            return $loc;
+        } else {
+            $loc = shift @loc_objs;
+            return $loc;
+        }
+    } else { # simple location(s)
+        $loc = $self->_parse_location($locstr);
+        $loc->strand(-1) if ($op && $op eq 'complement');
+    }
+    return $loc;
+}
+
+=head2 _parse_location
+
+ Title   : _parse_location
+ Usage   : $loc = $locfactory->_parse_location( $loc_string)
+
+ Function: Parses the given location string and returns a location object 
+           with start() and end() and strand() set appropriately.
+           Note that this method is private.
+ Returns : A Bio::LocationI implementing object or undef on failure
+ Args    : location string
+
+=cut
+
+sub _parse_location {
+    my ($self, $locstr) = @_;
+    my ($loc, $seqid);
+    #$self->debug( "Location parse, processing $locstr\n");
+    # 'remote' location?
+    if($locstr =~ m{^(\S+):(.*)$}o) {
+        # yes; memorize remote ID and strip from location string
+        $seqid = $1;
+        $locstr = $2;
+    }
+    
+    # split into start and end
+    my ($start, $end) = split(/\.\./, $locstr);
+    # remove enclosing parentheses if any; note that because of parentheses
+    # possibly surrounding the entire location the parentheses around start
+    # and/or end may be asymmetrical
+    # Note: these are from X.Y fuzzy locations, which are deprecated!
+    $start =~ s/(?:^\[+|\]+$)//g if $start;
+    $end   =~ s/(?:^\[+|\]+$)//g if $end;
+
+    # Is this a simple (exact) or a fuzzy location? Simples have exact start
+    # and end, or is between two adjacent bases. Everything else is fuzzy.
+    my $loctype = ".."; # exact with start and end as default
+
+    $loctype = '?' if ( ($locstr =~ /\?/) && ($locstr !~ /\?\d+/) );
+
+    my $locclass = "Bio::Location::Simple";
+    if(! defined($end)) {
+        if($locstr =~ /(\d+)([\.\^])(\d+)/) {
+            $start = $1;
+            $end = $3;
+            $loctype = $2;
+            $locclass = "Bio::Location::Fuzzy"
+              unless (abs($end-$start) <= 1) && ($loctype eq "^");
+        } else {
+            $end = $start;
+        }
+    }
+    # start_num and end_num are for the numeric only versions of 
+    # start and end so they can be compared
+    # in a few lines
+    my ($start_num, $end_num) = ($start,$end);
+    if ( ($start =~ /[\>\<\?\.\^]/) || ($end   =~ /[\>\<\?\.\^]/) ) {
+        $locclass = 'Bio::Location::Fuzzy';
+        if($start =~ /(\d+)/) {
+            ($start_num) = $1;
+        } else { 
+            $start_num = 0
+        }
+        if ($end =~ /(\d+)/) {
+            ($end_num)   = $1;
+        } else { $end_num = 0 }
+    } 
+    my $strand = 1;
+
+    if( $start_num > $end_num && $loctype ne '?') {
+        ($start,$end,$strand) = ($end,$start,-1);
+    }
+    # instantiate location and initialize
+    $loc = $locclass->new(-verbose => $self->verbose,
+                                 -start   => $start, 
+                                 -end     => $end, 
+                                 -strand  => $strand, 
+                                 -location_type => $loctype);
+    # set remote ID if remote location
+    if($seqid) {
+        $loc->is_remote(1);
+        $loc->seq_id($seqid);
+    }
+
+    # done (hopefully)
+    return $loc;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/HitFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/HitFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/HitFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,91 @@
+#-----------------------------------------------------------------
+# $Id: HitFactoryI.pm,v 1.11.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::HitFactoryI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::HitFactoryI - Interface for an object that builds Bio::Search::Hit::HitI objects
+
+=head1 SYNOPSIS
+
+To be completed.
+
+=head1 DESCRIPTION
+
+To be completed.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+
+=cut
+
+#'
+
+package Bio::Factory::HitFactoryI;
+
+use strict;
+use Bio::Factory::ObjectFactoryI;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 create_hit
+
+ Title   : create_hit
+ Usage   : $hit = $factory->create_hit( %params );
+ Function: Creates a new Bio::Search::Hit::HitI object.
+ Returns : An object that implements the Bio::Search::Hit::HitI interface
+ Args    : Named parameters (to be defined)
+
+=cut
+
+sub create_hit {
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/LocationFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/LocationFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/LocationFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,107 @@
+# $Id: LocationFactoryI.pm,v 1.7.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::LocationFactoryI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::LocationFactoryI - A factory interface for generating locations from a string
+
+=head1 SYNOPSIS
+
+ # Do not use directly, see Bio::Factory::FTLocationFactory for an example
+ use Bio::Factory::FTLocationFactory;
+ my $locfact = Bio::Factory::FTLocationFactory->new();
+ my $location = $locfact->from_string("1..200");
+ print $location->start(), " ", $location->end(), " ", $location->strand,"\n";
+
+=head1 DESCRIPTION
+
+An interface for Location Factories which generate Bio::LocationI
+objects from a string.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::LocationFactoryI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 from_string
+
+ Title   : from_string
+ Usage   : $loc = $locfactory->from_string("100..200");
+ Function: Parses the given string and returns a Bio::LocationI implementing
+           object representing the location encoded by the string.
+
+           Different implementations may support different encodings. An
+           example of a commonly used encoding is the Genbank feature table
+           encoding of locations.
+ Example :
+ Returns : A Bio::LocationI implementing object.
+ Args    : A string.
+
+
+=cut
+
+sub from_string{
+    my ($self,@args) = @_;
+
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/MapFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/MapFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/MapFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,102 @@
+# $Id: MapFactoryI.pm,v 1.8.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::MapFactoryI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::MapFactoryI - A Factory for getting markers
+
+=head1 SYNOPSIS
+
+    # get a Map Factory somehow likely from Bio::MapIO system
+
+    while( my $map = $mapin->next_map ) {
+	print "map name is ", $map->name, " length is ", 
+	    $map->length, " ", $map->units, "\n";
+	$mapout->write_map($map);
+    }
+
+=head1 DESCRIPTION
+
+This interface describes the necessary minimum methods for getting
+Maps from a data stream.  It also supports writing Map data back to a
+stream.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::MapFactoryI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 next_map
+
+ Title   : next_map
+ Usage   : my $map = $factory->next_map;
+ Function: Get a map from the factory
+ Returns : L<Bio::Map::MapI>
+ Args    : none
+
+=cut
+
+sub next_map{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 write_map
+
+ Title   : write_map
+ Usage   : $factory->write_map($map);
+ Function: Write a map out through the factory
+ Returns : none
+ Args    : L<Bio::Map::MapI>
+
+=cut
+
+sub write_map{
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectBuilderI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectBuilderI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectBuilderI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,192 @@
+# $Id: ObjectBuilderI.pm,v 1.5.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::ObjectBuilderI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::ObjectBuilderI - Interface for an object builder
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+An object builder is different from an object factory in that it
+accumulates information for the object and finally, or constantly,
+depending on the implementation, builds the object. It also allows for
+implementations that can tell the information feed which kinds of
+information the builder is interested in and which not. In addition, the
+implementation may choose to filter, transform, or completely ignore
+certain content it is fed for certain slots.
+
+Implementations will hence be mostly used by stream-based parsers to
+parse only desired content, and/or skip over undesired entries.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::ObjectBuilderI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 want_slot
+
+ Title   : want_slot
+ Usage   :
+ Function: Whether or not the object builder wants to populate the
+           specified slot of the object to be built.
+
+           The slot can be specified either as the name of the
+           respective method, or the initialization parameter that
+           would be otherwise passed to new() of the object to be
+           built.
+
+ Example :
+ Returns : TRUE if the object builder wants to populate the slot, and
+           FALSE otherwise.
+ Args    : the name of the slot (a string)
+
+
+=cut
+
+sub want_slot{
+    return $_[0]->throw_not_implemented();  # invocant only; the slot name is never read here
+}
+
+=head2 add_slot_value
+
+ Title   : add_slot_value
+ Usage   :
+ Function: Adds one or more values to the specified slot of the object
+           to be built.
+
+           Naming the slot is the same as for want_slot().
+
+           The object builder may further filter the content to be
+           set, or even completely ignore the request.
+
+           If this method reports failure, the caller should not add
+           more values to the same slot. In addition, the caller may
+           find it appropriate to abandon the object being built
+           altogether.
+
+ Example :
+ Returns : TRUE on success, and FALSE otherwise
+ Args    : the name of the slot (a string)
+           parameters determining the value to be set
+
+
+=cut
+
+sub add_slot_value{
+    return $_[0]->throw_not_implemented();  # invocant only; slot name and values are never read here
+}
+
+=head2 want_object
+
+ Title   : want_object
+ Usage   :
+ Function: Whether or not the object builder is still interested in
+           continuing with the object being built.
+
+           If this method returns FALSE, the caller should not add any
+           more values to slots, or otherwise risks that the builder
+           throws an exception. In addition, make_object() is likely
+           to return undef after this method returned FALSE.
+
+ Example :
+ Returns : TRUE if the object builder wants to continue building
+           the present object, and FALSE otherwise.
+ Args    : none
+
+
+=cut
+
+sub want_object{
+    return $_[0]->throw_not_implemented();  # called on the invocant; nothing else is read
+}
+
+=head2 make_object
+
+ Title   : make_object
+ Usage   :
+ Function: Get the built object.
+
+           This method is allowed to return undef if no value has ever
+           been added since the last call to make_object(), or if
+           want_object() returned FALSE (or would have returned FALSE)
+           before calling this method.
+
+           For an implementation that allows consecutive building of
+           objects, a caller must call this method once, and only
+           once, between subsequent objects to be built. I.e., a call
+           to make_object implies 'end_object.'
+
+ Example :
+ Returns : the object that was built
+ Args    : none
+
+
+=cut
+
+sub make_object{
+    return $_[0]->throw_not_implemented();  # called on the invocant; nothing else is read
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,253 @@
+# $Id: ObjectFactory.pm,v 1.5.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::ObjectFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::ObjectFactory - Instantiates a new Bio::Root::RootI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Factory::ObjectFactory;
+
+    my $factory = new Bio::Factory::ObjectFactory(-type => 'Bio::Ontology::GOterm');
+    my $term = $factory->create_object(-name => 'peroxisome',
+                                       -ontology => 'Gene Factory',
+                                       -identifier => 'GO:0005777');
+
+
+=head1 DESCRIPTION
+
+This object will build L<Bio::Root::RootI> objects generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+
+=head1 CONTRIBUTORS
+
+This is mostly copy-and-paste with subsequent adaptation from
+Bio::Seq::SeqFactory by Jason Stajich. Most credits should in fact go
+to him.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::ObjectFactory;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Factory::ObjectFactory();
+ Function: Builds a new Bio::Factory::ObjectFactory object 
+ Returns : Bio::Factory::ObjectFactory
+ Args    : -type      => string, name of a L<Bio::Root::RootI> derived class.
+                         There is no default.
+           -interface => string, name of the interface or class any type
+                         specified needs to at least implement.
+                         The default is Bio::Root::RootI.
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    # Pull the two named parameters this class understands out of @args.
+    my ($type,$interface) = $self->_rearrange([qw(TYPE INTERFACE)], @args);
+
+    $self->{'_loaded_types'} = {};   # classes type() has already loaded and validated
+    $self->interface($interface || "Bio::Root::RootI");
+    $self->type($type) if $type;     # interface must be set first: type() validates against it
+
+    return $self;
+}
+
+
+=head2 create_object
+
+ Title   : create_object
+ Usage   : my $seq = $factory->create_object(<named parameters>);
+ Function: Instantiates a new object of the previously set type.
+
+           This object allows us to genericize the instantiation of
+           objects.
+
+           You must have provided -type at instantiation, or have
+           called type($mytype) before you can call this method.
+
+ Returns : an object of the type returned by type()
+
+           The return type is configurable using new(-type =>"..."),
+           or by calling $self->type("My::Fancy::Class").
+ Args    : Initialization parameters specific to the type of
+           object we want. Check the POD of the class you set as type.
+
+=cut
+
+sub create_object {
+   my ($self, @args) = @_;
+   # type() loads and validates the class on set; fail clearly if none was set
+   my $type = $self->type() or $self->throw("no object type set; call type() first");
+   return $type->new(-verbose => $self->verbose, @args);
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $obj->type($newval)
+ Function: Get/set the type of object to be created.
+
+           This may be changed at any time during the lifetime of this
+           factory.
+
+ Returns : value of type (a string)
+ Args    : newvalue (optional, a string)
+
+
+=cut
+
+sub type{
+    my ($self,@newval) = @_;
+
+    # Getter: no argument given, report the current type.
+    return $self->{'type'} unless @newval;
+
+    # Setter: load and validate each class only once per factory.
+    my $type = $newval[0];
+    if($type && (! $self->{'_loaded_types'}->{$type})) {
+        eval { $self->_load_module($type); };
+        $self->throw("module for '$type' failed to load: ".$@) if $@;
+
+        # An empty hash blessed into the class is enough for isa() checks.
+        my $skeleton = bless {},$type;
+        # _validate_type() may itself throw rather than return false
+        $self->_validate_type($skeleton)
+            or $self->throw("'$type' is not valid for factory ".ref($self));
+
+        $self->{'_loaded_types'}->{$type} = 1;
+    }
+    # False values are stored as-is, without loading or validation.
+    return $self->{'type'} = $type;
+}
+
+=head2 interface
+
+ Title   : interface
+ Usage   : $obj->interface($newval)
+ Function: Get/set the interface or base class that supplied types
+           must at least implement (inherit from).
+ Example : 
+ Returns : value of interface (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub interface{
+    my ($self,$interface) = @_;
+
+    # A false argument (missing, undef, '' or 0) never overwrites the
+    # stored value; the call then acts as a plain getter.
+    $self->{'interface'} = $interface if $interface;
+
+    return $self->{'interface'};
+}
+
+=head2 _validate_type
+
+ Title   : _validate_type
+ Usage   : $factory->_validate_type($object)
+ Function: Called to let derived factories validate the type set
+           via type().
+
+           The default implementation here checks whether the supplied
+           object skeleton implements the interface set via -interface
+           upon factory instantiation.
+
+ Example :
+ Returns : TRUE if the type is to be considered valid, and FALSE otherwise.
+           Instead of returning FALSE this method may also just throw
+           an informative exception.
+
+           The default implementation here will throw an exception
+           if the supplied object does not inherit from the interface
+           provided by the interface() method.
+
+ Args    : A hash reference blessed into the specified type, allowing
+           queries like isa().
+
+
+=cut
+
+sub _validate_type{
+    my ($self,$obj) = @_;
+
+    # Only a skeleton that does not inherit from interface() is rejected.
+    $obj->isa($self->interface())
+        or $self->throw("invalid type: '".ref($obj).
+                        "' does not implement '".$self->interface()."'");
+    return 1;
+}
+
+#####################################################################
+# aliases for naming consistency or other reasons                   #
+#####################################################################
+
+*create = \&create_object;  # glob alias: create() and create_object() are the same sub in this package
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ObjectFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,105 @@
+# $Id: ObjectFactoryI.pm,v 1.6.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::ObjectFactoryI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::ObjectFactoryI - A General object creator factory
+
+=head1 SYNOPSIS
+
+# see the implementations of this interface for details but
+# basically
+
+    my $obj = $factory->create(%args);
+
+=head1 DESCRIPTION
+
+This interface is the basic structure for a factory which creates new
+objects.  In this case it is up to the implementer to check arguments
+and initialize whatever new object the implementing class is designed for.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::ObjectFactoryI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 create
+
+ Title   : create
+ Usage   : $factory->create(%args)
+ Function: Create a new object  
+ Returns : a new object
+ Args    : hash of initialization parameters
+
+
+=cut
+
+sub create{
+   my ($self, @args) = @_;          # initialization parameters; unused by this stub
+   $self->throw_not_implemented();  # interface-only method: implementations provide the real one
+}
+
+=head2 create_object
+
+ Title   : create_object
+ Usage   : $obj = $factory->create_object(%args)
+ Function: Create a new object.
+
+           This is supposed to supersede create(). Right now it only delegates
+           to create().
+ Returns : a new object
+ Args    : hash of initialization parameters
+
+
+=cut
+
+sub create_object{
+   my ($self, @args) = @_;
+   return $self->create(@args);     # default: forward all parameters unchanged to create()
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ResultFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ResultFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/ResultFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,90 @@
+#-----------------------------------------------------------------
+# $Id: ResultFactoryI.pm,v 1.10.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module Bio::Factory::ResultFactoryI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::ResultFactoryI - Interface for an object that builds Bio::Search::Result::ResultI objects
+
+=head1 SYNOPSIS
+
+To be completed.
+
+=head1 DESCRIPTION
+
+To be completed.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+
+=cut
+
+#'
+
+package Bio::Factory::ResultFactoryI;
+
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 create_result
+
+ Title   : create_result
+ Usage   : $result = $factory->create_result( %params );
+ Function: Creates a new Bio::Search::Result::ResultI object.
+ Returns : An object that implements the Bio::Search::Result::ResultI interface
+ Args    : Named parameters (to be defined)
+
+=cut
+
+sub create_result {
+    my ($factory) = @_;   # the named parameters that follow are not read by this stub
+    return $factory->throw_not_implemented();
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,210 @@
+# $Id: SeqAnalysisParserFactory.pm,v 1.15.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::SeqAnalysisParserFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>,
+# and Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Jason Stajich, Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::SeqAnalysisParserFactory - class capable of creating
+SeqAnalysisParserI compliant parsers
+
+=head1 SYNOPSIS
+
+    # initialize an object implementing this interface, e.g.
+    $factory = Bio::Factory::SeqAnalysisParserFactory->new();
+    # find out the methods it knows about
+    print "registered methods: ",
+          join(', ', keys %{$factory->driver_table}), "\n";
+    # obtain a parser object
+    $parser = $factory->get_parser(-input=>$inputobj,
+                                   -params=>[@params],
+		                   -method => $method);
+    # $parser is an object implementing Bio::SeqAnalysisParserI
+    # annotate sequence with features produced by parser
+    while(my $feat = $parser->next_feature()) {
+	$seq->add_SeqFeature($feat);
+    }
+
+=head1 DESCRIPTION
+
+This is a factory class capable of instantiating SeqAnalysisParserI 
+implementing parsers.
+
+The concept behind this class and the interface it implements
+(Bio::Factory::SeqAnalysisParserFactoryI) is a generic analysis result
+parsing in high-throughput automated sequence annotation
+pipelines. See Bio::SeqAnalysisParserI for more documentation of this
+concept.
+
+You can always find out the methods an instance of this class knows
+about by the way given in the SYNOPSIS section. By default, and
+assuming that the documentation is up-to-date, this will comprise
+genscan, mzef, estscan, bplite, blast, hmmer, gff, sim4, epcr, and
+exonerate (all case-insensitive).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email Hilmar Lapp E<lt>hlapp at gmx.netE<gt>, Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Factory::SeqAnalysisParserFactory;
+use strict;
+
+
+use base qw(Bio::Factory::DriverFactory Bio::Factory::SeqAnalysisParserFactoryI);
+
+BEGIN {
+    my @default_drivers = (   # method name => parser module, kept in registration order
+        "genscan"   => "Bio::Tools::Genscan",
+        "mzef"      => "Bio::Tools::MZEF",
+        "estscan"   => "Bio::Tools::ESTScan",
+        "bplite"    => "Bio::Tools::BPlite",
+        "blast"     => "Bio::Tools::BPlite",
+        "hmmer"     => "Bio::Tools::HMMER::Result",
+        "gff"       => "Bio::Tools::GFF",
+        "sim4"      => "Bio::Tools::Sim4::Results",
+        "epcr"      => "Bio::Tools::EPCR",
+        "exonerate" => "Bio::Tools::Exonerate",
+    );
+    Bio::Factory::DriverFactory->register_driver(@default_drivers);
+}
+
+sub new {
+    my ($class, @ctor_args) = @_;
+
+    # Construction is delegated entirely to the parent class: the default
+    # drivers were already registered once, when this module was loaded.
+    my $factory = $class->SUPER::new(@ctor_args);
+    return $factory;
+}
+
+=head2 get_parser
+
+ Title   : get_parser
+ Usage   : $factory->get_parser(-input=>$inputobj, 
+                                [ -params=>[@params] ],
+		                -method => $method)
+ Function: Creates and returns a parser object for the given input and method.
+           Both file names and streams (filehandles) are allowed.
+
+           Parameters (-params argument) are passed on to the parser object
+           and therefore are specific to the parser to be created.
+ Example :
+ Returns : A Bio::SeqAnalysisParserI implementing object. Exception if
+           creation of the parser object fails.
+ Args    : B<input>  - object/file where analysis results are coming from,
+	   B<params> - parameter to use when parsing/running analysis
+	   B<method> - method of analysis
+
+=cut
+
+sub get_parser {
+    my ($self, @args) = @_;
+    # Named arguments: -input (file name or filehandle), -params (array ref,
+    # optional) and -method (driver key, matched case-insensitively).
+
+    my ($input, $params, $method) =
+        $self->_rearrange([qw(INPUT PARAMS METHOD)], @args);
+
+    # -method is mandatory; fail with a clear message instead of looking up ''
+    $self->throw("No analysis method given (-method).") if(! defined($method));
+    # retrieve module name for requested method
+    $method = lc $method; # method is case-insensitive
+    my $module = $self->get_driver($method);
+    if(! defined($module)) {
+        $self->throw("Analysis parser driver for method $method not registered.");
+    }
+    # load module
+    $self->_load_module($module); # throws an exception on failure to load
+    # make sure parameters is not undef
+    $params = [] if( !defined $params );
+    # figure out input method (file or stream): a reference whose type name
+    # contains GLOB is passed as a filehandle, anything else as a file name
+    my $inputmethod = (ref($input) =~ /GLOB/i) ? '-fh' : '-file';
+    # instantiate parser and return the result
+    my $parser = $module->new($inputmethod => $input, @$params);
+    if(! $parser->isa('Bio::SeqAnalysisParserI')) {
+        $self->throw("Driver $module registered for method $method does not ".
+                     "implement Bio::SeqAnalysisParserI. How come?");
+    }
+    return $parser;
+}
+
+
+=head2 register_driver
+
+ Title   : register_driver
+ Usage   : $factory->register_driver("genscan", "Bio::Tools::Genscan");
+ Function: Registers a driver a factory class should be able to instantiate.
+
+           This method can be called both as an instance and as a
+           class method.
+
+ Returns : 
+ Args    : Key of the driver (string) and the module implementing the driver
+           (string).
+
+=cut
+
+=head2 driver_table
+
+ Title   : driver_table
+ Usage   : $table = $factory->driver_table();
+ Function: Returns a reference to the hash table storing associations of
+           methods with drivers.
+
+           You use this table to look up registered methods (keys) and
+           drivers (values).
+
+           In this implementation the table is class-specific and
+           therefore shared by all instances. You can override this in
+           a derived class, but note that this method can be called
+           both as an instance and a class method.
+
+           This will be the table used by the object internally. You
+           should definitely know what you're doing if you modify the
+           table's contents.  Modifications are shared by _all_
+           instances, those present and those yet to be created.
+
+ Returns : A reference to a hash table.
+ Args    : 
+
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SeqAnalysisParserFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,110 @@
+# $Id: SeqAnalysisParserFactoryI.pm,v 1.12.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::SeqAnalysisParserFactoryI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>,
+# and Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Jason Stajich, Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::SeqAnalysisParserFactoryI - interface describing objects capable
+              of creating SeqAnalysisParserI compliant parsers
+
+=head1 SYNOPSIS
+
+    # initialize an object implementing this interface, e.g.
+    $factory = Bio::Factory::SeqAnalysisParserFactory->new();
+    # obtain a parser object
+    $parser = $factory->get_parser(-input=>$inputobj,
+                                   -params=>[@params],
+		                   -method => $method);
+    # $parser is an object implementing Bio::SeqAnalysisParserI
+    # annotate sequence with features produced by parser
+    while(my $feat = $parser->next_feature()) {
+	$seq->add_SeqFeature($feat);
+    }
+
+=head1 DESCRIPTION
+
+This is an interface for factory classes capable of instantiating
+SeqAnalysisParserI implementing parsers.
+
+The concept behind the interface is a generic analysis result parsing
+in high-throughput automated sequence annotation pipelines. See
+L<Bio::SeqAnalysisParserI> for more documentation of this concept.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email Hilmar Lapp E<lt>hlapp at gmx.netE<gt>, Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Factory::SeqAnalysisParserFactoryI;
+use strict;
+
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 get_parser
+
+ Title   : get_parser
+ Usage   : $factory->get_parser(-input=>$inputobj, 
+                                [ -params=>[@params] ],
+		                -method => $method)
+ Function: Creates and returns a parser object for the given input and method.
+           The type of input which is suitable depends on the implementation,
+           but a good-style implementation should allow both file names and
+           streams (filehandles).
+
+           A particular implementation may not be able to create a parser for
+           the requested method. In this case it shall return undef.
+
+           Parameters (-params argument) are passed on to the parser object
+           and therefore are specific to the parser to be created. An
+           implementation of this interface should make this argument optional.
+ Example :
+ Returns : A Bio::SeqAnalysisParserI implementing object.
+ Args    : B<input>  - object/file where analysis results are coming from,
+	   B<params> - parameter to use when parsing/running analysis
+	   B<method> - method of analysis
+
+=cut
+
+sub get_parser {
+    my ($factory) = @_;   # the named arguments (-input, -params, -method) are not read by this stub
+    return $factory->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,94 @@
+# $Id: SequenceFactoryI.pm,v 1.10.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::SequenceFactoryI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::SequenceFactoryI - This interface allows for generic building of sequences in factories which create sequences (like SeqIO) 
+
+=head1 SYNOPSIS
+
+# do not use this object directly it is an interface
+# get a Bio::Factory::SequenceFactoryI object like
+
+    use Bio::Seq::SeqFactory;
+    my $seqbuilder = new Bio::Seq::SeqFactory('type' => 'Bio::PrimarySeq');
+
+    my $seq = $seqbuilder->create(-seq => 'ACTGAT',
+				  -display_id => 'exampleseq');
+
+    print "seq is a ", ref($seq), "\n";
+
+=head1 DESCRIPTION
+
+A generic way to build Sequence objects via a pluggable factory.  This
+reduces the amount of code that looks like
+
+  if( $type  eq 'Bio::PrimarySeq' ) { ... } 
+  elsif( $type eq 'Bio::Seq::RichSeq' ) { ... }
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::SequenceFactoryI;
+
+use strict;
+
+use base qw(Bio::Factory::ObjectFactoryI);
+
+=head2 create
+
+ Title   : create
+ Usage   : my $seq = $seqbuilder->create(-seq => 'CAGT', 
+					 -id => 'name');
+ Function: Instantiates new Bio::PrimarySeqI (or one of its child classes)
+           This object allows us to genericize the instantiation of sequence
+           objects.
+ Returns : Bio::PrimarySeqI
+ Args    : initialization parameters specific to the type of sequence
+           object we want.  Typically 
+           -seq        => $str,
+           -display_id => $name
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceProcessorI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceProcessorI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceProcessorI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,196 @@
+# $Id: SequenceProcessorI.pm,v 1.4.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::SequenceProcessorI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::SequenceProcessorI - Interface for chained sequence 
+                                   processing algorithms
+
+=head1 SYNOPSIS
+
+    use Bio::SeqIO;
+    use MySeqProcessor; # is-a Bio::Factory::SequenceProcessorI
+
+    # obtain your source stream, e.g., an EMBL file
+    my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => 'embl');
+    # create your processor (it must implement this interface)
+    my $seqalgo = MySeqProcessor->new();
+    # chain together
+    $seqalgo->source_stream($seqin);
+    # you could create more processors and chain them one after another
+    # ...
+    # finally, the last link in the chain is your SeqIO stream
+    my $seqpipe = $seqalgo;
+
+    # once you've established the pipeline, proceed as if you had a
+    # single SeqIO stream
+    while(my $seq = $seqpipe->next_seq()) {
+	# ... do something ...
+    }
+
+=head1 DESCRIPTION
+
+This defines an interface that allows seamless chaining of sequence
+processing algorithms encapsulated in modules while retaining the
+overall Bio::SeqIO interface at the end of the pipeline.
+
+This is especially useful if you want an easily configurable
+processing pipeline of re-usable algorithms as building blocks instead
+of (hard-)coding the whole algorithm in a single script.
+
+There are literally no restrictions as to what an individual module
+can do with a sequence object it obtains from the source stream before
+it makes it available through its own next_seq() method. It can
+manipulate the sequence object, but otherwise keep it intact, but it
+can also create any number of new sequence objects from it, or it can
+discard some, or any combination thereof. The only requirement is that
+its next_seq() method return Bio::PrimarySeqI compliant objects. In
+order to play nice, if a processor creates new objects it should try
+to use the same sequence factory that the source stream uses, but this
+is not strongly mandated.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::SequenceProcessorI;
+use strict;
+use Carp;
+use Bio::Root::RootI;
+
+use base qw(Bio::Factory::SequenceStreamI);
+
+=head2 source_stream
+
+ Title   : source_stream
+ Usage   : $obj->source_stream($newval)
+ Function: Get/set the source sequence stream for this sequence
+           processor.
+
+           An implementation is not required to allow set, but will
+           usually do so.
+
+ Example : 
+ Returns : A Bio::Factory::SequenceStreamI compliant object
+ Args    : on set, new value (a Bio::Factory::SequenceStreamI compliant
+           object)
+
+
+=cut
+
+sub source_stream {
+    # Interface stub: this base version only calls throw_not_implemented()
+    # on the invocant; implementing classes supply the real get/set accessor.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head1 Bio::Factory::SequenceStreamI methods
+
+ The requirement to implement these methods is inherited from
+ L<Bio::Factory::SequenceStreamI>. An implementation may not
+ necessarily have to implement all methods in a meaningful way. Which
+ methods will be necessary very much depends on the context in which
+ an implementation of this interface is used. E.g., if it is only used
+ for post-processing sequences read from a SeqIO stream, write_seq()
+ will not be used and hence does not need to be implemented in a
+ meaningful way (it may in fact even throw an exception).
+
+ Also, since an implementor will already receive built objects from a
+ sequence stream, sequence_factory() may or may not be relevant,
+ depending on whether the processing method does or does not involve
+ creating new objects.
+
+=cut
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq
+ Function: Reads the next sequence object from the stream and returns it.
+
+           In the case of a non-recoverable situation an exception
+           will be thrown.  Do not assume that you can resume parsing
+           the same stream after catching the exception. Note that you
+           can always turn recoverable errors into exceptions by
+           calling $stream->verbose(2).
+
+ Returns : a Bio::Seq sequence object
+ Args    : none
+
+See L<Bio::Root::RootI>
+
+=cut
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+=cut
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $seqio->sequence_factory($seqfactory)
+ Function: Get the Bio::Factory::SequenceFactoryI
+ Returns : Bio::Factory::SequenceFactoryI
+ Args    : none
+
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceStreamI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceStreamI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/SequenceStreamI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,125 @@
+# $Id: SequenceStreamI.pm,v 1.6.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::SequenceStreamI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::SequenceStreamI - Interface describing the basics of a Sequence Stream.
+
+=head1 SYNOPSIS
+
+  # get a SequenceStreamI object somehow like with SeqIO
+  use Bio::SeqIO;
+  my $in = new Bio::SeqIO(-file => '< fastafile');
+  while( my $seq = $in->next_seq ) {
+  }
+
+=head1 DESCRIPTION
+
+This interface describes objects which produce Bio::PrimarySeqI
+objects from, or write Bio::PrimarySeqI objects to, a
+data stream.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::SequenceStreamI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq
+ Function: Reads the next sequence object from the stream and returns it.
+
+           Certain driver modules may encounter entries in the stream that
+           are either misformatted or that use syntax not yet understood
+           by the driver. If such an incident is recoverable, e.g., by
+           dismissing a feature of a feature table or some other non-mandatory
+           part of an entry, the driver will issue a warning. In the case
+           of a non-recoverable situation an exception will be thrown.
+           Do not assume that you can resume parsing the same stream after
+           catching the exception. Note that you can always turn recoverable
+           errors into exceptions by calling $stream->verbose(2).
+ Returns : a Bio::Seq sequence object
+ Args    : none
+
+See L<Bio::Root::RootI>
+
+=cut
+
+sub next_seq {
+    # Interface stub: delegates to throw_not_implemented(); a concrete
+    # stream class is expected to provide the actual reader.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+=cut
+
+sub write_seq {
+    # Interface stub: delegates to throw_not_implemented(); a concrete
+    # stream class is expected to provide the actual writer.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $seqio->sequence_factory($seqfactory)
+ Function: Get the Bio::Factory::SequenceFactoryI
+ Returns : Bio::Factory::SequenceFactoryI
+ Args    : none
+
+
+=cut
+
+sub sequence_factory {
+    # Interface stub: delegates to throw_not_implemented(); implementing
+    # classes return their Bio::Factory::SequenceFactoryI object here.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Factory/TreeFactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Factory/TreeFactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Factory/TreeFactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,106 @@
+# $Id: TreeFactoryI.pm,v 1.10.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Factory::TreeFactoryI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Factory::TreeFactoryI - Factory Interface for getting and writing trees
+  from/to a data stream
+
+=head1 SYNOPSIS
+
+  # get a $factory from somewhere Bio::TreeIO likely
+  my $treeio = new Bio::TreeIO(-format => 'newick', #this is phylip/newick format
+  			       -file   => 'file.tre');
+  my $treeout = new Bio::TreeIO(-format => 'nexus',
+  				-file   => ">file.nexus");
+
+  # convert tree formats from newick/phylip to nexus
+  while(my $tree = $treeio->next_tree) {
+      $treeout->write_tree($tree);
+  }
+
+=head1 DESCRIPTION
+
+This interface describes the minimal functions needed to get and write
+trees from a data stream.  It is implemented by the L<Bio::TreeIO> factory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Factory::TreeFactoryI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree;
+ Function: Get a tree from the factory
+ Returns : L<Bio::Tree::TreeI>
+ Args    : none
+
+=cut
+
+sub next_tree {
+   # Interface stub.  Any extra arguments are collected in @args but are not
+   # used here; the call is handed straight to throw_not_implemented().
+   # (The archived text read "at args": the '@' sigil had been mangled.)
+   my ($self, @args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Writes a tree onto the stream
+ Returns : none
+ Args    : L<Bio::Tree::TreeI>
+
+
+=cut
+
+sub write_tree {
+   # Interface stub.  Any extra arguments are collected in @args but are not
+   # used here; the call is handed straight to throw_not_implemented().
+   # (The archived text read "at args": the '@' sigil had been mangled.)
+   my ($self, @args) = @_;
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureHolderI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureHolderI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureHolderI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,250 @@
+# $Id: FeatureHolderI.pm,v 1.10.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::FeatureHolderI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::FeatureHolderI - the base interface an object with features must implement
+
+=head1 SYNOPSIS
+
+    use Bio::SeqIO;
+    # get a feature-holding object somehow: for example, Bio::SeqI objects
+    # have features
+    my $seqio = Bio::SeqIO->new(-fh => \*STDIN, -format => 'genbank');
+    while (my $seq = $seqio->next_seq()) {
+        # $seq is-a Bio::FeatureHolderI, hence:
+        my @feas = $seq->get_SeqFeatures();
+        # each element is-a Bio::SeqFeatureI
+        foreach my $fea (@feas) {
+            # do something with the feature objects
+        }
+    }
+
+=head1 DESCRIPTION
+
+This is the base interface that all feature-holding objects must
+implement.
+
+Popular feature-holders are for instance L<Bio::Seq> objects. Since
+L<Bio::SeqFeatureI> defines a sub_SeqFeature() method, most
+Bio::SeqFeatureI implementations like L<Bio::SeqFeature::Generic> will
+implement the feature holder interface as well.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 CONTRIBUTORS
+
+Steffen Grossmann [SG], grossman-at-molgen.mpg.de
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+
+package Bio::FeatureHolderI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 get_SeqFeatures()
+
+ Usage   :
+ Function: Get the feature objects held by this feature holder.
+ Example :
+ Returns : an array of Bio::SeqFeatureI implementing objects
+ Args    : none
+
+At some day we may want to expand this method to allow for a feature
+filter to be passed in.
+
+=cut
+
+sub get_SeqFeatures {
+  # Interface stub: delegates to throw_not_implemented(); feature holders
+  # provide their own implementation.
+  my $self = shift;
+  $self->throw_not_implemented();
+}
+
+=head2 add_SeqFeature()
+
+ Usage   : $feat->add_SeqFeature($subfeat);
+           $feat->add_SeqFeature($subfeat,'EXPAND')
+ Function: adds a SeqFeature into the subSeqFeature array.
+           with no 'EXPAND' qualifier, subfeat will be tested
+           as to whether it lies inside the parent, and throw
+           an exception if not.
+
+           If EXPAND is used, the parent''s start/end/strand will
+           be adjusted so that it grows to accommodate the new
+           subFeature
+ Example :
+ Returns : nothing
+ Args    : a Bio::SeqFeatureI object
+
+=cut
+
+sub add_SeqFeature {
+  # Interface stub: delegates to throw_not_implemented(); feature holders
+  # provide their own implementation.
+  my $self = shift;
+  $self->throw_not_implemented();
+}
+
+
+=head2 remove_SeqFeatures()
+
+ Usage   : $obj->remove_SeqFeatures
+ Function: Removes all sub SeqFeatures.  If you want to remove only a subset,
+           remove that subset from the returned array, and add back the rest.
+ Returns : The array of Bio::SeqFeatureI implementing sub-features that was
+           deleted from this feature.
+ Args    : none
+
+=cut
+
+sub remove_SeqFeatures {
+  # Interface stub: delegates to throw_not_implemented(); feature holders
+  # provide their own implementation.
+  my $self = shift;
+  $self->throw_not_implemented();
+}
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : $obj->feature_count()
+ Function: Return the number of SeqFeatures attached to a feature holder.
+
+           This is before flattening a possible sub-feature tree.
+
+           We provide a default implementation here that just counts
+           the number of objects returned by get_SeqFeatures().
+           Implementors may want to override this with a more
+           efficient implementation.
+
+ Returns : integer representing the number of SeqFeatures
+ Args    : None
+
+At some day we may want to expand this method to allow for a feature
+filter to be passed in.
+
+Our default implementation allows for any number of additional
+arguments and will pass them on to get_SeqFeatures(). I.e., in order to
+support filter arguments, just support them in get_SeqFeatures().
+
+=cut
+
+sub feature_count {
+    my $self = shift;
+
+    # Call get_SeqFeatures() in list context and count what comes back.
+    # Wrapping the call itself in scalar() would run it in scalar context,
+    # where an implementation that returns a plain list (rather than an
+    # array) yields its last element instead of the number of features.
+    # Any remaining arguments are forwarded unchanged.
+    my @features = $self->get_SeqFeatures(@_);
+    return scalar(@features);
+}
+
+=head2 get_all_SeqFeatures
+
+ Title   : get_all_SeqFeatures
+ Usage   :
+ Function: Get the flattened tree of feature objects held by this
+           feature holder. The difference to get_SeqFeatures is that
+           the entire tree of sub-features will be flattened out.
+
+           We provide a default implementation here, so implementors
+           don''t necessarily need to implement this method.
+
+ Example :
+ Returns : an array of Bio::SeqFeatureI implementing objects
+ Args    : none
+
+At some day we may want to expand this method to allow for a feature
+filter to be passed in.
+
+Our default implementation allows for any number of additional
+arguments and will pass them on to any invocation of
+get_SeqFeatures(), wherever a component of the tree implements
+FeatureHolderI. I.e., in order to support filter arguments, just
+support them in get_SeqFeatures().
+
+=cut
+
+sub get_all_SeqFeatures {
+    my $self = shift;
+    my @flatarr;
+
+    # Walk the tree depth-first: each top-level feature is followed by all
+    # of its descendants.  Whatever arguments remain in @_ are forwarded to
+    # get_SeqFeatures() and to the flattening helper.
+    # (The archived text read "at _": the '@' sigil had been mangled.)
+    foreach my $feat ( $self->get_SeqFeatures(@_) ) {
+        push(@flatarr, $feat);
+        _add_flattened_SeqFeatures(\@flatarr, $feat, @_);
+    }
+
+    # needed to deal with subfeatures which appear more than once in the hierarchy [SG]
+    # Only the first occurrence of each feature is kept, in encounter order.
+    my %seen;
+    my @uniq_flatarr = grep { !$seen{$_}++ } @flatarr;
+    return @uniq_flatarr;
+}
+
+# Private helper for get_all_SeqFeatures(): appends all descendants of $feat
+# to the array referenced by $arrayref, depth-first.
+#   $arrayref - array reference that collects the flattened features
+#   $feat     - feature whose sub-features are to be appended
+#   @args     - optional arguments forwarded to get_SeqFeatures()
+# Dies via confess() if $feat is neither a Bio::FeatureHolderI nor a
+# Bio::SeqFeatureI.
+sub _add_flattened_SeqFeatures {
+    # (The archived text read "at args": the '@' sigil had been mangled.)
+    my ($arrayref, $feat, @args) = @_;
+    my @subs = ();
+
+    if ($feat->isa("Bio::FeatureHolderI")) {
+        @subs = $feat->get_SeqFeatures(@args);
+    } elsif ($feat->isa("Bio::SeqFeatureI")) {
+        @subs = $feat->sub_SeqFeature();
+    } else {
+        confess ref($feat)." is neither a FeatureHolderI nor a SeqFeatureI. ".
+            "Don't know how to flatten.";
+    }
+    foreach my $sub (@subs) {
+        push(@$arrayref, $sub);
+        # Forward @args on every level, as the get_all_SeqFeatures POD
+        # promises; previously they were dropped after the first level.
+        _add_flattened_SeqFeatures($arrayref, $sub, @args);
+    }
+    return;
+}
+
+# DEPRECATED - use Bio::SeqFeature::Tools::IDHandler directly.
+# Loads IDHandler on demand and forwards this object to its
+# set_ParentIDs_from_hierarchy() method, returning whatever that returns.
+# The former empty prototype "()" was dropped: it declared "no arguments"
+# for a method that receives $self, and prototypes are ignored on method
+# calls anyway.
+sub set_ParentIDs_from_hierarchy {
+    my $self = shift;
+    require Bio::SeqFeature::Tools::IDHandler;
+    return Bio::SeqFeature::Tools::IDHandler->new->set_ParentIDs_from_hierarchy($self);
+}
+
+# DEPRECATED - use Bio::SeqFeature::Tools::IDHandler directly.
+# Loads IDHandler on demand and forwards this object to its
+# create_hierarchy_from_ParentIDs() method, returning whatever that returns.
+# The former empty prototype "()" was dropped: it declared "no arguments"
+# for a method that receives $self, and prototypes are ignored on method
+# calls anyway.
+sub create_hierarchy_from_ParentIDs {
+    my $self = shift;
+    require Bio::SeqFeature::Tools::IDHandler;
+    return Bio::SeqFeature::Tools::IDHandler->new->create_hierarchy_from_ParentIDs($self);
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/bed.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/bed.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/bed.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,191 @@
+=pod
+
+=head1 NAME
+
+Bio::FeatureIO::bed - write features in UCSC BED format
+
+=head1 SYNOPSIS
+
+  my $out = Bio::FeatureIO->new(-format=>'bed');
+  for my $feat ($seq->get_SeqFeatures) {
+    $out->write_feature($feat);
+  }
+
+=head1 DESCRIPTION
+
+See L<http://www.genome.ucsc.edu/goldenPath/help/customTrack.html#BED>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::FeatureIO::bed;
+
+use strict;
+use base qw(Bio::FeatureIO);
+use Bio::SeqFeature::Annotated;
+use Bio::OntologyIO;
+
+=head2 _initialize
+
+ Title   : _initialize
+ Function: initializes BED for reading/writing (currently write-only)
+ Args    : all optional:
+           name          description
+           ----------------------------------------------------------
+           -name         the name for the BED track, stored in header
+                         name defaults to localtime()
+           -description  the description for the BED track, stored in
+                         header.  defaults to localtime().
+           -use_score    whether or not the score attribute of
+                         features should be used when rendering them.
+                         the higher the score the darker the color.
+                         defaults to 0 (false)
+
+
+
+=cut
+
+sub _initialize {
+  my ($self, %arg) = @_;
+
+  $self->SUPER::_initialize(%arg);
+
+  # -name and -description fall back to the current local time string;
+  # -use_score falls back to 0 (false).
+  $self->name($arg{-name} || scalar(localtime()));
+  $self->description($arg{-description} || scalar(localtime()));
+  $self->use_score($arg{-use_score} || 0);
+
+  # Write the BED track header.  The line is newline-terminated so that
+  # the first feature record starts on a line of its own.
+  # NOTE(review): the header is printed unconditionally, i.e. also if the
+  # stream was opened for reading - confirm that is intended.
+  $self->_print(sprintf('track name="%s" description="%s" useScore=%d',
+                        $self->name,
+                        $self->description,
+                        $self->use_score ? 1 : 0
+                       ) . "\n"
+               );
+}
+
+=head2 use_score
+
+ Title   : use_score
+ Usage   : $obj->use_score($newval)
+ Function: should score be used to adjust feature color when rendering?  set to true if so.
+ Example : 
+ Returns : value of use_score (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub use_score {
+    # Combined getter/setter: with an argument, store it; in either case
+    # hand back the value currently held under 'use_score'.
+    my ($self, @newval) = @_;
+
+    if (@newval) {
+        $self->{'use_score'} = $newval[0];
+    }
+    return $self->{'use_score'};
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name($newval)
+ Function: name of BED track
+ Example : 
+ Returns : value of name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub name {
+    # Combined getter/setter: with an argument, store it; in either case
+    # hand back the value currently held under 'name'.
+    my ($self, @newval) = @_;
+
+    if (@newval) {
+        $self->{'name'} = $newval[0];
+    }
+    return $self->{'name'};
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function: description of BED track
+ Example : 
+ Returns : value of description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub description {
+    # Combined getter/setter: with an argument, store it; in either case
+    # hand back the value currently held under 'description'.
+    my ($self, @newval) = @_;
+
+    if (@newval) {
+        $self->{'description'} = $newval[0];
+    }
+    return $self->{'description'};
+}
+
+
+# Writes one feature as a tab-separated BED record, followed by one record
+# for each of its sub-features (recursively).
+#   Args   : a Bio::SeqFeature::Annotated object
+#   Throws : if the argument is not a Bio::SeqFeature::Annotated
+sub write_feature {
+  my ($self, $feature) = @_;
+  $self->throw("only Bio::SeqFeature::Annotated objects are writeable") unless $feature->isa('Bio::SeqFeature::Annotated');
+
+  # NOTE(review): start is written exactly as the feature reports it; the
+  # UCSC BED description defines chromStart as zero-based - confirm whether
+  # a coordinate conversion is needed here.
+  my $chrom       = $feature->seq_id || '';
+  my $chrom_start = $feature->start  || 0;
+  my $chrom_end   = $feature->end    || 0;
+
+  #try to make a reasonable name: first 'Name' annotation, else first 'ID'
+  #annotation, else a placeholder.  Distinct list variables are used so the
+  #second declaration does not mask the first.
+  my $name;
+  if (my @names = ($feature->annotation->get_Annotations('Name'))) {
+    $name = $names[0]->value;
+    $self->warn("only using first of feature's multiple names: ".join(',', map {$_->value} @names)) if scalar(@names) > 1;
+  } elsif (my @ids = ($feature->annotation->get_Annotations('ID'))) {
+    $name = $ids[0]->value;
+    $self->warn("only using first of feature's multiple IDs: ".join(',', map {$_->value} @ids)) if scalar(@ids) > 1;
+  } else {
+    $name = 'anonymous';
+  }
+
+  my $score = $feature->score || 0;
+  # Only an explicit reverse strand (-1) is written as '-'; forward, unknown
+  # (0) and undefined strands default to '+'.
+  my $strand = ($feature->strand || 0) == -1 ? '-' : '+';
+  my $thick_start = '';  #not implemented, used for CDS
+  my $thick_end = '';    #not implemented, used for CDS
+  my $reserved = 0;
+  my $block_count = '';  #not implemented, used for sub features
+  my $block_sizes = '';  #not implemented, used for sub features
+  my $block_starts = ''; #not implemented, used for sub features
+
+  # One record per line: terminate with a newline so records do not run
+  # together in the output.
+  $self->_print(join("\t",($chrom,$chrom_start,$chrom_end,$name,$score,$strand,$thick_start,$thick_end,$reserved,$block_count,$block_sizes, $block_starts))."\n");
+  $self->write_feature($_) foreach $feature->get_SeqFeatures();
+}
+
+sub next_feature {
+  # Reading is not supported by this driver: the call is handed straight
+  # to throw_not_implemented().
+  my $self = shift;
+  $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gff.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gff.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gff.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,914 @@
+=pod
+
+=head1 NAME
+
+Bio::FeatureIO::gff - read/write GFF feature files
+
+=head1 SYNOPSIS
+
+  my $feature; #get a Bio::SeqFeature::Annotated somehow
+  my $featureOut = Bio::FeatureIO->new(
+    -format => 'gff',
+    -version => 3,
+    -fh => \*STDOUT,
+    -validate_terms => 1, #boolean. validate ontology terms online?  default 0 (false).
+  );
+  $featureOut->write_feature($feature);
+
+=head1 DESCRIPTION
+
+ Currently implemented:
+
+ version         read?   write?
+ ------------------------------
+ GFF 1             N       N
+ GFF 2             N       N
+ GFF 2.5 (GTF)     N       Y
+ GFF 3             Y       Y
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                 - General discussion
+  http://bioperl.org/wiki/Mailing_list  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Allen Day, <allenday at ucla.edu>
+
+=head1 CONTRIBUTORS
+
+ Steffen Grossmann, <grossman at molgen.mpg.de>
+ Scott Cain, <cain at cshl.edu>
+ Rob Edwards <rob at salmonella.org>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::FeatureIO::gff;
+use strict;
+
+#these are alphabetical, keep them that way.
+use Bio::Annotation::DBLink;
+use Bio::Annotation::OntologyTerm;
+use Bio::Annotation::SimpleValue;
+use Bio::Annotation::Target;
+use Bio::FeatureIO;
+use Bio::Ontology::OntologyStore;
+use Bio::OntologyIO;
+use Bio::SeqFeature::Annotated;
+use Bio::SeqIO;
+use URI::Escape;
+
+use base qw(Bio::FeatureIO);
+
+use constant DEFAULT_VERSION => 3;
+my $RESERVED_TAGS   = "ID|Name|Alias|Parent|Target|Gap|Derives_from|Note|Dbxref|Ontology_term|Index";
+
+# Sets up the GFF stream from the constructor arguments.
+#   -version         GFF version, defaults to DEFAULT_VERSION
+#   -validate_terms  boolean, defaults to 0
+#   -file            a value starting with '>' selects the write branch
+sub _initialize {
+  my($self,%arg) = @_;
+
+  $self->SUPER::_initialize(%arg);
+
+  $self->version( $arg{-version}        || DEFAULT_VERSION);
+  $self->validate($arg{-validate_terms} || 0);
+
+  # -file may be absent (e.g. when a handle is passed via -fh), so do not
+  # pattern-match against an undefined value.
+  my $file = defined $arg{-file} ? $arg{-file} : '';
+
+  if ($file =~ /^>/) {
+    # Write branch: emit the version header exactly once (it used to be
+    # printed a second time by a duplicated copy of this test).
+    $self->_print("##gff-version " . $self->version() . "\n");
+  }
+  else {
+    # NOTE(review): a writer opened via -fh also lands here and gets no
+    # header - confirm whether the write test should look at more than -file.
+    # Hand leading '##' / '>' lines to _handle_directive(), then push back
+    # the first line that is neither.
+    my $directive;
+    while(($directive = $self->_readline()) && ( $directive =~ /^##/ || $directive =~ /^>/)){
+      $self->_handle_directive($directive);
+    }
+    $self->_pushback($directive);
+  }
+
+  #need to validate against SOFA, no SO
+  if ($self->validate) {
+    $self->so(
+              Bio::Ontology::OntologyStore->get_ontology('Sequence Ontology Feature Annotation')
+              );
+  }
+}
+
+=head2 next_feature()
+
+ Usage   : my $feature = $featureio->next_feature();
+ Function: reads a feature record from a GFF stream and returns it as an object.
+ Returns : a Bio::SeqFeature::Annotated object
+ Args    : N/A
+
+=cut
+
+sub next_feature {
+  my $self = shift;
+
+  # anything _buffer_feature() hands back is returned first
+  my ($buffered) = $self->_buffer_feature();
+  return $buffered if $buffered;
+
+  # nothing more to return once the stream is in FASTA mode
+  return if $self->fasta_mode();
+
+  # skip blank lines and single-'#' comments; stop at the first other line
+  # (directives start with '##' and are kept), or when input runs out
+  my $line;
+  while ($line = $self->_readline()) {
+    next if $line =~ /^\s*$/;
+    next if $line =~ /^\#[^#]/;
+    last;
+  }
+  return unless $line;
+
+  # a '>' line without a preceding directive: switch to FASTA mode and
+  # leave the line in the stream
+  if ($line =~ /^>/) {
+    $self->_pushback($line);
+    $self->fasta_mode(1);
+    return;
+  }
+
+  # a directive: process it, then recurse for the following line; this
+  # ends on a real feature or at end of input
+  if ($line =~ /^##/) {
+    $self->_handle_directive($line);
+    return $self->next_feature();
+  }
+
+  # everything else is treated as a feature line
+  return $self->_handle_feature($line);
+}
+
+=head2 next_feature_group
+
+ Title   : next_feature_group
+ Usage   : @feature_group = $stream->next_feature_group
+ Function: Reads the next feature_group from $stream and returns it.
+
+           Feature groups in GFF3 files are separated by '###' directives. The
+           features in a group might form a hierarchical structure. The
+           complete hierarchy of features is returned, i.e. the returned array
+           represents only the top-level features.  Lower-level features can
+           be accessed using the 'get_SeqFeatures' method recursively.
+
+ Example : # getting the complete hierarchy of features in a GFF3 file
+           my @toplevel_features;
+           while (my @fg = $stream->next_feature_group) {
+               push(@toplevel_features, @fg);
+           }
+ Returns : an array of Bio::SeqFeature::Annotated objects
+ Args    : none
+
+=cut
+
+sub next_feature_group {
+  my $self = shift;
+
+  my %seen_ids;        # ID attribute value => feature carrying that ID
+  my @all_feats;       # every feature read for this group, in order
+  my @toplevel_feats;  # features without a Parent annotation
+
+  $self->{group_not_done} = 1;
+
+  # Pass 1: read features while the group flag is still set, remembering
+  # those that carry an ID.  The flag is tested before each read.
+  while ($self->{group_not_done}) {
+    my $feat = $self->next_feature();
+    last unless $feat;
+
+    my $anno_ID = $feat->get_Annotations('ID');
+    if ($anno_ID) {
+      my $attr_ID = $anno_ID->value;
+      if (exists($seen_ids{$attr_ID})) {
+        $self->throw("Oops! ID $attr_ID exists more than once in your file!");
+      }
+      $seen_ids{$attr_ID} = $feat;
+    }
+    push(@all_feats, $feat);
+  }
+
+  # Pass 2: attach each feature to every parent it names; features naming
+  # no parent are the top-level ones.
+  foreach my $feat (@all_feats) {
+    my @parents = $feat->get_Annotations('Parent');
+    unless (@parents) {
+      push(@toplevel_feats, $feat);
+      next;
+    }
+    foreach my $parent (@parents) {
+      my $parent_id = $parent->value;
+      $self->throw("Parent with ID $parent_id not found!") unless (exists($seen_ids{$parent_id}));
+      $seen_ids{$parent_id}->add_SeqFeature($feat);
+    }
+  }
+
+  return @toplevel_feats;
+}
+
+=head2 next_seq()
+
+access the FASTA section (if any) at the end of the GFF stream.  note that this method
+will return undef if not all features in the stream have been handled
+
+=cut
+
+sub next_seq() {
+  # Return the next sequence of the trailing FASTA section, or nothing
+  # while the stream has not switched to FASTA mode.
+  my ($self) = @_;
+
+  $self->fasta_mode() or return;
+
+  # lazily build the Bio::SeqIO reader on the first call, sharing our filehandle
+  $self->seqio( Bio::SeqIO->new(-format => 'fasta', -fh => $self->_fh()) )
+    unless $self->seqio;
+
+  return $self->seqio->next_seq();
+}
+
+=head2 write_feature()
+
+ Usage   : $featureio->write_feature( Bio::SeqFeature::Annotated->new(...) );
+ Function: writes a feature in GFF format.  the GFF version used is governed by the
+           '-version' argument passed to Bio::FeatureIO->new(), and defaults to GFF
+           version 3.
+ Returns : ###FIXME
+ Args    : a Bio::SeqFeature::Annotated object.
+
+=cut
+
+sub write_feature {
+  # Validate the argument, then hand the feature to the writer matching the
+  # configured GFF version (1, 2, 2.5 or 3).  Anything else is an error.
+  my ($self, $feature) = @_;
+
+  $self->throw("gff.pm cannot write_feature unless you give a feature to write.\n")
+    unless $feature;
+  $feature->isa('Bio::SeqFeature::Annotated')
+    or $self->throw("only Bio::SeqFeature::Annotated objects are writeable");
+
+  my $gff_version = $self->version;
+
+  return $self->_write_feature_1($feature)  if $gff_version == 1;
+  return $self->_write_feature_2($feature)  if $gff_version == 2;
+  return $self->_write_feature_25($feature) if $gff_version == 2.5;
+  return $self->_write_feature_3($feature)  if $gff_version == 3;
+
+  $self->throw(sprintf("don't know how to write GFF version %s",$self->version));
+}
+
+################################################################################
+
+=head1 ACCESSORS
+
+=cut
+
+=head2 fasta_mode()
+
+ Usage   : $obj->fasta_mode($newval)
+ Function: 
+ Example : 
+ Returns : value of fasta_mode (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub fasta_mode {
+  # Combined getter/setter for the 'fasta_mode' flag.  A defined argument
+  # replaces the stored value; an undefined one leaves it untouched.
+  my $self     = shift;
+  my $new_mode = shift;
+
+  if (defined $new_mode) {
+    $self->{'fasta_mode'} = $new_mode;
+  }
+
+  return $self->{'fasta_mode'};
+}
+
+=head2 seqio()
+
+ Usage   : $obj->seqio($newval)
+ Function: holds a Bio::SeqIO instance for handling the GFF3 ##FASTA section.
+ Returns : value of seqio (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub seqio {
+  # Combined getter/setter for the stored 'seqio' object.  Only a defined
+  # argument overwrites the current value.
+  my $self      = shift;
+  my $new_seqio = shift;
+
+  if (defined $new_seqio) {
+    $self->{'seqio'} = $new_seqio;
+  }
+
+  return $self->{'seqio'};
+}
+
+=head2 sequence_region()
+
+ Usage   :
+ Function: ###FIXME
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub sequence_region {
+  # Cache of per-sequence region values, keyed by $k.
+  #
+  # Usage   : $obj->sequence_region($k => $v);   # store $v under $k
+  #           my $v = $obj->sequence_region($k); # look up the value for $k
+  # Returns : the stored/looked-up value; nothing when $k is undefined
+  #           (an undefined value is returned for an unknown key)
+  #
+  # Both the store and the lookup use the same 'sequence_region' slot.
+  # The lookup used to read 'sequence-region', so stored values were
+  # never found again.
+  my ($self,$k,$v) = @_;
+
+  return unless defined($k);
+
+  if(defined($v)){
+    $self->{'sequence_region'}{$k} = $v;
+    return $v;
+  }
+
+  return $self->{'sequence_region'}{$k};
+}
+
+
+=head2 so()
+
+ Usage   : $obj->so($newval)
+ Function: holds a Sequence Ontology instance
+ Returns : value of so (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub so {
+  # Combined getter/setter for the 'so' slot.  A defined argument replaces
+  # the stored object.
+  my ($self, $ontology) = @_;
+
+  ###FIXME validate $val object's type
+  if (defined $ontology) {
+    $self->{so} = $ontology;
+  }
+
+  return $self->{so};
+}
+
+=head2 validate()
+
+ Usage   : $obj->validate($newval)
+ Function: true if encountered ontology terms in next_feature()
+           mode should be validated.
+ Returns : value of validate (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub validate {
+  # Combined getter/setter for the 'validate' flag.  Only a defined
+  # argument changes the stored value.
+  my $self = shift;
+  my $flag = shift;
+
+  if (defined $flag) {
+    $self->{'validate'} = $flag;
+  }
+
+  return $self->{'validate'};
+}
+
+=head2 version()
+
+ Usage   : $obj->version($newval)
+ Function: version of GFF to read/write.  valid values are 1, 2, 2.5, and 3.
+ Returns : value of version (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub version {
+  # Getter/setter for the GFF version.  On set, the value must be one of
+  # 1, 2, 2.5 or 3; any other defined value raises an exception.
+  my ($self, $requested) = @_;
+
+  my %valid;
+  @valid{ 1, 2, 2.5, 3 } = (1) x 4;
+
+  if (defined $requested) {
+    if ($valid{$requested}) {
+      return $self->{'version'} = $requested;
+    }
+    $self->throw('invalid version.  valid versions: '.join(' ', sort keys %valid));
+  }
+
+  return $self->{'version'};
+}
+
+################################################################################
+
+=head1 INTERNAL METHODS
+
+=cut
+
+=head2 _buffer_feature()
+
+ Usage   :
+ Function: ###FIXME
+ Returns : 
+ Args    :
+
+=cut
+
+sub _buffer_feature {
+  # With a true argument: append it to the internal buffer and return it.
+  # Without one: hand back the oldest buffered item, or nothing if no
+  # buffer has been created yet.
+  my ($self, $item) = @_;
+
+  if ($item) {
+    push @{ $self->{'buffer'} }, $item;
+    return $item;
+  }
+
+  return shift @{ $self->{'buffer'} } if $self->{'buffer'};
+
+  return;
+}
+
+
+=head1 _handle_directive()
+
+this method is called for lines beginning with '##'.
+
+=cut
+
+sub _handle_directive {
+  # Act on a single '##' directive line.
+  #
+  # Args    : $directive_string - the raw line, including the leading '##'
+  # Effects : gff-version      - throws unless the version is 3
+  #           sequence-region  - builds a 'region' feature, caches it via
+  #                              sequence_region() and buffers it
+  #           *-ontology       - warns that handling is not implemented
+  #           FASTA (or '>')   - switches the parser into FASTA mode
+  #           '#' (i.e. '###') - marks the current feature group as done
+  # Throws  : on any other directive
+  my($self,$directive_string) = @_;
+
+  $directive_string =~ s/^##//; #remove escape
+
+  # the first whitespace-separated token names the directive, the remaining
+  # tokens are its arguments
+  my($directive,@arg) = split /\s+/, $directive_string;
+
+  if($directive eq 'gff-version'){
+    my $version = $arg[0];
+    if($version != 3){
+      $self->throw("this is not a gff version 3 document, it is version '$version'");
+    }
+  }
+
+  elsif($directive eq 'sequence-region'){
+    # RAE: Sequence regions are in the format sequence-region seqid start end
+    # for these we want to store the seqid, start, and end. Then when we validate
+    # we want to make sure that the features are within the seqid/start/end
+
+    my $fta = Bio::Annotation::OntologyTerm->new();
+    $fta->name( 'region');
+
+    my $f = Bio::SeqFeature::Annotated->new();
+    $f->seq_id( $arg[0] );
+    $f->start(  $arg[1] );
+    $f->end(    $arg[2] );
+
+    $f->type(   $fta    );
+
+    #cache this in sequence_region(), we may need it for validation later.
+    $self->sequence_region($f->seq_id => $f);
+
+    #NOTE: is this the right thing to do -- treat this as a feature? -allenday
+    #buffer it to be returned by next_feature()
+    $self->_buffer_feature($f);
+  }
+
+  elsif($directive eq 'feature-ontology'){
+    $self->warn("'##$directive' directive handling not yet implemented");
+  }
+
+  elsif($directive eq 'attribute-ontology'){
+    $self->warn("'##$directive' directive handling not yet implemented");
+  }
+
+  elsif($directive eq 'source-ontology'){
+    $self->warn("'##$directive' directive handling not yet implemented");
+  }
+
+  elsif($directive eq 'FASTA' or $directive =~ /^>/){
+    #next_seq() will take care of this.
+    $self->fasta_mode(1);
+    return;
+  }
+
+  elsif($directive eq '#'){
+    #all forward references resolved
+    $self->{group_not_done} = 0;
+  }
+
+  else {
+    $self->throw("don't know what do do with directive: '##".$directive."'");
+  }
+}
+
+=head1 _handle_feature()
+
+this method is called for each line not beginning with '#'.  it parses the line and returns a
+Bio::SeqFeature::Annotated object.
+
+=cut
+
+sub _handle_feature {
+  # Parse one tab-separated, nine-column feature line into a
+  # Bio::SeqFeature::Annotated object.
+  #
+  # Args    : $feature_string - the raw feature line
+  # Returns : the populated Bio::SeqFeature::Annotated object
+  # Throws  : on validation failures (only when validate() is true), and
+  #           always on malformed Dbxref or Target values, on more than one
+  #           ID or Name value, and on capitalised attribute tags that do
+  #           not match $RESERVED_TAGS
+  my($self,$feature_string) = @_;
+
+  my $feat = Bio::SeqFeature::Annotated->new();
+
+  my($seq,$source,$type,$start,$end,$score,$strand,$phase,$attribute_string) = split /\t/, $feature_string;
+
+  $feat->seq_id($seq);
+  $feat->source($source);
+  $feat->start($start) unless $start eq '.';
+  $feat->end($end) unless $end eq '.';
+  $feat->strand($strand eq '+' ? 1 : $strand eq '-' ? -1 : 0);
+  $feat->score($score);
+  $feat->phase($phase);
+
+  my $fta = Bio::Annotation::OntologyTerm->new();
+
+  if($self->validate()){
+    # RAE Added a couple of validations based on the GFF3 spec at http://song.sourceforge.net/gff3.shtml
+    # 1. Validate the id
+    if ($seq =~ /[^a-zA-Z0-9\.\-\:\^\*\$\@\!\+\_\?]/) { # I just escaped everything.
+      $self->throw("Validation Error: seqid ($seq) contains characters that are not [a-zA-Z0-9.:^*\$\@!+_?\-] and not escaped");
+    }
+
+    if ($seq =~ /\s/) {
+      $self->throw("Validation Error: seqid ($seq) contains unescaped whitespace")
+    }
+
+    # NOTE i think we're handling this in as a directive, and this test may be removed -allenday
+    if ($seq =~ /^>/) {
+      $self->throw("Validation Error: seqid ($seq) begins with a >")
+    }
+
+    # 2. Validate the starts and stops.
+    # these need to be within the region's bounds, and
+    # also start <= end.  bail out if either is not true.
+    if ($start > $end) {
+      $self->throw("Validation Error: start ($start) must be less than or equal to end in $seq");
+    }
+
+    my $region = $self->sequence_region($seq);
+    # NOTE: we can only validate against sequence-region that are declared in the file.
+    # if i reference some region from elsewhere, can't validate.  if we want to be really strict
+    # we should bail out here. -allenday
+    #
+    # The bounds test is parenthesised so that it is only evaluated when a
+    # region is known.  Without the parentheses '&&' binds tighter than '||'
+    # and $region->end() is called on an undefined value.
+    if ( defined($region) && ( $start < $region->start() || $end > $region->end() ) ) {
+      $self->throw("Validation Error: sequence location ($seq from $start to $end) does not appear to lie within a defined sequence-region")
+    }
+
+    # 3. Validate the strand.
+    # In the unvalidated version +=1 and -=-1. Everything else is 0. We just need to warn when it is not [+-.?]
+    $self->throw("Validation Error: strand is not one of [+-.?] at $seq") if ($strand =~ /^[^\+\-\.\?]$/);
+
+    # 4. Validate the phase to be one of [.012]
+    $self->throw("Validation Error: phase is not one of [.012] at $seq") if ($phase =~ /^[^\.012]$/);
+
+    # resolve the type column against the ontology held in so()
+    my $feature_type;
+    if($type =~ /^\D+:\d+$/){
+      #looks like an identifier
+      ($feature_type) = $self->so->find_terms(-identifier => $type);
+    } else {
+      #looks like a name
+      ($feature_type) = $self->so->find_terms(-name => $type);
+    }
+
+    if(!$feature_type){
+      $self->throw("Validation Error: couldn't find ontology term for '$type'.");
+    }
+    $fta->term($feature_type);
+  } else {
+
+    # no validation: record the type verbatim as identifier or name
+    if($type =~ /^\D+:\d+$/){
+      #looks like an identifier
+      $fta->identifier($type)
+    } else {
+      $fta->name($type);
+    }
+  }
+
+  $feat->type($fta);
+
+  # split the ninth column into tag => [values]
+  my %attr = ();
+  chomp $attribute_string;
+
+  unless ( $attribute_string eq '.' ) {
+    my @attributes = split ';', $attribute_string;
+    foreach my $attribute (@attributes){
+      my($key,$values) = split '=', $attribute;
+
+      # remove leading and trailing quotes from values
+      $values =~ s/^["']//;
+      $values =~ s/["']$//;
+
+      my @values = map{uri_unescape($_)} split ',', $values;
+
+      #minor hack to allow for multiple instances of the same tag
+      if ($attr{$key}) {
+        my @tmparray = @{$attr{$key}};
+        push @tmparray, @values;
+        $attr{$key} = [@tmparray];
+      } else {
+        $attr{$key} = [@values];
+      }
+    }
+  }
+
+  #Handle Dbxref attributes
+  if($attr{Dbxref}){
+    foreach my $value (@{ $attr{Dbxref} }){
+      my $a = Bio::Annotation::DBLink->new();
+      # a Dbxref is 'database:accession', split at the first colon
+      my($db,$accession) = $value =~ /^(.+?):(.+)$/;
+
+      if(!$db or !$accession){ #dbxref malformed
+        $self->throw("Error in line:\n$feature_string\nDbxref value '$value' did not conform to GFF3 specification");
+        next;
+      }
+
+      $a->database($db);
+      $a->primary_id($accession);
+      $feat->add_Annotation('Dbxref',$a);
+    }
+  }
+
+  #Handle Ontology_term attributes
+  if($attr{Ontology_term}){
+    foreach my $id (@{ $attr{Ontology_term} }){
+      my $a = Bio::Annotation::OntologyTerm->new();
+
+      if($self->validate()){
+        my $ont_name = Bio::Ontology::OntologyStore->guess_ontology($id);
+        my $ont = Bio::Ontology::OntologyStore->get_ontology($ont_name);
+        my($term) = $ont->find_terms(-identifier => $id);
+        $a->term($term);
+      } else {
+        $a->identifier($id);
+      }
+
+      $feat->add_Annotation('Ontology_term',$a);
+    }
+  }
+
+  #Handle Gap attributes
+  if($attr{Gap}){
+     for my $value (@{ $attr{Gap} }) {
+       my $a = Bio::Annotation::SimpleValue->new();
+       $a->value($value);
+       $feat->add_Annotation('Gap',$a);
+     }
+  }
+
+  #Handle Target attributes
+  if($attr{Target}){
+    my $target_collection = Bio::Annotation::Collection->new();
+
+    foreach my $target_string (@{ $attr{Target} } ) {
+
+      #only replace + for space if + has been used in place of it
+      #that is, + could also mean plus strand, and we don't want
+      #to accidentally remove it
+
+      #presumably you can't use + for space and + for strand in the same string.
+      $target_string =~ s/\+/ /g unless $target_string =~ / /;
+
+      my ($t_id,$tstart,$tend,$strand,$extra) = split /\s+/, $target_string;
+      if (!$tend || $extra) { # too much or too little stuff in the string
+        $self->throw("The value in the Target string, $target_string, does not conform to the GFF3 specification");
+      }
+
+      my $a = Bio::Annotation::Target->new(
+           -target_id => $t_id,
+           -start     => $tstart,
+           -end       => $tend,
+      );
+
+      if ($strand && $strand eq '+') {
+        $strand = 1;
+      } elsif ($strand && $strand eq '-') {
+        $strand = -1;
+      } else {
+        $strand = '';
+      }
+
+      $a->strand($strand) if $strand;
+      $feat->add_Annotation('Target',$a);
+    }
+  }
+
+  #Handle ID attribute.  May only have one ID, throw error otherwise
+
+  if($attr{ID}){
+    if(scalar( @{ $attr{ID} } ) > 1){
+      $self->throw("Error in line:\n$feature_string\nA feature may have at most one ID value");
+    }
+
+    #ID's must be unique in the file
+    if ($self->{'allIDs'}->{${$attr{ID}}[0]} && $self->validate()) {
+      $self->throw("Validation Error: The ID ${$attr{ID}}[0] occurs more than once in the file, but should be unique");
+    }
+    $self->{'allIDs'}->{${$attr{ID}}[0]} = 1;
+
+
+    my $a = Bio::Annotation::SimpleValue->new();
+    $a->value( @{ $attr{ID} }[0] );
+    $feat->add_Annotation('ID',$a);
+  }
+
+  #Handle Name attribute.  May only have one Name, throw error otherwise
+  if($attr{Name}){
+    if(scalar( @{ $attr{Name} } ) > 1){
+      $self->throw("Error in line:\n$feature_string\nA feature may have at most one Name value");
+    }
+
+    my $a = Bio::Annotation::SimpleValue->new();
+    $a->value( @{ $attr{Name} }[0] );
+    $feat->add_Annotation('Name',$a);
+  }
+
+  # the remaining canonical tags are stored as plain values, one annotation each
+  foreach my $other_canonical (qw(Alias Parent Note Derives_from Index)){
+    if($attr{$other_canonical}){
+      foreach my $value (@{ $attr{$other_canonical} }){
+        my $a = Bio::Annotation::SimpleValue->new();
+        $a->value($value);
+        $feat->add_Annotation($other_canonical,$a);
+      }
+    }
+  }
+
+  # tags starting with a lower-case letter are handed to the overridable hook
+  my @non_reserved_tags = grep {/^[a-z]/} keys %attr;
+  foreach my $non_reserved_tag (@non_reserved_tags) {
+    foreach my $value (@{ $attr{$non_reserved_tag} }){
+      $feat = $self->_handle_non_reserved_tag($feat,$non_reserved_tag,$value);
+    }
+  }
+
+  # tags starting with an upper-case letter must match $RESERVED_TAGS
+  my @illegal_tags = grep {!/($RESERVED_TAGS)/}
+                     grep {/^[A-Z]/} keys %attr;
+
+  if (@illegal_tags > 0) {
+      my $tags = join(", ", @illegal_tags);
+      $self->throw("The following tag(s) are illegal and are causing this parser to die: $tags");
+  }
+
+  return $feat;
+}
+
+=head2 _handle_non_reserved_tag()
+
+ Usage   : $self->_handle_non_reserved_tag($feature,$tag,$value)
+ Function: Deal with non-reserved word tags in the ninth column
+ Returns : An updated Bio::SeqFeature::Annotated object
+ Args    : A Bio::SeqFeature::Annotated and a tag/value pair
+
+Note that this method can be overridden in a subclass to provide
+special handling of non-reserved word tags.
+
+=cut
+
+sub _handle_non_reserved_tag {
+  # Default treatment of a non-reserved tag: wrap the value in a
+  # SimpleValue annotation, attach it under the tag name, and return the
+  # feature.
+  my ($self, $feat, $tag, $value) = @_;
+
+  # to customize through subclassing and overriding:
+  #if ($tag eq 'someTagOfInterest') {
+  #  do something different
+  # else { do what is below
+
+  my $annotation = Bio::Annotation::SimpleValue->new();
+  $annotation->value($value);
+  $feat->add_Annotation($tag, $annotation);
+
+  return $feat;
+}
+
+=head1 _write_feature_1()
+
+write a feature in GFF v1 format.  currently not implemented.
+
+=cut
+
+sub _write_feature_1 {
+  # GFF v1 output is not implemented: always raises an exception naming
+  # the configured version.
+  my ($self, $feature) = @_;
+
+  my $message = sprintf("write_feature unimplemented for GFF version %s",$self->version);
+  $self->throw($message);
+}
+
+=head1 _write_feature_2()
+
+write a feature in GFF v2 format.  currently not implemented.
+
+=cut
+
+sub _write_feature_2 {
+  # GFF v2 output is not implemented: always raises an exception naming
+  # the configured version.
+  my ($self, $feature) = @_;
+
+  my $message = sprintf("write_feature unimplemented for GFF version %s",$self->version);
+  $self->throw($message);
+}
+
+=head1 _write_feature_25()
+
+write a feature in GFF v2.5 (aka GTF) format.
+
+=cut
+
+sub _write_feature_25 {
+  # Print a feature and, recursively, its sub-features as GTF lines.  Only
+  # EXON, CDS, start_codon and stop_codon rows are printed.  Every row of
+  # one hierarchy shares the group id taken from the top-level feature's
+  # ID annotation.
+  my ($self, $feature, $group) = @_;
+
+  #the top-level feature is an aggregate of all subfeatures
+  $group = ($feature->get_Annotations('ID'))[0]->value
+    unless defined($group);
+
+  my $seq    = $feature->seq_id->value;
+  my $source = $feature->source->value;
+
+  my $type = $feature->type->name;
+  if ($type eq 'exon') {
+    $type = 'EXON'; #a GTF peculiarity, incosistent with the sequence ontology.
+  }
+
+  my $min = $feature->start || '.';
+  my $max = $feature->end   || '.';
+
+  my $strand = '.';
+  if ($feature->strand == 1) {
+    $strand = '+';
+  }
+  elsif ($feature->strand == -1) {
+    $strand = '-';
+  }
+
+  my $score = $feature->score->value;
+  my $phase = $feature->phase->value;
+
+  #these are the only valid types in a GTF document
+  my $is_gtf_type = grep { $type eq $_ } qw(EXON CDS start_codon stop_codon);
+  if ($is_gtf_type) {
+    my $attr = sprintf('gene_id "%s"; transcript_id "%s";',$group,$group);
+    $self->_print(
+      sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+              $seq,$source,$type,$min,$max,$score,$strand,$phase,$attr)
+    );
+  }
+
+  # children inherit the group of the top-level feature
+  for my $child ($feature->get_SeqFeatures) {
+    $self->_write_feature_25($child,$group);
+  }
+}
+
+=head1 _write_feature_3()
+
+write a feature in GFF v3 format.
+
+=cut
+
+sub _write_feature_3 {
+  # Print a feature and, recursively, its sub-features as GFF3 lines.
+  #
+  # Args    : $feature - the feature to write
+  # Throws  : if the feature carries more than one 'ID' annotation
+  #
+  # Column 9 is assembled from the 'Name', 'ID', 'Parent', 'dblink',
+  # 'ontology_term', 'comment' and 'Target' annotations.
+  my($self,$feature) = @_;
+  my $seq    = $feature->seq_id->value;
+  my $source;
+  if ($feature->source()) {
+    $source = $feature->source->value;
+  }
+  else {
+    $source = $feature->source() || "unknownsource";
+  }
+  my $type;
+  if ($feature->type()) { $type = $feature->type->name; }
+  else { $type = "region"; }
+  my $min    = $feature->start   || '.';
+  my $max    = $feature->end     || '.';
+  my $strand = $feature->strand == 1 ? '+' : $feature->strand == -1 ? '-' : '.';
+  my $score  = $feature->score->value;
+  my $phase  = $feature->phase->value;
+
+  my @attr;
+  if(my @v = ($feature->get_Annotations('Name'))){
+    my $vstring = join ',', map {uri_escape($_->value)} @v;
+    push @attr, "Name=$vstring";
+  }
+  if(my @v = ($feature->get_Annotations('ID'))){
+    my $vstring = join ',', map {uri_escape($_->value)} @v;
+    push @attr, "ID=$vstring";
+    # double-quoted so that \n is a real newline in the message; the
+    # single-quoted original printed a literal backslash-n
+    $self->throw("GFF3 features may have at most one ID, feature with these IDs is invalid:\n".$vstring) if scalar(@v) > 1;
+  }
+  if(my @v = ($feature->get_Annotations('Parent'))){
+    my $vstring = join ',', map {uri_escape($_->value)} @v;
+    push @attr, "Parent=$vstring";
+  }
+  if(my @v = ($feature->get_Annotations('dblink'))){
+    my $vstring = join ',', map {uri_escape($_->database .':'. $_->primary_id)} @v;
+    push @attr, "Dbxref=$vstring";
+  }
+  if(my @v = ($feature->get_Annotations('ontology_term'))){
+    my $vstring = join ',', map {uri_escape($_->identifier)} @v;
+    push @attr, "Ontology_term=$vstring";
+  }
+  if(my @v = ($feature->get_Annotations('comment'))){
+    my $vstring = join ',', map {uri_escape($_->text)} @v;
+    push @attr, "Note=$vstring";
+  }
+  if(my @v = ($feature->get_Annotations('Target'))){
+    # accept both numeric and symbolic strand values on the target
+    my %strand_map = ( 1=>'+', 0=>'', -1=>'-', '+' => '+', '-' => '-' );
+    my $vstring = join ',', map {
+      uri_escape($_->target_id).' '.$_->start.' '.$_->end.(defined $_->strand ? ' '.$strand_map{$_->strand} : '')
+    } @v;
+    push @attr, "Target=$vstring";
+  }
+
+  my $attr = join ';', @attr;
+
+  my $outstring = sprintf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+                          $seq,$source,$type,$min,$max,$score,$strand,$phase,$attr);
+
+  $self->_print($outstring);
+
+  # sub-features follow their parent in the output
+  foreach my $subfeat ($feature->get_SeqFeatures){
+    $self->_write_feature_3($subfeat);
+  }
+}
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gtf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gtf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/gtf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,73 @@
+# $Id: gtf.pm,v 1.4.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::FeatureIO::gtf
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::FeatureIO::gtf - read write features in GTF format
+
+=head1 SYNOPSIS
+
+L<Bio::FeatureIO::gff>
+
+=head1 DESCRIPTION
+
+GTF, is also known as GFF v2.5.  This class is simply a subclass
+of Bio::FeatureIO::gff that initializes with -version =E<gt> 2.5.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::FeatureIO::gtf;
+use base qw(Bio::FeatureIO::gff);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+sub _initialize {
+  # Force GFF version 2.5 (GTF) regardless of what the caller passed, then
+  # defer to the parent class for the rest of the setup.
+  #
+  # NOTE(review): the sibling FeatureIO drivers override _initialize and
+  # this body calls SUPER::_initialize, so _initialize is presumably the
+  # hook the constructor invokes -- confirm against Bio::FeatureIO->new.
+  # Under the old name 'initialize' the override would not be picked up and
+  # -version would not be forced to 2.5.
+  my($self,%arg) = @_;
+  $arg{-version} = 2.5;
+  $self->SUPER::_initialize(%arg);
+  return 1;
+}
+
+sub initialize {
+  # Kept for backward compatibility with callers of the previous name.
+  my $self = shift;
+  return $self->_initialize(@_);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/interpro.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/interpro.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/interpro.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,206 @@
+
+=head1 NAME
+
+Bio::FeatureIO::interpro - read features from InterPro XML
+
+=head1 SYNOPSIS
+
+  my $in = Bio::FeatureIO->new(-format=>'interpro');
+  while (my $feat = $in->next_feature) {
+    # do something with the Bio::SeqFeatureI object
+  }
+
+=head1 DESCRIPTION
+
+See L<http://www.ebi.ac.uk/interpro/documentation.html>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::FeatureIO::interpro;
+
+use strict;
+use base qw(Bio::FeatureIO);
+use Bio::SeqFeature::Annotated;
+use Bio::OntologyIO;
+
+use Bio::Annotation::Comment;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::OntologyTerm;
+use Bio::Annotation::SimpleValue;
+use Bio::Annotation::Target;
+
+use URI::Escape;
+use XML::DOM;
+use XML::DOM::XPath;
+
+sub _initialize {
+  # Run the parent initialisation, install an XML::DOM parser, and skip
+  # input lines until the first one containing '<protein'.  That line is
+  # pushed back so next_feature() starts on it.
+  my ($self, %arg) = @_;
+
+  $self->SUPER::_initialize(%arg);
+  $self->xml_parser(XML::DOM::Parser->new());
+
+  # discard the preamble; stop on end of input or at the first record
+  my $line = $self->_readline();
+  $line = $self->_readline() while $line && $line !~ /<protein/;
+
+  $self->_pushback($line);
+}
+
+sub next_feature {
+  # Return the next feature from the stream.
+  #
+  # One <protein> record can yield many features (one per match location).
+  # They are queued in the feature buffer and handed out one per call.
+  #
+  # Returns : a Bio::SeqFeature::Annotated object, or nothing once no
+  #           further <protein> record is found
+  #
+  # Changes from the earlier version:
+  #  * a <protein> record with no match locations no longer ends the
+  #    iteration; the following records are tried instead
+  #  * the DOM tree of each record is disposed of after use, because
+  #    XML::DOM trees contain circular references that are otherwise
+  #    never freed
+  #  * the unused outer line buffer variable is gone
+  my $self = shift;
+
+  #try to dump buffer from last record before moving on to next record
+  my $f = $self->_shift_feature_buffer();
+  if($f){
+    return $f;
+  }
+
+  while (1) {
+    my $ok = 0;      #true if there is another <protein/> record in stream
+    my $record = ''; #holds the record to be parsed and returned.
+
+    while(my $buf = $self->_readline()){
+      $ok = 1 if $buf =~ m!<protein!;
+      $record .= $buf;
+      last if $buf =~ m!</protein>!;
+    }
+    return unless $ok;
+
+    my $dom = $self->xml_parser->parse($record);
+
+    my ($pNode) = $dom->findnodes('/protein');
+
+    my @iNodes = $pNode->findnodes('/protein/interpro');
+
+    foreach my $iNode (@iNodes){
+      my @cNodes = $iNode->findnodes('classification');
+      my @mNodes = $iNode->findnodes('match');
+
+      #we don't handle these
+      #my @nNodes = $iNode->findnodes('contains');
+      #my @fNodes = $iNode->findnodes('found_in');
+
+      foreach my $mNode (@mNodes){
+        my @lNodes = $mNode->findnodes('location');
+        foreach my $lNode (@lNodes){
+          my $feature = Bio::SeqFeature::Annotated->new(
+                                                        -start  => $lNode->getAttribute('start'),
+                                                        -end    => $lNode->getAttribute('end'),
+                                                        -score  => $lNode->getAttribute('score'),
+#                                                        -seq_id => $pNode->getAttribute('id'),
+                                                       );
+          $feature->seq_id->value($pNode->getAttribute('id'));
+
+#warn $pNode->getAttribute('id');
+
+          $feature->source( $lNode->getAttribute('evidence') );
+
+          my $t = Bio::Annotation::OntologyTerm->new(-identifier => 'SO:0000417', -name => 'polypeptide_domain');
+          $feature->add_Annotation('type',$t);
+
+          my $c = Bio::Annotation::Comment->new(-tagname => 'comment', -text => $iNode->getAttribute('name'));
+          $feature->add_Annotation($c);
+
+          my $d = Bio::Annotation::DBLink->new();
+          $d->database($mNode->getAttribute('dbname'));
+          $d->primary_id($mNode->getAttribute('id'));
+          $d->optional_id($mNode->getAttribute('name'));
+          $feature->annotation->add_Annotation('dblink',$d);
+
+          my $s = Bio::Annotation::SimpleValue->new(-tagname => 'status', -value => $lNode->getAttribute('status'));
+          $feature->annotation->add_Annotation($s);
+
+          # one ontology_term annotation per <classification> of the entry
+          foreach my $cNode (@cNodes){
+            my $o = Bio::Annotation::OntologyTerm->new(-identifier => $cNode->getAttribute('id'));
+            $feature->annotation->add_Annotation('ontology_term',$o);
+          }
+
+          $self->_push_feature_buffer($feature);
+        }
+      }
+    }
+
+    # only attribute strings were copied out of the tree, so it can go now
+    $dom->dispose;
+
+    my $next = $self->_shift_feature_buffer;
+    return $next if $next;
+    # this record produced no features: move on to the next one
+  }
+}
+
+=head2 _push_feature_buffer()
+
+ Usage   :
+ Function:
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _push_feature_buffer {
+  # Append a feature to the end of the pending-feature queue.  Arguments
+  # that are not references are ignored.
+  my ($self, $feature) = @_;
+
+  push @{ $self->{feature_buffer} }, $feature if ref($feature);
+}
+
+=head2 _shift_feature_buffer()
+
+ Usage   :
+ Function:
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _shift_feature_buffer {
+  # Remove and return the oldest queued feature.  Returns undef when no
+  # queue exists yet or when it is empty.
+  my $self  = shift;
+  my $queue = $self->{feature_buffer};
+
+  return undef unless $queue;
+  return shift @{ $queue };
+}
+
+=head2 xml_parser()
+
+ Usage   : $obj->xml_parser($newval)
+ Function: 
+ Example : 
+ Returns : value of xml_parser (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub xml_parser {
+  # Combined getter/setter for the stored XML parser object.  Only a
+  # defined argument replaces the current value.
+  my $self   = shift;
+  my $parser = shift;
+
+  if (defined $parser) {
+    $self->{'xml_parser'} = $parser;
+  }
+
+  return $self->{'xml_parser'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/ptt.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/ptt.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO/ptt.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,233 @@
+=pod
+
+=head1 NAME
+
+Bio::FeatureIO::ptt - read/write features in PTT format
+
+=head1 SYNOPSIS
+
+ # read features 
+ my $fin = Bio::FeatureIO->new(-file=>'genes.ptt', -format=>'ptt');
+ my @cds;
+ while (my $f = $fin->next_feature) {
+   push @cds, $f if $f->strand > 0;
+ }
+
+ # write features (NOT IMPLEMENTED)
+ my $fout = Bio::FeatureIO->new(-fh=>\*STDOUT, -format=>'ptt');
+ for my $f (@cds) {
+   $fout->write_feature($f);
+ }
+
+=head1 DESCRIPTION
+
+The PTT file format is a table of protein features. 
+It is used mainly by NCBI who produce PTT files for 
+all their published genomes found in L<ftp://ftp.ncbi.nih.gov/genomes/>.
+It has the following format:
+
+=over 4
+
+=item Line 1
+
+Description of sequence to which the features belong
+ eg. "Leptospira interrogans chromosome II, complete sequence - 0..358943"
+
+It is usually equivalent to the DEFINITION line of a Genbank file,
+with the length of the sequence appended. It is unclear why "0" is 
+used as a starting range, it should be "1".
+
+=item Line 2
+
+Number of feature lines in the table
+ eg. "367 proteins"
+
+=item Line 3
+
+Column headers, tab separated
+ eg. "Location  Strand  Length  PID Gene  Synonym Code  COG Product"
+
+ Location : "begin..end" span of feature
+ Strand   : "+" or "-"
+ Length   : number of amino acids excluding the stop codon
+ PID      : analogous to Genbank /db_xref="GI:xxxxxxxxx"
+ Gene     : analogous to Genbank /gene="xxxx"
+ Synonym  : analogous to Genbank /locus_tag="xxxx"
+ Code     : not described here; "-" in the example feature line below
+ COG      : CDD COG code with COG letter categories appended
+ Product  : analogous to Genbank /product="xxxx"
+
+=item Line 4 onwards
+
+Feature lines, nine columns, tab separated, "-" used for empty fields
+ eg. "2491..3423  + 310 24217063  metF  LB002 - COG0685E  5,10-methylenetetrahydrofolate reductase"
+
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Torsten Seemann
+
+Email torsten.seemann AT infotech.monash.edu.au
+
+=head1 CONTRIBUTORS
+
+Based on bed.pm and gff.pm by Allen Day.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::FeatureIO::ptt;
+
+require 5.006_001;
+
+use strict;
+use base qw(Bio::FeatureIO);
+use Bio::SeqFeature::Generic;
+
+# map tab-separated column number to field name
+# (zero-based index into the fields of one feature line; next_feature()
+# uses the names of columns 2..8 as tag names)
+our %NAME_OF = (
+  0 => 'Location',
+  1 => 'Strand',
+  2 => 'Length', 
+  3 => 'PID', 
+  4 => 'Gene',  
+  5 => 'Synonym',
+  6 => 'Code',  
+  7 => 'COG', 
+  8 => 'Product',
+);
+# number of columns every feature line must have
+our $NUM_COL = 9;
+
+=head2 _initialize
+
+ Title   : _initialize
+ Function: Reading? parses the header of the input
+           Writing? 
+
+=cut
+
+sub _initialize {
+  # Run the parent initialisation.  In read mode, also consume the three
+  # header lines: description, protein count, and column titles.
+  my ($self, %arg) = @_;
+
+  $self->SUPER::_initialize(%arg);
+
+  if ($self->mode eq 'r') {
+    # Line 1: free-text description
+    my $header = $self->_readline();
+    chomp $header;
+    $self->description($header);
+
+    # Line 2: "<n> proteins"
+    my $count_line = $self->_readline();
+    $self->throw("Invalid protein count")
+      unless $count_line =~ m/^(\d+) proteins/;
+    $self->protein_count($1);
+
+    # Line 3: column headers, read and discarded
+    $self->_readline();
+  }
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $io->next_feature()
+ Function: read the next feature from the PTT file
+ Example : 
+ Args    : 
+ Returns : Bio::SeqFeatureI object
+
+=cut
+
+sub next_feature {
+  # Read one feature line and turn it into a Bio::SeqFeature::Generic
+  # object with primary tag 'CDS'.
+  #
+  # Returns : the feature, or nothing in write mode or at end of input
+  # Throws  : if the line does not have exactly $NUM_COL tab-separated
+  #           columns, or if the location or strand column is malformed
+  my $self = shift;
+  $self->mode eq 'r' || return; # returns if can't read next_feature when we're in write mode
+
+  my $line = $self->_readline() or return; # returns if end of file, no more features?
+  chomp $line;
+  my @col = split m/\t/, $line;
+
+  # the count can be wrong in either direction, so report both numbers
+  # rather than claiming there are "too many" columns
+  @col==$NUM_COL
+    or $self->throw(sprintf("Expected %d columns in PTT line but found %d", $NUM_COL, scalar @col));
+
+  # column 1: "begin..end"
+  $col[0] =~ m/(\d+)\.\.(\d+)/ or $self->throw("Invalid location (column 1)");
+  my $feat = Bio::SeqFeature::Generic->new(-start=>$1, -end=>$2, -primary=>'CDS');
+
+  # column 2: "+" or "-"
+  $col[1] =~ m/^([+-])$/ or $self->throw("Invalid strand (column 2)");
+  $feat->strand($1 eq '+' ? +1 : -1);
+
+  # remaining columns become tags named after the column; "-" means empty
+  for my $i (2 .. $NUM_COL-1) {
+    $feat->add_tag_value($NAME_OF{$i}, $col[$i]) if $col[$i] ne '-';
+  }
+  return $feat;
+}
+
+=head2 write_feature (NOT IMPLEMENTED)
+
+ Title   : write_feature
+ Usage   : $io->write_feature($feature)
+ Function: write a Bio::SeqFeatureI object in PTT format
+ Example : 
+ Args    : Bio::SeqFeatureI object
+ Returns : 
+
+=cut
+
+sub write_feature {
+  # Writing is not supported for this format: delegate to the object's
+  # throw_not_implemented method.
+  my $self = shift;
+  $self->throw_not_implemented;
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function: set/get the PTT file description for/from line one
+ Example : 
+ Returns : value of description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub description {
+  # Getter/setter for the file description.  Any argument, including
+  # undef, replaces the stored value.
+  my ($self, @args) = @_;
+
+  if (@args) {
+    return $self->{'description'} = $args[0];
+  }
+
+  return $self->{'description'};
+}
+
+=head2 protein_count
+
+ Title   : protein_count
+ Usage   : $obj->protein_count($newval)
+ Function: set/get the PTT protein count for/from line two
+ Example : 
+ Args    : on set, new value (a scalar or undef, optional)
+ Returns : value of protein_count (a scalar)
+
+=cut
+
+sub protein_count {
+  # Getter/setter for the protein count.  Any argument, including undef,
+  # replaces the stored value.
+  my ($self, @args) = @_;
+
+  if (@args) {
+    return $self->{'protein_count'} = $args[0];
+  }
+
+  return $self->{'protein_count'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/FeatureIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,506 @@
+# $Id: FeatureIO.pm,v 1.15.4.3 2006/10/10 03:40:09 jason Exp $
+#
+# BioPerl module for Bio::FeatureIO
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::FeatureIO - Handler for FeatureIO
+
+=head1 SYNOPSIS
+
+  use Bio::FeatureIO;
+
+  #read from a file
+  $in  = Bio::FeatureIO->new(-file => "my.gff" , -format => 'GFF');
+
+  #read from a filehandle
+  $in  = Bio::FeatureIO->new(-fh => \*GFF , -format => 'GFF');
+
+  #read features already attached to a sequence
+  my $feat = Bio::FeatureIO->new(-seq => $seq , -format => 'features');
+
+  #read new features for existing sequence
+  my $seq = Bio::FeatureIO->new(-seq => $seq , -format => 'Das');
+
+  #write out features
+  $out = Bio::FeatureIO->new(-file    => ">outputfilename" ,
+                             -format  => 'GFF' ,
+                             -version => 3);
+
+  while ( my $feature = $in->next_feature() ) {
+    $out->write_feature($feature);
+  }
+
+=head1 DESCRIPTION
+
+An I/O iterator subsystem for genomic sequence features.
+
+Bio::FeatureIO is a handler module for the formats in the FeatureIO set (eg,
+Bio::FeatureIO::GFF). It is the officially sanctioned way of getting at
+the format objects, which most people should use.
+
+The Bio::FeatureIO system can be thought of like biological file handles.
+They are attached to filehandles with smart formatting rules (eg,
+GFF format, or BED format) and 
+can either read or write feature objects (Bio::SeqFeature objects, or
+more correctly, Bio::FeatureHolderI implementing objects, of which 
+Bio::SeqFeature is one such object). If you want to know what to 
+do with a Bio::SeqFeatureI object, read L<Bio::SeqFeatureI>.
+
+The idea is that you request a stream object for a particular format.
+All the stream objects have a notion of an internal file that is read
+from or written to. A particular FeatureIO object instance is configured
+for either input or output. A specific example of a stream object is
+the Bio::FeatureIO::gff object.
+
+Each stream object has functions:
+
+  $stream->next_feature();
+  $stream->write_feature($feature);
+
+=head1 SUPPORTED FORMATS
+
+ name                         module
+ -----------------------------------
+ BED                          bed.pm
+ GFF                          gff.pm
+ GTF                          gtf.pm
+ InterPro (IPRScan 4.0)  interpro.pm
+ PTT (NCBI protein table)     ptt.pm
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::FeatureIO-E<gt>new()
+
+   $featureIO = Bio::FeatureIO->new(-file => 'filename',   -format=>$format);
+   $featureIO = Bio::FeatureIO->new(-fh   => \*FILEHANDLE, -format=>$format);
+   $featureIO = Bio::FeatureIO->new(-seq  => $seq,         -format=>$format);
+
+The new() class method constructs a new Bio::FeatureIO object.  The
+returned object can be used to retrieve or print feature objects. new()
+accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading or writing.  The usual Perl
+conventions apply:
+
+   'file'       # open file for reading
+   '>file'      # open file for writing
+   '>>file'     # open file for appending
+   '+<file'     # open file read/write
+   'command |'  # open a pipe from the command
+   '| command'  # open a pipe to the command
+
+=item -fh
+
+You may provide new() with a previously-opened filehandle.  For
+example, to read from STDIN:
+
+   $featio = Bio::FeatureIO->new(-fh => \*STDIN);
+
+Note that you must pass filehandles as references to globs.
+
+If neither a filehandle nor a filename is specified, then the module
+will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
+semantics.
+
+A string filehandle is handy if you want to modify the output in the
+memory, before printing it out. The following program reads in GFF
+formatted features from a file and prints them out in GTF format with
+some HTML tags:
+
+  use Bio::FeatureIO;
+  use IO::String;
+  my $in  = Bio::FeatureIO->new('-file' => "my.gff" ,
+  			    '-format' => 'GFF');
+  while ( my $f = $in->next_feature() ) {
+      # the output handle is reset for every file
+      my $stringio = IO::String->new($string);
+      my $out = Bio::FeatureIO->new('-fh' => $stringio,
+  			        '-format' => 'gtf');
+      # output goes into $string
+      $out->write_feature($f);
+      # modify $string
+      $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
+      # print into STDOUT
+      print $string;
+  }
+
+=item -format
+
+Specify the format of the file.  See above for list of supported formats
+
+=item -flush
+
+By default, all files (or filehandles) opened for writing features
+will be flushed after each write_feature() (making the file immediately
+usable).  If you don't need this facility and would like to marginally
+improve the efficiency of writing multiple features to the same file
+(or filehandle), pass the -flush option '0' or any other value that
+evaluates as defined but false:
+
+  my $f1 = new Bio::FeatureIO -file   => "<a.f1",
+                              -format => "f1";
+  my $f2 = new Bio::FeatureIO -file   => ">a.f2",
+                              -format => "f2",
+                              -flush  => 0; # go as fast as we can!
+
+  while($feature = $f1->next_feature) { $f2->write_feature($feature) }
+
+=back
+
+=head2 Bio::FeatureIO-E<gt>newFh()
+
+   $fh = Bio::FeatureIO->newFh(-fh   => \*FILEHANDLE, -format=>$format);
+   $fh = Bio::FeatureIO->newFh(-format => $format);
+   # etc.
+
+This constructor behaves like new(), but returns a tied filehandle
+rather than a Bio::FeatureIO object.  You can read features from this
+object using the familiar E<lt>E<gt> operator, and write to it using
+print().  The usual array and $_ semantics work.  For example, you can
+read all feature objects into an array like this:
+
+  @features = <$fh>;
+
+Other operations, such as read(), sysread(), write(), close(), and printf()
+are not supported.
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $feature = $featureIO-E<gt>next_feature()
+
+Fetch the next feature from the stream.
+
+=head2 $featureIO-E<gt>write_feature($feature [,$another_feature,...])
+
+Write the specified feature(s) to the stream.
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These provide the tie interface.  See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#' Let the code begin...
+
+package Bio::FeatureIO;
+
+use strict;
+
+use Symbol();
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::FeatureIO->new(-file => $filename, -format => 'Format')
+ Function: Returns a new feature stream
+ Returns : A Bio::FeatureIO stream initialised with the appropriate format
+ Args    : Named parameters:
+             -file => $filename
+             -fh => filehandle to attach to
+             -format => format
+
+=cut
+
+my $entry = 0;
+
+sub new {
+  # Constructor / factory.
+  #   $caller : class name or existing object
+  #   @args   : named parameters (-file, -fh, -format, ...)
+  # Returns a format-specific Bio::FeatureIO::<format> object, or nothing
+  # if the driver module for the requested format cannot be loaded.
+  my ($caller, @args) = @_;
+  my $class = ref($caller) || $caller;
+
+  # or do we want to call SUPER on an object if $caller is an
+  # object?
+  if( $class =~ /Bio::FeatureIO::(\S+)/ ) {
+
+    # Called on a concrete format driver: build it through the parent
+    # constructor and run the chained _initialize.
+    my ($self) = $class->SUPER::new(@args);
+    $self->_initialize(@args);
+    return $self;
+
+  } else {
+
+    # Called on the generic class: work out the format, load its driver
+    # module and delegate construction to that driver.
+    my %param = @args;
+
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    my $format = $param{'-format'} ||
+      $class->_guess_format( $param{-file} || $ARGV[0] );
+
+    if( ! $format ) {
+      if ($param{-file}) {
+        $format = $class->_guess_format($param{-file});
+      } elsif ($param{-fh}) {
+        $format = $class->_guess_format(undef);
+      }
+    }
+    $format = "\L$format";	# normalize capitalization to lower case
+    return unless( $class->_load_format_module($format) );
+    return "Bio::FeatureIO::$format"->new(@args);
+
+  }
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format')
+           $feature = <$fh>;   # read a feature object
+           print $fh $feature; # write a feature object
+ Returns : filehandle tied to the Bio::FeatureIO::Fh class
+ Args    :
+
+See L<Bio::FeatureIO::Fh>
+
+=cut
+
+sub newFh {
+  my ($class, @args) = @_;
+  # Build the stream first; return nothing if construction gave a false value.
+  my $self = $class->new(@args);
+  return unless $self;
+  return $self->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $feature = <$fh>;   # read a feature object
+           print $fh $feature; # write a feature object
+ Returns : filehandle tied to Bio::FeatureIO class
+ Args    : none
+
+=cut
+
+
+sub fh {
+  my ($self) = @_;
+  # Tie a fresh anonymous glob to this stream's own class (see TIEHANDLE),
+  # so reads and prints on the glob are routed back to $self.
+  my $tie_class = ref($self) || $self;
+  my $glob      = Symbol::gensym();
+  tie $$glob, $tie_class, $self;
+  return $glob;
+}
+
+# Chained initialiser shared by all FeatureIO classes.
+
+sub _initialize {
+    my ($self, @pairs) = @_;
+    my %opt = @pairs;
+
+    # -flush is dealt with by the Root::IO initialiser called below.
+
+    # Record the sequence (if one was given), then set up the IO part.
+    $self->seq($opt{-seq});
+    return $self->_initialize_io(%opt);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $feature = stream->next_feature
+ Function: Reads the next feature object from the stream and returns it.
+
+           Certain driver modules may encounter entries in the stream
+           that are either misformatted or that use syntax not yet
+           understood by the driver. If such an incident is
+           recoverable, e.g., by dismissing a feature of a feature
+           table or some other non-mandatory part of an entry, the
+           driver will issue a warning. In the case of a
+           non-recoverable situation an exception will be thrown.  Do
+           not assume that you can resume parsing the same stream
+           after catching the exception. Note that you can always turn
+           recoverable errors into exceptions by calling
+           $stream->verbose(2).
+
+ Returns : a Bio::SeqFeatureI feature object
+ Args    : none
+
+See L<Bio::Root::RootI>, L<Bio::SeqFeatureI>
+
+=cut
+
+sub next_feature {
+   my $self = shift;
+   # The generic class cannot parse anything; format drivers override this.
+   $self->throw("Sorry, you cannot read from a generic Bio::FeatureIO object.");
+}
+
+=head2 write_feature
+
+ Title   : write_feature
+ Usage   : $stream->write_feature($feature)
+ Function: writes the $feature object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::SeqFeature object
+
+=cut
+
+sub write_feature {
+    my $self = shift;
+    # A plain Bio::FeatureIO object cannot write at all; a subclass that
+    # reaches this method simply has not implemented writing.
+    return ref($self) eq __PACKAGE__
+      ? $self->throw("Sorry, you cannot write to a generic Bio::FeatureIO object.")
+      : $self->throw_not_implemented();
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL FeatureIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+    my ($self, $format) = @_;
+    my $base   = ref($self) || $self;
+    my $module = $base . "::$format";
+    my $ok;
+
+    # Any exception raised while loading is trapped here so the caller only
+    # sees a false return value.
+    eval { $ok = $self->_load_module($module); };
+    return $ok unless $@;
+
+    # Loading failed: report the problem on STDERR and fall through.
+    print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the FeatureIO system please see the FeatureIO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+    return $ok;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $obj->seq() OR $obj->seq($newSeq)
+ Example :
+ Returns : Bio::SeqI object
+ Args    : newSeq (optional)
+
+=cut
+
+sub seq {
+  my ($self, $val) = @_;
+  # Only a defined argument replaces the stored sequence; undef is a plain get.
+  if (defined $val) {
+    $self->{'seq'} = $val;
+  }
+  return $self->{'seq'};
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function: This method is deprecated. Call _fh() instead.
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _filehandle {
+    # Deprecated alias: forwards every argument to _fh() and returns its result.
+    my ($self, @args) = @_;
+    return $self->_fh(@args);
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function: guess format based on file suffix
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+ Notes   : See "SUPPORTED FORMATS"
+
+=cut
+
+sub _guess_format {
+   # Guess the format name from a file name's suffix.
+   #   $file : file name (may be undef or empty)
+   # Returns nothing when no file name is given, otherwise a lower-case
+   # format name; unrecognised suffixes fall back to 'gff'.
+   #
+   # The name is kept in a lexical rather than assigned to the global $_,
+   # so the caller's $_ (and anything aliased to it) is left untouched.
+   my ($class, $file) = @_;
+   return unless $file;
+   return 'gff'     if $file =~ /\.gff3?$/i;
+   return 'gff'     if $file =~ /\.gtf$/i;
+   return 'bed'     if $file =~ /\.bed$/i;
+   return 'ptt'     if $file =~ /\.ptt$/i;
+
+   return 'gff'; #the default
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # Call close() on the object when it is destroyed.
+    $self->close();
+}
+
+sub TIEHANDLE {
+    my $class  = shift;
+    my $stream = shift;
+    # The tied object is a thin wrapper keeping the real stream under 'featio'.
+    my %wrapper = ('featio' => $stream);
+    return bless \%wrapper, $class;
+}
+
+sub READLINE {
+  my $self   = shift;
+  my $stream = $self->{'featio'};
+
+  # Scalar context ($f = <$fh>): hand back a single feature.
+  return $stream->next_feature() unless wantarray;
+
+  # List context (@f = <$fh>): keep reading until a false value comes back.
+  my @features;
+  while (my $feature = $stream->next_feature()) {
+    push @features, $feature;
+  }
+  return @features;
+}
+
+sub PRINT {
+  my ($self, @features) = @_;
+  # print on the tied handle is forwarded to the wrapped stream's writer.
+  $self->{'featio'}->write_feature(@features);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/Edge.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/Edge.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/Edge.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,149 @@
+# $Id: Edge.pm,v 1.11.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graph::Edge
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graph::Edge - encapsulation of an interaction between 2 Bio::Seq objects
+
+=head1 SYNOPSIS
+
+  ## get an interaction between two nodes ##
+
+  my $edge  = $gr->edge( $gr->nodes_by_id('P12345'),
+                         $gr->nodes_by_id('P23456'));
+  my $id    = $edge->object_id();
+  my $wt    = $edge->weight();
+  my @nodes = $edge->nodes();
+
+=head1 DESCRIPTION
+
+This class contains information about a bimolecular interaction.
+At present it just contains data about its component node, a weight
+(if set) and an identifier. Subclasses could hold more specific 
+information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Richard Adams
+
+Email richard.adams at ed.ac.uk
+
+=cut
+
+use strict;
+package Bio::Graph::Edge;
+use base qw(Bio::Root::Root Bio::IdentifiableI);
+
+
+=head2 new
+
+ Name       : new
+ Purpose    : constructor for an edge object
+ Usage      : my $edge = Bio::Graph::Edge->new(-nodes => [$node1,$node2],
+                                               -id    => $id);
+              $graph->add_edge($edge);
+ Returns    : a new Bio::Graph::Edge object 
+ Arguments  : hash -nodes            => array reference of 2 nodes
+                   -id               => edge id
+                   -weight(optional) => weight score.
+
+=cut
+
+sub new {
+	# Edges are blessed arrays, not hashes, so Bio::Root::Root->new() is
+	# bypassed.  Slot layout: 0 and 1 = the two nodes, 2 = weight, 3 = id.
+	my $caller = shift;
+	my $self   = bless [], ref($caller) || $caller;
+
+	my ($weight, $id, $nodes) = $self->_rearrange([qw( WEIGHT ID NODES)], @_);
+
+	# weight and id are stored as given; they stay undef when not supplied
+	@{$self}[0 .. 3] = ($nodes->[0], $nodes->[1], $weight, $id);
+	return $self;
+}
+
+=head2 weight
+
+ Name      : weight
+ Purpose   : get/setter for weight score
+ Usage     : my $weight = $edge->weight();
+ Returns   : a number
+ Arguments : void/ a number
+
+=cut
+
+sub weight {
+	my ($self, @new) = @_;
+	# slot 2 holds the weight; any argument (even undef) overwrites it
+	$self->[2] = $new[0] if @new;
+	return $self->[2];
+}
+
+=head2 object_id
+
+ Name      : object_id
+ Purpose   : get/setter for object_id
+ Usage     : my $id = $edge->object_id();
+ Returns   : a string identifier
+ Arguments : void/ an identifier 
+
+=cut
+
+sub object_id {
+	# Get/set the edge identifier (array slot 3).
+	#   optional argument : new identifier, must not be a reference
+	# Returns the stored identifier, or undef if none is set.
+	# Throws if the new identifier is a reference.
+	my $self = shift;
+	if (@_) {
+		my $v = shift;
+		if (ref ($v)) {
+			$self->throw ("Edge ID must be a text value, not a [".
+							ref($v). "].");
+		}
+		# Store the value validated above.  Shifting @_ again at this point
+		# would pick up the next (normally absent) argument and so store
+		# undef instead of the identifier.
+		$self->[3] = $v;
+	}
+	return $self->[3];
+}
+
+=head2  nodes
+
+ Name      : nodes
+ Purpose   : get/setter for nodes
+ Usage     : my @nodes = $edge->nodes();
+ Returns   : a 2 element list of nodes /void
+ Arguments : void/ a 2 element list of nodes. 
+
+=cut
+
+sub nodes {
+	my $self = shift;
+	# Setting needs both nodes; with fewer arguments the edge is left as it is.
+	if (@_ >= 2) {
+		@{$self}[0, 1] = @_[0, 1];
+	}
+	return @{$self}[0, 1];
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/dip.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/dip.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/dip.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,202 @@
+# $Id: dip.pm,v 1.14.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graph::IO::dip
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graph::IO::dip - class for parsing interaction data in dip format
+
+=head1 SYNOPSIS
+
+Do not use this module directly, use Bio::Graph::IO, for example:
+
+  my $graph_io = Bio::Graph::IO->new(-format => 'dip',
+                                     -file   => 'data.dip');
+
+=head1 METHODS
+
+The naming system is analogous to the SeqIO system, although usually
+next_network() will be called only once per file.
+
+=cut
+
+package Bio::Graph::IO::dip;
+use vars qw($FAC);
+
+use Bio::Graph::ProteinGraph;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Collection;
+use Bio::Graph::Edge;
+use strict;
+use base qw(Bio::Graph::IO);
+
+BEGIN{
+	$FAC = Bio::Seq::SeqFactory->new(-type=>'Bio::Seq::RichSeq');
+}
+
+=head2        next_network
+
+  name        : next_network
+  purpose     : parses a graph file and returns a Bio::Graph::ProteinGraph 
+                object
+  usage       : my $g = $graph_io->next_network();
+  arguments   : none
+  returns     : a Bio::Graph::ProteinGraph object
+
+=cut
+
+sub next_network {
+
+	# Reads the stream line by line, turning every DIP interaction record
+	# into an edge between two nodes created by $FAC.  Nodes are shared
+	# between edges through %seen_nodes, which maps each known id of a node
+	# (GI, PIR, SWP, DIP) to the node object.
+	# Returns the populated Bio::Graph::ProteinGraph.
+	my $self = shift;
+	my %seen_nodes = ();
+	my $graph = Bio::Graph::ProteinGraph->new();
+
+	while (my $l = $self->_readline() ) {
+
+		##get line, only gi and node_id always defined
+		my ($edge_id, $n1, $s1, $p1, $g1, $n2, $s2, $p2, $g2, $score) =
+	 	$l =~/^DIP:(\d+E)\t+
+			(DIP\S+)\t+
+			(SWP\S+)?\t*
+			(PIR\S+)?\t*
+			(GI\S+)\t+
+			(DIP\S+)\t+
+			(SWP\S+)?\t*
+			(PIR\S+)?\t*
+			(GI\S+)\t*
+			(\d\.\d+)? #optional confidence  or weight score 
+			/x;
+
+		## a line that is not an interaction record leaves every capture
+		## undefined; skip it instead of building an edge between two
+		## empty nodes
+		next unless defined $edge_id;
+
+		## skip if score is below threshold
+		if (defined($self->{'_th'}) && defined($score)) {
+			next unless $score >= $self->{'_th'};
+		}
+
+		## build node object if is new node
+		my ($node1, $node2);
+		$n1 =~ s/DIP://;
+		if(!exists($seen_nodes{$n1}) ) {
+			if($g1){ $g1 =~ s/GI://;   }
+			if($p1){ $p1 =~ s/PIR://;  }
+			if($s1){ $s1 =~ s/SWP://;  }
+			## accession preference: SWP, then PIR, then GI
+			my $acc = $s1 || $p1 || $g1;
+			my $ac  = $self->_add_db_links($acc, $s1, $p1,  $n1, $g1);
+			$node1 = $FAC->create(
+								-accession_number => $acc,
+								-primary_id       => $g1,
+								-display_id		  => $acc,
+								-annotation       => $ac,
+								);
+			## register the node under every id it is known by
+			for my $n ($g1, $p1, $s1, $n1) {
+				$seen_nodes{$n} = $node1 if $n;
+			}
+		} else {
+			$node1 = $seen_nodes{$n1};
+		}
+		$n2 =~ s/DIP://;
+		if(!exists($seen_nodes{$n2}) ) {
+			if($g2){$g2 =~ s/GI://; }
+			if($p2){$p2 =~ s/PIR://; }
+			if($s2){$s2 =~ s/SWP://; }
+			my $acc = $s2 || $p2 || $g2;
+			my $ac  = $self->_add_db_links($acc, $s2, $p2,  $n2, $g2);
+			$node2  = $FAC->create(
+								-accession_number => $acc,
+								-primary_id       => $g2,
+								-display_id		  => $acc,
+								-annotation       => $ac,
+								);
+			for my $n ($g2, $p2, $s2, $n2) {
+				$seen_nodes{$n} = $node2 if $n;
+			}
+		} else {
+			$node2 = $seen_nodes{$n2};
+		}
+
+		## create new edge object based on node, weight. 
+		$graph->add_edge(Bio::Graph::Edge->new( -nodes  => [$node1, $node2],
+									-weight => $score,
+									-id     => $edge_id),
+									);
+	}
+
+	## now ensure nodes are accessible by either 1ary or 2ndary ids. 
+	$graph->{'_id_map'} = \%seen_nodes;
+	return $graph;
+}
+
+=head2      write_network
+
+ name     : write_network
+ purpose  : write graph out in dip format
+ arguments: a Bio::Graph::ProteinGraph object
+ returns  : void
+ usage    : $out->write_network($gr);
+
+=cut
+
+sub write_network {
+
+	my ($self, $gr) = @_;
+	unless ($gr && $gr->isa('Bio::Graph::ProteinGraph')) {
+		$self->throw("I need a Bio::Graph::ProteinGraph, not a [".
+		              ref($gr) . "]");
+	}
+
+	# database prefixes, in the column order written for every node
+	my @dbs = qw(DIP SWP PIR GI Ref-Seq RefSeq psixml ens);
+
+	# one output line per edge
+	foreach my $edge ($gr->edges()) {
+		my $line = "DIP:" . $edge->object_id() . "\t";
+
+		foreach my $node ($edge->nodes()) {
+			my %ids = $gr->_get_ids_by_db($node); #need to modify this in graph()
+			# each database gets a tab-terminated column, left empty
+			# when the node has no id for it
+			$line .= join '',
+			         map { exists($ids{$_}) ? $_ . ":$ids{$_}\t" : "\t" } @dbs;
+		}
+
+		# drop the last column separator, then append the weight if defined
+		$line =~ s/\t$//;
+		$line .= "\t" . $edge->weight if defined($edge->weight);
+		$line .= "\n";
+		$self->_print($line);
+	}
+	$self->flush();
+}
+
+sub _add_db_links {
+	# $acc is accepted but not used: the filter on it is disabled.
+	my ($self, $acc, $s1, $p1, $n1, $g1) = @_;
+
+	# candidate database => id pairs; only true ids become annotations
+	my %candidate = ('PIR' => $p1, 'SWP' => $s1, 'DIP' => $n1, 'GI' => $g1);
+	my %ids = map  { ($_ => $candidate{$_}) }
+	          grep { $candidate{$_} } keys %candidate;
+
+	my $ac = Bio::Annotation::Collection->new();
+	foreach my $db (keys %ids) {
+		$ac->add_Annotation('dblink',
+			Bio::Annotation::DBLink->new( -database   => $db,
+			                              -primary_id => $ids{$db},
+			                            ));
+	}
+	return $ac;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/psi_xml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/psi_xml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO/psi_xml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,235 @@
+# $Id: psi_xml.pm,v 1.13.2.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graph::IO::psi_xml
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graph::IO::psi_xml - access and manipulate PSI XML graphs
+
+=head1 SYNOPSIS
+
+Do not use this module directly, use Bio::Graph::IO, for example:
+
+  my $graph_io = Bio::Graph::IO->new(-format => 'psi_xml',
+                                     -file   => 'data.xml');
+
+=head1 DESCRIPTION
+
+PSI XML is a format to describe protein-protein interactions and 
+interaction networks. The following databases support PSI XML:
+
+=over 4
+
+=item BIND  
+
+L<http://www.bind.ca>
+
+=item DIP 
+
+L<http://dip.doe-mbi.ucla.edu/>
+
+=item HPRD    
+
+L<http://www.hprd.org>
+
+=item IntAct  
+
+L<http://www.ebi.ac.uk/intact>
+
+=item MINT    
+
+L<http://cbm.bio.uniroma2.it/mint/>
+
+=back 
+
+Notes on PSI XML from various databases can be found in the Bioperl Wiki
+at L<http://bioperl.org/wiki/Module:Bio::Graph::IO::psi_xml>
+
+Documentation for PSI XML can be found at L<http://psidev.sourceforge.net>.
+ 
+=head1 METHODS
+
+The naming system is analogous to the SeqIO system, although usually
+L<next_network()> will be called only once per file.
+
+=cut
+
+package Bio::Graph::IO::psi_xml;
+use strict;
+use XML::Twig;
+use Bio::Seq::SeqFactory;
+use Bio::Graph::ProteinGraph;
+use Bio::Graph::Edge;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Collection;
+use Bio::Species;
+use vars qw(%species $g $c $fac);
+use base qw(Bio::Graph::IO);
+
+BEGIN{
+	$fac  = Bio::Seq::SeqFactory->new(-type => 'Bio::Seq::RichSeq');
+}
+
+#parsing done by XML::Twig, not by RootIO, therefore override usual new
+sub new {
+	# Blesses an empty hash into $class and passes all constructor
+	# arguments on to _initialize.
+	my ($class, @args) = @_;
+	my $self = bless {}, $class;
+	$self->_initialize(@args);
+	return $self;
+}
+
+sub _initialize  {
+
+  # Hands the constructor arguments to the inherited _initialize_io;
+  # returns early (nothing) when that call yields a false value.
+  my ($self, @args) = @_;
+  return unless $self->SUPER::_initialize_io(@args);
+
+}
+
+=head2     next_network
+
+ name       : next_network
+ purpose    : to construct a protein interaction graph from xml data
+ usage      : my $gr = $io->next_network();
+ arguments  : void
+ returns    : A Bio::Graph::ProteinGraph object
+
+=cut
+
+sub next_network {
+
+ my $self = shift;
+
+ # $g is the package-level graph filled in by the twig handlers; it is
+ # replaced by a fresh graph on every call
+ $g = Bio::Graph::ProteinGraph->new();
+
+ my %handlers = (
+     proteinInteractor => \&_proteinInteractor,
+     interaction       => \&_addEdge,
+ );
+ my $twig = XML::Twig->new( TwigHandlers => \%handlers );
+ $twig->parsefile($self->file);
+ return $g;
+}
+
+=head2   _proteinInteractor
+
+ name      : _proteinInteractor
+ purpose   : parses protein information into Bio::Seq::RichSeq objects
+ returns   : void
+ usage     : internally called by next_network(), 
+ arguments : none.
+
+=cut
+
+# Twig handler registered for 'proteinInteractor' elements in next_network().
+# Builds a sequence object through $fac for the interactor and registers it
+# in the package-level graph $g under every id it is known by.
+#   $twig : the XML::Twig object driving the parse
+#   $pi   : the proteinInteractor element being handled
+sub _proteinInteractor {
+
+	my ($twig, $pi) = @_;
+
+	# NOTE(review): this outer $sp is never used; the species block below
+	# declares its own $sp.
+	my ($acc, $sp, $desc, $taxid,  $prim_id);
+
+	my $org =  $pi->first_child('organism');
+	$taxid  = $org->att('ncbiTaxId');
+
+	## just make new species object if doesn't already exist ##
+	if (!exists($species{$taxid})) {
+		my $common     =  $org->first_child('names')->first_child('shortLabel')->text;
+		my $full       =  $org->first_child('names')->first_child('fullName')->text;
+		# first word of the full name is taken as genus, the rest as species
+		my ($gen, $sp) = $full =~ /(\S+)\s+(.+)/;
+		my $sp_obj     = Bio::Species->new(-ncbi_taxid     => $taxid,
+											-classification => [$sp, $gen],
+											-common_name    => $common
+										   );
+		$sp_obj->name('scientific', $full);
+		# cache per taxon id so later interactors reuse the same object
+		$species{$taxid} = $sp_obj;
+		# NOTE(review): no eval encloses the code above, so $@ here can only
+		# be left over from an earlier eval - confirm whether this is wanted.
+        print "species parse error $@" if $@;
+      }
+      
+
+	## next extract sequence id info ##
+	my @ids          = $pi->first_child('xref')->children();
+	# database name => id, taken from the db/id attributes of each xref child
+	my %ids          = map {$_->att('db'), $_->att('id')} @ids;
+	$ids{'psixml'}  = $pi->att('id');
+
+	$prim_id = defined ($ids{'GI'}) ?  $ids{'GI'} : '';
+	# accession: the first true id in this order of preference
+	$acc = $ids{'RefSeq'} || 
+	       $ids{'SWP'} || 
+			 $ids{'Swiss-Prot'} || # db name from HPRD
+			 $ids{'Ref-Seq'} ||    # db name from HPRD
+			 $ids{'GI'} || 
+			 $ids{'PIR'} ||
+			 $ids{'intact'} ||     # db name from IntAct
+			 $ids{'psi-mi'};       # db name from IntAct
+
+	## get description line - certain files, like PSI XML from HPRD, have
+	## "shortLabel" but no "fullName"
+	eval {
+		$desc = $pi->first_child('names')->first_child('fullName')->text; 
+	};
+	if ($@) {
+		warn("No fullName, use shortLabel for description instead");
+		$desc = $pi->first_child('names')->first_child('shortLabel')->text;
+	}
+	
+	# use ids that aren't accession_no or primary_tag to build 
+   # dbxref Annotations
+	my $ac = Bio::Annotation::Collection->new();	
+	for my $db (keys %ids) {
+		next if $ids{$db} eq $acc;
+		next if $ids{$db} eq $prim_id;
+		my $an = Bio::Annotation::DBLink->new( -database   => $db,
+															-primary_id => $ids{$db},
+											);
+			$ac->add_Annotation('dblink',$an);
+			}
+
+		## now we can make sequence object ##
+		my $node = $fac->create(
+						-accession_number => $acc,
+						-desc             => $desc,
+						-display_id       => $acc,
+						-primary_id       => $prim_id,
+						-species          => $species{$taxid},
+						-annotation       => $ac);
+
+	## now fill hash with keys = ids and vals = node refs to have lookup
+	## hash for nodes by any id.	
+	$g->{'_id_map'}{$ids{'psixml'}} = $node;
+	if (defined($node->primary_id)) {
+		$g->{'_id_map'}{$node->primary_id} = $node;
+	}
+	if (defined($node->accession_number)) {
+		$g->{'_id_map'}{$node->accession_number} = $node;
+	}
+
+	## cycle thru annotations
+	$ac = $node->annotation();
+	for my $an ($ac->get_Annotations('dblink')) {
+		$g->{'_id_map'}{$an->primary_id} = $node;
+	}
+	# purge() is the last statement, so its value is what the handler returns
+	$twig->purge();
+}
+
+=head2 _addEdge
+
+ name     : _addEdge
+ purpose  : adds a new edge to a graph
+ usage    : do not call, called by next_network
+ returns  : void
+
+=cut
+
+sub _addEdge {
+
+	my ($twig, $interaction) = @_;
+
+	# 'ref' attribute of each participant's proteinInteractorRef, in order
+	my @refs = map { $_->first_child('proteinInteractorRef')->att('ref') }
+	           $interaction->first_child('participantList')->children;
+
+	my $xref    = $interaction->first_child('xref');
+	my $edge_id = $xref->first_child('primaryRef')->att('id');
+
+	# only the first two participants are looked up in the graph's id map
+	my $first  = $g->{'_id_map'}{$refs[0]};
+	my $second = $g->{'_id_map'}{$refs[1]};
+	$g->add_edge(Bio::Graph::Edge->new(
+					-nodes => [ $first, $second ],
+					-id    => $edge_id));
+	$twig->purge();
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,188 @@
+# $Id: IO.pm,v 1.17.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graph::IO
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1  NAME
+
+Bio::Graph::IO - Class for reading /writing biological graph data.
+
+=head1  SYNOPSIS
+
+  # This is a class for reading /writing biological data that can
+  # be represented by graphs e.g., protein interaction data.
+
+  # e.g., a graph reformatter..
+  my $graph_in = Bio::Graph::IO->new(-file =>'myfile.dat',
+                                     -format=>'dip' );
+  my $graph = $graph_in->next_graph();
+  my $graph_out = Bio::Graph::IO->new(-file =>'outfile.dat',
+                                      -format=>'psixml') ;
+  $graph_out->write_graph($graph);
+
+=head1  DESCRIPTION
+
+This class is analogous to the SeqIO and AlignIO classes. To read in a
+file of a particular format, file and format are given as key/value
+pairs as arguments.  The Bio::Graph::IO checks that the appropriate
+module is available and loads it.
+
+At present only the DIP tab delimited format and PSI XML format are supported
+
+=head1 METHODS
+
+The main methods are:
+
+=head2  $graph = $io-E<gt>next_graph()
+
+The next_graph method does not imply that multiple graphs are
+contained in a file, more to maintain the consistency of nomenclature
+with the $seqio-E<gt>next_seq() and $alnio-E<gt>next_aln() methods.
+
+=head2  $io-E<gt>write_graph($graph) (not implemented yet).
+
+Writes the graph data to file in requested format.
+
+
+=head1  REQUIREMENTS
+
+To read or write from XML you will need the XML::Twig module available
+from CPAN.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Richard Adams
+
+Email richard.adams at ed.ac.uk
+
+=cut
+
+use strict;
+package Bio::Graph::IO;
+use base qw(Bio::Root::IO);
+
+=head2  new
+
+ Name       : new
+ Usage      : $io = Bio::Graph::IO->new(-file => 'myfile.dat', 
+                                        -format => 'dip');
+ Returns    : A Bio::Graph::IO stream initialised to the appropriate format.
+ Args       : Named parameters: 
+              -file      => $filename
+              -format    => format
+	      -threshold => a confidence score for the interaction, optional
+
+=cut
+
+sub new {
+    my $caller = shift;
+    my @args   = @_;
+    my $class  = ref($caller) || $caller;
+
+    # Called on a format-specific subclass: build the object directly.
+    if ($class =~ /Bio::Graph::IO::(\S+)/) {
+        my $self = $class->SUPER::new(@args);
+        $self->_initialize_io(@args);
+        return $self;
+    }
+
+    # Called on the generic class: pick the subclass from -format.
+    # Parameter names are matched case-insensitively by adding a
+    # lower-cased copy of every key.
+    my %param   = @args;
+    my %lowered = map { lc($_) => $param{$_} } keys %param;
+    %param = (%param, %lowered);
+
+    unless (exists $param{'-format'}) {
+        Bio::Root::Root->throw("Must specify a valid format!");
+    }
+
+    my $format = lc $param{'-format'};
+
+    # _load_format_module reports the failure itself; nothing is returned.
+    return unless $class->_load_format_module($format);
+
+    my $subclass = "Bio::Graph::IO::$format";
+    return $subclass->new(@args);
+}
+
+=head2    next_graph
+
+ Name       : next_graph
+ Usage      : $gr = $io->next_graph().
+ Returns    : A Bio::Graph::ProteinGraph object.
+ Args       : None
+
+=cut
+
+sub next_graph {
+    # Placeholder on the generic class: always throws.
+    my $self = shift;
+    $self->throw("Sorry, you cannot read from a generic Bio::Graph::IO object.");
+}
+
+=head2    write_graph
+
+ Name       : write_graph
+ Usage      : $gr = $io->write_graph($graph).
+ Args       : A Bio::Graph object.
+ Returns    : None
+
+=cut
+
+sub write_graph {
+    # Placeholder on the generic class: always throws.
+    my $self = shift;
+    $self->throw("Sorry, you can't write from a generic Bio::GraphIO object.");
+}
+
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL BioGraphIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+
+    # Load Bio::Graph::IO::<format> at run time.
+    #
+    # Arguments: the format name (appended verbatim to the module prefix).
+    # Returns:   whatever _load_module() returned on success; undef if
+    #            loading raised an exception, in which case a diagnostic
+    #            is printed to STDERR instead of re-throwing.
+    my ($self, $format) = @_;
+    my $module = "Bio::Graph::IO::" . $format;
+    my $ok;
+
+    eval {
+        $ok = $self->_load_module($module);
+    };
+    if ( $@ ) {
+        print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the Bio::Graph::IO system please see the Bio::Graph::IO docs.
+END
+    }
+    return $ok;
+
+}
+
+sub _initialize_io {
+
+    # Run the parent initialisation, then remember the optional
+    # -threshold argument in $self->{'_th'} (undef when not supplied).
+    my $self = shift;
+    my @args = @_;
+
+    $self->SUPER::_initialize_io(@args);
+    ($self->{'_th'}) = $self->_rearrange( ['THRESHOLD'], @args );
+
+    return $self;
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/ProteinGraph.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/ProteinGraph.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/ProteinGraph.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1120 @@
+# $Id: ProteinGraph.pm,v 1.35.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graph::ProteinGraph
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graph::ProteinGraph - a representation of a protein interaction graph.
+
+=head1 SYNOPSIS
+
+  # Read in from file
+  my $graphio = Bio::Graph::IO->new(-file   => 'myfile.dat',
+                                    -format => 'dip');
+  my $graph   = $graphio->next_network();
+
+=head2 Using ProteinGraph
+
+  # Remove duplicate interactions from within a dataset
+  $graph->remove_dup_edges();
+
+  # Get a node (represented by a sequence object) from the graph.
+  my $seqobj = $gr->nodes_by_id('P12345');
+
+  # Get clustering coefficient of a given node.
+  my $cc = $gr->clustering_coefficient($graph->nodes_by_id('NP_023232'));
+  if ($cc != -1) {  ## result is -1 if cannot be calculated
+    print "CC for NP_023232 is $cc";
+  }
+
+  # Get graph density
+  my $density = $gr->density();
+
+  # Get connected subgraphs
+  my @graphs = $gr->components();
+
+  # Remove a node
+  $gr->remove_nodes($gr->nodes_by_id('P12345'));
+
+  # How many interactions are there?
+  my $count = $gr->edge_count;
+
+  # How many nodes are there?
+  my $ncount = $gr->node_count();
+
+  # Let's get interactions above a threshold confidence score.
+  my $edges = $gr->edges;
+  for my $edge (keys %$edges) {
+	 if (defined($edges->{$edge}->weight()) &&
+      $edges->{$edge}->weight() > 0.6) {
+		    print $edges->{$edge}->object_id(), "\t",
+             $edges->{$edge}->weight(),"\n";
+	 }
+  }
+
+  # Get interactors of your favourite protein
+  my $node      = $graph->nodes_by_id('NP_023232');
+  my @neighbors = $graph->neighbors($node); 
+  print "      NP_023232 interacts with ";
+  print join " ,", map{$_->object_id()} @neighbors;
+  print "\n";
+
+  # Annotate your sequences with interaction info
+  my @seqs; ## array of sequence objects
+  for my $seq(@seqs) {
+    if ($graph->has_node($seq->accession_number)) {
+       my $node = $graph->nodes_by_id( $seq->accession_number);
+       my @neighbors = $graph->neighbors($node);
+       for my $n (@neighbors) {
+         my $ft = Bio::SeqFeature::Generic->new(
+                      -primary_tag => 'Interactor',
+                      -tags        => { id => $n->accession_number }
+                      );
+            $seq->add_SeqFeature($ft);
+        }
+     }
+  }
+
+  # Get proteins with > 10 interactors
+  my @nodes = $graph->nodes();
+  my @hubs;
+  for my $node (@nodes) {
+    if ($graph->neighbor_count($node) > 10) {
+       push @hubs, $node;
+    }
+  }
+  print "the following proteins have > 10 interactors:\n";
+  print join "\n", map{$_->object_id()} @hubs;
+
+  # Merge graphs 1 and 2 and flag duplicate edges
+  $g1->union($g2);
+  my @duplicates = $g1->dup_edges();
+  print "these interactions exist in $g1 and $g2:\n";
+  print join "\n", map{$_->object_id} @duplicates;
+
+=head2 Creating networks from your own data
+
+If you have interaction data in your own format, e.g. 
+
+  edgeid  node1  node2  score
+
+  my $io = Bio::Root::IO->new(-file => 'mydata');
+  my $gr = Bio::Graph::ProteinGraph->new();
+  my %seen = (); # to record seen nodes
+  while (my $l = $io->_readline() ) {
+
+  # Parse out your data...
+  my ($e_id, $n1, $n2, $sc) = split /\s+/, $l;
+
+  # ...then make nodes if they don't already exist in the graph...
+  my @nodes =();
+    for my $n ($n1, $n2 ) {
+		if (!exists($seen{$n})) {
+        push @nodes,  Bio::Seq->new(-accession_number => $n);
+		  $seen{$n} = $nodes[$#nodes];
+      } else {
+			push @nodes, $seen{$n};
+	   }
+    }
+
+  # ...and add a new edge to the graph
+  my $edge  = Bio::Graph::Edge->new(-nodes => \@nodes,
+                                    -id    => $e_id,
+                                    -weight=> $sc);
+  $gr->add_edge($edge);
+  }
+
+=head1 DESCRIPTION
+
+A ProteinGraph is a representation of a protein interaction network.
+It derives most of its functionality from the L<Bio::Graph::SimpleGraph>
+module, but is adapted to be able to use protein identifiers to
+identify the nodes.
+
+This graph can use any objects that implement L<Bio::AnnotatableI> and 
+L<Bio::IdentifiableI> interfaces.  L<Bio::Seq> (but not L<Bio::PrimarySeqI>)
+objects can therefore be used for the nodes but any object that supports 
+annotation objects and the object_id() method should work fine. 
+
+At present it is fairly 'lightweight' in that it represents nodes and
+edges but does not contain all the data about experiment ids etc. found
+in the Protein Standards Initiative schema. Hopefully that will be
+available soon.
+
+A dataset may contain duplicate or redundant interactions. 
+Duplicate interactions are interactions that occur twice in the dataset 
+but with a different interaction ID, perhaps from a different 
+experiment. The dup_edges method will retrieve these.
+
+Redundant interactions are interactions that occur twice or more in a
+dataset with the same interaction id. These are more likely to be 
+due to database errors. These methods are useful when merging 2 
+datasets using the union() method. Interactions present in both 
+datasets, with different IDs, will be duplicate edges. 
+
+=head2 For Developers
+
+In this module, nodes are represented by L<Bio::Seq::RichSeq> objects
+containing all possible database identifiers but no sequence, as
+parsed from the interaction files. However, a node represented by a
+L<Bio::PrimarySeq> object should work fine too.
+
+Edges are represented by L<Bio::Graph::Edge> objects. In order to
+work with SimpleGraph these objects must be array references, with the
+first 2 elements being references to the 2 nodes. More data can be
+added in $e[2]. etc. Edges should  be L<Bio::Graph::Edge> objects, which 
+are L<Bio::IdentifiableI> implementing objects.
+
+At present edges only have an identifier and a weight() method, to 
+hold confidence data, but subclasses of this could hold all the 
+interaction data held in an XML document.
+
+So, a graph has the following data:
+
+1. A hash of nodes ('_nodes'), where keys are the text representation of a 
+nodes memory address and values are the sequence object references.
+
+2. A hash of neighbors ('_neighbors'), where keys are the text representation of a 
+nodes memory address and a value is a reference to a list of 
+neighboring node references.
+
+3. A hash of edges ('_edges'), where a key is a text representation of the 2 nodes.
+E.g., "address1,address2" as a string, and values are Bio::Graph::Edge 
+objects.
+
+4. Look up hash ('_id_map') for finding a node by any of its ids. 
+
+5. Look up hash for edges ('_edge_id_map') for retrieving an edge 
+object  from its identifier.
+
+6. Hash ('_components').
+
+7. An array of duplicate edges ('_dup_edges').
+
+8. Hash ('_is_connected').
+
+=head1  REQUIREMENTS
+
+To use this code you will need the Clone.pm module available from CPAN.
+You also need Class::AutoClass, available from CPAN as well.  To read in
+XML data you will need XML::Twig available from CPAN.
+
+=head1 SEE ALSO
+
+L<Bio::Graph::SimpleGraph>
+L<Bio::Graph::IO>
+L<Bio::Graph::Edge>
+L<Bio::Graph::IO::dip>
+L<Bio::Graph::IO::psi_xml>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+ Richard Adams - this module, Graph::IO modules.
+
+ Email richard.adams at ed.ac.uk
+
+=head2 AUTHOR2
+
+ Nat Goodman - SimpleGraph.pm, and all underlying graph algorithms.
+
+=cut
+
+use strict;
+package Bio::Graph::ProteinGraph;
+use Bio::Graph::Edge;
+use Clone qw(clone);
+use base qw(Bio::Graph::SimpleGraph);
+
+=head2  has_node
+
+ name      : has_node
+ purpose   : Is a protein in the graph?
+ usage     : if ($g->has_node('NP_23456')) {....}
+ returns   : 1 if true, 0 if false
+ arguments : A sequence identifier.
+
+=cut
+
+sub has_node {
+
+    # Returns 1 when the identifier resolves to a node, 0 otherwise.
+    # Throws when no (or a false) identifier is given.
+    my $self = shift;
+    my $arg  = shift;
+
+    unless ($arg) {
+        $self->throw ("I need a sequence identifier!");
+    }
+
+    my @nodes = $self->nodes_by_id($arg);
+    return defined($nodes[0]) ? 1 : 0;
+
+}
+
+=head2 nodes_by_id
+
+ Name      : nodes_by_id
+ Purpose   : get node memory address from an id
+ Usage     : my @neighbors= $self->neighbors($self->nodes_by_id('O232322'))
+ Returns   : a SimpleGraph node representation ( a text representation
+             of a node needed for other graph methods e.g.,
+             neighbors(), edges()
+ Arguments : a protein identifier., e.g., its accession number.
+
+=cut
+
+sub nodes_by_id {
+
+    # Look the given id(s) up through _ids().  In list context all
+    # results are returned; otherwise only the first one.
+    my $self  = shift;
+    my @found = $self->_ids(@_);
+
+    if (wantarray) {
+        return @found;
+    }
+    return $found[0];
+
+}
+
+
+=head2 union
+
+ Name        : union
+ Purpose     : To merge two graphs together, flagging interactions as 
+               duplicate.
+ Usage       : $g1->union($g2), where g1 and g2 are 2 graph objects. 
+ Returns     : void, $g1 is modified
+ Arguments   : A Graph object of the same class as the calling object. 
+ Description : This method merges 2 graphs. The calling graph is modified, 
+               the parameter graph ($g2) in usage) is unchanged. To take 
+               account of differing IDs identifying the same protein, all 
+               ids are compared. The following rules are used to modify $g1.
+
+               First of all both graphs are scanned for nodes that share 
+               an id in common. 
+
+         1. If 2 nodes(proteins) share an interaction in both graphs,
+            the edge in graph 2 is copied to graph 1 and added as a
+            duplicate edge to graph 1,
+
+         2. If 2 nodes interact in $g2 but not $g1, but both nodes exist
+            in $g1, the attributes of the interaction in $g2 are 
+            used to make a new edge in $g1.
+
+         3. If 2 nodes interact in g2 but not g1, and 1 of them is a new
+            protein, that protein is put in $g1 and a new edge made to
+            it. 
+
+         4. At present, if there is an interaction in $g2 composed of a
+            pair of interactors that are not present in $g1, they are 
+            not copied to $g1. This is rather conservative but prevents
+            the problem of having redundant nodes in $g1 due to the same
+            protein being identified by different ids in the same graph.
+
+         So, for example 
+
+              Edge   N1  N2 Comment
+
+    Graph 1:  E1     P1  P2
+              E2     P3  P4
+              E3     P1  P4
+
+    Graph 2:  X1     P1  P2 - will be added as duplicate to Graph1
+              X2     P1  X4 - X4 added to Graph 1 and new edge made
+              X3     P2  P3 - new edge links existing proteins in G1
+              X4     Z4  Z5 - not added to Graph1. Are these different
+                              proteins or synonyms for proteins in G1?
+
+=cut
+
+sub union {
+
+    # Merge the interactions of $other into $self; $other is not modified.
+    #
+    # Arguments: a graph object of the same class as the caller.
+    # Returns:   nothing.
+    # Throws:    if $other is not of the caller's class.
+    #
+    # Only interactions touching a node known to both graphs are
+    # considered.  Each such interaction of $other is handed to
+    # add_edge() exactly once, which classifies it as new, duplicate or
+    # redundant.
+    my ($self, $other) = @_;
+    my $class = ref($self);
+    if (!$other->isa($class)) {
+        $self->throw("I need a ". $class . " object, not a [".
+                     ref($other). "] object");
+    }
+
+    ## Collect one shared id per node of $self: a node may be known
+    ## under several ids, but one common id is enough to find it.
+    my @common_nodes;
+    my %detected_common_nodes;    # keyed by object_id of nodes already recorded
+    for my $id (sort keys %{$self->{'_id_map'}}) {
+        next unless exists($other->{'_id_map'}{$id});
+        my $node = $self->nodes_by_id($id);
+        my $acc  = $node->object_id;
+        next if exists($detected_common_nodes{$acc});
+        push @common_nodes, $id;
+        $detected_common_nodes{$acc} = undef;
+    }
+
+    ## now cycle through common nodes
+    $self->debug( "there are ". scalar @common_nodes. " common nodes\n");
+    my $progress = 0;
+    for my $common (@common_nodes) {
+        $self->debug(".") if $progress++ % 10 == 0;
+
+        ## neighbours of the common node in the other graph
+        my @other_ns = $other->neighbors($other->nodes_by_id($common));
+
+        for my $other_n (@other_ns) {
+            my %other_n_ids = $self->_get_ids($other_n);
+
+            ## ids of this neighbour that $self already knows; sorted so
+            ## the id chosen below does not depend on hash ordering
+            my @int_match = sort grep { exists($self->{'_id_map'}{$_}) }
+                                 keys %other_n_ids;
+
+            if (@int_match) {
+                ## Cases (1) and (2): the neighbour already exists in
+                ## $self.  Copy the edge once, using the first matching
+                ## id; adding it once per matching id would make
+                ## add_edge() record the copies as redundant edges.
+                my $other_edge = $other->edge(
+                    [$other->nodes_by_id($common),
+                     $other->nodes_by_id($other_n->object_id)]
+                );
+                my $edge = Bio::Graph::Edge->new(
+                    -weight => $other_edge->weight(),
+                    -id     => $other_edge->object_id(),
+                    -nodes  => [$self->nodes_by_id($common),
+                                $self->nodes_by_id($int_match[0])],
+                );
+                $self->add_edge($edge);
+            }
+            else {
+                ## Case (3): the neighbour is entirely new.  Clone it and
+                ## connect the clone to the common node.
+                my $other_edge = $other->edge($other->nodes_by_id($other_n->object_id()),
+                                              $other->nodes_by_id($common));
+                my $new = clone($other_n);
+                $self->add_edge(Bio::Graph::Edge->new(
+                    -weight => $other_edge->weight(),
+                    -id     => $other_edge->object_id(),
+                    -nodes  => [$new, $self->nodes_by_id($common)],
+                ));
+
+                ## add new ids to self graph look up table
+                for my $new_id (keys %other_n_ids) {
+                    $self->{'_id_map'}{$new_id} = $new;
+                }
+            }
+        } #next neighbor
+    } #next node
+    return;
+}
+
+=head2 edge_count
+
+ Name     : edge_count
+ Purpose  : returns number of unique interactions, excluding 
+            redundancies/duplicates
+ Arguments: void
+ Returns  : An integer
+ Usage    : my $count  = $graph->edge_count;
+
+=cut
+
+sub edge_count {
+
+    # Number of keys in the graph's edge hash.
+    my ($self) = @_;
+    my $edge_for = $self->_edges;
+    return scalar keys %$edge_for;
+
+}
+
+=head2 node_count
+
+ Name     : node_count
+ Purpose  : returns number of nodes.
+ Arguments: void
+ Returns  : An integer
+ Usage    : my $count = $graph->node_count;
+
+=cut
+
+sub node_count {
+
+    # Number of keys in the graph's node hash.
+    my ($self) = @_;
+    my $node_for = $self->_nodes;
+    return scalar keys %$node_for;
+
+}
+
+=head2 neighbor_count
+
+ Name      : neighbor_count
+ Purpose   : returns number of neighbors of a given node
+ Usage     : my $count = $gr->neighbor_count($node)
+ Arguments : a node object
+ Returns   : an integer
+
+=cut
+
+sub neighbor_count{
+
+    # Number of neighbours of a node.
+    #
+    # Arguments: a node object.  Objects implementing Bio::IdentifiableI
+    #            (the node contract used by the rest of this class) are
+    #            accepted, as are Bio::NodeI objects.
+    # Returns:   an integer.
+    # Throws:    if the argument is not such an object.
+    my ($self, $node) = @_;
+
+    # The eval keeps undef, plain strings and unblessed references from
+    # dying inside isa(); they are reported through throw() instead.
+    my $is_node = do {
+        local $@;
+        ref($node)
+            && eval { $node->isa('Bio::NodeI') || $node->isa('Bio::IdentifiableI') };
+    };
+    if (!$is_node) {
+        $self->throw("I need a node object (implementing Bio::IdentifiableI or Bio::NodeI) here, not a [" . ref($node) . "].");
+    }
+
+    my @nbors = $self->neighbors($node);
+    return scalar @nbors;
+}
+
+=head2 _get_ids_by_db
+
+ Name     : _get_ids_by_db
+ Purpose  : gets all ids for a node, assuming its Bio::Seq object
+ Arguments: A Bio::SeqI object
+ Returns  : A hash: Keys are db ids, values are accessions
+ Usage    : my %ids = $gr->_get_ids_by_db($seqobj);
+
+=cut
+
+sub _get_ids_by_db {
+
+    # Collect the 'dblink' annotations of the given nodes.
+    #
+    # Arguments: node objects implementing Bio::AnnotatableI and
+    #            Bio::IdentifiableI.  Processing stops at the first
+    #            false argument.
+    # Returns:   a hash, database name => primary id.  A later node
+    #            overwrites an earlier entry for the same database.
+    # Throws:    (through the invocant) if an argument does not
+    #            implement both interfaces.
+    my $self = shift;
+    my %ids;
+    while (my $n = shift @_ ) {
+        # Validate without calling methods on something that may not be
+        # an object at all.
+        my $ok = do {
+            local $@;
+            ref($n)
+                && eval { $n->isa('Bio::AnnotatableI') && $n->isa('Bio::IdentifiableI') };
+        };
+        if (!$ok) {
+            $self->throw("I need a Bio::AnnotatableI and Bio::IdentifiableI  implementing object, not a [" .ref($n) ."]");
+        }
+
+        my $ac = $n->annotation();
+        for my $an ($ac->get_Annotations('dblink')) {
+            $ids{$an->database()} = $an->primary_id();
+        }
+    }
+    return %ids;
+}
+
+sub _get_ids {
+
+    # Collect every identifier of the given nodes.
+    #
+    # Arguments: node objects implementing Bio::AnnotatableI and
+    #            Bio::IdentifiableI.  Processing stops at the first
+    #            false argument.
+    # Returns:   a hash whose keys are the object_id of each node plus
+    #            the primary ids of its 'dblink' annotations; all values
+    #            are undef.
+    # Throws:    (through the invocant) if an argument does not
+    #            implement both interfaces.
+    my $self = shift;
+    my %ids;
+    while (my $n = shift @_ ) {
+        # Validate without calling methods on something that may not be
+        # an object at all.
+        my $ok = do {
+            local $@;
+            ref($n)
+                && eval { $n->isa('Bio::AnnotatableI') && $n->isa('Bio::IdentifiableI') };
+        };
+        if (!$ok) {
+            $self->throw("I need a Bio::AnnotatableI and Bio::IdentifiableI  implementing object, not a [" .ref($n) ."]");
+        }
+
+        # the node's own id
+        for my $id ($n->object_id) {
+            $ids{$id} = undef;
+        }
+
+        # ids from database cross references, when the node has any
+        if ($n->can('annotation')) {
+            my $ac = $n->annotation();
+            for my $an ($ac->get_Annotations('dblink')) {
+                $ids{$an->primary_id()} = undef;
+            }
+        }
+    }
+    return %ids;
+
+}
+
+=head2 add_edge
+
+ Name        : add_edge
+ Purpose     : adds an interaction to a graph.
+ Usage       : $gr->add_edge($edge)
+ Arguments   : a Bio::Graph::Edge object, or a reference to a 2 element list. 
+ Returns     : void
+ Description : This is the method to use to add an interaction to a graph. 
+               It contains the logic used to determine if a graph is a 
+               new edge, a duplicate (an existing interaction with a 
+               different edge id) or a redundant edge (same interaction, 
+               same edge id).
+
+=cut
+
+sub add_edge {
+
+    # Add one or more interactions to the graph.
+    #
+    # Array references and plain scalars are handed, together with all
+    # remaining arguments, to the parent class' add_edges().
+    # Bio::Graph::Edge objects are classified here as new, redundant
+    # (same node pair, same edge id) or duplicate (same node pair,
+    # different edge id).
+    my ($self, @candidates) = @_;
+    my $edge_for     = $self->_edges;
+    my $neighbors_of = $self->_neighbors;
+    my $dup_edges    = $self->_dup_edges;    # accessor result is not used below
+    my $edge;
+
+    EDGE:
+    while (@candidates) {
+        my $candidate = $candidates[0];
+
+        if (ref($candidate) eq 'ARRAY' || !ref($candidate)) {
+            $self->SUPER::add_edges(@candidates);
+            return;
+        }
+
+        if ($candidate->isa('Bio::Graph::Edge')) {    # it's already an edge
+            $edge = shift @candidates;
+        }
+        else {
+            $self->throw(" Invalid edge! - must be an array of nodes, or an edge object");
+        }
+
+        my ($first, $second) = $edge->nodes();
+        next EDGE if $first eq $second;               # no self edges
+        last EDGE if !defined($first) || !defined($second);
+
+        # Order the pair by its string form so that both orientations of
+        # an interaction share one hash key.
+        if ("$second" lt "$first") {
+            ($first, $second) = ($second, $first);
+        }
+
+        if (exists $edge_for->{$first,$second}) {
+            # Known node pair: same id means redundant, otherwise duplicate.
+            my $known = $edge_for->{$first,$second};
+            if ($known->object_id() eq $edge->object_id()) {
+                $self->redundant_edge($edge);
+            }
+            else {
+                $self->add_dup_edge($edge);
+            }
+            next EDGE;
+        }
+
+        # New node pair: register nodes, edge and neighbour lists.
+        $self->add_node($first, $second);
+        ($first, $second) = $self->nodes($first, $second);
+        $edge_for->{$first,$second} = $edge;
+        push @{ $neighbors_of->{$first} },  $second;
+        push @{ $neighbors_of->{$second} }, $first;
+
+        ## look up hash for retrieving the edge by its id
+        $self->{'_edge_id_map'}{ $edge->object_id() } = $edge;
+    }
+
+    $self->_is_connected(undef);    # clear cached value
+
+}
+
+=head2 subgraph
+
+ Name      : subgraph
+ Purpose   : To construct a subgraph of  nodes from the main network.This 
+             method overrides that of Bio::Graph::SimpleGraph in its dealings with 
+             Edge objects. 
+ Usage     : my $sg = $gr->subgraph(@nodes).
+ Returns   : A subgraph of the same class as the original graph. Edge objects are 
+             cloned from the original graph but node objects are shared, so beware if you 
+             start deleting nodes from the parent graph whilst operating on subgraph nodes. 
+ Arguments : A list of node objects.
+
+=cut
+
+sub subgraph {
+
+    # Build a graph of the caller's class from the given nodes plus every
+    # edge of the parent graph that links two of them.
+    #
+    # Arguments: a list of node objects.
+    # Returns:   the new subgraph.  Node objects are shared with the
+    #            parent; edges are fresh Bio::Graph::Edge objects carrying
+    #            the id and weight of the parent's edge.
+    my $self = shift;
+
+    ## make new graph of same type as parent
+    my $class    = ref($self);
+    my $subgraph = $class->new;
+    $subgraph->add_node(@_);
+
+    ## add all edges amongst the nodes: each node is paired with every
+    ## node still left in the list, so each pair is visited once
+    my @nodes = $subgraph->nodes;
+    my $i = 1;
+    while (@nodes) {
+        # progress indicator, shown only when debugging output is enabled
+        $self->debug(".") if $i++ % 100 == 0;
+
+        my $m = shift @nodes;
+        for my $n (@nodes) {
+            next unless $self->has_edge([$m,$n]);
+            my ($edge) = $self->edges([$m,$n]);    ## returns list of edges
+            $subgraph->add_edge(Bio::Graph::Edge->new(
+                -nodes  => [$m,$n],
+                -id     => $edge->object_id,
+                -weight => $edge->weight(),
+            ));
+        }
+    } #next node
+    return $subgraph;
+}
+
+=head2 add_dup_edge
+
+ Name       : add_dup_edge
+ Purpose    : to flag an interaction as a duplicate, take advantage of 
+              edge ids. The idea is that interactions from 2 sources with 
+              different interaction ids can be used to provide more 
+              evidence for a interaction being true, while preventing 
+              redundancy of the same interaction being present more than 
+              once in the same dataset. 
+ Returns    : 1 on successful addition, 0 on there being an existing 
+              duplicate. 
+ Usage      : $gr->add_dup_edge(edge->new (-nodes => [$n1, $n2],
+                                           -score => $score
+                                           -id    => $id);
+ Arguments  : an EdgeI implementing object.
+ Description : 
+
+
+=cut
+
+sub add_dup_edge {
+
+    # Record an edge as a duplicate interaction.
+    #
+    # Arguments: an edge object providing object_id().
+    # Returns:   1 when the edge was added to the duplicate list;
+    #            0 when a duplicate with the same id is already listed,
+    #            in which case the edge is passed to redundant_edge().
+    my ($self, $newedge) = @_;
+    my $newedge_id = $newedge->object_id();
+    my $dup_edges  = $self->_dup_edges();
+
+    if (grep { $_->object_id eq $newedge_id } @$dup_edges) {
+        $self->redundant_edge($newedge);
+        return 0;
+    }
+
+    push @$dup_edges, $newedge;
+    return 1;
+}
+
+=head2 edge_by_id
+
+ Name        : edge_by_id
+ Purpose     : retrieve data about an edge from its id
+ Arguments   : a text identifier
+ Returns     : a Bio::Graph::Edge object or undef
+ Usage       : my $edge = $gr->edge_by_id('1000E');
+
+=cut
+
+sub edge_by_id {
+
+    # Look an edge up by its identifier in the '_edge_id_map' hash.
+    # Warns and returns nothing for a missing/false id, throws for a
+    # reference, and returns nothing when the id is unknown.
+    my $self = shift;
+    my $id   = shift;
+
+    unless ($id) {
+        $self->warn ("Need a text identifier");
+        return;
+    }
+    if (ref($id)) {
+        $self->throw(" I need a text identifier, not a [" . ref($id) . "].");
+    }
+
+    my $edge = $self->{'_edge_id_map'}{$id};
+    return unless defined $edge;
+    return $edge;
+
+}
+
+
+=head2 remove_dup_edges 
+
+ Name        : remove_dup_edges
+ Purpose     : removes duplicate edges from graph
+ Arguments   : none         - removes all duplicate edges
+               edge id list - removes specified edges
+ Returns     : void
+ Usage       :    $gr->remove_dup_edges()
+               or $gr->remove_dup_edges($edgeid1, $edgeid2);
+
+=cut
+
+sub  remove_dup_edges{
+
+    # Remove edges from the graph's list of duplicate edges.
+    #
+    # Arguments: none                - every duplicate edge is removed;
+    #            edge ids and/or nodes - a duplicate edge is removed when
+    #                                  its id equals an argument or when
+    #                                  one of its nodes does.
+    # Returns:   1.
+    #
+    # The list is emptied/rewritten in place through the reference
+    # returned by _dup_edges(); rebinding the local variable would leave
+    # the graph untouched.
+    my ($self, @args) = @_;
+    my $dups = $self->_dup_edges();
+
+    if (!@args) {
+        @$dups = ();
+        return 1;
+    }
+
+    my @kept;
+    DUP:
+    for my $dup (@$dups) {
+        my $dup_id    = $dup->object_id;
+        my @dup_nodes = $dup->nodes;
+        for my $arg (@args) {
+            next unless defined $arg;
+            next DUP if defined($dup_id) && $arg eq $dup_id;
+            next DUP if grep { $arg eq $_ } @dup_nodes;
+        }
+        push @kept, $dup;
+    }
+    @$dups = @kept;
+
+    return 1;
+
+}
+
+=head2 redundant_edge
+
+ Name        : redundant_edge
+ Purpose     : adds/retrieves redundant edges to graph
+ Usage       : $gr->redundant_edge($edge)
+ Arguments   : none (getter) or a Bio::Graph::Edge object (setter). 
+ Description : redundant edges are edges in a graph that have the 
+               same edge id, ie. are 2 identical interactions. 
+               With edge arg adds it to list, else returns the list of redundant edges. 
+
+=cut
+
+sub redundant_edge {
+
+    # Combined getter/setter for the '_redundant_edges' list.
+    #   no argument : returns the stored edges as a list (nothing when
+    #                 the list was never created);
+    #   edge object : appends it unless an edge with the same object_id
+    #                 is already listed; throws if it is not a
+    #                 Bio::Graph::Edge.
+    my $self = shift;
+    my $edge = shift;
+
+    unless ($edge) {
+        return @{$self->{'_redundant_edges'}} if exists $self->{'_redundant_edges'};
+        return;
+    }
+
+    unless ($edge->isa('Bio::Graph::Edge')) {
+        $self->throw ("I need a Bio::Graph::Edge object , not a [". ref($edge). "] object.");
+    }
+
+    # create the list on first use
+    $self->{'_redundant_edges'} = [] unless exists $self->{'_redundant_edges'};
+
+    ## add edge to list if not already listed
+    if (!grep{$_->object_id eq $edge->object_id} @{$self->{'_redundant_edges'}}){
+        push @{$self->{'_redundant_edges'}}, $edge;
+    }
+}
+
+=head2 redundant_edges
+
+ Name         : redundant_edges
+ Purpose      : alias for redundant_edge
+
+=cut
+
+sub redundant_edges {
+    # Forwards its first argument (or undef) to redundant_edge().
+    my ($self, $edge) = @_;
+    return $self->redundant_edge($edge);
+}
+
+=head2 remove_redundant_edges 
+
+ Name        : remove_redundant_edges
+ Purpose     : removes redundant_edges from graph, used by remove_node(),
+               may be better as an internal method??
+ Arguments   : none         - removes all redundant edges
+               edge id list - removes specified edges
+ Returns     : void
+ Usage       :    $gr->remove_redundant_edges()
+               or $gr->remove_redundant_edges($edgeid1, $edgeid2);
+
+=cut
+
+sub remove_redundant_edges {
+
+    # Remove edges from the graph's list of redundant edges.
+    #
+    # Arguments: none                - every redundant edge is removed;
+    #            edge ids and/or nodes - a redundant edge is removed when
+    #                                  its id equals an argument or when
+    #                                  one of its nodes does.
+    # Returns:   1.
+    #
+    # All arguments are applied to the same pass over the list, so an
+    # edge matched by any argument stays removed.
+    my ($self, @args) = @_;
+
+    ## if no args remove all
+    if (!@args) {
+        $self->{'_redundant_edges'} = [];
+        return 1;
+    }
+
+    my @kept;
+    EDGE:
+    for my $edge ($self->redundant_edge()) {
+        my $edge_id    = $edge->object_id;
+        my @edge_nodes = $edge->nodes;
+        for my $arg (@args) {
+            next unless defined $arg;
+            next EDGE if defined($edge_id) && $arg eq $edge_id;
+            next EDGE if grep { $arg eq $_ } @edge_nodes;
+        }
+        push @kept, $edge;
+    }
+    $self->{'_redundant_edges'} = \@kept;
+
+    return 1;
+
+}
+
+=head2 clustering_coefficient
+
+ Name      : clustering_coefficient
+ Purpose   : determines the clustering coefficient of a node, a number 
+             in range 0-1 indicating the extent to which the neighbors of
+             a node are interconnected.
+ Arguments : A sequence object (preferred) or a text identifier
+ Returns   : The clustering coefficient. 0 is a valid result.
+             If the CC is not calculable ( if the node has <2 neighbors),
+                returns -1.
+ Usage     : my $node = $gr->nodes_by_id('P12345');
+             my $cc   = $gr->clustering_coefficient($node);
+
+=cut
+
+sub clustering_coefficient {
+
+    # Fraction of the possible links between a node's neighbours that
+    # actually exist: 2 * links / (k * (k - 1)) for k neighbours.
+    # Returns -1 when the node has fewer than two neighbours; throws
+    # when _check_args() cannot resolve the argument.
+    my ($self, $val) = @_;
+
+    my $node = $self->_check_args($val);
+    unless (defined($node)) {
+        $self->throw("[$val] is an incorrect parameter, not presnt in the graph");
+    }
+
+    my @nbrs   = $self->neighbors($node);
+    my $degree = scalar @nbrs;
+
+    # not calculable
+    return -1 if $degree < 2;
+
+    # count each unordered pair of neighbours once
+    my $links = 0;
+    for my $i (0 .. $#nbrs - 1) {
+        for my $j ($i + 1 .. $#nbrs) {
+            $links++ if $self->has_edge($nbrs[$i], $nbrs[$j]);
+        }
+    }
+
+    # can be 0 if the neighbours are unconnected
+    return 2 * $links / ($degree * ($degree - 1));
+}
+
+=head2 remove_nodes
+
+ Name      : remove_nodes
+ Purpose   : to delete a node from a graph, e.g., to simulate effect 
+             of mutation
+ Usage     : $gr->remove_nodes($seqobj);
+ Arguments : a single $seqobj or list of seq objects (nodes)
+ Returns   : 1 on success
+
+=cut
+
+
+sub remove_nodes {
+
+    # Delete one or more nodes, and everything that refers to them, from
+    # the graph.  Warns and returns nothing when called without
+    # arguments; throws when _check_args() cannot resolve an argument;
+    # otherwise returns 1.  Processing stops at the first false argument.
+    my $self = shift @_;
+
+    unless (@_) {
+        $self->warn("You have to specify a node");
+        return;
+    }
+
+    my $edge_for     = $self->_edges;
+    my $neighbors_of = $self->_neighbors;
+    my $dup_edges    = $self->_dup_edges;    # accessor result is not used below
+    my $node_for     = $self->_nodes;
+
+    while (my $val = shift @_ ) {
+
+        ## check argument
+        my $node = $self->_check_args($val);
+        unless (defined($node)) {
+            $self->throw("[$val] is an incorrect parameter, not present in the graph");
+        }
+
+        ##1. remove dup edges and redundant edges containing the node
+        $self->remove_dup_edges($node);
+        $self->remove_redundant_edges($node);
+
+        ##2. drop the node from the neighbour list of each of its neighbours
+        for my $neighbor ($self->neighbors($node)) {
+            my @remaining = grep { $node ne $_ } $self->neighbors($neighbor);
+            $neighbors_of->{$neighbor} = \@remaining;
+        }
+
+        ##3. delete the node's own entry in the neighbour hash
+        delete $neighbors_of->{$node};
+
+        ##4. remove edges involving the node; the edge is read as an
+        ##   array whose first two elements are its nodes
+        for my $key (keys %$edge_for) {
+            my $edge = $edge_for->{$key};
+            next unless $edge->[0] eq $node || $edge->[1] eq $node;
+
+            ## delete edge from look up hash, then from the edge hash
+            my $edge_id = $edge->object_id();
+            delete $self->{'_edge_id_map'}{$edge_id};
+            delete $edge_for->{$key};
+        }
+
+        ##5. remove the node itself
+        delete $node_for->{$node}{'_node_id'};
+        delete $node_for->{$node};
+
+        ##6. the node's aliases in the '_id_map' look up hash are left
+        ##   in place
+    }
+    return 1;
+}
+
+=head2 unconnected_nodes
+
+ Name      : unconnected_nodes
+ Purpose   : return a list of nodes with no connections. 
+ Arguments : none
+ Returns   : an array or array reference of unconnected nodes
+ Usage     : my @ucnodes = $gr->unconnected_nodes();
+
+=cut
+
+sub unconnected_nodes {
+ ## Collects the nodes whose neighbor list is empty.
+ ## Returns a list in list context, otherwise an array reference.
+ my ($self)  = @_;
+ my $nbr_of  = $self->_neighbors;
+ my $node_of = $self->_nodes;
+
+ ## pick the keys with no neighbors, then look up the node for each key
+ my @isolated = map  { $node_of->{$_} }
+                grep { @{$nbr_of->{$_}} == 0 } keys %$nbr_of;
+
+ return wantarray ? @isolated : \@isolated;
+}
+
+=head2 articulation_points
+
+ Name      : articulation_points
+ Purpose   : to find nodes in a graph that if removed will fragment
+               the graph into islands.
+ Usage     : my $nodes_ref = $gr->articulation_points();
+             for my $node (@$nodes_ref) {
+                 print "articulation point: ",
+                       $node->accession_number, "\n";
+             }
+ Arguments : none
+ Returns   : a reference to a list of nodes that will fragment the
+             graph if deleted. 
+ Notes     : This is a "slow but sure" method that works with graphs
+               up to a few hundred nodes reasonably fast.
+
+=cut
+
+sub articulation_points {
+ ## Finds the articulation points of the graph and returns them as an
+ ## array reference of nodes. The result is stored in
+ ## $self->{'_artic_points'} and returned directly on later calls.
+ ## NOTE(review): nothing in this sub clears that cache, so it presumably
+ ## goes stale if the graph is modified afterwards - confirm with callers.
+
+ my $self      = shift;
+ ## see if results are cached already
+ $self->{'_artic_points'} ||= '';
+ return $self->{'_artic_points'} if $self->{'_artic_points'};
+
+## else calculate...
+ $self->debug( "doing subgraphs\n");
+ my @subgraphs = $self->components();
+ 
+ ## articulation points found so far, keyed by the node's text form
+ my %rts;
+
+ for my $sg (@subgraphs) {
+     my $all_nodes = $sg->_nodes;
+     $self->debug( "in subgraph - size". scalar (keys %$all_nodes) . "\n");
+     ## skip components of one or two nodes
+     next if scalar keys %$all_nodes <= 2;
+     my $neighbors = $sg->_neighbors;
+
+     ## find most connected - will be artic point if has >2 neighbors.
+     ## use this to initiate DFS
+     ## NOTE(review): the outer $c declared here is never used; the loop
+     ## below declares its own $c.
+     my ($c, $id);
+     my $max = 0;
+     for my $n (keys %$neighbors) {
+	 my $c = scalar @{$neighbors->{$n}};#
+	 ($max, $id) = ($c, $n) if  $c > $max;#
+     }
+
+     ## depth-first traversal from the chosen node; $id is then reused as
+     ## a counter so each node gets its position in the traversal order
+     my $t      = $sg->node_traversal($all_nodes->{$id},'d');
+     my @nodes  = $t->get_all();
+     $id = 0;
+     #assign node ids
+     for my $n(@nodes) {
+	 $n->{'_node_id'} = $id;	
+	 $id++;
+     }
+
+     ## cycle through each node, last visited first
+     for (my $i       = $#nodes; $i >= 0; $i--) {
+
+	 ## initiate minimum to node_id
+	 ## (presumably equal to $i, assuming $all_nodes holds the same
+	 ## objects the traversal returned - TODO confirm)
+	 my $curr_min = $all_nodes->{$nodes[$i]}{'_node_id'};
+	 #print STDERR "currmin - $curr_min, i = $i\n";
+	 ## cycle through neighbors, reset minimum if required
+	 my $nbors    = $neighbors->{$nodes[$i]};
+	 for my $nbor (@$nbors) {	
+	     my $nbor_id = $all_nodes->{$nbor}{'_node_id'};
+
+	     ## neighbor visited earlier: treated as a back edge ##
+	     if ($nbor_id < $i) {
+		 $curr_min  = $nbor_id if $nbor_id < $curr_min ;
+	     }
+
+	     ## neighbor visited later: treated as a tree edge; its '_wlow'
+	     ## was already set because the loop runs from the last node back
+	     elsif($nbor_id > $i) {
+		 my $wlow   = $all_nodes->{$nbor}{'_wlow'};
+		 $curr_min  = $wlow if $wlow < $curr_min;
+	     }
+	 }#next neighbor
+
+	 ## now we know the minimum, save. 
+	 $all_nodes->{$nodes[$i]}{'_wlow'} = $curr_min;
+
+	 ## now get tree nodes (neighbors with a higher id) and test condition:
+	 ## a non-start node is recorded when such a neighbor's '_wlow' is not
+	 ## below $i; the start node (id 0) is recorded when it has more than
+	 ## one neighbor
+	 my @treenodes = grep{$all_nodes->{$_}{'_node_id'} > $i}@$nbors;
+	 for my $tn (@treenodes) {
+	     if(($all_nodes->{$tn}{'_wlow'} >= $i && $i != 0) ||
+		($i == 0  && scalar @{$neighbors->{$nodes[0]}} > 1) ){
+		 $rts{$nodes[$i]} = $nodes[$i] unless exists $rts{$nodes[$i]};
+	     }
+	 }
+
+     }#next node
+ }#next sg
+## cache results and return
+$self->{'_artic_points'} =   [values %rts]; ## 
+return $self->{'_artic_points'}; 
+}
+
+=head2 is_articulation_point
+
+ Name      : is_articulation_point
+ Purpose   : to determine if a given node is an articulation point or not. 
+ Usage     : if ($gr->is_articulation_point($node)) {.... 
+ Arguments : a text identifier for the protein or the node itself
+ Returns   : 1 if node is an articulation point, 0 if it is not 
+
+=cut
+
+sub is_articulation_point {
+	## Tests whether a node is among the graph's articulation points by
+	## comparing accession numbers.
+	## Argument : a node object or a text identifier
+	## Returns  : 1 if the node is an articulation point, otherwise 0
+	my ($self, $val) = @_;
+	my $node = $self->_check_args($val);
+
+	## articulation_points() returns its stored result after the first
+	## call, so this does not recalculate each time
+	my $points = $self->articulation_points();
+	my $wanted = $node->accession_number;
+
+	my $hits = grep { $_->accession_number eq $wanted } @$points;
+	return $hits ? 1 : 0;
+}
+
+sub _ids {
+	## Looks up each given identifier in the '_id_map' hash.
+	## Arguments : a list of identifiers
+	## Returns   : the list of mapped values, one per identifier, in the
+	##             same order (undef where an identifier has no entry)
+	my ($self, @ids) = @_;
+
+	## map over the whole list so that a false identifier such as '0'
+	## no longer cuts the lookup short
+	my @refs = map { $self->{'_id_map'}{$_} } @ids;
+	return @refs;
+}
+
+sub _check_args {
+## used to check a parameter is a valid node or a text identifier
+## Argument : a node object, or a text identifier to look up through
+##            nodes_by_id
+## Returns  : the node
+## Throws   : if the argument is false, if a text identifier matches no
+##            node, or if a reference is not both a Bio::AnnotatableI and
+##            a Bio::IdentifiableI
+	my ($self, $val) = @_;
+	if (!$val ) {
+		$self->throw( "I need a node that's a Bio::AnnotatableI and Bio::IdentifiableI");
+	}
+
+	## if param is text try to get sequence object..
+	if (!ref($val)) {
+		my $n = $self->nodes_by_id($val);
+		$self->throw ("Cannot find node given by the id [$val]")
+			unless defined($n);
+		return $n;
+	}
+
+	## a reference must implement both interfaces. UNIVERSAL::isa is
+	## called as a function so that an unblessed reference gets our own
+	## message rather than a "Can't call method" failure.
+	unless (UNIVERSAL::isa($val, 'Bio::AnnotatableI')
+			&& UNIVERSAL::isa($val, 'Bio::IdentifiableI')) {
+		$self->throw( "I need a node that's a Bio::AnnotatableI and Bio::IdentifiableI ,not a [". ref($val) . "].");
+	}
+
+	## is a seq obj
+	return $val;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph/Traversal.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph/Traversal.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph/Traversal.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,228 @@
+# Traversal.pm,v 1.10.2.1 2005/10/09 15:16:25 jason Exp
+#
+# BioPerl module for Bio::Graph::SimpleGraph::Traversal;
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graph::SimpleGraph::Traversal - graph traversal operations for Bio::Graph::SimpleGraph and Bio::Graph::Protein::Graph objects 
+
+=head1 SYNOPSIS
+
+  use Bio::Graph::SimpleGraph::Traversal;
+  use Bio::Graph::SimpleGraph;
+
+  ## get a graph , $g.
+
+  my $traversal = Bio::Graph::SimpleGraph::Traversal->new(-graph=>$g,
+                                                          -start=>$start,
+                                                          -order=>$order,
+                                                          -what =>$what);
+ ## cycle through nodes one at a time
+ while ($traversal->has_next() ) {
+        my $node = $traversal->get_next();
+      }
+ ## reset traversal to start
+  $traversal->reset;
+
+ ## get all nodes
+  my @all_nodes = $traversal->get_all();
+
+
+
+=head1 DESCRIPTION
+
+This is a helper class for performing graph traversal operations for
+Bio::Graph::SimpleGraph objects and Bio::Graph::Protein::Graph
+objects. The documentation concerning the use of this class is
+described in the "Graph algorithms" section of the
+Bio::Graph::SimpleGraph modules. Only the methods are documented here.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Nat Goodman, Richard Adams
+
+Email natg at shore.net, richard.adams at ed.ac.uk
+
+=cut
+
+package Bio::Graph::SimpleGraph::Traversal;
+use vars qw(@AUTO_ATTRIBUTES @OTHER_ATTRIBUTES %SYNONYMS %DEFAULTS);
+use Bio::Graph::SimpleGraph;
+use strict;
+use base qw(Class::AutoClass);
+
+# Attribute tables for Class::AutoClass, registered by declare() below.
+# The methods in this file use the names in @AUTO_ATTRIBUTES as get/set
+# accessors; %DEFAULTS lists their initial values.
+@AUTO_ATTRIBUTES=qw(order what graph start is_initialized
+		    _past _present _future);
+@OTHER_ATTRIBUTES=qw();
+%SYNONYMS=();
+%DEFAULTS=(order   => 'dfs',
+	        what   => 'node',
+	       _past   => {},
+	      _future  => []);
+Class::AutoClass::declare(__PACKAGE__);
+
+sub _init_self {
+	## Initialization hook: makes sure the traversal has a graph to walk.
+	my ($self, $class, $args) = @_;
+
+	# only act for this exact class, so subclasses do not re-run this
+	return if $class ne __PACKAGE__;
+
+	# fall back to an empty graph when none was supplied; this
+	# can't be in DEFAULTS - circular includes!
+	return $self->graph || $self->graph(Bio::Graph::SimpleGraph->new);
+}
+
+=head2      has_next
+
+ name      : has_next
+ usage     : while ($traversal->has_next() ) {..
+ purpose   : returns true if there are more items in traversal, else false
+ arguments : none
+ returns   : true or false
+
+=cut 
+
+sub has_next {
+  ## True while the queue of items still to be visited is not empty.
+  my $self = shift;
+
+  # start the traversal on first use
+  $self->is_initialized or $self->reset;
+
+  my $pending = $self->_future;
+  return scalar(@$pending) > 0;
+}
+
+=head2      get_next
+
+ name      : get_next
+ usage     : my $node =  $traversal->get_next() ;
+ purpose   : returns  next item in traversal or undef if traversal is exhausted. 
+ arguments : none
+ returns   : a node  or undef;
+
+=cut 
+
+sub get_next {
+  ## Advances the traversal by one item and returns it. When the queue
+  ## runs dry the current item is cleared and that call's result returned.
+  my $self = shift;
+  $self->is_initialized or $self->reset;
+
+  my $seen    = $self->_past;      # items already visited
+  my $pending = $self->_future;    # items queued for a visit
+  my $graph   = $self->graph;
+
+  while (@$pending) {
+    my $item = shift @$pending;
+    next if $seen->{$item};        # skip anything visited before
+
+    # this is a new item: make it current and mark it visited
+    $self->_present($item);
+    $seen->{$item} = 1;
+
+    # depth first puts the neighbors at the front of the queue,
+    # any other order appends them at the back
+    if ($self->order =~ /^d/i) {
+      unshift(@$pending, $graph->neighbors($item, $self->what));
+    }
+    else {
+      push(@$pending, $graph->neighbors($item, $self->what));
+    }
+    return $item;
+  }
+
+  # traversal is exhausted
+  return $self->_present(undef);
+}
+
+=head2      get_all
+
+ name      : get_all
+ usage     : my @nodes =  $traversal->get_all() ;
+ purpose   : get all remaining items in traversal as ARRAY (in array context)
+              or ARRAY ref.
+ arguments : none
+ returns   : an array, an array reference or undef.
+
+=cut 
+
+sub get_all {
+  ## Runs the traversal to completion and returns every item not yet
+  ## visited, in visiting order.
+  ## Returns : a list in list context, otherwise an array reference.
+  ##           The current item is cleared afterwards.
+  my ($self) = @_;
+  $self->reset unless $self->is_initialized;
+
+  my $past    = $self->_past;      # items already visited
+  my $future  = $self->_future;    # items queued for a visit
+  my $graph   = $self->graph;
+
+  # neither setting changes during the loop, so read them once
+  my $what        = $self->what;
+  my $depth_first = $self->order =~ /^d/i;
+
+  my @results;
+  while (@$future) {
+    my $present = shift @$future;
+    next if $past->{$present};     # skip anything visited before
+
+    # this is a new item
+    $past->{$present} = 1;
+    push(@results, $present);
+
+    # depth first puts the neighbors at the front of the queue,
+    # any other order appends them at the back
+    my @nbrs = $graph->neighbors($present, $what);
+    if ($depth_first) {
+      unshift(@$future, @nbrs);
+    }
+    else {
+      push(@$future, @nbrs);
+    }
+  }
+  $self->_present(undef);
+  return wantarray ? @results : \@results;
+}
+
+=head2      get_this
+
+ name      : get_this
+ usage     : my $node =  $traversal->get_this() ;
+ purpose   : gets current node in traversal 
+ arguments : none
+ returns   : the current node or undef.
+
+=cut 
+
+sub get_this {
+  ## Returns the item the traversal is currently positioned on.
+  my $self = shift;
+
+  # start the traversal on first use
+  $self->is_initialized or $self->reset;
+
+  return $self->_present;
+}
+
+=head2      reset
+
+ name      : reset
+ usage     : $traversal->reset() ;
+ purpose   : restarts traversal from first node
+ arguments : none
+ returns   : void.
+
+=cut 
+
+sub reset {
+  ## Restarts the traversal: forgets visited items, clears the current
+  ## item and queues the start item again.
+  ## Throws : if 'what' starts with neither 'n' (node) nor 'e' (edge)
+  my($self)= @_;
+  $self->_past({});
+  # keep the order the caller asked for; only fall back to depth first
+  # when none is set (forcing 'd' here made breadth first impossible)
+  $self->order('d') unless $self->order;
+  $self->_present(undef);
+  $self->_future([]);
+  $self->is_initialized(1);
+  my $graph = $self->graph;
+  my $start = $self->start;
+  my $what  = $self->what || 'node';
+  if ($what=~/^n/i) {
+    # no start given: use the first node the graph reports
+    defined $start or $start=$graph->nodes->[0];
+  } elsif ($what=~/^e/i) {
+    # resolve a given start to an edge, else use the first edge
+    $start=defined $start? $graph->edge($start): $graph->edges->[0];
+  } else {
+    $self->throw("Unrecognized \$what parameter $what: should be 'node' or 'edge'");
+  }
+  # nothing to queue if no start could be found (e.g. an empty graph)
+  return unless defined $start;
+  $self->_future([$start]);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graph/SimpleGraph.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,905 @@
+# $Id: SimpleGraph.pm,v 1.12.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graph::SimpleGraph
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graph::SimpleGraph - create and manipulate undirected graphs
+
+=head1 SYNOPSIS
+
+  use Bio::Graph::SimpleGraph;
+
+  my $graph=Bio::Graph::SimpleGraph->new;
+  # read pairs of nodes from STDIN
+  while (<>) {
+    my($node1,$node2)=split;
+    $graph->add_edge($node1,$node2);
+  }
+  my @nodes=$graph->nodes;	    # get list of nodes
+  my @edges=$graph->edges;	    # get list of edges
+  foreach my $node (@nodes) {
+    my @neighbors=$graph->neighbors($node); # get list of neighboring nodes
+  }
+
+=head1 DESCRIPTION
+
+This is a simple, hopefully fast undirected graph package. The only
+reason this exists is that the standard CPAN Graph package,
+Graph::Base, is seriously broken.  The package implements a small and
+eclectic assortment of standard graph algorithms that we happened to
+need for our applications.
+
+This module is a subclass of Class::AutoClass (available at CPAN).
+AutoClass autogenerates simple accessor and mutator methods (aka get
+and set methods).  It also automates class initialization.
+
+Nodes can be any Perl values, including object references. Edges are 
+pairs of nodes. 
+
+(Caveat: be careful with values that contain embedded instances of $;
+(the character Perl uses to separate components of multi-dimensional
+subscripts), because we use this in the text representation of edges.)
+
+The main data structures are:
+
+  An edge (x,y) is represented canonically as a two element list in
+  which the lexically smaller value is first.  Eg, the edge ('b','a')
+  is represented as ['a','b'].  
+
+  The graph contains 
+
+  1) A hash mapping the text representation of a node to the node
+     itself.  This is mostly relevant when the node is a reference.
+
+  2) A hash mapping the text representation of a node to a list of 
+     the node's neighbors.
+
+  3) A hash mapping the text representation of an edge to the edge itself.
+
+
+=head1 KNOWN BUGS AND CAVEATS
+
+This is still a work in progress.
+
+=head1 AUTHOR - Nat Goodman
+
+Email natg at shore.net
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Institute for Systems Biology (ISB). All Rights Reserved.
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 APPENDIX
+
+=head2 Conventions for nodes and edges
+
+A node can be any Perl values, including an object, ARRAY, or HASH
+reference.  When nodes are references, the software often works with
+the text representation of the reference, ie, what you get if you print
+the reference.  This can be confusing.  Sorry.  For example if a node
+is the HASH
+
+  {name=>'caspase-9',symbol=>'CASP9'}
+
+The text representation would be something like
+
+  HASH(0x804c830)
+
+When nodes are scalar values, eg, a string, the value and the text
+representation are the same.  This is a common case in test programs
+and examples, but less common in real applications.
+
+An edge is represented internally as an ARRAY ref of two nodes, in
+which the lexically smaller value is first.  Actually, the first node
+is the one whose text representation is lexically smaller.
+
+When passing edges as arguments to SimpleGraph methods, the edge can
+be represented in several ways.
+
+  1) An ARRAY ref of the nodes, eg, ['a','b'].
+
+  2) A list of the two nodes, eg, ('a','b')
+
+  3) Form (1) or (2) using the text representation of the node 
+     instead of the node itself
+
+You needn't worry about which node is lexically smaller.  SimpleGraph
+performs this calculation internally.
+
+When SimpleGraph returns edges as results, they are always in form
+(1), ie, as ARRAY refs of nodes in correct lexical order.
+
+=head2 General conventions for methods
+
+When methods return lists, we generally check the context (via
+wantarray) and return an ARRAY or ARRAY ref as appropriate.  We're not
+100% consistent in this (sorry), so check the code if you have doubts.
+
+We often define singular and plural forms of methods, eg, node and
+nodes.  These differ in how they behave in a scalar context.  The
+singular form assumes you want one answer and returns that, while the
+plural form assumes you want a list of answers and returns it as an
+ARRAY ref.  We're not 100% consistent in this (sorry), so check the
+code if you have doubts.
+
+The rest of the documentation describes the methods.
+
+=head2 Constructors
+
+ Title   : new (inherited from Class::AutoClass)
+ Usage   : my $graph=Bio::Graph::SimpleGraph->new;
+ Function: Create new SimpleGraph object
+ Returns : Newly created object
+ Args    : (optional)
+           nodes=>ARRAY of nodes, eg, ['a','b','c']
+           edges=>ARRAY of edges, see add_edges for details
+
+=head2 Basic node and edge operations
+
+ Title   : add_nodes, add_node
+ Usage   : $graph->add_nodes('a','b');
+           $graph->add_node('a');
+ Function: Add nodes to graph. Nodes that are already in graph
+           are ignored.
+ Args    : ARRAY of nodes.
+ Returns : Nothing useful
+ Note    : Singular and plural forms are synonymous
+
+ Title   : add_edges, add_edge
+ Usage   : $graph->add_edges('a','b',['b','c']);
+           $graph->add_edge('c','d');
+ Function: Add edges to graph. 
+           Edges that are already in graph are not added again, but
+           are placed in a separate 'duplicate edges' list.
+           Automatically adds any nodes that are not yet in the graph.
+ Args    : ARRAY of edges in any of the forms described in the
+           previous section.  The forms can be mixed as shown in
+           the Usage here.
+ Returns : Nothing useful
+ Note    : Singular and plural forms are synonymous
+
+ Title   : nodes
+ Usage   : my @nodes=$graph->nodes;
+           if (@{$graph->nodes('a','b')}==2) {
+             print "a, b are both nodes\n";
+           }
+ Function: Return all nodes or the given ones.  
+           With no args returns all nodes.  
+           With args, returns the nodes corresponding to each arg, or
+           undef if the arg is not a node.  Useful for testing whether
+           a given value is a node in the graph.
+ Args    : (optional)
+           ARRAY of nodes or text representations of nodes
+ Returns : ARRAY or ARRAY ref of nodes (for args that correspond to
+           nodes), or undef (for args that are not nodes)
+
+ Title   : edges
+ Usage   : my @edges=$graph->edges;
+           if (@{$graph->edges('a','b',['b','c'])}==2) {
+             print "[a,b] and [b,c] are both edges\n";
+           }
+ Function: Return all edges or the given ones.  
+           With no args returns all edges.  
+           With args, returns the edges corresponding to each arg, or
+           undef if the arg is not a edge.  Useful for testing whether
+           a given value is a edge in the graph.
+ Args    : (optional)
+           One or more edges in any of the forms described in the
+           previous section.  The forms can be mixed as shown in
+           the Usage here.
+ Returns : ARRAY or ARRAY ref of edges for args that correspond to
+           edges), or undef (for args that are not edges)
+
+ Title   : node
+ Usage   : if ($graph->node('a')) {
+             print "a is a node\n";
+           }
+ Function: Test whether a value is a node in the graph, or map the
+           text representation of a node to the node itself.  The
+           method can also be fed a list of values (like the 'nodes'
+           method) and it will test all of them.
+ Args    : Usually, a single node.
+           The function also accepts a list of nodes.
+ Returns : In scalar context (the usual case): the node corresponding
+           to the arg (if there's just one), or the node corresponding
+           to the first arg (if a list of args were provided, which is
+           kind of dumb in this case), or undef if the arg is not a
+           node.
+
+           In array context, it behaves just like 'nodes', returning
+           an ARRAY of nodes (for args that correspond to nodes), or
+           undef (for args that are not nodes)
+
+ Title   : edge
+ Usage   : if ($graph->edge('a','b')) {
+             print "a,b is a edge\n";
+           }
+           if ($graph->edge(['a','b'])) {
+             print "[a,b] is a edge\n";
+           }
+ Function: Test whether a value is a edge in the graph, or map the
+           text representation of a edge to the edge itself.  The
+           method can also be fed a list of edges (like the 'edges'
+           method) and it will test all of them.
+ Args    : Usually, a single edge.  Same format as 'edges'
+           The function also accepts a list of edges, exactly like 
+           'edges'
+ Returns : In scalar context (the usual case): the edge corresponding
+           to the arg (if there's just one), or the edge
+           corresponding to the first arg (if a list of args were
+           provided, which is kind of dumb in this case), or undef if
+           the arg is not a edge.
+
+           In array context, it behaves just like 'edges', returning
+           an ARRAY of edges (for args that correspond to edges), or
+           undef (for args that are not edges)
+
+ Title   : has_nodes, has_node
+ Usage   : if ($graph->has_nodes('a','b')) {
+             print "a, b are both nodes\n";
+           }
+           if ($graph->has_node('a')) {
+             print "a is a node\n";
+           }
+ Function: Return true if all args are nodes.
+ Args    : ARRAY of nodes or text representations of nodes
+ Returns : Boolean
+ Note    : Singular and plural forms are synonymous
+
+ Title   : has_edges, has_edge
+ Usage   : if ($graph->has_edges('a','b',['b','c'])) {
+             print "[a,b] and [b,c] are both edges\n";
+           }
+           if ($graph->has_edge('a','b')) {
+             print "[a,b] is an edge\n";
+           }
+ Function: Return true if all args are edges.
+ Args    : ARRAY of edges in the forms described in the section above
+ Returns : Boolean
+ Note    : Singular and plural forms are synonymous
+
+ Title   : neighbors, neighbor
+ Usage   : my @nodes=$graph->neighbors($node)
+           my @nodes=$graph->neighbors($node,'node')
+           my @edges=$graph->neighbors($edge,'edge');
+ Function: Return the node or edge neighbors of a given node or edge.
+ Args    : (mandatory)
+           $source: node or edge whose neighbors are sought
+           (optional)
+           $what: the word 'node' or 'edge' (actually, anything starting
+                  with 'n' or 'e' will do)
+                  default: 'node'
+ Returns : ARRAY or ARRAY ref of nodes or edges
+ Note    : Singular and plural forms are synonymous. This may not be
+           right.
+
+ Title   : dup_edges
+ Usage   : my @dups=$graph->dup_edges;
+ Function: Return duplicate edges
+ Args    : None
+ Returns : ARRAY or ARRAY ref of edges that have been added more than
+           once.
+
+=head2 Graph properties
+
+ Title   : is_connected
+ Usage   : if ($graph->is_connected) {
+             print "graph has only one connected component\n";
+           }
+ Function: Return true if the graph is connected
+ Args    : None
+ Returns : Boolean
+
+ Title   : is_empty
+ Usage   : if ($graph->is_empty) {
+             print "graph has no nodes or edges\n";
+           }
+ Function: Return true if the graph is empty, ie, has no nodes or edges
+ Args    : None
+ Returns : Boolean
+
+ Title   : is_tree
+ Usage   : if ($graph->is_tree) {
+             print "graph is a tree\n";
+           }
+ Function: Return true if the graph is a tree, ie, it's connected and
+           has no cycles
+ Args    : None
+ Returns : Boolean
+
+ Title   : is_forest
+ Usage   : if ($graph->is_forest) {
+             print "graph is a forest\n";
+           }
+ Function: Return true if the graph is a forest, ie, it has no cycles
+           but may not be connected
+ Args    : None
+ Returns : Boolean
+
+ Title   : is_cyclic
+ Usage   : if ($graph->is_cyclic) {
+             print "graph contains at least one cycle\n";
+           }
+ Function: Return true if the graph is cyclic.
+ Args    : None
+ Returns : Boolean
+
+ Title   : density
+ Usage   : my $density=$graph->density
+ Function: Compute graph 'density' which is the number of edges
+           divided by the maximum possible number of edges
+ Args    : None
+ Returns : Number
+
+=head2 Graph operations
+
+ Title   : subgraph
+ Usage   : my $subgraph=$graph->subgraph('a','b','c');
+ Function: Compute node subgraph. Constructs a new graph whose nodes
+           are the arguments, and whose edges are the edges of the
+           original graph that only involve the given nodes.
+ Args    : ARRAY of nodes or text representations of nodes
+ Returns : New graph
+
+ Title   : neighbor_subgraph
+ Usage   : my $subgraph=$graph->neighbor_subgraph('a');
+ Function: Construct the node subgraph whose nodes are the given
+           node and its neighbors, and whose edges are the edges
+           of the original graph that only involve the given node
+           and its neighbors.
+ Args    : Node or text representations of node
+ Returns : New graph
+
+ Title   : union
+ Usage   : my $union=$graph->union($other_graph);
+ Function: Construct new graph whose nodes are the union of the nodes
+           of the current graph and $other_graph, and whose edges are
+           the union of the edges of the current graph and
+           $other_graph.
+ Args    : $other_graph: a graph
+ Returns : New graph
+
+ Title   : intersection
+ Usage   : my $intersection=$graph->intersection($other_graph);
+ Function: Construct new graph whose nodes are the intersection of the
+           nodes of the current graph and $other_graph, and whose
+           edges are the intersection of the edges of the current
+           graph and $other_graph.
+ Args    : $other_graph: a graph
+ Returns : New graph
+
+=head2 Graph algorithms
+
+ Title   : traversal
+ Usage   : my $traversal=$graph->traversal('a','depth first','node');
+           my @nodes;
+           while (my $node=$traversal->get_next) {
+             push(@nodes,$node);
+           }
+           my $traversal=$graph->traversal('a','depth first','node');
+           my @nodes=$traversal->get_all;
+ Function: Do node or edge traversal in depth or breadth first order.
+ Args    : (optional)
+           $start: starting node or edge for traversal
+                   default: software picks arbitrary start
+           $order: 'depth first' or 'breadth first' (actually,
+                   anything starting with 'd' or 'b' will do)
+                   default: 'depth first'
+           $what: 'node' or 'edge' (actually, anything starting
+                  with 'n' or 'e' will do)
+                  default: 'node'
+ Returns : SimpleGraph::Traversal object
+           This is an iterator with the following methods:
+
+           get_next: get next item in traversal or undef if 
+                     traversal is exhausted
+           get_this: get current item in traversal
+           get_all : get all remaining items in traversal as
+                     ARRAY (in array context) or ARRAY ref
+           has_next: return true if there are more items in
+                     traversal, else undef
+           reset   : restart traversal
+
+ Note    : It's also possible, and perhaps easier, to perform a
+           traversal by creating a SimpleGraph::Traversal object
+           directly.  The constructor is
+
+           new SimpleGraph::Traversal(-graph=>$graph,-start=>$start,
+                                      -order=>$order,-what=>$what)
+
+ Title   : node_traversal
+ Usage   : my $traversal=$graph->node_traversal('a','depth first');
+           my @nodes;
+           while (my $node=$traversal->get_next) {
+             push(@nodes,$node);
+           }
+           my $traversal=$graph->node_traversal('a','depth first');
+           my @nodes=$traversal->get_all;
+ Function: Do node traversal in depth or breadth first order.
+           Wrapper for 'traversal' method. See above.
+ Args    : (optional)
+           $start: starting node for traversal
+                   default: software picks arbitrary start
+           $order: 'depth first' or 'breadth first' (actually,
+                   anything starting with 'd' or 'b' will do)
+                   default: 'depth first'
+ Returns : SimpleGraph::Traversal object
+
+ Title   : edge_traversal
+ Usage   : my $traversal=$graph->edge_traversal('a','depth first');
+           my @edges;
+           while (my $edge=$traversal->get_next) {
+             push(@edges,$edge);
+           }
+           my $traversal=$graph->edge_traversal('a','depth first');
+           my @edges=$traversal->get_all;
+ Function: Do edge traversal in depth or breadth first order.
+           Wrapper for 'traversal' method. See above.
+ Args    : (optional)
+           $start: starting edge for traversal
+                   default: software picks arbitrary start
+           $order: 'depth first' or 'breadth first' (actually,
+                   anything starting with 'd' or 'b' will do)
+                   default: 'depth first'
+ Returns : SimpleGraph::Traversal object
+
+ Title   : components
+ Usage   : my @components=$graph->components;
+           for my $component (@components) {
+             my @nodes=$component->nodes;
+             my @edges=$component->edges;
+           }
+ Function: Compute the connected components of the graph.  A connected
+           component is a maximal connected subgraph.  'Connected'
+           means you can get from any node of the component to any
+           other by following a path.  'Maximal' means that every node
+           you can reach from the component is in the component.
+ Args    : None
+ Returns : ARRAY or ARRAY ref of SimpleGraphs
+ Note    : The software caches the components once computed, so it's efficient
+           to call this repeatedly.
+
+ Title   : shortest_paths
+ Usage   : my @paths=$graph->shortest_paths;
+           for my $path (@paths) {
+             my @nodes_on_path=@$path;
+             my $start=$nodes_on_path[0];
+             my $end=$nodes_on_path[$#nodes_on_path];
+           }
+ Function: Compute shortest path between each pair of nodes.
+ Args    : None
+ Returns : ARRAY or ARRAY ref of paths, where each path is an ARRAY
+           ref of nodes.  The result contains one path for each pair
+           of nodes for which a path exists.
+
+ Title   : connected_nodesets
+ Usage   : my @nodesets=$graph->connected_nodesets;
+           for my $nodeset (@nodesets) {
+             my @nodes=@$nodeset;
+           }
+ Function: Compute all sets of nodes that form connected subgraphs. 
+           A connected nodeset is a set of nodes such that it's
+           possible to get from any node to any other by following a
+           path that only includes nodes in the set. 
+ Args    : None
+ Returns : ARRAY or ARRAY ref of nodeset, where each nodeset is an ARRAY
+           ref of nodes.  
+ Note    : Use with caution.  The number of nodesets is very
+           large for graphs that are highly connected.
+
+ Title   : connected_subgraphs
+ Usage   : my @subgraphs=$graph->connected_subgraphs;
+ Function: Compute all connected subgraphs of the current graph.
+ Args    : None
+ Returns : ARRAY or ARRAY ref of subgraphs
+ Note    : Use with caution.  The number of connected subgraphs is
+           very large for graphs that are highly connected.
+
+=cut
+
+package Bio::Graph::SimpleGraph;
+use vars qw(@AUTO_ATTRIBUTES @OTHER_ATTRIBUTES %SYNONYMS %DEFAULTS);
+use Bio::Graph::SimpleGraph::Traversal;
+use strict;
+use base qw(Class::AutoClass);
+# Package variables (declared with 'use vars' above) that are set before
+# Class::AutoClass::declare is called for this package.
+# NOTE(review): presumably declare() reads them to generate the accessors
+# (_nodes, _edges, ...) used throughout this module -- confirm against the
+# Class::AutoClass documentation.
+@AUTO_ATTRIBUTES=qw(_nodes _edges _neighbors _dup_edges _is_connected _components);
+%SYNONYMS=();
+@OTHER_ATTRIBUTES=qw();
+%DEFAULTS=(_nodes=>{},_edges=>{},_neighbors=>{},_dup_edges=>[]);
+Class::AutoClass::declare(__PACKAGE__);
+
+# Implementation:
+#  An edge (x,y) is represented canonically as a two-element list in which the
+#    lexically smaller value is first.  E.g., the edge ('b','a') is represented
+#    as ['a','b'].
+#  Graph contains
+#    A hash mapping the text representation of a node to the node itself
+#    A hash mapping the text representation of a node to the nodes neighbors
+#    A hash mapping the text representation of an edge to the edge itself.
+
+# Initialisation hook: load the graph from the 'nodes' and 'edges' values
+# carried by the argument object.  Returns immediately when invoked for any
+# class other than this package.
+sub _init_self {
+  my($self,$class,$args)=@_;
+  # subclasses must not re-run this initialisation
+  return unless $class eq __PACKAGE__;
+  my $node_list = $args->nodes;
+  $self->add_nodes(@$node_list) if defined $node_list;
+  my $edge_list = $args->edges;
+  $self->add_edges(@$edge_list) if defined $edge_list;
+}
+# With arguments: look up the stored node for each given key (undef for
+# unknown keys).  Without arguments: return every stored node.
+# Returns a list in list context, an ARRAY ref otherwise.
+sub nodes {
+  my($self,@names)=@_;
+  my $known=$self->_nodes;
+  my @found;
+  if (@names) {
+    @found=@{$known}{@names};
+  } else {
+    @found=values %$known;
+  }
+  return wantarray ? @found : \@found;
+}
+
+# Singular form of nodes(): same lookup, but in scalar context only the
+# first result is returned instead of an ARRAY ref.
+sub node {
+  my($self,@names)=@_;
+  my @found=$self->nodes(@names);
+  return @found if wantarray;
+  return $found[0];
+}
+
+# Look up edges.
+#   Args    : none, or any mix of edges (ARRAY refs / Bio::Graph::Edge
+#             objects) and flat pairs of nodes.
+#   Returns : with no arguments, every stored edge; otherwise one entry per
+#             requested edge (undef where the graph has no such edge).
+#             List in list context, ARRAY ref otherwise.
+# Processing stops at the first request whose endpoints are not both
+# defined.
+sub edges {
+  my $self = shift;
+  my @ret;
+  if (@_) {
+    my $edges = $self->_edges;
+    while (@_) {
+      my($m,$n);
+      # UNIVERSAL::isa() is called as a function so that undef, '' and
+      # unblessed references (e.g. hash-ref nodes) are simply "not an edge"
+      # instead of making the method call $_[0]->isa(...) die.
+      if (ref($_[0]) eq 'ARRAY' || UNIVERSAL::isa($_[0],'Bio::Graph::Edge')) {
+        my $edge = shift;               # it's already an edge
+        ($m,$n)  = @$edge[0..1];
+      } else {
+        ($m,$n)  = (shift,shift);
+      }
+      last unless defined $m && defined $n;
+      # canonical key order: lexically smaller node first
+      ($m,$n) = ($n,$m) if "$n" lt "$m";
+      push(@ret, $edges->{$m,$n});
+    }
+  } else {
+    @ret = values %{$self->_edges};
+  }
+  wantarray? @ret: \@ret;
+}
+
+# Singular form of edges(): same lookup, but in scalar context only the
+# first result is returned instead of an ARRAY ref.
+sub edge {
+  my($self,@spec)=@_;
+  my @found=$self->edges(@spec);
+  return @found if wantarray;
+  return $found[0];
+}
+
+# True (1) when every given key names a stored node, undef as soon as one
+# of them is missing.  With no arguments the answer is 1.
+sub has_nodes {
+  my($self,@names)=@_;
+  my @found=@{$self->_nodes}{@names};
+  my $missing=grep {!defined $_} @found;
+  return $missing ? undef : 1;
+}
+# Alias for has_nodes.
+sub has_node {
+  my($self,@names)=@_;
+  return $self->has_nodes(@names);
+}
+
+# Test whether the graph contains every requested edge.
+#   Args    : any mix of edges (ARRAY refs / Bio::Graph::Edge objects) and
+#             flat pairs of nodes.
+#   Returns : 1 when all requested edges exist, empty/undef at the first
+#             one that does not.  Checking stops (with 1) at the first
+#             request whose endpoints are not both defined.
+sub has_edges {
+  my $self  = shift;
+  my $edges = $self->_edges;
+  while (@_) {
+    my($m,$n);
+    # UNIVERSAL::isa() is called as a function so that undef, '' and
+    # unblessed references (e.g. hash-ref nodes) are simply "not an edge"
+    # instead of making the method call $_[0]->isa(...) die.
+    if (ref($_[0]) eq 'ARRAY' || UNIVERSAL::isa($_[0],'Bio::Graph::Edge')) {
+      my $edge = shift;                 # it's already an edge
+      ($m,$n)  = @$edge[0..1];          # first 2 elements are nodes
+    } else {
+      ($m,$n)  = (shift,shift);
+    }
+    last unless defined $m && defined $n;
+    # canonical key order: lexically smaller node first
+    ($m,$n)=($n,$m) if "$n" lt "$m";
+    return unless ($edges->{$m,$n});
+  }
+  return 1;
+}
+# Alias for has_edges.
+sub has_edge {my $self=shift; $self->has_edges(@_); }
+
+# Neighbors of a node, or of an edge.
+#   $source : a node, or (when $what selects edges) an edge
+#   $what   : anything starting with 'n' (nodes, the default) or 'e' (edges)
+# For a node the stored neighbor list is returned.  For an edge the result
+# is every edge touching either endpoint, minus the source edge itself.
+# Returns a list / ARRAY ref depending on context; an empty list when
+# nothing is stored for $source.
+sub neighbors {
+  my($self,$source,$what)=@_;
+  $what ||= 'node';
+  my $found;
+  if ($what=~/^n/i) {
+    $found=$self->_neighbors->{$source};
+  }
+  elsif ($what=~/^e/i) {
+    my $origin=$self->edge($source);
+    return unless $origin;
+    my %adjacent;               # keyed by stringified edge, so no duplicates
+    for my $endpoint ((@$origin)[0,1]) {
+      for my $other (@{$self->_neighbors->{$endpoint}}) {
+        my $touching=$self->edge([$endpoint,$other]);
+        $adjacent{$touching}=$touching;
+      }
+    }
+    delete $adjacent{$origin};  # remove source edge from result
+    $found=[values %adjacent];
+  }
+  else {
+    $self->throw("Unrecognized \$what parameter $what: should be 'node' or 'edge'");
+  }
+  return () unless $found;
+  return wantarray ? @$found : $found;
+}
+# Alias for neighbors.
+sub neighbor {
+  my($self,@args)=@_;
+  return $self->neighbors(@args);
+}
+
+# Register each given node that is not already known, giving it an empty
+# neighbor list, then reset the cached components.
+sub add_nodes {
+  my($self,@new)=@_;
+  my $node_for     = $self->_nodes;
+  my $neighbor_for = $self->_neighbors;
+  foreach my $node (@new) {
+    unless (defined $node_for->{$node}) {
+      $node_for->{$node}     = $node;
+      $neighbor_for->{$node} = [];
+    }
+  }
+  $self->_components(undef);	# clear cached value
+}
+# Alias for add_nodes.
+sub add_node {
+  my($self,@new)=@_;
+  $self->add_nodes(@new);
+}
+
+# Add edges to the graph.
+#   Args : any mix of edges given as ARRAY refs ([$m,$n]) and flat pairs of
+#          nodes ($m,$n,$m2,$n2,...).
+# Self edges are skipped.  Processing stops at the first edge whose
+# endpoints are not both defined.  Endpoints are registered as nodes when
+# necessary; an edge that already exists is recorded in the duplicate-edge
+# list instead of being added again.
+sub add_edges {
+  my $self  = shift @_;
+  my $edges = $self->_edges;
+  my $neighbors = $self->_neighbors;
+  my $dup_edges = $self->_dup_edges;
+  while (@_) {
+    my($m,$n);
+    if ('ARRAY' eq ref $_[0] ) {	# it's already an edge
+      my $edge = shift;
+      ($m,$n)  = @$edge;
+    } else {
+      # take exactly one pair; consuming a third argument here would drop
+      # the first node of the following pair
+      ($m,$n)  = (shift,shift);
+    }
+    next if $m eq $n;		# no self edges
+    last unless defined $m && defined $n;
+    # canonical key order: lexically smaller node first
+    ($m,$n)=($n,$m) if "$n" lt "$m";
+    unless ($edges->{$m,$n}) {
+      $self->add_node($m,$n);
+      ($m,$n) = $self->nodes($m,$n);
+      $edges->{$m,$n} = [$m,$n];
+      push(@{$neighbors->{$m}},$n);
+      push(@{$neighbors->{$n}},$m);
+    } else {
+      push(@$dup_edges,[$m,$n]);
+    }
+  }
+  $self->_is_connected(undef);	# clear cached value
+}
+# Alias for add_edges.
+sub add_edge {my $self=shift; $self->add_edges(@_); }
+
+# Return a copy of the recorded duplicate edges: a list in list context,
+# an ARRAY ref otherwise.
+sub dup_edges {
+  my($self)=@_;
+  my @copies=@{$self->_dup_edges};
+  return @copies if wantarray;
+  return \@copies;
+}
+
+# A connected graph has 0 or 1 components.
+sub is_connected {
+  my($self)=@_;
+  my $components=$self->components;
+  return @$components <= 1;
+}
+# A graph is empty when it has no nodes.
+sub is_empty {
+  my($self)=@_;
+  my $nodes=$self->nodes;
+  return @$nodes == 0;
+}
+
+# tree is (connected and #edges=#nodes-1) or empty
+sub is_tree {
+  my($self)=@_;
+  my $acyclic = $self->is_connected
+    && scalar(@{$self->edges}) == scalar(@{$self->nodes}) - 1;
+  return $acyclic || $self->is_empty;
+}
+# A forest is a graph in which every component is a tree.
+sub is_forest {
+  my($self)=@_;
+  foreach my $part ($self->components) {
+    $part->is_tree or return;
+  }
+  return 1;
+}
+# A graph is cyclic exactly when it is not a forest.
+sub is_cyclic {
+  my($self)=@_;
+  return !$self->is_forest;
+}
+
+# Ratio of existing edges to the number of possible edges, n*(n-1)/2.
+# Graphs with fewer than two nodes have density 0.
+sub density {
+  my($self) = @_;
+  my @nodes = $self->nodes;
+  my @edges = $self->edges;
+  my $node_count = scalar @nodes;
+  if ($node_count < 2) {
+    return 0;
+  }
+  my $possible = $node_count*($node_count-1)/2;
+  return scalar(@edges)/$possible;
+}
+
+# Build the subgraph induced by the given nodes: a new graph of the same
+# class as the parent, holding those nodes and every parent edge that joins
+# two of them.
+sub subgraph {
+  my($self,@members)=@_;
+
+  ## make new graph of same type as parent
+  my $subgraph = ref($self)->new;
+  $subgraph->add_node(@members);
+
+  # add all edges amongst the nodes: visit each unordered pair once
+  my @picked=$subgraph->nodes;
+  for my $i (0..$#picked-1) {
+    for my $j ($i+1..$#picked) {
+      my $pair=[$picked[$i],$picked[$j]];
+      $subgraph->add_edge($pair) if $self->has_edge($pair);
+    }
+  }
+  return $subgraph;
+}
+# Subgraph induced by node $n together with its neighbors.
+sub neighbor_subgraph {
+  my($self,$n)=@_;
+  my @center=$self->node($n);
+  my @around=$self->neighbors($n);
+  return $self->subgraph(@center,@around);
+}
+# New Bio::Graph::SimpleGraph holding the nodes and edges of both graphs.
+sub union {
+  my($self,$other)=@_;
+  my $combined=Bio::Graph::SimpleGraph->new;
+  my @all_nodes=($self->nodes,$other->nodes);
+  $combined->add_node(@all_nodes);
+  my @all_edges=($self->edges,$other->edges);
+  $combined->add_edge(@all_edges);
+  return $combined;
+}
+# New Bio::Graph::SimpleGraph holding only the nodes and edges of this
+# graph that $other also has.
+sub intersection {
+  my($self,$other)=@_;
+  my $common=Bio::Graph::SimpleGraph->new;
+  foreach my $node ($self->nodes) {
+    $common->add_node($node) if $other->has_node($node);
+  }
+  foreach my $edge ($self->edges) {
+    $common->add_edge($edge) if $other->has_edge($edge);
+  }
+  return $common;
+}
+
+# Create a Bio::Graph::SimpleGraph::Traversal over this graph; $what is
+# passed through unchanged.
+sub traversal {
+  my($self,$start,$order,$what)=@_;
+  return Bio::Graph::SimpleGraph::Traversal->new(
+    -graph=>$self,-start=>$start,-order=>$order,-what=>$what);
+}
+# Same as traversal, with -what fixed to 'node'.
+sub node_traversal {
+  my($self,$start,$order)=@_;
+  return Bio::Graph::SimpleGraph::Traversal->new(
+    -graph=>$self,-start=>$start,-order=>$order,-what=>'node');
+}
+# Same as traversal, with -what fixed to 'edge'.
+sub edge_traversal {
+  my($self,$start,$order)=@_;
+  return Bio::Graph::SimpleGraph::Traversal->new(
+    -graph=>$self,-start=>$start,-order=>$order,-what=>'edge');
+}
+
+# Compute a shortest path between pairs of nodes.
+#   Args    : none
+#   Returns : list (or ARRAY ref, depending on context) of paths; each path
+#             is an ARRAY ref of nodes.  Pairs with no connecting path are
+#             left out (empty paths are filtered at the end).
+# Works on node indices into @nodes.  $dist and $path are hashes keyed by
+# the index pair (i,j); every edge has length 1.
+sub shortest_paths {
+  my($self)=@_;
+  # initialization
+  my @nodes=$self->nodes;
+  my $dist={};
+  my $path={};
+  for (my $i=0; $i<@nodes; $i++) { # start from i
+    my $node0=$nodes[$i];
+    for (my $j=0; $j<@nodes; $j++) { # end at j
+      my $node1=$nodes[$j];
+      if ($i==$j) {
+	$dist->{$i,$j}=0;
+	next;
+      }
+      next unless $self->has_edge([$node0,$node1]);
+      $dist->{$i,$j}=1;
+      $path->{$i,$j}=[$i,$j];
+    }
+  }
+  # compute paths
+  for (my $k=0; $k<@nodes; $k++) {        # k is intermediate point
+    for (my $i=0; $i<@nodes-1; $i++) {    # start from i
+      next unless defined $dist->{$i,$k};
+      for (my $j=$i+1; $j<@nodes; $j++) { # NG 04-02-10 added optimization
+#      for (my $j=0; $j<@nodes; $j++) { # end at j
+	next unless defined $dist->{$k,$j};
+	# path i..k..j exists -- is it shorter than what we already have?
+	if (!defined $dist->{$i,$j} || $dist->{$i,$k}+$dist->{$k,$j} < $dist->{$i,$j}) {
+	  $dist->{$i,$j}=$dist->{$i,$k}+$dist->{$k,$j};
+	  $path->{$i,$j}=_sp_join_paths($path,$i,$k,$j);
+#	  # NG 04-02-10 next two lines needed for optimization above
+	  # only pairs with i<j are visited, so mirror the result for (j,i)
+	  $dist->{$j,$i}=$dist->{$i,$j};
+	  $path->{$j,$i}=[reverse @{$path->{$i,$j}}];
+	}
+      }
+    }
+  }
+  # convert node indices (i,j,..) into nodes
+  my $paths={};
+  for (my $i=0; $i<@nodes-1; $i++) {    # start from i
+    my $nodei=$nodes[$i];
+    for (my $j=$i+1; $j<@nodes; $j++) { # end at j
+      my $p=$path->{$i,$j};
+      my $nodej=$nodes[$j];
+      my $path_nodes=[map {$nodes[$_]} @$p];
+      # store each path so that it starts at the lexically smaller end node
+      if ("$nodei" lt "$nodej") {
+	$paths->{$nodei,$nodej}=$path_nodes;
+      } else {
+	$paths->{$nodej,$nodei}=[reverse @$path_nodes];
+      }
+    }
+  }
+  # drop the empty paths of unconnected pairs
+  my @paths=grep {@$_} values %$paths;
+  wantarray? @paths : \@paths;
+}
+# Helper for shortest_paths: splice the stored path i..k onto the stored
+# path k..j (k appearing once), store the result under (i,j) in $path and
+# return it.  A missing path counts as empty.
+sub _sp_join_paths {
+  my($path,$i,$k,$j)=@_;
+  my @head=@{$path->{$i,$k} || []};
+  my @tail=@{$path->{$k,$j} || []};
+  pop @head;                    # drop the trailing k of i..k
+  shift @tail;                  # drop the leading k of k..j
+  return $path->{$i,$j}=[@head,$k,@tail];
+}
+
+# Compute every set of nodes that forms a connected subgraph.
+#   Args    : none
+#   Returns : list (or ARRAY ref, depending on context) of nodesets, each
+#             an ARRAY ref of nodes.
+# Worklist algorithm on node numbers: start from the single-node sets, and
+# repeatedly grow a set by one neighbor of one of its members.  A set is
+# identified by its sorted member numbers, so each is queued only once.
+sub connected_nodesets {
+  my($self)=@_;
+  my @nodes=$self->nodes;
+  my %node2num;
+  # algorithm works with node numbers, not nodes
+  for (my $i=0;$i<@nodes;$i++) {
+    $node2num{$nodes[$i]}=$i;
+  }
+  my %nodesets;                         # nodesets that have been seen (not necessarily processed)
+  my @future=map {[$_]} 0..$#nodes;     # nodesets to be processed -- init to nodes
+  @nodesets{0..$#nodes}=@future;
+
+  while (@future) {
+    my $present=shift @future;
+    my %present;
+    @present{@$present}=(1)x@$present; # for quick check of redundant neighbors
+    for my $node (@$present) {
+      for my $neighbor (map {$node2num{$_}} $self->neighbors($nodes[$node])) {
+	next if $present{$neighbor};
+	my $future=[sort by_num (@$present,$neighbor)];
+	my $key=join($;,@$future);
+	next if $nodesets{$key}; # skip if already seen
+	$nodesets{$key}=$future;
+	push(@future,$future);
+      }
+    }
+  }
+  # translate node numbers back into nodes
+  my $nodesets=[];
+  for my $nodeset (values %nodesets) {
+    push(@$nodesets,[map {$nodes[$_]} @$nodeset]);
+  }
+  wantarray? @$nodesets: $nodesets;
+}
+
+# One subgraph per connected nodeset of this graph.  Returns a list in list
+# context, an ARRAY ref otherwise.
+sub connected_subgraphs {
+  my($self)=@_;
+  my @subgraphs=map {$self->subgraph(@$_)} $self->connected_nodesets;
+  return wantarray ? @subgraphs : \@subgraphs;
+}
+
+# Compute the connected components of the graph.
+#   Args    : none
+#   Returns : list (or ARRAY ref, depending on context) of subgraphs, one
+#             per component.
+# The result is stored in the _components attribute, so repeated calls do
+# not recompute it; add_nodes resets that attribute.  Both the cached and
+# the freshly computed result honour the calling context.
+sub components {
+    my($self)  = @_;
+    my $components = $self->_components;
+    unless (defined $components) {
+	$components = [];
+	my @nodes   = $self->nodes;
+	my %future;                     # node => 1 once it is in some component
+	my $i = 1;
+	@future{@nodes} = (0) x scalar(@nodes);
+	while (my($node, $used) = each %future) {
+	    if ($i++ %10 ==0 ) {
+		$self->debug("|");
+	    }
+	    next if $used;
+	    # everything reachable from this node forms one component
+	    my @reached   = $self->traversal($self->node($node))->get_all;
+	    my $component = $self->subgraph(@reached);
+	    my @members   = $component->nodes;
+	    # only values of existing keys change, which is safe during each()
+	    @future{@members} = (1) x scalar(@members);
+	    push(@$components,$component);
+	}
+	$self->_components($components);   # cache for later calls
+    }
+    # hand out a copy of the list so callers cannot alter the cached one
+    return wantarray ? @$components : [@$components];
+}
+
+# Numeric comparator for sort (reads the package variables $a and $b).
+sub by_num {
+  return $a <=> $b;
+}
+
+1;
+
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/ConfiguratorI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/ConfiguratorI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/ConfiguratorI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+# $Id: ConfiguratorI.pm,v 1.8.4.1 2006/10/02 23:10:18 sendu Exp $
+#
+# BioPerl module for Bio::Graphics::ConfiguratorI
+#
+# Cared for by Robert Hubley <rhubley at systemsbiology.org>
+#
+# Copyright Robert Hubley
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graphics::ConfiguratorI - A sectioned map of configuration
+options (a map of maps), with a default section.  Intended to augment
+existing tag-E<gt>value semantics (ie. of Bio::AnnotationCollectionI) for
+object-representation information (eg. foreground color), and for
+general interface preferences (eg. image width in gbrowse).
+
+=head1 SYNOPSIS
+
+    # get a ConfiguratorI somehow
+    my $fg_color = $configurator->get('fgcolor');
+
+=head1 DESCRIPTION
+
+This object contains various configuration parameters.  It is divided
+up into sections and tags.  This is essentially a multi-level map
+(section-E<gt>tag-E<gt>value).  There is also the concept of a default
+section which is referenced when no section is passed to the
+ConfiguratorI methods.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Robert Hubley
+
+Email rhubley at systemsbiology.org
+
+=head1 CONTRIBUTORS
+
+Paul Edlefsen, pedlefsen at systemsbiology.org
+Lincoln Stein, lstein at cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Graphics::ConfiguratorI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 get_sections
+
+ Title   : get_sections
+ Usage   : my @values = $configurator->get_sections();
+ Function: Returns a list of the valid sections except
+           the default or undef.
+ Returns : A list of the sections which can be queried.
+ Args    : (optional section as string, tag as string)
+
+=cut
+
+# Interface stub: always delegates to throw_not_implemented().
+sub get_sections {
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 get_tags
+
+ Title   : get_tags
+ Usage   : my @values = $configurator->get_tags();
+           or
+           my @values = $configurator->get_tags('dna');
+ Function: Returns a list of tags for a given section
+           or only the default tags section if no section
+           is given.
+ Returns : A scalar list of tags
+ Args    : (optional section as string)
+
+=cut
+
+# Interface stub: always delegates to throw_not_implemented().
+sub get_tags {
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 get
+
+ Title   : get
+ Usage   : my $value = $configurator->get('height');
+           or
+           my $value = $configurator->get('dna','height');
+ Function: Returns a tag value from a configurator, taken from
+           either the default "_general" section or from
+           a specified section, or undef.
+ Returns : A scalar value for the tag
+ Args    : (optional section as string, tag as string)
+
+=cut
+
+# Interface stub: always delegates to throw_not_implemented().
+sub get {
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 set
+
+ Title   : set
+ Usage   : $configurator->set('fgcolor','chartreuse');
+           or
+           $configurator->set('EST','fgcolor','chartreuse');
+ Function: Set a value for a tag
+ Returns : The old value of the tag
+ Args    : (optional section as string, tag as string, value as scalar)
+
+=cut
+
+# Interface stub: always delegates to throw_not_implemented().
+sub set {
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 get_and_eval
+
+ Title   : get_and_eval
+ Usage   : my $value = $configurator->get_and_eval('height');
+           or
+           my $value = $configurator->get_and_eval('dna','height');
+ Function: This works like get() except that it is
+           also able to evaluate code references.  These are
+           options whose values begin with the characters
+           "sub {".  In this case the value will be passed to
+           an eval() and the resulting codereference returned.
+ Returns : A value of the tag or undef.
+ Args    : (optional section as string, tag as string)
+
+=cut
+
+# Interface stub: always delegates to throw_not_implemented().
+sub get_and_eval {
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Feature.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Feature.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Feature.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,362 @@
+package Bio::Graphics::Feature;
+
+=head1 NAME
+
+Bio::Graphics::Feature - A simple feature object for use with Bio::Graphics::Panel
+
+=head1 SYNOPSIS
+
+ use Bio::Graphics::Feature;
+
+ # create a simple feature with no internal structure
+ $f = Bio::Graphics::Feature->new(-start => 1000,
+                                  -stop  => 2000,
+                                  -type  => 'transcript',
+                                  -name  => 'alpha-1 antitrypsin',
+				  -desc  => 'an enzyme inhibitor',
+                                 );
+
+ # create a feature composed of multiple segments, all of type "similarity"
+ $f = Bio::Graphics::Feature->new(-segments => [[1000,1100],[1500,1550],[1800,2000]],
+                                  -name     => 'ABC-3',
+                                  -type     => 'gapped_alignment',
+                                  -subtype  => 'similarity');
+
+ # build up a gene exon by exon
+ $e1 = Bio::Graphics::Feature->new(-start=>1,-stop=>100,-type=>'exon');
+ $e2 = Bio::Graphics::Feature->new(-start=>150,-stop=>200,-type=>'exon');
+ $e3 = Bio::Graphics::Feature->new(-start=>300,-stop=>500,-type=>'exon');
+ $f  = Bio::Graphics::Feature->new(-segments=>[$e1,$e2,$e3],-type=>'gene');
+
+=head1 DESCRIPTION
+
+This is a simple Bio::SeqFeatureI-compliant object that is compatible
+with Bio::Graphics::Panel.  With it you can create lightweight feature
+objects for drawing.
+
+All methods are as described in L<Bio::SeqFeatureI> with the following additions:
+
+=head2 The new() Constructor
+
+ $feature = Bio::Graphics::Feature->new(@args);
+
+This method creates a new feature object.  You can create a simple
+feature that contains no subfeatures, or a hierarchically nested object.
+
+Arguments are as follows:
+
+  -seq_id      the reference sequence
+  -start       the start position of the feature
+  -end         the stop position of the feature
+  -stop        an alias for end
+  -name        the feature name (returned by seqname())
+  -type        the feature type (returned by primary_tag())
+  -primary_tag the same as -type
+  -source      the source tag
+  -score       the feature score (for GFF compatibility)
+  -desc        a description of the feature
+  -segments    a list of subfeatures (see below)
+  -subtype     the type to use when creating subfeatures
+  -strand      the strand of the feature (one of -1, 0 or +1)
+  -phase       the phase of the feature (0..2)
+  -id          an alias for -name
+  -seqname     an alias for -name
+  -display_id  an alias for -name
+  -display_name an alias for -name  (do you get the idea the API has changed?)
+  -primary_id  unique database ID
+  -url         a URL to link to when rendered with Bio::Graphics
+  -configurator an object (like a Bio::Graphics::FeatureFile) that knows how 
+               to configure the graphical representation of the object based
+               on its type.
+  -attributes  a hashref of tag value attributes, in which the key is the tag
+               and the value is an array reference of values
+  -factory     a reference to a feature factory, used for compatibility with
+               more obscure parts of Bio::DB::GFF
+
+The subfeatures passed in -segments may be an array of
+Bio::Graphics::Feature objects, or an array of [$start,$stop]
+pairs. Each pair should be a two-element array reference.  In the
+latter case, the feature type passed in -subtype will be used when
+creating the subfeatures.
+
+If no feature type is passed, then it defaults to "feature".
+
+=head2 Non-SeqFeatureI methods
+
+A number of new methods are provided for compatibility with
+Ace::Sequence, which has a slightly different API from SeqFeatureI:
+
+=over 4
+
+=item url()
+
+Get/set the URL that the graphical rendering of this feature will link to.
+
+=item add_segment(@segments)
+
+Add one or more segments (a subfeature).  Segments can either be
+Feature objects, or [start,stop] arrays, as in the -segments argument
+to new().  The feature endpoints are automatically adjusted.
+
+=item segments()
+
+An alias for sub_SeqFeature().
+
+=item get_SeqFeatures()
+
+Alias for sub_SeqFeature()
+
+=item get_all_SeqFeatures()
+
+Alias for sub_SeqFeature()
+
+=item merged_segments()
+
+Another alias for sub_SeqFeature().
+
+=item stop()
+
+An alias for end().
+
+=item name()
+
+An alias for seqname().
+
+=item exons()
+
+An alias for sub_SeqFeature() (you don't want to know why!)
+
+=item configurator()
+
+Get/set the configurator that knows how to adjust the graphical
+representation of this feature based on its type.  Currently the only
+configurator that will work is Bio::Graphics::FeatureFile.
+
+=back
+
+=cut
+
+use strict;
+use base 'Bio::Graphics::FeatureBase';
+
+# usage:
+# Bio::Graphics::Feature->new(
+#                         -start => 1,
+#                         -end   => 100,
+#                         -name  => 'fred feature',
+#                         -strand => +1);
+#
+# Alternatively, use -segments => [ [start,stop],[start,stop]...]
+# to create a multisegmented feature.
+sub new {
+  my ($class, @args) = @_;
+  # the parent constructor builds the object from the full argument list
+  my $self = $class->SUPER::new(@args);
+
+  # -factory and -configurator, when given, are copied into the object
+  my %arg = @args;
+  foreach my $option ('factory', 'configurator') {
+    my $key = "-$option";
+    next unless exists $arg{$key};
+    $self->{$option} = $arg{$key};
+  }
+  return $self;
+}
+
+=head2 factory
+
+ Title   : factory
+ Usage   : $factory = $obj->factory([$new_factory])
+ Function: Returns the feature factory from which this feature was generated.
+           Mostly for compatibility with weird dependencies in gbrowse.
+ Returns : A feature factory (the previously stored one when a new
+           factory is given)
+ Args    : A new feature factory (optional)
+
+=cut
+
+# get/set the feature factory; the value held before the call is returned
+sub factory {
+  my ($self, @new) = @_;
+  my $previous = $self->{factory};
+  $self->{factory} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $id = $obj->display_name or $obj->display_name($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : none
+ Status  : Virtual
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location
+	   of feature on sequence or parent feature
+ Returns : Bio::LocationI object
+ Args    : none
+
+
+=head2 location_string
+
+ Title   : location_string
+ Usage   : my $string = $seqfeature->location_string()
+ Function: Returns a location string in a format recognized by gbrowse
+ Returns : a string
+ Args    : none
+
+This is a convenience function used by the generic genome browser. It
+returns the location of the feature and its subfeatures in the compact
+form "start1..end1,start2..end2,...".  Use
+$seqfeature-E<gt>location()-E<gt>toFTString() to obtain a standard
+GenBank/EMBL location representation.
+
+=head2 configurator
+
+ Title   : configurator
+ Usage   : my $configurator = $seqfeature->configurator([$new_configurator])
+ Function: Get/set an object that provides configuration information for this feature
+ Returns : configurator object
+ Args    : new configurator object (optional)
+
+A configurator object provides hints to the Bio::Graphics::Feature as
+to how to display itself on a canvas. Currently this stores the
+Bio::Graphics::FeatureFile and descendents.
+
+=cut
+
+
+# get/set the configurator (Bio::Graphics::FeatureFile) for this feature;
+# the value held before the call is returned
+sub configurator {
+  my ($self, @new) = @_;
+  my $previous = $self->{configurator};
+  $self->{configurator} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 url
+
+ Title   : url
+ Usage   : my $url = $seqfeature->url([$new_url])
+ Function: Get/set the URL associated with this feature
+ Returns : a URL string
+ Args    : new URL (optional)
+
+Features link to URLs when displayed as a clickable image map. This
+field holds that information.
+
+=cut
+
+
+# get/set the url for this feature; the value held before the call is
+# returned
+sub url {
+  my ($self, @new) = @_;
+  my $previous = $self->{url};
+  $self->{url} = $new[0] if @new;
+  return $previous;
+}
+
+=head2 make_link
+
+ Title   : make_link
+ Usage   : my $url = $seqfeature->make_link()
+ Function: Create a URL for the feature
+ Returns : a URL string
+ Args    : none
+
+This method will invoke the configurator in order to turn the feature
+into a link. Used by Bio::Graphics::Panel to create imagemaps.
+
+=cut
+
+# make a link
+# Returns the feature's own URL when one is set.  Otherwise the
+# configurator, if there is one and it supports make_link, is asked to
+# build the link.  In every other case nothing is returned (undef in scalar
+# context, the empty list in list context).
+sub make_link {
+  my $self = shift;
+
+  if (my $url = $self->url) {
+    return $url;
+  }
+
+  if (my $configurator = $self->configurator) {
+    return $configurator->make_link($self) if $configurator->can('make_link');
+  }
+
+  # no URL, and no configurator able to build one
+  return;
+}
+
+1;
+
+__END__
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,L<Bio::Graphics::Glyph>, L<Bio::Graphics::FeatureBase>
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureBase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureBase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureBase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,630 @@
+package Bio::Graphics::FeatureBase;
+
+=head1 NAME
+
+Bio::Graphics::FeatureBase - Base class for Bio::Graphics::Feature
+
+=head1 SYNOPSIS
+
+ See Bio::Graphics::Feature for full synopsis.
+
+=head1 DESCRIPTION
+
+This is the base class for Bio::Graphics::Feature. It has all the
+methods of Bio::Graphics::Feature except for those that are required
+to interface with Bio::Graphics::FeatureFile, namely factory(),
+configurator(), url(), and make_link().  Please see
+L<Bio::Graphics::Feature> for full documentation.
+
+=cut
+
+use strict;
+
+use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI Bio::RangeI);
+
+# Method aliases.  Each typeglob assignment installs the name on the
+# left as another name for the subroutine on the right, so that e.g.
+# $f->stop and $f->end run the same code.
+*stop        = \&end;
+*info        = \&name;
+*seqname     = \&name;
+# chained assignment: all three names end up pointing at segments()
+*exons       = *sub_SeqFeature = *merged_segments = \&segments;
+*get_all_SeqFeatures = *get_SeqFeatures = \&segments;
+*method         = \&primary_tag;
+*source         = \&source_tag;
+*get_tag_values = \&each_tag_value;
+*add_SeqFeature = \&add_segment;
+*get_all_tags   = \&all_tags;
+# the abs_* variants simply reuse the plain accessors
+*abs_ref        = \&ref;
+*abs_start      = \&start;
+*abs_end        = \&end;
+*abs_strand     = \&strand;
+
+# implement Bio::SeqI and FeatureHolderI interface
+
+# The feature acts as its own primary sequence object: returns the
+# invocant unchanged.
+sub primary_seq {
+  my ($self) = @_;
+  return $self;
+}
+# Get/set the annotation collection of this feature.
+#
+# Args   : optional object implementing Bio::AnnotationCollectionI
+# Returns: the stored collection; when none has been stored yet an empty
+#          Bio::Annotation::Collection is created on demand
+# Throws : via $obj->throw when the argument is not a
+#          Bio::AnnotationCollectionI
+sub annotation {
+    my ($obj,$value) = @_;
+    if( defined $value ) {
+        $obj->throw("object of class ".ref($value)." does not implement ".
+                    "Bio::AnnotationCollectionI. Too bad.")
+            unless $value->isa("Bio::AnnotationCollectionI");
+        $obj->{'_annotation'} = $value;
+    } elsif( ! defined $obj->{'_annotation'}) {
+        # Nothing in this file loads the collection class, so load it
+        # lazily here, and call the constructor with an explicit class
+        # method call rather than indirect-object syntax.
+        require Bio::Annotation::Collection;
+        $obj->{'_annotation'} = Bio::Annotation::Collection->new();
+    }
+    return $obj->{'_annotation'};
+}
+# Get/set the species.  A true argument is stored and returned; with no
+# argument (or a false one) the stored value is returned unchanged.
+sub species {
+    my ($self, $species) = @_;
+    return $self->{'species'} unless $species;
+    return $self->{'species'} = $species;
+}
+
+# Number of sub-segments stored on this feature (0 when there are none).
+sub feature_count {
+  my $self     = shift;
+  my $segments = $self->{segments} || [];
+  return scalar @$segments;
+}
+
+# This class has no alignment target: target() returns nothing, and
+# hit() simply forwards to target().
+sub target { return }
+sub hit {
+  my $self = shift;
+  return $self->target;
+}
+
+# Returns "primary_tag:source_tag", or just the primary tag when the
+# source tag is empty.
+sub type {
+  my $self    = shift;
+  my $primary = $self->primary_tag;
+  my $origin  = $self->source_tag;
+  return $primary if $origin eq '';
+  return join ':', $primary, $origin;
+}
+
+# usage:
+# Bio::Graphics::Feature->new(
+#                         -start => 1,
+#                         -end   => 100,
+#                         -name  => 'fred feature',
+#                         -strand => +1);
+#
+# Alternatively, use -segments => [ [start,stop],[start,stop]...]
+# to create a multisegmented feature.
+#
+# -strand may be given numerically (any positive value => +1, any
+# negative value => -1, false => 0) or as one of the symbols '+', '-'
+# and '.', which map to 1, -1 and 0 respectively.
+#
+# May be called on a class name or on an existing object; in the latter
+# case the new feature is blessed into that object's class.
+sub new {
+  my $class= shift;
+  $class = ref($class) if ref $class;
+  my %arg = @_;
+
+  my $self = bless {},$class;
+
+  $arg{-strand} ||= 0;
+  if ($arg{-strand} =~ /^[\+\-\.]$/){
+    # Symbolic strand.  The previous code read
+    #   $arg{-strand} = "+" && $self->{strand} = '1';
+    # which is a chain of unconditional assignments, so every symbolic
+    # strand ended up as 0.  Compare explicitly instead.
+    $self->{strand} = $arg{-strand} eq '+' ? '1'
+                    : $arg{-strand} eq '-' ? '-1'
+                    :                        '0';
+  } else {
+    $self->{strand}  = $arg{-strand} ? ($arg{-strand} >= 0 ? +1 : -1) : 0;
+  }
+  $self->{name}    = $arg{-name}   || $arg{-seqname} || $arg{-display_id}
+    || $arg{-display_name} || $arg{-id};
+  $self->{type}    = $arg{-type}   || $arg{-primary_tag} || 'feature';
+  $self->{subtype} = $arg{-subtype} if exists $arg{-subtype};
+  $self->{source}  = $arg{-source} || $arg{-source_tag} || '';
+  $self->{score}   = $arg{-score}   if exists $arg{-score};
+  $self->{start}   = $arg{-start};
+  $self->{stop}    = $arg{-end} || $arg{-stop};
+  $self->{ref}     = $arg{-seq_id} || $arg{-ref};
+  for my $option (qw(class url seq phase desc attributes primary_id)) {
+    $self->{$option} = $arg{"-$option"} if exists $arg{"-$option"};
+  }
+
+  # fix start, stop: coordinates given high-to-low are swapped and the
+  # strand is flipped to compensate
+  if (defined $self->{stop} && defined $self->{start}
+      && $self->{stop} < $self->{start}) {
+    @{$self}{'start','stop'} = @{$self}{'stop','start'};
+    $self->{strand} *= -1;
+  }
+
+  if (my $s = $arg{-segments}) {
+    $self->add_segment(@$s);
+  }
+
+  $self;
+}
+
+# Append one or more sub-segments to this feature.
+#
+# Each argument is either
+#   * an array reference [start, stop], which is turned into a new
+#     feature via $self->new, inheriting this feature's reference
+#     sequence, name, class and (sub)type; or
+#   * any other reference, which is stored as given and is expected to
+#     answer start() and end().
+# Arguments that are not references are ignored, as are pairs with an
+# undefined coordinate.  Afterwards this feature's own start/stop are
+# widened to span all segments.
+sub add_segment {
+  my ($self, @additions) = @_;
+  my $type = $self->{subtype} || $self->{type};
+  $self->{segments} ||= [];
+  my $ref   = $self->seq_id;
+  my $name  = $self->name;
+  my $class = $self->class;
+
+  # sentinels used when the feature has no (or a zero) start/end yet
+  my $min_start = $self->start ||  999_999_999_999;
+  my $max_stop  = $self->end   || -999_999_999_999;
+
+  my @segments = @{$self->{segments}};
+
+  for my $seg (@additions) {
+    next unless ref $seg;
+
+    if (ref($seg) ne 'ARRAY') {
+      # ready-made segment object
+      push @segments,$seg;
+      $min_start = $seg->start if ($seg->start && $seg->start < $min_start);
+      $max_stop  = $seg->end   if ($seg->end && $seg->end > $max_stop);
+      next;
+    }
+
+    my ($low,$high) = @{$seg};
+    next unless defined $low && defined $high;  # fixes an obscure bug somewhere above us
+
+    my $strand = $self->{strand};
+    # a reversed pair is normalised and forced onto the minus strand
+    ($low,$high,$strand) = ($high,$low,-1) if $low > $high;
+
+    push @segments,$self->new(-start  => $low,
+                              -stop   => $high,
+                              -strand => $strand,
+                              -ref    => $ref,
+                              -type   => $type,
+                              -name   => $name,
+                              -class  => $class);
+    $min_start = $low  if $low  < $min_start;
+    $max_stop  = $high if $high > $max_stop;
+  }
+
+  if (@segments) {
+    local $^W = 0;  # some warning of an uninitialized variable...
+    # Segments are deliberately kept in insertion order: sorting them
+    # by start here was found to be too slow.
+    $self->{segments} = \@segments;
+    $self->{ref}    ||= $self->{segments}[0]->seq_id;
+    $self->{start}    = $min_start;
+    $self->{stop}     = $max_stop;
+  }
+}
+
+# Returns the stored sub-segments.  In list context this is the list of
+# segments (empty when there are none); in scalar context it is their
+# count.
+sub segments {
+  my $self = shift;
+  my $list = $self->{segments};
+  unless ($list) {
+    return wantarray ? () : 0;
+  }
+  return @$list;
+}
+# Get/set the score; returns the value held before the call.
+sub score {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{score};
+  $self->{score} = $replacement[0] if @replacement;
+  return $previous;
+}
+# Read-only: the feature type stored at construction time.
+sub primary_tag {
+  my $self = shift;
+  return $self->{type};
+}
+# Get/set the feature name; returns the value held before the call.
+sub name {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{name};
+  $self->{name} = $replacement[0] if @replacement;
+  return $previous;
+}
+# Read-only synonym for ref(): the reference sequence ID.
+sub seq_id {
+  my $self = shift;
+  return $self->ref();
+}
+# Get/set the reference sequence ID; returns the value held before the
+# call.
+sub ref {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{ref};
+  $self->{ref} = $replacement[0] if @replacement;
+  return $previous;
+}
+# Get/set the start coordinate; returns the value held before the call.
+sub start {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{start};
+  $self->{start} = $replacement[0] if @replacement;
+  return $previous;
+}
+# Get/set the end coordinate (stored under the "stop" key); returns the
+# value held before the call.
+sub end {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{stop};
+  $self->{stop} = $replacement[0] if @replacement;
+  return $previous;
+}
+# Get/set the strand; returns the value held before the call.
+sub strand {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{strand};
+  $self->{strand} = $replacement[0] if @replacement;
+  return $previous;
+}
+# Inclusive length of the feature: end - start + 1.
+sub length {
+  my $self  = shift;
+  my $last  = $self->end;
+  my $first = $self->start;
+  return $last - $first + 1;
+}
+
+# Returns whatever was supplied as -seq at construction time, or the
+# empty string when no sequence was supplied.
+sub seq {
+  my $self = shift;
+  return '' unless exists $self->{seq};
+  return $self->{seq};
+}
+
+# Returns the sequence as a plain value: when seq() yields a reference
+# (an object), that object's own seq() is returned instead.
+sub dna {
+  my $self     = shift;
+  my $residues = $self->seq;
+  if (CORE::ref($residues)) {
+    $residues = $residues->seq;
+  }
+  return $residues;
+}
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $id = $obj->display_name or $obj->display_name($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+
+=cut
+
+# Get/set the display name.  This is a thin wrapper around name(): any
+# argument is now forwarded so that $obj->display_name($newid) really
+# sets the name, as the documentation above promises (previously the
+# argument was silently dropped).  As with name(), the value returned is
+# the one held before the call.
+sub display_name {
+  my $self = shift;
+  return $self->name(@_);
+}
+
+*display_id = \&display_name;
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+# Features of this class carry no accession number, so the documented
+# placeholder string is always returned.
+sub accession_number { 'unknown' }
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : none
+ Status  : Virtual
+
+
+=cut
+
+# Always 'dna': this class makes no attempt to detect other alphabets.
+sub alphabet { 'dna' }
+
+
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+
+=cut
+
+# Get/set the description; returns the value held before the call.
+sub desc {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{desc};
+  $self->{desc} = $replacement[0] if @replacement;
+  return $previous;
+}
+
+# With a tag name: the value(s) of that tag, via each_tag_value().
+# Without arguments: the whole attribute table flattened to a key/value
+# list (empty when the feature has no attributes).
+sub attributes {
+  my ($self, @tags) = @_;
+  return $self->each_tag_value(@tags) if @tags;
+  my $table = $self->{attributes};
+  return $table ? %$table : ();
+}
+
+# Get/set the primary ID; returns the value held before the call.
+sub primary_id {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{primary_id};
+  $self->{primary_id} = $replacement[0] if @replacement;
+  return $previous;
+}
+
+# Synonym for reading the description.
+sub notes {
+  my $self = shift;
+  return $self->desc;
+}
+
+# Smaller of start and end.
+sub low {
+  my $self = shift;
+  if ($self->start < $self->end) {
+    return $self->start;
+  }
+  return $self->end;
+}
+
+# Larger of start and end.
+sub high {
+  my $self = shift;
+  if ($self->start > $self->end) {
+    return $self->start;
+  }
+  return $self->end;
+}
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location
+	   of feature on sequence or parent feature
+ Returns : Bio::LocationI object
+ Args    : none
+
+=cut
+
+# A feature without sub-segments is its own location.  Otherwise a
+# Bio::Location::Split is built with every segment added as a
+# sub-location.  The Split class is loaded on first use.
+sub location {
+   my $self = shift;
+   Bio::Location::Split->can('new') or require Bio::Location::Split;
+
+   my @parts = $self->segments;
+   return $self unless @parts;
+
+   my $split = Bio::Location::Split->new();
+   $split->add_sub_Location($_) for @parts;
+   return $split;
+}
+
+# Returns one Bio::Location::Simple per sub-segment, or a single one
+# covering the feature itself when it has no segments.  The Simple
+# class is loaded on first use.
+sub each_Location {
+  my $self = shift;
+  Bio::Location::Simple->can('new') or require Bio::Location::Simple;
+
+  my @parts = $self->segments;
+  unless (@parts) {
+    return Bio::Location::Simple->new(-start  => $self->start,
+                                      -end    => $self->end,
+                                      -strand => $self->strand);
+  }
+
+  return map {
+    Bio::Location::Simple->new(-start  => $_->start,
+                               -end    => $_->end,
+                               -strand => $_->strand);
+  } @parts;
+}
+
+=head2 location_string
+
+ Title   : location_string
+ Usage   : my $string = $seqfeature->location_string()
+ Function: Returns a location string in a format recognized by gbrowse
+ Returns : a string
+ Args    : none
+
+This is a convenience function used by the generic genome browser. It
+returns the location of the feature and its subfeatures in the compact
+form "start1..end1,start2..end2,...".  Use
+$seqfeature-E<gt>location()-E<gt>to_FTstring() to obtain a standard
+GenBank/EMBL location representation.
+
+=cut
+
+# Comma-joined "low..high" strings, one per sub-segment, or the
+# feature's own "low..high" string when it has no segments.
+sub location_string {
+  my $self  = shift;
+  my @parts = $self->segments;
+  return $self->to_FTstring unless @parts;
+  return join ',', map { $_->to_FTstring } @parts;
+}
+
+# Returns a fresh Bio::Location::WidestCoordPolicy, loading the class on
+# first use.
+sub coordinate_policy {
+   Bio::Location::WidestCoordPolicy->can('new')
+     or require Bio::Location::WidestCoordPolicy;
+   return Bio::Location::WidestCoordPolicy->new();
+}
+
+# Location-interface accessors.  Positions are always exact, so the
+# min/max variants collapse onto low() and high().
+sub min_start      { my $self = shift; return $self->low  }
+sub max_start      { my $self = shift; return $self->low  }
+sub min_end        { my $self = shift; return $self->high }
+sub max_end        { my $self = shift; return $self->high }
+sub start_pos_type { return 'EXACT' }
+sub end_pos_type   { return 'EXACT' }
+# "low..high" representation of this feature's extent.
+sub to_FTstring {
+  my $self = shift;
+  my $from = $self->min_start;
+  my $to   = $self->max_end;
+  return $from . '..' . $to;
+}
+# Read-only: the phase supplied at construction time, if any.
+sub phase {
+  my $self = shift;
+  return $self->{phase};
+}
+# Get/set the class.  Returns the value held before the call, falling
+# back to 'Sequence' when none was defined.
+sub class {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{class};
+  $self->{class} = $replacement[0] if @replacement;
+  return $previous if defined $previous;
+  return 'Sequence';  # acedb is still haunting me - LS
+}
+
+# Get/set the GFF version used by gff_string().  Returns the version in
+# force before the call; 2 is reported when none has been set.
+sub version {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{gff3_version} || 2;
+  $self->{gff3_version} = $replacement[0] if @replacement;
+  return $previous;
+}
+
+# Dump this feature as GFF text.
+#
+# Args   : $recurse - when true, sub-features are dumped as well
+#          any further arguments are passed through to gff3_string()
+# Returns: a GFF3 string (no trailing newline) when version() is 3,
+#          otherwise one newline-terminated GFF2 line per feature
+sub gff_string {
+  my $self    = shift;
+  my $recurse = shift;
+
+  if ($self->version == 3) {
+    # $recurse has already been taken off @_, so hand it on explicitly;
+    # otherwise gff3_string() would never see the recursion flag.
+    return $self->gff3_string($recurse,@_);
+  }
+
+  my $name  = $self->name;
+  my $class = $self->class;
+  # plain conditional expression: "my $x = ... if COND" leaves $x in an
+  # unspecified state when COND is false
+  my $group = $name ? "$class $name" : '';
+  my $strand = ('-','.','+')[$self->strand+1];
+  my $string = join("\t",$self->ref||'.',$self->source||'.',$self->method||'.',
+                         $self->start||'.',$self->stop||'.',
+                         $self->score||'.',$strand||'.',$self->phase||'.',
+                         $group||'');
+  $string .= "\n";
+  if ($recurse) {
+    foreach ($self->sub_SeqFeature) {
+      $string .= $_->gff_string($recurse);
+    }
+  }
+  $string;
+}
+
+# Dump this feature (and optionally its sub-features) as GFF3 lines
+# joined by newlines.
+#
+# Args   : $recurse - when true, sub-features are dumped too
+#          $parent  - parent feature, used to emit a Parent= attribute
+# Returns: a string without a trailing newline
+sub gff3_string {
+  my $self              = shift;
+  my ($recurse,$parent) = @_;
+
+  my $group  = $self->format_attributes($parent);
+  my $strand = ('-','.','+')[$self->strand+1];
+  my $p      = join("\t",$self->ref||'.',$self->source||'.',$self->method||'.',
+                    $self->start||'.',$self->stop||'.',
+                    $self->score||'.',$strand||'.',$self->phase||'.',
+                    $group||'');
+
+  # the "homogeneous" flag will be true if the parent and children are all of the same type,
+  # meaning that they can be collapsed into a set of children with all the same ID
+  my ($parent_type,$homogeneous);
+  $homogeneous = 1;
+  my @children;
+  if ($recurse) {
+    foreach ($self->sub_SeqFeature) {
+      push @children,$_->gff3_string(1,$self);
+      $parent_type   ||= $self->type;
+      $homogeneous   &&= $_->type eq $parent_type && !defined $_->primary_id;
+    }
+  }
+
+  # if we get here we're dealing with a homogeneous set of Parent[child,child...]
+  # where parent and child all have the same type. In this case, we omit the Parent
+  # and give the children the same ID. This removes an extraneous level of parentage.
+
+  if (@children && $homogeneous) {
+    foreach (@children) {
+      s/Parent=/ID=/g;
+    } # replace Parent tag with ID
+    return join "\n",@children;
+  }
+
+  return join("\n",$p,@children);
+}
+
+
+# No database is associated with features of this class: returns
+# nothing.
+sub db {
+  return;
+}
+
+# Get/set the source tag; returns the value held before the call.
+sub source_tag {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{source};
+  $self->{source} = $replacement[0] if @replacement;
+  return $previous;
+}
+
+# Placeholder that always returns nothing.  It is a candidate for
+# removal; it may have been added for compatibility with
+# Ace::Sequence::Feature.
+sub introns { return }
+
+# True when the attribute table contains the given tag.
+sub has_tag {
+  my ($self, $tag) = @_;
+  return exists $self->{attributes}{$tag};
+}
+
+# Percent-encode a string for use in a GFF3 attribute column.  Every
+# character outside the allowed set in the pattern below is replaced by
+# its upper-cased "%XX" hex code; spaces are left alone.
+sub escape {
+  my ($self, $text) = @_;
+  $text =~ s/([^a-zA-Z0-9_. :?^*\(\)\[\]@!+-])/uc sprintf("%%%02x",ord($1))/eg;
+#  $text =~ tr/ /+/;  # not needed in GFF3
+  return $text;
+}
+
+# Names of all attribute tags stored on this feature.
+sub all_tags {
+  my ($self) = @_;
+  return keys %{ $self->{attributes} };
+}
+# Value(s) stored under one attribute tag.  A tag whose value is a
+# reference is expanded as an array; a plain scalar value is returned as
+# is; a missing or false value yields nothing.
+sub each_tag_value {
+  my ($self, $tag) = @_;
+  my $stored = $self->{attributes}{$tag};
+  return unless $stored;
+  return @$stored if CORE::ref($stored);
+  return $stored;
+}
+
+# Build the GFF3 attribute column for this feature.
+#
+# Args   : $parent - optional parent feature; when defined a Parent=
+#          entry carrying the parent's primary_id is emitted
+# Returns: "tag=value" pairs joined by ';'.  Stored attributes come
+#          first (one pair per value), followed by ID, Parent and Name
+#          when they are available.  Tags and values are escape()d.
+sub format_attributes {
+  my $self   = shift;
+  my $parent = shift;
+  my @tags = $self->all_tags;
+  my @result;
+  for my $t (@tags) {
+    my @values = $self->each_tag_value($t);
+    push @result,join '=',$self->escape($t),$self->escape($_) foreach @values;
+  }
+  my $id   = $self->primary_id;
+  my $name = $self->display_name;
+  push @result,"ID=".$self->escape($id)                     if defined $id;
+  push @result,"Parent=".$self->escape($parent->primary_id) if defined $parent;
+  push @result,"Name=".$self->escape($name)                 if defined $name;
+  return join ';',@result;
+}
+
+# Explicit no-op destructor.
+# NOTE(review): presumably here so that object destruction never falls
+# through to an AUTOLOAD in a parent or child class -- confirm.
+sub DESTROY { }
+
+1;
+
+=head2 clone
+
+ Title   : clone
+ Usage   : my $feature = $seqfeature->clone
+ Function: Create a deep copy of the feature
+ Returns : A copy of the feature
+ Args    : none
+
+=cut
+
+# Copy this feature.  The top-level hash is copied and the attribute
+# table is rebuilt so that array-valued tags get their own arrays.
+# Scalar-valued tags (which each_tag_value() supports) are copied as
+# they are; the old code dereferenced every value as an array, which is
+# fatal under "use strict" for anything that is not an array reference.
+# Everything else, including the segment list, is shared with the
+# original.
+sub clone {
+  my $self  = shift;
+  my %clone  = %$self;
+  # overwrite attributes
+  my $clone = bless \%clone,CORE::ref($self);
+  $clone{attributes} = {};
+  for my $k (keys %{$self->{attributes}}) {
+    my $value = $self->{attributes}{$k};
+    $clone{attributes}{$k} = CORE::ref($value) eq 'ARRAY' ? [@$value] : $value;
+  }
+  return $clone;
+}
+
+
+__END__
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Feature>, L<Bio::Graphics::FeatureFile>,
+L<Bio::Graphics::Panel>,L<Bio::Graphics::Glyph>, L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2006 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile/Iterator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile/Iterator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile/Iterator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,64 @@
+package Bio::Graphics::FeatureFile::Iterator;
+
+# $Id: Iterator.pm,v 1.1 2003/08/25 22:33:36 lstein Exp $
+
+=head1 NAME
+
+Bio::Graphics::FeatureFile::Iterator -- Iterator across a Bio::Graphics::FeatureFile
+
+=head1 SYNOPSIS
+
+ use Bio::Graphics::FeatureFile;
+ my $data  = Bio::Graphics::FeatureFile->new(-file => 'features.txt');
+ my $iterator = $data->get_seq_stream;
+ while (my $feature = $iterator->next_seq) {
+   print $feature->display_id,"\t",$feature->start,"\t",$feature->end,"\n";
+ }
+
+=head1 DESCRIPTION
+
+This is a Bio::SeqIO-like object that recognizes the next_seq() and
+next_feature() methods.  The two methods are synonymous.
+
+There is also a rewind() method which will start iterating from the
+beginning again.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Feature>,
+L<Bio::Graphics::FeatureFile>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+# Create an iterator over the given array reference of features,
+# positioned at the first element.
+sub new {
+  my ($package, $features) = @_;
+  my %state = (features => $features,
+               index    => 0);
+  return bless \%state, $package;
+}
+
+# Return the feature at the current position and advance by one.
+# Returns nothing when the iterator holds no feature list, and undef
+# once the position has moved past the last feature.
+sub next_seq {
+  my $self = shift;
+  return unless $self->{features};
+  return $self->{features}[$self->{index}++];
+}
+
+# next_feature() is the name promised by the module documentation.
+# next_features() is the spelling this file has always installed, and is
+# kept so that existing callers continue to work.
+*next_feature  = \&next_seq;
+*next_features = \&next_seq;
+
+# Move the iterator back to the first feature.
+sub rewind {
+  my ($self) = @_;
+  return $self->{index} = 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/FeatureFile.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1578 @@
+package Bio::Graphics::FeatureFile;
+
+# $Id: FeatureFile.pm,v 1.78.4.7 2006/12/02 18:35:32 lstein Exp $
+# This package parses and renders a simple tab-delimited format for features.
+# It is simpler than GFF, but still has a lot of expressive power.
+# See __END__ for the file format
+
+=head1 NAME
+
+Bio::Graphics::FeatureFile -- A set of Bio::Graphics features, stored in a file
+
+=head1 SYNOPSIS
+
+ use Bio::Graphics::FeatureFile;
+ my $data  = Bio::Graphics::FeatureFile->new(-file => 'features.txt');
+
+
+ # create a new panel and render contents of the file onto it
+ my $panel = $data->new_panel;
+ my $tracks_rendered = $data->render($panel);
+
+ # or do it all in one step
+ my ($tracks_rendered,$panel) = $data->render;
+
+ # for more control, render tracks individually
+ my @feature_types = $data->types;
+ for my $type (@feature_types) {
+    my $features = $data->features($type);
+    my %options  = $data->style($type);
+    $panel->add_track($features,%options);  # assuming we have a Bio::Graphics::Panel
+ }
+
+ # get individual settings
+ my $est_fg_color = $data->setting(EST => 'fgcolor');
+
+ # or create the FeatureFile by hand
+
+ # add a type
+ $data->add_type(EST => {fgcolor=>'blue',height=>12});
+
+ # add a feature
+ my $feature = Bio::Graphics::Feature->new(
+                                             # params
+                                          ); # or some other SeqI
+ $data->add_feature($feature=>'EST');
+
+=head1 DESCRIPTION
+
+The Bio::Graphics::FeatureFile module reads and parses files that
+describe sequence features and their renderings.  It accepts both GFF
+format and a more human-friendly file format described below.  Once a
+FeatureFile object has been initialized, you can interrogate it for
+its constituent features and their settings, or render the entire file
+onto a Bio::Graphics::Panel.
+
+This module is a precursor of Jason Stajich's
+Bio::Annotation::Collection class, and fulfills a similar function of
+storing a collection of sequence features.  However, it also stores
+rendering information about the features, and does not currently
+follow the CollectionI interface.
+
+=head2 The File Format
+
+There are two types of entry in the file format: feature entries, and
+formatting entries.  They can occur in any order.  See the Appendix
+for a full example.
+
+Feature entries can take several forms.  At their simplest, they look
+like this:
+
+ Gene	B0511.1	516-11208
+
+This means that a feature of type "Gene" and name "B0511.1" occupies
+the range between bases 516 and 11208.  A range can be specified
+equally well using a hyphen, or two dots as in 516..11208.  Negative
+coordinates are allowed, such as -187..1000.
+
+A discontinuous range ("split location") uses commas to separate the
+ranges.  For example:
+
+ Gene B0511.1  516-619,3185-3294,10946-11208
+
+Alternatively, the locations can be split by repeating the features
+type and name on multiple adjacent lines:
+
+ Gene	B0511.1	516-619
+ Gene	B0511.1	3185-3294
+ Gene	B0511.1	10946-11208
+
+A comment can be added to features by adding a fourth column.  These
+comments will be rendered as under-the-glyph descriptions by those
+glyphs that honor descriptions:
+
+ Gene  B0511.1  516-619,3185-3294,10946-11208 "Putative primase"
+
+Columns are separated using whitespace, not (necessarily) tabs.
+Embedded whitespace can be escaped using quote marks or backslashes in
+the same way as in the shell:
+
+ 'Putative Gene' my\ favorite\ gene 516-11208
+
+Features can be grouped so that they are rendered by the "group" glyph
+(so far this has only been used to relate 5' and 3' ESTs).  To start a
+group, create a two-column feature entry showing the group type and a
+name for the group.  Follow this with a list of feature entries with a
+blank type.  For example:
+
+ EST	yk53c10
+ 	yk53c10.3	15000-15500,15700-15800
+ 	yk53c10.5	18892-19154
+
+This example is declaring that the ESTs named yk53c10.3 and yk53c10.5
+belong to the same group named yk53c10.  
+
+=cut
+
+use strict;
+use Bio::Graphics::Feature;
+use Bio::DB::GFF::Util::Rearrange;
+use Carp 'cluck','carp','croak';
+# use Bio::DB::GFF; # not needed - load later
+use IO::File;
+use Text::ParseWords 'shellwords';
+
+# default colors for unconfigured features
+# render() cycles through this list to pick the default -bgcolor of
+# each track it adds.
+my @COLORS = qw(cyan blue red yellow green wheat turquoise orange);
+
+# NOTE(review): neither constant is used in the part of the file
+# reviewed here; WIDTH is presumably the default panel width in pixels
+# and MAX_REMAP a limit used during coordinate remapping -- confirm
+# against their users.
+use constant WIDTH => 600;
+use constant MAX_REMAP => 100;
+
+=head2 METHODS
+
+=over 4
+
+=item $features = Bio::Graphics::FeatureFile-E<gt>new(@args)
+
+Create a new Bio::Graphics::FeatureFile using @args to initialize the
+object.  Arguments are -name=E<gt>value pairs:
+
+  Argument         Value
+  --------         -----
+
+   -file           Read data from a file path or filehandle.  Use
+                   "-" to read from standard input.
+
+   -text           Read data from a text scalar.
+
+   -map_coords     Coderef containing a subroutine to use for remapping
+                   all coordinates.
+
+   -smart_features Flag indicating that the features created by this
+                   module should be made aware of the FeatureFile
+		   object by calling their configurator() method.
+
+   -safe           Indicates that the contents of this file are trusted.
+                   Any option value that begins with the string "sub {"
+                   or \&subname will be evaluated as a code reference.
+
+The -file and -text arguments are mutually exclusive, and -file will
+supersede the other if both are present.
+
+-map_coords points to a coderef with the following signature:
+
+  ($newref,[$start1,$end1],[$start2,$end2]....)
+            = coderef($ref,[$start1,$end1],[$start2,$end2]...)
+
+See the Bio::Graphics::Browser (part of the generic genome browser
+package) for an illustration of how to use this to do wonderful stuff.
+
+The -smart_features flag is used by the generic genome browser to
+provide features with a way to access the link-generation code.  See
+gbrowse for how this works.
+
+If the file is trusted, and there is an option named "init_code" in
+the [GENERAL] section of the file, it will be evaluated as perl code
+immediately after parsing.  You can use this to declare global
+variables and subroutines for use in option values.
+
+=back
+
+=cut
+
+# args array:
+# -file => parse from a file (- allowed for ARGV)
+# -text => parse from a text scalar
+# -map_coords => code ref to do coordinate mapping
+#                called with ($ref,[$start1,$stop1],[$start2,$stop2]...)
+#                returns     ($newref,$new_coord1,$new_coord2...)
+
+# Constructor.  Recognised arguments: -file, -text, -map_coords,
+# -smart_features and -safe.
+#
+# -file may be an open filehandle, the string "-" (read via
+# parse_argv) or a path that is opened with IO::File; -text is only
+# consulted when -file is absent or false.  Croaks when the path cannot
+# be opened.  A handle obtained from -file is closed before returning.
+sub new {
+  my ($class, %args) = @_;
+  my $self = bless {
+                    config   => {},
+                    features => {},
+                    seenit   => {},
+                    types    => [],
+                    max      => undef,
+                    min      => undef,
+                    stat     => [],
+                    refs     => {},
+                    safe     => undef,
+                   },$class;
+
+  # only a genuine code reference is accepted as coordinate mapper
+  if (exists $args{-map_coords} && ref($args{-map_coords}) eq 'CODE') {
+    $self->{coordinate_mapper} = $args{-map_coords};
+  }
+
+  if (exists $args{-smart_features}) {
+    $self->smart_features($args{-smart_features});
+  }
+  if (exists $args{-safe}) {
+    $self->{safe} = $args{-safe};
+  }
+
+  # call with
+  #   -file
+  #   -text
+  my $fh;
+  if (my $file = $args{-file}) {
+    # fileno() may be handed a handle name given as a plain string
+    no strict 'refs';
+    if (defined fileno($file)) {
+      $fh = $file;
+    }
+    elsif ($file eq '-') {
+      $self->parse_argv();
+    }
+    else {
+      $fh = IO::File->new($file) or croak("Can't open $file: $!\n");
+    }
+    # parse_file() returns immediately when $fh is still undefined
+    $self->parse_file($fh);
+  }
+  elsif (my $text = $args{-text}) {
+    $self->parse_text($text);
+  }
+
+  if ($fh) {
+    close($fh) or warn "Error closing file: $!";
+  }
+  return $self;
+}
+
+# render our features onto a panel using configuration data
+# return the number of tracks inserted
+
+=over 4
+
+=item ($rendered,$panel) = $features-E<gt>render([$panel, $position_to_insert, $options, $max_bump, $max_label, $selector])
+
+Render features in the data set onto the indicated
+Bio::Graphics::Panel.  If no panel is specified, creates one.
+
+All arguments are optional.
+
+$panel is a Bio::Graphics::Panel that has previously been created and
+configured.
+
+$position_to_insert indicates the position at which to start inserting
+new tracks. The last current track on the panel is assumed.
+
+$options is a scalar used to control automatic expansion of the
+tracks. 0=auto, 1=compact, 2=expanded, 3=expand and label,
+4=hyperexpand, 5=hyperexpand and label.
+
+$max_bump and $max_label indicate the maximum number of features
+before bumping and labeling are turned off.
+
+$selector is a code ref that can be used to filter which feature types
+are rendered. It is called with the FeatureFile object and a feature
+type name, and should return true to render that type and false to
+skip it.
+
+In a scalar context returns the number of tracks rendered.  In a list
+context, returns a three-element list containing the number of
+tracks rendered, the panel, and a reference to an array of all the
+track objects created.
+
+=back
+
+=cut
+
+#"
+
+# Render the features of this file onto a Bio::Graphics::Panel, one
+# track per feature type.
+#
+# Args   : $panel              - panel to draw on (a new one is created
+#                                 via new_panel when omitted)
+#          $position_to_insert - track index to insert at; tracks are
+#                                 appended when undefined
+#          $options            - hash ref of track options, or a number
+#                                 0-5 selecting a bump/label preset
+#          $max_bump,$max_label- feature counts at which bumping /
+#                                 labelling are switched off
+#          $selector           - code ref called as
+#                                 $selector->($featurefile,$type); a
+#                                 false return skips that type
+# Returns: number of tracks in scalar context; (count, panel, array ref
+#          of tracks) in list context
+sub render {
+  my $self = shift;
+  my $panel = shift;
+  my ($position_to_insert,$options,$max_bump,$max_label,$selector) = @_;
+
+  $panel ||= $self->new_panel;
+
+  # count up number of tracks inserted
+  my @tracks;
+  my $color;
+  my %types = map {$_=>1} $self->configured_types;
+
+  # configured types come first in their configured order, followed by
+  # the remaining types in sorted order
+  my @configured_types   = grep {exists $self->{features}{$_}} $self->configured_types;
+  my @unconfigured_types = sort grep {!exists $types{$_}}      $self->types;
+
+  my @base_config = $self->style('general');
+
+  my @override = ();
+  if ($options && ref $options eq 'HASH') {
+    @override = %$options;
+  } else {
+    $options ||= 0;
+    if ($options == 1) {  # compact
+      push @override,(-bump => 0,-label=>0);
+    } elsif ($options == 2) { #expanded
+      push @override,(-bump=>1);
+    } elsif ($options == 3) { #expand and label
+      push @override,(-bump=>1,-label=>1);
+    } elsif ($options == 4) { #hyperexpand
+      push @override,(-bump => 2);
+    } elsif ($options == 5) { #hyperexpand and label
+      push @override,(-bump => 2,-label=>1);
+    }
+  }
+
+  for my $type (@configured_types,@unconfigured_types) {
+    next if defined $selector && !$selector->($self,$type);
+    next unless length $type > 0; # avoid empty ''
+    my $f = $self->features($type);
+    my @features = grep {$self->{visible}{$_} || $_->type eq 'group'} @$f;
+    next unless @features;  # suppress tracks for features that don't appear
+    my $features = \@features;
+
+    my @auto_bump;
+    push @auto_bump,(-bump  => @$features < $max_bump)  if defined $max_bump;
+    push @auto_bump,(-label => @$features < $max_label) if defined $max_label;
+
+    # later entries win: built-in defaults, then automatic bump/label,
+    # then [general] settings, then per-type settings, then overrides
+    my @config = ( -glyph   => 'segments',         # really generic
+                   -bgcolor => $COLORS[$color++ % @COLORS],
+                   -label   => 1,
+                   -description => 1,
+                   -key     => $type,
+                   @auto_bump,
+                   @base_config,         # global
+                   $self->style($type),  # feature-specific
+                   @override,
+                 );
+    if (defined($position_to_insert)) {
+      push @tracks,$panel->insert_track($position_to_insert++,$features,@config);
+    } else {
+      push @tracks,$panel->add_track($features,@config);
+    }
+  }
+  return wantarray ? (scalar(@tracks),$panel,\@tracks) : scalar @tracks;
+}
+
+# Record the stat() information of the given filehandle in
+# $self->{stat}.
+sub _stat {
+  my ($self, $fh) = @_;
+  my @info = stat($fh);
+  return $self->{stat} = \@info;
+}
+
+=over 4
+
+=item $error = $features-E<gt>error([$error])
+
+Get/set the current error message.
+
+=back
+
+=cut
+
+# Get/set the current error message; returns the message held before
+# the call.
+sub error {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{error};
+  $self->{error} = $replacement[0] if @replacement;
+  return $previous;
+}
+
+=over 4
+
+=item $smart_features = $features-E<gt>smart_features([$flag])
+
+Get/set the "smart_features" flag.  If this is set, then any features
+added to the featurefile object will have their configurator() method
+called using the featurefile object as the argument.
+
+=back
+
+=cut
+
+# Get/set the "smart_features" flag; returns the value held before the
+# call.
+sub smart_features {
+  my ($self, @replacement) = @_;
+  my $previous = $self->{smart_features};
+  $self->{smart_features} = $replacement[0] if @replacement;
+  return $previous;
+}
+
+# Parse everything readable through the magic <> handle (files named
+# in @ARGV, or standard input), one newline-terminated line at a time.
+sub parse_argv {
+  my ($self) = @_;
+  $self->init_parse;
+
+  local $/ = "\n";
+  while (<>) {
+    chomp($_);
+    $self->parse_line($_);
+  }
+  return $self->finish_parse;
+}
+
+# Parse an open filehandle line by line.  Does nothing when no handle
+# is supplied.  Reading stops early as soon as parse_line() returns a
+# false value.
+sub parse_file {
+  my ($self, $fh) = @_;
+  return unless $fh;
+
+  $self->_stat($fh);
+  $self->init_parse;
+
+  local $/ = "\n";
+  while (<$fh>) {
+    chomp($_);
+    last unless $self->parse_line($_);
+  }
+  return $self->finish_parse;
+}
+
+# Parse a block of text held in a scalar.  The text is split on LF, CR
+# or CRLF line endings and each line is handed to parse_line().
+sub parse_text {
+  my ($self, $text) = @_;
+
+  $self->init_parse;
+  for my $line (split /\015?\012|\015\012?/,$text) {
+    $self->parse_line($line);
+  }
+  return $self->finish_parse;
+}
+
+# Parse one line of a feature file and update the parser state in $self.
+#
+# A line is recognized, in this order, as: a "##gff-version" header, a
+# blank/comment line, a ">FASTA" header, a configuration continuation
+# line, a "[stanza]" header, a "key = value" option, a short group line,
+# or a feature data line (8+ column GFF, or one of the two simplified
+# tab/space formats).
+#
+# Returns 0 when a ">FASTA" line is seen (the caller should stop reading)
+# and 1 in every other case.
+sub parse_line {
+  my $self = shift;
+  local $_ = shift;
+
+  s/\015//g;  # get rid of carriage returns left over by MS-DOS/Windows systems
+  s/\s+$//;   # get rid of trailing whitespace
+
+  # capture GFF header
+  if (/^\#\#gff-version\s+(\d+)/) {
+    $self->{gff_version} = $1;
+    require Bio::DB::GFF;
+    return 1;
+  }
+
+  # remove comments (but rescue hex-code colors)
+  s/\s*\#.+$// unless /\s*\#[0-9A-Fa-f]{6}\b/;
+
+  # skip on blank lines
+  return 1 if /^\s*$/;
+
+  # abort if we see a >FASTA line
+  return 0 if /^>/;
+
+  if (/^\s+(.+)/ && $self->{current_tag}) { # configuration continuation line
+    my $value = $1;
+    my $cc = $self->{current_config} ||= 'general';       # in case no configuration named
+    $self->{config}{$cc}{$self->{current_tag}} .= ' ' . $value;
+    # respect newlines in code subs
+    $self->{config}{$cc}{$self->{current_tag}} .= "\n"
+      if $self->{config}{$cc}{$self->{current_tag}}=~ /^sub\s*\{/;
+    return 1;
+  }
+
+  if (/^\s*\[([^\]]+)\]/) {  # beginning of a configuration section
+    my $label = $1;
+    my $cc = $label =~ /^(general|default)$/i ? 'general' : $label;  # normalize
+    push @{$self->{types}},$cc unless $cc eq 'general';
+    $self->{current_config} = $cc;
+    return 1;
+  }
+
+  if (/^([\w: -]+?)\s*=\s*(.*)/) {   # key value pair within a configuration section
+    my $tag = lc $1;
+    my $cc = $self->{current_config} ||= 'general';       # in case no configuration named
+    my $value = defined $2 ? $2 : '';
+    $self->{config}{$cc}{$tag} = $value;
+    $self->{current_tag} = $tag;
+    return 1;
+  }
+
+  # Blank lines have already returned above, so whatever is left is a
+  # data line; it ends any pending configuration continuation.
+  undef $self->{current_tag};
+
+  # parse data lines
+  my @tokens = shellwords($_);
+  unshift @tokens,'' if /^\s+/;
+
+  # close any open group
+  if ($self->{group} && $self->{grouptype} && $tokens[0] && length $tokens[0] > 0) {
+    push @{$self->{features}{$self->{grouptype}}},$self->{group};
+    undef $self->{group};
+    undef $self->{grouptype};
+  }
+
+  if (@tokens < 3) {      # short line; assume a group identifier
+    my $type               = shift @tokens;
+    my $name               = shift @tokens;
+    $self->{group}         = Bio::Graphics::Feature->new(-name => $name,
+                                                         -type => 'group');
+    $self->{grouptype}     = $type;
+    return 1;
+  }
+
+  my($ref,$type,$name,$strand,$bounds,$description,$url,$score,%attributes);
+
+  my @parts;
+
+  # conventional GFF file, with check for numeric start/end
+  if (@tokens >= 8 && $tokens[3]=~ /^-?\d+$/ && $tokens[4]=~ /^-?\d+$/) {
+    require Bio::DB::GFF unless Bio::DB::GFF->can('split_group');
+    my ($r,$source,$method,$start,$stop,$scor,$s,$phase,@rest) = @tokens;
+    # sanity checks
+    my $group = join ' ',@rest;
+    $type   = defined $source && $source ne '.' ? join(':',$method,$source) : $method;
+    @parts   = ([$start,$stop]);
+    $strand = $s;
+    if ($group) {
+      my ($notes,@notes);
+      (undef,$name,undef,undef,$notes) = $self->split_group($group);
+      foreach (@$notes) {
+        my ($key,$value) = @$_;
+        if ($value =~ m!^(http|ftp)://!) {
+          # keep the URL string itself, not the [key,value] pair in $_
+          $url = $value;
+        } else {
+          push @notes,"$key=$value";
+        }
+      }
+      $description = join '; ',map {_escape($_)} @notes if @notes;
+      $score       = $scor if defined $scor && $scor ne '.';
+    }
+    $name ||= $self->{group}->display_id if $self->{group};
+    $ref = $r;
+  }
+
+  elsif ($tokens[2] =~ /^([+-.]|[+-]?[01])$/) { # old simplified version
+    ($type,$name,$strand,$bounds,$description,$url) = @tokens;
+  } else {                              # new simplified version
+    ($type,$name,$bounds,$description,$url) = @tokens;
+  }
+
+  $type ||= $self->{grouptype} || '';
+  $type =~ s/\s+$//;  # get rid of excess whitespace
+
+  # the reference is specified by the GFF reference line first,
+  # the last reference line we saw second,
+  # or the reference line in the "general" section.
+  {
+    local $^W = 0;
+    $ref  ||= $self->{config}{$self->{current_config}}{'reference'}
+      || $self->{config}{general}{reference};
+  }
+  $self->{refs}{$ref}++ if defined $ref;
+
+  # simplified formats: "start-end" or "start..end", comma/space separated
+  @parts = map { [/(-?\d+)(?:-|\.\.)(-?\d+)/]} split /(?:,| )\s*/,$bounds
+    if $bounds && !@parts;
+
+  # Track the overall coordinate range.  A part whose coordinate failed to
+  # parse (undef) is ignored rather than resetting min/max to undef.
+  foreach (@parts) {
+    my ($low,$high) = @$_;
+    $self->{min} = $low
+      if defined $low  && (!defined $self->{min} || $low  < $self->{min});
+    $self->{max} = $high
+      if defined $high && (!defined $self->{max} || $high > $self->{max});
+  }
+
+  my $visible = 1;
+
+  if ($self->{coordinate_mapper} && $ref) {
+    my @remapped = $self->{coordinate_mapper}->($ref,@parts);
+    ($ref,@parts) = @remapped if @remapped;
+    $visible   = @remapped;
+    return 1 if !$visible && $self->{feature_count} > MAX_REMAP;
+  }
+
+  $type = '' unless defined $type;
+  $name = '' unless defined $name;
+
+  # if strand is not explicitly given in file, we infer it
+  # from the order of start and end coordinates
+  # (this is to deal with confusing documentation, actually)
+  unless (defined $strand) {
+    foreach (@parts) {
+      if (defined $_ && ref($_) eq 'ARRAY' && defined $_->[0] && defined $_->[1]) {
+        $strand           ||= $_->[0] <= $_->[1] ? '+' : '-';
+        ($_->[0],$_->[1])   = ($_->[1],$_->[0]) if $_->[0] > $_->[1];
+      }
+    }
+  }
+
+  # attribute handling
+  if (defined $description && $description =~ /\w+=\S+/) { # attribute line
+    my @attributes = split /;\s*/,$description;
+    foreach (@attributes) {
+      my ($name,$value) = split /=/,$_,2;
+      Bio::Root::Root->throw(qq("$_" is not a valid attribute=value pair)) unless defined $value;
+      _unescape($name);
+      my @values = split /,/,$value;
+      _unescape(@values);
+      if ($name =~ /^(note|description)/) {
+        $description = "@values";
+      } elsif ($name eq 'url') {
+        $url = $value;
+      } elsif ($name eq 'score') {
+        $score = $value;
+      } else {
+        push @{$attributes{$name}},@values;
+      }
+    }
+  }
+
+  # either create a new feature or add a segment to it
+  if (my $feature = $self->{seenit}{$type,$name}) {
+
+    # create a new segment to hold the parts
+    if (!$feature->segments) {
+      my $new_segment  = bless {%$feature},ref $feature;
+      $feature->add_segment($new_segment);
+    }
+    # add the segments
+    $feature->add_segment(map {
+      _make_feature($name,$type,$strand,$description,$ref,\%attributes,$url,$score,[$_])
+    }  @parts);
+    $self->{visible}{$feature}++  if $visible;
+  }
+
+  else {
+    $feature = $self->{seenit}{$type,$name} = _make_feature($name,$type,$strand,
+                                                            $description,$ref,
+                                                            \%attributes,$url,$score,\@parts);
+    $feature->configurator($self) if $self->smart_features;
+    if ($self->{group}) {
+      $self->{group}->add_segment($feature);
+    } else {
+      push @{$self->{features}{$type}},$feature;  # for speed; should use add_feature() instead
+      $self->{visible}{$feature}++  if $visible;
+      $self->{feature_count}++;
+    }
+  }
+
+  return 1;
+}
+
+# URL-decode every argument IN PLACE (the elements of @_ alias the
+# caller's variables) and return the decoded list.
+sub _unescape {
+  foreach (@_) {
+    tr/+/ /;       # pluses become spaces
+    # /e is required: without it "%41" is replaced by the literal text
+    # "chr hex(41)" instead of the character "A"
+    s/%([0-9a-fA-F]{2})/chr hex($1)/ge;
+  }
+  @_;
+}
+
+# Return a copy of the argument in which every character other than
+# letters, digits, "_", ".", "=" and "-" is written as %XX (upper-case hex).
+sub _escape {
+  my ($text) = @_;
+  $text =~ s/([^a-zA-Z0-9_.=-])/sprintf('%%%02X',ord $1)/eg;
+  return $text;
+}
+
+# Build a Bio::Graphics::Feature from parsed fields.  $parts is an
+# arrayref of [start,end] pairs: more than one pair is passed on as
+# -segments, otherwise the first pair supplies -start and -end.
+# -strand, -url and -score are only passed when a value is available.
+sub _make_feature {
+  my ($name,$type,$strand,$description,$ref,$attributes,$url,$score,$parts) = @_;
+
+  my @args = (-name    => $name,
+              -type    => $type,
+              -subtype => "${type}_part");
+  push @args,(-strand => make_strand($strand)) if $strand;
+  push @args,(-desc       => $description,
+              -ref        => $ref,
+              -attributes => $attributes);
+  push @args,(-url   => $url)   if defined $url;
+  push @args,(-score => $score) if defined $score;
+
+  if (@$parts > 1) {
+    push @args,(-segments => $parts);
+  } else {
+    push @args,(-start => $parts->[0][0], -end => $parts->[0][1]);
+  }
+
+  return Bio::Graphics::Feature->new(@args);
+}
+
+=over 4
+
+=item $features-E<gt>add_feature($feature [=E<gt>$type])
+
+Add a new Bio::FeatureI object to the set.  If $type is specified, the
+object will be added with the indicated type.  Otherwise, the
+feature's primary_tag() method will be invoked to get the type.
+
+=back
+
+=cut
+
+# Register $feature under $type, marking it visible and counting it.
+# When no $type is passed the feature's primary_tag() supplies it.
+sub add_feature {
+  my ($self,$feature,$type) = @_;
+  $feature->configurator($self) if $self->smart_features;
+  defined $type or $type = $feature->primary_tag;
+  $self->{visible}{$feature} += 1;
+  $self->{feature_count}     += 1;
+  push @{$self->{features}{$type}},$feature;
+}
+
+
+=over 4
+
+=item $features-E<gt>add_type($type=E<gt>$hashref)
+
+Add a new feature type to the set.  The type is a string, such as
+"EST".  The hashref is a set of key=E<gt>value pairs indicating options to
+set on the type.  Example:
+
+  $features->add_type(EST => { glyph => 'generic', fgcolor => 'blue'})
+
+When a feature of type "EST" is rendered, it will use the generic
+glyph and have a foreground color of blue.
+
+=back
+
+=cut
+
+# Declare a feature type and, optionally, its options.
+# Call as add_type($type,$configuration) where $configuration is a hashref
+# of option => value pairs; option names are stored lower-cased.  The names
+# "general" and "default" (any case) both refer to the 'general' stanza,
+# which is never added to the list of types.
+sub add_type {
+  my ($self,$type,$type_configuration) = @_;
+  my $stanza = ($type =~ /^(general|default)$/i) ? 'general' : $type;  # normalize
+  unless ($stanza eq 'general' or $self->{config}{$stanza}) {
+    push @{$self->{types}},$stanza;
+  }
+  if (defined $type_configuration) {
+    for my $option (keys %$type_configuration) {
+      $self->{config}{$stanza}{lc $option} = $type_configuration->{$option};
+    }
+  }
+}
+
+
+
+=over 4
+
+=item $features-E<gt>set($type,$tag,$value)
+
+Change an individual option for a particular type.  For example, this
+will change the foreground color of EST features to my favorite color:
+
+  $features->set('EST',fgcolor=>'chartreuse')
+
+=back
+
+=cut
+
+# Change one option of a type.  Call as set($type,$tag,$value); croaks on
+# any other argument count.  A type without configuration is created via
+# add_type(); otherwise the (lower-cased) tag is overwritten in place.
+sub set {
+  my $self = shift;
+  @_ == 3
+    or croak("Usage: \$featurefile->set(\$type,\$tag,\$value\n");
+  my ($type,$tag,$value) = @_;
+  return $self->add_type($type,{$tag=>$value}) unless $self->{config}{$type};
+  $self->{config}{$type}{lc $tag} = $value;
+}
+
+# break circular references by dropping the stored features
+sub finished {
+  my ($self) = @_;
+  delete $self->{features};
+}
+
+# the destructor simply delegates to finished()
+sub DESTROY {
+  my $self = shift;
+  $self->finished(@_);
+}
+
+=over 4
+
+=item $value = $features-E<gt>setting($stanza =E<gt> $option)
+
+In the two-element form, the setting() method returns the value of an
+option in the configuration stanza indicated by $stanza.  For example:
+
+  $value = $features->setting(general => 'height')
+
+will return the value of the "height" option in the [general] stanza.
+
+Call with one element to retrieve all the option names in a stanza:
+
+  @options = $features->setting('general');
+
+Call with no elements to retrieve all stanza names:
+
+  @stanzas = $features->setting;
+
+=back
+
+=cut
+
+# Read or write configuration.
+#   setting()                        => list of stanza names
+#   setting($stanza)                 => list of option names in $stanza
+#   setting($stanza,$option)         => value of the option
+#   setting($stanza,$option,$value)  => store $value, then return the option
+# When the "safe" flag is set, option values are fetched through
+# code_setting() so that code-valued options come back as code references.
+sub setting {
+  my $self = shift;
+  if (@_ > 2) {
+    $self->{config}->{$_[0]}{$_[1]} = $_[2];
+  }
+  # code_setting() needs both a stanza and an option.  The listing forms
+  # (zero or one argument) must therefore go to _setting() even in safe
+  # mode, otherwise they return nothing.
+  if (@_ >= 2 && $self->safe) {
+     $self->code_setting(@_);
+  } else {
+     $self->_setting(@_);
+  }
+}
+
+# Low-level configuration accessor (no code evaluation).
+# arguments are ()                   => returns all stanza names
+#               ($type)              => returns tags for type
+#               ($type=>$tag)        => returns value of tag on type
+#               ($type=>$tag,$value) => sets value of tag and returns it
+# Returns nothing when there is no configuration, or when a tag is
+# requested from a stanza that does not exist.
+sub _setting {
+  my $self   = shift;
+  my $config = $self->{config} or return;
+  my $argc   = @_;
+  my ($stanza,$tag,$value) = @_;
+
+  return keys %{$config}                   if $argc == 0;
+  return keys %{$config->{$stanza}}        if $argc == 1;
+  return $config->{$stanza}{$tag} = $value if $argc > 2;
+  return unless exists $config->{$stanza};
+  return $config->{$stanza}{$tag};
+}
+
+
+=over 4
+
+=item $value = $features-E<gt>code_setting($stanza=E<gt>$option);
+
+This works like setting() except that it is also able to evaluate code
+references.  These are options whose values begin with the characters
+"sub {".  In this case the value will be passed to an eval() and the
+resulting codereference returned.  Use this with care!
+
+=back
+
+=cut
+
+# Fetch an option and, when its value describes code, turn it into a code
+# reference, store that back with set() and return it.
+#   - an existing CODE ref is returned as-is
+#   - "\&name" is resolved to the sub of that name in this object's package
+#   - "sub {...}" (optionally with a ($$) prototype) is compiled in it
+#   - any other value is returned unchanged
+# NOTE: both code forms go through string eval of configuration text;
+# see the documentation of safe() about untrusted files.
+sub code_setting {
+  my ($self,$section,$option) = @_;
+
+  my $setting = $self->_setting($section=>$option);
+  return unless defined $setting;
+  return $setting if ref($setting) eq 'CODE';
+
+  my ($package,$codestring);
+  if ($setting =~ /^\\&(\w+)/) {  # coderef in string form
+    my $subroutine_name = $1;
+    $package    = $self->base2package;
+    $codestring = "\\&${package}\:\:${subroutine_name}";
+  }
+  elsif ($setting =~ /^sub\s*(\(\$\$\))*\s*\{/) {  # anonymous sub source
+    $package    = $self->base2package;
+    $codestring = "package $package; $setting";
+  }
+  else {
+    return $setting;
+  }
+
+  my $coderef = eval $codestring;
+  $self->_callback_complain($section,$option) if $@;
+  $self->set($section,$option,$coderef);
+  return $coderef;
+}
+
+# Warn (via carp) that evaluating a callback failed, quoting the stanza,
+# the option and the current contents of $@.
+sub _callback_complain {
+  my ($self,$section,$option) = @_;
+  carp "An error occurred while evaluating the callback at section='$section', option='$option':\n   => $@";
+}
+
+=over 4
+
+=item $flag = $features-E<gt>safe([$flag]);
+
+This gets or sets the "safe" flag.  If the safe flag is set, then
+calls to setting() will invoke code_setting(), allowing values that
+begin with the string "sub {" to be interpreted as anonymous
+subroutines.  This is a potential security risk when used with
+untrusted files of features, so use it with care.
+
+=back
+
+=cut
+
+# Combined getter/setter for the "safe" flag; returns the value held
+# before the call.  When the flag goes from false to true, all
+# code-valued options are compiled immediately.
+sub safe {
+   my ($self,@new_flag) = @_;
+   my $was_safe = $self->{safe};
+   $self->{safe} = $new_flag[0] if @new_flag;
+   $self->evaluate_coderefs if !$was_safe && $self->{safe};
+   return $was_safe;
+}
+
+
+=over 4
+
+=item @args = $features-E<gt>style($type)
+
+Given a feature type, returns a list of track configuration arguments
+suitable for passing to the
+Bio::Graphics::Panel-E<gt>add_track() method.
+
+=back
+
+=cut
+
+# turn the configuration of $type into a list of -name=>value pairs
+# suitable for add_track().  When there is no stanza under the full type
+# name, the part before the first ":" is tried instead.  Returns nothing
+# when neither is configured.
+sub style {
+  my ($self,$type) = @_;
+
+  my $config  = $self->{config} or return;
+  my $options = $config->{$type};
+  if (!$options) {
+    (my $basetype = $type) =~ s/:.+$//;
+    $options = $config->{$basetype} or return;
+  }
+
+  my @track_args;
+  push @track_args,("-$_" => $options->{$_}) for keys %$options;
+  return @track_args;
+}
+
+
+=over 4
+
+=item $glyph = $features-E<gt>glyph($type);
+
+Return the name of the glyph corresponding to the given type (same as
+$features-E<gt>setting($type=E<gt>'glyph')).
+
+=back
+
+=cut
+
+# retrieve just the 'glyph' option of a type; returns nothing when the
+# type (or the whole configuration) is absent
+sub glyph {
+  my ($self,$type) = @_;
+  my $config = $self->{config} or return;
+  my $stanza = $config->{$type} or return;
+  return $stanza->{glyph};
+}
+
+
+=over 4
+
+=item @types = $features-E<gt>configured_types()
+
+Return a list of all the feature types currently known to the feature
+file set.  Roughly equivalent to:
+
+  @types = grep {$_ ne 'general'} $features->setting;
+
+=back
+
+=cut
+
+# return the configured types in the order in which they were recorded
+sub configured_types {
+  my ($self) = @_;
+  return unless $self->{types};
+  return @{ $self->{types} };
+}
+
+=over 4
+
+=item  @types = $features-E<gt>types()
+
+This is similar to the previous method, but will return *all* feature
+types, including those that are not configured with a stanza.
+
+=back
+
+=cut
+
+# return every type that currently has stored features
+sub types {
+  my ($self) = @_;
+  return unless $self->{features};
+  return keys %{ $self->{features} };
+}
+
+=over 4
+
+=item $features = $features-E<gt>features($type)
+
+Return a list of all the features of type "$type".  If the
+featurefile object was created by parsing a file or text scalar, then
+the features will be of type Bio::Graphics::Feature (which follow the
+Bio::FeatureI interface).  Otherwise the list will contain objects of
+whatever type you added with calls to add_feature().
+
+Two APIs:
+
+  1) original API:
+
+      # Reference to an array of all features of type "$type"
+      $features = $features-E<gt>features($type)
+
+      # Reference to an array of all features of all types
+      $features = $features-E<gt>features()
+
+      # A list when called in a list context
+      @features = $features-E<gt>features()
+
+   2) Bio::Das::SegmentI API:
+
+       @features = $features-E<gt>features(-type=>['list','of','types']);
+
+       # variants
+       $features = $features-E<gt>features(-type=>['list','of','types']);
+       $features = $features-E<gt>features(-type=>'a type');
+       $iterator = $features-E<gt>features(-type=>'a type',-iterator=>1);
+
+=back
+
+=cut
+
+# return features
+#   features(@types)              - positional list of type names
+#   features(-type => ..., ...)   - named arguments, parsed by rearrange()
+# With no types, features of every type are returned.  Returns an
+# iterator object when the iterator slot is true, otherwise a list (list
+# context) or an array reference (scalar context).
+sub features {
+  my $self = shift;
+  # Named-argument style is recognized by a leading "-".  The test must be
+  # "defined AND matches": wrapping the whole && expression in defined()
+  # is true for any non-empty first argument, which sent plain type lists
+  # through rearrange() and made the second type act as the iterator flag.
+  my $named_args = defined $_[0] && $_[0] =~ /^-/;
+  my ($types,$iterator,@rest) = $named_args
+    ? rearrange([['TYPE','TYPES']],@_) : (\@_);
+  $types = [$types] if $types && !ref($types);
+  my @types = ($types && @$types) ? @$types : $self->types;
+  # a type without stored features contributes nothing (instead of dying
+  # on an undefined array reference)
+  my @features = map {@{$self->{features}{$_} || []}} @types;
+  if ($iterator) {
+    require Bio::Graphics::FeatureFile::Iterator;
+    return Bio::Graphics::FeatureFile::Iterator->new(\@features);
+  }
+  return wantarray ? @features : \@features;
+}
+
+=over 4
+
+=item @features = $features-E<gt>features($type)
+
+Return a list of all the feature types of type "$type".  If the
+featurefile object was created by parsing a file or text scalar, then
+the features will be of type Bio::Graphics::Feature (which follow the
+Bio::FeatureI interface).  Otherwise the list will contain objects of
+whatever type you added with calls to add_feature().
+
+=back
+
+=cut
+
+# Normalize a strand designation to +1, -1 or 0: a leading "+" or a
+# positive number gives +1, a leading "-" or a negative number gives -1,
+# anything else gives 0.
+sub make_strand {
+  local $^W = 0;
+  my $strand = $_[0];
+  if ($strand =~ /^\+/ || $strand > 0) {
+    return +1;
+  }
+  elsif ($strand =~ /^\-/ || $strand < 0) {
+    return -1;
+  }
+  return 0;
+}
+
+=head2 get_seq_stream
+
+ Title   : get_seq_stream
+ Usage   : $stream = $s->get_seq_stream(@args)
+ Function: get a stream of features that overlap this segment
+ Returns : a Bio::SeqIO::Stream-compliant stream
+ Args    : see below
+ Status  : Public
+
+This is the same as feature_stream(), and is provided for Bioperl
+compatibility.  Use like this:
+
+ $stream = $s->get_seq_stream('exon');
+ while (my $exon = $stream->next_seq) {
+    print $exon->start,"\n";
+ }
+
+=cut
+
+# Call features() with -iterator=>1 added.  Named arguments (first
+# argument starting with "-") are passed through; a plain list is passed
+# as -types.
+sub get_seq_stream {
+  my $self = shift;
+  local $^W = 0;
+  my @args;
+  if ($_[0] =~ /^-/) {
+    @args = (@_,-iterator=>1);
+  } else {
+    @args = (-types=>\@_,-iterator=>1);
+  }
+  $self->features(@args);
+}
+
+=head2 get_feature_by_name
+
+ Usage   : $db->get_feature_by_name(-name => $name)
+ Function: fetch features by their name
+ Returns : a list of Bio::DB::GFF::Feature objects
+ Args    : the name of the desired feature
+ Status  : public
+
+This method can be used to fetch a named feature from the file.
+
+The full syntax is as follows.  Features can be filtered by
+their reference, start and end positions
+
+  @f = $db->get_feature_by_name(-name  => $name,
+                                -ref   => $sequence_name,
+                                -start => $start,
+                                -end   => $end);
+
+This method may return zero, one, or several Bio::Graphics::Feature
+objects.
+
+=cut
+
+# Return the features that satisfy every filter that was given:
+#   -name   exact display_name, or a case-insensitive pattern when the
+#           name contains the wildcards "?" (.?) or "*" (.*)
+#   -ref    exact match on the feature's ref()
+#   -start  feature must end at or after this position
+#   -end    feature must start at or before this position
+# -start and -end are ignored unless they are integers.
+#
+# The filter is an ordinary closure.  Building Perl source with $name and
+# $ref interpolated into it and eval-ing that breaks on any value
+# containing a quote and lets argument text run as code.
+sub get_feature_by_name {
+   my $self = shift;
+   my ($name,$ref,$start,$end) = rearrange(['NAME','REF','START','END'],@_);
+
+   my $name_re;
+   if (defined $name && $name =~ /[\?\*]/) {  # wildcard => regexp
+     my $pattern = quotemeta($name);
+     $pattern =~ s/\\\?/.?/g;
+     $pattern =~ s/\\\*/.*/g;
+     $name_re = qr/$pattern/i;
+   }
+   my $check_start = defined $start && $start =~ /^-?\d+$/;
+   my $check_end   = defined $end   && $end   =~ /^-?\d+$/;
+
+   my $match_sub = sub {
+     my $f = shift;
+     if (defined $name_re) {
+       return unless $f->display_name =~ $name_re;
+     } elsif (defined $name) {
+       return unless $f->display_name eq $name;
+     }
+     if (defined $ref) {
+       return unless $f->ref eq $ref;
+     }
+     if ($check_start) {
+       return unless $f->stop >= $start;
+     }
+     if ($check_end) {
+       return unless $f->start <= $end;
+     }
+     return 1;
+   };
+
+   return grep {$match_sub->($_)} $self->features;
+}
+
+=head2 search_notes
+
+ Title   : search_notes
+ Usage   : @search_results = $db->search_notes("full text search string",$limit)
+ Function: Search the notes for a text string
+ Returns : array of results
+ Args    : full text search string, and an optional row limit
+ Status  : public
+
+Each row of the returned array is an arrayref containing the following fields:
+
+  column 1     Display name of the feature
+  column 2     The text of the note
+  column 3     A relevance score.
+
+=cut
+
+# Case-insensitive search for any of the whitespace-separated words of
+# $search_string in each feature's tag values, notes and display name.
+# "*" and "?" are stripped from the search string first.  Each result row
+# is [$feature, $matched_text, $relevance], with relevance being 10 per
+# hit.  Features without an {attributes} entry are skipped.  At most
+# $limit rows are returned; without a $limit all matches are returned.
+sub search_notes {
+  my $self = shift;
+  my ($search_string,$limit) = @_;
+
+  $search_string =~ tr/*?//d;
+
+  my @results;
+  my $search    = join '|',map {quotemeta($_)} $search_string =~ /(\S+)/g;
+  my $search_re = qr/($search)/i;   # compiled once, not once per feature
+
+  for my $feature ($self->features) {
+    next unless $feature->{attributes};
+    my @attributes = $feature->all_tags;
+    # pass the tag being looked up; calling each_tag_value() with no
+    # argument does not name any tag
+    my @values     = map {$feature->each_tag_value($_)} @attributes;
+    push @values,$feature->notes        if $feature->notes;
+    push @values,$feature->display_name if $feature->display_name;
+    next unless @values;
+    my $value   = "@values";
+    my @hits    = $value =~ /$search_re/g;
+    my $matches = @hits;
+    next unless $matches;
+
+    my $relevance = 10 * $matches;
+    push @results,[$feature,$value,$relevance];
+    # the limit is optional: comparing against an undefined limit would
+    # stop after the very first result
+    last if defined $limit && @results >= $limit;
+  }
+
+  @results;
+}
+
+
+=head2 get_feature_stream(), top_SeqFeatures(), all_SeqFeatures()
+
+Provided for compatibility with older BioPerl and/or Bio::DB::GFF
+APIs.
+
+=cut
+
+*get_feature_stream = \&get_seq_stream;
+*top_SeqFeatures    = *all_SeqFeatures = \&features;
+
+
+=over 4
+
+=item @refs = $features-E<gt>refs
+
+Return the list of reference sequences referred to by this data file.
+
+=back
+
+=cut
+
+# names of all reference sequences recorded so far (nothing if none)
+sub refs {
+  my ($self) = @_;
+  return unless $self->{refs};
+  return keys %{ $self->{refs} };
+}
+
+=over 4
+
+=item  $min = $features-E<gt>min
+
+Return the minimum coordinate of the leftmost feature in the data set.
+
+=back
+
+=cut
+
+sub min {
+  my ($self) = @_;
+  return $self->{min};   # smallest coordinate recorded by the parser
+}
+
+=over 4
+
+=item $max = $features-E<gt>max
+
+Return the maximum coordinate of the rightmost feature in the data set.
+
+=back
+
+=cut
+
+sub max {
+  my ($self) = @_;
+  return $self->{max};   # largest coordinate recorded by the parser
+}
+
+# Reset all per-parse state before a new parse begins.
+sub init_parse {
+  my ($self) = @_;
+
+  $self->{seenit}        = {};
+  $self->{min}           = undef;
+  $self->{max}           = undef;
+  $self->{types}         = [];
+  $self->{features}      = {};
+  $self->{config}        = {};
+  $self->{gff_version}   = 0;
+  $self->{feature_count} = 0;
+}
+
+# Finalize a parse: compile code-valued options when in safe mode, then
+# drop the bookkeeping that is only needed while parsing.
+sub finish_parse {
+  my ($self) = @_;
+  if ($self->safe) {
+    $self->evaluate_coderefs;
+  }
+  $self->{seenit} = {};
+  delete $self->{gff_version};
+}
+
+# Run the init code, then pass every option of every stanza through
+# code_setting() so that code-valued options are compiled.
+sub evaluate_coderefs {
+  my ($self) = @_;
+  $self->initialize_code();
+  foreach my $stanza ($self->_setting) {
+    foreach my $option ($self->_setting($stanza)) {
+      $self->code_setting($stanza,$option);
+    }
+  }
+}
+
+# Evaluate the [general] "init_code" option, if present, inside this
+# object's private package, complaining via _callback_complain() when the
+# evaluation fails.
+# NOTE: this is a string eval of configuration text.
+sub initialize_code {
+  my ($self) = @_;
+  my $package   = $self->base2package;
+  my $init_code = $self->_setting(general => 'init_code') or return;
+  my $code      = "package $package; $init_code; 1;";
+  eval $code;
+  $self->_callback_complain(general=>'init_code') if $@;
+}
+
+# Derive a package name from the object's plain stringification
+# (overload::StrVal): every character other than letters, digits and "_"
+# becomes "_", and so does a leading character that is not a letter or "_".
+sub base2package {
+  my ($self) = @_;
+  my $package = overload::StrVal($self);
+  $package =~ s/[^a-z0-9A-Z_]/_/g;
+  $package =~ s/^[^a-zA-Z_]/_/g;
+  return $package;
+}
+
+# Split a GFF group field using a Bio::DB::GFF 'memory' adaptor object,
+# created on first use and cached in $self->{gff}.  The second argument
+# passed on is true when the recorded gff_version is greater than 2.
+sub split_group {
+  my ($self,$group) = @_;
+  $self->{gff} ||= Bio::DB::GFF->new(-adaptor=>'memory');
+  my $newer_than_v2 = $self->{gff_version} > 2;
+  return $self->{gff}->split_group($group,$newer_than_v2);
+}
+
+# create a panel if needed
+#
+# Width comes from the [general] options "pixels" or "width", falling back
+# to the WIDTH constant.  The displayed range comes from the [general]
+# "bases" option ("start-stop" or "start..stop"); any bound not obtained
+# from it defaults to the min()/max() coordinate of the parsed features.
+# The remaining [general] options are passed to the panel via style().
+sub new_panel {
+  my $self = shift;
+
+  require Bio::Graphics::Panel;
+
+  # general configuration of the image here
+  my $width = $self->setting(general => 'pixels')
+           || $self->setting(general => 'width')
+           || WIDTH;
+
+  my ($start,$stop);
+  # Signed integers on both sides of the separator.  A character class
+  # such as [\d-]+ lets the "-" separator be swallowed into the first
+  # number, e.g. "5--3" would yield a start of "5-".
+  my $range_expr = '(-?\d+)(?:-|\.\.)(-?\d+)';
+
+  if (my $bases = $self->setting(general => 'bases')) {
+    ($start,$stop) =  $bases =~ /$range_expr/;
+  }
+
+  $start = $self->min unless defined $start;
+  $stop  = $self->max unless defined $stop;
+
+  my $new_segment = Bio::Graphics::Feature->new(-start=>$start,-stop=>$stop);
+  my $panel = Bio::Graphics::Panel->new(-segment   => $new_segment,
+                                        -width     => $width,
+                                        -key_style => 'between',
+                                        $self->style('general'));
+  $panel;
+}
+
+=over 4
+
+=item $mtime = $features-E<gt>mtime
+
+=item $atime = $features-E<gt>atime
+
+=item $ctime = $features-E<gt>ctime
+
+=item $size = $features-E<gt>size
+
+Returns stat() information about the data file, for featurefile
+objects created using the -file option.  Size is in bytes.  mtime,
+atime, and ctime are in seconds since the epoch.
+
+=back
+
+=cut
+
+# Modification time: an explicitly stored value wins over slot 9 of the
+# cached stat() record.  Passing an argument stores a new value; the
+# value in effect before the call is returned.
+sub mtime {
+  my ($self,@new_time) = @_;
+  my $current = $self->{m_time} || $self->{stat}->[9];
+  $self->{m_time} = $new_time[0] if @new_time;
+  return $current;
+}
+
+# the remaining accessors read straight from the cached stat() record
+sub atime { my ($self) = @_; return $self->{stat}->[8];  }
+sub ctime { my ($self) = @_; return $self->{stat}->[10]; }
+sub size  { my ($self) = @_; return $self->{stat}->[7];  }
+
+=over 4
+
+=item $label = $features-E<gt>feature2label($feature)
+
+Given a feature, determines the configuration stanza that best
+describes it.  The feature's primary_tag() method supplies the type
+that is looked up.
+
+=back
+
+=cut
+
+# Map a feature to its configuration label(s).  The full primary_tag is
+# looked up first, then the part before the first ":"; when neither has a
+# label the type itself is used.  Returns all labels in list context and
+# the first one in scalar context; nothing if primary_tag() is false.
+sub feature2label {
+  my ($self,$feature) = @_;
+  my $type = $feature->primary_tag or return;
+  (my $basetype = $type) =~ s/:.+$//;
+
+  my @labels;
+  foreach my $candidate ($type,$basetype) {
+    @labels = $self->type2label($candidate);
+    last if @labels;
+  }
+  @labels = ($type) unless @labels;
+  return wantarray ? @labels : $labels[0];
+}
+
+=over 4
+
+=item $link = $features-E<gt>link_pattern($linkrule,$feature,$panel)
+
+Given a feature, tries to generate a URL to link out from it.  This
+uses the 'link' option, if one is present.  This method is a
+convenience for the generic genome browser.
+
+=back
+
+=cut
+
+# Expand a link rule for a feature.
+# A CODE-ref rule is called with ($feature,$panel) inside an eval and its
+# result returned.  Otherwise every "$word" in the rule string is replaced
+# by the CGI-escaped value of the matching feature/panel property; an
+# unrecognized word is replaced by the (escaped) word itself.
+sub link_pattern {
+  my $self     = shift;
+  my ($linkrule,$feature,$panel) = @_;
+
+  $panel ||= 'Bio::Graphics::Panel';
+
+  if (ref($linkrule) && ref($linkrule) eq 'CODE') {
+    my $val = eval {$linkrule->($feature,$panel)};
+    $self->_callback_complain(none=>"linkrule for $feature") if $@;
+    return $val;
+  }
+
+  require CGI unless defined &CGI::escape;
+
+  # one value generator per recognized variable; only the generator of a
+  # variable that actually occurs in the rule is ever run
+  my %value_of = (
+    ref         => sub { my $n = $feature->location->seq_id; ($n && "$n") || '' },
+    name        => sub { my $n = $feature->display_name;     ($n && "$n") || '' },
+    class       => sub { eval {$feature->class}  || '' },
+    type        => sub { eval {$feature->method} || $feature->primary_tag || '' },
+    method      => sub { eval {$feature->method} || $feature->primary_tag || '' },
+    source      => sub { eval {$feature->source} || $feature->source_tag  || '' },
+    start       => sub { $feature->start || '' },
+    end         => sub { $feature->end   || '' },
+    stop        => sub { $feature->end   || '' },
+    segstart    => sub { $panel->start   || '' },
+    segend      => sub { $panel->end     || '' },
+    description => sub { eval {join '',$feature->notes} || '' },
+    id          => sub { $feature->feature_id || '' },
+  );
+
+  $linkrule ||= ''; # prevent uninit warning
+  $linkrule =~ s/\$(\w+)/
+    my $generator = $value_of{$1};
+    CGI::escape($generator ? $generator->() : $1)
+    /exg;
+  return $linkrule;
+}
+
+# Build the link for a feature from the 'link' option of its first label,
+# falling back to the [general] 'link' option when the label defines none.
+# Only the first label returned by feature2label() is consulted.
+sub make_link {
+  my ($self,$feature,$panel) = @_;
+
+  my ($label) = $self->feature2label($feature) or return;
+  my $linkrule = $self->setting($label,'link');
+  defined $linkrule or $linkrule = $self->setting(general=>'link');
+  return $self->link_pattern($linkrule,$feature,$panel);
+}
+
+# Build a title for a feature.  The first label with a usable 'title'
+# option (its own, else the [general] one) is expanded via link_pattern().
+# Without any such option a default is assembled: method, then either the
+# feature's range plus its target's range, or its name and range.
+# Errors raised while assembling the default are reported with warn.
+sub make_title {
+  my ($self,$feature) = @_;
+
+  foreach my $label ($self->feature2label($feature)) {
+    my $pattern = $self->setting($label,'title')
+               || $self->setting(general=>'title');
+    next unless $pattern;
+    return $self->link_pattern($pattern,$feature);
+  }
+
+  my $method = eval {$feature->method} || $feature->primary_tag;
+  my $seqid  = $feature->can('seq_id') ? $feature->seq_id : $feature->location->seq_id;
+  my $title  = eval {
+    my $prefix = defined $seqid ? "$seqid:" : '';
+    if ($feature->can('target') && $feature->target) {
+      my $own_range    = $prefix . $feature->start . ".." . $feature->end;
+      my $target_range = $feature->target . ':'
+                       . $feature->target->start . ".." . $feature->target->end;
+      join(' ',$method,$own_range,$target_range);
+    } else {
+      join(' ',
+           $method,
+           $feature->can('display_name') ? $feature->display_name : $feature->info,
+           $prefix . ($feature->start||'?') . ".." . ($feature->end||'?')
+          );
+    }
+  };
+  warn $@ if $@;
+  $title;
+}
+
+# given a feature type, return its label(s): all of them in list context,
+# the first one in scalar context.  The type-to-label index is built by
+# invert_types() on first use and cached in $self->{_type2label}.
+sub type2label {
+  my ($self,$type) = @_;
+  if (!$self->{_type2label}) {
+    $self->{_type2label} = $self->invert_types;
+  }
+  my @labels = keys %{$self->{_type2label}{$type}};
+  return wantarray ? @labels : $labels[0];
+}
+
+# Build the reverse index { feature type => { label => count } } from the
+# "feature" option of every stanza.  The option is split shell-style, so
+# one stanza may list several (optionally quoted) types.  Returns nothing
+# when there is no configuration.
+sub invert_types {
+  my ($self) = @_;
+  my $config = $self->{config} or return;
+  my %labels_of;
+  foreach my $label (keys %$config) {
+    my $feature_spec = $config->{$label}{feature};
+    next unless $feature_spec;
+    $labels_of{$_}{$label}++ for shellwords($feature_spec);
+  }
+  return \%labels_of;
+}
+
+=over 4
+
+=item $citation = $features-E<gt>citation($feature)
+
+Given a feature, tries to generate a citation for it, using the
+"citation" option if one is present.  This method is a convenience for
+the generic genome browser.
+
+=back
+
+=cut
+
+# Return the "citation" option of the given stanza, or of [general] when
+# no (true) stanza name is passed.  It exists to simplify the logic in
+# the generic browser.
+sub citation {
+  my ($self,$feature) = @_;
+  $feature ||= 'general';
+  return $self->setting($feature=>'citation');
+}
+
+=over 4
+
+=item $name = $features-E<gt>name([$feature])
+
+Get/set the name of this feature set.  This is a convenience method
+useful for keeping track of multiple feature sets.
+
+=back
+
+=cut
+
+# give this feature file a nickname; combined getter/setter that returns
+# the name held before the call
+sub name {
+  my ($self,@new_name) = @_;
+  my $previous = $self->{name};
+  $self->{name} = $new_name[0] if @new_name;
+  return $previous;
+}
+
+1;
+
+__END__
+
+=head1 Appendix -- Sample Feature File
+
+ # file begins
+ [general]
+ pixels = 1024
+ bases = 1-20000
+ reference = Contig41
+ height = 12
+
+ [Cosmid]
+ glyph = segments
+ fgcolor = blue
+ key = C. elegans conserved regions
+
+ [EST]
+ glyph = segments
+ bgcolor= yellow
+ connector = dashed
+ height = 5;
+
+ [FGENESH]
+ glyph = transcript2
+ bgcolor = green
+ description = 1
+
+ Cosmid	B0511	516-619
+ Cosmid	B0511	3185-3294
+ Cosmid	B0511	10946-11208
+ Cosmid	B0511	13126-13511
+ Cosmid	B0511	11394-11539
+ EST	yk260e10.5	15569-15724
+ EST	yk672a12.5	537-618,3187-3294
+ EST	yk595e6.5	552-618
+ EST	yk595e6.5	3187-3294
+ EST	yk846e07.3	11015-11208
+ EST	yk53c10
+ 	yk53c10.3	15000-15500,15700-15800
+ 	yk53c10.5	18892-19154
+ EST	yk53c10.5	16032-16105
+ SwissProt	PECANEX	13153-13656	Swedish fish
+ FGENESH	Predicted gene 1	1-205,518-616,661-735,3187-3365,3436-3846	Pfam domain
+ FGENESH	Predicted gene 2	5513-6497,7968-8136,8278-8383,8651-8839,9462-9515,10032-10705,10949-11340,11387-11524,11765-12067,12876-13577,13882-14121,14169-14535,15006-15209,15259-15462,15513-15753,15853-16219	Mysterious
+ FGENESH	Predicted gene 3	16626-17396,17451-17597
+ FGENESH	Predicted gene 4	18459-18722,18882-19176,19221-19513,19572-19835	Transmembrane protein
+ # file ends
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Feature>,
+L<Bio::Graphics::FeatureFile>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/Factory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/Factory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/Factory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,434 @@
+=head1 NAME
+
+Bio::Graphics::Glyph::Factory - Factory for Bio::Graphics::Glyph objects
+
+=head1 SYNOPSIS
+
+See L<Bio::Graphics::Panel>.
+
+=head1 DESCRIPTION
+
+This class is used internally by Bio::Graphics to generate new Glyph
+objects by combining a list of features with the user's desired
+configuration.  It is intended to be used internally by Bio::Graphics.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email - lstein at cshl.org
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with an "_"
+(underscore).
+
+=cut
+
+package Bio::Graphics::Glyph::Factory;
+
+use strict;
+use Carp qw(:DEFAULT cluck);
+use Bio::Root::Version;
+use base qw(Bio::Root::Root);
+
+# Glyph class names that make_glyph() has already processed, keyed by
+# fully-qualified class name, so each class is require'd at most once.
+my %LOADED_GLYPHS = ();
+# Last-resort defaults: option() returns the entry from this table when the
+# overriding options, the option map and the stylesheet supply no value.
+my %GENERIC_OPTIONS = (
+		       bgcolor    => 'turquoise',
+		       fgcolor    => 'black',
+		       fontcolor  => 'black',
+		       font2color => 'turquoise',
+		       height     => 8,
+		       font       => 'gdSmallFont', # This must be a string not method call
+		       bump       => +1,       # bump by default (perhaps a mistake?)
+		       );
+
+=head2 new
+
+  Title   : new
+  Usage   : $f = Bio::Graphics::Glyph::Factory->new(
+                     $panel,
+                     -stylesheet => $stylesheet,
+		     -map        => $glyph_map,
+		     -options    => $options);
+  Function : create a new Bio::Graphics::Glyph::Factory object
+  Returns  : the new object
+  Args     : $panel is the Bio::Graphics::Panel that the factory is
+                 associated with (first positional argument).
+             $stylesheet is a Bio::Das::Stylesheet object that can
+                 convert Bio::Das feature objects into glyph names and
+                 associated options.
+             $glyph_map is a hash that maps primary tags to glyph names,
+                 or a code reference that is called with the feature and
+                 returns a glyph name.
+             $options is a hash that maps option names to their values.
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Constructor.  The first positional argument is the panel that owns the
+# factory; the remaining arguments are -name => value pairs, of which
+# -stylesheet, -map and -options are recognized.
+sub new {
+  my ($class,$panel,%args) = @_;
+
+  my $self = {
+              stylesheet => $args{-stylesheet},   # optional, for Bio::Das compatibility
+              glyph_map  => $args{-map},          # map type name to glyph name
+              options    => $args{-options},      # map type name to glyph options
+              panel      => $panel,
+             };
+  return bless $self,$class;
+}
+
+=head2 clone
+
+  Title    : clone
+  Usage    : $f2 = $f->clone
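+  Function : Shallow copy of a factory object
+  Returns  : a new factory object of the same class; its top-level
+             fields are copied, but referenced data (panel, option
+             map, stylesheet) is shared with the original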
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Return a new object of the same class whose top-level hash entries are
+# copied from this one.  Entries that hold references still point at the
+# same underlying data as the original.
+sub clone {
+  my $self = shift;
+  my $copy = { %$self };
+  return bless $copy,ref($self);
+}
+
+=head2 stylesheet
+
+  Title    : stylesheet
+  Usage    : $stylesheet = $f->stylesheet
+  Function : accessor for stylesheet
+  Returns  : a Bio::Das::Stylesheet object
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Read-only accessor for the stylesheet stored by new().
+sub stylesheet {
+  my $self = shift;
+  return $self->{stylesheet};
+}
+
+=head2 glyph_map
+
+  Title    : glyph_map
+  Usage    : $map = $f->glyph_map
+  Function : accessor for the glyph map
+  Returns  : a hash mapping primary tags to glyphs
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Read-only accessor for the glyph map stored by new() (its -map argument).
+sub glyph_map {
+  my $self = shift;
+  return $self->{glyph_map};
+}
+
+=head2 option_map
+
+  Title    : option_map
+  Usage    : $map = $f->option_map
+  Function : accessor for the option map
+  Returns  : a hash mapping option names to values
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Read-only accessor for the option map stored by new() (its -options
+# argument).
+sub option_map {
+  my $self = shift;
+  return $self->{options};
+}
+
+=head2 global_opts
+
+  Title    : global_opts
+  Usage    : $map = $f->global_opts
+  Function : accessor for global options
+  Returns  : a hash mapping option names to values
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+This returns a set of defaults for option values.
+
+=cut
+
+# Read-only accessor for the "global_opts" entry of the object.  Nothing in
+# this module stores that entry, so the result is undef unless it was set
+# from outside.
+sub global_opts {
+  my $self = shift;
+  return $self->{global_opts};
+}
+
+=head2 panel
+
+  Title    : panel
+  Usage    : $panel = $f->panel
+  Function : accessor for Bio::Graphics::Panel
+  Returns  : a Bio::Graphics::Panel
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+This returns the panel with which the factory is associated.
+
+=cut
+
+# Read-only accessor for the panel passed as the first argument of new().
+sub panel {
+  my $self = shift;
+  return $self->{panel};
+}
+
+=head2 scale
+
+  Title    : scale
+  Usage    : $scale = $f->scale
+  Function : accessor for the scale
+  Returns  : a floating point number
+  Args     : None
+  Status   : Internal to Bio::Graphics
+
+This returns the scale, in pixels/bp for glyphs constructed by this
+factory.
+
+=cut
+
+# Delegate to the scale() method of the stored panel object.
+sub scale {
+  my $self = shift;
+  return $self->{panel}->scale;
+}
+
+=head2 font
+
+  Title    : font
+  Usage    : $font = $f->font($glyph)
+  Function : accessor for the font
+  Returns  : a font name
+  Args     : the glyph whose 'font' option should be looked up
+  Status   : Internal to Bio::Graphics
+
+This returns a GD font name.
+
+=cut
+
+# Return the 'font' option for the given glyph; when that is false, fall
+# back to the 'font' entry stored on the factory object itself.
+sub font {
+  my ($self,$glyph) = @_;
+  my $configured = $self->option($glyph,'font');
+  return $configured ? $configured : $self->{font};
+}
+
+=head2 map_pt
+
+  Title    : map_pt
+  Usage    : @pixel_positions = $f->map_pt(@bp_positions)
+  Function : map bp positions to pixel positions
+  Returns  : a list of pixel positions
+  Args     : a list of bp positions
+  Status   : Internal to Bio::Graphics
+
+The real work is done by the panel, but factory subclasses can
+override if desired.
+
+=cut
+
+# Forward the positions to the panel's map_pt().  List context returns
+# every mapped position, scalar context only the first one.
+sub map_pt {
+  my $self = shift;
+  my @pixels = $self->panel->map_pt(@_);
+  return @pixels if wantarray;
+  return $pixels[0];
+}
+
+=head2 map_no_trunc
+
+  Title    : map_no_trunc
+  Usage    : @pixel_positions = $f->map_no_trunc(@bp_positions)
+  Function : map bp positions to pixel positions
+  Returns  : a list of pixel positions
+  Args     : a list of bp positions
+  Status   : Internal to Bio::Graphics
+
+Same as map_pt(), but it will NOT clip pixel positions to be within
+the drawing frame.
+
+=cut
+
+# Forward the positions to the panel's map_no_trunc().  List context
+# returns every mapped position, scalar context only the first one.
+sub map_no_trunc {
+  my $self = shift;
+  my @pixels = $self->panel->map_no_trunc(@_);
+  return @pixels if wantarray;
+  return $pixels[0];
+}
+
+=head2 translate_color
+
+  Title    : translate_color
+  Usage    : $index = $f->translate_color($color_name)
+  Function : translate symbolic color names into GD indexes
+  Returns  : an integer
+  Args     : a color name in format "green" or "#00FF00"
+  Status   : Internal to Bio::Graphics
+
+The real work is done by the panel, but factory subclasses can
+override if desired.
+
+=cut
+
+# Hand the color name to the panel's translate_color() and return whatever
+# the panel returns.
+sub translate_color {
+  my ($self,$color_name) = @_;
+  my $panel = $self->panel;
+  return $panel->translate_color($color_name);
+}
+
+=head2 make_glyph
+
+  Title    : make_glyph
+  Usage    : @glyphs = $f->make_glyph($level,$feature1,$feature2...)
+  Function : transform features into glyphs.
+  Returns  : a list of Bio::Graphics::Glyph objects
+  Args     : a feature "level", followed by a list of FeatureI objects.
+  Status   : Internal to Bio::Graphics
+
+The level is used to track the level of nesting of features that have
+subfeatures.
+
+=cut
+
+# Create one glyph object per feature.
+#
+# Args    : $level, followed by the list of features to convert
+# Returns : all new glyphs in list context, the first glyph in scalar context
+# Throws  : through $self->throw() when the glyph class cannot be loaded
+sub make_glyph {
+  my $self  = shift;
+  my $level = shift;
+  my @result;
+  my $panel = $self->panel;
+  my $flip   = $panel->flip;
+
+  for my $f (@_) {
+    # feature_to_glyph() chooses the glyph name; an empty answer means "generic"
+    my $type = $self->feature_to_glyph($f) || 'generic';
+    # the class name is the lower-cased glyph name under Bio::Graphics::Glyph
+    my $glyphclass = "Bio::Graphics::Glyph::\L$type";
+
+    # Load each glyph class only once.  The class is recorded as loaded only
+    # AFTER require succeeds; recording it beforehand made every later request
+    # for a class that failed to load skip the require and die inside
+    # $glyphclass->new() with an unrelated "Can't locate object method" error.
+    unless ($LOADED_GLYPHS{$glyphclass}) {
+      $self->throw("The requested glyph class, ``$type'' is not available: $@")
+        unless (eval "require $glyphclass");
+      $LOADED_GLYPHS{$glyphclass}++;
+    }
+
+    my $glyph = $glyphclass->new(-feature  => $f,
+				 -factory  => $self,
+				 -flip     => $flip,
+				 -level    => $level);
+
+    push @result,$glyph;
+
+  }
+  return wantarray ? @result : $result[0];
+}
+
+=head2 feature_to_glyph
+
+  Title    : feature_to_glyph
+  Usage    : $glyph_name = $f->feature_to_glyph($feature)
+  Function : choose the glyph name given a feature
+  Returns  : a glyph name
+  Args     : a Bio::Seq::FeatureI object
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Choose the glyph name for a feature.  A stylesheet, when present, decides.
+# Otherwise the glyph map is consulted: a code reference is called with the
+# feature, a hash is indexed by the feature's primary tag.  Any missing or
+# false answer from the map becomes 'generic'.
+sub feature_to_glyph {
+  my ($self,$feature) = @_;
+
+  if (my $stylesheet = $self->{stylesheet}) {
+    return scalar $stylesheet->glyph($feature);
+  }
+
+  my $map = $self->glyph_map;
+  return 'generic' unless $map;
+
+  unless (ref($map) eq 'CODE') {
+    return $map->{$feature->primary_tag} || 'generic';
+  }
+
+  # callback map: errors are reported with warn and fall through to 'generic'
+  my $name = eval { $map->($feature) };
+  warn $@ if $@;
+  return $name || 'generic';
+}
+
+
+=head2 set_option
+
+  Title    : set_option
+  Usage    : $f->set_option($option_name=>$option_value)
+  Function : set or change an option
+  Returns  : nothing
+  Args     : a name/value pair
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Store an overriding value for an option.  The name is lower-cased before
+# use as the key; option() checks these overrides before any other source.
+sub set_option {
+  my ($self,$name,$value) = @_;
+  my $key = lc $name;
+  $self->{overriding_options}{$key} = $value;
+}
+
+# Look up the value of one option for a glyph.
+#
+# Args    : $glyph, $option_name, and optionally $partno and $total_parts
+#           (the last two are only handed on to callback values)
+# Returns : the option value; nothing when $option_name is undefined
+#
+# options:
+#    the overriding_options hash has precedence
+#    ...followed by the option_map
+#    ...followed by the stylesheet
+#    ...followed by generic options
+sub option {
+  my $self = shift;
+  my ($glyph,$option_name,$partno,$total_parts) = @_;
+  return unless defined $option_name;
+  $option_name = lc $option_name;   # canonicalize
+
+  # 1. values stored by set_option() win, even when the stored value is undef
+  return $self->{overriding_options}{$option_name} 
+    if exists $self->{overriding_options} && exists $self->{overriding_options}{$option_name};
+
+  # 2. the option map; only entries with a defined value are considered
+  if (exists $self->{options} && (my $map    = $self->{options})) {
+    if (exists $map->{$option_name} && defined(my $value  = $map->{$option_name})) {
+      # the feature is fetched before the CODE check, so $glyph must be an
+      # object here even when the value is a plain scalar
+      my $feature = $glyph->feature;
+      return $value unless ref $value eq 'CODE';
+      # callback value: called with (feature, option name, part number,
+      # total parts, glyph); a die inside it is reported with warn
+      my $val = eval { $value->($feature,$option_name,$partno,$total_parts,$glyph)};
+      warn "Error returned while evaluating value of '$option_name' option for glyph $glyph, feature $feature: ",$@,"\n"
+	if $@;
+      # a callback may return the literal string '*default*' to ask for the
+      # generic default; any other result (including undef) is returned as is
+      return defined $val && $val eq '*default*' ? $GENERIC_OPTIONS{$option_name} : $val;
+    }
+  }
+
+  # 3. the stylesheet, whose glyph() returns (glyph name, option => value, ...)
+  if (exists $self->{stylesheet} && (my $ss = $self->{stylesheet})) {
+    # the $glyph declared here shadows the argument only in later statements;
+    # the call on the right-hand side still uses the argument
+    my($glyph,%options) = $ss->glyph($glyph->feature);
+    my $value = $options{$option_name};
+    return $value if defined $value;
+  }
+
+  # 4. built-in defaults
+  return $GENERIC_OPTIONS{$option_name};
+}
+
+# Return the raw entry for an option name from the option map, or nothing
+# when the factory has no option map.  The name is used exactly as given and
+# callback values are returned without being called.
+sub get_option {
+  my ($self,$option_name) = @_;
+  my $map = $self->{options};
+  return unless $map;
+  return $map->{$option_name};
+}
+
+
+=head2 options
+
+  Title    : options
+  Usage    : @option_names = $f->options
+  Function : return all configured option names
+  Returns  : a list of option names
+  Args     : none
+  Status   : Internal to Bio::Graphics
+
+=cut
+
+# Return the lower-cased names of every option found in the option map or in
+# the generic defaults, each name once.
+sub options {
+  my $self = shift;
+  my %seen;
+  my $map = $self->option_map;
+  if ($map) {
+    for my $name (keys %$map) {
+      $seen{lc($name)}++;
+    }
+  }
+  for my $name (keys %GENERIC_OPTIONS) {
+    $seen{lc($name)}++;
+  }
+  return keys %seen;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/alignment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/alignment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/alignment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,123 @@
+package Bio::Graphics::Glyph::alignment;
+
+use strict;
+
+use base qw(Bio::Graphics::Glyph::graded_segments);
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::alignment - The "alignment" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is identical to the "graded_segments" glyph, and is used for
+drawing features that consist of discontinuous segments.  The
+color intensity of each segment is proportionate to the score.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the alignment glyph recognizes the following
+glyph-specific options:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -max_score  Maximum value of the	   Calculated
+              feature's "score" attribute
+
+  -min_score  Minimum value of the         Calculated
+              feature's "score" attribute
+
+If max_score and min_score are not specified, then the glyph will
+calculate the local maximum and minimum scores at run time.
+
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/anchored_arrow.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/anchored_arrow.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/anchored_arrow.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,195 @@
+package Bio::Graphics::Glyph::anchored_arrow;
+# package to use for drawing an arrow
+
+use strict;
+use base qw(Bio::Graphics::Glyph::arrow);
+
+# Draw the glyph's label.  When the 'label_align' option is 'center' or
+# 'right' the label is positioned here; every other setting is delegated to
+# the parent class.
+sub draw_label {
+  my $self = shift;
+  my ($gd,$left,$top,$partno,$total_parts) = @_;
+
+  my $label = $self->label;
+  return unless $label;
+
+  my $label_align = $self->option('label_align');
+  my $centered    = $label_align && $label_align eq 'center';
+  my $flush_right = $label_align && $label_align eq 'right';
+
+  # left-aligned (or unset) labels are drawn by the parent class
+  return $self->SUPER::draw_label(@_) unless $centered || $flush_right;
+
+  my $x           = $self->left + $left;
+  my $font        = $self->option('labelfont') || $self->font;
+  my $middle      = $self->left + $left + ($self->right - $self->left) / 2;
+  my $label_width = $font->width * length($label);
+
+  # preferred position; only used when it lies to the right of the glyph start
+  my $new_x;
+  if ($centered) {
+    $new_x = $middle - $label_width / 2;
+  }
+  else {
+    $new_x = $left + $self->right - $label_width;
+  }
+  $x = $new_x if $new_x > $x;
+
+  # keep the label inside the left edge of the panel
+  if ($x <= $self->panel->left) {
+    $x = $self->panel->left + 1;
+  }
+
+  #detect collision (most likely no bump when want centering label)
+  #lay down all features on one line e.g. cyto bands
+  if (!$self->option('bump') && ($label_width + $x) > $self->right) {
+    return;
+  }
+
+  return $gd->string($font,
+                     $x,
+                     $self->top + $top,
+                     $label,
+                     $self->fontcolor);
+}
+
+# Decide where arrowheads and vertical bases go.  An end gets an arrowhead
+# when the feature coordinate for it is undefined or reaches the panel
+# boundary; otherwise it gets a base.  The two ends are swapped when the
+# panel's 'flip' entry is true.
+# Returns (west arrowhead, east arrowhead, west base, east base).
+sub arrowheads {
+  my $self        = shift;
+  my $feature     = $self->feature;
+  my $feat_start  = $feature->start;
+  my $feat_end    = $feature->end;
+  my $panel_start = $self->panel->start;
+  my $panel_end   = $self->panel->end;
+
+  my ($west,$east);
+  $west = 1 if !defined $feat_start || $feat_start <= $panel_start;  # off left end
+  $east = 1 if !defined $feat_end   || $feat_end   >= $panel_end;    # off right end
+
+  if ($self->panel->{flip}) {
+    ($west,$east) = ($east,$west);
+  }
+  return ($west,$east,!$west,!$east);
+}
+
+# True unless the 'no_arrows' option is set.
+sub no_trunc {
+  my $self = shift;
+  return !$self->option('no_arrows');
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::anchored_arrow - The "anchored_arrow" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws an arrowhead which is anchored at one or both ends
+(has a vertical base) or has one or more arrowheads.  The arrowheads
+indicate that the feature does not end at the edge of the picture, but
+continues.  For example:
+
+    |-----------------------------|          both ends in picture
+ <----------------------|                    left end off picture
+         |---------------------------->      right end off picture
+ <------------------------------------>      both ends off picture
+
+You can also set the glyph so that the end is just truncated at the
+end of the picture.
+
+         |-----------------------------
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition to the standard options, this glyph recognizes the following:
+
+  Option         Description                Default
+
+  -tick          draw a scale               0 (false)
+
+  -relative_coords 
+                 use relative coordinates   0 (false)
+                 for scale
+
+  -relative_coords_offset 
+                 set the relative offset    1 
+                 for scale
+
+  -no_arrows     don't draw an arrow when   0 (false)
+                 glyph is partly offscreen
+
+The argument for B<-tick> is an integer between 0 and 2 and has the same
+interpretation as the B<-tick> option in Bio::Graphics::Glyph::arrow.
+
+If B<-relative_coords> is set to a true value, then the scale drawn on the
+glyph will be in relative (1-based) coordinates relative to the beginning
+of the glyph.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/arrow.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/arrow.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/arrow.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,494 @@
+package Bio::Graphics::Glyph::arrow;
+# package to use for drawing an arrow
+
+# $Id: arrow.pm,v 1.29.4.1 2006/10/02 23:10:19 sendu Exp $
+# Non object-oriented utilities used here-and-there in Bio::Graphics modules
+
+=head1 NAME
+
+Bio::Graphics::Glyph::arrow - the "arrow" glyph
+
+=cut
+
+use strict;
+use Bio::Coordinate::Pair;
+use Bio::Location::Simple;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Multiplier for each unit prefix that calculate_units() can return.
+# draw_parallel() uses it as the divisor when formatting tick labels; a
+# prefix that is not listed here (the empty string) is treated as 1 there.
+my %UNITS = (p => 1e-12,
+	     n => 1e-9,
+	     u => 1e-6,
+	     m => 0.001,
+	     c => 0.01,
+	     k => 1000,
+	     M => 1_000_000,
+	     G => 1_000_000_000);
+
+# Bottom padding of the parent class, plus one font height when the 'tick'
+# option is on.
+sub pad_bottom {
+  my $self    = shift;
+  my $padding = $self->SUPER::pad_bottom(@_);
+  if ($self->option('tick')) {
+    $padding += $self->font->height;
+  }
+  return $padding;
+}
+
+# Overrides the inherited draw_component().  Dispatches to draw_parallel()
+# when the 'parallel' option is true or not set at all, and to
+# draw_perpendicular() when it is set to a false value.
+sub draw_component {
+  my $self = shift;
+  my $parallel = $self->option('parallel');
+  if (!defined $parallel) {
+    $parallel = 1;
+  }
+  $parallel and $self->draw_parallel(@_);
+  $parallel or  $self->draw_perpendicular(@_);
+}
+
+# Draw vertical arrows at the glyph's left x position and, when it differs,
+# also at its right x position.  The 'northeast' option gives an arrowhead
+# at the top, 'southwest' one at the bottom; when neither option is defined
+# both are drawn.
+sub draw_perpendicular {
+  my $self = shift;
+  my $gd   = shift;
+  my ($dx,$dy) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $north = $self->option('northeast');
+  my $south = $self->option('southwest');
+  if (!defined($north) && !defined($south)) {
+    $north = $south = 1;
+  }
+
+  my $fg   = $self->set_pen;
+  my $barb = ($y2-$y1)/4;   # horizontal and vertical extent of each arrowhead
+
+  # each requested arrowhead: [ y of the tip, y where the two barbs start ]
+  my @heads;
+  push @heads,[$y1,$y1+$barb] if $north;
+  push @heads,[$y2,$y2-$barb] if $south;
+
+  my @positions = $x1 == $x2 ? ($x1) : ($x1,$x2);
+  foreach my $x (@positions) {
+    foreach my $head (@heads) {
+      my ($tip_y,$barb_y) = @$head;
+      $gd->line($x,$y1,$x,$y2,$fg);                  # shaft
+      $gd->line($x-$barb,$barb_y,$x,$tip_y,$fg);     # left barb
+      $gd->line($x+$barb,$barb_y,$x,$tip_y,$fg);     # right barb
+    }
+  }
+
+  # add a label if requested
+  $self->draw_label($gd,$dx,$dy) if $self->option('label');  # this draws the label aligned to the left
+}
+
+# Draw the arrow horizontally: the shaft, optional arrowheads and end bases
+# (as decided by arrowheads()), and, when the 'tick' option is on, major
+# ticks with labels plus minor ticks when 'tick' is 2 or more.  Finally the
+# label and description are drawn when the matching options are on.
+#
+# Args : the GD object to draw on, followed by the x and y offsets that are
+#        passed on to bounds(), draw_label() and draw_description()
+sub draw_parallel {
+  my $self = shift;
+  my $gd = shift;
+  my ($dx,$dy) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $fg = $self->set_pen;
+  my $a2 = ($self->height)/2;
+  my $center = $y1+$a2;
+
+  # clip the shaft to the panel and remember which ends were clipped
+  my $trunc_left  = $x1 < $self->panel->left;
+  my $trunc_right = $x2 > $self->panel->right;
+  $x1 = $self->panel->left  if $trunc_left;
+  $x2 = $self->panel->right if $trunc_right;
+
+#  warn $self->feature,": x1=$x1, x2=$x2, start=$self->{start},end=$self->{end}, strand=$self->{strand}";
+#  warn join ' ',%$self;
+
+  # when no_trunc() is true, clipped ends still get their arrowhead or base
+  $trunc_left  = 0 if $self->no_trunc;
+  $trunc_right = 0 if $self->no_trunc;
+
+  my ($sw,$ne,$base_w,$base_e) = $self->arrowheads;
+  $gd->line($x1,$center,$x2,$center,$fg);
+  $self->arrowhead($gd,$x1,$center,$a2,-1) if $sw && !$trunc_left;  # west arrow
+  $self->arrowhead($gd,$x2,$center,$a2,+1) if $ne && !$trunc_right; # east arrow
+  $gd->line($x1,$center-$a2,$x1,$center+$a2,$fg) if $base_w && !$trunc_left;  #west base
+  $gd->line($x2,$center-$a2,$x2,$center+$a2,$fg) if $base_e && !$trunc_right; #east base
+
+  # turn on ticks
+  if ($self->option('tick')) {
+    local $^W = 0;  # dumb uninitialized variable warning
+    my $font       = $self->font;
+    my $width      = $font->width;
+    my $font_color = $self->fontcolor;
+    my $height     = $self->height;
+
+    my $relative   = $self->option('relative_coords');
+    my $flipped    = $self->{flip};
+    my $end        = $self->panel->end + 1;
+
+    my $tickwidth  = $self->option('tickwidth'); $tickwidth = $self->linewidth unless defined $tickwidth;
+    my $tickcolor  = $self->color($self->option('tickcolor') || $self->option('fgcolor'));
+    my $tickpen    = $self->set_pen($tickwidth, $tickcolor);
+
+    my $relative_coords_offset = $self->option('relative_coords_offset');
+    $relative_coords_offset    = 1 unless defined $relative_coords_offset;
+
+    my $start    = $relative ? $relative_coords_offset : $self->feature->start-1;
+    my $stop     = $start + $self->feature->length - 1;
+
+    # Mapper from relative tick positions to absolute coordinates; only
+    # needed (and only built) in relative mode.  This used to be written as
+    # "my $map = ... if $relative", a "my" with a statement modifier, whose
+    # behaviour perlsyn documents as undefined.
+    my $map = $relative
+      ? Bio::Coordinate::Pair->new(-in  => Bio::Location::Simple->new( -seq_id => "rel",
+                                                                       -start => $start,
+                                                                       -end => $stop,
+                                                                       -strand => 1,
+                                                                       ),
+                                   -out => Bio::Location::Simple->new( -seq_id => "abs",
+                                                                       -start => $self->feature->start,
+                                                                       -end => $self->feature->end,
+                                                                       -strand => $self->feature->strand,
+                                                                       ),
+                                   )
+      : undef;
+
+    my $unit_label     = $self->option('units')        || '';
+    my $unit_divider   = $self->option('unit_divider') || 1;
+    my $units_in_label = $self->option('units_in_label');
+
+    # choose a unit prefix from the feature length, and the matching divisor
+    my $units      = $self->calculate_units($start/$unit_divider,$self->feature->length/$unit_divider);
+    my $divisor    = $UNITS{$units} || 1;
+
+    $divisor *= $unit_divider;
+
+    my $format     = min($self->feature->length,$self->panel->length)/$divisor > 10
+      ? "%d" : "%.6g";
+
+    # unit prefix and unit label follow each number unless they are shown
+    # once in the glyph label instead
+    $format .= "$units%s" unless $units_in_label;
+
+    my $scale  = $self->option('scale') || 1;  ## Does the user want to override the internal scale?
+
+    # widest label that can occur; its pixel width is handed to the panel's
+    # ticks() as the minimum spacing
+    my $model  = sprintf("$format ",$stop/($divisor*$scale),$unit_label);
+    $model     = "-$model" if $start < 0;
+
+    my $minlen = $width * length($model);# * 1.5;
+
+    my ($major_interval,$minor_interval) = $self->panel->ticks(($stop-$start+1)/$unit_divider,$minlen);
+
+    # keep tick marks off the arrowheads
+    my $left  = $sw ? $x1+$height : $x1;
+    my $right = $ne ? $x2-$height : $x2;
+
+    # adjust for portions of arrow that are outside panel
+    if ($relative && $self->feature->strand == -1) {
+	$start += $self->feature->end - $self->panel->end if $self->feature->end > $self->panel->end;
+	$stop -= $self->panel->start - $self->feature->start if $self->feature->start < $self->panel->start;
+    } else {
+	$start += $self->panel->start - $self->feature->start
+	    if $self->feature->start < $self->panel->start;
+	$stop  -= $self->feature->end - $self->panel->end
+	    if $self->feature->end   > $self->panel->end;
+    }
+	
+    my $first_tick = $major_interval * int($start/$major_interval);
+    my $last_tick  = $major_interval * int(($stop+2)/$major_interval);
+
+    my $label_intervals = $self->label_intervals;
+    my $interval_width  = $major_interval * $self->scale/2;
+    my %drewit;   # pixel positions that already carry a major tick
+
+    for (my $i = $first_tick; $i <= $last_tick; $i += $major_interval) {
+      my $abs = $i;
+      if ($relative) {
+	  $abs = $map->map( Bio::Location::Simple->new(-seq_id => "rel",
+						       -start  => $i,
+						       -end   => $i,
+						       -strand => 1,
+						       )
+			    )->match;
+	  next unless $abs;
+	  $abs = $abs->start;
+      }
+
+      # NOTE(review): major ticks flip with "$end - $abs + 1" while the minor
+      # ticks below use "$end - $abs"; left unchanged -- confirm which offset
+      # is intended.
+      $abs = $end - $abs + 1 if $flipped;
+
+      my $tickpos = int $dx + $self->map_pt($abs);
+      next if $tickpos < $x1 || $tickpos > $x2;
+      $drewit{$tickpos}++;
+
+      $gd->line($tickpos,$center-$a2,$tickpos,$center+$a2,$tickpen)
+	unless $tickpos < $left or $tickpos > $right;
+
+      my $label = $scale ? $i / $scale : $i;
+      my $scaled = $label/$divisor;
+      $label = sprintf($format,$scaled,$unit_label);
+
+      my $label_len = length($label) * $width;
+
+      # center the label under the tick, or under the interval to its right
+      # when label_intervals is on
+      my $middle = $tickpos - $label_len/2;
+      $middle   += $interval_width if $label_intervals;
+
+      $gd->string($font,$middle,$center+$a2-1,$label,$font_color)
+        unless ($self->option('no_tick_label') || $middle > $x2);
+    }
+
+    if ($self->option('tick') >= 2) {
+
+      $first_tick = $minor_interval * int($start/$minor_interval);
+      $last_tick  = $minor_interval * int(($stop+2)/$minor_interval);
+
+      my $a4 = $self->height/4;   # minor ticks are half as tall as major ticks
+      for (my $i = $first_tick; $i <= $last_tick; $i += $minor_interval) {
+	  my $abs = $i;
+	  if ($relative) {
+	      $abs = $map->map( Bio::Location::Simple->new(-seq_id => "rel",
+							   -start  => $i,
+							   -end    => $i,
+							   -strand => 1,
+							   )
+				)->match;
+	      next unless $abs;
+	      $abs = $abs->start;
+	  }
+	  $abs = $end - $abs if $flipped;
+
+	  my $tickpos = int $dx + $self->map_pt($abs);
+	  next if $tickpos < $left-1 or $tickpos > $right+1;
+	  next if $drewit{$tickpos} || $drewit{$tickpos-1} || $drewit{$tickpos+1}; # prevent roundoff errors from appearing
+
+	  $gd->line($tickpos,$center-$a4,$tickpos,$center+$a4,$tickpen);
+      }
+    }
+  }
+
+  # add a label if requested
+  $self->draw_label($gd,$dx,$dy)       if $self->option('label');
+  $self->draw_description($gd,$dx,$dy) if $self->option('description');
+}
+
+# Return the parent class's label.  When the 'units_in_label' option is on,
+# the unit prefix chosen by calculate_units() and the 'units' option are
+# appended in parentheses.
+sub label {
+  my $self = shift;
+  my $text = $self->SUPER::label(@_);
+
+  if ($self->option('units_in_label')) {
+    my $divider       = $self->option('unit_divider') || 1;
+    my $unit          = $self->option('units')        || '';
+    my $feature_start = $self->feature->start-1;
+    my $prefix        = $self->calculate_units($feature_start/$divider,
+                                               $self->feature->length/$divider);
+    return $text . " ($prefix$unit)";
+  }
+
+  return $text;
+}
+
+# Value of the 'label_intervals' option.
+sub label_intervals {
+  my $self = shift;
+  return $self->option('label_intervals');
+}
+
+# Decide which ends get an arrowhead and which get a vertical base.
+# Returns (west arrowhead, east arrowhead, west base, east base).
+#
+# 'double' forces both arrowheads; otherwise 'northeast'/'east' and
+# 'southwest'/'west' are consulted.  When neither direction ends up defined,
+# the feature's strand chooses the direction (swapped when the glyph is
+# flipped).  Bases are only considered when the 'base' option is on: an end
+# without an arrowhead gets one if the feature does not extend beyond the
+# panel on that side.
+sub arrowheads {
+  my $self = shift;
+  my ($east,$west);
+
+  if ($self->option('double')) {
+    ($east,$west) = (1,1);
+  }
+  else {
+    $east = $self->option('northeast') || $self->option('east');
+    $west = $self->option('southwest') || $self->option('west');
+  }
+
+  # nothing requested explicitly: use strandedness to define the arrow
+  if (!defined($east) && !defined($west)) {
+    $east = 1 if $self->feature->strand > 0;
+    $west = 1 if $self->feature->strand < 0;
+    ($east,$west) = ($west,$east) if $self->{flip};
+  }
+
+  return ($west,$east,0,0) unless $self->option('base');
+
+  my $base_west = !$west && $self->feature->start >= $self->panel->start;
+  my $base_east = !$east && $self->feature->end   <= $self->panel->end;
+  return ($west,$east,$base_west,$base_east);
+}
+
+# Always false here: draw_parallel() then leaves clipped ends without an
+# arrowhead or base.
+sub no_trunc {
+  return 0;
+}
+
+# Pick the unit prefix for a length: the prefix of the largest threshold
+# that the length reaches, or 'p' when it is below all of them.  The start
+# argument is accepted but does not influence the result.
+sub calculate_units {
+  my ($self,$start,$length) = @_;
+
+  # thresholds in descending order; the first one reached wins
+  my @thresholds = (
+                    [1e9  => 'G'],
+                    [1e6  => 'M'],
+                    [1e3  => 'k'],
+                    [1    => '' ],
+                    [1e-2 => 'c'],
+                    [1e-3 => 'm'],
+                    [1e-6 => 'u'],
+                    [1e-9 => 'n'],
+                   );
+  foreach my $entry (@thresholds) {
+    my ($minimum,$prefix) = @$entry;
+    return $prefix if $length >= $minimum;
+  }
+  return 'p';
+}
+
+# Smaller of two numbers; the second one when they are equal.
+sub min {
+  my ($first,$second) = @_;
+  return $first < $second ? $first : $second;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::arrow - The "arrow" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws arrows.  Depending on options, the arrows can be
+labeled, be oriented vertically or horizontally, or can contain major
+and minor ticks suitable for use as a scale.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description               Default
+  ------      -----------               -------
+
+  -tick       Whether to draw major             0
+              and minor ticks.
+	      0 = no ticks
+	      1 = major ticks
+	      2 = minor ticks
+
+  -tickcolor  Color to use for tick marks       fgcolor
+
+  -tickwidth  Line width to use for ticks       linewidth
+
+  -parallel   Whether to draw the arrow         1 (true)
+	      parallel to the sequence
+	      or perpendicular to it.
+
+  -northeast  Force a north or east             1 (true)
+	      arrowhead(depending 
+	      on orientation)
+
+  -east       synonym of above
+
+  -southwest  Force a south or west             1 (true)
+	      arrowhead(depending 
+	      on orientation)
+
+  -west       synonym of above
+
+  -double     force-doubleheaded arrow          0 (false)
+
+  -base       Draw a vertical base at the       0 (false)
+              non-arrowhead side
+
+  -scale      Reset the labels on the arrow     0 (false)
+              to reflect an externally 
+              established scale.
+
+  -arrowstyle "regular" to create a simple      regular
+              arrowhead.  "filled" to create
+              a thick filled arrowhead
+
+  -relative_coords 
+                 use relative coordinates       0 (false)
+                 for scale
+
+  -relative_coords_offset 
+                 set the relative offset        1 
+                 for scale
+
+  -label_intervals                              0 (false)
+              Put the numeric labels on the
+              intervals between the ticks 
+              rather than on the ticks
+              themselves.
+
+  -units      add units to the tick labels      none
+              e.g. bp
+
+  -unit_divider                                 1
+              divide tick labels by the
+              indicated amount prior to
+              displaying (use, for example
+              if you want to display in
+              cR units)
+
+Set -parallel to 0 (false) to display a point-like feature such as a
+polymorphism, or to indicate an important location.  If the feature
+start == end, then the glyph will draw a single arrow at the
+designated location:
+
+       ^
+       |
+
+Otherwise, there will be two arrows at the start and end:
+
+       ^              ^
+       |              |
+
+Scale: Pass in an externally established scale to reset the labels on
+the arrow.  This is particularly useful for manually constructed
+images where the founding parameters of the panel are not 1-based.
+For example, a genetic map interval ranging from 0.1 - 0.3 can be
+constructed by first multiplying every value by 100. Passing
+
+  arrow(-scale=>100);
+
+will draw tick marks labelled appropriately to your external scale.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/box.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/box.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/box.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,127 @@
+package Bio::Graphics::Glyph::box;
+# DAS-compatible package to use for drawing a box
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# treat like one big component
+sub draw {  # draws the whole feature as one component, then the optional label and description
+  my $self = shift;
+  $self->draw_component(@_);  # the remaining arguments are forwarded unchanged to each call
+  $self->draw_label(@_)       if $self->option('label');
+  $self->draw_description(@_) if $self->option('description');
+}
+
+sub maxdepth {  # the 'maxdepth' option when it is defined, otherwise 0
+  my $self = shift;
+  my $maxdepth =  $self->option('maxdepth');
+  return $maxdepth if defined $maxdepth;
+  return 0;
+}
+
+sub subseq {
+  return ();  # always an empty list
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::box - The "box" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is the most basic glyph.  It draws a filled box and optionally a
+label.  It does *NOT* draw subparts, and so is useful for semantic
+zooming when one is zoomed out too far to see substructure.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/broken_line.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/broken_line.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/broken_line.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,229 @@
+package Bio::Graphics::Glyph::broken_line;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+sub default_draw_beak
+{
+  return 1;  # used by draw_component when the 'draw_beak' option is undefined
+}
+
+sub default_shear
+{
+  return 5;  # used by draw_component when the 'shear' option is undefined
+}
+
+sub default_shear_up
+{
+  return 1;  # used by draw_component when the 'shear_up' option is undefined
+}
+
+sub default_break
+{
+  return 8;  # used by draw_component when the 'break' option is undefined
+}
+
+sub default_extend
+{
+  return 1;  # used by draw_component when the 'extend' option is undefined
+}
+
+sub default_size
+{
+  return 30;  # used by draw_component when the 'size' option is undefined
+}
+
+sub default_omit_left
+{
+  return 0;  # used by draw_component when the 'omit_left' option is undefined
+}
+
+sub default_omit_right
+{
+  return 0;  # used by draw_component when the 'omit_right' option is undefined
+}
+
+sub draw_component {  # draws the main line in the background color and the sheared segment (plus beak) in the foreground color
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  
+  my $fg = $self->fgcolor;
+  my $bg = $self->bgcolor;
+  
+  my $shear = defined $self->option('shear') ? $self->option('shear') : $self->default_shear();  # each option falls back to its default_* method only when undefined
+  my $shear_up = defined $self->option('shear_up') ? $self->option('shear_up') : $self->default_shear_up();
+  my $break = defined $self->option('break') ? $self->option('break') : $self->default_break();
+  my $draw_beak = defined $self->option('draw_beak') ? $self->option('draw_beak') : $self->default_draw_beak();
+  my $extend= defined $self->option('extend') ? $self->option('extend') : $self->default_extend();
+  my $size = defined $self->option('size') ? $self->option('size') : $self->default_size();
+  my $omit_left = defined $self->option('omit_left') ? $self->option('omit_left') : $self->default_omit_left();
+  my $omit_right = defined $self->option('omit_right') ? $self->option('omit_right') : $self->default_omit_right();
+
+  my $midY = ($y1+$y2)/2;
+  
+  if ($x2-$x1 < $size)  # narrower than $size: draw one plain line and stop
+  {
+    $gd->line($x1, $midY, $x2, $midY, $bg);
+    return;
+  }
+  
+  my $midX = ($x1+$x2)/2;
+  
+  my $break_start = $midX - $break/2;  # by default the break is centred on the glyph
+  my $break_end = $midX + $break/2;
+  
+  my ($x11, $x12, $x21, $x22);  # left half runs $x11..$x12, right half runs $x21..$x22
+  $x12 = $break_start;
+  $x21 = $break_end;
+
+  if ($omit_left)  # break moves to the left edge; only the right half is computed
+  {
+    $break_start = $x1;
+    $break_end = $x1+$break;
+    $x21 = $break_end;
+    $x22 = ($extend ? $x2 : $x21 + $size - $break);
+  }
+  elsif ($omit_right)  # only the left half is computed; the break follows it
+  {
+    $x11 = $x1;
+    $x12 = ($extend ? $x2 - $break : $x11 + $size - $break);
+    $break_end = $x12+$break;
+    $break_start = $x12;
+  }
+  else
+  {
+    if ($extend)  # halves reach the glyph's own left and right boundaries
+    {
+      $x11 = $x1;
+      $x22 = $x2;
+    }
+    else  # fixed total width of $size, centred on the break
+    {
+      $x11 = $break_start - ($size - $break) / 2;
+      $x22 = $break_end + ($size - $break) / 2;
+    }
+  }
+  
+  unless ($omit_left)
+  {
+    $gd->line($x11, $midY, $x12, $midY, $bg);
+  }
+  
+  my $shear_y = ($shear_up ? $midY - $shear : $midY + $shear);  # smaller y when shear_up is true
+  $gd->line($break_start, $shear_y, $break_end, $shear_y, $fg);
+  if ($draw_beak)  # two diagonals meeting at the middle of the break
+  {
+    $midX = ($break_start + $break_end) / 2;
+    
+    my $beak_y1 = $shear_up ? $midY + $shear/2 : $midY - $shear/2;
+    my $beak_y2 = $shear_up ? $midY - $shear/2 : $midY + $shear/2;
+    
+    $gd->line($midX, $beak_y1, $midX-$shear, $beak_y2, $fg);  
+    $gd->line($midX, $beak_y1, $midX+$shear, $beak_y2, $fg);  
+  }
+  
+  unless ($omit_right)
+  {
+    $gd->line($x21, $midY, $x22, $midY, $bg);
+  }
+  
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::broken_line - The "broken line" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a straight line, one segment of which is shifted
+('sheared') up or down. There can be an optional 'beak' (two diagonal
+lines passing between the main line and its segment).
+Either the left or the right side of the main line can be absent.
+The line can be of fixed size or extend to take up all available space.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -draw_beak Whether to draw the 'beak'.        1
+
+  -shear    Vertical distance between     	5
+			the main line and the segment
+
+  -shear_up Whether to shift the segment 	1
+				up or down (1 or 0)
+
+  -break 	Width of the break in the line 	8
+
+  -extend  	Whether to extend the line or   1 
+			to keep the length fixed (1 or 0) 
+
+  -size  	Total length of the line and   30 
+			the break, if extend is 0
+
+  -omit_left	Whether to omit the left	0
+			half of the main line
+
+  -omit_right	Whether to omit the right	0
+			half of the main line
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/cds.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/cds.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/cds.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,477 @@
+package Bio::Graphics::Glyph::cds;
+
+use strict;
+use Bio::Graphics::Glyph::segments;
+use Bio::Graphics::Util qw(frame_and_offset);
+use Bio::Tools::CodonTable;
+use base qw(Bio::Graphics::Glyph::segmented_keyglyph Bio::Graphics::Glyph::translation);
+
+my %default_colors = qw(
+			frame0f  cornflowerblue
+			frame1f  blue
+			frame2f  darkblue
+			frame0r  magenta
+			frame1r  red
+			frame2r  darkred
+		       );  # color name per frame key, looked up by default_color()
+
+my %swap_phase = ( 0  => 0,  # applied in draw() when phase_style is '021': phases 1 and 2 trade places
+		   1  => 2,
+		   2  => 1,
+		   '' => 0);
+
+sub connector   { 0 };  # constant: always returns 0
+sub description {  # only a glyph whose level is 0 (false) reports a description
+  my $self = shift;
+  return if $self->level;
+  return $self->SUPER::description;
+};
+
+sub default_color {  # maps a frame key such as 'frame0f' to its entry in %default_colors
+  my ($self,$key) = @_;
+  return $self->factory->translate_color($default_colors{$key});  # the color name is handed to the factory's translate_color
+}
+
+sub sixframe {  # true when a six-line staff is wanted; the answer is cached in $self->{sixframe}
+  my $self = shift;
+  return $self->{sixframe} if exists $self->{sixframe};
+  my $sixframe = $self->option('sixframe');
+  $sixframe    = $self->option('translation') eq '6frame' unless defined $sixframe;  # 'translation' is consulted only when 'sixframe' is undefined
+  return $self->{sixframe} = $sixframe;
+}
+
+sub maxdepth { 1 };  # constant: always returns 1
+
+sub require_subparts {  # the 'require_subparts' option, defaulting to true for features of type 'coding'
+  my $self = shift;
+  my $rs   = $self->option('require_subparts');
+  $rs      = $self->feature->type eq 'coding' if !defined $rs;  # shortcut for the "coding" aggregator
+  $rs;
+}
+
+sub ignore_undef_phase {
+  shift->option('ignore_empty_phase');  # note: the option is named 'ignore_empty_phase', not 'ignore_undef_phase'
+}
+
+sub ignore_non_cds {
+  shift->option('cds_only');  # accessor for the 'cds_only' option
+}
+
+sub phase_style {
+  shift->option('phase_style') || '012';  # '012' whenever the option is unset or false
+}
+
+# figure out (in advance) the color of each component
+sub draw {  # draws the staff, precomputes color/frame/translation for every part, then delegates to generic's draw
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+
+  my @parts = $self->parts;
+  @parts    = $self if !@parts && $self->level == 0 && !$self->require_subparts;  # a top-level glyph without parts stands in for itself
+
+  my $fits = $self->protein_fits;  # protein_fits is not defined in this file
+  my $strand = $self->feature->strand || 1;  # a missing or zero strand is treated as +1
+
+  # draw the staff (musically speaking)
+  if ($self->level == 0) {
+    my ($x1,$y1,$x2,$y2) = $self->bounds($left,$top);
+    my $line_count = $self->sixframe ? 6 : 3;
+    my $height = ($y2-$y1)/$line_count;  # vertical spacing between staff lines
+    my $grid  = $self->gridcolor;
+    for (0..$line_count-1) {
+      my $offset = $y1+$height*$_+1;
+      $gd->line($x1,$offset,$x2,$offset,$grid);
+      # with three-frame translation, the position of the arrows changes depending on
+      # the strand of the feature. With six-frame translation, we draw the first three
+      # staff lines with an arrow to the right, and the second three to the left
+      my $forward = ($line_count == 6) ? ($_ < 3) : ($strand > 0);
+      if ($forward) {
+	$gd->line($x2,$offset,$x2-2,$offset-2,$grid);
+	$gd->line($x2,$offset,$x2-2,$offset+2,$grid);
+      } else {
+	$gd->line($x1,$offset,$x1+2,$offset-2,$grid);
+	$gd->line($x1,$offset,$x1+2,$offset+2,$grid);
+      }
+    }
+  }
+
+  $self->{cds_part2color} ||= {};
+  my $fill   = $self->bgcolor;  # last-resort color for a frame
+
+  # figure out the colors of each part
+  # sort minus strand features backward
+  @parts = map { $_->[0] }
+  sort { $b->[1] <=> $a->[1] }
+  map { [$_, $_->left ] } @parts if $strand < 0;  # descending by each part's left()
+
+  my $codon_table = $self->option('codontable');
+  $codon_table    = 1 unless defined $codon_table;  # table id 1 when the option is not given
+  my $translate_table = Bio::Tools::CodonTable->new(-id=>$codon_table);
+
+  my $ignore_undef_phase = $self->ignore_undef_phase;
+  my $ignore_non_cds     = $self->ignore_non_cds;
+  my $broken_phase       = $self->phase_style eq '021';
+
+  for (my $i=0; $i < @parts; $i++) {
+    my $part    = $parts[$i];
+    my $feature = $part->feature;
+
+    my $type = $feature->method;
+    next if ($self->option('sub_part') && $type ne $self->option('sub_part'));  # with 'sub_part' set, only parts of that method are handled
+
+    next if $ignore_non_cds && lc($type) ne 'cds';
+
+    my $pos     = $feature->strand >= 0 ? $feature->start : $feature->end;  # position handed to frame_and_offset: start unless the strand is negative
+    my $phase   = $feature->can('phase') ? $feature->phase  # bioperl uses "frame" but this is incorrect usage
+                 :$feature->can('frame') ? $feature->frame
+                 :undef;
+    next if $ignore_undef_phase && !defined($phase);
+    $phase ||= 0;
+    $phase = $swap_phase{$phase} if $broken_phase;  # remap phases 1 and 2 for phase_style '021'
+    my $strand  = $feature->strand;  # the part's own strand; shadows the outer $strand inside this loop
+    my ($frame,$offset) = frame_and_offset($pos,
+					   $strand,
+					   $phase);
+    my $suffix = $strand < 0 ? 'r' : 'f';
+    my $key = "frame$frame$suffix";
+    $self->{cds_frame2color}{$key} ||= $self->color($key) || $self->default_color($key) || $fill;  # resolved once per frame key, then reused
+    $part->{cds_partcolor} = $self->{cds_frame2color}{$key};  # the cds_* entries are read back by draw_component
+    $part->{cds_frame}     = $frame;
+    $part->{cds_offset}    = $offset;
+
+    if ($fits && $part->feature->seq) {
+
+      # do in silico splicing in order to find the codon that
+      # arises from the splice
+      my $seq     = $self->get_seq($part->feature->seq);
+      my $protein = $seq->translate(undef,undef,$phase,$codon_table)->seq;
+      $part->{cds_translation}  = $protein;
+
+    BLOCK: {
+	length $protein >= $feature->length/3           and last BLOCK;
+	($feature->length - $phase) % 3 == 0            and last BLOCK;  # no bases left over after the last whole codon
+
+	my $next_part    = $parts[$i+1]
+	  or do {
+	    $part->{cds_splice_residue} = '?';
+	    last BLOCK; };  # no following part: the trailing residue is marked '?'
+
+	my $next_feature = $next_part->feature         or  last BLOCK;
+	my $next_phase   = eval {$next_feature->phase} or  last BLOCK;  # also leaves when the next phase is 0 or undefined
+	my $splice_codon = '';
+	my $left_of_splice  = substr($self->get_seq($feature->seq),    -$next_phase, $next_phase);
+	my $right_of_splice = substr($self->get_seq($next_feature->seq),0          , 3-$next_phase);
+	$splice_codon = $left_of_splice . $right_of_splice;
+	length $splice_codon == 3                      or last BLOCK;
+	my $amino_acid = $translate_table->translate($splice_codon);
+	$part->{cds_splice_residue} = $amino_acid;
+      }
+    }
+  }
+
+  $self->Bio::Graphics::Glyph::generic::draw($gd,$left,$top);  # invokes generic's draw by its fully qualified name
+}
+
+
+# draw the notes on the staff
+sub draw_component {  # draws one part: a thin colored bar on its frame's line, or its residues when the protein fits
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $color = $self->{cds_partcolor} or return;  # nothing is drawn for a part that draw() did not color
+  my $feature   = $self->feature;
+  my $frame     = $self->{cds_frame};
+  my $linecount = $self->sixframe ? 6 : 3;
+
+  unless ($self->protein_fits) {
+    my $height = ($y2-$y1)/$linecount;
+    my $offset = $y1 + $height*$frame;
+    $offset   += ($y2-$y1)/2 if $self->sixframe && $self->strand < 0;  # minus strand goes in the lower half of a six-frame staff
+    $offset   = $y1 + (($y2-$y1) - ($offset-$y1))-$height if $self->{flip}; # ugh. This works, but I don't know why
+    $gd->filledRectangle($x1,$offset,$x2,$offset+2,$color);
+    return;
+  }
+
+  # we get here if there's room to draw the primary sequence
+  my $font  = $self->font;
+  my $pixels_per_residue = $self->pixels_per_residue;
+  my $strand = $feature->strand;
+  my $y      = $y1-1;
+  my $fontwidth = $font->width;
+
+  $strand *= -1 if $self->{flip};
+
+  # have to remap feature start and end into pixel coords in order to:
+  # 1) correctly align the amino acids with the nucleotide seq
+  # 2) correct for the phase offset
+  my $start = $self->map_no_trunc($feature->start + $self->{cds_offset});
+  my $stop  = $self->map_no_trunc($feature->end   + $self->{cds_offset});
+
+  ($start,$stop) = ($stop,$start) if $stop < $start;  # why does this keep happening?
+  #  ($start,$stop) = ($stop,$start) if $self->{flip};
+
+  my @residues = split '',$self->{cds_translation};
+
+  push @residues,$self->{cds_splice_residue} if $self->{cds_splice_residue};
+  for (my $i=0;$i<@residues;$i++) {
+    my $x = $strand > 0 ? $start + $i * $pixels_per_residue
+                        : $stop  - $i * $pixels_per_residue;
+    next unless ($x >= $x1 && $x <= $x2);  # skip residues whose x falls outside $x1..$x2
+    $x -= $fontwidth + 1 if $self->{flip}; # align right when flipped
+    $gd->char($font,$x+1,$y,$residues[$i],$color);
+  }
+}
+
+sub make_key_feature {  # builds a two-segment, plus-strand feature with a random sequence
+  my $self = shift;
+  my @gatc = qw(g a t c);
+  my $offset = $self->panel->offset;
+  my $scale = 1/$self->scale;  # base pairs/pixel
+  my $start = $offset;
+  my $stop  = $offset + 100 * $scale;  # the feature spans 100 * $scale from $offset
+  my $seq   = join('',map{$gatc[rand 4]} (1..1500));  # 1500 random bases; the same string is given to all three features
+  my $feature =
+    Bio::Graphics::Feature->new(-start=> $start,
+				-end  => $stop,
+				-seq  => $seq,
+				-name => $self->option('key'),
+				-strand=> +1,
+			       );
+  $feature->add_segment(Bio::Graphics::Feature->new(
+						    -start=> $start,
+						    -end => $start + ($stop - $start)/2,
+						    -seq  => $seq,
+						    -name => $self->option('key'),
+						    -strand=> +1,
+						   ),
+			Bio::Graphics::Feature->new(
+						    -start=> $start + ($stop - $start)/2+1,
+						    -end => $stop,
+						    -seq  => $seq,
+						    -name => $self->option('key'),
+						    -phase=> 1,
+						    -strand=> +1,
+						   ));
+  $feature;
+}
+
+# never allow our components to bump
+sub bump {  # returns 0 unless all_callbacks is true, in which case the parent's bump decides
+  my $self = shift;
+  return $self->SUPER::bump(@_) if $self->all_callbacks;
+  return 0;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::cds - The "cds" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws features that are associated with a protein coding
+region.  When zoomed out, it draws a series of boxes that are
+color-coded to indicate the frame in which the translation occurs.
+When zoomed in far enough for the residues to fit, it draws the amino
+acid sequence of the resulting protein.  Amino acids that are created
+by a splice are optionally shown in a distinctive color.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the cds glyph recognizes the following glyph-specific
+options:
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -frame0f    Color for first (+) frame        background color
+
+  -frame1f    Color for second (+) frame       background color
+
+  -frame2f    Color for third (+) frame        background color
+
+  -frame0r    Color for first (-) frame        background color
+
+  -frame1r    Color for second (-) frame       background color
+
+  -frame2r    Color for third (-) frame        background color
+
+  -gridcolor  Color for the "staff"            lightslategray
+
+  -translation Number of lines of reading      3frame
+               frames to show. One of
+               "3frame", or "6frame".
+               For 6frame, specify a height
+               of at least 30 pixels.
+
+  -sixframe   Draw a six-frame staff           0 (false; usually draws 3 frame)
+              This value overrides
+              -translation, which essentially
+              does the same thing.
+
+  -require_subparts
+              Don't draw the reading frame 0   false
+              unless it is a feature
+              subpart.
+
+  -sub_part   For objects with multiple	       undef
+              subpart types, defines which
+              is the CDS part.
+
+  -codontable   Codon table to use             1 (see Bio::Tools::CodonTable)
+
+  -phase_style  The way phase is to be
+                interpreted. One of            "012"
+                "012" or "021"
+  -ignore_empty_phase                          false
+              Only draw features that have
+              their phase defined.
+
+  -cds_only   Only draw features of type       false
+              'CDS'
+
+This glyph is more sensitive to the underlying data model than usual,
+so there are a few additional options to use to help adapt the glyph
+to different environments.
+
+The -require_subparts option is suggested when rendering spliced
+transcripts which contain multiple CDS subparts.  Otherwise, the glyph
+will hiccup when zoomed way down onto an intron between two CDSs (a
+phantom reading frame will appear).  For unspliced sequences, do *not*
+use -require_subparts.
+
+The -phase_style controls how the value returned by the phase() or
+frame() methods is to be interpreted. The official interpretation is
+that the phase value indicates the offset into the feature at which
+the reading frame starts -- e.g. a phase of "2" means the reading
+frame starts after skipping two bases from the beginning of the
+feature.  However, many GFF2 format feature files interpret this field
+to mean the position reading frame of the first base of the feature --
+e.g. a phase of "2" means that the reading frame starts after skipping
+just one base from the beginning of the feature. Specify "012" to
+interpret the phase field in the correct way, and "021" to interpret
+the phase field in the legacy way. The default is "012."
+
+Here is how the option names were chosen:
+
+    * * *                  Base the reading frame starts on
+    A B C A B C A B C...
+    0 1 2                  PHASE REPRESENTED CORRECTLY
+    0 2 1                  PHASE REPRESENTED IN THE LEGACY WAY
+
+Set the -ignore_empty_phase option to true if you wish to skip
+subfeatures that do not have a defined phase() or frame(). This is useful
+if you are rendering exons that have both translated and untranslated
+parts, and you wish to skip the untranslated parts.
+
+Set the -cds_only option to true if you wish to draw the glyph only
+for subfeatures of type 'CDS'. This is recommended.
+
+=head1 SUGGESTED STANZA FOR GENOME BROWSER
+
+Using the "coding" aggregator, this produces a nice gbrowse display.
+
+ [CDS]
+ feature      = coding
+ glyph        = cds
+ frame0f      = cadetblue
+ frame1f      = blue
+ frame2f      = darkblue
+ frame0r      = darkred
+ frame1r      = red
+ frame2r      = crimson
+ description  = 0
+ height       = 13
+ label        = CDS frame
+ key          = CDS
+ citation     = This track shows CDS reading frames.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/christmas_arrow.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/christmas_arrow.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/christmas_arrow.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,128 @@
+package Bio::Graphics::Glyph::christmas_arrow;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+use Math::Trig;
+
+# Fallback value for the -radius option when the caller does not set it.
+sub default_radius { return 4 }
+
+# Fallback value for the -length option when the caller does not set it.
+sub default_length { return 20 }
+
+# Draw the glyph: a filled circle in the lower-left corner of the
+# bounding box, a vertical line rising from the circle's centre and,
+# when the box is wider than the radius, a horizontal arrow starting at
+# the top of that line.
+#
+# Arguments: the GD image followed by the arguments expected by
+# calculate_boundaries().
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+
+  my $fg = $self->fgcolor;
+
+  my $radius = $self->option('radius');
+  $radius = $self->default_radius() unless defined $radius;
+  my $length = $self->option('length');
+  $length = $self->default_length() unless defined $length;
+
+  # $dx is the length of the arrowhead, $dy half of its height; the
+  # shaft therefore runs $dy below the top of the bounding box.
+  my $dx = 6;
+  my $dy = 4;
+  my $ball_x  = $x1+$radius;
+  my $ball_y  = $y2-$radius;
+  my $shaft_y = $y1+$dy;
+
+  $gd->filledEllipse($ball_x, $ball_y, 2*$radius, 2*$radius, $fg);
+  $gd->line($ball_x, $ball_y, $ball_x, $shaft_y, $fg);
+
+  # too narrow for an arrow: leave only the ball and its string
+  return if ($x2-$x1 <= $radius);
+
+  # the arrow length comes from the -length option, not from the
+  # width of the feature
+  my $tip_x = $ball_x+$length;
+  $gd->line($ball_x, $shaft_y, $tip_x, $shaft_y, $fg);
+  $gd->line($tip_x, $shaft_y, $tip_x-$dx, $y1, $fg);
+  $gd->line($tip_x, $shaft_y, $tip_x-$dx, $y1+2*$dy, $fg);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::christmas_arrow - The "christmas arrow" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws an arrow which has a circle ("christmas ball")
+dangling at one end.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -radius     Radius of the circle          4
+              glyph
+
+  -length     Length of the arrow           20
+
+  -height     Standard option, but          10
+              important here
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/crossbox.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/crossbox.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/crossbox.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,124 @@
+package Bio::Graphics::Glyph::crossbox;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Override draw_component to draw a crossed box rather than an empty
+# one: an outlined rectangle, optionally flood-filled with the bgcolor,
+# with both diagonals drawn in the foreground colour.
+#
+# Arguments: the GD image followed by the arguments expected by bounds().
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my $fg = $self->fgcolor;
+  my($x1,$y1,$x2,$y2) = $self->bounds(@_);
+  $self->unfilled_box($gd, $x1, $y1, $x2, $y2);
+
+  # The fill is seeded at the centre of the box and must happen before
+  # the diagonals are drawn, otherwise they would partition the interior.
+  if ($self->option('bgcolor')){
+    my $c = $self->color('bgcolor');
+    $gd->fill(($x2+$x1)/2, ($y2+$y1)/2, $c);
+  }
+
+  $gd->line($x1,$y1,$x2,$y2,$fg);
+  $gd->line($x1,$y2,$x2,$y1,$fg);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::crossbox - The "crossbox" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is a box with an 'X' inside the glyph.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dashed_line.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dashed_line.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dashed_line.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,163 @@
+package Bio::Graphics::Glyph::dashed_line;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Fallback value for the -linewidth option.
+sub default_linewidth { return 1 }
+
+# Fallback value for the -dash_size option.
+sub default_dash_size { return 6 }
+
+# Fallback value for the -space_size option.
+sub default_space_size { return 3 }
+
+# Draw a horizontal dashed line across the glyph, centred vertically.
+#
+# Dashes are -dash_size wide and drawn in the foreground colour; the
+# gaps between them are -space_size wide and are painted only when
+# -space_color is set.  With -shear set to 1 the dashes occupy the band
+# just above the centre line and the coloured gaps the band just below
+# it; otherwise both are centred on it.
+#
+# Arguments: the GD image followed by the arguments expected by
+# calculate_boundaries().
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+
+  my $fg = $self->fgcolor;
+  my $midY = ($y1+$y2) / 2;
+
+  my $linewidth = $self->option('linewidth');
+  $linewidth = $self->default_linewidth() unless defined $linewidth;
+
+  # The loop below advances by one dash plus one gap per iteration.  A
+  # dash that is not positive, or a negative gap, could leave the cursor
+  # where it was and loop forever, so such values fall back to the
+  # defaults.
+  my $dash_size = $self->option('dash_size');
+  $dash_size = $self->default_dash_size()
+    unless defined $dash_size && $dash_size > 0;
+  my $space_size = $self->option('space_size');
+  $space_size = $self->default_space_size()
+    unless defined $space_size && $space_size >= 0;
+
+  my $space_color = $self->option('space_color');
+  $space_color = $self->factory->translate_color($space_color) if $space_color;
+  my $shear = ($self->option('shear') || 0) == 1;
+
+  # vertical extents do not change along the line, so work them out once
+  my ($dash_y1, $dash_y2, $space_y1, $space_y2);
+  if ($shear)
+  {
+    ($dash_y1,  $dash_y2)  = ($midY-$linewidth, $midY);
+    ($space_y1, $space_y2) = ($midY, $midY+$linewidth);
+  }
+  else
+  {
+    ($dash_y1,  $dash_y2)  = ($midY - $linewidth/2, $midY + $linewidth/2);
+    ($space_y1, $space_y2) = ($dash_y1, $dash_y2);
+  }
+
+  my $x = $x1;
+  while ($x<$x2)
+  {
+    # dash, clipped to the right edge
+    my $dash_end = $x+$dash_size;
+    $dash_end = $x2 if $dash_end > $x2;
+    $self->filled_box($gd,$x,$dash_y1,$dash_end,$dash_y2,$fg,$fg);
+    last if $dash_end >= $x2;
+
+    # gap, clipped to the right edge; painted only if a colour was given
+    my $space_end = $dash_end+$space_size;
+    $space_end = $x2 if $space_end > $x2;
+    $self->filled_box($gd,$dash_end,$space_y1,$space_end,$space_y2,$space_color,$space_color)
+      if $space_color;
+    $x = $space_end;
+  }
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::dashed_line - The "dashed line" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a dashed line. The lengths of the dash and the space are configurable.
+The space can be filled with a different color, thus making a two-colored line.
+Also, the two colors can be "sheared".
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -dash_size  Width of one dash              6
+
+  -space_size Width of one space             3
+              between dashes       
+
+  -space_color Color of the space            none 
+              between dashes       
+
+  -shear      Whether to use shearing       0
+              (1 or 0)
+
+  -linewidth  Standard option, but          1
+              important here
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/diamond.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/diamond.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/diamond.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+package Bio::Graphics::Glyph::diamond;
+# DAS-compatible package to use for drawing a colored diamond
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Draw a diamond centred on the feature.
+#
+# The diamond is as tall as the "height" option.  When the "point"
+# option is set, or the feature is narrower than that height, the
+# diamond is also that wide; otherwise it spans the feature, unless
+# "fallback_to_rectangle" is set, in which case drawing is delegated to
+# the parent class.
+#
+# Arguments: the GD image followed by the arguments expected by
+# calculate_boundaries().
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my $fg = $self->fgcolor;
+
+  # find the center and vertices
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+
+  my $xmid = ($x1+$x2)/2;
+  my $ymid = ($y1+$y2)/2;
+
+  my $h = $self->option('height')/2;
+  $y1 = $ymid - $h;
+  $y2 = $ymid + $h;
+
+  # if it's a point-like feature, then draw symmetrically
+  # around the midpoint
+  if ($self->option('point') || $x2 - $x1 < $h*2) {
+    $x1 = $xmid - $h;
+    $x2 = $xmid + $h;
+  }
+  elsif ($self->option('fallback_to_rectangle')) {
+    # hand the remaining (unshifted) arguments on to the parent class
+    return $self->SUPER::draw_component($gd,@_);
+  }
+
+  my $polygon = $self->polygon_package->new();
+  $polygon->addPt($x1,$ymid);
+  $polygon->addPt($xmid,$y1);
+  $polygon->addPt($x2,$ymid);
+  $polygon->addPt($xmid,$y2);
+
+  # Have to draw TWO polygons for fills in order to get an outline
+  # because filledPolygon in GD croaks with extra parameters (and
+  # doesn't support drawing of stroke anyways).
+  my $bg = $self->bgcolor;
+  $gd->filledPolygon($polygon,$bg) if $bg;
+  $gd->polygon($polygon,$fg);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::diamond - The "diamond" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a diamond of fixed size, positioned in the center of
+the feature.  The height and width of the diamond are set by the
+"height" option.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+If the feature is wider than a point, then the label and description
+are placed where the feature's boundary is, and not where the diamond
+is drawn.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>, Todd Harris E<lt>harris at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dna.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dna.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dna.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,392 @@
+package Bio::Graphics::Glyph::dna;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+my %complement = (g=>'c',a=>'t',t=>'a',c=>'g',n=>'n',
+		  G=>'C',A=>'T',T=>'A',C=>'G',N=>'N');
+
+# This glyph never shows a description: always report false.
+sub description { return 0 }
+
+# turn off label
+# sub label { 1 }
+
+# Height of the glyph: twice the font height when the individual bases
+# fit, the inherited height when the GC graph is enabled, otherwise 0.
+sub height {
+  my $self = shift;
+  my $font = $self->font;
+  if ($self->dna_fits) {
+    return 2*$font->height;
+  }
+  if ($self->do_gc) {
+    return $self->SUPER::height;
+  }
+  return 0;
+}
+
+# Whether the GC-content graph may be drawn.  True unless the do_gc
+# option has been explicitly set to a false value.
+sub do_gc {
+  my $self = shift;
+  my $setting = $self->option('do_gc');
+  if (!defined($setting) || $setting) {
+    return 1;
+  }
+  return;
+}
+
+# Render the feature's sequence: the individual bases when dna_fits
+# reports that they fit, otherwise the GC-content graph when do_gc
+# allows it.  Nothing is drawn if no sequence can be obtained.
+sub draw_component {
+  my ($self,$gd,@args) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@args);
+
+  # the eval keeps a failing seq() call from aborting the drawing
+  my $dna = eval { $self->feature->seq };
+  if (ref($dna) and $dna->can('seq')) { # to catch Bio::PrimarySeqI objects
+    $dna = $dna->seq;
+  }
+  return unless $dna;
+
+  # workaround for my misreading of interface -- LS
+  if (ref($dna) && $dna->can('seq')) {
+    $dna = $dna->seq;
+  }
+
+  if ($self->dna_fits) {
+    $self->draw_dna($gd,$dna,$x1,$y1,$x2,$y2);
+  } elsif ($self->do_gc) {
+    $self->draw_gc_content($gd,$dna,$x1,$y1,$x2,$y2);
+  }
+}
+
+# Write the sequence one character per base position.
+#
+# The "strand" option selects which rows are written: "forward",
+# "reverse", "both", or "auto" (the default), which picks the rows from
+# the strand of the feature.  The reverse row shows the complement of
+# each base and moves down one text line when the forward row is also
+# shown.
+#
+# Arguments: GD image, DNA string, and the bounding box ($x1,$y1,$x2,$y2).
+sub draw_dna {
+  my ($self,$gd,$dna,$x1,$y1,$x2,$y2) = @_;
+
+  my $pixels_per_base = $self->scale;
+  my $feature = $self->feature;
+
+  # orientation used for the layout; a flipped glyph reverses it
+  my $strand = $feature->strand || 1;
+  $strand *= -1 if $self->{flip};
+
+  my $text  = $strand >= 0 ? $dna : $self->reversec($dna);
+  my @bases = split '',$text;
+
+  my $color = $self->fgcolor;
+  my $font  = $self->font;
+  my $lineheight = $font->height;
+  $y1 -= $lineheight/2 - 3;
+
+  my $strands = $self->option('strand') || 'auto';
+  my ($forward,$reverse);
+  if ($strands eq 'auto') {
+    $forward = $feature->strand >= 0;
+    $reverse = $feature->strand <= 0;
+  }
+  elsif ($strands eq 'both') {
+    $forward = 1;
+    $reverse = 1;
+  }
+  elsif ($strands eq 'reverse') {
+    $reverse = 1;
+  }
+  else {
+    $forward = 1;
+  }
+
+  # minus strand features align right, not left
+  $x1 += $pixels_per_base - $font->width - 1 if $strand < 0;
+
+  # the complement row sits under the forward row when both are shown
+  my $reverse_y = $forward ? $y1 + $lineheight : $y1;
+
+  foreach my $i (0..$#bases) {
+    my $base = $bases[$i];
+    my $x    = $x1 + $i * $pixels_per_base + 2;
+    $gd->char($font,$x,$y1,$base,$color) if $forward;
+    # characters without an entry in %complement are shown unchanged
+    $gd->char($font,$x,$reverse_y,$complement{$base}||$base,$color) if $reverse;
+  }
+
+}
+
+# Plot the GC content of a sequence inside the given bounding box.
+#
+# Arguments: GD image, DNA string, and the bounding box ($x1,$y1,$x2,$y2).
+#
+# Two modes are supported:
+#   * sliding window -- used when the gc_window option is set.  "auto",
+#     or a value that is not a positive number no larger than the
+#     sequence, selects a window of 1% of the sequence length.
+#   * fixed bins -- used when gc_window is not set.  The sequence is cut
+#     into gc_bins (default 100) non-overlapping bins, each at least 10
+#     bases long.
+#
+# In both modes the values are rescaled so that the lowest observed GC
+# fraction lies on the bottom axis and the highest on the top axis; the
+# right-hand axis is labelled with the corresponding percentages.
+sub draw_gc_content {
+  my $self     = shift;
+  my $gd       = shift;
+  my $dna      = shift;
+  my ($x1,$y1,$x2,$y2) = @_;
+
+# get the options that tell us how to draw the GC content
+
+  my $bin_size = length($dna) / ($self->option('gc_bins') || 100);
+  $bin_size = 10 if $bin_size < 10;
+  my $gc_window = $self->option('gc_window');
+  # The inner parentheses matter: an unset gc_window must stay unset (so
+  # that bins are used), and an explicit window that fits the sequence
+  # must be kept as given.
+  if ($gc_window
+      && ($gc_window eq 'auto' || $gc_window <= 0 || $gc_window > length($dna))) {
+    $gc_window = length($dna)/100;
+  }
+
+# Calculate the GC content...
+
+  my @bins;
+  my @datapoints;
+  my $maxgc = -1000;
+  my $mingc = +1000;
+
+  # Map raw GC fractions onto 0..1 relative to the observed range.  A
+  # flat profile (max == min) has no range to divide by, so it is placed
+  # at mid height instead of dividing by zero.
+  my $rescale = sub {
+    my $values = shift;
+    my $range  = $maxgc - $mingc;
+    foreach my $value (@$values) {
+      $value = $range ? ($value - $mingc) / $range : 0.5;
+    }
+  };
+
+  if ($gc_window)
+  {
+
+# ...using a sliding window...
+    for (my $i=$gc_window/2; $i <= length($dna) - $gc_window/2; $i++)
+      {
+        my $subseq = substr($dna, $i-$gc_window/2, $gc_window);
+        my $gc = $subseq =~ tr/gcGC/gcGC/;
+        my $content = $gc / $gc_window;
+        push @datapoints, $content;
+        $maxgc = $content if ($content > $maxgc);
+        $mingc = $content if ($content < $mingc);
+      }
+    unless (@datapoints) {
+      # nothing could be sampled: plot one mid-height point and label
+      # the axis accordingly
+      push @datapoints, 0.5;
+      $maxgc = $mingc = 0.5;
+    }
+    $rescale->(\@datapoints);
+  }
+  else
+  {
+
+# ...or a fixed number of bins.
+
+    for (my $i = 0; $i < length($dna) - $bin_size; $i+= $bin_size) {
+      my $subseq  = substr($dna,$i,$bin_size);
+      my $gc      = $subseq =~ tr/gcGC/gcGC/;
+      my $content = $gc/$bin_size;
+      $maxgc = $content if ($content > $maxgc);
+      $mingc = $content if ($content < $mingc);
+      push @bins,$content;
+    }
+    unless (@bins) {
+      # sequence shorter than one bin: same fallback as above
+      push @bins, 0.5;
+      $maxgc = $mingc = 0.5;
+    }
+    $rescale->(\@bins);
+  }
+
+  # axis labels are whole percentages
+  $maxgc = int($maxgc * 100);
+  $mingc = int($mingc * 100);
+
+# Calculate values that will be used in the layout
+
+  push @bins,0.5 unless @bins;  # avoid div by zero
+  my $bin_width  = ($x2-$x1)/@bins;
+  my $bin_height = $y2-$y1;
+  my $fgcolor    = $self->fgcolor;
+  my $bgcolor    = $self->factory->translate_color($self->panel->gridcolor);
+  my $axiscolor  = $self->color('axis_color') || $fgcolor;
+
+# Draw the axes
+  my $fontwidth = $self->font->width;
+  $gd->line($x1,  $y1,        $x1,  $y2,        $axiscolor);
+  $gd->line($x2-2,$y1,        $x2-2,$y2,        $axiscolor);
+  $gd->line($x1,  $y1,        $x1+3,$y1,        $axiscolor);
+  $gd->line($x1,  $y2,        $x1+3,$y2,        $axiscolor);
+  $gd->line($x1,  ($y2+$y1)/2,$x1+3,($y2+$y1)/2,$axiscolor);
+  $gd->line($x2-4,$y1,        $x2-1, $y1,       $axiscolor);
+  $gd->line($x2-4,$y2,        $x2-1, $y2,       $axiscolor);
+  $gd->line($x2-4,($y2+$y1)/2,$x2-1,($y2+$y1)/2,$axiscolor);
+  $gd->line($x1+5,$y2,        $x2-5,$y2,        $bgcolor);
+  $gd->line($x1+5,($y2+$y1)/2,$x2-5,($y2+$y1)/2,$bgcolor);
+  $gd->line($x1+5,$y1,        $x2-5,$y1,        $bgcolor);
+  $gd->string($self->font,$x1-length('% gc')*$fontwidth,$y1,'% gc',$axiscolor) if $bin_height > $self->font->height*2;
+
+# The GC graph is scaled to use the full height of the glyph, so label
+# the right vertical axis to show the scaling that is in effect
+
+  $gd->string($self->font,$x2+3,$y1,"${maxgc}%",$axiscolor)
+    if $bin_height > $self->font->height*2.5;
+  $gd->string($self->font,$x2+3,$y2-$self->font->height,"${mingc}%",$axiscolor)
+    if $bin_height > $self->font->height*2.5;
+
+# Draw the GC content graph itself
+
+  if ($gc_window)
+  {
+    # sliding window: join consecutive data points with line segments
+    my $graphwidth = $x2 - $x1;
+    my $scale = $graphwidth / @datapoints;
+    my $gc_window_width = $gc_window/2 * $self->panel->scale;
+    for (my $i = 1; $i < @datapoints; $i++)
+      {
+        my $x = $i + $gc_window_width;
+        my $xlo = $x1 + ($x - 1) * $scale;
+        my $xhi = $x1 + $x * $scale;
+        last if $xhi >= $self->panel->right-$gc_window_width;
+        my $y = $y2 - ($bin_height*$datapoints[$i]);
+        $gd->line($xlo, $y2 - ($bin_height*$datapoints[$i-1]),
+                  $xhi, $y,
+                  $fgcolor);
+      }
+  }
+  else
+  {
+    # bins: a horizontal step per bin, joined to the next by a vertical
+    for (my $i = 0; $i < @bins; $i++)
+      {
+          my $bin_start  = $x1+$i*$bin_width;
+          my $bin_stop   = $bin_start + $bin_width;
+          my $y          = $y2 - ($bin_height*$bins[$i]);
+          $gd->line($bin_start,$y,
+                    $bin_stop,$y,
+                    $fgcolor);
+          $gd->line($bin_stop,$y,
+                    $bin_stop,$y2 - ($bin_height*$bins[$i+1]),
+                    $fgcolor)
+              if $i < @bins-1;
+      }
+  }
+}
+
+# Build the sample feature shown in the track key: a plus-strand
+# feature starting just after the panel offset, 100 pixels wide at the
+# current scale, carrying 500 randomly chosen bases.
+sub make_key_feature {
+  my $self = shift;
+  my @gatc = qw(g a t c);
+  my $offset = $self->panel->offset;
+  my $bp_per_pixel = 1/$self->scale;  # base pairs/pixel
+
+  my $random_dna = '';
+  $random_dna .= $gatc[rand 4] foreach (1..500);
+
+  my $key_feature =
+    Bio::Graphics::Feature->new(-start  => $offset+1,
+                                -stop   => $offset+100*$bp_per_pixel,
+                                -seq    => $random_dna,
+                                -name   => $self->option('key'),
+                                -strand => '+1',
+                               );
+  return $key_feature;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::dna - The "dna" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws DNA sequences.  At high magnifications, this glyph
+will draw the actual base pairs of the sequence (both strands).  At
+low magnifications, the glyph will plot the GC content.  By default,
+the GC calculation will use non-overlapping bins, but this can be
+changed by specifying the gc_window option, in which case, a 
+sliding window calculation will be used.
+
+For this glyph to work, the feature must return a DNA sequence in
+response to the seq() method, either as a plain string or as an object
+whose own seq() method returns the string.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description               Default
+  ------      -----------               -------
+
+  -do_gc      Whether to draw the GC      true
+              graph at low mags
+
+  -gc_window  Size of the sliding window  E<lt>noneE<gt>
+  	      to use in the GC content 
+	      calculation.  If this is 
+	      not defined, non-
+	      overlapping bins will be 
+	      used. If this is set to
+              "auto", then the glyph will
+              choose a window equal to
+              1% of the interval.
+
+  -gc_bins    Fixed number of intervals   100
+              to sample across the
+              panel.
+
+  -axis_color Color of the vertical axes  fgcolor
+              in the GC content graph
+
+  -strand      Show both forward and      auto
+              reverse strand, one of
+              "forward", "reverse",
+              "both" or "auto".
+              In "auto" mode,
+              +1 strand features will
+              show the plus strand
+              -1 strand features will
+              show the reverse complement
+              and strandless features will
+              show both
+
+NOTE: -gc_window=E<gt>'auto' gives nice results and is recommended for
+drawing GC content. The GC content axes draw slightly outside the
+panel, so you may wish to add some extra padding on the right and
+left.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Sliding window GC calculation added by Peter Ashton E<lt>pda at sanger.ac.ukE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dot.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dot.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dot.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,160 @@
+package Bio::Graphics::Glyph::dot;
+# DAS-compatible package to use for drawing a ring or filled circle
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+use constant PI => 3.14159;
+
+# Draw a circle centred on the feature, an optional "stem" line, and the
+# label when the "label" option is set.
+#
+# The circle's width and height are both taken from the glyph height.
+# It is outlined in the foreground colour and filled with the
+# background colour when there is one.
+#
+# Arguments: the GD image followed by the arguments expected by
+# calculate_boundaries() and draw_label().
+sub draw {
+  my $self = shift;
+  my $gd = shift;
+  my $fg = $self->fgcolor;
+
+  # now draw a circle
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  my $xmid   = (($x1+$x2)/2);
+  my $ymid   = (($y1+$y2)/2);
+
+  # only point ovals allowed now
+  my $r = $self->height;
+
+  # Code to maintain compliance with gd 1.8.4
+  # gd 1.8.4 does not support the ellipse() or filledEllipse() methods.
+  # Let's maintain the filledEllipse approach for installations
+  # using gd2 or for drawing images with GD::SVG
+  # Otherwise, we will use fill as before.
+  # The can() method fails with GD::SVG. Why?
+  my $bg = $self->bgcolor;
+  if ($gd->can('filledEllipse') || $gd =~ /SVG/ ) {
+    $gd->filledEllipse($xmid,$ymid,$r,$r,$bg) if ($bg);
+    # Draw the border (or unfilled ellipse)
+    $gd->ellipse($xmid,$ymid,$r,$r,$fg);
+  } else {
+    # Let's draw a circle in the gd 1.8.4 manner
+    $gd->arc($xmid,$ymid,$r,$r,0,360,$fg);
+    $gd->fillToBorder($xmid,$ymid,$fg,$bg) if ($bg);
+  }
+
+  #how about a fuse for the bomb?
+  #The "stem" option is an angle in degrees; it is converted to radians
+  #with the package's PI constant before being passed to sin/cos.
+  if(defined $self->option('stem')){
+    my $angle   = $self->option('stem');
+    my $radians = $angle*PI/180;
+
+    $gd->line($xmid+($r/PI*sin($radians)),
+              $ymid+($r/PI*cos($radians)),
+              $xmid+($r*sin($radians)),
+              $ymid+($r*cos($radians)),$fg);
+  }
+
+  $self->draw_label($gd,@_) if $self->option('label');
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::dot - The "dot" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
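+This glyph draws a circle centered on the midpoint of the feature.
+Both the width and the height of the circle are taken from the glyph's
+configured height (see L<Bio::Graphics::Glyph>); the scaled width of
+the feature does not affect its size.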
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -point      Whether to draw an ellipse   feature width
+              the scaled width of the
+              feature or with radius
+              point.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dumbbell.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dumbbell.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/dumbbell.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,389 @@
+package Bio::Graphics::Glyph::dumbbell;
+# DAS-compatible package to use for drawing a line of repeating shapes
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+use Math::Trig;
+
+# Fallback size for the end shapes; draw_component uses it when the
+# 'shape_size' option is not defined.
+sub default_shape_size
+{
+  my $fallback_size = 10;
+  return $fallback_size;
+}
+
+# Name of the end shape that draw_end_shape falls back to when the
+# 'end_shape' option is not set (or is false).
+sub default_shape
+{
+  my $fallback_shape = 'square';
+  return $fallback_shape;
+}
+
+# Draw one end shape by dispatching to the draw_end_<shape> method named by
+# the 'end_shape' option (default_shape() when the option is false).  A shape
+# name with no matching method falls back to draw_end_square.  All arguments
+# after $self are passed through unchanged, and the chosen method's return
+# value is returned.
+sub draw_end_shape
+{
+  my $self = shift;
+  my $name = $self->option('end_shape') || $self->default_shape();
+
+  my $drawer = "draw_end_$name";
+  $drawer = 'draw_end_square' unless $self->can($drawer);
+
+  return $self->$drawer(@_);
+}
+
+# Fill an axis-aligned square with colour $fg.  One corner is ($x1,$y1) and
+# the opposite corner is ($x1+$shape_size, $y1+$shape_size).
+# Returns the two x coordinates spanned by the square: ($x1, $x1+$shape_size).
+sub draw_end_square
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  my ($x2, $y2) = ($x1 + $shape_size, $y1 + $shape_size);
+
+  my $pkg    = $self->polygon_package;
+  my $square = $pkg->new();
+
+  # Add the four corners in outline order.
+  for my $corner ([$x1,$y1], [$x2,$y1], [$x2,$y2], [$x1,$y2])
+  {
+    $square->addPt(@$corner);
+  }
+
+  $gd->filledPolygon($square, $fg);
+
+  return ($x1, $x2);
+}
+
+# Fill a diamond inscribed in the $shape_size box that has ($x1,$y1) as one
+# corner: its vertices are the midpoints of the box's four sides.
+# Returns the x extent of that box: ($x1, $x1+$shape_size).
+sub draw_end_diamond
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  my ($x2, $y2) = ($x1 + $shape_size, $y1 + $shape_size);
+
+  my $pkg = $self->polygon_package;
+
+  my ($cx, $cy) = (($x1+$x2)/2, ($y1+$y2)/2);
+
+  my $diamond = $pkg->new();
+  for my $vertex ([$x1,$cy], [$cx,$y1], [$x2,$cy], [$cx,$y2])
+  {
+    $diamond->addPt(@$vertex);
+  }
+
+  $gd->filledPolygon($diamond, $fg);
+
+  return ($x1, $x2);
+}
+
+# Build a polygon from a flat list of coordinates (x0, y0, x1, y1, ...),
+# then call scale($scale_factor, $scale_factor) and offset($midX, $midY) on
+# it, in that order.  Returns the polygon object.
+sub translated_polygon
+{
+  my ($self, $midX, $midY, $scale_factor, @coords) = @_;
+
+  my $pkg   = $self->polygon_package;
+  my $shape = $pkg->new();
+
+  # Consume the list two values at a time: one (x, y) vertex per pass.
+  while (my ($x, $y) = splice(@coords, 0, 2))
+  {
+    $shape->addPt($x, $y);
+  }
+
+  $shape->scale($scale_factor, $scale_factor);
+  $shape->offset($midX, $midY);
+
+  return $shape;
+}
+
+# Fill a star centred in the $shape_size box that has ($x1,$y1) as one
+# corner.  The star is assembled from two filled polygons whose coordinates
+# are given around the origin and scaled by $shape_size/190.
+# Returns the centre x coordinate twice: ($midX, $midX).
+sub draw_end_star
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  # Alternative single-polygon coordinates, left disabled:
+  #my @coords = (95, -31, -58, 81, 0, -100, 58, 81, -95, -31);
+  my @outlines = (
+    [31, 42, 31, -42, -49, -30, -38, 0, -49, 30],
+    [100, 0, -81, 59, 31, -95, 31, 95, -81, -58],
+  );
+
+  # Presumably the extent of the unscaled coordinates above.
+  my $star_size = 190;
+  my $scale_factor = $shape_size / $star_size;
+
+  my $half = $shape_size/2;
+  my ($midX, $midY) = ($x1+$half, $y1+$half);
+
+  for my $outline (@outlines)
+  {
+    $gd->filledPolygon($self->translated_polygon($midX, $midY, $scale_factor, @$outline), $fg);
+  }
+
+  return ($midX, $midX);
+}
+ 
+# Draw a "tree" inside the $shape_size box that has ($x1,$y1) as one corner:
+# a filled rectangular trunk, a quarter of the box wide, running from the
+# box's vertical midpoint to $y1+$shape_size, and a filled oval spanning the
+# full box width over the first two thirds of its height.
+# Returns the centre x coordinate twice: ($midX, $midX).
+sub draw_end_tree
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  my ($x2, $y2) = ($x1 + $shape_size, $y1 + $shape_size);
+
+  my $trunk_width = $shape_size/4;
+
+  my ($midX, $midY) = (($x1+$x2)/2, ($y1+$y2)/2);
+
+  my $trunk_x1 = $midX-$trunk_width/2;
+  my $trunk_x2 = $midX+$trunk_width/2;
+
+  my $pkg   = $self->polygon_package;
+  my $trunk = $pkg->new();
+  for my $corner ([$trunk_x1,$midY], [$trunk_x2,$midY], [$trunk_x2,$y2], [$trunk_x1,$y2])
+  {
+    $trunk->addPt(@$corner);
+  }
+
+  $gd->filledPolygon($trunk, $fg);
+
+  # The crown is drawn after the trunk, so it covers any overlap.
+  my $crown_y2 = $y1+2*$shape_size/3;
+  $self->filled_oval($gd, $x1, $y1, $x2, $crown_y2, $fg, $fg);
+
+  return ($midX, $midX);
+}
+
+# Draw a "clover" inside the $shape_size box that has ($x1,$y1) as one
+# corner: a filled rectangular stem, a quarter of the box wide, from
+# $y1+0.4*$shape_size to $y1+$shape_size, and three filled ovals of radius
+# $shape_size/4.3 (one centred horizontally at $y1, two at the box's sides,
+# 1.3 radii further along y).
+# Returns the centre x coordinate twice: ($midX, $midX).
+sub draw_end_clover
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  my ($x2, $y2) = ($x1 + $shape_size, $y1 + $shape_size);
+
+  my $trunk_width = $shape_size/4;
+
+  my $midX = ($x1+$x2)/2;
+
+  my $stem_x1 = $midX-$trunk_width/2;
+  my $stem_x2 = $midX+$trunk_width/2;
+  my $stem_y1 = $y1+0.4*$shape_size;
+
+  my $pkg  = $self->polygon_package;
+  my $stem = $pkg->new();
+  for my $corner ([$stem_x1,$stem_y1], [$stem_x2,$stem_y1], [$stem_x2,$y2], [$stem_x1,$y2])
+  {
+    $stem->addPt(@$corner);
+  }
+
+  $gd->filledPolygon($stem, $fg);
+
+  my $radius = $shape_size / 4.3;
+
+  # Each leaf is given as the two opposite corners of its bounding box.
+  my @leaves = (
+    [$midX-$radius, $y1,              $midX+$radius, $y1+2*$radius],
+    [$x1,           $y1+1.3*$radius,  $x1+2*$radius, $y1+3.3*$radius],
+    [$x2-2*$radius, $y1+1.3*$radius,  $x2,           $y1+3.3*$radius],
+  );
+  for my $leaf (@leaves)
+  {
+    $self->filled_oval($gd, @$leaf, $fg, $fg);
+  }
+
+  return ($midX, $midX);
+}
+
+# Draw a text "bubble" centred in the $shape_size box that has ($x1,$y1) as
+# one corner: the 'bubble_text' option ("Text" when undefined) in the label
+# font, surrounded by an oval 1.414 times the text's width and height.
+# $fg is accepted for interface compatibility but not used here.
+# Returns the x extent of the oval.
+sub draw_end_bubble
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  my ($x2, $y2) = ($x1 + $shape_size, $y1 + $shape_size);
+  my ($midX, $midY) = (($x1+$x2)/2, ($y1+$y2)/2);
+
+  my $bubble_text;
+  if (defined $self->option('bubble_text'))
+  {
+    $bubble_text = $self->option('bubble_text');
+  }
+  else
+  {
+    $bubble_text = "Text";
+  }
+
+  my $font = $self->option('labelfont') || $self->font;
+  my $text_width = $font->width * length($bubble_text);
+
+  # Centre the text on ($midX, $midY).
+  my $text_x = $midX -  $text_width / 2;
+  my $text_y = $midY - $font->height / 2;
+  $gd->string($font, $text_x, $text_y, $bubble_text, $self->fontcolor);
+
+  my $oval_width  = $text_width * 1.414;
+  my $oval_height = $font->height * 1.414;
+
+  my ($oval_x1, $oval_x2) = ($midX-$oval_width/2, $midX+$oval_width/2);
+  $self->oval($gd, $oval_x1, $midY-$oval_height/2, $oval_x2, $midY+$oval_height/2);
+
+  return ($oval_x1, $oval_x2);
+}
+
+# Draw an arrow: a horizontal shaft across the $shape_size box at its
+# vertical midpoint, plus two barbs at 30 degrees to the shaft.  The barbs
+# meet at $x1 when $antiparallel is true and at $x1+$shape_size otherwise.
+# Returns the x extent of the shaft: ($x1, $x1+$shape_size).
+sub draw_end_arrow
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg, $antiparallel) = @_;
+
+  my ($x2, $y2) = ($x1 + $shape_size, $y1 + $shape_size);
+
+  # Horizontal and vertical reach of each barb.
+  my $angle = deg2rad(30);
+  my ($dx, $dy) = (2*$shape_size*cos($angle)/5, 2*$shape_size*sin($angle)/5);
+  my $midY = ($y1+$y2)/2;
+
+  $gd->line($x1, $midY, $x2, $midY, $fg);
+
+  # Pick the tip and the x coordinate where both barbs end.
+  my ($tipX, $barbX) = $antiparallel ? ($x1, $x1+$dx) : ($x2, $x2-$dx);
+  $gd->line($tipX, $midY, $barbX, $midY-$dy, $fg);
+  $gd->line($tipX, $midY, $barbX, $midY+$dy, $fg);
+
+  return ($x1, $x2);
+}
+
+# Draw a zigzag "wave" of six line segments across the $shape_size box,
+# alternating between two y levels that are always 10 apart.
+# Returns the x extent of the wave: ($x1, $x1+$shape_size).
+sub draw_end_wave
+{
+  my ($self, $gd, $x1, $y1, $shape_size, $fg) = @_;
+
+  my $x2 = $x1 + $shape_size;
+
+  # Make the height constant: one level is the box's vertical midpoint,
+  # the other lies 10 before it.
+  my $y2 = $y1 + $shape_size/2;
+  $y1 = $y2-10;
+
+  my $step = $shape_size/6;
+  for my $k (0 .. 5)
+  {
+    # Even segments run from $y2 to $y1, odd segments from $y1 back to $y2.
+    my ($fromY, $toY) = $k % 2 ? ($y1, $y2) : ($y2, $y1);
+    $gd->line($x1+$k*$step, $fromY, $x1+($k+1)*$step, $toY, $fg);
+  }
+  return ($x1, $x2);
+}
+
+# Draw the dumbbell: one end shape at each end of the feature, a connector
+# between them (a straight line, or an arc when the 'arc' option equals 1)
+# and, when the 'caption' option is set, a caption centred between the shapes.
+#
+# Arguments after $self and $gd are forwarded to calculate_boundaries().
+# When the feature is narrower than the shape size, drawing is delegated to
+# the superclass draw_component instead.
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my $fg = $self->fgcolor;
+  
+  my $shape_size = defined $self->option('shape_size') ? $self->option('shape_size') : $self->default_shape_size;
+
+  # find the center and vertices
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  
+  # Not enough room for even a single end shape.
+  if ($x2-$x1 < $shape_size)
+  {
+    return $self->SUPER::draw_component($gd, @_);  
+  }
+  
+  my $midY = ($y2-$y1) / 2 + $y1;
+  my $startY = $midY - $shape_size/2;
+  
+  my $antiparallel = $self->option('antiparallel');
+  
+  #We need to store the bounds of the shapes drawn because the connecting line will have
+  #different length depending on them.
+  my ($leftX1, $leftX2) = $self->draw_end_shape($gd, $x1, $startY, $shape_size, $fg);
+  my ($rightX1, $rightX2) = $self->draw_end_shape($gd, $x2-$shape_size, $startY, $shape_size, $fg, $antiparallel);
+
+  # Test definedness first so that an unset 'arc' option is not compared
+  # numerically (which warns under -w).
+  my $arc = $self->option('arc');
+  if (defined $arc && $arc == 1)
+  {
+    #Draw an arc of an ellipse relative to the midpoint between shapes
+    #whose center is at (0, -q) and which intersects with the X axis at (p,0) and (-p, 0).
+    my $p = ($rightX1 - $leftX2) / 2;
+    my $q = $shape_size/2;
+    
+    # Semi-axes of the ellipse.
+    my $c = 2 * $p / sqrt(3);
+    my $d = 2 * $q;
+    # atan2(sqrt(3), 1) is 60 degrees, so the arc spans 210 to 330 degrees.
+    my $angle = atan2(sqrt(3), 1);
+    my $deg = rad2deg($angle);
+    # NOTE(review): the arc is always black, while the straight connector
+    # below uses $fg -- confirm whether that is intended.
+    $gd->arc($leftX2+$p,$midY+$q,2*$c,2*$d,270-$deg,270+$deg,$self->factory->translate_color('black'));
+  }  
+  else
+  {
+    $gd->line($leftX2,$midY,$rightX1,$midY,$fg);  
+  }
+  
+  if (my $caption = $self->option('caption'))
+  {
+    my $font = $self->option('labelfont') || $self->font;
+    # Horizontal midpoint of the gap left between two $shape_size-wide ends.
+    my $midX = ($x2-$x1-2*$shape_size)/2+$x1+$shape_size;
+    my $startCaption = $midX - $font->width * length($caption) / 2;
+    $gd->string($font, $startCaption, $midY-$font->height, $caption, $self->fontcolor);
+  }
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::dumbbell - A glyph that draws a "dumbbell" with the same shapes on both ends. 
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a "dumbbell" with the same shapes on both ends. 
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                       Default
+  ------      -----------                       -------
+
+  -shape_size The size of the shape               10
+              on both ends.
+
+  -end_shape  One of 'square', 'diamond',         square
+              'tree', 'clover', 'star',
+              'bubble', 'arrow', 'wave'
+
+  -bubble_text The text to show in the bubble     Text
+                if the 'bubble' end_shape is chosen
+                above (shape_size is then ignored)
+
+  -antiparallel Whether the right arrow               0
+                is reversed
+
+  -arc        Whether the shapes are               0
+              connected by an arc
+              (a straight line is the default).
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ellipse.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ellipse.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ellipse.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+package Bio::Graphics::Glyph::ellipse;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Override draw_component to draw a filled oval, rather than a rectangle,
+# inside the box returned by bounds().  The label is drawn afterwards when
+# the 'label' option is true.
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my($x1,$y1,$x2,$y2) = $self->bounds(@_);
+  $self->filled_oval($gd,
+		     $x1, $y1,
+		     $x2, $y2);
+
+  # Forward the remaining arguments (after $self and $gd) to draw_label.
+  $self->draw_label($gd,@_) if $self->option('label');
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::ellipse - The "ellipse" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws an oval instead of a box; otherwise it is similar to
+the "generic" or "box" glyphs.  The width of the oval is determined by
+the feature width, and the height by the -height option.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ex.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ex.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ex.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,136 @@
+package Bio::Graphics::Glyph::ex;
+
+# $Id: ex.pm,v 1.5.8.1 2006/10/02 23:10:19 sendu Exp $
+# Glyph that draws an "X" across the feature
+
+=head1 NAME
+
+Bio::Graphics::Glyph::ex - the "ex", or "crossed box" glyph
+
+=cut
+
+use strict;
+use base 'Bio::Graphics::Glyph::generic';
+
+# Override draw_component to draw an "X" instead of a box.
+#
+# With the 'point' option set, the X has arms of int(height/2) and is
+# centred on the midpoint of the box returned by bounds(); otherwise its two
+# strokes are the diagonals of that box.
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my $fg = $self->fgcolor;
+  my($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  #if widthless
+  if($self->option('point')){
+    my $arm = int($self->height/2);
+    # Midpoint on each axis: the smaller coordinate plus HALF the extent.
+    # (Adding the full extent yields the far edge, not the centre.)
+    my $minx    = $x2 > $x1 ? $x1 : $x2;
+    my $centerx = abs($x2 - $x1)/2 + $minx;
+    my $miny    = $y2 > $y1 ? $y1 : $y2;
+    my $centery = abs($y2 - $y1)/2 + $miny;
+    $gd->line($centerx-$arm, $centery-$arm, $centerx+$arm, $centery+$arm, $fg);
+    $gd->line($centerx-$arm, $centery+$arm, $centerx+$arm, $centery-$arm, $fg);
+    return;
+  } else {
+    $gd->line($x1,$y1,$x2,$y2,$fg);
+    $gd->line($x1,$y2,$x2,$y1,$fg);
+  }
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::ex - The "ex", or "crossed box", glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws an 'X' whose strokes are the diagonals of the feature's box.
+
+=head2 OPTIONS
+
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/extending_arrow.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/extending_arrow.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/extending_arrow.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,77 @@
+package Bio::Graphics::Glyph::extending_arrow;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::anchored_arrow);
+
+=head1 NAME
+
+Bio::Graphics::Glyph::extending_arrow -- The "extending arrow" glyph
+
+=head1 SYNOPSIS
+
+This is deprecated.  Use L<Bio::Graphics::Glyph::anchored_arrow>
+instead.
+
+=head1 DESCRIPTION
+
+This glyph was designed to show a segment that goes beyond the panel.
+If the segment is contained within the panel, a vertical base is
+shown.  Otherwise, an arrow is shown.
+
+Also see the arrow glyph.
+
+=head2 OPTIONS
+
+See L<Bio::Graphics::Glyph::anchored_arrow>.  This glyph has been
+deprecated.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Originally by Shengqiang Shu.  Temporarily deprecated by Lincoln
+Stein.
+
+Copyright (c) 2001 Berkeley Drosophila Genome Project
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/flag.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/flag.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/flag.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+package Bio::Graphics::Glyph::flag;
+
+# $Id: flag.pm,v 1.2.6.1 2006/10/02 23:10:19 sendu Exp $
+# Glyph that draws a flag with a short text label
+
+=head1 NAME
+
+Bio::Graphics::Glyph::flag - the "flag" glyph
+
+=cut
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Text drawn on the flag when the 'text' option is not set (or is false).
+sub default_text
+{
+  my $fallback_text = "ori";
+  return $fallback_text;
+}
+
+# Width of the flag when the 'width' option is not set (or is false).
+sub default_width
+{
+  my $fallback_width = 20;
+  return $fallback_width;
+}
+
+# Draw the flag: a triangle outlined in the foreground colour and
+# flood-filled with the background colour, a line from ($x1,$y1) to
+# ($x1,$y2) in the foreground colour, and the flag text.
+# Arguments after $self and $gd are forwarded to calculate_boundaries().
+sub draw_component {
+  my ($self, $gd, @offsets) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@offsets);
+
+  my $fg = $self->fgcolor;
+  my $bg = $self->bgcolor;
+
+  my $width = $self->option('width') || $self->default_width;
+  my $text = $self->option('text') || $self->default_text;
+
+  # The triangle occupies the first two thirds of the glyph's y range.
+  my $span = $y2-$y1;
+  my $oneThirdY = $y1 + $span / 3;
+  my $twoThirdsY = $y1 + 2 * $span / 3;
+
+  my $pkg = $self->polygon_package;
+
+  my $triangle = $pkg->new();
+  for my $vertex ([$x1, $y1], [$x1+$width, $oneThirdY], [$x1, $twoThirdsY]) {
+    $triangle->addPt(@$vertex);
+  }
+
+  # Outline first, then flood-fill from a point inside the outline.
+  $gd->polygon($triangle, $fg);
+  $gd->fillToBorder($x1+$width/2, $oneThirdY, $fg, $bg);
+
+  $gd->line($x1, $y1, $x1, $y2, $fg);
+
+  my $font = $self->option('labelfont') || $self->font;
+  $gd->string($font, $x1 + 3, $twoThirdsY-3, $text, $self->fontcolor);
+
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::flag - The flag glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a flag with text next to it.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -text       Text to draw next to the flag  ori
+
+  -width      Width of the flag               20
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/gene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/gene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/gene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,272 @@
+package Bio::Graphics::Glyph::gene;
+
+# $Id: gene.pm,v 1.10 2006/07/05 15:53:49 lstein Exp $
+
+use strict;
+use base 'Bio::Graphics::Glyph::processed_transcript';
+
+# Extra arrow length is only computed for level-1 glyphs; every other level
+# gets 0.  For level 1 the level is temporarily set to 0 while the
+# superclass method runs.
+sub extra_arrow_length {
+  my ($self) = @_;
+  return 0 if $self->{level} != 1;
+  local $self->{level} = 0;  # fake out superclass
+  return $self->SUPER::extra_arrow_length;
+}
+
+# Left padding: the inherited value for features whose primary tag matches
+# /gene|mRNA/, 0 for everything else.
+sub pad_left {
+  my ($self) = @_;
+  my $tag = $self->feature->primary_tag;
+  return $tag =~ /gene|mRNA/ ? $self->SUPER::pad_left : 0;
+}
+
+# Right padding.  Glyphs at level 2 or deeper get 0.  Otherwise the inherited
+# padding is used, widened to arrow_length when the feature's strand (negated
+# if the glyph is flipped) is positive.
+sub pad_right {
+  my ($self) = @_;
+  return 0 unless $self->{level} < 2; # don't invoke this expensive call on exons
+
+  my $strand = $self->feature->strand;
+  if ($self->{flip}) {
+    $strand *= -1;
+  }
+
+  my $pad = $self->SUPER::pad_right;
+  if (defined($strand) && $strand > 0) {
+    my $al = $self->arrow_length;
+    $pad = $al if $al > $pad;
+  }
+  return $pad;
+}
+
+# Bottom padding: inherited for levels 0 and 1, 0 for deeper levels.
+sub pad_bottom {
+  my ($self) = @_;
+  # don't invoke the expensive superclass call on exons
+  return $self->{level} < 2 ? $self->SUPER::pad_bottom : 0;
+}
+
+# Top padding: inherited for levels 0 and 1, 0 for deeper levels.
+sub pad_top {
+  my ($self) = @_;
+  # don't invoke the expensive superclass call on exons
+  return $self->{level} < 2 ? $self->SUPER::pad_top : 0;
+}
+
+# Bump setting: always 1 at the top level (level 0); other levels defer to
+# the inherited value, so they only bump when the configuration says so.
+sub bump {
+  my ($self) = @_;
+  return $self->{level} == 0 ? 1 : $self->SUPER::bump;
+}
+
+# Label for this glyph.  Levels 2 and deeper get none.  An 'mRNA' feature is
+# labelled via _label when label_transcripts is true; every other case uses
+# the inherited label.
+sub label {
+  my ($self) = @_;
+  return unless $self->{level} < 2;
+
+  my $is_labelled_mrna =
+    $self->label_transcripts && $self->{feature}->primary_tag eq 'mRNA';
+  return $is_labelled_mrna ? $self->_label : $self->SUPER::label;
+}
+
+# Where the label goes: 'top' for the top-level glyph (level 0), 'left' for
+# every other level.
+sub label_position {
+  my ($self) = @_;
+  return $self->{level} == 0 ? 'top' : 'left';
+}
+
+# Cached accessor for the label_transcripts flag: the value is obtained from
+# _label_transcripts on first use and stored in $self->{label_transcripts}.
+sub label_transcripts {
+  my ($self) = @_;
+  $self->{label_transcripts} = $self->_label_transcripts
+    unless exists $self->{label_transcripts};
+  return $self->{label_transcripts};
+}
+
+# Uncached lookup of the 'label_transcripts' option.
+sub _label_transcripts {
+  my ($self) = @_;
+  return $self->option('label_transcripts');
+}
+
+# Draw connectors via the superclass, except for 'gene' features, which get
+# none.  Arguments after $self are passed through unchanged.
+sub draw_connectors {
+  my ($self, @args) = @_;
+  return if $self->feature->primary_tag eq 'gene';
+  return $self->SUPER::draw_connectors(@args);
+}
+
+# Maximum depth: the value from Bio::Graphics::Glyph::maxdepth when that is
+# defined, otherwise 2.
+sub maxdepth {
+  my ($self) = @_;
+  my $depth = $self->Bio::Graphics::Glyph::maxdepth;
+  return defined $depth ? $depth : 2;
+}
+
+
+# Return the list of sub-features to draw for $feature.
+#   * For a 'gene' feature: its 'mRNA' sub-features.
+#   * Otherwise: the sub-features of the type given by the 'sub_part'
+#     option or, when that option is false, the CDS, five_prime_UTR,
+#     three_prime_UTR and UTR sub-features.
+# A selected part whose primary tag matches /CDS|UTR/i and that has
+# sub-features of its own is replaced by those sub-features.
+sub _subfeat {
+  my $class   = shift;
+  my $feature = shift;
+  return $feature->get_SeqFeatures('mRNA') if $feature->primary_tag eq 'gene';
+
+  my @subparts;
+  if ($class->option('sub_part')) {
+    @subparts = $feature->get_SeqFeatures($class->option('sub_part'));
+  }
+  else {
+
+    @subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR));
+  }
+ 
+  # The CDS and UTRs may be represented as a single feature with subparts or as several features
+  # that have different IDs. We handle both cases transparently.
+  my @result;
+  foreach (@subparts) {
+    if ($_->primary_tag =~ /CDS|UTR/i) {
+      my @cds_seg = $_->get_SeqFeatures;
+      if (@cds_seg > 0) { push @result,@cds_seg  } else { push @result,$_ }
+    } else {
+      push @result,$_;
+    }
+  }
+  return @result;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::gene - A GFF3-compatible gene glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing genes that may have
+alternatively-spliced transcripts. The various isoforms are stacked on
+top of each other and given a single label and description that apply
+to the entire stack. Each individual transcript's name is optionally
+printed to the left of the transcript glyph.
+
+Transcripts (splice isoforms) are drawn using the processed_transcript
+glyph.  CDS features are drawn in the background color, and the UTRs
+are drawn in an alternate color selected by the utr_color option.  In
+addition, you can make the UTRs thinner than the CDS by setting the
+"thin_utr" option.
+
+This glyph is designed to work properly with GFF3-style three-tier
+genes, in which the top level feature has the Sequence Ontology type
+of "gene", the second level feature(s) have the SO type "mRNA", and
+the third level feature(s) have the SO type "CDS", "five_prime_UTR"
+and "three_prime_UTR".  Subparts named "UTR" are also honored.  The
+feature can contain other subparts as well (e.g. exon, intron,
+translation), but they are currently ignored unless the option
+sub_part is supplied.  If the sub_part option is used that feature 
+type will be used and CDS and UTR features will be excluded.
+This could be used for specifying that exons be used instead,
+for example.
+
+This glyph is a subclass of processed_transcript, and recognizes the
+same options.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 undef (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       undef (false)
+
+  -description  Whether to draw a description  undef (false)
+
+  -strand_arrow Whether to indicate            undef (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the gene glyph recognizes the following
+glyph-specific options:
+
+  Option         Description                   Default
+  ------         -----------                   -------
+
+  -label_transcripts                           undef (false)
+                 Flag. If true, then the
+                 display name of each
+                 transcript will be drawn
+                 to the left of the transcript
+                 glyph.
+
+  -thin_utr      Flag.  If true, UTRs will      undef (false)
+                 be drawn at 2/3 of the
+                 height of CDS segments.
+
+  -utr_color     Color of UTR segments.         Gray #D0D0D0
+
+  -decorate_introns
+                 Draw strand with little arrows undef (false)
+                 on the intron.
+
+The B<-adjust_exons> and B<-implied_utrs> options are inherited from
+processed_transcript, but are quietly ignored. Please use the
+processed_transcript glyph for this type of processing.
+
+=head1 BUGS
+
+The SO terms are hard-coded. They should be more flexible and should
+recognize ISA relationships.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/generic.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/generic.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/generic.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,689 @@
+package Bio::Graphics::Glyph::generic;
+
+use strict;
+use Bio::Graphics::Util qw(frame_and_offset);
+use base qw(Bio::Graphics::Glyph);
+
+my %complement = (g=>'c',a=>'t',t=>'a',c=>'g',
+		  G=>'C',A=>'T',T=>'A',C=>'G');
+
+# new options are 'label'       -- short label to print over glyph
+#                 'description'  -- long label to print under glyph
+# label and description can be flags or coderefs.
+# If a flag, label will be taken from seqname, if it exists or primary_tag().
+#            description will be taken from source_tag().
+
+# Report the glyph height.  When a protein translation or the DNA residues
+# are going to be drawn (option set and the text fits), the height is at
+# least the font height plus two pixels.
+sub height {
+  my $self      = shift;
+  my $inherited = $self->SUPER::height;
+
+  my $shows_text = ($self->option('draw_translation') && $self->protein_fits)
+                || ($self->option('draw_dna')         && $self->dna_fits);
+  return $inherited unless $shows_text;
+
+  my $text_height = $self->font->height + 2;
+  return $inherited if $inherited > $text_height;
+  return $text_height;
+}
+
+# Top padding.  An explicit pad_top option wins; otherwise the inherited
+# padding is used, plus one label height when a label is drawn on top.
+sub pad_top {
+  my $self     = shift;
+  my $explicit = $self->option('pad_top');
+  return $explicit if defined $explicit;
+
+  my $padding = $self->SUPER::pad_top;
+  if ($self->label && $self->label_position eq 'top') {
+    $padding += $self->labelheight;
+  }
+  return $padding;
+}
+# Bottom padding.  An explicit pad_bottom option wins; otherwise the
+# inherited padding is used, plus one label height for a description and
+# one more for part labels (when the label position is 'top').
+sub pad_bottom {
+  my $self     = shift;
+  my $explicit = $self->option('pad_bottom');
+  return $explicit if defined $explicit;
+
+  my $padding = $self->SUPER::pad_bottom;
+  if ($self->description) {
+    $padding += $self->labelheight;
+  }
+  if ($self->part_labels && $self->label_position eq 'top') {
+    $padding += $self->labelheight;
+  }
+  return $padding;
+}
+# Right padding: the larger of the inherited padding and the amount by
+# which the widest text (top label or description) overhangs the glyph.
+sub pad_right {
+  my $self      = shift;
+  my $inherited = $self->SUPER::pad_right;
+
+  my $widest = $self->label_position eq 'top' ? $self->labelwidth : 0;
+  my $desc   = $self->descriptionwidth;
+  $widest    = $desc unless $widest > $desc;
+
+  my $overhang = $widest - $self->width;
+  return $inherited if $inherited > $overhang;
+  return $overhang;
+}
+# Left padding: the inherited padding, widened by the label width when the
+# label is drawn to the left of the glyph.
+sub pad_left {
+  my $self    = shift;
+  my $padding = $self->SUPER::pad_left;
+  if ($self->label_position eq 'left' && $self->label) {
+    $padding += $self->labelwidth;
+  }
+  return $padding;
+}
+# Font for the label: the 'label_font' setting, with the glyph's own font
+# handed to getfont() as the second argument.
+sub labelfont {
+  my ($self) = @_;
+  return $self->getfont('label_font',$self->font);
+}
+# Font for the description: the 'desc_font' setting, with the glyph's own
+# font handed to getfont() as the second argument.
+sub descfont {
+  my ($self) = @_;
+  return $self->getfont('desc_font',$self->font);
+}
+# Pixel width of the label text (cached in $self->{labelwidth}).
+# The width is measured with labelfont(), the font draw_label() actually
+# renders with, so the padding stays correct when -label_font differs from
+# -font.  With no label the width is 0.
+sub labelwidth {
+  my $self = shift;
+  return $self->{labelwidth} ||= length($self->label||'') * $self->labelfont->width;
+}
+# Pixel width of the description text (cached in $self->{descriptionwidth}).
+# The width is measured with descfont(), the font draw_description()
+# actually renders with, so the padding stays correct when -desc_font
+# differs from -font.  With no description the width is 0.
+sub descriptionwidth {
+  my $self = shift;
+  return $self->{descriptionwidth} ||= length($self->description||'') * $self->descfont->width;
+}
+# Height of one line of label text, taken from the glyph font and cached
+# in $self->{labelheight}.
+sub labelheight {
+  my ($self) = @_;
+  unless ($self->{labelheight}) {
+    $self->{labelheight} = $self->font->height;
+  }
+  return $self->{labelheight};
+}
+# Where the label goes: the 'label_position' option, defaulting to 'top'.
+# The answer is cached in $self->{labelposition}.
+sub label_position {
+  my ($self) = @_;
+  unless ($self->{labelposition}) {
+    $self->{labelposition} = $self->option('label_position') || 'top';
+  }
+  return $self->{labelposition};
+}
+# Return the label text for this glyph, or nothing when the glyph was
+# overbumped or subpart callbacks are not allowed.  For level >= 0 the
+# label is recomputed on every call; otherwise it is cached in
+# $self->{label}.
+sub label {
+  my $self = shift;
+  return if $self->{overbumped};           # set by the bumper when we have hit bump limit
+  return unless $self->subpart_callbacks;  # true if this is level 0 or if subpart callbacks allowed
+  return $self->_label if $self->{level} >= 0;
+
+  $self->{label} = $self->_label unless exists $self->{label};
+  return $self->{label};
+}
+# Return the description text for this glyph, or nothing when the glyph
+# was overbumped or subpart callbacks are not allowed.  For level > 0 the
+# description is recomputed on every call; otherwise it is cached in
+# $self->{description}.
+sub description {
+  my $self = shift;
+  return if $self->{overbumped};           # set by the bumper when we have hit bump limit
+  return unless $self->subpart_callbacks;  # true if this is level 0 or if subpart callbacks allowed
+  return $self->_description if $self->{level} > 0;
+
+  $self->{description} = $self->_description unless exists $self->{description};
+  return $self->{description};
+}
+
+# True when part labels should be drawn: this must be a level-0 glyph with
+# more than one part and the 'part_labels' option must be set.  On success
+# the value returned is the option value itself.
+sub part_labels {
+  my $self = shift;
+  my @parts = $self->parts;
+  return ($self->{level} == 0) && @parts && @parts>1 && $self->option('part_labels');
+}
+
+# Value of the 'part_label_merge' option.
+sub part_label_merge {
+  my ($self) = @_;
+  return $self->option('part_label_merge');
+}
+
+# Value of the 'maxdepth' option, or 1 when the option is not defined.
+sub maxdepth {
+  my ($self) = @_;
+  my $depth  = $self->option('maxdepth');
+  return defined $depth ? $depth : 1;
+}
+
+# Compute the label text from the 'label' option:
+#   undefined     -> no label
+#   '1 ' (1 and a space) -> the literal label "1"
+#   anything but '1'     -> used verbatim
+#   '1'           -> derived from the feature (see below)
+sub _label {
+  my $self = shift;
+
+  # allow caller to specify the label
+  my $setting = $self->option('label');
+
+  return unless defined $setting;
+  return "1"      if $setting eq '1 '; # 1 with a space
+  return $setting if $setting ne '1';
+
+  # figure it out ourselves: use the first accessor the feature supports
+  # (info() is the deprecated API)
+  my $feature = $self->feature;
+  for my $accessor (qw(display_name info seq_id)) {
+    return $feature->$accessor if $feature->can($accessor);
+  }
+  return eval{$feature->primary_tag};
+}
+# Compute the description text from the 'description' option, using the
+# same conventions as the label option: undefined means none, '1 ' means
+# the literal "1", '1' means "ask get_description()" (result cached in
+# $self->{_description}) and anything else is used verbatim.
+sub _description {
+  my $self = shift;
+
+  # allow caller to specify the long label
+  my $setting = $self->option('description');
+  return unless defined $setting;
+  return "1"      if $setting eq '1 ';
+  return $setting if $setting ne '1';
+
+  unless (exists $self->{_description}) {
+    $self->{_description} = $self->get_description($self->feature);
+  }
+  return $self->{_description};
+}
+
+# Look for a description of $feature in the common places, in this order:
+# notes(), desc(), the 'note' tag, the 'description' tag and finally the
+# source tag.  Returns nothing when the source tag is empty.
+sub get_description {
+  my ($self,$feature) = @_;
+
+  return join '; ',$feature->notes if $feature->can('notes');
+  return $feature->desc            if $feature->can('desc');
+
+  if ($feature->can('has_tag')) {
+    for my $tag_name (qw(note description)) {
+      return join '; ',$feature->get_tag_values($tag_name) if $feature->has_tag($tag_name);
+    }
+  }
+
+  my $source = $feature->source_tag;
+  return if $source eq '';
+  return $source;
+}
+
+# Draw the glyph, then its label, description and part labels as requested
+# by the corresponding options.  partno and total_parts are stored on the
+# object for the duration of the call only.
+sub draw {
+  my $self = shift;
+  my ($gd,$left,$top,$partno,$total_parts) = @_;
+
+  local $self->{partno}      = $partno;
+  local $self->{total_parts} = $total_parts;
+
+  if ($self->option('draw_translation') && $self->protein_fits) {
+    $self->calculate_cds();
+  }
+
+  $self->SUPER::draw(@_);
+  $self->draw_label(@_)       if $self->option('label');
+  $self->draw_description(@_) if $self->option('description');
+  $self->draw_part_labels(@_) if $self->option('label') && $self->option('part_labels');
+}
+
+# Draw one component, then overlay the protein translation (when
+# calculate_cds() stored one) and the DNA residues (when requested and
+# they fit).
+sub draw_component {
+  my $self = shift;
+  $self->SUPER::draw_component(@_);
+  if ($self->{cds_translation}) {   # created earlier by calculate_cds()
+    $self->draw_translation(@_);
+  }
+  $self->draw_sequence(@_) if $self->option('draw_dna') && $self->dna_fits;
+}
+
+# mostly stolen from cds.pm -- draw the protein translation
+#
+# Draws the residues stored in $self->{cds_translation} (plus the optional
+# $self->{cds_splice_residue}) one character at a time, spaced three bases
+# apart, inside the bounds of this component.
+# Arguments: the GD image, followed by the arguments accepted by bounds().
+sub draw_translation {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $feature = $self->feature;
+  my $strand = $feature->strand;
+
+  my $font    = $self->font;
+  my $pixels_per_residue = $self->scale * 3;
+
+  # center the text vertically within the glyph
+  my $y         = $y1 + ($self->height - $font->height)/2;
+  my $fontwidth = $font->width;
+  my $color     = $self->fontcolor;
+
+  # a flipped display reverses the direction in which residues are laid out
+  $strand *= -1 if $self->{flip};
+
+  # have to remap feature start and end into pixel coords in order to:
+  # 1) correctly align the amino acids with the nucleotide seq
+  # 2) correct for the phase offset
+  my $start = $self->map_no_trunc($feature->start + $self->{cds_offset});
+  my $stop  = $self->map_no_trunc($feature->end   + $self->{cds_offset});
+
+  ($start,$stop) = ($stop,$start) if $stop < $start;  # why does this keep happening?
+  my $x_fudge    = $self->{flip} ? 1 : 2;
+  my $right      = $self->panel->right;
+  my $left       = $self->panel->left;
+
+  my @residues = split '',$self->{cds_translation};
+  push @residues,$self->{cds_splice_residue} if $self->{cds_splice_residue};
+  for (my $i=0;$i<@residues;$i++) {
+    # forward strand: walk right from $start; reverse strand: walk left from $stop
+    my $x = $strand > 0 ? $start + $i * $pixels_per_residue
+                        : $stop  - $i * $pixels_per_residue;
+    # skip residues that fall outside this component's bounds
+    next unless ($x >= $x1 && $x <= $x2);
+    $x -= $fontwidth + 1 if $self->{flip}; # align right when flipped
+    # stop drawing altogether once a character would touch a panel edge
+    last if $x+$fontwidth >= $right;
+    last if $x            <= $left;
+    $gd->char($font,$x+$x_fudge,$y,$residues[$i],$color);
+  }
+}
+
+# Draw the sequence of the feature one character per base inside the
+# bounds of this component.
+# Arguments: the GD image, followed by the arguments accepted by bounds().
+#
+# Bases are complemented when the display is flipped, and again when the
+# 'canonical_strand' option is set and the (possibly flipped) strand is
+# negative.  Characters that have no entry in %complement (N and other
+# ambiguity codes, gaps, ...) are drawn unchanged instead of being turned
+# into an undefined value.
+sub draw_sequence {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $feature = $self->feature;
+  my $strand = $feature->strand;
+
+  my $font            = $self->font;
+  my $pixels_per_base = $self->scale;
+
+  # center the text vertically within the glyph
+  my $y         = $y1 + ($self->height - $font->height)/2 - 1;
+  my $fontwidth = $font->width;
+  my $color     = $self->fontcolor;
+
+  # a flipped display reverses the direction in which bases are laid out
+  $strand *= -1 if $self->{flip};
+
+  # remap feature start and end into pixel coords
+  my $start = $self->map_no_trunc($feature->start);
+  my $stop  = $self->map_no_trunc($feature->end);
+
+  ($start,$stop) = ($stop,$start) if $stop < $start;  # why does this keep happening?
+  my $x_fudge    = $self->{flip} ? 1 : 2;
+  my $right      = $self->panel->right;
+  my $left       = $self->panel->left;
+
+  my $seq   = $self->get_seq($self->feature->seq);
+  $seq      = $seq->seq if $seq;   # get the dna
+
+  my $canonical = $self->option('canonical_strand');
+
+  # complement a base, passing unknown characters through untouched
+  my $complement_of = sub {
+    my $base = shift;
+    return exists $complement{$base} ? $complement{$base} : $base;
+  };
+
+  my @bases = split '',$seq;
+  for (my $i=0;$i<@bases;$i++) {
+    # forward strand: walk right from $start; reverse strand: walk left from $stop
+    my $x = $strand >= 0 ? $start + $i * $pixels_per_base
+                         : $stop  - $i * $pixels_per_base;
+    next unless ($x >= $x1 && $x <= $x2);
+    $x -= $fontwidth + 1 if $self->{flip}; # align right when flipped
+    if ($strand >= 0) {
+      last if $x + $fontwidth > $right;
+    } else {
+      next if $x >= $right;
+      last if $x < $left;
+    }
+    my $base = $bases[$i];
+    $base    = $complement_of->($base) if $self->{flip};
+    $base    = $complement_of->($base) if $canonical && $strand < 0;
+    $gd->char($font,$x+$x_fudge,$y,$base,$color);
+  }
+}
+
+# Smaller of two numbers (the first one when they are equal).
+sub min {
+  my ($first,$second) = @_;
+  return $first <= $second ? $first : $second;
+}
+# Larger of two numbers (the first one when they are equal).
+sub max {
+  my ($first,$second) = @_;
+  return $first >= $second ? $first : $second;
+}
+
+# Draw the label text, either above the glyph ('top') or to its left
+# ('left').  Nothing is drawn when there is no label or the label position
+# is anything else.
+sub draw_label {
+  my $self = shift;
+  my ($gd,$left,$top,$partno,$total_parts) = @_;
+  my $text = $self->label or return;
+
+  # valid for both "top" and "left" because the left-hand side is defined by pad_left
+  my $x        = $self->left + $left;
+  my $font     = $self->labelfont;
+  my $position = $self->label_position;
+  my $y;
+
+  if ($position eq 'top') {
+    $x += $self->pad_left;  # offset to beginning of the drawn part of the feature
+    $x  = $self->panel->left + 1 if $x <= $self->panel->left;
+    $y  = $self->top + $top - 1;
+  }
+  elsif ($position eq 'left') {
+    # center the text vertically on the glyph
+    $y = $self->{top} + ($self->height - $font->height)/2 + $top;
+  }
+  else {
+    return;
+  }
+
+  $gd->string($font,$x,$y,$text,$self->fontcolor);
+}
+# Draw the description text below the glyph, one font height lower when
+# part labels are shown.  Nothing is drawn when there is no description.
+sub draw_description {
+  my ($self,$gd,$left,$top,$partno,$total_parts) = @_;
+  my $text = $self->description or return;
+
+  # offset to beginning of drawn part of feature, but never left of the panel
+  my $x = $self->left + $left + $self->pad_left;
+  $x    = $self->panel->left + 1 if $x <= $self->panel->left;
+
+  my $dy = $self->part_labels ? $self->font->height : 0;
+  my $y  = $self->bottom - $self->pad_bottom + $top + $dy;
+
+  $gd->string($self->descfont,$x,$y,$text,$self->font2color);
+}
+
+# Draw a label centered underneath each part of a level-0 glyph that has
+# more than one part.  When the part_label_merge option is set, runs of
+# parts whose features are adjacent (gap of at most one base) share a
+# single label.  A label that would overlap the previously drawn one is
+# skipped.
+sub draw_part_labels {
+  my $self = shift;
+  my ($gd,$left,$top,$partno,$total_parts) = @_;
+  return unless $self->{level} == 0;
+  my @p = $self->parts or return;
+  @p > 1 or return;
+  @p = reverse @p if $self->flip;
+
+  my $font  = $self->font;
+  my $width = $font->width;
+  my $color = $self->fontcolor;
+
+  my $y     = $top + $self->bottom - $self->pad_bottom;
+  my $merge_em = $self->part_label_merge;
+
+  # each element of @parts is an array ref of the glyphs sharing one label
+  my @parts;
+  my $previous;
+
+  if ($merge_em) {
+    my $current_contig = [];
+
+    for my $part (@p) {
+      if (!$previous || $part->feature->start - $previous->feature->end <= 1) {
+        push @$current_contig,$part;
+      } else {
+        push @parts,$current_contig;
+        $current_contig = [$part];
+      }
+      $previous = $part;
+    }
+    push @parts,$current_contig;
+  }
+
+  else {
+    @parts = map {[$_]} @p;
+  }
+
+  my $last_x;  # avoid overlapping labels
+  for (my $i=0; $i<@parts; $i++) {
+    my $x1     = $parts[$i][0]->left;
+    my $x2     = $parts[$i][-1]->right;
+
+    my $string = $self->part_label($i,scalar @parts);
+    my $x    = $left + $x1 + ($x2 - $x1 - $width*length($string))/2;
+    my $w    = $width * length($string);
+
+    # The parentheses matter: && binds tighter than ?:, so without them the
+    # overlap test would also run (against an undefined $last_x) for the
+    # very first label.
+    next if defined $last_x
+      && ($self->flip ? $x + $w > $last_x : $x < $last_x);
+
+    $gd->string($font,
+                $x,$y,
+                $string,
+                $color);
+    $last_x = $x + ($self->flip ? 0 : $w);
+  }
+}
+
+# Label text for part number $part out of $total, controlled by the
+# 'part_labels' option: undefined means none, '1 ' means the literal "1",
+# '1' means the 1-based part number and anything else is used verbatim.
+# $self->{partno} is set for the duration of the call, counting from the
+# other end for features on the minus strand.
+sub part_label {
+  my $self = shift;
+  my ($part,$total)  = @_;
+
+  local $self->{partno} = $self->feature->strand < 0 ? $total - $part -1 : $part;
+  my $setting = $self->option('part_labels');
+  return unless defined $setting;
+  return "1"      if $setting eq '1 ';
+  return $setting if $setting ne '1';
+  return $self->{partno}+1;
+}
+
+# True when one base occupies at least one font width, i.e. the DNA
+# residues can be drawn side by side.
+sub dna_fits {
+  my ($self) = @_;
+
+  my $pixels_per_base = $self->scale;
+  return $pixels_per_base >= $self->font->width;
+}
+
+# True when one residue (three bases) occupies at least one font width,
+# i.e. the protein translation can be drawn side by side.
+sub protein_fits {
+  my ($self) = @_;
+  my $font   = $self->font;
+
+  # return unless $font->height <= $self->height;
+
+  my $pixels_per_residue = $self->scale * 3;
+  return $pixels_per_residue >= $font->width;
+}
+
+# Draw an arrowhead whose tip is at ($x,$y).
+#   $image       -- GD-style image to draw on
+#   $height      -- horizontal and vertical extent of each barb
+#   $orientation -- >= 0 points right, < 0 points left
+# The 'arrowstyle' option selects a filled triangle ('filled') or two
+# lines (anything else; the default is 'regular').
+sub arrowhead {
+  my $self = shift;
+  my $image = shift;
+  my ($x,$y,$height,$orientation) = @_;
+
+  my $fg = $self->set_pen;
+  my $style = $self->option('arrowstyle') || 'regular';
+
+  # the barbs trail behind the tip: to its left for a right-pointing arrow,
+  # to its right for a left-pointing one
+  my $base_x = $orientation >= 0 ? $x - $height : $x + $height;
+
+  if ($style eq 'filled') {
+    my $poly_pkg = $self->polygon_package;
+    my $poly = $poly_pkg->new();
+    $poly->addPt($base_x,$y-$height);
+    $poly->addPt($x,$y);
+    $poly->addPt($base_x,$y+$height);  # addPt takes (x,y) only
+    $image->filledPolygon($poly,$fg);
+  }
+  else {
+    $image->line($x,$y,$base_x,$y-$height,$fg);
+    $image->line($x,$y,$base_x,$y+$height,$fg);
+  }
+}
+
+# Draw a horizontal line from $x1 to $x2 at height $y with an arrowhead at
+# $x2 pointing away from $x1.  No arrowhead is drawn when $x1 == $x2.
+sub arrow {
+  my ($self,$image,$x1,$x2,$y) = @_;
+
+  my $fg     = $self->set_pen;
+  my $height = $self->height/3;
+
+  $image->line($x1,$y,$x2,$y,$fg);
+
+  # +1 when the arrow runs left to right, -1 when it runs right to left
+  my $direction = $x2 <=> $x1;
+  $self->arrowhead($image,$x2,$y,$height,$direction) if $direction;
+}
+
+# Reverse-complement a DNA string.  Only g, a, t, c (either case) are
+# complemented; other characters are kept as they are.
+sub reversec {
+  my ($self,$dna) = @_;
+  (my $complemented = $dna) =~ tr/gatcGATC/ctagCTAG/;
+  return scalar reverse $complemented;
+}
+
+# this gets invoked if the user has requested that the protein translation
+# gets drawn using the draw_translation option and protein_fits() returns
+# true. It is a rather specialized function and possibly belongs somewhere else,
+# but putting it here makes it possible for any feature to display its protein
+# translation.
+#
+# For every part whose feature has a phase and a sequence, these keys are
+# stored on the part glyph:
+#   cds_frame, cds_offset -- as returned by frame_and_offset()
+#   cds_translation       -- the translated protein string
+#   cds_splice_residue    -- the amino acid of a codon that is split across
+#                            the boundary to the next part, or '?' when
+#                            there is no next part to complete the codon
+# The 'codontable' option selects the codon table (default 1).
+sub calculate_cds {
+  my $self = shift;
+  my @parts = $self->feature_has_subparts ? $self->parts : $self;
+
+  my $codon_table = $self->option('codontable');
+  $codon_table    = 1 unless defined $codon_table;
+  require Bio::Tools::CodonTable unless Bio::Tools::CodonTable->can('new');
+  my $translate_table = Bio::Tools::CodonTable->new(-id=>$codon_table);
+
+  for (my $i=0; $i < @parts; $i++) {
+    my $part    = $parts[$i];
+    my $feature = $part->feature;
+
+    my $pos     = $feature->strand >= 0 ? $feature->start : $feature->end;
+    my $phase   = eval {$feature->phase};
+    next unless defined $phase;
+    my $raw_seq = $feature->seq;
+    next unless defined $raw_seq;
+
+    # seq() may hand back a plain string or a sequence object depending on
+    # the feature API; get_seq() normalizes it to an object
+    my $seq     = $self->get_seq($raw_seq);
+
+    my ($frame,$offset) = frame_and_offset($pos,
+                                           $feature->strand,
+                                           -$phase);
+    $part->{cds_frame}     = $frame;
+    $part->{cds_offset}    = $offset;
+
+    # do in silico splicing in order to find the codon that
+    # arises from the splice
+    my $protein = $seq->translate(undef,undef,$phase,$codon_table)->seq;
+    $part->{cds_translation}  = $protein;
+
+  BLOCK: {
+      # nothing to do when this part ends on a codon boundary
+      length $protein >= $feature->length/3           and last BLOCK;
+      ($feature->length - $phase) % 3 == 0            and last BLOCK;
+
+      my $next_part    = $parts[$i+1]
+        or do {
+          $part->{cds_splice_residue} = '?';
+          last BLOCK; };
+
+      my $next_feature = $next_part->feature         or  last BLOCK;
+      my $next_phase   = eval {$next_feature->phase} or  last BLOCK;
+
+      # work on the DNA strings, not on the sequence objects themselves
+      my $left_dna  = $seq->seq;
+      my $right_dna = $self->get_seq($next_feature->seq)->seq;
+      defined $left_dna && defined $right_dna        or last BLOCK;
+
+      # the split codon is the tail of this part plus the head of the next
+      my $left_of_splice  = substr($left_dna, -$next_phase, $next_phase);
+      my $right_of_splice = substr($right_dna,0           , 3-$next_phase);
+      defined $left_of_splice && defined $right_of_splice or last BLOCK;
+      my $splice_codon = $left_of_splice . $right_of_splice;
+      length $splice_codon == 3                      or last BLOCK;
+      my $amino_acid = $translate_table->translate($splice_codon);
+      $part->{cds_splice_residue} = $amino_acid;
+    }
+  }
+}
+
+# hack around changed feature API
+# Anything that is a reference and can translate() is returned as is;
+# everything else is wrapped in a new Bio::PrimarySeq.
+sub get_seq {
+  my ($self,$seq) = @_;
+  unless (ref $seq && $seq->can('translate')) {
+    require Bio::PrimarySeq unless Bio::PrimarySeq->can('new');
+    $seq = Bio::PrimarySeq->new(-seq=>$seq);
+  }
+  return $seq;
+}
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::generic - The "generic" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is identical to the "box" glyph except that it will draw the
+subparts of features that contain subfeatures.  The subparts are not
+connected -- use the "segments" glyph for that.  "Generic" is the
+default glyph used when not otherwise specified.
+
+=head2 METHODS
+
+This module overrides the maxdepth() method to return 1 unless the
+-maxdepth option is provided explicitly. This means that any module
+that inherits from generic will need to override maxdepth() again in
+order to draw subfeatures. In general, those implementing
+multi-segmented feature glyphs should inherit from
+Bio::Graphics::Glyph::segments, which allows for one level of descent.
+
+In addition, the following new methods are implemented:
+
+=over 4
+
+=item labelfont(), descfont(), labelwidth(), descriptionwidth()
+
+Return the font or the width of the label or description.
+
+=item label()
+
+Return the glyph label text (printed above the glyph).
+
+=item description()
+
+Return the glyph description text (printed below the glyph).
+
+=item draw_translation()
+
+Draw the protein translation of the feature (assumes that the feature is attached to a DNA sequence).
+
+=item draw_sequence()
+
+Draw the sequence of the feature (either DNA or protein).
+
+=back
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Default font                   gdSmallFont
+
+  -label_font   Font used for label	       gdSmallFont
+
+  -desc_font    Font used for description      gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -pad_top      Top padding                    0
+
+  -pad_bottom   Bottom padding                 0
+
+  -label        Whether to draw a label	       0 (false)
+
+  -label_position Where to draw the label      "top" (default) or "left"
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+  -draw_dna     If true, draw the dna residues        0 (false)
+                 when magnification level
+                 allows.
+
+  -canonical_strand If true, draw the dna residues        0 (false)
+                 as they appear on the plus strand
+                 even if the feature is on the minus
+                 strand.
+
+-pad_top and -pad_bottom allow you to insert some blank space between
+the glyph's boundary and its contents.  This is useful if you are
+changing the glyph's height dynamically based on its feature's score.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::Graphics::Glyph::xyplot>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/graded_segments.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/graded_segments.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/graded_segments.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,247 @@
+package Bio::Graphics::Glyph::graded_segments;
+#$Id: graded_segments.pm,v 1.18.6.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::minmax Bio::Graphics::Glyph::merge_parts);
+
+# override draw method to calculate the min and max values for the components
+#
+# Every part gets a {partcolor}: a shade of the background color computed
+# from its score, or the plain background color when it has no score.
+# Falls back to the inherited draw() when there are no parts or no usable
+# score range.
+sub draw {
+  my $self = shift;
+
+  # bail out if this isn't the right kind of feature
+  # handle both das-style and Bio::SeqFeatureI style,
+  # which use different names for subparts.
+  my @parts = $self->parts;
+  @parts    = $self if !@parts && $self->level == 0;
+  return $self->SUPER::draw(@_) unless @parts;
+
+  my ($min_score,$max_score) = $self->minmax(\@parts);
+
+  my $usable_range = defined($max_score) && defined($min_score)
+                  && $min_score < $max_score;
+  return $self->SUPER::draw(@_) unless $usable_range;
+
+  my $span = $max_score - $min_score;
+
+  # allocate colors
+  my $fill   = $self->bgcolor;
+  my ($red,$green,$blue) = $self->panel->rgb($fill);
+
+  @parts = $self->merge_parts(@parts) if $self->option('merge_parts');
+
+  for my $part (@parts) {
+    my $score = eval { $part->feature->score };
+    if (defined $score) {
+      my ($shade_r,$shade_g,$shade_b)
+        = $self->calculate_color($score,[$red,$green,$blue],$min_score,$span);
+      $part->{partcolor} = $self->panel->translate_color($shade_r,$shade_g,$shade_b);
+    }
+    else {
+      $part->{partcolor} = $fill;
+    }
+  }
+  $self->SUPER::draw(@_);
+}
+
+# Blend each channel of $rgb (an array ref) towards white (255) according
+# to where $s lies between $min_score and $min_score+$span; the position
+# is clamped to the range 0..1.  Returns the list of blended channels.
+sub calculate_color {
+  my $self = shift;
+  my ($s,$rgb,$min_score,$span) = @_;
+  my $fraction = min(max( ($s-$min_score)/$span, 0), 1);
+  return map { 255 - (255-$_) * $fraction } @$rgb;
+}
+
+# Smaller of two numbers (the second one when they are equal).
+sub min {
+  my ($first,$second) = @_;
+  return $first < $second ? $first : $second;
+}
+# Larger of two numbers (the second one when they are equal).
+sub max {
+  my ($first,$second) = @_;
+  return $first > $second ? $first : $second;
+}
+
+# Return the subparts of $feature via segments() or, failing that,
+# sub_SeqFeature(); nothing when the feature supports neither.
+sub subseq {
+  my ($class,$feature) = @_;
+  for my $accessor (qw(segments sub_SeqFeature)) {
+    return $feature->$accessor if $feature->can($accessor);
+  }
+  return;
+}
+
+# synthesize a key glyph
+#
+# Builds a three-segment sample feature with scores 10, 50 and 100 and
+# turns it into a glyph using a clone of this glyph's factory, with the
+# label switched on, bumping switched off and solid connectors.
+sub keyglyph {
+  my $self = shift;
+
+  my $scale  = 1/$self->scale;  # base pairs/pixel
+  my $offset = $self->panel->offset;
+
+  # three segments, at pixels 0->20, 30->50 and 60->80
+  my @pixel_spans = ([0,20],[30,50],[60,80]);
+  my @segments    = map { [ $_->[0]*$scale + $offset, $_->[1]*$scale + $offset ] }
+                        @pixel_spans;
+
+  my $feature =
+    Bio::Graphics::Feature->new(
+                                -segments => \@segments,
+                                -name     => $self->option('key'),
+                                -strand   => '+1');
+
+  my @scores = (10,50,100);
+  for my $i (0..$#scores) {
+    ($feature->segments)[$i]->score($scores[$i]);
+  }
+
+  my $factory = $self->factory->clone;
+  $factory->set_option(label => 1);
+  $factory->set_option(bump  => 0);
+  $factory->set_option(connector  => 'solid');
+  return $factory->make_glyph($feature);
+}
+
+# component draws a shaded box
+# The per-part color set by draw() takes precedence over the inherited
+# background color.
+sub bgcolor {
+  my $self = shift;
+  return $self->{partcolor} if defined $self->{partcolor};
+  return $self->SUPER::bgcolor;
+}
+# Foreground color: the per-part color when the 'vary_fg' option is set
+# and a part color exists, the inherited foreground color otherwise.
+sub fgcolor {
+  my $self = shift;
+  if ($self->option('vary_fg') && defined $self->{partcolor}) {
+    return $self->{partcolor};
+  }
+  return $self->SUPER::fgcolor;
+}
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::graded_segments - The "graded_segments" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is identical to the "alignment" glyph, and is used for
+drawing features that consist of discontinuous segments.  The
+color intensity of each segment is proportionate to the score.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the graded_segments glyph recognizes the following
+glyph-specific options:
+
+  Option      Description                   Default
+  ------      -----------                   -------
+
+  -max_score  Maximum value of the	    Calculated
+              feature's "score" attribute
+
+  -min_score  Minimum value of the          Calculated
+              feature's "score" attribute
+
+  -vary_fg    Vary the foreground color as  0 (false)
+              well as the background
+
+  -merge_parts                             0 (false)
+              Whether to simplify the
+              alignment at low magnification
+
+  -max_gap    Do not merge across gaps     Calculated
+              that exceed this threshold
+
+
+If max_score and min_score are not specified, then the glyph will
+calculate the local maximum and minimum scores at run time.  Since
+many scoring functions are exponential you may wish to take the log of
+your scores before passing them to this glyph.
+
+
+=head2 Simplifying the display of alignment features for large segments
+
+The "merge_parts" option is used for semantic zooming.
+Specifically, if features are small and dense, they
+will not be displayed very well for large segments and the
+color-coding will be lost.  If merge-parts is set to a
+true value, adjacent alignment parts will be merged until a gap
+exceeding a calculated or user-specified value is encountered.
+Unless specified, the maximum gap allowed for merging adjacent features is
+calculated as (L/10000)*(L/500), where L = the length of the sequence
+displayed in the browser.  The exponentially increasing gap threshold
+allows more aggressive merging of alignment features as the size of
+the displayed sequence grows larger.
+
+The score of the merged feature is calculated as a weighted average.
+For example, consider two adjacent HSPs that are each 400 bp in
+length and have scores of 60% and 70%.  If the merge_parts option
+is set to a true value, the two HSPs would be merged in the display to
+a single 800 bp alignment block with an average score of 65%.
+
+The merge_parts option is turned off by default.
+
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/group.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/group.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/group.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,75 @@
+package Bio::Graphics::Glyph::group;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::segmented_keyglyph);
+
+# group sets connector to 'dashed'
+#
+# The inherited setting is returned unchanged when all callbacks are
+# enabled, or when the caller explicitly disabled connectors ('none' or a
+# defined false value).  In every other case the connector is 'dashed'.
+sub connector {
+  my $self = shift;
+  my $super = $self->SUPER::connector(@_);
+  return $super if $self->all_callbacks;
+
+  # Return the caller's own setting explicitly rather than falling off the
+  # end of the sub, which would yield the value of the boolean test.
+  return $super if defined($super) && ($super eq 'none' or !$super);
+  return 'dashed';
+}
+
+# we don't label group (yet)
+sub label {
+  return 0;
+}
+
+# Construct the glyph through the parent constructor, appending
+# -level => -1 to the caller's arguments.
+sub new {
+  my ($class,@args) = @_;
+  return $class->SUPER::new(@args,-level=>-1);
+}
+
+
+# don't allow simple bumping in groups -- it looks terrible...
+# The inherited bump value is clamped to the range -1..1; an undefined
+# value yields nothing.
+sub bump {
+  my $self = shift;
+  my $bump = $self->SUPER::bump(@_);
+  return unless defined $bump;
+  if    ($bump >  1) { return  1 }
+  elsif ($bump < -1) { return -1 }
+  return $bump;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::group - The "group" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used internally by Bio::Graphics::Panel for laying out
+groups of glyphs that move in concert.  It should not be used
+explicitly.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Ace::Sequence>, L<Ace::Sequence::Feature>, L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Track>, L<Bio::Graphics::Glyph::anchored_arrow>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::box>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/heterogeneous_segments.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/heterogeneous_segments.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/heterogeneous_segments.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,180 @@
+package Bio::Graphics::Glyph::heterogeneous_segments;
+
+# this glyph acts like graded_segments but the bgcolor of each segment is
+# controlled by the source field of the feature. Use the source field name
+# to set the background color:
+# -waba_strong_color => 'blue'
+# -waba_weak_color   => 'red'
+# -waba_coding_color => 'green' 
+
+# $Id: heterogeneous_segments.pm,v 1.9.4.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::graded_segments);
+
+# override draw method to give each component the color configured for its source
+sub draw {
+  my $self = shift;
+
+  # Collect the sub-glyphs to colour.  A glyph at level 0 with no parts
+  # of its own is treated as its single part; if there is still nothing
+  # to colour, defer to the inherited draw().
+  my @segments = $self->parts;
+  @segments    = ($self) if !@segments && $self->level == 0;
+  return $self->SUPER::draw(@_) unless @segments;
+
+  if ($self->option('merge_parts')) {
+    @segments = $self->merge_parts(@segments);
+  }
+  # Resolve one colour per source tag and cache it on the glyph.  The
+  # option consulted is "<lowercased source>_color"; bgcolor is the
+  # fallback.  Parts whose feature yields no (true) source tag are skipped.
+  my $cache   = $self->{source2color} ||= {};
+  my $default = $self->bgcolor;
+  foreach my $segment (@segments) {
+    my $source = eval { $segment->feature->source_tag };
+    next unless $source;
+    $cache->{$source} ||= $self->color(lc($source) . "_color") || $default;
+    $segment->{partcolor} = $cache->{$source};
+  }
+
+  $self->Bio::Graphics::Glyph::segments::draw(@_);
+}
+
+
+# synthesize a key glyph
+sub keyglyph {
+  my $self = shift;
+
+  my $bp_per_pixel = 1/$self->scale;  # base pairs/pixel
+  my $origin       = $self->panel->offset;
+
+  # three abutting segments at pixels 0->25, 25->50 and 50->75, scaled
+  # to base pairs and shifted by the panel offset
+  my @edge = map { $_*$bp_per_pixel + $origin } (0, 25, 50, 75);
+  my $feature = Bio::Graphics::Feature->new(
+    -segments => [ [ @edge[0,1] ], [ @edge[1,2] ], [ @edge[2,3] ] ],
+    -name     => $self->option('key'),
+    -strand   => '+1');
+
+  # every factory option ending in "_color" names a source; the three
+  # segments are tagged with sources 1, 0 and 2, in that order
+  my @sources = map { (my $name = $_) =~ s/_color$//; $name }
+                grep { /_color$/ } $self->factory->options;
+  my @pick = (1, 0, 2);
+  for my $i (0 .. 2) { ($feature->segments)[$i]->source_tag($sources[$pick[$i]]) }
+
+  my $factory = $self->factory->clone;
+  $factory->set_option(label     => 1);
+  $factory->set_option(bump      => 0);
+  $factory->set_option(connector => 'solid');
+  my $glyph = $factory->make_glyph(0, $feature);
+  return $glyph;
+}
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::heterogeneous_segments - The "heterogeneous_segments" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph acts like graded_segments but the bgcolor of each segment (sub-feature)
+can be individually set using the source field of the feature.
+
+Each segment type color is specified using the following nomenclature:
+
+ -{source}_color => $color
+
+For example, if the feature consists of a gene containing both
+confirmed and unconfirmed exons, you can make the confirmed exons
+green and the unconfirmed ones red this way:
+
+  -confirmed_color   => 'green',
+  -unconfirmed_color => 'red'
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/image.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/image.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/image.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,426 @@
+package Bio::Graphics::Glyph::image;
+
+# $Id: image.pm,v 1.3.4.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use GD;
+use base 'Bio::Graphics::Glyph::generic';
+our @ISA;
+
+#
+#       |--------------------| true position  ('height' high)
+#       .                    .
+#      .                      .     diagonal  (vertical spacing high)
+#     .                        .
+#    +--------------------------+
+#    |                          |
+#    |                          |
+#    |                          |   image
+#    |                          |
+#    |                          |
+#    |                          |
+#    +--------------------------+
+
+use constant VERTICAL_SPACING => 20;
+
+sub new {
+  my $glyph = shift->SUPER::new(@_);     # build the ordinary glyph first
+  $glyph->{image} = $glyph->get_image;   # then load its picture and keep it on the glyph
+  return $glyph;
+}
+
+sub get_image {
+  my $self = shift;
+  my ($format, $bytes) = eval { $self->image_data };
+  if (!$bytes) {
+    warn $@ if $@;   # say why the image could not be fetched, if that is known
+    return;
+  }
+  # GD::Image constructor, plus any extra argument, for each content type handled
+  my %loader = ('image/png' => ['newFromPngData', 1], 'image/jpeg' => ['newFromJpegData', 1],
+                'image/gif' => ['newFromGifData'],    'image/gd'   => ['newFromGdData'],
+                'image/gd2' => ['newFromGd2Data']);
+  my ($ctor, @extra) = @{ $loader{$format} || [] };
+  $self->throw("This module cannot handle images of type $format") unless $ctor;
+  return scalar(GD::Image->$ctor($bytes, @extra));
+}
+
+sub _guess_format {
+  my ($self, $path) = @_;
+  # suffix patterns (case-insensitive), tried in this order, with the type each implies
+  my @suffix_to_type = (
+    [ qr/\.png$/i,      'image/png' ], [ qr/\.jpe?g$/i, 'image/jpeg' ],
+    [ qr/\.gif(87)?$/i, 'image/gif' ], [ qr/\.gd$/i,    'image/gd'   ],
+    [ qr/\.gd2$/i,      'image/gd2' ] );
+  for my $pair (@suffix_to_type) { return $pair->[1] if $path =~ $pair->[0] }
+  my ($suffix) = $path =~ /\.(\w+)$/;   # unknown type: fall back to the bare extension
+  return $suffix;
+}
+
+sub image_path {
+  # Returns the URL or file path of this glyph's image, or nothing when
+  # neither the 'image' option nor an 'image' feature tag supplies one.
+  my $self = shift;
+  my $feature  = $self->feature  or $self->throw("no feature!");
+  my $dirname  = $self->image_dir;
+  my $basename = $self->option('image');
+  # can't get it from callback, so try looking for an 'image' attribute
+  if (!$basename && $feature->can('has_tag') && $feature->has_tag('image')) {
+    ($basename)  = $feature->get_tag_values('image');
+  }
+  return unless $basename;
+  return $basename if $basename =~ m!^(?:\w+:)?/!;  # URL or absolute path: use as is
+  # relative name: prepend -image_prefix, but only when one was configured
+  return defined $dirname && length $dirname ? "$dirname/$basename" : $basename;
+}
+
+sub image_data {
+  # Fetches the raw image and returns (content type, image data).  Returns
+  # nothing when the glyph has no image path; throws when the fetch fails.
+  my $self = shift;
+  my $path = $self->image_path;
+  return unless defined $path;
+
+  if ($path =~ m!^\w+:/!) { # looks like a URL
+    require LWP::UserAgent;
+    my $ua = LWP::UserAgent->new(env_proxy => 1);
+    my $response = $ua->get($path);
+    $self->throw($response->status_line) unless $response->is_success;
+    # scalar(): in list context content_type also returns the header's
+    # parameters, which would push the image data out of second place
+    return (scalar($response->content_type), $response->content);
+  }
+  # local file: three-argument open on a lexical handle, so nothing in the
+  # path can be mistaken for an open mode or a pipe
+  my $content_type = $self->_guess_format($path);
+  open my $fh, '<', $path or $self->throw("Can't open $path: $!");
+  binmode $fh;
+  my $data = do { local $/; <$fh> };   # read the whole file in one go
+  close $fh;
+  return ($content_type,$data);
+}
+
+sub pad_left {
+  my $self    = shift;
+  my $base    = $self->SUPER::pad_left;
+  my $picture = $self->{image} or return $base;        # no image: inherited padding only
+  my $overhang = ($picture->width - $self->width)/2;   # half the image's excess width over the glyph
+  return $overhang < $base ? $base : $overhang;        # whichever is larger
+}
+
+sub pad_right {
+  my $self    = shift;
+  my $base    = $self->SUPER::pad_right;
+  my $picture = $self->{image} or return $base;        # no image: inherited padding only
+  my $overhang = ($picture->width - $self->width)/2;   # half the image's excess width over the glyph
+  return $overhang < $base ? $base : $overhang;        # whichever is larger
+}
+
+sub pad_bottom {
+  my $self    = shift;
+  my $base    = $self->SUPER::pad_bottom;
+  my $picture = $self->{image} or return $base;   # no image: inherited padding only
+  # otherwise leave room below the glyph for the vertical spacing and
+  # for the full height of the picture
+  return $base + $self->vertical_spacing + $picture->height;
+}
+
+sub vertical_spacing {
+  my $self       = shift;
+  my $configured = $self->option('vertical_spacing');
+  # any defined setting wins, including 0; otherwise use the class constant
+  return defined $configured ? $configured : VERTICAL_SPACING;
+}
+
+sub draw_description {
+  my ($self, $gd, $left, $top, $partno, $total_parts) = @_;
+  my $picture = $self->{image};
+  $top += $picture->height + $self->vertical_spacing if $picture;   # move the text down past the image
+  $self->SUPER::draw_description($gd, $left, $top, $partno, $total_parts);
+}
+
+sub image_dir {
+  my ($self) = @_;
+  return $self->option('image_prefix');   # value of the image_prefix option
+}
+
+sub draw_component {   # draws the span marker, then the image beneath it
+  my $self  = shift;
+  my $gd    = shift;
+  my($x1,$y1,$x2,$y2) = $self->bounds(@_);
+  # the feature's extent is drawn by the delegate glyph ('generic' by default)
+  my $delegate = $self->option('glyph_delegate') || 'generic';
+  if ($delegate eq 'generic') {
+    $self->SUPER::draw_component($gd,@_);
+  } else {
+    eval "require Bio::Graphics::Glyph::$delegate";
+    local @ISA = ("Bio::Graphics::Glyph::$delegate");
+    my $method = "Bio::Graphics::Glyph::${delegate}::draw_component";
+    $self->$method($gd,@_);
+  }
+  # nothing more to do unless an image was loaded for this glyph
+  my $image  = $self->{image} or return;
+
+  my $fgcolor = $self->fgcolor;
+  my $bgcolor = $self->bgcolor;
+  my $height  = $self->option('height');
+  my $half    = 4;
+  my $vs      = $self->vertical_spacing;
+  # centre the image horizontally on the span, below it by the vertical spacing
+  my $delta = (($x2-$x1) - $image->width)/2;
+  my($x,$y) = ($x1+$delta,$y1+$vs+$self->height);
+  if ($gd->can('copy')) {
+    $gd->copy($image,$x,$y,0,0,$image->width,$image->height) ;
+  } else {
+    my $gray = $self->panel->translate_color('gray');   # no copy() on this canvas: gray box instead
+    $gd->filledRectangle($x,$y,$x+$image->width,$y+$image->height,$gray);
+  }
+  # join both ends of the span to the top corners of the image
+  if ($vs > 0) {
+    $gd->line($x1,$y2+2,$x1,$y2+$half,$fgcolor);
+    $gd->line($x2,$y2+2,$x2,$y2+$half,$fgcolor);
+    $gd->line($x1,$y2+$half,$x,$y-$half,$fgcolor);
+    $gd->line($x2,$y2+$half,$x+$image->width-1,$y-$half,$fgcolor);
+    $gd->line($x,$y-$half,$x,$y-2,$fgcolor);
+    $gd->line($x+$image->width-1,$y-$half,$x+$image->width-1,$y-2,$fgcolor);
+  }
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::image - A glyph that draws photographs & other images
+
+=head1 SYNOPSIS
+
+ use Bio::Graphics;
+ use Bio::Seq;
+ use Bio::SeqFeature::Generic;
+
+ my $bsg = 'Bio::SeqFeature::Generic';
+
+ my $seq    = Bio::Seq->new(-length=>1000);
+
+ my $whole  = $bsg->new(-display_name => 'Clone82',
+ 		        -start        => 1,
+		        -end          => $seq->length);
+
+ my $image1 = $bsg->new(-start        => 100,
+		        -end          => 300,
+		        -display_name => 'Excretory System',
+		        -tag=>{
+			      image=>"http://www.flybase.org/anatomy/image-browser_files/excretory-system.gif"
+			      }
+		       );
+
+ my $image2 = $bsg->new(-start        => 500,
+		        -end          => 800,
+		        -display_name => 'Expression Pattern',
+		        -tag=>{
+			      image=>"http://www.flybase.org/anatomy/image-browser_files/embryonic-expression-pattern.gif"
+			      }
+		       );
+
+ my $panel = Bio::Graphics::Panel->new(-length    => $seq->length,
+				       -width     => 800,
+				       -truecolor => 1,
+				       -key_style => 'between',
+				       -pad_left  => 10,
+				       -pad_right => 10,
+				      );
+
+ $panel->add_track($whole,
+		   -glyph    => 'arrow',
+		   -double   => 1,
+		   -tick     => 2,
+		   -label    => 1,
+		   );
+
+ $panel->add_track([$image1,$image2],
+		   -glyph    => 'image',
+		   -label    => 1,
+		   -key       => 'Example images');
+
+ binmode STDOUT;
+ print $panel->png;
+
+=head1 DESCRIPTION
+
+This glyph inserts an image into the track at the indicated feature
+coordinates. The image can be in PNG, JPEG, GIF or GD format, and can
+be either 8-bit or 24-bit ("truecolor"). The image can be located on
+the local filesystem or located at a remote URL (provided that you
+have the LWP module installed).
+
+When working with photographic images, you may wish to have
+Bio::Graphics::Panel create 24-bit (truecolor) images in order to
+avoid running out of colors. The symptom of this is that images appear
+posterized. To turn on truecolor images, pass the -truecolor option to
+Bio::Graphics::Panel as shown in the synopsis.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+The following additional options are available to the "image" glyph:
+
+  Option            Description                       Default
+  ------            -----------                       -------
+
+  -image            Specify the image path or URL     none
+                    to use for this feature.
+
+  -image_prefix     String to prepend to              none
+                    each image path. You may prepend
+                    a directory or a partial URL.
+
+  -vertical_spacing Vertical distance from the box    20
+                    that shows the physical span of
+                    the feature to the top of
+                    the picture (in pixels).
+
+  -glyph_delegate   Glyph to use for the part of      'generic'
+                    the glyph that shows the physical
+                    span of the feature.
+
+Set B<-vertical_spacing> to 0 to completely suppress the diagonal
+lines that connect the physical span of the feature to the image.
+
+=head2 Specifying the Image
+
+The path to the image can be specified in two ways. First, you can
+place it in the feature itself using a tag named "image". Second, you
+can specify it as a track option using a callback:
+
+  $panel->add_track(\@features,
+                    -glyph=>'image',
+                    -image => sub { my $feature = shift;
+                                    my $image_path = do_something();
+                                    return $image_path }
+                    );
+
+You can of course give -image a constant string, in which case each
+feature will show the same image.
+
+The image can be a file on the local operating system or a
+URL. However, URL fetching will only work if the LWP module is
+installed on your system. Otherwise the glyph will fail with an error
+message.
+
+If the image is a relative path (it does not begin with a slash or a
+URL protocol), then the contents of -image_prefix will be prepended to
+it. This allows you to specify images that are relative to a
+particular directory or a partial URL. Example:
+
+  $panel->add_track(\@features,
+                    -glyph => 'image',
+                    -image_prefix => 'http://www.flybase.org/anatomy/image-browser_files',
+                   );
+
+This specifies that each feature's "image" tag is to be appended to
+the partial FlyBase URL, thereby saving space.
+
+=head2 Glyph Delegation
+
+The image glyph consists of two parts: an upper part that shows the
+extent of the feature in base pair coordinates, and a lower part that
+shows the image. No scaling of the image is done; its height and width
+are fixed.
+
+By default the upper part uses the "generic" glyph, which is a simple
+rectangle filled with the bgcolor and outlined with the fgcolor. To
+use a different glyph in the upper part, specify the -glyph_delegate
+option, giving the name of the glyph you wish to use. For instance, to
+use the "span" glyph:
+
+  $panel->add_track(\@features,
+                    -glyph          => 'image',
+                    -glyph_delegate => 'span'
+                   );
+
+This feature does not work with all glyphs, and in particular requires
+a recent CVS checkout of Bio::Perl to work properly with the "arrow",
+"span" and "primers" glyphs (support for the feature did not make it
+into version 1.5).
+
+=head1 BUGS AND LIMITATIONS
+
+This glyph does not work with GD::SVG. If you try to render it onto a
+GD::SVG panel, the image will be shown as a gray box. This will be
+fixed in a future version of GD::SVG.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>, Todd Harris E<lt>harris at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/lightning.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/lightning.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/lightning.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,168 @@
+package Bio::Graphics::Glyph::lightning;
+
+# A lightning bolt glyph to add some pizazz to your displays. Yeow!
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+sub draw_component {
+  my ($self, $gd, @args) = @_;
+  my $outline = $self->fgcolor;
+
+  # glyph bounds; the bolt is 0.6 times as wide as the 'height' option
+  my ($left, $top, $right, $bottom) = $self->calculate_boundaries(@args);
+  my $h = $self->option('height');
+  my $w = $h*0.6;
+
+  my $bolt = $self->polygon_package->new();
+
+  # Vertices of the closed outline.  With orient 'N' the bolt is anchored
+  # at the right-hand bound; with any other setting it is the same shape
+  # with every offset negated, anchored at the left-hand bound.
+  my @vertices;
+  if ($self->option('orient') eq 'N') {
+    my $y = $top + $h/4;
+    @vertices = (
+      [ $right,          $y+($h/2)    ],
+      [ $right-($w*0.7), $y+($h/2)    ],
+      [ $right-($w*0.2), $y+($h*0.15) ],
+      [ $right-($w*0.6), $y+($h*0.15) ],
+      [ $right,          $y-$h/2      ],
+      [ $right-($w*0.1), $y-($h*0.05) ],
+      [ $right+($w*0.5), $y-($h*0.05) ],
+      [ $right,          $y+($h/2)    ],
+    );
+  }
+  else {
+    my $y = $top + $h/2;
+    @vertices = (
+      [ $left,           $y-($h/2)    ],
+      [ $left+($w*0.7),  $y-($h/2)    ],
+      [ $left+($w*0.2),  $y-($h*0.15) ],
+      [ $left+($w*0.6),  $y-($h*0.15) ],
+      [ $left,           $y+$h/2      ],
+      [ $left+($w*0.1),  $y+($h*0.05) ],
+      [ $left-($w*0.5),  $y+($h*0.05) ],
+      [ $left,           $y-($h/2)    ],
+    );
+  }
+  for my $vertex (@vertices) { $bolt->addPt(@$vertex) }
+
+  # the fill is optional; the outline is always stroked, as a separate call
+  my $fill = $self->bgcolor;
+  $gd->filledPolygon($bolt, $fill) if $fill;
+  $gd->polygon($bolt, $outline);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::lightning - The "lightning" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a lightning bolt of specified height with relative
+width, with the point of the lightning bolt centered on the
+feature. The height of the bolt is specified by the "height"
+option. Due to the complexity of this glyph, it doesn't resolve well
+with heights less than 11 pixels.
+
+This glyph was designed to indicate point mutations on a nucleotide or
+protein backbone.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+
+
+The following options are specific to this Glyph.
+
+  Option      Description                      Default
+  ------      -----------                      -------  
+  -orient     direction of lightning bolt      N
+
+
+=head1 BUGS
+
+No reported bugs.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Todd Harris E<lt>harris at cshl.orgE<gt>
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/line.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/line.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/line.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,128 @@
+package Bio::Graphics::Glyph::line;
+# an arrow without the arrowheads
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+sub bottom {
+  my ($self, @args) = @_;
+  my $edge = $self->SUPER::bottom(@args);
+  $edge += $self->font->height if $self->option('tick');    # one font height when the tick option is set
+  $edge += $self->labelheight  if $self->option('label');   # plus the label height when the label option is set
+  return $edge;
+}
+
+sub draw_component {
+  # Draws a horizontal line in the foreground color across the glyph.
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  my $fg = $self->fgcolor;
+  # the line sits half of the parent class's height below the top edge
+  my $a2 = $self->SUPER::height/2;
+  my $center = $y1+$a2;
+
+  $gd->line($x1,$center,$x2,$center,$fg);
+  # add a label if requested
+  $self->draw_label($gd,@_) if $self->option('label');
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::line - The "line" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a line parallel to the sequence segment.
+
+=head2 OPTIONS
+
+This glyph takes only the standard options. See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merge_parts.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merge_parts.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merge_parts.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,121 @@
+package Bio::Graphics::Glyph::merge_parts;
+
+use strict;
+use base qw(Bio::Graphics::Glyph);
+
+sub merge_parts {
+    # Merges neighbouring parts for semantic zooming.  Parts are visited in
+    # order of start; whenever the gap between a part and its predecessor is
+    # no larger than max_gap, the part is stretched leftwards to cover the
+    # predecessor, takes the length-weighted mean of the two scores, and the
+    # predecessor is dropped.  Returns the surviving parts in their original
+    # order.
+    my ($self,@parts)  = @_;
+
+    # This is the largest gap across which adjacent segments will be merged
+    my $max_gap = $self->max_gap;
+
+    my $last_part;
+
+    my @sorted_parts = sort {$a->start <=> $b->start} @parts;
+
+    for my $part (@sorted_parts) {
+        if ($last_part) {
+            my $start  = $part->start;
+            my $end    = $part->stop;
+            my $pstart = $last_part->start;
+            my $pend   = $last_part->stop;
+
+            # don't merge if there is a gap > than the allowed size
+            if (abs($start - $pend) > $max_gap) {
+                $last_part = $part;
+                next;
+            }
+
+            # weighted average score; a missing score on either side counts as 0
+            my $score  = $part->score      || 0;
+            my $pscore = $last_part->score || 0;
+            my $len    = 1 + abs($end - $start);
+            my $plen   = 1 + abs($pend - $pstart);
+
+            $part->{start}    = $pstart;
+            $part->{score}    = (($score*$len)+($pscore*$plen))/($len+$plen);
+            my ($left,$right) = $self->map_pt($pstart,$end+1);
+            $part->{left}     = $left;
+            $part->{width}    = ($right - $left) + 1;
+
+            # flag the left feature for removal
+            $last_part->{remove} = 1;
+        }
+
+        $last_part = $part;
+    }
+
+    @parts =  grep {!defined $_->{remove}} @parts;
+
+    return @parts;
+}
+
+sub max_gap {
+    my $self   = shift;
+    my $cached = $self->panel->{max_gap} ||= $self->option('max_gap');   # kept on the panel once set
+    return $cached || $self->calculate_max_gap;                          # otherwise derive a value
+}
+
+sub calculate_max_gap {
+    my $self   = shift;
+    my $panel  = $self->panel;
+    my $length = $panel->length;
+
+    # Allow more aggressive merging on longer panels: the gap grows with
+    # the square of the panel length (length/10000 times length/500).
+    my $gap = ($length/10000)*($length/500);
+
+    # store the result under the panel's max_gap key and return it
+    return $panel->{max_gap} = $gap;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::merge_parts - a base class which supports semantic zooming of scored alignment features
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is a base class for
+Bio::Graphics::Glyph::graded_segments, 
+Bio::Graphics::Glyph::heterogeneous_segments
+and Bio::Graphics::Glyph::merged_alignment.
+It adds internal methods to support semantic zooming of scored
+alignment features. It is not intended for end users.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Track>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::merged_alignment>
+
+=head1 AUTHOR
+
+Sheldon McKay E<lt>mckays at cshl.eduE<gt>
+
+Copyright (c) 2005 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merged_alignment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merged_alignment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/merged_alignment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,340 @@
+package Bio::Graphics::Glyph::merged_alignment;
+
+# $Id: merged_alignment.pm,v 1.4.6.1 2006/10/02 23:10:20 sendu Exp $
+
+# this glyph acts like graded_segments but the bgcolor of each segment is
+# more configurable.  Supply a list of colors and corresponding
+# bins.  Each bin is a range of x-y scores, where score n is > x and <= y
+# e.g.
+# [ALIGNMENT]
+# feature    = alignment
+# bins       = 0-50 50-70 70-90 90-100 
+# bincolors  = white powderblue cornflowerblue blue  
+
+# for semantic zooming (at lower magnification), to
+# reduce visual complexity of alignment
+# [ALIGNMENT:20000] # segment length >= 20000 
+# feature     = alignment
+# merge_parts = 1
+# max_gap     = 500 # do not merge across gaps > 500 bp 
+
+
+use strict;
+use base qw(Bio::Graphics::Glyph::graded_segments);
+
+use constant COLORS => "lightgrey powderblue cornflowerblue blue";
+
+# override draw method
+# Assign a background color to every part according to the score bin it
+# falls into, then hand over to the parent class for the actual drawing.
+#
+# Colors come from the 'bincolors' option (default: COLORS) and bins from
+# the 'bins' option; when no bins are configured they are computed by
+# get_bins().  If the 'merge_parts' option is true the parts are merged
+# before they are colored.  Any remaining arguments are passed through
+# unchanged to SUPER::draw.
+sub draw {
+  my $self = shift;
+
+  # handle both das-style and Bio::SeqFeatureI style features, which
+  # use different names for subparts: a top-level glyph without parts
+  # is treated as its own single part
+  my @parts = $self->parts;
+  if (!@parts && $self->level == 0) {
+    @parts = ($self);
+  }
+
+  # not the right kind of feature: nothing to color
+  unless (@parts) {
+    return $self->SUPER::draw(@_);
+  }
+
+  my $color_spec = $self->option('bincolors') || COLORS;
+  my @cols       = split /\s+/, $color_spec;
+
+  my $bin_spec = $self->option('bins');
+  my @bins;
+  if ($bin_spec) {
+    @bins = split /\s+/, $bin_spec;
+  }
+  else {
+    @bins = $self->get_bins(\@parts, @cols);
+  }
+
+  # pair bins with colors by position
+  my %color;
+  @color{@bins} = @cols;
+
+  if ($self->option('merge_parts')) {
+    @parts = $self->merge_parts(@parts);
+  }
+
+  # the first bin containing the part's score decides its color;
+  # parts that match no bin are painted white
+  for my $part (@parts) {
+    my ($bin)      = grep { $part->in_range($_) } @bins;
+    my $color_name = $bin ? $color{$bin} : 'white';
+    $part->{partcolor} = $self->panel->translate_color($color_name);
+  }
+
+  $self->{parts} = \@parts;
+
+  $self->SUPER::draw(@_);
+}
+
+# Test whether a score lies inside a bin.
+#
+# Arguments: a bin written as "low-high" and, optionally, a score to use
+# when the glyph's own score is false.
+# Returns 1 when low < score <= high, otherwise 0.
+sub in_range {
+    my ($self, $range, $fallback) = @_;
+    my ($low, $high) = split '-', $range;
+    my $s = $self->score || $fallback;
+    return ($s > $low && $s <= $high) ? 1 : 0;
+}
+
+# override background method to paint glyph white as
+# a last resort
+# Background color of this glyph: the color stored in {partcolor}
+# when it is true, otherwise the string 'white'.
+sub bgcolor {
+    my ($self) = @_;
+    my $part_color = $self->{partcolor};
+    return $part_color ? $part_color : 'white';
+}
+
+# used if bins are not defined in the configuration
+# makes equal sized bins corresponding to the number of
+# colors specified
+# Build equal-sized score bins, one per color.
+#
+# Arguments: an array ref of parts followed by the list of colors; only
+# the number of colors is used.
+# Returns a list of "low-high" strings spanning the minimum to the
+# maximum score reported by minmax().  When minimum and maximum are
+# equal, the single value ($max) is returned instead of a range.
+sub get_bins {
+    my $self  = shift;
+    my $parts = shift;
+    my $cols  = @_;    # number of colors (array in scalar context)
+    my ($min,$max) = $self->minmax($parts);
+    my $range = $max - $min;
+    return ($max) if $range == 0;
+    my $increment = $range/$cols;
+
+    # walk from $min upwards in steps of $increment, emitting one
+    # "low-high" bin per step until the maximum is reached
+    my ($score,@bins) = $min;
+    until ($score >= $max) {
+        my $bin = "$score-";
+        $score += $increment;
+        $bin .= $score;
+        push @bins, $bin;
+    }
+
+    return @bins;
+}
+
+
+# synthesize a key glyph
+# Synthesize the glyph shown in the key: a three-segment example
+# feature whose segments carry the scores from example_scores(), drawn
+# with a label, no bumping and solid connectors.
+# Returns the glyph made by a clone of this glyph's factory.
+sub keyglyph {
+  my $self = shift;
+
+  my $scale = 1/$self->scale;  # base pairs/pixel
+
+  # three abutting segments, at pixels 0->25, 25->50 and 50->75
+  my $offset = $self->panel->offset;
+
+  my $feature =
+    Bio::Graphics::Feature->new(
+                                -segments=>[ [ 0*$scale +$offset,25*$scale+$offset],
+                                             [ 25*$scale +$offset,50*$scale+$offset],
+                                             [ 50*$scale+$offset, 75*$scale+$offset]
+                                           ],
+                                -name => $self->option('key'),
+                                -strand => '+1');
+
+  # one example score per segment, in order
+  my @scores = $self->example_scores;
+  for ($feature->segments) {
+      $_->score(shift @scores);
+  }
+
+  # configure a clone so the shared factory is left untouched
+  my $factory = $self->factory->clone;
+  $factory->set_option(label => 1);
+  $factory->set_option(bump  => 0);
+  $factory->set_option(connector  => 'solid');
+  my $glyph = $factory->make_glyph(0,$feature);
+  return $glyph;
+}
+
+# Return three representative scores (low, middle, high) for the key
+# glyph.
+#
+# With a 'bins' option the low value is the lower bound of the first bin
+# and the high value the upper bound of the last bin.  Otherwise, if a
+# 'min_score' or 'max_score' option is true, the values come from
+# minmax().  Failing both, (0,50,100) is returned.
+sub example_scores {
+    my $self = shift;
+
+    my $bin_spec = $self->option('bins');
+    if ($bin_spec) {
+        my @edges = split /\s+/, $bin_spec;
+        # reduce the first bin to its lower bound and the last bin
+        # to its upper bound
+        $edges[0]  =~ s/(\S+)\-\S+/$1/;
+        $edges[-1] =~ s/\S+\-(\S+)/$1/;
+        my ($low, $high) = @edges[0, -1];
+        my $middle = $low + ($high - $low)/2;
+        return ($low, $middle, $high);
+    }
+
+    my $has_limit = $self->option('min_score') || $self->option('max_score');
+    if ($has_limit) {
+        my ($low, $high) = $self->minmax;
+        my $middle = $low + ($high - $low)/2;
+        return ($low, $middle, $high);
+    }
+
+    return (0,50,100);
+}
+
+1;
+
+=pod
+
+=head1 NAME
+
+Bio::Graphics::Glyph::merged_alignment - The "merged_alignment" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph acts like graded_segments but the bgcolor of segments 
+(sub-feature) is controlled by binned scores.  It also supports
+semantic zooming to optimize glyph drawing for larger sequence
+displays.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+
+In addition, the merged-alignment glyph recognizes the following
+glyph-specific options:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -max_score  Maximum value of the         Calculated
+              feature's "score" attribute
+
+  -min_score  Minimum value of the         Calculated
+              feature's "score" attribute
+
+  -bincolors  Colors assigned to bins      lightgrey powderblue cornflowerblue blue
+              (in order)
+
+  -bins       Bins to which scores are     Calculated
+              assigned
+
+  -merge_parts                             0 (false)
+              Whether to simplify the 
+              alignment at low magnification
+
+  -max_gap    Do not merge across gaps     Calculated
+              that exceed this threshold
+
+
+If max_score and min_score are not specified, then the glyph will
+calculate the local maximum and minimum scores at run time.
+
+If the bins are not specified, they will be calculated
+based on the number of colors assigned and the local
+(or user-specified) minimum and maximum scores.
+Calculated bins are equal in size.  
+
+User-specified bins are expressed as ranges,
+
+  bins  = 0-50 50-70 70-90 90-100
+
+where each range means greater than the lower number and
+less than or equal to the higher number.
+
+
+=head2 Simplifying the display of alignment features for large segments
+
+The "merge_parts" option is used for semantic zooming.
+Specifically, if features are small and dense, they
+will not be displayed very well for large segments and the 
+color-coding will be lost.  If merge-parts is set to a
+true value, adjacent alignment parts will be merged until a gap
+exceeding a calculated or user-specified value is encountered. 
+Unless specified, the maximum gap allowed for merging adjacent features is
+calculated as (L/10000)*(L/500), where L = the length of the sequence
+displayed in the browser.  The quadratically increasing gap threshold
+allows more aggressive merging of alignment features as the size of
+the displayed sequence grows larger.
+
+The score of the merged feature is calculated as a weighted average.
+For example, consider two adjacent HSPs that are each 400 bp in 
+length and have scores of 60% and 70%.  If the merge_parts option
+is set to a true value, the two HSPs would be merged in the display to
+a single 800 bp alignment block with an average score of 65%.
+
+The merge_parts option is turned off by default.
+
+=head2 SAMPLE CONFIGURATION
+
+Sample gbrowse configuration stanzas for an alignment feature
+using this glyph.  The scores are assumed to be expressed 
+as percent identity (0-100).
+
+ # base configuration
+ [BLASTZ]
+ feature      = blastz_alignment
+ glyph        = merged_alignment
+ bincolors    = #A0A0A0 powderblue cornflowerblue blue
+ bins         = 60-70 70-80 80-90 90-100
+ category     = Sequence Similarity Tracks
+ height       = 6
+ bump         = 1
+ label        = 1
+ fgcolor      = black
+ key          = BLASTZ
+
+Semantic zooming with defined maximum gap between
+merged features for different zoom levels
+
+ # if the displayed segment is >= 20000 in length,
+ # use the merge_parts option to simplify the alignment
+ # display
+ [BLASTZ:20000]
+ feature      = blastz_alignment
+ merge_parts  = 1
+ max_gap      = 50 # do not merge across gaps > 50 bp
+
+ # if the displayed segment is >= 50000 in length
+ [BLASTZ:50000]
+ feature      = blastz_alignment
+ merge_parts  = 1
+ max_gap      = 500 # do not merge across gaps > 500 bp
+
+--OR-- 
+
+Semantic zooming with dynamically calculated maximum
+gap
+
+ # if the displayed segment is >= 20000 in length,
+ [BLASTZ:20000]
+ feature      = blastz_alignment
+ merge_parts  = 1
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Sheldon McKay E<lt>mckays at cshl.eduE<gt>
+
+Copyright (c) 2005 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/minmax.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/minmax.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/minmax.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,73 @@
+package Bio::Graphics::Glyph::minmax;
+# $Id: minmax.pm,v 1.2.6.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::segments);
+
+# Return (min_score, max_score) for a set of parts.
+#
+# Argument: an array ref of parts.
+# A bound given through the 'min_score' / 'max_score' option is returned
+# as is.  A bound that is not defined there is computed from the scores
+# of the parts' features; parts without a defined score are skipped.
+sub minmax {
+  my ($self, $parts) = @_;
+
+  my $max_score = $self->option('max_score');
+  my $min_score = $self->option('min_score');
+
+  my $need_min = !defined $min_score;
+  my $need_max = !defined $max_score;
+
+  if ($need_min || $need_max) {
+    for my $part (@$parts) {
+      # eval: a part whose feature has no usable score() is ignored
+      my $s = eval { $part->feature->score };
+      next unless defined $s;
+      if ($need_max && (!defined $max_score || $s > $max_score)) {
+        $max_score = $s;
+      }
+      if ($need_min && (!defined $min_score || $s < $min_score)) {
+        $min_score = $s;
+      }
+    }
+  }
+
+  return ($min_score,$max_score);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::minmax - The minmax glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is a common base class for
+L<Bio::Graphics::Glyph::graded_segments> and
+L<Bio::Graphics::Glyph::xyplot>.  It adds an internal method named
+minmax() for calculating the upper and lower boundaries of scored
+features, and is not intended for end users.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Track>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::xyplot>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2003 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/oval.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/oval.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/oval.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,102 @@
+package Bio::Graphics::Glyph::oval;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::ellipse);
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::oval - The "oval" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws an oval instead of a box.  It is an alias for the
+"ellipse" glyph.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pentagram.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pentagram.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pentagram.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,175 @@
+package Bio::Graphics::Glyph::pentagram;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Padding above the glyph: the height of the label font
+# ('labelfont' option, else the glyph font), doubled when the
+# 'text' option is true.
+sub pad_top {
+  my $self = shift;
+
+  my $font   = $self->option('labelfont') || $self->font;
+  my $height = $font->height;
+
+  return $self->option('text') ? $height * 2 : $height;
+}
+
+# Defaults used by draw_component when the corresponding option
+# ('text', 'text_pad_x', 'text_pad_y', 'size') is not defined.
+
+# no text
+sub default_text {
+  return '';
+}
+
+# default for the 'text_pad_x' option
+sub default_text_pad_x {
+  return 0;
+}
+
+# default for the 'text_pad_y' option
+sub default_text_pad_y {
+  return 3;
+}
+
+# default for the 'size' option
+sub default_size {
+  return 20;
+}
+
+# Draw the five-cornered shape and, if there is any, the text above it.
+#
+# Arguments: the GD drawing object followed by the arguments expected by
+# calculate_boundaries().  The shape is anchored at the left edge and
+# the bottom of the glyph and is 'size' pixels wide and high.  With the
+# 'inverted' option equal to 1 the notch points into the shape on the
+# left; otherwise the tip points to the right.
+sub draw_component {
+  my $self = shift;
+  my $gd   = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+
+  my $fg = $self->fgcolor;
+  my $bg = $self->bgcolor;
+
+  my $size = $self->option('size');
+  $size    = $self->default_size() unless defined $size;
+  my $half = $size/2;
+
+  my $polygon = $self->polygon_package->new();
+
+  # corner points in drawing order
+  my @corners = ($self->option('inverted') == 1)
+    ? ( [$x1,         $y2],
+        [$x1 + $half, $y2 - $half],
+        [$x1,         $y2 - $size],
+        [$x1 + $size, $y2 - $size],
+        [$x1 + $size, $y2] )
+    : ( [$x1,         $y2],
+        [$x1,         $y2 - $size],
+        [$x1 + $half, $y2 - $size],
+        [$x1 + $size, $y2 - $half],
+        [$x1 + $half, $y2] );
+  $polygon->addPt(@$_) for @corners;
+
+  # fill first, then outline
+  $gd->filledPolygon($polygon, $bg);
+  $gd->polygon($polygon,$fg);
+
+  my $text = $self->option('text');
+  $text    = $self->default_text() unless defined $text;
+
+  if ($text) {
+    my $text_pad_x = $self->option('text_pad_x');
+    $text_pad_x    = $self->default_text_pad_x() unless defined $text_pad_x;
+    my $text_pad_y = $self->option('text_pad_y');
+    $text_pad_y    = $self->default_text_pad_y() unless defined $text_pad_y;
+    my $font = $self->option('labelfont') || $self->font;
+    # the text sits above the shape, separated from it by text_pad_y pixels
+    $gd->string($font, $x1+$text_pad_x, $y2-$size-$text_pad_y-$font->height, $text, $fg);
+  }
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::pentagram - The "pentagram" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a pentagram with the sharp angle pointing right
+or, if the 'inverted' option is set to 1, an "inverted" pentagram
+(with the sharp angle pointing inwards, not outwards).
+There may be an optional text above the glyph.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -size       Width and height of the       20
+              glyph
+
+  -text       Text to show                  none
+
+  -text_pad_x Number of pixels between        0
+              the left edge of the glyph
+              and the start of text
+
+  -text_pad_y Number of pixels between        3
+              the pentagram
+              and the text
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pinsertion.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pinsertion.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/pinsertion.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+package Bio::Graphics::Glyph::pinsertion;
+# package to use for drawing P insertion as a triangle
+# p insertion is a point (one base).
+
+use strict;
+use GD;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Bounding box (left, top, right, bottom) of the glyph, widened by half
+# the insertion width on the left and on the right.
+sub box {
+  my $self   = shift;
+  my $margin = $self->insertion_width/2;
+  return (
+    $self->left  - $margin,
+    $self->top,
+    $self->right + $margin,
+    $self->bottom
+  );
+}
+
+# Width of the triangle: the 'insertion_width' option when it is true,
+# otherwise 6.
+sub insertion_width {
+  my ($self) = @_;
+  my $width = $self->option('insertion_width');
+  return $width ? $width : 6;
+}
+
+# override draw method
+# Draw the insertion as a filled, outlined triangle centered on the left
+# edge of the feature.
+#
+# Arguments: the GD drawing object followed by the left and top offsets.
+# Plus-strand features get a triangle pointing down, all others one
+# pointing up.  A label and a description are drawn when the 'label' /
+# 'description' options are true.
+sub draw {
+  my $self = shift;
+
+  my $gd = shift;
+  my ($left,$top) = @_;
+
+  # only the left edge and the vertical extent are needed
+  my ($x1,$y1,undef,$y2) = $self->calculate_boundaries(@_);
+
+  my $half = $self->insertion_width/2;
+  my $fill = $self->bgcolor;
+  my $border = $self->fgcolor;
+
+  my $poly_pkg = $self->polygon_package;
+  my $poly     = $poly_pkg->new();
+  if ($self->feature->strand > 0) { #plus strand
+      $poly->addPt($x1 - $half, $y1);
+      $poly->addPt($x1 + ($half), $y1);
+      $poly->addPt($x1, $y2); #pointer down
+  } else {
+      $poly->addPt($x1, $y1); #pointer up
+      $poly->addPt($x1 - $half, $y2);
+      $poly->addPt($x1 + ($half), $y2);
+  }
+  $gd->filledPolygon($poly,$fill);
+  $gd->polygon($poly,$border);
+
+  # add a label if requested
+  $self->draw_label($gd,$left,$top)       if $self->option('label');
+  $self->draw_description($gd,$left,$top) if $self->option('description');
+}
+
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::pinsertion - The "Drosophila P-element Insertion" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph was designed to show P-element insertions in the Drosophila
+genome, but in fact is suitable for any type of zero-width feature.
+Also see the triangle glyph.
+
+=head2 OPTIONS
+
+In addition to the generic options, this glyph recognizes:
+
+ Option Name       Description              Default
+ -----------       -----------              -------
+
+ -insertion_width  Width of glyph in pixels    6
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>, Shengqiang Shu E<lt>sshu at bdgp.lbl.govE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory, BDGP
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/primers.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/primers.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/primers.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,150 @@
+package Bio::Graphics::Glyph::primers;
+#$Id: primers.pm,v 1.7.4.3 2006/10/02 23:10:20 sendu Exp $
+# package to use for drawing something that looks like
+# primer pairs.
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+use constant HEIGHT => 8;
+
+# override draw method
+# Draw a primer pair: two arrows pointing at each other, optionally
+# joined by a line.
+#
+# Arguments: the GD drawing object followed by the arguments expected by
+# calculate_boundaries(); the same remaining arguments are passed on to
+# draw_label() and draw_description().
+# A glyph narrower than twice the arrow height is drawn as a plain line
+# and nothing else.  An arrow whose end lies outside the panel is left
+# out.
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  my $height = $self->option('height') || $self->option('size') || HEIGHT;
+
+  my $fg = $self->fgcolor;
+  my $a2 = $height/2;          # half height = reach of the arrowhead barbs
+  my $center = $y1 + $a2;
+
+
+  # just draw us as a solid line -- very simple
+  if ($x2-$x1 < $height*2) {
+    $gd->line($x1,$center,$x2,$center,$fg);
+    return;
+  }
+
+  # otherwise draw two pairs of arrows
+  # -->   <--
+  my $trunc_left  = $x1 < $self->panel->left;
+  my $trunc_right = $x2 > $self->panel->right;
+
+  unless ($trunc_left) {
+    $gd->setThickness(2) if $height > 6;
+    $gd->line($x1,$center,$x1 + $height,$center,$fg);
+    $gd->line($x1 + $height,$center,$x1 + $height - $a2,$center-$a2,$fg);
+    $gd->line($x1 + $height,$center,$x1 + $height - $a2,$center+$a2,$fg);
+    $gd->setThickness(1);
+  }
+
+  unless ($trunc_right) {
+    $gd->setThickness(2) if $height > 6;
+    $gd->line($x2,$center,$x2 - $height,$center,$fg);
+    $gd->line($x2 - $height,$center,$x2 - $height + $a2,$center+$a2,$fg);
+    $gd->line($x2 - $height,$center,$x2 - $height + $a2,$center-$a2,$fg);
+    $gd->setThickness(1);
+  }
+
+  # connect the dots if requested; the line stops 2 pixels short of
+  # each arrow that was drawn
+  if ($self->connect) {
+    my $c = $self->color('connect_color') || $self->bgcolor;
+    $gd->line($x1 + ($trunc_left  ? 0 : $height + 2),$center,
+              $x2 - ($trunc_right ? 0 : $height + 2),$center,
+              $c);
+  }
+
+  # add a label if requested
+  $self->draw_label($gd,@_)       if $self->option('label');
+  $self->draw_description($gd,@_) if $self->option('description');
+
+}
+
+# Whether the two arrows are joined by a line: the value of the
+# 'connect' option when it is defined, otherwise 1 (the default).
+sub connect {
+  my ($self) = @_;
+  if (defined $self->option('connect')) {
+    return $self->option('connect');
+  }
+  return 1;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::primers - The "STS primers" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws two arrows oriented towards each other and connected
+by a line of a contrasting color.  The length of the arrows is
+immaterial, but the length of the glyph itself corresponds to the
+length of the scaled feature.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description               Default
+  ------      -----------               -------
+
+  -connect    Whether to connect the      true
+              two arrowheads by a line.
+
+  -connect_color  The color to use for the    bgcolor
+              connecting line.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/processed_transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/processed_transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/processed_transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,360 @@
+package Bio::Graphics::Glyph::processed_transcript;
+
+# $Id: processed_transcript.pm,v 1.12.4.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::transcript2);
+use constant DEFAULT_UTR_COLOR => '#D0D0D0';
+
+# Constructor.  Builds the glyph through the parent class and, when the
+# caller has configured neither 'implied_utrs' nor 'adjust_exons', lets
+# guess_options() choose them from the glyph's parts.
+sub new {
+  my $class = shift;
+  my $glyph = $class->SUPER::new(@_);
+
+  unless (defined($glyph->option('implied_utrs'))
+          || defined($glyph->option('adjust_exons'))) {
+    $glyph->guess_options;
+  }
+
+  return $glyph;
+}
+
+# Inspect the types of this glyph's parts and switch on the matching
+# fix-up option:
+#   exons + CDS but no UTRs -> implied_utrs
+#   exons + UTRs            -> adjust_exons
+# The decision is taken only after every part has been counted, so it does
+# not depend on the order in which the parts are stored.
+sub guess_options {
+  my $self = shift;
+  my ($exons,$utrs,$cds) = (0,0,0);
+  foreach my $part ($self->parts) {
+    my $type = $part->feature->type;
+    $exons++ if $type =~ /exon/i;
+    $utrs++  if $type =~ /utr$/i;
+    $cds++   if $type =~ /^cds/i;
+  }
+  $self->configure(implied_utrs=>1) if $exons && $cds && !$utrs;
+  $self->configure(adjust_exons=>1) if $exons && $utrs;
+  return;
+}
+
+# Synthesize UTR sub-glyphs for a transcript that carries exon and CDS
+# parts only: exon sequence lying outside the span of the CDS features
+# becomes a UTR.  Runs at most once per glyph, and does nothing when UTR
+# features are already present or when exons or CDS features are missing.
+sub create_implied_utrs {
+  my $self = shift;
+  return if $self->{'.implied_utrs'}++;
+
+  # work on the underlying features, ordered from left to right
+  my @sorted  = sort {$a->start <=> $b->start} map {$_->feature} $self->parts;
+  my @exons   = grep {$_->type eq 'exon'} @sorted;
+  my @cds     = grep {$_->type eq 'CDS'}  @sorted;
+  my @old_utr = grep {$_->type =~ /UTR/}  @sorted;
+
+  # explicit UTRs win; without both exons and CDS there is nothing to derive
+  return if @old_utr;
+  return unless @exons && @cds;
+
+  my $cds_start = $cds[0]->start;
+  my $cds_end   = $cds[-1]->end;
+  my $strand    = $self->feature->strand;
+  my $factory   = $self->factory;
+  my $sublevel  = $self->{level}+1;
+
+  # which end is 5' depends on the strand
+  my ($left_type,$right_type) = $strand >= 0
+    ? ('five_prime_UTR','three_prime_UTR')
+    : ('three_prime_UTR','five_prime_UTR');
+
+  # left-hand UTRs: exon sequence before the first CDS
+  foreach my $exon (@exons) {
+    my $from = $exon->start;
+    last if $from >= $cds_start;
+    my $to = $exon->end;
+    $to = $cds_start-1 unless $cds_start > $to;
+    my $utr = Bio::Graphics::Feature->new(-start=>$from,
+                                          -end=>$to,
+                                          -strand=>$strand,
+                                          -type=>$left_type);
+    unshift @{$self->{parts}},$factory->make_glyph($sublevel,$utr);
+  }
+
+  # right-hand UTRs: exon sequence after the last CDS, scanned right to left
+  foreach my $exon (reverse @exons) {
+    my $to = $exon->end;
+    last if $to <= $cds_end;
+    my $from = $exon->start;
+    $from = $cds_end+1 unless $cds_end < $from;
+    my $utr = Bio::Graphics::Feature->new(-start=>$from,
+                                          -end=>$to,
+                                          -strand=>$strand,
+                                          -type=>$right_type);
+    push @{$self->{parts}},$factory->make_glyph($sublevel,$utr);
+  }
+}
+
+# Preprocess the glyph to remove overlaps between UTRs and
+# exons.  The exons are clipped so that UTRs have precedence.
+# Operates on the {left}/{width} fields of the sub-glyphs, runs at most
+# once per glyph, and finally replaces $self->{parts} with the surviving
+# parts (width > 0) sorted by their left edge.
+sub adjust_exons {
+  my $self = shift;
+
+  return if $self->{'.adjust_exons'}++;
+
+  # find everything that is not an exon (utrs and cds's)
+  my @parts  = sort {$a->{left}<=>$b->{left}} $self->parts;
+  my @exon   = grep {$_->feature->type =~ /exon/i} @parts;
+  my %seen   = map {$_=>1} @exon;
+  my @other  = grep {!$seen{$_}} @parts;
+
+  # an exon occupying exactly the same position as a CDS/UTR part is dropped
+  my %positions    = map {("$_->{left}:$_->{width}" =>1)} @other;
+  my @unique_exons = grep {!$positions{"$_->{left}:$_->{width}"}} @exon;
+
+  # nothing to clip when no exon survived the filter above
+  if (@unique_exons) {
+    # the first and last exons may need to be clipped if they overlap
+    # with another feature (CDS or UTR)
+    my $first_exon = $unique_exons[0];
+    my $last_exon  = $unique_exons[-1];
+
+    # deal with left hand side first
+    my $e_left    = $first_exon->{left};
+    my $e_right   = $e_left + $first_exon->{width};
+    for my $other (@other) {
+      my $o_left  = $other->{left};
+      my $o_right = $o_left + $other->{width};
+      next if $e_left  > $o_right;
+      last if $e_right < $o_left;
+      #dgg- need to skip 3prime/right utr for 1exon; end same as exon
+      last if (@unique_exons == 1 && $o_left > $e_left); #dgg- o_ is 3prime not 5
+      # clip left hand side; may get clipped into oblivion!
+      $first_exon->{left}  = $o_right + 1;
+      $first_exon->{width} = $e_right - $first_exon->{left};
+    }
+
+    # deal with right hand side
+    $e_left  = $last_exon->{left};
+    $e_right = $e_left + $last_exon->{width};
+    for (my $i=$#other; $i>=0; $i--) {
+      my $o_left  = $other[$i]->{left};
+      my $o_right = $o_left + $other[$i]->{width};
+      next if $e_right < $o_left;
+      last if $e_left  > $o_right;
+      # clip right hand side; may get clipped into oblivion!
+      #dgg- !! this always clips to oblivion: $last_exon->{width} = ($e_left - 1) - $last_exon->{left};
+      $last_exon->{width} = $o_left - $last_exon->{left}; #dgg-
+    }
+  }
+
+  $self->{parts} =  [ grep {$_->width > 0} sort {$a->{left}<=>$b->{left}} (@other,@unique_exons)];
+}
+
+# Apply the optional UTR/exon fix-ups.  Only top-level (level 0) glyphs
+# are processed.  'implied_utrs' triggers both steps, 'adjust_exons' only
+# the exon clipping.
+sub fixup_glyph {
+  my $self = shift;
+  return if $self->level != 0;
+
+  if ($self->option('implied_utrs')) {
+    $self->create_implied_utrs;
+  }
+  if ($self->option('implied_utrs') || $self->option('adjust_exons')) {
+    $self->adjust_exons;
+  }
+}
+
+# Draw the glyph, applying the UTR/exon fix-ups before handing over to
+# the parent class.
+sub draw {
+  my $glyph = shift;
+  $glyph->fixup_glyph;
+  return $glyph->SUPER::draw(@_);
+}
+
+# Return the parent class's boxes() result, applying the UTR/exon
+# fix-ups first.
+sub boxes {
+  my $glyph = shift;
+  $glyph->fixup_glyph;
+  return $glyph->SUPER::boxes(@_);
+}
+
+# True when the feature's primary tag names a UTR ("UTR" or
+# "untranslated_region", matched case-insensitively anywhere in the tag).
+sub is_utr {
+  my $self = shift;
+  my $tag  = $self->feature->primary_tag;
+  return $tag =~ /UTR|untranslated_region/i;
+}
+
+# Value of the 'thin_utr' option.
+sub thin_utr {
+  my $glyph = shift;
+  return $glyph->option('thin_utr');
+}
+
+# Color used for UTR parts: the configured 'utr_color' when that option
+# is set, otherwise DEFAULT_UTR_COLOR translated by the factory.
+sub utr_color {
+  my $self = shift;
+  unless ($self->option('utr_color')) {
+    return $self->factory->translate_color(DEFAULT_UTR_COLOR);
+  }
+  return $self->color('utr_color');
+}
+
+# Glyph height.  With 'thin_utr' set, UTR parts are drawn at two thirds
+# of the parent class's height (rounded); everything else is full height.
+sub height {
+  my $self = shift;
+  my $full = $self->SUPER::height;
+  if ($self->thin_utr && $self->is_utr) {
+    return int($full/1.5+0.5);
+  }
+  return $full;
+}
+
+# Top padding.  A thinned UTR part gets extra padding of 0.167 times the
+# parent class's height (rounded) on top of the inherited padding.
+sub pad_top {
+  my $self = shift;
+  my $base = $self->SUPER::pad_top;
+  if ($self->thin_utr && $self->is_utr) {
+    return $base + int(0.167*$self->SUPER::height + 0.5);
+  }
+  return $base;
+}
+
+# Background color: the UTR color for UTR parts, the inherited bgcolor
+# for everything else.
+sub bgcolor {
+  my $self = shift;
+  return $self->is_utr ? $self->utr_color : $self->SUPER::bgcolor;
+}
+
+# Connector type: 'quill' when 'decorate_introns' is set, otherwise
+# whatever the parent class returns.
+sub connector {
+  my $self = shift;
+  if ($self->option('decorate_introns')) {
+    return 'quill';
+  }
+  return $self->SUPER::connector(@_);
+}
+
+
+# Return the sub-features to draw for the given feature.  Defers to the
+# parent class unless this is a level-0 glyph instance with the 'one_cds'
+# option set.  In that case the CDS/UTR sub-features are fetched, and any
+# of them that is itself split into segments is replaced by its segments.
+sub _subfeat {
+  my $self   = shift;
+  return $self->SUPER::_subfeat(@_) unless ref($self) && $self->{level} == 0 && $self->option('one_cds');
+  my $feature = shift;
+
+  my @subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR));
+
+  # The CDS and UTRs may be represented as a single feature with subparts or as several features
+  # that have different IDs. We handle both cases transparently.
+  my @result;
+  foreach my $part (@subparts) {
+    if ($part->primary_tag =~ /CDS|UTR/i) {
+      my @cds_seg = $part->get_SeqFeatures;
+      if (@cds_seg > 0) { push @result,@cds_seg } else { push @result,$part }
+    } else {
+      push @result,$part;
+    }
+  }
+  return @result;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::processed_transcript - The sequence ontology transcript glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing processed transcripts that have both
+CDS and UTR segments.  The CDS is drawn in the background color, and
+the UTRs are drawn in an alternate color selected by the utr_color
+option.  In addition, you can make the UTRs thinner than the CDS by
+setting the "thin_utr" option.
+
+For this glyph to produce the desired results, you should pass it a
+compound Bio::SeqFeature that has subfeatures of primary_tag "CDS" and
+"UTR".  In fact, you may give it more specific types of UTR, including
+5'-UTR, 3'-UTR, or the Sequence Ontology terms "untranslated_region,"
+"five_prime_untranslated_region," and
+"three_prime_untranslated_region."
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 undef (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       undef (false)
+
+  -description  Whether to draw a description  undef (false)
+
+  -strand_arrow Whether to indicate            undef (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the processed_transcript glyph recognizes the following
+glyph-specific options:
+
+  Option         Description                  Default
+  ------         -----------                  -------
+
+  -thin_utr      Flag.  If true, UTRs will      undef (false)
+                 be drawn at 2/3 of the
+                 height of CDS segments.
+
+  -utr_color     Color of UTR segments.         Gray #D0D0D0
+
+  -decorate_introns
+                 Draw strand with little arrows undef (false)
+                 on the intron.
+
+  -adjust_exons  Fix exons so that they don't   undef (false)
+                 overlap UTRs
+
+  -implied_utrs  Whether UTRs should be implied undef (false)
+                 from exons and CDS features
+
+  -one_cds       Some databases (e.g. FlyBase) represent their
+                 transcripts as having a single CDS that is
+                 broken up into multiple parts. Set this to
+                 true to display this type of feature.
+
+The B<-adjust_exons> option is needed to handle features in which the
+exons (SO type "exon") overlap with the UTRs (SO types
+"five_prime_UTR" and "three_prime_UTR").  The exon parts of the glyph
+will be clipped so that they don't overlap with the UTR parts.
+
+The B<-implied_utrs> option is needed if there are no explicit UTR
+features.  In this case, UTRs are derived by subtracting the positions
+of "CDS" subfeatures from the positions of "exon" subfeatures.
+B<-implied_utrs> implies the B<-adjust_exons> option.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/protein.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/protein.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/protein.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,313 @@
+package Bio::Graphics::Glyph::protein;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# this glyph never draws a description
+sub description {
+  return 0;
+}
+
+# turn off label
+# sub label { 1 }
+
+# Glyph height: two font lines when dna_fits is true, the inherited
+# height when the KD plot is enabled, otherwise 0.
+sub height {
+  my $self = shift;
+  my $font = $self->font;
+  if ($self->dna_fits) {
+    return 2 * $font->height;
+  }
+  if ($self->do_kd) {
+    return $self->SUPER::height;
+  }
+  return 0;
+}
+
+# Whether the KD plot is enabled.  It is on unless the 'do_kd' option is
+# explicitly set to a false value.
+sub do_kd {
+  my $self = shift;
+  my $flag = $self->option('do_kd');
+  return 1 unless defined $flag;   # option not set: default is on
+  return 1 if $flag;
+  return;
+}
+
+# Draw one component: the residue letters when dna_fits is true,
+# otherwise the KD plot if it is enabled.  Draws nothing when the
+# feature yields no sequence.
+sub draw_component {
+  my $self = shift;
+  my $gd   = shift;
+  my ($left,$top,$right,$bottom) = $self->bounds(@_);
+
+  my $residues = eval { $self->feature->seq };
+  # to catch Bio::PrimarySeqI objects
+  if (ref($residues) and $residues->can('seq')) {
+    $residues = $residues->seq;
+  }
+  return unless $residues;
+
+  # workaround for my misreading of interface -- LS
+  if (ref($residues) && $residues->can('seq')) {
+    $residues = $residues->seq;
+  }
+
+  if ($self->dna_fits) {
+    $self->draw_protein($gd,$residues,$left,$top,$right,$bottom);
+  }
+  elsif ($self->do_kd) {
+    $self->draw_kd_plot($gd,$residues,$left,$top,$right,$bottom);
+  }
+}
+
+# Draw the residues of $protein as individual characters between $x1 and
+# $x2, one character per residue position.
+#   $gd               drawing object (its char() method is used)
+#   $protein          sequence string
+#   $x1,$y1,$x2,$y2   bounds of the component
+sub draw_protein {
+  my $self = shift;
+
+  my ($gd,$protein,$x1,$y1,$x2,$y2) = @_;
+  my $pixels_per_base = $self->scale;
+
+  my $feature = $self->feature;
+
+  my @bases = split '', $protein;
+  my $color = $self->fgcolor;
+  my $font  = $self->font;
+  my $lineheight = $font->height;
+  $y1 -= $lineheight/2 - 3;
+
+  my $start   = $self->panel->left + $self->map_pt($feature->start);
+
+  # index of the first residue that can fall inside the bounds; never
+  # negative, because a negative index would read from the end of @bases
+  my $offset  = int(($x1-$start-1)/$pixels_per_base);
+  $offset = 0 if $offset < 0;
+
+  for (my $i=$offset;$i<@bases;$i++) {
+    my $x = $start + $i * $pixels_per_base;
+    next if $x+1 < $x1;
+    last if $x > $x2;
+    $gd->char($font,$x+2,$y1,$bases[$i],$color);
+  }
+
+}
+
+# Draw a Kyte-Doolittle hydropathy plot of $protein inside the given
+# bounds.  The hydropathy is averaged over a sliding window of 'kd_window'
+# residues (default 9, never wider than the sequence), scaled from
+# [-4.5,4.5] to [0,1], and drawn as a polyline with axes, tick marks and
+# grid lines.
+#   $gd               drawing object (line() and string() are used)
+#   $protein          sequence string
+#   $x1,$y1,$x2,$y2   bounds of the component
+sub draw_kd_plot {
+  my $self     = shift;
+  my $gd       = shift;
+  my $protein = shift;
+  my ($x1,$y1,$x2,$y2) = @_;
+
+  my $kd_window = $self->option('kd_window') || 9;
+
+  # Calculate the KD plot ...
+
+  my %scores = ( I => 4.5,
+                 V => 4.2,
+                 L => 3.8,
+                 F => 2.8,
+                 C => 2.5,
+                 M => 1.9,
+                 A => 1.8,
+                 G => -0.4,
+                 T => -0.7,
+                 W => -0.9,
+                 S => -0.8,
+                 Y => -1.3,
+                 P => -1.6,
+                 H => -3.2,
+                 E => -3.5,
+                 Q => -3.5,
+                 D => -3.5,
+                 N => -3.5,
+                 K => -3.9,
+                 R => -4.5,
+               );
+
+  my @datapoints;
+  my @seq = split('', uc($protein));
+
+  # the window can never be wider than the sequence itself
+  $kd_window = $kd_window < scalar(@seq) ? $kd_window : scalar(@seq);
+
+  my $maxkd = 4.5;
+  my $minkd = -4.5;
+
+  # sum over the first window; residues without a score count as 0
+  my $kd = 0;
+  for (my $i = 0 ; $i < @seq && $i < $kd_window ; $i++) {
+    $kd += $scores{$seq[$i]} || 0;
+  }
+
+  my $content = $kd / $kd_window;
+  push @datapoints, $content;
+
+  # slide the window: drop the residue leaving it, add the one entering
+  for (my $i = $kd_window; $i < @seq; $i++) {
+    $kd -= $scores{$seq[$i-$kd_window]} || 0;
+    $kd += $scores{$seq[$i]} || 0;
+    $content = $kd / $kd_window;
+    push @datapoints, $content;
+  }
+
+  # rescale every data point from [minkd,maxkd] to [0,1]
+  my $kd_range = $maxkd - $minkd;
+  $_ = ($_ - $minkd) / $kd_range for @datapoints;
+
+  # Calculate values that will be used in the layout
+
+  my $bin_height = $y2-$y1;
+  my $fgcolor    = $self->fgcolor;
+  my $bgcolor    = $self->factory->translate_color($self->panel->gridcolor);
+  my $axiscolor  = $self->color('axis_color') || $fgcolor;
+
+  # Draw the axes
+
+  $gd->line($x1,  $y1,        $x1,  $y2,        $axiscolor);
+  $gd->line($x2-2,$y1,        $x2-2,$y2,        $axiscolor);
+  $gd->line($x1,  $y1,        $x1+3,$y1,        $axiscolor);
+  $gd->line($x1,  $y2,        $x1+3,$y2,        $axiscolor);
+  $gd->line($x1,  ($y2+$y1)/2,$x1+3,($y2+$y1)/2,$axiscolor);
+  $gd->line($x2-4,$y1,        $x2-1, $y1,       $axiscolor);
+  $gd->line($x2-4,$y2,        $x2-1, $y2,       $axiscolor);
+  $gd->line($x2-4,($y2+$y1)/2,$x2-1,($y2+$y1)/2,$axiscolor);
+  $gd->line($x1+5,$y2,        $x2-5,$y2,        $bgcolor);
+  $gd->line($x1+5,($y2+$y1)/2,$x2-5,($y2+$y1)/2,$bgcolor);
+  $gd->line($x1+5,$y1,        $x2-5,$y1,        $bgcolor);
+  $gd->string($self->font,$x1+5,$y1,'Kyte-Doolittle hydropathy plot',$axiscolor)
+      if $bin_height > $self->font->height*2;
+
+  $gd->string($self->font,$x2-20,$y1,$maxkd,$axiscolor)
+    if $bin_height > $self->font->height*2.5;
+  $gd->string($self->font,$x2-20,$y2-$self->font->height,$minkd,$axiscolor)
+    if $bin_height > $self->font->height*2.5;
+
+  # horizontal scale: graph width divided by the sequence length
+  # (@datapoints + $kd_window - 1 equals the number of residues)
+  my $graphwidth = $x2 - $x1;
+  my $xscale = $graphwidth / (@datapoints + $kd_window - 1);
+  for (my $i = 1; $i < @datapoints; $i++) {
+    # each point is plotted at the middle of its window
+    my $x = $i + $kd_window / 2;
+    my $xlo = $x1 + ($x - 1) * $xscale;
+    my $xhi = $x1 + $x * $xscale;
+    my $y = $y2 - ($bin_height*$datapoints[$i]);
+    $gd->line($xlo, $y2 - ($bin_height*$datapoints[$i-1]),
+              $xhi, $y,
+              $fgcolor);
+  }
+}
+
+# Build the sample feature shown in the track key: a 500-residue random
+# protein sequence on the + strand, starting just right of the panel
+# offset and named after the 'key' option.
+sub make_key_feature {
+  my $self = shift;
+  my @residues = qw(A C D E F G H I K L M N P Q R S T V W Y);
+  my $offset = $self->panel->offset;
+  my $scale = 1/$self->scale;  # sequence units per pixel
+
+  my $start = $offset+1;
+  my $stop  = $offset+100*$scale;
+  my $feature =
+    Bio::Graphics::Feature->new(-start=> $start,
+                                -stop => $stop,
+                                # sample from the whole residue alphabet
+                                -seq  => join('',map{$residues[rand(scalar @residues)]} (1..500)),
+                                -name => $self->option('key'),
+                                -strand => '+1',
+                               );
+  $feature;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::protein - The "protein" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws protein sequences.  At high magnifications, this
+glyph will draw the actual amino acids of the sequence.  At low
+magnifications, the glyph will plot the Kyte-Doolittle hydropathy.  By
+default, the KD plot will use a window size of 9 residues, but this
+can be changed by specifying the kd_window option.
+
+For this glyph to work, the feature must return a protein sequence
+string in response to the seq() method.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description               Default
+  ------      -----------               -------
+
+  -do_kd      Whether to draw the Kyte-  true
+              Doolittle hydropathy plot
+              at low mags
+
+  -kd_window  Size of the sliding window  9
+  	      to use in the KD hydropathy 
+	      calculation.
+
+  -axis_color Color of the vertical axes  fgcolor
+              in the KD hydropathy plot
+
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Aaron J. Mackey, based on the "dna" glyph by Lincoln Stein
+E<lt>lstein at cshl.orgE<gt> and Peter Ashton E<lt>pda at sanger.ac.ukE<gt>.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ragged_ends.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ragged_ends.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ragged_ends.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+package Bio::Graphics::Glyph::ragged_ends;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Draw the feature as a box whose left and/or right edge is a zigzag
+# instead of a vertical line.
+# Options read here:
+#   ragged_left, ragged_right   draw that edge as a zigzag when true
+#   zig                         vertical step between zigzag points (default 4)
+#   zag                         horizontal depth of the zigzag (default 4)
+#   bgcolor                     fill color; the box is not filled when unset
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my $fg = $self->fgcolor;
+  # translate the configured background before handing it to GD; leave it
+  # undefined (no fill) when no bgcolor was configured
+  my $bg = $self->option('bgcolor') ? $self->color('bgcolor') : undef;
+  my($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $zig = $self->option('zig') || 4;
+  my $zag = $self->option('zag') || 4;
+
+  my $polygon = GD::Polygon->new;
+
+  # left edge, walking from top to bottom
+  $polygon->addPt($x1, $y1);
+  my $yoff = $y1 + $zig;
+  my $i = 1;   # toggles between true and false: indented / flush points
+  if ($self->option("ragged_left")) {
+      while ($yoff <= $y2) {
+          $polygon->addPt( $x1 + ($i * $zag),
+                           $yoff );
+          $i = !$i;
+          $yoff += $zig;
+      }
+  }
+  $polygon->addPt($x1, $y2);
+
+  # right edge, walking from bottom to top
+  $polygon->addPt($x2, $y2);
+  $yoff = $y2 - $zig;
+  $i = 1;
+  if ($self->option("ragged_right")) {
+      while ($yoff >= $y1) {
+          $polygon->addPt( $x2 - ($i * $zag),
+                           $yoff );
+          $i = !$i;
+          $yoff -= $zig;
+      }
+  }
+  $polygon->addPt($x2, $y1);
+
+  $polygon->addPt($x1, $y1); # close the polygon
+
+  # fill first so that the outline is not painted over
+  $gd->filledPolygon($polygon, $bg) if defined $bg;
+  $gd->polygon($polygon, $fg);
+}
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::ragged_ends - The "ragged ends" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is identical to the "box" glyph except that it will draw the
+subparts of features that contain subfeatures.  The subparts are not
+connected -- use the "segments" glyph for that.  "Generic" is the
+default glyph used when not otherwise specified.
+
+=head2 OPTIONS
+
+This glyph provides two extra options to control whether the right
+and/or left ends of the drawn box are to be drawn "raggedly" with
+zigzags instead of vertical lines.
+
+  Option        Values    Default
+  -ragged_left   0 | 1       0
+  -ragged_right  0 | 1       0
+  -zig           > 3         4
+  -zag           > 3         4
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -pad_top      Top padding                    0
+
+  -pad_bottom   Bottom padding                 0
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+-pad_top and -pad_bottom allow you to insert some blank space between
+the glyph's boundary and its contents.  This is useful if you are
+changing the glyph's height dynamically based on its feature's score.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::Graphics::Glyph::xyplot>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Aaron J Mackey E<lt>amackey at pcbi.upenn.eduE<gt>.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_box.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_box.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_box.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,216 @@
+package Bio::Graphics::Glyph::redgreen_box;
+#$Id: redgreen_box.pm,v 1.5.8.2 2006/11/19 17:18:45 lstein Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# The fill color is whatever draw() stored in the force_bgcolor slot.
+sub bgcolor {
+  my $glyph = shift;
+  return $glyph->{force_bgcolor};
+}
+
+# Outline color: the inherited fgcolor when the 'border' option is set,
+# otherwise the same forced color as the fill.
+sub fgcolor {
+  my $glyph = shift;
+  return $glyph->SUPER::fgcolor if $glyph->option('border');
+  return $glyph->{force_bgcolor};
+}
+
+# Value of the 'color_subparts' option.
+sub color_subparts {
+  my $self = shift;
+  return $self->option('color_subparts');
+}
+
+# Bumping is switched off (0) while subparts are colored individually;
+# otherwise the inherited setting applies.
+sub bump {
+  my $self = shift;
+  return $self->color_subparts ? 0 : $self->SUPER::bump;
+}
+
+# Store a score-derived color in the force_bgcolor slot of the parts,
+# then let the parent class draw.  The color comes from
+# HSVtoRGB(120*(1-score),1,255).  With 'color_subparts' each part is
+# colored by its own score; otherwise all parts share the color of this
+# glyph's feature score.  A level-0 glyph without parts colors itself.
+sub draw {
+  my $self  = shift;
+  my $score = $self->feature->score;
+
+  my @targets = $self->parts;
+  if (!@targets && $self->level == 0) {
+    @targets = $self;
+  }
+
+  if ($self->color_subparts) {
+    # one color per part, from that part's own score
+    for my $part (@targets) {
+      my $part_score = $part->feature->score;
+      my @rgb   = map {int($_)} HSVtoRGB(120*(1-$part_score),1,255);
+      my $color = $self->panel->translate_color(@rgb);
+      $part->{force_bgcolor} = $color;
+    }
+  }
+  else {
+    # we're going to force all our parts to share the same color
+    my @rgb   = map {int($_)} HSVtoRGB(120*(1-$score),1,255);
+    my $color = $self->panel->translate_color(@rgb);
+    for my $part (@targets) {
+      $part->{force_bgcolor} = $color;
+    }
+  }
+
+  $self->SUPER::draw(@_);
+}
+
+# Convert an HSV color to RGB.
+#   $h  hue in degrees; any angle is accepted and wrapped into [0,360)
+#   $s  saturation, 0..1
+#   $v  value; the returned channels are on the same scale as $v
+# Returns the list ($r,$g,$b).
+sub HSVtoRGB ($$$) {
+  my ($h,$s,$v)=@_;
+
+  if( $s == 0 ) {
+    ## achromatic (grey)
+    return ($v,$v,$v);
+  }
+
+  ## wrap the hue into [0,360) so that 360, larger and negative angles
+  ## fall into the correct sector instead of producing a wrong color
+  $h -= 360 * int($h/360);
+  $h += 360 if $h < 0;
+  $h  = 0   if $h >= 360;         ## rounding of a tiny negative angle
+
+  $h /= 60;                       ## sector 0 to 5
+  my $i = int($h);
+  my $f = $h - $i;                ## fractional part of h
+  my $p = $v * ( 1 - $s );
+  my $q = $v * ( 1 - $s * $f );
+  my $t = $v * ( 1 - $s * ( 1 - $f ) );
+
+  return ($v,$t,$p) if $i < 1;
+  return ($q,$v,$p) if $i < 2;
+  return ($p,$v,$t) if $i < 3;
+  return ($p,$q,$v) if $i < 4;
+  return ($t,$p,$v) if $i < 5;
+  return ($v,$p,$q);
+}
+
+# Smallest of the given numbers.  An empty argument list returns the
+# historical sentinel 10000000000000.
+sub mMin {
+        return 10000000000000 unless @_;
+        my $n = shift;
+        for my $value (@_) {
+                $n = $value if $value < $n;
+        }
+        return($n);
+}
+
+# Largest of the given numbers, negative values included.  An empty
+# argument list returns 0, as before.
+sub mMax {
+        return 0 unless @_;
+        my $n = shift;
+        for my $value (@_) {
+                $n = $value if $value > $n;
+        }
+        return($n);
+}
+
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::redgreen_box - The "redgreen_box" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is similar to the graded_segments glyph except that it
+generates a green-E<gt>red gradient suitable for use with microarray data.
+A feature score of 0 is full green; a feature score of 1.0 is full
+red; intermediate scores are shades of yellow.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+The following glyph-specific options are recognized:
+
+  -border       Draw a fgcolor border around   0 (false)
+                the box
+
+  -color_subparts
+               Give each subpart a separate    0 (false)
+               color based on its score
+
+If the B<-color_subparts> option is true, then the glyph will
+individually colorize each of its subparts. In addition, internal
+bumping of features will be turned off. This will produce an effect
+similar to graded_segments.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_segment.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_segment.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/redgreen_segment.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,165 @@
+package Bio::Graphics::Glyph::redgreen_segment;
+#$Id: redgreen_segment.pm,v 1.3.10.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::graded_segments);
+
+sub calculate_color {  # score 0..1 -> RGB running from green (0) through yellow to red (1)
+  my ($self,$val) = @_;
+  return (0,0,0) unless defined $val && $val =~ /^(?:\d+\.?\d*|\.\d+)$/;  # black unless a plain number
+  $val = 1 if $val > 1;   # clamp: scores above 1 gave negative hues and negative channels
+  return HSVtoRGB(120*(1-$val),1,255);
+}
+
+sub HSVtoRGB ($$$) {
+  ## $h: hue in degrees, $s: saturation 0..1; returns (r,g,b) scaled like $v
+  my ($h,$s,$v)=@_;
+  my ($r,$g,$b,$i,$f,$p,$q,$t);
+
+  return ($v,$v,$v) if $s == 0;   ## achromatic (grey)
+  $h -= 360 * int($h/360);        ## wrap the hue into (-360,360) ...
+  $h += 360 if $h < 0;            ## ... then into [0,360]; negative hues gave negative channels
+  $h  = 0   if $h >= 360;         ## 360 is red again (it used to come out magenta)
+  $h /= 60;                       ## sector 0 to 5
+  $i = int($h);
+  $f = $h - $i;                   ## fractional part of h
+  $p = $v * ( 1 - $s );
+  $q = $v * ( 1 - $s * $f );
+  $t = $v * ( 1 - $s * ( 1 - $f ) );
+
+  if($i<1) {
+    $r = $v;
+    $g = $t;
+    $b = $p;
+  } elsif($i<2){
+    $r = $q;
+    $g = $v;
+    $b = $p;
+  } elsif($i<3){
+    $r = $p;
+    $g = $v;
+    $b = $t;
+  } elsif($i<4){
+    $r = $p;
+    $g = $q;
+    $b = $v;
+  } elsif($i<5){
+    $r = $t;
+    $g = $p;
+    $b = $v;
+  } else {
+    $r = $v;
+    $g = $p;
+    $b = $q;
+  }
+  return ($r,$g,$b);
+}
+
+sub mMin {  # smallest argument; an empty list still yields the old sentinel value
+        my $n = @_ ? shift : 10000000000000;   # seed with a real value so inputs above 1e13 work
+        for my $x (@_) { $n = $x if $n > $x }
+        return($n);
+}
+
+sub mMax {  # largest argument; an empty list still yields 0
+        my $n = @_ ? shift : 0;   # seed with a real value so all-negative input is not reported as 0
+        for my $x (@_) { $n = $x if $n < $x }
+        return($n);
+}
+
+
+1;
+
+=head1 NAME
+
+Bio::Graphics::Glyph::redgreen_segment - The "redgreen_segment" glyph
+
+=head1 SYNOPSIS
+
+See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is similar to the graded_segments glyph except that it
+generates a green-E<gt>red gradient suitable for use with microarray data.
+A feature score of 0 is full green; a feature score of 1.0 is full
+red; intermediate scores are shades of yellow.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/repeating_shape.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/repeating_shape.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/repeating_shape.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,145 @@
+package Bio::Graphics::Glyph::repeating_shape;
+# DAS-compatible package to use for drawing a line of repeating shapes
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+sub default_width {
+  # fallback used by draw_component when the 'width' option is undefined
+  return 10;
+}
+
+sub default_interval {
+  # fallback used by draw_component when the 'interval' option is undefined
+  return 10;
+}
+
+sub draw_component {
+  # Fill the feature's box with as many fixed-width shapes as fit, joined
+  # and flanked by straight segments drawn along $y2.
+  my ($self,$gd,@box) = @_;
+  my $fg = $self->fgcolor;
+
+  my $width    = defined $self->option('width')    ? $self->option('width')    : $self->default_width;
+  my $interval = defined $self->option('interval') ? $self->option('interval') : $self->default_interval;
+
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@box);
+  my $span = $x2-$x1;
+
+  # narrower than one shape: squeeze a single shape into the whole box
+  if ($span < $width) {
+    $self->draw_repeating_shape($gd,$x1,$y1,$x2,$y2,$fg);
+    return;
+  }
+
+  # too narrow for a shape with a full interval on either side: draw one
+  # shape in the middle and split the spare width between the two ends
+  if ($span < $width+2*$interval) {
+    my $margin = ($span - $width)/2;
+    $margin = 0 unless $interval;
+
+    $gd->line($x1,$y2,$x1+$margin,$y2,$fg);
+    $self->draw_repeating_shape($gd,$x1+$margin,$y1,$x2-$margin,$y2,$fg);
+    $gd->line($x2-$margin,$y2,$x2,$y2,$fg);
+    return;
+  }
+
+  # general case: $count shapes one period apart, with equal margins
+  my $period   = $width+$interval;
+  my $count    = int ($span / $period);
+  my $leftover = $span % $period + $interval;
+  my $margin   = $interval ? $leftover/2 : 0;
+
+  $gd->line($x1,$y2,$x1+$margin,$y2,$fg);
+  for my $n (0 .. $count-1) {
+    my $shapeStart = $x1 + $margin + $n*$period;
+    my $shapeEnd   = $shapeStart+$width;
+    $self->draw_repeating_shape($gd,$shapeStart,$y1,$shapeEnd,$y2,$fg);
+
+    # link this shape to the next one; nothing follows the last shape
+    next if $n == $count-1;
+    $gd->line($shapeEnd,$y2,$shapeEnd+$interval,$y2,$fg);
+  }
+  $gd->line($x2-$margin,$y2,$x2,$y2,$fg);
+}
+
+sub draw_repeating_shape {
+  # abstract hook: draw_component calls this once for every shape it places
+  warn "Subclasses must implement 'draw_repeating_shape'!\n";
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::repeating_shape - A glyph that draws the same shape repeatedly. 
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is a generic superclass for drawing the same shape repeatedly.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -width      Width of one shape            10
+
+  -interval   Interval between shapes       10
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/rndrect.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/rndrect.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/rndrect.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+package Bio::Graphics::Glyph::rndrect;
+
+use strict;
+use base 'Bio::Graphics::Glyph::generic';
+
+# override draw_component to draw a round-edged rect rather than a rectangle
+sub draw_component {
+  my ($self,$gd,@origin) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@origin);
+
+  my $poly = $self->polygon_package->new();
+  my @corners;
+
+  if ($x2-$x1 > 3) {
+    # wide enough to round: bevel each corner and, on boxes taller than 6,
+    # add a vertex at mid-height on the right and left edges
+    my $tall = ($y2 - $y1) > 6;
+    my $midY = $y1 + ($y2 - $y1) / 2;
+    @corners = (
+      [$x1+1, $y1+1],
+      [$x1+2, $y1],
+      [$x2-2, $y1],
+      [$x2-1, $y1+1],
+      ($tall ? [$x2, $midY] : ()),
+      [$x2-1, $y2-1],
+      [$x2-2, $y2],
+      [$x1+2, $y2],
+      [$x1+1, $y2-1],
+      ($tall ? [$x1, $midY] : ()),
+    );
+  }
+  else {
+    # too narrow to round: plain rectangle
+    @corners = ([$x1,$y1], [$x2,$y1], [$x2,$y2], [$x1,$y2]);
+  }
+  $poly->addPt(@$_) for @corners;
+
+  # fill first, then stroke the outline on top
+  $gd->filledPolygon($poly,$self->fillcolor);
+  $gd->polygon($poly,$self->fgcolor);
+}
+
+# group sets connector to 'solid'
+sub connector {
+  my ($self,@args) = @_;
+  # the inherited connector is consulted only when all_callbacks is true
+  return $self->all_callbacks ? $self->SUPER::connector(@args) : 'solid';
+}
+
+sub bump {
+  my ($self,@args) = @_;
+  # no bumping (0) unless all_callbacks is true, in which case the parent decides
+  return $self->all_callbacks ? $self->SUPER::bump(@args) : 0;
+}
+
+
+1;
+
+
+=head1 NAME
+
+Bio::Graphics::Glyph::rndrect - The "round rect" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph was designed to show seq features in round edge rectangles.
+The glyph will be a rectangle if its width is E<lt> 4 pixels
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Shengqiang Shu E<lt>sshu at bdgp.lbl.govE<gt>
+
+Copyright (c) 2001 BDGP
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ruler_arrow.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ruler_arrow.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/ruler_arrow.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,358 @@
+package Bio::Graphics::Glyph::ruler_arrow;
+# package to use for drawing an arrow as ruler (5' and 3' are marked as label)
+
+# $Id: ruler_arrow.pm,v 1.5.12.1 2006/10/02 23:10:20 sendu Exp $
+# Non object-oriented utilities used here-and-there in Bio::Graphics modules
+
+=head1 NAME
+
+Bio::Graphics::Glyph::ruler_arrow - glyph for drawing an arrow as ruler (5' and 3' are marked as label)
+
+=cut
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+my %UNITS = (K => 1000,
+	     M => 1_000_000,
+	     G => 1_000_000_000);
+
+sub pad_bottom {  # inherited padding, plus one font height when the 'tick' option is set
+  my ($self,@args) = @_;
+  my $pad = $self->SUPER::pad_bottom(@args);
+  $pad += $self->font->height if $self->option('tick');
+  return $pad;
+}
+
+# override draw method
+sub draw {
+  my ($self,@args) = @_;
+  my $parallel = $self->option('parallel');
+  $parallel = 1 unless defined $parallel;   # an unset option means parallel
+  if ($parallel) { $self->draw_parallel(@args) }
+  else           { $self->draw_perpendicular(@args) }
+  $self->draw_label(@args) if $self->option('label');
+}
+
+sub draw_perpendicular {
+  # Draw vertical arrow(s) spanning $y1..$y2 at the x positions of the
+  # feature's two ends, with heads chosen by the northeast/southwest options.
+  my ($self,$gd,@origin) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@origin);
+
+  my $ne = $self->option('northeast');
+  my $sw = $self->option('southwest');
+  # with neither option set, put a head on both ends of the shaft
+  ($ne,$sw) = (1,1) unless defined($ne) || defined($sw);
+
+  my $fg   = $self->set_pen;
+  my $barb = ($y2-$y1)/4;          # reach of each arrowhead stroke
+
+  # one vertical arrow per end of the feature; a single arrow when the
+  # two ends coincide
+  my @heads = ( [$ne,$y1,+1], [$sw,$y2,-1] );   # [wanted, tip y, barb direction]
+
+  for my $x ($x1 == $x2 ? ($x1) : ($x1,$x2)) {
+    for my $head (@heads) {
+      my ($wanted,$tip,$dir) = @$head;
+      next unless $wanted;
+      $gd->line($x,$y1,$x,$y2,$fg);                      # shaft
+      $gd->line($x-$barb,$tip+$dir*$barb,$x,$tip,$fg);   # barb on the low-x side
+      $gd->line($x+$barb,$tip+$dir*$barb,$x,$tip,$fg);   # barb on the high-x side
+    }
+  }
+
+  # add a label if requested
+#  $self->draw_label($gd,$dx,$dy) if ($self->option('label') && !$self->option('ruler'));
+  # this draws the label aligned to the left
+}
+
+sub draw_parallel {
+  # Draw the ruler parallel to the sequence: the axis line, arrowheads or
+  # end bases and, when the 'tick' option is set, tick marks with labels.
+  my $self = shift;
+  my $gd = shift;
+  my ($dx) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $fg = $self->set_pen;
+  my $a2 = ($self->height)/2;
+  my $center = $y1+$a2;
+  # keep the axis inside the panel
+  $x1 = $self->panel->left  if $x1 < $self->panel->left;
+  $x2 = $self->panel->right if $x2 > $self->panel->right;
+
+  my ($sw,$ne,$base_w,$base_e) = $self->arrowheads;
+  $gd->line($x1,$center,$x2,$center,$fg);
+  $self->arrowhead($gd,$x1,$center,$a2,-1) if $sw; # west arrow
+  $self->arrowhead($gd,$x2,$center,$a2,+1) if $ne; # east arrow
+  $gd->line($x2,$center-$a2,$x2,$center+$a2,$fg) if $base_e; #east base
+  $gd->line($x1,$center-$a2,$x1,$center+$a2,$fg) if $base_w; #west base
+
+  # turn on ticks
+  if ($self->option('tick')) {
+    local $^W = 0;  # dumb uninitialized variable warning
+    my $font = $self->font;
+    my $width      = $font->width;
+    my $font_color = $self->fontcolor;
+    my $height   = $self->height;
+
+    my $relative = $self->option('relative_coords');
+    my $start    = $relative ? 1 : $self->feature->start;
+    my $stop     = $start + $self->feature->length  - 1;
+
+    my $offset   = $relative ? $self->feature->start-1 : 0;
+    my $reversed = $self->feature->strand < 0;
+
+    my $units = $self->option('units') || '';
+    my $divisor = $units ? ($UNITS{$units} || 1) : undef;  # was "my ... if $units", which can keep a stale value
+
+    my ($major_ticks,$minor_ticks) = $self->panel->ticks($start,$stop,$font,$divisor);
+
+    ## Does the user want to override the internal scale?
+    my $scale = $self->option('scale');
+
+    my $left  = $sw ? $x1+$height : $x1;   # ticks stay clear of the arrowheads
+    my $right = $ne ? $x2-$height : $x2;
+
+    my $format = ($major_ticks->[1]-$major_ticks->[0])/($divisor||1) < 1 ? "%.1f$units" : "%d$units";
+
+    for my $i (@$major_ticks) {
+      my $tickpos = $dx + ($reversed ? $self->map_pt($stop - $i + $offset)
+                                     : $self->map_pt($i + $offset));
+      next if $tickpos < $left or $tickpos > $right;
+      $gd->line($tickpos,$center-$a2,$tickpos,$center+$a2,$fg);
+      my $label = $scale ? $i / $scale : $i;
+      if ($units) {
+        my $scaled = $label/$divisor;
+        $label = sprintf($format,$scaled);
+      }
+      my $middle = $tickpos - (length($label) * $width)/2;
+      $gd->string($font,$middle,$center+$a2-1,$label,$font_color)
+        unless ($self->option('no_tick_label'));
+    }
+
+    if ($self->option('tick') >= 2) {
+      my $a4 = $self->height/4;
+      for my $i (@$minor_ticks) {
+        my $tickpos = $dx + ($reversed ? $self->map_pt($stop - $i + $offset)
+                                       : $self->map_pt($i + $offset));
+        next if $tickpos < $left or $tickpos > $right;
+        $gd->line($tickpos,$center-$a4,$tickpos,$center+$a4,$fg);
+      }
+    }
+  }
+
+  # the 5'/3' label is drawn separately: draw() calls draw_label
+}
+
+sub arrowheads {
+  # Returns the flags ($west_head,$east_head,$west_base,$east_base).
+  my $self = shift;
+  my ($ne,$sw);
+  if ($self->option('double')) {
+    ($ne,$sw) = (1,1);
+  } else {
+    $ne = $self->option('northeast') || $self->option('east');
+    $sw = $self->option('southwest') || $self->option('west');
+  }
+  # nothing asked for explicitly: let the feature's strand pick the head
+  if (!defined($ne) && !defined($sw)) {
+    $ne = 1 if $self->feature->strand > 0;
+    $sw = 1 if $self->feature->strand < 0;
+  }
+  # with the 'base' option, each end lacking an arrowhead gets a base mark
+  return $self->option('base') ? ($sw,$ne,!$sw,!$ne) : ($sw,$ne,0,0);
+}
+
+sub draw_label {
+  # Label the ends of the ruler with 5' and 3'; the top-left label also
+  # carries the tick spacing when ticks are drawn without their own labels.
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+
+  my ($label5,$label3) = ("5'","3'");
+  #-1 direction mean lower end is 3' (minus strand on top); swap first so the top-left label follows it
+  ($label5, $label3) = ($label3, $label5) if ($self->option('direction') || 0) == -1;
+
+  my $top_left_label = $label5;
+  if ($self->option('no_tick_label') && $self->option('tick')) {
+    my $relative = $self->option('relative_coords');
+    my $start    = $relative ? 1 : $self->feature->start;
+    my $stop     = $start + $self->feature->length  - 1;
+
+    my $units   = $self->option('units') || '';
+    my $divisor = $units ? ($UNITS{$units} || 1) : undef;  # was "my ... if $units" (stale values)
+    # $major_ticks is an array ref of tick positions: report the spacing, not the reference itself
+    my ($major_ticks) = $self->panel->ticks($start,$stop,$self->font,$divisor);
+    my $per = ($self->option('tick') >= 2) ? "major tick)" : "tick)";
+    $top_left_label .= " (" . ($major_ticks->[1]-$major_ticks->[0]) . " bp/$per" if @$major_ticks > 1;
+  }
+
+  my $x = $self->left + $left;
+  $x = $self->panel->left + 1 if $x <= $self->panel->left;
+  my $font = $self->option('labelfont') || $self->font;
+  $gd->string($font,
+              $x,
+              $self->top + $top,
+              $top_left_label,
+              $self->fontcolor);
+  my $x1 = $left + $self->panel->right - $font->width*length($label3);
+  $gd->string($font,
+              $x1,
+              $self->top + $top,
+              $label3,
+              $self->fontcolor);
+  if ($self->option('both')) {#minus strand as well
+      $gd->string($font,
+                  $x,
+                  $self->bottom - $self->pad_bottom + $top,
+                  $label3,
+                  $self->fontcolor);
+      my $x1 = $left + $self->panel->right - $font->width*length($label5);
+      $gd->string($font,
+                  $x1,
+                  $self->bottom - $self->pad_bottom + $top,
+                  $label5,
+                  $self->fontcolor);
+  }
+}
+
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::ruler_arrow - The "ruler_arrow" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws arrows.  Label, if requested, will be 5' and 3' at both ends
+and tick scale is printed if no_tick_label option is set and tick option set.
+Depending on options, the arrows can be labeled, be oriented vertically 
+or horizontally, or can contain major and minor ticks suitable for use as a scale.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description               Default
+  ------      -----------               -------
+
+  -tick       Whether to draw major         0
+              and minor ticks.
+	      0 = no ticks
+	      1 = major ticks
+	      2 = major and minor ticks
+  -label      5' at start, 3' at end        0
+              above arrow
+  -both       5', 3' above,                 0
+              and 3', 5' below arrow
+  -direction  0 = ruler is plus strand      0
+              -1 = ruler is minus strand
+
+  -parallel   Whether to draw the arrow     true
+	      parallel to the sequence
+	      or perpendicular to it.
+
+  -northeast  Force a north or east         true
+	      arrowhead(depending 
+	      on orientation)
+
+  -east       synonym of above
+
+  -southwest  Force a south or west         true
+	      arrowhead(depending 
+	      on orientation)
+
+  -west       synonym of above
+
+  -double     force-doubleheaded arrow
+
+  -base       Draw a vertical base at the   false
+              non-arrowhead side
+
+  -scale      Reset the labels on the arrow false
+              to reflect an externally 
+              established scale.
+
+Set -parallel to false to display a point-like feature such as a
+polymorphism, or to indicate an important location.  If the feature
+start == end, then the glyph will draw a single arrow at the
+designated location:
+
+       ^
+       |
+
+Otherwise, there will be two arrows at the start and end:
+
+       ^              ^
+       |              |
+
+Scale: Pass in a externally established scale to reset the labels on
+the arrow.  This is particularly useful for manually constructed
+images where the founding parameters of the panel are not 1-based.
+For example, a genetic map interval ranging from 0.1 - 0.3 can be
+constructed by first multiplying every value by 100. Passing
+
+  arrow(-scale=>100);
+
+will draw tick marks labelled appropriately to your external scale.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Shengqiang Shu E<lt>sshu at bdgp.lbl.govE<gt>
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 BDGP, Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/saw_teeth.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/saw_teeth.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/saw_teeth.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,90 @@
+package Bio::Graphics::Glyph::saw_teeth;
+# DAS-compatible package to use for drawing a line of saw teeth
+
+use strict;
+use base qw(Bio::Graphics::Glyph::repeating_shape);
+
+sub draw_repeating_shape {
+  # One tooth: a stroke from ($x1,$y2) to the apex, which sits at height
+  # $y1 midway between $x1 and $x2, then a stroke on to ($x2,$y2).
+  my ($self, $gd, $x1, $y1, $x2, $y2, $fg) = @_;
+  my @apex = (($x2-$x1) / 2 + $x1, $y1);
+  $gd->line($x1,$y2,@apex,$fg);
+  $gd->line(@apex,$x2,$y2,$fg);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::saw_teeth - The "saw teeth" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a line of saw teeth.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -width      Width of one tooth            10
+
+  -interval   Interval between teeth        10
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segmented_keyglyph.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segmented_keyglyph.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segmented_keyglyph.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+package Bio::Graphics::Glyph::segmented_keyglyph;
+
+# $Id: segmented_keyglyph.pm,v 1.6.6.1 2006/10/02 23:10:20 sendu Exp $
+# Don't use this package.  It's just for inheriting the segmented glyph in the panel key.
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+sub make_key_feature {
+  # Build the sample feature shown in the panel key: two segments that
+  # cover pixels 0->50 and 60->80 once converted to sequence coordinates.
+  my $self = shift;
+  my $bp_per_pixel = 1/$self->scale;
+  my $offset = $self->panel->offset;
+  my @segments = map { [ $_->[0]*$bp_per_pixel + $offset, $_->[1]*$bp_per_pixel + $offset ] }
+                 ( [0,50], [60,80] );
+  my $feature = Bio::Graphics::Feature->new(-segments => \@segments,
+                                            -name     => $self->make_key_name(),
+                                            -strand   => '+1',
+                                           );
+  return $feature;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::segmented_keyglyph - The "segmented_keyglyph" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used internally by Bio::Graphics::Panel as a base class
+for drawing the keys at the bottom of the panel.  It should not be
+used explicitly.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segments.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segments.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/segments.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,780 @@
+package Bio::Graphics::Glyph::segments;
+#$Id: segments.pm,v 1.44.2.5 2006/11/02 03:44:41 lstein Exp $
+
+use strict;
+use Bio::Location::Simple;
+
+use constant RAGGED_START_FUZZ => 25;  # will show ragged ends of alignments
+                                       # up to this many bp.
+
+use constant DEBUG => 0;
+
+# These are just offsets into an array data structure
+use constant TARGET    => 0;
+use constant SRC_START => 1;
+use constant SRC_END   => 2;
+use constant TGT_START => 3;
+use constant TGT_END   => 4;
+
+use base qw(Bio::Graphics::Glyph::segmented_keyglyph Bio::Graphics::Glyph::generic);
+
+# Lookup table mapping a single nucleotide letter to its complement.
+# Only g/a/t/c/n are present (in both cases); any other key, including a
+# multi-letter string, yields undef.
+my %complement = (g=>'c',a=>'t',t=>'a',c=>'g',n=>'n',
+		  G=>'C',A=>'T',T=>'A',C=>'G',N=>'N');
+
+# Extra left padding (in pixels) needed to show the ragged, unaligned start
+# of a forward-oriented target.  Falls back to the inherited padding unless
+# this is a sub-glyph (level > 0), target drawing is on, a ragged allowance
+# is configured, the DNA fits, and the hit starts within that allowance.
+sub pad_left {
+  my $self = shift;
+
+  if ($self->level > 0) {
+    # -ragged_start is the deprecated switch; -ragged_extra carries a count
+    my $fuzz = $self->option('ragged_start')
+      ? RAGGED_START_FUZZ
+      : $self->option('ragged_extra');
+
+    if ($self->draw_target && $fuzz && $self->dna_fits) {
+      my $hit = eval { $self->feature->hit };
+      if ($hit && $hit->start < $hit->end && $hit->start < $fuzz) {
+        return ($hit->start - 1) * $self->scale;
+      }
+    }
+  }
+
+  return $self->SUPER::pad_left;
+}
+
+# Extra right padding (in pixels) needed to show the ragged, unaligned end
+# of a reverse-oriented target (hit end < hit start).  Falls back to the
+# inherited padding under the same conditions as pad_left.
+# NOTE(review): the guard tests $hit->start against the ragged allowance
+# while the padding is computed from $hit->end -- confirm this asymmetry
+# with pad_left is intended.
+sub pad_right {
+  my $self = shift;
+
+  if ($self->level > 0) {
+    # -ragged_start is the deprecated switch; -ragged_extra carries a count
+    my $fuzz = $self->option('ragged_start')
+      ? RAGGED_START_FUZZ
+      : $self->option('ragged_extra');
+
+    if ($self->draw_target && $fuzz && $self->dna_fits) {
+      my $hit = eval { $self->feature->hit };
+      if ($hit && $hit->end < $hit->start && $hit->start < $fuzz) {
+        return ($hit->end - 1) * $self->scale;
+      }
+    }
+  }
+
+  return $self->SUPER::pad_right;
+}
+
+# Value of the -draw_target option, suppressed (empty return) whenever
+# -draw_dna is set, since the two displays are mutually exclusive.
+sub draw_target {
+  my $self = shift;
+  if ($self->option('draw_dna')) {
+    return;
+  }
+  return $self->option('draw_target');
+}
+
+# Value of the -draw_protein_target option, suppressed (empty return)
+# whenever -draw_protein is set.
+#
+# The original body carried a second, unreachable
+# "return $self->option('draw_target')" after the first return; it could
+# never execute and has been removed.  Behaviour is unchanged.
+sub draw_protein_target {
+  my $self = shift;
+  return if $self->option('draw_protein');
+  return $self->option('draw_protein_target');
+}
+
+# Glyph height in pixels.
+#
+# Normally the inherited height.  When target DNA or protein residues are
+# going to be drawn (option enabled and the residues fit at this scale),
+# the glyph must be at least as tall as the font, so the larger of the two
+# heights is returned.
+sub height {
+  my $self = shift;
+  my $height = $self->SUPER::height;
+  return $height unless $self->draw_target || $self->draw_protein_target;
+  if ($self->draw_target) {
+    return $height unless $self->dna_fits;
+  }
+  if ($self->draw_protein_target) {
+    return $height unless $self->protein_fits;
+  }
+  my $fontheight = $self->font->height;
+  # Previously the sub fell off the end when the font was not taller than
+  # the glyph, yielding a false value instead of the configured height.
+  return $fontheight > $height ? $fontheight : $height;
+}
+
+# Connector style between segments: the inherited setting, defaulting to
+# 'solid' when that is false.  When all_callbacks is true the inherited
+# value is passed through untouched.
+sub connector {
+  my $self = shift;
+  unless ($self->all_callbacks) {
+    return ($self->SUPER::connector(@_) || 'solid');
+  }
+  return $self->SUPER::connector(@_);
+}
+
+# Components of a segmented glyph never bump (always 0), except when
+# all_callbacks is true, in which case the inherited answer is used.
+sub bump {
+  my $self = shift;
+  return $self->all_callbacks ? $self->SUPER::bump(@_) : 0;
+}
+
+# Depth of subfeatures to descend into: whatever Bio::Graphics::Glyph
+# reports when that is defined, otherwise a single level.
+sub maxdepth {
+  my $self  = shift;
+  my $depth = $self->Bio::Graphics::Glyph::maxdepth;
+  return defined $depth ? $depth : 1;
+}
+
+# this was willfully confusing
+#sub fontcolor {
+#  my $self = shift;
+#  return $self->SUPER::fontcolor unless $self->draw_target;# || $self->option('draw_dna');
+#  return $self->SUPER::fontcolor unless $self->dna_fits;
+#  return $self->bgcolor;
+#}
+
+# Draw the glyph.  Arguments are ($gd,$x,$y,...) and are forwarded as given.
+#
+# Delegates to the inherited draw() unless -draw_target is active, the DNA
+# fits at the current scale, and the hit can supply a sequence.  Otherwise
+# the labels are drawn here, the alignment is rendered base-by-base via
+# draw_multiple_alignment(), and connectors are added for top-level glyphs.
+sub draw {
+  my $self = shift;
+
+  unless ($self->draw_target) {
+    return $self->SUPER::draw(@_);
+  }
+  unless ($self->dna_fits) {
+    return $self->SUPER::draw(@_);
+  }
+
+  for my $decoration (qw(label description part_labels)) {
+    next unless $self->option($decoration);
+    $self->draw_label(@_)       if $decoration eq 'label';
+    $self->draw_description(@_) if $decoration eq 'description';
+    $self->draw_part_labels(@_) if $decoration eq 'part_labels';
+  }
+
+  # no target sequence available: fall back to the ordinary rendering
+  my $has_sequence = eval {$self->feature->hit->seq};
+  return $self->SUPER::draw(@_) unless $has_sequence;
+
+  my $drew_sequence = $self->draw_multiple_alignment(@_);
+
+  my ($gd,$x,$y) = @_;
+  if ($drew_sequence) {
+    $y += $self->top + $self->pad_top;  # something is wrong - this is a hack/workaround
+  }
+  my $connector     =  $self->connector;
+  $self->draw_connectors($gd,$x,$y)
+    if $connector && $connector ne 'none' && $self->level == 0;
+
+}
+
+# Draw one component, then (when -draw_protein_target is set and the
+# protein fits at this scale) overlay the residues of the hit's protein
+# sequence, one letter every three bases.
+#
+# Arguments: ($gd,$left,$top) as passed to the inherited draw_component.
+# Plus-strand features are lettered left-to-right from the feature start;
+# minus-strand features right-to-left from the feature end.  Letters that
+# would land outside the panel are skipped.
+sub draw_component {
+  my $self = shift;
+  my ($gd,$l,$t) = @_;
+  $self->SUPER::draw_component(@_);
+  return unless $self->option('draw_protein_target') && $self->protein_fits;
+  my $hit      = eval {$self->feature->hit} or return;
+  my $protein  = uc eval {$hit->seq->seq} or return;
+  my (undef,$top) = $self->bounds($l,$t);
+
+  my $scale = $self->scale;
+  # diagnostic only: was emitted unconditionally on every component
+  warn "scale = $scale" if DEBUG;
+  my @letters = split '',$protein;
+  my $color = $self->fgcolor;
+  my $font  = $self->font;
+  my $strand = $self->feature->strand || 0;
+
+  my $panel_left           = $self->panel->left;
+  my $panel_right          = $self->panel->right;
+
+  my ($x1,$x2)                    = $self->map_no_trunc($self->feature->start,$self->feature->end);
+
+  if ($strand >= 0) {  # + strand features
+    for (0..@letters-1) {
+      next if $x1 < $panel_left or $x1 > $panel_right;
+      $gd->char($font,$x1+1,$top,$letters[$_],$color);
+    } continue {
+      $x1 += $scale * 3;   # advance one codon
+    }
+  } else {             # - strand features
+    for (0..@letters-1) {
+      next if $x2 < $panel_left or $x2 > $panel_right;
+      $gd->char($font,$x2+1,$top,$letters[$_],$color);
+    } continue {
+      $x2 -= $scale * 3;   # step back one codon
+    }
+  }
+}
+
+# Render the target (hit) sequence of an alignment base-by-base over the
+# glyph, highlighting mismatches and marking gaps and insertions.
+#
+# Arguments: the GD image, then ($left,$top,$partno,$total_parts).
+# Options read: ragged_start / ragged_extra, true_target, show_mismatch,
+# realign, mismatch_color.
+# Returns the number of target bases drawn (undef when nothing was drawn).
+#
+# Outline:
+#   1. blank out the parts' boxes with the background colour;
+#   2. build [TARGET,SRC_START,SRC_END,TGT_START,TGT_END] records for every
+#      subfeature overlapping the panel (optionally realigned);
+#   3. extend the outermost records for ragged ends;
+#   4. clip records to the panel and make coordinates relative;
+#   5. draw bases, then insertions / gaps between consecutive records;
+#   6. pad the extreme ends with dashes.
+sub draw_multiple_alignment {
+  my $self = shift;
+  my $gd   = shift;
+  my ($left,$top,$partno,$total_parts) = @_;
+
+  my $flipped              = $self->flip;
+  my $ragged_extra         = $self->option('ragged_start')
+                               ? RAGGED_START_FUZZ : $self->option('ragged_extra');
+  my $true_target          = $self->option('true_target');
+  my $show_mismatch        = $self->option('show_mismatch');
+  my $do_realign           = $self->option('realign');
+
+  my $pixels_per_base      = $self->scale;
+  my $feature              = $self->feature;
+  my $panel                = $self->panel;
+  my ($abs_start,$abs_end)     = ($feature->start,$feature->end);
+  my ($tgt_start,$tgt_end)     = ($feature->hit->start,$feature->hit->end);
+  my ($panel_start,$panel_end) = ($self->panel->start,$self->panel->end);
+  my $strand               = $feature->strand;
+  my $panel_left           = $self->panel->left;
+  my $panel_right          = $self->panel->right;
+  my $drew_sequence;
+
+  if ($tgt_start > $tgt_end) { #correct for data problems
+    $strand    = -1;
+    ($tgt_start,$tgt_end) = ($tgt_end,$tgt_start);
+  }
+
+  warn "TGT_START..TGT_END = $tgt_start..$tgt_end" if DEBUG;
+
+  my ($bl,$bt,$br,$bb)     = $self->bounds($left,$top);
+  $top = $bt;
+
+  # paint over each part with the background colour before lettering
+  for my $p ($self->parts) {
+    my @bounds = $p->bounds($left,$top);
+    $self->filled_box($gd,@bounds,$self->bgcolor,$self->bgcolor);
+  }
+
+  my @s                     = $self->_subfeat($feature);
+
+  # FIX ME
+  # workaround for features in which top level feature does not have a hit but
+  # subfeatures do. There is total breakage of encapsulation here because sometimes
+  # a chado alignment places the aligned segment in the top-level feature, and sometimes
+  # in the child feature.
+  unless (@s || $feature->isa('Bio::DB::GFF::Feature')) {
+    @s = ($feature);
+  }
+
+  my $can_realign = $do_realign && eval { require Bio::Graphics::Browser::Realign; 1 };
+
+  my (@segments,%strands);
+  for my $s (@s) {
+    my $target = $s->hit;
+    my ($src_start,$src_end) = ($s->start,$s->end);
+    next unless $src_start <= $panel_end && $src_end >= $panel_start;
+
+    my ($tgt_start,$tgt_end) = ($target->start,$target->end);
+
+    my $strand_bug;
+    unless (exists $strands{$target}) {
+      my $strand = $feature->strand;
+      if ($tgt_start > $tgt_end) { #correct for data problems
+        $strand    = -1;
+        ($tgt_start,$tgt_end) = ($tgt_end,$tgt_start);
+        $strand_bug++;
+      }
+      $strands{$target} = $strand;
+    }
+
+    # realign for internal gaps, if requested
+    if ($can_realign) {
+      warn   "Realigning [$target,$src_start,$src_end,$tgt_start,$tgt_end].\n" if DEBUG;
+      my ($sdna,$tdna) = ($s->dna,$target->dna);
+      my @result = $self->realign($sdna,$tdna);
+      foreach (@result) {
+        warn "=========> [$target,@$_]\n" if DEBUG;
+        my $a = $strands{$target} >= 0 ? [$target,$_->[0]+$src_start,$_->[1]+$src_start,$_->[2]+$tgt_start,$_->[3]+$tgt_start]
+                                       : [$target,$src_end-$_->[1],$src_end-$_->[0],$_->[2]+$tgt_start,$_->[3]+$tgt_start];
+        warn "[$target,$_->[0]+$src_start,$_->[1]+$src_start,$tgt_end-$_->[3],$tgt_end-$_->[2]]" if DEBUG;
+        warn "=========> [@$a]\n" if DEBUG;
+        warn substr($sdna,     $_->[0],$_->[1]-$_->[0]+1),"\n" if DEBUG;
+        warn substr($tdna,$_->[2],$_->[3]-$_->[2]+1),"\n"      if DEBUG;
+        push @segments,$a;
+      }
+    }
+    else {
+      push @segments,[$target,$src_start,$src_end,$tgt_start,$tgt_end];
+    }
+  }
+
+  # Nothing overlaps the panel: there is nothing to letter, and the ragged-end
+  # code below would otherwise dereference $segments[0] / $segments[-1].
+  return $drew_sequence unless @segments;
+
+  # get 'em in the right order so that we don't have to worry about
+  # where the beginning and end are.
+  @segments = sort {$a->[TGT_START]<=>$b->[TGT_START]} @segments;
+
+  # adjust for ragged (nonaligned) ends
+  my ($offset_left,$offset_right) = (0,0);
+  if ($ragged_extra && $ragged_extra > 0) {
+
+    # add a little rag to the left end
+    $offset_left = $segments[0]->[TGT_START] > $ragged_extra ? $ragged_extra : $segments[0]->[TGT_START]-1;
+    if ($strand >= 0) {
+      $offset_left     = $segments[0]->[SRC_START]-1 if $segments[0]->[SRC_START] - $offset_left < 1;
+      $abs_start                -= $offset_left;
+      $tgt_start                -= $offset_left;
+      $segments[0]->[SRC_START] -= $offset_left;
+      $segments[0]->[TGT_START] -= $offset_left;
+    } else {
+      $abs_end                  += $offset_left;
+      $tgt_start                -= $offset_left;
+      $segments[0]->[SRC_END]   += $offset_left;
+      $segments[0]->[TGT_START] -= $offset_left;
+    }
+
+    # add a little rag to the right end - this is complicated because
+    # we don't know what the length of the underlying dna is, so we
+    # use the subfeat method to find out
+    my $current_end     = $segments[-1]->[TGT_END];
+    $offset_right          = length $segments[-1]->[TARGET]->subseq($current_end+1,$current_end+$ragged_extra)->seq;
+    # the right end grows by $offset_right (was mistakenly $offset_left)
+    if ($strand >= 0) {
+      $abs_end                 += $offset_right;
+      $tgt_end                 += $offset_right;
+      $segments[-1]->[TGT_END] += $offset_right;
+      $segments[-1]->[SRC_END] += $offset_right;
+    } else {
+      $abs_start                 -= $offset_right;
+      $tgt_end                   += $offset_right;
+      $segments[-1]->[TGT_END]   += $offset_right;
+      $segments[-1]->[SRC_START] -= $offset_right;
+    }
+  }
+
+  # get the DNAs now - a little complicated by the necessity of using
+  # the subseq() method
+  my $ref_dna = $feature->subseq(1-$offset_left,$feature->length+$offset_right)->seq;
+  my $tgt_dna = $feature->hit->subseq(1-$offset_left,$feature->length+$offset_right)->seq;
+
+  # work around changes in the API
+  $ref_dna    = $ref_dna->seq if ref $ref_dna and $ref_dna->can('seq');
+  $tgt_dna    = $tgt_dna->seq if ref $tgt_dna and $tgt_dna->can('seq');
+
+  $ref_dna    = lc $ref_dna;
+  $tgt_dna    = lc $tgt_dna;
+
+  # sanity check.  Let's see if they look like they're lining up
+  warn "$feature dna sanity check:\n$ref_dna\n$tgt_dna\n" if DEBUG;
+
+  # now we're all lined up, and we're going to adjust everything to fall within the bounds
+  # of the left and right panel coordinates
+  my %clip;
+  for my $seg (@segments) {
+
+    my $target = $seg->[TARGET];
+    warn "preclip [@$seg]\n" if DEBUG;
+
+    # left clipping
+    if ( (my $delta = $seg->[SRC_START] - $panel_start) < 0 ) {
+      warn "clip left delta = $delta" if DEBUG;
+      $seg->[SRC_START] = $panel_start;
+      if ($strand >= 0) {
+        $seg->[TGT_START] -= $delta;
+      }
+    }
+
+    # right clipping
+    if ( (my $delta = $panel_end - $seg->[SRC_END]) < 0) {
+      warn "clip right delta = $delta" if DEBUG;
+      $seg->[SRC_END] = $panel_end;
+      if ($strand < 0) {
+        $seg->[TGT_START] -= $delta;
+      }
+    }
+
+    # target end is recomputed so both spans have the same length
+    my $length = $seg->[SRC_END]-$seg->[SRC_START]+1;
+    $seg->[TGT_END] = $seg->[TGT_START]+$length-1;
+
+    warn "Clipping gives [@$seg], tgt_start = $tgt_start\n" if DEBUG;
+  }
+
+  # remove segments that got clipped out of existence
+  @segments = grep { $_->[SRC_START]<=$_->[SRC_END] } @segments;
+
+  # relativize coordinates
+  if ($strand < 0) {
+    $ref_dna = $self->reversec($ref_dna);
+    $tgt_dna = $self->reversec($tgt_dna);
+  }
+
+  for my $seg (@segments) {
+    $seg->[SRC_START] -= $abs_start - 1;
+    $seg->[SRC_END]   -= $abs_start - 1;
+    $seg->[TGT_START] -= $tgt_start - 1;
+    $seg->[TGT_END]   -= $tgt_start - 1;
+
+    warn "src segment = $seg->[SRC_START]", "..",$seg->[SRC_END] if DEBUG;
+    warn "tgt segment = $seg->[TGT_START]", "..",$seg->[TGT_END] if DEBUG;
+    if ($strand < 0) {
+      ($seg->[TGT_START],$seg->[TGT_END]) = (length($tgt_dna)-$seg->[TGT_END]+1,length($tgt_dna)-$seg->[TGT_START]+1);
+    }
+    if (DEBUG) {
+      warn "$feature: relativized coordinates = [@$seg]\n";
+      warn $self->_subsequence($ref_dna,$seg->[SRC_START],$seg->[SRC_END]),"\n";
+      warn $self->_subsequence($tgt_dna,$seg->[TGT_START],$seg->[TGT_END]),"\n";
+    }
+  }
+
+  # draw
+  my $color = $self->fgcolor;
+  my $font  = $self->font;
+  my $lineheight = $font->height;
+  my $fontwidth  = $font->width;
+
+  my $mismatch = $self->factory->translate_color($self->option('mismatch_color') || 'lightgrey');
+  my $grey     = $self->factory->translate_color('gray');
+
+  # maps (relative source position, offset) to an x pixel; mirrored when flipped
+  my $base2pixel =
+    $self->flip ?
+      sub {
+        my ($src,$tgt) = @_;
+        my $a = $fontwidth + ($abs_start + $src-$panel_start-1 + $tgt) * $pixels_per_base - 1;
+        $panel_right - $a;
+      }
+      : sub {
+        my ($src,$tgt) = @_;
+        $fontwidth/2 + $left + ($abs_start + $src-$panel_start-1 + $tgt) * $pixels_per_base - 1;
+      };
+
+  my ($tgt_last_end,$src_last_end,$leftmost,$rightmost);
+  for my $seg (sort {$a->[SRC_START]<=>$b->[SRC_START]} @segments) {
+    my $y = $top-1;
+
+    for (my $i=0; $i<$seg->[SRC_END]-$seg->[SRC_START]+1; $i++) {
+
+      my $src_base = $self->_subsequence($ref_dna,$seg->[SRC_START]+$i,$seg->[SRC_START]+$i);
+      my $tgt_base = $self->_subsequence($tgt_dna,$seg->[TGT_START]+$i,$seg->[TGT_START]+$i);
+#      warn $seg->[TGT_START]+$i,' ',$seg->[TGT_START]+$i if DEBUG;
+      my $x = $base2pixel->($seg->[SRC_START],$i);
+      $leftmost = $x if !defined $leftmost  || $leftmost  > $x;
+      $rightmost= $x if !defined $rightmost || $rightmost < $x;
+
+      next unless $tgt_base && $x >= $panel_left && $x <= $panel_right;
+
+      $self->filled_box($gd,$x-$pixels_per_base/2+2,$y+1,$x+$pixels_per_base/2+1,$y+$lineheight,$mismatch,$mismatch)
+        if $show_mismatch && $tgt_base && $src_base ne $tgt_base && $tgt_base !~ /[nN]/;
+      $tgt_base = $complement{$tgt_base} if $true_target && $strand < 0;
+      $gd->char($font,$x,$y,$tgt_base,$tgt_base =~ /[nN]/ ? $grey : $color);
+
+      $drew_sequence++;
+    }
+
+    # indicate the presence of insertions in the target
+    if (defined $tgt_last_end) {
+      my $delta     = $seg->[TGT_START] - $tgt_last_end;
+      my $src_delta = $seg->[SRC_START] - $src_last_end;
+      if ($delta > 1 and $src_delta > 0) {  # an insertion in the target relative to the source
+        my $gap_left  = $fontwidth + $base2pixel->($src_last_end,0);
+        my $gap_right = $base2pixel->($seg->[SRC_START],0);
+        ($gap_left,$gap_right) = ($gap_right+$fontwidth,$gap_left-$fontwidth) if $self->flip;
+        warn "delta=$delta, gap_left=$gap_left, gap_right=$gap_right" if DEBUG;
+
+        if ($delta == $src_delta) {
+          $gap_left  += $pixels_per_base/2-2;
+          $gap_right -= $pixels_per_base/2-2;
+        }
+
+        # NOTE(review): this skips to the next segment without updating
+        # $tgt_last_end / $src_last_end below -- confirm that is intended.
+        next if $gap_left <= $panel_left || $gap_right >= $panel_right;
+
+        $self->filled_box($gd,$gap_left,$y+1,
+                              $gap_right-2,$y+$lineheight,$mismatch,$mismatch) if
+                                $show_mismatch && $gap_left >= $panel_left && $gap_right <= $panel_right;
+
+
+        my $gap_distance             = $gap_right - $gap_left + 1;
+        my $pixels_per_inserted_base = $gap_distance/($delta-1);
+
+        if ($pixels_per_inserted_base >= $fontwidth) {  # Squeeze the insertion in
+          for (my $i = 0; $i<$delta-1; $i++) {
+            my $x = $gap_left + ($pixels_per_inserted_base-$fontwidth)/2 + $pixels_per_inserted_base * $i;
+            my $bp = $self->_subsequence($tgt_dna,$tgt_last_end+$i+1,$tgt_last_end+$i+1);
+            next if $x < $panel_left;
+            $gd->char($font,$x,$y,$bp,$color);
+          }
+        }
+        # stick in a blob
+        $self->_draw_insertion_point($gd,$gap_left,$gap_right,$y,$y+$lineheight,$mismatch) if $delta > 2;
+      }
+      # deal with gaps in the alignment
+      elsif ( (my $delta = $seg->[SRC_START] - $src_last_end) > 1) {
+        for (my $i=0;$i<$delta-1;$i++) {
+          my $x = $base2pixel->($src_last_end,$i+1);
+          next if $x > $panel_right;
+          $self->filled_box($gd,$x-$pixels_per_base/2+2,$y,$x+$pixels_per_base/2+1,$y+$lineheight,$mismatch,$mismatch)
+            if $show_mismatch;
+          $gd->char($font,$x,$y,'-',$color);
+        }
+
+      }
+
+    }
+
+    $tgt_last_end  = $seg->[TGT_END];
+    $src_last_end  = $seg->[SRC_END];
+  }
+
+  # additional fixup -- insert dashes at beginnings and ends of the segments, if they don't span the full
+  # alignment for some reason - THIS SHOULD NOT BE NECESSARY AND INDICATES THAT THIS WHOLE METHOD NEEDS
+  # TO BE REWRITTEN!
+  if (defined $leftmost && $leftmost-$bl > $pixels_per_base) {
+    $gd->char($font,$_,$top-1,'-',$color) for map {$bl+$_*$pixels_per_base} 0..($leftmost-$bl)/$pixels_per_base-1;
+  }
+  if (defined $rightmost && $br-$rightmost > $pixels_per_base) {
+    $gd->char($font,$_,$top-1,'-',$color) for map {$rightmost+$_*$pixels_per_base} (0..($br-$rightmost)/$pixels_per_base);
+  }
+
+  return $drew_sequence;
+}
+
+# Extract the 1-based, inclusive range $start..$end from $seq.
+#
+# With a negative $strand the range is counted from the far end of $seq and
+# the piece is passed through reversec().  When the glyph is flipped the
+# piece is looked up in %complement, so only a single letter known to that
+# table yields a defined result.
+sub _subsequence {
+  my ($self,$seq,$start,$end,$strand) = @_;
+  my $span = $end - $start + 1;
+
+  my $sub;
+  if (defined $strand && $strand < 0) {
+    $sub = $self->reversec(substr($seq,length($seq)-$end,$span));
+  }
+  else {
+    $sub = substr($seq,$start-1,$span);
+  }
+
+  return $sub unless $self->flip;
+  return $complement{$sub};
+}
+
+# Hand the source and target DNA strings to
+# Bio::Graphics::Browser::Realign::align_segs() and return its result.
+# The caller is responsible for having loaded that module.
+sub realign {
+  my ($self,$src,$tgt) = @_;
+  return Bio::Graphics::Browser::Realign::align_segs($src,$tgt);
+}
+
+# Override _subfeat() method to make it appear that a top-level feature that
+# has no subfeatures appears as a feature that has a single subfeature.
+# Otherwise at high mags gaps will be drawn as components rather than
+# as connectors.  Because of differing representations of split features
+# in Bio::DB::GFF::Feature and Bio::SeqFeature::Generic, there is
+# some breakage of encapsulation here.
+sub _subfeat {
+  my ($self,$feature) = @_;
+
+  # real subfeatures win
+  my @children = $self->SUPER::_subfeat($feature);
+  return @children if @children;
+
+  # a childless top-level feature stands in as its own single subfeature
+  return $self->feature
+    if $self->level == 0 && !$self->feature_has_subparts;
+
+  return;
+}
+
+# draw the classic "i-beam" icon to indicate that an insertion fits between
+# two bases
+# sub _draw_insertion_point {
+#   my $self = shift;
+#   my ($gd,$x,$y,$color) = @_;
+#   my $top    = $y;
+#   $x--;
+#   my $bottom = $y + $self->font->height - 4;
+#   $gd->line($x,$top+2, $x,$bottom-2,$color);
+#   $gd->setPixel($x+1,  $top+1,$color);
+#   $gd->setPixel($x+$_, $top,$color) for (2..3);
+#   $gd->setPixel($x-1,  $top+1,$color);
+#   $gd->setPixel($x-$_, $top,$color) for (2..3);
+
+#   $gd->setPixel($x+1,  $bottom-1,$color);
+#   $gd->setPixel($x+$_, $bottom,  $color) for (2..3);
+#   $gd->setPixel($x-1,  $bottom-1,$color);
+#   $gd->setPixel($x-$_, $bottom,  $color) for (2..3);
+# }
+
+# don't like that -- try drawing carets
+# Mark an insertion between $left and $right with a pair of filled
+# triangles ("carets"): one hanging from just below $top, one standing on
+# $bottom, both with their apex near the horizontal midpoint.
+sub _draw_insertion_point {
+   my ($self,$gd,$left,$right,$top,$bottom,$color) = @_;
+
+   # draws one triangle given the y of its flat edge and the y of its apex
+   my $caret = sub {
+     my ($edge_y,$apex_y) = @_;
+     my $poly = GD::Polygon->new();
+     $poly->addPt($left-3,$edge_y);
+     $poly->addPt($right+2,$edge_y);
+     $poly->addPt(($left+$right)/2-1,$apex_y);
+     return $gd->filledPolygon($poly,$color);
+   };
+
+   $caret->($top+1,$top+3);
+   return $caret->($bottom,$bottom-2);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::segments - The "segments" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing features that consist of discontinuous
+segments.  Unlike "graded_segments" or "alignment", the segments are a
+uniform color and not dependent on the score of the segment.
+
+=head2 METHODS
+
+This module overrides the maxdepth() method to return 1 unless
+explicitly specified by the -maxdepth option. This means that modules
+inheriting from segments will only be presented with one level of
+subfeatures. Override the maxdepth() method to get more levels.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the following glyph-specific options are recognized:
+
+  -draw_dna     If true, draw the dna residues        0 (false)
+                 when magnification level
+                 allows.
+
+  -draw_target  If true, draw the dna residues        0 (false)
+                 of the TARGET sequence when
+                 magnification level allows.
+                 See "Displaying Alignments".
+
+  -draw_protein_target  If true, draw the protein residues        0 (false)
+                 of the TARGET sequence when
+                 magnification level allows.
+                 See "Displaying Alignments".
+
+  -ragged_extra When combined with -draw_target,      0 (false)
+                draw extra bases beyond the end
+                of the alignment. The value is
+                the maximum number of extra
+                bases.
+                See "Displaying Alignments".
+
+  -ragged_start  Deprecated option.  Use
+                 -ragged_extra instead
+
+  -show_mismatch When combined with -draw_target,     0 (false)
+                 highlights mismatched bases in
+                 the mismatch color.  
+                 See "Displaying Alignments".
+
+  -mismatch_color The mismatch color to use           'lightgrey'
+
+  -true_target   Show the target DNA in its native    0 (false)
+                 (plus strand) orientation, even if
+                 the alignment is to the minus strand.
+                 See "Displaying Alignments".
+
+  -realign       Attempt to realign sequences at      0 (false)
+                 high mag to account for indels.
+                 See "Displaying Alignments".
+
+If the -draw_dna flag is set to a true value, then when the
+magnification is high enough, the underlying DNA sequence will be
+shown.  This option is mutually exclusive with -draw_target. See
+Bio::Graphics::Glyph::generic for more details.
+
+The -draw_target, -ragged_extra, and -show_mismatch options only work
+with seqfeatures that implement the hit() method
+(Bio::SeqFeature::SimilarityPair). -draw_target will cause the DNA of
+the hit sequence to be displayed when the magnification is high enough
+to allow individual bases to be drawn. The -ragged_extra option will
+cause the alignment to be extended at the extreme ends by the
+indicated number of bases, and is useful for looking for polyAs and
+cloning sites at the ends of ESTs and cDNAs. -show_mismatch will cause
+mismatched bases to be highlighted with the color indicated by
+-mismatch_color (default lightgrey).
+
+At high magnifications, minus strand matches will automatically be
+shown as their reverse complement (so that the match has the same
+sequence as the plus strand of the source dna).  If you prefer to see
+the actual sequence of the target as it appears on the minus strand,
+then set -true_target to true.
+
+Note that -true_target has the opposite meaning from
+-canonical_strand, which is used in conjunction with -draw_dna to draw
+minus strand features as if they appear on the plus strand.
+
+=head2 Displaying Alignments
+
+When the B<-draw_target> option is true, this glyph can be used to
+display nucleotide alignments such as BLAST, FASTA or BLAT
+similarities.  At high magnification, this glyph will attempt to show
+how the sequence of the source (query) DNA matches the sequence of the
+target (the hit).  For this to work, the feature must implement the
+hit() method, and both the source and the target DNA must be
+available.  If you pass the glyph a series of
+Bio::SeqFeature::SimilarityPair objects, then these criteria will be
+satisfied.
+
+Without additional help, this glyph cannot display gapped alignments
+correctly.  To display gapped alignments, you can use the
+Bio::Graphics::Browser::Realign module, which is part of the Generic
+Genome Browser package (http://www.gmod.org).  If you wish to install
+the Realign module and not the rest of the package, here is the
+recipe:
+
+  cd Generic-Genome-Browser-1.XX
+  perl Makefile.PL DO_XS=1
+  make
+  make install_site
+
+If possible, build the gbrowse package with the DO_XS=1 option.  This
+compiles a C-based DP algorithm that both gbrowse and gbrowse_details
+will use if they can.  If DO_XS is not set, then the scripts will use
+a Perl-based version of the algorithm that is 10-100 times slower.
+
+The display of alignments can be tweaked using the -ragged_extra,
+-show_mismatch, -true_target, and -realign options.  See the options
+section for further details.
+
+There is also a B<-draw_protein_target> option, which is designed for
+protein to nucleotide alignments. It draws the target sequence every
+third base pair and is supposed to align correctly with the forward
+and reverse translation glyphs. This option is experimental at the
+moment, and may not work correctly, so use it with care.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/so_transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/so_transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/so_transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+package Bio::Graphics::Glyph::so_transcript;
+
+# $Id: so_transcript.pm,v 1.2.6.2 2006/11/17 09:32:42 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::processed_transcript);
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::so_transcript - The sequence ontology transcript glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This is a sequence-ontology compatible glyph, which works hand-in-hand
+with the so_transcript aggregator in BioPerl.
+
+This glyph is identical to "processed_transcript," which is described
+in detail in L<Bio::Graphics::Glyph::processed_transcript>.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::processed_transcript>,
+L<Bio::DB::GFF::Aggregator::so_transcript>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2005 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/span.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/span.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/span.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,86 @@
+package Bio::Graphics::Glyph::span;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::anchored_arrow);
+
+# Always false (0).  Presumably consulted by the parent anchored_arrow glyph
+# to decide whether off-panel ends are truncated -- confirm against parent.
+sub no_trunc { return 0 }
+
+# Fixed four-element list (0,0,1,1); the meaning of each position is
+# defined by the parent anchored_arrow glyph and is not visible here.
+sub arrowheads { return (0,0,1,1) }
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::span - The "span" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a span that looks like this:
+
+    |-----------------------------|
+
+If one or both ends go off the edges of the panel, they are truncated:
+
+ ----------------------|                    left end off picture
+         |----------------------------      right end off picture
+ -------------------------------------      both ends off picture
+
+=head1 OPTIONS
+
+The standard options are recognized.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::anchored_arrow>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/splice_site.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/splice_site.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/splice_site.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,113 @@
+package Bio::Graphics::Glyph::splice_site;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+use constant PWIDTH => 3;
+
+# Draw the splice-site marker: a vertical stem at the horizontal center of
+# the glyph, topped by a PWIDTH-pixel tick pointing left or right.
+#
+# Arguments: the drawing canvas (must provide line()), followed by the
+# offsets that are handed unchanged to $self->bounds().
+sub draw_component {
+  my $self = shift;
+  my $gd   = shift;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);
+
+  my $center = int(0.5+($x1+$x2)/2);
+
+  # An unset direction simply means "no tick"; normalise it so the string
+  # comparisons below never see undef.
+  my $direction = $self->option('direction');
+  $direction = '' unless defined $direction;
+
+  # height_fraction is documented as 0.0 - 1.0, so an explicit 0 must be
+  # honoured.  Only an unset (undef or empty) option falls back to 1.0.
+  my $fraction = $self->option('height_fraction');
+  $fraction = 1.0 unless defined $fraction && length $fraction;
+
+  my $height = $y2 - $y1;
+  my $bottom = $y2;
+  my $top    = $y2 - $fraction * $height;
+
+  # draw the base
+  my $fgcolor = $self->fgcolor;
+  $gd->line($center,$bottom,$center,$top,$fgcolor);
+
+  # draw the tick at the top of the stem
+  if ($direction eq 'right') {
+    $gd->line($center,$top,$center + PWIDTH,$top,$fgcolor);
+  } elsif ($direction eq 'left') {
+    $gd->line($center,$top,$center - PWIDTH,$top,$fgcolor);
+  }
+}
+
+1;
+
+
+=head1 NAME
+
+Bio::Graphics::Glyph::splice_site - The "splice_site" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph was designed to show an inverted "L" representing splice
+donors and acceptors.  The vertical part of the L points downwards and
+is positioned in the center of the range (even if the range is quite
+large).  
+
+In addition to the usual glyph options, this glyph recognizes:
+
+   Option            Value              Description
+   ------            -----              -----------
+
+   direction         "left" or "right"  direction the short part of the L
+                                        points
+
+   height_fraction   0.0 - 1.0          fractional height of the glyph,
+                                        usually a callback that uses the
+                                        feature's score to determine its
+                                        height
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::chromosome>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Xiaokang Pan E<lt>pan at cshl.orgE<gt>
+
+Copyright (c) 2001 BDGP
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/text_in_box.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/text_in_box.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/text_in_box.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,130 @@
+package Bio::Graphics::Glyph::text_in_box;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Text used by draw_component when the 'text' option is undefined.
+sub default_text { "3'" }
+
+# Padding used by draw_component when the 'text_pad' option is undefined.
+sub default_text_pad { 3 }
+
+# Draw a rectangle sized to fit the configured text and print the text
+# inside it.  The box starts at the glyph's left edge ($x1) and is centered
+# on the glyph's vertical midpoint.
+sub draw_component {
+  my ($self, $gd, @offsets) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@offsets);
+
+  my $outline = $self->fgcolor;
+  my $font    = $self->option('labelfont') || $self->font;
+
+  # fall back to the class defaults only when an option is undefined
+  my $text;
+  if (defined $self->option('text')) { $text = $self->option('text') }
+  else                               { $text = $self->default_text() }
+
+  my $text_pad;
+  if (defined $self->option('text_pad')) { $text_pad = $self->option('text_pad') }
+  else                                   { $text_pad = $self->default_text_pad() }
+
+  my $text_width  = $font->width * length $text;
+  my $text_height = $font->height;
+
+  # box geometry: text extent plus $text_pad on every side
+  my $mid_y      = ($y2+$y1) / 2;
+  my $box_top    = $mid_y-$text_height/2-$text_pad;
+  my $box_bottom = $mid_y+$text_height/2+$text_pad;
+  my $box_right  = $x1+$text_width+2*$text_pad;
+
+  my $box = $self->polygon_package->new();
+  $box->addPt($x1,        $box_top);
+  $box->addPt($box_right, $box_top);
+  $box->addPt($box_right, $box_bottom);
+  $box->addPt($x1,        $box_bottom);
+
+  # the fill is optional; the outline and the text are always drawn
+  my $bgcolor = $self->option('text_bgcolor');
+  if (defined $bgcolor) {
+    $gd->filledPolygon($box,$self->factory->translate_color($bgcolor));
+  }
+
+  $gd->polygon($box,$outline);
+  $gd->string($font, $x1+$text_pad, $mid_y-$text_height/2, $text, $self->fontcolor);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::text_in_box - The "text in box" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws the specified text in a rectangular box.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -text       The text to draw in the box    3'
+
+  -text_pad   The number of pixels to offset 3
+              the box
+
+  -text_bgcolor                             none
+              The background color of the box
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/three_letters.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/three_letters.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/three_letters.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,133 @@
+package Bio::Graphics::Glyph::three_letters;
+# DAS-compatible package to use for drawing a line of groups of three letters
+
+# $Id: three_letters.pm,v 1.2.6.1 2006/10/02 23:10:20 sendu Exp $
+# Glyph class that draws repeating groups of three letters (see POD below)
+
+=head1 NAME
+
+Bio::Graphics::Glyph::three_letters - DAS-compatible package to use for drawing a line of groups of three letters
+
+=cut
+
+use strict;
+use base qw(Bio::Graphics::Glyph::repeating_shape);
+
+# Top padding: the inherited padding plus an extra 20% of the font height.
+sub pad_top {
+  my $self = shift;
+  return $self->SUPER::pad_top + 0.2 * $self->font->height;
+}
+
+# Default interval value reported by this glyph.
+sub default_interval { 20 }
+
+# Letters drawn when the 'text' option is undefined.
+sub default_text { "CAG" }
+
+# Draw one three-letter group inside the box ($x1,$y1)-($x2,$y2).  The
+# letters are spaced a third of the box width apart; the middle letter is
+# drawn 0.7 font heights above its two neighbours.
+sub draw_repeating_shape
+{
+  my ($self, $gd, $x1, $y1, $x2, $y2, $fg) = @_;
+
+  my $text;
+  if (defined $self->option('text')) { $text = $self->option('text') }
+  else                               { $text = $self->default_text() }
+
+  # exactly three characters: pad short strings with blanks, truncate long ones
+  my @letters = split //, substr($text . '   ', 0, 3);
+
+  my $third = ($x2-$x1) / 3;
+  my $font  = $self->option('labelfont') || $self->font;
+
+  $gd->string($font, $x1,            $y2-$font->height,     $letters[0], $self->fontcolor);
+  $gd->string($font, $x1 + $third,   $y2-1.7*$font->height, $letters[1], $self->fontcolor);
+  $gd->string($font, $x1 + 2*$third, $y2-$font->height,     $letters[2], $self->fontcolor);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::three_letters - The "three letters" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws groups of three letters separated by horizontal lines.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+  -text       The three letters to show     "CAG"
+
+  -width      Width of one letter group     20
+
+  -interval   Interval between              10
+              letter groups
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/tic_tac_toe.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/tic_tac_toe.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/tic_tac_toe.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+package Bio::Graphics::Glyph::tic_tac_toe;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Mode used by draw_component when the 'mode' option is undefined.
+sub default_mode { 'x' }
+
+# Size used by draw_component when the 'size' option is undefined.
+sub default_size { 10 }
+
+# Fill the glyph's width with a row of crosses and/or circles, each $size
+# wide and centered on the glyph's vertical midpoint.
+#   mode 'x'  - crosses only
+#   mode 'o'  - circles only
+#   mode 'xo' - alternating, starting with a cross
+# Any other mode draws nothing.
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+
+  my $fg = $self->fgcolor;
+
+  my $size = defined $self->option('size') ? $self->option('size') : $self->default_size();
+
+  my $mode= defined $self->option('mode') ? $self->option('mode') : $self->default_mode();
+
+  # A size of zero would divide by zero in the loop bound below, and a
+  # negative size yields no cells at all, so there is nothing to draw.
+  return unless $size > 0;
+
+  my $midY  = ($y1+$y2)/2;
+  my $cells = ($x2-$x1)/$size;   # may be fractional; the last cell can overhang
+
+  for (my $i=0; $i<$cells; $i++)
+  {
+    my $start = $x1+$i*$size;
+    my $end = $x1+($i+1)*$size;
+    if ($mode eq "x" || ($mode eq "xo" && $i%2==0))
+    {
+      # the two diagonals of the cell
+      $gd->line($start, $midY-$size/2, $end, $midY+$size/2, $fg);
+      $gd->line($end, $midY-$size/2, $start, $midY+$size/2, $fg);
+    }
+    elsif ($mode eq "o" || ($mode eq "xo" && $i%2==1))
+    {
+      $gd->ellipse(($start+$end)/2, $midY, $size, $size, $fg);
+    }
+  }
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::tic_tac_toe - The "tic-tac-toe" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a sequence of either 'xxx', 'ooo' or 'xoxo',
+depending on the value of 'mode'.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -mode       One of 'x', 'o', or 'xo'.     'x'
+
+  -size       Size of either 'x' or 'o'     10
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/toomany.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/toomany.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/toomany.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+package Bio::Graphics::Glyph::toomany;
+# DAS-compatible package to use for drawing a box
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# Draw the "too many features" placeholder: three unfilled boxes, each
+# shifted toward smaller x and y from the glyph's own rectangle by a
+# multiple of half the glyph height, optionally followed by a label.
+sub draw {
+  my ($self, $gd, $left, $top) = @_;
+
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries($left,$top);
+
+  # largest shift first ($m = 3), smallest ($m = 1) last
+  for my $m (reverse 1 .. 3) {
+    my $shift = $m * $self->height / 2;
+    $self->unfilled_box($gd,$x1-$shift,$y1-$shift,$x2-$shift,$y2-$shift);
+  }
+
+  # add a label if requested
+  if ($self->option('label')) {
+    $self->draw_label($gd,$left,($top-($self->height*1.1)));
+  }
+}
+
+# Fixed label text for this glyph.
+sub label { "too many to display" }
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::toomany - The "too many to show" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is intended for features that are too dense to show
+properly.  Mostly a placeholder, it currently shows a stack of three
+unfilled boxes, each offset from the last.  If you choose a bump of 0,
+the stacks will overlap, to give a cloud effect.
+
+=head2 OPTIONS
+
+There are no glyph-specific options.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/track.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/track.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/track.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,109 @@
+package Bio::Graphics::Glyph::track;
+
+use strict;
+use base qw(Bio::Graphics::Glyph);
+
+# A track reports the connector type 'none', except when all_callbacks is
+# true, in which case the parent class decides.
+sub connector {
+  my $self = shift;
+  if ($self->all_callbacks) {
+    return $self->SUPER::connector(@_);
+  }
+  return 'none';
+}
+
+# Draw every part of the track (highlight first, then the part itself),
+# clipping the canvas around the track when the canvas supports it.
+sub draw {
+  my $self = shift;
+  my ($gd,$left,$top,$partno,$total_parts) = @_;
+
+  # The clip rectangle keeps poorly-behaving glyphs from drawing outside
+  # the track.  Glyphs may spill into the panel padding on either side and
+  # 6 pixels above and below; beyond that they would overlap other glyphs.
+  my @saved_clip;
+  if ($gd->can('clip')) {
+    @saved_clip = $gd->clip();
+    $gd->clip($left-$self->panel->pad_left,
+              $top-6,
+              $self->panel->right+$self->panel->pad_right,
+              $top+$self->layout_height+6);
+  }
+
+  for my $part ($self->parts) {
+    $part->draw_highlight($gd,$left,$top);
+    $part->draw($gd,$left,$top,0,1);
+  }
+
+  # restore whatever clip region was in force on entry
+  $gd->clip(@saved_clip) if @saved_clip;
+}
+
+# do nothing for components
+# sub draw_component { }
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::track - The "track" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used internally by Bio::Graphics::Panel for laying out
+tracks.  It should not be used explicitly.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,202 @@
+package Bio::Graphics::Glyph::transcript;
+# $Id: transcript.pm,v 1.23.4.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::segments);
+
+# Left padding.  Only level-0 glyphs pad at all; for a minus-strand feature
+# the padding is at least arrow_length, otherwise the inherited padding.
+sub pad_left {
+  my $self = shift;
+  return 0 if $self->{level} != 0;
+
+  my $inherited = $self->SUPER::pad_left;
+  my $strand    = $self->feature->strand;
+  if (defined $strand && $strand < 0) {
+    return $self->arrow_length if $self->arrow_length > $inherited;
+  }
+  return $inherited;
+}
+
+# Right padding.  Only level-0 glyphs pad at all; for a plus-strand feature
+# the padding is at least arrow_length, otherwise the inherited padding.
+sub pad_right {
+  my $self = shift;
+  return 0 if $self->{level} != 0;
+
+  my $inherited = $self->SUPER::pad_right;
+  my $strand    = $self->feature->strand;
+  if (defined($strand) && $strand > 0) {
+    return $self->arrow_length if $self->arrow_length > $inherited;
+  }
+  return $inherited;
+}
+
+# Only sub-glyphs (level > 0) are drawn as components; the top-level
+# glyph draws nothing here.
+sub draw_component {
+  my $self = shift;
+  if ($self->level > 0) {
+    return $self->SUPER::draw_component(@_);
+  }
+  return;
+}
+
+# Use the inherited part_label_merge value when it is defined; otherwise
+# default to 1.
+sub part_label_merge {
+  my $self = shift;
+  my $inherited = $self->SUPER::part_label_merge;
+  return defined $inherited ? $inherited : 1;
+}
+
+# Draw the inherited connectors, then add a directional arrow of
+# arrow_length pixels at one end of the transcript.
+#
+# Arguments: the drawing canvas followed by the ($left,$top) offsets.
+# When the glyph has no parts but feature_has_subparts is true, a single
+# connector spanning the glyph's whole bounds is drawn and the glyph itself
+# stands in as its only part.  No arrow is drawn when strand is 0.
+sub draw_connectors {
+  my $self = shift;
+  my $gd = shift;
+  my ($left,$top) = @_;
+
+  $self->SUPER::draw_connectors($gd,$left,$top);
+  my @parts = $self->parts; # or return;
+
+  # H'mmm.  No parts.  Must be in an intron, so draw intron
+  # spanning entire range
+  if (!@parts) {
+    return unless $self->feature_has_subparts;
+    my($x1,$y1,$x2,$y2) = $self->bounds(0,0);
+    $self->_connector($gd,$left,$top,$x1,$y1,$x1,$y2,$x2,$y1,$x2,$y2);
+    @parts = ($self);
+  }
+
+  # flip argument makes this confusing
+  # certainly there's a simpler way to express this idea
+  my $strand    = $self->feature->strand;
+  my ($first,$last) = ($parts[0],$parts[-1]);
+
+  # NOTE(review): this swap tests whether the flip key exists, while the
+  # arrow code below tests whether it is true -- confirm that flip is never
+  # stored with a false value.
+  ($first,$last) = ($last,$first) if exists $self->{flip};
+
+  # plus strand: arrow on the part held in $last; it extends past the
+  # part's right edge, or past its left edge when flipped
+  if ($strand > 0) {
+    my($x1,$y1,$x2,$y2) = $last->bounds(@_);
+    my $center = ($y2+$y1)/2;
+    $self->{flip} ?
+	$self->arrow($gd,$x1,$x1-$self->arrow_length,$center)
+      :
+	$self->arrow($gd,$x2,$x2+$self->arrow_length,$center);
+  }
+
+  # minus strand: arrow on the part held in $first; it extends past the
+  # part's left edge, or past its right edge when flipped
+  elsif ($strand < 0) {
+    my($x1,$y1,$x2,$y2) = $first->bounds(@_);
+    my $center = ($y2+$y1)/2;
+    $self->{flip } ?
+	$self->arrow($gd,$x2,$x2+$self->arrow_length,$center)
+      :
+	$self->arrow($gd,$x1,$x1 - $self->arrow_length,$center);
+  }
+}
+
+# Length of the directional arrow: the 'arrow_length' option when it is
+# set to a true value, otherwise 8.
+sub arrow_length {
+  my $self = shift;
+  my $configured = $self->option('arrow_length');
+  return $configured ? $configured : 8;
+}
+
+# Override connector() to default to the "hat" type of connector.  An
+# explicit 'connector' option still wins, and when all_callbacks is true
+# the parent class decides.
+sub connector {
+  my $self = shift;
+  if ($self->all_callbacks) {
+    return $self->SUPER::connector(@_);
+  }
+  my $configured = $self->option('connector');
+  return $configured ? $configured : 'hat';
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::transcript - The "transcript" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing transcripts.  It is essentially a
+"segments" glyph in which the connecting segments are hats.  The
+direction of the transcript is indicated by an arrow attached to the
+end of the glyph.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 hat
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the transcript glyph recognizes the following
+glyph-specific options:
+
+  Option         Description                  Default
+  ------         -----------                  -------
+
+  -arrow_length  Length of the directional   8
+                 arrow.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript2.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript2.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/transcript2.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,230 @@
+package Bio::Graphics::Glyph::transcript2;
+
+# $Id: transcript2.pm,v 1.28.4.1 2006/10/02 23:10:20 sendu Exp $
+
+use strict;
+use base qw(Bio::Graphics::Glyph::transcript);
+
+use constant MIN_WIDTH_FOR_ARROW => 8;
+
+# Extra room needed on the left for a separate arrow.  This is
+# arrow_length when the effective strand (strand, negated if flipped) is
+# negative and the first part is narrower than MIN_WIDTH_FOR_ARROW;
+# otherwise it is 0.
+sub extra_arrow_length {
+  my $self = shift;
+
+  my $strand = $self->feature->strand || 0;
+  $strand *= -1 if $self->{flip};
+  return 0 unless $strand < 0;
+
+  # with no parts, the glyph itself is measured
+  my $leftmost = ($self->parts)[0] || $self;
+  my ($x_lo, undef, $x_hi) = $leftmost->bounds();
+
+  if (abs($x_hi - $x_lo) >= MIN_WIDTH_FOR_ARROW) {
+    return 0;
+  }
+  return $self->arrow_length;
+}
+
+# Left padding.  With a left-positioned label the extra arrow room is added
+# to the generic glyph's padding; otherwise the larger of the two is used.
+sub pad_left {
+  my $self = shift;
+  my $generic_pad = $self->Bio::Graphics::Glyph::generic::pad_left;
+  my $arrow_room  = $self->extra_arrow_length;
+
+  return $arrow_room+$generic_pad
+    if $self->label_position eq 'left' && $self->label;
+
+  return $arrow_room if $arrow_room > $generic_pad;
+  return $generic_pad;
+}
+
+# Right padding.  Sub-glyphs (level > 0) always use the generic glyph's
+# padding.  At the top level the parent class's padding is used instead
+# when the last part is narrower than MIN_WIDTH_FOR_ARROW.
+sub pad_right {
+  my $self = shift;
+  my $generic_pad = $self->Bio::Graphics::Glyph::generic::pad_right;
+  return $generic_pad if $self->{level} > 0;
+
+  # with no parts, the glyph itself is measured
+  my $rightmost = ($self->parts)[-1] || $self;
+  my ($x_lo, undef, $x_hi) = $rightmost->bounds();
+
+  if (abs($x_hi - $x_lo) < MIN_WIDTH_FOR_ARROW) {
+    return $self->SUPER::pad_right;
+  }
+  return $generic_pad;
+}
+
+# Draw the connectors.  The terminal part is the last part for an
+# effective strand >= 0 and the first part otherwise.  When it is at least
+# MIN_WIDTH_FOR_ARROW wide, the generic glyph's draw_connectors is used;
+# when it is narrower, the parent class's draw_connectors is used.
+sub draw_connectors {
+  my $self = shift;
+  my ($gd,$dx,$dy) = @_;
+
+  my $strand = $self->feature->strand;
+  $strand   *= -1 if $self->{flip};
+
+  my $terminal;
+  my @parts = $self->parts;
+  if (@parts) {
+    $terminal = $strand >= 0 ? $parts[-1] : $parts[0];
+  }
+  elsif ($self->feature_has_subparts) {
+    # no parts -- so draw an intron spanning whole thing
+    my($x1,$y1,$x2,$y2) = $self->bounds(0,0);
+    $self->_connector($gd,$dx,$dy,$x1,$y1,$x1,$y2,$x2,$y1,$x2,$y2);
+    $terminal = $self;
+  }
+  else {
+    return;
+  }
+
+  my ($x_lo, undef, $x_hi) = $terminal->bounds();
+  if (abs($x_hi - $x_lo) >= MIN_WIDTH_FOR_ARROW) {
+    $self->Bio::Graphics::Glyph::generic::draw_connectors(@_);
+  }
+  else {
+    $self->SUPER::draw_connectors(@_);
+  }
+}
+
+# Draw one exon.  The terminal exon in the direction of transcription is
+# drawn as a filled arrow when it is at least MIN_WIDTH_FOR_ARROW wide and
+# lies inside the panel padding; every other exon is drawn by the parent
+# class.  Only sub-glyphs (level > 0) are drawn.
+#
+# Arguments: the drawing canvas followed by the offsets that are handed
+# unchanged to $self->bounds() and to the parent's draw_component().
+#
+# The archived copy of this code had "@rect" and "@_" rewritten as
+# " at rect" and " at _" by the mailing-list address obfuscator; the
+# array sigils are restored here.
+sub draw_component {
+  my $self = shift;
+  return unless $self->level > 0;
+
+  my $gd = shift;
+  my @rect = $self->bounds(@_);
+
+  my $f      = $self->feature;
+  my $strand = $f->strand;
+  my $str    = $strand * ($self->{flip} ? -1 : 1);   # strand as displayed
+
+  my $width = abs($rect[2] - $rect[0]);
+  my $filled = defined($self->{partno}) && $width >= MIN_WIDTH_FOR_ARROW;
+
+  # no filled arrow when the exon's leading edge falls in the panel padding
+  my ($pwidth) = $gd->getBounds;
+  $filled = 0 if $str < 0 && $rect[0] < $self->panel->pad_left;
+  $filled = 0 if $str > 0 && $rect[2] > $pwidth - $self->panel->pad_right;
+
+  if ($filled) {
+    my ($first,$last)  = ($self->{partno} == 0 , $self->{partno} == $self->{total_parts}-1);
+    ($first,$last)     = ($last,$first) if $self->{flip};
+
+    if ($strand < 0 && $first) { # first exon, minus strand transcript
+      $self->filled_arrow($gd,-1,@rect);
+    } elsif ($strand >= 0 && $last) { # last exon, plus strand
+      $self->filled_arrow($gd,+1,@rect);
+    } else {
+      $self->SUPER::draw_component($gd,@_);
+    }
+  }
+
+  else {
+    $self->SUPER::draw_component($gd,@_);
+  }
+
+}
+
+# Components of this glyph never bump (0), except when all_callbacks is
+# true, in which case the parent class decides.
+sub bump {
+  my $self = shift;
+  if ($self->all_callbacks) {
+    return $self->SUPER::bump(@_);
+  }
+  return 0;
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::transcript2 - The "transcript2" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing transcripts.  It is like "transcript"
+except that if there is sufficient room the terminal exon is shaped
+like an arrow in order to indicate the direction of transcription.  If
+there isn't enough room, a small arrow is drawn.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 hat
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -strand_arrow Whether to indicate            0 (false)
+                 strandedness
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the transcript2 glyph recognizes the following
+glyph-specific options:
+
+  Option         Description                  Default
+  ------         -----------                  -------
+
+  -arrow_length  Length of the directional   8
+                 arrow.
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/translation.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/translation.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/translation.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,492 @@
+package Bio::Graphics::Glyph::translation;
+
+use strict;
+use Bio::Graphics::Util qw(frame_and_offset);
+use base qw(Bio::Graphics::Glyph::generic);
+
+my %default_colors = qw(
+			frame0f  cornflowerblue
+			frame1f  blue
+			frame2f  darkblue
+			frame0r  magenta
+			frame1r  red
+			frame2r  darkred
+		       );  # fallback colors keyed "frame<N><f|r>"; draw_frame uses f for strand >= 0, r otherwise
+
+# descriptions are switched off for this glyph: always false
+sub description { return 0 }
+
+# turn off label
+# sub label { 1 }
+
+sub default_color {   # translated built-in default color for a frame key such as "frame0f"
+  my $self = shift;
+  return $self->factory->translate_color($default_colors{ shift() });
+}
+
+# Rows of text (1, 3 or 6) times the font height when residues fit; else the inherited height.
+sub height {
+  my ($self) = @_;
+  my $font   = $self->font;
+  my %rows   = ('3frame' => 3, '6frame' => 6);
+  my $count  = $rows{$self->translation_type} || 1;
+  return $self->SUPER::height unless $self->protein_fits;
+  return $count * $font->height;
+}
+
+sub pixels_per_base {   # the glyph's scale, i.e. pixels drawn per base pair
+  my ($self) = @_;
+  return $self->scale;
+}
+
+sub pixels_per_residue {   # a residue spans three bases
+  my ($self) = @_;
+  return 3 * $self->scale;
+}
+
+# Translated color for the reading-frame lines: -gridcolor, else 'lightgrey'.
+sub gridcolor {
+  my ($self) = @_;
+  return $self->factory->translate_color($self->option('gridcolor') || 'lightgrey');
+}
+
+# Value of -show_sequence; an unset option counts as true.
+sub show_sequence {
+  my ($self) = @_;
+  my $wanted = $self->option('show_sequence');
+  return defined $wanted ? $wanted : 1;
+}
+
+# Value of -triletter_code; an unset option counts as false.
+sub triletter_code {
+  my ($self) = @_;
+  my $wanted = $self->option("triletter_code");
+  return defined $wanted ? $wanted : 0;
+}
+
+# True when a three-letter residue label fits within the width of one residue.
+# Returns nothing (false) when sequence display is switched off.
+sub longprotein_fits {
+  my ($self) = @_;
+  $self->show_sequence or return;
+
+  my $available = $self->pixels_per_residue;
+  my $needed    = 4 * $self->font->width;  # 4 characters rather than 3: leave room for whitespace
+  return $available >= $needed;
+}
+
+sub translation_type {   # -translation option; '1frame' when unset or false
+  my ($self) = @_;
+  return $self->option('translation') || '1frame';
+}
+
+sub arrow_height {   # -arrow_height option; 1 when unset or false
+  my ($self) = @_;
+  return $self->option('arrow_height') || 1;
+}
+
+# Value of -stop_codons; an unset option counts as true.
+sub show_stop_codons {
+  my ($self) = @_;
+  my $flag = $self->option('stop_codons');
+  return defined $flag ? $flag : 1;
+}
+
+# Value of -start_codons; an unset option counts as false.
+sub show_start_codons {
+  my ($self) = @_;
+  my $flag = $self->option('start_codons');
+  return defined $flag ? $flag : 0;
+}
+
+sub strand {   # -strand option; '+1' when unset or false
+  my ($self) = @_;
+  return $self->option('strand') || '+1';
+}
+
+# Draw three reading frames per strand: both strands for a '6frame'
+# translation, otherwise only the strand selected by -strand.
+sub draw_component {
+  my ($self,$gd,@origin) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds(@origin);
+
+  my $type   = $self->translation_type;
+  my $strand = $self->strand;
+  my @strands;
+  if    ($type eq '6frame') { @strands = (1,-1) }
+  elsif ($strand > 0)       { @strands = (1)    }
+  else                      { @strands = (-1)   }
+
+  for my $s (@strands) {
+    for my $frame_index (0..2) {   # passed as both the base offset and the phase
+      $self->draw_frame($self->feature,$s,$frame_index,$frame_index,$gd,$x1,$y1,$x2,$y2);
+    }
+  }
+}
+
+sub draw_frame {   # draw one reading frame of $feature on one strand, plus a direction arrowhead
+  my $self = shift;
+  my ($feature,$strand,$base_offset,$phase,$gd,$x1,$y1,$x2,$y2) = @_;
+  my ($seq,$pos);
+  $seq = $feature->seq or return; # no sequence, arggh.
+
+  my $strand0 = $strand;            # strand as requested; decides the arrowhead direction below
+  $strand *= -1 if $self->{flip};   # a flipped glyph works on the opposite strand
+
+  $pos = $strand < 0 ? $feature->end : $feature->start;  # reverse frames are anchored at the feature's end
+
+  my ($frame,$offset) = frame_and_offset($pos,$strand,$phase);
+  # warn "frame=$frame, phase=$phase";
+
+  my ($x1_orig,$x2_orig) = ($x1,$x2);  # remember this for arrowheads
+
+  ($strand >= 0 ? $x1 : $x2) += $self->pixels_per_base * $offset;  # move the left (forward) or right (reverse) edge by $offset bases' worth of pixels
+  my $y0 = $y1;
+  my $lh;                           # height of one frame row
+  if ($self->translation_type eq '6frame') {
+    $lh = $self->height / 6;
+    $y1 += $lh * $frame;
+    $y1 += $self->height/2 if $strand < 0;  # reverse frames are offset by half the glyph height
+  } else {
+    $lh = $self->height / 3;
+    $y1 += $lh * $frame;
+  }
+
+  $y1  = $y0 + ($self->height - ($y1-$y0)) - $lh if $self->{flip};  # mirror the row within the glyph height when flipped
+
+  $y2 = $y1;
+
+  my $codon_table = $self->option('codontable') || $self->option('geneticcode') || 1;  # -codontable, else -geneticcode, else 1
+
+  # the dreaded difference between a Bio::SeqFeature and a Bio::Seq
+
+  my $realseq  = $self->get_seq($seq);
+  return unless $realseq;
+  $realseq    = $realseq->revcom if $strand < 0;
+
+  my $protein = $realseq->translate(undef,undef,$base_offset,$codon_table)->seq;
+
+  my $k       = $strand >= 0     ? 'f' : 'r';  # color-key suffix: forward or reverse
+  # color lookup order: frame<N><f|r> option, frame<N> option, built-in default, fgcolor
+  my $color   = $self->color("frame$frame$k") ||
+                $self->color("frame$frame") ||
+                $self->default_color("frame$frame$k") || $self->fgcolor;
+
+  my $awo = 0;                      # extra vertical offset for the arrowhead
+  if ($self->protein_fits) {
+    $self->draw_protein(\$protein,$strand,$color,$gd,$x1,$y1,$x2,$y2);
+    $awo += $self->font->height/2;  # half a font height; presumably centers the arrowhead on the text
+  } else {
+    $self->draw_orfs(\$protein,$strand,$color,$gd,$x1,$y1,$x2,$y2);
+  }
+  # arrowhead 5 pixels beyond the original right (forward) or left (reverse) edge
+  $strand0 > 0 ? $self->arrowhead($gd,$x2_orig+5,$y1+$awo,3,+1)
+               : $self->arrowhead($gd,$x1_orig-5,$y1+$awo,3,-1)
+
+}
+
+sub draw_protein {   # write the residues of $$protein along one frame row at height $y1
+  my $self = shift;
+  my ($protein,$strand,$color,$gd,$x1,$y1,$x2,$y2) = @_;
+  my $pixels_per_base = $self->pixels_per_base;
+  my $font   = $self->font;
+  my $flip   = $self->{flip};
+  my $left   = $self->panel->left;
+  my $right  = $self->panel->right;
+
+  my $longprotein = $self->triletter_code && $self->longprotein_fits;  # three-letter labels only when requested and there is room
+
+  my %abbrev = ( A => "Ala", B => "Asx", C => "Cys", D => "Asp",
+		 E => "Glu", F => "Phe", G => "Gly", H => "His",
+		 I => "Ile", J => "???", K => "Lys", L => "Leu",
+		 M => "Met", N => "Asn", O => "???", P => "Pro",
+		 Q => "Gln", R => "Arg", S => "Ser", T => "Thr",
+		 U => "Sec", V => "Val", W => "Trp", X => "Xaa",
+		 Y => "Tyr", Z => "Glx", '*' => " * ",
+	       );  # one-letter code => three-letter label
+
+  my @residues = split '',$$protein;
+  my $fontwidth = $font->width;  # NOTE(review): not read again in this sub; $font->width is called directly below
+  for (my $i=0;$i<@residues;$i++) {
+    my $x = $strand > 0   # forward: left to right from $x1; reverse: right to left from $x2
+      ? $x1 + 3 * $i * $pixels_per_base
+      : $x2 - 3 * $i * $pixels_per_base - $pixels_per_base;
+    next if $x+1 < $x1;   # skip residues left of the drawing area
+    last if $x > $x2;     # stop once past its right edge
+    if ($flip) {          # flipped: mirror the x position between the panel's left and right
+      $x -= $pixels_per_base - $font->width - 1; #align right, not left
+      if ($longprotein) {
+	$gd->string($font,$right-($x-$left+$pixels_per_base)+1,$y1,$abbrev{$residues[$i]},$color);
+      } else {
+	$gd->char($font,$right-($x-$left+$pixels_per_base)+2,$y1,$residues[$i],$color);
+      }
+    } else {
+      if ($longprotein) {
+	$gd->string($font, $x+1, $y1, $abbrev{$residues[$i]}, $color);
+      } else {
+	$gd->char($font,$x+2,$y1,$residues[$i],$color);
+      }
+    }
+  }
+}
+
+sub draw_orfs {   # draw a frame as a grid line with stop-codon ticks and optional start-codon arrowheads
+  my $self     = shift;
+  my ($protein,$strand,$color,$gd,$x1,$y1,$x2,$y2) = @_;
+  my $pixels_per_base = $self->pixels_per_base * 3;  # despite the name: pixels per residue (three bases)
+  $y1++;
+  my $right  = $self->panel->right;
+  my $left   = $self->panel->left;
+  my $flip   = $self->{flip};
+
+  my $gcolor = $self->gridcolor;
+  $gd->line($x1,$y1,$x2,$y1,$gcolor);  # horizontal line for this frame
+
+  if ($self->show_stop_codons) {
+    my $stops  = $self->find_codons($protein,'*');  # residue indices of '*' in the translation
+
+    for my $stop (@$stops) {
+      my $pos = $strand > 0 
+	? $x1 + $stop * $pixels_per_base
+        : $x2 - $stop * $pixels_per_base;
+      next if $pos+1 < $x1;
+      last if $pos   > $x2;
+      if ($flip) {  # mirrored between the panel's left and right
+	$gd->line($right-($pos-$left),$y1-2,$right-($pos-$left),$y1+2,$color);
+      } else {
+	$gd->line($pos,$y1-2,$pos,$y1+2,$color);  # vertical tick, 2 pixels above and below the line
+      }
+    }
+  }
+
+  my $arrowhead_height = $self->arrow_height;
+
+  if ($self->show_start_codons) {
+    my $starts  = $self->find_codons($protein,'M');  # residue indices of 'M' in the translation
+
+    for my $start (@$starts) {
+      my $pos = $strand > 0 
+	? $x1 + $start * $pixels_per_base
+        : $x2 - $start * $pixels_per_base;
+      next if $pos+1 < $x1;
+      last if $pos   > $x2;
+      $pos = $self->{flip} ? $right - $pos : $pos;  # NOTE(review): stop codons mirror with $right-($pos-$left); confirm the missing $left is intended
+
+      # little arrowheads at the start codons
+      $strand > 0 ? $self->arrowhead($gd,$pos-$arrowhead_height,$y1,
+				     $arrowhead_height,+1)
+	          : $self->arrowhead($gd,$pos+$arrowhead_height,$y1,
+				     $arrowhead_height,-1)
+    }
+  }
+  $strand *= -1 if $flip;  # NOTE(review): $strand is a local copy that is not read after this line
+
+}
+
+# Return an arrayref of the 0-based positions at which the string $codon
+# ('*' when omitted or false) occurs in the string referenced by $protein.
+sub find_codons {
+  my ($self,$protein,$codon) = @_;
+  $codon ||= '*';
+  my @hits;
+  for (my $at = index($$protein,$codon); $at >= 0; $at = index($$protein,$codon,$at+1)) {
+    push @hits,$at;
+  }
+  return \@hits;
+}
+
+# Build the feature handed back for this glyph's key: 500 random bases,
+# starting at the panel offset and ending 100 pixels' worth of bases later.
+sub make_key_feature {
+  my ($self) = @_;
+  my @bases        = qw(g a t c);
+  my $begin        = $self->panel->offset;
+  my $bp_per_pixel = 1/$self->scale;
+  my $finish       = $begin + 100 * $bp_per_pixel;
+  my $dna          = '';
+  $dna .= $bases[rand 4] for 1..500;
+  my $key_feature = Bio::Graphics::Feature->new(
+    -start => $begin,  -end  => $finish,
+    -seq   => $dna,    -name => $self->option('key'),
+  );
+  return $key_feature;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::translation - The "6-frame translation" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws the conceptual translation of DNA sequences.  When zoomed
+out, it simply draws lines indicating open reading frames.  When zoomed in
+far enough for the residues to fit, it draws a conceptual protein translation.
+Options can be used to set 1-frame, 3-frame or 6-frame translations.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 0 (false)
+
+  -connector_color
+                Connector color                black
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option        Description                 Default
+  ------        -----------                 -------
+
+  -translation  Type of translation to      1frame
+                perform.  One of "1frame",
+                "3frame", or "6frame"
+
+  -strand       Forward (+1) or reverse (-1) +1
+                translation.
+
+  -frame0       Color for the first frame    cornflowerblue (fwd), magenta (rev)
+
+  -frame1       Color for the second frame   blue (fwd), red (rev)
+
+  -frame2       Color for the third frame    darkblue (fwd), darkred (rev)
+
+  -gridcolor    Color for the horizontal     lightgrey
+                lines of the reading frames
+
+  -start_codons Draw little arrowheads       0 (false)
+                indicating start codons
+
+  -stop_codons  Draw little vertical ticks   1 (true)
+                indicating stop codons
+
+  -arrow_height Height of the start codon    1
+                arrowheads
+
+  -show_sequence Show the amino acid sequence 1 (true)
+                if there's room.
+
+  -triletter_code Show the 3-letter amino acid 0 (false)
+                code if there's room
+
+  -codontable   Codon table to use           1 (see Bio::Tools::CodonTable)
+
+=head1 SUGGESTED STANZA FOR GENOME BROWSER
+
+This produces a nice gbrowse display in which the DNA/GC Content glyph
+is sandwiched between the forward and reverse three-frame
+translations.  The frames are color-coordinated with the example
+configuration for the "cds" glyph.
+
+ [TranslationF]
+ glyph        = translation
+ global feature = 1
+ frame0       = cadetblue
+ frame1       = blue
+ frame2       = darkblue
+ height       = 20
+ fgcolor      = purple
+ strand       = +1
+ translation  = 3frame
+ key          = 3-frame translation (forward)
+
+ [DNA/GC Content]
+ glyph        = dna
+ global feature = 1
+ height       = 40
+ do_gc        = 1
+ fgcolor      = red
+ axis_color   = blue
+
+ [TranslationR]
+ glyph        = translation
+ global feature = 1
+ frame0       = darkred
+ frame1       = red
+ frame2       = crimson
+ height       = 20
+ fgcolor      = blue
+ strand       = -1
+ translation  = 3frame
+ key          = 3-frame translation (reverse)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/triangle.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/triangle.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/triangle.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,142 @@
+package Bio::Graphics::Glyph::triangle;
+# DAS-compatible package to use for drawing a triangle
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+sub pad_left {   # inherited left padding; in -point mode at least a third of -height
+  my ($self) = @_;
+  my $pad = $self->SUPER::pad_left;
+  $self->option('point') or return $pad;
+  my $third = $self->option('height')/3;
+  return $third > $pad ? $third : $pad;
+}
+
+sub pad_right {   # inherited right padding; in -point mode at least a third of -height
+  my ($self) = @_;
+  my $pad = $self->SUPER::pad_right;
+  $self->option('point') or return $pad;
+  my $third = $self->option('height')/3;
+  return $third > $pad ? $third : $pad;
+}
+
+sub draw_component {   # draw the triangle outline, filled first when a background color is set
+  my ($self,$gd,@origin) = @_;
+  my $fg     = $self->fgcolor;
+  my $orient = $self->option('orient') || 'S';
+
+  # bounding box of the part and its center
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@origin);
+  my $xmid = ($x1+$x2)/2;
+  my $ymid = ($y1+$y2)/2;
+
+  # $q is half the base: half the feature width, or height/sqrt(3) in
+  # -point mode, where the box is re-centered on the midpoint
+  my ($p,$q) = ($self->option('height'),($x2-$x1)/2);
+  if ($self->option('point')) {
+    $q = $p/sqrt(3);
+    ($x1,$x2) = ($xmid - $q, $xmid + $q);
+    ($y1,$y2) = ($ymid - $q, $ymid + $q);
+  }
+
+  # the two base corners followed by the apex, for each orientation
+  my %corners = (
+    S => [$x1,$y1, $x2,$y1, $xmid,$y2],
+    N => [$x1,$y2, $x2,$y2, $xmid,$y1],
+    W => [$x2,$y1, $x2,$y2, $x2-$q*2,$ymid],
+    E => [$x1,$y1, $x1,$y2, $x1+$q*2,$ymid],
+  );
+  my ($vx1,$vy1,$vx2,$vy2,$vx3,$vy3) = @{ $corners{$orient} || [] };
+
+  # now draw the triangle
+  my $poly = $self->polygon_package->new();
+  $poly->addPt($vx1,$vy1);
+  $poly->addPt($vx2,$vy2);
+  $poly->addPt($vx3,$vy3);
+
+  my $fill = $self->bgcolor;
+  $gd->filledPolygon($poly,$fill) if $fill;
+  $gd->polygon($poly,$fg);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::triangle - The "triangle" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws an equilateral triangle when -point is defined.
+It draws an isosceles triangle otherwise.  It is possible to draw
+the triangle with the base on the N, S, E, or W side.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -point      If true, the triangle         0
+              will be drawn at the center
+              of the range, and not scaled
+              to the feature width.
+
+  -orient     Direction in which the        S
+              apex points (NSEW)
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Allen Day E<lt>day at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/two_bolts.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/two_bolts.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/two_bolts.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+package Bio::Graphics::Glyph::two_bolts;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# bolt height used when -bolt_height is not given
+sub default_bolt_height {
+  return 10;
+}
+
+# bolt length used when -bolt_length is not given
+sub default_bolt_length {
+  return 20;
+}
+
+# length of the short line beside a bolt when -remainder_length is not given
+sub default_remainder_length {
+  return 10;
+}
+
+# bolt color used when -bolt_color is not given
+sub default_bolt_color {
+  return 'red';
+}
+
+# Draw two horizontal lines, each carrying a zig-zag "bolt" (upper: near the right end; lower: near the left end).
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  my $fg = $self->fgcolor;
+
+  # the two lines run one third and two thirds of the way down the glyph
+  my $midY1 = $y1+($y2-$y1) / 3;
+  my $midY2 = $y1 + 2*($y2-$y1) / 3;
+
+  # explicitly set options take precedence over the class defaults
+  my $bolt_color = defined $self->option('bolt_color') ? $self->option('bolt_color')  : $self->default_bolt_color();
+  $bolt_color = $self->factory->translate_color($bolt_color);
+  my $bolt_height = defined $self->option('bolt_height') ? $self->option('bolt_height')  : $self->default_bolt_height();
+  my $bolt_length = defined $self->option('bolt_length') ? $self->option('bolt_length')  : $self->default_bolt_length();
+  # (the option key read back here used to be misspelled, so a user-supplied value was lost)
+  my $remainder_length = defined $self->option('remainder_length') ? $self->option('remainder_length')  : $self->default_remainder_length();
+
+  # too narrow for a bolt plus its short line: draw one corner-to-corner line and stop
+  if ($x2-$x1 < $bolt_length+$remainder_length) {
+    $gd->line($x1, $y1, $x2, $y2, $bolt_color);
+    return;
+  }
+
+  # upper line: plain segment, bolt, then a short line of $remainder_length
+  my $bolt_start = $x2-$bolt_length-$remainder_length;
+  my $step = $bolt_length / 8;   # draw_bolt spans eight steps
+  my $shift = $bolt_height/2;    # the bolt swings this far above and below the line
+  $gd->line($x1, $midY1, $bolt_start, $midY1, $fg);
+  $self->draw_bolt($gd, $bolt_start, $step, $midY1, $shift, $bolt_color);
+  $gd->line($x2-$remainder_length, $midY1, $x2, $midY1, $fg);
+  # lower line: short line of $remainder_length, bolt, then plain segment
+  $bolt_start = $x1+$remainder_length;
+  $gd->line($x1, $midY2, $bolt_start, $midY2, $fg);
+  $self->draw_bolt($gd, $bolt_start, $step, $midY2, $shift, $bolt_color);
+  $gd->line($x1+$bolt_length+$remainder_length, $midY2, $x2, $midY2, $fg);
+}
+
+sub draw_bolt {   # zig-zag of five strokes, eight steps wide, swinging $shift above and below $y
+  my ($self, $gd, $bolt_start, $step, $y, $shift, $bolt_color) = @_;
+  my ($up,$down) = ($y-$shift, $y+$shift);
+  my @x = map { $bolt_start + $_*$step } (1,3,5,7,8);
+  $gd->line($bolt_start, $y,    $x[0], $up,   $bolt_color);
+  $gd->line($x[0],       $up,   $x[1], $down, $bolt_color);
+  $gd->line($x[1],       $down, $x[2], $up,   $bolt_color);
+  $gd->line($x[2],       $up,   $x[3], $down, $bolt_color);
+  $gd->line($x[3],       $down, $x[4], $y,    $bolt_color);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::two_bolts - The "two bolts" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws two "bolts" on a line. They look like this:
+
+--------/\/\/\--
+--/\/\/\--------
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -bolt_height  Height of the bolt          10
+
+  -bolt_length  Length of the bolt          20
+
+  -bolt_color   Color of the bolt           red
+
+  -remainder_length
+                Length of the short line    10
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/wave.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/wave.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/wave.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,141 @@
+package Bio::Graphics::Glyph::wave;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+# spread of the sine curve when -spread is not given
+sub default_spread {
+  return 0.3;
+}
+
+# circle radius when -radius is not given
+sub default_radius {
+  return 5;
+}
+
+# Draw a sine wave across the part.  With -circle a circle is drawn at the
+# left end first; with -line the wave is replaced by a straight line.
+# -spread and -radius fall back to default_spread / default_radius.
+sub draw_component {
+  my ($self,$gd,@origin) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@origin);
+
+  my $spread = $self->option('spread');
+  $spread    = $self->default_spread() unless defined $spread;
+
+  my $fg = $self->fgcolor;
+
+  # the curve swings $height above and below the vertical middle
+  my $height = ($y2-$y1)/2;
+  my $midY   = $y1 + $height;
+
+  # optional circle at the left end, centered on the middle line
+  if ($self->option('circle') == 1) {
+    my $radius = $self->option('radius');
+    $radius    = $self->default_radius() unless defined $radius;
+    $gd->ellipse($x1+$radius,$midY,2*$radius,2*$radius,$fg);
+    $x1 += 2*$radius;   # whatever follows starts to the right of the circle
+  }
+
+  if ($self->option('line') == 1) {
+    # straight line instead of a wave, provided there is room left for it
+    $gd->line($x1,$midY,$x2,$midY,$fg) if $x1 < $x2;
+    return;
+  }
+
+  # connect successive points of the sine curve, one x step at a time
+  my ($oldX, $oldY);
+  foreach my $x ($x1..$x2) {
+    my $y = -$height * sin ($spread * ($x-$x1))+$midY;
+    if ($x > $x1) {
+      $gd->line($oldX,$oldY,$x,$y,$fg);
+    }
+    ($oldX,$oldY) = ($x,$y);
+  }
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::wave - The "wave" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws a sine wave with an optional circle in the beginning.
+The wave can also become a straight line.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -spread     The "spread" of the sine curve 0.3
+              Values from 0.1 to 0.5 look best
+
+  -line       Whether  to draw a line         0
+              instead of a wave (1 or 0)
+
+  -circle     Whether to draw a circle        0
+              in the left corner (1 or 0)
+
+  -radius     The radius of the circle        5
+              if present
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/weighted_arrow.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/weighted_arrow.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/weighted_arrow.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,166 @@
+package Bio::Graphics::Glyph::weighted_arrow;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::generic);
+
+use Math::Trig;
+
+# side of the square when -weight_size is not given
+sub default_weight_size {
+  return 8;
+}
+
+# arrow length when -length is not given
+sub default_length {
+  return 20;
+}
+
+# left-aligned drawing is the default when -left_alignment is not given
+sub default_left_alignment {
+  return 1;
+}
+
+# Draw a filled square "weight" resting on the bottom edge with a vertical
+# line beside it and, in left-aligned mode on a part wider than the weight,
+# an arrow pointing right.
+sub draw_component {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+
+  my $fg = $self->fgcolor;
+  my $bg = $self->bgcolor;
+
+  # explicitly set options take precedence over the class defaults
+  my $weight_size = defined $self->option('weight_size') ? $self->option('weight_size') : $self->default_weight_size();
+  my $length = defined $self->option('length') ? $self->option('length') : $self->default_length();
+  my $left_alignment = defined $self->option('left_alignment') ? $self->option('left_alignment') : $self->default_left_alignment();
+
+  # $w_x1: left edge of the square; $l_x1: x position of the vertical line
+  my ($w_x1, $l_x1);
+  if ($left_alignment == 1) {
+    $w_x1 = $x1+1;
+    $l_x1 = $x1;
+  }
+  else {
+    $w_x1 = $x2-1-$weight_size;
+    $l_x1 = $x2;
+  }
+
+  # the weight: a square of side $weight_size filled with the background color
+  my $poly_pkg = $self->polygon_package;
+  my $polygon   = $poly_pkg->new();
+  $polygon->addPt($w_x1,$y2);
+  $polygon->addPt($w_x1,$y2-$weight_size);
+  $polygon->addPt($w_x1+$weight_size,$y2-$weight_size);
+  $polygon->addPt($w_x1+$weight_size,$y2);
+  $gd->filledPolygon($polygon,$bg);
+
+  my $dx = 6;   # the arrowhead strokes reach this far back from the tip
+  my $dy = 4;   # ... and this far above and below the shaft
+  my $midY = $y1+$dy;
+  # vertical line from the bottom edge up to the height of the shaft
+  $gd->line($l_x1, $y2, $l_x1, $midY, $fg);
+
+  # no arrow when right-aligned, nor when the part is no wider than the weight
+  return unless $left_alignment == 1;
+  return if ($x2-$x1 <= $weight_size);
+
+  # shaft ending $length pixels past the square, then the two arrowhead strokes
+  $x2 = $x1+$weight_size+1+$length;
+  $gd->line($l_x1, $midY, $x2, $midY, $fg);
+  $gd->line($x2, $midY, $x2-$dx, $y1, $fg);
+  $gd->line($x2, $midY, $x2-$dx, $y1+2*$dy, $fg);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::weighted_arrow - The "weighted arrow" glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph draws either an arrow "weighted" by a square on the left
+side of the glyph, or a "weight" and a vertical line, but no arrow, on
+the right side. The arrow/line is drawn with the foreground color,
+the square with the background color.
+
+The first mode is the default. To get the second mode, specify
+the "left_alignment 0" option.
+
+=head2 OPTIONS
+
+In addition to the common options, the following glyph-specific
+options are recognized:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -weight_size
+              Size of the square            8
+
+  -length     Length of the arrow           20
+
+  -left_alignment
+              Whether the glyph is drawn    1
+              on the left or on the right
+              side of the available space.
+
+  -height     Standard option, but          10
+              important here
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::DB::GFF>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>
+
+=head1 AUTHOR
+
+Vsevolod (Simon) Ilyushchenko E<lt>simonf at cshl.eduE<gt>.
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/whiskerplot.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/whiskerplot.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/whiskerplot.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,404 @@
+package Bio::Graphics::Glyph::whiskerplot;
+
+use strict;
+use base qw(Bio::Graphics::Glyph::xyplot);
+
+# Every graph type maps onto the single whisker-plot renderer, so the
+# requested type name (if any) is ignored.
+sub lookup_draw_method { 'draw_whiskerplot' }
+
+# Draw one box-and-whisker data point for each sub-part of the glyph.
+#
+# Arguments: the GD image, then the left and top offsets that are handed
+# on to each part's calculate_boundaries().  Further arguments are ignored.
+#
+# The data for a part comes from its get_range() and is read as
+#   [$median,$range_low,$range_high,$quartile_low,$quartile_high]
+# A four-element list is accepted for backward compatibility and is read
+# as the same fields without the leading median.
+sub draw_whiskerplot {
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+  my @parts   = $self->parts;
+  my $fgcolor = $self->fgcolor;
+
+  for my $part (@parts) {
+    my ($x1,$y1,$x2,$y2) = $part->calculate_boundaries($left,$top);
+
+    # position already stored on the part (may be undefined)
+    my $y = $part->{_y_position};
+
+    # get the range and tendency
+    if (my $range = $part->get_range) {
+
+      # work on a copy so that the array we were given (tag values or the
+      # result of a -range callback) is never modified
+      my @values = @$range;
+      unshift @values,undef if @values == 4;  # backward compatibility
+
+      my ($median,$range_low,$range_high,$lower_quartile,$higher_quartile) = @values;
+      $y = $part->{_y_position} = $self->score2position($median) if defined $median;
+
+      # draw the quartile box; without quartiles the "box" is just the
+      # median position
+      my ($box_top,$box_bottom) = ($y,$y);
+      if (defined $lower_quartile && defined $higher_quartile) {
+        $box_top    = $self->score2position($higher_quartile);
+        $box_bottom = $self->score2position($lower_quartile);
+        $self->filled_box($gd,$x1,$box_top,$x2,$box_bottom);
+      }
+
+      # calculate positions of the range whiskers
+      if (defined $range_low && defined $range_high) {
+        my $range_top    = $self->score2position($range_high);
+        my $range_bottom = $self->score2position($range_low);
+        my $center       = ($x1+$x2)/2;
+        my $whisker_left  = $center-5;
+        my $whisker_right = $center+5;
+        $whisker_left     = $x1 if $whisker_left  < $x1;
+        $whisker_right    = $x2 if $whisker_right > $x2;
+
+        # with neither a box nor a median there is nothing for the stems
+        # to join on to, so each stem collapses onto its own cap instead
+        # of being drawn from an undefined position
+        $box_top    = $range_top    unless defined $box_top;
+        $box_bottom = $range_bottom unless defined $box_bottom;
+
+        # top whisker
+        $gd->line($center,$box_top,$center,$range_top,$fgcolor);
+        $gd->line($whisker_left,$range_top,$whisker_right,$range_top,$fgcolor);
+
+        # bottom whisker
+        $gd->line($center,$box_bottom,$center,$range_bottom,$fgcolor);
+        $gd->line($whisker_left,$range_bottom,$whisker_right,$range_bottom,$fgcolor);
+      }
+    }
+
+    # draw the median, provided a position for it is known
+    $gd->line($x1,$y,$x2,$y,$fgcolor) if defined $y;
+
+  }
+}
+
+# Return the whisker data for this part as an array reference.
+#
+# A "range" option takes precedence; otherwise the values of the
+# feature's "range" tag are used.  The tag lookup is wrapped in eval
+# because get_tag_values() may raise an exception when the tag is absent
+# (feature classes differ on this); a feature without the tag then yields
+# a reference to an empty list rather than aborting the drawing.
+sub get_range {
+  my $self  = shift;
+  my $range = $self->option('range');
+  return $range if defined $range;
+  # otherwise get it from the feature
+  my @values = eval { $self->feature->get_tag_values('range') };
+  return \@values;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::whiskerplot - The whiskerplot glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing features associated with numeric data
+using "box and whisker" style data points, which display the mean
+value, extreme ranges and first and third quartiles (or standard
+deviation). The boxes drawn by this glyph are similar to
+L<http://www.abs.gov.au/websitedbs/D3310116.NSF/0/3c35ac1e828c23ef4a2567ac0020ec8a?OpenDocument>,
+except that they are oriented vertically so that the position and
+height of the box indicates the mean value and spread of the data, and
+the width indicates the genomic extent of the value.
+
+Like the xyplot glyph (from which it inherits), the whiskerplot is
+designed to work on a single feature group that contains subfeatures.
+It is the subfeatures that carry the score information. The best way
+to arrange for this is to create an aggregator for the feature.  We'll
+take as an example a histogram of repeat density in which intervals are
+spaced every megabase and the score indicates the number of repeats in
+the interval; we'll assume that the database has been loaded in
+such a way that each interval is a distinct feature with the method
+name "density" and the source name "repeat".  Furthermore, all the
+repeat features are grouped together into a single group (the name of
+the group is irrelevant).  If you are using Bio::DB::GFF and
+Bio::Graphics directly, the sequence of events would look like this:
+
+  my $agg = Bio::DB::GFF::Aggregator->new(-method    => 'repeat_density',
+                                          -sub_parts => 'density:repeat');
+  my $db  = Bio::DB::GFF->new(-dsn=>'my_database',
+                              -aggregators => $agg);
+  my $segment  = $db->segment('Chr1');
+  my @features = $segment->features('repeat_density');
+
+  my $panel = Bio::Graphics::Panel->new;
+  $panel->add_track(\@features,
+                    -glyph => 'whiskerplot',
+                    -scale => 'both',
+);
+
+If you are using Generic Genome Browser, you will add this to the
+configuration file:
+
+  aggregators = repeat_density{density:repeat}
+                clone alignment etc
+
+Note that it is a good idea to add some padding to the left and right
+of the panel; otherwise the scale will be partially cut off by the
+edge of the image.
+
+The mean (or median) of the data will be taken from the feature
+score. The range and quartile data must either be provided in a
+feature tag named "range", or must be generated dynamically by a
+-range callback option passed to add_track. The data returned by the
+tag or option should be an array reference containing the following
+five fields:
+
+ [$median,$range_low,$range_high,$quartile_low,$quartile_high]
+
+where $range_low and $range_high correspond to the low and high value
+of the "whiskers" and $quartile_low and $quartile_high correspond to
+the low and high value of the "box."
+
+If $median is undef or missing, then the score field of the feature
+will be used instead. It may be useful to repeat the median in the
+score field in any case, in order to allow the minimum and maximum
+range calculations of the graph itself to occur.
+
+See Examples for three ways of generating an image.
+
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the whiskerplot glyph recognizes all the options of the
+xyplot glyph, as well as the following glyph-specific option:
+
+  Option         Description                  Default
+  ------         -----------                  -------
+
+  -range        Callback to return median,    none - data comes from feature "range" tag
+                range and quartiles for each
+                sub feature
+
+=head1 EXAMPLES
+
+Here are three examples of how to use this glyph.
+
+=head2 Example 1: Incorporating the numeric data in each subfeature
+
+ #!/usr/bin/perl
+ use strict;
+
+ use Bio::Graphics;
+ use Bio::SeqFeature::Generic;
+
+ my $bsg = 'Bio::SeqFeature::Generic';
+
+ my $feature = $bsg->new(-start=>0,-end=>1000);
+
+ for (my $i=0;$i<1000;$i+=20) {
+   my $y = (($i-500)/10)**2;
+   my $range = make_range($y);
+   my $part = $bsg->new(-start=>$i,-end=>$i+16,
+ 		       -score=>$y,-tag => { range=>$range });
+   $feature->add_SeqFeature($part);
+ }
+
+ my $panel = Bio::Graphics::Panel->new(-length=>1000,-width=>800,-key_style=>'between',
+ 				      -pad_left=>40,-pad_right=>40);
+ $panel->add_track($feature,
+ 		  -glyph=>'arrow',
+ 		  -double=>1,
+ 		  -tick=>2);
+
+ $panel->add_track($feature,
+ 		  -glyph=>'whiskerplot',
+ 		  -scale=>'both',
+ 		  -height=>200,
+ 		  -min_score => -500,
+ 		  -key  =>'Whiskers',
+ 		  -bgcolor => 'orange',
+ 		 );
+ print $panel->png;
+
+ sub make_range {
+   my $score        = shift;
+   my $range_top    = $score + 5*sqrt($score) + rand(50);
+   my $range_bottom = $score - 5*sqrt($score) - rand(50);
+   my $quartile_top    = $score + 2*sqrt($score) + rand(50);
+   my $quartile_bottom = $score - 2*sqrt($score) - rand(50);
+   return [$score,$range_bottom,$range_top,$quartile_bottom,$quartile_top];
+ }
+
+=head2 Example 2: Generating the range data with a callback
+
+ #!/usr/bin/perl
+ use strict;
+
+ use Bio::Graphics;
+ use Bio::SeqFeature::Generic;
+
+ my $bsg = 'Bio::SeqFeature::Generic';
+ my $feature = $bsg->new(-start=>0,-end=>1000);
+
+ for (my $i=0;$i<1000;$i+=20) {
+   my $y = (($i-500)/10)**2;
+   my $part = $bsg->new(-start=>$i,-end=>$i+16,-score=>$y);
+   $feature->add_SeqFeature($part);
+ }
+
+ my $panel = Bio::Graphics::Panel->new(-length=>1000,-width=>800,-key_style=>'between',
+ 				      -pad_left=>40,-pad_right=>40);
+ $panel->add_track($feature,
+ 		  -glyph=>'arrow',
+ 		  -double=>1,
+ 		  -tick=>2);
+
+ $panel->add_track($feature,
+ 		  -glyph=>'whiskerplot',
+ 		  -scale=>'both',
+ 		  -height=>200,
+ 		  -min_score => -500,
+ 		  -key  =>'Whiskers',
+ 		  -bgcolor => 'orange',
+ 		  -range => \&make_range,
+ 		 );
+ print $panel->png;
+
+ sub make_range {
+   my $feature = shift;
+   my $score        = $feature->score;
+   my $range_top    = $score + 5*sqrt($score) + rand(50);
+   my $range_bottom = $score - 5*sqrt($score) - rand(50);
+   my $quartile_top    = $score + 2*sqrt($score) + rand(50);
+   my $quartile_bottom = $score - 2*sqrt($score) - rand(50);
+   return [$score,$range_bottom,$range_top,$quartile_bottom,$quartile_top];
+ }
+
+=head2 Example 3: Generating the image from a FeatureFile
+
+=over 4
+
+=item The file:
+
+ [general]
+ pixels = 840
+ pad_left = 40
+ pad_right = 40
+
+ [contig]
+ glyph     = arrow
+ double    = 1
+ tick      = 2
+
+ [data]
+ glyph     = whiskerplot
+ scale     = both
+ height    = 200
+ min_score = -500
+ max_score = 2800
+ key       = Whiskers
+ bgcolor   = orange
+
+ chr1	.	contig	1	1000	.	.	.	Contig chr1
+ chr1	.	data	0	16	2500	.	.	Dataset data1; range 2209,2769,2368,2619
+ chr1	.	data	20	36	2304	.	.	Dataset data1; range 2051,2553,2163,2435
+ chr1	.	data	40	56	2116	.	.	Dataset data1; range 1861,2384,1983,2253
+ chr1	.	data	60	76	1936	.	.	Dataset data1; range 1706,2181,1819,2059
+ chr1	.	data	80	96	1764	.	.	Dataset data1; range 1516,1995,1646,1849
+ chr1	.	data	100	116	1600	.	.	Dataset data1; range 1359,1834,1513,1699
+ chr1	.	data	120	136	1444	.	.	Dataset data1; range 1228,1654,1330,1565
+ chr1	.	data	140	156	1296	.	.	Dataset data1; range 1105,1520,1198,1385
+ chr1	.	data	160	176	1156	.	.	Dataset data1; range 983,1373,1062,1270
+ chr1	.	data	180	196	1024	.	.	Dataset data1; range 853,1184,914,1116
+ chr1	.	data	200	216	900	.	.	Dataset data1; range 722,1093,801,965
+ chr1	.	data	220	236	784	.	.	Dataset data1; range 621,945,724,859
+ chr1	.	data	240	256	676	.	.	Dataset data1; range 532,833,605,742
+ chr1	.	data	260	276	576	.	.	Dataset data1; range 433,714,485,653
+ chr1	.	data	280	296	484	.	.	Dataset data1; range 331,600,418,545
+ chr1	.	data	300	316	400	.	.	Dataset data1; range 275,535,336,459
+ chr1	.	data	320	336	324	.	.	Dataset data1; range 198,434,270,374
+ chr1	.	data	340	356	256	.	.	Dataset data1; range 167,378,219,322
+ chr1	.	data	360	376	196	.	.	Dataset data1; range 114,303,118,249
+ chr1	.	data	380	396	144	.	.	Dataset data1; range 39,248,87,197
+ chr1	.	data	400	416	100	.	.	Dataset data1; range 17,173,68,141
+ chr1	.	data	420	436	64	.	.	Dataset data1; range -14,125,18,84
+ chr1	.	data	440	456	36	.	.	Dataset data1; range -8,74,11,64
+ chr1	.	data	460	476	16	.	.	Dataset data1; range -46,77,0,43
+ chr1	.	data	480	496	4	.	.	Dataset data1; range -40,43,-7,36
+ chr1	.	data	500	516	0	.	.	Dataset data1; range -43,0,-43,22
+ chr1	.	data	520	536	4	.	.	Dataset data1; range -6,52,-4,54
+ chr1	.	data	540	556	16	.	.	Dataset data1; range -5,38,-27,52
+ chr1	.	data	560	576	36	.	.	Dataset data1; range -43,109,18,66
+ chr1	.	data	580	596	64	.	.	Dataset data1; range -1,134,3,112
+ chr1	.	data	600	616	100	.	.	Dataset data1; range 49,186,69,124
+ chr1	.	data	620	636	144	.	.	Dataset data1; range 79,225,71,169
+ chr1	.	data	640	656	196	.	.	Dataset data1; range 124,289,120,266
+ chr1	.	data	660	676	256	.	.	Dataset data1; range 154,378,197,320
+ chr1	.	data	680	696	324	.	.	Dataset data1; range 220,439,249,396
+ chr1	.	data	700	716	400	.	.	Dataset data1; range 291,511,331,458
+ chr1	.	data	720	736	484	.	.	Dataset data1; range 350,627,400,572
+ chr1	.	data	740	756	576	.	.	Dataset data1; range 446,718,502,633
+ chr1	.	data	760	776	676	.	.	Dataset data1; range 515,833,576,777
+ chr1	.	data	780	796	784	.	.	Dataset data1; range 606,959,724,856
+ chr1	.	data	800	816	900	.	.	Dataset data1; range 747,1058,799,1004
+ chr1	.	data	820	836	1024	.	.	Dataset data1; range 817,1231,958,1089
+ chr1	.	data	840	856	1156	.	.	Dataset data1; range 961,1341,1069,1225
+ chr1	.	data	860	876	1296	.	.	Dataset data1; range 1103,1511,1219,1385
+ chr1	.	data	880	896	1444	.	.	Dataset data1; range 1218,1660,1338,1535
+ chr1	.	data	900	916	1600	.	.	Dataset data1; range 1377,1828,1496,1703
+ chr1	.	data	920	936	1764	.	.	Dataset data1; range 1547,2020,1674,1858
+ chr1	.	data	940	956	1936	.	.	Dataset data1; range 1691,2188,1824,2043
+ chr1	.	data	960	976	2116	.	.	Dataset data1; range 1869,2376,2019,2225
+ chr1	.	data	980	996	2304	.	.	Dataset data1; range 2040,2554,2178,2418
+
+=item The script to render it
+
+ #!/usr/bin/perl
+
+ use strict;
+ use Bio::Graphics::FeatureFile;
+
+ my $data = Bio::Graphics::FeatureFile->new(-file=>'test.gff');
+
+ my(undef,$panel) = $data->render;
+ print $panel->png;
+
+=back
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Track>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::anchored_arrow>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::box>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/xyplot.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/xyplot.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph/xyplot.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,669 @@
+package Bio::Graphics::Glyph::xyplot;
+
+use strict;
+#use GD 'gdTinyFont';
+
+use base qw(Bio::Graphics::Glyph::minmax);
+
+use constant DEFAULT_POINT_RADIUS=>4;
+
+# Dispatch table mapping each -point_symbol name to the function that
+# draws it; _draw_points() looks the symbol up here.
+my %SYMBOLS = (
+	       triangle => \&draw_triangle,
+	       square   => \&draw_square,
+	       disc     => \&draw_disc,
+	       point    => \&draw_point,
+	      );
+
+# The inherited pad_left recurses through every part.  That is only
+# wanted for the top-level glyph, so a glyph at any other level reports
+# no left padding at all.
+sub pad_left {
+  my ($self,@args) = @_;
+  return $self->level == 0 ? $self->SUPER::pad_left(@args) : 0;
+}
+
+# The inherited pad_right recurses through every part.  That is only
+# wanted for the top-level glyph, so a glyph at any other level reports
+# no right padding at all.
+sub pad_right {
+  my ($self,@args) = @_;
+  return $self->level == 0 ? $self->SUPER::pad_right(@args) : 0;
+}
+
+# Size used for point symbols: the -point_radius option, or
+# DEFAULT_POINT_RADIUS when the option is unset (or false).
+sub point_radius {
+  my $self   = shift;
+  my $radius = $self->option('point_radius');
+  return $radius ? $radius : DEFAULT_POINT_RADIUS;
+}
+
+# Top padding comes from the generic glyph's implementation, which is
+# called by its fully qualified name.
+sub pad_top {
+  my $self = shift;
+  return $self->Bio::Graphics::Glyph::generic::pad_top(@_);
+}
+
+# Bottom padding for the glyph: the generic glyph's value, but never less
+# than a quarter of the scale font's height.
+#
+# The padding is returned explicitly.  Without an explicit return the sub
+# ended on the "if" statement, so whenever the generic padding was already
+# large enough the caller received the false result of the comparison
+# instead of the padding.
+sub pad_bottom {
+  my $self    = shift;
+  my $pad     = $self->Bio::Graphics::Glyph::generic::pad_bottom(@_);
+  my $minimum = ($self->font('gdTinyFont')->height)/4;  # extra room for the scale
+  $pad = $minimum if $pad < $minimum;
+  return $pad;
+}
+
+# Side on which the scale is drawn when no -scale option is given.
+sub default_scale { 'right' }
+
+# Draw the whole plot: work out the score range and scaling, give every
+# scored part its y pixel position, call the drawing method for the
+# chosen graph type, then draw the scale and the optional label and
+# description.
+#
+# Arguments: the GD image followed by the x and y offsets that are passed
+# to calculate_boundaries().  A glyph without parts is handed to the
+# parent class's draw() instead.
+sub draw {
+  my $self = shift;
+
+  my ($gd,$dx,$dy) = @_;
+  my ($left,$top,$right,$bottom) = $self->calculate_boundaries($dx,$dy);
+  my @parts = $self->parts;
+
+  return $self->SUPER::draw(@_) unless @parts > 0;
+
+  # minmax() is inherited; presumably it honours the -min_score and
+  # -max_score options -- not visible from this file
+  my ($min_score,$max_score) = $self->minmax(\@parts);
+
+  my $side = $self->_determine_side();
+
+  # if a scale is called for, then we adjust the max and min to be even
+  # multiples of a power of 10.
+  if ($side) {
+    $max_score = max10($max_score);
+    $min_score = min10($min_score);
+  }
+
+  # pixels per unit of score; falls back to 1 when the range is empty
+  my $height = $self->height;
+  my $scale  = $max_score > $min_score ? $height/($max_score-$min_score)
+                                       : 1;
+
+  my $x = $left;
+  my $y = $top + $self->pad_top;
+
+  # position of "0" on the scale
+  my $y_origin = $min_score <= 0 ? $bottom - (0 - $min_score) * $scale : $bottom;
+  $y_origin    = $top if $max_score < 0;
+
+  # stash what score2position() needs
+  my $clip_ok = $self->option('clip');
+  $self->{_clip_ok}   = $clip_ok;
+  $self->{_scale}     = $scale;
+  $self->{_min_score} = $min_score;
+  $self->{_max_score} = $max_score;
+  $self->{_top}       = $top;
+  $self->{_bottom}    = $bottom;
+
+  # now seed all the parts with the information they need to draw their positions
+  # (parts without a defined score are left without a _y_position)
+  foreach (@parts) {
+    my $s = $_->score;
+    next unless defined $s;
+    $_->{_y_position}   = $self->score2position($s);
+  }
+
+  # -graph_type, with -graphtype accepted as an alternative spelling
+  my $type        = $self->option('graph_type') || $self->option('graphtype') || 'boxes';
+  my $draw_method = $self->lookup_draw_method($type);
+  $self->throw("Invalid graph type '$type'") unless $draw_method;
+  $self->$draw_method($gd,$x,$y,$y_origin);
+
+  # the scale, label and description are drawn after the data
+  $self->_draw_scale($gd,$scale,$min_score,$max_score,$dx,$dy,$y_origin);
+  $self->draw_label(@_)       if $self->option('label');
+  $self->draw_description(@_) if $self->option('description');
+}
+
+# Map a graph type name onto the name of the method that draws it.
+# Returns nothing for an unrecognized type, which draw() reports as an
+# "Invalid graph type" error.
+#
+# "linepoints" has an entry of its own: when it shared the tests for
+# "line" and "points" the "line" test always matched first, so the points
+# were never drawn.
+sub lookup_draw_method {
+  my $self = shift;
+  my $type = shift;
+
+  return '_draw_histogram'            if $type eq 'histogram';
+  return '_draw_boxes'                if $type eq 'boxes';
+  return '_draw_line'                 if $type eq 'line';
+  return '_draw_points'               if $type eq 'points';
+  return '_draw_linepoints'           if $type eq 'linepoints';
+  return;
+}
+
+# Draw the connecting line first and then the point symbols on top of it.
+# Receives the same arguments as the other drawing methods.
+sub _draw_linepoints {
+  my $self = shift;
+  $self->_draw_line(@_);
+  $self->_draw_points(@_);
+}
+
+# Score of this glyph: an explicit "score" option wins; otherwise the
+# feature is asked, with any exception trapped by eval.
+sub score {
+  my $self     = shift;
+  my $override = $self->option('score');
+  unless (defined $override) {
+    return eval { $self->feature->score };
+  }
+  return $override;
+}
+
+# Convert a score into a y pixel position using the scaling values that
+# draw() stored on the glyph.  Returns nothing for an undefined score.
+# When clipping is enabled, scores outside the min/max range are pinned
+# to the bottom or top edge.
+sub score2position {
+  my ($self,$score) = @_;
+
+  return unless defined $score;
+
+  if ($self->{_clip_ok}) {
+    return $self->{_bottom} if $score < $self->{_min_score};
+    return $self->{_top}    if $score > $self->{_max_score};
+  }
+
+  my $offset = ($score - $self->{_min_score}) * $self->{_scale};
+  return $self->{_bottom} - $offset;
+}
+
+# Base-10 logarithm.
+sub log10 {
+  my $value = shift;
+  return log($value)/log(10);
+}
+
+# Round a value up to a whole multiple of the power of ten given by the
+# integer part of its base-10 logarithm (e.g. 123 becomes 200).  Zero
+# stays zero, negative values are handled by mirroring through min10(),
+# and values below one are scaled up by ten, rounded and scaled back.
+sub max10 {
+  my $value = shift;
+  return 0                   if $value == 0;
+  return -min10(-$value)     if $value < 0;
+  return max10($value*10)/10 if $value < 1;
+
+  my $unit     = 10**int(log10($value));
+  my $multiple = $value/$unit;
+  return int($multiple) == $multiple ? $multiple*$unit
+                                     : $unit*int(($value+$unit)/$unit);
+}
+
+# Counterpart of max10(): round a value down to a whole multiple of the
+# same power of ten (e.g. 123 becomes 100).
+sub min10 {
+  my $value = shift;
+  return 0                   if $value == 0;
+  return -max10(-$value)     if $value < 0;
+  return min10($value*10)/10 if $value < 1;
+
+  my $unit     = 10**int(log10($value));
+  my $multiple = $value/$unit;
+  return int($multiple) == $multiple ? $multiple*$unit
+                                     : $unit*int($value/$unit);
+}
+
+# Draw the parts as the outline of a histogram: a horizontal line at
+# each part's y position, joined to the following part either directly
+# (when the two parts abut) or by dropping to the bottom of the part and
+# coming back up at the start of the next one.
+#
+# Arguments: the GD image and the left and top offsets handed on to each
+# part's calculate_boundaries().
+sub _draw_histogram {
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+
+  # draw() gives a y position only to parts that have a score; the rest
+  # are left out rather than being drawn from an undefined position
+  my @parts   = grep { defined $_->{_y_position} } $self->parts;
+  return unless @parts;
+  my $fgcolor = $self->fgcolor;
+
+  # draw each of the component lines of the histogram surface
+  for my $i (0..$#parts) {
+    my $part = $parts[$i];
+    my ($x1,$y1,$x2,$y2) = $part->calculate_boundaries($left,$top);
+    $gd->line($x1,$part->{_y_position},$x2,$part->{_y_position},$fgcolor);
+
+    my $next = $parts[$i+1] or next;
+    my ($x3,$y3,$x4,$y4) = $next->calculate_boundaries($left,$top);
+    if ($x2 == $x3) {# connect vertically to next level
+      $gd->line($x2,$part->{_y_position},$x2,$next->{_y_position},$fgcolor);
+    } else {
+      $gd->line($x2,$part->{_y_position},$x2,$y2,$fgcolor); # to bottom
+      $gd->line($x2,$y2,$x3,$y2,$fgcolor);                  # to right
+      $gd->line($x3,$y4,$x3,$next->{_y_position},$fgcolor); # up
+    }
+  }
+
+  # end points: from bottom to first
+  my ($x1,$y1,$x2,$y2) = $parts[0]->calculate_boundaries($left,$top);
+  $gd->line($x1,$y2,$x1,$parts[0]->{_y_position},$fgcolor);
+  # from last to bottom
+  my ($x3,$y3,$x4,$y4) = $parts[-1]->calculate_boundaries($left,$top);
+  $gd->line($x4,$parts[-1]->{_y_position},$x4,$y4,$fgcolor);
+}
+
+# Draw each part as a filled box reaching from the zero line ($y_origin)
+# to the part's y position.
+#
+# Arguments: the GD image, the left and top offsets, and the y pixel
+# position of the zero value.
+#
+# Colors: a box whose y position is numerically below $y_origin is
+# filled with the background color, any other box with -neg_color
+# (which defaults to the background color).  When a -part_color option
+# is set it is looked up for every part and used for both cases.
+sub _draw_boxes {
+  my $self = shift;
+  my ($gd,$left,$top,$y_origin) = @_;
+
+  my @parts    = $self->parts;
+  my $fgcolor  = $self->fgcolor;
+  my $bgcolor  = $self->bgcolor;
+  my $lw       = $self->linewidth;
+  my $negative = $self->color('neg_color') || $bgcolor;
+
+  my $partcolor = $self->code_option('part_color');
+  my $factory   = $self->factory;
+
+  for my $part (@parts) {
+
+    # draw() gives a y position only to parts that have a score; a part
+    # without one has nothing to plot
+    my $y = $part->{_y_position};
+    next unless defined $y;
+
+    my ($color,$negcolor);
+
+    # special check here for the part_color being defined so as not to introduce lots of
+    # checking overhead when it isn't
+    if ($partcolor) {
+      $color    = $factory->translate_color($factory->option($part,'part_color',0,0));
+      $negcolor = $color;
+    } else {
+      $color    = $bgcolor;
+      $negcolor = $negative;
+    }
+
+    my ($x1,$x2) = ($left+$part->{left},$left+$part->{left}+$part->{width}-1);
+    if ($y < $y_origin) {
+      $self->filled_box($gd,$x1,$y,$x2,$y_origin,$color,$fgcolor,$lw);
+    } else {
+      $self->filled_box($gd,$x1,$y_origin,$x2,$y,$negcolor,$fgcolor,$lw);
+    }
+  }
+}
+
+# Join the centers of successive parts with straight line segments.
+#
+# Arguments: the GD image and the left and top offsets.
+sub _draw_line {
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+
+  # draw() gives a y position only to parts that have a score; the line
+  # simply passes over the others
+  my @parts   = grep { defined $_->{_y_position} } $self->parts;
+  return unless @parts;
+  my $fgcolor = $self->fgcolor;
+
+  # connect to center positions of each interval
+  my $first_part = shift @parts;
+  my ($x1,$y1,$x2,$y2) = $first_part->calculate_boundaries($left,$top);
+  my $current_x = ($x1+$x2)/2;
+  my $current_y = $first_part->{_y_position};
+
+  for my $part (@parts) {
+    my ($x1,$x2) = ($left+$part->{left},$left+$part->{left}+$part->{width}-1);
+    my $next_x = ($x1+$x2)/2;
+    my $next_y = $part->{_y_position};
+    $gd->line($current_x,$current_y,$next_x,$next_y,$fgcolor);
+    ($current_x,$current_y) = ($next_x,$next_y);
+  }
+
+}
+
+# Draw a symbol at the center of every scored part.
+#
+# Arguments: the GD image and the left and top offsets.
+#
+# The symbol is chosen with -point_symbol (default "point") and looked
+# up in %SYMBOLS.  The documented value "none" draws nothing, and any
+# other unknown name is reported with throw(); both used to end in a
+# call through an undefined code reference.  The symbol color is the
+# background color unless a -part_color option is set, in which case it
+# is looked up for every part.
+sub _draw_points {
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+  my $symbol_name = $self->option('point_symbol') || 'point';
+  return if $symbol_name eq 'none';
+  my $symbol_ref  = $SYMBOLS{$symbol_name}
+    or $self->throw("Invalid point symbol '$symbol_name'");
+
+  # draw() gives a y position only to parts that have a score
+  my @parts   = grep { defined $_->{_y_position} } $self->parts;
+  my $bgcolor = $self->bgcolor;
+  my $pr      = $self->point_radius;
+
+  my $partcolor = $self->code_option('part_color');
+  my $factory   = $self->factory;
+
+  for my $part (@parts) {
+    my ($x1,$x2) = ($left+$part->{left},$left+$part->{left}+$part->{width}-1);
+    my $x = ($x1+$x2)/2;
+    my $y = $part->{_y_position};
+
+    my $color = $partcolor
+      ? $factory->translate_color($factory->option($part,'part_color',0,0))
+      : $bgcolor;
+
+    $symbol_ref->($gd,$x,$y,$pr,$color);
+  }
+}
+
+# Work out on which side the scale is to be drawn.
+#
+# Returns nothing when the -scale option is "none"; otherwise the option
+# value, or default_scale() when the option is unset.  The option is
+# tested for definedness before it is compared, so an unset option no
+# longer takes part in a string comparison.
+sub _determine_side
+{
+  my $self = shift;
+  my $side = $self->option('scale');
+  return if defined $side && $side eq 'none';
+  return $side || $self->default_scale();
+}
+
+# Draw the scale: a vertical axis on the requested side(s), a horizontal
+# line at $y_origin, and tick marks with labels for the maximum, the
+# midpoint and the minimum (plus zero when the range straddles it).
+#
+# Arguments: GD image, scale factor (unused here), minimum and maximum
+# scores, the x and y offsets for calculate_boundaries(), and the y pixel
+# position at which the horizontal line is drawn.
+sub _draw_scale {
+  my ($self,$gd,$scale,$min,$max,$dx,$dy,$y_origin) = @_;
+  my ($x_left,$y_top,$x_right,$y_bottom) = $self->calculate_boundaries($dx,$dy);
+
+  $y_bottom -= $self->pad_bottom - 1;
+
+  my $side     = $self->_determine_side();
+  my $on_left  = ($side eq 'left'  || $side eq 'both');
+  my $on_right = ($side eq 'right' || $side eq 'both');
+
+  my $fg   = $self->fgcolor;
+  my $font = $self->font('gdTinyFont');
+
+  # vertical axis line(s)
+  $gd->line($x_left, $y_top,$x_left, $y_bottom,$fg) if $on_left;
+  $gd->line($x_right,$y_top,$x_right,$y_bottom,$fg) if $on_right;
+
+  # horizontal line, drawn whichever side was asked for
+  $gd->line($x_left,$y_origin,$x_right,$y_origin,$fg);
+
+  # each tick is a [y pixel, label value] pair
+  my @ticks = ([$y_top,$max],[($y_top+$y_bottom)/2,($min+$max)/2],[$y_bottom,$min]);
+  push @ticks,[$y_origin,0] if $min < 0 && $max > 0;
+
+  my $previous_label_y = -99999999999;
+
+  for my $tick (@ticks) {
+    my ($tick_y,$label) = @$tick;
+
+    $gd->line($x_left-3,$tick_y,$x_left,$tick_y,$fg)   if $on_left;
+    $gd->line($x_right,$tick_y,$x_right+3,$tick_y,$fg) if $on_right;
+
+    my $label_y = $tick_y - ($font->height/2);
+
+    # prevent labels from clashing
+    next unless $label_y > $previous_label_y + $font->height;
+
+    $gd->string($font,$x_left - $font->width * length($label) - 3,$label_y,$label,$fg)
+      if $on_left;
+    $gd->string($font,$x_right + 5,$label_y,$label,$fg)
+      if $on_right;
+
+    $previous_label_y = $label_y;
+  }
+}
+
+# Bumping is switched off for this glyph: always report 0.
+sub bump { 0 }
+
+# True (1) when the -graph_type option is "line" or "linepoints",
+# false otherwise.
+sub connector {
+  my $self       = shift;
+  my $graph_type = $self->option('graph_type');
+  my $joined     = ($graph_type eq 'line' || $graph_type eq 'linepoints');
+  return $joined ? 1 : '';
+}
+
+# Height of the plot: the -graph_height option when it is set to a true
+# value, otherwise whatever the parent class reports.
+sub height {
+  my $self         = shift;
+  my $graph_height = $self->option('graph_height');
+  return $graph_height if $graph_height;
+  return $self->SUPER::height;
+}
+
+# Point symbol: the outline of a triangle around ($x,$y).  The corners
+# lie half of $pr away from the center in each direction.
+sub draw_triangle {
+  my ($gd,$x,$y,$pr,$color) = @_;
+  my $half   = $pr/2;
+  my @left   = ($x-$half,$y+$half);
+  my @middle = ($x,      $y-$half);
+  my @right  = ($x+$half,$y+$half);
+  $gd->line(@left,  @middle,$color);
+  $gd->line(@middle,@right, $color);
+  $gd->line(@right, @left,  $color);
+}
+# Point symbol: the outline of a square centered on ($x,$y) whose sides
+# are $pr long.
+sub draw_square {
+  my ($gd,$x,$y,$pr,$color) = @_;
+  my $half = $pr/2;
+  my ($x_min,$x_max) = ($x-$half,$x+$half);
+  my ($y_min,$y_max) = ($y-$half,$y+$half);
+  $gd->line($x_min,$y_min,$x_max,$y_min,$color);
+  $gd->line($x_max,$y_min,$x_max,$y_max,$color);
+  $gd->line($x_max,$y_max,$x_min,$y_max,$color);
+  $gd->line($x_min,$y_max,$x_min,$y_min,$color);
+}
+# Point symbol: a full 0-360 degree arc centered on the point.
+# NOTE(review): $pr is passed as both the fourth and fifth argument of
+# arc(); if those are the arc's width and height, as in GD's documented
+# signature, the value acts as a diameter rather than a radius -- confirm.
+sub draw_disc {
+  my ($gd,$center_x,$center_y,$size,$color) = @_;
+  $gd->arc($center_x,$center_y,$size,$size,0,360,$color);
+}
+# Point symbol: a single pixel at the given position.  The size argument
+# is accepted for uniformity with the other symbols but is not used.
+sub draw_point {
+  my ($gd,$pixel_x,$pixel_y,undef,$color) = @_;
+  $gd->setPixel($pixel_x,$pixel_y,$color);
+}
+
+# Build the sample glyph used for the key: a feature with three
+# segments (spanning pixels 0-9, 10-19 and 20-29) carrying the scores
+# 10, 50 and 25, rendered through a clone of this glyph's factory with
+# the label switched on, bumping switched off and solid connectors.
+sub keyglyph {
+  my $self = shift;
+
+  my $bp_per_pixel = 1/$self->scale;  # base pairs/pixel
+
+  my @segments = map { [ $_*$bp_per_pixel, ($_+9)*$bp_per_pixel ] } (0,10,20);
+  my $feature  = Bio::Graphics::Feature->new(-segments => \@segments,
+                                             -name     => 'foo bar',
+                                             -strand   => '+1');
+
+  my @scores = (10,50,25);
+  for my $index (0..$#scores) {
+    ($feature->segments)[$index]->score($scores[$index]);
+  }
+
+  my $factory = $self->factory->clone;
+  for my $setting ([label => 1],[bump => 0],[connector => 'solid']) {
+    $factory->set_option(@$setting);
+  }
+
+  my $glyph = $factory->make_glyph(0,$feature);
+  return $glyph;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph::xyplot - The xyplot glyph
+
+=head1 SYNOPSIS
+
+  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
+
+=head1 DESCRIPTION
+
+This glyph is used for drawing features that have a position on the
+genome and a numeric value.  It can be used to represent gene
+prediction scores, motif-calling scores, percent similarity,
+microarray intensities, or other features that require a line plot.
+
+The X axis represents the position on the genome, as per all other
+glyphs.  The Y axis represents the score.  Options allow you to set
+the height of the glyph, the maximum and minimum scores, the color of
+the line and axis, and the symbol to draw.
+
+The plot is designed to work on a single feature group that contains
+subfeatures.  It is the subfeatures that carry the score
+information. The best way to arrange for this is to create an
+aggregator for the feature.  We'll take as an example a histogram of
+repeat density in which intervals are spaced every megabase and the
+score indicates the number of repeats in the interval; we'll assume
+that the database has been loaded in such a way that each interval
+is a distinct feature with the method name "density" and the source
+name "repeat".  Furthermore, all the repeat features are grouped
+together into a single group (the name of the group is irrelevant).
+If you are using Bio::DB::GFF and Bio::Graphics directly, the sequence
+of events would look like this:
+
+  my $agg = Bio::DB::GFF::Aggregator->new(-method    => 'repeat_density',
+                                          -sub_parts => 'density:repeat');
+  my $db  = Bio::DB::GFF->new(-dsn=>'my_database',
+                              -aggregators => $agg);
+  my $segment  = $db->segment('Chr1');
+  my @features = $segment->features('repeat_density');
+
+  my $panel = Bio::Graphics::Panel->new(-pad_left=>40,-pad_right=>40);
+  $panel->add_track(\@features,
+                    -glyph => 'xyplot',
+  		    -graph_type=>'points',
+		    -point_symbol=>'disc',
+		    -point_radius=>4,
+		    -scale=>'both',
+		    -height=>200,
+  );
+
+If you are using Generic Genome Browser, you will add this to the
+configuration file:
+
+  aggregators = repeat_density{density:repeat}
+                clone alignment etc
+
+Note that it is a good idea to add some padding to the left and right
+of the panel; otherwise the scale will be partially cut off by the
+edge of the image.
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See
+L<Bio::Graphics::Glyph> for a full explanation.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -label        Whether to draw a label	       0 (false)
+
+  -description  Whether to draw a description  0 (false)
+
+  -hilite       Highlight color                undef (no color)
+
+In addition, the xyplot glyph recognizes the following
+glyph-specific options:
+
+  Option         Description                  Default
+  ------         -----------                  -------
+
+  -max_score   Maximum value of the	      Calculated
+               feature's "score" attribute
+
+  -min_score   Minimum value of the           Calculated
+               feature's "score" attribute
+
+  -graph_type  Type of graph to generate.     boxes
+               Options are: "histogram",
+               "boxes", "line", "points",
+               or "linepoints".
+
+  -point_symbol Symbol to use. Options are    point
+                "triangle", "square", "disc",
+                "point", and "none".
+
+  -point_radius Radius of the symbol, in      4
+                pixels (does not apply
+                to "point")
+
+  -scale        Position where the Y axis     right
+                scale is drawn if any.
+                It should be one of
+                "left", "right", "both" or "none"
+
+  -graph_height Specify height of the graph   Same as the
+                                              "height" option.
+
+  -neg_color   For boxes only, bgcolor for    Same as bgcolor
+               points with negative scores
+
+  -part_color  For boxes & points only,       none
+               bgcolor of each part (should
+               be a callback). Supersedes
+               -neg_color.
+
+  -clip        If min_score and/or max_score  false
+               are manually specified, then
+               setting this to true will
+               cause values outside the
+               range to be clipped.
+
+Note that when drawing scales on the left or right that the scale is
+actually drawn a few pixels B<outside> the boundaries of the glyph.
+You may wish to add some padding to the image using -pad_left and
+-pad_right when you create the panel.
+
+The B<-part_color> option can be used to color each part of the
+graph. Only the "boxes", "points" and "linepoints" styles are
+affected by this.  Here's a simple example:
+
+  $panel->add_track(\@affymetrix_data,
+                      -glyph      => 'xyplot',
+                      -graph_type => 'boxes',
+                      -part_color => sub {
+                                   my $score = shift->score;
+	                           return 'red' if $score < 0;
+	                           return 'lightblue' if $score < 500;
+                                   return 'blue'      if $score >= 500;
+                                  }
+                      );
+
+=head2 METHODS
+
+For those developers wishing to derive new modules based on this
+glyph, the main method to override is:
+
+=over 4
+
+=item 'method_name' = $glyph-E<gt>lookup_draw_method($type)
+
+This method accepts the name of a graph type (such as 'histogram') and
+returns the name of a method that will be called to draw the contents
+of the graph, for example '_draw_histogram'. This method will be
+called with four arguments:
+
+   $self->$draw_method($gd,$left,$top,$y_origin)
+
+where $gd is the GD object, $left and $top are the left and top
+positions of the whole glyph (which includes the scale and label), and
+$y_origin is the position of the zero value on the y axis (in
+pixels). The y axis scale and labels are drawn after this method
+returns. By the time it is called, the scale of the drawing (in pixels
+per unit score) will have been calculated and stored in
+$self-E<gt>{_scale}. The y position (in pixels) of each point to graph
+will have been stored into the part, as $part-E<gt>{_y_position}. Hence
+you could draw a simple scatter plot with this code:
+
+ sub lookup_draw_method {
+    my $self = shift;
+    my $type = shift;
+    if ($type eq 'simple_scatterplot') {
+      return 'draw_points';
+    } else {
+      return $self->SUPER::lookup_draw_method($type);
+    }
+ }
+
+ sub draw_points {
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+  my @parts   = $self->parts;
+  my $bgcolor = $self->bgcolor;
+
+  for my $part (@parts) {
+    my ($x1,$y1,$x2,$y2) = $part->calculate_boundaries($left,$top);
+    my $x = ($x1+$x2)/2;  # take center
+    my $y = $part->{_y_position};
+    $gd->setPixel($x,$y,$bgcolor);
+  }
+ }
+
+=item $y_position = $self-E<gt>score2position($score)
+
+Translate a score into a y pixel position, obeying clipping rules and
+min and max values.
+
+=back
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Track>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::anchored_arrow>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::box>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Glyph.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1835 @@
+package Bio::Graphics::Glyph;
+
+# $Id: Glyph.pm,v 1.113.4.9 2006/11/29 02:38:33 lstein Exp $
+
+use strict;
+use Carp 'croak','cluck';
+use constant BUMP_SPACING => 2; # vertical distance between bumped glyphs
+use Bio::Root::Version;
+
+use base qw(Bio::Root::Root);
+
+my %LAYOUT_COUNT;
+
+# the CM1 and CM2 constants control the size of the hash used to
+# detect collisions.
+use constant CM1 => 200; # big bin, x axis
+use constant CM2 => 50;  # big bin, y axis
+use constant CM3 => 50;  # small bin, x axis
+use constant CM4 => 50;  # small bin, y axis
+use constant DEBUG => 0;
+
+use constant QUILL_INTERVAL => 8;  # number of pixels between Jim Kent style intron "quills"
+
+# a bumpable graphical object that has bumpable graphical subparts
+
+# args:  -feature => $feature_object (may contain subsequences)
+#        -factory => $factory_object (called to create glyphs for subsequences)
+# In this scheme, the factory decides based on stylesheet information what glyph to
+# draw and what configuration options to use. This allows for heterogeneous tracks.
+# Constructor.  Builds a glyph for one feature and, recursively, glyphs for
+# those subfeatures that overlap the panel's start..end range.
+#
+# Named arguments:
+#   -feature => feature object to render (required; throws if missing)
+#   -factory => glyph factory (defaults to $class->default_factory)
+#   -level   => nesting level of this glyph (default 0)
+#   -flip    => true to mark the glyph as flipped
+#   -point   => true to store the glyph height in $self->{point}
+#
+# Returns the new glyph object.
+sub new {
+  my $class = shift;
+  my %arg = @_;
+
+  my $feature = $arg{-feature} or $class->throw("No feature $class");
+  my $factory = $arg{-factory} || $class->default_factory;
+  my $level   = $arg{-level} || 0;
+  my $flip    = $arg{-flip};
+
+  my $self = bless {},$class;
+  $self->{feature} = $feature;
+  $self->{factory} = $factory;
+  $self->{level}   = $level;
+  $self->{flip}++  if $flip;
+  $self->{top} = 0;
+
+  my $panel = $factory->panel;
+  my $p_start = $panel->start;
+  my $p_end   = $panel->end;
+
+  warn $self if DEBUG;
+  warn $feature if DEBUG;
+
+  my @subfeatures = $self->subfeat($feature);
+
+  # Optionally drop subfeatures whose method matches one of the
+  # whitespace-separated patterns in the 'ignore_sub_part' option.
+  if (my $ignore = $self->option('ignore_sub_part')) {
+    # the pattern does not depend on the subfeature, so build it only once
+    my $ignore_str = join('|', split /\s+/, $ignore);
+    @subfeatures   = grep { $_->method !~ /$ignore_str/ } @subfeatures;
+  }
+
+  my @visible_subfeatures = grep {$p_start <= $_->end && $p_end >= $_->start} @subfeatures;
+
+  # remember whether subparts exist at all, even if none is visible
+  $self->feature_has_subparts(@subfeatures>0);
+
+  my @subglyphs;
+  if (@visible_subfeatures) {
+    # dynamic glyph resolution; subglyphs are stored sorted by left edge
+    @subglyphs = map { $_->[0] }
+          sort { $a->[1] <=> $b->[1] }
+            map { [$_, $_->left ] }
+              $factory->make_glyph($level+1,@visible_subfeatures);
+    $self->{parts}   = \@subglyphs;
+  }
+
+  my ($start,$stop) = ($self->start, $self->stop);
+  if (defined $start && defined $stop && $start ne '') {  # more paranoia
+    ($start,$stop) = ($stop,$start) if $start > $stop;  # sheer paranoia
+    # the +1 here is critical for allowing features to meet nicely at nucleotide resolution
+    my ($left,$right) = $factory->map_pt($start,$stop+1);
+    $self->{left}    = $left;
+    $self->{width}   = $right - $left + 1;
+  }
+
+  # NOTE: left and width are deliberately NOT widened to span the subglyphs;
+  # doing so clashes with the pad_left/pad_right calculations.  The values
+  # that used to be computed here for that purpose were never used.
+
+  $self->{point} = $arg{-point} ? $self->height : undef;
+
+  return $self;
+}
+
+# Subglyphs of this glyph.  Returns nothing when there are none; otherwise
+# the list of parts in list context, or the underlying array ref in scalar
+# context.
+sub parts      {
+  my $self  = shift;
+  my $parts = $self->{parts};
+  return unless $parts;
+  return @$parts if wantarray;
+  return $parts;
+}
+
+# This is different from parts(): parts() returns the subglyphs that are
+# contained within the current viewing range, whereas feature_has_subparts()
+# reports whether the feature has any subparts, even if they are off the
+# screen.
+#
+# With an argument, stores and returns the flag.  Without one, returns 0 when
+# maxdepth is explicitly 0, 1 for compound features, and otherwise the stored
+# flag.
+sub feature_has_subparts {
+  my $self = shift;
+
+  return $self->{feature_has_subparts} = shift if @_;
+
+  # maxdepth() returns undef when no limit applies.  In Perl undef == 0 is
+  # true, so definedness must be tested first; otherwise this method answers
+  # 0 whenever maxdepth is unset and the stored flag is never consulted.
+  my $maxdepth = $self->maxdepth;
+  return 0 if defined $maxdepth && $maxdepth == 0;
+
+  my $feature = $self->feature;
+  return 1 if $feature->can('compound') && $feature->compound;
+  return $self->{feature_has_subparts};
+}
+
+# Simple read-only accessors.  panel and scale are delegated to the factory.
+sub feature { my $self = shift; return $self->{feature} }
+sub factory { my $self = shift; return $self->{factory} }
+sub panel   { my $self = shift; return $self->factory->panel }
+sub point   { my $self = shift; return $self->{point} }
+sub scale   { my $self = shift; return $self->factory->scale }
+# Get/set the flip flag.  Always returns the value held before the call.
+sub flip    {
+  my ($self,@new) = @_;
+  my $previous = $self->{flip};
+  $self->{flip} = $new[0] if @new;
+  return $previous;
+}
+# Start coordinate of the glyph, computed once and cached in $self->{start}.
+# When flipped, the feature's end is mirrored about the panel end (0 if the
+# feature has no end).  Otherwise the feature's start is used, falling back
+# to panel offset - 1 when it is undefined.
+sub start   {
+  my $self = shift;
+  return $self->{start} if exists $self->{start};
+
+  my $feature = $self->{feature};
+  my $start;
+  if ($self->{flip}) {
+    if (defined $feature->end) {
+      $start = $self->panel->end + 1 - $feature->end;
+    } else {
+      $start = 0;
+    }
+  } elsif (defined $feature->start) {
+    $start = $feature->start;
+  } else {
+    $start = $self->panel->offset - 1;
+  }
+
+  return $self->{start} = $start;
+}
+
+# Stop coordinate of the glyph, computed once and cached in $self->{stop}.
+# When flipped, the feature's start is mirrored about the panel end (panel
+# offset - 1 if the feature has no start).  Otherwise the feature's end is
+# used, falling back to panel offset + panel length + 1 when it is undefined.
+sub stop    {
+  my $self = shift;
+  return $self->{stop} if exists $self->{stop};
+
+  my $feature = $self->{feature};
+  my $stop;
+  if ($self->{flip}) {
+    if (defined $feature->start) {
+      $stop = $self->panel->end + 1 - $feature->start;
+    } else {
+      $stop = $self->panel->offset - 1;
+    }
+  } elsif (defined $feature->end) {
+    $stop = $feature->end;
+  } else {
+    $stop = $self->panel->offset+$self->panel->length+1;
+  }
+
+  return $self->{stop} = $stop;
+}
+# end is an alias for stop; length is stop minus start
+sub end     { my $self = shift; return $self->stop }
+sub length { my $self = shift; return $self->stop - $self->start }
+# Feature score (0 if the feature has none), cached in $self->{score}.
+sub score {
+    my $self = shift;
+    unless (exists $self->{score}) {
+        $self->{score} = $self->{feature}->score || 0;
+    }
+    return $self->{score};
+}
+# Feature strand (0 if the feature has none), cached in $self->{strand}.
+sub strand {
+    my $self = shift;
+    unless (exists $self->{strand}) {
+        $self->{strand} = $self->{feature}->strand || 0;
+    }
+    return $self->{strand};
+}
+# coordinate mapping is delegated to the factory
+sub map_pt  { my $self = shift; return $self->{factory}->map_pt(@_) }
+sub map_no_trunc { my $self = shift; return $self->{factory}->map_no_trunc(@_) }
+
+# Add features to this glyph.  Each argument is either a single feature,
+# which becomes a new part made by the factory at level 0, or an array ref
+# of features, which is handed to add_group().
+sub add_feature {
+  my ($self,@features) = @_;
+  my $factory = $self->factory;
+
+  foreach my $item (@features) {
+    if (ref $item eq 'ARRAY') {
+      $self->add_group(@$item);
+      next;
+    }
+    warn $factory if DEBUG;
+    push @{$self->{parts}},$factory->make_glyph(0,$item);
+  }
+}
+
+# Link a set of features together so that they bump as a group.  Accepts
+# either a list of features or a single array ref of them; returns the
+# synthetic 'group' feature that wraps them.
+sub add_group {
+  my $self = shift;
+  my $first   = $_[0];
+  my @members = ref($first) eq 'ARRAY' ? @$first : @_;
+  my $group   = Bio::Graphics::Feature->new(-segments=>\@members,
+                                            -type => 'group');
+  $self->add_feature($group);
+  return $group;
+}
+
+# Get/set the top coordinate.  Always returns the value held before the call.
+sub top {
+  my ($self,@new) = @_;
+  my $previous = $self->{top};
+  $self->{top} = $new[0] if @new;
+  return $previous;
+}
+# left edge including left padding
+sub left {
+  my $self = shift;
+  my $edge = $self->{left} - $self->pad_left;
+  return $edge;
+}
+# right edge, derived from the left edge and the padded layout width
+sub right {
+  my $self = shift;
+  my $edge = $self->left + $self->layout_width - 1;
+  return $edge;
+}
+# bottom edge, derived from the top and the layout height
+sub bottom {
+  my $self = shift;
+  return $self->top + $self->layout_height - 1;
+}
+# Glyph height, taken from the 'height' option on first use and cached.
+sub height {
+  my $self = shift;
+  unless (exists $self->{height}) {
+    $self->{height} = $self->option('height');  # what the factory says
+  }
+  return $self->{height};
+}
+# Get/set the width.  Always returns the value held before the call.
+sub width {
+  my ($self,@new) = @_;
+  my $previous = $self->{width};
+  $self->{width} = $new[0] if @new;
+  return $previous;
+}
+# height after layout; simply the value returned by layout()
+sub layout_height {
+  my $self = shift;
+  my $height = $self->layout;
+  return $height;
+}
+# width including left and right padding
+sub layout_width {
+  my $self = shift;
+  my $padded = $self->width + $self->pad_left + $self->pad_right;
+  return $padded;
+}
+
+# Returns the rectangle that surrounds the physical part of the glyph,
+# excluding labels and other "extra" stuff.  Alias for bounds().
+sub calculate_boundaries {
+  my $self = shift;
+  return $self->bounds(@_);
+}
+
+# Returns (x1,y1,x2,y2) of the glyph body, shifted by the optional ($dx,$dy)
+# offset.  Vertical padding is excluded from the rectangle.
+sub bounds {
+  my $self = shift;
+  my ($dx,$dy) = @_;
+  my $x_off = $dx + 0;   # force numeric; a missing offset counts as 0
+  my $y_off = $dy + 0;
+
+  my $x_left   = $x_off + $self->{left};
+  my $y_top    = $y_off + $self->top    + $self->pad_top;
+  my $x_right  = $x_left + $self->{width} - 1;
+  my $y_bottom = $y_off + $self->bottom - $self->pad_bottom;
+
+  return ($x_left,$y_top,$x_right,$y_bottom);
+}
+
+# Returns (left,top,right,bottom) of the glyph including its padding.
+sub box {
+  my $self = shift;
+  my @corners = map { $self->$_ } qw(left top right bottom);
+  return @corners;
+}
+
+# Draw an unfilled rectangle.
+#
+# Arguments: $gd, then ($x1,$y1,$x2,$y2,$fg,$bg,$lw).  $fg and $bg default to
+# the glyph's fgcolor and bgcolor, $lw to its linewidth.  As before, a wide
+# pen is selected only for colors that were defaulted.
+sub unfilled_box {
+  my $self = shift;
+  my $gd   = shift;
+  my ($x1,$y1,$x2,$y2,$fg,$bg,$lw) = @_;
+
+  # Honor a caller-supplied line width.  It used to be overwritten
+  # unconditionally, which made the seventh argument a no-op.
+  $lw ||= $self->linewidth;
+
+  my $default_fg = !$fg;
+  my $default_bg = !$bg;
+  $fg ||= $self->fgcolor;
+  $bg ||= $self->bgcolor;
+
+  # draw a box
+  $fg = $self->set_pen($lw,$fg) if $default_fg && $lw > 1;
+  $gd->rectangle($x1,$y1,$x2,$y2,$fg);
+
+  # if either end runs off the drawing area, then cover over that edge
+  my ($width) = $gd->getBounds;
+
+  # Select the background pen only after the outline has been drawn, in the
+  # same order filled_box uses.
+  # NOTE(review): presumably set_pen installs one shared pen on the panel,
+  # in which case selecting both pens up front would draw the outline with
+  # the background pen -- confirm against Panel::set_pen.
+  $bg = $self->set_pen($lw,$bg) if $default_bg && $lw > 1;
+
+  $gd->line($x1,$y1+$lw,$x1,$y2-$lw,$bg)
+    if $x1 < $self->panel->pad_left;
+
+  $gd->line($x2,$y1+$lw,$x2,$y2-$lw,$bg)
+    if $x2 > $width - $self->panel->pad_right;
+}
+
+# Return boxes surrounding each part, as a list (or array ref in scalar
+# context) of [feature, x1, y1, x2, y2, parent] entries offset by
+# ($left,$top).  Groups, and parts whose level is below the box_subparts
+# setting, are descended into; a group itself contributes no entry.
+sub boxes {
+  my $self = shift;
+
+  my ($left,$top,$parent) = @_;
+  $top  += 0; $left += 0;
+
+  $self->layout;
+  $parent ||= $self;
+  my $depth_wanted = $self->box_subparts || 0;
+
+  # vertical origin shared by every part of this glyph
+  my $y_origin = $top+$self->top+$self->pad_top;
+
+  my @entries;
+  foreach my $part ($self->parts) {
+    my $type     = $part->feature->primary_tag || '';
+    my $is_group = $type eq 'group';
+
+    if ($is_group or $depth_wanted > $part->level) {
+      push @entries,$part->boxes($left,$y_origin,$parent);
+      next if $is_group;
+    }
+
+    my ($x1,$y1,$x2,$y2) = $part->box;
+    $x2++ if $x1==$x2;   # never report a zero-width box
+    push @entries,[$part->feature,
+                   $left + $x1,$y_origin+$y1,
+                   $left + $x2,$y_origin+$y2,
+                   $parent];
+  }
+
+  return @entries if wantarray;
+  return \@entries;
+}
+
+# value of the 'box_subparts' option, looked up once and then cached
+sub box_subparts {
+  my $self = shift;
+  unless (exists $self->{box_subparts}) {
+    $self->{box_subparts} = $self->_box_subparts;
+  }
+  return $self->{box_subparts};
+}
+
+# uncached lookup of the 'box_subparts' option
+sub _box_subparts {
+  my $self = shift;
+  return $self->option('box_subparts');
+}
+
+# These should be overridden for labels, etc.  They allow a glyph to make
+# itself thicker or thinner depending on domain-specific knowledge.  The
+# base class adds no vertical padding.
+sub pad_top {
+  return 0;
+}
+sub pad_bottom {
+  return 0;
+}
+# left padding: the largest pad_left among the parts, or 0 without parts
+sub pad_left {
+  my $self = shift;
+  my @parts = $self->parts or return 0;
+  my $widest = 0;
+  for my $part (@parts) {
+    my $pad = $part->pad_left;
+    $widest = $pad if $pad > $widest;
+  }
+  return $widest;
+}
+# right padding: the largest pad_right among the parts, or 0 without parts
+sub pad_right {
+  my $self = shift;
+  my @parts = $self->parts or return 0;
+  my $widest = 0;
+  for my $part (@parts) {
+    my $pad = $part->pad_right;
+    $widest = $pad if $pad > $widest;
+  }
+  return $widest;
+}
+
+# Move relative to parent by ($dx,$dy).
+sub move {
+  my ($self,$dx,$dy) = @_;
+  $self->{left} += $dx;
+  $self->{top}  += $dy;
+
+  # because the feature parts use *absolute* not relative addressing
+  # we need to move each of the parts horizontally, but not vertically
+  for my $part ($self->parts) {
+    $part->move($dx,0);
+  }
+}
+
+# Get an option from the factory.  The glyph itself, the option name and the
+# glyph's partno/total_parts slots are passed along.  Returns nothing when
+# the glyph has no factory.
+sub option {
+  my $self = shift;
+  my $option_name = shift;
+  my @args = ($option_name,@{$self}{qw(partno total_parts)});
+  my $factory = $self->{factory} or return;
+  # the archived text read ", at args" here (address obfuscation of ",@args")
+  return $factory->option($self,@args)
+}
+
+# Get an option that might be a code reference: the raw value is fetched
+# with the factory's get_option().  Returns nothing without a factory.
+sub code_option {
+  my ($self,$option_name) = @_;
+  my $factory = $self->factory;
+  return unless $factory;
+  return $factory->get_option($option_name);
+}
+
+# Set options globally.  Takes name => value pairs and writes them into the
+# factory's option map; names are lower-cased and one leading '-' is
+# stripped.
+sub configure {
+  my $self = shift;
+  my $option_map = $self->factory->option_map;
+  while (@_) {
+    my ($name,$value) = splice(@_,0,2);
+    my $key = lc $name;
+    $key =~ s/^-//;
+    $option_map->{$key} = $value;
+  }
+}
+
+# Some common options.
+# color($option_name) translates the named option into a color index via the
+# factory; returns 0 when the option is not defined.
+sub color {
+  my ($self,$color) = @_;
+  my $index = $self->option($color);
+  return 0 unless defined $index;
+  # turn into a color index
+  return $self->factory->translate_color($index);
+}
+
+# Value of the 'connector' option; extra arguments are passed through.
+sub connector {
+  # the archived text read ", at _" here (address obfuscation of ",@_")
+  return shift->option('connector',@_);
+}
+
+# Value of the 'bump' option.  Return value:
+#              0    no bumping
+#              +1   bump down
+#              -1   bump up
+#              +2   simple bump down
+#              -2   simple bump up
+sub bump {
+  my $self = shift;
+  my $direction = $self->option('bump');
+  return $direction;
+}
+
+# Control horizontal and vertical collision control: value of the 'hbumppad'
+# option, looked up once and cached in $self->{_hbumppad}.
+sub hbumppad {
+  my $self = shift;
+  unless (exists $self->{_hbumppad}) {
+    $self->{_hbumppad} = $self->option('hbumppad');
+  }
+  return $self->{_hbumppad};
+}
+
+# Foreground color index.  We also look for the "color" option for
+# Ace::Graphics compatibility; 'black' is used when neither is defined.
+sub fgcolor {
+  my $self = shift;
+  my $name = $self->option('color') || $self->option('fgcolor');
+  $name = 'black' unless defined $name;
+  return $self->factory->translate_color($name);
+}
+
+# added for compatibility: alias for bgcolor
+sub fillcolor {
+    my $self = shift;
+    my $fill = $self->bgcolor;
+    return $fill;
+}
+
+# Background color index: the 'bgcolor' option if defined, otherwise the
+# 'fillcolor' option, otherwise 'white'.
+sub bgcolor {
+  my $self = shift;
+  my $name = $self->option('bgcolor');
+  $name = $self->option('fillcolor') unless defined $name;
+  $name = 'white' unless defined $name;
+  return $self->factory->translate_color($name);
+}
+
+# Resolve a font option to a font object.
+#   $option  - option name to read (default 'font')
+#   $default - value to use when the option is false
+# Returns nothing when no font is configured.  A value that is already a
+# font object of the image class is returned as is; otherwise the value is
+# treated as a built-in font name, the matching font is stored back into the
+# option map via configure() and returned.
+sub getfont {
+  my $self    = shift;
+  my $option  = shift || 'font';
+  my $default = shift;
+
+  my $font = $self->option($option) || $default;
+  return unless $font;
+
+  my $img_class = $self->image_class;
+  return $font if UNIVERSAL::isa($font,$img_class . '::Font');
+
+  # map each symbolic font name onto the image class's built-in font
+  my %builtin = map { $_ => $img_class->$_() }
+                qw(gdTinyFont gdSmallFont gdMediumBoldFont gdLargeFont gdGiantFont);
+
+  my $gdfont = $builtin{$font};
+  $self->configure($option => $gdfont);
+  return $gdfont;
+}
+
+# the glyph's main font; 'gdSmallFont' unless the 'font' option says otherwise
+sub font {
+  my $self = shift;
+  my $font = $self->getfont('font','gdSmallFont');
+  return $font;
+}
+
+# Color index for the primary font: the 'fontcolor' option when it is set,
+# otherwise the foreground color.
+#
+# color() answers 0 rather than undef for an unset option, so its result
+# alone cannot tell "unset" from "color index 0"; the option is therefore
+# checked for definedness first so that the fgcolor fallback is reachable.
+sub fontcolor {
+  my $self = shift;
+  my $fontcolor = defined $self->option('fontcolor')
+                ? $self->color('fontcolor')
+                : undef;
+  return defined $fontcolor ? $fontcolor : $self->fgcolor;
+}
+# Color index for the secondary font: the 'font2color' option when it is
+# set, otherwise the foreground color (same fallback rule as fontcolor).
+sub font2color {
+  my $self = shift;
+  my $font2color = defined $self->option('font2color')
+                 ? $self->color('font2color')
+                 : undef;
+  return defined $font2color ? $font2color : $self->fgcolor;
+}
+# "track color": color index of the 'tkcolor' option, or nothing when the
+# option is false
+sub tkcolor {
+  my $self = shift;
+  return unless $self->option('tkcolor');
+  return $self->color('tkcolor');
+}
+# color index for connectors; falls back to the foreground color when
+# color('connector_color') is false
+sub connector_color {
+  my $self = shift;
+  my $index = $self->color('connector_color');
+  return $index || $self->fgcolor;
+}
+
+# both values are read directly from the panel's hash slots
+sub image_class {
+  my $self = shift;
+  return $self->{factory}->{panel}->{image_class};
+}
+sub polygon_package {
+  my $self = shift;
+  return $self->{factory}->{panel}->{polygon_package};
+}
+
+# Sort the given glyphs according to the "sort_order" option and return the
+# sorted list.
+#
+# sort_order may be:
+#   - false/unset:                 sort by left edge
+#   - a code ref:                  must carry the $$ prototype (throws if not)
+#   - a string starting "sub {":   compiled with eval and used as comparator
+#                                  (throws if it does not compile to a sub)
+#   - a "|"-separated key list, applied in order:
+#       left (or default), right, low_score, high_score,
+#       longest, shortest, strand, name
+#     Unknown keys are ignored.  Left edge is appended as the final
+#     tie-breaker unless "left"/"default" was listed.
+sub layout_sort {
+    my $self = shift;
+    my $sortfunc;
+
+    my $opt = $self->code_option("sort_order");
+
+    if (!$opt) {
+       $sortfunc = sub { $a->left <=> $b->left };
+    } elsif (ref $opt eq 'CODE') {
+      # prototype() is undef for subs declared without one
+      my $proto = prototype($opt);
+      $self->throw('sort_order subroutines must use the $$ prototype')
+        unless defined $proto && $proto eq '$$';
+      $sortfunc = $opt;
+    } elsif ($opt =~ /^sub\s+\{/) {
+       # NOTE(review): this string-evals a configuration value; it must only
+       # ever come from a trusted configuration source.
+       $sortfunc = eval $opt;
+       $self->throw("sort_order does not compile to a subroutine: $@")
+         unless ref $sortfunc eq 'CODE';
+    } else {
+       # One comparator closure per recognized key.  The closures read the
+       # package variables $a and $b that sort() sets for this package.
+       my $by_left = sub { $a->left <=> $b->left };
+       my %comparator_for = (
+         left       => $by_left,
+         default    => $by_left,
+         right      => sub { $a->right  <=> $b->right  },
+         low_score  => sub { $a->score  <=> $b->score  },
+         high_score => sub { $b->score  <=> $a->score  },
+         longest    => sub { $b->length <=> $a->length },
+         shortest   => sub { $a->length <=> $b->length },
+         strand     => sub { $b->strand <=> $a->strand },
+         name       => sub { $a->feature->display_name cmp $b->feature->display_name },
+       );
+
+       my @comparators;
+       my $sawleft = 0;
+       for my $sortby (split(/\s*\|\s*/, $opt)) {
+         my $cmp = $comparator_for{$sortby} or next;   # ignore unknown keys
+         $sawleft++ if $cmp == $by_left;
+         push @comparators, $cmp;
+       }
+       push @comparators, $by_left unless $sawleft;
+
+       # the first comparator that tells the two glyphs apart decides
+       $sortfunc = sub {
+         my $order = 0;
+         for my $cmp (@comparators) {
+           last if $order = $cmp->();
+         }
+         $order;
+       };
+    }
+
+    # cache this
+    # $self->factory->set_option(sort_order => $sortfunc);
+
+    my @things = sort $sortfunc @_;
+    return @things;
+}
+
+# Handle collision detection: lay out the parts of this glyph vertically and
+# return the resulting layout height (cached in $self->{layout_height}).
+#
+# The 'bump' option selects the strategy: false means no bumping, +/-1 moves
+# a part down/up until it no longer collides with an already placed part,
+# and +/-2 gives every part a line of its own.  The 'bump_limit' option, when
+# positive, caps the number of bump attempts per part; parts that hit the
+# cap are flagged with {overbumped}.
+sub layout {
+  my $self = shift;
+  return $self->{layout_height} if exists $self->{layout_height};
+
+  my @parts = $self->parts;
+  # a glyph without parts is just its own height plus vertical padding
+  return $self->{layout_height} = $self->height + $self->pad_top + $self->pad_bottom unless @parts;
+
+  my $bump_direction = $self->bump;
+  my $bump_limit = $self->option('bump_limit') || -1;
+
+  $_->layout foreach @parts;  # recursively lay out
+
+  # no bumping requested, or only one part here
+  if (@parts == 1 || !$bump_direction) {
+    my $highest = 0;
+    foreach (@parts) {
+      my $height = $_->layout_height;
+      $highest   = $height > $highest ? $height : $highest;
+    }
+    return $self->{layout_height} = $highest + $self->pad_top + $self->pad_bottom;
+  }
+
+  # two occupancy maps with different bin sizes (CM1 x CM2 and CM3 x CM4);
+  # both must report a collision before a part is bumped
+  my (%bin1,%bin2);
+  my $limit = 0;
+
+  for my $g ($self->layout_sort(@parts)) {
+
+    # read directly from the slot filled by the recursive layout above
+    my $height = $g->{layout_height};
+
+    # Simple +/- 2 bumping.  Every feature gets its very own line
+    if (abs($bump_direction) >= 2) {
+      $g->move(0,$limit);
+      $limit += $height + BUMP_SPACING if $bump_direction > 0;
+      $limit -= $height + BUMP_SPACING if $bump_direction < 0;
+      next;
+    }
+
+    # we get here for +/- 1 bumping
+    my $pos = 0;
+    my $bumplevel = 0;
+    my $left   = $g->left;
+    my $right  = $g->right;
+
+    while (1) {
+
+      # stop bumping if we've gone too far down
+      if ($bump_limit > 0 && $bumplevel++ >= $bump_limit) {
+	$g->{overbumped}++;  # this flag can be used to suppress label and description
+	foreach ($g->parts) {
+	  $_->{overbumped}++;
+	}
+	last;
+      }
+
+      # look for collisions
+      my $bottom = $pos + $height;
+      # no hit in the first map means the position is free
+      $self->collides(\%bin1,CM1,CM2,$left,$pos,$right,$bottom) or last;
+      my $collision = $self->collides(\%bin2,CM3,CM4,$left,$pos,$right,$bottom) or last;
+
+      if ($bump_direction > 0) {
+	$pos += $collision->[3]-$collision->[1] + BUMP_SPACING;    # collision, so bump
+      } else {
+	$pos -= BUMP_SPACING;
+      }
+
+      $pos++ if $pos % 2; # correct for GD rounding errors
+    }
+
+    # place the part and record the area it now occupies in both maps
+    $g->move(0,$pos);
+    $self->add_collision(\%bin1,CM1,CM2,$left,$g->top,$right,$g->bottom);
+    $self->add_collision(\%bin2,CM3,CM4,$left,$g->top,$right,$g->bottom);
+  }
+
+  # If -1 bumping was allowed, then normalize so that the top glyph is at zero
+  if ($bump_direction < 0) {
+    my $topmost;
+    foreach (@parts) {
+      my $top  = $_->top;
+      $topmost = $top if !defined($topmost) or $top < $topmost;
+    }
+    my $offset = - $topmost;
+    $_->move(0,$offset) foreach @parts;
+  }
+
+  # find new height
+  my $bottom = 0;
+  foreach (@parts) {
+    $bottom = $_->bottom if $_->bottom > $bottom;
+  }
+  # return $self->{layout_height} = $self->pad_bottom + $self->pad_top + $bottom - $self->top  + 1;
+  return $self->{layout_height} = $bottom + $self->pad_top + $self->pad_bottom;
+}
+
+# the %$occupied structure is a hash of "xbin,ybin" => list of [left,top,right,bottom]
+# Test a rectangle against an occupancy map.
+#   $occupied          - hash ref filled by add_collision()
+#   $cm1,$cm2          - bin sizes along x and y
+#   $left..$bottom     - rectangle to test
+# The rectangle is widened horizontally by the hbumppad option.  Returns the
+# [left,top,right,bottom] entry of an overlapping rectangle, or 0 if there
+# is none.  When several bins hold a hit, the hit from the last such bin is
+# the one returned.
+sub collides {
+  my $self = shift;
+  my ($occupied,$cm1,$cm2,$left,$top,$right,$bottom) = @_;
+  my @keys = $self->_collision_keys($cm1,$cm2,$left,$top,$right,$bottom);
+  my $hspacing = $self->hbumppad || 0;
+
+  my $hit = 0;
+  KEY: foreach my $key (@keys) {
+    next KEY unless exists $occupied->{$key};
+    BOX: foreach my $box (@{$occupied->{$key}}) {
+      my ($box_left,$box_top,$box_right,$box_bottom) = @$box;
+      next BOX if $right+$hspacing < $box_left;
+      next BOX if $left-$hspacing > $box_right;
+      next BOX if $bottom < $box_top;
+      next BOX if $top > $box_bottom;
+      $hit = $box;
+      next KEY;   # first hit in this bin is enough; go on to the next bin
+    }
+  }
+  return $hit;
+}
+
+# Record a rectangle in an occupancy map.  The stored rectangle is
+# [left,top,right+2,bottom]; the same array ref is pushed onto every bin the
+# widened rectangle touches.
+sub add_collision {
+  my ($self,$occupied,$cm1,$cm2,$left,$top,$right,$bottom) = @_;
+  my $entry = [$left,$top,$right+2,$bottom];
+  for my $key ($self->_collision_keys($cm1,$cm2,@$entry)) {
+    push @{$occupied->{$key}},$entry;
+  }
+}
+
+# List the "x,y" bin keys touched by a rectangle, given the bin sizes
+# ($binx,$biny).  Keys are produced column by column.
+sub _collision_keys {
+  my ($self,$binx,$biny,$left,$top,$right,$bottom) = @_;
+
+  my $last_col = int($right/$binx);
+  my $last_row = int($bottom/$biny);
+
+  my @keys;
+  my $col = int($left/$binx);
+  while ($col <= $last_col) {
+    my $row = int($top/$biny);
+    while ($row <= $last_row) {
+      push @keys,join(',',$col,$row);
+      $row++;
+    }
+    $col++;
+  }
+  return @keys;
+}
+
+# Draw the glyph onto $gd at offset ($left,$top).
+# With parts: optionally draw connectors, then draw every part.
+# Without parts: optionally draw connectors, then draw the glyph's own
+# component unless the feature has (invisible) subparts.
+# $partno and $total_parts are passed through to draw_component.
+sub draw {
+  my $self = shift;
+  my $gd = shift;
+  my ($left,$top,$partno,$total_parts) = @_;
+
+  my $connector = $self->connector;
+  my $wants_connectors = $connector && $connector ne 'none';
+
+  my @parts = $self->parts;
+
+  unless (@parts) {  # no part
+    $self->draw_connectors($gd,$left,$top) if $wants_connectors;
+    $self->draw_component($gd,$left,$top,$partno,$total_parts) unless $self->feature_has_subparts;
+    return;
+  }
+
+  # invoke sorter if user wants to sort always and we haven't already sorted
+  # during bumping.
+  @parts = $self->layout_sort(@parts) if !$self->bump && $self->option('always_sort');
+
+  my $x = $left;
+  my $y = $top  + $self->top + $self->pad_top;
+
+  $self->draw_connectors($gd,$x,$y) if $wants_connectors;
+
+  my $last_x;
+  for my $i (0 .. $#parts) {
+    my $part = $parts[$i];
+    # lie just a little bit to avoid lines overlapping and make the picture prettier
+    my $fake_x = $x;
+    $fake_x-- if defined $last_x && $part->left - $last_x == 1;
+    $part->draw($gd,$fake_x,$y,$i,scalar(@parts));
+    $last_x = $part->right;
+  }
+  return;
+}
+
+# the "level" is the level of nesting of the glyph:
+# groups are level -1, top level glyphs are level 0, subcomponents are level 1 and so forth.
+sub level {
+  my $self = shift;
+  return $self->{level};
+}
+
+# Draw the connectors between adjacent parts, plus connectors running from
+# the glyph's own ends to its first and last part.  ($dx,$dy) is the drawing
+# offset.  Nothing is drawn for an overbumped glyph.  A glyph without parts
+# gets a single connector spanning its bounds.
+sub draw_connectors {
+  my $self = shift;
+
+  return if $self->{overbumped};
+  my $gd = shift;
+  my ($dx,$dy) = @_;
+  my @parts = sort { $a->left <=> $b->left } $self->parts;
+
+  for (my $i = 0; $i < @parts-1; $i++) {
+    # compute each rectangle once per pair instead of twice
+    my @this = $parts[$i]->bounds;
+    my @next = $parts[$i+1]->bounds;
+    # don't let connectors double-back on themselves
+    next if $this[2] > $next[0];
+    $self->_connector($gd,$dx,$dy,@this,@next);
+  }
+
+  # extra connectors going off ends
+  if (@parts) {
+    my($x1,$y1,$x2,$y2) = $self->bounds(0,0);
+
+    my($xl,$xt,$xr,$xb) = $parts[0]->bounds;
+    $self->_connector($gd,$dx,$dy,$x1,$xt,$x1,$xb,$xl,$xt,$xr,$xb)      if $x1 < $xl;
+
+    my @last = $parts[-1]->bounds;
+    my ($xl2,$xt2,$xr2,$xb2) = @last;
+    $self->_connector($gd,$dx,$dy,@last,$x2,$xt2,$x2,$xb2) if $x2 > $xr2;
+  } else {
+    my ($x1,$y1,$x2,$y2) = $self->bounds($dx,$dy);
+    $self->draw_connector($gd,$y1,$y2,$x1,$y1,$y2,$x2);
+  }
+
+}
+
+# Return the highlight color index if this feature should be highlighted,
+# and nothing otherwise.  Only top level glyphs (level 0) are highlighted,
+# and only when the 'hilite' option is true.
+sub hilite_color {
+  my $self = shift;
+  return if $self->level; # only highlight top level glyphs
+  my $index = $self->option('hilite');
+  return unless $index;
+  return $self->factory->translate_color($index);
+}
+
+# Fill a rectangle 3 pixels larger on every side than the glyph's bounds
+# with the highlight color.  Does nothing when there is no highlight color.
+sub draw_highlight {
+  my $self = shift;
+  my ($gd,$left,$top) = @_;
+  my $color = $self->hilite_color or return;
+  my ($x1,$y1,$x2,$y2) = ($self->bounds)[0..3];
+  my $margin = 3;
+  $gd->filledRectangle($x1+$left - $margin,
+                       $y1+$top  - $margin,
+                       $x2+$left + $margin,
+                       $y2+$top  + $margin,
+                       $color);
+}
+
+# Connect two rectangles: from the right edge of the first ($xl,$xt,$xr,$xb)
+# to the left edge of the second ($yl,$yt,$yr,$yb), both shifted by
+# ($dx,$dy).  Gaps narrower than one pixel are skipped, except for group
+# glyphs.
+sub _connector {
+  my ($self,$gd,
+      $dx,$dy,
+      $xl,$xt,$xr,$xb,
+      $yl,$yt,$yr,$yb) = @_;
+
+  my ($left,$right) = ($dx + $xr, $dx + $yl);
+
+  # restore this comment if you don't like the group dash working
+  # its way backwards.
+  return if $right-$left < 1 && !$self->isa('Bio::Graphics::Glyph::group');
+
+  return $self->draw_connector($gd,
+                               $dy + $xt,$dy + $xb,$left,
+                               $dy + $yt,$dy + $yb,$right,
+                              );
+}
+
+# Dispatch to the drawing routine for the configured connector type.
+# Arguments after $gd are ($top1,$bottom1,$left,$top2,$bottom2,$right) and
+# are passed through unchanged.  Unknown types draw nothing.
+# (The archived text read ", at _" in each call: address obfuscation of ",@_".)
+sub draw_connector {
+  my $self   = shift;
+  my $gd     = shift;
+
+  my $color          = $self->connector_color;
+  my $connector_type = $self->connector or return;
+
+  if ($connector_type eq 'hat') {
+    $self->draw_hat_connector($gd,$color,@_);
+  } elsif ($connector_type eq 'solid') {
+    $self->draw_solid_connector($gd,$color,@_);
+  } elsif ($connector_type eq 'dashed') {
+    $self->draw_dashed_connector($gd,$color,@_);
+  } elsif ($connector_type eq 'quill') {
+    $self->draw_quill_connector($gd,$color,@_);
+  } elsif ($connector_type eq 'crossed') {
+    $self->draw_crossed_connector($gd,$color,@_);
+  } else {
+    ; # draw nothing
+  }
+}
+
+# Draw an inverted "V" between two parts.  Falls back to a solid connector
+# when the two parts are not vertically centered on the same line, and to a
+# short straight line when there is no room for the "V".
+sub draw_hat_connector {
+  my $self = shift;
+  my $gd   = shift;
+  my $color = shift;
+  my ($top1,$bottom1,$left,$top2,$bottom2,$right) = @_;
+
+  cluck "gd object is $gd" unless ref $gd;
+
+  my $center1  = ($top1 + $bottom1)/2;
+  my $quarter1 = $top1 + ($bottom1-$top1)/4;
+  my $center2  = ($top2 + $bottom2)/2;
+  my $quarter2 = $top2 + ($bottom2-$top2)/4;
+
+  if ($center1 != $center2) {
+    # the archived text read ", at _" here (address obfuscation of ",@_")
+    $self->draw_solid_connector($gd,$color,@_);
+    return;
+  }
+
+  if ($right - $left > 4) {  # room for the inverted "V"
+      my $middle = $left + int(($right - $left)/2);
+      $gd->line($left,$center1,$middle,$top1,$color);
+      $gd->line($middle,$top1,$right-1,$center1,$color);
+    } elsif ($right-$left > 1) { # no room, just connect
+      $gd->line($left,$quarter1,$right-1,$quarter1,$color);
+    }
+
+}
+
+# Draw a straight line between the vertical centers of two parts.
+sub draw_solid_connector {
+  my ($self,$gd,$color,$top1,$bottom1,$left,$top2,$bottom2,$right) = @_;
+
+  my $y_from = ($top1 + $bottom1)/2;
+  my $y_to   = ($top2 + $bottom2)/2;
+
+  $gd->line($left,$y_from,$right,$y_to,$color);
+}
+
+# Draw a dashed line between the vertical centers of two parts, using a
+# line style of two colored pixels followed by two transparent ones.
+sub draw_dashed_connector {
+  my ($self,$gd,$color,$top1,$bottom1,$left,$top2,$bottom2,$right) = @_;
+
+  my $y_from = ($top1 + $bottom1)/2;
+  my $y_to   = ($top2 + $bottom2)/2;
+
+  my $image_class = $self->panel->image_class;
+  my $transparent = $image_class->gdTransparent;
+  my $styled      = $image_class->gdStyled;
+
+  $gd->setStyle($color,$color,$transparent,$transparent);
+  $gd->line($left,$y_from,$right,$y_to,$styled);
+}
+
+# Draw a line between two parts decorated with small "quill" barbs every
+# QUILL_INTERVAL pixels.  The barbs follow the feature's strand (reversed
+# when the glyph is flipped); an unstranded feature gets the plain line.
+sub draw_quill_connector {
+  my ($self,$gd,$color,$top1,$bottom1,$left,$top2,$bottom2,$right) = @_;
+
+  my $center1  = ($top1 + $bottom1)/2;
+  my $center2  = ($top2 + $bottom2)/2;
+
+  $gd->line($left,$center1,$right,$center2,$color);
+
+  my $direction = $self->feature->strand;
+  return unless $direction;
+  $direction *= -1 if $self->{flip};
+
+  # first and last barb position, and the horizontal reach of each barb
+  my ($first,$last,$reach) = $direction > 0
+                           ? ($left+4, $right-1, -2)
+                           : ($left+1, $right-4,  2);
+
+  for (my $x = $first; $x <= $last; $x += QUILL_INTERVAL) {
+    $gd->line($x,$center1,$x+$reach,$center1-2,$color);
+    $gd->line($x,$center1,$x+$reach,$center1+2,$color);
+  }
+}
+
+# Draw a line between two parts with an "X" at its horizontal midpoint.
+sub draw_crossed_connector {
+  my ($self,$gd,$color,$top1,$bottom1,$left,$top2,$bottom2,$right) = @_;
+
+  # Draw the horizontal line (uses the coordinates exactly as passed in)
+  my $y_from = ($top1 + $bottom1)/2;
+  my $y_to   = ($top2 + $bottom2)/2;
+  $gd->line($left,$y_from,$right,$y_to,$color);
+
+  # Extra validations: put each coordinate pair in ascending order
+  if ($right < $left)     { ($left, $right)   = ($right, $left) }
+  if ($bottom1 < $top1)   { ($top1, $bottom1) = ($bottom1, $top1) }
+  if ($bottom2 < $top2)   { ($top2, $bottom2) = ($bottom2, $top2) }
+
+  # Draw the "X"
+  my $middle = int(($right - $left) / 2) + $left;
+  my $arm    = int(($bottom1 - $top1) / 2);
+  my ($x_from,$x_to) = ($middle-$arm, $middle+$arm);
+
+  $gd->line($x_from,$top1,   $x_to,$bottom2,$color);
+  $gd->line($x_from,$bottom1,$x_to,$top2,$color);
+}
+
+# Draw a filled rectangle with an outline.
+# Arguments: $gd, then ($x1,$y1,$x2,$y2,$bg,$fg,$lw).  $bg and $fg default
+# to the glyph's bgcolor and fgcolor, $lw to the 'linewidth' option or 1.
+# An edge lying outside the panel's padded area is painted over with the
+# background color.
+sub filled_box {
+  my ($self,$gd,$x1,$y1,$x2,$y2,$bg,$fg,$lw) = @_;
+
+  $bg ||= $self->bgcolor;
+  $fg ||= $self->fgcolor;
+  $lw ||= $self->option('linewidth') || 1;
+  my $wide_pen = $lw > 1;
+
+  $gd->filledRectangle($x1,$y1,$x2,$y2,$bg);
+
+  # draw a box
+  $fg = $self->set_pen($lw,$fg) if $wide_pen;
+  $gd->rectangle($x1,$y1,$x2,$y2,$fg);
+
+  # if the left end is off the end, then cover over
+  # the leftmost line
+  my ($width) = $gd->getBounds;
+
+  $bg = $self->set_pen($lw,$bg) if $wide_pen;
+
+  if ($x1 < $self->panel->pad_left) {
+    $gd->line($x1,$y1+$lw,$x1,$y2-$lw,$bg);
+  }
+
+  if ($x2 > $width - $self->panel->pad_right) {
+    $gd->line($x2,$y1+$lw,$x2,$y2-$lw,$bg);
+  }
+}
+
+# Draw a filled ellipse inscribed in the rectangle ($x1,$y1,$x2,$y2).
+# Arguments: $gd, then ($x1,$y1,$x2,$y2,$bg,$fg,$lw).  $fg and $bg default
+# to the glyph's fgcolor and bgcolor, $lw to its linewidth.
+sub filled_oval {
+  my $self = shift;
+  my $gd = shift;
+  my ($x1,$y1,$x2,$y2,$bg,$fg,$lw) = @_;
+  my $cx = ($x1+$x2)/2;
+  my $cy = ($y1+$y2)/2;
+
+  $fg ||= $self->fgcolor;
+  $bg ||= $self->bgcolor;
+  $lw ||= $self->linewidth;
+
+  # Pass the outline color along with the width.  With the width alone,
+  # set_pen falls back to the glyph's fgcolor and a caller-supplied $fg is
+  # silently lost whenever the line is wider than one pixel.
+  $fg = $self->set_pen($lw,$fg) if $lw > 1;
+
+  # Maintain backwards compatibility with gd 1.8.4
+  # which does not support the ellipse methods.
+  # can() method fails with GD::SVG...
+  if ($gd->can('ellipse') || $gd =~ /SVG/ ) {
+    $gd->filledEllipse($cx,$cy,$x2-$x1,$y2-$y1,$bg);
+    # Draw the edge around the ellipse
+    $gd->ellipse($cx,$cy,$x2-$x1,$y2-$y1,$fg);
+  } else {
+    $gd->arc($cx,$cy,$x2-$x1,$y2-$y1,0,360,$fg);
+    $gd->fillToBorder($cx,$cy,$fg,$bg);
+  }
+}
+
+# Draw an unfilled ellipse inscribed in the rectangle ($x1,$y1,$x2,$y2),
+# using the glyph's foreground color and line width.
+sub oval {
+  my ($self,$gd,$x1,$y1,$x2,$y2) = @_;
+  my ($cx,$cy) = (($x1+$x2)/2, ($y1+$y2)/2);
+  my ($w,$h)   = ($x2-$x1, $y2-$y1);
+
+  my $fg = $self->fgcolor;
+  my $linewidth = $self->linewidth;
+  $fg = $self->set_pen($linewidth) if $linewidth > 1;
+
+  # Maintain backwards compatibility with gd 1.8.4 which does not
+  # support the ellipse method.
+  my $has_ellipse = $gd->can('ellipse') || $gd =~ /SVG/;
+  if ($has_ellipse) {
+    $gd->ellipse($cx,$cy,$w,$h,$fg);
+  } else {
+    $gd->arc($cx,$cy,$w,$h,0,360,$fg);
+  }
+}
+
+# Draw a filled box with a pointed end.
+# Arguments: $gd, $orientation (>0 points right, <0 points left, 0 plain
+# box), then ($x1,$y1,$x2,$y2,$fg,$bg).  The orientation is reversed when
+# the glyph is flipped.  A plain filled box is drawn instead when there is
+# no orientation, the pointed end lies off the image, or the box is too
+# small for a point.
+sub filled_arrow {
+  my $self = shift;
+  my $gd   = shift;
+  my $orientation = shift;
+  my ($x1,$y1,$x2,$y2,$fg,$bg)  = @_;
+
+  $orientation *= -1 if $self->{flip};
+
+  my ($width) = $gd->getBounds;
+  my $indent = $y2-$y1 < $x2-$x1 ? $y2-$y1 : ($x2-$x1)/2;
+
+  # filled_box takes its colors as (bg, fg), the reverse of this method's
+  # (fg, bg), so they are passed explicitly in filled_box's order rather than
+  # forwarding @_ unchanged (which swapped fill and outline).  Arguments
+  # beyond the sixth are still forwarded as before.
+  my @extra = @_ > 6 ? @_[6..$#_] : ();
+  return $self->filled_box($gd,$x1,$y1,$x2,$y2,$bg,$fg,@extra)
+    if ($orientation == 0)
+      or ($x1 < 0 && $orientation < 0)
+        or ($x2 > $width && $orientation > 0)
+          or ($indent <= 0)
+            or ($x2 - $x1 < 3);
+
+  $fg   ||= $self->fgcolor;
+  $bg   ||= $self->bgcolor;
+  my $pkg  = $self->polygon_package;
+  my $poly = $pkg->new();
+  if ($orientation >= 0) {
+    $poly->addPt($x1,$y1);
+    $poly->addPt($x2-$indent,$y1);
+    $poly->addPt($x2,($y2+$y1)/2);
+    $poly->addPt($x2-$indent,$y2);
+    $poly->addPt($x1,$y2);
+  } else {
+    $poly->addPt($x2,$y1);
+    $poly->addPt($x2,$y2);
+    $poly->addPt($x1+$indent,$y2);
+    $poly->addPt($x1,($y2+$y1)/2);
+    $poly->addPt($x1+$indent,$y1);
+  }
+  $gd->filledPolygon($poly,$bg);
+  $gd->polygon($poly,$fg);
+
+  # blunt it a bit if off the end
+  # good idea - but isn't intuitive
+  # if ($orientation >= 0 && $x2 > $width - $self->panel->pad_right) {
+  # $gd->filledRectangle($x2-3,$y1,$x2,$y2,$self->panel->bgcolor);
+  #}
+}
+
+# line width from the 'linewidth' option; 1 when the option is false
+sub linewidth {
+  my $self = shift;
+  return $self->option('linewidth') || 1;
+}
+
+# Flood-fill the rectangle's interior with the background color, starting
+# one pixel inside its top-left corner.  Rectangles less than two pixels
+# wide or tall are left alone.
+sub fill {
+  my ($self,$gd,$x1,$y1,$x2,$y2) = @_;
+  return unless ($x2-$x1) >= 2 && ($y2-$y1) >= 2;
+  $gd->fill($x1+1,$y1+1,$self->bgcolor);
+}
+# Select a pen of the given width and color.  Both default to the glyph's
+# linewidth and fgcolor.  For widths of 1 or less the color is returned
+# unchanged; otherwise the panel's set_pen() result is returned.
+sub set_pen {
+  my ($self,$linewidth,$color) = @_;
+  $linewidth ||= $self->linewidth;
+  $color     ||= $self->fgcolor;
+  if ($linewidth > 1) {
+    return $self->panel->set_pen($linewidth,$color);
+  }
+  return $color;
+}
+
+# Draw the glyph's own body at offset ($left,$top): an arrow when the
+# 'strand_arrow' or 'stranded' option is set, a plain filled box otherwise.
+# Nothing is drawn when the body lies entirely outside the panel.
+sub draw_component {
+  my $self = shift;
+  my ($gd,$left,$top,$partno,$total_parts) = @_;
+  my ($x1,$y1,$x2,$y2) = $self->bounds($left,$top);
+
+  # clipping
+  my $panel = $self->panel;
+  return unless $x2 >= $panel->left and $x1 <= $panel->right;
+
+  my $stranded = $self->option('strand_arrow') || $self->option('stranded');
+  return $self->filled_arrow($gd,$self->feature->strand,$x1,$y1,$x2,$y2)
+    if $stranded;
+
+  return $self->filled_box($gd,$x1,$y1,$x2,$y2);
+}
+
+
+# value of the 'no_subparts' option
+sub no_subparts {
+  my $self = shift;
+  return $self->option('no_subparts');
+}
+
+# Maximum number of subfeature levels to descend into.
+# Returns the 'maxdepth' option when it is defined, 1 for features that
+# report themselves as compound, and nothing (undef/empty list) otherwise.
+sub maxdepth {
+  my $self = shift;
+
+  my $configured = $self->option('maxdepth');
+  if (defined $configured) {
+    return $configured;
+  }
+
+  # $feature->compound is an artefact from aggregators.  An aggregated
+  # feature can miss parts that are out of the query range, and the
+  # compound flag is the hack that works around this mis-feature.
+  my $feature = $self->feature;
+  if ($feature->can('compound') && $feature->compound) {
+    return 1;
+  }
+
+  return;
+}
+
+# True when this glyph's level (0 if unset) has reached the limit returned
+# by maxdepth().  Returns nothing when maxdepth() imposes no limit.
+sub exceeds_depth {
+  my $self  = shift;
+  my $limit = $self->maxdepth;
+  return unless defined $limit;
+  return ($self->level || 0) >= $limit;
+}
+
+# Return the subfeatures of $feature.  The lookup done by _subfeat() is a
+# bottleneck with segments, so its result is remembered per glyph, keyed
+# on the feature.  Returns nothing when subparts are switched off for a
+# level-0 glyph or when the depth limit has been reached.
+sub subfeat {
+  my ($self,$feature) = @_;
+
+  # protect against class invocation: there is no object to cache in
+  return $self->_subfeat($feature) unless ref $self;
+
+  return if $self->level == 0 && $self->no_subparts;
+  return if $self->exceeds_depth;
+
+  my $cache = $self->{cached_subfeat} ||= {};
+  unless (exists $cache->{$feature}) {
+    $cache->{$feature} = [ $self->_subfeat($feature) ];
+  }
+  return @{ $cache->{$feature} };
+}
+
+# Uncached subfeature lookup; usable as a class or an object method.
+# Tries, in order:
+#   1. $feature->segments, if the feature has that method;
+#   2. the feature's sub-locations that share the feature's own seq_id
+#      (any error while asking for them is swallowed by the eval);
+#   3. $feature->get_SeqFeatures, then $feature->sub_SeqFeature.
+# Returns nothing when none of these applies.
+sub _subfeat {
+  my ($class,$feature) = @_;
+
+  if ($feature->can('segments')) {
+    return $feature->segments;
+  }
+
+  my @split = eval {
+    my $seq_id = $feature->location->seq_id;
+    grep { $seq_id eq $_->seq_id } $feature->location->sub_Location;
+  };
+  return @split if @split;
+
+  # Either the APIs have changed, or I got confused at some point...
+  foreach my $method (qw(get_SeqFeatures sub_SeqFeature)) {
+    return $feature->$method if $feature->can($method);
+  }
+  return;
+}
+
+# Synthesize a key glyph: a level-0 glyph for the feature produced by
+# make_key_feature(), made by a clone of this glyph's factory with the
+# label switched on, the description and bumping switched off, and a
+# solid connector.
+sub keyglyph {
+  my $self    = shift;
+  my $feature = $self->make_key_feature;
+  my $factory = $self->factory->clone;
+
+  my @settings = (label => 1, description => 0, bump => 0, connector => 'solid');
+  while (my ($option,$value) = splice(@settings,0,2)) {
+    $factory->set_option($option => $value);
+  }
+
+  return $factory->make_glyph(0,$feature);
+}
+
+# Build the feature that the key glyph displays: a single forward-strand
+# segment covering pixels 0 through 80, converted to base-pair coordinates
+# with the glyph's scale and the panel's offset, and named by
+# make_key_name().
+sub make_key_feature {
+  my $self = shift;
+
+  my $bp_per_pixel = 1/$self->scale;
+  my $offset       = $self->panel->offset;
+
+  my $feature = Bio::Graphics::Feature->new(
+    -start  => 0  * $bp_per_pixel + $offset,
+    -end    => 80 * $bp_per_pixel + $offset,
+    -name   => $self->make_key_name(),
+    -strand => '+1',
+  );
+  return $feature;
+}
+
+# Text for the key: the 'key' option (empty string when unset).  When the
+# panel has add_category_labels switched on and a 'category' option is
+# defined, the category is appended in parentheses.
+sub make_key_name {
+  my $self = shift;
+
+  # breaking encapsulation - this should be handled by the panel
+  my $key = $self->option('key') || '';
+  if ($self->panel->add_category_labels) {
+    my $category = $self->option('category');
+    return defined $category ? "$key ($category)" : $key;
+  }
+  return $key;
+}
+
+# Memoized front end to _all_callbacks(): the answer is computed on the
+# first call and stored in $self->{all_callbacks}.
+sub all_callbacks {
+  my $self = shift;
+  unless (exists $self->{all_callbacks}) {
+    $self->{all_callbacks} = $self->_all_callbacks;
+  }
+  return $self->{all_callbacks};
+}
+
+# The 'all_callbacks' option when it is defined for this glyph, otherwise
+# the panel-wide all_callbacks setting.
+sub _all_callbacks {
+  my $self = shift;
+  my $track_level = $self->option('all_callbacks');
+  return defined $track_level
+    ? $track_level
+    : $self->panel->all_callbacks;
+}
+
+# Memoized front end to _subpart_callbacks(): the answer is computed on
+# the first call and stored in $self->{subpart_callbacks}.
+sub subpart_callbacks {
+  my $self = shift;
+  unless (exists $self->{subpart_callbacks}) {
+    $self->{subpart_callbacks} = $self->_subpart_callbacks;
+  }
+  return $self->{subpart_callbacks};
+}
+
+# Decide the subpart-callbacks setting for this glyph.  True when
+# all_callbacks is on, when the glyph is at level 0, or when it is at a
+# level above 0 and the 'subpart_callbacks' option is true.
+sub _subpart_callbacks {
+  my $self = shift;
+  return 1 if $self->all_callbacks;
+
+  my $do_subparts = $self->option('subpart_callbacks');
+  my $level       = $self->{level};
+  return 1 if $level == 0;
+  return $level > 0 && $do_subparts;
+}
+
+# The base class has no default factory; calling this always raises an
+# error via croak.
+sub default_factory {
+  croak("no default factory implemented");
+}
+
+# Release this glyph's references: drop the factory, call finished() on
+# every part, then drop the parts list.
+sub finished {
+  my $self = shift;
+  delete $self->{factory};
+  for my $part (@{ $self->{parts} || [] }) {
+    $part->finished;
+  }
+  delete $self->{parts};
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Glyph - Base class for Bio::Graphics::Glyph objects
+
+=head1 SYNOPSIS
+
+See L<Bio::Graphics::Panel>.
+
+=head1 DESCRIPTION
+
+Bio::Graphics::Glyph is the base class for all glyph objects.  Each
+glyph is a wrapper around a Bio::SeqFeatureI object, knows how to
+render itself on a Bio::Graphics::Panel, and has a variety of
+configuration variables.
+
+End developers will not ordinarily work directly with
+Bio::Graphics::Glyph objects, but with Bio::Graphics::Glyph::generic
+and its subclasses.  Similarly, most glyph developers will want to
+subclass from Bio::Graphics::Glyph::generic because the latter
+provides labeling and arrow-drawing facilities.
+
+=head1 METHODS
+
+This section describes the class and object methods for
+Bio::Graphics::Glyph.
+
+=head2 CONSTRUCTORS
+
+Bio::Graphics::Glyph objects are constructed automatically by a
+Bio::Graphics::Glyph::Factory, and are not usually created by
+end-developer code.
+
+=over 4
+
+=item $glyph = Bio::Graphics::Glyph-E<gt>new(-feature=E<gt>$feature,-factory=E<gt>$factory)
+
+Given a sequence feature, creates a Bio::Graphics::Glyph object to
+display it.  The B<-feature> argument points to the Bio::SeqFeatureI
+object to display, and B<-factory> indicates a
+Bio::Graphics::Glyph::Factory object from which the glyph will fetch
+all its run-time configuration information.  Factories are created and
+manipulated by the Bio::Graphics::Panel object.
+
+A standard set of options are recognized.  See L<OPTIONS>.
+
+=back
+
+=head2 OBJECT METHODS
+
+Once a glyph is created, it responds to a large number of methods.  In
+this section, these methods are grouped into related categories.
+
+Retrieving glyph context:
+
+=over 4
+
+=item $factory = $glyph-E<gt>factory
+
+Get the Bio::Graphics::Glyph::Factory associated with this object.
+This cannot be changed once it is set.
+
+=item $panel = $glyph-E<gt>panel
+
+Get the Bio::Graphics::Panel associated with this object.  This cannot
+be changed once it is set.
+
+=item $feature = $glyph-E<gt>feature
+
+Get the sequence feature associated with this object.  This cannot be
+changed once it is set.
+
+=item $feature = $glyph-E<gt>add_feature(@features)
+
+Add the list of features to the glyph, creating subparts.  This is
+most commonly done with the track glyph returned by
+Bio::Graphics::Panel-E<gt>add_track().
+
+=item $feature = $glyph-E<gt>add_group(@features)
+
+This is similar to add_feature(), but the list of features is treated
+as a group and can be configured as a set.
+
+=item $glyph-E<gt>finished
+
+When you are finished with a glyph, you can call its finished() method
+in order to break cycles that would otherwise cause memory leaks.
+finished() is typically only used by the Panel object.
+
+=back
+
+Retrieving glyph options:
+
+=over 4
+
+=item $fgcolor = $glyph-E<gt>fgcolor
+
+=item $bgcolor = $glyph-E<gt>bgcolor
+
+=item $fontcolor = $glyph-E<gt>fontcolor
+
+=item $fontcolor = $glyph-E<gt>font2color
+
+=item $fillcolor = $glyph-E<gt>fillcolor
+
+These methods return the configured foreground, background, font,
+alternative font, and fill colors for the glyph in the form of a
+GD::Image color index.
+
+=item $color = $glyph-E<gt>tkcolor
+
+This method returns a color to be used to flood-fill the entire glyph
+before drawing (currently used by the "track" glyph).
+
+=item $width = $glyph-E<gt>width([$newwidth])
+
+Return the width of the glyph, not including left or right padding.
+This is ordinarily set internally based on the size of the feature and
+the scale of the panel.
+
+=item $width = $glyph-E<gt>layout_width
+
+Returns the width of the glyph including left and right padding.
+
+=item $height = $glyph-E<gt>height
+
+Returns the height of the glyph, not including the top or bottom
+padding.  This is calculated from the "height" option and cannot be
+changed.
+
+
+=item $font = $glyph-E<gt>font
+
+Return the font for the glyph.
+
+=item $option = $glyph-E<gt>option($option)
+
+Return the value of the indicated option.
+
+=item $index = $glyph-E<gt>color($color)
+
+Given a symbolic or #RRGGBB-form color name, returns its GD index.
+
+=item $level = $glyph-E<gt>level
+
+The "level" is the nesting level of the glyph.
+Groups are level -1, top level glyphs are level 0,
+subparts (e.g. exons) are level 1 and so forth.
+
+=back
+
+Setting an option:
+
+=over 4
+
+=item $glyph-E<gt>configure(-name=E<gt>$value)
+
+You may change a glyph option after it is created using set_option().
+This is most commonly used to configure track glyphs.
+
+=back
+
+Retrieving information about the sequence:
+
+=over 4
+
+=item $start = $glyph-E<gt>start
+
+=item $end   = $glyph-E<gt>end
+
+These methods return the start and end of the glyph in base pair
+units.
+
+=item $offset = $glyph-E<gt>offset
+
+Returns the offset of the segment (the base pair at the far left of
+the image).
+
+=item $length = $glyph-E<gt>length
+
+Returns the length of the sequence segment.
+
+=back
+
+
+Retrieving formatting information:
+
+=over 4
+
+=item $top = $glyph-E<gt>top
+
+=item $left = $glyph-E<gt>left
+
+=item $bottom = $glyph-E<gt>bottom
+
+=item $right = $glyph-E<gt>right
+
+These methods return the top, left, bottom and right of the glyph in
+pixel coordinates.
+
+=item $height = $glyph-E<gt>height
+
+Returns the height of the glyph.  This may be somewhat larger or
+smaller than the height suggested by the GlyphFactory, depending on
+the type of the glyph.
+
+=item $scale = $glyph-E<gt>scale
+
+Get the scale for the glyph in pixels/bp.
+
+=item $height = $glyph-E<gt>labelheight
+
+Return the height of the label, if any.
+
+=item $label = $glyph-E<gt>label
+
+Return a human-readable label for the glyph.
+
+=back
+
+These methods are called by Bio::Graphics::Track during the layout
+process:
+
+=over 4
+
+=item $glyph-E<gt>move($dx,$dy)
+
+Move the glyph in pixel coordinates by the indicated delta-x and
+delta-y values.
+
+=item ($x1,$y1,$x2,$y2) = $glyph-E<gt>box
+
+Return the current position of the glyph.
+
+=back
+
+These methods are intended to be overridden in subclasses:
+
+=over 4
+
+=item $glyph-E<gt>calculate_height
+
+Calculate the height of the glyph.
+
+=item $glyph-E<gt>calculate_left
+
+Calculate the left side of the glyph.
+
+=item $glyph-E<gt>calculate_right
+
+Calculate the right side of the glyph.
+
+=item $glyph-E<gt>draw($gd,$left,$top)
+
+Optionally offset the glyph by the indicated amount and draw it onto
+the GD::Image object.
+
+=item $glyph-E<gt>draw_label($gd,$left,$top)
+
+Draw the label for the glyph onto the provided GD::Image object,
+optionally offsetting by the amounts indicated in $left and $top.
+
+=item $glyph-E<gt>maxdepth()
+
+This returns the maximum number of levels of feature subparts that the
+glyph will recurse through. For example, returning 0 indicates that
+the glyph will only draw the top-level feature. Returning 1 indicates
+that it will only draw the top-level feature and one level of
+subfeatures. Returning 2 will descend down two levels. Overriding this
+method will speed up rendering by avoiding creating of a bunch of
+subglyphs that will never be drawn.
+
+The default behavior is to return undef (unlimited levels of descent)
+unless the -maxdepth option is passed, in which case this number is
+returned.
+
+Note that Bio::Graphics::Glyph::generic overrides maxdepth() to return
+0, meaning no descent into subparts will be performed.
+
+=back
+
+These methods are useful utility routines:
+
+=over 4
+
+=item $pixels = $glyph-E<gt>map_pt($bases);
+
+Map the indicated base position, given in base pair units, into
+pixels, using the current scale and glyph position.
+
+=item $glyph-E<gt>filled_box($gd,$x1,$y1,$x2,$y2)
+
+Draw a filled rectangle with the appropriate foreground and fill
+colors, and pen width onto the GD::Image object given by $gd, using
+the provided rectangle coordinates.
+
+=item $glyph-E<gt>filled_oval($gd,$x1,$y1,$x2,$y2)
+
+As above, but draws an oval inscribed on the rectangle.
+
+=item $glyph-E<gt>exceeds_depth
+
+Returns true if descending into another level of subfeatures will
+exceed the value returned by maxdepth().
+
+=back
+
+=head2 OPTIONS
+
+The following options are standard among all Glyphs.  See individual
+glyph pages for more options.
+
+  Option      Description                      Default
+  ------      -----------                      -------
+
+  -fgcolor      Foreground color	       black
+
+  -outlinecolor	Synonym for -fgcolor
+
+  -bgcolor      Background color               turquoise
+
+  -fillcolor    Synonym for -bgcolor
+
+  -linewidth    Line width                     1
+
+  -height       Height of glyph		       10
+
+  -font         Glyph font		       gdSmallFont
+
+  -connector    Connector type                 undef (false)
+
+  -connector_color
+                Connector color                black
+
+  -strand_arrow Whether to indicate            undef (false)
+                 strandedness
+
+  -label        Whether to draw a label	       undef (false)
+
+  -description  Whether to draw a description  undef (false)
+
+  -no_subparts  Set to true to prevent         undef (false)
+                drawing of the subparts
+                of a feature.
+
+  -ignore_sub_part Give the types/methods of   undef
+                subparts to ignore (as a 
+                space delimited list).
+
+  -maxdepth     Specifies the maximum number   undef (unlimited)
+                of child generations to descend
+                when getting subfeatures
+
+  -sort_order   Specify layout sort order      "default"
+
+  -always_sort  Sort even when bumping is off  undef (false)
+
+  -bump_limit   Maximum number of levels to bump undef (unlimited)
+
+  -hilite       Highlight color                undef (no color)
+
+  -link, -title, -target
+               These options are used when creating imagemaps
+               for display on the web.  See L<Bio::Graphics::Panel/"Creating Imagemaps">.
+
+
+For glyphs that consist of multiple segments, the B<-connector> option
+controls what's drawn between the segments.  The default is undef (no
+connector).  Options include:
+
+   "hat"     an upward-angling connector
+   "solid"   a straight horizontal connector
+   "quill"   a decorated line with small arrows indicating strandedness
+             (like the UCSC Genome Browser uses)
+   "dashed"  a horizontal dashed line.
+   "crossed" a straight horizontal connector with an "X" on it
+              (Can be used when segments are not yet validated
+               by some internal experiments...)
+
+The B<-connector_color> option controls the color of the connector, if
+any.
+
+The label is printed above the glyph.  You may pass an anonymous
+subroutine to B<-label>, in which case the subroutine will be invoked
+with the feature as its single argument and is expected to return
+the string to use as the label.  If you provide the numeric
+value "1" to B<-label>, the feature's seqname(), info() and
+primary_tag() methods will be called until a suitable name is
+found.  To create a label with the text "1", pass the string "1 ".
+(A 1 followed by a space).
+
+The description is printed below the glyph.  You may pass an anonymous
+subroutine to B<-description>, in which case the subroutine will be
+invoked with the feature as its single argument and is expected to
+return the string to use as the description.  If you provide the
+numeric value "1" to B<-description>, the description will be read off
+the feature's source_tag() method.  To create a description with the
+text "1", pass the string "1 ".  (A 1 followed by a space).
+
+In the case of ACEDB Ace::Sequence feature objects, the feature's
+info(), Brief_identification() and Locus() methods will be called to
+create a suitable description.
+
+The B<-strand_arrow> option, if true, requests that the glyph indicate
+which strand it is on, usually by drawing an arrowhead.  Not all
+glyphs will respond to this request.  For historical reasons,
+B<-stranded> is a synonym for this option.
+
+B<sort_order>: By default, features are drawn with a layout based only on the
+position of the feature, assuring a maximal "packing" of the glyphs
+when bumped.  In some cases, however, it makes sense to display the
+glyphs sorted by score or some other comparison, e.g. such that more
+"important" features are nearer the top of the display, stacked above
+less important features.  The -sort_order option allows a few
+different built-in values for changing the default sort order (which
+is by "left" position): "low_score" (or "high_score") will cause
+features to be sorted from lowest to highest score (or vice versa).
+"left" (or "default") and "right" values will cause features to be
+sorted by their position in the sequence.  "longer" (or "shorter")
+will cause the longest (or shortest) features to be sorted first, and
+"strand" will cause the features to be sorted by strand: "+1"
+(forward) then "0" (unknown, or NA) then "-1" (reverse).
+
+In all cases, the "left" position will be used to break any ties.  To
+break ties using another field, options may be strung together using a
+"|" character; e.g. "strand|low_score|right" would cause the features
+to be sorted first by strand, then score (lowest to highest), then by
+"right" position in the sequence.
+
+Finally, a subroutine coderef with a $$ prototype can be provided.  It
+will receive two B<glyph> as arguments and should return -1, 0 or 1
+(see Perl's sort() function for more information).  For example, to
+sort a set of database search hits by bits (stored in the features'
+"score" fields), scaled by the log of the alignment length (with
+"start" position breaking any ties):
+
+  sort_order = sub ($$) {
+    my ($glyph1,$glyph2) = @_;
+    my $a = $glyph1->feature;
+    my $b = $glyph2->feature;
+    ( $b->score/log($b->length)
+          <=>
+      $a->score/log($a->length) )
+          ||
+    ( $a->start <=> $b->start )
+  }
+
+It is important to remember to use the $$ prototype as shown in the
+example.  Otherwise Bio::Graphics will quit with an exception. The
+arguments are subclasses of Bio::Graphics::Glyph, not the features
+themselves.  While glyphs implement some, but not all, of the feature
+methods, to be safe call the two glyphs' feature() methods in order to
+convert them into the actual features.
+
+The '-always_sort' option, if true, will sort features even if bumping
+is turned off.  This is useful if you would like overlapping features
+to stack in a particular order.  Features towards the end of the list
+will overlay those towards the beginning of the sort order.
+
+The B<-hilite> option draws a colored box behind each feature using the
+indicated color. Typically you will pass it a code ref that returns a
+color name.  For example:
+
+  -hilite => sub { my $name = shift->display_name; 
+                   return 'yellow' if $name =~ /XYZ/ }
+
+The B<-no_subparts> option will prevent the glyph from searching its
+feature for subfeatures. This may enhance performance if you know in
+advance that none of your features contain subfeatures.
+
+=head1 SUBCLASSING Bio::Graphics::Glyph
+
+By convention, subclasses are all lower-case.  Begin each subclass
+with a preamble like this one:
+
+ package Bio::Graphics::Glyph::crossbox;
+
+ use strict;
+ use base qw(Bio::Graphics::Glyph);
+
+Then override the methods you need to.  Typically, just the draw()
+method will need to be overridden.  However, if you need additional
+room in the glyph, you may override calculate_height(),
+calculate_left() and calculate_right().  Do not directly override
+height(), left() and right(), as their purpose is to cache the values
+returned by their calculating cousins in order to avoid time-consuming
+recalculation.
+
+A simple draw() method looks like this:
+
+ sub draw {
+  my $self = shift;
+  $self->SUPER::draw(@_);
+  my $gd = shift;
+
+  # and draw a cross through the box
+  my ($x1,$y1,$x2,$y2) = $self->calculate_boundaries(@_);
+  my $fg = $self->fgcolor;
+  $gd->line($x1,$y1,$x2,$y2,$fg);
+  $gd->line($x1,$y2,$x2,$y1,$fg);
+ }
+
+This subclass draws a simple box with two lines criss-crossed through
+it.  We first call our inherited draw() method to generate the filled
+box and label.  We then call calculate_boundaries() to return the
+coordinates of the glyph, disregarding any extra space taken by
+labels.  We call fgcolor() to return the desired foreground color, and
+then call $gd-E<gt>line() twice to generate the criss-cross.
+
+For more complex draw() methods, see Bio::Graphics::Glyph::transcript
+and Bio::Graphics::Glyph::segments.
+
+Please avoid using a specific image class (via "use GD" for example)
+within your glyph package. Instead, rely on the image package passed
+to the draw() method. This approach allows for future expansion of
+supported image classes without requiring glyph redesign. If you need
+access to the specific image classes such as Polygon, Image, or Font,
+generate them like such:
+
+ sub draw {
+  my $self = shift;
+  my $image_class = shift;
+
+  my $polygon_package = $self->polygon_package->new();
+  ...
+  }
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF::Feature>,
+L<Ace::Sequence>,
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Track>,
+L<Bio::Graphics::Glyph::anchored_arrow>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::box>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::wormbase_transcript>,
+L<Bio::Graphics::Glyph::xyplot>,
+L<Bio::Graphics::Glyph::whiskerplot>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Panel.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Panel.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Panel.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2643 @@
+package Bio::Graphics::Panel;
+
+use strict;
+use Bio::Graphics::Glyph::Factory;
+use Bio::Graphics::Feature;
+
+# Built-in defaults.  new() falls back to KEYLABELFONT, KEYCOLOR, KEYSPACING,
+# KEYSTYLE, KEYALIGN and GRIDCOLOR when the matching option is not supplied.
+#
+# KEYLABELFONT must be treated as string until image_class is established
+use constant KEYLABELFONT => 'gdMediumBoldFont';
+use constant KEYSPACING   => 5; # extra space between key columns
+use constant KEYPADTOP    => 5;  # extra padding before the key starts
+use constant KEYCOLOR     => 'wheat';       # default for -key_color
+use constant KEYSTYLE     => 'bottom';      # default for -key_style
+use constant KEYALIGN     => 'left';        # default for -key_align
+use constant GRIDCOLOR    => 'lightcyan';   # default for -gridcolor
+use constant MISSING_TRACK_COLOR =>'gray';
+use constant EXTRA_RIGHT_PADDING => 30;
+
+use base qw(Bio::Root::Root);
+
+my %COLORS;  # translation table for symbolic color names to RGB triple
+# NOTE(review): $IMAGEMAP looks like a seed for generated image-map names;
+# confirm against the code that uses it.
+my $IMAGEMAP = 'bgmap00001';
+# Called once when the module is loaded; new() calls it again if %COLORS
+# is still empty.
+read_colors();
+
+# Version number of the panel API.
+sub api_version {
+  return 1.654;
+}
+
+# Create a new panel of a given width and height, and add lists of features
+# one by one
+#
+# Arguments are -option => value pairs.  Most options fall back to a default
+# with ||, so a false value (0, '', undef) selects the default; -auto_pad is
+# the exception and only defaults (to 1) when it is not defined.
+# The displayed range comes from -segment when one is given, otherwise from
+# -offset/-length, and failing those from -start and -stop (-end is accepted
+# as a synonym for -stop).
+# Throws (via $class->throw) if the image class cannot be loaded.
+# Returns the new panel, a blessed hash with no tracks yet.
+sub new {
+  my $class = shift;
+  $class    = ref($class) || $class;
+  my %options = @_;
+
+  $class->read_colors() unless %COLORS;
+
+  my $length = $options{-length} || 0;
+  my $offset = $options{-offset}  || 0;
+  my $spacing = $options{-spacing} || 5;
+  my $bgcolor = $options{-bgcolor} || 'white';
+  my $keyfont = $options{-key_font} || KEYLABELFONT;
+  my $keycolor = $options{-key_color} || KEYCOLOR;
+  my $keyspacing = $options{-key_spacing} || KEYSPACING;
+  my $keystyle = $options{-key_style} || KEYSTYLE;
+  my $keyalign = $options{-key_align} || KEYALIGN;
+  my $allcallbacks = $options{-all_callbacks} || 0;
+  my $gridcolor    = $options{-gridcolor} || GRIDCOLOR;
+  my $grid         = $options{-grid}       || 0;
+  my $extend_grid  = $options{-extend_grid}|| 0;
+  my $flip         = $options{-flip}       || 0;
+  my $empty_track_style   = $options{-empty_tracks} || 'key';
+  my $autopad      = defined $options{-auto_pad} ? $options{-auto_pad} : 1;
+  my $truecolor    = $options{-truecolor}  || 0;
+  # any -image_class containing "SVG" selects GD::SVG; otherwise the value is
+  # used as given, or 'GD' when it is not supplied
+  my $image_class  = ($options{-image_class} && $options{-image_class} =~ /SVG/)
+                      ? 'GD::SVG'
+		      : $options{-image_class} || 'GD';  # Allow users to specify GD::SVG using SVG
+  my $linkrule     = $options{-link};
+  my $titlerule    = $options{-title};
+  my $targetrule   = $options{-target};
+  my $background   = $options{-background};
+  my $postgrid     = $options{-postgrid};
+  $options{-stop}||= $options{-end};  # damn damn damn
+  my $add_categories= $options{-add_category_labels};
+
+  # a -segment object overrides -offset and -length; if its start() call
+  # fails or yields a false result the offset becomes 0
+  if (my $seg = $options{-segment}) {
+    $offset = eval {$seg->start-1} || 0;
+    $length = $seg->length;
+  }
+
+  # -start/-stop only fill in values that are still false at this point
+  $offset   ||= $options{-start}-1 if defined $options{-start};
+  $length   ||= $options{-stop}-$options{-start}+1 
+     if defined $options{-start} && defined $options{-stop};
+
+  # bring in the image generator class, since we will need it soon anyway
+  eval "require $image_class; 1" or $class->throw($@);
+
+  return bless {
+		tracks => [],
+		width      => $options{-width} || 600,
+		pad_top    => $options{-pad_top}||0,
+		pad_bottom => $options{-pad_bottom}||0,
+		pad_left   => $options{-pad_left}||0,
+		pad_right  => $options{-pad_right}||0,
+		length => $length,
+		offset => $offset,
+		gridcolor => $gridcolor,
+		grid    => $grid,
+		extend_grid    => $extend_grid,
+		bgcolor => $bgcolor,
+		height => 0, # AUTO
+		spacing => $spacing,
+		key_font => $keyfont,
+		key_color => $keycolor,
+		key_spacing => $keyspacing,
+		key_style => $keystyle,
+		key_align => $keyalign,
+		background => $background,
+		postgrid   => $postgrid,
+		autopad   => $autopad,
+		all_callbacks => $allcallbacks,
+		truecolor     => $truecolor,
+		flip          => $flip,
+		linkrule      => $linkrule,
+		titlerule     => $titlerule,
+		targetrule    => $targetrule,
+		empty_track_style  => $empty_track_style,
+		image_class  => $image_class,
+		image_package => $image_class . '::Image',     # Accessors
+		polygon_package => $image_class . '::Polygon',
+		add_category_labels => $add_categories,
+		key_boxes  => [],
+	       },$class;
+}
+
+# Get/set the left padding.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub pad_left {
+  my ($self,@new) = @_;
+  my $previous = $self->{pad_left};
+  $self->{pad_left} = $new[0] if @new;
+  return $previous;
+}
+# Get/set the right padding.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub pad_right {
+  my ($self,@new) = @_;
+  my $previous = $self->{pad_right};
+  $self->{pad_right} = $new[0] if @new;
+  return $previous;
+}
+# Get/set the top padding.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub pad_top {
+  my ($self,@new) = @_;
+  my $previous = $self->{pad_top};
+  $self->{pad_top} = $new[0] if @new;
+  return $previous;
+}
+# Get/set the bottom padding.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub pad_bottom {
+  my ($self,@new) = @_;
+  my $previous = $self->{pad_bottom};
+  $self->{pad_bottom} = $new[0] if @new;
+  return $previous;
+}
+# Get/set the extend_grid flag.  Returns the value held before the call;
+# an argument, when supplied, becomes the new value.
+sub extend_grid {
+  my ($self,@new) = @_;
+  my $previous = $self->{extend_grid};
+  $self->{extend_grid} = $new[0] if @new;
+  return $previous;
+}
+# Get/set the flip flag.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub flip {
+  my ($self,@new) = @_;
+  my $previous = $self->{flip};
+  $self->{flip} = $new[0] if @new;
+  return $previous;
+}
+
+# Get/set how tracks without features are shown.  Recognized values:
+#    "suppress" -- suppress empty tracks entirely
+#    "key"      -- show just the key in "between" mode
+#    "line"     -- draw a thin grey line
+#    "dashed"   -- draw a dashed line
+# new() uses "key" when no -empty_tracks option is given.
+# Returns the value held before the call.
+sub empty_track_style {
+  my ($self,@new) = @_;
+  my $previous = $self->{empty_track_style};
+  $self->{empty_track_style} = $new[0] if @new;
+  return $previous;
+}
+
+# Get/set the key style.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub key_style {
+  my ($self,@new) = @_;
+  my $previous = $self->{key_style};
+  $self->{key_style} = $new[0] if @new;
+  return $previous;
+}
+
+# Get/set the automatic-padding flag (stored under the 'autopad' key).
+# Returns the value held before the call.
+sub auto_pad {
+  my ($self,@new) = @_;
+  my $previous = $self->{autopad};
+  $self->{autopad} = $new[0] if @new;
+  return $previous;
+}
+
+# Public routine for mapping base pair locations to pixel coordinates.
+# On a flipped panel each location is first replaced by (end + 1 - location);
+# the resulting list is then handed to map_pt().
+sub location2pixel {
+  my ($self,@locations) = @_;
+  my $mirror = $self->end + 1;
+  if ($self->{flip}) {
+    @locations = map { $mirror - $_ } @locations;
+  }
+  return $self->map_pt(@locations);
+}
+
+# Convert base pair positions to pixel positions, one result per argument.
+# The object's hash is read directly rather than through accessors for
+# performance.  The left padding is not added here.  Results below 0 are
+# reported as -1 and results beyond the panel width as width+1.
+sub map_pt {
+  my $self = shift;
+  my ($offset,$width,$flip,$length) = @{$self}{qw(offset width flip length)};
+  my $scale = $self->{scale} || $self->scale;
+
+  return map {
+    my $pixel = $flip
+      ? int(0.5 + $width - ($length - ($_ - 1)) * $scale)
+      : int(0.5 + ($_ - $offset - 1) * $scale);
+    $pixel = -1         if $pixel < 0;
+    $pixel = $width + 1 if $pixel > $width;
+    $pixel;
+  } @_;
+}
+
+# Convert base pair positions to pixel positions without clamping the
+# results to the panel.  The left padding is included in each result.
+# On a flipped panel distances are measured back from the end of the
+# displayed range (offset + length).
+sub map_no_trunc {
+  my $self     = shift;
+  my $offset   = $self->{offset};
+  my $scale    = $self->scale;
+  my $pad_left = $self->{pad_left};
+  my $flip     = $self->{flip};
+  my $end      = $offset + $self->{length};
+
+  return map {
+    my $bases = $flip ? $end - ($_ - 1) : $_ - $offset - 1;
+    int(0.5 + $pad_left + $bases * $scale);
+  } @_;
+}
+
+# Panel width divided by the displayed length.  The value is computed on
+# first use and kept in $self->{scale}; padding is not subtracted from the
+# width.
+sub scale {
+  my $self = shift;
+  unless ($self->{scale}) {
+    $self->{scale} = $self->width/($self->length);
+  }
+  return $self->{scale};
+}
+
+# First displayed position: offset + 1.
+sub start {
+  my $self = shift;
+  return $self->{offset} + 1;
+}
+# Last displayed position: start + length - 1.
+sub end {
+  my $self = shift;
+  return $self->start + $self->{length} - 1;
+}
+
+# Read-only accessor for the stored offset.
+sub offset {
+  my $self = shift;
+  return $self->{offset};
+}
+# Get/set the panel width.  The value is returned as stored, without the
+# left or right padding added.  Returns the value held before the call.
+sub width {
+  my ($self,@new) = @_;
+  my $previous = $self->{width};
+  $self->{width} = $new[0] if @new;
+  return $previous;
+}
+
+# Left edge of the drawing area: the left padding.
+sub left {
+  my ($self) = @_;
+  return $self->pad_left;
+}
+# Right edge of the drawing area: left padding plus width.  The right
+# padding is not subtracted.
+sub right {
+  my ($self) = @_;
+  return $self->pad_left + $self->width;
+}
+# Top edge of the drawing area: the top padding.
+sub top {
+  my $self = shift;
+  return $self->pad_top;
+}
+# Bottom edge of the drawing area: height minus the bottom padding.
+sub bottom {
+  my ($self) = @_;
+  return $self->height - $self->pad_bottom;
+}
+
+# Get/set the spacing value.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub spacing {
+  my ($self,@new) = @_;
+  my $previous = $self->{spacing};
+  $self->{spacing} = $new[0] if @new;
+  return $previous;
+}
+
+# Get/set the key spacing.  Returns the value held before the call; an
+# argument, when supplied, becomes the new value.
+sub key_spacing {
+  my ($self,@new) = @_;
+  my $previous = $self->{key_spacing};
+  $self->{key_spacing} = $new[0] if @new;
+  return $previous;
+}
+
+# Get/set the displayed length.  The new value may be a number or an
+# object with a length() method, in which case that method's result is
+# stored.  Returns the value held before the call.
+sub length {
+  my $self     = shift;
+  my $previous = $self->{length};
+  return $previous unless @_;
+
+  my $new = shift;
+  $new = $new->length if ref($new) && $new->can('length');
+  $self->{length} = $new;
+  return $previous;
+}
+
+# Read-only accessor for the grid color.
+sub gridcolor {
+  my $self = shift;
+  return $self->{gridcolor};
+}
+
+# Read-only accessor for the panel-wide all_callbacks setting.
+sub all_callbacks {
+  my $self = shift;
+  return $self->{all_callbacks};
+}
+
+# Append a track after all existing tracks.  The remaining arguments are
+# passed unchanged to _do_add_track(), whose result is returned.
+sub add_track {
+  my $self = shift;
+  # "@_" had been mangled to " at _" by the mail archive's address obfuscation
+  $self->_do_add_track(scalar(@{$self->{tracks}}),@_);
+}
+
+# Insert a track before all existing tracks.  The remaining arguments are
+# passed unchanged to _do_add_track(), whose result is returned.
+sub unshift_track {
+  my $self = shift;
+  # "@_" had been mangled to " at _" by the mail archive's address obfuscation
+  $self->_do_add_track(0,@_);
+}
+
+# Insert a track at index $position in the track list.  The remaining
+# arguments are passed unchanged to _do_add_track(), whose result is
+# returned.
+sub insert_track {
+  my $self = shift;
+  my $position = shift;
+  # "@_" had been mangled to " at _" by the mail archive's address obfuscation
+  $self->_do_add_track($position,@_);
+}
+
+
+# Shared implementation behind add_track(), unshift_track() and
+# insert_track(): create a feature and factory pair.
+#
+# Arguments: the insertion index, then any leading arguments that do not
+# start with "-" (a reference is taken as the feature list, anything else
+# as the glyph name), then -option => value pairs.  The glyph and
+# stylesheet options are pulled out; every other option is passed on with
+# its name lower-cased and the leading dash removed.  See Factory.pm for
+# the format of the options.
+#
+# The glyph option may be a name, a code reference called with each feature,
+# or a hash keyed on primary_tag ('generic' when there is no entry).
+# Features whose primary_tag is 'track' or 'group' always map to the glyph
+# of that name.
+#
+# The thing returned is actually a generic Glyph (the result of _add_track).
+sub _do_add_track {
+  my $self     = shift;
+  my $position = shift;
+
+  # due to indecision, we accept features
+  # and/or glyph types in the leading arguments
+  my $features = [];
+  my $glyph_name;
+  while (@_ && $_[0] !~ /^-/) {
+    my $arg = shift;
+    if (ref $arg) { $features   = $arg }
+    else          { $glyph_name = $arg }
+  }
+
+  my %args = @_;
+  my ($map,$ss,%options);
+  for my $key (keys %args) {
+    (my $canonical = lc $key) =~ s/^-//;
+    if    ($canonical eq 'glyph')      { $map = delete $args{$key} }
+    elsif ($canonical eq 'stylesheet') { $ss  = delete $args{$key} }
+    else                               { $options{$canonical} = $args{$key} }
+  }
+
+  $glyph_name = $map if defined $map;
+  $glyph_name ||= 'generic';
+
+  local $^W = 0;  # uninitialized variable warnings under 5.00503
+
+  # how to name the glyph for a feature that is neither a track nor a group
+  my $by_feature =
+      ref($map) eq 'CODE' ? $map
+    : ref($map) eq 'HASH' ? sub { eval { $map->{$_[0]->primary_tag} } || 'generic' }
+    :                       sub { $glyph_name };
+
+  my $panel_map = sub {
+    my $feature = shift;
+    foreach my $container (qw(track group)) {
+      return $container
+        if eval { defined $feature->primary_tag && $feature->primary_tag  eq $container };
+    }
+    return $by_feature->($feature);
+  };
+
+  $self->_add_track($position,$features,-map=>$panel_map,-stylesheet=>$ss,-options=>\%options);
+}
+
+# Build a track glyph from a set of features and splice it into the panel.
+#
+# Arguments: index into $self->{tracks} at which to insert, a feature or an
+# array ref of features (nested array refs are turned into 'group'
+# features), and the option list handed to
+# Bio::Graphics::Glyph::Factory->new().
+# Returns the newly created track glyph.
+sub _add_track {
+  my $self = shift;
+  my ($position,$features,@options) = @_;
+
+  # build the list of features into a Bio::Graphics::Feature object
+  $features = [$features] unless ref $features eq 'ARRAY';
+
+  # optional middle-level glyph is the group; grep returns aliases into
+  # @$features, so assigning to $f replaces the array ref in place
+  foreach my $f (grep {ref $_ eq 'ARRAY'} @$features) {
+    $f = Bio::Graphics::Feature->new(
+				     -segments=>$f,
+				     -type => 'group'
+				    );
+  }
+
+  # top-level glyph is the track
+  my $feature = Bio::Graphics::Feature->new(
+					    -segments=>$features,
+					    -start   => $self->offset+1,
+					    -stop    => $self->offset+$self->length,
+					    -type    => 'track'
+					   );
+
+  my $factory = Bio::Graphics::Glyph::Factory->new($self,@options);
+  my $track   = $factory->make_glyph(-1,$feature);
+
+  splice(@{$self->{tracks}},$position,0,$track);
+  return $track;
+}
+
+# Widen the left or right padding so that the key text of $track fits
+# beside the track.  Does nothing unless the key style is 'left' or 'right'
+# and auto_pad is on, when the track has no key, or when the track is empty
+# and empty tracks are suppressed.
+sub _expand_padding {
+  my $self   = shift;
+  my $track  = shift;
+  my $extra_padding = $self->extra_right_padding;
+
+  my $keystyle          = $self->key_style;
+  my $empty_track_style = $self->empty_track_style;
+
+  return unless $keystyle eq 'left' or $keystyle eq 'right';
+  return unless $self->auto_pad;
+
+  $self->setup_fonts();
+
+  my $key       = $self->track2key($track);
+  return unless defined $key;
+
+  # this is a subroutine body, not a loop: leave with return, not next
+  my $has_parts = $track->parts;
+  return if !$has_parts && $empty_track_style eq 'suppress';
+
+  # text width plus a 3 pixel gap
+  my $width_needed = $self->{key_font}->width * CORE::length($key)+3;
+  if ($keystyle eq 'left') {
+    my $width_i_have = $self->pad_left;
+    $self->pad_left($width_needed)  if $width_needed > $width_i_have;
+  } elsif ($keystyle eq 'right') {
+    $width_needed += $extra_padding;
+    my $width_i_have = $self->pad_right;
+    $self->pad_right($width_needed) if $width_needed > $width_i_have;
+  }
+}
+
+# Extra pixels added to the right of a right-hand key (a class constant).
+sub extra_right_padding {
+  return EXTRA_RIGHT_PADDING;
+}
+
+# Compute the total pixel height of the panel: the layout height of every
+# drawn track (or the key font height when a side key is taller), the
+# spacing between tracks, between-track key lines, the key section, and the
+# top and bottom padding plus 2.
+# As a side effect the side padding is expanded for every track first.
+sub height {
+  my $self = shift;
+  $self->setup_fonts;
+
+  for my $track (@{$self->{tracks}}) {
+    $self->_expand_padding($track);
+  }
+
+  my $spacing           = $self->spacing;
+  my $key_height        = $self->format_key;
+  my $empty_track_style = $self->empty_track_style;
+  my $key_style         = $self->key_style;
+  my $bottom_key        = $key_style eq 'bottom';
+  my $between_key       = $key_style eq 'between';
+  my $side_key          = $key_style =~ /left|right/;
+  my $keyheight         = $self->{key_font}->height;
+  my $height = 0;
+  for my $track (@{$self->{tracks}}) {
+    my $draw_between =  $between_key && $track->option('key');
+    my $has_parts = $track->parts;
+    # empty tracks take no room when suppressed, or when only their key
+    # would be shown and the key lives at the bottom
+    next if !$has_parts && ($empty_track_style eq 'suppress'
+		        or  $empty_track_style eq 'key' && $bottom_key);
+    $height += $keyheight if $draw_between;
+    $height += $spacing;
+    my $layout_height = $track->layout_height;
+    $height += ($side_key && $keyheight > $layout_height) ? $keyheight : $layout_height;
+  }
+
+  # get rid of spacing under last track
+  $height -= $spacing unless $bottom_key;
+  return $height + $key_height + $self->pad_top + $self->pad_bottom + 2;
+}
+
+# Turn the key font name stored in $self->{key_font} into a font object by
+# calling the method of that name on the image class.  Does nothing when
+# the slot already holds a reference.
+sub setup_fonts {
+  my $self = shift;
+  my $font = $self->{key_font};
+  return if ref $font;
+
+  $self->{key_font} = $self->image_class->$font;
+}
+
+sub gd {  # render the panel into an image (a new one, or $existing_gd when given) and return it
+  my $self        = shift;
+  my $existing_gd = shift;
+
+  local $^W = 0;  # can't track down the uninitialized variable warning
+
+  return $self->{gd} if $self->{gd};  # already rendered: hand back the cached image
+
+  $self->setup_fonts;
+  # the image class is only loaded here when no ready-made image was supplied
+  unless ($existing_gd) {
+    my $image_class = $self->image_class;
+    eval "require $image_class; 1" or $self->throw($@);
+  }
+
+  my $height = $self->height;
+  my $width  = $self->width + $self->pad_left + $self->pad_right;  # drawing width plus both paddings
+
+  my $pkg = $self->image_package;
+  my $gd  = $existing_gd || $pkg->new($width,$height,
+				      ($self->{truecolor} && $pkg->can('isTrueColor') ? 1 : ())
+				     );
+  my %translation_table;
+  for my $name ('white','black',keys %COLORS) {  # white and black first, then every entry of %COLORS
+    my $idx = $gd->colorAllocate(@{$COLORS{$name}});
+    $translation_table{$name} = $idx;
+  }
+  # store table and image now: bgcolor() below goes through translate_color(), which may call gd() again
+  $self->{translations} = \%translation_table;
+  $self->{gd}           = $gd;
+  if ($self->bgcolor) {
+    $gd->fill(0,0,$self->bgcolor);
+  } elsif (eval {$gd->isTrueColor}) {
+    $gd->fill(0,0,$translation_table{'white'});
+  }
+
+  my $pl = $self->pad_left;
+  my $pt = $self->pad_top;
+  my $offset = $pt;  # running y position, starting below the top padding
+  my $keyheight   = $self->{key_font}->height;
+  my $bottom_key  = $self->{key_style} eq 'bottom';
+  my $between_key = $self->{key_style} eq 'between';
+  my $left_key    = $self->{key_style} eq 'left';
+  my $right_key   = $self->{key_style} eq 'right';
+  my $empty_track_style = $self->empty_track_style;
+  my $spacing = $self->spacing;
+
+  # we draw in two steps, once for background of tracks, and once for
+  # the contents.  This allows the grid to sit on top of the track background.
+  for my $track (@{$self->{tracks}}) {
+    my $draw_between = $between_key && $track->option('key');
+    next if !$track->parts && ($empty_track_style eq 'suppress'
+			   or  $empty_track_style eq 'key' && $bottom_key);
+    $gd->filledRectangle($pl,
+			 $offset,
+			 $width-$self->pad_right,
+			 $offset+$track->layout_height
+			 + ($between_key ? $self->{key_font}->height : 0),
+			 $track->tkcolor)
+      if defined $track->tkcolor;
+    $offset += $keyheight if $draw_between;
+    $offset += $track->layout_height + $spacing;
+  }
+  # optional layers between the track backgrounds and the track contents
+  $self->draw_background($gd,$self->{background})  if $self->{background};
+  $self->draw_grid($gd)                            if $self->{grid};
+  $self->draw_background($gd,$self->{postgrid})    if $self->{postgrid};
+  # second pass: keys and track contents, restarting below the top padding
+  $offset = $pt;
+  for my $track (@{$self->{tracks}}) {
+    my $draw_between = $between_key && $track->option('key');
+    my $has_parts = $track->parts;
+    my $side_key_height = 0;
+
+    next if !$has_parts && ($empty_track_style eq 'suppress'
+			or  $empty_track_style eq 'key' && $bottom_key);
+
+    if ($draw_between) {
+      $offset += $self->draw_between_key($gd,$track,$offset);
+    }
+
+
+    $self->draw_empty($gd,$offset,$empty_track_style)
+      if !$has_parts && $empty_track_style=~/^(line|dashed)$/;
+
+    $track->draw($gd,$pl,$offset,0,1);
+
+    if ($self->{key_style} =~ /^(left|right)$/) {
+      $side_key_height = $self->draw_side_key($gd,$track,$offset,$self->{key_style});
+    }
+
+    $self->track_position($track,$offset);  # remember the y position this track was drawn at
+    my $layout_height = $track->layout_height;
+    $offset += ($side_key_height > $layout_height ? $side_key_height : $layout_height)+$spacing;  # advance by the taller of key and track
+  }
+
+
+  $self->draw_bottom_key($gd,$pl,$offset) if $self->{key_style} eq 'bottom';
+  return $self->{gd} = $gd;
+}
+
+
+# Package accessors
+# GD (and GD::SVG)'s new() resides in GD::Image
+sub image_class {
+  my $self = shift;
+  return $self->{image_class};
+}
+sub image_package {
+  my $self = shift;
+  return $self->{image_package};
+}
+sub polygon_package {
+  my $self = shift;
+  return $self->{polygon_package};
+}
+
+# Collect the bounding boxes reported by every drawn track.
+# Each track is asked for its boxes at the left padding and at its vertical
+# offset plus the top padding; the offset (without the top padding) is also
+# recorded through track_position().
+# Returns the boxes as a list in list context, as an array ref otherwise.
+sub boxes {
+  my $self = shift;
+  my @boxes;
+  my $offset = 0;
+
+  $self->setup_fonts;
+
+  my $pl = $self->pad_left;
+  my $pt = $self->pad_top;
+  my $between_key       = $self->{key_style} eq 'between';
+  my $bottom_key        = $self->{key_style} eq 'bottom';
+  my $empty_track_style = $self->empty_track_style;
+  my $keyheight         = $self->{key_font}->height;
+  my $spacing = $self->spacing;
+
+  for my $track (@{$self->{tracks}}) {
+    my $draw_between =  $between_key && $track->option('key');
+    # skip empty tracks that are not drawn at all
+    next if !$track->parts && ($empty_track_style eq 'suppress'
+			    or  $empty_track_style eq 'key' && $bottom_key);
+    $offset += $keyheight if $draw_between;
+    my $boxes = $track->boxes($pl,$offset+$pt);
+    $self->track_position($track,$offset);
+    push @boxes,@$boxes;
+    $offset += $track->layout_height + $spacing;
+  }
+  return wantarray ? @boxes : \@boxes;
+}
+
+# Get, and optionally set, the recorded position of a track.
+# Always returns the value that was stored before the call.
+sub track_position {
+  my ($self,$track,@new_value) = @_;
+  my $previous = $self->{_track_position}{$track};
+  $self->{_track_position}{$track} = shift @new_value if @new_value;
+  return $previous;
+}
+
+# draw the keys -- between
+# Writes the key text of $track at vertical position $offset, records its
+# box with add_key_box(), and returns the height of the key font (0 when
+# the track has no key).
+sub draw_between_key {
+  my ($self,$gd,$track,$offset) = @_;
+  my $key = $self->track2key($track) or return 0;
+  my $font  = $self->{key_font};
+  my $align = $self->{key_align};
+
+  my $x;
+  if ($align eq 'center') {
+    $x = $self->width - (CORE::length($key) * $font->width)/2;
+  }
+  elsif ($align eq 'right') {
+    $x = $self->width - CORE::length($key);
+  }
+  else {
+    $x = $self->pad_left;
+  }
+
+  # Key color hard-coded. Should be configurable for the control freaks.
+  my $ink = $self->translate_color('black');
+  $gd->string($font,$x,$offset,$key,$ink);
+  $self->add_key_box($track,$key,$x,$offset);
+  return $font->height;
+}
+
+# draw the keys -- left or right side
+# Writes the key text of $track beside the track at vertical position
+# $offset, records its box with add_key_box(), and returns the height of
+# the key font.  Returns nothing when the track has no key.
+sub draw_side_key {
+  my ($self,$gd,$track,$offset,$side) = @_;
+  my $key = $self->track2key($track) or return;
+  my $font       = $self->{key_font};
+  my $text_width = $font->width * CORE::length($key);
+
+  my $pos;
+  if ($side eq 'left') {
+    # right-align the text against the drawing area, leaving a 3 pixel gap
+    $pos = $self->pad_left - $text_width - 3;
+  }
+  else {
+    $pos = $self->pad_left + $self->width + EXTRA_RIGHT_PADDING;
+  }
+
+  my $ink = $self->translate_color('black');
+  # bgcolor is deliberately evaluated inside the argument list, as before
+  $gd->filledRectangle($pos,$offset,$pos+$text_width,$offset,$self->bgcolor);
+  $gd->string($font,$pos,$offset,$key,$ink);
+  $self->add_key_box($track,$key,$pos,$offset);
+  return $font->height;
+}
+
+# draw the keys -- bottom
+# Fills the key area starting at ($left,$top) with the key color, writes
+# the "KEY:" caption and draws every glyph in $self->{key_glyphs} below it.
+# Does nothing when no key glyphs have been prepared.
+sub draw_bottom_key {
+  my ($self,$gd,$left,$top) = @_;
+  my $key_glyphs = $self->{key_glyphs} or return;
+
+  my $fill = $self->translate_color($self->{key_color});
+  $gd->filledRectangle($left,$top,$self->width - $self->pad_right,$self->height-$self->pad_bottom,$fill);
+
+  my $ink = $self->translate_color('black');
+  $gd->string($self->{key_font},$left,KEYPADTOP+$top,"KEY:",$ink);
+
+  # the glyphs start one caption line (plus padding) further down
+  $top += $self->{key_font}->height + KEYPADTOP;
+  for my $glyph (@$key_glyphs) {
+    $glyph->draw($gd,$left,$top);
+  }
+}
+
+# Format the key section, and return its height.
+#
+# Returns 0 unless the key style is 'bottom'.  Otherwise the key glyphs are
+# laid out in a grid, remembered in $self->{key_glyphs}, and the resulting
+# height is cached in $self->{key_height} and returned.
+sub format_key {
+  my $self = shift;
+  return 0 unless $self->key_style eq 'bottom';
+
+  return $self->{key_height} if defined $self->{key_height};
+
+  my $suppress = $self->{empty_track_style} eq 'suppress';
+  my $between  = $self->{key_style}         eq 'between';
+
+  # NOTE(review): given the early return above, this branch and the final
+  # else look unreachable unless key_style() can differ from
+  # $self->{key_style} -- confirm before removing them.
+  if ($between) {
+    my @key_tracks = $suppress
+      ? grep {$_->option('key') && $_->parts} @{$self->{tracks}}
+      : grep {$_->option('key')} @{$self->{tracks}};
+    return $self->{key_height} = @key_tracks * $self->{key_font}->height;
+  }
+
+  elsif ($self->{key_style} eq 'bottom') {
+
+    my ($height,$width) = (0,0);
+    my @glyphs;
+    local $self->{flip} = 0;  # don't want to worry about flipped keys!
+
+    # determine how many glyphs become part of the key
+    # and their max size
+    for my $track (@{$self->{tracks}}) {
+
+      next unless $track->option('key');
+      next if $suppress && !$track->parts;
+
+      my $glyph;
+      if (my @parts = $track->parts) {
+	$glyph = $parts[0]->keyglyph;
+      } else {
+	# empty track: build a placeholder feature spanning the panel and
+	# take the key glyph from a glyph made for it
+	my $t = Bio::Graphics::Feature->new(-segments=>
+					    [Bio::Graphics::Feature->new(-start => $self->offset,
+									 -stop  => $self->offset+$self->length)]);
+	my $g = $track->factory->make_glyph(0,$t);
+	$glyph = $g->keyglyph;
+      }
+      next unless $glyph;
+
+      my ($h,$w) = ($glyph->layout_height,
+		    $glyph->layout_width);
+      $height = $h if $h > $height;
+      $width  = $w if $w > $width;
+      push @glyphs,$glyph;
+
+    }
+
+    $width += $self->key_spacing;
+
+    # no key glyphs, no key
+    return $self->{key_height} = 0 unless @glyphs;
+
+    # now height and width hold the largest glyph.  Find the smallest
+    # number of rows whose columns fit into the available width.
+    my $rows = 0;
+    my $cols = 0;
+    my $maxwidth = $self->width - $self->pad_left - $self->pad_right;
+    while (++$rows) {
+      $cols = @glyphs / $rows;
+      $cols = int ($cols+1) if $cols =~ /\./;  # round upward for fractions
+      my $total_width  = $cols * $width;
+      last if $total_width < $maxwidth;
+      # one column is as narrow as the key can get; without this guard a
+      # glyph at least as wide as the panel would keep the loop running
+      # forever
+      last if $cols <= 1;
+    }
+
+    # place the glyphs column by column
+    my $spacing = $self->key_spacing;
+    my $i = 0;
+    for (my $c = 0; $c < $cols; $c++) {
+      for (my $r = 0; $r < $rows; $r++) {
+	my $x = $c * ($width  + $spacing);
+	my $y = $r * ($height + $spacing);
+	next unless defined $glyphs[$i];
+	$glyphs[$i]->move($x,$y);
+	$i++;
+      }
+    }
+
+    $self->{key_glyphs} = \@glyphs;     # remember our key glyphs
+    # remember our key height
+    return $self->{key_height} =
+      ($height+$spacing) * $rows + $self->{key_font}->height +KEYPADTOP;
+  }
+
+  else {  # no known key style, neither "between" nor "bottom"
+    return $self->{key_height} = 0;
+  }
+}
+
+# Record the rectangle occupied by a track key.  The entry appended to
+# $self->{key_boxes} is [label, left, top, right, bottom, track], with the
+# right and bottom edges derived from the key font's width and height.
+sub add_key_box {
+  my ($self,$track,$label,$x,$y) = @_;
+  my $font   = $self->{key_font};
+  my $right  = $x + $font->width * CORE::length($label);
+  my $bottom = $y + $font->height;
+  push @{$self->{key_boxes}}, [$label,$x,$y,$right,$bottom,$track];
+}
+
+# Return the recorded key boxes: a list in list context, otherwise the
+# stored array reference itself.
+sub key_boxes {
+  my $self  = shift;
+  my $boxes = $self->{key_boxes};
+  return @$boxes if wantarray;
+  return $boxes;
+}
+
+# Get, and optionally set, the add_category_labels flag.
+# Always returns the value that was stored before the call.
+sub add_category_labels {
+  my ($self,@new_value) = @_;
+  my $previous = $self->{add_category_labels};
+  $self->{add_category_labels} = shift @new_value if @new_value;
+  return $previous;
+}
+
+# Return the key (label) of a track, as produced by the track's own
+# make_key_name() method.
+sub track2key {
+  my ($self,$track) = @_;
+  return $track->make_key_name();
+}
+
+# Draw the placeholder line for an empty track, half a track spacing below
+# $offset, across the panel from the left padding to width minus right
+# padding.  Style 'dashed' draws a styled (dashed) line, anything else a
+# solid one.  Returns the y position of the line.
+sub draw_empty {
+  my ($self,$gd,$offset,$style) = @_;
+  my $y         = $offset + $self->spacing/2;
+  my $x_start   = $self->pad_left;
+  my $x_end     = $self->width-$self->pad_right;
+  my $ink       = $self->translate_color(MISSING_TRACK_COLOR);
+  my $img_class = $self->image_class;
+
+  if ($style eq 'dashed') {
+    # two colored pixels followed by two transparent ones
+    $gd->setStyle($ink,$ink,$img_class->gdTransparent(),$img_class->gdTransparent());
+    $gd->line($x_start,$y,$x_end,$y,$img_class->gdStyled());
+  }
+  else {
+    $gd->line($x_start,$y,$x_end,$y,$ink);
+  }
+  return $y;
+}
+
+# draw a grid
+# Draws one vertical line per grid position in the grid color.  The
+# positions are either the list given in $self->{grid} (when it is an array
+# ref) or one per minor tick across the panel's range.  With extend_grid
+# the lines run over the full image height, otherwise between the top and
+# bottom padding.
+sub draw_grid {
+  my $self = shift;
+  my $gd = shift;
+
+  my $gridcolor = $self->translate_color($self->{gridcolor});
+  my @positions;
+  if (ref $self->{grid} eq 'ARRAY') {
+    @positions = @{$self->{grid}};
+  } else {
+    # only the minor tick interval is needed here
+    my (undef,$minor) = $self->ticks;
+    my $first_tick = $minor * int($self->start/$minor);
+    for (my $i = $first_tick-1; $i <= $self->end+1; $i += $minor) {
+      push @positions,$i;
+    }
+  }
+  my $pl = $self->pad_left;
+  my $pt = $self->extend_grid ? 0 : $self->pad_top;
+  my $pb = $self->extend_grid ? $self->height : $self->height - $self->pad_bottom;
+  my $offset = $self->{offset}+$self->{length}+1;
+  for my $tick (@positions) {
+    # on a flipped panel the position is mirrored before mapping to pixels
+    my ($pos) = $self->map_pt($self->{flip} ? $offset - $tick
+                                            : $tick);
+
+    $gd->line($pl+$pos,$pt,$pl+$pos,$pb,$gridcolor);
+  }
+}
+
+# draw an image (or invoke a drawing routine)
+# When given a code ref, calls it with the image and the panel and returns
+# its result.  When given the name of an existing file with a .png, .jpg,
+# .jpeg, .gd, .gif or .xbm extension, loads it through the image package
+# and tiles it over the whole image.  Anything else is ignored.
+sub draw_background {
+  my $self = shift;
+  my ($gd,$image_or_routine) = @_;
+  if (ref $image_or_routine eq 'CODE') {
+    return $image_or_routine->($gd,$self);
+  }
+  if (-f $image_or_routine) { # a file to draw
+    my $method = $image_or_routine =~ /\.png$/i   ? 'newFromPng'
+               : $image_or_routine =~ /\.jpe?g$/i ? 'newFromJpeg'
+               : $image_or_routine =~ /\.gd$/i    ? 'newFromGd'
+               : $image_or_routine =~ /\.gif$/i   ? 'newFromGif'
+               : $image_or_routine =~ /\.xbm$/i   ? 'newFromXbm'
+	       : '';
+    return unless $method;
+    my $image = eval {$self->image_package->$method($image_or_routine)};
+    unless ($image) {
+      warn $@;
+      return;
+    }
+    my ($src_width,$src_height) = $image->getBounds;
+    my ($dst_width,$dst_height) = $gd->getBounds;
+    # a source image without width or height would never advance the
+    # tiling loops below
+    return unless $src_width > 0 && $src_height > 0;
+    # tile the thing on
+    for (my $x = 0; $x < $dst_width; $x += $src_width) {
+      for (my $y = 0; $y < $dst_height; $y += $src_height) {
+	$gd->copy($image,$x,$y,0,0,$src_width,$src_height);
+      }
+    }
+  }
+}
+
+# calculate major and minor ticks
+# Arguments: ($length,$minwidth).  $minwidth is the minimum pixel distance
+# between major ticks and defaults to seven characters of the small GD
+# font.  $length is accepted but does not influence the result.
+# Returns (major interval, minor interval); the minor interval is one
+# tenth of the major one.
+sub ticks {
+  my $self = shift;
+  my ($length,$minwidth) = @_;
+
+  my $img = $self->image_class;
+  $minwidth = $img->gdSmallFont->width*7  unless defined $minwidth;
+
+  # figure out tick mark scale: grow the interval by powers of ten until
+  # one interval is at least $minwidth pixels wide, so that there is
+  # enough room for the labels
+  my $scale    = $self->scale;
+  my $interval = 1;
+  $interval *= 10 until $interval * $scale >= $minwidth;
+
+  return ($interval,$interval/10);
+}
+
+# reverse of translate(); given index, return rgb triplet
+# Returns nothing when the panel has no image yet.
+sub rgb {
+  my ($self,$idx) = @_;
+  my $gd = $self->{gd};
+  return unless $gd;
+  return $gd->rgb($idx);
+}
+
+# Translate a color specification into a color index of the panel's image.
+# Accepts three numeric components (r,g,b), a "#RRGGBB" hex string, or a
+# symbolic color name looked up in $self->{translations}.
+# Returns 1 when no image or translation table is available, or when the
+# name is unknown.
+sub translate_color {
+  my $self = shift;
+  my @colors = @_;
+  if (@colors == 3) {
+    my $gd = $self->gd or return 1;
+    return $self->colorClosest($gd,@colors);
+  }
+  elsif ($colors[0] =~ /^\#([0-9A-F]{2})([0-9A-F]{2})([0-9A-F]{2})$/i) {
+    my $gd = $self->gd or return 1;
+    my ($r,$g,$b) = (hex($1),hex($2),hex($3));
+    return $self->colorClosest($gd,$r,$g,$b);
+  }
+  else {
+    my $color = $colors[0];
+    my $table = $self->{translations} or return 1;
+    return defined $table->{$color} ? $table->{$color} : 1;
+  }
+}
+
+# workaround for bad GD
+# Resolve an (r,g,b) triplet to a color index of $gd, caching the answer
+# per triplet in $self->{closestcache}.  If colorResolve() yields a
+# negative index (GD 2.04 and later only), fall back to the entry of
+# %COLORS with the smallest squared RGB distance and return its index from
+# $self->{translations}.
+sub colorClosest {
+  my ($self,$gd,@c) = @_;
+  return $self->{closestcache}{"@c"} if exists $self->{closestcache}{"@c"};
+  return $self->{closestcache}{"@c"} = $gd->colorResolve(@c) if $GD::VERSION < 2.04;
+
+  my $index = $gd->colorResolve(@c);
+  return $self->{closestcache}{"@c"} = $index if $index >= 0;
+
+  # from here on $index holds the NAME of the closest color, not an index
+  my $value;
+  for (keys %COLORS) {
+    my ($r,$g,$b) = @{$COLORS{$_}};
+    my $dist = ($r-$c[0])**2 + ($g-$c[1])**2 + ($b-$c[2])**2;
+    ($value,$index) = ($dist,$_) if !defined($value) || $dist < $value;
+  }
+  return $self->{closestcache}{"@c"} = $self->{translations}{$index};
+}
+
+# Return the background color translated to a color index, or nothing
+# when no background color is set.
+sub bgcolor {
+   my $self  = shift;
+   my $color = $self->{bgcolor};
+   return unless $color;
+   return $self->translate_color($color);
+}
+
+# Install a square brush of the given line width and color on the panel's
+# image and return the image class's gdBrushed constant to draw with.
+# Brush images are cached per (linewidth,color) pair.
+sub set_pen {
+  my $self = shift;
+  my ($linewidth,$color) = @_;
+  my $gd  = $self->{gd};
+
+  # look the brush up under the same key it is stored under; a cache hit
+  # still has to (re)install the brush, because another pen may have been
+  # set on the image in between
+  my $pen = $self->{pens}{$linewidth,$color};
+  unless ($pen) {
+    my $pkg = $self->image_package;
+    $pen = $self->{pens}{$linewidth,$color} = $pkg->new($linewidth,$linewidth);
+    my @rgb = $self->rgb($color);
+    # white is allocated first, as before: GD documents the first
+    # allocated color as the background of a palette image
+    $pen->colorAllocate(255,255,255);
+    my $fg = $pen->colorAllocate(@rgb);
+    $pen->fill(0,0,$fg);
+  }
+  # NOTE(review): the width-only slot is still written in case anything
+  # outside this method reads it -- confirm and drop if unused
+  $self->{pens}{$linewidth} = $pen;
+
+  $gd->setBrush($pen);
+  return $self->image_class->gdBrushed();
+}
+
+# Render the panel (if necessary) and return the image's PNG data.
+sub png {
+  my $self = shift;
+  return $self->gd->png;
+}
+
+# Render the panel (if necessary) and return the image's SVG data.
+sub svg {
+  my $self = shift;
+  return $self->gd->svg;
+}
+
+
+# WARNING: THIS STUFF IS COPIED FROM Bio::Graphics::Browser.pm AND
+# Bio::Graphics::FeatureFile AND MUST BE REFACTORED
+#
+# Write a png image to disk and generate an image map in a convenient
+# CGIish way.
+# Options: -link, -title and -target (rules; default to the panel's
+# linkrule, titlerule and targetrule), -url (default '/tmp'), -root
+# (default $ENV{DOCUMENT_ROOT}) and -mapname (default: a running counter).
+# Returns the list (image URL, image map HTML, map name).
+sub image_and_map {
+  my ($self,%args) = @_;
+
+  my $link_rule   = $args{-link}    || $self->{linkrule};
+  my $title_rule  = $args{-title}   || $self->{titlerule};
+  my $target_rule = $args{-target}  || $self->{targetrule};
+  my $tmpurl      = $args{-url}     || '/tmp';
+  my $docroot     = $args{-root}    || $ENV{DOCUMENT_ROOT} || '';
+  my $mapname     = $args{-mapname} || $IMAGEMAP++;
+
+  # make sure a non-empty document root ends in a slash
+  if ($docroot && $docroot !~ m!/$!) {
+    $docroot .= '/';
+  }
+
+  # strip any leading scheme://host part from the URL to get the path
+  (my $url_path = $tmpurl) =~ s!^\w+://[^/]+!!;
+  my $tmpdir = $docroot . $url_path;
+
+  my $url = $self->create_web_image($tmpurl,$tmpdir);
+  my $map = $self->create_web_map($mapname,$link_rule,$title_rule,$target_rule);
+  return ($url,$map,$mapname);
+}
+
+# Write the panel's PNG data into $tmpdir under a name derived from the MD5
+# digest of the data, creating the directory if needed.
+# Returns the URL of the image ($tmpurl/<digest>.png), or nothing when the
+# directory name or the digest fails the untainting pattern.  Throws when
+# the directory cannot be created, Digest::MD5 is missing, or the file
+# cannot be opened, written or closed.
+sub create_web_image {
+  my $self             = shift;
+  my ($tmpurl,$tmpdir) = @_;
+
+  # create directory if it isn't there already
+  # we need to untaint tmpdir before calling mkpath()
+  return unless $tmpdir =~ /^(.+)$/;
+  my $path = $1;
+  unless (-d $path) {
+    require File::Path unless defined &File::Path::mkpath;
+    File::Path::mkpath($path,0,0777) or $self->throw("Couldn't create temporary image directory $path: $!");
+  }
+
+  unless (defined &Digest::MD5::md5_hex) {
+    eval "require Digest::MD5; 1"
+      or $self->throw("Sorry, but the image_and_map() method requires the Digest::MD5 module.");
+  }
+  my $data      = $self->png;
+  my $signature = Digest::MD5::md5_hex($data);
+  my $extension = 'png';
+
+  # untaint signature for use in open
+  $signature =~ /^([0-9A-Fa-f]+)$/ or return;
+  $signature = $1;
+
+  my $url         = sprintf("%s/%s.%s",$tmpurl,$signature,$extension);
+  # build the file name from the untainted directory that was checked and
+  # created above, not from the still-tainted $tmpdir
+  my $imagefile   = sprintf("%s/%s.%s",$path,$signature,$extension);
+
+  open (my $F,">", $imagefile) || $self->throw("Can't open image file $imagefile for writing: $!\n");
+  binmode($F);
+  # buffered write errors may only surface at close, so check both
+  print {$F} $data or $self->throw("Can't write image file $imagefile: $!\n");
+  close $F         or $self->throw("Can't close image file $imagefile: $!\n");
+
+  return $url;
+}
+
+# Build the HTML <map> element for the panel's feature boxes.
+# Arguments: map name (default 'map') and the default link, title and
+# target rules; a track's own 'link', 'title' and 'target' options take
+# precedence.  Boxes whose feature has no primary_tag method, or for which
+# no link rule applies, produce no <area>.
+# Returns the HTML as a string.
+sub create_web_map {
+  my ($self,$name,$linkrule,$titlerule,$targetrule) = @_;
+  $name ||= 'map';
+  my $boxes = $self->boxes;
+
+  # per-track rule caches
+  my (%link_for,%title_for,%target_for);
+
+  my @html = (qq(<map name="$name" id="$name">\n));
+  for my $box (@$boxes) {
+    my ($feature,$left,$top,$right,$bottom,$track) = @$box;
+    next unless $feature->can('primary_tag');
+
+    my $lr = $link_for{$track}
+      ||= (defined $track->option('link') ? $track->option('link') : $linkrule);
+    next unless $lr;
+
+    my $tr = $title_for{$track}
+      ||= (defined $track->option('title')  ? $track->option('title')  : $titlerule);
+    my $tgr = $target_for{$track}
+      ||= (defined $track->option('target')? $track->option('target')  : $targetrule);
+
+    my $href   = $self->make_link($lr,$feature);
+    my $alt    = $self->make_link($tr,$feature);
+    my $target = $self->make_link($tgr,$feature);
+    $alt       = $self->make_title($feature) unless defined $alt;
+
+    my $title_attr  = $alt    ? qq(title="$alt" alt="$alt") : '';
+    my $target_attr = $target ? qq(target="$target")        : '';
+    push @html,
+      qq(<area shape="rect" coords="$left,$top,$right,$bottom" href="$href" $title_attr $target_attr/>\n);
+  }
+  push @html, "</map>\n";
+  return join '', @html;
+}
+
+# Expand a link rule for a feature by delegating to
+# Bio::Graphics::FeatureFile->link_pattern(), loading that module first if
+# the method is not available yet.
+sub make_link {
+  my ($self,$linkrule,$feature) = @_;
+  unless (Bio::Graphics::FeatureFile->can('link_pattern')) {
+    eval "require Bio::Graphics::FeatureFile;1";
+  }
+  return Bio::Graphics::FeatureFile->link_pattern($linkrule,$feature,$self);
+}
+
+# Produce a title for a feature by delegating to
+# Bio::Graphics::FeatureFile->make_title(), loading that module first if
+# the method is not available yet.
+sub make_title {
+  my ($self,$feature) = @_;
+  unless (Bio::Graphics::FeatureFile->can('make_title')) {
+    eval "require Bio::Graphics::FeatureFile;1";
+  }
+  return Bio::Graphics::FeatureFile->make_title($feature);
+}
+
+# Fill %COLORS from the table in the DATA section.  Each row is a color
+# name followed by three hexadecimal components; reading stops at a line
+# starting with __END__.
+sub read_colors {
+  my $class = shift;
+  lock %COLORS;
+  local ($/) = "\n";
+  # read into a lexical so the caller's $_ is left alone
+  while (my $line = <DATA>) {
+    chomp $line;
+    last if $line =~ /^__END__/;
+    my ($name,$red,$green,$blue) = split /\s+/, $line;
+    # skip blank or incomplete rows instead of storing junk entries
+    next unless defined $name && length $name && defined $blue;
+    @{$COLORS{$name}} = (hex $red,hex $green, hex $blue);
+  }
+}
+
+# Look up a symbolic color name, loading the color table on first use.
+# Returns the (r,g,b) list in list context, the stored array ref in scalar
+# context, and nothing for an unknown name.
+sub color_name_to_rgb {
+  my ($class,$color_name) = @_;
+  $class->read_colors() unless %COLORS;
+  my $rgb = $COLORS{$color_name};
+  return unless $rgb;
+  return wantarray ? @$rgb : $rgb;
+}
+
+# Return all known symbolic color names, loading the color table on first
+# use: a list in list context, a new array ref otherwise.
+sub color_names {
+    my $class = shift;
+    $class->read_colors unless %COLORS;
+    my @names = keys %COLORS;
+    return wantarray ? @names : \@names;
+}
+
+# Tell every track that the panel is finished with it, then drop the
+# panel's track list.
+sub finished {
+    my $self   = shift;
+    my $tracks = $self->{tracks} || [];
+    foreach my $glyph (@$tracks) {
+	$glyph->finished();
+    }
+    delete $self->{tracks};
+}
+
+1;
+
+__DATA__
+white                FF           FF            FF
+black                00           00            00
+aliceblue            F0           F8            FF
+antiquewhite         FA           EB            D7
+aqua                 00           FF            FF
+aquamarine           7F           FF            D4
+azure                F0           FF            FF
+beige                F5           F5            DC
+bisque               FF           E4            C4
+blanchedalmond       FF           EB            CD
+blue                 00           00            FF
+blueviolet           8A           2B            E2
+brown                A5           2A            2A
+burlywood            DE           B8            87
+cadetblue            5F           9E            A0
+chartreuse           7F           FF            00
+chocolate            D2           69            1E
+coral                FF           7F            50
+cornflowerblue       64           95            ED
+cornsilk             FF           F8            DC
+crimson              DC           14            3C
+cyan                 00           FF            FF
+darkblue             00           00            8B
+darkcyan             00           8B            8B
+darkgoldenrod        B8           86            0B
+darkgray             A9           A9            A9
+darkgreen            00           64            00
+darkkhaki            BD           B7            6B
+darkmagenta          8B           00            8B
+darkolivegreen       55           6B            2F
+darkorange           FF           8C            00
+darkorchid           99           32            CC
+darkred              8B           00            00
+darksalmon           E9           96            7A
+darkseagreen         8F           BC            8F
+darkslateblue        48           3D            8B
+darkslategray        2F           4F            4F
+darkturquoise        00           CE            D1
+darkviolet           94           00            D3
+deeppink             FF           14            93
+deepskyblue          00           BF            FF
+dimgray              69           69            69
+dodgerblue           1E           90            FF
+firebrick            B2           22            22
+floralwhite          FF           FA            F0
+forestgreen          22           8B            22
+fuchsia              FF           00            FF
+gainsboro            DC           DC            DC
+ghostwhite           F8           F8            FF
+gold                 FF           D7            00
+goldenrod            DA           A5            20
+gray                 80           80            80
+grey                 80           80            80
+green                00           80            00
+greenyellow          AD           FF            2F
+honeydew             F0           FF            F0
+hotpink              FF           69            B4
+indianred            CD           5C            5C
+indigo               4B           00            82
+ivory                FF           FF            F0
+khaki                F0           E6            8C
+lavender             E6           E6            FA
+lavenderblush        FF           F0            F5
+lawngreen            7C           FC            00
+lemonchiffon         FF           FA            CD
+lightblue            AD           D8            E6
+lightcoral           F0           80            80
+lightcyan            E0           FF            FF
+lightgoldenrodyellow FA           FA            D2
+lightgreen           90           EE            90
+lightgrey            D3           D3            D3
+lightpink            FF           B6            C1
+lightsalmon          FF           A0            7A
+lightseagreen        20           B2            AA
+lightskyblue         87           CE            FA
+lightslategray       77           88            99
+lightsteelblue       B0           C4            DE
+lightyellow          FF           FF            E0
+lime                 00           FF            00
+limegreen            32           CD            32
+linen                FA           F0            E6
+magenta              FF           00            FF
+maroon               80           00            00
+mediumaquamarine     66           CD            AA
+mediumblue           00           00            CD
+mediumorchid         BA           55            D3
+mediumpurple         93           70            DB
+mediumseagreen       3C           B3            71
+mediumslateblue      7B           68            EE
+mediumspringgreen    00           FA            9A
+mediumturquoise      48           D1            CC
+mediumvioletred      C7           15            85
+midnightblue         19           19            70
+mintcream            F5           FF            FA
+mistyrose            FF           E4            E1
+moccasin             FF           E4            B5
+navajowhite          FF           DE            AD
+navy                 00           00            80
+oldlace              FD           F5            E6
+olive                80           80            00
+olivedrab            6B           8E            23
+orange               FF           A5            00
+orangered            FF           45            00
+orchid               DA           70            D6
+palegoldenrod        EE           E8            AA
+palegreen            98           FB            98
+paleturquoise        AF           EE            EE
+palevioletred        DB           70            93
+papayawhip           FF           EF            D5
+peachpuff            FF           DA            B9
+peru                 CD           85            3F
+pink                 FF           C0            CB
+plum                 DD           A0            DD
+powderblue           B0           E0            E6
+purple               80           00            80
+red                  FF           00            00
+rosybrown            BC           8F            8F
+royalblue            41           69            E1
+saddlebrown          8B           45            13
+salmon               FA           80            72
+sandybrown           F4           A4            60
+seagreen             2E           8B            57
+seashell             FF           F5            EE
+sienna               A0           52            2D
+silver               C0           C0            C0
+skyblue              87           CE            EB
+slateblue            6A           5A            CD
+slategray            70           80            90
+snow                 FF           FA            FA
+springgreen          00           FF            7F
+steelblue            46           82            B4
+tan                  D2           B4            8C
+teal                 00           80            80
+thistle              D8           BF            D8
+tomato               FF           63            47
+turquoise            40           E0            D0
+violet               EE           82            EE
+wheat                F5           DE            B3
+whitesmoke           F5           F5            F5
+yellow               FF           FF            00
+yellowgreen          9A           CD            32
+gradient1	00 ff 00
+gradient2	0a ff 00
+gradient3	14 ff 00
+gradient4	1e ff 00
+gradient5	28 ff 00
+gradient6	32 ff 00
+gradient7	3d ff 00
+gradient8	47 ff 00
+gradient9	51 ff 00
+gradient10	5b ff 00
+gradient11	65 ff 00
+gradient12	70 ff 00
+gradient13	7a ff 00
+gradient14	84 ff 00
+gradient15	8e ff 00
+gradient16	99 ff 00
+gradient17	a3 ff 00
+gradient18	ad ff 00
+gradient19	b7 ff 00
+gradient20	c1 ff 00
+gradient21	cc ff 00
+gradient22	d6 ff 00
+gradient23	e0 ff 00
+gradient24	ea ff 00
+gradient25	f4 ff 00
+gradient26	ff ff 00
+gradient27	ff f4 00
+gradient28	ff ea 00
+gradient29	ff e0 00
+gradient30	ff d6 00
+gradient31	ff cc 00
+gradient32	ff c1 00
+gradient33	ff b7 00
+gradient34	ff ad 00
+gradient35	ff a3 00
+gradient36	ff 99 00
+gradient37	ff 8e 00
+gradient38	ff 84 00
+gradient39	ff 7a 00
+gradient40	ff 70 00
+gradient41	ff 65 00
+gradient42	ff 5b 00
+gradient43	ff 51 00
+gradient44	ff 47 00
+gradient45	ff 3d 00
+gradient46	ff 32 00
+gradient47	ff 28 00
+gradient48	ff 1e 00
+gradient49	ff 14 00
+gradient50	ff 0a 00
+__END__
+
+=head1 NAME
+
+Bio::Graphics::Panel - Generate GD images of Bio::Seq objects
+
+=head1 SYNOPSIS
+
+ # This script parses a GenBank or EMBL file named on the command
+ # line and produces a PNG rendering of it.  Call it like this:
+ # render.pl my_file.embl | display -
+
+ use strict;
+ use Bio::Graphics;
+ use Bio::SeqIO;
+
+ my $file = shift                       or die "provide a sequence file as the argument";
+ my $io = Bio::SeqIO->new(-file=>$file) or die "could not create Bio::SeqIO";
+ my $seq = $io->next_seq                or die "could not find a sequence in the file";
+
+ my @features = $seq->all_SeqFeatures;
+
+ # sort features by their primary tags
+ my %sorted_features;
+ for my $f (@features) {
+   my $tag = $f->primary_tag;
+   push @{$sorted_features{$tag}},$f;
+ }
+
+ my $panel = Bio::Graphics::Panel->new(
+                                      -length    => $seq->length,
+ 				      -key_style => 'between',
+ 				      -width     => 800,
+ 				      -pad_left  => 10,
+ 				      -pad_right => 10,
+ 				      );
+ $panel->add_track( arrow => Bio::SeqFeature::Generic->new(-start=>1,
+                                                           -end=>$seq->length),
+ 		  -bump => 0,
+ 		  -double=>1,
+ 		  -tick => 2);
+ $panel->add_track(generic => Bio::SeqFeature::Generic->new(-start=>1,
+							  -end=>$seq->length),
+ 		  -glyph  => 'generic',
+ 		  -bgcolor => 'blue',
+ 		  -label  => 1,
+ 		 );
+
+ # general case
+ my @colors = qw(cyan orange blue purple green chartreuse magenta yellow aqua);
+ my $idx    = 0;
+ for my $tag (sort keys %sorted_features) {
+   my $features = $sorted_features{$tag};
+   $panel->add_track($features,
+ 		    -glyph    =>  'generic',
+ 		    -bgcolor  =>  $colors[$idx++ % @colors],
+ 		    -fgcolor  => 'black',
+ 		    -font2color => 'red',
+ 		    -key      => "${tag}s",
+ 		    -bump     => +1,
+ 		    -height   => 8,
+ 		    -label    => 1,
+ 		    -description => 1,
+ 		   );
+ }
+
+ print $panel->png;
+ $panel->finished;
+
+ exit 0;
+
+=head1 DESCRIPTION
+
+The Bio::Graphics::Panel class provides drawing and formatting
+services for any object that implements the Bio::SeqFeatureI
+interface, including Ace::Sequence::Feature and Das::Segment::Feature
+objects.  It can be used to draw sequence annotations, physical
+(contig) maps, or any other type of map in which a set of discrete
+ranges need to be laid out on the number line.
+
+The module supports a drawing style in which each type of feature
+occupies a discrete "track" that spans the width of the display.  Each
+track will have its own distinctive "glyph", a configurable graphical
+representation of the feature.
+
+The module also supports a more flexible style in which several
+different feature types and their associated glyphs can occupy the
+same track.  The choice of glyph is under run-time control.
+
+Semantic zooming (for instance, changing the type of glyph depending
+on the density of features) is supported by a callback system for
+configuration variables.  The module has built-in support for Bio::Das
+stylesheets, and stylesheet-driven configuration can be intermixed
+with semantic zooming, if desired.
+
+You can add a key to the generated image using either of two key
+styles.  One style places the key captions at the top of each track.
+The other style generates a graphical key at the bottom of the image.
+
+Note that this module depends on GD. The optional SVG output depends
+on GD::SVG and SVG.
+
+=head1 METHODS
+
+This section describes the class and object methods for
+Bio::Graphics::Panel.
+
+Typically you will begin by creating a new Bio::Graphics::Panel
+object, passing it the desired width of the image to generate and an
+origin and length describing the coordinate range to display.  The
+Bio::Graphics::Panel-E<gt>new() method has many configuration variables
+that allow you to control the appearance of the image.
+
+You will then call add_track() one or more times to add sets of
+related features to the picture.  add_track() places a new horizontal
+track on the image, and is likewise highly configurable.  When you
+have added all the features you desire, you may call png() to convert
+the image into a PNG-format image, or boxes() to return coordinate
+information that can be used to create an imagemap.
+
+=head2 CONSTRUCTORS
+
+new() is the constructor for Bio::Graphics::Panel:
+
+=over 4
+
+=item $panel = Bio::Graphics::Panel-E<gt>new(@options)
+
+The new() method creates a new panel object.  The options are
+a set of tag/value pairs as follows:
+
+  Option      Value                                  Default
+  ------      -----                                  -------
+
+  -offset     Base pair to place at extreme left     none
+	      of image, in zero-based coordinates
+
+  -length     Length of sequence segment, in bp      none
+
+  -start      Start of range, in 1-based             none
+              coordinates.
+
+  -stop       Stop of range, in 1-based              none
+	      coordinates.
+
+  -end        Same as -stop.
+
+  -segment    A Bio::SeqI or Das::Segment            none
+              object, used to derive sequence
+	      range if not otherwise specified.
+
+  -width      Desired width of image, in pixels      600
+
+  -spacing    Spacing between tracks, in pixels      5
+
+  -pad_top    Additional whitespace between top      0
+	      of image and contents, in pixels
+
+  -pad_bottom Additional whitespace between bottom   0
+	      of image and contents, in pixels
+
+  -pad_left   Additional whitespace between left     0
+	      of image and contents, in pixels
+
+  -pad_right  Additional whitespace between right    0
+	      of image and contents, in pixels
+
+  -bgcolor    Background color for the panel as a    white
+	      whole
+
+  -key_color  Background color for the key printed   wheat
+              at bottom of panel (if any)
+
+  -key_spacing Spacing between key glyphs in the     10
+               key printed at bottom of panel
+               (if any)
+
+  -key_font    Font to use in printed key            gdMediumBoldFont
+	       captions.
+
+  -key_style   Whether to print key at bottom of     none
+	       panel ("bottom"), between each
+	       track ("between"), to the left of
+               each track ("left"), to the right
+               of each track ("right") or
+               not at all ("none").
+
+  -add_category_labels                               false
+               Whether to add the "category" to
+               the track key. The category is
+               an optional argument that can
+               be attached to each track. If
+               a category is present, and this
+               option is true, then the category
+               will be added to the track label
+               in parentheses. For example, if
+               -key is "Protein matches" and
+               -category is "vertebrate", then
+               the track will be labeled
+               "Protein matches (vertebrate)".
+
+  -auto_pad    If "left" or "right" keys are in use  true
+               then setting auto_pad to a true value
+               will allow the panel to adjust its
+               width in order to accommodate the
+               length of the longest key.
+
+  -empty_tracks What to do when a track is empty.    suppress
+              Options are to suppress the track
+              completely ("suppress"), to show just
+              the key in "between" mode ("key"),
+              to draw a thin grey line ("line"),
+              or to draw a dashed line ("dashed").
+
+  -flip       flip the drawing coordinates left     false
+              to right, so that lower coordinates
+              are to the right.  This can be
+              useful for drawing (-) strand
+              features.
+
+  -all_callbacks Whether to invoke callbacks on      false
+               the automatic "track" and "group"
+               glyphs.
+
+  -grid        Whether to draw a vertical grid in    false
+               the background.  Pass a scalar true
+               value to have a grid drawn at
+               regular intervals (corresponding
+               to the minor ticks of the arrow
+	       glyph).  Pass an array reference
+               to draw the grid at the specified
+               positions.
+
+  -gridcolor   Color of the grid                     lightcyan
+
+  -extend_grid If true, extend the grid into the pad false
+               top and pad_bottom regions
+
+  -background  An image or callback to use for the   none
+               background of the image. Will be
+               invoked I<before> drawing the grid.
+
+  -postgrid    An image or callback to use for the   none
+               background of the image.  Will be 
+               invoked I<after> drawing the grid.
+
+  -truecolor   Create a truecolor (24-bit) image.    false
+               Useful when working with the
+               "image" glyph.
+
+  -image_class To create output in scalable vector
+               graphics (SVG), optionally pass the image
+               class parameter 'GD::SVG'. Defaults to
+               using vanilla GD. See the corresponding
+               image_class() method below for details.
+
+  -link, -title, -target
+               These options are used when creating imagemaps
+               for display on the web.  See L</"Creating Imagemaps">.
+
+
+Typically you will pass new() an object that implements the
+Bio::RangeI interface, providing a length() method, from which the
+panel will derive its scale.
+
+  $panel = Bio::Graphics::Panel->new(-segment => $sequence,
+				     -width   => 800);
+
+new() will return undef in case of an error.
+
+Note that if you use the "left" or "right" key styles, you are
+responsible for allocating sufficient -pad_left or -pad_right room for
+the labels to appear.  The necessary width is the number of characters
+in the longest key times the font width (gdMediumBoldFont by default)
+plus 3 pixels of internal padding.  The simplest way to calculate this
+is to iterate over the possible track labels, find the largest one,
+and then to compute its width using the formula:
+
+  $width = gdMediumBoldFont->width * length($longest_key) +3;
+
+In order to obtain scalable vector graphics (SVG) output, you should
+pass new() the -image_class=E<gt>'GD::SVG' parameter. This will cause
+Bio::Graphics::Panel to load the optional GD::SVG module. See the gd()
+and svg() methods below for additional information.
+
+You can tile an image onto the panel either before or after it draws
+the grid. Simply provide the filename of the image in the -background
+or -postgrid options. The image file must be of type PNG, JPEG, XBM or
+GIF and have a filename ending in .png, .jpg, .jpeg, .xbm or .gif.
+
+You can also pass a code ref for the -background or -postgrid option,
+in which case the subroutine will be invoked at the appropriate time
+with the GD::Image object and the Panel object as its two arguments.
+You can then use the panel methods to map base pair coordinates into
+pixel coordinates and do some custom drawing.  For example, this code
+fragment will draw a gray rectangle between bases 500 and 600 to
+indicate a "gap" in the sequence:
+
+  my $panel = Bio::Graphics::Panel->new(-segment=>$segment,
+                                        -grid=>1,
+                                        -width=>600,
+                                        -postgrid=> \&draw_gap);
+  sub draw_gap {
+     my $gd    = shift;
+     my $panel = shift;
+     my ($gap_start,$gap_end) = $panel->location2pixel(500,600);
+     my $top                  = $panel->top;
+     my $bottom               = $panel->bottom;
+     my $gray                 = $panel->translate_color('gray');
+     $gd->filledRectangle($gap_start,$top,$gap_end,$bottom,$gray);
+}
+
+
+=back
+
+=head2 OBJECT METHODS
+
+=over 4
+
+=item $track = $panel-E<gt>add_track($glyph,$features,@options)
+
+The add_track() method adds a new track to the image. 
+
+Tracks are horizontal bands which span the entire width of the panel.
+Each track contains a number of graphical elements called "glyphs",
+corresponding to a sequence feature. 
+
+There are a large number of glyph types.  By default, each track will
+be homogeneous on a single glyph type, but you can mix several glyph
+types on the same track by providing a code reference to the -glyph
+argument.  Other options passed to add_track() control the color and
+size of the glyphs, whether they are allowed to overlap, and other
+formatting attributes.  The height of a track is determined from its
+contents and cannot be directly influenced.
+
+The first two arguments are the glyph name and an array reference
+containing the list of features to display.  The order of the
+arguments is irrelevant, allowing either of these idioms:
+
+  $panel->add_track(arrow => \@features);
+  $panel->add_track(\@features => 'arrow');
+
+
+The glyph name indicates how each feature is to be rendered.  A
+variety of glyphs are available, and the number is growing. You may
+omit the glyph name entirely by providing a B<-glyph> argument among
+@options, as described below.
+
+Currently, the following glyphs are available:
+
+  Name        Description
+  ----        -----------
+
+  anchored_arrow
+              a span with vertical bases |---------|.  If one or
+              the other end of the feature is off-screen, the base
+              will be replaced by an arrow.
+
+  arrow	      An arrow; can be unidirectional or bidirectional.
+	      It is also capable of displaying a scale with
+	      major and minor tickmarks, and can be oriented
+	      horizontally or vertically.
+
+  cds         Draws CDS features, using the phase information to
+              show the reading frame usage.  At high magnifications
+              draws the protein translation.
+
+  crossbox    A box with a big "X" inside it.
+
+  diamond     A diamond, useful for point features like SNPs.
+
+  dna         At high magnification draws the DNA sequence.  At
+              low magnifications draws the GC content.
+
+  dot         A circle, useful for point features like SNPs, stop
+              codons, or promoter elements.
+
+  ellipse     An oval.
+
+  extending_arrow
+              Similar to arrow, but a dotted line indicates when the
+              feature extends beyond the end of the canvas.
+
+  generic     A filled rectangle, nondirectional.
+
+  graded_segments
+              Similar to segments, but the intensity of the color
+              is proportional to the score of the feature.  This
+              is used for showing the intensity of blast hits or
+              other alignment features.
+
+  group	      A group of related features connected by a dashed line.
+	      This is used internally by Panel.
+
+  image	      A pixmap image that will be layered on top of the graphic.
+
+  heterogeneous_segments
+              Like segments, but you can use the source field of the feature
+              to change the color of each segment.
+
+  line        A simple line.
+
+  pinsertion  A triangle designed to look like an insertion location
+              (e.g. a transposon insertion).
+
+  processed_transcript  multi-purpose representation of a spliced mRNA, including
+			positions of UTRs
+
+  primers     Two inward pointing arrows connected by a line.
+	      Used for STSs.
+
+  redgreen_box A box that changes from green->yellow->red as the score
+              of the feature increases from 0.0 to 1.0.  Useful for
+              representing microarray results.
+
+  rndrect     A round-cornered rectangle.
+
+  segments    A set of filled rectangles connected by solid lines.
+	      Used for interrupted features, such as gapped
+	      alignments.
+
+  ruler_arrow An arrow with major and minor tick marks and interval
+              labels.
+
+  toomany     Tries to show many features as a cloud.  Not very successful.
+
+  track	      A group of related features not connected by a line.
+	      This is used internally by Panel.
+
+  transcript  Similar to segments, but the connecting line is
+	      a "hat" shape, and the direction of transcription
+	      is indicated by a small arrow.
+
+  transcript2  Similar to transcript, but the direction of
+              transcription is indicated by a terminal exon
+              in the shape of an arrow.
+
+  translation 1, 2 and 3-frame translations.  At low magnifications,
+              can be configured to show start and stop codon locations.
+              At high magnifications, shows the multi-frame protein
+              translation.
+
+  triangle    A triangle whose width and orientation can be altered.
+
+  xyplot      Histograms and other graphs plotted against the genome.
+
+  whiskerplot Box and whisker plot for statistical data
+
+If the glyph name is omitted from add_track(), the "generic" glyph
+will be used by default.  To get more information about a glyph, run
+perldoc on "Bio::Graphics::Glyph::glyphname", replacing "glyphname"
+with the name of the glyph you are interested in.
+
+The @options array is a list of name/value pairs that control the
+attributes of the track.  Some options are interpreted directly by
+the track.  Others are passed down to the individual glyphs (see
+L<"GLYPH OPTIONS">).  The following options are track-specific:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -tkcolor    Track color                  white
+
+  -glyph      Glyph class to use.         "generic"
+
+  -stylesheet Bio::Das::Stylesheet to     none
+              use to generate glyph
+	      classes and options.
+
+B<-tkcolor> controls the background color of the track as a whole.
+
+B<-glyph> controls the glyph type.  If present, it supersedes the
+glyph name given in the first or second argument to add_track().  The
+value of B<-glyph> may be a constant string, a hash reference, or a
+code reference.  In the case of a constant string, that string will be
+used as the class name for all generated glyphs.  If a hash reference
+is passed, then the feature's primary_tag() will be used as the key to
+the hash, and the value, if any, used to generate the glyph type.  If
+a code reference is passed, then this callback will be passed each
+feature in turn as its single argument.  The callback is expected to
+examine the feature and return a glyph name as its single result.
+
+Example:
+
+  $panel->add_track(\@exons,
+		    -glyph => sub { my $feature = shift;
+                                    $feature->source_tag eq 'curated'
+                                          ? 'ellipse' : 'generic'; }
+                    );
+
+The B<-stylesheet> argument is used to pass a Bio::Das stylesheet
+object to the panel.  This stylesheet will be called to determine both
+the glyph and the glyph options.  If both a stylesheet and direct
+options are provided, the latter take precedence.
+
+If successful, add_track() returns a Bio::Graphics::Glyph object.
+You can use this object to add additional features or to control the
+appearance of the track with greater detail, or just ignore it.
+Tracks are added in order from the top of the image to the bottom.  To
+add tracks to the top of the image, use unshift_track().
+
+B<Adding groups of features:> It is not uncommon to add a group of
+features which are logically connected, such as the 5' and 3' ends of
+EST reads.  To group features into sets that remain on the same
+horizontal position and bump together, pass the sets as an anonymous
+array.  For example:
+
+  $panel->add_track(segments => [[$abc_5,$abc_3],
+				 [$xxx_5,$xxx_3],
+				 [$yyy_5,$yyy_3]]
+		    );
+
+Typical usage is:
+
+ $panel->add_track( transcript    => \@genes,
+ 		    -fillcolor =>  'green',
+ 		    -fgcolor   =>  'black',
+ 		    -bump      =>  +1,
+ 		    -height    => 10,
+ 		    -label     => 1);
+
+=item $track = unshift_track($glyph,$features,@options)
+
+unshift_track() works like add_track(), except that the new track is
+added to the top of the image rather than the bottom.
+
+=item $track = $panel-E<gt>insert_track($position,$glyph,$features,@options)
+
+This works like add_track(), but the track is inserted into the
+indicated position.  The track will be inserted B<before> the
+indicated position; thus specify a track of 0 to insert the new track
+at the beginning.
+
+=item $gd = $panel-E<gt>gd([$gd])
+
+The gd() method lays out the image and returns a GD::Image object
+containing it.  You may then call the GD::Image object's png() or
+jpeg() methods to get the image data.
+
+Optionally, you may pass gd() a preexisting GD::Image object that you
+wish to draw on top of.  If you do so, you should call the width() and
+height() methods first to ensure that the image has sufficient
+dimensions.
+
+If you passed new() the -image_class=E<gt>'GD::SVG' parameter, the gd() method
+returns a GD::SVG::Image object. This object overrides GD::Image
+methods in order to generate SVG output. It behaves exactly as
+described for GD::Image objects with one exception: it implements an
+svg() method instead of the png() or jpeg() methods. Currently there
+is no direct access to underlying SVG calls but this is subject to
+change in the future.
+
+=item $png = $panel-E<gt>png
+
+The png() method returns the image as a PNG-format drawing, without
+the intermediate step of returning a GD::Image object.
+
+=item $svg = $panel-E<gt>svg
+
+The svg() method returns the image in an XML-ified SVG format.
+
+=item $panel-E<gt>finished
+
+Bio::Graphics creates memory cycles.  When you are finished with the
+panel, you should call its finished() method.  Otherwise you will have
+memory leaks.  This is only an issue if you're going to create several
+panels in a single program.
+
+=item $image_class = $panel-E<gt>image_class
+
+The image_class() method returns the current drawing package being
+used, currently one of GD or GD::SVG.  This is primarily used
+internally to ensure that calls to GD's exported methods are called in
+an object-oriented manner to avoid compile time undefined string
+errors.  This is usually not needed for external use.
+
+=item $image_package = $panel-E<gt>image_package
+
+This accessor method, like image_class() above is provided as a
+convenience.  It returns the current image package in use, currently
+one of GD::Image or GD::SVG::Image.  This is not normally used
+externally.
+
+=item $polygon_package = $panel-E<gt>polygon_package
+
+This accessor method, like image_package() above is provided as a
+convenience.  It returns the current polygon package in use, currently
+one of GD::Polygon or GD::SVG::Polygon.  This is not normally used
+externally except in the design of glyphs.
+
+=item $boxes = $panel-E<gt>boxes
+
+=item @boxes = $panel-E<gt>boxes
+
+The boxes() method returns a list of arrayrefs containing the
+coordinates of each glyph.  The method is useful for constructing an
+image map.  In a scalar context, boxes() returns an arrayref.  In a
+list context, the method returns the list directly.
+
+Each member of the list is an arrayref of the following format:
+
+  [ $feature, $x1, $y1, $x2, $y2, $track ]
+
+The first element is the feature object; either an
+Ace::Sequence::Feature, a Das::Segment::Feature, or another Bioperl
+Bio::SeqFeatureI object.  The coordinates are the topleft and
+bottomright corners of the glyph, including any space allocated for
+labels. The track is the Bio::Graphics::Glyph object corresponding to
+the track that the feature is rendered inside.
+
+=item $boxes = $panel-E<gt>key_boxes
+
+=item @boxes = $panel-E<gt>key_boxes
+
+Returns the positions of the track keys as an arrayref or a list,
+depending on context. Each value in the list is an arrayref of format:
+
+ [ $key_text, $x1, $y1, $x2, $y2, $track ]
+
+=item $position = $panel-E<gt>track_position($track)
+
+After calling gd() or boxes(), you can learn the resulting Y
+coordinate of a track by calling track_position() with the value
+returned by add_track() or unshift_track().  This will return undef if
+called before gd() or boxes() or with an invalid track.
+
+=item @pixel_coords = $panel-E<gt>location2pixel(@feature_coords)
+
+Public routine to map feature coordinates (in base pairs) into pixel
+coordinates relative to the left-hand edge of the picture. If you
+define a -background callback, the callback may wish to invoke this
+routine in order to translate base coordinates into pixel coordinates.
+
+=item $left = $panel-E<gt>left
+
+=item $right = $panel-E<gt>right
+
+=item $top   = $panel-E<gt>top
+
+=item $bottom = $panel-E<gt>bottom
+
+Return the pixel coordinates of the I<drawing area> of the panel, that
+is, exclusive of the padding.
+
+=back
+
+=head1 GLYPH OPTIONS
+
+Each glyph has its own specialized subset of options, but
+some are shared by all glyphs:
+
+  Option      Description                  Default
+  ------      -----------                  -------
+
+  -key        Description of track for     undef
+	      display in the track label.
+
+  -category   The category of the track    undef
+	      for display in the
+              track label.
+
+  -fgcolor    Foreground color		   black
+
+  -bgcolor    Background color             turquoise
+
+  -linewidth  Width of lines drawn by	   1
+	      glyph
+
+  -height     Height of glyph		   10
+
+  -font       Glyph font		   gdSmallFont
+
+  -fontcolor  Primary font color	   black
+
+  -font2color Secondary font color	   turquoise
+
+  -label      Whether to draw a label	   false
+
+  -description  Whether to draw a          false
+              description
+
+  -bump	      Bump direction		   0
+
+  -sort_order Specify layout sort order    "default"
+
+  -bump_limit Maximum number of levels     undef (unlimited)
+              to bump
+
+  -hbumppad   Additional horizontal        0
+              padding between bumped
+              features
+
+  -strand_arrow Whether to indicate        undef (false)
+                 strandedness
+
+  -stranded    Synonym for -strand_arrow   undef (false)
+
+  -part_labels Whether to label individual undef (false)
+               subparts.
+
+  -part_label_merge Whether to merge       undef (false)
+              adjacent subparts when
+              labeling.
+
+  -connector  Type of connector to         none
+	      use to connect related
+	      features.  Options are
+	      "solid", "hat", "dashed",
+              "quill" and "none".
+
+  -all_callbacks Whether to invoke         undef
+              callbacks for autogenerated
+              "track" and "group" glyphs
+
+  -subpart_callbacks Whether to invoke     false
+              callbacks for subparts of
+              the glyph.
+
+  -box_subparts Return boxes around feature          false
+               subparts rather than around the
+               feature itself.
+
+  -link, -title, -target
+               These options are used when creating imagemaps
+               for display on the web.  See L</"Creating Imagemaps">.
+
+  -filter      Select which features to
+               display. Must be a CODE reference.
+
+B<Specifying colors:> Colors can be expressed in either of two ways:
+as symbolic names such as "cyan" or as HTML-style #RRGGBB triples.
+The symbolic names are the 140 colors defined in the Netscape/Internet
+Explorer color cube, and can be retrieved using the
+Bio::Graphics::Panel-E<gt>color_names() method.
+
+B<Foreground color:> The -fgcolor option controls the foreground
+color, including the edges of boxes and the like.
+
+B<Background color:> The -bgcolor option controls the background used
+for filled boxes and other "solid" glyphs.  The foreground color
+controls the color of lines and strings.  The -tkcolor argument
+controls the background color of the entire track.
+
+B<Track color:> The -tkcolor option is used to specify the background of
+the entire track.
+
+B<Font color:> The -fontcolor option controls the color of primary
+text, such as labels.
+
+B<Secondary Font color:> The -font2color option controls the color of
+secondary text, such as descriptions.
+
+B<Labels:> The -label argument controls whether or not the ID of the
+feature should be printed next to the feature.  It is accepted by all
+glyphs.  By default, the label is printed just above the glyph and
+left aligned with it.  
+
+-label can be a constant string or a code reference.  Values can be
+any of:
+
+  -label value     Description
+  ------------     -----------
+
+    0              Don't draw a label
+    1              Calculate a label based on primary tag of sequence
+    "a string"     Use "a string" as the label
+    code ref       Invoke the code reference to compute the label
+
+A known bug with this naming scheme is that you can't label a feature
+with the string "1".  To work around this, use "1 " (note the terminal 
+space).
+
+B<Descriptions:> The -description argument controls whether or not a
+brief description of the feature should be printed next to it.  By
+default, the description is printed just below the glyph and
+left-aligned with it.  A value of 0 will suppress the description.  A
+value of 1 will "magically" look for tags of type "note" or
+"description" and draw them if found, otherwise the source tag, if
+any, will be displayed.  A code reference will be invoked to calculate
+the description on the fly.  Anything else will be treated as a string
+and used verbatim.
+
+B<Connectors:> A glyph can contain subglyphs, recursively.  The top
+level glyph is the track, which contains one or more groups, which
+contain features, which contain subfeatures, and so forth.  By
+default, the "group" glyph draws dotted lines between each of its
+subglyphs, the "segments" glyph draws a solid line between each of its
+subglyphs, and the "transcript" and "transcript2" glyphs draw
+hat-shaped lines between their subglyphs.  All other glyphs do not
+connect their components.  You can override this behavior by providing 
+a -connector option, to explicitly set the type of connector.  Valid
+options are:
+
+
+   "hat"     an upward-angling connector
+   "solid"   a straight horizontal connector
+   "quill"   a decorated line with small arrows indicating strandedness
+             (like the UCSC Genome Browser uses)
+   "dashed"  a horizontal dashed line.
+
+The B<-connector_color> option controls the color of the connector, if
+any.
+
+B<Collision control:> The B<-bump> argument controls what happens when
+glyphs collide.  By default, they will simply overlap (value 0).  A
+-bump value of +1 will cause overlapping glyphs to bump downwards
+until there is room for them.  A -bump value of -1 will cause
+overlapping glyphs to bump upwards.  You may also provide a -bump
+value of +2 or -2 to activate a very simple type of collision control
+in which each feature occupies its own line.  This is useful for
+showing dense, nearly-full length features such as similarity hits.
+The bump argument can also be a code reference; see below.
+
+If you would like to see more horizontal whitespace between features
+that occupy the same line, you can specify it with the B<-hbumppad>
+option.  Positive values increase the amount of whitespace between
+features.  Negative values decrease the whitespace.
+
+B<Keys:> The -key argument declares that the track is to be shown in a
+key appended to the bottom of the image.  The key contains a picture
+of a glyph and a label describing what the glyph means.  The label is
+specified in the argument to -key.
+
+B<box_subparts:> Ordinarily, when you invoke the boxes() methods to
+retrieve the rectangles surrounding the glyphs (which you need to do
+to create clickable imagemaps, for example), the rectangles will
+surround the top level features.  If you wish for the rectangles to
+surround subpieces of the glyph, such as the exons in a transcript,
+set box_subparts to a true numeric value. The value you specify will
+control the number of levels of subfeatures that the boxes will
+descend into. For example, if using the "gene" glyph, set
+-box_subparts to 2 to create boxes for the whole gene (level 0), the
+mRNAs (level 1) and the exons (level 2).
+
+B<part_labels:> If set to true, each subpart of a multipart feature
+will be labeled with a number starting with 1 at the 5'-most
+part. This is useful for counting exons. You can pass a callback to
+this argument; the part number and the total number of parts will be
+arguments three and four. For example, to label the exons as "exon 1",
+"exon 2" and so on:
+
+ -part_labels  =>  sub {
+		     my ($feature,undef,$partno) = @_;
+		     return 'exon '.($partno+1);
+	           }
+
+The B<-label> argument must also be true.
+
+B<part_labels_merge:> If true, changes the behavior of -part_labels so
+that features that abut each other without a gap are treated as a
+single feature. Useful if you want to count the UTR and CDS segments
+of an exon as a single unit, and the default for transcript glyphs.
+
+B<strand_arrow:> If set to true, some glyphs will indicate their
+strandedness, usually by drawing an arrow.  For this to work, the
+Bio::SeqFeature must have a strand of +1 or -1.  The glyph will ignore
+this directive if the underlying feature has a strand of zero or
+undef.
+
+B<sort_order>: By default, features are drawn with a layout based only on the
+position of the feature, assuring a maximal "packing" of the glyphs
+when bumped.  In some cases, however, it makes sense to display the
+glyphs sorted by score or some other comparison, e.g. such that more
+"important" features are nearer the top of the display, stacked above
+less important features.  The -sort_order option allows a few
+different built-in values for changing the default sort order (which
+is by "left" position): "low_score" (or "high_score") will cause
+features to be sorted from lowest to highest score (or vice versa).
+"left" (or "default") and "right" values will cause features to be
+sorted by their position in the sequence.  "longest" (or "shortest")
+will cause the longest (or shortest) features to be sorted first, and
+"strand" will cause the features to be sorted by strand: "+1"
+(forward) then "0" (unknown, or NA) then "-1" (reverse).
+
+In all cases, the "left" position will be used to break any ties.  To
+break ties using another field, options may be strung together using a
+"|" character; e.g. "strand|low_score|right" would cause the features
+to be sorted first by strand, then score (lowest to highest), then by
+"right" position in the sequence.
+
+Finally, a subroutine coderef with a $$ prototype can be provided.  It
+will receive two B<glyphs> as arguments and should return -1, 0 or 1
+(see Perl's sort() function for more information).  For example, to
+sort a set of database search hits by bits (stored in the features'
+"score" fields), scaled by the log of the alignment length (with
+"start" position breaking any ties):
+
+  sort_order = sub ($$) {
+    my ($glyph1,$glyph2) = @_;
+    my $a = $glyph1->feature;
+    my $b = $glyph2->feature;
+    ( $b->score/log($b->length)
+          <=>
+      $a->score/log($a->length) )
+          ||
+    ( $a->start <=> $b->start )
+  }
+
+It is important to remember to use the $$ prototype as shown in the
+example.  Otherwise Bio::Graphics will quit with an exception. The
+arguments are subclasses of Bio::Graphics::Glyph, not the features
+themselves.  While glyphs implement some, but not all, of the feature
+methods, to be safe call the two glyphs' feature() methods in order to
+convert them into the actual features.
+
+The '-always_sort' option, if true, will sort features even if bumping
+is turned off.  This is useful if you would like overlapping features
+to stack in a particular order.  Features towards the end of the list
+will overlay those towards the beginning of the sort order.
+
+B<bump_limit>: When bumping is chosen, colliding features will
+ordinarily move upward or downward without limit.  When many features
+collide, this can lead to excessively high images.  You can limit the
+number of levels that features will bump by providing a numeric
+B<bump_limit> option.
+
+The B<-filter> option, which must be a CODE reference, will be invoked
+once for each feature prior to rendering it. The coderef will receive
+the feature as its single option and should return true if the feature
+is to be shown and false otherwise.
+
+=head2 Options and Callbacks
+
+Instead of providing a constant value to an option, you may substitute
+a code reference.  This code reference will be called every time the
+panel needs to configure a glyph.  The callback will be called with
+five arguments like this:
+
+   sub callback {
+      my ($feature,$option_name,$part_no,$total_parts,$glyph) = @_;
+      # do something which results in $option_value being set
+      return $option_value;
+   }
+
+The five arguments are C<$feature>, a reference to the Bio::SeqFeatureI
+object, C<$option_name>, the name of the option to configure,
+C<$part_no>, an integer index indicating which subpart of the feature
+is being drawn, C<$total_parts>, an integer indicating the total
+number of subfeatures in the feature, and finally C<$glyph>, the Glyph
+object itself.  The latter fields are useful in the case of treating
+the first or last subfeature differently, such as using a different
+color for the terminal exon of a gene.  Usually you will only need to
+examine the first argument.  This example shows a callback examining
+the score() attribute of a feature (possibly a BLAST hit) and returning
+the color "red" for high-scoring features, and "green" for low-scoring
+features:
+
+  sub callback {
+     my $feature = shift;
+     if ($feature->score > 90) {
+       return 'red';
+     } else {
+       return 'green';
+    }
+  }
+
+The callback should return a string indicating the desired value of
+the option.  To tell the panel to use the default value for this
+option, return the string "*default*".
+
+When you install a callback for a feature that contains subparts, the
+callback will be invoked first for the top-level feature, and then for
+each of its subparts (recursively).  You should make sure to examine
+the feature's type to determine whether the option is appropriate.
+
+Some glyphs deliberately disable this recursive feature.  The "track",
+"group", "transcript", "transcript2" and "segments" glyphs selectively
+disable the -bump, -label and -description options.  This is to avoid,
+for example, a label being attached to each exon in a transcript, or
+the various segments of a gapped alignment bumping each other.  You
+can override this behavior and force your callback to be invoked by
+providing add_track() with a true B<-all_callbacks> argument.  In this
+case, you must be prepared to handle configuring options for the
+"group" and "track" glyphs.
+
+In particular, this means that in order to control the -bump option
+with a callback, you should specify -all_callbacks=E<gt>1, and turn on
+bumping when the callback is in the track or group glyphs.
+
+The -subpart_callbacks option is similar, except that when this is
+set to true callbacks are invoked for the main glyph and its
+subparts. This option only affects the -label and -description
+options.
+
+=head2 ACCESSORS
+
+The following accessor methods provide access to various attributes of
+the panel object.  Called with no arguments, they each return the
+current value of the attribute.  Called with a single argument, they
+set the attribute and return its previous value.
+
+Note that in most cases you must change attributes prior to invoking
+gd(), png() or boxes().  These three methods all invoke an internal
+layout() method which places the tracks and the glyphs within them,
+and then caches the result.
+
+   Accessor Name      Description
+   -------------      -----------
+
+   width()	      Get/set width of panel
+   spacing()	      Get/set spacing between tracks
+   key_spacing()      Get/set spacing between keys
+   length()	      Get/set length of segment (bp)
+   flip()             Get/set coordinate flipping
+   pad_top()	      Get/set top padding
+   pad_left()	      Get/set left padding
+   pad_bottom()	      Get/set bottom padding
+   pad_right()	      Get/set right padding
+   start()            Get the start of the sequence (bp; read only)
+   end()              Get the end of the sequence (bp; read only)
+   left()             Get the left side of the drawing area (pixels; read only)
+   right()            Get the right side of the drawing area (pixels; read only)
+
+=head2 COLOR METHODS
+
+The following methods are used internally, but may be useful for those
+implementing new glyph types.
+
+=over 4
+
+=item @names = Bio::Graphics::Panel-E<gt>color_names
+
+Return the symbolic names of the colors recognized by the panel
+object.  In a scalar context, returns an array reference.
+
+=item ($red,$green,$blue) = Bio::Graphics::Panel-E<gt>color_name_to_rgb($color)
+
+Given a symbolic color name, returns the red, green, blue components
+of the color.  In a scalar context, returns an array reference to the
+rgb triplet.  Returns undef for an invalid color name.
+
+=item @rgb = $panel-E<gt>rgb($index)
+
+Given a GD color index (between 0 and 140), returns the RGB triplet
+corresponding to this index.  This method is only useful within a
+glyph's draw() routine, after the panel has allocated a GD::Image and
+is populating it.
+
+=item $index = $panel-E<gt>translate_color($color)
+
+Given a color, returns the GD::Image index.  The color may be
+symbolic, such as "turquoise", or a #RRGGBB triple, as in #F0E0A8.
+This method is only useful within a glyph's draw() routine, after the
+panel has allocated a GD::Image and is populating it.
+
+=item $panel-E<gt>set_pen($width,$color)
+
+Changes the width and color of the GD drawing pen to the values
+indicated.  This is called automatically by the GlyphFactory fgcolor()
+method.  It returns the GD value gdBrushed, which should be used for
+drawing.
+
+=back
+
+=head2 Creating Imagemaps
+
+You may wish to use Bio::Graphics to create clickable imagemaps for
+display on the web.  The main method for achieving this is
+image_and_map().  Under special circumstances you may instead wish to
+call either or both of create_web_image() and create_web_map().
+
+Here is a synopsis of how to use image_and_map() in a CGI script,
+using CGI.pm calls to provide the HTML scaffolding:
+
+   print h2('My Genome');
+
+   my ($url,$map,$mapname) =
+       $panel->image_and_map(-root => '/var/www/html',
+                             -url  => '/tmpimages',
+                             -link => 'http://www.google.com/search?q=$name');
+
+   print img({-src=>$url,-usemap=>"#$mapname"});
+
+   print $map;
+
+We call image_and_map() with various arguments (described below) to
+generate a three element list consisting of the URL at which the image
+can be accessed, an HTML fragment containing the clickable imagemap
+data, and the name of the map.  We print out an E<lt>imgE<gt> tag
+that uses the URL of the image as its src attribute and the name of the
+map as the value of its usemap attribute.  It is important to note
+that we must put a "#" in front of the name of the map in order to
+indicate that the map can be found in the same document as the
+E<lt>imgE<gt> tag.  Lastly, we print out the map itself.
+
+=over 4
+
+=item ($url,$map,$mapname) = $panel-E<gt>image_and_map(@options)
+
+Create the image in a web-accessible directory and return its URL, its
+clickable imagemap, and the name of the imagemap.  The following
+options are recognized:
+
+ Option        Description
+ ------        -----------
+
+ -url          The URL to store the image at.
+
+
+ -root         The directory path that should be prepended to the
+               start of -url in order to obtain a physical
+               directory path.
+ -link         A string pattern or coderef that will be used to
+               generate the outgoing hypertext links for the imagemap.
+
+ -title        A string pattern or coderef that will be used to
+               generate the "title" tags of each element in the imagemap
+               (these appear as popup hint boxes in certain browsers).
+
+ -target       A string pattern or coderef that will be used to
+               generate the window target for each element.  This can
+               be used to pop up a new window when the user clicks on
+               an element.
+
+ -mapname      The name to use for the E<lt>mapE<gt> tag.  If not provided,
+               a unique one will be autogenerated for you.
+
+This method returns a three element list consisting of the URL at
+which the image has been written to, the imagemap HTML, and the name
+of the map.  Usually you will incorporate this information into an
+HTML document like so:
+
+  my ($url,$map,$mapname) =
+          $panel->image_and_map(-link=>'http://www.google.com/search?q=$name');
+  print qq(<img src="$url" usemap="#$mapname">),"\n";
+  print $map,"\n";
+
+=item $url = $panel-E<gt>create_web_image($url,$root)
+
+Create the image, write it into the directory indicated by
+concatenating $root and $url (i.e. "$root/$url"), and return $url.
+
+=item $map = $panel-E<gt>create_web_map('mapname',$linkrule,$titlerule,$targetrule)
+
+Create a clickable imagemap named "mapname" using the indicated rules
+to generate the hypertext links, the element titles, and the window
+targets for the graphical elements.  Return the HTML for the map,
+including the enclosing E<lt>mapE<gt> tag itself.
+
+=back
+
+To use this method effectively, you will need a web server and an
+image directory in the document tree that is writable by the web
+server user.  For example, if your web server's document root is
+located at /var/www/html, you might want to create a directory named
+"tmpimages" for this purpose:
+
+  mkdir /var/www/html/tmpimages
+  chmod 1777 /var/www/html/tmpimages
+
+The 1777 privilege will allow anyone to create files and
+subdirectories in this directory, but only the owner of the file will
+be able to delete it.
+
+When you call image_and_map(), you must provide it with two vital
+pieces of information: the URL of the image directory and the physical
+location of the web server's document tree.  In our example, you would
+call:
+
+  $panel->image_and_map(-root => '/var/www/html',-url=>'/tmpimages');
+
+If you are working with virtual hosts, you might wish to provide the
+hostname:portnumber part of the URL.  This will work just as well:
+
+  $panel->image_and_map(-root => '/var/www/html',
+                        -url  => 'http://myhost.com:8080/tmpimages');
+
+If you do not provide the -root argument, the method will try to
+figure it out from the DOCUMENT_ROOT environment variable.  If you do
+not provide the -url argument, the method will assume "/tmp".
+
+During execution, the image_and_map() method will generate a unique
+name for the image using the Digest::MD5 module.  You can get this
+module on CPAN and it B<must> be installed in order to use
+image_and_map().  The imagename will be a long hexadecimal string such
+as "e7457643f12d413f20843d4030c197c6.png".  Its URL will be
+/tmpimages/e7457643f12d413f20843d4030c197c6.png, and its physical path
+will be /var/www/html/tmpimages/e7457643f12d413f20843d4030c197c6.png
+
+In addition to providing directory information, you must also tell
+image_and_map() how to create outgoing links for each graphical
+feature, and, optionally, how to create the "hover title" (the popup
+yellow box displayed by most modern browsers), and the name of the
+window or frame to link to when the user clicks on it.
+
+There are three ways to specify the link destination:
+
+=over 4
+
+=item 1.
+
+By configuring one or more tracks with a -link argument.
+
+=item 2.
+
+By configuring the panel with a -link argument.
+
+=item 3.
+
+By passing a -link argument in the call to image_and_map().
+
+=back
+
+The -link argument can be either a string or a coderef.  If you pass a
+string, it will be interpreted as a URL pattern containing runtime
+variables.  These variables begin with a dollar sign ($), and are
+replaced at run time with the information relating to the selected
+annotation.  Recognized variables include:
+
+     $name        The feature's name (display name)
+     $id          The feature's id (eg, PK from a database)
+     $class       The feature's class (group class)
+     $method      The feature's method (same as primary tag)
+     $source      The feature's source
+     $ref         The name of the sequence segment (chromosome, contig)
+                     on which this feature is located
+     $description The feature's description (notes)
+     $start       The start position of this feature, relative to $ref
+     $end         The end position of this feature, relative to $ref
+     $segstart    The left end of $ref displayed in the detailed view
+     $segend      The right end of $ref displayed in the detailed view
+
+For example, to link each feature to a Google search on the feature's
+description, use the argument:
+
+  -link => 'http://www.google.com/search?q=$description'
+
+Be sure to use single quotes around the pattern, or Perl will attempt
+to perform variable interpretation before image_and_map() has a chance
+to work on it.
+
+You may also pass a code reference to -link, in which case the code
+will be called every time a URL needs to be generated for the
+imagemap.  The subroutine will be called with two arguments, the
+feature and the Bio::Graphics::Panel object, and it should return the
+URL to link to, or an empty string if a link is not desired. Here is a
+simple example:
+
+  -link => sub {
+         my ($feature,$panel) = @_;
+         my $type = $feature->primary_tag;
+         my $name = $feature->display_name;
+         if ($type eq 'clone') {
+            return "http://www.google.com/search?q=$name";
+         } else {
+            return "http://www.yahoo.com/search?p=$name";
+         }
+       }
+
+The -link argument cascades. image_and_map() will first look for a
+-link option in the track configuration, and if that's not found, it
+will look in the Panel configuration (created during
+Bio::Graphics::Panel-E<gt>new). If no -link configuration option is found
+in either location, then image_and_map() will use the value of -link
+passed in its argument list, if any.
+
+The -title and -target options behave in a similar manner to -link.
+-title is used to assign each feature "title" and "alt" attributes.
+The "title" attribute is used by many browsers to create a popup hints
+box when the mouse hovers over the feature's glyph for a preset length
+of time, while the "alt" attribute is used to create navigable menu
+items for the visually impaired.  As with -link, you can set the title
+by passing either a substitution pattern or a code ref, and the -title
+option can be set in the track, the panel, or the method call itself
+in that order of priority.
+
+If not provided, image_and_map() will autogenerate its own title in
+the form "E<lt>methodE<gt> E<lt>display_nameE<gt> E<lt>seqidE<gt>:start..end".
+
+The -target option can be used to specify the window or frame that
+clicked features will link to.  By default, when the user clicks on a
+feature, the loaded URL will replace the current page.  You can modify
+this by providing -target with the name of a preexisting or new window
+name in order to create effects like popup windows, multiple frames,
+popunders and the like.  The value of -target follows the same rules
+as -title and -link, including variable substitution and the use of
+code refs.
+
+NOTE: Each time you call image_and_map() it will generate a new image
+file.  Images that are identical to an earlier one will reuse the same
+name, but those that are different, even by one pixel, will result in
+the generation of a new image.  If you have limited disk space, you
+might wish to check the images directory periodically and remove those
+that have not been accessed recently.  The following cron script will
+remove image files that haven't been accessed in more than 20 days.
+
+30 2 * * * find /var/www/html/tmpimages -type f -atime +20 -exec rm {} \;
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Glyph>,
+L<Bio::Graphics::Glyph::arrow>,
+L<Bio::Graphics::Glyph::cds>,
+L<Bio::Graphics::Glyph::crossbox>,
+L<Bio::Graphics::Glyph::diamond>,
+L<Bio::Graphics::Glyph::dna>,
+L<Bio::Graphics::Glyph::dot>,
+L<Bio::Graphics::Glyph::ellipse>,
+L<Bio::Graphics::Glyph::extending_arrow>,
+L<Bio::Graphics::Glyph::generic>,
+L<Bio::Graphics::Glyph::graded_segments>,
+L<Bio::Graphics::Glyph::heterogeneous_segments>,
+L<Bio::Graphics::Glyph::line>,
+L<Bio::Graphics::Glyph::pinsertion>,
+L<Bio::Graphics::Glyph::primers>,
+L<Bio::Graphics::Glyph::rndrect>,
+L<Bio::Graphics::Glyph::segments>,
+L<Bio::Graphics::Glyph::redgreen_box>,
+L<Bio::Graphics::Glyph::ruler_arrow>,
+L<Bio::Graphics::Glyph::toomany>,
+L<Bio::Graphics::Glyph::transcript>,
+L<Bio::Graphics::Glyph::transcript2>,
+L<Bio::Graphics::Glyph::translation>,
+L<Bio::Graphics::Glyph::triangle>,
+L<Bio::Graphics::Glyph::xyplot>,
+L<Bio::Graphics::Glyph::whiskerplot>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<GD>,
+L<GD::SVG>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Pictogram.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Pictogram.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Pictogram.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,503 @@
+# BioPerl module for Bio::Graphics::Pictogram
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Graphics::Pictogram - generate SVG output of Pictogram display for consensus motifs
+
+=head1 SYNOPSIS
+
+  use Bio::Graphics::Pictogram;
+  use Bio::SeqIO;
+
+  my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
+  my @seq;
+  while(my $seq = $sio->next_seq){
+    push @seq, $seq;
+  }
+
+  my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
+                                            -height=>"500",
+                                            -fontsize=>"60",
+                                            -plot_bits=>1,
+                                            -background=>{
+                                                          'A'=>0.25,
+                                                          'C'=>0.18,
+                                                          'T'=>0.32,
+                                                          'G'=>0.25},
+                                            -color=>{'A'=>'red',
+                                                     'G'=>'blue',
+                                                     'C'=>'green',
+                                                     'T'=>'magenta'});
+
+  my $svg = $picto->make_svg(\@seq);
+
+  print $svg->xmlify."\n";
+
+  #Support for Bio::Matrix::PSM::SiteMatrix now included
+
+   use Bio::Matrix::PSM::IO;
+
+   my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
+                                            -height=>"500",
+                                            -fontsize=>"60",
+                                            -plot_bits=>1,
+                                            -background=>{
+                                                          'A'=>0.25,
+                                                          'C'=>0.18,
+                                                          'T'=>0.32,
+                                                          'G'=>0.25},
+                                            -color=>{'A'=>'red',
+                                                     'G'=>'blue',
+                                                     'C'=>'green',
+                                                     'T'=>'magenta'});
+
+  my $psm = $psmIO->next_psm;
+  my $svg = $picto->make_svg($psm);
+  print $svg->xmlify;
+
+
+=head1 DESCRIPTION
+
+A module for generating SVG output of Pictogram display for consensus
+motifs.  This method of representation was described by Burge and
+colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
+525-560, CSHL press, 1999)
+
+This is a simple module that takes in an array of sequences (assuming
+equal lengths) and calculates relative base frequencies where the
+height of each letter reflects the frequency of each nucleotide at a
+given position. It can also plot the information content at each
+position scaled by the background frequencies of each nucleotide.
+
+It requires the SVG-2.26 or later module by Ronan Oger available at
+http://www.cpan.org
+
+Recommended viewing of the SVG is the plugin available at Adobe:
+http://www.adobe.com/svg
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+package Bio::Graphics::Pictogram;
+use strict;
+use SVG 2.26;
+use Bio::SeqIO;
+use base qw(Bio::Root::Root);
+
+use constant MAXBITS => 2;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
+                                            -height=>"500",
+                                            -fontsize=>"60",
+                                            -plot_bits=>1,
+                                            -background=>{
+                                                          'A'=>0.25,
+                                                          'C'=>0.18,
+                                                          'T'=>0.32,
+                                                          'G'=>0.25},
+                                            -color=>{'A'=>'red',
+                                                      'G'=>'blue',
+                                                      'C'=>'green',
+                                                      'T'=>'magenta'});
+ Function: Constructor for Pictogram Object
+ Returns : L<Bio::Graphics::Pictogram>
+
+=cut
+
+# Constructor.  Recognised named arguments (all optional):
+#   -width      SVG canvas width            (default 800)
+#   -height     SVG canvas height           (default 600)
+#   -fontsize   font size of the letters    (default 80)
+#   -color      hash ref, base => colour    (default T black, C blue,
+#                                            G green, A red)
+#   -background hash ref, base => frequency (default 0.25 for each base)
+#   -plot_bits  true to print the bits value under each position
+#   -normalize  true to draw letter heights from frequencies alone
+# Returns the new Bio::Graphics::Pictogram object.
+sub new {
+  my ($caller,@args) = @_;
+  my $self = $caller->SUPER::new(@args);
+  # pick the recognised named parameters out of the argument list
+  my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
+
+  # the SVG canvas is created once here and stored on the object
+  $width||=800;
+  $height||=600;
+  my $svg = SVG->new(width=>$width,height=>$height);
+  $self->svg_obj($svg);
+
+  # a missing (or false) font size falls back to 80, so it is always set
+  $fontsize ||= 80;
+  $self->fontsize($fontsize);
+
+  $color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
+  $self->color($color);
+  $background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
+  $self->background($background);
+
+  # the two flags are only stored when they were passed as true values
+  $self->plot_bits($bit) if $bit;
+  $self->normalize($normalize) if $normalize;
+
+  return $self;
+}
+
+=head2 make_svg
+
+ Title   : make_svg
+ Usage   : $picto->make_svg();
+ Function: make the SVG object
+ Returns : L<SVG>
+ Arguments: A fasta file or array ref of L<Bio::Seq> objects or a L<Bio::Matrix::PSM::SiteMatrixI>
+
+=cut
+
+sub make_svg {   # draw the pictogram for $input into the stored SVG object and return it
+  my ($self,$input) = @_;
+  my $fontsize = $self->fontsize;
+  my $size = $fontsize * 0.75;   # letter cell size, derived from the font size
+  my $width= $size;              # horizontal step from one position to the next
+  my $height= $size+40;          # passed as the height attribute of each letter
+  my $color = $self->color;      # hash ref: upper-case base => fill colour
+
+  #starting x coordinate for pictogram (first column of letters)
+  my $x = 45+$size/2;
+  my $pos_y = $size * 2;         # y coordinate of the "Position:" row
+  my $bit_y = $pos_y+40;         # y coordinate of the "Bits:" row
+  my @pwm;                       # one [A,C,G,T,bits] array ref per position
+
+  my $bp = 1;                    # 1-based number of the position being drawn
+
+  #input can be an array ref of sequences, a site matrix object, or (anything else) a fasta file name
+  if(ref($input) eq 'ARRAY'){
+    @pwm = @{$self->_make_pwm($input)};
+  }
+  elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
+    @pwm = $self->_make_pwm_from_site_matrix($input);   # returns a list, not a reference
+  }
+  else {
+    my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta");
+    my @seq;
+    while (my $seq = $sio->next_seq){
+      push @seq, $seq;
+    }
+    @pwm = @{$self->_make_pwm(\@seq)};
+  }
+
+
+  my $svg = $self->svg_obj;
+  my $seq_length = scalar(@pwm + 1) * $width + $x + $x;   # (positions + 1) cells plus the left offset twice
+  my $seq_grp;
+
+  #scale the svg if length greater than svg width (only the x axis is scaled)
+  if($seq_length > $svg->{-document}->{'width'}){
+    my $ratio = $svg->{-document}->{'width'}/($seq_length);
+    $seq_grp = $svg->group(transform=>"scale($ratio,1)");
+  }
+  else {
+    $seq_grp= $svg->group();
+  }
+
+  #do the drawing, each set is a base position
+  foreach my $set(@pwm){
+    my ($A,$C,$G,$T,$bits) = @$set;
+    my @array;
+    push @array,  ['a',($A)];
+    push @array, ['g',($G)];
+    push @array, ['c',($C)];
+    push @array, ['t',($T)];
+    @array = sort {$b->[1]<=>$a->[1]}@array;   # highest frequency first
+    my $count = 1;                             # 1 for the first letter of this position
+    my $pos_group = $seq_grp->group(id=>"bp $bp");
+    my $prev_size;                             # scaled size of the letter drawn just before
+    my $y_trans;                               # running vertical offset within this position
+
+    #draw each letter at each position
+    foreach my $letter(@array){
+	  my $scale;   # vertical scale factor applied to this letter
+	  if($self->normalize){
+		$scale = $letter->[1];   # frequency alone
+	  } else {
+		$scale = $letter->[1] * ($bits / MAXBITS);   # frequency weighted by bits/MAXBITS
+	  }
+
+      if($count == 1){
+		if($self->normalize){
+		  $y_trans = 0;
+		} else {
+		  $y_trans = (1 - ($bits / MAXBITS)) * $size;   # first letter is offset by the unused share of the cell
+		}
+      }
+      else {
+        $y_trans += $prev_size;   # later letters are shifted by the previous letter's size
+      }
+      $pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height,
+                      'width'=>$width,x=>$x,y=>$size,
+                      'transform'=>"translate(0,$y_trans),scale(1,$scale)",
+                      'style'=>{"font-size"=>$fontsize,
+                      'text-anchor'=>'middle',
+                      'font-family'=>'Verdana',
+                      'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;   # letters with a zero scale are not drawn
+
+     $prev_size = $scale * $size;
+     $count++;
+    }
+    #plot the bit if required (the bits value is written under the column)
+    if($self->plot_bits){
+         $seq_grp->text('x'=>$x,
+                        'y'=>$bit_y,
+                        'style'=>{"font-size"=>'10',
+                                'text-anchor'=>'middle',
+                                'font-family'=>'Verdana',
+                                'fill'=>'black'})->cdata($bits);
+    }
+    $bp++;
+    $x+=$width;   # move on to the next column
+  }
+
+  #plot the tags (row labels placed at the left edge)
+  $seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
+
+ $seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
+
+  #plot the base positions (numbers 1 .. last position, one per column)
+  $x = 45+$size/2-int($width/2);
+  foreach my $nbr(1..($bp-1)){
+    $seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
+    $x+=$width;
+  }
+
+
+#  $seq_grp->transform("scale(2,2)");
+
+  return $self->svg_obj($svg);   # hands $svg to the svg_obj accessor and returns whatever it returns
+}
+
+sub _make_pwm_from_site_matrix{
+  # Build one [pA, pC, pG, pT, bits] row per consensus position of $matrix
+  # and return the rows as a list.
+  my ($self,$matrix) = @_;
+  my $background = $self->background;
+  my $consensus  = $matrix->consensus;
+  my @rows;
+  foreach my $column_no (1..length($consensus)){
+    # next_pos is read once per consensus position
+    my %column = $matrix->next_pos;
+    # relative entropy: sum over the bases of p * log2(p / background)
+    my $entropy;
+    foreach my $nt (qw(A C G T)){
+      my $p = $column{"p$nt"};
+      $entropy += ($p * log2($p/$background->{$nt}));
+    }
+    push @rows, [@column{qw(pA pC pG pT)},abs(sprintf("%.3f",$entropy))];
+  }
+  return @rows;
+}
+
+sub _make_pwm {
+  # Build a position weight matrix from the sequence objects in @$input and
+  # store it through $self->pwm; each row is [freqA, freqC, freqG, freqT, bits].
+  my ($self,$input) = @_;
+  my $background = $self->background;
+
+  #tally how often each residue occurs at each 1-based position
+  my %tally;
+  my $n_seqs = 0;
+  foreach my $seq (@$input){
+    my $string   = $seq->seq;
+    my @residues = split('',uc $string);
+    foreach my $idx (0..$#residues){
+      $tally{$idx+1}{$residues[$idx]}++;
+    }
+    $n_seqs++;
+  }
+
+  #convert the tallies to relative frequencies plus information content
+  my @rows;
+  foreach my $column (sort{$a<=>$b} keys %tally){
+    my @row;
+
+    # relative entropy (RelEnt) or Kullback-Liebler distance
+    # relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
+    # gk the background frequency of nucleotide k
+    my $bits;
+    foreach my $nt (qw(A C G T)){
+      my $freq = ($tally{$column}{$nt}||0) / $n_seqs;
+      push @row, $freq;
+      $bits += $freq * log2($freq / ($background->{$nt}));
+    }
+    push @row, abs(sprintf("%.3f",$bits));
+
+    push @rows,\@row;
+  }
+  return $self->pwm(\@rows);
+}
+
+
+###various get/sets
+
+=head2 fontsize
+
+ Title   : fontsize
+ Usage   : $picto->fontsize();
+ Function: get/set for fontsize
+ Returns : int
+ Arguments: int
+
+=cut
+
+sub fontsize {
+  my $self = shift;
+  my ($new_size) = @_;
+  # only a true argument replaces the stored size; otherwise this is a getter
+  $self->{'_fontsize'} = $new_size if $new_size;
+  return $self->{'_fontsize'};
+}
+
+=head2 color
+
+ Title   : color
+ Usage   : $picto->color();
+ Function: get/set for color
+ Returns : a hash reference
+ Arguments: a hash  reference
+
+=cut
+
+sub color {
+  my ($self,$palette) = @_;
+  # getter unless a true value (normally a hash reference) is supplied
+  return $self->{'_color'} unless $palette;
+  $self->{'_color'} = $palette;
+  return $self->{'_color'};
+}
+
+=head2 svg_obj
+
+ Title   : svg_obj
+ Usage   : $picto->svg_obj();
+ Function: get/set for svg_obj
+ Returns : L<SVG>
+ Arguments: L<SVG>
+
+=cut
+
+sub svg_obj {
+  my $self = shift;
+  my $svg  = shift;
+  # a true argument is stored; the stored value is returned either way
+  $svg and $self->{'_svg_obj'} = $svg;
+  return $self->{'_svg_obj'};
+}
+
+=head2 plot_bits
+
+ Title   : plot_bits
+ Usage   : $picto->plot_bits();
+ Function: get/set for plot_bits to indicate whether to plot
+           information content at each base position
+ Returns :1/0
+ Arguments: 1/0
+
+=cut
+
+sub plot_bits {
+  my ($self,$obj) = @_;
+  # Test for definedness rather than truth: the flag is documented as 1/0,
+  # and a truth test would make plot_bits(0) a no-op, so the flag could
+  # never be switched off again once set.
+  if(defined $obj){
+    $self->{'_plot_bits'} = $obj;
+  }
+  return   $self->{'_plot_bits'};
+}
+
+=head2 normalize
+
+ Title   : normalize
+ Usage   : $picto->normalize($newval)
+ Function: get/set to make all columns the same height.
+           default is to scale height with information
+           content.
+ Returns : value of normalize (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub normalize{
+    my ($self, @new_value) = @_;
+
+    # any supplied argument, even undef, overwrites the stored flag
+    if (@new_value) {
+        $self->{'normalize'} = $new_value[0];
+    }
+    return $self->{'normalize'};
+}
+
+=head2 background
+
+ Title   : background
+ Usage   : $picto->background();
+ Function: get/set for hash reference of nucleotide background frequencies
+ Returns : hash reference
+ Arguments: hash reference
+
+=cut
+
+sub background {
+  my $self = shift;
+  my ($frequencies) = @_;
+  # store the hash reference when a true one is given, then hand back
+  # whatever is currently stored
+  if ($frequencies) { $self->{'_background'} = $frequencies }
+  return $self->{'_background'};
+}
+
+=head2 pwm
+
+ Title   : pwm
+ Usage   : $picto->pwm();
+ Function: get/set for pwm
+ Returns : array reference (one [A, C, G, T, bits] row per position)
+ Arguments: array reference
+
+=cut
+
+sub pwm {
+  my ($self,$rows) = @_;
+  # a true argument replaces the stored matrix; a bare call just reads it
+  $self->{'_pwm'} = $rows if $rows;
+  return $self->{'_pwm'};
+}
+
+#utility method for returning log 2
+sub log2 {
+    my $x = shift;
+    # zero is mapped to 0 instead of being passed to log()
+    return $x == 0 ? 0 : log($x)/log(2);
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/RendererI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/RendererI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/RendererI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,90 @@
+# $Id: RendererI.pm,v 1.6.4.1 2006/10/02 23:10:19 sendu Exp $
+
+=head1 NAME
+
+Bio::Graphics::RendererI - A renderer for the Bio::Graphics class that
+renders Bio::SeqFeature::CollectionI objects onto
+Bio::Graphics::Panels using configuration information provided by a
+Bio::Graphics::ConfiguratorI.
+
+=head1 SYNOPSIS
+
+ # Get a renderer somehow, called $renderer
+
+ # create a new panel and render the contents of a feature collection onto it
+ my $config = new ConfigIO( $config_file )->getConfig();
+ my $features = $data_provider->getCollection();
+ my ( $tracks_rendered, $panel ) = $renderer->render( $features, $prefs );
+
+=head1 DESCRIPTION
+
+Renderer of Bio::SeqFeature::CollectionIs (collections of features)
+onto a Bio::Graphics::Panel using a Bio::Graphics::ConfiguratorI for
+general and track-specific rendering options.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Paul Edlefsen E<lt>paul at systemsbiology.orgE<gt>.
+
+Copyright (c) 2003 Institute for Systems Biology
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Graphics::RendererI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 render
+
+ Title   : render
+ Usage   : ( $rendered, $panel ) =
+               $renderer->render( $collection, $configurator [, $panel ] );
+ Function: Renders the SeqFeatures in the given collection onto a
+           Bio::Graphics::Panel (if no panel is given, one will be
+           created), using the given Bio::Graphics::ConfiguratorI for general
+           and track-specific rendering options.
+ Returns : In a scalar context returns the number of tracks rendered.
+           In a list context, returns a two-element list containing
+           the number of features rendered and the panel.
+ Args    : A Bio::SeqFeature::CollectionI and a Bio::Graphics::ConfiguratorI
+           and optionally a Bio::Graphics::Panel.
+ Status  : Public
+
+=cut
+
+sub render {
+  my ($self) = @_;
+  # no implementation here: the call is forwarded to throw_not_implemented
+  $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Util.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Util.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics/Util.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+package Bio::Graphics::Util;
+
+# $Id: Util.pm,v 1.4.4.1 2006/10/02 23:10:19 sendu Exp $
+# Non object-oriented utilities used here-and-there in Bio::Graphics modules
+
+=head1 NAME
+
+Bio::Graphics::Util - non-object-oriented utilities used in Bio::Graphics modules
+
+=cut
+
+use strict;
+require Exporter;
+use base qw(Exporter);
+use vars '@EXPORT','@EXPORT_OK';
+# frame_and_offset is exported by default
+@EXPORT = 'frame_and_offset';
+use Bio::Root::Version;
+
+=over 4
+
+=item ($frame,$offset) = frame_and_offset($pos,$strand,$phase)
+
+Calculate the reading frame for a given genomic position, strand and
+phase.  The offset is the offset from $pos to the first nucleotide
+of the reading frame.
+
+In a scalar context, returns the frame only.
+
+=back
+
+=cut
+
+sub frame_and_offset {
+  my ($pos,$strand,$phase) = @_;
+  # false or missing values fall back to the forward strand and phase 0
+  $strand = +1 unless $strand;
+  $phase  = 0  unless $phase;
+
+  my $is_forward = $strand >= 0;
+  # the reverse-strand branch was marked "probably wrong" by its author
+  my $codon_start = $is_forward ? $pos + $phase : $pos - $phase;
+  my $offset      = $is_forward ? $phase        : -$phase;
+  # superseded alternative, kept for reference:
+  #   $frame = $strand >= 0 ? ($pos - $phase - 1) % 3
+  #                         : (1 - $pos - $phase) % 3;
+  my $frame = ($codon_start-1) % 3;
+  return wantarray ? ($frame,$offset) : $frame;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Graphics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Graphics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Graphics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,128 @@
+package Bio::Graphics;
+
+use Bio::Graphics::Panel;
+use strict;
+
+1;
+
+=head1 NAME
+
+Bio::Graphics - Generate GD images of Bio::Seq objects
+
+=head1 SYNOPSIS
+
+ # This script generates a PNG picture of a 10K region containing a
+ # set of red features and a set of blue features. Call it like this:
+ #         red_and_blue.pl > redblue.png
+ # you can now view the picture with your favorite image application
+
+
+ # This script parses a GenBank or EMBL file named on the command
+ # line and produces a PNG rendering of it.  Call it like this:
+ # render.pl my_file.embl | display -
+
+ use strict;
+ use Bio::Graphics;
+ use Bio::SeqIO;
+
+ my $file = shift                       or die "provide a sequence file as the argument";
+ my $io = Bio::SeqIO->new(-file=>$file) or die "could not create Bio::SeqIO";
+ my $seq = $io->next_seq                or die "could not find a sequence in the file";
+
+ my @features = $seq->all_SeqFeatures;
+
+ # sort features by their primary tags
+ my %sorted_features;
+ for my $f (@features) {
+   my $tag = $f->primary_tag;
+   push @{$sorted_features{$tag}},$f;
+ }
+
+ my $wholeseq = Bio::SeqFeature::Generic->new(-start=>1,-end=>$seq->length);
+
+ my $panel = Bio::Graphics::Panel->new(
+				      -length    => $seq->length,
+ 				      -key_style => 'between',
+ 				      -width     => 800,
+ 				      -pad_left  => 10,
+ 				      -pad_right => 10,
+ 				      );
+ $panel->add_track($wholeseq,
+ 		  -glyph => 'arrow',
+ 		  -bump => 0,
+ 		  -double=>1,
+ 		  -tick => 2);
+
+ $panel->add_track($wholeseq,
+ 		  -glyph  => 'generic',
+ 		  -bgcolor => 'blue',
+ 		  -label  => 1,
+ 		 );
+
+ # general case
+ my @colors = qw(cyan orange blue purple green chartreuse magenta yellow aqua);
+ my $idx    = 0;
+ for my $tag (sort keys %sorted_features) {
+   my $features = $sorted_features{$tag};
+   $panel->add_track($features,
+ 		    -glyph    =>  'generic',
+ 		    -bgcolor  =>  $colors[$idx++ % @colors],
+ 		    -fgcolor  => 'black',
+ 		    -font2color => 'red',
+ 		    -key      => "${tag}s",
+ 		    -bump     => +1,
+ 		    -height   => 8,
+ 		    -label    => 1,
+ 		    -description => 1,
+ 		   );
+ }
+
+ print $panel->png;
+ exit 0;
+
+=head1 DESCRIPTION
+
+Please see L<Bio::Graphics::Panel> for the full interface.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics::Panel>,
+L<Bio::Graphics::Glyph>,
+L<Bio::SeqI>,
+L<Bio::SeqFeatureI>,
+L<Bio::Das>,
+L<Bio::DB::GFF::Feature>,
+L<Ace::Sequence>,
+L<GD>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>.
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/IdCollectionI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/IdCollectionI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/IdCollectionI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,102 @@
+# $Id: IdCollectionI.pm,v 1.6.4.1 2006/10/02 23:10:12 sendu Exp $
+
+#
+# This module is licensed under the same terms as Perl itself. You use,
+# modify, and redistribute it under the terms of the Perl Artistic License.
+#
+
+=head1 NAME
+
+Bio::IdCollectionI - interface for objects with multiple identifiers
+
+=head1 SYNOPSIS
+
+
+    # to test this is an identifiable collection object
+
+    $obj->isa("Bio::IdCollectionI") ||
+      $obj->throw("$obj does not implement the Bio::IdCollectionI interface");
+
+    # accessors
+    @authorities = $obj->id_authorities();
+    @ids         = $obj->ids();
+    $id          = $obj->ids($authority);
+
+=head1 DESCRIPTION
+
+This interface describes methods expected on objects that have
+multiple identifiers, each of which is controlled by a different
+authority.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+Email lstein at cshl.org
+
+=cut
+
+package Bio::IdCollectionI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 id_authorities
+
+ Title   : id_authorities
+ Usage   : @array    = $obj->id_authorities()
+ Function: Return the authorities which have names for this object.
+           The authorities can then be used to select ids.
+
+ Returns : An array
+ Status  : Virtual
+
+=cut
+
+sub id_authorities {
+   my $self = shift;
+   # virtual method: nothing is computed here, the call is forwarded
+   # to throw_not_implemented
+   $self->throw_not_implemented();
+}
+
+=head2 ids
+
+ Title   : ids
+ Usage   : @ids    = $obj->ids([$authority1,$authority2...])
+ Function: return a list of Bio::IdentifiableI objects, optionally
+           filtered by the list of authorities.
+
+ Returns : A list of Bio::IdentifiableI objects.
+ Status  : Virtual
+
+=cut
+
+sub ids {
+   # Unpack the invocant separately so that @authorities holds only the
+   # optional authority filters and not the object itself.
+   my ($self, @authorities) = @_;
+   # virtual method: the filters are unused here, the call is forwarded
+   # to throw_not_implemented
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/IdentifiableI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/IdentifiableI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/IdentifiableI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,207 @@
+# $Id: IdentifiableI.pm,v 1.11.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# This module is licensed under the same terms as Perl itself. You use,
+# modify, and redistribute it under the terms of the Perl Artistic License.
+#
+
+=head1 NAME
+
+Bio::IdentifiableI - interface for objects with identifiers
+
+=head1 SYNOPSIS
+
+    # to test this is an identifiable object
+
+    $obj->isa("Bio::IdentifiableI") ||
+      $obj->throw("$obj does not implement the Bio::IdentifiableI interface");
+
+    # Accessors
+
+    $object_id = $obj->object_id();
+    $namespace = $obj->namespace();
+    $authority = $obj->authority();
+    $version   = $obj->version();
+    # Gets authority:namespace:object_id
+    $lsid = $obj->lsid_string();
+    # Gets namespace:object_id.version
+    $ns_string = $obj->namespace_string();
+
+=head1 DESCRIPTION
+
+This interface describes methods expected on identifiable objects, i.e.
+ones which have identifiers expected to make sense across a number of
+instances and/or domains. This interface is modeled after pretty much
+ubiquitous ideas for names in bioinformatics being
+
+ databasename:object_id.version
+
+Example:
+
+ swissprot:P012334.2
+
+or:
+
+ GO:0007048
+
+The object will also work with LSID proposals which adds the concept of an
+authority, being the DNS name of the organisation assigning the namespace.
+See L<http://lsid.sourceforge.net/>.
+
+Helper functions are provided to make useful strings:
+
+  lsid_string - string complying to the LSID standard
+
+  namespace_string - string complying to the usual convention of
+                     namespace:object_id.version
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=cut
+
+package Bio::IdentifiableI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub object_id {
+   my $self = shift;
+   # virtual method: forwarded to throw_not_implemented
+   $self->throw_not_implemented();
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described by
+           the same identifier should represent the same concept
+ Returns : A number
+ Status  : Virtual
+
+=cut
+
+sub version {
+   my $self = shift;
+   # virtual method: forwarded to throw_not_implemented
+   $self->throw_not_implemented();
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for
+           organisation (eg, wormbase.org)
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub authority {
+   my $self = shift;
+   # virtual method: forwarded to throw_not_implemented
+   $self->throw_not_implemented();
+}
+
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub namespace {
+   my $self = shift;
+   # virtual method: forwarded to throw_not_implemented
+   $self->throw_not_implemented();
+}
+
+
+=head1 Implementation optional functions
+
+These functions are helper functions that are provided by
+the interface but can be overridden if so wished
+
+=head2 lsid_string
+
+ Title   : lsid_string
+ Usage   : $string   = $obj->lsid_string()
+ Function: a string which gives the LSID standard
+           notation for the identifier of interest
+
+
+ Returns : A scalar
+
+=cut
+
+sub lsid_string {
+  my $self = shift;
+
+  # fetch the three parts in output order, each in scalar context
+  my $authority = $self->authority;
+  my $namespace = $self->namespace;
+  my $object_id = $self->object_id;
+  return $authority.":".$namespace.":".$object_id;
+}
+
+
+
+=head2 namespace_string
+
+ Title   : namespace_string
+ Usage   : $string   = $obj->namespace_string()
+ Function: a string which gives the common notation of
+           namespace:object_id.version
+ Returns : A scalar
+
+=cut
+
+sub namespace_string {
+  my $self = shift;
+
+  my $string = $self->namespace.":".$self->object_id;
+  # ".version" is appended only when the object reports a defined version
+  if (defined($self->version())) {
+    $string .= ".".$self->version;
+  }
+  return $string;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Abstract.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Abstract.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Abstract.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,791 @@
+#
+# $Id: Abstract.pm,v 1.49.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::Abstract
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#          and James Gilbert <jgrg at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Abstract - Abstract interface for indexing a flat file
+
+=head1 SYNOPSIS
+
+You should not be using this module directly
+
+=head1 USING DB_FILE
+
+To use DB_File and not SDBM for this index, pass the value:
+
+    -dbm_package => 'DB_File'
+
+to new (see below).
+
+=head1 DESCRIPTION
+
+This object provides the basic mechanism to associate positions
+in files with names. The position and filenames are stored in DBM
+which can then be accessed later on. It is the equivalent of flat
+file indexing (eg, SRS or efetch).
+
+This object is the guts to the mechanism, which will be used by the
+specific objects inheriting from it.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, James Gilbert
+
+Email - birney at sanger.ac.uk, jgrg at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal
+methods are usually preceded with an "_" (underscore).
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Index::Abstract;
+
+use strict;
+use Fcntl qw( O_RDWR O_CREAT O_RDONLY );
+use vars qw( $TYPE_AND_VERSION_KEY
+             $USE_DBM_TYPE $DB_HASH );
+
+
+use Bio::Root::IO;
+use Symbol();
+
+use base qw(Bio::Root::Root);
+
+# Generate accessor methods for simple object fields
+BEGIN {
+	# symbolic glob assignment below needs strict refs switched off
+	no strict 'refs';
+	for my $accessor (qw(filename write_flag)) {
+		my $slot = '_' . $accessor;
+
+		# install a get/set closure under the accessor's name; any
+		# defined argument (including 0) is stored in $self->{$slot}
+		*{$accessor} = sub {
+			my $self = shift;
+			my $new  = shift;
+
+			$self->{$slot} = $new if defined $new;
+			return $self->{$slot};
+		};
+	}
+}
+
+=head2 new
+
+  Usage   : $index = Bio::Index::Abstract->new(
+                -filename    => $dbm_file,
+                -write_flag  => 0,
+                -dbm_package => 'DB_File',
+                -verbose     => 0);
+  Function: Returns a new index object.  If filename is
+            specified, then open_dbm() is immediately called. 
+            Bio::Index::Abstract->new() will usually be called
+            directly only when opening an existing index.
+  Returns : A new index object
+  Args    : -filename    The name of the dbm index file.
+            -write_flag  TRUE if write access to the dbm file is
+                         needed.
+            -dbm_package The Perl dbm module to use for the
+                         index.
+            -verbose     Print debugging output to STDERR if
+                         TRUE.
+
+=cut
+
+sub new {
+    my($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Pick the recognised named arguments out of the argument list
+    my @wanted = qw(FILENAME WRITE_FLAG DBM_PACKAGE CACHESIZE FFACTOR);
+    my( $dbm_file, $writable, $package, $cache, $fill ) =
+        $self->_rearrange(\@wanted, @args);
+
+    # Store only the parameters that were given a true value
+    if ($dbm_file) { $self->filename($dbm_file) }
+    if ($cache)    { $self->cachesize($cache) }
+    if ($fill)     { $self->ffactor($fill) }
+    if ($writable) { $self->write_flag($writable) }
+    if ($package)  { $self->dbm_package($package) }
+
+    $self->{'_filehandle'} = []; # Array in which to cache SeqIO objects
+    $self->{'_DB'}         = {}; # Gets tied to the DBM file
+
+    # When a filename was given the database is opened straight away
+    if ($dbm_file) {
+        $self->open_dbm();
+    }
+    return $self;
+}
+
+=pod
+
+=head2 filename
+
+ Title   : filename
+ Usage   : $value = $self->filename();
+           $self->filename($value);
+ Function: Gets or sets the name of the dbm index file.
+ Returns : The current value of filename
+ Args    : Value of filename if setting, or none if
+           getting the value.
+
+=head2 write_flag
+
+ Title   : write_flag
+ Usage   : $value = $self->write_flag();
+           $self->write_flag($value);
+ Function: Gets or sets the value of write_flag, which
+           is whether the dbm file should be opened with
+           write access.
+ Returns : The current value of write_flag (default 0)
+ Args    : Value of write_flag if setting, or none if
+           getting the value.
+
+=head2 dbm_package
+
+ Usage   : $value = $self->dbm_package();
+           $self->dbm_package($value);
+
+ Function: Gets or sets the name of the Perl dbm module used. 
+           If the value is unset, then it returns the value of
+           the package variable $USE_DBM_TYPE or if that is
+           unset, then it chooses the best available dbm type,
+           choosing 'DB_File' in preference to 'SDBM_File'. 
+           Bio::Index::Abstract may work with other dbm file
+           types.
+
+ Returns : The current value of dbm_package
+ Args    : Value of dbm_package if setting, or none if
+           getting the value.
+
+=cut
+
+sub dbm_package {
+	my( $self, $value ) = @_;
+	my $to_require = 0;
+	if( $value || ! $self->{'_dbm_package'} ) {
+		my $type = $value || $USE_DBM_TYPE || 'DB_File';
+		if( $type =~ /DB_File/i ) {
+			eval {
+				require DB_File;
+			};
+			$type = ( $@ ) ? 'SDBM_File' : 'DB_File';
+		}
+		if( $type ne 'DB_File' ) {
+			eval { require "$type.pm"; };
+			$self->throw($@) if( $@ );
+		}
+		$self->{'_dbm_package'} = $type;
+		if( ! defined $USE_DBM_TYPE ) {
+			$USE_DBM_TYPE = $self->{'_dbm_package'};
+		}
+    }
+	return $self->{'_dbm_package'};
+}
+
+=head2 db
+
+  Title   : db
+  Usage   : $index->db
+  Function: Returns a ref to the hash which is tied to the dbm
+            file.  Used internally when adding and retrieving
+            data from the database.
+  Example : $db = $index->db();
+            $db->{ $some_key } = $data
+            $data = $index->db->{ $some_key };
+  Returns : ref to HASH
+  Args    : NONE
+
+=cut
+
+sub db {
+	my ($self) = @_;
+	# the '_DB' slot holds the hash reference that gets tied to the dbm file
+	return $self->{'_DB'};
+}
+
+
+=head2 get_stream
+
+ Title   : get_stream
+ Usage   : $stream = $index->get_stream( $id );
+ Function: Returns a file handle with the file pointer
+           at the appropriate place
+
+           This provides for a way to get the actual
+           file contents and not an object 
+
+           WARNING: you must parse the record delimiter
+           *yourself*. Abstract won't do this for you 
+           So this code
+
+           $fh = $index->get_stream($myid);
+           while( <$fh> ) {
+              # do something
+           }
+           will parse the entire file if you don't put in
+           a last statement in, like
+
+           while( <$fh> ) {
+              /^\/\// && last; # end of record
+              # do something
+           }
+
+ Returns : A filehandle object
+ Args    : string represents the accession number
+ Notes   : This method should not be used without forethought 
+
+=cut
+
+#'
+
+sub get_stream {
+   # Return the (possibly cached) filehandle of the file that holds record
+   # $id, positioned at the start of that record.  Throws when $id is not
+   # in the index or when the handle cannot be positioned.
+   my ($self,$id) = @_;
+
+   my $rec = $self->db->{ $id }
+     or $self->throw("Unable to find a record for $id in the flat file index");
+
+   # only the file and the start offset are needed here
+   my ($file, $begin) = $self->unpack_record( $rec );
+
+   # Get the (possibly cached) filehandle
+   my $fh = $self->_file_handle( $file );
+
+   # move to start; a failed seek would otherwise hand back a handle
+   # pointing at the wrong place in the file
+   seek($fh, $begin, 0)
+     or $self->throw("Unable to seek to offset $begin for $id in the flat file index: $!");
+
+   return $fh;
+}
+
+
+=head2 cachesize
+
+  Usage   : $index->cachesize(1000000)
+  Function: Sets the dbm file cache size for the index.
+  	    Needs to be set before the DBM file gets opened.
+  Example : $index->cachesize(1000000)
+  Returns : size of the current cache
+
+=cut
+
+sub cachesize {
+	my $self = shift;
+	my ($size) = @_;
+
+	# any defined value, including 0, replaces the stored size
+	$self->{'_cachesize'} = $size if defined $size;
+	return $self->{'_cachesize'};
+}
+
+
+=head2 ffactor
+
+  Usage   : $index->ffactor(1000000)
+  Function: Sets the dbm file fill factor.
+  			Needs to be set before the DBM file gets opened.
+
+  Example : $index->ffactor(1000000)
+  Returns : the current fill factor
+
+=cut
+
+sub ffactor {
+	my $self = shift;
+	my ($fill) = @_;
+
+	# any defined value, including 0, replaces the stored fill factor
+	if (defined $fill) { $self->{'_ffactor'} = $fill }
+	return $self->{'_ffactor'};
+}
+
+
+=head2 open_dbm
+
+  Usage   : $index->open_dbm()
+  Function: Opens the dbm file associated with the index
+            object.  Write access is only given if explicitly
+            asked for by calling new(-write_flag => 1) or having set
+            the write_flag(1) on the index object.  The type of
+            dbm file opened is that returned by dbm_package().
+            The name of the file to be opened is obtained by
+            calling the filename() method.
+
+  Example : $index->open_dbm()
+  Returns : 1 on success
+
+=cut
+
+sub open_dbm {
+	# Tie the hash returned by db() to the dbm file named by filename(),
+	# using the package from dbm_package().  Returns 1 on success and
+	# throws when no filename is set or the tie fails.
+	my( $self ) = @_;
+
+	my $filename = $self->filename()
+	  or $self->throw("filename() not set");
+
+	my $db = $self->db();
+
+	# Close the dbm file if already open (maybe we're getting
+	# or dropping write access).  ref() reports 'HASH' for a tied hash
+	# too, so the open state has to be detected with tied(), and it is
+	# the hash itself, not the reference, that must be untied.
+	if (tied %$db) {
+		untie(%$db);
+	}
+
+	# What kind of DBM file are we going to open?
+	my $dbm_type = $self->dbm_package;
+
+	# Choose mode for opening dbm file (read/write+create or read-only).
+	my $mode_flags = $self->write_flag ? O_RDWR|O_CREAT : O_RDONLY;
+
+	# Open the dbm file
+	if ($dbm_type eq 'DB_File') {
+		# DB_File takes its optional tuning parameters in a HASHINFO object
+		my $hash_inf = DB_File::HASHINFO->new();
+		my $cache = $self->cachesize();
+		my $ffactor = $self->ffactor();
+		if ($cache){
+			$hash_inf->{'cachesize'} = $cache;
+		}
+		if ($ffactor){
+			$hash_inf->{'ffactor'} = $ffactor;
+		}
+		tie( %$db, $dbm_type, $filename, $mode_flags, 0644, $hash_inf )
+		  or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!");
+	} else {
+		tie( %$db, $dbm_type, $filename, $mode_flags, 0644 )
+		  or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!");
+	}
+
+	# The following methods access data in the dbm file:
+
+	# Now, if we're a Bio::Index::Abstract caterpillar, then we
+	# transform ourselves into a Bio::Index::<something> butterfly!
+	if( ref($self) eq "Bio::Index::Abstract" ) { 
+		my $pkg = $self->_code_base();
+		bless $self, $pkg;
+	}
+
+	# Check or set this is the right kind and version of index
+	$self->_type_and_version();
+
+	# Check files haven't changed size since they were indexed
+	$self->_check_file_sizes();
+
+	return 1;
+}
+
+=head2 _version
+
+  Title   : _version
+  Usage   : $type = $index->_version()
+  Function: Returns a string which identifies the version of an
+            index module.  Used to permanently identify an index
+            file as having been created by a particular version
+            of the index module.  Must be provided by the sub class
+  Example : 
+  Returns : 
+  Args    : none
+
+=cut
+
+sub _version {
+	my ($self) = @_;
+
+	# Abstract placeholder: reaching this method means the concrete index
+	# class did not supply its own _version(), so fail loudly.
+	$self->throw("In Bio::Index::Abstract, no _version method in sub class");
+}
+
+=head2 _code_base
+
+ Title   : _code_base
+ Usage   : $code = $db->_code_base();
+ Function:
+ Example :
+ Returns : Code package to be used with this 
+ Args    :
+
+
+=cut
+
+sub _code_base {
+   my $self = shift;
+
+   # The index stores its own class name and version under this key
+   # (written by _type_and_version()).
+   my $packed = $self->db->{'__TYPE_AND_VERSION'};
+   my ($code, $version) = $self->unpack_record($packed);
+
+   # List context receives both fields, scalar context only the package name.
+   return wantarray ? ($code, $version) : $code;
+}
+
+
+=head2 _type_and_version
+
+  Title   : _type_and_version
+  Usage   : Called by open_dbm()
+  Function: Checks that the index being opened was made by this index
+            module and by the same version of it.  If the index
+            has no type and version record yet, then it adds the
+            information to the database.
+  Example : 
+  Returns : 1 or exception
+  Args    : none
+
+=cut
+
+sub _type_and_version {
+	my ($self)  = @_;
+	my $key     = '__TYPE_AND_VERSION';
+	my $version = $self->_version();
+	my $type    = ref $self;
+
+	my $rec = $self->db->{ $key };
+
+	# No (true) record yet: stamp the index with our class and version.
+	if (!$rec) {
+		$self->add_record( $key, $type, $version )
+		  or $self->throw("Can't add Type and Version record");
+		return 1;
+	}
+
+	# Existing record: the version is compared numerically first, then
+	# the class name as a string.
+	my( $db_type, $db_version ) = $self->unpack_record($rec);
+	if ($db_version != $version) {
+		$self->throw("This index file is from version [$db_version] - You need to rebuild it to use module version [$version]");
+	}
+	if ($db_type ne $type) {
+		$self->throw("This index file is type [$db_type] - Can't access it with module for [$type]");
+	}
+	return 1;
+}
+
+
+=head2 _check_file_sizes
+
+  Title   : _check_file_sizes
+  Usage   : $index->_check_file_sizes()
+  Function: Verifies that the files listed in the database
+            are the same size as when the database was built,
+            or throws an exception.  Called by the new()
+            function.
+  Example : 
+  Returns : 1 or exception
+  Args    : 
+
+=cut
+
+# Compares the current size of every indexed file with the size recorded
+# in its "__FILE_<n>" record.  Throws when a file is missing/unreadable or
+# when its size differs from the recorded one; returns 1 otherwise.
+sub _check_file_sizes {
+	my $self = shift;
+	my $num  = $self->_file_count() || 0;
+
+	for (my $i = 0; $i < $num; $i++) {
+		my( $file, $stored_size ) = $self->unpack_record( $self->db->{"__FILE_$i"} );
+
+		# -s gives undef when the file cannot be stat'ed.  Compared
+		# numerically that would look like size 0, so a vanished file
+		# with a recorded size of 0 would slip through; report it
+		# explicitly instead.
+		my $size = defined $file ? (-s $file) : undef;
+		unless (defined $size) {
+			$file = '' unless defined $file;
+			$self->throw("file $i [ $file ] cannot be found or read. This probably means you need to rebuild the index.");
+		}
+
+		unless ($size == $stored_size) {
+			$self->throw("file $i [ $file ] has changed size $stored_size -> $size. This probably means you need to rebuild the index.");
+		}
+	}
+	return 1;
+}
+
+
+=head2 make_index
+
+  Title   : make_index
+  Usage   : $index->make_index( FILE_LIST )
+  Function: Takes a list of file names, checks that they are
+            all fully qualified, and then calls _index_file() on
+            each.  It supplies _index_file() with the name of the
+            file, and an integer which is stored with each record
+            created by _index_file().  Can be called multiple times,
+            and can be used to add to an existing index file.
+  Example : $index->make_index( '/home/seqs1', '/home/seqs2', '/nfs/pub/big_db' );
+  Returns : Number of files indexed
+  Args    : LIST OF FILES
+
+=cut
+
+# Adds each of @files to the index: validates/absolutises the paths, skips
+# files that are already indexed with an unchanged size, delegates the real
+# work to the subclass's _index_file(), and records name and size under
+# "__FILE_<n>" / "__FILENAME_<path>".  Returns the list
+# (number of files newly indexed, sum of the _index_file() return values).
+sub make_index {
+	my($self, @files) = @_;
+	my $count = 0;   # files actually indexed by this call
+	my $recs  = 0;   # accumulated return values of _index_file()
+
+	# blow up if write flag is not set. EB fix
+	if( !defined $self->write_flag ) {
+		$self->throw("Attempting to make an index on a read-only database. What about a WRITE flag on opening the index?");
+	}
+
+	# We're really fussy/lazy, expecting all file names to be fully qualified
+	$self->throw("No files to index provided") unless @files;
+	for(my $i=0;$i<scalar @files; $i++)  {
+		if( $Bio::Root::IO::FILESPECLOADED && File::Spec->can('rel2abs') ) {
+			# File::Spec is usable: quietly turn relative names into absolute ones
+			if( ! File::Spec->file_name_is_absolute($files[$i]) ) {
+				$files[$i] = File::Spec->rel2abs($files[$i]);
+			}
+		} else {
+			# Fallback: only a pattern check, so relative names are rejected
+			if(  $^O =~ /MSWin/i ) {
+				($files[$i] =~ m|^[A-Za-z]:/|) ||
+				  $self->throw("Not an absolute file path '$files[$i]'");
+			} else {
+				($files[$i] =~ m|^/|) ||
+				  $self->throw("Not an absolute file path '$files[$i]'");
+			}
+		}
+		$self->throw("File does not exist '$files[$i]'")   unless -e $files[$i];
+	}
+
+	# Add each file to the index
+	FILE :
+	foreach my $file (@files) {
+
+		# The number for this file is the current file count; start the
+		# counter at 0 the first time anything is indexed.
+		my $i = $self->_file_count;
+		if ( !defined $i ) {
+			$i = 0;
+			$self->_file_count(0);
+		}
+
+		# see whether this file has been already indexed
+		if( my $record = $self->db->{"__FILENAME_$file"} ) {
+			my ($number,$size) = $self->unpack_record($record);
+
+			# if it is the same size - fine. Otherwise die
+			if( (-s $file) == $size ) {
+				warn "File $file already indexed. Skipping...\n"
+					if $self->verbose >= 0;
+				next FILE;
+			} else {
+				$self->throw("In index, $file has changed size ($size). Indicates that the index is out of date");
+			}
+		}
+
+		# index this file
+		warn "Indexing file $file\n" if( $self->verbose > 0);
+
+		# this is supplied by the subclass and does the serious work;
+		# a false/undefined return simply adds nothing to the total
+		$recs += $self->_index_file( $file, $i ) || 0;
+
+		# Save file name and size for this index
+		my $file_size = -s $file;
+		$self->add_record("__FILE_$i", $file, $file_size)
+		  or $self->throw("Can't add data to file: $file");
+		$self->add_record("__FILENAME_$file", $i, $file_size)
+		  or $self->throw("Can't add data to file: $file");
+
+		# one more file in the index, and one more handled by this call
+		# (the latter used to be left at 0, so the returned count was wrong)
+		$self->_file_count($i + 1);
+		$count++;
+	}
+	return ($count, $recs);
+}
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( FILE INT )
+  Function: Indexes the file.  Must be provided by the sub class;
+            the version in this class only throws an exception.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+	my ($self) = @_;
+
+	# Abstract hook: concrete index classes override this.  Name the
+	# offending class in the exception so the missing override is obvious.
+	my $pkg = ref $self;
+	$self->throw("Error: '$pkg' does not provide the _index_file() method");
+}
+
+
+
+=head2 _file_handle
+
+  Title   : _file_handle
+  Usage   : $fh = $index->_file_handle( INT )
+  Function: Returns an open filehandle for the file
+            index INT.  On opening a new filehandle it
+            caches it in the @{$index->_filehandle} array.
+            If the requested filehandle is already open,
+            it simply returns it from the array.
+  Example : $first_file_indexed = $index->_file_handle( 0 );
+  Returns : ref to a filehandle
+  Args    : INT
+
+=cut
+
+sub _file_handle {
+	my $self = shift;
+	my ($i)  = @_;
+
+	# Serve an already opened handle straight from the per-object cache.
+	return $self->{'_filehandle'}[$i] if $self->{'_filehandle'}[$i];
+
+	# Otherwise look the file name up in the "__FILE_<n>" record ...
+	my @rec = $self->unpack_record($self->db->{"__FILE_$i"})
+	  or $self->throw("Can't get filename for index : $i");
+	my $file = $rec[0];
+
+	# ... open it read-only and remember the handle for next time.
+	open my $fh, '<', $file or $self->throw("Can't read file '$file' : $!");
+	$self->{'_filehandle'}[$i] = $fh;
+
+	return $self->{'_filehandle'}[$i];
+}
+
+
+=head2 _file_count
+
+  Title   : _file_count
+  Usage   : $index->_file_count( INT )
+  Function: Used by the index building sub in a sub class to
+            track the number of files indexed.  Sets or gets
+            the number of files indexed when called with or
+            without an argument.
+  Example : 
+  Returns : INT
+  Args    : INT
+
+=cut
+
+sub _file_count {
+	my ($self, @value) = @_;
+
+	# With an argument, store it under the '__FILE_COUNT' key first;
+	# either way hand back whatever the index currently holds there.
+	$self->db->{'__FILE_COUNT'} = $value[0] if @value;
+
+	return $self->db->{'__FILE_COUNT'};
+}
+
+
+=head2 add_record
+
+  Title   : add_record
+  Usage   : $index->add_record( $id, @stuff );
+  Function: Calls pack_record on @stuff, and adds the result
+            of pack_record to the index database under key $id.
+            If $id is a reference to an array, then a new entry
+            is added under a key corresponding to each element
+            of the array.
+  Example : $index->add_record( $id, $fileNumber, $begin, $end )
+  Returns : TRUE on success or FALSE on failure
+  Args    : ID LIST
+
+=cut
+
+sub add_record {
+	my $self = shift;
+	my ($id, @rec) = @_;
+
+	if( $self->verbose > 0 ) {
+		$self->debug( "Adding key $id\n");
+	}
+
+	# An existing key is overwritten, but not silently.
+	$self->warn("overwriting a current value stored for $id\n")
+	  if exists $self->db->{$id};
+
+	# Store the fields as one packed string under the key.
+	$self->db->{$id} = $self->pack_record( @rec );
+	return 1;
+}
+
+
+=head2 pack_record
+
+  Title   : pack_record
+  Usage   : $packed_string = $index->pack_record( LIST )
+  Function: Packs an array of scalars into a single string
+            joined by ASCII 034 (which is unlikely to be used
+            in any of the strings), and returns it. 
+  Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
+  Returns : STRING or undef
+  Args    : LIST
+
+=cut
+
+sub pack_record {
+	my $self = shift;
+
+	# Everything after the invocant is glued together with the octal-034
+	# separator character that unpack_record() splits on.
+	return join("\034", @_);
+}
+
+=head2 unpack_record
+
+  Title   : unpack_record
+  Usage   : $index->unpack_record( STRING )
+  Function: Splits the string provided into an array,
+            splitting on ASCII 034.
+  Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
+  Returns : A 3 element ARRAY
+  Args    : STRING containing ASCII 034
+
+=cut
+
+sub unpack_record {
+	my ($self, $packed) = @_;
+
+	# Inverse of pack_record(): cut the string at every octal-034
+	# separator character.  Only the first argument is looked at.
+	return split /\034/, $packed;
+}
+
+=head2 count_records
+
+ Title   : count_records
+ Usage   : $recs = $seqdb->count_records()
+ Function: return count of all recs in the index 
+ Example :
+ Returns : a scalar
+ Args    : none
+
+
+=cut
+
+# Counts the entries in the index, leaving out the internal bookkeeping
+# keys (those starting with "__").  Returns the count.
+sub count_records {
+   my ($self, @args) = @_;
+   my $db = $self->db;
+   my $c = 0;
+
+   # Walk the hash with each() rather than keys() so that not all keys
+   # have to be held in memory at once.
+   while (my($id, $rec) = each %$db) {
+		# internal info
+		next if $id =~ /^__/;
+		$c++;
+   }
+   return ($c);
+}
+
+
+=head2 DESTROY
+
+ Title   : DESTROY
+ Usage   : Called automatically when index goes out of scope
+ Function: Closes connection to database and handles to
+           sequence files
+ Returns : NEVER
+ Args    : NONE
+
+
+=cut
+
+sub DESTROY {
+	my ($self) = @_;
+
+	# Release the dbm binding held in the '_DB' slot when the index
+	# object is destroyed.
+	untie($self->{'_DB'});
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/AbstractSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/AbstractSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/AbstractSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,303 @@
+# $Id: AbstractSeq.pm,v 1.24.4.3 2006/11/27 15:46:48 sendu Exp $
+#
+# BioPerl module for Bio::Index::AbstractSeq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::AbstractSeq - base class for AbstractSeq
+
+=head1 SYNOPSIS
+
+  # Make a new sequence file indexing package
+
+  package MyShinyNewIndexer;
+
+  use base qw(Bio::Index::AbstractSeq);
+
+  # Now provide the necessary methods...
+
+=head1 DESCRIPTION
+
+Provides a common base class for multiple sequence files built using 
+the Bio::Index::Abstract system, and provides a Bio::DB::SeqI 
+interface.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions 
+preferably to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=head1 SEE ALSO
+
+L<Bio::Index::Abstract>, which provides dbm indexing for flat files of 
+any type, containing sequence or not. L<Bio::Index::AbstractSeq> inherits 
+from L<Bio::Index::Abstract>
+
+=cut
+
+# Let's begin the code ...
+
+package Bio::Index::AbstractSeq;
+use strict;
+
+use Bio::SeqIO::MultiFile;
+
+use base qw(Bio::Index::Abstract Bio::DB::SeqI);
+
+sub new {
+	my $class = shift;
+
+	# Let the parent class build the object, then add an empty cache
+	# that _get_SeqIO_object() fills with one SeqIO object per file.
+	my $self = $class->SUPER::new(@_);
+	$self->{'_seqio_cache'} = [];
+
+	return $self;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Usage   : $self->_file_format
+ Function: Derived classes should override this
+           method (it throws an exception here)
+           to give the file format of the files used
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+# Abstract hook: concrete sequence index classes override this to name the
+# format of the files they index.  This version always throws.
+sub _file_format {
+   my ($self, @args) = @_;
+
+   my $pkg = ref($self);
+   $self->throw("Class '$pkg' must provide a file format method correctly");
+}
+
+=head2 fetch
+
+  Title   : fetch
+  Usage   : $index->fetch( $id )
+  Function: Returns a Bio::Seq object from the index
+  Example : $seq = $index->fetch( 'dJ67B12' )
+  Returns : Bio::Seq object
+  Args    : ID
+
+=cut
+
+sub fetch {
+	my( $self, $id ) = @_;
+	my $seq;
+
+	my $rec = $self->db()->{ $id };
+	if ($rec) {
+		# The record holds the file number and the byte offset of the entry.
+		my ($file, $begin) = $self->unpack_record( $rec );
+
+		# Get the (possibly cached) SeqIO object and position its
+		# filehandle at the start of the record before parsing.
+		my $seqio = $self->_get_SeqIO_object( $file );
+		seek($seqio->_fh(), $begin, 0);
+
+		$seq = $seqio->next_seq();
+	}
+
+	# Nothing found, or not a sequence object: hand it back untouched.
+	return $seq unless ref($seq) && $seq->isa('Bio::PrimarySeqI');
+
+	# We essentially assume that the primary_id for the database is the
+	# display_id: a primary_id made up solely of non-digit characters is
+	# replaced by the display_id.
+	if ($seq->primary_id =~ /^\D+$/) {
+		$seq->primary_id( $seq->display_id() );
+	}
+	return $seq;
+}
+
+=head2 _get_SeqIO_object
+
+  Title   : _get_SeqIO_object
+  Usage   : $index->_get_SeqIO_object( $file )
+  Function: Returns a Bio::SeqIO object for the file
+  Example : $seq = $index->_get_SeqIO_object( 0 )
+  Returns : Bio::SeqIO object
+  Args    : File number (an integer)
+
+=cut
+
+sub _get_SeqIO_object {
+    my $self = shift;
+    my ($i)  = @_;
+
+    # One SeqIO object per file number is kept in '_seqio_cache'.
+    return $self->{'_seqio_cache'}[$i] if $self->{'_seqio_cache'}[$i];
+
+    # Not cached yet: wrap the (itself cached) filehandle in a new SeqIO
+    # object of the subclass's file format and remember it.
+    my $fh = $self->_file_handle($i);
+    $self->{'_seqio_cache'}[$i] = Bio::SeqIO->new( -Format => $self->_file_format,
+                                                   -fh     => $fh);
+
+    return $self->{'_seqio_cache'}[$i];
+}
+
+=head2 get_Seq_by_id
+
+ Title   : get_Seq_by_id
+ Usage   : $seq = $db->get_Seq_by_id()
+ Function: retrieves a sequence object, identically to
+           ->fetch, but here behaving as a Bio::DB::BioSeqI
+ Returns : new Bio::Seq object
+ Args    : string represents the id
+
+
+=cut
+
+sub get_Seq_by_id {
+   my $self = shift;
+   my $id   = shift;
+
+   # Plain alias for fetch().
+   return $self->fetch($id);
+}
+
+=head2 get_Seq_by_acc
+
+ Title   : get_Seq_by_acc
+ Usage   : $seq = $db->get_Seq_by_acc()
+ Function: retrieves a sequence object, identically to
+           ->fetch, but here behaving as a Bio::DB::BioSeqI
+ Returns : new Bio::Seq object
+ Args    : string represents the accession number
+
+
+=cut
+
+sub get_Seq_by_acc {
+   my $self = shift;
+   my $id   = shift;
+
+   # Accession numbers are looked up through fetch() like any other key.
+   return $self->fetch($id);
+}
+
+=head2 get_PrimarySeq_stream
+
+ Title   : get_PrimarySeq_stream
+ Usage   : $stream = get_PrimarySeq_stream
+ Function: Makes a Bio::DB::SeqStreamI compliant object
+           which provides a single method, next_primary_seq
+ Returns : Bio::DB::SeqStreamI
+ Args    : none
+
+
+=cut
+
+sub get_PrimarySeq_stream {
+    my ($self) = @_;
+    my $total  = $self->_file_count() || 0;
+
+    # Collect the name of every indexed file from its "__FILE_<n>" record
+    # (the stored size that follows the name is not needed here).
+    my @file;
+    foreach my $n (0 .. $total - 1) {
+        my ($name) = $self->unpack_record( $self->db->{"__FILE_$n"} );
+        push @file, $name;
+    }
+
+    # One stream that reads through all of those files in turn.
+    return Bio::SeqIO::MultiFile->new( '-format' => $self->_file_format , -files => \@file);
+}
+
+=head2 get_all_primary_ids
+
+ Title   : get_all_primary_ids
+ Usage   : @ids = $seqdb->get_all_primary_ids()
+ Function: gives an array of all the primary_ids of the 
+           sequence objects in the database. These
+           maybe ids (display style) or accession numbers
+           or something else completely different - they
+           *are not* meaningful outside of this database
+           implementation.
+ Example :
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+# Returns one key per distinct record in the index.  Several keys (e.g. an
+# accession number and a name) may point at the same file/offset pair; only
+# one of them is returned for each pair, and which one is not defined.
+sub get_all_primary_ids {
+   my ($self, @args) = @_;
+   my $db = $self->db;
+
+   # the problem is here that we have indexed things both on
+   # accession number and name.
+
+   # We could take two options
+   # here - loop over the database, returning only one copy of each
+   # id that points to the same byte position, or we rely on semantics
+   # of accession numbers.
+
+   # someone is going to index a database with no accession numbers.
+   # doh!. We have to uniquify the index...
+
+   my( %bytepos );
+   while (my($id, $rec) = each %$db) {
+       # internal info
+       next if $id =~ /^__/;
+
+       # Key the entry on "file:offset" so aliases collapse to one id.
+       my ($file, $begin) = $self->unpack_record( $rec );
+       $bytepos{"$file:$begin"} = $id;
+   }
+
+   return values %bytepos;
+}
+
+
+=head2 get_Seq_by_primary_id
+
+ Title   : get_Seq_by_primary_id
+ Usage   : $seq = $db->get_Seq_by_primary_id($primary_id_string);
+ Function: Gets a Bio::Seq object by the primary id. The primary
+           id in these cases has to come from $db->get_all_primary_ids.
+           There is no other way to get (or guess) the primary_ids
+           in a database.
+
+           The other possibility is to get Bio::PrimarySeqI objects
+           via the get_PrimarySeq_stream and the primary_id field
+           on these objects are specified as the ids to use here.
+ Returns : A Bio::Seq object
+ Args    : primary id (as a string)
+ Throws  : "acc does not exist" exception
+
+
+=cut
+
+sub get_Seq_by_primary_id {
+   my $self = shift;
+   my $id   = shift;
+
+   # Primary ids are ordinary index keys, so fetch() does the lookup.
+   return $self->fetch($id);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Blast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Blast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Blast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,405 @@
+# $Id: Blast.pm,v 1.21.4.4 2006/11/17 09:32:42 sendu Exp $
+#
+# BioPerl module for Bio::Index::Blast
+#
+# Cared for by Jason Stajich <jason at cgt.mc.duke.edu>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Blast - Indexes Blast reports and supports retrieval 
+based on query accession(s)
+
+=head1 SYNOPSIS
+
+    use strict;
+    use Bio::Index::Blast;
+    my ($indexfile,$file1,$file2,$query);
+    my $index = new Bio::Index::Blast(-filename => $indexfile,
+				                          -write_flag => 1);
+    $index->make_index($file1,$file2);
+
+    my $data = $index->get_stream($query);
+
+    my $blast_report = $index->fetch_report($query);
+    print "query is ", $blast_report->query, "\n";
+    while ( my $result = $blast_report->next_result ) {
+            print $result->algorithm, "\n";
+            while ( my $hsp = $result->next_hit ) {
+              print "\t name ", $hsp->name,
+            }
+            print "\n";
+    }
+
+=head1 DESCRIPTION
+
+This object allows one to build an index on a blast file (or files)
+and provide quick access to the blast report for that accession.
+Note: for best results 'use strict'.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Index::Blast;
+use strict;
+
+use IO::String;
+use Bio::Root::Version;
+
+use base qw(Bio::Index::Abstract Bio::Root::Root);
+
+sub _version {
+	# The index version is simply the version of the BioPerl
+	# distribution, as held in $Bio::Root::Version::VERSION.
+	return $Bio::Root::Version::VERSION;
+}
+
+=head2 new
+
+  Usage   : $index = Bio::Index::Abstract->new(
+                -filename    => $dbm_file,
+                -write_flag  => 0,
+                -dbm_package => 'DB_File',
+                -verbose     => 0);
+
+  Function: Returns a new index object.  If filename is
+            specified, then open_dbm() is immediately called. 
+            Bio::Index::Abstract->new() will usually be called
+            directly only when opening an existing index.
+  Returns : A new index object
+  Args    : -filename    The name of the dbm index file.
+            -write_flag  TRUE if write access to the dbm file is
+                         needed.
+            -dbm_package The Perl dbm module to use for the
+                         index.
+            -verbose     Print debugging output to STDERR if
+                         TRUE.
+
+=cut
+
+# Constructor: everything is handled by the parent class; the arguments
+# are passed through unchanged and the new object is returned.
+sub new {
+
+  my($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  return $self;
+}
+
+=head2 Bio::Index::Blast implemented methods
+
+=cut
+
+=head2 fetch_report
+
+ Title   : fetch_report
+ Usage   : my $blastreport = $idx->fetch_report($id);
+ Function: Returns a Bio::SearchIO report object 
+           for a specific blast report
+ Returns : Bio::SearchIO
+ Args    : valid id
+
+=cut
+
+# Looks up $id in the index and returns the first result that a
+# Bio::SearchIO 'blast' parser reads from the indexed file at that position.
+sub fetch_report{
+	my ($self,$id) = @_;
+
+	# Filehandle already positioned at the start of the wanted report
+	my $fh = $self->get_stream($id);
+
+	# -noclose is set because the handle comes from get_stream() and is
+	# not owned by the parser.  Direct method-call syntax is used instead
+	# of the ambiguous indirect-object form "new Bio::SearchIO(...)".
+	my $report = Bio::SearchIO->new(-noclose => 1,
+	                                -format  => 'blast',
+	                                -fh      => $fh);
+	return $report->next_result;
+}
+
+
+=head2 Require methods from Bio::Index::Abstract
+
+=cut
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index BLAST report file(s).
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+# Splits $file into individual BLAST reports (a new report starts at every
+# line matching the BLAST program-name pattern) and passes each report's
+# text, together with the byte offset at which it starts and the file
+# number $i, to _process_report().  Returns the number of reports passed
+# on, so that the caller's running total is meaningful.
+sub _index_file {
+	my( $self,
+		 $file, # File name
+		 $i,    # Index-number of file being indexed
+	  ) = @_;
+
+	open(my $BLAST, '<', $file) or $self->throw("cannot open file $file\n");
+
+	my @data;            # lines of the report currently being collected
+	my $indexpoint = 0;  # byte offset at which that report starts
+	my $lastline   = 0;  # byte offset just past the last line read
+	my $reports    = 0;  # reports handed to _process_report() so far
+
+	# A lexical line variable is used so the caller's $_ is left alone.
+	while( my $line = <$BLAST> ) {
+		if( $line =~ /(T)?BLAST[PNX]/ ) {
+			if( @data ) {
+				# if we have already read a report
+				# then store the data for this report
+				# in the CURRENT index
+				$self->_process_report($indexpoint, $i, join('', @data));
+				$reports++;
+			} # handle fencepost problem (beginning)
+			  # by skipping here when empty
+
+			# since we are at the beginning of a new report
+			# store this begin location for the next index
+			$indexpoint = $lastline;
+			@data = ();
+		}
+		push(@data, $line) if $line;
+		$lastline = tell $BLAST;
+	}
+	# handle fencepost problem (end)
+	if( @data ) {
+		$self->_process_report($indexpoint, $i, join('', @data));
+		$reports++;
+	}
+	close $BLAST;
+
+	return $reports;
+}
+
+# Parses the text of one report ($data) and adds an index record that maps
+# the report's query name to file number $i and byte offset $begin.
+# Warns and returns without adding anything when $data is empty or when
+# no result can be read from it.
+sub _process_report {
+	my ($self,$begin,$i,$data) = @_;
+
+	if( ! $data ) {
+		$self->warn("calling _process_report without a valid data string");
+		return ;
+	}
+	# my $id_parser = $self->id_parser;
+
+	# Parse the report from an in-memory handle.  The constructors are
+	# called with plain method syntax; the former indirect-object form
+	# "new Bio::SearchIO->new(...)" named the constructor twice.
+	my $datal  = IO::String->new($data);
+	my $report = Bio::SearchIO->new(-fh      => $datal,
+	                                -noclose => 1);
+
+	# Only the first result of the report is indexed.  Guard against a
+	# report the parser yields nothing for, instead of calling a method
+	# on an undefined value.
+	my $result = $report->next_result;
+	if( ! $result ) {
+		$self->warn("no result could be parsed from the report at offset $begin");
+		return ;
+	}
+
+	my $id = $result->query_name;
+	print "id is $id, begin is $begin\n" if ( $self->verbose > 0);
+	$self->add_record($id, $i, $begin);
+}
+
+=head2 Bio::Index::Abstract methods
+
+=cut
+
+=head2 filename
+
+ Title   : filename
+ Usage   : $value = $self->filename();
+           $self->filename($value);
+ Function: Gets or sets the name of the dbm index file.
+ Returns : The current value of filename
+ Args    : Value of filename if setting, or none if
+           getting the value.
+
+=head2 write_flag
+
+ Title   : write_flag
+ Usage   : $value = $self->write_flag();
+           $self->write_flag($value);
+ Function: Gets or sets the value of write_flag, which
+           is whether the dbm file should be opened with
+           write access.
+ Returns : The current value of write_flag (default 0)
+ Args    : Value of write_flag if setting, or none if
+           getting the value.
+
+=head2 dbm_package
+
+ Usage   : $value = $self->dbm_package();
+           $self->dbm_package($value);
+
+ Function: Gets or sets the name of the Perl dbm module used. 
+           If the value is unset, then it returns the value of
+           the package variable $USE_DBM_TYPE or if that is
+           unset, then it chooses the best available dbm type,
+           choosing 'DB_File' in preference to 'SDBM_File'. 
+           Bio::Abstract::Index may work with other dbm file
+           types.
+
+ Returns : The current value of dbm_package
+ Args    : Value of dbm_package if setting, or none if
+           getting the value.
+
+
+=head2 get_stream
+
+ Title   : get_stream
+ Usage   : $stream = $index->get_stream( $id );
+ Function: Returns a file handle with the file pointer
+           at the appropriate place
+
+           This provides for a way to get the actual
+           file contents and not an object 
+
+           WARNING: you must parse the record delimiter
+           *yourself*. Abstract won't do this for you 
+           So this code
+
+           $fh = $index->get_stream($myid);
+           while( <$fh> ) {
+              # do something
+           }
+           will parse the entire file if you do not put in
+           a last statement in, like
+
+           while( <$fh> ) {
+              /^\/\// && last; # end of record
+              # do something
+           }
+
+ Returns : A filehandle object
+ Args    : string represents the accession number
+ Notes   : This method should not be used without forethought 
+
+
+=head2 open_dbm
+
+  Usage   : $index->open_dbm()
+  Function: Opens the dbm file associated with the index
+            object.  Write access is only given if explicitly
+            asked for by calling new(-write => 1) or having set
+            the write_flag(1) on the index object.  The type of
+            dbm file opened is that returned by dbm_package(). 
+            The name of the file to be is opened is obtained by
+            calling the filename() method.
+
+  Example : $index->_open_dbm()
+  Returns : 1 on success
+
+
+=head2 _version
+
+  Title   : _version
+  Usage   : $type = $index->_version()
+  Function: Returns a string which identifes the version of an
+            index module.  Used to permanently identify an index
+            file as having been created by a particular version
+            of the index module.  Must be provided by the sub class
+  Example : 
+  Returns : 
+  Args    : none
+
+=head2 _filename
+
+  Title   : _filename
+  Usage   : $index->_filename( FILE INT )
+  Function: Indexes the file
+  Example : 
+  Returns : 
+  Args    : 
+
+=head2 _file_handle
+
+  Title   : _file_handle
+  Usage   : $fh = $index->_file_handle( INT )
+  Function: Returns an open filehandle for the file
+            index INT.  On opening a new filehandle it
+            caches it in the @{$index->_filehandle} array.
+            If the requested filehandle is already open,
+            it simply returns it from the array.
+  Example : $fist_file_indexed = $index->_file_handle( 0 );
+  Returns : ref to a filehandle
+  Args    : INT
+
+=head2 _file_count
+
+  Title   : _file_count
+  Usage   : $index->_file_count( INT )
+  Function: Used by the index building sub in a sub class to
+            track the number of files indexed.  Sets or gets
+            the number of files indexed when called with or
+            without an argument.
+  Example : 
+  Returns : INT
+  Args    : INT
+
+
+=head2 add_record
+
+  Title   : add_record
+  Usage   : $index->add_record( $id, @stuff );
+  Function: Calls pack_record on @stuff, and adds the result
+            of pack_record to the index database under key $id.
+            If $id is a reference to an array, then a new entry
+            is added under a key corresponding to each element
+            of the array.
+  Example : $index->add_record( $id, $fileNumber, $begin, $end )
+  Returns : TRUE on success or FALSE on failure
+  Args    : ID LIST
+
+=head2 pack_record
+
+  Title   : pack_record
+  Usage   : $packed_string = $index->pack_record( LIST )
+  Function: Packs an array of scalars into a single string
+            joined by ASCII 034 (which is unlikely to be used
+            in any of the strings), and returns it. 
+  Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
+  Returns : STRING or undef
+  Args    : LIST
+
+=head2 unpack_record
+
+  Title   : unpack_record
+  Usage   : $index->unpack_record( STRING )
+  Function: Splits the sting provided into an array,
+            splitting on ASCII 034.
+  Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
+  Returns : A 3 element ARRAY
+  Args    : STRING containing ASCII 034
+
+=head2 DESTROY
+
+ Title   : DESTROY
+ Usage   : Called automatically when index goes out of scope
+ Function: Closes connection to database and handles to
+           sequence files
+ Returns : NEVER
+ Args    : NONE
+
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/EMBL.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/EMBL.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/EMBL.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,205 @@
+# $Id: EMBL.pm,v 1.32.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::EMBL
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::EMBL - Interface for indexing (multiple) EMBL/Swissprot
+.dat files (i.e. flat file EMBL/Swissprot format).
+
+=head1 SYNOPSIS
+
+    # Complete code for making an index for several
+    # EMBL files
+    use Bio::Index::EMBL;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::EMBL->new(-filename => $Index_File_Name,
+				    -write_flag => 'WRITE');
+    $inx->make_index(@ARGV);
+
+    # Print out several sequences present in the index
+    # in Fasta format
+    use Bio::Index::EMBL;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::EMBL->new(-filename => $Index_File_Name);
+    my $out = Bio::SeqIO->new(-format => 'Fasta',-fh => \*STDOUT);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns Bio::Seq object
+	$out->write_seq($seq);
+    }
+
+    # alternatively
+    my ($id, $acc);
+    my $seq1 = $inx->get_Seq_by_id($id);
+    my $seq2 = $inx->get_Seq_by_acc($acc);
+
+=head1 DESCRIPTION
+
+Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
+and provides the basic functionality for indexing EMBL files, and
+retrieving the sequence from them. Heavily snaffled from James Gilbert
+and his Fasta system. Note: for best results 'use strict'.
+
+The keys are the identifiers in the ID and AC lines.
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email - birney at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let's begin the code...
+
+
+package Bio::Index::EMBL;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::AbstractSeq);
+
+sub _type_stamp {
+    # What kind of index are we?
+    my $stamp = '__EMBL_FLAT__';
+    return $stamp;
+}
+
+
+sub _version {
+    # Version number reported by this index module.
+    my $version = 0.1;
+    return $version;
+}
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index EMBL format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+    my( $self,
+        $file, # File name
+        $i     # Index-number of file being indexed
+        ) = @_;
+
+    # Indexes one EMBL flat file: at every end-of-entry line ('//') a
+    # record is added under the entry's ID and under each accession found
+    # on its AC lines, all pointing at the offset of the entry's ID line.
+    # Returns 1 once the whole file has been read.
+    my $begin = 0;  # Offset from start of file of the start
+                    # of the last found record.
+    my $id;         # ID of the entry being read; undef until an ID line is seen.
+    my @accs;       # Accessions of the entry being read. Also put into the index.
+
+    open my $EMBL, '<', $file or $self->throw("Can't open file for read : $file");
+
+    # Main indexing loop
+    while (my $line = <$EMBL>) {
+        if ( $line =~ m{^//} ) {
+            # End of entry: store what was collected for it.
+            if ( ! defined $id ) {
+                $self->throw("Got to a end of entry line for an EMBL flat file with no parsed ID. Considering this a problem!");
+                next;
+            }
+            if ( ! @accs ) {
+                $self->warn("For id [$id] in embl flat file, got no accession number. Storing id index anyway");
+            }
+
+            $self->add_record($id, $i, $begin);
+
+            foreach my $acc (@accs) {
+                # The ID itself has just been stored; do not add it twice.
+                if ( $acc ne $id ) {
+                    $self->add_record($acc, $i, $begin);
+                }
+            }
+
+            # Start the next entry with a clean slate. Without this the
+            # accessions (and ID) of earlier entries carry over and get
+            # re-indexed against the offset of every later entry.
+            $id   = undef;
+            @accs = ();
+        } elsif ( $line =~ /^ID\s+(\S+)/ ) {
+            $id = $1;
+            # not sure if I like this. Assumes tell is in bytes.
+            # we could tell before each line and save it.
+            $begin = tell($EMBL) - length( $line );
+        } elsif ( $line =~ /^AC\s+(.*)?/ ) {
+            # An AC line can carry several accessions separated by ';' and spaces.
+            push @accs, split( /[; ]+/, $1 );
+        }
+    }
+
+    close $EMBL;
+    return 1;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Usage   : Internal function for indexing system
+ Function: Provides file format for this database
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _file_format{
+   # Provides the file format name for this database.
+   # Extra arguments are accepted and ignored.
+   my ($self, @args) = @_;
+
+   return 'EMBL';
+}
+
+
+
+1;
+
+
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,237 @@
+#
+# $Id: Fasta.pm,v 1.35.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::Fasta
+#
+# Cared for by James Gilbert <jgrg at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Fasta - Interface for indexing (multiple) fasta files
+
+=head1 SYNOPSIS
+
+    # Make an index for one or more fasta files
+    use Bio::Index::Fasta;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Fasta->new(-filename => $Index_File_Name,
+                                     -write_flag => 1);
+    $inx->make_index(@ARGV);
+
+
+    # Once the index is made it can accessed, either in the
+    # same script or a different one
+    use Bio::Index::Fasta;
+    use strict;
+
+    $ENV{BIOPERL_INDEX} = "."; # find index in current directory
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Fasta->new(-filename => $Index_File_Name);
+    my $out = Bio::SeqIO->new(-format => 'Fasta',
+                              -fh => \*STDOUT);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns Bio::Seq object
+	     $out->write_seq($seq);
+    }
+
+    # or, alternatively
+    my $id;
+    my $seq = $inx->get_Seq_by_id($id); # identical to fetch()
+
+=head1 DESCRIPTION
+
+Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
+and provides the basic functionality for indexing fasta files, and
+retrieving the sequence from them. For best results 'use strict'.
+
+Bio::Index::Fasta supports the Bio::DB::BioSeqI interface, meaning
+it can be used as a Sequence database for other parts of bioperl
+
+Additional example code is available in scripts/index/*PLS and in 
+the Bioperl Tutorial (L<http://www.bioperl.org/wiki/Bptutorial.pl>)
+
+Note that by default the key for the sequence will be the first continuous
+string after the 'E<gt>' in the fasta header. If you want to use a specific
+substring of the fasta header you must use the id_parser() method.
+
+You can also set or customize the unique key used to retrieve by 
+writing your own function and calling the id_parser() method.
+For example:
+
+   $inx->id_parser(\&get_id);
+   # make the index
+   $inx->make_index($file_name);
+
+   # here is where the retrieval key is specified
+   sub get_id {
+      my $line = shift;
+      $line =~ /^>.+gi\|(\d+)/;
+      $1;
+   }
+
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - James Gilbert
+
+Email - jgrg at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Index::Fasta;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::AbstractSeq);
+
+#
+# Suggested fix by Michael G Schwern <schwern at pobox.com> to
+# get around a clash with CPAN shell...
+#
+
+sub _version {
+    # Version number reported by this index module.
+    my $version = 0.2;
+    return $version;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Function: The file format for this package, which is needed
+           by the SeqIO system when reading the sequence.
+ Returns : 'Fasta'
+
+=cut
+
+sub _file_format {
+    # Format name needed by the SeqIO system when reading the sequence.
+    my $format = 'Fasta';
+    return $format;
+}
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index FASTA format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+    # $file: file name; $i: index-number of the file being indexed.
+    my ($self, $file, $i) = @_;
+
+    # Fetch the ID parser once, before the file is scanned.
+    my $parse_ids = $self->id_parser;
+
+    open my $FASTA, '<', $file or $self->throw("Can't open file for read : $file");
+
+    # Main indexing loop: only lines starting with '>' produce records.
+    while (<$FASTA>) {
+        next unless /^>/;
+
+        # Position of the first character after the '>'.
+        my $offset = tell($FASTA) - length($_) + 1;
+
+        # One index record per ID the parser returns for this header line.
+        foreach my $key ($parse_ids->($_)) {
+            $self->add_record($key, $i, $offset);
+        }
+    }
+
+    close $FASTA;
+    return 1;
+}
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of FASTA file. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect a fasta
+            description line.  An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+	my ($self, $code) = @_;
+
+	# A true argument replaces the stored parser.
+	$self->{'_id_parser'} = $code if $code;
+
+	# Fall back to the default parser when nothing true is stored.
+	my $stored = $self->{'_id_parser'};
+	return $stored ? $stored : \&default_id_parser;
+}
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default Fasta ID parser for Fasta.pm
+            Returns $1 from applying the regexp /^>\s*(\S+)/
+            to $header.
+  Returns : ID string
+  Args    : a fasta header line string
+
+=cut
+
+sub default_id_parser {
+	# No ID when the header does not start with '>' followed by a
+	# non-whitespace token: return nothing.
+	return unless $_[0] =~ /^>\s*(\S+)/;
+
+	# The first run of non-whitespace after the '>' is the ID.
+	return $1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fastq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fastq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Fastq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,215 @@
+#
+#
+# BioPerl module for Bio::Index::Fastq
+#
+# Cared for by Tony Cox <avc at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Fastq - Interface for indexing (multiple) fastq files
+
+=head1 SYNOPSIS
+
+    # Complete code for making an index for several
+    # fastq files
+    use Bio::Index::Fastq;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Fastq->new(
+        '-filename' => $Index_File_Name,
+        '-write_flag' => 1);
+    $inx->make_index(@ARGV);
+
+    # Print out several sequences present in the index
+    # in Fastq format
+    use Bio::Index::Fastq;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Fastq->new('-filename' => $Index_File_Name);
+    my $out = Bio::SeqIO->new('-format' => 'Fastq','-fh' => \*STDOUT);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns Bio::Seq::Quality object
+	$out->write_seq($seq);
+    }
+
+    # or, alternatively
+    my $id;
+    my $seq = $inx->get_Seq_by_id($id); #identical to fetch
+
+=head1 DESCRIPTION
+
+Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
+and provides the basic functionality for indexing fastq files, and
+retrieving the sequence from them. Note: for best results 'use strict'.
+
+Bio::Index::Fastq supports the Bio::DB::BioSeqI interface, meaning
+it can be used as a Sequence database for other parts of bioperl
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Tony Cox
+
+Email - avc at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Index::Fastq;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::AbstractSeq);
+
+#
+# Suggested fix by Michael G Schwern <schwern at pobox.com> to
+# get around a clash with CPAN shell...
+#
+
+sub _version {
+    # Version number reported by this index module.
+    my $version = 0.2;
+    return $version;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Function: The file format for this package, which is needed
+           by the SeqIO system when reading the sequence.
+ Returns : 'Fastq'
+
+=cut
+
+sub _file_format {
+    # Format name needed by the SeqIO system when reading the sequence.
+    my $format = 'Fastq';
+    return $format;
+}
+
+
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index FASTQ format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+    # $file: file name; $i: index-number of the file being indexed.
+    my ($self, $file, $i) = @_;
+
+    my $parse_ids = $self->id_parser;
+    my $added     = 0;    # Number of index records added for this file.
+
+    open my $FASTQ, '<', $file or $self->throw("Can't open file for read : $file");
+
+    # Main indexing loop: every line starting with '@' is treated as a header.
+    # NOTE(review): FASTQ quality lines can presumably also start with '@';
+    # confirm such lines cannot be mistaken for headers here.
+    while (<$FASTQ>) {
+        next unless /^@/;
+
+        # Position of the first character after the '@'.
+        my $offset = tell($FASTQ) - length($_) + 1;
+
+        # One index record per ID the parser returns for this header line.
+        foreach my $key ($parse_ids->($_)) {
+            $self->add_record($key, $i, $offset);
+            $added++;
+        }
+    }
+
+    close $FASTQ;
+    return ($added);
+}
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of FASTQ file. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect a fastq
+            description line.  An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+    my ($self, $code) = @_;
+
+    # A true argument replaces the stored parser.
+    $self->{'_id_parser'} = $code if $code;
+
+    # Fall back to the default parser when nothing true is stored.
+    my $stored = $self->{'_id_parser'};
+    return $stored ? $stored : \&default_id_parser;
+}
+
+
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default Fastq ID parser for Fastq.pm
+            Returns $1 from applying the regexp /^@\s*(\S+)/
+            to $header.
+  Returns : ID string
+  Args    : a fastq header line string
+
+=cut
+
+sub default_id_parser {
+    # No ID when the header does not start with '@' followed by a
+    # non-whitespace token: return nothing.
+    return unless $_[0] =~ /^@\s*(\S+)/;
+
+    # The first run of non-whitespace after the '@' is the ID.
+    return $1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/GenBank.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/GenBank.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/GenBank.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,234 @@
+#
+# $Id: GenBank.pm,v 1.23.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::GenBank
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::GenBank - Interface for indexing one or more GenBank
+files (i.e. flat file GenBank format).
+
+=head1 SYNOPSIS
+
+    # Complete code for making an index for one or more GenBank files
+    use strict;
+    use Bio::Index::GenBank;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::GenBank->new(-filename => $Index_File_Name, 
+				                           -write_flag => 'WRITE');
+    $inx->make_index(@ARGV);
+
+    # Print out sequences present in the index in gcg format
+    use Bio::Index::GenBank;
+    use Bio::SeqIO;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::GenBank->new(-filename => $Index_File_Name);
+    my $seqio = new Bio::SeqIO(-format => 'gcg');
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns Bio::Seq object
+        $seqio->write_seq($seq);
+    }
+
+    # alternatively
+    my ($locus, $acc);
+    my $seq1 = $inx->get_Seq_by_id($locus);
+    my $seq2 = $inx->get_Seq_by_acc($acc);
+
+=head1 DESCRIPTION
+
+By default the index that is created uses the LOCUS, ACCESSION, and
+VERSION identifiers as keys. Inherits functions for managing dbm 
+files from Bio::Index::Abstract.pm, and provides the basic 
+functionality for indexing GenBank files, and retrieving the 
+sequence from them. For best results 'use strict'.
+
+You can also set or customize the unique key used to retrieve by 
+writing your own function and calling the id_parser() method.
+For example:
+
+   $inx->id_parser(\&get_id);
+   # make the index
+   $inx->make_index($file_name);
+
+   # here is where the retrieval key is specified
+   sub get_id {
+      my $line = shift;
+      $line =~ /clone="(\S+)"/;
+      $1;
+   }
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email - birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let's begin the code...
+
+package Bio::Index::GenBank;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::AbstractSeq);
+
+sub _type_stamp {
+    # What kind of index are we?
+    my $stamp = '__GenBank_FLAT__';
+    return $stamp;
+}
+
+sub _version {
+    # Version number reported by this index module.
+    my $version = 0.1;
+    return $version;
+}
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file($file_name, $i)
+  Function: Specialized function to index GenBank format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+    # $file: file name; $i: index-number of the file being indexed.
+    my ($self, $file, $i) = @_;
+
+    my $parse_ids = $self->id_parser;
+
+    open my $GENBANK, '<', $file or 
+	$self->throw("Can't open file for read : $file");
+
+    my $record_start = 0;    # Offset of the most recent LOCUS line.
+    my %seen_in_record;      # IDs already handled since the last reset.
+    while (<$GENBANK>) {
+        # A LOCUS line marks where the record being read begins.
+        $record_start = tell($GENBANK) - length($_) if /^LOCUS/;
+
+        # Every line goes through the parser; each new, true ID is indexed
+        # against the start of the current record.
+        foreach my $key ($parse_ids->($_)) {
+            next if exists $seen_in_record{$key};
+            $self->add_record($key, $i, $record_start) if $key;
+            $seen_in_record{$key} = 1;
+        }
+
+        # Any line containing '//' clears the per-record ID memory.
+        %seen_in_record = () if m{//};
+    }
+    close $GENBANK;
+    return 1;
+}
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.
+            Returns \&default_id_parser (see below) if not
+            set. An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : reference to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+    my ($self, $code) = @_;
+
+    # A true argument replaces the stored parser.
+    $self->{'_id_parser'} = $code if $code;
+
+    # Fall back to the default parser when nothing true is stored.
+    my $stored = $self->{'_id_parser'};
+    return $stored ? $stored : \&default_id_parser;
+}
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser($line)
+  Function: The default parser for GenBank.pm
+  Returns : Array of specified ids
+  Args    : a line string
+
+=cut
+
+#'
+
+sub default_id_parser {
+    # Extracts index keys from one line of a GenBank file:
+    #   LOCUS     -> the first token after the keyword
+    #   ACCESSION -> every whitespace-separated token
+    #   VERSION   -> every whitespace-separated token, with "GI:" removed
+    # Any other line yields no keys. Returns the list of distinct keys.
+    my $line = shift;
+    my %accs;    # Keys found on this line; a hash so duplicates collapse.
+    if ( $line =~ /^LOCUS\s+(\S+)/ ) {
+        $accs{$1}++;
+    } elsif ( $line =~ /^ACCESSION\s+(.*)/ ) {
+        for my $acc ( split(/\s+/, $1) ) {
+            $accs{$acc}++;
+        }
+    } elsif ( $line =~ /^VERSION\s+(.*)/ ) {
+        # Match the argument, not $_: the parser must not depend on the
+        # caller happening to hold the same line in $_.
+        for my $acc ( split(/\s+/, $1) ) {
+            $acc =~ s/GI\://;
+            $accs{$acc}++;
+        }
+    }
+    return keys %accs;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Usage   : Internal function for indexing system
+ Function: Provides file format for this database
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub _file_format{
+   # Provides the file format name for this database.
+   # Extra arguments are accepted and ignored.
+   my ($self, @args) = @_;
+   return 'GenBank';
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Hmmer.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Hmmer.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Hmmer.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,467 @@
+#
+# BioPerl module for Bio::Index::Hmmer
+# 
+# Cared for by Josh Lauricha <laurichj at bioinfo.ucr.edu>
+#
+# Copyright Josh Lauricha
+# Unless otherwise noted, this was shamelessly ripped from 
+# Bio::Index::Blast
+#
+# You may distribute this module under the terms of perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Hmmer - Indexes HMMER reports and supports retrieval based on query
+
+=head1 SYNOPSIS
+
+	# Complete Code for indexing a set of report files
+	#!/usr/bin/perl -w
+	use strict;
+	use Bio::Index::Hmmer;
+	my $indexfile = shift;
+	my $index = new Bio::Index::Hmmer(
+		-filename => $indexfile,
+		-write_flag => 1
+	);
+	$index->make_index(@ARGV);
+
+
+	# Complete code for fetching a report
+	use strict;
+	use Bio::Index::Hmmer;
+	my $indexfile = shift;
+	my $index = new Bio::Index::Hmmer(
+		-filename => $indexfile,
+		-write_flag => 0
+	);
+
+	foreach my $id (@ARGV) {
+		my $report = $index->fetch_report($id);
+		print "Query: ", $report->query_name(), "\n";
+		while( my $hit = $report->next_hit() ) {
+			print "\tHit Name: ", $hit->name(), "\n";
+			while( my $hsp = $hit->next_domain() ) {
+				print "\t\tE-Value: ", $hsp->evalue(), "\n";
+			}
+		}
+	}
+
+=head1 DESCRIPTION
+
+This object allows one to build an index on a HMMER file (or files)
+and provide quick access to the HMMER report for that accession.
+For best results 'use strict'.
+
+You can also set or customize the unique key used to retrieve by 
+writing your own function and calling the id_parser() method.
+For example:
+
+   $inx->id_parser(\&get_id);
+   # make the index
+   $inx->make_index($file_name);
+
+   # here is where the retrieval key is specified
+   sub get_id {
+      my $line = shift;
+      $line =~ /^KW\s+([A-Z]+)/i;
+      $1;
+   }
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Josh Lauricha
+
+Email laurichj at bioinfo.ucr.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Index::Hmmer;
+use strict;
+
+use Bio::SearchIO;
+use IO::String;
+use Bio::Root::Version;
+
+use base qw(Bio::Index::Abstract Bio::Root::Root);
+
+sub _version
+{
+	# The index version is whatever $Bio::Root::Version::VERSION holds.
+	my $version = $Bio::Root::Version::VERSION;
+	return $version;
+}
+
+=head2 new
+
+ Usage   : $index = new Bio::Index::Hmmer (
+               -filename    => $dbm_file,
+               -write_flag  => 0,
+               -dbm_package => 'DB_File',
+               -verbose     => 0
+           );
+ Function: Returns a new index object.  If filename is
+ specified, then open_dbm() is immediately called.
+ Returns : A new index object
+ Args    : -filename    The name of the dbm index file.
+           -write_flag  TRUE if write access to the dbm file is
+                        needed.
+           -dbm_package The Perl dbm module to use for the
+                        index.
+           -verbose     Print debugging output to STDERR if
+                        TRUE.
+
+=cut
+
+sub new
+{
+	my ($class, @args) = @_;
+
+	# Construction is handed to the parent class with the arguments
+	# unchanged; the object it builds is returned as is.
+	my $self = $class->SUPER::new(@args);
+	return $self;
+}
+
+=head2 Bio::Index::Hmmer implemented methods
+
+=cut
+
+=head2 fetch_report
+
+ Title   : fetch_report
+ Usage   : my $report = $idx->fetch_report($id);
+ Function: Returns a Bio::Search::Result::HMMERResult report object
+           for a specific HMMER report
+ Returns : Bio::Search::Result::HMMERResult
+ Args    : valid id
+
+=cut
+
+sub fetch_report
+{
+	my ($self, $id) = @_;
+
+	# Stream for $id as handed back by get_stream(); remember its position.
+	my $fh    = $self->get_stream($id);
+	my $start = tell($fh);
+
+	# The HMMER SearchIO wants the header, so we fetch it: everything from
+	# the top of the file through the first "Query sequence:" line.
+	# NOTE(review): that line is the first one in the file, presumably not
+	# always the one belonging to $id -- confirm the parsed result still
+	# reports the right query.
+	my @header;
+	seek($fh, 0, 0);
+	while (my $line = <$fh>) {
+		push @header, $line;
+		last if $line =~ /Query sequence:/o;
+	}
+
+	# Then the data: from the remembered position through the first line
+	# containing '//'.
+	seek($fh, $start, 0);
+	my @data;
+	while (<$fh>) {
+		push @data, $_ if defined;
+		last if m{//}o;
+	}
+
+	# Then join them and parse from an in-memory handle.
+	my $rfh    = IO::String->new(join('', @header, @data));
+	my $report = Bio::SearchIO->new(
+		-noclose => 1,
+		-format  => 'hmmer',
+		-fh      => $rfh
+	);
+	return $report->next_result();
+}
+
+# shamelessly stolen from Bio::Index::Fasta
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of HMMER report. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect the name taken
+            from a "Query sequence:" line.  An entry will be
+            added to the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser
+{
+	my ($self, $code) = @_;
+
+	# A true argument replaces the stored parser.
+	$self->{'_id_parser'} = $code if $code;
+
+	# Fall back to the default parser when nothing true is stored.
+	my $stored = $self->{'_id_parser'};
+	return $stored ? $stored : \&default_id_parser;
+}
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default query ID parser for Bio::Index::Hmmer.pm
+            Returns $1 from applying the regexp /^\s*(\S+)/
+            to $header.
+  Returns : ID string
+  Args    : a header line string
+
+=cut
+
+sub default_id_parser
+{
+	# No ID when the string holds no non-whitespace token: return nothing.
+	return unless $_[0] =~ /^\s*(\S+)/;
+
+	# The first run of non-whitespace characters is the ID.
+	return $1;
+}
+
+=head2 Require methods from Bio::Index::Abstract
+
+=cut
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index HMMER report file(s).
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+
+sub _index_file {
+	# $file: name of the HMMER report file; $i: index-number of that file.
+	# Adds one record per ID the id parser returns for every
+	# "Query sequence:" line; returns 1 when the file has been read.
+	my($self, $file, $i) = @_;
+
+	open(my $HMMER, '<', $file) or $self->throw("cannot open file $file");
+
+	while(<$HMMER>) {
+		if( /Query sequence: ([^\s]+)/o ) {
+			# The record points just past the "Query sequence:" line.
+			my $indexpoint = tell($HMMER);
+			foreach my $id ($self->id_parser()->($1)) {
+				# Debugging output belongs on STDERR so it cannot mix
+				# with whatever the caller writes to STDOUT.
+				print STDERR "id is $id, begin is $indexpoint\n" if $self->verbose() > 0;
+				$self->add_record($id, $i, $indexpoint);
+			}
+		}
+	}
+	close $HMMER;
+	return 1;
+}
+
+=head2 Bio::Index::Abstract methods
+
+=cut
+
+=head2 filename
+
+ Title   : filename
+ Usage   : $value = $self->filename();
+           $self->filename($value);
+ Function: Gets or sets the name of the dbm index file.
+ Returns : The current value of filename
+ Args    : Value of filename if setting, or none if
+           getting the value.
+
+=head2 write_flag
+
+ Title   : write_flag
+ Usage   : $value = $self->write_flag();
+           $self->write_flag($value);
+ Function: Gets or sets the value of write_flag, which
+           is whether the dbm file should be opened with
+           write access.
+ Returns : The current value of write_flag (default 0)
+ Args    : Value of write_flag if setting, or none if
+           getting the value.
+
+=head2 dbm_package
+
+ Usage   : $value = $self->dbm_package();
+           $self->dbm_package($value);
+
+ Function: Gets or sets the name of the Perl dbm module used. 
+           If the value is unset, then it returns the value of
+           the package variable $USE_DBM_TYPE or if that is
+           unset, then it chooses the best available dbm type,
+           choosing 'DB_File' in preference to 'SDBM_File'. 
+           Bio::Abstract::Index may work with other dbm file
+           types.
+
+ Returns : The current value of dbm_package
+ Args    : Value of dbm_package if setting, or none if
+           getting the value.
+
+
+=head2 get_stream
+
+ Title   : get_stream
+ Usage   : $stream = $index->get_stream( $id );
+ Function: Returns a file handle with the file pointer
+           at the appropriate place
+
+           This provides for a way to get the actual
+           file contents and not an object 
+
+           WARNING: you must parse the record delimiter
+           *yourself*. Abstract won't do this for you 
+           So this code
+
+           $fh = $index->get_stream($myid);
+           while( <$fh> ) {
+              # do something
+           }
+           will parse the entire file if you don't put in
+           a last statement in, like
+
+           while( <$fh> ) {
+              /^\/\// && last; # end of record
+              # do something
+           }
+
+ Returns : A filehandle object
+ Args    : string represents the accession number
+ Notes   : This method should not be used without forethought 
+
+
+=head2 open_dbm
+
+  Usage   : $index->open_dbm()
+  Function: Opens the dbm file associated with the index
+            object.  Write access is only given if explicitly
+            asked for by calling new(-write => 1) or having set
+            the write_flag(1) on the index object.  The type of
+            dbm file opened is that returned by dbm_package(). 
+            The name of the file to be opened is obtained by
+            calling the filename() method.
+
+  Example : $index->_open_dbm()
+  Returns : 1 on success
+
+
+=head2 _version
+
+  Title   : _version
+  Usage   : $type = $index->_version()
+  Function: Returns a string which identifies the version of an
+            index module.  Used to permanently identify an index
+            file as having been created by a particular version
+            of the index module.  Must be provided by the sub class
+  Example : 
+  Returns : 
+  Args    : none
+
+=head2 _filename
+
+  Title   : _filename
+  Usage   : $index->_filename( FILE INT )
+  Function: Indexes the file
+  Example : 
+  Returns : 
+  Args    : 
+
+=head2 _file_handle
+
+  Title   : _file_handle
+  Usage   : $fh = $index->_file_handle( INT )
+  Function: Returns an open filehandle for the file
+            index INT.  On opening a new filehandle it
+            caches it in the @{$index->_filehandle} array.
+            If the requested filehandle is already open,
+            it simply returns it from the array.
+  Example : $fist_file_indexed = $index->_file_handle( 0 );
+  Returns : ref to a filehandle
+  Args    : INT
+
+=head2 _file_count
+
+  Title   : _file_count
+  Usage   : $index->_file_count( INT )
+  Function: Used by the index building sub in a sub class to
+            track the number of files indexed.  Sets or gets
+            the number of files indexed when called with or
+            without an argument.
+  Example : 
+  Returns : INT
+  Args    : INT
+
+
+=head2 add_record
+
+  Title   : add_record
+  Usage   : $index->add_record( $id, @stuff );
+  Function: Calls pack_record on @stuff, and adds the result
+            of pack_record to the index database under key $id.
+            If $id is a reference to an array, then a new entry
+            is added under a key corresponding to each element
+            of the array.
+  Example : $index->add_record( $id, $fileNumber, $begin, $end )
+  Returns : TRUE on success or FALSE on failure
+  Args    : ID LIST
+
+=head2 pack_record
+
+  Title   : pack_record
+  Usage   : $packed_string = $index->pack_record( LIST )
+  Function: Packs an array of scalars into a single string
+            joined by ASCII 034 (which is unlikely to be used
+            in any of the strings), and returns it. 
+  Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
+  Returns : STRING or undef
+  Args    : LIST
+
+=head2 unpack_record
+
+  Title   : unpack_record
+  Usage   : $index->unpack_record( STRING )
+  Function: Splits the string provided into an array,
+            splitting on ASCII 034.
+  Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
+  Returns : A 3 element ARRAY
+  Args    : STRING containing ASCII 034
+
+=head2 DESTROY
+
+ Title   : DESTROY
+ Usage   : Called automatically when index goes out of scope
+ Function: Closes connection to database and handles to
+           sequence files
+ Returns : NEVER
+ Args    : NONE
+
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Qual.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Qual.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Qual.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,239 @@
+#
+# $Id: Qual.pm,v 1.9.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::Qual
+#
+# Copied almost verbatim from James Gilbert's Bio::Index::Fasta 
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Qual - Interface for indexing (multiple) fasta qual files
+
+=head1 SYNOPSIS
+
+    # Complete code for making an index for several
+    # qual files
+    use Bio::Index::Qual;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Qual->new(
+        '-filename' => $Index_File_Name,
+        '-write_flag' => 1);
+    $inx->make_index(@ARGV);
+
+    # Print out several sequences present in the index
+    # in Qual format
+    use Bio::Index::Qual;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::Qual->new('-filename' => $Index_File_Name);
+    my $out = Bio::SeqIO->new('-format' => 'qual','-fh' => \*STDOUT);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns Bio::Seq object
+	$out->write_seq($seq);
+    }
+
+    # or, alternatively
+    my $id;
+    my $seq = $inx->get_Seq_by_id($id); #identical to fetch
+
+=head1 DESCRIPTION
+
+Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
+and provides the basic functionality for indexing qual files, and
+retrieving the sequence from them. For best results 'use strict'.
+
+Bio::Index::Qual supports the Bio::DB::BioSeqI interface, meaning
+it can be used as a Sequence database for other parts of bioperl
+
+Additional example code is available in scripts/index/*PLS and in 
+the Bioperl Tutorial (L<http://www.bioperl.org/wiki/Bptutorial.pl>).
+
+Note that by default the key for the sequence will be the first continuous
+string after the 'E<gt>' in the qual header. If you want to use a specific
+substring of the qual header you must use the id_parser() method.
+
+You can also set or customize the unique key used to retrieve by 
+writing your own function and calling the id_parser() method.
+For example:
+
+   $inx->id_parser(\&get_id);
+   # make the index
+   $inx->make_index($file_name);
+
+   # here is where the retrieval key is specified
+   sub get_id {
+      my $line = shift;
+      $line =~ /^(\d+)/;
+      $1;
+   }
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - James Gilbert, Mark Johnson
+
+Email - jgrg at sanger.ac.uk, mjohnson at watson.wustl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Index::Qual;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::AbstractSeq);
+
+#
+# Suggested fix by Michael G Schwern <schwern at pobox.com> to
+# get around a clash with CPAN shell...
+#
+
+
+# Version of this index module; identifies which version of the module
+# created a given index file.
+sub _version {
+	return 0.2;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Function: The file format for this package, which is needed
+           by the SeqIO system when reading the sequence.
+ Returns : 'qual'
+
+=cut
+
+# Name of the SeqIO format used when reading sequences back from the
+# indexed files.
+sub _file_format {
+	return 'qual';
+}
+
+
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index QUAL format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+	my( $self,
+		 $file, # File name
+		 $i,    # Index-number of file being indexed
+	  ) = @_;
+
+	my $id_parser = $self->id_parser;
+
+	open my $QUAL, '<', $file or $self->throw("Can't open file for read : $file");
+
+	# $_ is localised so that the caller's value survives the read loop,
+	# while id parsers that inspect $_ instead of their argument keep working.
+	local $_;
+
+	# Main indexing loop: every header line (starting with '>') opens a record
+	while (<$QUAL>) {
+		next unless /^>/;
+
+		# $begin is the position of the first character after the '>':
+		# the header line started length($_) bytes before the read position.
+		my $begin = tell($QUAL) - length( $_ ) + 1;
+
+		# The parser may return several keys; each of them is stored
+		# with the same file number and offset.
+		foreach my $id ($id_parser->($_)) {
+			$self->add_record($id, $i, $begin);
+		}
+	}
+
+	close $QUAL;
+	return 1;
+}
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of Qual file. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect a qual
+            description line.  An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+	my $self = shift;
+
+	# A true argument replaces the stored parser; a false one is ignored.
+	if (my $code = shift) {
+		$self->{'_id_parser'} = $code;
+	}
+
+	# Fall back to the module default while no parser has been stored.
+	my $stored = $self->{'_id_parser'};
+	return $stored ? $stored : \&default_id_parser;
+}
+
+
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default Qual ID parser for Qual.pm
+            Returns $1 from applying the regexp /^>\s*(\S+)/
+            to $header.
+  Returns : ID string
+  Args    : a qual header line string
+
+=cut
+
+sub default_id_parser {
+	# No '>' header at the start of the string: nothing to return
+	# (empty list in list context, undef in scalar context).
+	return unless $_[0] =~ /^>\s*(\S+)/;
+
+	# The ID is the first run of non-blank characters after the '>'.
+	return $1;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Qual.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/SwissPfam.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/SwissPfam.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/SwissPfam.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,192 @@
+#
+# $Id: SwissPfam.pm,v 1.24.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::SwissPfam
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::SwissPfam - Interface for indexing swisspfam files
+
+=head1 SYNOPSIS
+
+    use Bio::Index::SwissPfam;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name, 
+                         					 '-write_flag' => 'WRITE');
+    $inx->make_index(@ARGV);
+
+    use Bio::Index::SwissPfam;
+    use strict;
+
+    my $Index_File_Name = shift;
+    my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns stream
+	     while( <$seq> ) {
+	         if(/^>/) {
+	    	       print;
+		          last;
+	         }
+	     }
+    }
+
+
+=head1 DESCRIPTION
+
+SwissPfam is one of the flat files released with Pfam. This module
+provides a way of indexing this file.
+
+Inherits functions for managing dbm files from Bio::Index::Abstract.pm, and 
+provides the basic functionality for indexing SwissPfam files.  Only 
+retrieves FileStreams at the moment. Once we have something better 
+(ie, an object!), will use that. Heavily snaffled from Index::Fasta system of 
+James Gilbert. Note: for best results 'use strict'.
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email - birney at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let's begin the code...
+
+
+package Bio::Index::SwissPfam;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::Abstract);
+
+# Version of this index module; identifies which version of the module
+# created a given index file.
+sub _version {
+    return 0.1;
+}
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index swisspfam format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+    my( $self,
+        $file, # File name
+        $i     # Index-number of file being indexed
+        ) = @_;
+
+    my $begin = 0;  # Offset from start of file of the record being scanned
+    my( $id,        # ID of the record being scanned
+        $acc,       # its accession, which is put into the index as well
+        );
+
+    # Stores the record being scanned, which ends at offset $end, under its
+    # ID and - when it differs from the ID - under its accession too.
+    # Does nothing while no header line has been seen yet.
+    my $store = sub {
+        my ($end) = @_;
+        return unless $id;
+        $self->add_record($id, $i, $begin, $end);
+        $self->add_record($acc, $i, $begin, $end) if $acc ne $id;
+        return;
+    };
+
+    open my $SP, '<', $file or $self->throw("Can't open file for read : $file");
+
+    # Main indexing loop (a lexical line variable keeps the caller's $_ intact)
+    while (my $line = <$SP>) {
+        next unless $line =~ /^>(\S+)\s+\|=*\|\s+(\S+)/;
+        my ($nid, $nacc) = ($1, $2);
+
+        # A new header closes the previous record one byte before it starts.
+        my $new_begin = tell($SP) - length( $line );
+        $store->($new_begin - 1);
+
+        ($begin, $id, $acc) = ($new_begin, $nid, $nacc);
+    }
+
+    # Don't forget to add the last record, which runs to the end of the file.
+    # It used to be stored under its ID only, so it could not be fetched by
+    # accession, unlike every other record.
+    $store->(tell($SP));
+
+    close $SP;
+    return 1;
+}
+
+
+=head2 fetch
+
+  Title   : fetch
+  Usage   : $index->fetch( $id )
+  Function: Returns a filehandle positioned at the start of the
+            record stored in the index under ID
+  Example : $fh = $index->fetch( 'dJ67B12' )
+  Returns : filehandle (throws an exception if ID is not in the index)
+  Args    : ID
+
+=cut
+
+sub fetch {
+    my( $self, $id ) = @_;
+
+    # Look the key up in the dbm hash; a missing (or false) entry is an error.
+    my $rec = $self->db()->{ $id };
+    return $self->throw("Unable to find a record for $id in SwissPfam flat file index")
+        unless $rec;
+
+    # Only the file number and the start offset are needed here.
+    my ($file, $begin) = $self->unpack_record( $rec );
+
+    # Get the (possibly cached) filehandle and move it to the record start
+    my $fh = $self->_file_handle( $file );
+    seek($fh, $begin, 0);
+
+    return $fh;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Index/Swissprot.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Index/Swissprot.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Index/Swissprot.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+# $Id: Swissprot.pm,v 1.26.4.1 2006/10/02 23:10:20 sendu Exp $
+#
+# BioPerl module for Bio::Index::Swissprot
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Index::Swissprot - Interface for indexing one or more
+Swissprot files.
+
+=head1 SYNOPSIS
+
+Make an index for one or more Swissprot files:
+
+    use Bio::Index::Swissprot;
+    use strict;
+
+    my $index_file_name = shift;
+    my $inx = Bio::Index::Swissprot->new(
+                           -filename => $index_file_name,
+					            -write_flag => 1);
+    $inx->make_index(@ARGV);
+
+Print out several sequences present in the index in Genbank format:
+
+    use Bio::Index::Swissprot;
+    use Bio::SeqIO;
+    use strict;
+
+    my $out = Bio::SeqIO->new( -format => 'genbank',
+                               -fh => \*STDOUT );
+    my $index_file_name = shift;
+    my $inx = Bio::Index::Swissprot->new(-filename => $index_file_name);
+
+    foreach my $id (@ARGV) {
+        my $seq = $inx->fetch($id); # Returns a Bio::Seq object
+        $out->write_seq($seq);
+    }
+
+    # alternatively
+    my ($id, $acc);
+    my $seq1 = $inx->get_Seq_by_id($id);
+    my $seq2 = $inx->get_Seq_by_acc($acc);
+
+=head1 DESCRIPTION
+
+By default the index that is created uses the AC and ID identifiers
+as keys. This module inherits functions for managing dbm files from 
+Bio::Index::Abstract.pm, and provides the basic functionality 
+for indexing Swissprot files and retrieving Sequence objects from 
+them. For best results 'use strict'.
+
+You can also set or customize the unique key used to retrieve by 
+writing your own function and calling the id_parser() method.
+For example:
+
+   $inx->id_parser(\&get_id);
+   # make the index
+   $inx->make_index($index_file_name);
+
+   # here is where the retrieval key is specified
+   sub get_id {
+      my $line = shift;
+      $line =~ /^KW\s+([A-Z]+)/i;
+      $1;
+   }
+
+=head1 FEED_BACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Also lorenz at ist.org, bosborne at alum.mit.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let's begin the code...
+
+package Bio::Index::Swissprot;
+
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::Index::AbstractSeq);
+
+# Marker string naming the kind of index this class builds.
+sub _type_stamp {
+	return '__Swissprot_FLAT__'; # What kind of index are we?
+}
+
+# Version number reported for this index module.
+sub _version {
+	return 0.1;
+}
+
+=head2 _index_file
+
+  Title   : _index_file
+  Usage   : $index->_index_file( $file_name, $i )
+  Function: Specialist function to index Swissprot format files.
+            Is provided with a filename and an integer
+            by make_index in its SUPER class.
+  Example : 
+  Returns : 
+  Args    : 
+
+=cut
+
+sub _index_file {
+	# $file is file name, $i is number of file being indexed
+	my( $self, $file, $i ) = @_;
+
+	# Offset from start of file of the record currently being read
+	my $begin = 0;
+
+	my $id_parser = $self->id_parser;
+
+	open my $SWISSPROT,'<',$file or $self->throw("Can't read file: $file");
+
+	# Keys already stored for the current record, so that a key which the
+	# parser reports more than once is only added once.
+	my %done_ids;
+
+	# $_ is localised so that the caller's value survives the read loop,
+	# while id parsers that inspect $_ instead of their argument keep working.
+	local $_;
+	while (<$SWISSPROT>) {
+		# An ID line opens a new record; remember where the line started.
+		if (/^ID\s+\S+/) {
+			$begin = tell($SWISSPROT) - length( $_ );
+		}
+		for my $id ($id_parser->($_)) {
+			# Parsers may hand back undef or '' for lines without a key.
+			next unless $id;
+			next if exists $done_ids{$id};
+			$self->add_record($id, $i, $begin);
+			$done_ids{$id} = 1;
+		}
+		# Only a terminator line ('//' at the very start of the line) ends
+		# a record.  A '//' elsewhere, e.g. inside a URL on a comment line,
+		# must not make the keys of the current record be added again.
+		if (m{^//}) {
+			%done_ids = ();
+		}
+	}
+	close $SWISSPROT;
+	return 1;
+}
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.
+            Returns \&default_id_parser (see below) if not
+            set. An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+	my( $self, $code ) = @_;
+
+	# Store the new parser only when a true value was passed in
+	$self->{'_id_parser'} = $code if $code;
+
+	# No parser stored yet: hand out the built-in one
+	return \&default_id_parser unless $self->{'_id_parser'};
+	return $self->{'_id_parser'};
+}
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $line )
+  Function: The default parser for Swissprot.pm
+            Returns $1 from applying the regexp /^ID\s*(\S+)/
+            or /^AC\s+([A-Z0-9]+)/ to the current line.
+  Returns : ID string
+  Args    : a line string
+
+=cut
+
+sub default_id_parser {
+	my $line = shift;
+
+	# An ID line: the key is the first word after 'ID'
+	if ($line =~ /^ID\s*(\S+)/) {
+		return $1;
+	} 
+	# An AC line: the key is the first accession on the line
+	elsif ($line =~ /^AC\s+([A-Z0-9]+)/) {
+		return $1;
+	}
+
+	# Any other line carries no key.  Return nothing explicitly (empty list
+	# in list context, undef in scalar context) instead of falling off the
+	# end of the sub and leaking the false result of the last match.
+	return;
+}
+
+=head2 _file_format
+
+ Title   : _file_format
+ Usage   : Internal function for indexing system
+ Function: Provides file format for this database
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Name of the file format of the indexed database files.
+sub _file_format {
+   return 'swiss';
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/AARange.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/AARange.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/AARange.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,407 @@
+# $Id: AARange.pm,v 1.12.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::AARange
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::AARange - AARange abstract class for LiveSeq
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+This is used as possible parent for aminoacid range object classes.
+Or it can be used straight away to define aminoacid ranges.  The idea
+is that the ranges defined are attached to a Translation object and
+they refer to its coordinate-system when they are first created (via
+the new() method).  When they are created they are anyway linked to
+the underlying DNA LiveSeq by way of the LiveSeq labels. This allows
+to preserve the ranges even if the numbering changes in the
+Translation due to deletions or insertions.
+
+The protein sequence associated with the AARange can be accessed via
+the usual seq() or subseq() methods.
+
+The start and end of the AARange in protein coordinate system can be
+fetched with aa_start() and aa_end() methods. Note: the behaviour of
+these methods would be influenced by the coordinate_start set in the
+corresponding Translation object. This can be desirable but can also
+lead to confusion if the coordinate_start had been changed and the
+original position of the AARange was to be retrieved.
+
+start() and end() methods of the AARange will point to the labels
+identifying the first nucleotide of the first and last triplet coding
+for the start and end of the AminoAcidRange.
+
+The underlying nucleotide sequence of the AARange can be retrieved
+with the labelsubseq() method. This would retrieve the whole DNA
+sequence, including possible introns. This is called "DNA_sequence".
+
+To fetch the nucleotide sequence of the Transcript, without introns,
+the labelsubseq() of the attached Transcript (the Transcript the
+Translation comes from) has to be accessed. This is called
+"cDNA_sequence".
+
+Here are the operations to retrieve these latter two kinds of
+sequences:
+
+   $startlabel=$AARange->start;
+   $endtripletlabel=$AARange->end;
+   $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand);
+
+   $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel);
+
+   $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel);
+
+To simplify, these operations have been included in two additional
+methods: dna_seq() and cdna_seq().
+
+These would return the whole sequence, as in the examples above.  But
+the above general scheme can be used by specifying different labels,
+to retrieve hypothetical subsequences of interest.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::AARange;
+
+use strict;
+use base qw(Bio::LiveSeq::SeqI);
+
+=head2 new
+
+  Title   : new
+  Usage   : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref,
+                                               -start => $beginaa,
+                                               -end => $endaa,
+                                               -name => "ABCD",
+                                               -description => "DCBA",
+                                               -translength => $length);
+
+  Function: generates a new AminoAcidRange LiveSeq object
+  Returns : reference to a new object of class AARange
+  Errorcode -1
+  Args    : two positions in AminoAcid coordinate numbering
+            an object reference specifying to which translation the aminoacid
+            ranges refer to
+            a name and a description (optional)
+            an optional "translength" argument: this can be given when
+            a lot of AARanges are to be created at the same time for the same
+            Translation object, calculating it with $translation->length
+            This would increase the speed, avoiding the new() function to
+            calculate everytime the same length again and again for every obj.
+
+=cut
+
+sub new {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+
+  # The object is a plain blessed hash.  It is created before the arguments
+  # are checked so that warn() can be used to report problems.
+  my $self = bless {}, $class;
+
+  my ($translation,$start,$end,$name,$description,$translength)=
+    @args{qw(-translation -start -end -name -description -translength)};
+
+  unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) {
+    $self->warn("No -translation or wrong type given");
+    return (-1);
+  }
+  unless ($translength) { # if it's not given, fetch it
+    $translength=$translation->length;
+  }
+  my $seq=$translation->{'seq'};
+
+  # A position is invalid when it lies on EITHER side of the translation.
+  # These two tests used to join their conditions with "&&", which can
+  # never be true, so no out-of-range position was ever rejected here.
+  if (($start < 1)||($start > $translength)) {
+    $self->warn("$class not initialised because start aminoacid position not valid");
+    return (-1);
+  }
+  if (($end < 1)||($end > $translength)) {
+    $self->warn("$class not initialised because end aminoacid position not valid");
+    return (-1);
+  }
+  if ($start > $end) {
+    $self->warn("$class not initialised because start position > end position!");
+    return (-1);
+  }
+
+  # Translate both aminoacid positions into labels of the underlying
+  # sequence, so that the range no longer depends on position numbering.
+  my ($starttripletlabel,$endtripletlabel);
+  if ($start == $end) { # trick to increase speed
+    $starttripletlabel=$endtripletlabel=$translation->label($start);
+  } else {
+    ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end));
+  }
+  unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) {
+    $self->warn("$class not initialised because of problems in retrieving start or end label!");
+    return (-1);
+  }
+
+  # unsure if needed:
+  #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end
+  #unless ($endlabel > 0) {
+    #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid";
+    #return (-1);
+  #}
+  $self->{'seq'}=$seq;
+  $self->{'start'}=$starttripletlabel;
+  $self->{'end'}=$endtripletlabel;
+  $self->{'strand'}=$translation->strand;
+  $self->{'translation'}=$translation;
+  $self->{'name'}=$name;
+  $self->{'description'}=$description;
+  $self->{'alphabet'}="protein";
+
+  return $self;
+}
+
+sub coordinate_start {
+  my ($self) = @_;
+
+  # Not supported for aminoacid ranges: warn and return the error code -1
+  $self->warn("Cannot perform this operation in an AminoAcidRange object!");
+  return -1;
+}
+
+sub all_labels {
+  my ($self) = @_;
+
+  # Not supported for aminoacid ranges: warn and return the error code -1
+  $self->warn("Cannot perform this operation in an AminoAcidRange object!");
+  return -1;
+}
+
+sub valid {
+  my ($self) = @_;
+
+  # Not supported for aminoacid ranges: warn and return the error code -1
+  $self->warn("Cannot perform this operation in an AminoAcidRange object!");
+  return -1;
+}
+
+=head2 get_Transcript
+
+  Title   : get_Transcript
+  Usage   : $transcript = $obj->get_Transcript()
+  Function: retrieves the reference to the object of class Transcript (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+sub get_Transcript {
+  my ($self) = @_;
+
+  # The Transcript is reached through the Translation of this range.
+  my $translation = $self->get_Translation;
+  return $translation->get_Transcript;
+}
+
+=head2 get_Translation
+
+  Title   : get_Translation
+  Usage   : $translation = $obj->get_Translation()
+  Function: retrieves the reference to the object of class Translation (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+sub get_Translation {
+  my ($self) = @_;
+
+  # The Translation is whatever was stored under the 'translation' key.
+  return $self->{'translation'};
+}
+
+sub change {
+  my ($self) = @_;
+
+  # Aminoacid ranges are read-only: warn and return the error code -1
+  $self->warn("Cannot change an AminoAcidRange object!");
+  return -1;
+}
+sub positionchange {
+  my ($self) = @_;
+
+  # Aminoacid ranges are read-only: warn and return the error code -1
+  $self->warn("Cannot change an AminoAcidRange object!");
+  return -1;
+}
+sub labelchange {
+  my ($self) = @_;
+
+  # Aminoacid ranges are read-only: warn and return the error code -1
+  $self->warn("Cannot change an AminoAcidRange object!");
+  return -1;
+}
+
+# Returns part of the protein sequence of the range, or -1 (after a
+# warning) when the arguments make no sense.
+#   $pos1   : first position wanted (default 1)
+#   $pos2   : last position wanted; when given it takes precedence over $length
+#   $length : number of aminoacids wanted (default: up to the end)
+sub subseq {
+  my ($self,$pos1,$pos2,$length) = @_;
+
+  if ((defined ($length))&&($length < 1)) {
+    $self->warn("No sense asking for a subseq of length < 1");
+    return (-1);
+  }
+  unless (defined ($pos1)) {
+    $pos1=1;
+  } elsif ($pos1 < 1) { # if position out of boundaries
+    # (an unreachable $pos1 > $pos2 test used to follow this return;
+    # that case is handled further down)
+    $self->warn("Starting position for AARange cannot be < 1!"); return (-1);
+  }
+
+  my $seq=$self->seq;
+  my $objlength=length($seq);
+  unless (defined ($length)) {
+    $length=$objlength-$pos1+1; # default: everything from $pos1 onwards
+  }
+  if (defined ($pos2)) {
+    if ($pos2 > $objlength) { # if position out of boundaries
+      $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1);
+    }
+    # $pos1 is always defined at this point
+    if ($pos1>$pos2) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+    $length=$pos2-$pos1+1;
+  }
+
+  my $str=substr($seq,$pos1-1,$length);
+  if (length($str) < $length) {
+    $self->warn("Attention, cannot return the length requested for subseq",1);
+  }
+  return $str;
+}
+
+sub seq {
+  my ($self) = @_;
+
+  # Current positions of both ends of the range in Translation coordinates
+  my $first = $self->aa_start;
+  my $last  = $self->aa_end;
+  unless ($first && $last) { # they must both exist
+    $self->warn("Not able to find start or end of the AminoAcid Range");
+    return 0;
+  }
+
+  # Cut the range out of the whole protein sequence.
+  # Note: this gives "undef" if the translation stops before the start of
+  # the aarange (because of an upstream nonsense mutation creating a STOP),
+  # and an incomplete string (up to the STOP) if the stop falls between
+  # the start and the end of the aarange.
+  my $protein = $self->get_Translation->seq;
+  return substr($protein, $first-1, $last-$first+1);
+}
+
+sub length {
+  my ($self) = @_;
+
+  # Number of characters of the protein string of the range; CORE:: makes
+  # it explicit that the builtin is meant, not this method.
+  return CORE::length($self->seq);
+}
+
+sub label {
+  my ($self,$position)=@_;
+  my $translation=$self->get_Translation;
+
+  # Let the Translation count from the start of this range for one lookup,
+  # then put its own coordinate start back.
+  my $saved_start=$translation->coordinate_start;
+  $translation->coordinate_start($self->start);
+  my $found=$translation->label($position);
+  $translation->coordinate_start($saved_start);
+
+  return $found;
+}
+
+sub position {
+  my ($self,$label)=@_;
+  my $translation=$self->get_Translation;
+
+  # Let the Translation count from the start of this range for one lookup,
+  # then put its own coordinate start back.
+  my $saved_start=$translation->coordinate_start;
+  $translation->coordinate_start($self->start);
+  my $found=$translation->position($label);
+  $translation->coordinate_start($saved_start);
+
+  return $found;
+}
+
+=head2 aa_start
+
+  Title   : aa_start
+  Usage   : $start = $aarange->aa_start()
+  Returns : integer (position, according to Translation coordinate system) of
+            the start of an AminoAcidRange object
+  Args    : none
+
+=cut
+
+sub aa_start {
+  my ($self) = @_;
+
+  # Ask the Translation where the start label of the range currently sits.
+  my $position = $self->get_Translation->position($self->{'start'});
+  return $position;
+}
+
+=head2 aa_end
+
+  Title   : aa_end
+  Usage   : $end = $aarange->aa_end()
+  Returns : integer (position, according to Translation coordinate system) of
+            the end of an AminoAcidRange object
+  Args    : none
+
+=cut
+
+sub aa_end {
+  my ($self) = @_;
+
+  # Ask the Translation where the end label of the range currently sits.
+  my $position = $self->get_Translation->position($self->{'end'});
+  return $position;
+}
+
+=head2 dna_seq
+
+  Title   : dna_seq
+  Usage   : $dnaseq = $aarange->dna_seq()
+  Returns : the sequence at DNA level of the entire AminoAcidRange
+            this would include introns (if present)
+  Args    : none
+
+=cut
+
+sub dna_seq {
+  my ($self) = @_;
+  my $first_base   = $self->start;
+  my $last_triplet = $self->end;
+
+  # end() labels the first nucleotide of the last triplet; the sequence has
+  # to run up to the third nucleotide of that triplet, along the strand.
+  my $last_base = $self->{'seq'}->label(3,$last_triplet,$self->strand);
+
+  return $self->labelsubseq($first_base,undef,$last_base);
+}
+
+=head2 cdna_seq
+
+  Title   : cdna_seq
+  Usage   : $cdnaseq = $aarange->cdna_seq()
+  Returns : the sequence at cDNA level of the entire AminoAcidRange
+            i.e. this is the part of the Transcript that codes for the
+            AminoAcidRange. It would be composed just of exonic DNA.
+  Args    : none
+
+=cut
+
+sub cdna_seq {
+  my ($self) = @_;
+  my $first_base   = $self->start;
+  my $last_triplet = $self->end;
+
+  # end() labels the first nucleotide of the last triplet; the sequence has
+  # to run up to the third nucleotide of that triplet, along the strand.
+  my $last_base = $self->{'seq'}->label(3,$last_triplet,$self->strand);
+
+  # Asking the Transcript (not the range itself) leaves the introns out.
+  return $self->get_Transcript->labelsubseq($first_base,undef,$last_base);
+}
+
+# this checks if the attached Transcript has a Gene object attached
+sub gene {
+  my ($self,$value) = @_;
+
+  # Setter part: a defined value is stored on the range itself
+  $self->{'gene'} = $value if defined $value;
+
+  # A gene stored on the range itself wins ...
+  return $self->{'gene'} if exists $self->{'gene'};
+
+  # ... otherwise use the one attached to the Transcript, or 0 if none
+  return (0) unless exists $self->get_Transcript->{'gene'};
+  return $self->get_Transcript->{'gene'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Chain.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Chain.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Chain.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1340 @@
+#!/usr/bin/perl
+# $Id: Chain.pm,v 1.15 2005/11/17 09:54:47 heikki Exp $
+#
+# bioperl module for Bio::LiveSeq::Chain
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::LiveSeq::Chain - DoubleChain DataStructure for Perl
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+This is a general purpose module (that's why it's not in object-oriented
+form) that introduces a novel datastructure in PERL. It implements
+the "double linked chain". The elements of the chain can contain basically
+everything. From chars to strings, from object references to arrays or hashes.
+It is used in the LiveSequence project to create a dynamical DNA sequence,
+easier to manipulate and change. Its use is mainly for sequence variation
+analysis but it could be used - for example - in e-cell projects.
+The Chain module in itself doesn't have any biological bias, so can be
+used for any programming purpose.
+
+Each element of the chain (with the exclusion of the first and the last of the
+chain) is connected to other two elements (the PREVious and the NEXT one).
+There is no absolute position (like in an array), hence if positions are
+important, they need to be computed (methods are provided).
+Otherwise it's easy to keep track of the elements with their "LABELs".
+There is one LABEL (think of it as a pointer) to each ELEMENT. The labels
+won't change after insertions or deletions of the chain. So it's
+always possible to retrieve an element even if the chain has been
+modified by successive insertions or deletions.
+From this the high potential profit for bioinformatics: dealing with
+sequences in a way that doesn't have to rely on positions, without
+the need of constantly updating them if the sequence changes, even
+dramatically.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+# DoubleChain Data Structure for PERL
+# by Joseph A.L. Insana - Deathson - Filius Mortis - Fal Mortais
+# insana at ebi.ac.uk, jinsana at gmx.net
+
+package Bio::LiveSeq::Chain;
+# TODO_list:
+# **** cleanup code
+# **** performance concerns
+# *??* create hash2dchain ???? (with hashkeys used for label)
+# **????** how about using array of arrays instead than hash of arrays??
+#
+# further strict complaints:
+# in verbose $string assignment around line 721 ???
+
+# TERMINOLOGY update, naming convention:
+# "chain" the datastructure
+# "element" the individual units that compose a chain
+# "label" the unique name of a single element
+# "position" the position of an element into the chain according to a
+#            particular coordinate system (e.g. counting from the start)
+# "value" what is stored in a single element
+
+use Carp qw(croak cluck carp);
+use Bio::Root::Version;
+use strict; 
+use integer; # WARNING: this is to increase performance
+             # a little bit of attention has to be given if float need to
+             # be stored as elements of the array
+             # the use of this "integer" affects all operations but not
+             # assignments. So float CAN be assigned as elements of the chain
+             # BUT, if you assign $z=-1.8;, $z will be equal to -1 because
+             # "-" counts as a unary operation!
+
+=head2 _updown_chain2string
+
+  Title   : _updown_chain2string
+  Usage   : $string = Bio::LiveSeq::Chain::_updown_chain2string("down",$chain,6,9)
+  Function: reads the contents of the chain, outputting a string
+  Returns : a string
+  Examples:
+          : down_chain2string($chain) -> all the chain from begin to end
+          : down_chain2string($chain,6) -> from 6 to the end
+          : down_chain2string($chain,6,4) -> from 6, going on 4 elements
+          : down_chain2string($chain,6,"",10) -> from 6 to 10
+          : up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
+  Defaults: start=first element; if len undef, goes to last
+            if last undef, goes to end
+            if last defined, it overrides len (undefining it)
+  Error code: -1
+  Args    : "up"||"down" as first argument to specify the reading direction 
+            reference (to the chain)
+            [first] [len] [last] optional integer arguments to specify how
+            much and from (and to) where to read
+
+=cut
+
+# methods rewritten 2.61
+# Reads the chain upstream: forwards all its arguments
+# (CHAIN_REF [FIRST] [LEN] [LAST]) to _updown_chain2string with "up" as
+# the direction keyword.
+sub up_chain2string {
+  return _updown_chain2string("up",@_);
+}
+# Reads the chain downstream: forwards all its arguments
+# (CHAIN_REF [FIRST] [LEN] [LAST]) to _updown_chain2string with "down" as
+# the direction keyword.
+sub down_chain2string {
+  return _updown_chain2string("down",@_);
+}
+
+# Common engine behind up_chain2string and down_chain2string.
+# arguments: "up"||"down" CHAIN_REF [FIRST] [LEN] [LAST]
+# returns: the concatenation of the values of the visited elements,
+#          -1 if the chain is missing or FIRST/LAST are not labels of it
+sub _updown_chain2string {
+  my ($direction,$chain,$first,$len,$last)=@_;
+  unless($chain) { cluck "no chain input"; return (-1); }
+  my $begin=$chain->{'begin'}; # the label of the BEGIN element
+  my $end=$chain->{'end'}; # the label of the END element
+  my $flow; # index of the link to follow: 1 = NEXT, 2 = PREV
+
+  if ($direction eq "up") {
+    $flow=2;
+    unless ($first) { $first=$end; } # if undef or 0, use $end
+  } else { # defaults to "down"
+    $flow=1;
+    unless ($first) { $first=$begin; } # if undef or 0, use $begin
+  }
+
+  unless($chain->{$first}) {
+    cluck "label for first not defined"; return (-1); }
+  if ($last) { # if last is defined, it gets priority and len is not used
+    unless($chain->{$last}) {
+      cluck "label for last not defined"; return (-1); }
+    if ($len) {
+      warn "Warning chain2string: argument LAST:$last overriding LEN:$len!";
+      undef $len;
+    }
+  } else {
+    if ($direction eq "up") {
+      $last=$begin; # if last not defined, go 'till begin (or upto len elements)
+    } else {
+      $last=$end; # if last not defined, go 'till end (or upto len elements)
+    }
+  }
+
+  my ($string,@array);
+  my $label=$first; my $i=1;
+  my $afterlast=$chain->{$last}[$flow]; # if last is at the edge, afterlast is undef
+  unless (defined $afterlast) { $afterlast=0; } # keep strict happy
+
+  # proceed for len elements or until last, whichever comes first
+  # if $len is undef the third test is always true ($i <= $i+1)
+  while (($label) && ($label != $afterlast) && ($i <= ($len || $i + 1))) {
+    @array=@{$chain->{$label}}; # (VALUE, NEXT, PREV) of the current element
+    $string .= $array[0];
+    $label = $array[$flow];
+    $i++;
+  }
+  return ($string); # if chain is interrupted $string won't be complete
+}
+
+=head2 _updown_labels
+
+ Title   : labels
+ Usage   : @labels = Bio::LiveSeq::Chain::_updown_labels("down",$chain,4,16)
+ Function: returns all the labels in a chain or those between two
+           specified ones (termed "first" and "last")
+ Returns : a reference to an array containing the labels
+ Args    : "up"||"down" as first argument to specify the reading direction 
+           reference (to the chain)
+           [first] [last] (integer for the starting and ending labels)
+
+=cut
+
+
+# Downstream flavour of _updown_labels.
+# arguments: CHAIN_REF [FIRSTLABEL] [LASTLABEL]
+# returns: whatever _updown_labels returns for direction "down"
+sub down_labels {
+  my ($chain,$first,$last)=@_;
+  return _updown_labels("down",$chain,$first,$last);
+}
+# Upstream flavour of _updown_labels (same arguments as down_labels).
+sub up_labels {
+  my ($chain,$first,$last)=@_;
+  return _updown_labels("up",$chain,$first,$last);
+}
+# Collects the labels met while walking the chain from FIRST to LAST.
+# arguments: "up"||"down" CHAIN_REF [FIRSTLABEL] [LASTLABEL]
+# returns: reference to an array containing the labels, 0 on error
+sub _updown_labels {
+  my ($direction,$chain,$first,$last)=@_;
+  unless($chain) { cluck "no chain input"; return (0); }
+  my $upstream=($direction eq "up");
+  my $flow=$upstream ? 2 : 1; # link to follow: 2 = PREV, 1 = NEXT
+  # missing (or 0) boundaries default to the chain edges, in walking order
+  my ($default_first,$default_last)=$upstream
+    ? ($chain->{'end'},$chain->{'begin'})
+    : ($chain->{'begin'},$chain->{'end'});
+  $first ||= $default_first;
+  $last ||= $default_last;
+  unless($chain->{$first}) { warn "not existing label $first"; return (0); }
+  unless($chain->{$last}) { warn "not existing label $last"; return (0); }
+
+  # the label just beyond LAST; 0 stands in when LAST has no such neighbour
+  my $afterlast=$chain->{$last}[$flow];
+  $afterlast=0 unless defined $afterlast;
+
+  my @labels;
+  for (my $label=$first; ($label)&&($label != $afterlast); $label=$chain->{$label}[$flow]) {
+    push(@labels,$label);
+  }
+  return (\@labels); # if chain is interrupted @labels won't be complete
+}
+
+
+=head2 start
+
+ Title   : start
+ Usage   : $start = Bio::LiveSeq::Chain::start()
+ Returns : the label marking the start of the chain
+ Errorcode: -1
+ Args    : none
+
+=cut
+
+# argument: CHAIN_REF
+# returns: the label stored under 'begin', -1 if no chain was passed
+sub start {
+  my ($chain)=@_;
+  if ($chain) {
+    return ($chain->{'begin'});
+  }
+  cluck "no chain input";
+  return (-1);
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $end = Bio::LiveSeq::Chain::end()
+ Returns : the label marking the end of the chain
+ Errorcode: -1
+ Args    : none
+
+=cut
+
+# argument: CHAIN_REF
+# returns: the label stored under 'end', -1 if no chain was passed
+sub end {
+  my ($chain)=@_;
+  if ($chain) {
+    return ($chain->{'end'});
+  }
+  cluck "no chain input";
+  return (-1);
+}
+
+=head2 label_exists
+
+ Title   : label_exists
+ Usage   : $check = Bio::LiveSeq::Chain::label_exists($chain,$label)
+ Function: It checks if a label is defined, i.e. if an element is there or
+           is not there anymore
+ Returns : 1 if the label exists, 0 if it is not there, -1 error
+ Errorcode: -1
+ Args    : reference to the chain, integer
+
+=cut
+
+# arguments: CHAIN_REF LABEL
+# returns: 1 if LABEL is true and the chain holds an element under it,
+#          0 otherwise, -1 if no chain was passed
+sub label_exists {
+  my ($chain,$label)=@_;
+  unless($chain) { cluck "no chain input"; return (-1); }
+  return (($label && $chain->{$label}) ? 1 : 0);
+}
+
+
+=head2 down_get_pos_of_label
+
+ Title   : down_get_pos_of_label
+ Usage   : $position = Bio::LiveSeq::Chain::down_get_pos_of_label($chain,$label,$first)
+ Function: returns the position of $label counting from $first, i.e. taking
+           $first as 1 of coordinate system. If $first is not specified it will
+           count from the start of the chain.
+ Returns : 
+ Errorcode: 0
+ Args    : reference to the chain, integer (the label of interest)
+           optional: integer (a different label that will be taken as the
+           first one, i.e. the one to count from)
+ Note:     It counts "downstream". To proceed backward use up_get_pos_of_label
+
+=cut
+
+# arguments: CHAIN_REF LABEL [FIRST]
+# The position of LABEL is obtained by counting, downstream, the elements
+# from FIRST to LABEL (see _updown_count).
+sub down_get_pos_of_label {
+  my ($chain,$label,$first)=@_;
+  return _updown_count("down",$chain,$first,$label);
+}
+# arguments: CHAIN_REF LABEL [FIRST]
+# Upstream counterpart of down_get_pos_of_label: counts, upstream, the
+# elements from FIRST to LABEL (see _updown_count).
+sub up_get_pos_of_label {
+  my ($chain,$label,$first)=@_;
+  return _updown_count("up",$chain,$first,$label);
+}
+
+=head2 down_subchain_length
+
+ Title   : down_subchain_length
+ Usage   : $length = Bio::LiveSeq::Chain::down_subchain_length($chain,$first,$last)
+ Function: returns the length of the chain between the labels "first" and "last", included
+ Returns : integer
+ Errorcode: 0
+ Args    : reference to the chain, integer, integer
+ Note:     It counts "downstream". To proceed backward use up_subchain_length
+
+=cut
+
+# arguments: CHAIN_REF [FIRST] [LAST]
+# returns: what _updown_count counts walking downstream from FIRST to LAST
+# (both included)
+sub down_subchain_length {
+  my ($chain,$first,$last)=@_;
+  return _updown_count("down",$chain,$first,$last);
+}
+# arguments: CHAIN_REF [FIRST] [LAST]
+# returns: what _updown_count counts walking upstream from FIRST to LAST
+# (both included)
+sub up_subchain_length {
+  my ($chain,$first,$last)=@_;
+  return _updown_count("up",$chain,$first,$last);
+}
+
+# Counts the elements met while walking the chain from FIRST to LAST
+# (both included).
+# arguments: DIRECTION CHAIN_REF [FIRSTLABEL] [LASTLABEL]
+# returns: the number of elements counted
+# errorcode 0 (missing chain or not existing labels)
+sub _updown_count {
+  my ($direction,$chain,$first,$last)=@_;
+  unless($chain) { cluck "no chain input"; return (0); }
+  my $begin=$chain->{'begin'}; # the label of the BEGIN element
+  my $end=$chain->{'end'}; # the label of the END element
+  my $flow; # link to follow: 1 = NEXT, 2 = PREV
+  if ($direction eq "up") { $flow=2;
+    unless ($first) { $first=$end; }
+    unless ($last) { $last=$begin; }
+  } else { $flow=1;
+    unless ($last) { $last=$end; }
+    unless ($first) { $first=$begin; }
+  }
+  unless($chain->{$first}) { warn "not existing label $first"; return (0); }
+  unless($chain->{$last}) { warn "not existing label $last"; return (0); }
+
+  # start from 0 so that a walk that visits nothing (FIRST lying right
+  # beyond LAST) yields 0 instead of undef
+  my $label=$first; my $count=0;
+  my $afterlast=$chain->{$last}[$flow]; # if last is at the edge, afterlast is undef
+  unless (defined $afterlast) { $afterlast=0; } # keep strict happy
+
+  while (($label)&&($label != $afterlast)) {
+    $count++;
+    $label=$chain->{$label}[$flow];
+  }
+  return ($count); # if chain is interrupted, $count will be up to the breaking point
+}
+
+=head2 invert_chain
+
+ Title   : invert_chain
+ Usage   : $errorcode=Bio::LiveSeq::Chain::invert_chain($chain)
+ Function: completely inverts the order of the chain elements; begin is swapped with end and all links updated (PREV&NEXT fields swapped)
+ Returns : 1 if all OK, 0 if errors
+ Errorcode: 0
+ Args    : reference to the chain
+
+=cut
+
+# Reverses the chain in place.
+# argument: CHAIN_REF
+# returns: 1 when done, 0 if no chain was passed
+sub invert_chain {
+  my $chain=$_[0];
+  unless($chain) { cluck "no chain input"; return (0); }
+  my $begin=$chain->{'begin'}; # the name of the first element
+  my $end=$chain->{'end'}; # the name of the last element
+  my ($label,@array);
+  $label=$begin; # starts from the beginning
+  while ($label) { # proceed with linked elements, swapping PREV and NEXT
+    @array=@{$chain->{$label}}; # copy taken before the links are overwritten
+    ($chain->{$label}[1],$chain->{$label}[2])=($array[2],$array[1]); # swap
+    $label = $array[1]; # go to what was the next one before the swap
+  }
+  # now swap begin and end fields
+  ($chain->{'begin'},$chain->{'end'})=($end,$begin);
+  return (1); # that's it
+}
+
+# warning that method has changed name
+#sub mutate_element {
+  #croak "Warning: old method name. Please update code to 'set_value_at_label'\n";
+  # &set_value_at_label;
+#}
+
+=head2 down_get_value_at_pos
+
+ Title   : down_get_value_at_pos
+ Usage   : $value = Bio::LiveSeq::Chain::down_get_value_at_pos($chain,$position,$first)
+ Function: used to access the value of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
+ Returns : whatever is stored in the element of the chain
+ Errorcode: 0
+ Args    : reference to the chain, integer, [integer]
+ Note:     It works "downstream". To proceed backward use up_get_value_at_pos
+
+=cut
+
+#sub get_value_at_pos {
+  #croak "Please use instead: down_get_value_at_pos";
+  ##&down_get_value_at_pos;
+#}
+# arguments: CHAIN_REF POSITION [FIRST]
+# Looks the label up with down_get_label_at_pos, then returns the value
+# stored under it; returns 0 (with a warning) when the lookup gave -1 or 0.
+sub down_get_value_at_pos {
+  my ($chain,$position,$first)=@_;
+  my $label=down_get_label_at_pos($chain,$position,$first);
+  if (($label ne -1)&&($label ne 0)) {
+    return _get_value($chain,$label);
+  }
+  # the lookup did not yield a usable label
+  warn "not existing element $label";
+  return (0);
+}
+# arguments: CHAIN_REF POSITION [FIRST]
+# Looks the label up with up_get_label_at_pos, then returns the value
+# stored under it; returns 0 (with a warning) when the lookup gave -1 or 0.
+sub up_get_value_at_pos {
+  my ($chain,$position,$first)=@_;
+  my $label=up_get_label_at_pos($chain,$position,$first);
+  if (($label ne -1)&&($label ne 0)) {
+    return _get_value($chain,$label);
+  }
+  # the lookup did not yield a usable label
+  warn "not existing element $label";
+  return (0);
+}
+
+=head2 down_set_value_at_pos
+
+ Title   : down_set_value_at_pos
+ Usage   : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_pos($chain,$newvalue,$position,$first)
+ Function: used to store a new value inside an element of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
+ Returns : 1
+ Errorcode: 0
+ Args    : reference to the chain, newvalue, integer, [integer]
+           (newvalue can be: integer, string, object reference, hash ref)
+ Note:     It works "downstream". To proceed backward use up_set_value_at_pos
+ Note2:    If the $newvalue is undef, it will delete the contents of the
+           element but it won't remove the element from the chain.
+
+=cut
+
+#sub set_value_at_pos {
+  #croak "Please use instead: down_set_value_at_pos";
+  ##&down_set_value_at_pos;
+#}
+# arguments: CHAIN_REF NEWVALUE POSITION [FIRST]
+# Looks the label up with down_get_label_at_pos and stores NEWVALUE under it.
+# returns: 1 if stored, 0 (with a warning) when the lookup gave -1 or 0
+sub down_set_value_at_pos {
+  my ($chain,$value,$position,$first)=@_;
+  my $label=down_get_label_at_pos($chain,$position,$first);
+  if (($label ne -1)&&($label ne 0)) {
+    _set_value($chain,$label,$value);
+    return (1);
+  }
+  # the lookup did not yield a usable label
+  warn "not existing element $label";
+  return (0);
+}
+# arguments: CHAIN_REF NEWVALUE POSITION [FIRST]
+# Looks the label up with up_get_label_at_pos and stores NEWVALUE under it.
+# returns: 1 if stored, 0 (with a warning) when the lookup gave -1 or 0
+sub up_set_value_at_pos {
+  my ($chain,$value,$position,$first)=@_;
+  my $label=up_get_label_at_pos($chain,$position,$first);
+  if (($label ne -1)&&($label ne 0)) {
+    _set_value($chain,$label,$value);
+    return (1);
+  }
+  # the lookup did not yield a usable label
+  warn "not existing element $label";
+  return (0);
+}
+
+
+=head2 set_value_at_label
+
+ Title   : set_value_at_label
+ Usage   : $errorcode = Bio::LiveSeq::Chain::set_value_at_label($chain,$newvalue,$label)
+ Function: used to store a new value inside an element of the chain defined by its label.
+ Returns : 1
+ Errorcode: 0
+ Args    : reference to the chain, newvalue, integer
+           (newvalue can be: integer, string, object reference, hash ref)
+ Note:     It works "downstream". To proceed backward use up_set_value_at_label
+ Note2:    If the $newvalue is undef, it will delete the contents of the
+           element but it won't remove the element from the chain.
+
+=cut
+
+# arguments: CHAIN_REF NEWVALUE LABEL
+# returns: 1 if NEWVALUE was stored in the element called LABEL,
+#          0 if the chain is missing or holds no element under LABEL
+sub set_value_at_label {
+  my ($chain,$value,$label)=@_;
+  unless($chain) { cluck "no chain input"; return (0); }
+
+  if ($chain->{$label}) {
+    _set_value($chain,$label,$value);
+    return (1);
+  }
+  # complain if label doesn't exist
+  warn "not existing element $label";
+  return (0);
+}
+
+=head2 get_value_at_label
+
+ Title   : get_value_at_label
+ Usage   : $value = Bio::LiveSeq::Chain::get_value_at_label($chain,$label)
+ Function: used to access the value of the chain from one element defined by its label.
+ Returns : whatever is stored in the element of the chain
+ Errorcode: 0
+ Args    : reference to the chain, integer
+ Note:     It works "downstream". To proceed backward use up_get_value_at_label
+
+=cut
+
+# arguments: CHAIN_REF LABEL
+# returns: the value stored in the element called LABEL,
+#          0 if the chain is missing or holds no element under LABEL
+sub get_value_at_label {
+  my ($chain,$label)=@_;
+  unless($chain) { cluck "no chain input"; return (0); }
+
+  if ($chain->{$label}) {
+    return _get_value($chain,$label);
+  }
+  # complain if label doesn't exist
+  warn "not existing label $label";
+  return (0);
+}
+
+# Stores VALUE in slot 0 (the value slot) of the element called LABEL.
+# arguments: CHAIN_REF LABEL VALUE
+# returns: the value just stored
+sub _set_value {
+  my ($chain,$label,$value)=@_;
+  return ($chain->{$label}[0]=$value);
+}
+# Reads slot 0 (the value slot) of the element called LABEL.
+# arguments: CHAIN_REF LABEL
+sub _get_value {
+  my ($chain,$label)=@_;
+  my $element=$chain->{$label};
+  return $element->[0];
+}
+
+=head2 down_get_label_at_pos
+
+ Title   : down_get_label_at_pos
+ Usage   : $label = Bio::LiveSeq::Chain::down_get_label_at_pos($chain,$position,$first)
+ Function: used to retrieve the label of an element of the chain at a particular position. It will count the position from the start of the chain or from the label $first, if $first is specified
+ Returns : integer
+ Errorcode: 0
+ Args    : reference to the chain, integer, [integer]
+ Note:     It works "downstream". To proceed backward use up_get_label_at_pos
+
+=cut
+
+# arguments: CHAIN_REF POSITION [FIRST]
+# returns: LABEL of element found counting downstream from FIRST
+# (all arguments are forwarded to _updown_get_label_at_pos)
+sub down_get_label_at_pos {
+  return _updown_get_label_at_pos("down",@_);
+}
+# arguments: CHAIN_REF POSITION [FIRST]
+# returns: LABEL of element found counting upstream from FIRST
+# (all arguments are forwarded to _updown_get_label_at_pos)
+sub up_get_label_at_pos {
+  return _updown_get_label_at_pos("up",@_);
+}
+
+# arguments: DIRECTION CHAIN_REF POSITION [FIRST]
+# Any DIRECTION other than "up" is handled as "down".
+# if FIRST is undefined (or 0), FIRST=BEGIN (if DIRECTION=down) or FIRST=END (up)
+# returns: the label reached, 0 if the chain is missing, FIRST does not
+#          exist or the chain ends before POSITION is reached
+
+sub _updown_get_label_at_pos {
+  my ($direction,$chain,$position,$first)=@_;
+  unless($chain) { cluck "no chain input"; return (0); }
+  my ($flow,$default_first)=($direction eq "up")
+    ? (2,$chain->{'end'})
+    : (1,$chain->{'begin'});
+  $first ||= $default_first;
+  unless($chain->{$first}) { warn "not existing label $first"; return (0); }
+
+  my $label=$first;
+  # FIRST itself counts as position 1; each step follows one link
+  for (my $i=1; $i < $position; $i++) {
+    $label=$chain->{$label}[$flow];
+    unless ($label) { return (0); } # chain ended before position reached
+  }
+  return ($label);
+}
+
+# English-spelled aliases of the praeinsert_* subs: each one simply hands
+# its whole argument list over to the Latin-spelled original
+sub preinsert_string { return praeinsert_string(@_); }
+sub preinsert_array { return praeinsert_array(@_); }
+
+# praeinsert_string CHAIN_REF STRING [POSITION]
+# STRING is split into its single chars, which are then handed to
+# praeinsert_array to be inserted in CHAIN before POSITION
+# (POSITION is passed on untouched, so the defaults are those of
+# praeinsert_array)
+sub praeinsert_string {
+  my ($chain,$string,$position)=@_;
+  my @chars=split(//,$string);
+  return praeinsert_array($chain,\@chars,$position);
+}
+
+# postinsert_string CHAIN_REF STRING [POSITION]
+# STRING is split into its single chars, which are then handed to
+# postinsert_array to be inserted in CHAIN after POSITION
+# (POSITION is passed on untouched, so the defaults are those of
+# postinsert_array)
+sub postinsert_string {
+  my ($chain,$string,$position)=@_;
+  my @chars=split(//,$string);
+  return postinsert_array($chain,\@chars,$position);
+}
+
+# praeinsert_array CHAIN_REF ARRAY_REF [POSITION]
+# thin front end to _praepostinsert_array, called in "prae" mode: the
+# elements of ARRAY go before POSITION
+sub praeinsert_array {
+  my ($chain,$arrayref,$position)=@_;
+  return _praepostinsert_array($chain,"prae",$arrayref,$position);
+}
+
+# postinsert_array CHAIN_REF ARRAY_REF [POSITION]
+# thin front end to _praepostinsert_array, called in "post" mode: the
+# elements of ARRAY go after POSITION
+sub postinsert_array {
+  my ($chain,$arrayref,$position)=@_;
+  return _praepostinsert_array($chain,"post",$arrayref,$position);
+}
+
+
+=head2 _praepostinsert_array
+
+ Title   : _praepostinsert_array
+ Usage   : ($insbegin,$insend) = Bio::LiveSeq::Chain::_praepostinsert_array($chainref,"post",$arrayref,$position)
+ Function: the elements of the array specified by $arrayref are inserted (creating a new subchain) in the chain specified by $chainref, before or after (depending on the "prae"||"post" keyword passed as second argument) the specified position.
+ Returns : two labels: the first and the last of the inserted subchain
+ Defaults: if no position is specified, the new subchain will be inserted
+ before the first element of the chain ("prae") or after the last one ("post")
+ Errorcode: 0
+ Args    : chainref, "prae"||"post", arrayref, integer (position)
+
+=cut
+
+# Common engine behind praeinsert_array and postinsert_array.
+# arguments: CHAIN_REF "prae"||"post" ARRAY_REF [POSITION]
+# returns: 0 if errors, otherwise the labels of the first and of the last
+# element of the inserted subchain
+sub _praepostinsert_array {
+  my ($chain,$praepost,$elements,$position)=@_;
+  unless($chain) { cluck "no chain input"; return (0); }
+  $praepost ||= "post"; # defaults to post
+  my $prae=($praepost eq "prae"); # any other keyword is handled as "post"
+  my $begin=$chain->{'begin'}; # the name of the first element of the chain
+  my $end=$chain->{'end'}; # the name of the last element of the chain
+
+  # an undefined (or empty) position means: before BEGIN when prae, after END
+  # when post; a position of 0 is kept as given
+  unless ((defined $position)&&($position ne "")) {
+    $position = $prae ? $begin : $end;
+  }
+  # check place of insertion
+  unless($chain->{$position}) { # complain if position doesn't exist
+    warn ("Warning _praepostinsert_array: not existing element $position");
+    return (0);
+  }
+
+  # check if there are elements to insert
+  unless (scalar(@{$elements})) {
+    warn ("Warning _praepostinsert_array: no elements input"); return (0); }
+
+  # create the new elements; their outer links are left open for the joins
+  my ($insertbegin,$insertend)=_create_chain_elements($chain,$elements);
+
+  # attach the new chain to the old chain
+  # 4 cases: prae at begin, prae at middle, post at middle, post at end
+  # NOTE: in case of double joinings always join wisely so not to
+  # delete the PREV/NEXT attribute before it is needed
+  # Both joins of a "middle" case are always attempted and BOTH results are
+  # kept, so that a failure of the first one is not hidden by the second.
+  my $noerror;
+  if ($prae) {
+    if ($position==$begin) { # 1st case: prae at begin
+      $noerror=_join_chain_elements($chain,$insertend,$begin);
+      $chain->{'begin'}=$insertbegin;
+    } else { # 2nd case: prae at middle
+      my $upjoin=_join_chain_elements($chain,up_element($chain,$position),$insertbegin);
+      my $downjoin=_join_chain_elements($chain,$insertend,$position);
+      $noerror=($upjoin && $downjoin);
+    }
+  } else {
+    if ($position==$end) { # 4th case: post at end
+      $noerror=_join_chain_elements($chain,$end,$insertbegin);
+      $chain->{'end'}=$insertend;
+    } else { # 3rd case: post at middle # note the order of joins (important)
+      my $downjoin=_join_chain_elements($chain,$insertend,down_element($chain,$position));
+      my $upjoin=_join_chain_elements($chain,$position,$insertbegin);
+      $noerror=($downjoin && $upjoin);
+    }
+  }
+
+  # check for errors and return begin,end of insertion
+  if ($noerror) {
+    return ($insertbegin,$insertend);
+  } else { # something went wrong with the joinings
+    warn "Warning _praepostinsert_array: Joining of insertion failed";
+    return (0);
+  }
+}
+
+# Adds one new element per entry of ARRAY to the chain hash, naming them
+# with consecutive labels starting from the chain's 'firstfree' counter and
+# linking each to its neighbours. 'firstfree' and 'size' are updated.
+# arguments: CHAIN_REF ARRAY_REF
+# returns: labels of the first and of the last of the new elements
+# returns 0 if error(s) encountered
+sub _create_chain_elements {
+  my ($chain,$arrayref)=@_;
+  unless($chain) {
+    warn ("Warning _create_chain_elements: no chain input"); return (0); }
+  my $count=scalar(@{$arrayref});
+  unless ($count) {
+    warn ("Warning _create_chain_elements: no elements input"); return (0); }
+  my $begin=$chain->{'firstfree'};
+  my $end=$begin-1; # label of the last element created so far
+  foreach my $value (@{$arrayref}) {
+    $end++;
+    $chain->{$end}=[$value,$end+1,$end-1]; # (VALUE, NEXT, PREV)
+  }
+  $chain->{'firstfree'}=$end+1; # what a new added element should be called
+  $chain->{'size'} += $count; # increase size of chain
+  # leave sticky edges (to be joined by whoever called this subroutine)
+  $chain->{$begin}[2]=undef;
+  $chain->{$end}[1]=undef;
+  return ($begin,$end);
+}
+
+# argument: CHAIN_REF ELEMENT
+# returns: name of DOWN/NEXT element (the downstream one)
+# returns -1 if error encountered (e.g. chain undefined)
+# returns 0 if there's no DOWN element
+sub down_element {
+  return _updown_element("down",@_);
+}
+# argument: CHAIN_REF ELEMENT
+# returns: name of UP/PREV element (the upstream one)
+# returns -1 if error encountered (e.g. chain undefined)
+# returns 0 if there's no UP element
+sub up_element {
+  return _updown_element("up",@_);
+}
+
+# used by both up_element and down_element
+# arguments: "up"||"down" CHAIN_REF ELEMENT
+# returns: the label stored in the PREV ("up") or NEXT (anything else) link
+# of ELEMENT, 0 if that link holds nothing, -1 if no chain was passed
+sub _updown_element {
+  my ($direction,$chain,$me)=@_;
+  $direction ||= "down"; # defaults to downstream
+  my $flow=($direction eq "up") ? 2 : 1; # which link to read
+  unless($chain) {
+    warn ("Warning ${direction}_element: no chain input"); return (-1); }
+  my $it=$chain->{$me}[$flow]; # the prev||next one, upstream||downstream
+  return ($it ? $it : 0);
+}
+
+# used by both is_downstream and is_upstream
+# arguments: "up"||"down" CHAIN_REF FIRSTLABEL SECONDLABEL
+# Walks the chain from FIRST in the given direction looking for SECOND.
+# returns: 1 if SECOND is met, 0 if the walk ends without meeting it or if
+# FIRST==SECOND, -1 if the chain or one of the two elements is missing
+sub _is_updownstream {
+  my $direction=$_[0] || "down"; # defaults to downstream
+  my $flow;
+  if ($direction eq "up") {
+    $flow=2; # used to determine the direction of chain navigation
+  } else {
+    $flow=1; # used to determine the direction of chain navigation
+  }
+  my $chain=$_[1];
+  unless($chain) {
+    warn ("Warning is_${direction}stream: no chain input"); return (-1); }
+  my $first=$_[2]; # the name of the first element
+  my $second=$_[3]; # the name of the second element
+  if ($first==$second) {
+    warn ("Warning is_${direction}stream: first==second!!"); return (0); }
+  unless($chain->{$first}) {
+    warn ("Warning is_${direction}stream: first element not defined"); return (-1); }
+  unless($chain->{$second}) {
+    warn ("Warning is_${direction}stream: second element not defined"); return (-1); }
+  my ($label,@array);
+  $label=$first;
+  my $found=0;
+  while (($label)&&(!($found))) { # searches till the end or till found
+    if ($label==$second) {
+      $found=1;
+    }
+    @array=@{$chain->{$label}};
+    $label = $array[$flow]; # go to the prev||next one, upstream||downstream
+  }
+  return $found;
+}
+
+=head2 is_downstream
+
+  Title   : is_downstream
+  Usage   : Bio::LiveSeq::Chain::is_downstream($chainref,$firstlabel,$secondlabel)
+  Function: checks if SECONDlabel follows FIRSTlabel
+            It runs downstream the elements of the chain from FIRST searching
+            for SECOND.
+  Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
+            reaches the end of the chain without having found it)
+  Errorcode -1
+  Args    : two labels (integer)
+
+=cut
+
+# arguments: CHAIN_REF FIRSTLABEL SECONDLABEL
+# (forwarded to _is_updownstream with "down" as the direction keyword)
+sub is_downstream {
+  return _is_updownstream("down",@_);
+}
+
+=head2 is_upstream
+
+  Title   : is_upstream
+  Usage   : Bio::LiveSeq::Chain::is_upstream($chainref,$firstlabel,$secondlabel)
+  Function: checks if SECONDlabel precedes FIRSTlabel
+            It runs upstream the elements of the chain from FIRST searching
+            for SECOND.
+  Returns : 1 if SECOND is found /before/ FIRST; 0 otherwise (i.e. if it
+            reaches the beginning of the chain without having found it)
+  Errorcode -1
+  Args    : two labels (integer)
+
+=cut
+
+# arguments: CHAIN_REF FIRSTLABEL SECONDLABEL
+# (forwarded to _is_updownstream with "up" as the direction keyword)
+sub is_upstream {
+  return _is_updownstream("up",@_);
+}
+
+=head2 check_chain
+
+ Title   : check_chain
+ Usage   : @errorcodes = Bio::LiveSeq::Chain::check_chain()
+ Function: a wraparound to a series of check for consistency of the chain
+           It will check for boundaries, size, backlinking and forwardlinking
+ Returns : array of 4 warn codes, each can be 1 (all ok) or 0 (something wrong)
+ Errorcode: 0
+ Args    : none
+ Note    : this is slow and thorough. It is not really needed. It is mostly
+           a code-developer tool.
+
+=cut
+
+# argument: CHAIN_REF
+# Runs the four consistency checks in turn, each on the very same argument
+# list this sub received.
+# returns: the codes of _boundcheck, _sizecheck, _downlinkcheck and
+# _uplinkcheck, in this order; -1 if no chain was passed
+sub check_chain {
+  my ($chain)=@_;
+  unless($chain) {
+    warn ("Warning check_chain: no chain input"); return (-1); }
+  my $warnbound=_boundcheck(@_);
+  my $warnsize=_sizecheck(@_);
+  my $warnbacklink=_downlinkcheck(@_);
+  my $warnforlink=_uplinkcheck(@_);
+  return ($warnbound,$warnsize,$warnbacklink,$warnforlink);
+}
+
+# consistency check for forwardlinks walking upstream
+# argument: a chain reference
+# returns: 1 all OK 0 problems
+sub _uplinkcheck {
+  return _updownlinkcheck("up",@_);
+}
+
+# consistency check for backlinks walking downstream
+# argument: a chain reference
+# returns: 1 all OK 0 problems
+sub _downlinkcheck {
+  return _updownlinkcheck("down",@_);
+}
+
+# consistency check for links, common to _uplinkcheck and _downlinkcheck
+# Walking in the given direction, it verifies for every element reached that
+# its opposite link points back to the element the walk came from.
+# argument: "up"||"down", chain_ref
+# returns: 1 all OK 0 problems
+sub _updownlinkcheck {
+  my $direction=$_[0] || "down"; # defaults to downstream
+  my ($flow,$wolf); # $flow: link followed by the walk; $wolf: the opposite one
+  my $chain=$_[1];
+  unless($chain) {
+    warn ("Warning _${direction}linkcheck: no chain input"); return (0); }
+  my $begin=$chain->{'begin'}; # the name of the first element
+  my $end=$chain->{'end'}; # the name of the last element
+  my ($label,@array,$me,$it,$itpoints);
+  if ($direction eq "up") {
+    $flow=2; # used to determine the direction of chain navigation
+    $wolf=1;
+    $label=$end; # start from end
+  } else {
+    $flow=1; # used to determine the direction of chain navigation
+    $wolf=2;
+    $label=$begin; # start from beginning
+  }
+  my $warncode=1;
+
+  while ($label) { # proceed with linked elements, checking neighbours
+    $me=$label;
+    @array=@{$chain->{$label}};
+    $label = $array[$flow]; # go to the next one
+    $it=$label;
+    if ($it) { # no sense in checking if next one not defined (END element)
+      @array=@{$chain->{$label}};
+      $itpoints=$array[$wolf];
+      unless ($me==$itpoints) {
+        warn "Warning: ${direction}LinkCheck: LINK wrong in $it, that doesn't point back to me ($me). It points to $itpoints\n";
+        $warncode=0;
+      }
+    }
+  }
+  return $warncode;
+}
+
+# consistency check for size of chain
+# argument: a chain reference
+# returns: 1 all OK 0 wrong size
+sub _sizecheck {
+  # Counts the elements reachable from BEGIN through the NEXT links and
+  # compares the count with the chain's stored SIZE field.
+  # Argument: chain reference
+  # Returns:  1 if the sizes agree, 0 if they differ or no chain was given
+  my $chain=$_[0];
+  unless($chain) {
+    warn ("Warning _sizecheck: no chain input"); return (0); }
+  my $begin=$chain->{'begin'}; # the name of the first element
+  my $warncode=1;
+  my ($label,@array);
+  my $size=$chain->{'size'};
+  my $count=0;
+  $label=$begin;
+  while ($label) { # proceed with linked elements, counting
+    @array=@{$chain->{$label}};
+    $label = $array[1]; # go to the next one
+    $count++;
+  }
+  if ($size != $count) {
+    warn "Size check reports error: assumed size: $size, real size: $count ";
+    $warncode=0;
+  }
+  return $warncode;
+}
+
+
+# consistency check for begin and end (boundaries)
+# argument: a chain reference
+# returns: 1 all OK 0 problems
+sub _boundcheck {
+  # Verifies that the BEGIN and END keys name existing elements and that
+  # nothing is linked before BEGIN or after END.
+  # Argument: chain reference
+  # Returns:  1 if both boundaries are sane, 0 otherwise (or if no chain)
+  my ($chain)=@_;
+  if (!$chain) {
+    warn ("Warning _boundcheck: no chain input");
+    return (0);
+  }
+  my $first_label=$chain->{'begin'};
+  my $last_label=$chain->{'end'};
+  my $all_ok=1;
+
+  # BEGIN side: the element must exist and its PREV field must be empty
+  if (!($first_label && $chain->{$first_label})) {
+    warn "Warning: BEGIN key of chain does not point to existing element!\n";
+    warn "\tWDEBUG begin: $first_label\n";
+    $all_ok=0;
+  } elsif ($chain->{$first_label}[2]) {
+    warn "Warning: BEGIN element has PREV field defined \n";
+    warn "\tWDEBUG begin: $first_label\t";
+    warn "\tWDEBUG begin's PREV: $chain->{$first_label}[2] \n";
+    $all_ok=0;
+  }
+
+  # END side: the element must exist and its NEXT field must be empty
+  if (!($last_label && $chain->{$last_label})) {
+    warn "Warning: END key of chain does not point to existing element!\n";
+    warn "\tWDEBUG end: $last_label\n";
+    $all_ok=0;
+  } elsif ($chain->{$last_label}[1]) {
+    warn "Warning: END element has NEXT field defined \n";
+    warn "\tWDEBUG end: $last_label\t";
+    warn "\tWDEBUG end's NEXT: $chain->{$last_label}[1] \n";
+    $all_ok=0;
+  }
+  return $all_ok;
+}
+
+# arguments: chain_ref
+# returns: the size of the chain (the number of elements)
+# return code -1: unexistant chain, errors...
+sub chain_length {
+  # Reports the stored SIZE field of the chain.
+  # Returns -1 when no chain is given or when SIZE is absent or zero.
+  my ($chain)=@_;
+  if (!$chain) {
+    warn ("Warning chain_length: no chain input");
+    return (-1);
+  }
+  my $stored_size=$chain->{'size'};
+  return $stored_size ? ($stored_size) : (-1);
+}
+
+# arguments: chain ref, first element name, second element name
+# returns: 1 or 0 (1 ok, 0 errors)
+sub _join_chain_elements {
+  # Links two existing elements so that the first is immediately followed
+  # by the second (NEXT of the left one, PREV of the right one).
+  # Returns 1 on success, 0 on any missing argument or unknown element.
+  my ($chain,$left,$right)=@_;
+  if (!$chain) {
+    warn ("Warning _join_chain_elements: no chain input");
+    return (0);
+  }
+  if (!($left && $right)) {
+    warn ("Warning _join_chain_elements: element arguments??");
+    return (0);
+  }
+  if (!($chain->{$left} && $chain->{$right})) { # one of them is not in the chain
+    warn ("Warning _join_chain_elements: elements not defined");
+    return 0;
+  }
+  $chain->{$left}[1]=$right;
+  $chain->{$right}[2]=$left;
+  return 1;
+}
+
+=head2 splice_chain
+
+ Title   : splice_chain
+ Usage   : $removed = Bio::LiveSeq::Chain::splice_chain($chainref,$first,$length,$last)
+ Function: removes the elements designated by FIRST and LENGTH from a chain.
+           The chain shrinks accordingly. If LENGTH is omitted, removes
+           everything from FIRST onward.
+           If LAST is specified, LENGTH is ignored and instead the removal
+           occurs from FIRST to LAST.
+ Returns : the elements removed as a string
+ Errorcode: -1
+ Args    : chainref, integer, integer, integer
+
+=cut
+
+sub splice_chain {
+  # Deletes a run of elements from the chain, re-links the two edges left
+  # by the cut, updates BEGIN/END/SIZE and returns the removed values
+  # concatenated into one string.
+  # Arguments: chain reference, FIRST label, LEN, LAST label
+  # Returns:   the concatenated values of the removed elements,
+  #            or -1 if the chain, FIRST or LAST cannot be found
+  my $chain=$_[0];
+  unless($chain) {
+    warn ("Warning splice_chain: no chain input"); return (-1); }
+  my $begin=$chain->{'begin'}; # the name of the first element
+  my $end=$chain->{'end'}; # the name of the (supposedly) last element
+  my $first=$_[1];
+  # a FIRST that stringifies to "0" is kept as given; any other false value
+  # (undef, empty string) falls back to the BEGIN label
+  unless (($first eq 0)||($first)) { $first=$begin; } # if undef, use $begin
+  my $len=$_[2];
+  my $last=$_[3];
+  my (@array, $string);
+  my ($beforecut,$aftercut);
+
+  unless($chain->{$first}) {
+    warn ("Warning splice_chain: first element not defined"); return (-1); }
+  if ($last) { # if last is defined, it gets priority and len is not used
+    unless($chain->{$last}) {
+      warn ("Warning splice_chain: last element not defined"); return (-1); }
+    if ($len) {
+      warn ("Warning splice_chain: argument LAST:$last overriding LEN:$len!");
+      undef $len;
+    }
+  } else {
+    $last=$end; # if last not defined, go 'till end (or to len, whichever 1st)
+  }
+
+  $beforecut=$chain->{$first}[2]; # what's the element before 1st deleted?
+  # if it is undef then it means we are splicing since the beginning
+
+  # $i is one more than the number of elements deleted so far
+  my $i=1;
+  my $label=$first;
+  my $afterlast=$chain->{$last}[1]; # if $last=$end $afterlast should be undef
+  unless (defined $afterlast) { $afterlast=0; } # keep strict happy
+
+  # proceed for len elements or until the end, whichever comes first
+  # if len undef goes till last
+  # ($len || $i + 1) makes the length test always pass when LEN is not set
+  # NOTE(review): if LAST lies upstream of FIRST the walk never meets
+  # $afterlast and appears to delete through to the end of the chain
+  while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
+    @array=@{$chain->{$label}};
+    $string .= $array[0];
+    $aftercut = $array[1]; # what's the element next last deleted?
+			   # also used as savevar to change label posdeletion
+    delete $chain->{$label}; # this can be deleted now
+    $label=$aftercut; # label is updated using the savevar
+    $i++;
+  }
+  
+  # Now fix the chain (sticky edges, fields)
+  # 4 cases: cut in the middle, cut from beginning, cut till end, cut all
+    #print "\n\tstickyDEBUG beforecut: $beforecut "; # DEBUG
+    #print "\taftercut: $aftercut \n"; # DEBUG
+  if ($beforecut) {
+    if ($aftercut) { # 1st case, middle cut
+      _join_chain_elements($chain,$beforecut,$aftercut);
+    } else { # 3rd case, end cut
+      $chain->{'end'}=$beforecut; # update the END field
+      $chain->{$beforecut}[1]=undef; # since we cut till the end
+    }
+  } else {
+    if ($aftercut) { # 2nd case, begin cut
+      $chain->{'begin'}=$aftercut; # update the BEGIN field
+      $chain->{$aftercut}[2]=undef; # since we cut from beginning
+    } else { # 4th case, all has been cut
+      $chain->{'begin'}=undef;
+      $chain->{'end'}=undef;
+    }
+  }
+  # $i - 1 elements were removed by the loop above
+  $chain->{'size'}=($chain->{'size'}) - $i + 1; # update the SIZE field
+
+  return $string;
+}
+
+
+# arguments: CHAIN_REF POSITION [FIRST]
+# returns: element counting POSITION from FIRST or from START if FIRST undef
+# i.e. returns the element at POSITION counting from FIRST
+#sub element_at_pos {
+  #croak "Warning: old method name. Please update code to 'down_get_label_at_position'\n";
+  ##&down_element_at_pos;
+#}
+#sub up_element_at_pos {
+  ## old wraparound
+  ##my @array=up_chain2string($_[0],$_[2],$_[1],undef,"elements");
+  ##return $array[-1];
+  #croak "old method name. Update code to: up_get_label_at_position";
+  ##&up_get_label_at_pos;
+#}
+#sub down_element_at_pos {
+  ## old wraparound
+  ##my @array=down_chain2string($_[0],$_[2],$_[1],undef,"elements");
+  ##return $array[-1];
+  #croak "old method name. Update code to: down_get_label_at_position";
+  ##&down_get_label_at_pos;
+#}
+
+# arguments: CHAIN_REF ELEMENT [FIRST]
+# returns: the position of ELEMENT counting from FIRST or from START
+#          if FIRST is undef
+# i.e. returns the Number of elements between FIRST and ELEMENT
+# i.e. returns the position of element taking FIRST as 1 of coordinate system
+#sub pos_of_element {
+  #croak ("Warning: old and ambiguous method name. Please update code to 'down_get_pos_of_label'\n");
+  ##&down_pos_of_element;
+#}
+#sub up_pos_of_element {
+  #croak ("Warning: old method name. Please update code to 'up_get_pos_of_label'\n");
+  ##up_chain2string($_[0],$_[2],undef,$_[1],"counting");
+#}
+#sub down_pos_of_element {
+  #croak ("Warning: old method name. Please update code to 'down_get_pos_of_label'\n");
+  ##down_chain2string($_[0],$_[2],undef,$_[1],"counting");
+#}
+
+# wraparounds to calculate length of subchain from first to last
+# arguments: chain_ref [first] [last]
+#sub subchain_length {
+  #croak "Warning: old method name. Please update code to 'down_subchain_length'\n";
+  ##&down_subchain_length;
+#}
+
+# wraparounds to have elements output
+# same arguments as chain2string
+# returns label|name of every element
+#sub elements {
+  #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
+  ##&down_elements;
+#}
+#sub up_elements {
+  #croak ("Warning: method no more supported. Please update code to 'up_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
+  ##up_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
+#}
+#sub down_elements {
+  #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
+  ##down_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
+#}
+
+# wraparounds to have verbose output
+# same arguments as chain2string
+# returns the chain in a very verbose way
+sub chain2string_verbose {
+  # Unsupported entry point: warns, then hands the caller's arguments on.
+  # NOTE(review): old_down_chain2string_verbose is not defined in the part
+  # of this file visible here -- confirm that it exists.
+  carp "Warning: method no more supported.\n";
+  return old_down_chain2string_verbose(@_);
+}
+sub up_chain2string_verbose {
+  # Unsupported entry point: warns, then produces the verbose upstream dump
+  # for the given chain, first label, length and last label.
+  my ($chain,$first,$len,$last)=@_;
+  carp "Warning: method no more supported.\n";
+  return old_up_chain2string($chain,$first,$len,$last,"verbose");
+}
+sub down_chain2string_verbose {
+  # Unsupported entry point: warns, then produces the verbose downstream
+  # dump for the given chain, first label, length and last label.
+  my ($chain,$first,$len,$last)=@_;
+  carp "Warning: method no more supported.\n";
+  return old_down_chain2string($chain,$first,$len,$last,"verbose");
+}
+
+#sub chain2string {
+  #croak ("Warning: old method name. Please update code to 'down_chain2string'\n");
+  ##&down_chain2string;
+#}
+sub old_up_chain2string {
+  # Upstream variant: forwards every argument to old_updown_chain2string
+  # with the direction fixed to "up".
+  return old_updown_chain2string("up",@_);
+}
+sub old_down_chain2string {
+  # Downstream variant: forwards every argument to old_updown_chain2string
+  # with the direction fixed to "down".
+  return old_updown_chain2string("down",@_);
+}
+
+# common to up_chain2string and down_chain2string
+# arguments: "up"||"down" chain_ref [first] [len] [last] [option]
+# [option] can be any of "verbose", "counting", "elements"
+# error: return -1
+# defaults: start = first element; if len undef, goes to last
+#           if last undef, goes to end
+#           if last def it overrides len (that gets undef)
+# returns: a string
+# example usage: down_chain2string($chain) -> all the chain from begin to end
+# example usage: down_chain2string($chain,6) -> from 6 to the end
+# example usage: down_chain2string($chain,6,4) -> from 6, going on 4 elements
+# example usage: down_chain2string($chain,6,"",10) -> from 6 to 10
+# example usage: up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
+sub old_updown_chain2string {
+  # Walks the chain from FIRST towards LAST (or for LEN elements) in the
+  # requested direction and returns, depending on OPTION:
+  #   "counting" -> the number of elements visited
+  #   "elements" -> the list of labels visited
+  #   "verbose"  -> a string of PREV_LABEL_NEXT=VALUE items (also prints a
+  #                 header and a total to the selected output handle)
+  #   otherwise  -> the concatenated element values
+  # Returns -1 if the chain, FIRST or LAST cannot be found.
+  my ($direction,$chain,$first,$len,$last,$option)=@_;
+  if (!$chain) {
+    warn ("Warning chain2string: no chain input");
+    return (-1);
+  }
+  my $begin=$chain->{'begin'}; # the name of the BEGIN element
+  my $end=$chain->{'end'}; # the name of the END element
+
+  # anything other than "up" walks downstream
+  my $going_up=($direction eq "up");
+  my $flow=($going_up ? 2 : 1); # index of the link followed while walking
+  if (!$first) { # undef or 0: start from the edge of the chain
+    $first=($going_up ? $end : $begin);
+  }
+
+  if (!$chain->{$first}) {
+    warn ("Warning chain2string: first element not defined");
+    return (-1);
+  }
+  if (!$last) {
+    # no explicit LAST: walk to the far edge (or for LEN elements)
+    $last=($going_up ? $begin : $end);
+  } else { # an explicit LAST wins over LEN
+    if (!$chain->{$last}) {
+      warn ("Warning chain2string: last element not defined");
+      return (-1);
+    }
+    if ($len) {
+      warn ("Warning chain2string: argument LAST:$last overriding LEN:$len!");
+      undef $len;
+    }
+  }
+
+  # decode the output mode
+  my $mode=($option ? $option : "");
+  my $verbose=($mode eq "verbose");
+  my $elements=($mode eq "elements");
+  my $counting=($mode eq "counting");
+
+  if ($verbose) {
+    print "BEGIN=$begin"; print " END=$end"; print " SIZE=$chain->{'size'}";
+    print " FIRSTFREE=$chain->{'firstfree'} \n";
+  }
+
+  my ($string,$count,@labels);
+  # the label one step beyond LAST marks where the walk has to stop
+  my $afterlast=$chain->{$last}[$flow];
+  $afterlast=0 unless (defined $afterlast);
+
+  # visit LEN elements or stop at LAST, whichever comes first;
+  # without LEN the length test always passes
+  my $label=$first;
+  for (my $i=1; ($label)&&($label != $afterlast) && ($i <= ($len || $i + 1)); $i++) {
+    my @fields=@{$chain->{$label}};
+    if ($verbose) {
+      $string .= "$fields[2]_${label}_$fields[1]=$fields[0] ";
+      $count++;
+    } elsif ($elements) {
+      push (@labels,$label);
+    } elsif ($counting) {
+      $count++;
+    } else {
+      $string .= $fields[0];
+    }
+    $label=$fields[$flow]; # step to next or prev element
+  }
+
+  if ($verbose) { print "TOTALprinted: $count\n"; }
+  return $count if ($counting);
+  return @labels if ($elements);
+  return $string;
+}
+
+# sub string2schain
+# --------> deleted, no more supported <--------
+# creation of a single linked list/chain from a string
+# basically could be recreated by taking the *2chain methods and
+# omitting to set the 3rd field (label 2) containing the back links
+
+
+# creation of a double linked list/chain from a string
+# returns reference to a hash containing the chain
+# arguments: STRING [OFFSET]
+# defaults: OFFSET defaults to 1 if undef
+# the chain will contain as elements the single characters in the string
+sub string2chain {
+  # Builds a chain whose elements are the single characters of STRING.
+  # Arguments: STRING, optional OFFSET (passed through to array2chain)
+  # Returns:   whatever array2chain returns for the character list
+  my ($string,$offset)=@_;
+  my @characters=split(//,$string);
+  return array2chain(\@characters,$offset);
+}
+
+=head2 array2chain
+
+  Title   : array2chain
+  Usage   : $chainref = Bio::LiveSeq::Chain::array2chain($arrayref,$offset)
+  Function: creation of a double linked chain from an array
+  Returns : reference to a hash containing the chain
+  Defaults: OFFSET defaults to 1 if undef
+  Error code: 0
+  Args    : a reference to an array containing the elements to be chainlinked
+            an optional integer > 0 (this will be the starting count for
+            the chain labels instead than having them begin from "1")
+
+=cut
+
+sub array2chain {
+  # Builds the hash that represents a double linked chain.
+  # Every element is stored under its numeric label as [VALUE, NEXT, PREV];
+  # the keys 'begin', 'end', 'firstfree' and 'size' hold the bookkeeping.
+  # Arguments: array reference, optional first label (defaults to 1)
+  # Returns:   reference to the chain hash, or 0 on empty input or an
+  #            offset smaller than 1
+  my ($arrayref,$begin)=@_;
+  my $array_count=scalar(@{$arrayref});
+  if (!$array_count) {
+    warn ("Warning array2chain: no elements input");
+    return (0);
+  }
+  if (!defined $begin) {
+    $begin=1;
+  } elsif ($begin < 1) {
+    warn "Warning array2chain: Zero or Negative offsets not allowed";
+    return (0);
+  }
+
+  my %hash;
+  $hash{'begin'}=$begin;
+  # labels run consecutively from $begin, one per input element
+  for my $index (0 .. $array_count-1) {
+    my $label=$begin+$index;
+    $hash{$label}=[$arrayref->[$index],$label+1,$label-1];
+  }
+  my $end=$begin+$array_count-1;
+  $hash{'end'}=$end;
+  $hash{firstfree}=$end+1; # label for the next element to be added
+  $hash{size}=$end-$begin+1; # number of elements in the chain
+
+  # the two edges must not point outside the chain
+  $hash{$begin}[2]=undef;
+  $hash{$end}[1]=undef;
+
+  return (\%hash);
+}
+
+1; # returns 1

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/ChainI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/ChainI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/ChainI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,271 @@
+# $Id: ChainI.pm,v 1.16 2006/07/04 22:23:17 mauricio Exp $
+#
+# bioperl module for Bio::LiveSeq::ChainI
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::ChainI - Double linked chain data structure
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+This class generates and manipulates generic double linked list, chain,
+that can be used to manage biological sequences.
+
+The advantage over strings or plain arrays is the ease of tracking
+changes (mutations) in the elements (sequence). The other side of the
+coin is that these structures need considerably more memory, but that
+is a cheap and constantly increasing resource in computers.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::ChainI;
+
+use Carp qw(croak);
+use strict; # this will be moved before when strict enforced in Chain.pm
+
+use Bio::LiveSeq::Chain; # package where all the subroutines are defined
+
+
+=head2 new
+
+  Title   : new
+  Usage   : $chain = Bio::LiveSeq::ChainI->new(-string => "thequickbrownfoxjumpsoverthelazydog",
+					     -offset => 3 );
+         OR $chain = Bio::LiveSeq::ChainI->new(-array => \@array,
+					     -offset => 3 );
+  Function: generates a new Bio::LiveSeq::ChainI
+  Returns : a new Chain
+  Args    : string
+         OR arrayreference
+        AND optional offset to create element labels
+
+=cut
+
+sub new {
+  # Builds the chain from -string or -array (with optional -offset) and
+  # blesses the resulting hash into the requested class.
+  # Croaks when neither -string nor -array holds a true value.
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+  my $obj;
+
+  if ($args{-string}) {
+    $obj = $thing->string2chain($args{-string}, $args{-offset});
+  } elsif ($args{-array}) {
+    $obj = $thing->array2chain($args{-array}, $args{-offset});
+  } else {
+    croak "$class not initialized properly";
+  }
+
+  $obj = bless $obj, $class;
+  return $obj;
+}
+
+# Every method below forwards to the function of the same name in
+# Bio::LiveSeq::Chain. Because they are invoked as methods, the chain
+# object itself arrives as the first element of @_ and is passed along as
+# the chain reference; the two constructors helpers drop it instead.
+
+# added as of 1.9
+sub string2chain {
+  shift @_; # so that it doesn't pass the object reference
+  return Bio::LiveSeq::Chain::string2chain(@_);
+}
+sub array2chain {
+  shift @_; # so that it doesn't pass the object reference
+  return Bio::LiveSeq::Chain::array2chain(@_);
+}
+#
+sub chain2string {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub down_chain2string {
+  return Bio::LiveSeq::Chain::down_chain2string(@_);
+}
+sub up_chain2string {
+  return Bio::LiveSeq::Chain::up_chain2string(@_);
+}
+sub chain2string_verbose {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub down_chain2string_verbose {
+  return Bio::LiveSeq::Chain::down_chain2string_verbose(@_);
+}
+sub up_chain2string_verbose {
+  return Bio::LiveSeq::Chain::up_chain2string_verbose(@_);
+}
+sub invert_chain {
+  return Bio::LiveSeq::Chain::invert_chain(@_);
+}
+sub mutate_element {
+  croak "Old method name, please update code to: set_value_at_label";
+}
+
+# new as of version 2.33 of Chain.pm
+sub down_labels {
+  return Bio::LiveSeq::Chain::down_labels(@_);
+}
+sub up_labels {
+  return Bio::LiveSeq::Chain::up_labels(@_);
+}
+
+sub start {
+  return Bio::LiveSeq::Chain::start(@_);
+}
+sub end {
+  return Bio::LiveSeq::Chain::end(@_);
+}
+sub label_exists {
+  return Bio::LiveSeq::Chain::label_exists(@_);
+}
+
+sub get_value_at_pos {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub down_get_value_at_pos {
+  return Bio::LiveSeq::Chain::down_get_value_at_pos(@_);
+}
+sub up_get_value_at_pos {
+  return Bio::LiveSeq::Chain::up_get_value_at_pos(@_);
+}
+sub set_value_at_pos {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub down_set_value_at_pos {
+  return Bio::LiveSeq::Chain::down_set_value_at_pos(@_);
+}
+sub up_set_value_at_pos {
+  return Bio::LiveSeq::Chain::up_set_value_at_pos(@_);
+}
+sub get_value_at_label {
+  return Bio::LiveSeq::Chain::get_value_at_label(@_);
+}
+sub set_value_at_label {
+  return Bio::LiveSeq::Chain::set_value_at_label(@_);
+}
+sub get_label_at_pos {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub up_get_label_at_pos {
+  return Bio::LiveSeq::Chain::up_get_label_at_pos(@_);
+}
+sub down_get_label_at_pos {
+  return Bio::LiveSeq::Chain::down_get_label_at_pos(@_);
+}
+sub get_pos_of_label {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub up_get_pos_of_label {
+  return Bio::LiveSeq::Chain::up_get_pos_of_label(@_);
+}
+sub down_get_pos_of_label {
+  return Bio::LiveSeq::Chain::down_get_pos_of_label(@_);
+}
+#
+
+# the "pre" spellings are aliases for the "prae" functions of Chain.pm
+sub preinsert_string {
+  return Bio::LiveSeq::Chain::praeinsert_string(@_);
+}
+sub preinsert_array {
+  return Bio::LiveSeq::Chain::praeinsert_array(@_);
+}
+sub praeinsert_string {
+  return Bio::LiveSeq::Chain::praeinsert_string(@_);
+}
+sub postinsert_string {
+  return Bio::LiveSeq::Chain::postinsert_string(@_);
+}
+sub praeinsert_array {
+  return Bio::LiveSeq::Chain::praeinsert_array(@_);
+}
+sub postinsert_array {
+  return Bio::LiveSeq::Chain::postinsert_array(@_);
+}
+sub down_element{
+  return Bio::LiveSeq::Chain::down_element(@_);
+}
+sub up_element {
+  return Bio::LiveSeq::Chain::up_element(@_);
+}
+sub is_downstream {
+  return Bio::LiveSeq::Chain::is_downstream(@_);
+}
+sub is_upstream {
+  return Bio::LiveSeq::Chain::is_upstream(@_);
+}
+sub check_chain {
+  return Bio::LiveSeq::Chain::check_chain(@_);
+}
+sub chain_length {
+  return Bio::LiveSeq::Chain::chain_length(@_);
+}
+sub splice_chain {
+  return Bio::LiveSeq::Chain::splice_chain(@_);
+}
+
+# Retired names: each one croaks and names the replacement method defined
+# in this package (up_get_pos_of_label / down_get_pos_of_label).
+sub pos_of_element {
+  croak "ambiguous and old method name. use: down_get_pos_of_label";
+}
+sub up_pos_of_element {
+  croak "old method name. use: up_get_pos_of_label";
+}
+sub down_pos_of_element {
+  croak "old method name. use: down_get_pos_of_label";
+}
+sub subchain_length {
+  croak "ambiguous method call. Explicit down_ or up_";
+}
+sub down_subchain_length {
+  return Bio::LiveSeq::Chain::down_subchain_length(@_);
+}
+sub up_subchain_length {
+  return Bio::LiveSeq::Chain::up_subchain_length(@_);
+}
+
+# these have to be deleted and changed names to conform to terminology
+# NOTE(review): the part of Chain.pm visible alongside this file shows
+# down_elements/up_elements only as commented-out code -- confirm the
+# targets of the three wrappers below still exist.
+sub elements {
+  return Bio::LiveSeq::Chain::down_elements(@_);
+}
+sub up_elements {
+  return Bio::LiveSeq::Chain::up_elements(@_);
+}
+sub down_elements {
+  return Bio::LiveSeq::Chain::down_elements(@_);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/DNA.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/DNA.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/DNA.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,101 @@
+# $Id: DNA.pm,v 1.11.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::DNA
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::DNA - DNA object for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+This holds the DNA sequence (or the RNA in the case of cDNA entries)
+and is accessed by exons, genes, transcripts... objects
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::DNA;
+
+use strict;
+use base qw(Bio::LiveSeq::SeqI);
+
+=head2 new
+
+  Title   : new
+  Usage   : $dna = Bio::LiveSeq::DNA->new(-seq => "atcgaccaatggacctca",
+					  -offset => 3 );
+
+  Function: generates a new Bio::LiveSeq::DNA
+  Returns : reference to a new object of class DNA
+  Errorcode -1
+  Args    : a string
+        AND an optional offset to create nucleotide labels (default is 1, i.e.
+            starting the count of labels from "1") -> do not bother using it ->
+            it could be used by alternative loaders !EMBL format
+  NOTE    : strand of DNA is set to 1 by default
+
+=cut
+
+sub new {
+  # Creates the DNA object from -seq (with optional -offset) and sets the
+  # default 'alphabet', 'strand' and 'seq' fields on it.
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+  my $obj;
+
+  if (!$args{-seq}) {
+    # nothing to build a chain from: an empty object is blessed only so
+    # that throw() can be invoked on it
+    $obj = bless {}, $class;
+    $obj->throw("$class not initialized properly");
+  } else {
+    my $chain = $thing->string2chain($args{-seq},$args{-offset}); # inherited from ChainI
+    $obj = bless $chain, $class;
+  }
+
+  $obj->{'alphabet'}='dna'; # alphabet default
+  $obj->{'strand'}=1; # strand default
+  $obj->{'seq'}=$obj; # the seq field refers to the object itself
+
+  return $obj;
+}
+
+# START method
+# redefined here because the default from SeqI accesses the field "start"
+sub start {
+  my ($self) = @_;
+  return $self->{'begin'}; # the chain's start is called begin
+}
+
+# overridden to provide faster output
+sub length {
+  my ($self) = @_;
+  return $self->chain_length();
+}
+
+# overridden to provide MUCH faster output
+sub valid {
+  my ($self,@labels) = @_;
+  return $self->label_exists(@labels);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Exon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Exon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Exon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,94 @@
+# $Id: Exon.pm,v 1.10.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Exon
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Exon - Range abstract class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+Class for EXON objects. They consist of a beginlabel, an endlabel (both
+referring to a LiveSeq DNA object) and a strand.
+The strand could be 1 (forward strand, default), -1 (reverse strand).
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Exon;
+
+use strict;
+use base qw(Bio::LiveSeq::Range);
+
+=head2 new
+
+  Title   : new
+  Usage   : $exon1 = Bio::LiveSeq::Exon-> new(-seq => $objref,
+					      -start => $startlabel,
+					      -end => $endlabel, -strand => 1);
+
+  Function: generates a new Bio::LiveSeq::Exon
+  Returns : reference to a new object of class Exon
+  Errorcode -1
+  Args    : two labels and an integer
+
+=cut
+
+=head2 get_Transcript
+
+  Title   : get_Transcript
+  Usage   : $transcript = $obj->get_Transcript()
+  Function: retrieves the reference to the object of class Transcript (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+  Note    : only Exons that compose a Transcript (i.e. those created out of
+            a CDS Entry-Feature) will have an attached Transcript
+
+=cut
+
+sub get_Transcript {
+  # Returns whatever is stored in the 'transcript' field of this object.
+  my ($self)=@_;
+  return $self->{'transcript'};
+}
+
+# Getter/setter for the Gene of this Exon.
+# A defined argument is stored in the 'gene' field first. The result is the
+# object's own 'gene' field when that key exists, else the 'gene' field of
+# the attached Transcript, else 0.
+sub gene {
+  my ($self,$value) = @_;
+  $self->{'gene'} = $value if (defined $value);
+  return $self->{'gene'} if (exists $self->{'gene'});
+  # fall back on the Transcript this Exon belongs to
+  return (0) unless (exists $self->get_Transcript->{'gene'});
+  return ($self->get_Transcript->{'gene'});
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Gene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Gene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Gene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,425 @@
+# $Id: Gene.pm,v 1.15.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Gene
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Gene - Gene object for LiveSeq, holding all features of a gene
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+This is used as storage for all object references concerning a particular gene.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Gene;
+use strict;
+use Carp;
+use Bio::LiveSeq::Prim_Transcript; # needed to create maxtranscript obj
+
+=head2 new
+
+  Title   : new
+  Usage   : $gene = Bio::LiveSeq::Gene->new(-name => "name",
+                                            -features => $hashref,
+                                            -upbound => $min,
+                                            -downbound => $max);
+
+  Function: generates a new Bio::LiveSeq::Gene
+  Returns : reference to a new object of class Gene
+  Errorcode -1
+  Args    : one string and one hashreference containing all features defined
+            for the Gene and the references to the LiveSeq objects for those
+            features.
+            Two labels for defining boundaries of the gene. Usually the
+            boundaries will reflect max span of transcript, exon... features,
+            while the DNA sequence will be created with some flanking regions
+            (e.g. with the EMBL_SRS::gene2liveseq routine).
+            If these two labels are not given, they will default to the start
+            and end of the DNA object.
+  Note    : the format of the hash has to be like
+               DNA => reference to LiveSeq::DNA object
+               Transcripts => reference to array of transcripts objrefs
+               Translations => reference to array of translations objrefs
+               Exons => ....
+               Introns => ....
+               Prim_Transcripts => ....
+               Repeat_Units => ....
+               Repeat_Regions => ....
+            Only DNA and Transcripts are mandatory
+
+=cut
+
+# Constructor. Validates the features hash, works out the widest span covered
+# by the Transcript, Exon and Prim_Transcript features (the "maxtranscript"),
+# builds the Gene object and registers it in every feature via gene().
+#   Args    : -name, -features (hash reference), -upbound, -downbound
+#   Returns : the new Gene object, or -1 (after a warning) when validation fails
+sub new {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+
+  my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound});
+
+  unless (ref($inputfeatures) eq "HASH") {
+    carp "$class not initialised because features hash not given";
+    return (-1);
+  }
+
+  my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not
+  my $features=\%features; # the ones input'ed, that could get destroyed
+
+  my $DNA=$features->{'DNA'};
+  unless (ref($DNA) eq "Bio::LiveSeq::DNA") {
+    carp "$class not initialised because DNA feature not found";
+    return (-1);
+  }
+
+  # From here on problems are reported through $DNA->warn: the Gene object
+  # does not exist until the end of this constructor, and the warn method of
+  # this class only relays to the DNA object anyway.
+
+  # Yields ($start,$end) of a feature when it is an object of exactly the
+  # wanted class and both labels are valid in the DNA, an empty list otherwise.
+  # The class is checked first so start/end are never called on a non-object.
+  my $span_of = sub {
+    my ($feature,$wanted) = @_;
+    return unless (ref($feature) eq $wanted);
+    my ($from,$to)=($feature->start,$feature->end);
+    return unless (($DNA->valid($from))&&($DNA->valid($to)));
+    return ($from,$to);
+  };
+
+  # Transcripts are mandatory; without this check the dereference below dies.
+  unless (ref($features->{'Transcripts'}) eq "ARRAY") {
+    $DNA->warn("$class not initialised because Transcripts feature not given");
+    return (-1);
+  }
+  my @Transcripts=@{$features->{'Transcripts'}};
+
+  unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") {
+    $DNA->warn("$class not initialised: first Transcript not a LiveSeq object");
+    return (-1);
+  }
+  my $strand=$Transcripts[0]->strand; # for maxtranscript consistency check
+
+  my ($minstart,$maxend); # used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features
+  my ($start,$end);
+
+  for my $transcript (@Transcripts) {
+    unless (($start,$end)=$span_of->($transcript,"Bio::LiveSeq::Transcript")) {
+      $DNA->warn("$class not initialised because of problems in Transcripts feature");
+      return (-1);
+    }
+    unless($minstart) { $minstart=$start; } # initialize
+    unless($maxend) { $maxend=$end; } # initialize
+    if ($transcript->strand != $strand) {
+      $DNA->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
+      return (-1);
+    }
+    if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
+    if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
+  }
+
+  # Optional feature lists: [hash key, required class, extends maxtranscript?]
+  # The order is the order in which the features end up in 'allfeatures'.
+  my @optional_kinds=(
+    ['Translations',     'Bio::LiveSeq::Translation',     0],
+    ['Exons',            'Bio::LiveSeq::Exon',            1],
+    ['Introns',          'Bio::LiveSeq::Intron',          0],
+    ['Repeat_Units',     'Bio::LiveSeq::Repeat_Unit',     0],
+    ['Repeat_Regions',   'Bio::LiveSeq::Repeat_Region',   0],
+    ['Prim_Transcripts', 'Bio::LiveSeq::Prim_Transcript', 1],
+  );
+
+  # create an array containing all obj references for all Gene Features
+  # useful for _set_Gene_in_all
+  my @allfeatures=($DNA,@Transcripts);
+
+  for my $kind (@optional_kinds) {
+    my ($key,$wanted,$extends_span)=@{$kind};
+    my @members;
+    if (defined($features->{$key})) {
+      @members=@{$features->{$key}};
+    }
+    for my $member (@members) {
+      unless (($start,$end)=$span_of->($member,$wanted)) {
+        $DNA->warn("$class not initialised because of problems in $key feature");
+        return (-1);
+      }
+      next unless ($extends_span);
+      if ($member->strand != $strand) {
+        $DNA->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
+        return (-1);
+      }
+      if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
+      if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
+    }
+    push (@allfeatures,@members);
+  }
+
+  # create hash holding numbers for Gene Features
+  my %multiplicity;
+  foreach my $key (keys(%features)) {
+    next if ($key eq "DNA");
+    $multiplicity{$key}=scalar(@{$features{$key}});
+  }
+  $multiplicity{DNA}=1;
+
+  # create maxtranscript object. It's a Prim_Transcript with start as the
+  # minimum start and end as the maximum end.
+  # usually these start and end will be the same as the gene->upbound and
+  # gene->downbound, but maybe there could be cases when this will be false
+  # (e.g. with repeat_units just before the prim_transcript or first exon,
+  # but still labelled with the same /gene qualifier)
+
+  my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA);
+
+  # check the upbound downbound parameters; they default to the DNA boundaries
+  if (defined($upbound)) {
+    unless ($DNA->valid($upbound)) {
+      $DNA->warn("$class not initialised because upbound label not valid");
+      return (-1);
+    }
+  } else {
+    $upbound=$DNA->start;
+  }
+  if (defined($downbound)) {
+    unless ($DNA->valid($downbound)) {
+      $DNA->warn("$class not initialised because downbound label not valid");
+      return (-1);
+    }
+  } else {
+    $downbound=$DNA->end;
+  }
+
+  my $self = bless {
+    name => $name, features => $features, multiplicity => \%multiplicity,
+    upbound => $upbound, downbound => $downbound,
+    allfeatures => \@allfeatures, maxtranscript => $maxtranscript,
+  }, $class;
+  _set_Gene_in_all($self,@allfeatures);
+  return $self;
+}
+
+# Registers a Gene in all of its Features.
+#   Args : the Gene object, followed by the feature objects belonging to it;
+#          gene($Gene) is called on each of them in turn.
+sub _set_Gene_in_all {
+  my ($Gene,@members) = @_;
+  for my $member (@members) {
+    $member->gene($Gene);
+  }
+}
+
+# Get or set the name of the gene.
+#   Args    : optional new name (ignored when undefined)
+#   Returns : the stored name, or "unknown" when the object has no 'name' key
+sub name {
+  my ($self,$value) = @_;
+  $self->{'name'} = $value if (defined $value);
+  return exists $self->{'name'} ? $self->{'name'} : "unknown";
+}
+
+# Read-only accessors. Each one returns the stored value as is: the features
+# hash reference, one of its entries, or another slot of the Gene object.
+
+# the features hash reference itself
+sub features {
+  my ($self) = @_;
+  return $self->{'features'};
+}
+
+# the entry stored under 'DNA' in the features hash
+sub get_DNA {
+  my ($self) = @_;
+  return $self->{'features'}{'DNA'};
+}
+
+# the entries below return whatever is stored under the key of the same name
+sub get_Transcripts {
+  my ($self) = @_;
+  return $self->{'features'}{'Transcripts'};
+}
+
+sub get_Translations {
+  my ($self) = @_;
+  return $self->{'features'}{'Translations'};
+}
+
+sub get_Prim_Transcripts {
+  my ($self) = @_;
+  return $self->{'features'}{'Prim_Transcripts'};
+}
+
+sub get_Repeat_Units {
+  my ($self) = @_;
+  return $self->{'features'}{'Repeat_Units'};
+}
+
+sub get_Repeat_Regions {
+  my ($self) = @_;
+  return $self->{'features'}{'Repeat_Regions'};
+}
+
+sub get_Introns {
+  my ($self) = @_;
+  return $self->{'features'}{'Introns'};
+}
+
+sub get_Exons {
+  my ($self) = @_;
+  return $self->{'features'}{'Exons'};
+}
+
+# reference to the hash that maps each feature kind to its number of objects
+sub featuresnum {
+  my ($self) = @_;
+  return $self->{'multiplicity'};
+}
+
+sub upbound {
+  my ($self) = @_;
+  return $self->{'upbound'};
+}
+
+sub downbound {
+  my ($self) = @_;
+  return $self->{'downbound'};
+}
+
+# prints one "<TAB>kind => count" line per entry of the featuresnum hash
+sub printfeaturesnum {
+  my ($self) = @_;
+  my %count_of = %{$self->featuresnum};
+  for my $kind (keys(%count_of)) {
+    print "\t$kind => $count_of{$kind}\n";
+  }
+}
+
+sub maxtranscript {
+  my ($self) = @_;
+  return $self->{'maxtranscript'};
+}
+
+# Empties this object and then asks everything it was holding to delete
+# itself as well, on a best-effort basis: every delete_Obj call on a held
+# item runs inside an eval, so items that cannot handle the call (plain
+# strings, unblessed references) are silently skipped.
+#   Returns : 1
+sub delete_Obj {
+  my $self = shift;
+
+  # Take a snapshot of the held values, then remove every key from the object
+  # before any of the held items is visited.
+  my @held = values %{$self};
+  delete @{$self}{ keys %{$self} };
+
+  for my $item (@held) {
+    my $type = ref($item);
+    my @targets;
+    if ($type =~ /LiveSeq/) {       # a LiveSeq object
+      @targets = ($item);
+    } elsif ($type =~ /ARRAY/) {    # a list of items
+      @targets = @{$item};
+    } elsif ($type =~ /HASH/) {     # a hash: its keys are tried as well as its values
+      my %copy = %{$item};
+      @targets = %copy;
+    }
+    for my $target (@targets) {
+      eval {
+        $target->delete_Obj;
+      };
+    }
+  }
+  return(1);
+}
+
+
+=head2 verbose
+
+ Title   : verbose
+ Usage   : $self->verbose(0)
+ Function: Sets verbose level for how ->warn behaves
+           -1 = silent: no warning
+            0 = reduced: minimal warnings
+            1 = default: all warnings
+            2 = extended: all warnings + stack trace dump
+            3 = paranoid: a warning becomes a throw and the program dies
+
+           Note: a quick way to set all LiveSeq objects at the same verbosity
+           level is to change the DNA level object, since they all look to
+           that one if their verbosity_level attribute is not set.
+           But the method offers fine tuning possibility by changing the
+           verbose level of each object in a different way.
+
+           So for example, after $loader= and $gene= have been retrieved
+           by a program, the command $gene->verbose(0); would
+           set the default verbosity level to 0 for all objects.
+
+ Returns : the current verbosity level
+ Args    : -1,0,1,2 or 3
+
+=cut
+
+
+sub verbose {
+  # The Gene keeps no verbosity of its own: the call (including an undefined
+  # argument) is passed on to the DNA object stored in the features hash and
+  # that object's answer is returned.
+  my ($self,$value) = @_;
+  my $DNA = $self->{'features'}->{'DNA'};
+  return $DNA->verbose($value);
+}
+
+sub warn {
+  # Relays the message to the warn method of the DNA object stored in the
+  # features hash and returns whatever that call returns.
+  my ($self,$value) = @_;
+  my $DNA = $self->{'features'}->{'DNA'};
+  return $DNA->warn($value);
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/BioPerl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/BioPerl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/BioPerl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,397 @@
+# $Id: BioPerl.pm,v 1.20.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::IO::BioPerl
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::IO::BioPerl - Loader for LiveSeq from EMBL entries with BioPerl
+
+=head1 SYNOPSIS
+
+  my $db="EMBL";
+  my $file="../data/M20132";
+  my $id="HSANDREC";
+
+  my $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"$db", -file=>"$file");
+  #                      or
+  my $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"$db", -id=>"$id");
+
+  my @translationobjects=$loader->entry2liveseq();
+
+  my $genename="AR";
+  my $gene=$loader->gene2liveseq(-gene_name => "$genename",
+                                    -getswissprotinfo => 0);
+
+  #NOTE1: The only -db now supported is EMBL. Hence it defaults to EMBL.
+  #NOTE2: -file requires a filename (and path if necessary) containing an
+  #             EMBL entry
+  #       -id will use Bio::DB::EMBL.pm to fetch the sequence from the web,
+  #             (bioperl wraparound to [w]getz from SRS)
+  #NOTE3: To retrieve the swissprot (if possible) attached to the embl entry
+  #             (to get protein domains at dna level), only Bio::DB::EMBL.pm
+  #             is supported under BioPerl. Refer to Bio::LiveSeq::IO::SRS
+  #             otherwise.
+  #NOTE4: NOTE3 is not implemented yet for bioperl, working on it
+
+
+=head1 DESCRIPTION
+
+This package uses BioPerl (SeqIO) to fetch a sequence database entry,
+analyse it and create LiveSeq objects out of it.
+
+A filename (or an ID that will fetch entry through the web) has to be passed
+to this package which will return references to all translation objects
+created from the EMBL entry. References to Transcription, DNA and Exon
+objects can all be retrieved departing from these.
+
+Alternatively, a specific "gene" name can be specified, together with
+the embl-acc ID. This will create a LiveSeq::Gene object with all
+relevant gene features attached/created.
+
+ATTENTION: if web fetching is requested, the package HTTP::Request needs
+to be installed.
+
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::IO::BioPerl;
+
+# TODO->TOCHECK
+# each_secondary_access not working
+# why array from each_tag_value($qual) ? When will there be more than one
+#                                        element in such array?
+# what is the annotation object? ($seqobj->annotation)
+# unsatisfied by both BioPerl binomial and SRS "org" to retrieve Organism info
+
+use strict;
+use Carp qw(cluck croak carp);
+use vars qw($DBEMBLLOADED);
+use Bio::SeqIO; # for -file entry loading
+
+# Note: Bio::DB::EMBL requires HTTP::Request. It is loaded inside an eval so
+# that this module still works when it is not installed; in that case use only
+# -file (not -id) and don't request swissprotinfo
+eval { 
+    require Bio::DB::EMBL; # for -id entry loading
+    $DBEMBLLOADED = 1;
+};
+
+
+use base qw(Bio::LiveSeq::IO::Loader);
+
+# This package can in the future host other databases loading subroutines.
+# e.g. ensembl2hash
+
+=head2 load
+
+  Title   : load
+  Usage   : my $filename="../data/M20132";
+            $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"EMBL", -file=>"$filename");
+                                   or
+            $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"EMBL", -id=>"HSANDREC");
+
+  Function: loads an entry with BioPerl from a database into a hash
+  Returns : reference to a new object of class IO::BioPerl holding an entry
+  Errorcode 0
+  Args    : a filename containing an EMBL entry OR an ID or ACCESSION code
+
+=cut
+
+# Loads one EMBL entry, either from a file (-file) or by ID/accession (-id),
+# converts it with embl2hash and wraps the result in a loader object.
+#   Args    : -db (optional, only "EMBL" is accepted), and exactly one of
+#             -file or -id
+#   Returns : the loader object (keys db, filename, id, hash), or 0 after a
+#             warning when the arguments are unusable or no entry was obtained
+sub load {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+
+  my ($db,$filename,$id)=($args{-db},$args{-file},$args{-id});
+
+  if (defined($db)) {
+    unless ($db eq "EMBL") {
+      carp "Note: only EMBL now supported!";
+      return(0);
+    }
+  } else {
+    $db="EMBL";
+  }
+
+  if (defined($id) && defined($filename)) {
+    carp "You can either specify a -id or a -file!";
+    return(0);
+  }
+
+  unless (defined($id) || defined($filename)) {
+    carp "You must specify either a -id or a -file!";
+    return(0);
+  }
+
+  my $hashref;
+  if ($db eq "EMBL") {
+    my $test_transl=0; # 0: "translation" qualifier not loaded; set to 1 to load it
+
+    # these can be changed for future needs
+    my @embl_valid_feature_names=qw(CDS CDS_span exon prim_transcript intron repeat_unit repeat_region mRNA);
+    my @embl_valid_qual_names=qw(gene codon_start db_xref product note number rpt_family transl_table);
+
+    # dunno yet how to implement test_transl again....
+    # probably on a one-on-one basis with each translation?
+    if ($test_transl) {
+      push (@embl_valid_qual_names,"translation"); # needed for test_transl
+    }
+
+    my $seqobj; # bioperl sequence object, to be passed to embl2hash
+
+    if (defined($filename)) {
+      my $stream = Bio::SeqIO->new('-file' => $filename, '-format' => 'EMBL');
+      $seqobj = $stream->next_seq();
+    } elsif ($DBEMBLLOADED) { # i.e. if -id and Bio::DB::EMBL could be loaded
+      my $embl = Bio::DB::EMBL->new;
+      $seqobj = $embl->get_Seq_by_id($id); # EMBL ID or ACC
+    } else {
+      my $root = Bio::Root::Root->new();
+      $root->warn("Must have HTTP::Request::Common installed, cannot run load without the -file option specified, see docs for Bio::LiveSeq::IO::BioPerl");
+      return(0);
+    }
+
+    # embl2hash calls methods on the sequence object straight away, so stop
+    # here when the file held no entry or the ID could not be fetched.
+    unless (defined($seqobj)) {
+      carp "No sequence entry could be read from ".(defined($filename) ? "file $filename" : "id $id");
+      return(0);
+    }
+
+    $hashref=embl2hash($seqobj,\@embl_valid_feature_names,\@embl_valid_qual_names);
+  }
+  unless ($hashref) { return (0); }
+
+  my $obj = bless {
+    db => $db, filename => $filename, id => $id, hash => $hashref,
+  }, $class;
+  return $obj;
+}
+
+=head2 embl2hash
+
+  Title   : embl2hash
+  Function: retrieves with BioPerl an EMBL entry, parses it and creates
+            a hash that contains all the information.
+  Returns : a reference to a hash
+  Errorcode: 0
+  Args    : a BioPerl Sequence Object (from file or web fetching)
+	    two array references to skip features and qualifiers (for
+	    performance)
+  Example: @valid_features=qw(CDS exon prim_transcript mRNA);
+           @valid_qualifiers=qw(gene codon_start db_xref product rpt_family);
+           $hashref=&embl2hash($seqobj,\@valid_features,\@valid_qualifiers);
+
+=cut
+
+# Converts a BioPerl sequence object into the plain hash used by the loaders.
+#   Args    : the sequence object, then two array references naming the
+#             features and the qualifiers to keep. Anything not named is
+#             skipped (for performance); when a list is not passed at all,
+#             nothing of that kind is kept.
+#   Returns : reference to a hash with the keys ID, AccNumber, Molecule,
+#             Division, Description, Organism, Sequence, SeqLength,
+#             FeaturesNumber, Features (array of feature hashes) and CDS
+#             (the members of Features whose name is "CDS")
+sub embl2hash {
+  my ($seqobj,$wanted_features,$wanted_quals) = @_;
+  my %valid_features; my %valid_names;
+  if ($wanted_features) {
+    %valid_features = map {$_, 1} @{$wanted_features}; # to skip features
+  }
+  if ($wanted_quals) {
+    %valid_names = map {$_, 1} @{$wanted_quals}; # to skip qualifiers
+  }
+
+  # all accession numbers in one string: primary first, then the secondary ones
+  my $entry_AccNumber = $seqobj->accession; # or maybe accession_number ?
+  foreach my $secondary_acc ($seqobj->get_secondary_accessions) {
+    $entry_AccNumber .= " $secondary_acc";
+  }
+
+  # an entry may come without a species object: leave Organism undefined then
+  my $speciesobj = $seqobj->species;
+  my $entry_Organism = defined($speciesobj) ? $speciesobj->binomial : undef;
+
+  my %entryhash;
+  $entryhash{ID}=$seqobj->display_id;
+  $entryhash{AccNumber}=$entry_AccNumber;
+  $entryhash{Molecule}=$seqobj->molecule;
+  $entryhash{Division}=$seqobj->division;
+  $entryhash{Description}=$seqobj->desc;
+  $entryhash{Organism}=$entry_Organism;
+  $entryhash{Sequence}=lc($seqobj->seq()); # SRS returns lowercase
+  $entryhash{SeqLength}=$seqobj->length;
+
+  my @topfeatures=$seqobj->top_SeqFeatures();
+  # counts every top-level feature, including the ones skipped below
+  $entryhash{FeaturesNumber}=scalar(@topfeatures);
+
+  # (start,end,strand) of a feature or sub-location, with start and end
+  # swapped when it lies on the reverse strand
+  my $oriented_range = sub {
+    my ($ranged) = @_;
+    if ($ranged->strand == -1) {
+      return ($ranged->end,$ranged->start,$ranged->strand);
+    }
+    return ($ranged->start,$ranged->end,$ranged->strand);
+  };
+
+  my @features;
+  my $position=0; # index of the next kept feature
+  foreach my $feat (@topfeatures) {
+    my $feature_name = $feat->primary_tag;
+    next unless ($valid_features{$feature_name});
+
+    my %feature;
+    my $featlocation=$feat->location;
+    if (($feature_name eq "CDS")&&($featlocation->isa('Bio::Location::SplitLocationI'))) {
+      # CDS made of various exons: one range per sub-location
+      my @transcript;
+      foreach my $subfeat ($featlocation->sub_Location()) {
+        push (@transcript,[$oriented_range->($subfeat)]);
+      }
+      $feature{range}=\@transcript;
+    } else {
+      my @range=$oriented_range->($feat);
+      if ($feature_name eq "CDS") { # single exon CDS (CDS name but not split location)
+        $feature{range}=[\@range]; # still a list of ranges, like the split case
+      } else { # all other range features
+        $feature{range}=\@range;
+      }
+    }
+    $feature{location}="deprecated";
+    $feature{position}=$position;
+    $feature{name}=$feature_name;
+
+    my @qual_names=$feat->all_tags();
+    $feature{qual_number}=scalar(@qual_names); # counts skipped qualifiers too
+
+    my %feature_qualifiers;
+    for my $qual (@qual_names) {
+      next unless ($valid_names{$qual});
+      my @qual_values=$feat->each_tag_value($qual);
+      # maybe the whole array should be entered, not just the 1st element?
+      # what could be the other elements? TOCHECK!
+      $feature_qualifiers{$qual}=$qual_values[0];
+    }
+    $feature{qualifiers}=\%feature_qualifiers;
+    push (@features,\%feature); # array of features
+    $position++;
+  }
+  $entryhash{Features}=\@features; # put this also into the hash
+
+  my @cds=grep { $_->{'name'} eq "CDS" } @features; # array just of CDSs
+  $entryhash{CDS}=\@cds; # put this also into the hash
+  return (\%entryhash);
+}
+
+=head2 novelaasequence2gene
+
+  Title   : novelaasequence2gene
+  Usage   : $gene=Bio::LiveSeq::IO::BioPerl->novelaasequence2gene(-aasequence => "MGLAAPTRS*");
+          : $gene=Bio::LiveSeq::IO::BioPerl->novelaasequence2gene(-aasequence => "MGLAAPTRS*",
+                                             -cusg_data => "58 44 7 29 3 3 480 267 105 143 122 39 144 162 14 59 53 25 233 292 19 113 88 246 28 68 161 231 27 102 128 151 67 60 138 131 48 61 153 19 233 73 150 31 129 38 147 71 138 43 181 81 44 15 255 118 312 392 236 82 20 10 14 141");
+          : $gene=Bio::LiveSeq::IO::BioPerl->novelaasequence2gene(-aasequence => "MGLAAPTRS*",
+                                             -cusg_data => "58 44 7 29 3 3 480 267 105 143 122 39 144 162 14 59 53 25 233 292 19 113 88 246 28 68 161 231 27 102 128 151 67 60 138 131 48 61 153 19 233 73 150 31 129 38 147 71 138 43 181 81 44 15 255 118 312 392 236 82 20 10 14 141",
+                                             -translation_table => "2",
+                                             -gene_name => "tyr-kinase");
+
+  Function: creates LiveSeq objects from a novel amino acid sequence,
+            using codon usage information (loaded from a file) to choose
+            codons according to relative frequencies.
+            If a codon_usage information is not specified,
+            the default is to use Homo sapiens data (taxonomy ID 9606).
+            If a translation_table ID is not specified, it will default to 1
+            (standard code).
+  Returns : reference to a Gene object containing references to LiveSeq objects
+  Errorcode 0
+  Args    : string containing an amino acid sequence
+	    string (optional) with codon usage data (64 integer numbers)
+            string (optional) specifying a gene_name
+            integer (optional) specifying a translation_table ID
+
+=cut
+
+# Builds a Gene from a novel amino acid sequence by handing the codon usage
+# table, the translation table ID, the sequence and the gene name to
+# Bio::LiveSeq::IO::Loader::_common_novelaasequence2gene.
+#   Args    : -aasequence (mandatory), -cusg_data, -gene_name,
+#             -translation_table (all optional)
+#   Returns : whatever _common_novelaasequence2gene returns, or 0 (after a
+#             warning) when no amino acid sequence is given
+sub novelaasequence2gene {
+  my ($self, %args) = @_;
+  my ($gene_name,$cusg_data,$aasequence,$ttabid)=($args{-gene_name},$args{-cusg_data},$args{-aasequence},$args{-translation_table});
+
+  unless ($aasequence) {
+    carp "aasequence not given";
+    return (0);
+  }
+  $gene_name ||= "Novel Unknown";
+  $ttabid ||= 1;
+
+  my @species_codon_usage;
+  if ($cusg_data) {
+    # split(' ') ignores leading whitespace and treats any run of whitespace
+    # as one separator, so stray blanks do not produce empty entries
+    @species_codon_usage=split(' ',$cusg_data);
+  } else {
+    # default codon usage table
+    @species_codon_usage=
+	qw(68664 118404 126679 51100 125600 123646 75667 210903 435317
+	139009 79303 135218 128429 192616 49456 161556 211962 131222
+	162837 213626 69346 140780 182506 219428 76684 189374 173010
+	310626 82647 202329 180955 250410 180001 118798 76398 160764
+	317359 119013 262630 359627 218376 186915 130857 377006 162826
+	113684 317703 441298 287040 245435 174805 133427 134523 108740
+	225633 185619 78463 240138 174021 244236 142435 8187 5913
+	14381); # updated 21Jul2000
+  }
+
+  # called as a plain function (not a method): no invocant is passed
+  my $gene=Bio::LiveSeq::IO::Loader::_common_novelaasequence2gene(\@species_codon_usage,$ttabid,$aasequence,$gene_name);
+  return ($gene);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/Loader.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/Loader.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/Loader.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,979 @@
+# $Id: Loader.pm,v 1.19.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::IO::Loader
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::IO::Loader - Parent Loader for LiveSeq
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+This package holds common methods used by BioPerl and file loaders.
+It contains methods to create LiveSeq objects out of entire entries or from a
+localized sequence region surrounding a particular gene.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::IO::Loader;
+
+use strict;
+use Carp qw(cluck croak carp);
+use Bio::LiveSeq::DNA;
+use Bio::LiveSeq::Exon;
+use Bio::LiveSeq::Transcript ;
+use Bio::LiveSeq::Translation;
+use Bio::LiveSeq::Gene;
+use Bio::LiveSeq::Intron;
+use Bio::LiveSeq::Prim_Transcript;
+use Bio::LiveSeq::Repeat_Region;
+use Bio::LiveSeq::Repeat_Unit;
+use Bio::LiveSeq::AARange;
+use Bio::Tools::CodonTable;
+
+=head2 entry2liveseq
+
+  Title   : entry2liveseq
+  Usage   : @translationobjects=$loader->entry2liveseq();
+          : @translationobjects=$loader->entry2liveseq(-getswissprotinfo => 0);
+  Function: creates LiveSeq objects from an entry previously loaded
+  Returns : array of references to objects of class Translation
+  Errorcode 0
+  Args    : optional boolean flag to avoid the retrieval of SwissProt
+            informations for all Transcripts containing SwissProt x-reference
+            default is 1 (to retrieve those informations and create AARange
+            LiveSeq objects)
+  Note    : this method can get really slow for big entries. The lightweight
+            gene2liveseq method is recommended
+
+=cut
+
+sub entry2liveseq {
+  my ($self, %args) = @_;
+
+  # -getswissprotinfo: absent means "on"; anything other than 0 or 1 is
+  # reported and treated as "off"
+  my $want_swiss=$args{-getswissprotinfo};
+  if (!defined($want_swiss)) {
+    $want_swiss=1;
+  } elsif (($want_swiss ne 0)&&($want_swiss ne 1)) {
+    carp "-getswissprotinfo argument can take only boolean (1 or 0) values. Setting it to 0, i.e. not trying to retrieve SwissProt information....";
+    $want_swiss=0;
+  }
+
+  # errorcode 0 when no entry has been stored under the 'hash' key
+  my $entry=$self->{'hash'};
+  return (0) unless ($entry);
+
+  my @translations=$self->hash2liveseq($entry,$want_swiss);
+
+  # the translation self-check is hard-wired off
+  my $run_selfcheck=0;
+  $self->test_transl($entry,\@translations) if ($run_selfcheck);
+
+  return @translations;
+}
+
+=head2 novelaasequence2gene
+
+  Title   : novelaasequence2gene
+  Usage   : $gene=$loader->novelaasequence2gene(-aasequence => "MGLAAPTRS*");
+          : $gene=$loader->novelaasequence2gene(-aasequence => "MGLAAPTRS*",
+                                             -taxon => 9606,
+                                             -gene_name => "tyr-kinase");
+
+  Function: creates LiveSeq objects from a novel amino acid sequence,
+            using codon usage database to choose codons according to
+            relative frequencies.
+            If a taxon ID is not specified, the default is to use the human
+            one (taxonomy ID 9606).
+  Returns : reference to a Gene object containing references to LiveSeq objects
+  Errorcode 0
+  Args    : string containing an amino acid sequence
+            integer (optional) with a taxonomy ID
+            string specifying a gene name
+
+=cut
+
+=head2 gene2liveseq
+
+  Title   : gene2liveseq
+  Usage   : $gene=$loader->gene2liveseq(-gene_name => "gene name");
+          : $gene=$loader->gene2liveseq(-gene_name => "gene name",
+                                        -flanking => 64);
+          : $gene=$loader->gene2liveseq(-gene_name => "gene name",
+                                        -getswissprotinfo => 0);
+          : $gene=$loader->gene2liveseq(-position => 4);
+
+  Function: creates LiveSeq objects from an entry previously loaded
+            It is a "light weight" creation because it creates
+            a LiveSequence just for the interesting region in an entry
+            (rather than for the whole entry, as in entry2liveseq) and for
+            the flanking regions up to 500 nucleotides (default) or up to
+            the specified amount of nucleotides (given as argument) around the
+            Gene.
+  Returns : reference to a Gene object containing possibly alternative
+            Transcripts.
+  Errorcode 0
+  Args    : string containing the gene name as in the EMBL feature qualifier
+            integer (optional) "flanking": amount of flanking bases to be kept
+            boolean (optional) "getswissprotinfo": if set to "0" it will avoid
+             trying to fetch information from a crossreference to a SwissProt
+             entry, avoiding the process of creation of AARange objects
+             It is "1" (on) by default
+
+            Alternative to a gene_name, a position can be given: an
+            integer (1-) containing the position of the desired CDS in the
+            loaded entry
+
+=cut
+
+sub gene2liveseq {
+  my ($self, %args) = @_;
+  my $name=$args{-gene_name};
+  my $flank=$args{-flanking};
+  my $want_swiss=$args{-getswissprotinfo};
+  my $position=$args{-position};
+
+  # exactly one of gene name / CDS position must be supplied
+  if (!($name)&&!($position)) {
+    carp "Gene_Name or Position not specified for gene2liveseq loading function";
+    return (0);
+  }
+  if (($name)&&($position)) {
+    carp "Gene_Name and Position cannot be given together, use one";
+    return (0);
+  }
+  # a position is passed on to hash2gene as a tagged string
+  my $selector=$name ? $name : "cds-position:".$position;
+
+  # -getswissprotinfo: absent means "on"; invalid values are reported and
+  # treated as "off"
+  if (!defined($want_swiss)) {
+    $want_swiss=1;
+  } elsif (($want_swiss ne 0)&&($want_swiss ne 1)) {
+    carp "-getswissprotinfo argument can take only boolean (1 or 0) values. Setting it to 0, i.e. not trying to retrieve SwissProt information....";
+    $want_swiss=0;
+  }
+
+  # -flanking: absent means 500 bases; negative values are rejected
+  if (!defined($flank)) {
+    $flank=500;
+  } elsif (!($flank >= 0)) {
+    carp "No sense in specifying a number < 0 for flanking regions to be created for gene2liveseq loading function";
+    return (0);
+  }
+
+  my $entry=$self->{'hash'};
+  return (0) unless ($entry);
+
+  my $gene=$self->hash2gene($entry,$selector,$flank,$want_swiss);
+  if ($gene) {
+    return $gene;
+  }
+  # a false value from hash2gene means it ran into problems
+  carp "gene2liveseq produced error";
+  return (0);
+}
+
+# TODO: update so that it will work even if CDS is not only accepted FEATURE!!
+# this method is for now deprecated and not supported
+#
+# arguments: entry hashref, reference to an array of Translation objects
+# Compares the seq of each Translation with the 'translation' qualifier of
+# the CDS found at the same index in the entry. On the first mismatch it
+# reports both strings and terminates the program with exit.
+sub test_transl {
+  # the translations arrive as the third argument; the original code read
+  # them from the entry hashref by mistake
+  my ($self,$entry,$translationobjects)=@_;
+  my @translationobjects=@{$translationobjects};
+  my @cds=@{$entry->{'CDS'}};
+  my $i=0; # index of the CDS matching the current translation
+  foreach my $translation (@translationobjects) {
+    my $obj_transl=$translation->seq;
+    my $hash_transl=$cds[$i]->{'qualifiers'}->{'translation'};
+    #before seq was changed in Translation 1.4# chop $obj_transl; # to remove trailing "*"
+    unless ($obj_transl eq $hash_transl) {
+      cluck "Serious error: Translation from the Entry does not match Translation from object's seq for CDS at position $i";
+      carp "\nEntry's transl: ",$hash_transl,"\n";
+      carp "\nObject's transl: ",$obj_transl,"\n";
+      exit;
+    }
+    $i++;
+  }
+}
+
+# Creates the LiveSeq objects for a whole entry.
+# arguments: hashref holding the EMBL entry data,
+#            getswissprotinfo boolean flag
+# returns: an array of Translation object references
+sub hash2liveseq {
+  my ($self,$entry,$getswissprotinfo)=@_;
+
+  # one DNA object for the complete entry sequence
+  my $dna=Bio::LiveSeq::DNA->new(-seq => $entry->{'Sequence'} );
+  $dna->alphabet(lc($entry->{'Molecule'}));
+  $dna->display_id($entry->{'ID'});
+  $dna->accession_number($entry->{'AccNumber'});
+  $dna->desc($entry->{'Description'});
+
+  # collect the exon ranges of every CDS and, when requested, the
+  # SwissProt hash looked up through its db_xref qualifier
+  my @cds=@{$entry->{'CDS'}};
+  my @ranges;
+  my @swiss_entries;
+  foreach my $feature (@cds) {
+    push (@ranges,$feature->{'range'});
+    next unless ($getswissprotinfo);
+    my $xref=$feature->{'qualifiers'}->{'db_xref'};
+    my $swiss=$self->get_swisshash($xref);
+    push (@swiss_entries,$swiss);
+  }
+
+  my @translations=($self->transexonscreation($dna,\@ranges));
+
+  # attach SwissProt information to the translation at the same index
+  foreach my $idx (0..$#translations) {
+    next unless ($swiss_entries[$idx]); # 0 or missing: nothing to attach
+    $self->swisshash2liveseq($swiss_entries[$idx],$translations[$idx]);
+  }
+  return (@translations);
+}
+
+# only features pertaining to a specified gene are created
+# only the sequence of the gene and appropriate context flanking regions
+# are created as chain
+# arguments: hashref, gene_name (OR: cds_position, passed as the string
+#            "cds-position:N"), length_of_flanking_sequences,
+#            getswissprotinfo boolean flag
+# returns: reference to Gene object
+#
+# Note: if entry contains just one CDS, all the features get added
+#       this is useful because often the features in these entries do not
+#       carry the /gene qualifier
+#
+# errorcode: 0
+sub hash2gene {
+  my ($self,$entry,$input,$flanking,$getswissprotinfo)=@_;
+  my $genefeatureshash;
+
+  my @cds=@{$entry->{'CDS'}};
+
+  # checking if a position has been given instead than a gene_name
+  if (index($input,"cds-position:") == 0 ) {
+    my $cds_position=substr($input,13); # extracting the cds position
+    # an out-of-range position leaves $genefeatureshash undefined and is
+    # reported by the check below
+    if (($cds_position >= 1)&&($cds_position <= scalar(@cds))) {
+      $genefeatureshash=$self->_findgenefeatures($entry,undef,$cds_position,$getswissprotinfo);
+    }
+  } else {
+    $genefeatureshash=$self->_findgenefeatures($entry,$input,undef,$getswissprotinfo);
+  }
+
+  unless (($genefeatureshash)&&(scalar(@{$genefeatureshash->{'genefeatures'}}))) { # array empty, no gene features found
+    my @genes=$self->genes($entry);
+    my $cds_number=scalar(@cds);
+    # @genes is interpolated into the message (space separated)
+    warn "Warning! Not even one genefeature found for /$input/....
+    The genes present in this entry are:\n\t@genes\n
+    The number of CDS in this entry is:\n\t$cds_number\n";
+    return(0);
+  }
+
+  # get max and min, check flankings
+  my ($min,$max)=$self->rangeofarray(@{$genefeatureshash->{'labels'}}); # gene "boundaries"
+  my $seqlength=$entry->{'SeqLength'};
+  my ($mindna,$maxdna); # some flanking region next to the gene "boundaries"
+  # the flanking region is clamped to the limits of the entry sequence
+  if ($min-$flanking < 1) {
+    $mindna=1;
+  } else {
+    $mindna=$min-$flanking;
+  }
+  if ($max+$flanking > $seqlength) {
+    $maxdna=$seqlength;
+  } else {
+    $maxdna=$max+$flanking;
+  }
+  my $subseq=substr($entry->{'Sequence'},$mindna-1,$maxdna-$mindna+1);
+
+  # create LiveSeq objects
+
+  # create DNA
+  my $dna=Bio::LiveSeq::DNA->new(-seq => $subseq, -offset => $mindna);
+  $dna->alphabet(lc($entry->{'Molecule'}));
+  $dna->source($entry->{'Organism'});
+  $dna->display_id($entry->{'ID'});
+  $dna->accession_number($entry->{'AccNumber'});
+  $dna->desc($entry->{'Description'});
+
+  my @transcripts=@{$genefeatureshash->{'transcripts'}};
+  # create Translations, Transcripts, Exons out of the CDS
+  unless (@transcripts) {
+    cluck "no CDS feature found for /$input/....";
+    return(0);
+  }
+  my @translationobjs=$self->transexonscreation($dna,\@transcripts);
+  my @transcriptobjs;
+
+  # get the Transcript obj_refs
+  my $j=0;
+  my @ttables=@{$genefeatureshash->{'ttables'}};
+  my @swisshashes=@{$genefeatureshash->{'swisshashes'}};
+  foreach my $translation (@translationobjs) {
+    push(@transcriptobjs,$translation->get_Transcript);
+    if ($ttables[$j]) { # if not undef
+      $translation->get_Transcript->translation_table($ttables[$j]);
+    #} else { # DEBUG
+    #  print "\n\t\tno translation table information....\n";
+    }
+    if ($swisshashes[$j]) { # if not 0
+      $self->swisshash2liveseq($swisshashes[$j],$translation);
+    }
+    $j++;
+  }
+
+  my %gene; # this is the hash to store created object references
+  $gene{DNA}=$dna;
+  $gene{Transcripts}=\@transcriptobjs;
+  $gene{Translations}=\@translationobjs;
+
+  my @exonobjs; my @intronobjs;
+  my @repeatunitobjs; my @repeatregionobjs;
+  my @primtranscriptobjs;
+
+  my ($object,$start,$end,$strand);
+
+  # In each section below an object comparing equal to -1 is skipped
+  # (presumably the constructors' failure code -- TODO confirm), while the
+  # description index keeps following the position in the range array.
+
+  my @exons=@{$genefeatureshash->{'exons'}};
+  my @exondescs=@{$genefeatureshash->{'exondescs'}};
+  if (@exons) {
+    foreach my $idx (0..$#exons) {
+      ($start,$end,$strand)=@{$exons[$idx]};
+      $object = Bio::LiveSeq::Exon->new(-seq=>$dna,-start=>$start,-end=>$end,-strand=>$strand);
+      next if ($object == -1);
+      $object->desc($exondescs[$idx]) if defined $exondescs[$idx];
+      push (@exonobjs,$object);
+    }
+    $gene{Exons}=\@exonobjs;
+  }
+  my @introns=@{$genefeatureshash->{'introns'}};
+  my @introndescs=@{$genefeatureshash->{'introndescs'}};
+  if (@introns) {
+    foreach my $idx (0..$#introns) {
+      ($start,$end,$strand)=@{$introns[$idx]};
+      $object=Bio::LiveSeq::Intron->new(-seq=>$dna,-start=>$start,-end=>$end,-strand=>$strand);
+      next if ($object == -1);
+      $object->desc($introndescs[$idx]);
+      push (@intronobjs,$object);
+    }
+    $gene{Introns}=\@intronobjs;
+  }
+  my @prim_transcripts=@{$genefeatureshash->{'prim_transcripts'}};
+  if (@prim_transcripts) {
+    foreach my $range (@prim_transcripts) {
+      ($start,$end,$strand)=@{$range};
+      $object=Bio::LiveSeq::Prim_Transcript->new(-seq=>$dna,-start=>$start,-end=>$end,-strand=>$strand);
+      if ($object != -1) { push (@primtranscriptobjs,$object); }
+    }
+    $gene{Prim_Transcripts}=\@primtranscriptobjs;
+  }
+  my @repeat_regions=@{$genefeatureshash->{'repeat_regions'}};
+  my @repeat_regions_family=@{$genefeatureshash->{'repeat_regions_family'}};
+  if (@repeat_regions) {
+    foreach my $idx (0..$#repeat_regions) {
+      ($start,$end,$strand)=@{$repeat_regions[$idx]};
+      $object=Bio::LiveSeq::Repeat_Region->new(-seq=>$dna,-start=>$start,-end=>$end,-strand=>$strand);
+      next if ($object == -1);
+      $object->desc($repeat_regions_family[$idx]);
+      push (@repeatregionobjs,$object);
+    }
+    $gene{Repeat_Regions}=\@repeatregionobjs;
+  }
+  my @repeat_units=@{$genefeatureshash->{'repeat_units'}};
+  my @repeat_units_family=@{$genefeatureshash->{'repeat_units_family'}};
+  if (@repeat_units) {
+    foreach my $idx (0..$#repeat_units) {
+      ($start,$end,$strand)=@{$repeat_units[$idx]};
+      $object=Bio::LiveSeq::Repeat_Unit->new(-seq=>$dna,-start=>$start,-end=>$end,-strand=>$strand);
+      next if ($object == -1);
+      $object->desc($repeat_units_family[$idx]);
+      push (@repeatunitobjs,$object);
+    }
+    $gene{Repeat_Units}=\@repeatunitobjs;
+  }
+
+  # create the Gene
+  my $gene_name=$genefeatureshash->{'gene_name'}; # either a name or a cdspos
+  return (Bio::LiveSeq::Gene->new(-name=>$gene_name,-features=>\%gene,
+                                  -upbound=>$min,-downbound=>$max));
+}
+
+# maybe this function will be moved to general utility package
+# argument: array of numbers
+# returns: (min,max) numbers in the array
+# The first number seeds both results; every later undefined element is
+# counted as 0.
+sub rangeofarray {
+  my ($self,@numbers)=@_;
+  my $lowest=shift(@numbers);
+  my $highest=$lowest;
+  foreach my $value (@numbers) {
+    $value = 0 unless defined $value;
+    $lowest=$value if ($value < $lowest);
+    $highest=$value if ($value > $highest);
+  }
+  return ($lowest,$highest);
+}
+
+
+# argument: reference to DNA object, reference to array of transcripts
+#           (each transcript being an array of [start,end,strand] exon ranges)
+# returns: an array of Translation object references
+# A transcript whose Transcript object compares equal to -1 yields no
+# Translation and is skipped.
+sub transexonscreation {
+  my ($self,$dna,$transcripts)=@_;
+  my @transcripts=@{$transcripts};
+
+  my @translationobjects;
+  foreach my $transcript (@transcripts) {
+    # a fresh exon list for every transcript: exons of a transcript that
+    # could not be created must not leak into the following one
+    my @transexons;
+    foreach my $exonref (@{$transcript}) {
+      my ($start,$end,$strand)=@{$exonref};
+      #print "Creating Exon: start $start end $end strand $strand\n";
+      my $exonobj=Bio::LiveSeq::Exon->new(-seq=>$dna,-start=>$start,-end=>$end,-strand=>$strand);
+      push (@transexons,$exonobj);
+    }
+    my $transcriptobj=Bio::LiveSeq::Transcript->new(-exons => \@transexons );
+    if ($transcriptobj != -1) {
+      my $translationobj=Bio::LiveSeq::Translation->new(-transcript=>$transcriptobj);
+      push (@translationobjects,$translationobj);
+    }
+  }
+  return (@translationobjects);
+}
+
+#sub printgene {
+# deleted. Some functionality placed in Gene->printfeaturesnum
+
+=head2 printswissprot
+
+  Title   : printswissprot
+  Usage   : $loader->printswissprot($hashref);
+  Function: prints out all informations loaded from a database entry into the
+            loader. Mainly used for testing purposes.
+  Args    : a hashref containing the SWISSPROT entry datas
+  Note    : the hashref can be obtained with a call to the method
+               $loader->get_swisshash()      (BioPerl via Bio::DB::EMBL.pm)
+	    that takes as argument a string like "SWISS-PROT:P10275"
+
+=cut
+
+# argument: hashref holding the SWISSPROT entry data
+# Prints the header fields of that hash, the first 64 characters of the
+# sequence and one line per feature. Does nothing when no hashref is given.
+sub printswissprot {
+  my ($self,$entry)=@_;
+  return unless ($entry);
+
+  # output format => key of the entry hash, in printing order
+  my @header=(
+    ["ID: %s\n",'ID'],
+    ["ACC: %s\n",'AccNumber'],
+    ["GENE: %s\n",'Gene'],
+    ["DES: %s\n",'Description'],
+    ["ORG: %s\n",'Organism'],
+    ["SEQLN: %s\n",'SeqLength'],
+  );
+  foreach my $field (@header) {
+    my ($format,$key)=@{$field};
+    printf($format,$entry->{$key});
+  }
+  printf("SEQ: %s\n",substr($entry->{'Sequence'},0,64));
+
+  if ($entry->{'Features'}) {
+    my @features=@{$entry->{'Features'}};
+    foreach my $feature (@features) {
+      print "|",$feature->{'name'},"|";
+      print " at ",$feature->{'location'},": ";
+      print "",$feature->{'desc'},"\n";
+    }
+  }
+}
+
+=head2 printembl
+
+  Title   : printembl
+  Usage   : $loader->printembl();
+  Function: prints out all informations loaded from a database entry into the
+            loader. Mainly used for testing purposes.
+  Args    : none
+
+=cut
+
+# argument: hashref holding the EMBL entry data (optional; the hash stored
+#           in the loader under 'hash' is used when it is omitted)
+# Prints the header fields, the first 64 characters of the sequence and, for
+# every feature, its ranges followed by its qualifiers.
+sub printembl {
+  my ($self,$entry)=@_;
+  $entry=$self->{'hash'} unless ($entry);
+
+  # output format => key of the entry hash, in printing order
+  my @header=(
+    ["ID: %s\n",'ID'],
+    ["ACC: %s\n",'AccNumber'],
+    ["MOL: %s\n",'Molecule'],
+    ["DIV: %s\n",'Division'],
+    ["DES: %s\n",'Description'],
+    ["ORG: %s\n",'Organism'],
+    ["SEQLN: %s\n",'SeqLength'],
+  );
+  foreach my $field (@header) {
+    my ($format,$key)=@{$field};
+    printf($format,$entry->{$key});
+  }
+  printf("SEQ: %s\n",substr($entry->{'Sequence'},0,64));
+
+  my @features=@{$entry->{'Features'}};
+  my @cds=@{$entry->{'CDS'}};
+  print "\nFEATURES\nNumber of CDS: ",scalar(@cds)," (out of ",$entry->{'FeaturesNumber'}, " total features)\n";
+
+  my $j=0; # running count of the CDS features printed so far
+  foreach my $i (0..$#features) {
+    my $feature=$features[$i];
+    my $featurename=$feature->{'name'};
+    if ($featurename ne "CDS") {
+      # plain feature: its range is printed as one tab-separated line
+      print "|$featurename| at feature position: $i\n";
+      print "\trange: ";
+      print join("\t",@{$feature->{'range'}}),"\n";
+    } else {
+      # CDS: its range is a list of exons, printed one per line
+      print "|CDS| number $j at feature position: $i\n";
+      my $transcript=$feature->{'range'};
+      foreach my $exonref (@{$transcript}) {
+        my ($start,$end,$strand)=@{$exonref};
+        print "\tExon: start $start end $end strand $strand\n";
+      }
+      $j++;
+    }
+    my $qualifiers=$feature->{'qualifiers'}; # hash
+    foreach my $key (keys (%{$qualifiers})) {
+      print "\t\t",$key,": ";
+      print $qualifiers->{$key},"\n";
+    }
+  }
+}
+
+=head2 genes
+
+  Title   : genes
+  Usage   : $loader->genes();
+  Function: Returns an array of gene_names (strings) contained in the loaded
+            entry.
+  Args    : none
+
+=cut
+
+# argument: entryhashref (optional; the hash stored in the loader under
+#           'hash' is used when it is omitted)
+# returns: array of the distinct genenames found in the /gene qualifiers of
+#          the entry's features, in order of first appearance
+sub genes {
+  my ($self,$entry)=@_;
+  unless ($entry) {
+    $entry=$self->{'hash'};
+  }
+  # an entry without a Features list yields an empty result
+  my @entryfeatures=@{$entry->{'Features'} || []};
+  my %seen;
+  my @genenames;
+  for my $entryfeature (@entryfeatures) {
+    my $genename=$entryfeature->{'qualifiers'}->{'gene'};
+    next unless ($genename);
+    # exact-match bookkeeping: a name that is merely a substring of an
+    # earlier one (e.g. "AB" after "ABC") is still reported, and names
+    # are never split on embedded spaces
+    push(@genenames,$genename) unless ($seen{$genename}++);
+  }
+  return (@genenames);
+}
+
+# arguments: swisshash, translation objref
+# Copies description, IDs, gene name and organism from the SwissProt hash
+# onto the Translation, aligns the two protein sequences and, when the
+# alignment shows no gaps, creates one AARange object per SwissProt feature
+# and stores the list in the Translation's aa_ranges attribute.
+sub swisshash2liveseq {
+  my ($self,$entry,$translation)=@_;
+  my $translength=$translation->length;
+  $translation->desc($translation->desc . $entry->{'Description'});
+  $translation->display_id("SWISSPROT:" . $entry->{'ID'});
+  $translation->accession_number("SWISSPROT:" . $entry->{'AccNumber'});
+  $translation->name($entry->{'Gene'});
+  $translation->source($entry->{'Organism'});
+
+  my @features=$entry->{'Features'} ? @{$entry->{'Features'}} : ();
+
+  # check for cleaved Met: an INIT_MET feature located at "0 0" is dropped
+  # from the feature list and shifts the numbering by one
+  my $cleavedmet=0;
+  my @keptfeatures;
+  foreach my $feature (@features) {
+    unless (($feature->{'name'} eq "INIT_MET")&&($feature->{'location'} eq "0 0")) {
+      push(@keptfeatures,$feature);
+      next;
+    }
+    $cleavedmet=1;
+    $translation->{'offset'}="1"; # from swissprot to liveseq protein sequence
+  }
+
+  my $swissseq=$entry->{'Sequence'};
+  my $liveseqtransl=$translation->seq;
+  chop $liveseqtransl; # to take away the trailing STOP "*"
+  my $translated=substr($liveseqtransl,$cleavedmet);
+
+  # alignment after cleavage of possible initial met
+  my ($liveseq_aa,$swiss_aa,$codes_aa)=$self->_get_alignment($translated,$swissseq);
+
+  # a gap in either aligned string means the two proteins differ in size
+  unless ((index($liveseq_aa,"-") == -1)&&(index($swiss_aa,"-") == -1)) {
+    print "LIVE-SEQ=\'$liveseq_aa\'\nIDENTITY=\'$codes_aa\'\nSWS-PROT=\'$swiss_aa\'\n";
+    carp "Nucleotides translation and SwissProt translation are different in size, cannot attach the SwissSequence to the EMBL one, cannot add any AminoAcidRange object/Domain information!";
+    return;
+  }
+
+  my @aarangeobjects;
+  foreach my $feature (@keptfeatures) {
+    my ($start,$end)=split(/ /,$feature->{'location'});
+    # Note: cleavedmet is taken in account for updating numbering
+    my $aarangeobj=Bio::LiveSeq::AARange->new(-start => $start+$cleavedmet, -end => $end+$cleavedmet, -name => $feature->{'name'}, -description => $feature->{'description'}, -translation => $translation, -translength => $translength);
+    push(@aarangeobjects,$aarangeobj) if ($aarangeobj != -1);
+  }
+  $translation->{'aa_ranges'}=\@aarangeobjects;
+}
+
+# Default SwissProt lookup: without SRS support there is nothing to fetch,
+# so the answer is always 0. This function is overridden in the SRS package.
+sub get_swisshash {
+  my $no_swissprot_data=0;
+  return $no_swissprot_data;
+}
+
+# Args: $entry hashref, gene_name OR cds_position (undef is used to
+# choose between the two), getswissprotinfo boolean flag
+# Returns: a reference to a hash holding various arrayrefs used in the
+# hash2gene method (genefeatures, labels, ttables, swisshashes, transcripts,
+# exons, exondescs, introns, introndescs, prim_transcripts, repeat_units,
+# repeat_regions and their *_family lists) plus the gene_name
+# Function: examines the nucleotide entry, identifying features belonging
+# to the gene (defined either by its name or by the position of its CDS in
+# the entry)
+
+sub _findgenefeatures {
+  my ($self,$entry,$gene_name,$cds_position,$getswissprotinfo)=@_;
+
+  my @entryfeatures=@{$entry->{'Features'}};
+  my @genefeatures;
+  my @exons; my @introns; my @prim_transcripts; my @transcripts;
+  my @repeat_units; my @repeat_regions;
+  my @repeat_units_family; my @repeat_regions_family;
+  my @exondescs; my @introndescs;
+  my @swisshashes; # for swissprot xreference
+  my @ttables;     # for translation_tables
+  my @labels;      # start and end positions of every selected feature
+
+  # maybe here also could be added special case when there is no CDS feature
+  # in the entry (e.g. tRNA entry -> TOCHECK).
+  # let's deal with the special case in which there is just one gene per entry
+  # usually without /gene qualifier
+  my @cds=@{$entry->{'CDS'}};
+
+  my $skipgenematch=0;
+  if (scalar(@cds) == 1) {
+    #carp "Note: only one CDS in this entry. Treating all features found in entry as Gene features.";
+    $skipgenematch=1;
+  }
+
+  my ($cds_begin,$cds_end,$proximity);
+  if ($cds_position) { # if a position has been requested
+    my @cds_exons=@{$cds[$cds_position-1]->{'range'}};
+    ($cds_begin,$cds_end)=($cds_exons[0]->[0],$cds_exons[-1]->[1]); # begin and end of CDS
+    $gene_name=$cds[$cds_position-1]->{'qualifiers'}->{'gene'};
+    # DEBUG
+    unless ($skipgenematch) {
+      carp "--DEBUG-- cdsbegin $cds_begin cdsend $cds_end--------";
+    }
+    $proximity=100; # proximity CONSTANT to decide whether a feature "belongs" to the CDS
+  }
+
+  for my $entryfeature (@entryfeatures) { # get only features for the desired gene
+    # a feature is selected when the entry has a single CDS, or when it lies
+    # close to the requested CDS, or when its /gene qualifier equals the
+    # requested name
+    my $belongs;
+    if ($skipgenematch) {
+      $belongs=1;
+    } elsif ($cds_position) {
+      $belongs=$self->_checkfeatureproximity($entryfeature->{'range'},$cds_begin,$cds_end,$proximity);
+    } else {
+      $belongs=($entryfeature->{'qualifiers'}->{'gene'} eq "$gene_name");
+    }
+    next unless ($belongs);
+
+    push(@genefeatures,$entryfeature);
+
+    my @range=@{$entryfeature->{'range'}};
+    my $name=$entryfeature->{'name'};
+    my $qualifiers=$entryfeature->{'qualifiers'};
+
+    if ($name eq "CDS") { # that has range containing array of exons
+
+      # swissprot crossindexing (if without SRS support it will fill array
+      # with zeros and do nothing
+      if ($getswissprotinfo) {
+        my $swisshash=$self->get_swisshash($qualifiers->{'db_xref'});
+        #$self->printswissprot($swisshash); # DEBUG
+        push (@swisshashes,$swisshash);
+      }
+
+      push (@ttables,$qualifiers->{'transl_table'}); # undef if not specified
+
+      # create labels array
+      for my $exon (@range) {
+        push(@labels,$exon->[0],$exon->[1]); # start and end of every exon of the CDS
+      }
+      push (@transcripts,$entryfeature->{'range'});
+      next;
+    }
+
+    # "simplifying" the joinedlocation features. I.e. changing them from
+    # multijoined ones to simple plain start-end features, taking only
+    # the start of the first "exon" and the end of the last "exon" as
+    # start and end of the entire feature
+    if ($entryfeature->{'locationtype'} && $entryfeature->{'locationtype'} eq "joined") { # joined location
+      @range=($range[0]->[0],$range[-1]->[1]);
+    }
+    push(@labels,$range[0],$range[1]); # start and end of every feature
+
+    if (($name eq "exon")||($name eq "intron")) {
+      # description is "number|note", or whichever of the two is present,
+      # or "unknown"; exons and introns are handled the same way so that a
+      # missing note never leaves a dangling "|"
+      my $desc=$qualifiers->{'number'};
+      my $note=$qualifiers->{'note'};
+      if ($note) {
+        $desc = $desc ? $desc . "|" . $note : $note;
+      }
+      if ($name eq "exon") {
+        push (@exondescs,$desc || "unknown");
+        push(@exons,\@range);
+      } else {
+        push (@introndescs,$desc || "unknown");
+        push(@introns,\@range);
+      }
+    }
+    if (($name eq "prim_transcript")||($name eq "mRNA")) { push(@prim_transcripts,\@range); }
+    if ($name eq "repeat_unit") {
+      push(@repeat_units,\@range);
+      push (@repeat_units_family,$qualifiers->{'rpt_family'} || "unknown");
+    }
+    if ($name eq "repeat_region") {
+      push(@repeat_regions,\@range);
+      push (@repeat_regions_family,$qualifiers->{'rpt_family'} || "unknown");
+    }
+  }
+
+  # a CDS requested by position may carry no /gene qualifier
+  unless ($gene_name) { $gene_name="cds-position:".$cds_position; }
+  my %genefeatureshash;
+  $genefeatureshash{gene_name}=$gene_name;
+  $genefeatureshash{genefeatures}=\@genefeatures;
+  $genefeatureshash{labels}=\@labels;
+  $genefeatureshash{ttables}=\@ttables;
+  $genefeatureshash{swisshashes}=\@swisshashes;
+  $genefeatureshash{transcripts}=\@transcripts;
+  $genefeatureshash{exons}=\@exons;
+  $genefeatureshash{exondescs}=\@exondescs;
+  $genefeatureshash{introns}=\@introns;
+  $genefeatureshash{introndescs}=\@introndescs;
+  $genefeatureshash{prim_transcripts}=\@prim_transcripts;
+  $genefeatureshash{repeat_units}=\@repeat_units;
+  $genefeatureshash{repeat_regions}=\@repeat_regions;
+  $genefeatureshash{repeat_units_family}=\@repeat_units_family;
+  $genefeatureshash{repeat_regions_family}=\@repeat_regions_family;
+  return (\%genefeatureshash);
+}
+
+
+# used by _findgenefeatures, when a CDS at a certain position is requested,
+# to retrieve only features quite close to the wanted CDS.
+# Args: range arrayref, begin and end positions of the CDS, $proximity
+# $proximity holds the maximum distance between the extremes of the CDS
+# and of the feature under exam.
+# Returns: boolean (1 when the feature is close enough, 0 otherwise)
+sub _checkfeatureproximity {
+  my ($self,$range,$cds_begin,$cds_end,$proximity)=@_;
+  my @coords=@{$range};
+
+  # a range made of sub-ranges (like in CDS, whose range equals its exons)
+  # is reduced to first start, last end and the strand of the first one
+  my ($begin,$end,$strand)=(ref($coords[0]) eq "ARRAY")
+    ? ($coords[0]->[0],$coords[-1]->[1],$coords[0]->[2])
+    : @coords;
+
+  # put both intervals in ascending order
+  ($cds_begin,$cds_end)=($cds_end,$cds_begin) if ($cds_begin > $cds_end); # reverse strand CDS
+  ($begin,$end)=($end,$begin) if ($strand == -1); # reverse strand feature
+
+  # too far before the CDS start or too far past the CDS end
+  if ((($cds_begin-$end)>$proximity)||(($begin-$cds_end)>$proximity)) {
+    carp "--DEBUG-- feature rejected: begin $begin end $end -------";
+    return (0);
+  }
+  carp "--DEBUG-- feature accepted: begin $begin end $end -------";
+  return (1); # otherwise ok, feature considered next to CDS
+}
+
+
+# function that calls the external program "align" (on the fasta2 package)
+# to create an alignment between two sequences, returning the aligned
+# strings and the codes for the identity (:: ::::)
+# Args: the two sequences to align
+# Returns: (first sequence aligned, second sequence aligned, identity codes)
+# Croaks when one of the temporary FASTA files cannot be written.
+# NOTE(review): the two temporary files have fixed names under /tmp, so
+# concurrent callers overwrite each other's input -- consider File::Temp.
+
+sub _get_alignment {
+  my ($self,$seq1,$seq2)=@_;
+  my $fastafile1="/tmp/tmpfastafile1";
+  my $fastafile2="/tmp/tmpfastafile2";
+  my $grepcut='egrep -v "[[:digit:]]|^ *$|sequences" | cut -c8-'; # grep/cut
+  my $alignprogram="/usr/local/etc/bioinfo/fasta2/align -s /usr/local/etc/bioinfo/fasta2/idnaa.mat $fastafile1 $fastafile2 2>/dev/null | $grepcut"; # ALIGN
+  # three-argument open with low-precedence "or": with "||" the croak was
+  # attached to the filename string and could never fire
+  open(my $TMPFASTAFILE1,'>',$fastafile1) or croak "Cannot write into $fastafile1 for aa alignment";
+  open(my $TMPFASTAFILE2,'>',$fastafile2) or croak "Cannot write into $fastafile2 for aa alignment";
+  print $TMPFASTAFILE1 ">firstseq\n$seq1\n";
+  print $TMPFASTAFILE2 ">secondseq\n$seq2\n";
+  # buffered write errors only show up when the handle is closed
+  close($TMPFASTAFILE1) or croak "Cannot write into $fastafile1 for aa alignment";
+  close($TMPFASTAFILE2) or croak "Cannot write into $fastafile2 for aa alignment";
+  my $alignment=`$alignprogram`;
+  my @alignlines=split(/\n/,$alignment);
+  my ($linecount,$seq1_aligned,$seq2_aligned,$codes);
+  # the filtered output is consumed in groups of three lines:
+  # first sequence, identity codes, second sequence
+  for ($linecount=0; $linecount < @alignlines; $linecount+=3) {
+    $seq1_aligned .= $alignlines[$linecount];
+    $codes .= $alignlines[$linecount+1];
+    $seq2_aligned .= $alignlines[$linecount+2];
+  }
+  return ($seq1_aligned,$seq2_aligned,$codes);
+}
+
+# common part of the function to create a novel liveseq gene structure
+# from an amino acid sequence, using codon usage frequencies
+# args: codon_usage_array (reference to 64 counts, in the order of the
+#       codon labels listed below) transltableid aasequence gene_name
+# returns: the Gene object, or 0 if its creation failed
+# Called as a plain function (no invocant).
+sub _common_novelaasequence2gene {
+  my ($species_codon_usage,$ttabid,$aasequence,$gene_name)=@_;
+  my @species_codon_usage=@{$species_codon_usage};
+  my @codon_usage_label=
+      qw (cga cgc cgg cgt aga agg cta ctc ctg ctt tta ttg tca tcc tcg
+      tct agc agt aca acc acg act cca ccc ccg cct gca gcc gcg gct gga
+      ggc ggg ggt gta gtc gtg gtt aaa aag aac aat caa cag cac cat gaa
+      gag gac gat tac tat tgc tgt ttc ttt ata atc att atg tgg taa tag
+      tga);
+  # codon => usage count
+  my %codon_usage_value;
+  for my $i (0..63) {
+    $codon_usage_value{$codon_usage_label[$i]}=$species_codon_usage[$i];
+  }
+
+  my $CodonTable  = Bio::Tools::CodonTable->new ( -id => $ttabid );
+  my @aminoacids = split(//,uc($aasequence));
+  my $dnasequence;
+  for my $aminoacid (@aminoacids) {
+    my @alt_codons = $CodonTable->revtranslate($aminoacid);
+    unless (@alt_codons) {
+      carp "No reverse translation possible for aminoacid \'$aminoacid\'";
+      $dnasequence .= "???";
+      next;
+    }
+
+    # total usage of all the codons coding for this amino acid
+    my $aa_codon_total=0;
+    for my $codon (@alt_codons) {
+      $aa_codon_total+=($codon_usage_value{$codon} || 0);
+    }
+
+    # roulette-wheel choice: each codon owns a slice of [0,1) proportional
+    # to its relative usage, and the first slice reaching past $dice wins
+    my $chosen_codon="";
+    if ($aa_codon_total > 0) { # a zero total would mean a division by zero
+      my $dice=rand(1);
+      #print "roulette: $dice\n"; # verbose
+      my $partial=0;
+      CODONCHOICE:
+      for my $j (0..$#alt_codons) {
+        my $thiscodon=$alt_codons[$j];
+        my $relativeusage=(($codon_usage_value{$thiscodon} || 0)/$aa_codon_total);
+        if ($dice < $relativeusage+$partial) {
+          $chosen_codon=$thiscodon;
+          last CODONCHOICE;
+        } else {
+          $partial += $relativeusage;
+        }
+      }
+    }
+    # nothing chosen (no usage data, or rounding left $dice past the last
+    # slice): fall back to the last codon
+    unless ($chosen_codon) {
+      $chosen_codon = $alt_codons[-1]; # the last one
+    }
+    # print ".....adding $chosen_codon\n"; # verbose
+    $dnasequence .= $chosen_codon;
+  }
+
+  # a single forward-strand exon spanning the whole generated sequence
+  my $dna = Bio::LiveSeq::DNA->new(-seq => $dnasequence);
+  my $min=1;
+  my $max=length($dnasequence);
+  my $exon = Bio::LiveSeq::Exon->new(-seq => $dna, -start => $min, -end => $max, -strand => 1);
+  my @exons=($exon);
+  my $transcript = Bio::LiveSeq::Transcript->new(-exons => \@exons);
+  $transcript->translation_table($ttabid);
+  my @transcripts=($transcript);
+  my $translation = Bio::LiveSeq::Translation->new(-transcript => $transcript);
+  my @translations=($translation);
+  my %features=(DNA => $dna, Transcripts => \@transcripts, Translations => \@translations);
+  my $gene = Bio::LiveSeq::Gene->new(-name => $gene_name, -features => \%features, -upbound => $min, -downbound => $max);
+
+  # creation of gene
+  unless ($gene) { # a false value means the Gene could not be created
+    carp "Error in Gene creation phase";
+    return (0);
+  }
+  return $gene;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/IO/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,29 @@
+# $Id: README,v 1.4 2006/01/13 09:51:59 heikki Exp $
+
+README for Bio::LiveSeq::IO
+
+LiveSeq objects representing known gene structures and their sequences
+have to be created from nucleotide sequence files. The current IO
+files do it by reading in EMBL entries and parsing out sequences as
+well as CDS, exon and primary_transcript features from the feature
+table.
+
+Bio::LiveSeq::IO::Loader
+
+	is a superclass holding methods common to other methods.
+
+Bio::LiveSeq::IO::BioPerl
+
+	is the preferred method which uses Bio::DB::EMBL to retrieve
+	sequences over the Web by accession number.
+
+Bio::LiveSeq::IO::SRS
+
+        outdated, removed from distribution 13 Jan 2006
+
+	retrieves sequences from a local installation of SRS. It needs
+        srsperl.pm which is part of SRS. SRS is short for Sequence
+        Retrieval System, a comprehensive program suite for indexing
+        and serving biological databases. SRS is a product of Lion
+        BioSciences (http://www.lionbio.co.uk/). The license for
+        academic users is free.

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Intron.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Intron.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Intron.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+# $Id: Intron.pm,v 1.9.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Intron
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Intron - Range abstract class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+Class for INTRON objects. They consist of a beginlabel, an endlabel (both
+referring to a LiveSeq DNA object) and a strand.
+The strand could be 1 (forward strand, default), -1 (reverse strand).
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Intron;
+
+use strict;
+use base qw(Bio::LiveSeq::Range);
+
+=head2 new
+
+  Title   : new
+  Usage   : $intron1=Bio::LiveSeq::Intron->new(-seq => $objref,
+					       -start => $startlabel,
+					       -end => $endlabel, 
+                                               -strand => 1
+					       );
+
+  Function: generates a new Bio::LiveSeq::Intron
+  Returns : reference to a new object of class Intron
+  Errorcode -1
+  Args    : two labels and an integer
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutation.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutation.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutation.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,406 @@
+# $Id: Mutation.pm,v 1.13.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::LiveSeq::Mutation
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Mutation - Mutation event descriptor class
+
+=head1 SYNOPSIS
+
+  # full description of a point mutation
+  $mutation1a = Bio::LiveSeq::Mutation->new ( -seq => 'A',
+					      -seqori => 'T',
+					      -pos  => 100,
+					      -len => 1 # optional, defaults to length(seq)
+					     );
+
+  # minimal information for a point mutation
+  $mutation1b = Bio::LiveSeq::Mutation->new ( -seq => 'A',
+					      -pos  => 100
+					      );
+  # insertion
+  $mutation2 = Bio::LiveSeq::Mutation->new ( -seq => 'ATT',
+					     -pos  => 100,
+					     -len => 0
+					     );
+  # deletion
+  $mutation3 = Bio::LiveSeq::Mutation->new ( -seq => '',  # optional
+					     -seqori => 'TTG',  # optional
+					     -pos  => 100,
+					     -len => 3
+					     );
+  # complex
+  $mutation4 = Bio::LiveSeq::Mutation->new ( -seq => 'CC', 
+					     -seqori => 'TTG',  # optional
+					     -pos  => 100,
+					     -len => 3
+					     );
+
+
+=head1 DESCRIPTION
+
+This class describes a local mutation event using minimalistic
+description.  It is not necessary to know anything about the original
+sequence. You need to give the changed sequence, the position of the
+mutation in the (unidentified) reference sequence, and the length of
+the affected subsequence in the reference sequence. If the original
+allele sequence is given, the objects applying the mutation into the
+reference sequence (e.g. L<Bio::LiveSeq::Mutator>) might check for its
+validity.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::LiveSeq::Mutation;
+use strict;
+
+# Object preamble - inheritance
+
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor. Recognised named arguments: -seq, -seqori, -pos, -len.
+    # Returns the new blessed object; setters may throw (see pos()).
+    my ($class, @args) = @_;
+    my $self = bless {}, $class;
+
+    # _rearrange is called on $self; it is not defined in this file
+    # (presumably inherited via the Bio::Root::Root base class).
+    my ($seq, $seqori, $pos, $len) =
+        $self->_rearrange([qw(SEQ SEQORI POS LEN)], @args);
+
+    # False values ('', 0, undef) for seq, seqori and pos are skipped.
+    $self->seq($seq)       if $seq;
+    $self->seqori($seqori) if $seqori;
+    $self->pos($pos)       if $pos;
+
+    # len == 0 is meaningful (insertion), so test definedness, not truth.
+    $self->len($len) if defined $len;
+
+    return $self;
+}
+
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $obj->seq();
+ Function: 
+
+            Sets and returns the mutated sequence. No checking is done
+            to validate the symbols.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub seq {
+    # Combined getter/setter; a defined argument replaces the stored value.
+    my ($self, $new_seq) = @_;
+    $self->{'seq'} = $new_seq if defined $new_seq;
+    # Any false stored value (undef, '', '0') is reported as ''.
+    return $self->{'seq'} ? $self->{'seq'} : '';
+}
+
+
+=head2 seqori
+
+ Title   : seqori
+ Usage   : $obj->seqori();
+ Function: 
+
+            Sets and returns the original subsequence in the reference
+            sequence. No checking is done to validate the symbols.
+            Optional value.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub seqori {
+    # Combined getter/setter; a defined argument replaces the stored value.
+    my ($self, $original) = @_;
+    $self->{'seqori'} = $original if defined $original;
+    # Any false stored value (undef, '', '0') is reported as ''.
+    return $self->{'seqori'} ? $self->{'seqori'} : '';
+}
+
+
+=head2 pos
+
+ Title   : pos
+ Usage   : $obj->pos();
+ Function: 
+
+            Sets and returns the position of the first element in the
+            sequence.
+
+ Example : 
+ Returns : string
+ Args    : integer
+
+=cut
+
+
+sub pos {
+    # Getter/setter for the position. A defined argument must match an
+    # optionally signed run of digits; otherwise throw() is invoked and
+    # nothing is stored.
+    my ($self, $position) = @_;
+    return $self->{'pos'} unless defined $position;
+    if ($position =~ /^([+-])?\d+$/) {
+        $self->{'pos'} = $position;
+    } else {
+        $self->throw("[$position] for pos has to be an integer\n");
+    }
+    return $self->{'pos'};
+}
+
+=head2 len
+
+ Title   : len
+ Usage   : $obj->len();
+ Function: 
+
+            Sets and returns the len of the affected original allele
+            sequence.  If value is not set, defaults to the length of
+            the mutated sequence (seq).
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub len {
+    # Getter/setter for the length of the affected reference subsequence.
+    # When no length was ever set, falls back to the length of the stored
+    # mutated sequence, or 0 if that is unset too (length(undef) is undef
+    # on perl >= 5.12, which is not a usable length).
+    my ($self, $value) = @_;
+    $self->{'len'} = $value if defined $value;
+    return $self->{'len'} if exists $self->{'len'};
+    return defined $self->{'seq'} ? length($self->{'seq'}) : 0;
+}
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function: 
+
+            Sets and returns the label of the affected original allele
+            location. Label is a stable identifier whereas location
+            can be changed by mutations. Label comes from
+            L<Bio::LiveSeq::Gene>.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub label {
+    # Getter/setter for the 'label' slot of the object hash.
+    my ($self, $new_label) = @_;
+
+    # Store only defined arguments, so a bare call acts as a getter.
+    $self->{'label'} = $new_label if defined $new_label;
+
+    # Bare return (undef / empty list) while the slot was never created.
+    return exists $self->{'label'} ? $self->{'label'} : ();
+}
+
+
+=head2 transpos
+
+ Title   : transpos
+ Usage   : $obj->transpos();
+ Function: 
+
+            Sets and returns the transcript position of the mutation.
+            Set when associated with a reference sequence. Value
+            depends on reference molecule and the co-ordinate system
+            used.
+
+ Example : 
+ Returns : string
+ Args    : integer
+
+=cut
+
+
+sub transpos {
+    # Getter/setter for the transcript position. A defined argument must
+    # match an optionally signed run of digits; otherwise throw() is
+    # invoked and nothing is stored.
+    my ($self, $position) = @_;
+    return $self->{'transpos'} unless defined $position;
+    if ($position =~ /^([+-])?\d+$/) {
+        $self->{'transpos'} = $position;
+    } else {
+        $self->throw("[$position] for transpos has to be an integer\n");
+    }
+    return $self->{'transpos'};
+}
+
+
+=head2 issue
+
+ Title   : issue
+ Usage   : $obj->issue();
+ Function: 
+
+            Sets and returns the position of the mutation in an array
+            of mutations to be issued. Set after the validity of the
+            mutation has been confirmed.
+
+ Example : 
+ Returns : string
+ Args    : integer
+
+=cut
+
+
+sub issue {
+    # Getter/setter for the 'issue' slot. A defined argument must match an
+    # optionally signed run of digits; otherwise throw() is invoked and
+    # nothing is stored.
+    my ($self, $ordinal) = @_;
+    return $self->{'issue'} unless defined $ordinal;
+    if ($ordinal =~ /^([+-])?\d+$/) {
+        $self->{'issue'} = $ordinal;
+    } else {
+        $self->throw("[$ordinal] for issue has to be an integer\n");
+    }
+    return $self->{'issue'};
+}
+
+
+=head2 prelabel
+
+ Title   : prelabel
+ Usage   : $obj->prelabel();
+ Function: 
+
+            Sets and returns the prelabel of the affected original allele
+            location. Prelabel is a stable identifier whereas location
+            can be changed by mutations. Prelabel comes from
+            L<Bio::LiveSeq::Gene>.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub prelabel {
+    # Getter/setter for the 'prelabel' slot of the object hash.
+    my ($self, $new_label) = @_;
+
+    # Store only defined arguments, so a bare call acts as a getter.
+    $self->{'prelabel'} = $new_label if defined $new_label;
+
+    # Bare return (undef / empty list) while the slot was never created.
+    return exists $self->{'prelabel'} ? $self->{'prelabel'} : ();
+}
+
+
+=head2 postlabel
+
+ Title   : postlabel
+ Usage   : $obj->postlabel();
+ Function: 
+
+            Sets and returns the postlabel of the affected original allele
+            location. Postlabel is a stable identifier whereas location
+            can be changed by mutations. Postlabel comes from
+            L<Bio::LiveSeq::Gene>.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub postlabel {
+    # Getter/setter for the 'postlabel' slot of the object hash.
+    my ($self, $new_label) = @_;
+
+    # Store only defined arguments, so a bare call acts as a getter.
+    $self->{'postlabel'} = $new_label if defined $new_label;
+
+    # Bare return (undef / empty list) while the slot was never created.
+    return exists $self->{'postlabel'} ? $self->{'postlabel'} : ();
+}
+
+
+=head2 lastlabel
+
+ Title   : lastlabel
+ Usage   : $obj->lastlabel();
+ Function: 
+
+            Sets and returns the lastlabel of the affected original allele
+            location. Lastlabel is a stable identifier whereas location
+            can be changed by mutations. Lastlabel comes from
+            L<Bio::LiveSeq::Gene>.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub lastlabel {
+    # Getter/setter for the 'lastlabel' slot of the object hash.
+    my ($self, $new_label) = @_;
+
+    # Store only defined arguments, so a bare call acts as a getter.
+    $self->{'lastlabel'} = $new_label if defined $new_label;
+
+    # Bare return (undef / empty list) while the slot was never created.
+    return exists $self->{'lastlabel'} ? $self->{'lastlabel'} : ();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Mutator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1439 @@
+# $Id: Mutator.pm,v 1.33.4.3 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Mutator
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Mutator - Package mutating LiveSequences
+
+=head1 SYNOPSIS
+
+  # $gene is a Bio::LiveSeq::Gene object
+  my $mutate = Bio::LiveSeq::Mutator->new('-gene' => $gene,
+  					  '-numbering' => "coding"
+  					   );
+  # $mut is a Bio::LiveSeq::Mutation object
+  $mutate->add_Mutation($mut);
+  # $results is a Bio::Variation::SeqDiff object
+  my $results=$mutate->change_gene();
+  if ($results) {
+      my $out = Bio::Variation::IO->new( '-format' => 'flat');
+      $out->write($results);
+  }
+
+=head1 DESCRIPTION
+
+This class mutates Bio::LiveSeq::Gene objects and returns a
+Bio::Variation::SeqDiff object. Mutations are described as
+Bio::LiveSeq::Mutation objects. See L<Bio::LiveSeq::Gene>,
+L<Bio::Variation::SeqDiff>, and L<Bio::LiveSeq::Mutation> for details.
+
+=head1 FEEDBACK
+
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho & Joseph A.L. Insana
+
+  Email:  heikki-at-bioperl-dot-org
+          insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+  The rest of the documentation details each of the object
+  methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Mutator;
+use strict;
+
+use Bio::Variation::SeqDiff;
+use Bio::Variation::DNAMutation;
+use Bio::Variation::RNAChange;
+use Bio::Variation::AAChange;
+use Bio::Variation::Allele;
+use Bio::LiveSeq::Mutation;
+
+#use integer;
+# Object preamble - inheritance
+
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor. Recognised named arguments: -gene and -numbering.
+    # Returns the new blessed object; gene() may throw on a wrong type.
+    my ($class, @args) = @_;
+    my $self = bless {}, $class;
+
+    # _rearrange is called on $self; it is not defined in this file
+    # (presumably inherited via the Bio::Root::Root base class).
+    my ($gene, $numbering) =
+        $self->_rearrange([qw(GENE NUMBERING)], @args);
+
+    # Start with an empty mutation list; add_Mutation() appends to it.
+    $self->{'mutations'} = [];
+
+    $self->gene($gene)           if $gene;
+    $self->numbering($numbering) if $numbering;
+
+    # class constant
+    $self->{'flanklen'} = 25;
+    return $self;
+}
+
+=head2 gene
+
+ Title   : gene
+ Usage   : $mutobj = $obj->gene;
+         : $mutobj = $obj->gene($objref);
+ Function:
+
+           Returns or sets the link-reference to a
+           Bio::LiveSeq::Gene object. If no value has been set, it
+           will return undef
+
+ Returns : an object reference  or undef
+ Args    : a Bio::LiveSeq::Gene
+
+See L<Bio::LiveSeq::Gene> for more information.
+
+=cut
+
+sub gene {
+    # Getter/setter for the gene this mutator works on.
+    my ($self, $gene) = @_;
+
+    if (defined $gene) {
+        # Reject anything that is not a Bio::LiveSeq::Gene.
+        unless ($gene->isa('Bio::LiveSeq::Gene')) {
+            $self->throw("Is not a Bio::LiveSeq::Gene object but a [$gene]");
+            return;
+        }
+        $self->{'gene'} = $gene;
+    }
+
+    # Bare return (undef / empty list) when no gene was ever stored.
+    return unless exists $self->{'gene'};
+    return $self->{'gene'};
+}
+
+
+=head2 numbering
+
+ Title   : numbering
+ Usage   : $obj->numbering();
+ Function:
+
+            Sets and returns coordinate system used in positioning the
+            mutations. See L<change_gene> for details.
+
+ Example :
+ Returns : string
+ Args    : string (coding [transcript number] | gene | entry)
+
+=cut
+
+
+sub numbering {
+    # Getter/setter for the coordinate system name.
+    my ($self, $scheme) = @_;
+
+    if (defined $scheme) {
+        # Accepted: 'entry', 'gene', or any string containing 'coding'
+        # (the pattern is unanchored). Everything else becomes 'coding'.
+        my $accepted = $scheme eq 'entry'
+            || $scheme eq 'gene'
+            || $scheme =~ /(coding)( )?(\d+)?/;
+        $self->{'numbering'} = $accepted ? $scheme : 'coding';
+    }
+
+    return exists $self->{'numbering'} ? $self->{'numbering'} : 'coding';
+}
+
+=head2 add_Mutation
+
+ Title   : add_Mutation
+ Usage   : $self->add_Mutation($ref)
+ Function: adds a Bio::LiveSeq::Mutation object
+ Example :
+ Returns :
+ Args    : a Bio::LiveSeq::Mutation
+
+See L<Bio::LiveSeq::Mutation> for more information.
+
+=cut
+
+sub add_Mutation{
+    # Validate $value and append it to the list of pending mutations.
+    #
+    # Throws unless $value is a Bio::LiveSeq::Mutation object. (The former
+    # test named 'Bio::Liveseq::Mutation' and lacked the negation, so it
+    # could never reject a wrong argument.)
+    # Warns and returns nothing when pos is false, or when both seq and
+    # len are false. Otherwise returns the result of push.
+    my ($self, $value) = @_;
+
+    # The function form of isa is used so that undef, plain strings and
+    # unblessed references are rejected here instead of dying on a
+    # method call.
+    unless (ref $value and UNIVERSAL::isa($value, 'Bio::LiveSeq::Mutation')) {
+        my $com = ref($value) || 'non-reference';
+        $self->throw("Is not a Mutation object but a [$com]");
+        return;
+    }
+    unless ($value->pos) {
+        $self->warn("No value for mutation position in the sequence!");
+        return;
+    }
+    unless ($value->seq or $value->len) {
+        $self->warn("Either mutated sequence or length of the deletion must be given!");
+        return;
+    }
+    push(@{ $self->{'mutations'} }, $value);
+}
+
+=head2 each_Mutation
+
+ Title   : each_Mutation
+ Usage   : foreach $ref ( $a->each_Mutation )
+ Function: gets an array of Bio::LiveSeq::Mutation objects
+ Example :
+ Returns : array of Mutations
+ Args    :
+
+See L<Bio::LiveSeq::Mutation> for more information.
+
+=cut
+
+sub each_Mutation{
+    # Returns the contents of the 'mutations' array reference as a list.
+    my $self = shift;
+    return @{ $self->{'mutations'} };
+}
+
+
+=head2 mutation
+
+ Title   : mutation
+ Usage   : $mutobj = $obj->mutation;
+         : $mutobj = $obj->mutation($objref);
+ Function:
+
+           Returns or sets the link-reference to the current mutation
+           object.  If the value is not set, it will return undef.
+           Internal method.
+
+ Returns : an object reference  or undef
+
+=cut
+
+
+sub mutation {
+    # Getter/setter for the mutation currently being processed.
+    my ($self, $mutation) = @_;
+
+    if (defined $mutation) {
+        # Reject anything that is not a Bio::LiveSeq::Mutation.
+        unless ($mutation->isa('Bio::LiveSeq::Mutation')) {
+            $self->throw("Is not a Bio::LiveSeq::Mutation object but a [$mutation]");
+            return;
+        }
+        $self->{'mutation'} = $mutation;
+    }
+
+    # Bare return (undef / empty list) when nothing was ever stored.
+    return unless exists $self->{'mutation'};
+    return $self->{'mutation'};
+}
+
+=head2 DNA
+
+ Title   : DNA
+ Usage   : $mutobj = $obj->DNA;
+         : $mutobj = $obj->DNA($objref);
+ Function:
+
+           Returns or sets the reference to the LiveSeq object holding
+           the reference sequence. If there is no link, it will return
+           undef.
+           Internal method.
+
+ Returns : an object reference or undef
+
+=cut
+
+sub DNA {
+    # Getter/setter for the object stored in the 'DNA' slot.
+    my ($self, $dna) = @_;
+
+    if (defined $dna) {
+        # Either a DNA or a Transcript object is acceptable here.
+        unless ($dna->isa('Bio::LiveSeq::DNA')
+                or $dna->isa('Bio::LiveSeq::Transcript')) {
+            $self->throw("Is not a Bio::LiveSeq::DNA/Transcript object but a [$dna]");
+            return;
+        }
+        $self->{'DNA'} = $dna;
+    }
+
+    # Bare return (undef / empty list) when nothing was ever stored.
+    return unless exists $self->{'DNA'};
+    return $self->{'DNA'};
+}
+
+
+=head2 RNA
+
+ Title   : RNA
+ Usage   : $mutobj = $obj->RNA;
+         : $mutobj = $obj->RNA($objref);
+ Function:
+
+           Returns or sets the reference to the LiveSeq object holding
+           the reference sequence. If the value is not set, it will return
+           undef.
+           Internal method.
+
+ Returns : an object reference  or undef
+
+=cut
+
+
+sub RNA {
+    # Getter/setter for the object stored in the 'RNA' slot.
+    my ($self, $rna) = @_;
+
+    if (defined $rna) {
+        # Only Bio::LiveSeq::Transcript objects pass the check.
+        unless ($rna->isa('Bio::LiveSeq::Transcript')) {
+            $self->throw("Is not a Bio::LiveSeq::RNA/Transcript object but a [$rna]");
+            return;
+        }
+        $self->{'RNA'} = $rna;
+    }
+
+    # Bare return (undef / empty list) when nothing was ever stored.
+    return unless exists $self->{'RNA'};
+    return $self->{'RNA'};
+}
+
+
+=head2 dnamut
+
+ Title   : dnamut
+ Usage   : $mutobj = $obj->dnamut;
+         : $mutobj = $obj->dnamut($objref);
+ Function:
+
+           Returns or sets the reference to the current DNAMutation object.
+           If the value is not set, it will return undef.
+           Internal method.
+
+ Returns : a Bio::Variation::DNAMutation object or undef
+
+See L<Bio::Variation::DNAMutation> for more information.
+
+=cut
+
+
+sub dnamut {
+    # Getter/setter for the current DNAMutation object.
+    my ($self, $dnamut) = @_;
+
+    if (defined $dnamut) {
+        # Reject anything that is not a Bio::Variation::DNAMutation.
+        unless ($dnamut->isa('Bio::Variation::DNAMutation')) {
+            $self->throw("Is not a Bio::Variation::DNAMutation object but a [$dnamut]");
+            return;
+        }
+        $self->{'dnamut'} = $dnamut;
+    }
+
+    # Bare return (undef / empty list) when nothing was ever stored.
+    return unless exists $self->{'dnamut'};
+    return $self->{'dnamut'};
+}
+
+
+=head2 rnachange
+
+ Title   : rnachange
+ Usage   : $mutobj = $obj->rnachange;
+         : $mutobj = $obj->rnachange($objref);
+ Function:
+
+           Returns or sets the reference to the current RNAChange object.
+           If the value is not set, it will return undef.
+           Internal method.
+
+ Returns : a Bio::Variation::RNAChange object or undef
+
+See L<Bio::Variation::RNAChange> for more information.
+
+=cut
+
+
+sub rnachange {
+    # Getter/setter for the current RNAChange object.
+    my ($self, $rnachange) = @_;
+
+    if (defined $rnachange) {
+        # Reject anything that is not a Bio::Variation::RNAChange.
+        unless ($rnachange->isa('Bio::Variation::RNAChange')) {
+            $self->throw("Is not a Bio::Variation::RNAChange object but a [$rnachange]");
+            return;
+        }
+        $self->{'rnachange'} = $rnachange;
+    }
+
+    # Bare return (undef / empty list) when nothing was ever stored.
+    return unless exists $self->{'rnachange'};
+    return $self->{'rnachange'};
+}
+
+
+=head2 aachange
+
+ Title   : aachange
+ Usage   : $mutobj = $obj->aachange;
+         : $mutobj = $obj->aachange($objref);
+ Function:
+
+           Returns or sets the reference to the current AAChange object.
+           If the value is not set, it will return undef.
+           Internal method.
+
+ Returns : a Bio::Variation::AAChange object or undef
+
+See L<Bio::Variation::AAChange> for more information.
+
+=cut
+
+
+sub aachange {
+    # Getter/setter for the current AAChange object.
+    my ($self, $aachange) = @_;
+
+    if (defined $aachange) {
+        # Reject anything that is not a Bio::Variation::AAChange.
+        unless ($aachange->isa('Bio::Variation::AAChange')) {
+            $self->throw("Is not a Bio::Variation::AAChange object but a [$aachange]");
+            return;
+        }
+        $self->{'aachange'} = $aachange;
+    }
+
+    # Bare return (undef / empty list) when nothing was ever stored.
+    return unless exists $self->{'aachange'};
+    return $self->{'aachange'};
+}
+
+
+=head2 exons
+
+ Title   : exons
+ Usage   : $mutobj = $obj->exons;
+         : $mutobj = $obj->exons($objref);
+ Function:
+
+           Returns or sets the reference to a current array of Exons.
+           If the value is not set, it will return undef.
+           Internal method.
+
+ Returns : an array of Bio::LiveSeq::Exon objects or undef
+
+See L<Bio::LiveSeq::Exon> for more information.
+
+=cut
+
+
+sub exons {
+    # Getter/setter for the 'exons' slot; the value is stored as given,
+    # without any type check.
+    my ($self, $exons) = @_;
+
+    # Store only defined arguments, so a bare call acts as a getter.
+    $self->{'exons'} = $exons if defined $exons;
+
+    # Bare return (undef / empty list) while the slot was never created.
+    return exists $self->{'exons'} ? $self->{'exons'} : ();
+}
+
+=head2 change_gene_with_alignment
+
+ Title   : change_gene_with_alignment
+ Usage   : $results=$mutate->change_gene_with_alignment($aln);
+
+ Function:
+
+           Returns a Bio::Variation::SeqDiff object containing the
+           results of the changes in the alignment. The alignment has
+           to be pairwise and have one sequence named 'QUERY', the
+           other one is assumed to be a part of the sequence from
+           $gene.
+
+           This method offers a shortcut to change_gene and
+           automates the creation of Bio::LiveSeq::Mutation objects.
+           Use it with almost identical sequences, e.g. to locate a SNP.
+
+ Args    : Bio::SimpleAlign object representing a short local alignment
+ Returns : Bio::Variation::SeqDiff object or 0 on error
+
+See L<Bio::LiveSeq::Mutation>, L<Bio::SimpleAlign>, and
+L<Bio::Variation::SeqDiff>  for more information.
+
+=cut
+
+sub change_gene_with_alignment {
+    # Derives Mutation objects from a pairwise alignment and applies them.
+    #
+    # $aln must be a Bio::SimpleAlign holding exactly two sequences, one of
+    # which has the id 'QUERY'; the other one is used as the reference.
+    # Every run of '?' in the consensus_string(51) output of the trimmed
+    # alignment is turned into one mutation via create_mutation(), queued
+    # with add_Mutation(), and finally change_gene() is called and its
+    # result returned.
+    #
+    # Throws if $aln has the wrong class, is not pairwise, or contains no
+    # sequence named 'QUERY'.
+    my ($self, $aln) = @_;
+
+    #
+    # Sanity checks
+    #
+
+    $self->throw("Argument is not a Bio::SimpleAlign object but a [$aln]")
+        unless $aln->isa('Bio::SimpleAlign');
+    $self->throw("Pairwise alignments only, please")
+        if $aln->no_sequences != 2;
+
+    # find out the order the two sequences are given
+    my $queryseq_pos = 1; #default
+    my $refseq_pos = 2;
+    unless ($aln->get_seq_by_pos(1)->id eq 'QUERY') {
+        # This used to call carp(), but no "use Carp" is visible among this
+        # package's imports, so that call died with "Undefined subroutine".
+        # Raise the intended message through the object's own throw().
+        $self->throw('Query sequence has to be named QUERY')
+            if $aln->get_seq_by_pos(2)->id ne 'QUERY';
+        $queryseq_pos = 2; # alternative
+        $refseq_pos = 1;
+    }
+
+    # trim the alignment to the columns between the first and the last
+    # residue of the query
+    my $start = $aln->column_from_residue_number('QUERY', 1);
+    my $end   = $aln->column_from_residue_number(
+        'QUERY', $aln->get_seq_by_pos($queryseq_pos)->end);
+
+    my $aln2 = $aln->slice($start, $end);
+
+    #
+    # extracting mutations
+    #
+
+    my $cs = $aln2->consensus_string(51);
+    my $queryseq = $aln2->get_seq_by_pos($queryseq_pos);
+    my $refseq = $aln2->get_seq_by_pos($refseq_pos);
+
+    while ( $cs =~ /(\?+)/g ) {
+        # copy the capture at once; later calls may run their own matches
+        my $run_length = length($1);
+        # pos in local coordinates
+        my $pos = pos($cs) - $run_length + 1;
+        my $mutation = $self->create_mutation($refseq,
+                                              $queryseq,
+                                              $pos,
+                                              $run_length
+                                              );
+        # reset pos to refseq coordinates
+        $pos += $refseq->start - 1;
+        $mutation->pos($pos);
+
+        $self->add_Mutation($mutation);
+    }
+    return $self->change_gene();
+}
+
+=head2 create_mutation
+
+ Title   : create_mutation
+ Usage   : 
+ Function:
+
+           Formats sequence differences from two sequences into
+           Bio::LiveSeq::Mutation objects which can be applied to a
+           gene.
+
+           To keep it generic, sequence arguments need not to be
+           Bio::LocatableSeq. Coordinate change to parent sequence
+           numbering needs to be done by the calling code.
+
+           Called from change_gene_with_alignment
+
+ Args    : Bio::PrimarySeqI inheriting object for the reference sequence
+           Bio::PrimarySeqI inheriting object for the query sequence
+           integer for the start position of the local sequence difference
+           integer for the length of the sequence difference
+ Returns : Bio::LiveSeq::Mutation object 
+
+=cut
+
+sub create_mutation {
+    # Builds one Bio::LiveSeq::Mutation from the same window of two
+    # sequences.
+    #
+    # Args    : $refseq, $queryseq - objects for which
+    #           isa('Bio::PrimarySeqI') holds
+    #           $pos, $len - unsigned integers (an optional leading '+' is
+    #           accepted; note that 0 also passes the pattern check)
+    # Returns : the new mutation object, positioned at the local $pos
+    # Throws  : if any argument fails the checks below
+    #
+    # The constructor is called as Class->new(...): this package defines
+    # its own new(), so the indirect-object form "new Class (...)" depends
+    # on parser heuristics.
+    my ($self, $refseq, $queryseq, $pos, $len) = @_;
+
+    $self->throw("Is not a Bio::PrimarySeqI object but a [$refseq]")
+        unless $refseq->isa('Bio::PrimarySeqI');
+    $self->throw("Is not a Bio::PrimarySeqI object but a [$queryseq]")
+        unless $queryseq->isa('Bio::PrimarySeqI');
+    $self->throw("Position is not a positive integer but [$pos]")
+        unless $pos =~ /^\+?\d+$/;
+    $self->throw("Length is not a positive integer but [$len]")
+        unless $len =~ /^\+?\d+$/;
+
+    my $last      = $pos + $len - 1;
+    my $refstring = $refseq->subseq($pos, $last);
+    my $varstring = $queryseq->subseq($pos, $last);
+
+    # The characters . - * ? are treated as gap/unknown symbols below.
+    my @args;
+    if ($len == 1 and $refstring =~ /[^\.\-\*\?]/ and
+        $varstring =~ /[^\.\-\*\?]/ ) { # point
+        @args = (-seq => $varstring,
+                 -pos => $pos);
+    }
+    elsif ( $refstring =~ /^[^\.\-\*\?]+$/ and
+            $varstring !~ /^[^\.\-\*\?]+$/ ) { # deletion
+        @args = (-pos => $pos,
+                 -len => $len);
+    }
+    elsif ( $refstring !~ /^[^\.\-\*\?]+$/ and
+            $varstring =~ /^[^\.\-\*\?]+$/ ) { # insertion
+        @args = (-seq => $varstring,
+                 -pos => $pos,
+                 -len => 0);
+    }
+    else { # complex
+        @args = (-seq => $varstring,
+                 -pos => $pos,
+                 -len => $len);
+    }
+
+    return Bio::LiveSeq::Mutation->new(@args);
+}
+
+=head2 change_gene
+
+ Title   : change_gene
+ Usage   : my $mutate = Bio::LiveSeq::Mutator->new(-gene => $gene,
+						   -numbering => "coding"
+						   );
+           # $mut is Bio::LiveSeq::Mutation object
+           $mutate->add_Mutation($mut);
+           my $results=$mutate->change_gene();
+
+ Function:
+
+           Returns a Bio::Variation::SeqDiff object containing the
+           results of the changes performed according to the
+           instructions present in Mutation(s).  The -numbering
+           argument decides what molecule is being changed and what
+           numbering scheme being used:
+
+            -numbering => "entry"
+
+               determines the DNA level, using the numbering from the
+               beginning of the sequence
+
+            -numbering => "coding"
+
+               determines the RNA level, using the numbering from the
+               beginning of the 1st transcript
+
+               Alternative transcripts can be used by specifying
+               "coding 2" or "coding 3" ...
+
+            -numbering => "gene"
+
+               determines the DNA level, using the numbering from the
+               beginning of the 1st transcript and including introns.
+               The meaning equals 'coding' if the reference molecule
+               is cDNA.
+
+ Args    : Bio::LiveSeq::Gene object
+           Bio::LiveSeq::Mutation object(s)
+           string specifying a numbering scheme (defaults to 'coding')
+ Returns : Bio::Variation::SeqDiff object or 0 on error
+
+=cut
+
+sub change_gene {
+    my ($self) = @_;
+
+    #
+    # Sanity check
+    #
+    unless ($self->gene) {
+	$self->warn("Input object Bio::LiveSeq::Gene is not given");
+	return 0;
+    }
+    #
+    # Setting the reference sequence based on -numbering
+    #
+    my @transcripts=@{$self->gene->get_Transcripts};
+    my $refseq; # will hold Bio::LiveSeq:Transcript object or Bio::LiveSeq::DNA
+
+    # 'gene' eq 'coding' if reference sequence is cDNA
+    $self->numbering ('coding') if $self->gene->get_DNA->alphabet eq 'rna' and $self->numbering eq 'gene';
+
+    if ($self->numbering =~ /(coding)( )?(\d+)?/ ) {
+	$self->numbering($1);
+	my $transnumber = $3;
+	$transnumber-- if $3; # 1 -> 0, 2 -> 1
+	if ($transnumber && $transnumber >= 0 && $transnumber <= $#transcripts) {
+	    $refseq=$transcripts[$transnumber];
+	} else {
+	    $transnumber && $self->warn("The alternative transcript number ". $transnumber+1 .
+	    "- does not exist. Reverting to the 1st transcript\n");
+	    $refseq=$transcripts[0];
+	}
+    } else {
+	$refseq=$transcripts[0]->{'seq'};
+    }
+    #
+    # Recording the state: SeqDiff object creation  ?? transcript no.??
+    #
+    my $seqDiff = Bio::Variation::SeqDiff->new(-verbose => $self->verbose);
+    $seqDiff->alphabet($self->gene->get_DNA->alphabet);
+    $seqDiff->numbering($self->numbering);
+    my ($DNAobj, $RNAobj);
+    if ($refseq->isa("Bio::LiveSeq::Transcript")) {
+	$self->RNA($refseq);
+	$self->DNA($refseq->{'seq'});
+	$seqDiff->rna_ori($refseq->seq );
+	$seqDiff->aa_ori($refseq->get_Translation->seq);
+    } else {
+	$self->DNA($refseq);
+	$self->RNA($transcripts[0]);
+	$seqDiff->rna_ori($self->RNA->seq);
+	$seqDiff->aa_ori($self->RNA->get_Translation->seq);
+    }
+    $seqDiff->dna_ori($self->DNA->seq);
+    # put the accession number into the SeqDiff object ID
+    $seqDiff->id($self->DNA->accession_number);
+
+    # the atg_offset takes in account that DNA object could be a subset of the
+    # whole entry (via the light_weight loader)
+    my $atg_label=$self->RNA->start;
+    my $atg_offset=$self->DNA->position($atg_label)+($self->DNA->start)-1;
+    $seqDiff->offset($atg_offset - 1);
+    $self->DNA->coordinate_start($atg_label);
+
+    my @exons = $self->RNA->all_Exons;
+    $seqDiff->cds_end($exons[$#exons]->end);
+
+    #
+    # Converting mutation positions to labels
+    #
+    $self->warn("no mutations"), return 0 
+	unless $self->_mutationpos2label($refseq, $seqDiff);
+
+    # need to add more than one rna & aa
+    #foreach $transcript (@transcripts) {
+    #  $seqDiff{"ori_transcript_${i}_seq"}=$transcript->seq;
+    #  $seqDiff{"ori_translation_${i}_seq"}=$transcript->get_Translation->seq;
+    #}
+
+    # do changes
+    my $k;
+    foreach my $mutation ($self->each_Mutation) {
+	next unless $mutation->label > 0;
+	$self->mutation($mutation);
+
+	$mutation->issue(++$k);
+	#
+	# current position on the transcript
+	#
+	if ($self->numbering =~ /coding/) {
+	    $mutation->transpos($mutation->pos); # transpos given by user
+	} else {
+	    #transpos of label / It will be 0 if mutating an intron, negative if upstream of ATG
+	    $mutation->transpos($self->RNA->position($mutation->label,$atg_label));
+	}
+	#
+	# Calculate adjacent labels based on the position on the current sequence
+	#
+	$mutation->prelabel($self->DNA->label(-1, $mutation->label)); # 1 before label
+	if ($mutation->len == 0) {
+	    $mutation->postlabel($mutation->label);
+	    $mutation->lastlabel($mutation->label);
+	} elsif ($mutation->len == 1) {
+	    $mutation->lastlabel($mutation->label); # last nucleotide affected
+	    $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); # $len after label
+	} else {
+	    $mutation->lastlabel($self->DNA->label($mutation->len,$mutation->label));
+	    $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel));
+	}
+	my $dnamut = $self->_set_DNAMutation($seqDiff);
+	#
+	#
+	#
+	if ($self->_rnaAffected) {
+	    $self->_set_effects($seqDiff, $dnamut);
+	}
+	elsif ($seqDiff->offset != 0 and $dnamut->region ne 'intron') {
+	    $self->_untranslated ($seqDiff, $dnamut);
+	} else {
+	    #$self->warn("Mutation starts outside coding region, RNAChange object not created");
+	}
+
+	#########################################################################
+	# Mutations are done here!                                              #
+	$refseq->labelchange($mutation->seq, $mutation->label, $mutation->len); #
+	#########################################################################
+
+	$self->_post_mutation ($seqDiff);
+
+	$self->dnamut(undef);
+	$self->rnachange(undef);
+	$self->aachange(undef);
+	$self->exons(undef);
+    }
+    # record the final state of all three sequences
+    $seqDiff->dna_mut($self->DNA->seq);
+    $seqDiff->rna_mut($self->RNA->seq);
+    if ($refseq->isa("Bio::LiveSeq::Transcript")) {
+	$seqDiff->aa_mut($refseq->get_Translation->seq);
+    } else {
+	$seqDiff->aa_mut($self->RNA->get_Translation->seq);
+    }
+
+    #$seqDiff{mut_dna_seq}=$gene->get_DNA->seq;
+    #my $i=1;
+    #foreach $transcript (@transcripts) {
+    #  $seqDiff{"mut_transcript_${i}_seq"}=$transcript->seq;
+    #  $seqDiff{"mut_translation_${i}_seq"}=$transcript->get_Translation->seq;
+    #}
+    return $seqDiff;
+}
+
+=head2 _mutationpos2label
+
+ Title   : _mutationpos2label
+ Usage   :
+ Function: converts mutation positions into labels
+ Example :
+ Returns : number of valid mutations
+ Args    : LiveSeq sequence object, Bio::Variation::SeqDiff object
+
+=cut
+
+sub _mutationpos2label {
+    # Converts the position of every stored mutation into a label on $refseq.
+    #
+    # Args    : $refseq  - object providing label($pos) (a LiveSeq sequence
+    #                      object according to the POD above)
+    #           $SeqDiff - object providing offset(); only consulted when
+    #                      the numbering is 'entry'
+    # Returns : number of mutations that received a label > 0 (0 if none)
+    my ($self, $refseq, $SeqDiff) = @_;
+
+    # Start at 0 so that callers always get a defined number back, even
+    # when no mutation maps onto the reference sequence.
+    my $count = 0;
+
+    foreach my $mut (@{$self->{'mutations'}}) {
+        if ($self->numbering eq 'entry') {
+            # Shift the position by the SeqDiff offset.  Values that end up
+            # below 1 are decremented once more (presumably because this
+            # numbering has no position 0 -- confirm).
+            my $tmp = $mut->pos;
+            $tmp -= $SeqDiff->offset;
+            $tmp-- if $tmp < 1;
+            $mut->pos($tmp);
+        }
+
+        # get the label for the position; only labels > 0 count as valid
+        my $label = $refseq->label($mut->pos);
+        if ($label > 0) {
+            $mut->label($label);
+            $count++;
+        }
+    }
+    return $count;
+}
+
+#
+# Calculate labels around mutated nucleotide
+#
+
+=head2 _set_DNAMutation
+
+ Title   : _set_DNAMutation
+ Usage   :
+ Function:
+
+           Stores DNA level mutation attributes before mutation into
+           Bio::Variation::DNAMutation object.  Links it to SeqDiff
+           object.
+
+ Example :
+ Returns : Bio::Variation::DNAMutation object
+ Args    : Bio::Variation::SeqDiff object
+
+See L<Bio::Variation::DNAMutation> and L<Bio::Variation::SeqDiff>.
+
+=cut
+
+sub _set_DNAMutation {
+    # Builds the Bio::Variation::DNAMutation object describing the current
+    # mutation ($self->mutation) before it is applied, adds it to $seqDiff,
+    # stores it in $self->dnamut and returns it.
+    #
+    # Args    : Bio::Variation::SeqDiff object
+    # Returns : Bio::Variation::DNAMutation object
+    my ($self, $seqDiff) = @_;
+    my $mutation = $self->mutation;
+    my $len      = $mutation->len;
+
+    my $dnamut_start = $mutation->label - $seqDiff->offset;
+    # if negative DNA positions (before ATG)
+    $dnamut_start-- if $dnamut_start <= 0;
+
+    # NOTE(review): for len > 1 the end becomes start + len, whereas the
+    # RNA-level code in this file uses start + len - 1; left unchanged --
+    # confirm which one is intended.
+    my $dnamut_end = ($len == 0 or $len == 1)
+        ? $dnamut_start
+        : $dnamut_start + $len;
+
+    my $dnamut = Bio::Variation::DNAMutation->new(-start => $dnamut_start,
+                                                  -end   => $dnamut_end,
+                                                  );
+    $dnamut->mut_number($mutation->issue);
+    $dnamut->isMutation(1);
+
+    # mutated allele: the replacement sequence given with the mutation
+    my $da_m = Bio::Variation::Allele->new;
+    $da_m->seq($mutation->seq) if $mutation->seq;
+    $dnamut->allele_mut($da_m);
+    $dnamut->add_Allele($da_m);
+
+    # original allele: fetch prelabel..postlabel and strip both flanking
+    # nucleotides, leaving only the sequence that is going to be changed
+    my $allele_ori = $self->DNA->labelsubseq($mutation->prelabel,
+                                             undef,
+                                             $mutation->postlabel);
+    chop $allele_ori;                     # chop the postlabel nucleotide
+    $allele_ori = substr($allele_ori, 1); # away the prelabel nucleotide
+    my $da_o = Bio::Variation::Allele->new;
+    $da_o->seq($allele_ori) if $allele_ori;
+    $dnamut->allele_ori($da_o);
+    $dnamut->length($len == 0 ? $len : CORE::length($allele_ori));
+
+    $seqDiff->add_Variant($dnamut);
+    $self->dnamut($dnamut);
+
+    # setting proof
+    my $numbering = $seqDiff->numbering;
+    if ($numbering eq "entry" or $numbering eq "gene") {
+        $dnamut->proof('experimental');
+    }
+    else {
+        $dnamut->proof('computed');
+    }
+
+    # how many nucleotides to store upstream and downstream of the change
+    my $flanklen = $self->{'flanklen'};
+    my $uplabel  = $self->DNA->label(1 - $flanklen, $mutation->prelabel); # this could be unavailable!
+
+    # without a usable upstream label, start from the beginning of the DNA
+    # (less than $flanklen nucleotides)
+    my $upstart = $uplabel > 0 ? $uplabel : $self->DNA->start;
+    $dnamut->upStreamSeq($self->DNA->labelsubseq($upstart, undef, $mutation->prelabel));
+
+    # $flanklen or less nucleotides
+    $dnamut->dnStreamSeq($self->DNA->labelsubseq($mutation->postlabel, $flanklen));
+    return $dnamut;
+}
+
+
+# _rnaAffected: check whether the current mutation propagates to the RNA (and AA) level.
+# It compares the mutation's prelabel/postlabel with the RNA start/end and the exon boundaries.
+#
+# Side effects: sets region, region_value and region_dist on $self->dnamut;
+# resets $self->rnachange for intronic mutations. Returns 1 (affected) or 0.
+#
+
+sub _rnaAffected {
+    my ($self) = @_;
+    my @exons=$self->RNA->all_Exons;
+    my $RNAstart=$self->RNA->start;
+    my $RNAend=$self->RNA->end;
+    my ($firstexon,$before,$after,$i);
+    my ($rnaAffected) = 0;
+
+    # check for inserted labels (these require follows() instead of <,>)
+    my $DNAend=$self->RNA->{'seq'}->end; # end label of the sequence object behind the RNA
+    if ($self->mutation->prelabel > $DNAend or $self->mutation->postlabel > $DNAend) {
+	 #this means one of the two labels is an inserted one (coming from a
+	 #previous mutation). This would falsify all <,> checks, so follows() has to be used.
+	 #NOTE(review): follows() is called below as a plain function, not as a method; confirm it is defined in this package.
+	 $self->warn("Attention, workaround not fully tested yet! Expect unpredictable results.\n");
+	 if (($self->mutation->postlabel==$RNAstart) or (follows($self->mutation->postlabel,$RNAstart))) {
+	     $self->warn("RNA not affected because change occurs before RNAstart");
+	 }
+	 elsif (($RNAend==$self->mutation->prelabel) or (follows($RNAend,$self->mutation->prelabel))) {
+	     $self->warn("RNA not affected because change occurs after RNAend");
+	 }
+	 elsif (scalar @exons == 1) {
+	     #no introns, just one exon
+	     $rnaAffected = 1; # then RNA is affected!
+	 } else {
+	     # otherwise check for change occurring inside an intron
+	     $firstexon=shift(@exons);
+	     $before=$firstexon->end;
+	
+	     foreach $i (0..$#exons) {
+		 $after=$exons[$i]->start;
+		 if (follows($self->mutation->prelabel,$before) or
+			($after==$self->mutation->prelabel) or
+			follows($after,$self->mutation->prelabel) or
+			follows($after,$self->mutation->postlabel)) {
+
+		     $rnaAffected = 1;
+		     # $i is number of exon and can be used for proximity check
+		 }
+		 $before=$exons[$i]->end;
+	     }
+	
+	 }
+    } else {
+	my $strand = $exons[0]->strand; # all comparisons below are mirrored for strand != 1
+	if (($strand == 1 and $self->mutation->postlabel <= $RNAstart) or
+	    ($strand != 1 and $self->mutation->postlabel >= $RNAstart)) {
+	    #$self->warn("RNA not affected because change occurs before RNAstart");
+	    $rnaAffected = 0;
+	}
+	elsif (($strand == 1 and $self->mutation->prelabel >= $RNAend) or
+		($strand != 1 and $self->mutation->prelabel <= $RNAend)) {
+	     #$self->warn("RNA not affected because change occurs after RNAend");
+	     $rnaAffected = 0;
+	     my $dist; # distance of the prelabel from the RNA end
+	     if ($strand == 1){
+		 $dist = $self->mutation->prelabel - $RNAend;
+	     } else {
+		 $dist = $RNAend - $self->mutation->prelabel;
+	     }
+	     $self->dnamut->region_dist($dist);
+	 }
+	 elsif (scalar @exons == 1) {
+	     #if just one exon -> no introns, 
+	     $rnaAffected = 1; # then RNA is affected!
+	 } else {	
+	     # otherwise check for mutation occurring inside an intron
+	     $firstexon=shift(@exons); # @exons now starts with the second exon
+	     $before=$firstexon->end;
+	     if ( ($strand == 1 and $self->mutation->prelabel < $before) or 
+		  ($strand == -1 and $self->mutation->prelabel > $before) 
+		  ) {
+		 $rnaAffected = 1 ; # the mutation starts inside the first exon
+
+		 #print "Exon 1 : ", $firstexon->start, " - ", $firstexon->end, "<br>\n";
+		 my $afterdist = $self->mutation->prelabel - $firstexon->start;
+		 my $beforedist =  $firstexon->end - $self->mutation->postlabel;
+		 my $exonvalue = $i + 1; # $i has not been assigned yet in this branch, so this evaluates to 1
+		 $self->dnamut->region('exon');
+		 $self->dnamut->region_value($exonvalue);
+		 if ($afterdist < $beforedist) {
+		     $afterdist++; 		  
+		     $afterdist++; # NOTE(review): incremented twice here but only once in the later exon branch -- confirm
+		     $self->dnamut->region_dist($afterdist);
+		     #print "splice site $afterdist nt upstream!<br>";
+		 } else {
+		     $self->dnamut->region_dist($beforedist);
+		     #print "splice site $beforedist nt downstream!<br>";
+		 }
+	     } else {
+		 #print "first exon  : ", $firstexon->start, " - ", $firstexon->end, "<br>\n";
+		 foreach $i (0..$#exons) {
+		     $after=$exons[$i]->start; # $before..$after are the ends of the two exons around the current intron
+		     #proximity test for intronic mutations
+		     if ( ($strand == 1 and 
+			   $self->mutation->prelabel >=  $before and 
+			   $self->mutation->postlabel <= $after) 
+			  or
+			  ($strand == -1 and 
+			   $self->mutation->prelabel <=  $before and 
+			   $self->mutation->postlabel >= $after)  ) {
+			 $self->dnamut->region('intron');
+			 #$self->dnamut->region_value($i);
+			 my $afterdist = $self->mutation->prelabel - $before;
+			 my $beforedist =  $after - $self->mutation->postlabel;
+			 my $intronvalue = $i + 1;
+			 if ($afterdist < $beforedist) {
+			     $afterdist++;
+			     $self->dnamut->region_value($intronvalue);
+			     $self->dnamut->region_dist($afterdist);
+			     #print "splice site $afterdist nt upstream!<br>";
+			 } else {
+			     $self->dnamut->region_value($intronvalue);
+			     $self->dnamut->region_dist($beforedist * -1); # stored as a negative distance
+			     #print "splice site $beforedist nt downstream!<br>";
+			 }
+			 $self->rnachange(undef);
+			 last;
+		     } 
+		     #proximity test for exon mutations
+		     #(the prelabel or the postlabel lies strictly inside this exon)
+		     elsif ( ( $strand == 1 and 
+			       $exons[$i]->start < $self->mutation->prelabel and 
+			       $exons[$i]->end > $self->mutation->prelabel) or 
+			     ( $strand == 1 and 
+			       $exons[$i]->start < $self->mutation->postlabel and 
+			       $exons[$i]->end > $self->mutation->postlabel) or 
+			     ( $strand == -1 and 
+			       $exons[$i]->start > $self->mutation->prelabel and 
+			       $exons[$i]->end < $self->mutation->prelabel) or
+			     ( $strand == -1 and 
+			       $exons[$i]->start > $self->mutation->postlabel and 
+			       $exons[$i]->end < $self->mutation->postlabel) ) {
+			 $rnaAffected = 1;
+
+			 my $afterdist = $self->mutation->prelabel - $exons[$i]->start;
+			 my $beforedist =  $exons[$i]->end - $self->mutation->postlabel;
+			 my $exonvalue = $i + 1;
+			 $self->dnamut->region('exon');
+			 if ($afterdist < $beforedist) {
+			     $afterdist++;
+			     $self->dnamut->region_value($exonvalue+1); # +1 again: the first exon was shifted off @exons above
+			     $self->dnamut->region_dist($afterdist);
+			     #print "splice site $afterdist nt upstream!<br>";
+			 } else {
+			     #$beforedist;
+			     $self->dnamut->region_value($exonvalue+1); # +1 again: the first exon was shifted off @exons above
+			     $self->dnamut->region_dist($beforedist * -1); # stored as a negative distance
+			     #print "splice site $beforedist nt downstream!<br>";
+			 }
+			 last;
+		     }
+		     $before=$exons[$i]->end;
+		 }
+	     }
+	 }
+     }
+    #$self->warn("RNA not affected because change occurs inside an intron");
+    #return(0); # if still not returned, then not affected, return 0
+    return $rnaAffected;
+}
+
+#
+# ### Creation of RNA and AA variation objects
+#
+
+=head2 _set_effects
+
+ Title   : _set_effects
+ Usage   :
+ Function:
+
+           Stores RNA and AA level mutation attributes before mutation
+           into Bio::Variation::RNAChange and
+           Bio::Variation::AAChange objects.  Links them to
+           SeqDiff object.
+
+ Example :
+ Returns :
+ Args    : Bio::Variation::SeqDiff object
+           Bio::Variation::DNAMutation object
+
+See L<Bio::Variation::RNAChange>, L<Bio::Variation::AAChange>,
+L<Bio::Variation::SeqDiff>, and L<Bio::Variation::DNAMutation>.
+
+=cut
+
+sub _set_effects {
+    # Creates the RNAChange and AAChange objects describing the current
+    # mutation *before* it is applied, links them to each other, to $dnamut
+    # and to $seqDiff, and stores them in $self->rnachange / $self->aachange.
+    # The exon list as it is before the mutation is saved via $self->exons.
+    #
+    # Args    : Bio::Variation::SeqDiff object
+    #           Bio::Variation::DNAMutation object
+    my ($self, $seqDiff, $dnamut) = @_;
+    my ($rnapos_end, $upstreamseq, $dnstreamseq);
+    my $flanklen = $self->{'flanklen'};
+
+    # a zero-length change starts and ends at the same transcript position
+    if ($self->mutation->len == 0) {
+        $rnapos_end = $self->mutation->transpos;
+    }
+    else {
+        $rnapos_end = $self->mutation->transpos + $self->mutation->len - 1;
+    }
+    my $rnachange = Bio::Variation::RNAChange->new(-start => $self->mutation->transpos,
+                                                   -end   => $rnapos_end
+                                                   );
+    $rnachange->isMutation(1);
+
+    # setting proof
+    if ($seqDiff->numbering eq "coding") {
+        $rnachange->proof('experimental');
+    }
+    else {
+        $rnachange->proof('computed');
+    }
+
+    $seqDiff->add_Variant($rnachange);
+    $self->rnachange($rnachange);
+    $rnachange->DNAMutation($dnamut);
+    $dnamut->RNAChange($rnachange);
+    $rnachange->mut_number($self->mutation->issue);
+
+    # setting the codon_position of the "start" nucleotide of the change
+    $rnachange->codon_pos(($self->RNA->frame($self->mutation->label)) + 1); # codon_pos=frame+1
+
+    my @exons = $self->RNA->all_Exons;
+    $self->exons(\@exons);
+
+    #
+    # upstream flanking sequence
+    #
+    # The prelabel is taken from the RNA object (fix for the "fac7 g 65"
+    # bug); all prelabels used in this block are RNA prelabels.
+    my $RNAprelabel = $self->RNA->label(-1, $self->mutation->label);
+    my $uplabel     = $self->RNA->label(1 - $flanklen, $RNAprelabel);
+    if ($self->RNA->valid($uplabel)) {
+        $upstreamseq = $self->RNA->labelsubseq($uplabel, undef, $RNAprelabel);
+    }
+    else {
+        # Not enough transcript upstream of the change: take what the
+        # transcript offers and fetch the lacking part through the first exon.
+        $upstreamseq = $self->RNA->labelsubseq($self->RNA->start, undef, $RNAprelabel)
+            if $self->RNA->valid($RNAprelabel);
+        # nothing retrievable from the transcript: avoid using undef below
+        $upstreamseq = '' unless defined $upstreamseq;
+        my $lacking      = $flanklen - length($upstreamseq); # how many missing
+        my $upstream_atg = $exons[0]->subseq(-$lacking, -1);
+        $upstreamseq = $upstream_atg . $upstreamseq;
+    }
+    $rnachange->upStreamSeq($upstreamseq);
+
+    #
+    # downstream flanking sequence
+    #
+    # won't work OK if postlabel NOT in Transcript
+    # now added RNApostlabel but this has to be /fully tested/
+    my $RNApostlabel; # to fix fac7g64 bug
+    if ($self->mutation->len == 0) {
+        $RNApostlabel = $self->mutation->label;
+    }
+    else {
+        my $mutlen = 1 + $self->mutation->len;
+        $RNApostlabel = $self->RNA->label($mutlen, $self->mutation->label);
+    }
+    $dnstreamseq = $self->RNA->labelsubseq($RNApostlabel, $flanklen);
+    if ($dnstreamseq eq '-1') { # if out of transcript was requested
+        my $lastexon       = $exons[-1];
+        my $lastexonlength = $lastexon->length;
+        $dnstreamseq = $self->RNA->labelsubseq($RNApostlabel); # retrieves till RNAend
+        my $lacking         = $flanklen - length($dnstreamseq); # how many missing
+        my $downstream_stop = $lastexon->subseq($lastexonlength + 1, undef, $lacking);
+        $dnstreamseq .= $downstream_stop;
+    }
+    # Store the flank in both cases.  It used to be stored only when the
+    # first retrieval succeeded, so the sequence assembled in the fallback
+    # above was silently discarded.
+    $rnachange->dnStreamSeq($dnstreamseq);
+
+    #
+    # AAChange creation
+    #
+    my $AAobj = $self->RNA->get_Translation;
+    # storage of prelabel here, to be used in _post_mutation, which turns it
+    # into an amino acid position once the change has been done
+    my $aachange = Bio::Variation::AAChange->new(-start => $RNAprelabel
+                                                 );
+    $aachange->isMutation(1);
+    $aachange->proof('computed');
+
+    $seqDiff->add_Variant($aachange);
+    $self->aachange($aachange);
+    $rnachange->AAChange($aachange);
+    $aachange->RNAChange($rnachange);
+
+    $aachange->mut_number($self->mutation->issue);
+
+    # RNA alleles: the original one is copied from the DNA level, the
+    # mutated one is the sequence given with the mutation
+    my $ra_o = Bio::Variation::Allele->new;
+    $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq;
+    $rnachange->allele_ori($ra_o);
+
+    $rnachange->length(CORE::length $rnachange->allele_ori->seq);
+
+    my $ra_m = Bio::Variation::Allele->new;
+    $ra_m->seq($self->mutation->seq) if $self->mutation->seq;
+    $rnachange->allele_mut($ra_m);
+    $rnachange->add_Allele($ra_m);
+
+    $rnachange->end($rnachange->start) if $rnachange->length == 0;
+
+    # this holds the aminoacid sequence that will be affected by the mutation
+    my $aa_allele_ori = $AAobj->labelsubseq($self->mutation->label, undef,
+                                            $self->mutation->lastlabel);
+
+    my $aa_o = Bio::Variation::Allele->new;
+    $aa_o->seq($aa_allele_ori) if $aa_allele_ori;
+    $aachange->allele_ori($aa_o);
+
+    my $aa_length_ori = length($aa_allele_ori);
+    $aachange->length($aa_length_ori);
+    $aachange->end($aachange->start + $aa_length_ori - 1 );
+}
+
+=head2 _untranslated
+
+ Title   : _untranslated
+ Usage   :
+ Function:
+
+           Stores RNA change attributes before mutation
+           into Bio::Variation::RNAChange object.  Links it to
+           SeqDiff object.
+
+ Example :
+ Returns :
+ Args    : Bio::Variation::SeqDiff object
+           Bio::Variation::DNAMutation object
+
+See L<Bio::Variation::RNAChange>, L<Bio::Variation::SeqDiff> and
+L<Bio::Variation::DNAMutation> for details.
+
+=cut
+
+sub  _untranslated {
+    # Describes a mutation in an untranslated region: builds an RNAChange
+    # whose alleles, flanking sequences, length and mutation number are
+    # copied from $dnamut, labels both objects as 5'UTR or 3'UTR and sets
+    # their region distances.  The RNAChange is linked to $seqDiff and
+    # $dnamut only if the computed distance places the mutation in the mRNA.
+    my ($self, $seqDiff, $dnamut) = @_;
+    my $mutation = $self->mutation;
+
+    # a zero-length change ends where it starts
+    my $rna_start = $mutation->transpos;
+    my $rna_end   = $rna_start;
+    $rna_end += $mutation->len - 1 unless $mutation->len == 0;
+
+    my $rnachange = Bio::Variation::RNAChange->new(-start => $rna_start,
+                                                   -end   => $rna_end
+                                                   );
+    $rnachange->isMutation(1);
+
+    # both alleles are taken over from the DNA level
+    my $ori_allele = Bio::Variation::Allele->new;
+    $ori_allele->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq;
+    $rnachange->allele_ori($ori_allele);
+
+    my $mut_allele = Bio::Variation::Allele->new;
+    $mut_allele->seq($dnamut->allele_mut->seq) if $dnamut->allele_mut->seq;
+    $rnachange->allele_mut($mut_allele);
+    $rnachange->add_Allele($mut_allele);
+
+    $rnachange->upStreamSeq($dnamut->upStreamSeq);
+    $rnachange->dnStreamSeq($dnamut->dnStreamSeq);
+    $rnachange->length($dnamut->length);
+    $rnachange->mut_number($dnamut->mut_number);
+
+    # setting proof
+    $rnachange->proof($seqDiff->numbering eq "coding" ? 'experimental' : 'computed');
+
+    if ($rnachange->end < 0) {
+        # negative end position: 5' of the coding sequence
+        $rnachange->region('5\'UTR');
+        $dnamut->region('5\'UTR');
+        my $dna_dist = $dnamut->end;
+        $dnamut->region_dist($dna_dist);
+        my $rna_dist = $seqDiff->offset - $self->gene->maxtranscript->start + 1 + $dna_dist;
+        $rnachange->region_dist($rna_dist);
+        return if $rna_dist < 1; # if mutation is not in mRNA
+    }
+    else {
+        $rnachange->region('3\'UTR');
+        $dnamut->region('3\'UTR');
+        my $dna_dist = $dnamut->start - $seqDiff->cds_end + $seqDiff->offset;
+        $dnamut->region_dist($dna_dist);
+        my $rna_dist = $seqDiff->cds_end - $self->gene->maxtranscript->end - 1 + $dna_dist;
+        $rnachange->region_dist($rna_dist);
+        return if $rna_dist > 0; # if mutation is not in mRNA
+    }
+
+    # the mutation lies within the mRNA: register and cross-link the change
+    $seqDiff->add_Variant($rnachange);
+    $self->rnachange($rnachange);
+    $rnachange->DNAMutation($dnamut);
+    $dnamut->RNAChange($rnachange);
+}
+
+# Args: Bio::Variation::SeqDiff object (only referenced in commented-out code below)
+# Function: completes the AAChange object with information available only
+# AFTER the change has taken place and flags modified exons. Always returns 1.
+sub _post_mutation {
+    my ($self, $seqDiff) = @_;
+
+    if ($self->rnachange and $self->rnachange->region eq 'coding') { # nothing to do for non-coding changes
+
+	#$seqDiff->add_Variant($self->rnachange);
+
+	my $aachange=$self->aachange;
+	my ($AAobj,$aa_start_prelabel,$aa_start,$mut_translation);
+	$AAobj=$self->RNA->get_Translation;
+	$aa_start_prelabel=$aachange->start; # _set_effects stored the RNA prelabel as start
+	$aa_start=$AAobj->position($self->RNA->label(2,$aa_start_prelabel)); # convert the label after the prelabel into a position in the translation
+	$aachange->start($aa_start);
+	$mut_translation=$AAobj->seq; # translation after the change
+
+	# this now takes in account possible preinsertions
+	my $aa_m = Bio::Variation::Allele->new;
+	$aa_m->seq(substr($mut_translation,$aa_start-1)) if substr($mut_translation,$aa_start-1); # from the changed position to the end; trimmed below
+	$aachange->allele_mut($aa_m);
+	$aachange->add_Allele($aa_m);
+	#$aachange->allele_mut(substr($mut_translation,$aa_start-1));
+	#$aachange->allele_mut($mut_translation);
+	my ($rlenori, $rlenmut);
+	$rlenori = CORE::length($aachange->RNAChange->allele_ori->seq); # lengths of the RNA alleles decide the case below
+	$rlenmut = CORE::length($aachange->RNAChange->allele_mut->seq);
+	#point mutation
+
+	if ($rlenori == 1 and $rlenmut == 1 and $aachange->allele_ori->seq ne '*') {
+	     my $alleleseq;
+	     if ($aachange->allele_mut->seq) {
+		 $alleleseq = substr($aachange->allele_mut->seq, 0, 1); # keep only the first amino acid
+		 $aachange->allele_mut->seq($alleleseq);
+	     }
+	     $aachange->end($aachange->start);
+	     $aachange->length(1);
+	 }
+	elsif ( $rlenori == $rlenmut and 
+		$aachange->allele_ori->seq ne '*' ) { #complex inframe mutation
+	    $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 
+				       0, 
+				       length($aachange->allele_ori->seq)); # trim to the length of the original aa allele
+	}
+	#inframe mutation
+	elsif ((int($rlenori-$rlenmut))%3 == 0) {
+	    if ($aachange->RNAChange->allele_mut->seq  and
+		$aachange->RNAChange->allele_ori->seq ) {
+		# complex
+		my $rna_len = length ($aachange->RNAChange->allele_mut->seq);
+		my $len = $rna_len/3;
+		$len++ unless $rna_len%3 == 0; # NOTE(review): $len is fractional here when $rna_len is not a multiple of 3; substr truncates it -- confirm intent
+		$aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, $len );
+	    }
+	    elsif  ($aachange->RNAChange->codon_pos == 1){
+		 # deletion
+		if ($aachange->RNAChange->allele_mut->seq eq  '') {
+		    $aachange->allele_mut->seq('');
+		    $aachange->end($aachange->start + $aachange->length - 1 );
+		}
+		 # insertion
+		 elsif ($aachange->RNAChange->allele_ori->seq eq '' ) {
+		     $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0,
+					   length ($aachange->RNAChange->allele_mut->seq) / 3);
+		     $aachange->allele_ori->seq('');
+		     $aachange->end($aachange->start + $aachange->length - 1 ); # uses the length from before it is reset to 0 on the next line
+		     $aachange->length(0);
+		 }
+	    } else {
+		#elsif  ($aachange->RNAChange->codon_pos == 2){
+		 # deletion
+		 if (not $aachange->RNAChange->allele_mut->seq ) {
+		     $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, 1);
+		 }
+		 # insertion
+		 elsif (not $aachange->RNAChange->allele_ori->seq) {
+		     $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0,
+						length ($aachange->RNAChange->allele_mut->seq) / 3 +1);
+		 }
+	     }
+	 } else {
+	     #frameshift
+	     #my $pos = index $aachange->allele_mut
+	     #$aachange->allele_mut(substr($aachange->allele_mut, 0, 1));
+	     $aachange->length(CORE::length($aachange->allele_ori->seq));
+	     my $aaend = $aachange->start + $aachange->length -1; # NOTE(review): $aaend is never used; end is set to start on the next line
+	     $aachange->end($aachange->start);
+	 }
+
+	 # splicing site deletion check
+	 my @beforeexons=@{$self->exons}; # exon list saved by _set_effects before the change
+	 my @afterexons=$self->RNA->all_Exons;
+	 my $i;
+	 if (scalar(@beforeexons) ne scalar(@afterexons)) { # the two counts are compared as strings
+	     my $mut_number = $self->mutation->issue;
+	     $self->warn("Exons have been modified at mutation n.$mut_number!");
+	     $self->rnachange->exons_modified(1);
+	 } else {
+	   EXONCHECK:
+	     foreach $i (0..$#beforeexons) {
+		 if ($beforeexons[$i] ne $afterexons[$i]) { # string comparison of the exon objects (presumably their reference addresses -- confirm)
+	     my $mut_number = $self->mutation->issue;
+		     $self->warn("Exons have been modified at mutation n.$mut_number!");
+		     $self->rnachange->exons_modified(1);
+		     last EXONCHECK;
+		 }
+	     }
+	 }
+     } else {
+	 #$seqDiff->rnachange(undef);
+	 #print "getting here?";
+     }
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Prim_Transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Prim_Transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Prim_Transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+# $Id: Prim_Transcript.pm,v 1.9.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Prim_Transcript
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Prim_Transcript - Prim_Transcript class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+Class for PRIM_TRANSCRIPT objects. They consist of a beginlabel, an endlabel (both
+referring to a LiveSeq DNA object) and a strand.
+The strand could be 1 (forward strand, default), -1 (reverse strand).
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Prim_Transcript;
+
+use strict;
+use base qw(Bio::LiveSeq::Range);
+
+=head2 new
+
+  Title   : new
+  Usage   : $prim_transcript1=Bio::LiveSeq::Prim_Transcript->new(-seq => $objref,
+							-start => $startlabel,
+							-end => $endlabel, 
+							-strand => 1
+							);
+
+  Function: generates a new Bio::LiveSeq::Prim_Transcript
+  Returns : reference to a new object of class Prim_Transcript
+  Errorcode -1
+  Args    : two labels and an integer
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Range.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Range.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Range.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,118 @@
+# $Id: Range.pm,v 1.9.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Range
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Range - Range abstract class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+This is used as parent for exon and intron classes.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Range;
+use strict;
+use base qw(Bio::LiveSeq::SeqI);
+
+=head2 new
+
+  Title   : new
+  Usage   : $range1 = Bio::LiveSeq::Range->new(-seq => $obj_ref,
+                                               -start => $beginlabel,
+                                               -end => $endlabel, -strand => 1);
+
+  Function: generates a new Bio::LiveSeq::Range
+  Returns : reference to a new object of class Range
+  Errorcode -1
+  Args    : two labels, an obj_ref and an integer
+            strand 1=forward strand, strand -1=reverse strand
+            if strand not specified, it defaults to 1
+            the -seq argument must point to the underlying DNA LiveSeq object
+
+=cut
+
+sub new {
+  # Constructor: checks both labels and the strand against the sequence
+  # object passed as -seq, then stores all four values in the new object.
+  # Every failure path issues a warning and returns the error code -1
+  # instead of an object.
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+  my ($seq, $start, $end, $strand) = @args{qw(-seq -start -end -strand)};
+
+  # Bless right away: the checks below report problems through the
+  # object's warn() method.
+  my $self = bless {}, $class;
+
+  # Both boundary labels have to be valid for the sequence.
+  if (not $seq->valid($start)) {
+    $self->warn("$class not initialised because start label not valid");
+    return (-1);
+  }
+  if (not $seq->valid($end)) {
+    $self->warn("$class not initialised because end label not valid");
+    return (-1);
+  }
+
+  # The strand defaults to 1 (forward); only 1 and -1 are accepted.
+  $strand = 1 unless defined $strand;
+  if ($strand != 1 and $strand != -1) {
+    $self->warn("$class not initialised because strand identifier not valid. Use 1 (forward strand) or -1 (reverse strand).");
+    return (-1);
+  }
+
+  # Identical labels are accepted with a warning; otherwise the end label
+  # has to follow the start label on the given strand.
+  if ($start eq $end) {
+    $self->warn("$class reports: start and end label are the same....");
+  }
+  elsif ($seq->follows($start, $end, $strand) != 1) {
+    $self->warn("Fatal: end label $end doesn't follow start label $start for strand $strand!");
+    return (-1);
+  }
+
+  # Store the validated range.
+  $self->{'seq'}    = $seq;
+  $self->{'start'}  = $start;
+  $self->{'end'}    = $end;
+  $self->{'strand'} = $strand;
+
+  return $self;
+}
+
+=head2 valid
+
+  Title   : valid
+  Usage   : $boolean = $obj->valid($label)
+  Function: tests if a label exists AND is part of the object
+  Returns : boolean
+  Args    : label
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Region.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Region.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Region.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+# $Id: Repeat_Region.pm,v 1.9.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Repeat_Region
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Repeat_Region - Repeat_Region class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+Class for REPEAT_REGION objects. They consist of a beginlabel, an endlabel (both
+referring to a LiveSeq DNA object) and a strand.
+The strand could be 1 (forward strand, default), -1 (reverse strand).
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Repeat_Region;
+
+
+use strict;
+use base qw(Bio::LiveSeq::Range);
+
+=head2 new
+
+  Title   : new
+  Usage   : $repeat_region1=Bio::LiveSeq::Repeat_Region->new(-seq => $objref,
+					      -start => $startlabel,
+					      -end => $endlabel, -strand => 1);
+
+  Function: generates a new Bio::LiveSeq::Repeat_Region
+  Returns : reference to a new object of class Repeat_Region
+  Errorcode -1
+  Args    : two labels and an integer
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Unit.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Unit.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Repeat_Unit.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,59 @@
+# $Id: Repeat_Unit.pm,v 1.9.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Repeat_Unit
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Repeat_Unit - Repeat_Unit class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+Class for REPEAT_UNIT objects. They consist of a beginlabel, an endlabel (both
+referring to a LiveSeq DNA object) and a strand.
+The strand could be 1 (forward strand, default), -1 (reverse strand).
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Repeat_Unit;
+
+use strict;
+use base qw(Bio::LiveSeq::Repeat_Region);
+
+=head2 new
+
+  Title   : new
+  Usage   : $intron1=Bio::LiveSeq::Repeat_Unit->new(-seq => $objref,
+					      -start => $startlabel,
+					      -end => $endlabel, -strand => 1);
+
+  Function: generates a new Bio::LiveSeq::Repeat_Unit
+  Returns : reference to a new object of class Repeat_Unit
+  Errorcode -1
+  Args    : two labels and an integer
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/SeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/SeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/SeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1183 @@
+# $Id: SeqI.pm,v 1.31.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::SeqI
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+This class implements BioPerl PrimarySeqI interface for Live Seq objects.
+
+One of the main differences in LiveSequence compared to traditional
+"string" sequences is that coordinate systems are flexible. Typically
+gene nucleotide numbering starts from 1 at the first character of the
+initiator codon (A in ATG). This means that negative positions are
+possible and common!
+
+Secondly, the sequence manipulation methods do not return a new
+sequence object but change the current object. The current status can
+be written out to BioPerl sequence objects. 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+Some note on the terminology/notation of method names:
+ label: a unique pointer to a single nucleotide
+ position: the position of a nucleotide according to a particular coordinate
+           system (e.g. counting downstream from a particular label taken as
+           number 1)
+ base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g")
+
+       a base is the "value" that an "element" of a "chain" can assume
+         (see documentation on the Chain datastructure if interested)
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::LiveSeq::SeqI;
+use strict;
+use Bio::Tools::CodonTable; # for the translate() function
+
+use base qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI);
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string    = $obj->seq()
+ Function: Returns the complete sequence of an object as a string of letters.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard),
+ Returns : a string
+
+
+=cut
+
+sub seq {
+  # Returns the complete sequence of the object as a string, from its start
+  # label to its end label.
+  my ($self) = @_;
+  my ($firstlabel,$lastlabel) = ($self->start(),$self->end());
+
+  # forward strand: the chain is read with down_chain2string, bases untouched
+  if ($self->strand() == 1) {
+    return $self->{'seq'}->down_chain2string($firstlabel,undef,$lastlabel);
+  }
+
+  # any other strand value is handled as reverse strand: the chain is read
+  # with up_chain2string and every IUPAC symbol is replaced by its complement
+  my $complement = $self->{'seq'}->up_chain2string($firstlabel,undef,$lastlabel);
+  $complement =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+  return $complement;
+}
+
+=head2 all_labels
+
+ Title   : all_labels
+ Usage   : @labels = $obj->all_labels()
+ Function: all the labels of every nucleotide an object is composed of
+ Returns : an array of labels
+ Args    : none
+
+=cut
+
+sub all_labels {
+  # Returns the list of labels between the start and the end of the object,
+  # asking the underlying chain for down_labels (strand 1) or up_labels
+  # (any other strand).
+  my ($self) = @_;
+  my ($firstlabel,$lastlabel) = ($self->start(),$self->end());
+  my $labelsref = ($self->strand() == 1)
+    ? $self->{'seq'}->down_labels($firstlabel,$lastlabel)
+    : $self->{'seq'}->up_labels($firstlabel,$lastlabel);
+  # the chain hands back an array reference; callers get a plain list
+  return @{$labelsref};
+}
+
+=head2 labelsubseq
+
+  Title   : labelsubseq
+  Usage   : $dna->labelsubseq();
+          : $dna->labelsubseq($startlabel);
+          : $dna->labelsubseq($startlabel,$length);
+          : $dna->labelsubseq($startlabel,undef,$endlabel);
+  e.g.    : $dna->labelsubseq(4,undef,8);
+  Function: returns the sequence as a string. The difference between
+            labelsubseq and normal subseq is that it uses /labels/ as
+            arguments, instead of positions. This allows for faster and more
+            efficient lookup, skipping the (usually) lengthy conversion of
+            positions into labels.
+            This is especially useful for heavy manipulation of LiveSeq
+            objects when the labels are already known.
+  Returns : a string
+  Errorcode -1
+  Args    : without arguments it returns the entire sequence
+            with a startlabel it returns the sequence downstream that label
+            if a length is specified, it returns only that number of bases
+            if an endlabel is specified, it overrides the length argument
+             and prints instead up to that label (included)
+  Defaults: $startlabel defaults to the beginning of the entire sequence
+            $endlabel defaults to the end of the entire sequence
+
+=cut
+
+# NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, especially that they follow each other in the correct order!!!!
+
+sub labelsubseq {
+  # Returns the bases between two labels (or $length bases from $start) as a
+  # string; -1 is returned after a warning when a security check fails.
+  my ($self,$start,$length,$end,$unsecuremode) = @_;
+  my $trustlabels = (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested");
+
+  if ($trustlabels) {
+    # security checks skipped (faster): labels are taken at face value
+    $start=$self->start unless ($start);
+    if (!$end) {
+      # neither endlabel nor length given: read up to the end of the object
+      $end=$self->end unless ($length);
+    } elsif ($end == $start) {
+      ($length,$end)=(1,undef); # a single base is requested
+    } else {
+      undef $length; # the endlabel overrides the length
+    }
+  } else {
+    # labels are checked against the underlying chain before being used
+    if ($start && !$self->{'seq'}->valid($start)) {
+      $self->warn("Start label not valid"); return (-1);
+    }
+    if ($end) {
+      if ($end == $start) {
+        ($length,$end)=(1,undef); # a single base is requested
+      } else {
+        if (!$self->{'seq'}->valid($end)) {
+          $self->warn("End label not valid"); return (-1);
+        }
+        if ($self->follows($start,$end) != 1) {
+          $self->warn("End label does not follow Start label!"); return (-1);
+        }
+        undef $length; # the endlabel overrides the length
+      }
+    }
+  }
+
+  # forward strand: bases are returned as read by down_chain2string
+  if ($self->strand() == 1) {
+    return $self->{'seq'}->down_chain2string($start,$length,$end);
+  }
+  # reverse strand: read with up_chain2string, then complement every symbol
+  my $complement = $self->{'seq'}->up_chain2string($start,$length,$end);
+  $complement =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+  return $complement;
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+         : $substring = $obj->subseq(10,undef,4);
+ Function: returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence
+
+           Start cannot be larger than end but can be equal.
+
+           Allows for negative numbers $obj->subseq(-10,-1). By
+           definition, there is no 0!
+                       -5  -1 1   5
+                gctagcgcccaac atggctcgctg
+
+           This allows to retrieve sequences upstream from given position.
+
+           The precedence is from left to right: if END is given LENGTH is
+           ignored.
+
+ Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1
+           $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive
+
+ Returns : a string
+ Errorcode: -1
+ Args    : start,  integer, defaults to start of the sequence
+           end,    integer, '' or undef, defaults to end of the sequence
+           length, integer, '' or undef
+           an optional strand (1 or -1) 4th argument
+            if strand argument is not given, it will default to the object
+            argument. This argument is useful when a call is issued from a child
+            of a parent object containing the subseq method
+            NOTE: the current implementation ignores this argument and
+            always uses the strand of the object
+
+=cut
+
+#'
+# check the fact about reverse strand!
+# is it feasible? Is it correct? Should we do it? How about exons? Does it
+# work when you ask subseq of an exon?
+# eliminated now (Mon night)
+sub subseq {
+  # Returns, as a string, the bases from position $pos1 up to position $pos2
+  # (inclusive), or $length bases starting at $pos1 when no $pos2 is given.
+  # Positions are converted to labels with label(), i.e. they are counted
+  # from the current coordinate start; there is no position 0.
+  # Defaults: $pos1 -> start of the object, $pos2 -> end of the object.
+  # Returns -1 (after a warning) on any invalid argument.
+  my ($self,$pos1,$pos2,$length,$strand) = @_;
+  # The optional strand argument is accepted but ignored: the strand of the
+  # object itself is always used.
+  $strand=$self->strand;
+  # The documentation allows '' as a synonym of undef for END and LENGTH
+  undef $pos2 if (defined ($pos2) && $pos2 eq '');
+  undef $length if (defined ($length) && $length eq '');
+  my ($str,$startlabel,$endlabel);
+  if (defined ($length) && $length < 1) {
+    $self->warn("No sense asking for a subseq of length < 1");
+    return (-1);
+  }
+  if (defined ($pos1)) {
+    if ($pos1 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    }
+    # same ordering rule on both strands: start cannot be larger than end
+    if (defined ($pos2) && $pos1 > $pos2) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+    $startlabel=$self->label($pos1);
+    if ($startlabel < 1) {
+      $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
+    }
+  } else {
+    $startlabel=$self->start;
+  }
+  if (defined ($pos2)) {
+    if ($pos2 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    }
+    undef $length; # if END is given LENGTH is ignored
+    $endlabel=$self->label($pos2);
+    if ($endlabel < 1) {
+      $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
+    }
+  } elsif (!defined ($length)) {
+    $endlabel=$self->end; # neither END nor LENGTH: read up to the end
+  }
+
+  if ($strand == 1) {
+    $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel);
+  } else { # reverse strand: read with up_chain2string, then complement
+    $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel);
+    $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+  }
+  return $str;
+}
+
+=head2 length
+
+  Title   : length
+  Usage   : $seq->length();
+  Function: returns the number of nucleotides (or the number of aminoacids)
+            in the entire sequence
+  Returns : an integer
+  Errorcode -1
+  Args    : none
+
+=cut
+
+sub length {
+  # Returns the number of elements between the start and the end label of
+  # the object, as counted by the underlying chain.
+  my ($self) = @_;
+  my ($firstlabel,$lastlabel,$strand)=($self->start(),$self->end(),$self->strand());
+  # strand 1 counts with down_subchain_length, any other strand with
+  # up_subchain_length
+  my $counter = ($strand == 1) ? 'down_subchain_length' : 'up_subchain_length';
+  return $self->{'seq'}->$counter($firstlabel,$lastlabel);
+}
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : $id_string = $obj->display_id();
+ Function: returns the display id, alias the common name of the object
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the ID
+           field of the GenBank/EMBL databanks and the id field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information.
+
+ See also: accession_number
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub display_id {
+  # Combined getter/setter: a defined argument is stored as the new display
+  # id; the stored value (possibly undef) is always returned.
+  my $self = shift;
+  my $newid = shift;
+  $self->{'display_id'} = $newid if (defined $newid);
+  return $self->{'display_id'};
+}
+
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number.
+           Notice that primary_id() provides the unique id for the
+           implementation, allowing multiple objects to have the same accession
+           number in a particular implementation.
+
+           For objects with no accession_number this method returns "unknown".
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub accession_number {
+  # Combined getter/setter: a defined argument is stored as the accession
+  # number. As long as nothing was ever stored, "unknown" is returned.
+  my $self = shift;
+  my $newacc = shift;
+  $self->{'accession_number'} = $newacc if (defined $newacc);
+  return (exists $self->{'accession_number'})
+    ? $self->{'accession_number'}
+    : "unknown";
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their own
+           object ids in a way the implementation can control. Clients can
+           expect one id to map to one object.
+
+           For sequences with no primary_id, this method returns
+           a stringified memory location.
+
+ Returns : A string
+ Args    : None
+
+=cut
+
+
+sub primary_id {
+  # Combined getter/setter: a defined argument is stored as the primary id.
+  # As long as nothing was ever stored, the stringified object reference is
+  # returned instead.
+  my $self = shift;
+  my $newid = shift;
+  $self->{'primary_id'} = $newid if (defined $newid);
+  return (exists $self->{'primary_id'})
+    ? $self->{'primary_id'}
+    : "$self";
+}
+
+=head2 change
+
+ Title   : change
+ Usage   : $substring = $obj->change('AA', 10);
+ Function: changes, modifies, mutates the LiveSequence
+ Examples:
+        $obj->change('',   10, 1);   delete nucleotide #10 (length is mandatory)
+        $obj->change('',   10, 2);   delete two nucleotides starting from #10
+        $obj->change('G',  10);      change nuc #10 to 'G'
+        $obj->change('GA', 10, 4);   replace #10 and 3 following with 'GA'
+        $obj->change('GA', 10, 2);   is same as $obj->change('GA',  10);
+        $obj->change('GA', 10, 0 );  insert 'GA' before nucleotide at #10
+        $obj->change('GA', 10, 1);   GA inserted before #10, #10 deleted
+        $obj->change('GATC', 10, 2); GATC inserted before #10, #10&#11 deleted
+        $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
+
+
+ Returns : a string of deleted bases (if any) or 1 (everything OK)
+ Errorcode: -1
+ Args    : seq,    string, or '' ('' = undef = 0 = deletion)
+           start,  integer
+           length, integer (optional)
+
+=cut
+
+sub change {
+  # Alias of positionchange(): the complete argument list (object included)
+  # is handed over unchanged. This is a plain function call, not a method
+  # call on the object.
+  return positionchange(@_);
+}
+
+=head2 positionchange
+
+ Title   : positionchange
+ Function: Exactly like change. I.e. change() defaults to positionchange()
+
+=cut
+
+sub positionchange {
+  # Translates $position into a label with label() and delegates the actual
+  # modification to labelchange(). Returns whatever labelchange() returns,
+  # or -1 (after a warning) when no usable label is found.
+  my ($self,$newseq,$position,$length)=@_;
+  if (!$position) { # undef and 0 are both rejected
+    $self->warn("Position not given or position 0");
+    return (-1);
+  }
+  my $label=$self->label($position);
+  if (not $label > 0) { # label not found or error
+    $self->warn("No valid label found at that position!");
+    return (-1);
+  }
+  return ($self->labelchange($newseq,$label,$length));
+}
+
+=head2 labelchange
+
+ Title   : labelchange
+ Function: Exactly like change but uses a /label/ instead of a position
+           as second argument. This allows for multiple changes in a LiveSeq
+           without the burden of recomputing positions. I.e. for a multiple
+           change in two different points of the LiveSeq, the approach would
+           be the following: fetch the correct labels out of the two different
+           positions (method: label($position)) and then use the labelchange()
+           method to modify the sequence using those labels instead of
+           relying on the positions (which would have changed after the
+           first change).
+
+=cut
+
+sub labelchange {
+  # Changes the sequence starting at $label. Depending on $newseq and
+  # $length the change is carried out as a deletion, an insertion, one or
+  # more point mutations, or a deletion followed by an insertion.
+  # Returns the value of _delete() for pure deletions, the value of
+  # _mutate() for point mutations, 1 for insertions and complex changes,
+  # -1 on error.
+  my ($self,$newseq,$label,$length)=@_;
+
+  if (!$self->valid($label)) {
+    if (!$self->{'seq'}->valid($label)) {
+      $self->warn("Label \'$label\' not valid for executing a LiveSeq change");
+      return (-1);
+    }
+    # label unknown to this object but known to the underlying sequence:
+    # the same arguments (object dropped) are passed on to its labelchange
+    shift;
+    return($self->{'seq'}->labelchange(@_));
+  }
+
+  if (!$newseq) { # no new sequence: it means this is a simple deletion
+    if (!defined($length)) {
+      $self->warn("Length not defined for deletion!");
+      return (-1);
+    }
+    if (not $length >= 0) {
+      $self->warn("No sense having length < 0 in a deletion");
+      return (-1);
+    }
+    return $self->_delete($label,$length);
+  }
+
+  my $newseqlength=CORE::length($newseq);
+  if (!defined($length)) {
+    $length=$newseqlength; # defaults to pointmutation(s)
+  } elsif (not $length >= 0) {
+    $self->warn("No sense having length < 0 in a change()");
+    return (-1);
+  }
+
+  if ($length == 0) { # nothing to replace: it means this is a simple insertion
+    my ($insertbegin)=$self->_praeinsert($label,$newseq);
+    return ($insertbegin == -1) ? (-1) : (1);
+  }
+
+  # as many new bases as replaced ones: simple pointmutation(s)
+  return $self->_mutate($label,$newseq,$length) if ($newseqlength == $length);
+
+  # if we arrived here then change is complex mixture: $length bases are
+  # deleted and the new sequence is inserted before the first label that
+  # follows the deleted stretch
+  my $strand=$self->strand();
+  my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label
+  if (not $afterendlabel > 0) { # label not found or error
+    $self->warn("No valid afterendlabel found for executing the complex mutation!");
+    return (-1);
+  }
+  my $deleted=$self->_delete($label,$length); # first delete length nucs
+  return (-1) if ($deleted == -1); # if errors
+  my ($insertbegin)=$self->_praeinsert($afterendlabel,$newseq); # then insert the newsequence
+  return ($insertbegin == -1) ? (-1) : (1);
+}
+
+# internal methods for change()
+
+# arguments: label before which the insertion takes place (as seen along
+#            the strand of the object), new sequence to insert
+# returns: labels of beginning and end of the inserted sequence
+# errorcode: -1
+sub _praeinsert {
+  my ($self,$label,$newseq)=@_;
+  my ($firstinserted,$lastinserted);
+  if ($self->strand() == 1) {
+    ($firstinserted,$lastinserted)=($self->{'seq'}->praeinsert_string($newseq,$label));
+  } else {
+    # since it's reverse strand and we insert in forward direction, the
+    # reverse complement of the new sequence is inserted with
+    # postinsert_string, and the two returned labels swap their roles
+    my $revcomp=reverse($newseq);
+    $revcomp =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+    ($lastinserted,$firstinserted)=($self->{'seq'}->postinsert_string($revcomp,$label));
+  }
+  # a 0 (or missing) label on either side is treated as failed insertion
+  if (($firstinserted!=0)&&($lastinserted!=0)) {
+    return ($firstinserted,$lastinserted);
+  }
+  $self->warn("Some error occurred while inserting!");
+  return (-1);
+}
+
+# arguments: label for beginning of deletion, length of deletion
+# returns: string of deleted bases, as read along the strand of the object
+# errorcode: -1
+sub _delete {
+  my ($self,$label,$length)=@_;
+  my $strand=$self->strand();
+  # the last label to delete is the one at $length positions after $label
+  my $endlabel=$self->label($length,$label,$strand);
+  if (not $endlabel > 0) { # label not found or error
+    $self->warn("No valid endlabel found for executing the deletion!");
+    return (-1);
+  }
+  # hook called before cutting; this is important in Transcript to fix
+  # exon structure
+  $self->_deletecheck($label,$endlabel);
+  if ($strand == 1) {
+    my $cut=$self->{'seq'}->splice_chain($label,undef,$endlabel);
+    return ($cut);
+  }
+  # we are on reverse strand but we cut anyway in forward direction: the
+  # two labels are passed in swapped order and the reverse complement of
+  # the removed bases is returned
+  my $cut=$self->{'seq'}->splice_chain($endlabel,undef,$label);
+  $cut=reverse($cut);
+  $cut =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+  return ($cut);
+}
+
+# Hook called by _delete() with the first and the last label about to be
+# removed. It does nothing in this class: it is overridden in Transcript.
+sub _deletecheck {
+  return;
+}
+
+# arguments: label for beginning of mutation, newsequence, number of mutations
+# returns: 1 all OK
+# errorcode: -1
+#
+# Overwrites $length consecutive bases, starting at $label and proceeding
+# along the strand of the object, with the bases of $newseq.
+sub _mutate {
+  my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq)
+  my @labels=($label); # labels of the bases to overwrite, first one included
+  my $strand=$self->strand();
+  if ($length != 1) { # a single mutation needs no further label
+    my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
+    unless ($endlabel > 0) { # label not found or error
+      $self->warn("No valid endlabel found for executing the mutation!");
+      return (-1);
+    }
+    if ($length == 2) { # another special case
+      push (@labels,$endlabel);
+    } else { # 3 or more bases changed
+      # The labels are collected one by one through label() instead of
+      # asking the chain for down_labels/up_labels: that shortcut wouldn't
+      # work for Transcript, stepping with label() should be more general.
+      my $current=$label;
+      while ($current != $endlabel) {
+        my $nextlabel=$self->label(2,$current,$strand); # retrieve the next label
+        # a failed lookup (0 or -1) would never reach $endlabel: stop here
+        # instead of looping forever
+        unless ($nextlabel > 0) {
+          $self->warn("No valid label found while walking to the endlabel of the mutation!");
+          return (-1);
+        }
+        push (@labels,$nextlabel);
+        $current=$nextlabel; # move on reference
+      }
+    }
+  }
+  if ($strand == -1) { # only for reverse strand
+    $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
+  }
+  # sum of the values returned by set_value_at_label: if it is not equal to
+  # $length after all the changes, an error did occur
+  my $errorcheck=0;
+  my $i=0;
+  foreach my $base (split(//,$newseq)) {
+    $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]);
+    $i++;
+  }
+  if ($errorcheck != $length) {
+    $self->warn("Some error occurred while mutating!");
+    return (-1);
+  } else {
+    return (1);
+  }
+}
+
+=head2 valid
+
+  Title   : valid
+  Usage   : $boolean = $obj->valid($label)
+  Function: tests if a label exists inside the object
+  Returns : boolean
+  Args    : label
+
+=cut
+
+# argument: label
+# returns: 1 YES 0 NO
+# The label is compared numerically with each label of all_labels().
+sub valid {
+  my ($self,$label)=@_;
+  for my $candidate ($self->all_labels) {
+    return (1) if ($label == $candidate); # found
+  }
+  return (0); # not found
+}
+
+
+=head2 start
+
+  Title   : start
+  Usage   : $startlabel=$obj->start()
+  Function: returns the label of the first nucleotide of the object (exon, CDS)
+  Returns : label
+  Args    : none
+
+=cut
+
+sub start {
+  # Plain getter of the 'start' slot. Common for all classes BUT DNA (which
+  # redefines it) and Transcript (that takes the information from the Exons).
+  my $self = shift;
+  return $self->{'start'};
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $endlabel=$obj->end()
+  Function: returns the label of the last nucleotide of the object (exon, CDS)
+  Returns : label
+  Args    : none
+
+=cut
+
+sub end {
+  # Plain getter of the 'end' slot; nothing is ever set here.
+  my $self = shift;
+  return $self->{'end'};
+}
+
+=head2 strand
+
+  Title   : strand
+  Usage   : $strand=$obj->strand()
+            $obj->strand($strand)
+  Function: gets or sets strand information, being 1 or -1 (forward or reverse)
+  Returns : -1 or 1
+  Args    : none OR -1 or 1
+
+=cut
+
+sub strand {
+  # Combined getter/setter. A true argument is stored only when it is 1 or
+  # -1; any other true value triggers a warning and leaves the strand
+  # untouched. The stored strand is returned in every case.
+  my $self = shift;
+  my $requested = shift;
+  if ($requested) {
+    if (($requested == 1)||($requested == -1)) {
+      $self->{'strand'} = $requested;
+    } else {
+      $self->warn("strand information not changed because strand identifier not valid");
+    }
+  }
+  return $self->{'strand'};
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+ Returns : a string either 'dna','rna','protein'.
+ Args    : none
+ Note    : "circular dna" is set as dna
+
+=cut
+
+
+sub alphabet {
+  # Combined getter/setter of the molecule type. "circular dna" inside the
+  # given value is rewritten to "dna". A value other than dna, rna or
+  # protein triggers a warning but is stored all the same.
+  my ($self,$value) = @_;
+  return $self->{'alphabet'} unless (defined $value);
+
+  my %known = (dna => 1, rna => 1, protein => 1);
+  $value =~ s/circular dna/dna/;
+  if (!$known{$value}) {
+    $self->warn("Molecular type '$value' is not a valid type");
+  }
+  $self->{'alphabet'} = $value;
+  return $self->{'alphabet'};
+}
+
+=head2 coordinate_start
+
+  Title   : coordinate_start
+  Usage   : $coordstartlabel=$obj->coordinate_start()
+          : $coordstartlabel=$obj->coordinate_start($label)
+  Function: returns and optionally sets the first label of the coordinate
+            system used
+            For some objects only labels inside the object or in frame (for
+            Translation objects) will be allowed to get set as coordinate start
+
+  Returns : label. It returns 0 if label not found.
+  Errorcode -1 
+  Args    : an optional reference $label that is position 1
+
+=cut
+
+
+sub coordinate_start {
+  # Combined getter/setter of the label used as position 1. A true argument
+  # is stored only when valid() accepts it, otherwise a warning is issued.
+  # When no coordinate start is stored, start() is returned instead.
+  my ($self,$label) = @_;
+  if ($label) {
+    if (!$self->valid($label)) {
+      $self->warn("The label you are trying to set as coordinate_start is not valid for this object");
+    } else {
+      $self->{'coordinate_start'} = $label;
+    }
+  }
+  my $origin = $self->{'coordinate_start'};
+  return $origin if ($origin);
+  return $self->start();
+}
+
+=head2 label
+
+  Title   : label
+  Usage   : $seq->label($position)
+          : $seq->label($position,$firstlabel)
+  Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label
+          : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label
+
+  Function: returns the label of the nucleotide at $position from current
+            coordinate start
+  Returns : a label. It returns 0 if label not found.
+  Errorcode -1 
+  Args    : a position, 
+            an optional reference $firstlabel that is to be used as position 1
+            an optional strand (1 or -1) argument 
+             if strand argument is not given, it will default to the object
+             argument. This argument is useful when a call is issued from a child
+             of a parent object containing the subseq method
+
+=cut
+
+
+sub label {
+  # Returns the label found at $position, counting from $firstlabel (which
+  # is position 1 and defaults to the coordinate start) along $strand (which
+  # defaults to the strand of the object). -1 is returned after a warning
+  # when the position is missing or 0; otherwise the value is whatever the
+  # underlying chain returns.
+  my ($self,$position,$firstlabel,$strand)=@_;
+  $firstlabel=$self->coordinate_start unless (defined ($firstlabel));
+  if (!$position) {  # if position = 0 complain ?
+    $self->warn("Position not given or position 0");
+    return (-1);
+  }
+  $strand=$self->strand unless (defined ($strand)); # optional [strand] argument not given
+
+  my $forward=($strand == 1);
+  my $ahead=($position > 0);
+  # there is no position 0, hence a non-positive position -N is found
+  # N+1 elements away, counting in the opposite direction
+  my $offset=$ahead ? $position : 1 - $position;
+  # down_get_label_at_pos is used when looking ahead on the forward strand
+  # or looking back on the reverse strand, up_get_label_at_pos otherwise
+  my $walker=($forward xor $ahead) ? 'up_get_label_at_pos' : 'down_get_label_at_pos';
+  my $label=$self->{'seq'}->$walker($offset,$firstlabel);
+  return $label;
+}
+
+
+=head2 position
+
+  Title   : position
+  Usage   : $seq->position($label)
+          : $seq->position($label,$firstlabel)
+  Function: returns the position of nucleotide at $label
+  Returns : the position of the label from current coordinate start
+  Errorcode 0
+  Args    : a label pointing to a certain nucleotide (e.g. start of exon)
+            an optional "firstlabel" as reference to count from
+            an optional strand (1 or -1) argument 
+             if strand argument is not given, it will default to the object
+             argument. This argument is useful when a call is issued from a child
+             of a parent object containing the subseq method
+
+=cut
+
+
+sub position {
+  # Returns the position of $label counted from $firstlabel (position 1,
+  # defaults to the coordinate start) along $strand (defaults to the strand
+  # of the object). 0 is returned after a warning when valid() rejects the
+  # label.
+  my ($self,$label,$firstlabel,$strand)=@_;
+  $strand=$self->strand unless (defined ($strand)); # optional [strand] argument not given
+  $firstlabel=$self->coordinate_start unless (defined ($firstlabel));
+  if (!$self->valid($label)) {
+    $self->warn("label not valid");
+    return (0);
+  }
+  return (1) if ($firstlabel == $label);
+
+  # distance between the two labels, taken from the values returned by
+  # down_get_pos_of_label of the underlying chain
+  my $chain=$self->{'seq'};
+  my $position=$chain->down_get_pos_of_label($label)
+              -$chain->down_get_pos_of_label($firstlabel)+1;
+  # there is no position 0: non-positive results are shifted by one
+  $position-- if ($position <= 0);
+  # the value computed above is mirrored around position 1 for strand -1
+  return ($strand == -1) ? (1-$position) : ($position);
+}
+
+=head2 follows
+
+  Title   : follows
+  Usage   : $seq->follows($firstlabel,$secondlabel)
+          : $seq->follows($firstlabel,$secondlabel,$strand)
+  Function: checks if SECONDlabel follows FIRSTlabel, independently of the strand
+            i.e. it checks downstream for forward strand and
+            upstream for reverse strand
+  Returns : 1 or 0
+  Errorcode -1
+  Args    : two labels
+            an optional strand (1 or -1) argument 
+             if strand argument is not given, it will default to the object
+             argument. This argument is useful when a call is issued from a child
+             of a parent object containing the subseq method
+
+=cut
+
+#'
+# wraparound to is_downstream and is_upstream that chooses the correct one
+# depending on the strand
+sub follows {
+  # Hands the two labels to is_downstream (strand 1) or is_upstream (any
+  # other strand) of the underlying chain and returns its answer. The strand
+  # defaults to the strand of the object.
+  my ($self,$firstlabel,$secondlabel,$strand)=@_;
+  $strand=$self->strand unless (defined ($strand)); # optional [strand] argument not given
+  my $checker=($strand == 1) ? 'is_downstream' : 'is_upstream';
+  return ($self->{'seq'}->$checker($firstlabel,$secondlabel));
+}
+#
+#=head2 translate
+#
+# Title   : translate
+# Usage   : $protein_seq = $obj->translate
+# Function: Provides the translation of the DNA sequence
+#	    using full IUPAC ambiguities in DNA/RNA and amino acid codes.
+#
+#	    The resulting translation is identical to EMBL/TREMBL database 
+#	    translations.
+#
+# Returns : a string
+# Args    : character for terminator (optional) defaults to '*'
+#	    character for unknown amino acid (optional) defaults to 'X'
+#	    frame (optional) valid values 0, 1, 2, defaults to 0
+#	    codon table id (optional) defaults to 1
+#
+#=cut
+#
+#sub translate {
+#  my ($self) = shift;
+#  return ($self->translate_string($self->seq,@_));
+#}
+#
+#=head2 translate_string
+#
+# Title   : translate_string
+# Usage   : $protein_seq = $obj->translate_string("attcgtgttgatcgatta");
+# Function: Like translate, but can be used to translate subsequences after
+#	    having retrieved them as string.
+# Args    : 1st argument is a string. Optional following arguments: like in
+#	    the translate method
+#
+#=cut
+#
+#
+#sub translate_string {
+#  my($self) = shift;
+#  my($seq) = shift;
+#  my($stop, $unknown, $frame, $tableid) = @_;
+#  my($i, $len, $output) = (0,0,'');
+#  my($codon)   = "";
+#  my $aa;
+#
+#
+#  ## User can pass in symbol for stop and unknown codons
+#  unless(defined($stop) and $stop ne '')    { $stop = "*"; }
+#  unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
+#  unless(defined($frame) and $frame ne '') { $frame = 0; }
+#
+#  ## the codon table ID 
+#  if ($self->translation_table) {
+#    $tableid = $self->translation_table;
+#  }
+#  unless(defined($tableid) and $tableid ne '')    { $tableid = 1; }
+#
+#  ##Error if monomer is "Amino"
+#  $self->warn("Can't translate an amino acid sequence.") 
+#      if (defined $self->alphabet && $self->alphabet eq 'protein');
+#
+#  ##Error if frame is not 0, 1 or 2
+#  $self->warn("Valid values for frame are 0, 1, 2, not [$frame].")
+#      unless ($frame == 0 or $frame == 1 or $frame == 2);
+#
+#  #thows a warning if ID is invalid 
+#  my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
+#
+#  # deal with frame offset.
+#  if( $frame ) {
+#      $seq = substr ($seq,$frame);
+#  }
+#
+#  for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) {
+#      my $aa = $codonTable->translate($codon);
+#      if ($aa eq '*') {
+#	    $output .= $stop;
+#      }
+#      elsif ($aa eq 'X') {
+#	    $output .= $unknown;
+#      }
+#      else { 
+#	   $output .= $aa ;
+#      }   
+#  }
+#  #if( substr($output,-1,1) eq $stop ) {
+#  #    chop $output;
+#  #}
+#
+#  return ($output);
+#}
+
+=head2 gene
+
+ Title   : gene
+ Usage   : my $gene=$obj->gene;
+ Function: Gets or sets the reference to the LiveSeq::Gene object.
+           Objects that are features of a LiveSeq Gene will have this
+           attribute set automatically.
+
+ Returns : reference to an object of class Gene
+ Note    : if Gene object is not set, this method will return 0;
+ Args    : none or reference to object of class Bio::LiveSeq::Gene
+
+=cut
+
+sub gene {
+  my ($self,$value) = @_;
+  # setter: only a defined argument replaces the stored Gene reference
+  $self->{'gene'} = $value if defined $value;
+  # getter: 0 means that no Gene was ever attached
+  return exists $self->{'gene'} ? $self->{'gene'} : 0;
+}
+
+=head2 obj_valid
+
+ Title   : obj_valid
+ Usage   : if ($obj->obj_valid) {do something;}
+ Function: Checks if start and end labels are still valid for the object,
+           i.e. tests if the LiveSeq object is still valid
+ Returns : boolean
+ Args    : none
+
+=cut
+
+sub obj_valid {
+  my ($self)=@_;
+  my $dna=$self->{'seq'};
+  # the end label is only looked at when the start label passed the check
+  return (0) unless $dna->valid($self->start());
+  return (0) unless $dna->valid($self->end());
+  return (1);
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $name = $obj->name;
+         : $name = $obj->name("ABCD");
+ Function: Returns or sets the name of the object.
+           If there is no name, it will return "unknown";
+ Returns : A string
+ Args    : none, or a string holding the new name
+
+=cut
+
+sub name {
+  my ($self,$value) = @_;
+  # setter: an undefined argument never overwrites the stored name
+  $self->{'name'} = $value if defined $value;
+  # getter: "unknown" stands for "never set"
+  return exists $self->{'name'} ? $self->{'name'} : "unknown";
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $desc = $obj->desc;
+         : $desc = $obj->desc("ABCD");
+ Function: Returns or sets the description of the object.
+           If there is no description, it will return "unknown";
+ Returns : A string
+ Args    : none, or a string holding the new description
+
+=cut
+
+sub desc {
+  my ($self,$value) = @_;
+  # setter: an undefined argument never overwrites the stored description
+  $self->{'desc'} = $value if defined $value;
+  # getter: "unknown" stands for "never set"
+  return exists $self->{'desc'} ? $self->{'desc'} : "unknown";
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $name = $obj->source;
+         : $name = $obj->source("Homo sapiens");
+ Function: Returns or sets the organism that is source of the object.
+           If there is no source, it will return "unknown";
+ Returns : A string
+ Args    : none, or a string holding the new source organism
+
+=cut
+
+sub source {
+  my ($self,$value) = @_;
+  # setter: an undefined argument never overwrites the stored source
+  $self->{'source'} = $value if defined $value;
+  # getter: "unknown" stands for "never set"
+  return exists $self->{'source'} ? $self->{'source'} : "unknown";
+}
+
+# Dismantles the object: its own hash is emptied first, then delete_Obj is
+# called on everything it was holding, i.e. on every value whose class name
+# contains "LiveSeq", on every element of an array value and on every value
+# of a hash value.
+# returns: 1
+sub delete_Obj {
+  my $self = shift;
+  my @values= values %{$self};
+
+  # Empty ourselves BEFORE recursing: an object that is reached again
+  # through another reference is then already empty.
+  %{$self} = ();
+
+  foreach my $value ( @values ) {
+    my $type = ref($value);
+    my @children;
+    if (index($type,"LiveSeq") != -1) { # object case
+      @children = ($value);
+    } elsif (index($type,"ARRAY") != -1) { # array case
+      @children = @{$value};
+    } elsif (index($type,"HASH") != -1) { # hash case
+      # only the values are visited: the keys are plain strings and calling
+      # a method on them could end up as a class-method call
+      @children = values %{$value};
+    }
+    foreach my $child (@children) {
+      # best effort: anything that cannot delete_Obj is silently skipped
+      eval {
+	$child->delete_Obj;
+      };
+    }
+  }
+  return(1);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,816 @@
+# $Id: Transcript.pm,v 1.21.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Transcript
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Transcript - Transcript class for LiveSeq
+
+=head1 SYNOPSIS
+
+  # documentation needed
+
+=head1 DESCRIPTION
+
+This stores information about coding sequences (CDS).
+The implementation is that a Transcript object accesses a collection of
+Exon objects, inferring from them the nucleotide structure and sequence.
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Transcript;
+
+use strict;
+# use Carp qw(carp cluck);
+use Bio::LiveSeq::Exon; # uses Exon to create new exon in case of deletion
+use base qw(Bio::LiveSeq::SeqI);
+
+=head2 new
+
+  Title   : new
+  Usage   : $transcript = Bio::LiveSeq::Transcript->new(-exons => \@obj_refs);
+
+  Function: generates a new Bio::LiveSeq::Transcript
+  Returns : reference to a new object of class Transcript
+  Errorcode -1
+  Args    : reference to an array of Exon object references
+
+=cut
+
+# Builds a Transcript on top of an ordered list of Exon objects.
+# argument: -exons => reference to an array of Exon object references
+# returns: the new object, or -1 (after a warning) when the exon list is
+#          missing, empty or rejected by _checkexons
+sub new {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+
+  my $obj = bless {}, $class;
+
+  # A missing -exons argument is handled like an empty list, so that it is
+  # reported through the -1 errorcode instead of dying on the dereference
+  # of an undefined value.
+  my @exons=@{$args{-exons} || []};
+  unless (@exons) {
+    $obj->warn("$class not initialised because exons array empty");
+    return(-1);
+  }
+
+  # start and end are not stored: the start and end methods of this class
+  # read them from the exons every time
+  my $firstexon = $exons[0];
+  my $strand = $firstexon->strand;
+  my $seq = $firstexon->{'seq'};
+  $obj->alphabet('rna');
+
+  unless (_checkexons(\@exons)) {
+    $obj->warn("$class not initialised because of problems in the exon structure");
+    return(-1);
+  }
+  $obj->{'strand'}=$strand;
+  $obj->{'exons'}=\@exons;
+  $obj->{'seq'}=$seq;
+
+  # set Transcript into each Exon
+  foreach my $exon (@exons) {
+    $exon->{'transcript'}=$obj;
+  }
+  return $obj;
+}
+
+
+=head2 all_Exons
+
+ Title   : all_Exons
+ Usage   : $transcript_obj->all_Exons()
+ Function: returns references to all Exon objects the Transcript is composed of
+ Example : foreach $exon ($transcript->all_Exons()) { do_something }
+ Returns : array of object references
+ Args    : none
+
+=cut
+
+sub all_Exons {
+  my ($self)=@_;
+  my @known=@{$self->{'exons'}};
+  my @alive;
+  foreach my $exon (@known) {
+    if ($exon->obj_valid) {
+      push(@alive,$exon);
+      next;
+    }
+    $self->warn("$exon no more valid, start or end label lost, skipping....",1); # ignorable
+  }
+  # the exons field is replaced only when at least one exon was dropped
+  $self->{'exons'}=\@alive unless scalar(@known) == scalar(@alive);
+  return (@alive);
+}
+
+=head2 downstream_seq
+
+ Title   : downstream_seq
+ Usage   : $transcript_obj->downstream_seq()
+         : $transcript_obj->downstream_seq(64)
+ Function: returns a string of nucleotides downstream of the end of the
+           CDS. If there is some information of the real mRNA, from features in
+           an attached Gene object, it will return up to those boundaries.
+           Otherwise it will return 1000 nucleotides.
+           If an argument is given it will override the default 1000 number
+           and return instead /that/ requested number of nucleotides.
+           But if a Gene object is attached, this argument will be ignored.
+ Returns : string
+ Args    : an optional integer number of nucleotides to be returned instead of
+           the default if no gene attached
+
+=cut
+
+sub downstream_seq {
+  my ($self,$howmany)=@_;
+  my $str;
+  if (defined ($howmany)) { # explicit request: only checked for being positive
+    unless ($howmany > 0) {
+      $self->throw("No sense in asking less than 1 downstream nucleotides!");
+    }
+  } else {
+    unless ($self->{'seq'}->alphabet eq 'rna') { # for 'rna' $howmany stays undefined, so everything up to the end is retrieved below
+      #$str=$DNAobj->labelsubseq($self->end,undef,undef,"unsecuremoderequested");
+      #return(substr($str,1)); # delete first nucleotide that is the last of Transcript
+      if ($self->gene) { # if there is Gene object attached fetch relevant info
+	$str=$self->{'seq'}->labelsubseq($self->end,undef,$self->gene->maxtranscript->end); # retrieve from end of this Transcript to end of the maxtranscript
+	$str=substr($str,1); # delete first nucleotide that is the last of Transcript
+	if (CORE::length($str) > 0) { # CORE:: because this package has its own length method
+	  return($str);
+	} else { # if there was no downstream through the gene's maxtranscript, go the usual way
+	  $howmany = 1000;
+	}
+      } else {
+	$howmany = 1000; # default when no Gene object is attached
+      }
+    }
+  }
+  my @exons=$self->all_Exons;
+  my $strand=$self->strand();
+  my $lastexon=$exons[-1];
+  my $lastexonlength=$lastexon->length; # NOTE(review): this value is not used anywhere below
+  # $howmany nucs after end of last exon
+  #my $downstream_seq=$lastexon->subseq($lastexonlength+1,undef,$howmany);
+  my $downstream_seq;
+
+  if ($howmany) { # a count is known: either given by the caller or one of the defaults set above
+      $downstream_seq=substr($lastexon->labelsubseq($self->end,$howmany,undef,"unsecuremoderequested"),1); # substr(...,1) drops the first nucleotide, which is the last one of the Transcript
+  } else { # no count: go on until the boundary of the underlying sequence
+    if ($strand == 1) {
+      $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->end,"unsecuremoderequested"),1);
+    } else { # any other strand: the boundary used is the start of the underlying sequence
+      $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->start,"unsecuremoderequested"),1);
+    }
+  }
+  return $downstream_seq;
+}
+
+=head2 upstream_seq
+
+ Title   : upstream_seq
+ Usage   : $transcript_obj->upstream_seq()
+         : $transcript_obj->upstream_seq(64)
+ Function: just like downstream_seq but returns nucleotides before the ATG
+ Note    : the default, if no Gene information present and no nucleotides
+           number given, is to return up to 400 nucleotides.
+
+=cut
+
+# argument: optional number of nucleotides wanted (must be > 0)
+# returns: string of nucleotides found before the start of the Transcript
+# Without argument: if the underlying sequence is not 'rna', it returns what
+# lies between the start of the gene's maxtranscript and the start of this
+# Transcript (when a Gene is attached and that stretch is not empty), or
+# else falls back on 400 nucleotides; if it is 'rna', it returns everything
+# from the boundary of the underlying sequence.
+sub upstream_seq {
+  my ($self,$howmany)=@_;
+  if (defined ($howmany)) {
+    unless ($howmany > 0) {
+      $self->throw("No sense in asking less than 1 upstream nucleotides!");
+    }
+  } else {
+    unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve from the start
+      if ($self->gene) { # if there is Gene object attached fetch relevant info
+	my $str=$self->{'seq'}->labelsubseq($self->gene->maxtranscript->start,undef,$self->start); # retrieve from start of maxtranscript to start of this Transcript
+	chop $str; # delete last nucleotide that is the A of starting ATG
+	# CORE:: spelled out because this package defines its own length
+	# method (same form as used in downstream_seq)
+	if (CORE::length($str) > 0) {
+	  return($str);
+	} else { # if there was no upstream through the gene's maxtranscript, go the usual way
+	  $howmany = 400;
+	}
+      } else {
+	$howmany = 400;
+      }
+    }
+  }
+  my @exons=$self->all_Exons;
+  my $firstexon=$exons[0];
+
+  my $upstream_seq;
+  my $strand=$self->strand();
+
+  if ($howmany) {# $howmany nucs before begin of first exon
+    my $labelbefore=$firstexon->label(-$howmany,$firstexon->start);
+    if ($labelbefore < 1) { # no such label: use the boundary of the underlying sequence instead
+      if ($strand == 1) {
+	$labelbefore=$self->{'seq'}->start;
+      } else {
+	$labelbefore=$self->{'seq'}->end;
+      }
+    }
+    $upstream_seq=$firstexon->labelsubseq($labelbefore,undef,$firstexon->start,"unsecuremoderequested");
+    chop $upstream_seq;
+  } else {
+    if ($strand == 1) {
+      $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->start,undef,$self->start,"unsecuremoderequested");
+      chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
+    } else {
+      $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->end,undef,$self->start,"unsecuremoderequested");
+      chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
+    }
+  }
+  return $upstream_seq;
+}
+
+# These get redefined here, overriding the SeqI one because they draw their
+# information from the Exons a Transcript is built of
+# optional argument: firstlabel. If not given, it checks coordinate_start
+#                                This is useful when called by Translation
+#                                also used by _delete
+# arguments: position (cannot be 0), optional firstlabel
+# returns: the label found at that position, counting from firstlabel
+#          (or from coordinate_start when firstlabel is not given)
+# errorcode: -1 when the position is missing or 0
+sub label {
+  my ($self,$position,$firstlabel)=@_;
+  unless ($position) {  # if position = 0 complain ?
+    $self->warn("Position not given or position 0");
+    return (-1);
+  }
+  my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
+  my ($label,@labels,$length,$arraypos);
+  unless (defined ($firstlabel)) {
+    $firstlabel=$self->coordinate_start; # this is inside Transcript obj
+  }
+  my $coord_pos=$self->_inside_position($firstlabel);
+  $length=$self->length;
+  if ($position < 1) {
+    $position++; # to account for missing of 0 position
+  }
+  # index of the wanted label inside all_labels (first label has index 0)
+  $arraypos=$position+$coord_pos-2;
+  if ($arraypos < 0) { # before the start: ask the underlying sequence
+    $label=$self->{'seq'}->label($arraypos,$start,$strand); #?
+  } elsif ($arraypos >= $length) { # after the end: ask the underlying sequence
+    $label=$self->{'seq'}->label($arraypos-$length+2,$end,$strand); #?
+  } else { # inside the Transcript
+    @labels=$self->all_labels;
+    $label=$labels[$arraypos];
+  }
+  # explicit, so the result does not depend on the if being the last statement
+  return ($label);
+}
+
+# argument: label
+# returns: position of label according to coord_start
+# errorcode: 0 label not found
+# optional argument: firstlabel. If not given, it checks coordinate_start
+#                                This is useful when called by Translation
+sub position {
+  my ($self,$label,$firstlabel)=@_;
+  unless ($self->{'seq'}->valid($label)) { # the label must at least exist in the underlying sequence
+    $self->warn("label is not valid");
+    return (0);
+  }
+  unless (defined ($firstlabel)) {
+    $firstlabel=$self->coordinate_start; # this is inside Transcript obj
+  }
+  if ($label == $firstlabel) { # shortcut: the reference label itself is position 1
+    return (1);
+  }
+  my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
+  my ($position,$in_pos,$out_pos,$coord_pos);
+  my $length=$self->length;
+  $coord_pos=$self->_inside_position($firstlabel); # 1-based place of firstlabel among the Transcript labels
+  if ($self->valid($label)) { # if label is inside the Transcript
+    $in_pos=$self->_inside_position($label);
+    $position=$in_pos-$coord_pos+1;
+    if ($position <= 0) {
+      return ($position-1); # accounts for the missing of the 0 position
+    }
+  } else {
+    if ($self->follows($end,$label)) { # label after end of transcript
+      $out_pos=$self->{'seq'}->position($label,$end,$strand); # position counted from the end label on the underlying sequence
+      #print "\n+++++++++DEBUG label $label FOLLOWS end $end outpos $out_pos coordpos $coord_pos";
+      $position=$out_pos+$length-$coord_pos;
+    } elsif ($self->follows($label,$start)) { # label before begin of transcript
+      #print "\n+++++++++DEBUG label $label BEFORE start $start outpos $out_pos coordpos $coord_pos";
+      $out_pos=$self->{'seq'}->position($label,$start,$strand); # position counted from the start label on the underlying sequence
+      $position=$out_pos-$coord_pos+1;
+    } else { # label is in intron (not valid, not after, not before)!
+      $self->warn("Cannot give position of label pointing to intron according to CDS numbering!",1);
+      return (0);
+    }
+  }
+  return ($position);
+}
+
+# returns: the sequences of all the valid exons, joined in exon order
+sub seq {
+  my ($self)=@_;
+  my $str; # left undefined when there is no exon to read from
+  $str .= $_->seq() foreach $self->all_Exons();
+  return $str;
+}
+
+# returns: the sum of the lengths of all the valid exons
+sub length {
+  my ($self)=@_;
+  my $total; # left undefined when there is no exon to measure
+  $total += $_->length() foreach $self->all_Exons();
+  return $total;
+}
+
+# returns: the labels of all the valid exons, in exon order
+sub all_labels {
+  my $self=shift;
+  my @labels;
+  foreach my $exon ($self->all_Exons()) {
+    push (@labels,$exon->all_labels());
+  }
+  return @labels;
+}
+
+# redefined here so that it will retrieve effective subseq without introns
+# otherwise it would have retrieved an underlying DNA (possibly with introns)
+# subsequence
+# Drawback: this is really bulky, label->position and then a call to
+# subseq that will do the opposite position-> label
+#
+# one day this can be rewritten as the main one so that the normal subseq
+# will rely on this one and hence avoid this double (useless and lengthy)
+# conversion between labels and positions
+sub old_labelsubseq {
+  my ($self,$start,$length,$end)=@_;
+  my ($frompos,$topos);
+  if ($start) {
+    if (!$self->valid($start)) {
+      $self->warn("Start label not valid"); return (-1);
+    }
+    $frompos=$self->position($start);
+  }
+  if ($end && $end == $start) {
+    # same label at both ends: a subsequence of one nucleotide
+    $length=1;
+  } elsif ($end) {
+    if (!$self->valid($end)) {
+      $self->warn("End label not valid"); return (-1);
+    }
+    if ($self->follows($start,$end) != 1) {
+      $self->warn("End label does not follow Start label!"); return (-1);
+    }
+    $topos=$self->position($end);
+    $length=undef; # an end label wins over a length argument
+  }
+  # the positions are turned back into labels by subseq
+  return ($self->subseq($frompos,$topos,$length));
+}
+
+# rewritten, eventually
+
+sub labelsubseq {
+  my ($self,$start,$length,$end,$unsecuremode)=@_;
+  unless (defined $unsecuremode && 
+	  $unsecuremode eq "unsecuremoderequested") 
+  { # to skip security checks (faster); note that the start/end defaults below are skipped as well
+    if ($start) {
+      unless ($self->valid($start)) {
+	$self->warn("Start label not valid"); return (-1);
+      }
+    } else {
+      $start=$self->start; # default: from the start of the Transcript
+    }
+    if ($end) {
+      if ($end == $start) { # same label at both ends: one nucleotide
+	$length=1;
+	undef $end;
+      } else {
+	undef $length; # end argument overrides length argument
+	unless ($self->valid($end)) {
+	  $self->warn("End label not valid"); return (-1);
+	}
+	unless ($self->follows($start,$end) == 1) {
+	  $self->warn("End label does not follow Start label!"); return (-1);
+	}
+      }
+    } else {
+      $end=$self->end; # default: up to the end of the Transcript
+    }
+  }
+  my ($seq,$exon,$startexon,$endexon); my @exonlabels;
+  my @exons=$self->all_Exons;
+  EXONCHECK:
+  foreach $exon (@exons) { # walk the exons in order, collecting sequence from the start exon onwards
+    if ((!(defined($startexon)))&&($exon->valid($start))) { # checks only if not yet found
+      $startexon=$exon;
+    }
+    if ($exon->valid($end)) { # NOTE(review): $end can be undefined here (unsecure mode, or end == start) - confirm valid() copes with it
+      $endexon=$exon;
+    }
+    if ((!(defined($seq)) && (defined($startexon)))) { # initializes only once
+      if ((defined($endexon)) && ($endexon eq $startexon)) { # then perfect, we are finished
+	if ($length) {
+	  $seq = $startexon->labelsubseq($start,$length,undef,"unsecuremoderequested");
+
+
+	  last EXONCHECK;
+	} else {
+	  $seq = $startexon->labelsubseq($start,undef,$end,"unsecuremoderequested");
+	}
+	last EXONCHECK;
+      } else { # get up to the end of the exon
+	$seq = $startexon->labelsubseq($start,undef,undef,"unsecuremoderequested");
+      }
+    }
+    if (($startexon)&&($exon ne $startexon)) { # an exon after the one holding the start label
+      if (defined($endexon)) { # we arrived to the last exon
+	$seq .= $endexon->labelsubseq(undef,undef,$end,"unsecuremoderequested"); # get from the start of the exon
+	last EXONCHECK;
+
+      } elsif (defined($startexon)) { # we are in a whole-exon-in-the-middle case
+	  $seq .= $exon->seq; # we add it completely to the seq
+      } # else, we still have to reach the start point, exon useless, we move on
+      if ($length) { # if length argument specified
+	if (($seq && (CORE::length($seq) >= $length))) { # enough collected, no need to read further exons
+	  last EXONCHECK;
+	}
+      }
+    }
+  }
+  if ($length) {
+    return (substr($seq,0,$length)); # trim what was collected in excess
+  } else {
+    return ($seq);
+  }
+}
+
+
+# argument: label
+# returns: the objref and progressive number of the Exon containing that label
+# errorcode: -1
+sub in_which_Exon {
+  my ($self,$label)=@_;
+  my @exons=$self->all_Exons;
+  foreach my $idx (0 .. $#exons) {
+    my $exon=$exons[$idx];
+    # the progressive number is 1-based: 1st exon is numbered "1"
+    return ($exon,$idx+1) if $exon->valid($label);
+  }
+  return (-1); # if nothing found
+}
+
+# recoded to exploit the new fast labelsubseq()
+# valid only inside Transcript
+sub subseq {
+  my ($self,$pos1,$pos2,$length) = @_;
+  my ($str,$startlabel,$endlabel); # NOTE(review): $str is not used in this sub
+  if (defined ($pos1)) {
+    if ($pos1 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    }
+    if ((defined ($pos2))&&($pos1>$pos2)) {
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+    $startlabel=$self->label($pos1); # position -> label
+    unless ($self->valid($startlabel)) {
+      $self->warn("Start label not valid"); return (-1);
+    }
+    if ($startlabel < 1) {
+      $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
+    }
+  } else {
+    $startlabel=$self->start; # default: from the start of the Transcript
+  }
+  if (defined ($pos2)) {
+    if ($pos2 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    }
+    undef $length; # a second position overrides the length argument
+    if ((defined ($pos1))&&($pos1>$pos2)) { # same test as in the $pos1 branch above
+      $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
+    }
+    $endlabel=$self->label($pos2); # position -> label
+    unless ($self->valid($endlabel)) {
+      $self->warn("End label not valid"); return (-1);
+    }
+    if ($endlabel < 1) {
+      $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
+    }
+  } else {
+    unless (defined ($length)) { # neither end position nor length: up to the end of the Transcript
+      $endlabel=$self->end;
+    }
+  }
+  return ($self->labelsubseq($startlabel,$length,$endlabel,"unsecuremoderequested")); # labels were checked above, so labelsubseq is asked to skip its own checks
+}
+
+# works only inside the transcript, complains if asked outside
+sub old_subseq {
+  my ($self,$pos1,$pos2,$length) = @_;
+  my ($str,$startcount,$endcount,$seq,$seqlength);
+  if (defined ($length)) {
+    if ($length < 1) {
+      $self->warn("No sense asking for a subseq of length < 1");
+      return (-1);
+    }
+  }
+  my $firstlabel=$self->coordinate_start; # this is inside Transcript obj
+  my $coord_pos=$self->_inside_position($firstlabel); # TESTME old
+  $seq=$self->seq; # the whole Transcript sequence, cut with substr at the end
+  $seqlength=CORE::length($seq);
+  unless (defined ($pos1)) {
+    $startcount=1+$coord_pos-1; # i.e. coord_pos
+  } else {
+    if ($pos1 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    } elsif ($pos1 < 0) {
+      $pos1++; # there is no position 0, so negative positions are shifted by one
+    }
+    if ((defined ($pos2))&&($pos1>$pos2)) {
+      $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
+      return (-1);
+    }
+    $startcount=$pos1+$coord_pos-1; # 1-based place of the first nucleotide inside $seq
+  }
+  unless (defined ($pos2)) {
+  ; # nothing to do: $length (possibly undefined) decides how much is returned
+  } else {
+    if ($pos2 == 0) {  # if position = 0 complain
+      $self->warn("Position cannot be 0!"); return (-1);
+    } elsif ($pos2 < 0) {
+      $pos2++; # same shift as for $pos1
+    }
+    if ((defined ($pos1))&&($pos1>$pos2)) { # NOTE(review): by now negative positions have already been shifted by one
+      $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
+      return (-1);
+    }
+    $endcount=$pos2+$coord_pos-1; # 1-based place of the last nucleotide inside $seq
+    if ($endcount > $seqlength) {
+      #print "\n###DEBUG###: pos1 $pos1 pos2 $pos2 coordpos $coord_pos endcount $endcount seqln $seqlength\n";
+      $self->warn("Cannot access end position after the end of Transcript");
+      return (-1);
+    }
+    $length=$endcount-$startcount+1; # a second position overrides the length argument
+  }
+  #print "\n###DEBUG pos1 $pos1 pos2 $pos2 start $startcount end $endcount length $length coordpos $coord_pos\n";
+  my $offset=$startcount-1; # 0-based offset for substr
+  if ($offset < 0) {
+    $self->warn("Cannot access startposition before the beginning of Transcript, returning from start",1); # ignorable
+    return (substr($seq,0,$length));
+  } elsif ($offset >= $seqlength) {
+    $self->warn("Cannot access startposition after the end of Transcript");
+    return (-1);
+  } else {
+    $str=substr($seq,$offset,$length);
+    if (CORE::length($str) < $length) { # NOTE(review): $length may be undefined here when neither $pos2 nor $length was given
+      $self->warn("Attention, cannot return the length requested ".
+		  "for subseq",1) if $self->verbose > 0; # ignorable
+    }
+    return $str;
+  }
+}
+
+# redefined so that it doesn't require other methods (after deletions) to
+# reset it.
+# returns: the start label of the first exon in the exons field
+sub start {
+  my ($self)=@_;
+  my ($firstexon)=@{$self->{'exons'}};
+  return ($firstexon->start);
+}
+
+# returns: the end label of the last exon in the exons field
+sub end {
+  my ($self)=@_;
+  my $lastexon=(@{$self->{'exons'}})[-1];
+  return ($lastexon->end);
+}
+
+
+# internal methods begin here
+
+# returns: position of label in transcript's all_labels
+#          with STARTlabel == 1
+# errorcode 0 -> label not found
+# argument: label
+sub _inside_position {
+  my ($self,$label)=@_;
+  # start, end and strand are fetched as before, but not needed for the search
+  my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
+  my @labels=$self->all_labels;
+  foreach my $idx (0 .. $#labels) {
+    # positions are 1-based: the first label is at position 1
+    return ($idx+1) if $label == $labels[$idx];
+  }
+  return (0); # label not found
+}
+
+# returns 1 OK or 0 ERROR
+# arguments: reference to array of Exon object references
+# Checks that every element is an object of class Bio::LiveSeq::Exon, that
+# all the exons are on the same strand and that each exon follows the
+# previous one.
+# returns 1 OK or 0 ERROR
+# arguments: reference to array of Exon object references
+# This is called as a plain function (no invocant), hence problems are
+# reported with the builtin warn.
+sub _checkexons {
+  my ($exonsref)=@_;
+  my @exons=@{$exonsref}; # a copy: the caller's array is left untouched
+
+  my $firstexon = shift @exons; # the remaining ones are compared against it
+
+  unless (ref($firstexon) eq "Bio::LiveSeq::Exon") {
+    CORE::warn("Object not of class Exon");
+    return (0);
+  }
+  my $strand = $firstexon->strand;
+
+  my $prevend = $firstexon->end;
+  foreach my $exon (@exons) {
+    unless (ref($exon) eq "Bio::LiveSeq::Exon") { # object class check
+      CORE::warn("Object not of class Exon");
+      return (0);
+    }
+    if ($exon->strand != $strand) { # strand consistency check
+      CORE::warn("Exons' strands not consistent when trying to create Transcript");
+      return (0);
+    }
+    my $thisstart = $exon->start;
+    unless ($exon->{'seq'}->follows($prevend,$thisstart,$strand)) {
+      CORE::warn("Exons not in correct order when trying to create Transcript");
+      return (0);
+    }
+    $prevend = $exon->end;
+  }
+  return (1);
+}
+
+=head2 get_Translation
+
+  Title   : get_Translation
+  Usage   : $translation = $obj->get_Translation()
+  Function: retrieves the reference to the object of class Translation (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+sub get_Translation {
+  my ($self)=@_;
+  # plain read of the 'translation' field; undef when the field is not there
+  my $translation=$self->{'translation'};
+  return ($translation);
+}
+
+# this checks so that deletion spanning multiple exons is
+# handled accordingly and correctly
+# arguments: begin and end label of a deletion
+# this is called BEFORE any deletion in the chain
+sub _deletecheck {
+  my ($self,$startlabel,$endlabel)=@_;
+  my $exonsref=$self->{'exons'}; # the stored list is read directly, without going through all_Exons
+  my @exons=@{$exonsref};
+  my ($startexon,$endexon,$exon);
+  $startexon=$endexon=0; # 0 means "not found yet"
+  foreach $exon (@exons) {
+    if (($startexon == 0)&&($exon->valid($startlabel))) {
+      $startexon=$exon; # exon containing start of deletion
+    }
+    if (($endexon == 0)&&($exon->valid($endlabel))) {
+      $endexon=$exon; # exon containing end of deletion
+    }
+    if (($startexon)&&($endexon)) {
+      last; # don't check further
+    }
+  }
+  my $nextend=$self->label(2,$endlabel); # retrieve the next label
+  my $prevstart=$self->label(-1,$startlabel); # retrieve the prev label
+
+  if ($startexon eq $endexon) { # intra-exon deletion; NOTE(review): also true when neither label was found in any exon (both still 0)
+    if (($startexon->start eq $startlabel) && ($startexon->end eq $endlabel)) {
+      # let's delete the entire exon
+      my @newexons;
+      foreach $exon (@exons) {
+	unless ($exon eq $startexon) {
+	  push(@newexons,$exon);
+	}
+      }
+      $self->{'exons'}=\@newexons;
+    } elsif ($startexon->start eq $startlabel) { # special cases
+      $startexon->{'start'}=$nextend; # set a new start of exon
+    } elsif ($startexon->end eq $endlabel) {
+      $startexon->{'end'}=$prevstart; # set a new end of exon
+    } else {
+      return; # no problem
+    }
+  } else { # two new exons to be created, inter-exons deletion
+    my @newexons;
+    my $exonobj;
+    my $dna=$self->{'seq'};
+    my $strand=$self->strand;
+    my $notmiddle=1; # flag for skipping exons in the middle of deletion
+    foreach $exon (@exons) {
+      if ($exon eq $startexon) {
+	$exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$exon->start,'-end'=>$prevstart,'-strand'=>$strand); # new partial exon
+	push(@newexons,$exonobj);
+	$notmiddle=0; # now we enter totally deleted exons
+      } elsif ($exon eq $endexon) {
+	$exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$nextend,'-end'=>$exon->end,'-strand'=>$strand); # new partial exon
+	push(@newexons,$exonobj);
+	$notmiddle=1; # exiting totally deleted exons
+      } else {
+	if ($notmiddle) { # if before or after exons with deletion
+	  push(@newexons,$exon); 
+	}# else skip them
+      }
+    }
+    $self->{'exons'}=\@newexons; # the Transcript now holds the two partial exons instead of the original ones
+  }
+}
+
+=head2 translation_table
+
+ Title   : translation_table
+ Usage   : $name = $obj->translation_table;
+         : $name = $obj->translation_table(11);
+ Function: Returns or sets the translation_table used for translating the
+           transcript.
+           If it has never been set, it will return undef.
+ Returns : an integer
+
+=cut
+
+sub translation_table {
+  my ($self,$value) = @_;
+  # setter: an undefined argument never overwrites the stored table id
+  $self->{'translation_table'} = $value if defined $value;
+  # getter: bare return (undef or empty list) when it has never been set
+  return unless exists $self->{'translation_table'};
+  return $self->{'translation_table'};
+}
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $frame = $transcript->frame($label);
+ Function: Returns the frame of a particular nucleotide.
+           Frame can be 0 1 or 2 and means the position in the codon triplet
+           of the particular nucleotide. 0 is the first codon_position.
+           Codon_position (1 2 3) is simply frame+1.
+           If the label asked for is not inside the Transcript, -1 will be
+           returned.
+ Args    : a label
+ Returns : 0 1 or 2
+ Errorcode -1
+
+=cut
+
+# args: label
+# returns: frame of nucleotide (0 1 2)
+# errorcode: -1
+sub frame {
+  my ($self,$inputlabel)=@_;
+  # starts from 0 (and not from undef) so that the modulus for the very
+  # first label does not operate on an uninitialized value
+  my $count=0;
+  foreach my $label ($self->all_labels) {
+    if ($inputlabel == $label) {
+      return ($count % 3);
+    }
+    $count++; # 0 1 2 3 4....
+  }
+  return (-1); # label not found amid Transcript labels
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Translation.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Translation.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LiveSeq/Translation.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,322 @@
+# $Id: Translation.pm,v 1.15.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# bioperl module for Bio::LiveSeq::Translation
+#
+# Cared for by Joseph Insana <insana at ebi.ac.uk> <jinsana at gmx.net>
+#
+# Copyright Joseph Insana
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LiveSeq::Translation - Translation class for LiveSeq
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+This stores information about the amino acid translations of transcripts.
+The implementation is that a Translation object is the translation of
+a Transcript object, with different possibilities of manipulation,
+different coordinate system and eventually its own ranges (protein domains).
+
+=head1 AUTHOR - Joseph A.L. Insana
+
+Email:  Insana at ebi.ac.uk, jinsana at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LiveSeq::Translation;
+
+use strict;
+#use Carp qw(croak carp cluck);
+use Bio::LiveSeq::SeqI; # uses SeqI, inherits from it
+use Bio::PrimarySeq;
+use base qw(Bio::LiveSeq::Transcript);
+
+
+=head2 new
+
+  Title   : new
+  Usage   : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
+
+  Function: generates a new Bio::LiveSeq::Translation
+  Returns : reference to a new object of class Translation
+  Errorcode -1
+  Args    : reference to an object of class Transcript
+
+=cut
+
+# Constructor. Expects -transcript => an object whose ref() is exactly
+# "Bio::LiveSeq::Transcript"; throws otherwise. The new object copies the
+# transcript's strand and 'seq' entry, and registers itself in the
+# transcript's 'translation' slot.
+sub new {
+  my ($thing, %args) = @_;
+  my $class = ref($thing) || $thing;
+  my $transcript = $args{-transcript};
+
+  # bless before validating so that throw() can be called on the object
+  my $obj = bless {}, $class;
+
+  $obj->throw("$class not initialised because no -transcript given")
+    unless ($transcript);
+  $obj->throw("$class not initialised because no object of class Transcript given")
+    unless (ref($transcript) eq "Bio::LiveSeq::Transcript");
+
+  $obj->{'strand'}     = $transcript->strand;
+  $obj->{'seq'}        = $transcript->{'seq'};
+  $obj->{'transcript'} = $transcript;
+  $obj->{'alphabet'}   = "protein";
+
+  $transcript->{'translation'} = $obj; # set the Translation ref into its Transcript
+  return $obj;
+}
+
+=head2 get_Transcript
+
+  Title   : get_Transcript
+  Usage   : $transcript = $obj->get_Transcript()
+  Function: retrieves the reference to the object of class Transcript (if any)
+            attached to a LiveSeq object
+  Returns : object reference
+  Args    : none
+
+=cut
+
+# Getter for the object stored under the 'transcript' key.
+sub get_Transcript {
+  my ($self) = @_;
+  return $self->{'transcript'};
+}
+
+# These get redefined here, overriding the SeqI ones
+
+# Refuses the edit: emits a warning and returns the error code -1.
+sub change {
+  my $self = shift;
+  my $msg = "Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!";
+  $self->warn($msg);
+  return -1;
+}
+# Refuses the edit: emits a warning and returns the error code -1.
+sub positionchange {
+  my $self = shift;
+  my $msg = "Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!";
+  $self->warn($msg);
+  return -1;
+}
+# Refuses the edit: emits a warning and returns the error code -1.
+sub labelchange {
+  my $self = shift;
+  my $msg = "Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!";
+  $self->warn($msg);
+  return -1;
+}
+
+# this just returns the translation of the transcript, without checking for
+# stop codons
+# Translate the attached Transcript using this object's translation table
+# and return the resulting sequence string as-is (no handling of "*").
+sub transl_seq {
+  my ($self) = @_;
+  my $protein = $self->get_Transcript->translate(undef, undef, undef,
+						 $self->translation_table);
+  my $residues = $protein->seq;
+  return $residues;
+}
+
+# version 1.74 -> now the "*" is printed
+# Return the protein sequence up to and including the first stop ("*").
+# If the transcript's own translation contains no stop, the transcript
+# sequence is extended with its downstream sequence and translated again.
+# If there is still no stop, a warning is issued and the whole extended
+# translation is returned.
+sub seq {
+  my $self=shift;
+  my $transcript=$self->get_Transcript;
+  my $translation=$transcript->translate(undef, undef, undef,
+					 $self->translation_table)->seq;
+  my $stop_pos=index($translation,"*");
+  if ($stop_pos == -1) { # no stop present, continue downstream
+    my $downstreamseq=$transcript->downstream_seq();
+    my $cdnaseq=$transcript->seq();
+    # direct method call instead of indirect-object "new Class(...)"
+    my $extendedseq = Bio::PrimarySeq->new(-seq => "$cdnaseq$downstreamseq",
+					   -alphabet => 'dna'
+					  );
+
+    $translation=$extendedseq->translate(undef, undef, undef,
+					 $self->translation_table)->seq;
+    $stop_pos=index($translation,"*");
+    if ($stop_pos == -1) { # still no stop present, return warning
+      $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
+      return $translation;
+    }
+  }
+  # keep everything up to and including the "*"
+  return substr($translation,0,$stop_pos+1);
+}
+
+# Number of characters in the string returned by seq().
+sub length {
+  my ($self) = @_;
+  return CORE::length($self->seq);
+}
+
+# Delegates to the attached Transcript's all_labels.
+sub all_labels {
+  my ($self) = @_;
+  my $transcript = $self->get_Transcript;
+  return $transcript->all_labels;
+}
+
+# counts in triplet. Only a label matching the beginning of a triplet coding
+# for an aminoacid is considered valid when setting coordinate_start
+# (i.e. only in frame!)
+# Return 1 when $label is the first nucleotide of a triplet (i.e. sits at
+# index 0, 3, 6, ... of the Transcript's label list), 0 otherwise.
+sub valid {
+  my ($self,$label)=@_;
+  my @labels=$self->get_Transcript->all_labels;
+  # the index is initialised explicitly; only every third label is in frame
+  for (my $i=0; $i <= $#labels; $i+=3) {
+    return (1) if ($label == $labels[$i]); # found
+  }
+  return (0); # not found
+}
+
+# returns the label to the first nucleotide of the triplet coding for $position aminoacid
+# Map an amino-acid position to a nucleotide position (3*pos-2 for positive
+# positions, 3*pos otherwise) and ask the Transcript for the label at that
+# position, relative to this object's coordinate_start.
+sub label {
+  my ($self,$position)=@_;
+  my $firstlabel=$self->coordinate_start; # this is in_frame checked
+  # original note: position = 0 is expected to be caught by Transcript
+  my $nucpos = ($position > 0) ? $position*3-2 : $position*3;
+  return $self->get_Transcript->label($nucpos,$firstlabel);
+}
+
+# returns position (aminoacids numbering) of a particular label
+# used to return 0 for not in frame labels
+# now returns the position anyway (after version 1.66)
+# Convert a nucleotide label into an amino-acid position.
+# The nucleotide position is obtained from the Transcript, relative to
+# coordinate_start, and then divided into triplets. A warning is emitted
+# (only when verbose > 0) for labels that are not the first base of a
+# triplet, but a position is still returned.
+sub position {
+  my ($self,$label)=@_;
+  my $firstlabel=$self->coordinate_start; # this is in_frame checked
+  my $position=$self->get_Transcript->position($label,$firstlabel);
+  # from here to the end of the sub, % and / are integer operations
+  use integer;
+  my $modulus=$position % 3;
+  if ($position == 0) {
+    return (0);
+  } elsif ($position > 0) {
+    # positive positions: 1,2,3 -> aa 1; 4,5,6 -> aa 2; ...
+    if ($modulus != 1) {
+      $self->warn("Attention! Label $label is not in frame ". 
+		  "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
+      if ($modulus == 2) {
+	return ($position / 3 + 1);
+      } else { # i.e. modulus == 0
+	return ($position / 3);
+      }
+    }
+    return ($position / 3 + 1);
+  } else { # pos < 0
+    # NOTE(review): relies on how "use integer" treats % and / for negative
+    # operands - confirm before touching this arithmetic
+    if ($modulus != 0) {
+      $self->warn("Attention! Label $label is not in frame ".
+		  "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
+      return ($position / 3 - 1); # ok for both other positions
+    }
+    return ($position / 3);
+  }
+  # every branch above returns, so the two statements below are not reached
+  $self->throw( "WEIRD: execution shouldn't have reached here");
+  return (0); # this should never happen, but just in case
+}
+
+# note: it inherits subseq and labelsubseq from Transcript!
+
+# Start of the attached Transcript.
+sub start {
+  my ($self) = @_;
+  my $transcript = $self->{'transcript'};
+  return $transcript->start;
+}
+
+# End of the attached Transcript.
+sub end {
+  my ($self) = @_;
+  my $transcript = $self->{'transcript'};
+  return $transcript->end;
+}
+
+=head2 aa_ranges
+
+  Title   : aa_ranges
+  Usage   : @proteinfeatures = $translation->aa_ranges()
+  Function: to retrieve all the LiveSeq AARange objects attached to a
+            Translation, usually created out of a SwissProt database entry
+            crossreferenced from an EMBL CDS feature.
+  Returns : an array
+  Args    : none
+
+=cut
+
+# returns an array of obj_ref of AARange objects attached to the Translation
+# Returns whatever is stored under the 'aa_ranges' key, unchanged.
+sub aa_ranges {
+  my ($self) = @_;
+  return $self->{'aa_ranges'};
+}
+
+# Get/set the translation table on the attached Transcript; any arguments
+# are forwarded untouched and the Transcript's result is returned.
+sub translation_table {
+  my ($self, @args) = @_;
+  return $self->get_Transcript->translation_table(@args);
+}
+
+# returns all aminoacids "affected" i.e. all aminoacids coded by any codon
+# "touched" by the range selected between the labels, even if only partially.
+
+# it's not optimized for performance but it's useful
+
+# Return the amino acids between two nucleotide labels.
+# Args   : $start label, $length (counted in amino acids), $end label.
+#          When $end is given, $length is recomputed from the two labels.
+# Returns: a substring of $self->seq, or -1 (after a warning) when a label
+#          is rejected.
+sub labelsubseq {
+  my ($self,$start,$length,$end)=@_;
+  my ($pos1,$pos2);
+  my $transcript=$self->get_Transcript;
+  if ($start) {
+    # labels are checked with the Transcript's valid()
+    unless ($transcript->valid($start)) {
+      $self->warn("Start label not valid"); return (-1);
+    }
+    $pos1=$self->position($start);
+  }
+  if ($end) {
+    if ($end == $start) {
+      # same label at both ends: exactly one amino acid
+      $length=1;
+    } else {
+      unless ($transcript->valid($end)) {
+	$self->warn("End label not valid"); return (-1);
+      }
+      unless ($transcript->follows($start,$end) == 1) {
+	$self->warn("End label does not follow Start label!"); return (-1);
+      }
+      $pos2=$self->position($end);
+      $length=$pos2-$pos1+1;
+    }
+  }
+  my $sequence=$self->seq;
+  # NOTE(review): when $start is false $pos1 is still undef here, so the
+  # offset evaluates to -1; and $length is undef when neither $length nor
+  # $end was passed - confirm what callers expect in those cases
+  return (substr($sequence,$pos1-1,$length));
+}
+
+# return the offset in aminoacids from LiveSeq protein sequence and SwissProt
+# sequence (usually as a result of an INIT_MET or a gap)
+# Returns whatever is stored under the 'offset' key.
+sub offset {
+  my ($self) = @_;
+  return $self->{'offset'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LocatableSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LocatableSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LocatableSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,443 @@
+# $Id: LocatableSeq.pm,v 1.39.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::LocatableSeq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LocatableSeq - A Sequence object with start/end points on it
+that can be projected into a MSA or have coordinates relative to
+another seq.
+
+=head1 SYNOPSIS
+
+
+    use Bio::LocatableSeq;
+    my $seq = new Bio::LocatableSeq(-seq => "CAGT-GGT",
+				    -id  => "seq1",
+				    -start => 1,
+				    -end   => 7);
+
+
+=head1 DESCRIPTION
+
+    # a normal sequence object
+    $locseq->seq();
+    $locseq->id();
+
+    # has start,end points
+    $locseq->start();
+    $locseq->end();
+
+    # inherits off RangeI, so range operations possible
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+The locatable sequence object was developed mainly because the
+SimpleAlign object requires this functionality, and in the rewrite
+of the Sequence object we had to decide what to do with this.
+
+It is, to be honest, not well integrated with the rest of bioperl, for
+example, the trunc() function does not return a LocatableSeq object,
+as some might have thought. There are all sorts of nasty gotcha's
+about interactions between coordinate systems when these sort of
+objects are used.
+
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::LocatableSeq;
+use strict;
+
+use Bio::Location::Simple;
+use Bio::Location::Fuzzy;
+
+
+use base qw(Bio::PrimarySeq Bio::RangeI);
+
+# Constructor: builds the object through the parent class, then applies the
+# optional -start, -end and -strand arguments through their accessors.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($start,$end,$strand) = $self->_rearrange([qw(START END STRAND)], @args);
+
+    $self->start($start)   if defined $start;
+    $self->end($end)       if defined $end;
+    $self->strand($strand) if defined $strand;
+
+    return $self; # success - we hope!
+}
+
+=head2 start
+
+ Title   : start
+ Usage   : $obj->start($newval)
+ Function:
+ Returns : value of start
+ Args    : newvalue (optional)
+
+=cut
+
+# Get/set the start coordinate. With no stored start, returns 1 when the
+# object has a (true) sequence, otherwise nothing.
+sub start{
+   my ($self, @newval) = @_;
+   $self->{'start'} = $newval[0] if @newval;
+
+   if (defined $self->{'start'}) {
+       return $self->{'start'};
+   }
+   if ($self->seq) {
+       return 1;
+   }
+   return;
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $obj->end($newval)
+ Function:
+ Returns : value of end
+ Args    : newvalue (optional)
+
+=cut
+
+# Get/set the end coordinate.
+# On set, if the object has a sequence, the end implied by the ungapped
+# residue count is computed; the supplied value may be replaced by it (see
+# the note on the warn statement). On get, falls back to _ungapped_len when
+# the stored end is false.
+sub end {
+   my $self = shift;
+   if( @_ ) {
+      my $value = shift;
+      my $string = $self->seq; # NOTE(review): $string is not used below
+      if ($self->seq) {
+          my $len = $self->_ungapped_len;
+	  my $id = $self->id;
+	  # Parsed as: (warn(...) and $value = $len) if ($len != $value and verbose > 0).
+	  # So the override only happens when the values differ, verbosity is
+	  # above 0, AND warn() returns a true value.
+	  $self->warn("In sequence $id residue count gives end value $len.
+Overriding value [$value] with value $len for Bio::LocatableSeq::end().")
+	      and $value = $len if $len != $value and $self->verbose > 0;
+      }
+
+      $self->{'end'} = $value;
+    }
+
+   return $self->{'end'} || $self->_ungapped_len;
+}
+
+# End coordinate implied by the sequence: start + number of characters that
+# are neither '.' nor '-' , minus 1. Returns undef when there is no
+# (true) sequence.
+sub _ungapped_len {
+    my ($self) = @_;
+    my $residues = $self->seq || '';
+    $residues =~ s/[\.\-]+//g;
+    return undef unless $self->seq;
+    return $self->start + CORE::length($residues) - 1;
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $obj->strand($newval)
+ Function:
+ Returns : value of strand
+ Args    : newvalue (optional)
+
+=cut
+
+# Plain get/set accessor for the 'strand' key; any supplied value (even
+# undef) is stored as given.
+sub strand{
+   my ($self, @newval) = @_;
+   $self->{'strand'} = $newval[0] if @newval;
+   return $self->{'strand'};
+}
+
+=head2 get_nse
+
+ Title   : get_nse
+ Usage   :
+ Function: read-only name of form id/start-end
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+# Build the "id/start-end" name. The two separators default to "/" and "-"
+# and can be overridden by passing true values. Throws when id, start or
+# end is undefined.
+sub get_nse{
+   my ($self,$char1,$char2) = @_;
+
+   $char1 = "/" unless $char1;
+   $char2 = "-" unless $char2;
+
+   foreach my $attr (qw(id start end)) {
+       $self->throw("Attribute $attr not set") unless defined($self->$attr());
+   }
+
+   my $name = $self->id() . $char1 . $self->start . $char2 . $self->end;
+   return $name;
+}
+
+
+=head2 no_gaps
+
+ Title   : no_gaps
+ Usage   :$self->no_gaps('.')
+ Function:
+
+           Gets number of gaps in the sequence. The count excludes
+           leading or trailing gap characters.
+
+           Valid bioperl sequence characters are [A-Za-z\-\.\*]. Of
+           these, '.' and '-' are counted as gap characters unless an
+           optional argument specifies one of them.
+
+ Returns : number of internal gaps in the sequence.
+ Args    : a gap character (optional)
+
+=cut
+
+# Count the runs of gap characters inside the sequence, ignoring runs at
+# the very beginning and end. $char (default '-.') is interpolated into a
+# character class, so it may hold several gap characters.
+sub no_gaps {
+    my ($self,$char) = @_;
+
+    # default gap characters
+    $char = '-.' unless $char;
+
+    if ($char !~ /[-.]/) {
+	$self->warn("I hope you know what you are doing setting gap to [$char]");
+    }
+
+    my $seq = $self->seq;
+    return 0 unless $seq; # empty sequence does not have gaps
+
+    # strip leading and trailing gap runs, then count what is left
+    $seq =~ s/^([$char]+)//;
+    $seq =~ s/([$char]+)$//;
+    my $count = () = $seq =~ /[$char]+/g;
+
+    return $count;
+}
+
+
+=head2 column_from_residue_number
+
+ Title   : column_from_residue_number
+ Usage   : $col = $seq->column_from_residue_number($resnumber)
+ Function:
+
+           This function gives the position in the alignment
+           (i.e. column number) of the given residue number in the
+           sequence. For example, for the sequence
+
+  	     Seq1/91-97 AC..DEF.GH
+
+           column_from_residue_number(94) returns 5.
+
+           An exception is thrown if the residue number would lie
+           outside the length of the alignment
+           (e.g. column_from_residue_number( "Seq2", 22 )
+
+ Returns : A column number for the position of the
+           given residue in the given sequence (1 = first column)
+ Args    : A residue number in the whole sequence (not just that
+           segment of it in the alignment)
+
+=cut
+
+# Return the 1-based alignment column that holds residue number $resnumber.
+# Throws when $resnumber is not a positive integer or does not lie between
+# start() and end().
+sub column_from_residue_number {
+    my ($self, $resnumber) = @_;
+
+    $self->throw("Residue number has to be a positive integer, not [$resnumber]")
+	unless $resnumber =~ /^\d+$/ and $resnumber > 0;
+
+    if ($resnumber >= $self->start() and $resnumber <= $self->end()) {
+	my @residues = split //, $self->seq;
+	my $count = $self->start();
+	my $i;
+	my ($start,$end,$inc,$test);
+        my $strand = $self->strand || 0;
+	# the following bit of "magic" allows the main loop logic to be the
+	# same regardless of the strand of the sequence
+	# (@residues-1 is the last index; $test closes over $i and $end, so
+	# both must be assigned before the loop evaluates it)
+	($start,$end,$inc,$test)= ($strand == -1)?
+            (scalar(@residues-1),0,-1,sub{$i >= $end}) :
+                (0,scalar(@residues-1),1,sub{$i <= $end});
+
+	# walk the columns, counting only characters other than '.' and '-'
+	for ($i=$start; $test->(); $i+= $inc) {
+	    if ($residues[$i] ne '.' and $residues[$i] ne '-') {
+		$count == $resnumber and last;
+		$count++;
+	    }
+	}
+	# $i now holds the index of the column.
+        # The actual column number is this index + 1
+
+	return $i+1;
+    }
+
+    $self->throw("Could not find residue number $resnumber");
+
+}
+
+
+=head2 location_from_column
+
+ Title   : location_from_column
+ Usage   : $loc = $ali->location_from_column($column_number)
+ Function:
+
+           This function gives the residue number for a given position
+           in the alignment (i.e. column number) of this sequence. Gaps
+           complicate this process and force the output to be a
+           L<Bio::Range> where values can be undefined. For example,
+           for the sequence:
+
+  	     Seq/91-97 .AC..DEF.G.
+
+           location_from_column( 3 ) position 93
+           location_from_column( 2 ) position 92^93
+           location_from_column(10 ) position 97^98
+           location_from_column( 1 ) position undef
+
+           An exact position returns a Bio::Location::Simple object
+           where location_type() returns 'EXACT'; if a position
+           is between bases location_type() returns 'IN-BETWEEN'.
+           Column before the first residue returns undef. Note that if
+           the position is after the last residue in the alignment,
+           that there is no guarantee that the original sequence has
+           residues after that position.
+
+           An exception is thrown if the column number is not within
+           the sequence.
+
+ Returns : Bio::Location::Simple or undef
+ Args    : A column number
+ Throws  : If column is not within the sequence
+
+See L<Bio::Location::Simple> for more.
+
+=cut
+
+# Return the location of the residue found at alignment column $column.
+# - column holds a residue ([a-zA-Z*]): an exact single-position location;
+# - column is a gap before any residue and start() == 1: undef;
+# - any other gap column: an 'IN-BETWEEN' location spanning the two
+#   neighbouring positions.
+# Throws when $column is not a positive integer or exceeds length().
+sub location_from_column {
+    my ($self, $column) = @_;
+
+    $self->throw("Column number has to be a positive integer, not [$column]")
+	unless $column =~ /^\d+$/ and $column > 0;
+    $self->throw("Column number [$column] is larger than".
+		 " sequence length [". $self->length. "]")
+	unless $column <= $self->length;
+
+    my ($loc);
+    # number of residues (non-gap characters) up to and including $column
+    my $s = $self->subseq(1,$column);
+    $s =~ s/[^a-zA-Z\*]//g;
+
+    my $pos = CORE::length $s;
+
+    my $start = $self->start || 0 ;
+    my $strand = $self->strand() || 1;
+    my $relative_pos = ($strand == -1)
+        ? ($self->end - $pos + 1)
+	: ($pos + $start - 1);
+    if ($self->subseq($column, $column) =~ /[a-zA-Z\*]/ ) {
+	# direct method call instead of indirect-object "new Class (...)"
+	$loc = Bio::Location::Simple->new
+	    (-start  => $relative_pos,
+	     -end    => $relative_pos,
+	     -strand => 1,
+	     );
+    } elsif ($pos == 0 and $self->start == 1) {
+	# gap before the first residue: deliberately leave $loc undefined
+    } else {
+      my ($start,$end) = ($relative_pos, $relative_pos + $strand);
+      if ($strand == -1) {
+	($start,$end) = ($end,$start);
+      }
+	$loc = Bio::Location::Simple->new
+	    (-start         => $start,
+	     -end           => $end,
+	     -strand        => 1,
+	     -location_type => 'IN-BETWEEN'
+	     );
+    }
+    return $loc;
+}
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::LocatableSeq object which
+           has the reversed complement of the sequence. For protein
+           sequences this throws an exception of "Sequence is a
+           protein. Cannot revcom"
+
+ Returns : A new Bio::LocatableSeq object
+ Args    : none
+
+=cut
+
+# Reverse-complement via the parent class, then flip the strand and carry
+# over start/end when they are true.
+sub revcom {
+    my ($self) = @_;
+
+    my $new = $self->SUPER::revcom;
+    $new->strand($self->strand * -1);
+    if ($self->start) {
+	$new->start($self->start);
+    }
+    if ($self->end) {
+	$new->end($self->end);
+    }
+    return $new;
+}
+
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence,
+
+ Example :
+ Returns : a fresh Bio::PrimarySeqI implementing object
+ Args    : Two integers denoting first and last columns of the
+           sequence to be included into sub-sequence.
+
+
+=cut
+
+# Truncate via the parent class, copy the strand, and set the new object's
+# start from the location of the first kept column (the last kept column
+# on the -1 strand). Falls back to 1 when that column has no location.
+sub trunc {
+    my ($self, $start, $end) = @_;
+    my $new = $self->SUPER::trunc($start, $end);
+    $new->strand($self->strand);
+
+    # end will be automatically calculated
+    my $column = ($self->strand == -1) ? $end : $start;
+
+    my $loc = $self->location_from_column($column);
+    my $newstart = $loc ? $loc->end : 1;
+    $new->start($newstart) if $newstart;
+
+    return $new;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/Atomic.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/Atomic.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/Atomic.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,516 @@
+# $Id: Atomic.pm,v 1.16.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::Atomic
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::Atomic - Implementation of an Atomic Location on a Sequence
+
+=head1 SYNOPSIS
+
+    use Bio::Location::Atomic;
+
+    my $location = new Bio::Location::Atomic(-start => 1, -end => 100,
+					     -strand => 1 );
+
+    if( $location->strand == -1 ) {
+	printf "complement(%d..%d)\n", $location->start, $location->end;
+    } else {
+	printf "%d..%d\n", $location->start, $location->end;
+    }
+
+=head1 DESCRIPTION
+
+This is an implementation of Bio::LocationI to manage simple location
+information on a Sequence.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::Atomic;
+use strict;
+
+use Bio::Location::WidestCoordPolicy;
+
+use base qw(Bio::Root::Root Bio::LocationI);
+
+our $coord_policy = Bio::Location::WidestCoordPolicy->new();
+
+# Constructor. Recognised arguments: -verbose, -start, -end, -strand,
+# -seq_id. When start > end and the strand is not -1, a warning is issued,
+# the strand is set to -1 and start/end are swapped.
+sub new { 
+    my ($class, @args) = @_;
+    my $self = {};
+    # This is for the case when we've done something like this
+    # get a 2 features from somewhere (like Bio::Tools::GFF)
+    # Do
+    # my $location = $f1->location->union($f2->location);
+    # We get an error without the following code which 
+    # explicitly loads the Bio::Location::Simple class
+    eval {
+	($class) = ref($class) if ref($class);
+	Bio::Root::Root->_load_module($class);
+      };
+    if ( $@ ) {
+	Bio::Root::Root->throw("$class cannot be found\nException $@");
+      }
+    bless $self,$class;
+
+    # "@args" had been garbled to " at args" by mail-archive obfuscation
+    my ($v,$start,$end,$strand,$seqid) = $self->_rearrange([qw(VERBOSE
+							       START
+							       END
+							       STRAND
+							       SEQ_ID)],@args);
+    defined $v && $self->verbose($v);
+    defined $strand && $self->strand($strand);
+
+    defined $start  && $self->start($start);
+    defined $end    && $self->end($end);
+    if( defined $self->start && defined $self->end &&
+	$self->start > $self->end && $self->strand != -1 ) {
+	$self->warn("When building a location, start ($start) is expected to be less than end ($end), ".
+		    "however it was not. Switching start and end and setting strand to -1");
+
+	$self->strand(-1);
+	my $e = $self->end;
+	my $s = $self->start;
+	$self->start($e);
+	$self->end($s);
+    }
+    $seqid          && $self->seq_id($seqid);
+
+    return $self;
+}
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $loc->start();
+  Function: get/set the start of this range
+  Returns : the start of this range
+  Args    : optionaly allows the start to be set
+          : using $loc->start($start)
+
+=cut
+
+# Setting goes through min_start; reading is left to the parent class.
+sub start {
+  my ($self, $value) = @_;
+  if (defined $value) {
+    $self->min_start($value);
+  }
+  return $self->SUPER::start();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $loc->end();
+  Function: get/set the end of this range
+  Returns : the end of this range
+  Args    : optionally allows the end to be set
+          : using $loc->end($end)
+
+=cut
+
+# Setting goes through min_end; reading is left to the parent class.
+sub end {
+  my ($self, $value) = @_;
+  if (defined $value) {
+    $self->min_end($value);
+  }
+  return $self->SUPER::end();
+}
+
+=head2 strand
+
+  Title   : strand
+  Usage   : $strand = $loc->strand();
+  Function: get/set the strand of this range
+  Returns : the strandidness (-1, 0, +1)
+  Args    : optionaly allows the strand to be set
+          : using $loc->strand($strand)
+
+=cut
+
+# Get/set the strand. On set, '+', '-' and '.' are mapped to 1, -1 and 0;
+# any other value must compare numerically equal to -1, 0 or 1, otherwise
+# an exception is thrown. An undefined argument leaves the value untouched.
+sub strand {
+  my $self = shift;
+
+  if ( @_ && defined $_[0] ) {
+      my $value = shift;
+      my %numeric_for = ( '+' => 1, '-' => -1, '.' => 0 );
+      if ( exists $numeric_for{$value} ) {
+	  $value = $numeric_for{$value};
+      }
+      elsif ( $value != -1 && $value != 1 && $value != 0 ) {
+	  $self->throw("$value is not a valid strand info");
+      }
+      $self->{'_strand'} = $value;
+  }
+  # do not pretend the strand has been set if in fact it wasn't
+  return $self->{'_strand'};
+}
+
+=head2 flip_strand
+
+  Title   : flip_strand
+  Usage   : $location->flip_strand();
+  Function: Flip-flop a strand to the opposite
+  Returns : None
+  Args    : None
+
+=cut
+
+
+# Multiply the current strand by -1 and store the result.
+sub flip_strand {
+    my ($self) = @_;
+    my $flipped = $self->strand * -1;
+    $self->strand($flipped);
+}
+
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+  Returns : seq_id (a string)
+  Args    : [optional] seq_id value to set
+
+=cut
+
+
+# Get/set accessor for the '_seqid' key; only defined values are stored.
+sub seq_id {
+    my ($self, $seqid) = @_;
+    $self->{'_seqid'} = $seqid if defined $seqid;
+    return $self->{'_seqid'};
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $loc->length();
+ Function: get the length in the coordinate space this location spans
+ Example :
+ Returns : an integer
+ Args    : none
+
+
+=cut
+
+# Number of positions spanned: |end - start| + 1.
+sub length {
+   my ($self) = @_;
+   my $span = $self->end() - $self->start();
+   return abs($span) + 1;
+}
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : my $minstart = $location->min_start();
+  Function: Get minimum starting location of feature startpoint   
+  Returns : integer or undef if no minimum starting point.
+  Args    : none
+
+=cut
+
+# Get/set accessor for the '_start' key; only defined values are stored.
+sub min_start {
+    my ($self,$value) = @_;
+    $self->{'_start'} = $value if defined($value);
+    return $self->{'_start'};
+}
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting location of feature startpoint.
+
+            In this implementation this is exactly the same as min_start().
+
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+# Same as min_start in this implementation; all arguments are forwarded.
+# ("@args" had been garbled to " at args" by mail-archive obfuscation.)
+sub max_start {
+    my ($self,@args) = @_;
+    return $self->min_start(@args);
+}
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type (ie <,>, ^).
+
+            In this implementation this will always be 'EXACT'.
+
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+# Always 'EXACT' in this implementation.
+sub start_pos_type {
+    return 'EXACT';
+}
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+# Get/set accessor for the '_end' key; only defined values are stored.
+sub min_end {
+    my ($self,$value) = @_;
+    $self->{'_end'} = $value if defined($value);
+    return $self->{'_end'};
+}
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending location of feature endpoint 
+
+            In this implementation this is exactly the same as min_end().
+
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+# Same as min_end in this implementation; all arguments are forwarded.
+# ("@args" had been garbled to " at args" by mail-archive obfuscation.)
+sub max_end {
+    my ($self,@args) = @_;
+    return $self->min_end(@args);
+}
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position type (ie <,>, ^) 
+
+            In this implementation this will always be 'EXACT'.
+
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+# Always 'EXACT' in this implementation.
+sub end_pos_type {
+    return 'EXACT';
+}
+
+=head2 location_type
+
+  Title   : location_type
+  Usage   : my $location_type = $location->location_type();
+  Function: Get location type encoded as text
+  Returns : string ('EXACT', 'WITHIN', 'IN-BETWEEN')
+  Args    : none
+
+=cut
+
+# Always 'EXACT' in this implementation.
+sub location_type {
+    return 'EXACT';
+}
+
+=head2 is_remote
+
+ Title   : is_remote
+ Usage   : $self->is_remote($newval)
+ Function: Getset for is_remote value
+ Returns : value of is_remote
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Plain get/set accessor for the 'is_remote' key; any supplied value (even
+# undef) is stored as given.
+sub is_remote {
+   my ($self, @newval) = @_;
+   $self->{'is_remote'} = $newval[0] if @newval;
+   return $self->{'is_remote'};
+}
+
+=head2 each_Location
+
+ Title   : each_Location
+ Usage   : @locations = $locObject->each_Location($order);
+ Function: Conserved function call across Location:: modules - will
+           return an array containing the component Location(s) in
+           that object, regardless if the calling object is itself a
+           single location or one containing sublocations.
+ Returns : an array of Bio::LocationI implementing objects - for
+           Simple locations, the return value is just itself.
+ Args    : 
+
+=cut
+
+# A single location has no sub-locations: the list holds only itself.
+sub each_Location {
+   my $self = shift;
+   return ($self);
+}
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+# Feature-table string: just the position when start equals end, otherwise
+# "start..end", wrapped in complement(...) on the -1 strand.
+sub to_FTstring { 
+    my($self) = @_;
+    return $self->start if( $self->start == $self->end );
+
+    my $span = $self->start . ".." . $self->end;
+    return ( $self->strand == -1 ) ? sprintf("complement(%s)", $span) : $span;
+}
+
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See L<Bio::Location::CoordinatePolicyI> for documentation
+            about the policy object and its use.
+
+            The interface *does not* require implementing classes to
+            accept setting of a different policy. The implementation
+            provided here does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize
+            every new instance with a
+            L<Bio::Location::CoordinatePolicyI> object. The
+            implementation provided here will return a default policy
+            object if none has been set yet. To change this default
+            policy object call this method as a class method with an
+            appropriate argument. Note that in this case only
+            subsequently created Location objects will be affected.
+
+  Returns : A L<Bio::Location::CoordinatePolicyI> implementing object.
+  Args    : On set, a L<Bio::Location::CoordinatePolicyI> implementing object.
+
+See L<Bio::Location::CoordinatePolicyI> for more information
+
+
+=cut
+
+# Get/set the coordinate policy. A supplied policy must be a
+# Bio::Location::CoordinatePolicyI; it is stored on the instance, or in the
+# package-level default when called as a class method. Returns the
+# instance's policy if one is stored, else the package-level default.
+sub coordinate_policy {
+    my ($self, $policy) = @_;
+    my $is_instance = ref($self);
+
+    if(defined($policy)) {
+	$self->throw("Object of class ".ref($policy)." does not implement".
+		     " Bio::Location::CoordinatePolicyI")
+	    unless $policy->isa('Bio::Location::CoordinatePolicyI');
+	if($is_instance) {
+	    $self->{'_coordpolicy'} = $policy;
+	} else {
+	    # called as class method
+	    $coord_policy = $policy;
+	}
+    }
+    if($is_instance && exists($self->{'_coordpolicy'})) {
+	return $self->{'_coordpolicy'};
+    }
+    return $coord_policy;
+}
+
+
+# comments, not function added by jason 
+#
+# trunc is untested, and as of now unannounced method for truncating a
+# location.  This is to eventually be part of the procedure to
+# truncate a sequence with annotation and properly remap the location
+# of all the features contained within the truncated segment.
+
+# presumably this might do things a little differently for the case 
+# where the truncation splits the location in half
+# 
+# in short- you probably don't want to use  this method.
+
+# Map this location into the coordinate system of a truncated segment
+# [$start,$end]. When the location does not fit entirely inside the
+# segment, a copy with the original coordinates is returned and flagged
+# is_remote; otherwise a new location with shifted coordinates and strand
+# multiplied by $relative_ori is returned.
+sub trunc {
+  my ($self,$start,$end,$relative_ori) = @_;
+
+  my $newstart  = $self->start - $start+1;
+  my $newend    = $self->end   - $start+1;
+  my $newstrand = $relative_ori * $self->strand;
+
+  my $out;
+  if( $newstart < 1 || $newend > ($end-$start+1) ) {
+    $out = Bio::Location::Atomic->new();
+    $out->start($self->start);
+    $out->end($self->end);
+    $out->strand($self->strand);
+    # the accessor defined in this class is seq_id, not seqid
+    $out->seq_id($self->seq_id);
+    $out->is_remote(1);
+  } else {
+    $out = Bio::Location::Atomic->new();
+    $out->start($newstart);
+    $out->end($newend);
+    $out->strand($newstrand);
+    # NOTE(review): called without an argument, so this only reads the
+    # (unset) seq_id of $out - confirm whether a value was meant to be set
+    $out->seq_id();
+  }
+
+  return $out;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/AvWithinCoordPolicy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/AvWithinCoordPolicy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/AvWithinCoordPolicy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+# $Id: AvWithinCoordPolicy.pm,v 1.8.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::AvWithinCoordPolicy
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#          and Jason Stajich <jason at bioperl.org>
+#
+# Copyright Hilmar Lapp, Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::AvWithinCoordPolicy - class implementing 
+Bio::Location::CoordinatePolicy as the average for WITHIN and the widest possible and reasonable range otherwise
+
+=head1 SYNOPSIS
+
+See Bio::Location::CoordinatePolicyI
+
+=head1 DESCRIPTION
+
+CoordinatePolicyI implementing objects are used by Bio::LocationI
+implementing objects to determine integer-valued coordinates when
+asked for it.
+
+This class will compute the coordinates such that for fuzzy locations
+of type WITHIN and BETWEEN the average of the two limits will be
+returned, and for all other locations it will return the widest
+possible range, but by using some common sense. This means that
+e.g. locations like "E<lt>5..100" (start before position 5) will return 5
+as start (returned values have to be positive integers).
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email E<lt>hlapp-at-gmx-dot-netE<gt>, E<lt>jason-at-bioperl-dot-orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::AvWithinCoordPolicy;
+use strict;
+
+
+use base qw(Bio::Location::WidestCoordPolicy);
+
+sub new {
+    # Pass-through constructor: every argument is handed unchanged to the
+    # parent class constructor and the resulting object is returned.
+    my $class = shift;
+    my $policy = $class->SUPER::new(@_);
+
+    return $policy;
+}
+
+
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $policy->start($location);
+  Function: Get the integer-valued start coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub start {
+    my ($self,$loc) = @_;
+
+    # For WITHIN/BETWEEN start positions return the (integer-truncated)
+    # mean of both limits, provided both limits are true values.
+    my $type = $loc->start_pos_type();
+    if ($type eq 'WITHIN' || $type eq 'BETWEEN') {
+        my ($lower, $upper) = ($loc->min_start(), $loc->max_start());
+        if ($lower && $upper) {
+            return int(($lower+$upper)/2);
+        }
+    }
+
+    # Everything else is decided by the parent policy.
+    return $self->SUPER::start($loc);
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $policy->end($location);
+  Function: Get the integer-valued end coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub end {
+    my ($self,$loc) = @_;
+
+    # For WITHIN/BETWEEN end positions return the (integer-truncated)
+    # mean of both limits, provided both limits are true values.
+    my $type = $loc->end_pos_type();
+    if ($type eq 'WITHIN' || $type eq 'BETWEEN') {
+        my ($lower, $upper) = ($loc->min_end(), $loc->max_end());
+        if ($lower && $upper) {
+            return int(($lower+$upper)/2);
+        }
+    }
+
+    # Everything else is decided by the parent policy.
+    return $self->SUPER::end($loc);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/CoordinatePolicyI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/CoordinatePolicyI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/CoordinatePolicyI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+# $Id: CoordinatePolicyI.pm,v 1.8.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::CoordinatePolicyI
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#          and Jason Stajich <jason at bioperl.org>
+#
+# Copyright Hilmar Lapp, Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::CoordinatePolicyI - Abstract interface for objects implementing
+a certain policy of computing integer-valued coordinates of a Location
+
+=head1 SYNOPSIS
+
+    # get a location, e.g., from a SeqFeature
+    $location = $feature->location();
+    # examine its coordinate computation policy
+    print "Location of feature ", $feature->primary_tag(), " employs a ",
+          ref($location->coordinate_policy()), 
+          " instance for coordinate computation\n";
+    # change the policy, e.g. because the user chose to do so
+    $location->coordinate_policy(Bio::Location::NarrowestCoordPolicy->new());
+
+=head1 DESCRIPTION
+
+Objects implementing this interface are used by Bio::LocationI
+implementing objects to determine integer-valued coordinates when
+asked for it. While this may seem trivial for simple locations, there
+are different ways to do it for fuzzy or compound (split)
+locations. Classes implementing this interface implement a certain
+policy, like 'always widest range', 'always smallest range', 'mean for
+BETWEEN locations', etc. By installing a different policy object in a
+Location object, the behaviour of coordinate computation can be changed
+on-the-fly, and with a single line of code client-side.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email hlapp at gmx.net, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::CoordinatePolicyI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $policy->start($location);
+  Function: Get the integer-valued start coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub start {
+    # Abstract method: implementing policy classes must override it.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $policy->end($location);
+  Function: Get the integer-valued end coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub end {
+    # Abstract method: implementing policy classes must override it.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/Fuzzy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/Fuzzy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/Fuzzy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,582 @@
+# $Id: Fuzzy.pm,v 1.33.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::Fuzzy
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::Fuzzy - Implementation of a Location on a Sequence
+which has unclear start and/or end locations
+
+=head1 SYNOPSIS
+
+    use Bio::Location::Fuzzy;
+    my $fuzzylocation = new Bio::Location::Fuzzy(
+                                                 -start => '<30',
+                                                 -end   => 90,
+                                                 -location_type => '..');
+
+    print "location string is ", $fuzzylocation->to_FTstring(), "\n";
+    print "location is of the type ", $fuzzylocation->location_type, "\n";
+
+=head1 DESCRIPTION
+
+This module contains the necessary methods for representing a
+Fuzzy Location, one that does not have clear start and/or end points.
+This will initially serve to handle features from Genbank/EMBL feature
+tables that are written as 1^100 meaning between bases 1 and 100 or
+E<lt>100..300 meaning it starts somewhere before 100.  Advanced
+implementations of this interface may be able to handle the necessary
+logic of overlaps/intersection/contains/union.  It was constructed to
+handle fuzzy locations that can be represented in Genbank/EMBL and
+Swissprot.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Location::Fuzzy;
+use strict;
+
+use base qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI);
+
+# Numeric position-type codes: element N is the type name selected by the
+# number N in start_pos_type()/end_pos_type(). Slot 0 is undef on purpose;
+# those accessors map the number 0 to 'EXACT' themselves.
+our @LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN', 'UNCERTAIN',
+            'BEFORE', 'AFTER');
+
+# Maps a type name to the symbol used for it in a feature-table string.
+our %FUZZYCODES = ( 'EXACT' => '..', # Position is exact
+   # Exact position is unknown, but is within the range specified, ((1.2)..100)
+            'WITHIN' => '.', 
+            # Between two positions, e.g. 1^2
+            'BETWEEN'    => '^',
+            'IN-BETWEEN' => '^',
+            'UNCERTAIN'  => '?',
+            # Before a position, e.g. <100
+            'BEFORE'  => '<',
+            # After a position, e.g. >10
+            'AFTER'   => '>');   
+   
+    # The following regular expressions map to fuzzy location types. Every
+    # expression must match the complete encoded point string. Most contain
+    # two groups identifying min and max; the 'EXACT' and 'UNCERTAIN'
+    # patterns have a single group. Empty matches are automatically
+    # converted to undef, except for 'EXACT' and 'UNCERTAIN', for which max
+    # is set to equal min (see _fuzzypointdecode).
+    
+our %FUZZYPOINTENCODE = ( 
+    '\>(\d+)(.{0})' => 'AFTER',
+    '\<(.{0})(\d+)' => 'BEFORE',
+    '(\d+)'         => 'EXACT',
+    '\?(\d*)'       => 'UNCERTAIN',
+    '(\d+)(.{0})\>' => 'AFTER',
+    '(.{0})(\d+)\<' => 'BEFORE',
+    '(\d+)\.(\d+)'  => 'WITHIN',
+    '(\d+)\^(\d+)'  => 'BETWEEN',
+   );
+    
+# Maps range-delimiter patterns to location types.
+# NOTE(review): not referenced by any code visible in this file section.
+our %FUZZYRANGEENCODE  = (  '\.'   => 'WITHIN',
+                            '\.\.' => 'EXACT',
+                            '\^'   => 'IN-BETWEEN' );
+
+=head2 new
+
+ Title   : new
+ Usage   : my $fuzzyloc = new Bio::Location::Fuzzy( @args);
+ Function:
+ Returns : 
+ Args    : -start    => value for start  (initialize by superclass)
+           -end      => value for end    (initialize by superclass)
+           -strand   => value for strand (initialize by superclass)
+           -location_type => either ('EXACT','WITHIN','IN-BETWEEN',
+                             'UNCERTAIN') OR ( 1,2,3,4)
+           -start_ext=> extension for start - defaults to 0, 
+           -start_fuz=> fuzzy code for start can be 
+                      ('EXACT','WITHIN','BETWEEN','UNCERTAIN','BEFORE',
+                       'AFTER') OR
+                      a value 1 - 6 corresponding to index+1 above
+           -end_ext=> extension for end - defaults to 0, 
+           -end_fuz=> fuzzy code for end can be 
+                      ('EXACT','WITHIN','BETWEEN','UNCERTAIN','BEFORE',
+                       'AFTER') OR
+                      a value 1 - 6 corresponding to index+1 above
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+
+    # The superclass constructor consumes -start, -end, -strand etc.
+    my $self = $class->SUPER::new(@args);
+
+    # Pick out the fuzzy-specific named arguments.
+    my @wanted = qw(LOCATION_TYPE START_EXT START_FUZ END_EXT END_FUZ);
+    my ($location_type, $start_ext, $start_fuz, $end_ext, $end_fuz) =
+        $self->_rearrange(\@wanted, @args);
+
+    # Each setting is applied only when a true value was supplied.
+    $self->location_type($location_type) if $location_type;
+    $self->max_start($self->min_start + $start_ext) if $start_ext;
+    $self->max_end($self->min_end + $end_ext) if $end_ext;
+    $self->start_pos_type($start_fuz) if $start_fuz;
+    $self->end_pos_type($end_fuz) if $end_fuz;
+
+    return $self;
+}
+
+=head2 location_type
+
+  Title   : location_type
+  Usage   : my $location_type = $location->location_type();
+  Function: Get location type encoded as text
+  Returns : string ('EXACT', 'WITHIN', 'IN-BETWEEN', 'UNCERTAIN')
+  Args    : none
+
+=cut
+
+sub location_type {
+    # Get/set the location type. On first read without a stored type the
+    # type defaults to 'EXACT'. A set value may be a key of %FUZZYCODES
+    # or one of the symbols '..', '.', '^', '?'; other strings are
+    # upper-cased and must name EXACT, WITHIN, IN-BETWEEN or UNCERTAIN.
+    # Throws on an unrecognized type, and when '^' is requested for two
+    # directly adjacent positions (use Bio::Location::Simple for those).
+    my ($self,$value) = @_;
+    if( defined $value || ! defined $self->{'_location_type'} ) {
+        $value = 'EXACT' unless defined $value;
+        if(! defined $FUZZYCODES{$value} )  {
+            $value = uc($value);
+            if( $value =~ /\.\./ ) {
+                $value = 'EXACT';
+            } elsif( $value =~ /^\.$/ ) {
+                $value = 'WITHIN';
+            } elsif( $value =~ /\^/ ) {
+                $value = 'IN-BETWEEN';
+                $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
+                             $self->start. "] and [". $self->end. "]")
+                  if defined $self->start && defined $self->end &&
+                            ($self->end - 1 == $self->start);
+            } elsif( $value =~ /\?/ ) {
+                $value = 'UNCERTAIN';
+            } elsif( $value ne 'EXACT' && $value ne 'WITHIN' && 
+                        $value ne 'IN-BETWEEN' && $value ne 'UNCERTAIN' ) {
+                # 'UNCERTAIN' is listed so that a lower/mixed-case spelling,
+                # upper-cased above, is accepted like the other type names.
+                $self->throw("Did not specify a valid location type");
+            }
+        }
+        $self->{'_location_type'} = $value;
+    }
+    return $self->{'_location_type'};
+}
+
+=head1 LocationI methods
+
+=head2 length
+
+  Title   : length
+  Usage   : $length = $fuzzy_loc->length();
+  Function: Get the length of this location.
+
+            Note that the length of a fuzzy location will always depend
+            on the currently active interpretation of start and end. The
+            result will therefore vary for different CoordinatePolicy objects.
+
+  Returns : an integer
+  Args    : none
+
+=cut
+
+#sub length {
+#    my($self) = @_;
+#    return $self->SUPER::length() if( !$self->start || !$self->end);
+#    $self->warn('Length is not valid for a FuzzyLocation'); 
+#    return 0;
+#}
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $fuzzy->start();
+  Function: get/set start of this range, handling fuzzy_starts
+  Returns : a positive integer representing the start of the location
+  Args    : start location on set (can be fuzzy point string)
+
+=cut
+
+sub start {
+    my ($self, $value) = @_;
+
+    # On set, decode the (possibly fuzzy) point string into its position
+    # type and min/max limits and store all three.
+    if (defined $value) {
+        my ($pos_type, $lower, $upper) = $self->_fuzzypointdecode($value);
+        $self->start_pos_type($pos_type);
+        $self->min_start($lower);
+        $self->max_start($upper);
+    }
+
+    # An IN-BETWEEN location over two adjacent positions is rejected.
+    if (   $self->location_type eq 'IN-BETWEEN'
+        && defined $self->SUPER::end
+        && ($self->SUPER::end - 1 == $self->SUPER::start) ) {
+        $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations ["
+                     . $self->SUPER::start. "] and [". $self->SUPER::end. "]");
+    }
+
+    return $self->SUPER::start();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $fuzzy->end();
+  Function: get/set end of this range, handling fuzzy_ends
+  Returns : a positive integer representing the end of the range
+  Args    : end location on set (can be fuzzy string)
+
+=cut
+
+sub end {
+    my ($self, $value) = @_;
+
+    # On set, decode the (possibly fuzzy) point string into its position
+    # type and min/max limits and store all three.
+    if (defined $value) {
+        my ($pos_type, $lower, $upper) = $self->_fuzzypointdecode($value);
+        $self->end_pos_type($pos_type);
+        $self->min_end($lower);
+        $self->max_end($upper);
+    }
+
+    # An IN-BETWEEN location over two adjacent positions is rejected.
+    if (   $self->location_type eq 'IN-BETWEEN'
+        && defined $self->SUPER::start
+        && ($self->SUPER::end - 1 == $self->SUPER::start) ) {
+        $self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
+                     $self->SUPER::start. "] and [". $self->SUPER::end. "]");
+    }
+
+    return $self->SUPER::end();
+}
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : $min_start = $fuzzy->min_start();
+  Function: get/set the minimum starting point
+  Returns : integer or undef if no minimum starting point.
+  Args    : integer or undef on set
+
+=cut
+
+sub min_start {
+    # Get/set the minimum start coordinate. Any supplied argument is
+    # stored, including an explicit undef (which clears the value).
+    # (The parameter list had been mangled to "at args"; restored @args.)
+    my ($self,@args) = @_;
+
+    if(@args) {
+        $self->{'_min_start'} = $args[0]; # the value may be undef!
+    }
+    return $self->{'_min_start'};
+}
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get/set maximum starting location of feature startpoint  
+  Returns : integer or undef if no maximum starting point.
+  Args    : integer or undef on set
+
+=cut
+
+sub max_start {
+    # Get/set the maximum start coordinate. Any supplied argument is
+    # stored, including an explicit undef (which clears the value).
+    # (The parameter list had been mangled to "at args"; restored @args.)
+    my ($self,@args) = @_;
+
+    if(@args) {
+        $self->{'_max_start'} = $args[0]; # the value may be undef!
+    }
+    return $self->{'_max_start'};
+}
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get/set start position type.
+  Returns : type of position coded as text 
+            ('BEFORE','AFTER','EXACT','WITHIN','BETWEEN','UNCERTAIN')
+  Args    : a string on set
+
+=cut
+
+sub start_pos_type {
+    my ($self, $value) = @_;
+
+    if (defined $value) {
+        # A purely numeric value is translated to its type name:
+        # 0 means 'EXACT', anything else is looked up in
+        # @LOCATIONCODESBSANE, falling back to 'EXACT' with a warning.
+        if ($value =~ /^\d+$/) {
+            my $name = $value == 0 ? 'EXACT' : $LOCATIONCODESBSANE[$value];
+            unless (defined $name) {
+                $self->warn("Provided value $value which I don't understand,".
+                            " reverting to 'EXACT'");
+                $name = 'EXACT';
+            }
+            $value = $name;
+        }
+        $self->{'_start_pos_type'} = $value;
+    }
+
+    return $self->{'_start_pos_type'};
+}
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get/set minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : integer or undef on set
+
+=cut
+
+sub min_end {
+    # Get/set the minimum end coordinate. Any supplied argument is
+    # stored, including an explicit undef (which clears the value).
+    # (The parameter list had been mangled to "at args"; restored @args.)
+    my ($self,@args) = @_;
+
+    if(@args) {
+        $self->{'_min_end'} = $args[0]; # the value may be undef!
+    }
+    return $self->{'_min_end'};
+}
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get/set maximum ending location of feature endpoint 
+  Returns : integer or undef if no maximum ending point.
+  Args    : integer or undef on set
+
+=cut
+
+sub max_end {
+    # Get/set the maximum end coordinate. Any supplied argument is
+    # stored, including an explicit undef (which clears the value).
+    # (The parameter list had been mangled to "at args"; restored @args.)
+    my ($self,@args) = @_;
+
+    if(@args) {
+        $self->{'_max_end'} = $args[0]; # the value may be undef!
+    }
+    return $self->{'_max_end'};
+}
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get/set end position type.
+  Returns : type of position coded as text 
+            ('BEFORE','AFTER','EXACT','WITHIN','BETWEEN','UNCERTAIN')
+  Args    : a string on set
+
+=cut
+
+sub end_pos_type {
+    my ($self, $value) = @_;
+
+    if (defined $value) {
+        # A purely numeric value is translated to its type name:
+        # 0 means 'EXACT', anything else is looked up in
+        # @LOCATIONCODESBSANE, falling back to 'EXACT' with a warning.
+        if ($value =~ /^\d+$/) {
+            my $name = $value == 0 ? 'EXACT' : $LOCATIONCODESBSANE[$value];
+            unless (defined $name) {
+                $self->warn("Provided value $value which I don't understand,".
+                            " reverting to 'EXACT'");
+                $name = 'EXACT';
+            }
+            $value = $name;
+        }
+        $self->{'_end_pos_type'} = $value;
+    }
+
+    return $self->{'_end_pos_type'};
+}
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+  Returns : seq_id
+  Args    : [optional] seq_id value to set
+
+=cut
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See Bio::Location::CoordinatePolicyI for documentation about
+            the policy object and its use.
+
+            The interface *does not* require implementing classes to accept
+            setting of a different policy. The implementation provided here
+            does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize every
+            new instance with a CoordinatePolicyI object. The implementation
+            provided here will return a default policy object if none has
+            been set yet. To change this default policy object call this
+            method as a class method with an appropriate argument. Note that
+            in this case only subsequently created Location objects will be
+            affected.
+
+  Returns : A Bio::Location::CoordinatePolicyI implementing object.
+  Args    : On set, a Bio::Location::CoordinatePolicyI implementing object.
+
+See L<Bio::Location::CoordinatePolicyI>
+
+=cut
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+sub to_FTstring {
+    # Build the feature-table string for this location.
+    # Returns the string, or '' (after a warning) when a WITHIN/BETWEEN
+    # point lacks its min or max limit.
+    my ($self) = @_;    
+    # Snapshot all coordinates and position types once.
+    my (%vals) = ( 'start' => $self->start,
+           'min_start' => $self->min_start,
+           'max_start' => $self->max_start,
+           'start_code' => $self->start_pos_type,
+           'end' => $self->end,
+           'min_end' => $self->min_end,
+           'max_end' => $self->max_end,
+           'end_code' => $self->end_pos_type );
+
+    my (%strs) = ( 'start' => '',
+           'end'   => '');
+    # An UNCERTAIN location is still joined with the EXACT delimiter.
+    my ($delimiter) = $FUZZYCODES{$self->location_type};
+    $delimiter = $FUZZYCODES{'EXACT'} if ($self->location_type eq 'UNCERTAIN');
+    
+    # I'm lazy, lets do this in a loop since behaviour will be the same for 
+    # start and end
+    # The CoordinatePolicy now dictates start/end data here (bug 992) - cjf
+    foreach my $point ( qw(start end) ) {
+        if( ($vals{$point."_code"} ne 'EXACT') &&
+            ($vals{$point."_code"} ne 'UNCERTAIN') ) {
+            
+            # must have max and min defined to use 'WITHIN', 'BETWEEN'
+            if ((!defined $vals{"min_$point"} ||
+                 !defined $vals{"max_$point"}) && 
+                ( $vals{$point."_code"} eq 'WITHIN' || 
+                  $vals{$point."_code"} eq 'BETWEEN'))
+            {
+                $vals{"min_$point"} = '' unless defined $vals{"min_$point"};
+                $vals{"max_$point"} = '' unless defined $vals{"max_$point"};
+                
+                # Name the point actually being processed (the message used
+                # to say "start" even for a broken end point).
+                $self->warn("Fuzzy codes for $point are in a strange state, (".
+                        join(",", ($vals{"min_$point"}, 
+                               $vals{"max_$point"},
+                               $vals{$point."_code"})). ")");
+                return '';
+            }
+            
+            # BEFORE/AFTER: symbol followed by the coordinate, e.g. <100
+            if (defined $vals{$point."_code"} && 
+               ($vals{$point."_code"} eq 'BEFORE' ||
+                $vals{$point."_code"} eq 'AFTER'))
+            {
+                $strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
+                $strs{$point} .= $vals{"$point"};
+            }
+ 
+            # WITHIN/BETWEEN: parenthesized pair, e.g. (1.2)
+            if( defined $vals{$point."_code"} && 
+              ($vals{$point."_code"} eq 'WITHIN' ||
+               $vals{$point."_code"} eq 'BETWEEN'))
+            {
+                # Expect odd results with anything but WidestCoordPolicy for now
+                $strs{$point} .= ($point eq 'start') ?
+                        $vals{"$point"}.
+                        $FUZZYCODES{$vals{$point."_code"}}.
+                        $vals{'max_'.$point}
+                        :
+                        $vals{'min_'.$point}.
+                        $FUZZYCODES{$vals{$point."_code"}}.
+                        $vals{"$point"};
+                $strs{$point} = "(".$strs{$point}.")";
+            }
+            
+        } elsif ($vals{$point."_code"} eq 'UNCERTAIN') {
+            # UNCERTAIN: '?' optionally followed by the coordinate
+            $strs{$point}  = $FUZZYCODES{$vals{$point."_code"}};
+            $strs{$point} .= $vals{$point} if defined $vals{$point};
+        } else {
+            $strs{$point} = $vals{$point};
+        }
+    }
+    
+    my $str = $strs{'start'} . $delimiter . $strs{'end'};
+    # Remote locations are prefixed with the id of the sequence they are on.
+    if($self->is_remote() && $self->seq_id()) {
+    $str = $self->seq_id() . ":" . $str;
+    }
+    if( defined $self->strand && 
+    $self->strand == -1 &&
+    $self->location_type() ne "UNCERTAIN") {
+    $str = "complement(" . $str . ")";
+    } elsif($self->location_type() eq "WITHIN") {
+    $str = "(".$str.")";
+    }
+    return $str;
+}
+
+=head2 _fuzzypointdecode
+
+  Title   : _fuzzypointdecode
+  Usage   : ($type,$min,$max) = $self->_fuzzypointdecode('<5');
+  Function: Decode a fuzzy string.
+  Returns : A 3-element array consisting of the type of location, the
+            minimum integer, and the maximum integer describing the range
+            of coordinates this start or endpoint refers to. Minimum or
+            maximum coordinate may be undefined.
+          : Returns empty array on fail.
+  Args    : fuzzypoint string
+
+=cut
+
+sub _fuzzypointdecode {
+    # Decode a fuzzy point string such as '<5', '5', '?5', '(1.3)'-style
+    # '1.3' or '1^3' into (type, min, max) using %FUZZYPOINTENCODE.
+    # Returns an empty list for undef input or when no pattern matches
+    # (a warning is issued only if verbose >= 1).
+    my ($self, $string) = @_;
+    return () if( !defined $string);
+    # strip off leading and trailing space
+    $string =~ s/^\s*(\S+)\s*/$1/;
+    foreach my $pattern ( keys %FUZZYPOINTENCODE ) {
+        if( $string =~ /^$pattern$/ ) {
+            # Declare unconditionally, assign conditionally: a 'my' guarded
+            # by a statement modifier has undefined behaviour and may carry
+            # over a value from an earlier call.
+            my ($min,$max);
+            ($min,$max) = ($1,$2) unless (($1 eq '') && (!defined $2));
+            if( ($FUZZYPOINTENCODE{$pattern} eq 'EXACT') ||
+                 ($FUZZYPOINTENCODE{$pattern} eq 'UNCERTAIN')
+              ) {
+                # single-coordinate types: max mirrors min
+                $max = $min;
+            } else {
+                # empty captures mean "no limit on this side"
+                $max = undef if((defined $max) && (length($max) == 0));
+                $min = undef if((defined $min) && (length($min) == 0));
+            }
+            return ($FUZZYPOINTENCODE{$pattern},$min,$max);
+        }
+    }
+    if( $self->verbose >= 1 ) {
+        $self->warn("could not find a valid fuzzy encoding for $string");
+    }
+    return ();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/FuzzyLocationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/FuzzyLocationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/FuzzyLocationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,198 @@
+# $Id: FuzzyLocationI.pm,v 1.21.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::FuzzyLocationI
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::FuzzyLocationI - Abstract interface of a Location on a Sequence
+which has unclear start/end location
+
+=head1 SYNOPSIS
+
+    # Get a FuzzyLocationI object somehow
+    print "Fuzzy FT location string is ", $location->to_FTstring();
+    print "location is of the type ", $location->location_type, "\n";
+
+=head1 DESCRIPTION
+
+This interface encapsulates the necessary methods for representing a
+Fuzzy Location, one that does not have clear start and/or end points.
+This will initially serve to handle features from Genbank/EMBL feature
+tables that are written as 1^100 meaning between bases 1 and 100 or
+E<lt>100..300 meaning it starts somewhere before 100.  Advanced
+implementations of this interface may be able to handle the necessary
+logic of overlaps/intersection/contains/union.  It was constructed to
+handle fuzzy locations that can be represented in Genbank/EMBL.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::FuzzyLocationI;
+use strict;
+
+use base qw(Bio::LocationI);
+
+=head1 LocationI methods
+
+=head2 location_type
+
+  Title   : location_type
+  Usage   : my $location_type = $location->location_type();
+  Function: Get location type encoded as text
+  Returns : string ('EXACT', 'WITHIN', 'IN-BETWEEN')
+  Args    : none
+
+=cut
+
+sub location_type {
+    # Abstract method: implementing location classes must override it.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head1 Bio::LocationI methods
+
+Bio::LocationI methods follow
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : my $minstart = $location->min_start();
+  Function: Get minimum starting location of feature startpoint   
+  Returns : integer or undef if no minimum starting point.
+  Args    : none
+
+=cut
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting location of feature startpoint  
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending location of feature endpoint 
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+  Returns : seq_id
+  Args    : [optional] seq_id value to set
+
+=cut
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See Bio::Location::CoordinatePolicyI for documentation about
+            the policy object and its use.
+
+            The interface *does not* require implementing classes to accept
+            setting of a different policy. The implementation provided here
+            does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize every
+            new instance with a CoordinatePolicyI object. The implementation
+            provided here will return a default policy object if none has
+            been set yet. To change this default policy object call this
+            method as a class method with an appropriate argument. Note that
+            in this case only subsequently created Location objects will be
+            affected.
+
+  Returns : A Bio::Location::CoordinatePolicyI implementing object.
+  Args    : On set, a Bio::Location::CoordinatePolicyI implementing object.
+
+=cut
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/NarrowestCoordPolicy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/NarrowestCoordPolicy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/NarrowestCoordPolicy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,123 @@
+# $Id: NarrowestCoordPolicy.pm,v 1.11.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::NarrowestCoordPolicy
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#          and Jason Stajich <jason at bioperl.org>
+#
+# Copyright Hilmar Lapp, Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::NarrowestCoordPolicy - class implementing 
+Bio::Location::CoordinatePolicy as the narrowest possible and reasonable range
+
+=head1 SYNOPSIS
+
+See Bio::Location::CoordinatePolicyI
+
+=head1 DESCRIPTION
+
+CoordinatePolicyI implementing objects are used by Bio::LocationI
+implementing objects to determine integer-valued coordinates when
+asked for it.
+
+This class will compute the coordinates such that always the narrowest possible
+range is returned, but by using some common sense. This means that e.g.
+locations like "E<lt>5..100" (start before position 5) will return 5 as start
+(returned values have to be positive integers).
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email E<lt>hlapp-at-gmx.netE<gt>, E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::NarrowestCoordPolicy;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Location::CoordinatePolicyI);
+
+sub new {
+    # No policy-specific setup: the parent constructor does all the work.
+    my $class    = shift;
+    my $instance = $class->SUPER::new(@_);
+    return $instance;
+}
+
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $policy->start($location);
+  Function: Get the integer-valued start coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub start {
+    my ($self, $location) = @_;
+
+    # No isa() check on $location, for speed: callers are trusted to pass an
+    # object that provides max_start() and min_start().
+    my $coord = $location->max_start();
+    return $coord if $coord;
+    # max_start was undefined or 0, so min_start is used instead
+    return scalar $location->min_start();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $policy->end($location);
+  Function: Get the integer-valued end coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub end {
+    my ($self, $location) = @_;
+
+    # As in start(), the argument is not type-checked, for speed.
+    # Narrowest range: prefer the smallest possible end; if min_end is
+    # undefined or 0, take max_end instead.
+    my $coord = $location->min_end();
+    $coord = $location->max_end() unless $coord;
+    return $coord;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/Simple.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/Simple.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/Simple.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,360 @@
+# $Id: Simple.pm,v 1.41.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::Simple
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::Simple - Implementation of a Simple Location on a Sequence
+
+=head1 SYNOPSIS
+
+    use Bio::Location::Simple;
+
+    my $location = new Bio::Location::Simple(-start => 1, -end => 100,
+					     -strand => 1 );
+
+    if( $location->strand == -1 ) {
+	printf "complement(%d..%d)\n", $location->start, $location->end;
+    } else {
+	printf "%d..%d\n", $location->start, $location->end;
+    }
+
+=head1 DESCRIPTION
+
+This is an implementation of Bio::LocationI to manage exact location
+information on a Sequence: '22' or '12..15' or '16^17'.
+
+You can test the type of the location using the length() function or
+directly with location_type(), which can be one of two values: 'EXACT' or
+'IN-BETWEEN'.
+
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::Simple;
+use strict;
+
+use base qw(Bio::Location::Atomic);
+# Location-type names keyed by regex-escaped range operator ('..' and '^').
+our %RANGEENCODE  = ('\.\.' => 'EXACT',
+		     '\^'   => 'IN-BETWEEN' );
+# Inverse table: the range operator text emitted for each location type.
+our %RANGEDECODE  = ('EXACT'      => '..',
+		     'IN-BETWEEN' => '^' );
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Pull the LOCATION_TYPE value out of the constructor arguments; when it
+    # is absent, location_type() falls back to 'EXACT' on first use.
+    my ($locationtype) = $self->_rearrange([qw(LOCATION_TYPE)],@args);
+    $self->location_type($locationtype) if $locationtype;
+    return $self;
+}
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $loc->start();
+  Function: get/set the start of this range
+  Returns : the start of this range
+  Args    : optionally allows the start to be set
+          : using $loc->start($start)
+
+=cut
+
+sub start {
+  my ($self, $new_start) = @_;
+  $self->{'_start'} = $new_start if defined $new_start;
+  # An IN-BETWEEN location lies between two adjacent residues, so as soon as
+  # both coordinates are known, end must be exactly start + 1.
+  my ($from, $to) = @{$self}{qw(_start _end)};
+  if( defined $from && defined $to &&
+      $self->location_type eq 'IN-BETWEEN' && ($to - 1 != $from) ) {
+    $self->throw("Only adjacent residues when location type ".
+                 "is IN-BETWEEN. Not [". $from . "] and [". $to . "]" );
+  }
+  return $self->{'_start'};
+}
+
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $loc->end();
+  Function: get/set the end of this range
+  Returns : the end of this range
+  Args    : optionally allows the end to be set
+          : using $loc->end($end)
+
+=cut
+
+sub end {
+  my ($self, $new_end) = @_;
+  $self->{'_end'} = $new_end if defined $new_end;
+  # IN-BETWEEN requires end == start + 1; checkable only once both are set
+  my $both_set = defined $self->{'_start'} && defined $self->{'_end'};
+  if( $both_set && $self->location_type eq 'IN-BETWEEN' &&
+      $self->{'_end'} - 1 != $self->{'_start'} ) {
+    $self->throw("Only adjacent residues when location type ".
+                 "is IN-BETWEEN. Not [". $self->{'_start'}. "] and [".
+                 $self->{'_end'}. "]" );
+  }
+  return $self->{'_end'};
+}
+
+=head2 strand
+
+  Title   : strand
+  Usage   : $strand = $loc->strand();
+  Function: get/set the strand of this range
+  Returns : the strandedness (-1, 0, +1)
+  Args    : optionally allows the strand to be set
+          : using $loc->strand($strand)
+
+=cut
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $loc->length();
+ Function: get the length in the coordinate space this location spans
+ Example :
+ Returns : an integer
+ Args    : none
+
+
+=cut
+
+sub length {
+   my $self = shift;
+
+   # A position between two residues spans nothing in coordinate space.
+   return 0 if $self->location_type eq 'IN-BETWEEN';
+
+   # Otherwise both endpoints are counted, whichever order they come in.
+   return abs($self->end - $self->start) + 1;
+}
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : my $minstart = $location->min_start();
+  Function: Get minimum starting location of feature startpoint
+  Returns : integer or undef if no minimum starting point.
+  Args    : none
+
+=cut
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting location of feature startpoint.
+
+            In this implementation this is exactly the same as min_start().
+
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type (ie <,>, ^).
+
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending location of feature endpoint 
+
+            In this implementation this is exactly the same as min_end().
+
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position type (ie <,>, ^) 
+
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+=head2 location_type
+
+  Title   : location_type
+  Usage   : my $location_type = $location->location_type();
+  Function: Get location type encoded as text
+  Returns : string ('EXACT' or 'IN-BETWEEN')
+  Args    : 'EXACT' or '..' or 'IN-BETWEEN' or '^'
+
+=cut
+
+sub location_type {
+    my ($self, $value) = @_;
+    if( defined $value || ! defined $self->{'_location_type'} ) {
+        $value = 'EXACT' unless defined $value;
+        my $requested = $value; # kept as given, for the error message below
+        $value = uc $value;
+        if (! defined $RANGEDECODE{$value}) {
+            # not a type name, so try it as a range operator ('..' or '^')
+            $value = '\^' if $value eq '^';
+            $value = '\.\.' if $value eq '..';
+            $value = $RANGEENCODE{$value};
+        }
+        $self->throw("Did not specify a valid location type. [$requested] is no good")
+            unless defined $value;
+        $self->{'_location_type'} = $value;
+    }
+    # IN-BETWEEN is only valid between adjacent residues (end == start + 1)
+    $self->throw("Only adjacent residues when location type ".
+                 "is IN-BETWEEN. Not [". $self->{'_start'}. "] and [".
+                 $self->{'_end'}. "]" )
+        if $self->{'_location_type'} eq 'IN-BETWEEN' &&
+            defined $self->{'_start'} && defined $self->{'_end'} &&
+                ($self->{'_end'} - 1 != $self->{'_start'});
+    return $self->{'_location_type'};
+}
+
+=head2 is_remote
+
+ Title   : is_remote
+ Usage   : $self->is_remote($newval)
+ Function: Getset for is_remote value
+ Returns : value of is_remote
+ Args    : newvalue (optional)
+
+
+=cut
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+sub to_FTstring {
+    my $self = shift;
+
+    # A single position is written bare; anything else is start and end
+    # joined by the operator for this location type ('..' or '^').
+    my $ft = ( $self->start == $self->end )
+        ? $self->start
+        : $self->start . $RANGEDECODE{$self->location_type} . $self->end;
+
+    # Remote locations are qualified with the id of the sequence they lie on.
+    $ft = $self->seq_id() . ":" . $ft
+        if $self->is_remote() && $self->seq_id();
+
+    # Reverse-strand locations are wrapped in complement().
+    $ft = "complement(" . $ft . ")"
+        if defined $self->strand && $self->strand == -1;
+    return $ft;
+}
+
+# comments, not function added by jason 
+#
+# trunc is untested, and as of now unannounced method for truncating a
+# location.  This is to eventually be part of the procedure to
+# truncate a sequence with annotation and properly remap the location
+# of all the features contained within the truncated segment.
+
+# presumably this might do things a little differently for the case 
+# where the truncation splits the location in half
+# 
+# in short- you probably don't want to use  this method.
+
+sub trunc {
+  my ($self,$start,$end,$relative_ori) = @_;
+  # this location re-expressed relative to the window $start..$end
+  my $newstart  = $self->start - $start+1;
+  my $newend    = $self->end   - $start+1;
+  my $newstrand = $relative_ori * $self->strand;
+
+  my $out = Bio::Location::Simple->new();
+  if( $newstart < 1 || $newend > ($end-$start+1) ) {
+    # not wholly inside the window: hand back a copy in the original
+    # coordinates, flagged as remote
+    $out->start($self->start);
+    $out->end($self->end);
+    $out->strand($self->strand);
+    $out->seq_id($self->seq_id);   # accessor is seq_id(), not seqid()
+    $out->is_remote(1);
+  } else {
+    $out->start($newstart);
+    $out->end($newend);
+    $out->strand($newstrand);
+    $out->seq_id();   # NOTE(review): called with no value -- looks like a no-op
+  }
+  return $out;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/Split.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/Split.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/Split.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,660 @@
+# $Id: Split.pm,v 1.46.4.3 2006/10/31 22:26:31 cjfields Exp $
+#
+# BioPerl module for Bio::Location::Split
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::Split - Implementation of a Location on a Sequence
+which has multiple locations (start/end points)
+
+=head1 SYNOPSIS
+
+    use Bio::Location::Split;
+
+    my $splitlocation = new Bio::Location::Split();
+    $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>1,
+							       -end=>30,
+							       -strand=>1));
+    $splitlocation->add_sub_Location(new Bio::Location::Simple(-start=>50,
+							       -end=>61,
+							       -strand=>1));   
+    my @sublocs = $splitlocation->sub_Location();
+
+    my $count = 1;
+    # print the start/end points of the sub locations
+    foreach my $location ( sort { $a->start <=> $b->start } 
+			   @sublocs ) {
+	printf "sub feature %d [%d..%d]\n", 
+	       $count, $location->start,$location->end;
+        $count++;
+    }
+
+=head1 DESCRIPTION
+
+This implementation handles locations which span more than one
+start/end location, and/or lie on different sequences, and can
+work with split locations that depend on the specific order of the
+sublocations ('join') or don't have a specific order but represent
+a feature spanning discontiguous sublocations ('order', 'bond').
+
+Note that the order in which sublocations are added may be very important,
+depending on the specific split location type.  For instance, a 'join'
+must have the sublocations added in the order that one expects to
+join the sublocations, whereas all other types are sorted based on the
+sequence location.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-AT-bioperl_DOT_org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Location::Split;
+
+# as defined by BSANE 0.03
+our @CORBALOCATIONOPERATOR = ('NONE','JOIN', undef, 'ORDER');;
+
+use Bio::Root::Root;
+
+use base qw(Bio::Location::Atomic Bio::Location::SplitLocationI);
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # start with an empty sublocation list, then apply the named arguments
+    $self->{'_sublocations'} = [];
+    my ( $type, $seqid, $locations ) =
+        $self->_rearrange([qw(SPLITTYPE SEQ_ID LOCATIONS)], @args);
+
+    if( defined $locations && ref($locations) =~ /array/i ) {
+        $self->add_sub_Location(@$locations);
+    }
+    $self->seq_id($seqid) if $seqid;
+
+    # split type defaults to JOIN; only lower-case a value that was supplied
+    $self->splittype( (defined $type && lc $type) || 'JOIN' );
+    return $self;
+}
+
+=head2 each_Location
+
+ Title   : each_Location
+ Usage   : @locations = $locObject->each_Location($order);
+ Function: Conserved function call across Location:: modules - will
+           return an array containing the component Location(s) in
+           that object, regardless if the calling object is itself a
+           single location or one containing sublocations.
+ Returns : an array of Bio::LocationI implementing objects
+ Args    : Optional sort order to be passed to sub_Location()
+
+=cut
+
+sub each_Location {
+    my ($self, $order) = @_;
+
+    # Gather each_Location() from every sublocation, passing the requested
+    # sort order along, so that nested split locations are expanded
+    # recursively into one flat list.
+    my @flat = map { $_->each_Location($order) } $self->sub_Location($order);
+    return @flat;
+}
+
+=head2 sub_Location
+
+ Title   : sub_Location
+ Usage   : @sublocs = $splitloc->sub_Location();
+ Function: Returns the array of sublocations making up this compound (split)
+           location. Those sublocations referring to the same sequence as
+           the root split location will be sorted by start position (forward
+           sort) or end position (reverse sort) and come first (before
+           those on other sequences).
+
+           The sort order can be optionally specified or suppressed by the
+           value of the first argument. The default is no sort.
+
+ Returns : an array of Bio::LocationI implementing objects
+ Args    : Optionally 1, 0, or -1 for specifying a forward, no, or reverse
+           sort order
+
+=cut
+
+sub sub_Location {
+    my ($self, $order) = @_;
+    $order = 0 unless defined $order;
+    if( $order !~ /^-?\d+$/ ) {
+        $self->throw("value $order passed in to sub_Location is $order, an invalid value");
+    }
+    # clamp to the three supported modes: 1 forward, 0 unsorted, -1 reverse
+    $order = 1 if($order > 1);
+    $order = -1 if($order < -1);
+    my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : ();
+
+    # return the array if no ordering requested
+    return @sublocs if( ($order == 0) || (! @sublocs) );
+
+    # sort those locations that are on the same sequence as the top (`master')
+    # if the top seq is undefined, we take the first defined in a sublocation
+    my $seqid = $self->seq_id();
+    my $i = 0;
+    while((! defined($seqid)) && ($i <= $#sublocs)) {
+        $seqid = $sublocs[$i++]->seq_id();
+    }
+    if((! $self->seq_id()) && $seqid) {
+        $self->warn("sorted sublocation array requested but ".
+                    "root location doesn't define seq_id ".
+                    "(at least one sublocation does!)");
+    }
+    my @locs = ($seqid ?
+                grep { $_->seq_id() eq $seqid; } @sublocs :
+                @sublocs);
+    if(@locs) {
+        # Compare two sort keys: ascending for a forward sort, descending for
+        # a reverse sort. The keys themselves ($a->[1], $b->[1]) are tested
+        # for definedness, not the [location, key] pairs, which are always
+        # defined; a location with no usable coordinate sorts last.
+        my $by_key = sub {
+            my ($key_a, $key_b) = @_;
+            return $order * ($key_a <=> $key_b)
+                if defined $key_a && defined $key_b;
+            return defined $key_a ? -1 : defined $key_b ? 1 : 0;
+        };
+        # Schwartzian transform for performance boost: the key is start
+        # (else end) going forward, end (else start) going in reverse
+        @locs = map  { $_->[0] }
+                sort { $by_key->($a->[1], $b->[1]) }
+                map  {
+                    [$_, $order == 1
+                         ? (defined $_->start ? $_->start : $_->end)
+                         : (defined $_->end   ? $_->end   : $_->start)]
+                } @locs;
+    }
+    # push the rest unsorted
+    if($seqid) {
+        push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs);
+    }
+    # done!
+
+    return @locs;
+}
+
+=head2 add_sub_Location
+
+ Title   : add_sub_Location
+ Usage   : $splitloc->add_sub_Location(@locationIobjs);
+ Function: add an additional sublocation
+ Returns : number of current sub locations
+ Args    : list of Bio::LocationI implementing object(s) to add
+
+=cut
+
+sub add_sub_Location {
+    my ($self,@args) = @_;
+    foreach my $loc ( @args ) {
+        # only objects implementing Bio::LocationI may become sublocations
+        if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) {
+            $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!");
+            next;
+        }
+        push @{$self->{'_sublocations'}}, $loc;
+    }
+
+    return scalar @{$self->{'_sublocations'}};
+}
+
+=head2 splittype
+
+  Title   : splittype
+  Usage   : $splittype = $location->splittype();
+  Function: get/set the split splittype
+  Returns : the splittype of split feature (join, order)
+  Args    : splittype to set
+
+=cut
+
+sub splittype {
+    my ($self, $new_type) = @_;
+    # Stored upper-cased. An explicit value always wins; 'JOIN' is filled in
+    # only when nothing has been stored yet.
+    if    ( defined $new_type )               { $self->{'_splittype'} = uc $new_type }
+    elsif ( ! defined $self->{'_splittype'} ) { $self->{'_splittype'} = 'JOIN' }
+    return $self->{'_splittype'};
+}
+
+=head2 is_single_sequence
+
+  Title   : is_single_sequence
+  Usage   : if($splitloc->is_single_sequence()) {
+                print "Location object $splitloc is split ".
+                      "but only across a single sequence\n";
+	    }
+  Function: Determine whether this location is split across a single or
+            multiple sequences.
+
+            This implementation ignores (sub-)locations that do not define
+            seq_id(). The same holds true for the root location.
+
+  Returns : TRUE if all sublocations lie on the same sequence as the root
+            location (feature), and FALSE otherwise.
+  Args    : none
+
+=cut
+
+sub is_single_sequence {
+    my $self = shift;
+    # Reference id: the root's own seq_id or, failing that, the first true
+    # seq_id met among the sublocations; those without a seq_id are ignored.
+    my $reference = $self->seq_id();
+    for my $subloc ($self->sub_Location(0)) {
+        $reference = $subloc->seq_id() unless $reference;
+        next unless defined($subloc->seq_id());
+        return 0 if $subloc->seq_id() ne $reference;
+    }
+    return 1;
+}
+
+=head2 guide_strand
+
+  Title   : guide_strand
+  Usage   : $str = $loc->guide_strand();
+  Function: Get/Set the guide strand.  Of use only if the split type is
+            a 'join' (this helps determine the order of sublocation
+			retrieval)
+  Returns : value of guide strand (1, -1, or undef)
+  Args    : new value (-1 or 1, optional)
+
+=cut
+
+sub guide_strand {
+    my ($self, @new) = @_;
+    $self->{'strand'} = $new[0] if @new;   # any argument, even undef, sets it
+    return $self->{'strand'};
+}
+
+=head1 LocationI methods
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $obj->strand($newval)
+ Function: For SplitLocations, setting the strand of the container
+           (this object) is a short-cut for setting the strand of all
+           sublocations.
+
+           In get-mode, checks if no sub-location is remote, and if
+           all have the same strand. If so, it returns that shared
+           strand value. Otherwise it returns undef.
+
+ Example : 
+ Returns : on get, value of strand if identical between sublocations 
+           (-1, 1, or undef)
+ Args    : new value (-1 or 1, optional)
+
+
+=cut
+
+sub strand{
+    my ($self,$value) = @_;
+    if( defined $value) {
+        # set mode: record the value on the container, then hand it down
+        # to every sublocation
+        $self->{'strand'} = $value;
+        foreach my $subloc ($self->sub_Location(0)) {
+            $subloc->strand($value);
+        }
+    } else {
+        # get mode: a shared strand exists only if every sublocation is
+        # non-remote, has a true strand value, and agrees with the rest
+        my $shared;
+        foreach my $subloc ($self->sub_Location(0)) {
+            # strand() can be costly when the sublocation is itself a
+            # split location, so it is called once and the result reused
+            my $current = $subloc->strand();
+            my $give_up = (! $current) ||
+                          ($shared && ($shared != $current)) ||
+                          $subloc->is_remote();
+            if( $give_up ) {
+                undef $shared;
+                last;
+            }
+            $shared ||= $current;
+        }
+        return $shared;
+    }
+}
+
+=head2 flip_strand
+
+  Title   : flip_strand
+  Usage   : $location->flip_strand();
+  Function: Flip-flop a strand to the opposite.  Also switch Split strand
+            from undef to -1 or -1 to undef
+  Returns : None
+  Args    : None
+
+=cut
+
+sub flip_strand {
+    my $self = shift;
+    foreach my $subloc ( $self->sub_Location(0) ) {
+        $subloc->flip_strand;
+        next unless $subloc->isa('Bio::Location::SplitLocationI');
+        # nested split location: its guide strand becomes undef when this
+        # container's guide strand is -1, and -1 otherwise
+        $subloc->guide_strand( ($self->guide_strand == -1) ? undef : -1 );
+    }
+}
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $location->start();
+  Function: get the starting point of the first (sorted) sublocation
+  Returns : integer
+  Args    : none
+
+=cut
+
+sub start {
+    my ($self, $new_start) = @_;
+    # read-only here: a defined argument is rejected, the parent computes it
+    $self->throw("Trying to set the starting point of a split location, ".
+                 "that is not possible, try manipulating the sub Locations")
+        if defined $new_start;
+    return $self->SUPER::start();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $location->end();
+  Function: get the ending point of the last (sorted) sublocation
+  Returns : integer
+  Args    : none
+
+=cut
+
+sub end {
+    my ($self, $new_end) = @_;
+    # getter only; the end itself is obtained from the parent class
+    defined $new_end and
+        $self->throw("Trying to set the ending point of a split location, ".
+                     "that is not possible, try manipulating the sub Locations");
+    return $self->SUPER::end();
+}
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : $min_start = $location->min_start();
+  Function: get the minimum starting point
+  Returns : the minimum starting point from the contained sublocations
+  Args    : none
+
+=cut
+
+sub min_start {
+    my ($self, $value) = @_;
+    if( defined $value ) {
+        $self->throw("Trying to set the minimum starting point of a split ".
+                     "location, that is not possible, try manipulating the sub Locations");
+    }
+
+    # forward sort order puts the answering sublocation first
+    my @sorted = $self->sub_Location(1);
+    return @sorted ? $sorted[0]->min_start() : ();
+}
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting location of feature startpoint  
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+sub max_start {
+    my ($self, $value) = @_;
+
+    $self->throw("Trying to set the maximum starting point of a split ".
+                 "location, that is not possible, try manipulating the sub Locations")
+        if defined $value;
+
+    # delegate to whichever sublocation comes first in forward sort order
+    my ($leading) = $self->sub_Location(1) or return;
+    return $leading->max_start();
+}
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+sub start_pos_type {
+    my ($self, $value) = @_;
+    if( defined $value ) {
+        $self->throw("Trying to set the start_pos_type of a split location, ".
+                     "that is not possible, try manipulating the sub Locations");
+    }
+    # taken from the first sublocation in stored (unsorted) order
+    my @stored = $self->sub_Location();
+    return @stored ? $stored[0]->start_pos_type() : undef;
+}
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+sub min_end {
+    my ($self, $value) = @_;
+    if( defined $value ) {
+        $self->throw("Trying to set the minimum end point of a split location, ".
+                     "that is not possible, try manipulating the sub Locations");
+    }
+
+    # Reverse sort order (largest end first) puts the sublocation that ends
+    # last at the head of the list; it answers for the whole split location.
+    my @by_end = $self->sub_Location(-1);
+    return @by_end ? $by_end[0]->min_end() : ();
+}
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending location of feature endpoint 
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+sub max_end {
+    my ($self, $value) = @_;
+
+    $self->throw("Trying to set the maximum end point of a split location, ".
+                 "that is not possible, try manipulating the sub Locations")
+        if defined $value;
+
+    # sublocations arrive sorted from largest ending to smallest ending, so
+    # the first one is asked for its max_end
+    my ($last_ending) = $self->sub_Location(-1) or return;
+    return $last_ending->max_end();
+}
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+sub end_pos_type {
+    my ($self, $value) = @_;
+    $self->throw("Trying to set end_pos_type of a split location, ".
+                 "that is not possible, try manipulating the sub Locations")
+        if defined $value;
+    # NOTE(review): this reads the FIRST stored sublocation, exactly as
+    # start_pos_type() does -- confirm that is intended for the end position
+    my @stored = $self->sub_Location();
+    return @stored ? $stored[0]->end_pos_type() : undef;
+}
+
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+
+            We override this here in order to propagate to all sublocations
+            which are not remote (provided this root is not remote either)
+  Returns : seq_id
+  Args    : [optional] seq_id value to set
+
+
+=cut
+
+sub seq_id {
+    my ($self, $seqid) = @_;
+    # A non-remote root forwards the call -- argument included, even when it
+    # is undef -- to each of its non-remote sublocations before answering.
+    my @targets = $self->is_remote() ? () : $self->sub_Location(0);
+    foreach my $subloc (@targets) {
+        $subloc->seq_id($seqid) unless $subloc->is_remote();
+    }
+    return $self->SUPER::seq_id($seqid);
+}
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See Bio::Location::CoordinatePolicyI for documentation about
+            the policy object and its use.
+
+            The interface *does not* require implementing classes to accept
+            setting of a different policy. The implementation provided here
+            does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize every
+            new instance with a CoordinatePolicyI object. The implementation
+            provided here will return a default policy object if none has
+            been set yet. To change this default policy object call this
+            method as a class method with an appropriate argument. Note that
+            in this case only subsequently created Location objects will be
+            affected.
+
+  Returns : A Bio::Location::CoordinatePolicyI implementing object.
+  Args    : On set, a Bio::Location::CoordinatePolicyI implementing object.
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+sub to_FTstring {
+    my ($self) = @_;
+    my @strs;
+	my $strand = $self->strand() || 0;
+	my $stype = lc($self->splittype());
+	my $guide = $self->guide_strand();
+    # Render all sublocations into one feature-table location string.
+    if( $strand < 0 ) {
+		$self->flip_strand; # this will recursively set the strand
+							# to +1 for all the sub locations
+    }
+	# If the split type is join, the order is important;
+	# otherwise must be 5'->3' regardless
+	# (sublocations are taken in stored order, reversed in the one case below)
+	my @locs = ($stype eq 'join' && (!$guide && $strand == -1)) ?
+	           reverse $self->sub_Location() : $self->sub_Location() ;
+	# render every sublocation and collect the pieces in @strs
+    foreach my $loc ( @locs ) {
+		$loc->verbose($self->verbose);
+		my $str = $loc->to_FTstring();
+		# we only append the remote seq_id if it hasn't been done already
+		# by the sub-location (which it should if it knows it's remote)
+		# (and of course only if it's necessary)
+		if( (! $loc->is_remote) &&
+			defined($self->seq_id) && defined($loc->seq_id) &&
+			($loc->seq_id ne $self->seq_id) ) {
+			$str = sprintf("%s:%s", $loc->seq_id, $str);
+		} 
+		push @strs, $str;
+	}
+	$self->flip_strand if $strand < 0; # undo the temporary flip made above
+	my $str;
+	if( @strs == 1 ) {
+		($str) = @strs;
+	} elsif( @strs == 0 ) {
+		$self->warn("no Sublocations for this splitloc, so not returning anything\n");
+	} else { 
+		$str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs));
+	}
+	if( $strand < 0 ) {  # wrap this in a complement if it was unrolled
+		$str = sprintf("%s(%s)",'complement',$str);
+	}
+
+    return $str;
+}
+
+# we'll probably need to override the RangeI methods since our locations will
+# not be contiguous.
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/SplitLocationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/SplitLocationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/SplitLocationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,241 @@
+# $Id: SplitLocationI.pm,v 1.18.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::SplitLocationI
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::SplitLocationI - Abstract interface of a Location on a Sequence
+which has multiple locations (start/end points)
+
+=head1 SYNOPSIS
+
+  # get a SplitLocationI somehow
+    print $splitlocation->start, "..", $splitlocation->end, "\n";
+    my @sublocs = $splitlocation->sub_Location();
+
+    my $count = 1;
+    # print the start/end points of the sub locations
+    foreach my $location ( sort { $a->start <=> $b->start } 
+			   @sublocs ) {
+	printf "sub feature %d [%d..%d]\n", $count, $location->start,$location->end;
+        $count++;
+    }
+
+=head1 DESCRIPTION
+
+This interface encapsulates the necessary methods for representing the
+location of a sequence feature that has more than just a single
+start/end pair.  Some examples of this are the annotated exons in a
+gene or the annotated CDS in a sequence file.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::SplitLocationI;
+use strict;
+
+use Carp;
+
+use base qw(Bio::LocationI);
+
+
+=head2 sub_Location
+
+ Title   : sub_Location
+ Usage   : @locations = $feat->sub_Location();
+ Function: Returns an array of LocationI objects
+ Returns : An array
+ Args    : none
+
+=cut
+
+sub sub_Location {    # interface stub: implementing classes supply the real method
+    my ($self,@args) = @_;    # arguments are accepted but not used by the stub
+    $self->throw_not_implemented();
+}
+
+=head2 splittype
+
+  Title   : splittype
+  Usage   : $splittype = $fuzzy->splittype();
+  Function: get/set the split splittype
+  Returns : the splittype of split feature (join, order)
+  Args    : splittype to set
+
+=cut
+
+sub splittype {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+
+=head2 is_single_sequence
+
+  Title   : is_single_sequence
+  Usage   : if($splitloc->is_single_sequence()) {
+                print "Location object $splitloc is split ".
+                      "but only across a single sequence\n";
+	    }
+  Function: Determine whether this location is split across a single or
+            multiple sequences.
+  Returns : TRUE if all sublocations lie on the same sequence as the root
+            location (feature), and FALSE otherwise.
+  Args    : none
+
+=cut
+
+sub is_single_sequence {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head1 Bio::LocationI methods
+
+Bio::LocationI inherited methods follow
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : my $minstart = $location->min_start();
+  Function: Get minimum starting location of feature startpoint   
+  Returns : integer or undef if no minimum starting point.
+  Args    : none
+
+=cut
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting location of feature startpoint  
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending location of feature endpoint 
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending location of feature endpoint 
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position type (ie <,>, ^) 
+  Returns : type of position coded as text 
+            ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+  Returns : seq_id
+  Args    : [optional] seq_id value to set
+
+=cut
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See Bio::Location::CoordinatePolicyI for documentation about
+            the policy object and its use.
+
+            The interface *does not* require implementing classes to accept
+            setting of a different policy. The implementation provided here
+            does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize every
+            new instance with a CoordinatePolicyI object. The implementation
+            provided here will return a default policy object if none has
+            been set yet. To change this default policy object call this
+            method as a class method with an appropriate argument. Note that
+            in this case only subsequently created Location objects will be
+            affected.
+
+  Returns : A Bio::Location::CoordinatePolicyI implementing object.
+  Args    : On set, a Bio::Location::CoordinatePolicyI implementing object.
+
+=cut
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Location/WidestCoordPolicy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Location/WidestCoordPolicy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Location/WidestCoordPolicy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,124 @@
+# $Id: WidestCoordPolicy.pm,v 1.10.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Location::WidestCoordPolicy
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#          and Jason Stajich <jason at bioperl.org>
+#
+# Copyright Hilmar Lapp, Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Location::WidestCoordPolicy - class implementing 
+Bio::Location::CoordinatePolicyI as the widest possible and reasonable range
+
+=head1 SYNOPSIS
+
+See Bio::Location::CoordinatePolicyI
+
+=head1 DESCRIPTION
+
+CoordinatePolicyI implementing objects are used by Bio::LocationI
+implementing objects to determine integer-valued coordinates when
+asked for it.
+
+This class will compute the coordinates such that always the widest possible
+range is returned, but by using some common sense. This means that e.g.
+locations like "E<lt>5..100" (start before position 5) will return 5 as start
+(returned values have to be positive integers).
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email E<lt>hlapp-at-gmx-dot-netE<gt>, E<lt>jason-at-bioperl-dot-orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Location::WidestCoordPolicy;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Location::CoordinatePolicyI);
+
+sub new {
+    my $class = shift;
+    # No policy-specific state is set up here; construction is left to the parent class.
+    my $policy = $class->SUPER::new(@_);
+    return $policy;
+}
+
+
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $policy->start($location);
+  Function: Get the integer-valued start coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub start {
+    my (undef, $location) = @_;
+
+    # The location is deliberately not type-checked (performance); it only
+    # has to answer min_start() and max_start().
+    my $coord = $location->min_start();
+    # A false minimum (undefined or 0) falls back to the maximum start.
+    $coord = $location->max_start() unless $coord;
+    return $coord;
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $policy->end($location);
+  Function: Get the integer-valued end coordinate of the given location as
+            computed by this computation policy.
+  Returns : A positive integer number.
+  Args    : A Bio::LocationI implementing object.
+
+=cut
+
+sub end {
+    my (undef, $location) = @_;
+
+    # The location is deliberately not type-checked (performance); it only
+    # has to answer max_end() and min_end().
+    my $coord = $location->max_end();
+    # A false maximum (undefined or 0) falls back to the minimum end.
+    $coord = $location->min_end() unless $coord;
+    return $coord;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/LocationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/LocationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/LocationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,406 @@
+# $Id: LocationI.pm,v 1.25.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::LocationI
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::LocationI - Abstract interface of a Location on a Sequence
+
+=head1 SYNOPSIS
+
+    # get a LocationI somehow
+    printf( "start = %d, end = %d, strand = %s, seq_id = %s\n", 
+	    $location->start, $location->end, $location->strand,
+	    $location->seq_id);
+    print "location str is ", $location->to_FTstring(), "\n"; 
+
+
+=head1 DESCRIPTION
+
+This Interface defines the methods for a Bio::LocationI, an object
+which encapsulates a location on a biological sequence.  Locations
+need not be attached to actual sequences as they are stand alone
+objects.  LocationI objects are used by L<Bio::SeqFeatureI> objects to
+manage and represent locations for a Sequence Feature.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::LocationI;
+use strict;
+
+use Carp;
+
+use base qw(Bio::RangeI);
+
+=head2 location_type
+
+  Title   : location_type
+  Usage   : my $location_type = $location->location_type();
+  Function: Get location type encoded as text
+  Returns : string ('EXACT', 'WITHIN', 'IN-BETWEEN')
+  Args    : none
+
+=cut
+
+sub location_type {    # interface stub: implementing classes supply the real method
+    my ($self,@args) = @_;    # arguments are accepted but not used by the stub
+    $self->throw_not_implemented();
+}
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $location->start();
+  Function: Get the start coordinate of this location as defined by
+            the currently active coordinate computation policy. In
+            simple cases, this will return the same number as
+            min_start() and max_start(), in more ambiguous cases like
+            fuzzy locations the number may be equal to one or neither
+            of both.
+
+            We override this here from RangeI in order to delegate
+            'get' to a L<Bio::Location::CoordinatePolicyI> implementing
+            object.  Implementing classes may also wish to provide
+            'set' functionality, in which case they *must* override
+            this method. The implementation provided here will throw
+            an exception if called with arguments.
+
+  Returns : A positive integer value.
+  Args    : none
+
+See L<Bio::Location::CoordinatePolicyI> for more information
+
+=cut
+
+sub start {
+    my ($self,@args) = @_;
+
+    # Setting is not supported at the interface level, so any argument
+    # makes this throw; reading is delegated to the coordinate policy
+    # object, which interprets the 'start' value.
+
+    $self->throw_not_implemented if @args;
+    return $self->coordinate_policy()->start($self);
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $location->end();
+  Function: Get the end coordinate of this location as defined by the
+            currently active coordinate computation policy. In simple
+            cases, this will return the same number as min_end() and
+            max_end(), in more ambiguous cases like fuzzy locations
+            the number may be equal to one or neither of both.
+
+            We override this here from Bio::RangeI in order to delegate
+            'get' to a L<Bio::Location::CoordinatePolicyI> implementing
+            object. Implementing classes may also wish to provide
+            'set' functionality, in which case they *must* override
+            this method. The implementation provided here will throw
+            an exception if called with arguments.
+
+  Returns : A positive integer value.
+  Args    : none
+
+See L<Bio::Location::CoordinatePolicyI> and L<Bio::RangeI> for more
+information
+
+=cut
+
+sub end {
+    my ($self,@args) = @_;
+
+    # Setting is not supported at the interface level, so any argument
+    # makes this throw; reading is delegated to the coordinate policy
+    # object, which interprets the 'end' value.
+    $self->throw_not_implemented if @args;
+    return $self->coordinate_policy()->end($self);
+}
+
+=head2 min_start
+
+  Title   : min_start
+  Usage   : my $minstart = $location->min_start();
+  Function: Get minimum starting point of feature.
+
+            Note that an implementation must not call start() in this method.
+
+  Returns : integer or undef if no minimum starting point.
+  Args    : none
+
+=cut
+
+sub min_start {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 max_start
+
+  Title   : max_start
+  Usage   : my $maxstart = $location->max_start();
+  Function: Get maximum starting point of feature.
+
+            Note that an implementation must not call start() in this method
+            unless start() is overridden such as not to delegate to the
+            coordinate computation policy object.
+
+  Returns : integer or undef if no maximum starting point.
+  Args    : none
+
+=cut
+
+sub max_start {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 start_pos_type
+
+  Title   : start_pos_type
+  Usage   : my $start_pos_type = $location->start_pos_type();
+  Function: Get start position type encoded as text
+
+            Known valid values are 'BEFORE' (<5..100), 'AFTER' (>5..100), 
+            'EXACT' (5..100), 'WITHIN' ((5.10)..100), 'BETWEEN', (5^6), with
+            their meaning best explained by their GenBank/EMBL location string
+            encoding in brackets.
+
+  Returns : string ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+sub start_pos_type {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+
+=head2 flip_strand
+
+  Title   : flip_strand
+  Usage   : $location->flip_strand();
+  Function: Flip-flop a strand to the opposite
+  Returns : None
+  Args    : None
+
+=cut
+
+
+sub flip_strand {
+    my ($self) = @_;
+    $self->strand(-1 * $self->strand);    # negate via the strand() accessor; its result is what comes back
+}
+
+=head2 min_end
+
+  Title   : min_end
+  Usage   : my $minend = $location->min_end();
+  Function: Get minimum ending point of feature. 
+
+            Note that an implementation must not call end() in this method
+            unless end() is overridden such as not to delegate to the
+            coordinate computation policy object.
+
+  Returns : integer or undef if no minimum ending point.
+  Args    : none
+
+=cut
+
+sub min_end {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 max_end
+
+  Title   : max_end
+  Usage   : my $maxend = $location->max_end();
+  Function: Get maximum ending point of feature.
+
+            Note that an implementation must not call end() in this method
+            unless end() is overridden such as not to delegate to the
+            coordinate computation policy object.
+
+  Returns : integer or undef if no maximum ending point.
+  Args    : none
+
+=cut
+
+sub max_end {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 end_pos_type
+
+  Title   : end_pos_type
+  Usage   : my $end_pos_type = $location->end_pos_type();
+  Function: Get end position encoded as text.
+
+            Known valid values are 'BEFORE' (5..<100), 'AFTER' (5..>100), 
+            'EXACT' (5..100), 'WITHIN' (5..(90.100)), 'BETWEEN', (5^6), with
+            their meaning best explained by their GenBank/EMBL location string
+            encoding in brackets.
+
+  Returns : string ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+  Args    : none
+
+=cut
+
+sub end_pos_type {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 seq_id
+
+  Title   : seq_id
+  Usage   : my $seqid = $location->seq_id();
+  Function: Get/Set seq_id that location refers to
+  Returns : seq_id (a string)
+  Args    : [optional] seq_id value to set
+
+=cut
+
+sub seq_id {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 is_remote
+
+ Title   : is_remote
+ Usage   : $is_remote_loc = $loc->is_remote()
+ Function: Whether or not a location is a remote location.
+
+           A location is said to be remote if it is on a different
+           'object' than the object which 'has' this
+           location. Typically, features on a sequence will sometimes
+           have a remote location, which means that the location of
+           the feature is on a different sequence than the one that is
+           attached to the feature. In such a case, $loc->seq_id will
+           be different from $feat->seq_id (usually they will be the
+           same).
+
+           While this may sound weird, it reflects the location of the
+           kind of AB18375:450-900 which can be found in GenBank/EMBL
+           feature tables.
+
+ Example : 
+ Returns : TRUE if the location is a remote location, and FALSE otherwise
+ Args    : 
+
+
+=cut
+
+sub is_remote{
+    $_[0]->throw_not_implemented();    # interface stub; concrete classes override it
+}
+
+=head2 coordinate_policy
+
+  Title   : coordinate_policy
+  Usage   : $policy = $location->coordinate_policy();
+            $location->coordinate_policy($mypolicy); # set may not be possible
+  Function: Get the coordinate computing policy employed by this object.
+
+            See L<Bio::Location::CoordinatePolicyI> for documentation
+            about the policy object and its use.
+
+            The interface *does not* require implementing classes to
+            accept setting of a different policy. The implementation
+            provided here does, however, allow to do so.
+
+            Implementors of this interface are expected to initialize
+            every new instance with a
+            L<Bio::Location::CoordinatePolicyI> object. The
+            implementation provided here will return a default policy
+            object if none has been set yet. To change this default
+            policy object call this method as a class method with an
+            appropriate argument. Note that in this case only
+            subsequently created Location objects will be affected.
+
+  Returns : A L<Bio::Location::CoordinatePolicyI> implementing object.
+  Args    : On set, a L<Bio::Location::CoordinatePolicyI> implementing object.
+
+See L<Bio::Location::CoordinatePolicyI> for more information
+
+
+=cut
+
+sub coordinate_policy {
+    $_[0]->throw_not_implemented();    # interface stub; concrete classes override it
+}
+
+=head2 to_FTstring
+
+  Title   : to_FTstring
+  Usage   : my $locstr = $location->to_FTstring()
+  Function: returns the FeatureTable string of this location
+  Returns : string
+  Args    : none
+
+=cut
+
+sub to_FTstring {
+    # Interface stub: always reports "not implemented"; concrete classes override it.
+    shift->throw_not_implemented();
+}
+
+=head2 each_Location
+
+ Title   : each_Location
+ Usage   : @locations = $locObject->each_Location($order);
+ Function: Conserved function call across Location:: modules - will
+           return an array containing the component Location(s) in
+           that object, regardless if the calling object is itself a
+           single location or one containing sublocations.
+ Returns : an array of Bio::LocationI implementing objects
+ Args    : Optional sort order to be passed to sub_Location() for Splits
+
+=cut
+
+sub each_Location {    # interface stub: implementing classes supply the real method
+    my ($self,@args) = @_;    # arguments are accepted but not used by the stub
+    $self->throw_not_implemented();
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Clone.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Clone.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Clone.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,461 @@
+# $Id: Clone.pm,v 1.6.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::Clone
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Gaurav Gupta
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Clone - A central map object representing a clone
+
+=head1 SYNOPSIS
+
+   # get the clone object of $clone from the Bio::Map::Clone
+   my $cloneobj = $physical->get_cloneobj($clone);
+
+   # acquire all the markers that hit this clone
+   foreach my $marker ($cloneobj->each_markerid()) {
+       print "   +++$marker\n";
+   }
+
+See L<Bio::Map::Position> and L<Bio::Map::PositionI> for more information.
+
+=head1 DESCRIPTION
+
+This object handles the notion of a clone. This clone will
+have a name and a position in a map.
+
+This object is intended to be used by a map parser like fpc.pm.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Gaurav Gupta
+
+Email gaurav at genome.arizona.edu
+
+=head1 CONTRIBUTORS
+
+Sendu Bala  bix at sendu.me.uk
+
+=head1 PROJECT LEADERS
+
+Jamie Hatfield      jamie at genome.arizona.edu
+Dr. Cari Soderlund  cari at genome.arizona.edu
+
+=head1 PROJECT DESCRIPTION
+
+The project was done in Arizona Genomics Computational Laboratory (AGCoL)
+at University of Arizona.
+
+This work was funded by USDA-IFAFS grant #11180 titled "Web Resources for 
+the Computation and Display of Physical Mapping Data".
+
+For more information on this project, please refer to: 
+  http://www.genome.arizona.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut 
+
+# Let the code begin...
+
+package Bio::Map::Clone;
+use strict;
+use Bio::Map::Position;
+
+use base qw(Bio::Root::Root Bio::Map::MappableI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $clone = Bio::Map::Clone->new
+                      (
+		       -name    => $clone,
+		       -markers => \@markers,
+		       -contig  => $contig,
+		       -type    => $type,
+		       -bands   => $bands,
+		       -gel     => $gel,
+		       -group   => $group,
+		       -remark  => $remark,
+		       -fpnumber=> $fp_number,
+		       -sequencetype  => $seq_type,
+		       -sequencestatus=> $seq_status,
+		       -fpcremark => $fpc_remark,
+		       -matche    => \@ematch,
+		       -matcha    => \@amatch,
+		       -matchp    => \@pmatch,
+		       -range     => Bio::Range->new(-start => $startrange,
+						     -end   => $endrange)
+		       );
+ Function: Initialize a new Bio::Map::Clone object
+           Most people will not use this directly but get Clones 
+           through L<Bio::MapIO::fpc>
+ Returns : L<Bio::Map::Clone> object
+ Args    :   -name => marker name string,
+	     -markers => array ref of markers,
+	     -contig  => contig name string,
+	     -type    => type string,
+	     -bands   => band string,
+	     -gel     => gel string,
+	     -group   => group name string,
+	     -remark  => remark string,
+	     -fpnumber=> FP number string,
+	     -sequencetype  => seq type string,
+	     -sequencestatus=> seq status string,
+	     -fpcremark => FPC remark,
+	     -matche    => array ref,
+	     -matcha    => array ref,
+	     -matchp    => array ref,
+	     -range     => L<Bio::Range> object,
+
+=cut
+
+sub new {
+   my ($class,@args) = @_;
+   my $self= $class->SUPER::new(@args);
+   # Named arguments are unpacked, in the order listed, by _rearrange().
+   my ($name,$markers,$contig,$type,$bands,$gel,$group,
+       $remark,$fpnumber,$seqtype,$seqstatus,$fpcremark,
+       $matche,$matcha,$matchp,
+       $range) = $self->_rearrange([qw(NAME  MARKERS CONTIG TYPE
+				       BANDS GEL GROUP REMARK FPNUMBER
+				       SEQUENCETYPE SEQUENCESTATUS
+				       FPCREMARK MATCHE MATCHA MATCHP
+				       RANGE)],@args);
+   # Each attribute is only set when its argument was actually supplied.
+   $self->name($name)                  if defined $name;
+   $self->markers($markers)            if defined $markers;  # NOTE(review): this file defines set_markers() but no markers(); presumably inherited - confirm
+   $self->contigid($contig)            if defined $contig;
+   $self->type($type)                  if defined $type;
+   $self->bands($bands)                if defined $bands;
+   $self->gel($gel)                    if defined $gel;
+   $self->group($group)                if defined $group;
+   $self->remark($remark)              if defined $remark;
+   $self->fp_number($fpnumber)         if defined $fpnumber;
+   $self->sequence_type($seqtype)     if defined $seqtype;
+   $self->sequence_status($seqstatus) if defined $seqstatus;
+   $self->fpc_remark($fpcremark)       if defined $fpcremark;
+   $self->range($range)                if defined $range;
+   # Match lists are stored per type through set_match().
+   $self->set_match('approx', $matcha) if defined $matcha;
+   $self->set_match('pseudo', $matchp) if defined $matchp;
+   $self->set_match('exact',  $matche) if defined $matche; 
+
+   return $self;
+}
+
+=head1 Access Methods
+
+These methods let you get and set the member variables
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $cloneobj->name();
+ Function: Get/set the name for this Clone
+ Returns : scalar representing the current name of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub name {
+    my $self = shift;
+    $self->{'_name'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_name'};
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $cloneobj->type();
+ Function: Get/set the type for this clone
+ Returns : scalar representing the current type of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub type {
+    my $self = shift;
+    $self->{'_type'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_type'};
+}
+
+=head2 range
+
+ Title   : range
+ Usage   : my $range = $cloneobj->range();
+ Function: Get/set the range of the contig that this clone covers
+ Returns : Bio::Range representing the current range of this contig,
+           start and end of the contig can be thus found using:
+           my $start = $contigobj->range()->start();
+           my $end   = $contigobj->range()->end();
+ Args    : none to get, OR Bio::Range to set
+
+=cut
+
+sub range {
+    my $self = shift;
+    $self->{'_range'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_range'};
+}
+
+=head2 match
+
+ Title   : match
+ Usage   : @eclone = $cloneobj->match('exact');
+           @aclone = $cloneobj->match('approximate');
+           @pclone = $cloneobj->match('pseudo');
+ Function: get all matching clones
+ Returns : list 
+ Args    : scalar representing the type of clone to be 
+           queried.
+
+=cut
+
+sub match {
+  my ($self, $kind) = @_;
+  # Only the first letter of the requested type, lower-cased, selects the slot.
+  my $slot = '_match' . lc(substr($kind, 0, 1));
+  return @{ $self->{$slot} || [] };    # empty list when nothing was stored for that type
+}
+
+=head2 each_match
+
+ Title   : each_match
+ Function: Synonym of the match() method.
+
+=cut
+
+*each_match = \&match;    # typeglob alias: each_match() and match() are the same sub
+
+=head2 set_match
+
+ Title   : set_match
+ Usage   : $clone->set_match($type,$values);
+ Function: Set the Matches per type
+ Returns : None
+ Args    : type (one of 'exact' 'approx' 'pseudo')
+           array ref of match values
+
+=cut
+
+sub set_match{
+   my ($self, $kind, $matches) = @_;
+   my $slot = '_match' . lc(substr($kind, 0, 1));    # slot is chosen by the first letter of the type
+   $self->{$slot} = $matches;    # last statement, so the stored value is also the return value
+}
+
+=head2 gel
+
+ Title   : gel
+ Usage   : $clonegel = $cloneobj->gel();
+ Function: Get/set the gel number for this clone
+ Returns : scalar representing the gel number of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub gel {
+    my $self = shift;
+    $self->{'_gel'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_gel'};
+}
+
+=head2 remark
+
+ Title   : remark
+ Usage   : $cloneremark = $cloneobj->remark();
+ Function: Get/set the remark for this clone
+ Returns : scalar representing the current remark of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub remark {
+    my $self = shift;
+    $self->{'_remark'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_remark'};
+}
+
+=head2 fp_number
+
+ Title   : fp_number
+ Usage   : $clonefpnumber = $cloneobj->fp_number();
+ Function: Get/set the fp number for this clone
+ Returns : scalar representing the fp number of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub fp_number {
+    my $self = shift;
+    $self->{'_fpnumber'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_fpnumber'};
+}
+
+=head2 sequence_type
+
+ Title   : sequence_type
+ Usage   : $cloneseqtype = $cloneobj->sequence_type();
+ Function: Get/set the sequence type for this clone
+ Returns : scalar representing the sequence type of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub sequence_type {
+    my $self = shift;
+    $self->{'_sequencetype'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_sequencetype'};
+}
+
+=head2 sequence_status
+
+ Title   : sequence_status
+ Usage   : $cloneseqstatus = $cloneobj->sequence_status();
+ Function: Get/set the sequence status for this clone
+ Returns : scalar representing the sequence status of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub sequence_status {
+    my $self = shift;
+    $self->{'_sequencestatus'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_sequencestatus'};
+}
+
+=head2 fpc_remark
+
+ Title   : fpc_remark
+ Usage   : $clonefpcremark = $cloneobj->fpc_remark();
+ Function: Get/set the fpc remark for this clone
+ Returns : scalar representing the fpc remark of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub fpc_remark {
+    my $self = shift;
+    $self->{'_fpcremark'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_fpcremark'};
+}
+
+=head2 bands
+
+ Title   : bands
+ Usage   : @clonebands = $cloneobj->bands();
+ Function: Get/set the bands for this clone
+ Returns : list representing the band of this clone, if 
+           readcor = 1 while creating the MapIO object and the
+           .cor exists
+ Args    : none to get, OR string to set
+
+=cut
+
+sub bands {
+    my $self = shift;
+    $self->{'_bands'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_bands'};
+}
+
+=head2 group
+
+ Title   : group
+ Usage   : $cloneobj->group($chrno);
+ Function: Get/set the group number for this clone.
+           This is a generic term, used for Linkage-Groups as well as for
+           Chromosomes.
+ Returns : scalar representing the group number of this clone
+ Args    : none to get, OR string to set
+
+=cut
+
+sub group {
+    my $self = shift;
+    $self->{'_group'} = shift if @_;    # acts as a setter when an argument is supplied
+    return $self->{'_group'};
+}
+
+=head2 contigid
+
+ Title   : contigid
+ Usage   : my $ctg = $cloneobj->contigid();
+ Function: Get/set the contig this clone belongs to
+ Returns : scalar representing the contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub contigid {
+    my ($self, @new) = @_;
+    $self->{'_contig'} = $new[0] if @new;    # acts as a setter when an argument is supplied
+    return $self->{'_contig'} || 0;    # an unset or false contig is reported as 0
+}
+
+=head2 each_markerid
+
+ Title   : each_markerid
+ Usage   : @markers = $cloneobj->each_markerid();
+ Function: retrieves all the elements in a map unordered
+ Returns : list of strings (ids)
+ Args    : none
+
+ *** This only supplies the ids set with the set_markers method ***
+ *** It has nothing to do with actual Bio::Map::MarkerI objects ***
+
+=cut
+
+sub each_markerid {
+  my ($self) = @_;    # no arguments are used beyond the object itself
+  return @{$self->{"_markers"} || []};    # empty list, not a strict-refs death, when no ids were set
+}
+
+=head2 set_markers
+
+ Title   : set_markers
+ Usage   : $obj->set_markers($newval)
+ Function: Set list of Marker ids (arrayref)
+ Returns : None
+ Args    : arrayref of strings (ids)
+
+ *** This only sets a list of ids ***
+ *** It has nothing to do with actual Bio::Map::MarkerI objects ***
+
+=cut
+
+sub set_markers {
+    my ($self, $ids) = @_;
+    # A value whose ref() type does not contain 'ARRAY' (undef included)
+    # is silently ignored and the stored list is left untouched.
+    $self->{'_markers'} = $ids
+        if defined $ids && ref($ids) =~ /ARRAY/;
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Contig.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Contig.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Contig.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,400 @@
+# $Id: Contig.pm,v 1.6.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::Contig
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Gaurav Gupta
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Contig - A MapI implementation handling the contigs of a
+Physical Map (such as FPC)
+
+=head1 SYNOPSIS
+
+    # get the contig object of $contig from the Bio::Map::Physical
+    my $ctgobj = $physical->get_contigobj($contig);
+
+    # acquire all the markers that lie in this contig
+    foreach my $marker ($ctgobj->each_markerid()) {
+	print "   +++$marker\n";
+    }
+
+    # find the group of this contig
+    print "Group: ",$ctgobj->group(),"\n";
+
+    # find the range of this contig
+    print "RANGE: start:",$ctgobj->range()->start(),"\tend: ",
+           $ctgobj->range()->end(),"\n";
+
+    # find the position of this contig in $group (chromosome)
+    print "Position in Group $group"," = ",$ctgobj->position($group),"\n";
+
+
+=head1 DESCRIPTION
+
+This is an implementation of Bio::Map::MapI.  It handles the
+essential storage of name, species, type, and units as well as in
+memory representation of the elements of a map.
+
+Bio::Map::Contig has been tailored to work for FPC physical maps, but
+could probably be used for others as well (with the appropriate MapIO
+module).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Gaurav Gupta
+
+Email gaurav at genome.arizona.edu
+
+=head1 CONTRIBUTORS
+
+Sendu Bala  bix at sendu.me.uk
+
+=head1 PROJECT LEADERS
+
+Jamie Hatfield      jamie at genome.arizona.edu
+Dr. Cari Soderlund  cari at genome.arizona.edu
+
+=head1 PROJECT DESCRIPTION
+
+The project was done in Arizona Genomics Computational Laboratory (AGCoL)
+at University of Arizona.
+
+This work was funded by USDA-IFAFS grant #11180 titled "Web Resources for 
+the Computation and Display of Physical Mapping Data".
+
+For more information on this project, please refer: 
+  http://www.genome.arizona.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Contig;
+use vars qw($MAPCOUNT);
+use strict;
+
+# Object preamble - inherits from Bio::Map::SimpleMap
+
+use Bio::Range;
+
+use base qw(Bio::Map::SimpleMap);
+BEGIN { $MAPCOUNT = 1; }
+
+=head2 new
+
+ Title   : new
+ Usage   : my $clone = Bio::Map::Contig->new
+                      (
+		       -name    => $name,
+		       -chr_remark   => $cremark,
+		       -user_remark  => $uremark,
+		       -trace_remark => $tremark,
+		       -group   => $group,
+		       -subgroup=> $subgroup,
+		       -anchor  => $anchor,
+		       -markers => \%markers,
+		       -clones  => \%clones,
+		       -position => $pos,
+		       -range    => Bio::Range->new(-start =>$s,-end=>$e),
+		       );
+
+ Function: Initialize a new Bio::Map::Contig object
+           Most people will not use this directly but get Markers
+           through L<Bio::MapIO::fpc>
+ Returns : L<Bio::Map::Contig> object
+ Args    : ( -name    => name string,
+	     -chr_remark   => chr remark string,
+	     -user_remark  => userremark string,
+	     -trace_remark => tremark string,
+	     -group   => group string,
+	     -subgroup=> subgroup string,
+	     -anchor  => boolean if this is anchored or not,
+	     -markers => hashref of contained markers,
+	     -clones  => hashref of contained clones,
+	     -position => position
+	     -range    => L<Bio::Range>
+
+=cut
+
+sub new {
+   my ($class, @args) = @_;
+   my $self = $class->SUPER::new(@args);
+
+   my ($name,$cremark,$uremark,$tremark,
+       $group,$subgroup, $anchor,$markers, $clones,
+       $position,$range) = $self->_rearrange([qw(NAME CHR_REMARK USER_REMARK
+                                                 TRACE_REMARK GROUP SUBGROUP
+                                                 ANCHOR MARKERS CLONES
+                                                 POSITION RANGE)], @args);
+   # Only arguments that were actually supplied are applied, each through its own accessor.
+   $self->name($name)                  if defined $name;
+   $self->chr_remark($cremark)         if defined $cremark;
+   $self->user_remark($uremark)        if defined $uremark;
+   $self->trace_remark($tremark)       if defined $tremark;
+   $self->group($group)                if defined $group;
+   $self->subgroup($subgroup)          if defined $subgroup;   # -subgroup value, not the group
+   $self->anchor($anchor)              if defined $anchor;
+
+   $self->set_markers($markers)        if defined $markers;    # hashref of marker ids
+   $self->set_clones($clones)          if defined $clones;     # hashref of clone ids
+   $self->range($range)                if defined $range;
+   $self->position($position)          if defined $position;
+
+   return $self;
+}
+
+=head2 Modifier methods
+
+All methods present in L<Bio::Map::SimpleMap> are implemented by this class.
+Most of the methods are inherited from SimpleMap.  The following methods
+have been modified to reflect the needs of physical maps.
+
+=head2 chr_remark
+
+ Title   : chr_remark
+ Usage   : my $chrremark = $contigobj->chr_remark();
+ Function: Get/set the group remark for this contig
+ Returns : scalar representing the current group_remark of this contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub chr_remark {
+    my ($self, @args) = @_;
+    $self->{'_cremark'} = $args[0] if @args;    # setter: store the first argument
+    return ! defined($self->{'_cremark'}) ? '' : $self->{'_cremark'};    # unset reads as ''
+}
+
+=head2 user_remark
+
+ Title   : user_remark
+ Usage   : my $userremark = $contigobj->user_remark();
+ Function: Get/set the user remark for this contig
+ Returns : scalar representing the current user_remark of this contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub user_remark {
+    my ($self, @args) = @_;
+    $self->{'_uremark'} = $args[0] if @args;    # setter: store the first argument
+    return ! defined($self->{'_uremark'}) ? '' : $self->{'_uremark'};    # unset reads as ''
+}
+
+=head2 trace_remark
+
+ Title   : trace_remark
+ Usage   : my $traceremark = $contigobj->trace_remark();
+ Function: Get/set the trace remark for this contig
+ Returns : scalar representing the current trace_remark of this contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub trace_remark {
+    my ($self, @args) = @_;
+    $self->{'_tremark'} = $args[0] if @args;    # setter: store the first argument
+    return ! defined($self->{'_tremark'}) ? '' : $self->{'_tremark'};    # unset reads as ''
+}
+
+=head2 range
+
+ Title   : range
+ Usage   : my $range = $contigobj->range();
+ Function: Get/set the range for this Contig
+ Returns : Bio::Range representing the current range of this contig,
+           start and end of the contig can be thus found using:
+           my $start = $contigobj->range()->start();
+           my $end   = $contigobj->range()->end();
+ Args    : none to get, OR Bio::Range to set
+
+=cut
+
+sub range {
+    my ($self, @args) = @_;
+    $self->{'_range'} = $args[0] if @args;    # setter: store whatever was passed first
+    return $self->{'_range'};                 # stored range (undef if never set)
+}
+
+=head2 position
+
+ Title   : position
+ Usage   : $ctgpos = $contigobj->position();
+ Function: Get/set the position of the contig in the group
+ Returns : scalar representing the position of the contig in the group
+ Args    : none to get, OR string to set
+
+=cut
+
+sub position {
+    my ($self, @args) = @_;
+    ($self->{'_position'}) = @args if @args;                   # setter: keep the first argument
+    return $self->{'_position'} ? $self->{'_position'} : 0;    # false or unset reads as 0
+}
+
+=head2 anchor
+
+ Title   : anchor
+ Usage   : $ctganchor = $contig->anchor();
+ Function: Get/set the anchor value for this Contig (True | False)
+ Returns : scalar representing the anchor (1 | 0) for this contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub anchor {
+    my ($self, @args) = @_;
+    $self->{'_anchor'} = $args[0] if @args;    # setter: store the first argument as given
+    return $self->{'_anchor'};                 # stored flag (undef if never set)
+}
+
+=head2 group
+
+ Title   : group
+ Usage   : $groupno = $contigobj->group();
+ Function: Get/set the group number for this contig.
+           This is a generic term, used for Linkage-Groups as well as for
+           Chromosomes. 
+ Returns : scalar representing the group number of this contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub group {
+    my ($self, @args) = @_;
+    ($self->{'_group'}) = @args if @args;                # setter: keep the first argument
+    return $self->{'_group'} ? $self->{'_group'} : 0;    # false or unset reads as 0
+}
+
+=head2 subgroup
+
+ Title   : subgroup
+ Usage   : $subgroup = $contig->subgroup();	
+ Function: Get/set the subgroup for this contig. This is a generic term:
+           subgroup here could represent subgroup of a Chromosome or of a
+           Linkage Group. The user must take care of which subgroup he/she is
+           querying for.	
+ Returns : A scalar representing the subgroup of this contig
+ Args    : none to get, OR string to set
+
+=cut
+
+sub subgroup {
+    my ($self, @args) = @_;    # split off $self so @args holds only the optional new value
+    return $self->{'_subgroup'} = $args[0] if @args;    # setter: returns the stored value
+    return $self->{'_subgroup'} || 0;                   # getter: false or unset reads as 0
+}
+
+=head2 each_cloneid
+
+ Title   : each_cloneid
+ Usage   : my @clones  = $map->each_cloneid();
+ Function: retrieves all the clone ids in a map unordered
+ Returns : list of strings (ids)
+ Args    : none
+
+ *** This only supplies the ids set with the set_clones method ***
+ *** It has nothing to do with actual Bio::Map::MappableI objects ***
+
+=cut
+
+sub each_cloneid {
+    my $contig = shift;                         # only the invocant is used
+    return $contig->_each_element('clones');    # ids come from the '_clones' slot
+}
+
+=head2 each_markerid
+
+ Title   : each_markerid
+ Usage   : my @markers = $map->each_markerid();
+ Function: retrieves all the marker ids in a map unordered
+ Returns : list of strings (ids)
+ Args    : none
+
+ *** This only supplies the ids set with the set_markers method ***
+ *** It has nothing to do with actual Bio::Map::MarkerI objects ***
+
+=cut
+
+sub each_markerid {
+    my $contig = shift;                          # only the invocant is used
+    return $contig->_each_element('markers');    # ids come from the '_markers' slot
+}
+
+sub _each_element {
+    my ($self, $kind) = @_;
+    $kind = 'clones' unless defined $kind;    # default to the clone ids
+    my $slot = '_' . lc $kind;                # e.g. 'Markers' -> '_markers'
+    return keys %{ $self->{$slot} || {} };    # no keys when that slot was never set
+}
+
+=head2 set_clones
+
+ Title   : set_clones
+ Usage   : $marker->set_clones(\%clones)
+ Function: Set the clones hashref
+ Returns : None
+ Args    : Hashref of clone ids
+
+ *** This only sets a hash of ids ***
+ *** It has nothing to do with actual Bio::Map::MappableI objects ***
+
+=cut
+
+sub set_clones {
+   my ($self, $ids) = @_;
+   # Anything that is not a hash reference is silently ignored.
+   $self->{'_clones'} = $ids
+       if defined $ids && ref($ids) =~ /HASH/;
+}
+
+=head2 set_markers
+
+ Title   : set_markers
+ Usage   : $obj->set_markers($newval)
+ Function: Set list of Markers (hashref)
+ Returns : None
+ Args    : Hashref of marker ids
+
+ *** This only sets a hash of ids ***
+ *** It has nothing to do with actual Bio::Map::MarkerI objects ***
+
+=cut
+
+sub set_markers {
+    my ($self, $ids) = @_;
+    # Anything that is not a hash reference is silently ignored.
+    $self->{'_markers'} = $ids
+        if defined $ids && ref($ids) =~ /HASH/;
+}
+
+1;
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Contig.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,135 @@
+# $Id: CytoMap.pm,v 1.10.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::CytoMap
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::CytoMap - A Bio::MapI compliant map implementation handling cytogenetic bands
+
+=head1 SYNOPSIS
+
+    use Bio::Map::CytoMap;
+    my $map = new Bio::Map::CytoMap(-name => 'human1',
+				      -species => $human);
+    foreach my $marker ( @markers ) { # get a list of markers somewhere
+	$map->add_element($marker);
+    }
+
+=head1 DESCRIPTION
+
+This is the simple implementation of cytogenetic maps based on
+L<Bio::Map::MapI>.  It handles the essential storage of name, species,
+type, and units as well as in memory representation of the elements of
+a map.
+
+For CytoMaps type is hard coded to be 'cyto' and
+units are set to '' but can be set to something else.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Jason Stajich      jason at bioperl.org
+Lincoln Stein      lstein at cshl.org
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Map::CytoMap;
+use vars qw($MAPCOUNT);
+use strict;
+
+
+use base qw(Bio::Map::SimpleMap);
+BEGIN { $MAPCOUNT = 1; }
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Map::CytoMap();
+ Function: Builds a new Bio::Map::CytoMap object
+ Returns : Bio::Map::CytoMap
+ Args    : -name    => name of map (string)
+           -species => species for this map (Bio::Species) [optional]
+           -elements=> elements to initialize with
+                       (arrayref of Bio::Map::MappableI objects) [optional]
+
+           -uid     => Unique Id
+
+=cut
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+    my $self  = $class->SUPER::new(@args);
+
+    # Take the next sequential id first; an explicit -uid, if given, is then passed to unique_id().
+    $self->{'_uid'} = $MAPCOUNT++;
+    my ($uid) = $self->_rearrange([qw(UID)], @args);
+    $self->unique_id($uid) if defined $uid;
+    return $self;
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $map->type
+ Function: Get hard-coded Map type
+ Returns : String coding Map type (always 'cyto')
+ Args    : none
+
+=cut
+
+sub type {
+   return 'cyto';    # constant: ignores its arguments and always yields this string
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $length = $map->length();
+ Function: Retrieves the length of the map,
+ Returns : 0 since length is not calculatable for cytogenetic maps
+ Args    : none
+
+=cut
+
+sub length {
+   return 0;    # constant: ignores its arguments and always reports zero
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMarker.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMarker.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoMarker.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,138 @@
+# $Id: CytoMarker.pm,v 1.12.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::CytoMarker
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::CytoMarker - An object representing a marker.
+
+=head1 SYNOPSIS
+
+  $o_usat = new Bio::Map::CytoMarker(-name=>'Chad Super Marker 2',
+				 -position => $pos);
+
+=head1 DESCRIPTION
+
+This object handles markers with a known position in a cytogenetic map.
+This marker will have a name and a position.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho 
+
+Email heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Chad Matsalla      bioinformatics1 at dieselwurks.com
+Lincoln Stein      lstein at cshl.org
+Jason Stajich      jason at bioperl.org
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::CytoMarker;
+use strict;
+use Bio::Map::CytoPosition;
+
+use base qw(Bio::Map::Marker);
+
+
+=head2 Bio::Map::MarkerI methods
+
+=cut
+
+=head2 get_position_object
+
+ Title   : get_position_object
+ Usage   : my $position = $marker->get_position_object();
+ Function: To get an object of the default Position class
+           for this Marker. Subclasses should redefine this method.
+           The Position returned needs to be a L<Bio::Map::PositionI> with
+		   -element set to self.
+ Returns : L<Bio::Map::PositionI>
+ Args    : none for an 'empty' PositionI object, optionally
+           Bio::Map::MapI and value string to set the Position's -map and -value
+           attributes.
+
+=cut
+
+sub get_position_object {
+   my ($self, $map, $value) = @_;
+   $map = $self->default_map unless $map;    # fall back to the marker's default map
+
+   # A true value must be a plain scalar, never a reference.
+   $self->throw("Value better be scalar, not [$value]") if $value && ref($value) ne '';
+
+   my $position = Bio::Map::CytoPosition->new();
+   $position->map($map)     if $map;
+   $position->value($value) if $value;
+   $position->element($self);
+   return $position;
+}
+
+
+=head2 Comparison methods
+
+The numeric values for cytogenetic locations go from the p tip of
+chromosome 1, down to the q tip and similarly through consecutive
+chromosomes, through X and end at the q tip of X. See
+L<Bio::Map::CytoPosition::cytorange> for more details.
+
+=cut
+
+=head2 New methods
+
+=cut
+
+=head2 get_chr
+
+ Title   : get_chr
+ Usage   : my $mychr = $marker->get_chr();
+ Function: Read only method for the  chromosome string of the location.
+           A shortcut to $marker->position->chr().
+ Returns : chromosome value
+ Args    : none
+
+=cut
+
+sub get_chr {
+    my $marker = shift;
+    if (! $marker->position) { return; }    # no position: nothing to report
+    return $marker->position->chr;          # chr() of the marker's position
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoPosition.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoPosition.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/CytoPosition.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,557 @@
+# $Id: CytoPosition.pm,v 1.16.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::CytoPosition
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::CytoPosition - Marker class with cytogenetic band storing attributes
+
+=head1 SYNOPSIS
+
+  $m1 = Bio::Map::CytoPosition->new ( '-id' => 'A1',
+				       '-value' => '2q1-3'
+					     );
+  $m2 = Bio::Map::CytoPosition->new ( '-id' => 'A2',
+				       '-value' => '2q2'
+					     );
+
+  if ($m1->cytorange->overlaps($m2->cytorange)) {
+      print "Markers overlap\n";
+  }
+
+
+=head1 DESCRIPTION
+
+CytoPosition is marker (Bio::Map::MarkerI compliant) with a location in a
+cytogenetic map. See L<Bio::Map::MarkerI> for more information.
+
+Cytogenetic locations are names of bands visible in stained mitotic
+eucaryotic chromosomes. The naming follows strict rules which are
+consistent at least in higher vertebrates, e.g. mammals. The chromosome
+name precedes the band names.
+
+The shorter arm of the chromosome is called 'p' ('petit') and usually
+drawn pointing up. The lower arm is called 'q' ('queue'). The bands
+are named from the region separating these, a centromere (cen), towards
+the tips or telomeric regions (ter) counting from 1 upwards. Depending
+on the resolution used the bands are identified with one or more
+digits. The first digit determines the major band and subsequent digits
+sub bands: p1 band can be divided into subbands p11, p12 and p13 and
+p11 can further be divided into subbands p11.1 and p11.2. The dot after
+second digit makes it easier to read the values. A region between bands
+is given from the centromere outwards towards the telomere (e.g. 2p2-5
+or 3p21-35) or from a band in the p arm to a band in the q arm.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala  bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::CytoPosition;
+
+use strict;
+use integer;
+
+use base qw(Bio::Map::Position);
+
+=head2 cytorange
+
+ Title   : cytorange
+ Usage   : my $range = $obj->cytorange();
+ Function:
+            Converts cytogenetic location set by value method into
+            an integer range. The chromosome number determines the
+            "millions" in the values.  Human X and Y chromosome
+            symbols are represented by values 100 and 101.
+
+            The localization within chromosomes are converted into
+            values between the range of 0 and 200,000:
+
+            pter                    cen                       qter
+            |------------------------|-------------------------|
+            0                     100,000                   200,000
+
+            The values between -100,000 through 0 for centromere to
+            100,000 would have reflected the band numbering better but
+            use of positive integers was chosen since the
+            transformation is very easy. These values are not metric.
+
+            Each band defines a range in a chromosome. A band string
+            is converted into a range by padding it with lower and
+            higher end digits (for q arm: '0' and '9') to the length
+            of five. The arm and chromosome values are added to these:
+            e.g. 21000 & 21999 (band 21) + 100,000 (q arm) + 2,000,000
+            (chromosome 2) => 2q21 : 2,121,000 .. 2,121,999. Note that
+            this notation breaks down if there is a band or a subband
+            using digit 9 in its name!  This is not the case in human
+            karyotype.
+
+            The full algorithm used for bands:
+
+            if arm is 'q' then
+               pad char for start is '0', for end '9'
+               range is chromosome + 100,000 + padded range start or end
+            elsif arm is 'p' then
+               pad char for start is '9', for end '0'
+               range is chromosome + 100,000 - padded range start or end
+
+ Returns : Bio::Range object or undef
+ Args    : none
+
+=cut
+
+sub cytorange {
+    my ($self) = @_;
+    my ($chr, $r, $band, $band2, $arm, $arm2, $lc, $uc, $lcchar, $ucchar);
+    # Parses $self->{_value}, records the chromosome via chr(), and copies the resulting range to start()/end().
+    return $r if not defined $self->{_value}; # returns undef
+    $self->{_value} =~
+	#  -----1-----  --------2---------   -----3-----     -------4-------   ---6---
+	m/([XY]|[0-9]+)(cen|qcen|pcen|[pq])?(ter|[.0-9]+)?-?([pq]?(cen|ter)?)?([.0-9]+)?/;
+    $self->warn("Not a valid value: ". $self->{_value}), return $r
+	if not defined $1 ; # returns undef
+
+    $chr = uc $1;
+    $self->chr($chr);
+
+    $chr = 100 if $chr eq 'X'; # X and Y get the numeric codes 100 and 101
+    $chr = 101 if $chr eq 'Y';
+    $chr *= 1000000; # the chromosome code occupies the millions
+
+    $r = new Bio::Range();
+
+    $band = '';
+    if (defined $3 ) {
+	$2 || $self->throw("$& does not make sense: 'arm' or 'cen' missing");
+	$band = $3;
+	$band =~ tr/\.//d; # drop the dots: 22.1 -> 221
+    }
+    if (defined $6 ) {
+	$arm2 = $4;
+	$arm2 = $2 if $4 eq ''; # it is not necessary to repeat the arm [p|q]
+	$band2 = $6;
+	$band2 =~ tr/\.//d;
+    
+	#find the correct order
+    #print STDERR "-|$&|----2|$2|-----3|$band|---4|$4|--------arm2|$arm2|-------------\n";
+	if ($band ne '' and (defined $arm2 and $2 ne $arm2 and $arm2 eq 'q') ) {
+	    $lc = 'start'; $lcchar = '9';
+	    $uc = 'end'; $ucchar = '9';
+	}
+	elsif ($band ne 'ter' and $2 ne $arm2 and $arm2 eq 'p') {
+	    $lc = 'end'; $lcchar = '9';
+	    $uc = 'start'; $ucchar = '9';
+	}
+	elsif ($band eq 'ter' and  $arm2 eq 'p') {
+	    $uc = 'start'; $ucchar = '9';
+	} # $2 eq $arm2
+	elsif ($arm2 eq 'q') {
+	    if (_pad($band, 5, '0') < _pad($band2, 5, '0')) {
+		$lc = 'start'; $lcchar = '0';
+		$uc = 'end'; $ucchar = '9';
+	    } else {
+		$lc = 'end'; $lcchar = '9';
+		$uc = 'start'; $ucchar = '0';		
+	    }
+	}
+	elsif ($arm2 eq 'p') {
+	    if (_pad($band, 5, '0') < _pad($band2, 5, '0')) {
+		$lc = 'end'; $lcchar = '0';
+		$uc = 'start'; $ucchar = '9';
+	    } else {
+		$lc = 'start'; $lcchar = '9';
+		$uc = 'end'; $ucchar = '0';		
+	    }
+	}
+	else {
+	    $self->throw("How did you end up here? $&");
+	}
+
+	#print STDERR "-------$arm2--------$band2---------$ucchar--------------\n";
+	if ( (defined $arm2 and $arm2 eq 'p') or (defined $arm2 and $arm2 eq 'p') ) { # NOTE(review): both halves of this 'or' are the same test
+	    $r->$uc(-(_pad($band2, 5, $ucchar)) + 100000 + $chr ); # $uc/$lc hold the method name ('start' or 'end') to call
+	    if (defined $3 and $3 eq 'ter') {
+		$r->end(200000 + $chr);
+	    }
+	    elsif ($2 eq 'cen' or $2 eq 'qcen' or $2 eq 'pcen'){
+		$r->$lc(100000 + $chr);
+	    } 
+	    elsif ($2 eq 'q') {
+		$r->$lc(_pad($band, 5, $lcchar) + 100000 + $chr );
+	    } else {
+		$r->$lc(-(_pad($band, 5, $lcchar)) + 100000 + $chr );
+	    }
+	} else { #if:$arm2=q e.g. 9p22-q32
+	    #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n";
+	    $r->$uc(_pad($band2, 5, $ucchar) +  100000 + $chr);
+	    if ($2 eq 'cen' or $2 eq 'pcen') {
+		$r->$lc(100000 + $chr);
+	    }
+	    elsif ($2 eq 'p') {
+		if ($3 eq 'ter') { # NOTE(review): a lone 'pter' maps to $chr further down; 200000 here looks suspicious - confirm
+		    $r->$lc(200000 + $chr);
+		} else {
+		    $r->$lc(-(_pad($band, 5, $lcchar)) + 100000 + $chr);
+		}
+	    } else { #$2.==q
+		$r->$lc(_pad($band, 5, $lcchar) + 100000 + $chr);
+	    }
+	}
+    }
+    #
+    # e.g. 10p22.1-cen
+    #
+    elsif (defined $4 and $4 ne '') {
+	#print STDERR "$4-----$&----\n";
+	if ($4 eq 'cen' || $4 eq 'qcen' || $4 eq 'pcen') { # e.g. 10p22.1-cen;
+	    # '10pcen-qter' does not really make sense but lets have it in anyway
+	    $r->end(100000 + $chr);
+	    if ($2 eq 'p') {
+		if ($3 eq 'ter') {
+		    $r->start($chr);
+		} else {
+		    $r->start(_pad($band, 5, '9') + $chr); # NOTE(review): other p-arm bands use -(_pad(...)) + 100000 + $chr - confirm
+		}
+	    }
+	    elsif ($2 eq 'cen') {
+		$self->throw("'cen-cen' does not make sense: $&");
+	    } else {
+		$self->throw("Only order p-cen is valid: $&");
+	    }
+	}
+	elsif ($4 eq 'qter' || $4 eq 'ter') { # e.g. 10p22.1-qter, 1p21-qter, 10pcen-qter, 7q34-qter
+	    $r->end(200000 + $chr);
+	    if ($2 eq 'p'){
+		$r->start(-(_pad($band, 5, '9')) + 100000 + $chr); #??? OK?
+	    }
+	    elsif ($2 eq 'q') {
+		$r->start(_pad($band, 5, '0') + 100000 + $chr);
+	    }
+	    elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) {
+		$r->start(100000 + $chr);
+	    }
+	}
+	elsif ($4 eq 'pter' ) {
+	    #print STDERR "$2,$3--$4-----$&----\n";
+	    $r->start( $chr);
+	     if ($2 eq 'p'){
+		$r->end(-(_pad($band, 5, '0')) + 100000 + $chr);
+	    }
+	    elsif ($2 eq 'q') {
+		$r->end(_pad($band, 5, '9') + 100000 + $chr);
+	    }
+	    elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) {
+		$r->end(100000 + $chr);
+	    }
+	} else { # -p or -q at the end of the range
+	    $self->throw("lone '$4' in $& does not make sense");
+	}
+    }
+    #
+    #  e.g 10p22.1, 10pter
+    #
+    elsif (defined $3 ) {
+	if ($2 eq 'p') {
+	    if ($3 eq 'ter') { # e.g. 10pter
+		$r = new Bio::Range('-start' => $chr,
+				    '-end' => $chr,
+				    );
+	    } else { # e.g 10p22.1
+		$r = new Bio::Range('-start' => -(_pad($band, 5, '9')) + 100000 + $chr,
+				    '-end' => -(_pad($band, 5, '0')) + 100000 + $chr,
+				    );
+	    }
+	} elsif ($2 eq 'q') {
+	    if ($3 eq 'ter') { # e.g. 10qter
+		$r = new Bio::Range('-start' => 200000 + $chr,
+				    '-end' => 200000 + $chr,
+				    );
+	    } else { # e.g 10q22.1
+		$r = new Bio::Range('-start' => _pad($band, 5, '0') + 100000 + $chr,
+				    '-end' => _pad($band, 5, '9') + 100000 + $chr,
+				    );
+	    }
+	} else { # e.g. 10qcen1.1 !
+	    $self->throw("'cen' in $& does not make sense");
+	}
+    }
+    #
+    # e.g. 10p
+    #
+    elsif (defined $2 ) { # e.g. 10p
+	if ($2 eq 'p' ) {
+	    $r = new Bio::Range('-start' => $chr,
+				'-end' => 100000  + $chr
+				);
+	}
+	elsif ($2 eq 'q' )  {
+	    $r = new Bio::Range('-start' => 100000 + $chr,
+				'-end' => 200000 + $chr
+				);
+	} else { # $2 eq 'cen' || 'qcen'
+	    $r = new Bio::Range('-start' => 100000 + $chr,
+				'-end' => 100000 + $chr
+				);
+	}
+    }
+    #
+    # chr only, e.g. X
+    #
+    else {
+	$r = new Bio::Range('-start' => $chr,
+			    '-end' => 200000 + $chr
+			    );
+    }
+    
+    if ($r) { # mirror the computed range onto this object's own start/end
+        $self->start($r->start);
+        $self->end($r->end);
+    }
+    return $r;
+}
+
+
+sub _pad {
+    my ($string, $len, $pad_char) = @_;
+    $len =~ /^\+?\d+$/                          # length must be an unsigned integer
+	or __PACKAGE__->throw("function _pad needs a positive integer length, not [$len]");
+    length($pad_char) == 1                      # exactly one fill character
+	or __PACKAGE__->throw("function _pad needs a single character pad_char, not [$pad_char]");
+    $string = '' unless $string;                # undef, '' and '0' all start from empty
+    return join '', $string, $pad_char x ($len - length $string);
+}
+
+=head2 range2value
+
+ Title   : range2value
+ Usage   : my $value = $obj->range2value($range);
+ Function: Sets and returns the value string based on start and end values of
+           the Bio::Range object passes as an argument.
+ Returns : string or false
+ Args    : Bio::Range object
+
+=cut
+
+sub range2value {
+    my ($self,$value) = @_;
+    if( defined $value) {    # without an argument this just returns the current value string
+	if( ! $value->isa('Bio::Range') ) {
+	    $self->throw("Is not a Bio::Range object but a [$value]");
+	    return;
+	}
+	if( ! $value->start ) {
+	    $self->throw("Start is not defined in [$value]");
+	    return;
+	}
+	if( ! $value->end ) {
+	    $self->throw("End is not defined in [$value]");
+	    return;
+	}
+	if( $value->start < 100000 ) {    # NOTE(review): message says millions but the bound is 100000 - confirm
+	    $self->throw("Start value has to be in millions, not ". $value->start);
+	    return;
+	}
+	if( $value->end < 100000 ) {
+	    $self->throw("End value has to be in millions, not ". $value->end);
+	    return;
+	}
+
+	my ($chr, $arm, $band) = $value->start =~ /(\d+)(\d)(\d{5})/;    # chromosome code, arm digit, 5-digit band
+	my ($chr2, $arm2, $band2) = $value->end =~ /(\d+)(\d)(\d{5})/;
+
+	my ($chrS, $armS, $bandS, $arm2S, $band2S, $sep) = ('', '', '', '', '', '' );
+      LOC: {
+	  #
+	  # chromosome
+	  #
+	  if ($chr == 100) {
+	      $chrS = 'X';
+	  }
+	  elsif ($chr == 101) {    # cytorange() encodes Y as 101 (X is 100)
+	      $chrS = 'Y';
+	  } else {
+	      $chrS = $chr;
+	  }
+	  last LOC if  $arm == 0 and $arm2 == 2 and $band == 0 and $band2 == 0 ;    # whole chromosome
+	  #
+	  # arm
+	  #
+	  if ($arm == $arm2 ) {
+	      if ($arm == 0) {
+		  $armS = 'p';
+		  #$armS = 'pter' if $band == 0 and $band2 == 0;
+		  $bandS = 'ter' if $band == 0;
+		  #$arm2S = 'p'; #?
+	      }
+	      elsif ($arm == 2) {
+		  $armS = 'q';
+		  $bandS = 'ter' if $band == 0;
+	      } else {
+		  $armS = 'q';
+		  #$arm2S = 'q'; #?
+		  $armS = 'cen',  if $band == 0;# and $band2 == 0;
+	      }
+	  } else {
+	      if ($arm == 0) {
+		  $armS = 'p';
+		  $arm2S = 'q';
+		  $arm2S = '' if $band == 0 and $band2 == 0;
+	      } else {
+		  $armS = 'q';
+		  $arm2S = 'p';
+		  $arm2S = '' if $arm2 == 2 and $band == 0 and $band2 == 0;
+	      }
+	  }
+	  last LOC if $band == $band2 ;
+	  my $c;
+	  #
+	  # first band (ter is handled with the arm)
+	  #
+	  if ($bandS ne 'ter') {
+	      if ($armS eq 'p') {
+		  $band = 100000 - $band;
+		  $c = '9';
+	      } else {
+		  $c = '0';
+	      }
+	      $band =~ s/$c+$//;    # strip the trailing pad characters
+	      $bandS = $band;
+	      $bandS = substr($band, 0, 2). '.'. substr($band, 2) if length $band > 2;
+	  }
+	  last LOC unless $band2;
+	  #
+	  # second band
+	  #
+	  if ($arm2 == 0) {
+	      $arm2S = 'p';
+	      $band2 = 100000 - $band2;
+	      $c = '0';
+	  } else { # 1 or 2
+	      $arm2S = 'q';
+	      $c = '9';
+	  }
+	  if ($band2 == 0) {
+	      if ($arm2 == 1) {
+		  $arm2S = 'p';
+		  $band2S = 'cen';
+	      } else {
+		  $band2S = 'ter';
+	      }
+	      last LOC;
+	  }
+	  last LOC if $band eq $band2 and $arm == $arm2;
+
+	  $band2 =~ s/$c+$//;
+	  $band2S = $band2;
+	  $band2S = substr($band2, 0, 2). '.'. substr($band2, 2) if length $band2 > 2;
+
+      } # end of LOC:
+
+	if ($armS eq 'p' and $arm2S eq 'p') {    # swap the two band strings when both ends are on the p arm
+	    my $tmp = $band2S;
+	    $band2S = $bandS;
+	    $bandS = $tmp;
+	}
+	$band2S = '' if $bandS eq $band2S ;
+	$armS = '' if $bandS eq 'cen';
+	$arm2S = '' if $armS eq $arm2S and $band2S ne 'ter';
+	$sep = '-' if $arm2S || $band2S;
+	$self->value( $chrS. $armS. $bandS. $sep. $arm2S. $band2S);    # store the rebuilt string via value()
+    }
+   return $self->value;
+}
+
+=head2 value
+
+ Title   : value
+ Usage   : my $pos = $position->value;
+ Function: Get/Set the value for this position
+ Returns : scalar, value
+ Args    : none to get, OR scalar to set
+
+=cut
+
+sub value {
+   my ($self, $new_value) = @_;
+   # An undefined argument is treated like no argument: plain getter.
+   return $self->{'_value'} unless defined $new_value;
+   $self->{'_value'} = $new_value;
+   $self->cytorange;    # re-parse the new string (sets chr/start/end as a side effect)
+   return $self->{'_value'};
+}
+
+=head2 numeric
+
+ Title   : numeric
+ Usage   : my $num = $position->numeric;
+ Function: Read-only method that is guaranteed to return a numeric 
+           representation of the start of this position.
+ Returns : int (the start of the range)
+ Args    : optional Bio::RangeI object 
+
+=cut
+
+sub numeric {
+   my ($self, @args) = @_;
+   return $self->start(@args);    # delegate to start(), forwarding any arguments
+}
+
+=head2 chr
+
+ Title   : chr
+ Usage   : my $mychr = $position->chr();
+ Function: Get/Set method for the chromosome string of the location.
+ Returns : chromosome value
+ Args    : none to get, OR scalar to set
+
+=cut
+
+sub chr {
+   my $self = shift;
+   my ($name) = @_;
+   # An undefined argument is treated like no argument: plain getter.
+   $self->{'_chr'} = $name if defined $name;
+   return $self->{'_chr'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/EntityI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/EntityI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/EntityI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+# $Id: EntityI.pm,v 1.3.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::EntityI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::EntityI - An Entity Interface
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+
+=head1 DESCRIPTION
+
+This interface describes the basic methods required for entities. An Entity is a
+kind of Bio::Map object that holds instance-specific data but relies on
+registering itself with a PositionHandler to handle its relationships with
+other entities. These relationships between objects are based around shared
+Positions, so Bio::Map::PositionI objects are a special kind of EntityI, along
+with Bio::Map::MappableI and Bio::Map::MapI objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::EntityI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 get_position_handler
+
+ Title   : get_position_handler
+ Usage   : my $position_handler = $entity->get_position_handler();
+ Function: Gets a PositionHandlerI that $entity is registered with.
+ Returns : Bio::Map::PositionHandlerI object
+ Args    : none
+
+=cut
+
+sub get_position_handler {
+    my ($self) = @_;
+
+    # Interface stub: defers to throw_not_implemented() on the object
+    return $self->throw_not_implemented();
+}
+
+=head2 PositionHandlerI-based methods
+
+ Any methods related to interaction with other entities should be implemented
+ as a call to the PositionHandler
+
+=cut
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/EntityI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/FPCMarker.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/FPCMarker.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/FPCMarker.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,427 @@
+# $Id: FPCMarker.pm,v 1.7.4.3 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::fpcmarker
+#
+# Cared for by Gaurav Gupta <gaurav at genome.arizona.edu>
+#
+# Copyright Gaurav Gupta
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::FPCMarker - A central map object representing a marker
+
+=head1 SYNOPSIS
+
+   # get the marker object of $marker from the Bio::Map::FPCMarker
+   my $markerobj = $physical->get_markerobj($marker);
+
+   # acquire all the clones that hit this marker
+   foreach my $clone ($markerobj->each_cloneid()) {
+       print "   +++$clone\n";
+   }
+
+   # find the position of this marker in $contig
+   print "Position in contig $contig"," = ",$markerobj->position($contig),
+         "\n";
+
+   # find the group of the marker
+   print "Group : ",$markerobj->group();
+
+
+See L<Bio::Map::Position> and L<Bio::Map::PositionI> for more information.
+
+=head1 DESCRIPTION
+
+This object handles the notion of a marker.
+This object is intended to be used by a map parser like fpc.pm.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Gaurav Gupta
+
+Email gaurav at genome.arizona.edu
+
+=head1 CONTRIBUTORS
+
+Sendu Bala  bix at sendu.me.uk
+
+=head1 PROJECT LEADERS
+
+Jamie Hatfield      jamie at genome.arizona.edu
+Dr. Cari Soderlund  cari at genome.arizona.edu
+
+=head1 PROJECT DESCRIPTION
+
+The project was done in Arizona Genomics Computational Laboratory (AGCoL)
+at University of Arizona.
+
+This work was funded by USDA-IFAFS grant #11180 titled "Web Resources for 
+the Computation and Display of Physical Mapping Data".
+
+For more information on this project, please refer: 
+  http://www.genome.arizona.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::FPCMarker;
+use strict;
+use Bio::Map::Position;
+use Time::Local;
+
+use base qw(Bio::Root::Root Bio::Map::MappableI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $clone = Bio::Map::FPCMarker->new
+                      (
+		       -name    => $marker,
+		       -type    => $type,
+		       -global  => $global,
+		       -frame   => $frame,
+		       -group   => $group,
+		       -subgroup=> $subgroup,
+		       -anchor  => $anchor,
+		       -clones  => \%clones,
+		       -contigs => \%contigs,
+		       -position => \%markerpos,
+               -remark => $remark
+		       );
+
+ Function: Initialize a new Bio::Map::FPCMarker object
+           Most people will not use this directly but get Markers
+           through L<Bio::MapIO::fpc>
+ Returns : L<Bio::Map::FPCMarker> object
+ Args    : -name     => marker name string,
+	       -type     => type string,
+	       -global   => global position for marker,
+	       -frame    => boolean if marker is framework or placement,
+	       -group    => group number for marker,
+	       -subgroup => subgroup number of marker,
+	       -anchor   => boolean if marker is anchored,
+	       -clones   => all the clone elements in map (hashref),
+	       -contigs  => all the contig elements (hashref),
+	       -position => mapping of marker names to map position (hashref),
+           -remark   => remarks, separated by newlines
+
+=cut
+
+sub new {
+   # Constructor: builds the object via the parent class, then stores each
+   # named argument that was supplied through the matching accessor/setter.
+   my ($class,@args) = @_;
+   my $self= $class->SUPER::new(@args);
+
+   # POSITION is accepted as a singular alias of POSITIONS, because the POD
+   # of this constructor documents the argument as -position. It is listed
+   # last so the order of the pre-existing names is unchanged.
+   my ($name,$type,$global,$frame,$group,
+       $subgroup, $anchor, $clones,$contigs,
+       $positions, $remark, $position) = $self->_rearrange([qw(NAME TYPE GLOBAL FRAME
+					   GROUP SUBGROUP ANCHOR
+					   CLONES CONTIGS POSITIONS REMARK
+					   POSITION)],@args);
+   $positions = $position unless defined $positions;
+
+   $self->name($name)                  if defined $name;
+   $self->type($type)                  if defined $type;
+   $self->global($global)              if defined $global;
+   # -frame must be stored too, otherwise framework() never reflects it
+   $self->framework($frame)            if defined $frame;
+   $self->group($group)                if defined $group;
+   # the subgroup accessor receives the subgroup value, not the group
+   $self->subgroup($subgroup)          if defined $subgroup;
+   $self->anchor($anchor)              if defined $anchor;
+   $self->remark($remark)              if defined $remark;
+
+   # the set_* methods only accept hash references
+   $self->set_clones($clones)          if defined $clones;
+   $self->set_contigs($contigs)        if defined $contigs;
+   $self->set_positions($positions)    if defined $positions;
+
+   return $self;
+}
+
+=head1 Access Methods
+
+These methods let you get and set the member variables
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $markerobj->name();
+ Function: Get/set the name for this marker
+ Returns : scalar representing the current name of this marker
+ Args    : none to get, OR string to set
+
+=cut
+
+sub name {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) is stored as the new name
+    if (@new) {
+        $self->{'_name'} = $new[0];
+    }
+    return $self->{'_name'};
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $markerobj->type();
+ Function: Get/set the type for this marker
+ Returns : scalar representing the current type of this marker
+ Args    : none to get, OR string to set
+
+=cut
+
+sub type {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) is stored as the new type
+    if (@new) {
+        $self->{'_type'} = $new[0];
+    }
+    return $self->{'_type'};
+}
+
+=head2 global
+
+ Title   : global
+ Usage   : my $type = $markerobj->global();
+ Function: Get/set the global position for this marker
+ Returns : scalar representing the current global position of this marker
+ Args    : none to get, OR string to set
+
+=cut
+
+sub global {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) is stored as the new global position
+    if (@new) {
+        $self->{'_global'} = $new[0];
+    }
+    return $self->{'_global'};
+}
+
+=head2 anchor
+
+ Title   : anchor
+ Usage   : my $anchor = $markerobj->anchor();
+ Function: indicate if the Marker is anchored or not (True | False)
+ Returns : scalar representing the anchor (1 | 0) for this marker
+ Args    : none to get, OR 1|0 to set
+
+=cut
+
+sub anchor {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) is stored as the new anchor flag
+    if (@new) {
+        $self->{'_anchor'} = $new[0];
+    }
+    return $self->{'_anchor'};
+}
+
+=head2 framework
+
+ Title   : framework
+ Usage   : $frame = $markerobj->framework();
+ Function: indicate if the Marker is framework or placement (1 | 0)
+ Returns : scalar representing if the marker is framework
+           (1 if framework, 0 if placement)
+ Args    : none to get, OR 1|0 to set
+
+=cut
+
+sub framework {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) is stored as the new framework flag
+    if (@new) {
+        $self->{'_frame'} = $new[0];
+    }
+    return $self->{'_frame'};
+}
+
+=head2 group
+
+ Title   : group
+ Usage   : $grpno = $markerobj->group();
+ Function: Get/set the group number for this marker. This is a generic term,
+           used for Linkage-Groups as well as for Chromosomes.
+ Returns : scalar representing the group number of this marker
+ Args    : none to get, OR string to set
+
+=cut
+
+sub group {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) replaces the stored group
+    $self->{'_group'} = $new[0] if @new;
+
+    # A missing or false group is reported as 0
+    my $stored = $self->{'_group'};
+    return $stored || 0;
+}
+
+=head2 subgroup
+
+ Title   : subgroup
+ Usage   : $subgroup = $marker->subgroup();	
+ Function: Get/set the subgroup for this marker. This is a generic term:
+           subgroup here could represent subgroup of a Chromosome or of a
+           Linkage Group. The user must take care of which subgroup he/she is
+           querying for.	
+ Returns : scalar representing the subgroup of this marker
+ Args    : none to get, OR string to set
+
+=cut
+
+sub subgroup {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) replaces the stored subgroup
+    $self->{'_subgroup'} = $new[0] if @new;
+
+    # A missing or false subgroup is reported as 0
+    my $stored = $self->{'_subgroup'};
+    return $stored || 0;
+}
+
+=head2 position
+
+ Title   : position
+ Usage   : $markerpos = $markerobj->position($ctg);
+ Function: get the position of the marker in the contig
+ Returns : scalar representing the position of the marker in the contig,
+           or 0 if no contig is given or no position is known for it
+ Args    : $ctg is necessary to look for the position of the marker
+           in that contig.
+
+ *** This has nothing to do with an actual Bio::Map::PositionI object ***
+
+=cut
+
+sub position {
+    # Look up this marker's position in the contig $ctg.
+    # Returns the stored position, or 0 when $ctg is undefined or no
+    # position is recorded for it.
+    my ($self,$ctg) = @_;
+    return 0 unless defined $ctg;
+
+    # set_positions() stores its hashref under '_positions', so that slot is
+    # consulted first; '_position' is the slot this method used to read and
+    # is kept as a fallback.
+    foreach my $slot (qw(_positions _position)) {
+        my $position_for = $self->{$slot};
+        next unless defined $position_for && defined $position_for->{$ctg};
+        return $position_for->{$ctg};
+    }
+    return 0;
+}
+
+=head2 remark
+
+ Title   : remark
+ Usage   : $markerremark = $markerobj->remark();
+ Function: Get/set the remarks for this marker
+ Returns : scalar of newline-separated remarks
+ Args    : none to get, OR string to set
+
+=cut
+
+sub remark {
+    my ($self, @new) = @_;
+
+    # Any argument at all (even undef) is stored as the new remark text
+    if (@new) {
+        $self->{'_remark'} = $new[0];
+    }
+    return $self->{'_remark'};
+}
+
+=head2 each_cloneid
+
+ Title   : each_cloneid
+ Usage   : my @clones  = $map->each_cloneid();
+ Function: retrieves all the clone ids in a map unordered
+ Returns : list of strings (ids)
+ Args    : none
+
+ *** This only supplies the ids set with the set_clones method ***
+ *** It has nothing to do with actual Bio::Map::MappableI objects ***
+
+=cut
+
+sub each_cloneid {
+    my $self = shift;
+
+    # Delegates to the shared id lookup, asking for the 'clones' collection
+    return $self->_each_element('clones');
+}
+
+=head2 each_contigid
+
+ Title   : each_contigid
+ Usage   : my @contigs = $map->each_contigid();
+ Function: retrieves all the contig ids in a map unordered
+ Returns : list of strings (ids)
+ Args    : none
+
+ *** This only supplies the ids set with the set_contigs method ***
+ *** It has nothing to do with actual Bio::Map::MapI objects ***
+
+=cut
+
+sub each_contigid {
+    my $self = shift;
+
+    # Delegates to the shared id lookup, asking for the 'contigs' collection
+    return $self->_each_element('contigs');
+}
+
+sub _each_element{
+    my $self = shift;
+    my $kind = shift;
+
+    # The hash slot is the lower-cased kind with a leading underscore;
+    # 'clones' is used when no kind is given
+    my $slot = lc( '_' . (defined $kind ? $kind : 'clones') );
+
+    # An unset slot behaves like an empty hash, so no keys come back
+    my $ids = $self->{$slot} || {};
+    return keys %{$ids};
+}
+
+=head2 set_clones
+
+ Title   : set_clones
+ Usage   : $marker->set_clones(\%clones)
+ Function: Set the clone ids hashref
+ Returns : None
+ Args    : Hashref of clone ids
+
+ *** This only sets a hash of ids ***
+ *** It has nothing to do with actual Bio::Map::MappableI objects ***
+
+=cut
+
+sub set_clones{
+    my $self    = shift;
+    my $id_hash = shift;
+
+    # Stored only when the argument is defined and its ref() matches HASH;
+    # anything else is silently ignored
+    $self->{'_clones'} = $id_hash
+        if defined $id_hash && ref($id_hash) =~ /HASH/;
+}
+
+=head2 set_contigs
+
+ Title   : set_contigs
+ Usage   : $marker->set_contigs(\%contigs)
+ Function: Set the contig ids hashref
+ Returns : None
+ Args    : Hashref of contig ids
+
+ *** This only sets a hash of ids ***
+ *** It has nothing to do with actual Bio::Map::MapI objects ***
+
+=cut
+
+sub set_contigs{
+    my $self    = shift;
+    my $id_hash = shift;
+
+    # Stored only when the argument is defined and its ref() matches HASH;
+    # anything else is silently ignored
+    $self->{'_contigs'} = $id_hash
+        if defined $id_hash && ref($id_hash) =~ /HASH/;
+}
+
+=head2 set_positions
+
+ Title   : set_positions
+ Usage   : $marker->set_positions(\%markerpos)
+ Function: Set the positions hashref
+ Returns : None
+ Args    : Hashref of marker positions
+
+ *** This only sets a hash of numbers ***
+ *** It has nothing to do with actual Bio::Map::PositionI objects ***
+
+=cut
+
+sub set_positions{
+    my $self         = shift;
+    my $position_for = shift;
+
+    # Stored only when the argument is defined and its ref() matches HASH;
+    # anything else is silently ignored
+    $self->{'_positions'} = $position_for
+        if defined $position_for && ref($position_for) =~ /HASH/;
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkageMap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkageMap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkageMap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,172 @@
+# BioPerl module for Bio::Map::LinkageMap
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::LinkageMap - A representation of a genetic linkage map.
+
+=head1 SYNOPSIS
+
+    use Bio::Map::LinkageMap;
+	# create a new map
+    my $map = new Bio::Map::LinkageMap(-name => 'Chads Superterriffic Map',
+                                      -type => 'Linkage',
+                                      -units=> 'cM');
+	# create the location of a marker for that map
+    my $position = new Bio::Map::LinkagePosition( -positions => 1,
+                -distance => "22.3");
+	# create a marker and place it at that position
+    my $marker = new Bio::Map::Marker::Microsatellite(
+			-name => 'SuuuperMarker',
+			-position => $position);
+	# place that marker on that map
+    $map->add_element($marker);
+
+	# done!
+
+=head1 DESCRIPTION
+
+This object describes the basic functionality of a genetic linkage map in
+Bioperl. Each 'position' can have one or more markers that map some number of
+units from the markers at the previous position.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein       lstein at cshl.org
+Heikki Lehvaslaiho  heikki-at-bioperl-dot-org
+Jason Stajich       jason at bioperl.org
+Sendu Bala          bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::LinkageMap;
+use strict;
+
+use base qw(Bio::Map::SimpleMap);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $linkage_map = new Bio::Map::LinkageMap();
+ Function: Builds a new Bio::Map::LinkageMap object
+ Returns : Bio::Map::LinkageMap
+ Args    : -name    => the name of the map (string) [optional]
+	       -type    => the type of this map (string, defaults to Linkage) [optional]
+           -species => species for this map (Bio::Species) [optional]
+           -units   => the map units (string, defaults to cM) [optional]
+           -elements=> elements to initialize with
+                       (arrayref of Bio::Map::MappableI objects) [optional]
+           -uid      => Unique ID of this map
+
+=cut
+
+=head2 length
+
+ Title   : length
+ Usage   : my $length = $map->length();
+ Function: Retrieves the length of the map. In the case of a LinkageMap, the
+	       length is the sum of all marker distances.
+ Returns : An integer representing the length of this LinkageMap. Will return
+	       0 if length is not calculateable
+ Args    : None.
+
+
+=cut
+
+sub length {
+    my $self = shift;
+
+    # Deliberately disabled: the throw() call comes before any computation
+    $self->throw("Not yet implemented correctly");
+
+    # Draft of the summation, retained as in the original
+    #*** there is no such method ->each_position_value!
+    my $total_distance;
+    for my $element (@{$self->get_elements}) {
+        my ($first_value) = $element->position->each_position_value($self);
+        $total_distance += $first_value;
+    }
+    return $total_distance;
+}
+
+=head2 add_element($marker)
+
+ Title   : add_element($marker)
+ Usage   : $map->add_element($marker)
+ Function: Add a Bio::Map::MappableI object to the Map
+ Returns : none
+ Args    : Bio::Map::MappableI object
+ Notes   : It is strongly recommended that you use a
+	   Bio::Map::LinkagePosition as the position in any
+	   Bio::Map::Mappable that you create to place on this
+	   map. Using some other Bio::Map::Position might work but might
+	   be unpredictable.
+           N.B. I've added Bio::Map::OrderedPosition which should achieve
+                similar things from LinkagePosition and will work for
+                RH markers too.
+=cut
+
+#*** what is this? what calls it? note that it seems to be private
+sub _add_element_will_be_deleted {
+    my $self   = shift;
+    my $marker = shift;
+
+    my $marker_pos = $marker->position();
+    $self->debug( "marker position is ". $marker->position());
+
+    # Work out which slot of the _elements array the marker goes into
+    my $position;
+    if (   $marker_pos->isa('Bio::Map::LinkagePosition')
+        || $marker_pos->isa('Bio::Map::OrderedPosition') ) {
+        # ordered positions supply their slot directly
+        $position = $marker_pos->order;
+    }
+    else {
+        # any other position type: warn, then use the first value that
+        # each_position_value() yields for this map
+        $self->warn("You really should use a Linkage Position for this object. This insures that there is only one position. Trying anyway...");
+        ($position) = $marker_pos->each_position_value($self);
+        $self->throw("This marker ($marker) does not have a position in this map ($self)")
+            unless defined $position;
+    }
+
+    # An occupied slot is overwritten, with a warning
+    $self->warn("Replacing the marker in position $position because in a linkage map the position is a key.")
+        if $self->{'_elements'}[$position];
+    $self->{'_elements'}[$position] = $marker;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkagePosition.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkagePosition.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/LinkagePosition.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,105 @@
+# BioPerl module for Bio::Map::LinkagePosition
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::LinkagePosition - Create a Position for a Marker that will be placed
+	                        on a Bio::Map::LinkageMap
+
+=head1 SYNOPSIS
+
+    use Bio::Map::Position;
+    my $position = new Bio::Map::LinkagePosition(-positions => 1,
+						 -distance => 22.1 );
+
+	# can get listing of positions
+    my @positions = $position->each_position;
+
+
+=head1 DESCRIPTION
+
+Position for a Bio::Map::MarkerI compliant object that will be
+placed on a Bio::Map::LinkageMap. See L<Bio::Map::MarkerI> and
+L<Bio::Map::LinkageMap> for details
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein, lstein at cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Jason Stajich jason at bioperl.org
+Sendu Bala bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::LinkagePosition;
+use strict;
+
+
+use base qw(Bio::Map::OrderedPosition);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Map::LinkagePosition(-positions => $position,
+				                                   -distance => $distance);
+ Function: Builds a new Bio::Map::LinkagePosition object
+ Returns : Bio::Map::LinkagePosition
+ Args    : -order => the relative order of this marker on a linkage map
+ 	       -positions => positions on a map
+
+=cut
+
+=head2 Bio::Map::PositionI methods
+
+=cut
+
+=head2 order
+
+ Title   : order
+ Usage   : $o_position->order($order)
+           my $order = $o_position->order()
+ Function: get/set the order position of this position in a map
+ Returns : int
+ Args    : none to get, int to set
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/MapI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/MapI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/MapI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,340 @@
+# $Id: MapI.pm,v 1.15.4.4 2006/10/17 09:35:22 sendu Exp $
+#
+# BioPerl module for Bio::Map::MapI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::MapI - Interface for describing Map objects in bioperl 
+
+=head1 SYNOPSIS
+
+    # get a MapI somehow
+    my $name   = $map->name();     # string
+    my $length = $map->length();   # integer
+    my $species= $map->species;    # Bio::Species
+    my $type   = $map->type();     # genetic/sts/rh/
+
+=head1 DESCRIPTION
+
+This object describes the basic functionality of a Map in bioperl.
+Maps are anything from Genetic Map to Sequence Map to Assembly Map
+to Restriction Enzyme to FPC.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein, lstein at cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::MapI;
+use strict;
+use Bio::Map::PositionHandler;
+
+use base qw(Bio::Map::EntityI Bio::AnnotatableI);
+
+=head2 EntityI methods
+
+ These are fundamental to coordination of Maps and other entities, so are
+ implemented at the interface level
+
+=cut
+
+=head2 get_position_handler
+
+ Title   : get_position_handler
+ Usage   : my $position_handler = $entity->get_position_handler();
+ Function: Gets a PositionHandlerI that $entity is registered with.
+ Returns : Bio::Map::PositionHandlerI object
+ Args    : none
+
+=cut
+
+sub get_position_handler {
+    my ($self) = @_;
+
+    # Reuse the handler cached on this object, if there is one
+    return $self->{_eh} if defined $self->{_eh};
+
+    # First call: build a handler for this entity, cache it, then call its
+    # register() method
+    my $handler = Bio::Map::PositionHandler->new(-self => $self);
+    $self->{_eh} = $handler;
+    $handler->register;
+    return $self->{_eh};
+}
+
+=head2 PositionHandlerI-related methods
+
+ These are fundamental to coordination of Maps and other entities, so are
+ implemented at the interface level
+
+=cut
+
+=head2 get_positions
+
+ Title   : get_positions
+ Usage   : my @positions = $mappable->get_positions();
+ Function: Get all the Positions on this Map (sorted).
+ Returns : Array of L<Bio::Map::PositionI> objects
+ Args    : none for all, OR
+           L<Bio::Map::MappableI> object for positions of the given Mappable
+
+=cut
+
+sub get_positions {
+    my $self     = shift;
+    my $mappable = shift;
+
+    my @unsorted = $self->get_position_handler->get_positions($mappable);
+
+    # Compute each sortable() key once up front, sort numerically on that
+    # key, then strip the keys off again
+    my @keyed   = map { [$_->sortable, $_] } @unsorted;
+    my @ordered = sort { $a->[0] <=> $b->[0] } @keyed;
+    my @positions = map { $_->[1] } @ordered;
+    return @positions;
+}
+
+=head2 each_position
+
+ Title   : each_position
+ Function: Synonym of the get_positions() method.
+ Status  : deprecated, will be removed in next version
+
+=cut
+
+# each_position is installed as a second name for get_positions (typeglob
+# alias), so both names run the same code
+*each_position = \&get_positions;
+
+=head2 purge_positions
+
+ Title   : purge_positions
+ Usage   : $map->purge_positions();
+ Function: Remove all positions from this map. Notifies the positions they are
+           no longer on this map.
+ Returns : n/a
+ Args    : none to remove all positions, OR
+           L<Bio::Map::PositionI> object to remove just that Position, OR
+		   L<Bio::Map::MappableI> object to remove only those positions of the
+           given mappable
+
+=cut
+
+sub purge_positions {
+    my $self  = shift;
+    my $thing = shift;
+
+    # All the work is delegated to the position handler; $thing is passed
+    # through untouched (it may be undef)
+    my $handler = $self->get_position_handler;
+    return $handler->purge_positions($thing);
+}
+
+=head2 get_elements
+
+ Title   : get_elements
+ Usage   : my @elements = $map->get_elements;
+ Function: Retrieves all the elements on a map (unordered)
+ Returns : Array of Map elements (L<Bio::Map::MappableI>)
+ Args    : none
+
+=cut
+
+sub get_elements {
+    my ($self) = @_;
+
+    # The elements are whatever the position handler reports as the other
+    # entities
+    my $handler = $self->get_position_handler;
+    return $handler->get_other_entities;
+}
+
+=head2 each_element
+
+ Title   : each_element
+ Function: Synonym of the get_elements() method.
+ Status  : deprecated, will be removed in the next version
+
+=cut
+
+=head2 common_elements
+
+ Title   : common_elements
+ Usage   : my @common_elements = $map->common_elements(\@other_maps);
+           my @common_elements = Bio::Map::SimpleMap->common_elements(\@maps);
+ Function: Find the elements that are common to multiple maps.
+ Returns : array of Bio::Map::MappableI
+ Args    : arg #1 = L<Bio::Map::MapI> to compare this one to, or an array ref
+                    of such objects (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+           -min_num => int        : the minimum number of input maps an element
+                                    must be found on before being returned
+                                    [default is 1]
+           -min_percent => number : as above, but the minimum percentage of
+                                    input maps [default is 100 - note that this
+                                    will effectively override all other options]
+           -require_self => 1|0   : require that all output elements at least
+                                    be on the calling map [default is 1, has no
+                                    effect when the second usage form is used]
+           -required => \@maps    : require that all output elements be on at
+                                    least all the maps supplied here
+
+=cut
+
+sub common_elements {
+    # Find the elements shared between several maps.
+    #   $maps_ref   : a single map object or an array ref of maps (mandatory)
+    #   @extra_args : optional -min_num, -min_percent, -require_self and
+    #                 -required key => value pairs
+    # Returns the list of elements that are on enough of the maps and on
+    # every required map. Throws if $maps_ref is not a reference or fewer
+    # than two maps are available.
+    my ($self, $maps_ref, @extra_args) = @_;
+    $self->throw("Must supply a reference first argument") unless ref($maps_ref);
+    my @maps;
+    if (ref($maps_ref) eq 'ARRAY') {
+        @maps = @{$maps_ref};
+    }
+    elsif ($maps_ref->isa('Bio::Map::MapI')) {
+        @maps = ($maps_ref);
+    }
+    # when called on an object rather than a class name, the calling map is
+    # itself one of the maps being compared
+    if (ref($self)) {
+        unshift(@maps, $self);
+    }
+    $self->throw("Need at least 2 maps") unless @maps >= 2;
+    
+    my %args = (-min_num => 1, -min_percent => 100, -require_self => 1, -required => undef, @extra_args);
+    my $min_num = $args{-min_num};
+    if ($args{-min_percent}) {
+        # Multiply before dividing. Dividing first rounds upwards for some
+        # map counts (7 / 100 * 100 gives 7.000000000000001), which made the
+        # threshold unreachable even for elements present on every map.
+        my $mn = scalar(@maps) * $args{-min_percent} / 100;
+        if ($mn > $min_num) {
+            $min_num = $mn;
+        }
+    }
+    # maps are identified by their stringified form from here on
+    my %required = map { $_ => 1 } $args{-required} ? @{$args{-required}} : ();
+    $required{$self} = 1 if ref($self) && $args{-require_self};
+    my @required = keys %required;
+    
+    # per-map membership, the element objects themselves, and the number of
+    # maps each element was seen on
+    my %map_elements;
+    my %elements;
+    my %count;
+    foreach my $map (@maps) {
+        $map_elements{$map} = {};
+        foreach my $element ($map->get_elements) {
+            $map_elements{$map}->{$element} = 1;
+            $elements{$element} = $element;
+            $count{$element}++;
+        }
+    }
+    
+    my @elements;
+    ELEMENT: while (my ($key, $value) = each %elements) {
+        # must be on enough maps ...
+        $count{$key} >= $min_num or next;
+        # ... and on every required map
+        foreach my $required (@required) {
+            exists $map_elements{$required}->{$key} or next ELEMENT;
+        }
+        
+        push(@elements, $value);
+    }
+    return @elements;
+}
+
+=head2 MapI-specific methods
+
+=cut
+
+=head2 species
+
+ Title   : species
+ Usage   : my $species = $map->species;
+ Function: Get/Set Species for a map
+ Returns : L<Bio::Species> object
+ Args    : (optional) Bio::Species
+
+=cut
+
+sub species{
+    my ($self) = @_;
+
+    # Interface stub: defers to throw_not_implemented() on the object
+    return $self->throw_not_implemented();
+}
+
+=head2 units
+
+ Title   : units
+ Usage   : $map->units('cM');
+ Function: Get/Set units for a map
+ Returns : units for a map
+ Args    : units for a map (string)
+
+=cut
+
+sub units{
+    my ($self) = @_;
+
+    # Interface stub: defers to throw_not_implemented() on the object
+    return $self->throw_not_implemented();
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $map->type
+ Function: Get/Set Map type
+ Returns : String coding map type
+ Args    : (optional) string
+
+=cut
+
+sub type {
+    my ($self) = @_;
+
+    # Interface stub: defers to throw_not_implemented() on the object
+    return $self->throw_not_implemented();
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $map->name
+ Function: Get/Set Map name
+ Returns : Map name
+ Args    : (optional) string
+
+=cut
+
+sub name {
+    my ($self) = @_;
+
+    # Interface stub: defers to throw_not_implemented() on the object
+    return $self->throw_not_implemented();
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $length = $map->length();
+ Function: Retrieves the length of the map. 
+           It is possible for the length to be unknown for maps such as
+           Restriction Enzyme, will return 0 in that case
+ Returns : integer representing length of map in current units
+           will return undef if length is not calculateable
+ Args    : none
+
+=cut
+
+sub length {
+    my ($self) = @_;
+
+    # Interface stub: defers to throw_not_implemented() on the object
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Mappable.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Mappable.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Mappable.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,687 @@
+# $Id: Mappable.pm,v 1.6.4.5 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::Map::Mappable
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+# 
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Mappable - An object representing a generic map element
+that can have multiple locations in several maps.
+
+=head1 SYNOPSIS
+
+  # a map element in two different positions on the same map
+  $map1 = new Bio::Map::SimpleMap ();
+  $position1 = new Bio::Map::Position (-map => $map1, -value => 100);
+  $position2 = new Bio::Map::Position (-map => $map1, -value => 200);
+  $mappable = new Bio::Map::Mappable (-positions => [$position1, $position2] );
+
+  # add another position on a different map
+  $map2 = new Bio::Map::SimpleMap ();
+  $position3 = new Bio::Map::Position (-map => $map2, -value => 50);
+  $mappable->add_position($position3);
+
+  # get all the places our map element is found, on a particular map of interest
+  foreach $pos ($mappable->get_positions($map1)) {
+     print $pos->value, "\n";
+  }
+
+=head1 DESCRIPTION
+
+This object handles the notion of a generic map element. Mappables are
+entities with one or more positions on one or more maps.
+
+This object is a pure perl implementation of L<Bio::Map::MappableI>. That
+interface implements some of its own methods so check the docs there for
+those.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Mappable;
+use strict;
+use Bio::Map::Relative;
+use Bio::Map::Position;
+
+use base qw(Bio::Root::Root Bio::Map::MappableI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $mappable = new Bio::Map::Mappable();
+ Function: Builds a new Bio::Map::Mappable object
+ Returns : Bio::Map::Mappable
+ Args    : -name => string : name of the mappable element
+           -id   => string : id of the mappable element
+
+=cut
+
+sub new {    # constructor: builds the parent object, then applies -name and -id when supplied
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    
+    my ($name, $id) = $self->_rearrange([qw(NAME ID)], @args);
+    $self->name($name) if defined $name;    # defined-test so a false value such as '0' is not dropped
+    $self->id($id) if defined $id;          # likewise keeps -id => 0
+    
+    return $self;
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $mappable->name($new_name);
+	       my $name = $mappable->name();
+ Function: Get/Set the name for this Mappable
+ Returns : A scalar representing the current name of this Mappable
+ Args    : none to get
+           string to set
+
+=cut
+
+sub name {    # get/set accessor for $self->{_name}
+    my $self = shift;
+    if (@_) { $self->{_name} = shift }
+    return defined $self->{_name} ? $self->{_name} : '';    # unset => ''; a stored '0' is returned as-is
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : my $id = $mappable->id();
+           $mappable->id($new_id);
+ Function: Get/Set the id for this Mappable.
+ Returns : A scalar representing the current id of this Mappable
+ Args    : none to get
+           string to set
+
+=cut
+
+sub id {    # get/set accessor for $self->{_id}
+    my $self = shift;
+    if (@_) { $self->{_id} = shift }
+    return (defined($self->{_id}) && length($self->{_id})) ? $self->{_id} : ();    # undef or '' => nothing; an id of 0 is kept
+}
+
+=head2 in_map
+
+ Title   : in_map
+ Usage   : if ($mappable->in_map($map)) {...}
+ Function: Tests if this mappable is found on a specific map
+ Returns : boolean
+ Args    : L<Bio::Map::MapI>
+
+=cut
+
+sub in_map {
+	my ($self, $query_map) = @_;
+	$self->throw("Must supply an argument") unless $query_map;
+    $self->throw("This is [$query_map], not an object") unless ref($query_map);
+    $self->throw("This is [$query_map], not a Bio::Map::MapI object") unless $query_map->isa('Bio::Map::MapI');
+    
+    foreach my $map ($self->known_maps) {
+        ($map eq $query_map) && return 1;
+    }
+    
+    return 0;
+}
+
+=head2 Comparison methods
+
+=cut
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if ($mappable->equals($other_mappable)) {...}
+           my @equal_positions = $mappable->equals($other_mappable);
+ Function: Finds the positions in this mappable that are equal to any
+           comparison positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+
+=cut
+
+sub equals {    # delegates to _compare() in 'equals' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('equals', @compare_args);
+}
+
+=head2 less_than
+
+ Title   : less_than
+ Usage   : if ($mappable->less_than($other_mappable)) {...}
+           my @lesser_positions = $mappable->less_than($other_mappable);
+ Function: Finds the positions in this mappable that are less than all
+           comparison positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+
+=cut
+
+sub less_than {    # delegates to _compare() in 'less_than' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('less_than', @compare_args);
+}
+
+=head2 greater_than
+
+ Title   : greater_than
+ Usage   : if ($mappable->greater_than($other_mappable)) {...}
+           my @greater_positions = $mappable->greater_than($other_mappable);
+ Function: Finds the positions in this mappable that are greater than all
+           comparison positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+
+=cut
+
+sub greater_than {    # delegates to _compare() in 'greater_than' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('greater_than', @compare_args);
+}
+
+=head2 overlaps
+
+ Title   : overlaps
+ Usage   : if ($mappable->overlaps($other_mappable)) {...}
+           my @overlapping_positions = $mappable->overlaps($other_mappable);
+ Function: Finds the positions in this mappable that overlap with any
+           comparison positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+
+=cut
+
+sub overlaps {    # delegates to _compare() in 'overlaps' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('overlaps', @compare_args);
+}
+
+=head2 contains
+
+ Title   : contains
+ Usage   : if ($mappable->contains($other_mappable)) {...}
+           my @container_positions = $mappable->contains($other_mappable);
+ Function: Finds the positions in this mappable that contain any comparison
+           positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+
+=cut
+
+sub contains {    # delegates to _compare() in 'contains' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('contains', @compare_args);
+}
+
+=head2 overlapping_groups
+
+ Title   : overlapping_groups
+ Usage   : my @groups = $mappable->overlapping_groups($other_mappable);
+           my @groups = Bio::Map::Mappable->overlapping_groups(\@mappables);
+ Function: Look at all the positions of all the supplied mappables and group
+           them according to overlap.
+ Returns : array of array refs, each ref containing the Bio::Map::PositionI
+           objects that overlap with each other
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to  compare
+                    this one to, or an array ref of such objects (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+           -min_pos_num => int    : the minimum number of positions that must
+                                    be in a group before it will be returned
+                                    [default is 1]
+           -min_mappables_num => int        : the minimum number of different
+                                              mappables represented by the
+                                              positions in a group before it
+                                              will be returned [default is 1]
+           -min_mappables_percent => number : as above, but the minimum
+                                              percentage of input mappables
+                                              [default is 0]
+           -min_map_num => int              : the minimum number of different
+                                              maps represented by the positions
+                                              in a group before it will be
+                                              returned [default is 1]
+           -min_map_percent => number       : as above, but the minimum
+                                              percentage of maps known by the
+                                              input mappables [default is 0]
+           -require_self => 1|0             : require that at least one of the
+                                              calling object's positions be in
+                                              each group [default is 0, has no
+                                              effect when the second usage form
+                                              is used]
+           -required => \@mappables         : require that at least one position
+                                              for each mappable supplied in this
+                                              array ref be in each group
+
+=cut
+
+sub overlapping_groups {    # delegates to _compare() in 'overlapping_groups' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('overlapping_groups', @compare_args);
+}
+
+=head2 disconnected_intersections
+
+ Title   : disconnected_intersections
+ Usage   : $result = $mappable->disconnected_intersections($other_mappable);
+           $result = Bio::Map::Mappable->disconnected_intersections(\@mappables);
+ Function: Make the positions that are at the intersection of each group of
+           overlapping positions, considering all the positions of the supplied
+           mappables.
+ Returns : new Bio::Map::Mappable whose positions on maps are the calculated
+           disconnected intersections
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to  compare
+                    this one to, or an array ref of such objects (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+           -min_pos_num => int    : the minimum number of positions that must
+                                    be in a group before the intersection will
+                                    be calculated and returned [default is 1]
+           -min_mappables_num => int        : the minimum number of different
+                                              mappables represented by the
+                                              positions in a group before the
+                                              intersection will be calculated
+                                              and returned [default is 1]
+           -min_mappables_percent => number : as above, but the minimum
+                                              percentage of input mappables
+                                              [default is 0]
+           -min_map_num => int              : the minimum number of different
+                                              maps represented by the positions
+                                              in a group before the intersection
+                                              will be calculated and returned
+                                              [default is 1]
+           -min_map_percent => number       : as above, but the minimum
+                                              percentage of maps known by the
+                                              input mappables [default is 0]
+           -require_self => 1|0             : require that at least one of the
+                                              calling object's positions be in
+                                              each group [default is 0, has no
+                                              effect when the second usage form
+                                              is used]
+           -required => \@mappables         : require that at least one position
+                                              for each mappable supplied in this
+                                              array ref be in each group
+
+=cut
+
+sub disconnected_intersections {    # delegates to _compare() in 'intersection' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('intersection', @compare_args);
+}
+
+=head2 disconnected_unions
+
+ Title   : disconnected_unions
+ Usage   : my $result = $mappable->disconnected_unions($other_mappable);
+           my $result = Bio::Map::Mappable->disconnected_unions(\@mappables);
+ Function: Make the positions that are the union of each group of overlapping
+           positions, considering all the positions of the supplied mappables.
+ Returns : new Bio::Map::Mappable whose positions on maps are the calculated
+           disconnected unions
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to  compare
+                    this one to, or an array ref of such objects (mandatory)
+           arg #2 = optionally, one or more of the key => value pairs below
+		   -map => MapI           : a Bio::Map::MapI to only consider positions
+		                            on the given map
+		   -relative => RelativeI : a Bio::Map::RelativeI to calculate in terms
+                                    of each Position's relative position to the
+                                    thing described by that Relative
+           -min_pos_num => int    : the minimum number of positions that must
+                                    be in a group before the union will be
+                                    calculated and returned [default is 1]
+           -min_mappables_num => int        : the minimum number of different
+                                              mappables represented by the
+                                              positions in a group before the
+                                              union will be calculated and
+                                              returned [default is 1]
+           -min_mappables_percent => number : as above, but the minimum
+                                              percentage of input mappables
+                                              [default is 0]
+           -min_map_num => int              : the minimum number of different
+                                              maps represented by the positions
+                                              in a group before the union will
+                                              be calculated and returned
+                                              [default is 1]
+           -min_map_percent => number       : as above, but the minimum
+                                              percentage of maps known by the
+                                              input mappables [default is 0]
+           -require_self => 1|0             : require that at least one of the
+                                              calling object's positions be in
+                                              each group [default is 0, has no
+                                              effect when the second usage form
+                                              is used]
+           -required => \@mappables         : require that at least one position
+                                              for each mappable supplied in this
+                                              array ref be in each group
+
+=cut
+
+sub disconnected_unions {    # delegates to _compare() in 'union' mode
+    my ($self, @compare_args) = @_;
+    return $self->_compare('union', @compare_args);
+}
+
+# Shared back end of the public comparison methods: applies the PositionI method named by $method to our own positions
+# (@mine) against those gathered from $input (@yours); returns matching positions, groups of positions, or a new mappable.
+sub _compare {
+    my ($self, $method, $input, @extra_args) = @_;
+    $self->throw("Must supply an object or array ref of them") unless ref($input);
+    $self->throw("Wrong number of extra args (should be key => value pairs)") unless @extra_args % 2 == 0;
+    my @compares = ref($input) eq 'ARRAY' ? @{$input} : ($input);    # $input is one object or an array ref of them
+    
+    my %args = (-map => undef, -relative => undef, -min_pos_num => 1,
+                -min_mappables_num => 1, -min_mappables_percent => 0,
+                -min_map_num => 1, -min_map_percent => 0,
+                -require_self => 0, -required => undef, @extra_args);    # caller-supplied pairs come last, so they override these defaults
+    my $map = $args{-map};
+    my $rel = $args{-relative};
+    my $min_pos_num = $args{-min_pos_num};
+    my $min_pables_num = $args{-min_mappables_num};
+    if ($args{-min_mappables_percent}) {
+        my $mn = (@compares + (ref($self) ? 1 : 0)) / 100 * $args{-min_mappables_percent};    # the invocant counts as one more mappable only when it is an object
+        if ($mn > $min_pables_num) {
+            $min_pables_num = $mn;    # the stricter of the absolute and percentage thresholds wins
+        }
+    }
+    my $min_map_num = $args{-min_map_num};
+    if ($args{-min_map_percent}) {
+        my %known_maps;
+        foreach my $pable (@compares, ref($self) ? ($self) : ()) {
+            foreach my $known ($pable->known_maps) {
+                $known_maps{$known->unique_id} = 1;
+            }
+        }
+        my $mn = scalar(keys %known_maps) / 100 * $args{-min_map_percent};
+        if ($mn > $min_map_num) {
+            $min_map_num = $mn;    # the stricter of the absolute and percentage thresholds wins
+        }
+    }
+    my %required = map { $_ => 1 } $args{-required} ? @{$args{-required}} : ();    # keys are the stringified -required mappables
+    my (@mine, @yours);
+    
+    if (ref($self)) {    # skipped for class-method calls, leaving @mine empty
+        @mine = $self->get_positions($map);
+        if ($args{-require_self}) {
+            @mine > 0 or return;
+            $required{$self} = 1;    # the invocant itself becomes a required mappable
+        }
+    }
+    my @required = keys %required;
+    
+    foreach my $compare (@compares) {
+        if ($compare->isa('Bio::Map::PositionI')) {
+            push(@yours, $compare);
+        }
+        elsif ($compare->isa('Bio::Map::MappableI')) {
+            push(@yours, $compare->get_positions($map));
+        }
+        else {
+            $self->throw("This is [$compare], not a Bio::Map::MappableI or Bio::Map::PositionI");
+        }
+    }
+    @yours > 0 or return;    # nothing to compare against
+    
+    my @ok;
+    SWITCH: for ($method) {    # aliases $_ to $method so the bare patterns below match against it
+        /equals|overlaps|contains/ && do {
+            @mine > 0 or return;
+            foreach my $my_pos (@mine) {
+                foreach my $your_pos (@yours) {
+                    if ($my_pos->$method($your_pos, undef, $rel)) {
+                        push(@ok, $my_pos);
+                        last;    # one matching comparison position is enough
+                    }
+                }
+            }
+            last SWITCH;
+        };
+        /less_than|greater_than/ && do {
+            @mine > 0 or return;
+            if ($method eq 'greater_than') {
+                @mine =  map { $_->[1] }
+                         sort { $b->[0] <=> $a->[0] }
+                         map { [$_->end($_->absolute_relative), $_] }
+                         @mine;
+                @yours = map { $_->[1] }
+                         sort { $b->[0] <=> $a->[0] }
+                         map { [$_->end($_->absolute_relative), $_] }
+                         @yours;
+            }
+            my $test_pos = shift(@yours);    # only the first of @yours is tested; NOTE(review): less_than sorts nothing here, presumably relying on get_positions order - confirm
+            
+            foreach my $my_pos (@mine) {
+                if ($my_pos->$method($test_pos, $rel)) {
+                    push(@ok, $my_pos);
+                }
+                else {
+                    last;    # stop at the first position that fails the test
+                }
+            }
+            
+            if ($method eq 'greater_than') {
+                @ok = map { $_->[1] }
+                      sort { $a->[0] <=> $b->[0] }
+                      map { [$_->sortable, $_] }
+                      @ok;
+            }
+            
+            last SWITCH;
+        };
+        /overlapping_groups|intersection|union/ && do {
+            my @positions = (@mine, @yours);
+            my $start_pos = shift(@positions);    # used as the invocant for disconnected_ranges()
+            
+            my $dr_able = $start_pos->disconnected_ranges(\@positions, $rel) || return;
+            my @disconnected_ranges = $dr_able->get_positions;
+            
+            my %all_groups;
+            my %done_ranges;
+            for my $i (0..$#disconnected_ranges) {
+                my $range = $disconnected_ranges[$i];
+                my $range_string = $range->toString;
+                next if $done_ranges{$range_string};
+                $done_ranges{$range_string} = 1;
+                
+                foreach my $pos ($start_pos, @positions) {
+                    if ($pos->overlaps($range, undef, $rel)) {
+                        $all_groups{$range_string}->{$pos} = $pos;    # keyed by stringified position, so each is stored once per range
+                    }
+                }
+            }
+            
+            # purge the temporary working (not $dr_able->purge_positions since
+            # that removes the element from each position, but leaves it on
+            # the map. *** need complete purge that removes position from
+            # memory...
+            foreach my $pos (@disconnected_ranges) {
+                my $map = $pos->map || next;
+                $map->purge_positions($pos);
+            }
+            
+            my @groups;
+            GROUPS: foreach my $group (values %all_groups) {
+                my @group = values %{$group};
+                @group >= $min_pos_num or next;
+                @group >= $min_pables_num or next; # shortcut before having to work it out properly
+                @group >= $min_map_num or next; # shortcut before having to work it out properly
+                
+                my %mappables;
+                foreach my $pos (@group) {
+                    my $mappable = $pos->element || next;
+                    $mappables{$mappable} = 1;
+                }
+                keys %mappables >= $min_pables_num or next;
+                
+                my %maps;
+                foreach my $pos (@group) {
+                    my $map = $pos->map || next;
+                    $maps{$map->unique_id} = 1;
+                }
+                keys %maps >= $min_map_num or next;
+                
+                foreach my $required (@required) {
+                    exists $mappables{$required} or next GROUPS;    # drop groups lacking a required mappable
+                }
+                
+                my @sorted = map { $_->[1] }
+                             sort { $a->[0] <=> $b->[0] }
+                             map { [$_->sortable, $_] }
+                             @group;
+                
+                push(@groups, \@sorted);
+            }
+            
+            if ($method eq 'overlapping_groups') {
+                return @groups;
+            }
+            else {
+                foreach my $group (@groups) {
+                    my $start_pos = shift(@{$group});
+                    
+                    unless (@{$group}) {
+                        # we'll consider the 'intersection' or 'union' of just
+                        # one position as the position itself
+                        push(@ok, Bio::Map::Position->new(-map => $start_pos->map,
+                                                          -start => $start_pos->start,
+                                                          -end => $start_pos->end));
+                    }
+                    else {
+                        my @rel_arg = $method eq 'intersection' ? (undef, $rel) : ($rel);    # intersection gets an extra undef before $rel; union does not
+                        my $result = $start_pos->$method($group, @rel_arg) || next;
+                        push(@ok, $result->get_positions);
+                    }
+                }
+                
+                # assign all the positions to a result mappable
+                my $result = $self->new();
+                $result->add_position(@ok) if @ok; # add_position can actually take a list
+                
+                return $result;
+            }
+            
+            last SWITCH;    # not reached: both branches above return
+        };
+        
+        $self->throw("Unknown method '$method'");    # reached only when no pattern above matched
+    }
+    
+    return @ok;
+}
+
+=head2 tuple
+
+ Title   : tuple
+ Usage   : Do Not Use!
+ Function: tuple was supposed to be a private method; this method no longer
+           does anything
+ Returns : warning
+ Args    : none
+ Status  : deprecated, will be removed in next version
+
+=cut
+
+sub tuple {    # deprecated: only emits a warning via $self->warn
+    my ($self) = @_;
+    return $self->warn("The tuple method was supposed to be a private method, don't call it!");
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $mappable->annotation($an_col);
+           my $an_col = $mappable->annotation();
+ Function: Get the annotation collection (see Bio::AnnotationCollectionI)
+           for this annotatable object.
+ Returns : a Bio::AnnotationCollectionI implementing object, or undef
+ Args    : none to get, OR
+           a Bio::AnnotationCollectionI implementing object to set
+
+=cut
+
+sub annotation {    # get/set accessor for $self->{_annotation}
+    my ($self, @new_value) = @_;
+    $self->{_annotation} = $new_value[0] if @new_value;
+    return $self->{_annotation} ? $self->{_annotation} : ();    # a false slot yields undef / empty list
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Mappable.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/MappableI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/MappableI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/MappableI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,393 @@
+# $Id: MappableI.pm,v 1.17.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::MappableI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::MappableI - An object that can be placed in a map
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+    # See Bio::Map::Mappable for an example of
+    # implementation.
+
+=head1 DESCRIPTION
+
+This object handles the generic notion of an element placed on a
+(linear) Map. A Mappable can have multiple positions in multiple maps, such as
+is the case of Restriction enzyme cut sites on sequence maps. For exact
+information about a mappable's position in a map one must query the associated
+PositionI objects which are accessible through the get_positions() method.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho heikki-at-bioperl-dot-org
+Lincoln Stein      lstein at cshl.org
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::MappableI;
+use strict;
+use Bio::Map::PositionHandler;
+
+use base qw(Bio::Map::EntityI Bio::AnnotatableI);
+
+=head2 EntityI methods
+
+ These are fundamental to coordination of Mappables and other entities, so are
+ implemented at the interface level
+
+=cut
+
+=head2 get_position_handler
+
+ Title   : get_position_handler
+ Usage   : my $position_handler = $entity->get_position_handler();
+ Function: Gets a PositionHandlerI that $entity is registered with.
+ Returns : Bio::Map::PositionHandlerI object
+ Args    : none
+
+=cut
+
+sub get_position_handler {
+    my ($self) = @_;
+
+    # The handler is created lazily and cached on the object under '_eh'.
+    return $self->{_eh} if defined $self->{_eh};
+
+    my $handler = Bio::Map::PositionHandler->new(-self => $self);
+    # Cache first, then register, in that order.
+    $self->{_eh} = $handler;
+    $handler->register;
+
+    return $self->{_eh};
+}
+
+=head2 PositionHandlerI-related methods
+
+ These are fundamental to coordination of Mappables and other entities, so are
+ implemented at the interface level
+
+=cut
+
+=head2 add_position
+
+ Title   : add_position
+ Usage   : $mappable->add_position($position);
+ Function: Add a position to this mappable (defining where on which map it is).
+ Returns : n/a
+ Args    : L<Bio::Map::PositionI> object
+
+=cut
+
+sub add_position {
+    my $self = shift;
+    # Despite the singular name, every remaining argument is forwarded,
+    # so several positions can be added in one call.
+    my $handler = $self->get_position_handler;
+    return $handler->add_positions(@_);
+}
+
+=head2 get_positions
+
+ Title   : get_positions
+ Usage   : my @positions = $mappable->get_positions();
+ Function: Get all the Positions of this Mappable (sorted).
+ Returns : Array of L<Bio::Map::PositionI> objects
+ Args    : none for all, OR
+           L<Bio::Map::MapI> object for positions on the given map, AND/OR some
+           other true value to avoid sorting
+
+=cut
+
+sub get_positions {
+    my ($self, $thing, $no_sort) = @_;
+
+    # The first argument is either a map to restrict the lookup to, or
+    # (when it is not a MapI object) the "don't sort" flag itself.
+    my $map;
+    if (ref($thing) && $thing->isa('Bio::Map::MapI')) {
+        $map = $thing;
+    }
+    else {
+        $no_sort = $thing;
+    }
+
+    my @found = $self->get_position_handler->get_positions($map);
+
+    # Nothing to order when there is exactly one position, or when the
+    # caller asked for unsorted results.
+    return @found if @found == 1;
+    return @found if $no_sort;
+
+    # Compute each sortable() value once, up front, rather than calling
+    # sortable() from inside the sort comparator: sortable() can itself
+    # trigger another sort routine and cause problems (and computing it
+    # once per position is cheaper anyway).
+    my @keyed  = map { [ $_->sortable, $_ ] } @found;
+    my @sorted = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @keyed;
+
+    return @sorted;
+}
+
+=head2 each_position
+
+ Title   : each_position
+ Function: Synonym of the get_positions() method.
+ Status  : deprecated, will be removed in next version
+
+=cut
+
+# Glob assignment: each_position becomes a second name for the very same
+# subroutine as get_positions (deprecated synonym, see POD above).
+*each_position = \&get_positions;
+
+=head2 purge_positions
+
+ Title   : purge_positions
+ Usage   : $mappable->purge_positions();
+ Function: Remove positions from this mappable.
+ Returns : n/a
+ Args    : none to remove all positions, OR
+           L<Bio::Map::PositionI> object to remove just that Position, OR
+		   L<Bio::Map::MapI> object to remove only those positions on the given
+		   map
+
+=cut
+
+sub purge_positions {
+    my $self   = shift;
+    my $target = shift;
+    # $target (possibly undef) is passed through unchanged; the position
+    # handler decides what to remove.
+    my $handler = $self->get_position_handler;
+    return $handler->purge_positions($target);
+}
+
+=head2 known_maps
+
+ Title   : known_maps
+ Usage   : my @maps = $marker->known_maps()
+ Function: Returns the maps that this mappable is found on
+ Returns : Array of L<Bio::Map::MapI> objects
+ Args    : none
+
+=cut
+
+sub known_maps {
+    my ($self) = @_;
+    # The maps are whatever the position handler reports as the "other
+    # entities" this mappable is linked to.
+    my $handler = $self->get_position_handler;
+    return $handler->get_other_entities;
+}
+
+=head2 MappableI-specific methods
+
+=cut
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $marker->name();
+           $marker->name($new_name);
+ Function: Get/Set the name for this Mappable.
+ Returns : A scalar representing the current name of this Mappable
+ Args    : none to get
+           string to set
+
+=cut
+
+sub name {
+    my ($self) = @_;
+    # Interface stub: implementing classes must override this accessor.
+    return $self->throw_not_implemented();
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : my $id = $marker->id();
+           $marker->id($new_id);
+ Function: Get/Set the id for this Mappable.
+ Returns : A scalar representing the current id of this Mappable
+ Args    : none to get
+           string to set
+
+=cut
+
+sub id {
+    my ($self) = @_;
+    # Interface stub: implementing classes must override this accessor.
+    return $self->throw_not_implemented();
+}
+
+=head2 in_map
+
+ Title   : in_map
+ Usage   : if ($marker->in_map($map)) {...}
+ Function: Tests if this mappable is found on a specific map
+ Returns : boolean
+ Args    : L<Bio::Map::MapI>
+
+=cut
+
+sub in_map {
+    my ($self) = @_;
+    # Interface stub: the membership test is left to implementing classes.
+    return $self->throw_not_implemented();
+}
+
+=head1 RangeI-related Methods
+
+They throw an error if start and end are not defined in the Positions of the
+Mappables supplied.
+
+=cut
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if ($mappable->equals($other_mappable)) {...}
+           my @equal_positions = $mappable->equals($other_mappable);
+ Function: Finds the positions in this mappable that are equal to any
+           comparison positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, the key => value pairs below
+		   -map => Bio::Map::MapI           : optionally a Map to only consider
+		                                      positions on the given map
+		   -relative => Bio::Map::RelativeI : optionally a Relative to ask if
+											  the Positions equal in terms of
+											  their relative position to the
+											  thing described by that Relative
+
+=cut
+
+sub equals {
+    my ($self) = @_;
+    # Interface stub: the comparison is left to implementing classes.
+    return $self->throw_not_implemented();
+}
+
+=head2 overlaps
+
+ Title   : overlaps
+ Usage   : if ($mappable->overlaps($other_mappable)) {...}
+           my @overlapping_positions = $mappable->overlaps($other_mappable);
+ Function: Finds the positions in this mappable that overlap with any
+           comparison positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, the key => value pairs below
+		   -map => Bio::Map::MapI           : optionally a Map to only consider
+		                                      positions on the given map
+		   -relative => Bio::Map::RelativeI : optionally a Relative to ask if
+                                              the Positions overlap in terms of
+                                              their relative position to the
+                                              thing described by that Relative
+
+=cut
+
+sub overlaps {
+    my ($self) = @_;
+    # Interface stub: the comparison is left to implementing classes.
+    return $self->throw_not_implemented();
+}
+
+=head2 contains
+
+ Title   : contains
+ Usage   : if ($mappable->contains($other_mappable)) {...}
+           my @container_positions = $mappable->contains($other_mappable);
+ Function: Finds the positions in this mappable that contain any comparison
+           positions.
+ Returns : array of L<Bio::Map::PositionI> objects
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to compare
+                    this one to (mandatory)
+           arg #2 = optionally, the key => value pairs below
+		   -map => Bio::Map::MapI           : optionally a Map to only consider
+		                                      positions on the given map
+		   -relative => Bio::Map::RelativeI : optionally a Relative to ask if
+                                              the Positions contain in terms of
+                                              their relative position to the
+                                              thing described by that Relative
+
+=cut
+
+sub contains {
+    my ($self) = @_;
+    # Interface stub: the comparison is left to implementing classes.
+    return $self->throw_not_implemented();
+}
+
+=head2 intersection
+
+ Title   : intersection
+ Usage   : my $position = $mappable->intersection($other_mappable);
+           my $position = Bio::Map::Mappable->intersection(\@mappables);
+ Function: Make the position that is at the intersection of all positions of all
+           supplied mappables.
+ Returns : L<Bio::Map::PositionI> object or undef (if not all positions overlap)
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to  compare
+                    this one to, or an array ref of such objects (mandatory)
+           arg #2 = optionally, the key => value pairs below
+		   -map => Bio::Map::MapI           : optionally a Map to only consider
+		                                      positions on the given map
+		   -relative => Bio::Map::RelativeI : optionally a Relative to ask
+											  how the Positions intersect in
+											  terms of their relative position
+											  to the thing described by that
+											  Relative
+
+=cut
+
+sub intersection {
+    my ($self) = @_;
+    # Interface stub: the computation is left to implementing classes.
+    return $self->throw_not_implemented();
+}
+
+=head2 union
+
+ Title   : union
+ Usage   : my $position = $mappable->union($other_mappable);
+           my $position = Bio::Map::Mappable->union(@mappables);
+ Function: Make the minimal position that contains all of the positions of all
+           supplied mappables.
+ Returns : L<Bio::Map::PositionI> object or undef (if not all positions overlap)
+ Args    : arg #1 = L<Bio::Map::MappableI> OR L<Bio::Map::PositionI> to  compare
+                    this one to, or an array ref of such objects (mandatory)
+           arg #2 = optionally, the key => value pairs below
+		   -map => Bio::Map::MapI           : optionally a Map to only consider
+		                                      positions on the given map
+		   -relative => Bio::Map::RelativeI : optionally a Relative to ask
+											  for the union of the Positions in
+											  terms of their relative position
+											  to the thing described by that
+											  Relative
+
+=cut
+
+sub union {
+    my ($self) = @_;
+    # Interface stub: the computation is left to implementing classes.
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Marker.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Marker.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Marker.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,362 @@
+# $Id: Marker.pm,v 1.22.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::Marker
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Chad Matsalla
+# 
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Marker - A central map object representing a generic marker
+that can have multiple locations in several maps.
+
+=head1 SYNOPSIS
+
+  # get map objects somehow
+
+  # a marker with complex localisation
+  $o_usat = new Bio::Map::Marker (-name=>'Chad Super Marker 2',
+				  -positions => [ [$map1, $position1], 
+                                                  [$map1, $position2] 
+						] );
+
+  # The markers deal with Bio::Map::Position objects which can also
+  # be explicitly created and passed on to markers as an array ref:
+  $o_usat2 = new Bio::Map::Marker (-name=>'Chad Super Marker 3',
+				  -positions => [ $pos1, 
+                                                  $pos2 
+						] );
+
+  # a marker with unique position in a map
+  $marker1 = new Bio::Map::Marker (-name=>'hypervariable1',
+				   -map => $map1,
+				   -position => 100
+				   );
+
+  # another way of creating a marker with unique position in a map:
+  $marker2 = new Bio::Map::Marker (-name=>'hypervariable2');
+  $map1->add_element($marker2);
+  $marker2->position(100);
+
+  # The position method is a shortcut for getting/setting a unique position;
+  # setting overwrites any previous values.
+  # To place a marker on other maps, or to give it multiple positions
+  # within the same map, use add_position()
+
+  $marker2->add_position(200);	# new position in the same map
+  $marker2->add_position($map2,200); # new map
+
+  # setting a map() in a marker or adding a marker into a map are
+  # identical methods. Both set the bidirectional connection which is
+  # used by the marker to remember its latest, default map.
+
+  # Regardless of how marker positions are created, they are stored and
+  # returned as Bio::Map::PositionI objects:
+
+  # unique position
+  print $marker1->position->value, "\n";
+  # several positions
+  foreach $pos ($marker2->each_position($map1)) {
+     print $pos->value, "\n";
+  }
+
+See L<Bio::Map::Position> and L<Bio::Map::PositionI> for more information.
+
+=head1 DESCRIPTION
+
+A Marker is a Bio::Map::Mappable with some properties particular to markers.
+It also offers a number of convenience methods to make dealing with map
+elements easier.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho heikki-at-bioperl-dot-org
+Lincoln Stein      lstein at cshl.org
+Jason Stajich      jason at bioperl.org
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Marker;
+use strict;
+use Bio::Map::Position;
+
+use base qw(Bio::Map::Mappable Bio::Map::MarkerI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $marker = new Bio::Map::Marker( -name => 'Whizzy marker',
+	                                          -position => $position);
+ Function: Builds a new Bio::Map::Marker object
+ Returns : Bio::Map::Marker
+ Args    :
+           -name    => name of this marker
+                       [optional], string, default 'Unnamed marker'
+           -default_map => the default map for this marker, a Bio::Map::MapI
+           -position => map position for this marker, a Bio::Map::PositionI
+           -positions => array ref of Bio::Map::PositionI objects
+
+           position and positions can also take as values anything the
+           corresponding methods can take
+
+=cut
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+
+    # Let the parent constructor build the object, then bless it into the
+    # class new() was invoked on (or the class of the invoking object).
+    my $self = $class->SUPER::new(@args);
+    bless $self, (ref($class) || $class);
+
+    my ($name, $default_map, $map, $position, $positions)
+        = $self->_rearrange(
+            [qw(NAME DEFAULT_MAP MAP POSITION POSITIONS)],
+            @args
+        );
+
+    # A marker always gets a name; fall back to a placeholder.
+    $self->name($name || 'Unnamed marker');
+
+    # -map is applied before -default_map, so an explicit -default_map
+    # wins when both are supplied.  The map has to be known before any
+    # positions are set.
+    $self->default_map($map)         if $map;
+    $self->default_map($default_map) if $default_map;
+    $self->position($position)       if $position;
+    $self->positions($positions)     if $positions;
+
+    return $self;
+}
+
+=head2 default_map
+
+ Title   : default_map
+ Usage   : my $map = $marker->default_map();
+ Function: Get/Set the default map for the marker.
+ Returns : L<Bio::Map::MapI>
+ Args    : [optional] new L<Bio::Map::MapI>
+
+=cut
+
+sub default_map {
+    my ($self, $map) = @_;
+
+    # Setter form: only a Bio::Map::MapI implementation is accepted as
+    # the default map; anything else raises an exception via throw().
+    if (defined $map) {
+        $self->throw("This is [$map], not Bio::Map::MapI object")
+            unless $map->isa('Bio::Map::MapI');
+        $self->{'_default_map'} = $map;
+    }
+
+    # Getter form: the stored map, or a bare return (undef / empty list)
+    # when no default map has been set.
+    return $self->{'_default_map'} || return;
+}
+
+=head2 map
+
+ Title   : map
+ Function: This is a synonym of the default_map() method
+
+		   *** does not actually add this marker to the map! ***
+
+ Status  : deprecated, will be removed in next version
+
+=cut
+
+# Glob assignment: map becomes a second name for the very same subroutine
+# as default_map (deprecated synonym, see POD above).
+*map = \&default_map;
+
+=head2 get_position_object
+
+ Title   : get_position_object
+ Usage   : my $position = $marker->get_position_object();
+ Function: To get an object of the default Position class
+           for this Marker. Subclasses should redefine this method.
+           The Position returned needs to be a L<Bio::Map::PositionI> with
+		   -element set to self.
+ Returns : L<Bio::Map::PositionI>
+ Args    : none for an 'empty' PositionI object, optionally
+           Bio::Map::MapI and value string to set the Position's -map and -value
+           attributes.
+
+=cut
+
+sub get_position_object {
+    my ($self, $map, $value) = @_;
+
+    # Fall back to this marker's default map when none is given.
+    $map ||= $self->default_map;
+
+    # A value counts as supplied when it is defined and non-empty.  Plain
+    # truthiness is not used because it would silently discard a
+    # legitimate position value of 0.
+    my $has_value = defined($value) && length($value);
+    if ($has_value && ref($value) ne '') {
+        $self->throw("Value better be scalar, not [$value]");
+    }
+
+    my $pos = Bio::Map::Position->new();
+    $pos->map($map) if $map;
+    $pos->value($value) if $has_value;
+    # The new Position is always tied to this marker as its element.
+    $pos->element($self);
+    return $pos;
+}
+
+=head2 position
+
+ Title   : position
+ Usage   : my $position = $mappable->position();
+		   $mappable->position($position);
+ Function: Get/Set the Position of this Marker (where it is on which map),
+           purging all other positions before setting.
+ Returns : L<Bio::Map::PositionI>
+ Args    : Bio::Map::PositionI
+            OR
+           Bio::Map::MapI AND
+           scalar
+            OR
+           scalar, but only if the marker has a default map
+
+=cut
+
+sub position {
+    my $self = shift;
+    my ($where, $value) = @_;
+
+    # Setter form: drop every existing position, then add the new one.
+    # The arguments are passed straight on to add_position().
+    if ($where) {
+        $self->purge_positions;
+        $self->add_position($where, $value);
+    }
+
+    # Getter form: fetch whatever each_position() returns and hand back
+    # the last entry, warning if there was more than one to choose from.
+    my @held = $self->each_position;
+    if (scalar(@held) > 1) {
+        $self->warn('This marker has more than one Position, returning the most recently added');
+    }
+    my $last = pop @held;
+    return $last;
+}
+
+=head2 add_position
+
+ Title   : add_position
+ Usage   : $marker->add_position($position);
+ Function: Add a Position to this marker
+ Returns : n/a
+ Args    : Bio::Map::PositionI
+            OR
+           Bio::Map::MapI AND
+           scalar
+            OR
+           scalar, but only if the marker has a default map
+
+=cut
+
+sub add_position  {
+    my ($self, $where, $value) = @_;
+    defined $where or $self->throw("Must give a Position");
+
+    my $map = $self->default_map;
+    my $map_of_position;
+
+    if (ref $where) {
+        # An array ref is unpacked into (object, value) before the object
+        # is inspected.
+        if (ref($where) eq 'ARRAY') {
+            ($where, $value) = @{$where};
+            $self->throw("Supplied an array ref but did not contain two values, the first an object")
+                unless $where && $value && ref($where);
+        }
+
+        if ($where->isa('Bio::Map::PositionI')) {
+            # A Position brings its own map, which takes precedence and
+            # becomes the default map if the marker has none yet.
+            $map_of_position = $where->map;
+            $self->default_map($map_of_position) unless $map;
+            $map = $map_of_position if $map_of_position;
+        }
+        elsif ($where->isa('Bio::Map::MapI')) {
+            # Map + scalar form: from here on $where holds the scalar.
+            $self->default_map($where) unless $map;
+            $map   = $where;
+            $where = $value;
+        }
+        else {
+            $self->throw("This is [$where], not a Bio::Map::PositionI or Bio::Map::MapI object");
+        }
+    }
+
+    $map or $self->throw("You need to give a marker a default map before you can set positions without explicit map!" );
+
+    if (ref($where) && $where->isa('Bio::Map::PositionI')) {
+        # Give a map-less Position the resolved map, then register it.
+        $where->map($map) unless $map_of_position;
+        return $self->SUPER::add_position($where);
+    }
+
+    # Scalar value: building the Position object also adds it to us.
+    return $self->get_position_object($map, $where);
+}
+
+=head2 positions
+
+ Title   : positions
+ Usage   : $marker->positions([$pos1, $pos2, $pos3]);
+ Function: Add multiple Bio::Map::PositionI to this marker
+ Returns : n/a
+ Args    : array ref of $map/value tuples or array ref of Bio::Map::PositionI
+
+=cut
+
+sub positions {
+    my ($self, $args_ref) = @_;
+
+    # Each entry is either an array ref, which is flattened into
+    # add_position()'s argument list, or a single item passed as it is.
+    for my $entry (@{$args_ref}) {
+        my @call_args = ref($entry) eq 'ARRAY' ? @{$entry} : ($entry);
+        $self->add_position(@call_args);
+    }
+}
+
+=head2 in_map
+
+ Title   : in_map
+ Usage   : if ( $marker->in_map($map) ) {}
+ Function: Tests if this marker is found on a specific map
+ Returns : boolean
+ Args    : a map unique id OR Bio::Map::MapI
+
+=cut
+
+sub in_map {
+    my ($self, $query) = @_;
+    defined($query) or $self->throw("Must supply an argument");
+
+    # Any reference is handed to the parent class implementation.
+    return $self->SUPER::in_map($query) if ref($query) ne '';
+
+    # A plain scalar is compared as a string against the unique id of
+    # every known map; maps without a true unique id are skipped.
+    for my $known ($self->known_maps) {
+        my $uid = $known->unique_id;
+        next unless $uid;
+        return 1 if $uid eq $query;
+    }
+
+    return 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/MarkerI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/MarkerI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/MarkerI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,154 @@
+# $Id: MarkerI.pm,v 1.16.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::MarkerI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::MarkerI - Interface for basic marker functionality
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+    # See Bio::Map::Marker for an example of
+    # implementation.
+
+=head1 DESCRIPTION
+
+A Marker is a Bio::Map::Mappable with some properties particular to markers.
+It also offers a number of convenience methods to make dealing with map
+elements easier.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho heikki-at-bioperl-dot-org
+Lincoln Stein      lstein at cshl.org
+Jason Stajich      jason at bioperl.org
+Chad Matsalla      bioinformatics1 at dieselwurks.com
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Map::MarkerI;
+use strict;
+
+use base qw(Bio::Map::MappableI);
+
+=head2 get_position_object
+
+ Title   : get_position_object
+ Usage   : my $position = $marker->get_position_object();
+ Function: To get an object of the default Position class
+           for this Marker. Subclasses should redefine this method.
+           The Position returned needs to be a L<Bio::Map::PositionI> with
+		   -element set to self.
+ Returns : L<Bio::Map::PositionI>
+ Args    : none for an 'empty' PositionI object, optionally
+           Bio::Map::MapI and value string to set the Position's -map and -value
+           attributes.
+
+=cut
+
+sub get_position_object {
+    my ($self) = @_;
+    # Interface stub: implementing classes supply the Position factory.
+    return $self->throw_not_implemented();
+}
+
+=head2 position
+
+ Title   : position
+ Usage   : my $position = $mappable->position();
+		   $mappable->position($position);
+ Function: Get/Set the Position of this Marker (where it is on which map),
+           purging all other positions before setting.
+ Returns : L<Bio::Map::PositionI>
+ Args    : Bio::Map::PositionI
+            OR
+           Bio::Map::MapI AND
+           scalar
+            OR
+           scalar, but only if the marker has a default map
+
+=cut
+
+sub position {
+    my ($self) = @_;
+    # Interface stub: implementing classes must override this accessor.
+    return $self->throw_not_implemented();
+}
+
+=head2 positions
+
+ Title   : positions
+ Usage   : $marker->positions([$pos1, $pos2, $pos3]);
+ Function: Add multiple Bio::Map::PositionI to this marker
+ Returns : n/a
+ Args    : array ref of $map/value tuples or array ref of Bio::Map::PositionI
+
+=cut
+
+sub positions {
+    my ($self) = @_;
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+=head2 default_map
+
+ Title   : default_map
+ Usage   : my $map = $marker->default_map();
+ Function: Get/Set the default map for the marker.
+ Returns : L<Bio::Map::MapI>
+ Args    : [optional] new L<Bio::Map::MapI>
+
+=cut
+
+sub default_map {
+    my ($self) = @_;
+    # Interface stub: implementing classes must override this accessor.
+    return $self->throw_not_implemented();
+}
+
+=head2 in_map
+
+ Title   : in_map
+ Usage   : if ( $marker->in_map($map) ) {}
+ Function: Tests if this marker is found on a specific map
+ Returns : boolean
+ Args    : a map unique id OR Bio::Map::MapI
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Microsatellite.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Microsatellite.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Microsatellite.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,337 @@
+# BioPerl module for Bio::Map::Microsatellite
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Microsatellite - An object representing a Microsatellite marker.
+
+=head1 SYNOPSIS
+
+  $o_usat = new Bio::Map::Microsatellite
+      (-name=>'Chad Super Marker 2',
+       -sequence => 'gctgactgatcatatatatatatatatatatatatatatatcgcgatcgtga',
+       -motif => 'at',
+       -repeats => 15,
+       -repeat_start_position => 11
+       );
+
+  $sequence_before_usat = $o_usat->get_leading_flank();
+  $sequence_after_usat = $o_usat->get_trailing_flank();
+
+
+=head1 DESCRIPTION
+
+This object handles the notion of a Microsatellite. This microsatellite can
+be placed on a (linear) Map or used on its own.  If this Microsatellite
+will be used in a mapping context (it doesn't have to, you know) it can have
+multiple positions in a map. For information about a Microsatellite's position
+in a map one must query the associated PositionI object which is accessible
+through the position() method.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho heikki-at-bioperl-dot-org
+Lincoln Stein      lstein at cshl.org
+Jason Stajich      jason at bioperl.org
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Microsatellite;
+use strict;
+
+use base qw(Bio::Map::Marker);
+
+=head2 new
+
+ Title   : new
+ Usage   : $o_usat = new Bio::Map::Microsatellite(-motif => 'at', -repeats => 15);
+ Function: Builds a new Bio::Map::Microsatellite object
+ Returns : Bio::Map::Microsatellite
+ Args    :
+	-name    => name of this microsatellite (optional, string,
+		default 'Unnamed microsatellite')
+        -positions => position(s) for this marker in maps[optional],
+                An array reference of tuples (array refs themselves)
+                Each tuple contains a Bio::Map::MapI-inherited object and a 
+		Bio::Map::PositionI-inherited obj, no default)
+	-sequence => the sequence of this microsatellite (optional,
+		 scalar, no default)
+	-motif => the repeat motif of this microsatellite (optional,
+		 scalar, default 'Unknown motif')
+	-repeats => the number of motif repeats for this microsatellite
+		(optional, scalar, no default)
+	-repeat_start_position => the starting position of the
+		microsatellite in this sequence. The first base of the
+		sequence is position "1". (optional, scalar, no default)
+
+ Note    : Creating a Bio::Map::Microsatellite object with no position
+	might be useful for microsatellite people wanting to embrace
+	and extend this module. <raising hand> Me! Me! Me!
+	- using repeat_start_position will trigger a mechanism to
+	calculate a value for repeat_end_position. 
+
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);    # parent constructor sees the full argument list
+    my ($map, $position, $sequence, $motif, $repeats, $start) =
+        $self->_rearrange([qw(MAP
+                              POSITION
+                              SEQUENCE
+                              MOTIF
+                              REPEATS
+                              REPEAT_START_POSITION
+                              )], @args);
+    if( ! $self->name ) {
+        $self->name('Unnamed microsatellite');    # fallback when no true name is set
+    }
+    $map && $self->map($map);
+    $position && $self->position($position);
+    $sequence && $self->sequence($sequence);
+    $self->motif(defined $motif ? $motif : 'Unknown motif');
+    $repeats && $self->repeats($repeats);
+    $start && $self->repeat_start_position($start);    # last: also derives the end position
+    return $self;
+}
+
+=head2 motif
+
+ Title   : motif
+ Usage   : $o_usat->motif($new_motif);
+	       my $motif = $o_usat->motif();
+ Function: Get/Set the repeat motif for this Microsatellite.
+ Returns : A scalar representing the current repeat motif of this
+	       Microsatellite.
+ Args    : none to get, OR string to set
+
+=cut
+
+sub motif {
+    my $self = shift;
+    my $new_motif = shift;
+    # A false argument (undef, '', 0) is treated as a plain read.
+    $self->{'_motif'} = $new_motif if $new_motif;
+    return $self->{'_motif'};
+}
+
+=head2 sequence
+
+ Title   : sequence
+ Usage   : $o_usat->sequence($new_sequence);
+	       my $sequence = $o_usat->sequence();
+ Function: Get/Set the sequence for this Microsatellite.
+ Returns : A scalar representing the current sequence of this
+	       Microsatellite.
+ Args    : none to get, OR string to set
+
+=cut
+
+sub sequence {
+    my $self = shift;
+    my $new_sequence = shift;
+    # A false argument (undef, '', 0) is treated as a plain read.
+    $self->{'_sequence'} = $new_sequence if $new_sequence;
+    return $self->{'_sequence'};
+}
+
+=head2 repeats
+
+ Title   : repeats
+ Usage   : $o_usat->repeats($new_repeats);
+	       my $repeats = $o_usat->repeats()
+ Function: Get/Set the number of repeats for this Microsatellite.
+ Returns : A scalar representing the current number of repeats of this
+	       Microsatellite.
+ Args    : none to get, OR int to set
+
+=cut
+
+sub repeats {
+    my $self = shift;
+    my $count = shift;
+    # A false count (undef, '', 0) is treated as a plain read and changes nothing.
+    $self->{'_repeats'} = $count if $count;
+    return $self->{'_repeats'};
+}
+
+=head2 repeat_start_position
+
+ Title   : repeat_start_position
+ Usage   : $o_usat->repeat_start_position($new_repeat_start_position);
+	       my $repeat_start_position = $o_usat->repeat_start_position();
+ Function: Get/Set the repeat start position for this
+	       Microsatellite
+ Returns : A scalar representing the repeat start position for this 
+	       Microsatellite.
+ Args    : none to get, OR string to set
+	       This method will also try to set the repeat end position. This
+	       depends on having information for the motif and the number of
+	       repeats. If you want to use methods like get_trailing_flank or
+	       get_leading_flank, be careful to include the right information.
+
+=cut
+
+sub repeat_start_position {
+    my $self  = shift;
+    my $start = shift;
+    return $self->{'_repeat_start_position'} unless $start;    # read (also for 0 / '')
+    $self->{'_repeat_start_position'} = $start;
+    $self->repeat_end_position("set");    # recompute end from start, motif and repeats
+    return $self->{'_repeat_start_position'};
+}
+
+=head2 repeat_end_position
+
+ Title   : repeat_end_position
+ Usage   : $o_usat->repeat_end_position("set");
+	       $o_usat->repeat_end_position($value);
+	       $current_repeat_end_position = $o_usat->repeat_end_position();
+ Function: Get/set the end position of the repeat in this sequence.
+ Returns : A scalar representing the base index of the end of the
+	       repeat in this Microsatellite. The first base in the sequence
+	       is base 1.
+ Args    : A scalar representing a value, the string "set", or no
+	       argument (see Notes).
+ Notes   : If you do not provide an argument to this method, the current
+           end position of the repeat in this Microsatellite will be
+           returned (a scalar).
+           If you provide the string "set" to this method it will set the
+           end position based on the start position, the length of the
+           motif, and the number of repeats.
+           If you specify a value the current end position of the repeat
+           will be set to that value. This is a really bad idea. Don't do
+           it.
+
+=cut
+
+sub repeat_end_position {
+    my ($self, $arg) = @_;
+    # No argument at all: plain read of the stored value.
+    return $self->{'_repeat_end_position'} unless defined $arg;
+    if ($arg eq "set") {
+        # Derive the end from the stored start plus motif length times repeat count.
+        my $span = length($self->motif()) * $self->repeats();
+        $self->{'_repeat_end_position'} = $self->{'_repeat_start_position'} + $span;
+    }
+    elsif ($arg) {
+        $self->{'_repeat_end_position'} = $arg;    # explicit override; false values ignored
+    }
+    return $self->{'_repeat_end_position'};
+}
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if ($mappable->equals($mapable2)) {...}
+ Function: Test if a position is equal to another position
+ Returns : boolean
+ Args    : Bio::Map::MappableI
+
+=cut
+
+sub equals {
+    my ($self, @args) = @_;    # arguments are accepted but unused
+    $self->throw_not_implemented();
+}
+
+=head2 less_than
+
+ Title   : less_than
+ Usage   : if ($mappable->less_than($m2)) {...}
+ Function: Tests if a position is less than another position
+ Returns : boolean
+ Args    : Bio::Map::MappableI
+
+=cut
+
+sub less_than {
+    my ($self, @args) = @_;    # arguments are accepted but unused
+    $self->throw_not_implemented();
+}
+
+=head2 greater_than
+
+ Title   : greater_than
+ Usage   : if ($mappable->greater_than($m2)) {...}
+ Function: Tests if position is greater than another position
+ Returns : boolean
+ Args    : Bio::Map::MappableI
+
+=cut
+
+sub greater_than {
+    my ($self, @args) = @_;    # arguments are accepted but unused
+    $self->throw_not_implemented();
+}
+
+=head2 get_leading_flank
+
+ Title   : get_leading_flank
+ Usage   : $leading_sequence = $o_usat->get_leading_flank();
+ Returns : A scalar representing the sequence before the repeats in this
+	       Microsatellite.
+ Args    : none
+
+=cut
+
+sub get_leading_flank {
+    my ($self) = @_;    # takes the first (repeat start - 1) characters of the sequence
+    return substr($self->sequence(), 0, $self->repeat_start_position() - 1);
+}
+
+=head2 get_trailing_flank
+
+ Title   : get_trailing_flank
+ Usage   : $trailing_flank = $o_usat->get_trailing_flank();
+ Returns : A scalar representing the sequence after the repeats in this
+	       Microsatellite.
+ Args    : none
+
+=cut
+
+sub get_trailing_flank {
+    my ($self) = @_;    # everything from offset (repeat end - 1) to the end of the sequence
+    return substr($self->sequence(), $self->repeat_end_position() - 1);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPosition.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPosition.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPosition.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,203 @@
+# BioPerl module for Bio::Map::OrderedPosition
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::OrderedPosition - Abstracts the notion of a member
+	of an ordered list of markers. Each marker is a certain distance
+	from the one in the ordered list before it.
+
+=head1 SYNOPSIS
+
+    use Bio::Map::OrderedPosition;
+	# the first marker in the sequence
+    my $position = new Bio::Map::OrderedPosition(-order => 1,
+			-positions => [ [ $map, 22.3] ] );
+	# the second marker in the sequence, 15.6 units from the first one
+    my $position2 = new Bio::Map::OrderedPosition(-order => 2,
+			-positions => [ [ $map, 37.9] ] );
+	# the third marker in the sequence, coincidental with the second
+	# marker
+    my $position3 = new Bio::Map::OrderedPosition(-order => 3,
+                        -positions => [ [ $map, 37.9]] );
+
+=head1 DESCRIPTION
+
+This object is an implementation of the PositionI interface and the
+Position object handles the specific values of a position.
+OrderedPosition is intended to be slightly more specific than Position
+but only specific enough for a parser from the MarkerIO subsystem to
+create and then pass to a client application to bless into the proper
+type. For an example of how this is intended to work, see the
+Mapmaker.pm.
+
+No units are assumed here - units are handled by context of which Map
+a position is placed in.
+
+See Bio::Map::Position for additional information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein, lstein at cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Jason Stajich, jason at bioperl.org
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::OrderedPosition;
+use strict;
+
+
+use base qw(Bio::Map::Position);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Map::OrderedPosition();
+ Function: Builds a new Bio::Map::OrderedPosition object 
+ Returns : Bio::Map::OrderedPosition
+ Args    : -order : The order of this position
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);    # parent constructor sees the full argument list
+    
+    my ($order) = $self->_rearrange([qw(ORDER)], @args);
+    $order && $self->order($order);          # only a true -order value is stored
+    
+    return $self;
+}
+
+=head2 order
+
+ Title   : order
+ Usage   : $o_position->order($new_order);
+           my $order = $o_position->order();
+ Function: Get/set the order position of this position in a map.
+ Returns : int, the order of this position
+ Args    : none to get, OR int to set
+
+=cut
+
+sub order {
+    my $self = shift;
+    my $new_order = shift;
+    $self->{'_order'} = $new_order if $new_order;    # false values never overwrite
+    return $self->{'_order'} if $self->{'_order'};
+    return;    # nothing (undef / empty list) when no true order is stored
+}
+
+=head2 sortable
+
+ Title   : sortable
+ Usage   : my $num = $position->sortable();
+ Function: Read-only method that is guaranteed to return a value suitable
+           for correctly sorting this kind of position amongst other positions
+           of the same kind on the same map. Note that sorting different kinds
+           of position together is unlikely to give sane results.
+ Returns : numeric
+ Args    : none
+
+=cut
+
+sub sortable {
+    my ($self) = @_;    # the stored order doubles as the sort key
+    return $self->order();
+}
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if ($mappable->equals($mapable2)) {...}
+ Function: Test if a position is equal to another position.
+ Returns : boolean
+ Args    : Bio::Map::PositionI
+
+=cut
+
+sub equals {
+    my ($self, $other) = @_;
+    return 0 unless defined $other && $other->isa('Bio::Map::OrderedPosition');
+    return ($other->order == $self->order);    # numeric comparison of the two orders
+}
+
+# admittedly these aren't really the best comparisons in the world
+# but it is a first pass we'll need to refine the algorithm or not 
+# provide general comparisons and require these to be implemented
+# by objects closer to the specific type of data
+
+=head2 less_than
+
+ Title   : less_than
+ Usage   : if ($mappable->less_than($m2)) {...}
+ Function: Tests if a position is less than another position
+           It is assumed that 2 positions are in the same map.
+ Returns : boolean
+ Args    : Bio::Map::PositionI
+
+=cut
+
+sub less_than {
+   my ($self,$compare) = @_;
+   return 0 if (! defined $compare || ! $compare->isa('Bio::Map::OrderedPosition'));
+   return ($self->order < $compare->order);    # true when this position's order is lower
+}
+
+=head2 greater_than
+
+ Title   : greater_than
+ Usage   : if ($mappable->greater_than($m2)) {...}
+ Function: Tests if position is greater than another position.
+           It is assumed that 2 positions are in the same map.
+ Returns : boolean
+ Args    : Bio::Map::PositionI
+
+=cut
+
+sub greater_than {
+   my ($self,$compare) = @_;
+   return 0 if (! defined $compare || ! $compare->isa('Bio::Map::OrderedPosition'));
+   return ($self->order > $compare->order);    # true when this position's order is higher
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPositionWithDistance.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPositionWithDistance.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/OrderedPositionWithDistance.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,147 @@
+### TO BE DELETED ###
+
+# BioPerl module for Bio::Map::OrderedPositionWithDistance
+#
+# Cared for by Chad Matsalla <bioinformatics1 at dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::OrderedPositionWithDistance - Abstracts the notion of a member
+	of an ordered list of markers. Each marker is a certain distance
+	from the one in the ordered list before it.
+
+=head1 SYNOPSIS
+
+    use Bio::Map::OrderedPositionWithDistance;
+	# the first marker in the sequence
+    my $position = new Bio::Map::OrderedPositionWithDistance(-positions => 1,
+			-distance => 22.3 );
+	# the second marker in the sequence, 15.6 units from the first one
+    my $position2 = new Bio::Map::OrderedPositionWithDistance(-positions => 2,
+			-distance => 15.6 );
+	# the third marker in the sequence, coincidental with the second
+	# marker
+    my $position3 = new Bio::Map::OrderedPositionWithDistance(-positions => 3,
+			-distance => 0 );
+
+
+=head1 DESCRIPTION
+
+This object is an implementation of the PositionI interface and the
+Position object handles the specific values of a position.
+OrderedPositionWithDistance is intended to be slightly more specific
+than Position but only specific enough for a parser from the MarkerIO
+subsystem to create and then pass to a client application to bless into
+the proper type. For an example of how this is intended to work, see the
+Mapmaker.pm.
+
+No units are assumed here - units are handled by context of which Map
+a position is placed in.
+
+See Bio::Map::Position for additional information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein, lstein at cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Map::OrderedPositionWithDistance;
+use strict;
+
+
+use base qw(Bio::Map::Position);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Map::OrderedPositionWithDistance();
+ Function: Builds a new Bio::Map::OrderedPositionWithDistance object 
+ Returns : Bio::Map::OrderedPositionWithDistance
+ Args    : -positions  - Should be a single value representing the order
+	of this marker within the list of markers
+	-distance - The distance this marker is from the marker before it.
+		0 reflects coincidentality.
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  $self->{'_positions'} = [];
+  my ($positions,$distance) = $self->_rearrange([qw(POSITIONS DISTANCE)], @args);
+  if( ref($positions) =~ /array/i ) { 
+      foreach my $p ( @$positions ) {
+          $self->add_position($p);
+      }
+  } else { 
+      $self->add_position($positions);
+  }
+  # A distance of 0 is meaningful (coincident markers), so test definedness, not truth.
+  defined $distance && $self->distance($distance);
+  return $self;
+
+}
+
+
+=head2 distance($new_distance)
+
+ Title   : distance($new_distance)
+ Usage   : $position->distance(new_distance) _or_
+        $position->distance()
+ Function: get/set the distance of this position from the previous marker
+ Returns : A scalar representing the current distance for this position.
+ Args    : If $new_distance is provided the distance of this Position will
+        be set to $new_distance
+
+=cut
+
+sub distance {
+        my ($self,$distance) = @_;
+        if (defined $distance) {    # any defined value, including 0, is stored
+           $self->{'_distance'} = $distance;
+        }
+        return $self->{'_distance'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Physical.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Physical.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Physical.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1314 @@
+# $Id: Physical.pm,v 1.8.4.3 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::Physical
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright AGCoL
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Physical - A class for handling a Physical Map (such as FPC)
+
+=head1 SYNOPSIS
+
+    use Bio::MapIO;
+
+    # acquire a Bio::Map::Physical using Bio::MapIO::fpc
+    my $mapio = new Bio::MapIO(-format => "fpc",-file => "rice.fpc",
+                               -readcor => 0);
+
+    my $physical = $mapio->next_map();
+
+    # get all the markers ids
+    foreach my $marker ( $physical->each_markerid() ) {
+      print "Marker $marker\n";
+
+      # acquire the marker object using Bio::Map::FPCMarker
+      my $markerobj = $physical->get_markerobj($marker);
+
+      # get all the clones hit by this marker
+      foreach my $clone ($markerobj->each_cloneid() ) {
+          print " +++$clone\n";
+      }
+  }
+
+=head1 DESCRIPTION
+
+This class is basically a container class for a collection of Contig maps and
+other physical map information.
+
+Bio::Map::Physical has been tailored to work for FPC physical maps, but
+could probably be used for others as well (with the appropriate MapIO
+module).
+
+This class also has some methods with specific functionalities:
+
+  print_gffstyle()     : Generates GFF; either Contigwise[Default] or
+                         Groupwise
+
+  print_contiglist()   : Prints the list of Contigs, markers that hit the
+                         contig, the global position and whether the marker
+                         is a placement (<P>) or a Framework (<F>) marker.
+
+  print_markerlist()   : Prints the markers list; contig and corresponding
+                         number of clones.
+
+  matching_bands()     : Given two clones [and tolerance], this method
+                         calculates how many matching bands they have.
+
+  coincidence_score()  : Given two clones [, tolerance and gellen], this
+                         method calculates the Sulston Coincidence score.
+
+For faster access and better optimization, the data is stored internally in
+hashes. The corresponding objects are created on request.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Gaurav Gupta
+
+Email gaurav at genome.arizona.edu
+
+=head1 CONTRIBUTORS
+
+Sendu Bala  bix at sendu.me.uk
+
+=head1 PROJECT LEADERS
+
+Jamie Hatfield      jamie at genome.arizona.edu
+Dr. Cari Soderlund  cari at genome.arizona.edu
+
+=head1 PROJECT DESCRIPTION
+
+The project was done in Arizona Genomics Computational Laboratory (AGCoL)
+at University of Arizona.
+
+This work was funded by USDA-IFAFS grant #11180 titled "Web Resources for 
+the Computation and Display of Physical Mapping Data".
+
+For more information on this project, please refer: 
+  http://www.genome.arizona.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Physical;
+use vars qw($MAPCOUNT);
+use strict;
+use POSIX;
+
+use Bio::Map::Clone;
+use Bio::Map::Contig;
+use Bio::Map::FPCMarker;
+
+use base qw(Bio::Map::SimpleMap);
+BEGIN { $MAPCOUNT = 1; }
+
+=head1 Access Methods
+
+These methods let you get and set the member variables
+
+=head2 version
+
+ Title   : version
+ Usage   : my $version = $map->version();
+ Function: Get/set the version of the program used to
+           generate this map
+ Returns : scalar representing the version
+ Args    : none to get, OR string to set
+
+=cut
+
+sub version {
+    my $self = shift;
+    my $new_version = shift;
+    # Any defined argument (including '' or 0) replaces the stored value.
+    $self->{'_version'} = $new_version if defined $new_version;
+    return $self->{'_version'};
+}
+
+=head2 modification_user
+
+ Title   : modification_user
+ Usage   : my $modification_user = $map->modification_user();
+ Function: Get/set the name of the user who last modified this map
+ Returns : scalar representing the username
+ Args    : none to get, OR string to set
+
+=cut
+
+sub modification_user {
+    my $self = shift;
+    my $new_user = shift;
+    # Any defined argument (including '' or 0) replaces the stored value.
+    $self->{'_modification_user'} = $new_user if defined $new_user;
+    return $self->{'_modification_user'};
+}
+
+=head2 group_type
+
+ Title   : group_type
+ Usage   : $map->group_type($grptype);
+	       my $grptype = $map->group_type();
+ Function: Get/set the group type of this map
+ Returns : scalar representing the group type
+ Args    : none to get, OR string to set
+
+=cut
+
+sub group_type {
+    my $self = shift;
+    my $new_type = shift;
+    # Any defined argument (including '' or 0) replaces the stored value.
+    $self->{'_grouptype'} = $new_type if defined $new_type;
+    return $self->{'_grouptype'};
+}
+
+=head2 group_abbr
+
+ Title   : group_abbr
+ Usage   : $map->group_abbr($grpabbr);
+	       my $grpabbr = $map->group_abbr();
+ Function: get/set the group abbrev of this map
+ Returns : string representing the group abbrev
+ Args    : none to get, OR string to set
+
+=cut
+
+sub group_abbr {
+    my $self = shift;
+    my $new_abbr = shift;
+    # Any defined argument (including '' or 0) replaces the stored value.
+    $self->{'_groupabbr'} = $new_abbr if defined $new_abbr;
+    return $self->{'_groupabbr'};
+}
+
+=head2 core_exists
+
+ Title   : core_exists
+ Usage   : my $core_exists = $map->core_exists();
+ Function: Get/set if the FPC file is accompanied by COR file
+ Returns : boolean
+ Args    : none to get, OR 1|0 to set
+
+=cut
+
+sub core_exists {
+    my $self = shift;
+    my $flag = shift;
+    # A defined argument is normalised to exactly 1 or 0 before being stored.
+    $self->{'_corexists'} = ($flag ? 1 : 0) if defined $flag;
+    return $self->{'_corexists'};
+}
+
+=head2 each_cloneid
+
+ Title   : each_cloneid
+ Usage   : my @clones = $map->each_cloneid();
+ Function: returns an array of clone names
+ Returns : list of clone names
+ Args    : none
+
+=cut
+
+sub each_cloneid {
+    my $self = shift;    # clone names are the keys of the internal '_clones' hash
+    return keys %{ $self->{'_clones'} };
+}
+
+=head2 get_cloneobj
+
+ Title   : get_cloneobj
+ Usage   : my $cloneobj = $map->get_cloneobj('CLONEA');
+ Function: returns an object of the clone given in the argument
+ Returns : object of the clone
+ Args    : scalar representing the clone name
+
+=cut
+
+sub get_cloneobj {
+    my ($self,$clone) = @_;
+
+    return 0 if(!defined($clone));    # NOTE: undef yields 0; the other misses yield empty
+    return if($clone eq "");
+    return if(!exists($self->{'_clones'}{$clone}));
+
+    my ($type,$contig,$bands,$gel,$group,$remark,$fp_number);
+    my ($sequence_type,$sequence_status,$fpc_remark,@amatch,@pmatch,@ematch,
+        $startrange,$endrange);
+    my %clones = %{$self->{'_clones'}{$clone}};    # shallow copy of the stored record
+    my @markers;
+    # Reuse the object cached by an earlier call, if there is one.
+    if (ref($clones{'clone'}) eq 'Bio::Map::Clone') {
+        return $clones{'clone'};
+    }
+    # Copy over whichever fields are present in the record.
+    $type    = $clones{'type'}              if (exists($clones{'type'}));
+    @markers = (keys %{$clones{'markers'}}) if (exists($clones{'markers'}));
+    $contig  =  $clones{'contig'}           if (exists($clones{'contig'}));
+    $bands   =  $clones{'bands'}            if (exists($clones{'bands'}));
+    $gel     =  $clones{'gel'}              if (exists($clones{'gel'}));
+    $group   =  $clones{'group'}            if (exists($clones{'group'}));
+    $remark  =  $clones{'remark'}           if (exists($clones{'remark'}));
+
+    $fp_number  =  $clones{'fp_number'}  if (exists($clones{'fp_number'}));
+    $fpc_remark =  $clones{'fpc_remark'} if (exists($clones{'fpc_remark'}));
+
+    $sequence_type   =  $clones{'sequence_type'}
+        if (exists($clones{'sequence_type'}));
+    $sequence_status =  $clones{'sequence_status'}
+        if (exists($clones{'sequence_status'} ));
+    # Only the keys of the match hashes are kept.
+    @amatch  =  (keys %{$clones{'matcha'}})  if (exists($clones{'matcha'}));
+    @ematch  =  (keys %{$clones{'matche'}})  if (exists($clones{'matche'}));
+    @pmatch  =  (keys %{$clones{'matchp'}})  if (exists($clones{'matchp'}));
+
+    $startrange =  $clones{'range'}{'start'}
+        if (exists($clones{'range'}{'start'}));
+    $endrange   =  $clones{'range'}{'end'}
+        if (exists($clones{'range'}{'end'}));
+
+    #*** why doesn't it call Bio::Map::Clone->new ? Seems dangerous...
+    my $cloneobj = bless( {
+        _name       => $clone,
+        _markers    => \@markers,
+        _contig     => $contig,
+        _type       => $type,
+        _bands      => $bands,
+        _gel        => $gel,
+        _group      => $group,
+        _remark     => $remark,
+        _fpnumber   => $fp_number,
+        _sequencetype   => $sequence_type,
+        _sequencestatus => $sequence_status,
+        _fpcremark      => $fpc_remark,
+        _matche     => \@ematch,
+        _matcha     => \@amatch,
+        _matchp     => \@pmatch,
+        _range      => Bio::Range->new(-start => $startrange,
+                                       -end   => $endrange),
+    }, 'Bio::Map::Clone');
+    # Cache the object in the map's own record so later calls return the same instance.
+    $self->{'_clones'}{$clone}{'clone'} = $cloneobj;
+    return $cloneobj;
+}
+
+=head2 each_markerid
+
+ Title   : each_markerid
+ Usage   : my @markers = $map->each_markerid();
+ Function: returns list of marker names
+ Returns : list of marker names
+ Args    : none
+
+=cut
+
+sub each_markerid {
+    my $self = shift;    # marker names are the keys of the internal '_markers' hash
+    return keys %{ $self->{'_markers'} };
+}
+
+=head2 get_markerobj
+
+ Title   : get_markerobj
+ Usage   : my $markerobj = $map->get_markerobj('MARKERA');
+ Function: returns an object of the marker given in the argument
+ Returns : object of the marker
+ Args    : scalar representing the marker name
+
+=cut
+
+sub get_markerobj {
+    my ($self,$marker) = @_;
+
+    return 0 if(!defined($marker));    # NOTE: undef yields 0; the other misses yield empty
+    return if($marker eq "");
+    return if(!exists($self->{'_markers'}{$marker}));
+
+    my ($global,$framework,$group,$anchor,$remark,$type,$subgroup);
+    my %mkr = %{$self->{'_markers'}{$marker}};    # shallow copy of the stored record
+    # Reuse the object cached by an earlier call, if there is one.
+    return $mkr{'marker'} if (ref($mkr{'marker'}) eq 'Bio::Map::FPCMarker');
+
+    $type       = $mkr{'type'}       if(exists($mkr{'type'}));
+    $global     = $mkr{'global'}     if(exists($mkr{'global'} ));
+    $framework  = $mkr{'framework'}  if(exists($mkr{'framework'}));
+    $anchor     = $mkr{'anchor'}     if(exists($mkr{'anchor'}));
+    $group      = $mkr{'group'}      if(exists($mkr{'group'}));
+    $subgroup   =  $mkr{'subgroup'}  if(exists($mkr{'subgroup'}));
+    $remark     =  $mkr{'remark'}    if(exists($mkr{'remark'}));
+
+    my %clones  = %{$mkr{'clones'}};
+    my %contigs = %{$mkr{'contigs'}};
+    # Declared unconditionally: 'my ... if COND' has undefined behaviour in Perl.
+    my %markerpos = exists($mkr{'posincontig'}) ? %{$mkr{'posincontig'}} : ();
+
+    #*** why doesn't it call Bio::Map::FPCMarker->new ? Seems dangerous...
+    my $markerobj = bless( {
+        _name    => $marker,
+        _type    => $type,
+        _global  => $global,
+        _frame   => $framework,
+        _group   => $group,
+        _subgroup   => $subgroup,
+        _anchor     => $anchor,
+        _remark     => $remark,
+        _clones     => \%clones,
+        _contigs    => \%contigs,
+        _position   => \%markerpos,
+    }, 'Bio::Map::FPCMarker');
+    # Cache the object in the map's own record so later calls return the same instance.
+    $self->{'_markers'}{$marker}{'marker'} = $markerobj;
+    return $markerobj;
+}
+
+=head2 each_contigid
+
+ Title   : each_contigid
+ Usage   : my @contigs = $map->each_contigid();
+ Function: returns a list of contigs (numbers)
+ Returns : list of contigs
+ Args    : none
+
+=cut
+
+sub each_contigid {
+    # Only the invocant is needed; contig records live under '_contigs'.
+    my $self = shift;
+
+    # keys() gives the contig ids in list context and their count in scalar
+    # context. The ids come back in the hash's own (unspecified) order.
+    return keys %{ $self->{'_contigs'} };
+}
+
+=head2 get_contigobj
+
+ Title   : get_contigobj
+ Usage   : my $contigobj = $map->get_contigobj('CONTIG1');
+ Function: returns an object of the contig given in the argument
+ Returns : object of the contig
+ Args    : scalar representing the contig number
+
+=cut
+
+sub get_contigobj {
+    my ($self, $contig) = @_;
+
+    # Historical return values kept for backward compatibility: an undefined
+    # id yields 0, an empty or unknown id yields undef / the empty list.
+    return 0 if !defined $contig;
+    return   if $contig eq "";
+    return   if !exists $self->{'_contigs'}{$contig};
+
+    # One-level copy of the raw contig record; nested structures are shared.
+    my %ctg = %{ $self->{'_contigs'}{$contig} };
+
+    # A previous call already built and cached the object: hand it back.
+    return $ctg{'contig'} if ref($ctg{'contig'}) eq 'Bio::Map::Contig';
+
+    # Optional nested fields. Declared unconditionally because the behaviour
+    # of "my ... if ..." is undefined (perlsyn); absent entries become empty.
+    my $range   = $ctg{'range'} || {};
+    my %clones  = %{ $ctg{'clones'}  || {} };
+    my %markers = %{ $ctg{'markers'} || {} };
+
+    #*** why doesn't it call Bio::Map::Contig->new ? Seems dangerous...
+    # Scalar fields that are absent from the record simply come out undef.
+    my $contigobj = bless( {
+        _group    => $ctg{'group'},
+        _subgroup => $ctg{'subgroup'},
+        _anchor   => $ctg{'anchor'},
+        _markers  => \%markers,
+        _clones   => \%clones,
+        _name     => $contig,
+        _cremark  => $ctg{'chr_remark'},
+        _uremark  => $ctg{'usr_remark'},
+        _tremark  => $ctg{'trace_remark'},
+        _position => $ctg{'position'},
+        _range    => Bio::Range->new(-start => $range->{'start'},
+                                     -end   => $range->{'end'}),
+    }, 'Bio::Map::Contig');
+
+    # Cache in the map's own record (not in the local copy) for later calls.
+    $self->{'_contigs'}{$contig}{'contig'} = $contigobj;
+    return $contigobj;
+}
+
+=head2 matching_bands
+
+ Title   : matching_bands
+ Usage   : $self->matching_bands('cloneA','cloneB',[$tol]);
+ Function: given two clones [and a tolerance], this method calculates how
+           many matching bands they have.
+           (this method is ported directly from FPC)
+ Returns : scalar representing the number of matching bands
+ Args    : names of the clones ('cloneA', 'cloneB') [Default tolerance=7]
+
+=cut
+
+sub matching_bands {
+    my ($self, $cloneA, $cloneB, $tol) = @_;
+
+    # Nothing to compare without both names and loaded core (band) data.
+    return 0 if !defined($cloneA) || !defined($cloneB)
+             || !$self->core_exists();
+
+    $tol = 7 if !defined $tol;
+
+    # Work on the stored band lists directly instead of copying the whole
+    # clone table on every call. A plain hash fetch does not autovivify, so
+    # looking up an unknown clone leaves the table untouched.
+    my $clones = $self->{'_clones'} || {};
+    my ($bandsA, $bandsB) = map {
+        my $rec = $clones->{$_};
+        $rec ? $rec->{'bands'} : undef;
+    } ($cloneA, $cloneB);
+
+    # Unknown clone or clone without bands: there is nothing to match.
+    return 0 unless $bandsA && $bandsB;
+
+    my $match  = 0;
+    my $lstart = 0;    # first index of B not yet consumed by a match
+
+    # The scan as ported from FPC: for each band of A, walk B from $lstart.
+    # Stop at the first band within tolerance (a match, which consumes that
+    # B band), or at the first B band larger than the A band.
+    # NOTE(review): this presumably expects both lists in ascending order -
+    # confirm against the loader.
+    foreach my $kband (@{$bandsA}) {
+        for (my $j = $lstart; $j < scalar(@{$bandsB}); $j++) {
+            my $diff = $kband - $bandsB->[$j];
+            if (abs($diff) <= $tol) {
+                $match++;
+                $lstart = $j + 1;
+                last;
+            }
+            elsif ($diff < 0) {
+                $lstart = $j;
+                last;
+            }
+        }
+    }
+    return $match;
+}
+
+=head2 coincidence_score
+
+ Title   : coincidence_score
+ Usage   : $self->coincidence_score('cloneA','cloneB'[,$tol,$gellen]);
+ Function: given two clones [, tolerance and gellen], this method calculates
+           the Sulston Coincidence score.
+           (this method is ported directly from FPC)
+ Returns : scalar representing the Sulston coincidence score.
+ Args    : names of the clones ('cloneA', 'cloneB')
+           [Default tol=7 gellen=3300.0]
+
+=cut
+
+sub coincidence_score {
+    my ($self, $cloneA, $cloneB, $tol, $gellen) = @_;
+
+    # Nothing to score without both names and loaded core (band) data.
+    return 0 if !defined($cloneA) || !defined($cloneB)
+             || !$self->core_exists();
+
+    $gellen = 3300.0 if !defined $gellen;
+    $tol    = 7      if !defined $tol;
+
+    # Band counts straight from the stored records (no copy of the whole
+    # clone table). An unknown clone or one without bands counts as 0 bands.
+    my $clones = $self->{'_clones'} || {};
+    my ($numbandsA, $numbandsB) = map {
+        my $rec   = $clones->{$_};
+        my $bands = $rec ? $rec->{'bands'} : undef;
+        $bands ? scalar(@{$bands}) : 0;
+    } ($cloneA, $cloneB);
+
+    # $nL / $nH: the lower and the higher of the two band counts.
+    my ($nL, $nH) = $numbandsA > $numbandsB
+                  ? ($numbandsB, $numbandsA)
+                  : ($numbandsA, $numbandsB);
+
+    my $m = $self->matching_bands($cloneA, $cloneB, $tol);
+
+    # $logfact[$i] = log($i!) for 0 .. $nL, used for the binomial term.
+    my @logfact = (0.0, 0.0);
+    foreach my $i (2 .. $nL) {
+        $logfact[$i] = $logfact[$i - 1] + log($i);
+    }
+
+    # $psmn: chance that one given band misses one given position;
+    # $pp:   chance that it misses all $nH bands of the larger clone.
+    my $psmn = 1.0 - ((2 * $tol) / $gellen);
+    my $pp   = $psmn ** $nH;
+    my $t    = 1e-37;    # floor, so the score is never exactly zero
+
+    if ($pp >= 1) {
+        # Degenerate case (no bands at all, or zero tolerance): a chance
+        # match is impossible, so log(1 - $pp) would be log(0), which is
+        # fatal in Perl. "At least $m matches" is then certain for $m == 0
+        # and impossible otherwise.
+        $t += 1 if $m <= 0;
+    }
+    else {
+        my $pa = log($pp);
+        my $pb = log(1 - $pp);
+
+        # Binomial tail: probability of $m or more chance matches among
+        # the $nL bands of the smaller clone.
+        foreach my $n ($m .. $nL) {
+            my $c = $logfact[$nL] - $logfact[$nL - $n] - $logfact[$n];
+            $t += exp($c + ($n * $pb) + (($nL - $n) * $pa));
+        }
+    }
+
+    # "%.e" is precision 0: one significant digit, e.g. "4e-03".
+    return sprintf("%.e", $t);
+}
+
+=head2 print_contiglist
+
+ Title   : print_contiglist
+ Usage   : $map->print_contiglist([showall]); #[Default 0]
+ Function: prints the list of contigs, markers that hit the contig, the
+           global position and whether the marker is a placement (P) or
+           a Framework (F) marker.
+ Returns : none
+ Args    : [showall] [Default 0], 1 includes all the discrepant markers
+
+=cut
+
+sub print_contiglist{
+    # Prints the contig list with plain print (i.e. to the currently
+    # selected output handle): a "Contig List" heading, then for each contig
+    # its markers as "name/position" pairs.
+    #   $showall - optional, defaults to 0; when 1, markers whose group
+    #              differs from the contig's group are printed as well.
+    # Returns nothing meaningful.
+    my ($self,$showall) = @_;
+    my $pos;
+
+    $showall = 0 if (!defined($showall));
+    # One-level copies of the three tables; nested records are shared.
+    my %_contigs = %{$self->{'_contigs'}};
+    my %_markers = %{$self->{'_markers'}};
+    my %_clones  = %{$self->{'_clones'}};
+
+    # Contigs are walked in ascending numeric order of their ids.
+    my @contigs       = $self->each_contigid();
+    my @sortedcontigs = sort {$a <=> $b } @contigs;
+
+    print "\n\nContig List\n\n";
+    foreach my $contig (@sortedcontigs) {
+        # %list:  markers in the contig's own group -> global position
+        #         suffixed with "P" (framework flag 0) or "F" (otherwise).
+        # %alist: other markers -> group abbreviation . marker group.
+        my %list;
+	my %alist;
+	
+	my $ctgAnchor  = $_contigs{$contig}{'anchor'};
+	my $ctgGroup   = $_contigs{$contig}{'group'};	
+	
+	my @mkr = keys ( %{$_contigs{$contig}{'markers'}} );
+	
+	foreach my $marker (@mkr)  {	
+	    my $mrkGroup       = $_markers{$marker}{'group'};
+	    my $mrkGlobal      = $_markers{$marker}{'global'};
+	    my $mrkFramework   = $_markers{$marker}{'framework'};
+	    my $mrkAnchor      = $_markers{$marker}{'anchor'}; 	    	
+
+	    # Contig group contains a digit or word character and is
+	    # numerically non-zero.
+	    if($ctgGroup =~ /\d+|\w/ && $ctgGroup != 0)  {		
+		if ($mrkGroup eq $ctgGroup) {
+		    if ($mrkFramework == 0)  {		
+			$pos = $mrkGlobal."P";
+		    }
+		    else {
+			$pos = $mrkGlobal."F";
+		    }		
+		    $list{$marker} = $pos;
+		}
+		elsif ($showall == 1) {			
+		    my $chr = $self->group_abbr().$mrkGroup;
+		    $alist{$marker} = $chr;
+		} 	
+	    }
+	    elsif ($showall == 1 &&  $ctgGroup !~ /\d+/) {
+		my $chr = $self->group_abbr().$mrkGroup;
+		$alist{$marker} = $chr;
+	    }
+	}
+	
+	my $chr = $ctgGroup;
+	$chr = $self->group_abbr().$ctgGroup if ($ctgGroup =~ /\d+|\w/);
+	
+	# NOTE(review): with $showall the "ctgN" prefix is printed only for
+	# contigs whose group has NO digit/word character, while the marker
+	# pairs below are printed for every contig - this condition looks
+	# inverted; confirm the intended output before changing it.
+	if ($showall == 1 ) {
+	   	
+	    print "   ctg$contig  ", $chr, "  "
+		if ($_contigs{$contig}{'group'} !~ /\d+|\w/);  		
+        }
+	# "ne 0" is a string comparison against "0".
+	elsif ($ctgGroup =~ /\d+|\w/ && $ctgGroup ne 0){
+	        print "   ctg",$contig, "  ",$chr, "  ";
+	}  	
+	
+	# Pairs come out in the hash's own (unspecified) order.
+	while (my ($k,$v) = each %list) {
+            print "$k/$v  ";		
+	}
+	
+	print "\n" if ($showall == 0 && $ctgGroup =~ /\d+|\w/ &&
+		       $ctgGroup ne 0 );
+	
+	if ($showall == 1) {
+            while (my ($k,$v) = each %alist) {
+		print "$k/$v  ";		
+            }  		
+	    print "\n";
+        }
+    }
+}
+
+=head2 print_markerlist
+
+ Title    : print_markerlist
+ Usage    : $map->print_markerlist();
+ Function : prints the marker list; contig and corresponding number of
+            clones for each marker.
+ Returns  : none
+ Args     : none
+
+=cut
+
+sub print_markerlist {
+    my ($self) = @_;
+
+    # Prints, via plain print (i.e. to the currently selected handle), a
+    # "Marker List" heading followed by one line per marker:
+    #     "  <marker>  <contig>/<clone hits>  ..."
+    # where <clone hits> counts the marker's clones lying in that contig.
+
+    # Read the tables in place; no per-call copies are needed.
+    my $markers = $self->{'_markers'} || {};
+    my $clones  = $self->{'_clones'}  || {};
+
+    print "Marker List\n\n";
+
+    foreach my $marker ($self->each_markerid()) {
+        print "  ", $marker, "  ";
+
+        # A marker without a 'clones' entry simply has no hits.
+        my $mrec    = $markers->{$marker};
+        my $mclones = ($mrec && $mrec->{'clones'}) || {};
+
+        # Iterate the clone NAMES only. Walking the hash itself would also
+        # visit its values and count any value that happens to be a clone
+        # name.
+        my %hits_in;
+        foreach my $clone (keys %{$mclones}) {
+            my $crec = $clones->{$clone};
+            next unless $crec && exists $crec->{'contig'};
+            $hits_in{ $crec->{'contig'} }++;
+        }
+
+        # Contig order is the hash's own (unspecified) order.
+        while (my ($ctg, $hits) = each %hits_in) {
+            print "$ctg/$hits  ";
+        }
+        print "\n";
+    }
+}
+
+=head2 print_gffstyle
+
+ Title    : print_gffstyle
+ Usage    : $map->print_gffstyle([style]);
+ Function : prints GFF; either Contigwise (default) or Groupwise
+ Returns  : none
+ Args     : [style] default = 0 contigwise, else
+                              1 groupwise (chromosome-wise).
+
+=cut
+
+sub print_gffstyle {
+    # Prints the map as tab-separated GFF-like lines with plain print (i.e.
+    # to the currently selected output handle).
+    #   $style - optional, defaults to 0. False: one block per contig with
+    #            "ctgN" as the sequence name. True: contigs are laid out one
+    #            after another inside their group (chromosome), separated by
+    #            a margin, and the group name is the sequence name.
+    # Returns nothing meaningful.
+    #
+    # Pass 1 (first foreach) fills %gffcontigs, %gffclones and %gffmarkers
+    # with scaled coordinates; pass 2 (the if/else on $style) prints them.
+    my ($self,$style) = @_;
+
+    $style = 0 if(!defined($style));
+
+    # One-level copies of the three tables; nested records are shared.
+    my %_contigs = %{$self->{'_contigs'}};
+    my %_markers = %{$self->{'_markers'}};
+    my %_clones  = %{$self->{'_clones'}};
+
+    # NOTE(review): $i, $depth, $save_depth, $x, $y, @stack, $k, $j, $s,
+    # $pos and this outer $contig are not used below (the loops declare
+    # their own variables); they look copied from _calc_markerposition.
+    my $i;
+    my ($depth, $save_depth);
+    my ($x, $y);
+    my @stack;
+    my ($k, $j, $s);
+    my $pos;
+    my $contig;
+
+    # Calculate the position for the marker in the contig
+
+    my @contigs       = $self->each_contigid();
+    my @sortedcontigs = sort {$a <=> $b } @contigs;
+    my $offset = 0;
+    my %gffclones;
+    my %gffcontigs;
+    my %gffmarkers;
+    # Every map coordinate is multiplied by this factor before printing.
+    # NOTE(review): presumably base pairs per map unit - confirm.
+    my $basepair = 4096;
+
+    foreach my $contig (@sortedcontigs) {
+        if($_contigs{$contig}{'range'} ) {	
+	    $offset =  $_contigs{$contig}{'range'}{'start'};	
+	
+	    # A contig starting at or below 0 is shifted so that it starts
+	    # at 1; $offset then holds that shift for its clones and markers.
+	    if ($offset <= 0){
+	        $offset = $offset * -1;	
+		$gffcontigs{$contig}{'start'} = 1;
+		$gffcontigs{$contig}{'end'}   =
+		    ($_contigs{$contig}{'range'}{'end'} +
+		     $offset ) * $basepair + 1;				
+	    }
+	    else {
+	        $offset = 0;
+		$gffcontigs{$contig}{'start'} =
+		    $_contigs{$contig}{'range'}{'start'} * $basepair;
+		$gffcontigs{$contig}{'end'}   =
+		    $_contigs{$contig}{'range'}{'end'} * $basepair;
+	    }	    		
+	}
+	else {
+	    # NOTE(review): $offset is not reset here, so a contig without a
+	    # range inherits the offset of the previous contig - confirm.
+	    $gffcontigs{$contig}{'start'} = 1;
+            $gffcontigs{$contig}{'end'}   = 1;		
+	} 	
+	
+	my @clones  =  keys %{$_contigs{$contig}{'clones'}};	
+	foreach my $clone (@clones) {
+	    if(exists ($_clones{$clone}{'range'}) ) {
+	        my $gffclone = $clone;
+		
+		# Coordinates are stored under the name without a trailing
+		# "sd1".
+		$gffclone =~ s/sd1$//;
+		
+		$gffclones{$gffclone}{'start'} =
+		    (($_clones{$clone}{'range'}{'start'} + $offset) *
+		     $basepair + 1);
+
+		$gffclones{$gffclone}{'end'}   =
+		    (($_clones{$clone}{'range'}{'end'}
+		      + $offset) * $basepair + 1);
+	    }
+	
+	    # Contig id that is false (0): a marker's position is taken from
+	    # the midpoint of a clone carrying it.
+	    # NOTE(review): "my ... if ..." has undefined behaviour (perlsyn).
+	    if(!$contig) {	
+	        my %markers = %{$_clones{$clone}{'markers'}}
+		if (exists($_clones{$clone}{'markers'}));
+
+	        while (my ($k,$v) = each %markers) {
+		    $gffmarkers{$contig}{$k} =
+		    ( ( $_clones{$clone}{'range'}{'start'} +
+			$_clones{$clone}{'range'}{'end'} ) / 2 ) *
+			$basepair + 1 ;
+		}	
+	    }
+	}	
+	
+	# Any other contig: the position is the value stored for the marker
+	# in the contig record, shifted and scaled like the clones.
+	if($contig) {
+	    my %markers = %{$_contigs{$contig}{'markers'}}
+	        if (exists($_contigs{$contig}{'markers'}));
+
+	    while (my ($k,$v) = each %markers) {
+	        $gffmarkers{$contig}{$k} = ($v + $offset) * $basepair + 1;
+	    }
+	}
+    }
+
+    # ---- contig-wise output ----
+    if (!$style) {
+	foreach my $contig (@sortedcontigs) {
+	   	
+	    if(exists ($_contigs{$contig}{'range'} )  ) {	
+		print join("\t","ctg$contig","assembly","contig",
+			   $gffcontigs{$contig}{'start'},
+			   $gffcontigs{$contig}{'end'},".",".",".",
+			   "Sequence \"ctg$contig\"; Name \"ctg$contig\"\n"
+                          );
+	    }
+	
+	    my @clones = (keys %{$_contigs{$contig}{'clones'}} );
+	
+	    foreach my $clone (@clones) {
+		if(exists ($_clones{$clone}{'range'}) ) {	
+		    print join("\t","ctg$contig","FPC");
+		
+		    my $type = $_clones{$clone}{'type'};
+		
+		    # NOTE(review): $clone itself is rewritten here, so the
+		    # $_clones{$clone} lookups further down (markers,
+		    # contig) use the name WITHOUT "sd1" and will miss the
+		    # record of a clone stored under its "...sd1" name -
+		    # confirm whether that is intended.
+		    if($clone =~ /sd1$/) {
+			$clone =~ s/sd1$//;
+   		        $type  = "sequenced";
+		    }		
+		    print join ("\t","\t$type",$gffclones{$clone}{'start'},
+				$gffclones{$clone}{'end'},".",".",".",
+				"$type \"$clone\"; Name \"$clone\"");
+
+		    my @markers = keys %{$_clones{$clone}{'markers'}};
+		    print "; Marker_hit" if (scalar(@markers));
+		
+		    foreach my $mkr(@markers) {
+			if (exists($_markers{$mkr}{'framework'})) {
+			    print " \"$mkr ",$_markers{$mkr}{'group'}," ",
+				   $_markers{$mkr}{'global'},"\"";
+			}
+			else {
+			    print " \"$mkr 0 0\"";
+			}
+		    }	
+		    print "; Contig_hit \"",$_clones{$clone}{'contig'},"\" "
+		        if (defined($_clones{$clone}{'contig'}));
+		}
+		# The newline is printed for every clone, including clones
+		# without a range (which then produce an empty line).
+		print "\n";
+	    }
+	
+	    if (exists ($_contigs{$contig}{'markers'}) ) {	
+		my %list = %{$_contigs{$contig}{'markers'}};
+		
+		while (my ($k,$v) = each %list) {
+		    print "ctg", $contig, "\tFPC\t";
+		    my $position = $gffmarkers{$contig}{$k};
+		
+		    # Type precedence: "marker", overridden by eMRK, which
+		    # is overridden in turn by the framework flag if present.
+		    my $type = "marker";
+		
+		    $type = "electronicmarker"
+		         if ($_markers{$k}{'type'} eq "eMRK");
+		
+		    if( exists($_markers{$k}{'framework'})) {
+			$type = "frameworkmarker"
+			    if($_markers{$k}{'framework'} == 1);
+			
+			$type = "placementmarker"
+			    if($_markers{$k}{'framework'} == 0);
+		    }	
+		
+		    print join ("\t","$type",$position,$position,".",".",
+                                ".","$type \"$k\"; Name \"$k\"");
+		
+		    # Clones of this marker that lie in the current contig.
+	            my @clonelist;
+		    my @clones  = keys %{$_markers{$k}{'clones'}};
+		
+		    foreach my $cl (@clones) {
+			push (@clonelist, $cl)
+			    if($_clones{$cl}{'contig'} == $contig);
+		    }
+		
+		    # NOTE(review): assigns the global list separator
+		    # without "local", so the change outlives this call.
+		    $" = " ";
+		    print("; Contig_hit \"ctg$contig - ",scalar(@clonelist),
+			  "\" (@clonelist)\n");
+		}
+	    }  		   	
+	}
+    }
+    # ---- group-wise (chromosome-wise) output ----
+    else {
+	my %_groups;
+	my $margin       = 2 * $basepair;
+	my $displacement = 0;
+	my @grouplist;
+	
+	# One record per contig: its group (0 when the group has no digit or
+	# word character), its id and its position within the group.
+	foreach my $contig (@sortedcontigs) {
+	    my $recordchr;
+            my $chr = $_contigs{$contig}{'group'};		
+	    $chr = 0 if ($chr !~ /\d+|\w+/);
+	
+            $recordchr->{group}      = $chr;
+	    $recordchr->{contig}     = $contig;
+	    $recordchr->{position}   = $_contigs{$contig}{'position'};
+
+	    push @grouplist, $recordchr;	
+	}
+	
+	# NOTE(review): %_groups is never filled, so @chr is always empty;
+	# neither is used afterwards.
+	my @chr       = keys (%{$_groups{'group'}});
+	my @sortedchr;
+	
+	# Order by group, then contig: numerically for 'Chromosome' groups,
+	# as strings otherwise.
+	if ($self->group_type eq 'Chromosome') {
+	    @sortedchr = sort { $a->{'group'} <=> $b->{'group'}
+				               ||
+				$a->{'contig'} <=> $b->{'contig'}
+                              } @grouplist;
+	}
+	else {
+	    @sortedchr = sort { $a->{'group'}  cmp $b->{'group'} 	
+				                ||
+				$a->{'contig'} cmp $b->{'contig'}
+                              } @grouplist;
+	}
+	# $lastchr: group of the previous record, -1 meaning "none yet".
+	my $lastchr   = -1;
+	my $chrend    = 0;
+
+	foreach my $chr (@sortedchr) {
+	    my $chrname = $self->group_abbr().$chr->{'group'};	
+	
+	    # First record of a new group: restart the displacement and
+	    # print the group header line.
+	    if ($lastchr eq -1 || $chr->{'group'} ne $lastchr ) {
+		$lastchr = $chr->{'group'} if ($lastchr eq -1);		
+		$displacement = 0;	
+		
+		# calculate the end position of the contig		
+		my $ctgcount = 0;
+		my $prevchr  = 0;		
+		$chrend = 0;
+		
+		# Group end = sum of the ends of its non-zero contigs plus
+		# one margin between each pair of neighbours.
+		if ($chr->{contig} != 0) {		
+		    foreach my $ch (@sortedchr) {
+			if ($ch->{'group'} eq $chr->{'group'}) {
+			    if($ch->{'contig'} != 0) {	
+				# NOTE(review): redundant "my ... if ..."
+				# (same test as the enclosing if).
+				my $ctg  = $ch->{'contig'}
+				    if($ch->{'contig'} != 0);
+
+				$chrend += $gffcontigs{$ctg}->{'end'};
+				++$ctgcount;
+			    }			    	
+			}
+		    }	
+		    $chrend += ($ctgcount-1) * $margin;
+		}
+		else {
+		    $chrend  = $gffcontigs{'0'}->{'end'};
+		}
+		
+		$chrname    = $self->group_abbr()."ctg0"
+		if ($chr->{'contig'} == 0);
+		
+		print join ("\t", $chrname,"assembly","Chromosome",1,
+			    "$chrend",".",".",".",
+			    "Sequence \"$chrname\"; Name \"$chrname\"\n");
+	    }
+	
+	    # NOTE(review): whenever the group differs from $lastchr the
+	    # block above has already printed this header line, so for a
+	    # group equal to 0 this statement looks like it prints it a
+	    # second time - confirm.
+	    print join ("\t", $chrname,"assembly","Chromosome",1,
+			"$chrend",".",".",".",
+			"Sequence \"$chrname\"; Name \"$chrname\"\n")
+	        if ($chr->{'group'} ne $lastchr && $chr->{'group'} eq 0 );
+	
+	    $lastchr = $chr->{'group'};
+	    $lastchr = -1 if ($chr->{'contig'} == 0);	
+	
+	    my $contig = $chr->{'contig'};
+	    	
+	    if(exists ($_contigs{$contig}{'range'} )  ) {
+		
+		print join ("\t",$chrname, "FPC","contig",
+			    $gffcontigs{$contig}{'start'}+$displacement,
+		            $gffcontigs{$contig}{'end'}+$displacement,
+			    ".",".",".",
+			    "contig \"ctg$contig\"; Name \"ctg$contig\"\n");
+	    }
+	
+	    my @clones = (keys %{$_contigs{$contig}{'clones'}} );
+	    foreach my $clone (@clones) {
+		if(exists ($_clones{$clone}{'range'}) ) {	
+		    print join ("\t",$chrname,"FPC");
+		    my $type = $_clones{$clone}{'type'};
+		
+		    # NOTE(review): as in the contig-wise branch, $clone is
+		    # rewritten and later $_clones{$clone} lookups use the
+		    # name without "sd1".
+		    if ($clone =~ /sd1$/) {
+			$clone =~ s/sd1$//;
+			$type  = "sequenced";
+		    }
+		
+		    print join ("\t","\t$type",$gffclones{$clone}{'start'}
+				+$displacement,$gffclones{$clone}{'end'}
+				+$displacement,".",".",".",
+				"$type \"$clone\"; Name \"$clone\"");
+		
+		    my @markers = keys %{$_clones{$clone}{'markers'}};
+		    print "; Marker_hit" if (scalar(@markers));
+		    		
+		    foreach my $mkr(@markers) {
+			if (exists($_markers{$mkr}{'framework'})) {
+			    print " \"$mkr ",$_markers{$mkr}{'group'}," ",
+				   $_markers{$mkr}{'global'},"\"";
+			}
+			else {
+			    print (" \"$mkr 0 0\"");
+			}
+		    }	
+		    print "; Contig_hit \"",$_clones{$clone}{'contig'},"\" "
+		        if (defined($_clones{$clone}{'contig'}));
+		}
+		print "\n";
+	    }
+	
+	    if (exists ($_contigs{$contig}{'markers'}) ) {	
+		my %list = %{$_contigs{$contig}{'markers'}};
+		
+		while (my ($k,$v) = each %list) {
+		    print join ("\t",$chrname,"FPC");
+		    # Same type precedence as in the contig-wise branch.
+		    my $type = "marker";
+		
+		    $type = "electronicmarker"
+		        if ($_markers{$k}{'type'} eq "eMRK");
+		
+		    if( exists($_markers{$k}{'framework'})) {
+			$type = "frameworkmarker"
+			    if($_markers{$k}{'framework'} == 1);
+			
+			$type = "placementmarker"
+			    if($_markers{$k}{'framework'} == 0);	
+		    }	
+		    		    		    	
+		    print join ("\t","\t$type",$gffmarkers{$contig}{$k}
+				+ $displacement,$gffmarkers{$contig}{$k}
+				+ $displacement,".",".",".",
+				"$type \"$k\"; Name \"$k\"");
+
+		    my @clonelist;
+		    my @clones  = keys %{$_markers{$k}{'clones'}};
+		
+		    foreach my $cl (@clones) {
+			push (@clonelist, $cl)
+			    if($_clones{$cl}{'contig'} == $contig);
+		    }
+		
+		    # NOTE(review): global $" assigned without "local".
+		    $" = " ";		
+		    print("; Contig_hit \"ctg$contig - ",
+			  scalar(@clonelist),"\" (@clonelist)\n");
+		}
+	    }
+	    # The next contig of the group starts after this one plus margin.
+	    $displacement += $margin + $gffcontigs{$contig}{'end'};
+	}
+    }
+}
+
+=head2 _calc_markerposition
+
+ Title   : _calc_markerposition
+ Usage   : $map->_calc_markerposition();
+ Function: Calculates the position of the marker in the contig
+ Returns : none
+ Args    : none
+
+=cut
+
+sub _calc_markerposition {
+    # For every marker, derives a position inside each contig that holds
+    # clones carrying the marker, and stores it in two places:
+    #     $_contigs{$ctg}{'markers'}{$marker}
+    #     $_markers{$marker}{'posincontig'}{$ctg}
+    # The three tables below are one-level copies, so these writes reach the
+    # map's own data only through nested records that already exist in it.
+    # Takes no arguments besides the invocant; returns nothing meaningful.
+    my ($self) = @_;
+    my %_contigs = %{$self->{'_contigs'}};
+    my %_markers = %{$self->{'_markers'}};
+    my %_clones  = %{$self->{'_clones'}};
+
+    # Sweep state: [$x, $y] is the current candidate interval (largest start
+    # and smallest end seen), @stack holds clone ends, $depth how many are
+    # stacked and $save_depth the deepest stack recorded so far.
+    my $i;
+    my ($depth, $save_depth);
+    my ($x, $y);
+    my @stack;
+    my ($k, $j, $s);
+    my $pos;
+    my $contig;
+
+    # Calculate the position for the marker in the contig
+
+    # NOTE(review): @contigs, @sortedcontigs, $offset, %gffclones,
+    # %gffcontigs and $contig are not used in this sub.
+    my @contigs       = $self->each_contigid();
+    my @sortedcontigs = sort {$a <=> $b } @contigs;
+    my $offset;
+    my %gffclones;
+    my %gffcontigs;
+
+    foreach my $marker ($self->each_markerid()) {
+        my (@ctgmarker, @sortedctgmarker);
+	
+	# NOTE(review): "my ... if ..." has undefined behaviour (perlsyn).
+	my @clones = (keys %{$_markers{$marker}{'clones'}})
+	    if (exists ($_markers{$marker}{'clones'} ));
+	
+	# One record (contig, start, end) per clone carrying this marker.
+        foreach my $clone (@clones) {
+	    my $record;
+	    $record->{contig} = $_clones{$clone}{'contig'};		
+	    $record->{start}  = $_clones{$clone}{'range'}{'start'};
+	    $record->{end}    = $_clones{$clone}{'range'}{'end'};
+	    push @ctgmarker,$record;
+	}
+	
+	# sorting by contig and left position
+	# NOTE(review): the start comparison has $b before $a, i.e. starts
+	# are sorted in DESCENDING order within a contig - confirm that this
+	# is what the sweep below expects.
+	@sortedctgmarker = sort { $a->{'contig'} <=> $b->{'contig'}
+				                  ||
+				  $b->{'start'}  <=> $a->{'start'}
+			        } @ctgmarker;
+				
+	# Contig of the records being processed; -1 means "none yet".
+	my $ctg = -1;
+	
+	for ($i=0; $i < scalar(@sortedctgmarker); $i++) {
+	    # First record of a new contig.
+	    if ($ctg != $sortedctgmarker[$i]->{'contig'}) {
+		if ($ctg == -1) {
+		    $ctg = $sortedctgmarker[$i]->{'contig'};
+		}
+		else  {	
+		    # Flush the previous contig if its last run was deeper
+		    # than the one recorded for it.
+		    if ($depth > $save_depth){
+			# ">> 1" halves the sum as an integer.
+			# NOTE(review): Perl's shift operators work on
+			# unsigned integers unless "use integer" is in
+			# effect, so a negative sum would not halve as
+			# expected - confirm ranges cannot go negative.
+			$pos = ($x + $y) >> 1;
+			$_contigs{$ctg}{'markers'}{$marker}      = $pos;
+			$_markers{$marker}{'posincontig'}{$ctg}  = $pos;
+		    }
+		}
+		
+		# Start over with this clone as the only interval; its
+		# midpoint is the initial position for the contig.
+		$ctg      = $sortedctgmarker[$i]->{'contig'};
+		$x        = $sortedctgmarker[$i]->{'start'};
+		$y        = $sortedctgmarker[$i]->{'end'};
+		$stack[0] = $y;
+		
+		$pos = ($x + $y) >> 1;
+		$_contigs{$ctg}{'markers'}{$marker}     = $pos;
+		$_markers{$marker}{'posincontig'}{$ctg} = $pos;
+		
+		$depth = $save_depth = 1;
+	    }
+	    # NOTE(review): this compares the record itself (a hash
+	    # reference, which numifies to its address) with $y; the branch
+	    # body suggests ->{'start'} was meant - confirm before fixing,
+	    # since it changes which branch normally runs.
+	    elsif ($sortedctgmarker[$i] <= $y) {
+		# Overlap: stack the clone and narrow [$x, $y].
+		$stack[$depth++] = $sortedctgmarker[$i]->{'end'};
+		# MAX
+		if ($x < $sortedctgmarker[$i]->{'start'} ) {
+		    $x = $sortedctgmarker[$i]->{'start'};
+		}
+		# MIN
+		if ($y > $sortedctgmarker[$i]->{'end'}) {
+		    $y = $sortedctgmarker[$i]->{'end'};
+		}	
+	    }
+	    else {
+		# No overlap with the current interval: record it if it was
+		# the deepest so far, then restart from this clone.
+		if ($depth > $save_depth) {
+		    $save_depth = $depth;
+		    $pos = ($x + $y) >> 1;
+		    $_contigs{$ctg}{'markers'}{$marker}     = $pos;
+		    $_markers{$marker}{'posincontig'}{$ctg} = $pos;
+		}
+		
+		$x               = $sortedctgmarker[$i]->{'start'};
+		$y               = $sortedctgmarker[$i]->{'end'};
+		$stack[$depth++] = $y;
+		
+		# Walk the stack: entries ending before $x are marked -1,
+		# surviving entries are counted in $k, lower $y where
+		# needed and are moved into freed slots.
+		# NOTE(review): $j starts at -1, so $stack[$j] can address
+		# the LAST element, and $depth is overwritten inside the
+		# loop that uses it as its bound - confirm against the FPC
+		# original.
+		for($j=-1, $k=0, $s=0; $s<$depth; $s++) {
+		    if ($stack[$s] <$x) {
+			$stack[$s] = -1;
+			$j = $s if ($j == -1);
+		    }
+		    else {
+			$k++;
+			# MIN
+			$y = $stack[$s] if ($y > $stack[$s]);
+			if ($stack[$j] == -1) {
+			    $stack[$j] = $stack[$s];
+			    $stack[$s] = -1;
+			    while ($stack[$j] != -1) {$j++;}
+			}
+			else {
+			    $j = $s;
+			}
+		    }
+		    $depth = $k;
+		}	
+	    }
+	    # After every record: store the midpoint whenever the current
+	    # stack is deeper than the recorded one ($save_depth itself is
+	    # not updated here).
+	    if ($depth > $save_depth) {
+		$pos = ($x + $y) >> 1;
+		$_contigs{$ctg}{'markers'}{$marker}     = $pos;
+		$_markers{$marker}{'posincontig'}{$ctg} = $pos;
+	    }
+	}	
+    }
+}
+
+=head2 _calc_contigposition
+
+ Title   : _calc_contigposition
+ Usage   : $map->_calc_contigposition();
+ Function: calculates the position of the contig in the group
+ Returns : none
+ Args    : none
+
+=cut
+
+sub _calc_contigposition{
+    my ($self) = @_;
+
+    # For every contig that has a 'group', stores under 'position' the
+    # average global position of the contig's markers belonging to that
+    # group, each weighted by the number of its clones lying in the contig
+    # (formatted "%.2f", or 0 when no clone hit qualifies).
+    #
+    # One-level copies: only the top level is private to this call. The
+    # per-contig records are shared with $self, so the 'position' written
+    # below lands in the map's own data.
+    my %_contigs = %{$self->{'_contigs'}};
+    my %_markers = %{$self->{'_markers'}};
+    my %_clones  = %{$self->{'_clones'}};
+
+    # Each contig is handled independently, so no particular order is
+    # needed.
+    foreach my $contig ($self->each_contigid()) {
+        next unless exists($_contigs{$contig}{'group'});
+
+        # Declared unconditionally: the behaviour of "my ... if ..." is
+        # undefined (perlsyn).
+        my @mkrs = exists($_contigs{$contig}{'markers'})
+                 ? keys(%{$_contigs{$contig}{'markers'}})
+                 : ();
+
+        # A group containing "?" is treated as group 0.
+        my $chr = $_contigs{$contig}{'group'};
+        $chr = 0 if ($chr =~ /\?/);
+
+        # global position of a marker => number of its clones in this contig
+        # NOTE(review): two markers sharing one global position overwrite
+        # each other here - confirm whether they should accumulate.
+        my %weightedmarkers;
+        foreach my $mkr (@mkrs) {
+            next unless exists($_markers{$mkr}{'group'});
+
+            # NOTE(review): numeric comparison; non-numeric group names all
+            # numify to 0 and would compare equal - confirm groups are
+            # numeric at this point.
+            next unless $_markers{$mkr}{'group'} == $chr;
+
+            my $clonescount = 0;
+            foreach my $clone (keys(%{$_markers{$mkr}{'clones'}})) {
+                ++$clonescount
+                    if ($_clones{$clone}{'contig'} == $contig);
+            }
+            $weightedmarkers{$_markers{$mkr}{'global'}} = $clonescount;
+        }
+
+        my $weightedctgsum = 0;
+        my $totalhits      = 0;
+        while (my ($mpos, $hits) = each %weightedmarkers) {
+            $weightedctgsum += ($mpos * $hits);
+            $totalhits      += $hits;
+        }
+
+        # Guard against division by zero when nothing qualified.
+        my $position = 0;
+        $position = sprintf("%.2f", $weightedctgsum / $totalhits)
+            if ($totalhits != 0);
+
+        $_contigs{$contig}{'position'} = $position;
+    }
+}
+
+=head2 _calc_contiggroup
+
+ Title   : _calc_contiggroup
+ Usage   : $map->_calc_contiggroup();
+ Function: calculates the group of the contig
+ Returns : none
+ Args    : none
+
+=cut
+
+sub _calc_contiggroup {
+    my $self = shift;
+
+    # One-level copy of the contig table. The per-contig records are shared
+    # with $self, so the 'group' written below ends up in the map itself.
+    my %record_for = %{ $self->{'_contigs'} };
+
+    # The group is the contig id divided by 1000, rounded down.
+    foreach my $id ($self->each_contigid()) {
+        $record_for{$id}{'group'} = floor($id / 1000);
+    }
+}
+
+=head2 _setI<E<lt>TypeE<gt>>Ref
+
+ Title   : _set<Type>Ref
+ Usage   : These are used for initializing the reference of the hash in
+           Bio::MapIO (fpc.pm) to the corresponding hash in Bio::Map
+           (physical.pm). Should be used only from Bio::MapIO System.
+               $map->_setCloneRef(\%_clones);
+               $map->_setMarkerRef(\%_markers);
+               $map->_setContigRef(\%_contigs);
+ Function: copies the contents of the given hash into the map's
+           corresponding internal hash
+ Returns : none
+ Args    : reference of the hash.
+
+=cut
+
+sub _setCloneRef {
+    my $self = shift;
+    my $ref  = shift;
+
+    # Replace the contents of the map's '_clones' hash with the pairs of the
+    # supplied hash. The container is refilled in place: the passed
+    # reference itself is not stored, though the values are shared.
+    %{ $self->{'_clones'} } = %{ $ref };
+}
+
+sub _setMarkerRef {
+    my $self = shift;
+    my $ref  = shift;
+
+    # Replace the contents of the map's '_markers' hash with the pairs of
+    # the supplied hash. The container is refilled in place: the passed
+    # reference itself is not stored, though the values are shared.
+    %{ $self->{'_markers'} } = %{ $ref };
+}
+
+sub _setContigRef {
+    my $self = shift;
+    my $ref  = shift;
+
+    # Replace the contents of the map's '_contigs' hash with the pairs of
+    # the supplied hash. The container is refilled in place: the passed
+    # reference itself is not stored, though the values are shared.
+    %{ $self->{'_contigs'} } = %{ $ref };
+}
+
+1;
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Physical.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Position.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Position.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Position.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,455 @@
+# $Id: Position.pm,v 1.18.4.3 2006/10/17 09:35:22 sendu Exp $
+#
+# BioPerl module for Bio::Map::Position
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Position - A single position of a Marker, or the range over which
+                     that marker lies, in a Map
+
+=head1 SYNOPSIS
+
+    use Bio::Map::Position;
+    my $position = new Bio::Map::Position(-map => $map, 
+					  -element => $marker,
+					  -value => 100
+					  );
+
+	my $position_with_range = new Bio::Map::Position(-map => $map, 
+					  -element => $marker,
+					  -start => 100,
+					  -length => 10
+					  );
+
+=head1 DESCRIPTION
+
+This object is an implementation of the PositionI interface that
+handles the specific values of a position. This allows a map element
+(e.g. Marker) to have multiple positions within a map and still be
+treated as a single entity.
+
+This handles the concept of a relative map in which the order of
+elements and the distance between them is known, but does not
+directly handle the case when distances are unknown - in that case
+arbitrary values must be assigned for position values.
+
+No units are assumed here - units are handled by context of which Map
+a position is placed in or the subclass of this Position.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein, lstein at cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Chad Matsalla, bioinformatics1 at dieselwurks.com
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Position;
+use strict;
+
+use Scalar::Util qw(looks_like_number);
+use Bio::Map::Relative;
+
+use base qw(Bio::Root::Root Bio::Map::PositionI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Map::Position();
+ Function: Builds a new Bio::Map::Position object 
+ Returns : Bio::Map::Position
+ Args    : -map      => Bio::Map::MapI object
+           -element  => Bio::Map::MappableI object
+           -relative => Bio::Map::RelativeI object
+
+           * If this position has no range, or if a single value can describe
+             the range *
+           -value => scalar             : something that describes the single
+                                          point position or range of this
+                                          Position, most likely an int
+
+           * Or if this position has a range, at least two of *
+           -start => int                : value of the start co-ordinate
+           -end => int                  : value of the end co-ordinate
+           -length => int               : length of the range
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($map, $marker, $element, $value, $start, $end, $length, $relative) =
+        $self->_rearrange([qw( MAP
+                               MARKER
+                               ELEMENT
+                               VALUE
+                               START
+                               END
+                               LENGTH
+                               RELATIVE
+                               )], @args);
+
+    # A range is requested as soon as -start or -end is given; -length only
+    # completes a range and never starts one on its own.
+    my $do_range = defined($start) || defined($end);
+
+    # Test definedness, not truth: "-value => 0" is a real value too. If it
+    # slipped through, value() below would seed start and end with 0 before
+    # the requested range is applied.
+    if (defined($value) && $do_range) {
+        $self->warn("-value and (-start|-end|-length) are mutually exclusive, ignoring value");
+        $value = undef;
+    }
+
+    $map            && $self->map($map);
+    $marker         && $self->element($marker); # backwards compatibility
+    $element        && $self->element($element);
+    $relative       && $self->relative($relative);
+    defined($value) && $self->value($value);
+
+    if ($do_range) {
+        defined($start) && $self->start($start);
+        defined($end)   && $self->end($end);
+
+        # With -length, derive whichever end of the range was not given.
+        if ($length) {
+            if (defined($start) && ! defined($end)) {
+                $self->end($start + $length - 1);
+            }
+            elsif (! defined($start)) {
+                $self->start($end - $length + 1);
+            }
+        }
+
+        # If there is still no end, the range collapses onto its start.
+        defined($self->end) || $self->end($start);
+    }
+
+    return $self;
+}
+
+=head2 relative
+
+  Title   : relative
+  Usage   : my $relative = $position->relative();
+            $position->relative($relative);
+  Function: Get/set the thing this Position's coordinates (numerical(), start(),
+            end()) are relative to, as described by a Relative object.
+  Returns : Bio::Map::RelativeI (default is one describing "relative to the
+            start of the Position's map")
+  Args    : none to get, OR
+            Bio::Map::RelativeI to set
+
+=cut
+
+sub relative {
+    my ($self, $relative) = @_;
+
+    # Setter: only a true argument counts, and it has to be an object that
+    # is a Bio::Map::RelativeI.
+    if ($relative) {
+        ref($relative)
+            or $self->throw("Must supply an object");
+        $relative->isa('Bio::Map::RelativeI')
+            or $self->throw("This is [$relative], not a Bio::Map::RelativeI");
+
+        # Remember that the Relative was given explicitly.
+        $self->{_relative_not_implicit} = 1;
+        $self->{_relative}              = $relative;
+    }
+
+    # Getter: the stored Relative if there is one, else the default
+    # supplied by absolute_relative().
+    my $stored = $self->{_relative};
+    return $stored if $stored;
+    return $self->absolute_relative;
+}
+
+=head2 absolute
+
+  Title   : absolute
+  Usage   : my $absolute = $position->absolute();
+            $position->absolute($absolute);
+  Function: Get/set how this Position's co-ordinates (numerical(), start(),
+            end()) are reported. When absolute is off, co-ordinates are
+            relative to the thing described by relative(). Ie. the value
+            returned by start() will be the same as the value you set start()
+            to. When absolute is on, co-ordinates are converted to be relative
+            to the start of the map.
+
+            So if relative() currently points to a Relative object describing
+            "relative to another position which is 100 bp from the start of
+            the map", this Position's start() had been set to 50 and absolute()
+            returns 1, $position->start() will return 150. If absolute() returns
+            0 in the same situation, $position->start() would return 50.
+
+  Returns : boolean (default 0)
+  Args    : none to get, OR
+            boolean to set
+
+=cut
+
+sub absolute {
+    my ($self, @new_flag) = @_;
+    $self->{_absolute} = $new_flag[0] if @new_flag;   # set only when an argument was passed
+    return $self->{_absolute} ? $self->{_absolute} : 0;
+}
+
+=head2 value
+
+ Title   : value
+ Usage   : my $pos = $position->value;
+ Function: Get/Set the value for this position
+ Returns : scalar, value
+ Args    : [optional] new value to set
+
+=cut
+
+sub value {
+    my ($self, $value) = @_;
+    return $self->{_value} unless defined $value;
+    $self->{_value} = $value;
+    # default any still-undefined start/end to the numeric form of the value
+    defined($self->start) or $self->start($self->numeric);
+    defined($self->end)   or $self->end($self->numeric);
+    return $self->{_value};
+}
+
+=head2 numeric
+
+ Title   : numeric
+ Usage   : my $num = $position->numeric;
+ Function: Read-only method that is guaranteed to return a numeric 
+           representation of the start of this position. 
+ Returns : scalar numeric
+ Args    : none to get the co-ordinate normally (see absolute() method), OR
+           Bio::Map::RelativeI to get the co-ordinate converted to be
+           relative to what this Relative describes.
+
+=cut
+
+sub numeric {
+    my ($self, $value) = @_;
+    my $num = $self->{_value};
+    defined($num)
+        or $self->throw("The value has not been set, can't convert to numeric");
+    looks_like_number($num)
+        or $self->throw("This value [$num] is not numeric");
+
+    # asked for the co-ordinate relative to a specific Relative: shift the
+    # raw value from our own frame of reference into the requested one
+    if (ref($value) && $value->isa('Bio::Map::RelativeI')) {
+        my ($own_origin, $wanted_origin) = $self->_relative_handler($value);
+        return $own_origin + $num - $wanted_origin;
+    }
+
+    # otherwise honour absolute(), but only once a Relative was set explicitly
+    # (the conversion reports the start, which should be the same thing)
+    my $convert = $self->{_relative_not_implicit} && $self->absolute;
+    return $self->relative->absolute_conversion($self) if $convert;
+    return $num;
+}
+
+=head2 start
+
+  Title   : start
+  Usage   : my $start = $position->start();
+            $position->start($start);
+  Function: Get/set the start co-ordinate of this position.
+  Returns : the start of this position
+  Args    : scalar numeric to set, OR
+            none to get the co-ordinate normally (see absolute() method), OR
+            Bio::Map::RelativeI to get the co-ordinate converted to be
+            relative to what this Relative describes.
+
+=cut
+
+sub start {
+    my ($self, $value) = @_;
+
+    if (ref($value) && $value->isa('Bio::Map::RelativeI')) {
+        # getter with conversion: report the raw start in the frame of
+        # reference described by the supplied Relative
+        my $raw = $self->{start};
+        return unless defined $raw;
+        my ($own_origin, $wanted_origin) = $self->_relative_handler($value);
+        return $own_origin + $raw - $wanted_origin;
+    }
+    elsif (defined $value) {
+        # setter: only numbers are acceptable co-ordinates
+        looks_like_number($value) or $self->throw("This is [$value], not a number");
+        $self->{start} = $value;
+        defined($self->value) or $self->value($value);
+    }
+
+    # plain getter (also reached after setting): honour absolute()
+    if ($self->{_relative_not_implicit} && $self->absolute) {
+        return $self->relative->absolute_conversion($self);
+    }
+    return unless defined $self->{start};
+    return $self->{start};
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : my $end = $position->end();
+            $position->end($end);
+  Function: Get/set the end co-ordinate of this position.
+  Returns : the end of this position
+  Args    : scalar numeric to set, OR
+            none to get the co-ordinate normally (see absolute() method), OR
+            Bio::Map::RelativeI to get the co-ordinate converted to be
+            relative to what this Relative describes.
+
+=cut
+
+sub end {
+    my ($self, $value) = @_;
+    if (ref($value) && $value->isa('Bio::Map::RelativeI')) {
+        # getter with conversion: raw end re-expressed relative to $value
+        my $raw = $self->{end};
+        return unless defined $raw;
+        my ($abs_start, $rel_start) = $self->_relative_handler($value);
+        return $abs_start + $raw - $rel_start;
+    }
+    elsif (defined $value) {
+        # setter: only numbers are acceptable co-ordinates
+        $self->throw("This value [$value] is not numeric!") unless looks_like_number($value);
+        $self->{end} = $value;
+    }
+
+    # get the value as per absolute; test with defined() rather than truth so
+    # that a legitimate co-ordinate of 0 is converted instead of being dropped
+    if ($self->{_relative_not_implicit} && $self->absolute) {
+        my $raw = $self->{end};
+        return unless defined $raw;
+        my $abs_start = $self->relative->absolute_conversion($self);
+        return unless defined $abs_start;
+        return $abs_start + ($raw - $self->{start});
+    }
+    return defined($self->{end}) ? $self->{end} : ();
+}
+
+=head2 length
+
+  Title   : length
+  Usage   : $length = $position->length();
+  Function: Get/set the length of this position's range, changing the end() if
+            necessary. Getting and even setting the length will fail if both
+            start() and end() are not already defined.
+  Returns : the length of this range
+  Args    : none to get, OR scalar numeric (>0) to set.
+
+=cut
+
+sub length {
+    my ($self, $length) = @_;
+    if ($length) {
+        # setter: needs a positive length and an already-defined range
+        return unless $length > 0;
+        my $current = $self->length;
+        return unless $current;
+        return $length if $current == $length;
+        $self->end($self->{start} + $length - 1);
+    }
+    # (re)compute from the current range; nothing to report without both ends
+    return unless defined($self->start) && defined($self->end);
+    return $self->end - $self->start + 1;
+}
+
+=head2 sortable
+
+ Title   : sortable
+ Usage   : my $num = $position->sortable();
+ Function: Read-only method that is guaranteed to return a value suitable
+           for correctly sorting this kind of position amongst other positions
+           of the same kind on the same map. Note that sorting different kinds
+           of position together is unlikely to give sane results.
+ Returns : numeric
+ Args    : none
+
+=cut
+
+sub sortable {
+    my ($self) = @_;
+    # always sort on the co-ordinate expressed relative to the start of the map
+    my $map_start = $self->absolute_relative;
+    return scalar $self->numeric($map_start);
+}
+
+=head2 toString
+
+  Title   : toString
+  Usage   : print $position->toString(), "\n";
+  Function: stringifies this range
+  Returns : a string representation of the range of this Position
+  Args    : optional Bio::Map::RelativeI to have the co-ordinates reported
+            relative to the thing described by that Relative
+
+=cut
+
+sub toString {
+    my ($self, $rel) = @_;
+    # a position lacking either co-ordinate stringifies to nothing
+    return '' unless defined($self->start) && defined($self->end);
+    my ($from, $to) = (scalar $self->start($rel), scalar $self->end($rel));
+    return join('..', $from, $to);
+}
+
+=head2 absolute_relative
+
+ Title   : absolute_relative
+ Usage   : my $rel = $position->absolute_relative();
+ Function: Get a relative describing the start of the map. This is useful for
+           supplying to the coordinate methods (start(), end() etc.) to get
+           the temporary effect of having set absolute(1).
+ Returns : Bio::Map::Relative
+ Args    : none
+
+=cut
+
+sub absolute_relative {
+    return Bio::Map::Relative->new(-map => 0, -description => 'start of map'); # a new object on every call
+}
+
+# get our own absolute start and that of the thing we want as a frame of
+# reference
+sub _relative_handler {
+    my ($self, $value) = @_;
+    my $own_relative = $self->relative;
+
+    # shortcut: when both Relatives are of the same type and point at the
+    # same thing, the stored co-ordinates need no shifting at all
+    my ($own_type, $req_type) = ($own_relative->type, $value->type);
+    my $same_frame = $own_type && $req_type && $own_type eq $req_type
+                     && $own_relative->$own_type eq $value->$req_type;
+    return (0, 0) if $same_frame;
+
+    # otherwise express both frames of reference as absolute map positions
+    my $abs_start = $own_relative->absolute_conversion($self);
+    my $rel_start = $value->absolute_conversion($self);
+    unless (defined($abs_start) && defined($rel_start)) {
+        $self->throw("Unable to resolve co-ordinate because relative to something that ultimately isn't relative to the map start");
+    }
+    return ($abs_start, $rel_start);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,419 @@
+# $Id: PositionHandler.pm,v 1.3.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::PositionHandler
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::PositionHandler - A Position Handler Implementation
+
+=head1 SYNOPSIS
+
+    # This is used by modules when they want to implement being a
+    # Position or being something that has Positions (when they are
+    # a L<Bio::Map::EntityI>)
+
+    # Make a PositionHandler that knows about you
+    my $ph = Bio::Map::PositionHandler->new(-self => $self);
+
+    # Register with it so that it handles your Position-related needs
+    $ph->register;
+
+    # If you are a position, get/set the map you are on and the marker you are
+    # for
+    $ph->map($map);
+    $ph->element($marker);
+    my $map = $ph->map;
+    my $marker = $ph->element;
+
+    # If you are a marker, add a new position to yourself
+    $ph->add_positions($pos);
+
+    # And then get all your positions on a particular map
+    foreach my $pos ($ph->get_positions($map)) {
+        # do something with this Bio::Map::PositionI
+    }
+
+    # Or find out what maps you exist on
+    my @maps = $ph->get_other_entities;
+
+    # The same applies if you were a map
+
+=head1 DESCRIPTION
+
+A Position Handler copes with the coordination of different Bio::Map::EntityI
+objects, adding and removing them from each other and knowing who belongs to
+who. These relationships between objects are based around shared Positions,
+hence PositionHandler.
+
+This PositionHandler is able to cope with Bio::Map::PositionI objects,
+Bio::Map::MappableI objects and Bio::Map::MapI objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::PositionHandler;
+use strict;
+
+use base qw(Bio::Root::Root Bio::Map::PositionHandlerI);
+
+# globally accessible hash, via private instance methods
+my $RELATIONS = {};
+
+=head2 General methods
+
+=cut
+
+=head2 new
+
+ Title   : new
+ Usage   : my $position_handler = new Bio::Map::PositionHandler(-self => $self);
+ Function: Get a Bio::Map::PositionHandler that knows who you are.
+ Returns : Bio::Map::PositionHandler object
+ Args    : -self => Bio::Map::EntityI that is you
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # the entity this handler acts on behalf of arrives as -self
+    my ($you) = $self->_rearrange([qw(SELF)], @args);
+    $self->throw('Must supply -self') unless $you;
+    $self->throw('-self must be a reference (object)') unless ref($you);
+    # double quotes, so the offending value is interpolated into the message
+    $self->throw("This is [$you], not a Bio::Map::EntityI object") unless $you->isa('Bio::Map::EntityI');
+    $self->{_who} = $you;
+    $self->{_rel} = $RELATIONS;   # every handler shares the one file-level registry
+    return $self;
+}
+
+=head2 register
+
+ Title   : register
+ Usage   : $position_handler->register();
+ Function: Ask this Position Handler to look after your entity relationships.
+ Returns : n/a
+ Args    : none
+
+=cut
+
+sub register {
+    my ($self) = @_;
+    my ($you, $registry) = @{$self}{qw(_who _rel)};
+    # an entity whose handler already holds an index must not register again
+    $you->get_position_handler->index and $self->throw("Trying to re-register [$you], which could be bad");
+    # indices are handed out sequentially, so the first one issued is 1
+    my $index = $self->{_index} = ++$registry->{assigned_indices};
+    $registry->{registered}->{$index} = $you;
+}
+
+=head2 index
+
+ Title   : index
+ Usage   : my $index = $position_handler->index();
+ Function: Get the unique registry index for yourself, generated during the
+           registration process.
+ Returns : int
+ Args    : none
+
+=cut
+
+sub index {
+    # stays undefined until register() has assigned an index
+    return $_[0]->{_index};
+}
+
+=head2 get_entity
+
+ Title   : get_entity
+ Usage   : my $entity = $position_handler->get_entity($index);
+ Function: Get the entity that corresponds to the supplied registry index.
+ Returns : Bio::Map::EntityI object
+ Args    : int
+
+=cut
+
+sub get_entity {
+    my ($self, $index) = @_;   # an index missing from the registry is an error
+    return $self->{_rel}{registered}{$index} || $self->throw("Requested registy index '$index' but that index isn't in the registry");
+}
+
+=head2 Methods for Bio::Map::PositionI objects
+
+=cut
+
+=head2 map
+
+ Title   : map
+ Usage   : my $map = $position_handler->map();
+           $position_handler->map($map);
+ Function: Get/Set the map you are on. You must be a Position.
+ Returns : L<Bio::Map::MapI>
+ Args    : none to get, OR
+           new L<Bio::Map::MapI> to set
+
+=cut
+
+sub map {
+    my ($self, $map) = @_;   # $map is optional: leave it out to just get
+    return $self->_pos_get_set($map, 'position_maps', 'Bio::Map::MapI');
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : my $element = $position_handler->element();
+           $position_handler->element($element);
+ Function: Get/Set the map element you are for. You must be a Position.
+ Returns : L<Bio::Map::MappableI>
+ Args    : none to get, OR
+           new L<Bio::Map::MappableI> to set
+
+=cut
+
+sub element {
+    my ($self, $element) = @_;   # $element is optional: leave it out to just get
+    return $self->_pos_get_set($element, 'position_elements', 'Bio::Map::MappableI');
+}
+
+=head2 Methods for all other Bio::Map::EntityI objects
+
+=cut
+
+=head2 add_positions
+
+ Title   : add_positions
+ Usage   : $position_handler->add_positions($pos1, $pos2, ...);
+ Function: Add some positions to yourself. You can't be a position.
+ Returns : n/a
+ Args    : Array of Bio::Map::PositionI objects
+
+=cut
+
+sub add_positions {
+    my ($self, @positions) = @_;
+    @positions or $self->throw('Must supply at least one Bio::Map::EntityI');
+
+    # work out who we are, and which relation (map or element) we stand for
+    my $own_index = $self->_get_you_index(0);
+    my $kind      = $self->_get_kind;
+    for my $pos (@positions) {
+        $self->_check_object($pos, 'Bio::Map::PositionI');
+        my $pos_index = $self->_get_other_index($pos);   # throws if unregistered
+        $self->_pos_set($pos_index, $own_index, $kind);
+    }
+}
+
+=head2 get_positions
+
+ Title   : get_positions
+ Usage   : my @positions = $position_handler->get_positions();
+ Function: Get all your positions. You can't be a Position.
+ Returns : Array of Bio::Map::PositionI objects
+ Args    : none for all, OR
+           Bio::Map::EntityI object to limit the Positions to those that
+           are shared by you and this other entity.
+
+=cut
+
+sub get_positions {
+    my ($self, $entity) = @_;
+    my $own_index = $self->_get_you_index(0);
+
+    # start from the indices of every position recorded against ourselves
+    my @pos_indices = keys %{$self->{_rel}->{has}->{$own_index}};
+    if ($entity) {
+        # ...and keep only those the other entity has as well
+        my $shared_with = $self->{_rel}->{has}->{$self->_get_other_index($entity)};
+        @pos_indices = grep { $shared_with->{$_} } @pos_indices;
+    }
+
+    return map { $self->get_entity($_) } @pos_indices;
+}
+
+=head2 purge_positions
+
+ Title   : purge_positions
+ Usage   : $position_handler->purge_positions();
+ Function: Remove all positions from yourself. You can't be a Position.
+ Returns : n/a
+ Args    : none to remove all, OR
+           Bio::Map::PositionI object to remove only that entity, OR
+           Bio::Map::EntityI object to limit the removal to those Positions that
+           are shared by you and this other entity.
+
+=cut
+
+sub purge_positions {
+    my ($self, $thing) = @_;
+    my $own_index = $self->_get_you_index(0);
+    my $kind      = $self->_get_kind;
+    # decide which position indices are to be disassociated from ourselves
+    my @doomed;
+    if (! $thing) {
+        # no argument: everything recorded against us
+        @doomed = keys %{$self->{_rel}->{has}->{$own_index}};
+    }
+    elsif (! ref($thing)) {
+        $self->throw("Must supply an object");
+    }
+    elsif ($thing->isa("Bio::Map::PositionI")) {
+        @doomed = ($self->_get_other_index($thing));
+    }
+    else {
+        # another entity: just the positions we share with it
+        my $theirs = $self->{_rel}->{has}->{$self->_get_other_index($thing)};
+        @doomed = grep { $theirs->{$_} } keys %{$self->{_rel}->{has}->{$own_index}};
+    }
+    foreach my $pos_index (@doomed) {
+        $self->_purge_pos_entity($pos_index, $own_index, $kind);
+    }
+}
+
+=head2 get_other_entities
+
+ Title   : get_other_entities
+ Usage   : my @entities = $position_handler->get_other_entities();
+ Function: Get all the entities that share your Positions. You can't be a
+           Position.
+ Returns : Array of Bio::Map::EntityI objects
+ Args    : none
+
+=cut
+
+sub get_other_entities {
+    my ($self) = @_;
+    my $own_index = $self->_get_you_index(0);
+    # positions link a map to an element, so we want the opposite kind to ours
+    my $want = $self->_get_kind eq 'position_elements' ? 'position_maps' : 'position_elements';
+    # collect each distinct entity found at the other side of our positions
+    my %seen;
+    foreach my $pos_index (keys %{$self->{_rel}->{has}->{$own_index}}) {
+        my $entity_index = $self->{_rel}->{$want}->{$pos_index} or next;
+        $seen{$entity_index} = 1;
+    }
+
+    return map { $self->get_entity($_) } keys %seen;
+}
+
+# do basic check on an object, make sure it is the right type
+sub _check_object {
+    my ($self, $object, $interface) = @_;   # each check below throws on failure
+    $object                  or $self->throw("Must supply an arguement");
+    ref($object)             or $self->throw("This is [$object], not an object");
+    $object->isa($interface) or $self->throw("This is [$object], not a $interface");
+}
+
+# get the object we are the handler of, its index, and throw depending on if
+# we're a Position
+sub _get_you_index {
+    my ($self, $should_be_pos) = @_;
+    my $is_pos = $self->{_who}->isa('Bio::Map::PositionI');
+
+    # position-only methods pass a true flag, all other methods a false one;
+    # a mismatch means the calling method is not valid for this entity
+    $self->throw("This is not a Position, method invalid") if $should_be_pos && !$is_pos;
+    $self->throw("This is a Position, method invalid")     if !$should_be_pos && $is_pos;
+
+    return $self->index;
+}
+
+# check an entity is registered and get its index
+sub _get_other_index {
+    my ($self, $entity) = @_;
+    ref($entity) or $self->throw("Must supply an object");
+    my $index = $entity->get_position_handler->index or $self->throw("Entity doesn't seem like it's been registered");
+    # the registry must hand back this very object for the index it claims
+    $entity eq $self->get_entity($index) or $self->throw("Entity may have been registered with a different PositionHandler, can't deal with it");
+    return $index;
+}
+
+# which of the position hashes should we be recorded under?
+sub _get_kind {
+    my ($self, $you) = ($_[0], $_[0]->{_who});   # the map test comes first
+    my $kind = $you->isa('Bio::Map::MapI') ? 'position_maps' : $you->isa('Bio::Map::MappableI') ? 'position_elements' : undef;
+    return $kind || $self->throw("This is [$you] which is an unsupported kind of entity");
+}
+
+# get/set position entity
+sub _pos_get_set {
+    my ($self, $entity, $kind, $interface) = @_;
+    my $pos_index = $self->_get_you_index(1);   # we must be a Position
+
+    my $entity_index;
+    if ($entity) {
+        # setter: validate the entity, then record it against this position
+        $self->_check_object($entity, $interface);
+        my $new_index = $self->_get_other_index($entity);
+        $entity_index = $self->_pos_set($pos_index, $new_index, $kind);
+    }
+
+    # getter, or a setter that yielded nothing: consult what is on record
+    $entity_index = $self->{_rel}->{$kind}->{$pos_index} unless $entity_index;
+    return unless $entity_index;
+    return $self->get_entity($entity_index);
+}
+
+# set position entity
+sub _pos_set {
+    my ($self, $pos_index, $new_entity_index, $kind) = @_;
+    my $registry = $self->{_rel};
+    my $old_entity_index = $registry->{$kind}->{$pos_index} || 0;
+
+    # already associated with the requested entity: nothing to change
+    return $old_entity_index
+        if $old_entity_index && $old_entity_index == $new_entity_index;
+
+    # a position has at most one entity per kind, so drop any previous one
+    $self->_purge_pos_entity($pos_index, $old_entity_index, $kind) if $old_entity_index;
+
+    $registry->{has}->{$new_entity_index}->{$pos_index} = 1;
+    $registry->{$kind}->{$pos_index} = $new_entity_index;
+    return $new_entity_index;
+}
+
+# disassociate position from one of its current entities
+sub _purge_pos_entity {
+    my ($self, $pos_index, $entity_index, $kind) = @_;
+    delete $self->{_rel}{has}{$entity_index}{$pos_index};   # entity -> position
+    delete $self->{_rel}{$kind}{$pos_index};                # position -> entity
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandler.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandlerI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandlerI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandlerI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,226 @@
+# $Id: PositionHandlerI.pm,v 1.2.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::PositionHandlerI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::PositionHandlerI - A Position Handler Interface
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+    # See Bio::Map::PositionHandler for an example of
+    # implementation.
+
+=head1 DESCRIPTION
+
+This interface describes the basic methods required for Position Handlers. A
+Position Handler copes with the coordination of different Bio::Map::EntityI
+objects, adding and removing them from each other and knowing who belongs to
+who. These relationships between objects are based around shared Positions,
+hence PositionHandler.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::PositionHandlerI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 General methods
+
+=cut
+
+=head2 register
+
+ Title   : register
+ Usage   : $position_handler->register();
+ Function: Ask this Position Handler to look after your entity relationships.
+ Returns : n/a
+ Args    : none
+
+=cut
+
+sub register {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 index
+
+ Title   : index
+ Usage   : my $index = $position_handler->index();
+ Function: Get the unique registry index for yourself, generated during the
+           registration process.
+ Returns : int
+ Args    : none
+
+=cut
+
+sub index {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 get_entity
+
+ Title   : get_entity
+ Usage   : my $entity = $position_handler->get_entity($index);
+ Function: Get the entity that corresponds to the supplied registry index.
+ Returns : Bio::Map::EntityI object
+ Args    : int
+
+=cut
+
+sub get_entity {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 Methods for Bio::Map::PositionI objects
+
+=cut
+
+=head2 map
+
+ Title   : map
+ Usage   : my $map = $position_handler->map();
+           $position_handler->map($map);
+ Function: Get/Set the map you are on. You must be a Position.
+ Returns : L<Bio::Map::MapI>
+ Args    : none to get, OR
+           new L<Bio::Map::MapI> to set
+
+=cut
+
+sub map {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : my $element = $position_handler->element();
+           $position_handler->element($element);
+ Function: Get/Set the map element you are for. You must be a Position.
+ Returns : L<Bio::Map::MappableI>
+ Args    : none to get, OR
+           new L<Bio::Map::MappableI> to set
+
+=cut
+
+sub element {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 Methods for all other Bio::Map::EntityI objects
+
+=cut
+
+=head2 add_positions
+
+ Title   : add_positions
+ Usage   : $position_handler->add_positions($pos1, $pos2, ...);
+ Function: Add some positions to yourself. You can't be a position.
+ Returns : n/a
+ Args    : Array of Bio::Map::PositionI objects
+
+=cut
+
+sub add_positions {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 get_positions
+
+ Title   : get_positions
+ Usage   : my @positions = $position_handler->get_positions();
+ Function: Get all your positions. You can't be a Position.
+ Returns : Array of Bio::Map::PositionI objects
+ Args    : none for all, OR
+           Bio::Map::EntityI object to limit the Positions to those that
+           are shared by you and this other entity.
+
+=cut
+
+sub get_positions {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 purge_positions
+
+ Title   : purge_positions
+ Usage   : $position_handler->purge_positions();
+ Function: Remove all positions from yourself. You can't be a Position.
+ Returns : n/a
+ Args    : none to remove all, OR
+           Bio::Map::PositionI object to remove only that entity, OR
+           Bio::Map::EntityI object to limit the removal to those Positions that
+           are shared by you and this other entity.
+
+=cut
+
+sub purge_positions {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+=head2 get_other_entities
+
+ Title   : get_other_entities
+ Usage   : my @entities = $position_handler->get_other_entities();
+ Function: Get all the entities that share your Positions. You can't be a
+           Position.
+ Returns : Array of Bio::Map::EntityI objects
+ Args    : none
+
+=cut
+
+sub get_other_entities {
+    # interface placeholder: this version only reports "not implemented"
+    shift->throw_not_implemented();
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionHandlerI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/PositionI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,910 @@
+# $Id: PositionI.pm,v 1.17.4.4 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::Map::PositionI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::PositionI - Abstracts the notion of a position having a value in the context of a marker and a Map
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+    # See Bio::Map::Position for an example of
+    # implementation.
+
+=head1 DESCRIPTION
+
+This object stores one of the positions that a mappable object
+(e.g. Marker) may have in a map.
+
+Positions can have non-numeric values or other methods to store the locations,
+so they have a method numeric() which does the conversion. numeric()
+returns the position in a form that can be compared between other positions of
+the same type. It is not necessarily a value suitable for sorting positions (it
+may be the distance from the previous position); for that purpose the result of
+sortable() should be used.
+
+A 'position', in addition to being a single point, can also be an area and so
+can be imagined as a range and compared with other positions on the basis of
+overlap, intersection etc.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Lincoln Stein, lstein-at-cshl.org
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::PositionI;
+use strict;
+use Bio::Map::PositionHandler;
+use Bio::Map::Mappable;
+use Scalar::Util qw(looks_like_number);
+
+use base qw(Bio::Map::EntityI Bio::RangeI);
+
+=head2 EntityI methods
+
+ These are fundamental to coordination of Positions and other entities, so are
+ implemented at the interface level
+
+=cut
+
+=head2 get_position_handler
+
+ Title   : get_position_handler
+ Usage   : my $position_handler = $entity->get_position_handler();
+ Function: Gets a PositionHandlerI that $entity is registered with.
+ Returns : Bio::Map::PositionHandlerI object
+ Args    : none
+
+=cut
+
+sub get_position_handler {
+    my ($self) = @_;
+    # the handler is created on first request and cached under the _eh key
+    return $self->{_eh} if defined $self->{_eh};
+    my $handler = Bio::Map::PositionHandler->new(-self => $self);
+    $self->{_eh} = $handler;    # stored before register() is called
+    $handler->register;
+    return $self->{_eh};
+}
+
+=head2 PositionHandlerI-related methods
+
+ These are fundamental to coordination of Positions and other entities, so are
+ implemented at the interface level
+
+=cut
+
+=head2 map
+
+ Title   : map
+ Usage   : my $map = $position->map();
+           $position->map($map);
+ Function: Get/Set the map the position is in.
+ Returns : L<Bio::Map::MapI>
+ Args    : none to get
+           new L<Bio::Map::MapI> to set
+
+=cut
+
+sub map {
+    my $self = shift;    # what follows (possibly nothing) is the map to assign
+    return $self->get_position_handler->map(shift);
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : my $element = $position->element();
+           $position->element($element);
+ Function: Get/Set the element the position is for.
+ Returns : L<Bio::Map::MappableI>
+ Args    : none to get
+           new L<Bio::Map::MappableI> to set
+
+=cut
+
+sub element {
+    my $self = shift;    # what follows (possibly nothing) is the element to assign
+    return $self->get_position_handler->element(shift);
+}
+
+=head2 marker
+
+ Title   : marker
+ Function: This is a synonym of the element() method
+ Status  : deprecated, will be removed in the next version
+
+=cut
+
+*marker = \&element;    # marker() is installed as a second name for element()'s code
+
+=head2 PositionI-specific methods
+
+=cut
+
+=head2 value
+
+ Title   : value
+ Usage   : my $pos = $position->value();
+ Function: Get/Set the value for this position
+ Returns : scalar, value
+ Args    : [optional] new value to set
+
+=cut
+
+sub value {
+    my ($self) = @_;    # placeholder only: no value is stored or read here
+    return $self->throw_not_implemented;
+}
+
+=head2 numeric
+
+ Title   : numeric
+ Usage   : my $num = $position->numeric;
+ Function: Read-only method that is guaranteed to return a numeric 
+           representation of the start of this position. 
+ Returns : scalar numeric
+ Args    : none to get the co-ordinate normally (see absolute() method), OR
+           Bio::Map::RelativeI to get the co-ordinate converted to be
+           relative to what this Relative describes.
+
+=cut
+
+sub numeric {
+    my ($self) = @_;    # placeholder only: any Relative argument is ignored
+    return $self->throw_not_implemented;
+}
+
+=head2 sortable
+
+ Title   : sortable
+ Usage   : my $num = $position->sortable();
+ Function: Read-only method that is guaranteed to return a value suitable
+           for correctly sorting this kind of position amongst other positions
+           of the same kind on the same map. Note that sorting different kinds
+           of position together is unlikely to give sane results.
+ Returns : numeric
+ Args    : none
+
+=cut
+
+sub sortable {
+    my ($self) = @_;    # placeholder only: no sort key is computed here
+    return $self->throw_not_implemented;
+}
+
+=head2 relative
+
+  Title   : relative
+  Usage   : my $relative = $position->relative();
+            $position->relative($relative);
+  Function: Get/set the thing this Position's coordinates (numeric(), start(),
+            end()) are relative to, as described by a Relative object.
+  Returns : Bio::Map::RelativeI (default is one describing "relative to the
+            start of the Position's map")
+  Args    : none to get, OR
+            Bio::Map::RelativeI to set
+
+=cut
+
+sub relative {
+    my ($self) = @_;    # placeholder only: no Relative is stored or returned
+    return $self->throw_not_implemented;
+}
+
+=head2 absolute
+
+  Title   : absolute
+  Usage   : my $absolute = $position->absolute();
+            $position->absolute($absolute);
+  Function: Get/set how this Position's co-ordinates (numeric(), start(),
+            end()) are reported. When absolute is off, co-ordinates are
+            relative to the thing described by relative(). Ie. the value
+            returned by start() will be the same as the value you set start()
+            to. When absolute is on, co-ordinates are converted to be relative
+            to the start of the map.
+
+            So if relative() currently points to a Relative object describing
+            "relative to another position which is 100 bp from the start of
+            the map", this Position's start() had been set to 50 and absolute()
+            returns 1, $position->start() will return 150. If absolute() returns
+            0 in the same situation, $position->start() would return 50.
+
+  Returns : boolean (default 0)
+  Args    : none to get, OR
+            boolean to set
+
+=cut
+
+sub absolute {
+    my ($self) = @_;    # placeholder only: the flag is neither read nor set
+    return $self->throw_not_implemented;
+}
+
+=head2 RangeI-based methods
+
+=cut
+
+=head2 start
+
+  Title   : start
+  Usage   : my $start = $position->start();
+            $position->start($start);
+  Function: Get/set the start co-ordinate of this position.
+  Returns : the start of this position
+  Args    : scalar numeric to set, OR
+            none to get the co-ordinate normally (see absolute() method), OR
+            Bio::Map::RelativeI to get the co-ordinate converted to be
+            relative to what this Relative describes.
+
+=cut
+
+=head2 end
+
+  Title   : end
+  Usage   : my $end = $position->end();
+            $position->end($end);
+  Function: Get/set the end co-ordinate of this position.
+  Returns : the end of this position
+  Args    : scalar numeric to set, OR
+            none to get the co-ordinate normally (see absolute() method), OR
+            Bio::Map::RelativeI to get the co-ordinate converted to be
+            relative to what this Relative describes.
+
+=cut
+
+=head2 length
+
+  Title   : length
+  Usage   : $length = $position->length();
+  Function: Get the length of this position.
+  Returns : the length of this position
+  Args    : none
+
+=cut
+
+=head2 strand
+
+  Title   : strand
+  Usage   : $strand = $position->strand();
+  Function: Get the strand of this position; it is always 1 since maps do not
+            have strands.
+  Returns : 1
+  Args    : none
+
+=cut
+
+sub strand {
+    1;    # constant result; arguments, including the invocant, are not looked at
+}
+
+=head2 toString
+
+  Title   : toString
+  Usage   : print $position->toString(), "\n";
+  Function: stringifies this range
+  Returns : a string representation of the range of this Position
+  Args    : optional Bio::Map::RelativeI to have the co-ordinates reported
+            relative to the thing described by that Relative
+
+=cut
+
+sub toString {
+    my ($self) = @_;    # placeholder only: no string is built here
+    return $self->throw_not_implemented;
+}
+
+=head1 RangeI-related methods
+
+These methods work by considering only the values of start() and end(), as
+modified by considering every such co-ordinate relative to the start of the map
+(ie. absolute(1) is set temporarily during the calculation), or any supplied
+Relative. For the boolean methods, when the comparison Position is on the same
+map as the calling Position, there is no point supplying a Relative since the
+answer will be the same as without. Relative is most useful when comparing
+Positions on different maps and you have a Relative that describes some special
+place on each map like 'the start of the gene', where the actual start of the
+gene relative to the start of the map is different for each map.
+
+The methods do not consider maps during their calculations - things on different
+maps can overlap/contain/intersect/etc. each other.
+
+The geometrical methods (intersect, union etc.) do things to the geometry of
+ranges, and return Bio::Map::PositionI compliant objects or triplets (start,
+stop, strand) from which new positions could be built. When a PositionI is made
+it will have a map transferred to it if all the arguments shared the same map.
+If a Relative was supplied the result will have that same Relative.
+
+Note that the strand-testing args are there for compatibility with the RangeI
+interface. They have no meaning when only using PositionI objects since maps do
+not have strands. Typically you will just set the argument to undef if you want
+to supply the argument after it.
+
+=head2 equals
+
+  Title   : equals
+  Usage   : if ($p1->equals($p2)) {...}
+  Function: Test whether $p1 has the same start, end, length as $p2.
+  Returns : true if they are describing the same position (regardless of map)
+  Args    : arg #1 = a Bio::RangeI (eg. a Bio::Map::Position) to compare this
+                     one to (mandatory)
+            arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+            arg #3 = optional Bio::Map::RelativeI to ask if the Positions
+                     equal in terms of their relative position to the thing
+                     described by that Relative
+
+=cut
+
+sub equals {
+    my ($self, $other, $so, $rel) = @_;
+    # Replaces Bio::RangeI's equals() so that a Relative can be honoured: both
+    # ranges are first resolved to plain numbers by _pre_rangei().
+    my ($my_from, $my_to)       = $self->_pre_rangei($self, $rel);
+    my ($their_from, $their_to) = $self->_pre_rangei($other, $rel);
+
+    my $strand_ok = $self->_testStrand($other, $so);
+    return $strand_ok && $my_from == $their_from && $my_to == $their_to;
+}
+
+
+=head2 less_than
+
+ Title   : less_than
+ Usage   : if ($position->less_than($other_position)) {...}
+ Function: Ask if this Position ends before another starts.
+ Returns : boolean
+ Args    : arg #1 = a Bio::RangeI (eg. a Bio::Map::Position) to compare this
+                    one to (mandatory)
+           arg #2 = optional Bio::Map::RelativeI to ask if the Position is less
+                    in terms of their relative position to the thing described
+                    by that Relative
+
+=cut
+
+sub less_than {
+    my ($self, $other, $rel) = @_;
+    # both ranges are resolved in the same way; only our end and the other's
+    # start take part in the comparison
+    my (undef, $own_end) = $self->_pre_rangei($self, $rel);
+    my ($other_start)    = $self->_pre_rangei($other, $rel);
+    return $own_end < $other_start;
+}
+
+=head2 greater_than
+
+ Title   : greater_than
+ Usage   : if ($position->greater_than($other_position)) {...}
+ Function: Ask if this Position starts after another ends.
+ Returns : boolean
+ Args    : arg #1 = a Bio::RangeI (eg. a Bio::Map::Position) to compare this
+                    one to (mandatory)
+           arg #2 = optional Bio::Map::RelativeI to ask if the Position is
+                    greater in terms of their relative position to the thing
+                    described by that Relative
+
+=cut
+
+sub greater_than {
+    my ($self, $other, $rel) = @_;
+    # both ranges are resolved in the same way; only our start and the other's
+    # end take part in the comparison
+    my ($own_start)        = $self->_pre_rangei($self, $rel);
+    my (undef, $other_end) = $self->_pre_rangei($other, $rel);
+    return $own_start > $other_end;
+}
+
+=head2 overlaps
+
+  Title   : overlaps
+  Usage   : if ($p1->overlaps($p2)) {...}
+  Function: Tests if $p1 overlaps $p2.
+  Returns : True if the positions overlap (regardless of map), false otherwise
+  Args    : arg #1 = a Bio::RangeI (eg. a Bio::Map::Position) to compare this
+                     one to (mandatory)
+            arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+            arg #3 = optional Bio::Map::RelativeI to ask if the Positions
+                     overlap in terms of their relative position to the thing
+                     described by that Relative
+
+=cut
+
+sub overlaps {
+    my ($self, $other, $so, $rel) = @_;
+    # RangeI's overlaps() is replaced here so that a Relative can be applied
+    my ($own_start, $own_end)     = $self->_pre_rangei($self, $rel);
+    my ($other_start, $other_end) = $self->_pre_rangei($other, $rel);
+
+    my $strand_ok = $self->_testStrand($other, $so);
+    return $strand_ok unless $strand_ok;    # failed strand test: hand back its false value
+    return !($own_start > $other_end || $own_end < $other_start);
+}
+
+=head2 contains
+
+  Title   : contains
+  Usage   : if ($p1->contains($p2)) {...}
+  Function: Tests whether $p1 totally contains $p2.
+  Returns : true if the argument is totally contained within this position
+            (regardless of map), false otherwise
+  Args    : arg #1 = a Bio::RangeI (eg. a Bio::Map::Position) to compare this
+                     one to, or scalar number (mandatory)
+            arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+            arg #3 = optional Bio::Map::RelativeI to ask if the Position
+                     is contained in terms of their relative position to the
+                     thing described by that Relative
+
+=cut
+
+sub contains {
+    my ($self, $other, $so, $rel) = @_;
+    # RangeI's contains() is replaced here so that a Relative can be applied
+    my ($outer_start, $outer_end) = $self->_pre_rangei($self, $rel);
+    my ($inner_start, $inner_end) = $self->_pre_rangei($other, $rel);
+
+    return $self->_testStrand($other, $so)
+        && $inner_start >= $outer_start
+        && $inner_end <= $outer_end;
+}
+
+=head2 intersection
+
+ Title   : intersection
+ Usage   : ($start, $stop, $strand) = $p1->intersection($p2)
+           ($start, $stop, $strand) = Bio::Map::Position->intersection(\@positions);
+           $mappable = $p1->intersection($p2, undef, $relative);
+           $mappable = Bio::Map::Position->intersection(\@positions);
+ Function: gives the range that is contained by all ranges
+ Returns : undef if they do not overlap, OR
+           Bio::Map::Mappable object whose positions are the
+           cross-map-calculated intersection of the input positions on all the
+           maps that the input positions belong to, OR, in list context, a three
+           element array (start, end, strand)
+ Args    : arg #1 = [REQUIRED] a Bio::RangeI (eg. a Bio::Map::Position) to
+                    compare this one to, or an array ref of Bio::RangeI
+           arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+           arg #3 = optional Bio::Map::RelativeI to ask how the Positions
+                    intersect in terms of their relative position to the thing
+                    described by that Relative
+
+=cut
+
+sub intersection {
+    # Overrides the RangeI implementation so the result can be placed on the
+    # inputs' maps and so an optional Relative can be honoured.
+    # Invocant: a Position (included in the calculation) or a class name.
+    # Args: $given = Position or array ref of Positions; $so = strand test
+    # option; $rel = optional Bio::Map::RelativeI.
+    # Returns nothing if any pair fails the strand test or does not overlap,
+    # otherwise a Bio::Map::Mappable holding the intersection on every known
+    # map. NOTE: list context is not special-cased anywhere in this code.
+    my ($self, $given, $so, $rel) = @_;
+    $self->throw("missing arg: you need to pass in another argument") unless $given;
+
+    if ($self eq "Bio::Map::PositionI") {
+        $self = "Bio::Map::Position";
+        $self->warn("calling static methods of an interface is deprecated; use $self instead");
+    }
+    my @positions;
+    push(@positions, $self) if ref $self;
+    push(@positions, ref($given) eq 'ARRAY' ? @{$given} : $given);
+    $self->throw("Need at least 2 Positions") unless @positions >= 2;
+
+    my ($intersect, $i_start, $i_end, $c_start, $c_end, %known_maps);
+    while (@positions > 0) {
+        unless ($intersect) {
+            # first pass: seed the running intersection with the first position
+            $intersect = shift(@positions);
+            ($i_start, $i_end) = $self->_pre_rangei($intersect, $rel);
+            my $map = $intersect->map;
+            # guard against a missing map, as is done for $compare below
+            $known_maps{$map->unique_id} = $map if $map;
+        }
+
+        my $compare = shift(@positions);
+        ($c_start, $c_end) = $self->_pre_rangei($compare, $rel);
+        return unless $compare->_testStrand($intersect, $so);
+        $compare->isa('Bio::Map::PositionI')
+            || $self->throw("Only Bio::Map::PositionI objects are supported, not [$compare]");
+        my $this_map = $compare->map;
+        $known_maps{$this_map->unique_id} = $this_map if $this_map;
+
+        my @starts = sort {$a <=> $b} ($i_start, $c_start);
+        my @ends   = sort {$a <=> $b} ($i_end, $c_end);
+        my $start = pop @starts; # larger of the 2 starts
+        my $end = shift @ends;   # smaller of the 2 ends
+
+        # the result only keeps a strand when both inputs agree on it
+        my $intersect_strand = 0;
+        if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
+            $intersect_strand = $compare->strand;
+        }
+        return if $start > $end;    # disjoint: there is no intersection
+
+        # Carry the running intersection forward. The cached bounds must move
+        # with it, otherwise a third or later position would be compared
+        # against the first position's range instead of the result so far.
+        ($i_start, $i_end) = ($start, $end);
+        $intersect = $self->new(-start  => $start,
+                                -end    => $end,
+                                -strand => $intersect_strand);
+    }
+
+    $intersect || return;
+    my ($start, $end, $strand) = ($intersect->start, $intersect->end, $intersect->strand);
+
+    # make one copy of the answer for each map the inputs were on
+    my @intersects;
+    foreach my $known_map (values %known_maps) {
+        my $new_intersect = $intersect->new(-start => $start,
+                                            -end => $end,
+                                            -strand => $strand,
+                                            -map => $known_map);
+        $new_intersect->relative($rel) if $rel;
+        push(@intersects, $new_intersect);
+    }
+    unless (@intersects) {
+        # no input was on a map: return the bare intersection itself
+        $intersect->relative($rel) if $rel;
+        @intersects = ($intersect);
+    }
+
+    my $result = Bio::Map::Mappable->new();
+    $result->add_position(@intersects); # sneaky, add_position can take a list of positions
+    return $result;
+}
+
+=head2 union
+
+ Title   : union
+ Usage   : ($start, $stop, $strand) = $p1->union($p2);
+           ($start, $stop, $strand) = Bio::Map::Position->union(@positions);
+           my $mappable = $p1->union($p2);
+           my $mappable = Bio::Map::Position->union(@positions);
+ Function: finds the minimal position/range that contains all of the positions
+ Returns : Bio::Map::Mappable object whose positions are the
+           cross-map-calculated union of the input positions on all the maps
+           that the input positions belong to, OR, in list context, a three
+           element array (start, end, strand)
+ Args    : a Bio::Map::PositionI to compare this one to, or a list of such
+           OR
+           a single Bio::Map::PositionI or array ref of such AND a
+           Bio::Map::RelativeI to ask for the Position's union in terms of their
+           relative position to the thing described by that Relative
+
+=cut
+
+sub union {
+    # Replaces the RangeI implementation so that the answer is placed on every
+    # map the inputs belong to and an optional Relative is taken into account.
+    # Invocant : a Position (which then takes part in the union) or a class name
+    # Args     : one Position or an array ref of them, optionally followed by a
+    #            Bio::Map::RelativeI and/or further Positions
+    # Returns  : Bio::Map::Mappable carrying one result Position per known map,
+    #            or a single map-less Position when no input was on a map
+    # Throws   : on too few arguments/positions, or on a non-PositionI input
+    # NOTE: the result is always a Mappable; list context is not special-cased
+    # anywhere in this code.
+
+    my ($self, @args) = @_;
+    $self->throw("Not enough arguments") unless @args >= 1;
+
+    # a call made through the interface class is redirected, with a warning
+    if ($self eq "Bio::Map::PositionI") {
+        $self = "Bio::Map::Position";
+        $self->warn("calling static methods of an interface is deprecated; use $self instead");
+    }
+
+    # Collect the positions: the invocant when called on an object, then the
+    # first argument (one position or an array ref of them).
+    my @positions;
+    push(@positions, $self) if ref $self;
+    my $first = shift(@args);
+    push(@positions, ref($first) eq 'ARRAY' ? @{$first} : $first);
+
+    # An optional Relative may follow; anything after that is treated as more
+    # positions, with false values dropped.
+    my $rel;
+    $rel = shift(@args) if $args[0] && $args[0]->isa('Bio::Map::RelativeI');
+    push(@positions, grep { $_ } @args);
+    $self->throw("Need at least 2 Positions") unless @positions >= 2;
+
+    my (@starts, @ends, %known_maps, $union_strand);
+    foreach my $position (@positions) {
+        # RangeI's union tolerates an undefined start or end, but _pre_rangei
+        # throws in that case
+        my ($from, $to) = $self->_pre_rangei($position, $rel);
+
+        $position->isa('Bio::Map::PositionI')
+            || $self->throw("Only Bio::Map::PositionI objects are supported, not [$position]");
+        my $this_map = $position->map;
+        $known_maps{$this_map->unique_id} = $this_map if $this_map;
+
+        # the first strand seen is adopted; any later disagreement (or an
+        # undefined strand) resets the union's strand to 0
+        if (! defined $union_strand) {
+            $union_strand = $position->strand;
+        }
+        elsif (! defined $position->strand or $union_strand ne $position->strand) {
+            $union_strand = 0;
+        }
+
+        push(@starts, $from);
+        push(@ends, $to);
+    }
+
+    # smallest start and largest end bound the union
+    my $start = (sort { $a <=> $b } @starts)[0];
+    my $end   = (sort { $a <=> $b } @ends)[-1];
+
+    # constructor arguments shared by every result position
+    my @range_args = (-start => $start, -end => $end, -strand => $union_strand);
+
+    my @unions;
+    foreach my $known_map (values %known_maps) {
+        my $on_map = $self->new(@range_args, -map => $known_map);
+        $on_map->relative($rel) if $rel;
+        push(@unions, $on_map);
+    }
+    unless (@unions) {
+        # none of the inputs was on a map: build a single map-less result
+        my $bare = $self->new(@range_args);
+        $bare->relative($rel) if $rel;
+        @unions = ($bare);
+    }
+
+    # add_position accepts a list, so all results go in with one call
+    my $result = Bio::Map::Mappable->new();
+    $result->add_position(@unions);
+    return $result;
+}
+
+=head2 overlap_extent
+
+ Title   : overlap_extent
+ Usage   : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
+ Function: Provides actual amount of overlap between two different
+           positions
+ Example :
+ Returns : array of values containing the length unique to the calling 
+           position, the length common to both, and the length unique to 
+           the argument position
+ Args    : a position
+
+=cut
+
+#*** should this be overridden from RangeI?
+
+=head2 disconnected_ranges
+
+ Title   : disconnected_ranges
+ Usage   : my @disc_ranges = Bio::Map::Position->disconnected_ranges(@ranges);
+ Function: Creates the minimal set of positions such that each input position is
+           fully contained by at least one output position, and none of the
+           output positions overlap.
+ Returns : Bio::Map::Mappable with the calculated disconnected ranges
+ Args    : a Bio::Map::PositionI to compare this one to, or a list of such,
+           OR
+           a single Bio::Map::PositionI or array ref of such AND a
+           Bio::Map::RelativeI to consider all Position's co-ordinates in terms
+           of their relative position to the thing described by that Relative
+
+=cut
+
+sub disconnected_ranges {
+    # Overrides the RangeI implementation so that the results can be placed on
+    # the inputs' maps and an optional Relative can be applied to every input.
+    my ($self, @args) = @_;
+    $self->throw("Not enough arguments") unless @args >= 1;
+    
+    my @positions;
+    my $rel;
+    if ($self eq "Bio::Map::PositionI") {
+		$self = "Bio::Map::Position";
+		$self->warn("calling static methods of an interface is deprecated; use $self instead");
+	}
+	if (ref $self) { # called on an object: the invocant is itself an input
+		push(@positions, $self);
+	}
+    if (ref $args[0] eq 'ARRAY') {
+        push(@positions, @{shift(@args)});
+    }
+    else {
+        push(@positions, shift(@args));
+    }
+    if ($args[0] && $args[0]->isa('Bio::Map::RelativeI')) { # optional Relative
+        $rel = shift(@args);
+    }
+    foreach my $arg (@args) {
+        push(@positions, $arg) if $arg; # false values are skipped
+    }
+    $self->throw("Need at least 2 Positions") unless @positions >= 2;
+    
+    my %known_maps; # unique_id => map, for each map an input position is on
+    foreach my $pos (@positions) {
+        $pos->isa('Bio::Map::PositionI') || $self->throw("Must supply only Bio::Map::PositionI objects, not [$pos]");
+        my $map = $pos->map || next;
+        $known_maps{$map->unique_id} = $map;
+    }
+    my %prior_positions; # positions found on those maps before any work is done
+    foreach my $map (values %known_maps) {
+        foreach my $pos ($map->get_positions) {
+            $prior_positions{$pos} = 1; # keyed by the stringified position object
+        }
+    }
+    
+    my @outranges = (); # the answer built up so far
+    foreach my $inrange (@positions) {
+        my @outranges_new = (); # outranges that do not overlap $inrange
+        my %overlapping_ranges = (); # keyed by toString() so identical results collapse
+        
+        for (my $i=0; $i<@outranges; $i++) {
+            my $outrange = $outranges[$i];
+            if ($inrange->overlaps($outrange, undef, $rel)) {
+                my $union_able = $inrange->intersection($outrange, undef, $rel); # NOTE(review): the intersection is what gets kept, while the POD promises each input is fully contained by an output - confirm intended
+                foreach my $pos ($union_able->get_positions) {
+                    $overlapping_ranges{$pos->toString} = $pos; # we flatten down to a result on a single map
+                                                                # to avoid creating 10s of thousands of positions during this process;
+                                                                # we then apply the final answer to all maps at the very end
+                }
+            }
+            else {
+                push(@outranges_new, $outrange);
+            }
+        }
+        
+        @outranges = @outranges_new; # overlapping outranges are dropped and replaced below
+        
+        my @overlappers = values %overlapping_ranges;
+        if (@overlappers) {
+            if (@overlappers > 1) {
+                my $merged_range_able = shift(@overlappers)->union(\@overlappers, $rel); # fold all overlap results into one
+                push(@outranges, $merged_range_able->get_positions);
+            }
+            else {
+                push(@outranges, @overlappers);
+            }
+        }
+        else {
+            push(@outranges, $self->new(-start => $inrange->start($rel), -end => $inrange->end($rel), -strand => $inrange->strand, -map => $inrange->map, -relative => $rel)); # nothing overlapped: add a copy of $inrange
+        }
+    }
+    
+    # purge positions that were created whilst calculating the answer, but
+    # aren't the final answer and weren't there previously
+    my %answers = map { $_ => 1 } @outranges;
+    foreach my $map (values %known_maps) {
+        foreach my $pos ($map->get_positions) {
+            if (! exists $prior_positions{$pos} && ! exists $answers{$pos}) {
+                $map->purge_positions($pos);
+            }
+        }
+    }
+    
+    my %post_positions; # NOTE(review): filled here but never read again in this sub
+    foreach my $map (values %known_maps) {
+        foreach my $pos ($map->get_positions) {
+            $post_positions{$pos} = 1;
+        }
+    }
+    
+    @outranges || return; # nothing to report
+    
+    # make an outrange on all known maps
+    my @final_positions;
+    foreach my $map (values %known_maps) {
+        foreach my $pos (@outranges) {
+            if ($pos->map eq $map) { # string comparison of the two map values
+                push(@final_positions, $pos);
+            }
+            else {
+                push(@final_positions, $pos->new(-start => $pos->start,
+                                                 -end => $pos->end,
+                                                 -relative => $pos->relative,
+                                                 -map => $map));
+            }
+        }
+    }
+    
+    # assign the positions to a result mappable
+    my $result = Bio::Map::Mappable->new();
+    $result->add_position(@final_positions); # sneaky, add_position can take a list of positions
+    
+    return $result;
+}
+
+# Get start & end suitable for the RangeI methods, taking Relative into account.
+sub _pre_rangei {
+    # Args   : $other = Bio::RangeI object or plain number (0 is accepted);
+    #          $rel   = optional Bio::Map::RelativeI to report relative to
+    # Returns: (start, end); a plain number is returned as both start and end
+    my ($self, $other, $rel) = @_;
+    $self->throw("Must supply an object") unless defined $other;
+    if ($rel) {
+        $self->throw("Must supply an object for the Relative argument") unless ref($rel);
+        $self->throw("This is [$rel], not a Bio::Map::RelativeI") unless $rel->isa('Bio::Map::RelativeI');
+    }
+    my ($other_start, $other_end);
+    if (ref($other)) {
+        $self->throw("_pre_rangei got an array") if ref($other) eq 'ARRAY';
+        # blessed() keeps unblessed references (e.g. a hash ref) from reaching
+        # ->isa, which would die with a raw Perl error instead of our message
+        $self->throw("This is [$other], not a Bio::RangeI object")
+            unless Scalar::Util::blessed($other) && $other->isa('Bio::RangeI');
+
+        if ($other->isa('Bio::Map::PositionI')) {
+            # to get the desired start/end we need the position to be on a map;
+            # if it isn't on one temporarily place it on self's map
+            # - this lets us have 'generic' positions that aren't on any map
+            # but have a relative defined and can thus be usefully compared to
+            # positions that /are/ on maps
+            my $other_map = $other->map;
+            unless ($other_map) {
+                my $self_map = $self->map || $self->throw("Trying to compare two positions but neither had been placed on a map");
+                $other->map($self_map);
+            }
+            # want start and end positions relative to the supplied rel or map start
+            $rel ||= $other->absolute_relative;
+            $other_start = $other->start($rel);
+            $other_end = $other->end($rel);
+
+            # undo the temporary placement made above
+            $self->map->purge_positions($other) unless $other_map;
+        }
+        else {
+            $other_start = $other->start;
+            $other_end = $other->end;
+        }
+    }
+    else {
+        $self->throw("not a number") unless looks_like_number($other);
+        $other_start = $other_end = $other;
+    }
+    $other->throw("start is undefined") unless defined $other_start;
+    $other->throw("end is undefined") unless defined $other_end;
+
+    return ($other_start, $other_end);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Relative.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/Relative.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/Relative.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,297 @@
+# $Id: Relative.pm,v 1.1.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::Relative
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::Relative - Represents what a Position's coordinates are relative to.
+
+=head1 SYNOPSIS
+
+    # Get a Bio::Map::PositionI somehow
+    my $pos = new Bio::Map::Position(-value => 100);
+
+    # its co-ordinates are implicitly relative to the start of its map
+    my $implicit_relative = $pos->relative;
+    my $type = $implicit_relative->type; # $type eq 'map'
+    my $value = $implicit_relative->$type(); # $value == 0
+
+    # make its co-ordinates relative to another Position
+    my $pos_we_are_relative_to = new Bio::Map::Position(-value => 200);
+    my $relative = new Bio::Map::Relative(-position => $pos_we_are_relative_to);
+    $pos->relative($relative);
+
+    # Get the start co-ordinate of $pos relative to $pos_we_are_relative_to
+    my $start = $pos->start; # $start == 100
+
+    # Get the start co-ordinate of $pos relative to the start of the map
+    my $abs_start = $relative->absolute_conversion($pos); # $abs_start == 300
+    # - or -
+    $pos->absolute(1);
+    my $abs_start = $pos->start; # $abs_start == 300
+    $pos->absolute(0);
+
+    # Get the start co-ordinate of $pos relative to a third Position
+    my $pos_frame_of_reference = new Bio::Map::Position(-value => 10);
+    my $relative2 = new Bio::Map::Relative(-position => $pos_frame_of_reference);
+    my $start = $pos->start($relative2); # $start == 290
+
+=head1 DESCRIPTION
+
+A Relative object is used to describe what the co-ordinates (numerical(),
+start(), end()) of a Position are relative to. By default they are
+implicitly assumed to be relative to the start of the map the Position is on.
+But setting the relative() of a Position to one of these objects lets us
+define otherwise.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::Relative;
+use strict;
+use Scalar::Util qw(looks_like_number);
+
+use base qw(Bio::Root::Root Bio::Map::RelativeI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $relative = new Bio::Map::Relative();
+ Function: Build a new Bio::Map::Relative object.
+ Returns : Bio::Map::Relative object
+ Args    : -map => int           : coordinates are relative to this point on the
+                                   Position's map [default is map => 0, ie.
+                                   relative to the start of the map],
+           -element => Mappable  : or relative to this element's (a
+                                   Bio::Map::MappableI) position in the map
+                                   (only works if the given element has only one
+                                   position in the map the Position belongs to),
+           -position => Position : or relative to this other Position (a
+                                   Bio::Map::PositionI, fails if the other
+                                   Position is on a different map to this map)
+
+           -description => string: Free text description of what this relative
+                                   describes
+
+           (To say a Position is relative to something and upstream of it,
+            the Position's start() co-ordinate should be set negative)
+
+=cut
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+    my $self  = $class->SUPER::new(@args);
+
+    # Pull the recognised named arguments out of the argument list.
+    my ($map, $element, $position, $desc) =
+        $self->_rearrange([qw( MAP ELEMENT POSITION DESCRIPTION )], @args);
+
+    # A Relative describes exactly one kind of reference point, so at most
+    # one of the three may be supplied.
+    my $supplied = 0;
+    foreach my $candidate ($map, $element, $position) {
+        $supplied++ if defined $candidate;
+    }
+    if ($supplied > 1) {
+        $self->throw("-map, -element and -position are mutually exclusive");
+    }
+
+    # -map is tested for definedness because 0 is a meaningful value; the
+    # others are tested for truth.
+    $self->map($map)           if defined $map;
+    $self->element($element)   if $element;
+    $self->position($position) if $position;
+    $self->description($desc)  if $desc;
+
+    return $self;
+}
+
+=head2 absolute_conversion
+
+ Title   : absolute_conversion
+ Usage   : my $absolute_coord = $relative->absolute_conversion($pos);
+ Function: Convert the start co-ordinate of the supplied position into a number
+           relative to the start of its map.
+ Returns : scalar number
+ Args    : Bio::Map::PositionI object
+
+=cut
+
+# Convert the start of $pos into a co-ordinate relative to the start of its
+# map, using this Relative as the description of what $pos is relative to.
+#   Args   : a Bio::Map::PositionI object
+#   Returns: the reference point's absolute value plus the raw start of $pos,
+#            or nothing if the reference point converts to undef
+#   Throws : if $pos is not a PositionI, its start is unset, this Relative
+#            has no type/value set yet, or (for type 'element') the Position
+#            has no map or the Mappable has no position on that map
+sub absolute_conversion {
+    my ($self, $pos) = @_;
+    $self->throw("Must supply an object") unless ref($pos);
+    $self->throw("This is [$pos], not a Bio::Map::PositionI") unless $pos->isa('Bio::Map::PositionI');
+
+    # get the raw start position of our position: absolute mode is switched
+    # off for the lookup and put back afterwards if it had been on
+    my $prior_abs = $pos->absolute;
+    $pos->absolute(0) if $prior_abs;
+    my $raw = $pos->start;
+    $pos->absolute($prior_abs) if $prior_abs;
+    $self->throw("Can't convert co-ordinates when start isn't set") unless defined($raw); #*** needed? return undef?
+
+    # what are we relative to? The type doubles as the name of the accessor
+    # holding the value, so it must be checked before it is used as a method
+    # name; otherwise an unset type dies inside the method call itself and
+    # the explanatory error below is never reached
+    my $type = $self->type;
+    my $value = $type ? $self->$type : undef;
+    $self->throw("Details not yet set for this Relative, cannot convert") unless $type && defined($value);
+
+    # get the absolute start of the thing we're relative to
+    my $map = $pos->map;
+    if ($type eq 'element') {
+        # a Mappable is resolved to its first position on the Position's map
+        $self->throw("Relative to a Mappable, but the Position has no map") unless $map;
+        my @positions = $value->get_positions($map);
+        $value = shift(@positions);
+        $self->throw("Relative to a Mappable, but this Mappable has no positions on the supplied Position's map") unless $value;
+    }
+    if (ref($value)) {
+        # pseudo-recurse: the reference Position is itself converted to an
+        # absolute co-ordinate via its own relative
+        # NOTE(review): assumes $value->relative always returns an object - confirm
+        my $rel = $value->relative;
+        $value = $rel->absolute_conversion($value);
+    }
+
+    if (defined($value)) {
+        return $value + $raw;
+    }
+    return;
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $relative->type();
+ Function: Get the type of thing we are relative to. The types correspond
+           to a method name, so the value of what we are relative to can
+           subsequently be found by $value = $relative->$type;
+
+           Note that type is set by the last method that was set, or during
+           new().
+
+ Returns : the string 'map', 'element' or 'position', or undef
+ Args    : none
+
+=cut
+
+sub type {
+    my ($self) = @_;
+
+    # _use records which setter (map, element or position) was called last;
+    # until one has been called there is nothing to report.
+    my $kind = $self->{_use};
+    return $kind if $kind;
+    return;
+}
+
+=head2 map
+
+ Title   : map
+ Usage   : my $int = $relative->map();
+           $relative->map($int);
+ Function: Get/set the distance from the start of the map that the Position's
+           co-ordinates are relative to.
+ Returns : int
+ Args    : none to get, OR
+           int to set; a value of 0 means relative to the start of the map.
+
+=cut
+
+sub map {
+    my $self = shift;
+    my $num  = shift;
+
+    # Setter: any defined argument must be numeric (0 is a valid offset) and
+    # makes 'map' the current type of this Relative.
+    if (defined $num) {
+        looks_like_number($num) or $self->throw("This is [$num], not a number");
+        @{$self}{qw(_use _map)} = ('map', $num);
+    }
+
+    # Getter: nothing is returned until an offset has been stored.
+    return unless defined $self->{_map};
+    return $self->{_map};
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : my $element = $relative->element();
+           $relative->element($element);
+ Function: Get/set the map element (Mappable) the Position is relative to. If
+           the Mappable has more than one Position on the Position's map, we
+           will be relative to the Mappable's first Position on the map.
+ Returns : Bio::Map::MappableI
+ Args    : none to get, OR
+           Bio::Map::MappableI to set
+
+=cut
+
+sub element {
+    my $self    = shift;
+    my $element = shift;
+
+    # Setter: a true argument must be a Bio::Map::MappableI object; storing
+    # it makes 'element' the current type of this Relative.
+    if ($element) {
+        ref($element)
+            or $self->throw("Must supply an object");
+        $element->isa('Bio::Map::MappableI')
+            or $self->throw("This is [$element], not a Bio::Map::MappableI");
+        @{$self}{qw(_use _element)} = ('element', $element);
+    }
+
+    # Getter: nothing is returned until an element has been stored.
+    my $stored = $self->{_element};
+    return $stored if $stored;
+    return;
+}
+
+=head2 position
+
+ Title   : position
+ Usage   : my $position = $relative->position();
+           $relative->position($position);
+ Function: Get/set the Position your Position is relative to. Your Position
+           will be made relative to the start of this supplied Position. It
+           makes no difference what maps the Positions are on.
+ Returns : Bio::Map::PositionI
+ Args    : none to get, OR
+           Bio::Map::PositionI to set
+
+=cut
+
+sub position {
+    my $self = shift;
+    my $pos  = shift;
+
+    # Setter: a true argument must be a Bio::Map::PositionI object; storing
+    # it makes 'position' the current type of this Relative.
+    if ($pos) {
+        ref($pos)
+            or $self->throw("Must supply an object");
+        $pos->isa('Bio::Map::PositionI')
+            or $self->throw("This is [$pos], not a Bio::Map::PositionI");
+        @{$self}{qw(_use _position)} = ('position', $pos);
+    }
+
+    # Getter: nothing is returned until a position has been stored.
+    my $stored = $self->{_position};
+    return $stored if $stored;
+    return;
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : my $description = $relative->description();
+           $relative->description($description);
+ Function: Get/set a textual description of what this relative describes.
+ Returns : string
+ Args    : none to get, OR
+           string to set
+
+=cut
+
+sub description {
+    my ($self, @new_value) = @_;
+
+    # Any argument at all (even undef) replaces the stored description.
+    $self->{desc} = $new_value[0] if @new_value;
+
+    # A false description (unset, '' or 0) is reported as the empty string.
+    my $text = $self->{desc};
+    return $text ? $text : '';
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/Relative.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/RelativeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/RelativeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/RelativeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,179 @@
+# $Id: RelativeI.pm,v 1.1.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::RelativeI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::RelativeI - Interface for describing what a Position's coordinates are
+                      relative to.
+
+=head1 SYNOPSIS
+
+    # do not use this module directly
+    # See Bio::Map::Relative for an example of
+    # implementation.
+
+=head1 DESCRIPTION
+
+A Relative object is used to describe what the co-ordinates (numerical(),
+start(), end()) of a Position are relative to. By default they are
+implicitly assumed to be relative to the start of the map the Position is on.
+But setting the relative() of a Position to one of these objects lets us
+define otherwise.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::RelativeI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 absolute_conversion
+
+ Title   : absolute_conversion
+ Usage   : my $absolute_coord = $relative->absolute_conversion($pos);
+ Function: Convert the start co-ordinate of the supplied position into a number
+           relative to the start of its map.
+ Returns : scalar number
+ Args    : Bio::Map::PositionI object
+
+=cut
+
+sub absolute_conversion {
+    # Interface placeholder: it only reports itself as not implemented, so
+    # a class implementing this interface is expected to supply the method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $relative->type();
+ Function: Get the type of thing we are relative to. The types correspond
+           to a method name, so the value of what we are relative to can
+           subsequently be found by $value = $relative->$type;
+
+           Note that type is set by the last method that was set, or during
+           new().
+
+ Returns : the string 'map', 'element' or 'position', or undef
+ Args    : none
+
+=cut
+
+sub type {
+    # Interface placeholder: it only reports itself as not implemented, so
+    # a class implementing this interface is expected to supply the method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 map
+
+ Title   : map
+ Usage   : my $int = $relative->map();
+           $relative->map($int);
+ Function: Get/set the distance from the start of the map that the Position's
+           co-ordinates are relative to.
+ Returns : int
+ Args    : none to get, OR
+           int to set; a value of 0 means relative to the start of the map.
+
+=cut
+
+sub map {
+    # Interface placeholder: it only reports itself as not implemented, so
+    # a class implementing this interface is expected to supply the method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : my $element = $relative->element();
+           $relative->element($element);
+ Function: Get/set the map element (Mappable) the Position is relative to. If
+           the Mappable has more than one Position on the Position's map, we
+           will be relative to the Mappable's first Position on the map.
+ Returns : Bio::Map::MappableI
+ Args    : none to get, OR
+           Bio::Map::MappableI to set
+
+=cut
+
+sub element {
+    # Interface placeholder: it only reports itself as not implemented, so
+    # a class implementing this interface is expected to supply the method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 position
+
+ Title   : position
+ Usage   : my $position = $relative->position();
+           $relative->position($position);
+ Function: Get/set the Position your Position is relative to. Your Position
+           will be made relative to the start of this supplied Position. It
+           makes no difference what maps the Positions are on.
+ Returns : Bio::Map::PositionI
+ Args    : none to get, OR
+           Bio::Map::PositionI to set
+
+=cut
+
+sub position {
+    # Interface placeholder: it only reports itself as not implemented, so
+    # a class implementing this interface is expected to supply the method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : my $description = $relative->description();
+           $relative->description($description);
+ Function: Get/set a textual description of what this relative describes.
+ Returns : string
+ Args    : none to get, OR
+           string to set
+
+=cut
+
+sub description {
+    # Interface placeholder: it only reports itself as not implemented, so
+    # a class implementing this interface is expected to supply the method.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Map/RelativeI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Map/SimpleMap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Map/SimpleMap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Map/SimpleMap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,355 @@
+# $Id: SimpleMap.pm,v 1.17.4.2 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Map::SimpleMap
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Map::SimpleMap - A MapI implementation handling the basics of a Map 
+
+=head1 SYNOPSIS
+
+    use Bio::Map::SimpleMap;
+
+    my $map = new Bio::Map::SimpleMap(-name => 'genethon',
+				      -type => 'Genetic',
+				      -units=> 'cM',
+				      -species => $human);
+
+    foreach my $marker ( @markers ) { # get a list of markers somewhere
+		$map->add_element($marker);
+    }
+
+    foreach my $marker ($map->get_elements) {
+        # do something with this Bio::Map::MappableI
+    }
+
+=head1 DESCRIPTION
+
+This is the basic implementation of a Bio::Map::MapI. It handles the
+essential storage of name, species, type, and units.
+
+It knows which map elements (mappables) belong to it, and their
+position.
+
+Subclasses might need to redefine or hardcode type(), length() and
+units().
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho heikki-at-bioperl-dot-org
+Lincoln Stein      lstein at cshl.org
+Sendu Bala         bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Map::SimpleMap;
+use vars qw($MAPCOUNT);
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Map::MapI);
+BEGIN { $MAPCOUNT = 1; }
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Map::SimpleMap();
+ Function: Builds a new Bio::Map::SimpleMap object
+ Returns : Bio::Map::SimpleMap
+ Args    : -name    => name of map (string)
+           -type    => type of map (string)
+           -species => species for this map (Bio::Species) [optional]
+           -units   => map units (string)
+           -uid     => Unique Id [defaults to a unique integer]
+
+=cut
+
+# Build a new SimpleMap.
+#   Args   : named arguments -name, -type, -species, -units and -uid, all
+#            optional
+#   Returns: the new object
+sub new {
+    # '@args' had been garbled to ' at args' by the archive's address
+    # obfuscation, which left this line a syntax error
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # start from empty defaults; each map takes the next value of the
+    # package-wide counter as its id unless -uid replaces it below
+    $self->{'_name'}    = '';
+    $self->{'_species'} = '';
+    $self->{'_units'}   = '';
+    $self->{'_type'}    = '';
+    $self->{'_uid'}     = $MAPCOUNT++;
+
+    my ($name, $type, $species, $units, $uid) =
+        $self->_rearrange([qw(NAME TYPE SPECIES UNITS UID)], @args);
+
+    # only arguments that were actually supplied override the defaults
+    defined $name     && $self->name($name);
+    defined $species  && $self->species($species);
+    defined $units    && $self->units($units);
+    defined $type     && $self->type($type);
+    defined $uid      && $self->unique_id($uid);
+
+    return $self;
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : my $species = $map->species;
+ Function: Get/Set Species for a map
+ Returns : Bio::Taxon object or string
+ Args    : (optional) Bio::Taxon or string
+
+=cut
+
+sub species {
+    my $self  = shift;
+    my $value = shift;
+
+    # A defined argument replaces the stored species; undef leaves it alone.
+    $self->{'_species'} = $value if defined $value;
+
+    return $self->{'_species'};
+}
+
+=head2 units
+
+ Title   : units
+ Usage   : $map->units('cM');
+ Function: Get/Set units for a map
+ Returns : units for a map
+ Args    : units for a map (string)
+
+=cut
+
+sub units {
+    my $self  = shift;
+    my $value = shift;
+
+    # A defined argument replaces the stored units; undef leaves them alone.
+    $self->{'_units'} = $value if defined $value;
+
+    return $self->{'_units'};
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $map->type
+ Function: Get/Set Map type
+ Returns : String coding map type
+ Args    : (optional) string
+
+=cut
+
+sub type {
+    my $self  = shift;
+    my $value = shift;
+
+    # Subclasses may hardcode or override this accessor.
+    # A defined argument replaces the stored type; undef leaves it alone.
+    $self->{'_type'} = $value if defined $value;
+
+    return $self->{'_type'};
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $map->name
+ Function: Get/Set Map name
+ Returns : Map name
+ Args    : (optional) string
+
+=cut
+
+sub name {
+    my $self  = shift;
+    my $value = shift;
+
+    # A defined argument replaces the stored name; undef leaves it alone.
+    $self->{'_name'} = $value if defined $value;
+
+    return $self->{'_name'};
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $length = $map->length();
+ Function: Retrieves the length of the map.
+           It is possible for the length to be unknown for maps such as
+           Restriction Enzyme, will return 0 in that case.
+ Returns : integer representing length of map in current units
+           will return 0 if length is not calculable
+ Args    : none
+
+=cut
+
+sub length {
+    my ($self) = @_;
+
+    # The length is the largest end() among this map's positions, or 0 when
+    # no position qualifies.
+    my $longest = 0;
+    for my $mappable ($self->get_elements) {
+        for my $position ($mappable->get_positions($self)) {
+            # positions whose value is false (unset, or 0) are ignored
+            next unless $position->value;
+            if ($position->end > $longest) {
+                $longest = $position->end;
+            }
+        }
+    }
+
+    return $longest;
+}
+
+=head2 unique_id
+
+ Title   : unique_id
+ Usage   : my $id = $map->unique_id;
+ Function: Get/Set the unique ID for this map
+ Returns : a unique identifier
+ Args    : [optional] new identifier to set
+
+=cut
+
+sub unique_id {
+    my $self = shift;
+    my $id   = shift;
+
+    # A defined argument replaces the stored id; undef leaves it alone.
+    $self->{'_uid'} = $id if defined $id;
+
+    return $self->{'_uid'};
+}
+
+=head2 add_element
+
+ Title   : add_element
+ Usage   : $map->add_element($element)
+ Function: Tell a Bio::Map::MappableI object its default Map is this one; same
+           as calling $element->default_map($map).
+
+		   *** does not actually add the element to this map! ***
+
+ Returns : none
+ Args    : Bio::Map::MappableI object
+ Status  : Deprecated, will be removed in next version
+
+=cut
+
+sub add_element {
+    my $self    = shift;
+    my $element = shift;
+
+    # a missing or false argument is silently ignored
+    if (!$element) {
+        return;
+    }
+
+    if (!$element->isa('Bio::Map::MappableI')) {
+        $self->throw("This is not a Bio::Map::MappableI object but a [$element]");
+    }
+
+    # this only records the map as the element's default map
+    $element->default_map($self);
+}
+
+=head2 get_elements
+
+ Title   : get_elements
+ Usage   : my @elements = $map->get_elements;
+ Function: Retrieves all the elements on a map (unordered unless all elements
+           have just 1 position on the map, in which case sorted)
+ Returns : Array of Map elements (L<Bio::Map::MappableI>)
+ Args    : none
+
+=cut
+
+# Return every element on this map.
+#   Args   : none
+#   Returns: list of elements as supplied by the parent class; when every
+#            element has exactly one position on this map and that position
+#            has a true value, the list is sorted by the positions'
+#            sortable() values
+sub get_elements {
+    my $self = shift;
+
+    my @elements = $self->SUPER::get_elements;
+
+    # for backward compatibility with MapIO tests, and for 'niceness', when
+    # there is only 1 position per element we will return the elements in
+    # order, as long as the positions have values set
+    my @sole_positions;
+    foreach my $element (@elements) {
+        my @positions = $element->get_positions($self);
+
+        # Anything other than exactly one position with a true value means
+        # the elements cannot be ordered, so hand them back as they are.
+        # This includes an element with no positions at all, which used to
+        # reach the sort below and fail calling sortable() on undef.
+        unless (@positions == 1 && $positions[0]->value) {
+            return @elements;
+        }
+        push @sole_positions, $positions[0];
+    }
+
+    # pair each element with its sort key, reusing the positions found above
+    # instead of asking every element for them a second time
+    @elements = map  { $_->[1] }
+                sort { $a->[0] <=> $b->[0] }
+                map  { [$sole_positions[$_]->sortable, $elements[$_]] }
+                0 .. $#elements;
+
+    return @elements;
+}
+
+=head2 each_element
+
+ Title   : each_element
+ Function: Synonym of the get_elements() method.
+ Status  : deprecated, will be removed in the next version
+
+=cut
+
+# Install each_element as a typeglob alias so both names run the same code.
+*each_element = \&get_elements;
+
+=head2 purge_element
+
+ Title   : purge_element
+ Usage   : $map->purge_element($element)
+ Function: Purge an element from the map.
+ Returns : none
+ Args    : Bio::Map::MappableI object
+
+=cut
+
+sub purge_element {
+    my $self    = shift;
+    my $element = shift;
+
+    # Validate in three steps: something was passed, it is a reference, and
+    # it is a Bio::Map::MappableI.
+    if (!$element) {
+        $self->throw("Must supply an argument");
+    }
+    if (!ref($element)) {
+        $self->throw("This is [$element], not an object");
+    }
+    if (!$element->isa('Bio::Map::MappableI')) {
+        $self->throw("This is [$element], not a Bio::Map::MappableI object");
+    }
+
+    # purging the element is delegated to purge_positions()
+    $self->purge_positions($element);
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $map->annotation($an_col);
+           my $an_col = $map->annotation();
+ Function: Get the annotation collection (see Bio::AnnotationCollectionI)
+           for this annotatable object.
+ Returns : a Bio::AnnotationCollectionI implementing object, or undef
+ Args    : none to get, OR
+           a Bio::AnnotationCollectionI implementing object to set
+
+=cut
+
+sub annotation {
+    my ($self, @new_value) = @_;
+
+    # Any argument at all (even undef) replaces the stored collection.
+    $self->{_annotation} = $new_value[0] if @new_value;
+
+    # Nothing is returned while no (true) collection is stored.
+    return unless $self->{_annotation};
+    return $self->{_annotation};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/fpc.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/fpc.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/fpc.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,528 @@
+# fpc.pm,v 1.2.2.1 2005/10/09 15:16:27 jason Exp
+#
+# BioPerl module for Bio::MapIO::fpc
+#
+# Cared for by Gaurav Gupta <gaurav at genome.arizona.edu>
+#
+# Copyright AGCoL
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::MapIO::fpc - A FPC Map reader
+
+=head1 SYNOPSIS
+
+# do not use this object directly; it is accessed through the Bio::MapIO system
+
+    use Bio::MapIO;
+
+     -format  : specifies the format of the file; for this module it is "fpc",
+     -file    : specifies the name of the .fpc file
+     -readcor : boolean argument, indicating if .cor is to be read
+                 or not. It looks for the .cor file in the same path
+                 as .fpc file.
+                 0 : doesn't read .cor file
+                 1 : reads the .cor file
+                 [default 0]
+     -verbose : indicates the process of loading of fpc file
+    my $mapio = new Bio::MapIO(-format  => "fpc",
+                               -file    => "rice.fpc",
+                               -readcor => 0,
+                               -verbose => 0);
+
+    my $map = $mapio->next_map();
+
+    foreach my $marker ( $map->each_markerid() ) {
+         # loop through the markers associated with the map
+         # likewise for contigs, clones, etc.
+    }
+
+
+=head1 DESCRIPTION
+
+This object contains code for parsing and processing FPC files and creating
+L<Bio::Map::Physical> object from it.
+
+For faster access and better optimization, the data is stored internally in
+hashes. The corresponding objects are created on request.
+
+We handle reading of the FPC ourselves, since MapIO module of Bioperl adds
+too much overhead.
+
+=cut
+
+# Let the code begin...
+
+package Bio::MapIO::fpc;
+use strict;
+use POSIX;
+
+use Bio::Map::Physical;
+use Bio::Map::Clone;
+use Bio::Map::Contig;
+use Bio::Map::FPCMarker;
+use Bio::Range;
+
+use base qw(Bio::MapIO);
+
+my $_readcor;
+
+=head1 Initializer
+
+=head2 _initialize
+
+ Title   : _initialize
+ Usage   : called implicitly
+ Function: calls the SUPER::_initialize
+ Returns : nothing
+ Args    : species, readcor
+
+=cut
+
+# Initialise the reader: run the parent class initialiser, then record the
+# -readcor flag.
+#   Args   : the constructor's named arguments; -species and -readcor are
+#            looked up here
+# The flag is kept in the file-level lexical $_readcor rather than on the
+# object, so it is shared by every reader created in the same process.
+sub _initialize{
+    # '@args' had been garbled to ' at args' by the archive's address
+    # obfuscation, which left this line a syntax error
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # -species is accepted but its value is not used in this method
+    (undef, $_readcor) = $self->_rearrange([qw(SPECIES READCOR)], @args);
+
+    # reading the .cor file is off unless explicitly requested
+    $_readcor = 0 unless (defined($_readcor));
+}
+
+=head1 Access Methods
+
+These methods let you get and set the member variables
+
+=head2 next_map
+
+ Title   : next_map
+ Usage   : my $fpcmap = $mapio->next_map();
+ Function: gets the fpcmap from MapIO
+ Returns : object of type L<Bio::Map::MapI>
+ Args    : none
+
+=cut
+
+sub next_map{
+
+    my ($self) = @_;
+
+    my $line;
+    my ($name,$fpcver,$moddate,$moduser,$contigcnt,$clonecnt,$markerscnt,
+        $bandcnt,$marker,$seqclone);
+    my ($corfile,$corindex,$BUFFER);
+    my @cordata;
+    my %fpcmarker;
+    my ($contig, $contigNumber);
+    my $curClone  = 0;
+    my $curMarker = 0;
+    my $curContig = 0;
+    my %_clones;
+    my %_markers;
+    my %_contigs;
+    my $ctgzeropos = 1;
+
+    my $map = new Bio::Map::Physical('-units' => 'CB',
+                                     '-type'  => 'physical');
+
+    my $filename = $self->file();
+    my $fh = $self->{'_filehandle'};
+
+    if (defined($_readcor)) {
+        $map->core_exists($_readcor);
+    }
+    else {
+        $map->core_exists(0);
+    }
+
+    if ($map->core_exists()) {
+        $corfile = substr($filename,0,length($filename)-3)."cor";
+        if (open(CORE,$corfile)) {
+            while(read(CORE,$BUFFER,2)) {
+                push(@cordata,unpack('n*', $BUFFER));
+            }
+        }
+        else {
+            $map->core_exists(0);
+        }
+    }
+
+    ## Read in the header
+    while (defined($line = <$fh>)) {
+        chomp($line);
+
+        if ($line =~ m{^//\s+fpc\s+project\s+(.+)}) { $map->name($1); }
+        if ($line =~ m{^//\s+([\d.]+)}) {
+            my $version = $1;
+            $version =~ /((\d+)\.(\d+))(.*)/;
+            $map->version($1);
+            if ($line =~ /User:\s+(.+)/) { $map->modification_user($1); }
+        }
+
+        if ($line =~ m{^//\s+Framework\s+(\w+)\s+(\w+)\s+([-\w]+)\s+(\w+)\s+(\w+)\s+(.+)$})
+        {
+            $map->group_type($3) if ($2 eq "Label");
+            $map->group_abbr($5) if ($4 eq "Abbrev");
+        }
+
+        last unless ($line =~ m{^//});
+    }
+
+    if (!defined($map->group_type()) || !defined($map->group_abbr()) ) {
+        $map->group_type("Chromosome");
+        $map->group_abbr("Chr");
+    }
+
+    $_contigs{0}{'range'}{'end'}   = 0;
+    $_contigs{0}{'range'}{'start'} = 0;
+
+    ## Read in the clone data
+    while (defined($line = <$fh>)) {
+        $marker = 0;
+        $contig = 0;
+        $seqclone = 0;
+        $contigNumber = 0;
+
+        my ($type,$name);
+        my (@amatch, at pmatch, at ematch);
+
+        my $bandsread = 0;
+
+        last if ($line =~ /^Markerdata/);
+
+
+        $line =~ /^(\w+)\s+:\s+"(.+)"/;
+
+        ## these will be set if we did find the clone line
+        ($type, $name) = ($1, $2);
+
+        if ($name =~ /sd1/) {
+            $seqclone = 1;
+        }
+
+        $_clones{$name}{'type'} = $type;
+        $_clones{$name}{'contig'} = 0;
+        $_contigs{'0'}{'clones'}{$name}  = 0;
+
+        my $temp;
+
+        ## Loop through the following lines, getting attributes for clone
+        while (defined($line = <$fh>) && $line !~ /^\s*\n$/)  {
+
+            if ($line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([-\d]+)/)  {
+                $_clones{$name}{'contig'} = $1;
+                $_contigs{$1}{'clones'}{$name} = 0;
+
+                delete($_contigs{'0'}{'clones'}{$name});
+
+                $temp = $3;
+                $contigNumber = $1;
+                $line = <$fh>;
+                $line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([\d]+)/;
+                $_clones{$name}{'range'}{'start'} = $temp;
+
+                $_contigs{$contigNumber}{'range'}{'start'} = $temp
+                    if (!exists($_contigs{$contigNumber}{'range'}{'start'})
+                        || $_contigs{$contigNumber}{'range'}{'start'}
+                        >  $temp );
+
+                $_clones{$name}{'range'}{'end'} = $3;
+
+                $_contigs{$contigNumber}{'range'}{'end'} = $3
+                    if (!exists($_contigs{$contigNumber}{'range'}{'end'})
+                        || $_contigs{$contigNumber}{'range'}{'end'} < $3 );
+
+            }
+            elsif ($line =~ /^([a-zA-Z]+)_match_to_\w+\s+"(.+)"/) {
+                my $matchtype = "match" . lc(substr($1, 0, 1));
+                $_clones{$name}{$matchtype}{$2} = 0;
+            }
+            elsif ($line =~ /^Positive_(\w+)\s+"(.+)"/) {
+                $_clones{$name}{'markers'}{$2} = 0;
+                $_markers{$2}{'clones'}{$name} = 0;
+                $_markers{$2}{'type'} = $1;
+                $_markers{$2}{'contigs'}{$contigNumber} = 0;
+                $_contigs{$contigNumber}{'markers'}{$2} = 0;
+            }
+            elsif ($line =~ /^Bands\s+(\d+)\s+(\d+)/ && !$bandsread) {
+                my $i = 0;
+                my @numbands;
+                $bandsread = 1;
+
+                if ($map->core_exists()) {
+                    while($i<$2){
+                        push(@numbands,$cordata[($1-1)+$i]);
+                        $i++;
+                    }
+                    $_clones{$name}{'bands'} = \@numbands;
+                }
+                else {
+                    push(@numbands,$1,$2);
+                    $_clones{$name}{'bands'} = \@numbands;
+                }
+                if (exists($_contigs{0}{'clones'}{$name})) {
+                    $_clones{$name}{'range'}{'start'} = $ctgzeropos;
+                    $_clones{$name}{'range'}{'end'} = $ctgzeropos + $2;
+                    $_contigs{0}{'range'}{'end'} = $ctgzeropos + $2;
+                    $ctgzeropos += $2;
+                }
+            }
+            elsif ($line =~ /^Gel_number\s+(.+)/) {
+                $_clones{$name}{'gel'} = $1;
+            }
+            elsif ($line =~ /^Remark\s+"(.+)"/)  {
+                $_clones{$name}{'remark'} .= $1;
+                $_clones{$name}{'remark'} .= "\n";
+                if($seqclone == 1 ) {
+                    if( $1 =~ /\,\s+Chr(\d+)\s+/){
+                        $_clones{$name}{'group'} = $1;
+                    }
+                }
+            }
+            elsif ($line =~ /^Fp_number\s+"(.+)"/) {
+                $_clones{$name}{'fp_number'} = $1;
+            }
+            elsif ($line =~ /^Shotgun\s+(\w+)\s+(\w+)/) {
+                $_clones{$name}{'sequence_type'} = $1;
+                $_clones{$name}{'sequence_status'} = $2;
+            }
+            elsif ($line =~ /^Fpc_remark\s+"(.+)"/) {
+                $_clones{$name}{'fpc_remark'} .= $1;
+                $_clones{$name}{'fpc_remark'} .= "\n";
+            }
+        }
+
+        $curClone++;
+        print "Adding clone $curClone...\n\r"
+            if ($self->verbose()  && $curClone % 1000 == 0);
+    }
+
+    $map->_setCloneRef(\%_clones);
+    $line = <$fh>;
+
+    while (defined($line = <$fh>) && $line !~ /Contigdata/) {
+        my ($type,$name);
+
+        last if ($line !~ /^Marker_(\w+)\s+:\s+"(.+)"/);
+
+        ($type, $name) = ($1, $2);
+
+        $_markers{$name}{'type'}   = $type;
+        $_markers{$name}{'group'}  = 0;
+        $_markers{$name}{'global'} = 0;
+        $_markers{$name}{'anchor'} = 0;
+
+        while (defined($line = <$fh>) && $line !~ /^\s*\n$/)  {
+            if ($line =~ /^Global_position\s+([\d.]+)\s*(Frame)?/)  {
+                my $position = $1 - floor($1/1000)*1000;
+                $position = sprintf("%.2f",$position);
+
+                $_markers{$name}{'global'} = $position;
+                $_markers{$name}{'group'}  = floor($1/1000);
+                $_markers{$name}{'anchor'} = 1;
+
+                if(defined($2)) {
+                    $_markers{$name}{'framework'} = 1;
+                }
+                else {
+                    $_markers{$name}{'framework'} = 0;
+                }
+            }
+            elsif ($line =~ /^Anchor_bin\s+"([\w\d.]+)"/) {
+                my $grpmatch = $1;
+                my $grptype  = $map->group_type();
+
+                $grpmatch =~ /(\d+|\w)(.*)/;
+
+                my ($group,$subgroup);
+                $group    = $1;
+                $subgroup = $2;
+
+                $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
+
+                $_markers{$name}{'group'}      = $group;
+                $_markers{$name}{'subgroup'}   = $subgroup;
+            }
+            elsif ($line =~ /^Anchor_pos\s+([\d.]+)\s+(F|P)?/){
+                $_markers{$name}{'global'}  = $1;
+                $_markers{$name}{'anchor'}  = 1;
+
+                if ($2 eq 'F') {
+                    $_markers{$name}{'framework'} = 1;
+                }
+                else {
+                    $_markers{$name}{'framework'} = 0;
+                }
+            }
+            elsif ($line =~ /^anchor$/) {
+                $_markers{$name}{'anchor'} = 1;
+            }
+            elsif ($line =~ /^Remark\s+"(.+)"/)  {
+                $_markers{$name}{'remark'} .= $1;
+                $_markers{$name}{'remark'} .= "\n";
+            }
+        }
+        $curMarker++;
+        print "Adding Marker $curMarker...\n"
+            if ($self->verbose() && $curMarker % 1000 == 0);
+    }
+
+    $map->_setMarkerRef(\%_markers);
+
+    my $ctgname;
+    my $grpabbr = $map->group_abbr();
+    my $chr_remark;
+
+    $_contigs{0}{'group'} = 0;
+
+    while (defined($line = <$fh>)) {
+
+        if ($line =~ /^Ctg(\d+)/) {
+            $ctgname = $1;
+            $_contigs{$ctgname}{'group'}      = 0;
+            $_contigs{$ctgname}{'anchor'}     = 0;
+            $_contigs{$ctgname}{'position'}   = 0;
+
+            if ($line =~ /#\w*(.*)\w*$/) {
+                $_contigs{$ctgname}{'remark'} = $1;
+                if ($line =~ /#\s+Chr(\d+)\s+/) {
+                    $_contigs{$ctgname}{'group'}  = $1;
+                    $_contigs{$ctgname}{'anchor'} = 1;
+                }
+            }
+        }
+        elsif ($line =~ /^Chr_remark\s+"(-|\+|Chr(\d+))\s+(.+)"$/) {
+
+            $_contigs{$ctgname}{'anchor'}     = 1;
+            $_contigs{$ctgname}{'chr_remark'} = $3 if(defined($3));
+
+            if (defined($2)) {
+                $_contigs{$ctgname}{'group'}  = $2;
+            }
+            else {
+                $_contigs{$ctgname}{'group'}  = "?";
+            }
+        }
+        elsif ($line =~ /^User_remark\s+"(.+)"/) {
+            $_contigs{$ctgname}{'usr_remark'} = $1;
+        }
+        elsif ($line =~ /^Trace_remark\s+"(.+)"/) {
+            $_contigs{$ctgname}{'trace_remark'} = $1;
+        }
+        elsif ($grpabbr && $line =~ /^Chr_remark\s+"(\W|$grpabbr((\d+)|(\w+)|([.\w\d]+)))\s*(\{(.*)\}|\[(.*)\])?"\s+(Pos\s+((\d.)+|NaN))(NOEDIT)?/)
+        {
+            my $grpmatch = $2;
+            my $pos = $10;
+            if ($pos eq "NaN") {
+                $pos = 0;
+                print "Warning: Nan encountered for Contig position \n";
+            }
+            $_contigs{$ctgname}{'chr_remark'}   = $6;
+            $_contigs{$ctgname}{'position'} = $pos;
+            $_contigs{$ctgname}{'subgroup'} = 0;
+
+            if (defined($grpmatch)) {
+                $_contigs{$ctgname}{'anchor'} = 1;
+
+                if ($grpmatch =~ /((\d+)((\D\d.\d+)|(.\d+)))|((\w+)(\.\d+))/) {
+
+                    my ($group,$subgroup);
+                    $group    = $2 if($grpabbr eq "Chr");
+                    $subgroup = $3 if($grpabbr eq "Chr");
+
+                    $group    = $7 if($grpabbr eq "Lg");
+                    $subgroup = $8 if($grpabbr eq "Lg");
+
+                    $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
+                    $_contigs{$ctgname}{'group'}     = $group;
+                    $_contigs{$ctgname}{'subgroup'}  = $subgroup;
+
+                }
+                else {
+                    $_contigs{$ctgname}{'group'} = $grpmatch;
+                }
+            }
+            else {
+                $_contigs{$ctgname}{'anchor'} = 1;
+                $_contigs{$ctgname}{'group'}  = "?";
+            }
+        }
+        $curContig++;
+        print "Adding Contig $curContig...\n"
+            if ($self->verbose() && $curContig % 100 == 0);
+    }
+
+    $map->_setContigRef(\%_contigs);
+    $map->_calc_markerposition();
+    $map->_calc_contigposition() if ($map->version() < 7.0);
+    $map->_calc_contiggroup() if ($map->version() == 4.6);
+
+    return $map;
+}
+
+
+=head2 write_map
+
+ Title   : write_map
+ Usage   : $mapio->write_map($map);
+ Function: Write a map out
+ Returns : none
+ Args    : Bio::Map::MapI
+
+=cut
+
+sub write_map{
+    # Writing is not supported for this format: the method exists only so
+    # that the class offers the full reader/writer interface, and it
+    # always raises via throw_not_implemented().
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+1;
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Gaurav Gupta
+
+Email gaurav at genome.arizona.edu
+
+=head1 PROJECT LEADERS
+
+Jamie Hatfield            jamie at genome.arizona.edu
+
+Dr. Cari Soderlund        cari at genome.arizona.edu
+
+=head1 PROJECT DESCRIPTION
+
+The project was done in Arizona Genomics Computational Laboratory
+(AGCoL) at University of Arizona.
+
+This work was funded by USDA-IFAFS grant #11180 titled "Web Resources
+for the Computation and Display of Physical Mapping Data".
+
+For more information on this project, please refer:
+  http://www.genome.arizona.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/mapmaker.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/mapmaker.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/MapIO/mapmaker.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: mapmaker.pm,v 1.10.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::MapIO::mapmaker
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::MapIO::mapmaker - A Mapmaker Map reader
+
+=head1 SYNOPSIS
+
+# Do not use this object directly; it is accessed through the Bio::MapIO system.
+
+    use Bio::MapIO;
+    my $mapio = new Bio::MapIO(-format => "mapmaker",
+			                      -file   => "mapfile.map");
+    while ( my $map = $mapio->next_map ) {  # get each map
+	    foreach my $marker ( $map->each_element ) {
+	       # loop through the markers associated with the map
+	    }
+    }
+
+=head1 DESCRIPTION
+
+This object contains code for parsing and processing Mapmaker output
+and creating L<Bio::Map::MapI> objects from it.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::MapIO::mapmaker;
+use strict;
+
+use Bio::Map::SimpleMap;
+use Bio::Map::LinkagePosition;
+use Bio::Map::Marker;
+
+use base qw(Bio::MapIO);
+
+=head2 next_map
+
+ Title   : next_map
+ Usage   : my $map = $factory->next_map;
+ Function: Get one or more map objects from the Mapmaker input
+ Returns : Bio::Map::MapI
+ Args    : none
+
+See L<Bio::Map::MapI>
+
+=cut
+
+sub next_map{
+    my ($self) = @_;
+    my $map = Bio::Map::SimpleMap->new(-name  => '',
+                                       -units => 'cM',
+                                       -type  => 'Genetic');
+
+    # Mapmaker input can be free-form, like the result of a copy-paste
+    # from a terminal, with no particular format before or after the
+    # map data. $in_map is a flag that is set once the "Markers Distance"
+    # header has been seen, i.e. while we are reading map data lines.
+    my $in_map          = 0;
+    my $runningDistance = 0;
+
+    # Read into a lexical so that the caller's $_ is never modified.
+    while ( defined( my $line = $self->_readline() ) ) {
+        if ( $line =~ /^\s+Markers\s+Distance/ ) {
+            $in_map = 1;
+            next;
+        }
+        next unless $in_map;
+
+        # split ' ' skips leading whitespace and splits on any run of
+        # whitespace, giving: order number, marker name, distance.
+        my ($number,$name,$distance) = split ' ', $line;
+
+        # The last marker of a map carries a row of dashes instead of a
+        # numeric distance.
+        my $is_last = ($distance =~ /-+/);
+
+        $runningDistance += $distance unless $is_last;
+        $runningDistance = '0.0' if ($runningDistance == 0 || $is_last);
+
+        my $pos = Bio::Map::LinkagePosition->new(-order => $number,
+                                                 -map   => $map,
+                                                 -value => $runningDistance );
+        # Constructed for its side effect of tying the marker to $pos
+        # (and through it to $map); the object itself is not used here.
+        my $marker = Bio::Map::Marker->new(-name     => $name,
+                                           -position => $pos );
+
+        return $map if $is_last;
+    }
+
+    # Input exhausted without reaching the end of a map.
+    return;
+}
+
+=head2 write_map
+
+ Title   : write_map
+ Usage   : $factory->write_map($map);
+ Function: Write a map out through the factory
+ Returns : none
+ Args    : Bio::Map::MapI
+
+=cut
+
+sub write_map{
+   # Writing Mapmaker files is not supported; this always raises via
+   # throw_not_implemented().
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/MapIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/MapIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/MapIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,235 @@
+# $Id: MapIO.pm,v 1.9.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::MapIO
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::MapIO - A Map Factory object
+
+=head1 SYNOPSIS
+
+    use Bio::MapIO;
+    my $mapio = new Bio::MapIO(-format => "mapmaker",
+			       -file   => "mapfile.map");
+
+    while( my $map = $mapio->next_map ) { 
+	# get each map
+	foreach my $marker ( $map->each_element ) {
+	    # loop through the markers associated with the map
+	}
+    }
+
+=head1 DESCRIPTION
+
+This is the Factory object for reading Maps from a data stream or file.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::MapIO;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Root::IO Bio::Factory::MapFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::MapIO();
+ Function: Builds a new Bio::MapIO object 
+ Returns : Bio::MapIO
+ Args    :
+
+
+=cut
+
+sub new {
+  my($caller,@args) = @_;
+
+  my $class = ref($caller) || $caller;
+
+  # Called on a format-specific subclass (Bio::MapIO::<format>): build the
+  # object through the parent constructors and initialize it.
+  if( $class =~ /Bio::MapIO::(\S+)/ ) {
+      my ($self) = $class->SUPER::new(@args);
+      $self->_initialize(@args);
+      return $self;
+  }
+
+  # Called on the factory class itself: decide on a format, load the
+  # matching Bio::MapIO::<format> module and delegate construction to it.
+  my %param = @args;
+  @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+
+  # Explicit -format wins, then a guess from the file name (or the first
+  # command-line argument), then the 'mapmaker' default.
+  my $format = $param{'-format'} ||
+      $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
+      'mapmaker';
+  $format = "\L$format";	# normalize capitalization to lower case
+
+  # Nothing is returned when the format module cannot be loaded.
+  return unless( $class->_load_format_module($format) );
+  return "Bio::MapIO::$format"->new(@args);
+}
+
+=head2 Bio::Factory::MapFactoryI methods
+
+=cut
+
+=head2 next_map
+
+ Title   : next_map
+ Usage   : my $map = $factory->next_map;
+ Function: Get a map from the factory
+ Returns : L<Bio::Map::MapI>
+ Args    : none
+
+
+=head2 write_map
+
+ Title   : write_map
+ Usage   : $factory->write_map($map);
+ Function: Write a map out through the factory
+ Returns : none
+ Args    : L<Bio::Map::MapI>
+
+=cut
+
+
+=head2 attach_EventHandler
+
+ Title   : attach_EventHandler
+ Usage   : $parser->attach_EventHandler($handler)
+ Function: Adds an event handler to listen for events
+ Returns : none
+ Args    : L<Bio::Event::EventHandlerI>
+
+=cut
+
+sub attach_EventHandler{
+    my ($self,$handler) = @_;
+
+    # Nothing to attach.
+    return if( ! $handler );
+
+    # Reject anything that is not an event handler. The warning states
+    # that the request is ignored, so return without storing it.
+    if( ! $handler->isa('Bio::Event::EventHandlerI') ) {
+        $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI');
+        return;
+    }
+
+    $self->{'_handler'} = $handler;
+    return;
+}
+
+=head2 _eventHandler
+
+ Title   : _eventHandler
+ Usage   : private
+ Function: Get the EventHandler
+ Returns : L<Bio::Event::EventHandlerI>
+ Args    : none
+
+
+=cut
+
+sub _eventHandler{
+   # Plain reader for the handler slot filled by attach_EventHandler().
+   my $self = shift;
+   return $self->{'_handler'};
+}
+
+# Reset the event handler slot and hand the constructor arguments to the
+# IO layer.
+sub _initialize {
+    my $self = shift;
+    my @io_args = @_;
+
+    # start without an event handler
+    $self->{'_handler'} = undef;
+
+    # initialize the IO part
+    $self->_initialize_io(@io_args);
+#    $self->attach_EventHandler(new Bio::MapIO::MapEventBuilder());
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL MapIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+  my ($self,$format) = @_;
+
+  # Run the load inside eval so that a failure is reported on STDERR
+  # rather than propagated; $ok stays undefined in that case.
+  my $ok = eval { $self->_load_module("Bio::MapIO::" . $format) };
+
+  if ( $@ ) {
+    print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the MapIO system please see the MapIO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+  }
+
+  return $ok;
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+sub _guess_format {
+   # Guess the map format from a file name's extension. The name is kept
+   # in a lexical so that the caller's $_ is not overwritten.
+   my ($class, $file) = @_;
+   return unless $file;
+   return 'mapmaker' if $file =~ /\.(map)$/i;
+   return 'mapxml'   if $file =~ /\.(xml)$/i;
+
+   # Unknown extension: nothing guessed, callers fall back to a default.
+   return;
+}
+
+# Destructor: closes the object via its close() method.
+sub DESTROY {
+    my ($self) = @_;
+    $self->close();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Generic.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Generic.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Generic.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,809 @@
+# $Id: Generic.pm,v 1.14.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Matrix::Generic
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::Generic - A generic matrix implementation
+
+=head1 SYNOPSIS
+
+  # A matrix has columns and rows 
+  my $matrix = Bio::Matrix::Generic->new;
+  $matrix->add_column(1,$column1);
+  $matrix->add_column(2,$column2);
+
+  my $element = $matrix->entry_by_num(1,2);
+  $matrix->entry_by_num(1,2,$newval);
+
+  my $entry = $matrix->entry('human', 'mouse');
+
+  $matrix->entry('human','mouse', $newval);
+
+
+=head1 DESCRIPTION
+
+This is a general purpose matrix object for dealing with row+column
+data which is typical when enumerating all the pairwise combinations
+and desiring to get slices of the data.
+
+Data can be accessed by column and row names or indexes.  Matrix
+indexes start at 0.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Matrix::Generic;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Matrix::MatrixI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Matrix::Generic();
+ Function: Builds a new Bio::Matrix::Generic object 
+ Returns : an instance of Bio::Matrix::Generic
+ Args    : -values     => arrayref of arrayrefs of data initialization 
+           -rownames   => arrayref of row names
+           -colnames   => arrayref of col names
+           -matrix_id  => id of the matrix
+           -matrix_name=> name of the matrix
+           -matrix_init_value => default value to initialize empty cells
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($values, $rownames, $colnames,
+      $id,$name,$init_val) =
+      $self->_rearrange([qw(VALUES ROWNAMES COLNAMES
+                            MATRIX_ID MATRIX_NAME
+                            MATRIX_INIT_VALUE)],@args);
+  $self->matrix_id($id) if  defined $id;
+  $self->matrix_name($name) if defined $name;
+
+  if( defined $rownames && defined $colnames ) {
+      # Row and column names were supplied (values are optional here).
+      if( ref($rownames) !~ /ARRAY/i ) {
+          $self->throw("need an arrayref for the -rownames option");
+      }
+      # ensure we copy the values rather than keep the caller's array
+      $self->{'_rownames'} = [ @$rownames ];
+      my $count = 0;
+      # name => index lookup table for the rows
+      %{$self->{'_rownamesmap'}} = map { $_ => $count++ } @$rownames;
+
+      if( ref($colnames) !~ /ARRAY/i ) {
+          $self->throw("need an arrayref for the -colnames option");
+      }
+      # ensure we copy the values rather than keep the caller's array
+      $self->{'_colnames'} = [ @$colnames ];
+      $count = 0;
+      # name => index lookup table for the columns
+      %{$self->{'_colnamesmap'}} = map { $_ => $count++ } @$colnames;
+
+      $self->{'_values'} = [];
+      if( defined $values ) {
+          if( ref($values) !~ /ARRAY/i ) {
+              $self->throw("Need an arrayref of arrayrefs (matrix) for -values option");
+          }
+          # copy each supplied row
+          for my $v ( @$values ) {
+              if( ref($v) !~ /ARRAY/i ) {
+                  $self->throw("Need and array of arrayrefs (matrix) for -values option");
+              }
+              push @{$self->{'_values'}}, [@$v];
+          }
+      } else {
+          # no data given: one row per row name, each cell set to the
+          # init value (undef when -matrix_init_value was not given)
+          my @fill = ($init_val) x scalar @$colnames;
+          for ( @$rownames ) {
+              push @{$self->{'_values'}}, [@fill];
+          }
+      }
+  } elsif( ! defined $rownames && ! defined $colnames && ! defined $values ) {
+      # completely empty matrix
+      $self->{'_values'}   = [];
+      $self->{'_rownames'} = [];
+      $self->{'_colnames'} = [];
+  } else {
+      $self->throw("Must have either provided no values/colnames/rownames or provided all three");
+  }
+
+  return $self;
+}
+
+
+=head2 matrix_id
+
+ Title   : matrix_id
+ Usage   : my $id = $matrix->matrix_id
+ Function: Get/Set the matrix ID
+ Returns : scalar value
+ Args    : [optional] new id value to store
+
+
+=cut
+
+sub matrix_id{
+   # Combined getter/setter: any argument (even undef) replaces the
+   # stored ID; the current ID is returned either way.
+   my ($self, @value) = @_;
+   $self->{'_matid'} = $value[0] if @value;
+   return $self->{'_matid'};
+}
+
+=head2 matrix_name
+
+ Title   : matrix_name
+ Usage   : my $name = $matrix->matrix_name();
+ Function: Get/Set the matrix name
+ Returns : scalar value
+ Args    : [optional] new matrix name value
+
+
+=cut
+
+sub matrix_name{
+   # Combined getter/setter: any argument (even undef) replaces the
+   # stored name; the current name is returned either way.
+   my ($self, @value) = @_;
+   $self->{'_matname'} = $value[0] if @value;
+   return $self->{'_matname'};
+}
+
+
+=head2 entry
+
+ Title   : entry
+ Usage   : my $entry = $matrix->entry($row,$col)
+ Function: Get the value for a specific cell as specified
+           by the row and column names
+ Returns : scalar value or undef if row or col does not
+           exist
+ Args    : $rowname - name of the row
+           $colname - column name
+
+=cut
+
+sub entry{
+   my ($self,$row,$column,$newvalue) = @_;
+
+   # both names are mandatory
+   $self->throw("Need at least 2 ids")
+       unless( defined $row && defined $column );
+
+   # translate the names into indexes and delegate to the numeric accessor
+   my ($rownum) = $self->row_num_for_name($row);
+   my ($colnum) = $self->column_num_for_name($column);
+   return $self->entry_by_num($rownum,$colnum,$newvalue);
+}
+
+=head2 get_entry
+
+ Title   : get_entry
+ Usage   : my $entry = $matrix->get_entry($rowname,$columname)
+ Function: Get the entry for a given row,column pair
+ Returns : scalar
+ Args    : $row name
+           $column name 
+
+
+=cut
+
+sub get_entry{
+    # Read-only lookup by names: only row and column are forwarded to entry().
+    my ($self,$row,$column) = @_;
+    return $self->entry($row,$column);
+}
+
+=head2 entry_by_num
+
+ Title   : entry_by_num
+ Usage   : my $entry = $matrix->entry_by_num($rownum,$colnum)
+ Function: Get an entry by row and column numbers instead of by name
+           (rows and columns start at 0)
+ Returns : scalar value or undef if row or column name does not
+           exist
+ Args    : $row - row number
+           $col - column number
+           [optional] $newvalue to store at this cell
+
+=cut
+
+sub entry_by_num {
+    my ($self,$row,$col,$newvalue) = @_;
+
+    # both indexes must be present and purely numeric
+    unless( defined $row && defined $col &&
+            $row =~ /^\d+$/ &&
+            $col =~ /^\d+$/ ) {
+        $self->warn("expected to get 2 number for entry_by_num");
+        return;
+    }
+
+    # an undefined new value means "read"; anything else is stored
+    return $self->_values->[$row][$col] unless defined $newvalue;
+    return $self->_values->[$row][$col] = $newvalue;
+}
+
+# Alias for entry(): every argument is passed straight through.
+sub get_element {
+    my ($self, @entry_args) = @_;
+    $self->entry(@entry_args);
+}
+
+
+=head2 column
+
+ Title   : column
+ Usage   : my @col = $matrix->column('ALPHA');
+           OR
+           $matrix->column('ALPHA', \@col);
+ Function: Get/Set a particular column
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values.  
+           For setting will warn if the new column is of a different
+           length from the rest of the columns.
+ Args    : name of the column
+           [optional] new column to store here 
+
+=cut
+
+sub column{
+    my ($self,$column,$newcol) = @_;
+
+    unless( defined $column ) {
+        $self->warn("Need at least a column id");
+        return;
+    }
+
+    # resolve the name to an index; the numeric accessor does the work
+    my $colnum  = $self->column_num_for_name($column);
+    if( defined $colnum ) {
+        return $self->column_by_num($colnum,$newcol);
+    }
+
+    $self->warn("could not find column number for $column");
+    return;
+}
+
+
+=head2 get_column
+
+ Title   : get_column
+ Usage   : my @row = $matrix->get_column('ALPHA');
+ Function: Get a particular column
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values
+ Args    : name of the column
+
+
+=cut
+
+sub get_column {
+    # Read-only variant of column(): only the column name is forwarded.
+    my ($self,$column) = @_;
+    return $self->column($column);
+}
+
+
+=head2 column_by_num
+
+ Title   : column_by_num
+ Usage   : my @col = $matrix->column_by_num(1);
+           OR
+           $matrix->column_by_num(1,\@newcol);
+ Function: Get/Set a column by its number instead of name
+           (cols/rows start at 0)
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values
+ Args    : number of the column
+           [optional] new value to store for a particular column
+
+=cut
+
+sub column_by_num{
+    my ($self,$colnum,$newcol) = @_;
+    if( ! defined $colnum ) {
+        $self->warn("need at least a column number");
+        return;
+    }
+    my $rowcount = $self->num_rows;
+    my $ret;
+
+    if( defined $newcol ) {
+        # Setter: the replacement must be an arrayref with exactly one
+        # value per existing row.
+        if( ref($newcol) !~ /ARRAY/i) {
+            $self->warn("expected a valid arrayref for resetting a column");
+            return;
+        }
+        if( scalar @$newcol != $rowcount ) {
+            $self->warn("new column is not the correct length ($rowcount) - call add or remove row to shrink or grow the number of rows first");
+            return;
+        }
+        for my $i ( 0 .. $rowcount - 1 ) {
+            $self->entry_by_num($i,$colnum,$newcol->[$i]);
+        }
+        # the caller's own arrayref is handed back
+        $ret = $newcol;
+    } else {
+        # Getter: collect the cell of this column from every row.
+        $ret = [];
+        for my $i ( 0 .. $rowcount - 1 ) {
+            push @$ret,$self->entry_by_num($i,$colnum);
+        }
+    }
+
+    # list in list context, arrayref otherwise
+    return wantarray ? @$ret : $ret;
+}
+
+=head2 row
+
+ Title   : row
+ Usage   : my @row = $matrix->row($rowname);
+             OR
+           $matrix->row($rowname,\@rowvalues);
+ Function: Get/Set the row of the matrix
+ Returns : Array (in array context) or arrayref (in scalar context)
+ Args    : rowname
+           [optional] new value of row to store
+
+
+=cut
+
+sub row {
+    my ($self,$row,$newrow) = @_;
+    if( ! defined $row) {
+        $self->warn("Need at least a row id");
+        return;
+    }
+
+    # Resolve the name to an index. An unknown name is reported here, in
+    # the same way column() reports an unknown column name, instead of
+    # being passed on as an undefined row number.
+    my $rownum = $self->row_num_for_name($row);
+    if( ! defined $rownum ) {
+        $self->warn("could not find row number for $row");
+        return;
+    }
+    return $self->row_by_num($rownum,$newrow);
+}
+
+
+=head2 get_row
+
+ Title   : get_row
+ Usage   : my @row = $matrix->get_row('ALPHA');
+ Function: Get a particular row
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values
+ Args    : name of the row
+
+=cut
+
+sub get_row {
+    # Read-only variant of row(): only the row name is forwarded.
+    my ($self,$row) = @_;
+    return $self->row($row);
+}
+
+=head2 row_by_num
+
+ Title   : row_by_num
+ Usage   : my @row = $matrix->row_by_num($rownum);
+             OR
+           $matrix->row_by_num($rownum,\@rowvalues);
+ Function: Get/Set the row of the matrix
+ Returns : Array (in array context) or arrayref (in scalar context)
+ Args    : row number
+           [optional] new value of row to store
+
+=cut
+
+sub row_by_num{
+    my ($self,$rownum,$newrow) = @_;
+    unless( defined $rownum ) {
+        $self->warn("need at least a row number");
+        return;
+    }
+    my $colcount = $self->num_columns;
+    my @colidx   = ( 0 .. $colcount - 1 );
+    my $ret;
+
+    if( ! defined $newrow ) {
+        # read: build a fresh array, i.e. an explicit copy of the row
+        $ret = [ map { $self->entry_by_num($rownum,$_) } @colidx ];
+    } else {
+        # write: needs an arrayref holding one value per column
+        if( ref($newrow) !~ /ARRAY/i) {
+            $self->warn("expected a valid arrayref for resetting a row");
+            return;
+        }
+        if( scalar @$newrow != $colcount ) {
+            $self->warn("new row is not the correct length ($colcount) - call add or remove column to shrink or grow the number of columns first");
+            return;
+        }
+        $self->entry_by_num($rownum,$_, $newrow->[$_]) for @colidx;
+        $ret = $newrow;
+    }
+
+    return @$ret if wantarray;
+    return $ret;
+}
+
+
+=head2 get_diagonal
+
+ Title   : get_diagonal
+ Usage   : my @diagonal = $matrix->get_diagonal()
+ Function: Get the diagonal of a matrix
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values which lie along the diagonal
+ Args    : none
+
+
+=cut
+
+sub get_diagonal{
+   my ($self) = @_;
+   my $rowcount = $self->num_rows;
+
+   # cell (i,i) for every row index i
+   my @diag;
+   for my $i ( 0 .. $rowcount - 1 ) {
+       push @diag, $self->entry_by_num($i,$i);
+   }
+
+   # List in list context and arrayref in scalar context, as documented
+   # (returning the bare array would yield the element count in scalar
+   # context).
+   return wantarray ? @diag : \@diag;
+}
+
+
+=head2 add_row
+
+ Title   : add_row
+ Usage   : $matrix->add_row($index,$name,\@newrow);
+ Function: Adds a row at particular location in the matrix.
+           If $index < the rowcount will shift all the rows down
+           by the number of new rows.
+           To add a single empty row, simply call
+           $matrix->add_row($index,$name,undef);
+ Returns : the updated number of total rows in the matrix
+ Args    : index to store
+           name of the row (header)
+           newrow to add, if this is undef will add a single
+                     row with all values set to undef 
+
+=cut
+
+sub add_row{
+   my ($self,$index,$name,$newrow) = @_;
+
+   # Validate the arguments: numeric index, and a row name that is not
+   # in use yet.
+   if( !defined $index ||
+       $index !~ /^\d+$/ ) {
+       $self->warn("expected a valid row index in add_row");
+       return;
+   } elsif( ! defined $name) {
+       $self->warn("Need a row name or heading");
+       return;
+   } elsif( defined $self->row_num_for_name($name) ) {
+       $self->warn("Need a unique name for the row heading, $name is already used");
+       return;
+   }
+   my $colcount = $self->num_columns;
+   my $rowcount = $self->num_rows;
+
+   # An index past the end is clamped so the row is appended.
+   if( $index >  $rowcount ) {
+       $self->warn("cannot add a row beyond 1+last row at the end ($rowcount) not $index - adding at $rowcount instead");
+       $index = $rowcount;
+   }
+
+   if( ! defined $newrow ) {
+       # empty row: exactly one undefined value per column
+       $newrow = [ (undef) x $colcount ];
+   } elsif( ref($newrow) !~ /ARRAY/i ) {
+       $self->throw("Expected either undef or a valid arrayref for add_row");
+   }
+
+   # add this row to the matrix by carving out space for it with
+   # splice, then fill it cell by cell
+   splice(@{$self->{'_values'}}, $index,0,[]);
+   for my $i ( 0 .. $colcount - 1 ) {
+       $self->entry_by_num($index,$i,$newrow->[$i]);
+   }
+   splice(@{$self->{'_rownames'}}, $index,0,$name);
+
+   # Sadly we have to remap these each time (except for the case
+   # when we're adding a new row to the end, but I don't think
+   # the speedup for that case warrants the extra code at this time.
+   my $ct = 0;
+   %{$self->{'_rownamesmap'}} = map { $_ => $ct++} @{$self->{'_rownames'}};
+   return $self->num_rows;
+}
+
+=head2 remove_row
+
+ Title   : remove_row
+ Usage   : $matrix->remove_row($rownum)
+ Function: remove a row from the matrix shifting all the rows
+           up by one
+ Returns : Updated number of rows in the matrix
+ Args    : row index
+
+
+=cut
+
+sub remove_row{
+   # Remove the row at $rowindex (0-based) together with its name, then
+   # rebuild the name -> index map so the remaining rows are renumbered.
+   # Returns the updated number of rows, or 0 (after a warning) when
+   # the index cannot be used.
+   my ($self,$rowindex) = @_;
+   my $rowcount = $self->num_rows;
+
+   if( ! defined $rowindex ) {
+       # an undef index would otherwise be treated as 0 by splice
+       $self->warn("expected a valid row index in remove_row");
+       return 0;
+   }
+   # valid indices run from 0 to $rowcount-1, so $rowindex == $rowcount
+   # is already out of range (the old test used '>' and let it through)
+   if( $rowindex >= $rowcount ) {
+       $self->warn("rowindex $rowindex is greater than or equal to the number of rows $rowcount, cannot process");
+       return 0;
+   }
+   splice(@{$self->_values},$rowindex,1);
+   splice(@{$self->{'_rownames'}},$rowindex,1);
+
+   # the whole map is reassigned here, so no separate delete of the
+   # removed name is needed
+   my $ct = 0;
+   %{$self->{'_rownamesmap'}} = map { $_ => $ct++} @{$self->{'_rownames'}};
+   return $self->num_rows;
+}
+
+=head2 add_column
+
+ Title   : add_column
+ Usage   : $matrix->add_column($index,$colname,\@newcol);
+ Function: Adds a column at particular location in the matrix.
+           If $index < the colcount will shift all the columns right
+           by the number of new columns.
+           To add a single empty column, simply call
+           $matrix->add_column($index,$colname,undef);
+ Returns : the updated number of total columns in the matrix
+ Args    : index to store
+           name of the column (header)
+           newcolumn to add, if this is undef will add a single
+                 column with all values set to undef 
+
+
+=cut
+
+
+sub add_column{
+   # Insert a new, uniquely named column at position $index, shifting
+   # any later columns to the right by one.
+   #   $index  - non-negative integer position for the new column
+   #   $name   - column name (header); must not already be in use
+   #   $newcol - arrayref with one value per row, or undef for a
+   #             placeholder column
+   # Returns the updated number of columns, or nothing (after a
+   # warning) when the index or name is unusable.  Throws when $newcol
+   # is defined but is not an array reference.
+   my ($self,$index,$name,$newcol) = @_;
+   if( !defined $index ||
+       $index !~ /^\d+$/ ) {
+       $self->warn("expected a valid col index in add_column");
+       return;
+   } elsif( ! defined $name) {
+       $self->warn("Need a column name or heading");
+       return;
+   } elsif( defined $self->column_num_for_name($name) ) {
+       $self->warn("Need a unique name for the column heading, $name is already used");
+       return;
+   }
+   my $colcount = $self->num_columns;
+   my $rowcount = $self->num_rows;
+   if( $index > $colcount ) { 
+       $self->warn("cannot add a column beyond 1+last column at the end ($colcount) not $index - adding at $colcount instead");
+       $index = $colcount;
+   }
+
+   if( ! defined $newcol ) {
+       $newcol = [];
+       $newcol->[$rowcount] = undef; # make the array '$rowcount' long
+   } elsif( ref($newcol) !~ /ARRAY/i ) {
+       # the message used to name add_row
+       $self->throw("Expected either undef or a valid arrayref for add_column");
+   }
+   for( my $i = 0; $i < $rowcount; $i++ ) {
+       # Open up a slot for the new cell in this row.  The placeholder is
+       # undef rather than an empty arrayref, so a cell for which no
+       # value was supplied can never be left holding a stray reference.
+       splice(@{$self->_values->[$i]},$index,0,undef);
+       $self->entry_by_num($i,$index,$newcol->[$i]);
+   }
+   splice(@{$self->{'_colnames'}}, $index,0,$name);
+   # Sadly we have to remap these each time (except for the case
+   # when we're adding a new column to the end, but I don't think
+   # the speedup for that case warrants the extra code at this time.
+   my $ct = 0;
+   %{$self->{'_colnamesmap'}} = map {$_ => $ct++} @{$self->{'_colnames'}};
+   return $self->num_columns;
+}
+
+=head2 remove_column
+
+ Title   : remove_column
+ Usage   : $matrix->remove_column($colnum)
+ Function: remove a column from the matrix shifting all the columns
+           to the left by one
+ Returns : Updated number of columns in the matrix
+ Args    : column index
+
+=cut
+
+sub remove_column{
+   # Remove the column at $colindex (0-based) from every row, drop its
+   # name, then rebuild the name -> index map so the remaining columns
+   # are renumbered.  Returns the updated number of columns, or 0
+   # (after a warning) when the index cannot be used.
+   my ($self,$colindex) = @_;
+
+   my $colcount = $self->num_columns;
+   my $rowcount = $self->num_rows;
+   if( ! defined $colindex ) {
+       # an undef index would otherwise be treated as 0 by splice
+       $self->warn("expected a valid col index in remove_column");
+       return 0;
+   }
+   # valid indices run from 0 to $colcount-1, so $colindex == $colcount
+   # is already out of range (the old test used '>' and let it through)
+   if( $colindex >= $colcount ) {
+       $self->warn("colindex $colindex is greater than or equal to the number of columns ($colcount), cannot process");
+       return 0;
+   }
+   for(my $i = 0; $i < $rowcount; $i++ ) {
+       splice(@{$self->_values->[$i]},$colindex,1);
+   }
+   splice(@{$self->{'_colnames'}},$colindex,1);
+
+   # the whole map is reassigned here, so no separate delete of the
+   # removed name is needed
+   my $ct = 0;
+   %{$self->{'_colnamesmap'}} = map {$_ => $ct++} @{$self->{'_colnames'}};
+   return $self->num_columns;
+}
+
+=head2 column_num_for_name
+
+ Title   : column_num_for_name
+ Usage   : my $num = $matrix->column_num_for_name($name)
+ Function: Gets the column number for a particular column name
+ Returns : integer
+ Args    : string
+
+
+=cut
+
+sub column_num_for_name{
+   # Look up the index stored for column $name in the '_colnamesmap'
+   # hash; the result is undef when the name has no entry.
+   my $self = shift;
+   my $name = shift;
+   return $self->{'_colnamesmap'}{$name};
+}
+
+=head2 row_num_for_name
+
+ Title   : row_num_for_name
+ Usage   : my $num = $matrix->row_num_for_name($name)
+ Function: Gets the row number for a particular row name
+ Returns : integer
+ Args    : string
+
+
+=cut
+
+sub row_num_for_name{
+   # Look up the index stored for row $name in the '_rownamesmap' hash;
+   # the result is undef when the name has no entry.
+   my $self = shift;
+   my $name = shift;
+   return $self->{'_rownamesmap'}{$name};
+}
+
+
+=head2 column_header
+
+ Title   : column_header
+ Usage   : my $name = $matrix->column_header(0)
+ Function: Gets the column header for a particular column number
+ Returns : string
+ Args    : integer
+
+
+=cut
+
+sub column_header{
+   # Return the entry of the '_colnames' list at position $num.
+   my $self = shift;
+   my $num  = shift;
+   return $self->{'_colnames'}[$num];
+}
+
+
+=head2 row_header
+
+ Title   : row_header
+ Usage   : my $name = $matrix->row_header(0)
+ Function: Gets the row header for a particular row number
+ Returns : string
+ Args    : integer
+
+
+=cut
+
+sub row_header{
+   # Return the entry of the '_rownames' list at position $num.
+   my $self = shift;
+   my $num  = shift;
+   return $self->{'_rownames'}[$num];
+}
+
+=head2 num_rows
+
+ Title   : num_rows
+ Usage   : my $rowcount = $matrix->num_rows;
+ Function: Get the number of rows
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_rows{
+   # The row count is the number of elements in the array that
+   # _values() hands back.
+   my $self = shift;
+   my $rows = $self->_values;
+   return scalar @$rows;
+}
+
+
+=head2 num_columns
+
+ Title   : num_columns
+ Usage   : my $colcount = $matrix->num_columns
+ Function: Get the number of columns
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_columns{
+   # The column count is taken from the length of the first row only;
+   # when there is no first row the count is 0.
+   my $self = shift;
+   my $first_row = $self->_values->[0] || [];
+   return scalar @$first_row;
+}
+
+
+=head2 row_names
+
+ Title   : row_names
+ Usage   : my @rows = $matrix->row_names
+ Function: The names of all the rows
+ Returns : array in array context, arrayref in scalar context
+ Args    : none
+
+
+=cut
+
+sub row_names{
+   # List context: the row names as a list.
+   # Scalar context: the stored '_rownames' array reference itself
+   # (not a copy).
+   my ($self) = @_;
+   my $names = $self->{'_rownames'};
+   return wantarray ? @$names : $names;
+}
+
+
+=head2 column_names
+
+ Title   : column_names
+ Usage   : my @columns = $matrix->column_names
+ Function: The names of all the columns
+ Returns : array in array context, arrayref in scalar context
+ Args    : none
+
+
+=cut
+
+sub column_names{
+   # List context: the column names as a list.
+   # Scalar context: the stored '_colnames' array reference itself
+   # (not a copy).
+   my ($self) = @_;
+   my $names = $self->{'_colnames'};
+   return wantarray ? @$names : $names;
+}
+
+=head2 private methods
+
+Private methods for a Generic Matrix
+
+=head2 _values
+
+ Title   : _values
+ Usage   : $matrix->_values();
+ Function: get/set for array ref of the matrix containing
+           distance values 
+ Returns : an array reference 
+ Args    : an array reference
+
+
+=cut
+
+sub _values{
+   # Combined getter/setter for the '_values' slot.  A true argument
+   # replaces the stored value; a false or missing argument leaves it
+   # untouched.  The current value is returned either way.
+   my $self = shift;
+   my $val  = shift;
+   $self->{'_values'} = $val if $val;
+   return $self->{'_values'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/phylip.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/phylip.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/phylip.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,191 @@
+# $Id: phylip.pm,v 1.3.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Matrix::IO::phylip
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::IO::phylip - A parser for PHYLIP distance matrices
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::IO;
+  my $parser = new Bio::Matrix::IO(-format   => 'phylip',
+                                   -file     => 't/data/phylipdist.out');
+  my $matrix = $parser->next_matrix;
+
+=head1 DESCRIPTION
+
+This is a parser for PHYLIP distance matrix output.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Matrix::IO::phylip;
+use vars qw($DEFAULTPROGRAM);
+use strict;
+
+$DEFAULTPROGRAM = 'phylipdist';
+
+use Bio::Matrix::PhylipDist;
+
+use base qw(Bio::Matrix::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Matrix::IO::phylip();
+ Function: Builds a new Bio::Matrix::IO::phylip object 
+ Returns : an instance of Bio::Matrix::IO::phylip
+ Args    :
+
+
+=cut
+
+sub new {
+  # Build the parser through the parent constructor, then remember the
+  # program name given with -program (falling back to $DEFAULTPROGRAM);
+  # next_matrix() later passes it on as the matrix name.
+  # The argument list is unpacked as ($class,@args); the archived text
+  # had the '@' sigil mangled, which is not valid Perl.
+  my ($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($prog) = $self->_rearrange([qw(PROGRAM)], @args);
+  $self->{'_program'} = $prog || $DEFAULTPROGRAM;
+  return $self;
+}
+
+
+=head2 next_matrix
+
+ Title   : next_matrix
+ Usage   : my $matrix = $parser->next_matrix
+ Function: Get the next result set from parser data
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : none
+
+
+=cut
+
+sub next_matrix {
+   # Read one PHYLIP distance matrix from the input stream.
+   # A line holding only an indented integer is the size header; a line
+   # starting with whitespace followed by a decimal number continues the
+   # previous row; any other line starts a new row whose first field is
+   # the name.  Returns a Bio::Matrix::PhylipDist object, or nothing
+   # when no rows were read.
+   my ($self) = @_;
+   my @names;
+   my @values;
+   my $entry;
+   my $size = 0;
+   # test definedness so a final line such as "0" does not end the loop
+   while( defined( $entry = $self->_readline ) ) {
+       if( @names && $entry =~ /^\s+\d+\n$/ ) {
+           # size header of the *next* matrix: hand back the line we
+           # just read (this used to push back $_, not $entry)
+           $self->_pushback($entry);
+           last;
+       } elsif( $entry =~ /^\s+(\d+)\n$/ ) {
+           $size = $1;
+           next;
+       } elsif( $entry =~ s/^\s+(\-?\d+\.\d+)/$1/ ) {
+           # continuation of the previous row
+           if( ! @values ) {
+               # $values[-1] on an empty array is a fatal error
+               $self->warn("found a continuation line before any named row, skipping it");
+               next;
+           }
+           push @{$values[-1]}, split( /\s+/,$entry);
+           next;
+       }
+       my ($n,@line) = split( /\s+/,$entry);
+
+       push @names, $n;
+       push @values, [@line];
+   }
+   if( scalar @names != $size ) {
+       $self->warn("The number of entries ".(scalar @names).
+                   " is not the same $size");
+   }
+   return unless @names;
+
+   # map every (row name, column name) pair to its [row,column] indices
+   my %dist;
+   my $i=0;
+   foreach my $name(@names){
+       my $j=0;
+       foreach my $n(@names) {
+           $dist{$name}{$n} = [$i,$j];
+           $j++;
+       }
+       $i++;
+   }
+   my $matrix = Bio::Matrix::PhylipDist->new
+       (-matrix_name => $self->{'_program'},
+        -matrix      => \%dist,
+        -names       => \@names,
+        -values      => \@values);
+   return $matrix;
+}
+
+=head2 write_matrix
+
+ Title   : write_matrix
+ Usage   : $matio->write_matrix($matrix)
+ Function: Write out a matrix in the phylip distance format
+ Returns : none
+ Args    : L<Bio::Matrix::PhylipDist>
+
+
+=cut
+
+sub write_matrix {
+    # Write each matrix passed in as PHYLIP distance text: a header line
+    # with the number of names, then one line per name holding the name
+    # padded to 15 characters followed by that row's values separated by
+    # two spaces.  Output goes through $self->_print.
+    my ($self,@matrices) = @_;
+    foreach my $matrix ( @matrices ) {
+        my @names  = @{$matrix->names};
+        my @values = @{$matrix->_values};
+        my %matrix = %{$matrix->_matrix};
+        my $str = (" " x 4). scalar(@names)."\n";
+        foreach my $name (@names){
+            # pad short names to 15 characters; names of 15 or more
+            # characters are kept whole and followed by a single space
+            # (sprintf avoids the negative repeat count of the old code)
+            my $newname = sprintf("%-15s", $name);
+            if( length($name) >= 15 ) { $newname .= " " }
+            $str .= $newname;
+            # %matrix maps (row name, column name) to [row,column] indices
+            $str .= join("  ",
+                         map { my ($i,$j) = @{$matrix{$name}{$_}};
+                               $values[$i][$j] } @names);
+            $str .= "\n";
+        }
+        $self->_print($str);
+    }
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/scoring.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/scoring.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO/scoring.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+# $Id: scoring.pm,v 1.4.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Matrix::IO::scoring
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::IO::scoring - A parser for PAM/BLOSUM matrices
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::IO;
+  my $parser = new Bio::Matrix::IO(-format => 'scoring',
+                                   -file   => 'BLOSUM50');
+  my $matrix = $parser->next_matrix;
+
+=head1 DESCRIPTION
+
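+This is a parser for PAM/BLOSUM style scoring matrix files.  Each call
+to next_matrix returns a L<Bio::Matrix::Scoring> object.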
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Matrix::IO::scoring;
+use strict;
+
+use Bio::Matrix::Scoring;
+use base qw(Bio::Matrix::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Matrix::IO::scoring();
+ Function: Builds a new Bio::Matrix::IO::scoring object 
+ Returns : an instance of Bio::Matrix::IO::scoring
+ Args    :
+
+
+=cut
+
+=head2 next_matrix
+
+ Title   : next_matrix
+ Usage   : my $matrix = $parser->next_matrix
+ Function: parses a scoring matrix (BLOSUM,PAM styles) 
+ Returns : L<Bio::Matrix::Scoring>
+ Args    : none
+
+
+=cut
+
+sub next_matrix{
+   # Parse one BLOSUM/PAM style scoring matrix from the input stream.
+   # '#' comment lines ahead of the matrix are mined for metadata, the
+   # first indented line supplies the column names, and each following
+   # line supplies a row name plus its values.  A '#' line met after the
+   # matrix has started is pushed back and ends the parse.
+   # Returns the Bio::Matrix::Scoring object built from what was read.
+   # NOTE(review): an object is built even when no rows were read, so
+   # this never signals end of input to a caller - confirm intent.
+   my ($self) = @_;
+   local ($_);
+   my (@matrix,@cols,@rows,%extras,$inmatrix);
+   while( defined ( $_ = $self->_readline ) ) {
+       next if ( /^\s*$/);
+       if( /^\#/ ) {
+           if( $inmatrix ) { 
+               $self->_pushback($_);
+               last;
+           }
+           if( m/Entropy\s+\=\s+(\S+)\,\s+
+               Expected\s+\=\s+(\S+)/ox ) {
+               $extras{'-entropy'} = $1;
+               $extras{'-expected'} = $2;
+           } elsif ( m/Expected\s+score\s+\=\s+(\S+)\,
+                     \s+Entropy\s+\=\s+(\S+)/xo ){
+               $extras{'-entropy'} = $2;
+               $extras{'-expected'} = $1;
+           } elsif( m/(PAM\s+\d+)\s+substitution.+
+                    scale\s+\=\s+(\S+)\s+\=\s+(\S+)/ox ) {
+               $extras{'-matrix_name'} = $1;
+               $extras{'-scale'}       = $2;
+               $extras{'-scale_value'} = $3;
+           } elsif( /Blocks Database\s+\=\s+(\S+)/o ) {
+               $extras{'-database'} = $1;
+           } elsif( m/(\S+)\s+Bit\s+Units/ox ) {
+               $extras{'-scale'} = $1;
+           } elsif( m/Lowest\s+score\s+\=\s+(\S+)\,\s+
+                    Highest\s+score\s+\=\s+(\S+)/ox ) {
+               # under /x a literal space in the pattern is ignored, so
+               # the words must be joined with \s+ for this to match
+               $extras{'-lowest_score'} = $1;
+               $extras{'-highest_score'} = $2;
+           } elsif( m/(Lambda)\s+\=\s+(\S+)\s+bits\,
+                    \s+(H)\s+\=\s+(\S+)/ox ) {
+               # This is a DNA matrix
+               $extras{$1} = $2;
+               $extras{$3} = $4;
+           }
+       } elsif( s/^\s+(\S+)/$1/ ) {
+           # indented line: the column headings
+           @cols = split;
+           if( $cols[0] ne 'A' ) {
+               $self->warn("Unrecognized first line of matrix, we might not have parsed it correctly");
+           }
+           $inmatrix = 1;
+       } elsif( $inmatrix ) {
+           if( ! /^(\S+)/ ) { $inmatrix = 0; next }
+           my ($rowname,@row) = split;
+           push @rows, $rowname;
+           push @matrix, [@row];
+       } else { 
+           # NOTE(review): lines that match nothing above are echoed to
+           # the currently selected output handle - looks like leftover
+           # debugging, kept to preserve behaviour.
+           print;
+       }
+   }
+   my $matrix = Bio::Matrix::Scoring->new(-values     => \@matrix,
+                                          -rownames   => \@rows,
+                                          -colnames   => \@cols,
+                                          %extras);
+   return $matrix;
+}
+
+=head2 write_matrix
+
+ Title   : write_matrix
+ Usage   : $matio->write_matrix($matrix)
+ Function: Write out a matrix in the BLOSUM/PAM format
+ Returns : none
+ Args    : L<Bio::Matrix::Scoring>
+
+
+=cut
+
+sub write_matrix{
+   # Not implemented for this format: warns and returns nothing.
+   # The argument list is unpacked as ($self,@args); the archived text
+   # had the '@' sigil mangled, which is not valid Perl.
+   my ($self,@args) = @_;
+   $self->warn("cannot actually use this function yet - it isn't finished");
+   return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,257 @@
+# $Id: IO.pm,v 1.5.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Matrix::IO
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::IO - A factory for Matrix parsing
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::IO;
+  my $parser = new Bio::Matrix::IO(-format => 'scoring',
+                                   -file   => 'BLOSUM50');
+
+  my $matrix = $parser->next_matrix;
+
+=head1 DESCRIPTION
+
+This is a general factory framework for writing parsers for matrices.
+This includes parsing output from distance output like PHYLIP's
+ProtDist.  Additionally it should be possible to fit parsers for PWM
+and PSSMs once their Matrix objects are written.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Matrix::IO;
+use strict;
+
+
+use base qw(Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Matrix::IO();
+ Function: Builds a new Bio::Matrix::IO object 
+ Returns : an instance of Bio::Matrix::IO
+ Args    :
+
+
+=cut
+
+sub new { 
+  # Factory constructor.
+  # Called on a format subclass (Bio::Matrix::IO::<something>) it builds
+  # the object through the parent constructor and runs _initialize().
+  # Called on Bio::Matrix::IO itself it works out the format from
+  # -format, else from the file name (-file or $ARGV[0]), else defaults
+  # to 'scoring'; it then loads that format module and delegates to its
+  # new().  Returns nothing when the format module cannot be loaded.
+  # The argument list is unpacked as ($caller,@args); the archived text
+  # had the '@' sigil mangled, which is not valid Perl.
+  my ($caller,@args) = @_;
+  my $class = ref($caller) || $caller;
+
+  # or do we want to call SUPER on an object if $caller is an
+  # object?
+  if( $class =~ /Bio::Matrix::IO::(\S+)/ ) {
+      my ($self) = $class->SUPER::new(@args);
+      $self->_initialize(@args);
+      return $self;
+  } else { 
+
+      my %param = @args;
+      @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+      my $format = $param{'-format'} || 
+          $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
+              'scoring';
+      $format = "\L$format";  # normalize capitalization to lower case
+
+      return unless( $class->_load_format_module($format) );
+      return "Bio::Matrix::IO::$format"->new(@args);
+  }
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::Matrix::IO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::Matrix::IO->newFh(-file=>$filename,-format=>'Format')
+           $matrix = <$fh>;   # read a matrix object
+           print $fh $matrix; # write a matrix object
+ Returns : filehandle tied to the Bio::Matrix::IO class
+ Args    :
+
+=cut
+
+sub newFh {
+  # Convenience wrapper: construct a parser with new() and hand back
+  # the tied filehandle from fh().  Returns nothing when new() does not
+  # produce an object.
+  my ($class,@ctor_args) = @_;
+  my $self = $class->new(@ctor_args);
+  return unless $self;
+  return $self->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function: Get a filehandle type access to the matrix parser
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $matrix = <$fh>;     # read a matrix object
+           print $fh $matrix;   # write a matrix object
+ Returns : filehandle tied to Bio::Matrix::IO class
+ Args    : none
+
+=cut
+
+
+sub fh {
+  # Create a fresh glob with Symbol::gensym and tie it to this object's
+  # class, passing the object itself to TIEHANDLE.
+  my ($self) = @_;
+  my $tie_class = ref($self) || $self;
+  my $glob = Symbol::gensym;
+  tie $$glob,$tie_class,$self;
+  return $glob;
+}
+
+
+=head2 next_matrix
+
+ Title   : next_matrix
+ Usage   : my $matrix = $matrixio->next_matrix;
+ Function: Parse the next matrix from the data stream
+ Returns : L<Bio::Matrix::MatrixI> type object or undef when finished
+ Args    : none
+
+
+=cut
+
+sub next_matrix{
+   # Base-class placeholder: only calls throw_not_implemented(); the
+   # format modules supply the real parser.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 write_matrix
+
+ Title   : write_matrix
+ Usage   : $io->write_matrix($matrix)
+ Function: Writes a matrix out to the data stream
+ Returns : none
+ Args    : Array of Bio::Matrix::MatrixI object
+          - note that not all matrices can be converted to 
+            each format, beware with mixing matrix types and output formats
+
+=cut
+
+sub write_matrix{
+   # Base-class placeholder: only calls throw_not_implemented(); the
+   # format modules supply the real writer.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+sub _initialize {
+    # Forward the constructor arguments to _initialize_io().
+    # The argument list is unpacked as ($self,@args); the archived text
+    # had the '@' sigil mangled, which is not valid Perl.
+    my ($self,@args) = @_;
+    $self->_initialize_io(@args);
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL Matrix::IO stuff*
+ Function: Loads up (like use) a module at run time on demand
+
+=cut
+
+sub _load_format_module {
+  # Try to load Bio::Matrix::IO::<format> through _load_module().
+  # Returns whatever _load_module() returned; when loading raised an
+  # exception a diagnostic is printed to STDERR and the (unset) result
+  # is returned.
+  my ($self,$format) = @_;
+  my $package = "Bio::Matrix::IO::" . $format;
+  my $loaded;
+
+  eval { $loaded = $self->_load_module($package) };
+  return $loaded unless $@;
+
+  print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the Matrix::IO system please see the
+Matrix::IO docs.  This includes ways of checking for formats at
+compile time, not run time
+END
+  return $loaded;
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Returns : guessed format of filename (lower case)
+ Args    : filename
+
+=cut
+
+sub _guess_format {
+   # Guess the matrix format from a file name.
+   #   'scoring' when the name contains BLOSUM or ends in PAM
+   #   'phylip'  when the name ends in .dist
+   # (both tests ignore case).  Returns nothing for an empty or
+   # unrecognised name.  The name is held in a lexical so the caller's
+   # $_ is left alone (the old code assigned to the global $_).
+   my ($class,$filename) = @_;
+   return unless $filename;
+   return 'scoring'   if $filename =~ /BLOSUM|PAM$/i;
+   return 'phylip'    if $filename =~ /\.dist$/i;
+   return;
+}
+
+sub DESTROY {
+    # Destructor: call close() on the object being destroyed.
+    my ($self) = @_;
+    $self->close();
+}
+
+sub TIEHANDLE {
+  # tie() hook: wrap the object passed to tie() in a small hash under
+  # the 'matrixio' key and bless that hash into $class.
+  my ($class,$matrixio) = @_;
+  my %state = ('matrixio' => $matrixio);
+  return bless \%state,$class;
+}
+
+sub READLINE {
+  # <$fh> hook for the tied handle.
+  # Scalar context: return the next matrix from the wrapped parser.
+  # List context: keep calling next_matrix() until it returns a false
+  # value and return everything collected.
+  # The wrapped object is stored under 'matrixio' by TIEHANDLE; the old
+  # code read a 'treeio' key and called next_tree(), neither of which
+  # exists for this class.
+  my $self = shift;
+  my $io = $self->{'matrixio'};
+  return $io->next_matrix() unless wantarray;
+  my (@list,$obj);
+  push @list,$obj  while $obj = $io->next_matrix();
+  return @list;
+}
+
+sub PRINT {
+  # print-to-handle hook for the tied handle: pass everything printed
+  # to the wrapped parser's write_matrix().  The old code called
+  # write_tree(), which this class does not define.
+  my $self = shift;
+  $self->{'matrixio'}->write_matrix(@_);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/MatrixI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/MatrixI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/MatrixI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,288 @@
+# $Id $
+#
+# BioPerl module for Bio::Matrix::MatrixI
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::MatrixI - An interface for describing a Matrix 
+
+=head1 SYNOPSIS
+
+  # Get a Matrix object
+
+=head1 DESCRIPTION
+
+This is an interface describing how one should be able to interact
+with a matrix.  One can have a lot of information I suppose and this
+outline won't really work for PWM or PSSMs.  We will have to derive a
+particular interface for those.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Matrix::MatrixI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 matrix_id
+
+ Title   : matrix_id
+ Usage   : my $id = $matrix->matrix_id
+ Function: Get the matrix ID
+ Returns : string value
+ Args    : 
+
+
+=cut
+
+sub matrix_id{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 matrix_name
+
+ Title   : matrix_name
+ Usage   : my $name = $matrix->matrix_name();
+ Function: Get the matrix name
+ Returns : string value
+ Args    :
+
+
+=cut
+
+sub matrix_name{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 get_entry
+
+ Title   : get_entry
+ Usage   : my $entry = $matrix->get_entry($rowname,$columname)
+ Function: Get the entry for a given row,column pair
+ Returns : scalar
+ Args    : $row name
+           $column name 
+
+
+=cut
+
+sub get_entry{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 get_column
+
+ Title   : get_column
+ Usage   : my @column = $matrix->get_column('ALPHA');
+ Function: Get a particular column
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values
+ Args    : name of the column
+
+
+=cut
+
+sub get_column{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 get_row
+
+ Title   : get_row
+ Usage   : my @row = $matrix->get_row('ALPHA');
+ Function: Get a particular row
+ Returns : Array (in array context) or arrayref (in scalar context)
+           of values
+ Args    : name of the row
+
+=cut
+
+sub get_row{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 get_diagonal
+
+ Title   : get_diagonal
+ Usage   : my @diagonal = $matrix->get_diagonal; 
+ Function: Get the diagonal of the matrix
+ Returns : Array (in array context) or arrayref (in scalar context)
+ Args    : none
+
+
+=cut
+
+sub get_diagonal{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 column_num_for_name
+
+ Title   : column_num_for_name
+ Usage   : my $num = $matrix->column_num_for_name($name)
+ Function: Gets the column number for a particular column name
+ Returns : integer
+ Args    : string
+
+
+=cut
+
+sub column_num_for_name{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 row_num_for_name
+
+ Title   : row_num_for_name
+ Usage   : my $num = $matrix->row_num_for_name($name)
+ Function: Gets the row number for a particular row name
+ Returns : integer
+ Args    : string
+
+
+=cut
+
+sub row_num_for_name{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 num_rows
+
+ Title   : num_rows
+ Usage   : my $rowcount = $matrix->num_rows;
+ Function: Get the number of rows
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_rows{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 num_columns
+
+ Title   : num_columns
+ Usage   : my $colcount = $matrix->num_columns
+ Function: Get the number of columns
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_columns{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+# inverse?
+=head2 reverse
+
+ Title   : reverse
+ Usage   : my $matrix = $matrix->reverse
+ Function: Get the reverse of a matrix
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub reverse{
+    # Interface placeholder: only calls throw_not_implemented().
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 row_names
+
+ Title   : row_names
+ Usage   : my @rows = $matrix->row_names
+ Function: The names of all the rows
+ Returns : array in array context, arrayref in scalar context
+ Args    : none
+
+
+=cut
+
+sub row_names{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 column_names
+
+ Title   : column_names
+ Usage   : my @columns = $matrix->column_names
+ Function: The names of all the columns
+ Returns : array in array context, arrayref in scalar context
+ Args    : none
+
+
+=cut
+
+sub column_names{
+   # Interface placeholder: only calls throw_not_implemented().
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/mast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/mast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/mast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,303 @@
+# $Id: mast.pm,v 1.21.4.2 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::IO::mast - PSM mast parser implementation
+
+=head1 SYNOPSIS
+
+See Bio::Matrix::PSM::IO for detailed documentation on how to 
+use PSM parsers
+
+=head1 DESCRIPTION
+
+Parser for mast. This driver unlike meme or transfac for example is
+dedicated more to PSM sequence matches, than to PSM themselves.
+
+=head1 TO DO
+
+Section III should be parsed too, otherwise no real sequence is
+available, so we supply 'NNNNN....' as a seq which is not right.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+package Bio::Matrix::PSM::IO::mast;
+use Bio::Matrix::PSM::InstanceSite;
+use Bio::Matrix::PSM::Psm;
+use Bio::Root::Root;
+use strict;
+
+use base qw(Bio::Matrix::PSM::PsmHeader Bio::Matrix::PSM::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'mast', 
+						                               -file=>$file);
+ Function: Associates a file with the appropriate parser
+ Throws  : Throws if the file passed is in HTML format or if 
+           some criteria for the file
+           format are not met.
+ Example :
+ Returns : psm object, associated with a file with matrix file
+ Args    : hash
+ return  : "Bio::Matrix::PSM::$format"->new(@args);
+
+=cut
+
+
+# Constructor for the MAST driver.
+#
+# Reads the report up to the start of Section II and stores on the object:
+#   _dbname/_dbtype, _mrsc/_msrctype - from the "DATABASE AND MOTIFS" block
+#   hid, length, seq                 - from the "MOTIF WIDTH BEST POSSIBLE MATCH" table
+#   instances                        - hash ref built by _get_genes() from Section I
+#   unstructured                     - remaining non-blank header lines
+# When the motif source type differs from the database type, _factor is set
+# to 3 and _mixquery to 1 (see the inline comment below).
+#
+# Args   : the argument list handed to SUPER::new / _initialize_io (-file ...)
+# Returns: the new object; returned early when -file starts with '>'
+#          (write mode) or when Section I contains no matches.
+# Throws : if the first line starts with <HTML>, or if Section I was never read.
+sub new {
+    my($class, @args)=@_;
+    my $self = $class->SUPER::new(@args);
+    my (%instances,@header,$n);
+    my ($file)=$self->_rearrange(['FILE'], @args);
+    $self->{file} = $file;
+    $self->{_factor}=1;
+    $self->_initialize_io(@args) || warn "Did you intend to use STDIN?"; #Read only for now
+    $self->{_end}=0;
+    undef $self->{hid};
+    # $file is undefined when the caller supplied something other than -file
+    return $self if (defined($file) && $file=~/^>/);#Just writing
+    my $buf=$self->_readline;
+    $self->throw('Cannot parse HTML format yet') if (defined($buf) && $buf =~/^<HTML>/);
+    # this should probably be moved to its own function
+    while ( defined($buf=$self->_readline)) {
+        chomp($buf);
+        if ($buf=~/DATABASE AND MOTIFS/) {
+            while (defined($buf=$self->_readline)) {
+                if ($buf=~/DATABASE/) {
+                    $buf=~s/^[\s\t]+//;
+                    chomp $buf;
+                    ($n,$self->{_dbname},$self->{_dbtype})=split(/\s/,$buf);
+                    $self->{_dbtype}=~s/[\(\)]//g;
+                }
+                if ($buf=~/MOTIFS/) {
+                    $buf=~s/^[\s\t]+//;
+                    chomp $buf;
+                    ($n,$self->{_mrsc},$self->{_msrctype})=split(/\s/,$buf);
+                    $self->{_msrctype}=~s/[\(\)]//g;
+                    last;
+                }
+            }
+            if ($self->{_msrctype} ne $self->{_dbtype}) {#Assume we have protein motifs, nuc DB (not handling opp.)
+                $self->{_factor}=3;
+                $self->{_mixquery}=1;
+            }
+            # Input ended inside the block: fall through to the format check below
+            last unless defined $buf;
+        }
+        if ($buf=~m/MOTIF WIDTH BEST POSSIBLE MATCH/) {
+            $self->_readline;
+            while (defined($buf=$self->_readline)) {
+                last if ($buf!~/\w/);
+                $buf=~s/\t+//g;
+                $buf=~s/^\s+//g;
+                my ($id,$width,$seq)=split(/\s+/,$buf);
+                push @{$self->{hid}},$id;
+                $self->{length}->{$id}=$width;
+                $self->{seq}->{$id}=$seq;
+            }
+            next;
+        }
+        if ($buf=~m/section i:/i) {
+            $self->_readline;
+            $self->_readline;
+            $self->_readline;
+            %instances=_get_genes($self);
+            $self->{instances}=\%instances;
+            if (!(%instances)) {
+                $self->warn ("Your MAST analysis did not find any matches satisfying the current threshold.\nSee MAST documentation for more information.\n");
+                return $self; #The header might be useful so we return the object, not undef
+            }
+            next;
+        }
+        if ($buf=~m/section ii:/i) {
+            $self->_readline;
+            $self->_readline;
+            $self->_readline;
+            last;
+        }
+        # NOTE(review): the class below also turns literal '+' characters into
+        # spaces; kept as-is because the stored header text depends on it.
+        $buf=~s/[\t+\s+]/ /g;
+        push @header,$buf unless (($buf=~/\*{10,}/)||($buf!~/\w/));
+    }
+    $self->throw('Could not read Section I, probably wrong format, make sure it is not HTML, giving up...') if !(%instances);
+    $self->warn( "This file might be an unreadable version, proceed with caution!\n") if (!grep(/\s+MAST\s+version\s+3/,@header));
+
+    $self->{unstructured} = \@header;
+    $self->_initialize;
+    return $self;
+}
+
+
+# Reads the Section I rows that follow the current read position and turns
+# each one into a Bio::Matrix::PSM::InstanceSite keyed by its first column.
+# Reading stops at the first line that starts with whitespace or '*'.
+# Each row is split on whitespace into id, description, e-value and length;
+# rows without a fourth field are reported and skipped.
+# NOTE(review): a description containing spaces would shift the e-value and
+# length columns - confirm against real MAST output.
+# Returns: hash (id => InstanceSite); empty when no rows were found.
+sub _get_genes {
+    my $self=shift;
+    my %instances;
+    while (defined(my $line=$self->_readline)) {
+        last if ($line=~/^[\s\t*]/); # Well, ids can be nearly anything...???
+        chomp($line);
+        next if ($line eq '');
+        $line=~s/\s+/,/g;
+        my ($id,$key,$eval,$len)=split(/,/,$line);
+        unless ($len) {
+            warn "Malformed data found: $line\n";
+            next;
+        }
+        # The real sequence is not available in Section I, hence the placeholder
+        $instances{$id}=Bio::Matrix::PSM::InstanceSite->new(-id=>$id,
+                                                            -desc=>$key,
+                                                            -score=>$eval,
+                                                            -width=>$len,
+                                                            -seq=>'ACGT');
+    }
+    return %instances;
+}
+
+
+=head2 next_psm
+
+ Title   : next_psm
+ Usage   : my $psm=$psmIO->next_psm();
+ Function: Reads the next PSM from the input file, associated with this object
+ Throws  : Throws if there are format violations in the input file (checking is not
+            very strict with all drivers).
+ Example :
+ Returns : Bio::Matrix::PSM::Psm object
+ Args    : none
+
+=cut
+
+
+# Parses the next Section II entry (sequence id, e-value and motif diagram,
+# possibly wrapped over several indented lines) and returns it as a
+# Bio::Matrix::PSM::Psm whose instances are the motif hits in the diagram.
+# Numeric diagram tokens are spacers that advance the position; every other
+# token is a motif occurrence. The sequence itself is unknown here, so each
+# instance gets a run of 'N' of the motif width times _factor.
+# Returns nothing once the closing row of asterisks, or the end of the
+# input, has been reached.
+sub next_psm {
+    my $self=shift;
+    return if ($self->{_end}==1);
+    my ($eval,$scheme,$sid);
+    my %index= %{$self->{length}};
+    my @instances;
+    my $line=$self->_readline;
+    unless (defined $line) { #Input ended without the closing row of asterisks
+        $self->{_end}=1;
+        return;
+    }
+    $line=~s/[\t\n]//;
+    if ($line =~ /\*{10,}/) { #End of Section II if we do only section II
+        $self->{_end}=1;
+        return ;
+    }
+    do {
+        if ($line!~/^\s/) {
+            ($sid,$eval,$scheme)=split(/\s+/,$line,3);
+        }
+        else
+        { $scheme .=$line; }
+        $line=$self->_readline;
+        $line=~s/[\t\n]// if defined $line;
+    } until (!defined($line) || $line!~/^\s/);
+    my $pos=1;
+    $scheme=~s/\s+//g;
+    $scheme=~s/\n//g;
+    my @motifs=split(/_/,$scheme);
+    while (@motifs) {
+        my $next=shift(@motifs);
+        if (!($next=~/\D/)) { #Purely numeric token: a spacer
+            last if (!@motifs);
+            $pos+=$next;
+            next;
+        }
+        my $id=$next;
+        my $score= $id=~m/\[/ ? 'strong' : 'weak' ;
+        my $frame;
+        my $strand = $id =~ m/\-\d/ ? -1 : 1 ;
+        if ($self->{_mixquery}) {
+            $frame = 0 if $id =~ m/\d+a/ ;
+            $frame = 1 if $id =~ m/\d+b/ ;
+            $frame = 2 if $id =~ m/\d+c/ ;
+        }
+        $id=~s/\D+//g;
+
+        my $width=$index{$id};
+        #We don't know the sequence, but we know the length
+        my $seq='N' x ($width*$self->{_factor}); #Future version will have to parse Section three and get the real seq
+        my $instance=Bio::Matrix::PSM::InstanceSite->new
+            ( -id=>"$id\@$sid",
+              -mid=>$id,
+              -accession_number=>$sid,
+              -desc=>"Motif $id occurrance in $sid",
+              -score=>$score,
+              -seq=>$seq,
+              -alphabet => 'dna',
+              -start=>$pos,
+              -strand=>$strand);
+        $instance->frame($frame) if ($self->{_mixquery});
+        push @instances,$instance;
+        $pos+=$index{$id}*$self->{_factor};
+    }
+    my $psm= Bio::Matrix::PSM::Psm->new(-instances=> \@instances,
+                                        -e_val    => $eval,
+                                        -id       => $sid);
+    # Hand the look-ahead line back so the next call starts on it
+    $self->_pushback($line) if defined $line;
+    return $psm;
+}
+
+
+=head2 write_psm
+
+ Title   : write_psm
+ Usage   : #Get SiteMatrix object somehow (see Bio::Matrix::PSM::SiteMatrix)
+            my $matrix=$psmin->next_matrix;
+            #Create the stream
+            my $psmio=new(-file=>">psms.mast",-format=>'mast');
+            $psmio->write_psm($matrix);
+            #Will warn if only PFM data is contained in $matrix, recalculate the PWM
+            #based on normal distribution (A=>0.25, C=>0.25, etc)
+ Function: writes pwm in mast format
+ Throws  :
+ Example : 
+ Args    : SiteMatrix object
+ Returns : 
+
+=cut
+
+# Writes $matrix as a log-odds block: a two-line header carrying the matrix
+# width, then one tab-separated row (lA lC lG lT) per position. When the
+# matrix has no log-odds data it is first recalculated against a uniform
+# background.
+sub write_psm {
+    my $self   = shift;
+    my $matrix = shift;
+#    my $idline=">". $matrix->id . "\n";
+    $self->_print("ALPHABET= ACGT\nlog-odds matrix: alength= 4 w= " . $matrix->width . "\n");
+    if (!$matrix->get_logs_array('A')) {
+        warn "No log-odds data, available, using normal distribution to recalculate the PWM";
+        $matrix->calc_weight({A=>0.25, C=>0.25, G=>0.25,T=>0.25});
+    }
+    while (my %column=$matrix->next_pos) {
+        my @cells=@column{qw(lA lC lG lT)};
+        $self->_print (join("\t",@cells,"\n"));
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/masta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/masta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/masta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,308 @@
+#---------------------------------------------------------
+# $Id: masta.pm,v 1.10.4.3 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::IO::masta - motif fasta format parser
+
+=head1 SYNOPSIS 
+
+MASTA is a position frequency matrix format similar to
+fasta. It contains one ID row just like fasta and then the actual
+data, which is tab delimited:
+
+  0.1	0.62	.017	0.11
+  0.22	0.13	0.54	0.11
+
+Or A,C,G and T could be horizontally positioned (positioning is
+automatically detected).  Please note masta will parse only DNA at the
+moment.
+
+It will also convert a set of aligned sequences:
+ACATGCAT
+ACAGGGAT
+ACAGGCAT
+ACCGGCAT
+
+to a PFM (SiteMatrix object). When writing if you supply SEQ it will
+write 10 random instances, which represent correctly the frequency and
+can be used as an input for weblogo creation purposes.
+
+See Bio::Matrix::PSM::IO for detailed documentation on how to use masta parser
+
+=head1 DESCRIPTION
+
+Parser for the MASTA (motif fasta) format described in the SYNOPSIS.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::IO::masta;
+use Bio::Matrix::PSM::SiteMatrix;
+use vars qw(@HEADER);
+use strict;
+
+use base qw(Bio::Matrix::PSM::IO Bio::Root::Root);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psmIO =  Bio::Matrix::PSM::IO->new(-format=> 'masta',
+						 -file  => $file, 
+                                                 -mtype => 'PWM');
+ Function: Associates a file with the appropriate parser
+ Throws  :
+ Example :
+ Args    : hash
+ Returns : "Bio::Matrix::PSM::$format"->new(@args);
+
+=cut
+
+# Constructor for the masta driver.
+# Records the file name and the matrix type used when writing: -mtype,
+# upper-cased, defaulting to "PFM". Then sets up the I/O layer.
+# Args   : the argument list handed to SUPER::new / _initialize_io
+# Returns: the new object
+sub new {
+    my($class, @args)=@_;
+    my $self = $class->SUPER::new(@args);
+    my ($file)=$self->_rearrange(['FILE'], @args);
+    $self->{file}  = $file;
+    $self->{_end}  = 0;
+    $self->{mtype} = uc($self->_rearrange(['MTYPE'], @args) || "PFM");
+    $self->_initialize_io(@args) || $self->warn("Did you intend to use STDIN?"); #Read only for now
+    return $self;
+}
+
+=head2 write_psm
+
+ Title   : write_psm
+ Usage   : 
+ Function: writes a pfm/pwm/raw sequence in a simple masta format
+ Throws  :
+ Example : 
+ Args    : SiteMatrix object, type (optional string: PWM, SEQ or PFM)
+ Returns : 
+
+=cut
+
+# Writes one matrix in masta format: an ID line followed by the data.
+# For PWM/PFM one tab-separated row per position is written (log-odds for
+# PWM, frequencies otherwise). For SEQ, ten sequences are written whose
+# per-column letter counts come from _freq_to_count(); a column whose
+# counts do not add up to ten raises an exception.
+sub write_psm {
+    my ($self,$matrix,$type)=@_;
+    $self->{mtype} = uc($type) if ($type);
+    $self->_print(">". $matrix->id . "\n");
+    if ($self->{mtype} eq 'SEQ') {
+        my @seq;
+        while (my %column=$matrix->next_pos) {
+            my ($n_a,$n_c,$n_g,$n_t)=_freq_to_count(\%column);
+            $self->throw("Could not convert from frequency to count\n") if (($n_a+$n_c+$n_g+$n_t) !=10);
+            # Rows 0..9 receive A, C, G and T in consecutive runs
+            my $c_end=$n_a+$n_c;
+            my $g_end=$n_a+$n_c+$n_g;
+            $seq[$_].='A' for (0..$n_a-1);
+            $seq[$_].='C' for ($n_a..$c_end-1);
+            $seq[$_].='G' for ($c_end..$g_end-1);
+            $seq[$_].='T' for ($g_end..9);
+        }
+        foreach my $row (@seq) {
+            $row.="\n";
+            $self->_print ($row);
+        }
+    }
+    else {
+        my $want_pwm=($self->{mtype} eq 'PWM');
+        while (my %column=$matrix->next_pos) {
+            my $row;
+            if ($want_pwm) {
+                $row=join("\t",$column{lA},$column{lC},$column{lG},$column{lT},"\n");
+            }
+            else {
+                $row=join("\t",$column{pA},$column{pC},$column{pG},$column{pT},"\n");
+            }
+            $self->_print ($row);
+        }
+    }
+}
+
+=head2 next_matrix
+
+  Title   : next_matrix
+  Usage   : my $matrix = $psmio->next_matrix;
+  Function: Alias of next_psm function
+
+=cut
+
+# Alias: forwards the call, with all arguments, to next_psm().
+sub next_matrix {
+    my ($self,@rest)=@_;
+    return $self->next_psm(@rest);
+}
+
+=head2 next_psm
+
+ Title   : next_psm
+ Usage   : my $matrix=$psmio->next_psm;
+ Function: returns the next matrix in the stream
+ Throws  : If you mix different types, for example if weights and
+           frequencies occur in the same entry. Different types may share
+           a file, but each must be placed under its own ID line
+ Example :
+ Args    :
+ Returns : Bio::Matrix::PSM::SiteMatrix
+
+=cut
+
+# Reads one masta entry: an ID line starting with '>' followed by data
+# lines, up to the next ID line or the end of the input.
+# Data lines are either aligned sequences (only ACGT letters, type 3) or
+# numeric rows (4 columns => type 1, anything else => type 2). All lines
+# of an entry must be of one type and sequences must share one length.
+# Returns: whatever _make_matrix() builds from the collected rows; nothing
+#          once the input is exhausted.
+# Throws : on a missing ID line, mixed row types or unequal sequence lengths.
+sub next_psm {
+    my $self=shift;
+    return if ($self->{_end});
+    my $line=$self->_readline;
+    $self->throw("No ID line- wrong format\n") unless (defined($line) && $line=~/^>/);
+    my ($id,$desc)=split(/[\t\s]+/,$line,2);
+    $id=~s/>//;
+    my ($mtype,@mdata,$len);
+    $self->{_mtype} = 0;
+    while (defined($line=$self->_readline)) {
+        next if $line =~ /^\s+$/;# There should not be empty lines, but just in case...
+        chomp $line;
+        if ($line =~ /^>/) {
+            # Start of the next entry: leave it for the following call
+            $self->_pushback($line);
+            last;
+        }
+
+        if ($line !~ /[^ACGTacgt]/) {
+            # This is a set of aligned sequences
+            $self->throw("Mixing between types is not allowed or a parsing error occured\n")
+                if (($self->{_mtype} != 3) && ($mtype)) ;
+            $self->throw("Bad sequence- different length: $line\n")
+                if (($len) && ($len!=length($line)));
+            $len=length($line) unless ($len);
+            push @mdata,$line;
+            $self->{_mtype}=3;
+            # Remember that a type was seen, so numeric rows that follow
+            # sequences are rejected too
+            $mtype=1;
+        } else {
+            # do not strip 'e's since they are part of number notation for small/big numbers
+            $line=~s/[a-df-zA-DF-Z]//g; #Well we may wanna do a hash and auto check for letter order if there is a really boring talk...
+            $line=~s/^[\s\t]+//;
+            $line=~s/[\s\t]+/\t/g;
+            my @data=split(/[\s\t]+/,$line);
+            if ($#data==3) {
+                $self->throw("Mixing between types is not allowed or a parsing error occured\n") if (($mtype)&&($self->{_mtype} !=1)) ;
+                $self->{_mtype}=1;
+                $mtype=1;
+            }
+            else   {
+                $self->throw("Mixing between types is not allowed or a parsing error occured\n") if (($mtype)&&($self->{_mtype} !=2)) ;
+                $self->{_mtype}=2;
+                $mtype=1;
+            }
+            push @mdata,\@data;
+        }
+    }
+    $self->{_end} = 1 if (!defined $line || $line !~ /^>/);
+    return _make_matrix(\@mdata,$self->{_mtype},$id,$desc);
+}
+
+# Builds a Bio::Matrix::PSM::SiteMatrix from the rows of one entry.
+# Args: $mdata - array ref of rows as collected by next_psm
+#       $type  - 1: one row per position (A C G T columns), transposed first
+#                2: rows used directly as the A, C, G and T arrays
+#                3: aligned sequences, counted per position
+#       $id, $desc - identifier and description of the entry
+# The source format is guessed from the first position only: 'pwm' when any
+# of its values is negative, 'freq' when they add up to 1, else 'count'.
+# For 'pwm', probabilities are derived as exp(value) normalised to 100.
+sub _make_matrix {
+    my ($mdata,$type,$id,$desc)=@_;
+    if ($type==1) {
+        my @rearr=_rearrange_matrix($mdata);
+        $mdata=\@rearr;
+    }
+#Auto recognition for what type is this entry (PFM, PWM or simple count)
+#A bit dangerous, I hate too much auto stuff, but I want to be able to mix different
+#types in a single file
+    my $mformat='count';
+    my ($row_a,$row_c,$row_g,$row_t);
+    if ($type == 3 ) {
+        ($row_a,$row_c,$row_g,$row_t)= _count_positions($mdata);
+    } else {
+        ($row_a,$row_c,$row_g,$row_t)=@{$mdata};
+        my $k=$row_a->[0]+$row_c->[0]+$row_g->[0]+$row_t->[0];
+        # Non-zero only when at least one value is negative
+        my $l= ($row_a->[0]+$row_c->[0]+$row_g->[0]+$row_t->[0]) -
+            (abs($row_a->[0])+abs($row_c->[0])+abs($row_g->[0])+abs($row_t->[0]));
+        # Tolerate floating point rounding when testing for a sum of 1
+        $mformat='freq' if ((abs($k-1) < 1e-6) && ($l==0));
+        $mformat='pwm' if ($l!=0);
+    }
+    my (@fa,@fc,@fg,@ft,%mparam);
+
+    if ($mformat eq 'pwm') {
+        foreach my $i (0..$#{$row_a}) {
+            my $ca=exp $row_a->[$i];
+            my $cc=exp $row_c->[$i];
+            my $cg=exp $row_g->[$i];
+            my $ct=exp $row_t->[$i];
+            my $all=$ca+$cc+$cg+$ct;
+            push @fa,($ca/$all)*100;
+            push @fc,($cc/$all)*100;
+            push @fg,($cg/$all)*100;
+            push @ft,($ct/$all)*100;
+        }
+    }
+    $desc.=", source is $mformat";
+    if ($mformat eq 'pwm') {
+        $desc=~s/^pwm//;
+        %mparam=(-pA=>\@fa,-pC=>\@fc,-pG=>\@fg,-pT=>\@ft,-id=>$id,-desc=>$desc,
+                 -lA=>$row_a,-lC=>$row_c,-lG=>$row_g,-lT=>$row_t);
+    }
+    else {
+        %mparam=(-pA=>$row_a,-pC=>$row_c,-pG=>$row_g,-pT=>$row_t,-id=>$id,-desc=>$desc);
+    }
+    return Bio::Matrix::PSM::SiteMatrix->new(%mparam);
+}
+
+# Transposes a list of [A, C, G, T] rows (one row per position) into four
+# per-letter arrays.
+# Args   : array ref of array refs, each holding the A, C, G and T values
+# Returns: list of four array refs, in the order A, C, G, T
+sub _rearrange_matrix {
+    my $mdata=shift;
+    my (@a,@c,@g,@t);
+    foreach my $entry (@{$mdata}) {
+        my ($val_a,$val_c,$val_g,$val_t)=@$entry;
+        push @a,$val_a;
+        push @c,$val_c;
+        push @g,$val_g;
+        push @t,$val_t;
+    }
+    return \@a,\@c,\@g,\@t;
+}
+
+
+# Counts, for every position of a set of aligned sequences, how often each
+# of A, C, G and T occurs (case-insensitively).
+# Args   : array ref of sequence strings; the first one sets the width
+# Returns: four array refs of per-position counts, in the order A, C, G, T
+sub _count_positions {
+    my $seq=shift;
+    my %count;
+    my $last_index=length($seq->[0])-1;
+    # Start every position at zero so letters that never occur still count as 0
+    foreach my $i (0..$last_index) {
+        $count{$_}->[$i] = 0 foreach qw(A C G T);
+    }
+    foreach my $sequence (@{$seq}) {
+        my $i=0;
+        foreach my $letter (split(//,$sequence)) {
+            $count{uc($letter)}->[$i]++;
+            $i++;
+        }
+    }
+    return $count{A},$count{C},$count{G},$count{T};
+}
+
+
+# Converts the pA/pC/pG/pT frequencies of one position into counts out of
+# ten, rounding each value half up.
+# Args   : hash ref with the keys pA, pC, pG and pT
+# Returns: list of four integers, in the order A, C, G, T
+sub _freq_to_count {
+    my $h=shift;
+    my ($n_a,$n_c,$n_g,$n_t)=map { int(10*$h->{$_}+0.5) } qw(pA pC pG pT);
+    return ($n_a,$n_c,$n_g,$n_t);
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/masta.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/meme.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/meme.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/meme.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,350 @@
+#---------------------------------------------------------
+# $Id: meme.pm,v 1.20.4.1 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::IO::meme - PSM meme parser implementation
+
+=head1 SYNOPSIS
+
+See Bio::Matrix::PSM::IO for detailed documentation on how to use PSM parsers
+
+=head1 DESCRIPTION
+
+Parser for meme.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::IO::meme;
+use Bio::Matrix::PSM::InstanceSite;
+use Bio::Matrix::PSM::SiteMatrix;
+use Bio::Matrix::PSM::Psm;
+use vars qw(@HEADER);
+use strict;
+
+use base qw(Bio::Matrix::PSM::PsmHeader Bio::Matrix::PSM::IO);
+
+# Names of the header fields handled by this driver
+@Bio::Matrix::PSM::IO::meme::HEADER = qw(e_val sites IC width);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'meme', 
+						 -file=>$file);
+ Function: Associates a file with the appropriate parser
+ Throws  : Throws if the file passed is in HTML format or 
+           if the MEME header cannot be found.
+ Example :
+ Args    : hash
+ Returns : "Bio::Matrix::PSM::$format"->new(@args);
+
+=cut
+
+# Constructor for the MEME driver.
+# Stores the file name and its part before the first '.' as "query", then
+# reads the report header: every non-blank line that is not a row of
+# asterisks is kept in {unstructured}. On the line starting with ALPHABET
+# the sequence table is parsed by _parse_coordinates() and reading stops.
+# Args   : the argument list handed to SUPER::new / _initialize_io
+# Returns: the new object
+# Throws : if a line containing <HEAD> is found (HTML output)
+sub new {
+    my($class, @args)=@_;
+    my $self = $class->SUPER::new(@args);
+    my ($file)=$self->_rearrange(['FILE'], @args);
+    # $file is undefined when the caller supplied something other than -file
+    my ($query)=split(/\./,(defined($file) ? $file : ''),2);
+    $self->{file} = $file;
+    $self->{query}= $query;
+    $self->{end}  = 0;
+    $self->{_strand}=0; #This we'll need to see if revcom option is used
+    $self->_initialize_io(@args) || warn "Did you intend to use STDIN?"; #Read only for now
+    #Skip header
+    while (defined(my $line=$self->_readline)) {
+        $self->throw("Cannot parse HTML, please use text output\n") if ($line=~/<HEAD>/); #Should start parsing HTML output, not a big deal
+        chomp($line);
+        if ($line=~"^ALPHABET") {
+            $self=_parse_coordinates($self);
+            last;
+        }
+        push @{$self->{unstructured}},$line unless (($line=~/\*{10,}/) || ($line eq ''));
+    }
+    $self->_initialize;
+    return $self;
+}
+
+=head2 _parse_coordinates
+
+ Title   : _parse_coordinates
+ Usage   :
+ Function:
+ Throws  :
+ Example : Internal stuff
+ Returns :
+ Args    :
+
+=cut
+
+# Reads the table of sequence names, weights and lengths. Two lines are
+# skipped first; then each row holds one or two (name, weight, length)
+# triplets, until a line starting with ten or more asterisks or the end of
+# the input. Names are appended to {hid}; weights and lengths are stored
+# in {weight} and {length}, keyed by name.
+# Returns: the object itself
+sub _parse_coordinates {
+    my $self=shift;
+    $self->_readline;
+    $self->_readline;
+    my $line=$self->_readline;
+    # The defined() test keeps a truncated file from looping forever
+    while (defined($line) && $line !~ /^\*{10,}/ ) {
+        chomp $line;
+        $line =~ s/\s+/,/g;
+        my ($id1,$w1,$l1,$id2,$w2,$l2)=split(/,/,$line);
+        push @{$self->{hid}},$id1;
+        $self->{weight}->{$id1}=$w1;
+        $self->{length}->{$id1}=$l1;
+        if ($id2) {
+            push @{$self->{hid}},$id2;
+            $self->{weight}->{$id2}=$w2;
+            $self->{length}->{$id2}=$l2;
+        }
+        $line=$self->_readline;
+    }
+    return $self;
+}
+
+=head2 header
+
+ Title   : header
+ Usage   :  my %header=$psmIO->header;
+ Function:  Returns the header for the MEME file
+ Throws  :
+ Example : Fetching all the sequences included in the MEME analysis, 
+           being parsed
+           my %header=$psmIO->header;
+            foreach my $seqid (@{$header{instances}}) {
+               my $seq=$db->get_Seq_by_acc($id);
+               #Do something with the sequence
+            }
+            where $db might be Bio::DB:GenBank object, see
+ Returns : Hash with three keys: instances, weights and lengths, which
+           should be self-explanatory. Each value is an array
+           reference. Each array element corresponds to the same
+           element in the other two arrays. So $header{instances}->[$i]
+           will refer to the same sequence in the motif file as
+           $header{weights}->[$i] and $header{lengths}->[$i]
+ Args    :  none
+ Notes   :  OBSOLETE!
+
+=cut
+
+# OBSOLETE accessor: returns copies of the three per-sequence lists kept
+# under _inst_name, _inst_weight and _inst_coord as a hash of array refs
+# (instances, weights, lengths). A list that was never filled in is
+# returned as an empty array instead of raising an exception.
+sub header {
+    my $self=shift;
+    my @instances=@{ $self->{_inst_name}   || [] };
+    my @weights  =@{ $self->{_inst_weight} || [] };
+    my @lengths  =@{ $self->{_inst_coord}  || [] };
+    return (instances=>\@instances,weights=>\@weights,lengths=>\@lengths);
+}
+
+=head2 next_psm
+
+ Title   : next_psm
+ Usage   : my $psm=$psmIO->next_psm();
+ Function: Reads the next PSM from the input file, associated with this object
+ Throws  : Throws if the format is inconsistent with the rules for MEME 3.0.4:
+            no SUMMARY Section present or some keywords are missing/altered.
+ Example :
+ Returns : Bio::Matrix::PSM::Psm object
+ Args    : none
+
+=cut
+
+# Scans forward through the report and returns the next motif as a
+# Bio::Matrix::PSM::Psm object. Pieces are collected as their marker lines
+# go by (site instances, width/sites/e-value, information content,
+# log-odds matrix); the object is built when the "letter-probability
+# matrix" line is reached. Returns nothing once "SUMMARY OF MOTIFS" has
+# been seen, or when the input ends after a line starting with "Time ".
+# Throws "Wrong format" when the input ends without either.
+sub next_psm {
+    #Parses the next prediction and returns a psm objects
+    my $self=shift;
+    return if ($self->{end});
+    my ($endm,$line,$instances,$tr,$width,$motif_id,$sites,$e_val,$id,$ic,$lA,$lC,$lG,$lT);
+    while (defined( $line = $self->_readline) ) {
+#Check if revcom is enabled, not very original check....
+  $self->{_strand}=1 if (($line=~/^Sequence name/) && ($line=~/Strand/));
+	# A line containing " Site " introduces the table of site instances
+	if ($line=~ m/\sSite\s/) {
+	    $instances= $self->_parseInstance;
+	}
+	#Here starts the next motif
+	if ( ($line=~/width/) && ($line=~/sites/)) {
+	    chomp($line);
+	    $line=~s/[\t\s=]+/,/g;
+	    $line=~s/\t/,/g;
+	    #Parsing the general information for this prediction
+	    # $tr soaks up the fields that are not needed
+	    ($tr,$motif_id,$tr,$width,$tr,$sites,
+	     $tr,$tr,$tr,$e_val)=split(/,/,$line);
+	    $self->{id}=$self->{query} . $motif_id;
+	}
+	# The value is taken from the line after the one mentioning "content";
+	# from here on $line holds that following line
+	if ($line =~ /content/i) {
+	    $line=$self->_readline;
+	    chomp($line);
+	    $line=~s/[\)\(]//g;
+	    ($ic)=split(/\s/,$line);
+	}
+        #Last info-prob matrix data
+	if ($line=~/position-specific\s+scoring matrix/) {
+		($lA,$lC,$lG,$lT)=_parse_logs($self);
+	}
+	# The probability matrix completes the motif: build and return it
+	if ($line=~/^letter-probability\smatrix/) {
+	    my %matrix_dat=$self->_parseMatrix($motif_id);
+	    my $psm= new Bio::Matrix::PSM::Psm(%matrix_dat, 
+					       -instances=>$instances, 
+					       -e_val=>$e_val,
+					       -IC=>$ic, 
+					       -width=>$width, 
+					       -sites=>$sites,
+						   -lA=>$lA,
+						   -lC=>$lC,
+						   -lG=>$lG,
+						   -lT=>$lT,
+						   );
+	    return $psm;
+	}
+	if ($line=~"SUMMARY OF MOTIFS") {
+	    $self->{end}=1;
+	    return;
+	}
+	# Remember that a "Time" line was seen, in case the input ends here
+	$endm=1 if ($line=~/^Time\s/); 
+    }
+	if ($endm) { #End of file found, end of current motif too, but not all predictions were made as requested (No summary)
+	    $self->{end}=1;
+            warn "This MEME analysis was terminated prematurely, you may have less motifs than you requested\n";
+	    return;
+	}
+    $self->throw("Wrong format\n"); # Multiple keywords not found, probably wrong format
+}
+
+=head2 _parseMatrix
+
+ Title   : _parseMatrix
+ Usage   :
+ Function: Parses the next site matrix information in the meme file
+ Throws  :
+ Example :  Internal stuff
+ Returns :  hash as for constructing a SiteMatrix object (see SiteMatrixI)
+ Args    :  string
+
+=cut
+
+# Reads the rows of a letter-probability matrix, one position per line with
+# the A, C, G and T values separated by whitespace. Reading stops at an
+# empty line, at a line containing ten or more dashes, or at the end of the
+# input. The first line is always treated as a data row.
+# Args   : $id - identifier stored under -id
+# Returns: hash (-pA, -pC, -pG, -pT => array refs, -id => $id)
+sub _parseMatrix {
+    my ($self,$id)=@_;
+    my (@pA,@pC,@pG,@pT);
+    my $i=0;
+    my $line = $self->_readline;
+    #Most important part- the probability matrix
+    # A real loop is used here: "last" cannot leave a do {} until block
+    while (defined $line) {
+        chomp $line;
+        last if ($line eq '');
+        $line=~s/^\s+//;
+        $line=~s/\s+/,/g;
+        ($pA[$i],$pC[$i],$pG[$i],$pT[$i])=split(/,/,$line);
+        $i++;
+        $line=$self->_readline;
+        last if (defined($line) && $line =~ /\-{10,}/);
+    }
+    return (-pA=>\@pA,-pC=>\@pC,-pG=>\@pG,-pT=>\@pT,-id=>$id);
+}
+
+=head2 _parse_logs
+
+ Title   : _parse_logs
+ Usage   :
+ Function: Parses the next site matrix log values in the meme file
+ Throws  :
+ Example :  Internal stuff
+ Returns :  array of array refs
+ Args    :  none
+
+=cut
+
+# Reads the rows of a position-specific scoring (log-odds) matrix. Two
+# lines are skipped first; then one position is read per line, with the A,
+# C, G and T values separated by whitespace. Reading stops at an empty
+# line, at a line containing ten or more dashes, or at the end of the input.
+# Returns: list of four array refs, in the order A, C, G, T
+sub _parse_logs {
+    my $self=shift;
+    my (@lA,@lC,@lG,@lT);
+    my $i=0;
+    $self->_readline;   $self->_readline;
+    my $line = $self->_readline;
+    #The log-odds rows
+    # A real loop is used here: "last" cannot leave a do {} until block
+    while (defined $line) {
+        chomp $line;
+        last if ($line eq '');
+        $line=~s/^\s+//;
+        $line=~s/\s+/,/g;
+        ($lA[$i],$lC[$i],$lG[$i],$lT[$i])=split(/,/,$line);
+        $i++;
+        $line=$self->_readline;
+        last if (defined($line) && $line =~ /\-{10,}/);
+    }
+
+    return (\@lA,\@lC,\@lG,\@lT);
+}
+
+=head2 _parseInstance
+
+ Title   : _parseInstance
+ Usage   :
+ Function:  Parses the next sites instances from the meme file
+ Throws  :
+ Example :  Internal stuff
+ Returns :  reference to an array of Bio::Matrix::PSM::InstanceSite objects
+ Args    :  none
+
+=cut
+
+# Reads the table of site instances of the current motif. One line is
+# skipped first; then every line up to one containing five dashes is split
+# on whitespace into id, [strand,] start, score and three sequence pieces.
+# The strand column is expected only when {_strand} is set; otherwise the
+# strand is 1. The three pieces are joined to form the site sequence.
+# Returns: reference to the array of InstanceSite objects, which is also
+#          stored in {instances}
+# Throws : if the joined sequence holds characters other than ACGTNX, '-' or '.'
+sub _parseInstance {
+    my $self = shift;
+    $self->_readline;
+    my ($line,@instance);
+    while (defined($line=$self->_readline) ) {
+        last if ($line =~ /\-{5}/ );
+        chomp($line);
+        my @comp=split(/\s+/,$line);
+        my ($id,$start,$score,$strand,$s1,$s2,$s3);
+        if ( $self->{_strand}) {
+            ($id,$strand,$start,$score,$s1,$s2,$s3)=@comp;
+        } else {
+            ($id,$start,$score,$s1,$s2,$s3)=@comp;
+            $strand=1;
+        }
+        # Missing pieces count as empty strings
+        my $seq= join('', map { defined($_) ? $_ : '' } ($s1,$s2,$s3));
+        if ($seq =~ /[^ACGTacgtNnXx\-\.]/) {
+            my $col=$#comp;
+            $self->throw("I have not been able to parse the correct instance sequence: $seq, $col columns\n");
+        }
+        my $sid = $self->{id} . '@' . $id;
+        push @instance, Bio::Matrix::PSM::InstanceSite->new
+            (-mid      => $self->{id},
+             -start    => $start,
+             -score    => $score,
+             -seq      => $seq,
+             -strand   => $strand,
+             -accession_number => $id,
+             -primary_id => $sid,
+             -desc => 'Bioperl MEME parser object' );
+    }
+    $self->{instances} = \@instance;
+    return \@instance;
+}
+
+				
+			
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/psiblast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/psiblast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/psiblast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,136 @@
+#---------------------------------------------------------
+# $Id: psiblast.pm,v 1.11.4.1 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::IO::psiblast - PSM psiblast parser
+
+=head1 SYNOPSIS
+
+See Bio::Matrix::PSM::IO for documentation
+
+=head1 DESCRIPTION
+
+Parser for ASCII matrices from PSI-BLAST (blastpgp program in
+BLAST distribution).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - James Thompson
+
+Email tex at biosysadmin.com
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::IO::psiblast;
+use Bio::Matrix::PSM::Psm;
+use Bio::Matrix::PSM::ProtMatrix;
+use strict;
+
+use base qw(Bio::Matrix::PSM::PsmHeader Bio::Matrix::PSM::IO);
+
+# define the order in which amino acids are listed in the psiblast matrix file
+our @ordered_alphabet = qw/A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V/;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psmIO  =  new Bio::Matrix::PSM::IO(-format=>'psiblast', 
+                                                  -file=>$file);
+ Function: Associates a file with the appropriate parser
+ Throws  :
+ Example :
+ Args    :
+ Returns : Bio::Matrix::PSM::ProtMatrix->new(@args);
+
+=cut
+
+# Constructor for the psiblast driver: sets up the I/O layer, runs
+# _initialize and records the column order of the amino acids.
+# Args   : the argument list handed to SUPER::new / _initialize_io
+# Returns: the new object
+sub new {
+   my ($class,@args)=@_;
+
+   my $self   = $class->SUPER::new(@args);
+   $self->_initialize_io(@args) || warn "Did you intend to use STDIN?"; # Read only for now
+   $self->_initialize;
+
+   $self->{_ordered_alphabet} = \@ordered_alphabet;
+   return $self;
+}
+
+=head2 next_psm
+
+ Title   : next_psm
+ Usage   : my $psm = $psmIO->next_psm();
+ Function: Reads the next PSM from the input file, associated with this object
+ Throws  : None
+ Returns : Bio::Matrix::PSM::ProtMatrix object
+ Args    : none
+
+=cut
+
+# Reads the whole matrix file and returns it as one
+# Bio::Matrix::PSM::ProtMatrix; later calls return nothing.
+# Only lines that start with a number followed by a word character are
+# used. After the position and the reference letter, the first twenty
+# values go to the 'l' arrays and the next twenty to the 'p' arrays, in
+# the order of the recorded alphabet; the last but one value goes to 'ic'.
+sub next_psm {
+   my ($self) = @_;
+
+   return if ($self->{_end});
+
+   my %matrix_args;
+   my @alphabet = @{$self->{_ordered_alphabet}};
+
+   while ( defined( my $row = $self->_readline) ) {
+      # remove leading and trailing whitespace
+      chomp $row;
+      $row =~ s/^\s+//g;
+      $row =~ s/\s+$//g;
+
+      # only rows with a position number and a reference aa carry data
+      next unless ( $row =~ /^(\d+)\s+(\w{1})/ );
+
+      my @fields = split /\s+/, $row;
+      splice(@fields, 0, 2);        # position number and reference aa
+      pop @fields;                  # last column is not used
+      my $ic = pop @fields;
+
+      # the next 2 x 20 fields go into the per-letter arrays
+      foreach my $prefix ('l', 'p') {
+         foreach my $aa ( @alphabet[0 .. 19] ) {
+            push @{$matrix_args{$prefix.$aa}}, shift @fields;
+         }
+      }
+
+      push @{$matrix_args{'ic'}}, $ic;
+   }
+
+   $self->{_end} = 1; # psiblast matrix files currently only hold one PSM per file
+
+   return Bio::Matrix::PSM::ProtMatrix->new( %matrix_args );
+}
+
+# Destructor: closes the underlying stream when the object goes away.
+sub DESTROY {
+   my ($self) = @_;
+   $self->close();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/transfac.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/transfac.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO/transfac.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,256 @@
+#---------------------------------------------------------
+# $Id: transfac.pm,v 1.14.4.3 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::IO::transfac - PSM transfac parser
+
+=head1 SYNOPSIS
+
+See Bio::Matrix::PSM::IO for documentation
+
+=head1 DESCRIPTION
+
+#
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::IO::transfac;
+use Bio::Matrix::PSM::Psm;
+use Bio::Root::Root;
+use Bio::Annotation::Reference;
+use Bio::Annotation::Comment;
+use Bio::Annotation::DBLink;
+use strict;
+
+use base qw(Bio::Matrix::PSM::PsmHeader Bio::Matrix::PSM::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'transfac', 
+						 -file=>$file);
+ Function: Associates a file with the appropriate parser
+ Throws  :
+ Example :
+ Args    :
+ Returns : "Bio::Matrix::PSM::$format"->new(@args);
+
+=cut
+
+sub new {
+    my ($class,@args)=@_;
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize_io(@args) || warn "Did you intend to use STDIN?"; #Read only for now
+    # Consume the unstructured header: keep each line longer than two characters
+    # and stop after the first '//' line. EOF ends the loop before chomp/match.
+    while (defined(my $line=$self->_readline)) {
+        chomp $line;
+        push @{$self->{unstructured}},$line if (length($line)>2);
+        last if ($line =~ m{^//});
+    }
+    $self->_initialize;
+    return $self;
+}
+
+
+=head2 next_psm
+
+ Title   : next_psm
+ Usage   : my $psm=$psmIO->next_psm();
+ Function: Reads the next PSM from the input file, associated with this object
+ Throws  : Upon finding a line, defining the matrix, where one or more positions
+            are not defined, see _make_matrix
+ Returns : Bio::Matrix::PSM::Psm object
+ Args    : none
+
+=cut
+
+sub next_psm {
+    my $self=shift;
+    my $line;
+    return if ($self->{end});
+    my (@a,@c,@g,@t, $id, @refs, $accn, $sites);
+    my $i=0;
+    # First pass: collect the ID/AC fields and the frequency rows of one entry.
+    while (defined( $line=$self->_readline)) {
+        chomp($line);
+        if ($line=~/^\d{2}/) {	#Beginning of the frequency data
+            ($a[$i],$c[$i],$g[$i],$t[$i])=_parse_matrix($line);
+            $i++;
+        }
+        (undef,$accn)=split(/\s{2}/,$line) if ($line=~/^AC\s/);
+        (undef,$id)=split(/\s{2}/,$line) if ($line=~/^ID\s/);
+        last if (($line=~/^XX/) && ($i>0)); # first XX seen after matrix rows
+    }
+    if (!(defined($id) && defined($accn))) { # no complete entry was read
+        $self->{end}=1;
+        return;
+    }
+    while (defined( $line=$self->_readline)) {	#How many sites?
+        if ($line=~/^BA\s/) {
+            my (undef,$ba)=split(/\s{2}/,$line);
+            ($sites)=split(/\s/,$ba);
+        }
+        if ($line=~/^RN/) { #Adding a reference as Bio::Annotation object (self)
+            # not interested in RN line itself, since has only transfac-specific
+            # reference id? - no push back of line
+            my $ref=_parse_ref($self);
+            push @refs,$ref;
+        }
+        last if ($line=~m{^//}); # a '//' line ends this second pass
+    }
+    # We have the frequencies, let's create a SiteMatrix object
+    my %matrix = _make_matrix($self,\@a,\@c,\@g,\@t,$id, $accn);
+    $matrix{-sites}=$sites if ($sites);
+    $matrix{-width}=@a;
+    my $psm=Bio::Matrix::PSM::Psm->new(%matrix);
+    foreach my $ref (@refs) { $psm->add_Annotation('reference',$ref); }
+    return $psm;
+}
+
+=head2 _parse_matrix
+
+ Title   : _parse_matrix
+ Usage   :
+ Function: Parses a line
+ Throws  :
+ Example :  Internal stuff
+ Returns :  array (frequencies for A,C,G,T in this order).
+ Args    :  string
+
+=cut
+
+sub _parse_matrix {
+    my ($row) = @_;                 # one frequency line: a leading label, then A C G T values
+    $row =~ s/\s+/,/g;              # whitespace runs become commas so a single split handles both
+    my @fields = split(/,/, $row);
+    return @fields[1..4];           # skip the leading label; undef for any missing value
+}
+
+
+=head2 _make_matrix
+
+ Title   : _make_matrix
+ Usage   :
+ Function:
+ Throws  :  If a position is undefined, for example if you have line like this
+            in the file you are parsing: 08  4,7,,9
+ Example :  Internal stuff
+ Returns :
+ Args    :
+
+=cut
+
+sub _make_matrix {
+    my ($self,$cA,$cC,$cG,$cT, $id, $accn)= @_;
+    my (@fa, @fc, @fg, @ft, @a, @c, @g, @t);
+    my $ave=0;  # running blend of the per-base column means; stands in for all-zero columns
+    # Builds the -pA/-pC/-pG/-pT (plus -id, -accession_number) argument list from four count arrays.
+    for (my $i=0; $i < @{$cA};$i++) {
+        #No value can be undefined -throw an exception, since setting to 0 probably would be wrong
+        #If this happens it would indicate most probably that the file, being parsed is in a different format
+        foreach my $col ($cA, $cG, $cC, $cT) {
+            $self->throw('Parsing error, a position is not defined') unless defined($col->[$i]);
+        }
+        if ( ($cA->[$i] + $cC->[$i] + $cG->[$i] + $cT->[$i]) ==0 ) {
+            push @a,$ave;
+            push @c,$ave;
+            push @g,$ave;
+            push @t,$ave;
+        }
+        else {
+            push @a,$cA->[$i];
+            push @c,$cC->[$i];
+            push @g,$cG->[$i];
+            push @t,$cT->[$i];
+            $ave = (($cA->[$i]+$cC->[$i]+
+                     $cG->[$i]+$cT->[$i]) / 4 +$ave)/2;
+        }
+    }
+    # Columns that still sum to zero are left out of the returned arrays.
+    for (my $i=0; $i<@a;$i++) {
+        my $zero=($a[$i]+$c[$i]+$g[$i]+$t[$i]);
+        next if ($zero==0);
+        push @fa, $a[$i];
+        push @fc, $c[$i];
+        push @fg, $g[$i];
+        push @ft, $t[$i];
+    }
+    return (-pA=>\@fa,-pC=>\@fc,-pG=>\@fg,-pT=>\@ft, -id=>$id, -accession_number=>$accn)
+    }
+
+sub _parse_ref {
+    # Reads the lines that follow an RN line, up to the next XX line, and builds
+    # a Bio::Annotation::Reference from the RA (authors), RT (title), RL
+    # (location) and RX (database id) fields.
+    my $self=shift;
+    my ($authors,$title,$loc,$db,$dbid);
+    # defined(): a false-but-defined line must not end the loop early
+    while (defined(my $refline=$self->_readline)) {
+      chomp $refline;
+      my ($field,$arg)=split(/\s+/,$refline,2);
+      next unless defined($field);   # empty line, nothing to classify
+      last if ($field=~/XX/);
+      if ($field=~/RX/) {  #DB Reference
+          # drop ';' and '.' before splitting into tag, database name and id
+          $refline=~s/[;\.]//g;
+          (undef, $db, $dbid)=split(/\s+/,$refline);
+      }
+      elsif ($field=~/RT/) {   #Title
+          $title .= $arg;
+      }
+      elsif ($field=~/RA/) {  #Author
+          $authors .= $arg;
+      }
+      elsif ($field=~/RL/) {  #Journal
+          $loc .= $arg;
+      }
+    }
+    # Fields that never appeared are passed on as undef.
+    my $reference=Bio::Annotation::Reference->new(-authors=>$authors, -title=>$title,
+                                                    -location=>$loc);
+    if (defined($db) && $db eq 'MEDLINE') {   # $db is undef when no RX line was seen
+        # does it ever equal medline?
+        $reference->medline($dbid);
+    }
+    elsif ($dbid) {
+        $reference->pubmed($dbid);
+    }
+    return $reference;
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    $self->close;   # destructor: invoke the object's close method
+}
+
+1;
+  

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,327 @@
+#---------------------------------------------------------
+# $Id: IO.pm,v 1.15.4.1 2006/10/02 23:10:21 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::IO - PSM parser
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::IO;
+
+  my $psmIO= new Bio::Matrix::PSM::IO(-file=>$file, -format=>'transfac');
+
+  my $release=$psmIO->release; #Using Bio::Matrix::PSM::PsmHeader methods
+
+  my $release=$psmIO->release;
+
+  while (my $psm=$psmIO->next_psm) {
+   my %psm_header=$psm->header;
+   my $ic=$psm_header{IC};
+   my $sites=$psm_header{sites};
+   my $width=$psm_header{width};
+   my $score=$psm_header{e_val};
+   my $IUPAC=$psm->IUPAC;
+  }
+
+  my $instances=$psm->instances;
+  foreach my $instance (@{$instances}) {
+    my $id=$instance->primary_id;
+  }
+
+
+=head1 DESCRIPTION
+
+This module allows you to read DNA position scoring matrices and/or
+their respective sequence matches from a file.
+
+There are two header methods, one belonging to
+Bio::Matrix::PSM::IO::driver and the other to
+Bio::Matrix::PSM::Psm. They provide general information about the file
+(driver) and for the current PSM result (Psm) respectively. Psm header
+method always returns the same thing, but some values in the hash
+might be empty, depending on the file you are parsing. You will get
+undef in this case (no exceptions are thrown).
+
+Please note that the file header data (commentaries, version, input
+data, configuration, etc.)  might be obtained through
+Bio::Matrix::PSM::PsmHeader methods. Some methods are driver specific
+(meme, transfac, etc.): meme: weight mast: seq, instances
+
+If called when you parse a different file type you will get undef. For
+example:
+
+  my $psmIO= new Bio::Matrix::PSM::IO(-file=>$file, -format=>'transfac');
+  my %seq=$psmIO->seq;
+
+will return an empty hash. To see all methods and how to use them go
+to Bio::Matrix::PSM::PsmHeaderI.
+
+See also Bio::Matrix::PSM::PsmI for details on using and manipulating
+the parsed data.
+
+The only way to write PFM/PWM is through masta module (something like fasta for
+DNA matrices). You can see an example by reading Bio::Matrix::PSM::IO::masta
+documentation.
+
+=head1 See also
+
+Bio::Matrix::PSM::PsmI, Bio::Matrix::PSM::PsmHeaderI, Bio::Matrix::PSM::IO::masta
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::IO;
+use vars qw(@PSMFORMATS);
+use strict;
+
+use base qw(Bio::Root::IO);
+
+@PSMFORMATS = qw(meme transfac mast psiblast masta); # format names that new() accepts
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'meme', 
+						 -file=>$file);
+ Function: Associates a file with the appropriate parser
+ Throws  : Throws if the file passed is in HTML format or 
+           if some criteria for the file
+           format are not met. See L<Bio::Matrix::PSM::IO::meme> and 
+           L<Bio::Matrix::PSM::IO::transfac> for more details.
+ Example :
+ Returns : psm object, associated with a file with matrix file
+ Args    : hash
+
+=cut
+
+sub new {
+    my($caller,@args) = @_;
+    my $class = ref($caller) || $caller;
+    # A driver class (Bio::Matrix::PSM::IO::<format>) is constructed directly;
+    # the base class acts as a factory that selects and loads the driver.
+    if( $class =~ /Bio::Matrix::PSM::IO(\S+)/ ) {
+        my $self = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    } else {
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+        my $format = $param{'-format'} ||
+            $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
+            'scoring';
+        # Normalize case first, then require an exact name: a pattern match
+        # would also accept any fragment of a known format (e.g. 'me').
+        $format = "\L$format";
+        $class->throw("$format format unrecognized or an argument error occured\n.")
+            if (!grep { $_ eq $format } @Bio::Matrix::PSM::IO::PSMFORMATS);
+        return unless( $class->_load_format_module($format) );
+        return "Bio::Matrix::PSM::IO::$format"->new(@args);
+    }
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function: Get a filehandle type access to the matrix parser
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $matrix = <$fh>;     # read a matrix object
+ Returns : filehandle tied to Bio::Matrix::PSM::IO class
+ Args    : none
+
+=cut
+
+sub fh {
+    my ($obj) = @_;
+    my $pkg = ref($obj) || $obj;     # works for an instance or a class name
+    my $glob = Symbol::gensym;       # fresh anonymous glob to tie
+    tie $$glob, $pkg, $obj;          # NOTE(review): tie needs a TIEHANDLE method; none is defined in this file - confirm one is inherited
+    return $glob;
+}
+
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL Matrix::PSM::IO stuff*
+ Function: Loads up (like use) a module at run time on demand
+
+=cut
+
+sub _load_format_module {   # Loads the driver module for $format; returns what _load_module returned
+  my ($self,$format) = @_;
+  my $module = "Bio::Matrix::PSM::IO::" . $format;
+  my $ok;                   # stays undef if loading dies inside the eval
+
+  eval {                    # trap a load failure instead of letting it propagate
+      $ok = $self->_load_module($module);
+  };
+  if ( $@ ) {               # loading died: report on STDERR and fall through to return $ok
+    print STDERR <<END;
$self: $format cannot be found
Exception $@
For more information about the Matrix::PSM::IO system please see the
Matrix::PSM::IO docs.  This includes ways of checking for formats at
compile time, not run time
END
+  ;
+  }
+  return $ok;
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Returns : guessed format of filename (lower case)
+ Args    : filename
+
+=cut
+
+sub _guess_format {   # Guess the format name from a file name; false when nothing matches
+    my ($class, $file) = @_;   # lexical copy, so the caller's $_ is left untouched
+    return unless $file;
+    return 'meme'     if $file =~ /\.meme$|meme\.html$/i;   # dots escaped: match a literal '.'
+    return 'transfac' if $file =~ /\.dat$/i;
+    return 'mast'     if $file =~ /^mast\.|\.mast\.html$|\.mast$/i;
+}
+
+=head2 next_psm
+
+ Title   : next_psm
+ Usage   : my $psm=$psmIO->next_psm();
+ Function: Reads the next PSM from the input file, associated with this object
+ Throws  : Throws if there are format violations in the input file (checking is not
+            very strict with all drivers).
+ Example :
+ Returns : Bio::Matrix::PSM::Psm object
+ Args    : none
+
+=cut
+
+sub next_psm {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # placeholder in this class: only reports "not implemented"
+}
+
+=head2 _parseMatrix
+
+ Title   : _parseMatrix
+ Usage   :
+ Function: Parses the next site matrix information in the meme file
+ Throws  :
+ Example :  Internal stuff
+ Returns :  hash as for constructing a SiteMatrix object (see SiteMatrixI)
+ Args    :  string
+
+=cut
+
+sub _parseMatrix {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # placeholder in this class: only reports "not implemented"
+}
+
+=head2 _parseInstance
+
+ Title   : _parseInstance
+ Usage   :
+ Function: Parses the next sites instances from the meme file
+ Throws  :
+ Example : Internal stuff
+ Returns : Bio::Matrix::PSM::SiteMatrix object
+ Args    : array references
+
+=cut
+
+sub _parseInstance {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # placeholder in this class: only reports "not implemented"
+}
+
+=head2 _parse_coordinates
+
+ Title   : _parse_coordinates
+ Usage   :
+ Function:
+ Throws  :
+ Example : Internal stuff
+ Returns :
+ Args    :
+
+=cut
+
+sub _parse_coordinates {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # placeholder in this class: only reports "not implemented"
+}
+
+=head2 header
+
+ Title   : header
+ Usage   :  my %header=$psmIO->header;
+ Function:  Returns the header for the PSM file, format specific
+ Throws  :
+ Example :
+ Returns :  Hash or a single string with driver specific information
+ Args    :  none
+
+=cut
+
+sub header {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # placeholder in this class: only reports "not implemented"
+}
+
+=head2 _make_matrix
+
+ Title   : _make_matrix
+ Usage   :
+ Function: makes a matrix from 4 array references (A C G T)
+ Throws  :
+ Example :
+ Returns : SiteMatrix object
+ Args    : array of references(A C G T)
+
+=cut
+
+sub _make_matrix {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # placeholder in this class: only reports "not implemented"
+}
+
+
+sub DESTROY {
+    my ($self) = @_;
+    $self->close();   # destructor: invoke the object's close method
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSite.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSite.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSite.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,312 @@
+# $Id: InstanceSite.pm,v 1.14.4.2 2006/10/02 23:10:21 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::InstanceSite - A PSM site occurrence
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::InstanceSite;
+
+  #You can get an InstanceSite object either from a file:
+
+  my ($instances,$matrix)=$SomePSMFile->parse_next;
+
+  #or from memory
+
+  my %params=(seq=>'TATAAT',
+    id=>"TATAbox1", accession=>'ENSG00000122304', mid=>'TB1',
+    desc=>'TATA box, experimentally verified in PRM1 gene',
+    -relpos=>-35, -anchor=>'CHR7', -start=>35000921, -end=>35000926);
+
+  #Last 2 arguments are passed to create a Bio::LocatableSeq object
+  #Anchor shows the coordinates system for the Bio::LocatableSeq object
+
+=head1 DESCRIPTION
+
+Abstract interface to PSM site occurrence (PSM sequence
+match). InstanceSite objects may be used to describe a PSM (See
+L<Bio::Matrix::PSM::SiteMatrix>) sequence matches.  The usual
+characteristic of such a match is sequence coordinates, score,
+sequence and sequence (gene) identifier- accession number or other id.
+
+This object inherits from Bio::LocatableSeq (which defines the real
+sequence) and might hold a SiteMatrix object, used to detect the CRE
+(cis-regulatory element), or created from this CRE.
+
+While the documentation states that the motif id and gene id
+(accession) combination should be unique, this is not entirely true-
+there might be more than one occurrence of the same cis-regulatory
+element in the upstream region of the same gene.  Therefore relpos
+would be the third element to create a really unique combination.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head2 Description
+
+Bio::Matrix::PSM::InstanceSiteI implementation
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::InstanceSite;
+use strict;
+
+use base qw(Bio::LocatableSeq Bio::Matrix::PSM::InstanceSiteI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $instance=new Bio::Matrix::PSM::InstanceSite 
+                         (-seq=>'TATAAT', -id=>"TATAbox1",
+                          -accession_number=>'ENSG00000122304', -mid=>'TB1',
+                          -desc=>'TATA box, experimentally verified in PRM1 gene',
+                          -relpos=>-35, -anchor=>'CHR7', -start=>35000921, -end=>35000926, -strand=>1)
+ Function: Creates an InstanceSite object from memory.
+ Throws  :
+ Example :
+ Returns : Bio::Matrix::PSM::InstanceSite object
+ Args    : hash
+
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my %args = @args; #Too many things to rearrange, and I am creating >1K such objects routinely, so this is a performance issue
+    $args{'-start'} ||= 1;
+    my $end = $args{'-start'} + length($args{-seq}) -1;
+    $args{-strand}=1 if (!defined($args{-strand}));
+    # Always rebuild the list from the hash, so the parent constructor receives
+    # the defaulted -start together with the -end that was computed from it.
+    @args=%args;
+    my $self = $class->SUPER::new(@args,'-end',$end);
+    # Store every argument again under its dash-free key (-mid becomes mid, ...).
+    while( @args ) {
+        (my $key = shift @args) =~ s/-//gi; #deletes all dashes (only dashes)!
+        $args{$key} = shift @args;
+    }
+#should throw exception if seq is null, for now just warn
+    if ((!defined($args{seq})) || ($args{seq} eq '')) {   # defined() first: no string compare on undef
+        $args{seq}="AGCT";
+        warn "No sequence?!\n";
+    }
+    $self->{mid}=$args{mid};
+    $self->seq($args{seq});
+    $self->desc($args{desc});
+    $self->{score}=$args{score};
+    $self->{relpos}=$args{relpos};
+    $self->{frame}=$args{frame};
+    $self->{anchor}=$args{anchor};
+    return $self;
+}
+
+
+=head2 mid
+
+ Title   : mid
+ Usage   : my $mid=$instance->mid;
+ Function: Get/Set the motif id
+ Throws  :
+ Example :
+ Returns : scalar
+ Args    : scalar
+
+
+=cut
+
+sub mid {
+    my ($self, @new) = @_;
+    my $old = $self->{mid};            # value held before any update
+    $self->{mid} = $new[0] if @new;    # store only when an argument was passed
+    return $old;                       # the previous value, also after a set
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : my $score=$instance->score;
+ Function: Get/Set the score (mismatches) between the instance and the attached (or
+            initial) PSM
+ Throws  :
+ Example :
+ Returns : real number
+ Args    : real number
+
+=cut
+
+sub score {
+    my ($self, @new) = @_;
+    my $old = $self->{score};            # value held before any update
+    $self->{score} = $new[0] if @new;    # store only when an argument was passed
+    return $old;                         # the previous value, also after a set
+}
+
+=head2 anchor
+
+ Title   : anchor
+ Usage   : my $anchor=$instance->anchor;
+ Function: Get/Set the anchor which shows what coordinate system start/end use
+ Throws  :
+ Example :
+ Returns : string
+ Args    : string
+
+=cut
+
+sub anchor {
+    my ($self, @new) = @_;
+    my $old = $self->{anchor};            # value held before any update
+    $self->{anchor} = $new[0] if @new;    # store only when an argument was passed
+    return $old;                          # the previous value, also after a set
+}
+
+=head2 start
+
+ Title   : start
+ Usage   : my $start=$instance->start;
+ Function: Get/Set the position of the instance on the sequence used
+ Throws  :
+ Example :
+ Returns : integer
+ Args    : integer
+
+=cut
+
+
+#Provided by LocatableSeq
+
+=head2 minstance
+
+ Title   : minstance
+ Usage   : my $minstance=$instance->minstance;
+ Function: Get/Set the unique identifier- sequence id/motif id, for example PRM1_TATAbox.
+          Not necessarily human readable.
+ Throws  :
+ Example :
+ Returns : string
+ Args    : string
+
+=cut
+
+sub minstance {
+    my ($self, @new) = @_;
+    my $old = $self->{minstance};            # value held before any update
+    $self->{minstance} = $new[0] if @new;    # store only when an argument was passed
+    return $old;                             # the previous value, also after a set
+}
+
+=head2 relpos
+
+ Title   : relpos
+ Usage   : my $seqpos=$instance->relpos;
+ Function: Get/Set the relative position of the instance with respect to the transcription start
+            site (if known). Can and usually is negative.
+ Throws  :
+ Example :
+ Returns : integer
+ Args    : integer
+
+=cut
+
+sub relpos {
+    my ($self, @new) = @_;
+    my $old = $self->{relpos};            # value held before any update
+    $self->{relpos} = $new[0] if @new;    # store only when an argument was passed
+    return $old;                          # the previous value, also after a set
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $ann = $seq->annotation or $seq->annotation($annotation)
+ Function: Gets or sets the annotation
+ Returns : L<Bio::AnnotationCollectionI> object
+ Args    : None or L<Bio::AnnotationCollectionI> object
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information
+
+=cut
+
+sub annotation {
+    my ($obj,$value) = @_;
+    if( defined $value ) {
+        # setter: only Bio::AnnotationCollectionI implementations are accepted
+        $obj->throw("object of class ".ref($value)." does not implement ".
+                    "Bio::AnnotationCollectionI. Too bad.")
+            unless $value->isa("Bio::AnnotationCollectionI");
+        $obj->{'_annotation'} = $value;
+    } elsif( ! defined $obj->{'_annotation'}) {   # first read: create an empty collection
+        $obj->{'_annotation'} = Bio::Annotation::Collection->new(); # explicit call: this package has its own new()
+    }
+    return $obj->{'_annotation'};
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $species = $seq->species() or $seq->species($species)
+ Function: Gets or sets the species
+ Returns : L<Bio::Species> object
+ Args    : None or L<Bio::Species> object
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    my ($self, $species) = @_;
+    # A true argument replaces the stored species and is handed back;
+    # with no argument, or a false one, the stored species is only read.
+    return $self->{'species'} = $species
+        if ($species);
+    return $self->{'species'};
+}
+
+
+=head2 frame
+
+ Title   : frame
+ Usage   : my $frame=$instance->frame;
+ Function: Get/Set the frame of a DNA instance with respect to a protein motif used.
+            Returns undef if the motif was not protein or the DB is protein.
+ Throws  :
+ Example :
+ Returns : integer
+ Args    : integer (0, 1, 2)
+
+=cut
+
+sub frame {
+    my ($self, @new) = @_;
+    my $prev = $self->{frame};   # the previous frame is what gets returned, also after a set
+    if (@new) { $self->throw("This is not a legitimate frame") unless (defined($new[0]) && grep { $new[0] eq $_ } qw[0 1 2]); $self->{frame} = $new[0]; } # exact check, done before storing
+    return $prev;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSiteI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSiteI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/InstanceSiteI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,160 @@
+# $Id: InstanceSiteI.pm,v 1.10.4.1 2006/10/02 23:10:21 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::InstanceSiteI - InstanceSite interface, holds an instance of a PSM
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::InstanceSite;
+  # You can get an InstanceSite object either from a file:
+  my ($instances,$matrix)=$SomePSMFile->parse_next;
+  #or from memory
+  my %params=(seq       => 'TATAAT',
+              id        => "TATAbox1", 
+              accession => 'ENSG00000122304', 
+              mid       => 'TB1',
+              desc      => 'TATA box, experimentally verified in PRM1 gene',
+              relpos    => -35);
+
+=head1 DESCRIPTION
+
+Abstract interface to PSM site occurrence (PSM sequence
+match). InstanceSite objects may be used to describe a PSM (See
+Bio::Matrix::PSM::SiteMatrix) sequence matches.  The usual
+characteristic of such a match is sequence coordinates, score,
+sequence and sequence (gene) identifier- accession number or other
+id. This object inherits from Bio::LocatableSeq (which defines the
+real sequence) and might hold a SiteMatrix object, used to detect the
+CRE (cis-regulatory element), or created from this CRE.  While the
+documentation states that the motif id and gene id (accession)
+combination should be unique, this is not entirely true- there might
+be more than one occurrence of the same cis-regulatory element in the
+upstream region of the same gene.  Therefore relpos would be the third
+element to create a really unique combination.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 SEE ALSO
+
+L<Bio::Matrix::PSM::SiteMatrix>, L<Bio::Matrix::PSM::Psm>, L<Bio::Matrix::PSM::IO>
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::InstanceSiteI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 mid
+
+ Title   : mid
+ Usage   : my $mid=$instance->mid;
+ Function: Get/Set the motif id
+ Throws  :
+ Returns : scalar
+ Args    : scalar
+
+
+=cut
+
+sub mid {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # interface stub: only reports "not implemented"
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : my $score=$instance->score;
+ Function: Get/Set the score (mismatches) between the instance and the attached (or
+            initial) PSM
+ Throws  :
+ Returns : real number
+ Args    : real number
+
+=cut
+
+sub score {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # interface stub: only reports "not implemented"
+}
+
+=head2 start
+
+ Title   : start
+ Usage   : my $start=$instance->start;
+ Function: Get/Set the position of the instance on the sequence used
+ Throws  :
+ Returns : integer
+ Args    : integer
+
+=cut
+
+sub start {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # interface stub: only reports "not implemented"
+}
+
+=head2 relpos
+
+ Title   : relpos
+ Usage   : my $seqpos=$instance->relpos;
+ Function: Get/Set the relative position of the instance with respect to the transcription start
+            site (if known). Can and usually is negative.
+ Throws  :
+ Returns : integer
+ Args    : integer
+
+=cut
+
+sub relpos {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # interface stub: only reports "not implemented"
+}
+
+
+=head2 minstance
+
+ Title   : minstance
+ Usage   : my $minstance=$instance->minstance;
+ Function: Get/Set the unique identifier- sequence id/motif id, for example PRM1_TATAbox.
+          Not necessarily human readable.
+ Throws  :
+ Returns : string
+ Args    : string
+
+=cut
+
+sub minstance {
+    my ($self) = @_;
+    return $self->throw_not_implemented();   # interface stub: only reports "not implemented"
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtMatrix.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtMatrix.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtMatrix.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,874 @@
+# $Id: ProtMatrix.pm,v 1.16.4.1 2006/10/02 23:10:22 sendu Exp $
+#---------------------------------------------------------
+
+=head1 NAME
+
+Bio::Matrix::PSM::ProtMatrix - SiteMatrixI implementation, holds a
+position scoring matrix (or position weight matrix) with log-odds scoring
+information.
+
+=head1 SYNOPSIS
+
+   use Bio::Matrix::PSM::ProtMatrix;
+   # Create from memory by supplying a probability matrix hash, with the
+   # vectors given either as strings or as arrays. Hash entries of the form lN
+   # refer to an array of position-specific log-odds scores for amino acid N.
+   # Hash entries of the form pN represent the position-specific probability
+   # of finding amino acid N.
+
+   my %param = (
+             'id' => 'A. thaliana protein atp1',
+             '-e_val' => $score,
+             'lS' => [ '-2', '3', '-3', '2', '-3', '1', '1', '3' ],
+             'lF' => [ '-1', '-4', '0', '-5', '0', '-5', '-4', '-4' ],
+             'lT' => [ '-1', '1', '0', '1', '-2', '-1', '0', '1' ],
+             'lN' => [ '-3', '-1', '-2', '3', '-5', '5', '-2', '0' ],
+             'lK' => [ '-2', '0', '-3', '2', '-3', '2', '-3', '-1' ],
+             'lY' => [ '-2', '-3', '-3', '-4', '-3', '-4', '-4', '-4' ],
+             'lE' => [ '-3', '4', '-3', '2', '-4', '-2', '-3', '2' ],
+             'lV' => [ '0', '-2', '1', '-4', '1', '-4', '-1', '-3' ],
+             'lQ' => [ '-1', '0', '-2', '3', '-4', '1', '-3', '0' ],
+             'lM' => [ '8', '-3', '8', '-3', '1', '-3', '-3', '-3' ],
+             'lC' => [ '-2', '-3', '-3', '-4', '-3', '-4', '-3', '-3' ],
+             'lL' => [ '1', '-3', '1', '-4', '3', '-4', '-2', '-4' ],
+             'lA' => [ '-2', '1', '-2', '0', '-2', '-2', '2', '2' ],
+             'lW' => [ '-2', '-4', '-3', '-5', '-4', '-5', '-5', '-5' ],
+             'lP' => [ '-3', '-2', '-4', '-3', '-1', '-3', '6', '-3' ],
+             'lH' => [ '-2', '-2', '-3', '-2', '-5', '-2', '-2', '-3' ],
+             'lD' => [ '-4', '-1', '-3', '1', '-3', '-1', '-3', '4' ],
+             'lR' => [ '-2', '-1', '-3', '0', '-4', '4', '-4', '-3' ],
+             'lI' => [ '0', '-3', '0', '-4', '6', '-4', '-2', '-2' ],
+             'lG' => [ '-4', '-2', '-4', '-2', '-5', '-3', '-1', '-2' ],
+             'pS' => [ '0', '33', '0', '16', '1', '12', '11', '25' ],
+             'pF' => [ '0', '0', '2', '0', '3', '0', '0', '0' ],
+             'pT' => [ '0', '8', '7', '10', '1', '2', '7', '8' ],
+             'pN' => [ '0', '0', '2', '13', '0', '36', '1', '4' ],
+             'pK' => [ '0', '5', '0', '13', '1', '15', '0', '2' ],
+             'pY' => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+             'pE' => [ '0', '41', '1', '12', '0', '0', '0', '15' ],
+             'pV' => [ '0', '3', '9', '0', '2', '0', '3', '1' ],
+             'pQ' => [ '0', '0', '0', '15', '0', '4', '0', '3' ],
+             'pM' => [ '100', '0', '66', '0', '2', '0', '0', '0' ],
+             'pC' => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+             'pL' => [ '0', '0', '8', '0', '25', '0', '4', '0' ],
+             'pA' => [ '0', '10', '1', '9', '2', '0', '22', '16' ],
+             'pW' => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+             'pP' => [ '0', '0', '0', '0', '3', '1', '45', '0' ],
+             'pH' => [ '0', '0', '0', '0', '0', '0', '1', '0' ],
+             'pD' => [ '0', '0', '1', '7', '2', '2', '0', '22' ],
+             'pR' => [ '0', '0', '0', '3', '0', '27', '0', '0' ],
+             'pI' => [ '0', '0', '3', '0', '59', '1', '2', '3' ],
+             'pG' => [ '0', '0', '0', '1', '0', '0', '4', '1' ],
+   );
+
+   my $matrix = Bio::Matrix::PSM::ProtMatrix->new( %param );
+
+
+   my $site = new Bio::Matrix::PSM::ProtMatrix(%param);
+   # Or get it from a file:
+   use Bio::Matrix::PSM::IO;
+   my $psmIO = new Bio::Matrix::PSM::IO(-file => $file, -format => 'psi-blast');
+   while (my $psm = $psmIO->next_psm) {
+      #Now we have a Bio::Matrix::PSM::Psm object, 
+      # see Bio::Matrix::PSM::PsmI for details
+      #This is a Bio::Matrix::PSM::ProtMatrix object now
+      my $matrix = $psm->matrix;   
+   }
+
+   # Get a simple consensus, where alphabet is:
+   # {A, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V}
+   # choosing the highest probability or X if prob is too low
+   my $consensus = $site->consensus;
+
+   # Retrieving and using regular expressions:
+   my $regexp = $site->regexp;
+   my $count = () = $seq =~ /$regexp/g;
+   print "Motif ", $site->id, " is present $count times in this sequence\n";
+
+=head1 DESCRIPTION
+
+ProtMatrix is designed to provide some basic methods when working with
+position scoring (weight) matrices related to protein sequences.  A
+protein PSM consists of 20 vectors of frequencies, one vector per amino
+acid with one entry per position.  This is the minimum information you
+should provide to construct a PSM object.  The vectors can be provided as
+strings with frequencies where the frequency is {0..a} and a=1. This
+is the way MEME represents a matrix in compressed form, and it is quite
+useful when working with a relational DB.  If arrays are provided as an
+input (references to arrays actually) they can be any number, real or
+integer (frequency or count).
+
+When creating the object the constructor will check for positions that
+equal 0.  If such is found it will increase the count for all
+positions by one and recalculate the frequency.  Potential bug - if
+you are using frequencies and one of the positions is 0 it will change
+significantly.  However, you should never have frequency that equals
+0.
+
+Throws an exception if: You mix as an input array and string (for
+example A matrix is given as array, C - as string).  The position
+vector is (0,0,0,0).  One of the probability vectors is shorter than
+the rest.
+
+Summary of the methods I use most frequently (details below):
+
+   iupac - return IUPAC compliant consensus as a string
+   score - Returns the score as a real number
+   IC - information content. Returns a real number
+   id - identifier. Returns a string
+   accession - accession number. Returns a string
+   next_pos - return the sequence probability for each letter, IUPAC
+         symbol, IUPAC probability and simple sequence
+   consensus letter for this position. Rewind at the end. Returns a hash.
+   pos - current position get/set. Returns an integer.
+   regexp - construct a regular expression based on IUPAC consensus.
+         For example AGWV will be [Aa][Gg][AaTt][AaCcGg]
+   width - site width
+   get_string - gets the probability vector for a single base as a string.
+   get_array - gets the probability vector for a single base as an array.
+   get_logs_array - gets the log-odds vector for a single base as an array.
+
+A new feature, which might be of interest to anyone who wants to store
+a PSM in a relational database without creating an entry for each
+position, is the ability to compress the PSM vector into a string,
+usually losing less than 1% of the data.  This can be done with:
+
+   my $str=$matrix->get_compressed_freq('A');
+or
+
+   my $str=$matrix->get_compressed_logs('A');
+
+Loading from a database should be done with new, but is not yet implemented.
+However you can still uncompress such string with:
+
+   my @arr=Bio::Matrix::PSM::ProtMatrix::_uncompress_string ($str,1,1); # for PSM
+
+or
+
+   my @arr=Bio::Matrix::PSM::ProtMatrix::_uncompress_string ($str,1000,2); # for log odds
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.   Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - James Thompson
+
+Email tex at biosysadmin.com
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::ProtMatrix;
+use strict;
+
+use base qw(Bio::Root::Root Bio::Matrix::PSM::SiteMatrixI);
+
+=head2 new
+
+ Title    : new
+ Usage    : my $site = new Bio::Matrix::PSM::ProtMatrix( 
+               %probs,
+               %logs,
+               -IC    => $ic,
+               -e_val => $score, 
+               -id    => $mid,
+               -model => \%model
+            );
+ Function : Creates a new Bio::Matrix::PSM::ProtMatrix object from memory
+ Throws   : If inconsistent data for all vectors (all 20 amino acids) is
+               provided, if you mix input types (string vs array) or if a
+               position freq is 0.
+ Example  :
+ Returns  : Bio::Matrix::PSM::ProtMatrix object
+ Args     : Hash references to log-odds scores and probabilities for
+            position-specific scoring info, e-value (optional), information
+            content (optional), id (optional), model for background distribution
+            of proteins (optional).
+
+=cut
+
+sub new {
+   my ($class, @args) = @_;
+   my $self = $class->SUPER::new(@args);
+
+   # Arguments are parsed by hand: many of these objects are created at
+   # once, so construction speed matters.
+   my %input;
+   while ( @args ) {
+      (my $key = shift @args) =~ s/-//g; # deletes all dashes, not only a leading one
+      $input{$key} = shift @args;
+   }
+
+   # Protein alphabet used for the log-odds scores and probabilities.
+   # Maybe change this later on to allow for non-standard aa lists?
+   my @alphabet = qw/A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V/;
+
+   # Every letter needs both a log-odds vector (key "lX") and a
+   # probability vector (key "pX").
+   foreach my $aa (@alphabet) {
+      $self->throw("Error: No log-odds information for $aa!")
+         unless defined $input{"l$aa"};
+      $self->{"log$aa"} = $input{"l$aa"};
+
+      $self->throw("Error: No probability information for $aa!")
+         unless defined $input{"p$aa"};
+      $self->{"prob$aa"} = $input{"p$aa"};
+   }
+
+   $self->{_position} = 0;
+   $self->{IC}        = $input{IC};
+   $self->{e_val}     = $input{e_val};
+   $self->{sites}     = $input{sites};
+   $self->{width}     = $input{width};
+   $self->{accession_number} = $input{accession_number};
+   # Correction might be unwanted - supply your own
+   $self->{_correction} = defined($input{correction}) ? $input{correction} : 1;
+   # No id provided, null for the sake of rel db
+   $self->{id} = defined($input{id}) ? $input{id} : 'null';
+   $self->{_alphabet} = \@alphabet;
+
+   # Make consensus, throw if any one of the vectors is shorter.
+   # The background model is not a consensus threshold, so it is not passed
+   # on as one; the default threshold is used.
+   $self = _calculate_consensus($self);
+   return $self;
+}
+
+=head2 alphabet
+
+ Title    : alphabet
+ Usage    : my @letters = $matrix->alphabet;
+            my $letters = $matrix->alphabet;
+ Function : Returns an array (or array reference) containing all of the
+            allowable characters for this matrix.
+ Throws   :
+ Example  :
+ Returns  : Array in list context, array reference in scalar context.
+ Args     : none
+
+=cut
+
+sub alphabet {
+   my $self = shift;
+   # List context gets the letters themselves, scalar context the reference.
+   return wantarray ? @{ $self->{_alphabet} } : $self->{_alphabet};
+}
+
+=head2 _calculate_consensus
+
+ Title    : _calculate_consensus
+ Usage    :
+ Function : Calculates the consensus sequence for this matrix. 
+ Throws   :
+ Example  :
+ Returns  :
+ Args     :
+
+=cut
+
+sub _calculate_consensus {
+   my ($self, $thresh) = @_;
+
+   # All probability vectors must cover the same number of positions.
+   my @lengths = map { scalar @{ $self->{"prob$_"} } } @{ $self->{_alphabet} };
+   my $len = shift @lengths;
+   foreach my $other (@lengths) {
+      $self->throw( "Probability matrix is damaged!\n" ) if $other != $len;
+   }
+
+   # For every position store the consensus letter in $self->{seq} and its
+   # probability in $self->{seqp}.
+   foreach my $i ( 0 .. $len - 1 ) {
+      # probabilities at position $i, ordered as in $self->{_alphabet}
+      my @probs = map { $self->{"prob$_"}[$i] } @{ $self->{_alphabet} };
+      ( $self->{seq}[$i], $self->{seqp}[$i] ) = $self->_to_cons( @probs, $thresh );
+   }
+
+   return $self;
+}
+
+=head2 next_pos
+
+ Title    : next_pos
+ Usage    :
+ Function : Retrieves the next position features: frequencies for all 20 amino
+            acids, log-odds scores for all 20 amino acids at this position,
+            the main (consensus) letter at this position, the probability
+            for the consensus letter to occur at this position and the relative
+            current position as an integer.
+ Throws   :
+ Example  :
+ Returns  : hash (or hash reference) (pA,pR,pN,pD,...,lA,lR,lN,lD,...,base,prob,rel)
+            - pN entries represent the probability for amino acid N
+            to be at this position
+            - lN entries represent the log-odds score for having amino acid
+            N at this position
+            - base is the consensus amino acid
+            - prob is the probability for the consensus amino acid to be at this 
+            position
+            - rel is the relative index of the current position (integer)
+ Args      : none
+
+
+=cut
+
+sub next_pos {
+   my ($self) = @_;
+   $self->throw("instance method called on class") unless ref $self;
+
+   my $pos = $self->{_position};
+
+   # Past the last position: rewind the cursor and return nothing.
+   if ( $pos >= @{ $self->{seq} } ) {
+      $self->{_position} = 0;
+      return;
+   }
+
+   # Collect the per-letter probabilities ("pX") and log-odds ("lX") first,
+   # then the consensus letter, the position and the consensus probability.
+   my %hash;
+   foreach my $aa ( @{ $self->{_alphabet} } ) {
+      $hash{"p$aa"} = $self->{"prob$aa"}[$pos];
+   }
+   foreach my $aa ( @{ $self->{_alphabet} } ) {
+      $hash{"l$aa"} = $self->{"log$aa"}[$pos];
+   }
+   $hash{base} = $self->{seq}[$pos];
+   $hash{rel}  = $pos;
+   $hash{prob} = $self->{seqp}[$pos];
+
+   $self->{_position}++;
+
+   # hash in list context, hash reference otherwise
+   return wantarray ? %hash : \%hash;
+}
+
+
+=head2 curpos
+
+ Title    : curpos
+ Usage    :
+ Function : Gets/sets the current position. 
+ Throws   :
+ Example  :
+ Returns  : Current position (integer).
+ Args     : New position (integer).
+
+=cut
+
+sub curpos {
+   my ($self, @value) = @_;
+   # Store a new position only when one was actually passed in.
+   $self->{_position} = $value[0] if @value;
+   return $self->{_position};
+}
+
+
+=head2 e_val
+
+ Title    : e_val
+ Usage    :
+ Function : Gets/sets the e-value
+ Throws   :
+ Example  :
+ Returns  : 
+ Args     : real number
+
+=cut
+
+sub e_val {
+   my ($self, @value) = @_;
+   # Store a new e-value only when one was actually passed in.
+   $self->{e_val} = $value[0] if @value;
+   return $self->{e_val};
+}
+
+
+=head2 IC
+
+ Title    : IC
+ Usage    :
+ Function : Position-specific information content.
+ Throws   :
+ Example  :
+ Returns  : Information content for current position.
+ Args     : Information content for current position.
+
+=cut
+
+sub IC {
+   my ($self, @value) = @_;
+   # Store a new information content only when one was actually passed in.
+   $self->{IC} = $value[0] if @value;
+   return $self->{IC};
+}
+
+=head2 accession_number
+
+ Title    : accession_number
+ Usage    :
+ Function: accession number, this will be unique id for the ProtMatrix object as
+            well for any other object, inheriting from ProtMatrix.
+ Throws   :
+ Example  :
+ Returns  : New accession number (string)
+ Args     : Accession number (string)
+
+=cut
+
+sub accession_number {
+   my ($self, @value) = @_;
+   # Store a new accession number only when one was actually passed in.
+   $self->{accession_number} = $value[0] if @value;
+   return $self->{accession_number};
+}
+
+=head2 consensus
+
+ Title    : consensus
+ Usage    :
+ Function : Returns the consensus sequence for this PSM.
+ Throws   : if supplied with threshold outside 5..10 range
+ Example  :
+ Returns  : string
+ Args     : (optional) threshold value 5 to 10 (corresponds to 50-100% at each position)
+
+=cut
+
+sub consensus {
+   my ($self, $thresh) = @_;
+
+   # A true threshold triggers a recalculation before the letters are read.
+   if ($thresh) {
+      $self->_calculate_consensus($thresh);
+   }
+
+   # Concatenate the stored consensus letters in position order.
+   my $consensus = '';
+   $consensus .= $_ for @{ $self->{seq} };
+   return $consensus;
+}
+
+# Delegates to consensus() without passing a threshold.
+sub IUPAC {
+   my ($self) = @_;
+   return $self->consensus;
+}
+
+
+=head2 get_string
+
+ Title   : get_string
+ Usage   :
+ Function: Returns given probability vector as a string. Useful if you want to
+            store things in a rel database, where arrays are not first choice
+ Throws  : If no probability vector is stored for the argument
+ Example :
+ Returns : string
+ Args    : character (one amino acid letter of the matrix alphabet)
+
+=cut
+
+sub get_string {
+   my $self = shift;
+   my $base = shift;
+
+   # Look the vector up before dereferencing it, so that an unknown letter
+   # raises the "No such base" exception instead of a dereference failure.
+   my $vector = $self->{"prob$base"};
+   if ( ref($vector) ne 'ARRAY' || ! @{$vector} ) {
+      $self->throw( "No such base: $base\n");
+   }
+
+   my $string = '';
+   foreach my $prob (@{$vector}) {
+      # scale by 10 and round to the nearest integer; 10 is written as 'a'
+      my $next = sprintf("%.0f", $prob*10);
+      $next = 'a' if ($next eq '10');
+      $string .= $next;
+   }
+   return $string;
+}
+
+
+
+=head2 width
+
+ Title    : width
+ Usage    :
+ Function : Returns the length of the site
+ Throws   :
+ Example  :
+ Returns  : number
+ Args     :
+
+=cut
+
+sub width {
+   my ($self) = @_;
+   # The width is the number of entries in the probability vector stored for 'A'.
+   return scalar @{ $self->{probA} };
+}
+
+=head2 get_array
+
+ Title    : get_array
+ Usage    :
+ Function : Returns an array with frequencies for a specified amino acid.
+ Throws   :
+ Example  :
+ Returns  : Array representing frequencies for specified amino acid.
+ Args     : Single amino acid (character).
+
+=cut
+
+sub get_array {
+   my $self = shift;
+   my $letter = uc(shift);
+
+   # Compare letters exactly: a pattern match would accept regex
+   # metacharacters such as '.' or fail while compiling the pattern.
+   $self->throw ("No such base: $letter!\n")
+      unless grep { $_ eq $letter } @{$self->{_alphabet}};
+
+   return @{$self->{"prob$letter"}}; 
+}
+
+
+=head2 get_logs_array
+
+ Title    : get_logs_array
+ Usage    :
+ Function : Returns an array with log_odds for a specified base
+ Throws   :
+ Example  :
+ Returns  : Array representing log-odds scores for specified amino acid.
+ Args     : Single amino acid (character).
+
+=cut
+
+sub get_logs_array {
+   my $self = shift;
+   my $letter = uc(shift);
+
+   # Compare letters exactly: a pattern match would accept regex
+   # metacharacters such as '.' or fail while compiling the pattern.
+   $self->throw ("No such base: $letter!\n")
+      unless grep { $_ eq $letter } @{$self->{_alphabet}};
+
+   return @{$self->{"log$letter"}}; 
+}
+
+=head2 id
+
+ Title    : id
+ Usage    :
+ Function : Gets/sets the site id
+ Throws   :
+ Example  :
+ Returns  : string
+ Args     : string
+
+=cut
+
+sub id {
+   my ($self, @value) = @_;
+   # Store a new id only when one was actually passed in.
+   $self->{id} = $value[0] if @value;
+   return $self->{id};
+}
+
+=head2 regexp
+
+ Title    : regexp
+ Usage    :
+ Function : Returns a case-insensitive regular expression which matches the
+            IUPAC convention.  X's in consensus sequence will match anything.     
+ Throws   :
+ Example  :
+ Returns  : string
+ Args     : Threshold for calculating consensus sequence (number in range 0-100
+            representing a percentage). Threshold defaults to 20.
+
+=cut
+
+sub regexp {
+   my ($self, $threshold) = @_;
+   # Threshold defaults to 20 when the caller supplies none.
+   $threshold = 20 unless defined $threshold;
+
+   my @alphabet = @{$self->{_alphabet}};
+   my $width = $self->width;
+   my @regexp;
+   for my $i ( 0 .. $width - 1 ) {
+      # residues whose probability at this position reaches the threshold,
+      # written in both cases so that the match is case-insensitive
+      my @letters = map { uc($_).lc($_) }
+                    grep { $self->{"prob$_"}->[$i] >= $threshold } @alphabet;
+
+      # no residue reaches the threshold: accept any character here
+      push @regexp, @letters ? '['.join('',@letters).']' : '.';
+   }
+
+   # one pattern per position in list context, the joined pattern otherwise
+   return wantarray ? @regexp : join('', @regexp);
+}
+
+
+=head2 regexp_array
+
+ Title    : regexp_array
+ Usage    :
+ Function : Returns an array of position-specific regular expressions.
+             X's in consensus sequence will match anything.      
+ Throws   :
+ Example  :
+ Returns  : Array of position-specific regular expressions.
+ Args     : Threshold for calculating consensus sequence (number in range 0-100
+            representing a percentage). Threshold defaults to 20.
+ Notes    : Simply calls regexp method in list context.
+
+=cut
+
+sub regexp_array {
+   my ($self, @args) = @_;
+
+   # Call regexp() in list context so that it returns one pattern per
+   # position; any threshold argument is forwarded unchanged.
+   my @patterns = $self->regexp(@args);
+   return @patterns;
+}
+
+
+=head2 _compress_array
+
+ Title    : _compress_array
+ Usage    :
+ Function :  Will compress an array of real signed numbers to a string (ie vector of bytes)
+             -127 to +127 for bi-directional(signed) and 0..255 for unsigned ;
+ Throws   :
+ Example  :  Internal stuff
+ Returns  :  String
+ Args     :  array reference, followed by max value and direction (optional, defaults to 1),
+             direction of 1 is unsigned, anything else is signed. 
+
+=cut
+
+sub _compress_array {
+   my ($array,$lm,$direct)=@_;
+   my $str;
+   return unless(($array) && ($lm));
+   $direct=1 unless ($direct);
+   # scale factor: $lm maps to 255 (unsigned, direction 1) or to 127 (signed)
+   my $k1= ($direct==1) ? (255/$lm) : (127/$lm);
+   foreach my $value (@{$array}) {
+      # Work on a copy: the loop variable aliases the caller's array, so
+      # clamping it in place would modify the input data.
+      my $c = $value;
+      $c=$lm if ($c>$lm);
+      $c=-$lm if (($c<-$lm) && ($direct !=1));
+      $c=0 if (($c<0) && ($direct ==1));
+      my $byte=int($k1*$c);
+      $byte=127+$byte if ($direct !=1);#Clumsy, should be really shift the bits
+      $str.=chr($byte);
+   }
+   return $str;
+}
+
+=head2 _uncompress_string
+
+ Title    : _uncompress_string
+ Usage    :
+ Function :   Will uncompress a string (vector of bytes) to create an array of real
+                  signed numbers (opposite to _compress_array)
+ Throws   :
+ Example  :   Internal stuff
+ Returns  :   array
+ Args     :   string, followed by max value and direction (optional, defaults to 1),
+              direction of 1 is unsigned, anything else is signed.
+
+=cut
+
+sub _uncompress_string {
+   my ($str,$lm,$direct)=@_;
+   my @array;
+   # An empty string or a missing max value yields nothing. The string is
+   # tested by length, because the one-character string '0' is false.
+   return unless (defined($str) && length($str) && ($lm));
+   $direct=1 unless ($direct);
+   # same scale factor as used when compressing
+   my $k1= ($direct==1) ? (255/$lm) : (127/$lm);
+   # Walk the characters in order. Testing each character for truth would
+   # stop at the first '0' (byte 48) and truncate the result.
+   foreach my $c (split //, $str) {
+      my $byte=ord($c);
+      $byte=$byte-127 if ($direct !=1);#Clumsy, should be really shift the bits
+      push @array, $byte/$k1;
+   }
+
+   return @array;
+}
+
+=head2 get_compressed_freq
+
+ Title    : get_compressed_freq
+ Usage    :
+ Function:   A method to provide a compressed frequency vector. It uses one byte to
+             code the frequency for one of the probability vectors for one position.
+             Useful for relational database. Improvement of the previous 0..a coding.
+ Throws   :
+ Example  :   my $strA=$self->get_compressed_freq('A');
+ Returns  :   String
+ Args     :   char 
+
+=cut
+
+sub get_compressed_freq {
+   my $self=shift;
+   my $base=shift;
+
+   # Any letter of the matrix alphabet is accepted, not only A, C, G and T.
+   $self->throw ("No such base: $base!\n")
+      unless grep { $_ eq $base } @{ $self->{_alphabet} || [] };
+
+   # A letter without a stored vector is compressed as an empty list.
+   my @prob = defined($self->{"prob$base"}) ? @{$self->{"prob$base"}} : ();
+
+   # max value 1, direction 1 (unsigned)
+   my $str= _compress_array(\@prob,1,1);
+   return $str;
+}
+
+=head2 sequence_match_weight
+
+ Title    : sequence_match_weight
+ Usage    :
+ Function :   This method will calculate the score of a match, based on the PSM
+              if such is associated with the matrix object. Returns undef if no
+              PSM data is available.
+ Throws   :   if the length of the sequence is different from the matrix width
+ Example  :   my $score=$matrix->sequence_match_weight('ACGGATAG');
+ Returns  :   Floating point
+ Args     :   string
+
+=cut
+
+sub sequence_match_weight {
+   my ($self,$seq)=@_;
+   # no log-odds data stored: nothing to score
+   return unless ($self->{logA});
+
+   my $seqlen = length($seq);
+   my $width  = $self->width;
+   $self->throw("Error: Input sequence size ($seqlen) not equal to PSM size ($width)!\n")
+      unless ($seqlen == $width);
+
+   my ($score,$i) = (0,0);
+   foreach my $letter ( split //, $seq ) {
+      # Fetch the vector first so that a letter without log-odds data does
+      # not create a new key in the object; such a letter adds nothing.
+      my $logs = $self->{"log$letter"};
+      $score += $logs->[$i] if ( ref($logs) eq 'ARRAY' && defined $logs->[$i] );
+      $i++;
+   }
+   return $score;
+}
+
+
+=head2 _to_IUPAC
+
+ Title   : _to_IUPAC
+ Usage   :
+ Function: Converts a single position to IUPAC compliant symbol and returns its probability.
+            Currently returns the most likely amino acid/probability combination.
+ Throws  :
+ Example :
+ Returns : char, real number representing an amino acid and a probability.
+ Args    : real numbers for all 20 amino acids (ordered by alphabet contained
+            in $self->{_alphabet}, minimum probability threshold.
+
+=cut
+
+sub _to_IUPAC {
+   my ($self,@probs) = @_;
+
+   # @probs takes every remaining argument, so an optional threshold arrives
+   # as one extra trailing element after one probability per alphabet letter.
+   my $thresh;
+   $thresh = pop @probs if ( @probs > @{$self->{_alphabet}} );
+
+   # provide a default threshold of 5, corresponds to 5% threshold for 
+   # inferring that the aa at any position is the true aa; a reference
+   # cannot serve as a threshold, so it falls back to the default as well
+   $thresh = 5 unless ( defined $thresh && !ref $thresh );
+
+   # keep the most probable letter above the threshold; 'X' if there is none
+   my ($IUPAC_aa,$max_prob) = ('X',$thresh);
+   for my $aa ( @{$self->{_alphabet}} ) {
+      my $prob = shift @probs;
+      if ( $prob > $max_prob ) {
+         $IUPAC_aa = $aa;
+         $max_prob = $prob;
+      }
+   }
+   
+   return $IUPAC_aa, $max_prob;
+}
+
+=head2 _to_cons
+
+ Title   : _to_cons
+ Usage   :
+ Function: Converts a single position to simple consensus character and returns
+            its probability. Currently just calls the _to_IUPAC subroutine. 
+ Throws  :
+ Example :
+ Returns : char, real number
+ Args    : real numbers for all 20 amino acids (positional), optional threshold
+
+=cut
+
+sub _to_cons {
+   my @args = @_;
+   # Thin wrapper: hands the whole argument list on to _to_IUPAC.
+   return _to_IUPAC(@args);
+}
+
+=head2 get_all_vectors
+
+ Title    : get_all_vectors
+ Usage    :
+ Function :  returns all possible sequence vectors to satisfy the PFM under
+             a given threshold
+ Throws   :  If threshold outside of 0..1 (no sense to do that)
+ Example  :  my @vectors = $self->get_all_vectors(4);
+ Returns  :  Array of strings
+ Args     :  (optional) floating
+
+=cut
+
+#sub get_all_vectors {
+#   my $self = shift;
+#   my $thresh = shift;
+#
+#   $self->throw("Out of range. Threshold should be >0 and 1<.\n") if (($thresh<0) || ($thresh>1));
+#
+#   my @seq = split(//,$self->consensus($thresh*10));
+#   my @perm;
+#   for my $i (0..@{$self->{probA}}) {
+#      push @{$perm[$i]},'A' if ($self->{probA}->[$i]>$thresh);
+#      push @{$perm[$i]},'C' if ($self->{probC}->[$i]>$thresh);
+#      push @{$perm[$i]},'G' if ($self->{probG}->[$i]>$thresh);
+#      push @{$perm[$i]},'T' if ($self->{probT}->[$i]>$thresh);
+#      push @{$perm[$i]},'N' if  ($seq[$i] eq 'N');
+#   }
+#   my $fpos=shift @perm;
+#   my @strings=@$fpos;
+#   foreach my $pos (@perm) {
+#      my @newstr;
+#      foreach my $let (@$pos) {
+#         foreach my $string (@strings) {
+#           my $newstring = $string . $let;
+#           push @newstr,$newstring;
+#         }
+#      }
+#      @strings=@newstr;
+#   }
+#   return @strings;
+#}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtPsm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtPsm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/ProtPsm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,245 @@
+#---------------------------------------------------------
+# $Id: ProtPsm.pm,v 1.8.4.1 2006/10/02 23:10:22 sendu Exp $
+
+#ISA ProtMatrix, HAS InstanceSite
+
+=head1 NAME
+
+Bio::Matrix::PSM::ProtPsm - handle combination of site matrices
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::IO;
+
+  #To get a ProtPsm object from a file use the Psm parser:
+  my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'psiblast', -file=>$file);
+
+  # Now go through all entities in the file with next_psm, which
+  # returns a Psm object see Bio::Matrix::PSM::IO for detailed
+  # documentation (matrix predictions or matrix sequence matches or
+  # both):
+
+  while (my $psm=$psmIO->next_psm) {
+     my %psm_header = $psm->header;
+     my $ic    = $psm_header{IC};
+     my $sites = $psm_header{sites};
+     my $width = $psm_header{width};
+     my $score = $psm_header{e_val};
+     my $IUPAC = $psm->IUPAC;
+     my $instances = $psm->instances;
+     foreach my $instance (@{$instances}) {
+       my $id = $instance->primary_id;
+       #Do something with the id
+     }
+   }
+
+=head1 DESCRIPTION
+
+To handle a combination of site matrices and/or their corresponding sequence
+matches (instances). This object inherits from Bio::Matrix::PSM::ProtMatrix, so
+you can use methods from that class. It may hold also an array of
+Bio::Matrix::PSM::InstanceSite object, but you will have to retrieve these
+through Bio::Matrix::PSM::ProtPsm-E<gt>instances method (see below). To some
+extent this is an expanded ProtMatrix object, holding data from analysis that
+also deal with sequence matches of a particular matrix.
+
+
+=head2 DESIGN ISSUES
+
+This does not make too much sense to me: I am mixing PSM with PSM sequence
+matches. Though they are very closely related, I am not satisfied by the way
+this is implemented here.  Heikki suggested different objects when one has
+something like meme. But does this mean we have to write different objects for
+mast, meme, transfac, theiresias, etc.?  To me the best way is to return
+SiteMatrix object + array of InstanceSite objects and then mast will return
+undef for SiteMatrix and transfac will return undef for InstanceSite. Probably
+I cannot see some other design issues that might arise from such approach, but
+it seems more straightforward.  Hilmar does not like this because it is an
+exception from the general BioPerl rules. Should I leave this as an option?
+Also the header rightfully belongs to the driver object, and could be retrieved
+as hashes.  I do not think it can be done any other way, unless we want to
+create even one more object with very unclear content.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - James Thompson
+
+Email tex at biosysadmin.com
+
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+ProtMatrix, meme, transfac, psiblast, InstanceSite
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::ProtPsm;
+use Bio::Matrix::PSM::InstanceSite;
+use strict;
+
+use base qw(Bio::Matrix::PSM::ProtMatrix Bio::Matrix::PSM::PsmI Bio::Annotation::Collection);
+
+# Header keys shared by the PSM classes (the archive had mangled the '@' sigil).
+@Bio::Matrix::PSM::Psm::HEADER = qw(e_val sites IC width);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psm = Bio::Matrix::PSM::ProtPsm->new(
+              -pS => [ '0', '33', '0', '16', '1', '12', '11', '25' ],
+              -pF => [ '0', '0', '2', '0', '3', '0', '0', '0' ],
+              -pT => [ '0', '8', '7', '10', '1', '2', '7', '8' ],
+              -pN => [ '0', '0', '2', '13', '0', '36', '1', '4' ],
+              -pK => [ '0', '5', '0', '13', '1', '15', '0', '2' ],
+              -pY => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+              -pE => [ '0', '41', '1', '12', '0', '0', '0', '15' ],
+              -pV => [ '0', '3', '9', '0', '2', '0', '3', '1' ],
+              -pQ => [ '0', '0', '0', '15', '0', '4', '0', '3' ],
+              -pM => [ '100', '0', '66', '0', '2', '0', '0', '0' ],
+              -pC => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+              -pL => [ '0', '0', '8', '0', '25', '0', '4', '0' ],
+              -pA => [ '0', '10', '1', '9', '2', '0', '22', '16' ],
+              -pW => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+              -pP => [ '0', '0', '0', '0', '3', '1', '45', '0' ],
+              -pH => [ '0', '0', '0', '0', '0', '0', '1', '0' ],
+              -pD => [ '0', '0', '1', '7', '2', '2', '0', '22' ],
+              -pR => [ '0', '0', '0', '3', '0', '27', '0', '0' ],
+              -pI => [ '0', '0', '3', '0', '59', '1', '2', '3' ],
+              -pG => [ '0', '0', '0', '1', '0', '0', '4', '1' ],
+              -IC => $ic,
+              -sites => $sites,
+              -width => $width,
+              -e_val => $e_val,
+              -instances => $instances,
+           );
+
+ Function: Creates a new Bio::Matrix::PSM::ProtPsm object
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::ProtPsm object
+ Args    :  hash
+
+
+=cut
+
+sub new {
+    # The invocant may be a class name or an existing object.
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+    my $self = $class->SUPER::new(@args);
+    $self->{'_annotation'} = {};  #Init from Annotation::Collection
+    $self->_typemap(Bio::Annotation::TypeManager->new()); #same
+    # -instances is handled here; every other argument was passed to SUPER::new.
+    ($self->{instances})=$self->_rearrange(['INSTANCES'], @args);
+    return $self;
+}
+
+
+=head2 instances
+
+ Title   : instances
+ Usage   :   my @instances=@{$psm->instances};
+ Function: Gets/sets the instances (Bio::Matrix::PSM::InstanceSite objects)
+        associated with the Psm object
+ Throws  :
+ Example :
+ Returns :  array reference (Bio::Matrix::PSM::InstanceSite objects)
+ Args    :  array reference (Bio::Matrix::PSM::InstanceSite objects)
+
+=cut
+
+sub instances {
+    my ($self, @replacement) = @_;
+    # The value handed back is always the one held before any assignment.
+    my $previous = $self->{instances};
+    $self->{instances} = $replacement[0] if @replacement;
+    return $previous;
+}
+
+
+=head2 header
+
+ Title   : header
+ Usage   :  my %header=$psm->header;
+        my $ic=$psm->header('IC');
+ Function: Gets the general information, common for most files,
+       dealing with PSM such as information content (IC), score
+       (e-value, etc.), number of sites (sites) and width. This
+       list may expand. The current list should be in
+       @Bio::Matrix::PSM::Psm::HEADER. Returns an empty list if an
+       argument is supplied that is not in
+       @Bio::Matrix::PSM::meme::HEADER.
+ Throws  :
+ Example :
+ Returns :  hash or string
+ Args    :  string (IC, e_val...)
+
+=cut
+
+sub header {
+    my $self = shift;
+    # Nothing is reported once the object has been flagged with {end}.
+    return  if ($self->{end});
+    # Single-key form: hand back the stored value for that key.
+    if (@_) {my $key=shift; return $self->{$key}; }
+    # Whole-header form. The key list lives in @Bio::Matrix::PSM::Psm::HEADER
+    # (the list this module populates and documents); the ProtPsm-qualified
+    # name used previously is never filled, so the hash was always empty.
+    my %header;
+    foreach my $key (@Bio::Matrix::PSM::Psm::HEADER) {
+        $header{$key}=$self->{$key};
+    }
+    return %header;
+}
+
+
+=head2 matrix
+
+ Title   :  matrix
+ Usage   :  my $matrix = $psm->matrix;
+ Function:  Gets/sets the SiteMatrix related information
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::SiteMatrix objects
+ Args    :  Bio::Matrix::PSM::SiteMatrix objects
+
+=cut
+
+
+sub matrix {
+    my ($self, @incoming) = @_;
+
+    # Getter form: nothing to copy, the object itself is the result.
+    return $self unless @incoming;
+
+    my $source = $incoming[0];
+
+    # Copy the per-residue log and probability columns for every letter of
+    # this object's alphabet.
+    my @letters = $self->alphabet;
+    for my $letter (@letters) {
+        for my $prefix (qw(log prob)) {
+            $self->{"$prefix$letter"} = $source->{"$prefix$letter"};
+        }
+    }
+
+    # Scalar attributes are read through the source object's accessors.
+    $self->{IC}    = $source->IC;
+    $self->{e_val} = $source->e_val;
+    $self->{id}    = $source->id;
+
+    return $self;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/Psm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/Psm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/Psm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,245 @@
+#---------------------------------------------------------
+# $Id: Psm.pm,v 1.14.4.1 2006/10/02 23:10:22 sendu Exp $
+
+#ISA SiteMatrix, HAS InstanceSite
+
+=head1 NAME
+
+Bio::Matrix::PSM::Psm - handle combination of site matrices
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::IO;
+
+  #To get a Psm object from a file use the Psm parser:
+  my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'meme', -file=>$file);
+
+  # Now go through all entities in the file with next_psm, which
+  # returns a Psm object see Bio::Matrix::PSM::IO for detailed
+  # documentation (matrix predictions or matrix sequence matches or
+  # both):
+
+  while (my $psm=$psmIO->next_psm) {
+    my %psm_header=$psm->header;
+    my $ic=$psm_header{IC};
+    my $sites=$psm_header{sites};
+    my $width=$psm_header{width};
+    my $score=$psm_header{e_val};
+    my $IUPAC=$psm->IUPAC;
+    my $instances=$psm->instances;
+    foreach my $instance (@{$instances}) {
+      my $id=$instance->primary_id;
+      #Do something with the id
+    }
+  }
+
+ #or create from memory:
+  my $psm= new Bio::Matrix::PSM::Psm( -pA=>\@pA,-pC=>\@pC,-pG=>\@pG,-pT=>\@pT,
+       -id=>$id,
+       -instances=>$instances, -e_val=>$e_val,
+       -IC=>$ic, -width=>$width, -sites=>$sites)
+
+  # where pA through pT are the respective frequencies of the matrix (see also
+  # Bio::Matrix::PSM::SiteMatrix), and everything else is self-explanatory,
+  # except for -instances (reference to an array of
+  #  Bio::Matrix::PSM::InstanceSite objects) which is documented below.
+
+=head1 DESCRIPTION
+
+To handle a combination of site matrices and/or their corresponding
+sequence matches (instances). This object inherits from
+Bio::Matrix::PSM::SiteMatrix, so you can use the respective
+methods. It may hold also an array of Bio::Matrix::PSM::InstanceSite
+object, but you will have to retrieve these through
+Bio::Matrix::PSM::Psm-E<gt>instances method (see below). To some extent
+this is an expanded SiteMatrix object, holding data from analysis that
+also deal with sequence matches of a particular matrix.
+
+
+=head2 DESIGN ISSUES
+
+This does not make too much sense to me: I am mixing PSM with PSM
+sequence matches. Though they are very closely related, I am not
+satisfied by the way this is implemented here.  Heikki suggested
+different objects when one has something like meme. But does this mean
+we have to write different objects for mast, meme, transfac,
+theiresias, etc.?  To me the best way is to return SiteMatrix object +
+array of InstanceSite objects and then mast will return undef for
+SiteMatrix and transfac will return undef for InstanceSite. Probably I
+cannot see some other design issues that might arise from such
+approach, but it seems more straightforward.  Hilmar does not like
+this because it is an exception from the general BioPerl rules. Should
+I leave this as an option?  Also the header rightfully belongs to the
+driver object, and could be retrieved as hashes.  I do not think it
+can be done any other way, unless we want to create even one more
+object with very unclear content.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+SiteMatrix, meme, transfac, InstanceSite
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::Psm;
+use Bio::Matrix::PSM::InstanceSite;
+use strict;
+
+use base qw(Bio::Matrix::PSM::SiteMatrix Bio::Matrix::PSM::PsmI Bio::Annotation::Collection);
+
+# Keys reported by header() (the archive had mangled the '@' sigil).
+@Bio::Matrix::PSM::Psm::HEADER = qw(e_val sites IC width);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psm= new Bio::Matrix::PSM::Psm( -pA=>\@pA,-pC=>\@pC,
+					       -pG=>\@pG,-pT=>\@pT,-id=>$id,
+					       -instances=>$instances, 
+					       -e_val=>$e_val,
+					       -IC=>$ic, -width=>$width, 
+					       -sites=>$sites)
+ Function: Creates a new Bio::Matrix::PSM::Psm object
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::Psm object
+ Args    :  hash
+
+
+=cut
+
+sub new {
+    # The invocant may be a class name or an existing object.
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+    my $self = $class->SUPER::new(@args);
+    $self->{'_annotation'} = {};  #Init from Annotation::Collection
+    $self->_typemap(Bio::Annotation::TypeManager->new()); #same
+    # -instances is handled here; every other argument was passed to SUPER::new.
+    ($self->{instances})=$self->_rearrange(['INSTANCES'], @args);
+    return $self;
+}
+
+
+=head2 instances
+
+ Title   : instances
+ Usage   :   my @instances=@{$psm->instances};
+ Function: Gets/sets the instances (Bio::Matrix::PSM::InstanceSite objects)
+            associated with the Psm object
+ Throws  :
+ Example :
+ Returns :  array reference (Bio::Matrix::PSM::InstanceSite objects)
+ Args    :  array reference (Bio::Matrix::PSM::InstanceSite objects)
+
+=cut
+
+sub instances {
+    my ($self, @replacement) = @_;
+    # Remember the current value first: it is what the caller receives,
+    # whether or not a new value is stored.
+    my $previous = $self->{instances};
+    if (@replacement) {
+        $self->{instances} = $replacement[0];
+    }
+    return $previous;
+}
+
+
+=head2 header
+
+ Title   : header
+ Usage   :  my %header=$psm->header;
+            my $ic=$psm->header('IC');
+ Function: Gets the general information, common for most files,
+           dealing with PSM such as information content (IC), score
+           (e-value, etc.), number of sites (sites) and width. This
+           list may expand. The current list should be in
+           @Bio::Matrix::PSM::Psm::HEADER. Returns undef if an
+           argument is supplied that is not in
+           @Bio::Matrix::PSM::meme::HEADER.
+ Throws  :
+ Example :
+ Returns :  hash or string
+ Args    :  string (IC, e_val...)
+
+=cut
+
+sub header {
+    my ($self, @wanted) = @_;
+
+    # Nothing is reported once the object has been flagged with {end}.
+    return if $self->{end};
+
+    # Single-key form: return the stored value for the requested key.
+    if (@wanted) {
+        my $field = $wanted[0];
+        return $self->{$field};
+    }
+
+    # Whole-header form: one entry per key listed in the package HEADER array.
+    my %summary;
+    for my $field (@Bio::Matrix::PSM::Psm::HEADER) {
+        $summary{$field} = $self->{$field};
+    }
+    return %summary;
+}
+
+
+=head2 matrix
+
+ Title   :  matrix
+ Usage   :  my $matrix=$psm->matrix;
+ Function:  Gets/sets the SiteMatrix related information
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::SiteMatrix objects
+ Args    :  Bio::Matrix::PSM::SiteMatrix objects
+
+=cut
+
+
+sub matrix {
+    my $self = shift;
+    # Snapshot the current state first, so the caller always receives the
+    # matrix as it was before any assignment made below.
+    my $prev = Bio::Matrix::PSM::SiteMatrix->new(-pA=>$self->{probA},
+                                                 -pC=>$self->{probC},
+                                                 -pG=>$self->{probG},
+                                                 -pT=>$self->{probT},
+                                                 -lA=>$self->{logA},
+                                                 -lC=>$self->{logC},
+                                                 -lG=>$self->{logG},
+                                                 -lT=>$self->{logT},
+                                                 -IC=>$self->{IC},
+                                                 -e_val=>$self->{e_val},
+                                                 -id=>$self->{id});
+    if (@_) {
+        my $matrix=shift;
+        $self->{IC} = $matrix->IC;
+        foreach my $base (qw(A C G T)) {
+            $self->{"prob$base"} = $matrix->{"prob$base"};
+            # The log-odds columns are exported by the snapshot above, so
+            # they must be replaced together with the probabilities;
+            # otherwise the old matrix's log values would linger.
+            $self->{"log$base"}  = $matrix->{"log$base"};
+        }
+        $self->{e_val}=$matrix->e_val;
+        $self->{id}=$matrix->id;
+    }
+    return $prev;
+}
+ 
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeader.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeader.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeader.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,300 @@
+# $Id: PsmHeader.pm,v 1.11.4.1 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::PsmHeader - PSM mast parser implementation
+
+=head1 SYNOPSIS
+
+  # See Bio::Matrix::PSM::IO for detailed documentation on how to use
+  # PSM parsers
+
+=head1 DESCRIPTION
+
+Parser for mast. This driver unlike meme or transfac for example is
+dedicated more to PSM sequence matches
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::PsmHeader;
+
+use Bio::Matrix::PSM::InstanceSite;
+
+use strict;
+use base qw(Bio::Root::Root Bio::Matrix::PSM::PsmHeaderI);
+
+# These lists name the header accessors that are applicable to each file
+# format; the per-format lists are consulted by _check().
+@Bio::Matrix::PSM::PsmHeader::MASTHEADER=qw(html version release seq hid 
+					    length instances unstructured);
+@Bio::Matrix::PSM::PsmHeader::MEMEHEADER=qw(html version release hid weight length unstructured);
+@Bio::Matrix::PSM::PsmHeader::TRANSFACHEADER=qw(unstructured version release);
+@Bio::Matrix::PSM::PsmHeader::PSIBLASTHEADER=qw(seq width ic);
+@Bio::Matrix::PSM::PsmHeader::ALLHEADER=qw(header release type version html 
+					   release weight length id 
+					   seq instances unstructured);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $header= new Bio::Matrix::PSM::PsmHeader(-seq=>\%seq, 
+						       -mid=>\%mid, 
+						       -width=>\%width,
+                                                       -instances=>\%instances,
+						       -header=>\@header,
+						       -type=>'mast');
+ Function: Creates a new Bio::Matrix::PSM::PsmHeader object
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::PsmHeader object
+ Args    :  hash
+
+
+=cut
+
+sub new {
+    # Construction is delegated entirely to the parent class.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    return $self;
+}
+
+# Record the short type name and parse version/release info from the unstructured array
+sub _initialize {
+    my $self = shift;
+    my $type=ref($self);
+    $type=~s/\w+:://g; # strip the package qualifiers, keeping the last name component
+    $self->{_type} = $type; # read back by _check()
+    my $dat=join(" ",grep(/version|release/i,@{$self->{unstructured}})); # only lines mentioning version/release
+    if ($dat && ($dat=~/version\b/i)) {
+	$self->{version}=substr($dat,$+[0]+1); # $+[0] is the end offset of the match just made; skip one more character
+	$self->{version}=~s/\s.+[^\d\.\:\/]//g; # drop from the first whitespace through the last char outside [0-9.:/]
+	$self->{version}=~s/^\D//; # drop a single leading non-digit
+    }
+    if ($dat && ($dat=~/release\b/i)) {
+	my $rel=substr($dat,$+[0]+1); # text following the word "release"
+	$rel=~s/[^\d\.\:\/\-]//g; # keep only digits and . : / -
+	$rel=~s/^\D//; # drop a single leading non-digit
+	if ($rel=~/\d\d:\d\d:\d\d/) { #Reformat if time is available too
+	    my $time=substr($rel,$-[0]+1); # $-[0] is the start offset of the time match; NOTE(review): the +1 skips the first matched character - confirm this is intended
+	    my $dat= substr($rel,0,$-[0]); # part before the time; shadows the outer $dat inside this block
+	    $self->{release}="$dat $time";
+	}
+	else {  $self->{release}=$rel; }
+    }
+    return $self;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : my %seq= $header->seq();
+ Function: Returns the sequence data as a hash, indexed by a sequence ID (motif id or accession number)
+           In case the input data is a motif it would return the consensus seq for each of them (mast).
+ Throws  :
+ Example :
+ Returns :   hash
+ Args    :
+
+
+=cut
+
+sub seq {
+    my ($self) = @_;
+    # Formats for which _check('seq') fails yield an empty list.
+    if ($self->_check('seq')) {
+        return %{$self->{seq}};
+    }
+    return ();
+}
+
+=head2 hid
+
+ Title   : hid
+ Usage   : my @hid= $header->hid();
+ Function: Returns array with the motif ids
+ Throws  :
+ Example :
+ Returns :   array
+ Args    :
+
+
+=cut
+
+sub hid {
+    my ($self) = @_;
+    # Not every format carries motif ids; _check decides.
+    if (!$self->_check('hid')) {
+        return;
+    }
+    # Hand back a copy of the stored id list.
+    my @ids = @{$self->{hid}};
+    return @ids;
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my %length= $header->length();
+ Function: Returns the length of the input sequence or motifs as a hash, indexed
+           by a sequence ID (motif id or accession number)
+ Throws  :
+ Example :
+ Returns :  hash
+ Args    :
+
+
+=cut
+
+sub length {
+    my ($self) = @_;
+    # The stored value is returned as-is (not dereferenced), unlike seq/weight.
+    if ($self->_check('length')) {
+        return $self->{length};
+    }
+    return;
+}
+
+=head2 instances
+
+ Title   : instances
+ Usage   : my %instances= $header->instances();
+ Function: Returns the info about the input data, contained in the header
+ Throws  :
+ Example :
+ Returns : hash
+ Args    :
+
+
+=cut
+
+sub instances {
+    my ($self) = @_;
+    # Formats for which _check('instances') fails yield nothing.
+    if (!$self->_check('instances')) {
+        return;
+    }
+    return %{$self->{instances}};
+}
+
+=head2 weight
+
+ Title   : weight
+ Usage   : my %weights= $header->weight();
+ Function: Returns the weights of the input sequence as a hash, indexed
+           by a sequence ID
+ Throws  :
+ Example :
+ Returns :  hash
+ Args    :
+
+
+=cut
+
+sub weight {
+    my ($self) = @_;
+    # Formats for which _check('weight') fails yield an empty list.
+    if ($self->_check('weight')) {
+        return %{$self->{weight}};
+    }
+    return ();
+}
+
+
+=head2 unstructured
+
+ Title   : unstructured
+ Usage   : my @unstructured= $header->unstructured();
+ Function: Returns the unstructured data in the header as an array, one line per
+           array element, all control symbols are removed with \W
+ Throws  :
+ Example :
+ Returns :  array
+ Args    :
+
+
+=cut
+
+sub unstructured {
+    my ($self) = @_;
+    # Dereference the stored array of header lines.
+    return @{$self->{unstructured}};
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : my $version= $header->version;
+ Function: Returns the version of the file being parsed if such exists
+ Throws  :
+ Example :
+ Returns :  string
+ Args    :
+
+
+=cut
+
+sub version {
+    my ($self) = @_;
+    # Plain read accessor for the stored version string.
+    return $self->{version};
+}
+
+=head2 release
+
+ Title   : release
+ Usage   : my $release= $header->release;
+ Function: Returns the release of the file being parsed if such exists
+ Throws  :
+ Example :
+ Returns :  string
+ Args    :
+
+
+=cut
+
+sub release {
+    my ($self) = @_;
+    # Plain read accessor for the stored release string.
+    return $self->{release};
+}
+
+=head2 _check
+
+ Title   : _check
+ Usage   : if ($self->_check('weight')) { #do something} else {return 0;}
+ Function: Checks if the method called is applicable to the file format
+ Throws  :
+ Example :
+ Returns :  boolean
+ Args    :  string
+
+
+=cut
+
+sub _check {
+    my ($self,$method) = @_;
+    my $type= $self->{'_type'};
+
+    # Accessor lists per file format.
+    my %applicable = (
+        meme     => \@Bio::Matrix::PSM::PsmHeader::MEMEHEADER,
+        mast     => \@Bio::Matrix::PSM::PsmHeader::MASTHEADER,
+        transfac => \@Bio::Matrix::PSM::PsmHeader::TRANSFACHEADER,
+        psiblast => \@Bio::Matrix::PSM::PsmHeader::PSIBLASTHEADER,
+    );
+
+    # An unset or unrecognised type places no restriction on the accessor.
+    return 1 unless (defined $type && exists $applicable{$type});
+
+    # Compare names exactly. The earlier pattern match treated the name as an
+    # unanchored regex, so 'id' passed because of 'hid', metacharacters were
+    # interpreted, and an empty name re-used the last successful pattern.
+    return 0 unless defined $method;
+    return (grep { $_ eq $method } @{ $applicable{$type} }) ? 1 : 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeaderI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeaderI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmHeaderI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,278 @@
+#---------------------------------------------------------
+# $Id: PsmHeaderI.pm,v 1.10.4.1 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::PsmHeaderI - handles the header data from a PSM file
+
+=head1 SYNOPSIS
+
+ use Bio::Matrix::PSM::IO;
+ #Obtain an Bio::Matrix::PSM::IO object:
+ my $psmIO= new Bio::Matrix::PSM::IO(-file=>$file, -format=>'mast');
+
+ #Get some general data about the file you are parsing:
+ my $release=$psmIO->release;
+ my $version=$psmIO->version;
+
+ print "This analysis was performed using MAST version $version, release $release\n";
+
+ #Now let's see what are the consensus sequences of the motifs fed as an input:
+ my %seq=$psmIO->seq;
+
+ #let's cycle through all consensus sequences now:
+
+ foreach my $id ($psmIO->hid) {
+   print "Motif $id is \t",$seq{$id},"\n";
+ }
+
+  #Finally look at the stuff we do not parse:
+  my @inputfile=grep(/datafile/i,$psmIO->unstructured);
+
+=head1 DESCRIPTION
+
+Generally you should not use this object directly, you can access the
+information through a PSM driver (See Bio::Matrix::PSM::IO). It is
+handling the header data from a PSM file which may be very
+different. This means that some of the methods will return undef
+naturally, because this information is not present in the file which
+is parsed. Some important data might be left over in the unstructured
+part, and you might have to parse it yourself. I will try to
+'structure' this header more in the near future.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::PsmHeaderI;
+use Bio::Matrix::PSM::InstanceSite;
+use Bio::Matrix::PSM::Psm;
+use Bio::Matrix::PSM::IO;
+use strict;
+use base qw(Bio::Matrix::PSM::PsmI);
+
+#Accessor methods, based on the driver
+# (the '@' sigils below had been mangled to ' at ' by the mail archive)
+@Bio::Matrix::PSM::PsmHeader::MASTHEADER=qw(html version release 
+					    seq hid length instances 
+					    unstructured);
+@Bio::Matrix::PSM::PsmHeader::MEMEHEADER=qw(html version release hid 
+					    weight length unstructured);
+@Bio::Matrix::PSM::PsmHeader::TRANSFACHEADER=qw(unstructured version release);
+@Bio::Matrix::PSM::PsmHeader::ALLHEADER=qw(header release type version html 
+					   release weight length hid 
+					   seq instances unstructured);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $header= new Bio::Matrix::PSM::PsmHeader
+            ( -seq=>\%seq, -mid=>\%mid, -width=>\%width,
+              -instances=>\%instances, -header=>\@header, -type=>'mast');
+ Function: Creates a new Bio::Matrix::PSM::PsmHeader object
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::PsmHeaderI object
+ Args    :  hash
+
+
+=cut
+
+=head2 seq
+
+ Title   : seq
+ Usage   : my %seq= $header->seq();
+ Function: Returns the sequence data as a hash, indexed by a 
+           sequence ID (motif id or accession number)
+           In case the input data is a motif it would return the 
+           consensus seq for each of them (mast).
+ Throws  :
+ Example :
+ Returns :  hash
+ Args    :
+
+
+=cut
+
+sub seq {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+
+=head2 hid
+
+ Title   : hid
+ Usage   : my @ids= $header->hid();
+ Function: Returns array with the motif/instance ids
+ Throws  :
+ Example :
+ Returns :  array
+ Args    :
+
+
+=cut
+
+sub hid {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my %length= $header->length();
+ Function: Returns the length of the input sequence or motifs as a hash, indexed
+           by a sequence ID (motif id or accession number)
+ Throws  :
+ Example :
+ Returns :  hash
+ Args    :
+
+
+=cut
+
+sub length {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+=head2 instances
+
+ Title   : instances
+ Usage   : my %instances= $header->instances();
+ Function: Returns the instance, used  as a hash, indexed
+           by a sequence ID (motif id or accession number)
+ Throws  :
+ Example :
+ Returns :  hash of Bio::Matrix::PSM::InstanceSite objects
+ Args    :
+
+
+=cut
+
+sub instances {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+=head2 weights
+
+ Title   : weights
+ Usage   : my %weights= $header->weights();
+ Function: Returns the weights of the input sequence as a hash, indexed
+           by a sequence ID
+ Throws  :
+ Example :
+ Returns :  hash
+ Args    :
+
+
+=cut
+
+sub weights {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+
+=head2 unstructured
+
+ Title   : unstructured
+ Usage   : my @unstructured= $header->unstructured();
+ Function: Returns the unstructured data in the header as an array, one line per
+           array element, all control symbols are removed with \W
+ Throws  :
+ Example :
+ Returns :   array
+ Args    :
+
+
+=cut
+
+sub unstructured {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : my $version= $header->version;
+ Function: Returns the version of the file being parsed if such exists
+ Throws  :
+ Example :
+ Returns :  string
+ Args    :
+
+
+=cut
+
+sub version {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+=head2 revision
+
+ Title   : revision
+ Usage   : my $revision= $header->revision;
+ Function: Returns the revision of the file being parsed if such exists
+ Throws  :
+ Example :
+ Returns :  string
+ Args    :
+
+
+=cut
+
+sub revision {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+=head2 _check
+
+ Title   : _check
+ Usage   : if ($self->_check('weights')) { #do something} else {return 0;}
+ Function: Checks if the method called is applicable to the file format
+ Throws  :
+ Example :
+ Returns :  boolean
+ Args    :  string
+
+
+=cut
+
+sub _check {
+    my ($self) = @_;
+    # Interface placeholder: delegates to throw_not_implemented().
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/PsmI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,208 @@
+# $Id: PsmI.pm,v 1.8.4.1 2006/10/02 23:10:22 sendu Exp $
+#---------------------------------------------------------
+#ISA SiteMatrix, HAS InstanceSite
+
+=head1 NAME
+
+Bio::Matrix::PSM::PsmI - abstract interface to handler of site matrices
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::IO;
+
+  # To get a Psm object from a file use the Psm parser:
+  my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'meme', -file=>$file);
+
+  # Now go through all entities in the file with next_psm, which
+  # returns a Psm object see Bio::Matrix::PSM::IO for detailed
+  # documentation (matrix predictions or matrix sequence matches or
+  # both):
+
+  while (my $psm=$psmIO->next_psm) {
+   my %psm_header=$psm->header;
+   my $ic=$psm_header{IC};
+   my $sites=$psm_header{sites};
+   my $width=$psm_header{width};
+   my $score=$psm_header{e_val};
+   my $IUPAC=$psm->IUPAC;
+   my $instances=$psm->instances;
+   foreach my $instance (@{$instances}) {
+     my $id=$instance->primary_id;
+     #Do something with the id
+    }
+   }
+
+  # or create from memory:
+  my $psm= new Bio::Matrix::PSM::Psm( -pA=>\@pA,-pC=>\@pC,-pG=>\@pG,-pT=>\@pT,
+                                      -id=>$id,
+                                      -instances=>$instances, -e_val=>$e_val,
+                                      -IC=>$ic, -width=>$width, -sites=>$sites)
+
+  # where pA through pT are the respective frequencies of the matrix (see also
+  # Bio::Matrix::PSM::SiteMatrix), and everything else is self-explanatory,
+  # except for
+  #-instances (reference to an array of Bio::Matrix::PSM::InstanceSite objects)
+  # which is documented below.
+
+=head1 DESCRIPTION
+
+Supposed to handle a combination of site matrices and/or their
+corresponding sequence matches (instances). This object inherits from
+Bio::Matrix::PSM::SiteMatrix, so you can use the respective
+methods. It may hold also an array of Bio::Matrix::PSM::InstanceSite
+object, but you will have to retrieve these through
+Bio::Matrix::PSM::Psm-E<gt>instances method (see below). To some extent
+this is an expanded SiteMatrix object, holding data from analysis that
+also deal with sequence matches of a particular matrix.
+
+=head2 DESIGN ISSUES
+
+This design is a bit of a compromise, so it might be a temporary
+solution. I am mixing PSM with PSM sequence matches. Though they are
+very closely related, I am not satisfied by the way this is
+implemented here.  Heikki suggested different objects when one has
+something like meme. But does this mean we have to write different
+objects for mast, meme, transfac, teiresias, etc.?  To me the best
+way is to return a SiteMatrix object + an array of InstanceSite objects,
+and then mast will return undef for SiteMatrix and transfac will
+return undef for InstanceSite. Probably I cannot see some other design
+issues that might arise from such an approach, but it seems more
+straightforward.  Hilmar does not like this because it is an
+exception from the general BioPerl rules. Should I leave this as an
+option?  Also the header rightfully belongs to the driver object, and
+could be retrieved as hashes.  I do not think it can be done any other
+way, unless we want to create even one more object with very unclear
+content.
+
+=head1 SEE ALSO
+
+L<Bio::Matrix::PSM::SiteMatrix>, L<Bio::Matrix::PSM::IO::meme>, 
+L<Bio::Matrix::PSM::IO::transfac>, L<Bio::Matrix::PSM::InstanceSite>
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+package Bio::Matrix::PSM::PsmI;
+use Bio::Matrix::PSM::SiteMatrix;
+use Bio::Matrix::PSM::InstanceSite;
+use strict;
+
+use base qw(Bio::Matrix::PSM::SiteMatrixI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $psm= new Bio::Matrix::PSM::Psm( -pA=>\@pA,-pC=>\@pC,-pG=>\@pG,
+					       -pT=>\@pT,-id=>$id,
+					       -instances=>$instances, 
+					       -e_val=>$e_val,
+					       -IC=>$ic, -width=>$width, 
+					       -sites=>$sites)
+ Function: Creates a new Bio::Matrix::PSM::Psm object
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::Psm object
+ Args    :  hash
+
+
+=cut
+
+# Abstract constructor: implementing classes must provide their own new();
+# calling this interface version only reports "not implemented".
+sub new {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 instances
+
+ Title   : instances
+ Usage   :   my @instances=@{$psm->instances};
+ Function: Gets/sets the instances (Bio::Matrix::PSM::InstanceSite objects)
+            associated with the Psm object
+ Throws  :
+ Example :
+ Returns :  array reference (Bio::Matrix::PSM::InstanceSite objects)
+ Args    :  array reference (Bio::Matrix::PSM::InstanceSite objects)
+
+=cut
+
+# Abstract accessor for the InstanceSite objects; must be overridden by the
+# implementing class, here it only reports "not implemented".
+sub instances {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 matrix
+
+ Title   :  matrix
+ Usage   :  my $matrix=$psm->matrix;
+ Function:  Gets/sets the SiteMatrix related information
+ Throws  :
+ Example :
+ Returns :  Bio::Matrix::PSM::SiteMatrix objects
+ Args    :  Bio::Matrix::PSM::SiteMatrix objects
+
+=cut
+
+# Abstract accessor for the SiteMatrix information; must be overridden by the
+# implementing class, here it only reports "not implemented".
+sub matrix {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 header
+
+ Title   : header
+ Usage   :  my %header=$psm->header;
+            my $ic=$psm->header('IC');
+ Function: Gets the general information, common for most files, dealing
+           with PSM such as information content (IC), score (e-value,
+           etc.), number of sites (sites) and width. This list may
+           expand. The current list should be in
+           @Bio::Matrix::PSM::Psm::HEADER. Returns undef if an argument
+           is supplied that is not in @Bio::Matrix::PSM::meme::HEADER.
+ Throws  :
+ Example :
+ Returns :  hash or string
+ Args    :  string (IC, e_val...)
+
+=cut
+
+# Abstract accessor for the general header information; must be overridden
+# by the implementing class, here it only reports "not implemented".
+sub header {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+ 
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrix.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrix.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrix.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1006 @@
+# $Id: SiteMatrix.pm,v 1.31.4.3 2006/10/02 23:10:22 sendu Exp $
+#---------------------------------------------------------
+
+=head1 NAME
+
+Bio::Matrix::PSM::SiteMatrix - SiteMatrixI implementation, holds a
+position scoring matrix (or position weight matrix) and log-odds
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::PSM::SiteMatrix;
+  # Create from memory by supplying probability matrix hash
+  # both as strings or arrays
+  # where the frequencies  $a,$c,$g and $t are supplied either as
+  # arrayref or string. Accordingly, lA, lC, lG and lT are the log
+  # odds (only as arrays, no checks done right now)
+  my ($a,$c,$g,$t,$score,$ic, $mid)=@_; 
+  #or
+  my ($a,$c,$g,$t,$score,$ic,$mid)=('05a011','110550','400001',
+                                    '100104',0.001,19.2,'CRE1');
+  #Where a stands for all (this frequency=1), see explanation below
+  my %param=(-pA=>$a,-pC=>$c,-pG=>$g,-pT=>$t,
+             -lA=>$la, -lC=>$lc,-lG=>$lg,-lT=>$lt,
+             -IC=>$ic,-e_val=>$score, -id=>$mid);
+  my $site=new Bio::Matrix::PSM::SiteMatrix(%param);
+  #Or get it from a file:
+  use Bio::Matrix::PSM::IO;
+  my $psmIO= new Bio::Matrix::PSM::IO(-file=>$file, -format=>'transfac');
+  while (my $psm=$psmIO->next_psm) {
+    #Now we have a Bio::Matrix::PSM::Psm object, 
+    # see Bio::Matrix::PSM::PsmI for details
+    #This is a Bio::Matrix::PSM::SiteMatrix object now
+    my $matrix=$psm->matrix;  
+  }
+
+  # Get a simple consensus, where alphabet is {A,C,G,T,N}, 
+  # choosing the character that both satisfies a supplied or default threshold
+  # frequency and is the most frequent character at each position, or N.
+  # So for the position with A, C, G, T frequencies of 0.5, 0.25, 0.10, 0.15,
+  # the simple consensus character will be 'A', whilst for 0.5, 0.5, 0, 0 it
+  # would be 'N'.
+  my $consensus=$site->consensus;
+
+  # Get the IUPAC ambiguity code representation of the data in the matrix.
+  # Because the frequencies may have been pseudo-count corrected, insignificant
+  # frequencies (below 0.05 by default) are ignored. So a position with
+  # A, C, G, T frequencies of 0.5, 0.5, 0.01, 0.01 will get the IUPAC code 'M',
+  # while 0.97, 0.01, 0.01, 0.01 will get the code 'A' and
+  # 0.25, 0.25, 0.25, 0.25 would get 'N'.
+  my $iupac=$site->IUPAC;
+
+  # Getting/using regular expression (a representation of the IUPAC string)
+  my $regexp=$site->regexp;
+  # count the (non-overlapping) matches of the motif in $seq
+  my $count = () = $seq =~ /$regexp/g;
+  print "Motif $mid is present $count times in this sequence\n";
+
+=head1 DESCRIPTION
+
+SiteMatrix is designed to provide some basic methods when working with position
+scoring (weight) matrices, such as transcription factor binding sites for
+example. A DNA PSM consists of four vectors with frequencies {A,C,G,T}. This is
+the minimum information you should provide to construct a PSM object. The
+vectors can be provided as strings with frequenciesx10 rounded to an int, going
+from {0..a} and 'a' represents the maximum (10). This is like MEME's compressed
+representation of a matrix and it is quite useful when working with relational
+DB. If arrays are provided as an input (references to arrays actually) they can
+be any number, real or integer (frequency or count).
+
+When creating the object you can ask the constructor to make a simple pseudo
+count correction by adding a number (typically 1) to all positions (with the
+-correction option). After adding the number the frequencies will be
+calculated. Only use correction when you supply counts, not frequencies.
+
+Throws an exception if: You mix as an input array and string (for example A
+matrix is given as array, C - as string). The position vector is (0,0,0,0). One
+of the probability vectors is shorter than the rest.
+
+Summary of the methods I use most frequently (details below):
+
+  IUPAC - return IUPAC compliant consensus as a string
+  score - Returns the score as a real number
+  IC - information content. Returns a real number
+  id - identifier. Returns a string
+  accession_number - accession number. Returns a string
+  next_pos - return the probability of each letter, the log-odds, the
+      simple consensus letter and its probability for the next
+      position. Rewind at the end. Returns a hash.
+  curpos - current position get/set. Returns an integer.
+  regexp - construct a regular expression based on IUPAC consensus.
+      For example AGWV will be [Aa][Gg][AaTt][AaCcGg]
+  width - site width
+  get_string - gets the probability vector for a single base as a string.
+  get_array - gets the probability vector for a single base as an array.
+  get_logs_array - gets the log-odds vector for a single base as an array.
+
+New methods, which might be of interest to anyone who wants to store
+PSM in a relational database without creating an entry for each
+position is the ability to compress the PSM vector into a string with
+losing usually less than 1% of the data.  this can be done with:
+
+  my $str=$matrix->get_compressed_freq('A');
+or
+  my $str=$matrix->get_compressed_logs('A');
+
+Loading from a database should be done with new, but is not yet implemented.
+However you can still uncompress such string with:
+
+  my @arr=Bio::Matrix::PSM::_uncompress_string ($str,1,1); for PSM
+or
+  my @arr=Bio::Matrix::PSM::_uncompress_string ($str,1000,2); for log odds
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+package Bio::Matrix::PSM::SiteMatrix;
+use strict;
+
+use base qw(Bio::Root::Root Bio::Matrix::PSM::SiteMatrixI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $site=new Bio::Matrix::PSM::SiteMatrix(-pA=>$a,-pC=>$c,
+						     -pG=>$g,-pT=>$t,
+						     -IC=>$ic,
+						     -e_val=>$score, 
+						     -id=>$mid);
+ Function: Creates a new Bio::Matrix::PSM::SiteMatrix object from memory
+ Throws :  If inconsistent data for all vectors (A,C,G and T) is
+           provided, if you mix input types (string vs array) or if a
+           position freq is 0.
+ Returns :  Bio::Matrix::PSM::SiteMatrix object
+ Args    :  -pA    => vector with the frequencies or counts of A
+            -pC    => vector for C
+            -pG    => vector for G
+            -pT    => vector for T
+            -lA    => vector for the log of A
+            -lC    => vector for the log of C
+            -lG    => vector for the log of G
+            -lT    => vector for the log of T
+            -IC    => real number, the information content of this matrix
+            -e_val => real number, the expect value
+            -id    => string, an identifier
+            -width => int, width of the matrix in nucleotides
+            -sites => int, the number of sites that went into this matrix
+            -model => hash ref, background frequencies for A, C, G and T
+            -correction => number, the number to add to all positions to achieve
+                           pseudo count correction (default 0: no correction)
+                           NB: do not use correction when your input is
+                           frequencies!
+            -accession_number => string, an accession number
+
+            Vectors can be strings of the frequencies where the frequencies are
+            multiplied by 10 and rounded to the nearest whole number, and where
+            'a' is used to denote the maximal frequency 10. There should be no
+            punctuation (spaces etc.) in the string. For example, 'a0501'.
+            Alternatively frequencies or counts can be represented by an array
+            ref containing the counts, frequencies or logs as any kind of
+            number.
+
+=cut
+
+# Constructor. Parses the named arguments by hand, stores the scalar
+# attributes and, when all four frequency vectors (-pA, -pC, -pG, -pT) are
+# given, converts them to per-position frequencies, optionally applies the
+# pseudo-count correction, calculates the log-odds (when a -model is given
+# and no -lA was supplied) and builds the consensus data.
+#
+# Throws if array and string vectors are mixed, if the vectors differ in
+# length, or if a position is 0 for all four bases.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # Too many things to rearrange, and I am creating simultaneously >500
+    # such objects routinely, so this becomes a performance issue
+    my %input;
+    while (@args) {
+        (my $key = shift @args) =~ s/-//g; #deletes all dashes (only dashes)!
+        $input{$key} = shift @args;
+    }
+    $self->{_position}  = 0;
+    $self->{IC}         = $input{IC};
+    $self->{e_val}      = $input{e_val};
+    $self->{width}      = $input{width};
+    $self->{logA}       = $input{lA};
+    $self->{logC}       = $input{lC};
+    $self->{logG}       = $input{lG};
+    $self->{logT}       = $input{lT};
+    $self->{sites}      = $input{sites};
+    $self->{id}         = $input{id} || 'null';
+    $self->{correction} = $input{correction} || 0;
+    $self->{accession_number} = $input{accession_number};
+
+    # Nothing more to do unless all four frequency vectors were supplied
+    my @bases = qw(A C G T);
+    foreach my $base (@bases) {
+        return $self unless defined($input{"p$base"});
+    }
+
+    # This should go to _initialize?
+    # Check for input type - no mixing allowed, throw an exception
+    if (ref($input{pA}) =~ /ARRAY/i ) {
+        foreach my $base (qw(C G T)) {
+            $self->throw("Mixing matrix data types not allowed: $base is not reference") unless (ref($input{"p$base"}));
+        }
+        # Work on copies: the vectors are normalised in place below and the
+        # caller's own arrays must not be changed behind its back
+        foreach my $base (@bases) {
+            $self->{"prob$base"} = [ @{$input{"p$base"}} ];
+        }
+    }
+    else {
+        foreach my $base (qw(C G T)) {
+            $self->throw("Mixing matrix data types not allowed: $base is reference") if (ref($input{"p$base"}));
+        }
+        # we implicitly assume these are MEME-style frequencies x 10, so
+        # 'a' represents the 'maximum', 10. Other positions can actually
+        # add up to over 10 due to rounding, but I don't think that is a
+        # problem? Every vector is converted over its own full length.
+        foreach my $base (@bases) {
+            $self->{"prob$base"} = [ map { $_ eq 'a' ? '10' : $_ } split(//, $input{"p$base"}) ];
+        }
+    }
+
+    my ($probA, $probC, $probG, $probT) = @{$self}{qw(probA probC probG probT)};
+
+    # The vectors must be equally long. Check this before normalising,
+    # because the loop below would silently pad a shorter vector with zeros
+    my $last = $#{$probA};
+    foreach my $base (qw(C T G)) {
+        my $other = $#{$self->{"prob$base"}};
+        $self->throw("Probability matrix is damaged for $base: $last vs $other") if ($last != $other);
+    }
+
+    foreach my $i (0 .. $last) {
+        # Check for position with 0 for all bases, throw exception if so
+        if (($probA->[$i] + $probC->[$i] + $probG->[$i] + $probT->[$i]) == 0) {
+            $self->throw("Position meaningless-all frequencies are 0");
+        }
+
+        # apply pseudo-count correction to all values - this will result in
+        # very bad frequencies if the input is already frequencies and a
+        # correction value as large as 1 is used!
+        if ($self->{correction}) {
+            $_->[$i] += $self->{correction} foreach ($probA, $probC, $probG, $probT);
+        }
+
+        # (re)calculate frequencies
+        my $div = $probA->[$i] + $probC->[$i] + $probG->[$i] + $probT->[$i];
+        $_->[$i] = $_->[$i] / $div foreach ($probA, $probC, $probG, $probT);
+    }
+
+    # Calculate the logs
+    if ((!defined($self->{logA})) && ($input{model})) {
+        $self->calc_weight($input{model});
+    }
+
+    # Make consensus
+    $self->_calculate_consensus;
+    return $self;
+}
+
+=head2 _calculate_consensus
+
+ Title   : _calculate_consensus
+ Function: Internal stuff
+
+=cut
+
+# Fills the per-position IUPAC and simple-consensus arrays from the four
+# frequency vectors. Throws when the C, T or G vector does not have the same
+# last index as the A vector. Returns the object.
+sub _calculate_consensus {
+    my $self = shift;
+    my %last_of = map { $_ => $#{$self->{"prob$_"}} } qw(C T G);
+    my $len = $#{$self->{probA}};
+    foreach my $base (qw(C T G)) {
+        my $other = $last_of{$base};
+        $self->throw("Probability matrix is damaged for $base: $len vs $other") if ($len != $other);
+    }
+    foreach my $i (0 .. $len) {
+        my @freqs = (${$self->{probA}}[$i], ${$self->{probC}}[$i], ${$self->{probG}}[$i], ${$self->{probT}}[$i]);
+        #*** IUPACp values not actually used (eg. by next_pos)
+        (${$self->{IUPAC}}[$i], ${$self->{IUPACp}}[$i]) = _to_IUPAC(@freqs);
+        (${$self->{seq}}[$i], ${$self->{seqp}}[$i]) = _to_cons(@freqs);
+    }
+    return $self;
+}
+
+=head2 calc_weight
+
+ Title   : calc_weight
+ Usage   : $obj->calc_weight({A=>0.2562, C=>0.2438, G=>0.2432, T=>0.2568});
+ Function: Recalculates the PSM (or weights) based on the PFM (the frequency
+           matrix) and user supplied background model.
+ Throws  : if no model is supplied
+ Returns : n/a
+ Args    : reference to a hash with background frequencies for A,C,G and T
+
+=cut
+
+# Recalculates the log-odds vectors (logA, logC, logG, logT) from the
+# frequency vectors and the supplied background model: each weight is
+# log(frequency) - log(background).
+#
+# Args   : hash ref with background frequencies under the keys A, C, G, T
+# Returns: the object
+# Throws : if any background frequency is missing or not strictly between
+#          0 and 1. The whole model is validated before anything is stored,
+#          so an invalid model leaves the existing log vectors untouched.
+# Note   : Perl's log() dies for a frequency of 0, so matrices containing
+#          zero frequencies need a pseudo-count correction first.
+sub calc_weight {
+    my ($self, $model) = @_;
+    my %background = (
+        probA => $model->{A},
+        probC => $model->{C},
+        probG => $model->{G},
+        probT => $model->{T},
+    );
+    my @vectors = qw(probA probC probG probT);
+
+    foreach my $let (@vectors) {
+        my $bg = $background{$let};
+        $self->throw("You did not provide valid model\n") unless (defined($bg) && ($bg > 0) && ($bg < 1));
+    }
+
+    # parenthesised list: a bare qw() as the loop list is a syntax error
+    # from Perl 5.18 on
+    foreach my $let (@vectors) {
+        my $log_bg = log($background{$let});
+        my @weights;
+        foreach my $f (@{$self->{$let}}) {
+            push @weights, log($f) - $log_bg;
+        }
+        (my $llet = $let) =~ s/prob/log/;
+        $self->{$llet} = \@weights;
+    }
+    return $self;
+}
+
+=head2 next_pos
+
+ Title   : next_pos
+ Usage   :
+ Function: Retrieves the next position features: frequencies and log-odds
+           for A,C,G,T, the main letter (as in consensus), the probability
+           for this letter to occur at this position and the current
+           position. Returns nothing and rewinds once the end is reached.
+ Returns : hash (pA,pC,pG,pT,lA,lC,lG,lT,base,prob,rel)
+ Args    : none
+
+=cut
+
+# Iterator over the matrix positions. Returns a hash describing the current
+# position (frequencies pA..pT, log-odds lA..lT, consensus letter 'base',
+# its probability 'prob' and the 0-based index 'rel') and advances. Once
+# the end is reached the position is reset to 0 and nothing is returned.
+sub next_pos {
+    my $self = shift;
+    $self->throw("instance method called on class") unless ref $self;
+    my $pos = $self->{_position};
+
+    # End reached? Rewind and signal it with an empty return
+    if ($pos >= @{$self->{seq}}) {
+        $self->{_position} = 0;
+        return;
+    }
+
+    my %seq = (
+        pA   => ${$self->{probA}}[$pos],
+        pT   => ${$self->{probT}}[$pos],
+        pC   => ${$self->{probC}}[$pos],
+        pG   => ${$self->{probG}}[$pos],
+        lA   => ${$self->{logA}}[$pos],
+        lT   => ${$self->{logT}}[$pos],
+        lC   => ${$self->{logC}}[$pos],
+        lG   => ${$self->{logG}}[$pos],
+        base => ${$self->{seq}}[$pos],
+        rel  => $pos,
+        prob => ${$self->{seqp}}[$pos],
+    );
+    $self->{_position}++;
+    return %seq;
+}
+
+=head2 curpos
+
+ Title   : curpos
+ Usage   :
+ Function: Gets/sets the current position. The previous position is
+           returned; a new value is stored as given (no range checking)
+ Returns : integer
+ Args    : integer
+
+=cut
+
+# Get/set the iterator position. Always returns the value held before the
+# call; when an argument is passed it becomes the new position.
+sub curpos {
+    my ($self, @new) = @_;
+    my $old = $self->{_position};
+    $self->{_position} = $new[0] if @new;
+    return $old;
+}
+
+=head2 e_val
+
+ Title   : e_val
+ Usage   : 
+ Function: Gets/sets the e-value
+ Returns : real number
+ Args    : none to get, real number to set
+
+=cut
+
+# Get/set the e-value. Always returns the value held before the call; when
+# an argument is passed it becomes the new e-value.
+sub e_val {
+    my ($self, @new) = @_;
+    my $old = $self->{e_val};
+    $self->{e_val} = $new[0] if @new;
+    return $old;
+}
+
+=head2 IC
+
+ Title   : IC
+ Usage   :
+ Function: Get/set the Information Content
+ Returns : real number
+ Args    : none to get, real number to set
+
+=cut
+
+# Get/set the information content. Always returns the value held before the
+# call; when an argument is passed it becomes the new value.
+sub IC {
+    my ($self, @new) = @_;
+    my $old = $self->{IC};
+    $self->{IC} = $new[0] if @new;
+    return $old;
+}
+
+=head2 accession_number
+
+ Title   : accession_number
+ Function: Get/set the accession number, this will be unique id for the
+           SiteMatrix object as well for any other object, inheriting from
+           SiteMatrix
+ Returns : string
+ Args    : none to get, string to set
+
+=cut
+
+# Get/set the accession number. Always returns the value held before the
+# call; when an argument is passed it becomes the new accession number.
+sub accession_number {
+    my ($self, @new) = @_;
+    my $old = $self->{accession_number};
+    $self->{accession_number} = $new[0] if @new;
+    return $old;
+}
+
+=head2 consensus
+
+ Title   : consensus
+ Usage   :
+ Function: Returns the consensus
+ Returns : string
+ Args    : (optional) threshold value 1 to 10, default 5
+           '5' means the returned characters had a 50% or higher presence at
+           their position
+
+=cut
+
+# Returns the simple consensus as a string. When a (true) threshold is
+# given, the stored consensus letters and their probabilities are first
+# recalculated with that threshold for every position.
+sub consensus {
+    my ($self, $thresh) = @_;
+    if ($thresh) {
+        my $last = $#{$self->{probA}};
+        foreach my $i (0 .. $last) {
+            my @freqs = (${$self->{probA}}[$i], ${$self->{probC}}[$i], ${$self->{probG}}[$i], ${$self->{probT}}[$i]);
+            (${$self->{seq}}[$i], ${$self->{seqp}}[$i]) = _to_cons(@freqs, $thresh);
+        }
+    }
+    my $consensus = '';
+    $consensus .= $_ foreach @{$self->{seq}};
+    return $consensus;
+}
+
+=head2 width
+
+ Title   : width
+ Usage   :
+ Function: Returns the length of the sites used to make this matrix
+ Returns : int
+ Args    : none
+
+=cut
+
+# Width of the matrix: the number of positions in the A frequency vector.
+sub width {
+    my ($self) = @_;
+    return scalar @{$self->{probA}};
+}
+
+=head2 sites
+
+ Title   : sites
+ Usage   :
+ Function: Get/set the number of sites that were used to make this matrix
+ Returns : int
+ Args    : none to get, int to set
+
+=cut
+
+# Get/set the number of sites used to build the matrix. Returns the stored
+# value when it is true, otherwise an empty return (undef / empty list).
+sub sites {
+    my ($self, @new) = @_;
+    $self->{sites} = $new[0] if @new;
+    return $self->{sites} if $self->{sites};
+    return;
+}
+
+=head2 IUPAC
+
+ Title   : IUPAC
+ Usage   :
+ Function: Returns IUPAC compliant consensus
+ Returns : string
+ Args    : optionally, also supply a whole number (int) of 1 or higher to set
+           the significance level when considering the frequencies. 1 (the
+           default) means a 0.05 significance level: frequencies lower than
+           0.05 will be ignored. 2 Means a 0.005 level, and so on.
+
+=cut
+
+# Returns the IUPAC consensus as a string. When a (true) significance level
+# is given, the stored IUPAC symbols are first recalculated with that level
+# for every position.
+sub IUPAC {
+    my ($self, $thresh) = @_;
+    if ($thresh) {
+        my $last = $#{$self->{probA}};
+        foreach my $i (0 .. $last) {
+            my @freqs = (${$self->{probA}}[$i], ${$self->{probC}}[$i], ${$self->{probG}}[$i], ${$self->{probT}}[$i]);
+            (${$self->{IUPAC}}[$i], ${$self->{IUPACp}}[$i]) = _to_IUPAC(@freqs, $thresh);
+        }
+    }
+    my $symbols = $self->{IUPAC};
+    my $iupac = '';
+    $iupac .= $_ foreach @{$symbols};
+    return $iupac;
+}
+
+=head2 _to_IUPAC
+
+ Title   : _to_IUPAC
+ Usage   :
+ Function: Converts a single position to IUPAC compliant symbol.
+           For rules see the implementation
+ Returns : char (the IUPAC symbol only; no probability is returned)
+ Args    : real numbers for frequencies of A,C,G,T (positional)
+
+           optionally, also supply a whole number (int) of 1 or higher to set
+           the significance level when considering the frequencies. 1 (the
+           default) means a 0.05 significance level: frequencies lower than
+           0.05 will be ignored. 2 Means a 0.005 level, and so on.
+
+=cut
+
+# Converts the frequencies of one position to its IUPAC ambiguity symbol.
+#
+# Args   : frequencies of A, C, G and T (positional) and, optionally, the
+#          significance level (int >= 1, default 1) = number of decimals the
+#          frequencies are rounded to before being considered.
+# Returns: a single character. Only the symbol is returned, so callers that
+#          assign the result to two variables get undef for the second one.
+#
+# A base counts as present when its rounded frequency is greater than 0;
+# the set of present bases selects the symbol. Deciding on presence avoids
+# comparing sums of floating point numbers with ==, which depends on the
+# order of the additions (e.g. 0.7+0.2+0.1 != 0.2+0.1+0.7) and made
+# some three-base positions come out as 'N'. A position where no base is
+# significant yields 'N'.
+sub _to_IUPAC {
+    my ($freq_a, $freq_c, $freq_g, $freq_t, $thresh) = @_;
+    $thresh ||= 1;
+    $thresh = int($thresh);
+
+    # one bit per present base: A=1, C=2, G=4, T=8
+    my $present = 0;
+    my $bit = 1;
+    foreach my $freq ($freq_a, $freq_c, $freq_g, $freq_t) {
+        $present |= $bit if (sprintf("%.${thresh}f", $freq) > 0);
+        $bit <<= 1;
+    }
+
+    #               -   A   C   AC  G   AG  CG  ACG T   AT  CT  ACT GT  AGT CGT ACGT
+    my @symbol = qw(N   A   C   M   G   R   S   V   T   W   Y   H   K   D   B   N);
+    return $symbol[$present];
+}
+
+=head2 _to_cons
+
+ Title   : _to_cons
+ Usage   :
+ Function: Converts a single position to simple consensus character and returns
+           its probability. For rules see the implementation
+ Returns : char, real number
+ Args    : real numbers for A,C,G,T (positional), and optional 5th argument of
+           threshold (as a number between 1 and 10, where 5 is default and
+           means the returned character had a 50% or higher presence at this
+           position)
+
+=cut
+
+# Converts the frequencies of one position to a simple consensus character.
+#
+# Args   : frequencies of A, C, G and T (positional) and an optional
+#          threshold between 1 and 10 (default 5 = 50% presence).
+# Returns: (letter, frequency x 10) for the base that reaches the threshold
+#          and is strictly the most frequent one, otherwise ('N', 10).
+sub _to_cons {
+    my ($A, $C, $G, $T, $thresh) = @_;
+    $thresh ||= 5;
+
+    # this multiplication by 10 is just to satisfy the thresh range of 1-10
+    my ($fa, $fc, $fg, $ft) = ($A * 10, $C * 10, $G * 10, $T * 10);
+
+    # threshold could be lower than 50%, so a base must not only reach the
+    # threshold but also be strictly the highest frequency. Positions where
+    # every base is below the threshold or where all four are equal fail
+    # each of these tests and fall through to 'N'.
+    if (($fa >= $thresh) && ($fa > $ft) && ($fa > $fc) && ($fa > $fg)) {
+        return 'A', $fa;
+    }
+    if (($fc >= $thresh) && ($fc > $ft) && ($fc > $fa) && ($fc > $fg)) {
+        return 'C', $fc;
+    }
+    if (($fg >= $thresh) && ($fg > $ft) && ($fg > $fc) && ($fg > $fa)) {
+        return 'G', $fg;
+    }
+    if (($ft >= $thresh) && ($ft > $fg) && ($ft > $fc) && ($ft > $fa)) {
+        return 'T', $ft;
+    }
+
+    return 'N', 10;
+}
+
+=head2 get_string
+
+ Title   : get_string
+ Usage   :
+ Function: Returns given probability vector as a string. Useful if you want to
+           store things in a rel database, where arrays are not first choice
+ Throws  : If the argument is outside {A,C,G,T}
+ Returns : string
+ Args    : character {A,C,G,T}
+
+=cut
+
+# Returns the frequency vector of one base as a compact string: every
+# frequency is multiplied by 10 and rounded to a whole number, with 'a'
+# standing for 10.
+#
+# Args   : base character, one of A, C, G, T (case-insensitive, like
+#          get_array and get_logs_array)
+# Returns: string, one character per position
+# Throws : if the base is not one of A, C, G, T
+sub get_string {
+    my ($self, $base) = @_;
+    my %vector_for = (A => 'probA', C => 'probC', G => 'probG', T => 'probT');
+
+    my $key = defined($base) ? $vector_for{uc($base)} : undef;
+    unless (defined $key) {
+        $base = '' unless defined $base;
+        $self->throw("No such base: $base!\n");
+    }
+
+    my $string = '';
+    foreach my $prob (@{ $self->{$key} || [] }) {
+        my $next = sprintf("%.0f", $prob * 10);
+        $next = 'a' if ($next eq '10');
+        $string .= $next;
+    }
+    return $string;
+}
+
+=head2 get_array
+
+ Title   : get_array
+ Usage   :
+ Function: Returns an array with frequencies for a specified base
+ Returns : array
+ Args    : char
+
+=cut
+
+# Returns the frequency vector of the given base (A, C, G or T, in either
+# case) as a list. Throws for any other base.
+sub get_array {
+    my $self = shift;
+    my $base = uc(shift);
+    my %vector_for = (A => 'probA', C => 'probC', G => 'probG', T => 'probT');
+    if (exists $vector_for{$base}) {
+        return @{$self->{$vector_for{$base}}};
+    }
+    $self->throw("No such base: $base!\n");
+}
+
+=head2 get_logs_array
+
+ Title   : get_logs_array
+ Usage   :
+ Function: Returns an array with log_odds for a specified base
+ Returns : array
+ Args    : char
+
+=cut
+
+# Returns the log-odds vector of the given base as a list.
+#
+# Args   : base character, one of A, C, G, T (case-insensitive)
+# Returns: list of log-odds, or nothing when no log-odds are stored for a
+#          valid base
+# Throws : if the base is not one of A, C, G, T. The base is compared
+#          literally; it is never interpolated into a regular expression,
+#          so '.', the empty string or regex metacharacters are rejected
+#          like any other invalid base.
+sub get_logs_array {
+    my $self = shift;
+    my $base = uc(shift);
+    my %vector_for = (A => 'logA', C => 'logC', G => 'logG', T => 'logT');
+    $self->throw("No such base: $base!\n") unless exists $vector_for{$base};
+    my $logs = $self->{$vector_for{$base}};
+    return @{$logs} if ($logs);
+    return;
+}
+
+=head2 id
+
+ Title   : id
+ Usage   :
+ Function: Gets/sets the site id
+ Returns : string
+ Args    : string
+
+=cut
+
+# Get/set the site id. Always returns the value held before the call; when
+# an argument is passed it becomes the new id.
+sub id {
+    my ($self, @new) = @_;
+    my $old = $self->{id};
+    $self->{id} = $new[0] if @new;
+    return $old;
+}
+
+=head2 regexp
+
+ Title   : regexp
+ Usage   :
+ Function: Returns a regular expression which matches the IUPAC convention.
+           N (and any unrecognised symbol) becomes \S, which matches any
+           non-whitespace character (so also X, N, - and .)
+ Returns : string
+ Args    : none (works at the threshold last used for making the IUPAC string)
+
+=cut
+
+# Builds a regular expression string from the stored IUPAC symbols: every
+# symbol becomes a character class with the bases it stands for (both
+# cases) plus, for ambiguity codes, the code itself; any other symbol
+# becomes \S. Returns undef when there are no IUPAC symbols.
+sub regexp {
+    my $self = shift;
+    my %class_for = (
+        A => '[Aa]',       C => '[Cc]',
+        G => '[Gg]',       T => '[Tt]',
+        M => '[AaCcMm]',   R => '[AaGgRr]',
+        W => '[AaTtWw]',   S => '[CcGgSs]',
+        Y => '[CcTtYy]',   K => '[GgTtKk]',
+        V => '[AaCcGgVv]', H => '[AaCcTtHh]',
+        D => '[AaGgTtDd]', B => '[CcGgTtBb]',
+    );
+    my $regexp;
+    foreach my $letter (@{$self->{IUPAC}}) {
+        $regexp .= exists $class_for{$letter} ? $class_for{$letter} : '\S';
+    }
+    return $regexp;
+}
+
+=head2 regexp_array
+
+ Title   : regexp_array
+ Usage   :
+ Function: Returns a regular expression which matches the IUPAC convention.
+           N (or any other unlisted symbol) matches any non-whitespace character.
+ Returns : array
+ Args    : none (works at the threshold last used for making the IUPAC string)
+ To do   : I have separated regexp and regexp_array, but
+           maybe they can be rewritten as one - just check what should be returned
+
+=cut
+
+sub regexp_array {
+    my $self = shift;
+    # IUPAC symbol => character class (both cases, the symbol itself included).
+    my %class_for = (
+        A => '[Aa]',
+        C => '[Cc]',
+        G => '[Gg]',
+        T => '[Tt]',
+        M => '[AaCcMm]',
+        R => '[AaGgRr]',
+        W => '[AaTtWw]',
+        S => '[CcGgSs]',
+        Y => '[CcTtYy]',
+        K => '[GgTtKk]',
+        V => '[AaCcGgVv]',
+        H => '[AaCcTtHh]',
+        D => '[AaGgTtDd]',
+        B => '[CcGgTtBb]',
+    );
+    # One fragment per position; unlisted symbols (N included) become '\S'.
+    my @fragments;
+    for my $symbol (@{ $self->{IUPAC} }) {
+        push @fragments, exists $class_for{$symbol} ? $class_for{$symbol} : '\S';
+    }
+    return @fragments;
+}
+
+
+=head2 _compress_array
+
+ Title   : _compress_array
+ Usage   :
+ Function: Will compress an array of real signed numbers to a string (ie vector
+           of bytes) -127 to +127 for bi-directional(signed) and 0..255 for
+           unsigned
+ Returns : String
+ Args    : array reference, followed by an max value and direction (optional,
+           default 1-unsigned),1 unsigned, any other is signed. 
+
+=cut
+
+sub _compress_array {
+    my ($array, $lm, $direct) = @_;
+    return unless $array && $lm;    # nothing to encode, or no scale given
+    $direct = 1 unless $direct;     # default mode: unsigned, one byte 0..255
+    my $signed = ($direct != 1);
+    my $k1     = ($signed ? 127 : 255) / $lm;    # scale factor: value -> byte
+    my $str;                        # stays undef when the array is empty
+    foreach my $value (@{$array}) {
+        # Clamp a copy: foreach aliases the elements, so assigning to the loop
+        # variable (as the old code did) silently rewrote the caller's data.
+        my $c = $value > $lm ? $lm : $value;
+        if ($signed) { $c = -$lm if $c < -$lm }
+        else         { $c = 0    if $c < 0 }
+        $str .= chr(int($k1 * $c) + ($signed ? 127 : 0));    # signed: +127 offset
+    }
+    return $str;
+}
+
+=head2 _uncompress_string
+
+ Title   : _uncompress_string
+ Usage   :
+ Function: Will uncompress a string (vector of bytes) to create an array of
+           real signed numbers (the opposite of _compress_array)
+ Returns : array
+ Args    : string, followed by a max value and direction (optional,
+           default 1-unsigned), 1 unsigned, any other is signed.
+
+=cut
+
+sub _uncompress_string {
+    my ($str, $lm, $direct) = @_;
+    # Inverse of _compress_array: every byte of the string becomes one number.
+    # Test length, not truth: the one-byte string "0" (byte value 48) is false
+    # in Perl and used to be rejected as if it were empty.
+    return unless defined $str && length $str && $lm;
+    $direct = 1 unless $direct;                  # default mode is unsigned
+    my $signed = ($direct != 1);
+    my $k1     = ($signed ? 127 : 255) / $lm;    # same scale as _compress_array
+
+    # Signed values were stored with a +127 offset; remove it before scaling.
+    my @array = map { (ord($_) - ($signed ? 127 : 0)) / $k1 } split //, $str;
+    return @array;
+}
+
+=head2 get_compressed_freq
+
+ Title   : get_compressed_freq
+ Usage   :
+ Function: A method to provide a compressed frequency vector. It uses one byte
+           to code the frequency of one of the probability vectors at one
+           position. Useful for relational databases. Improvement on the
+           previous 0..a coding.
+ Example :  my $strA=$self->get_compressed_freq('A');
+ Returns :  String
+ Args    :  char 
+
+=cut
+
+sub get_compressed_freq {
+    my ($self, $base) = @_;
+
+    # Returns the frequency vector of one base packed into a byte string.
+    # Throws if the base is not one of A, C, G or T.
+
+    # Accept either case, as get_array() and get_logs_array() already do (this
+    # method used to recognise upper-case letters only).  An undefined
+    # argument becomes '' so that it is reported by the throw below.
+    $base = uc(defined $base ? $base : '');
+
+    # Only the four DNA bases have a probability vector.
+    $self->throw ("No such base: $base!\n") unless $base =~ /\A[ACGT]\z/;
+
+    # The vectors are stored under the keys probA, probC, probG and probT.
+    # Encode a copy, so that nothing the encoder does can reach the stored
+    # frequencies; a vector that was never set is encoded as an empty one.
+    my $vector = $self->{"prob$base"};
+    my @prob   = defined $vector ? @{$vector} : ();
+
+    # Unsigned mode with a maximum of 1: _compress_array clamps each value
+    # to 0..1 and scales it to a single byte (0..255).  The call is made in
+    # scalar context, as before, so callers always get exactly one value
+    # back (undef when there is nothing to encode).
+    my $str = _compress_array(\@prob, 1, 1);
+    return $str;
+}
+
+=head2 get_compressed_logs
+
+ Title   : get_compressed_logs
+ Usage   :
+ Function: A method to provide a compressed log-odd vector. It uses one byte to
+ 		   code the log value for one of the log-odds vectors for one position.
+ Example : my $strA=$self->get_compressed_logs('A');
+ Returns : String
+ Args    : char 
+
+=cut
+
+sub get_compressed_logs {
+    my ($self, $base) = @_;
+    # Accept either case, like get_array() and get_logs_array(); the previous
+    # version threw on lower-case letters.  undef is reported as ''.
+    $base = uc(defined $base ? $base : '');
+    $self->throw ("No such base: $base!\n") unless $base =~ /\A[ACGT]\z/;
+
+    # Copy the log-odds vector (logA, logC, logG or logT); none stored => empty.
+    my $vector = $self->{"log$base"};
+    my @prob   = defined $vector ? @{$vector} : ();
+
+    # Signed encoding: values are clamped to +/-1000 and scaled to one byte.
+    return _compress_array(\@prob, 1000, 2);
+}
+
+=head2 sequence_match_weight
+
+ Title   : sequence_match_weight
+ Usage   :
+ Function: This method will calculate the score of a match, based on the PWM
+           if such is associated with the matrix object. Returns undef if no
+           PWM data is available.
+ Throws  : if the length of the sequence is different from the matrix width
+ Example : my $score=$matrix->sequence_match_weight('ACGGATAG');
+ Returns : Floating point
+ Args    : string
+
+=cut
+
+sub sequence_match_weight {
+    my ($self, $seq) = @_;
+    return unless ($self->{logA});    # no PWM loaded: nothing to score
+    my $width = $self->width;
+    $self->throw ("I can calculate the score only for sequence which are exactly my size for $seq, my width is $width\n") unless (length($seq)==@{$self->{logA}});
+    my @letters = split(//, uc($seq));
+    my $score   = 0;
+    # Walk the sequence by position; each letter selects its log-odds vector.
+    for my $i (0 .. $#letters) {
+        my $letter = $letters[$i];
+        my $logs   = $self->{ 'log' . $letter };
+        if (defined $logs) { $score += $logs->[$i]; next; }
+        # Letters without a vector are reported and add nothing to the score.
+        $self->warn("Position ".($i+1)." of input sequence has unknown (ambiguity?) character '$letter': scores will be wrong");
+    }
+    return $score;
+}
+
+=head2 get_all_vectors
+
+ Title   : get_all_vectors
+ Usage   :
+ Function: returns all possible sequence vectors to satisfy the PFM under
+           a given threshold
+ Throws  : If threshold outside of 0..1 (no sense to do that)
+ Example : my @vectors=$self->get_all_vectors(0.4);
+ Returns : Array of strings
+ Args    : (optional) floating
+
+=cut
+
+sub get_all_vectors {
+    my ($self, $thresh) = @_;
+    $self->throw("Out of range. Threshold should be between 0 and 1.\n")
+        if (($thresh < 0) || ($thresh > 1));
+
+    # consensus() takes its threshold on a 1..10 scale; wherever it reports
+    # 'N', the letter N is admitted at that position as well.
+    my @seq = split(//, $self->consensus($thresh * 10));
+
+    # Collect the admissible letters of every position.  The last index is
+    # $#{...}: the previous bound, 0..@{...}, ran one position past the end
+    # of the matrix and compared undefined frequencies.
+    my @perm;
+    for my $i (0 .. $#{ $self->{probA} }) {
+        $perm[$i] = [ grep { $self->{"prob$_"}->[$i] > $thresh } qw(A C G T) ];
+        push @{ $perm[$i] }, 'N' if (defined $seq[$i] && $seq[$i] eq 'N');
+    }
+
+    # Start from the letters of the first position and extend every string
+    # built so far by every letter of each following position.  A position
+    # with no admissible letter therefore leaves no vector at all.
+    my @strings = @{ shift(@perm) || [] };
+    foreach my $pos (@perm) {
+        @strings = map { my $let = $_; map { $_ . $let } @strings } @{$pos};
+    }
+    return @strings;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrixI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrixI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PSM/SiteMatrixI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,529 @@
+# $Id: SiteMatrixI.pm,v 1.16.4.1 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Matrix::PSM::SiteMatrixI - SiteMatrixI implementation, holds a
+position scoring matrix (or position weight matrix) and log-odds
+
+=head1 SYNOPSIS
+
+  # You cannot use this module directly; see Bio::Matrix::PSM::SiteMatrix
+  # for an example implementation
+
+=head1 DESCRIPTION
+
+SiteMatrix is designed to provide some basic methods when working with position
+scoring (weight) matrices, such as transcription factor binding sites for
+example. A DNA PSM consists of four vectors with frequencies {A,C,G,T}. This is
+the minimum information you should provide to construct a PSM object. The
+vectors can be provided as strings with frequenciesx10 rounded to an int, going
+from {0..a} and 'a' represents the maximum (10). This is like MEME's compressed
+representation of a matrix and it is quite useful when working with relational
+DB. If arrays are provided as an input (references to arrays actually) they can
+be any number, real or integer (frequency or count).
+
+When creating the object you can ask the constructor to make a simple pseudo
+count correction by adding a number (typically 1) to all positions (with the
+-correction option). After adding the number the frequencies will be
+calculated. Only use correction when you supply counts, not frequencies.
+
+Throws an exception if: You mix as an input array and string (for example A
+matrix is given as array, C - as string). The position vector is (0,0,0,0). One
+of the probability vectors is shorter than the rest.
+
+Summary of the methods I use most frequently (details below):
+
+  iupac - return IUPAC compliant consensus as a string
+  score - Returns the score as a real number
+  IC - information content. Returns a real number
+  id - identifier. Returns a string
+  accession - accession number. Returns a string
+  next_pos - return the probability of each letter, the IUPAC symbol,
+      the IUPAC probability and the simple consensus letter for this
+      position. Rewinds at the end. Returns a hash.
+  curpos - current position get/set. Returns an integer.
+  regexp - construct a regular expression based on IUPAC consensus.
+      For example AGWV will be [Aa][Gg][AaTtWw][AaCcGgVv]
+  width - site width
+  get_string - gets the probability vector for a single base as a string.
+  get_array - gets the probability vector for a single base as an array.
+  get_logs_array - gets the log-odds vector for a single base as an array.
+
+A newer feature, which may interest anyone who wants to store a PSM in a relational
+database without creating an entry for each position, is the ability to compress a
+PSM vector into a string, usually losing less than 1% of the data.
+This can be done with:
+
+  my $str=$matrix->get_compressed_freq('A');
+
+or
+
+  my $str=$matrix->get_compressed_logs('A');
+
+Loading from a database should be done with new, but is not yet implemented.
+However you can still uncompress such a string with:
+
+  my @arr=Bio::Matrix::PSM::SiteMatrix::_uncompress_string ($str,1,1); # for PSM
+
+or
+
+  my @arr=Bio::Matrix::PSM::SiteMatrix::_uncompress_string ($str,1000,2); # for log odds
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 APPENDIX
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Matrix::PSM::SiteMatrixI;
+
+use strict;    # was commented out; the interface stubs only use lexical variables
+use base qw(Bio::Root::RootI);
+
+=head2 calc_weight
+
+ Title   : calc_weight
+ Usage   : $self->calc_weight({A=>0.2562,C=>0.2438,G=>0.2432,T=>0.2568});
+ Function: Recalculates the PSM (or weights) based on the PFM (the frequency matrix)
+           and user supplied background model.
+ Throws  : if no model is supplied
+ Example :
+ Returns :
+ Args    : reference to a hash with background frequencies for A,C,G and T
+
+=cut
+
+sub calc_weight {
+    # Interface stub: the weight recalculation is supplied by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+
+=head2 next_pos
+
+ Title   : next_pos
+ Usage   : my %base=$site->next_pos;
+ Function: 
+
+           Retrieves the next position features: frequencies and weights for
+           A,C,G,T, the main letter (as in consensus) and the
+           probability for this letter to occur at this position and
+           the current position
+
+ Throws  :
+ Example :
+ Returns : hash (pA,pC,pG,pT,lA,lC,lG,lT,base,prob,rel)
+ Args    : none
+
+
+=cut
+
+sub next_pos {
+    # Interface stub: position iteration is supplied by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 curpos
+
+ Title   : curpos
+ Usage   : my $pos=$site->curpos;
+ Function: Gets/sets the current position. Converts to 0 if the argument is
+            negative and to width if it is greater than width
+ Throws  :
+ Example :
+ Returns : integer
+ Args    : integer
+
+=cut
+
+sub curpos {
+    # Interface stub: the position accessor is supplied by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 e_val
+
+ Title   : e_val
+ Usage   : my $score=$site->e_val;
+ Function: Gets/sets the e-value
+ Throws  :
+ Example :
+ Returns : real number
+ Args    : real number
+
+=cut
+
+sub e_val {
+    # Interface stub: the e-value accessor is supplied by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 consensus
+
+ Title   : consensus
+ Usage   :
+ Function: Returns the consensus
+ Returns : string
+ Args    : (optional) threshold value 1 to 10, default 5
+           '5' means the returned characters had a 50% or higher presence at
+           their position
+
+=cut
+
+sub consensus {
+    # Interface stub: the consensus string is built by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   :
+ Function: accession number, this will be unique id for the SiteMatrix object as
+ 			well for any other object, inheriting from SiteMatrix
+ Throws  :
+ Example :
+ Returns : string
+ Args    : string
+
+=cut
+
+sub accession_number {
+    # Interface stub: the accession accessor is supplied by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+
+=head2 width
+
+ Title   : width
+ Usage   : my $width=$site->width;
+ Function: Returns the length of the site
+ Throws  :
+ Example :
+ Returns : number
+ Args    :
+
+=cut
+
+sub width {
+    # Interface stub: the site length is reported by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 IUPAC
+
+ Title   : IUPAC
+ Usage   : my $iupac_consensus=$site->IUPAC;
+ Function: Returns IUPAC compliant consensus
+ Throws  :
+ Example :
+ Returns : string
+ Args    :
+
+=cut
+
+sub IUPAC {
+    # Interface stub: the IUPAC consensus is produced by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 IC
+
+ Title   : IC
+ Usage   : my $ic=$site->IC;
+ Function: Information content
+ Throws  :
+ Example :
+ Returns : real number
+ Args    : none
+
+=cut
+
+sub IC {
+    # Interface stub: information content is computed by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 get_string
+
+ Title   : get_string
+ Usage   : my $freq_A=$site->get_string('A');
+ Function: Returns given probability vector as a string. Useful if you want to
+           store things in a rel database, where arrays are not first choice
+ Throws  : If the argument is outside {A,C,G,T}
+ Example :
+ Returns : string
+ Args    : character {A,C,G,T}
+
+=cut
+
+sub get_string {
+    # Interface stub: the string form of a vector comes from implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : my $id=$site->id;
+ Function: Gets/sets the site id
+ Throws  :
+ Example :
+ Returns : string
+ Args    : string
+
+=cut
+
+sub id {
+    # Interface stub: the id accessor is supplied by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 regexp
+
+ Title   : regexp
+ Usage   : my $regexp=$site->regexp;
+ Function: Returns a regular expression which matches the IUPAC convention.
+           N will match X, N, - and .
+ Throws  :
+ Example :
+ Returns : string
+ Args    :
+
+=cut
+
+sub regexp {
+    # Interface stub: the regular expression is built by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 regexp_array
+
+ Title   : regexp_array
+ Usage   : my @regexp=$site->regexp_array;
+ Function: Returns a regular expression which matches the IUPAC convention.
+           N will match X, N, - and .
+ Throws  :
+ Example :
+ Returns : array
+ Args    :
+ To do   : I have separated regexp and regexp_array, but
+           maybe they can be rewritten as one - just check what
+           should be returned
+
+=cut
+
+sub regexp_array {
+    # Interface stub: the per-position patterns come from implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 get_array
+
+ Title   : get_array
+ Usage   : my @freq_A=$site->get_array('A');
+ Function: Returns an array with frequencies for a specified base
+ Throws  :
+ Example :
+ Returns : array
+ Args    : char
+
+=cut
+
+sub get_array {
+    # Interface stub: the frequency vectors are held by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+
+=head2 _to_IUPAC
+
+ Title   : _to_IUPAC
+ Usage   :
+ Function: Converts a single position to IUPAC compliant symbol and
+            returns its probability. For rules see the implementation.
+ Throws  :
+ Example :
+ Returns : char, real number
+ Args    : real numbers for A,C,G,T (positional)
+
+=cut
+
+sub _to_IUPAC {
+    # Interface stub: the IUPAC conversion rules live in implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 _to_cons
+
+ Title   : _to_cons
+ Usage   :
+ Function: Converts a single position to simple consensus character and
+            returns its probability. For rules see the implementation,
+ Throws  :
+ Example :
+ Returns : char, real number
+ Args    : real numbers for A,C,G,T (positional)
+
+=cut
+
+sub _to_cons {
+    # Interface stub: the consensus conversion rules live in implementing classes.
+    return shift->throw_not_implemented();
+}
+
+
+=head2 _calculate_consensus
+
+ Title   : _calculate_consensus
+ Usage   :
+ Function: Internal stuff
+ Throws  :
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _calculate_consensus {
+    # Interface stub: the consensus calculation is done by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 _compress_array
+
+ Title   : _compress_array
+ Usage   :
+ Function:  Will compress an array of real signed numbers to a string (ie vector of bytes)
+ 			-127 to +127 for bi-directional(signed) and 0..255 for unsigned ;
+ Throws  :
+ Example :  Internal stuff
+ Returns :  String
+ Args    :  array reference, followed by an max value and
+ 			direction (optional, default 1-unsigned),1 unsigned, any other is signed.
+
+=cut
+
+sub _compress_array {
+    # Interface stub: the byte encoding is implemented by concrete classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 _uncompress_string
+
+ Title   : _uncompress_string
+ Usage   :
+ Function:  Will uncompress a string (vector of bytes) to create an array of real
+            signed numbers (the opposite of _compress_array)
+ Throws  :
+ Example :  Internal stuff
+ Returns :  array
+ Args    :  string, followed by a max value and direction (optional,
+            default 1-unsigned), 1 unsigned, any other is signed.
+
+=cut
+
+sub _uncompress_string {
+    # Interface stub: the byte decoding is implemented by concrete classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 get_compressed_freq
+
+ Title   : get_compressed_freq
+ Usage   :
+ Function:  A method to provide a compressed frequency vector. It uses one byte to
+            code the frequency of one of the probability vectors at one position.
+            Useful for relational databases. Improvement on the previous 0..a coding.
+ Throws  :
+ Example :  my $strA=$self->get_compressed_freq('A');
+ Returns :  String
+ Args    :  char
+
+=cut
+
+sub get_compressed_freq {
+    # Interface stub: compressed frequencies are produced by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 get_compressed_logs
+
+ Title   : get_compressed_logs
+ Usage   :
+ Function:  A method to provide a compressed log-odd vector. It uses one byte to
+ 			code the log value for one of the log-odds vectors for one position.
+ Throws  :
+ Example :  my $strA=$self->get_compressed_logs('A');
+ Returns :  String
+ Args    :  char
+
+=cut
+
+sub get_compressed_logs {
+    # Interface stub: compressed log-odds are produced by implementing classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 sequence_match_weight
+
+ Title   : sequence_match_weight
+ Usage   :
+ Function:  This method will calculate the score of a match, based on the PWM
+            if such is associated with the matrix object. Returns undef if no
+             PWM data is available.
+ Throws  :   if the length of the sequence is different from the matrix width
+ Example :  my $score=$matrix->sequence_match_weight('ACGGATAG');
+ Returns :  Floating point
+ Args    :  string
+
+=cut
+
+sub sequence_match_weight {
+    # Interface stub: match scoring is implemented by concrete classes.
+    return shift->throw_not_implemented();
+}
+
+=head2 get_all_vectors
+
+ Title   : get_all_vectors
+ Usage   :
+ Function:  returns all possible sequence vectors to satisfy the PFM under
+            a given threshold
+ Throws  :  If threshold outside of 0..1 (no sense to do that)
+ Example :  my @vectors=$self->get_all_vectors(0.4);
+ Returns :  Array of strings
+ Args    :  (optional) floating
+
+=cut
+
+sub get_all_vectors {
+    # Interface stub: vector enumeration is implemented by concrete classes.
+    return shift->throw_not_implemented();
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PhylipDist.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PhylipDist.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/PhylipDist.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,515 @@
+# BioPerl module for Bio::Matrix::PhylipDist
+#
+# $Id: PhylipDist.pm,v 1.16.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::PhylipDist - A Phylip Distance Matrix object 
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Phylo::Phylip::ProtDist;
+  my $dist = Bio::Tools::Phylo::Phylip::ProtDist->new(
+    -file=>"protdist.out",
+    -program=>"ProtDist");
+  #or
+   my $dist = Bio::Tools::Phylo::Phylip::ProtDist->new(
+    -fh=>"protdist.out",
+    -program=>"ProtDist");
+
+
+  #get specific entries
+  my $distance_value = $dist->get_entry('ALPHA','BETA');
+  my @columns        = $dist->get_column('ALPHA');
+  my @rows           = $dist->get_row('BETA');
+  my @diagonal       = $dist->get_diagonal();
+
+  #print the matrix in phylip numerical format
+  print $dist->print_matrix;
+
+=head1 DESCRIPTION
+
+Simple object for holding Distance Matrices generated by the following Phylip programs:
+
+1) dnadist
+2) protdist
+3) restdist
+
+It currently handles parsing of the matrix without the data output option.
+
+    5
+Alpha          0.00000  4.23419  3.63330  6.20865  3.45431
+Beta           4.23419  0.00000  3.49289  3.36540  4.29179
+Gamma          3.63330  3.49289  0.00000  3.68733  5.84929
+Delta          6.20865  3.36540  3.68733  0.00000  4.43345
+Epsilon        3.45431  4.29179  5.84929  4.43345  0.00000
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+# Let the code begin...
+
+package Bio::Matrix::PhylipDist;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Matrix::MatrixI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $family = Bio::Matrix::PhylipDist->new(-file=>"protdist.out",
+                                                     -program=>"protdist");
+ Function: Constructor for PhylipDist Object
+ Returns : L<Bio::Matrix::PhylipDist>
+
+=cut
+
+sub new {
+    # Named arguments: -matrix, -values and -names are required; -program,
+    # -matrix_name and -matrix_id are optional.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($matrix, $values, $names, $program, $matname, $matid) =
+        $self->_rearrange([qw(MATRIX VALUES NAMES PROGRAM
+                              MATRIX_NAME MATRIX_ID)], @args);
+
+    # The pointer hash, the value table and the name list are all mandatory.
+    ($matrix && $values && $names) ||
+        $self->throw("Need matrix, values, and names fields all provided!");
+
+    # -program is an alias for the matrix name (see program()); an explicit
+    # -matrix_name given below takes precedence because it is applied last.
+    $self->matrix_name($program) if $program;
+
+    # Each structure is stored only when it has the expected reference type.
+    $self->_matrix($matrix) if ref($matrix) =~ /HASH/i;
+    $self->_values($values) if ref($values) =~ /ARRAY/i;
+    $self->names($names)    if ref($names)  =~ /ARRAY/i;
+
+    $self->matrix_name($matname) if defined $matname;
+    $self->matrix_id  ($matid)   if defined $matid;
+
+    return $self;
+}
+
+=head2 get_entry
+
+ Title   : get_entry
+ Usage   : $matrix->get_entry();
+ Function: returns a particular entry 
+ Returns : a float
+ Arguments:  string id1, string id2
+
+=cut
+
+sub get_entry {
+  my ($self,$row,$column) = @_;
+  # Test for presence rather than truth, so that an id of "0" is accepted.
+  $self->throw("Need at least 2 ids")
+      unless defined $row && length $row && defined $column && length $column;
+  # Use the stored structures directly instead of copying them per lookup.
+  my $cell = ($self->_matrix->{$row} || {})->{$column};
+  return unless ref $cell;                 # unknown pair of ids
+  my ($i,$j) = @{$cell};                   # position in the value table
+  my $cells = $self->_values->[$i];
+  return $cells ? $cells->[$j] : undef;
+}
+
+=head2 get_row
+
+ Title   : get_row
+ Usage   : $matrix->get_row('ALPHA');
+ Function: returns a particular row 
+ Returns : an array of float
+ Arguments:  string id1
+
+=cut
+
+sub get_row {
+    my ($self,$row) = @_;
+    # Test for presence rather than truth, so that an id of "0" is accepted.
+    $self->throw("Need at least a row id") unless defined $row && length $row;
+
+    # Every pointer stored for this id is [row index, column index]; this
+    # method relies on them all carrying the same row index and takes any one.
+    my $pointers = $self->_matrix->{$row} || return;
+    my ($val) = values %{$pointers};
+    my $cells = $self->_values->[ $val->[0] ];
+    # Slice by the number of names so the result has one value per name.
+    return @{$cells}[0 .. $#{ $self->names }];
+}
+
+=head2 get_column
+
+ Title   : get_column
+ Usage   : $matrix->get_column('ALPHA');
+ Function: returns a particular column 
+ Returns : an array of floats 
+ Arguments:  string id1
+
+=cut
+
+sub get_column {
+    my ($self,$column) = @_;
+    # Test for presence rather than truth, so that an id of "0" is accepted.
+    $self->throw("Need at least a column id")
+        unless defined $column && length $column;
+
+    # Rows and columns are indexed by the same names, so the row index stored
+    # for this id is used as its column index.
+    my $pointers = $self->_matrix->{$column} || return ();
+    my ($val) = values %{$pointers};
+    my $col_index = $val->[0];
+    # One value per name, read from that column of each row (undef if absent).
+    my $rows = $self->_values;
+    my @ret = map { my $r = $rows->[$_]; $r ? $r->[$col_index] : undef } 0 .. $#{ $self->names };
+    return @ret;
+}
+
+=head2 get_diagonal
+
+ Title   : get_diagonal
+ Usage   : $matrix->get_diagonal();
+ Function: returns the diagonal of the matrix
+ Returns : an array of float
+ Arguments:  none
+
+=cut
+
+sub get_diagonal {
+  my ($self) = @_;
+  my %pointer_for = %{$self->_matrix};
+  my @table       = @{$self->_values};
+  # Each name's (name,name) pointer is the [row, column] of its diagonal cell.
+  my @diagonal = map {
+    my ($r,$c) = @{ $pointer_for{$_}{$_} };
+    $table[$r][$c];
+  } @{$self->names};
+  return @diagonal;
+}
+
+=head2 print_matrix
+
+ Title   : print_matrix
+ Usage   : $matrix->print_matrix();
+ Function: returns a string of the matrix in phylip format 
+ Returns : a string
+ Arguments:  
+
+=cut
+
+sub print_matrix {
+  my ($self) = @_;
+  my @names  = @{$self->names};
+  my @values = @{$self->_values};
+  my %matrix = %{$self->_matrix};
+
+  # Header line: the number of names, indented by four blanks.
+  my $str = (" " x 4) . scalar(@names) . "\n";
+  for my $name (@names) {
+    # Row label padded to 15 columns; longer labels get one separating blank.
+    $str .= $name . (" " x (15-length($name)));
+    $str .= " " if length($name) >= 15;
+    for my $col (0 .. $#names) {
+      my ($i,$j) = @{$matrix{$name}{$names[$col]}};
+      my $cell = $values[$i][$j];
+      if ($col < $#names) {
+        $str .= $cell . "  ";        # inner cells are followed by two blanks
+      }
+      elsif (defined $cell) {
+        $str .= $cell;               # last cell of the row: no trailing blanks
+      }
+      else {
+        $self->debug("no value for $i,$j cell\n");
+      }
+    }
+    $str .= "\n";
+  }
+
+  return $str;
+}
+
+=head2 _matrix
+
+ Title   : _matrix
+ Usage   : $matrix->_matrix();
+ Function: get/set for hash reference of the pointers
+           to the value matrix 
+ Returns : hash reference 
+ Arguments: hash reference
+
+=cut
+
+sub _matrix {
+  my $self = shift;
+  my $pointers = shift;
+  # Only a true argument replaces the stored value; otherwise a plain read.
+  $self->{'_matrix'} = $pointers if $pointers;
+  return $self->{'_matrix'};
+}
+
+
+=head2 names
+
+ Title   : names
+ Usage   : $matrix->names();
+ Function: get/set for array ref of names of sequences
+ Returns : an array reference 
+ Arguments: an array reference
+
+=cut
+
+sub names {
+  my ($self, @list) = @_;
+  # A true first argument replaces the stored value; anything false (or no
+  # argument at all) makes this a plain read.
+  $self->{'_names'} = $list[0] if $list[0];
+  return $self->{'_names'};
+}
+
+=head2 program
+
+ Title   : program
+ Usage   : $matrix->program();
+ Function: get/set for the program name generating this 
+           matrix
+ Returns : string
+ Arguments: string
+
+=cut
+
+sub program {
+  my ($self, @name) = @_;    # alias: the program name is kept as the matrix name
+  return $self->matrix_name(@name);
+}
+
+=head2 _values
+
+ Title   : _values
+ Usage   : $matrix->_values();
+ Function: get/set for array ref of the matrix containing
+           distance values 
+ Returns : an array reference 
+ Arguments: an array reference
+
+=cut
+
+sub _values {
+  my ($self,$table) = @_;
+  # Acts as a setter only for a true argument; in every case the currently
+  # stored value is returned.
+  $table and $self->{'_values'} = $table;
+  return $self->{'_values'};
+}
+
+
+=head1 L<Bio::Matrix::MatrixI> implementation
+
+
+=head2 matrix_id
+
+ Title   : matrix_id
+ Usage   : my $id = $matrix->matrix_id
+ Function: Get/Set the matrix ID
+ Returns : scalar value
+ Args    : [optional] new id value to store
+
+
+=cut
+
+sub matrix_id{
+   my ($self, @new) = @_;
+   # Any argument, undef included, replaces the stored id; the current id
+   # (after the update, if there was one) is what gets returned.
+   $self->{'_matid'} = $new[0] if @new;
+   return $self->{'_matid'};
+}
+
+=head2 matrix_name
+
+ Title   : matrix_name
+ Usage   : my $name = $matrix->matrix_name();
+ Function: Get/Set the matrix name
+ Returns : scalar value
+ Args    : [optional] new matrix name value
+
+
+=cut
+
+sub matrix_name{
+   my ($self, @new) = @_;
+   $self->{'_matname'} = $new[0] if @new;   # any argument, even undef, is stored
+   return $self->{'_matname'};
+}
+
+=head2 column_header
+
+ Title   : column_header
+ Usage   : my $name = $matrix->column_header(0)
+ Function: Gets the column header for a particular column number
+ Returns : string
+ Args    : integer
+
+
+=cut
+
+sub column_header{
+    my $self = shift;
+    my @headers = $self->column_names;    # positional list of column names
+    return $headers[ $_[0] ];
+}
+
+
+=head2 row_header
+
+ Title   : row_header
+ Usage   : my $name = $matrix->row_header(0)
+ Function: Gets the row header for a particular row number
+ Returns : string
+ Args    : integer
+
+
+=cut
+
+sub row_header{
+    my ($self, $index) = @_;
+    my @headers = $self->row_names;    # positional list of row names
+    return $headers[$index];
+}
+=head2 column_num_for_name
+
+ Title   : column_num_for_name
+ Usage   : my $num = $matrix->column_num_for_name($name)
+ Function: Gets the column number for a particular column name
+ Returns : integer
+ Args    : string
+
+
+=cut
+
+sub column_num_for_name{
+   my ($self,$name) = @_;
+   my @headers = $self->column_names;
+   # Linear scan; the first column whose name matches exactly wins.
+   for my $idx (0 .. $#headers) {
+       return $idx if $headers[$idx] eq $name;
+   }
+   return;    # unknown name: undef, or an empty list in list context
+}
+
+=head2 row_num_for_name
+
+ Title   : row_num_for_name
+ Usage   : my $num = $matrix->row_num_for_name($name)
+ Function: Gets the row number for a particular row name
+ Returns : integer
+ Args    : string
+
+
+=cut
+
+sub row_num_for_name{
+   my ($self,$name) = @_;
+   my @rows = $self->row_names;
+   for my $idx (0 .. $#rows) { return $idx if $rows[$idx] eq $name }
+   # No match: return explicitly (undef / empty list) as column_num_for_name
+   # does; falling off the end of a foreach leaves the value unspecified.
+   return;
+}
+
+=head2 num_rows
+
+ Title   : num_rows
+ Usage   : my $rowcount = $matrix->num_rows;
+ Function: Get the number of rows
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_rows{ my ($self) = @_; return scalar @{ $self->names } }   # one row per name
+
+=head2 num_columns
+
+ Title   : num_columns
+ Usage   : my $colcount = $matrix->num_columns
+ Function: Get the number of columns
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_columns{
+   my ($self) = @_; return scalar @{ $self->names };   # one column per name
+}
+
+=head2 row_names
+
+ Title   : row_names
+ Usage   : my @rows = $matrix->row_names
+ Function: The names of all the rows
+ Returns : array in list context, the number of rows in scalar context
+ Args    : none
+
+
+=cut
+
+sub row_names{ my ($self) = @_; return @{ $self->names } }   # rows carry the sequence names
+
+=head2 column_names
+
+ Title   : column_names
+ Usage   : my @columns = $matrix->column_names
+ Function: The names of all the columns
+ Returns : array in list context, the number of columns in scalar context
+ Args    : none
+
+
+=cut
+
+sub column_names{ my ($self) = @_; return @{ $self->names } }   # columns carry the sequence names
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Scoring.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Scoring.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Matrix/Scoring.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,290 @@
+# $Id: Scoring.pm,v 1.4.4.1 2006/10/02 23:10:21 sendu Exp $
+#
+# BioPerl module for Bio::Matrix::Scoring
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Matrix::Scoring - Object which can hold scoring matrix information
+
+=head1 SYNOPSIS
+
+  use Bio::Matrix::Scoring;
+
+=head1 DESCRIPTION
+
+An object which can handle AA or NT scoring matrix information.  Some
+transformation properties are available too.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Matrix::Scoring;
+use strict;
+
+
+use base qw(Bio::Matrix::Generic);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Matrix::Scoring->new();
+ Function: Builds a new Bio::Matrix::Scoring object 
+ Returns : an instance of Bio::Matrix::Scoring
+ Args    :
+
+
+=cut
+
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Named constructor arguments, listed in the same order as the
+    # accessors that receive them.
+    my @param_names = qw(ENTROPY EXPECTED SCALE SCALE_VALUE DATABASE
+                         LOWEST_SCORE HIGHEST_SCORE LAMBDA H);
+    my @setters     = qw(entropy expected_score scale scale_value database
+                         lowest_score highest_score lambda H);
+
+    my @values = $self->_rearrange(\@param_names, @args);
+
+    # Every setter is invoked, even when its argument was not supplied
+    # (the stored value is then undef).
+    foreach my $i ( 0 .. $#setters ) {
+        my $setter = $setters[$i];
+        $self->$setter( $values[$i] );
+    }
+
+    return $self;
+}
+
+=head2 entropy
+
+ Title   : entropy
+ Usage   : $obj->entropy($newval)
+ Function: 
+ Example : 
+ Returns : value of entropy (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub entropy{
+    my ($self, @newval) = @_;
+    # Any argument, even an explicit undef, switches to set mode.
+    $self->{'entropy'} = $newval[0] if @newval;
+    return $self->{'entropy'};
+}
+
+=head2 expected_score
+
+ Title   : expected_score
+ Usage   : $obj->expected_score($newval)
+ Function: 
+ Example : 
+ Returns : value of expected (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub expected_score{
+    my ($self, @newval) = @_;
+    # Stored under the key 'expected'; any argument (undef too) sets it.
+    $self->{'expected'} = $newval[0] if @newval;
+    return $self->{'expected'};
+}
+
+=head2 scale
+
+ Title   : scale
+ Usage   : $obj->scale($newval)
+ Function: 
+ Example : 
+ Returns : value of scale (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub scale{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'scale'} = $newval[0] if @newval;
+    return $self->{'scale'};
+}
+
+=head2 scale_value
+
+ Title   : scale_value
+ Usage   : $obj->scale_value($newval)
+ Function: 
+ Example : 
+ Returns : value of scale_value (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub scale_value{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'scale_value'} = $newval[0] if @newval;
+    return $self->{'scale_value'};
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function: 
+ Example : 
+ Returns : value of description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub description{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'description'} = $newval[0] if @newval;
+    return $self->{'description'};
+}
+
+=head2 database
+
+ Title   : database
+ Usage   : $obj->database($newval)
+ Function: 
+ Example : 
+ Returns : value of database (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub database{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'database'} = $newval[0] if @newval;
+    return $self->{'database'};
+}
+
+=head2 lowest_score
+
+ Title   : lowest_score
+ Usage   : $obj->lowest_score($newval)
+ Function: 
+ Example : 
+ Returns : value of lowest_score (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub lowest_score{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'lowest_score'} = $newval[0] if @newval;
+    return $self->{'lowest_score'};
+}
+
+=head2 highest_score
+
+ Title   : highest_score
+ Usage   : $obj->highest_score($newval)
+ Function: 
+ Example : 
+ Returns : value of highest_score (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub highest_score{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'highest_score'} = $newval[0] if @newval;
+    return $self->{'highest_score'};
+}
+
+=head2 lambda
+
+ Title   : lambda
+ Usage   : $obj->lambda($newval)
+ Function: 
+ Example : 
+ Returns : value of lambda (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub lambda{
+    my ($self, @newval) = @_;
+    # Set when an argument is present (undef included), then report.
+    $self->{'lambda'} = $newval[0] if @newval;
+    return $self->{'lambda'};
+}
+
+=head2 H
+
+ Title   : H
+ Usage   : $obj->H($newval)
+ Function: 
+ Example : 
+ Returns : value of H (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub H{
+    my ($self, @newval) = @_;
+    $self->{'H'} = $newval[0] if @newval;   # explicit undef also sets
+    return $self->{'H'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/DocumentRegistry.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/DocumentRegistry.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/DocumentRegistry.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+# $Id: DocumentRegistry.pm,v 1.11.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::DocumentRegistry
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::DocumentRegistry - Keep track of where to find ontologies.
+Allows lookups by name.
+
+=head1 SYNOPSIS
+
+  my $registry = Bio::Ontology::DocumentRegistry->get_instance();
+  my($ont,$def,$fmt) = $registry->documents('Sequence Ontology');
+
+  my $io = Bio::OntologyIO->new(-url => $ont,
+                                -defs_url => $def,
+                                -format => $fmt);
+  my $so = $io->next_ontology();
+  #...
+
+=head1 DESCRIPTION
+
+Do not use this directly, use Bio::Ontology::OntologyStore instead.
+Bio::Ontology::OntologyStore uses Bio::Ontology::DocumentRegistry to
+load and cache ontologies as object graphs, you can just ask it for
+what you want by name.  See L<Bio::Ontology::OntologyStore> for
+details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Ontology::DocumentRegistry;
+use strict;
+use base qw(Bio::Root::Root);
+use Data::Dumper;
+
+my $instance;
+
+BEGIN {
+    # Known ontologies: name => { ontology, definitions, format }.
+    my %registry = (
+        'Sequence Ontology' => {
+            ontology    => "http://song.cvs.sourceforge.net/*checkout*/song/ontology/so.ontology?rev=HEAD",
+            definitions => "http://song.cvs.sourceforge.net/*checkout*/song/ontology/so.definition?rev=HEAD",
+            format      => 'soflat',
+        },
+        'Sequence Ontology Feature Annotation' => {
+            ontology    => 'http://umn.dl.sourceforge.net/sourceforge/song/sofa.ontology',
+            definitions => 'http://umn.dl.sourceforge.net/sourceforge/song/sofa.definition',
+            format      => 'soflat',
+        },
+        'Gene Ontology' => {
+            # Three source files, hence an array ref rather than one URL.
+            ontology => [
+                'http://www.geneontology.org/ontology/function.ontology',
+                'http://www.geneontology.org/ontology/process.ontology',
+                'http://www.geneontology.org/ontology/component.ontology',
+            ],
+            definitions => 'http://www.geneontology.org/ontology/GO.defs',
+            format      => 'soflat',
+        },
+    );
+    # Alias: shares the very same entry hash as 'Gene Ontology'.
+    $registry{Gene_Ontology} = $registry{'Gene Ontology'};
+    $instance = bless \%registry, __PACKAGE__;
+}
+
+
+sub new {
+  return $_[0]->get_instance( @_[ 1 .. $#_ ] );   # new() only delegates to get_instance()
+}
+
+=head2 get_instance
+
+ Title   : get_instance
+ Usage   : my $singleton = Bio::Ontology::DocumentRegistry->get_instance();
+ Function: constructor
+ Returns : The Bio::Ontology::DocumentRegistry singleton.
+ Args    : None
+
+
+=cut
+
+sub get_instance {
+  return $instance;   # the file-level registry object; any arguments are ignored
+}
+
+=head2 documents
+
+ Title   : documents
+ Usage   : my($ontology_url, $definitions_url, $format) = $obj->documents('Sequence Ontology');
+ Function: Maps an ontology name to a list of (local or) remote URIs where the
+           files can be located.
+ Returns : A 3-item list:
+           (1) URI for the ontology file
+           (2) URI for the ontology definitions file
+           (3) format of the files (dagedit, obo, etc)
+ Args    : Name of an ontology, e.g. 'Sequence Ontology', or 'Cellular Component 
+           (Gene Ontology)'
+
+=cut
+
+
+sub documents {
+  my($self,$name) = @_;
+
+  # Unknown ontology name: empty list.
+  my $entry = $self->{$name};
+  return () unless defined $entry;
+
+  return @{$entry}{qw(ontology definitions format)};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/GOterm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/GOterm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/GOterm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,355 @@
+# $Id: GOterm.pm,v 1.22.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::GOterm
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+
+=head1 NAME
+
+Bio::Ontology::GOterm - representation of GO terms 
+
+=head1 SYNOPSIS
+
+  $term = Bio::Ontology::GOterm->new
+    ( -go_id       => "GO:0016847",
+      -name        => "1-aminocyclopropane-1-carboxylate synthase",
+      -definition  => "Catalysis of ...",
+      -is_obsolete => 0,
+      -comment     => "" );
+
+  $term->add_definition_references( @refs );
+  $term->add_secondary_GO_ids( @ids );
+  $term->add_aliases( @aliases );
+
+  foreach my $dr ( $term->each_definition_reference() ) {
+      print $dr, "\n";
+  }
+
+  # etc.
+
+=head1 DESCRIPTION
+
+This is a "dumb" class for GO terms (it provides no functionality 
+related to graphs). Implements Bio::Ontology::TermI.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Ontology::GOterm;
+use strict;
+
+use constant GOID_DEFAULT => "GO:0000000";
+use constant TRUE         => 1;
+use constant FALSE        => 0;
+
+use base qw(Bio::Ontology::Term);
+
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $term = Bio::Ontology::GOterm->new( 
+       -go_id       => "GO:0016847",
+       -name        => "1-aminocyclopropane-1-carboxylate synthase",
+       -definition  => "Catalysis of ...",
+       -is_obsolete => 0,
+       -comment     => "" );                   
+ Function: Creates a new Bio::Ontology::GOterm.
+ Returns : A new Bio::Ontology::GOterm object.
+ Args    : -go_id         => the goid of this GO term [GO:nnnnnnn] 
+                             or [nnnnnnn] (nnnnnnn is a zero-padded
+                             integer of seven digits)
+           -name          => the name of this GO term [scalar]
+           -definition    => the definition of this GO term [scalar]  
+           -ontology      => the ontology for this term (a
+                             Bio::Ontology::OntologyI compliant object)
+           -version       => version information [scalar]
+           -is_obsolete   => the obsoleteness of this GO term [0 or 1]   
+           -comment       => a comment [scalar]
+
+=cut
+
+sub new {
+    my( $class, @args ) = @_;
+
+    # Chain up to the parent class constructor first.
+    my $self = $class->SUPER::new( @args );
+
+    # Only -go_id is picked out of the named arguments here.
+    my ( $GO_id ) = $self->_rearrange( [ qw( GO_ID ) ], @args );
+
+    # A false -go_id (undef, "" or 0) is ignored; anything else is
+    # validated and stored through GO_id().
+    $GO_id && $self->GO_id( $GO_id );
+
+    return $self;
+} # new
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $term->init();   
+ Function: Initializes this GOterm to all "" and empty lists.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my ( $self, @init_args ) = @_;
+
+    # Nothing GO-specific is initialized here (the GOID_DEFAULT assignment
+    # below is commented out), so this only chains up to the parent class
+    # and passes the parent's return value through.
+    #$self->GO_id( GOID_DEFAULT );
+
+    return $self->SUPER::init(@init_args);
+
+} # init
+
+
+=head2 GO_id
+
+ Title   : GO_id
+ Usage   : $term->GO_id( "GO:0003947" );
+           or
+           print $term->GO_id();
+ Function: Set/get for the goid of this GO term.
+
+           This is essentially an alias to identifier(), with added
+           format checking.
+
+ Returns : The goid [GO:nnnnnnn].
+ Args    : The goid [GO:nnnnnnn] or [nnnnnnn] (nnnnnnn is a
+           zero-padded integer of seven digits) (optional).
+
+=cut
+
+sub GO_id {
+    my ( $self, @rest ) = @_;
+
+    # Get mode: no argument, so just report the stored identifier.
+    return $self->identifier() unless @rest;
+
+    # Set mode: canonicalize (and validate) the first argument, then hand
+    # it, together with any further arguments, on to identifier().
+    $rest[0] = $self->_check_go_id( $rest[0] );
+    return $self->identifier( @rest );
+
+} # GO_id
+
+
+=head2 get_secondary_GO_ids
+
+ Title   : get_secondary_GO_ids
+ Usage   : @ids = $term->get_secondary_GO_ids();
+ Function: Returns a list of secondary goids of this Term.
+
+           This is aliased to get_secondary_ids().
+
+ Returns : A list of secondary goids [array of [GO:nnnnnnn]]
+           (nnnnnnn is a zero-padded integer of seven digits).
+ Args    :
+
+=cut
+
+sub get_secondary_GO_ids {
+    return $_[0]->get_secondary_ids( @_[ 1 .. $#_ ] );   # plain forwarder
+} # get_secondary_GO_ids
+
+
+=head2 add_secondary_GO_id
+
+ Title   : add_secondary_GO_id
+ Usage   : $term->add_secondary_GO_id( @ids );
+           or
+           $term->add_secondary_GO_id( $id );                  
+ Function: Pushes one or more secondary goids into
+           the list of secondary goids.
+
+           This is aliased to add_secondary_id().
+
+ Returns : 
+ Args    : One secondary goid [GO:nnnnnnn or nnnnnnn] or a list
+           of secondary goids [array of [GO:nnnnnnn or nnnnnnn]]
+           (nnnnnnn is a zero-padded integer of seven digits).
+
+=cut
+
+sub add_secondary_GO_id {
+    return $_[0]->add_secondary_id( @_[ 1 .. $#_ ] );   # plain forwarder
+} # add_secondary_GO_id
+
+
+=head2 remove_secondary_GO_ids
+
+ Title   : remove_secondary_GO_ids()
+ Usage   : $term->remove_secondary_GO_ids();
+ Function: Deletes (and returns) the secondary goids of this Term.
+
+           This is aliased to remove_secondary_ids().
+
+ Returns : A list of secondary goids [array of [GO:nnnnnnn]]
+           (nnnnnnn is a zero-padded integer of seven digits).
+ Args    :
+
+=cut
+
+sub remove_secondary_GO_ids {
+    return $_[0]->remove_secondary_ids( @_[ 1 .. $#_ ] );   # plain forwarder
+} # remove_secondary_GO_ids
+
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $term->to_string();
+ Function: to_string method for GO terms.
+ Returns : A string representation of this GOterm.
+ Args    :
+
+=cut
+
+sub to_string {
+    my( $self ) = @_;
+
+    # The report is collected piecewise and joined once at the
+    # very end.
+    my @out;
+
+    # Scalar attributes; unset (or otherwise false) ones print as an
+    # empty line.
+    push @out, "-- GO id:\n",      ( $self->GO_id()      || '' ) . "\n";
+    push @out, "-- Name:\n",       ( $self->name()       || '' ) . "\n";
+    push @out, "-- Definition:\n", ( $self->definition() || '' ) . "\n";
+
+    # The category is the name of the term's ontology, when it has one.
+    push @out, "-- Category:\n";
+    my $category = defined( $self->ontology() ) ? $self->ontology()->name() : '';
+    push @out, $category . "\n";
+
+    push @out, "-- Version:\n",     ( $self->version() || '' ) . "\n";
+    push @out, "-- Is obsolete:\n", $self->is_obsolete() . "\n";
+    push @out, "-- Comment:\n",     ( $self->comment() || '' ) . "\n";
+
+    # List-valued attributes, rendered by _array_to_string().
+    push @out, "-- Definition references:\n";
+    push @out, $self->_array_to_string( $self->get_dblinks() ) . "\n";
+    push @out, "-- Secondary GO ids:\n";
+    push @out, $self->_array_to_string( $self->get_secondary_GO_ids() ) . "\n";
+    push @out, "-- Aliases:\n";
+    # (no extra newline is appended after this last section)
+    push @out, scalar $self->_array_to_string( $self->get_synonyms() );
+
+    return join( '', @out );
+
+} # to_string
+
+
+
+
+# Title   : _check_go_id
+# Function: Checks whether the argument is [GO:nnnnnnn].
+#           If "GO:" is not present, it adds it.
+# Returns : The canonical GO id.
+# Args    : The value to be checked.
+sub _check_go_id {
+    my ( $self, $candidate ) = @_;
+
+    # Accept seven digits with or without the "GO:" prefix (or the default id).
+    my $well_formed = $candidate =~ /^(GO:)?\d{7}$/ || $candidate eq GOID_DEFAULT;
+    $self->throw( "Found [" . $candidate
+        . "] where [GO:nnnnnnn] or [nnnnnnn] expected" ) if !$well_formed;
+
+    # The canonical form always carries the prefix.
+    return $candidate =~ /^GO:/ ? $candidate : "GO:".$candidate;
+} # _check_go_id
+
+
+
+# Title   : _array_to_string         
+# Function:
+# Returns : 
+# Args    : 
+sub _array_to_string {
+    my( $self, @value ) = @_;
+
+    # One "#<index>" line plus one "--  <value>" line per entry.  Entries
+    # that are references are skipped, but still count towards the index,
+    # so the printed numbers are positions in the original list.
+    my @lines =
+        map  { "#" . $_ . "\n--  " . $value[ $_ ] . "\n" }
+        grep { ! ref( $value[ $_ ] ) }
+        0 .. $#value;
+
+    return join( "", @lines );
+
+} # _array_to_string
+
+#################################################################
+# aliases or forwards to maintain backward compatibility
+#################################################################
+
+*each_secondary_GO_id = \&get_secondary_GO_ids;   # older name for get_secondary_GO_ids()
+*add_secondary_GO_ids = \&add_secondary_GO_id;    # plural spelling of add_secondary_GO_id()
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/InterProTerm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/InterProTerm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/InterProTerm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,639 @@
+# $Id: InterProTerm.pm,v 1.10.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::InterProTerm
+#
+# Cared for by Peter Dimitrov <dimitrov at gnf.org>
+#
+# Copyright Peter Dimitrov
+# (c) Peter Dimitrov, dimitrov at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::InterProTerm - Implementation of InterProI term interface
+
+=head1 SYNOPSIS
+
+  my $term = Bio::Ontology::InterProTerm->new( 
+        -interpro_id => "IPR000001",
+		  -name => "Kringle",
+		  -definition => "Kringles are autonomous structural domains ...",
+		  -ontology => "Domain"
+					     );
+  print $term->interpro_id(), "\n";
+  print $term->name(), "\n";
+  print $term->definition(), "\n";
+  print $term->is_obsolete(), "\n";
+  print $term->ontology->name(), "\n";
+
+=head1 DESCRIPTION
+
+This is a simple extension of L<Bio::Ontology::Term> for InterPro terms.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov at gnf.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::InterProTerm;
+use strict;
+
+use Bio::Annotation::Reference;
+
+use constant INTERPRO_ID_DEFAULT => "IPR000000";
+
+use base qw(Bio::Ontology::Term);
+
+=head2 new
+
+ Title   : new
+ Usage   : $term = Bio::Ontology::InterProTerm->new( -interpro_id => "IPR000002",
+						     -name => "Cdc20/Fizzy",
+						     -definition => "The Cdc20/Fizzy region is almost always ...",
+						     -ontology => "Domain"
+						   );
+
+ Function: Creates a new Bio::Ontology::InterProTerm.
+ Example :
+ Returns : A new Bio::Ontology::InterProTerm object.
+ Args    :
+  -interpro_id => the InterPro ID of the term. Has the form IPRdddddd, where dddddd is a zero-padded six digit number
+  -name => the name of this InterPro term [scalar]
+  -definition => the definition/abstract of this InterPro term [scalar]
+  -ontology => ontology of InterPro terms [Bio::Ontology::OntologyI]
+  -comment => a comment [scalar]
+
+=cut
+
+sub new{
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+
+  # Only the two InterPro-specific named arguments are extracted here.
+  my ($interpro_id, $short_name) =
+      $self->_rearrange( [ qw(INTERPRO_ID SHORT_NAME) ], @args );
+
+  # False values (undef, "", 0) leave the corresponding attribute untouched.
+  if ($interpro_id) {
+    $self->interpro_id( $interpro_id );
+  }
+  $self->short_name( $short_name ) if $short_name;
+
+  return $self;
+}
+
+=head2 init
+
+ Title   : init
+ Usage   : $term->init();
+ Function: Initializes this InterProTerm to all "" and empty lists.
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub init{
+    my ($self, @init_args) = @_;
+
+    # Parent state first, so the whole hierarchy gets initialized ...
+    $self->SUPER::init(@init_args);
+
+    # ... then the two attributes this class adds.  Note that short_name("")
+    # stores undef, because short_name() maps false values to undef.
+    $self->interpro_id( INTERPRO_ID_DEFAULT );
+    $self->short_name("");
+}
+
+=head2 _check_interpro_id
+
+ Title   : _check_interpro_id
+ Usage   :
+ Function: Performs simple check in order to validate that its argument has the form IPRdddddd, where dddddd is a zero-padded six digit number.
+ Example :
+ Returns : Returns its argument if valid, otherwise throws exception.
+ Args    : String
+
+
+=cut
+
+sub _check_interpro_id{
+  my ($self, $value) = @_;
+  # Valid: "IPR" followed by six digits, or the default id.
+  my $valid = $value =~ /^IPR\d{6}$/ || $value eq INTERPRO_ID_DEFAULT;
+  if (!$valid) {
+    $self->throw( "InterPro ID ".$value." is incorrect\n" );
+  }
+  return $value;
+}
+
+=head2 interpro_id
+
+ Title   : interpro_id
+ Usage   : $obj->interpro_id($newval)
+ Function: Set/get for the interpro_id of this InterProTerm
+ Example : 
+ Returns : value of interpro_id (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub interpro_id{
+  my ($self, $new_id) = @_;
+
+  # Get mode (also used when undef is passed): report the identifier.
+  return $self->identifier() unless defined $new_id;
+
+  # Set mode: validate first, then store via identifier().
+  my $checked = $self->_check_interpro_id($new_id);
+  return $self->identifier($checked);
+}
+
+=head2 short_name
+
+ Title   : short_name
+ Usage   : $obj->short_name($newval)
+ Function: Set/get for the short name of this InterProTerm.
+ Example : 
+ Returns : value of short_name (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub short_name{
+  my ($self, $value) = @_;
+
+  # A defined argument sets the attribute; false values ("" or 0) clear it
+  # to undef.  Passing undef (or nothing) is a plain get.
+  $self->{'short_name'} = ( $value || undef ) if defined $value;
+
+  return $self->{'short_name'};
+}
+
+=head2 protein_count
+
+ Title   : protein_count
+ Usage   : $obj->protein_count($newval)
+ Function: Set/get for the protein count of this InterProTerm.
+ Example : 
+ Returns : value of protein_count (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub protein_count{
+  my ($self,$value) = @_;
+
+  # A defined argument sets the attribute.  Note that a false value,
+  # including a count of 0, is stored as undef.
+  $self->{'protein_count'} = ( $value || undef ) if defined $value;
+
+  return $self->{'protein_count'};
+}
+
+=head2 get_references
+
+ Title   : get_references
+ Usage   :
+ Function: Get the references for this InterPro term.
+ Example :
+ Returns : An array of L<Bio::Annotation::Reference> objects
+ Args    :
+
+
+=cut
+
+sub get_references{
+    my ($self) = @_;
+    # No list stored yet: empty list (undef in scalar context).
+    return () unless exists($self->{"_references"});
+    return @{$self->{"_references"}};
+}
+
+=head2 add_reference
+
+ Title   : add_reference
+ Usage   :
+ Function: Add one or more references to this InterPro term.
+ Example :
+ Returns : 
+ Args    : One or more L<Bio::Annotation::Reference> objects.
+
+
+=cut
+
+sub add_reference{
+    my ($self, @refs) = @_;
+    # Create the list on first use, then append (returns the new count).
+    $self->{"_references"} = [] if !exists($self->{"_references"});
+    push(@{$self->{"_references"}}, @refs);
+}
+
+=head2 remove_references
+
+ Title   : remove_references
+ Usage   :
+ Function: Remove all references for this InterPro term.
+ Example :
+ Returns : The list of previous references as an array of
+           L<Bio::Annotation::Reference> objects.
+ Args    :
+
+
+=cut
+
+sub remove_references{
+    my ($self) = @_;
+    # Snapshot what is stored, reset to an empty list, return the snapshot.
+    my @previous = $self->get_references();
+    $self->{"_references"} = [];
+    return @previous;
+}
+
+=head2 get_members
+
+ Title   : get_members
+ Usage   : @arr = get_members()
+ Function: Get the list of member(s) for this object.
+ Example :
+ Returns : An array of Bio::Annotation::DBLink objects
+ Args    :
+
+
+=cut
+
+sub get_members{
+    my ($self) = @_;
+    # No list stored yet: empty list (undef in scalar context).
+    return () unless exists($self->{'_members'});
+    return @{$self->{'_members'}};
+}
+
+=head2 add_member
+
+ Title   : add_member
+ Usage   :
+ Function: Add one or more member(s) to this object.
+ Example :
+ Returns : 
+ Args    : One or more Bio::Annotation::DBLink objects.
+
+
+=cut
+
+sub add_member{
+    my ($self, @members) = @_;
+    # Create the list on first use, then append (returns the new count).
+    $self->{'_members'} = [] if !exists($self->{'_members'});
+    push(@{$self->{'_members'}}, @members);
+}
+
+=head2 remove_members
+
+ Title   : remove_members
+ Usage   :
+ Function: Remove all members for this class.
+ Example :
+ Returns : The list of previous members as an array of
+           Bio::Annotation::DBLink objects.
+ Args    :
+
+
+=cut
+
+sub remove_members{
+    my ($self) = @_;
+    # Snapshot what is stored, reset to an empty list, return the snapshot.
+    my @previous = $self->get_members();
+    $self->{'_members'} = [];
+    return @previous;
+}
+
+=head2 get_examples
+
+ Title   : get_examples
+ Usage   : @arr = get_examples()
+ Function: Get the list of example(s) for this object.
+
+           This is an element of the InterPro xml schema.
+
+ Example :
+ Returns : An array of Bio::Annotation::DBLink objects
+ Args    :
+
+
+=cut
+
+sub get_examples{
+    my ($self) = @_;
+    # No list stored yet: empty list (undef in scalar context).
+    return () unless exists($self->{'_examples'});
+    return @{$self->{'_examples'}};
+}
+
+=head2 add_example
+
+ Title   : add_example
+ Usage   :
+ Function: Add one or more example(s) to this object.
+
+           This is an element of the InterPro xml schema.
+
+ Example :
+ Returns : 
+ Args    : One or more Bio::Annotation::DBLink objects.
+
+
+=cut
+
+sub add_example{
+    my ($self, @examples) = @_;
+    # Create the list on first use, then append (returns the new count).
+    $self->{'_examples'} = [] if !exists($self->{'_examples'});
+    push(@{$self->{'_examples'}}, @examples);
+}
+
+=head2 remove_examples
+
+ Title   : remove_examples
+ Usage   :
+ Function: Remove all examples for this class.
+
+           This is an element of the InterPro xml schema.
+
+ Example :
+ Returns : The list of previous examples as an array of
+           Bio::Annotation::DBLink objects.
+ Args    :
+
+
+=cut
+
+sub remove_examples{
+    my ($self) = @_;
+    # Snapshot what is stored, reset to an empty list, return the snapshot.
+    my @previous = $self->get_examples();
+    $self->{'_examples'} = [];
+    return @previous;
+}
+
+=head2 get_external_documents
+
+ Title   : get_external_documents
+ Usage   : @arr = get_external_documents()
+ Function: Get the list of external_document(s) for this object.
+
+           This is an element of the InterPro xml schema.
+
+ Example :
+ Returns : An array of Bio::Annotation::DBLink objects
+ Args    :
+
+
+=cut
+
+sub get_external_documents{
+    my ($self) = @_;
+    # No list stored yet: empty list (undef in scalar context).
+    return () unless exists($self->{'_external_documents'});
+    return @{$self->{'_external_documents'}};
+}
+
+=head2 add_external_document
+
+ Title   : add_external_document
+ Usage   :
+ Function: Add one or more external_document(s) to this object.
+
+           This is an element of the InterPro xml schema.
+
+ Example :
+ Returns : 
+ Args    : One or more Bio::Annotation::DBLink objects.
+
+
+=cut
+
+sub add_external_document{
+    my ($self, @documents) = @_;
+    # Create the list on first use, then append (returns the new count).
+    $self->{'_external_documents'} = [] if !exists($self->{'_external_documents'});
+    push(@{$self->{'_external_documents'}}, @documents);
+}
+
+=head2 remove_external_documents
+
+ Title   : remove_external_documents
+ Usage   :
+ Function: Remove all external_documents for this class.
+
+           This is an element of the InterPro xml schema.
+
+ Example :
+ Returns : The list of previous external_documents as an array of
+           Bio::Annotation::DBLink objects.
+ Args    :
+
+
+=cut
+
+sub remove_external_documents{
+    my ($self) = @_;
+    # Snapshot what is stored, reset to an empty list, return the snapshot.
+    my @previous = $self->get_external_documents();
+    $self->{'_external_documents'} = [];
+    return @previous;
+}
+
+=head2 class_list
+
+ Title   : class_list
+ Usage   : $obj->class_list($newval)
+ Function: Set/get for class list element of the InterPro xml schema
+ Example : 
+ Returns : reference to an array of Bio::Annotation::DBLink objects
+ Args    : reference to an array of Bio::Annotation::DBLink objects
+
+
+=cut
+
+sub class_list{
+  my ($self, $value) = @_;
+
+  # Only a defined argument replaces the stored value, so the list
+  # cannot be reset to undef through this accessor.
+  $self->{'class_list'} = $value if defined $value;
+
+  return $self->{'class_list'};
+}
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $term->to_string();
+ Function: to_string method for InterPro terms.
+ Returns : A string representation of this InterPro term.
+ Args    :
+
+=cut
+
+sub to_string {
+  my($self) = @_;
+  my $s = "";
+  # Full details need a name; an empty get_* list produces no section at all.
+  $s .= "-- InterPro id:\n";
+  $s .= $self->interpro_id()."\n";
+  if (defined $self->name) {
+    $s .= "-- Name:\n";
+    $s .= $self->name()."\n";
+    $s .= "-- Definition:\n";
+    $s .= $self->definition()."\n";
+    $s .= "-- Category:\n";
+    if ( defined( $self->ontology() ) ) {
+      $s .= $self->ontology()->name()."\n";
+    } else {
+      $s .= "\n";   # no ontology: the category line stays blank
+    }
+    $s .= "-- Version:\n";
+    $s .= $self->version()."\n";
+    $s .= "-- Is obsolete:\n";
+    $s .= $self->is_obsolete()."\n";
+    $s .= "-- Comment:\n";
+    $s .= $self->comment()."\n";
+    if ($self->get_references) {   # scalar context: entry count (or undef)
+      $s .= "-- References:\n";
+      foreach my $ref ( $self->get_references ) {
+        $s .= $ref->authors."\n".$ref->title."\n".$ref->location."\n\n";
+      }
+      $s .= "\n";
+    }
+    if ($self->get_members) {   # true only with at least one member
+      $s .= "-- Member List:\n";
+      foreach my $ref ( $self->get_members ) {
+        $s .= $ref->database."\t".$ref->primary_id."\n";
+      }
+      $s .= "\n";
+    }
+    if ($self->get_external_documents) {   # true only with at least one entry
+      $s .= "-- External Document List:\n";
+      foreach my $ref ( $self->get_external_documents ) {
+        $s .= $ref->database."\t".$ref->primary_id."\n";
+      }
+      $s .= "\n";
+    }
+    if ($self->get_examples) {   # true only with at least one example
+      $s .= "-- Examples:\n";
+      foreach my $ref ( $self->get_examples ) {
+        $s .= $ref->database."\t".$ref->primary_id."\t".$ref->comment."\n";
+      }
+      $s .= "\n";
+    }
+    if (defined $self->class_list) {   # class_list holds a single array ref
+      $s .= "-- Class List:\n";
+      foreach my $ref ( @{$self->class_list} ) {
+        $s .= $ref->primary_id."\n";
+      }
+      $s .= "\n";
+    }
+    if ($self->get_secondary_ids) {
+      $s .= "-- Secondary IDs:\n";
+      foreach my $ref ( $self->get_secondary_ids() ) {
+        $s .= $ref."\n";
+      }
+      $s .= "\n";
+    }
+  }
+  else {
+    $s .= "InterPro term not fully instantiated\n";
+  }
+  return $s;
+}
+
+=head1 Deprecated methods
+
+These are here for backwards compatibility.
+
+=cut
+
+=head2 secondary_ids
+
+ Title   : secondary_ids
+ Usage   : $obj->secondary_ids($newval)
+ Function: This is deprecated. Use get_secondary_ids() or 
+           add_secondary_id() instead.
+ Example : 
+ Returns : reference to an array of strings
+ Args    : reference to an array of strings
+
+
+=cut
+
+sub secondary_ids{
+    my ($self, @args) = @_;
+    $self->warn("secondary_ids is deprecated. Use ".
+                "get_secondary_ids/add_secondary_id instead.");
+
+    # Get mode: no argument at all, so report the current secondary ids.
+    if (!@args) {
+        my @current = $self->get_secondary_ids();
+        return \@current;
+    }
+
+    # Set mode with a false argument (e.g. undef): remove the stored ids.
+    my $sids = $args[0];
+    if (!$sids) {
+        $self->remove_secondary_ids();
+        return [];
+    }
+
+    # Set mode proper: pass the ids to add_secondary_id(); the returned
+    # array holds (a copy of) just the ids passed in.
+    $self->add_secondary_id(@$sids);
+    return [ @$sids ];
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOEngine.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOEngine.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOEngine.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1029 @@
+# $Id: OBOEngine.pm,v 1.6.4.3 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::OBOEngine
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::OBOEngine - An Ontology Engine for OBO style flat file
+format from the Gene Ontology Consortium
+
+=head1 SYNOPSIS
+
+  use Bio::Ontology::OBOEngine;
+
+  my $parser = Bio::Ontology::OBOEngine->new
+        ( -file => "gene_ontology.obo" );
+
+  my $engine = $parser->parse();
+
+=head1 DESCRIPTION
+
+Needs Graph.pm from CPAN.
+
+This module replaces SimpleGOEngine.pm, which is deprecated.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Sohel Merchant
+
+Email: s-merchant at northwestern.edu
+
+Address:
+
+  Northwestern University
+  Center for Genetic Medicine (CGM), dictyBase
+  Suite 1206,
+  676 St. Clair st
+  Chicago IL 60611
+
+=head2 CONTRIBUTOR
+
+ Hilmar Lapp, hlapp at gmx.net
+ Chris Mungall,   cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Ontology::OBOEngine;
+
+use Bio::Ontology::SimpleGOEngine::GraphAdaptor;
+
+use strict;
+use Bio::Ontology::RelationshipType;
+use Bio::Ontology::RelationshipFactory;
+use Data::Dumper;
+
+use constant TRUE       => 1;
+use constant FALSE      => 0;
+use constant IS_A       => "IS_A";
+use constant PART_OF    => "PART_OF";
+use constant RELATED_TO => "RELATED_TO";
+use constant TERM       => "TERM";
+use constant TYPE       => "TYPE";
+use constant ONTOLOGY   => "ONTOLOGY";
+
+use base qw(Bio::Root::Root Bio::Ontology::OntologyEngineI);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $engine = Bio::Ontology::OBOEngine->new()
+ Function: Creates a new OBOEngine
+ Returns : A new OBOEngine object
+ Args    :
+
+=cut
+
+sub new {
+    my $class = shift;
+
+    # Construction proper is delegated to the parent class.
+    my $engine = $class->SUPER::new( @_ );
+    # Install the built-in relationship types, the graph and the factory.
+    $engine->init();
+    return $engine;
+} # new
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $engine->init();
+ Function: Initializes this Engine.
+ Returns :
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+    # One cached RelationshipType instance per built-in predicate.
+    foreach my $pair ( [ "_is_a_relationship",       IS_A ],
+                       [ "_part_of_relationship",    PART_OF ],
+                       [ "_related_to_relationship", RELATED_TO ] ) {
+        my ( $slot, $type_name ) = @$pair;
+        $self->{$slot} = Bio::Ontology::RelationshipType->get_instance( $type_name );
+    }
+    # A fresh graph adaptor to hold terms and relationships.
+    $self->graph( Bio::Ontology::SimpleGOEngine::GraphAdaptor->new() );
+    # Default factory for relationship objects created on the fly.
+    $self->relationship_factory( Bio::Ontology::RelationshipFactory->new( -type => "Bio::Ontology::Relationship" ) );
+} # init
+
+
+
+=head2 is_a_relationship
+
+ Title   : is_a_relationship()
+ Usage   : $IS_A = $engine->is_a_relationship();
+ Function: Returns a Bio::Ontology::RelationshipType object for "is-a"
+           relationships
+ Returns : Bio::Ontology::RelationshipType set to "IS_A"
+ Args    :
+
+=cut
+
+sub is_a_relationship {
+    my $self = shift;
+
+    # Read-only accessor: a defined argument counts as an attempted write
+    # and is rejected.
+    $self->throw( "Attempted to change immutable field" ) if defined $_[0];
+
+    return $self->{'_is_a_relationship'};
+} # is_a_relationship
+
+
+
+=head2 part_of_relationship
+
+ Title   : part_of_relationship()
+ Usage   : $PART_OF = $engine->part_of_relationship();
+ Function: Returns a Bio::Ontology::RelationshipType object for "part-of"
+           relationships
+ Returns : Bio::Ontology::RelationshipType set to "PART_OF"
+ Args    :
+
+=cut
+
+sub part_of_relationship {
+    my $self = shift;
+
+    # Read-only accessor: a defined argument counts as an attempted write
+    # and is rejected.
+    $self->throw( "Attempted to change immutable field" ) if defined $_[0];
+
+    return $self->{'_part_of_relationship'};
+} # part_of_relationship
+
+=head2 related_to_relationship
+
+ Title   : related_to_relationship()
+ Usage   : $RELATED_TO = $engine->related_to_relationship();
+ Function: Returns a Bio::Ontology::RelationshipType object for "related-to"
+           relationships
+ Returns : Bio::Ontology::RelationshipType set to "RELATED_TO"
+ Args    :
+
+=cut
+
+sub related_to_relationship {
+    my $self = shift;
+
+    # Read-only accessor: a defined argument counts as an attempted write
+    # and is rejected.
+    $self->throw( "Attempted to change immutable field" ) if defined $_[0];
+
+    return $self->{'_related_to_relationship'};
+} # related_to_relationship
+
+
+=head2 add_term
+
+ Title   : add_term
+ Usage   : $engine->add_term( $term_obj );
+ Function: Adds a Bio::Ontology::TermI to this engine
+ Returns : true if the term was added and false otherwise (e.g., if the
+           term already existed in the ontology engine)
+ Args    : Bio::Ontology::TermI
+
+=cut
+
+sub add_term {
+    my ( $self, $term ) = @_;
+
+    # A term whose ID is already a vertex is left untouched.
+    return FALSE if $self->has_term( $term );
+    # Register the ID as a vertex and attach the term object to it.
+    my $vertex_id = $self->_get_id( $term );
+    my $graph     = $self->graph();
+    $graph->add_vertex( $vertex_id );
+    $graph->set_vertex_attribute( $vertex_id, TERM, $term );
+    return TRUE;
+} # add_term
+
+
+
+=head2 has_term
+
+ Title   : has_term
+ Usage   : $engine->has_term( $term );
+ Function: Checks whether this engine contains a particular term
+ Returns : true or false
+ Args    : Bio::Ontology::TermI
+           or
+           Term identifier (e.g. "GO:0012345")
+
+=cut
+
+sub has_term {
+    my $self = shift;
+
+    # Accept either a term object or a bare identifier; both are reduced
+    # to the ID under which vertices are stored.
+    my $vertex_id = $self->_get_id( shift );
+
+    # Normalise the graph's answer to the TRUE/FALSE constants.
+    my $present = $self->graph()->has_vertex( $vertex_id );
+    return $present ? TRUE : FALSE;
+} # has_term
+
+
+=head2 add_relationship_type
+
+ Title   : add_relationship_type
+ Usage   : $engine->add_relationship_type( $type_name, $ont );
+ Function: Adds a new relationship type to the engine.  Use
+           get_relationship_type($type_name) to retrieve.
+ Returns : true if successfully added, false otherwise
+ Args    : relationship type name to add (scalar)
+           ontology to which to assign the relationship type
+
+=cut
+
+sub add_relationship_type{
+   # Restored '@args': the archived copy read 'at args' (mail-archive
+   # address obfuscation), which is not valid Perl.
+   my ($self, @args) = @_;
+
+   # Exactly two arguments are required: the type name and its ontology.
+   return 0 unless scalar(@args) == 2;
+
+   my ($type_name, $ont) = @args;
+   $self->{ "_extra_relationship_types" }{$type_name} = Bio::Ontology::RelationshipType->get_instance($type_name,$ont);
+   return 1;
+}
+
+
+=head2 get_relationship_type
+
+ Title   : get_relationship_type
+ Usage   : $engine->get_relationship_type( $type_name );
+ Function: Gets a Bio::Ontology::RelationshipI object corresponding
+           to $type_name
+ Returns : a Bio::Ontology::RelationshipI object
+ Args    :
+
+=cut
+
+sub get_relationship_type{
+   my $self = shift;    # remaining argument: name of the registered type
+   return $self->{'_extra_relationship_types'}->{ $_[0] };
+}
+
+=head2 add_relationship
+
+ Title   : add_relationship
+ Usage   : $engine->add_relationship( $relationship );
+           $engine->add_relationship( $subject_term, $predicate_term,
+                                      $object_term, $ontology );
+           $engine->add_relationship( $subject_id, $predicate_id,
+                                      $object_id, $ontology);
+ Function: Adds a relationship to this engine
+ Returns : true if successfully added, false otherwise
+ Args    : The relationship in one of three ways:
+
+             a) subject (or child) term id, Bio::Ontology::TermI
+                (rel.type), object (or parent) term id, ontology
+
+           or
+
+             b) subject Bio::Ontology::TermI, predicate
+                Bio::Ontology::TermI (rel.type), object
+                Bio::Ontology::TermI, ontology
+
+           or
+
+             c) Bio::Ontology::RelationshipI-compliant object
+
+=cut
+
+# term objs or term ids
+sub add_relationship {
+    my ( $self, $child, $type, $parent, $ont ) = @_;
+
+    # Single-argument form: the argument is a relationship object, which
+    # is taken apart into predicate, object, ontology and subject.
+    if ( @_ == 2 ) {
+        my $rel = $child;
+        $self->_check_class( $rel, "Bio::Ontology::RelationshipI" );
+        $type   = $rel->predicate_term();
+        $parent = $rel->object_term();
+        $ont    = $rel->ontology();
+        $child  = $rel->subject_term();
+    }
+
+    # The predicate has to be a term object in every calling form.
+    $self->_check_class( $type, "Bio::Ontology::TermI" );
+
+    my $parent_id = $self->_get_id( $parent );
+    my $child_id  = $self->_get_id( $child );
+    my $graph     = $self->graph();
+    # Make sure both end points are vertices, child first.
+    foreach my $end ( [ $child_id, $child ], [ $parent_id, $parent ] ) {
+        $self->add_term( $end->[1] ) unless $graph->has_vertex( $end->[0] );
+    }
+
+    # At most one edge per parent/child pair: an existing edge wins.
+    return FALSE if $graph->has_edge( $parent_id, $child_id );
+
+    # Edges point from parent to child and carry predicate and ontology.
+    $graph->add_edge( $parent_id, $child_id );
+    $graph->set_edge_attribute( $parent_id, $child_id, TYPE,     $type );
+    $graph->set_edge_attribute( $parent_id, $child_id, ONTOLOGY, $ont );
+    return TRUE;
+} # add_relationship
+
+
+
+
+=head2 get_relationships
+
+
+ Title   : get_relationships
+ Usage   : $engine->get_relationships( $term );
+ Function: Returns all relationships of a term, or all relationships in
+           the graph if no term is specified.
+ Returns : Relationship
+ Args    : term id
+           or
+           Bio::Ontology::TermI
+
+=cut
+
+sub get_relationships {
+    my ( $self, $term ) = @_;
+
+    my $graph = $self->graph();
+
+    # A (true) term argument restricts the result to the edges at that
+    # term, which must then be a vertex of the graph.
+    my $termid;
+    if ( $term ) {
+        $termid = $self->_get_id( $term );
+        $self->throw( "no term with identifier \"$termid\" in ontology" )
+            unless $graph->has_vertex( $termid );
+    }
+
+    my $factory = $self->relationship_factory();
+    my @edges   = $termid ? $graph->edges_at( $termid ) : $graph->edges();
+
+    # Every edge is a [ start, end ] pair of IDs; the end vertex supplies
+    # the subject term and the start vertex the object term, while the
+    # predicate and ontology are read from the edge attributes.
+    my @rels;
+    foreach my $edge ( @edges ) {
+        my ( $startid, $endid ) = @$edge;
+        my $rel = $factory->create_object(
+            -subject_term   => $self->get_terms( $endid ),
+            -object_term    => $self->get_terms( $startid ),
+            -predicate_term => $graph->get_edge_attribute( $startid, $endid, TYPE ),
+            -ontology       => $graph->get_edge_attribute( $startid, $endid, ONTOLOGY ),
+        );
+        push @rels, $rel;
+    }
+
+    return @rels;
+} # get_relationships
+
+=head2 get_all_relationships
+
+
+ Title   : get_all_relationships
+ Usage   : @rels = $engine->get_all_relationships();
+ Function: Returns all relationships in the graph.
+ Returns : Relationship
+ Args    :
+
+=cut
+
+sub get_all_relationships {
+    return $_[0]->get_relationships( @_[ 1 .. $#_ ] );    # pure pass-through
+} # get_all_relationships
+
+
+
+=head2 get_predicate_terms
+
+ Title   : get_predicate_terms
+ Usage   : $engine->get_predicate_terms();
+ Function: Returns the types of relationships this engine contains
+ Returns : Bio::Ontology::RelationshipType
+ Args    :
+
+=cut
+
+sub get_predicate_terms {
+    my $self = shift;
+
+    # The three built-in predicates always lead, in this fixed order.
+    my @predicates = map { $self->$_() }
+        qw( is_a_relationship part_of_relationship related_to_relationship );
+
+    # Types registered via add_relationship_type() follow, in hash order.
+    push @predicates, map { $self->{'_extra_relationship_types'}{$_} }
+        keys %{ $self->{'_extra_relationship_types'} };
+
+    return @predicates;
+} # get_predicate_terms
+
+
+
+
+=head2 get_child_terms
+
+ Title   : get_child_terms
+ Usage   : $engine->get_child_terms( $term_obj, @rel_types );
+           $engine->get_child_terms( $term_id, @rel_types );
+ Function: Returns the children of this term
+ Returns : Bio::Ontology::TermI
+ Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType
+           or
+           term id, Bio::Ontology::RelationshipType
+
+           if NO Bio::Ontology::RelationshipType is indicated: children
+           of ALL types are returned
+
+=cut
+
+sub get_child_terms {
+    my $self = shift;
+    my $term = shift;
+    # TRUE selects the "children" direction; remaining args are type filters.
+    return $self->_get_child_parent_terms_helper( $term, TRUE, @_ );
+} # get_child_terms
+
+
+=head2 get_descendant_terms
+
+ Title   : get_descendant_terms
+ Usage   : $engine->get_descendant_terms( $term_obj, @rel_types );
+           $engine->get_descendant_terms( $term_id, @rel_types );
+ Function: Returns the descendants of this term
+ Returns : Bio::Ontology::TermI
+ Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType
+           or
+           term id, Bio::Ontology::RelationshipType
+
+           if NO Bio::Ontology::RelationshipType is indicated:
+           descendants of ALL types are returned
+
+=cut
+
+sub get_descendant_terms {
+    my ( $self, $term, @types ) = @_;
+
+    # Reduce the argument to a vertex ID and insist that it is known.
+    $term = $self->_get_id( $term );
+    $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" )
+        unless $self->graph()->has_vertex( $term );
+
+    # The recursive helper records each descendant ID as a key of this
+    # hash, so an ID reached along several paths is reported only once.
+    # The optional @types restrict which relationship types are followed.
+    my %descendant_ids;
+
+    $self->_get_descendant_terms_helper( $term, \%descendant_ids, \@types );
+
+    # Turn the collected IDs back into term objects.
+    my @ids = keys %descendant_ids;
+
+    return $self->get_terms( @ids );
+
+} # get_descendant_terms
+
+
+
+
+=head2 get_parent_terms
+
+ Title   : get_parent_terms
+ Usage   : $engine->get_parent_terms( $term_obj, @rel_types );
+           $engine->get_parent_terms( $term_id, @rel_types );
+ Function: Returns the parents of this term
+ Returns : Bio::Ontology::TermI
+ Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType
+           or
+           term id, Bio::Ontology::RelationshipType
+
+           if NO Bio::Ontology::RelationshipType is indicated:
+           parents of ALL types are returned
+
+=cut
+
+sub get_parent_terms {
+    my $self = shift;
+    my $term = shift;
+    # FALSE selects the "parents" direction; remaining args are type filters.
+    return $self->_get_child_parent_terms_helper( $term, FALSE, @_ );
+} # get_parent_terms
+
+
+
+=head2 get_ancestor_terms
+
+ Title   : get_ancestor_terms
+ Usage   : $engine->get_ancestor_terms( $term_obj, @rel_types );
+           $engine->get_ancestor_terms( $term_id, @rel_types );
+ Function: Returns the ancestors of this term
+ Returns : Bio::Ontology::TermI
+ Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType
+           or
+           term id, Bio::Ontology::RelationshipType
+
+           if NO Bio::Ontology::RelationshipType is indicated:
+           ancestors of ALL types are returned
+
+=cut
+
+sub get_ancestor_terms {
+    my ( $self, $term, @types ) = @_;
+
+    # Reduce the argument to a vertex ID and insist that it is known.
+    $term = $self->_get_id( $term );
+    $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" )
+        unless $self->graph()->has_vertex( $term );
+
+    # The recursive helper records each ancestor ID as a key of this
+    # hash, so an ID reached along several paths is reported only once.
+    # The optional @types restrict which relationship types are followed.
+    my %ancestor_ids;
+
+    $self->_get_ancestor_terms_helper( $term, \%ancestor_ids, \@types );
+
+    # Turn the collected IDs back into term objects.
+    my @ids = keys %ancestor_ids;
+
+    return $self->get_terms( @ids );
+
+} # get_ancestor_terms
+
+
+
+
+
+=head2 get_leaf_terms
+
+ Title   : get_leaf_terms
+ Usage   : $engine->get_leaf_terms();
+ Function: Returns the leaf terms
+ Returns : Bio::Ontology::TermI
+ Args    :
+
+=cut
+
+sub get_leaf_terms {
+    my $self = shift;
+
+    # Leaves are the graph's sink vertices (as reported by the adaptor's
+    # sink_vertices()); their IDs are mapped back to term objects.
+    my @leaf_ids = $self->graph()->sink_vertices();
+    return $self->get_terms( @leaf_ids );
+}
+
+
+
+=head2 get_root_terms()
+
+ Title   : get_root_terms
+ Usage   : $engine->get_root_terms();
+ Function: Returns the root terms
+ Returns : Bio::Ontology::TermI
+ Args    :
+
+=cut
+
+sub get_root_terms {
+    my $self = shift;
+
+    # Roots are the graph's source vertices (as reported by the adaptor's
+    # source_vertices()); their IDs are mapped back to term objects.
+    my @root_ids = $self->graph()->source_vertices();
+
+    return $self->get_terms( @root_ids );
+}
+
+
+=head2 get_terms
+
+ Title   : get_terms
+ Usage   : @terms = $engine->get_terms( "GO:1234567", "GO:2234567" );
+ Function: Returns term objects with given identifiers
+ Returns : Bio::Ontology::TermI, or the term corresponding to the
+           first identifier if called in scalar context
+ Args    : term ids
+
+=cut
+
+sub get_terms {
+    my ( $self, @ids ) = @_;
+
+    # IDs that are not vertices of the graph are silently skipped.
+    my @terms;
+    for my $id ( @ids ) {
+        next unless $self->graph()->has_vertex( $id );
+        push @terms, $self->graph()->get_vertex_attribute( $id, TERM );
+    }
+
+    # List context: every term found; scalar context: only the first one
+    # (undef if none was found).
+    return wantarray ? @terms : $terms[0];
+} # get_terms
+
+
+=head2 get_all_terms
+
+ Title   : get_all_terms
+ Usage   : $engine->get_all_terms();
+ Function: Returns all terms in this engine
+ Returns : Bio::Ontology::TermI
+ Args    :
+
+=cut
+
+sub get_all_terms {
+    my $self = shift;
+    # Every vertex ID of the graph, resolved to its term object.
+    my @vertex_ids = $self->graph()->vertices();
+    return $self->get_terms( @vertex_ids );
+} # get_all_terms
+
+
+=head2 find_terms
+
+ Title   : find_terms
+ Usage   : ($term) = $oe->find_terms(-identifier => "SO:0000263");
+ Function: Find term instances matching queries for their attributes.
+
+           This implementation can efficiently resolve queries by
+           identifier.
+
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : Named parameters. The following parameters should be recognized
+           by any implementations:
+
+              -identifier    query by the given identifier
+              -name          query by the given name
+
+=cut
+
+sub find_terms{
+    # '@args' restored here and below: the archived copy read 'at args'
+    # (mail-archive address obfuscation), which does not compile.
+    my ($self, @args) = @_;
+    my ($id,$name) = $self->_rearrange([qw(IDENTIFIER NAME)], @args);
+
+    # An identifier narrows the candidates to the term stored under it;
+    # otherwise every term in the engine is a candidate.
+    my @terms = defined($id) ? $self->get_terms($id) : $self->get_all_terms();
+
+    # An optional name filter is applied on top (exact string match).
+    if(defined($name)) {
+        @terms = grep { $_->name() eq $name; } @terms;
+    }
+    return @terms;
+}
+
+
+=head2 find_identically_named_terms
+
+ Title   : find_identically_named_terms
+ Usage   : ($term) = $oe->find_identically_named_terms($term0);
+ Function: Find term instances where names match the query term
+           name exactly
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : a Bio::Ontology::TermI object
+
+=cut
+
+sub find_identically_named_terms{
+    my ($self,$qterm) = @_;
+
+    # Only TermI-compliant query objects are accepted.
+    if ( !defined $qterm or !$qterm->isa("Bio::Ontology::TermI") ) {
+        $self->throw("Argument doesn't implement Bio::Ontology::TermI. " . "Bummer." );
+    }
+
+    # Collect hits keyed by identifier, so each identifier is reported once.
+    my %hit_for = map  { ( scalar( $_->identifier ) => $_ ) }
+                  grep { $_->name eq $qterm->name } $self->get_all_terms;
+    return values %hit_for;
+}
+
+
+=head2 find_identical_terms
+
+ Title   : find_identical_terms
+ Usage   : ($term) = $oe->find_identical_terms($term0);
+ Function: Find term instances where name or synonym
+           matches the query exactly
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : a Bio::Ontology::TermI object
+
+=cut
+
+sub find_identical_terms{
+    my ($self,$qterm) = @_;
+    # Only TermI-compliant query objects are accepted.
+    unless ( defined $qterm and $qterm->isa("Bio::Ontology::TermI") ) {
+        $self->throw("Argument doesn't implement Bio::Ontology::TermI. " . "Bummer." );
+    }
+    # Match any query label (name/synonym) against any label of each term.
+    my %hit_for;
+    for my $wanted ( $qterm->name, $qterm->each_synonym ) {
+        for my $candidate ( $self->get_all_terms ) {
+            my @labels = ( $candidate->name, $candidate->each_synonym() );
+            next unless grep { $_ eq $wanted } @labels;
+            $hit_for{ $candidate->identifier } = $candidate;
+        }
+    }
+    return values %hit_for;
+}
+
+=head2 find_similar_terms
+
+ Title   : find_similar_terms
+ Usage   : ($term) = $oe->find_similar_terms($term0);
+ Function: Find term instances where name or synonym, or part of one,
+           matches the query.
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : a Bio::Ontology::TermI object
+
+=cut
+
+sub find_similar_terms{
+    my ($self,$qterm) = @_;
+    $self->throw("Argument doesn't implement Bio::Ontology::TermI. " . "Bummer." )
+        unless defined $qterm and $qterm->isa("Bio::Ontology::TermI");
+    # Hits are keyed by identifier so that each term is returned once.
+    my %matching_terms;
+    foreach my $qstring ($qterm->name, $qterm->each_synonym) {
+        foreach my $term ($self->get_all_terms) {
+            # \Q...\E: names and synonyms are literal text, not patterns.
+            # Unquoted, a label like "a(b" or "c++" breaks or skews the match.
+            foreach my $string ( $term->name, $term->each_synonym() ) {
+                $matching_terms{$term->identifier} = $term and next
+                    if $string =~ /\Q$qstring\E/ or $qstring =~ /\Q$string\E/;
+            }
+        }
+    }
+    return values %matching_terms;
+}
+
+
+=head2 relationship_factory
+
+ Title   : relationship_factory
+ Usage   : $fact = $obj->relationship_factory()
+ Function: Get/set the object factory to be used when relationship
+           objects are created by the implementation on-the-fly.
+
+ Example :
+ Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : on set, a Bio::Factory::ObjectFactoryI compliant object
+
+=cut
+
+sub relationship_factory{
+    my ( $self, @new_value ) = @_;
+    # Setter when called with an argument (even undef); getter otherwise.
+    $self->{'relationship_factory'} = $new_value[0] if @new_value;
+    return $self->{'relationship_factory'};
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $fact = $obj->term_factory()
+ Function: Get/set the object factory to be used when term objects are
+           created by the implementation on-the-fly.
+
+           Note that this ontology engine implementation does not
+           create term objects on the fly, and therefore setting this
+           attribute is meaningless.
+
+ Example :
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : on set, a Bio::Factory::ObjectFactoryI compliant object
+
+=cut
+
+sub term_factory{
+    my ( $self, @new_value ) = @_;
+    # Getter: nothing to store.
+    return $self->{'term_factory'} unless @new_value;
+
+    # Setter: store the value, but warn that it has no effect here.
+    $self->warn("setting term factory, but ".ref($self).
+                " does not create terms on-the-fly");
+    return $self->{'term_factory'} = $new_value[0];
+}
+
+=head2 graph
+
+ Title   : graph()
+ Usage   : $engine->graph();
+ Function: Returns the Graph this engine is based on
+ Returns : Graph
+ Args    :
+
+=cut
+
+sub graph {
+    my ( $self, $new_graph ) = @_;
+
+    # Getter when no defined argument is given.
+    return $self->{'_graph'} unless defined $new_graph;
+
+    # Setter: only GraphAdaptor objects are accepted; anything else throws.
+    $self->_check_class( $new_graph, 'Bio::Ontology::SimpleGOEngine::GraphAdaptor' );
+    return $self->{'_graph'} = $new_graph;
+} # graph
+
+
+# Internal methods
+# ----------------
+# Checks the correct format of a GOBO-formatted id
+# Gets the id out of a term or id string
+sub _get_id {
+    my ( $self, $term ) = @_;
+    my $id = $term;
+
+    # Term objects: use their identifier, or build a surrogate key when
+    # they have none.  Anything else is taken to be the ID itself.
+    if ( ref($term) ) {
+        # use TermI standard API
+        $self->throw(
+            "Object doesn't implement Bio::Ontology::TermI. " . "Bummer." )
+          unless $term->isa("Bio::Ontology::TermI");
+        $id = $term->identifier();
+
+        # if there is no ID, we need to fake one from ontology name and name
+        # in order to achieve uniqueness
+        if ( !$id ) {
+            $id = $term->ontology->name() if $term->ontology();
+            $id = $id ? $id . '|' : '';
+            $id .= $term->name();
+        }
+    }
+
+    # IDs of OBOterm objects and anything shaped like "prefix:suffix" are
+    # accepted as they are.  isa() is only asked of references: on a plain
+    # string it would be a class-method call on a package named after the
+    # ID, and on undef it dies with an unrelated message.
+    return $id
+      if ( ref($term) && $term->isa("Bio::Ontology::OBOterm") )
+      || ( defined($id) && $id =~ /^\w+:\w+$/ );
+
+    # we shouldn't have gotten here if it's at least a remotely decent ID;
+    # only surrogate keys (they contain a '|') are still let through
+    $self->throw( ref($self) . ": non-standard identifier '"
+                  . ( defined($id) ? $id : '' ) . "'\n" )
+      unless defined($id) && $id =~ /\|/;
+    return $id;
+}    # _get_id
+
+# Helper for getting children and parent terms
+sub _get_child_parent_terms_helper {
+    my ( $self, $term, $do_get_child_terms, @types ) = @_;
+
+    # Every type filter has to be a term object.
+    foreach my $filter ( @types ) {
+        $self->_check_class( $filter, "Bio::Ontology::TermI" );
+    }
+
+    # Reduce the argument to a vertex ID and insist that it is known.
+    $term = $self->_get_id( $term );
+    my $graph = $self->graph();
+    unless ( $graph->has_vertex( $term ) ) {
+        $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" );
+    }
+
+    # Children are the successors of the vertex, parents its predecessors;
+    # the flag passed by the caller picks the direction.
+    my @neighbour_ids = $do_get_child_terms
+        ? $graph->successors( $term )
+        : $graph->predecessors( $term );
+
+    # Without type filters every neighbour qualifies.
+    return $self->get_terms( @neighbour_ids ) unless @types;
+
+    # With filters, a neighbour is selected once for each filter that
+    # equals the type stored on the connecting edge.
+    my @selected_ids;
+
+    foreach my $neighbour_id ( @neighbour_ids ) {
+
+        # Edge attributes are looked up in (predecessor, successor) order.
+        my @edge = $do_get_child_terms
+            ? ( $term, $neighbour_id )
+            : ( $neighbour_id, $term );
+
+        foreach my $filter ( @types ) {
+            my $edge_type = $graph->get_edge_attribute( @edge, TYPE );
+            push @selected_ids, $neighbour_id
+                if $edge_type->equals( $filter );
+        }
+    }
+
+    return $self->get_terms( @selected_ids );
+
+} # _get_child_parent_terms_helper
+
+
+# Recursive helper
+sub _get_descendant_terms_helper {
+    my ( $self, $term, $ids_ref, $types_ref ) = @_;
+
+    # Recursive walk: each ID reached becomes a key of %$ids_ref.
+    foreach my $child_term ( $self->get_child_terms( $term, @$types_ref ) ) {
+        # Take the ID from the term itself rather than from identifier(),
+        # so terms stored under a surrogate key are found again.
+        my $child_term_id = $self->_get_id( $child_term );
+
+        # Already recorded means its descendants have been (or are being)
+        # walked; skipping avoids repeated work and endless recursion.
+        next if exists $ids_ref->{ $child_term_id };
+        $ids_ref->{ $child_term_id } = 0;
+        $self->_get_descendant_terms_helper( $child_term_id, $ids_ref, $types_ref );
+    }
+} # _get_descendant_terms_helper
+
+
+# Recursive helper
+sub _get_ancestor_terms_helper {
+    my ( $self, $term, $ids_ref, $types_ref ) = @_;
+
+    # Recursive walk: each ID reached becomes a key of %$ids_ref.
+    foreach my $parent_term ( $self->get_parent_terms( $term, @$types_ref ) ) {
+        # Take the ID from the term itself rather than from identifier(),
+        # so terms stored under a surrogate key are found again.
+        my $parent_term_id = $self->_get_id( $parent_term );
+
+        # Already recorded means its ancestors have been (or are being)
+        # walked; skipping avoids repeated work and endless recursion.
+        next if exists $ids_ref->{ $parent_term_id };
+        $ids_ref->{ $parent_term_id } = 0;
+        $self->_get_ancestor_terms_helper( $parent_term_id, $ids_ref, $types_ref );
+    }
+} # _get_ancestor_terms_helper
+
+sub _check_class {
+    my ( $self, $value, $expected_class ) = @_;
+    # Objects are judged by isa(); plain scalars and undef always fail.
+    if ( defined( $value ) && ref( $value ) ) {
+        return if $value->isa( $expected_class );
+        $self->throw( "Found [" . ref( $value ) . "] where [$expected_class] expected" );
+    }
+    elsif ( defined( $value ) ) {
+        $self->throw( "Found [scalar] where [$expected_class] expected" );
+    }
+    else {
+        $self->throw( "Found [undef] where [$expected_class] expected" );
+    }
+} # _check_class
+
+#################################################################
+# aliases
+#################################################################
+
+*get_relationship_types = \&get_predicate_terms;
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOterm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOterm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OBOterm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,109 @@
+# $Id: OBOterm.pm,v 1.5.4.1 2006/10/02 23:10:22 sendu Exp $
+
+=head1 NAME
+
+Bio::Ontology::OBOterm - representation of OBO terms
+
+=head1 SYNOPSIS
+
+  $term = Bio::Ontology::OBOterm->new
+    ( -identifier       => "GO:0005623",
+      -name        => "Cell",
+      -definition  => "The basic structural and functional unit ...",
+      -is_obsolete => 0,
+      -comment     => "" );
+
+  $term->add_reference( @refs );
+  $term->add_secondary_id( @ids );
+  $term->add_synonym( @synonym );
+
+  # etc.
+
+=head1 DESCRIPTION
+
+This is a data holder class for OBO terms. It is currently a dummy class, since we anticipate that
+OBO terms will become richer as more features are added to the OBO flat-file format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Sohel Merchant
+
+Email: s-merchant at northwestern.edu
+
+Address:
+
+  Northwestern University
+  Center for Genetic Medicine (CGM), dictyBase
+  Suite 1206,
+  676 St. Clair st
+  Chicago IL 60611
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+# Let the code begin...
+
+package Bio::Ontology::OBOterm;
+use strict;
+
+use constant TRUE  => 1;
+use constant FALSE => 0;
+
+use base qw(Bio::Ontology::Term);
+
+=head2 new
+
+ Title   : new
+ Usage   :   $term = Bio::Ontology::OBOterm->new
+     ( -identifier       => "GO:0005623",
+      -name        => "Cell",
+      -definition  => "The basic structural and functional unit ...",
+      -is_obsolete => 0,
+      -comment     => "" );
+
+ Function: Creates a new Bio::Ontology::OBOterm.
+ Returns : A new Bio::Ontology::OBOterm object.
+ Args    : -identifier    => the id of this OBO term [GO:nnnnnnn]
+                             (nnnnnnn is an integer of seven digits)
+           -name          => the name of this OBO term [scalar]
+           -definition    => the definition of this OBO term [scalar]
+           -ontology      => the ontology for this term (a
+                             Bio::Ontology::OntologyI compliant object)
+           -version       => version information [scalar]
+           -is_obsolete   => the obsoleteness of this OBO term [0 or 1]
+           -comment       => a comment [scalar]
+
+=cut
+
+sub new {
+    my $class = shift;
+    # No OBO-specific state yet: construction is left entirely to the parent.
+    my $obo_term = $class->SUPER::new(@_);
+    return $obo_term;
+}    # new
+
+
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Ontology.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Ontology.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Ontology.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,794 @@
+# $Id: Ontology.pm,v 1.18.4.3 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::Ontology
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::Ontology - standard implementation of an Ontology
+
+=head1 SYNOPSIS
+
+    use Bio::Ontology::Ontology;
+    use Bio::Ontology::Term;
+
+    # create ontology object
+    my $ont = Bio::Ontology::Ontology->new(-name => "OBF");
+
+    # add terms, relationships ...
+    my $bp = Bio::Ontology::Term->new(-identifier => '02', -name => "Bioperl");
+    my $obf = Bio::Ontology::Term->new(-identifier => '01', -name => "OBF");
+    my $partof = Bio::Ontology::RelationshipType->get_instance("PART_OF");
+    $ont->add_term($bp);
+    $ont->add_term($obf);
+    $ont->add_relationship($bp, $partof, $obf);
+
+    # then query
+    my @terms = $ont->get_root_terms(); # "OBF"
+    my @desc = $ont->get_descendant_terms($terms[0], $partof); # "Bioperl"
+    # ... see methods for other ways to query
+
+    # for advanced users, you can re-use the query engine outside of an
+    # ontology to let one instance manage multiple ontologies
+    my $ont2 = Bio::Ontology::Ontology->new(-name => "Foundations",
+                                            -engine => $ont->engine());
+
+
+=head1 DESCRIPTION
+
+This is a no-frills implementation of L<Bio::Ontology::OntologyI>.
+
+The query functions are implemented by delegation to an
+OntologyEngineI implementation.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::Ontology;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+#use Bio::Ontology::SimpleOntologyEngine; # loaded dynamically now!
+
+use base qw(Bio::Root::Root Bio::Ontology::OntologyI Bio::AnnotatableI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Ontology::Ontology->new();
+ Function: Builds a new Bio::Ontology::Ontology object
+ Returns : an instance of Bio::Ontology::Ontology
+ Args    : any number of named arguments. The following names will be
+           recognized by this module:
+
+            -name         the name of the ontology
+            -authority    the name of the authority for the ontology
+            -identifier   an identifier for the ontology, if any
+            -engine       the Bio::Ontology::OntologyEngineI
+                          implementation that this instance should use;
+                          default is Bio::Ontology::SimpleOntologyEngine
+
+            See the corresponding get/set methods for further documentation
+            on individual properties.
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # build the object through the parent constructor, then apply this class's named arguments
+  my $self = $class->SUPER::new(@args);
+  my ($name,$auth,$def,$id,$engine) =
+        $self->_rearrange([qw(NAME
+                          AUTHORITY
+                          DEFINITION
+                          IDENTIFIER
+                          ENGINE)
+                      ],
+                      @args);
+  defined($name) && $self->name($name);    # setters run only for arguments that were supplied
+  defined($auth) && $self->authority($auth);
+  defined($def) && $self->definition($def);
+  defined($id) && $self->identifier($id);
+  defined($engine) && $self->engine($engine);
+
+  return $self;
+}
+
+=head1 Methods from L<Bio::Ontology::OntologyI>
+
+=cut
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name($newval)
+ Function: Get/set the name of the ontology.
+ Example :
+ Returns : value of name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub name{
+    my $self = shift;
+    # setter form: any argument, including undef, is stored and returned
+    return $self->{'name'} = shift if @_;
+    return $self->{'name'};    # getter form
+}
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $obj->authority($newval)
+ Function: Get/set the authority for this ontology, for instance the
+           DNS base for the organization granting the name of the
+           ontology and identifiers for the terms.
+
+           This attribute is optional and should not generally be
+           expected by applications to have been set. It is here to
+           follow the rules for namespaces, which ontologies serve as
+           for terms.
+
+ Example :
+ Returns : value of authority (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub authority{
+    my $self = shift;
+    # setter form: any argument, including undef, is stored and returned
+    return $self->{'authority'} = shift if @_;
+    return $self->{'authority'};    # getter form
+}
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $obj->definition($newval)
+ Function: Get/set a descriptive definition of the ontology.
+ Example :
+ Returns : value of definition (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub definition{
+    my $self = shift;
+    # setter form: any argument, including undef, is stored and returned
+    return $self->{'definition'} = shift if @_;
+    return $self->{'definition'};    # getter form
+}
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $id = $obj->identifier()
+ Function: Get an identifier for this ontology.
+
+           This is primarily intended for look-up purposes. The value
+           is not modifiable and is determined automatically by the
+           implementation.  Also, the identifier's uniqueness will only
+           hold within the scope of a particular application's run
+           time since it is derived from a memory location.
+
+ Example :
+ Returns : value of identifier (a scalar)
+ Args    :
+
+
+=cut
+
+sub identifier{
+    my $self = shift;
+    # Write-once: an argument is accepted only while no identifier has been stored yet.
+    if(@_) {
+        $self->throw("cannot modify identifier for ".ref($self))
+            if exists($self->{'identifier'});
+        my $id = shift;
+        $self->{'identifier'} = $id if $id;    # false values (undef, 0, '') are ignored
+    }
+    if(! exists($self->{'identifier'})) {
+        ($self->{'identifier'}) = "$self" =~ /(0x[0-9a-fA-F]+)/;    # default: the 0x... part of the stringified object
+    }
+    return $self->{'identifier'};
+}
+
+=head2 close
+
+ Title   : close
+ Usage   :
+ Function: Release any resources this ontology may occupy. In order
+           to efficiently release unused memory or file handles, you
+           should call this method once you are finished with an
+           ontology.
+
+ Example :
+ Returns : TRUE on success and FALSE otherwise
+ Args    : none
+
+
+=cut
+
+sub close{
+    my $self = shift;
+    # NOTE(review): Bio::Ontology::OntologyStore is not loaded by this file; presumably loaded by the caller -- confirm
+    # if it is in the ontology store, remove it from there
+    my $store = Bio::Ontology::OntologyStore->get_instance();
+    $store->remove_ontology($self);
+    # essentially we need to dis-associate from the engine here
+    $self->engine(undef);
+    return 1;    # always returns 1; no failure path exists in this method itself
+}
+
+=head1 Implementation-specific public methods
+
+=cut
+
+=head2 engine
+
+ Title   : engine
+ Usage   : $engine = $obj->engine()
+ Function: Get/set the ontology engine to which all the query methods
+           delegate.
+ Example :
+ Returns : an object implementing Bio::Ontology::OntologyEngineI
+ Args    : on set, new value (an object implementing
+           Bio::Ontology::OntologyEngineI, or  undef)
+
+See L<Bio::Ontology::OntologyEngineI>.
+
+=cut
+
+sub engine{
+    my $self = shift;
+    # setter when called with an argument (undef allowed), lazily-initialising getter otherwise
+    if (@_) {
+        my $engine = shift;
+        if($engine &&
+           (! (ref($engine) &&
+               $engine->isa("Bio::Ontology::OntologyEngineI")))) {
+            $self->throw("object of class ".ref($engine)." does not implement".
+                         " Bio::Ontology::OntologyEngineI. Bummer!");
+        }
+        $self->{'engine'} = $engine;    # storing undef keeps the key, so no default engine is created later
+    } elsif (! exists($self->{'engine'})) {
+        # instantiate on demand
+        eval {
+            # this introduces a dependency on Graph.pm, so load dynamically
+            require Bio::Ontology::SimpleOntologyEngine;
+        };
+        if ($@) {
+            $self->throw("failed to load SimpleOntologyEngine, possibly "
+                         ."Graph.pm is not installed; either install or supply "
+                         ."another OntologyEngineI implementation:\n"
+                         .$@);
+        }
+        $self->{'engine'} = Bio::Ontology::SimpleOntologyEngine->new();
+    }
+    return $self->{'engine'};
+}
+
+=head1 Methods defined in L<Bio::Ontology::OntologyEngineI>
+
+=cut
+
+=head2 add_term
+
+ Title   : add_term
+ Usage   : add_term(TermI term): TermI
+ Function: Adds TermI object to the ontology engine term store
+
+           If the ontology property of the term object was not set,
+           this implementation will set it to itself upon adding the
+           term.
+
+ Example : $oe->add_term($term)
+ Returns : its argument.
+ Args    : object of class TermI.
+
+
+=cut
+
+sub add_term{
+    my $self = shift;
+    my $term = shift;
+    # any arguments after the term are forwarded to the engine unchanged
+    # set ontology if not set already
+    $term->ontology($self) if $term && (! $term->ontology());
+    return $self->engine->add_term($term,@_);
+}
+
+=head2 add_relationship
+
+ Title   : add_relationship
+ Usage   : add_relationship(RelationshipI relationship): RelationshipI
+           add_relationship(TermI subject, TermI predicate, TermI object)
+ Function: Adds a relationship object to the ontology engine.
+ Example :
+ Returns : Its argument.
+ Args    : A RelationshipI object.
+
+
+=cut
+
+sub add_relationship {
+  my $self = shift;
+  my $rel = shift;
+  # $rel is either a ready-made relationship object or the subject term of (subject, predicate, object)
+  if($rel && $rel->isa("Bio::Ontology::TermI")) {
+    # we need to construct the relationship object on the fly
+    my ($predicate,$object) = @_;    # NOTE(review): Bio::Ontology::Relationship is not loaded by this file -- confirm it is loaded elsewhere
+    $rel = Bio::Ontology::Relationship->new(
+                                            -subject_term   => $rel,
+                                            -object_term    => $object,
+                                            -predicate_term => $predicate,
+                                            -ontology       => $self,
+                                           );
+  }
+  # set ontology if not set already (NOTE(review): an undefined $rel reaches this call and dies on it)
+  $rel->ontology($self) unless $rel->ontology();
+  return $self->engine->add_relationship($rel);
+}
+
+=head2 get_relationship_type
+
+ Title   : get_relationship_type
+ Usage   : get_relationship_type(scalar): RelationshipTypeI
+ Function: Get a relationshiptype object from the ontology engine.
+ Example :
+ Returns : A RelationshipTypeI object.
+ Args    : The name (scalar) of the RelationshipTypeI object desired.
+
+
+=cut
+
+sub get_relationship_type{
+    my $self = shift;
+    return $self->engine->get_relationship_type(@_);    # plain delegation to the engine
+}
+
+=head2 get_relationships
+
+ Title   : get_relationships
+ Usage   : get_relationships(TermI term): RelationshipI[]
+ Function: Retrieves all relationship objects in the ontology, or all
+           relationships of a given term.
+ Example :
+ Returns : Array of Bio::Ontology::RelationshipI objects
+ Args    : Optionally, a Bio::Ontology::TermI compliant object
+
+
+=cut
+
+sub get_relationships {
+  my $self = shift;
+  my $term = shift;    # optional: restricts the query to this term's relationships
+  if($term) {
+        # we don't need to filter in this case
+        return $self->engine->get_relationships($term);
+  }
+  # else we need to filter by ontology
+  return grep { my $ont = $_->ontology;
+                # the first condition is a superset of the second, but
+                # we add it here for efficiency reasons, as many times
+                # it will short-cut to true and is supposedly faster than
+                # string comparison
+                ($ont == $self) || ($ont->name eq $self->name);
+              } $self->engine->get_relationships(@_);    # any further arguments go to the engine
+}
+
+=head2 get_predicate_terms
+
+ Title   : get_predicate_terms
+ Usage   : get_predicate_terms(): TermI
+ Function: Retrieves all relationship types.
+ Example :
+ Returns : Array of TermI objects
+ Args    :
+
+
+=cut
+
+sub get_predicate_terms{
+    my $self = shift;
+    # keep only the engine's predicate terms whose ontology has the same name as this one
+    return grep { $_->ontology->name eq $self->name;
+              } $self->engine->get_predicate_terms(@_);
+}
+
+=head2 get_child_terms
+
+ Title   : get_child_terms
+ Usage   : get_child_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all child terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_child_terms is a special
+           case of get_descendant_terms, limiting the search to the
+           direct descendants.
+
+           Note that a returned term may possibly be in another
+           ontology than this one, because the underlying engine may
+           manage multiple ontologies and the relationships of terms
+           between them. If you only want descendants within this
+           ontology, you need to filter the returned array.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+
+=cut
+
+sub get_child_terms{
+    return shift->engine->get_child_terms(@_);    # delegated to the engine; result is not filtered by ontology
+}
+
+=head2 get_descendant_terms
+
+ Title   : get_descendant_terms
+ Usage   : get_descendant_terms(TermI term, TermI rel_types): TermI
+ Function: Retrieves all descendant terms of a given term, that
+           satisfy a relationship among those that are specified in
+           the second argument or undef otherwise.
+
+           Note that a returned term may possibly be in another
+           ontology than this one, because the underlying engine may
+           manage multiple ontologies and the relationships of terms
+           between them. If you only want descendants within this
+           ontology, you need to filter the returned array.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+
+=cut
+
+sub get_descendant_terms{
+    return shift->engine->get_descendant_terms(@_);    # delegated to the engine; result is not filtered by ontology
+}
+
+=head2 get_parent_terms
+
+ Title   : get_parent_terms
+ Usage   : get_parent_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all parent terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_parent_terms is a special
+           case of get_ancestor_terms, limiting the search to the
+           direct ancestors.
+
+           Note that a returned term may possibly be in another
+           ontology than this one, because the underlying engine may
+           manage multiple ontologies and the relationships of terms
+           between them. If you only want parents within this
+           ontology, you need to filter the returned array.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+
+=cut
+
+sub get_parent_terms{
+    return shift->engine->get_parent_terms(@_);    # delegated to the engine; result is not filtered by ontology
+}
+
+=head2 get_ancestor_terms
+
+ Title   : get_ancestor_terms
+ Usage   : get_ancestor_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all ancestor terms of a given term, that satisfy
+           a relationship among those that are specified in the second
+           argument or undef otherwise.
+
+           Note that a returned term may possibly be in another
+           ontology than this one, because the underlying engine may
+           manage multiple ontologies and the relationships of terms
+           between them. If you only want ancestors within this
+           ontology, you need to filter the returned array.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+
+=cut
+
+sub get_ancestor_terms{
+    return shift->engine->get_ancestor_terms(@_);    # delegated to the engine; result is not filtered by ontology
+}
+
+=head2 get_leaf_terms
+
+ Title   : get_leaf_terms
+ Usage   : get_leaf_terms(): TermI
+ Function: Retrieves all leaf terms from the ontology. Leaf term is a
+           term w/o descendants.
+
+ Example : @leaf_terms = $obj->get_leaf_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_leaf_terms{    # the engine's leaf terms, restricted to terms belonging to this ontology
+    my $self = shift;
+    return grep { my $ont = $_->ontology;
+                  # the first condition is a superset of the second, but
+                  # we add it here for efficiency reasons, as many times
+                  # it will short-cut to true and is supposedly faster than
+                  # string comparison
+                  ($ont == $self) || ($ont->name eq $self->name);
+              } $self->engine->get_leaf_terms(@_);
+}
+
+=head2 get_root_terms()
+
+ Title   : get_root_terms
+ Usage   : get_root_terms(): TermI
+ Function: Retrieves all root terms from the ontology. Root term is a
+           term w/o parents.
+
+ Example : @root_terms = $obj->get_root_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_root_terms{    # the engine's root terms, restricted to terms belonging to this ontology
+    my $self = shift;
+    return grep { my $ont = $_->ontology;
+                  # the first condition is a superset of the second, but
+                  # we add it here for efficiency reasons, as many times
+                  # it will short-cut to true and is supposedly faster than
+                  # string comparison
+                  ($ont == $self) || ($ont->name eq $self->name);
+              } $self->engine->get_root_terms(@_);
+}
+
+=head2 get_all_terms
+
+ Title   : get_all_terms
+ Usage   : get_all_terms: TermI
+ Function: Retrieves all terms from the ontology.
+
+           We do not mandate an order here in which the terms are
+           returned. In fact, the default implementation will return
+           them in unpredictable order.
+
+ Example : @terms = $obj->get_all_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_all_terms{    # all of the engine's terms, restricted to terms belonging to this ontology
+    my $self = shift;
+    return grep { my $ont = $_->ontology;
+                  # the first condition is a superset of the second, but
+                  # we add it here for efficiency reasons, as many times
+                  # it will short-cut to true and is supposedly faster than
+                  # string comparison
+                  ($ont == $self) || ($ont->name eq $self->name);
+              } $self->engine->get_all_terms(@_);
+}
+
+=head2 find_terms
+
+ Title   : find_terms
+ Usage   : ($term) = $oe->find_terms(-identifier => "SO:0000263");
+ Function: Find term instances matching queries for their attributes.
+
+           An implementation may not support querying for arbitrary
+           attributes, but can generally be expected to accept
+           -identifier and -name as queries. If both are provided,
+           they are implicitly intersected.
+
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : Named parameters. The following parameters should be recognized
+           by any implementations:
+
+              -identifier    query by the given identifier
+              -name          query by the given name
+
+=cut
+
+sub find_terms{    # engine query, then keep only hits whose ontology name equals this ontology's name
+    my $self = shift;
+    return grep { $_->ontology->name eq $self->name;
+              } $self->engine->find_terms(@_);
+}
+
+=head2 find_identical_terms
+
+ Title   : find_identical_terms
+ Usage   : ($term) = $oe->find_identical_terms($term0);
+ Function: Find term instances where name or synonym
+           matches the query exactly
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : a Bio::Ontology::TermI object
+
+=cut
+
+sub find_identical_terms{    # engine query, then keep only hits whose ontology name equals this ontology's name
+    my $self = shift;
+    return grep { $_->ontology->name eq $self->name;
+              } $self->engine->find_identical_terms(@_);
+}
+
+
+=head2 find_similar_terms
+
+ Title   : find_similar_terms
+ Usage   : ($term) = $oe->find_similar_terms($term0);
+ Function: Find term instances where name or synonym, or part of one,
+           matches the query.
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : a Bio::Ontology::TermI object
+
+=cut
+
+sub find_similar_terms{    # engine query, then keep only hits whose ontology name equals this ontology's name
+    my $self = shift;
+    return grep { $_->ontology->name eq $self->name;
+              } $self->engine->find_similar_terms(@_);
+}
+
+=head2 find_identically_named_terms
+
+ Title   : find_identically_named_terms
+ Usage   : ($term) = $oe->find_identically_named_terms($term0);
+ Function: Find term instances where names match the query term
+           name exactly
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : a Bio::Ontology::TermI object
+
+=cut
+
+sub find_identically_named_terms{    # engine query, then keep only hits whose ontology name equals this ontology's name
+    my $self = shift;
+    return grep { $_->ontology->name eq $self->name
+              } $self->engine->find_identically_named_terms(@_);
+}
+
+=head1 Factory for relationships and terms
+
+=cut
+
+=head2 relationship_factory
+
+ Title   : relationship_factory
+ Usage   : $fact = $obj->relationship_factory()
+ Function: Get (and set, if the engine supports it) the object
+           factory to be used when relationship objects are created by
+           the implementation on-the-fly.
+
+ Example :
+ Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    :
+
+=cut
+
+sub relationship_factory{
+    return shift->engine->relationship_factory(@_);    # get/set is handled entirely by the engine
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $fact = $obj->term_factory()
+ Function: Get (and set, if the engine supports it) the object
+           factory to be used when term objects are created by
+           the implementation on-the-fly.
+
+ Example :
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    :
+
+=cut
+
+sub term_factory{
+    return shift->engine->term_factory(@_);    # get/set is handled entirely by the engine
+}
+
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $annos = $obj->annotation()
+ Function: Get/Set the Bio::Annotation::Collection object
+           The collection contains Bio::Annotation::SimpleValue
+           objects to store header information like the version
+           and date present in the header section of an Ontology
+           file.
+
+ Example :
+ Returns : value of annotation (a Bio::Annotation::Collection
+           compliant object)
+ Args    : A Bio::Annotation::Collection object (Optional)
+
+=cut
+
+sub annotation{
+    my $self = shift;
+    $self->{'annotation'} = shift if @_;    # stored as given; no type check is performed here
+    return $self->{'annotation'};
+}
+
+
+#################################################################
+# aliases
+#################################################################
+
+*get_relationship_types = \&get_predicate_terms;    # typeglob alias: both names call the same sub
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyEngineI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyEngineI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyEngineI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,476 @@
+# $Id: OntologyEngineI.pm,v 1.15.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for OntologyEngineI
+#
+# Cared for by Peter Dimitrov <dimitrov at gnf.org>
+#
+# (c) Peter Dimitrov
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::OntologyEngineI - Interface a minimal Ontology implementation should satisfy
+
+=head1 SYNOPSIS
+
+    # see documentation of methods
+
+=head1 DESCRIPTION
+
+This describes the minimal interface an ontology query engine should
+provide.  It intentionally does not make explicit references to the
+ontology being a DAG, nor does it mandate that the ontology be a
+vocabulary. Rather, it tries to generically express what should be
+accessible (queriable) about an ontology.
+
+The idea is to allow for different implementations for different
+purposes, which may then differ as to which operations are efficient
+and which are not, and how much richer the functionality is on top of
+this minimalistic set of methods. Check modules in the Bio::Ontology
+namespace to find out which implementations exist. At the time of
+writing, there is a SimpleOntologyEngine (which does not use
+Graph.pm), and a Graph.pm-based implementation in SimpleGOEngine.
+
+Ontology parsers in Bio::OntologyIO are required to return an
+implementation of this interface.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov at gnf.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::OntologyEngineI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 add_term
+
+ Title   : add_term
+ Usage   : add_term(TermI term): TermI
+ Function: Adds TermI object to the ontology engine term store
+ Example : $oe->add_term($term)
+ Returns : its argument.
+ Args    : object of class TermI.
+
+=cut
+
+sub add_term{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 add_relationship
+
+ Title   : add_relationship
+ Usage   : add_relationship(RelationshipI relationship): RelationshipI
+ Function: Adds a relationship object to the ontology engine.
+ Example :
+ Returns : Its argument.
+ Args    : A RelationshipI object.
+
+=cut
+
+sub add_relationship{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 add_relationship_type
+
+ Title   : add_relationship_type
+ Usage   : add_relationship_type(scalar,OntologyI ontology)
+ Function: Adds a relationshiptype object to the ontology engine.
+ Example :
+ Returns : 1 on success, undef on failure
+ Args    : The name(scalar) of the relationshiptype, and the OntologyI 
+           it is to be added to.
+
+=cut
+
+sub add_relationship_type{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_relationship_type
+
+ Title   : get_relationship_type
+ Usage   : get_relationship_type(scalar): RelationshipTypeI
+ Function: Get a relationshiptype object from the ontology engine.
+ Example :
+ Returns : A RelationshipTypeI object.
+ Args    : The name (scalar) of the RelationshipTypeI object desired.
+
+=cut
+
+sub get_relationship_type{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_relationships
+
+ Title   : get_relationships
+ Usage   : get_relationships(TermI term): RelationshipI
+ Function: Retrieves all relationship objects from this ontology engine,
+           or all relationships of a term if a term is supplied.
+ Example :
+ Returns : Array of Bio::Ontology::RelationshipI objects
+ Args    : None, or a Bio::Ontology::TermI compliant object for which
+           to retrieve the relationships.
+
+=cut
+
+sub get_relationships{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_predicate_terms
+
+ Title   : get_predicate_terms
+ Usage   : get_predicate_terms(): TermI
+ Function: Retrieves all relationship types.
+ Example :
+ Returns : Array of TermI objects
+ Args    :
+
+=cut
+
+sub get_predicate_terms{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_child_terms
+
+ Title   : get_child_terms
+ Usage   : get_child_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all child terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_child_terms is a special
+           case of get_descendant_terms, limiting the search to the
+           direct descendants.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+sub get_child_terms{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_descendant_terms
+
+ Title   : get_descendant_terms
+ Usage   : get_descendant_terms(TermI term, TermI rel_types): TermI
+ Function: Retrieves all descendant terms of a given term, that
+           satisfy a relationship among those that are specified in
+           the second argument or undef otherwise. 
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+sub get_descendant_terms{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_parent_terms
+
+ Title   : get_parent_terms
+ Usage   : get_parent_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all parent terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_parent_terms is a special
+           case of get_ancestor_terms, limiting the search to the
+           direct ancestors.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+sub get_parent_terms{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_ancestor_terms
+
+ Title   : get_ancestor_terms
+ Usage   : get_ancestor_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all ancestor terms of a given term, that satisfy
+           a relationship among those that are specified in the second
+           argument or undef otherwise. 
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+sub get_ancestor_terms{
+    shift->throw_not_implemented();    # interface stub: throws unless an implementing class overrides it
+}
+
+=head2 get_leaf_terms
+
+ Title   : get_leaf_terms
+ Usage   : get_leaf_terms(): TermI
+ Function: Retrieves all leaf terms from the ontology. Leaf term is a
+           term w/o descendants.
+
+ Example : @leaf_terms = $obj->get_leaf_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_leaf_terms{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 get_root_terms
+
+ Title   : get_root_terms
+ Usage   : get_root_terms(): TermI
+ Function: Retrieves all root terms from the ontology. Root term is a
+           term w/o ancestors.
+
+ Example : @root_terms = $obj->get_root_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_root_terms{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head1 Factory for relationships and terms
+
+=cut
+
+=head2 relationship_factory
+
+ Title   : relationship_factory
+ Usage   : $fact = $obj->relationship_factory()
+ Function: Get (and set, if the implementation supports it) the object
+           factory to be used when relationship objects are created by
+           the implementation on-the-fly.
+
+ Example : 
+ Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : 
+
+=cut
+
+sub relationship_factory{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $fact = $obj->term_factory()
+ Function: Get (and set, if the implementation supports it) the object
+           factory to be used when term objects are created by
+           the implementation on-the-fly.
+
+ Example : 
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : 
+
+=cut
+
+sub term_factory{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head1 Decorator Methods
+
+ These methods come with a default implementation that uses the
+ abstract methods defined for this interface. This may not be very
+ efficient, and hence implementors are encouraged to override these
+ methods if they can provide more efficient implementations.
+
+=cut
+
+=head2 get_all_terms
+
+ Title   : get_all_terms
+ Usage   : get_all_terms: TermI
+ Function: Retrieves all terms from the ontology.
+
+           This is more a decorator method. We provide a default
+           implementation here that loops over all root terms and gets
+           all descendants for each root term. The overall union of
+           terms is then made unique by name and ontology.
+
+           We do not mandate an order here in which the terms are
+           returned. In fact, the default implementation will return
+           them in unpredictable order.
+
+           Engine implementations that can provide a more efficient
+           method for obtaining all terms should definitely override
+           this.
+
+ Example : @terms = $obj->get_all_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_all_terms{
+    my ($self) = @_;
+
+    # Start from the root terms and collect every descendant of each root.
+    my @root_terms  = $self->get_root_terms();
+    my @descendants = map { $self->get_descendant_terms($_) } @root_terms;
+
+    # Index descendants first and roots last under the key
+    # "<term name>@<ontology name>"; a later term with the same key
+    # replaces an earlier one, which is what makes the result unique.
+    my %term_for;
+    foreach my $term (@descendants, @root_terms) {
+        $term_for{ $term->name."@".$term->ontology->name } = $term;
+    }
+
+    # Hash order: callers must not rely on the order of the returned terms.
+    return values %term_for;
+}
+
+=head2 find_terms
+
+ Title   : find_terms
+ Usage   : ($term) = $oe->find_terms(-identifier => "SO:0000263");
+ Function: Find term instances matching queries for their attributes.
+
+           An implementation may not support querying for arbitrary
+           attributes, but can generally be expected to accept
+           -identifier and -name as queries. If both are provided,
+           they are implicitly intersected.
+
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : Named parameters. The following parameters should be recognized
+           by any implementation:
+
+              -identifier    query by the given identifier
+              -name          query by the given name
+
+=cut
+
+sub find_terms{
+    my ($self, %params) = @_;
+
+    # Add a lowercased alias for every supplied key so that e.g. -NAME and
+    # -name are both honoured. Keys and values are snapshotted first and
+    # assigned in one slice, so the outcome does not depend on partially
+    # updated state.
+    my @given_keys   = keys %params;
+    my @given_values = values %params;
+    @params{ map { lc $_ } @given_keys } = @given_values;
+
+    # A term qualifies unless it fails one of the supplied criteria; the
+    # name is only consulted when the identifier test has passed.
+    my @matches;
+    foreach my $term ($self->get_all_terms()) {
+        next if exists($params{-identifier})
+            && $term->identifier() ne $params{-identifier};
+        next if exists($params{-name})
+            && $term->name() ne $params{-name};
+        push(@matches, $term);
+    }
+    return @matches;
+}
+
+=head1 Experimental API method proposals
+
+ Ontologies are a very new domain in bioperl, and we are not sure yet
+ what we will want to do on and with ontologies in which
+ situation. The methods from here on downwards are solely API
+ descriptions to solicit comment and feedback; the chance of any of
+ those being actually implemented already is very slim.
+
+ Disclaimer: As long as an API method stays in this section, it is
+ subject to change, possibly even radical change or complete
+ deletion. If it's not implemented yet (most likely it isn't),
+ implement yourself at your own risk.
+
+ So far for the disclaimer. The reason the API description is here,
+ however, is to solicit feedback. Please feel encouraged to share your
+ opinion, regardless of what it is (a notable difference of this API
+ method to others is that there is actually no working code behind it
+ - so the defense line is non-existent for practical purposes).
+
+=cut
+
+=head2 common_ancestor_path
+
+ Title   : common_ancestor_path
+ Usage   :
+ Function: Get the paths from two terms A and B to term C, such that
+           there is no other term D to which A and B would have a shorter
+           path, provided there is a term C to which both A and B are
+           connected by a path.
+
+           Note that the path to the common ancestor between A and A
+           exists, has distance zero, and predicate "identity".
+
+           The search for the common ancestor C can be further
+           constrained by supplying a predicate term. If supplied, the
+           predicates of the two paths (A,C) and (B,C) must have a
+           common ancestor identical to the predicate, or that has a
+           path to the predicate.
+
+ Example :
+ Returns : The path of the first term to the common ancestor in scalar
+           context, and both paths in list context. Paths are
+           Bio::Ontology::PathI compliant objects.
+ Args    : The two terms (Bio::Ontology::TermI objects), and optionally
+           a constraining common predicate (Bio::Ontology::TermI object).
+           The latter may also be given as a scalar, in which case it
+           is treated as a boolean that, if TRUE, means that the two paths
+           must have identical predicates in order to be returned.
+
+=cut
+
+sub common_ancestor_path{
+    # Experimental API placeholder: always raises the not-implemented
+    # error unless an implementing class overrides it.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,410 @@
+# $Id: OntologyI.pm,v 1.9.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::OntologyI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::OntologyI - Interface for an ontology implementation
+
+=head1 SYNOPSIS
+
+    # see method documentation
+
+=head1 DESCRIPTION
+
+This describes the minimal interface an ontology implementation must
+provide. In essence, it represents a namespace with description on top
+of the query interface OntologyEngineI.
+
+This interface inherits from L<Bio::Ontology::OntologyEngineI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::OntologyI;
+use strict;
+
+
+use base qw(Bio::Ontology::OntologyEngineI);
+
+=head1  Methods defined in this interface.
+
+=cut
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name($newval)
+ Function: Get/set the name of this ontology.
+ Example : 
+ Returns : value of name (a scalar)
+ Args    : 
+
+=cut
+
+sub name{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $auth = $obj->authority()
+ Function: Get/set the authority for this ontology, for instance the
+           DNS base for the organization granting the name of the
+           ontology and identifiers for the terms.
+
+           This attribute is optional and should not generally be
+           expected by applications to have been set. It is here to
+           follow the rules for namespaces, which ontologies serve as
+           for terms.
+
+ Example : 
+ Returns : value of authority (a scalar)
+ Args    : 
+
+=cut
+
+sub authority{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $id = $obj->identifier()
+ Function: Get an identifier for this ontology.
+
+           This is primarily intended for look-up purposes. Clients
+           should not expect the value to be modifiable, and it may
+           not be allowed to set its value from outside. Also, the
+           identifier's uniqueness may only hold within the scope of a
+           particular application's run time, i.e., it may be a memory
+           location.
+
+ Example : 
+ Returns : value of identifier (a scalar)
+ Args    : 
+
+=cut
+
+sub identifier{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $def = $obj->definition()
+ Function: Get a descriptive definition for this ontology.
+ Example : 
+ Returns : value of definition (a scalar)
+ Args    : 
+
+=cut
+
+sub definition{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 close
+
+ Title   : close
+ Usage   :
+ Function: Release any resources this ontology may occupy. In order
+           to efficiently release used memory or file handles, you
+           should call this method once you are finished with an
+           ontology.
+
+ Example :
+ Returns : TRUE on success and FALSE otherwise
+ Args    : none
+
+=cut
+
+sub close{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head1 Methods inherited from L<Bio::Ontology::OntologyEngineI>
+
+Their documentations are copied here for completeness. In most use
+cases, you will want to access the query methods of an ontology, not
+just the name and description ...
+
+=cut
+
+=head2 add_term
+
+ Title   : add_term
+ Usage   : add_term(TermI term): TermI
+ Function: Adds TermI object to the ontology engine term store.
+
+           For ease of use, if the ontology property of the term
+           object was not set, an implementation is encouraged to set
+           it to itself upon adding the term.
+
+ Example : $oe->add_term($term)
+ Returns : its argument.
+ Args    : object of class TermI.
+
+=cut
+
+=head2 add_relationship
+
+ Title   : add_relationship
+ Usage   : add_relationship(RelationshipI relationship): RelationshipI
+ Function: Adds a relationship object to the ontology engine.
+ Example :
+ Returns : Its argument.
+ Args    : A RelationshipI object.
+
+=cut
+
+=head2 get_relationships
+
+ Title   : get_relationships
+ Usage   : get_relationships(TermI term): RelationshipI
+ Function: Retrieves all relationship objects from this ontology engine,
+           or all relationships of a term if a term is supplied.
+ Example :
+ Returns : Array of Bio::Ontology::RelationshipI objects
+ Args    : None, or a Bio::Ontology::TermI compliant object for which
+           to retrieve the relationships.
+
+=cut
+
+=head2 get_predicate_terms
+
+ Title   : get_predicate_terms
+ Usage   : get_predicate_terms(): TermI[]
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+=head2 get_child_terms
+
+ Title   : get_child_terms
+ Usage   : get_child_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all child terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_child_terms is a special
+           case of get_descendant_terms, limiting the search to the
+           direct descendants.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+=head2 get_descendant_terms
+
+ Title   : get_descendant_terms
+ Usage   : get_descendant_terms(TermI term, TermI rel_types): TermI
+ Function: Retrieves all descendant terms of a given term, that
+           satisfy a relationship among those that are specified in
+           the second argument or undef otherwise.
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+=head2 get_parent_terms
+
+ Title   : get_parent_terms
+ Usage   : get_parent_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all parent terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_parent_terms is a special
+           case of get_ancestor_terms, limiting the search to the
+           direct ancestors.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+=head2 get_ancestor_terms
+
+ Title   : get_ancestor_terms
+ Usage   : get_ancestor_terms(TermI term, TermI predicate_terms): TermI
+ Function: Retrieves all ancestor terms of a given term, that satisfy
+           a relationship among those that are specified in the second
+           argument or undef otherwise.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+=head2 get_leaf_terms
+
+ Title   : get_leaf_terms
+ Usage   : get_leaf_terms(): TermI
+ Function: Retrieves all leaf terms from the ontology. Leaf term is a
+           term w/o descendants.
+
+ Example : @leaf_terms = $obj->get_leaf_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+=head2 get_root_terms()
+
+ Title   : get_root_terms
+ Usage   : get_root_terms(): TermI
+ Function: Retrieves all root terms from the ontology. Root term is a
+           term w/o ancestors.
+
+ Example : @root_terms = $obj->get_root_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+=head2 get_all_terms
+
+ Title   : get_all_terms
+ Usage   : get_all_terms: TermI
+ Function: Retrieves all terms from the ontology.
+
+           We do not mandate an order here in which the terms are
+           returned. In fact, the default implementation will return
+           them in unpredictable order.
+
+ Example : @terms = $obj->get_all_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+
+=head2 find_terms
+
+ Title   : find_terms
+ Usage   : ($term) = $oe->find_terms(-identifier => "SO:0000263");
+ Function: Find term instances matching queries for their attributes.
+
+           An implementation may not support querying for arbitrary
+           attributes, but can generally be expected to accept
+           -identifier and -name as queries. If both are provided,
+           they are implicitly intersected.
+
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : Named parameters. The following parameters should be recognized
+           by any implementation:
+
+              -identifier    query by the given identifier
+              -name          query by the given name
+
+=cut
+
+=head1 Factory for relationships and terms
+
+=cut
+
+=head2 relationship_factory
+
+ Title   : relationship_factory
+ Usage   : $fact = $obj->relationship_factory()
+ Function: Get (and set, if the implementation supports it) the object
+           factory to be used when relationship objects are created by
+           the implementation on-the-fly.
+
+ Example : 
+ Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : 
+
+=cut
+
+sub relationship_factory{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $fact = $obj->term_factory()
+ Function: Get (and set, if the implementation supports it) the object
+           factory to be used when term objects are created by
+           the implementation on-the-fly.
+
+ Example : 
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : 
+
+=cut
+
+sub term_factory{
+    # Interface stub: always raises the not-implemented error;
+    # implementing classes are expected to override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyStore.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyStore.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/OntologyStore.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,346 @@
+# $Id: OntologyStore.pm,v 1.13.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::OntologyStore
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::OntologyStore - A repository of ontologies
+
+=head1 SYNOPSIS
+
+  #----------
+  #SCENARIO 1
+  #----------
+
+  #make an ontology object manually. via OntologyIO
+  my $io = Bio::OntologyIO->new(
+                                #params to fetch Cell Ontology here
+                               );
+  my $cell_ontology = $io->next_ontology;
+
+  #this is a singleton that caches the fact that you've created
+  #a 'Cell Ontology' instance...
+  my $store = Bio::Ontology::OntologyStore->get_instance();
+
+  #...and it can hand you back a copy of it at any time.
+  my $cell_ontology_copy = $store->get_ontology('Cell Ontology');
+
+
+  #----------
+  #SCENARIO 2
+  #----------
+
+  my $store = Bio::Ontology::OntologyStore->get_instance();
+  #this use case allows the construction of an ontology on
+  #demand just by supplying the name.
+  my $ontology = $store->get_ontology('Sequence Ontology');
+
+
+=head1 DESCRIPTION
+
+The primary purpose of this module is that of a singleton repository
+of L<Bio::Ontology::OntologyI> instances from which an Ontology
+instance can be retrieved by name or identifier. This enables TermI
+implementations to return their corresponding OntologyI through using
+this singleton store instead of storing a direct reference to the
+Ontology object. The latter would almost inevitably lead to memory
+cycles, and would therefore potentially blow up an application.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+  Hilmar Lapp E<lt>hlapp at gmx.netE<gt>
+  Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::OntologyStore;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Ontology::DocumentRegistry;
+use Bio::OntologyIO;
+use FileHandle;
+use File::Spec::Functions;
+
+
+use base qw(Bio::Root::Root);
+
+# File-scoped (static) state shared by every caller of this module; there is
+# only one of each in any application.
+#
+# %ont_store_by_name maps an ontology name to its ontology object. It is
+# written by register_ontology() and by the on-demand loading in
+# get_ontology(), and entries are deleted by remove_ontology().
+my %ont_store_by_name = ();
+# %ont_store_by_id maps an ontology identifier to its ontology object. It is
+# read by get_ontology(-id => ...) and entries are deleted by
+# remove_ontology().
+# NOTE(review): no reachable code in this file appears to populate this
+# hash - confirm whether lookups by identifier are expected to work.
+my %ont_store_by_id = ();
+# NOTE(review): %ont_aliases is not referenced by any sub in this file;
+# presumably intended to map display names to registry names - confirm.
+my %ont_aliases = (
+                   'Gene Ontology' => 'Gene_Ontology'
+                    );
+# The single shared store object handed out by get_instance() (and hence by
+# new()); undef until the first request creates it.
+my $instance = undef;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Ontology::OntologyStore->new();
+ Function: Returns the Bio::Ontology::OntologyStore object.
+
+           Unlike usual implementations of new, this implementation
+           will try to return a previously instantiated store, if
+           there is any. It is just a synonym for get_instance. In
+           order to avoid ambiguities in your code, you may want to
+           call get_instance explicitly instead, which is also
+           usually better associated with this kind of behaviour.
+
+ Returns : an instance of Bio::Ontology::OntologyStore
+ Args    :
+
+=cut
+
+sub new {
+    # new() is only an alias: delegate to get_instance() so that the
+    # shared store object is returned rather than a fresh one.
+    my ($class, @args) = @_;
+    return $class->get_instance(@args);
+}
+
+=head2 get_instance
+
+ Title   : get_instance
+ Usage   :
+ Function: Get an instance of this class for perusal.
+
+           Since by design this class is meant to be used as a
+           singleton, the implementation will return a previously
+           instantiated store if there is one, and instantiate a new
+           one otherwise. In order to use this class by means of an
+           instance, call this method for added code clarity, not
+           new().
+
+ Example :
+ Returns : an instance of this class
+ Args    : named parameters, if any (currently, there are no
+           class-specific parameters other than those accepted by
+           Bio::Root::Root).
+
+See L<Bio::Root::Root>.
+
+=cut
+
+sub get_instance{
+   # Return the shared store object, creating it on first use.
+   # Arguments (if any) are passed to the parent constructor, but only on
+   # the call that actually creates the instance; later calls ignore them.
+   my ($self, @args) = @_;
+
+   if(! $instance) {
+       $instance = $self->SUPER::new(@args);
+   }
+   return $instance;
+}
+
+=head2 get_ontology
+
+ Title   : get_ontology
+ Usage   :
+ Function: Get a previously instantiated and registered instance of
+           this class by name or by identifier. 
+
+           One of the main purposes of this class is to enable TermI
+           implementations to return their respective ontology without
+           keeping a strong reference to the respective ontology
+           object. Only previously registered objects can be
+           retrieved.
+
+           This is a class method, hence you can call it on the class
+           name, without dereferencing an object.
+
+ Example :
+ Returns : a Bio::Ontology::OntologyI implementing object, or undef
+           if the query could not be satisfied
+ Args    : Named parameters specifying the query. The following parameters
+           are recognized:
+              -name   query the store for an ontology with the given name
+              -id     query for an ontology with the given identifier
+           If both are specified, an implicit AND logical operator is
+           assumed.
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub get_ontology{
+  # Look up an ontology by -name and/or -id (implicit AND if both given).
+  # Returns the ontology object, or nothing/undef if the query cannot be
+  # satisfied. A name that is not in the store yet is looked up in the
+  # document registry and, if a URL is known for it, loaded on demand and
+  # cached under that name.
+  my ($self, @args) = @_;
+  my $ont;
+
+  my ($name,$id) = $self->_rearrange([qw(NAME ID)], @args);
+  if($id) {
+    $ont = $ont_store_by_id{$id};
+    return unless $ont; # no AND can be satisfied in this case
+  }
+
+  if($name) {
+    my $o = $ont_store_by_name{$name};
+
+    if(!$o){
+      # not in the store yet: ask the document registry where to get it
+      my $doc_registry = Bio::Ontology::DocumentRegistry->get_instance();
+      my($url,$def,$fmt) = $doc_registry->documents($name);
+
+      # $url may be a plain URL or an array reference of URLs; both are
+      # true values and both are handed to Bio::OntologyIO unchanged
+      if($url){
+        my $io = Bio::OntologyIO->new(-url      => $url,
+                                      -defs_url => $def,
+                                      -format   => $fmt,
+                                     );
+        $o = $io->next_ontology();
+        $ont_store_by_name{$name} = $o;
+      }
+    }
+
+    # The name must resolve, and if an ontology was already selected by id
+    # it has to be the same one. Checking $o first avoids calling
+    # identifier() on undef when the name could not be resolved.
+    if($o && ((! $ont) || ($ont->identifier() eq $o->identifier()))) {
+      $ont = $o;
+    } else {
+      $ont = undef;
+    }
+  }
+
+  return $ont;
+}
+
+=head2 register_ontology
+
+ Title   : register_ontology
+ Usage   :
+ Function: Registers the given Ontology object for later retrieval
+           by name and identifier.
+
+ Example :
+ Returns : TRUE on success and FALSE otherwise
+ Args    : the Bio::Ontology::OntologyI object(s) to register
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub register_ontology {
+  # Register each given ontology object in the store under its name.
+  # Throws if an argument is not a Bio::Ontology::OntologyI object.
+  # An ontology without a (true) name is silently skipped, and an existing
+  # entry with the same name is replaced. Always returns TRUE.
+  #
+  # NOTE(review): the original code carried duplicate-name/duplicate-id
+  # checks and an assignment to the by-identifier store after an
+  # unconditional 'next' for every valid object, so none of that ever ran.
+  # The unreachable code was dropped here without changing behaviour;
+  # confirm whether indexing by identifier and duplicate detection are
+  # actually wanted.
+  my ($self, @args) = @_;
+
+  foreach my $ont (@args) {
+    if(! (ref($ont) && $ont->isa("Bio::Ontology::OntologyI"))) {
+      $self->throw((ref($ont) ? ref($ont) : $ont)." does not implement ".
+                   "Bio::Ontology::OntologyI or is not an object");
+    }
+    $ont_store_by_name{$ont->name()} = $ont if $ont->name;
+  }
+  return 1;
+}
+
+=head2 remove_ontology
+
+ Title   : remove_ontology
+ Usage   :
+ Function: Remove the specified ontology from the store.
+ Example :
+ Returns : TRUE on success and FALSE otherwise
+ Args    : the Bio::Ontology::OntologyI implementing object(s)
+           to be removed from the store
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub remove_ontology{
+    # Drop each given ontology from both lookup tables. Throws if an
+    # argument is not a Bio::Ontology::OntologyI object; otherwise always
+    # returns TRUE, whether or not the ontology was actually stored.
+    my ($self, @ontologies) = @_;
+
+    for my $ont (@ontologies) {
+        if(! ($ont && ref($ont) && $ont->isa("Bio::Ontology::OntologyI"))) {
+            $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI");
+        }
+        # remove it from both the id hash and the name hash
+        delete $ont_store_by_id{$ont->identifier()};
+        if($ont->name()) {
+            delete $ont_store_by_name{$ont->name()};
+        }
+    }
+    return 1;
+}
+
+=head2 guess_ontology()
+
+ Usage   : my $ontology = 
+           Bio::Ontology::OntologyStore->guess_ontology('GO:0000001');
+ Function: tries to guess which ontology a term identifier comes from
+           and returns the name of that ontology. The ontology itself
+           is not loaded; pass the name to get_ontology() for that.
+ Example :
+ Returns : the name of the ontology (a string), or undef if the
+           prefix is not recognized
+ Args    : an ontology term identifier in XXXX:DDDDDDD format.  
+           Guessing is based on the XXXX string before the colon.
+
+=cut
+
+sub guess_ontology {
+  # Map the prefix of a term identifier ("PREFIX:rest") to the name of the
+  # ontology it belongs to. Returns the ontology name as a string, or undef
+  # if the identifier is undefined, has no "PREFIX:" part, or the prefix is
+  # not one of the known ones. The ontology itself is not loaded here.
+  my ($self,$id) = @_;
+
+  # known identifier prefixes and the ontology names they stand for
+  my %prefix = (
+                SO => 'Sequence Ontology',
+                SOFA => 'Sequence Ontology Feature Annotation',
+                GO => 'Gene Ontology',
+               );
+
+  # everything before the first colon; stays undef if $id is undefined or
+  # does not look like "PREFIX:rest"
+  my $prefix;
+  ($prefix) = $id =~ /^(.+?):.+$/ if defined $id;
+
+  # explicit undef (not an empty list) is kept for backward compatibility
+  # with callers that use the result in list context
+  return undef unless defined $prefix;
+  return $prefix{$prefix} || undef;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Path.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Path.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Path.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,188 @@
+# $Id: Path.pm,v 1.7.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Path
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net> 
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::Path - a path for an ontology term graph
+
+=head1 SYNOPSIS
+
+  $path = Bio::Ontology::Path->new( -identifier     => "16847",
+                                    -subject_term   => $subj,
+                                    -object_term    => $obj,
+                                    -predicate_term => $pred,
+                                    -distance       => 3 );
+
+=head1 DESCRIPTION
+
+This is a basic implementation of Bio::Ontology::PathI.
+
+Essentially this is a very thin extension of
+L<Bio::Ontology::Relationship>. It basically adds a method distance().
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Hilmar Lapp <hlapp at gmx.net>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::Path;
+use strict;
+
+use base qw(Bio::Ontology::Relationship Bio::Ontology::PathI);
+
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $rel = Bio::Ontology::Path->new(-identifier   => "16847",
+                                           -subject_term => $subject,
+                                           -object_term  => $object,
+                                           -predicate_term => $type,
+                                           -distance     => 3 );
+ Function: Creates a new Bio::Ontology::Path.
+ Returns : A new Bio::Ontology::Path object.
+ Args    : -identifier     => the identifier of this relationship [scalar]
+           -subject_term   => the subject term [Bio::Ontology::TermI]
+           -object_term    => the object term [Bio::Ontology::TermI]  
+           -predicate_term => the predicate term [Bio::Ontology::TermI]
+           -distance       => the distance between subject and object
+
+=cut
+
+sub new {
+
+    my( $class, @args ) = @_;
+
+    # The parent constructor handles identifier, terms and ontology.
+    my $self = $class->SUPER::new( @args );
+
+    # Pick the one additional named argument this class understands.
+    my ( $distance ) =
+	$self->_rearrange( [qw( DISTANCE)
+			    ], @args );
+
+    # Test for definedness, not truth: a distance of 0 (subject and object
+    # being the same term) is a legitimate value and must not be dropped.
+    $self->distance($distance) if defined $distance;
+
+    return $self;
+
+} # new
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $rel->init();   
+ Function: Initializes this Path to all undef.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my ( $self, @rest ) = @_;
+
+    # Reset the inherited attributes first, then this class's own slot.
+    $self->SUPER::init( @rest );
+    $self->{'_distance'} = undef;
+
+} # init
+
+
+=head2 distance
+
+ Title   : distance
+ Usage   : $obj->distance($newval)
+ Function: Get/set the distance between the two terms connected
+           by this path.
+
+           Note that modifying the distance may not be meaningful. The
+           implementation here is not connected to any graph engine,
+           so changing an existing value may simply render the
+           attribute's value wrong.
+
+ Example : 
+ Returns : value of distance (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub distance{
+    my ( $self, @value ) = @_;
+
+    # Any argument at all, including an explicit undef, acts as a setter.
+    if ( @value ) {
+        $self->{'_distance'} = $value[0];
+    }
+
+    return $self->{'_distance'};
+}
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $rel->to_string();
+ Function: to_string method for Path.
+ Returns : A string representation of this Path.
+ Args    :
+
+=cut
+
+sub to_string {
+    my( $self ) = @_;
+
+    my $s = $self->SUPER::to_string();
+    # Bio::Ontology::Relationship::to_string() ends with the predicate
+    # identifier and no line break; terminate that line so that the
+    # distance header below starts on a line of its own.
+    $s .= "\n" unless $s =~ /\n\z/;
+    $s .= "-- Distance:\n";
+    # An unset distance leaves the value line empty.
+    $s .= $self->distance() if defined($self->distance());
+    $s .= "\n";
+
+    return $s;
+
+} # to_string
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/PathI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/PathI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/PathI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,163 @@
+# $Id: PathI.pm,v 1.6.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for PathI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::PathI - Interface for a path between ontology terms
+
+=head1 SYNOPSIS
+
+    # see documentation of methods and an implementation, e.g.,
+    # Bio::Ontology::Path
+
+=head1 DESCRIPTION
+
+This is the minimal interface for a path between two terms in
+an ontology. Ontology engines may use this.
+
+Essentially this is a very thin extension of the
+L<Bio::Ontology::RelationshipI> interface. It basically adds an
+attribute distance(). For a RelationshipI, you can think of distance as
+equal to zero (subject == object) or 1 (subject != object).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::PathI;
+use strict;
+
+use base qw(Bio::Ontology::RelationshipI);
+
+
+=head2 distance
+
+ Title   : distance
+ Usage   : $obj->distance($newval)
+ Function: Get (and set if the implementation allows it) the distance
+           between the two terms connected by this path.
+
+ Example : 
+ Returns : value of distance (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub distance{
+    my $self = shift;
+
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+=head1 Bio::Ontology::RelationshipI Methods
+
+=cut
+
+=head2 subject_term
+
+ Title   : subject_term
+ Usage   : $subj = $rel->subject_term();
+ Function: Set/get for the subject term of this Relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The subject term [Bio::Ontology::TermI].
+ Args    : 
+
+=cut
+
+=head2 object_term
+
+ Title   : object_term
+ Usage   : $object = $rel->object_term();
+ Function: Set/get for the object term of this Relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The object term [Bio::Ontology::TermI].
+ Args    : 
+
+=cut
+
+=head2 predicate_term
+
+ Title   : predicate_term
+ Usage   : $type = $rel->predicate_term();
+ Function: Set/get for the predicate of this relationship.
+
+           For a path the predicate (relationship type) is defined as
+           the greatest common denominator of all predicates
+           (relationship types) encountered along the path. I.e., if
+           predicate A is-a predicate B, the greatest common
+           denominator for a path containing both predicates A and B is B
+
+ Returns : The predicate term [Bio::Ontology::TermI].
+ Args    : 
+
+=cut
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $ont = $obj->ontology()
+ Function: Get the ontology that defined this relationship.
+ Example : 
+ Returns : an object implementing Bio::Ontology::OntologyI
+ Args    : 
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Relationship.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Relationship.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Relationship.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,401 @@
+# $Id: Relationship.pm,v 1.14.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Relationship
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::Relationship - a relationship for an ontology
+
+=head1 SYNOPSIS
+
+  $rel = Bio::Ontology::Relationship->new( -identifier     => "16847",
+                                           -subject_term   => $subj,
+                                           -object_term    => $obj,
+                                           -predicate_term => $pred );
+
+=head1 DESCRIPTION
+
+This is a basic implementation of Bio::Ontology::RelationshipI. 
+
+The terminology we use here is the one commonly used for ontologies,
+namely the triple of (subject, predicate, object), which in addition
+is scoped in a namespace (ontology). It is called triple because it is
+a tuple of three ontology terms.
+
+There are other terminologies in use for expressing relationships. For
+those who it helps to better understand the concept, the triple of
+(child, relationship type, parent) would be equivalent to the
+terminology chosen here, disregarding the question whether the notion
+of parent and child is sensible in the context of the relationship
+type or not. Especially in the case of ontologies with a wide variety
+of predicates the parent/child terminology and similar ones can
+quickly become ambiguous (e.g., A synthesises B), meaningless (e.g., A
+binds B), or even conflicting (e.g., A is-parent-of B), and are
+therefore strongly discouraged.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 CONTRIBUTORS
+
+ Hilmar Lapp, email: hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::Relationship;
+use strict;
+use Bio::Ontology::TermI;
+
+use base qw(Bio::Root::Root Bio::Ontology::RelationshipI);
+
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $rel = Bio::Ontology::Relationship->new(-identifier   => "16847",
+                                                   -subject_term => $subject,
+                                                   -object_term  => $object,
+                                                   -predicate_term => $type );
+ Function: Creates a new Bio::Ontology::Relationship.
+ Returns : A new Bio::Ontology::Relationship object.
+ Args    : -identifier     => the identifier of this relationship [scalar]
+           -subject_term   => the subject term [Bio::Ontology::TermI]
+           -object_term    => the object term [Bio::Ontology::TermI]  
+           -predicate_term => the predicate term [Bio::Ontology::TermI]
+
+=cut
+
+sub new {
+    my ( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    # Named arguments: every canonical key is immediately followed by the
+    # older alias that is still accepted for backwards compatibility.
+    my ( $identifier,
+         $subject_term,   $child,
+         $object_term,    $parent,
+         $predicate_term, $reltype,
+         $ont )
+        = $self->_rearrange( [ qw( IDENTIFIER
+                                   SUBJECT_TERM   CHILD_TERM
+                                   OBJECT_TERM    PARENT_TERM
+                                   PREDICATE_TERM RELATIONSHIP_TYPE
+                                   ONTOLOGY ) ],
+                             @args );
+
+    # Start from a clean slate before applying the arguments.
+    $self->init();
+    $self->identifier( $identifier );
+
+    # The canonical argument wins; the alias is only a fallback.
+    $subject_term   ||= $child;
+    $object_term    ||= $parent;
+    $predicate_term ||= $reltype;
+
+    # Only call a setter when there is a value to set.
+    $self->subject_term( $subject_term )     if $subject_term;
+    $self->object_term( $object_term )       if $object_term;
+    $self->predicate_term( $predicate_term ) if $predicate_term;
+    $self->ontology( $ont )                  if $ont;
+
+    return $self;
+
+} # new
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $rel->init();   
+ Function: Initializes this Relationship to all undef.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    # Blank the plain attribute slots in one pass ...
+    $self->{$_} = undef
+        for qw( _identifier _subject_term _object_term _predicate_term );
+
+    # ... and clear the ontology through its accessor.
+    $self->ontology(undef);
+
+} # init
+
+
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $rel->identifier( "100050" );
+           or
+           print $rel->identifier();
+ Function: Set/get for the identifier of this Relationship.
+ Returns : The identifier [scalar].
+ Args    : The identifier [scalar] (optional).
+
+=cut
+
+sub identifier {
+    my $self   = shift;
+    my $new_id = shift;
+
+    # Only a defined argument acts as a setter, so passing undef cannot
+    # clear an identifier that has already been set.
+    $self->{'_identifier'} = $new_id if defined $new_id;
+
+    return $self->{'_identifier'};
+} # identifier
+
+
+
+
+=head2 subject_term
+
+ Title   : subject_term
+ Usage   : $rel->subject_term( $subject );
+           or
+           $subject = $rel->subject_term();
+ Function: Set/get for the subject term of this Relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The subject term [Bio::Ontology::TermI].
+ Args    : The subject term [Bio::Ontology::TermI] (optional).
+
+=cut
+
+sub subject_term {
+    my $self = shift;
+    my $term = shift;
+
+    # Without a defined argument this is a plain getter.
+    return $self->{'_subject_term'} unless defined $term;
+
+    # Setter: the new value has to be a Bio::Ontology::TermI.
+    $self->_check_class( $term, "Bio::Ontology::TermI" );
+    return $self->{'_subject_term'} = $term;
+
+} # subject_term
+
+
+
+=head2 object_term
+
+ Title   : object_term
+ Usage   : $rel->object_term( $object );
+           or
+           $object = $rel->object_term();
+ Function: Set/get for the object term of this Relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The object term [Bio::Ontology::TermI].
+ Args    : The object term [Bio::Ontology::TermI] (optional).
+
+=cut
+
+sub object_term {
+    my $self = shift;
+    my $term = shift;
+
+    # Without a defined argument this is a plain getter.
+    return $self->{'_object_term'} unless defined $term;
+
+    # Setter: the new value has to be a Bio::Ontology::TermI.
+    $self->_check_class( $term, "Bio::Ontology::TermI" );
+    return $self->{'_object_term'} = $term;
+}
+
+
+
+=head2 predicate_term
+
+ Title   : predicate_term
+ Usage   : $rel->predicate_term( $type );
+           or
+           $type = $rel->predicate_term();
+ Function: Set/get for the predicate (relationship type) of this
+           relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The predicate term [Bio::Ontology::TermI].
+ Args    : The predicate term [Bio::Ontology::TermI] (optional).
+
+=cut
+
+sub predicate_term {
+    my $self = shift;
+    my $term = shift;
+
+    # Without a defined argument this is a plain getter.
+    return $self->{'_predicate_term'} unless defined $term;
+
+    # Setter: the new value has to be a Bio::Ontology::TermI.
+    $self->_check_class( $term, "Bio::Ontology::TermI" );
+    return $self->{'_predicate_term'} = $term;
+}
+
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $ont = $obj->ontology()
+ Function: Get/set the ontology that defined this relationship.
+ Example : 
+ Returns : an object implementing L<Bio::Ontology::OntologyI>
+ Args    : on set, undef or an object implementing 
+           Bio::Ontology::OntologyI (optional)
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub ontology{
+    my $self = shift;
+
+    # Getter: called without any argument.
+    return $self->{"_ontology"} unless @_;
+
+    # Setter: a false value (typically undef) is stored as is and thereby
+    # clears the ontology.
+    my $ont = shift;
+    if($ont) {
+	if(! ref($ont)) {
+	    # A plain string is taken as the name of the ontology. The class
+	    # is loaded on demand because this module does not load it at
+	    # compile time.
+	    require Bio::Ontology::Ontology;
+	    $ont = Bio::Ontology::Ontology->new(-name => $ont);
+	}
+	# Check blessed() first: calling ->isa on a plain hash or array
+	# reference would die with a raw Perl error instead of reaching the
+	# descriptive throw() below.
+	require Scalar::Util;
+	if(! (Scalar::Util::blessed($ont)
+	      && $ont->isa("Bio::Ontology::OntologyI"))) {
+	    $self->throw(ref($ont)." does not implement ".
+			 "Bio::Ontology::OntologyI. Bummer.");
+	}
+    }
+    return $self->{"_ontology"} = $ont;
+}
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $rel->to_string();
+ Function: to_string method for Relationship.
+ Returns : A string representation of this Relationship.
+ Args    :
+
+=cut
+
+sub to_string {
+    my( $self ) = @_;
+
+    # Undefined values are rendered as empty strings; keep -w quiet about it.
+    local $^W = 0;
+
+    # Identifier of a term, or the empty string when the term (or its
+    # identifier) is not set. init() leaves all three terms undefined until
+    # they are assigned, so calling a method on them blindly would die.
+    my $id_of = sub {
+        my $term = shift;
+        my $id = defined $term ? $term->identifier() : undef;
+        return defined $id ? $id : "";
+    };
+
+    my $s = "";
+
+    $s .= "-- Identifier:\n";
+    $s .= $self->identifier()."\n";
+    $s .= "-- Subject Term Identifier:\n";
+    $s .= $id_of->( $self->subject_term() )."\n";
+    $s .= "-- Object Term Identifier:\n";
+    $s .= $id_of->( $self->object_term() )."\n";
+    $s .= "-- Relationship Type Identifier:\n";
+    # No trailing line break after the last value.
+    $s .= $id_of->( $self->predicate_term() );
+
+    return $s;
+
+} # to_string
+
+
+
+# Throws unless $value is an object of (or derived from) $expected_class.
+# The message names what was found instead: undef, a plain scalar, or the
+# reference type / class of the offending value.
+sub _check_class {
+    my ( $self, $value, $expected_class ) = @_;
+
+    if ( ! defined( $value ) ) {
+        $self->throw( "Found [undef] where [$expected_class] expected" );
+    }
+    elsif ( ! ref( $value ) ) {
+        $self->throw( "Found [scalar] where [$expected_class] expected" );
+    }
+    else {
+        # Check blessed() first: calling ->isa on a plain hash or array
+        # reference would die with a raw Perl error instead of the
+        # descriptive exception below.
+        require Scalar::Util;
+        unless ( Scalar::Util::blessed( $value )
+                 && $value->isa( $expected_class ) ) {
+            $self->throw( "Found [" . ref( $value ) . "] where [$expected_class] expected" );
+        }
+    }
+
+} # _check_class
+
+#################################################################
+# aliases for backwards compatibility
+#################################################################
+
+=head1 Deprecated Methods
+
+  These methods are deprecated and defined here solely to preserve
+  backwards compatibility.
+
+=cut
+
+# Install the older method names as glob aliases: each deprecated name
+# refers to the very same sub as its replacement, so both behave alike.
+*child_term        = \&subject_term;
+*parent_term       = \&object_term;
+*relationship_type = \&predicate_term;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+# $Id: RelationshipFactory.pm,v 1.5.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::RelationshipFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::RelationshipFactory - Instantiates a new
+Bio::Ontology::RelationshipI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Ontology::RelationshipFactory;
+
+    # the default type is Bio::Ontology::Relationship
+    my $factory = Bio::Ontology::RelationshipFactory->new(
+                                 -type => 'Bio::Ontology::Relationship');
+    my $rel = $factory->create_object(-subject_term   => $subj,
+                                      -object_term    => $obj,
+                                      -predicate_term => $pred);
+
+
+=head1 DESCRIPTION
+
+This object will build L<Bio::Ontology::RelationshipI> objects generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::RelationshipFactory;
+use strict;
+
+use Bio::Root::Root;
+
+use base qw(Bio::Factory::ObjectFactory);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Ontology::RelationshipFactory();
+ Function: Builds a new Bio::Ontology::RelationshipFactory object 
+ Returns : Bio::Ontology::RelationshipFactory
+ Args    : -type => string, name of a Bio::Ontology::RelationshipI
+                    derived class.
+                    The default is Bio::Ontology::Relationship.
+
+See L<Bio::Ontology::Relationship>, L<Bio::Ontology::RelationshipI>.
+
+=cut
+
+sub new {
+    # The remaining arguments are passed on to the parent constructor
+    # unchanged.
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # make sure this matches our requirements
+    $self->interface("Bio::Ontology::RelationshipI");
+    # Keep the type that is already set; otherwise fall back to the
+    # default relationship class.
+    $self->type($self->type() || "Bio::Ontology::Relationship");
+
+    return $self;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,189 @@
+# $Id: RelationshipI.pm,v 1.9.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for RelationshipI
+#
+# Cared for by Peter Dimitrov <dimitrov at gnf.org>
+#
+# (c) Peter Dimitrov
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::RelationshipI - Interface for a relationship between ontology terms
+
+=head1 SYNOPSIS
+
+    # see documentation of methods and an implementation, e.g.,
+    # Bio::Ontology::Relationship
+
+=head1 DESCRIPTION
+
+This is the minimal interface for a relationship between two terms in
+an ontology. Ontology engines will use this.
+
+The terminology we use here is the one commonly used for ontologies,
+namely the triple of (subject, predicate, object), which in addition
+is scoped in a namespace (ontology). It is called triple because it is
+a tuple of three ontology terms.
+
+There are other terminologies in use for expressing relationships. For
+those who it helps to better understand the concept, the triple of
+(child, relationship type, parent) would be equivalent to the
+terminology chosen here, disregarding the question whether the notion
+of parent and child is sensible in the context of the relationship
+type or not. Especially in the case of ontologies with a wide variety
+of predicates the parent/child terminology and similar ones can
+quickly become ambiguous (e.g., A synthesises B), meaningless (e.g., A
+binds B), or even conflicting (e.g., A is-parent-of B), and are
+therefore strongly discouraged.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov at gnf.org
+
+=head1 CONTRIBUTORS
+
+ Hilmar Lapp, email: hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::RelationshipI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : print $rel->identifier();
+ Function: Set/get for the identifier of this Relationship.
+
+           Note that this may not necessarily be used by a particular
+           ontology.
+
+ Returns : The identifier [scalar].
+ Args    : 
+
+=cut
+
+sub identifier{
+    my $self = shift;
+
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+=head2 subject_term
+
+ Title   : subject_term
+ Usage   : $subj = $rel->subject_term();
+ Function: Set/get for the subject term of this Relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The subject term [Bio::Ontology::TermI].
+ Args    : 
+
+=cut
+
+sub subject_term{
+    my $self = shift;
+
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+=head2 object_term
+
+ Title   : object_term
+ Usage   : $object = $rel->object_term();
+ Function: Set/get for the object term of this Relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The object term [Bio::Ontology::TermI].
+ Args    : 
+
+=cut
+
+sub object_term{
+    my $self = shift;
+
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+=head2 predicate_term
+
+ Title   : predicate_term
+ Usage   : $type = $rel->predicate_term();
+ Function: Set/get for the relationship type of this relationship.
+
+           The common convention for ontologies is to express
+           relationships between terms as triples (subject, predicate,
+           object).
+
+ Returns : The relationship type [Bio::Ontology::TermI].
+ Args    : 
+
+=cut
+
+sub predicate_term{
+    my $self = shift;
+
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $ont = $obj->ontology()
+ Function: Get the ontology that defined (is the scope for) this
+           relationship.
+ Example : 
+ Returns : an object implementing Bio::Ontology::OntologyI
+ Args    : 
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub ontology{
+    my $self = shift;
+
+    # Interface stub: implementing classes must override this method.
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipType.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipType.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/RelationshipType.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,438 @@
+# $Id: RelationshipType.pm,v 1.17.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::RelationshipType  
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::RelationshipType  - a relationship type for an ontology
+
+=head1 SYNOPSIS
+
+  #
+
+=head1 DESCRIPTION
+
+This class can be used to model various types of relationships
+(such as "IS_A", "PART_OF", "CONTAINS", "FOUND_IN", "RELATED_TO").
+
+This class extends L<Bio::Ontology::Term>, so it essentially is-a
+L<Bio::Ontology::TermI>. In addition, all methods are overridden such
+as to make the object immutable.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address:
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Ontology::RelationshipType;
+use strict;
+
+
+# Names of commonly used relationship types. 'use constant' makes each one
+# callable as, e.g., Bio::Ontology::RelationshipType::IS_A.
+use constant PART_OF    => "PART_OF";
+use constant RELATED_TO => "RELATED_TO";
+use constant IS_A       => "IS_A";
+use constant CONTAINS   => "CONTAINS";
+use constant FOUND_IN   => "FOUND_IN";
+
+
+use base qw(Bio::Ontology::Term);
+
+
+#
+# cache for terms: maps a predicate name to the RelationshipType object
+# most recently created for that name by get_instance()
+#
+my %term_name_map = ();
+
+
+=head2 get_instance
+
+ Title   : get_instance
+ Usage   : $IS_A       = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+           $PART_OF    = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+           $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" );
+           $CONTAINS   = Bio::Ontology::RelationshipType->get_instance( "CONTAINS" );
+           $FOUND_IN   = Bio::Ontology::RelationshipType->get_instance( "FOUND_IN" );
+ Function: Factory method to create instances of RelationshipType
+ Returns : [Bio::Ontology::RelationshipType]
+ Args    : "IS_A" or "PART_OF" or "CONTAINS" or "FOUND_IN" or 
+           "RELATED_TO" [scalar]
+           the ontology [Bio::Ontology::OntologyI] (optional)
+
+=cut
+
+sub get_instance {
+    my ( $class, $name, $ont ) = @_;
+
+    $class->throw("must provide predicate name") unless $name;
+
+    # Reuse the cached type for this name, but only when it lives in the
+    # same scope as the request: either both sides name the same ontology,
+    # or neither side has an ontology at all.
+    my $cached = $term_name_map{$name};
+    if ($cached) {
+        my $cached_ont = $cached->ontology();
+        my $same_scope = ( $ont && $cached_ont )
+            ? ( $ont->name() eq $cached_ont->name() )
+            : !( $cached_ont || $ont );
+        return $cached if $same_scope;
+    }
+    # valid relationship type?
+
+#
+#see the cell ontology.  this code is too strict, even for dag-edit files. -allen
+#
+#    if ( ! (($name eq IS_A) || ($name eq PART_OF) ||
+#	    ($name eq CONTAINS) || ( $name eq FOUND_IN ))) {
+#        my $msg = "Found unknown type of relationship: [" . $name . "]\n";
+#        $msg .= "Known types are: [" . IS_A . "], [" . PART_OF . "], [" . CONTAINS . "], [" . FOUND_IN . "]";
+#        $class->throw( $msg );
+#    }
+
+    # Nothing reusable in the cache: build a fresh relationship type.
+    my $created = $class->new( -name     => $name,
+                               -ontology => $ont );
+    # The cache is keyed by name only, so this replaces any entry that was
+    # created for the same name in another ontology (FIXME).
+    $term_name_map{$name} = $created;
+    return $created;
+} # get_instance
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $type->init();
+ Function: Initializes this to all undef and empty lists.
+ Returns :
+ Args    :
+
+=cut
+
+sub init {
+    my ($self) = @_;
+
+    # Nothing type-specific to set up; the parent class does all the work.
+    return $self->SUPER::init();
+} # init
+
+
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if ( $type->equals( $other_type ) ) { ...
+ Function: Compares this type to another one, based on string "eq" of
+           the "identifier" field, if at least one of the two types has
+           the identifier set, or string eq of the name otherwise.
+ Returns : true or false
+ Args    : [Bio::Ontology::RelationshipType]
+
+=cut
+
+sub equals {
+    my ( $self, $other ) = @_;
+
+    # Throws unless $other is a Bio::Ontology::RelationshipType.
+    $self->_check_class( $other, "Bio::Ontology::RelationshipType" );
+
+    my $own_id   = $self->identifier();
+    my $other_id = $other->identifier();
+
+    if ( $own_id xor $other_id ) {
+        $self->warn("comparing relationship types when only ".
+                    "one has an identifier will always return false" );
+    }
+
+    # Identifiers decide as soon as either side has one; names are only
+    # compared when both identifiers are unset.
+    if ( $own_id || $other_id ) {
+        return $own_id eq $other_id;
+    }
+    return $self->name() eq $other->name();
+} # equals
+
+
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $term->identifier( "IS_A" );
+           or
+           print $term->identifier();
+ Function: Set/get for the immutable identifier of this Type.
+ Returns : The identifier [scalar].
+ Args    : The identifier [scalar] (optional).
+
+=cut
+
+sub identifier {
+    # Guarded accessor: once a true value is stored, a call may only repeat
+    # that same value; any other value raises an exception.
+    my ( $self, @args ) = @_;
+    my $current = $self->SUPER::identifier();
+    return $current unless @args;
+
+    if ( $current && $current ne $args[0] ) {
+        $self->throw( $self->veto_change( "identifier", $current, $args[0] ) );
+    }
+    $current = $self->SUPER::identifier(@args);
+    return $current;
+} # identifier
+
+
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $term->name( "is a type" );
+           or
+           print $term->name();
+ Function: Set/get for the immutable name of this Type.
+ Returns : The name [scalar].
+ Args    : The name [scalar] (optional).
+
+=cut
+
+sub name {
+    # Guarded accessor: once a true value is stored, a call may only repeat
+    # that same value; any other value raises an exception.
+    my ( $self, @args ) = @_;
+    my $current = $self->SUPER::name();
+    return $current unless @args;
+
+    if ( $current && $current ne $args[0] ) {
+        $self->throw( $self->veto_change( "name", $current, $args[0] ) );
+    }
+    $current = $self->SUPER::name(@args);
+    return $current;
+} # name
+
+
+
+
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $term->definition( "" );
+           or
+           print $term->definition();
+ Function: Set/get for the immutable definition of this Type.
+ Returns : The definition [scalar].
+ Args    : The definition [scalar] (optional).
+
+=cut
+
+sub definition {
+    my ( $self, @args ) = @_;
+    my $text = $self->SUPER::definition();
+    if (@args) {
+        # NOTE(review): unlike the sibling setters, the veto message is built
+        # here but never passed to throw(), so a changed definition is
+        # accepted silently. Confirm whether that leniency is intended.
+        if ( $text && $text ne $args[0] ) {
+            $self->veto_change( "definition", $text, $args[0] );
+        }
+        $text = $self->SUPER::definition(@args);
+    }
+    # let's be nice and return something readable here: fall back to a
+    # description derived from the name when no definition is stored
+    return $text if $text;
+    return $self->name()." relationship predicate (type)" if $self->name();
+} # definition
+
+
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $term->ontology( $top );
+           or
+           $top = $term->ontology();
+ Function: Set/get for the ontology this relationship type lives in.
+ Returns : The ontology [Bio::Ontology::OntologyI].
+ Args    : On set, the ontology [Bio::Ontology::OntologyI] (optional).
+
+=cut
+
+sub ontology {
+    # Guarded accessor for the ontology this relationship type belongs to.
+    # Once an ontology is stored, a set call must pass an ontology with the
+    # same name; anything else (including undef) raises an exception.
+    my $self = shift;
+    my $ret = $self->SUPER::ontology();
+    if(@_) {
+	my $ont = shift;
+	if($ret) {
+	    # report names rather than object references in the message
+	    $self->throw($self->veto_change("ontology",$ret->name,
+					    $ont ? $ont->name : $ont))
+		unless $ont && ($ont->name() eq $ret->name());
+	}
+	# forward the new ontology plus any remaining arguments; the archived
+	# text read "$ont, at _", a mangled form of "$ont,@_"
+	$ret = $self->SUPER::ontology($ont,@_);
+    }
+    return $ret;
+} # ontology
+
+
+
+=head2 version
+
+ Title   : version
+ Usage   : $term->version( "1.00" );
+           or
+           print $term->version();
+ Function: Set/get for immutable version information.
+ Returns : The version [scalar].
+ Args    : The version [scalar] (optional).
+
+=cut
+
+sub version {
+    # Guarded accessor: once a true value is stored, a call may only repeat
+    # that same value; any other value raises an exception.
+    my ( $self, @args ) = @_;
+    my $current = $self->SUPER::version();
+    return $current unless @args;
+
+    if ( $current && $current ne $args[0] ) {
+        $self->throw( $self->veto_change( "version", $current, $args[0] ) );
+    }
+    $current = $self->SUPER::version(@args);
+    return $current;
+} # version
+
+
+
+=head2 is_obsolete
+
+ Title   : is_obsolete
+ Usage   : $term->is_obsolete( 1 );
+           or
+           if ( $term->is_obsolete() )
+ Function: Set/get for the immutable obsoleteness of this Type.
+ Returns : the obsoleteness [0 or 1].
+ Args    : the obsoleteness [0 or 1] (optional).
+
+=cut
+
+sub is_obsolete {
+    # Guarded accessor: a true stored flag cannot be replaced by a value
+    # that differs numerically.
+    my ( $self, @args ) = @_;
+    my $flag = $self->SUPER::is_obsolete();
+    return $flag unless @args;
+
+    # numeric comparison, since the flag is documented as 0 or 1
+    if ( $flag && $flag != $args[0] ) {
+        $self->throw( $self->veto_change( "is_obsolete", $flag, $args[0] ) );
+    }
+    $flag = $self->SUPER::is_obsolete(@args);
+    return $flag;
+} # is_obsolete
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $term->comment( "..." );
+           or
+           print $term->comment();
+ Function: Set/get for an arbitrary immutable comment about this Type.
+ Returns : A comment.
+ Args    : A comment (optional).
+
+=cut
+
+sub comment {
+    # Guarded accessor: once a true value is stored, a call may only repeat
+    # that same value; any other value raises an exception.
+    my ( $self, @args ) = @_;
+    my $current = $self->SUPER::comment();
+    return $current unless @args;
+
+    if ( $current && $current ne $args[0] ) {
+        $self->throw( $self->veto_change( "comment", $current, $args[0] ) );
+    }
+    $current = $self->SUPER::comment(@args);
+    return $current;
+} # comment
+
+=head1 Private methods 
+
+May be overridden in a derived class, but should never be called from
+outside.
+
+=cut
+
+sub _check_class {
+    # Throws unless $value is an object that isa $expected_class.
+    my ( $self, $value, $expected_class ) = @_;
+
+    # Checks run from least to most specific; each one raises on failure.
+    $self->throw( "Found [undef] where [$expected_class] expected" )
+        unless defined( $value );
+
+    $self->throw( "Found [scalar] where [$expected_class] expected" )
+        unless ref( $value );
+
+    $self->throw( "Found [" . ref( $value ) . "] where [$expected_class] expected" )
+        unless $value->isa( $expected_class );
+
+} # _check_class
+
+=head2 veto_change
+
+ Title   : veto_change
+ Usage   :
+ Function: Called if an attribute is changed. Setting an attribute is
+           considered a change if it had a value before and the attempt
+           to set it would change the value.
+
+           This method returns the message to be printed in the exception.
+
+ Example :
+ Returns : A string
+ Args    : The name of the attribute that was attempted to change.
+           Optionally, the old value and the new value for reporting
+           purposes only.
+
+=cut
+
+sub veto_change{
+    my ($self,$attr,$old,$new) = @_;
+
+    # "unset" only when a true old value is replaced by a false one
+    my $action = ($old && !$new) ? "unset" : "change";
+    my @parts = ("attempt to $action attribute $attr in ", ref($self),
+                 ", which is immutable");
+    # quote both values only when both are true
+    push @parts, " (\"$old\" to \"$new\")" if $old && $new;
+    return join('', @parts);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,284 @@
+# $Id: GraphAdaptor.pm,v 1.6.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl Graph adaptor for Bio::Ontology::SimpleGOEngine
+#
+# Cared for by Nat Goodman <natg at shore.net>
+#
+# (c) Nathan Goodman natg at shore.net 2005
+# (c) ISB, Institute for Systems Biology 2005
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::SimpleGOEngine::GraphAdaptor - Graph adaptor for
+Bio::Ontology::SimpleGOEngine
+
+=head1 SYNOPSIS
+
+  use Bio::Ontology::SimpleGOEngine::GraphAdaptor;
+
+  my $graph = Bio::Ontology::SimpleGOEngine::GraphAdaptor->new;
+
+=head1 DESCRIPTION
+
+This is a thin adaptor to simplify use of the old and new versions of
+the standard CPAN Graph module (old is versions 0.2x; new is 0.5x and
+beyond) within Bio::Ontology::SimpleGOEngine.
+
+This module implements only those Graph methods used by
+SimpleGOEngine.  It is far from a complete compatibility layer!  It
+also implements workarounds for certain performance problems in the
+current versions of Graph v0.5x.
+
+This class provides a 'new' method that determines which version of
+Graph is available.  The object returned by 'new' is blessed into this
+class if the new version of Graph is available, else into the subclass
+
+  Bio::Ontology::SimpleGOEngine::GraphAdaptor02
+
+This class provides implementations for the required graph methods
+using the new version of Graph.  In most cases, these are simple
+pass-throughs.  Methods that differ in v0.2x are implemented in the
+subclass.
+
+The methods implemented here or in the subclasses are listed below.
+In all cases, we implemented the Graph v0.5x interface.  Consult the
+Graph v0.5x man page for details.
+
+  add_vertex
+  has_vertex
+  add_edge
+  has_edge
+  vertices
+  edges
+  edges_at
+  predecessors
+  successors
+  set_vertex_attribute
+  get_vertex_attribute
+  set_edge_attribute
+  get_edge_attribute
+  source_vertices
+  sink_vertices
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Nat Goodman
+
+Email: natg at shore.net
+
+Address:
+
+  Institute for Systems Biology
+  1441 N 34th St
+  Seattle, WA 98103-8904
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Ontology::SimpleGOEngine::GraphAdaptor;
+
+use Graph::Directed;
+
+use strict;
+use Bio::Ontology::SimpleGOEngine::GraphAdaptor02;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $graph = Bio::Ontology::SimpleGOEngine::GraphAdaptor->new()
+ Function: Creates a new graph
+ Returns : Bio::Ontology::SimpleGOEngine::GraphAdaptor (or the calling
+           subclass) object if Graph v0.5x or later is loaded, otherwise
+           a Bio::Ontology::SimpleGOEngine::GraphAdaptor02 object
+ Args    : none
+
+=cut
+
+sub new {
+  # Constructor. Takes no arguments beyond the class (or an instance of
+  # it) and returns an adaptor wrapping an empty Graph::Directed.
+  my( $class ) = @_;
+  $class = ref $class || $class;
+
+  # Graph v0.5x and later is served by the calling class; older versions
+  # (or no detectable version) fall back to the v0.2x subclass.
+  my $self=
+    ( defined $Graph::VERSION && $Graph::VERSION >= 0.5 ) ?
+      bless ( {}, $class ) :
+	bless ( {}, 'Bio::Ontology::SimpleGOEngine::GraphAdaptor02' );
+  # direct method call instead of the ambiguous indirect-object form
+  # "new Graph::Directed"
+  $self->{_graph}=Graph::Directed->new;
+  # attributes are kept in the adaptor itself, not in the Graph object
+  $self->{_vertex_attributes}={};
+  $self->{_edge_attributes}={};
+  return $self;
+}
+
+# Here are the main methods
+
+sub add_vertex {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->add_vertex(@_);
+}
+sub has_vertex {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->has_vertex(@_);
+}
+sub add_edge {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->add_edge(@_);
+}
+sub has_edge {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->has_edge(@_);
+}
+sub vertices {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->vertices(@_);
+}
+sub edges {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->edges(@_);
+}
+sub edges_at {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->edges_at(@_);
+}
+sub predecessors {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->predecessors(@_);
+}
+sub successors {
+  # Delegates to the wrapped graph, forwarding all arguments.
+  my $graph=shift->_graph;
+  return $graph->successors(@_);
+}
+sub source_vertices {
+  # Delegates to the wrapped graph; caller arguments are not forwarded.
+  my($self)=@_;
+  return $self->_graph->source_vertices();
+}
+sub sink_vertices {
+  # Delegates to the wrapped graph; caller arguments are not forwarded.
+  my($self)=@_;
+  return $self->_graph->sink_vertices();
+}
+# The following methods workaround a performance problem in Graph v0.5x 
+# when attributes are attached to the graph
+sub set_vertex_attribute {
+  # Stores the value in the adaptor's own per-vertex hash, not in the graph.
+  my($self,$v,$attribute,$value)=@_;
+  my $attributes=$self->_vertex2attributes($v);
+  return $attributes->{$attribute}=$value;
+}
+sub get_vertex_attribute {
+  # Reads the value from the adaptor's own per-vertex hash.
+  my($self,$v,$attribute)=@_;
+  my $attributes=$self->_vertex2attributes($v);
+  return $attributes->{$attribute};
+}
+sub set_edge_attribute {
+  # Stores the value in the adaptor's own per-edge hash, not in the graph.
+  my($self,$u,$v,$attribute,$value)=@_;
+  my $attributes=$self->_edge2attributes($u,$v);
+  return $attributes->{$attribute}=$value;
+}
+sub get_edge_attribute {
+  # Reads the value from the adaptor's own per-edge hash.
+  my($self,$u,$v,$attribute)=@_;
+  my $attributes=$self->_edge2attributes($u,$v);
+  return $attributes->{$attribute};
+}
+
+=head2 _graph
+
+ Title   : _graph
+ Usage   : $self->_graph();
+ Function: Internal method to access 'real' graph
+ Returns : Graph::Directed object 
+ Args    : none
+
+=cut
+
+sub _graph { my($self)=@_; return $self->{_graph}; }
+
+=head2 _vertex_attributes
+
+ Title   : _vertex_attributes
+ Usage   : $self->_vertex_attributes();
+ Function: Internal method to access HASH used to store vertex attributes
+ Returns : HASH reference
+ Args    : none
+
+=cut
+
+sub _vertex_attributes { my($self)=@_; return $self->{_vertex_attributes}; }
+
+=head2 _edge_attributes
+
+ Title   : _edge_attributes
+ Usage   : $self->_edge_attributes();
+ Function: Internal method to access HASH used to store edge attributes
+ Returns : HASH reference
+ Args    : none
+
+=cut
+
+sub _edge_attributes { my($self)=@_; return $self->{_edge_attributes}; }
+
+=head2 _vertex2attributes
+
+ Title   : _vertex2attributes
+ Usage   : $value=$graph->_vertex2attributes($v)->{ATTRIBUTE};
+           $graph->_vertex2attributes($v)->{ATTRIBUTE}=$value;
+ Function: Internal method to access attributes for a specific vertex
+ Returns : HASH
+ Args    : the vertex
+
+=cut
+
+sub _vertex2attributes {
+  my($self,$vertex)=@_;
+  my $store=$self->_vertex_attributes;
+  # create the per-vertex hash on first access
+  return $store->{$vertex} ||= {};
+}
+
+=head2 _edge2attributes
+
+ Title   : _edge2attributes
+ Usage   : $value=$graph->_edge2attributes($u,$v)->{ATTRIBUTE};
+           $graph->_edge2attributes($u,$v)->{ATTRIBUTE}=$value;
+ Function: Internal method to access attributes for a specific edge
+ Returns : HASH
+ Args    : the two vertices ($u, $v) of the edge
+
+=cut
+
+sub _edge2attributes {
+  my($self,$u,$v)=@_;
+  # the same composite key Perl builds for $hash->{$u,$v}: parts joined by $;
+  my $edge_key=join($;,$u,$v);
+  my $store=$self->_edge_attributes;
+  # create the per-edge hash on first access
+  return $store->{$edge_key} ||= {};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor02.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor02.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine/GraphAdaptor02.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,133 @@
+# $Id: GraphAdaptor02.pm,v 1.6.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl adaptor for old Graph versions (0.2x) for use in 
+# Bio::Ontology::SimpleGOEngine
+#
+# Cared for by Nat Goodman <natg at shore.net>
+#
+# (c) Nathan Goodman natg at shore.net 2005
+# (c) ISB, Institute for Systems Biology 2005
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::SimpleGOEngine::GraphAdaptor02 - Graph adaptor (v02.x) for
+Bio::Ontology::SimpleGOEngine
+
+=head1 DESCRIPTION
+
+Internal subclass of Bio::Ontology::SimpleGOEngine::GraphAdaptor for
+Graph v0.2x.
+
+Call this via Bio::Ontology::SimpleGOEngine::GraphAdaptor
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Nat Goodman
+
+Email: natg at shore.net
+
+Address:
+
+  Institute for Systems Biology
+  1441 N 34th St
+  Seattle, WA 98103-8904
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Ontology::SimpleGOEngine::GraphAdaptor02;
+
+
+use strict;
+
+use base qw(Bio::Ontology::SimpleGOEngine::GraphAdaptor);
+
+# edges
+#   v0.2x returns (u0,v0, u1,v1, ...)
+#   v0.5x returns ([u0,v0], [u1,v1], ...)
+sub edges {
+  my $self=shift;
+  my @flat=$self->_graph->edges(@_);
+  my @pairs;
+  # peel off two vertices at a time and wrap each pair in an array ref
+  while (@flat) {
+    my($u,$v)=splice(@flat,0,2);
+    push(@pairs,[$u,$v]);
+  }
+  return @pairs;
+}
+
+# edges_at
+#   v0.2x uses edges() method and returns (u0,v0, u1,v1, ...)
+#   v0.5x returns ([u0,v0], [u1,v1], ...)
+sub edges_at {
+  # reuses this class's edges(), forwarding the caller's arguments
+  my($self,@vertices)=@_;
+  return $self->edges(@vertices);
+}
+
+# set_vertex_attribute
+#   v0.2x uses set_attribute($attribute,$v,$value)
+sub set_vertex_attribute {
+  my($self,$v,$attribute,$value)=@_;
+  # v0.2x argument order: attribute name first, then vertex, then value
+  my $graph=$self->_graph;
+  return $graph->set_attribute($attribute,$v,$value);
+}
+
+# get_vertex_attribute
+#   v0.2x uses get_attribute($attribute,$v)
+sub get_vertex_attribute {
+  my($self,$v,$attribute)=@_;
+  # v0.2x argument order: attribute name first, then vertex
+  my $graph=$self->_graph;
+  return $graph->get_attribute($attribute,$v);
+}
+
+# set_edge_attribute
+#   v0.2x uses set_attribute($attribute,$u,$v,$value)
+sub set_edge_attribute {
+  my($self,$u,$v,$attribute,$value)=@_;
+  # v0.2x argument order: attribute name first, then both vertices, then value
+  my $graph=$self->_graph;
+  return $graph->set_attribute($attribute,$u,$v,$value);
+}
+
+# get_edge_attribute
+#   v0.2x uses get_attribute($attribute,$u,$v)
+sub get_edge_attribute {
+  my($self,$u,$v,$attribute)=@_;
+  # v0.2x argument order: attribute name first, then both vertices
+  my $graph=$self->_graph;
+  return $graph->get_attribute($attribute,$u,$v);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleGOEngine.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: SimpleGOEngine.pm,v 1.36.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::SimpleGOEngine
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::SimpleGOEngine - an Ontology Engine for GO implementing OntologyEngineI
+
+=head1 SYNOPSIS
+
+  use Bio::Ontology::SimpleGOEngine;
+
+  my $parser = Bio::Ontology::SimpleGOEngine->new
+        ( -defs_file => "/home/czmasek/GO/GO.defs",
+          -files     => ["/home/czmasek/GO/component.ontology",
+                         "/home/czmasek/GO/function.ontology",
+                         "/home/czmasek/GO/process.ontology"] );
+
+  my $engine = $parser->parse();
+
+  my $IS_A       = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+  my $PART_OF    = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+  my $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" );
+
+=head1 DESCRIPTION
+
+This class is deprecated and instead Bio::Ontology::OBOEngine should be used.
+
+Needs Graph.pm from CPAN.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address:
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+package Bio::Ontology::SimpleGOEngine;
+
+use strict;
+
+use base qw(Bio::Ontology::OBOEngine);
+
+
+# Internal methods
+# ----------------
+
+## Overriding this method from OBOEngine
+# Checks the correct format of a GOBO-formatted id
+# Gets the id out of a term or id string
+sub _get_id {
+    # Returns the identifier for $term, which may be a term object or a
+    # plain ID string. Throws (via $self->throw) if an object is not a
+    # Bio::Ontology::TermI, or if the resulting ID is neither of the form
+    # word:word, all digits, nor a faked "ontology|name" ID.
+    my ( $self, $term ) = @_;
+    my $id = $term;
+    # Only a real object can be a GOterm. Calling ->isa on a plain string
+    # dies with a raw Perl error when the string is empty or undefined.
+    my $is_go_term = 0;
+
+    if(ref($term)) {
+        # use TermI standard API
+        $self->throw("Object doesn't implement Bio::Ontology::TermI. ".
+                     "Bummer.")
+            unless $term->isa("Bio::Ontology::TermI");
+        $is_go_term = $term->isa("Bio::Ontology::GOterm");
+        $id = $term->identifier();
+        # if there is no ID, we need to fake one from ontology name and name
+        # in order to achieve uniqueness
+        if(!$id) {
+            $id = $term->ontology->name() if $term->ontology();
+            $id = $id ? $id.'|' : '';
+            $id .= $term->name();
+        }
+    }
+    # treat a missing ID as an empty one so it is rejected cleanly below
+    $id = '' unless defined $id;
+
+    # don't fuss if it looks remotely standard, and we trust GO terms
+    # (a stricter pattern used previously: /^[A-Z_]{1,8}:\d{1,}$/)
+    return $id
+        if $is_go_term || ($id =~ /^\w+:\w+$/);
+    # prefix with something if only numbers
+    if($id =~ /^\d+$/) {
+        $self->warn(ref($self).": identifier [$id] is only numbers - ".
+                    "prefixing with 'GO:'");
+        return "GO:" . $id;
+    }
+    # we shouldn't have gotten here if it's at least a remotely decent ID
+    $self->throw(ref($self).": non-standard identifier '$id'\n")
+        unless $id =~ /\|/;
+    return $id;
+} # _get_id
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleOntologyEngine.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleOntologyEngine.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/SimpleOntologyEngine.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1017 @@
+# $Id: SimpleOntologyEngine.pm,v 1.17.4.3 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::SimpleOntologyEngine
+#
+# Cared for by Peter Dimitrov <dimitrov at gnf.org>
+#
+# Copyright Peter Dimitrov
+# (c) Peter Dimitrov, dimitrov at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::SimpleOntologyEngine - Implementation of OntologyEngineI interface
+
+=head1 SYNOPSIS
+
+  my $soe = Bio::Ontology::SimpleOntologyEngine->new;
+
+=head1 DESCRIPTION
+
+This is a "simple" implementation of Bio::Ontology::OntologyEngineI.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov at gnf.org
+
+=head1 CONTRIBUTORS
+
+Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::SimpleOntologyEngine;
+use strict;
+use Carp;
+use Bio::Ontology::RelationshipFactory;
+use Data::Dumper;
+
+use base qw(Bio::Root::Root Bio::Ontology::OntologyEngineI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $soe = Bio::Ontology::SimpleOntologyEngine->new;
+ Function: Initializes the ontology engine.
+ Example : $soe = Bio::Ontology::SimpleOntologyEngine->new;
+ Returns : Object of class SimpleOntologyEngine.
+ Args    :
+
+=cut
+
+sub new{
+  # Constructor: delegates to the parent class, then gives the engine
+  # its empty stores and a default relationship factory.
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+
+  # each store accessor is handed its own fresh, empty hash
+  foreach my $store_accessor (qw(_term_store
+                                 _relationship_store
+                                 _inverted_relationship_store
+                                 _relationship_type_store
+                                 _instantiated_terms_store)) {
+    $self->$store_accessor( {} );
+  }
+
+  # set defaults for the factories
+  my $default_factory = Bio::Ontology::RelationshipFactory->new(
+                            -type => "Bio::Ontology::Relationship");
+  $self->relationship_factory($default_factory);
+
+  return $self;
+}
+
+=head2 _instantiated_terms_store
+
+ Title   : _instantiated_terms_store
+ Usage   : $obj->_instantiated_terms_store($newval)
+ Function:
+ Example :
+ Returns : hash
+ Args    : empty hash
+
+=cut
+
+sub _instantiated_terms_store{
+  # Get/set accessor for the hash that flags instantiated terms.
+  # The stored value is only replaced when a defined value is passed.
+  my ($self, $new_store) = @_;
+
+  $self->{'_instantiated_terms_store'} = $new_store
+    if defined $new_store;
+
+  return $self->{'_instantiated_terms_store'};
+}
+
+=head2 mark_instantiated
+
+ Title   : mark_instantiated
+ Usage   : $self->mark_instantiated(TermI terms): TermI
+ Function: Marks TermI objects as fully instantiated,
+           allowing for proper counting of the number of terms in the term store.
+           The TermI objects have to be stored in the term store already
+           in order to be marked.
+ Example : $self->mark_instantiated($term);
+ Returns : its argument or throws an exception if a term is not
+           in the term store.
+ Args    : array of objects of class TermI.
+
+=cut
+
+sub mark_instantiated{
+  # Flags each given term as instantiated; throws for a term whose
+  # identifier has no entry in the term store. Returns the terms.
+  my ($self, @terms) = @_;
+
+  for my $term (@terms) {
+    my $id = $term->identifier;
+    unless (defined $self->_term_store->{$id}) {
+      $self->throw( "term ".$term->identifier." not in the term store\n" );
+    }
+    $self->_instantiated_terms_store->{$id} = 1;
+  }
+
+  return @terms;
+}
+
+=head2 mark_uninstantiated
+
+ Title   : mark_uninstantiated
+ Usage   : $self->mark_uninstantiated(TermI terms): TermI
+ Function: Marks TermI objects as not fully instantiated,
+ Example : $self->mark_uninstantiated($term);
+ Returns : its argument or throws an exception if a term is not
+           in the term store(if the term is not marked it does nothing).
+ Args    : array of objects of class TermI.
+
+=cut
+
+sub mark_uninstantiated{
+  # Removes the "instantiated" flag from each given term; throws for a
+  # term whose identifier has no entry in the term store. Terms that
+  # carry no flag are left alone. Returns the terms.
+  my ($self, @terms) = @_;
+
+  for my $term (@terms) {
+    my $id = $term->identifier;
+    unless (defined $self->_term_store->{$id}) {
+      $self->throw( "term ".$term->identifier." not in the term store\n" );
+    }
+    my $marks = $self->_instantiated_terms_store;
+    if (defined $marks->{$id}) {
+      delete $marks->{$id};
+    }
+  }
+
+  return @terms;
+}
+
+=head2 _term_store
+
+ Title   : _term_store
+ Usage   : $obj->_term_store($newval)
+ Function:
+ Example :
+ Returns : reference to a hash mapping term identifiers to
+           Bio::Ontology::TermI objects
+ Args    : on set, a reference to such a hash (the store can be set
+           only once)
+
+=cut
+
+sub _term_store{
+  # Set-once accessor for the term store: passing a defined value when
+  # a store is already present raises an exception.
+  my ($self, $store) = @_;
+
+  # plain getter when nothing (or undef) is passed
+  return $self->{'_term_store'} unless defined $store;
+
+  $self->throw("_term_store already defined\n")
+    if defined $self->{'_term_store'};
+
+  $self->{'_term_store'} = $store;
+  return $self->{'_term_store'};
+}
+
+=head2 add_term
+
+ Title   : add_term
+ Usage   : add_term(TermI term): TermI
+ Function: Adds TermI object to the ontology engine term store.
+ Marks the term fully instantiated by default.
+ Example : $soe->add_term($term)
+ Returns : its argument.
+ Args    : object of class TermI.
+
+=cut
+
+sub add_term{
+  # Stores the term under its identifier and flags it as instantiated.
+  # Throws if a term with the same identifier is already stored.
+  my ($self, $term) = @_;
+  my $known_terms = $self->_term_store;
+  my $id = $term->identifier;
+
+  $self->throw( "term ".$term->identifier." already defined\n" )
+    if defined $known_terms->{$id};
+
+  $known_terms->{$id} = $term;
+  $self->_instantiated_terms_store->{$id} = 1;
+
+  return $term;
+}
+
+=head2 get_term_by_identifier
+
+ Title   : get_term_by_identifier
+ Usage   : get_term_by_identifier(String id): TermI
+ Function: Retrieves terms from the term store by their identifier
+           field, or an empty list if not there.
+ Example : $term = $soe->get_term_by_identifier("IPR000001");
+ Returns : An array of zero or more Bio::Ontology::TermI objects.
+ Args    : An array of identifier strings
+
+=cut
+
+sub get_term_by_identifier{
+  # Looks up every given identifier in the term store and returns the
+  # terms found, in the order of the identifiers; unknown identifiers
+  # are silently skipped.
+  my ($self, @ids) = @_;
+
+  my @found = map {
+      my $stored = $self->_term_store->{$_};
+      defined $stored ? ($stored) : ();
+  } @ids;
+
+  # return the array itself so that scalar context yields the count
+  return @found;
+}
+
+=head2 _get_number_rels
+
+ Title   : _get_number_rels
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub _get_number_rels{
+  # Returns the number of relationships held in the relationship store.
+  my ($self) = @_;
+  my $num_rels = 0;
+
+  # The store is a two-level hash (parent id => child id => predicate),
+  # so the relationships are the entries of the inner hashes. Iterating
+  # over the store reference itself would only count the parent ids.
+  foreach my $children (values %{ $self->_relationship_store }) {
+    $num_rels += scalar keys %$children;
+  }
+  return $num_rels;
+}
+
+=head2 _get_number_terms
+
+ Title   : _get_number_terms
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub _get_number_terms{
+  # Returns how many terms of the term store are flagged as instantiated.
+  my ($self) = @_;
+
+  my @instantiated = $self->_filter_unmarked( values %{$self->_term_store} );
+
+  return scalar @instantiated;
+}
+
+=head2 _relationship_store
+
+ Title   : _relationship_store
+ Usage   : $obj->_relationship_store($newval)
+ Function: 
+ Example : 
+ Returns : reference to a hash of hashes: object (parent) term
+           identifier => subject (child) term identifier => predicate term
+ Args    : on set, a reference to such a hash (the store can be set
+           only once)
+
+=cut
+
+sub _relationship_store{
+  # Set-once accessor for the relationship store: passing a defined
+  # value when a store is already present raises an exception.
+  my ($self, $store) = @_;
+
+  # plain getter when nothing (or undef) is passed
+  return $self->{'_relationship_store'} unless defined $store;
+
+  $self->throw("_relationship_store already defined\n")
+    if defined $self->{'_relationship_store'};
+
+  $self->{'_relationship_store'} = $store;
+  return $self->{'_relationship_store'};
+}
+
+=head2 _inverted_relationship_store
+
+ Title   : _inverted_relationship_store
+ Usage   :
+ Function:
+ Example :
+ Returns : reference to a hash of hashes: subject (child) term
+           identifier => object (parent) term identifier => predicate term
+ Args    : on set, a reference to such a hash (the store can be set
+           only once)
+
+=cut
+
+sub _inverted_relationship_store{
+  # Set-once accessor for the inverted relationship store: passing a
+  # defined value when a store is already present raises an exception.
+  my ($self, $store) = @_;
+
+  # plain getter when nothing (or undef) is passed
+  return $self->{'_inverted_relationship_store'} unless defined $store;
+
+  $self->throw("_inverted_relationship_store already defined\n")
+    if defined $self->{'_inverted_relationship_store'};
+
+  $self->{'_inverted_relationship_store'} = $store;
+  return $self->{'_inverted_relationship_store'};
+}
+
+=head2 _relationship_type_store
+
+ Title   : _relationship_type_store
+ Usage   : $obj->_relationship_type_store($newval)
+ Function: 
+ Example : 
+ Returns : reference to a hash mapping a unique term id (see
+           _unique_termid) to the predicate term
+ Args    : on set, a reference to such a hash (the store can be set
+           only once)
+
+=cut
+
+sub _relationship_type_store{
+  # Set-once accessor for the relationship type store: passing a
+  # defined value when a store is already present raises an exception.
+  my ($self, $store) = @_;
+
+  # plain getter when nothing (or undef) is passed
+  return $self->{'_relationship_type_store'} unless defined $store;
+
+  $self->throw("_relationship_type_store already defined\n")
+    if defined $self->{'_relationship_type_store'};
+
+  $self->{'_relationship_type_store'} = $store;
+  return $self->{'_relationship_type_store'};
+}
+
+=head2 _add_relationship_simple
+
+ Title   : _add_relationship_simple
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub _add_relationship_simple{
+   # Records the predicate term of $rel in the two-level hash $store as
+   # $store->{$parent_id}->{$child_id}. With a false $inverted the object
+   # term is the parent and the subject term the child; with a true
+   # $inverted the two roles are swapped. Throws if the pair is already
+   # stored with a predicate of a different name.
+   my ($self, $store, $rel, $inverted) = @_;
+   my $parent_id;
+   my $child_id;
+
+   if ($inverted) {
+      $parent_id = $rel->subject_term->identifier;
+      $child_id = $rel->object_term->identifier;
+   }
+   else {
+      $parent_id = $rel->object_term->identifier;
+      $child_id = $rel->subject_term->identifier;
+   }
+   # Predicate names are strings and must be compared with 'ne': the
+   # numeric '!=' turns every non-numeric name into 0, so a conflicting
+   # predicate would never be detected.
+   if(defined $store->{$parent_id} && (defined $store->{$parent_id}->{$child_id}) &&
+      ($store->{$parent_id}->{$child_id}->name ne $rel->predicate_term->name)){
+      $self->throw("relationship ".Dumper($rel->predicate_term).
+                   " between ".$parent_id." and ".$child_id.
+                   " already defined as ".
+                   Dumper($store->{$parent_id}->{$child_id})."\n");
+   }
+   else {
+      $store->{$parent_id}->{$child_id} = $rel->predicate_term;
+   }
+}
+
+=head2 add_relationship
+
+ Title   : add_relationship
+ Usage   : add_relationship(RelationshipI relationship): RelationshipI
+ Function: Adds a relationship object to the ontology engine.
+ Example :
+ Returns : Its argument.
+ Args    : A RelationshipI object.
+
+=cut
+
+sub add_relationship{
+   # Registers the relationship in the forward and in the inverted
+   # store, remembers its predicate term as a known relationship type,
+   # and returns the relationship.
+   my ($self, $rel) = @_;
+
+   # forward direction first, then the inverted one
+   my $forward_store = $self->_relationship_store;
+   $self->_add_relationship_simple($forward_store, $rel, 0);
+
+   my $inverted_store = $self->_inverted_relationship_store;
+   $self->_add_relationship_simple($inverted_store, $rel, 1);
+
+   my $predicate = $rel->predicate_term;
+   my $type_key  = $self->_unique_termid($rel->predicate_term);
+   $self->_relationship_type_store->{$type_key} = $predicate;
+
+   return $rel;
+}
+
+=head2 get_relationships
+
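+ Title   : get_relationships
+ Usage   : get_relationships(TermI term): RelationshipI
+ Function: Retrieves all relationship objects or, if a term is
+           supplied, only the relationships of that term to its
+           parents and to its children.
+ Example :
+ Returns : Array of RelationshipI objects
+ Args    : Optionally, a TermI compliant object.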
+
+=cut
+
+sub get_relationships{
+    # Builds relationship objects on the fly, through the relationship
+    # factory, from the entries of the relationship store. The optional
+    # argument is a term: without it every stored relationship is
+    # returned; with it, only the relationships linking the term to its
+    # parents and to its children.
+    my $self = shift;
+    my $term = shift;
+    my @rels;
+    my $store = $self->_relationship_store;
+    my $relfact = $self->relationship_factory(); 
+
+    my @parent_ids = $term ?
+		# if a term is supplied then only get the term's parents
+		(map { $_->identifier(); } $self->get_parent_terms($term)) :
+		  # otherwise use all parent ids
+		  (keys %{$store});
+    # add the term as a parent too if one is supplied
+    push(@parent_ids,$term->identifier) if $term;
+    
+    foreach my $parent_id (@parent_ids) {
+		 # child id => predicate term for this parent (undef if the
+		 # parent has no entry in the store)
+		 my $parent_entry = $store->{$parent_id};
+
+		 # if a term is supplied, add a relationship for the parent to the term
+		 # except if the parent is the term itself (we added that one before)
+		 if($term && ($parent_id ne $term->identifier())) {
+			 my @parent_terms = $self->get_term_by_identifier($parent_id);
+			 foreach my $parent_term (@parent_terms) {
+				 push(@rels,
+						$relfact->create_object(-object_term    => $parent_term,
+														-subject_term   => $term,
+														-predicate_term =>
+														$parent_entry->{$term->identifier},
+														-ontology => $term->ontology())
+					  );
+			 }
+		 
+		 } else {
+			 # otherwise, i.e., no term supplied, or the parent equals the
+			 # supplied term
+			 my @parent_terms = $term ?
+				($term) : $self->get_term_by_identifier($parent_id);
+			 foreach my $child_id (keys %$parent_entry) {
+				 my $rel_info = $parent_entry->{$child_id};
+				 # $subj_term stays undef if the child id has no entry in
+				 # the term store
+				 my ($subj_term) = $self->get_term_by_identifier($child_id);
+
+				 foreach my $parent_term (@parent_terms) {
+					 push(@rels,
+							$relfact->create_object(-object_term  => $parent_term,
+															-subject_term => $subj_term,
+															-predicate_term => $rel_info,
+															-ontology =>$parent_term->ontology
+														  )
+						  );
+				 }
+			 }
+		 }
+    }
+
+    return @rels;
+}
+
+=head2 get_all_relationships
+
+ Title   : get_all_relationships
+ Usage   : get_all_relationships(): RelationshipI
+ Function: Retrieves all relationship objects.
+ Example :
+ Returns : Array of RelationshipI objects
+ Args    :
+
+=cut
+
+sub get_all_relationships{
+    # Convenience wrapper: get_relationships() without a term, i.e. all
+    # relationships. Any further arguments are ignored.
+    my ($self) = @_;
+
+    return $self->get_relationships();
+}
+
+=head2 get_predicate_terms
+
+ Title   : get_predicate_terms
+ Usage   : get_predicate_terms(): TermI
+ Function: Retrieves all relationship types stored in the engine
+ Example :
+ Returns : Array of the predicate terms (relationship types)
+           registered through add_relationship
+ Args    :
+
+=cut
+
+sub get_predicate_terms{
+  # Returns the predicate terms held in the relationship type store.
+  my $self = shift;
+  my $type_store = $self->_relationship_type_store;
+
+  return values %$type_store;
+}
+
+=head2 _is_rel_type
+
+ Title   : _is_rel_type
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub _is_rel_type{
+  # Returns 1 if $term matches one of the given relationship types and
+  # 0 otherwise. Two terms are compared by identifier as soon as either
+  # of them has one, and by name only if neither has.
+  my ($self, $term, @rel_types) = @_;
+
+  for my $candidate (@rel_types) {
+    my $same = ($candidate->identifier || $term->identifier)
+      ? ($candidate->identifier eq $term->identifier)
+      : ($candidate->name eq $term->name);
+    return 1 if $same;
+  }
+
+  return 0;
+}
+
+=head2 _typed_traversal
+
+ Title   : _typed_traversal
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _typed_traversal{
+  # Collects the ids stored directly below $term_id in $rel_store,
+  # keeping only those whose predicate matches one of @rel_types (all
+  # of them if no types are given). For a $level below 1 the ids found
+  # are followed recursively as well, so the result may hold the same
+  # id more than once. Returns the list of ids.
+  my ($self, $rel_store, $level, $term_id, @rel_types) = @_;
+
+  my $entry = $rel_store->{$term_id};
+  return if !defined($entry);
+
+  # direct hits, optionally restricted by relationship type
+  my @found;
+  foreach my $child_id (keys %$entry) {
+    next if @rel_types > 0
+      && !$self->_is_rel_type( $entry->{$child_id}, @rel_types );
+    push @found, $child_id;
+  }
+
+  if ($level < 1) {
+    # unlimited depth: descend into every hit that has an entry itself
+    my @deeper = ();
+    foreach my $child_id (@found) {
+      next unless defined $rel_store->{$child_id};
+      push @deeper, $self->_typed_traversal($rel_store, $level - 1,
+                                            $child_id, @rel_types);
+    }
+    push @found, @deeper;
+  }
+
+  return @found;
+}
+
+=head2 get_child_terms
+
+ Title   : get_child_terms
+ Usage   : get_child_terms(TermI term, TermI predicate_terms): TermI
+           get_child_terms(TermI term, RelationshipType predicate_terms): TermI
+ Function: Retrieves all child terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_child_terms is a special
+           case of get_descendant_terms, limiting the search to the
+           direct descendants.
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list of 
+           relationship type terms.
+
+=cut
+
+sub get_child_terms{
+  # Returns the instantiated terms that are direct children of $term,
+  # optionally restricted to the given relationship types. Throws
+  # unless $term is a TermI compliant object.
+  my ($self, $term, @relationship_types) = @_;
+
+  unless (defined($term) && $term->isa("Bio::Ontology::TermI")) {
+    $self->throw("must provide TermI compliant object");
+  }
+
+  # level 1 = direct neighbours only
+  my @child_ids = $self->_typed_traversal($self->_relationship_store,
+                                          1,
+                                          $term->identifier,
+                                          @relationship_types);
+  my @children = $self->get_term_by_identifier(@child_ids);
+
+  return $self->_filter_unmarked(@children);
+}
+
+=head2 get_descendant_terms
+
+ Title   : get_descendant_terms
+ Usage   : get_descendant_terms(TermI term, TermI rel_types): TermI
+           get_descendant_terms(TermI term, RelationshipType predicate_terms): TermI
+ Function: Retrieves all descendant terms of a given term, that
+           satisfy a relationship among those that are specified in
+           the second argument or undef otherwise. Uses
+           _typed_traversal to find all descendants.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list of 
+           relationship type terms.
+
+=cut
+
+sub get_descendant_terms{
+  # Returns the instantiated terms found below $term at any depth,
+  # each one only once, optionally restricted to the given relationship
+  # types. Throws unless $term is a TermI compliant object.
+  my ($self, $term, @relationship_types) = @_;
+
+  unless (defined($term) && $term->isa("Bio::Ontology::TermI")) {
+    $self->throw("must provide TermI compliant object");
+  }
+
+  # level 0 = follow the relationships all the way down
+  my @descendant_ids = $self->_typed_traversal($self->_relationship_store,
+                                               0,
+                                               $term->identifier,
+                                               @relationship_types);
+  my @descendants = $self->get_term_by_identifier(@descendant_ids);
+  my @unique      = $self->_filter_repeated(@descendants);
+
+  return $self->_filter_unmarked(@unique);
+}
+
+=head2 get_parent_terms
+
+ Title   : get_parent_terms
+ Usage   : get_parent_terms(TermI term, TermI predicate_terms): TermI
+           get_parent_terms(TermI term, RelationshipType predicate_terms): TermI
+ Function: Retrieves all parent terms of a given term, that satisfy a
+           relationship among those that are specified in the second
+           argument or undef otherwise. get_parent_terms is a special
+           case of get_ancestor_terms, limiting the search to the
+           direct ancestors.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list of relationship type terms.
+
+=cut
+
+sub get_parent_terms{
+  # Returns the instantiated terms that are direct parents of $term,
+  # optionally restricted to the given relationship types. Throws if
+  # $term is undef.
+  my ($self, $term, @relationship_types) = @_;
+
+  if (!defined $term) {
+    $self->throw("term must be a valid object, not undef");
+  }
+
+  # level 1 = direct neighbours only, looked up in the inverted store
+  my @parent_ids = $self->_typed_traversal($self->_inverted_relationship_store,
+                                           1,
+                                           $term->identifier,
+                                           @relationship_types);
+  my @parents = $self->get_term_by_identifier(@parent_ids);
+
+  return $self->_filter_unmarked(@parents);
+}
+
+=head2 get_ancestor_terms
+
+ Title   : get_ancestor_terms
+ Usage   : get_ancestor_terms(TermI term, TermI predicate_terms): TermI
+           get_ancestor_terms(TermI term, RelationshipType predicate_terms): TermI
+ Function: Retrieves all ancestor terms of a given term, that satisfy
+           a relationship among those that are specified in the second
+           argument or undef otherwise. Uses _typed_traversal to find
+           all ancestors.
+
+ Example :
+ Returns : Array of TermI objects.
+ Args    : First argument is the term of interest, second is the list
+           of relationship type terms.
+
+=cut
+
+sub get_ancestor_terms{
+  # Returns the instantiated terms found above $term at any depth,
+  # each one only once, optionally restricted to the given relationship
+  # types. Throws if $term is undef.
+  my ($self, $term, @relationship_types) = @_;
+
+  if (!defined $term) {
+    $self->throw("term must be a valid object, not undef");
+  }
+
+  # level 0 = follow the inverted relationships all the way up
+  my @ancestor_ids = $self->_typed_traversal($self->_inverted_relationship_store,
+                                             0,
+                                             $term->identifier,
+                                             @relationship_types);
+  my @ancestors = $self->get_term_by_identifier(@ancestor_ids);
+  my @unique    = $self->_filter_repeated(@ancestors);
+
+  return $self->_filter_unmarked(@unique);
+}
+
+=head2 get_leaf_terms
+
+ Title   : get_leaf_terms
+ Usage   : get_leaf_terms(): TermI
+ Function: Retrieves all leaf terms from the ontology. Leaf term is a term w/o descendants.
+ Example : @leaf_terms = $obj->get_leaf_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_leaf_terms{
+  # Returns the instantiated terms that have no entry of their own in
+  # the relationship store, i.e. that are nobody's parent.
+  my ($self) = @_;
+  my @leaf_terms;
+
+  for my $term (values %{$self->_term_store}) {
+    # a term with an entry in the forward store has children
+    next if defined $self->_relationship_store->{$term->identifier};
+    next unless defined $self->_instantiated_terms_store->{$term->identifier};
+    push @leaf_terms, $term;
+  }
+
+  return @leaf_terms;
+}
+
+=head2 get_root_terms
+
+ Title   : get_root_terms
+ Usage   : get_root_terms(): TermI
+ Function: Retrieves all root terms from the ontology. Root term is a term w/o ancestors.
+ Example : @root_terms = $obj->get_root_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_root_terms{
+  # Returns the instantiated terms that have no entry of their own in
+  # the inverted relationship store, i.e. that are nobody's child.
+  my ($self) = @_;
+  my @root_terms;
+
+  for my $term (values %{$self->_term_store}) {
+    # a term with an entry in the inverted store has parents
+    next if defined $self->_inverted_relationship_store->{$term->identifier};
+    next unless defined $self->_instantiated_terms_store->{$term->identifier};
+    push @root_terms, $term;
+  }
+
+  return @root_terms;
+}
+
+=head2 _filter_repeated
+
+ Title   : _filter_repeated
+ Usage   : @lst = $self->_filter_repeated(@old_lst);
+ Function: Removes repeated terms
+ Example :
+ Returns : List of unique TermI objects
+ Args    : List of TermI objects
+
+=cut
+
+sub _filter_repeated{
+  # Reduces the list to one term per identifier, keeping the first
+  # term seen for each. The order of the result is that of a hash.
+  my ($self, @terms) = @_;
+  my %first_with_id;
+
+  for my $term (@terms) {
+    next if defined $first_with_id{$term->identifier};
+    $first_with_id{$term->identifier} = $term;
+  }
+
+  return values %first_with_id;
+}
+
+=head2 get_all_terms
+
+ Title   : get_all_terms
+ Usage   : get_all_terms(): TermI
+ Function: Retrieves all terms currently stored in the ontology.
+ Example : @all_terms = $obj->get_all_terms()
+ Returns : Array of TermI objects.
+ Args    :
+
+=cut
+
+sub get_all_terms{
+  # Returns every term of the term store that is flagged as
+  # instantiated.
+  my $self = shift;
+  my $term_store = $self->_term_store;
+
+  return $self->_filter_unmarked( values %$term_store );
+}
+
+=head2 find_terms
+
+ Title   : find_terms
+ Usage   : ($term) = $oe->find_terms(-identifier => "SO:0000263");
+ Function: Find term instances matching queries for their attributes.
+
+           This implementation can efficiently resolve queries by
+           identifier.
+
+ Example :
+ Returns : an array of zero or more Bio::Ontology::TermI objects
+ Args    : Named parameters. The following parameters should be recognized
+           by any implementations:
+
+              -identifier    query by the given identifier
+              -name          query by the given name
+
+=cut
+
+sub find_terms{
+    # Finds terms by the named parameters -identifier and/or -name.
+    # With -identifier the lookup goes straight to the term store,
+    # otherwise all instantiated terms are candidates; -name then
+    # narrows the candidates down to those with exactly that name.
+    #
+    # Note: the array sigil of @args had been turned into " at " (an
+    # e-mail address obfuscation artifact), which does not compile.
+    my ($self,@args) = @_;
+    my @terms;
+
+    my ($id,$name) = $self->_rearrange([qw(IDENTIFIER NAME)],@args);
+
+    if(defined($id)) {
+        @terms = $self->get_term_by_identifier($id);
+    } else {
+        @terms = $self->get_all_terms();
+    }
+    if(defined($name)) {
+        @terms = grep { $_->name() eq $name; } @terms;
+    }
+    return @terms;
+}
+
+
+=head2 relationship_factory
+
+ Title   : relationship_factory
+ Usage   : $fact = $obj->relationship_factory()
+ Function: Get/set the object factory to be used when relationship
+           objects are created by the implementation on-the-fly.
+
+ Example : 
+ Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : on set, a Bio::Factory::ObjectFactoryI compliant object
+
+=cut
+
+sub relationship_factory{
+    # Get/set accessor for the relationship factory; any argument, even
+    # undef, replaces the stored value.
+    my ($self, @new_value) = @_;
+
+    if (@new_value) {
+        $self->{'relationship_factory'} = $new_value[0];
+    }
+    return $self->{'relationship_factory'};
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $fact = $obj->term_factory()
+ Function: Get/set the object factory to be used when term objects are
+           created by the implementation on-the-fly.
+
+           Note that this ontology engine implementation does not
+           create term objects on the fly, and therefore setting this
+           attribute is meaningless.
+
+ Example : 
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI
+           compliant object)
+ Args    : on set, a Bio::Factory::ObjectFactoryI compliant object
+
+=cut
+
+sub term_factory{
+    # Get/set accessor for the term factory. Setting it issues a
+    # warning, because this engine never creates terms itself.
+    my ($self, @new_value) = @_;
+
+    # plain getter when called without an argument
+    return $self->{'term_factory'} unless @new_value;
+
+    $self->warn("setting term factory, but ".ref($self).
+                " does not create terms on-the-fly");
+    $self->{'term_factory'} = $new_value[0];
+    return $self->{'term_factory'};
+}
+
+=head2 _filter_unmarked
+
+ Title   : _filter_unmarked
+ Usage   : _filter_unmarked(TermI terms): TermI
+ Function: Removes the uninstantiated terms from the list of terms
+ Example :
+ Returns : array of fully instantiated TermI objects
+ Args    : array of TermI objects
+
+=cut
+
+sub _filter_unmarked{
+  # Keeps only the terms that are flagged as instantiated, preserving
+  # their order.
+  my ($self, @terms) = @_;
+  my @instantiated = ();
+
+  for my $term (@terms) {
+    next unless defined $self->_instantiated_terms_store->{$term->identifier};
+    push @instantiated, $term;
+  }
+
+  # return the array itself so that scalar context yields the count
+  return @instantiated;
+}
+
+=head2 remove_term_by_id
+
+ Title   : remove_term_by_id
+ Usage   : remove_term_by_id(String id): TermI
+ Function: Removes TermI object from the ontology engine using the
+           string id as an identifier. Current implementation does not
+           enforce consistency of the relationships using that term.
+ Example : $term = $soe->remove_term_by_id($id);
+ Returns : Object of class TermI or undef if not found.
+ Args    : The string identifier of a term.
+
+=cut
+
+sub remove_term_by_id{
+  # Deletes the term with the given identifier from the term store and
+  # returns it. Warns and returns nothing if there is no such term.
+  # Relationships that use the term are left untouched.
+  my ($self, $id) = @_;
+
+  unless ( $self->get_term_by_identifier($id) ) {
+    $self->warn("Term with id '$id' is not in the term store");
+    return;
+  }
+
+  # delete hands back the value it removed
+  my $removed = delete $self->{_term_store}->{$id};
+  return $removed;
+}
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $sv->to_string();
+ Function: Currently returns formatted string containing the number of
+           terms and number of relationships from the ontology engine.
+ Example : print $sv->to_string();
+ Returns :
+ Args    :
+
+=cut
+
+sub to_string{
+  # Returns a four-line summary: the number of instantiated terms and
+  # the number reported by _get_number_rels, each under its own header.
+  my ($self) = @_;
+
+  # scalar assignment makes get_all_terms report its count
+  my $term_count = $self->get_all_terms;
+  my $rel_count  = $self->_get_number_rels;
+
+  return "-- # Terms:\n"
+       . $term_count."\n"
+       . "-- # Relationships:\n"
+       . $rel_count."\n";
+}
+
+=head2 _unique_termid
+
+ Title   : _unique_termid
+ Usage   :
+ Function: Returns a string that can be used as ID using fail-over
+           approaches. 
+
+           If the identifier attribute is not set, it uses the
+           combination of name and ontology name, provided both are
+           set. If they are not, it returns the name alone.
+
+           Note that this is a private method. Call from inheriting
+           classes but not from outside.
+
+ Example :
+ Returns : a string
+ Args    : a Bio::Ontology::TermI compliant object
+
+=cut
+
+sub _unique_termid{
+    # Returns a string usable as an id for $term: its identifier if it
+    # has one, otherwise "<ontology name>|<name>", or the name alone if
+    # there is no ontology (or the ontology has no name).
+    my $self = shift;
+    my $term = shift;
+
+    return $term->identifier() if $term->identifier();
+
+    # Declare first, assign afterwards: "my $id = ... if ..." has
+    # undefined behaviour in Perl and may carry a value over from a
+    # previous call.
+    my $id;
+    $id = $term->ontology->name() if $term->ontology();
+    if($id) {
+        $id .= '|';
+    } else {
+        $id = '';
+    }
+    $id .= $term->name();
+    return $id;
+}
+
+
+#################################################################
+# aliases
+#################################################################
+
+*get_relationship_types = \&get_predicate_terms;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Term.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Term.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/Term.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,826 @@
+# $Id: Term.pm,v 1.31.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::Term
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::Term - implementation of the interface for ontology terms
+
+=head1 SYNOPSIS
+
+#get Bio::Ontology::TermI somehow.
+
+  print $term->identifier(), "\n";
+  print $term->name(), "\n";
+  print $term->definition(), "\n";
+  print $term->is_obsolete(), "\n";
+  print $term->comment(), "\n";
+
+  foreach my $synonym ( $term->each_synonym() ) {
+      print $synonym, "\n";
+  }
+
+=head1 DESCRIPTION
+
+This is a simple implementation for ontology terms providing basic
+methods (it provides no functionality related to graphs). It
+implements the L<Bio::Ontology::TermI> interface.
+
+This class also implements L<Bio::IdentifiableI> and
+L<Bio::DescribableI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address:
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Ontology::Term;
+use strict;
+use Bio::Ontology::Ontology;
+use Bio::Ontology::OntologyStore;
+
+use constant TRUE    => 1;
+use constant FALSE   => 0;
+
+use base qw(Bio::Root::Root Bio::Ontology::TermI Bio::IdentifiableI Bio::DescribableI);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $term = Bio::Ontology::Term->new(
+                -identifier  => "16847",
+                -name        => "1-aminocyclopropane-1-carboxylate synthase",
+                -definition  => "Catalysis of ...",
+                -is_obsolete => 0,
+                -comment     => "" );
+ Function: Creates a new Bio::Ontology::Term.
+ Returns : A new Bio::Ontology::Term object.
+ Args    : -identifier            => the identifier of this term [scalar]
+           -name                  => the name of this term [scalar]
+           -definition            => the definition of this term [scalar]
+           -ontology              => the ontology this term lives in
+                                     (a Bio::Ontology::OntologyI object)
+           -version               => version information [scalar]
+           -is_obsolete           => the obsoleteness of this term [0 or 1]
+           -comment               => a comment [scalar]
+           -dblinks               => Bio::Annotation::DBLink objects
+                                     [reference to array]
+           -references            => Bio::Annotation::Reference objects
+                                     [reference to array]
+
+See L<Bio::Ontology::OntologyI>, L<Bio::Annotation::Reference>,
+L<Bio::Annotation::DBLink>.
+
+=cut
+
+sub new {
+
+    # Constructor; accepts the named parameters listed in the POD above.
+    my( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    # Extract the recognised named parameters, in exactly this order.
+    my ( $identifier,
+         $name,
+         $definition,
+         $category,
+         $ont,
+         $version,
+         $is_obsolete,
+         $comment,
+         $dblinks,
+         $references)
+        = $self->_rearrange( [ qw( IDENTIFIER
+                                   NAME
+                                   DEFINITION
+                                   CATEGORY
+                                   ONTOLOGY
+                                   VERSION
+                                   IS_OBSOLETE
+                                   COMMENT
+                                   DBLINKS
+                                   REFERENCES
+       ) ], @args );
+
+    # Put the object into its default state before applying any values.
+    $self->init();
+
+    # Only parameters that were actually supplied are applied; the two
+    # list-valued ones must be array references.
+    defined($identifier)   && $self->identifier( $identifier );
+    defined($name)         && $self->name( $name );
+    defined($definition)   && $self->definition( $definition );
+    defined($category)     && $self->category( $category );
+    defined($ont)          && $self->ontology( $ont );
+    defined($version)      && $self->version( $version );
+    defined($is_obsolete)  && $self->is_obsolete( $is_obsolete );
+    defined($comment)      && $self->comment( $comment  );
+    ref($dblinks)          && $self->add_dblink(@$dblinks);
+    ref($references)       && $self->add_reference(@$references);
+
+    return $self;
+} # new
+
+
+
+sub init {
+
+    my ($self) = @_;
+
+    # Scalar attributes are cleared, in this fixed order.
+    $self->$_(undef) for qw(identifier name comment definition ontology);
+
+    # A term is not obsolete by default.
+    $self->is_obsolete(0);
+
+    # List-valued attributes are emptied.
+    $self->remove_synonyms();
+    $self->remove_dblinks();
+    $self->remove_references;
+    $self->remove_secondary_ids();
+
+} # init
+
+
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $term->identifier( "GO:0003947" );
+           or
+           print $term->identifier();
+ Function: Set/get for the identifier of this Term.
+ Returns : The identifier [scalar].
+ Args    : The identifier [scalar] (optional).
+
+=cut
+
+sub identifier {
+    my ( $self, @new_value ) = @_;
+
+    # Set mode: any argument, even undef, replaces the stored identifier.
+    $self->{'identifier'} = $new_value[0] if @new_value;
+
+    return $self->{'identifier'};
+} # identifier
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $term->name( "N-acetylgalactosaminyltransferase" );
+           or
+           print $term->name();
+ Function: Set/get for the name of this Term.
+ Returns : The name [scalar].
+ Args    : The name [scalar] (optional).
+
+=cut
+
+sub name {
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, even undef, switches the accessor into set mode.
+    if (@new_value) {
+        $self->{'name'} = $new_value[0];
+    }
+    return $self->{'name'};
+} # name
+
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $term->definition( "Catalysis of ..." );
+           or
+           print $term->definition();
+ Function: Set/get for the definition of this Term.
+ Returns : The definition [scalar].
+ Args    : The definition [scalar] (optional).
+
+=cut
+
+sub definition {
+    my ( $self, @new_value ) = @_;
+
+    # With an argument this is a setter; the stored value is returned
+    # in both modes.
+    $self->{'definition'} = $new_value[0] if @new_value;
+
+    return $self->{'definition'};
+} # definition
+
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $ont = $term->ontology();
+           or
+           $term->ontology( $ont );
+ Function: Get the ontology this term is in.
+
+           Note that with the ontology in hand you can query for all
+           related terms etc.
+
+ Returns : The ontology of this Term as a Bio::Ontology::OntologyI
+           implementing object.
+ Args    : On set, the  ontology of this Term as a Bio::Ontology::OntologyI
+           implementing object or a string representing its name.
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub ontology {
+    my ( $self, @args ) = @_;
+
+    # Get mode: no argument supplied.
+    return $self->{"_ontology"} unless @args;
+
+    # Set mode. A false value (e.g. undef) is stored as given.
+    my $ont = $args[0];
+    if ($ont) {
+        # A plain string is taken as the name of a new ontology object.
+        unless ( ref($ont) ) {
+            $ont = Bio::Ontology::Ontology->new(-name => $ont);
+        }
+        # Whatever we have now must implement the ontology interface.
+        $self->throw(ref($ont)." does not implement ".
+                     "Bio::Ontology::OntologyI. Bummer.")
+            unless $ont->isa("Bio::Ontology::OntologyI");
+    }
+    return $self->{"_ontology"} = $ont;
+} # ontology
+
+
+=head2 version
+
+ Title   : version
+ Usage   : $term->version( "1.00" );
+           or
+           print $term->version();
+ Function: Set/get for version information.
+ Returns : The version [scalar].
+ Args    : The version [scalar] (optional).
+
+=cut
+
+sub version {
+    my ( $self, @new_value ) = @_;
+
+    # Set mode when called with an argument (undef included).
+    if (@new_value) {
+        $self->{'version'} = $new_value[0];
+    }
+    return $self->{'version'};
+} # version
+
+
+
+=head2 is_obsolete
+
+ Title   : is_obsolete
+ Usage   : $term->is_obsolete( 1 );
+           or
+           if ( $term->is_obsolete() )
+ Function: Set/get for the obsoleteness of this Term.
+ Returns : the obsoleteness [0 or 1].
+ Args    : the obsoleteness [0 or 1] (optional).
+
+=cut
+
+sub is_obsolete{
+    my ( $self, @new_value ) = @_;
+
+    # The flag is stored as given; no validation is applied here.
+    $self->{'is_obsolete'} = $new_value[0] if @new_value;
+
+    return $self->{'is_obsolete'};
+} # is_obsolete
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $term->comment( "Consider the term ..." );
+           or
+           print $term->comment();
+ Function: Set/get for an arbitrary comment about this Term.
+ Returns : A comment.
+ Args    : A comment (optional).
+
+=cut
+
+sub comment{
+    my ( $self, @new_value ) = @_;
+
+    # Passing any argument (undef included) overwrites the comment.
+    if (@new_value) {
+        $self->{'comment'} = $new_value[0];
+    }
+    return $self->{'comment'};
+} # comment
+
+
+
+
+=head2 get_synonyms
+
+ Title   : get_synonyms
+ Usage   : @aliases = $term->get_synonyms;
+ Function: Returns a list of aliases of this Term.
+ Returns : A list of aliases [array of [scalar]].
+ Args    :
+
+=cut
+
+sub get_synonyms {
+    my ($self) = @_;
+
+    # No synonym slot yet: hand back the empty list.
+    return () unless exists($self->{ "_synonyms" });
+
+    return @{ $self->{ "_synonyms" } };
+} # get_synonyms
+
+
+=head2 add_synonym
+
+ Title   : add_synonym
+ Usage   : $term->add_synonym( @asynonyms );
+           or
+           $term->add_synonym( $synonym );
+ Function: Pushes one or more synonyms into the list of synonyms.
+ Returns :
+ Args    : One synonym [scalar] or a list of synonyms [array of [scalar]].
+
+=cut
+
+sub add_synonym {
+    my $self = shift;
+
+    return unless( @_ );
+
+    # Append each candidate unless an equal string is already stored.
+    for my $candidate (@_) {
+        my $already_there = grep { $_ eq $candidate; } @{$self->{ "_synonyms" }};
+        push( @{ $self->{ "_synonyms" } }, $candidate ) unless $already_there;
+    }
+
+} # add_synonym
+
+
+=head2 remove_synonyms
+
+ Title   : remove_synonyms()
+ Usage   : $term->remove_synonyms();
+ Function: Deletes (and returns) the synonyms of this Term.
+ Returns : A list of synonyms [array of [scalar]].
+ Args    :
+
+=cut
+
+sub remove_synonyms {
+    my $self = shift;
+
+    # Snapshot the current synonyms, then reset the slot to an empty list.
+    my @removed = $self->get_synonyms();
+    $self->{ "_synonyms" } = [];
+
+    return @removed;
+
+} # remove_synonyms
+
+=head2 get_dblinks
+
+ Title   : get_dblinks()
+ Usage   : @ds = $term->get_dblinks();
+ Function: Returns a list of each dblinks of this GO term.
+ Returns : A list of dblinks [array of [scalars]].
+ Args    : A scalar indicating the context (optional).
+           If omitted, all dblinks will be returned.
+
+=cut
+
+sub get_dblinks {
+    my ( $self, $context ) = @_;
+
+    # No context requested: flatten the links of every context.
+    return map { @$_ } values %{$self->{_dblinks}} unless defined($context);
+
+    # A context nobody has stored links under yields the empty list.
+    return () unless exists($self->{_dblinks}->{$context});
+
+    return @{$self->{_dblinks}->{$context}};
+} # get_dblinks
+
+
+=head2 get_dblink_context
+
+  Title   : get_dblink_context
+  Usage   : @context = $term->get_dblink_context;
+  Function: Return all context existing in Term
+  Returns : a list of scalar
+  Args    : [none]
+
+=cut
+
+sub get_dblink_context {
+    my ($self) = @_;
+
+    # The contexts are the keys of the per-context dblink hash.
+    my @contexts = keys %{$self->{_dblinks}};
+    return @contexts;
+}
+
+=head2 add_dblink
+
+ Title   : add_dblink
+ Usage   : $term->add_dblink( @dbls );
+           or
+           $term->add_dblink( $dbl );
+ Function: Pushes one or more dblinks onto the list of dblinks.
+ Returns :
+ Args    : One  dblink [scalar] or a list of
+            dblinks [array of [scalars]].
+
+=cut
+
+sub add_dblink {
+    my ( $self, @dblinks ) = @_;
+
+    # Every link goes into the default context, one call per link.
+    for my $dblink (@dblinks) {
+        $self->add_dblink_context( $dblink, '_default' );
+    }
+} # add_dblink
+
+
+=head2 has_dblink
+
+  Title   : has_dblink
+  Usage   : $term->has_dblink($dblink);
+  Function: Checks if a DBXref is already existing in the OBOterm object
+  Return  : TRUE/FALSE
+  Args    : [arg1] A DBxref identifier
+
+=cut
+
+sub has_dblink {
+    my ( $self, $value ) = @_;
+
+    # Without a value to look for there is nothing to report.
+    return unless defined $value;
+
+    # Only the default context is searched. The context used to be a
+    # hard-coded local, so the former "'all' is reserved" check and the
+    # defaulting assignment could never take effect and were removed.
+    my $links = $self->{_dblinks}->{'_default'};
+    return FALSE unless $links;
+
+    # String equality against each stored link.
+    return ( grep { $_ eq $value } @{$links} ) ? TRUE : FALSE;
+}
+
+
+=head2 add_dblink_context
+
+  Title   : add_dblink_context
+  Usage   : $term->add_dblink_context($db, $context);
+  Function: add a dblink with its context
+  Return  : [none]
+  Args    : [arg1] an object of Bio::Annotation::DBLink
+            [arg2] a string for context; if omitted, the
+                   default/context-less one will be used.
+
+=cut
+
+sub add_dblink_context {
+    my ( $self, $value, $context ) = @_;
+
+    # Nothing to add.
+    return unless defined $value;
+
+    # Apply the default first, so that an omitted context is never
+    # string-compared while still undefined.
+    $context ||= '_default';
+    $self->throw("'all' is a reserved word for context.") if $context eq 'all';
+
+    # Create the per-context list on first use.
+    my $links = $self->{_dblinks}->{$context} ||= [];
+
+    # A duplicate only triggers a warning; the value is still appended.
+    if ( grep { $_ eq $value } @{$links} ) {
+        $self->warn("$value exists in the dblink of $context");
+    }
+    push @{$links}, $value;
+}
+
+=head2 remove_dblinks
+
+ Title   : remove_dblinks()
+ Usage   : $term->remove_dblinks();
+ Function: Deletes (and returns) the dblinks of this term.
+ Returns : A list of dblinks [array of [scalars]].
+ Args    : Context. If omitted or equal to 'all', all dblinks
+           will be removed.
+
+=cut
+
+sub remove_dblinks {
+    my $self    = shift;
+    my $context = shift;
+
+    # 'all' is treated exactly like an omitted context.
+    undef $context if $context && ($context eq "all");
+
+    # Collect what is about to be dropped, then reset the storage.
+    my @removed = $self->get_dblinks($context);
+    if (!defined($context)) {
+        $self->{_dblinks} = {};
+    } else {
+        $self->{_dblinks}->{$context}=[];
+    }
+    return @removed;
+} # remove_dblinks
+
+
+=head2 get_references
+
+  Title   : get_references
+  Usage   : @references = $self->get_references
+  Function: Returns a list of references
+  Return  : A list of objects
+  Args    : [none]
+
+=cut
+
+sub get_references {
+    my ($self) = @_;
+
+    # An object without a reference slot yields the empty list.
+    return () unless exists $self->{_references};
+
+    return @{$self->{_references}};
+}
+
+=head2 add_reference
+
+  Title   : add_reference
+  Usage   : $self->add_reference($reference);
+            $self->add_reference($reference1, $reference2);
+  Function: Add one or more references
+  Returns : [none]
+
+=cut
+
+sub add_reference {
+    my $self = shift;
+    return unless @_;
+
+    for my $candidate (@_) {
+        # False values (undef in particular) are silently ignored.
+        next if !$candidate;
+
+        # Skip anything that compares equal to an already stored reference.
+        my $known = grep{$_ eq $candidate} @{$self->{_references}};
+        push @{$self->{_references}}, $candidate unless $known;
+    }
+}
+
+=head2 remove_references
+
+  Title   : remove_references
+  Usage   : $self->remove_references;
+  Function: Deletes (and returns) all references
+  Returns : A list of references
+  Args    : [none]
+
+=cut
+
+sub remove_references {
+    my ($self) = @_;
+
+    # Remember the current references, then reset the slot.
+    my @removed = $self->get_references;
+    $self->{_references} = [];
+
+    return @removed;
+}
+
+=head2 get_secondary_ids
+
+ Title   : get_secondary_ids
+ Usage   : @ids = $term->get_secondary_ids();
+ Function: Returns a list of secondary identifiers of this Term.
+
+           Secondary identifiers mostly originate from merging terms,
+           or possibly also from splitting terms.
+
+ Returns : A list of secondary identifiers [array of [scalar]]
+ Args    :
+
+=cut
+
+sub get_secondary_ids {
+    my ($self) = @_;
+
+    # Nothing stored yet: return the empty list.
+    return () unless exists($self->{"_secondary_ids"});
+
+    return @{$self->{"_secondary_ids"}};
+} # get_secondary_ids
+
+
+=head2 add_secondary_id
+
+ Title   : add_secondary_id
+ Usage   : $term->add_secondary_id( @ids );
+           or
+           $term->add_secondary_id( $id );
+ Function: Adds one or more secondary identifiers to this term.
+ Returns :
+ Args    : One or more secondary identifiers [scalars]
+
+=cut
+
+sub add_secondary_id {
+    my ( $self, @ids ) = @_;
+
+    return unless @ids;
+
+    foreach my $id (@ids) {
+        # Ignore undefined or empty ids. The old test ("!$_" inside the
+        # grep) examined the ids already stored instead of the incoming
+        # one, so an undef id could be stored and a single false stored
+        # id then blocked every later addition.
+        next unless defined($id) && length($id);
+
+        # avoid duplicates
+        next if grep { defined($_) && $_ eq $id } @{$self->{ "_secondary_ids" }};
+        push( @{ $self->{ "_secondary_ids" } }, $id );
+    }
+    return;
+
+} # add_secondary_id
+
+
+=head2 remove_secondary_ids
+
+ Title   : remove_secondary_ids
+ Usage   : $term->remove_secondary_ids();
+ Function: Deletes (and returns) the secondary identifiers of this Term.
+ Returns : The previous list of secondary identifiers [array of [scalars]]
+ Args    :
+
+=cut
+
+sub remove_secondary_ids {
+    my ($self) = @_;
+
+    # Keep a copy of the current ids for the caller, then reset the slot.
+    my @removed = $self->get_secondary_ids();
+    $self->{ "_secondary_ids" } = [];
+
+    return @removed;
+
+} # remove_secondary_ids
+
+
+# Title   :_is_true_or_false
+# Function: Checks whether the argument is TRUE or FALSE.
+# Returns :
+# Args    : The value to be checked.
+sub _is_true_or_false {
+    my $self  = shift;
+    my $value = shift;
+
+    # Reject anything containing a non-digit, and any digit string that is
+    # numerically neither TRUE nor FALSE.
+    if ( $value =~ /\D/ || ( $value != TRUE && $value != FALSE ) ) {
+        $self->throw( "Found [" . $value
+        . "] where " . TRUE . " or " . FALSE . " expected" );
+    }
+} # _is_true_or_false
+
+=head1 Methods implementing L<Bio::IdentifiableI> and L<Bio::DescribableI>
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object.
+
+           This is a synonym for identifier().
+
+ Returns : A scalar
+
+=cut
+
+sub object_id {
+    my $self = shift;
+
+    # Pure forward: get and set are both handled by identifier().
+    return $self->identifier(@_);
+}
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for
+           organisation (eg, wormbase.org)
+
+           This forwards to ontology()->authority(). Note that you
+           cannot set the authority before having set the ontology or
+           the namespace (which will set the ontology).
+
+ Returns : A scalar
+ Args    : on set, the new value (a scalar)
+
+=cut
+
+sub authority {
+    my ( $self, @args ) = @_;
+
+    # With an ontology attached, it answers both get and set.
+    my $ont = $self->ontology();
+    if ($ont) {
+        return $ont->authority(@args);
+    }
+
+    # No ontology: setting is an error, getting yields nothing.
+    if (@args) {
+        $self->throw("cannot manipulate authority prior to ".
+                     "setting the namespace or ontology");
+    }
+    return;
+}
+
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection.
+
+           This forwards to ontology() (set mode) and
+           ontology()->name() (get mode). I.e., setting the namespace
+           will set the ontology to one matching that name in the
+           ontology store, or to one newly created.
+
+ Returns : A scalar
+ Args    : on set, the new value (a scalar)
+
+=cut
+
+sub namespace {
+    my ( $self, @args ) = @_;
+
+    # Set mode hands the argument(s) straight to ontology().
+    if (@args) {
+        $self->ontology(@args);
+    }
+
+    # The namespace is the name of whatever ontology is attached now.
+    my $ont = $self->ontology();
+    return undef unless defined($ont);
+    return $ont->name();
+}
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user.
+
+           The definition in Bio::DescribableI states that the
+           string should not contain spaces. As this is not very
+           sensible for ontology terms, we relax this here. The
+           implementation just forwards to name().
+
+ Returns : A scalar
+ Args    : on set, the new value (a scalar)
+
+=cut
+
+sub display_name {
+    my $self = shift;
+
+    # Pure forward: name() handles both get and set.
+    return $self->name(@_);
+}
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text.
+
+           This forwards to definition(). The caveat is that the text
+           will often be longer for ontology term definitions than the
+           255 characters stated in the definition in
+           Bio::DescribableI.
+
+ Returns : A scalar
+ Args    : on set, the new value (a scalar)
+
+=cut
+
+sub description {
+    my $self = shift;
+
+    # Pure forward: definition() handles both get and set.
+    return $self->definition(@_);
+}
+
+#################################################################
+# aliases or forwards to maintain backward compatibility
+#################################################################
+
+=head1  Deprecated methods
+
+Used for looking up the methods that supersede them.
+
+=cut
+
+# Each old method name on the left is installed as another name for the
+# subroutine on the right; both names run the same code.
+*each_synonym = \&get_synonyms;
+*add_synonyms = \&add_synonym;
+*each_dblink = \&get_dblinks;
+*add_dblinks = \&add_dblink;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,137 @@
+# $Id: TermFactory.pm,v 1.7.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::TermFactory
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::TermFactory - Instantiates a new 
+Bio::Ontology::TermI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Ontology::TermFactory;
+
+    # the default type is Bio::Ontology::Term
+    my $factory = new Bio::Ontology::TermFactory(
+                        -type => 'Bio::Ontology::GOterm');
+    my $term = $factory->create_object(-name => 'peroxisome',
+                                       -ontology => 'Gene Ontology',
+                                       -identifier => 'GO:0005777');
+
+
+=head1 DESCRIPTION
+
+This object will build L<Bio::Ontology::TermI> objects generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Ontology::TermFactory;
+use strict;
+
+use Bio::Root::Root;
+
+use base qw(Bio::Factory::ObjectFactory);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Ontology::TermFactory();
+ Function: Builds a new Bio::Ontology::TermFactory object 
+ Returns : Bio::Ontology::TermFactory
+ Args    : -type => string, name of a Bio::Ontology::TermI derived class.
+                    The default is Bio::Ontology::Term.
+
+See L<Bio::Ontology::TermI>, L<Bio::Ontology::Term>.
+
+=cut
+
+sub new {
+    # Constructor; all arguments are passed on to the parent constructor.
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+  
+    # make sure this matches our requirements
+    $self->interface("Bio::Ontology::TermI");
+    # Keep whatever type() already reports (presumably set from -type by
+    # the parent constructor -- confirm); otherwise fall back to the
+    # default term class.
+    $self->type($self->type() || "Bio::Ontology::Term");
+
+    return $self;
+}
+
+
+=head2 create_object
+
+ Title   : create_object
+ Usage   : my $term = $factory->create_object(<named parameters>);
+ Function: Instantiates new Bio::Ontology::TermI (or one of its child classes)
+
+           This object allows us to genericize the instantiation of
+           Term objects.
+
+ Returns : Bio::Ontology::TermI compliant object
+           The return type is configurable using new(-type =>"...").
+ Args    : initialization parameters specific to the type of term
+           object we want.  Typically 
+           -name        => $name
+           -identifier  => identifier for the term
+           -ontology    => ontology for the term
+
+See L<Bio::Ontology::TermI>.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Ontology/TermI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,352 @@
+# $Id: TermI.pm,v 1.17.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Ontology::TermI
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Ontology::TermI - interface for ontology terms
+
+=head1 SYNOPSIS
+
+  #get Bio::Ontology::TermI somehow.
+
+  print $term->identifier(), "\n";
+  print $term->name(), "\n";
+  print $term->definition(), "\n";
+  print $term->is_obsolete(), "\n";
+  print $term->comment(), "\n";
+
+  foreach my $synonym ( $term->get_synonyms() ) {
+      print $synonym, "\n";
+  }
+
+
+=head1 DESCRIPTION
+
+This is a "dumb" interface for ontology terms providing basic methods
+(it provides no functionality related to graphs).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Ontology::TermI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+
+
+=head2 identifier
+
+ Title   : identifier
+ Usage   : $term->identifier( "0003947" );
+           or
+           print $term->identifier();
+ Function: Set/get for the identifier of this Term.
+ Returns : The identifier [scalar].
+ Args    : The identifier [scalar] (optional).
+
+=cut
+
+sub identifier {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # identifier
+
+
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $term->name( "N-acetylgalactosaminyltransferase" );
+           or
+           print $term->name();
+ Function: Set/get for the name of this Term.
+ Returns : The name [scalar].
+ Args    : The name [scalar] (optional).
+
+=cut
+
+sub name {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # name
+
+
+
+
+
+=head2 definition
+
+ Title   : definition
+ Usage   : $term->definition( "Catalysis of ..." );
+           or
+           print $term->definition();
+ Function: Set/get for the definition of this Term.
+ Returns : The definition [scalar].
+ Args    : The definition [scalar] (optional).
+
+=cut
+
+sub definition {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # definition
+
+
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   : $ont = $term->ontology();
+           or 
+           $term->ontology( $ont );
+ Function: Get the ontology this term is in.
+
+           An implementation may not permit the value of this
+           attribute to be changed once it is set, since that may have
+           serious consequences (note that with the ontology in hand
+           you can query for all related terms etc).
+
+           Note for implementors: you will almost certainly have to
+           take special precaution in order not to create cyclical
+           references in memory.
+
+ Returns : The ontology of this Term as a Bio::Ontology::OntologyI
+           implementing object.
+ Args    : On set, the  ontology of this Term as a Bio::Ontology::OntologyI
+           implementing object or a string representing its name.
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+sub ontology {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # ontology
+
+
+
+=head2 version
+
+ Title   : version
+ Usage   : $term->version( "1.00" );
+           or 
+           print $term->version();
+ Function: Set/get for version information.
+ Returns : The version [scalar].
+ Args    : The version [scalar] (optional).
+
+=cut
+
+sub version {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # version
+
+
+
+
+=head2 is_obsolete
+
+ Title   : is_obsolete
+ Usage   : $term->is_obsolete( 1 );
+           or
+           if ( $term->is_obsolete() )
+ Function: Set/get for the obsoleteness of this Term.
+ Returns : the obsoleteness [0 or 1].
+ Args    : the obsoleteness [0 or 1] (optional).
+
+=cut
+
+sub is_obsolete {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # is_obsolete
+
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $term->comment( "Consider the term ..." );
+           or 
+           print $term->comment();
+ Function: Set/get for an arbitrary comment about this Term.
+ Returns : A comment.
+ Args    : A comment (optional).
+
+=cut
+
+sub comment {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # comment
+
+
+
+
+=head2 get_synonyms
+
+ Title   : get_synonyms
+ Usage   : @aliases = $term->get_synonyms();
+ Function: Returns a list of aliases of this Term.
+
+           If an implementor of this interface permits modification of
+           this array property, the class should define at least
+           methods add_synonym() and remove_synonyms(), with obvious
+           functionality.
+
+ Returns : A list of aliases [array of [scalar]].
+ Args    :
+
+=cut
+
+sub get_synonyms {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # get_synonyms
+
+=head2 get_dblinks
+
+ Title   : get_dblinks()
+ Usage   : @ds = $term->get_dblinks();
+ Function: Returns a list of each dblink of this term.
+
+           If an implementor of this interface permits modification of
+           this array property, the class should define at least
+           methods add_dblink() and remove_dblinks(), with obvious
+           functionality.
+
+ Returns : A list of dblinks [array of [scalars]].
+ Args    :
+
+=cut
+
+sub get_dblinks {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # get_dblinks
+
+=head2 get_secondary_ids
+
+ Title   : get_secondary_ids
+ Usage   : @ids = $term->get_secondary_ids();
+ Function: Returns a list of secondary identifiers of this Term.
+
+           Secondary identifiers mostly originate from merging terms,
+           or possibly also from splitting terms.
+
+           If an implementor of this interface permits modification of
+           this array property, the class should define at least
+           methods add_secondary_id() and remove_secondary_ids(), with
+           obvious functionality.
+
+ Returns : A list of secondary identifiers [array of [scalar]]
+ Args    :
+
+=cut
+
+sub get_secondary_ids {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+} # get_secondary_ids
+
+
+=head1  Deprecated methods
+
+Used for looking up the methods that supersede them.
+
+=cut
+
+=head2 category
+
+ Title   : category
+ Usage   :
+ Function: This method is deprecated. Use ontology() instead. We provide
+           an implementation here that preserves backwards compatibility,
+           but if you do not have legacy code using it you should not be
+           calling this method.
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub category {
+    my $self = shift;
+
+    # Every call, get or set, emits the deprecation warning.
+    $self->warn("TermI::category is deprecated and being phased out. ".
+                "Use TermI::ontology instead.");
+
+    # called in set mode?
+    if(@_) {
+        # yes; what is incompatible with ontology() is if we were given
+        # a TermI object, so such an object is replaced by its name
+        my $arg = shift;
+        $arg = $arg->name() if ref($arg) && $arg->isa("Bio::Ontology::TermI");
+        # any remaining arguments are passed through unchanged
+        return $self->ontology($arg,@_);
+    } else {
+        # No, called in get mode. This is always incompatible with ontology()
+        # since category is supposed to return a TermI. A new term is built
+        # from the ontology's name and identifier; without an ontology the
+        # result is undef.
+        my $ont = $self->ontology();
+        my $term;
+        if(defined($ont)) {
+            $term = Bio::Ontology::Term->new(-name => $ont->name(),
+                                             -identifier =>$ont->identifier());
+        }
+        return $term;
+    }
+} # category
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/BaseSAXHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/BaseSAXHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/BaseSAXHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,228 @@
+# $Id: BaseSAXHandler.pm,v 1.4.6.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for BaseSAXHandler
+#
+# Cared for by Juguang Xiao, juguang at tll.org.sg
+#
+# Copyright Juguang Xiao 
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::Handlers::BaseSAXHandler
+
+=head1 DESCRIPTION
+
+This module is an abstract module, serving as the base of any SAX Handler 
+implementation. It tries to offer the framework that SAX handlers generally 
+need, such as tag_stack, char_store, etc.
+
+In your handler implementation, you can take advantage of this base module by
+following these suggestions.
+
+1) In start_element,
+
+ sub start_element {
+     my $self=shift;
+     my $tag=$_[0]->{Name};
+     my %args=%{$_[0]->{Attributes}};
+     # Your code here.
+
+     # Before you conclude the method, write these 2 lines.
+     $self->_visited_count_inc($tag);
+     $self->_push_tag($tag);
+ }
+
+2) In end_element,
+
+ sub end_element {
+     my $self=shift;
+     my $tag=shift->{Name};
+     # Your code here.
+
+     # Before you conclude the method, write these 2 lines.
+     $self->_visited_count_dec($tag);
+     $self->_pop_tag;
+ }
+
+3) In characters, or any other methods where you may use the tag
+stack or count
+
+ sub characters {
+     my $self=shift;
+     my $text=shift->{Data};
+
+     $self->_chars_hash->{$self->_top_tag} .= $text;
+
+ }
+ $count = $self->_visited_count('myTag');
+ $tag = $self->_top_tag;
+
+
+=head1 AUTHOR
+
+Juguang Xiao, juguang at tll.org.sg
+
+=head2 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::OntologyIO::Handlers::BaseSAXHandler;
+use strict;
+use base qw(Bio::Root::Root);
+
+
+sub new {    # build via the parent class, then add the SAX bookkeeping
+    my ($pkg, @ctor_args) = @_;
+    my $handler = $pkg->SUPER::new(@ctor_args);
+    $handler->_initialize(@ctor_args);
+    return $handler;
+}
+
+sub _initialize {
+    # Reset every per-parse bookkeeping slot to an empty container.
+    my ($self) = @_;
+    $self->{_tag_stack} = [];
+    $self->{$_} = {} for qw(_visited_count _chars_hash);
+    $self->{_current_hash} = {};    # last statement: its value is returned
+}
+
+=head2 _tag_stack
+
+  Title   : _tag_stack
+  Usage   : @tags = $self->_tag_stack;
+  Function: Get the list of tags that have been opened but not yet closed.
+  Return  : 
+  Args    :    
+
+=cut
+
+sub _tag_stack {    # list copy of the stacked tags, in the order they were pushed
+    return @{ $_[0]->{_tag_stack} };
+}
+
+=head2 _push_tag
+
+=cut
+
+sub _push_tag {    # append a tag to the stack; push() yields the new depth
+    my ($handler, $name) = @_;
+    push @{ $handler->{_tag_stack} }, $name;
+}
+
+=head2 _pop_tag
+
+=cut
+
+sub _pop_tag {    # remove and return the most recently pushed tag
+    my ($handler) = @_;
+    return pop @{ $handler->{_tag_stack} };
+}
+
+=head2 _top_tag
+
+  Title   : _top_tag
+  Usage   : $top = $self->_top_tag;
+  Function: get the top tag in the tag stack.
+  Return  : a tag name
+  Args    : [none]   
+
+=cut
+
+sub _top_tag {
+    # Peek at the most recently pushed tag without popping it and without
+    # copying the stack (the original duplicated the array on every call).
+    # Args    : none
+    # Returns : the last pushed tag name, or undef when the stack is empty
+    my $self = shift;
+    my $stack = $self->{_tag_stack};
+    return $stack->[-1];
+}
+
+
+=head2 _chars_hash
+
+  Title   : _chars_hash
+  Usage   : $hash= $self->_chars_hash;
+  Function: return the character cache for the specific tag
+  Return  : a hash reference, which is intended for character storage for tags
+  Args    : [none]
+
+=cut
+
+sub _chars_hash {    # the hash ref that _initialize set up for character data
+    return $_[0]->{_chars_hash};
+}
+
+=head2 _current_hash
+
+=cut
+
+sub _current_hash {    # plain reader for the slot created in _initialize
+    return $_[0]{_current_hash};
+}
+
+=head2 _visited_count_inc
+
+  Title   : _visited_count_inc
+  Usage   : $self->_visited_count_inc($tag); # increase the counter for the tag
+  Function: the counter for the tag increase
+  Return  : the current count after this increment
+  Args    : the tag name [scalar]
+
+=cut
+
+sub _visited_count_inc {
+    # Bump the visit counter for one tag and report the new total.
+    # A tag seen for the first time starts at 1; pre-increment on a missing
+    # hash slot creates it, so no explicit exists() test is needed.
+    # Args    : the tag name [scalar]
+    # Returns : the counter value after the increment
+    my ($handler, $name) = @_;
+    my $counts = $handler->{_visited_count};
+    return ++$counts->{$name};
+}
+
+=head2 _visited_count_dec
+
+  Title   : _visited_count_dec
+  Usage   : $self->_visited_count_dec($tag);
+  Function: the counter for the tag decreases by one
+  Return  : the current count for the specific tag after the decrement
+  Args    : the tag name [scalar]
+
+=cut
+
+sub _visited_count_dec {
+    # Decrement $tag's visit counter; throw() if the tag was never counted.
+    my ($handler, $tag) = @_;
+    my $counts = $handler->{_visited_count};
+    if (!exists $counts->{$tag}) {
+        $handler->throw("'$tag' has not been visited yet. How to decrease it?!");
+    }
+    else { $counts->{$tag}-- }
+    return $counts->{$tag};
+}
+
+=head2 _visited_count
+
+  Title   : _visited_count
+  Usage   : $count = $self->_visited_count($tag);
+  Function: return the counter for the tag
+  Return  : the current counter for the specific tag
+  Args    : the tag name [scalar]
+
+=cut
+
+sub _visited_count {    # current counter for a tag; undef if it has none
+    my ($handler, $name) = @_;
+    return $handler->{_visited_count}{$name};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterProHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterProHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterProHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,722 @@
+# $Id: InterProHandler.pm,v 1.15.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for InterProHandler
+#
+# Cared for by Peter Dimitrov <dimitrov at gnf.org>
+#
+# Copyright Peter Dimitrov
+# (c) Peter Dimitrov, dimitrov at gnf.org, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::Handlers::InterProHandler - XML handler class for InterProParser
+
+=head1 SYNOPSIS
+
+ # do not use directly - used and instantiated by InterProParser
+
+=head1 DESCRIPTION
+
+Handles xml events generated by InterProParser when parsing InterPro
+XML files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov at gnf.org
+
+=head1 CONTRIBUTORS
+
+Juguang Xiao, juguang at tll.org.sg
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::OntologyIO::Handlers::InterProHandler;
+use strict;
+use Carp;
+use Bio::Ontology::Ontology;
+use Bio::Ontology::RelationshipType;
+use Bio::Ontology::SimpleOntologyEngine;
+use Bio::Annotation::Reference;
+use Data::Dumper;
+
+use base qw(Bio::Root::Root);
+
+my ($record_count, $processed_count, $is_a_rel, $contains_rel, $found_in_rel);
+
+=head2 new
+
+ Title   : new
+ Usage   : $h = Bio::OntologyIO::Handlers::InterProHandler->new;
+ Function: Initializes global variables
+ Example :
+ Returns : an InterProHandler object
+ Args    :
+
+
+=cut
+
+sub new{
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  # Pick the named constructor arguments out of @args.
+  my ($eng,$ont,$name,$fact) =
+      $self->_rearrange([qw(ENGINE
+			    ONTOLOGY
+			    ONTOLOGY_NAME
+			    TERM_FACTORY)
+			 ],@args);
+  # Use the supplied ontology, or create one (named "InterPro" by default).
+  if(defined($ont)) {
+      $self->ontology($ont);
+  } else {
+      $name = "InterPro" unless $name;
+      $self->ontology(Bio::Ontology::Ontology->new(-name => $name));
+  }
+  $self->ontology_engine($eng) if $eng;
+  # Optional at construction time; term_factory() can also set it later.
+  $self->term_factory($fact) if $fact;
+  # File-scoped relationship types: every new() re-binds them to its ontology.
+  $is_a_rel = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+  $contains_rel = Bio::Ontology::RelationshipType->get_instance( "CONTAINS" );
+  $found_in_rel = Bio::Ontology::RelationshipType->get_instance( "FOUND_IN" );
+  $is_a_rel->ontology($self->ontology());
+  $contains_rel->ontology($self->ontology());
+  $found_in_rel->ontology($self->ontology());
+  $self->_cite_skip(0);                    # not inside an <abstract> yet
+  $self->secondary_accessions_map( {} );   # filled while sec_list ends
+
+  return $self;
+}
+
+=head2 ontology_engine
+
+ Title   : ontology_engine
+ Usage   : $obj->ontology_engine($newval)
+ Function: Get/set ontology engine. Can be initialized only once.
+ Example : 
+ Returns : value of ontology_engine (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub ontology_engine{
+  # Get/set the ontology engine; it can be assigned only once.
+  my ($self, $engine) = @_;
+  if (defined $engine) {
+    if (defined $self->{'ontology_engine'}) {
+      $self->throw("ontology_engine already defined");
+    } else {
+      if (!$engine->isa("Bio::Ontology::OntologyEngineI")) {
+        $self->throw(ref($engine)." does not implement ".
+                     "Bio::Ontology::OntologyEngineI. Bummer.");
+      }
+      $self->{'ontology_engine'} = $engine;
+
+      # Keep handler and ontology pointing at the same engine: hand it to
+      # the ontology as well, unless the ontology already has one.
+      my $ont = $self->ontology();
+      $ont->engine($engine)
+          if $ont && $ont->can("engine") && (!$ont->engine());
+
+      my $banner = ref($self) .
+                   "::ontology_engine: registering ontology engine (".
+                   ref($engine)."):\n";
+      $self->debug($banner . $engine->to_string."\n");
+    }
+  }
+
+  return $self->{'ontology_engine'};
+}
+
+=head2 ontology
+
+ Title   : ontology
+ Usage   :
+ Function: Get the ontology to add the InterPro terms to.
+
+           The value is determined automatically once ontology_engine
+           has been set and if it hasn't been set before.
+
+ Example :
+ Returns : A L<Bio::Ontology::OntologyI> implementing object.
+ Args    : On set, a L<Bio::Ontology::OntologyI> implementing object.
+
+=cut
+
+sub ontology{    # get/set the ontology; only OntologyI objects are accepted
+    my ($self, $ont) = @_;
+    if (defined $ont) {
+        if (!$ont->isa("Bio::Ontology::OntologyI")) {
+            $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI".
+                         ". Bummer.");
+        }
+        $self->{'_ontology'} = $ont;
+    }
+    return $self->{'_ontology'};
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $obj->term_factory($newval)
+ Function: Get/set the ontology term object factory 
+ Example : 
+ Returns : value of term_factory (a Bio::Factory::ObjectFactory instance)
+ Args    : on set, new value (a Bio::Factory::ObjectFactory instance
+           or undef, optional)
+
+
+=cut
+
+sub term_factory{
+    # Get/set the term object factory; on set any value, even undef, is stored.
+    my ($self, @new) = @_;
+    $self->{'term_factory'} = $new[0] if @new;
+    return $self->{'term_factory'};
+}
+
+=head2 _cite_skip
+
+ Title   : _cite_skip
+ Usage   : $obj->_cite_skip($newval)
+ Function: 
+ Example : 
+ Returns : value of _cite_skip (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub _cite_skip{
+  # Get/set the flag that start_element raises for <abstract> and that
+  my $self = shift;    # end_element checks before clearing the text buffer.
+  my $flag = shift;
+
+  $self->{'_cite_skip'} = $flag if defined $flag;
+
+  return $self->{'_cite_skip'};
+}
+
+=head2 _hash
+
+ Title   : _hash
+ Usage   : $obj->_hash($newval)
+ Function: 
+ Example : 
+ Returns : value of _hash (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub _hash{
+  # Plain get/set accessor for the '_hash' slot; undef never overwrites it.
+  my ($self, @rest) = @_;
+  my $new = $rest[0];
+  if (defined $new) {
+    $self->{'_hash'} = $new;
+  }
+  return $self->{'_hash'};
+}
+
+=head2 _stack
+
+ Title   : _stack
+ Usage   : $obj->_stack($newval)
+ Function: 
+ Example : 
+ Returns : value of _stack (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub _stack{
+  # Get/set the array ref of per-element attribute hashes; start_element
+  # installs a fresh one for every <interpro> record.
+  my $self = shift;
+  my $frames = shift;
+  $self->{'_stack'} = $frames if defined $frames;
+  return $self->{'_stack'};
+}
+
+=head2 _top
+
+ Title   : _top
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _top{
+  # Return the last element of the array referenced by $_stack (undef when it
+  # is empty) without copying the array, as the original did on every call.
+  my ($self, $_stack) = @_;
+  return @{$_stack} ? $_stack->[-1] : undef;
+}
+
+=head2 _term
+
+ Title   : _term
+ Usage   : $obj->_term($newval)
+ Function: Get/set method for the term currently processed.
+ Example : 
+ Returns : value of term (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub _term{
+  # Get/set the term of the <interpro> record currently being parsed.
+  # An undef argument leaves the stored term untouched (see _clear_term).
+  my ($self, $current) = @_;
+
+  defined $current and $self->{'_term'} = $current;
+
+  return $self->{'_term'};
+}
+
+=head2 _clear_term
+
+ Title   : _clear_term
+ Usage   :
+ Function: Removes the current term from the handler
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _clear_term{
+  # Forget the term being built; the removed value is what delete() returns.
+  my $self = shift;
+  return delete $self->{'_term'};
+}
+
+=head2 _names
+
+ Title   : _names
+ Usage   : $obj->_names($newval)
+ Function: 
+ Example : 
+ Returns : value of _names (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub _names{
+  # Get/set the array ref of open element names kept alongside _stack;
+  # start_element pushes onto it and end_element pops from it.
+  my ($self, $names) = @_;
+
+  if (defined $names) { $self->{'_names'} = $names }
+
+  return $self->{'_names'};
+}
+
+=head2 _create_relationship
+
+ Title   : _create_relationship
+ Usage   :
+ Function: Helper function. Adds relationships to one of the relationship stores.
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _create_relationship{  # relate the current term to the term with id $ref_id
+  my ($self, $ref_id, $rel_type_term) = @_;
+  my $ont = $self->ontology();
+  my $fact = $self->term_factory();
+  my $term_temp = ($ont->engine->get_term_by_identifier($ref_id))[0];
+  # $rel_type_term becomes the predicate; subject and object are set below.
+  my $rel = Bio::Ontology::Relationship->new( -predicate_term => $rel_type_term );
+  # Referenced term not known yet: add a placeholder, marked uninstantiated.
+  if (!defined $term_temp) {
+    $term_temp = $ont->engine->add_term( $fact->create_object( -InterPro_id => $ref_id , -name=>$ref_id, -ontology=>$ont) );
+    $ont->engine->mark_uninstantiated($term_temp);
+  }
+  my $rel_type_name = $self->_top($self->_names);
+  # The enclosing list element decides which term is subject and which object.
+  if ($rel_type_name eq 'parent_list' || $rel_type_name eq 'found_in') {
+    $rel->object_term( $term_temp );
+    $rel->subject_term( $self->_term );
+  } else {
+    $rel->object_term( $self->_term );
+    $rel->subject_term( $term_temp );
+  }
+  $rel->ontology($ont);
+  $ont->add_relationship($rel);
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   :
+ Function: This is a method that is derived from XML::SAX::Base and
+           has to be overridden for processing start of xml element
+           events. Used internally only.
+
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub start_element {  # SAX callback: an element was opened
+  my ($self, $element) = @_;
+  my $ont = $self->ontology();
+  my $fact = $self->term_factory();
+  # <interprodb>: add the terms that the "type" attribute of <interpro> names.
+  if ($element->{Name} eq 'interprodb') {
+    $ont->add_term($fact->create_object(-identifier => "Active_site",
+					-name => "Active Site") );
+    $ont->add_term($fact->create_object(-identifier => "Binding_site",
+					-name => "Binding Site") );
+    $ont->add_term($fact->create_object(-identifier => "Family",
+					-name => "Family") );
+    $ont->add_term($fact->create_object(-identifier => "Domain",
+					-name => "Domain") );
+    $ont->add_term($fact->create_object(-identifier => "Repeat",
+					-name => "Repeat") );
+    $ont->add_term($fact->create_object(-identifier => "PTM",
+				 -name => "post-translational modification"));
+  } elsif ($element->{Name} eq 'interpro') {
+    my %record_args = %{$element->{Attributes}};
+    my $id = $record_args{"id"};
+    my $term_temp = ($ont->engine->get_term_by_identifier($id))[0];
+    # Re-use a term the engine already knows under this id, else create one.
+    $self->_term(
+		 (!defined $term_temp)
+		 ? $ont->add_term( $fact->create_object(-InterPro_id => $id, -name=>$id) )
+		 : $term_temp
+		);
+
+    $self->_term->ontology( $ont );
+    $self->_term->short_name( $record_args{"short_name"} );
+    $self->_term->protein_count( $record_args{"protein_count"} );
+    $self->_increment_record_count();
+    $self->_stack([{ interpro => undef }]);  # fresh stacks for every record
+    $self->_names(["interpro"]);
+
+    ## Adding a relationship between the newly created InterPro term
+    ## and the term describing its type
+
+    my $rel = Bio::Ontology::Relationship->new( -predicate_term => $is_a_rel );
+    $rel->object_term( ($ont->engine->get_term_by_identifier($record_args{"type"}))[0] );
+    $rel->subject_term( $self->_term );
+    $rel->ontology($ont);
+    $ont->add_relationship($rel);
+  }
+  elsif (defined $self->_stack) {  # any other element once a record was seen
+    my %hash = ();
+    # Copy the attributes into a hash of their own; end_element pops it again.
+    if (keys %{$element->{Attributes}} > 0) {
+      foreach my $key (keys %{$element->{Attributes}}) {
+	$hash{$key} = $element->{Attributes}->{$key};
+      }
+    }
+    push @{$self->_stack}, \%hash;
+    if ($element->{Name} eq 'rel_ref') {
+      my $ref_id = $element->{Attributes}->{"ipr_ref"};
+      my $parent = $self->_top($self->_names);
+      # $parent is the enclosing element: this element's name is pushed below.
+      if ($parent eq 'parent_list' || $parent eq 'child_list') {
+	$self->_create_relationship($ref_id, $is_a_rel);
+      }
+      if ($parent eq 'contains' ) {
+	$self->_create_relationship($ref_id, $contains_rel);
+      }
+      if ($parent eq 'found_in' ) {
+	$self->_create_relationship($ref_id, $found_in_rel);
+      }
+    }
+    elsif ($element->{Name} eq 'abstract') {
+      $self->_cite_skip(1);  # end_element keeps the text buffer while this is set
+    }
+    push @{$self->_names}, $element->{Name};
+  }
+
+}
+
+=head2 _char_storage
+
+ Title   : _char_storage
+ Usage   : $obj->_char_storage($newval)
+ Function: 
+ Example : 
+ Returns : value of _char_storage (a scalar)
+ Args    : new value (a scalar, optional)
+
+
+=cut
+
+sub _char_storage{
+  # Get/set the buffer of character data collected by characters().
+  # A defined argument (including '') replaces the buffer.
+  my $self = shift;
+  my ($text) = @_;
+
+  $self->{'_char_storage'} = $text if defined $text;
+  return $self->{'_char_storage'};
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   :
+ Function: This is a method that is derived from XML::SAX::Base and has to be overridden for processing xml characters events. Used internally only.
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub characters {
+  # SAX callback: append one chunk of character data to the text buffer,
+  # minus a trailing newline and any leading whitespace.
+  my ($self, $event) = @_;
+  my $chunk = $event->{Data};
+  chomp $chunk;
+  $chunk =~ s/^(\s+)//;
+  $self->{_char_storage} .= $chunk;
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   :
+ Function: This is a method that is derived from XML::SAX::Base and has to be overridden for processing end of xml element events. Used internally only.
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub end_element {  # SAX callback: an element was closed
+  my ($self, $element) = @_;
+  # $record_count and $processed_count below are file-scoped counters.
+  if ($element->{Name} eq 'interprodb') {
+    $self->debug("Interpro DB Parser Finished: $record_count read, $processed_count processed\n");
+  }
+  elsif ($element->{Name} eq 'interpro') {
+    $self->_clear_term;
+    $self->_increment_processed_count();
+  }
+  elsif ($element->{Name} ne 'cite') {  # closing 'cite' tags are ignored
+    $self->{_char_storage} =~ s/<\/?p>//g;
+    if ((defined $self->_stack)) {
+      my $current_hash = pop @{$self->_stack};
+      my $parent_hash = $self->_top($self->_stack);
+      my $current_hash_key = pop @{$self->_names};
+      # NOTE(review): start_element pushes 'cite' too, yet it is never popped here - confirm intended.
+      if (keys %{$current_hash} > 0 && $self->_char_storage ne "") {
+	$current_hash->{comment} = $self->_char_storage;
+	push @{ $parent_hash->{$current_hash_key} }, $current_hash
+      }
+      elsif ($self->_char_storage ne ""){  # text only: kept under a sentinel key
+	push @{ $parent_hash->{$current_hash_key} }, { 'accumulated_text_12345' => $self->_char_storage };
+      }
+      elsif (keys %{$current_hash} > 0) {
+	push @{ $parent_hash->{$current_hash_key} }, $current_hash;
+      }
+      if ($element->{Name} eq 'pub_list') {
+	my @refs = ();
+	# Build one Bio::Annotation::Reference per collected publication.
+	foreach my $pub_record ( @{ $current_hash->{publication} } ) {
+	  my $ref = Bio::Annotation::Reference->new;
+	  my $loc = $pub_record->{location}->[0];
+
+	  $ref->location( $pub_record->{journal}->[0]->{accumulated_text_12345}.", ".$loc->{firstpage}."-".$loc->{lastpage}.", ".$loc->{volume}.", ".$pub_record->{year}->[0]->{accumulated_text_12345});
+	  $ref->title( $pub_record->{title}->[0]->{accumulated_text_12345} );
+	  my $ttt = $pub_record->{author_list}->[0];
+
+	  $ref->authors( $ttt->{accumulated_text_12345} );
+	  $ref->medline( scalar($ttt->{dbkey}) )
+	      if exists($ttt->{db}) && $ttt->{db} eq "MEDLINE";
+	  push @refs, $ref;
+	}
+ 	$self->_term->add_reference(@refs);
+      }
+      elsif ($element->{Name} eq 'name') {
+ 	$self->_term->name( $self->_char_storage );
+      }
+      elsif ($element->{Name} eq 'abstract') {
+	$self->_term->definition( $self->_char_storage );
+	$self->_cite_skip(0);
+      }
+      elsif ($element->{Name} eq 'member_list') {
+	my @refs = ();
+
+	foreach my $db_xref ( @{ $current_hash->{db_xref} } ) {
+	  push @refs, Bio::Annotation::DBLink->new( -database => $db_xref->{db},
+						    -primary_id => $db_xref->{dbkey}
+						  );
+	}
+ 	$self->_term->add_member(@refs);
+      }
+      elsif ($element->{Name} eq 'sec_list') {
+	my @refs = ();
+
+	foreach my $sec_ac ( @{ $current_hash->{sec_ac} } ) {
+	  push @refs, $sec_ac->{sec_ac};
+	}
+ 	$self->_term->add_secondary_id(@refs);
+	$self->secondary_accessions_map->{$self->_term->identifier} = \@refs;
+      }
+      elsif ($element->{Name} eq 'example_list') {
+	my @refs = ();
+
+	foreach my $example ( @{ $current_hash->{example} } ) {
+	  push @refs, Bio::Annotation::DBLink->new( -database => $example->{db_xref}->[0]->{db},
+						    -primary_id => $example->{db_xref}->[0]->{dbkey},
+						    -comment => $example->{comment}
+						  );
+	}
+ 	$self->_term->add_example(@refs);
+      }
+      elsif ($element->{Name} eq 'external_doc_list') {
+	my @refs = ();
+
+	foreach my $db_xref ( @{ $current_hash->{db_xref} } ) {
+	  push @refs, Bio::Annotation::DBLink->new( -database => $db_xref->{db},
+						    -primary_id => $db_xref->{dbkey}
+						  );
+	}
+ 	$self->_term->add_external_document(@refs);
+      }
+      elsif ($element->{Name} eq 'class_list') {
+	my @refs = ();
+
+	foreach my $classification ( @{ $current_hash->{classification} } ) {
+	  push @refs, Bio::Annotation::DBLink->new( -database => $classification->{class_type},
+						    -primary_id => $classification->{id}
+						  );
+	}
+ 	$self->_term->class_list(\@refs);
+      }
+      elsif ($element->{Name} eq 'deleted_entries') {
+	my @refs = ();
+	# @refs stays unused in this branch; known terms are only flagged obsolete.
+	foreach my $del_ref ( @{ $current_hash->{del_ref} } ) {
+	  my $term = ($self->ontology_engine->get_term_by_identifier( $del_ref->{id} ))[0];
+
+	  $term->is_obsolete(1) if defined $term;
+	}
+      }
+    }
+    $self->_char_storage( '' ) if !$self->_cite_skip;  # keep text while the flag is set
+  }
+}
+
+=head2 secondary_accessions_map
+
+ Title   : secondary_accessions_map
+ Usage   : $obj->secondary_accessions_map($newval)
+ Function: 
+ Example : $map = $interpro_handler->secondary_accessions_map();
+ Returns : Reference to a hash that maps InterPro identifier to an
+  array reference of secondary accessions following the InterPro
+ xml schema.
+ Args    : Empty hash reference
+
+
+=cut
+
+sub secondary_accessions_map{
+  # Get/set the hash ref mapping an InterPro identifier to the array ref
+  # of its secondary accessions (filled while sec_list elements are closed).
+  my ($self, $map) = @_;
+
+  $self->{'secondary_accessions_map'} = $map if defined $map;
+
+  return $self->{'secondary_accessions_map'};
+}
+
+=head2 _increment_record_count
+
+ Title   : _increment_record_count
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _increment_record_count{    # one more <interpro> record has been started
+  return $record_count++;
+}
+
+=head2 _increment_processed_count
+
+ Title   : _increment_processed_count
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _increment_processed_count{    # count a finished record
+  ++$processed_count;
+  print STDERR $processed_count."\n" if 0 == $processed_count % 100;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterPro_BioSQL_Handler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterPro_BioSQL_Handler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/Handlers/InterPro_BioSQL_Handler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,583 @@
+# $Id: InterPro_BioSQL_Handler.pm,v 1.7.6.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for InterPro_BioSQL_Handler
+#
+# Cared for by Juguang Xiao, juguang at tll.org.sg
+#
+# Copyright Juguang Xiao 
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler - parse an InterPro XML file and persist the resulting terms to a Biosql database
+
+=head1 SYNOPSIS
+
+   # see load_interpro.pl in bioperl-db/scripts/biosql/
+
+=head1 DESCRIPTION
+
+This module is for parsing an InterPro XML file and persisting the
+resulting terms to a Biosql database as soon as the term is complete
+as signaled by the appropriate xml tag. This parser takes advantage of
+SAX, a stream-based XML parser technology, to keep the used memory as
+small as possible. The alternative parser for InterPro, module
+InterProHandler, builds up the entire ontology in memory, which given
+the size of the latest InterPro releases requires a huge amount of
+memory.
+
+This module takes the following non-standard arguments upon
+instantiation.
+
+   -db           the adaptor factory as returned by a call to
+                 Bio::DB::BioDB->new()
+   -version      the InterPro version (not available as property!)
+   -term_factory the object factory to use for creating terms
+
+Note that there are two alternatives for how to persist the terms and
+relationships to the database. The default is using the adaptor
+factory passed as -db or set as a property to create persistent
+objects and store them in the database. The alternative is to specify
+a term persistence and a relationship persistence handler; if one or
+both have been set, the respective handler will be called with each
+term and relationship that is to be stored. See properties
+persist_term_handler and persist_relationship_handler.
+
+=head1 AUTHOR
+
+Juguang Xiao, juguang at tll.org.sg
+
+=head1 Contributors
+
+Hilmar Lapp, hlapp at gmx.net
+
+=head2 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler;
+use strict;
+use Bio::Ontology::Ontology;
+use Bio::Ontology::Term;
+use Bio::Ontology::TermFactory;
+use Bio::Ontology::RelationshipType;
+use Bio::Ontology::Relationship;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Reference;
+
+use base qw(Bio::OntologyIO::Handlers::BaseSAXHandler);
+
+my $is_a_rel;
+my $count=0;
+
+sub _initialize {    # set up parser state, term factory and target ontology
+    my($self,@args)=@_;
+    $self->SUPER::_initialize(@args);    # base class bookkeeping first
+    my ($db, $version, $fact) = $self->_rearrange(
+        [qw(DB  VERSION TERM_FACTORY)], @args);
+    $self->db($db) if $db; # this is now a property and may be set later
+    if (!$fact) {    # default: a factory producing Bio::Ontology::Term objects
+        $fact = Bio::Ontology::TermFactory->new(-type=>"Bio::Ontology::Term");
+    }
+    $self->term_factory($fact);
+    my $ontology = Bio::Ontology::Ontology->new(-name => 'InterPro');
+    if (defined($version)) {    # the version is kept in the ontology definition
+        $version = "InterPro version $version";
+        $ontology->definition($version);
+    }
+    $self->_ontology($ontology);
+    $is_a_rel = Bio::Ontology::RelationshipType->get_instance('IS_A');
+    $is_a_rel->ontology($ontology);    # file-scoped, shared by all instances
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $obj->term_factory($newval)
+ Function: Get/set the ontology term factory to use.
+
+           As a user of this module it is not necessary to call this
+           method as there will be default. In order to change the
+           default, the easiest way is to instantiate
+           L<Bio::Ontology::TermFactory> with the proper -type
+           argument. Most if not all parsers will actually use this
+           very implementation, so even easier than the aforementioned
+           way is to simply call
+           $ontio->term_factory->type("Bio::Ontology::MyTerm").
+
+ Example : 
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
+ Args    : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
+
+
+=cut
+
+sub term_factory{
+    # Get/set the ontology term factory; a set call stores even undef.
+    my $self = shift;
+    $self->{'term_factory'} = $_[0] if @_;
+    return $self->{'term_factory'};
+}
+
+=head2 db
+
+ Title   : db
+ Usage   : $obj->db($newval)
+ Function: Sets or retrieves the database adaptor factory. 
+
+           The adaptor factory is a Bio::DB::DBAdaptorI compliant
+           object and will be used to obtain the persistence adaptors
+           necessary to serialize terms and relationships to the
+           database.
+
+           Usually, you will obtain such an object from a call to
+           Bio::DB::BioDB. You *must* set this property before
+           starting the parse.
+
+           Note that this property is immutable once set, except that
+           you may set it to undef. Therefore, be careful not to set
+           to undef before setting the desired real value.
+
+ Example : 
+ Returns : value of db (a Bio::DB::DBAdaptorI compliant object)
+ Args    : on set, new value (a Bio::DB::DBAdaptorI compliant object
+           or undef, optional)
+
+
+=cut
+
+sub db {
+    # Get/set the adaptor factory; a true value may not replace a different one.
+    my ($self, @new) = @_;
+    if (@new) {
+        my $factory = $new[0];
+        $self->throw('db may not be modified once set')
+            if $factory && exists($self->{_db}) && ($self->{_db} != $factory);
+        $self->{_db} = $factory;
+    }
+    return $self->{_db};
+}
+
+=head2 persist_term_handler
+
+ Title   : persist_term_handler
+ Usage   : $obj->persist_term_handler($handler, @args)
+ Function: Sets or retrieves the persistence handler for terms along
+           with the constant set of arguments to be passed to the 
+           handler.
+
+           If set, the first argument will be treated as a closure and
+           be called for each term to persist to the database. The
+           term will be passed as a named parameter (-term), followed
+           by the other arguments passed to this setter. Note that
+           this allows to pass an arbitrary configuration to the
+           handler.
+
+           If not set, terms will be persisted along with their
+           relationships using the respective persistence adaptor
+           returned by the adaptor factory (see property db).
+
+ Example : 
+ Returns : an array reference with the values passed on set, or an empty
+           array if never set
+ Args    : On set, an array of values. The first value is the handler
+           as a closure; all other values will be passed to the handler
+           as constant argument.
+
+
+=cut
+
+sub persist_term_handler{
+    # Get/set [$handler, @constant_args]; an empty array ref if never set.
+    my ($self, @spec) = @_;
+    $self->{'persist_term_handler'} = [@spec] if @spec;
+    return $self->{'persist_term_handler'} || [];
+}
+
+=head2 persist_relationship_handler
+
+ Title   : persist_relationship_handler
+ Usage   : $obj->persist_relationship_handler($handler, @args)
+ Function: Sets or retrieves the persistence handler for relationships
+           along with the constant set of arguments to be passed to
+           the handler.
+
+           If set, the first argument will be treated as a closure and
+           be called for each relationship to persist to the database. The
+           relationship will be passed as a named parameter (-rel), followed
+           by the other arguments passed to this setter. Note that
+           this allows to pass an arbitrary configuration to the
+           handler.
+
+           If not set, relationships will be persisted along with their
+           relationships using the respective persistence adaptor
+           returned by the adaptor factory (see property db).
+
+ Example : 
+ Returns : an array reference with the values passed on set, or an empty
+           array if never set
+ Args    : On set, an array of values. The first value is the handler
+           as a closure; all other values will be passed to the handler
+           as constant argument.
+
+
+=cut
+
+sub persist_relationship_handler {
+    my ($self, @handler_spec) = @_;
+
+    # Setter: keep a private copy of the handler closure and its constant
+    # arguments, and hand the stored array reference back.
+    if (@handler_spec) {
+        return $self->{'persist_relationship_handler'} = [@handler_spec];
+    }
+
+    # Getter: a fresh empty array reference stands in for "never set".
+    return $self->{'persist_relationship_handler'} || [];
+}
+
+=head2 _persist_term
+
+ Title   : _persist_term
+ Usage   :
+ Function: Persists a term to the database, using either a previously 
+           set persistence handler, or the adaptor factory directly.
+ Example :
+ Returns : 
+ Args    : the ontology term to persist
+
+
+=cut
+
+sub _persist_term {
+    my $self = shift;
+    my $term = shift;
+
+    # The stored handler spec is ($closure, @constant_args); it is empty if
+    # no handler was ever set, in which case $handler is undef.
+    my ($handler, @args) = @{$self->persist_term_handler};
+    if ($handler) {
+        # custom persistence: the term goes first as the named parameter
+        # -term, followed by the constant arguments given to the setter
+        $handler->('-term' => $term, @args);
+    } else {
+        # no handler; we'll do this ourselves straight and simple
+        my $db = $self->db();
+        my $pterm = $db->create_persistent($term);
+        eval {
+            $pterm->create();
+            $pterm->commit();
+        };
+        # Save the error right away: $@ is a global, and rollback() must not
+        # get a chance to overwrite it before it is reported.
+        if (my $err = $@) {
+            $pterm->rollback();
+            $self->warn("failed to store term '".$term->name."': ".$err);
+        }
+    }
+}
+
+=head2 _persist_relationship
+
+ Title   : _persist_relationship
+ Usage   :
+ Function: Persists a relationship to the database, using either a
+           previously set persistence handler, or the adaptor factory
+           directly.
+
+ Example :
+ Returns : 
+ Args    : the term relationship to persist
+
+
+=cut
+
+sub _persist_relationship {
+    my $self = shift;
+    my $rel = shift;
+
+    # The stored handler spec is ($closure, @constant_args); it is empty if
+    # no handler was ever set, in which case $handler is undef.
+    my ($handler, @args) = @{$self->persist_relationship_handler};
+    if ($handler) {
+        # custom persistence: the relationship goes first as the named
+        # parameter -rel, followed by the constant arguments given to the
+        # setter
+        $handler->('-rel' => $rel, @args);
+    } else {
+        # no handler; we'll do this ourselves straight and simple
+        my $db = $self->db();
+        my $prel = $db->create_persistent($rel);
+        eval {
+            $prel->create();
+            $prel->commit();
+        };
+        # Save the error right away: $@ is a global, and rollback() must not
+        # get a chance to overwrite it before it is reported.
+        if (my $err = $@) {
+            $prel->rollback();
+            $self->warn("failed to store relationship of subject '"
+                        .$rel->subject_term->name."' to object '"
+                        .$rel->object_term->name."': ".$err);
+        }
+    }
+}
+
+=head2 _persist_ontology
+
+ Title   : _persist_ontology
+ Usage   :
+ Function: Persists the ontology itself to the database, by either 
+           inserting or updating it.
+
+           Note that this will only create or update the ontology as
+           an entity, not any of its terms, relationships, or
+           relationship types.
+
+ Example :
+ Returns : the ontology as a persistent object with primary key
+ Args    : the ontology to persist as a Bio::Ontology::OntologyI
+           compliant object
+
+
+=cut
+
+sub _persist_ontology{
+    my $self = shift;
+    my $ont = shift;
+    my $db = $self->db();
+
+    # do a lookup first; chances are we have this already in the database
+    my $adp = $db->get_object_adaptor($ont);
+    # to avoid clobbering this ontology's properties with possibly older ones
+    # from the database we'll need an object factory
+    my $ontfact = 
+        Bio::Factory::ObjectFactory->new(-type=>"Bio::Ontology::Ontology");
+    # do the lookup:
+    my $found = $adp->find_by_unique_key($ont, '-obj_factory' => $ontfact);
+    # make a persistent object of the ontology
+    $ont = $db->create_persistent($ont);
+    # transfer primary key if found in the lookup
+    $ont->primary_key($found->primary_key) if $found;
+    # insert or update
+    my $result;
+    eval {
+        $result = $ont->store();
+    };
+    # Save the error right away: $@ is a global, and rollback() must not get
+    # a chance to overwrite it before it is put into the exception message.
+    my $err = $@;
+    if ($err || !$result) {
+        $adp->rollback();
+        $self->throw("failed to update ontology '"
+                     .$ont->name."' in database".($err ? ": $err" : ""));
+    }
+
+    # done - we don't commit here
+    # store() may hand back the stored object itself; otherwise return the
+    # persistent wrapper created above
+    return ref($result) ? $result : $ont;
+}
+
+# Called at the start of the document: creates the terms for the fixed set
+# of InterPro entry types, persists each of them and adds it to the ontology.
+sub start_document {
+    my $self = shift;
+    my $ontology = $self->_ontology;
+
+    # [identifier, name] for each entry type, in creation order
+    my @type_specs = (
+        ['IPR:Family',       'Family'],
+        ['IPR:Domain',       'Domain'],
+        ['IPR:Repeat',       'Repeat'],
+        ['IPR:PTM',          'post-translational modification'],
+        ['IPR:Active_site',  'Active_site'],
+        ['IPR:Binding_site', 'Binding_site'],
+    );
+
+    # all terms are created first, and only then persisted one by one
+    my @type_terms = map {
+        $self->create_term(-identifier => $_->[0],
+                           -name       => $_->[1],
+                           -ontology   => $ontology)
+    } @type_specs;
+
+    for my $type_term (@type_terms) {
+        $self->_persist_term($type_term);
+        $ontology->add_term($type_term);
+    }
+}
+
+# SAX callback for an opening tag.
+# The single argument is a hash reference from which the tag name (Name) and
+# the tag's attributes (Attributes, a hash reference) are read. Depending on
+# the tag, per-element state is set up for later use; finally the tag is
+# counted as visited and pushed onto the tag stack.
+sub start_element {
+    my $self=shift;
+    my $tag=$_[0]->{Name};
+    my %args=%{$_[0]->{Attributes}};
+    my $ont = $self->_ontology;
+
+    if($tag eq 'interpro'){
+        # a new InterPro entry: create its term, then relate it to the term
+        # with identifier "IPR:<type>" looked up in the ontology's engine
+        my $id = $args{id};
+        my $term = $self->create_term(-identifier=>$id);
+        $term->ontology($ont);
+        $term->add_synonym($args{short_name});
+        #$term->definition();
+        
+        my ($object_term) =
+            ($ont->engine->get_term_by_identifier("IPR:".$args{type}));
+        
+        # NOTE(review): $is_a_rel is not declared in this sub; presumably a
+        # file-scoped variable holding the IS_A predicate term - confirm
+        my $rel = Bio::Ontology::Relationship->new(
+            -subject_term => $term,
+            -predicate_term => $is_a_rel,
+            -object_term => $object_term,
+            -ontology => $ont
+        );
+        # remembered as the current relationship
+        $self->_relationship($rel);
+    }elsif($tag eq 'example'){
+        # start an empty DBLink; it is filled in by a nested db_xref below
+        my $example = Bio::Annotation::DBLink->new;
+        $self->_current_hash->{example} = $example;
+    }elsif($tag eq 'db_xref'){
+        # what a cross-reference means depends on the tag reported by _top_tag
+        my $top = $self->_top_tag;
+        if($top eq 'example'){
+            my $example = $self->_current_hash->{example};
+            $example->database($args{db});
+            $example->primary_id($args{dbkey});
+            #print "EXAmPLE:\t", $example->database, '|', $example->primary_id, "\n";
+        }elsif($top eq 'child'){
+            ;
+        }elsif($top eq 'member_list'){
+            # NOTE(review): the DBLink built here is not stored anywhere in
+            # this branch, so it is discarded when the branch ends
+            my $dblink=Bio::Annotation::DBLink->new(
+                -dbname => $args{id},
+                -primary_id => $args{dbkey},
+                -comment => $args{name}
+            );
+        }elsif($top eq 'external_doc_list'){
+            ;
+        }elsif($top eq 'publication'){
+            # remember the publication's key under the database it refers to;
+            # any database other than MEDLINE or PUBMED only yields a warning
+            if($args{db} eq 'MEDLINE'){
+                $self->_current_hash->{medline} =$args{dbkey};
+            } elsif($args{db} eq 'PUBMED'){
+                $self->_current_hash->{pubmed} =$args{dbkey};
+            }else{
+                $self->warn("'".$args{dbkey}."' is not a MEDLINE publication, "
+                            ."don't know how to handle");
+            }
+        }elsif($top eq 'structure_db_links'){
+            ;
+        }elsif($top eq 'abstract'){
+            ;
+        }
+        #else{
+        #    $self->warn("unrecognized element '$top' in element '$tag', ignoring");
+        #}
+    }elsif($tag eq 'publication'){
+        # start an empty reference object for this publication
+        my $publication = Bio::Annotation::Reference->new();
+        $self->_current_hash->{publication} = $publication;
+    }elsif($tag eq 'author_list'){
+        ;
+    }elsif($tag eq 'journal'){
+        ;
+    }elsif($tag eq 'location'){
+        ;
+    }elsif($tag eq 'year'){
+        ;
+    } elsif (($tag eq 'dbinfo') && ($self->_top_tag eq 'release')) {
+        # release information: append one line per member database to the
+        # ontology's definition, with the time of day (" hh:mm:ss") removed
+        # from the file date
+        my $entrydate = $args{file_date} || '';
+        $entrydate =~ s/ \d{2}:\d{2}:\d{2}//;
+        my $def = $ont->definition() || '';
+        $def .= "\n" if length($def) > 0;
+        $def .= $args{dbname}." version ".$args{version}.", "
+            .$args{entry_count}." entries, ".$entrydate;
+        $ont->definition($def);
+    }
+    #else{
+    #    $self->warn("unrecognized element '$tag', ignoring");
+    #}
+        
+    $self->_visited_count_inc($tag);
+    $self->_push_tag($tag);
+}
+
+
+# SAX callback for a closing tag.
+# The single argument is a hash reference from which the tag name (Name) is
+# read. The character data collected under that tag name is applied to the
+# current relationship's subject term or to the publication being built;
+# finally the tag is popped off the tag stack and its visit count decreased.
+sub end_element {
+    my $self=shift;
+    my $tag=shift->{Name};
+    # the text accumulated by characters() under this tag name
+    my $chars_in=$self->_chars_hash->{$tag};
+    if($tag eq 'interpro'){
+        my $rel = $self->_relationship;
+        # store subject term first in order to give the handler a chance to
+        # apply whatever custom behaviour
+        # (note that the object term is the InterPro type and has been stored
+        # at the start of the whole document)
+        $self->_persist_term($rel->subject_term);
+        # then store the relationship to the InterPro type
+        $self->_persist_relationship($rel);
+    }elsif($tag eq 'name'){
+        # the collected text becomes the subject term's name; the buffer is
+        # emptied for the next entry
+        my $rel = $self->_relationship;
+        $rel->subject_term->name($self->_chars_hash->{name});
+        $self->_chars_hash->{name}='';
+    }elsif($tag eq 'abstract'){
+        # the abstract becomes the term's definition, with newlines turned
+        # into spaces
+        my $rel = $self->_relationship;
+        my $abstract = $self->_chars_hash->{abstract};
+        $abstract =~ s/\n/ /g;
+        $rel->subject_term->definition($abstract);
+        $self->_chars_hash->{abstract} = '';
+    }elsif($tag eq 'example'){
+        # the whitespace-trimmed text becomes the comment of the DBLink that
+        # was created when the example element was opened
+        my $example = $self->_current_hash->{example};
+        my $comment = $self->_chars_hash->{example};
+        $comment =~ s/^(\s+)//; $comment =~ s/(\s+)$//;
+        $example->comment($comment);
+        $self->_relationship->subject_term->add_dblink($example);
+        $self->_chars_hash->{example}='';
+    }elsif($tag eq 'publication'){
+        my $publication = $self->_create_publication;
+        $self->_relationship->subject_term->add_reference($publication);
+    }elsif($tag eq 'author_list'){
+        # NOTE(review): unlike name/abstract/example, the author_list and
+        # title buffers are not emptied here; unless they are reset elsewhere
+        # their text would accumulate across publications - confirm
+        $self->_current_hash->{author} =$chars_in;
+    }elsif($tag eq 'title'){
+        $self->_current_hash->{title}=$chars_in;
+    } elsif ($tag eq 'release') {
+        # the release section is complete: persist the ontology itself and
+        # continue with the object returned by _persist_ontology
+        my $ont = $self->_persist_ontology($self->_ontology);
+        $self->_ontology($ont) if $ont;
+    }
+    $self->_pop_tag;
+    $self->_visited_count_dec($tag);
+}
+
+
+# SAX callback for character data: appends the text chunk (the Data entry of
+# the hash reference passed in) to the buffer kept for the tag returned by
+# _top_tag.
+sub characters {
+    my $self = shift;
+    my $chunk = shift->{Data};
+
+    my $open_tag = $self->_top_tag;
+    my $buffers = $self->_chars_hash;
+    $buffers->{$open_tag} .= $chunk;
+
+#    $self->_chars_hash->{abstract} .= $text if $self->_visited_count('abstract');
+}
+
+# Creates a term by passing all arguments on to the term factory.
+sub create_term {
+    my ($self, @term_args) = @_;
+    return $self->term_factory->create_object(@term_args);
+}
+
+# Get/set accessor for the ontology this handler works on.
+sub _ontology {
+    my ($self, @new_value) = @_;
+    $self->{_ontology} = $new_value[0] if @new_value;
+    return $self->{_ontology};
+}
+
+# Get/set accessor for the relationship currently being assembled.
+sub _relationship {
+    my ($self, @new_value) = @_;
+    if (@new_value) {
+        $self->{_relationship} = $new_value[0];
+    }
+    return $self->{_relationship};
+}
+# Completes the reference object started for the current publication element
+# from the values collected in the current hash (authors, title, location,
+# MEDLINE/PubMed ids), clears those values for the next publication, and
+# returns the completed reference object.
+sub _create_publication {
+    my $self=shift;
+    my $current = $self->_current_hash;
+    my $publ = $current->{publication};
+    # missing location parts are replaced by visible placeholders
+    my $journal = $current->{journal} || '<no journal>';
+    my $year = $current->{year} || '<no year>';
+    my $page_location = $current->{page_location} || '<no pages>';
+    my $volumn = $current->{volumn} || '<no volume>';
+    # the PubMed id doubles as MEDLINE id if no MEDLINE id was given
+    my $medline = $current->{medline} || $current->{pubmed};
+
+    $publ->authors($current->{author});
+    $publ->location("$journal, $year, V $volumn, $page_location");
+    $publ->title($current->{title});
+    $publ->medline($medline);
+    # only record the PubMed id separately if it differs from the MEDLINE id
+    if ($current->{pubmed}
+        && ($current->{pubmed} != $medline)) {
+        $publ->pubmed($current->{pubmed});
+    }
+
+    # Clear the above in current hash (keys are kept, values set to undef)
+    $current->{$_} = undef
+        for qw(publication author journal year page_location
+               volumn title medline pubmed);
+
+    # The caller adds the return value to the term as a reference; without an
+    # explicit return the value of the last assignment (undef) was handed back.
+    return $publ;
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/InterProParser.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/InterProParser.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/InterProParser.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,250 @@
+# $Id: InterProParser.pm,v 1.11.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for InterProParser
+#
+# Cared for by Peter Dimitrov <dimitrov at gnf.org>
+#
+# Copyright Peter Dimitrov
+# (c) Peter Dimitrov, dimitrov at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::InterProParser - Parser for InterPro xml files.
+
+=head1 SYNOPSIS
+
+    # don't use this module directly - use Bio::OntologyIO instead
+    my $ipp = Bio::OntologyIO->new( -format  => 'interpro',
+                                    -file    => 't/data/interpro.xml',
+                                    -ontology_engine => 'simple' );
+
+=head1 DESCRIPTION
+
+  Use InterProParser to parse InterPro files in xml format. Typical
+  use is the interpro.xml file published by EBI. The xml records
+  should follow the format described in interpro.dtd, although the dtd
+  file is not needed, and the XML file will not be validated against it.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Dimitrov
+
+Email dimitrov at gnf.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::OntologyIO::InterProParser;
+use strict;
+#use Carp;
+use XML::Parser::PerlSAX;
+use Bio::Ontology::SimpleOntologyEngine;
+use Bio::Ontology::TermFactory;
+use Bio::OntologyIO::Handlers::InterProHandler;
+
+use base qw(Bio::OntologyIO);
+
+=head2 new
+
+ Title   : new
+ Usage   :
+ Function: Initializes objects needed for parsing.
+ Example : $ipp = Bio::OntologyIO::InterProParser->new( 
+                                  -file => 't/data/interpro.xml',
+				  -ontology_engine => 'simple' )
+
+ Returns : Object of class Bio::OntologyIO::InterProParser.
+ Args    :
+
+  -file            - file name
+  -ontology_engine - type of ontology engine. Should satisfy the
+                     OntologyEngine interface requirements. Currently
+                     the only option is 'simple'. In the future
+                     Graph.pm based engine will be added to the
+                     choices.
+
+
+=cut
+
+# in reality we let OntologyIO handle the first pass initialization
+# and instead override _initialize().
+sub _initialize{
+    my ($self, @args) = @_;
+
+    $self->SUPER::_initialize(@args);
+
+    my ($engine, $engine_type, $ont_name) =
+        $self->_rearrange([qw(ENGINE ONTOLOGY_ENGINE ONTOLOGY_NAME)], @args);
+
+    # the SAX handler that does the actual work during parsing
+    my $handler = Bio::OntologyIO::Handlers::InterProHandler->new(
+                                             -ontology_name => $ont_name);
+
+    # no engine supplied: build one of the requested type, 'simple' being
+    # both the default and the only type implemented
+    unless ($engine) {
+        $engine_type ||= 'simple';
+        $self->throw("ontology engine type '$engine_type' ".
+                     "not implemented yet")
+            unless lc($engine_type) eq 'simple';
+        $engine = Bio::Ontology::SimpleOntologyEngine->new();
+    }
+    # an ontology object may be passed in place of an engine; hand it to the
+    # handler and carry on with its engine, if it has one
+    if ($engine->isa("Bio::Ontology::OntologyI")) {
+        $handler->ontology($engine);
+        $engine = $engine->engine() if $engine->can('engine');
+    }
+    $self->{_ontology_engine} = $engine;
+    $handler->ontology_engine($engine);
+
+    $self->{_parser} = XML::Parser::PerlSAX->new( Handler => $handler );
+    $self->{_interpro_handler} = $handler;
+
+    # default term object factory
+    unless ($self->term_factory()) {
+        $self->term_factory(Bio::Ontology::TermFactory->new(
+                                -type => "Bio::Ontology::InterProTerm"));
+    }
+    $handler->term_factory($self->term_factory());
+
+}
+
+=head2 parse
+
+ Title   : parse
+ Usage   :
+ Function: Performs the actual parsing.
+ Example : $ipp->parse();
+ Returns : 
+ Args    :
+
+=cut
+
+sub parse{
+   my $self = shift;
+
+   # Work out where the XML comes from: a file name takes precedence over an
+   # already open filehandle; anything else is an error.
+   my $source;
+   if ($self->file()) {
+       $source = { SystemId => $self->file() };
+   } elsif ($self->_fh()) {
+       $source = { ByteStream => $self->_fh() };
+   } else {
+       $self->throw("Only filenames and filehandles are understood here.\n");
+   }
+
+   my $result = $self->{_parser}->parse( Source => $source );
+
+   # remember that parsing has been done so next_ontology() won't repeat it
+   $self->_is_parsed(1);
+   return $result;
+}
+
+=head2 next_ontology
+
+ Title   : next_ontology
+ Usage   : $ipp->next_ontology()
+ Function: Parses the input file and returns the next InterPro ontology
+           available.
+
+           Usually there will be only one ontology returned from an
+           InterPro XML input.
+
+ Example : $ipp->next_ontology();
+ Returns : Returns the ontology as a Bio::Ontology::OntologyEngineI
+           compliant object.
+ Args    : 
+
+See L<Bio::Ontology::OntologyEngineI>.
+
+=cut
+
+sub next_ontology{
+  my $self = shift;
+
+  $self->parse() unless $self->_is_parsed();
+
+  # there is only one ontology in an InterPro source file; the engine slot
+  # doubles as the marker for "not handed out yet"
+  return unless exists($self->{'_ontology_engine'});
+
+  my $ontology = $self->{_interpro_handler}->ontology();
+  delete $self->{_ontology_engine};
+  return $ontology;
+}
+
+=head2 _is_parsed
+
+ Title   : _is_parsed
+ Usage   : $obj->_is_parsed($newval)
+ Function: 
+ Example : 
+ Returns : value of _is_parsed (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub _is_parsed{
+    my ($self, @new_value) = @_;
+
+    # setter if a value (even undef) was passed, plain getter otherwise
+    $self->{'_is_parsed'} = $new_value[0] if @new_value;
+    return $self->{'_is_parsed'};
+}
+
+=head2 secondary_accessions_map
+
+ Title   : secondary_accessions_map
+ Usage   : $obj->secondary_accessions_map()
+ Function: This method is merely for convenience, and one should
+           normally use the InterProTerm secondary_ids method to
+           access the secondary accessions.
+ Example : $map = $interpro_parser->secondary_accessions_map;
+ Returns : Reference to a hash that maps InterPro identifier to an
+           array reference of secondary accessions following the 
+           InterPro xml schema.
+ Args    : Empty hash reference
+
+=cut
+
+sub secondary_accessions_map{
+  my $self = shift;
+
+  # the map is kept by the SAX handler; read it straight from the handler's
+  # hash
+  return $self->{_interpro_handler}{secondary_accessions_map};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/dagflat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/dagflat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/dagflat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,893 @@
+# $Id: dagflat.pm,v 1.29.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::OntologyIO::dagflat
+#
+# Cared for by Hilmar Lapp, hlapp at gmx.net
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::dagflat - a base class parser for GO flat-file type formats
+
+=head1 SYNOPSIS
+
+  use Bio::OntologyIO;
+
+  # do not use directly -- use via Bio::OntologyIO
+  # e.g., the GO parser is a simple extension of this class
+  my $parser = Bio::OntologyIO->new
+        ( -format       => "go",
+     -defs_file    => "/home/czmasek/GO/GO.defs",
+          -files        => ["/home/czmasek/GO/component.ontology",
+                            "/home/czmasek/GO/function.ontology",
+                            "/home/czmasek/GO/process.ontology"] );
+
+  my $go_ontology = $parser->next_ontology();
+
+  my $IS_A       = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+  my $PART_OF    = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+  my $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" );
+
+=head1 DESCRIPTION
+
+Needs Graph.pm from CPAN.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address:
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head2 CONTRIBUTOR
+
+ Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package  Bio::OntologyIO::dagflat;
+
+use strict;
+
+use Bio::Root::IO;
+use Bio::Ontology::OBOEngine;
+use Bio::Ontology::Ontology;
+use Bio::Ontology::OntologyStore;
+use Bio::Ontology::TermFactory;
+
+use constant TRUE         => 1;
+use constant FALSE        => 0;
+
+
+use base qw(Bio::OntologyIO);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $parser = Bio::OntologyIO->new(
+                             -format => "go",
+                             -defs_file => "/path/to/GO.defs",
+                             -files => ["/path/to/component.ontology",
+                                        "/path/to/function.ontology",
+                                        "/path/to/process.ontology"] );
+ Function: Creates a new dagflat parser.
+ Returns : A new dagflat parser object, implementing Bio::OntologyIO.
+ Args    : -defs_file  => the name of the file holding the term
+                          definitions
+           -files      => a single ontology flat file holding the
+                          term relationships, or an array ref holding
+                          the file names (for GO, there will usually be
+                          3 files: component.ontology, function.ontology,
+                          process.ontology)
+           -file       => if there is only a single flat file, it may
+                          also be specified via the -file parameter
+           -ontology_name => the name of the ontology; if not specified the
+                          parser will auto-discover it by using the term
+                          that starts with a $, and converting underscores
+                          to spaces
+           -engine     => the Bio::Ontology::OntologyEngineI object
+                          to be reused (will be created otherwise); note
+                          that every Bio::Ontology::OntologyI will
+                          qualify as well since that one inherits from the
+                          former.
+
+See L<Bio::OntologyIO>.
+
+=cut
+
+# in reality, we let OntologyIO::new do the instantiation, and override
+# _initialize for all initialization work
+# Initializes the parser from the named arguments documented under new():
+# resets the parsing state, sets up the ontology engine, and records the
+# definitions source (-defs_file or -defs_url) and the ontology sources
+# (-files or -url).
+# Throws if both -defs_file and -defs_url are given; warns if both -files
+# and -url are given.
+sub _initialize {
+  my ($self, %arg) = @_;
+
+  my ( $defs_file_name,$files,$defs_url,$url,$name,$eng ) =
+        $self->_rearrange([qw( DEFS_FILE
+                           FILES
+                           DEFS_URL
+                           URL
+                           ONTOLOGY_NAME
+                           ENGINE)
+                      ],
+                      %arg );
+
+  # -url is dealt with below and therefore withheld from the superclass
+  delete($arg{-url}); #b/c GO has 3 files...
+
+  $self->SUPER::_initialize( %arg );
+
+  # reset the parsing state
+  $self->_done( FALSE );
+  $self->_not_first_record( FALSE );
+  $self->_term( "" );
+  delete $self->{'_ontologies'};
+
+  # ontology engine (and possibly name if it's an OntologyI)
+  $eng = Bio::Ontology::OBOEngine->new() unless $eng;
+  if($eng->isa("Bio::Ontology::OntologyI")) {
+    $self->ontology_name($eng->name());
+    $eng = $eng->engine() if $eng->can('engine');
+  }
+  $self->_ont_engine($eng);
+
+  # term definitions: a file or a URL, but not both
+  if(defined($defs_file_name) && defined($defs_url)){
+    $self->throw('cannot provide both -defs_file and -defs_url');
+  } else {
+    defined($defs_file_name) && $self->defs_file( $defs_file_name );
+    defined($defs_url)       && $self->defs_url( $defs_url );
+  }
+
+  # flat files to parse: local files or URL(s)
+  if(defined($files) && defined($url)){
+    # Neither source is recorded in this case (unchanged behaviour); say so
+    # rather than silently ending up with nothing to parse.
+    $self->warn('both -files and -url were provided; neither will be used');
+  } elsif(defined($files)){
+    # accept a single file name as well as an array reference of names
+    $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
+  } elsif(defined($url)){
+    $self->url($url);
+  }
+
+  # ontology name (overrides implicit one through OntologyI engine)
+  $self->ontology_name($name) if $name;
+
+} # _initialize
+
+=head2 ontology_name
+
+ Title   : ontology_name
+ Usage   : $obj->ontology_name($newval)
+ Function: Get/set the name of the ontology parsed by this module.
+ Example :
+ Returns : value of ontology_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub ontology_name{
+    my ($self, @new_value) = @_;
+
+    # setter if a value (even undef) was passed, plain getter otherwise
+    $self->{'ontology_name'} = $new_value[0] if @new_value;
+    return $self->{'ontology_name'};
+}
+
+
+=head2 parse
+
+ Title   : parse()
+ Usage   : $parser->parse();
+ Function: Parses the files set with "new" or with methods
+           defs_file and _flat_files.
+
+           Normally you should not need to call this method as it will
+           be called automatically upon the first call to
+           next_ontology().
+
+ Returns : [Bio::Ontology::OntologyEngineI]
+ Args    :
+
+=cut
+
+# Parses the term definitions and then every ontology flat file (or URL)
+# into one new ontology object, queues that ontology for next_ontology(),
+# and returns the ontology engine.
+sub parse {
+    my $self = shift;
+
+    #warn "PARSING";
+    # setup the default term factory if not done by anyone yet
+    $self->term_factory(Bio::Ontology::TermFactory->new(
+                                             -type => "Bio::Ontology::Term"))
+        unless $self->term_factory();
+
+    # create the ontology object itself
+    my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
+                                           -engine => $self->_ont_engine());
+
+    # parse definitions
+    while( my $term = $self->_next_term() ) {
+        $self->_add_term( $term, $ont );
+    }
+
+    # set up the ontology of the relationship types
+    foreach ($self->_part_of_relationship(), $self->_is_a_relationship(), $self->_related_to_relationship()) {
+      $_->ontology($ont);
+    }
+
+    # pre-seed the IO system with the first flat file if -file wasn't provided
+    if(! $self->_fh) {
+      if($self->url){
+        if(ref($self->url) eq 'ARRAY'){
+          # several URLs: each one is opened and parsed right here
+          #warn "BA";
+          foreach my $url (@{ $self->url }){
+            #warn $url;
+            #warn $ont;
+            #warn scalar($ont->get_all_terms());
+            $self->_initialize_io(-url  => $url);
+            $self->_parse_flat_file($ont);
+          }
+          # NOTE(review): presumably _fh is false after close(), so that the
+          # loop below is skipped for the URL-list case - confirm
+          $self->close();
+        } else {
+          # a single URL is only opened here; the loop below parses it
+          $self->_initialize_io(-url  => $self->url);
+        }
+      } elsif($self->_flat_files){
+        # NOTE(review): _flat_files() returns an array reference, which is
+        # always true; with an empty list, undef is passed as -file
+        $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
+      }
+    }
+
+    # parse the currently open source, then keep taking file names off the
+    # flat file list until it is used up
+    while($self->_fh) {
+      $self->_parse_flat_file($ont);
+      # advance to next flat file if more are available
+      if(@{$self->_flat_files()}) {
+        $self->close();
+        $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
+      } else {
+            last; # nothing else to parse so terminate the loop
+      }
+    }
+    # queue the finished ontology for retrieval through next_ontology()
+    $self->_add_ontology($ont);
+
+    # not needed anywhere, only because of backward compatibility
+    return $self->_ont_engine();
+} # parse
+
+=head2 next_ontology
+
+ Title   : next_ontology
+ Usage   :
+ Function: Get the next available ontology from the parser. This is the
+           method prescribed by Bio::OntologyIO.
+ Example :
+ Returns : An object implementing Bio::Ontology::OntologyI, and undef if
+           there is no more ontology in the input.
+ Args    :
+
+
+=cut
+
+sub next_ontology {
+  my $self = shift;
+
+  # parse if not done already
+  $self->parse() unless exists($self->{'_ontologies'});
+  return unless exists($self->{'_ontologies'});
+
+  # take the next ontology off the queue; nothing is returned once the queue
+  # is used up
+  my $ontology = shift @{$self->{'_ontologies'}};
+  return unless $ontology;
+
+  # register the ontology with the ontology store before handing it out
+  my $store = Bio::Ontology::OntologyStore->new();
+  $store->register_ontology($ontology);
+  return $ontology;
+}
+
+=head2 defs_file
+
+ Title   : defs_file
+ Usage   : $parser->defs_file( "GO.defs" );
+ Function: Set/get for the term definitions filename.
+ Returns : The term definitions file name [string].
+ Args    : On set, the term definitions file name [string] (optional).
+
+=cut
+
+sub defs_file {
+  my ($self, @new_name) = @_;
+
+  if (@new_name) {
+    my $file_name = $new_name[0];
+    $self->{'_defs_file_name'} = $file_name;
+    # close the stream of any previously set definitions file
+    my $old_io = $self->_defs_io();
+    $old_io->close() if $old_io;
+    # a new stream is opened only for a defined name
+    $self->_defs_io( Bio::Root::IO->new( -input => $file_name ) )
+        if defined($file_name);
+  }
+  return $self->{'_defs_file_name'};
+} # defs_file
+
+# Get/set accessor for the URL of the term definitions; on set, the
+# definitions stream is replaced by one reading from that URL.
+sub defs_url {
+  my ($self, $new_url) = @_;
+
+  if (defined $new_url) {
+    $self->{'_defs_url'} = $new_url;
+    # close the stream of any previously set definitions source
+    my $old_io = $self->_defs_io();
+    $old_io->close() if $old_io;
+    $self->_defs_io( Bio::Root::IO->new( -url => $new_url ) );
+  }
+  return $self->{'_defs_url'};
+}
+
+# Get/set accessor for the ontology URL (or array reference of URLs); an
+# undefined argument leaves the stored value untouched.
+sub url {
+  my ($self, $new_url) = @_;
+  $self->{'_url'} = $new_url if defined($new_url);
+  return $self->{'_url'};
+}
+
+=head2 close
+
+ Title   : close
+ Usage   :
+ Function: Closes this ontology stream and associated file handles.
+
+           Clients should call this method especially when they write
+           ontologies.
+
+           We need to override this here in order to close the file
+           handle for the term definitions file.
+
+ Example :
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub close{
+    my $self = shift;
+
+    # the inherited implementation goes first ...
+    $self->SUPER::close();
+    # ... followed by the separate stream for the term definitions, if any
+    if (my $defs_io = $self->_defs_io()) {
+        $defs_io->close();
+    }
+}
+
+=head2 _flat_files
+
+ Title   : _flat_files
+ Usage   : $files_to_parse = $parser->_flat_files();
+ Function: Get the array of ontology flat files that need to be parsed.
+
+           Note that this array will decrease in elements over the
+           parsing process. Therefore, its value outside of this
+           module will be limited. Also, be careful not to alter the
+           array unless you know what you are doing.
+
+ Returns : a reference to an array of zero or more strings
+ Args    : none
+
+=cut
+
+sub _flat_files {
+    my ($self) = @_;
+
+    # Create the list on first access, so callers always receive an array
+    # reference (the very same one on every call).
+    if ( !exists( $self->{_flat_files} ) ) {
+        $self->{_flat_files} = [];
+    }
+    return $self->{_flat_files};
+}
+
+
+# INTERNAL METHODS
+# ----------------
+
+=head2 _defs_io
+
+ Title   : _defs_io
+ Usage   : $obj->_defs_io($newval)
+ Function: Get/set the Bio::Root::IO instance representing the
+           definition file, if provided (see defs_file()).
+ Example :
+ Returns : value of _defs_io (a Bio::Root::IO object)
+ Args    : on set, new value (a Bio::Root::IO object or undef, optional)
+
+=cut
+
+sub _defs_io{
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, including an explicit undef, replaces the stored reader.
+    if (@new_value) {
+        return $self->{'_defs_io'} = $new_value[0];
+    }
+    return $self->{'_defs_io'};
+}
+
+sub _add_ontology {
+    my ( $self, @candidates ) = @_;
+
+    # Append ontologies to the queue served by next_ontology(). The queue
+    # is created even when the call carries no ontology at all.
+    if ( !exists $self->{'_ontologies'} ) {
+        $self->{'_ontologies'} = [];
+    }
+
+    for my $ont (@candidates) {
+        # only objects implementing Bio::Ontology::OntologyI are accepted
+        if ( !( ref($ont) && $ont->isa("Bio::Ontology::OntologyI") ) ) {
+            $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI");
+        }
+        # An unnamed ontology inherits the parser's ontology name, which
+        # may have been auto-discovered while parsing the file.
+        $ont->name($self->ontology_name) unless $ont->name();
+        push( @{$self->{'_ontologies'}}, $ont );
+    }
+}
+
+# This simply delegates. See SimpleGOEngine.
+sub _add_term {
+    my ( $self, $term, $ont ) = @_;
+
+    # Attach the ontology only when one was given and the term has none yet.
+    if ( $ont && !$term->ontology ) {
+        $term->ontology($ont);
+    }
+
+    # storing the term is the ontology engine's job
+    $self->_ont_engine()->add_term( $term );
+} # _add_term
+
+
+
+# This simply delegates. See SimpleGOEngine
+sub _part_of_relationship {
+    my $self = shift;
+
+    # forward all remaining arguments to the ontology engine
+    my $engine = $self->_ont_engine();
+    return $engine->part_of_relationship(@_);
+} # _part_of_relationship
+
+
+
+# This simply delegates. See SimpleGOEngine
+sub _is_a_relationship {
+    my $self = shift;
+
+    # forward all remaining arguments to the ontology engine
+    my $engine = $self->_ont_engine();
+    return $engine->is_a_relationship(@_);
+} # _is_a_relationship
+
+# This simply delegates. See SimpleGOEngine
+sub _related_to_relationship {
+    my $self = shift;
+
+    # forward all remaining arguments to the ontology engine
+    my $engine = $self->_ont_engine();
+    return $engine->related_to_relationship(@_);
+} # _related_to_relationship
+
+
+
+# This simply delegates. See SimpleGOEngine
+sub _add_relationship {
+    my ( $self, $parent, $child, $type, $ont ) = @_;
+
+    # The engine takes triples as (subject, predicate, object), which here
+    # corresponds to (child, type, parent) -- hence the swapped order.
+    my $engine = $self->_ont_engine();
+    $engine->add_relationship( $child, $type, $parent, $ont );
+} # _add_relationship
+
+
+# This simply delegates. See SimpleGOEngine
+sub _has_term {
+    my $self = shift;
+
+    # forward all remaining arguments to the ontology engine
+    my $engine = $self->_ont_engine();
+    return $engine->has_term( @_ );
+} # _has_term
+
+
+
+# This parses the relationships files
+sub _parse_flat_file {
+  # Reads the current input line by line (via _readline), creates the terms
+  # it finds and records their relationships in the ontology engine.
+  # Args   : the ontology object that terms and relationships are added to
+  # Returns: that same ontology object
+  # Throws : on an unexpected jump in indentation, or on a non-root line
+  #          that does not start with a relationship symbol
+  my $self = shift;
+  my $ont  = shift;
+
+  # @stack holds the ids of the terms enclosing the current line, one per
+  # indentation level; $prev_spaces / $prev_term describe the previous line.
+  my @stack       = ();
+  my $prev_spaces = -1;
+  my $prev_term   = "";
+
+  while ( my $line = $self->_readline() ) {
+
+        # lines starting with '!' are skipped
+        if ( $line =~ /^!/ ) {
+          next;
+        }
+
+        # split into term specifications (at a space followed by % or <)
+        my @termspecs = split(/ (?=[%<])/, $line);
+        # drop the first element if it is whitespace only
+        shift(@termspecs) if $termspecs[0] =~ /^\s*$/;
+
+        # parse out the focus term
+        my $current_term   = $self->_get_first_termid( $termspecs[0] );
+        my @syns           = $self->_get_synonyms( $termspecs[0] );
+        my @sec_go_ids     = $self->_get_secondary_termids( $termspecs[0] );
+        my @cross          = $self->_get_db_cross_refs( $termspecs[0] );
+        # keep every cross reference except the focus term's own id
+        my @cross_refs;
+        foreach my $cross_ref (@cross) {
+            $cross_ref eq $current_term && next;
+            push(@cross_refs, $cross_ref);
+        }
+        
+        # parse out the parents of the focus term:
+        # a leading '%' marks an is-a parent, a leading '<' a part-of parent
+        shift(@termspecs);
+        my @isa_parents = ();
+        my @partof_parents = ();
+        foreach my $parent (@termspecs) {
+            if (index($parent, "%") == 0) {
+                push(@isa_parents, $self->_get_first_termid($parent));
+            } elsif (index($parent, "<") == 0) {
+                push(@partof_parents, $self->_get_first_termid($parent));
+            } else {
+                $self->warn("unhandled relationship type in '".$parent."'");
+            }
+        }
+
+        # create the focus term unless the engine already knows it
+        if ( ! $self->_has_term( $current_term ) ) {
+          my $term =$self->_create_ont_entry($self->_get_name($line,
+                                                              $current_term),
+                                             $current_term );
+          $self->_add_term( $term, $ont );
+        }
+
+        my $current_term_object = $self->_ont_engine()->get_terms( $current_term );
+
+        $current_term_object->add_dblink( @cross_refs );
+        $current_term_object->add_secondary_id( @sec_go_ids );
+        $current_term_object->add_synonym( @syns );
+        # lines starting with '$' do not get the ontology (re)assigned here
+        unless ( $line =~ /^\$/ ) {
+          $current_term_object->ontology( $ont );
+        }
+        # is-a parents listed on the line itself
+        foreach my $parent ( @isa_parents ) {
+          if ( ! $self->_has_term( $parent ) ) {
+                my $term = $self->_create_ont_entry($self->_get_name($line,
+                                                                     $parent),
+                                                    $parent );
+                $self->_add_term( $term, $ont );
+          }
+
+          $self->_add_relationship( $parent,
+                                    $current_term,
+                                    $self->_is_a_relationship(),
+                                    $ont);
+
+        }
+        # part-of parents listed on the line itself
+        foreach my $parent ( @partof_parents ) {
+          if ( ! $self->_has_term( $parent ) ) {
+                my $term = $self->_create_ont_entry($self->_get_name($line,
+                                                                     $parent),
+                                                    $parent );
+                $self->_add_term( $term, $ont );
+          }
+
+          $self->_add_relationship( $parent,
+                                    $current_term,
+                                    $self->_part_of_relationship(),
+                                    $ont);
+        }
+
+        # The indentation decides the enclosing term: one level deeper
+        # pushes the previous term onto the stack, a shallower line pops one
+        # entry per level, and any other change is a format error.
+        my $current_spaces = $self->_count_spaces( $line );
+
+        if ( $current_spaces != $prev_spaces  ) {
+
+          if ( $current_spaces == $prev_spaces + 1 ) {
+                push( @stack, $prev_term );
+          } elsif ( $current_spaces < $prev_spaces ) {
+                my $n = $prev_spaces -  $current_spaces;
+                for ( my $i = 0; $i < $n; ++$i ) {
+                  pop( @stack );
+                }
+          } else {
+                $self->throw( "format error (file ".$self->file.")" );
+          }
+        }
+
+        # the top of the stack is the parent implied by the indentation
+        my $parent = $stack[ @stack - 1 ];
+
+        # add a relationship if the line isn't the one with the root term
+        # of the ontology (which is also the name of the ontology)
+        if ( index($line,'$') != 0 ) {
+          # the line must start with <, %, ~ or the @reltype@ syntax
+          if ( $line !~ /^\s*([<%~]|\@\w+?\@)/ ) {
+                $self->throw( "format error (file ".$self->file.") offending line:\n$line" );
+          }
+
+          my($relstring) = $line =~ /^\s*([<%~]|\@[^\@]+?\@)/;
+
+          my $reltype;
+
+          if ($relstring eq '<') {
+                $reltype = $self->_part_of_relationship;
+          } elsif ($relstring eq '%') {
+                $reltype = $self->_is_a_relationship;
+          } elsif ($relstring eq '~') {
+                $reltype = $self->_related_to_relationship;
+          } else {
+                # custom @reltype@: strip the @ signs, then look the type up
+                # in the engine, registering it first if it is unknown
+                $relstring =~ s/\@//g;
+                if ($self->_ont_engine->get_relationship_type($relstring)) {
+                  $reltype = $self->_ont_engine->get_relationship_type($relstring);
+                } else {
+                  $self->_ont_engine->add_relationship_type($relstring, $ont);
+                  $reltype = $self->_ont_engine->get_relationship_type($relstring);
+                }
+          }
+
+          # superseded two-way choice, kept for reference:
+          #my $reltype = ($line =~ /^\s*</) ?
+          #$self->_part_of_relationship() :
+          #$self->_is_a_relationship();
+          $self->_add_relationship( $parent, $current_term, $reltype, $ont);
+        }
+
+        # remember this line's state for the next iteration
+        $prev_spaces = $current_spaces;
+        $prev_term = $current_term;
+  }
+  return $ont;
+}                                                                # _parse_flat_file
+
+
+
+# Parses the 1st term id number out of line.
+sub _get_first_termid {
+    my ( $self, $line ) = @_;
+
+    # A term id follows a semicolon: 1 to 8 capital letters or underscores,
+    # a colon, then at least one digit. The first such id wins.
+    # (a looser alternative would be /;\s*(\w+:\w+)/)
+    my ($first_id) = $line =~ /;\s*([A-Z_]{1,8}:\d{1,})/;
+    return $first_id if defined $first_id;
+
+    # no id found: the line is malformed
+    $self->throw( "format error: no term id in line \"$line\"" );
+
+} # _get_first_termid
+
+
+
+# Parses the name out of line.
+sub _get_name {
+    my ( $self, $line, $termid ) = @_;
+
+    # Returns the name that precedes "; <termid>" in $line, or nothing if
+    # the id does not occur there. The id is quoted with \Q...\E so that it
+    # is matched literally instead of being interpreted as a pattern.
+    if ( $line =~ /([^;<%~]+);\s*\Q$termid\E/ ) {
+        my $name = $1;
+        # remove trailing and leading whitespace
+        $name =~ s/\s+$//;
+        $name =~ s/^\s+//;
+        # drop the first @...@ marker contained in the name, if any
+        $name =~ s/\@.+?\@//;
+        # remove leading dollar character; also we default the name of the
+        # ontology to this name unless it is preset to something else
+        if(index($name,'$') == 0) {
+            $name = substr($name,1);
+            # replace underscores by spaces for setting the ontology name
+            $self->ontology_name(join(" ",split(/_/,$name)))
+                unless $self->ontology_name();
+        }
+        return $name;
+    }
+    else {
+        return;
+    }
+} # _get_name
+
+
+# Parses the synonyms out of line.
+sub _get_synonyms {
+    my ( $self, $line ) = @_;
+
+    # Collect the text of every "synonym: ..." chunk; the text ends at the
+    # next ';', '<', '%' or '~'.
+    my @synonyms = $line =~ /synonym\s*:\s*([^;<%~]+)/g;
+
+    # strip surrounding whitespace from each synonym
+    for my $syn (@synonyms) {
+        $syn =~ s/\s+$//;
+        $syn =~ s/^\s+//;
+    }
+    return @synonyms;
+
+} # _get_synonyms
+
+
+
+# Parses the db cross refs out of line.
+sub _get_db_cross_refs {
+    my ( $self, $line ) = @_;
+
+    my @refs = ();
+
+    # every "name:value" chunk that follows a semicolon is a candidate
+    for my $candidate ( $line =~ /;([^;<%~:]+:[^;<%~:]+)/g ) {
+        # synonyms and things that look like term ids are no cross references
+        next if $candidate =~ /synonym/;
+        next if $candidate =~ /[A-Z]{1,8}:\d{3,}/;
+
+        my $ref = $candidate;
+        $ref =~ s/\s+$//;
+        $ref =~ s/^\s+//;
+
+        $ref = $self->unescape( $ref );
+
+        push( @refs, $ref ) if defined $ref;
+    }
+    return @refs;
+
+}
+
+
+# Parses the secondary go ids out of a line
+sub _get_secondary_termids {
+    my ( $self, $line ) = @_;
+
+    # Secondary ids are the "word:word" tokens that follow a comma.
+    # (a stricter alternative would be /,\s*([A-Z]{1,8}:\d{3,})/)
+    my @secs = $line =~ /,\s*(\w+:\w+)/g;
+
+    return @secs;
+
+} # _get_secondary_termids
+
+
+# Counts the spaces at the beginning of a line in the relationships files
+sub _count_spaces {
+    my ( $self, $line ) = @_;
+
+    # length of the leading whitespace run; 0 when the line starts with
+    # a non-whitespace character
+    return ( $line =~ /^(\s+)/ ) ? length( $1 ) : 0;
+} # _count_spaces
+
+
+# "next" method for parsing the definitions file
+sub _next_term {
+  # Reads the next record from the definitions reader (_defs_io) and
+  # returns it as a term built by _create_ont_entry(). Returns nothing once
+  # the reader is exhausted (_done) or when no definitions reader is set.
+  my ( $self ) = @_;
+
+  if ( ($self->_done() == TRUE) || (! $self->_defs_io())) {
+    return;
+  }
+
+  my $line      = "";
+  my $termid    = "";
+  # the term name stored by the previous call (via _term) names this record
+  my $next_term = $self->_term();
+  my $def       = "";
+  my $comment   = "";
+  my @def_refs  = ();
+  my $isobsolete;
+
+  while( $line = ( $self->_defs_io->_readline() ) ) {
+    # skip blank lines and lines whose first non-blank character is '!'
+    if ( $line !~ /\S/
+         ||   $line =~ /^\s*!/ ) {
+      next;
+    }
+    elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
+      # A "term:" line opens a record. Its name is stored first; unless this
+      # is the very first record of the file, it belongs to the NEXT record,
+      # so reading stops here and the stored name is picked up by the
+      # following call.
+      $self->_term( $1 );
+      last if $self->_not_first_record();
+      $next_term = $1;
+      $self->_not_first_record( TRUE );
+    }
+    elsif ( $line =~ /^\s*[a-z]{0,8}id:\s*(.+)/ ) {
+      # any "...id:" tag with up to 8 lowercase letters before "id"
+      $termid = $1;
+    }
+    elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
+      $def = $self->unescape($1);
+      # a definition starting with "OBSOLETE" marks the term as obsolete
+      $isobsolete = 1 if index($def,"OBSOLETE") == 0;
+    }
+    elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
+      push( @def_refs, $self->unescape($1) );
+    }
+    elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
+      $comment = $self->unescape($1);
+    }
+  }
+  # a false $line means the loop ended because the reader ran dry, not
+  # because of the "last" above
+  $self->_done( TRUE ) unless $line; # we'll come back until done
+
+  return $self->_create_ont_entry( $next_term, $termid, $def,
+                                   $comment, \@def_refs, $isobsolete);
+} # _next_term
+
+
+
+# Holds the GO engine to be parsed into
+sub _ont_engine {
+    my $self = shift;
+    my ($value) = @_;
+
+    # only a defined argument replaces the stored engine
+    $self->{ "_ont_engine" } = $value if defined $value;
+
+    return $self->{ "_ont_engine" };
+} # _ont_engine
+
+
+# Used to create ontology terms.
+# Arguments: name, id, definition, comment, dbxrefs, obsolete flag
+sub _create_ont_entry {
+    my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_;
+
+    # Without an explicit flag, a name that starts with "obsolete" (in any
+    # letter case) marks the term as obsolete.
+    if ( !defined($obsolete) ) {
+        $obsolete = 1 if index(lc($name),"obsolete") == 0;
+    }
+
+    # the term factory decides which term class gets instantiated
+    my @term_args = (
+        -name        => $name,
+        -identifier  => $termid,
+        -definition  => $def,
+        -comment     => $cmt,
+        -dblinks     => $dbxrefs,
+        -is_obsolete => $obsolete,
+    );
+    my $term = $self->term_factory->create_object(@term_args);
+
+    return $term;
+} # _create_ont_entry
+
+
+
+# Holds whether first record or not
+sub _not_first_record {
+    my $self = shift;
+    my ($value) = @_;
+
+    # only a defined argument replaces the stored flag
+    $self->{ "_not_first_record" } = $value if defined $value;
+
+    return $self->{ "_not_first_record" };
+} # _not_first_record
+
+
+
+# Holds whether done or not
+sub _done {
+    my $self = shift;
+    my ($value) = @_;
+
+    # only a defined argument replaces the stored flag
+    $self->{ "_done" } = $value if defined $value;
+
+    return $self->{ "_done" };
+} # _done
+
+
+# Holds a term.
+sub _term {
+    my $self = shift;
+    my ($value) = @_;
+
+    # only a defined argument replaces the stored term
+    $self->{ "_term" } = $value if defined $value;
+
+    return $self->{ "_term" };
+} # _term
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/goflat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/goflat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/goflat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,160 @@
+# $Id: goflat.pm,v 1.7.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::OntologyIO::goflat
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::goflat - a parser for the Gene Ontology flat-file format
+
+=head1 SYNOPSIS
+
+  use Bio::OntologyIO;
+
+  # do not use directly -- use via Bio::OntologyIO
+  my $parser = Bio::OntologyIO->new
+	( -format       => "go",
+     -defs_file    => "/home/czmasek/GO/GO.defs",
+	  -files        => ["/home/czmasek/GO/component.ontology",
+	                    "/home/czmasek/GO/function.ontology",
+	                    "/home/czmasek/GO/process.ontology"] );
+
+  my $go_ontology = $parser->next_ontology();
+
+  my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+  my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+
+=head1 DESCRIPTION
+
+Needs Graph.pm from CPAN.
+
+This is essentially a very thin derivation of the dagflat parser.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head2 CONTRIBUTOR
+
+ Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package  Bio::OntologyIO::goflat;
+
+use strict;
+
+use Bio::Ontology::TermFactory;
+
+use constant TRUE         => 1;
+use constant FALSE        => 0;
+
+
+use base qw(Bio::OntologyIO::dagflat);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $parser = Bio::OntologyIO->new(
+                             -format => "go",
+                             -defs_file => "/path/to/GO.defs",
+                             -files => ["/path/to/component.ontology",
+                                        "/path/to/function.ontology",
+                                        "/path/to/process.ontology"] );
+ Function: Creates a new goflat parser.
+ Returns : A new goflat parser object, implementing Bio::OntologyIO.
+ Args    : -defs_file  => the name of the file holding the term
+                          definitions
+           -files      => a single ontology flat file holding the
+                          term relationships, or an array ref holding
+                          the file names (for GO, there will usually be
+                          3 files: component.ontology, function.ontology,
+                          process.ontology)
+           -file       => if there is only a single flat file, it may
+                          also be specified via the -file parameter
+           -ontology_name => the name of the ontology; if not specified the
+                          parser will auto-discover it by using the term
+                          that starts with a $, and converting underscores
+                          to spaces
+           -engine     => the Bio::Ontology::OntologyEngineI object
+                          to be reused (will be created otherwise); note
+                          that every Bio::Ontology::OntologyI will
+                          qualify as well since that one inherits from the
+                          former.
+
+See L<Bio::OntologyIO>.
+
+=cut
+
+# in reality, we let OntologyIO::new do the instantiation, and override
+# _initialize for all initialization work
+sub _initialize {
+    my $self = shift;
+
+    # the inherited initializer receives the arguments untouched
+    $self->SUPER::_initialize( @_ );
+
+    # Default term object factory: unless one is already set, install a
+    # factory that creates Bio::Ontology::GOterm objects.
+    $self->term_factory()
+        or $self->term_factory(
+            Bio::Ontology::TermFactory->new( -type => "Bio::Ontology::GOterm" ) );
+
+} # _initialize
+
+  
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/obo.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/obo.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/obo.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,786 @@
+# $Id: obo.pm,v 1.8.4.4 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::OntologyIO::obo
+#
+# Cared for by Sohel Merchant, s-merchant at northwestern.edu
+#
+# Copyright Sohel Merchant
+#
+# You may distribute this module under the same terms as perl itself
+
+
+=head1 NAME
+
+Bio::OntologyIO::obo - a parser for OBO flat-file format from Gene Ontology Consortium
+
+=head1 SYNOPSIS
+
+  use Bio::OntologyIO;
+
+  # do not use directly -- use via Bio::OntologyIO
+  my $parser = Bio::OntologyIO->new
+        ( -format       => "obo",
+          -file        =>  "gene_ontology.obo");
+
+  while(my $ont = $parser->next_ontology()) {
+  print "read ontology ",$ont->name()," with ",
+               scalar($ont->get_root_terms)," root terms, and ",
+               scalar($ont->get_all_terms)," total terms, and ",
+               scalar($ont->get_leaf_terms)," leaf terms\n";
+  }
+
+
+=head1 DESCRIPTION
+
+Needs Graph.pm from CPAN.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Sohel Merchant
+
+Email: s-merchant at northwestern.edu
+
+
+Address:
+
+  Northwestern University
+  Center for Genetic Medicine (CGM), dictyBase
+  Suite 1206,
+  676 St. Clair st
+  Chicago IL 60611
+
+=head2 CONTRIBUTOR
+
+ Hilmar Lapp, hlapp at gmx.net
+ Chris Mungall,   cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package  Bio::OntologyIO::obo;
+
+use strict;
+
+use Bio::Root::IO;
+use Bio::Ontology::OBOEngine;
+use Bio::Ontology::Ontology;
+use Bio::Ontology::OntologyStore;
+use Bio::Ontology::TermFactory;
+use Bio::Annotation::Collection;
+use Data::Dumper;
+use Text::Balanced qw(extract_quotelike extract_bracketed);
+
+use constant TRUE  => 1;
+use constant FALSE => 0;
+
+use base qw(Bio::OntologyIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $parser = Bio::OntologyIO->new(
+                             -format => "obo",
+                             -file => "gene_ontology.obo");
+ Function: Creates a new obo parser.
+ Returns : A new obo parser object, implementing Bio::OntologyIO.
+ Args    : -file      => a single ontology flat file holding the
+                          terms, descriptions and relationships
+           -ontology_name => the name of the ontology; if not specified the
+                          parser will assign the name of the ontology as the
+                          default-namespace header value from the OBO file.
+           -engine     => the Bio::Ontology::OntologyEngineI object
+                          to be reused (will be created otherwise); note
+                          that every Bio::Ontology::OntologyI will
+                          qualify as well since that one inherits from the
+                          former.
+
+See L<Bio::OntologyIO>.
+
+=cut
+
+# in reality, we let OntologyIO::new do the instantiation, and override
+# _initialize for all initialization work
+sub _initialize {
+    my ( $self, %arg ) = @_;
+
+    # Pick the named arguments of interest. The FILE slot is extracted as
+    # well but is not used within this method.
+    my ( undef, $name, $eng ) =
+      $self->_rearrange( [ qw( FILE ONTOLOGY_NAME ENGINE ) ], %arg );
+
+    $self->SUPER::_initialize(%arg);
+
+    # start without a queue of parsed ontologies, so that the first call
+    # to next_ontology() triggers parsing
+    delete $self->{'_ontologies'};
+
+    # ontology engine (and possibly name if it's an OntologyI)
+    $eng ||= Bio::Ontology::OBOEngine->new();
+    if ( $eng->isa("Bio::Ontology::OntologyI") ) {
+        $self->ontology_name( $eng->name() );
+        if ( $eng->can('engine') ) {
+            $eng = $eng->engine();
+        }
+    }
+    $self->_ont_engine($eng);
+
+    # an explicitly given name takes precedence over the engine's name
+    $self->ontology_name($name) if $name;
+
+}    # _initialize
+
+=head2 ontology_name
+
+ Title   : ontology_name
+ Usage   : $obj->ontology_name($newval)
+ Function: Get/set the name of the ontology parsed by this module.
+ Example :
+ Returns : value of ontology_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub ontology_name {
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, including an explicit undef, replaces the stored name.
+    if (@new_value) {
+        return $self->{'ontology_name'} = $new_value[0];
+    }
+    return $self->{'ontology_name'};
+}
+
+=head2 parse
+
+ Title   : parse()
+ Usage   : $parser->parse();
+ Function: Parses the files set with "new" or with methods
+           defs_file and _flat_files.
+
+           Normally you should not need to call this method as it will
+           be called automatically upon the first call to
+           next_ontology().
+
+ Returns : Bio::Ontology::OntologyEngineI
+ Args    :
+
+=cut
+
+sub parse {
+    # Parses the whole input: reads the header, then every term, and sorts
+    # the terms into one ontology per term namespace. All ontologies share
+    # the same engine and are queued for next_ontology().
+    # Returns the ontology engine.
+    my $self = shift;
+
+    # setup the default term factory if not done by anyone yet
+    $self->term_factory(
+        Bio::Ontology::TermFactory->new( -type => "Bio::Ontology::OBOterm" ) )
+      unless $self->term_factory();
+
+    ## Parse the file header
+    my $annotations_collection = $self->_header();
+
+    # create the default ontology object itself
+    my $ont = Bio::Ontology::Ontology->new(
+        -name   => $self->ontology_name(),
+        -engine => $self->_ont_engine()
+    );
+
+    ## Assign the file headers
+    $ont->annotation($annotations_collection);
+
+    # set up the ontology of the relationship types
+    foreach (
+        $self->_part_of_relationship(),
+        $self->_is_a_relationship(),
+        $self->_related_to_relationship()
+      )
+    {
+        $_->ontology($ont);
+    }
+
+##################################
+    # queue the default ontology
+    $self->_add_ontology($ont);
+##################################
+
+    ### Adding new terms
+    while ( my $term = $self->_next_term() ) {
+
+        ### Check that the term has a valid ID and NAME
+        # NOTE(review): the message says the term "will be ignored", yet
+        # throw() is called; if throw() raises an exception, the "next"
+        # below is never reached -- confirm which behavior is intended.
+        if ( !$term->identifier() || !$term->name() ) {
+            $self->throw( "OBO File Format Error on line "
+                  . $self->{'_current_line_no'}
+                  . " \nThe term does not have a id/name tag. This term will be ignored.\n"
+            );
+            next;
+        }
+
+        # Look for an already queued ontology named like the term's
+        # namespace; if one is found it becomes the current ontology.
+        my $new_ontology_flag    = 1;
+        my $ontologies_array_ref = $self->{'_ontologies'};
+        foreach my $ontology (@$ontologies_array_ref) {
+            my ($oname, $t_ns) = ($ontology->name(), $term->namespace() );
+            next unless (defined($oname) && defined($t_ns));
+            if ( $oname eq $t_ns ) {
+                ### No need to create new ontology
+                $new_ontology_flag = 0;
+                $ont               = $ontology;
+            }
+        }
+
+        # otherwise a term with a namespace opens a new ontology, which
+        # shares the engine and the header annotations
+        if ( $new_ontology_flag && $term->namespace() ) {
+            my $new_ont = Bio::Ontology::Ontology->new(
+                -name   => $term->namespace(),
+                -engine => $self->_ont_engine()
+            );
+            $new_ont->annotation($annotations_collection);
+            $self->_add_ontology($new_ont);
+            $ont = $new_ont;
+        }
+
+
+        $self->_add_term( $term, $ont );
+
+        #### Adding the IS_A relationships
+        # presumably filled in by _next_term() for the term just read --
+        # TODO confirm, _next_term is outside this view
+        my $isa_parents_array_ref = $self->{'_isa_parents'};
+        foreach my $parent_term (@$isa_parents_array_ref) {
+            ### Check if parent exist, if not then add the term to the graph.
+            if ( !( $self->_has_term($parent_term) ) ) {
+                $self->_add_term( $parent_term, $ont );
+            }
+
+            $self->_add_relationship( $parent_term, $term,
+                $self->_is_a_relationship(), $ont );
+        }
+
+        #### Adding the other relationships like part_of, related_to, develops_from
+        # hash of relationship name => array ref of parent term ids
+        my $relationship_hash_ref = $self->{'_relationships'};
+        foreach my $relationship ( keys %$relationship_hash_ref ) {
+            my $reltype;
+            #### Check if relationship exist, if not add it.
+            if ( $self->_ont_engine->get_relationship_type($relationship) ) {
+                $reltype =
+                  $self->_ont_engine->get_relationship_type($relationship);
+            }
+            else {
+                $self->_ont_engine->add_relationship_type( $relationship,
+                    $ont );
+                $reltype =
+                  $self->_ont_engine->get_relationship_type($relationship);
+            }
+
+            #### Check if the id already exist in the graph
+            my $id_array_ref = $$relationship_hash_ref{$relationship};
+            foreach my $id (@$id_array_ref) {
+                # a placeholder term that carries only the id and the ontology
+                my $parent_term = $self->_create_term_object();
+                $parent_term->identifier($id);
+                $parent_term->ontology($ont);
+
+                if ( !( $self->_has_term($parent_term) ) ) {
+                    $self->_add_term( $parent_term, $ont );
+                }
+
+                $self->_add_relationship( $parent_term, $term, $reltype, $ont );
+            }
+
+        }
+
+    }
+    return $self->_ont_engine();
+}    # parse
+
+=head2 next_ontology
+
+ Title   : next_ontology
+ Usage   :
+ Function: Get the next available ontology from the parser. This is the
+           method prescribed by Bio::OntologyIO.
+ Example :
+ Returns : An object implementing Bio::Ontology::OntologyI, and nothing if
+           there is no more ontology in the input.
+ Args    :
+
+
+=cut
+
+sub next_ontology {
+    my ($self) = @_;
+
+    # Run the parser lazily, the first time an ontology is requested.
+    if ( !exists $self->{'_ontologies'} ) {
+        $self->parse();
+    }
+
+    # Without a queue of parsed ontologies there is nothing to hand out.
+    return unless exists $self->{'_ontologies'};
+
+    # Take the ontology at the head of the queue; an empty queue ends
+    # the stream.
+    my $next = shift @{ $self->{'_ontologies'} };
+    return unless $next;
+
+    # Register the ontology with an ontology store before returning it.
+    Bio::Ontology::OntologyStore->new()->register_ontology($next);
+
+    return $next;
+}
+
+=head2 close
+
+ Title   : close
+ Usage   :
+ Function: Closes this ontology stream and associated file handles.
+
+           Clients should call this method especially when they write
+           ontologies.
+
+           We need to override this here in order to close the file
+           handle for the term definitions file.
+
+ Example :
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub close {
+    my ($self) = @_;
+
+    # this class adds nothing of its own; the inherited close() does the work
+    return $self->SUPER::close();
+}
+
+# INTERNAL METHODS
+# ----------------
+
+sub _add_ontology {
+    my ( $self, @candidates ) = @_;
+
+    # Append ontologies to the queue served by next_ontology(). The queue
+    # is created even when the call carries no ontology at all.
+    if ( !exists $self->{'_ontologies'} ) {
+        $self->{'_ontologies'} = [];
+    }
+
+    for my $ont (@candidates) {
+
+        # only objects implementing Bio::Ontology::OntologyI are accepted
+        if ( !( ref($ont) && $ont->isa("Bio::Ontology::OntologyI") ) ) {
+            $self->throw(
+                ref($ont) . " does not implement Bio::Ontology::OntologyI" );
+        }
+
+        # An unnamed ontology inherits the parser's ontology name, which
+        # may have been auto-discovered while parsing the file.
+        $ont->name( $self->ontology_name ) unless $ont->name();
+        push( @{ $self->{'_ontologies'} }, $ont );
+    }
+}
+
+# This simply delegates. See OBOEngine.
+sub _add_term {
+    my ( $self, $term, $ont ) = @_;
+
+    # Attach the ontology only when one was given and the term has none yet.
+    if ( $ont && !$term->ontology ) {
+        $term->ontology($ont);
+    }
+
+    # storing the term is the ontology engine's job
+    $self->_ont_engine()->add_term($term);
+}    # _add_term
+
+# This simply delegates. See OBOEngine
+sub _part_of_relationship {
+    my $self = shift;
+
+    # forward all remaining arguments to the ontology engine
+    my $engine = $self->_ont_engine();
+    return $engine->part_of_relationship(@_);
+}    # _part_of_relationship
+
+# This simply delegates. See OBOEngine
+sub _is_a_relationship {
+    my $self = shift;
+
+    # forward all remaining arguments to the ontology engine
+    my $engine = $self->_ont_engine();
+    return $engine->is_a_relationship(@_);
+}    # _is_a_relationship
+
+# Forwards to the engine's related_to_relationship(), passing any extra
+# arguments through unchanged. See OBOEngine.
+sub _related_to_relationship {
+    my ( $self, @args ) = @_;
+    my $engine = $self->_ont_engine();
+
+    return $engine->related_to_relationship(@args);
+}    # _related_to_relationship
+
+# Registers a relationship with the ontology engine. See OBOEngine.
+# Callers pass (parent, child, type); the engine is given the triple as
+# (subject, predicate, object), i.e. (child, type, parent).
+sub _add_relationship {
+    my $self = shift;
+    my ( $parent, $child, $type, $ont ) = @_;
+    my @triple = ( $child, $type, $parent );
+
+    return $self->_ont_engine()->add_relationship( @triple, $ont );
+}    # _add_relationship
+
+# Asks the ontology engine whether it already holds the given term; all
+# arguments are passed through unchanged. See OBOEngine.
+sub _has_term {
+    my ( $self, @args ) = @_;
+    my $engine = $self->_ont_engine();
+
+    return $engine->has_term(@args);
+}    # _has_term
+
+# Accessor for the OBO engine that the file is parsed into. A defined
+# argument replaces the stored engine; the current engine is always
+# returned.
+sub _ont_engine {
+    my $self = shift;
+
+    $self->{"_ont_engine"} = $_[0] if defined $_[0];
+
+    return $self->{"_ont_engine"};
+}    # _ont_engine
+
+# Normalizes one raw line of an OBO file: strips the line ending, folds
+# 8-bit characters into the 7-bit range, deletes control characters,
+# removes trailing comments and trims surrounding whitespace.
+#
+# Args    : the raw line (a string)
+# Returns : the cleaned line; an empty string for blank and comment-only
+#           lines
+sub _filter_line {
+    my ( $self, $line ) = @_;
+
+    chomp($line);
+    $line =~ tr [\200-\377]
+          [\000-\177];    # see 'man perlop', section on tr/
+                          # weird ascii characters should be excluded
+    $line =~ tr/\0-\10//d;     # remove weird characters; ascii 0-8
+                               # preserve \11 (9 - tab) and \12 (10-linefeed)
+    $line =~ tr/\13\14//d;     # remove weird characters; 11,12
+                               # preserve \15 (13 - carriage return)
+    $line =~ tr/\16-\37//d;    # remove 14-31 (all rest before space)
+    $line =~ tr/\177//d;       # remove DEL character
+
+    # Cut everything from the first unescaped '!' to the end of the line.
+    # The look-behind leaves the character in front of the marker alone; the
+    # former pattern [^\\]\! consumed it, turning "foo!bar" into "fo".
+    $line =~ s/(?<!\\)!.*//;
+
+    # Same for '#', except that a '#' in the very first column is kept, as
+    # before (there never was a rule for a leading '#').
+    $line =~ s/(?<=[^\\])\#.*//;
+
+    $line =~ s/^\s+//;
+    $line =~ s/\s+$//;
+
+    return $line;
+}
+
+# Parses the header section of an OBO file, i.e. every line up to the first
+# line that is empty after filtering.
+#
+# Every recognised "tag: value" header line is stored in the returned
+# collection as a Bio::Annotation::SimpleValue keyed by its tag, and the
+# value of default-namespace is also used as the ontology name.
+#
+# Returns : a Bio::Annotation::Collection with the header annotations once
+#           the line ending the header is reached; nothing if the input
+#           runs out before that
+# Throws  : if a stanza marker such as "[Term]" appears inside the header,
+#           if format-version or default-namespace has not been seen when
+#           the header ends, or (via _check_colon) if a line has no colon
+sub _header {
+    my $self                  = shift;
+    my $annotation_collection = Bio::Annotation::Collection->new();
+    my $line_counter          = 0;
+    $self->{'_current_line_no'} = 0;
+    my $format_version_header_flag    = 0;
+    my $default_namespace_header_flag = 0;
+
+    while ( my $raw_line = $self->_readline() ) {
+        ++$line_counter;
+        my $line = $self->_filter_line($raw_line);
+
+        # An empty line terminates the header.
+        if ( !$line ) {
+            if ( !$format_version_header_flag || !$default_namespace_header_flag) {
+                $self->throw(
+"OBO File Format Error - \nCannot find tag format-version and/ default-namespace . These are required header.\n"
+                );
+            }
+
+            $self->{'_current_line_no'} = $line_counter;
+            return $annotation_collection;
+        }
+
+        ### A stanza marker here means the header was never terminated
+        if($line =~ /\[\w*\]/) {
+                $self->throw(
+"OBO File Format Error - \nCannot find tag format-version. Thi ia a required header.\n"
+                );
+          }
+
+        ### If the line is not null, check it contains at least one colon
+        $self->_check_colon( $line, $line_counter );
+
+        ### These are the allowed headers. Any other headers will be ignored
+        next
+          unless $line =~
+/^(\[|format-version:|typeref:|version:|date:|saved-by:|auto-generated-by:|default-namespace:|remark:|subsetdef:)/;
+
+        # Tag and value are scoped to the current line, so a line that passes
+        # the allow-list without being a "tag: value" pair is skipped instead
+        # of re-adding the tag and value of the previous header line.
+        next unless $line =~ /^([\w\-]+)\:\s*(.*)/;
+        my ( $tag, $value ) = ( $1, $2 );
+
+        # The allow-list above is anchored and case-sensitive, so the tag is
+        # exactly one of the listed names and can be compared with eq.
+        if ( $tag eq 'format-version' ) {
+            $format_version_header_flag = 1;
+        }
+
+        my $header = Bio::Annotation::SimpleValue->new( -value => $value );
+        $annotation_collection->add_Annotation( $tag, $header );
+
+        #### Assign the Ontology name as the value of the default-namespace header
+        if ( $tag eq 'default-namespace' ) {
+            $default_namespace_header_flag = 1;
+            $self->ontology_name($value);
+        }
+    }
+
+    # The input ran out before an empty line closed the header.
+    return;
+}
+
+### Parses the next [Term] stanza of the file.
+###
+### Lines are read, cleaned with _filter_line() and processed until an empty
+### line is met while a term is being built, or until the input runs out.
+### Stanzas other than [Term] are skipped ([Typedef] silently, anything else
+### with a warning).
+###
+### Side effects: $self->{'_relationships'} and $self->{'_isa_parents'} are
+### reset when a [Term] stanza starts and filled from its relationship: and
+### is_a: tags; $self->{'_current_line_no'} is updated when a term is
+### returned at an empty line.
+###
+### Returns : the term object of the stanza, or an undefined value when the
+###           input ends without a [Term] stanza having been started
+### Throws  : via _check_colon, if a non-empty, non-stanza line has no colon
+sub _next_term {
+    my $self             = shift;
+    my $term             ;
+    my $skip_stanza_flag = 1;
+    my $line_counter     = $self->{'_current_line_no'};
+
+    while ( my $line = $self->_readline() ) {
+        #print $line."\n";
+        ++$line_counter;
+        # NOTE(review): this 'my' shadows the loop variable of the same name.
+        my $line = $self->_filter_line($line);
+        # an empty line ends the stanza, but only once a term exists
+        if ( !$line && $term ) {
+            $self->{'_current_line_no'} = $line_counter;
+            return $term;
+        }
+
+        if ( ( $line =~ /^\[(\w+)\]\s*(.*)/ ) ) {    #New stanza
+
+            if ( uc($1) eq "TERM" ) {
+
+                $term             = $self->_create_term_object;
+                $skip_stanza_flag = 0;
+                ### Reset the relationships after each stanza
+                $self->{'_relationships'} = {};
+                $self->{'_isa_parents'}   = undef;
+            }
+            elsif ( uc($1) eq "TYPEDEF" ) {
+                $skip_stanza_flag = 1;
+                ### Check if this typedef is already defined by the relationship
+            }
+            else {
+                $skip_stanza_flag = 1;
+                $self->warn(
+"OBO File Format Warning on line $line_counter $line \nUnrecognized stanza type found. Skipping this stanza.\n"
+                );
+            }
+            next;
+        }
+
+        ### If the line is not null, check it contains at least one colon
+        $self->_check_colon( $line, $line_counter );
+
+        ### if there is any tag other than the ones listed below, or the
+        ### current stanza is being skipped, move on to the next line
+        next
+          if (
+            (
+                $line !~
+/^(\[|id:|name:|is_a:|relationship:|namespace:|is_obsolete:|alt_id:|def:|xref_analog:|exact_synonym:|broad_synonym:|related_synonym:|synonym:|comment:)/
+            )
+            || $skip_stanza_flag
+          );
+
+        if ( $line =~ /^([\w\-]+)\:\s*(.*)/ ) {    # tag-value pair
+            my ( $tag, $val ) = ( $1, $2 );
+
+            ### If the tag has no value, emit a warning
+            if ( !$val ) {
+                $self->warn(
+"OBO File Format Warning on line $line_counter $line \nTag has no value\n"
+                );
+            }
+
+            # split a trailing {...} qualifier block off the value; the
+            # qualifier hash itself is not used below
+            my $qh;
+            ( $val, $qh ) = $self->_extract_quals($val);
+            # NOTE(review): $val2 is assigned here but never read afterwards.
+            my $val2 = $val;
+            $val2 =~ s/\\,/,/g;
+            $tag = uc($tag);
+            if ( $tag eq "ID" ) {
+
+                $term->identifier($val);
+                # if the engine already holds this term, continue with what
+                # the engine returns for the id instead of the fresh object
+                if ( $self->_has_term($term) ) {
+                    $term = $self->_ont_engine()->get_terms($val);
+                }
+
+            }
+            elsif ( $tag eq "NAME" ) {
+                $term->name($val);
+            }
+            elsif ( $tag eq "XREF_ANALOG" ) {
+                if ( !$term->has_dblink($val) ) {
+                    $term->add_dblink($val);
+                }
+            }
+            # NOTE(review): "xref_unknown:" is not in the list of accepted
+            # tags above, so this branch looks unreachable -- confirm.
+            elsif ( $tag eq "XREF_UNKNOWN" ) {
+                $term->add_dblink($val);
+            }
+            elsif ( $tag eq "NAMESPACE" ) {
+                $term->namespace($val);
+            }
+            elsif ( $tag eq "DEF" ) {
+                # the quoted text becomes the definition, the bracketed
+                # entries after it are added as dblinks
+                my ( $defstr, $parts ) = $self->_extract_qstr($val);
+                $term->definition($defstr);
+                $term->add_dblink(@$parts);
+            }
+            elsif ( $tag =~ /(\w*)synonym/i ) {
+                # strip quotes and square brackets from the synonym text
+                $val =~ s/['"\[\]]//g;
+                $term->add_synonym($val);
+            }
+            elsif ( $tag eq "ALT_ID" ) {
+                $term->add_secondary_id($val);
+            }
+            elsif ( $tag eq "IS_OBSOLETE" ) {
+
+                # map the literals 'true'/'false' to 1/0; any other value is
+                # passed on unchanged
+                if ( $val eq 'true' ) {
+                    $val = 1;
+                }
+                if ( $val eq 'false' ) {
+                    $val = 0;
+                }
+                $term->is_obsolete($val);
+            }
+            elsif ( $tag eq "COMMENT" ) {
+                $term->comment($val);
+            }
+            elsif ( $tag eq "RELATIONSHIP" ) {
+                $self->_handle_relationship_tag($val);
+            }
+            elsif ( $tag eq "IS_A" ) {
+
+                # the parent is kept as a term object that carries only the
+                # identifier (with all blanks removed)
+                $val =~ s/ //g;
+                my $parent_term = $self->_create_term_object();
+                $parent_term->identifier($val);
+
+                # append to the list of is_a parents, creating it on first use
+                if ( $self->{'_isa_parents'} ) {
+                    my $isa_parents_array_ref = $self->{'_isa_parents'};
+                    push( @$isa_parents_array_ref, $parent_term );
+                }
+                else {
+                    my @terms_array;
+                    push( @terms_array, $parent_term );
+                    $self->{'_isa_parents'} = \@terms_array;
+                }
+            }
+        }
+    }
+    # end of input: return whatever has been built (possibly nothing)
+    return $term;
+}
+
+# Returns a fresh term object built by this parser's term factory.
+sub _create_term_object {
+    my $self = shift;
+
+    my $factory  = $self->term_factory;
+    my $new_term = $factory->create_object();
+
+    return $new_term;
+}
+
+# Splits a trailing qualifier block such as {key="value", other='x'} off a
+# tag value.
+#
+# Returns : a two-element list: the value without the qualifier block, and
+#           a hash reference mapping qualifier names to their values (an
+#           empty hash when the value carries no qualifier block)
+sub _extract_quals {
+    my ( $self, $str ) = @_;
+
+    # Guard clause: no trailing {...} block, the value is returned as is.
+    return ( $str, {} ) unless $str =~ /(.*)\s+(\{.*\})\s*$/;
+
+    my ( $value_part, $qual_block ) = ( $1, $2 );
+    my %qualifier_of;
+
+    if ($qual_block) {
+        for ( $self->_split_on_comma($qual_block) ) {
+
+            # name="value" and name='value' are both accepted; anything
+            # else is reported and dropped
+            if ( /(\w+)=\"(.*)\"/ || /(\w+)=\'(.*)\'/ ) {
+                $qualifier_of{$1} = $2;
+            }
+            else {
+                warn("$_ in $str");
+            }
+        }
+    }
+
+    return ( $value_part, \%qualifier_of );
+}
+# Takes a tag value of the form  "quoted text" SCOPE [ref, ref]  apart.
+#
+# Returns : a three-element list: the quoted text with the surrounding
+#           double quotes and all backslashes removed; an array reference
+#           with the comma-separated entries of the bracketed group(s); an
+#           array reference with the word found between the quoted text and
+#           the first bracket, if any
+#
+# NOTE(review): extract_quotelike() and extract_bracketed() are not defined
+# in this part of the file -- presumably imported from Text::Balanced;
+# confirm against the module's "use" lines.
+sub _extract_qstr {
+    my ( $self, $str ) = @_;
+
+    # $extr: the leading quoted string, $rem: whatever follows it
+    my ( $extr, $rem, $prefix ) = extract_quotelike($str);
+    my $txt = $extr;
+    $txt =~ s/^\"//;
+    $txt =~ s/\"$//;
+    if ($prefix) {
+        warn("illegal prefix: $prefix in: $str");
+    }
+
+    my @extra = ();
+
+    # eg synonym: "foo" EXACT [...]
+    # the word in front of the bracket goes to @extra, the bracketed rest
+    # stays in $rem
+    if ( $rem =~ /(\w+)\s+(\[.*)/ ) {
+        $rem = $2;
+        push( @extra, split( ' ', $1 ) );
+    }
+
+    my @parts = ();
+    # The list assignment is always true as a loop condition; the loop is
+    # left through 'last' once no further bracketed group is extracted.
+    while ( ( $extr, $rem, $prefix ) = extract_bracketed( $rem, '[]' ) ) {
+        last unless $extr;
+        $extr =~ s/^\[//;
+        $extr =~ s/\]$//;
+        push( @parts, $extr ) if $extr;
+    }
+    # break every bracket content into its comma-separated entries
+    @parts =
+      map { $self->_split_on_comma($_) } @parts;
+
+    $txt =~ s/\\//g;
+    return ( $txt, \@parts, \@extra );
+}
+# Splits a string on commas that are not preceded by a backslash and then
+# removes every backslash from the resulting pieces.
+#
+# Returns : the pieces, in their original order
+sub _split_on_comma {
+    my ( $self, $str ) = @_;
+    my @pieces;
+
+    # Peel pieces off the right-hand end: the greedy match finds the last
+    # comma whose preceding character is not a backslash.
+    while ( $str =~ /(.*[^\\],\s*)(.*)/ ) {
+        my ( $head, $tail ) = ( $1, $2 );
+        unshift @pieces, $tail;
+        ( $str = $head ) =~ s/,\s*$//;
+    }
+    unshift @pieces, $str;
+
+    for my $piece (@pieces) {
+        $piece =~ s/\\//g;
+    }
+    return @pieces;
+}
+
+# Throws a format error when a non-empty line contains no colon, the
+# character that terminates a tag name. Empty lines are accepted.
+sub _check_colon {
+    my ( $self, $line, $line_no ) = @_;
+
+    return unless $line;
+    return if index( $line, ':' ) >= 0;
+
+    $self->throw(
+"OBO File Format Error on line $line_no $line - \nCannot find key-terminating colon\n"
+    );
+}
+
+# Records one "relationship:" tag value of the term stanza being parsed.
+#
+# The value has the form "<type> <target id>"; the target id is appended to
+# the list kept under the upper-cased type in $self->{'_relationships'}.
+# Nothing is recorded when that hash has not been set up.
+#
+# Args    : the tag value (a string)
+# Returns : nothing
+sub _handle_relationship_tag {
+    my ( $self, $val ) = @_;
+
+    # split(' ') skips runs of whitespace, so "part_of  GO:1" still yields
+    # the id rather than an empty second field.
+    my ( $type, $id ) = split( ' ', $val );
+    my $relationship = uc($type);
+
+    my $relationships_hash = $self->{'_relationships'}
+      or return;
+
+    # The list for a relationship type springs into existence on first use.
+    push( @{ $relationships_hash->{$relationship} }, $id );
+    return;
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/simplehierarchy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/simplehierarchy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/simplehierarchy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,684 @@
+# $Id: simplehierarchy.pm,v 1.17.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::OntologyIO::simplehierarchy
+#
+# Cared for by Allen Day, allenday at ucla.edu
+#
+# (c) Allen Day, allenday at ucla.edu, 2003.
+# (c) Department of Human Genetics, UCLA Medical School, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::simplehierarchy - a base class parser for simple hierarchy-by-indentation
+                  type formats
+
+=head1 SYNOPSIS
+
+  use Bio::OntologyIO;
+
+  # do not use directly -- use via Bio::OntologyIO
+  my $parser = Bio::OntologyIO->new
+        ( -format        => "simplehierarchy",
+          -file          => "pathology_terms.csv",
+          -indent_string => ",",
+          -ontology_name => "eVOC",
+          -term_factory  => $fact,
+        );
+
+  my $ontology = $parser->next_ontology();
+
+=head1 DESCRIPTION
+
+Needs Graph.pm from CPAN.  This class is nearly identical to
+OntologyIO::dagflat, see L<Bio::OntologyIO::dagflat> for details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day
+
+Email: allenday at ucla.edu
+
+=head2 CONTRIBUTOR
+
+Christian Zmasek
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package  Bio::OntologyIO::simplehierarchy;
+
+use strict;
+
+use Data::Dumper;
+use File::Basename;
+use Bio::Root::IO;
+use Bio::Ontology::OBOEngine;
+use Bio::Ontology::Ontology;
+use Bio::Ontology::TermFactory;
+
+use constant TRUE         => 1;
+use constant FALSE        => 0;
+
+
+use base qw(Bio::OntologyIO);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : see SYNOPSIS
+ Function: Creates a new simplehierarchy parser.
+ Returns : A new simplehierarchy parser object, implementing Bio::OntologyIO.
+ Args    : -files         => a single ontology flat file holding the
+                             term relationships, or an array ref holding
+                             the file names
+           -file          => if there is only a single flat file, it may
+                             also be specified via the -file parameter
+           -ontology_name => the name of the ontology, defaults to
+                             "Gene Ontology"
+           -file_is_root  => Boolean indicating whether a virtual root
+                             term is to be added, the name of which will
+                             be derived from the file name. Default is false.
+                             Enabling this allows to parse multiple input
+                             files into the same ontology and still have
+                             them separately rooted.
+           -engine        => the L<Bio::Ontology::OntologyEngineI> object
+                             to be reused (will be created otherwise); note
+                             that every L<Bio::Ontology::OntologyI> will
+                             qualify as well since that one inherits from the
+                             former.
+           -indent_string => the string used to indent hierarchical
+                             levels in the file.
+
+                             For a file like this:
+
+                             term0
+                               subterm1A
+                                 subterm2A
+                               subterm1B
+                               subterm1C
+
+                             indent_string would be "  ".  Defaults to
+                             one space (" ").
+           -comment_char  => Allows specification of a regular
+                             expression string to indicate a comment line.
+                             Currently defaults to "[\|\-]".
+                             Note: this is not yet implemented.
+
+See L<Bio::OntologyIO>.
+
+=cut
+
+# in reality, we let OntologyIO::new do the instantiation, and override
+# _initialize for all initialization work
+#
+# Reads -indent_string, -files, -file_is_root, -ontology_name and -engine
+# from the constructor arguments, resets the parser state, and stores the
+# ontology engine and the list of flat files to parse.
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize( @args );
+
+    my ( $indent,$files,$fileisroot,$name,$eng ) =
+        $self->_rearrange([qw(INDENT_STRING
+                              FILES
+                              FILE_IS_ROOT
+                              ONTOLOGY_NAME
+                              ENGINE)
+                           ], @args);
+
+    $self->_done( FALSE );
+    $self->_not_first_record( FALSE );
+    $self->_term( "" );
+    $self->file_is_root($fileisroot) if defined($fileisroot);
+    $indent = ' ' unless defined($indent); #reasonable default?
+    # the indentation string may have escaped chars (e.g. '\t'); they are
+    # expanded by evaluating the string as a double-quoted literal.
+    # NOTE(security): this is a string eval of caller-supplied text. It is
+    # only attempted when the text can neither interpolate ('$', '@'), run a
+    # command ('`'), nor terminate the literal early ('"').
+    if (($indent =~ /\\/) && ($indent !~ /[\$\`\@\"]/)) {
+        my $expanded = eval "\"$indent\"";
+        if (defined $expanded) {
+            $indent = $expanded;
+        } else {
+            # keep the literal text; it must never be replaced by the
+            # generated code when the eval fails
+            $self->warn("cannot expand escapes in indent string: $@");
+        }
+    }
+    $self->indent_string($indent);
+    delete $self->{'_ontologies'};
+
+    # ontology engine (and possibly name if it's an OntologyI)
+    $eng = Bio::Ontology::OBOEngine->new() unless $eng;
+    if($eng->isa("Bio::Ontology::OntologyI")) {
+        $self->ontology_name($eng->name());
+        $eng = $eng->engine() if $eng->can('engine');
+    }
+    $self->_ont_engine($eng);
+
+    # flat files to parse
+    $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
+
+    # ontology name (overrides implicit one through OntologyI engine)
+    $self->ontology_name($name) if $name;
+
+} # _initialize
+
+=head2 ontology_name
+
+ Title   : ontology_name
+ Usage   : $obj->ontology_name($newval)
+ Function: Get/set the name of the ontology parsed by this module.
+ Example :
+ Returns : value of ontology_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub ontology_name{
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, even undef, is taken as the new name.
+    if (@new_value) {
+        $self->{'ontology_name'} = $new_value[0];
+    }
+    return $self->{'ontology_name'};
+}
+
+
+=head2 parse
+
+ Title   : parse()
+ Usage   : $parser->parse();
+ Function: Parses the files set with "new" or with methods
+           defs_file and _flat_files.
+
+           Normally you should not need to call this method as it will
+           be called automatically upon the first call to
+           next_ontology().
+
+ Returns : [Bio::Ontology::OntologyEngineI]
+ Args    :
+
+=cut
+
+sub parse {
+    my ($self) = @_;
+
+    # fall back to a factory producing Bio::Ontology::Term objects when
+    # nobody has supplied a term factory yet
+    if ( !$self->term_factory() ) {
+        $self->term_factory(Bio::Ontology::TermFactory->new(
+                                             -type => "Bio::Ontology::Term"));
+    }
+
+    # the ontology that all flat files are parsed into
+    my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
+                                           -engine => $self->_ont_engine());
+
+    # the relationship types belong to that ontology as well
+    my @relationship_types = ($self->_part_of_relationship(),
+                              $self->_is_a_relationship(),
+                              $self->_related_to_relationship());
+    for my $relationship_type (@relationship_types) {
+        $relationship_type->ontology($ont);
+    }
+
+    # without -file there is no open handle yet: start with the first
+    # flat file
+    unless ( $self->_fh ) {
+        $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
+    }
+
+    FLAT_FILE:
+    while ( $self->_fh ) {
+        $self->_parse_flat_file($ont);
+
+        # nothing else to parse, so terminate the loop
+        last FLAT_FILE unless @{$self->_flat_files()};
+
+        # move on to the next flat file: close the current one, drop the
+        # virtual root so that a new one is derived from the next file,
+        # and re-initialize the IO object
+        $self->close();
+        $self->_virtual_root(undef);
+        $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
+    }
+
+    $self->_add_ontology($ont);
+
+    # not needed anywhere, only because of backward compatibility
+    return $self->_ont_engine();
+} # parse
+
+=head2 next_ontology
+
+ Title   : next_ontology
+ Usage   :
+ Function: Get the next available ontology from the parser. This is the
+           method prescribed by Bio::OntologyIO.
+ Example :
+ Returns : An object implementing Bio::Ontology::OntologyI, and undef if
+           there is no more ontology in the input.
+ Args    :
+
+=cut
+
+sub next_ontology{
+    my ($self) = @_;
+
+    # parse() fills the '_ontologies' queue (through _add_ontology); run it
+    # if the queue does not exist yet
+    if ( !exists $self->{'_ontologies'} ) {
+        $self->parse();
+    }
+
+    # still no queue after parsing: there is nothing to hand out
+    return unless exists $self->{'_ontologies'};
+
+    my $queue = $self->{'_ontologies'};
+    return shift @$queue;
+}
+
+=head2 _flat_files
+
+ Title   : _flat_files
+ Usage   : $files_to_parse = $parser->_flat_files();
+ Function: Get the array of ontology flat files that need to be parsed.
+
+           Note that this array will decrease in elements over the
+           parsing process. Therefore, its value outside of this
+           module will be limited. Also, be careful not to alter the
+           array unless you know what you are doing.
+
+ Returns : a reference to an array of zero or more strings
+ Args    : none
+
+=cut
+
+sub _flat_files {
+    my ($self) = @_;
+
+    # create the list on first access if the slot is missing
+    if ( !exists $self->{_flat_files} ) {
+        $self->{_flat_files} = [];
+    }
+    return $self->{_flat_files};
+}
+
+
+# INTERNAL METHODS
+# ----------------
+
+=head2 _defs_io
+
+ Title   : _defs_io
+ Usage   : $obj->_defs_io($newval)
+ Function: Get/set the Bio::Root::IO instance representing the
+           definition file, if provided (see defs_file()).
+ Example :
+ Returns : value of _defs_io (a Bio::Root::IO object)
+ Args    : on set, new value (a Bio::Root::IO object or undef, optional)
+
+=cut
+
+sub _defs_io{
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, even undef, replaces the stored value.
+    if (@new_value) {
+        $self->{'_defs_io'} = $new_value[0];
+    }
+    return $self->{'_defs_io'};
+}
+
+# Appends one or more ontologies to the queue that next_ontology() reads
+# from. Throws if an argument is not an object implementing
+# Bio::Ontology::OntologyI.
+sub _add_ontology {
+    my ( $self, @candidates ) = @_;
+
+    if ( !exists $self->{'_ontologies'} ) {
+        $self->{'_ontologies'} = [];
+    }
+
+    for my $candidate (@candidates) {
+        if ( !( ref($candidate) && $candidate->isa("Bio::Ontology::OntologyI") ) ) {
+            $self->throw(ref($candidate)." does not implement Bio::Ontology::OntologyI");
+        }
+        push @{ $self->{'_ontologies'} }, $candidate;
+    }
+}
+
+# Attaches $ont to the term when the term has no ontology yet, then hands
+# the term to the ontology engine. See SimpleGOEngine.
+sub _add_term {
+    my $self = shift;
+    my ( $term, $ont ) = @_;
+
+    if ( $ont && !$term->ontology ) {
+        $term->ontology($ont);
+    }
+    return $self->_ont_engine()->add_term( $term );
+} # _add_term
+
+# Returns what the engine's part_of_relationship() returns; arguments
+# given to this method are ignored. See SimpleGOEngine.
+sub _part_of_relationship {
+    my $self   = shift;
+    my $engine = $self->_ont_engine();
+
+    return $engine->part_of_relationship();
+} # _part_of_relationship
+
+
+
+# Returns what the engine's is_a_relationship() returns; arguments given
+# to this method are ignored. See SimpleGOEngine.
+sub _is_a_relationship {
+    my $self   = shift;
+    my $engine = $self->_ont_engine();
+
+    return $engine->is_a_relationship();
+} # _is_a_relationship
+
+
+# Returns what the engine's related_to_relationship() returns; arguments
+# given to this method are ignored. See SimpleGOEngine.
+sub _related_to_relationship {
+    my $self   = shift;
+    my $engine = $self->_ont_engine();
+
+    return $engine->related_to_relationship();
+} # _related_to_relationship
+
+
+# Registers a relationship with the ontology engine. See SimpleGOEngine.
+# Callers pass (parent, child, type); the engine is given the triple as
+# (subject, predicate, object), i.e. (child, type, parent).
+sub _add_relationship {
+    my $self = shift;
+    my ( $parent, $child, $type, $ont ) = @_;
+    my @triple = ( $child, $type, $parent );
+
+    return $self->_ont_engine()->add_relationship( @triple, $ont );
+} # _add_relationship
+
+
+# Asks the ontology engine whether it holds the given term. A plain
+# (non-reference) term is first prefixed with "<ontology name>|" when an
+# ontology name is set. See SimpleGOEngine.
+sub _has_term {
+    my ( $self, $term ) = @_;
+
+    if ( !ref($term) && $self->ontology_name() ) {
+        $term = $self->ontology_name() . '|' . $term;
+    }
+    return $self->_ont_engine()->has_term( $term );
+} # _has_term
+
+# Fetches terms from the ontology engine. Every plain (non-reference)
+# argument is prefixed with "<ontology name>|" when an ontology name is
+# set; the order of the arguments is kept. See SimpleGOEngine
+sub _get_terms{
+    my ( $self, @terms ) = @_;
+
+    my @qualified;
+    for my $term (@terms) {
+        if ( ref($term) || !$self->ontology_name() ) {
+            push @qualified, $term;
+        }
+        else {
+            push @qualified, $self->ontology_name() .'|'. $term;
+        }
+    }
+    return $self->_ont_engine->get_terms(@qualified);
+}
+
+
+# Parses the currently open flat file into the given ontology.
+#
+# Every line is one term; its nesting depth is the number of leading indent
+# strings (see _count_indents). A term not yet known to the engine is
+# created and added; every term below the top level is linked to its parent
+# with an is_a relationship. Top-level terms are linked to the virtual root,
+# if there is one.
+#
+# Args    : the ontology object the terms and relationships are added to
+# Returns : that same ontology object
+# Throws  : if a line is indented more than one level deeper than the
+#           previous line
+#
+# NOTE(review): $indent_string is interpolated into character classes
+# ([$indent_string]) below, so it is treated as a set of characters and not
+# as a literal string -- confirm this is intended for indent strings that
+# are longer than one character or contain ']', '^' or '-'.
+sub _parse_flat_file {
+  my $self = shift;
+  my $ont  = shift;
+
+  # @stack holds the names of the ancestors of the current line; its last
+  # element is used as the parent
+  my @stack       = ();
+  my $prev_indent = -1;
+  my $parent      = "";
+  my $prev_term   = "";
+
+  my $indent_string = $self->indent_string;
+
+
+  while ( my $line = $self->_readline() ) {
+        # skip lines whose first non-indent character is '|' or '-'
+        if ( $line =~ /^[$indent_string]*[\|\-]/ ) { #this is not yet generalized
+          next;
+        }
+
+        my ($current_term) = $line =~ /^[$indent_string]*(.*)/;
+        my $current_indent = $self->_count_indents( $line );
+        chomp $current_term;
+        # remove extraneous delimiter characters at the end of the name if any
+        $current_term =~ s/[$indent_string]+$//;
+        # remove double quotes surrounding the entry, if any
+        $current_term =~ s/^\"(.*)\"$/$1/;
+        # also, the name might contain a synonym
+        # (written as " {synonym}" after the name; it is cut out of the name)
+        my $syn = $current_term =~ s/\s+{([^}]+)}// ? $1 : undef;
+
+         if ( ! $self->_has_term( $current_term ) ) {
+           my $term = $self->_create_ont_entry($current_term);
+          # add synonym(s) if any
+          $term->add_synonym(split(/[;,]\s*/,$syn)) if $syn;
+          # add to the machine
+           $self->_add_term( $term, $ont );
+
+          #go on to the next term if a root node.
+          if($current_indent == 0) {
+              # add the virtual root as parent if there is one
+              if($self->_virtual_root()) {
+                  $self->_add_relationship($self->_virtual_root(),
+                                           $term,
+                                           $self->_is_a_relationship(),
+                                           $ont);
+              }
+              $prev_indent = $current_indent;
+              $prev_term = $current_term;
+              push @stack, $current_term;
+              next;
+          }
+         }
+
+        # note: we are ensured to see the parent first in this type of file,
+        # so we never need to possibly insert the parent here
+
+        # adjust the ancestor stack to the new depth: one level deeper makes
+        # the previous term the parent, going up drops one entry per level
+         if ( $current_indent != $prev_indent  ) {
+           if ( $current_indent == $prev_indent + 1 ) {
+                 push( @stack, $prev_term );
+           } elsif ( $current_indent < $prev_indent ) {
+                 my $n = $prev_indent -  $current_indent;
+                 for ( my $i = 0; $i < $n; ++$i ) {
+                   pop( @stack );
+                 }
+           } else {
+                 $self->throw("format error: indentation level $current_indent "
+                             ."is more than one higher than the previous "
+                             ."level $prev_indent ('$current_term', "
+                             ."file ".$self->file.")" );
+           }
+         }
+
+         $parent = $stack[-1];
+
+        if($parent ne $current_term) { #this prevents infinite recursion from a parent linking to itself
+          $self->_add_relationship($self->_get_terms($parent),
+                                   $self->_get_terms($current_term),
+                                   $self->_is_a_relationship(),
+                                   $ont);
+        }
+
+        $prev_indent = $current_indent;
+        $prev_term   = $current_term;
+  }
+  return $ont;
+} # _parse_flat_file
+
+
+
+# Returns the first term id found in a line: one to eight capital letters,
+# a colon and seven digits, preceded by a semicolon and optional
+# whitespace. Throws if the line holds no such id.
+sub _get_first_termid {
+    my ( $self, $line ) = @_;
+
+    my ($termid) = $line =~ /;\s*([A-Z]{1,8}:\d{7})/;
+    return $termid if defined $termid;
+
+    $self->throw( "format error: no term id in line \"$line\"" );
+} # _get_first_termid
+
+# Counts how many times the indent string is repeated at the beginning of a
+# line in the relationships files.
+#
+# The indent string is matched literally and as a whole, so multi-character
+# indents and indents containing regex metacharacters are counted correctly.
+#
+# Args    : the line (a string)
+# Returns : the number of leading indent strings; 0 if the line does not
+#           start with one or if no indent string is set
+sub _count_indents {
+  my ( $self, $line ) = @_;
+
+  my $indent = $self->indent_string;
+
+  # an undefined or empty indent string cannot indent anything
+  return 0 unless defined($indent) && length($indent);
+
+  if ( $line =~ /^((?:\Q$indent\E)+)/ ) {
+        return (length($1)/length($indent));
+  }
+  else {
+        return 0;
+  }
+} # _count_indents
+
+
+# Accessor for the ontology engine that the files are parsed into. A
+# defined argument replaces the stored engine; the current engine is
+# always returned.
+sub _ont_engine {
+    my $self = shift;
+
+    $self->{ "_ont_engine" } = $_[0] if defined $_[0];
+
+    return $self->{ "_ont_engine" };
+} # _ont_engine
+
+
+# Builds a new ontology term through the term factory.
+# Arguments: name, id (both are passed on to the factory as -name and
+# -identifier)
+sub _create_ont_entry {
+    my $self = shift;
+    my ( $name, $termid ) = @_;
+
+    my $factory  = $self->term_factory;
+    my $new_term = $factory->create_object(-name => $name,
+                                           -identifier => $termid);
+
+    return $new_term;
+} # _create_ont_entry
+
+
+# Flag telling whether the first record has been passed. A defined argument
+# must equal TRUE or FALSE and is stored; the current flag is returned.
+sub _not_first_record {
+    my ( $self, $value ) = @_;
+
+    # plain read access
+    return $self->{ "_not_first_record" } unless defined $value;
+
+    if ( $value != FALSE && $value != TRUE ) {
+        $self->throw( "Argument to method \"_not_first_record\" must be either ".TRUE." or ".FALSE );
+    }
+
+    return $self->{ "_not_first_record" } = $value;
+} # _not_first_record
+
+
+
+# Flag telling whether parsing is done. A defined argument must equal TRUE
+# or FALSE and is stored; the current flag is returned.
+sub _done {
+    my ( $self, $value ) = @_;
+
+    # plain read access
+    return $self->{ "_done" } unless defined $value;
+
+    if ( $value != FALSE && $value != TRUE ) {
+        $self->throw( "Found [$value] where [" . TRUE
+        ." or " . FALSE . "] expected" );
+    }
+
+    return $self->{ "_done" } = $value;
+} # _done
+
+
+# Accessor for the stored term. A defined argument replaces the stored
+# value; the current value is always returned.
+sub _term {
+    my $self = shift;
+
+    $self->{ "_term" } = $_[0] if defined $_[0];
+
+    return $self->{ "_term" };
+} # _term
+
+=head2 indent_string
+
+ Title   : indent_string
+ Usage   : $obj->indent_string($newval)
+ Function:
+ Example :
+ Returns : value of indent_string (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub indent_string{
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, even undef, replaces the stored indent string.
+    if (@new_value) {
+        $self->{'indent_string'} = $new_value[0];
+    }
+    return $self->{'indent_string'};
+}
+
+=head2 file_is_root
+
+ Title   : file_is_root
+ Usage   : $obj->file_is_root($newval)
+ Function: Boolean indicating whether a virtual root term is to be
+           added, the name of which will be derived from the file
+           name.
+
+           Enabling this allows to parse multiple input files into the
+           same ontology and still have them separately rooted.
+
+ Example :
+ Returns : value of file_is_root (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub file_is_root{
+    my ( $self, @new_value ) = @_;
+
+    # Any argument, even undef, replaces the stored flag.
+    if (@new_value) {
+        $self->{'file_is_root'} = $new_value[0];
+    }
+    return $self->{'file_is_root'};
+}
+
+=head2 _virtual_root
+
+ Title   : _virtual_root
+ Usage   : $obj->_virtual_root($newval)
+ Function:
+ Example :
+ Returns : value of _virtual_root (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub _virtual_root{
+    my ( $self, @new_value ) = @_;
+
+    # setter: any argument, even undef, replaces the stored root
+    if (@new_value) {
+        return $self->{'_virtual_root'} = $new_value[0];
+    }
+
+    # no virtual root unless file_is_root mode is on and there is a file
+    # to derive the root node from
+    return unless $self->file_is_root() && $self->file();
+
+    # already constructed earlier
+    return $self->{'_virtual_root'} if $self->{'_virtual_root'};
+
+    # derive the root name from the file name: drop the directory and
+    # everything from the first dot on, and turn underscores into blanks
+    my ($base_name) = fileparse($self->file(), '\..*');
+    ( my $root_name = $base_name ) =~ s/_/ /g;
+
+    my $root_term = $self->_create_ont_entry($root_name);
+    $self->_add_term($root_term, $self->ontology_name());
+
+    return $self->{'_virtual_root'} = $root_term;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/simplehierarchy.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/soflat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/soflat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO/soflat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,154 @@
+# $Id: soflat.pm,v 1.7.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::OntologyIO::soflat
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) Hilmar Lapp, hlapp at gnf.org, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002-3.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO::soflat - a parser for the Sequence Ontology flat-file format
+
+=head1 SYNOPSIS
+
+  use Bio::OntologyIO;
+
+  # do not use directly -- use via Bio::OntologyIO
+  my $parser = Bio::OntologyIO->new
+	( -format       => "so", # or soflat
+     -defs_file    => "/home/czmasek/SO/SO.defs",
+	  -file         => "/home/czmasek/SO/sofa.ontology" );
+
+  my $sofa_ontology = $parser->next_ontology();
+
+  my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+  my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+
+=head1 DESCRIPTION
+
+Needs Graph.pm from CPAN.
+
+This is essentially a very thin derivation of the dagflat base-parser.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head2 CONTRIBUTOR
+
+ Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package  Bio::OntologyIO::soflat;
+
+use strict;
+
+use Bio::Ontology::TermFactory;
+
+use constant TRUE         => 1;
+use constant FALSE        => 0;
+
+
+use base qw(Bio::OntologyIO::dagflat);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $parser = Bio::OntologyIO->new(
+                             -format => "soflat",
+                             -files => ["/path/to/sofa.ontology"] );
+ Function: Creates a new soflat parser.
+ Returns : A new soflat parser object, implementing Bio::OntologyIO.
+ Args    : -defs_file  => the name of the file holding the term
+                          definitions
+           -files      => a single ontology flat file holding the
+                          term relationships, or an array ref holding
+                          the file names
+           -file       => if there is only a single flat file, it may
+                          also be specified via the -file parameter
+           -ontology_name => the name of the ontology; if not specified the
+                          parser will auto-discover it by using the term
+                          that starts with a $, and converting underscores
+                          to spaces
+           -engine     => the Bio::Ontology::OntologyEngineI object
+                          to be reused (will be created otherwise); note
+                          that every Bio::Ontology::OntologyI will
+                          qualify as well since that one inherits from the
+                          former.
+
+See L<Bio::Ontology::OntologyI>.
+
+=cut
+
+# in reality, we let OntologyIO::new do the instantiation, and override
+# _initialize for all initialization work
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # let the parent class (dagflat) process the constructor arguments
+    $self->SUPER::_initialize( @args );
+
+    # fall back to a GOterm-producing factory if none has been set by now
+    my $default_type = "Bio::Ontology::GOterm";
+    $self->term_factory(Bio::Ontology::TermFactory->new(-type => $default_type))
+        unless $self->term_factory();
+} # _initialize
+
+  
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/OntologyIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,303 @@
+# $Id: OntologyIO.pm,v 1.12.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::OntologyIO
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2003.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::OntologyIO - Parser factory for Ontology formats
+
+=head1 SYNOPSIS
+
+    use Bio::OntologyIO;
+
+    my $parser = Bio::OntologyIO->new(-format => "go",
+                                      -file=> $file);
+
+    while(my $ont = $parser->next_ontology()) {
+         print "read ontology ",$ont->name()," with ",
+               scalar($ont->get_root_terms)," root terms, and ",
+               scalar($ont->get_leaf_terms)," leaf terms\n";
+    }
+
+=head1 DESCRIPTION
+
+This is the parser factory for different ontology sources and
+formats. Conceptually, it is very similar to L<Bio::SeqIO>, but the
+difference is that the chunk of data returned as an object is an
+entire ontology.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::OntologyIO;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+# Maps the (lowercased) -format names accepted by new() to the name of the
+# driver module under Bio::OntologyIO:: that handles the format. A name
+# without an entry here is used as the driver name itself (see _map_format).
+my %format_driver_map = (
+                         "go"          => "goflat",
+                         "so"          => "soflat",
+                         "interpro"    => "InterProParser",
+                         "interprosax" => "Handlers::InterPro_BioSQL_Handler",
+                         "evoc"        => "simplehierarchy",
+                         "obo"        => "obo"
+                         );
+
+=head2 new
+
+ Title   : new
+ Usage   : my $parser = Bio::OntologyIO->new(-format => 'go', @args);
+ Function: Returns a stream of ontologies opened on the specified input
+           for the specified format.
+ Returns : An ontology parser (an instance of Bio::OntologyIO) initialized
+           for the specified format.
+ Args    : Named parameters. Common parameters are
+
+              -format    - the format of the input; the following are
+                           presently supported:
+                  goflat: DAG-Edit Gene Ontology flat files
+                  go    : synonymous to goflat
+                  soflat: DAG-Edit Sequence Ontology flat files
+                  so    : synonymous to soflat
+                  simplehierarchy: text format with one term per line
+                          and indentation giving the hierarchy
+                  evoc  : synonymous to simplehierarchy
+                  interpro: InterPro XML
+                  interprosax: InterPro XML - this is actually not a
+                          Bio::OntologyIO compliant parser; instead it
+                          persists terms as they are encountered.
+                          L<Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler>
+                  obo   : OBO format style from Gene Ontology Consortium
+              -file      - the file holding the data
+              -fh        - the stream providing the data (-file and -fh are
+                          mutually exclusive)
+              -ontology_name - the name of the ontology
+              -engine    - the L<Bio::Ontology::OntologyEngineI> object
+                          to be reused (will be created otherwise); note
+                          that every L<Bio::Ontology::OntologyI> will
+                          qualify as well since that one inherits from the
+                          former.
+              -term_factory - the ontology term factory to use. Provide a
+                          value only if you know what you are doing.
+
+           DAG-Edit flat file parsers will usually also accept the
+           following parameters.
+
+              -defs_file - the name of the file holding the term
+                          definitions
+              -files     - an array ref holding the file names (for GO,
+                          there will usually be 3 files: component.ontology,
+                          function.ontology, process.ontology)
+
+           Other parameters are specific to the parsers.
+
+=cut
+
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::OntologyIO::(\S+)/ ) {
+        # called on a concrete driver class: construct and initialize it
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    } else {
+        # called on the factory itself: pick the driver from -format
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+        my $format = $class->_map_format($param{'-format'});
+        # load the driver on demand; nothing is returned if that fails
+        return unless( $class->_load_format_module($format) );
+        return "Bio::OntologyIO::$format"->new(@args);
+    }
+}
+
+sub _initialize {
+    my($self, @args) = @_;
+
+    # Only one key is requested from _rearrange, so only one value comes
+    # back; it has to land in $fact or -term_factory is silently ignored.
+    my ($fact) = $self->_rearrange([qw(TERM_FACTORY)], @args);
+
+    # term object factory
+    $self->term_factory($fact) if $fact;
+
+    # initialize the Bio::Root::IO part
+    $self->_initialize_io(@args);
+}
+
+=head2 next_ontology
+
+ Title   : next_ontology
+ Usage   : $ont = $stream->next_ontology()
+ Function: Reads the next ontology object from the stream and returns it.
+ Returns : a L<Bio::Ontology::OntologyI> compliant object, or undef at the
+           end of the stream
+ Args    : none
+
+
+=cut
+
+sub next_ontology {
+    $_[0]->throw_not_implemented();   # not implemented at this level
+}
+
+=head2 term_factory
+
+ Title   : term_factory
+ Usage   : $obj->term_factory($newval)
+ Function: Get/set the ontology term factory to use.
+
+           As a user of this module it is not necessary to call this
+           method as there will be a default. In order to change the
+           default, the easiest way is to instantiate
+           L<Bio::Ontology::TermFactory> with the proper -type
+           argument. Most if not all parsers will actually use this
+           very implementation, so even easier than the aforementioned
+           way is to simply call
+           $ontio->term_factory->type("Bio::Ontology::MyTerm").
+
+ Example :
+ Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
+ Args    : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
+
+
+=cut
+
+sub term_factory{
+    my ($self, @new_factory) = @_;
+    # Combined getter/setter: any argument (even undef) replaces the factory.
+    $self->{'term_factory'} = $new_factory[0] if @new_factory;
+    return $self->{'term_factory'};
+}
+
+=head1 Private Methods
+
+  Some of these are actually 'protected' in OO speak, which means you
+  may or will want to utilize them in a derived ontology parser, but
+  you should not call them from outside.
+
+=cut
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL OntologyIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+    my ($self, $format) = @_;
+    my $driver_class = "Bio::OntologyIO::$format";
+
+    # trap load failures so they are reported on STDERR instead of dying
+    my $loaded;
+    eval { $loaded = $self->_load_module($driver_class); };
+    if ( $@ ) {
+        print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the OntologyIO system please see the docs.
+This includes ways of checking for formats at compile time, not run time
+END
+    }
+    # still undef if _load_module died before returning anything
+    return $loaded;
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # make sure the stream gets closed when the parser is destroyed
+    $self->close();
+}
+
+sub _map_format {
+    my ($self, $format) = @_;
+
+    # there is no format guessing: a missing (or false) -format is fatal
+    $self->throw("unable to guess ontology format, specify -format")
+        unless $format;
+
+    # known aliases resolve through the driver map; anything else is taken
+    # to be the (lowercased) driver name itself
+    my $wanted = lc($format);
+    my $driver = $format_driver_map{$wanted} || $wanted;
+    return $driver;
+}
+
+sub unescape {
+  my( $self, $text ) = @_;
+  # works on a private copy; the caller's string is left alone
+  for ($text) {
+    s/&lt\\;/\</g; s/&gt\\;/\>/g; s/&pct\\;/\%/g;   # "&lt\;" "&gt\;" "&pct\;"
+    s/\\n/\n/g;    s/\\t/\t/g;                      # backslash-n, backslash-t
+  }
+  return $text;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Perl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Perl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Perl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,694 @@
+# $Id: Perl.pm,v 1.26.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Perl
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Perl - Functional access to BioPerl for people who don't know objects
+
+=head1 SYNOPSIS
+
+  use Bio::Perl;
+
+  # will guess file format from extension
+  $seq_object = read_sequence($filename);
+
+  # forces genbank format
+  $seq_object = read_sequence($filename,'genbank');
+
+  # reads an array of sequences
+  @seq_object_array = read_all_sequences($filename,'fasta');
+
+  # sequences are Bio::Seq objects, so the following methods work
+  # for more info see Bio::Seq, or do 'perldoc Bio/Seq.pm'
+
+  print "Sequence name is ",$seq_object->display_id,"\n";
+  print "Sequence acc  is ",$seq_object->accession_number,"\n";
+  print "First 5 bases is ",$seq_object->subseq(1,5),"\n";
+
+  # get the whole sequence as a single string
+
+  $sequence_as_a_string = $seq_object->seq();
+
+  # writing sequences
+
+  write_sequence(">$filename",'genbank',$seq_object);
+
+  write_sequence(">$filename",'genbank',@seq_object_array);
+
+  # making a new sequence from just a string
+
+  $seq_object = new_sequence("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA",
+      "myname","AL12232");
+
+  # getting a sequence from a database (assumes internet connection)
+
+  $seq_object = get_sequence('swissprot',"ROA1_HUMAN");
+
+  $seq_object = get_sequence('embl',"AI129902");
+
+  $seq_object = get_sequence('genbank',"AI129902");
+
+  # BLAST a sequence (assumes an internet connection)
+
+  $blast_report = blast_sequence($seq_object);
+
+  write_blast(">blast.out",$blast_report);
+
+
+=head1 DESCRIPTION
+
+Easy first time access to BioPerl via functions.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::Perl;
+use vars qw(@EXPORT @EXPORT_OK $DBOKAY);
+use strict;
+use Carp;
+
+use Bio::SeqIO;
+use Bio::Seq;
+use Bio::Root::Version '$VERSION';
+BEGIN {
+    # The remote database modules are optional: try to load all of them at
+    # compile time and just record whether that worked, so that
+    # get_sequence() can report the problem when it is actually called.
+    eval {
+        require Bio::DB::EMBL;
+        require Bio::DB::GenBank;
+        require Bio::DB::SwissProt;
+        require Bio::DB::RefSeq;
+        require Bio::DB::GenPept;
+    };
+    # all-or-nothing: one failed require disables database access
+    $DBOKAY = $@ ? 0 : 1;
+}
+
+use base qw(Exporter);
+
+@EXPORT = qw(read_sequence read_all_sequences write_sequence
+             new_sequence get_sequence translate translate_as_string
+             reverse_complement revcom revcom_as_string
+             reverse_complement_as_string blast_sequence write_blast);
+# every default export can also be requested explicitly
+@EXPORT_OK = @EXPORT;
+
+
+=head2 read_sequence
+
+ Title   : read_sequence
+ Usage   : $seq = read_sequence('sequences.fa')
+           $seq = read_sequence($filename,'genbank');
+
+           # pipes are fine
+           $seq = read_sequence("my_fetching_program $id |",'fasta');
+
+ Function: Reads the top sequence from the file. If no format is given, it will
+           try to guess the format from the filename. If a format is given, it
+           forces that format. The filename can be any valid perl open() string
+           - in particular, you can put in pipes
+
+ Returns : A Bio::Seq object. A quick synopsis:
+           $seq_object->display_id - name of the sequence
+           $seq_object->seq        - sequence as a string
+
+ Args    : Two strings, first the filename - any Perl open() string is ok
+           Second string is the format, which is optional
+
+For more information on Seq objects see L<Bio::Seq>.
+
+=cut
+
+sub read_sequence{
+   my ($filename,$format) = @_;
+
+   # a filename (or any other open() string) is mandatory
+   confess "read_sequence($filename) - usage incorrect"
+       unless defined $filename;
+
+   # only pass -format along when the caller forced one; without it
+   # Bio::SeqIO is left to work the format out by itself
+   my @io_args = ( '-file' => $filename );
+   push @io_args, '-format' => $format if defined $format;
+   my $stream = Bio::SeqIO->new(@io_args);
+
+   # just the first record of the stream is wanted; whatever next_seq()
+   # gives back in scalar context is returned unchanged
+   my $first_seq = $stream->next_seq();
+
+   return $first_seq;
+}
+
+
+=head2 read_all_sequences
+
+ Title   : read_all_sequences
+ Usage   : @seq_object_array = read_all_sequences($filename);
+           @seq_object_array = read_all_sequences($filename,'genbank');
+
+ Function: Just as the function above, but reads all the sequences in the
+           file and loads them into an array.
+
+           For very large files, you will run out of memory. When this
+           happens, you've got to use the SeqIO system directly (this is
+           not so hard! Don't worry about it!).
+
+ Returns : array of Bio::Seq objects
+
+ Args    : two strings, first the filename (any open() string is ok)
+           second the format (which is optional)
+
+See L<Bio::SeqIO> and L<Bio::Seq> for more information
+
+=cut
+
+sub read_all_sequences{
+   my ($filename,$format) = @_;
+
+   # a filename (or any other open() string) is mandatory
+   confess "read_all_sequences($filename) - usage incorrect"
+       unless defined $filename;
+
+   # only pass -format along when the caller forced one
+   my @io_args = ( '-file' => $filename );
+   push @io_args, '-format' => $format if defined $format;
+   my $stream = Bio::SeqIO->new(@io_args);
+
+   # Drain the stream; everything is held in memory at once, so this is
+   # not suitable for very large inputs.
+   my @collected;
+   while( 1 ) {
+       my $next = $stream->next_seq();
+       last unless $next;
+       push @collected, $next;
+   }
+
+   return @collected;
+}
+
+
+=head2 write_sequence
+
+ Title   : write_sequence
+ Usage   : write_sequence(">new_file.gb",'genbank',$seq)
+           write_sequence(">new_file.gb",'genbank',@array_of_sequence_objects)
+
+ Function: writes sequences in the specified format
+
+ Returns : true
+
+ Args    : filename as a string, must provide an open() output file
+           format as a string
+           one or more sequence objects
+
+
+=cut
+
+sub write_sequence{
+   my ($filename,$format,@sequence_objects) = @_;
+
+   if( scalar(@sequence_objects) == 0 ) {
+       confess("write_sequence(filename,format,sequence_object)");
+   }
+
+   my $error = 0;              # set once the long-string warning was given
+   my $seqname = "sequence1";  # id for string input; ++ gives sequence2, ...
+   # Catch users who haven't passed us a filename we can open for output:
+   # prepend '>' unless it already starts with '>' or with a pipe. The pipe
+   # has to be escaped - an unescaped /^|/ matches every string.
+   if( $filename !~ /^\>/ && $filename !~ /^\|/ ) {
+       $filename = ">".$filename;
+   }
+   my $seqio = Bio::SeqIO->new('-file' => $filename, '-format' => $format);
+
+   foreach my $seq ( @sequence_objects ) {
+       my $seq_obj;
+
+       if( !ref $seq ) {
+           if( length $seq > 50 ) {
+               # odds are this is a sequence as a string, and someone has not figured out
+               # how to make objects. Warn him/her and then make a sequence object
+               # from this
+               if( $error == 0 ) {
+                   carp("WARNING: You have put in a long string into write_sequence.\nI suspect this means that this is the actual sequence\nIn the future try the\n  new_sequence method of this module to make a new sequence object.\nDoing this for you here\n");
+                   $error = 1;
+               }
+
+               $seq_obj = new_sequence($seq,$seqname);
+               $seqname++;
+           } else {
+               confess("You have a non object [$seq] passed to write_sequence. It maybe that you want to use new_sequence to make this string into a sequence object?");
+           }
+       } else {
+           if( !$seq->isa("Bio::SeqI") ) {
+               confess("object [$seq] is not a Bio::Seq object; can't write it out");
+           }
+           $seq_obj = $seq;
+       }
+
+       # finally... we get to write out the sequence!
+       $seqio->write_seq($seq_obj);
+   }
+   1;
+}
+
+=head2 new_sequence
+
+ Title   : new_sequence
+ Usage   : $seq_obj = new_sequence("GATTACA", "kino-enzyme");
+
+ Function: Construct a sequence object from sequence string
+ Returns : A Bio::Seq object
+
+ Args    : sequence string
+           name string (optional, default "no-name-for-sequence")
+           accession - accession number (optional, no default)
+
+=cut
+
+sub new_sequence{
+   my ($seq,$name,$accession) = @_;
+
+   # the sequence string is the one mandatory argument
+   confess("new_sequence(sequence_as_string) usage") unless defined $seq;
+
+   # an empty or otherwise false name is replaced by the placeholder id
+   my $display_id = $name ? $name : "no-name-for-sequence";
+   my $seq_object = Bio::Seq->new( -seq => $seq, -id => $display_id);
+   # the accession is optional and only recorded when it is a true value
+   if( $accession ) {
+       $seq_object->accession_number($accession);
+   }
+   return $seq_object;
+}
+
+=head2 blast_sequence
+
+ Title   : blast_sequence
+ Usage   : $blast_result = blast_sequence($seq)
+           $blast_result = blast_sequence('MFVEGGTFASEDDDSASAEDE');
+
+ Function: If the computer has Internet accessibility, blasts
+           the sequence using the NCBI BLAST server against nrdb.
+
+           It chooses the flavour of BLAST on the basis of the sequence.
+
+           This function uses Bio::Tools::Run::RemoteBlast, which itself
+           uses Bio::SearchIO - as soon as you want to know more, check out
+           these modules
+ Returns : Bio::Search::Result::GenericResult.pm
+
+ Args    : Either a string of protein letters or nucleotides, or a
+           Bio::Seq object
+
+=cut
+
+sub blast_sequence {
+    my ($seq,$verbose) = @_;
+    # (this used to be "= shift", which could never fill in $verbose)
+    if( !defined $verbose ) {
+        $verbose = 1;
+    }
+
+    if( !ref $seq ) {
+        $seq = Bio::Seq->new( -seq => $seq, -id => 'blast-sequence-temp-id');
+    } elsif ( !$seq->isa('Bio::PrimarySeqI') ) {
+        croak("[$seq] is an object, but not a Bio::Seq object, cannot be blasted");
+    }
+
+    require Bio::Tools::Run::RemoteBlast;
+    # NOTE(review): the program is fixed to blastp, whatever the sequence is
+    my $prog = 'blastp';
+    my $e_val= '1e-10';
+
+    my @params = ( '-prog' => $prog,
+                   '-expect' => $e_val,
+                   '-readmethod' => 'SearchIO' );
+
+    my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
+
+    my $r = $factory->submit_blast($seq);
+    if( $verbose ) {
+        print STDERR "Submitted Blast for [".$seq->id."] ";
+    }
+    sleep 5;
+
+    my $result;
+    # poll until a report arrives or no request ids are left to wait for
+    LOOP :
+    while( my @rids = $factory->each_rid) {
+        foreach my $rid ( @rids ) {
+            my $rc = $factory->retrieve_blast($rid);
+            if( !ref($rc) ) {
+                if( $rc < 0 ) {
+                    $factory->remove_rid($rid);
+                }
+                if( $verbose ) {
+                    print STDERR ".";
+                }
+                sleep 10;
+            } else {
+                $result = $rc->next_result();
+                $factory->remove_rid($rid);
+                last LOOP;
+            }
+        }
+    }
+
+    if( $verbose ) {
+        print STDERR "\n";
+    }
+    return $result;
+}
+
+=head2 write_blast
+
+ Title   : write_blast
+ Usage   : write_blast($filename,$blast_report);
+
+ Function: Writes a BLAST result object (or more formally
+           a SearchIO result object) out to a filename
+           in BLAST-like format
+
+ Returns : none
+
+ Args    : filename as a string
+           Bio::SearchIO::Results object
+
+=cut
+
+sub write_blast {
+    my ($filename,$blast) = @_;
+
+    # Prepend '>' unless the caller already supplied an output mode ('>',
+    # '>>' or a leading '|'); the pipe must be escaped or /^|/ matches all.
+    if( $filename !~ /^\>/ && $filename !~ /^\|/ ) {
+        $filename = ">".$filename;
+    }
+    require Bio::SearchIO;    # not loaded by this module at compile time
+    my $output = Bio::SearchIO->new( -output_format => 'blast', -file => $filename);
+    $output->write_result($blast);
+}
+
+=head2 get_sequence
+
+ Title   : get_sequence
+ Usage   : $seq_object = get_sequence('swiss',"ROA1_HUMAN");
+
+ Function: If the computer has Internet access this method gets
+           the sequence from Internet accessible databases. Currently
+           this supports Swissprot ('swiss'), EMBL ('embl'), GenBank
+           ('genbank'), GenPept ('genpept'), and RefSeq ('refseq').
+
+           Swissprot and EMBL are more robust than GenBank fetching.
+
+           If the user is trying to retrieve a RefSeq entry from
+           GenBank/EMBL, the query is silently redirected.
+
+ Returns : A Bio::Seq object
+
+ Args    : database type - one of swiss, embl, genbank, genpept, or
+           refseq
+
+=cut
+
+my $genbank_db = undef;
+my $genpept_db = undef;
+my $embl_db = undef;
+my $swiss_db = undef;
+my $refseq_db = undef;
+
+sub get_sequence{
+   my ($db_type,$identifier) = @_;
+   if( ! $DBOKAY ) {
+       # NOTE(review): $! is unlikely to hold the module load error - confirm
+       confess ("Your system does not have one of LWP, HTTP::Request::Common, IO::String installed so the DB retrieval method is not available.  \nFull error message is:\n $!\n");
+   }
+   $db_type = lc($db_type);
+
+   # Database handles are created on first use and kept in the file-level
+   # lexicals above. The tests are substring matches and not exclusive:
+   # when several match, the last one wins.
+   my $db;
+
+   if( $db_type =~ /genbank/ ) {
+       $genbank_db = Bio::DB::GenBank->new() if !defined $genbank_db;
+       $db = $genbank_db;
+   }
+   if( $db_type =~ /genpept/ ) {
+       $genpept_db = Bio::DB::GenPept->new() if !defined $genpept_db;
+       $db = $genpept_db;
+   }
+   if( $db_type =~ /swiss/ ) {
+       $swiss_db = Bio::DB::SwissProt->new() if !defined $swiss_db;
+       $db = $swiss_db;
+   }
+   if( $db_type =~ /embl/ ) {
+       $embl_db = Bio::DB::EMBL->new() if !defined $embl_db;
+       $db = $embl_db;
+   }
+
+   # An explicit refseq request, or - except for Swissprot - an identifier
+   # that looks like N<something>_... , is served from RefSeq instead.
+   if( $db_type =~ /refseq/ or ($db_type !~ /swiss/ and
+                                $identifier =~ /^\s*N\S+_/)) {
+       $refseq_db = Bio::DB::RefSeq->new() if !defined $refseq_db;
+       $db = $refseq_db;
+   }
+
+   # Without this check an unknown type died further down with an opaque
+   # "Can't call method ... on an undefined value".
+   if( !defined $db ) {
+       confess("get_sequence: unknown database type [$db_type] - use one of swiss, embl, genbank, genpept or refseq");
+   }
+
+   # identifiers made of word characters and ending in a digit are fetched
+   # as accession numbers, everything else as a sequence id
+   my $seq;
+   if( $identifier =~ /^\w+\d+$/ ) {
+       $seq = $db->get_Seq_by_acc($identifier);
+   } else {
+       $seq = $db->get_Seq_by_id($identifier);
+   }
+
+   return $seq;
+}
+
+
+=head2 translate
+
+ Title   : translate
+ Usage   : $seqobj = translate($seq_or_string_scalar)
+
+ Function: translates a DNA sequence object OR just a plain
+           string of DNA to amino acids
+ Returns : A Bio::Seq object
+
+ Args    : Either a sequence object or a string of
+           just DNA sequence characters
+
+=cut
+
+sub translate {
+   my ($scalar) = @_;
+
+   my $obj;
+
+   if( ref $scalar ) {
+     # sequence objects are translated as they are
+     if( !$scalar->isa("Bio::PrimarySeqI") ) {
+        confess("Expecting a sequence object not a $scalar");
+     }
+     $obj = $scalar;
+   } else {
+     # Plain strings must look vaguely like DNA. tr/// with an empty
+     # replacement list only counts; the lower-case n used to be missing
+     # from the search list, so it was never counted as a DNA letter.
+     my $n = ( $scalar =~ tr/ATGCNatgcn// );
+
+     if( $n < length($scalar) * 0.85 ) {
+       confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me");
+     }
+
+     # wrap the string in a throw-away sequence object
+     $obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar);
+   }
+
+   return $obj->translate();
+}
+
+
+=head2 translate_as_string
+
+ Title   : translate_as_string
+ Usage   : $seqstring = translate_as_string($seq_or_string_scalar)
+
+ Function: translates a DNA sequence object OR just a plain
+           string of DNA to amino acids
+ Returns : A string of just amino acids
+
+ Args    : Either a sequence object or a string of
+           just DNA sequence characters
+
+=cut
+
+sub translate_as_string {
+   my ($dna) = @_;
+
+   # translate() does the checking and returns a sequence object; only
+   # its residue string is passed back
+   return Bio::Perl::translate($dna)->seq;
+}
+
+
+=head2 reverse_complement
+
+ Title   : reverse_complement
+ Usage   : $seqobj = reverse_complement($seq_or_string_scalar)
+
+ Function: reverse complements a string or sequence argument
+           producing a Bio::Seq - if you want a string, you
+           can use reverse_complement_as_string
+ Returns : A Bio::Seq object
+
+ Args    : Either a sequence object or a string of
+           just DNA sequence characters
+
+=cut
+
+sub reverse_complement {
+   my ($scalar) = @_;
+
+   my $obj;
+
+   if( ref $scalar ) {
+     # sequence objects are reverse-complemented as they are
+     if( !$scalar->isa("Bio::PrimarySeqI") ) {
+        confess("Expecting a sequence object not a $scalar");
+     }
+     $obj = $scalar;
+   } else {
+     # Plain strings must look vaguely like DNA. tr/// with an empty
+     # replacement list only counts; the lower-case n used to be missing
+     # from the search list, so it was never counted as a DNA letter.
+     my $n = ( $scalar =~ tr/ATGCNatgcn// );
+
+     if( $n < length($scalar) * 0.85 ) {
+       confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me");
+     }
+
+     # wrap the string in a throw-away sequence object
+     $obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar);
+   }
+
+   return $obj->revcom();
+}
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $seqobj = revcom($seq_or_string_scalar)
+
+ Function: reverse complements a string or sequence argument
+           producing a Bio::Seq - if you want a string, you
+           can use reverse_complement_as_string
+
+           This is an alias for reverse_complement
+ Returns : A Bio::Seq object
+
+ Args    : Either a sequence object or a string of
+           just DNA sequence characters
+
+=cut
+
+sub revcom {
+    return Bio::Perl::reverse_complement(@_);   # plain alias, same arguments
+}
+
+
+=head2 reverse_complement_as_string
+
+ Title   : reverse_complement_as_string
+ Usage   : $string = reverse_complement_as_string($seq_or_string_scalar)
+
+ Function: reverse complements a string or sequence argument
+           producing a string
+ Returns : A string of DNA letters
+
+ Args    : Either a sequence object or a string of
+           just DNA sequence characters
+
+=cut
+
+sub reverse_complement_as_string {
+   my ($dna) = @_;
+
+   # reverse_complement() checks the input and returns a sequence
+   # object; only its sequence string is handed back
+   return Bio::Perl::reverse_complement($dna)->seq;
+}
+
+
+=head2 revcom_as_string
+
+ Title   : revcom_as_string
+ Usage   : $string = revcom_as_string($seq_or_string_scalar)
+
+ Function: reverse complements a string or sequence argument
+           producing a string
+ Returns : A string of DNA letters
+
+ Args    : Either a sequence object or a string of
+           just DNA sequence characters
+
+=cut
+
+sub revcom_as_string {
+   my ($dna) = @_;
+
+   # short-named twin of reverse_complement_as_string(): build the
+   # reverse complement object, then reduce it to its sequence string
+   return Bio::Perl::reverse_complement($dna)->seq;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Correlate.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Correlate.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Correlate.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,364 @@
+# $Id: Correlate.pm,v 1.9.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::Correlate
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::Correlate - Representation of a correlating phenotype in a given species
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::Correlate;
+
+  $co = Bio::Phenotype::Correlate->new( -name        => "4(Tas1r3)",
+                                        -description => "mouse correlate of human phenotype MIM 605865",
+                                        -species     => $mouse,
+                                        -type        => "homolog",
+                                        -comment     => "type=homolog is putative" );
+
+  print $co->name();
+  print $co->description();
+  print $co->species()->binomial();
+  print $co->type();
+  print $co->comment();
+
+  print $co->to_string();
+
+=head1 DESCRIPTION
+
+This class models correlating phenotypes.
+Its creation was inspired by the OMIM database where many human phenotypes
+have a correlating mouse phenotype. Therefore, this class is intended
+to be used together with a phenotype class. 
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Phenotype::Correlate;
+use strict;
+use Bio::Species;
+
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $co = Bio::Phenotype::Correlate->new( -name        => "4(Tas1r3)",
+                                                 -description => "mouse correlate of human phenotype MIM 605865",
+                                                 -species     => $mouse,
+                                                 -type        => "homolog",
+                                                 -comment     => "type=homolog is putative" );                      
+ Function: Creates a new Correlate object.
+ Returns : A new Correlate object.
+ Args    : -name        => a name or id
+           -description => a description
+           -species     => the species of this correlating phenotype [Bio::Species]
+           -type        => the type of correlation
+           -comment     => a comment
+
+=cut
+
+sub new {
+
+    my( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    my ( $name, $desc, $species, $type, $comment )
+    = $self->_rearrange( [ qw( NAME
+                               DESCRIPTION
+                               SPECIES
+                               TYPE
+                               COMMENT ) ], @args );
+
+    $self->init();    # defaults first; caller-supplied values overlay them below
+
+    # Test definedness rather than truth so that a value such as "0" is kept.
+    defined $name    && $self->name( $name );
+    defined $desc    && $self->description( $desc );
+    $species         && $self->species( $species );   # truth test kept as in the original
+    defined $type    && $self->type( $type );
+    defined $comment && $self->comment( $comment );
+
+    return $self;
+} # new
+
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $co->init();   
+ Function: Initializes this Correlate to all "".
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    # Placeholder species, classified as "species Undetermined".
+    my $undetermined = Bio::Species->new();
+    $undetermined->classification( qw( species Undetermined ) );
+
+    $self->name( "" );
+    $self->description( "" );
+    $self->species( $undetermined );
+    $self->type( "" );
+    $self->comment( "" );
+} # init
+
+
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $co->name( "4(Tas1r3)" );
+           or
+           print $co->name();
+ Function: Set/get for the name or id of this Correlate.
+ Returns : The name or id of this Correlate.
+ Args    : The name or id of this Correlate (optional).
+
+=cut
+
+sub name {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_name" } = $_[0] if defined $_[0];
+
+    return $self->{ "_name" };
+
+} # name
+
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $co->description( "mouse correlate of human phenotype MIM 03923" );
+           or
+           print $co->description();
+ Function: Set/get for the description of this Correlate.
+ Returns : A description of this Correlate.
+ Args    : A description of this Correlate (optional).
+
+=cut
+
+sub description {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_description" } = $_[0] if defined $_[0];
+
+    return $self->{ "_description" };
+
+} # description
+
+
+
+
+=head2 species
+
+ Title   : species
+ Usage   : $co->species( $species );
+           or
+           $species = $co->species();
+ Function: Set/get for the species of this Correlate.
+ Returns : The Bio::Species of this Correlate [Bio::Species].
+ Args    : The Bio::Species of this Correlate [Bio::Species] (optional).
+
+=cut
+
+sub species {
+    my $self = shift;
+
+    # Plain getter when no defined argument is given.
+    return $self->{ "_species" } unless defined $_[0];
+
+    # Setter: the value must pass the Bio::Species type check before it is stored.
+    my $species = shift;
+    $self->_check_ref_type( $species, "Bio::Species" );
+
+    return $self->{ "_species" } = $species;
+} # species
+
+
+
+
+=head2 type
+
+ Title   : type
+ Usage   : $co->type( "homolog" );
+           or
+           print $co->type();
+ Function: Set/get for the type of this Correlate.
+ Returns : The type of this Correlate.
+ Args    : The type of this Correlate (optional).
+
+=cut
+
+sub type {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_type" } = $_[0] if defined $_[0];
+
+    return $self->{ "_type" };
+
+} # type
+
+
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $co->comment( "doubtful" );
+           or 
+           print $co->comment();
+ Function: Set/get for an arbitrary comment about this Correlate.
+ Returns : A comment.
+ Args    : A comment (optional).
+
+=cut
+
+sub comment {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_comment" } = $_[0] if defined $_[0];
+
+    return $self->{ "_comment" };
+
+} # comment
+
+
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $co->to_string();
+ Function: To string method for Correlate objects.
+ Returns : A string representation of this Correlate.
+ Args    :
+
+=cut
+
+sub to_string {
+    my $self = shift;
+
+    # One "-- Label:" heading line per field with its value on the next
+    # line; the last value (the comment) is not followed by a newline.
+    # scalar() keeps each accessor call in scalar context inside the list.
+    my @pieces = (
+        "-- Name:\n",                scalar( $self->name() ),                "\n",
+        "-- Description:\n",         scalar( $self->description() ),         "\n",
+        "-- Species:\n",             scalar( $self->species()->binomial() ), "\n",
+        "-- Type of correlation:\n", scalar( $self->type() ),                "\n",
+        "-- Comment:\n",             scalar( $self->comment() ),
+    );
+
+    return join( "", @pieces );
+
+} # to_string
+
+
+
+
+# Title   : _check_ref_type              
+# Function: Checks for the correct type.
+# Returns : 
+# Args    : The value to be checked, the expected class.
+sub _check_ref_type {
+    my ( $self, $value, $expected_class ) = @_;
+
+    # Describe what was found, or leave undef when $value is acceptable.
+    # eval (with $@ localised) guards isa(): an unblessed reference cannot
+    # be asked isa() and has to be reported like any other wrong type.
+    my $found
+        = ! defined $value ? "[undef]"
+        : ! ref $value     ? "scalar"
+        : ! do { local $@; eval { $value->isa( $expected_class ) } }
+                           ? "[" . ref( $value ) . "]"
+        :                    undef;
+    return unless defined $found;
+
+    # caller(1) names the sub that requested this check; it prefixes the message.
+    $self->throw( ( caller( 1 ) )[ 3 ] .": Found $found where [$expected_class] expected" );
+} # _check_ref_type
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Term.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Term.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Term.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,296 @@
+# $Id: Term.pm,v 1.7.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::MeSH::Term
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::MeSH::Term - A MeSH term
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::MeSH::Term;
+
+  # create a term object
+  my $term = Bio::Phenotype::MeSH::Term->new
+      (-id => 'D000001',
+       -name => 'Dietary Fats',
+       -description => 'dietary fats are...'
+      );
+
+  # get a Bio::Phenotype::MeSH::Twig somehow...
+  $term->add_twig($twig1);
+
+
+=head1 DESCRIPTION
+
+This class keeps information about MeSH terms. MeSH stands for Medical
+Subject Headings and is one of the ways of annotating biomedical
+literature.  The terminology is maintained by the National Library of
+Medicine of the USA. See http://www.nlm.nih.gov/mesh/meshhome.html.
+
+In addition to id, name and description a term can know about its
+surrounding terms (Bio::Phenotype::MeSH::Twig) in the term hierarchy.
+
+This class is mainly used from Bio::DB::MeSH which retrieves terms
+over the Web.
+
+=head1 SEE ALSO
+
+L<Bio::DB::MeSH>, 
+L<Bio::Phenotype::MeSH::Twig>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Phenotype::MeSH::Term;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor; the named arguments used are -id, -name and -description.
+    my( $class, @args ) = @_;
+    my $self = $class->SUPER::new( @args );
+
+    # SPECIES and COMMENT stay in the key list, but their values are not
+    # used: this class shows no accessor for either of them.
+    my ( $id, $name, $description ) = $self->_rearrange
+        ( [ qw( ID
+                NAME
+                DESCRIPTION
+                SPECIES
+                COMMENT
+              ) ],
+          @args );
+
+    $self->{"_twigs"}    = [];
+    $self->{"_synonyms"} = [];   # lets each_synonym() work on a fresh object
+
+    # Definedness, not truth: an id or name of "0" must not be dropped.
+    defined $id          && $self->id( $id );
+    defined $name        && $self->name( $name );
+    defined $description && $self->description( $description );
+
+    return $self;
+}
+
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id( "r1" );
+           or
+           print $obj->id();
+ Function: Set/get for the id.
+ Returns : A id [scalar].
+ Args    : A id [scalar] (optional).
+
+=cut
+
+sub id {
+    my $self = shift;
+    if ( defined $_[0] ) { $self->{ "_id" } = $_[0] }   # setter only for a defined value
+    return $self->{ "_id" };
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name( "r1" );
+           or
+           print $obj->name();
+ Function: Set/get for the name.
+ Returns : A name [scalar].
+ Args    : A name [scalar] (optional).
+
+=cut
+
+sub name {
+    my $self = shift;
+    if ( defined $_[0] ) { $self->{ "_name" } = $_[0] }   # setter only for a defined value
+    return $self->{ "_name" };
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description( "r1" );
+           or
+           print $obj->description();
+ Function: Set/get for the description.
+ Returns : A description [scalar].
+ Args    : A description [scalar] (optional).
+
+=cut
+
+sub description {
+    my $self = shift;
+    if ( defined $_[0] ) { $self->{ "_description" } = $_[0] }   # setter only for a defined value
+    return $self->{ "_description" };
+}
+
+
+=head2 add_synonym
+
+ Title   : add_synonym
+ Usage   : $obj->add_synonym( @synonyms );
+           or
+           $obj->add_synonym( $synonym );
+ Function: Pushes one or more synonyms for the term
+           into the list of synonyms.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_synonym {
+    my $self = shift;
+    return push @{ $self->{ "_synonyms" } }, @_;   # push creates the list on first use; its result (new length) is returned
+}
+
+=head2 each_synonym
+
+ Title   : each_synonym()
+ Usage   : @gs = $obj->each_synonym();
+ Function: Returns a list of synonyms [scalars, most likely Strings]
+           associated with this term.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_synonym {
+    my ($self) = @_;
+    # "_synonyms" may still be unset on a fresh object; fall back to an empty
+    # list rather than dereferencing undef under "use strict".
+    return @{ $self->{ "_synonyms" } || [] };
+}
+
+=head2 purge_synonyms
+
+ Usage   : $obj->purge_synonyms();
+ Function: Deletes the list of synonyms to this term.
+ Returns : A reference to the new, empty list of synonyms.
+ Args    :
+
+=cut
+
+sub purge_synonyms {
+    my $self = shift;
+    return $self->{ "_synonyms" } = [];   # the fresh, empty array reference is also the return value
+}
+
+
+=head2 Twig management
+
+Each MeSH term belongs to a complex tree like hierarchy of terms where
+each term can appear multiple times. The immediately surrounding nodes
+of the tree are modelled in twigs.
+
+See: L<Bio::Phenotype::MeSH::Twig>.
+
+=cut
+
+=head2 add_twig
+
+ Title   : add_twig
+ Usage   : $obj->add_twig( @twigs );
+           or
+           $obj->add_twig( $twig );
+ Function: Pushes one or more twigs [Bio::Phenotype::MeSH::Twig objects]
+           into the list of twigs and links each twig back to this term.
+ Returns : 1
+ Args    : One or more Bio::Phenotype::MeSH::Twig objects.
+
+=cut
+
+sub add_twig {
+    my ( $self, @values ) = @_;
+    foreach my $twig (@values) {
+        # eval guards isa() itself (it cannot be called on e.g. an unblessed
+        # reference); $@ is localised so the caller's value is untouched.
+        my $is_twig = do { local $@; eval { $twig->isa('Bio::Phenotype::MeSH::Twig') } };
+        unless ($is_twig) {
+            # Report and skip: the term() call below needs a real twig.
+            $self->warn ("Not a MeSH twig [$twig]");
+            next;
+        }
+        $twig->term($self);    # link the twig back to this term
+        push( @{ $self->{ "_twigs" } }, $twig );
+    }
+    1;
+}
+
+=head2 each_twig
+
+ Title   : each_twig()
+ Usage   : @gs = $obj->each_twig();
+ Function: Returns a list of the twigs [Bio::Phenotype::MeSH::Twig objects]
+           associated with this term.
+ Returns : A list of twigs.
+ Args    :
+
+=cut
+
+sub each_twig {
+    my $twigs = $_[0]->{ "_twigs" };   # array reference kept under the "_twigs" key
+    return @{ $twigs };
+}
+
+=head2 purge_twigs
+
+ Usage   : $obj->purge_twigs();
+ Function: Deletes the list of twigs associated with this term.
+ Returns : A reference to the new, empty list of twigs.
+ Args    :
+
+=cut
+
+sub purge_twigs {
+    my $self = shift;
+    return $self->{ "_twigs" } = [];   # the fresh, empty array reference is also the return value
+}
+
+
+=head2 each_parent
+
+ Title   : each_parent()
+ Usage   : @gs = $obj->each_parent();
+ Function: Returns a list of names of parents for this term
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_parent {
+    my $self = shift;
+    my @parents;
+    # Ask every stored twig for its parent, keeping the twigs' order.
+    push @parents, $_->parent() for @{ $self->{ "_twigs" } };
+    return @parents;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Twig.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Twig.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/MeSH/Twig.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,261 @@
+# $Id: Twig.pm,v 1.7.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::MeSH::Twig
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::MeSH::Twig - Context for a MeSH term
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::MeSH::Twig;
+  # create a twig object
+  my $twig = Bio::Phenotype::MeSH::Twig->new();
+
+  # the term has only one parent in any twig
+  $twig->parent('Fats');
+
+
+  # a twig makes sense only in the context of a term
+  # which is a  Bio::Phenotype::MeSH::Term object
+
+  # a term can have many twigs i.e. it can appear in many places in
+  # the hierarchy
+  #
+  $term->add_twig($twig);
+
+  # adding the twig into a term adds a link to the term into the twig
+  $twig->term eq $term;
+
+  # a twig can know about other terms under the parent node
+  $twig->add_sister('Bread', 'Candy', 'Cereals');
+  print join ( ', ', $twig->each_sister()), "\n";
+
+  # a twig can know about other terms under this term
+  $twig->add_child('Butter', 'Margarine');
+  print join ( ', ', $twig->each_child()), "\n";
+
+
+
+=head1 DESCRIPTION
+
+This class represents the immediate surrounding of a MeSH term. It
+keeps track on nodes names above the current node ('parent') other
+nodes at the same level ('sisters') and nodes under it ('children').
+Note that these are name strings, not objects.
+
+Each twig can be associated with only one term, but term can have
+multiple twigs. (Twigs can be thought of as roles for a term.)
+
+=head1 SEE ALSO
+
+L<Bio::Phenotype::MeSH::Term>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Phenotype::MeSH::Twig;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+
+sub new {
+    # Constructor; the named arguments used are -term and -parent.
+    my( $class, @args ) = @_;
+    my $self = $class->SUPER::new( @args );
+
+    my ($term, $parent ) = $self->_rearrange
+        ( [ qw(
+               TERM
+               PARENT
+              ) ],
+          @args );
+
+    $self->{"_children"} = [];
+    $self->{"_sisters"} = [];
+
+    # -term keeps its truth test; -parent is tested for definedness so that
+    # a name such as "0" is stored rather than dropped.
+    $term && $self->term($term );
+    defined $parent && $self->parent($parent );
+    return $self;
+}
+
+
+=head2 parent
+
+ Title   : parent
+ Usage   : $obj->parent( "r1" );
+           or
+           print $obj->parent();
+ Function: Set/get for the parent.
+ Returns : A parent [scalar].
+ Args    : A parent [scalar] (optional).
+
+=cut
+
+sub parent {
+    my $self = shift;
+    if ( defined $_[0] ) { $self->{ "_parent" } = $_[0] }   # setter only for a defined value
+    return $self->{ "_parent" };
+}
+
+=head2 term
+
+ Title   : term
+ Usage   : $obj->term( $term );
+           or
+           $term = $obj->term();
+ Function: Set/get for the term.
+ Returns : A term [Bio::Phenotype::MeSH::Term].
+ Args    : A term [Bio::Phenotype::MeSH::Term] (optional).
+
+=cut
+
+sub term {
+    my ( $self, $value ) = @_;
+    if (defined $value) {
+        # eval guards the isa() call itself: an unblessed reference or an
+        # empty string cannot be asked isa() and should raise the same
+        # exception as any other non-term. $@ is localised for the caller.
+        my $is_term = do { local $@; eval { $value->isa('Bio::Phenotype::MeSH::Term') } };
+        $self->throw ("Not a MeSH term [$value]") unless $is_term;
+        $self->{ "_term" } = $value;
+    }
+    return $self->{ "_term" };
+}
+
+
+=head2 add_child
+
+ Title   : add_child
+ Usage   : $obj->add_child( @children );
+           or
+           $obj->add_child( $child );
+ Function: Pushes one or more child term names [scalars, most likely Strings]
+           into the list of children.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_child {
+    my $self = shift;
+    push @{ $self->{ "_children" } }, @_;
+    return scalar @_;    # how many names this call was given
+}
+
+=head2 each_child
+
+ Title   : each_child()
+ Usage   : @gs = $obj->each_child();
+ Function: Returns a list of child term names [scalars, most likely Strings]
+           stored in this twig.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_child {
+    my $children = $_[0]->{ "_children" };   # array reference kept under the "_children" key
+    return @{ $children };
+}
+
+=head2 purge_children
+
+ Usage   : $obj->purge_children();
+ Function: Deletes the list of children associated with this term.
+ Returns : A reference to the new, empty list of children.
+ Args    :
+
+=cut
+
+sub purge_children {
+    my $self = shift;
+    return $self->{ "_children" } = [];   # the fresh, empty array reference is also the return value
+}
+
+
+=head2 add_sister
+
+ Title   : add_sister
+ Usage   : $obj->add_sister( @sisters );
+           or
+           $obj->add_sister( $sister );
+ Function: Pushes one or more sister term names [scalars, most likely Strings]
+           into the list of sisters.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_sister {
+    my $self = shift;
+    push @{ $self->{ "_sisters" } }, @_;
+    return scalar @_;    # how many names this call was given
+}
+
+=head2 each_sister
+
+ Title   : each_sister()
+ Usage   : @gs = $obj->each_sister();
+ Function: Returns a list of sister term names [scalars, most likely Strings]
+           stored in this twig.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_sister {
+    my $sisters = $_[0]->{ "_sisters" };   # array reference kept under the "_sisters" key
+    return @{ $sisters };
+}
+
+=head2 purge_sisters
+
+ Usage   : $obj->purge_sisters();
+ Function: Deletes the list of sisters associated with this term.
+ Returns : A reference to the new, empty list of sisters.
+ Args    :
+
+=cut
+
+sub purge_sisters {
+    my $self = shift;
+    return $self->{'_sisters'} = [];   # the fresh, empty array reference is also the return value
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Measure.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Measure.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Measure.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,364 @@
+# $Id: Measure.pm,v 1.9.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::Measure
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::Measure - Representation of context/value(-range)/unit triplets
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::Measure;
+
+  my $measure = Bio::Phenotype::Measure->new( -context     => "length",
+                                              -description => "reduced length in 4(Tas1r3)",
+                                              -start       => 0,
+                                              -end         => 15,
+                                              -unit        => "mm",
+                                              -comment     => "see also Miller et al" );
+
+  print $measure->context();
+  print $measure->description();
+  print $measure->start();
+  print $measure->end();
+  print $measure->unit();
+  print $measure->comment();
+
+  print $measure->to_string();
+
+=head1 DESCRIPTION
+
+Measure is for biochemically defined phenotypes or any other types of measures.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Phenotype::Measure;
+use strict;
+
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $me = Bio::Phenotype::Measure->new( -context     => "length",
+                                                  -description => "reduced length in 4(Tas1r3)",
+                                                  -start       => 0,
+                                                  -end         => 15,
+                                                  -unit        => "mm",
+                                                  -comment     => "see also Miller et al" );
+ Function: Creates a new Measure object.
+ Returns : A new Measure object.
+ Args    : -context     => the context
+           -description => a description
+           -start       => the start value
+           -end         => the end value
+           -unit        => the unit
+           -comment     => a comment
+
+=cut
+
+sub new {
+    my( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    my ( $con, $desc, $start, $end, $unit, $comment )
+    = $self->_rearrange( [ qw( CONTEXT
+                               DESCRIPTION
+                               START
+                               END
+                               UNIT
+                               COMMENT ) ], @args );
+
+    $self->init();    # defaults first; caller-supplied values overlay them below
+
+    # Test definedness rather than truth: "-start => 0" is a real value and
+    # must not be discarded in favour of the default.
+    defined $con     && $self->context( $con );
+    defined $desc    && $self->description( $desc );
+    defined $start   && $self->start( $start );
+    defined $end     && $self->end( $end );
+    defined $unit    && $self->unit( $unit );
+    defined $comment && $self->comment( $comment );
+
+    return $self;
+} # new
+
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $measure->init();   
+ Function: Initializes this Measure to all "".
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    # Reset every field to the empty string, one setter call per field.
+    foreach my $field ( qw( context description start end unit ) ) {
+        $self->$field( "" );
+    }
+
+    return $self->comment( "" );
+
+} # init
+
+
+
+
+=head2 context
+
+ Title   : context
+ Usage   : $measure->context( "Ca-conc" );
+           or 
+           print $measure->context(); 
+ Function: Set/get for the context of this Measure.
+ Returns : The context.
+ Args    : The context (optional).
+
+=cut
+
+sub context {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_context" } = $_[0] if defined $_[0];
+
+    return $self->{ "_context" };
+
+} # context
+
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $measure->description( "reduced in 4(Tas1r3)" );
+           or 
+           print $measure->description(); 
+ Function: Set/get for the description of this Measure.
+ Returns : A description.
+ Args    : A description (optional).
+
+=cut
+
+sub description {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_description" } = $_[0] if defined $_[0];
+
+    return $self->{ "_description" };
+
+} # description
+
+
+
+
+=head2 start 
+
+ Title   : start
+ Usage   : $measure->start( 330 );
+           or 
+           print $measure->start(); 
+ Function: Set/get for the start value of this Measure.
+ Returns : The start value.
+ Args    : The start value (optional).
+
+=cut
+
+sub start {
+    my $self = shift;
+
+    # A defined argument (0 included) is stored; with no argument (or
+    # undef) the current value is simply reported.
+    $self->{ "_start" } = $_[0] if defined $_[0];
+
+    return $self->{ "_start" };
+
+} #  start
+
+
+
+
+=head2 end 
+
+ Title   : end 
+ Usage   : $measure->end( 459 );
+           or 
+           print $measure->end(); 
+ Function: Set/get for the end value of this Measure.
+ Returns : The end value.
+ Args    : The end value (optional).
+
+=cut
+
+sub end {
+    my $self = shift;
+
+    # A defined argument (0 included) is stored; with no argument (or
+    # undef) the current value is simply reported.
+    $self->{ "_end" } = $_[0] if defined $_[0];
+
+    return $self->{ "_end" };
+
+} # end
+
+
+
+
+=head2 unit
+
+ Title   : unit
+ Usage   : $measure->unit( "mM" );
+           or 
+           print $measure->unit(); 
+ Function: Set/get for the unit of this Measure.
+ Returns : The unit.
+ Args    : The unit (optional).
+
+=cut
+
+sub unit {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_unit" } = $_[0] if defined $_[0];
+
+    return $self->{ "_unit" };
+
+} # unit
+
+
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $measure->comment( "see also Miller et al" );
+           or 
+           print $measure->comment();
+ Function: Set/get for an arbitrary comment about this Measure.
+ Returns : A comment.
+ Args    : A comment (optional).
+
+=cut
+
+sub comment {
+    my $self = shift;
+
+    # A defined argument is stored; with no argument (or undef) the
+    # current value is simply reported.
+    $self->{ "_comment" } = $_[0] if defined $_[0];
+
+    return $self->{ "_comment" };
+
+} # comment
+
+
+
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $measure->to_string();
+ Function: To string method for Measure objects.
+ Returns : A string representations of this Measure.
+ Args    :
+
+=cut
+
+sub to_string {
+    my $self = shift;
+
+    # One "-- Label:" heading line per field with its value on the next
+    # line; the last value (the comment) is not followed by a newline.
+    # scalar() keeps each accessor call in scalar context inside the list.
+    my @pieces = (
+        "-- Context:\n",     scalar( $self->context() ),     "\n",
+        "-- Description:\n", scalar( $self->description() ), "\n",
+        "-- Start:\n",       scalar( $self->start() ),       "\n",
+        "-- End:\n",         scalar( $self->end() ),         "\n",
+        "-- Unit:\n",        scalar( $self->unit() ),        "\n",
+        "-- Comment:\n",     scalar( $self->comment() ),
+    );
+
+    return join( "", @pieces );
+
+} # to_string
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/MiniMIMentry.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/MiniMIMentry.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/MiniMIMentry.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,292 @@
+# $Id: MiniMIMentry.pm,v 1.9.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::OMIM::MiniMIMentry
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::OMIM::MiniMIMentry - Representation of a Mini MIM entry
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::OMIM::MiniMIMentry;
+
+  $mm = Bio::Phenotype::OMIM::MiniMIMentry->new( -description  => "The central form of ...",
+                                                 -created      => "Victor A. McKusick: 6/4/1986",
+                                                 -contributors => "Kelly A. Przylepa - revised: 03/18/2002",
+                                                 -edited       => "alopez: 06/03/1997" );
+
+
+=head1 DESCRIPTION
+
+This class represents Mini MIM entries.
+This class is intended to be used together with an OMIM entry class.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Phenotype::OMIM::MiniMIMentry;
+use strict;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $mm = Bio::Phenotype::OMIM::MiniMIMentry->new( -description  => "The central form of ...",
+                                                          -created      => "Victor A. McKusick: 6/4/1986",
+                                                          -contributors => "Kelly A. Przylepa - revised: 03/18/2002",
+                                                          -edited       => "alopez: 06/03/1997" );
+
+ Function: Creates a new MiniMIMentry object.
+ Returns : A new MiniMIMentry object.
+ Args    : -description  => a description
+           -created      => name(s) and date(s) (free form)
+           -contributors => name(s) and date(s) (free form)
+           -edited       => name(s) and date(s) (free form)
+
+=cut
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    # Pull the named constructor arguments out in a fixed order.
+    my @order = qw( DESCRIPTION CREATED CONTRIBUTORS EDITED );
+    my ( $desc, $created, $contributors, $edited )
+        = $self->_rearrange( \@order, @args );
+
+    # Start from empty strings, then apply only true-valued arguments
+    # (so "" or 0 leaves the default in place).
+    $self->init();
+
+    $self->description( $desc )          if $desc;
+    $self->created( $created )           if $created;
+    $self->contributors( $contributors ) if $contributors;
+    $self->edited( $edited )             if $edited;
+
+    return $self;
+} # new
+
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $mm->init();   
+ Function: Initializes this MiniMIMentry to all "".
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    # Reset each free-form text field to the empty string.  The calls
+    # go through the public accessors, which store any defined value.
+    $self->description( "" );
+    $self->created( "" );
+    $self->contributors( "" );
+    $self->edited( "" );
+
+} # init
+
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $mm->description( "The central form of ..." );
+           or
+           print $mm->description();
+ Function: Set/get for the description field of the Mini MIM database.
+ Returns : The description.
+ Args    : The description (optional).
+
+=cut
+
+sub description {
+    my $self  = shift;
+    my $value = shift;
+
+    # Only a defined argument overwrites the stored description.
+    $self->{ "_description" } = $value
+        if defined $value;
+
+    return $self->{ "_description" };
+} # description
+
+
+
+
+=head2 created
+
+ Title   : created
+ Usage   : $mm->created( "Victor A. McKusick: 6/4/1986" );
+           or
+           print $mm->created();
+ Function: Set/get for the created field of the Mini MIM database.
+ Returns : Name(s) and date(s) [scalar - free form].
+ Args    : Name(s) and date(s) [scalar - free form] (optional).
+
+=cut
+
+sub created {
+    my ( $self, $value ) = @_;
+
+    # Plain getter when no defined value is passed in.
+    return $self->{ "_created" } unless defined $value;
+
+    # Otherwise store the new value and return what is now stored.
+    $self->{ "_created" } = $value;
+    return $self->{ "_created" };
+} # created
+
+
+
+
+=head2 contributors
+
+ Title   : contributors
+ Usage   : $mm->contributors( "Kelly A. Przylepa - revised: 03/18/2002" );
+           or
+           print $mm->contributors();
+ Function: Set/get for the contributors field of the Mini MIM database.
+ Returns : Name(s) and date(s) [scalar - free form].
+ Args    : Name(s) and date(s) [scalar - free form] (optional).
+
+=cut
+
+sub contributors {
+    my $self  = shift;
+    my $value = shift;
+
+    # Undefined arguments are ignored, so the field can be read but
+    # never reset to undef through this accessor.
+    $self->{ "_contributors" } = $value if defined $value;
+
+    return $self->{ "_contributors" };
+} # contributors
+
+
+
+
+=head2 edited
+
+ Title   : edited
+ Usage   : $mm->edited( "alopez: 06/03/1997" );
+           or
+           print $mm->edited();
+ Function: Set/get for the edited field of the Mini MIM database.
+ Returns : Name(s) and date(s) [scalar - free form].
+ Args    : Name(s) and date(s) [scalar - free form] (optional).
+
+=cut
+
+sub edited {
+    my ( $self, $value ) = @_;
+
+    # Store the argument only when it is defined, then report the
+    # value currently held (old or new).
+    defined $value
+        and $self->{ "_edited" } = $value;
+
+    return $self->{ "_edited" };
+} # edited
+
+
+
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $mm->to_string();
+ Function: To string method for MiniMIMentry objects.
+ Returns : A string representation of this MiniMIMentry.
+ Args    :
+
+=cut
+
+sub to_string {
+    my $self = shift;
+
+    # One "-- Label:" header line per field, each followed by the
+    # field value.  Joining with a newline leaves the final section
+    # (Edited) without a trailing newline.
+    my @sections = (
+        "-- Description:\n"  . $self->description(),
+        "-- Created:\n"      . $self->created(),
+        "-- Contributors:\n" . $self->contributors(),
+        "-- Edited:\n"       . $self->edited(),
+    );
+
+    my $text = join( "\n", @sections );
+
+    return $text;
+} # to_string
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentry.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentry.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentry.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,817 @@
+# $Id: OMIMentry.pm,v 1.16.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::OMIM::OMIMentry
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::OMIM::OMIMentry - represents OMIM (Online Mendelian
+Inheritance in Man) database entries
+
+=head1 SYNOPSIS
+
+  $obj = Bio::Phenotype::OMIM::OMIMentry->new( -mim_number          => 200000,
+                                               -description         => "This is ...",
+                                               -more_than_two_genes => 1 );
+
+=head1 DESCRIPTION
+
+Inherits from Bio::Phenotype::Phenotype.
+Bio::Phenotype::OMIM::OMIMparser parses the flat file representation
+of OMIM (i.e. files "omim.txt" and "genemap") returning OMIMentry objects. 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Phenotype::OMIM::OMIMentry;
+use strict;
+
+use Bio::Phenotype::OMIM::MiniMIMentry;
+use Bio::Phenotype::OMIM::OMIMentryAllelicVariant;
+
+use constant TRUE              => 1;
+use constant FALSE             => 0;
+use constant DEFAULT_MIM_NUMER => 0;
+
+use base qw(Bio::Phenotype::Phenotype);
+
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $obj = Bio::Phenotype::OMIM::OMIMentry->new( -mim_number          => 200000,
+                                                        -description         => "This is ...",
+                                                        -more_than_two_genes => 1 );                      
+ Function: Creates a new OMIMentry object.
+ Returns : A new OMIMentry object.
+ Args    : -mim_number                     => the MIM number
+           -title                          => the title or name
+           -alternative_titles_and_symbols => the "alternative titles and symbols"    
+           -more_than_two_genes            => can the phenotype be caused by mutation in any of two or more genes?
+           -is_separate                    => is this phenotype separate from those represented by other entries  
+           -description                    => the description of this phenotype
+           -mapping_method                 => the mapping method      
+           -gene_status                    => the gene status of this       
+           -comment                        => a comment        
+           -species                        => ref to the species (human)
+           -created                        => created by whom/when       
+           -edited                         => edited by whom/when    
+           -contributors                   => contributed by whom/when 
+           -additional_references          => "see also"     
+           -clinical_symptoms              => the clinical symptoms
+           -minimim                        => the Mini MIM associated with this OMIM entry
+
+=cut
+
+sub new {
+
+    my( $class, @args ) = @_;
+    
+    my $self = $class->SUPER::new( @args );
+   
+    my ( $mim_number,
+         $title,
+         $alternative_titles_and_symbols,     
+         $more_than_two_genes,       
+         $is_separate,    
+         $description,
+         $mapping_method,     
+         $gene_status,       
+         $comment,        
+         $species,
+         $created,       
+         $edited,    
+         $contributors,
+         $additional_references,     
+         $clinical_symptoms, 
+         $miniMIM )
+    = $self->_rearrange( [ qw( MIM_NUMBER
+                               TITLE
+                               ALTERNATIVE_TITLES_AND_SYMBOLS
+                               MORE_THAN_TWO_GENES
+                               IS_SEPARATE
+                               DESCRIPTION
+                               MAPPING_METHOD
+                               GENE_STATUS
+                               COMMENT
+                               SPECIES
+                               CREATED
+                               EDITED
+                               CONTRIBUTORS
+                               ADDITIONAL_REFERENCES
+                               CLINICAL_SYMPTOMS
+                               MINIMIM ) ], @args );
+    # Reset every field to its default before applying the arguments.
+    $self->init(); 
+    # Only true-valued arguments are applied; anything else keeps the default.
+    $mim_number                     && $self->MIM_number( $mim_number );
+    $title                          && $self->title( $title );
+    $alternative_titles_and_symbols && $self->alternative_titles_and_symbols( $alternative_titles_and_symbols );     
+    $more_than_two_genes            && $self->more_than_two_genes( $more_than_two_genes );      
+    $is_separate                    && $self->is_separate( $is_separate );   
+    $description                    && $self->description( $description );
+    $mapping_method                 && $self->mapping_method( $mapping_method );     
+    $gene_status                    && $self->gene_status( $gene_status );       
+    $comment                        && $self->comment( $comment );        
+    $species                        && $self->species( $species );
+    $created                        && $self->created( $created );       
+    $edited                         && $self->edited( $edited );    
+    $contributors                   && $self->contributors( $contributors );
+    $additional_references          && $self->additional_references( $additional_references );     
+    $clinical_symptoms              && $self->clinical_symptoms_raw( $clinical_symptoms );
+    $miniMIM                        && $self->miniMIM( $miniMIM );
+                                                    
+    return $self;
+    
+} # new
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $obj->init();   
+ Function: Initializes this OMIMentry to all "" and empty lists.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+    # Scalar fields: placeholder MIM number, FALSE flags, empty strings.
+    $self->MIM_number( DEFAULT_MIM_NUMER );
+    $self->title( "" );
+    $self->alternative_titles_and_symbols( "" );
+    $self->more_than_two_genes( FALSE );
+    $self->is_separate( FALSE );
+    $self->description( "" );
+    $self->mapping_method( "" );
+    $self->gene_status( "" );
+    $self->comment( "" );
+    # The default species is built from the classification (sapiens, Homo).
+    my $human = Bio::Species->new();
+    $human->classification( qw( sapiens Homo ) );
+    $self->species( $human );
+    $self->created( "" );
+    $self->edited( "" );
+    $self->contributors( "" );
+    $self->additional_references( "" );
+    $self->clinical_symptoms( {} );
+    # Clear the list-valued attributes (most remove_* methods are inherited).
+    $self->remove_Correlates();
+    $self->remove_References();
+    $self->remove_AllelicVariants();
+    $self->remove_CytoPositions();
+    $self->remove_gene_symbols();
+    $self->remove_Genotypes();
+    $self->remove_DBLinks();
+    $self->remove_keywords();
+    $self->remove_Variants();
+    $self->remove_Measures();
+    $self->miniMIM( Bio::Phenotype::OMIM::MiniMIMentry->new() );
+} # init
+
+
+
+sub to_string {
+    my $self = shift;
+
+    # Scalar fields are followed by a blank line; the list-valued
+    # sections rendered by _array_to_string() get a single newline.
+    my $s = join( "",
+        "-- MIM number:\n"
+            . $self->MIM_number() . "\n\n",
+        "-- Title:\n"
+            . $self->title() . "\n\n",
+        "-- Alternative Titles and Symbols:\n"
+            . $self->alternative_titles_and_symbols() . "\n\n",
+        "-- Can be caused by Mutation in any of two or more Genes:\n"
+            . $self->more_than_two_genes() . "\n\n",
+        "-- Phenotype is separate:\n"
+            . $self->is_separate() . "\n\n",
+        "-- Description:\n"
+            . $self->description() . "\n\n",
+        "-- Species:\n"
+            . $self->species()->binomial() . "\n\n",
+        "-- Clinical Symptoms:\n"
+            . $self->clinical_symptoms() . "\n\n",
+        "-- Allelic Variants:\n"
+            . $self->_array_to_string( $self->each_AllelicVariant() ) . "\n",
+        "-- Cyto Positions:\n"
+            . $self->_array_to_string( $self->each_CytoPosition() ) . "\n",
+        "-- Gene Symbols:\n"
+            . $self->_array_to_string( $self->each_gene_symbol() ) . "\n",
+        "-- Correlates:\n"
+            . $self->_array_to_string( $self->each_Correlate() ) . "\n",
+        "-- References:\n"
+            . $self->_array_to_string( $self->each_Reference() ) . "\n",
+        "-- Additional References:\n"
+            . $self->additional_references() . "\n\n",
+        "-- Mapping Method:\n"
+            . $self->mapping_method() . "\n\n",
+        "-- Gene status:\n"
+            . $self->gene_status() . "\n\n",
+        "-- Created:\n"
+            . $self->created() . "\n\n",
+        "-- Contributors:\n"
+            . $self->contributors() . "\n\n",
+        "-- Edited:\n"
+            . $self->edited() . "\n\n",
+        "-- Comment:\n"
+            . $self->comment() . "\n\n",
+        "-- MiniMIM:\n"
+            . $self->miniMIM()->to_string() . "\n\n",
+    );
+
+    return $s;
+} # to_string
+
+
+
+=head2 MIM_number
+
+ Title   : MIM_number
+ Usage   : $omim->MIM_number( "100050" );
+           or
+           print $omim->MIM_number();
+ Function: Set/get for the MIM number of this OMIM entry.
+ Returns : The MIM number [an integer larger than 100000].
+ Args    : The MIM number [an integer larger than 100000] (optional).
+
+=cut
+
+sub MIM_number {
+    my $self  = shift;
+    my $value = shift;
+
+    return $self->{ "_MIM_number" } unless defined $value;
+
+    # Accept digit-only values that are either the default
+    # placeholder or at least 100000; reject everything else.
+    my $digits_only = ( $value !~ /\D/ );
+    unless ( $digits_only
+    && ( $value >= 100000 || $value == DEFAULT_MIM_NUMER ) ) {
+        $self->throw( "Found [$value] where [integer larger than 100000] expected" );
+    }
+    return $self->{ "_MIM_number" } = $value;
+} # MIM_number
+
+
+
+
+=head2 title
+
+ Title   : title
+ Usage   : $omim->title( "AARSKOG SYNDROME" );
+           or
+           print $omim->title();
+ Function: Set/get for the title or name of this OMIM entry.
+           This method is an alias to the method "name" of
+           Bio::Phenotype::PhenotypeI.
+ Returns : The title [scalar].
+ Args    : The title [scalar] (optional).
+
+=cut
+
+sub title {
+    my ( $self, @args ) = @_;
+
+    # Pure alias: delegate to name() and pass its result straight back.
+    return $self->name( @args );
+} # title
+
+
+
+
+=head2 alternative_titles_and_symbols
+
+ Title   : alternative_titles_and_symbols
+ Usage   : $omim->alternative_titles_and_symbols( "AORTIC ANEURYSM, ABDOMINAL" );
+           or
+           print $omim->alternative_titles_and_symbols();
+ Function: Set/get for the "alternative titles and symbols" of this OMIM entry.
+           Currently, everything after the first line of title (TI) field is
+           considered "alternative titles and symbols".
+ Returns : "alternative titles and symbols" [scalar].
+ Args    : "alternative titles and symbols" [scalar] (optional).
+
+=cut
+
+sub alternative_titles_and_symbols {
+    my $self  = shift;
+    my $value = shift;
+
+    # A defined argument replaces the stored text; undef just reads it.
+    $self->{ "_alternative_titles_and_symbols" } = $value
+        if defined $value;
+
+    return $self->{ "_alternative_titles_and_symbols" };
+} # alternative_titles_and_symbols
+
+
+
+
+=head2 more_than_two_genes
+
+ Title   : more_than_two_genes
+ Usage   : $omim->more_than_two_genes( 1 );
+           or
+           print $omim->more_than_two_genes();
+ Function: This is true if this phenotype can be caused
+           by mutation in any of two or more genes.
+           In OMIM, this is indicated by a number symbol (#)
+           before an entry number (e.g. #114480 -- BREAST CANCER).
+ Returns : [1 or 0].
+ Args    : [1 or 0] (optional).
+
+=cut
+
+sub more_than_two_genes {
+    my $self = shift;
+    my $flag = shift;
+
+    return $self->{ "_more_than_two_genes" } unless defined $flag;
+
+    # _is_true_or_false() throws for anything it does not accept as
+    # 1 or 0, so the assignment below is only reached for valid input.
+    $self->_is_true_or_false( $flag );
+    return $self->{ "_more_than_two_genes" } = $flag;
+} # more_than_two_genes
+
+
+
+
+=head2 is_separate
+
+ Title   : is_separate
+ Usage   : $omim->is_separate( 1 );
+           or
+           print $omim->is_separate();
+ Function: This is true if the phenotype determined by the gene at
+           the given locus is separate from those represented by
+           other entries where "is_separate" is true and if the mode
+           of inheritance of the phenotype has been proved
+           (in the judgment of the authors and editors).
+           In OMIM, this is indicated by an asterisk (*)
+           before an entry number (e.g. *113705 BREAST CANCER,
+           TYPE 1; BRCA1).
+ Returns : [1 or 0].
+ Args    : [1 or 0] (optional).
+
+=cut
+
+sub is_separate {
+    my $self = shift;
+    my $flag = shift;
+
+    # Validate before storing: _is_true_or_false() throws on input it
+    # does not accept as 1 or 0, leaving the stored flag unchanged.
+    if ( defined $flag ) {
+        $self->_is_true_or_false( $flag );
+        $self->{ "_is_separate" } = $flag;
+    }
+    return $self->{ "_is_separate" };
+} # is_separate
+
+
+
+
+=head2 mapping_method
+
+ Title   : mapping_method
+ Usage   : $omim->mapping_method( "PCR of somatic cell hybrid DNA" );
+           or
+           print $omim->mapping_method();
+ Function: Set/get for the mapping method of this OMIM entry.
+ Returns : The mapping method [scalar].
+ Args    : The mapping method [scalar] (optional).
+
+=cut
+
+sub mapping_method {
+    my ( $self, @args ) = @_;
+    $self->{ "_mapping_method" } = $args[0] if @args;  # any argument, even undef, is stored
+    return $self->{ "_mapping_method" };
+} # mapping_method
+
+=head2 gene_status
+
+ Title   : gene_status
+ Usage   : $omim->gene_status( "C" );
+           or
+           print $omim->gene_status();
+ Function: Set/get for the gene status of this OMIM entry.
+           The certainty with which assignment of loci to chromosomes or the linkage
+           between two loci has been established has been graded into the following
+           classes:
+           <L>C = confirmed - observed in at least two laboratories or in several families.
+           <L>P = provisional - based on evidence from one laboratory or one family.
+           <L>I = inconsistent - results of different laboratories disagree.
+           <L>L = limbo - evidence not as strong as that provisional, but included for
+           heuristic reasons. (Same as `tentative'.)
+
+ Returns :  [C, P, I, or L].
+ Args    :  [C, P, I, or L] (optional).
+
+=cut
+
+sub gene_status {
+    my $self  = shift;
+    my $value = shift;
+
+    return $self->{ "_gene_status" } unless defined $value;
+
+    # A stricter check that threw on unexpected codes (and tolerated
+    # "A", "H" and "U") is disabled; it is kept here for reference:
+    #
+    #unless ( $value eq "C"
+    #      || $value eq "P"
+    #      || $value eq "I"
+    #      || $value eq "L"
+    #      || $value eq "A"  # !?
+    #      || $value eq "H"  # !?
+    #      || $value eq "U"  # !?
+    #      || $value eq "" ) {
+    #    $self->throw( "Found [$value]" 
+    #    . " where [C, P, I, or L] expected" );
+    #}
+    #
+    # The active behaviour is lenient: anything other than C, P, I, L
+    # or the empty string is silently replaced by the empty string.
+    my %is_known_status = map { $_ => 1 } ( "C", "P", "I", "L", "" );
+    $value = "" unless $is_known_status{ $value };
+
+    $self->{ "_gene_status" } = $value;
+    return $self->{ "_gene_status" };
+} # gene_status
+
+
+=head2 clinical_symptoms
+
+ Title   : clinical_symptoms
+ Usage   : $omim->clinical_symptoms({});
+ Function: Set/get for the clinical symptoms of this OMIM entry.
+ Returns : [hash reference].
+ Args    : [hash reference]. Suggested not to assign alone. Parser will do.
+
+=cut
+
+sub clinical_symptoms {
+    my $self     = shift;
+    my $symptoms = shift;
+
+    return $self->{ "_clinical_symptoms" } unless defined $symptoms;
+
+    # Only a plain (unblessed) hash reference is accepted.
+    $self->throw('a hash referenced needed')
+        if ref($symptoms) ne 'HASH';
+
+    $self->{ "_clinical_symptoms" } = $symptoms;
+    return $self->{ "_clinical_symptoms" };
+} # clinical_symptoms
+
+=head2 clinical_symptoms_raw
+
+  Title     : clinical_symptoms_raw
+  Usage     : $omim->clinical_symptoms_raw( "Patients with ..." );
+              print $omim->clinical_symptoms_raw();
+  Functions : Get/set for text information of clinical symptoms
+  Returns   : The clinical symptoms [scalar].
+  Args      : The clinical symptoms [scalar] (optional).
+
+=cut 
+
+sub clinical_symptoms_raw {
+    my ( $self, @args ) = @_;
+    $self->{_clinical_symptoms_raw} = $args[0] if @args;   # stores any argument, even undef
+    return $self->{_clinical_symptoms_raw};
+}
+
+=head2 add_clinical_symptoms
+
+  Title     : add_clinical_symptoms
+  Usage     : $entry->add_clinical_symptoms('Ears', 'Floppy ears', 'Lop-ears');
+  Function  : add one or more symptoms on one part of body.
+  Returns   : [none]
+  Args      : ($part, @symptoms)
+              $part, the text name of part/organism of human
+              @symptoms, an array of text description
+
+=cut
+
+sub add_clinical_symptoms {
+    my ($self, $part, @symptoms) = @_;
+    $self->throw('a part/organism must be assigned')
+        if !defined $part;
+
+    # Create the symptom table and the per-part list on first use.
+    my $table = ( $self->{_clinical_symptoms} ||= {} );
+    my $list  = ( $table->{$part} ||= [] );
+    push @{$list}, @symptoms;
+}
+
+=head2 query_clinical_symptoms
+
+  Title     : query_clinical_symptoms
+  Usage     : @symptoms = $self->query_clinical_symptoms('Ears');
+  Function  : get all symptoms specific to one part/organism.
+  Returns   : an array of text
+  Args      : $organ
+
+=cut
+
+sub query_clinical_symptoms {
+    my ($self, $organ)=@_;
+    my $table = $self->{_clinical_symptoms} || {};   # no table yet: nothing recorded, nothing autovivified
+    return @{ ( defined $organ && $table->{$organ} ) || [] };  # unknown organ: empty list, not an undef dereference
+}
+
+sub get_clinical_symptom_organs {
+    my $self = shift;
+    return keys %{ $self->{_clinical_symptoms} };   # keys of the symptom table, i.e. the parts with entries
+}
+
+=head2 created
+
+ Title   : created
+ Usage   : $omim->created( "Victor A. McKusick: 6/4/1986" );
+           or
+           print $omim->created();
+ Function: Set/get for the created field of the OMIM database.
+ Returns : Name(s) and date(s) [scalar - free form].
+ Args    : Name(s) and date(s) [scalar - free form] (optional).
+
+=cut
+
+sub created {
+    my ( $self, @args ) = @_;
+    # Any supplied argument (including undef) replaces the stored value.
+    $self->{ "_created" } = $args[0] if @args;
+    return $self->{ "_created" };
+} # created
+
+
+
+
+=head2 contributors
+
+ Title   : contributors
+ Usage   : $omim->contributors( "Kelly A. Przylepa - revised: 03/18/2002" );
+           or
+           print $omim->contributors();
+ Function: Set/get for the contributors field of the OMIM database.
+ Returns : Name(s) and date(s) [scalar - free form].
+ Args    : Name(s) and date(s) [scalar - free form] (optional).
+
+=cut
+
+sub contributors {
+    my ( $self, @args ) = @_;
+    # Setter whenever arguments are present; only the first is stored.
+    @args and $self->{ "_contributors" } = $args[0];
+    return $self->{ "_contributors" };
+} # contributors
+
+
+
+
+=head2 edited
+
+ Title   : edited
+ Usage   : $omim->edited( "alopez: 06/03/1997" );
+           or
+           print $omim->edited();
+ Function: Set/get for the edited field of the OMIM database.
+ Returns : Name(s) and date(s) [scalar - free form].
+ Args    : Name(s) and date(s) [scalar - free form] (optional).
+
+=cut
+
+sub edited {
+    my $self = shift;
+    # Setter whenever an argument is present, even if it is undef.
+    if ( @_ ) { $self->{ "_edited" } = shift; }
+    return $self->{ "_edited" };
+} # edited
+
+
+
+
+=head2 additional_references
+
+ Title   : additional_references
+ Usage   : $omim->additional_references( "Miller et al." );
+           or
+           print $omim->additional_references();
+ Function: Set/get for the additional references of this OMIM entry
+           (see also).
+ Returns : additional reference [scalar].
+ Args    : additional reference [scalar] (optional).
+
+=cut
+
+sub additional_references {
+    my ( $self, @args ) = @_;
+    # With no arguments this is a plain getter.
+    return $self->{ "_additional_references" } unless @args;
+    return $self->{ "_additional_references" } = $args[0];
+} # additional_references
+
+=head2 miniMIM
+
+ Title   : miniMIM
+ Usage   : $omim->miniMIM( $MM );
+           or
+           $MM = $omim->miniMIM();
+ Function: Set/get for the Mini MIM entry associated with this
+           OMIM entry.
+ Returns : [Bio::Phenotype::OMIM::MiniMIMentry].
+ Args    : [Bio::Phenotype::OMIM::MiniMIMentry] (optional).
+
+=cut
+
+sub miniMIM {
+    my $self     = shift;
+    my $mini_mim = shift;
+
+    return $self->{ "_mini_mim" } unless defined $mini_mim;
+
+    # _check_ref_type() is not defined in this file; presumably it
+    # throws unless the value is a MiniMIMentry object.
+    $self->_check_ref_type( $mini_mim, "Bio::Phenotype::OMIM::MiniMIMentry" );
+    return $self->{ "_mini_mim" } = $mini_mim;
+}
+
+=head2 each_AllelicVariant
+
+ Title   : each_AllelicVariant()
+ Usage   : @avs = $obj->each_AllelicVariant();                 
+ Function: Returns a list of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects
+           associated with this OMIM entry.
+ Returns : A list of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects.
+ Args    :
+
+=cut
+
+sub each_AllelicVariant {
+    my $self = shift;
+
+    # A missing key means no variant list has been set up yet.
+    return exists $self->{"_allelic_variants"} ? @{ $self->{"_allelic_variants"} } : ();
+} # each_AllelicVariant
+
+
+=head2 add_AllelicVariants
+
+ Title   : add_AllelicVariants
+ Usage   : $obj->add_AllelicVariants( @avs );
+           or
+           $obj->add_AllelicVariants( $av );                  
+ Function: Pushes one or more OMIMentryAllelicVariant
+           into the list of OMIMentryAllelicVariants.
+ Returns : 
+ Args    : Bio::Phenotype::OMIM::OMIMentryAllelicVariant object(s).
+
+=cut
+
+sub add_AllelicVariants {
+    my $self     = shift;
+    my @variants = @_;
+    return if !@variants;
+
+    # Check the whole batch before storing anything (assuming
+    # _check_ref_type() throws on an object of the wrong class).
+    $self->_check_ref_type( $_, "Bio::Phenotype::OMIM::OMIMentryAllelicVariant" )
+        for @variants;
+
+    push @{ $self->{ "_allelic_variants" } }, @variants;
+} # add_AllelicVariants
+
+
+=head2 remove_AllelicVariants
+
+ Title   : remove_AllelicVariants
+ Usage   : $obj->remove_AllelicVariants();
+ Function: Deletes (and returns) the list of OMIMentryAllelicVariant objects
+           associated with this OMIM entry.
+ Returns : A list of OMIMentryAllelicVariant objects.
+ Args    :
+
+=cut
+
+sub remove_AllelicVariants {
+    my $self = shift;
+
+    # Snapshot the current variants before the list is reset.
+    my @removed = $self->each_AllelicVariant();
+    $self->{ "_allelic_variants" } = [];
+    return @removed;
+} # remove_AllelicVariants
+
+
+# Title   : _array_to_string
+# Function: Renders a list of values as numbered text blocks.
+# Returns : A string (empty for an empty list).
+# Args    : A list of plain scalars and/or objects.
+sub _array_to_string {
+    my( $self, @value ) = @_;
+    my $s = "";
+
+    # Each entry starts with "#<index>"; references that match none of
+    # the classes below contribute nothing to the output.
+    foreach my $i ( 0 .. $#value ) {
+        my $item = $value[ $i ];
+        if ( ! ref( $item ) ) {
+            $s .= "#$i\n-- Value:\n$item\n";
+        }
+        elsif ( $item->isa( "Bio::Phenotype::OMIM::OMIMentryAllelicVariant" )
+        ||      $item->isa( "Bio::Phenotype::Correlate" ) ) {
+            $s .= "#$i\n" . $item->to_string() . "\n";
+        }
+        elsif ( $item->isa( "Bio::Annotation::Reference" ) ) {
+            $s .= "#$i\n-- Authors:\n" . $item->authors() . "\n"
+                . "-- Title:\n" . $item->title() . "\n"
+                . "-- Location:\n" . $item->location() . "\n";
+        }
+        elsif ( $item->isa( "Bio::Map::CytoPosition" ) ) {
+            $s .= "#$i\n-- Value:\n" . $item->value() . "\n";
+        }
+    }
+    return $s;
+} # _array_to_string
+
+
+# Title   : _is_true_or_false
+# Function: Throws unless the argument is digits-only and numerically 1 or 0.
+# Returns : Nothing meaningful; failure is signalled via throw().
+# Args    : The value to be checked.
+sub _is_true_or_false {
+    my ( $self, $value ) = @_;
+    my $is_flag = ( $value !~ /\D/ && ( $value == TRUE || $value == FALSE ) );
+    $self->throw( "Found [$value] where "
+        . TRUE . " or " . FALSE . " expected" )
+        unless $is_flag;
+} # _is_true_or_false
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentryAllelicVariant.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentryAllelicVariant.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMentryAllelicVariant.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,427 @@
+# $Id: OMIMentryAllelicVariant.pm,v 1.9.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::OMIM::OMIMentryAllelicVariant
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::OMIM::OMIMentryAllelicVariant - Representation of an allelic
+variant of the OMIM database
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::OMIM::OMIMentryAllelicVariant;
+
+  $av = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new( -number               => ".0001",
+                                                            -title                => "ALCOHOL INTOLERANCE",
+                                                            -symbol               => "ALDH2*2",
+                                                            -description          => "The ALDH2*2-encoded ...",
+                                                            -aa_ori               => "GLU",
+                                                            -aa_mut               => "LYS",
+                                                            -position             => 487,
+                                                            -additional_mutations => "IVS4DS, G-A, +1" );
+
+=head1 DESCRIPTION
+
+This class models the allelic variant of the OMIM database.
+This class is intended to be used together with an OMIM entry class.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods.
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Phenotype::OMIM::OMIMentryAllelicVariant;
+use strict;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $av = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new( -number               => ".0001",
+                                                                     -title                => "ALCOHOL INTOLERANCE",
+                                                                     -symbol               => "ALDH2*2",
+                                                                     -description          => "The ALDH2*2-encoded ...",
+                                                                     -aa_ori               => "GLU",
+                                                                     -aa_mut               => "LYS",
+                                                                     -position             => 487,
+                                                                     -additional_mutations => "IVS4DS, G-A, +1" );                      
+ Function: Creates a new OMIMentryAllelicVariant object.
+ Returns : A new OMIMentryAllelicVariant object.
+ Args    : -number               => the OMIM allelic variant number
+           -title                => the title
+           -symbol               => a symbol
+           -description          => a description
+           -aa_ori               => the original amino acid
+           -aa_mut               => the mutated amino acid
+           -position             => the position of the mutation
+           -additional_mutations => free form description of additional mutations
+
+=cut
+
+sub new {
+
+    my( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    # Pull the named constructor arguments out of @args, in this order.
+    my ( $number, $title, $symbol, $desc, $ori, $mut, $pos, $am )
+    = $self->_rearrange( [ qw( NUMBER
+                               TITLE
+                               SYMBOL
+                               DESCRIPTION
+                               AA_ORI
+                               AA_MUT
+                               POSITION
+                               ADDITIONAL_MUTATIONS ) ], @args );
+
+    # Start from a clean slate: init() sets every field to "".
+    $self->init();
+
+    # Test for definedness rather than truth, so that defined-but-false
+    # values (e.g. -position => 0 or a title of "0") are stored instead of
+    # being silently dropped.
+    $self->number( $number )             if defined $number;
+    $self->title( $title )               if defined $title;
+    $self->symbol( $symbol )             if defined $symbol;
+    $self->description( $desc )          if defined $desc;
+    $self->aa_ori( $ori )                if defined $ori;
+    $self->aa_mut( $mut )                if defined $mut;
+    $self->position( $pos )              if defined $pos;
+    $self->additional_mutations( $am )   if defined $am;
+
+    return $self;
+
+} # new 
+
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $av->init();   
+ Function: Initializes all fields of this OMIMentryAllelicVariant to "".
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    # Every field of the variant, in the order in which it is reset.
+    my @fields = qw( number
+                     title
+                     symbol
+                     description
+                     aa_ori
+                     aa_mut
+                     position
+                     additional_mutations );
+
+    # Reset each field to the empty string through its own accessor.
+    # The result of the last accessor call is handed back, as before.
+    my $last_result;
+    foreach my $field ( @fields ) {
+        $last_result = $self->$field( "" );
+    }
+
+    return $last_result;
+
+} # init
+
+
+
+
+=head2 number
+
+ Title   : number
+ Usage   : $av->number( ".0001" );
+           or
+           print $av->number();
+ Function: Set/get for the OMIM allelic variant number of this
+           OMIMentryAllelicVariant.
+ Returns : The OMIM allelic variant number.
+ Args    : The OMIM allelic variant number (optional).
+
+=cut
+
+sub number {
+    my $self = shift;
+    my ( $new_number ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_number" } = $new_number if defined $new_number;
+
+    return $self->{ "_number" };
+
+} # number
+
+
+
+=head2 title
+
+ Title   : title
+ Usage   : $av->title( "ALCOHOL INTOLERANCE" );
+           or
+           print $av->title();
+ Function: Set/get for the title of this OMIMentryAllelicVariant.
+ Returns : The title.
+ Args    : The title (optional).
+
+=cut
+
+sub title {
+    my $self = shift;
+    my ( $new_title ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_title" } = $new_title if defined $new_title;
+
+    return $self->{ "_title" };
+
+} # title
+
+
+
+
+=head2 symbol
+
+ Title   : symbol
+ Usage   : $av->symbol( "ALDH2*2" );
+           or
+           print $av->symbol();
+ Function: Set/get for the symbol of this OMIMentryAllelicVariant.
+ Returns : A symbol.
+ Args    : A symbol (optional).
+
+=cut
+
+sub symbol {
+    my $self = shift;
+    my ( $new_symbol ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_symbol" } = $new_symbol if defined $new_symbol;
+
+    return $self->{ "_symbol" };
+
+} # symbol
+
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $av->description( "The ALDH2*2-encoded protein has a change ..." );
+           or
+           print $av->description();
+ Function: Set/get for the description of this OMIMentryAllelicVariant.
+ Returns : A description.
+ Args    : A description (optional).
+
+=cut
+
+sub description {
+    my $self = shift;
+    my ( $new_description ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_description" } = $new_description if defined $new_description;
+
+    return $self->{ "_description" };
+
+} # description
+
+
+
+
+=head2 aa_ori
+
+ Title   : aa_ori
+ Usage   : $av->aa_ori( "GLU" );
+           or
+           print $av->aa_ori();
+ Function: Set/get for the original amino acid(s).
+ Returns : The original amino acid(s).
+ Args    : The original amino acid(s) (optional).
+
+=cut
+
+sub aa_ori {
+    my $self = shift;
+    my ( $new_aa_ori ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_aa_ori" } = $new_aa_ori if defined $new_aa_ori;
+
+    return $self->{ "_aa_ori" };
+
+} # aa_ori
+
+
+
+
+=head2 aa_mut
+
+ Title   : aa_mut
+ Usage   : $av->aa_mut( "LYS" );
+           or
+           print $av->aa_mut();
+ Function: Set/get for the mutated amino acid(s).
+ Returns : The mutated amino acid(s).
+ Args    : The mutated amino acid(s) (optional).
+
+=cut
+
+sub aa_mut {
+    my $self = shift;
+    my ( $new_aa_mut ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_aa_mut" } = $new_aa_mut if defined $new_aa_mut;
+
+    return $self->{ "_aa_mut" };
+
+} # aa_mut
+
+
+
+
+=head2 position
+
+ Title   : position
+ Usage   : $av->position( 487 );
+           or
+           print $av->position();
+ Function: Set/get for the position of the mutation.
+ Returns : The position of the mutation.
+ Args    : The position of the mutation (optional).
+
+=cut
+
+sub position {
+    my $self = shift;
+    my ( $new_position ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_position" } = $new_position if defined $new_position;
+
+    return $self->{ "_position" };
+
+} # position
+
+
+
+
+=head2 additional_mutations
+
+ Title   : additional_mutations
+ Usage   : $av->additional_mutations( "1-BP DEL, 911T" );
+           or
+           print $av->additional_mutations();
+ Function: Set/get for free form description of (additional) mutation(s).
+ Returns : description of (additional) mutation(s).
+ Args    : description of (additional) mutation(s) (optional).
+
+=cut
+
+sub additional_mutations {
+    my $self = shift;
+    my ( $new_mutations ) = @_;
+
+    # A missing or undefined argument makes this a pure getter.
+    $self->{ "_additional_mutations" } = $new_mutations if defined $new_mutations;
+
+    return $self->{ "_additional_mutations" };
+
+} # additional_mutations
+
+
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $av->to_string();
+ Function: To string method for OMIMentryAllelicVariant objects.
+ Returns : A string representation of this OMIMentryAllelicVariant.
+ Args    :
+
+=cut
+
+sub to_string {
+    my $self = shift;
+
+    # Section heading and the accessor supplying its value, in output order.
+    my @sections = (
+        [ "-- Number:\n",                 "number"               ],
+        [ "-- Title:\n",                  "title"                ],
+        [ "-- Symbol:\n",                 "symbol"               ],
+        [ "-- Description:\n",            "description"          ],
+        [ "-- Original AA(s):\n",         "aa_ori"               ],
+        [ "-- Mutated AA(s):\n",          "aa_mut"               ],
+        [ "-- Position:\n",               "position"             ],
+        [ "-- Additional Mutation(s):\n", "additional_mutations" ],
+    );
+
+    # Each heading is immediately followed by its value; sections are
+    # separated by a newline and the last value gets no trailing newline.
+    my @rendered;
+    foreach my $section ( @sections ) {
+        my ( $heading, $getter ) = @{ $section };
+        push( @rendered, $heading . $self->$getter() );
+    }
+
+    return join( "\n", @rendered );
+
+} # to_string
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMparser.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMparser.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/OMIM/OMIMparser.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,934 @@
+# $Id: OMIMparser.pm,v 1.20.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::OMIM::OMIMparser
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::OMIM::OMIMparser - parser for the OMIM database
+
+=head1 SYNOPSIS
+
+  use Bio::Phenotype::OMIM::OMIMparser;
+
+  # The OMIM database is available as textfile at:
+  # ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z
+  # The genemap is available as textfile at:
+  # ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap
+
+  $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap  => "/path/to/genemap",
+                                                        -omimtext => "/path/to/omim.txt" );
+
+  while ( my $omim_entry = $omim_parser->next_phenotype() ) {
+    # This prints everything.
+    print( $omim_entry->to_string() );
+    print "\n\n";
+
+    # This gets individual data (some of them object-arrays)
+    # (and illustrates the relevant methods of OMIMentry).
+    my $numb  = $omim_entry->MIM_number();                     # *FIELD* NO
+    my $title = $omim_entry->title();                          # *FIELD* TI - first line
+    my $alt   = $omim_entry->alternative_titles_and_symbols(); # *FIELD* TI - additional lines
+    my $mtt   = $omim_entry->more_than_two_genes();            # "#" before title
+    my $sep   = $omim_entry->is_separate();                    # "*" before title
+    my $desc  = $omim_entry->description();                    # *FIELD* TX
+    my $mm    = $omim_entry->mapping_method();                 # from genemap
+    my $gs    = $omim_entry->gene_status();                    # from genemap
+    my $cr    = $omim_entry->created();                        # *FIELD* CD
+    my $cont  = $omim_entry->contributors();                   # *FIELD* CN
+    my $ed    = $omim_entry->edited();                         # *FIELD* ED
+    my $sa    = $omim_entry->additional_references();          # *FIELD* SA
+    my $cs    = $omim_entry->clinical_symptoms_raw();              # *FIELD* CS
+    my $comm  = $omim_entry->comment();                        # from genemap
+
+    my $mini_mim   = $omim_entry->miniMIM();                   # *FIELD* MN
+      # A Bio::Phenotype::OMIM::MiniMIMentry object.
+      # class Bio::Phenotype::OMIM::MiniMIMentry
+      # provides the following:
+      # - description()
+      # - created()
+      # - contributors()
+      # - edited() 
+      #
+    # Prints the contents of the MINI MIM entry (most OMIM entries do
+    # not have MINI MIM entries, though).
+    print $mini_mim->description()."\n";
+    print $mini_mim->created()."\n";
+    print $mini_mim->contributors()."\n";
+    print $mini_mim->edited()."\n";
+
+    my @corrs      = $omim_entry->each_Correlate();            # from genemap
+      # Array of Bio::Phenotype::Correlate objects.
+      # class Bio::Phenotype::Correlate
+      # provides the following:
+      # - name()
+      # - description() (not used)
+      # - species() (always mouse)
+      # - type() ("OMIM mouse correlate")
+      # - comment() 
+
+    my @refs       = $omim_entry->each_Reference();            # *FIELD* RF
+      # Array of Bio::Annotation::Reference objects.
+
+
+    my @avs        = $omim_entry->each_AllelicVariant();       # *FIELD* AV
+      # Array of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects.
+      # class Bio::Phenotype::OMIM::OMIMentryAllelicVariant
+      # provides the following:
+      # - number (e.g ".0001" )
+      # - title (e.g "ALCOHOL INTOLERANCE" )
+      # - symbol (e.g "ALDH2*2" )
+      # - description (e.g "The ALDH2*2-encoded protein has a change ..." )
+      # - aa_ori  (used if information in the form "LYS123ARG" is found)
+      # - aa_mut (used if information in the form "LYS123ARG" is found)
+      # - position (used if information in the form "LYS123ARG" is found)
+      # - additional_mutations (used for e.g. "1-BP DEL, 911T")
+
+    my @cps        = $omim_entry->each_CytoPosition();         # from genemap
+      # Array of Bio::Map::CytoPosition objects.
+
+    my @gss        = $omim_entry->each_gene_symbol();          # from genemap
+      # Array of strings.
+
+    # do something ...
+  }
+
+=head1 DESCRIPTION
+
+This parser returns Bio::Phenotype::OMIM::OMIMentry objects
+(which inherit from Bio::Phenotype::PhenotypeI).
+It parses the OMIM database available as 
+ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z 
+together with (optionally) the gene map file at
+ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Phenotype::OMIM::OMIMparser;
+
+use strict;
+
+use Bio::Root::IO;
+use Bio::Species;
+use Bio::Annotation::Reference;
+use Bio::Map::CytoPosition;
+use Bio::Phenotype::OMIM::OMIMentry;
+use Bio::Phenotype::OMIM::OMIMentryAllelicVariant;
+use Bio::Phenotype::Correlate;
+
+use base qw(Bio::Root::Root);
+
+
+# Parser states. In next_phenotype() every "*FIELD* <tag>" marker selects
+# the state under which the following text is collected, and
+# _createOMIMentry() later compares the keys of the record hash against
+# these same values to decide where each piece of text goes.
+use constant DEFAULT_STATE               => 0;   # no "*FIELD*" marker seen yet
+use constant MIM_NUMBER_STATE            => 1;   # *FIELD* NO
+use constant TITLE_STATE                 => 2;   # *FIELD* TI
+use constant TEXT_STATE                  => 3;   # *FIELD* TX
+use constant MINI_MIM_TEXT_STATE         => 4;   # *FIELD* MN
+use constant ALLELIC_VARIANT_STATE       => 5;   # *FIELD* AV
+use constant SEE_ALSO_STATE              => 6;   # *FIELD* SA
+use constant REF_STATE                   => 7;   # *FIELD* RF
+use constant SYMPT_STATE                 => 8;   # *FIELD* CS
+use constant CONTRIBUTORS_STATE          => 9;   # *FIELD* CN (main entry)
+use constant CREATED_BY_STATE            => 10;  # *FIELD* CD (main entry)
+use constant EDITED_BY_STATE             => 11;  # *FIELD* ED (main entry)
+# The next three are chosen for ED / CD / CN while the MINI MIM flag set
+# by an MN field is still TRUE.
+use constant MINI_MIM_EDITED_BY_STATE    => 12;
+use constant MINI_MIM_CREATED_BY_STATE   => 13;
+use constant MINI_MIM_CONTRIBUTORS_STATE => 14;
+# Flag values used for the boolean settings of this module.
+use constant TRUE                        => 1;
+use constant FALSE                       => 0;
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap  => "/path/to/genemap",
+                                                                 -omimtext => "/path/to/omim.txt" );                      
+ Function: Creates a new OMIMparser.
+ Returns : A new OMIMparser object.
+ Args    : -genemap  => the genemap file name (optional)
+           -omimtext => the omim text file name
+
+=cut
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    # Named arguments, in the order -genemap, -omimtext.
+    my ( $genemap, $omimtext )
+        = $self->_rearrange( [ "GENEMAP", "OMIMTEXT" ], @args );
+
+    # Reset all state before applying what the caller supplied.
+    $self->init();
+
+    if ( $genemap ) {
+        $self->genemap_file_name( $genemap );
+    }
+
+    if ( $omimtext ) {
+        $self->omimtxt_file_name( $omimtext );
+    }
+
+    return $self;
+}
+
+
+
+
+=head2 next_phenotype
+
+ Title   : next_phenotype()
+ Usage   : while ( my $omim_entry = $omim_parser->next_phenotype() ) {
+               # do something with $omim_entry
+           }    
+ Function: Returns a Bio::Phenotype::OMIM::OMIMentry, or
+           undef once the end of the omim text file is reached.
+ Returns : A Bio::Phenotype::OMIM::OMIMentry.
+ Args    :
+
+=cut
+
+sub next_phenotype {
+    my $self = shift;
+
+    # Without an opened OMIM text file there is nothing to iterate over.
+    $self->_no_OMIM_text_file_provided_error()
+        unless defined( $self->_OMIM_text_file() );
+
+    # The final record has already been handed out on an earlier call.
+    return if $self->_done() == TRUE;
+
+    # Field tags that map onto exactly one state. Seeing any of them also
+    # ends a MINI MIM section.
+    my %state_for = (
+        "NO" => MIM_NUMBER_STATE,
+        "TI" => TITLE_STATE,
+        "TX" => TEXT_STATE,
+        "AV" => ALLELIC_VARIANT_STATE,
+        "SA" => SEE_ALSO_STATE,
+        "RF" => REF_STATE,
+        "CS" => SYMPT_STATE,
+    );
+
+    # Field tags used both by the main entry and by its MINI MIM section:
+    # [ state outside MINI MIM, state inside MINI MIM ]. These tags leave
+    # the MINI MIM flag untouched.
+    my %shared_state_for = (
+        "CN" => [ CONTRIBUTORS_STATE, MINI_MIM_CONTRIBUTORS_STATE ],
+        "CD" => [ CREATED_BY_STATE,   MINI_MIM_CREATED_BY_STATE   ],
+        "ED" => [ EDITED_BY_STATE,    MINI_MIM_EDITED_BY_STATE    ],
+    );
+
+    my %fields;                              # state => collected text
+    my $current_state = DEFAULT_STATE;
+    my $buffer        = "";                  # text of the field being read
+    my $in_mini_mim   = FALSE;
+
+    while ( my $line = ( $self->_OMIM_text_file )->_readline() ) {
+
+        # A record marker closes the record collected so far, except for
+        # the very first marker of the file, which merely opens one.
+        if ( $line =~ /^\s*\*RECORD\*/ ) {
+            unless ( $self->_is_not_first_record() == TRUE ) {
+                $self->_is_not_first_record( TRUE );
+                next;
+            }
+            $self->_add_to_hash( $current_state, $buffer, \%fields );
+            my $finished_entry = $self->_createOMIMentry( \%fields );
+            return $finished_entry;
+        }
+
+        # A field marker files the text gathered for the previous field
+        # and switches to the state belonging to the new tag.
+        if ( $line =~ /^\s*\*FIELD\*\s*(\S+)/ ) {
+            my $fieldtag = $1;
+
+            $self->_add_to_hash( $current_state, $buffer, \%fields )
+                if $current_state != DEFAULT_STATE;
+            $buffer = "";
+
+            if ( exists( $state_for{ $fieldtag } ) ) {
+                $current_state = $state_for{ $fieldtag };
+                $in_mini_mim   = FALSE;
+            }
+            elsif ( $fieldtag eq "MN" ) {
+                $current_state = MINI_MIM_TEXT_STATE;
+                $in_mini_mim   = TRUE;
+            }
+            elsif ( exists( $shared_state_for{ $fieldtag } ) ) {
+                my ( $main_state, $mini_state ) = @{ $shared_state_for{ $fieldtag } };
+                $current_state = ( $in_mini_mim == TRUE ) ? $mini_state
+                                                          : $main_state;
+            }
+            else {
+                # The state is left as it was for an unrecognised tag.
+                print "Warning: Unknown tag: $fieldtag\n";
+            }
+            next;
+        }
+
+        # Any other line belongs to the field currently being read.
+        $buffer .= $line;
+    }
+
+    # End of file: close the handle and remember that we are finished.
+    $self->_OMIM_text_file()->close();
+    $self->_done( TRUE );
+
+    # Nothing was ever filed, so the input is not treated as OMIM text.
+    $self->_not_a_OMIM_text_file_error() unless %fields;
+
+    # File the text of the last field and hand out the final record.
+    $self->_add_to_hash( $current_state, $buffer, \%fields );
+
+    my $last_entry = $self->_createOMIMentry( \%fields );
+
+    return $last_entry;
+
+} # next_phenotype
+
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $omim_parser->init();   
+ Function: Resets this OMIMparser to its initial, empty state.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my $self = shift;
+
+    # Setter name and the value it is called with, in execution order.
+    # The list is rebuilt on every call, so each init() passes a fresh {}.
+    my @defaults = (
+        [ "genemap_file_name",    ""    ],
+        [ "omimtxt_file_name",    ""    ],
+        [ "_genemap_hash",        {}    ],
+        [ "_OMIM_text_file",      undef ],
+        [ "_is_not_first_record", FALSE ],
+        [ "_done",                FALSE ],
+    );
+
+    # The result of the last setter call is handed back, as before.
+    my $last_result;
+    foreach my $default ( @defaults ) {
+        my ( $setter, $value ) = @{ $default };
+        $last_result = $self->$setter( $value );
+    }
+
+    return $last_result;
+
+} # init
+
+
+
+
+=head2 genemap_file_name
+
+ Title   : genemap_file_name
+ Usage   : $omimparser->genemap_file_name( "genemap" );
+ Function: Set/get for the genemap file name.
+ Returns : The genemap file name [string].
+ Args    : The genemap file name [string] (optional).
+
+=cut
+
+sub genemap_file_name {
+    my ( $self, $value ) = @_;
+
+    if ( defined $value ) {
+        $self->{ "_genemap_file_name" } = $value;
+
+        # Load the gene map for any non-empty name. init() passes "" to
+        # clear the name, which must not trigger a read. The former /\W/
+        # test also skipped names made only of word characters, such as
+        # the plain "genemap" shown in the usage example.
+        if ( $value ne "" ) {
+            # _genemap_hash() has to be invoked as a method. Called as a
+            # plain function, the parsed map became the invocant and
+            # nothing was stored on this parser.
+            # NOTE(review): assumes _read_genemap() returns the parsed
+            # hash reference; it is defined outside this excerpt - confirm.
+            $self->_genemap_hash( $self->_read_genemap( $value ) );
+        }
+    }
+
+    # Always answer with the currently stored file name.
+    return $self->{ "_genemap_file_name" };
+} # genemap_file_name
+
+
+
+
+=head2 omimtxt_file_name
+
+ Title   : omimtxt_file_name
+ Usage   : $omimparser->omimtxt_file_name( "omim.txt" );
+ Function: Set/get for the omim text file name.
+ Returns : The omim text file name [string].
+ Args    : The omim text file name [string] (optional).
+
+=cut
+
+sub omimtxt_file_name {
+    my ( $self, $value ) = @_;
+
+    if ( defined $value ) {
+        $self->{ "_omimtxt_file_name" } = $value;
+
+        # Open the file for any non-empty name. init() passes "" to clear
+        # the name, which must not trigger an open. The former /\W/ test
+        # also skipped names made only of word characters (e.g. "omim").
+        if ( $value ne "" ) {
+            # Plain class-method constructor call instead of the ambiguous
+            # indirect-object form "new Bio::Root::IO->new(...)".
+            $self->_OMIM_text_file( Bio::Root::IO->new( -file => $value ) );
+        }
+    }
+
+    # Always answer with the currently stored file name.
+    return $self->{ "_omimtxt_file_name" };
+} # omimtxt_file_name
+
+
+
+
+
+# Builds a Bio::Phenotype::OMIM::OMIMentry (plus its MiniMIMentry) from one
+# parsed record. $record_ref maps parser state constants to the raw text
+# collected for that state; each text is trimmed and routed to the matching
+# setter. The entry's species is always set to man (Homo sapiens).
+# Returns the populated OMIMentry.
+sub _createOMIMentry {
+    my ( $self, $record_ref ) = @_;
+
+    my $omim_entry = Bio::Phenotype::OMIM::OMIMentry->new();
+    my $mini_mim   = Bio::Phenotype::OMIM::MiniMIMentry->new();
+
+    foreach my $state ( keys( %$record_ref ) ) {
+
+        # Work on a trimmed copy; the record itself is left untouched.
+        my $text = $record_ref->{ $state };
+        $text =~ s/^\s+//;
+        $text =~ s/\s+$//;
+
+        # ---- fields of the MINI MIM sub-entry ----
+        if ( $state == MINI_MIM_TEXT_STATE ) {
+            $mini_mim->description( $text );
+        }
+        elsif ( $state == MINI_MIM_CONTRIBUTORS_STATE ) {
+            $mini_mim->contributors( $text );
+        }
+        elsif ( $state == MINI_MIM_CREATED_BY_STATE ) {
+            $mini_mim->created( $text );
+        }
+        elsif ( $state == MINI_MIM_EDITED_BY_STATE ) {
+            $mini_mim->edited( $text );
+        }
+
+        # ---- fields that need real parsing ----
+        elsif ( $state == MIM_NUMBER_STATE ) {
+            # Keep the digits only (\D covers whitespace as well).
+            $text =~ s/\D//g;
+
+            $omim_entry->MIM_number( $text );
+
+            # Pull in gene map data when there is a line for this number.
+            my $genemap = $self->_genemap_hash();
+            if ( exists( $$genemap{ $text } ) ) {
+                $self->_parse_genemap( $omim_entry, $text );
+            }
+        }
+        elsif ( $state == TITLE_STATE ) {
+            my ( $title, $alt_titles ) = $self->_parse_title( $text );
+            $omim_entry->title( $title );
+            $omim_entry->alternative_titles_and_symbols( $alt_titles );
+
+            # A leading "*" or "#" on the title sets the matching flag.
+            if ( $title =~ /^\*/ ) {
+                $omim_entry->is_separate( TRUE );
+            }
+            elsif ( $title =~ /^#/ ) {
+                $omim_entry->more_than_two_genes( TRUE );
+            }
+        }
+        elsif ( $state == ALLELIC_VARIANT_STATE ) {
+            my @variants = $self->_parse_allelic_variants( $text );
+            $omim_entry->add_AllelicVariants( @variants );
+        }
+        elsif ( $state == REF_STATE ) {
+            my @references = $self->_parse_references( $text );
+            $omim_entry->add_References( @references );
+        }
+
+        # ---- plain text fields ----
+        # NOTE(review): the literal patterns below look like placeholder
+        # text from a template or test record - confirm.
+        elsif ( $state == TEXT_STATE ) {
+            $text = undef if ( $text =~ /DESCRIPTION1\nDESCRIPTION2/ );
+            $omim_entry->description( $text );
+        }
+        elsif ( $state == SEE_ALSO_STATE ) {
+            $omim_entry->additional_references( $text );
+        }
+        elsif ( $state == SYMPT_STATE ) {
+            $text = '' if ( $text eq 'clinical symptoms' );
+            $omim_entry->clinical_symptoms_raw( $text );
+        }
+        elsif ( $state == CONTRIBUTORS_STATE ) {
+            $text = undef if ( $text =~ /cn1\ncn2\ncn3/ );
+            $omim_entry->contributors( $text );
+        }
+        elsif ( $state == CREATED_BY_STATE ) {
+            $text = undef if ( $text =~ /cd1\ncd2\ncd3/ );
+            $omim_entry->created( $text );
+        }
+        elsif ( $state == EDITED_BY_STATE ) {
+            $text = undef if ( $text =~ /ed1\ned2\ned3/ );
+            $omim_entry->edited( $text );
+        }
+
+    }
+
+    # Every OMIM entry is about man.
+    my $man = Bio::Species->new();
+    $man->classification( qw( sapiens Homo ) );
+    $man->common_name( "man" );
+    $omim_entry->species( $man );
+    $omim_entry->miniMIM( $mini_mim );
+
+    # parse the symptoms text into a hash-based structure.
+    $self->_finer_parse_symptoms( $omim_entry );
+
+    return $omim_entry;
+
+} # _createOMIMentry
+
+
+# Splits the raw clinical symptoms text of $omim_entry into
+# (category, symptom) pairs and registers each pair through
+# add_clinical_symptoms(). A line consisting of words, whitespace and
+# commas followed by ":" opens a category; indented lines below it are its
+# symptoms (an optional trailing ";" is dropped). Symptoms seen before any
+# category are ignored. The raw text is cleared afterwards.
+sub _finer_parse_symptoms {
+    my ( $self, $omim_entry ) = @_;
+
+    my $raw_text = $omim_entry->clinical_symptoms_raw;
+    my @lines    = $raw_text ? split( /\n/, $raw_text ) : ();
+    my $category;
+
+    LINE:
+    foreach my $line ( @lines ) {
+
+        # "Category name:" header line.
+        if ( $line =~ /^([\w\s,]+)\:\s*$/ ) {
+            $category = $1;
+            next LINE;
+        }
+
+        # Whitespace-only separator line.
+        next LINE if $line =~ /^\s+$/;
+
+        # Anything that is not an indented symptom line is skipped.
+        next LINE unless $line =~ /^(\s+)([^;]+)\;?\s*$/;
+        my $symptom = $2;
+
+        # A symptom without a category cannot be filed anywhere.
+        next LINE if ! $category;
+
+        $omim_entry->add_clinical_symptoms( $category, $symptom );
+    }
+
+    $omim_entry->clinical_symptoms_raw('');
+}
+
+# Copies the gene map data stored for MIM number $val into $omim_entry.
+# The gene map line is split on "|" and these columns are used:
+#   4  cytogenetic locations  -> Bio::Map::CytoPosition objects
+#   5  gene symbols           -> plain strings
+#   6  gene status
+#  10  mapping method
+#  11  comment
+#  16  mouse correlates       -> Bio::Phenotype::Correlate objects
+# The list columns are stripped of whitespace and split on "," or ";".
+sub _parse_genemap {
+     my ( $self, $omim_entry, $val ) = @_;
+
+     my $genemap = $self->_genemap_hash();
+     my @column  = split( /\|/, ${ $genemap }{ $val } );
+
+     my ( $locations, $gene_symbols, $mouse_correlates ) = @column[ 4, 5, 16 ];
+
+     if ( defined( $locations ) ) {
+          $locations =~ s/\s+//g;
+          my @cyto_positions;
+          for my $band ( split( /[,;]/, $locations ) ) {
+               push( @cyto_positions,
+                     Bio::Map::CytoPosition->new( -value => $band ) );
+          }
+          $omim_entry->add_CytoPositions( @cyto_positions );
+     }
+
+     if ( defined( $gene_symbols ) ) {
+          $gene_symbols =~ s/\s+//g;
+          $omim_entry->add_gene_symbols( split( /[,;]/, $gene_symbols ) );
+     }
+
+     if ( defined( $mouse_correlates ) ) {
+          $mouse_correlates =~ s/\s+//g;
+          my @correlates;
+          for my $name ( split( /[,;]/, $mouse_correlates ) ) {
+               # Each correlate gets its own mouse species object.
+               my $mouse = Bio::Species->new();
+               $mouse->classification( qw( musculus Mus ) );
+               $mouse->common_name( "mouse" );
+
+               my $correlate = Bio::Phenotype::Correlate->new();
+               $correlate->name( $name );
+               $correlate->species( $mouse );
+               $correlate->type( "OMIM mouse correlate" );
+
+               push( @correlates, $correlate );
+          }
+          $omim_entry->add_Correlates( @correlates );
+     }
+
+     # Single-valued columns are copied only when the line has them.
+     if ( defined( $column[ 6 ] ) ) {
+          $omim_entry->gene_status( $column[ 6 ] );
+     }
+     if ( defined( $column[ 10 ] ) ) {
+          $omim_entry->mapping_method( $column[ 10 ] );
+     }
+     if ( defined( $column[ 11 ] ) ) {
+          $omim_entry->comment( $column[ 11 ] );
+     }
+
+} # _parse_genemap
+
+
+
+
+# Splits the text of an allelic variants field into individual variants.
+# A line starting with ".<digits>" opens a new variant. The first line
+# containing a word character after it is the title. The last such line
+# seen before the first blank line is kept as the symbol/mutation line.
+# Everything after a blank line is description, joined with newlines.
+# Each completed variant is built by _create_allelic_variant(); one final
+# variant is always built from whatever was collected last.
+# Returns the list of variants.
+sub _parse_allelic_variants {
+    my ( $self, $text ) = @_;
+
+    my @variants;
+    my ( $number, $title, $symbol_mut_line, $pending_line, $description )
+        = ( "", "", "", "", "" );
+    my $after_blank_line = FALSE;
+
+    foreach my $line ( split( /\n/, $text ) ) {
+
+        # A line without any word character counts as a blank line.
+        if ( $line !~ /\w/ ) {
+            $after_blank_line = TRUE;
+            next;
+        }
+
+        # ".0001"-style number: finish the previous variant, start anew.
+        if ( $line =~ /^\s*(\.\d+)/ ) {
+            my $next_number = $1;
+            if ( $number ne "" ) {
+                my $finished = $self->_create_allelic_variant( $number, $title,
+                                             $symbol_mut_line, $description );
+                push( @variants, $finished );
+            }
+            ( $number, $title, $pending_line, $symbol_mut_line, $description )
+                = ( $next_number, "", "", "", "" );
+            $after_blank_line = FALSE;
+            next;
+        }
+
+        # First content line of a variant is its title.
+        if ( $title eq "" ) {
+            $title = $line;
+            next;
+        }
+
+        # Before the first blank line: remember the most recent line as the
+        # candidate symbol/mutation line.
+        if ( $after_blank_line == FALSE ) {
+            $pending_line = $line;
+            next;
+        }
+
+        # After a blank line: promote the candidate (once) and collect the
+        # line as part of the description.
+        if ( $pending_line ne "" ) {
+            $symbol_mut_line = $pending_line;
+            $pending_line    = "";
+        }
+        $description = ( $description ne "" ) ? $description . "\n" . $line
+                                              : $line;
+    }
+
+    # Whatever was collected last becomes the final variant.
+    my $last_variant = $self->_create_allelic_variant( $number, $title,
+                                     $symbol_mut_line, $description );
+    push( @variants, $last_variant );
+
+    return @variants;
+
+} # _parse_allelic_variants
+
+
+
+
+sub _create_allelic_variant {
+    # Builds one Bio::Phenotype::OMIM::OMIMentryAllelicVariant.
+    #
+    # The symbol/mutation line is interpreted as:
+    #   "SYMBOL, <3 letters><digits><3 letters>"
+    #        -> symbol, aa_ori, position and aa_mut are filled in;
+    #   "SYMBOL, <anything>"
+    #        -> symbol plus free text stored via additional_mutations();
+    #   anything else
+    #        -> the whole line is used as the symbol.
+    #
+    # Args    : number, title, symbol/mutation line, description [scalars].
+    # Returns : the populated allelic variant object.
+    # Throws  : via $self->throw() if the description is undefined.
+    my $self = shift;
+    my ( $number, $title, $symbol_mut_line, $description ) = @_;
+
+    my ( $symbol, $mutation, $aa_ori, $aa_mut, $position ) = ( "" ) x 5;
+
+    if ( $symbol_mut_line =~ /\s*(.+?)\s*,\s*([a-z]{3})(\d+)([a-z]{3})/i ) {
+        ( $symbol, $aa_ori, $position, $aa_mut ) = ( $1, $2, $3, $4 );
+    }
+    elsif ( $symbol_mut_line =~ /\s*(.+?)\s*,\s*(.+)/ ) {
+        ( $symbol, $mutation ) = ( $1, $2 );
+    }
+    else {
+        $symbol = $symbol_mut_line;
+    }
+
+    $self->throw("undef desc")     unless defined( $description );
+    $self->throw("undef mutation") unless defined( $mutation );
+
+    my $variant = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new();
+
+    # Setter name / value pairs, applied in this order.
+    my @settings = (
+        [ number               => $number ],
+        [ aa_ori               => $aa_ori ],
+        [ aa_mut               => $aa_mut ],
+        [ position             => $position ],
+        [ title                => $title ],
+        [ symbol               => $symbol ],
+        [ description          => $description ],
+        [ additional_mutations => $mutation ],
+    );
+    foreach my $setting ( @settings ) {
+        my ( $setter, $setting_value ) = @{ $setting };
+        $variant->$setter( $setting_value );
+    }
+
+    return $variant;
+
+} # _create_allelic_variant
+
+
+
+
+sub _parse_title {
+    # Splits the first line off $text and returns ( title, remaining text ).
+    # If $text does not begin with a non-empty line terminated by a
+    # newline, the whole text is the title and the remainder is "".
+    my ( $self, $text ) = @_;
+
+    # Remove the leading line (and its newline) while capturing it.
+    if ( $text =~ s/^(.+)\n// ) {
+        my $first_line = $1;
+        return ( $first_line, $text );
+    }
+
+    return ( $text, "" );
+} # _parse_title
+
+
+
+
+sub _parse_references {
+    # Parses the numbered reference list of a record into
+    # Bio::Annotation::Reference objects.
+    #
+    # The text is trimmed and its leading "<n>." marker removed; it is then
+    # split wherever a blank line is followed by the next "<n>." marker.
+    # Each piece has its whitespace runs collapsed to single blanks and is
+    # matched, in this order, as
+    #   "authors: title. journal volume: pages, year"
+    #   "authors: title. location"
+    # and, if neither matches, the whole piece is used as the title.
+    #
+    # Args    : the reference text [scalar].
+    # Returns : a list of Bio::Annotation::Reference objects.
+    my ( $self, $text ) = @_;
+    
+    $text =~ s/\A\s+//;
+    $text =~ s/\s+\z//;
+    $text =~ s/\A\d+\.\s*//;
+    
+    my @references;
+    
+    my @texts = split( /\s*\n\s*\n\s*\d+\.\s*/, $text );
+    
+    foreach my $t ( @texts ) {
+    
+        my $authors   = "";
+        my $title     = "";
+        my $location  = "";
+        
+        $t =~ s/\s+/ /g;
+        
+        if ( $t =~ /(.+?)\s*:\s*(.+?[.?!])\s+(.+?)\s+(\S+?)\s*:\s*(\w?\d+.*)\s*,\s*(\d+)/ ) {
+            $authors    = $1;
+            $title      = $2;
+            my $journal = $3;
+            my $volume  = $4;
+            my $fromto  = $5;
+            my $year    = $6;
+            my $from    = "";
+            my $to      = "";
+            # "12-34" gives from/to; otherwise only the leading word is kept.
+            if ( $fromto =~ /(\d+)-+(\d+)/ ) {
+                $from = $1;
+                $to   = "-".$2;
+            }
+            elsif ( $fromto =~ /\A(\w+)/ ) {
+                $from = $1;
+            }
+            $location = $journal." ".$volume." ".$from.$to." (".$year.")";
+        }
+        elsif ( $t =~ /(.+?)\s*:\s*(.+?[.?!])\s*(.+?)\z/ ) {
+            $authors   = $1;
+            $title     = $2;
+            $location  = $3;
+        }
+        else {
+            $title = $t;  
+        }
+         
+        my $ref = Bio::Annotation::Reference->new( -title    => $title,
+                                                   -location => $location,
+                                                   -authors  => $authors );
+        push( @references, $ref );
+       
+    }
+    return @references;
+    
+} # _parse_references
+
+
+
+
+sub _genemap_hash {
+    # Get/set the hash reference holding the gene map data.
+    # A defined argument must be a plain HASH reference, otherwise
+    # $self->throw() is called.  Returns the stored reference.
+    my $self = shift;
+    my ( $value ) = @_;
+
+    if ( defined $value ) {
+        if ( ref( $value ) ne "HASH" ) {
+            $self->throw( "Argument to method \"_genemap_hash\" is not a reference to an Hash" );
+        }
+        $self->{ "_genemap_hash" } = $value;
+    }
+
+    return $self->{ "_genemap_hash" };
+} # _genemap_hash
+
+
+
+
+sub _is_not_first_record {
+    # Get/set the "not the first record" flag.
+    # A defined argument must compare numerically equal to FALSE or TRUE,
+    # otherwise $self->throw() is called.  Returns the stored flag.
+    my $self = shift;
+    my ( $value ) = @_;
+
+    if ( defined $value ) {
+        if ( $value != FALSE && $value != TRUE ) {
+            $self->throw( "Found [$value] where [" . TRUE
+            ." or " . FALSE . "] expected" );
+        }
+        $self->{ "_not_first_record" } = $value;
+    }
+
+    return $self->{ "_not_first_record" };
+} # _is_not_first_record
+
+
+
+
+sub _done {
+    # Get/set the "done" flag.
+    # A defined argument must compare numerically equal to FALSE or TRUE,
+    # otherwise $self->throw() is called.  Returns the stored flag.
+    my $self = shift;
+    my ( $value ) = @_;
+
+    if ( defined $value ) {
+        if ( $value != FALSE && $value != TRUE ) {
+            $self->throw( "Found [$value] where [" . TRUE
+            ." or " . FALSE . "] expected" );
+        }
+        $self->{ "_done" } = $value;
+    }
+
+    return $self->{ "_done" };
+} # _done
+
+
+
+
+sub _OMIM_text_file {
+    # Get/set the Bio::Root::IO object used for the OMIM text file.
+    # A defined argument must be a Bio::Root::IO (checked with isa),
+    # otherwise $self->throw() is called.  Returns the stored object.
+    my ( $self, $value ) = @_;
+
+    # Pure getter call.
+    return $self->{ "_omimtxt_file" } if !defined $value;
+
+    if ( !$value->isa( "Bio::Root::IO" ) ) {
+        $self->throw( "[$value] is not a valid \"Bio::Root::IO\"" );
+    }
+
+    return $self->{ "_omimtxt_file" } = $value;
+} # _OMIM_text_file
+
+
+
+
+sub _read_genemap {
+    # Reads the gene map file and caches its lines.
+    #
+    # Every line must split on "|" into exactly 18 fields, otherwise
+    # $self->throw() is called.  Each complete line is stored in a hash
+    # keyed by its 10th field (index 9; presumably the MIM number -- TODO
+    # confirm), and that hash is handed to _genemap_hash().
+    #
+    # Args    : the name of the gene map file [scalar].
+    # Returns : whatever _genemap_hash() returns.
+    my ( $self, $genemap_file_name ) = @_;
+    
+    # Plain class-method call; indirect-object syntax ("new Class ...")
+    # combined with "->new" is ambiguous to the parser.
+    my $genemap_file = Bio::Root::IO->new( -file => $genemap_file_name );
+    my %gm           = ();
+    
+    while ( my $line = $genemap_file->_readline() ) {
+        my @fields = split( /\|/, $line );
+        unless ( scalar( @fields ) == 18 ) {
+            $self->throw( "Gene map file \"".$self->genemap_file_name()
+            . "\" is not in the expected format" );
+        }
+        $gm{ $fields[ 9 ] } = $line;
+    }
+    $genemap_file->close();
+    $self->_genemap_hash( \%gm );
+  
+} #_read_genemap 
+
+
+
+
+sub _no_OMIM_text_file_provided_error {
+    # Throws the standard "no OMIM text file given" error, telling the
+    # user the two ways of supplying one.
+    my $self = shift;
+
+    $self->throw(
+          "Need to indicate a OMIM text file to read from with\n"
+        . "either \"OMIMparser->new( -omimtext => \"path/to/omim.txt\" );\"\n"
+        . "or \"\$omim_parser->omimtxt_file_name( \"path/to/omim.txt\" );\""
+    );
+} # _no_OMIM_text_file_provided_error
+
+
+
+
+sub _not_a_OMIM_text_file_error {
+    # Throws the standard "this is not an OMIM text file" error, naming
+    # the file reported by omimtxt_file_name().
+    my $self = shift;
+
+    my $file_name = $self->omimtxt_file_name();
+    $self->throw( "File \"" . $file_name .
+    "\" appears not to be a OMIM text file" );
+} # _not_a_OMIM_text_file_error
+
+
+
+
+sub _add_to_hash {
+    # Stores $contents under key $state in the record hash.  If the key is
+    # already present, the existing value is chomped and $contents is
+    # appended to it.
+    my ( $self, $state, $contents, $record_ref ) = @_;
+
+    if ( !exists $record_ref->{ $state } ) {
+        $record_ref->{ $state } = $contents;
+    }
+    else {
+        chomp( $record_ref->{ $state } );
+        $record_ref->{ $state } .= $contents;
+    }
+} # _add_to_hash
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Phenotype.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Phenotype.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/Phenotype.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,944 @@
+# $Id: Phenotype.pm,v 1.12.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::Phenotype
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::Phenotype - A class for modeling phenotypes
+
+=head1 SYNOPSIS
+
+  #get Bio::Phenotype::PhenotypeI somehow
+
+  print $phenotype->name(), "\n";
+  print $phenotype->description(), "\n";
+
+  my @keywords = ( "achondroplasia", "dwarfism" );
+  $phenotype->add_keywords( @keywords ); 
+  foreach my $keyword ( $phenotype->each_keyword() ) {
+       print $keyword, "\n";
+  }
+  $phenotype->remove_keywords();
+
+
+  foreach my $gene_symbol ( $phenotype->each_gene_symbol() ) {
+       print $gene_symbol, "\n";
+  }
+
+  foreach my $corr ( $phenotype->each_Correlate() ) {
+       # Do something with $corr
+  }
+
+  foreach my $var ( $phenotype->each_Variant() ) {
+       # Do something with $var (mutation)
+  }
+
+  foreach my $measure ( $phenotype->each_Measure() ) {
+       # Do something with $measure
+  }
+
+
+=head1 DESCRIPTION
+
+This superclass implements common methods for classes modelling phenotypes.
+Bio::Phenotype::OMIM::OMIMentry is an example of an instantiable phenotype
+class (the design of this interface was partially guided by the need
+to model OMIM entries).
+Please note. This class provides methods to associate mutations
+(methods "each_Variant", ...) and genotypes (methods "each_Genotype", ...) 
+with phenotypes. Yet, these aspects might need some future enhancements,
+especially since there is no "genotype" class yet.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Phenotype::Phenotype;
+use strict;
+
+use Bio::Species;
+use Bio::Variation::VariantI;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Reference;
+use Bio::Phenotype::Measure;
+use Bio::Phenotype::Correlate;
+use Bio::Map::CytoPosition;
+use Bio::Range;
+
+
+use base qw(Bio::Root::Root Bio::Phenotype::PhenotypeI);
+
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $obj = Bio::Phenotype::Phenotype->new( -name         => "XY",
+                                                  -description  => "This is ..." );
+ Function: Creates a new Phenotype object.
+ Returns : A new Phenotype object.
+ Args    : -name                           => the name
+           -description                    => the description of this phenotype
+           -species                        => ref to the species
+           -comment                        => a comment 
+
+=cut
+
+sub new {
+    # Constructor.  Recognised named arguments: -name, -description,
+    # -species and -comment.  The object is first reset with init() and
+    # then the supplied values are applied through the accessors.
+    #
+    # Returns : the new object.
+
+    my ( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    my ( $name,
+         $description,
+         $species,
+         $comment )
+    = $self->_rearrange( [ qw( NAME
+                               DESCRIPTION
+                               SPECIES 
+                               COMMENT ) ], @args );
+
+    $self->init(); 
+
+    # Test definedness, not truth, so that a value such as "0" is kept.
+    $self->name( $name )               if defined $name;
+    $self->description( $description ) if defined $description;
+    $species                           && $self->species( $species );
+    $self->comment( $comment )         if defined $comment;
+
+    return $self;
+
+} # new
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $obj->init();   
+ Function: Initializes this Phenotype to all "" and empty lists.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    # Resets this object: name, description and comment become "", the
+    # species becomes a fresh Bio::Species classified as "sapiens Homo",
+    # and every list is emptied through its remove_* method.
+
+    my $self = shift;
+
+    $self->name( "" );
+    $self->description( "" );
+
+    my $default_species = Bio::Species->new();
+    $default_species->classification( qw( sapiens Homo ) );
+    $self->species( $default_species );
+
+    $self->comment( "" );
+
+    foreach my $clear ( qw( remove_Correlates
+                            remove_References
+                            remove_CytoPositions
+                            remove_gene_symbols
+                            remove_Genotypes
+                            remove_DBLinks
+                            remove_keywords
+                            remove_Variants ) ) {
+        $self->$clear();
+    }
+
+    # Kept as the final statement so the sub's implicit return value is
+    # still that of remove_Measures().
+    $self->remove_Measures();
+
+} # init
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name( "r1" );
+           or
+           print $obj->name();
+ Function: Set/get for the name or id of this phenotype.
+ Returns : A name or id [scalar].
+ Args    : A name or id [scalar] (optional).
+
+=cut
+
+sub name {
+    # Get/set the name (or id).  Only a defined argument is stored;
+    # the current value is returned.
+    my $self = shift;
+    my ( $new_name ) = @_;
+
+    $self->{ "_name" } = $new_name if defined $new_name;
+
+    return $self->{ "_name" };
+
+} # name
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description( "This is ..." );
+           or
+           print $obj->description();
+ Function: Set/get for the description of this phenotype.
+ Returns : A description [scalar].
+ Args    : A description [scalar] (optional).
+
+=cut
+
+sub description {
+    # Get/set the description.  Any argument, even undef, is stored;
+    # the (possibly updated) value is returned.
+    my ( $self, @new_value ) = @_;
+
+    if ( @new_value ) {
+        $self->{ "_description" } = $new_value[ 0 ];
+    }
+    return $self->{ "_description" };
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $obj->species( $species );
+           or
+           $species = $obj->species();
+ Function: Set/get for the species of this phenotype.
+ Returns : A species [Bio::Species].
+ Args    : A species [Bio::Species] (optional).
+
+=cut
+
+sub species {
+    # Get/set the species.  A defined argument is type-checked against
+    # Bio::Species with _check_ref_type() before being stored.
+    # Returns the current species.
+    my $self = shift;
+    my ( $new_species ) = @_;
+
+    return $self->{ "_species" } unless defined $new_species;
+
+    $self->_check_ref_type( $new_species, "Bio::Species" );
+    return $self->{ "_species" } = $new_species;
+
+} # species
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $obj->comment( "putative" );
+           or
+           print $obj->comment();
+ Function: Set/get for a comment about this phenotype.
+ Returns : A comment [scalar].
+ Args    : A comment [scalar] (optional).
+
+=cut
+
+sub comment {
+    # Get/set the comment.  Any argument, even undef, is stored;
+    # the (possibly updated) value is returned.
+    my ( $self, @new_value ) = @_;
+
+    if ( @new_value ) {
+        $self->{ "_comment" } = $new_value[ 0 ];
+    }
+    return $self->{ "_comment" };
+} # comment
+
+
+=head2 each_gene_symbol
+
+ Title   : each_gene_symbol()
+ Usage   : @gs = $obj->each_gene_symbol();
+ Function: Returns a list of gene symbols [scalars, most likely Strings]
+           associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_gene_symbol {
+    # Returns the stored gene symbols as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_gene_symbols" } );
+    return @{ $self->{ "_gene_symbols" } };
+} # each_gene_symbol
+
+
+=head2 add_gene_symbols
+
+ Title   : add_gene_symbols
+ Usage   : $obj->add_gene_symbols( @gs );
+           or
+           $obj->add_gene_symbols( $gs );
+ Function: Pushes one or more gene symbols [scalars, most likely Strings]
+           into the list of gene symbols.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_gene_symbols {
+    # Appends the given gene symbols to the stored list.
+    # Does nothing when called without symbols.
+    my $self        = shift;
+    my @new_symbols = @_;
+
+    return if !@new_symbols;
+
+    push( @{ $self->{ "_gene_symbols" } }, @new_symbols );
+
+} # add_gene_symbols
+
+
+=head2 remove_gene_symbols
+
+ Usage   : $obj->remove_gene_symbols();
+ Function: Deletes (and returns) the list of gene symbols [scalars,
+           most likely Strings] associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub remove_gene_symbols {
+    # Empties the gene symbol list and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_gene_symbol();
+    $self->{ "_gene_symbols" } = [];
+
+    return @removed;
+
+} # remove_gene_symbols
+
+
+
+
+=head2 each_Variant
+
+ Title   : each_Variant()
+ Usage   : @vs = $obj->each_Variant();
+ Function: Returns a list of Bio::Variation::VariantI implementing objects
+           associated with this phenotype.
+           This is for representing the actual mutation(s) causing this 
+           phenotype.
+           {* The "variants" data member and its methods will/might need to be
+           changed/improved in one way or another, CZ 09/06/02 *}
+ Returns : A list of Bio::Variation::VariantI implementing objects.
+ Args    :
+
+=cut
+
+sub each_Variant {
+     # Returns the stored variants as a list (empty if none stored).
+     my $self = shift;
+
+     return () unless exists( $self->{ "_variants" } );
+     return @{ $self->{ "_variants" } };
+} # each_Variant
+
+
+=head2 add_Variants
+
+ Usage   : $obj->add_Variants( @vs );
+           or
+           $obj->add_Variants( $v );
+ Function: Pushes one or more Bio::Variation::VariantI implementing objects
+           into the list of Variants.
+ Returns : 
+ Args    : Bio::Variation::VariantI implementing object(s).
+
+=cut
+
+sub add_Variants {
+    # Appends the given objects to the list of variants.  Each one is
+    # first checked against Bio::Variation::VariantI with
+    # _check_ref_type(); nothing is stored unless all checks pass.
+    my $self       = shift;
+    my @candidates = @_;
+
+    return if !@candidates;
+
+    $self->_check_ref_type( $_, "Bio::Variation::VariantI" ) for @candidates;
+
+    push( @{ $self->{ "_variants" } }, @candidates );
+
+} # add_Variants
+
+
+=head2 remove_Variants
+
+ Title   : remove_Variants
+ Usage   : $obj->remove_Variants();
+ Function: Deletes (and returns) the list of Bio::Variation::VariantI implementing
+           objects associated with this phenotype.
+ Returns : A list of Bio::Variation::VariantI implementing objects.
+ Args    :
+
+=cut
+
+sub remove_Variants {
+    # Empties the list of variants and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_Variant();
+    $self->{ "_variants" } = [];
+
+    return @removed;
+
+} # remove_Variants
+
+
+
+
+=head2 each_Reference
+
+ Title   : each_Reference()
+ Usage   : @refs = $obj->each_Reference();                 
+ Function: Returns a list of Bio::Annotation::Reference objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::Reference objects.
+ Args    :
+
+=cut
+
+sub each_Reference {
+    # Returns the stored references as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_references" } );
+    return @{ $self->{ "_references" } };
+} # each_Reference
+
+
+=head2 add_References 
+
+ Title   : add_References
+ Usage   : $obj->add_References( @refs );
+           or
+           $obj->add_References( $ref );                  
+ Function: Pushes one or more Bio::Annotation::Reference objects
+           into the list of References.
+ Returns : 
+ Args    : Bio::Annotation::Reference object(s).
+
+=cut
+
+sub add_References {
+    # Appends the given objects to the list of references.  Each one is
+    # first checked against Bio::Annotation::Reference with
+    # _check_ref_type(); nothing is stored unless all checks pass.
+    my $self       = shift;
+    my @candidates = @_;
+
+    return if !@candidates;
+
+    $self->_check_ref_type( $_, "Bio::Annotation::Reference" ) for @candidates;
+
+    push( @{ $self->{ "_references" } }, @candidates );
+
+} # add_References
+
+
+=head2 remove_References
+
+ Title   : remove_References()
+ Usage   : $obj->remove_References();
+ Function: Deletes (and returns) the list of Bio::Annotation::Reference objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::Reference objects.
+ Args    :
+
+=cut
+
+sub remove_References {
+    # Empties the list of references and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_Reference();
+    $self->{ "_references" } = [];
+
+    return @removed;
+
+} # remove_References
+
+
+
+
+=head2 each_CytoPosition
+
+ Title   : each_CytoPosition()
+ Usage   : @cps = $obj->each_CytoPosition();                 
+ Function: Returns a list of Bio::Map::CytoPosition objects
+           associated with this phenotype.
+ Returns : A list of Bio::Map::CytoPosition objects.
+ Args    :
+
+=cut
+
+sub each_CytoPosition {
+    # Returns the stored cyto positions as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_cyto_positions" } );
+    return @{ $self->{ "_cyto_positions" } };
+} # each_CytoPosition
+
+
+=head2 add_CytoPositions
+
+ Title   : add_CytoPositions
+ Usage   : $obj->add_CytoPositions( @cps );
+           or
+           $obj->add_CytoPositions( $cp );                  
+ Function: Pushes one or more Bio::Map::CytoPosition objects
+           into the list of CytoPositions.
+ Returns : 
+ Args    : Bio::Map::CytoPosition object(s).
+
+=cut
+
+sub add_CytoPositions {
+    # Appends the given objects to the list of cyto positions.  Each one
+    # is first checked against Bio::Map::CytoPosition with
+    # _check_ref_type(); nothing is stored unless all checks pass.
+    my $self       = shift;
+    my @candidates = @_;
+
+    return if !@candidates;
+
+    $self->_check_ref_type( $_, "Bio::Map::CytoPosition" ) for @candidates;
+
+    push( @{ $self->{ "_cyto_positions" } }, @candidates );
+
+} # add_CytoPositions
+
+
+=head2 remove_CytoPositions
+
+ Title   : remove_CytoPositions
+ Usage   : $obj->remove_CytoPositions();
+ Function: Deletes (and returns) the list of Bio::Map::CytoPosition objects
+           associated with this phenotype.
+ Returns : A list of Bio::Map::CytoPosition objects.
+ Args    :
+
+=cut
+
+sub remove_CytoPositions {
+    # Empties the list of cyto positions and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_CytoPosition();
+    $self->{ "_cyto_positions" } = [];
+
+    return @removed;
+
+} # remove_CytoPositions
+
+
+
+
+=head2 each_Correlate
+
+ Title   : each_Correlate()
+ Usage   : @corrs = $obj->each_Correlate();                 
+ Function: Returns a list of Bio::Phenotype::Correlate objects
+           associated with this phenotype.
+           (Correlates are correlating phenotypes in different species;
+           inspired by mouse correlates of human phenotypes in the OMIM
+           database.)
+ Returns : A list of Bio::Phenotype::Correlate objects.
+ Args    :
+
+=cut
+
+sub each_Correlate {
+    # Returns the stored correlates as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_correlates" } );
+    return @{ $self->{ "_correlates" } };
+} # each_Correlate
+
+
+
+
+=head2 add_Correlates
+
+ Title   : add_Correlates
+ Usage   : $obj->add_Correlates( @corrs );
+           or
+           $obj->add_Correlates( $corr );                  
+ Function: Pushes one or more Bio::Phenotype::Correlate objects
+           into the list of Correlates.
+ Returns : 
+ Args    : Bio::Phenotype::Correlate object(s).
+
+=cut
+
+sub add_Correlates {
+    # Appends the given objects to the list of correlates.  Each one is
+    # first checked against Bio::Phenotype::Correlate with
+    # _check_ref_type(); nothing is stored unless all checks pass.
+    my $self       = shift;
+    my @candidates = @_;
+
+    return if !@candidates;
+
+    $self->_check_ref_type( $_, "Bio::Phenotype::Correlate" ) for @candidates;
+
+    push( @{ $self->{ "_correlates" } }, @candidates );
+
+} # add_Correlates
+
+
+=head2 remove_Correlates
+
+ Title   : remove_Correlates
+ Usage   : $obj->remove_Correlates();
+ Function: Deletes (and returns) the list of Bio::Phenotype::Correlate objects
+           associated with this phenotype.
+ Returns : A list of Bio::Phenotype::Correlate objects.
+ Args    :
+
+=cut
+
+sub remove_Correlates {
+    # Empties the list of correlates and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_Correlate();
+    $self->{ "_correlates" } = [];
+
+    return @removed;
+
+} # remove_Correlates
+
+
+
+
+=head2 each_Measure
+
+ Title   : each_Measure()
+ Usage   : @ms = $obj->each_Measure();                 
+ Function: Returns a list of Bio::Phenotype::Measure objects
+           associated with this phenotype.
+           (Measure is for biochemically defined phenotypes
+           or any other types of measures.)
+ Returns : A list of Bio::Phenotype::Measure objects.
+ Args    :
+
+=cut
+
+sub each_Measure {
+    # Returns the stored measures as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_measures" } );
+    return @{ $self->{ "_measures" } };
+} # each_Measure
+
+
+=head2 add_Measures
+
+ Title   : add_Measures
+ Usage   : $obj->add_Measures( @ms );
+           or
+           $obj->add_Measures( $m );                  
+ Function: Pushes one or more Bio::Phenotype::Measure objects
+           into the list of Measures.
+ Returns : 
+ Args    : Bio::Phenotype::Measure object(s).
+
+=cut
+
+sub add_Measures {
+    # Appends the given objects to the list of measures.  Each one is
+    # first checked against Bio::Phenotype::Measure with
+    # _check_ref_type(); nothing is stored unless all checks pass.
+    my $self       = shift;
+    my @candidates = @_;
+
+    return if !@candidates;
+
+    $self->_check_ref_type( $_, "Bio::Phenotype::Measure" ) for @candidates;
+
+    push( @{ $self->{ "_measures" } }, @candidates );
+
+} # add_Measures
+
+
+=head2 remove_Measures
+
+ Title   : remove_Measures
+ Usage   : $obj->remove_Measures();
+ Function: Deletes (and returns) the list of Bio::Phenotype::Measure objects
+           associated with this phenotype.
+ Returns : A list of Bio::Phenotype::Measure objects.
+ Args    :
+
+=cut
+
+sub remove_Measures {
+    # Empties the list of measures and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_Measure();
+    $self->{ "_measures" } = [];
+
+    return @removed;
+
+} # remove_Measures
+
+
+
+
+=head2 each_keyword
+
+ Title   : each_keyword()
+ Usage   : @kws = $obj->each_keyword();                 
+ Function: Returns a list of key words [scalars, most likely Strings]
+           associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_keyword {
+    # Returns the stored keywords as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_keywords" } );
+    return @{ $self->{ "_keywords" } };
+} # each_keyword
+
+
+=head2 add_keywords
+
+ Title   : add_keywords
+ Usage   : $obj->add_keywords( @kws );
+           or
+           $obj->add_keywords( $kw );                  
+ Function: Pushes one or more keywords [scalars, most likely Strings]
+           into the list of key words.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_keywords {
+    # Appends the given keywords to the stored list.
+    # Does nothing when called without keywords.
+    my $self         = shift;
+    my @new_keywords = @_;
+
+    return if !@new_keywords;
+
+    push( @{ $self->{ "_keywords" } }, @new_keywords );
+
+} # add_keywords
+
+
+=head2 remove_keywords
+
+ Title   : remove_keywords
+ Usage   : $obj->remove_keywords();
+ Function: Deletes (and returns) the list of key words [scalars,
+           most likely Strings] associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub remove_keywords {
+    # Empties the keyword list and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_keyword();
+    $self->{ "_keywords" } = [];
+
+    return @removed;
+
+} # remove_keywords
+
+
+
+
+=head2 each_DBLink
+
+ Title   : each_DBLink()
+ Usage   : @dbls = $obj->each_DBLink();                 
+ Function: Returns a list of Bio::Annotation::DBLink objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::DBLink objects.
+ Args    :
+
+=cut
+
+sub each_DBLink {
+    # Returns the stored database links as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_db_links" } );
+    return @{ $self->{ "_db_links" } };
+}
+
+
+=head2 add_DBLinks
+
+ Title   : add_DBLinks
+ Usage   : $obj->add_DBLinks( @dbls );
+           or
+           $obj->add_DBLinks( $dbl );                  
+ Function: Pushes one or more Bio::Annotation::DBLink objects
+           into the list of DBLinks.
+ Returns : 
+ Args    : Bio::Annotation::DBLink object(s).
+
+=cut
+
+sub add_DBLinks {
+    # Appends the given objects to the list of database links.  Each one
+    # is first checked against Bio::Annotation::DBLink with
+    # _check_ref_type(); nothing is stored unless all checks pass.
+    my $self       = shift;
+    my @candidates = @_;
+
+    return if !@candidates;
+
+    $self->_check_ref_type( $_, "Bio::Annotation::DBLink" ) for @candidates;
+
+    push( @{ $self->{ "_db_links" } }, @candidates );
+
+} # add_DBLinks
+
+
+=head2 remove_DBLinks
+
+ Title   : remove_DBLinks
+ Usage   : $obj->remove_DBLinks();
+ Function: Deletes (and returns) the list of Bio::Annotation::DBLink objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::DBLink objects.
+ Args    :
+
+=cut
+
+sub remove_DBLinks {
+    # Empties the list of database links and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_DBLink();
+    $self->{ "_db_links" } = [];
+
+    return @removed;
+
+} # remove_DBLinks
+
+
+
+
+=head2 each_Genotype
+
+ Title   : each_Genotype()
+ Usage   : @gts = $obj->each_Genotype();                 
+ Function: Returns a list of "Genotype" objects
+           associated with this phenotype.
+           {* the "genotypes" data member and its methods certainly will/needs to be
+           changed/improved in one way or another since there is
+           no "Genotype" class yet, CZ 09/06/02 *}
+ Returns : A list of "Genotype" objects.
+ Args    :
+
+=cut
+
+sub each_Genotype {
+    # Returns the stored genotypes as a list (empty if none stored).
+    my $self = shift;
+
+    return () unless exists( $self->{ "_genotypes" } );
+    return @{ $self->{ "_genotypes" } };
+} # each_Genotype
+
+
+=head2 add_Genotypes
+
+ Title   : add_Genotypes
+ Usage   : $obj->add_Genotypes( @gts );
+           or
+           $obj->add_Genotypes( $gt );                  
+ Function: Pushes one or more "Genotypes"
+           into the list of "Genotypes".
+ Returns : 
+ Args    : "Genotypes(s)".
+
+=cut
+
+sub add_Genotypes {
+    # Appends the given values to the list of genotypes.
+    # Does nothing when called without values.
+    my $self          = shift;
+    my @new_genotypes = @_;
+
+    return if !@new_genotypes;
+
+    # No type check is performed here; the check below is disabled.
+    #foreach my $value ( @values ) {  
+    #    $self->_check_ref_type( $value, "Bio::GenotypeI" );
+    #}
+
+    push( @{ $self->{ "_genotypes" } }, @new_genotypes );
+
+} # add_Genotypes
+
+
+=head2 remove_Genotypes
+
+ Title   : remove_Genotypes
+ Usage   : $obj->remove_Genotypes();
+ Function: Deletes (and returns) the list of "Genotype" objects
+           associated with this phenotype.
+ Returns : A list of "Genotype" objects.
+ Args    :
+
+=cut
+
+sub remove_Genotypes {
+    # Empties the list of genotypes and returns what it held.
+    my $self = shift;
+
+    my @removed = $self->each_Genotype();
+    $self->{ "_genotypes" } = [];
+
+    return @removed;
+
+} # remove_Genotypes
+
+
+=head2 _check_ref_type
+
+ Title   : _check_ref_type
+ Usage   : $self->_check_ref_type( $value, "Bio::Annotation::DBLink" );
+ Function: Checks for the correct type.
+ Returns : 
+ Args    : The value to be checked, the expected class.
+
+=cut
+
+sub _check_ref_type {
+    # Verifies that $value is an object of (or derived from)
+    # $expected_class and calls $self->throw() otherwise.  The message is
+    # prefixed with the name of the sub that called the checking method,
+    # taken from caller(1).
+    #
+    # Args    : the value to be checked, the expected class.
+    # Throws  : if $value is undef, not a reference, or a reference that
+    #           is not an instance of $expected_class (this includes
+    #           unblessed references such as plain hashes and arrays).
+    my ( $self, $value, $expected_class ) = @_;
+
+    if ( ! defined( $value ) ) {
+        $self->throw( ( caller( 1 ) )[ 3 ] .": Found [undef" 
+        ."] where [$expected_class] expected" );
+    }
+    elsif ( ! ref( $value ) ) {
+        $self->throw( ( caller( 1 ) )[ 3 ] .": Found scalar"
+        ." where [$expected_class] expected" );
+    } 
+    else {
+        # Calling isa() on an unblessed reference dies inside perl; trap
+        # that so such values get the regular error message as well.
+        my $is_expected = do {
+            local $@;    # leave the caller's error variable untouched
+            eval { $value->isa( $expected_class ) };
+        };
+        if ( ! $is_expected ) {
+            $self->throw( ( caller( 1 ) )[ 3 ] .": Found [". ref( $value ) 
+            ."] where [$expected_class] expected" );
+        }
+    }
+} # _check_ref_type
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/PhenotypeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/PhenotypeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Phenotype/PhenotypeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,769 @@
+# $Id: PhenotypeI.pm,v 1.11.4.1 2006/10/02 23:10:22 sendu Exp $
+#
+# BioPerl module for Bio::Phenotype::PhenotypeI
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Phenotype::PhenotypeI - An interface for classes modeling phenotypes
+
+=head1 SYNOPSIS
+
+  #get Bio::Phenotype::PhenotypeI somehow
+
+  print $phenotype->name(), "\n";
+  print $phenotype->description(), "\n";
+
+  my @keywords = ( "achondroplasia", "dwarfism" );
+  $phenotype->add_keywords( @keywords ); 
+  foreach my $keyword ( $phenotype->each_keyword() ) {
+       print $keyword, "\n";
+  }
+  $phenotype->remove_keywords();
+
+
+  foreach my $gene_symbol ( $phenotype->each_gene_symbol() ) {
+       print $gene_symbol, "\n";
+  }
+
+  foreach my $corr ( $phenotype->each_Correlate() ) {
+       # Do something with $corr
+  }
+
+  foreach my $var ( $phenotype->each_Variant() ) {
+       # Do something with $var (mutation)
+  }
+
+  foreach my $measure ( $phenotype->each_Measure() ) {
+       # Do something with $measure
+  }
+
+
+=head1 DESCRIPTION
+
+This superclass defines common methods for classes modelling phenotypes.
+Bio::Phenotype::OMIM::OMIMentry is an example of an instantiable phenotype
+class (the design of this interface was partially guided by the need
+to model OMIM entries).
+Please note. This interface provides methods to associate mutations
+(methods "each_Variant", ...) and genotypes (methods "each_Genotype", ...) 
+with phenotypes. Yet, these aspects might need some future enhancements,
+especially since there is no "genotype" class yet.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Phenotype::PhenotypeI;
+use strict;    # every method below only uses lexical variables
+use base qw(Bio::Root::RootI);
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name( "r1" );
+           or
+           print $obj->name();
+ Function: Set/get for the name or id of this phenotype.
+ Returns : A name or id [scalar].
+ Args    : A name or id [scalar] (optional).
+
+=cut
+
+sub name {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # name
+
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description( "This is ..." );
+           or
+           print $obj->description();
+ Function: Set/get for the description of this phenotype.
+ Returns : A description [scalar].
+ Args    : A description [scalar] (optional).
+
+=cut
+
+sub description {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # description
+
+
+
+
+=head2 species
+
+ Title   : species
+ Usage   : $obj->species( $species );
+           or
+           $species = $obj->species();
+ Function: Set/get for the species of this phenotype.
+ Returns : A species [Bio::Species].
+ Args    : A species [Bio::Species] (optional).
+
+=cut
+
+sub species {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # species
+
+
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $obj->comment( "putative" );
+           or
+           print $obj->comment();
+ Function: Set/get for a comment about this phenotype.
+ Returns : A comment [scalar].
+ Args    : A comment [scalar] (optional).
+
+=cut
+
+sub comment {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # comment
+
+
+
+
+=head2 each_gene_symbol
+
+ Title   : each_gene_symbol()
+ Usage   : @gs = $obj->each_gene_symbol();                 
+ Function: Returns a list of gene symbols [scalars, most likely Strings]
+           associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_gene_symbol {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_gene_symbol
+
+
+=head2 add_gene_symbols
+
+ Title   : add_gene_symbols
+ Usage   : $obj->add_gene_symbols( @gs );
+           or
+           $obj->add_gene_symbols( $gs );                  
+ Function: Pushes one or more gene symbols [scalars, most likely Strings]
+           into the list of gene symbols.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_gene_symbols {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_gene_symbols
+
+
+=head2 remove_gene_symbols
+
+ Usage   : $obj->remove_gene_symbols();
+ Function: Deletes (and returns) the list of gene symbols [scalars,
+           most likely Strings] associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub remove_gene_symbols {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_gene_symbols
+
+
+
+
+=head2 each_Variant
+
+ Title   : each_Variant()
+ Usage   : @vs = $obj->each_Variant();                 
+ Function: Returns a list of Bio::Variation::VariantI implementing objects
+           associated with this phenotype.
+           This is for representing the actual mutation(s) causing this 
+           phenotype.
+           {* The "variants" data member and its methods will/might need to be
+           changed/improved in one way or another, CZ 09/06/02 *}
+ Returns : A list of Bio::Variation::VariantI implementing objects.
+ Args    :
+
+=cut
+
+sub each_Variant {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_Variant
+
+
+=head2 add_Variants
+
+ Usage   : $obj->add_Variants( @vs );
+           or
+           $obj->add_Variants( $v );                  
+ Function: Pushes one or more Bio::Variation::VariantI implementing objects
+           into the list of Variants.
+ Returns : 
+ Args    : Bio::Variation::VariantI implementing object(s).
+
+=cut
+
+sub add_Variants {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_Variants
+
+
+=head2 remove_Variants
+
+ Title   : remove_Variants
+ Usage   : $obj->remove_Variants();
+ Function: Deletes (and returns) the list of Bio::Variation::VariantI implementing
+           objects associated with this phenotype.
+ Returns : A list of Bio::Variation::VariantI implementing objects.
+ Args    :
+
+=cut
+
+sub remove_Variants {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_Variants
+
+
+
+
+=head2 each_Reference
+
+ Title   : each_Reference()
+ Usage   : @refs = $obj->each_Reference();                 
+ Function: Returns a list of Bio::Annotation::Reference objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::Reference objects.
+ Args    :
+
+=cut
+
+sub each_Reference {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_Reference
+
+
+=head2 add_References 
+
+ Title   : add_References
+ Usage   : $obj->add_References( @refs );
+           or
+           $obj->add_References( $ref );                  
+ Function: Pushes one or more Bio::Annotation::Reference objects
+           into the list of References.
+ Returns : 
+ Args    : Bio::Annotation::Reference object(s).
+
+=cut
+
+sub add_References {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_References
+
+
+=head2 remove_References
+
+ Title   : remove_References()
+ Usage   : $obj->remove_References();
+ Function: Deletes (and returns) the list of Bio::Annotation::Reference objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::Reference objects.
+ Args    :
+
+=cut
+
+sub remove_References {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_References
+
+
+
+
+=head2 each_CytoPosition
+
+ Title   : each_CytoPosition()
+ Usage   : @cps = $obj->each_CytoPosition();                 
+ Function: Returns a list of Bio::Map::CytoPosition objects
+           associated with this phenotype.
+ Returns : A list of Bio::Map::CytoPosition objects.
+ Args    :
+
+=cut
+
+sub each_CytoPosition {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_CytoPosition
+
+
+=head2 add_CytoPositions
+
+ Title   : add_CytoPositions
+ Usage   : $obj->add_CytoPositions( @cps );
+           or
+           $obj->add_CytoPositions( $cp );                  
+ Function: Pushes one or more Bio::Map::CytoPosition objects
+           into the list of CytoPositions.
+ Returns : 
+ Args    : Bio::Map::CytoPosition object(s).
+
+=cut
+
+sub add_CytoPositions {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_CytoPositions
+
+
+=head2 remove_CytoPositions
+
+ Title   : remove_CytoPositions
+ Usage   : $obj->remove_CytoPositions();
+ Function: Deletes (and returns) the list of Bio::Map::CytoPosition objects
+           associated with this phenotype.
+ Returns : A list of Bio::Map::CytoPosition objects.
+ Args    :
+
+=cut
+
+sub remove_CytoPositions {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_CytoPositions
+
+
+
+
+=head2 each_Correlate
+
+ Title   : each_Correlate()
+ Usage   : @corrs = $obj->each_Correlate();                 
+ Function: Returns a list of Bio::Phenotype::Correlate objects
+           associated with this phenotype.
+           (Correlates are correlating phenotypes in different species;
+           inspired by mouse correlates of human phenotypes in the OMIM
+           database.)
+ Returns : A list of Bio::Phenotype::Correlate objects.
+ Args    :
+
+=cut
+
+sub each_Correlate {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_Correlate
+
+
+
+
+=head2 add_Correlates
+
+ Title   : add_Correlates
+ Usage   : $obj->add_Correlates( @corrs );
+           or
+           $obj->add_Correlates( $corr );                  
+ Function: Pushes one or more Bio::Phenotype::Correlate objects
+           into the list of Correlates.
+ Returns : 
+ Args    : Bio::Phenotype::Correlate object(s).
+
+=cut
+
+sub add_Correlates {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_Correlates
+
+
+=head2 remove_Correlates
+
+ Title   : remove_Correlates
+ Usage   : $obj->remove_Correlates();
+ Function: Deletes (and returns) the list of Bio::Phenotype::Correlate objects
+           associated with this phenotype.
+ Returns : A list of Bio::Phenotype::Correlate objects.
+ Args    :
+
+=cut
+
+sub remove_Correlates {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_Correlates
+
+
+
+
+=head2 each_Measure
+
+ Title   : each_Measure()
+ Usage   : @ms = $obj->each_Measure();                 
+ Function: Returns a list of Bio::Phenotype::Measure objects
+           associated with this phenotype.
+           (Measure is for biochemically defined phenotypes
+           or any other types of measures.)
+ Returns : A list of Bio::Phenotype::Measure objects.
+ Args    :
+
+=cut
+
+sub each_Measure {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_Measure
+
+
+=head2 add_Measures
+
+ Title   : add_Measures
+ Usage   : $obj->add_Measures( @ms );
+           or
+           $obj->add_Measures( $m );                  
+ Function: Pushes one or more Bio::Phenotype::Measure objects
+           into the list of Measures.
+ Returns : 
+ Args    : Bio::Phenotype::Measure object(s).
+
+=cut
+
+sub add_Measures {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_Measures
+
+
+=head2 remove_Measures
+
+ Title   : remove_Measures
+ Usage   : $obj->remove_Measures();
+ Function: Deletes (and returns) the list of Bio::Phenotype::Measure objects
+           associated with this phenotype.
+ Returns : A list of Bio::Phenotype::Measure objects.
+ Args    :
+
+=cut
+
+sub remove_Measures {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_Measures
+
+
+
+
+=head2 each_keyword
+
+ Title   : each_keyword()
+ Usage   : @kws = $obj->each_keyword();                 
+ Function: Returns a list of key words [scalars, most likely Strings]
+           associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub each_keyword {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_keyword
+
+
+=head2 add_keywords
+
+ Title   : add_keywords
+ Usage   : $obj->add_keywords( @kws );
+           or
+           $obj->add_keywords( $kw );                  
+ Function: Pushes one or more keywords [scalars, most likely Strings]
+           into the list of key words.
+ Returns : 
+ Args    : scalar(s).
+
+=cut
+
+sub add_keywords {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_keywords
+
+
+=head2 remove_keywords
+
+ Title   : remove_keywords
+ Usage   : $obj->remove_keywords();
+ Function: Deletes (and returns) the list of key words [scalars,
+           most likely Strings] associated with this phenotype.
+ Returns : A list of scalars.
+ Args    :
+
+=cut
+
+sub remove_keywords {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_keywords
+
+
+
+
+=head2 each_DBLink
+
+ Title   : each_DBLink()
+ Usage   : @dbls = $obj->each_DBLink();                 
+ Function: Returns a list of Bio::Annotation::DBLink objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::DBLink objects.
+ Args    :
+
+=cut
+
+sub each_DBLink {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_DBLink
+
+
+=head2 add_DBLinks
+
+ Title   : add_DBLinks
+ Usage   : $obj->add_DBLinks( @dbls );
+           or
+           $obj->add_DBLinks( $dbl );                  
+ Function: Pushes one or more Bio::Annotation::DBLink objects
+           into the list of DBLinks.
+ Returns : 
+ Args    : Bio::Annotation::DBLink object(s).
+
+=cut
+
+sub add_DBLinks {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_DBLinks
+
+
+=head2 remove_DBLinks
+
+ Title   : remove_DBLinks
+ Usage   : $obj->remove_DBLinks();
+ Function: Deletes (and returns) the list of Bio::Annotation::DBLink objects
+           associated with this phenotype.
+ Returns : A list of Bio::Annotation::DBLink objects.
+ Args    :
+
+=cut
+
+sub remove_DBLinks {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_DBLinks
+
+
+
+
+=head2 each_Genotype
+
+ Title   : each_Genotype()
+ Usage   : @gts = $obj->each_Genotype();                 
+ Function: Returns a list of "Genotype" objects
+           associated with this phenotype.
+           {* the "genotypes" data member and its methods certainly will/needs to be
+           changed/improved in one way or another since there is
+           no "Genotype" class yet, CZ 09/06/02 *}
+ Returns : A list of "Genotype" objects.
+ Args    :
+
+=cut
+
+sub each_Genotype {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # each_Genotype
+
+
+=head2 add_Genotypes
+
+ Title   : add_Genotypes
+ Usage   : $obj->add_Genotypes( @gts );
+           or
+           $obj->add_Genotypes( $gt );                  
+ Function: Pushes one or more "Genotypes"
+           into the list of "Genotypes".
+ Returns : 
+ Args    : "Genotypes(s)".
+
+=cut
+
+sub add_Genotypes {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # add_Genotypes
+
+
+=head2 remove_Genotypes
+
+ Title   : remove_Genotypes
+ Usage   : $obj->remove_Genotypes();
+ Function: Deletes (and returns) the list of "Genotype" objects
+           associated with this phenotype.
+ Returns : A list of "Genotype" objects.
+ Args    :
+
+=cut
+
+sub remove_Genotypes {
+    my $self = shift;
+
+    # Abstract method: implementing classes must provide it.
+    return $self->throw_not_implemented();
+} # remove_Genotypes
+
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Genotype.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Genotype.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Genotype.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+# $Id: Genotype.pm,v 1.10.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Genotype
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Genotype - An implementation of GenotypeI which is just an allele container
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::Genotype;
+  my $genotype = new Bio::PopGen::Genotype(-marker_name   => $name,
+                                           -individual_id => $indid,
+                                           -alleles       => \@alleles);
+
+=head1 DESCRIPTION
+
+This object will contain alleles for a given marker for a given
+individual.
+
+The class variable BlankAlleles (accessible through
+$Bio::PopGen::Genotype::BlankAlleles = 'somepattern') can be set to a
+regexp pattern for identifying blank alleles which should not be
+counted (they are effectively missing data).  By default it is set to
+match white space, '-', 'N' or 'n', and '?' as blank alleles which are
+skipped.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Genotype;
+use vars qw($BlankAlleles);
+use strict;
+
+# Source of the regexp get_Alleles() uses to recognise blank (missing) alleles;
+# it is matched case-insensitively, ignoring whitespace around the allele.
+$BlankAlleles = '[\s\-N\?]';
+
+# Object preamble - inherits from Bio::Root::Root
+# (and from the Bio::PopGen::GenotypeI interface)
+
+use base qw(Bio::Root::Root Bio::PopGen::GenotypeI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Genotype();
+ Function: Builds a new Bio::PopGen::Genotype object 
+ Returns : an instance of Bio::PopGen::Genotype
+ Args    : -marker_name   => string representing name of the marker
+           -individual_id => string representing individual id (optional)
+           -alleles       => arrayref with each item in the array being an allele
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my ($marker_name, $ind_id, $alleles) =
+      $self->_rearrange([qw(MARKER_NAME INDIVIDUAL_ID ALLELES)], @args);
+  # Only store values that were actually supplied.
+  $self->marker_name($marker_name) if defined $marker_name;
+  $self->individual_id($ind_id)    if defined $ind_id;
+  if( defined $alleles ) {
+      # Accepts any reference whose type/class name contains "array".
+      if( ref($alleles) =~ /array/i ) {
+          $self->add_Allele(@$alleles);
+      } else {
+          $self->warn("Could not initialize with -alleles value, it is not an array ref");
+      }
+  }
+  return $self;
+}
+
+
+=head2 marker_name
+
+ Title   : marker_name
+ Usage   : my $name = $genotype->marker_name();
+ Function: Get the marker name for a genotype result
+ Returns : string
+ Args    : [optional] marker name value to store
+
+
+=cut
+
+sub marker_name{
+    my ($self, @value) = @_;
+    # Acts as a setter when an argument is given (even undef), else a getter.
+    $self->{'_marker_name'} = $value[0] if @value;
+    return $self->{'_marker_name'};
+}
+
+=head2 individual_id
+
+ Title   : individual_id
+ Usage   : my $indid = $genotype->individual_id();
+ Function: Gets the individual id associated with a genotype
+           This is effectively a back reference since we will typically
+           associate a genotype with an individual with an 
+           individual HAS-A genotype relationship.
+ Returns : unique id string for an individual
+ Args    : [optional] individual id value to store
+
+
+=cut
+
+sub individual_id {
+    my ($self, @value) = @_;
+    # Acts as a setter when an argument is given (even undef), else a getter.
+    $self->{'_individual_id'} = $value[0] if @value;
+    return $self->{'_individual_id'};
+}
+
+=head2 get_Alleles
+
+ Title   : get_Alleles
+ Usage   : my @alleles = $genotype->get_Alleles();
+ Function: Get the alleles for a given marker and individual
+ Returns : array of alleles (strings in this implementation)
+ Args    : $showblank - boolean flag to indicate return ALL alleles not 
+                        skipping the coded EMPTY alleles
+
+ Note    : Uses the class variable $BlankAlleles to test if alleles
+           should be skipped or not.
+
+=cut
+
+sub get_Alleles{
+    my ($self,$showblank) = @_;
+    my $all = $self->{'_alleles'} || [];
+    return @$all if $showblank;
+
+    # The blank-free list is cached until add_Allele()/reset_Alleles()
+    # clear it; changing $BlankAlleles does not invalidate the cache.
+    # No /o on the match: with /o the pattern was frozen at first use, so
+    # later assignments to $BlankAlleles were silently ignored.
+    $self->{'_cached_noblank'} = [ grep { ! /^\s*$BlankAlleles\s*$/i } @$all ]
+        unless defined $self->{'_cached_noblank'};
+
+    return @{$self->{'_cached_noblank'}};
+}
+
+=head2 add_Allele
+
+ Title   : add_Allele
+ Usage   : $genotype->add_Allele(@alleles);
+ Function: Add alleles to the genotype, at this point there is no
+           verification to insure that haploid individuals only have 1 
+           allele or that diploids only have 2 - we assume that is
+           done by the user creating these objects 
+ Returns : count of the number of alleles in genotype
+ Args    : Array of alleles to store
+
+
+=cut
+
+sub add_Allele {
+    my ($self, @alleles) = @_;
+    $self->{'_cached_noblank'} = undef;    # invalidate the get_Alleles() cache
+    push @{$self->{'_alleles'}}, @alleles;
+    return scalar @{$self->{'_alleles'}};  # total number of stored alleles
+}
+
+=head2 reset_Alleles
+
+ Title   : reset_Alleles
+ Usage   : $genotype->reset_Alleles;
+ Function: Resets the stored alleles so the list is empty
+ Returns : None
+ Args    : None
+
+
+=cut
+
+sub reset_Alleles{
+   my ($self) = @_;                       # no other arguments are used
+   $self->{'_cached_noblank'} = undef;    # drop the get_Alleles() cache too
+   $self->{'_alleles'} = [];
+   return 0;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/GenotypeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/GenotypeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/GenotypeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+# $Id $
+#
+# BioPerl module for Bio::PopGen::GenotypeI
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::GenotypeI - A marker and alleles for a specific individual
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the interface here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::GenotypeI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 marker_name
+
+ Title   : marker_name
+ Usage   : my $name = $genotype->marker_name();
+ Function: Get the marker name for a genotype result
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub marker_name{
+    my $self = shift;
+    return $self->throw_not_implemented();    # abstract: implementations override this
+}
+
+=head2 individual_id
+
+ Title   : individual_id
+ Usage   : my $indid = $genotype->individual_id();
+ Function: Gets the individual id associated with a genotype
+           This is effectively a back reference since we will typically
+           associate a genotype with an individual with an 
+           individual HAS-A genotype relationship.
+ Returns : unique id string for an individual
+ Args    : none
+
+
+=cut
+
+sub individual_id{
+    my $self = shift;
+    return $self->throw_not_implemented();    # abstract: implementations override this
+}
+
+=head2 get_Alleles
+
+ Title   : get_Alleles
+ Usage   : my @alleles = $genotype->get_Alleles();
+ Function: Get the alleles for a given marker and individual
+ Returns : array of alleles (strings in many implementations)
+ Args    : none
+
+
+=cut
+
+sub get_Alleles{
+    my $self = shift;
+    return $self->throw_not_implemented();    # abstract: implementations override this
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/HtSNP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/HtSNP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/HtSNP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1691 @@
+# module Bio::PopGen::HtSNP.pm
+# cared by Pedro M. Gomez-Fabre <pgf18872-at-gsk-dot-com>
+#
+#
+
+=head1 NAME
+
+Bio::PopGen::HtSNP - Select htSNP from a haplotype set
+
+=head1 SYNOPSIS
+
+    use Bio::PopGen::HtSNP;
+
+    my $obj = Bio::PopGen::HtSNP->new($hap,$snp,$pop);
+
+=head1 DESCRIPTION
+
+Select the minimal set of SNP that contains the full information about
+the haplotype without redundancies.
+
+Take as input the following values:
+
+=over 4
+
+=item - the haplotype block (array of array).
+
+=item - the snp id (array).
+
+=item - family information and frequency (array of array).
+
+=back
+
+The final haplotype is generated in a numerical format and the SNP
+sets can be retrieved from the module.
+
+B<considerations:>
+
+
+- If you force the inclusion of a family with indeterminations, the SNPs
+with indeterminations will be removed from the analysis, so consider
+what you really want to do before you submit your data set.
+
+- If two families have the same information (identical haplotype), one
+of them will be removed, and the removed ones will be stored and
+classified as removed.
+
+- Only A, C, G, T and - (as deletion) and their combinations are
+accepted for the calculation. Any other value, such as n or ?, will be
+considered a degeneration due to lack of information.
+
+=head2 RATIONALE
+
+In a haplotype set it is expected that some of the SNPs and their
+variations contribute in the same way to the haplotype. Eliminating
+redundancies will produce a minimal set of SNPs that can be used as
+input for a tagging selection process. In the process, SNPs with the
+same variation are clustered in the same group.
+
+The idea is that, because the haplotype tagging process is
+exponential, all the redundant information we can eliminate before the
+tagging process will help to find a result quickly.
+
+=head2 CONSTRUCTORS
+
+  my $obj = Bio::PopGen::HtSNP->new
+    (-haplotype_block => \@haplotype_patterns,
+     -snp_ids         => \@snp_ids,
+     -pattern_freq    => \@pattern_name_and_freq);
+
+where  $hap, $snp and $pop are in the format:
+
+  my $hap = [
+             'acgt',
+             'agtc',
+             'cgtc'
+            ];                     # haplotype patterns' id
+
+  my $snp = [qw/s1 s2 s3 s4/];     # snps' Id's
+
+  my $pop = [
+             [qw/ uno    0.20/],
+             [qw/ dos    0.20/],
+             [qw/ tres   0.15/],
+            ];                     # haplotype_pattern_id    Frequency
+
+=head2 OBJECT METHODS
+
+    See Below for more detailed summaries.
+
+
+=head1 DETAILS
+
+=head2 How the process is working with one example
+
+Let's begin with one general example of the code.
+
+Input haplotype:
+
+  acgtcca-t
+  cggtagtgc
+  cccccgtgc
+  cgctcgtgc
+
+The first thing to do is to B<split the haplotype> into characters.
+
+  a       c       g       t       c       c       a       -       t
+  c       g       g       t       a       g       t       g       c
+  c       c       c       c       c       g       t       g       c
+  c       g       c       t       c       g       t       g       c
+
+Now we have to B<convert> the haplotype to B<uppercase>. This
+will produce the same SNP whether the input is a or A.
+
+  A       C       G       T       C       C       A       -       T
+  C       G       G       T       A       G       T       G       C
+  C       C       C       C       C       G       T       G       C
+  C       G       C       T       C       G       T       G       C
+
+The program accepts as values any combination of ACTG and - (deletions).
+The haplotype is B<converted to numbers>, considering the first variation
+as zero and the alternate value as 1 (see expanded description below).
+
+  0       0       0       0       0       0       0       0       0
+  1       1       0       0       1       1       1       1       1
+  1       0       1       1       0       1       1       1       1
+  1       1       1       0       0       1       1       1       1
+
+Once we have the haplotype converted to numbers we have to generate the
+snp type information for the haplotype.
+
+
+B<SNP code = SUM ( value * multiplicity ^ position );>
+
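+    where:
+      SUM is the sum over all the haplotypes (rows) of the block
+      value is the SNP number code (0 [generally for the major allele],
+                                    1 [for the minor allele]).
+      multiplicity is the number of variations found on the set
+                   (2 in this example).
+      position is the position of the haplotype (row) on the block,
+               counted from 0.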
+
+For this example the code is:
+
+  0       0       0       0       0       0       0       0       0
+  1       1       0       0       1       1       1       1       1
+  1       0       1       1       0       1       1       1       1
+  1       1       1       0       0       1       1       1       1
+ ------------------------------------------------------------------
+  14      10      12      4       2       14      14      14      14
+
+  14 = 0*2^0 + 1*2^1 + 1*2^2 + 1*2^3
+  10 = 0*2^0 + 1*2^1 + 0*2^2 + 1*2^3
+  ....
+
+Once we have the families classified, we will B<take> just the
+B<non-redundant> SNPs.
+
+  14      10      12      4       2
+
+This information will be B<passed to the tag module> if you want to tag
+the htSNP.
+
+Whatever happens to one SNP of a class will happen to every other SNP of
+the same class. Therefore you don't need to scan redundancies.
+
+=head2 Working with fuzzy data.
+
+This module is designed to work with fuzzy data. As the sources of the
+haplotypes are diverse, the program assumes that some haplotypes can be
+generated using different values. If there is any indetermination (? or n)
+or any other degenerated or invalid value, the program will take that
+SNP away and leave it for a further analysis.
+
+On a complex situation:
+
+  a       c       g       t       ?       c       a       c       t
+  a       c       g       t       ?       c       a       -       t
+  c       g       ?       t       a       g       ?       g       c
+  c       a       c       t       c       g       t       g       c
+  c       g       c       t       c       g       t       g       c
+  c       g       g       t       a       g       ?       g       c
+  a       c       ?       t       ?       c       a       c       t
+
+In this haplotype everything is happening. We have a multiallelic variance.
+We have indeterminations. We have deletions and we even have one SNP
+which is not a real SNP.
+
+The building process will be the same in this situation.
+
+Convert the haplotype to uppercase.
+
+  A       C       G       T       ?       C       A       C       T
+  A       C       G       T       ?       C       A       -       T
+  C       G       ?       T       A       G       ?       G       C
+  C       A       C       T       C       G       T       G       C
+  C       G       C       T       C       G       T       G       C
+  C       G       G       T       A       G       ?       G       C
+  A       C       ?       T       ?       C       A       C       T
+
+All columns that present indeterminations will be removed from the analysis
+in this step.
+
+Haplotype after removing the columns:
+
+  A       C       T       C       C       T
+  A       C       T       C       -       T
+  C       G       T       G       G       C
+  C       A       T       G       G       C
+  C       G       T       G       G       C
+  C       G       T       G       G       C
+  A       C       T       C       C       T
+
+All changes made on the haplotype matrix, will be also made on the SNP list.
+
+  snp_id_1 snp_id_2 snp_id_4 snp_id_6 snp_id_8 snp_id_9
+
+Now the SNP that is not a real SNP will be removed from the analysis:
+the SNP with Id snp_id_4 (the one with all T's).
+
+
+Because of the removal, some of the families will become identical and will
+be clustered. A posteriori analysis will differentiate these families,
+but because of the indetermination they cannot be distinguished here.
+
+  A       C       C       C       T
+  A       C       C       -       T
+  C       G       G       G       C
+  C       A       G       G       C
+  C       G       G       G       C
+  C       G       G       G       C
+  A       C       C       C       T
+
+The result of the merging will look like:
+
+  A       C       C       C       T
+  A       C       C       -       T
+  C       G       G       G       C
+  C       A       G       G       C
+
+Once again the changes are also made on the families, and we merge the
+frequencies (I<to be implemented>).
+
+Before converting the haplotype into numbers we consider how many variations
+we have on the set. In this case there are 3 variations.
+
+In this situation the control code will use base three as multiplicity.
+
+  0       0       0       0       0
+  0       0       0       1       0
+  1       1       1       2       1
+  1       2       1       2       1
+ -----------------------------------
+  36      63      36      75      36
+
+And the minimal set for this combination is
+
+  0       0       0
+  0       0       1
+  1       1       2
+  1       2       2
+
+B<NOTE:> this second example is a remote one. Under normal conditions
+such a combination makes no sense, but as the haplotypes can come from
+many sources we have to be ready for all kinds of combinations.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Pedro M. Gomez-Fabre
+
+Email pgf18872-at-gsk-dot-com
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::PopGen::HtSNP;
+use Data::Dumper;
+use Storable qw(dclone);
+
+use vars qw ();
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+# Usage banner appended to the constructor's error messages. The option
+# names listed here mirror the keys new() hands to _rearrange
+# (HAPLOTYPE_BLOCK, SNP_IDS, PATTERN_FREQ).
+my $USAGE = 'Usage:
+
+    Bio::PopGen::HtSNP->new(-haplotype_block -snp_ids -pattern_freq)
+
+';
+
+=head2 new
+
+ Title   : new
+ Function: constructor of the class.
+ Usage   : $obj = Bio::PopGen::HtSNP->new(-haplotype_block
+                                          -snp_ids
+                                          -pattern_freq)
+ Returns : self hash
+ Args    : input haplotype (array of array)
+           snp_ids         (array)
+           pop_freq        (array of array)
+ Status  : public
+
+=cut
+
+sub new {
+    # Constructor: stores the three mandatory inputs, validates them and
+    # runs the whole htSNP selection before returning the object.
+    # Throws (through $self->throw) when any of the three inputs is missing.
+    my($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # pick the named arguments out of the caller's argument list
+    my ($haplotype_block,
+        $snp_ids,
+        $pattern_freq    ) = $self->_rearrange([qw(HAPLOTYPE_BLOCK
+                                                   SNP_IDS
+                                                   PATTERN_FREQ)],@args);
+
+    if ($haplotype_block){
+        $self->haplotype_block($haplotype_block);
+    }
+    else{
+        $self->throw("Haplotype block has not been defined.
+                      \n$USAGE");
+    }
+    if ($snp_ids){
+        $self->snp_ids($snp_ids);
+    }
+    else{
+        $self->throw("Array with ids has not been defined.
+                      \n$USAGE");
+    }
+    if ($pattern_freq){
+        $self->pattern_freq($pattern_freq);
+    }
+    else{
+        $self->throw("Array with pattern id and frequency has not been defined.
+                      \n$USAGE");
+    }
+
+    # if the input values are not well formed, complain and stop.
+    _check_input($self);
+
+    # compute every result once, at construction time
+    _do_it($self);
+
+    return $self;
+}
+
+=head2 haplotype_block 
+
+ Title   : haplotype_block 
+ Usage   : my $haplotype_block = $HtSNP->haplotype_block();
+ Function: Get the haplotype block for a haplotype tagging selection
+ Returns : reference of array 
+ Args    : reference of array with haplotype pattern 
+
+
+=cut
+
+sub haplotype_block{
+    my ($self, @new_value) = @_;
+
+    # setter form: keep the supplied reference before handing it back
+    if (@new_value){
+        $self->{'_haplotype_block'} = $new_value[0];
+    }
+    return $self->{'_haplotype_block'};
+}
+
+=head2 snp_ids 
+
+ Title   : snp_ids 
+ Usage   : my $snp_ids = $HtSNP->snp_ids();
+ Function: Get the ids for a haplotype tagging selection
+ Returns : reference of array
+ Args    : reference of array with SNP ids
+
+
+=cut
+
+sub snp_ids{
+    # combined getter/setter for the SNP id list reference
+    my ($self, @rest) = @_;
+    $self->{'_snp_ids'} = $rest[0] if @rest;
+    return $self->{'_snp_ids'};
+}
+
+
+=head2 pattern_freq
+
+ Title   : pattern_freq
+ Usage   : my $pattern_freq = $HtSNP->pattern_freq();
+ Function: Get the pattern id and frequency  for a haplotype
+           tagging selection
+ Returns : reference of array
+ Args    : reference of array with pattern ids and frequencies
+
+=cut
+
+sub pattern_freq{
+    my $self = shift;
+
+    # with an argument: store it and return it; without one: plain getter
+    return @_ ? ( $self->{'_pattern_freq'} = shift )
+              : $self->{'_pattern_freq'};
+}
+
+=head2 _check_input
+
+ Title   : _check_input
+ Usage   : _check_input($self)
+ Function: check for errors on the input
+ Returns : self hash
+ Args    : self
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _check_input{
+    my ($self) = @_;
+
+    # run the two input validators in order: haplotype lengths first,
+    # then the shape and size of the pattern frequency table
+    _haplotype_length_error($self);
+    _population_error($self);
+}
+
+=head2 _haplotype_length_error
+
+ Title   : _haplotype_length_error
+ Usage   : _haplotype_length_error($self)
+ Function: check that the length of every haplotype is the same as the
+           number of ids on the SNP id list. If not, throw an exception.
+ Returns : self hash
+ Args    : self
+ Status  : internal
+
+=cut
+
+
+#------------------------
+sub _haplotype_length_error{
+#------------------------
+    # Verify that every haplotype string holds exactly one character per
+    # SNP id. Throws through $self->throw on the first haplotype whose
+    # length differs; returns nothing otherwise.
+
+    my $self = shift;
+
+    my $input_block = $self->haplotype_block();
+    my $snp_ids     = $self->snp_ids();
+
+    my $snp_number = scalar @$snp_ids;
+
+    for my $h (0 .. $#$input_block){
+        my $hap_length = length $input_block->[$h];
+        next if $hap_length == $snp_number;
+
+        # Report the offending family with a 1-based index. The message is
+        # built as ONE string: the previous form (length EXPR, "\n")
+        # handed "\n" to throw() as a separate second argument.
+        $self->throw("The number of snp ids is $snp_number and ".
+            "the length of the family (". ($h+1) .") [".
+             $input_block->[$h]."] is ".
+             $hap_length . "\n");
+        return;
+    }
+
+    return;
+}
+
+=head2 _population_error
+
+
+ Title   : _population_error
+ Usage   : _population_error($self)
+ Function: use input_block and pop_freq to test if the number of elements
+           match. If it doesn't, throw an exception.
+ Returns : self hash
+ Args    : self
+ Status  : internal
+
+=cut
+
+
+#------------------------
+sub _population_error{
+#------------------------
+    # Validate the pattern frequency table against the haplotype block:
+    #  - every row must hold exactly two elements (pattern id, frequency)
+    #  - the table must have one row per haplotype pattern
+    # Warns (through $self->warn) when the frequencies add up to more
+    # than 1, and throws (through $self->throw) on a malformed row or a
+    # row-count mismatch.
+
+    my $self = shift;
+
+    my $input_block = $self->haplotype_block();
+    my $pop_freq    = $self->pattern_freq();
+
+    my $number_of_families = scalar @$input_block;
+
+    my $pop_freq_elements_error = 0; # set when a malformed row is found
+    my $pf         = 0; # row being checked; number of rows after a clean pass
+    my $frequency  = 0; # sum of the frequencies of the well formed rows
+    my $p_f_length = 0; # number of elements of the malformed row
+
+    for  ($pf=0; $pf<@$pop_freq; $pf++){
+        my $row = $pop_freq->[$pf];
+
+        # check the shape of the row BEFORE reading its frequency, so a
+        # short row never contributes an undefined value to the sum
+        if ( scalar @$row != 2){
+            $p_f_length = scalar @$row;
+            $pop_freq_elements_error = 1;
+            last;
+        }
+
+        $frequency += $row->[1];
+    }
+
+    ###########################
+    ## error processing
+    ###########################
+
+    # The frequency shouldn't be greater than 1
+    if ($frequency >1) {
+        $self->warn("The frequency for this set is $frequency (greater than 1)\n");
+    }
+
+    # the frequency matrix is not well formed
+    if ($pop_freq_elements_error){
+        $self->throw("the frequency matrix is not well formed\n".
+             "\nThe number of elements for pattern ".($pf+1)." is ".
+             "$p_f_length\n".
+             "It should be 2 for pattern \"@{$pop_freq->[$pf]}\"\n".
+             "\nFormat should be:\n".
+             "haplotype_id\t frequency\n"
+            );
+    }
+
+    # the size of the pop_freq array does not fit
+    # with the one in haplotype (input_block)
+    if ($pf != $number_of_families) {
+        $self->throw("The number of patterns on frequency array ($pf)\n".
+             "does not fit with the number of haplotype patterns on \n". 
+             "haplotype array ($number_of_families)\n");
+    }
+}
+
+=head2 _do_it
+
+
+ Title   : _do_it
+ Usage   : _do_it($self)
+ Function: Process the input generating the results.
+ Returns : self hash
+ Args    : self
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _do_it{
+#------------------------
+    # Drives the whole selection: resets the working state kept on the
+    # object and then runs the processing steps in a fixed order.
+
+    my ($self) = @_;
+
+    # ---- working state ------------------------------------------------
+    $self->{'w_hap'}          = [];   # working haplotype matrix
+    $self->{'w_pop_freq'}     = dclone( $self->pattern_freq() );
+    $self->{'deg_pattern'}    = {};
+    $self->{'alleles_number'} = 0;    # number of variations (biallelic,...)
+    $self->{'snp_type_code'}  = [];
+    $self->{'ht_type'}        = [];   # snp types used on the htSet
+    $self->{'split_hap'}      = [];
+    $self->{'snp_and_code'}   = [];
+
+    # the SNP ids are classified in three groups under snp_type
+    $self->{'snp_type'}       = {
+        useful_snp => dclone( $self->snp_ids() ),
+        deg_snp    => [],             # degenerated snp
+        silent_snp => [],             # not a real snp
+    };
+
+    # ---- pre-processing -----------------------------------------------
+
+    # split every haplotype string into single characters
+    _split_haplo($self);
+
+    # upper-case the matrix so that 'a' compares equal to 'A'
+    _to_upper_case( $self->{'w_hap'} );
+
+    # SNPs holding an indetermination are taken out of the analysis
+    _remove_deg($self);
+
+    # silent SNPs (no variation over the families in use) cannot
+    # contribute to a tag and are taken out as well
+    _rem_silent_snp($self);
+
+    # Once columns have been removed two families can end up with the
+    # same value; such redundant families produce no pattern and must be
+    # set aside (they may be analysed on a second round).
+    _find_deg_pattern($self);
+
+    # Families to keep, taken from the keys of deg_pattern. The numeric
+    # sort is not required for correctness; it only makes the generated
+    # codes identical from run to run, because hash order is not stable.
+    # NOTE: the pattern frequency is not used to rank the families here;
+    # this would be the place to do it.
+    my @kept = sort { $a <=> $b } keys %{ $self->{'deg_pattern'} };
+
+    # fewer kept families than rows means redundancy: trim the working
+    # matrix and the frequency table down to the kept families
+    if ( $#kept != $#{ $self->{'w_hap'} } ){
+        _keep_these_patterns( $self->{'w_hap'},      \@kept );
+        _keep_these_patterns( $self->{'w_pop_freq'}, \@kept );
+    }
+
+    # ---- calculation --------------------------------------------------
+
+    # maximal number of alleles found on the set; a normal haplotype is
+    # biallelic but multiple variations cannot be rejected
+    _alleles_number($self);
+
+    # translate the letters of the matrix into numbers, e.g.
+    #
+    #   A  C  C  -  T          0  0  0  0  0
+    #   C  A  G  G  C   --->   1  1  1  1  1
+    #   A  C  C  C  T          0  0  0  2  0
+    #   C  G  G  G  C          1  2  1  1  1
+    _convert_to_numbers($self);
+
+    # type code of every SNP, based on position, value and multiplicity
+    _snp_type_code($self);
+
+    # everything needed to select the haplotype tagging SNPs is ready
+    _htSNP($self);
+
+    # patch: compile the full SNP/code list, including the SNPs that were
+    # not used in the selection
+    _snp_and_code_summary($self);
+}
+
+=head2 input_block
+
+ Title   : input_block
+ Usage   : $obj->input_block()
+ Function: returns input block
+ Returns : reference to array of array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub input_block{
+#------------------------
+    # Return the haplotype block handed to the constructor.
+    #
+    # NOTE(review): nothing in the visible part of this module assigns
+    # the 'input_block' key, while the haplotype_block() accessor keeps
+    # the caller's block under '_haplotype_block'. An explicitly stored
+    # 'input_block' still wins; otherwise fall back to the constructor
+    # input instead of returning undef. Confirm against the rest of the
+    # file.
+
+    my $self = shift;
+
+    return defined $self->{input_block}
+         ? $self->{input_block}
+         : $self->{'_haplotype_block'};
+}
+
+=head2 hap_length
+
+ Title   : hap_length
+ Usage   : $obj->hap_length()
+ Function: get numbers of SNP on the haplotype
+ Returns : scalar
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub hap_length{
+#------------------------
+    # number of SNP ids stored on the object
+
+    my ($self) = @_;
+    my $snp_count = @{ $self->{'_snp_ids'} };
+    return $snp_count;
+}
+
+
+=head2 pop_freq
+
+ Title   : pop_freq
+ Usage   : $obj->pop_freq()
+ Function: returns population frequency
+ Returns : reference to array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub pop_freq{
+#------------------------
+    # Return the pattern id / frequency table handed to the constructor.
+    #
+    # NOTE(review): nothing in the visible part of this module assigns
+    # the 'pop_freq' key, while the pattern_freq() accessor keeps the
+    # caller's table under '_pattern_freq'. An explicitly stored
+    # 'pop_freq' still wins; otherwise fall back to the constructor input
+    # instead of returning undef. Confirm against the rest of the file.
+
+    my $self = shift;
+
+    return defined $self->{pop_freq}
+         ? $self->{pop_freq}
+         : $self->{'_pattern_freq'};
+}
+
+
+=head2 deg_snp
+
+
+ Title   : deg_snp
+ Usage   : $obj->deg_snp()
+ Function: returns snp_removes due to indetermination on their values
+ Returns : reference to array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub deg_snp{
+    # list kept under snp_type -> deg_snp
+    my ($self) = @_;
+    return $self->{'snp_type'}{'deg_snp'};
+}
+
+
+=head2 snp_type
+
+
+ Title   : snp_type
+ Usage   : $obj->snp_type()
+ Function: returns hash with SNP type
+ Returns : reference to hash
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub snp_type{
+    # hash holding the SNP classification
+    my ($self) = @_;
+    return $self->{'snp_type'};
+}
+
+
+=head2 silent_snp
+
+
+ Title   : silent_snp
+ Usage   : $obj->silent_snp()
+ Function: some SNP's are silent (not contributing to the haplotype)
+           and are not considered for this analysis
+ Returns : reference to a array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub silent_snp{
+    # list kept under snp_type -> silent_snp
+    my ($self) = @_;
+    return $self->{'snp_type'}{'silent_snp'};
+}
+
+
+=head2 useful_snp
+
+
+ Title   : useful_snp
+ Usage   : $obj->useful_snp()
+ Function: returns list of SNP's that can be used as htSNP. Some
+           of them can produce the same information. But this is
+           not considered here.
+ Returns : reference to a array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub useful_snp{
+    # list kept under snp_type -> useful_snp
+    my ($self) = @_;
+    return $self->{'snp_type'}{'useful_snp'};
+}
+
+
+=head2 ht_type
+
+
+ Title   : ht_type
+ Usage   : $obj->ht_type()
+ Function: every useful SNP has a numeric code depending on its
+           value and position. For a better description see
+           description of the module.
+ Returns : reference to a array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub ht_type{
+    # value stored under the ht_type key
+    my ($self) = @_;
+    return $self->{'ht_type'};
+}
+
+=head2 ht_set
+
+
+ Title   : ht_set
+ Usage   : $obj->ht_set()
+ Function: returns the minimal haplotype in numerical format. This
+           haplotype contains the maximal information about the
+           haplotype variations but with no redundancies. It's the
+           minimal set that describes the haplotype.
+ Returns : reference to an array of arrays
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub ht_set{
+    # the set is the working haplotype matrix kept under w_hap
+    my ($self) = @_;
+    return $self->{'w_hap'};
+}
+
+=head2 snp_type_code
+
+
+ Title   : snp_type_code
+ Usage   : $obj->snp_type_code()
+ Function: returns the numeric code of the SNPs that need to be
+           tagged that correspond to the SNP's considered in ht_set.
+ Returns : reference to an array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub snp_type_code{
+    # value stored under the snp_type_code key
+    my ($self) = @_;
+    return $self->{'snp_type_code'};
+}
+
+=head2 snp_and_code
+
+
+ Title   : snp_and_code
+ Usage   : $obj->snp_and_code()
+ Function: Returns the full list of SNP's and the code associate to
+           them. If the SNP belongs to the group useful_snp it keep
+           this code. If the SNP is silent the code is 0. And if the
+           SNP is degenerated the code is -1.
+ Returns : reference to an array of array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub snp_and_code{
+    # value stored under the snp_and_code key
+    my ($self) = @_;
+    return $self->{snp_and_code};
+}
+
+=head2 deg_pattern
+
+
+ Title   : deg_pattern
+ Usage   : $obj->deg_pattern()
+ Function: Returns a list with the degenerated haplotypes.
+           Sometimes due to degeneration some haplotypes look
+           the same and if we don't remove them it won't find
+           any tag.
+ Returns : reference to a hash of array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub deg_pattern{
+    # value stored under the deg_pattern key
+    my ($self) = @_;
+    return $self->{deg_pattern};
+}
+
+=head2 split_hap
+
+
+ Title   : split_hap
+ Usage   : $obj->split_hap()
+ Function: simple representation of the haplotype base by base
+           Same information that input haplotype but base based.
+ Returns : reference to an array of array
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub split_hap{
+    # value stored under the split_hap key
+    my ($self) = @_;
+    return $self->{split_hap};
+}
+
+=head2 _split_haplo
+
+ Title   : _split_haplo
+ Usage   : _split_haplo($self)
+ Function: Take a haplotype and split it into bases
+ Returns : self
+ Args    : none
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _split_haplo {
+#------------------------
+    # Break every haplotype string into single characters, append the
+    # resulting rows to the working matrix (w_hap) and keep an
+    # independent copy of that matrix under split_hap.
+    my ($self) = @_;
+
+    my $haplotypes = $self->{'_haplotype_block'};
+    my $matrix     = $self->{'w_hap'};
+
+    for my $pattern (@$haplotypes){
+        my @bases = split //, $pattern;
+        push @$matrix, \@bases;
+    }
+
+    $self->{'split_hap'} = dclone($matrix);
+}
+
+# internal method to convert the haplotype to uppercase
+
+
+=head2 _to_upper_case
+
+
+ Title   : _to_upper_case
+ Usage   : _to_upper_case()
+ Function: make SNP or in-dels Upper case
+ Returns : self
+ Args    : an AoA ref
+ Status  : private
+
+=cut
+
+#------------------------
+sub _to_upper_case {
+#------------------------
+    # Upper-case, in place, every cell of the array of arrays received.
+    # Args    : reference to an array of array references
+    # Returns : nothing; the caller's matrix is modified directly
+    my ($arr) =@_;
+
+    foreach my $aref (@$arr){
+        # $aref already is the row reference: dereference it once.
+        # (The former @{@$aref} used an array as a reference, which
+        # recent perls refuse to compile.)
+        foreach my $value (@{$aref}){
+            $value = uc $value;   # $value aliases the cell itself
+        }
+    }
+
+    return;
+}
+
+
+=head2 _remove_deg
+
+
+ Title   : _remove_deg
+ Usage   : _remove_deg()
+ Function: when have a indetermination or strange value this SNP
+           is removed
+ Returns : haplotype family set and degeneration list
+ Args    : ref to an AoA and a ref to an array
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _remove_deg {
+#------------------------
+    # Locate the degenerated columns of the working matrix and, when
+    # there are any, drop them from the matrix and move their ids from
+    # the useful SNP list to the degenerated SNP list.
+    my ($self) = @_;
+
+    my $matrix       = $self->{w_hap};
+    my $useful_ids   = $self->{snp_type}->{useful_snp};
+    my $deg_ids      = $self->{snp_type}->{deg_snp};
+
+    # positions of the columns to be removed (search starts from an
+    # empty list)
+    my $positions = _find_indet($matrix, []);
+
+    if (@$positions){
+        _remove_col($matrix, $positions);                    # haplotype columns
+        _remove_snp_id($useful_ids, $deg_ids, $positions);   # matching SNP ids
+    }
+}
+
+
+=head2 _rem_silent_snp
+
+
+ Title   : _rem_silent_snp
+ Usage   : _rem_silent_snp()
+ Function: there is the remote possibility that one SNP won't be a
+           real SNP. In this situation we have to remove this SNP,
+           otherwise the program won't find any tag
+ Returns : nothing
+ Args    : ref to an AoA and a ref to an array
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _rem_silent_snp {
+#------------------------
+    # Locate the columns without variation (silent SNPs) and, when there
+    # are any, drop them from the working matrix and move their ids from
+    # the useful SNP list to the silent SNP list.
+    my ($self) = @_;
+
+    my $matrix      = $self->{w_hap};
+    my $useful_ids  = $self->{snp_type}->{useful_snp};
+    my $silent_ids  = $self->{snp_type}->{silent_snp};
+
+    # positions of the columns whose value never changes
+    my $positions = _find_silent_snps($matrix);
+
+    if (@$positions){
+        _remove_col($matrix, $positions);                      # haplotype columns
+        _remove_snp_id($useful_ids, $silent_ids, $positions);  # matching SNP ids
+    }
+}
+
+
+=head2 _find_silent_snps
+
+
+ Title   : _find_silent_snps
+ Usage   :
+ Function: list of SNPs that are not real SNPs: all values for such a
+           SNP on the set are the same. It looks stupid but it can
+           happen, and if it happens you will not find any tag
+ Returns : ref to an array with the indexes of the silent columns
+ Args    : ref to an AoA
+ Status  :
+
+=cut
+
+#------------------------
+sub _find_silent_snps{
+#------------------------
+    # Collect the indexes of the columns whose value is the same in every
+    # row, i.e. positions that show no variation inside the set.
+    #
+    # Args    : reference to an array of array references (rows)
+    # Returns : reference to an array of column indexes, in ascending order
+    my ($arr) = @_;
+
+    # the width of the first row is taken as the width of the whole matrix
+    my $n_columns = @{ $arr->[0] };
+
+    my @silent;
+
+    COLUMN:
+    foreach my $col (0 .. $n_columns - 1) {
+        # any later row that differs from the first one makes this a real SNP
+        foreach my $row (1 .. $#$arr) {
+            next COLUMN if $arr->[0][$col] ne $arr->[$row][$col];
+        }
+        push @silent, $col;
+    }
+
+    return \@silent;
+}
+
+
+=head2 _find_indet
+
+
+ Title   : _find_indet
+ Usage   :
+ Function: find columns (SNPs) with invalid or degenerated values
+           and store their indexes in the second parameter supplied.
+ Returns : the second parameter, sorted numerically
+ Args    : ref to AoA and ref to an array
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _find_indet{
+#------------------------
+    # Add to @$list the index of every column that holds, in at least one
+    # row, a value not matching /[ACTG-]/. Each index is stored once; the
+    # list is then sorted numerically.
+    #
+    # Args    : reference to an array of array references (rows),
+    #           reference to the array receiving the column indexes
+    # Returns : the second argument
+    my ($arr, $list) = @_;
+
+    # indexes already present in the caller's list must not be added twice
+    my %listed = map { ($_ => 1) } @$list;
+
+    foreach my $row (@$arr) {
+        foreach my $col (0 .. $#$row) {
+            next if $row->[$col] =~ /[ACTG-]/;
+            next if $listed{$col}++;
+            push @$list, $col;
+        }
+    }
+
+    @$list = sort { $a <=> $b } @$list;
+
+    return $list;
+}
+
+=head2 _remove_col
+
+ Title   : _remove_col
+ Usage   :
+ Function: remove columns contained on the second array from
+           the first arr
+ Returns : nothing
+ Args    : array of array reference and array reference
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _remove_col{
+#------------------------
+    # Delete the listed column indexes from every row of the matrix,
+    # in place.
+    #
+    # Args    : reference to an array of array references (rows),
+    #           reference to an array of column indexes
+    my ($arr, $rem) = @_;
+
+    # Walk the index list backwards: the callers visible in this file pass
+    # it in ascending order, so removing the highest column first keeps
+    # the remaining indexes valid.
+    my @targets = reverse @$rem;
+
+    foreach my $row (@$arr) {
+        foreach my $col (@targets) {
+            splice @$row, $col, 1;
+        }
+    }
+}
+
+
+=head2 _remove_snp_id
+
+ Title   : _remove_snp_id
+ Usage   :
+ Function: remove columns contained on the second array from
+           the first arr
+ Returns : nothing
+ Args    : array of array reference and array reference
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _remove_snp_id{
+#------------------------
+    # Take the elements at the listed positions out of @$arr and append
+    # them to @$removed. Positions are processed from the end of the list
+    # to its start, so the removed elements arrive in that order.
+    #
+    # Args    : reference to the array to shrink,
+    #           reference to the array collecting the removed elements,
+    #           reference to an array of positions
+    my ($arr, $removed, $rem_list) = @_;
+
+    foreach my $position (reverse @$rem_list) {
+        my @taken = splice @$arr, $position, 1;
+        push @$removed, @taken;
+    }
+}
+
+
+=head2 _find_deg_pattern
+
+ Title   : _find_deg_pattern
+ Usage   :
+ Function: create a list with the degenerated patterns
+ Returns : @array
+ Args    : a ref to AoA
+ Status  : public
+
+=cut
+
+#------------------------
+sub _find_deg_pattern{
+#------------------------
+    # Group the rows of the working haplotype that are identical.
+    # $self->{deg_pattern} maps the index of the row that opened a group
+    # to the list of row indexes belonging to that group.
+    my ($self) = @_;
+
+    my $rows     = $self->{w_hap};
+    my $patterns = $self->{'deg_pattern'};
+
+    foreach my $i (0 .. $#$rows) {
+        # a row not yet filed under any group opens its own
+        $patterns->{$i} = [$i] unless _is_on_hash($patterns, \$i);
+
+        foreach my $j ($i + 1 .. $#$rows) {
+            next unless compare_arrays($rows->[$i], $rows->[$j]);
+
+            # file the first later duplicate under the group that holds
+            # row $i; further duplicates are picked up when the outer
+            # loop reaches row $j
+            my $group = _key_for_value($patterns, \$i);
+            push @{ $patterns->{$group} }, $j;
+
+            last;
+        }
+    }
+
+}
+
+#------------------------
+sub _key_for_value{
+#------------------------
+    # Return a key of %$hash whose array value contains $$value.
+    #
+    # Args    : reference to a hash of array references,
+    #           reference to the scalar to look for
+    my ($hash, $value) = @_;
+
+    foreach my $key (keys %$hash) {
+        my $members = \@{ $hash->{$key} };
+        return $key if _is_there($members, $value);
+    }
+}
+
+#------------------------
+sub _is_on_hash{
+#------------------------
+    # Return 1 when $$value occurs in at least one of the arrays stored
+    # as values of %$hash.
+    #
+    # Args    : reference to a hash of array references,
+    #           reference to the scalar to look for
+    my ($hash, $value) = @_;
+
+    foreach my $key (keys %$hash) {
+        my $members = \@{ $hash->{$key} };
+        return 1 if _is_there($members, $value);
+    }
+}
+
+#------------------------
+sub _is_there{
+#------------------------
+    # Return 1 when the array holds an element string-equal to $$value.
+    #
+    # Args    : reference to an array,
+    #           reference to the scalar to look for
+
+    my ($arr, $value) = @_;
+
+    foreach my $candidate (@$arr) {
+        return 1 if $candidate eq $$value;
+    }
+}
+
+
+=head2 _keep_these_patterns
+
+
+ Title   : _keep_these_patterns
+ Usage   :
+ Function: this is a basic approach, take a LoL and a list,
+           keep just the columns included on the list
+ Returns : nothing
+ Args    : an AoA and an array
+ Status  : public
+
+=cut
+
+#------------------------
+sub _keep_these_patterns{
+#------------------------
+    # Replace the content of @$arr with deep copies of the elements found
+    # at the positions named in @$list, in the order given.
+    #
+    # Only one representative of each repetition is kept for now; the
+    # values could be weighted by frequency instead.
+    #
+    # Args    : reference to an array of array references,
+    #           reference to an array of positions
+    my ($arr, $list) = @_;
+
+    my @selected = map { $arr->[$_] } @$list;
+
+    # overwrite the caller's array with the copied selection
+    @$arr = @{ dclone(\@selected) };
+
+}
+
+
+=head2 compare_arrays
+
+
+ Title   : compare_arrays
+ Usage   :
+ Function: take two arrays and compare their values
+ Returns : 1 if the two values are the same
+           0 if the values are different
+ Args    : an AoA and an array
+ Status  : public
+
+=cut
+
+#------------------------
+sub compare_arrays {
+#------------------------
+    # Compare two arrays element by element as strings.
+    #
+    # Args    : two array references
+    # Returns : 1 when both arrays have the same length and equal
+    #           elements at every position, 0 otherwise
+    my ($first, $second) = @_;
+
+    return 0 if @$first != @$second;
+
+    foreach my $idx (0 .. $#$first) {
+        return 0 unless $first->[$idx] eq $second->[$idx];
+    }
+
+    return 1;
+}
+
+
+=head2 _convert_to_numbers
+
+
+ Title   : _convert_to_numbers
+ Usage   : _convert_to_numbers()
+ Function: transform the haplotype into numbers. Before doing that
+           we have to consider the variation on the set.
+ Returns : nothing
+ Args    : ref to an AoA and a ref to an array
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _convert_to_numbers{
+#------------------------
+    # Recode the working haplotype in place, column by column: within a
+    # column the first distinct value met (reading the rows top to
+    # bottom) becomes 0, the next new value 1, and so on.
+    my ($self) = @_;
+
+    my $hap_ref = $self->{w_hap};
+
+    # the width of the first row is taken as the length of the haplotype
+    my $length = @{ $hap_ref->[0] };
+
+    foreach my $col (0 .. $length - 1) {
+
+        my %code_of;          # value => order of first appearance
+        my $next_code = 0;
+
+        foreach my $row (0 .. $#$hap_ref) {
+            my $allele = $hap_ref->[$row][$col];
+
+            $code_of{$allele} = $next_code++
+                unless exists $code_of{$allele};
+
+            $hap_ref->[$row][$col] = $code_of{$allele};
+        }
+    }
+}
+
+
+=head2 _snp_type_code
+
+
+ Title   : _snp_type_code
+ Usage   :
+ Function:
+           we have to create the snp type code for each version.
+           The way the snp type is created is the following:
+
+           we take the number value for every SNP and do the
+           following calculation
+
+           let be a SNP set as follow:
+
+           0    0
+           1    1
+           1    2
+
+           and multiplicity 3
+           on this case the situation is:
+
+           sum (value * multiplicity ^ position) for each SNP
+
+           0 * 3 ^ 0 + 1 * 3 ^ 1 + 1 * 3 ^ 2 = 12
+           0 * 3 ^ 0 + 1 * 3 ^ 1 + 2 * 3 ^ 2 = 21
+ Returns : nothing
+ Args    : $self
+ Status  : private
+
+=cut
+
+#------------------------
+sub _snp_type_code{
+#------------------------
+    # Add to $self->{snp_type_code}, for every column of the working
+    # haplotype, the sum over the rows of
+    #     value * alleles_number ** row_index
+    my ($self) = @_;
+
+    my $rows  = $self->{w_hap};
+    my $codes = $self->{snp_type_code};
+    my $base  = $self->{alleles_number};
+
+    # the width of the first row is taken as the length of the haplotype
+    my $n_columns = @{ $rows->[0] };
+
+    foreach my $col (0 .. $n_columns - 1) {
+        foreach my $row (0 .. $#$rows) {
+            $codes->[$col] += $rows->[$row][$col] * $base ** $row;
+        }
+    }
+}
+
+#################################################
+# return the position of an element in one array
+# The element is always present on the array
+#################################################
+
+#------------------------
+sub get_position{
+#------------------------
+    # Return the index of the first element string-equal to $$value.
+    #
+    # Args    : reference to an array,
+    #           reference to the scalar to look for
+
+    my ($array, $value) = @_;
+
+    foreach my $idx (0 .. $#$array) {
+        return $idx if $array->[$idx] eq $$value;
+    }
+
+}
+
+
+=head2 _alleles_number
+
+
+ Title   : _alleles_number
+ Usage   :
+ Function: calculate the max number of alleles for a haplotype and
+           if the number. For each SNP the number is stored and the
+           max number of alleles for a SNP on the set is returned
+ Returns : max number of alleles (a scalar storing a number)
+ Args    : ref to AoA
+ Status  : public
+
+=cut
+
+#------------------------
+sub _alleles_number{
+#------------------------
+    # Raise $self->{alleles_number} to the largest number of distinct
+    # values found in any single column of the working haplotype.
+
+    my ($self) = @_;
+
+    my $rows = $self->{w_hap};
+
+    # the width of the first row is taken as the length of the haplotype
+    my $n_columns = @{ $rows->[0] };
+
+    foreach my $col (0 .. $n_columns - 1) {
+
+        # one key per distinct value met in this column
+        my %seen;
+        $seen{ $rows->[$_][$col] } = 1 for 0 .. $#$rows;
+
+        my $distinct = keys %seen;
+
+        # only ever grow the stored maximum
+        if ($self->{alleles_number} < $distinct) {
+            $self->{alleles_number} = $distinct;
+        }
+    }
+}
+
+
+=head2 _htSNP
+
+
+ Title   : _htSNP
+ Usage   : _htSNP()
+ Function: calculate the minimal set that contains all information of the
+           haplotype.
+ Returns : nothing
+ Args    : ref to an AoA and a ref to an array
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _htSNP{
+#------------------------
+    # Reduce the working haplotype to one column per distinct SNP type
+    # code. Codes not yet listed in $self->{ht_type} are appended to it,
+    # and the column of each such code is kept; columns whose code is
+    # already listed are dropped.
+    my ($self) = @_;
+
+    my $hap   = $self->{'w_hap'};
+    my $codes = $self->{'snp_type_code'};
+    my $set   = $self->{'ht_type'};
+
+    my $kept   = [];   # the minimal set, built column by column
+    my $target = 0;    # next free column in $kept
+
+    foreach my $col (0 .. $#$codes) {
+
+        # a code seen before adds no information
+        next if _is_there($set, \$codes->[$col]);
+
+        push @$set, $codes->[$col];
+
+        # copy the whole column into the next free output column
+        $kept->[$_][$target] = $hap->[$_][$col] for 0 .. $#$hap;
+
+        $target++;
+    }
+
+    @$hap = @{ dclone($kept) };
+}
+
+=head2 _snp_and_code_summary
+
+ Title   : _snp_and_code_summary
+ Usage   : _snp_and_code_summary()
+ Function: compile on a list all SNP and the code for each. This
+           information can be also obtained combining snp_type and
+           snp_type_code but on these results the information about
+           the rest of SNP's are not compiled as table.
+
+           0 will be silent SNPs
+           -1 are degenerated SNPs
+           and the rest of positive values are the code for useful SNP
+
+ Returns : nothing
+ Args    : ref to an AoA and a ref to an array
+ Status  : internal
+
+=cut
+
+#------------------------
+sub _snp_and_code_summary{
+#------------------------
+    # Append to $self->{snp_and_code} one [id, code] pair per SNP id
+    # returned by snp_ids(). The code is -1 for an id listed as
+    # degenerated, the SNP type code for an id listed as useful, and 0
+    # for every other id.
+    my ($self) = @_;
+
+    my $codes   = $self->{'snp_type_code'};
+    my $useful  = $self->{'snp_type'}->{'useful_snp'};
+    my $deg     = $self->{'snp_type'}->{'deg_snp'};
+    my $ids     = $self->snp_ids();
+    my $summary = $self->{'snp_and_code'};
+
+    # code of each useful SNP; the first occurrence of an id wins
+    my %code_of;
+    foreach my $j (0 .. $#$useful) {
+        my $id = $useful->[$j];
+        $code_of{$id} = $codes->[$j] unless exists $code_of{$id};
+    }
+
+    my %is_degenerated;
+    $is_degenerated{ $deg->[$_] } = 1 for 0 .. $#$deg;
+
+    foreach my $i (0 .. $#$ids) {
+        my $id = $ids->[$i];
+
+        # every SNP starts out as silent
+        my $value = exists $code_of{$id} ? $code_of{$id} : 0;
+
+        # degenerated overrides whatever was found above
+        $value = -1 if $is_degenerated{$id};
+
+        push @$summary, [$id, $value];
+
+    }
+}
+
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/csv.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/csv.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/csv.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,351 @@
+# $Id: csv.pm,v 1.8.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::IO::csv
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::IO::csv -Extract individual allele data from a CSV parser 
+
+=head1 SYNOPSIS
+
+#Do not use directly, use through the Bio::PopGen::IO driver
+
+  use Bio::PopGen::IO;
+  my $io = new Bio::PopGen::IO(-format => 'csv',
+                               -file   => 'data.csv');
+
+  # Some IO might support reading in a population at a time
+
+  my @population;
+  while( my $ind = $io->next_individual ) {
+      push @population, $ind;
+  }
+
+=head1 DESCRIPTION
+
+This object will parse comma delimited format (CSV) or whatever
+delimiter you specify. It currently doesn't handle the more complex
+quote escaped CSV format.  There are 3 initialization parameters: 
+the field delimiter (-field_delimiter) [default ','], the allele 
+delimiter (-allele_delimiter) [default '\s+'], and a boolean 
+-no_header which specifies if there is no header line to read in.
+All lines starting with '#' will be skipped.
+
+When no_header is not specified the data is assumed to be of the following form.
+A header line like this
+SAMPLE,MARKERNAME1,MARKERNAME2,...
+
+and each data line having the form (diploid data)
+SAMP1,101 102,100 90,a b
+or for haploid data
+SAMP1,101,100,a
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::IO::csv;
+use vars qw($FieldDelim $AlleleDelim $NoHeader);
+use strict;
+
+# Package-wide defaults used by _initialize when the matching option is
+# omitted: field delimiter, allele delimiter (used as a split pattern) and
+# the no-header switch.
+($FieldDelim,$AlleleDelim,$NoHeader) =( ',', '\s+',0);
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use Bio::PopGen::Individual;
+use Bio::PopGen::Population;
+use Bio::PopGen::Genotype;
+
+use base qw(Bio::PopGen::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::IO::csv();
+ Function: Builds a new Bio::PopGen::IO::csv object 
+ Returns : an instance of Bio::PopGen::IO::csv
+ Args    : [optional, these are the current defaults] 
+           -field_delimiter => ','
+           -allele_delimiter=> '\s+'
+           -no_header       => 0,
+
+
+=cut
+
+sub _initialize {
+    # Record the delimiters and the header handling requested by the
+    # caller, falling back to the package defaults ($FieldDelim,
+    # $AlleleDelim, $NoHeader) for every option that was not given.
+    #
+    # Args    : -field_delimiter, -allele_delimiter, -no_header
+    #           (all optional)
+    # Returns : 1
+    my ($self, @args) = @_;
+
+    my ($fieldsep, $all_sep, $noheader) =
+        $self->_rearrange([qw(FIELD_DELIMITER
+                              ALLELE_DELIMITER
+                              NO_HEADER)], @args);
+
+    $self->flag('no_header', defined $noheader ? $noheader : $NoHeader);
+    $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
+    $self->flag('allele_delimiter',defined $all_sep ? $all_sep : $AlleleDelim);
+
+    # no header line has been read yet
+    $self->{'_header'} = undef;
+    return 1;
+}
+
+=head2 flag
+
+ Title   : flag
+ Usage   : $obj->flag($flagname,$newval)
+ Function: Get/Set the flag value
+ Returns : value of a flag (a boolean)
+ Args    : A flag name, currently we expect 
+           'no_header', 'field_delimiter', or 'allele_delimiter' 
+           on set, new value (a boolean or undef, optional)
+
+
+=cut
+
+sub flag{
+    # Combined getter/setter for the named settings kept in
+    # $self->{'_flag'}.
+    #
+    # Args    : setting name, and optionally the value to store under it
+    # Returns : the value held for that name after the call;
+    #           nothing when no name is given
+    my ($self, $fieldname, @new_value) = @_;
+
+    return unless defined $fieldname;
+
+    # a second argument, even an undefined one, means "set"
+    if (@new_value) {
+        $self->{'_flag'}->{$fieldname} = $new_value[0];
+    }
+
+    return $self->{'_flag'}->{$fieldname};
+}
+
+
+=head2 next_individual
+
+ Title   : next_individual
+ Usage   : my $ind = $popgenio->next_individual;
+ Function: Retrieve the next individual from a dataset
+ Returns : L<Bio::PopGen::IndividualI> object
+ Args    : none
+
+
+=cut
+
+sub next_individual{
+    # Read the next usable line and build an individual from it.
+    #
+    # Lines starting with '#' and blank lines are skipped. Unless the
+    # no_header flag is set, the first usable line is stored as the list
+    # of column names and the line after it is parsed instead.
+    #
+    # Args    : none
+    # Returns : a Bio::PopGen::Individual, or nothing when _readline
+    #           has no more lines
+    my ($self) = @_;
+
+    # a lexical keeps the caller's $_ untouched
+    my $line;
+    while( defined( $line = $self->_readline) ) {
+        next if( $line =~ /^\s*\#/ || $line =~ /^\s+$/ || ! length($line) );
+        last;
+    }
+    return if ! defined $line;
+
+    if( ! $self->flag('no_header') && ! defined $self->{'_header'} ) {
+        # header line: remember the column names, then parse the next line
+        chomp $line;
+        $self->{'_header'} = [split($self->flag('field_delimiter'),$line)];
+        return $self->next_individual;
+    }
+
+    my ($samp, @marker_results);
+
+    if( $self->flag('field_delimiter') ne $self->flag('allele_delimiter') ) {
+        ($samp, @marker_results) = split($self->flag('field_delimiter'),$line);
+    }
+    else {
+        # The field delimiter is the same as the allele delimiter, so a
+        # plain split cannot tell fields from alleles: take the sample id
+        # up to the first delimiter, then collect the allele pairs.
+        my $fielddelim  = $self->flag('field_delimiter');
+        my $alleledelim = $self->flag('allele_delimiter');
+
+        ($samp) = $line =~ /(^.+?)$fielddelim/;
+        $line =~ s/^.+?$fielddelim//;
+
+        # braces stop "$alleledelim[" from being read as an array element
+        @marker_results = $line =~ /([\d|\w]+${alleledelim}[\d|\w]+)/g;
+    }
+
+    my $i = 1;
+    foreach my $m ( @marker_results ) {
+        $m =~ s/^\s+//;
+        $m =~ s/\s+$//;
+
+        # column 0 of the header names the sample column, so marker $i
+        # is found at index $i
+        my $markername;
+        if( defined $self->{'_header'} ) {
+            $markername = $self->{'_header'}->[$i];
+        } else {
+            $markername = "Marker$i";
+        }
+        $self->debug( "markername is $markername alleles are $m\n");
+
+        my @alleles = split($self->flag('allele_delimiter'), $m);
+
+        # foreach aliases $m, so this replaces the raw text held in
+        # @marker_results with the genotype object
+        $m = Bio::PopGen::Genotype->new(-alleles      => \@alleles,
+                                        -marker_name  => $markername,
+                                        -individual_id=> $samp);
+        $i++;
+    }
+
+    return Bio::PopGen::Individual->new(-unique_id => $samp,
+                                        -genotypes => \@marker_results);
+}
+
+
+=head2 next_population
+
+ Title   : next_population
+ Usage   : my $ind = $popgenio->next_population;
+ Function: Retrieve the next population from a dataset
+ Returns : L<Bio::PopGen::PopulationI> object
+ Args    : none
+ Note    : Many implementation will not implement this
+
+=cut
+
+# Plan is to just return the whole dataset as a single population by 
+# default I think - people would then have each population in a separate
+# file.
+
+sub next_population{
+    # Read every remaining individual and return them together as one
+    # population.
+    #
+    # Args    : none
+    # Returns : a Bio::PopGen::Population
+    my $self = shift;
+
+    my $members = [];
+    while (1) {
+        my $individual = $self->next_individual;
+        last unless $individual;
+        push @$members, $individual;
+    }
+
+    return Bio::PopGen::Population->new(-individuals => $members);
+}
+
+
+
+
+=head2 write_individual
+
+ Title   : write_individual
+ Usage   : $popgenio->write_individual($ind);
+ Function: Write an individual out in the file format
+ Returns : none
+ Args    : L<Bio::PopGen::IndividualI> object(s)
+
+=cut
+
+sub write_individual{
+    # Print one line per individual: its unique id followed by the
+    # alleles of each of its markers, markers taken in sorted name order.
+    # A 'SAMPLE' header line is printed first unless the no_header flag
+    # is set or a header has already been written.
+    #
+    # Args    : Bio::PopGen::IndividualI object(s); anything else is
+    #           reported with a warning and skipped
+    # Returns : none
+    my ($self, @inds) = @_;
+    my $fielddelim  = $self->flag('field_delimiter');
+    # NOTE(review): with the package default this is the pattern text
+    # '\s+', which join() prints literally; write_population uses ' '
+    # instead - confirm which behaviour is intended here.
+    my $alleledelim = $self->flag('allele_delimiter');
+
+    foreach my $ind ( @inds ) {
+        if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
+            $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object ($ind)");
+            next;
+        }
+        # we'll go ahead and sort these until
+        # we have a better way to insure a consistent order
+        my @marker_names = sort $ind->get_marker_names;
+        if( ! $self->flag('no_header') &&
+            ! $self->flag('header_written') ) {
+            $self->_print(join($fielddelim, ('SAMPLE', @marker_names)), "\n");
+            $self->flag('header_written',1);
+        }
+        # Read the chained maps bottom-up: marker names become
+        # genotypes, and each genotype becomes its alleles joined by
+        # the allele delimiter.
+        $self->_print( join($fielddelim, $ind->unique_id,
+                            map { join($alleledelim,$_->get_Alleles) }
+                            map {$ind->get_Genotypes(-marker => $_)}
+                            @marker_names), "\n");
+    }
+}
+
+=head2 write_population
+
+ Title   : write_population
+ Usage   : $popgenio->write_population($pop);
+ Function: Write a population out in the file format
+ Returns : none
+ Args    : L<Bio::PopGen::PopulationI> object(s)
+ Note    : Many implementation will not implement this
+
+=cut
+
+sub write_population{
+    # Print every individual of every population given: one line per
+    # individual with its unique id followed by the alleles of each
+    # marker of the population, markers taken in sorted name order.
+    # A 'SAMPLE' header line is printed first unless the no_header flag
+    # is set or a header has already been written.
+    #
+    # Args    : Bio::PopGen::PopulationI object(s); anything else is
+    #           reported with a warning and skipped
+    # Returns : none
+    my ($self, @pops) = @_;
+    my $fielddelim  = $self->flag('field_delimiter');
+    # alleles are always written separated by one space; the
+    # allele_delimiter flag is not consulted here
+    my $alleledelim = ' ';
+
+    foreach my $pop ( @pops ) {
+        if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
+            $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
+            next;
+        }
+        # we'll go ahead and sort these until
+        # we have a better way to insure a consistent order
+        my @marker_names = sort $pop->get_marker_names;
+        if( ! $self->flag('no_header') &&
+            ! $self->flag('header_written') ) {
+            $self->_print( join($fielddelim, ('SAMPLE', @marker_names)),
+                           "\n");
+            $self->flag('header_written',1);
+        }
+        foreach my $ind ( $pop->get_Individuals ) {
+            # Read the chained maps bottom-up: marker names become
+            # genotypes, and each genotype becomes its alleles joined
+            # by the allele delimiter.
+            $self->_print( join($fielddelim, $ind->unique_id,
+                                map { join($alleledelim,$_->get_Alleles) }
+                                map {$ind->get_Genotypes(-marker => $_)}
+                                @marker_names), "\n");
+        }
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/hapmap.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/hapmap.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/hapmap.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,313 @@
+# $Id: hapmap.pm,v 1.8.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::IO::hapmap
+#
+# Cared for by Rich Dobson <r.j.dobson-at-qmul.ac.uk>
+#
+# Copyright Rich Dobson
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::IO::hapmap - A parser for HapMap output data
+
+=head1 SYNOPSIS
+
+  # Do not use directly, use through the Bio::PopGen::IO driver
+
+  use Bio::PopGen::IO;
+  my $io = new Bio::PopGen::IO(-format => 'hapmap',
+                               -file   => 'data.hapmap');
+
+  # Some IO might support reading in a population at a time
+
+  my @population;
+  while( my $ind = $io->next_individual ) {
+      push @population, $ind;
+  }
+
+=head1 DESCRIPTION
+
+A driver module for Bio::PopGen::IO for parsing hapmap data.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Rich Dobson
+
+Email r.j.dobson-at-qmul.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::PopGen::IO::hapmap;
+use vars qw($FieldDelim $AlleleDelim $NoHeader $StartingCol);
+use strict;
+
+# Package-wide defaults used by _initialize when the matching option is
+# omitted: field delimiter and allele delimiter (both used as split
+# patterns), the no-header switch, and the index of the first data column.
+($FieldDelim,$AlleleDelim,$NoHeader,$StartingCol) =( '\s+','',0,11);
+
+use Bio::PopGen::Individual;
+use Bio::PopGen::Population;
+use Bio::PopGen::Genotype;
+
+use base qw(Bio::PopGen::IO);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::IO::hapmap();
+ Function: Builds a new Bio::PopGen::IO::hapmap object 
+ Returns : an instance of Bio::PopGen::IO::hapmap
+ Args    : [optional, these are the current defaults] 
+           -field_delimiter => '\s+'
+           -allele_delimiter=> ''
+           -no_header       => 0,
+           -starting_column => 11
+
+=cut
+
+
+sub _initialize  {
+    # Record the delimiters, header handling and starting column
+    # requested by the caller, falling back to the package defaults
+    # ($FieldDelim, $AlleleDelim, $NoHeader, $StartingCol) for every
+    # option that was not given.
+    #
+    # Args    : -field_delimiter, -allele_delimiter, -no_header,
+    #           -starting_column (all optional)
+    # Returns : 1
+
+    my ($self, @args) = @_;
+
+    # package variable of Bio::PopGen::Genotype, set for the whole program
+    $Bio::PopGen::Genotype::BlankAlleles='';
+
+    my @option_names = qw(FIELD_DELIMITER
+                          ALLELE_DELIMITER
+                          NO_HEADER
+                          STARTING_COLUMN);
+    my ($fieldsep, $all_sep, $noheader, $start_col) =
+        $self->_rearrange(\@option_names, @args);
+
+    # fill in the defaults for whatever the caller left out
+    $noheader  = $NoHeader    unless defined $noheader;
+    $fieldsep  = $FieldDelim  unless defined $fieldsep;
+    $all_sep   = $AlleleDelim unless defined $all_sep;
+    $start_col = $StartingCol unless defined $start_col;
+
+    $self->flag('no_header', $noheader);
+    $self->flag('field_delimiter', $fieldsep);
+    $self->flag('allele_delimiter', $all_sep);
+    $self->starting_column($start_col);
+
+    # no header has been read yet
+    $self->{'_header'} = undef;
+    return 1;
+
+}
+
+=head2 flag
+
+ Title   : flag
+ Usage   : $obj->flag($flagname,$newval)
+ Function: Get/Set the flag value
+ Returns : value of a flag (a boolean)
+ Args    : A flag name, currently we expect 
+           'no_header', 'field_delimiter', or 'allele_delimiter' 
+           on set, new value (a boolean or undef, optional)
+
+=cut
+
+sub flag  {
+    # Combined getter/setter for the named settings kept in
+    # $self->{'_flag'}.
+    #
+    # Args    : setting name, and optionally the value to store under it
+    # Returns : the value held for that name after the call;
+    #           nothing when no name is given
+
+    my ($self, $fieldname, @new_value) = @_;
+
+    return unless defined $fieldname;
+
+    # a second argument, even an undefined one, means "set"
+    $self->{'_flag'}->{$fieldname} = $new_value[0] if @new_value;
+
+    return $self->{'_flag'}->{$fieldname};
+
+}
+
+sub _pivot {
+    # Read the whole input and turn it on its side: the input has one
+    # line per marker, the stored result has one row per data column,
+    # made of that column's header entry followed by its value on every
+    # marker line.
+    #
+    # Stores : $self->{'_header'}  first field of every data line
+    #          $self->{'_pivot'}   the pivoted rows
+    #          $self->{'_i'}       read position in '_pivot', reset to 0
+    my ($self) = @_;
+
+    my (@cols, @rows, @idheader);
+    my $delim = $self->flag('field_delimiter');
+
+    # Test for definedness, not truth, so a line holding only "0" does
+    # not end the input; a lexical keeps the caller's $_ untouched.
+    while ( defined( my $line = $self->_readline ) ) {
+        chomp($line);
+        next if( $line =~ /^\s*\#/ || $line =~ /^\s+$/ || ! length($line) );
+        if( $line =~ /^rs\#\s+alleles\s+chrom\s+pos\s+strand/ ) {
+            # the header line supplies the entry that leads each
+            # pivoted row
+            @idheader = split $delim, $line;
+        } else {
+            push @cols, [split $delim, $line];
+        }
+    }
+    my $startingcol = $self->starting_column;
+
+    $self->{'_header'} = [ map { $_->[0] } @cols];
+
+    # the first data line fixes the table width; with no data line
+    # there is nothing to pivot
+    my $last_col = @cols ? $#{ $cols[ 0 ] } : -1;
+    for my $n ($startingcol .. $last_col) {
+        my $column = [ $idheader[$n],
+                       map { $_->[ $n ] } @cols ];
+        push (@rows, $column);
+    }
+    $self->{'_pivot'} = [@rows];
+    $self->{'_i'} = 0;
+}
+
+
+=head2 next_individual
+
+ Title   : next_individual
+ Usage   : my $ind = $popgenio->next_individual;
+ Function: Retrieve the next individual from a dataset
+ Returns : A Bio::PopGen::IndividualI object
+ Args    : none
+
+See L<Bio::PopGen::IndividualI>
+
+=cut
+
+sub next_individual  {
+    # Return the next individual from the pivoted table.
+    #
+    # Args    : none
+    # Returns : a Bio::PopGen::Individual, or nothing when every
+    #           pivoted row has been handed out
+    my ($self) = @_;
+    unless($self->{'_pivot'}){
+        # first call: pivot the whole input once and keep it; every
+        # later call reads from the stored rows
+        $self->_pivot;
+    }
+
+    # a lexical keeps the caller's $_ untouched
+    my $row = $self->{'_pivot'}->[$self->{'_i'}++];
+
+    return unless defined $row;
+
+    # each pivoted row is the column's header entry followed by one
+    # result per marker
+    my ($samp, @marker_results) = @$row;
+
+    my $i = 1;
+    foreach my $m ( @marker_results ) {
+        $m =~ s/^\s+//;
+        $m =~ s/\s+$//;
+        my $markername;
+        if( defined $self->{'_header'} ) {
+            $markername = $self->{'_header'}->[$i-1];
+        } else {
+            $markername = "Marker$i";
+        }
+
+        my @alleles = split($self->flag('allele_delimiter'), $m);
+        if( @alleles != 2 ) {
+            # NOTE(review): the raw text stays in @marker_results in
+            # this case and is handed on with the genotypes - confirm
+            # that this is intended
+            $self->warn("$m for $samp\n");
+        } else {
+            # foreach aliases $m, so this replaces the raw text held in
+            # @marker_results with the genotype object
+            $m = Bio::PopGen::Genotype->new(-alleles      => \@alleles,
+                                            -marker_name  => $markername,
+                                            -individual_id=> $samp);
+        }
+        $i++;
+    }
+
+    return Bio::PopGen::Individual->new(-unique_id => $samp,
+                                        -genotypes => \@marker_results);
+
+}
+
+=head2 next_population
+
+ Title   : next_population
+ Usage   : my $ind = $popgenio->next_population;
+ Function: Retrieve the next population from a dataset
+ Returns : Bio::PopGen::PopulationI object
+ Args    : none
+ Note    : Many implementation will not implement this
+
+See L<Bio::PopGen::PopulationI>
+
+=cut
+
+sub next_population {
+    # Read every remaining individual and return them together as one
+    # population.
+    #
+    # Args    : none
+    # Returns : a Bio::PopGen::Population
+    my $self = shift;
+
+    my $members = [];
+    while (1) {
+        my $individual = $self->next_individual;
+        last unless $individual;
+        push @$members, $individual;
+    }
+
+    return Bio::PopGen::Population->new(-individuals => $members);
+}
+
+=head2 write_individual
+
+ Title   : write_individual
+ Usage   : $popgenio->write_individual($ind);
+ Function: Write an individual out in the file format
+           NOT SUPPORTED  BY hapmap format
+ Returns : none
+ Args    : Bio::PopGen::PopulationI object(s)
+
+See L<Bio::PopGen::PopulationI>
+
+=cut
+
+sub write_individual {
+    # Writing is not supported for this format: every call ends in
+    # throw_not_implemented.
+    #
+    # Args    : individual object(s), ignored
+    my ($self, @inds) = @_;
+
+    # data from hapmap is output, not input, so
+    # we don't need a method for writing an input file
+
+    $self->throw_not_implemented();
+}
+
+=head2 write_population
+
+ Title   : write_population
+ Usage   : $popgenio->write_population($pop);
+ Function: Write a population out in the file format
+           NOT SUPPORTED  BY hapmap format
+ Returns : none
+ Args    : Bio::PopGen::PopulationI object(s)
+ Note    : Many implementation will not implement this
+
+See L<Bio::PopGen::PopulationI>
+
+=cut
+
+sub write_population {
+    # Writing is not supported for this format: every call ends in
+    # throw_not_implemented.
+    #
+    # Args    : population object(s), ignored
+    my ($self, @inds) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 starting_column
+
+ Title   : starting_column
+ Usage   : $obj->starting_column($newval)
+ Function: Column where data starts
+ Example : 
+ Returns : value of starting_column (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub starting_column{
+    # Combined getter/setter for $self->{'starting_column'}.
+    #
+    # Args    : optionally the new value
+    # Returns : the value held after the call
+    my ($self, @new_value) = @_;
+
+    # an argument, even an undefined one, means "set"
+    if (@new_value) {
+        $self->{'starting_column'} = $new_value[0];
+    }
+
+    return $self->{'starting_column'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/phase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/phase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/phase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,369 @@
+# $Id: phase.pm,v 1.7.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::IO::phase
+#
+# Cared for by Rich Dobson <r.j.dobson-at-qmul.ac.uk>
+#
+# Copyright Rich Dobson
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::IO::phase - A parser for Phase format data
+
+=head1 SYNOPSIS
+
+# Do not use directly, use through the Bio::PopGen::IO driver
+
+  use Bio::PopGen::IO;
+  my $io = new Bio::PopGen::IO(-format => 'phase',
+                               -file   => 'data.phase');
+
+  # Some IO might support reading in a population at a time
+
+  my @population;
+  while( my $ind = $io->next_individual ) {
+      push @population, $ind;
+  }
+
+
+=head1 DESCRIPTION
+
+A driver module for Bio::PopGen::IO for parsing phase data.
+
+PHASE is defined in http://www.stat.washington.edu/stephens/instruct2.1.pdf
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Rich Dobson
+
+Email r.j.dobson-at-qmul.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::IO::phase;
+use vars qw($FieldDelim $AlleleDelim $NoHeader);
+use strict;
+
+($FieldDelim,$AlleleDelim,$NoHeader) =( ',', '\s+',0);
+
+# Package-wide defaults set above: _initialize falls back to them when
+# -field_delimiter, -allele_delimiter or -no_header is not supplied.
+
+use Bio::PopGen::Individual;
+use Bio::PopGen::Population;
+use Bio::PopGen::Genotype;
+
+use base qw(Bio::PopGen::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::IO::phase();
+ Function: Builds a new Bio::PopGen::IO::phase object 
+ Returns : an instance of Bio::PopGen::IO::phase
+ Args    : [optional, these are the current defaults] 
+           -field_delimiter => ','
+           -allele_delimiter=> '\s+'
+           -no_header       => 0,
+
+
+=cut
+
+
+sub _initialize  {
+    # Per-instance setup; receives the constructor's argument list.
+    my($self, @args) = @_;
+
+    $Bio::PopGen::Genotype::BlankAlleles='';   # NB: package-wide setting, not per object
+
+    my ($fieldsep,$all_sep, 
+	$noheader) = $self->_rearrange([qw(FIELD_DELIMITER
+					   ALLELE_DELIMITER
+					   NO_HEADER)],@args);
+    # Explicit arguments win; otherwise use the package-level defaults.
+    $self->flag('no_header', defined $noheader ? $noheader : $NoHeader);
+    $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
+    $self->flag('allele_delimiter',defined $all_sep ? $all_sep : $AlleleDelim);
+
+    $self->{'_header'} = undef;   # no header row has been read yet
+    return 1;
+
+}
+
+=head2 flag
+
+ Title   : flag
+ Usage   : $obj->flag($flagname,$newval)
+ Function: Get/Set the flag value
+ Returns : value of a flag (a boolean)
+ Args    : A flag name, currently we expect 
+           'no_header', 'field_delimiter', or 'allele_delimiter' 
+           on set, new value (a boolean or undef, optional)
+
+
+=cut
+
+sub flag  {
+    my ($self, $fieldname, @newval) = @_;
+
+    # Without a flag name there is nothing to look up or store.
+    return if ! defined $fieldname;
+
+    # Any extra argument, even undef, is stored as the new value.
+    $self->{'_flag'}->{$fieldname} = $newval[0] if @newval;
+    return $self->{'_flag'}->{$fieldname};
+}
+
+=head2 next_individual
+
+ Title   : next_individual
+ Usage   : my $ind = $popgenio->next_individual;
+ Function: Retrieve the next individual from a dataset
+ Returns : L<Bio::PopGen::IndividualI> object
+ Args    : none
+
+
+=cut
+
+sub next_individual  {
+    my ($self) = @_;
+    # Parser state lives on $self between calls: _count, _sam, _row1, _first_row ...
+    my ($sam,@marker_results,$number_of_ids,$number_of_markers,
+	$marker_positions,$micro_snp);
+    # Skip blank lines; stop at the first line with content (or at EOF).
+    while( defined( $_ = $self->_readline) ) {
+	next if( /^\s+$/ || ! length($_) );
+	last;
+    }
+    
+    return unless defined $_; 
+    if( $self->flag('no_header') || defined $self->{'_header'} ) {
+	# Each "preamble" branch below records one line and recurses for the next.
+	####### sometimes there is some marker info at the start of a phase input file
+	####### we collect it in the next few lines if there is. Should this info be held in a marker object?
+	# NB: the values are stored unchomped, exactly as read.
+	if(!$self->{'_count'} && /^\s*\d+$/){
+	    $self->flag('number_of_ids',$_);
+	    #print "number_of_ids : $number_of_ids\n";
+	    $self->{'_count'}++;
+	    return $self->next_individual;
+	} elsif($self->{'_count'} == 1 && /^\s*\d+$/){
+	    $self->flag('number_of_markers',$_);
+	    #print "number_of_markers : $number_of_markers\n";
+	    $self->{'_count'}++;
+	    return $self->next_individual;
+	} elsif($self->{'_count'} == 2 && /^\s*P\s\d/){
+	    $self->flag('marker_positions',$_);
+	    #print "marker_position : $marker_positions\n";
+	    $self->{'_count'}++;
+	    return $self->next_individual;
+	} elsif($self->{'_count'} == 3 && /^\s*(M|S)+\s*$/i){
+	    $self->flag('micro_snp',$_);
+	    #print "microsat or snp : $micro_snp\n";
+	    $self->{'_count'}++;
+	    return $self->next_individual;
+	} elsif(/^\s*\#/){
+	    ($self->{'_sam'}) = /^\s*\#(.+)/;
+	    #print "sample : $self->{'_sam'}\n";
+	    $self->{'_count'}++;
+	    return $self->next_individual;
+	} else {
+	    chomp $_;
+	    if( $self->{'_row1'} ) {
+		# if we are looking at the 2nd row of alleles for this id
+		# pair it column by column with the remembered first row
+		@{$self->{'_second_row'}} = 
+		    split($self->flag('field_delimiter'),$_);
+
+		for my $i(0 .. $#{$self->{'_first_row'}}){
+		    # one "allele1<delim>allele2" string per marker
+		    push(@{$self->{'_marker_results'}},
+			 $self->{'_first_row'}->[$i].
+			 $self->flag('field_delimiter').
+			 $self->{'_second_row'}->[$i]);
+		}
+		$self->{'_row1'} = 0;
+	    } else {
+		# if we are looking at the first row of alleles for this id
+		@{$self->{'_marker_results'}} = ();
+		@{$self->{'_first_row'}} = split($self->flag('field_delimiter'),$_);
+		$self->{'_row1'} = 1;
+		return $self->next_individual;
+	    }
+	}
+	# Both rows are in: turn every paired string into a Genotype, in place.
+	my $i = 1;
+	foreach my $m ( @{$self->{'_marker_results'}} ) {
+	    $m =~ s/^\s+//;
+	    $m =~ s/\s+$//;
+	    my $markername;
+	    if( defined $self->{'_header'} ) {
+		$markername = $self->{'_header'}->[$i] || "Marker$i";
+	    } else { 
+		$markername = "Marker$i";
+	    }
+	    $self->debug( "markername is $markername alleles are $m\n");
+	    my @alleles = split($self->flag('allele_delimiter'), $m);	
+
+	    $m = Bio::PopGen::Genotype->new(-alleles      =>\@alleles,
+					    -marker_name  => $markername,
+					    -individual_id=> $self->{'_sam'}); 
+	    $i++; 
+	}
+	return Bio::PopGen::Individual->new(-unique_id => $self->{'_sam'},
+					    -genotypes =>\@{$self->{'_marker_results'}},
+					    );
+
+    } else {
+	chomp;
+	$self->{'_header'} = [split($self->flag('field_delimiter'),$_)];
+	return $self->next_individual; # rerun loop again
+    }
+    return;
+}
+
+=head2 next_population
+
+ Title   : next_population
+ Usage   : my $pop = $popgenio->next_population;
+ Function: Retrieve the next population from a dataset
+ Returns : L<Bio::PopGen::PopulationI> object
+ Args    : none
+ Note    : Many implementation will not implement this
+
+=cut
+
+sub next_population{
+    my $self = shift;
+    # Everything still unread in the stream is returned as one population.
+    my @members;
+    while( my $member = $self->next_individual ) { push @members, $member }
+    # Note: an empty stream still produces a population (with no members).
+    return Bio::PopGen::Population->new(-individuals => \@members);
+}
+
+=head2 write_individual
+
+ Title   : write_individual
+ Usage   : $popgenio->write_individual($ind);
+ Function: Write an individual out in the file format
+ Returns : none
+ Args    : L<Bio::PopGen::IndividualI> object(s)
+
+=cut
+
+
+sub write_individual {
+    my ($self,@inds) = @_;
+    my $fielddelim  = $self->flag('field_delimiter');
+    # (allele_delimiter is not needed: the two alleles go on separate rows)
+
+    foreach my $ind ( @inds ) {
+	if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
+	    $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object ($ind)");
+	    next;
+	}
+	# we'll go ahead and sort these until
+	# we have a better way to insure a consistent order
+	my @marker_names = sort $ind->get_marker_names;
+	if( ! $self->flag('no_header') && 
+	    ! $self->flag('header_written') ) {
+	    $self->_print(join($fielddelim, ('SAM', @marker_names)), "\n");
+	    $self->flag('header_written',1);
+	}
+	# row1 / row2 collect the first / second allele of every marker
+	my(@row1,@row2);
+
+	for (@marker_names){
+	    my $geno = $ind->get_Genotypes(-marker => $_);
+	    my @alleles = $geno->get_Alleles();
+	    push(@row1,$alleles[0]);
+	    push(@row2,$alleles[1]);
+	}
+	$self->_print("#",$ind->unique_id,"\n",
+		      join($fielddelim,@row1),"\n",
+		      join($fielddelim,@row2),"\n");
+    }
+}
+
+=head2 write_population
+
+ Title   : write_population
+ Usage   : $popgenio->write_population($pop);
+ Function: Write a population out in the file format
+ Returns : none
+ Args    : L<Bio::PopGen::PopulationI> object(s)
+ Note    : Many implementation will not implement this
+
+=cut
+
+
+sub write_population {
+    my ($self,@pops) = @_;
+    my $fielddelim  = $self->flag('field_delimiter');
+    # (allele_delimiter is not needed: the two alleles go on separate rows)
+
+    foreach my $pop ( @pops ) {
+	if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
+	    $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
+	    next;
+	}
+	# we'll go ahead and sort these until
+	# we have a better way to insure a consistent order
+	my @marker_names = sort $pop->get_marker_names;
+	if( ! $self->flag('no_header') && 
+	    ! $self->flag('header_written') ) {
+	    $self->_print( join($fielddelim, ('SAM', @marker_names)), 
+			   "\n");
+	    $self->flag('header_written',1);
+	}
+	foreach my $ind ( $pop->get_Individuals ) {
+	    my(@row1,@row2);
+	    for (@marker_names){
+		my $geno = $ind->get_Genotypes(-marker => $_);   # NOTE(review): presumably undef if $ind lacks this marker - confirm
+		my @alleles = $geno->get_Alleles();
+		push (@row1,$alleles[0]);
+		push (@row2,$alleles[1]);
+	    }
+	    $self->_print("#",$ind->unique_id,"\n",
+			  join($fielddelim,@row1),"\n",
+			  join($fielddelim,@row2),"\n");
+	} 
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/prettybase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/prettybase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO/prettybase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,294 @@
+# $Id: prettybase.pm,v 1.8.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::IO::prettybase
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::IO::prettybase - Extract individual allele data from PrettyBase format
+
+=head1 SYNOPSIS
+
+Do not use directly, use through the Bio::PopGen::IO driver
+
+=head1 DESCRIPTION
+
+This object will parse delimited PrettyBase output.  PrettyBase
+is defined by the SeattleSNPs http://pga.gs.washington.edu/
+
+This is expected to be tab delimited (you can vary this with the
+field_delimiter flag): SITE SAMPLE ALLELE1 ALLELE2
+
+There are 2 initialization parameters, the delimiter
+(-field_delimiter) [default 'tab'] and a boolean -no_header which
+specifies if there is no header line to read in.  All lines starting
+with '#' will be skipped
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::IO::prettybase;
+use vars qw($FieldDelim $Header);
+use strict;
+
+($FieldDelim,$Header) =( '\t',0);
+# Package-wide defaults used by _initialize when -field_delimiter / -header
+# are not supplied; the delimiter is later handed to split() as its pattern.
+use Bio::PopGen::Individual;
+use Bio::PopGen::Population;
+use Bio::PopGen::Genotype;
+
+use base qw(Bio::PopGen::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::IO::prettybase();
+ Function: Builds a new Bio::PopGen::IO::prettybase object 
+ Returns : an instance of Bio::PopGen::IO::prettybase
+ Args    : -field_delimiter      => a field delimiter character or regexp (default is /\t/ ) 
+           -header               => boolean if the file will have a header and parser should
+                                    skip first line in the file (default is false)
+           -convert_indel_states => convert alleles which are longer than one character
+                                    to an 'I' meaning insert state, and alleles which are
+                                    '-' to a delete state.
+                                    (default is false)
+
+=cut
+
+sub _initialize {
+    my($self, @args) = @_;
+    my ($fieldsep,
+	$conv_indels,
+	$header) = $self->_rearrange([qw(FIELD_DELIMITER
+					 CONVERT_INDEL_STATES
+					 HEADER)],@args);
+    # Explicit arguments win; otherwise use the package-level defaults.
+    $self->flag('header', defined $header ? $header : $Header);
+    $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
+    $self->{'_header'} = undef;
+    $self->{'_parsed_individiuals'} = [];   # filled by _parse_prettybase
+    $self->{'_parsed'} = 0;                 # the input has not been read yet
+    $self->flag('convert_indel',$conv_indels || 0);
+    return 1;
+}
+
+=head2 flag
+
+ Title   : flag
+ Usage   : $obj->flag($flagname,$newval)
+ Function: Get/Set the flag value
+ Returns : value of a flag (a boolean)
+ Args    : A flag name, currently we expect 
+           'header', 'field_delimiter', or 'convert_indel' 
+           on set, new value (a boolean or undef, optional)
+
+
+=cut
+
+sub flag{
+    my ($self, $fieldname, @newval) = @_;
+    # Nothing to get or set without a flag name.
+    return if ! defined $fieldname;
+
+    $self->{'_flag'}->{$fieldname} = $newval[0] if @newval;   # setter form
+    return $self->{'_flag'}->{$fieldname};
+}
+
+
+=head2 next_individual
+
+ Title   : next_individual
+ Usage   : my $ind = $popgenio->next_individual;
+ Function: Retrieve the next individual from a dataset
+ Returns : Bio::PopGen::IndividualI object
+ Args    : none
+
+
+=cut
+
+sub next_individual {
+    my $self = shift;
+    # The whole input is parsed lazily, on the first request only.
+    $self->_parse_prettybase if ! $self->{'_parsed'};
+    my $cursor = $self->{'_iterator'}++;   # an unset iterator counts as 0
+    return $self->{'_parsed_individiuals'}->[$cursor];
+}
+
+
+
+=head2 next_population
+
+ Title   : next_population
+ Usage   : my $pop = $popgenio->next_population;
+ Function: Retrieve the next population from a dataset
+ Returns : Bio::PopGen::PopulationI object
+ Args    : none
+ Note    : Many implementation will not implement this
+
+=cut
+
+# Plan is to just return the whole dataset as a single population by 
+# default I think - people would then have each population in a separate
+# file.
+
+sub next_population{
+    my $self = shift;
+    # Collect every individual the iterator still has to offer.
+    my @members;
+    while( my $member = $self->next_individual ) { push @members, $member }
+    # With nothing collected, return nothing rather than an empty population.
+    return if ! @members;
+    return Bio::PopGen::Population->new(-individuals => \@members);
+}
+
+
+sub _parse_prettybase {
+    my $self = shift;
+    my %inds;
+    my $convert_indels = $self->flag('convert_indel');
+    while( defined( $_ = $self->_readline) ) {
+	next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
+	# one record per line: SITE, SAMPLE, then any number of alleles
+	my ($site,$sample,@alleles) = split($self->flag('field_delimiter'),$_);
+	if( ! defined $sample ) { 
+	    warn("sample id is undefined for $_");
+	    next;
+	}
+	for my $allele ( @alleles ) {
+	    $allele =~ s/^\s+//;
+	    $allele =~ s/\s+$//;
+	    if( $convert_indels ) {
+		if( length($allele) > 1 ) {
+		    # we have an insert state
+		    $allele = 'I';
+		} elsif( $allele eq '-' ) {
+		    # have a delete state
+		    $allele = 'D';
+		}
+	    }
+	}
+	
+	my $g = Bio::PopGen::Genotype->new(-alleles      => \@alleles,
+					   -marker_name  => $site,
+					   -individual_id=> $sample); 
+	
+	# genotypes are grouped per sample id; create the individual on first sight
+	if( ! defined $inds{$sample} ) {
+	    $inds{$sample} = Bio::PopGen::Individual->new(-unique_id => $sample);
+	}
+	$inds{$sample}->add_Genotype($g);
+    }
+    $self->{'_parsed_individiuals'} = [ values %inds ];   # hash order, not file order
+    $self->{'_parsed'} = 1;
+    return;
+}
+
+
+=head2 write_individual
+
+ Title   : write_individual
+ Usage   : $popgenio->write_individual($ind);
+ Function: Write an individual out in the file format
+ Returns : none
+ Args    : L<Bio::PopGen::IndividualI> object(s)
+
+=cut
+
+sub write_individual{
+    my ($self,@inds) = @_;
+    foreach my $ind ( @inds ) {
+	if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
+	    $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
+	    next;
+	}
+	foreach my $marker ( $ind->get_marker_names ) { 
+	    my $g = $ind->get_Genotypes(-marker=> $marker);
+	    next unless defined $g;
+	    $self->_print( join("\t", $marker, $ind->unique_id, 
+				$g->get_Alleles), "\n");	    
+	}
+    }
+    # one tab-separated line per marker: SITE, SAMPLE, alleles
+}
+
+
+
+=head2 write_population
+
+ Title   : write_population
+ Usage   : $popgenio->write_population($pop);
+ Function: Write a population out in the file format
+ Returns : none
+ Args    : L<Bio::PopGen::PopulationI> object(s)
+ Note    : Many implementation will not implement this
+
+=cut
+
+sub write_population{
+    my ($self,@pops) = @_;
+    foreach my $pop ( @pops ) {
+	if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
+	    $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
+	    next;
+	}
+	my @mnames = $pop->get_marker_names;
+	foreach my $ind ( $pop->get_Individuals ) {
+	    if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
+		$self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
+		next;
+	    }
+	    foreach my $marker ( @mnames ) { 
+		my $g = $ind->get_Genotypes(-marker=> $marker);
+		next unless defined $g;
+		$self->_print( join("\t", $marker, $ind->unique_id, 
+				    $g->get_Alleles), "\n");
+		# markers this individual lacks were skipped by the "next" above
+	    }
+	}
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,321 @@
+# $Id: IO.pm,v 1.6.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::IO
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::IO - Input individual,marker,allele information
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::IO;
+  my $io = new Bio::PopGen::IO(-format => 'csv',
+                               -file   => 'data.csv');
+
+  # Some IO might support reading in a population at a time
+
+  my @population;
+  while( my $ind = $io->next_individual ) {
+      push @population, $ind;
+  }
+
+
+=head1 DESCRIPTION
+
+This is a generic interface for reading in population genetic data (for
+which there really aren't too many standard formats).  This implementation
+makes it easy to provide your own parser for the data.  You only need to
+implement one function, next_individual.  You can also implement 
+next_population if your data has explicit information about population
+membership for the individuals.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+#TODO 
+# Set the Individual creation as a factory rather than
+# hardcoded
+
+package Bio::PopGen::IO;
+use strict;
+
+# Object preamble - inherits from Bio::Root::IO
+
+use Bio::Root::Root;
+
+use base qw(Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::IO();
+ Function: Builds a new Bio::PopGen::IO object 
+ Returns : an instance of Bio::PopGen::IO
+ Args    :
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # Called on a concrete driver class: build it.  Called on the base class: pick a driver.
+  if( $class =~ /Bio::PopGen::IO::(\S+)/ ) {
+    my ($self) = $class->SUPER::new(@args);	
+    $self->_initialize(@args);
+    return $self;
+  } else { 
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    my $format = $param{'-format'} ||
+      $class->_guess_format( $param{'-file'} || $ARGV[0] ) || 'csv';
+
+    # normalize capitalization to lower case
+    $format = "\L$format";
+    # nothing is returned when the driver module cannot be loaded
+    return unless( $class->_load_format_module($format) );
+    return "Bio::PopGen::IO::${format}"->new(@args);
+  }
+}
+
+# _initialize is chained for all PopGen::IO classes
+
+sub _initialize {
+    my $self = shift;
+#    my ($indfact, $popfact) = $self->_rearrange([qw(INDIVIDUAL_FACTORY
+#						    POPULATION_FACTORY)],
+#						@args);
+#    $indfact = Bio::PopGen::IndividualBuilder->new() unless $indfact;
+#    $indfact = Bio::PopGen::PopulationBuilder->new() unless $indfact;
+
+    # Pass the remaining constructor arguments on to the IO set-up.
+    $self->_initialize_io(@_);
+    return 1;
+}
+
+=head2 next_individual
+
+ Title   : next_individual
+ Usage   : my $ind = $popgenio->next_individual;
+ Function: Retrieve the next individual from a dataset
+ Returns : L<Bio::PopGen::IndividualI> object
+ Args    : none
+
+
+=cut
+
+sub next_individual{
+    my $self = shift;
+    return $self->throw_not_implemented();   # base class: format drivers override this
+}
+
+
+=head2 next_population
+
+ Title   : next_population
+ Usage   : my $pop = $popgenio->next_population;
+ Function: Retrieve the next population from a dataset
+ Returns : L<Bio::PopGen::PopulationI> object
+ Args    : none
+ Note    : Many implementation will not implement this
+
+=cut
+
+sub next_population{
+    my $self = shift;
+    return $self->throw_not_implemented();   # base class: always throws
+}
+
+=head2 write_individual
+
+ Title   : write_individual
+ Usage   : $popgenio->write_individual($ind);
+ Function: Write an individual out in the implementation format
+ Returns : none
+ Args    : L<Bio::PopGen::IndividualI> object(s)
+
+=cut
+
+sub write_individual{
+    my $self = shift;
+    return $self->throw_not_implemented();   # base class: always throws
+}
+
+
+
+=head2 write_population
+
+ Title   : write_population
+ Usage   : $popgenio->write_population($pop);
+ Function: Write a population out in the implementation format
+ Returns : none
+ Args    : L<Bio::PopGen::PopulationI> object(s)
+ Note    : Many implementation will not implement this
+
+=cut
+
+sub write_population{
+    my $self = shift;
+    return $self->throw_not_implemented();   # base class: always throws
+}
+
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
+           $sequence = <$fh>;   # read a sequence object
+           print $fh $sequence; # write a sequence object
+ Returns : filehandle tied to the Bio::SeqIO::Fh class
+ Args    :
+
+See L<Bio::SeqIO::Fh>
+
+=cut
+
+sub newFh {
+  my ($class, @args) = @_;
+  # Construct first; if that yields nothing there is no object to tie.
+  my $io = $class->new(@args) or return;
+  return $io->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $sequence = <$fh>;   # read a sequence object
+           print $fh $sequence; # write a sequence object
+ Returns : filehandle tied to Bio::SeqIO class
+ Args    : none
+
+=cut
+
+
+sub fh {
+  my ($self) = @_;
+  # Tie a fresh anonymous glob to this object's own class, wrapping $self.
+  my $glob = Symbol::gensym;
+  tie $$glob, (ref($self) || $self), $self;
+  return $glob;
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL PopGen::IO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example : 
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _load_format_module {
+  my ($self,$format) = @_;
+  # Driver classes live directly under the Bio::PopGen::IO namespace.
+  my $driver = "Bio::PopGen::IO::${format}";
+
+  # Trap any exception so the caller only ever sees a true/false answer.
+  my $loaded;
+  eval { $loaded = $self->_load_module($driver) };
+  return $loaded unless $@;
+
+  print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the Bio::PopGen::IO system please see the 
+Bio::PopGen::IO docs.  This includes ways of checking for formats at 
+compile time, not run time
+END
+  # Report-and-continue: the failure is printed, not rethrown.
+  return $loaded;
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+
+sub _guess_format {
+   my ($class, $filename) = @_;   # lexical copy: the caller's $_ is left alone
+   return unless $filename;       # no (or false) file name: nothing to guess
+   return 'csv'   if ($filename =~ /csv/i or $filename =~ /\.dat\w$/i);
+   return;                        # unknown format: let the caller pick a default
+}
+
+sub close { 
+    my ($self, @args) = @_;
+    return $self->SUPER::close(@args);   # plain pass-through to the parent class
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    $self->close();   # object destruction simply delegates to close()
+}
+
+sub TIEHANDLE {
+  my ($class, $processor) = @_;   # $processor: the object READLINE/PRINT delegate to
+  return bless { processor => $processor }, $class;
+}
+
+sub READLINE {   # scalar context: next individual; list context: all remaining
+  my $self = shift;
+  return $self->{'processor'}->next_individual() unless wantarray;
+  my (@list, $obj);   # was next_result, which this class does not declare
+  push @list, $obj while $obj = $self->{'processor'}->next_individual();
+  return @list;
+}
+
+sub PRINT {
+  my $self = shift;
+  $self->{'processor'}->write_individual(@_);   # was write_result, which this class does not declare
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Individual.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Individual.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Individual.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,282 @@
+# $Id: Individual.pm,v 1.15.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Individual
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Individual - An implementation of an Individual who has
+Genotype or Sequence Results
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::Individual;
+
+  my $ind = new Bio::PopGen::Individual(-unique_id => $id,
+                                        -genotypes => \@genotypes);
+
+=head1 DESCRIPTION
+
+This object is a container for genotypes.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Individual;
+use vars qw($UIDCOUNTER);
+use strict;
+BEGIN { $UIDCOUNTER = 1 }   # package-wide counter; new() draws default unique ids from it
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::PopGen::IndividualI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Individual();
+ Function: Builds a new Bio::PopGen::Individual object 
+ Returns : an instance of Bio::PopGen::Individual
+ Args    : -unique_id => $id,
+           -genotypes => \@genotypes
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # Build the base object, then apply -unique_id and -genotypes if given.
+  my $self = $class->SUPER::new(@args);
+  $self->{'_genotypes'} = {};
+  my ($uid,$genotypes) = $self->_rearrange([qw(UNIQUE_ID
+					       GENOTYPES)],@args);
+  unless( defined $uid ) {
+      $uid = $UIDCOUNTER++;   # no id supplied: take the next counter value
+  } 
+  $self->unique_id($uid);
+  if( defined $genotypes ) {
+      if( ref($genotypes) =~ /array/i ) {
+	  $self->add_Genotype(@$genotypes);
+      } else { 
+	  $self->warn("Must provide a valid array reference to set the genotypes value in the contructor");
+      }
+  }
+  return $self;
+}
+
+=head2 unique_id
+
+ Title   : unique_id
+ Usage   : my $id = $individual->unique_id
+ Function: Unique Identifier
+ Returns : string representing unique identifier
+ Args    : string
+
+
+=cut
+
+sub unique_id{
+   my ($self, @newid) = @_;
+   $self->{'_unique_id'} = $newid[0] if @newid;   # setter when an argument is given
+   return $self->{'_unique_id'};
+}
+
+=head2 num_of_results
+
+ Title   : num_of_results
+ Usage   : my $count = $person->num_of_results;
+ Function: returns the count of the number of Results for a person
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub num_of_results {
+    return scalar(keys %{ $_[0]->{'_genotypes'} });   # one stored genotype per marker name
+}
+
+
+=head2 add_Genotype
+
+ Title   : add_Genotype
+ Usage   : $individual->add_Genotype
+ Function: add a genotype value
+ Returns : count of the number of genotypes associated with this individual
+ Args    : @genotypes - L<Bio::PopGen::GenotypeI> object(s) containing 
+                        alleles plus a marker name
+
+=cut
+
+sub add_Genotype {
+   my ($self,@genotypes) = @_;
+   # Genotypes are stored by marker name; a later one replaces an earlier one.
+   foreach my $g ( @genotypes ) {
+       if( !ref($g) || ! $g->isa('Bio::PopGen::GenotypeI') ) {
+	   $self->warn("cannot add $g as a genotype skipping");
+	   next;
+       }
+       my $mname = $g->marker_name;
+       if( ! defined $mname || ! length($mname) ) { 
+         # can't just say ! name b/c '0' wouldn't be valid 
+	   $self->warn("cannot add genotype because marker name is not defined or is an empty string");
+	   next;
+       }
+       if( $self->verbose > 0 && 
+	   defined $self->{'_genotypes'}->{$mname} ) {
+	   # a warning when we have verbosity cranked up 
+	   $self->debug("Overwriting the previous value for $mname for this individual");
+       }
+       # this will force Genotype individual_id to be set to 
+       # the Individual it has been added for
+       $g->individual_id($self->unique_id);
+       $self->{'_genotypes'}->{$mname} = $g;
+   }
+   return scalar keys %{$self->{'_genotypes'}};
+}
+
+=head2 reset_Genotypes
+
+ Title   : reset_Genotypes
+ Usage   : $individual->reset_Genotypes;
+ Function: Reset the genotypes stored for this individual
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_Genotypes{
+    $_[0]->{'_genotypes'} = {};   # swap in a fresh, empty marker => genotype table
+}
+
+=head2 remove_Genotype
+
+ Title   : remove_Genotype
+ Usage   : $individual->remove_Genotype(@names)
+ Function: Removes the genotypes for the requested markers
+ Returns : none
+ Args    : Names of markers 
+
+
+=cut
+
+sub remove_Genotype{
+   my ($self,@mkrs) = @_;
+   foreach my $m ( @mkrs ) {
+       delete($self->{'_genotypes'}->{$m});   # names that are not stored are simply ignored
+   }
+}
+
+=head2 get_Genotypes
+
+ Title   : get_Genotypes
+ Usage   : my @genotypes = $ind->get_Genotypes(-marker => $markername);
+ Function: Get the genotypes for an individual, based on a criteria
+ Returns : Array of genotypes
+ Args    : either none (return all genotypes) or 
+           -marker => name of marker to return (exact match, case matters)
+
+
+=cut
+
+sub get_Genotypes{
+   my ($self,@args) = @_;
+   if( @args ) {
+       unshift @args, '-marker' if( @args == 1 );  # deal with single args
+       # a marker named '0' is legal (add_Genotype accepts it), so do not test truth
+       my ($name) = $self->_rearrange([qw(MARKER)], @args);
+       if( ! defined $name || ! length($name) ) {
+	   $self->warn("Only know how to process the -marker field currently");
+	   return();
+       }
+       my $v = $self->{'_genotypes'}->{$name};
+       return $v;   # undef when no genotype is stored under that marker
+   }
+   return values %{$self->{'_genotypes'} || {}};
+}
+
+=head2 has_Marker
+
+ Title   : has_Marker
+ Usage   : if( $ind->has_Marker($name) ) {}
+ Function: Boolean test to see if an Individual has a genotype 
+           for a specific marker
+ Returns : Boolean (true or false)
+ Args    : String representing a marker name
+
+
+=cut
+
+sub has_Marker{
+   my ($self,$name) = @_;
+   # No marker given: report "absent" rather than looking up an undef key.
+   defined $name or return 0;
+   # A marker object stands in for its own name.
+   if( ref($name) && $name->isa('Bio::PopGen::MarkerI') ) { $name = $name->name }
+   # Anything that is still a reference is rejected with a warning below.
+   return defined $self->{'_genotypes'}->{$name} unless ref($name);
+   $self->warn("Passed in a ".ref($name). " to has_Marker, expecting either a string or a Bio::PopGen::MarkerI");
+   return 0;
+}
+
+=head2 get_marker_names
+
+ Title   : get_marker_names
+ Usage   : my @names = $individual->get_marker_names;
+ Function: Returns the list of known marker names
+ Returns : List of strings
+ Args    : none
+
+
+=cut
+
+sub get_marker_names{
+   my $self = shift;
+   return keys %{ $self->{'_genotypes'} };   # hash order, i.e. not sorted
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IndividualI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IndividualI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/IndividualI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,162 @@
+# $Id $
+#
+# BioPerl module for Bio::PopGen::IndividualI
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::IndividualI - An individual who has Genotype or Sequence Results
+
+=head1 SYNOPSIS
+
+  # Get a Bio::PopGen::IndividualI somehow
+  # test if it has alleles/genotypes for a given marker
+  if( $ind->has_Marker($markername) ) {
+  }
+  # get the unique id
+  print $ind->unique_id, "\n";
+
+  # get the number of results (genotypes)
+  print $ind->num_genotypes;
+
+=head1 DESCRIPTION
+
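+This interface describes an individual that carries genotype results.
+Implementations provide a unique identifier, the number of genotypes,
+the genotypes themselves (optionally restricted to a single marker),
+a test for whether a marker has been genotyped, and the list of known
+marker names.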
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::IndividualI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 unique_id
+
+ Title   : unique_id
+ Usage   : my $id = $individual->unique_id
+ Function: Unique Identifier
+ Returns : string representing unique identifier
+ Args    : string
+
+
+=cut
+
+sub unique_id{
+   # Interface stub: implementing classes must override this.
+   shift->throw_not_implemented();
+}
+
+
+=head2 num_genotypes
+
+ Title   : num_genotypes
+ Usage   : my $count = $person->num_genotypes;
+ Function: returns the count of the number of Results for a person
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub num_genotypes {
+    # Interface stub: implementing classes must override this.
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+sub num_of_results{
+    # Deprecated alias: report the deprecation, then delegate.
+    my ($self) = @_;
+    $self->deprecated("num_of_results is deprecated, use num_genotypes instead");
+    return $self->num_genotypes;
+}
+
+=head2 get_Genotypes
+
+ Title   : get_Genotypes
+ Usage   : my @genotypes = $ind->get_Genotypes(-marker => $markername);
+ Function: Get the genotypes for an individual, based on a criteria
+ Returns : Array of genotypes
+ Args    : either none (return all genotypes) or 
+           -marker => name of marker to return (exact match, case matters)
+
+
+=cut
+
+sub get_Genotypes{
+    # Interface stub: implementing classes must override this.
+    shift->throw_not_implemented();
+}
+
+=head2 has_Marker
+
+ Title   : has_Marker
+ Usage   : if( $ind->has_Marker($name) ) {}
+ Function: Boolean test to see if an Individual has a genotype 
+           for a specific marker
+ Returns : Boolean (true or false)
+ Args    : String representing a marker name
+
+
+=cut
+
+sub has_Marker{
+   # Interface stub: implementing classes must override this.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 get_marker_names
+
+ Title   : get_marker_names
+ Usage   : my @names = $individual->get_marker_names;
+ Function: Returns the list of known marker names
+ Returns : List of strings
+ Args    : none
+
+
+=cut
+
+sub get_marker_names{
+   # Interface stub: implementing classes must override this.
+   $_[0]->throw_not_implemented();
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Marker.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Marker.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Marker.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,282 @@
+# $Id: Marker.pm,v 1.7.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Marker
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Marker - A genetic marker which one uses to generate genotypes
+
+=head1 SYNOPSIS
+
+  my $name = $marker->name();            # marker name
+  my $description = $marker->description(); # description
+  my $type = $marker->type();            # coded type of the marker
+  my $unique_id = $marker->unique_id;    # optional unique ID
+  my @alleles = $marker->get_Alleles();  # the known alleles
+  my %allele_freqs = $marker->get_Allele_Frequencies(); # keys are allele names
+                                         # vals are frequencies
+                                         # may change to handle multiple populations
+
+=head1 DESCRIPTION
+
+This object will not contain genotype information pertaining to an
+individual, but rather population level statistics and descriptive
+information about a marker.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Marker;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+# Package-wide counter: new() takes its current value (and then increments
+# it) as the unique id of any marker built without an explicit -unique_id.
+use vars qw($UniqueCounter);
+
+$UniqueCounter = 0;
+
+use base qw(Bio::Root::Root Bio::PopGen::MarkerI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Marker();
+ Function: Builds a new Bio::PopGen::Marker object 
+ Returns : an instance of Bio::PopGen::Marker
+ Args    : -name          => [string] marker name
+           -description   => [string] marker description
+           -type          => [string] marker type
+           -unique_id     => [string/int] unique id
+           -allele_freq   => [hash ref] allele frequencies 
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($name,$desc,$type,$uid,$af) = $self->_rearrange([qw(NAME
+                                                          DESCRIPTION
+                                                          TYPE
+                                                          UNIQUE_ID
+                                                          ALLELE_FREQ)],@args);
+  $self->{'_allele_freqs'} = {};
+  # Fall back to the package-wide counter when no unique id was supplied.
+  if( ! defined $uid ) {
+      $uid = $UniqueCounter++;
+  }
+  # The name is the only mandatory argument.
+  if( defined $name) {
+      $self->name($name);
+  } else { 
+      $self->throw("Must have provided a name when initializing a Marker");
+  }
+  defined $desc && $self->description($desc);
+  defined $type && $self->type($type);
+  $self->unique_id($uid);
+  # Optional initial allele frequencies: a hash ref of allele => frequency.
+  # Anything else is reported with a warning and ignored.
+  if( defined $af) {
+      if( ref($af) !~ /HASH/i ) {
+          $self->warn("Must provide valid Hash reference for allele_freq method");
+      } else { 
+          foreach my $allele ( keys %$af ) {
+              $self->add_Allele_Frequency($allele, $af->{$allele});
+          }
+      }
+  }
+  return $self;
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $marker->name();
+ Function: Get the name of the marker
+ Returns : string representing the name of the marker
+ Args    : [optional] name
+
+
+=cut
+
+sub name{
+    # Combined getter/setter: any argument (even undef) replaces the value.
+    my ($self,@value) = @_;
+    $self->{'_name'} = $value[0] if @value;
+    return $self->{'_name'};
+}
+
+
+=head2 description
+
+ Title   : description
+ Usage   : my $desc = $marker->description
+ Function: Get the marker description free text
+ Returns : string
+ Args    : [optional] string
+
+
+=cut
+
+sub description{
+    # Combined getter/setter: any argument (even undef) replaces the value.
+    my $self = shift;
+    if( @_ ) {
+        $self->{'_description'} = shift;
+    }
+    return $self->{'_description'};
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $marker->type;
+ Function: Get coded string for marker type
+ Returns : string
+ Args    : [optional] string
+
+
+=cut
+
+sub type{
+    # Combined getter/setter: any argument (even undef) replaces the value.
+    my ($self,@value) = @_;
+    if( @value ) {
+        $self->{'_type'} = $value[0];
+    }
+    return $self->{'_type'};
+}
+
+
+=head2 unique_id
+
+ Title   : unique_id
+ Usage   : my $id = $marker->unique_id;
+ Function: Get the unique marker ID
+ Returns : unique ID string
+ Args    : [optional ] string
+
+
+=cut
+
+sub unique_id{
+    # Combined getter/setter: any argument (even undef) replaces the value.
+    my $self = shift;
+    $self->{'_uniqueid'} = shift if @_;
+    return $self->{'_uniqueid'};
+}
+
+=head2 get_Alleles
+
+ Title   : get_Alleles
+ Usage   : my @alleles = $marker->get_Alleles();
+ Function: Get the available marker alleles
+ Returns : Array of strings
+ Args    : none
+
+=cut
+
+sub get_Alleles{
+    my $self = shift;
+    my (@numeric,@alpha);
+
+    # Allele names made up solely of digits, '.', '-' and 'e' are treated
+    # as numbers; everything else is treated as text.
+    for ( keys %{$self->{'_allele_freqs'}} ) {
+        if( /[^\d\.\-e]/ ) { push @alpha, $_ }
+        else { push @numeric, $_ }
+    }
+    # Each group is sorted in descending order; numeric names come first.
+    @numeric = sort { $b <=> $a } @numeric;
+    @alpha   = sort { $b cmp $a } @alpha;
+    return @numeric,@alpha;
+}
+
+
+=head2 get_Allele_Frequencies
+
+ Title   : get_Allele_Frequencies
+ Usage   : my %allele_freqs = $marker->get_Allele_Frequencies;
+ Function: Get the alleles and their frequency (set relative to
+           a given population - you may want to create different
+           markers with the same name for different populations
+           with this current implementation)
+ Returns : Associative array where keys are the names of the alleles
+ Args    : none
+
+
+=cut
+
+sub get_Allele_Frequencies{
+   # Hands back the stored allele => frequency table as a flat hash.
+   my ($self) = @_;
+   return %{ $self->{'_allele_freqs'} };
+}
+
+=head2 add_Allele_Frequency
+
+ Title   : add_Allele_Frequency
+ Usage   : $marker->add_Allele_Frequency($allele,$freq)
+ Function: Adds an allele frequency
+ Returns : None
+ Args    : $allele - allele name
+           $freq   - frequency value
+
+
+=cut
+
+sub add_Allele_Frequency{
+   # Stores (or overwrites) the frequency recorded for one allele.
+   my $self = shift;
+   my ($allele,$freq) = @_;
+   $self->{'_allele_freqs'}->{$allele} = $freq;
+}
+
+=head2 reset_alleles
+
+ Title   : reset_alleles
+ Usage   : $marker->reset_alleles();
+ Function: Reset the alleles for a marker
+ Returns : None
+ Args    : None
+
+
+=cut
+
+sub reset_alleles{
+   # Replaces the allele frequency table with a fresh, empty one.
+   my $self = shift;
+   $self->{'_allele_freqs'} = {};
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/MarkerI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/MarkerI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/MarkerI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,204 @@
+# $Id $
+#
+# BioPerl module for Bio::PopGen::MarkerI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::MarkerI - A Population Genetic conceptual marker
+
+=head1 SYNOPSIS
+
+# Get a Bio::PopGen::MarkerI somehow - like using a Bio::PopGen::Marker
+
+  my $name = $marker->name();            # marker name
+  my $description = $marker->description(); # description
+  my $type = $marker->type();            # coded type of the marker
+  my $unique_id = $marker->unique_id;    # optional unique ID
+
+  my @alleles = $marker->get_Alleles();  # the known alleles
+  my %allele_freqs = $marker->get_Allele_Frequencies(); # keys are allele names
+                                         # vals are frequencies
+                                         # may change to handle multiple populations
+
+
+=head1 DESCRIPTION
+
+This is the basic interface for Markers which one can associate
+alleles with for calculating Theta and Pi.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::MarkerI;
+use strict;
+
+
+use base qw(Bio::Root::RootI Bio::AnnotatableI);
+
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $marker->name();
+ Function: Get the name of the marker
+ Returns : string representing the name of the marker
+ Args    : 
+
+
+=cut
+
+sub name{
+   # Interface stub: implementing classes must override this.
+   my ($self) = @_;
+   $self->throw_not_implemented();
+}
+
+
+=head2 description
+
+ Title   : description
+ Usage   : my $desc = $marker->description
+ Function: Get the marker description free text
+ Returns : string
+ Args    : [optional] string
+
+
+=cut
+
+sub description{
+    # Interface stub: implementing classes must override this.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : my $type = $marker->type;
+ Function: Get coded string for marker type
+ Returns : string
+ Args    : [optional] string
+
+
+=cut
+
+sub type{
+   # Interface stub: implementing classes must override this.
+   $_[0]->throw_not_implemented();
+}
+
+
+=head2 unique_id
+
+ Title   : unique_id
+ Usage   : my $id = $marker->unique_id;
+ Function: Get the unique marker ID
+ Returns : unique ID string
+ Args    : [optional ] string
+
+
+=cut
+
+sub unique_id{
+   # Interface stub: implementing classes must override this.
+   shift->throw_not_implemented();
+}
+
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($seq_obj)
+ Function: retrieve the attached annotation object
+ Returns : Bio::AnnotationCollectionI or none;
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information. This method comes through extension from
+L<Bio::AnnotatableI>.
+
+
+=cut
+
+
+sub annotation{
+   my ($self,@args) = @_;
+
+   # Nothing is stored at the interface level.  Return explicitly so the
+   # list assignment above is not the implicit return value (in scalar
+   # context that would be the argument count, which is always true).
+   return;
+}
+
+
+=head2 get_Alleles
+
+ Title   : get_Alleles
+ Usage   : my @alleles = $marker->get_Alleles();
+ Function: Get the available marker alleles if they are known and stored
+ Returns : Array of strings
+ Args    : none
+
+
+=cut
+
+sub get_Alleles{
+   # Interface stub: implementing classes must override this.
+   shift->throw_not_implemented();
+}
+
+
+=head2 get_Allele_Frequencies
+
+ Title   : get_Allele_Frequencies
+ Usage   : my %allele_freqs = $marker->get_Allele_Frequencies;
+ Function: Get the alleles and their frequency (set relative to
+           a given population - you may want to create different
+           markers with the same name for different populations
+           with this current implementation)
+ Returns : Associative array (hash) where keys are the names of the alleles
+ Args    : none
+
+
+=cut
+
+sub get_Allele_Frequencies{
+   # Interface stub: implementing classes must override this.
+   $_[0]->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopStats.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopStats.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopStats.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,284 @@
+# $Id: PopStats.pm,v 1.10.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::PopStats
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::PopStats - A collection of methods for calculating
+statistics about a population or sets of populations
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::PopStats;
+  my $stats = new Bio::PopGen::PopStats(); # add -haploid => 1 
+                                           # to process haploid data
+
+=head1 DESCRIPTION
+
+Calculate various population structure statistics, most notably Wright's Fst.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::PopStats;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::PopStats();
+ Function: Builds a new Bio::PopGen::PopStats object 
+ Returns : an instance of Bio::PopGen::PopStats
+ Args    : -haploid => 1 (if want to use haploid calculations)
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  # -haploid => true switches Fst to the haploid calculations.
+  my ($haploid) = $self->_rearrange([qw(HAPLOID)],@args);
+  if( $haploid ) { $self->haploid_status(1) }
+  return $self;
+}
+
+
+=head2 haploid_status
+
+ Title   : haploid_status
+ Usage   : $obj->haploid_status($newval)
+ Function: Boolean value for whether or not to do haploid 
+           or diploid calculations, where appropriate
+ Returns : Boolean
+ Args    : on set, new boolean value (optional)
+
+
+=cut
+
+sub haploid_status{
+    # Combined getter/setter: any argument (even undef) replaces the flag.
+    my ($self,@value) = @_;
+    $self->{'haploid_status'} = $value[0] if @value;
+    return $self->{'haploid_status'};
+}
+
+
+# Implementation provided by Matthew Hahn, massaged by Jason Stajich
+
+=head2 Fst
+
+ Title   : Fst
+ Usage   : my $fst = $stats->Fst(\@populations,\@markernames)
+ Function: Calculate Wright's Fst based on a set of sub-populations
+           and specific markers
+ Returns : Fst value (a value between 0 and 1)
+ Args    : Arrayref of populations to process
+           Arrayref of marker names to process
+ Note    : Based on diploid method in Weir BS, Genetics Data Analysis II, 1996
+           page 178.
+
+=cut
+
+#' make emacs happy here
+sub Fst {
+   my ($self,$populations,$markernames) = @_;
+
+   # Validate the inputs; every failure path warns and returns nothing.
+   if( ! defined $populations || 
+       ref($populations) !~ /ARRAY/i ) { 
+       $self->warn("Must provide a valid arrayref for populations");
+       return;
+   } elsif( ! defined $markernames ||
+            ref($markernames) !~ /ARRAY/i ) {
+       $self->warn("Must provide a valid arrayref for marker names");
+       return;
+   }
+   my $num_sub_pops          = scalar @$populations;
+
+   if( $num_sub_pops < 2 ) {
+       $self->warn("Must provide at least 2 populations for this test, you provided $num_sub_pops");
+       return;
+   }
+
+   # This code assumes that pop 1 contains at least one of all the
+   # alleles - need to do some more work to ensure that the complete 
+   # set of alleles is seen.
+   my $Fst;
+   # Numerator and denominator terms, summed over every allele of every marker.
+   my ($TS_sub1,$TS_sub2);
+
+   foreach my $marker ( @$markernames ) {
+       # Get all the alleles from all the genotypes in all subpopulations
+       my %allAlleles;
+       foreach my $allele ( map { $_->get_Alleles() } 
+                            map { $_->get_Genotypes($marker) } @$populations ){
+           $allAlleles{$allele}++;
+       }
+       my @alleles = keys %allAlleles;
+
+       foreach my $allele_name ( @alleles ) {
+           my $avg_samp_size         = 0; # n-bar
+           my $avg_allele_freq       = 0; # p-tilde-A-dot
+
+           my $total_samples_squared = 0; # 
+           my $sum_heterozygote      = 0;
+
+           my @marker_freqs;
+
+           # Walk through each population, get the calculated allele frequencies
+           # for the marker, do some bookkeeping
+
+
+           foreach my $pop ( @$populations ) {
+               my $s = $pop->get_number_individuals($marker);
+
+               $avg_samp_size += $s;
+               $total_samples_squared += $s**2;
+
+               my $markerobj = $pop->get_Marker($marker);
+               if( ! defined $markerobj ) { 
+                   $self->warn("Could not derive Marker for $marker ".
+                               "from population ". $pop->name);
+                   return;
+               }
+
+               my $freq_homozygotes = 
+                   $pop->get_Frequency_Homozygotes($marker,$allele_name);
+               my %af = $markerobj->get_Allele_Frequencies();
+               my $all_freq = ( ($af{$allele_name} || 0));
+
+               $avg_allele_freq += $s * $all_freq;
+               $sum_heterozygote += (2 * $s)*( $all_freq - $freq_homozygotes);
+
+               # Cache the frequencies so the variance pass below can reuse them.
+               push @marker_freqs, \%af;
+           }
+           my $total_samples =  $avg_samp_size; # sum of n over i sub-populations
+           $avg_samp_size /= $num_sub_pops;
+           $avg_allele_freq /= $total_samples;
+
+           # n-sub-c
+           my $adj_samp_size = ( 1/ ($num_sub_pops - 1)) *
+               ( $total_samples - ( $total_samples_squared/$total_samples));
+
+           my $variance              = 0; # s-squared-sub-A
+           my $sum_variance          = 0;
+           my $i = 0;           # we have cached the marker info
+           foreach my $pop ( @$populations ) {
+               my $s = $pop->get_number_individuals($marker);
+               my %af = %{$marker_freqs[$i++]};
+               $sum_variance += $s * (( ($af{$allele_name} || 0) - 
+                                        $avg_allele_freq)**2);
+           }
+           $variance = ( 1 / (( $num_sub_pops-1)*$avg_samp_size))*$sum_variance;
+
+           # H-tilde-A-dot
+           my $freq_heterozygote = ($sum_heterozygote / $total_samples);
+
+           if( $self->haploid_status ) {
+               # Haploid calculations
+
+               my $T_sub1 = $variance - 
+                   ( ( 1/($avg_samp_size-1))*
+                     ( ($avg_allele_freq*(1-$avg_allele_freq))-
+                       ( (($num_sub_pops-1)/$num_sub_pops)*$variance)));
+               my $T_sub2 = ( (($adj_samp_size-1)/($avg_samp_size-1))*
+                              $avg_allele_freq*(1-$avg_allele_freq) ) +
+                              ( 1 + ( (($num_sub_pops-1)*
+                                       ($avg_samp_size-$adj_samp_size))/ 
+                                      ($avg_samp_size - 1))) * 
+                                      ($variance/$num_sub_pops);
+
+
+               #to get total Fst from all alleles (if more than two) or all
+               #loci (if more than one), we need to calculate $T_sub1 and
+               #$T_sub2 for all alleles for all loci, sum, and then divide
+               #again to get Fst.
+               $TS_sub1 += $T_sub1;
+               $TS_sub2 += $T_sub2;
+
+           } else { 
+               my $S_sub1 = $variance - ( (1/($avg_samp_size-1))*
+                                          ( ($avg_allele_freq*
+                                             (1-$avg_allele_freq)) - 
+                                            ((($num_sub_pops-1)/$num_sub_pops)*
+                                             $variance)-0.25*$freq_heterozygote ) );
+               my $S_sub2 = ($avg_allele_freq*(1-$avg_allele_freq)) - 
+                   ( ($avg_samp_size/($num_sub_pops*($avg_samp_size-1)))*
+                     ( ((($num_sub_pops*($avg_samp_size- $adj_samp_size))/
+                         $avg_samp_size)*$avg_allele_freq*
+                        (1-$avg_allele_freq)) - 
+                       ( (1/$avg_samp_size)* (($avg_samp_size-1)+
+                                              ($num_sub_pops-1)*
+                                              ($avg_samp_size-
+                                               $adj_samp_size) )*$variance ) - 
+                       ( (($num_sub_pops*($avg_samp_size-$adj_samp_size))/
+                          (4*$avg_samp_size*$adj_samp_size))*
+                         $freq_heterozygote ) ) );
+
+               # Only needed for the Fit/Fis formulas kept (commented out) below.
+               my $S_sub3 = ($adj_samp_size/(2*$avg_samp_size))*
+                   $freq_heterozygote;
+
+               #Again, to get the average over many alleles or many loci,
+               #we will have to run the above for each and then sum the $S
+               #variables and recalculate the F statistics 
+               $TS_sub1 += $S_sub1;
+               $TS_sub2 += $S_sub2;
+           } 
+       }
+   }
+   # $Fst_diploid = $S_sub1/$S_sub2;
+   #my $Fit_diploid = 1 - ($S_sub3/$S_sub2);
+   #my $Fis_diploid = ($Fit_diploid-$Fst_diploid)/(1-$Fst_diploid);
+
+   # Guard the final ratio: with an empty marker list, or when no alleles
+   # were found, nothing was accumulated and the division would die with
+   # "Illegal division by zero".  Warn and return like the checks above.
+   if( ! $TS_sub2 ) {
+       $self->warn("Unable to calculate Fst, the summed denominator is zero or was never computed");
+       return;
+   }
+   $Fst = $TS_sub1 / $TS_sub2;
+
+   return $Fst;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Population.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Population.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Population.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,571 @@
+# $Id: Population.pm,v 1.19.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Population
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Population - A population of individuals
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::Population;
+  use Bio::PopGen::Individual;
+  my $population = Bio::PopGen::Population->new();
+  my $ind = Bio::PopGen::Individual->new(-unique_id => 'id');
+  $population->add_Individual($ind);
+
+  for my $ind ( $population->get_Individuals ) {
+    # iterate through the individuals
+  }
+
+  for my $name ( $population->get_marker_names ) {
+    my $marker = $population->get_Marker($name);
+  }
+
+  my $num_inds = $population->get_number_individuals;
+
+  my $homozygote_f   = $population->get_Frequency_Homozygotes;
+  my $heterozygote_f = $population->get_Frequency_Heterozygotes;
+
+  # make a population haploid by making fake chromosomes through
+  # haplotypes -- ala allele 1 is on chrom 1 and allele 2 is on chrom 2 
+  # the number of individuals created will thus be 2 x number in
+  # population
+  my $happop = $population->haploid_population;
+
+
+=head1 DESCRIPTION
+
+This is a collection of individuals.  We'll have ways of generating
+L<Bio::PopGen::MarkerI> objects out so we can calculate allele_frequencies
+for implementing the various statistical tests.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Population;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::PopGen::Marker;
+use Bio::PopGen::Genotype;
+
+use base qw(Bio::Root::Root Bio::PopGen::PopulationI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Population();
+ Function: Builds a new Bio::PopGen::Population object 
+ Returns : an instance of Bio::PopGen::Population
+ Args    : -individuals => array ref of individuals (optional)
+           -name        => population name (optional)
+           -source      => a source tag (optional)
+           -description => a short description string of the population (optional)
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->{'_individuals'} = [];
+  my ($name,$source,$description,
+      $inds) = $self->_rearrange([qw(NAME 
+                                     SOURCE 
+                                     DESCRIPTION
+                                     INDIVIDUALS)], @args);
+  # -individuals must be an array ref; anything else is reported with a
+  # warning and ignored.
+  if( defined $inds ) {
+      if( ref($inds) !~ /ARRAY/i ) {
+          $self->warn("Need to provide a value array ref for the -individuals initialization flag");
+      } else { 
+          $self->add_Individual(@$inds);
+      }
+  }
+
+  # The remaining attributes are optional and only set when supplied.
+  defined $name   && $self->name($name);
+  defined $source && $self->source($source);
+  defined $description && $self->description($description);
+
+  return $self;
+}
+
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $pop->name
+ Function: Get the population name
+ Returns : string representing population name
+ Args    : [optional] string representing population name
+
+
+=cut
+
+sub name{
+   # Combined getter/setter: any argument (even undef) replaces the value.
+   my ($self,@value) = @_;
+   if( @value ) {
+       $self->{'_name'} = $value[0];
+   }
+   return $self->{'_name'};
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : my $description = $pop->description
+ Function: Get the population description
+ Returns : string representing population description
+ Args    : [optional] string representing population description
+
+
+=cut
+
+sub description{
+   # Combined getter/setter: any argument (even undef) replaces the value.
+   my $self = shift;
+   $self->{'_description'} = shift if @_;
+   return $self->{'_description'};
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : my $source = $pop->source
+ Function: Get the population source
+ Returns : string representing population source
+ Args    : [optional] string representing population source
+
+
+=cut
+
+sub source{
+   # Combined getter/setter: any argument (even undef) replaces the value.
+   my ($self,@value) = @_;
+   $self->{'_source'} = $value[0] if @value;
+   return $self->{'_source'};
+}
+
+=head2 set_Allele_Frequency
+
+ Title   : set_Allele_Frequency
+ Usage   : $population->set_Allele_Frequency('marker' => { 'allele1' => 0.1});
+ Function: Sets an allele frequency for a Marker for this Population
+           This allows the Population to not have individual
+           genotypes but rather a set of overall allele frequencies
+ Returns : Count of the number of markers
+ Args    : -name      => (string) marker name
+           -allele    => (string) allele name
+           -frequency => (double) allele frequency - must be between 0 and 1
+           OR
+	   -frequencies => { 'marker1' => { 'allele1' => 0.01,
+					    'allele2' => 0.99},
+			     'marker2' => ...
+			    }
+
+=cut
+
+sub set_Allele_Frequency {
+   my ($self,@args) = @_;
+   my ($name,$allele, $frequency,
+       $frequencies) = $self->_rearrange([qw(NAME
+                                             ALLELE
+                                             FREQUENCY
+                                             FREQUENCIES
+                                             )], @args);
+   if( defined $frequencies ) { # this supersedes the other arguments
+       if( ref($frequencies) =~ /HASH/i ) {
+           # One Marker object per marker name, built from its
+           # allele => frequency hash.
+           my ($markername,$alleles);
+           while( ($markername,$alleles) = each %$frequencies ) {
+               $self->{'_allele_freqs'}->{$markername} = 
+                   Bio::PopGen::Marker->new(-name        => $markername,
+                                            -allele_freq => $alleles);
+           }
+       } else { 
+           $self->throw("Must provide a valid hashref for the -frequencies option");
+       }
+   } else { 
+       # Single-allele form: create the Marker on first use, then record
+       # the frequency of this one allele on it.
+       unless( defined $self->{'_allele_freqs'}->{$name} ) {
+           $self->{'_allele_freqs'}->{$name} = 
+               Bio::PopGen::Marker->new(-name        => $name);
+       }
+       $self->{'_allele_freqs'}->{$name}->add_Allele_Frequency($allele,$frequency);
+   }
+   # Number of markers that now have frequencies.
+   return scalar keys %{$self->{'_allele_freqs'}};
+}
+
+
+=head2 add_Individual
+
+ Title   : add_Individual
+ Usage   : $population->add_Individual(@individuals);
+ Function: Add individuals to a population
+ Returns : count of the current number in the object 
+ Args    : Array of Individuals
+
+
+=cut
+
+sub add_Individual{
+    my ($self,@inds) = @_;
+    foreach my $i ( @inds ) {
+        # Undefined entries are skipped silently; anything that is not an
+        # IndividualI is skipped with a warning.
+        next if ! defined $i;
+        unless(  $i->isa('Bio::PopGen::IndividualI') ) {
+            $self->warn("cannot add an individual ($i) which is not a Bio::PopGen::IndividualI");
+            next;
+        }
+        push @{$self->{'_individuals'}}, $i;
+    }
+    # Reset the cached marker names and the allele frequency table.
+    $self->{'_cached_markernames'} = undef;
+    $self->{'_allele_freqs'} = {};
+    return scalar @{$self->{'_individuals'} || []};
+}
+
+
+=head2 remove_Individuals
+
+ Title   : remove_Individuals
+ Usage   : $population->remove_Individuals(@ids);
+ Function: Remove individual(s) from a population
+ Returns : count of the current number in the object 
+ Args    : Array of ids
+
+=cut
+
+sub remove_Individuals {
+    # Drop every individual whose unique_id is among @names and return
+    # the number of individuals left in the population.
+    my ($self,@names) = @_;
+    my %remove = map { $_ => 1 } grep { defined $_ } @names;
+    if( $self->{'_individuals'} ) {
+        # Individuals are identified by unique_id, the same accessor used
+        # by get_Individuals(-unique_id => ...) and haploid_population.
+        @{$self->{'_individuals'}} =
+            grep { ! $remove{$_->unique_id} } @{$self->{'_individuals'}};
+    }
+    # membership changed, so derived data must be recomputed on demand
+    $self->{'_cached_markernames'} = undef;
+    $self->{'_allele_freqs'} = {};
+    return scalar @{$self->{'_individuals'} || []};
+}
+
+=head2 get_Individuals
+
+ Title   : get_Individuals
+ Usage   : my @inds = $pop->get_Individuals();
+ Function: Return the individuals, alternatively restrict by a criteria
+ Returns : Array of Bio::PopGen::IndividualI objects
+ Args    : none if want all the individuals OR,
+           -unique_id => To get an individual with a specific id
+           -marker    => To only get individuals which have a genotype specific
+                        for a specific marker name
+
+
+=cut
+
+sub get_Individuals{
+   # Return the stored individuals, optionally filtered by -unique_id or,
+   # failing that, by -marker.  Returns nothing when the population is empty.
+   my ($self,@args) = @_;
+   my @inds = @{$self->{'_individuals'} || []};
+   return unless @inds;
+   if( @args ) { # save a little time here if @args is empty
+       my ($id,$marker) = $self->_rearrange([qw(UNIQUE_ID MARKER)], @args);
+
+       # -unique_id takes precedence; -marker is only consulted without it
+       if( defined $id ) {
+           @inds = grep { $_->unique_id eq $id } @inds;
+       } elsif( defined $marker ) {
+           @inds = grep { $_->has_Marker($marker) } @inds;
+       }
+   }
+   return @inds;
+}
+
+=head2 get_Genotypes
+
+ Title   : get_Genotypes
+ Usage   : my @genotypes = $pop->get_Genotypes(-marker => $name)
+ Function: Get the genotypes for all the individuals for a specific
+           marker name
+ Returns : Array of Bio::PopGen::GenotypeI objects
+ Args    : -marker => name of the marker
+
+
+=cut
+
+sub get_Genotypes{
+   # Collect the genotypes every individual holds for the named marker.
+   # Warns and returns an empty list when no marker name is supplied.
+   my ($self,@args) = @_;
+   my ($name) = $self->_rearrange([qw(MARKER)], @args);
+   if( defined $name ) {
+       # undefined entries returned by an individual are dropped
+       return grep { defined $_ }
+              map  { $_->get_Genotypes(-marker => $name) }
+              @{$self->{'_individuals'} || []};
+   }
+   $self->warn("You needed to have provided a valid -marker value");
+   return ();
+}
+
+
+=head2 get_marker_names
+
+ Title   : get_marker_names
+ Usage   : my @names = $pop->get_marker_names;
+ Function: Get the names of the markers
+ Returns : Array of strings
+ Args    : [optional] boolean flag to ignore internal cache status
+
+
+=cut
+
+sub get_marker_names{
+    # Return the distinct marker names found across all individuals.
+    # The list is cached; pass a true $force to rebuild it.
+    my ($self,$force) = @_;
+    my $cached = $self->{'_cached_markernames'};
+    if( defined $cached && ! $force ) {
+        return @{$cached || []};
+    }
+    my %seen;
+    foreach my $individual ( $self->get_Individuals() ) {
+        foreach my $markername ( $individual->get_marker_names ) {
+            $seen{$markername}++;
+        }
+    }
+    my $names = [ keys %seen ];
+    $self->{'_cached_markernames'} = $names;
+    return @{$names || []};
+}
+
+
+=head2 get_Marker
+
+ Title   : get_Marker
+ Usage   : my $marker = $population->get_Marker($name)
+ Function: Get a Bio::PopGen::Marker object based on this population
+ Returns : Bio::PopGen::MarkerI object
+ Args    : name of the marker
+
+
+=cut
+
+sub get_Marker{
+   # Return the Marker for $markername, taken from the per-population
+   # cache when present, otherwise built from the observed genotypes
+   # and then cached.
+   my ($self,$markername) = @_;
+   my $cache = $self->{'_allele_freqs'};
+   if( defined $cache ) {
+       my $stored = $cache->{$markername};
+       return $stored if defined $stored;
+   }
+
+   my @genotypes = $self->get_Genotypes(-marker => $markername);
+   my $marker    = Bio::PopGen::Marker->new(-name => $markername);
+
+   if( @genotypes ) {
+       # tally each allele and the total number of alleles seen
+       my (%tally, $total);
+       foreach my $genotype ( @genotypes ) {
+           foreach my $allele ( $genotype->get_Alleles ) {
+               $total++;
+               $tally{$allele}++;
+           }
+       }
+       foreach my $allele ( keys %tally ) {
+           $marker->add_Allele_Frequency($allele, $tally{$allele}/$total);
+       }
+   } else {
+       $self->warn("No genotypes for Marker $markername in the population");
+   }
+   # the marker is cached even when no genotypes were found
+   $self->{'_allele_freqs'}->{$markername} = $marker;
+   return $marker;
+}
+
+
+=head2 get_number_individuals
+
+ Title   : get_number_individuals
+ Usage   : my $count = $pop->get_number_individuals;
+ Function: Get the count of the number of individuals
+ Returns : integer >= 0
+ Args    : [optional] marker name, will return a count of the number
+           of individuals which have this marker
+
+
+=cut
+
+sub get_number_individuals{
+   # Count individuals: a forced (true) count wins, otherwise everyone
+   # stored, or only those carrying $markername when one is given.
+   my ($self,$markername) = @_;
+
+   my $forced = $self->{'_forced_set_individuals'};
+   return $forced if $forced;
+
+   my $individuals = $self->{'_individuals'} || [];
+   if( defined $markername ) {
+       return scalar grep { $_->has_Marker($markername) } @$individuals;
+   }
+   return scalar @$individuals;
+}
+
+=head2 set_number_individuals
+
+ Title   : set_number_individuals
+ Usage   : $pop->set_number_individuals($num);
+ Function: Fixes the number of individuals, call this with
+           0 to unset.
+           Only use this if you know what you are doing,
+           this is only relevant when you are just adding
+           allele frequency data for a population and want to
+           calculate something like theta
+ Returns : the value that was set
+ Args    : individual count, calling it with undef or 0
+            will reset the value to return a number
+            calculated from the number of individuals
+            stored for this population.
+
+=cut
+
+sub set_number_individuals{
+   # Pin the individual count reported by get_number_individuals;
+   # a false value (0 or undef) removes the override.
+   my $self     = shift;
+   my $indcount = shift;
+   $self->{'_forced_set_individuals'} = $indcount;
+   return $indcount;
+}
+
+
+=head2 get_Frequency_Homozygotes
+
+ Title   : get_Frequency_Homozygotes
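+ Usage   : my $freq = $pop->get_Frequency_Homozygotes($marker,$allelename);
+ Function: Calculate the frequency of individuals in the population
+           which are homozygous for the given allele of a marker
+ Returns : fraction between 0 and 1 (0 if either argument is missing)
+ Args    : $marker     - marker name or Bio::PopGen::MarkerI object
+           $allelename - name of the allele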
+
+
+=cut
+
+sub get_Frequency_Homozygotes{
+   # Fraction of individuals (out of those counted for the marker) whose
+   # genotype consists solely of $allelename.  Returns 0 when either
+   # argument is missing or nobody is counted for the marker.
+   my ($self,$marker,$allelename) = @_;
+   return 0 if ! defined $marker || ! defined $allelename;
+   # accept either a marker name or a MarkerI object
+   $marker = $marker->name if( ref($marker) &&
+                               $marker->isa('Bio::PopGen::MarkerI'));
+   if( ref($marker) ) {
+       # same guard as get_Frequency_Heterozygotes
+       $self->warn("Passed in a ".ref($marker). " to get_Frequency_Homozygotes, expecting either a string or a Bio::PopGen::MarkerI");
+       return 0;
+   }
+   my $total = $self->get_number_individuals($marker);
+   return 0 unless $total;
+
+   my $homozygote_count = 0;
+   # get_Genotypes documents a named -marker argument, so use that form
+   foreach my $genotype ( $self->get_Genotypes(-marker => $marker) ) {
+       my %alleles = map { $_ => 1} $genotype->get_Alleles();
+       # what to do for non-diploid situations?
+       $homozygote_count++ if( $alleles{$allelename} && keys %alleles == 1 );
+   }
+   return $homozygote_count / $total;
+}
+
+=head2 get_Frequency_Heterozygotes
+
+ Title   : get_Frequency_Heterozygotes
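+ Usage   : my $freq = $pop->get_Frequency_Heterozygotes($marker,$allelename);
+ Function: Calculate the frequency of individuals in the population
+           which are heterozygous and carry the given allele of a marker
+ Returns : fraction between 0 and 1 (0 if either argument is missing)
+ Args    : $marker     - marker name or Bio::PopGen::MarkerI object
+           $allelename - name of the allele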
+
+
+=cut
+
+sub get_Frequency_Heterozygotes{
+   # Fraction of individuals (out of those counted for the marker) whose
+   # genotype contains $allelename together with exactly one other
+   # distinct allele.  Returns 0 when either argument is missing or
+   # nobody is counted for the marker.
+   my ($self,$marker,$allelename) = @_;
+   return 0 if ! defined $marker || ! defined $allelename;
+   # accept either a marker name or a MarkerI object
+   $marker = $marker->name if( ref($marker) &&
+                               $marker->isa('Bio::PopGen::MarkerI'));
+   if( ref($marker) ) {
+       $self->warn("Passed in a ".ref($marker). " to get_Frequency_Heterozygotes, expecting either a string or a Bio::PopGen::MarkerI");
+       return 0;
+   }
+   my $total = $self->get_number_individuals($marker);
+   return 0 unless $total;
+
+   my $heterozygote_count = 0;
+   # get_Genotypes documents a named -marker argument, so use that form
+   foreach my $genotype ( $self->get_Genotypes(-marker => $marker) ) {
+       my %alleles = map { $_ => 1} $genotype->get_Alleles();
+       # what to do for non-diploid situations?
+       $heterozygote_count++ if( $alleles{$allelename} && keys %alleles == 2 );
+   }
+   return $heterozygote_count / $total;
+}
+
+=head2 haploid_population
+
+ Title   : haploid_population
+ Usage   : my $pop = $population->haploid_population;
+ Function: Make a new population where all the individuals
+           are haploid - effectively an individual out of each
+           chromosome an individual has.  
+ Returns : L<Bio::PopGen::PopulationI>
+ Args    : None
+
+
+=cut
+
+sub haploid_population{
+   # Build a new population in which each chromosome (allele position)
+   # of every individual becomes a haploid individual of its own.
+   # Returns the new population object, of the same class as $self.
+   my ($self) = @_;
+   my @inds;
+   my @marker_names = $self->get_marker_names;
+
+   for my $ind ( $self->get_Individuals ) {
+       my @chromosomes;
+       my $id = $ind->unique_id;
+       # separate genotypes into 'chromosomes'
+       for my $marker_name ( @marker_names ) {
+           my ($genotype) = $ind->get_Genotypes(-marker => $marker_name);
+           # not every individual is typed for every marker
+           next unless defined $genotype;
+           my $i = 0;
+           for my $allele ( $genotype->get_Alleles ) {
+               push @{$chromosomes[$i]},
+                   Bio::PopGen::Genotype->new(-marker_name   => $marker_name,
+                                              -individual_id => $id.".$i",
+                                              -alleles       => [$allele]);
+               $i++;
+           }
+       }
+       # One haploid individual per chromosome.  Its id carries the same
+       # ".$i" suffix as the genotypes it owns, and it is the copy (not the
+       # original diploid individual) that joins the new population.
+       for my $i ( 0 .. $#chromosomes ) {
+           my $copyind = ref($ind)->new(-unique_id => $id.".$i",
+                                        -genotypes => $chromosomes[$i]);
+           push @inds, $copyind;
+       }
+   }
+   my $population = ref($self)->new(-name        => $self->name,
+                                    -source      => $self->source,
+                                    -description => $self->description,
+                                    -individuals => \@inds);
+   return $population;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopulationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopulationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/PopulationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,240 @@
+# $Id: PopulationI.pm,v 1.9.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::PopulationI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::PopulationI - Interface for Populations
+
+=head1 SYNOPSIS
+
+  # Get Bio::PopGen::PopulationI object somehow, like
+  # from Bio::PopGen::Population
+
+  print "name is ", $population->name(), "\n";
+  print "source is ", $population->source(), "\n";
+  print "description is ", $population->description(), "\n";
+
+  print "For marker $markername:\n";
+  foreach my $genotype ( $population->get_Genotypes(-marker => $markername) ) {
+      print "Individual ", $genotype->individual_id, " genotype alleles are ",
+      join(',', $genotype->get_Alleles()), "\n";
+  }
+  # get a marker with allele frequencies calculated from the population
+  my $marker = $population->get_Marker($markername); 
+  my %af = $marker->get_Allele_Frequencies;
+  foreach my $allele ( keys %af ) {
+      print "$allele $af{$allele}\n";
+  }
+
+=head1 DESCRIPTION
+
+This interface describes the basics of a population.  One can use this
+object to get the genotypes of specific individuals, only those
+individuals which have a certain marker, or create a marker with
+allele frequency information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, matthew.hahn-at-duke.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::PopulationI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $pop->name
+ Function: Get the population name
+ Returns : string representing population name
+ Args    : [optional] string representing population name
+
+
+=cut
+
+sub name{
+   # Interface stub: implementing classes must provide name().
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : my $description = $pop->description
+ Function: Get the population description
+ Returns : string representing population description
+ Args    : [optional] string representing population description
+
+
+=cut
+
+sub description{
+   # Interface stub: implementing classes must provide description().
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : my $source = $pop->source
+ Function: Get the population source
+ Returns : string representing population source
+ Args    : [optional] string representing population source
+
+
+=cut
+
+sub source{
+   # Interface stub: implementing classes must provide source().
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 get_Individuals
+
+ Title   : get_Individuals
+ Usage   : my @inds = $pop->get_Individuals();
+ Function: Return the individuals, alternatively restrict by a criteria
+ Returns : Array of L<Bio::PopGen::IndividualI> objects
+ Args    : none if want all the individuals OR,
+           -unique_id => To get an individual with a specific id
+           -marker    => To only get individuals which have a genotype specific
+                        for a specific marker name
+
+
+=cut
+
+sub get_Individuals{
+    # Interface stub: implementing classes must provide get_Individuals().
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 get_Genotypes
+
+ Title   : get_Genotypes
+ Usage   : my @genotypes = $pop->get_Genotypes(-marker => $name)
+ Function: Get the genotypes for all the individuals for a specific
+           marker name
+ Returns : Array of L<Bio::PopGen::GenotypeI> objects
+ Args    : -marker => name of the marker
+
+
+=cut
+
+sub get_Genotypes{
+    # Interface stub: implementing classes must provide get_Genotypes().
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 get_Marker
+
+ Title   : get_Marker
+ Usage   : my $marker = $population->get_Marker($name)
+ Function: Get a Bio::PopGen::Marker object based on this population
+ Returns : L<Bio::PopGen::MarkerI> object
+ Args    : name of the marker
+
+
+=cut
+
+sub get_Marker{
+    # Interface stub: implementing classes must provide get_Marker().
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 get_marker_names
+
+ Title   : get_marker_names
+ Usage   : my @names = $pop->get_marker_names;
+ Function: Get the names of the markers
+ Returns : Array of strings
+ Args    : none
+
+
+=cut
+
+sub get_marker_names{
+    # Interface stub: implementing classes must provide get_marker_names().
+    my $self = $_[0];
+    $self->throw_not_implemented();
+}
+
+=head2 get_Markers
+
+ Title   : get_Markers
+ Usage   : my @markers = $pop->get_Markers();
+ Function: Will retrieve a list of instantiated MarkerI objects 
+           for a population.  This is a convenience method combining
+           get_marker_names with get_Marker
+ Returns : List of Bio::PopGen::MarkerI objects
+ Args    : none
+
+
+=cut
+
+sub get_Markers{
+    # Build one marker object per marker name known to the population.
+    my ($self) = @_;
+    my @markers;
+    foreach my $markername ( $self->get_marker_names() ) {
+        push @markers, $self->get_Marker($markername);
+    }
+    return @markers;
+}
+
+
+=head2 get_number_individuals
+
+ Title   : get_number_individuals
+ Usage   : my $count = $pop->get_number_individuals;
+ Function: Get the count of the number of individuals
+ Returns : integer >= 0
+ Args    : [optional] marker name, will return a count of the number
+           of individuals which have this marker
+
+
+=cut
+
+sub get_number_individuals{
+   # Interface stub: implementing classes must provide
+   # get_number_individuals().
+   my $self = $_[0];
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/Coalescent.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/Coalescent.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/Coalescent.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,402 @@
+# $Id: Coalescent.pm,v 1.11.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Simulation::Coalescent
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Simulation::Coalescent - A Coalescent simulation factory
+
+=head1 SYNOPSIS
+
+    use Bio::PopGen::Simulation::Coalescent;
+    my @taxonnames = qw(SpeciesA SpeciesB SpeciesC SpeciesD);
+    my $sim1 = Bio::PopGen::Simulation::Coalescent->new(-samples => \@taxonnames);
+
+    my $tree = $sim1->next_tree;
+
+    # add 20 mutations randomly to the tree
+    $sim1->add_Mutations($tree,20);
+
+    # or for anonymous samples
+
+    my $sim2 = Bio::PopGen::Simulation::Coalescent->new( -sample_size => 6,
+							 -maxcount => 50);
+    my $tree2 = $sim2->next_tree;
+    # add 20 mutations randomly to the tree
+    $sim2->add_Mutations($tree2,20);
+
+=head1 DESCRIPTION
+
+Builds a random tree every time next_tree is called or up to -maxcount
+times with branch lengths and provides the ability to randomly add
+mutations onto the tree with a probability proportional to the branch
+lengths.
+
+This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
+
+Hudson, R. R. 1990. Gene genealogies and the coalescent
+       process. Pp. 1-44 in D. Futuyma and J.  Antonovics, eds. Oxford
+       surveys in evolutionary biology. Vol. 7. Oxford University
+       Press, New York.
+
+This module was previously named Bio::Tree::RandomTree
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich, Matthew Hahn
+
+Email jason-at-bioperl-dot-org
+Email matthew-dot-hahn-at-duke-dot-edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Simulation::Coalescent;
+use vars qw($PRECISION_DIGITS);
+use strict;
+
+$PRECISION_DIGITS = 3; # Precision for the branchlength
+
+use Bio::Tree::AlleleNode;
+use Bio::PopGen::Genotype;
+use Bio::Tree::Tree;
+
+use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Simulation::Coalescent();
+ Function: Builds a new Bio::PopGen::Simulation::Coalescent object 
+ Returns : an instance of Bio::PopGen::Simulation::Coalescent
+ Args    : -samples => arrayref of sample names
+           OR
+           -sample_size=> number of samples (samps will get a systematic name)
+           -maxcount   => [optional] maximum number of trees to provide
+
+=cut
+
+sub new{
+   # Construct the factory from either an explicit list of sample names
+   # (-samples) or a sample count (-sample_size); -maxcount optionally
+   # caps how many trees next_tree will hand out.
+   my ($class,@args) = @_;
+   my $self = $class->SUPER::new(@args);
+
+   $self->{'_treecounter'} = 0;
+   $self->{'_maxcount'} = 0;
+   my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
+                                                               SAMPLES
+                                                               SAMPLE_SIZE)],
+                                                           @args);
+   my @samples;
+
+   if( ! defined $samps ) {
+       if( ! defined $samplesize || $samplesize <= 0 ) {
+           # avoid interpolating undef into the error message
+           my $shown = defined $samplesize ? $samplesize : 'undef';
+           $self->throw("Must specify a valid samplesize if parameter -SAMPLES is not specified (sampsize is $shown)");
+       }
+       # anonymous samples get systematic names Samp1..SampN
+       foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }
+   } else {
+       if( ref($samps) !~ /ARRAY/i ) {
+           $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
+       }
+       @samples = @$samps;
+   }
+
+   $self->samples(\@samples);
+   $self->sample_size(scalar @samples);
+   defined $maxcount && $self->maxcount($maxcount);
+   return $self;
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree
+ Function: Returns a random tree based on the initialized number of nodes
+           NOTE: if maxcount is not specified on initialization or
+                 set to a valid integer, subsequent calls to next_tree will 
+                 continue to return random trees and never return undef
+ Returns : Bio::Tree::TreeI object
+ Args    : none
+
+=cut
+
+sub next_tree{
+   my ($self) = @_;
+   # If maxcount is set to something non-zero then next_tree will
+   # return valid trees until maxcount is reached and undef afterwards,
+   # otherwise it will always return trees.
+   # (An explicit undef is kept so existing callers see no change.)
+   return undef if( $self->maxcount &&
+                    $self->{'_treecounter'}++ >= $self->maxcount );
+   my $size = $self->sample_size;
+
+   my $in;
+   my @tree = ();
+   my @list = ();
+
+   # one slot per node: $size tips plus $size - 1 internal nodes
+   for($in=0;$in < 2*$size -1; $in++ ) {
+       push @tree, { 'nodenum' => "Node$in" };
+   }
+   # in C we would have 2 arrays
+   # an array of nodes (tree)
+   # and array of pointers to these nodes (list)
+   # and we just shuffle the list items to do the
+   # tree topology generation
+   # instead in perl, we will have a list of hashes (nodes) called @tree
+   # and a list of integers representing the indexes in tree called @list
+
+   # the first $size slots are the tips: time 0, no descendants
+   for($in=0;$in < $size;$in++)  {
+       $tree[$in]->{'time'} = 0;
+       $tree[$in]->{'desc1'} = undef;
+       $tree[$in]->{'desc2'} = undef;
+       push @list, $in;
+   }
+
+   my $t=0;
+   # generate times for the nodes
+   for($in = $size; $in > 1; $in-- ) {
+       $t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );
+       $tree[2 * $size - $in]->{'time'} =$t;
+   }
+   # topology generation
+   for ($in = $size; $in > 1; $in-- ) {
+       # first descendant: any of the $in active entries
+       my $pick = int $self->random($in);
+       my $nodeindex = $list[$pick];
+       my $swap = 2 * $size - $in;
+       $tree[$swap]->{'desc1'} = $nodeindex;
+       # fill the hole with the last active entry, leaving $in - 1 candidates
+       $list[$pick] = $list[$in-1];
+       # second descendant: drawn through random() as well, so that an
+       # overridden RNG governs the whole topology
+       $pick = int $self->random($in - 1);
+       $nodeindex = $list[$pick];
+       $tree[$swap]->{'desc2'} = $nodeindex;
+       # the new internal node takes the place of its second descendant
+       $list[$pick] = $swap;
+   }
+   # Let's convert the hashes into nodes
+
+   my @nodes = ();
+   foreach my $n ( @tree ) {
+       push @nodes,
+           Bio::Tree::AlleleNode->new(-id => $n->{'nodenum'},
+                                      -branch_length => $n->{'time'});
+   }
+   my $ct = 0;
+   foreach my $node ( @nodes ) {
+       my $n = $tree[$ct++];
+       if( defined $n->{'desc1'} ) {
+           $node->add_Descendent($nodes[$n->{'desc1'}]);
+       }
+       if( defined $n->{'desc2'} ) {
+           $node->add_Descendent($nodes[$n->{'desc2'}]);
+       }
+   }
+   # the last node created is the root
+   my $T = Bio::Tree::Tree->new(-root => pop @nodes );
+   return $T;
+}
+
+=head2 add_Mutations
+
+ Title   : add_Mutations
+ Usage   : $factory->add_Mutations($tree, $mutcount);
+ Function: Adds mutations to a tree via a random process weighted by 
+           branch length (it is a poisson distribution 
+			  as part of a coalescent process) 
+ Returns : none
+ Args    : $tree - Bio::Tree::TreeI 
+           $nummut - number of mutations
+           $precision - optional # of digits for precision
+
+
+=cut
+
+sub add_Mutations{
+   # Drop $nummut mutations onto $tree, choosing branches with probability
+   # proportional to their (scaled, integer) length, then give every node
+   # a genotype for every mutation: allele 1 on the mutated node and its
+   # descendants, allele 0 elsewhere.
+   my ($self,$tree, $nummut,$precision) = @_;
+   $precision ||= $PRECISION_DIGITS;
+   $precision = 10**$precision;
+
+   my @branches;
+   my $branchlen = 0;
+   my $last = 0;
+   my @nodes = $tree->get_nodes();
+   my $i = 0;
+
+   # Jason's somewhat simplistic way of doing a poisson
+   # distribution for a fixed number of mutations
+   # build an array and put the node number in a slot
+   # representing the branch to put a mutation on
+   # but weight the number of slots per branch by the
+   # length of the branch ( ancestor's time - node time)
+
+   foreach my $node ( @nodes ) {
+       if( $node->ancestor ) {
+           my $len = int ( ($node->ancestor->branch_length -
+                            $node->branch_length) * $precision);
+           if ( $len > 0 ) {
+               push @branches, ($i) x $len;
+               $last += $len;
+           }
+           $branchlen += $len;
+       }
+       if( ! $node->isa('Bio::Tree::AlleleNode') ) {
+           bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
+       }
+       # This lets us reset the stored genotypes so we can keep reusing the
+       # same tree topology, but throw down mutations multiple times
+       $node->reset_Genotypes;
+       $i++;
+   }
+   # sanity check
+   $self->throw("branch len is $branchlen arraylen is $last")
+        unless ( $branchlen == $last );
+   # Without any slot there is no branch to draw; do not silently pile
+   # every mutation onto the first node.
+   if( $nummut && ! @branches ) {
+       $self->warn("Cannot add mutations: no branch has a positive length at this precision");
+       return;
+   }
+   my @mutations;
+   for( my $j = 0; $j < $nummut; $j++)  {
+       # drawn through random() so that an overridden RNG is honoured
+       my $index = int($self->random($branchlen));
+       my $branch = $branches[$index];
+
+       # We're using an infinite sites model so every new
+       # mutation is a new site
+       my $g = Bio::PopGen::Genotype->new(-marker_name  => "Mutation$j",
+                                          -alleles => [1]);
+       $nodes[$branch]->add_Genotype($g);
+       push @mutations, "Mutation$j";
+       # Let's add this mutation to all the children (push it down
+       # the branches to the tips)
+       foreach my $child ( $nodes[$branch]->get_all_Descendents ) {
+           $child->add_Genotype($g);
+       }
+   }
+   # Ensure that everyone who doesn't have the mutation
+   # has the ancestral state, which is '0'
+   foreach my $node ( @nodes ) {
+       foreach my $m ( @mutations ) {
+           if( ! $node->has_Marker($m) ) {
+               my $emptyg = Bio::PopGen::Genotype->new(-marker_name => $m,
+                                                       -alleles     => [0]);
+               $node->add_Genotype($emptyg);
+           }
+       }
+   }
+}
+
+=head2 maxcount
+
+ Title   : maxcount
+ Usage   : $obj->maxcount($newval)
+ Function: Get/Set the maximum number of trees next_tree will return
+           (0 means no limit)
+ Returns : Maxcount value
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub maxcount{
+   # Get/set the cap on the number of trees next_tree hands out.
+   # A value without leading digits is rejected with a warning and the
+   # cap is reset to 0.
+   my ($self,$value) = @_;
+   if( defined $value) {
+       # Store under '_maxcount': that is the key new() initialises and
+       # the key read back below.
+       if( $value =~ /^(\d+)/ ) {
+           $self->{'_maxcount'} = $1;
+       } else {
+           $self->warn("Must specify a valid Positive integer to maxcount");
+           $self->{'_maxcount'} = 0;
+       }
+   }
+   return $self->{'_maxcount'};
+}
+
+=head2 samples
+
+ Title   : samples
+ Usage   : $obj->samples($newval)
+ Function: 
+ Example : 
+ Returns : value of samples
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub samples{
+   # Get/set the array reference of sample names.  A defined value that
+   # is not an array reference is replaced by an empty list, with a warning.
+   my ($self,$value) = @_;
+   return $self->{'samples'} unless defined $value;
+
+   if( ref($value) !~ /ARRAY/i ) {
+       $self->warn("Must specify a valid array ref to the method 'samples'");
+       $value = [];
+   }
+   $self->{'samples'} = $value;
+   return $self->{'samples'};
+}
+
+=head2 sample_size
+
+ Title   : sample_size
+ Usage   : $obj->sample_size($newval)
+ Function: 
+ Example : 
+ Returns : value of sample_size
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub sample_size{
+   # Get/set the number of samples; an undefined argument only reads.
+   my $self  = shift;
+   my $value = shift;
+   $self->{'sample_size'} = $value if defined $value;
+   return $self->{'sample_size'};
+}
+
+=head2 random
+
+ Title   : random
+ Usage   : my $rfloat = $node->random($size)
+ Function: Generates a random number between 0 and $size
+           This is abstracted so that someone can override and provide their
+           own special RNG.  This is expected to be a uniform RNG.
+ Returns : Floating point random
+ Args    : $maximum size for random number (defaults to 1)
+
+
+=cut
+
+sub random{
+   # Thin wrapper around the builtin rand so that subclasses can
+   # substitute their own random number generator.
+   my $self = shift;
+   my $max  = shift;
+   return rand($max);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/GeneticDrift.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/GeneticDrift.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Simulation/GeneticDrift.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,302 @@
+# $Id: GeneticDrift.pm,v 1.5.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Simulation::GeneticDrift
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Simulation::GeneticDrift - A simple genetic drift simulation
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::Simulation::GeneticDrift;
+  my $sim = new Bio::PopGen::Simulation::GeneticDrift(-popsize => 40,
+						      -alleles => {A => 0.2,
+							           B => 0.8});
+  for(my $i =0 ;$i < 10; $i++ ) {
+    my %f = $sim->next_generation; # get the freqs for each generation
+  }
+
+  for(my $i =0 ;$i < 10; $i++ ) {
+    # get the allele freqs as part of a Bio::PopGen::Population object
+    my $pop = $sim->next_generation('population'); 
+  }
+
+=head1 DESCRIPTION
+
+A very simple 1 locus multi-allele random drift module, start with an
+initial set of allele frequency and simulate what happens over time.
+
+This isn't really useful for anything in particular yet but will be
+built upon.
+
+See Gillespie JH. (1998) "Population Genetics: a Concise guide." The Johns
+              Hopkins University Press, Baltimore, USA.  pp.19-47.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Simulation::GeneticDrift;
+use strict;
+
+use Bio::PopGen::Population;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Simulation::GeneticDrift();
+ Function: Builds a new Bio::PopGen::Simulation::GeneticDrift object 
+ Returns : an instance of Bio::PopGen::Simulation::GeneticDrift
+ Args    : -popsize => starting N
+           -haploid => boolean if we should simulate haploids 
+           -alleles => hashref mapping allele names to their initial frequencies
+           OR
+           -population => L<Bio::PopGen::PopulationI> object to initialize 
+                          from some previously defined Population object
+                          (or result from a previous simulation)
+
+=cut
+
+sub new {
+    # Constructor.  Initial state comes either from a -population object
+    # (its size and allele frequencies are copied) or from -popsize plus an
+    # -alleles hashref of allele => frequency.  Throws when -alleles is not
+    # a hashref (and no population was given) or when validate_Frequencies
+    # rejects the resulting frequencies.
+    my ($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    # NOTE(review): $haploid is parsed here but not used anywhere in this
+    # constructor.
+    my ($population,
+        $popsize, $haploid, $alleles) = $self->_rearrange([qw(POPULATION
+                                                              POPSIZE
+                                                              HAPLOID
+                                                              ALLELES)],@args);
+    if( defined $population && ref($population) &&
+        $population->isa('Bio::PopGen::PopulationI') ) {
+        # fall back to -popsize when the population reports no individuals
+        $self->population_size($population->get_number_individuals || $popsize);
+        my %f = $population->get_Allele_Frequencies;
+        while( my ($allele,$freq) = each %f ) {
+            $self->add_Allele_Frequency($allele,$freq);
+        }
+    } else {
+        $self->population_size($popsize);
+
+        if( ! defined $alleles || ref($alleles) !~ /HASH/i  ) {
+            $self->throw("Must provide a valid set of initial allele frequencies to $class as an hashref");
+        }
+        while( my ($allele,$freq) = each %$alleles ) {
+            $self->add_Allele_Frequency($allele,$freq);
+        }
+    }
+    unless( $self->validate_Frequencies ) {
+        $self->throw("You specified allele frequencies which summed to more than 1");
+    }
+    return $self;
+}
+
+
+=head2 next_generation
+
+ Title   : next_generation
+ Usage   : my %generation = $sim->next_generation
+ Function: Get the next generation of allele frequencies based on the current
+           generation
+ Returns : Hash of allele frequencies
+ Args    : 'allelefreqs' or 'population' to get back a hash of allele 
+                 frequencies (default) OR a L<Bio::PopGen::Population> object
+
+
+=cut
+
+sub next_generation{
+    # Draw one new generation of population_size individuals from the
+    # current allele frequencies, store the resulting frequencies as the
+    # new current state and return them (as a hash, or wrapped in a
+    # Bio::PopGen::Population when $rettype matches /population/i).
+    my ($self,$rettype) = @_;
+    my %initial = $self->get_Allele_Frequencies;
+    my $popsize = $self->population_size ||
+        $self->throw("Need to have set a valid population size when running the simulation");
+    # We construct a mapping of the interval from 0 to 1 in which each
+    # allele owns a sub-range whose width is proportional to its frequency.
+    my ($last,@mapping) = (0);
+
+    # Each range covers values >= left and < right, matching rand() which
+    # goes from 0 up to (but not including) 1.  The order of the ranges
+    # does not matter - only their width ('distance') does.
+    foreach my $allele ( keys %initial ) {
+        push @mapping, [$last,$initial{$allele}+$last,$allele];
+        $last += $initial{$allele};
+    }
+
+    my %f;
+    for( my $i = 0; $i < $popsize; $i++ ) {
+        my $rand = rand(1);
+        foreach my $val ( @mapping ) {
+            if( $rand >= $val->[0] && $rand < $val->[1] ) {
+                $f{$val->[2]}++;
+                last;
+            }
+        }
+    }
+    # turn the counts into frequencies (values() yields aliases)
+    foreach my $f ( values %f ) {
+        $f /= $popsize;
+    }
+    %{$self->{'_allele_freqs'}} = %f;
+
+    if( defined $rettype &&
+        $rettype =~ /population/i) {
+        return Bio::PopGen::Population->new(-frequencies => \%f);
+    } else {
+        return %f;
+    }
+
+}
+
+=head2 population_size
+
+ Title   : population_size
+ Usage   : $obj->population_size($newval)
+ Function: 
+ Example : 
+ Returns : value of population_size (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub population_size{
+    # Get/set accessor for '_population_size'.  Any argument, including an
+    # explicit undef, is stored and returned.
+    my ($self,@newval) = @_;
+    if( @newval ) {
+        $self->{'_population_size'} = $newval[0];
+    }
+    return $self->{'_population_size'};
+}
+
+=head2 set_Frequencies_Equivalent
+
+ Title   : set_Frequencies_Equivalent
+ Usage   : $sim->set_Frequencies_Equivalent
+ Function: Reset the allele frequencies so they are all even
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub set_Frequencies_Equivalent{
+    # Overwrite every stored allele frequency with 1 / (number of alleles).
+    # Does nothing when no alleles are stored.  Returns nothing.
+    my ($self) = @_;
+    my @alleles = keys %{ $self->{'_allele_freqs'} || {} };
+    # without alleles there is nothing to equalise and 1/0 would be fatal
+    return unless @alleles;
+    my $eqfreq = 1 / scalar @alleles;
+    $self->{'_allele_freqs'}->{$_} = $eqfreq for @alleles;
+    return;
+}
+
+
+=head2 get_Allele_Frequencies
+
+ Title   : get_Allele_Frequencies
+ Usage   : my %allele_freqs = $marker->get_Allele_Frequencies;
+ Function: Get the alleles and their frequency (set relative to
+           a given population - you may want to create different
+           markers with the same name for different populations
+           with this current implementation
+ Returns : Associative array where keys are the names of the alleles
+ Args    : none
+
+
+=cut
+
+sub get_Allele_Frequencies{
+    # Return the stored allele => frequency pairs as a flat list/hash.
+    # An object on which no frequency has been stored yet yields an empty
+    # list instead of dereferencing an undefined slot.
+    my ($self) = @_;
+    return %{ $self->{'_allele_freqs'} || {} };
+}
+
+=head2 add_Allele_Frequency
+
+ Title   : add_Allele_Frequency
+ Usage   : $marker->add_Allele_Frequency($allele,$freq)
+ Function: Adds an allele frequency
+ Returns : None
+ Args    : $allele - allele name
+           $freq   - frequency value
+
+
+=cut
+
+sub add_Allele_Frequency{
+    # Store (or overwrite) the frequency of one allele in '_allele_freqs'.
+    my $self = shift;
+    my ($allele,$freq) = @_;
+    $self->{'_allele_freqs'}{$allele} = $freq;
+    return $freq;
+}
+
+=head2 reset_alleles
+
+ Title   : reset_alleles
+ Usage   : $marker->reset_alleles();
+ Function: Reset the alleles for a marker
+ Returns : None
+ Args    : None
+
+
+=cut
+
+sub reset_alleles{
+    # Discard all stored allele frequencies by installing a fresh, empty
+    # hash in the '_allele_freqs' slot.
+    my $self = shift;
+    my %cleared = ();
+    return $self->{'_allele_freqs'} = \%cleared;
+}
+
+=head2 validate_Frequencies
+
+ Title   : validate_Frequencies
+ Usage   : if( $sim->validate_Frequencies) {}
+ Function: Sanity checker that allele frequencies sum to 1 or less
+ Returns : boolean
+ Args    : -strict => 1 boolean if you want to insure that sum of freqs is 1
+
+
+=cut
+
+sub validate_Frequencies{
+    # Check that the stored allele frequencies sum to at most 1, or to
+    # exactly 1 when called with -strict => 1.  Returns a boolean.
+    my ($self,@args) = @_;
+    my ($strict) = $self->_rearrange([qw(STRICT)], @args);
+    my $sum = 0;
+    my %freq = $self->get_Allele_Frequencies;
+    $sum += $_ for values %freq;
+    # Frequencies are floating point numbers: a set such as 0.1, 0.2, 0.7
+    # can add up to 1.0000000000000002, so compare with a small tolerance
+    # rather than exactly.
+    my $tolerance = 1e-9;
+    return $strict ? ( abs($sum - 1) <= $tolerance )
+                   : ( $sum <= 1 + $tolerance );
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Statistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Statistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Statistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1248 @@
+# $Id: Statistics.pm,v 1.34.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Statistics
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Statistics - Population Genetics statistical tests  
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::Statistics;
+  use Bio::AlignIO;
+  use Bio::PopGen::IO;
+  use Bio::PopGen::Simulation::Coalescent;
+
+  my $sim = new Bio::PopGen::Simulation::Coalescent( -sample_size => 12);
+
+  my $tree = $sim->next_tree;
+
+  $sim->add_Mutations($tree,20);
+
+  my $stats = new Bio::PopGen::Statistics();
+  my $individuals = [ $tree->get_leaf_nodes];
+  my $pi = $stats->pi($individuals);
+  my $D  = $stats->tajima_D($individuals);
+
+  # Alternatively to do this on input data from
+  # See the tests in t/PopGen.t for more examples
+  my $parser = new Bio::PopGen::IO(-format => 'prettybase',
+                                   -file   => 't/data/popstats.prettybase');
+  my $pop = $parser->next_population;
+  # Note that you can also call the stats as a class method if you like
+  # the only reason to instantiate it (as above) is if you want
+  # to set the verbosity for debugging
+  $pi     = Bio::PopGen::Statistics->pi($pop);
+  $theta  = Bio::PopGen::Statistics->theta($pop);
+
+  # Pi and Theta also take additional arguments,
+  # see the documentation for more information
+
+  use Bio::PopGen::Utilities;
+  use Bio::AlignIO;
+
+  my $in = new Bio::AlignIO(-file   => 't/data/t7.aln',
+                            -format => 'clustalw');
+  my $aln = $in->next_aln;
+  # get a population, each sequence is an individual and 
+  # for the default case, every site which is not monomorphic
+  # is a 'marker'.  Each individual will have a 'genotype' for the
+  # site which will be the specific base in the alignment at that
+  # site
+
+  my $pop = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln);
+
+
+=head1 DESCRIPTION
+
+This object is intended to provide implementations of some standard
+population genetics statistics about alleles in populations.
+
+This module was previously named Bio::Tree::Statistics.
+
+This object is a place to accumulate routines for calculating various
+statistics from the coalescent simulation, marker/allele, or from
+aligned sequence data given that you can calculate alleles, number of
+segregating sites.
+
+Currently implemented:
+ Fu and Li's D    (fu_and_li_D)
+ Fu and Li's D*   (fu_and_li_D_star)
+ Fu and Li's F    (fu_and_li_F)
+ Fu and Li's F*   (fu_and_li_F_star)
+ Tajima's D       (tajima_D)
+ Watterson's theta (theta)
+ pi               (pi) - number of pairwise differences
+ composite_LD     (composite_LD)
+
+Count based methods also exist in case you have already calculated the key statistics (seg sites, num individuals, etc) and just want to compute the statistic.
+
+In all cases where a method expects an arrayref of
+L<Bio::PopGen::IndividualI> objects, a L<Bio::PopGen::PopulationI>
+object will also work.
+
+=head2 REFERENCES
+
+Fu Y.X and Li W.H. (1993) "Statistical Tests of Neutrality of
+Mutations." Genetics 133:693-709.
+
+Fu Y.X. (1996) "New Statistical Tests of Neutrality for DNA samples
+from a Population." Genetics 143:557-570.
+
+Tajima F. (1989) "Statistical method for testing the neutral mutation
+hypothesis by DNA polymorphism." Genetics 123:585-595.
+
+=head2 CITING THIS WORK
+
+Please see this reference for use of this implementation.
+
+Stajich JE and Hahn MW "Disentangling the Effects of Demography and Selection in Human History." (2005) Mol Biol Evol 22(1):63-73. 
+
+If you use these Bio::PopGen modules please cite the Bioperl
+publication (see FAQ) and the above reference.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich, Matthew Hahn
+
+Email jason-at-bioperl-dot-org
+Email matthew-dot-hahn-at-duke-dot-edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Statistics;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::PopGen::Statistics();
+ Function: Builds a new Bio::PopGen::Statistics object 
+ Returns : an instance of Bio::PopGen::Statistics
+ Args    : none
+
+
+=cut
+
+
+=head2 fu_and_li_D
+
+ Title   : fu_and_li_D
+ Usage   : my $D = $statistics->fu_and_li_D(\@ingroup,$extmutations);
+ Function: Fu and Li D statistic for a list of individuals
+           given an outgroup and the number of external mutations
+           (either provided or calculated from list of outgroup individuals)
+ Returns : decimal
+ Args    : $individuals - array reference which contains ingroup individuals 
+           (L<Bio::PopGen::Individual> or derived classes)
+           $extmutations - number of external mutations OR
+           arrayref of outgroup individuals
+
+=cut
+
+sub fu_and_li_D { 
+    # Gather sample size, segregating-site count and external-mutation
+    # count for the ingroup, then delegate to fu_and_li_D_counts.
+    # $outgroup is either a reference (handed to derived_mutations) or a
+    # plain number of external mutations.
+    my ($self,$ingroup,$outgroup) = @_;
+
+    my ($seg_sites,$n,$ancestral,$derived) = (0,0,0,0);
+    my $kind = ref($ingroup);
+    if( $kind =~ /ARRAY/i ) {
+        $n = scalar @$ingroup;
+    } elsif( $kind && $ingroup->isa('Bio::PopGen::PopulationI') ) {
+        $n = $ingroup->get_number_individuals;
+    } else {
+        $self->throw("expected an array reference of a list of Bio::PopGen::IndividualI OR a Bio::PopGen::PopulationI object to fu_and_li_D");
+        return 0;
+    }
+    # both accepted input kinds are counted the same way
+    $seg_sites = $self->segregating_sites_count($ingroup);
+
+    unless( $seg_sites > 0 ) {
+        $self->warn("mutation total was not > 0, cannot calculate a Fu and Li D");
+        return 0;
+    }
+
+    unless( defined $outgroup ) {
+        $self->warn("Need to provide either an array ref to the outgroup individuals or the number of external mutations");
+        return 0;
+    }
+
+    if( ref($outgroup) ) {
+        ($ancestral,$derived) = $self->derived_mutations($ingroup,$outgroup);
+        $ancestral = 0 unless defined $ancestral;
+    } else {
+        $ancestral = $outgroup;
+    }
+
+    return $self->fu_and_li_D_counts($n,$seg_sites,
+                                     $ancestral,$derived);
+}
+
+=head2 fu_and_li_D_counts
+
+ Title   : fu_and_li_D_counts
+ Usage   : my $D = $statistics->fu_and_li_D_counts($samps,$sites,
+                                                   $external);
+ Function: Fu and Li D statistic for the raw counts of the number
+           of samples, sites, external and internal mutations
+ Returns : decimal number
+ Args    : number of samples (N)
+           number of segregating sites (n)
+           number of external mutations (n_e)
+
+=cut
+
+
+sub fu_and_li_D_counts {
+    # Fu and Li's D from raw counts:
+    #   $n            - number of samples
+    #   $seg_sites    - number of segregating sites
+    #   $external_mut - number of external mutations
+    # Returns the statistic; returns undef (with a warning) when $n <= 1
+    # and 0 when the statistic is undefined ($n == 2 or a non-positive
+    # variance term, e.g. no segregating sites) instead of dying with a
+    # division by zero.
+    my ($self,$n,$seg_sites, $external_mut) = @_;
+    if( $n <= 1 ) {
+        $self->warn("N must be > 1\n");
+        return;
+    }
+    # c_n below divides by (n-1)*(n-2)
+    return 0 if $n == 2;
+
+    # a_n = sum of 1/k and b_n = sum of 1/k^2 for k = 1 .. n-1
+    my ($a_n,$b_n) = (0,0);
+    for(my $k = 1; $k < $n; $k++ ) {
+        $a_n += ( 1 / $k );
+        $b_n += ( 1 / $k**2 );
+    }
+
+    my $c = 2 * ( ( ( $n * $a_n ) - (2 * ( $n -1 ))) /
+                  ( ( $n - 1) * ( $n - 2 ) ) );
+
+    my $v = 1 + ( ( $a_n**2 / ( $b_n + $a_n**2 ) ) *
+                  ( $c - ( ( $n + 1) /
+                           ( $n - 1) ) ));
+
+    my $u = $a_n - 1 - $v;
+
+    my $variance = ($u * $seg_sites) + ($v * $seg_sites*$seg_sites);
+    return 0 if $variance <= 0;
+
+    return ($seg_sites - $a_n * $external_mut) / sqrt($variance);
+}
+
+
+=head2 fu_and_li_D_star
+
+ Title   : fu_and_li_D_star
+ Usage   : my $D = $statistics->fu_and_li_D_star(\@individuals);
+ Function: Fu and Li's D* statistic for a set of samples
+            Without an outgroup
+ Returns : decimal number
+ Args    : array ref of L<Bio::PopGen::IndividualI> objects
+           OR
+           L<Bio::PopGen::PopulationI> object
+
+=cut
+
+#'
+# fu_and_li_D*
+
+sub fu_and_li_D_star {
+    # Collect sample size, segregating-site count and singleton count from
+    # an arrayref of individuals or a population object and delegate to
+    # fu_and_li_D_star_counts.  Throws on any other kind of argument.
+    my ($self,$individuals) = @_;
+
+    my ($seg_sites,$n,$singletons);
+    if( ref($individuals) =~ /ARRAY/i ) {
+        $n = scalar @$individuals;
+        $seg_sites   = $self->segregating_sites_count($individuals);
+        $singletons  = $self->singleton_count($individuals);
+    } elsif( ref($individuals) &&
+             $individuals->isa('Bio::PopGen::PopulationI')) {
+        my $pop = $individuals;
+        $n = $pop->get_number_individuals;
+        $seg_sites   = $self->segregating_sites_count($pop);
+        $singletons  = $self->singleton_count($pop);
+    } else {
+        # report the method that was actually called
+        $self->throw("expected an array reference of a list of Bio::PopGen::IndividualI OR a Bio::PopGen::PopulationI object to fu_and_li_D_star");
+        return 0;
+    }
+
+    return $self->fu_and_li_D_star_counts($n,$seg_sites, $singletons);
+}
+
+=head2 fu_and_li_D_star_counts
+
+ Title   : fu_and_li_D_star_counts
+ Usage   : my $D = $statistics->fu_and_li_D_star_counts($samps,$sites,
+                                                        $singletons);
+
+ Function: Fu and Li D* statistic for the raw counts of the number
+           of samples, segregating sites and singleton mutations
+ Returns : decimal number
+ Args    : number of samples (N)
+           number of segregating sites (n)
+           singletons (n_s)
+
+=cut
+
+
+sub fu_and_li_D_star_counts {
+    # Fu and Li's D* from raw counts:
+    #   $n          - number of samples
+    #   $seg_sites  - number of segregating sites
+    #   $singletons - number of singleton mutations
+    # Returns the statistic; returns undef (with a warning) when $n <= 1
+    # and 0 when the statistic is undefined ($n == 2 or a non-positive
+    # variance term, e.g. no segregating sites) instead of dying with a
+    # division by zero.
+    my ($self,$n,$seg_sites, $singletons) = @_;
+    if( $n <= 1 ) {
+        $self->warn("N must be > 1\n");
+        return;
+    }
+    # c_n and d_n below divide by (n-2)
+    return 0 if $n == 2;
+
+    # a_n = sum of 1/k and b_n = sum of 1/k^2 for k = 1 .. n-1
+    my ($a_n,$b_n) = (0,0);
+    for(my $k = 1; $k < $n; $k++ ) {
+        $a_n += ( 1 / $k );
+        $b_n += ( 1 / $k**2 );
+    }
+
+    # a_(n+1)
+    my $a1 = $a_n + 1 / $n;
+
+    my $c = 2 * ( ( ( $n * $a_n ) - (2 * ( $n -1 ))) /
+                  ( ( $n - 1) * ( $n - 2 ) ) );
+
+    my $d = $c + ($n -2) / ($n - 1)**2 +
+        2 / ($n -1) *
+        ( 1.5 - ( (2*$a1 - 3) / ($n -2) ) -
+          1 / $n );
+
+    my $v_star = ( ( ($n/($n-1) )**2)*$b_n + (($a_n**2)*$d) -
+                   (2*( ($n*$a_n*($a_n+1)) )/(($n-1)**2)) )  /
+                   (($a_n**2) + $b_n);
+
+    my $u_star = ( ($n/($n-1))*
+                   ($a_n - ($n/
+                            ($n-1)))) - $v_star;
+
+    my $variance = ($u_star * $seg_sites) + ($v_star * $seg_sites*$seg_sites);
+    return 0 if $variance <= 0;
+
+    return (($n / ($n - 1)) * $seg_sites -
+            $a_n * $singletons) / sqrt($variance);
+}
+
+
+=head2 fu_and_li_F
+
+ Title   : fu_and_li_F
+ Usage   : my $F = Bio::PopGen::Statistics->fu_and_li_F(\@ingroup,$ext_muts);
+ Function: Calculate Fu and Li's F on an ingroup with either the set of 
+           outgroup individuals, or the number of external mutations
+ Returns : decimal number
+ Args    : array ref of L<Bio::PopGen::IndividualI> objects for the ingroup
+           OR a L<Bio::PopGen::PopulationI> object
+           number of external mutations OR list of individuals for the outgroup
+
+=cut
+
+#'
+
+sub fu_and_li_F {
+    # Gather sample size, pi and segregating-site count for the ingroup
+    # plus the external-mutation count (given directly, or obtained from
+    # derived_mutations when $outgroup is a reference), then delegate to
+    # fu_and_li_F_counts.
+    my ($self,$ingroup,$outgroup) = @_;
+
+    my $kind = ref($ingroup);
+    my $n;
+    if( $kind =~ /ARRAY/i ) {
+        $n = scalar @$ingroup;
+    } elsif( $kind && $ingroup->isa('Bio::PopGen::PopulationI') ) {
+        $n = $ingroup->get_number_individuals;
+    } else {
+        $self->throw("expected an array reference of a list of Bio::PopGen::IndividualI OR a Bio::PopGen::PopulationI object to Fu and Li's F");
+        return 0;
+    }
+    # pi - all pairwise differences
+    my $pi        = $self->pi($ingroup);
+    my $seg_sites = $self->segregating_sites_count($ingroup);
+
+    unless( defined $outgroup ) {
+        $self->warn("Need to provide either an array ref to the outgroup individuals or the number of external mutations");
+        return 0;
+    }
+
+    my $external = $outgroup;
+    if( ref($outgroup) ) {
+        # only the first returned value is needed here
+        ($external) = $self->derived_mutations($ingroup,$outgroup);
+    }
+    return $self->fu_and_li_F_counts($n,$pi,$seg_sites,$external);
+}
+
+=head2 fu_and_li_F_counts
+
+ Title   : fu_and_li_F_counts
+ Usage   : my $F = $statistics->fu_and_li_F_counts($samps,$pi,
+                                                   $sites,
+                                                   $external);
+ Function: Fu and Li F statistic for the raw counts of the number
+           of samples, sites, external and internal mutations
+ Returns : decimal number
+ Args    : number of samples (N)
+           average pairwise differences (pi)
+           number of segregating sites (n)
+           external mutations (n_e)
+
+=cut
+
+
+sub fu_and_li_F_counts {
+    # Fu and Li's F from raw counts:
+    #   $n         - number of samples
+    #   $pi        - average pairwise differences
+    #   $seg_sites - number of segregating sites
+    #   $external  - number of external mutations
+    # Returns the statistic; returns undef (with a warning) when $n <= 1
+    # and 0 when the statistic is undefined ($n == 2 or a non-positive
+    # variance term, e.g. no segregating sites) instead of dying with a
+    # division by zero.
+    my ($self,$n,$pi,$seg_sites, $external) = @_;
+    if( $n <= 1 ) {
+        $self->warn("N must be > 1\n");
+        return;
+    }
+    # c_n below divides by (n-1)*(n-2)
+    return 0 if $n == 2;
+
+    # a_n = sum of 1/k and b_n = sum of 1/k^2 for k = 1 .. n-1
+    my ($a_n,$b_n) = (0,0);
+    for(my $k = 1; $k < $n; $k++ ) {
+        $a_n += ( 1 / $k );
+        $b_n += ( 1 / $k**2 );
+    }
+
+    # a_(n+1)
+    my $a1 = $a_n + (1 / $n );
+
+    my $c = 2 * ( ( ( $n * $a_n ) - (2 * ( $n -1 ))) /
+                  ( ( $n - 1) * ( $n - 2 ) ) );
+
+    my $v_F = ( $c + ( (2*(($n**2)+$n+3)) /
+                       ( (9*$n)*($n-1) ) ) -
+                (2/($n-1)) ) / ( ($a_n**2)+$b_n );
+
+    my $u_F = ( 1 + ( ($n+1)/(3*($n-1)) )-
+                ( 4*( ($n+1)/(($n-1)**2) ))*
+                ($a1 - ((2*$n)/($n+1))) ) /
+                $a_n - $v_F;
+
+    my $variance = ($u_F*$seg_sites) + ($v_F*($seg_sites**2));
+    return 0 if $variance <= 0;
+
+    my $F = ($pi - $external) / sqrt($variance);
+
+    return $F;
+}
+
+=head2 fu_and_li_F_star
+
+ Title   : fu_and_li_F_star
+ Usage   : my $F = Bio::PopGen::Statistics->fu_and_li_F_star(\@ingroup);
+ Function: Calculate Fu and Li's F* on an ingroup without an outgroup
+           It uses count of singleton alleles instead 
+ Returns : decimal number
+ Args    : array ref of L<Bio::PopGen::IndividualI> objects for the ingroup
+           OR
+           L<Bio::PopGen::PopulationI> object
+
+=cut
+
+#' keep my emacs happy
+
+sub fu_and_li_F_star {
+    # Collect sample size, pi, segregating-site count and singleton count
+    # from an arrayref of individuals or a population object, then
+    # delegate to fu_and_li_F_star_counts.
+    my ($self,$individuals) = @_;
+
+    my $kind = ref($individuals);
+    my $n;
+    if( $kind =~ /ARRAY/i ) {
+        $n = scalar @$individuals;
+    } elsif( $kind && $individuals->isa('Bio::PopGen::PopulationI') ) {
+        $n = $individuals->get_number_individuals;
+    } else {
+        $self->throw("expected an array reference of a list of Bio::PopGen::IndividualI OR a Bio::PopGen::PopulationI object to fu_and_li_F_star");
+        return 0;
+    }
+
+    # the three helpers accept either input kind unchanged
+    my $pi         = $self->pi($individuals);
+    my $seg_sites  = $self->segregating_sites_count($individuals);
+    my $singletons = $self->singleton_count($individuals);
+
+    return $self->fu_and_li_F_star_counts($n,
+                                          $pi,
+                                          $seg_sites,
+                                          $singletons);
+} 
+
+=head2 fu_and_li_F_star_counts
+
+ Title   : fu_and_li_F_star_counts
+ Usage   : my $F = $statistics->fu_and_li_F_star_counts($samps,
+                                                   $pi,$sites,
+                                                   $singletons);
+ Function: Fu and Li F* statistic for the raw counts of the number
+           of samples, pairwise differences, sites and singleton mutations
+ Returns : decimal number
+ Args    : number of samples (N)
+           average pairwise differences (pi)
+           number of segregating sites (n)
+           singleton  mutations (n_s)
+
+=cut
+
+
+sub fu_and_li_F_star_counts {
+    # Fu and Li's F* from raw counts:
+    #   $n          - number of samples
+    #   $pi         - average pairwise differences
+    #   $seg_sites  - number of segregating sites
+    #   $singletons - number of singleton mutations
+    # Returns the statistic; returns undef (with a warning) when $n <= 1
+    # and 0 when $n == 2 or when the variance term is not positive (e.g.
+    # no segregating sites), which would otherwise be a fatal division by
+    # zero.
+    my ($self,$n,$pi,$seg_sites, $singletons) = @_;
+    if( $n <= 1 ) {
+        $self->warn("N must be > 1\n");
+        return;
+    }
+    if( $n == 2) {
+        return 0;
+    }
+
+    # a_n = sum of 1/k (Eq (2)) and b_n = sum of 1/k^2 for k = 1 .. n-1
+    my ($a_n,$b_n) = (0,0);
+    for(my $k = 1; $k < $n; $k++ ) {
+        $b_n += (1 / ($k**2));
+        $a_n += ( 1 / $k );
+    }
+    # a_(n+1)
+    my $a1 = $a_n + (1 / $n );
+
+    # From Simonsen et al (1995) instead of Fu and Li 1993
+    my $v_F_star = ( (( 2 * $n ** 3 + 110 * $n**2 - (255 * $n) + 153)/
+                      (9 * ($n ** 2) * ( $n - 1))) +
+                     ((2 * ($n - 1) * $a_n ) / $n ** 2) -
+                     (8 * $b_n / $n) ) /
+                     ( ($a_n ** 2) + $b_n );
+
+    my $u_F_star = ((( (4* ($n**2)) + (19 * $n) + 3 - (12 * ($n + 1)* $a1)) /
+                    (3 * $n * ( $n - 1))) / $a_n) - $v_F_star;
+
+    my $variance = $u_F_star*$seg_sites + $v_F_star*$seg_sites**2;
+    return 0 if $variance <= 0;
+
+    my $F_star = ( $pi - ($singletons*( ( $n-1) / $n)) ) / sqrt($variance);
+    return $F_star;
+}
+
+=head2 tajima_D
+
+ Title   : tajima_D
+ Usage   : my $D = Bio::PopGen::Statistics->tajima_D(\@samples);
+ Function: Calculate Tajima's D on a set of samples 
+ Returns : decimal number
+ Args    : array ref of L<Bio::PopGen::IndividualI> objects
+           OR 
+           L<Bio::PopGen::PopulationI> object
+
+
+=cut
+
+#'
+
+sub tajima_D {
+    # Collect sample size, pi and segregating-site count from an arrayref
+    # of individuals or a population object and delegate to
+    # tajima_D_counts.
+    my ($self,$individuals) = @_;
+
+    my $kind = ref($individuals);
+    my $n;
+    if( $kind =~ /ARRAY/i ) {
+        $n = scalar @$individuals;
+    } elsif( $kind && $individuals->isa('Bio::PopGen::PopulationI') ) {
+        $n = $individuals->get_number_individuals;
+    } else {
+        $self->throw("expected an array reference of a list of Bio::PopGen::IndividualI OR a Bio::PopGen::PopulationI object to tajima_D");
+        return 0;
+    }
+
+    # pi - all pairwise differences
+    my $pi        = $self->pi($individuals);
+    my $seg_sites = $self->segregating_sites_count($individuals);
+
+    return $self->tajima_D_counts($n,$seg_sites,$pi);
+}
+
+=head2 tajima_D_counts
+
+ Title   : tajima_D_counts
+ Usage   : my $D = $statistics->tajima_D_counts($samps,$sites,$pi);
+ Function: Tajima's D statistic for the raw counts of the number
+           of samples, sites, and avg pairwise distances (pi)
+ Returns : decimal number
+ Args    : number of samples (N)
+           number of segregating sites (n)
+           average pairwise differences (pi)
+
+=cut
+
+#'
+
+sub tajima_D_counts {
+    # Tajima's D from raw counts:
+    #   $n         - number of samples
+    #   $seg_sites - number of segregating sites
+    #   $pi        - average pairwise differences
+    # Returns the statistic; returns undef (with a warning) when $n <= 1
+    # and 0 when the variance term is not positive.  That happens with no
+    # segregating sites and also for very small samples (with n = 3 both
+    # c1 and c2 evaluate to 0), where the original code died with a
+    # division by zero.
+    my ($self,$n,$seg_sites,$pi) = @_;
+    if( $n <= 1 ) {
+        # a1 would be 0 and is used as a divisor below
+        $self->warn("N must be > 1\n");
+        return;
+    }
+
+    # a1 = sum of 1/k and a2 = sum of 1/k^2 for k = 1 .. n-1
+    my ($a1,$a2) = (0,0);
+    for(my $k = 1; $k < $n; $k++ ) {
+        $a1 += ( 1 / $k );
+        $a2 += ( 1 / $k**2 );
+    }
+
+    my $b1 = ( $n + 1 ) / ( 3* ( $n - 1) );
+    my $b2 = ( 2 * ( $n ** 2 + $n + 3) ) /
+             ( ( 9 * $n) * ( $n - 1) );
+    my $c1 = $b1 - ( 1 / $a1 );
+    my $c2 = $b2 - ( ( $n + 2 ) /
+                     ( $a1 * $n))+( $a2 / $a1 ** 2);
+    my $e1 = $c1 / $a1;
+    my $e2 = $c2 / ( $a1**2 + $a2 );
+
+    my $variance = ($e1 * $seg_sites) + (( $e2 * $seg_sites) * ( $seg_sites - 1));
+    return 0 if $variance <= 0;
+
+    my $D = ( $pi - ( $seg_sites / $a1 ) ) / sqrt($variance);
+
+    return $D;
+}
+
+=head2 pi
+
+ Title   : pi
+ Usage   : my $pi = Bio::PopGen::Statistics->pi(\@inds)
+ Function: Calculate pi (average number of pairwise differences) given
+           a list of individuals which have the same number of markers
+           (also called sites) as available from the get_Genotypes()
+           call in L<Bio::PopGen::IndividualI>
+ Returns : decimal number
+ Args    : Arg1= array ref of L<Bio::PopGen::IndividualI> objects
+             which have markers/mutations.  We expect all individuals to
+             have a marker - we will deal with missing data as a special case.
+           OR
+           Arg1= L<Bio::PopGen::PopulationI> object.  In the event that
+                 only allele frequency data is available, storing it in
+                 Population object will make this available.
+           num sites [optional], an optional second argument (integer)
+             which is the number of sites, then pi returned is pi/site.
+
+=cut
+
+sub pi {
+    # Sum heterozygosity over all markers, from either an arrayref of
+    # individuals (allele frequencies are tallied here from their
+    # genotypes) or a population object (per-marker frequencies are taken
+    # from its markers).  When $numsites is true the sum is divided by it.
+    # Returns 0 with a warning for an empty arrayref or when an element is
+    # not a Bio::PopGen::IndividualI; throws for any other argument type.
+    my ($self,$individuals,$numsites) = @_;
+    my (%data,@marker_names,$n);
+
+    if( ref($individuals) =~ /ARRAY/i ) {
+        # one possible argument is an arrayref of Bio::PopGen::IndividualI objs
+        unless( @$individuals ) {
+            # the marker names are read from the first individual below
+            $self->warn("Need at least one individual to calculate pi");
+            return 0;
+        }
+        @marker_names = $individuals->[0]->get_marker_names;
+        $n = scalar @$individuals;
+
+        # Here we are calculating the allele frequencies
+        my %marker_total;
+        foreach my $ind ( @$individuals ) {
+            if( ! $ind->isa('Bio::PopGen::IndividualI') ) {
+                $self->warn("Expected an arrayref of Bio::PopGen::IndividualI objects, this is a ".ref($ind)."\n");
+                return 0;
+            }
+            foreach my $m ( @marker_names ) {
+                foreach my $allele (map { $_->get_Alleles}
+                                    $ind->get_Genotypes($m) ) {
+                    $data{$m}->{$allele}++;
+                    $marker_total{$m}++;
+                }
+            }
+        }
+        # turn the per-marker allele counts into frequencies
+        while( my ($marker,$count) =  each %marker_total ) {
+            foreach my $c ( values %{$data{$marker}} ) {
+                $c /= $count;
+            }
+        }
+        # %data will contain allele frequencies for each marker, allele
+    } elsif( ref($individuals) &&
+             $individuals->isa('Bio::PopGen::PopulationI') ) {
+        my $pop = $individuals;
+        $n = $pop->get_number_individuals;
+        foreach my $marker( $pop->get_Markers ) {
+            push @marker_names, $marker->name;
+            $data{$marker->name} = {$marker->get_Allele_Frequencies};
+        }
+    } else {
+        $self->throw("expected an array reference of a list of Bio::PopGen::IndividualI to pi");
+    }
+
+    # For now we assume that all individuals have the same markers
+    my $pi = 0;
+    foreach my $markerdat ( values %data ) {
+        my $totalalleles = 0; # this will only be different among markers
+                              # when there is missing data
+        my @alleles = keys %$markerdat;
+        foreach my $al ( @alleles ) { $totalalleles += $markerdat->{$al} }
+        # NOTE(review): only neighbouring alleles in keys() order are
+        # paired, so a marker with more than two alleles is not compared
+        # over all allele pairs - confirm this is intended.
+        for( my $i =0; $i < scalar @alleles -1; $i++ ) {
+            my ($a1,$a2) = ( $alleles[$i], $alleles[$i+1]);
+            $pi += $self->heterozygosity($n,
+                                         $markerdat->{$a1} / $totalalleles,
+                                         $markerdat->{$a2} / $totalalleles);
+        }
+    }
+    $self->debug( "pi=$pi\n");
+    if( $numsites ) {
+        return $pi / $numsites;
+    } else {
+        return $pi;
+    }
+}
+
+
+=head2 theta
+
+ Title   : theta
+ Usage   : my $theta = Bio::PopGen::Statistics->theta($sampsize,$segsites);
+ Function: Calculates Watterson's theta from the sample size 
+           and the number of segregating sites.
+           Providing the third parameter, total number of sites will
+           return theta per site.
+           This is also known as K-hat = K / a_n   
+ Returns : decimal number 
+ Args    : sample size (integer),
+           num segregating sites (integer)
+           total sites (integer) [optional] (to calculate theta per site)
+           OR
+           provide an arrayref of the L<Bio::PopGen::IndividualI> objects
+           total sites (integer) [optional] (to calculate theta per site)
+           OR
+           provide an L<Bio::PopGen::PopulationI> object
+           total sites (integer)[optional]
+
+=cut
+
+#'
+
+sub theta {
+    # Watterson's theta: segregating sites divided by a1 = sum(1/k) for
+    # k = 1 .. n-1, optionally per site.  The first argument is a sample
+    # size, an arrayref of individuals or a population object; in the two
+    # latter cases the second argument is the optional total number of
+    # sites.  Returns undef (with a warning) when the sample size is <= 1,
+    # because a1 is then 0.
+    my $self = shift;
+    my ( $n, $seg_sites,$totalsites) = @_;
+    if( ref($n) =~ /ARRAY/i ) {
+        my $samps = $n;
+        $totalsites = $seg_sites; # only 2 arguments if one is an array
+        # we need to calculate number of polymorphic sites
+        $seg_sites = $self->segregating_sites_count($samps);
+        $n = scalar @$samps;
+
+    } elsif(ref($n) &&
+            $n->isa('Bio::PopGen::PopulationI') ) {
+        # This will handle the case when we pass in a PopulationI object
+        my $pop = $n;
+        $totalsites = $seg_sites; # shift the arguments over by one
+        $n = $pop->haploid_population->get_number_individuals;
+        $seg_sites = $self->segregating_sites_count($pop);
+    }
+    my $a1 = 0;
+    for(my $k= 1; $k < $n; $k++ ) {
+        $a1 += ( 1 / $k );
+    }
+    if( $a1 == 0 ) {
+        # no k in 1 .. n-1, i.e. n <= 1: theta is undefined
+        $self->warn("N must be > 1\n");
+        return;
+    }
+    if( $totalsites ) { # 0 and undef are the same can't divide by them
+        $seg_sites /= $totalsites;
+    }
+    return $seg_sites / $a1;
+}
+
+=head2 singleton_count
+
+ Title   : singleton_count
+ Usage   : my ($singletons) = Bio::PopGen::Statistics->singleton_count(\@inds)
+ Function: Calculate the number of mutations/alleles which only occur once in
+           a list of individuals for all sites/markers
+ Returns : (integer) number of alleles which only occur once (integer)
+ Args    : arrayref of L<Bio::PopGen::IndividualI> objects
+           OR
+           L<Bio::PopGen::PopulationI> object
+
+=cut
+
+sub singleton_count {
+    # Count, across all markers, the alleles that are observed exactly once
+    # among the supplied individuals.
+    #
+    # $individuals is either an arrayref of Bio::PopGen::IndividualI objects
+    # or a Bio::PopGen::PopulationI object.  Anything else (or a population
+    # without loaded individuals) produces a warning and a return value of 0.
+    my ($self,$individuals) = @_;
+
+    my @inds;
+    if( ref($individuals) =~ /ARRAY/ ) {
+        @inds = @$individuals;
+    } elsif( ref($individuals) &&
+             $individuals->isa('Bio::PopGen::PopulationI') ) {
+        my $pop = $individuals;
+        @inds = $pop->get_Individuals();
+        unless( @inds ) {
+            $self->warn("Need to provide a population which has individuals loaded, not just a population with allele frequencies");
+            return 0;
+        }
+    } else {
+        $self->warn("Expected either a PopulationI object or an arrayref of IndividualI objects");
+        return 0;
+    }
+
+    # first collect all the alleles into a hash structure:
+    #   marker name => { allele => number of copies seen }
+    my %sites;
+    foreach my $n ( @inds ) {
+        if( ! $n->isa('Bio::PopGen::IndividualI') ) {
+            $self->warn("Expected an arrayref of Bio::PopGen::IndividualI objects, this is a ".ref($n)."\n");
+            return 0;
+        }
+        foreach my $g ( $n->get_Genotypes ) {
+            my ($nm,@alleles) = ($g->marker_name, $g->get_Alleles);
+            foreach my $allele (@alleles ) {
+                $sites{$nm}->{$allele}++;
+            }
+        }
+    }
+
+    # then count the alleles seen only once; the marker names themselves
+    # are irrelevant here, so only the per-marker tallies are visited
+    my $singleton_allele_ct = 0;
+    foreach my $site ( values %sites ) {
+        $singleton_allele_ct += grep { $_ == 1 } values %$site;
+    }
+    return $singleton_allele_ct;
+}
+
+# Yes I know that singleton_count and segregating_sites_count are
+# basically processing the same data so calling them both is
+# redundant, something I want to fix later but want to make things
+# correct and simple first
+
+=head2 segregating_sites_count
+
+ Title   : segregating_sites_count
+ Usage   : my $segsites = Bio::PopGen::Statistics->segregating_sites_count
+ Function: Gets the number of segregating sites (number of polymorphic sites)
+ Returns : (integer) number of segregating sites
+ Args    : arrayref of L<Bio::PopGen::IndividualI> objects 
+           OR
+           L<Bio::PopGen::PopulationI> object
+
+=cut
+
+# perhaps we'll change this in the future 
+# to return the actual segregating sites
+# so one can use this to pull in the names of those sites.
+# Would be trivial if it is useful.
+
+sub segregating_sites_count{
+   # Count the markers (sites) at which more than one allele is present.
+   #
+   # $individuals is either an arrayref of Bio::PopGen::IndividualI objects,
+   # in which case alleles are tallied from every genotype, or a
+   # Bio::PopGen::PopulationI object, in which case the markers of its
+   # haploid population are inspected.  Any other argument produces a
+   # warning and a return value of 0.
+   my ($self,$individuals) = @_;
+   my $type = ref($individuals);
+   my $seg_sites = 0;
+   if( $type =~ /ARRAY/i ) {
+       # marker name => { allele => number of copies seen }
+       my %sites;
+       foreach my $n ( @$individuals ) {
+           if( ! $n->isa('Bio::PopGen::IndividualI') ) {
+               $self->warn("Expected an arrayref of Bio::PopGen::IndividualI objects, this is a ".ref($n)."\n");
+               return 0;
+           }
+           foreach my $g ( $n->get_Genotypes ) {
+               my ($nm,@alleles) = ($g->marker_name, $g->get_Alleles);
+               foreach my $allele (@alleles ) {
+                   $sites{$nm}->{$allele}++;
+               }
+           }
+       }
+       # use values b/c we don't really care what the name is;
+       # a site segregates when it carries more than one allele
+       $seg_sites = grep { keys %$_ > 1 } values %sites;
+   } elsif( $type && $individuals->isa('Bio::PopGen::PopulationI') ) {
+       foreach my $marker ( $individuals->haploid_population->get_Markers ) {
+           my @alleles = $marker->get_Alleles;
+           $seg_sites++ if ( scalar @alleles > 1 );
+       }
+   } else {
+       $self->warn("segregating_sites_count expects either a PopulationI object or a list of IndividualI objects");
+       return 0;
+   }
+   return $seg_sites;
+}
+
+
+=head2 heterozygosity
+
+ Title   : heterozygosity
+ Usage   : my $het = Bio::PopGen::Statistics->heterozygosity($sampsize,$freq1);
+ Function: Calculate the heterozygosity for a sample set for a set of alleles
+ Returns : decimal number
+ Args    : sample size (integer)
+           frequency of one allele (fraction - must be less than 1)
+           [optional] frequency of another allele - this is only needed
+                      in a non-binary allele system
+
+Note     : p^2 + 2pq + q^2
+
+=cut
+
+
+sub heterozygosity {
+    # Sample-size corrected heterozygosity:
+    #   h = n * (1 - (p^2 + q^2)) / (n - 1)
+    #
+    # $samp_size - sample size n
+    # $freq1     - frequency p of one allele
+    # $freq2     - [optional] frequency q of a second allele; when it is
+    #              not supplied a binary system is assumed and q = 1 - p.
+    #              An explicit 0 is honoured as a real frequency.
+    #
+    # A warning is issued when either frequency exceeds 1.  For a sample
+    # size of 1 the correction factor is undefined; a warning is issued and
+    # 0 is returned rather than dividing by zero.
+    my ($self,$samp_size, $freq1,$freq2) = @_;
+    $freq2 = 1 - $freq1 unless defined $freq2;
+    if( $freq1 > 1 || $freq2 > 1 ) {
+        $self->warn("heterozygosity expects frequencies to be less than 1");
+    }
+    if( $samp_size == 1 ) {
+        $self->warn("heterozygosity is undefined for a sample size of 1, returning 0");
+        return 0;
+    }
+    my $sum = ($freq1**2) + (($freq2)**2);
+    my $h = ( $samp_size*(1- $sum) ) / ($samp_size - 1) ;
+    return $h;
+}
+
+
+=head2 derived_mutations
+
+ Title   : derived_mutations
+ Usage   : my $ext = Bio::PopGen::Statistics->derived_mutations($ingroup,$outgroup);
+ Function: Calculate the number of alleles or (mutations) which are ancestral
+           and the number which are derived (occurred only on the tips)
+ Returns : array of 2 items - number of external and internal derived 
+           mutation
+ Args    : ingroup - L<Bio::PopGen::IndividualI>s arrayref OR 
+                     L<Bio::PopGen::PopulationI>
+           outgroup- L<Bio::PopGen::IndividualI>s arrayref OR 
+                     L<Bio::PopGen::PopulationI> OR
+                     a single L<Bio::PopGen::IndividualI>
+
+=cut
+
+sub derived_mutations{
+   # Count the derived alleles of an ingroup relative to an outgroup.
+   #
+   # $ingroup  - arrayref of Bio::PopGen::IndividualI objects or a
+   #             Bio::PopGen::PopulationI object
+   # $outgroup - arrayref of IndividualI objects, a PopulationI object or a
+   #             single IndividualI object; a plain (non-reference) value
+   #             is handed straight back to the caller
+   #
+   # Returns the list ($external, $internal): ingroup alleles absent from
+   # the outgroup that occur once (external) or more than once (internal).
+   # Markers where the outgroup shows more than one allele, or where the
+   # ingroup is monomorphic, are ignored.  On a type error a warning is
+   # issued and 0 is returned.
+   my ($self,$ingroup,$outgroup) = @_;
+   my (%indata,%outdata,@marker_names);
+
+   # basically we have to do some type checking
+   # if that perl were typed...
+   my ($itype,$otype) = (ref($ingroup),ref($outgroup));
+
+   return $outgroup unless( $otype ); # we expect arrayrefs or objects, nums
+                                      # are already the value we
+                                      # are searching for
+
+   # Tally allele copies per marker for a list of individuals:
+   #   $counts->{marker}->{allele} = number of copies seen
+   my $tally_alleles = sub {
+       my ($counts, @individuals) = @_;
+       for my $ind ( @individuals ) {
+           for my $m ( @marker_names ) {
+               for my $allele ( map { $_->get_Alleles }
+                                $ind->get_Genotypes($m) ) {
+                   $counts->{$m}->{$allele}++;
+               }
+           }
+       }
+   };
+
+   # pick apart the ingroup
+   # get the data
+   if( $itype =~ /ARRAY/i ) {
+       if( ! ref($ingroup->[0]) ||
+           ! $ingroup->[0]->isa('Bio::PopGen::IndividualI') ) {
+           $self->warn("Expected an arrayref of Bio::PopGen::IndividualI objects or a Population for ingroup in external_mutations");
+           return 0;
+       }
+       # we assume that all individuals have the same markers
+       # i.e. that they are aligned
+       @marker_names = $ingroup->[0]->get_marker_names;
+       $tally_alleles->(\%indata, @$ingroup);
+   } elsif( $itype && $itype->isa('Bio::PopGen::PopulationI') ) {
+       @marker_names = $ingroup->get_marker_names;
+       $tally_alleles->(\%indata,
+                        $ingroup->haploid_population->get_Individuals());
+   } else {
+       $self->warn("Need an arrayref of Bio::PopGen::IndividualI objs or a Bio::PopGen::Population for ingroup in external_mutations");
+       return 0;
+   }
+
+   if( $otype =~ /ARRAY/i ) {
+       if( ! ref($outgroup->[0]) ||
+           ! $outgroup->[0]->isa('Bio::PopGen::IndividualI') ) {
+           $self->warn("Expected an arrayref of Bio::PopGen::IndividualI objects or a Population for outgroup in external_mutations");
+           return 0;
+       }
+       $tally_alleles->(\%outdata, @$outgroup);
+   } elsif( $otype->isa('Bio::PopGen::PopulationI') ) {
+       $tally_alleles->(\%outdata,
+                        $outgroup->haploid_population->get_Individuals());
+   } elsif( $otype->isa('Bio::PopGen::IndividualI') ) {
+       # a single outgroup individual, as allowed by the documentation
+       $tally_alleles->(\%outdata, $outgroup);
+   } else {
+       $self->warn("Need an arrayref of Bio::PopGen::IndividualI objs or a Bio::PopGen::Population for outgroup in external_mutations");
+       return 0;
+   }
+
+   # derived mutations are defined as
+   #
+   # ingroup  (G A T)
+   # outgroup (A)
+   # derived mutations are G and T, A is the external mutation
+
+   # ingroup  (A T)
+   # outgroup (C)
+   # derived mutations A,T no external/ancestral mutations
+
+   # ingroup  (G A T)
+   # outgroup (A T)
+   # cannot determine
+
+   # start at 0 so callers always get numbers back, never undef
+   my ($internal,$external) = (0,0);
+   foreach my $marker ( @marker_names ) {
+       my @outalleles = keys %{$outdata{$marker}};
+       my @in_alleles = keys %{$indata{$marker}};
+       next if( @outalleles > 1 || @in_alleles == 1);
+       for my $allele ( @in_alleles ) {
+           next if exists $outdata{$marker}->{$allele};
+           if( $indata{$marker}->{$allele} == 1 ) {
+               $external++;
+           } else {
+               $internal++;
+           }
+       }
+   }
+   return ($external, $internal);
+}
+
+
+=head2 composite_LD
+
+ Title   : composite_LD
+ Usage   : %matrix = Bio::PopGen::Statistics->composite_LD($population);
+ Function: Calculate the Linkage Disequilibrium 
+           This is for calculating LD for unphased data. 
+           Other methods will be appropriate for phased haplotype data.
+
+ Returns : Hash of Hashes - first key is site 1,second key is site 2
+           and value is LD for those two sites.
+           my $LDarrayref = $matrix{$site1}->{$site2};
+           my ($ldval, $chisquared) = @$LDarrayref;
+ Args    : L<Bio::PopGen::PopulationI> or arrayref of 
+           L<Bio::PopGen::IndividualI>s 
+ Reference: Weir B.S. (1996) "Genetic Data Analysis II", 
+                      Sinauer, Sunderland, MA.
+
+=cut
+
+sub composite_LD {
+    # Composite linkage disequilibrium for unphased genotype data, computed
+    # for every pair of biallelic markers (the code follows the "N square"
+    # of Weir p.126).
+    #
+    # $pop - a Bio::PopGen::PopulationI object, or an arrayref of
+    #        Bio::PopGen::IndividualI objects which is wrapped in a
+    #        Bio::PopGen::Population first
+    #
+    # Returns a hash of hashes forming an upper triangular matrix:
+    #   $matrix{$site1}->{$site2} = [ $delta_AB, $chisquared ]
+    # An empty list is returned (after a warning) on a type error.
+    #
+    # Markers are skipped when they are not biallelic or use alleles longer
+    # than one character.  Individuals without a two-allele genotype at
+    # either site of a pair are left out of that pair's calculation, and a
+    # pair is skipped when no individual remains or a denominator is 0.
+    my ($self,$pop) = @_;
+    if( ref($pop) =~ /ARRAY/i ) {
+        if( ref($pop->[0]) && $pop->[0]->isa('Bio::PopGen::IndividualI') ) {
+            # hand over the array reference itself: flattening it would
+            # scatter the individuals across the named-argument list
+            $pop = Bio::PopGen::Population->new(-individuals => $pop);
+        } else {
+            $self->warn("composite_LD expects a Bio::PopGen::PopulationI or an arrayref of Bio::PopGen::IndividualI objects");
+            return ();
+        }
+    } elsif( ! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
+        $self->warn("composite_LD expects a Bio::PopGen::PopulationI or an arrayref of Bio::PopGen::IndividualI objects");
+        return ();
+    }
+
+    my @marker_names = $pop->get_marker_names;
+    my @inds = $pop->get_Individuals;
+    my (%lookup);
+    # Work out, for each usable marker, which allele plays the role of
+    # allele 1 (A or B) and which the role of allele 2 (a or b).
+    foreach my $marker_name ( @marker_names ) {
+        my(%allelef);
+
+        foreach my $ind ( @inds ) {
+            my ($genotype) = $ind->get_Genotypes(-marker => $marker_name);
+            if( ! defined $genotype ) {
+                $self->warn("no genotype for marker $marker_name for individual ". $ind->unique_id. "\n");
+                next;
+            }
+            my @alleles  = sort $genotype->get_Alleles;
+            next if( scalar @alleles != 2);
+            $allelef{$alleles[0]}++;
+            $allelef{$alleles[1]}++;
+        }
+
+        # we should check for cases where there > 2 alleles or
+        # only 1 allele and throw out those markers.
+        my @alleles      = sort keys %allelef;
+        my $allele_count = scalar @alleles;
+        # test if site is polymorphic
+        if( $allele_count != 2) {
+            # only really warn if we're seeing multi-allele
+            $self->warn("Skipping $marker_name because it has $allele_count alleles (".join(',',@alleles)."), \ncomposite_LD will currently only work for biallelic markers") if $allele_count > 2;
+            next;               # skip this marker
+        }
+
+        # Need to do something here to detect alleles which aren't
+        # a single character
+        if( length($alleles[0]) != 1 ||
+            length($alleles[1]) != 1 ) {
+            $self->warn("An individual has an allele which is not a single base, this is currently not supported in composite_LD - consider recoding the allele as a single character");
+            next;
+        }
+
+        # fix the call for allele 1 (A or B) and
+        # allele 2 (a or b) in terms of how we'll do the
+        # N square from Weir p.126
+        $self->debug( "$alleles[0] is 1, $alleles[1] is 2 for $marker_name\n");
+        $lookup{$marker_name}->{'1'} = $alleles[0];
+        $lookup{$marker_name}->{'2'} = $alleles[1];
+    }
+
+    @marker_names = sort keys %lookup;
+    my $site_count   = scalar @marker_names;
+    # where the final data will be stored
+    my %stats_for_sites;
+
+    # standard way of generating pairwise combos
+    # LD is done by comparing all the pairwise site (marker)
+    # combinations and keeping track of the genotype and
+    # pairwise genotype (ie genotypes of the 2 sites) frequencies
+    for( my $i = 0; $i < $site_count - 1; $i++ ) {
+        my $site1 = $marker_names[$i];
+        for( my $j = $i+1; $j < $site_count ; $j++) {
+            my $site2 = $marker_names[$j];
+            # We are using the $site1,$site2 to signify a unique key
+            my $pair_key = "$site1,$site2";
+
+            # all tallies are per pair, so they start empty on every pass
+            my (%genotypes, %total_pairwisegeno_count, %pairwise_genotypes);
+            my (%allele_count, %allele_freqs);
+            foreach my $ind ( @inds ) {
+                # build string of genotype at site 1
+                my ($genotype1) = $ind->get_Genotypes(-marker => $site1);
+                # an individual without a genotype here cannot contribute
+                next unless defined $genotype1;
+                my @alleles1  = sort $genotype1->get_Alleles;
+
+                # if an individual has only one available allele
+                # (has a blank or N for one of the chromosomes)
+                # we don't want to use it in our calculation
+
+                next unless( scalar @alleles1 == 2);
+                my $genostr1  = join(',', @alleles1);
+
+                # build string of genotype at site 2
+                my ($genotype2) = $ind->get_Genotypes(-marker => $site2);
+                next unless defined $genotype2;
+                my @alleles2  = sort $genotype2->get_Alleles;
+                next unless( scalar @alleles2 == 2);
+                my $genostr2  = join(',', @alleles2);
+
+                for my $allele (@alleles1) {
+                    $allele_count{$site1}++;
+                    $allele_freqs{$site1}->{$allele}++;
+                }
+                $genotypes{$site1}->{$genostr1}++;
+
+                for my $allele (@alleles2) {
+                    $allele_count{$site2}++;
+                    $allele_freqs{$site2}->{$allele}++;
+                }
+                $genotypes{$site2}->{$genostr2}++;
+
+                $pairwise_genotypes{$pair_key}->{"$genostr1,$genostr2"}++;
+                $total_pairwisegeno_count{$pair_key}++;
+            }
+
+            # turn the allele counts into frequencies
+            for my $site ( keys %allele_freqs ) {
+                for my $al ( keys %{ $allele_freqs{$site} } ) {
+                    $allele_freqs{$site}->{$al} /= $allele_count{$site};
+                }
+            }
+            my $n = $total_pairwisegeno_count{$pair_key};   # number of inds
+            unless( $n ) {
+                # nobody has a complete genotype at both sites, so every
+                # frequency below would be a division by zero
+                $self->debug("Skipping the site pair because no individual has a two-allele genotype at both sites.\nsite1=$site1, site2=$site2\n");
+                next;
+            }
+
+            # 'A' and 'B' are two loci or in our case site1 and site2
+            my $allele1_site1 = $lookup{$site1}->{'1'};     # this is the BigA allele
+            my $allele1_site2 = $lookup{$site2}->{'1'};     # this is the BigB allele
+            my $allele2_site1 = $lookup{$site1}->{'2'};     # this is the LittleA allele
+            my $allele2_site2 = $lookup{$site2}->{'2'};     # this is the LittleB allele
+            # AABB
+            my $N1genostr = join(",",( $allele1_site1, $allele1_site1,
+                                       $allele1_site2, $allele1_site2));
+            $self->debug(" [$site1,$site2](AABB) N1genostr=$N1genostr\n");
+            # AABb
+            my $N2genostr = join(",",( $allele1_site1, $allele1_site1,
+                                       $allele1_site2, $allele2_site2));
+            $self->debug(" [$site1,$site2](AABb) N2genostr=$N2genostr\n");
+            # AaBB
+            my $N4genostr = join(",",( $allele1_site1, $allele2_site1,
+                                       $allele1_site2, $allele1_site2));
+            $self->debug(" [$site1,$site2](AaBB) N4genostr=$N4genostr\n");
+            # AaBb
+            my $N5genostr = join(",",( $allele1_site1, $allele2_site1,
+                                       $allele1_site2, $allele2_site2));
+            $self->debug(" [$site1,$site2](AaBb) N5genostr=$N5genostr\n");
+
+            # counts of AABB, AABb, AaBB and AaBb individuals
+            my $pair_counts = $pairwise_genotypes{$pair_key};
+            my $n1 = $pair_counts->{$N1genostr} || 0;
+            my $n2 = $pair_counts->{$N2genostr} || 0;
+            my $n4 = $pair_counts->{$N4genostr} || 0;
+            my $n5 = $pair_counts->{$N5genostr} || 0;
+
+            my $homozA_site1 = join(",", ($allele1_site1,$allele1_site1));
+            my $homozB_site2 = join(",", ($allele1_site2,$allele1_site2));
+            my $p_AA = ($genotypes{$site1}->{$homozA_site1} || 0) / $n;
+            my $p_BB = ($genotypes{$site2}->{$homozB_site2} || 0) / $n;
+            my $p_A  = $allele_freqs{$site1}->{$allele1_site1} || 0;        # an individual allele freq
+            my $p_a  =  1 - $p_A;
+
+            my $p_B  = $allele_freqs{$site2}->{$allele1_site2} || 0;        # an individual allele freq
+            my $p_b  =  1 - $p_B;
+
+            # variance of allele frequencies
+            my $pi_A = $p_A * $p_a;
+            my $pi_B = $p_B * $p_b;
+
+            # hardy weinberg
+            my $D_A  = $p_AA - $p_A**2;
+            my $D_B  = $p_BB - $p_B**2;
+            my $n_AB = 2*$n1 + $n2 + $n4 + 0.5 * $n5;
+            $self->debug("n_AB=$n_AB -- n1=$n1, n2=$n2 n4=$n4 n5=$n5\n");
+
+            my $delta_AB = (1 / $n ) * ( $n_AB ) - ( 2 * $p_A * $p_B );
+            $self->debug("delta_AB=$delta_AB -- n=$n, n_AB=$n_AB p_A=$p_A, p_B=$p_B\n");
+            $self->debug(sprintf(" (%d * %.4f) / ( %.2f + %.2f) * ( %.2f + %.2f) \n",
+                                 $n,$delta_AB**2, $pi_A, $D_A, $pi_B, $D_B));
+
+            # the eval traps the division by zero of a vanishing denominator
+            my $chisquared;
+            eval { $chisquared = ( $n * ($delta_AB**2) ) /
+                       ( ( $pi_A + $D_A) * ( $pi_B + $D_B) );
+               };
+            if( $@ ) {
+                $self->debug("Skipping the site because the denom is 0.\nsite1=$site1, site2=$site2 : pi_A=$pi_A, pi_B=$pi_B D_A=$D_A, D_B=$D_B\n");
+                next;
+            }
+            # this will be an upper triangular matrix
+            $stats_for_sites{$site1}->{$site2} = [$delta_AB,$chisquared];
+        }
+    }
+    return %stats_for_sites;
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/TagHaplotype.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/TagHaplotype.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/TagHaplotype.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,438 @@
+# module Bio::PopGen::TagHaplotype.pm
+#
+# Cared for by Pedro M. Gomez-Fabre <pgf18872-at-gsk-dot-com>
+#
+# Copyright Pedro M. Gomez-Fabre
+#
+# You may distribute this module under the same term as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::TagHaplotype.pm - Haplotype tag object.
+
+=head1 SYNOPSIS
+
+    use Bio::PopGen::TagHaplotype;
+
+    my $obj = Bio::PopGen::TagHaplotype -> new($hap);
+
+=head1 DESCRIPTION
+
+This module takes a haplotype as input and tries to find the minimal set
+of SNPs that defines the haplotype. This module can be used alone, but
+because the haplotype tagging process is exponential, my suggestion
+is that before using this module you first run your data through the
+Select.pm module, also in this folder.  In any case, if you provide a
+haplotype the module will try to find the answer to your question.
+
+=head1 CONSTRUCTORS
+
+    my $obj = Bio::PopGen::TagHaplotype -> new($hap);
+
+    where $hap is the reference to an array of arrays with the haplotype.
+
+    $hap= [[0, 0, 0],
+           [1, 0, 0],
+           [0, 1, 1]
+          ];
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Pedro M. Gomez-Fabre
+
+Email pgf18872-at-gsk-dot-com
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::PopGen::TagHaplotype;
+use strict;
+
+use Data::Dumper;
+use Storable qw(dclone);
+
+use base qw(Bio::Root::Root);
+
+# Usage reminder appended to the exception message that new() throws when
+# no haplotype block is supplied.
+my $USAGE = <<EOF
+Usage:
+    Bio::PopGen::TagHaplotype->new(-haplotype_block => \$hapblockref)
+
+EOF
+;
+
+=head2 new
+
+ Title   : new
+ Function: constructor of the class.
+ Returns : self hash
+ Args    : input haplotype (array of array)
+ Status  : public
+
+=cut
+
+#------------------------
+sub new{
+#------------------------
+    # Build the object, validate the haplotype block and compute the tags.
+    #
+    # Named argument:
+    #   -haplotype_block => reference to an array of arrays, one inner
+    #                       array per haplotype, all of the same length
+    #
+    # Throws when the block is missing or its rows differ in length.
+    # Afterwards tag_list() holds the tag combinations found (undef when
+    # there are none) and tag_length() their number (0 when there are none).
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    my ($haplotype_block) = $self->_rearrange([qw(HAPLOTYPE_BLOCK)],@args);
+
+    if ($haplotype_block) {
+        $self->haplotype_block($haplotype_block);
+    }
+    else{
+        $self->throw("haplotype has not been supplied\n$USAGE");
+    }
+
+    # check that the haplotype block is well formed: every row must have
+    # as many columns as the first one
+    my $last_column = $#{$haplotype_block->[0]};
+    for my $row (@$haplotype_block){
+        if ( $last_column != $#{$row} ){
+            $self->throw("The haplotype matrix is not well formed (Not squared)");
+        }
+    }
+
+    # make the calculation
+    my $tag_list =  _scan_snp( $self ->haplotype_block );
+
+    # normalise "nothing found" to undef
+    $self ->tag_list($tag_list ? $tag_list : undef);
+
+    if ( defined $self->tag_list){
+        $self ->tag_length(scalar @{$self->tag_list});
+    }
+    else {
+        $self ->tag_length(0);  #"NO TAGS FOUND!"
+    }
+
+    return $self;
+}
+
+=head2 haplotype_block
+
+ Title   : haplotype_block
+ Usage   : my $haplotype_block = $TagHaplotype->haplotype_block();
+ Function: Get the haplotype block for a haplotype tagging selection
+ Returns : reference of array
+ Args    : reference of array with haplotype pattern
+
+
+=cut
+
+sub haplotype_block{
+    # Combined getter/setter for the haplotype block: stores the first
+    # argument when one is passed, and returns the stored value either way.
+    my $self = shift;
+    if (@_) {
+        $self->{'_haplotype_block'} = shift;
+    }
+    return $self->{'_haplotype_block'};
+}
+
+
+=head2 input_block 
+
+ Title   : input_block 
+ Usage   : $obj->input_block()
+ Function: returns the haplotype block. For now this produces the same output as
+           $self->haplotype_block, but the method is kept for compatibility.
+           This method is deprecated.
+ Returns : reference to array of array with the haplotype input value 
+ Args    : none 
+ Status  : public
+
+=cut
+
+#------------------------
+sub input_block{
+#------------------------
+    # Deprecated alias: warns about the deprecation, then delegates to
+    # haplotype_block().
+    my ($self) = @_;
+    my $class = ref($self);
+
+    $self->warn($class . "::input_block - deprecated method. Use haplotype_block() instead.");
+    return $self->haplotype_block;
+}
+
+=head2 tag_list
+
+ Title   : tag_list 
+ Usage   : $obj->tag_list()
+ Function: returns the list of SNPs combination that identify the
+           haplotype. All combinations are displayed as arrays
+ Returns : reference to array of array. 
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub tag_list{
+#------------------------
+    # Combined getter/setter for the list of tag combinations: stores the
+    # first argument when one is passed, and returns the stored value.
+    my $self = shift;
+    if (@_) {
+        $self->{'_tag_list'} = shift;
+    }
+    return $self->{'_tag_list'};
+}
+
+=head2 tag_length 
+
+ Title   : tag_length 
+ Usage   : $obj->tag_length()
+ Function: returns the length of the tag.
+ Returns : scalar 
+ Args    : none
+ Status  : public
+
+=cut
+
+#------------------------
+sub tag_length{
+#------------------------
+    # Combined getter/setter for the tag length: stores the first argument
+    # when one is passed, and returns the stored value either way.
+    my $self = shift;
+    if (@_) {
+        $self->{'_tag_length'} = shift;
+    }
+    return $self->{'_tag_length'};
+}
+
+=head2 _scan_snp 
+
+ Title   : _scan_snp 
+ Usage   : internal
+ Function: scan sets increasing the length until find a non degenerated
+           pattern. 
+ Returns : scalar
+ Args    : none
+ Status  : private
+
+=cut
+
+#------------------------
+sub _scan_snp{
+#------------------------
+    # Try SNP sets of increasing size (1, 2, ... up to the haplotype
+    # length) and return the first non-empty collection of combinations
+    # reported by _scan_combinations().
+    #
+    # $hap - reference to an array of arrays holding the haplotypes
+    #
+    # Returns a reference to an array of column-index combinations, or
+    # nothing (undef in scalar context) when no size yields any.
+    my ($hap)=@_;
+
+    my $hap_length = scalar @{$hap->[0]};    ## store the haplotype length
+
+    for my $size (1..$hap_length){
+
+        my $list = _gen_comb($hap_length, $size);
+
+        my $snp_collection = _scan_combinations($hap, $list);
+
+        # if there is any element on the collection.
+        # We have reached our goal and
+        # we can stop the calculation.
+        if($snp_collection && @$snp_collection){
+            return $snp_collection;
+        }
+    }
+
+    # say so explicitly: the value of a sub that ends in a loop is
+    # unspecified
+    return;
+}
+
+=head2 _gen_comb
+
+ Title   : _gen_comb 
+ Usage   : internal
+ Function: we supply the length of the haplotype and the length of the
+           word we need to find and the functions returns the possible
+           list of combinations.
+ Returns : scalar
+ Args    : none
+ Status  : private
+
+=cut
+
+#------------------------
+sub _gen_comb{
+#------------------------
+    # List every combination of $n column indices drawn from
+    # 0 .. $hap_length-1, returned as a reference to an array of arrays.
+    my ($hap_length, $n) = @_;
+
+    # the elements to combine: one index per haplotype column
+    my @columns = (0 .. $hap_length - 1);
+
+    # the recursion starts looking at position $start + 1, i.e. at 0
+    my $start = -1;
+
+    # the combination built so far (empty at the top level) and the
+    # accumulator the recursion appends finished combinations to
+    my @prefix;
+    my @combinations;
+
+    _generateCombinations(\@columns, \$start, \$n, \@prefix, \@combinations);
+
+    return \@combinations;
+}
+
+=head2 _generateCombinations 
+
+ Title   : _generateCombinations 
+ Usage   : internal
+ Function: Recursive function that produce all combinations for a set
+
+           i.e.:
+
+           1, 2, 3, 4
+
+           and word of B<3> will produce:
+
+           1, 2, 3
+           1, 2, 4
+           1, 3, 4
+           2, 3, 4
+
+ Returns :
+ Args    : none
+ Status  : private
+
+=cut
+
+#------------------------
+sub _generateCombinations{
+#------------------------
+    # Recursively extend the partial combination @$rvalue with elements of
+    # @$rarr found after position $$rm, appending every combination of
+    # exactly $$rn elements to @$rlist.
+    my ($rarr, $rm, $rn, $rvalue, $rlist) = @_;
+
+    my $wanted = $$rn;
+    my $pos    = $$rm + 1;
+    while ($pos < @$rarr) {
+        my @candidate = (@$rvalue, $rarr->[$pos]);
+        my $size      = @candidate;
+
+        # already too long: no later element can help either
+        last if $size > $wanted;
+
+        if ($size == $wanted) {
+            push @$rlist, [@candidate];
+        }
+        else {
+            # still too short: keep extending with the elements after $pos
+            _generateCombinations($rarr, \$pos, $rn, \@candidate, $rlist);
+        }
+        $pos++;
+    }
+}
+
+# take the list of combinations
+# i.e.: 1 2 3
+#       1 2 4
+#       1 3 4
+#       2 3 4
+#
+# generate a sub array from the haplotype with the snp tag for the combination
+# and check all haplotypes for these columns.
+# if two haplotypes have the same value. we can not define the haplotype
+# without ambiguity.
+# Will return a list of valid combinations (SNP Tags)
+#
+
+=head2 _scan_combinations 
+
+ Title   : _scan_combinations 
+ Usage   : internal
+ Function: take the haplotype and a list of possible combinations
+           for that length. Generate a subset and scan it to find out
+           whether the information is enough to define the haplotype set.
+ Returns :
+ Args    : none
+ Status  : private
+
+=cut
+
+#------------------------
+sub _scan_combinations {
+#------------------------
+    # Keep the column combinations from @$list for which _deg_test()
+    # reports no degeneration on the sub-array built by _get_subArray().
+    #
+    # Returns a reference to an array holding a copy of each retained
+    # combination, or undef when none is retained.
+    my ($hap, $list) = @_;
+
+    my @valid;
+
+    # we have to check every snp combination from the list
+    foreach my $combination (@$list) {
+
+        # extract from the big array the one we will use for tag calculations
+        my $subset = _get_subArray($hap, $combination);
+
+        next if _deg_test($subset);
+
+        push @valid, [@$combination];
+    }
+
+    return @valid ? \@valid : undef;
+}
+
+# return 1 if two arrays are degenerated (same haplotype)
+#------------------------
+sub _deg_test{
+#------------------------
+    # Report whether any two rows of the given array of arrays are equal
+    # according to compare_arrays().
+    #
+    # Returns 1 as soon as an equal pair is found, 0 otherwise.
+    my ($hap)= @_;
+
+    # for every sub array we compare each element with the rest
+    for my $c1 (0..$#$hap){
+        for my $c2 ($c1+1..$#$hap){
+            # if the two arrays are the same
+            return 1 if compare_arrays($hap->[$c1], $hap->[$c2]);
+        }
+    }
+
+    # explicit result: the value of a sub that ends in a loop is unspecified
+    return 0;
+}
+
+#------------------------
+sub _get_subArray {
+#------------------------
+    # Project every row of @$hap onto the column indices in @$combination,
+    # in the order given, and return the result as a reference to an
+    # array of arrays.
+    my ($hap, $combination) = @_;
+
+    my @subset;    # output array to be tested
+
+    foreach my $row (0 .. $#$hap) {
+        foreach my $column (@$combination) {
+            push @{ $subset[$row] }, $hap->[$row][$column];
+        }
+    }
+    return \@subset;
+}
+
+#
+# take two arrays and compare their values
+# Returns : 1 if the two values are the same
+#           0 if the values are different
+#
+
+#------------------------
+sub compare_arrays {
+#------------------------
+    # Element-wise string comparison of two array references.
+    # Returns 1 when both have the same length and every pair of elements
+    # is equal under 'eq', 0 otherwise.
+    my ($first, $second) = @_;
+
+    my $length = @$first;
+    return 0 if $length != @$second;
+
+    foreach my $pos (0 .. $length - 1) {
+        return 0 unless $first->[$pos] eq $second->[$pos];
+    }
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Utilities.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Utilities.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PopGen/Utilities.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,230 @@
+# $Id: Utilities.pm,v 1.7.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::PopGen::Utilities
+#
+# Cared for by Jason Stajich <jason-at-open-bio-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PopGen::Utilities - Utilities for working with PopGen data and objects
+
+=head1 SYNOPSIS
+
+  use Bio::PopGen::Utilities;
+  use Bio::AlignIO;
+
+  my $in = new Bio::AlignIO(-file   => 't/data/t7.aln',
+                            -format => 'clustalw');
+  my $aln = $in->next_aln;
+  # get a population, each sequence is an individual and 
+  # for the default case, every site which is not monomorphic
+  # is a 'marker'.  Each individual will have a 'genotype' for the
+  # site which will be the specific base in the alignment at that
+  # site
+  my $pop = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln);
+
+  # get the synonymous sites from the alignment only as the 'genotypes'
+  # for the population
+  my $synpop = Bio::PopGen::Utilities->aln_to_population(-site_model => 'syn',
+                                                         -alignment  => $aln);
+
+
+=head1 DESCRIPTION
+
+This object provides some convenience functions to turn sequence
+alignments into usable objects for the Population genetics modules
+(Bio::PopGen).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-open-bio-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PopGen::Utilities;
+use strict;
+
+use Bio::Align::DNAStatistics;
+use Bio::PopGen::Population;
+use Bio::PopGen::Individual;
+
+use base qw(Bio::Root::Root);
+
+
+=head2 aln_to_population
+
+ Title   : aln_to_population
+ Usage   : my $pop = Bio::PopGen::Utilities->aln_to_population($aln);
+ Function: Turn an alignment into a set of L<Bio::PopGen::Individual>
+           objects grouped in a L<Bio::PopGen::Population> object
+
+           Sites are treated as 'Markers' in the Bioperl PopGen object
+           model in the sense that a site is a unique location for which
+           an individual will have a genotype (a set of alleles). 
+           In this implementation we are assuming that each individual 
+           has a single entry in the alignment file.
+
+           Specify a site model as one of those listed
+           'all' -- every base in the alignment is considered a site
+           'syn' -- Synonymous sites. Those where an observed substitution
+                    does not change the amino acid [Assumes this is only
+                    coding sequence and the frame starts with first base 
+                    in the alignment]
+           'non' -- Non-synonymous sites.  Those where a substitution changes
+                    the encoded amino acid.
+
+           The option -site_model
+                for Non-synonymous: 'non' or 'non-synonomous' or 'NS' or 'Ka'
+		    Synonymous	  : 'synonomous' or 'syn' or 'S' or 'Ks'
+                    All           : 'all' 
+          To see all sites, including those which are fixed in the population
+          add -include_monomorphic => 1
+          to the arguments
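+ Returns : A L<Bio::PopGen::Population> object; nothing if no alignment
+           was provided or the alignment is not flush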
+ Args    : -include_monomorphic => 1   to specify all sites, 
+                                       even those which are monomorphic
+                                       in the population 
+                                  (useful for HKA test mostly) 
+                            [default is false]
+           -site_model     => one-of 'all', 'syn', or 'non' 
+                             to specify a site model you want to see data
+                             for
+                            [default is all]
+           -alignment      => provide a L<Bio::SimpleAlign> object [required]
+
+=cut
+
+sub aln_to_population{
+   # Build a Bio::PopGen::Population from an alignment: one Individual per
+   # aligned sequence and one single-allele Genotype per retained column.
+   # Only the 'all' site model (or no site model) is implemented; any other
+   # value of -site_model ends in a throw().
+   # Returns the population, or nothing (after a warning) when the alignment
+   # is missing or not flush.
+   my ($self,@args) = @_;
+   my ($aln,
+       $sitemodel,
+       $includefixed) = $self->_rearrange([qw(ALIGNMENT
+                                              SITE_MODEL
+                                              INCLUDE_MONOMORPHIC)],
+                                          @args);
+   if( ! defined $aln ) { 
+       $self->warn("Must provide a valid Bio::SimpleAlign object to run aln_to_population");
+       return;
+   }
+   if( ! $aln->is_flush ) {
+       $self->warn("Must provide a Bio::SimpleAlign object with aligned sequences to aln_to_population!");
+       return;
+   }
+
+   my $population = Bio::PopGen::Population->new(-source => 'alignment');
+   # sequence strings, in alignment order
+   my @seqs = map { $_->seq() } $aln->each_seq;
+
+   if( ! defined $sitemodel ||
+       $sitemodel =~ /all/i ) {
+       # one individual per sequence, in the same order as @seqs
+       my @inds;
+       for my $seq ( $aln->each_seq ) {
+           push @inds, Bio::PopGen::Individual->new(-unique_id => $seq->display_id);
+       }
+       for( my $i = 0; $i < $aln->length; $i++ ) {
+           my $nm = "Site-$i";   # marker name is the 0-based column index
+           my (@genotypes,%set);
+           # do we skip indels?
+           for my $seq ( @seqs ) {
+               my $site = substr($seq,$i,1);
+               $set{$site}++;
+               push @genotypes, $site;
+           }
+           # keep the column when it is polymorphic, or always when
+           # monomorphic sites were requested
+           if( keys %set > 1 || $includefixed ) {
+               for( my $j = 0; $j < scalar @genotypes; $j++ ) {
+                   $inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
+                                           (-marker_name  => $nm,
+                                            -individual_id=> $inds[$j]->unique_id,
+                                            -alleles      => [$genotypes[$j]]));
+               }
+           }
+       }
+       for my $ind ( @inds ) { 
+           $population->add_Individual($ind);
+       }
+   } else { 
+       $self->throw("Can only build sites based on all the data right now!");
+       # NOTE(review): the rest of this branch looks unreachable, assuming
+       # throw() raises an exception. It is the unfinished sketch of the
+       # syn/non-syn site models, kept as found.
+       my ($sitecount,@sites) = ($aln->length);
+       my @sitecat;
+       # ToDo: categorize site a syn, non-syn, monomorphic
+       #      4-fold degenerate?
+       my (@codons,@codons_v, $codon_ct);
+       
+       for( my $i = 0; $i < $sitecount; $i++ ) {
+           if( $i && $i % 3 == 0 ) {
+               # A A T  T T G  T C G
+               # A A A  T A G  T A G
+               # A A T  T A G  T C T
+               for my $cod ( @{$codons[$codon_ct]} ) {
+                   $codons_v[$codon_ct]->{$cod}++;
+               }
+               $codon_ct++;
+           }
+           my $seqct = 0;
+           foreach my $seq ( @seqs ) {   
+               my $char = substr($seq,$i,1);
+               $sites[$i]->{'alleles'}->{$char}++;
+               $sites[$i]->{'seq'}->[$seqct] = $char;
+               $codons[$codon_ct]->[$seqct] .= $char;
+               $seqct++;
+           }
+       }
+
+       # at the end @sites will be full, each entry is a column and it
+       # will have a hashref with 2 values, 'alleles' which will have
+       # a frequency for each base as an allele. 
+       # 'seq' will have the
+       # participating residue for each sequence
+
+
+       my ($i,$seqctr) = (0,0); 
+       for my $site ( @sites ) { 
+           my %alleles = %{$site->{'alleles'}};
+           my %codons = $codons_v[$i % 3]->[$seqctr];
+           $i++;
+           $seqctr++;
+       }
+   }
+   return $population;
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,869 @@
+# $Id: PrimarySeq.pm,v 1.95.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# bioperl module for Bio::PrimarySeq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PrimarySeq - Bioperl lightweight Sequence Object
+
+=head1 SYNOPSIS
+
+  # Bio::SeqIO for file reading, Bio::DB::GenBank for
+  # database reading
+
+  use Bio::Seq;
+  use Bio::SeqIO;
+  use Bio::DB::GenBank;
+
+  # make from memory
+
+  $seqobj = Bio::PrimarySeq->new ( -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG',
+                                   -id  => 'GeneFragment-12',
+                                   -accession_number => 'X78121',
+                                   -alphabet => 'dna',
+                                   -is_circular => 1 );
+  print "Sequence ", $seqobj->id(), " with accession ",
+    $seqobj->accession_number, "\n";
+
+  # read from file
+
+  $inputstream = Bio::SeqIO->new(-file => "myseq.fa",
+                                 -format => 'Fasta');
+  $seqobj = $inputstream->next_seq();
+  print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n";
+
+  # to get out parts of the sequence.
+
+  print "Sequence ", $seqobj->id(), " with accession ",
+    $seqobj->accession_number, " and desc ", $seqobj->desc, "\n";
+
+  $string  = $seqobj->seq();
+  $string2 = $seqobj->subseq(1,40);
+
+=head1 DESCRIPTION
+
+PrimarySeq is a lightweight Sequence object, storing the sequence, its
+name, a computer-useful unique name, and other fundamental attributes.
+It does not contain sequence features or other information.  To have a
+sequence with sequence features you should use the Seq object which uses
+this object.
+
+Although new users will use Bio::PrimarySeq a lot, in general you will
+be using it from the Bio::Seq object. For more information on Bio::Seq
+see L<Bio::Seq>. For interest you might like to know that
+Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls
+to do with sequence to it (the has-a relationship lets us get out of a
+otherwise nasty cyclical reference in Perl which would leak memory).
+
+Sequence objects are defined by the Bio::PrimarySeqI interface, and this
+object is a pure Perl implementation of the interface. If that's
+gibberish to you, don't worry. The take home message is that this
+object is the bioperl default sequence object, but other people can
+use their own objects as sequences if they so wish. If you are
+interested in wrapping your own objects as compliant Bioperl sequence
+objects, then you should read the Bio::PrimarySeqI documentation
+
+The documentation of this object is a merge of the Bio::PrimarySeq and
+Bio::PrimarySeqI documentation.  This allows all the methods which you can
+call on sequence objects here.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PrimarySeq;
+use vars qw($MATCHPATTERN);
+use strict;
+
+# Body of a regex character class (without the enclosing brackets) that
+# validate_seq() interpolates to decide which characters a sequence string
+# may contain: ASCII letters plus '-', '.', '*', '?', '=' and '~'.
+$MATCHPATTERN = 'A-Za-z\-\.\*\?=~';
+
+use base qw(Bio::Root::Root Bio::PrimarySeqI Bio::IdentifiableI Bio::DescribableI);
+
+#
+# setup the allowed values for alphabet()
+# (lookup hash keyed by alphabet name; alphabet() throws for any value
+# that is not a key of this hash)
+#
+
+my %valid_type = map {$_, 1} qw( dna rna protein );
+
+=head2 new
+
+ Title   : new
+ Usage   : $seq    = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
+                                           -id  => 'human_id',
+					   -accession_number => 'AL000012',
+					   );
+
+ Function: Returns a new primary seq object from
+           basic constructors, being a string for the sequence
+           and strings for id and accession_number.
+
+           Note that you can provide an empty sequence string. However, in
+           this case you MUST specify the type of sequence you wish to
+           initialize by the parameter -alphabet. See alphabet() for possible
+           values.
+ Returns : a new Bio::PrimarySeq object
+ Args    : -seq         => sequence string
+           -display_id  => display id of the sequence (locus name)
+           -accession_number => accession number
+           -primary_id  => primary id (Genbank id)
+           -namespace   => the namespace for the accession
+           -authority   => the authority for the namespace
+           -description => description text
+           -desc        => alias for description
+           -alphabet    => sequence type (alphabet) (dna|rna|protein)
+           -id          => alias for display id
+           -is_circular => boolean field for whether or not sequence is circular
+
+=cut
+
+
+sub new {
+    # Constructor: unpack the named arguments, then feed each one that was
+    # supplied to the matching accessor. The order of the calls matters:
+    # length and alphabet are set before the sequence.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my($seq,$id,$acc,$pid,$ns,$auth,$v,$oid,
+       $desc,$description,
+       $alphabet,$given_id,$is_circular,$direct,$ref_to_seq,$len) =
+        $self->_rearrange([qw(SEQ
+                              DISPLAY_ID
+                              ACCESSION_NUMBER
+                              PRIMARY_ID
+                              NAMESPACE
+                              AUTHORITY
+                              VERSION
+                              OBJECT_ID
+                              DESC
+                              DESCRIPTION
+                              ALPHABET
+                              ID
+                              IS_CIRCULAR
+                              DIRECT
+                              REF_TO_SEQ
+                              LENGTH
+                              )],
+                          @args);
+
+    # -id and -display_id may only be given together when they agree;
+    # when -id is given it is the value that ends up being used
+    if( defined $id && defined $given_id && $id ne $given_id ) {
+        $self->throw("Provided both id and display_id constructor ".
+                     "functions. [$id] [$given_id]");
+    }
+    $id = $given_id if defined $given_id;
+
+    # let's set the length before the seq -- if there is one, this length is
+    # going to be invalidated
+    $self->length($len) if defined $len;
+
+    # if alphabet is provided we set it first, so that it won't be guessed
+    # when the sequence is set
+    $self->alphabet($alphabet) if $alphabet;
+
+    if( $direct && $ref_to_seq ) {
+        # direct mode: store the referenced string without validation and
+        # guess the alphabet only when none was passed in
+        $self->{'seq'} = $$ref_to_seq;
+        $self->_guess_alphabet() unless $alphabet;
+    } elsif( defined($seq) ) {
+        # note: the sequence string may be empty
+        $self->seq($seq);
+    }
+
+    $self->display_id($id)            if $id;
+    $self->accession_number($acc)     if $acc;
+    $self->primary_id($pid)           if defined $pid;
+    $self->desc($desc)                if $desc;
+    $self->description($description)  if $description;
+    $self->is_circular($is_circular)  if $is_circular;
+    $self->namespace($ns)             if $ns;
+    $self->authority($auth)           if $auth;
+    $self->version($v)                if defined($v);
+    $self->object_id($oid)            if defined($oid);
+
+    return $self;
+}
+
+sub direct_seq_set {
+    # Store the given value in the 'seq' slot without any validation and
+    # return it; called without a value it does nothing and returns nothing.
+    my ($obj, @values) = @_;
+    return unless @values;
+    return $obj->{'seq'} = $values[0];
+}
+
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string    = $obj->seq()
+ Function: Returns the sequence as a string of letters. The
+           case of the letters is left up to the implementer.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard), but you should not rely on this.
+ Returns : A scalar
+ Args    : Optionally on set the new value (a string). An optional second
+           argument presets the alphabet (otherwise it will be guessed).
+
+=cut
+
+sub seq {
+   # Get or set the sequence string.
+   #   no arguments      : return the stored sequence
+   #   ($value)          : validate and store $value, then (re)guess the
+   #                       alphabet when needed
+   #   ($value,$alphabet): as above, but set the alphabet explicitly
+   # Throws if a defined $value does not pass validate_seq().
+   my ($obj,@args) = @_;
+
+   if( scalar(@args) == 0 ) {
+       return $obj->{'seq'};
+   }
+
+   my ($value,$alphabet) = @args;
+
+   if(defined($value) && (! $obj->validate_seq($value))) {
+       $obj->throw("Attempting to set the sequence to [$value] ".
+                   "which does not look healthy");
+   }
+   # if a sequence was already set we make sure that we re-adjust the
+   # alphabet, otherwise we skip guessing if alphabet is already set
+   # note: if the new seq is empty or undef, we don't consider that a
+   # change (we wouldn't have anything to guess on anyway)
+   my $is_changed_seq =
+       exists($obj->{'seq'}) && (CORE::length($value || '') > 0);
+   $obj->{'seq'} = $value;
+   # new alphabet overridden by arguments?
+   if($alphabet) {
+       # yes, set it no matter what
+       $obj->alphabet($alphabet);
+   } elsif( # if we changed a previous sequence to a new one
+            $is_changed_seq ||
+            # or if there is no alphabet yet at all
+            (! defined($obj->alphabet()))) {
+       # we need to guess the (possibly new) alphabet
+       $obj->_guess_alphabet();
+   } # else (seq not changed and alphabet was defined) do nothing
+   # if the seq is changed, make sure we unset a possibly set length
+   $obj->length(undef) if $is_changed_seq || $obj->{'seq'};
+
+   return $obj->{'seq'};
+}
+
+=head2 validate_seq
+
+ Title   : validate_seq
+ Usage   : if(! $seq->validate_seq($seq_str) ) {
+                print "sequence $seq_str is not valid for an object of
+                alphabet ",$seq->alphabet, "\n";
+	   }
+ Function: Validates a given sequence string. A validating sequence string
+           must be accepted by seq(). A string that does not validate will
+           lead to an exception if passed to seq().
+
+           The implementation provided here does not take alphabet() into
+           account. Allowed are all letters (A-Z) and '-','.','*','?','=',
+           and '~'.
+
+ Example :
+ Returns : 1 if the supplied sequence string is valid for the object, and
+           0 otherwise.
+ Args    : The sequence string to be validated.
+
+
+=cut
+
+sub validate_seq {
+    # Check a sequence string (default: the object's own sequence) against
+    # the allowed character set. Returns 1 when it is acceptable, 0 when it
+    # is undefined or contains other characters (after a warning that
+    # lists the offending runs).
+    my ($self,$seqstr) = @_;
+
+    $seqstr = $self->seq() unless defined $seqstr;
+    return 0 unless( defined $seqstr);
+
+    # the empty string is always valid
+    return 1 if CORE::length($seqstr) == 0;
+    return 1 if $seqstr =~ /^([$MATCHPATTERN]+)$/;
+
+    $self->warn("seq doesn't validate, mismatch is " .
+                join(",",($seqstr =~ /([^$MATCHPATTERN]+)/g)));
+    return 0;
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+ Function: returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence
+ Returns : a string
+ Args    : integer for start position
+           integer for end position
+                 OR
+           Bio::LocationI location for subseq (strand honored)
+
+=cut
+
+sub subseq {
+   # Return the part of the sequence between two 1-based, inclusive
+   # coordinates, or the concatenated pieces described by a
+   # Bio::LocationI object (pieces on the minus strand are
+   # reverse-complemented). An optional replacement string is handed
+   # through to substr().
+   my ($self,$start,$end,$replace) = @_;
+
+   # location form: recurse once per sub-location and join the pieces
+   if( ref($start) && $start->isa('Bio::LocationI') ) {
+       my $loc = $start;
+       $replace = $end; # do we really use this anywhere? scary. HL
+       my $joined = "";
+       foreach my $subloc ($loc->each_Location()) {
+           my $piece = $self->subseq($subloc->start(),
+                                     $subloc->end(), $replace);
+           $piece = Bio::PrimarySeq->new('-seq' => $piece)->revcom()->seq()
+               if $subloc->strand() < 0;
+           $joined .= $piece;
+       }
+       return $joined;
+   }
+
+   # anything else must be a pair of defined coordinates
+   unless( defined $start && defined $end ) {
+       $self->warn("Incorrect parameters to subseq - must be two integers or a Bio::LocationI object");
+       return;
+   }
+
+   $self->throw("Bad start,end parameters. Start [$start] has to be ".
+                "less than end [$end]")
+       if $start > $end;
+   $self->throw("Bad start parameter ($start). Start must be positive.")
+       if $start <= 0;
+   $self->throw("Bad end parameter ($end). End must be less than the total length of sequence (total=".$self->length.")")
+       if $end > $self->length;
+
+   # turn the 1-based inclusive range into substr's 0-based offset + length
+   my $offset = $start - 1;
+   my $span   = $end - $offset;
+
+   return substr( $self->seq(), $offset, $span, $replace) if defined $replace;
+   return substr( $self->seq(), $offset, $span);
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $seq->length();
+ Function: Get the length of the sequence in number of symbols (bases
+           or amino acids).
+
+           You can also set this attribute, even to a number that does
+           not match the length of the sequence string. This is useful
+           if you don''t want to set the sequence too, or if you want
+           to free up memory by unsetting the sequence. In the latter
+           case you could do e.g.
+
+               $seq->length($seq->length);
+               $seq->seq(undef);
+
+           Note that if you set the sequence to a value other than
+           undef at any time, the length attribute will be
+           invalidated, and the length of the sequence string will be
+           reported again. Also, we won''t let you lie about the length.
+
+ Example :
+ Returns : integer representing the length of the sequence.
+ Args    : Optionally, the value on set
+
+=cut
+
+sub length {
+    # Get/set the sequence length. The stored length is only reported when
+    # one was set; a set to a defined value that disagrees with a non-empty
+    # sequence string throws. A set call returns the length of the
+    # sequence string, not the value passed in.
+    my ($self, @args) = @_;
+    my $len = CORE::length($self->seq() || '');
+
+    if (@args) {
+        my $val = $args[0];
+        if (defined($val) && $len && ($len != $val)) {
+            $self->throw("You're trying to lie about the length: ".
+                         "is $len but you say ".$val);
+        }
+        $self->{'_seq_length'} = $val;
+        return $len;
+    }
+
+    return defined($self->{'_seq_length'}) ? $self->{'_seq_length'} : $len;
+}
+
+=head2 display_id
+
+ Title   : display_id or display_name
+ Usage   : $id_string = $obj->display_id();
+ Function: returns the display id, aka the common name of the Sequence object.
+
+           The semantics of this is that it is the most likely string to
+           be used as an identifier of the sequence, and likely to have
+           "human" readability.  The id is equivalent to the ID field of
+           the GenBank/EMBL databanks and the id field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id to
+           embed other information. Bioperl does not use any embedded
+           information in the ID field, and people are encouraged to use
+           other mechanisms (accession field for example, or extending
+           the sequence object) to solve this.
+
+           With the new Bio::DescribableI interface, display_name aliases
+           to this method.
+
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+sub display_id {
+    # Get/set the 'display_id' slot; only a defined argument overwrites
+    # the stored value.
+    my $obj   = shift;
+    my $value = shift;
+
+    $obj->{'display_id'} = $value if defined $value;
+    return $obj->{'display_id'};
+}
+
+=head2 accession_number
+
+ Title   : accession_number or object_id
+ Usage   : $unique_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should
+           return "unknown".
+
+           [Note this method name is likely to change in 1.3]
+
+           With the new Bio::IdentifiableI interface, this is aliased
+           to object_id
+
+ Returns : A string
+ Args    : A string (optional) for setting
+
+=cut
+
+sub accession_number {
+    # Get/set the accession number. A defined argument is stored and
+    # returned; otherwise the stored value is returned, or the string
+    # 'unknown' when none has been set.
+    my( $obj, $acc ) = @_;
+
+    if (defined $acc) {
+        $obj->{'accession_number'} = $acc;
+        return $acc;
+    }
+
+    my $stored = $obj->{'accession_number'};
+    return defined $stored ? $stored : 'unknown';
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their
+           own object ids in a way the implementation can control;
+           clients can expect one id to map to one object.
+
+           For sequences with no natural primary id, this method
+           should return a stringified memory location.
+
+ Returns : A string
+ Args    : A string (optional, for setting)
+
+=cut
+
+sub primary_id {
+    # Get/set the primary id. Any passed value (even undef) is stored;
+    # while no defined value is stored the stringified object reference
+    # is returned instead.
+    my ($obj, @args) = @_;
+
+    $obj->{'primary_id'} = $args[0] if @args;
+
+    return defined($obj->{'primary_id'}) ? $obj->{'primary_id'} : "$obj";
+}
+
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Get/Set the alphabet of sequence, one of
+           'dna', 'rna' or 'protein'. The value is lower-cased on set,
+           so the stored alphabet is always lowercase.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no alphabet specified it
+           has to guess.
+ Args    : optional string to set : 'dna' | 'rna' | 'protein'
+
+
+=cut
+
+sub alphabet {
+    # Get/set the alphabet. A defined argument is lower-cased, checked
+    # against %valid_type (throws when unknown), stored and returned.
+    my ($obj,$value) = @_;
+
+    return $obj->{'alphabet'} unless defined $value;
+
+    $value = lc $value;
+    if ( ! $valid_type{$value} ) {
+        $obj->throw("Alphabet '$value' is not a valid alphabet (".
+                    join(',', map "'$_'", sort keys %valid_type) .
+                    ") lowercase");
+    }
+    return $obj->{'alphabet'} = $value;
+}
+
+=head2 desc
+
+ Title   : desc or description
+ Usage   : $obj->desc($newval)
+ Function: Get/set description of the sequence.
+
+           'description' is an alias for this for compliance with the
+           Bio::DescribableI interface.
+
+ Example :
+ Returns : value of desc (a string)
+ Args    : newvalue (a string or undef, optional)
+
+
+=cut
+
+sub desc{
+    # Get/set the description; any passed value (including undef) is
+    # stored.
+    my ($self, @args) = @_;
+
+    $self->{'desc'} = $args[0] if @args;
+    return $self->{'desc'};
+}
+
+=head2 can_call_new
+
+ Title   : can_call_new
+ Usage   :
+ Function:
+ Example :
+ Returns : true
+ Args    :
+
+
+=cut
+
+sub can_call_new {
+    # Always true.
+    return 1;
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $seq->id()
+ Function: This is mapped on display_id
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub  id {
+    # Alias: forwards all arguments to display_id().
+    my ($self, @args) = @_;
+    return $self->display_id(@args);
+}
+
+=head2 is_circular
+
+ Title   : is_circular
+ Usage   : if( $obj->is_circular) { /Do Something/ }
+ Function: Returns true if the molecule is circular
+ Returns : Boolean value
+ Args    : none
+
+=cut
+
+sub is_circular{
+    # Get/set the circularity flag; any passed value (including undef) is
+    # stored.
+    my ($self, @args) = @_;
+
+    $self->{'is_circular'} = $args[0] if @args;
+    return $self->{'is_circular'};
+}
+
+=head1 Methods for Bio::IdentifiableI compliance
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: A string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences.
+
+           This is aliased to accession_number().
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    # Alias: forwards all arguments to accession_number().
+    my ($self, @args) = @_;
+    return $self->accession_number(@args);
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: A number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described
+           the same identifier should represent the same concept.
+
+ Returns : A number
+
+=cut
+
+sub version{
+    # Get/set the '_version' slot; only a defined argument overwrites the
+    # stored value.
+    my $self  = shift;
+    my $value = shift;
+
+    $self->{'_version'} = $value if defined $value;
+    return $self->{'_version'};
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: A string which represents the organisation which
+           granted the namespace, written as the DNS name for
+           organisation (eg, wormbase.org).
+
+ Returns : A scalar
+
+=cut
+
+sub authority {
+    # Get/set the 'authority' slot; only a defined argument overwrites the
+    # stored value.
+    my $obj   = shift;
+    my $value = shift;
+
+    $obj->{'authority'} = $value if defined $value;
+    return $obj->{'authority'};
+}
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection.
+
+ Returns : A scalar
+
+
+=cut
+
+sub namespace{
+    # Get/set the 'namespace' slot; only a defined argument overwrites the
+    # stored value. A false stored value is reported as the empty string.
+    my $self  = shift;
+    my $value = shift;
+
+    $self->{'namespace'} = $value if defined $value;
+    return $self->{'namespace'} || "";
+}
+
+=head1 Methods for Bio::DescribableI compliance
+
+This comprises of display_name and description.
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user.
+           The string should have no spaces (ideally, though a cautious
+           user of this interface would not assume this) and should be
+           less than thirty characters (though again, double checking
+           this is a good idea).
+
+           This is aliased to display_id().
+ Returns : A scalar
+
+=cut
+
+sub display_name {
+    # Alias: forwards all arguments to display_id().
+    my ($self, @args) = @_;
+    return $self->display_id(@args);
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display.
+
+           This is aliased to desc().
+ Returns : A scalar
+
+=cut
+
+sub description {
+    # Alias: forwards all arguments to desc().
+    my ($self, @args) = @_;
+    return $self->desc(@args);
+}
+
+=head1 Methods Inherited from Bio::PrimarySeqI
+
+These methods are available on Bio::PrimarySeq, although they are
+actually implemented on Bio::PrimarySeqI
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::SeqI implementing object which
+           is the reversed complement of the sequence. For protein
+           sequences this throws an exception of
+           "Sequence is a protein. Cannot revcom".
+
+           The id is the same id as the original sequence, and the
+           accession number is also identical. If someone wants to
+           track that this sequence has been reversed, it needs to
+           define its own extensions.
+
+           To do an inplace edit of an object you can go:
+
+           $seqobj = $seqobj->revcom();
+
+           This of course, causes Perl to handle the garbage
+           collection of the old object, but it is roughly speaking as
+           efficient as an inplace edit.
+
+ Returns : A new (fresh) Bio::SeqI object
+ Args    : none
+
+=cut
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence,
+
+ Example :
+ Returns : A fresh Bio::SeqI implementing object.
+ Args    :
+
+
+=cut
+
+=head1 Internal methods
+
+These are internal methods to PrimarySeq
+
+=cut
+
+=head2 _guess_alphabet
+
+ Title   : _guess_alphabet
+ Usage   :
+ Function: Determines (and sets) the type of sequence: dna, rna, protein
+ Example :
+ Returns : one of strings 'dna', 'rna' or 'protein'.
+ Args    : none
+
+
+=cut
+
+sub _guess_alphabet {
+   # Guess the alphabet from the letter composition of the sequence,
+   # store it through alphabet() and return it. Returns '' (after a
+   # warning, without setting anything) when no letters are left to
+   # judge by.
+   my ($self) = @_;
+
+   # work on a copy with the characters '-', '.', '?' and 'x'/'X' removed
+   my $str = $self->seq();
+   $str =~ s/[-.?x]//gi;
+
+   my $total = CORE::length($str);
+   if( $total == 0 ) {
+       $self->warn("Got a sequence with no letters in it ".
+                   "cannot guess alphabet [$str]");
+       return '';
+   }
+
+   # N is counted with ATGC: a sequence of mainly ATGC with some N is
+   # taken to be 'dna' even though N could also be Asparagine
+   my $u    = ($str =~ tr/Uu//);
+   my $atgc = ($str =~ tr/ATGCNatgcn//);
+
+   # more than 85% ATGCN => dna; more than 85% once U is added => rna
+   my $type = (  $atgc       / $total  > 0.85 ) ? 'dna'
+            : ( ($atgc + $u) / $total  > 0.85 ) ? 'rna'
+            :                                     'protein';
+
+   $self->alphabet($type);
+   return $type;
+}
+
+############################################################################
+# aliases due to name changes or to compensate for our lack of consistency #
+############################################################################
+
+sub accession {
+    # Deprecated alias: warns, then forwards all arguments to
+    # accession_number().
+    my ($self, @args) = @_;
+
+    my $class = ref($self);
+    $self->warn($class."::accession is deprecated, ".
+                "use accession_number() instead");
+    return $self->accession_number(@args);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PrimarySeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,799 @@
+# $Id: PrimarySeqI.pm,v 1.65.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::PrimarySeqI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PrimarySeqI - Interface definition for a Bio::PrimarySeq
+
+=head1 SYNOPSIS
+
+    # Bio::PrimarySeqI is the interface class for sequences.
+    # If you are a newcomer to bioperl, you might want to start with
+    # Bio::Seq documentation.
+
+    # Test if this is a seq object
+    $obj->isa("Bio::PrimarySeqI") ||
+      $obj->throw("$obj does not implement the Bio::PrimarySeqI interface");
+
+    # Accessors
+    $string    = $obj->seq();
+    $substring = $obj->subseq(12,50);
+    $display   = $obj->display_id();       # for human display
+    $id        = $obj->primary_id();       # unique id for this object,
+                                           # implementation defined
+    $unique_key= $obj->accession_number(); # unique biological id
+
+
+    # Object manipulation
+    eval {
+	   $rev = $obj->revcom();
+    };
+    if( $@ ) {
+	   $obj->throw("Could not reverse complement. ".
+		    "Probably not DNA. Actual exception\n$@\n");
+    }
+
+    $trunc = $obj->trunc(12,50);
+    # $rev and $trunc are Bio::PrimarySeqI compliant objects
+
+
+=head1 DESCRIPTION
+
+This object defines an abstract interface to basic sequence
+information - for most users of the package the documentation (and
+methods) in this class are not useful - this is a developers-only
+class which defines what methods have to be implemented by other Perl
+objects to comply to the Bio::PrimarySeqI interface. Go "perldoc
+Bio::Seq" or "man Bio::Seq" for more information on the main class for
+sequences.
+
+PrimarySeq is an object just for the sequence and its name(s), nothing
+more. Seq is the larger object complete with features. There is a pure
+perl implementation of this in L<Bio::PrimarySeq>. If you just want to
+use L<Bio::PrimarySeq> objects, then please read that module first. This
+module defines the interface, and is of more interest to people who
+want to wrap their own Perl Objects/RDBs/FileSystems etc in way that
+they "are" bioperl sequence objects, even though it is not using Perl
+to store the sequence etc.
+
+This interface defines what bioperl considers necessary to "be" a
+sequence, without providing an implementation of this, an
+implementation is provided in L<Bio::PrimarySeq>. If you want to provide
+a Bio::PrimarySeq-compliant object which in fact wraps another
+object/database/out-of-perl experience, then this is the correct thing
+to wrap, generally by providing a wrapper class which would inherit
+from your object and this Bio::PrimarySeqI interface. The wrapper class
+then would have methods lists in the "Implementation Specific
+Functions" which would provide these methods for your object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PrimarySeqI;
+use strict;
+use Bio::Tools::CodonTable;
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string = $obj->seq()
+ Function: Returns the sequence as a string of letters. The
+           case of the letters is left up to the implementer.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard), but implementations are suggested to
+           keep an open mind about case (some users... want mixed case!)
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub seq {
+   # Virtual: a concrete sequence class must provide the real accessor.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+ Function: Returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, i.e. 1-2 are the first two
+           bases of the sequence.
+
+           Start cannot be larger than end but can be equal.
+
+ Returns : A string
+ Args    :
+ Status  : Virtual
+
+=cut
+
+sub subseq{
+   # Virtual: a concrete sequence class must provide the real method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : $id_string = $obj->display_id();
+ Function: Returns the display id, also known as the common name of the Sequence
+           object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the ID
+           field of the GenBank/EMBL databanks and the id field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience reasons.
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub display_id {
+   # Virtual: a concrete sequence class must provide the real accessor.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub accession_number {
+   # Virtual: a concrete sequence class must provide the real accessor.
+   # (The archive had garbled ",@args" into ", at args", a syntax error.)
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their
+           own object ids in a way the implementation can control.
+           Clients can expect one id to map to one object.
+
+           For sequences with no accession number, this method should
+           return a stringified memory location.
+
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub primary_id {
+   # Virtual: a concrete sequence class must provide the real accessor.
+   # (The archive had garbled ",@args" into ", at args", a syntax error.)
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+
+=head2 can_call_new
+
+ Title   : can_call_new
+ Usage   : if( $obj->can_call_new ) {
+             $newobj = $obj->new( %param );
+	 }
+ Function: Can_call_new returns 1 or 0 depending
+           on whether an implementation allows new
+           constructor to be called. If a new constructor
+           is allowed, then it should take the following hashed
+           constructor list.
+
+           $myobject->new( -seq => $sequence_as_string,
+			   -display_id  => $id,
+			   -accession_number => $accession,
+			   -alphabet => 'dna',
+			   );
+ Returns : 1 or 0
+ Args    :
+
+
+=cut
+
+sub can_call_new{
+   # (The archive had garbled ",@args" into ", at args", a syntax error.)
+   my ($self,@args) = @_;
+
+   # The interface defaults to 0, i.e. new() may not be called on the
+   # implementing class; revcom(), trunc() and translate() then build a
+   # Bio::PrimarySeq instead.
+   return 0;
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called "type" because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : A string either 'dna','rna','protein'. NB - the object must
+           make a call of the alphabet, if there is no alphabet specified it
+           has to guess.
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub alphabet{
+    # Virtual: a concrete sequence class must provide the real accessor.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+sub moltype{
+   # (The archive had garbled ",@args" into ", at args"; @args is used
+   # below, so the sub could not compile under "use strict".)
+   my ($self,@args) = @_;
+
+   # Pre-1.0 method name: warn, then forward everything to alphabet().
+   $self->warn("moltype: pre v1.0 method. Calling alphabet() instead...");
+   $self->alphabet(@args);
+}
+
+
+=head1 Optional Implementation Functions
+
+The following functions rely on the above functions. An
+implementing class does not need to provide these functions, as they
+will be provided by this class, but is free to override these
+functions.
+
+The revcom(), trunc(), and translate() methods create new sequence
+objects. They will call new() on the class of the sequence object
+instance passed as argument, unless can_call_new() returns FALSE. In
+the latter case a Bio::PrimarySeq object will be created. Implementors
+which really want to control how objects are created (eg, for object
+persistence over a database, or objects in a CORBA framework), they
+are encouraged to override these methods
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::PrimarySeqI implementing object which
+           is the reversed complement of the sequence. For protein
+           sequences this throws an exception of "Sequence is a
+           protein. Cannot revcom".
+
+           The id is the same id as the original sequence, and the
+           accession number is also identical. If someone wants to
+           track that this sequence has been reversed, it needs to
+           define its own extensions.
+
+           To do an inplace edit of an object you can go:
+
+           $seq = $seq->revcom();
+
+           This of course, causes Perl to handle the garbage
+           collection of the old object, but it is roughly speaking as
+           efficient as an inplace edit.
+
+ Returns : A new (fresh) Bio::PrimarySeqI object
+ Args    : None
+
+
+=cut
+
+sub revcom{
+   my $self = shift;
+
+   # Proteins cannot be reverse-complemented; any other non-nucleotide
+   # alphabet is attempted anyway, with a warning.
+   my $alphabet = $self->alphabet;
+   if( $alphabet eq 'protein' ) {
+       $self->throw("Sequence is a protein. Cannot revcom");
+   }
+
+   if( $alphabet ne 'dna' && $alphabet ne 'rna' ) {
+       my $complaint = "Sequence is not dna or rna, but [$alphabet]. ".
+           "Attempting to revcom, but unsure if this is right";
+       if( $self->can('warn') ) {
+           $self->warn($complaint);
+       } else {
+           warn("[$self] ".$complaint);
+       }
+   }
+
+   my $is_rna = ( $alphabet eq 'rna' );
+
+   # RNA is mapped to DNA letters, complemented, reversed, then mapped back.
+   my $bases = $self->seq();
+   $bases =~ tr/uU/tT/ if $is_rna;
+
+   # Complement every IUPAC nucleotide code, preserving case.
+   $bases =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+   # The scalar assignment makes reverse() flip the string itself.
+   my $revcomp = CORE::reverse $bases;
+   $revcomp =~ tr/tT/uU/ if $is_rna;
+
+   # Build the result with the caller's own class when it allows new(),
+   # otherwise fall back to Bio::PrimarySeq (loading it on demand).
+   my $seqclass = 'Bio::PrimarySeq';
+   if( $self->can_call_new() ) {
+       $seqclass = ref($self);
+   } else {
+       $self->_attempt_to_load_Seq();
+   }
+
+   my $result = $seqclass->new( '-seq'              => $revcomp,
+                                '-display_id'       => $self->display_id,
+                                '-accession_number' => $self->accession_number,
+                                '-alphabet'         => $self->alphabet,
+                                '-desc'             => $self->desc(),
+                                '-verbose'          => $self->verbose
+                                );
+   return $result;
+}
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence.
+ Returns : A fresh Bio::PrimarySeqI implementing object.
+ Args    : Two integers denoting first and last base of the sub-sequence.
+
+
+=cut
+
+sub trunc{
+   my ($self,$start,$end) = @_;
+
+   # A Bio::LocationI object may be passed in place of start/end.
+   my $start_is_location =
+       defined $start && ref($start) && $start->isa('Bio::LocationI');
+
+   my $fragment;
+   if( $start_is_location ) {
+       $fragment = $self->subseq($start);
+   } elsif( !$end ) {
+       $self->throw("trunc start,end -- there was no end for $start");
+   } elsif( $end < $start ) {
+       # Reversed coordinates are refused rather than read as a revcom.
+       $self->throw(
+           "start [$start] is greater than end [$end]. \n".
+           "If you want to truncated and reverse complement, \n".
+           "you must call trunc followed by revcom. Sorry."
+       );
+   } else {
+       $fragment = $self->subseq($start,$end);
+   }
+
+   # Build the result with the caller's own class when it allows new(),
+   # otherwise fall back to Bio::PrimarySeq (loading it on demand).
+   my $seqclass = 'Bio::PrimarySeq';
+   if( $self->can_call_new() ) {
+       $seqclass = ref($self);
+   } else {
+       $self->_attempt_to_load_Seq();
+   }
+
+   my $result = $seqclass->new( '-seq'              => $fragment,
+                                '-display_id'       => $self->display_id,
+                                '-accession_number' => $self->accession_number,
+                                '-alphabet'         => $self->alphabet,
+                                '-desc'             => $self->desc(),
+                                '-verbose'          => $self->verbose
+                                );
+   return $result;
+}
+
+
+=head2 translate
+
+ Title   : translate
+ Usage   : $protein_seq_obj = $dna_seq_obj->translate
+
+           Or if you expect a complete coding sequence (CDS) translation,
+           with initiator at the beginning and terminator at the end:
+
+           $protein_seq_obj = $cds_seq_obj->translate(-complete => 1);
+
+           Or if you want translate() to find the first initiation
+           codon and return the corresponding protein:
+
+           $protein_seq_obj = $cds_seq_obj->translate(-orf => 1);
+
+ Function: Provides the translation of the DNA sequence using full
+           IUPAC ambiguities in DNA/RNA and amino acid codes.
+
+           The complete CDS translation is identical to EMBL/TREMBL
+           database translation. Note that the trailing terminator
+           character is removed before returning the translated protein
+           object.
+
+           Note: if you set $dna_seq_obj->verbose(1) you will get a
+           warning if the first codon is not a valid initiator.
+
+ Returns : A Bio::PrimarySeqI implementing object
+ Args    : -terminator    - character for terminator        default is *
+           -unknown       - character for unknown           default is X
+           -frame         - frame                           default is 0
+           -codontable_id - codon table id                  default is 1
+           -complete      - complete CDS expected           default is 0
+           -throw         - throw exception if not complete default is 0
+           -orf           - find 1st ORF                    default is 0
+           -start         - alternative initiation codon
+           -codontable    - Bio::Tools::CodonTable object
+		   -offset        - offset for fuzzy locations      default is 0
+
+ Notes   : The -start argument only applies when -orf is set to 1. By default
+           all initiation codons found in the given codon table are used
+           but when "start" is set to some codon this codon will be used
+           exclusively as the initiation codon. Note that the default codon
+           table (NCBI "Standard") has 3 initiation codons!
+
+           By default translate() translates termination codons to
+           the same character (default is *), both internal and trailing
+           codons. Setting "-complete" to 1 tells translate() to remove
+           the trailing character.
+
+		   -offset is used for seqfeatures which contain the \codon_start
+		   tag and can be set to 1, 2, or 3.  This is the offset by which the
+		   sequence translation starts relative to the first base of the
+		   feature
+
+For details on codon tables used by translate() see L<Bio::Tools::CodonTable>.
+
+           Deprecated argument set (v. 1.5.1 and prior versions)
+           where each argument is an element in an array:
+
+           1: character for terminator (optional), defaults to '*'.
+           2: character for unknown amino acid (optional), defaults to 'X'.
+           3: frame (optional), valid values are 0, 1, 2, defaults to 0.
+           4: codon table id (optional), defaults to 1.
+           5: complete coding sequence expected, defaults to 0 (false).
+           6: boolean, throw exception if not complete coding sequence
+              (true), defaults to warning (false)
+           7: codontable, a custom Bio::Tools::CodonTable object (optional).
+
+=cut
+
+sub translate {
+	 my ($self, at args) = @_;
+	 my ($terminator, $unknown, $frame, $codonTableId, $complete, $throw,
+		  $codonTable, $orf, $start_codon, $offset);
+
+	 ## new API with named parameters, post 1.5.1
+	 if ($args[0] && $args[0] =~ /^-[A-Z]+/i) {
+		 ($terminator, $unknown, $frame, $codonTableId, $complete, $throw,
+		  $codonTable, $orf, $start_codon, $offset) =
+			 $self->_rearrange([qw(TERMINATOR
+								UNKNOWN
+								FRAME
+								CODONTABLE_ID
+								COMPLETE
+								THROW
+								CODONTABLE
+								ORF
+								START
+								OFFSET)], @args);
+	 ## old API, 1.5.1 and preceding versions
+	 } else {
+		 ($terminator, $unknown, $frame, $codonTableId,
+		  $complete, $throw, $codonTable, $offset) = @args;
+	 }
+
+    ## Initialize termination codon, unknown codon, codon table id, frame
+    $terminator = '*'    unless (defined($terminator) and $terminator ne '');
+    $unknown = "X"       unless (defined($unknown) and $unknown ne '');
+    $frame = 0           unless (defined($frame) and $frame ne '');
+    $codonTableId = 1    unless (defined($codonTableId) and $codonTableId ne '');
+
+    ## Get a CodonTable, error if custom CodonTable is invalid
+    if ($codonTable) {
+		 $self->throw("Need a Bio::Tools::CodonTable object, not ". $codonTable)
+			unless $codonTable->isa('Bio::Tools::CodonTable');
+    } else {
+		 $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId);
+	 }
+
+    ## Error if alphabet is "protein"
+    $self->throw("Can't translate an amino acid sequence.") if
+		($self->alphabet =~ /protein/i);
+
+    ## Error if -start parameter isn't a valid codon
+	 if ($start_codon) {
+		 $self->throw("Invalid start codon: $start_codon.") if
+			( $start_codon !~ /^[A-Z]{3}$/i );
+	 }
+	 
+	 my $seq;
+	 
+	 if ($offset) {
+		$self->throw("Offset must be 1, 2, or 3.") if
+		    ( $offset !~ /^[123]$/ );
+		my ($start, $end) = ($offset, $self->length);
+		($seq) = $self->subseq($start, $end);
+	 } else {
+		($seq) = $self->seq();
+	 }
+
+    ## ignore frame if an ORF is supposed to be found
+	 if ($orf) {
+		 $seq = $self->_find_orf($seq,$codonTable,$start_codon);
+	 } else {
+	 ## use frame, error if frame is not 0, 1 or 2
+		 $self->throw("Valid values for frame are 0, 1, or 2, not $frame.")
+			unless ($frame == 0 or $frame == 1 or $frame == 2);
+		 $seq = substr($seq,$frame);
+    }
+
+    ## Translate it
+    my $output = $codonTable->translate($seq);
+    # Use user-input terminator/unknown
+    $output =~ s/\*/$terminator/g;
+    $output =~ s/X/$unknown/g;
+
+    ## Only if we are expecting to translate a complete coding region
+    if ($complete) {
+		 my $id = $self->display_id;
+		 # remove the terminator character
+		 if( substr($output,-1,1) eq $terminator ) {
+			 chop $output;
+		 } else {
+			 $throw && $self->throw("Seq [$id]: Not using a valid terminator codon!");
+			 $self->warn("Seq [$id]: Not using a valid terminator codon!");
+		 }
+		 # test if there are terminator characters inside the protein sequence!
+		 if ($output =~ /\*/) {
+			 $throw && $self->throw("Seq [$id]: Terminator codon inside CDS!");
+			 $self->warn("Seq [$id]: Terminator codon inside CDS!");
+		 }
+		 # if the initiator codon is not ATG, the amino acid needs to be changed to M
+		 if ( substr($output,0,1) ne 'M' ) {
+			 if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) {
+				 $output = 'M'. substr($output,1);
+			 }	elsif ($throw) {
+				 $self->throw("Seq [$id]: Not using a valid initiator codon!");
+			 } else {
+				 $self->warn("Seq [$id]: Not using a valid initiator codon!");
+			 }
+		 }
+    }
+
+    my $seqclass;
+    if ($self->can_call_new()) {
+		 $seqclass = ref($self);
+    } else {
+		 $seqclass = 'Bio::PrimarySeq';
+		 $self->_attempt_to_load_Seq();
+    }
+    my $out = $seqclass->new( '-seq' => $output,
+										'-display_id'  => $self->display_id,
+										'-accession_number' => $self->accession_number,
+										# is there anything wrong with retaining the
+										# description?
+										'-desc' => $self->desc(),
+										'-alphabet' => 'protein',
+                              '-verbose' => $self->verbose
+			      );
+    return $out;
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $seq->id()
+ Function: ID of the sequence. This should normally be (and actually is in
+           the implementation provided here) just a synonym for display_id().
+ Returns : A string.
+ Args    :
+
+=cut
+
+sub  id {
+   # Plain synonym for display_id().
+   my $self = shift;
+
+   return $self->display_id();
+}
+
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $seq->length()
+ Function:
+ Returns : Integer representing the length of the sequence.
+ Args    :
+
+=cut
+
+sub  length {
+   # No implementation here: this class only throws "not implemented".
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seq->desc($newval);
+           $description = $seq->desc();
+ Function: Get/set description text for a seq object
+ Returns : Value of desc
+ Args    : newvalue (optional)
+
+=cut
+
+sub desc {
+   # No implementation here: this class only throws "not implemented".
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 is_circular
+
+ Title   : is_circular
+ Usage   : if( $obj->is_circular) { /Do Something/ }
+ Function: Returns true if the molecule is circular
+ Returns : Boolean value
+ Args    : none
+
+=cut
+
+sub is_circular{
+    # No implementation here: this class only throws "not implemented".
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head1 Private functions
+
+These are some private functions for the PrimarySeqI interface. You do not
+need to implement these functions
+
+=head2 _find_orf
+
+ Title   : _find_orf
+ Usage   :
+ Function: Finds ORF starting at 1st initiation codon in nucleotide sequence.
+           The ORF is not required to have a termination codon.
+ Example :
+ Returns : A nucleotide sequence or nothing, if no initiation codon is found.
+ Args    : Nucleotide sequence, CodonTable object, alternative initiation
+           codon (optional).
+
+=cut
+
+sub _find_orf {
+	# Trims $sequence to an open reading frame: everything before the first
+	# start codon is dropped, and the string is cut just after the first
+	# stop codon found in that codon's reading frame (if there is one).
+	my ($self,$sequence,$codonTable,$start_codon) = @_;
+
+	# find initiation codon and remove leading sequence
+	# The string is shortened one base at a time, so the start codon may
+	# lie in any of the three frames.
+	while ($sequence) {
+		my $codon = substr($sequence,0,3);
+		if ($start_codon) {
+			# caller-supplied codon: case-insensitive pattern match
+			last if ( $codon =~ /$start_codon/i );
+		} else {
+			# otherwise the codon table decides what counts as a start
+			last if ($codonTable->is_start_codon($codon));
+		}
+		$sequence = substr($sequence,1);
+	}
+	# The loop consumed the whole string without hitting a start codon.
+	return unless $sequence;
+
+	# find termination codon and remove trailing sequence
+	my $len = CORE::length($sequence);
+	# Start after the initiation codon and advance codon by codon.
+	my $offset = 3;
+	while ($offset < $len) {
+		my $codon = substr($sequence,$offset,3);
+		if ( $codonTable->is_ter_codon($codon) ){
+			# keep the stop codon itself in the returned ORF
+			$sequence = substr($sequence, 0, $offset + 3);
+			return $sequence;
+		}
+		$offset += 3;
+	}
+	# No in-frame stop: warn and return everything from the start codon on.
+	$self->warn("No termination codon found, will translate - sequence:\n$sequence");
+	$sequence;
+}
+
+=head2 _attempt_to_load_Seq
+
+ Title   : _attempt_to_load_Seq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _attempt_to_load_Seq{
+   my $self = shift;
+
+   # Nothing to do when main's symbol table already has this entry.
+   return 1 if $main::{'Bio::PrimarySeq'};
+
+   # Otherwise load the default implementation, trapping any failure so
+   # it can be reported with some guidance.
+   eval {
+       require Bio::PrimarySeq;
+   };
+   if( $@ ) {
+       my $explanation =
+           "Bio::PrimarySeq could not be loaded for [$self]\n".
+           "This indicates that you are using Bio::PrimarySeqI ".
+           "without Bio::PrimarySeq loaded or without providing a ".
+           "complete implementation.\nThe most likely problem is that there ".
+           "has been a misconfiguration of the bioperl environment\n".
+           "Actual exception:\n\n";
+       $self->throw("$explanation$@\n");
+       return 0;
+   }
+   return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/PullParserI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/PullParserI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/PullParserI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,624 @@
+# $Id: PullParserI.pm,v 1.1.2.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::PullParserI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PullParserI - A base module for fast 'pull' parsing
+
+=head1 SYNOPSIS
+
+    # do not use this class, it is intended for parser module
+    # writers only
+
+=head1 DESCRIPTION
+
+If you are writing a module to parse some new format, you may wish to use
+a 'pull' approach whereby you only do work (reading file data, parsing it,
+turning the parsed data in an object) when absolutely necessary.
+
+PullParserI provides a system for doing exactly that. As a PullParser you
+need a chunk. A chunk is just a Bio::Root::IO that contains all the raw data
+you would want to parse. You can use the chunk() method to create a chunk from
+a filename, existing filehandle or even a string. If you make a chunk from a
+large file, but actually only want your chunk to be some portion of the whole
+file, supply start and end amounts in bytes to chunk() at the same time.
+The methods _chunk_seek() and _chunk_tell() provide seeks and tells that are
+relative to the start and end of your chunk, not the whole file.
+
+The other thing you will need to decide when making a chunk is how to handle
+piped input. A PullParser typically needs seekable data to parse, so if your
+data is piped in and unseekable, you must decide between creating a temp file
+or reading the input into memory, which will be done before the chunk becomes
+usable and you can begin any parsing. Alternatively you can choose to force
+a sequential read, in which case you can make use of _dependencies() to define
+the linear order of methods that would result in the file being read
+sequentially. The return value of _sequential() is also useful here, if you
+would need to cache some data or otherwise behave differently during a
+sequential read.
+
+The main method in the system is get_field(). This method relies on the
+existence of a private hash reference accessible to it with the method
+_fields(). That hash ref should have as keys all the sorts of data you will want
+to parse (eg. 'score'), and prior to parsing the values would be undefined. A
+user of your module can then call either $module-E<gt>get_field('score') or
+$module-E<gt>score and get_field will either return the answer from
+$self-E<gt>_fields-E<gt>{score} if it is defined, or call a method _discover_score()
+first if not. So for the system to work you need to define a _discover_*()
+method for every field in the fields hash, and ensure that the method stores an
+answer in the fields hash.
+
+How you implement your _discover_* methods is up to you, though you should never
+call a _discover_* method directly yourself; always use get_field(), since
+get_field() will deal with calling dependent methods for you if a forced
+sequential read is in progress due to piped input. You will almost certainly
+want to make use of the various chunk-related methods of this class (that are
+denoted private by the leading '_'; this means you can use them as the author of
+a parser class, but users of your parser should not). 
+
+Primary amongst them is _*_chunk_by_end() to which you provide text that
+represents the end of your desired chunk and it does a readline with your
+argument as $/. The chunk knows about its line-endings, so if you want your
+end definition to include a new line, just always use "\n" and PullParserI will
+do any necessary conversion for you.
+
+If your input data is hierarchical (eg. report-E<gt>many results-E<gt>many hits-E<gt>many
+hsps), and you want an object at the leaf of the hierarchy to have access to
+information that is shared amongst all of them (is parsed in the root), you
+don't have to copy the data to each leaf object; simply by defining parent(),
+when you call get_field() and the requested field isn't in your leaf's fields
+hash, the leaf's parent will be asked for the field instead, and so on till
+root.
+
+See Bio::SearchIO::hmmer_pull for an example of implementing a parser using
+PullParserI.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 CONTRIBUTORS
+
+Inspired by a posting by Aaron J. Mackey
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::PullParserI;
+
+use vars qw($AUTOLOAD $FORCE_TEMP_FILE);
+use strict;
+
+use Bio::Root::IO;
+
+use base qw(Bio::Root::RootI);
+
+BEGIN {
+    # chunk() relies on a perl 5.8 feature for every mode except temp_file,
+    # so older perls are forced into temp_file mode. (A rewrite on top of
+    # IO::String would be the alternative, but is not wanted.)
+    $FORCE_TEMP_FILE = 1 if $] < 5.008;
+}
+
+=head2 _fields
+
+ Title   : _fields
+ Usage   : $obj->_fields( { field1 => undef } );
+           my $fields_ref = $obj->_fields;
+ Function: Get/set the hash reference containing all the fields for this parser
+ Returns : hash ref
+ Args    : none to get, OR hash ref to set
+
+=cut
+
+sub _fields {
+    my ($self, @new_fields) = @_;
+
+    # Setter: the first extra argument replaces the stored hash ref.
+    $self->{_fields} = $new_fields[0] if @new_fields;
+
+    # Getter: always return a hash ref, creating an empty one on demand.
+    $self->{_fields} = { } unless defined $self->{_fields};
+    return $self->{_fields};
+}
+
+=head2 has_field
+
+ Title   : has_field
+ Usage   : if ($obj->has_field('field_name')) {...}
+ Function: Ask if a particular object has a given field (doesn't ask ancestors)
+ Returns : boolean
+ Args    : string (the field name to test)
+
+=cut
+
+sub has_field {
+    my ($self, $desired) = @_;
+
+    # A missing or false field name can never be a field.
+    return unless $desired;
+
+    # Only this object's own fields hash is consulted, not its parents'.
+    my $fields = $self->_fields;
+    return exists $fields->{$desired};
+}
+
+=head2 get_field
+
+ Title   : get_field
+ Usage   : my $field_value = $obj->get_field('field_name');
+ Function: Get the value of a given field. If this $obj doesn't have the field,
+           its parent() will be asked, and so on until there are no more
+           parents.
+ Returns : scalar, warns if a value for the field couldn't be found and returns
+           undef.
+ Args    : string (the field to get)
+
+=cut
+
+sub get_field {
+    # Returns the value of the named field, discovering it on first use.
+    # With no (or a false) field name, returns the list of field names.
+    my $self = shift;
+    my $desired = shift || return keys %{$self->_fields};
+    if (exists $self->_fields->{$desired}) {
+        # The field belongs to this object; an undefined value means it has
+        # not been parsed yet.
+        unless (defined $self->_fields->{$desired}) {
+            my $method = '_discover_'.$desired;
+            
+            # If _dependencies() names another field that is still
+            # undefined, run that field's _discover_ method first.
+            my $dependency = $self->_dependencies($desired);
+            if ($dependency && ! defined $self->_fields->{$dependency}) {
+                my $dep_method = '_discover_'.$dependency;
+                $self->$dep_method;
+            }
+            
+            # it might exist now
+            $self->$method unless defined $self->_fields->{$desired};
+        }
+        # May still be undef if the discover method stored nothing.
+        return $self->_fields->{$desired};
+    }
+    
+    # is it a field of our parent? (checks all ancestors)
+    if (my $parent = $self->parent) {
+        return $parent->get_field($desired);
+    }
+    
+    # Unknown here and in every ancestor: warn (with any '_discover_' text
+    # stripped from the name) and return nothing.
+    $desired =~ s/_discover_//;
+    $self->warn("This report does not hold information about '$desired'");
+    return;
+}
+
+=head2 parent
+
+ Title   : parent
+ Usage   : $obj->parent($parent_obj);
+           my $parent_obj = $obj->parent;
+ Function: Get/set the parent object of this one.
+ Returns : Bio::PullParserI
+ Args    : none to get, OR Bio::PullParserI to set
+
+=cut
+
+sub parent {
+    my ($self, @new_parent) = @_;
+    $self->{parent} = $new_parent[0] if @new_parent;
+
+    # a missing (or false) parent yields an empty return, not a false scalar
+    my $parent = $self->{parent};
+    return $parent if $parent;
+    return;
+}
+
+=head2 chunk
+
+ Title   : chunk
+ Usage   : $obj->chunk($filename);
+           my $chunk = $obj->chunk;
+ Function: Get/set the chunk of this parser.
+ Returns : Bio::Root::IO
+ Args    : none to get, OR
+           First argument of a GLOB reference, filename string, string data to
+           treat as the chunk, or Bio::Root::IO.
+           Optionally, also provide:
+           -start => int : the byte position within the thing described by the
+                           first argument to consider as the start of this
+                           chunk (default 0)
+           -end   => int : the byte position to consider as the end (default
+                           true end)
+           -piped_behaviour => 'memory'|'temp_file'|'sequential_read'
+
+           The last option comes into effect when the first argument is
+           something that cannot be seeked (eg. piped input filehandle).
+            'memory'          means read all the piped input into a string
+                              first, then set the chunk to that string.
+            'temp_file'       means read all the piped input and output it to
+                              a temp file, then set the chunk to that temp file.
+            'sequential_read' means that the piped input should be read
+                              sequentially and your parsing code must cope with
+                              not being able to seek.
+           'memory' is the fastest but uses the most memory. 'temp_file' and
+           'sequential_read' can be slow, with 'temp_file' being the most memory
+           efficient but requiring disc space. The default is 'sequential_read'.
+           Note that in versions of perl earlier than 5.8 only temp_file works
+           and will be used regardless of what value is supplied here.
+
+=cut
+
+sub chunk {
+    my $self = shift;
+    
+    # Get/set the chunk (a Bio::Root::IO) this parser reads from.
+    # Args    : none to get, OR a GLOB ref, filename, data string or
+    #           Bio::Root::IO, optionally followed by -start, -end and
+    #           -piped_behaviour ('memory'|'temp_file'|'sequential_read').
+    # Returns : the Bio::Root::IO, or nothing if no chunk has been set.
+    # Throws  : on a false first argument, unsupported input, a failed
+    #           in-memory open, or an unknown piped behaviour.
+    if (@_) {
+        my $thing = shift || $self->throw("Trying to set chunk() to an undefined value");
+        
+        # wrap whatever we were given in a Bio::Root::IO
+        if (ref($thing) eq 'GLOB') {
+            $self->{_chunk} = Bio::Root::IO->new(-fh => $thing);
+        }
+        elsif (ref(\$thing) eq 'SCALAR') {
+            if (-e $thing) {
+                $self->{_chunk} = Bio::Root::IO->new(-file => $thing);
+            }
+            elsif (! $FORCE_TEMP_FILE) {
+                # treat a string as a filehandle (requires perl 5.8)
+                open(my $fake_fh, "+<", \$thing)
+                    or $self->throw("Could not open the supplied string as an in-memory filehandle: $!");
+                $self->{_chunk} = Bio::Root::IO->new(-fh => $fake_fh);
+            }
+            else {
+                # No chunk may exist yet on the first call, so fall back to a
+                # throwaway Bio::Root::IO purely for its tempfile() method.
+                # NOTE(review): confirm the temp file stays usable once the
+                # IO object that created it has been destroyed.
+                my $temp_maker = $self->{_chunk} || Bio::Root::IO->new();
+                my ($handle) = $temp_maker->tempfile();
+                print $handle $thing;
+                $self->{_chunk} = Bio::Root::IO->new(-fh => $handle);
+            }
+        }
+        elsif (eval { $thing->isa('Bio::Root::IO') }) {
+            # eval guards against unblessed references, which cannot ->isa
+            $self->{_chunk} = $thing;
+        }
+        else {
+            $self->throw("Unknown input into chunk()");
+        }
+        
+        my ($piped_behaviour, $start, $end);
+        if (@_) {
+            ($piped_behaviour, $start, $end) =
+                $self->_rearrange([qw(PIPED_BEHAVIOUR START END)], @_);
+        }
+        $piped_behaviour ||= 'sequential_read';
+        $FORCE_TEMP_FILE && ($piped_behaviour = 'temp_file');
+        $start ||= 0;
+        $self->_chunk_true_start($start);
+        $self->_chunk_true_end($end);
+        
+        # determine if the chunk is seekable: read a line, try to rewind and
+        # see whether we really are back at position 0
+        my $fh = $self->{_chunk}->_fh;
+        seek($fh, 0, 0);
+        my $first_line = <$fh>;
+        seek($fh, 0, 0);
+        my $seekable = tell($fh) == 0;
+        unless ($seekable) {
+            # NOTE(review): the two recursive chunk() calls below do not pass
+            # -start/-end on, so those are reset to their defaults - confirm
+            # that this is intended.
+            if ($piped_behaviour eq 'memory') {
+                my $string = $first_line;
+                # lexical loop variable so the caller's $_ is left alone
+                while (defined(my $line = <$fh>)) {
+                    $string .= $line;
+                }
+                $self->chunk($string);
+            }
+            elsif ($piped_behaviour eq 'temp_file') {
+                my ($handle) = $self->{_chunk}->tempfile();
+                print $handle $first_line if defined $first_line;
+                while (defined(my $line = <$fh>)) {
+                    print $handle $line;
+                }
+                seek($handle, 0, 0);
+                $self->chunk($handle);
+            }
+            elsif ($piped_behaviour eq 'sequential_read') {
+                # give back the line we consumed while testing seekability
+                $self->{_chunk}->_pushback($first_line);
+                $self->_sequential(1);
+            }
+            else {
+                $self->throw("Unknown piped behaviour type '$piped_behaviour'");
+            }
+        }
+        
+        # determine our line ending (empty input falls back to "\n")
+        my $probe = defined $first_line ? $first_line : '';
+        if ($probe =~ /\015\012/) {
+            $self->_line_ending("\015\012");
+        }
+        elsif ($probe =~ /\015/) {
+            $self->_line_ending("\015");
+        }
+        else {
+            $self->_line_ending("\n");
+        }
+    }
+    
+    return $self->{_chunk} || return;
+}
+
+=head2 _sequential
+
+ Title   : _sequential
+ Usage   : if ($obj->_sequential) {...}
+ Function: Ask if we have to do operations such that the input is read
+           sequentially.
+ Returns : boolean
+ Args    : none to get, OR boolean to set (typically, you should never set this
+           yourself)
+
+=cut
+
+sub _sequential {
+    my ($self, @flag) = @_;
+    $self->{_sequential} = $flag[0] if @flag;
+
+    # an unset (or false) flag always reads back as 0
+    return $self->{_sequential} || 0;
+}
+
+=head2 _dependencies
+
+ Title   : _dependencies
+ Usage   : $obj->_dependencies( { field1 => field2 } );
+           my $dependency = $obj->_dependencies('field_name');
+ Function: Set the fields that are dependent on each other, or get the field
+           that another is dependent upon.
+ Returns : string (a field name)
+ Args    : string (a field name) to get, OR hash ref to initially set, with
+           field names as keys and values, key field being dependent upon value
+           field.
+
+=cut
+
+sub _dependencies {
+    my ($self, $thing) = @_;
+    return unless $thing;
+
+    # a field name: look up the field it depends upon
+    return $self->{_dependencies}->{$thing} unless ref($thing) eq 'HASH';
+
+    # a hash ref: install it as the complete dependency table
+    return $self->{_dependencies} = $thing;
+}
+
+=head2 _chunk_true_start
+
+ Title   : _chunk_true_start
+ Usage   : my $true_start = $obj->_chunk_true_start;
+ Function: Get/set the true start position of the chunk within the filehandle
+           it is part of.
+ Returns : int
+ Args    : none to get, OR int to set (typically, you won't set this yourself)
+
+=cut
+
+sub _chunk_true_start {
+    my ($self, @position) = @_;
+    $self->{_chunk_start} = $position[0] if @position;
+
+    # an unset start means the very beginning of the filehandle
+    return $self->{_chunk_start} || 0;
+}
+
+=head2 _chunk_true_end
+
+ Title   : _chunk_true_end
+ Usage   : my $true_end = $obj->_chunk_true_end;
+ Function: Get/set for the true end position of the chunk within the filehandle
+           it is part of.
+ Returns : int
+ Args    : none to get, OR int to set (typically, you won't set this yourself)
+
+=cut
+
+sub _chunk_true_end {
+    my ($self, @position) = @_;
+    $self->{_chunk_end} = $position[0] if @position;
+
+    # no default is applied here: an unset end is returned as undef
+    return $self->{_chunk_end};
+}
+
+=head2 _line_ending
+
+ Title   : _line_ending
+ Usage   : my $line_ending = $obj->_line_ending;
+ Function: Get/set for the line ending for the chunk.
+ Returns : string
+ Args    : none to get, OR string to set (typically, you won't set this
+           yourself)
+
+=cut
+
+sub _line_ending {
+    my ($self, @ending) = @_;
+
+    # plain get/set accessor; nothing is defaulted or validated here
+    $self->{_chunk_line_ending} = $ending[0] if @ending;
+    return $self->{_chunk_line_ending};
+}
+
+=head2 _chunk_seek
+
+ Title   : _chunk_seek
+ Usage   : $obj->_chunk_seek($pos);
+ Function: seek() the chunk to the provided position in bytes, relative to the
+           defined start of the chunk within its filehandle.
+
+           In _sequential() mode, this function does nothing.
+
+ Returns : n/a
+ Args    : int
+
+=cut
+
+sub _chunk_seek {
+    my ($self, $pos) = @_;
+    
+    # nothing is done in _sequential() mode
+    return if $self->_sequential;
+    
+    my $handle = $self->chunk->_fh;
+    
+    # first go to the defined start of the chunk (absolute seek) ...
+    seek($handle, $self->_chunk_true_start, 0);
+    
+    # ... then move on by the requested offset (relative seek)
+    return seek($handle, $pos, 1);
+}
+
+=head2 _chunk_tell
+
+ Title   : _chunk_tell
+ Usage   : my $pos = $obj->_chunk_tell;
+ Function: Get the current tell() position within the chunk, relative to the
+           defined start of the chunk within its filehandle.
+
+           In _sequential() mode, this function does nothing.
+
+ Returns : int
+ Args    : none
+
+=cut
+
+sub _chunk_tell {
+    my ($self) = @_;
+    
+    # nothing is reported in _sequential() mode
+    return if $self->_sequential;
+    
+    # convert the absolute filehandle position into a chunk-relative one
+    my $absolute = tell($self->chunk->_fh);
+    return $absolute - $self->_chunk_true_start;
+}
+
+=head2 _get_chunk_by_nol
+
+ Title   : _get_chunk_by_nol
+ Usage   : my $string = $obj->_get_chunk_by_nol($number_of_lines);
+ Function: Get a chunk of chunk() from the current position onward for the given
+           number of lines.
+ Returns : string
+ Args    : int (number of lines you want)
+
+=cut
+
+sub _get_chunk_by_nol {
+    my ($self, $nol) = @_;
+    
+    # Read up to $nol lines from the current position of chunk().
+    # Returns the concatenated lines, or nothing when the read ran past the
+    # chunk's true end. Throws unless $nol is a positive number.
+    (defined $nol && $nol > 0)
+        || $self->throw("Can't request a chunk of fewer than 1 lines");
+    
+    # hope that $/ is \n
+    
+    # A lexical is used for each line read: assigning to the global $_ here
+    # would overwrite (or, inside a caller's for/map, alias and modify) the
+    # caller's data.
+    my ($lines, $count);
+    while (defined(my $read = $self->chunk->_readline)) {
+        $lines .= $read;
+        $count++;
+        last if $count == $nol;
+    }
+    
+    # only hand the text back if we did not run beyond the end of the chunk
+    my $end = $self->_chunk_tell;
+    if ($self->_chunk_true_end ? $end <= $self->_chunk_true_end : 1) {
+        return $lines;
+    }
+    return;
+}
+
+=head2 _get_chunk_by_end
+
+ Title   : _get_chunk_by_end
+ Usage   : my $string = $obj->_get_chunk_by_end;
+ Function: Get a chunk of chunk() from the current position onward till the end
+           of the line, as defined by the supplied argument.
+ Returns : string
+ Args    : string (line ending - if you want the line ending to include a new
+           line, always use \n)
+
+=cut
+
+sub _get_chunk_by_end {
+    my ($self, $chunk_ending) = @_;
+    
+    # callers always write "\n"; swap in the chunk's own line ending
+    my $native_eol = $self->_line_ending;
+    $chunk_ending =~ s/\n/$native_eol/g;
+    
+    # read one "record" terminated by the requested ending
+    local $/ = $chunk_ending || '';
+    my $text = $self->chunk->_readline;
+    
+    # discard the text if reading took us beyond the chunk's true end
+    my $end = $self->_chunk_tell;
+    my $within_chunk = $self->_chunk_true_end ? $end <= $self->_chunk_true_end : 1;
+    return $text if $within_chunk;
+    return;
+}
+
+=head2 _find_chunk_by_end
+
+ Title   : _find_chunk_by_end
+ Usage   : my ($start, $end) = $obj->_find_chunk_by_end($chunk_ending);
+ Function: Get the start and end of what would be a chunk of chunk() from the
+           current position onward till the end of the line, as defined by the
+           supplied argument.
+
+           In _sequential() mode, this function does nothing.
+
+ Returns : _chunk_tell values for start and end in 2 element list
+ Args    : string (line ending - if you want the line ending to include a new
+           line, always use \n)
+
+=cut
+
+sub _find_chunk_by_end {
+    my ($self, $chunk_ending) = @_;
+    
+    # nothing is done in _sequential() mode
+    return if $self->_sequential;
+    
+    # callers always write "\n"; swap in the chunk's own line ending
+    my $native_eol = $self->_line_ending;
+    $chunk_ending =~ s/\n/$native_eol/g;
+    local $/ = $chunk_ending || '';
+    
+    # note where we are, skip over one record, and note where that left us
+    my $from = $self->_chunk_tell;
+    $self->chunk->_readline;
+    my $to = $self->_chunk_tell;
+    
+    # positions are only reported if the record ends within the chunk
+    my $within_chunk = $self->_chunk_true_end ? $to <= $self->_chunk_true_end : 1;
+    return ($from, $to) if $within_chunk;
+    return;
+}
+
+=head2 AUTOLOAD
+
+ Title   : AUTOLOAD
+ Usage   : n/a
+ Function: Assumes that any unknown method called should be treated as
+           get_field($method_name).
+ Returns : n/a
+ Args    : n/a
+
+=cut
+
+sub AUTOLOAD {
+    my $self = shift;
+    
+    # only instances have fields; class-level calls are silently ignored
+    ref($self) || return;
+    
+    my $name = $AUTOLOAD;
+    $name =~ s/.*://; # strip fully-qualified portion
+    
+    # Object destruction also lands here when the class defines no DESTROY;
+    # it must not be mistaken for a field lookup (which would emit a
+    # "does not hold information about 'DESTROY'" warning).
+    return if $name eq 'DESTROY';
+    
+    # is it one of our fields?
+    return $self->get_field($name);
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/PullParserI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Range.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Range.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Range.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,341 @@
+# $Id: Range.pm,v 1.27.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Range
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Matthew Pocock
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::Range - Pure perl RangeI implementation
+
+=head1 SYNOPSIS
+
+  $range = new Bio::Range(-start=>10, -end=>30, -strand=>+1);
+  $r2 = new Bio::Range(-start=>15, -end=>200, -strand=>+1);
+
+  print join(', ', $range->union($r2)), "\n";
+  print join(', ', $range->intersection($r2)), "\n";
+
+  print $range->overlaps($r2), "\n";
+  print $range->contains($r2), "\n";
+
+=head1 DESCRIPTION
+
+This provides a pure perl implementation of the BioPerl range
+interface.
+
+Ranges are modeled as having (start, end, length, strand). They use
+Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
+range. End is always greater-than or equal-to start, and length is
+greater than or equal to 1. The behaviour of a range is undefined if
+ranges with negative numbers or zero are used.
+
+So, in summary:
+
+  length = end - start + 1
+  end >= start
+  strand = (-1 | 0 | +1)
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Range;
+
+use strict;
+use Carp;
+use integer;
+
+
+use base qw(Bio::Root::Root Bio::RangeI);
+
+=head1 Constructors
+
+=head2 new
+
+  Title   : new
+  Usage   : $range = Bio::Range->new(-start => 100, -end => 200, -strand => +1);
+  Function: generates a new Bio::Range
+  Returns : a new range
+  Args    : -strand (defaults to 0) and any two of (-start, -end, -length),
+            the third will be calculated
+
+=cut
+
+sub new {
+  my ($caller, @args) = @_;
+  my $self = $caller->SUPER::new(@args);
+
+  # Builds a range from -strand (defaults to 0) plus any two of -start,
+  # -end and -length; the third is calculated.
+  # (The argument list must be passed as @args: the posted patch had it
+  # mangled into "at args", which does not compile.)
+  my ($strand, $start, $end, $length) =
+      $self->_rearrange([qw(STRAND
+                            START
+                            END
+                            LENGTH
+                            )], @args);
+  $self->strand($strand || 0);
+
+  if (defined $start) {
+      $self->start($start);
+      if (defined $end) {
+          $self->end($end);
+      }
+      elsif (defined $length) {
+          # derive the end from start + length
+          $self->end($self->start() + $length - 1);
+      }
+  }
+  elsif (defined $end && defined $length) {
+      # derive the start from end - length
+      $self->end($end);
+      $self->start($self->end() - $length + 1);
+  }
+  return $self;
+}
+
+=head2 unions
+
+ Title   : unions
+ Usage   : @unions = Bio::Range->unions(@ranges);
+ Function: generate a list of non-intersecting Bio::Range objects
+           from a list of Bio::Range objects which may intersect
+ Returns : a list of Bio::Range objects
+ Args    : a list of Bio::Range objects
+
+
+=cut
+
+sub unions {
+  my ($class, @ranges) = @_;
+
+  # Merges a list of possibly intersecting ranges into a list of
+  # non-intersecting Bio::Range objects, returned in no particular order.
+  # (The argument list must be unpacked as @ranges: the posted patch had it
+  # mangled into "at i", which does not compile.)
+
+  # index the ranges so that merged ones can be replaced/removed in place
+  my $next_id = 0;
+  my %range_for = map { ($next_id++, $_) } @ranges;
+
+  my $lastsize = scalar(keys %range_for);
+
+  # keep sweeping until a whole pass merges nothing
+  while (1) {
+
+    foreach my $j (sort { $range_for{$a}->start <=> $range_for{$b}->start } keys %range_for) {
+      foreach my $k (sort { $range_for{$a}->start <=> $range_for{$b}->start } keys %range_for) {
+
+        #it may have been replaced by a union under the key of
+        #the overlapping range, we are altering the hash in-place
+        next unless $range_for{$j};
+
+        # candidates are sorted by start: skip those entirely before $j and
+        # stop at the first one entirely after it
+        next if $range_for{$k}->end   < $range_for{$j}->start;
+        last if $range_for{$k}->start > $range_for{$j}->end;
+
+        if ($range_for{$j}->overlaps($range_for{$k})) {
+          my ($start, $end, $strand) = $range_for{$j}->union($range_for{$k});
+          delete($range_for{$k});
+          $range_for{$j} = Bio::Range->new( -start => $start , -end => $end , -strand => $strand );
+        }
+      }
+    }
+
+    # a pass that merged nothing means we are done
+    last if scalar(keys %range_for) == $lastsize;
+    $lastsize = scalar(keys %range_for);
+  }
+
+  return values %range_for;
+}
+
+
+=head1 Member variable access
+
+These methods let you get at and set the member variables
+
+=head2 start
+
+  Title    : start
+  Function : return or set the start co-ordinate
+  Example  : $s = $range->start(); $range->start(7);
+  Returns  : the value of the start co-ordinate
+  Args     : optionally, the new start co-ordinate
+  Overrides: Bio::RangeI::start
+
+=cut
+
+sub start {
+    my ($self, $value) = @_;
+
+    # pure getter when no (defined) value is supplied
+    return $self->{'start'} unless defined $value;
+
+    # only optionally signed whole numbers are accepted
+    if ($value !~ /^[-+]?\d+$/) {
+        $self->throw("'$value' is not an integer.\n");
+    }
+    return $self->{'start'} = $value;
+}
+
+=head2 end
+
+  Title    : end
+  Function : return or set the end co-ordinate
+  Example  : $e = $range->end(); $range->end(2000);
+  Returns  : the value of the end co-ordinate
+  Args     : optionally, the new end co-ordinate
+  Overrides: Bio::RangeI::end
+
+=cut
+
+sub end {
+    my ($self, $value) = @_;
+
+    # pure getter when no (defined) value is supplied
+    return $self->{'end'} unless defined $value;
+
+    # only optionally signed whole numbers are accepted
+    if ($value !~ /^[-+]?\d+$/) {
+        $self->throw("'$value' is not an integer.\n");
+    }
+    return $self->{'end'} = $value;
+}
+
+=head2 strand
+
+  Title    : strand
+  Function : return or set the strandedness
+  Example  : $st = $range->strand(); $range->strand(-1);
+  Returns  : the value of the strandedness (-1, 0 or 1)
+  Args     : optionally, the new strand - (-1, 0, 1) or (-, ., +).
+  Overrides: Bio::RangeI::Strand
+
+=cut
+
+sub strand {
+  my $self = shift;
+
+  # Get/set the strandedness.
+  # Args    : optionally the new strand: -1, 0, 1 or the symbols '-', '.', '+'
+  # Returns : the stored strand
+  # Any other defined value is ignored and leaves the stored strand as it is;
+  # an explicit undef clears it.
+  if (@_) {
+    my $val = shift;
+
+    # The symbols are mapped through a table rather than with tr///:
+    # tr/-/-1/ maps '-' to '-' (the surplus replacement character is
+    # dropped), so '-' used to be stored literally instead of as -1.
+    my %strand_for_symbol = ('+' => 1, '-' => -1, '.' => 0);
+
+    if (!defined $val) {
+      $self->{'strand'} = undef;
+    }
+    elsif (exists $strand_for_symbol{$val}) {
+      $self->{'strand'} = $strand_for_symbol{$val};
+    }
+    elsif ($val =~ /^\s*[-+]?\d+\s*$/ && ($val == -1 || $val == 0 || $val == 1)) {
+      # store a plain number, so that '+1' and 1 read back identically
+      $self->{'strand'} = $val + 0;
+    }
+  }
+  return $self->{'strand'};
+}
+
+=head2 length
+
+  Title    : length
+  Function : returns the length of this range
+  Example  : $length = $range->length();
+  Returns  : the length of this range, equal to end - start + 1
+  Args     : if you attempt to set the length an exception will be thrown
+  Overrides: Bio::RangeI::Length
+
+=cut
+
+sub length {
+  my ($self, @unexpected) = @_;
+
+  # the length is derived from start and end, so it can never be assigned
+  confess ref($self), "->length() is read-only" if @unexpected;
+
+  # bio-coordinates are inclusive at both ends, hence the + 1
+  return $self->end() - $self->start() + 1;
+}
+
+=head2 toString
+
+  Title   : toString
+  Function: stringifies this range
+  Example : print $range->toString(), "\n";
+  Returns : a string representation of this range
+
+=cut
+
+sub toString {
+  my ($self) = @_;
+
+  # fetch the three components first, then format them in one go
+  my $start  = $self->start;
+  my $end    = $self->end;
+  my $strand = $self->strand;
+  return "($start, $end) strand=$strand";
+}
+
+=head1 Boolean Methods
+
+These methods return true or false.
+
+ $range->overlaps($otherRange) && print "Ranges overlap\n";
+
+=head2 overlaps
+
+  Title    : overlaps
+  Usage    : if($r1->overlaps($r2)) { do stuff }
+  Function : tests if $r2 overlaps $r1
+  Args     : a range to test for overlap with
+  Returns  : true if the ranges overlap, false otherwise
+  Inherited: Bio::RangeI
+
+=head2 contains
+
+  Title    : contains
+  Usage    : if($r1->contains($r2)) { do stuff }
+  Function : tests whether $r1 totally contains $r2
+  Args     : a range to test for being contained
+  Returns  : true if the argument is totally contained within this range
+  Inherited: Bio::RangeI
+
+=head2 equals
+
+  Title    : equals
+  Usage    : if($r1->equals($r2))
+  Function : test whether $r1 has the same start, end, length as $r2
+  Args     : a range to test for equality
+  Returns  : true if they are describing the same range
+  Inherited: Bio::RangeI
+
+=head1 Geometrical methods
+
+These methods do things to the geometry of ranges, and return
+triplets (start, end, strand) from which new ranges could be built.
+
+=head2 intersection
+
+  Title    : intersection
+  Usage    : ($start, $stop, $strand) = $r1->intersection($r2)
+  Function : gives the range that is contained by both ranges
+  Args     : a range to compare this one to
+  Returns  : nothing if they do not overlap, or the range that they do overlap
+  Inherited: Bio::RangeI::intersection
+
+=cut
+
+=head2 union
+
+  Title    : union
+  Usage    : ($start, $stop, $strand) = $r1->union($r2);
+           : ($start, $stop, $strand) = Bio::Range->union(@ranges);
+  Function : finds the minimal range that contains all of the ranges
+  Args     : a range or list of ranges
+  Returns  : the range containing all of the ranges
+  Inherited: Bio::RangeI::union
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/RangeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/RangeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/RangeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,560 @@
+# $Id: RangeI.pm,v 1.49.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::RangeI
+#
+# Cared for by Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Matthew Pocock
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::RangeI - Range interface
+
+=head1 SYNOPSIS
+
+  #Do not run this module directly
+
+=head1 DESCRIPTION
+
+This provides a standard BioPerl range interface that should be
+implemented by any object that wants to be treated as a range. This
+serves purely as an abstract base class for implementers and can not
+be instantiated.
+
+Ranges are modeled as having (start, end, length, strand). They use
+Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
+range. End is always greater-than or equal-to start, and length is
+greater than or equal to 1. The behaviour of a range is undefined if
+ranges with negative numbers or zero are used.
+
+So, in summary:
+
+  length = end - start + 1
+  end >= start
+  strand = (-1 | 0 | +1)
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Juha Muilu (muilu at ebi.ac.uk)
+Sendu Bala (bix at sendu.me.uk)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::RangeI;
+
+use strict;
+use Carp;
+use integer;
+use vars qw(%STRAND_OPTIONS);
+
+use base qw(Bio::Root::RootI);
+
+BEGIN {
+    # STRAND_OPTIONS maps each legal strand-testing option to the name of
+    # the private method that implements it
+    %STRAND_OPTIONS = (
+        'strong' => '_strong', # ranges must have the same strand
+        'weak'   => '_weak',   # ranges must have the same strand or no strand
+        'ignore' => '_ignore', # ignore strand information
+    );
+}
+
+# utility methods
+#
+
+# returns true if strands are equal and non-zero
+sub _strong {
+    my ($first, $second) = @_;
+    my $strand1 = $first->strand();
+    my $strand2 = $second->strand();
+
+    # true only when the first range is stranded and the second agrees with it
+    return $strand1 != 0 && $strand1 == $strand2;
+}
+
+# returns true if strands are equal or either is zero
+sub _weak {
+    my ($first, $second) = @_;
+    my $strand1 = $first->strand();
+    my $strand2 = $second->strand();
+
+    # an unstranded range is compatible with anything; otherwise they must agree
+    return $strand1 == 0 || $strand2 == 0 || $strand1 == $strand2;
+}
+
+# returns true for any strandedness
+sub _ignore {
+    # strand information is disregarded entirely, so every pair passes
+    1;
+}
+
+# works out what test to use for the strictness and returns true/false
+# e.g. $r1->_testStrand($r2, 'strong')
+sub _testStrand {
+    my ($r1, $r2, $comp) = @_;
+
+    # Applies the strand test named by $comp ('strong', 'weak' or 'ignore')
+    # to the two ranges, e.g. $r1->_testStrand($r2, 'strong').
+    # Returns true/false; with no option given the test always passes.
+    # Throws on an option name that is not in %STRAND_OPTIONS.
+    # (The sub used to carry an empty prototype although it takes
+    # arguments; prototypes are ignored for method calls, so removing it
+    # does not affect callers.)
+    return 1 unless $comp;
+
+    my $func = $STRAND_OPTIONS{$comp};
+    unless ($func) {
+        # fail with a meaningful message rather than "can't call method"
+        $r1->throw("Unknown strand-testing option '$comp'; expected one of: ".
+                   join(', ', sort keys %STRAND_OPTIONS));
+    }
+    return $r1->$func($r2);
+}
+
+=head1 Abstract methods
+
+These methods must be implemented in all subclasses.
+
+=head2 start
+
+  Title   : start
+  Usage   : $start = $range->start();
+  Function: get/set the start of this range
+  Returns : the start of this range
+  Args    : optionally allows the start to be set
+            using $range->start($start)
+
+=cut
+
+sub start {
+    # abstract: implementing classes must provide their own start()
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 end
+
+  Title   : end
+  Usage   : $end = $range->end();
+  Function: get/set the end of this range
+  Returns : the end of this range
+  Args    : optionally allows the end to be set
+            using $range->end($end)
+
+=cut
+
+sub end {
+    # abstract: implementing classes must provide their own end()
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 length
+
+  Title   : length
+  Usage   : $length = $range->length();
+  Function: get/set the length of this range
+  Returns : the length of this range
+  Args    : optionally allows the length to be set
+             using $range->length($length)
+
+=cut
+
+sub length {
+    # abstract: implementing classes must provide their own length()
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 strand
+
+  Title   : strand
+  Usage   : $strand = $range->strand();
+  Function: get/set the strand of this range
+  Returns : the strandedness (-1, 0, +1)
+  Args    : optionally allows the strand to be set
+            using $range->strand($strand)
+
+=cut
+
+sub strand {
+    # abstract: implementing classes must provide their own strand()
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head1 Boolean Methods
+
+These methods return true or false. They throw an error if start and
+end are not defined.
+
+  $range->overlaps($otherRange) && print "Ranges overlap\n";
+
+=head2 overlaps
+
+  Title   : overlaps
+  Usage   : if($r1->overlaps($r2)) { do stuff }
+  Function: tests if $r2 overlaps $r1
+  Args    : arg #1 = a range to compare this one to (mandatory)
+            arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+  Returns : true if the ranges overlap, false otherwise
+
+=cut
+
+sub overlaps {
+	my ($self, $other, $so) = @_;
+
+	$self->throw("start is undefined") unless defined $self->start;
+	$self->throw("end is undefined") unless defined $self->end;
+	$self->throw("not a Bio::RangeI object") unless defined $other &&
+	  $other->isa('Bio::RangeI');
+	$other->throw("start is undefined") unless defined $other->start;
+	$other->throw("end is undefined") unless defined $other->end;
+
+	return
+	  ($self->_testStrand($other, $so)
+		and not (
+					($self->start() > $other->end() or
+					 $self->end() < $other->start()   )
+				  ));
+}
+
+=head2 contains
+
+  Title   : contains
+  Usage   : if($r1->contains($r2)) { do stuff }
+  Function: tests whether $r1 totally contains $r2
+  Args    : arg #1 = a range to compare this one to (mandatory)
+	             alternatively, integer scalar to test
+            arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+  Returns : true if the argument is totally contained within this range
+
+=cut
+
+sub contains {
+    my ($self, $other, $so) = @_;
+
+    # Tests whether this range totally contains $other.
+    # Args    : a Bio::RangeI object or an integer position, plus an
+    #           optional strand-testing option ('strong', 'weak', 'ignore'),
+    #           which only applies to range objects
+    # Returns : true if $other lies completely within this range
+    # Throws  : if coordinates are undefined or $other is neither a
+    #           Bio::RangeI nor an integer
+    $self->throw("start is undefined") unless defined $self->start;
+    $self->throw("end is undefined") unless defined $self->end;
+
+    if (defined $other && ref $other) { # a range object?
+        # The type error is reported through $self: $other is exactly the
+        # thing that may not have a throw() method. The eval copes with
+        # unblessed references, on which ->isa cannot be called.
+        $self->throw("Not a Bio::RangeI object: $other")
+            unless eval { $other->isa('Bio::RangeI') };
+        $other->throw("start is undefined") unless defined $other->start;
+        $other->throw("end is undefined") unless defined $other->end;
+
+        return ($self->_testStrand($other, $so)      and
+                $other->start() >= $self->start() and
+                $other->end() <= $self->end());
+    }
+
+    # a scalar position
+    $self->throw("missing arg: you need to pass in a range or an integer")
+        unless defined $other;
+    $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
+    return ($other >= $self->start() and $other <= $self->end());
+}
+
+=head2 equals
+
+  Title   : equals
+  Usage   : if($r1->equals($r2))
+  Function: test whether $r1 has the same start, end, length as $r2
+  Args    : arg #1 = a range to compare this one to (mandatory)
+            arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+  Returns : true if they are describing the same range
+
+=cut
+
+sub equals {
+    my ($self, $other, $so) = @_;
+
+    # Tests whether $other describes the same range (same start and end).
+    # Args    : a Bio::RangeI object, plus an optional strand-testing
+    #           option ('strong', 'weak', 'ignore')
+    # Returns : true if both ranges have the same start and end and pass
+    #           the strand test
+    # Throws  : if coordinates are undefined or $other is not a Bio::RangeI
+    $self->throw("start is undefined") unless defined $self->start;
+    $self->throw("end is undefined") unless defined $self->end;
+
+    # As in overlaps(), the type check is reported through $self and copes
+    # with undef; the eval additionally copes with unblessed references.
+    $self->throw("Not a Bio::RangeI object")
+        unless defined $other && eval { $other->isa('Bio::RangeI') };
+    $other->throw("start is undefined") unless defined $other->start;
+    $other->throw("end is undefined") unless defined $other->end;
+
+    return ($self->_testStrand($other, $so)   and
+            $self->start() == $other->start() and
+            $self->end()   == $other->end()       );
+}
+
+=head1 Geometrical methods
+
+These methods do things to the geometry of ranges, and return
+Bio::RangeI compliant objects or triplets (start, stop, strand) from
+which new ranges could be built.
+
+=head2 intersection
+
+ Title   : intersection
+ Usage   : ($start, $stop, $strand) = $r1->intersection($r2); OR
+           ($start, $stop, $strand) = Bio::Range->intersection(\@ranges); OR
+           my $containing_range = $r1->intersection($r2); OR
+           my $containing_range = Bio::Range->intersection(\@ranges);
+ Function: gives the range that is contained by all ranges
+ Returns : undef if they do not overlap, or
+           the range that they do overlap (in the form of an object
+            like the calling one, OR a three element array)
+ Args    : arg #1 = [REQUIRED] a range to compare this one to,
+                    or an array ref of ranges
+           arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
+
+=cut
+
+sub intersection {
+    my ($self, $given, $so) = @_;
+    $given or $self->throw("missing arg: you need to pass in another feature");
+
+    # a static call on the interface itself is redirected to Bio::Range
+    if ($self eq "Bio::RangeI") {
+        $self = "Bio::Range";
+        $self->warn("calling static methods of an interface is deprecated; use $self instead");
+    }
+
+    # collect everything to intersect: ourselves (when called on an object)
+    # followed by the supplied range or ranges
+    my @ranges = ref $self ? ($self) : ();
+    push @ranges, (ref($given) eq 'ARRAY' ? @{$given} : $given);
+    @ranges >= 2 or $self->throw("Need at least 2 ranges");
+
+    # the first range seeds the running intersection
+    my $intersect = shift @ranges;
+    ref($intersect) or $self->throw("Not an object: $intersect");
+    $intersect->isa('Bio::RangeI') or $self->throw("Not a Bio::RangeI object: $intersect");
+    defined $intersect->start or $self->throw("start is undefined");
+    defined $intersect->end or $self->throw("end is undefined");
+
+    # narrow the running intersection with each remaining range in turn
+    foreach my $compare (@ranges) {
+        ref($compare) or $self->throw("Not an object: $compare");
+        $compare->isa('Bio::RangeI') or $self->throw("Not a Bio::RangeI object: $compare");
+        defined $compare->start or $self->throw("start is undefined");
+        defined $compare->end or $self->throw("end is undefined");
+        return unless $compare->_testStrand($intersect, $so);
+
+        my ($our_start, $their_start) = ($intersect->start(), $compare->start());
+        my ($our_end, $their_end)     = ($intersect->end(), $compare->end());
+
+        my $start = $our_start > $their_start ? $our_start : $their_start; # larger of the 2 starts
+        my $end   = $our_end < $their_end ? $our_end : $their_end;         # smaller of the 2 ends
+
+        # the intersection only keeps a strand both ranges agree upon
+        my $intersect_strand = 0;
+        if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
+            $intersect_strand = $compare->strand;
+        }
+
+        # no common positions at all
+        return if $start > $end;
+
+        $intersect = $self->new(-start  => $start,
+                                -end    => $end,
+                                -strand => $intersect_strand);
+    }
+
+    # list context gets the (start, end, strand) triplet, scalar the object
+    return wantarray()
+        ? ($intersect->start, $intersect->end, $intersect->strand)
+        : $intersect;
+}
+
+=head2 union
+
+   Title   : union
+    Usage   : ($start, $stop, $strand) = $r1->union($r2);
+            : ($start, $stop, $strand) = Bio::Range->union(@ranges);
+              my $newrange = Bio::Range->union(@ranges);
+    Function: finds the minimal Range that contains all of the Ranges
+    Args    : a Range or list of Range objects
+    Returns : the range containing all of the ranges
+              (in the form of an object like the calling one, OR
+              a three element array)
+
+=cut
+
+sub union {
+    # Finds the smallest range that covers every supplied range (the
+    # invocant is included when this is called on an instance).
+    #
+    # Returns nothing when neither a start nor an end could be determined;
+    # otherwise a (start, end, strand) triplet in list context, or a new
+    # range object built with $self->new in scalar context.
+    my $self = shift;
+    my @ranges = @_;
+    if ($self eq "Bio::RangeI") {
+        $self = "Bio::Range";
+        $self->warn("calling static methods of an interface is deprecated; use $self instead");
+    }
+    unshift @ranges, $self if ref $self;
+
+    # Ignore undefined coordinates up front.  The previous implementation
+    # shifted values off the sorted start list until it found a defined
+    # one, which never terminated when no range had a defined start (for
+    # example when called with no ranges at all).
+    my @starts = sort { $a <=> $b } grep { defined } map { $_->start() } @ranges;
+    my @ends   = sort { $a <=> $b } grep { defined } map { $_->end()   } @ranges;
+
+    my $start = @starts ? $starts[0]  : undef;    # smallest defined start
+    my $end   = @ends   ? $ends[-1]   : undef;    # largest defined end
+
+    # Strand for the union: taken from the first range that defines one,
+    # and reset to 0 as soon as a later range lacks a strand or disagrees.
+    my $union_strand;
+    foreach my $range (@ranges) {
+        if (!defined $union_strand) {
+            $union_strand = $range->strand;
+            next;
+        }
+        if (not defined $range->strand or $union_strand ne $range->strand) {
+            $union_strand = 0;
+            last;
+        }
+    }
+
+    return unless $start or $end;
+
+    if (wantarray()) {
+        return ($start, $end, $union_strand);
+    }
+    return $self->new('-start'  => $start,
+                      '-end'    => $end,
+                      '-strand' => $union_strand);
+}
+
+=head2 overlap_extent
+
+ Title   : overlap_extent
+ Usage   : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
+ Function: Provides actual amount of overlap between two different
+           ranges
+ Example :
+ Returns : array of values containing the length unique to the calling
+           range, the length common to both, and the length unique to
+           the argument range
+ Args    : a range
+
+=cut
+
+sub overlap_extent {
+    # Reports how two ranges overlap as a three element list:
+    #   (length unique to the invocant, length common to both,
+    #    length unique to the argument).
+    # Throws (through the invocant) when either range lacks a start or an
+    # end, or when the argument is not a Bio::RangeI object.
+    #
+    # The ranges are deliberately not named $a/$b: those are the package
+    # variables used by sort().
+    my ($self, $other) = @_;
+
+    $self->throw("start is undefined") unless defined $self->start;
+    $self->throw("end is undefined") unless defined $self->end;
+
+    # Validate the argument without calling methods on it blindly, and
+    # report problems through the invocant: the argument may be undef, a
+    # plain scalar, or an object that has no throw() method.
+    my $is_range = ref($other)
+        && do { local $@; eval { $other->isa('Bio::RangeI') } };
+    $self->throw("Not a Bio::RangeI object") unless $is_range;
+    $self->throw("start is undefined") unless defined $other->start;
+    $self->throw("end is undefined") unless defined $other->end;
+
+    if (!$self->overlaps($other)) {
+        return ($self->length, 0, $other->length);
+    }
+
+    # Whichever range starts first owns the leading unique stretch ...
+    my ($self_unique, $other_unique) = (0, 0);
+    if ($self->start < $other->start) {
+        $self_unique = $other->start - $self->start;
+    } else {
+        $other_unique = $self->start - $other->start;
+    }
+
+    # ... and whichever ends last owns the trailing one.
+    if ($self->end > $other->end) {
+        $self_unique += $self->end - $other->end;
+    } else {
+        $other_unique += $other->end - $self->end;
+    }
+
+    my $intersect = $self->intersection($other);
+    my $common    = $intersect->end - $intersect->start + 1;
+
+    return ($self_unique, $common, $other_unique);
+}
+
+=head2 disconnected_ranges
+
+    Title   : disconnected_ranges
+    Usage   : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
+    Function: finds the minimal set of ranges such that each input range
+              is fully contained by at least one output range, and none of
+              the output ranges overlap
+    Args    : a list of ranges
+    Returns : a list of objects of the same type as the input
+              (conforms to RangeI)
+
+=cut
+
+sub disconnected_ranges {
+    # Collapses the input ranges (plus the invocant, when called on an
+    # instance) into a list of ranges in which overlapping inputs have
+    # been merged, so that every input is covered by one output range.
+    my $self = shift;
+    if ($self eq "Bio::RangeI") {
+        $self = "Bio::Range";
+        $self->warn("calling static methods of an interface is deprecated; use $self instead");
+    }
+    my @inranges = @_;
+    unshift @inranges, $self if ref $self;
+
+    # Merging several ranges must go through a class-method call of
+    # union(): union() adds its invocant to the ranges being merged when
+    # called on an instance, which used to pull $self into merges it had
+    # nothing to do with.
+    my $class = ref($self) || $self;
+
+    my @outranges = ();    # disconnected ranges found so far
+
+    # Add each input range to @outranges; whenever it overlaps ranges that
+    # are already there, replace those ranges by the merged range.
+    foreach my $inrange (@inranges) {
+        my @untouched;    # existing output ranges that do not overlap $inrange
+        my @merged;       # union of $inrange with each range it overlaps
+
+        foreach my $outrange (@outranges) {
+            # scalar() matters: both methods return a triplet in list context
+            if (scalar($inrange->intersection($outrange))) {
+                push @merged, scalar($inrange->union($outrange));
+            }
+            else {
+                push @untouched, $outrange;
+            }
+        }
+        @outranges = @untouched;
+
+        if (@merged > 1) {
+            # $inrange bridged several output ranges: join them all
+            push @outranges, scalar($class->union(@merged));
+        }
+        elsif (@merged) {
+            # exactly 1 intersecting range
+            push @outranges, @merged;
+        }
+        else {
+            # no intersections found - new range
+            push @outranges,
+                $self->new('-start'  => $inrange->start,
+                           '-end'    => $inrange->end,
+                           '-strand' => $inrange->strand,
+                          );
+        }
+    }
+    return @outranges;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Analysis.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Analysis.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Analysis.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1217 @@
+# $Id: Analysis.pm,v 1.19.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module Bio::Restriction::Analysis
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# You may distribute this module under the same terms as perl itself
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Restriction::Analysis - cutting sequences with restriction
+enzymes
+
+=head1 SYNOPSIS
+
+  # analyze a DNA sequence for restriction enzymes
+  use Bio::Restriction::Analysis;
+  use Bio::PrimarySeq;
+  use Data::Dumper;
+
+  # get a DNA sequence from somewhere
+  my $seq = new Bio::PrimarySeq
+      (-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTC',
+       -primary_id => 'synopsis',
+       -molecule => 'dna');
+
+  # now start an analysis.
+  # this is using the default set of enzymes
+  my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);
+
+  # find unique cutters. This returns a
+  # Bio::Restriction::EnzymeCollection object
+  my $enzymes = $ra->unique_cutters;
+  print "Unique cutters: ", join (', ', 
+      map {$_->name} $enzymes->unique_cutters), "\n";
+
+  # AluI is one of them. Where does it cut?
+  # This will return an array of the sequence strings
+
+  my $enz = 'AluI';
+  my @frags = $ra->fragments($enz);
+  # how big are the fragments?
+  print "AluI fragment lengths: ", join(' & ', map {length $_} @frags), "\n";
+
+  # You can also bypass fragments and call sizes directly:
+  # to see all the fragment sizes
+  print "All sizes: ", join " ", $ra->sizes($enz), "\n";
+  # to see all the fragment sizes sorted by size like on a gel
+  print "All sizes, sorted ", join (" ", $ra->sizes($enz, 0, 1)), "\n";
+
+  # how many times does each enzyme cut
+  my $cuts = $ra->cuts_by_enzyme('BamHI');
+  print "BamHI cuts $cuts times\n";
+
+  # How many enzymes do not cut at all?
+  print "There are ", scalar $ra->zero_cutters->each_enzyme,
+        " enzymes that do not cut\n";
+
+  # what about enzymes that cut twice?
+  my $two_cutters = $ra->cutters(2);
+  print join (" ", map {$_->name} $two_cutters->each_enzyme),
+      " cut the sequence twice\n";
+
+  # what are all the enzymes that cut, and how often do they cut
+  printf "\n%-10s%s\n", 'Enzyme', 'Number of Cuts';
+  my $all_cutters = $ra->cutters;
+  map {
+      printf "%-10s%s\n", $_->name, $ra->cuts_by_enzyme($_->name)
+  } $all_cutters->each_enzyme;
+
+  # Finally, we can interact the restriction enzyme object by
+  # retrieving it from the collection object see the docs for
+  # Bio::Restriction::Enzyme.pm
+  my $enzobj = $enzymes->get_enzyme($enz);
+
+
+=head1 DESCRIPTION
+
+Bio::Restriction::Analysis describes the results of cutting a DNA
+sequence with restriction enzymes.
+
+To use this module you can pass a sequence object and optionally a
+Bio::Restriction::EnzymeCollection that contains the enzyme(s) to cut the
+sequences with. There is a default set of enzymes that will be loaded
+if you do not pass in a Bio::Restriction::EnzymeCollection.
+
+To cut a sequence, set up a Restriction::Analysis object with a sequence
+like this:
+
+  use Bio::Restriction::Analysis;
+  my $ra = Bio::Restriction::Analysis->new(-seq=>$seqobj);
+
+or
+
+  my $ra = Bio::Restriction::Analysis->new
+      (-seq=>$seqobj, -enzymes=>$enzs);
+
+Then, to get the fragments for a particular enzyme use this:
+
+  @fragments = $ra->fragments('EcoRI');
+
+Note that the naming of restriction enzymes is that the last numbers
+are usually Roman numbers (I, II, III, etc). You may want to use
+something like this:
+
+  # get a reference to an array of unique (single) cutters
+  $singles = $re->unique_cutters;
+  foreach my $enz ($singles->each_enzyme) {
+      @fragments = $re->fragments($enz);
+      ... do something here ...
+  }
+
+Note that if your sequence is circular, the first and last fragment
+will be joined so that they are the appropriate length and sequence
+for further analysis. This fragment will also be checked for cuts
+by the enzyme(s).  However, this will change the start of the
+sequence!
+
+There are two separate algorithms used depending on whether your
+enzyme has ambiguity. The non-ambiguous algorithm is a lot faster,
+and if you are using very large sequences you should try and use
+this algorithm. If you have a large sequence (e.g. genome) and
+want to use ambiguous enzymes you may want to make separate
+Bio::Restriction::Enzyme objects for each of the possible
+alternatives and make sure that you do not set is_ambiguous!
+
+This version should correctly deal with overlapping cut sites
+in both ambiguous and non-ambiguous enzymes.
+
+I have tried to write this module with speed and memory in mind
+so that it can be effectively used for large (e.g. genome sized)
+sequence. This module only stores the cut positions internally,
+and calculates everything else on an as-needed basis. Therefore
+when you call fragment_maps (for example), there may be another
+delay while these are generated.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu, 
+Steve Chervitz, sac at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Rob Edwards.  Some of this work is Copyright (c)
+1997-2002 Steve A. Chervitz. All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Bio::Restriction::Enzyme>, 
+L<Bio::Restriction::EnzymeCollection>
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private and
+are intended for internal use by this module. They are not considered
+part of the public interface and are described here for documentation
+purposes only.
+
+=cut
+
+package Bio::Restriction::Analysis;
+use Bio::Restriction::EnzymeCollection;
+use strict;
+use Data::Dumper;
+
+use vars qw ();
+use base qw(Bio::Root::Root);
+
+=head1 new
+
+ Title     : new
+ Function  : Initializes the restriction enzyme object
+ Returns   : The Restriction::Analysis object 
+ Arguments : 
+
+	     $re_anal->new(-seq=>$seqobj,
+                 -enzymes=>Restriction::EnzymeCollection object)
+	     -seq requires a Bio::PrimarySeq object
+	     -enzymes is optional.
+              If omitted it will use the default set of enzymes
+
+This is the place to start. Pass in a sequence, and you will be able
+to get the fragments back out.  Several other things are available
+like the number of zero cutters or single cutters.
+
+=cut
+
+sub new {
+    # Constructor.  Recognised named arguments are -seq (stored through
+    # seq()) and -enzymes (stored through enzymes()); when no enzymes are
+    # given a default Bio::Restriction::EnzymeCollection is created.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($seq, $enzymes) = $self->_rearrange([qw(SEQ ENZYMES)], @args);
+
+    $self->seq($seq) if $seq;
+
+    if ($enzymes) {
+        $self->enzymes($enzymes);
+    }
+    else {
+        $self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new;
+    }
+
+    # keep track of status
+    $self->{'_cut'} = 0;
+
+    # Result stores start out empty so that a freshly constructed object
+    # always has to perform a cut before answering queries.
+    $self->{maximum_cuts} = 0;
+    foreach my $store (qw(_number_of_cuts_by_enzyme
+                          _number_of_cuts_by_cuts
+                          _fragments
+                          _cut_positions
+                          _frag_map_list)) {
+        $self->{$store} = {};
+    }
+
+    return $self;
+}
+
+=head1 Methods to set parameters
+
+=cut
+
+=head2 seq
+
+ Title    : seq
+ Usage    : $ranalysis->seq($newval);
+ Function : get/set method for the  sequence to be cut
+ Example  : $re->seq($seq);
+ Returns  : value of seq
+ Args     : A Bio::PrimarySeqI dna object (optional)
+
+=cut
+
+sub seq {
+    # Get/set accessor for the sequence to be cut.
+    #
+    # With an argument: the value must be a Bio::PrimarySeqI object whose
+    # alphabet is 'dna', otherwise an exception is thrown.  Storing a new
+    # sequence clears the '_cut' flag so results are recalculated.
+    # Always returns the currently stored sequence.
+    my $self = shift;
+    if (@_) {
+        my $seq = shift;
+
+        # Guard the isa() call so that undef, plain strings and unblessed
+        # references produce the exception below rather than a raw
+        # "Can't call method" error.
+        my $is_seq = ref($seq)
+            && do { local $@; eval { $seq->isa('Bio::PrimarySeqI') } };
+
+        # ref() needs its parentheses here: 'ref $seq . "]"' parses as
+        # ref($seq . "]"), which dropped the type and the closing bracket
+        # from the message.
+        $self->throw('Need a sequence object [' . ref($seq) . ']')
+            unless $is_seq;
+        $self->throw('Need a DNA sequence object [' . $seq->alphabet . ']')
+            unless $seq->alphabet eq 'dna';
+
+        $self->{'_seq'} = $seq;
+        $self->{'_cut'} = 0;
+    }
+    return $self->{'_seq'};
+}
+
+=head2 enzymes
+
+ Title    : enzymes
+ Usage    : $re->enzymes($newval)
+ Function : gets/Set the restriction enzyme enzymes
+ Example  : $re->enzymes('EcoRI')
+ Returns  : reference to the collection
+ Args     : an array of Bio::Restriction::EnzymeCollection and/or
+            Bio::Restriction::Enzyme objects
+
+
+The default object for this method is
+Bio::Restriction::EnzymeCollection.  However, you can also pass it a
+list of Bio::Restriction::Enzyme objects - even mixed with Collection
+objects.  They will all be stored into one collection.
+
+=cut
+
+sub enzymes {
+    # Get/set accessor for the enzyme collection.  Any arguments are
+    # handed to the collection's own enzymes() method (an empty collection
+    # is created first if none exists yet) and the '_cut' flag is cleared.
+    # Always returns the collection.
+    my ($self, @additions) = @_;
+
+    return $self->{'_enzymes'} unless @additions;
+
+    $self->{'_enzymes'} ||=
+        Bio::Restriction::EnzymeCollection->new(-empty => 1);
+    $self->{'_enzymes'}->enzymes(@additions);
+    $self->{'_cut'} = 0;
+
+    return $self->{'_enzymes'};
+}
+
+
+=head1 Perform the analysis
+
+=cut
+
+=head2 cut
+
+ Title    : cut
+ Usage    : $re->cut()
+ Function : Cut the sequence with the enzymes
+ Example  : $re->cut(); $re->cut('single'); or $re->cut('multiple', $enzymecollection);
+ Returns  : $self
+ Args     : 'single' (optional), 'multiple' with enzyme collection.
+
+An explicit cut method is needed to pass arguments to it. 
+
+There are two varieties of cut. Single is the default, and need
+not be explicitly called. This cuts the sequence with each
+enzyme separately.
+
+Multiple cuts a sequence with more than one enzyme. You must pass
+it a Bio::Restriction::EnzymeCollection object of the set
+of enzymes that you want to use in the double digest. The results
+will be stored as an enzyme named "multiple_digest", so you can
+use all the retrieval methods to get the data.
+
+If you want to use the default setting there is no need to call cut
+directly. Every method in the class that needs output checks the
+object's internal status and recalculates the cuts if needed.
+
+Note: cut doesn't now re-initialize everything before figuring
+out cuts. This is so that you can do multiple digests, or add more
+data or whatever. You'll have to use new to reset everything.
+
+See also the comments in the DESCRIPTION above about ambiguous and
+non-ambiguous sequences.
+
+=cut
+
+sub cut {
+    # Performs the digest and marks the object as cut.
+    #
+    #   $opt - optional mode; 'multiple' (any case) runs a multiple digest,
+    #          anything else (including no argument) runs the default
+    #          single digest
+    #   $ec  - enzyme collection, required for a multiple digest
+    #
+    # Throws when no sequence has been set, or when a multiple digest is
+    # requested without a collection.  Returns $self.
+    my ($self, $opt, $ec) = @_;
+
+    $self->throw("A sequence must be supplied")
+        unless $self->seq;
+
+    # $opt is undefined on the default call path (every query method calls
+    # cut() without arguments), so test definedness before uc() to avoid
+    # an "uninitialized value" warning.
+    if (defined $opt && uc($opt) eq "MULTIPLE") {
+        $self->throw("You must supply a separate enzyme collection for multiple digests") unless $ec;
+        $self->_multiple_cuts($ec);    # multiple digests
+    }
+    else {
+        # reset some of the things that we save
+        $self->{maximum_cuts} = 0;
+        $self->{'_number_of_cuts_by_enzyme'} = {};
+        $self->{'_number_of_cuts_by_cuts'} = {};
+        $self->{'_fragments'} = {};
+        $self->{'_cut_positions'} = {};    # cut position is the real position
+        $self->{'_frag_map_list'} = {};
+        $self->_cuts;
+    }
+
+    $self->{'_cut'} = 1;
+    return $self;
+}
+
+=head2 multiple_digest
+
+ Title     : multiple_digest
+ Function  : perform a multiple digest on a sequence
+ Returns   : $self so you can go and get any of the other methods
+ Arguments : An enzyme collection
+
+ Multiple digests can use 1 or more enzymes, and the data is stored
+ in as if it were an enzyme called multiple_digest. You can then
+ retrieve information about multiple digests from any of the other
+ methods.
+
+ You can use this method in place of $re->cut('multiple', $enz_coll);
+
+=cut
+
+sub multiple_digest {
+    # Convenience wrapper: same as $self->cut('multiple', $ec).
+    my $self = shift;
+    my $ec   = shift;
+    return $self->cut('multiple', $ec);
+}
+
+=head1 Query the results of the analysis
+
+=cut
+
+=head2 positions
+
+  Title    : positions
+  Function : Retrieve the positions that an enzyme cuts at
+  Returns  : An array of the positions that an enzyme cuts at
+           : or an empty array if the enzyme doesn't cut
+  Arguments: An enzyme name to retrieve the positions for
+  Comments : The cut occurs after the base specified.
+
+=cut
+
+sub positions {
+    # Returns the stored cut positions for the enzyme name $enz, or an
+    # empty list when none are stored.  Triggers a cut first when the
+    # object has not been cut yet; throws when no enzyme is given.
+    my ($self, $enz) = @_;
+
+    $self->{'_cut'} or $self->cut;
+    $enz or $self->throw('no enzyme selected to get positions for');
+
+    if (defined $self->{'_cut_positions'}->{$enz}) {
+        return @{ $self->{'_cut_positions'}->{$enz} };
+    }
+    return ();
+}
+
+=head2 fragments
+
+  Title    : fragments
+  Function : Retrieve the fragments that we cut
+  Returns  : An array of the fragments retrieved. 
+  Arguments: An enzyme name to retrieve the fragments for
+
+For example this code will retrieve the fragments for all enzymes that
+cut your sequence
+
+  my $all_cutters = $analysis->cutters;
+  foreach my $enz ($all_cutters->each_enzyme) {
+      @fragments=$analysis->fragments($enz);
+  }
+
+=cut
+
+sub fragments {
+    # Returns the sequence string of every fragment reported by
+    # fragment_maps() for $enz.  Triggers a cut first when the object has
+    # not been cut yet; throws when no enzyme is given.
+    my ($self, $enz) = @_;
+
+    $self->{'_cut'} or $self->cut;
+    $enz or $self->throw('no enzyme selected to get fragments for');
+
+    my @fragments = map { $_->{seq} } $self->fragment_maps($enz);
+    return @fragments;
+}
+
+=head2 fragment_maps
+
+  Title     : fragment_maps
+  Function  : Retrieves fragment sequences with start and end
+              points. Useful for feature construction.
+
+  Returns   : An array containing a hash reference for each fragment,
+              containing the start point, end point and DNA
+              sequence. The hash keys are 'start', 'end' and
+              'seq'. Returns an empty array if not defined.
+
+  Arguments : An enzyme name, enzyme object, 
+              or enzyme collection to retrieve the fragments for.
+
+If passed an enzyme collection it will return the result of a multiple
+digest. This will also cause the special enzyme 'multiple_digest' to
+be created so you can get other information about this multiple
+digest. (TMTOWTDI).
+
+There is a minor problem with this and $self-E<gt>fragments that I
+haven't got a good answer for (at the moment). If the sequence is not
+cut, do we return undef, or the whole sequence?
+
+For linear fragments it would be good to return the whole
+sequence. For circular fragments I am not sure.
+
+At the moment it returns the whole sequence with start of 1 and end of
+length of the sequence.  For example:
+
+  use Bio::Restriction::Analysis;
+  use Bio::Restriction::EnzymeCollection;
+  use Bio::PrimarySeq;
+
+  my $seq = new Bio::PrimarySeq
+      (-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATCCAAAAAAGAGTGAGCTTCTGAT',
+       -primary_id => 'synopsis',
+       -molecule => 'dna');
+
+  my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);
+
+  my @gel;
+  my @bam_maps = $ra->fragment_maps('BamHI');
+  foreach my $i (@bam_maps) {
+     my $start = $i->{start};
+     my $end = $i->{end};
+     my $sequence = $i->{seq};
+     push @gel, "$start--$sequence--$end";
+     @gel = sort {length $b <=> length $a} @gel;
+  }
+  print join("\n", @gel) . "\n";
+
+=cut
+
+sub fragment_maps {
+    # Builds the fragment map for one digest.
+    #
+    #   $enz - an enzyme name, an enzyme object (looked up by its name), or
+    #          an enzyme collection (which triggers a multiple digest and
+    #          uses the 'multiple_digest' cut positions)
+    #
+    # Returns a list of hash refs with the keys 'start', 'end' and 'seq',
+    # ordered by start.  When there are no cut positions the whole sequence
+    # is returned as a single fragment; an enzyme name with no recorded
+    # positions is treated the same way.  Throws when no enzyme is given.
+    my ($self, $enz) = @_;
+    $self->cut unless $self->{'_cut'};
+    $self->throw('no enzyme selected to get fragment maps for')
+        unless $enz;
+
+    # we are going to generate this on an as-needed basis rather than
+    # for every enzyme this should cut down on the amount of
+    # duplicated data we are trying to save in memory and make this
+    # faster and easier for large sequences, e.g. genome analysis
+
+    # "|| []" keeps an enzyme without recorded positions from dying on an
+    # undefined array reference.
+    my @cut_positions;
+    if (ref($enz) eq '') {
+        @cut_positions = @{ $self->{'_cut_positions'}->{$enz} || [] };
+    }
+    elsif ($enz->isa("Bio::Restriction::EnzymeI")) {
+        @cut_positions = @{ $self->{'_cut_positions'}->{$enz->name} || [] };
+    }
+    elsif ($enz->isa("Bio::Restriction::EnzymeCollection")) {
+        $self->cut('multiple', $enz);
+        @cut_positions = @{ $self->{'_cut_positions'}->{'multiple_digest'} || [] };
+    }
+
+    my $seq_obj = $self->{'_seq'};
+
+    # The maps are collected in a fresh list on every call.  They used to
+    # be pushed onto the stored list, so each repeated call for the same
+    # enzyme returned one more copy of every fragment.
+    my @maps;
+
+    if (!$cut_positions[0]) {
+        # it doesn't cut
+        # return the whole sequence
+        # this should probably have the is_circular command
+        @maps = ({
+                  'start' => 1,
+                  'end'   => $seq_obj->length,
+                  'seq'   => $seq_obj->seq,
+                 });
+    }
+    else {
+        # sorted cut sites with adjacent duplicates removed
+        my @cuts;
+        foreach my $pos (sort { $a <=> $b } @cut_positions) {
+            push @cuts, $pos unless @cuts && $pos == $cuts[-1];
+        }
+
+        # walk along the sequence, one fragment per cut site
+        my $start = 1;
+        my (%seq_at, %stop_at);
+        foreach my $cut (@cuts) {
+            $seq_at{$start}  = $seq_obj->subseq($start, $cut);
+            $stop_at{$start} = $cut;
+            $start = $cut + 1;
+        }
+
+        # trailing fragment; there is none when the enzyme cleaved at
+        # the very end of the sequence (borderline case)
+        my $stop = $seq_obj->length;
+        if ($start <= $stop) {
+            $seq_at{$start}  = $seq_obj->subseq($start, $stop);
+            $stop_at{$start} = $stop;
+        }
+
+        if ($seq_obj->is_circular) {
+            # join the first and last fragments
+            $seq_at{$start} .= $seq_at{'1'};
+            delete $seq_at{'1'};
+            $stop_at{$start} = $stop_at{'1'};
+            delete $stop_at{'1'};
+        }
+
+        foreach my $from (sort { $a <=> $b } keys %seq_at) {
+            push @maps, {
+                         'start' => $from,
+                         'end'   => $stop_at{$from},
+                         'seq'   => $seq_at{$from},
+                        };
+        }
+    }
+
+    $self->{'_frag_map_list'}->{$enz} = \@maps;
+    return @maps;
+}
+
+
+=head2 sizes
+
+  Title    : sizes
+  Function : Retrieves an array with the sizes of the fragments
+  Returns  : Array that has the sizes of the fragments. If the optional
+             third argument is set they are ordered from largest to
+             smallest, like they would appear in a gel.
+  Arguments: An enzyme name to retrieve the sizes for is required. If
+             the optional second argument is set the sizes are given
+             in kilobases to the nearest 0.1 kb, else they will be in
+             bp. If the optional third entry is set the results will
+             be sorted.
+
+This is designed to make it easy to see what fragments you should get
+on a gel!
+
+You should be able to do these:
+
+  # to see all the fragment sizes,
+  print join("\n", $re->sizes($enz)), "\n";
+  # to see all the fragment sizes sorted
+  print join("\n", $re->sizes($enz, 0, 1)), "\n";
+  # to see all the fragment sizes in kb sorted
+  print join("\n", $re->sizes($enz, 1, 1)), "\n";
+
+=cut
+
+sub sizes {
+    # Returns the list of fragment sizes produced by enzyme $enz.
+    #
+    #   $enz  - enzyme name (required; throws when missing)
+    #   $kb   - when true, sizes are reported in kilobases rounded to the
+    #           nearest 0.1 kb instead of base pairs
+    #   $sort - when true, sizes are ordered from largest to smallest
+    #
+    # For a circular sequence the first and last fragments are joined and
+    # the joined size is reported last (before any sorting).
+    my ($self, $enz, $kb, $sort) = @_;
+    $self->throw('no enzyme selected to get fragments for')
+        unless $enz;
+    $self->cut unless $self->{'_cut'};
+
+    my $seq_obj = $self->{'_seq'};
+
+    # Work in base pairs first: distance from each cut site to the
+    # previous one, then the remainder up to the end of the sequence.
+    my @frag;
+    my $lastsite = 0;
+    foreach my $site (@{ $self->{'_cut_positions'}->{$enz} || [] }) {
+        push @frag, $site - $lastsite;
+        $lastsite = $site;
+    }
+    push @frag, $seq_obj->length - $lastsite;
+
+    # With a single fragment (no cuts) there is nothing to join.
+    if ($seq_obj->is_circular && @frag > 1) {
+        my $first = shift @frag;
+        $frag[-1] += $first;
+    }
+
+    # Convert after joining so the joined fragment is rounded only once.
+    # The old expression, int($bp)/100/10, applied int() to the base-pair
+    # count itself and therefore never rounded anything.
+    if ($kb) {
+        @frag = map { int($_ / 100 + 0.5) / 10 } @frag;
+    }
+
+    if ($sort) {
+        @frag = sort { $b <=> $a } @frag;
+    }
+
+    return @frag;
+}
+
+=head1 How many times does enzymes X cut?
+
+=cut
+
+=head2 cuts_by_enzyme
+
+ Title     : cuts_by_enzyme
+ Function  : Return the number of cuts for an enzyme
+ Returns   : An integer with the number of times each enzyme cuts.
+             Returns 0 if doesn't cut or undef if not defined
+ Arguments : An enzyme name string
+
+
+=cut
+
+sub cuts_by_enzyme {
+    # Returns the stored number of cuts for the enzyme name $enz (undef
+    # when nothing is stored under that name).  Throws when $enz is not
+    # defined; triggers a cut first when the object has not been cut yet.
+    my $self = shift;
+    my $enz  = shift;
+
+    defined $enz or $self->throw("Need an enzyme name");
+    $self->{'_cut'} or $self->cut;
+
+    return $self->{'_number_of_cuts_by_enzyme'}->{$enz};
+}
+
+=head1 Which enzymes cut the sequence N times?
+
+=cut
+
+=head2 cutters
+
+ Title     : cutters
+ Function  : Find enzymes that cut a given number of times
+ Returns   : a Bio::Restriction::EnzymeCollection
+ Arguments : 1. exact time or lower limit,
+                non-negative integer, optional
+             2. upper limit, non-negative integer,
+                larger than or equal to the first, optional
+
+
+If no arguments are given, the method returns all enzymes that do cut
+the sequence. The argument zero, '0', is same as method
+zero_cutters().  The argument one, '1', corresponds to unique_cutters.
+If either of the limits is larger than the number of cuts any enzyme
+makes in the sequence, that limit is automagically lowered. The method
+max_cuts() gives the largest number of cuts.
+
+See Also : L<unique_cutters|unique_cutters>,
+L<zero_cutters|zero_cutters>, L<max_cuts|max_cuts>
+
+=cut
+
+sub cutters {
+    # Collects the enzymes that cut a given number of times.
+    #
+    #   $min - exact number of cuts, or the lower limit when $max is also
+    #          given; non-negative integer; defaults to 1
+    #   $max - optional upper limit; non-negative integer, not smaller
+    #          than $min
+    #
+    # Limits above the highest recorded cut count are lowered to it.
+    # Returns a Bio::Restriction::EnzymeCollection (possibly empty).
+    # Throws when a limit is not a non-negative integer.
+    my ($self, $min, $max) = @_;
+
+    $self->cut unless $self->{'_cut'};
+
+    my ($start, $end);
+    if (defined $min) {
+        $self->throw("Need a non-negative integer [$min]")
+            unless $min =~ /^[+]?\d+$/;
+        $start = $min + 0;    # plain number, so "+2" finds the bucket "2"
+    } else {
+        $start = 1;
+    }
+    $start = $self->{'maximum_cuts'} if $start > $self->{'maximum_cuts'};
+
+    if (defined $max) {
+        # When no lower limit was given, any non-negative upper limit is
+        # acceptable.
+        my $lower = defined $min ? $min : 0;
+        $self->throw("Need a non-negative integer no smaller than start [$max]")
+            unless $max =~ /^[+]?\d+$/ and $max >= $lower;
+        $end = $max + 0;
+    }
+    elsif (defined $min) {
+        $end = $start;
+    } else {
+        $end = $self->{'maximum_cuts'};
+    }
+    $end = $self->{'maximum_cuts'} if $end > $self->{'maximum_cuts'};
+
+    my $set = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+
+    # Return an empty set if nothing cuts.  An explicit request for the
+    # zero cutters is the exception: when nothing cuts, every enzyme is a
+    # zero cutter and has to be reported.
+    my $wants_zero = defined $min && $min == 0;
+    return $set unless $self->{'maximum_cuts'} || $wants_zero;
+
+    foreach my $count ($start .. $end) {
+        my $bucket = $self->{_number_of_cuts_by_cuts}->{$count};
+        $set->enzymes(@{$bucket}) if defined $bucket;
+    }
+
+    return $set;
+}
+
+
+=head2 unique_cutters
+
+ Title     : unique_cutters
+ Function  : A special case of cutters() where enzymes only cut once
+ Returns   : a Bio::Restriction::EnzymeCollection
+ Arguments : -
+
+
+See also:  L<cutters>, L<zero_cutters>
+
+=cut
+
+sub unique_cutters {
+    # Shorthand for cutters(1).
+    my ($self) = @_;
+    return $self->cutters(1);
+}
+
+=head2 zero_cutters
+
+ Title     : zero_cutters
+ Function  : A special case of cutters() where enzymes don't cut the sequence
+ Returns   : a Bio::Restriction::EnzymeCollection
+ Arguments : -
+
+See also:  L<cutters>, L<unique_cutters>
+
+=cut
+
+sub zero_cutters {
+    # Shorthand for cutters(0).
+    my ($self) = @_;
+    return $self->cutters(0);
+}
+
+=head2 max_cuts
+
+ Title     : max_cuts
+ Function  : Find the most number of cuts
+ Returns   : The number of times the enzyme that cuts most cuts.
+ Arguments : None
+
+This is not a very practical method, but if you are curious...
+
+=cut
+
+sub max_cuts {
+    # Returns the value stored under 'maximum_cuts'.
+    my ($self) = @_;
+    return $self->{maximum_cuts};
+}
+
+=head1 Internal methods
+
+=cut
+
+=head2 _cuts
+
+ Title     : _cuts
+ Function  : Figures out which enzymes we know about and cuts the sequence.
+ Returns   : Nothing.
+ Arguments : None.
+ Comments  : An internal method. This will figure out where the sequence 
+             should be cut, and provide the appropriate results.
+
+=cut
+
+sub _cuts {
+    my $self = shift;
+
+    # Fills the _cut_positions and _number_of_cuts_* tables and maximum_cuts.
+
+    # An upper-cased copy is searched so that lower-case input still matches.
+    my $target_seq = uc $self->{'_seq'}->seq;
+
+    # first, find out all the enzymes that we have
+    foreach my $enz ($self->{'_enzymes'}->each_enzyme) {
+        my @all_cuts;
+
+        # Enzymes with several sites keep the alternatives in others().
+        # A ternary is used because "my @x = ... if COND" leaves @x in an
+        # undefined (possibly stale) state whenever COND is false.
+        my @others = $enz->can("others") ? $enz->others : ();
+
+        foreach my $enzyme ($enz, @others) {
+            my ($beforeseq, $afterseq) = $self->_enzyme_sites($enzyme);
+
+            # Every search helper returns a reference to an array of
+            # positions in the DNA where the sequence is cut. They are all
+            # pushed into @all_cuts, which is de-duplicated further down.
+
+            # Ambiguous sites need a regexp search; plain sites can use
+            # index(), which is much faster.
+            my $cut_positions = $enzyme->is_ambiguous
+                ? $self->_ambig_cuts($beforeseq, $afterseq, $target_seq, $enzyme)
+                : $self->_nonambig_cuts($beforeseq, $afterseq, $target_seq, $enzyme);
+            push @all_cuts, @$cut_positions;
+
+            # deal with is_circular sequences
+            if ($self->{'_seq'}->is_circular) {
+                $cut_positions = $self->_circular($beforeseq, $afterseq, $enzyme);
+                push @all_cuts, @$cut_positions;
+            }
+
+            # non-palindromic enzymes are also searched on the other strand
+            unless ($enzyme->is_palindromic) {
+                $cut_positions = $self->_non_pal_enz($target_seq, $enzyme);
+                push @all_cuts, @$cut_positions;
+            }
+        }
+
+        my $name = $enz->name;
+        if (defined $all_cuts[0]) {
+            # sort numerically, then append every position that differs
+            # from the one stored last; this removes duplicate cut sites
+            my $stored = $self->{'_cut_positions'}->{$name} ||= [];
+            @all_cuts = sort {$a <=> $b} @all_cuts;
+            push @$stored, $all_cuts[0];
+            foreach my $i (@all_cuts) {
+                push @$stored, $i if $i != $stored->[-1];
+            }
+        } else {
+            # this just fixes an error when @all_cuts is not defined!
+            @{$self->{'_cut_positions'}->{$name}} = ();
+        }
+
+        # Only the cut positions are saved. This significantly decreases
+        # the memory required for large sequences and speeds things up,
+        # because fragments and fragment maps are only calculated for
+        # those enzymes they are needed for.
+
+        # finally, save minimal information about each enzyme:
+        # the number of cuts, by enzyme name and by cut count
+        my $number_of_cuts = scalar @{$self->{'_cut_positions'}->{$name}};
+        $self->{_number_of_cuts_by_enzyme}->{$name} = $number_of_cuts;
+        push @{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, $enz;
+        if ($number_of_cuts > $self->{maximum_cuts}) {
+            $self->{maximum_cuts} = $number_of_cuts;
+        }
+
+    }
+}
+
+=head2 _enzyme_sites
+
+ Title     : _enzyme_sites
+ Function  : An internal method to figure out the two sides of an enzyme
+ Returns   : The sequence before the cut and the sequence after the cut
+ Arguments : A Bio::Restriction::Enzyme object
+
+=cut
+
+sub _enzyme_sites {
+    my ($self, $enz) = @_;
+    # The cut position comes from $enz->cut; an undefined cut is treated
+    # as a cut in front of the first base of the site.
+    my $site = $enz->cut;
+    $site = 0 unless defined $site;
+
+    # Cuts outside of the recognition sequence are deliberately not
+    # rejected: Type I and Type III enzymes cut outside of their sites.
+
+    # '.' stands in for the side of the cut that carries no sequence; it
+    # just stops an error from an undefined string.
+    my ($beforeseq, $afterseq) = ('.', '.');
+
+    if ($site == 0) {
+        # the cut precedes the site: the whole site follows it
+        $afterseq = $enz->string;
+    }
+    elsif ($site == $enz->seq->length) {
+        # the cut follows the site: the whole site precedes it
+        $beforeseq = $enz->string;
+    }
+    else {
+        # otherwise split the site sequence at the cut
+        my $seq = $enz->seq;
+        $beforeseq = $seq->subseq(1, $site);
+        $afterseq  = $seq->subseq($site + 1, $seq->length);
+    }
+
+    # if the enzyme is ambiguous the codes become regexp character classes
+    if ($enz->is_ambiguous) {
+        ($beforeseq, $afterseq) =
+            map { $self->_expanded_string($_) } $beforeseq, $afterseq;
+    }
+
+    return ($beforeseq, $afterseq);
+}
+
+
+=head2 _non_pal_enz
+
+  Title    : _non_pal_enz
+  Function : Analyses non_palindromic enzymes for cuts in both ways
+  Returns  : A reference to an array of cut positions
+  Arguments: The sequence to check and the enzyme object
+
+=cut
+
+sub _non_pal_enz {
+    my ($self, $target_seq, $enz) = @_;
+    # A non-palindromic site reads differently on the two strands, so its
+    # reverse complement is searched for in the target as well. On the
+    # reverse complement the cut sits at length - complementary_cut.
+    my $revcom       = $enz->seq->revcom;
+    my $site_length  = $enz->seq->length;
+    my $new_left_cut = $site_length - $enz->complementary_cut;
+    my ($beforeseq, $afterseq) = ('.', '.');    # '.' = no sequence on that side
+    if    ($new_left_cut == 0)            { $afterseq  = $revcom->seq }
+    elsif ($new_left_cut == $site_length) { $beforeseq = $revcom->seq }
+    else {
+        # The second half starts one base after the cut; starting it at
+        # the cut itself would repeat that base in both halves.
+        $beforeseq = $revcom->subseq(1, $new_left_cut);
+        $afterseq  = $revcom->subseq($new_left_cut + 1, $site_length);
+    }
+
+    # Both halves end up inside a regexp in _ambig_cuts, so ambiguity
+    # codes have to be expanded into character classes first.
+    my $results;
+    if ($enz->is_ambiguous) {
+        $beforeseq = $self->_expanded_string($beforeseq);
+        $afterseq  = $self->_expanded_string($afterseq);
+        $results = $self->_ambig_cuts($beforeseq, $afterseq, $target_seq, $enz);
+    } else {
+        $results = $self->_nonambig_cuts($beforeseq, $afterseq, $target_seq, $enz);
+    }
+
+    # deal with is_circular: the site may also span the origin
+    my $more_results = $self->{'_seq'}->is_circular
+        ? $self->_circular($beforeseq, $afterseq, $enz) : [];
+    return [ @$more_results, @$results ];
+}
+
+=head2 _ambig_cuts
+
+ Title     : _ambig_cuts
+ Function  : An internal method to localize the cuts in the sequence
+ Returns   : A reference to an array of cut positions
+ Arguments : The separated enzyme site, the target sequence, and the enzyme object
+ Comments  : This is a slow implementation but works for ambiguous sequences.
+             Whenever possible, _nonambig_cuts should be used as it is a lot faster.
+
+=cut
+
+sub _ambig_cuts {
+    my ($self, $beforeseq, $afterseq, $target_seq, $enz) = @_;
+    # $beforeseq and $afterseq are used as regexp fragments below; $enz is
+    # accepted but not used in this method.
+    # Cut the sequence. This is done with split so we can use a regexp;
+    # the pattern is matched case-insensitively.
+    my @cuts = split /($beforeseq)($afterseq)/i, $target_seq;
+    # Because the pattern has two capture groups the array has extra
+    # elements --- the before and after parts of every match! We have:
+    # element 0 sequence
+    # element 1 3' end
+    # element 2 5' end of next sequence
+    # element 3 sequence
+    # ....
+
+    # we need to loop through the array and add the ends to the
+    # appropriate parts of the sequence
+
+    my $i=0;
+    my @re_frags;
+    if ($#cuts) {           # true unless split returned exactly one element
+        while ($i<$#cuts) {
+            my $joinedseq;
+            # the first sequence is a special case
+            if ($i == 0) {
+                $joinedseq=$cuts[$i].$cuts[$i+1];
+            } else {
+                $joinedseq=$cuts[$i-1].$cuts[$i].$cuts[$i+1];
+            }
+	    # now deal with overlapping sequences
+	    # we can do this through a regular regexp as we only
+	    # have a short fragment to look through
+	    # NOTE(review): unlike the split above, these matches are case-sensitive
+	    while ($joinedseq =~ /$beforeseq$afterseq/) {
+                $joinedseq =~ s/^(.*?$beforeseq)($afterseq)/$2/;
+                push @re_frags, $1;
+	    }
+            push @re_frags, $joinedseq;
+            $i+=3;
+        }
+
+    # The fragment after the last match is deliberately left out. It was
+    # messing up the _circular part of things.
+
+    } else {
+            # if we don't cut, leave the array empty
+	    return [];
+    } # the sequence was not cut.
+
+    # now @re_frags has the fragments of all the sequences,
+    # but this method returns cut positions rather than
+    # fragments.
+
+    # the actual cut sites are derived from the lengths
+    # of the fragments in @re_frags
+
+    # note that the positions are the only data that is returned; the
+    # fragments would have to be regenerated from them. The reason is
+    # that we can use this in _circular easier
+
+    my @cut_positions = map {length($_)} @re_frags;
+
+    # the cut positions are right now the lengths of the fragments, but
+    # we need to add them all onto each other (a running total)
+
+    for (my $i=1; $i<=$#cut_positions; $i++) {
+     $cut_positions[$i]+=$cut_positions[$i-1];
+    }
+
+    # in one of those oddities in life, 2 fragments mean an enzyme cut once;
+    # one position is returned per fragment collected above
+    return \@cut_positions;
+}
+
+
+=head2 _nonambig_cuts
+
+ Title     : _nonambig_cuts
+ Function  : An internal method to localize the cuts in the sequence
+ Returns   : A reference to an array of cut positions
+ Arguments : The separated enzyme site, the target sequence, and the enzyme object
+
+An internal method. This will figure out where the sequence should be
+cut, and provide the appropriate results.  This is a much faster
+implementation because it doesn't use a regexp, but it can not deal
+with ambiguous sequences
+
+=cut
+
+sub _nonambig_cuts {
+    my ($self, $beforeseq, $afterseq, $target_seq, $enz) = @_;
+    # '.' marks a side of the cut without sequence; $enz is not used here
+    $beforeseq = '' if $beforeseq eq '.';
+    $afterseq  = '' if $afterseq  eq '.';
+    my $site   = $beforeseq . $afterseq;
+    my $offset = length $beforeseq;
+
+    # An empty site would match everywhere and index() would never report
+    # -1 again (an endless loop), so treat it as "does not cut".
+    return [] if $site eq '';
+    my @cuts;    # a cut lies $offset bases into a match; matches may overlap
+    for (my $at = index($target_seq, $site); $at > -1; $at = index($target_seq, $site, $at + 1)) {
+        push @cuts, $at + $offset;
+    }
+    return \@cuts;
+}
+
+
+=head2 _multiple_cuts
+
+ Title     : _multiple_cuts
+ Function  : Figures out multiple digests
+ Returns   : An array of the cut sites for multiply digested DNA
+ Arguments : A Bio::Restriction::EnzymeCollection object
+ Comments  : Double digests is one subset of this, but you can use
+             as many enzymes as you want.
+
+=cut
+
+sub _multiple_cuts {
+    my ($self, $ec) = @_;
+    # run cut() first unless the _cut flag is already set
+    if (!$self->{'_cut'}) { $self->cut }
+    # a multiple digest is simply the numerically sorted cut positions of
+    # all the enzymes in the collection taken together
+    my @merged;
+    for my $member ($ec->each_enzyme) {
+        my $own = $self->{'_cut_positions'}->{$member->name};
+        push @merged, @$own if defined $own;
+    }
+    my $digest = $self->{'_cut_positions'}->{'multiple_digest'} ||= [];
+    @$digest = sort { $a <=> $b } @merged;
+
+    # file the digest under the pseudo-enzyme name 'multiple_digest'
+    my $number_of_cuts = scalar @$digest;
+    $self->{_number_of_cuts_by_enzyme}->{'multiple_digest'} = $number_of_cuts;
+    push @{ $self->{_number_of_cuts_by_cuts}->{$number_of_cuts} }, 'multiple_digest';
+    if ($number_of_cuts > $self->{maximum_cuts}) {
+        $self->{maximum_cuts} = $number_of_cuts;
+    }
+}
+
+
+=head2 _circular
+
+ Title     : _circular
+ Function  : Deals with circular sequences
+ Returns   : A reference to an array of cut positions
+ Arguments : The separated enzyme site and the enzyme object
+
+There are two problems with circular sequences.
+
+  1. When you cut a sequence and rejoin fragments you could generate
+  new cut sites.
+
+  2. There could be a cut site at the end of the sequence.
+
+I think these may be the same problem, and so we're working on #2 first!
+
+=cut
+
+sub _circular {
+    my ($self, $beforeseq, $afterseq, $enz) = @_;
+    my $target_seq=uc $self->{'_seq'}->seq; # upper-cased copy of the target sequence
+
+    # The approach: take the recognition length of the longest cutter in the
+    # enzyme collection, join that many bases from the end of the sequence
+    # to that many bases from its start, and map the cut sites of $enz in
+    # the joined piece.
+
+    # The cut sites that we are interested in must be within the length of the
+    # enzyme sequence from the start or the end.
+
+    my $longest_enz=$self->{'_enzymes'}->longest_cutter;
+    my $longest_cut=$longest_enz->recognition_length;
+    # a sequence shorter than the longest recognition site is not handled
+    $self->throw("Crap. The longest recognition site ($longest_cut) is longer than the".
+      " length of the sequence") if ($longest_cut > $self->{'_seq'}->length);
+
+   # newseq is just the last part of the sequence followed by the first part;
+   # we don't want to go through and check the whole sequence again
+
+   my ($first, $last) =
+       (substr($target_seq, 0, $longest_cut),substr($target_seq, -$longest_cut));
+   my $newseq=$last.$first;
+
+   # now find the cut sites
+   # if the enzyme is ambiguous we need to use a regexp to find the cut site
+   # otherwise we can use index (much faster)
+   my $cut_positions;
+   if ($enz->is_ambiguous) {
+      $cut_positions= $self->_ambig_cuts($beforeseq, $afterseq, $newseq, $enz);
+   } else {
+      $cut_positions=$self->_nonambig_cuts($beforeseq, $afterseq, $newseq, $enz);
+   }
+
+   # both helpers hand back an array reference, so this guard looks unreachable
+   return [] if (!$cut_positions);
+
+   # Translate positions in $newseq into positions in the real sequence.
+   # It doesn't matter if a site is reported twice (here and by the
+   # linear search) - because positions are used rather than fragments,
+   # the caller can simply remove duplicates.
+   # In $newseq the first length($last) bases are the tail, the rest the head.
+   my @circ_cuts;
+   foreach my $cut (@$cut_positions) {
+    if ($cut == length($last)) {
+     # the cut is exactly at the junction, i.e. at position 0; it is reported as
+     # the length of the sequence so we don't confuse no cuts with a 0 cut
+     push (@circ_cuts, $self->{'_seq'}->length);
+    }
+    elsif ($cut < length($last)) {
+     # the cut is inside the tail: count back from the end of the sequence
+     # NOTE(review): the original author suspected an off by one error here
+     push (@circ_cuts, $self->{'_seq'}->length - (length($last) - $cut));
+    }
+    else {
+     # the cut is inside the head, i.e. near the start (position >=1)
+     # NOTE(review): the original author suspected an off by one error here
+     # note, we put this at the beginning of the array rather than the end!
+     unshift (@circ_cuts, $cut-length($last));
+    }
+   }
+   return \@circ_cuts;
+}
+
+
+
+
+
+=head2 _expanded_string
+
+ Title     : _expanded_string
+ Function  : Expand nucleotide ambiguity codes to their representative letters
+ Returns   : The full length string
+ Arguments : The string to be expanded.
+
+Stolen from the original RestrictionEnzyme.pm
+
+=cut
+
+
+sub _expanded_string {
+    my ($self, $str) = @_;
+
+    # Ambiguity code => regexp fragment matching the bases it stands for.
+    # No replacement contains a code itself, so a single pass is enough.
+    my %class_for = (
+        N => '.',     X => '.',
+        R => '[AG]',  Y => '[CT]',  S => '[GC]',  W => '[AT]',
+        M => '[AC]',  K => '[TG]',
+        B => '[CGT]', D => '[AGT]', H => '[ACT]', V => '[ACG]',
+    );
+
+    # one left-to-right pass; every other character is copied unchanged
+    $str =~ s/([NXRYSWMKBDHV])/$class_for{$1}/g;
+
+    return $str;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiCut.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiCut.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiCut.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,168 @@
+# $Id: MultiCut.pm,v 1.7.4.1 2006/10/02 23:10:23 sendu Exp $
+#------------------------------------------------------------------
+#
+# BioPerl module Bio::Restriction::Enzyme::MultiCut
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Restriction::Enzyme::MultiCut - A single restriction endonuclease
+
+=head1 SYNOPSIS
+
+  # set up a single restriction enzyme. This contains lots of
+  # information about the enzyme that is generally parsed from a
+  # rebase file and can then be read back
+
+  use Bio::Restriction::Enzyme;
+
+
+=head1 DESCRIPTION
+
+This module defines a restriction endonuclease class where one object
+represents one of the distinct recognition sites for that enzyme. The
+method L<others|others> stores references to other objects with
+alternative sites.
+
+In this schema each object within an EnzymeCollection can be checked
+for matching a sequence.
+
+
+REBASE report notation C<Bsp24I (8/13)GACNNNNNNTGG(12/7)> means:
+
+
+   Bsp24I
+             5'      ^NNNNNNNNGACNNNNNNTGGNNNNNNNNNNNN^   3'
+             3' ^NNNNNNNNNNNNNCTGNNNNNNACCNNNNNNN^        5'
+
+
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Rob Edwards.
+
+Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
+Rights Reserved.  This module is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Bio::Restriction::Enzyme>, L<Bio::Restriction::Analysis>, 
+L<Bio::Restriction::EnzymeCollection>
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private and
+are intended for internal use by this module. They are not considered
+part of the public interface and are described here for documentation
+purposes only.
+
+=cut
+
+package Bio::Restriction::Enzyme::MultiCut;
+use strict;
+
+use Data::Dumper;
+
+use vars qw ();
+use base qw(Bio::Restriction::Enzyme);
+
+
+=head2 new
+
+ Title     : new
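+ Usage     : $re = Bio::Restriction::Enzyme::MultiCut->new(@args);
+ Function  : Initializes the enzyme object
+ Returns   : The Restriction::Enzyme::MultiCut object
+ Argument  : -others, passed on to others(); all arguments also go to the parent constructor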
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+
+    # let the parent class build the object from the same arguments
+    my $self = $class->SUPER::new(@args);
+
+    # -others: handed to others() as given, and only when it is true
+    my ($others) = $self->_rearrange([qw(OTHERS)], @args);
+    $self->others($others) if $others;
+
+    return $self;
+}
+
+=head2 others
+
+ Title     : others
+ Usage     : $re->others(@others);
+ Function  : Gets/Sets the list of other Bio::Restriction::Enzyme::MultiCut
+             objects holding the alternative sites of this enzyme
+ Arguments : A list of the other Bio::Restriction::Enzyme::MultiCut
+             objects (optional; they are appended to the stored list)
+ Returns   : A list of the other Bio::Restriction::Enzyme::MultiCut
+             objects
+
+Added for compatibility to REBASE
+
+=cut
+
+sub others {
+    my ($self, @sites) = @_;    # @sites: objects to append, may be empty
+    push @{ $self->{_others} ||= [] }, @sites;    # "||= []": the list always exists, so
+    return @{ $self->{_others} };                 # the getter never dereferences undef
+}
+
+
+=head2 purge_others
+
+ Title     : purge_others
+ Usage     : $re->purge_others();
+ Function  : Purges the set of other sites (see others) for this enzyme
+ Arguments : 
+ Returns   : 
+
+=cut
+
+sub purge_others {
+    my ($self) = @_;
+    # start over with a fresh, empty list, which is also the return value
+    return $self->{_others} = [];
+}
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiSite.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiSite.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme/MultiSite.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,158 @@
+# $Id: MultiSite.pm,v 1.7.4.1 2006/10/02 23:10:23 sendu Exp $
+#------------------------------------------------------------------
+#
+# BioPerl module Bio::Restriction::Enzyme::MultiSite
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Restriction::Enzyme::MultiSite - A single restriction endonuclease
+
+=head1 SYNOPSIS
+
+  # set up a single restriction enzyme. This contains lots of
+  # information about the enzyme that is generally parsed from a
+  # rebase file and can then be read back
+
+  use Bio::Restriction::Enzyme;
+
+
+=head1 DESCRIPTION
+
+This module is used for restriction enzymes that recognize more than
+one site. There are some enzymes that recognize sites that cannot be
+represented by the ambiguous genetic code. For example, M.PhiBssHII
+recognizes the sites: ACGCGT,CCGCGG,RGCGCY,RCCGGY, and GCGCGC
+
+Each site gets its own object that Bio::Restriction::Enzyme will
+refer to. Each also correlates with the other sites using the 
+method L<others|others> which stores references to other objects 
+with alternative sites.
+
+In this schema each object within an EnzymeCollection can be checked
+for matching a sequence.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Rob Edwards.
+
+Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
+Rights Reserved.  This module is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Bio::Restriction::Enzyme>, L<Bio::Restriction::Analysis>, 
+L<Bio::Restriction::EnzymeCollection>
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private and
+are intended for internal use by this module. They are not considered
+part of the public interface and are described here for documentation
+purposes only.
+
+=cut
+
+package Bio::Restriction::Enzyme::MultiSite;
+use strict;
+
+use Data::Dumper;
+
+use vars qw ();
+use base qw(Bio::Restriction::Enzyme);
+
+=head2 new
+
+ Title     : new
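+ Usage     : $re = Bio::Restriction::Enzyme::MultiSite->new(@args);
+ Function  : Initializes the enzyme object
+ Returns   : The Restriction::Enzyme::MultiSite object
+ Argument  : -others, passed on to others(); all arguments also go to the parent constructor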
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+
+    # let the parent class build the object from the same arguments
+    my $self = $class->SUPER::new(@args);
+
+    # -others: handed to others() as given, and only when it is true
+    my ($others) = $self->_rearrange([qw(OTHERS)], @args);
+    $self->others($others) if $others;
+
+    return $self;
+}
+
+=head2 others
+
+ Title     : others
+ Usage     : $re->others(@others);
+ Function  : Gets/Sets the list of other sites that this enzyme recognizes
+ Arguments : An array containing the other Bio::Restriction::Enzyme::MultiSite
+             objects.
+ Returns   : An array containing the other Bio::Restriction::Enzyme::MultiSite
+             objects.
+
+=cut
+
+sub others {
+    my ($self, @sites) = @_;    # @sites: objects to append, may be empty
+    push @{ $self->{_others} ||= [] }, @sites;    # "||= []": the list always exists, so
+    return @{ $self->{_others} };                 # the getter never dereferences undef
+}
+
+
+=head2 purge_others
+
+ Title     : purge_others
+ Usage     : $re->purge_others();
+ Function  : Purges the set of other sites (see others) for this enzyme
+ Arguments : 
+ Returns   : 
+
+=cut
+
+sub purge_others {
+    my ($self) = @_;
+    # start over with a fresh, empty list, which is also the return value
+    return $self->{_others} = [];
+}
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/Enzyme.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1394 @@
+# $Id: Enzyme.pm,v 1.16.4.2 2006/11/17 09:32:41 sendu Exp $
+#------------------------------------------------------------------
+#
+# BioPerl module Bio::Restriction::Enzyme
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Restriction::Enzyme - A single restriction endonuclease
+(cuts DNA at specific locations)
+
+=head1 SYNOPSIS
+
+  # set up a single restriction enzyme. This contains lots of
+  # information about the enzyme that is generally parsed from a
+  # rebase file and can then be read back
+
+  use Bio::Restriction::Enzyme;
+
+  # define a new enzyme with the cut sequence
+  my $re=new Bio::Restriction::Enzyme
+      (-enzyme=>'EcoRI', -seq=>'G^AATTC');
+
+  # once the sequence has been defined a bunch of stuff is calculated
+  # for you:
+
+  #### PRECALCULATED
+
+  # find where the enzyme cuts after ...
+  my $ca=$re->cut;
+
+  # ... and where it cuts on the opposite strand
+  my $oca = $re->complementary_cut;
+
+  # get the cut sequence string back.
+  # Note that site will return the sequence with a caret
+  my $with_caret=$re->site; #returns 'G^AATTC';
+
+  # but it is also a Bio::PrimarySeq object ....
+  my $without_caret=$re->seq; # returns 'GAATTC';
+  # ... and so does string
+  $without_caret=$re->string; #returns 'GAATTC';
+
+  # what is the reverse complement of the cut site
+  my $rc=$re->revcom; # returns 'GAATTC';
+
+  # now the recognition length. There are two types:
+  #   recognition_length() is the length of the sequence
+  #   cutter() estimate of cut frequency
+
+  my $recog_length = $re->recognition_length; # returns 6
+  # also returns 6 in this case but would return 
+  # 4 for GANNTC and 5 for RGATCY (BstX2I)!
+  $recog_length=$re->cutter; 
+
+  # is the sequence a palindrome  - the same forwards and backwards
+  my $pal= $re->palindromic; # this is a boolean
+
+  # is the sequence blunt (i.e. no overhang - the forward and reverse
+  # cuts are the same)
+  print "blunt\n" if $re->overhang eq 'blunt';
+
+  # Overhang can have four values: "5'", "3'", "blunt", and undef
+  # Direction is very important if you use Klenow!
+  my $oh=$re->overhang;
+
+  # what is the overhang sequence
+  my $ohseq=$re->overhang_seq; # will return 'AATT';
+
+  # is the sequence ambiguous - does it contain non-GATC bases?
+  my $ambig=$re->is_ambiguous; # this is boolean
+
+  print "Stuff about the enzyme\nCuts after: $ca\n",
+        "Complementary cut: $oca\nSite:\n\t$with_caret or\n",
+        "\t$without_caret\n";
+  print "Reverse of the sequence: $rc\nRecognition length: $recog_length\n",
+        "Is it palindromic? $pal\n";
+  print "The overhang is $oh with sequence $ohseq\n",
+        "And is it ambiguous? $ambig\n\n";
+
+
+  ### THINGS YOU CAN SET, and get from rich REBASE file
+
+  # get or set the isoschizomers (enzymes that recognize the same
+  # site)
+  $re->isoschizomers('PvuII', 'SmaI'); # not really true :)
+  print "Isoschizomers are ", join " ", $re->isoschizomers, "\n";
+
+  # get or set the methylation sites
+  $re->methylation_sites(2); # not really true :)
+  print "Methylated at ", join " ", keys %{$re->methylation_sites},"\n";
+
+  #Get or set the source microbe
+  $re->microbe('E. coli');
+  print "It came from ", $re->microbe, "\n";
+
+  # get or set the person who isolated it
+  $re->source("Rob"); # not really true :)
+  print $re->source, " sent it to us\n";
+
+  # get or set whether it is commercially available and the company
+  # that it can be bought at
+  $re->vendors('NEB'); # my favorite
+  print "Is it commercially available :";
+  print $re->vendors ? "Yes" : "No";
+  print " and it can be got from ", join " ", 
+      $re->vendors, "\n";
+
+  # get or set a reference for this
+  $re->reference('Edwards et al. J. Bacteriology');
+  print "It was not published in ", $re->reference, "\n";
+
+  # get or set the enzyme name
+  $re->name('BamHI');
+  print "The name of EcoRI is not really ", $re->name, "\n";
+
+
+=head1 DESCRIPTION
+
+This module defines a single restriction endonuclease.  You can use it
+to make custom restriction enzymes, and it is used by
+Bio::Restriction::IO to define enzymes in the New England Biolabs
+REBASE collection.
+
+Use Bio::Restriction::Analysis to figure out which enzymes are available
+and where they cut your sequence.
+
+
+=head1 RESTRICTION MODIFICATION SYSTEMS
+
+At least three genetically and biochemically distinct restriction
+modification systems exist. The cutting components of them are known
+as restriction endonucleases.  The three systems are known by roman
+numerals: Type I, II, and III restriction enzymes.
+
+REBASE format 'cutzymes'(#15) lists enzyme type in its last field. The
+categories there do not always match the following short
+descriptions of the enzymes types. See
+http://it.stlawu.edu/~tbudd/rmsyst.html for a better overview.
+
+
+=head2 TypeI
+
+Type I systems recognize a bipartite asymmetrical sequence of 5-7 bp:
+
+  ---TGA*NnTGCT--- * = methylation sites
+  ---ACTNnA*CGA--- n = 6 for EcoK, n = 8 for EcoB
+
+The cleavage site is roughly 1000 (400-7000) base pairs from the
+recognition site.
+
+=head2 TypeII
+
+The simplest and most common (at least commercially).
+
+Site recognition is via short palindromic base sequences that are 4-6
+base pairs long. Cleavage is at the recognition site (but may
+occasionally be just adjacent to the palindromic sequence, usually
+within) and may produce blunt end termini or staggered, "sticky
+end" termini.
+
+=head2 TypeIII
+
+The recognition site is a 5-7 bp asymmetrical sequence. Cleavage is
+ATP dependent 24-26 base pairs downstream from the recognition site
+and usually yields staggered cuts 2-4 bases apart.
+
+
+=head1 COMMENTS
+
+I am trying to make this backwards compatible with
+Bio::Tools::RestrictionEnzyme.  Undoubtedly some things will break,
+but we can fix things as we progress.....!
+
+I have added another comments section at the end of this POD that
+discusses a couple of areas I know are broken (at the moment)
+
+
+=head1 TO DO
+
+=over 2
+
+=item *
+
+Convert vendors to use full names of companies instead of code
+
+=item *
+
+Add regular expression based matching to vendors
+
+=item *
+
+Move away from the archaic ^ notation for cut sites. Ideally
+I'd totally like to remove this altogether, or add a method
+that adds it in if someone really wants it. We should be
+fixed on a sequence, number notation.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Peter Blaiklock, pblaiklo at restrictionmapper.org
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Rob Edwards.
+
+Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
+Rights Reserved.  This module is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Bio::Restriction::Analysis>, 
+L<Bio::Restriction::EnzymeCollection>, L<Bio::Restriction::IO>
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private and
+are intended for internal use by this module. They are not considered
+part of the public interface and are described here for documentation
+purposes only.
+
+=cut
+
+package Bio::Restriction::Enzyme;
+use strict;
+
+use Bio::PrimarySeq;
+
+use Data::Dumper;
+
+use vars qw (%TYPE);
+use base qw(Bio::Root::Root Bio::Restriction::EnzymeI);
+
+BEGIN {
+    %TYPE = (I => 1, II => 1, III => 1);    # no "my": fill the package hash declared via "use vars"
+}
+
+=head2 new
+
+ Title     : new
+ Function  : Initializes the Enzyme object
+ Returns   : The Restriction::Enzyme object
+ Argument  : A standard definition can have several formats. For example:
+	     $re->new(-enzyme=>'EcoRI', -seq=>'GAATTC', -cut=>'1')
+             Or, you can define the cut site in the sequence, for example
+	     $re->new(-enzyme=>'EcoRI', -seq=>'G^AATTC'), but you must use a caret
+	     Or, a sequence can cut outside the recognition site, for example
+	     $re->new(-enzyme=>'AbeI', -seq=>'CCTCAGC', -cut=>'-5/-2')
+
+	     Other arguments:
+	     -isoschizomers=>\@list  a reference to an array of
+              known isoschizomers
+	     -references=>$ref a reference to the enzyme
+	     -source=>$source the source (person) of the enzyme
+	     -commercial_availability=>@companies a list of companies
+              that supply the enzyme
+	     -methylation_site=>\%sites a reference to hash that has
+              the position as the key and the type of methylation
+              as the value
+
+A Restriction::Enzyme object manages its recognition sequence as a
+Bio::PrimarySeq object.
+
+The minimum requirement is for a name and a sequence.
+
+This will create the restriction enzyme object, and define several
+things about the sequence, such as palindromic, size, etc.
+
+=cut
+
+sub new {
+    # Constructor.  Accepts the named arguments listed in the _rearrange()
+    # call below; -name/-enzyme are synonyms, as are -site/-seq.
+    # Throws unless both a name and a recognition site end up being set.
+    my($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($name,$enzyme,$site,$seq,$cut,$complementary_cut, $is_prototype, $prototype,
+        $isoschizomers, $meth, $microbe, $source, $vendors, $references) =
+            $self->_rearrange([qw(
+                                  NAME
+                                  ENZYME
+                                  SITE
+                                  SEQ
+                                  CUT
+                                  COMPLEMENTARY_CUT
+                                  IS_PROTOTYPE
+                                  PROTOTYPE
+                                  ISOSCHIZOMERS
+                                  METHYLATION_SITES
+                                  MICROBE
+                                  SOURCE
+                                  VENDORS
+                                  REFERENCES
+                                 )], @args);
+
+    # Start every container as an empty reference.  Assigning "()" to a
+    # scalar slot stores undef, not an empty array, which leaves the list
+    # accessors without anything to dereference until the first push.
+    $self->{_isoschizomers} = [];
+    $self->{_methylation_sites} = {};
+    $self->{_vendors} = [];
+    $self->{_references} = [];
+
+    # Mandatory attributes; the later synonym wins when both are given.
+    $name && $self->name($name);
+    $enzyme && $self->name($enzyme);
+    $site && $self->site($site);
+    $seq && $self->site($seq);
+    $self->throw('At the minimum, you must define a name and '.
+                 'recognition site for the restriction enzyme')
+        unless $self->{'_name'} && $self->{'_seq'};
+
+    # Optional attributes.  The cut position is tested with "defined"
+    # because 0 is a legitimate value.
+    defined $cut && $self->cut($cut);
+    $complementary_cut && $self->complementary_cut($complementary_cut);
+    $is_prototype && $self->is_prototype($is_prototype);
+    # NOTE(review): no prototype() method is defined in this file (only
+    # prototype_name()); presumably it is inherited -- confirm.
+    $prototype && $self->prototype($prototype);
+    $isoschizomers && $self->isoschizomers($isoschizomers);
+    $meth && $self->methylation_sites($meth);
+    $microbe && $self->microbe($microbe);
+    $source && $self->source($source);
+    $vendors && $self->vendors($vendors);
+    $references && $self->references($references);
+
+    return $self;
+}
+
+=head1 Essential methods
+
+=cut
+
+=head2 name
+
+ Title    : name
+ Usage    : $re->name($newval)
+ Function : Gets/Sets the restriction enzyme name
+ Example  : $re->name('EcoRI')
+ Returns  : value of name
+ Args     : newvalue (optional)
+
+This will also clean up the name. I have added this because some
+people get confused about restriction enzyme names.  The name should
+be One upper case letter, and two lower case letters (because it is
+derived from the organism name, eg.  EcoRI is from E. coli). After
+that it is all confused, but the numbers should be roman numbers not
+numbers, therefore we'll correct those. At least this will provide
+some standard, I hope.
+
+=cut
+
+sub name {
+    # Get/set the enzyme name.  A supplied name is normalised (whitespace
+    # removed, trailing 1's turned into I's, first character upper-cased)
+    # and a warning is issued whenever normalisation changed it.
+    my ($self, $new_name) = @_;
+
+    # Getter: nothing (or a false value) was supplied.
+    return $self->{'_name'} unless $new_name;
+
+    my $given = $new_name;
+
+    # Some people write HindIII as "Hind III": drop all whitespace.
+    $new_name =~ s/\s+//g;
+
+    # A run of TRAILING digit ones becomes the same number of capital I's.
+    if ($new_name =~ m/(1+)$/) {
+        my $roman = 'I' x length($1);
+        $new_name =~ s/1+$/$roman/;
+    }
+
+    # Upper-case the leading character.
+    $new_name =~ s/^(\w)/uc($1)/e;
+
+    # Tell the user if the stored name differs from what was passed in.
+    $self->warn("The enzyme name $given was changed to $new_name")
+        if $new_name ne $given;
+
+    return $self->{'_name'} = $new_name;
+}
+
+
+=head2 site
+
+ Title     : site
+ Usage     : $re->site();
+ Function  : Gets/sets the recognition sequence for the enzyme.
+ Example   : $seq_string = $re->site();
+ Returns   : String containing recognition sequence indicating
+           : cleavage site as in  'G^AATTC'.
+ Argument  : n/a
+ Throws    : n/a
+
+
+Side effect: the sequence is always converted to upper case.
+
+The cut site can also be set by using methods L<cut|cut> and
+L<complementary_cut|complementary_cut>.
+
+This will pad out missing sequence with N's. For example the enzyme
+Acc36I cuts at ACCTGC(4/8). This will be returned as ACCTGCNNNN^
+
+Note that the common notation ACCTGC(4/8) means that the forward
+strand cut is four nucleotides after the END of the recognition
+site. The forward cut() in the coordinates used here in Acc36I
+ACCTGC(4/8) is at 6+4 i.e. 10.
+
+** This is the main setable method for the recognition site.
+
+=cut
+
+sub site {
+    # Get/set the recognition site.  When setting, an optional '^' marks
+    # the forward cut; the caret-free sequence is wrapped in a
+    # Bio::PrimarySeq object and the cut positions are derived from the
+    # text on either side of the caret.
+    my ($self, $site) = @_;
+
+    # Pure getter when called without a (true) argument.
+    return $self->{'_site'} unless $site;
+
+    $self->throw("Unrecognized characters in site: [$site]")
+        if $site =~ /[^ATGCMRWSYKVHDBN\^]/i;
+
+    # Split around the caret, if there is one, and keep the bare sequence.
+    my ($upstream, $downstream) = $site =~ /(.*)\^(.*)/;
+    my $has_caret = defined $upstream;
+    $site = $upstream . $downstream if $has_caret;
+    $self->{'_site'} = $site;
+
+    # The sequence object must exist before cut() and
+    # complementary_cut() are called, because they read it.
+    $self->{_seq} = Bio::PrimarySeq->new(-id=>$self->name,
+                                         -seq=>$site,
+                                         -verbose=>$self->verbose,
+                                         -alphabet=>'dna');
+
+    if ($has_caret) {
+        $self->cut(length $upstream);
+        $self->complementary_cut(length $downstream);
+        $self->revcom_site($self->{_seq}->revcom->seq);
+    }
+
+    return $self->{'_site'};
+}
+
+=head2 revcom_site
+
+ Title     : revcom_site
+ Usage     : $re->revcom_site();
+ Function  : Gets/sets the complementary recognition sequence for the enzyme.
+ Example   : $seq_string = $re->revcom_site();
+ Returns   : String containing recognition sequence indicating
+           : cleavage site as in  'G^AATTC'.
+ Argument  : Sequence of the site
+ Throws    : n/a
+
+This is the same as site, except it returns the revcom site. For
+palindromic enzymes these two are identical. For non-palindromic
+enzymes they are not!
+
+See also L<site|site> above.
+
+=cut
+
+sub revcom_site {
+    # Get/set the caret-annotated site as read on the reverse strand.
+    #   $site (optional) - reverse-strand sequence to store
+    # Returns the stored (or freshly computed) reverse-strand site string.
+    my ($self, $site)=@_;
+    # Palindromic sites: the forward site string is reused verbatim and
+    # any supplied $site is ignored.
+    if ($self->is_palindromic) {
+      $self->{'_revcom_site'}=$self->{'_site'};
+      return $self->{'_revcom_site'};
+    }
+    if ($site) {
+        $self->throw("Unrecognized characters in revcom site: [$site]")
+            if $site =~ /[^ATGCMRWSYKVHDBN\^]/i;
+	
+        # Insert a caret after the first complementary_cut() characters of
+        # the supplied sequence and store the result.
+        # NOTE(review): assumes the complementary cut is a non-negative
+        # number no larger than the length of $site; otherwise the
+        # substitution does not apply as intended -- confirm.
+        my $pos=$self->complementary_cut;
+        $site =~ s/(.{$pos})/$1\^/;
+        $self->{'_revcom_site'} = $site;
+
+    }
+    # Nothing stored yet: derive the value from the reverse complement of
+    # the recognition sequence and the complementary cut position.
+    unless ($self->{'_revcom_site'}) {
+       my $revcom=$self->revcom;
+       my $cc=$self->complementary_cut;
+       # $hat is the number of leading characters that precede the caret.
+       my $hat=length($revcom)-$cc+1; # we need it on the other strand!
+       if ($cc > length($revcom)) {
+        # Cut lies beyond the site: prepend N's so the string is $cc long.
+        my $pad= "N" x ($cc-length($revcom));
+	$revcom = $pad. $revcom;
+	$hat=length($revcom)-$cc+1;
+       }
+       elsif ($cc < 0) {
+        # Negative cut: append -$cc N's and put the caret at the very end.
+        my $pad = "N" x -$cc;
+	$revcom .= $pad;
+	$hat=length($revcom);
+       }
+       $revcom =~ s/(.{$hat})/$1\^/;
+       $self->{'_revcom_site'}=$revcom;
+   }
+
+    return $self->{'_revcom_site'};
+}
+
+=head2 cut
+
+ Title     : cut
+ Usage     : $num = $re->cut(1);
+ Function  : Sets/gets an integer indicating the position of cleavage
+             relative to the 5' end of the recognition sequence in the
+             forward strand.
+
+             For type II enzymes, sets the symmetrically positioned
+             reverse strand cut site by calling complementary_cut().
+
+ Returns   : Integer, 0 if not set
+ Argument  : an integer for the forward strand cut site (optional)
+
+Note that the common notation ACCTGC(4/8) means that the forward
+strand cut is four nucleotides after the END of the recognition
+site. The forward cut in the coordinates used here in Acc36I
+ACCTGC(4/8) is at 6+4 i.e. 10.
+
+Note that REBASE uses notation where cuts within symmetric sites are
+marked by '^' within the forward sequence but if the site is
+asymmetric the parenthesis syntax is used where numbering ALWAYS
+starts from last nucleotide in the forward strand. That's why AciI, which has
+a site usually written as CCGC(-3/-1), actually cuts in
+
+  C^C G C
+  G G C^G
+
+In our notation, these locations are 1 and 3.
+
+
+The cuts locations in the notation used are relative to the first
+(non-N) nucleotide of the reported forward strand of the recognition
+sequence. The following diagram numbers the phosphodiester bonds
+(marked by + ) which can be cut by the restriction enzymes:
+
+                           1   2   3   4   5   6   7   8  ...
+     N + N + N + N + N + G + A + C + T + G + G + N + N + N
+  ... -5  -4  -3  -2  -1
+
+
+=cut
+
+sub cuts_after {
+    # Alias for cut(): every argument is handed straight through.
+    my $self = shift;
+    return $self->cut(@_);
+}
+
+sub cut {
+    # Get/set the forward-strand cut position.  Setting also derives the
+    # complementary cut for type II enzymes and writes a '^' into the
+    # stored site string (padding it with N's when the cut lies beyond
+    # the end of the site).  Returns the stored cut, or 0 if unset.
+    my ($self, $value) = @_;
+
+    # Getter.
+    return $self->{'_cut'} || 0 unless defined $value;
+
+    $self->throw("The cut position needs to be an integer [$value]")
+        unless $value =~ /[-+]?\d+/;
+    $self->{'_cut'} = $value;
+
+    # type() reads the cut just stored, so this has to come after it.
+    if ($self->type eq 'II') {
+        $self->complementary_cut(length($self->seq->seq) - $value);
+    }
+
+    # Extend the site with N's if the cut falls past its end.
+    my $site_length = length $self->{_site};
+    if ($site_length < $value) {
+        $self->{_site} .= 'N' x ($value - $site_length);
+    }
+
+    # Mark the cut with a caret unless one is already present.
+    if ($self->{_site} !~ /\^/) {
+        $self->{_site} = substr($self->{_site}, 0, $value)
+                       . '^'
+                       . substr($self->{_site}, $value);
+    }
+
+    return $self->{'_cut'} || 0;
+}
+
+
+
+=head2 complementary_cut
+
+ Title     : complementary_cut
+ Usage     : $num = $re->complementary_cut('1');
+ Function  : Sets/Gets an integer indicating the position of cleavage
+           : on the reverse strand of the restriction site.
+ Returns   : Integer
+ Argument  : An integer (optional)
+ Throws    : Exception if argument is non-numeric.
+
+This method determines the cut on the reverse strand of the sequence.
+For most enzymes this will be within the sequence, and will be set
+automatically based on the forward strand cut, but it need not be.
+
+B<Note> that the returned location indicates the location AFTER the
+first non-N site nucleotide in the FORWARD strand.
+
+=cut
+
+sub complementary_cut {
+    # Get/set the reverse-strand cut position.  Throws if a supplied
+    # value contains no digits.  Returns the stored value, or 0 if unset.
+    my ($self, $num) = @_;
+
+    # Getter.
+    return $self->{'_rc_cut'} || 0 unless defined $num;
+
+    unless ($num =~ /[-+]?\d+/) {
+        $self->throw("The cut position needs to be an integer [$num]");
+    }
+    $self->{'_rc_cut'} = $num;
+
+    return $self->{'_rc_cut'} || 0;
+}
+
+
+=head1 Read only (usually) recognition site descriptive methods
+
+=cut
+
+=head2 type
+
+ Title     : type
+ Usage     : $re->type();
+ Function  : Get/set the restriction system type
+ Returns   : 
+ Argument  : optional type: ('I'|II|III)
+
+Restriction enzymes have been categorized into three types. Some
+REBASE formats give the type, but the following rules can be used to
+classify the known enzymes:
+
+=over 4
+
+=item 1
+
+Bipartite site (with 6-8 Ns in the middle and the cut site
+is E<gt> 50 nt away) =E<gt> type I
+
+=item 2
+
+Site length E<lt> 3  =E<gt> type I
+
+=item 3
+
+5-6 asymmetric site and cuts E<gt>20 nt away =E<gt> type III
+
+=item 4
+
+All other  =E<gt> type II
+
+=back
+
+There are some enzymes in REBASE which have bipartite recognition site
+and cut far from the site but are still classified as type I. I've no
+idea if this is really so.
+
+=cut
+
+sub type {
+    # Get/set the restriction system type ('I', 'II' or 'III').
+    # With a true argument the value is validated against %TYPE and
+    # stored.  Without one, the type is inferred from the recognition
+    # sequence and the cut position on every call (a previously stored
+    # type is not consulted) and the result is stored and returned.
+    my ($self, $value) = @_;
+
+    if ($value) {
+        unless ($TYPE{$value}) {
+            $self->throw("Not a valid value [$value], needs to one of : ".
+                         join (', ', sort keys %TYPE) );
+        }
+        return $self->{'_type'} = $value;
+    }
+
+    my $seq_obj = $self->{'_seq'};
+    my $inferred;
+
+    if ($seq_obj->seq =~ /N*[^N]+N{6,8}[^N]/ and abs($self->cut) > 50) {
+        # bipartite site that cuts far away
+        $inferred = 'I';
+    }
+    elsif ($seq_obj->length == 3) {
+        # 3 nt site
+        $inferred = 'I';
+    }
+    elsif ((length($self->string) == 5 or length($self->string) == 6)
+           and not $self->palindromic
+           and abs($self->cut) > 20) {
+        # short asymmetric site that cuts more than 20 nt away
+        $inferred = 'III';
+    }
+    else {
+        $inferred = 'II';
+    }
+
+    return $self->{'_type'} = $inferred;
+}
+
+=head2 seq
+
+ Title     : seq
+ Usage     : $re->seq();
+ Function  : Get the Bio::PrimarySeq.pm object representing
+           : the recognition sequence
+ Returns   : A Bio::PrimarySeq object representing the
+             enzyme recognition site
+ Argument  : n/a
+ Throws    : n/a
+
+
+=cut
+
+sub seq {
+    # Return the object stored as the recognition sequence.
+    my ($self) = @_;
+    return $self->{'_seq'};
+}
+
+=head2 string
+
+ Title     : string
+ Usage     : $re->string();
+ Function  : Get a string representing the recognition sequence.
+ Returns   : String. Does NOT contain a  '^' representing the cut location
+             as returned by the site() method.
+ Argument  : n/a
+ Throws    : n/a
+
+=cut
+
+sub string {
+    # Return the recognition sequence as reported by the stored
+    # sequence object's seq() method.
+    my ($self) = @_;
+    return $self->{'_seq'}->seq;
+}
+
+
+
+=head2 revcom
+
+ Title     : revcom
+ Usage     : $re->revcom();
+ Function  : Get a string representing the reverse complement of
+           : the recognition sequence.
+ Returns   : String
+ Argument  : n/a
+ Throws    : n/a
+
+=cut
+
+sub revcom {
+    # Return the reverse complement of the recognition sequence as a
+    # string, via the stored sequence object's revcom() method.
+    my ($self) = @_;
+    my $reversed = $self->{'_seq'}->revcom;
+    return $reversed->seq();
+}
+
+=head2 recognition_length
+
+ Title     : recognition_length
+ Usage     : $re->recognition_length();
+ Function  : Get the length of the RECOGNITION sequence.
+             This is the total recognition sequence,
+             including the ambiguous codes.
+ Returns   : An integer
+ Argument  : Nothing
+
+See also: L<non_ambiguous_length>
+
+=cut
+
+sub recognition_length {
+    # Number of characters in the recognition sequence string.
+    my ($self) = @_;
+    my $recognition_seq = $self->string;
+    return length $recognition_seq;
+}
+
+=head2 cutter
+
+ Title    : cutter
+ Usage    : $re->cutter
+ Function : Returns the "cutter" value of the recognition site.
+
+            This is a value relative to site length and lack of
+            ambiguity codes. Hence: 'RCATGY' is a five (5) cutter site
+            and 'CCTNAGG' a six cutter
+
+            This measure correlates to the frequency of the enzyme
+            cuts much better than plain recognition site length.
+
+ Example  : $re->cutter
+ Returns  : integer or float number
+ Args     : none
+
+
+
+Why is this better than just stripping the ambiguous codes? Think about
+it like this: You have a random sequence; all nucleotides are equally
+probable. You have a four nucleotide re site. The probability of that
+site finding a match is one out of 4^4 or 256, meaning that on average
+a four cutter finds a match every 256 nucleotides. For a six cutter,
+the average fragment length is 4^6 or 4096. In the case of ambiguity
+codes the chances of finding a match are better: an R (A|G) has 1/2
+chance of finding a match in a random sequence. Therefore, for RGCGCY
+the probability is one out of (2*4*4*4*4*2) which is exactly the same as
+for a five cutter! Cutter, although it can have non-integer values,
+turns out to be a useful and simple measure.
+
+=cut
+
+sub cutter {
+    # Return the "cutter" value of the recognition site: each
+    # unambiguous base (A, T, G, C) counts 1, each two-base ambiguity
+    # code (M, R, W, S, Y, K) counts 1/2, each three-base code
+    # (V, H, D, B) counts 3/4 and anything else (e.g. N) counts 0.
+    #
+    # The sequence is examined through a lexical copy.  Working on the
+    # global $_ would overwrite the caller's $_ -- and, inside a
+    # map/grep/for over a list, the aliased list element itself.
+    my ($self) = @_;
+    my $site = uc $self->string;
+
+    # tr/// with an empty replacement list and no /d only counts.
+    # Square brackets are not needed: tr/// takes a plain character
+    # list, not a regex character class.
+    my $cutter = ($site =~ tr/ATGC//);
+    $cutter += ($site =~ tr/MRWSYK//) / 2;
+    $cutter += ($site =~ tr/VHDB//) * 3 / 4;
+
+    return $cutter;
+}
+
+
+=head2 is_palindromic
+
+ Title     : is_palindromic
+ Usage     : $re->is_palindromic();
+ Function  : Determines if the recognition sequence is palindromic
+           : for the current restriction enzyme.
+ Returns   : Boolean
+ Argument  : n/a
+ Throws    : n/a
+
+A palindromic site (EcoRI):
+
+  5-GAATTC-3
+  3-CTTAAG-5
+
+=cut
+
+# I just renamed this because is_palindromic fits in better
+# with the other is_? methods
+sub palindromic {
+    # Older name kept as an alias; delegates to is_palindromic().
+    my ($self, @args) = @_;
+    return $self->is_palindromic(@args);
+}
+
+sub is_palindromic {
+    # Return 1 when the recognition sequence equals its own reverse
+    # complement (the flag is remembered once set), otherwise the
+    # remembered flag or 0.
+    my ($self) = @_;
+
+    my $forward = $self->string;
+    my $reverse = $self->revcom;
+    $self->{_palindromic} = 1 if $forward eq $reverse;
+
+    return $self->{_palindromic} || 0;
+}
+
+
+
+=head2 overhang
+
+ Title     : overhang
+ Usage     : $re->overhang();
+ Function  : Determines the overhang of the restriction enzyme
+ Returns   : "5'", "3'", "blunt" or "unknown"
+ Argument  : n/a
+ Throws    : n/a
+
+A blunt site in SmaI returns C<blunt>
+
+  5' C C C^G G G 3'
+  3' G G G^C C C 5'
+
+A 5' overhang in EcoRI returns C<5'>
+
+  5' G^A A T T C 3'
+  3' C T T A A^G 5'
+
+A 3' overhang in KpnI returns C<3'>
+
+  5' G G T A C^C 3'
+  3' C^C A T G G 5'
+
+=cut
+
+sub overhang {
+    # Classify the ends the enzyme leaves by comparing the stored forward
+    # and reverse cut positions.  Returns "5'", "3'", "blunt" or
+    # "unknown" (the latter when either cut is unset or zero).
+    my ($self) = @_;
+
+    my ($fwd, $rev) = @{$self}{qw(_cut _rc_cut)};
+    return "unknown" unless $fwd && $rev;
+
+    $self->{_overhang} =
+          $fwd <  $rev ? "5'"
+        : $fwd == $rev ? "blunt"
+        : $fwd >  $rev ? "3'"
+        :                "unknown";
+
+    return $self->{_overhang};
+}
+
+=head2 overhang_seq
+
+ Title     : overhang_seq
+ Usage     : $re->overhang_seq();
+ Function  : Determines the overhang sequence of the restriction enzyme
+ Returns   : a string containing the overhang sequence
+ Argument  : n/a
+ Throws    : n/a
+
+I do not think it is necessary to create a seq object of these. (Heikki)
+
+Note: returns empty string for blunt sequences and undef for ones that
+we don't know.  Compare these:
+
+A blunt site in SmaI returns empty string
+
+  5' C C C^G G G 3'
+  3' G G G^C C C 5'
+
+A 5' overhang in EcoRI returns C<AATT>
+
+  5' G^A A T T C 3'
+  3' C T T A A^G 5'
+
+A 3' overhang in KpnI returns C<GTAC>
+
+  5' G G T A C^C 3'
+  3' C^C A T G G 5'
+
+Note that you need to use method L<overhang|overhang> to decide
+whether it is a 5' or 3' overhang!!!
+
+Note: The overhang stuff does not work if the site is asymmetric! Rethink! 
+
+=cut
+
+sub overhang_seq {
+    # Return the overhang sequence as a plain string: '' for blunt ends,
+    # nothing (bare return) when the cut positions are unknown, otherwise
+    # the bases lying between the forward and the reverse cut.
+    my $self = shift;
+
+    # Abandoned idea of returning a sequence object instead of a string:
+#    my $overhang->Bio::PrimarySeq(-id=>$self->name . '-overhang',
+#                                  -verbose=>$self->verbose,
+#                                  -alphabet=>'dna');
+
+    return '' if $self->overhang eq 'blunt' ;
+
+    unless ($self->{_cut} && $self->{_rc_cut}) {
+        # lets just check that we really can't figure it out
+        # NOTE(review): as written in this file, the argument-less getter
+        # calls below do not store anything, so the second test sees the
+        # same values as the first -- confirm the intent.
+        $self->cut;
+        $self->complementary_cut;
+        unless ($self->{_cut} && $self->{_rc_cut}) {
+            return;
+        }
+    }
+
+    # this is throwing an error for sequences outside the restriction
+    # site (eg ^NNNNGATCNNNN^)
+    # So if this is the case we need to fake these guys
+    # (i.e. when either cut is negative or beyond the recognition
+    # sequence, work on the caret-annotated site string instead of
+    # calling subseq() on the sequence object)
+    if (($self->{_cut}<0) ||
+        ($self->{_rc_cut}<0) || 
+        ($self->{_cut}>$self->seq->length) ||
+        ($self->{_rc_cut}>$self->seq->length)) {
+        my $tempseq=$self->site;
+        # $five / $three: the text before and after the first caret
+        my ($five, $three)=split /\^/, $tempseq;
+        if ($self->{_cut} > $self->{_rc_cut}) {
+            return substr($five, $self->{_rc_cut})
+        } elsif ($self->{_cut} < $self->{_rc_cut}) {
+            return substr($three, 0, $self->{_rc_cut})
+        } else {
+            return '';
+        }
+    }
+
+    # Both cuts lie within the recognition sequence: return the stretch
+    # between them (subseq() is given 1-based, inclusive coordinates here).
+    if ($self->{_cut} > $self->{_rc_cut}) {
+        return $self->seq->subseq($self->{_rc_cut}+1,$self->{_cut});
+    } elsif ($self->{_cut} < $self->{_rc_cut}) {
+        return $self->seq->subseq($self->{_cut}+1, $self->{_rc_cut});
+    } else {
+        return '';
+    }
+}
+
+
+
+=head2 compatible_ends
+
+ Title     : compatible_ends
+ Usage     : $re->compatible_ends($re2);
+ Function  : Determines if the two restriction enzyme cut sites
+              have compatible ends.
+ Returns   : 0 if not, 1 if only one pair ends match, 2 if both ends.
+ Argument  : a Bio::Restriction::Enzyme
+ Throws    : unless the argument is a Bio::Restriction::Enzyme and
+             if there are Ns in the overhangs
+
+In case of type II enzymes which cut symmetrically, this
+function can be considered to return a boolean value.
+
+
+=cut
+
+sub compatible_ends {
+    # Compare the ends produced by this enzyme with those of another.
+    #   $re - a Bio::Restriction::Enzyme
+    # Returns 2 when both the overhang sequence and the overhang type
+    # agree, 0 otherwise.  Throws when $re is not a
+    # Bio::Restriction::Enzyme.
+    my ($self, $re) = @_;
+
+    # Validate without calling a method on $re directly: doing so on
+    # undef, a plain string or an unblessed reference would die with a
+    # Perl error before the intended exception could be raised.
+    my $is_enzyme = do {
+        local $@;
+        ref($re) && eval { $re->isa('Bio::Restriction::Enzyme') };
+    };
+    unless ($is_enzyme) {
+        my $shown = defined $re ? $re : 'undef';
+        $self->throw("Need a Bio::Restriction::Enzyme as an argument, [$shown]");
+    }
+
+#    $self->throw("Only type II enzymes work now")
+#        unless $self->type eq 'II';
+
+    # Fetch each overhang once.  overhang_seq() returns nothing when the
+    # cuts are unknown; treat that as an empty string, which is how the
+    # string comparison below has always seen it.
+    my $mine   = $self->overhang_seq;
+    my $theirs = $re->overhang_seq;
+    $mine   = '' unless defined $mine;
+    $theirs = '' unless defined $theirs;
+
+    # Ambiguous overhangs are only reported at debug level; the
+    # comparison still goes ahead.
+    $self->debug("N(s) in overhangs. Can not compare")
+        if $mine =~ /N/ or $theirs =~ /N/;
+
+    return 2 if $mine eq $theirs and
+        $self->overhang eq $re->overhang;
+
+    return 0;
+}
+
+=head2 is_ambiguous
+
+ Title     : is_ambiguous
+ Usage     : $re->is_ambiguous();
+ Function  : Determines if the restriction enzyme contains ambiguous sequences
+ Returns   : Boolean
+ Argument  : n/a
+ Throws    : n/a
+
+=cut
+
+sub is_ambiguous {
+    # 1 if the recognition sequence holds any character other than
+    # A, G, C or T; 0 otherwise.
+    my ($self) = @_;
+    my $recognition_seq = $self->string;
+    return 1 if $recognition_seq =~ m/[^AGCT]/;
+    return 0;
+}
+
+=head2 Additional methods from Rebase
+
+=cut
+
+
+=head2 is_prototype
+
+ Title    : is_prototype
+ Usage    : $re->is_prototype
+ Function : Get/Set method for finding out if this enzyme is a prototype
+ Example  : $re->is_prototype(1)
+ Returns  : Boolean
+ Args     : none
+
+Prototype enzymes are the most commonly available and usually first
+enzymes discovered that have the same recognition site. Using only
+prototype enzymes in restriction analysis avoids redundancy and
+speeds things up.
+
+=cut
+
+sub is_prototype {
+     my $self = shift;
+     if (@_) {
+         (shift) ? (return $self->{'_is_prototype'} = 1) :
+                   (return $self->{'_is_prototype'} = 0) ;
+     }
+     return $self->{'_is_prototype'} || 0;
+}
+
+=head2 prototype_name
+
+ Title    : prototype_name
+ Usage    : $re->prototype_name
+ Function : Get/Set method for the name of prototype for
+            this enzyme's recognition site
+ Example  : $re->prototype_name(1)
+ Returns  : prototype enzyme name string or an empty string
+ Args     : optional prototype enzyme name string
+
+If the enzyme itself is the prototype, its own name is returned.  Not to
+confuse the negative result with an unset value, use method
+L<is_prototype|is_prototype>.
+
+This method is called I<prototype_name> rather than I<prototype>,
+because it returns a string rather than an object.
+
+=cut
+
+sub prototype_name {
+     # Get/set the name of the prototype enzyme.  An enzyme flagged as a
+     # prototype reports its own name; otherwise the stored name or ''.
+     my ($self, @new_name) = @_;
+
+     $self->{'_prototype'} = $new_name[0] if @new_name;
+
+     return $self->{'_is_prototype'}
+         ? $self->name
+         : ($self->{'_prototype'} || '');
+}
+
+=head2 isoschizomers
+
+ Title     : isoschizomers
+ Usage     : $re->isoschizomers(@list);
+ Function  : Gets/Sets a list of known isoschizomers (enzymes that
+             recognize the same site, but don't necessarily cut at
+             the same position).
+ Arguments : A reference to an array that contains the isoschizomers
+ Returns   : A reference to an array of the known isoschizomers or 0
+             if not defined.
+
+This has to be the hardest name to spell.  Added for compatibility to
+REBASE
+
+=cut
+
+
+sub isoschizomers {
+    # Append any supplied names to the list of isoschizomers and return
+    # the current list (its element count in scalar context).
+    #
+    # The list is returned even when nothing has been stored yet: an
+    # unset slot yields an empty list / 0 instead of whatever value the
+    # sub happened to fall off the end with.
+    my ($self, @names) = @_;
+
+    push @{ $self->{_isoschizomers} }, @names if @names;
+
+    return @{ $self->{_isoschizomers} || [] };
+}
+
+
+=head2 purge_isoschizomers
+
+ Title     : purge_isoschizomers
+ Usage     : $re->purge_isoschizomers();
+ Function  : Purges the set of isoschizomers for this enzyme
+ Arguments : 
+ Returns   : 1
+
+=cut
+
+sub purge_isoschizomers {
+    # Replace the isoschizomer list with a fresh empty array; the new
+    # (empty) array reference is the return value.
+    my $self = shift;
+    return $self->{_isoschizomers} = [];
+}
+
+
+=head2 methylation_sites
+
+ Title     : methylation_sites
+ Usage     : $re->methylation_sites(\%sites);
+ Function  : Gets/Sets known methylation sites (positions on the sequence
+             that get modified to promote or prevent cleavage).
+ Arguments : A reference to a hash that contains the methylation sites
+ Returns   : A reference to a hash of the methylation sites or
+             an empty string if not defined.
+
+There are three types of methylation sites:
+
+=over 3
+
+=item *  (6) = N6-methyladenosine
+
+=item *  (5) = 5-methylcytosine
+
+=item *  (4) = N4-methylcytosine
+
+=back
+
+These are stored as 6, 5, and 4 respectively.  The hash has the
+sequence position as the key and the type of methylation as the value.
+A negative number in the sequence position indicates that the DNA is
+methylated on the complementary strand.
+
+Note that in REBASE, the methylation positions are given 
+Added for compatibility to REBASE.
+
+=cut
+
+sub methylation_sites {
+    # Get/set methylation sites (position => methylation type).
+    # Accepts either a flat list of position/type pairs or, as the POD
+    # documents and new() passes, a single reference to such a hash.
+    # Returns the stored sites as a flat key/value list.
+    my ($self, @pairs) = @_;
+
+    # A lone hash reference is expanded into its pairs; previously it
+    # was stored as a single stringified "HASH(0x...)" key.
+    if (@pairs == 1 && ref($pairs[0]) eq 'HASH') {
+        @pairs = %{ $pairs[0] };
+    }
+
+    # Make sure there is a hash to fill and to dereference on return.
+    $self->{'_methylation_sites'} ||= {};
+
+    while (@pairs) {
+        my $position = shift @pairs;
+        $self->{'_methylation_sites'}->{$position} = shift @pairs;
+    }
+
+    return %{ $self->{'_methylation_sites'} };
+}
+
+
+=head2 purge_methylation_sites
+
+ Title     : purge_methylation_sites
+ Usage     : $re->purge_methylation_sites();
+ Function  : Purges the set of methylation_sites for this enzyme
+ Arguments : 
+ Returns   : 
+
+=cut
+
+sub purge_methylation_sites {
+    # Replace the methylation site table with a fresh empty hash; the new
+    # (empty) hash reference is the return value.
+    my $self = shift;
+    return $self->{_methylation_sites} = {};
+}
+
+=head2 microbe
+
+ Title     : microbe
+ Usage     : $re->microbe($microbe);
+ Function  : Gets/Sets microorganism where the restriction enzyme was found
+ Arguments : A scalar containing the microbes name
+ Returns   : A scalar containing the microbes name or 0 if not defined
+
+Added for compatibility to REBASE
+
+=cut
+
+sub microbe {
+    # Get/set the source micro-organism.  False arguments are ignored;
+    # the getter returns '' when nothing is stored.
+    my ($self, $microbe) = @_;
+
+    $self->{_microbe} = $microbe if $microbe;
+
+    return $self->{_microbe} || '';
+}
+
+
+=head2 source
+
+ Title     : source
+ Usage     : $re->source('Rob Edwards');
+ Function  : Gets/Sets the person who provided the enzyme
+ Arguments : A scalar containing the persons name
+ Returns   : A scalar containing the persons name or 0 if not defined
+
+Added for compatibility to REBASE
+
+=cut
+
+sub source {
+    # Get/set the person who provided the enzyme.  False arguments are
+    # ignored; the getter returns '' when nothing is stored.
+    my ($self, $source) = @_;
+
+    $self->{_source} = $source if $source;
+
+    return $self->{_source} || '';
+}
+
+
+=head2 vendors
+
+ Title     : vendors
+ Usage     : $re->vendors(@list_of_companies);
+ Function  : Gets/Sets a list of companies that you can get the enzyme from.
+             Also sets the commercially_available boolean
+ Arguments : A reference to an array containing the names of companies
+             that you can get the enzyme from
+ Returns   : A reference to an array containing the names of companies
+             that you can get the enzyme from
+
+Added for compatibility to REBASE
+
+=cut
+
+sub vendors {
+    # Append any supplied company names to the vendor list and return
+    # the current list (its element count in scalar context).
+    #
+    # The list is returned even when nothing has been stored yet: an
+    # unset slot yields an empty list / 0 instead of whatever value the
+    # sub happened to fall off the end with.
+    my ($self, @companies) = @_;
+
+    push @{ $self->{_vendors} }, @companies if @companies;
+
+    return @{ $self->{_vendors} || [] };
+}
+
+
+=head2 purge_vendors
+
+ Title     : purge_vendors
+ Usage     : $re->purge_vendors();
+ Function  : Purges the set of vendors for this enzyme
+ Arguments : 
+ Returns   : 
+
+=cut
+
+sub purge_vendors {
+    # Replace the vendor list with a fresh empty array; the new (empty)
+    # array reference is the return value.
+    my $self = shift;
+    return $self->{_vendors} = [];
+}
+
+=head2 vendor
+
+ Title     : vendor
+ Usage     : $re->vendor(@list_of_companies);
+ Function  : Gets/Sets a list of companies that you can get the enzyme from.
+             Also sets the commercially_available boolean
+ Arguments : A reference to an array containing the names of companies
+             that you can get the enzyme from
+ Returns   : A reference to an array containing the names of companies
+             that you can get the enzyme from
+
+Added for compatibility to REBASE
+
+=cut
+
+
+sub vendor {
+    # Append the supplied company names to the vendor list.
+    # The value handed back is what push() returns, i.e. the number of
+    # vendors now stored -- not the list or a reference to it.
+    my ($self, @companies) = @_;
+    return push @{ $self->{_vendors} }, @companies;
+}
+
+
+=head2 references
+
+ Title     : references
+ Usage     : $re->references(string);
+ Function  : Gets/Sets the references for this enzyme
+ Arguments : an array of string reference(s) (optional)
+ Returns   : an array of references
+
+Use L<purge_references|purge_references> to reset the list of references
+
+This should be a L<Bio::Biblio> object, but it's not (yet)
+
+=cut
+
+sub references {
+    # Append any supplied reference strings and return the current list
+    # of references (its element count in scalar context).
+    #
+    # An unset slot is treated as an empty list rather than being
+    # dereferenced directly, which is not safe for an undefined value
+    # under "use strict".
+    my ($self, @refs) = @_;
+
+    push @{ $self->{_references} }, @refs if @refs;
+
+    return @{ $self->{_references} || [] };
+}
+
+
+=head2 purge_references
+
+ Title     : purge_references
+ Usage     : $re->purge_references();
+ Function  : Purges the set of references for this enzyme
+ Arguments : 
+ Returns   : 1
+
+=cut
+
+sub purge_references {
+    # Replace the reference list with a fresh empty array; the new
+    # (empty) array reference is the return value.
+    my $self = shift;
+    return $self->{_references} = [];
+}
+
+=head2 clone
+
+ Title     : clone
+ Usage     : $re->clone
+ Function  : Deep copy of the object
+ Arguments : -
+ Returns   : new Bio::Restriction::EnzymeI object
+
+This works as long as the object is a clean in-memory object using
+scalars, arrays and hashes. You have been warned.
+
+If you have module Storable, it is used, otherwise local code is used.
+Todo: local code cuts circular references.
+
+=cut
+
+sub clone {
+    # Deep copy.  Called as $re->clone it copies the enzyme itself; the
+    # optional second argument is used internally by the fallback code to
+    # copy a nested value.
+    #
+    # Storable::dclone is used when Storable can be loaded; otherwise a
+    # hand-written recursive copy handles scalars, arrays, hashes and
+    # hash-based objects.
+    my ($self, $this) = @_;
+
+    # Whether this is the top-level call is decided by the argument
+    # count, not by the truth of $this: a nested value of 0, '' or undef
+    # must be copied as such rather than trigger a copy of $self.
+    my $top_level = @_ < 2;
+
+    if (eval { require Storable; 1 }) {
+        return Storable::dclone($self);
+    }
+
+    # modified from deep_copy() @ http://www.stonehenge.com/merlyn/UnixReview/col30.html
+    if ($top_level) {
+        my $new = {};
+        foreach my $k (keys %$self) {
+            $new->{$k} = ref $self->{$k}
+                ? $self->clone($self->{$k})
+                : $self->{$k};
+        }
+        bless $new, ref($self);
+        return $new;
+    }
+
+    # Plain scalars are copied by value.
+    return $this unless ref $this;
+
+    if (ref $this eq "ARRAY") {
+        return [ map { $self->clone($_) } @$this ];
+    }
+    if (ref $this eq "HASH") {
+        return +{ map { $_ => $self->clone($this->{$_}) } keys %$this };
+    }
+
+    # Objects.  Nested enzymes are dropped to cut circular references.
+    return if $this->isa('Bio::Restriction::EnzymeI');
+    return $this->clone if $this->can('clone');
+
+    # The object has no clone() of its own, so its members are copied
+    # with this class's clone(); calling $this->clone here would fail
+    # for exactly the objects that reach this point.
+    my $obj = {};
+    foreach my $k (keys %$this) {
+        $obj->{$k} = ref $this->{$k}
+            ? $self->clone($this->{$k})
+            : $this->{$k};
+    }
+    bless $obj, ref($this);
+    return $obj;
+}
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeCollection.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeCollection.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeCollection.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,392 @@
+# $Id: EnzymeCollection.pm,v 1.10.4.2 2006/11/08 17:25:54 sendu Exp $
+#-------------------------------------------------------------------------------
+#
+# BioPerl module Bio::Restriction::EnzymeCollection
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#-------------------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Restriction::EnzymeCollection - Set of restriction endonucleases
+
+=head1 SYNOPSIS
+
+  use Bio::Restriction::EnzymeCollection;
+
+  # Create a collection with the default enzymes.
+  my $default_collection = Bio::Restriction::EnzymeCollection->new();
+
+  # Or create a collection from a REBASE 'withrefm' file obtained from
+  # ftp://ftp.neb.com/pub/rebase/. (See Bio::Restriction::IO for more
+  # information.)
+  my $rebase = Bio::Restriction::IO->new(
+      -file   => 'withrefm.610',
+      -format => 'withrefm' );
+  my $rebase_collection = $rebase->read();
+
+  # Or create an empty collection and set the enzymes later. See
+  # 'CUSTOM COLLECTIONS' below for more information.
+  my $empty_collection =
+    Bio::Restriction::EnzymeCollection->new( -empty => 1 );
+
+  # Get an array of Bio::Restriction::Enzyme objects from the collection.
+  my @enzymes = $default_collection->each_enzyme();
+
+  # Get a Bio::Restriction::Enzyme object for a particular enzyme by name.
+  my $enz = $default_collection->get_enzyme( 'EcoRI' );
+
+  # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
+  # that have the equivalent of 6-bp recognition sequences.
+  my $six_cutters = $default_collection->cutters( 6 );
+
+  # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
+  # that are rare cutters.
+  my $rare_cutters = $default_collection->cutters( -start => 6, -end => 8 );
+
+  # Get a Bio::Restriction::EnzymeCollection object that contains enzymes
+  # that generate blunt ends:
+  my $blunt_cutters = $default_collection->blunt_enzymes();
+
+  # See 'CUSTOM COLLECTIONS' below for an example of creating a
+  # Bio::Restriction::EnzymeCollection object with a specified subset of
+  # enzymes using methods provided by the Bio::Restriction::Enzyme class.
+
+=head1 DESCRIPTION
+
+Bio::Restriction::EnzymeCollection represents a collection of
+restriction enzymes.
+
+If you create a new collection directly rather than from a REBASE
+file using L<Bio::Restriction::IO>, it will be populated by a
+default set of enzymes with site and cut information
+only.
+
+Use L<Bio::Restriction::Analysis> to figure out which enzymes are
+available and where they cut your sequence.
+
+=head1 CUSTOM COLLECTIONS
+
+Note that the underlying L<Bio::Restriction::Enzyme> objects have a rich
+variety of methods that allow more complicated selections than the methods
+that are defined by Bio::Restriction::EnzymeCollection.
+
+For example, the way to create a custom collection of Type II enzymes
+is as follows:
+
+  my $complete_collection =
+      Bio::Restriction::EnzymeCollection->new();
+  my $type_ii_collection  =
+      Bio::Restriction::EnzymeCollection->new( -empty => 1 );
+  $type_ii_collection->enzymes(
+      grep { $_->type() eq 'II' } $complete_collection->each_enzyme() );
+
+=head1 SEE ALSO
+
+L<Bio::Restriction::IO> - read in enzymes from REBASE files
+
+L<Bio::Restriction::Analysis> - figure out what enzymes cut a sequence
+
+L<Bio::Restriction::Enzyme> - define a single restriction enzyme
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Rob Edwards.
+
+Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
+Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private and
+are intended for internal use by this module. They are not considered
+part of the public interface and are described here for documentation
+purposes only.
+
+=cut
+
+
+package Bio::Restriction::EnzymeCollection;
+use strict;
+
+use Bio::Restriction::Enzyme;
+use Bio::Restriction::IO;
+
+use Data::Dumper;
+
+use vars qw ();
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+ Title     : new
+ Function  : Initializes the Restriction::EnzymeCollection object
+ Returns   : The Restriction::EnzymeCollection object
+ Arguments : optional named parameter -empty
+
+Set parameter -empty to true if you do NOT want the collection be
+populated by the default set of prototype type II enzymes.
+
+Alternatively, pass an array of enzymes to -enzymes parameter.
+
+=cut
+
+# Constructor. With a true -empty argument the freshly created, empty
+# collection is returned. Otherwise the collection produced by reading
+# the default Bio::Restriction::IO stream is returned in its place.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($want_empty) = $self->_rearrange([qw(EMPTY)], @args);
+
+    # storage: the enzymes in insertion order plus an index by name
+    $self->{'_all_enzymes'} = [];
+    $self->{'_enzymes'} = {};
+
+    unless ($want_empty) {
+        # the default set of enzymes
+        my $default_io = Bio::Restriction::IO->new(-verbose => $self->verbose);
+        return $default_io->read;
+    }
+
+    return $self;
+}
+
+=head2 Manipulate the enzymes within the collection
+
+=cut
+
+=head2 enzymes
+
+ Title     : enzymes
+ Function  : add/get method for enzymes and enzyme collections
+ Returns   : object itself
+ Arguments : array of Bio::Restriction::Enzyme and
+             Bio::Restriction::EnzymeCollection objects
+
+=cut
+
+# Adds enzymes to the collection.
+#
+# Arguments : list of Bio::Restriction::EnzymeI objects and/or
+#             Bio::Restriction::EnzymeCollection objects (the enzymes of
+#             a collection are added one by one)
+# Returns   : the collection itself
+#
+# Anything else (plain scalars, unblessed references, other objects) is
+# reported with a warning and skipped.
+sub enzymes {
+    my ($self, @enzs)=@_;
+    require Scalar::Util;
+    foreach my $e (@enzs) {
+        if (not ref $e) {
+            # plain scalars, e.g. enzyme names, can not be stored
+            $self->warn("EnzymeCollection can not deal with plain scalars [".
+                        (defined $e ? $e : 'undef')."]");
+        }
+        elsif (not Scalar::Util::blessed($e)) {
+            # calling isa() on an unblessed reference would die
+            $self->warn("EnzymeCollection can not deal with unblessed ".
+                        ref($e)." references");
+        }
+        elsif ($e->isa('Bio::Restriction::EnzymeI')) {
+            push(@{$self->{'_all_enzymes'}},$e);
+            $self->{'_enzymes'}->{$e->name} = $e;
+        }
+        elsif ($e->isa('Bio::Restriction::EnzymeCollection')) {
+            $self->enzymes($e->each_enzyme);
+        } else {
+            $self->warn("EnzymeCollection can not deal with ".
+                        ref($e)." objects");
+        }
+    }
+    return $self;
+}
+
+#
+# method to remove duplicates?
+#
+
+=head2 each_enzyme
+
+ Title     : each_enzyme
+ Function  : get an array of enzymes
+ Returns   : array of Bio::Restriction::Enzyme objects
+ Arguments : -
+
+=cut
+
+# Returns the enzymes stored in the collection, in the order in which
+# they were added.
+sub each_enzyme {
+    my ($self) = @_;
+    my $stored = $self->{'_all_enzymes'};
+    return @$stored;
+}
+
+=head2 get_enzyme
+
+ Title     : get_enzyme
+ Function  : Gets a Bio::Restriction::Enzyme object for the enzyme name
+ Returns   : A Bio::Restriction::Enzyme object or undef
+ Arguments : An enzyme name that is in the collection
+
+=cut
+
+# Looks an enzyme up by name in the name index of the collection.
+# Returns the stored enzyme object, or undef for an unknown name.
+sub get_enzyme {
+    my $self = shift;
+    my $name = shift;
+    return $self->{'_enzymes'}{$name};
+}
+
+
+=head2 available_list
+
+ Title     : available_list
+ Function  : Gets a list of all the enzymes that we know about
+ Returns   : A sorted list of the names of all the enzymes in the
+             collection (an empty list if none are defined)
+ Arguments : Nothing
+ Comments  : Note, I maintain this for backwards compatibility,
+             but I don't like the name as it is very ambiguous
+
+=cut
+
+# Returns the names of all enzymes in the collection, sorted as strings.
+# Any argument passed after the invocant is ignored.
+sub available_list {
+    my $self = shift;
+    my @names = sort keys %{ $self->{'_enzymes'} };
+    return @names;
+}
+
+=head2 longest_cutter
+
+ Title     : longest_cutter
+ Function  : Gets the enzyme with the longest recognition site
+ Returns   : A Bio::Restriction::Enzyme object
+ Arguments : Nothing
+ Comments  : Note, this is used by Bio::Restriction::Analysis
+             to figure out what to do with circular sequences
+
+=cut
+
+# Returns the enzyme with the largest recognition_length(); the first
+# one found wins a tie. When no enzyme has a recognition length above
+# zero (e.g. an empty collection) the placeholder string '.' is returned.
+sub longest_cutter {
+    my $self = shift;
+    my ($best_enz, $best_len) = ('.', 0);
+    for my $candidate ($self->each_enzyme) {
+        my $length = $candidate->recognition_length;
+        next unless $length > $best_len;
+        ($best_len, $best_enz) = ($length, $candidate);
+    }
+    return $best_enz;
+}
+
+=head2 Filter enzymes
+
+=cut
+
+=head2 blunt_enzymes
+
+  Title     : blunt_enzymes
+  Function  : Gets a collection of all the enzymes that are blunt cutters
+  Returns   : A Bio::Restriction::EnzymeCollection object containing
+              the enzymes that are blunt cutters
+  Arguments : Nothing
+  Comments  : 
+
+This is an example of the kind of filtering better done by the scripts
+using the rich collection of methods in Bio::Restriction::Enzyme.
+
+=cut
+
+# Builds a new collection holding only those enzymes whose overhang()
+# is the string 'blunt'.
+sub blunt_enzymes {
+    my ($self) = @_;
+    my @blunt = grep { $_->overhang eq 'blunt' } $self->each_enzyme;
+    my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+    return $bs->enzymes(@blunt);
+}
+
+
+=head2 cutters
+
+  Title     : cutters
+  Function  : Gets a list of all the enzymes that recognize a
+              certain size, e.g. 6-cutters
+  Usage     : $cutters = $collection->cutters(6);
+  Returns   : A Bio::Restriction::EnzymeCollection object containing
+              the enzymes that are x cutters (nothing without arguments)
+  Arguments : A positive number for the size of cutters to return
+              OR
+              A range: (-start => 6, -end => 8,
+                        -inclusive => 1, -exclusive => 0 )
+
+The default for a range is 'inclusive'
+
+
+=cut
+
+# Selects enzymes by their cutter() value.
+#
+# Arguments : a single number: enzymes whose cutter() equals it, or
+#             named arguments -start, -end and optionally -inclusive /
+#             -exclusive: enzymes whose cutter() lies in that range
+# Returns   : a new Bio::Restriction::EnzymeCollection; nothing when
+#             called without arguments
+# Throws    : when a size or a limit is not a plain non-negative number
+sub cutters {
+    my $self = shift;
+
+    return unless @_; # no argument
+
+    # The whole value has to be a number: digits with an optional leading
+    # plus sign and an optional fraction. The pattern is anchored so that
+    # values such as 'abc6' or '-5' are rejected.
+    my $number = qr/^\+?(?:\d+\.?\d*|\.\d+)\z/;
+
+    if (scalar @_ == 1 ) {
+        my $size = shift;
+        $self->throw("Need a positive number [".
+                     (defined $size ? $size : '')."]")
+            unless defined $size and $size =~ $number;
+
+        my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+        foreach my $e ($self->each_enzyme) {
+            $bs->enzymes($e) if $e->cutter == $size;
+        }
+        return $bs;
+    }
+
+    # named arguments
+    my ($start, $end, $inclusive, $exclusive ) =
+        $self->_rearrange([qw(
+                              START
+                              END
+                              INCLUSIVE
+                              EXCLUSIVE
+                             )], @_);
+
+    $self->throw("Start needs a positive number [".
+                 (defined $start ? $start : '')."]")
+        unless defined $start and $start =~ $number;
+    $self->throw("End needs a positive number [".
+                 (defined $end ? $end : '')."]")
+        unless defined $end and $end =~ $number;
+
+    # The range is inclusive unless -exclusive is true; -exclusive wins
+    # when both flags are given.
+    $inclusive = $exclusive ? 0 : 1;
+
+    my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+    foreach my $e ($self->each_enzyme) {
+        my $cutter = $e->cutter;
+        if ($inclusive) {
+            $bs->enzymes($e) if $cutter >= $start and $cutter <= $end;
+        } else {
+            $bs->enzymes($e) if $cutter > $start and $cutter < $end;
+        }
+    }
+    return $bs;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/EnzymeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+# $Id: EnzymeI.pm,v 1.6.4.1 2006/10/02 23:10:23 sendu Exp $
+#------------------------------------------------------------------
+#
+# BioPerl module Bio::Restriction::EnzymeI
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Restriction::EnzymeI - Interface class for restriction endonuclease
+
+=head1 SYNOPSIS
+
+  # do not run this class directly
+
+=head1 DESCRIPTION
+
+This module defines methods for a single restriction endonuclease.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 SEE ALSO
+
+L<Bio::Restriction::Enzyme>
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private and
+are intended for internal use by this module. They are not considered
+part of the public interface and are described here for documentation
+purposes only.
+
+=cut
+
+package Bio::Restriction::EnzymeI;
+use strict;
+
+
+
+use base qw(Bio::Root::RootI);
+
+# Interface methods. Each one only reports, through
+# throw_not_implemented(), that the implementing class has to supply it.
+
+# identity, recognition site and cut positions
+sub name { $_[0]->throw_not_implemented() }
+sub site { $_[0]->throw_not_implemented() }
+sub cuts_after { $_[0]->throw_not_implemented() }
+sub cut { $_[0]->throw_not_implemented() }
+sub complementary_cut { $_[0]->throw_not_implemented() }
+sub type { $_[0]->throw_not_implemented() }
+sub seq { $_[0]->throw_not_implemented() }
+sub string { $_[0]->throw_not_implemented() }
+sub revcom { $_[0]->throw_not_implemented() }
+sub recognition_length { $_[0]->throw_not_implemented() }
+sub non_ambiguous_length { $_[0]->throw_not_implemented() }
+sub cutter { $_[0]->throw_not_implemented() }
+sub palindromic { $_[0]->throw_not_implemented() }
+sub overhang { $_[0]->throw_not_implemented() }
+sub overhang_seq { $_[0]->throw_not_implemented() }
+sub is_ambiguous { $_[0]->throw_not_implemented() }
+
+# prototypes, isoschizomers and methylation
+sub is_prototype { $_[0]->throw_not_implemented() }
+sub prototype_name { $_[0]->throw_not_implemented() }
+sub isoschizomers { $_[0]->throw_not_implemented() }
+sub purge_isoschizomers { $_[0]->throw_not_implemented() }
+sub methylation_sites { $_[0]->throw_not_implemented() }
+sub purge_methylation_sites { $_[0]->throw_not_implemented() }
+
+# origin, vendors and literature
+sub microbe { $_[0]->throw_not_implemented() }
+sub source { $_[0]->throw_not_implemented() }
+sub vendors { $_[0]->throw_not_implemented() }
+sub purge_vendors { $_[0]->throw_not_implemented() }
+sub vendor { $_[0]->throw_not_implemented() }
+sub references { $_[0]->throw_not_implemented() }
+sub purge_references { $_[0]->throw_not_implemented() }
+
+# NOTE(review): looks like a leftover placeholder; kept so that the
+# interface stays unchanged
+sub xxxxxxx { $_[0]->throw_not_implemented() }
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/bairoch.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/bairoch.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/bairoch.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,257 @@
+# $Id: bairoch.pm,v 1.8.4.1 2006/10/02 23:10:23 sendu Exp $
+# BioPerl module for Bio::Restriction::IO::bairoch
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Restriction::IO::bairoch - bairoch enzyme set
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Restriction::IO class.
+
+=head1 DESCRIPTION
+
+This is the most complete format of the REBASE files, and basically
+includes all the data on each of the restriction enzymes.
+
+This parser is for the Bairoch format (aka MacVector, Vector NTI, PC/Gene 
+(Bairoch) format), REBASE format #19
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Restriction::IO::bairoch;
+
+use vars qw(%WITH_REFM_FIELD);
+use strict;
+
+use Bio::Restriction::Enzyme;
+use Bio::Restriction::Enzyme::MultiCut;
+use Bio::Restriction::Enzyme::MultiSite;
+use Bio::Restriction::EnzymeCollection;
+
+use Data::Dumper;
+
+use base qw(Bio::Restriction::IO::base);
+
+
+# Constructor: blesses an empty hash into the class and passes all the
+# arguments on to _initialize().
+sub new {
+    my ($class, @args) = @_;
+    my $self = {};
+    bless $self, $class;
+    $self->_initialize(@args);
+    return $self;
+}
+
+# Sets the verbosity from the optional -verbose argument (0 when it is
+# not given) and passes all the arguments on to the parent class.
+sub _initialize {
+    my($self, @args) = @_;
+    my ($verbose) =
+            $self->_rearrange([qw(
+                                  VERBOSE
+                                 )], @args);
+    # '||=' so that the default really is stored in $verbose
+    $verbose ||= 0;
+    $self->verbose($verbose);
+
+    return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 read
+
+ Title   : read
+ Usage   : $renzs = $stream->read
+ Function: reads all the restriction enzymes from the stream
+ Returns : a Bio::Restriction::EnzymeCollection object
+ Args    : none
+
+=cut
+
+# Reads every entry from the stream and returns a
+# Bio::Restriction::EnzymeCollection with one enzyme per usable entry.
+# Entries without a name (ID line) or a site (RS line), and entries
+# whose site is '?', are skipped. Throws when the site holds none of the
+# letters N, G, A, T, C or when the cut position is not an integer.
+sub read {
+    my $self = shift;
+
+    my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+
+    # each entry is read up to the next '//'
+    local $/ = '//';
+    while (defined(my $entry=$self->_readline()) ) {
+        $self->debug("|$entry|\n");
+
+        #
+        # Minimal information
+        #
+        my ($name) = $entry =~ /ID\s+(\S+)/;
+        my ($site) = $entry =~ /RS\s+([^\n]+)/;
+        next unless ($name && $site);
+
+        # the standard sequence format for these guys is:
+        # GATC, 2;
+        # or, for enzymes that cut more than once
+        # GATC, 2; GTAC, 2;
+
+        # there are a couple of sequences that have multiple
+        # recognition sites.
+
+        # the first site is used for the enzyme, the remaining ones are
+        # kept in @sequences
+        my @sequences;
+        if ($site =~ /\;/) {
+            @sequences = split /\;/, $site;
+            $self->debug(@sequences,"\n");
+            $site=shift @sequences;
+        }
+
+        my ($seq, $cut)=split /,\s+/, $site;
+        $self->debug("SITE: |$site| GAVE: |$seq| and |$cut|\n");
+        if ($seq eq '?') {
+            $self->warn("$name: no site. Skipping") if $self->verbose > 1;
+            next;
+        }
+
+        # this is mainly an error check to make sure that I am adding what I think I am!
+        if ($seq !~ /[NGATC]/i) {
+            $self->throw("Sequence $name has weird sequence: |$seq|");
+        }
+        my $re;
+        if ($cut eq "?") {
+            $re = Bio::Restriction::Enzyme->new(-name=>$name, -seq => $seq);
+        }
+        else {
+            if ($cut !~ /^-?\d+$/) {
+                $self->throw("Cut site from $name is weird: |$cut|\n");
+            }
+
+            $re = Bio::Restriction::Enzyme->new(-name=>$name,
+                                                -cut => $cut,
+                                                -seq => $seq
+                                                );
+        }
+        $renzs->enzymes($re);
+
+        #
+        # prototype / isoschizomers
+        #
+        my ($prototype) = $entry =~ /PT\s+([^\n]+)/;
+
+        if ($prototype) {
+            #$re->isoschizomers(split /\,/, $isoschizomers);
+            #NOTE: Need to add a method so that we can add isoschosomers to enzymes that may not exist!
+            $re->is_prototype(0);
+        } else {
+            $re->is_prototype(1);
+        }
+
+        #
+        # methylation
+        #
+
+        my ($meth) = $entry =~ /MS\s+([^\n]+)/;
+        my @meths;
+        if ($meth) {
+            # this can be either X(Y) or X(Y),X2(Y2)
+            # where X is the base and y is the type of methylation
+            #
+            # The captures are copied into lexicals and each site is
+            # handed to _meth() as one 'X(Y)' string, which is the form
+            # _meth() parses. Passing X and Y as separate arguments made
+            # _meth() fall back on whatever the last successful match had
+            # captured, i.e. the first site was reported twice.
+            if ( $meth =~ /(\S+)\((\d+)\),(\S+)\((\d+)\)/ ) { # two msites per site
+                my ($p1, $m1, $p2, $m2) = ($1, $2, $3, $4);
+                $re->methylation_sites($self->_meth($re, "${p1}(${m1})"),
+                                       $self->_meth($re, "${p2}(${m2})"));
+            }
+            elsif ($meth =~ /(\S+)\((\d+)\)/ ) { # one msite per site or more sites
+                my ($p1, $m1) = ($1, $2);
+                #print Dumper $meth;
+                $re->methylation_sites( $self->_meth($re, "${p1}(${m1})") );
+                @meths = split /, /, $meth;
+                $meth=shift @meths;
+            } else {
+                $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
+            }
+        }
+
+        #
+        # microbe
+        #
+        my ($microbe) = $entry =~ /OS\s+([^\n]+)/;
+        $re->microbe($microbe) if $microbe;
+
+        #
+        # source
+        #
+        #my ($source) = $entry =~ /<6>([^\n]+)/;
+        #$re->source($source) if $source;
+
+        #
+        # vendors
+        #
+        my ($vendors) = $entry =~ /CR\s+([^\n]+)/;
+        $re->vendors(split /,\s*/, $vendors) if $vendors;
+
+        #
+        # references
+        #
+        #my ($refs) = $entry =~ /<8>(.+)/s;
+        #$re->references(map {split /\n+/} $refs) if $refs;
+
+        #
+        # create special types of Enzymes
+        #
+        $self->warn("Current issues with multisite enzymes using bairoch format\n".
+                    "Recommend using itype2 or withrefm formats for now") if @sequences;
+        #$self->_make_multisites($renzs, $re, \@sequences, \@meths) if @sequences;
+
+    }
+
+    return $renzs;
+}
+
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write($renzs)
+ Function: writes restriction enzymes into the stream
+ Returns : 1 for success and 0 for error
+ Args    : a Bio::Restriction::Enzyme
+           or a Bio::Restriction::EnzymeCollection object
+
+=cut
+
+# Writing is not supported by this parser: always reports that through
+# throw_not_implemented().
+sub write {
+    my ($self, @h) = @_;
+    $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/base.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/base.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/base.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,904 @@
+# $Id: base.pm,v 1.11.4.1 2006/10/02 23:10:23 sendu Exp $
+# BioPerl module for Bio::Restriction::IO::base
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Restriction::IO::base - base enzyme set
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Restriction::IO class.
+
+=head1 DESCRIPTION
+
+
+This class defines some base methods for restriction enzyme input and
+at the same time gives a base list of common enzymes.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Restriction::IO::base;
+
+use strict;
+
+use Bio::Restriction::Enzyme;
+use Bio::Restriction::EnzymeCollection;
+use Bio::Restriction::Enzyme::MultiCut;
+use Bio::Restriction::Enzyme::MultiSite;
+use Data::Dumper;
+
+use base qw(Bio::Restriction::IO);
+
+my $offset; # class variable
+
+# Constructor: blesses an empty hash into the class and passes all the
+# arguments on to _initialize().
+sub new {
+    my ($class, @args) = @_;
+    my $self = {};
+    bless $self, $class;
+    $self->_initialize(@args);
+    return $self;
+}
+
+# Passes all the arguments on to the _initialize() of the parent class.
+sub _initialize {
+    my($self, @args) = @_;
+    return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 read
+
+ Title   : read
+ Usage   : $renzs = $stream->read
+ Function: reads all the restriction enzymes from the stream
+ Returns : a Bio::Restriction::EnzymeCollection object
+ Args    : none
+
+=cut
+
+
+
+# Builds a collection from the enzyme table in the DATA section of this
+# file. Each non-blank line holds a name, a site and a cut position
+# separated by whitespace.
+# Returns a Bio::Restriction::EnzymeCollection object.
+sub read {
+    my $self = shift;
+
+    my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+
+    # The position of DATA is remembered on the first call and restored
+    # on every call, so that the table can be read more than once.
+    seek DATA,($offset||=tell DATA), 0;
+
+    # A lexical line variable is used because 'while (<DATA>)' assigns to
+    # the global $_ without localizing it and so clobbers the caller's $_.
+    while (defined(my $line = <DATA>)) {
+        chomp $line;
+        next if $line =~ /^\s*$/;
+        my ($name, $site, $cut) = split /\s+/, $line;
+        my $re = Bio::Restriction::Enzyme->new(-name => $name,
+                                               -site => $site,
+                                               -cut => $cut);
+        $renzs->enzymes($re);
+    }
+    return $renzs;
+}
+
+
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write($renzs)
+ Function: writes restriction enzymes into the stream
+ Returns : 1 for success and 0 for error
+ Args    : a Bio::Restriction::Enzyme
+           or a Bio::Restriction::EnzymeCollection object
+
+=cut
+
+# Prints one tab separated line (name, string, cut) per enzyme to the
+# currently selected output handle. The enzymes of a collection are
+# printed sorted by name; a single enzyme is printed as it is.
+sub write {
+    my $self = shift;
+    for my $item (@_) {
+        if ($item->isa('Bio::Restriction::EnzymeCollection')) {
+            my @by_name = sort {$a->name cmp $b->name} $item->each_enzyme;
+            for my $enz (@by_name) {
+                printf "%s\t%s\t%s\n", $enz->name, $enz->string, $enz->cut;
+            }
+        }
+        if ($item->isa('Bio::Restriction::Enzyme')) {
+            printf "%s\t%s\t%s\n", $item->name, $item->string, $item->cut;
+        }
+    }
+}
+
+
+=head2 Common REBASE parsing methods
+
+The rest of the methods in this file are to be used by other REBASE
+parsers. They are not to be used outside subclasses of this base
+class. (They are 'protected' in the sense the word is used in Java.)
+
+=cut
+
+=head2 _cuts_from_site
+
+ Title   : _cuts_from_site
+ Usage   : ($site, $cut, $comp_cut) = _cuts_from_site('ACGCGT(4/5)');
+ Function: Separates cut positions from a single site string.
+           Does nothing to site if it does not have the cut string
+ Returns : array of site_string, forward_cut_position, reverse_cut_position
+ Args    : recognition site string
+
+=cut
+
+# Splits a site string such as 'ACGCGT(4/5)' into the bare site and the
+# two cut positions. The positions are undef when the string carries no
+# '(n/m)' part; a trailing parenthesised part is removed from the site
+# in either case.
+sub _cuts_from_site {
+    my $self = shift;
+    my $site = shift;
+
+    my ($cut, $comp_cut);
+    if ($site =~ /\((-?\d+)\/(-?\d+)\)/) {
+        ($cut, $comp_cut) = ($1, $2);
+    }
+    $site =~ s/\(.*\)$//;
+
+    return ($site, $cut, $comp_cut);
+}
+
+
+=head2 _meth
+
+ Title   : _meth
+ Usage   : ($pos, $meth) = $self->_meth('2(5)');
+ Function: Separates methylation position and code from a string.
+           Adjusts the position depending on enzyme site length
+           and symmetry 
+ Returns : array of position and methylation code
+ Args    : 1. reference to Enzyme object
+           2. methylation description string
+
+=cut
+
+# Separates the methylation position and code and adjusts the position.
+#
+# Arguments : 1. enzyme object
+#             2. methylation description 'position(code)', e.g. '2(5)',
+#                or the bare position when the code is passed separately
+#             3. optional methylation code
+# Returns   : list of position and methylation code; an empty list when
+#             the description can not be understood
+#
+# A '?' position becomes 0. For non palindromic enzymes the length of
+# the enzyme's sequence is added to any other (true) position.
+sub _meth {
+    my ($self, $re, $meth, $code) = @_;
+
+    my ($pos, $m);
+    if (defined $meth and $meth =~ /(\S+)\((\d+)\)/) {
+        ($pos, $m) = ($1, $2);
+    }
+    elsif (defined $meth and defined $code) {
+        # position and code were passed as two separate arguments
+        ($pos, $m) = ($meth, $code);
+    }
+    else {
+        # After a failed match $1 and $2 still hold the captures of an
+        # earlier, unrelated match and must not be used.
+        $self->warn("Unknown methylation format [".
+                    (defined $meth ? $meth : '')."]") if $self->verbose >0;
+        return;
+    }
+
+    $pos = 0 if $pos eq '?';
+    $pos = $re->seq->length + $pos if $pos and ! $re->palindromic;
+    return ($pos, $m);
+}
+
+
+=head2 _coordinate_shift_to_cut
+
+ Title   : _coordinate_shift_to_cut
+ Usage   : $cut = $self->_coordinate_shift_to_cut($oricut, offset);
+ Function: Adjust cut position coordinates to start from the 
+           first nucleotides of site
+ Returns : Cut position in correct coordinates
+ Args    : 1. Original cut position
+           2. Length of the recognition site
+
+=cut
+
+# Returns the sum of the original cut position and the length of the
+# recognition site.
+sub _coordinate_shift_to_cut {
+    my $self = shift;
+    my ($cut, $site_length) = @_;
+    my $shifted = $cut + $site_length;
+    return $shifted;
+}
+
+
+=head2 _make_multisites
+
+ Title   : _make_multisites
+ Usage   : $self->_make_multisites($collection, $first_enzyme, \@sites, \@mets)
+ Function: 
+
+           Bless a Bio::Restriction::Enzyme (which is already part of
+           the collection object) into
+           Bio::Restriction::Enzyme::MultiSite and clone it as many
+           times as there are alternative sites. The new objects are
+           added into the collection and into others list of sister
+           objects.
+
+ Returns : nothing, does in place editing
+ Args    : 1. a Bio::Restriction::EnzymeCollection
+           2. a Bio::Restriction::Enzyme
+           3. reference to an array of recognition site strings
+           4. reference to an array of methylation code strings, optional
+
+=cut
+
+# Reblesses $re into Bio::Restriction::Enzyme::MultiSite and creates one
+# clone of it per alternative site. Every clone gets its own site, its
+# own cut positions (when the site string carries them) and, when
+# methylation strings are given, its own methylation sites. Finally all
+# the enzymes are made known to each other through others().
+#
+# Arguments : 1. the enzyme collection (not used by this method)
+#             2. the first enzyme
+#             3. reference to an array of site strings
+#             4. reference to an array of methylation strings, optional
+# Returns   : 1
+sub _make_multisites {
+    my ($self, $renzs, $re, $sites, $meths) = @_;
+
+    bless $re, 'Bio::Restriction::Enzyme::MultiSite';
+
+    for my $count (0 .. $#{$sites}) {
+        my $re2 = $re->clone;
+
+        my ($site, $cut, $comp_cut) =
+            $self->_cuts_from_site($sites->[$count]);
+        $re2->site($site);
+
+        if ($cut) {
+            # These cut positions belong to the alternative site, so they
+            # are set on the clone; setting them on $re would overwrite
+            # the cut positions of the first enzyme.
+            $re2->cut($self->_coordinate_shift_to_cut($cut, length($site)));
+            $re2->complementary_cut($self->_coordinate_shift_to_cut($comp_cut, length($site)));
+        }
+
+        if ($meths and @$meths) {
+            $re2->purge_methylation_sites;
+            $re2->methylation_sites($self->_meth($re2, $meths->[$count]));
+        }
+
+        $re->others($re2);
+    }
+
+    # tell every clone about the first enzyme and about its sister clones
+    foreach my $enz ($re->others) {
+        $enz->others($re, grep {$_ ne $enz} $re->others);
+    }
+
+    1;
+}
+
+
+
+=head2 _make_multicuts
+
+ Title   : _make_multicuts
+ Usage   : $self->_make_multicuts($collection, $first_enzyme, $precuts)
+ Function: 
+
+           Bless a Bio::Restriction::Enzyme (which is already part of
+           the collection object) into
+           Bio::Restriction::Enzyme::MultiCut and clone it. The precut
+           string is processed to replace the cut sites in the cloned
+           object which is added into the collection. Both objects
+           refer to each other through others() method.
+
+ Returns : nothing, does in place editing
+ Args    : 1. a Bio::Restriction::EnzymeCollection
+           2. a Bio::Restriction::Enzyme
+           3. precut string, e.g. '12/7'
+
+
+The examples we have of multiply cutting enzymes cut only four
+times. This protected method deals only with a string of two
+integers separated with a slash, e.g. '12/7'. The numbers represent the positions
+BEFORE the start of the recognition site, i.e. negative positions.
+
+=cut
+
+# Reblesses $re into Bio::Restriction::Enzyme::MultiCut, pads its site
+# at the front with as many 'N's as the first precut number, and
+# registers a clone carrying the two precut positions, each prefixed
+# with '-', through others().
+#
+# Arguments : 1. the enzyme collection (not used by this method)
+#             2. the enzyme
+#             3. precut string, e.g. '12/7'
+# Returns   : 1
+sub _make_multicuts {
+    my ($self, $renzs, $re, $precut) = @_;
+
+    bless $re, 'Bio::Restriction::Enzyme::MultiCut';
+
+    my ($cut, $comp_cut);
+    if ($precut =~ /(-?\d+)\/(-?\d+)/) {
+        ($cut, $comp_cut) = ($1, $2);
+    }
+
+    # Pads the front to prevent detection of sites when the 1st
+    # cut is off the end of the sequence.
+    my $padded_site = ('N' x abs($cut)) . $re->site;
+    $re->site($padded_site);
+
+    # the clone is taken after padding, so it carries the padded site
+    my $sister = $re->clone;
+    $sister->cut("-$cut");
+    $sister->complementary_cut("-$comp_cut");
+    $re->others($sister);
+
+    return 1;
+}
+
+
+=head2 _companies
+
+ Title     : _companies
+ Purpose   : Defines the companies that we know about
+ Returns   : A hash
+ Argument  : Nothing
+ Comments  : An internal method to define the companies that we know about
+             REBASE uses a code, and this converts the code to the real name
+	     (e.g. A = Amersham Pharmacia Biotech)
+
+=cut
+
+
+sub _companies {
+    my ($self) = @_;
+
+    # Single-letter REBASE supplier code => company name. The table is
+    # stored on the object under the 'company' key and also returned.
+    $self->{company} = {
+        A => 'Amersham Pharmacia Biotech (1/03)',
+        C => 'Minotech Biotechnology (6/01)',
+        E => 'Stratagene (1/03)',
+        F => 'Fermentas AB (1/03)',
+        G => 'Qbiogene (1/03)',
+        H => 'American Allied Biochemical, Inc. (10/98)',
+        I => 'SibEnzyme Ltd. (1/03)',
+        J => 'Nippon Gene Co., Ltd. (6/00)',
+        K => 'Takara Shuzo Co. Ltd. (1/03)',
+        M => 'Roche Applied Science (1/03)',
+        N => 'New England Biolabs (1/03)',
+        O => 'Toyobo Biochemicals (11/98)',
+        P => 'Megabase Research Products (5/99)',
+        Q => 'CHIMERx (1/03)',
+        R => 'Promega Corporation (1/03)',
+        S => 'Sigma Chemical Corporation (1/03)',
+        U => 'Bangalore Genei (1/03)',
+        V => 'MRC-Holland (1/03)',
+        X => 'EURx Ltd. (1/03)',
+    };
+
+    return $self->{company};
+}
+
+
+1;
+
+__DATA__
+AasI	GACNNNNNNGTC	7
+AatI	AGGCCT	3
+AccII	CGCG	2
+AatII	GACGTC	5
+AauI	TGTACA	1
+Acc113I	AGTACT	3
+Acc16I	TGCGCA	3
+Acc65I	GGTACC	1
+AccB1I	GGYRCC	1
+AccB7I	CCANNNNNTGG	7
+AccI	GTMKAC	2
+AccIII	TCCGGA	1
+AciI	CCGC	1
+AclI	AACGTT	2
+AcsI	RAATTY	1
+AcvI	CACGTG	3
+AcyI	GRCGYC	2
+AdeI	CACNNNGTG	6
+AfaI	GTAC	2
+AfeI	AGCGCT	3
+AflI	GGWCC	1
+AflII	CTTAAG	1
+AflIII	ACRYGT	1
+AgeI	ACCGGT	1
+AhaIII	TTTAAA	3
+AhdI	GACNNNNNGTC	6
+AhlI	ACTAGT	1
+AleI	CACNNNNGTG	5
+AluI	AGCT	2
+Alw21I	GWGCWC	5
+Alw44I	GTGCAC	1
+AlwNI	CAGNNNCTG	6
+Ama87I	CYCGRG	1
+AocI	CCTNAGG	2
+Aor51HI	AGCGCT	3
+ApaBI	GCANNNNNTGC	8
+ApaI	GGGCCC	5
+ApaLI	GTGCAC	1
+ApoI	RAATTY	1
+AscI	GGCGCGCC	2
+AseI	ATTAAT	2
+AsiAI	ACCGGT	1
+AsiSI	GCGATCGC	5
+AsnI	ATTAAT	2
+Asp700I	GAANNNNTTC	5
+Asp718I	GGTACC	1
+AspEI	GACNNNNNGTC	6
+AspHI	GWGCWC	5
+AspI	GACNNNGTC	4
+AspLEI	GCGC	3
+AspS9I	GGNCC	1
+AsuC2I	CCSGG	2
+AsuI	GGNCC	1
+AsuII	TTCGAA	2
+AsuNHI	GCTAGC	1
+AvaI	CYCGRG	1
+AvaII	GGWCC	1
+AviII	TGCGCA	3
+AvrII	CCTAGG	1
+AxyI	CCTNAGG	2
+BalI	TGGCCA	3
+BamHI	GGATCC	1
+BanI	GGYRCC	1
+BanII	GRGCYC	5
+BanIII	ATCGAT	2
+BbeI	GGCGCC	5
+BbrPI	CACGTG	3
+BbuI	GCATGC	5
+Bbv12I	GWGCWC	5
+BclI	TGATCA	1
+BcnI	CCSGG	2
+BcoI	CYCGRG	1
+BcuI	ACTAGT	1
+BetI	WCCGGW	1
+BfaI	CTAG	1
+BfmI	CTRYAG	1
+BfrBI	ATGCAT	3
+BfrI	CTTAAG	1
+BfuCI	GATC	0
+BglI	GCCNNNNNGGC	7
+BglII	AGATCT	1
+BlnI	CCTAGG	1
+BloHII	CTGCAG	5
+BlpI	GCTNAGC	2
+Bme1390I	CCNGG	2
+Bme1580I	GKGCMC	5
+Bme18I	GGWCC	1
+BmtI	GCTAGC	5
+BmyI	GDGCHC	5
+BoxI	GACNNNNGTC	5
+Bpu1102I	GCTNAGC	2
+Bpu14I	TTCGAA	2
+Bsa29I	ATCGAT	2
+BsaAI	YACGTR	3
+BsaBI	GATNNNNATC	5
+BsaHI	GRCGYC	2
+BsaJI	CCNNGG	1
+BsaOI	CGRYCG	4
+BsaWI	WCCGGW	1
+Bsc4I	CCNNNNNNNGG	7
+BscBI	GGNNCC	3
+BscFI	GATC	0
+BscI	ATCGAT	2
+Bse118I	RCCGGY	1
+Bse21I	CCTNAGG	2
+Bse8I	GATNNNNATC	5
+BseAI	TCCGGA	1
+BseBI	CCWGG	2
+BseCI	ATCGAT	2
+BseDI	CCNNGG	1
+BseJI	GATNNNNATC	5
+BseLI	CCNNNNNNNGG	7
+BsePI	GCGCGC	1
+BseSI	GKGCMC	5
+BseX3I	CGGCCG	1
+Bsh1236I	CGCG	2
+Bsh1285I	CGRYCG	4
+BshFI	GGCC	2
+BshI	GGCC	2
+BshNI	GGYRCC	1
+BshTI	ACCGGT	1
+BsiBI	GATNNNNATC	5
+BsiCI	TTCGAA	2
+BsiEI	CGRYCG	4
+BsiHKAI	GWGCWC	5
+BsiHKCI	CYCGRG	1
+BsiLI	CCWGG	2
+BsiMI	TCCGGA	1
+BsiQI	TGATCA	1
+BsiSI	CCGG	1
+BsiWI	CGTACG	1
+BsiXI	ATCGAT	2
+BsiYI	CCNNNNNNNGG	7
+BsiZI	GGNCC	1
+BslI	CCNNNNNNNGG	7
+BsoBI	CYCGRG	1
+Bsp106I	ATCGAT	2
+Bsp119I	TTCGAA	2
+Bsp120I	GGGCCC	1
+Bsp1286I	GDGCHC	5
+Bsp13I	TCCGGA	1
+Bsp1407I	TGTACA	1
+Bsp143I	GATC	0
+Bsp143II	RGCGCY	5
+Bsp1720I	GCTNAGC	2
+Bsp19I	CCATGG	1
+Bsp68I	TCGCGA	3
+BspA2I	CCTAGG	1
+BspCI	CGATCG	4
+BspDI	ATCGAT	2
+BspEI	TCCGGA	1
+BspHI	TCATGA	1
+BspLI	GGNNCC	3
+BspLU11I	ACATGT	1
+BspMII	TCCGGA	1
+BspT104I	TTCGAA	2
+BspT107I	GGYRCC	1
+BspTI	CTTAAG	1
+BspXI	ATCGAT	2
+BsrBRI	GATNNNNATC	5
+BsrFI	RCCGGY	1
+BsrGI	TGTACA	1
+BssAI	RCCGGY	1
+BssECI	CCNNGG	1
+BssHI	CTCGAG	1
+BssHII	GCGCGC	1
+BssKI	CCNGG	0
+BssNAI	GTATAC	3
+BssT1I	CCWWGG	1
+Bst1107I	GTATAC	3
+Bst2UI	CCWGG	2
+Bst4CI	ACNGT	3
+Bst98I	CTTAAG	1
+BstACI	GRCGYC	2
+BstAPI	GCANNNNNTGC	7
+BstBAI	YACGTR	3
+BstBI	TTCGAA	2
+BstC8I	GCNNGC	3
+BstDEI	CTNAG	1
+BstDSI	CCRYGG	1
+BstEII	GGTNACC	1
+BstENI	CCTNNNNNAGG	5
+BstENII	GATC	0
+BstFNI	CGCG	2
+BstH2I	RGCGCY	5
+BstHHI	GCGC	3
+BstHPI	GTTAAC	3
+BstKTI	GATC	3
+BstMAI	CTGCAG	5
+BstMCI	CGRYCG	4
+BstMWI	GCNNNNNNNGC	7
+BstNI	CCWGG	2
+BstNSI	RCATGY	5
+BstOI	CCWGG	2
+BstPAI	GACNNNNGTC	5
+BstPI	GGTNACC	1
+BstSCI	CCNGG	0
+BstSFI	CTRYAG	1
+BstSNI	TACGTA	3
+BstUI	CGCG	2
+BstX2I	RGATCY	1
+BstXI	CCANNNNNNTGG	8
+BstYI	RGATCY	1
+BstZ17I	GTATAC	3
+BstZI	CGGCCG	1
+Bsu15I	ATCGAT	2
+Bsu36I	CCTNAGG	2
+BsuRI	GGCC	2
+BsuTUI	ATCGAT	2
+BtgI	CCRYGG	1
+BthCI	GCNGC	4
+Cac8I	GCNNGC	3
+CaiI	CAGNNNCTG	6
+CauII	CCSGG	2
+CciNI	GCGGCCGC	2
+CelII	GCTNAGC	2
+CfoI	GCGC	3
+Cfr10I	RCCGGY	1
+Cfr13I	GGNCC	1
+Cfr42I	CCGCGG	4
+Cfr9I	CCCGGG	1
+CfrI	YGGCCR	1
+ChaI	GATC	4
+ClaI	ATCGAT	2
+CpoI	CGGWCCG	2
+Csp45I	TTCGAA	2
+Csp6I	GTAC	1
+CspAI	ACCGGT	1
+CspI	CGGWCCG	2
+CviAII	CATG	1
+CviJI	RGCY	2
+CviRI	TGCA	2
+CviTI	RGCY	2
+CvnI	CCTNAGG	2
+DdeI	CTNAG	1
+DpnI	GATC	2
+DpnII	GATC	0
+DraI	TTTAAA	3
+DraII	RGGNCCY	2
+DraIII	CACNNNGTG	6
+DrdI	GACNNNNNNGTC	7
+DsaI	CCRYGG	1
+DseDI	GACNNNNNNGTC	7
+EaeI	YGGCCR	1
+EagI	CGGCCG	1
+Eam1105I	GACNNNNNGTC	6
+Ecl136II	GAGCTC	3
+EclHKI	GACNNNNNGTC	6
+EclXI	CGGCCG	1
+Eco105I	TACGTA	3
+Eco130I	CCWWGG	1
+Eco147I	AGGCCT	3
+Eco24I	GRGCYC	5
+Eco32I	GATATC	3
+Eco47I	GGWCC	1
+Eco47III	AGCGCT	3
+Eco52I	CGGCCG	1
+Eco72I	CACGTG	3
+Eco81I	CCTNAGG	2
+Eco88I	CYCGRG	1
+Eco91I	GGTNACC	1
+EcoHI	CCSGG	0
+EcoICRI	GAGCTC	3
+EcoNI	CCTNNNNNAGG	5
+EcoO109I	RGGNCCY	2
+EcoO65I	GGTNACC	1
+EcoRI	GAATTC	1
+EcoRII	CCWGG	0
+EcoRV	GATATC	3
+EcoT14I	CCWWGG	1
+EcoT22I	ATGCAT	5
+EcoT38I	GRGCYC	5
+EgeI	GGCGCC	3
+EheI	GGCGCC	3
+ErhI	CCWWGG	1
+EsaBC3I	TCGA	2
+EspI	GCTNAGC	2
+FatI	CATG	0
+FauNDI	CATATG	2
+FbaI	TGATCA	1
+FblI	GTMKAC	2
+FmuI	GGNCC	4
+Fnu4HI	GCNGC	2
+FnuDII	CGCG	2
+FriOI	GRGCYC	5
+FseI	GGCCGGCC	6
+Fsp4HI	GCNGC	2
+FspAI	RTGCGCAY	4
+FspI	TGCGCA	3
+FunI	AGCGCT	3
+FunII	GAATTC	1
+HaeI	WGGCCW	3
+HaeII	RGCGCY	5
+HaeIII	GGCC	2
+HapII	CCGG	1
+HgiAI	GWGCWC	5
+HgiCI	GGYRCC	1
+HgiJII	GRGCYC	5
+HhaI	GCGC	3
+Hin1I	GRCGYC	2
+Hin6I	GCGC	1
+HinP1I	GCGC	1
+HincII	GTYRAC	3
+HindI	CAC	2
+HindII	GTYRAC	3
+HindIII	AAGCTT	1
+HinfI	GANTC	1
+HpaI	GTTAAC	3
+HpaII	CCGG	1
+Hpy178III	TCNNGA	2
+Hpy188I	TCNGA	3
+Hpy188III	TCNNGA	2
+Hpy8I	GTNNAC	3
+Hpy99I	CGWCG	5
+HpyCH4I	CATG	3
+HpyCH4III	ACNGT	3
+HpyCH4IV	ACGT	1
+HpyCH4V	TGCA	2
+HpyF10VI	GCNNNNNNNGC	8
+Hsp92I	GRCGYC	2
+Hsp92II	CATG	4
+HspAI	GCGC	1
+ItaI	GCNGC	2
+KasI	GGCGCC	1
+Kpn2I	TCCGGA	1
+KpnI	GGTACC	5
+Ksp22I	TGATCA	1
+KspAI	GTTAAC	3
+KspI	CCGCGG	4
+Kzo9I	GATC	0
+LpnI	RGCGCY	3
+LspI	TTCGAA	2
+MabI	ACCWGGT	1
+MaeI	CTAG	1
+MaeII	ACGT	1
+MaeIII	GTNAC	0
+MamI	GATNNNNATC	5
+MboI	GATC	0
+McrI	CGRYCG	4
+MfeI	CAATTG	1
+MflI	RGATCY	1
+MhlI	GDGCHC	5
+MlsI	TGGCCA	3
+MluI	ACGCGT	1
+MluNI	TGGCCA	3
+Mly113I	GGCGCC	2
+Mph1103I	ATGCAT	5
+MroI	TCCGGA	1
+MroNI	GCCGGC	1
+MroXI	GAANNNNTTC	5
+MscI	TGGCCA	3
+MseI	TTAA	1
+MslI	CAYNNNNRTG	5
+Msp20I	TGGCCA	3
+MspA1I	CMGCKG	3
+MspCI	CTTAAG	1
+MspI	CCGG	1
+MspR9I	CCNGG	2
+MssI	GTTTAAAC	4
+MstI	TGCGCA	3
+MunI	CAATTG	1
+MvaI	CCWGG	2
+MvnI	CGCG	2
+MwoI	GCNNNNNNNGC	7
+NaeI	GCCGGC	3
+NarI	GGCGCC	2
+NciI	CCSGG	2
+NcoI	CCATGG	1
+NdeI	CATATG	2
+NdeII	GATC	0
+NgoAIV	GCCGGC	1
+NgoMIV	GCCGGC	1
+NheI	GCTAGC	1
+NlaIII	CATG	4
+NlaIV	GGNNCC	3
+Nli3877I	CYCGRG	5
+NmuCI	GTSAC	0
+NotI	GCGGCCGC	2
+NruGI	GACNNNNNGTC	6
+NruI	TCGCGA	3
+NsbI	TGCGCA	3
+NsiI	ATGCAT	5
+NspBII	CMGCKG	3
+NspI	RCATGY	5
+NspIII	CYCGRG	1
+NspV	TTCGAA	2
+OliI	CACNNNNGTG	5
+PacI	TTAATTAA	5
+PaeI	GCATGC	5
+PaeR7I	CTCGAG	1
+PagI	TCATGA	1
+PalI	GGCC	2
+PauI	GCGCGC	1
+PceI	AGGCCT	3
+PciI	ACATGT	1
+PdiI	GCCGGC	3
+PdmI	GAANNNNTTC	5
+Pfl23II	CGTACG	1
+PflBI	CCANNNNNTGG	7
+PflFI	GACNNNGTC	4
+PflMI	CCANNNNNTGG	7
+PfoI	TCCNGGA	1
+PinAI	ACCGGT	1
+Ple19I	CGATCG	4
+PmaCI	CACGTG	3
+PmeI	GTTTAAAC	4
+PmlI	CACGTG	3
+Ppu10I	ATGCAT	1
+PpuMI	RGGWCCY	2
+PpuXI	RGGWCCY	2
+PshAI	GACNNNNGTC	5
+PshBI	ATTAAT	2
+PsiI	TTATAA	3
+Psp03I	GGWCC	4
+Psp124BI	GAGCTC	5
+Psp1406I	AACGTT	2
+Psp5II	RGGWCCY	2
+Psp6I	CCWGG	0
+PspAI	CCCGGG	1
+PspEI	GGTNACC	1
+PspGI	CCWGG	0
+PspLI	CGTACG	1
+PspN4I	GGNNCC	3
+PspOMI	GGGCCC	1
+PspPI	GGNCC	1
+PspPPI	RGGWCCY	2
+PssI	RGGNCCY	5
+PstI	CTGCAG	5
+PsuI	RGATCY	1
+PsyI	GACNNNGTC	4
+PvuI	CGATCG	4
+PvuII	CAGCTG	3
+RcaI	TCATGA	1
+RsaI	GTAC	2
+Rsr2I	CGGWCCG	2
+RsrII	CGGWCCG	2
+SacI	GAGCTC	5
+SacII	CCGCGG	4
+SalI	GTCGAC	1
+SanDI	GGGWCCC	2
+SatI	GCNGC	2
+Sau3AI	GATC	0
+Sau96I	GGNCC	1
+SauI	CCTNAGG	2
+SbfI	CCTGCAGG	6
+ScaI	AGTACT	3
+SciI	CTCGAG	3
+ScrFI	CCNGG	2
+SdaI	CCTGCAGG	6
+SduI	GDGCHC	5
+SecI	CCNNGG	1
+SelI	CGCG	0
+SexAI	ACCWGGT	1
+SfcI	CTRYAG	1
+SfeI	CTRYAG	1
+SfiI	GGCCNNNNNGGCC	8
+SfoI	GGCGCC	3
+Sfr274I	CTCGAG	1
+Sfr303I	CCGCGG	4
+SfuI	TTCGAA	2
+SgfI	GCGATCGC	5
+SgrAI	CRCCGGYG	2
+SgrBI	CCGCGG	4
+SinI	GGWCC	1
+SlaI	CTCGAG	1
+SmaI	CCCGGG	3
+SmiI	ATTTAAAT	4
+SmiMI	CAYNNNNRTG	5
+SmlI	CTYRAG	1
+SnaBI	TACGTA	3
+SpaHI	GCATGC	5
+SpeI	ACTAGT	1
+SphI	GCATGC	5
+SplI	CGTACG	1
+SrfI	GCCCGGGC	4
+Sse232I	CGCCGGCG	2
+Sse8387I	CCTGCAGG	6
+Sse8647I	AGGWCCT	2
+Sse9I	AATT	0
+SseBI	AGGCCT	3
+SspBI	TGTACA	1
+SspI	AATATT	3
+SstI	GAGCTC	5
+SstII	CCGCGG	4
+StuI	AGGCCT	3
+StyI	CCWWGG	1
+SunI	CGTACG	1
+SwaI	ATTTAAAT	4
+TaaI	ACNGT	3
+TaiI	ACGT	4
+TaqI	TCGA	1
+TasI	AATT	0
+TatI	WGTACW	1
+TauI	GCSGC	4
+TelI	GACNNNGTC	4
+TfiI	GAWTC	1
+ThaI	CGCG	2
+TliI	CTCGAG	1
+Tru1I	TTAA	1
+Tru9I	TTAA	1
+TscI	ACGT	4
+TseI	GCWGC	1
+Tsp45I	GTSAC	0
+Tsp4CI	ACNGT	3
+Tsp509I	AATT	0
+TspEI	AATT	0
+Tth111I	GACNNNGTC	4
+TthHB8I	TCGA	1
+UnbI	GGNCC	0
+Van91I	CCANNNNNTGG	7
+Vha464I	CTTAAG	1
+VneI	GTGCAC	1
+VpaK11AI	GGWCC	0
+VpaK11BI	GGWCC	1
+VspI	ATTAAT	2
+XagI	CCTNNNNNAGG	5
+XapI	RAATTY	1
+XbaI	TCTAGA	1
+XceI	RCATGY	5
+XcmI	CCANNNNNNNNNTGG	8
+XhoI	CTCGAG	1
+XhoII	RGATCY	1
+XmaCI	CCCGGG	1
+XmaI	CCCGGG	1
+XmaIII	CGGCCG	1
+XmaJI	CCTAGG	1
+XmiI	GTMKAC	2
+XmnI	GAANNNNTTC	5
+XspI	CTAG	1
+ZhoI	ATCGAT	2
+ZraI	GACGTC	3
+Zsp2I	ATGCAT	5

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/itype2.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/itype2.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/itype2.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,237 @@
+# $Id: itype2.pm,v 1.6.4.1 2006/10/02 23:10:23 sendu Exp $
+# BioPerl module for Bio::Restriction::IO::itype2
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Restriction::IO::itype2 - itype2 enzyme set
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Restriction::IO class.
+
+=head1 DESCRIPTION
+
+This is tab delimited, entry per line format which is fast to process.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Restriction::IO::itype2;
+
+use strict;
+
+use Bio::Restriction::Enzyme;
+use Bio::Restriction::EnzymeCollection;
+
+use Data::Dumper;
+
+use base qw(Bio::Restriction::IO::base);
+
+
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+
+    # Create the empty object first; all setup happens in _initialize.
+    my $self = bless {}, $class;
+    $self->_initialize(@args);
+
+    return $self;
+}
+
+sub _initialize {
+    # The archived text read "at args" here (address obfuscation of
+    # "@args"), which is not valid Perl.
+    my ($self, @args) = @_;
+
+    # Extract the -verbose named argument; fall back to 0 when it is
+    # absent or false.  (The original "$verbose || 0;" discarded its
+    # result and therefore had no effect.)
+    my ($verbose) = $self->_rearrange([qw(VERBOSE)], @args);
+    $verbose ||= 0;
+    $self->verbose($verbose);
+
+    $self->_companies;
+    return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 read
+
+ Title   : read
+ Usage   : $renzs = $stream->read
+ Function: reads all the restriction enzymes from the stream
+ Returns : a Bio::Restriction::EnzymeCollection object
+ Args    : none
+
+Internally creates a hash of enzyme information which is passed on to
+_create_enzyme method. See
+L<Bio::Restriction::IO::base::_create_enzyme>.
+
+=cut
+
+sub read {
+    my $self = shift;
+
+    # Collection that receives every enzyme parsed from the stream.
+    my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+
+    # read until start of data: lines beginning with a space or 'R' are
+    # skipped (presumably the file header -- TODO confirm); the first
+    # other line is pushed back for the main loop
+    while (defined( my $line = $self->_readline()) ) {
+        next if $line =~ /^[ R]/;
+        $self->_pushback($line);
+        last;
+    }
+
+    # enzyme name [tab] prototype [tab] recognition sequence with
+    # cleavage site [tab] methylation site and type [tab] commercial
+    # source [tab] references
+
+    while (defined(my $line = $self->_readline()) ) {
+        $self->debug($line);
+        chomp $line;
+
+        my ($name, $prototype, $site, $meth, $vendor, $refs) = split /\t/, $line;
+        # we need minimum name and site
+        unless ($site) {
+            $self->warn("Can not parse line with name [$name]") if $self->verbose > 0;
+            next;
+        }
+        next unless $name;
+
+        # four cut enzymes are not in this format: a leading "(n/m)"
+        # is remembered in $precut and removed from the site string
+        my $precut;
+        if ($site =~ m/^\((\d+\/\d+)\)[ATGCN]+/) {
+            $precut=$1;
+            $site =~ s/\(\Q$precut\E\)//;
+        }
+        # -------------- cut ---------------
+
+        # comma separated sites: the first one is used for this enzyme,
+        # the rest are handed to _make_multisites below
+        my @sequences;
+        if ($site =~ /\,/) {
+            @sequences = split /\,/, $site;
+            $site=shift @sequences;
+        }
+
+        my ($cut, $comp_cut);
+        ($site, $cut, $comp_cut) = $self->_cuts_from_site($site);
+
+
+        my $re = Bio::Restriction::Enzyme->new(-name=>$name,
+                                               -site => $site
+                                              );
+        $renzs->enzymes($re);
+
+        if ($cut) {
+            $re->cut($self->_coordinate_shift_to_cut(length($site), $cut));
+            $re->complementary_cut($self->_coordinate_shift_to_cut(length($site), $comp_cut));
+        }
+
+
+        #
+        # prototype
+        #
+        # NOTE(review): is_prototype is called without an argument when
+        # the prototype column is empty -- confirm that this marks the
+        # enzyme as a prototype rather than merely reading the flag.
+
+        $prototype ? $re->prototype_name($prototype) : $re->is_prototype;
+
+
+        #
+        # methylation
+        #
+        my @meths;
+        if ($meth) {
+            # this can be either X(Y) or X(Y),X2(Y2)
+            # where X is the base and y is the type of methylation
+            if ( $meth =~ /(\S+)\((\d+)\),(\S+)\((\d+)\)/ ) { # two msites per site
+                #my ($p1, $m1, $p2, $m2) = ($1, $2, $3, $4);
+                $re->methylation_sites($self->_meth($re,$1, $2),
+                                       $self->_meth($re,$3,$4));
+            }
+            elsif ($meth =~ /(\S+)\((\d+)\)/ ) { # one msite per site or more sites
+                #print Dumper $meth;
+                $re->methylation_sites( $self->_meth($re,$1,$2) );
+                # entries after the first are kept for _make_multisites
+                @meths = split /, /, $meth;
+                $meth=shift @meths;
+            } else {
+                $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
+            }
+        }
+
+        #
+        # vendors
+        #
+        # the pattern also matches the empty string, so the field is
+        # split into its individual non-space characters
+        if ($vendor) {
+            $re->vendors( split / */, $vendor);
+        }
+
+        #
+        # references
+        #
+        $re->references(map {split /\n+/} $refs) if $refs;
+
+        #
+        # create special types of Enzymes
+        #
+        $self->_make_multisites($renzs, $re, \@sequences, \@meths) if @sequences;
+        $self->_make_multicuts($renzs, $re, $precut) if $precut;
+
+    }
+
+    return $renzs;
+}
+
+
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write($renzs)
+ Function: writes restriction enzymes into the stream
+ Returns : 1 for success and 0 for error
+ Args    : a Bio::Restriction::Enzyme
+           or a Bio::Restriction::EnzymeCollection object
+
+=cut
+
+sub write {
+    # The archived text read "at h" here (address obfuscation of "@h"),
+    # which is not valid Perl.
+    my ($self, @h) = @_;
+
+    # Writing is not available for this format; the arguments are
+    # ignored and the call is handed to throw_not_implemented.
+    $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/withrefm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/withrefm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO/withrefm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,251 @@
+# $Id: withrefm.pm,v 1.8.4.1 2006/10/02 23:10:23 sendu Exp $
+# BioPerl module for Bio::Restriction::IO::withrefm
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Restriction::IO::withrefm - withrefm enzyme set
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Restriction::IO class.
+
+=head1 DESCRIPTION
+
+This is the most complete format of the REBASE files, and basically
+includes all the data on each of the restriction enzymes.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Restriction::IO::withrefm;
+
+use vars qw(%WITH_REFM_FIELD);
+use strict;
+
+#use Bio::Restriction::IO;
+use Bio::Restriction::Enzyme;
+use Bio::Restriction::EnzymeCollection;
+
+use Data::Dumper;
+
+use base qw(Bio::Restriction::IO::base);
+
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+
+    # Bless an empty hash, then let _initialize do the actual setup.
+    my $self = bless {}, $class;
+    $self->_initialize(@args);
+
+    return $self;
+}
+
+sub _initialize {
+    # The archived text read "at args" here (address obfuscation of
+    # "@args"), which is not valid Perl.
+    my ($self, @args) = @_;
+
+    # Extract the -verbose named argument; fall back to 0 when it is
+    # absent or false.  (The original "$verbose || 0;" discarded its
+    # result and therefore had no effect.)
+    my ($verbose) = $self->_rearrange([qw(VERBOSE)], @args);
+    $verbose ||= 0;
+    $self->verbose($verbose);
+
+    return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 read
+
+ Title   : read
+ Usage   : $renzs = $stream->read
+ Function: reads all the restriction enzymes from the stream
+ Returns : a Bio::Restriction::EnzymeCollection object
+ Args    : none
+
+=cut
+
+sub read {
+    my $self = shift;
+
+    # Collection that receives every enzyme parsed from the stream.
+    my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
+
+    # Records are read one at a time, using '<1>' as the input record
+    # separator for the duration of this call.
+    local $/ = '<1>';
+    while (defined(my $entry=$self->_readline()) ) {
+
+        # not an entry.
+        next unless $entry =~ /<2>/;
+
+        #$self->debug("|$entry|\n");
+
+        #
+        # Minimal information
+        #
+        my ($name) = $entry =~ /^(\S+)/;
+        my ($site) = $entry =~ /\<3\>([^\n]+)/;
+
+
+        if ( ! defined $site || $site eq '' || $site eq '?') {
+            $self->warn("$name: no site. Skipping") if $self->verbose > 1;
+            next;
+        }
+
+        # a site of the form "(a/b)SEQ(c/d)": the leading "(a/b)" is
+        # remembered in $precut and removed from the site string
+        my $precut;
+        if ($site =~ m/^\((\w+\/\w+)\)\w+\((\w+\/\w+)\)/) {
+            $precut = $1;
+            $site =~ s/\(\Q$precut\E\)//;
+        }
+
+        # there are a couple of sequences that have multiple
+        # recognition sites eg M.PhiBssHII: ACGCGT,CCGCGG,RGCGCY,RCCGGY,GCGCGC
+
+
+        my @sequences;
+        if ($site =~ /\,/) {
+            @sequences = split /\,/, $site;
+            $site=shift @sequences;
+        }
+
+        my ($cut, $comp_cut);
+        ($site, $cut, $comp_cut) = $self->_cuts_from_site($site);
+
+        my $re = Bio::Restriction::Enzyme->new(-name=>$name,
+                                               -site => $site
+                                              );
+        $renzs->enzymes($re);
+
+        if ($cut) {
+            $re->cut($self->_coordinate_shift_to_cut(length($site), $cut));
+            $re->complementary_cut($self->_coordinate_shift_to_cut(length($site), $comp_cut));
+        }
+
+        #
+        # prototype / isoschizomers
+        #
+        # an enzyme with a non-empty <2> field is flagged as not being
+        # a prototype; one without is flagged as a prototype
+
+        my ($isoschizomers) = $entry =~ /<2>([^\n]+)/;
+
+        if ($isoschizomers) {
+            $re->isoschizomers(split /\,/, $isoschizomers);
+            $re->is_prototype(0);
+        } else {
+            $re->is_prototype(1);
+        }
+
+        #
+        # methylation
+        #
+
+        my ($meth) = $entry =~ /<4>([^\n]+)/;
+        my @meths;
+        if ($meth) {
+            # this can be either X(Y) or X(Y),X2(Y2)
+            # where X is the base and y is the type of methylation
+            if ( $meth =~ /(\S+)\((\d+)\),(\S+)\((\d+)\)/ ) { # two msites per site
+                #my ($p1, $m1, $p2, $m2) = ($1, $2, $3, $4);
+                $re->methylation_sites($self->_meth($re,$1, $2),
+                                       $self->_meth($re,$3,$4));
+            }
+            elsif ($meth =~ /(\S+)\((\d+)\)/ ) { # one msite per site or more sites
+                #print Dumper $meth;
+                $re->methylation_sites( $self->_meth($re,$1,$2) );
+                # entries after the first are kept for _make_multisites
+                @meths = split /, /, $meth;
+                $meth=shift @meths;
+            } else {
+                $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
+            }
+        }
+
+        #
+        # microbe
+        #
+        my ($microbe) = $entry =~ /<5>([^\n]+)/;
+        $re->microbe($microbe) if $microbe;
+
+        #
+        # source
+        #
+        my ($source) = $entry =~ /<6>([^\n]+)/;
+        $re->source($source) if $source;
+
+        #
+        # vendors
+        #
+        # the pattern also matches the empty string, so the field is
+        # split into its individual non-space characters
+        my ($vendors) = $entry =~ /<7>([^\n]+)/;
+        $re->vendors(split / */, $vendors) if $vendors;
+
+        #
+        # references
+        #
+        # everything after <8> up to the end of the record, one
+        # reference per non-empty line
+        my ($refs) = $entry =~ /<8>(.+)/s;
+        $re->references(map {split /\n+/} $refs) if $refs;
+
+
+        #
+        # create special types of Enzymes
+        #
+        $self->_make_multisites($renzs, $re, \@sequences, \@meths) if @sequences;
+
+        $self->_make_multicuts($renzs, $re, $precut) if $precut;
+
+    }
+
+    return $renzs;
+}
+
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write($renzs)
+ Function: writes restriction enzymes into the stream
+ Returns : 1 for success and 0 for error
+ Args    : a Bio::Restriction::Enzyme
+           or a Bio::Restriction::EnzymeCollection object
+
+=cut
+
+sub write {
+    # The archived text read "at h" here (address obfuscation of "@h"),
+    # which is not valid Perl.
+    my ($self, @h) = @_;
+
+    # Writing is not available for this format; the arguments are
+    # ignored and the call is handed to throw_not_implemented.
+    $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Restriction/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,236 @@
+# $Id: IO.pm,v 1.12.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::Restriction::IO
+#
+# Cared for by Rob Edwards <redwards at utmem.edu>
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Restriction::IO - Handler for restriction enzyme IO formats
+
+=head1 SYNOPSIS
+
+    use Bio::Restriction::IO;
+
+    $in  = Bio::Restriction::IO->new(-file => "inputfilename" ,
+                                     -format => 'withrefm');
+    my $res = $in->read; # a Bio::Restriction::EnzymeCollection
+
+=head1 DESCRIPTION
+
+L<Bio::Restriction::IO> is a handler module for the formats in the
+Restriction IO set, e.g. C<Bio::Restriction::IO::xxx>. It is the
+officially sanctioned way of getting at the format objects, which most
+people should use.
+
+The structure, conventions and most of the code is inherited from
+L<Bio::SeqIO>. The main difference is that instead of using methods
+C<next_seq>, you drop C<_seq> from the method name.
+
+Also, instead of dealing only with individual L<Bio::Restriction::Enzyme>
+objects, C<read()> will slurp in all enzymes into a 
+L<Bio::Restriction::EnzymeCollection> object.
+
+For more details, see documentation in L<Bio::SeqIO>.
+
+=head1 TO DO
+
+At the moment, these can be used mainly to get a custom set of enzymes in
+C<withrefm> or C<itype2> formats into L<Bio::Restriction::Enzyme> or
+L<Bio::Restriction::EnzymeCollection> objects.  Using C<bairoch> format is
+highly experimental and is not recommended at this time.
+
+This class inherits from L<Bio::SeqIO> for convenience sake, though this should
+inherit from L<Bio::Root::Root>.  Get rid of L<Bio::SeqIO> inheritance by
+copying relevant methods in.
+
+C<write()> methods are currently not implemented for any format except C<base>.
+Using C<write()> even with C<base> format is not recommended as it does not
+support multicut/multisite enzyme output.
+
+Should additional formats be supported (such as XML)?
+
+=head1 SEE ALSO
+
+L<Bio::SeqIO>, 
+L<Bio::Restriction::Enzyme>, 
+L<Bio::Restriction::EnzymeCollection>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Restriction::IO;
+
+use strict;
+use vars qw(%FORMAT);
+use base qw(Bio::SeqIO);
+
+# Accepted -format values => name of the handler module under
+# Bio::Restriction::IO.  The numeric keys are presumably the REBASE
+# format numbers -- TODO confirm.
+%FORMAT = (
+    ( map { $_ => 'itype2'   } qw(itype2 8) ),
+    ( map { $_ => 'withrefm' } qw(withrefm 31) ),
+    ( map { $_ => 'base'     } qw(base 0) ),
+    ( map { $_ => 'bairoch'  } qw(bairoch 19 macvector vectorNTI) ),
+);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::Restriction::IO->new(-file => $filename,
+                                               -format => 'Format')
+ Function: Returns a new seqstream
+ Returns : A Bio::Restriction::IO::Handler initialised with
+           the appropriate format
+ Args    : -file => $filename
+           -format => format
+           -fh => filehandle to attach to
+
+=cut
+
+sub new {
+   my ($class, %param) = @_;
+
+   # Make every argument also reachable under its lower-cased key; the
+   # original keys are left in place.
+   my @names    = keys %param;
+   my @settings = @param{@names};
+   @param{ map { lc $_ } @names } = @settings;
+
+   # An explicit -format is looked up in %FORMAT exactly as given.  If
+   # that yields nothing, guess from the file name (or $ARGV[0]), and
+   # finally fall back to 'base'.
+   my $format;
+   if (defined $param{'-format'}) {
+       $format = $FORMAT{ $param{'-format'} };
+   }
+   unless ($format) {
+       $format = $class->_guess_format( $param{-file} || $ARGV[0] ) || 'base';
+   }
+   $format = lc $format; # normalize capitalization to lower case
+
+   # Returns nothing when the handler module cannot be loaded.
+   return unless $class->_load_format_module($format);
+   return "Bio::Restriction::IO::$format"->new(%param);
+}
+
+
+sub _load_format_module {
+  my ($class, $format) = @_;
+
+  # Try to load the handler; an exception is caught here and reported
+  # on STDERR instead of being propagated.
+  my $loaded;
+  my $module = "Bio::Restriction::IO::$format";
+  eval { $loaded = $class->_load_module($module) };
+
+  print STDERR <<END if $@;
+$class: $format cannot be found
+Exception $@
+For more information about the IO system please see the IO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+
+  # undef when the load died, otherwise whatever _load_module returned
+  return $loaded;
+}
+
+=head2 read
+
+ Title   : read
+ Usage   : $renzs = $stream->read
+ Function: reads all the restriction enzymes from the stream
+ Returns : a Bio::Restriction::EnzymeCollection object
+ Args    :
+
+=cut
+
+sub read {
+   my $self = shift;
+   # Not implemented at this level; any further argument is ignored.
+   return $self->throw_not_implemented();
+}
+
+sub next {
+   my $self = shift;
+   # Not implemented at this level; any further argument is ignored.
+   return $self->throw_not_implemented();
+}
+
+sub next_seq {
+   my $self = shift;
+   # Not implemented at this level; any further argument is ignored.
+   return $self->throw_not_implemented();
+}
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Restriction::EnzymeCollection object
+
+=cut
+
+sub write {
+    my ($self, $seq) = @_;
+    # The generic handler has no output format of its own, so this
+    # always goes to throw(); $seq is not examined.  (The message
+    # previously misspelled the class name as "Bio::Restricion::IO".)
+    $self->throw("Sorry, you cannot write to a generic ".
+                 "Bio::Restriction::IO object.");
+}
+
+sub write_seq {
+   my $self  = shift;
+   my ($seq) = @_;
+
+   # Point the caller at the right method name, then forward to write().
+   my $notice = "These are not sequence objects. ".
+                "Use method 'write' instead of 'write_seq'.";
+   $self->warn($notice);
+
+   return $self->write($seq);
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+sub _guess_format {
+   # A lexical is used for the file name: the previous version assigned
+   # it to the global $_, clobbering the caller's $_.
+   my ($class, $filename) = @_;
+
+   # no (or false) file name: nothing to guess from
+   return unless $filename;
+
+   # NOTE(review): neither 'flat' nor 'xml' is a key of %FORMAT in this
+   # file -- confirm that matching handler modules exist.
+   return 'flat' if $filename =~ /\.dat$/i;
+   return 'xml'  if $filename =~ /\.xml$/i;
+
+   # unrecognised extension: return nothing (false)
+   return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/Exception.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/Exception.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/Exception.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,469 @@
+#-----------------------------------------------------------------
+# $Id: Exception.pm,v 1.15.8.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module Bio::Root::Exception
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+=head1 NAME
+
+Bio::Root::Exception - Generic exception objects for Bioperl
+
+=head1 SYNOPSIS
+
+=head2 Throwing exceptions using L<Error::throw()>:
+
+    use Bio::Root::Exception;
+    use Error;
+
+    # Set Error::Debug to include stack trace data in the error messages
+    $Error::Debug = 1;
+
+    $file = shift;
+    open (IN, $file) ||
+	    throw Bio::Root::FileOpenException ( "Can't open file $file for reading", $!);
+
+=head2 Throwing exceptions using L<Bio::Root::Root::throw()>:
+
+     # Here we have an object that ISA Bio::Root::Root, so it inherits throw().
+
+     open (IN, $file) || 
+                $object->throw(-class => 'Bio::Root::FileOpenException',
+                               -text => "Can't open file $file for reading",
+                               -value => $!);
+
+=head2 Catching and handling exceptions using L<Error::try()>:
+
+    use Bio::Root::Exception;
+    use Error qw(:try);
+
+    # Note that we need to import the 'try' tag from Error.pm
+
+    # Set Error::Debug to include stack trace data in the error messages
+    $Error::Debug = 1;
+
+    $file = shift;
+    try {
+        open (IN, $file) ||
+	    throw Bio::Root::FileOpenException ( "Can't open file $file for reading", $!);
+    }
+    catch Bio::Root::FileOpenException with {
+        my $err = shift;
+        print STDERR "Using default input file: $default_file\n";
+        open (IN, $default_file) || die "Can't open $default_file";
+    }
+    otherwise {
+        my $err = shift;
+    	print STDERR "An unexpected exception occurred: \n$err";
+
+	# By placing the error object reference within double quotes,
+	# you're invoking its stringify() method.
+    }
+   finally {
+       # Any code that you want to execute regardless of whether or not
+       # an exception occurred.
+   };  
+   # the ending semicolon is essential!
+
+
+=head2 Defining a new Exception type as a subclass of Bio::Root::Exception:
+
+    @Bio::TestException::ISA = qw( Bio::Root::Exception );
+
+
+=head1 DESCRIPTION
+
+=head2 Exceptions defined in L<Bio::Root::Exception>
+
+These are generic exceptions for typical problem situations that could arise
+in any module or script. 
+
+=over 8
+
+=item Bio::Root::Exception()
+
+=item Bio::Root::NotImplemented()
+
+=item Bio::Root::IOException()
+
+=item Bio::Root::FileOpenException()
+
+=item Bio::Root::SystemException()
+
+=item Bio::Root::BadParameter()
+
+=item Bio::Root::OutOfRange()
+
+=item Bio::Root::NoSuchThing()
+
+=back
+
+Using defined exception classes like these is a good idea because it
+indicates the basic nature of what went wrong in a convenient,
+computable way.
+
+If there is a type of exception that you want to throw
+that is not covered by the classes listed above, it is easy to define
+a new one that fits your needs. Just write a line like the following
+in your module or script where you want to use it (or put it somewhere
+that is accessible to your code):
+
+    @NoCanDoException::ISA = qw( Bio::Root::Exception );
+
+All of the exceptions defined in this module inherit from a common
+base class exception, Bio::Root::Exception. This allows a user to
+write a handler for all Bioperl-derived exceptions as follows:
+
+           use Bio::Whatever;
+           use Error qw(:try);
+
+           try {
+                # some code that depends on Bioperl
+           }
+           catch Bio::Root::Exception with {
+               my $err = shift;
+               print "A Bioperl exception occurred:\n$err\n";
+           };
+
+So if you do create your own exceptions, just be sure they inherit
+from Bio::Root::Exception directly, or indirectly by inheriting from a
+Bio::Root::Exception subclass.
+
+The exceptions in Bio::Root::Exception are extensions of Graham Barr's
+L<Error> module available from CPAN.  Despite this dependency, the
+L<Bio::Root::Exception> module does not explicitly C<require Error>.
+This permits Bio::Root::Exception to be loaded even when
+Error.pm is not available.
+
+=head2 Throwing exceptions within Bioperl modules
+
+Error.pm is not part of the Bioperl distribution, and may not be
+present within  any given perl installation. So, when you want to 
+throw an exception in a Bioperl module, the safe way to throw it
+is to use L<Bio::Root::Root::throw()> which can use Error.pm 
+when it's available. See documentation in Bio::Root::Root for details.
+
+=head1 SEE ALSO
+
+See the C<examples/exceptions> directory of the Bioperl distribution for 
+working demo code.
+
+L<Bio::Root::Root::throw()> for information about throwing 
+L<Bio::Root::Exception>-based exceptions.
+
+L<Error> (available from CPAN, author: GBARR)
+
+Error.pm is helping to guide the design of exception handling in Perl 6. 
+See these RFC's: 
+
+     http://dev.perl.org/rfc/63.pod 
+
+     http://dev.perl.org/rfc/88.pod
+
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 EXCEPTIONS
+
+=cut
+
+# Define some generic exceptions.'
+
+package Bio::Root::Exception;
+use Bio::Root::Version;
+
+use strict;
+
+# Reading $Error::Debug once here keeps perl from reporting it as
+# "used only once"; the copy in $debug is not used elsewhere in the
+# code visible in this module.
+my $debug = $Error::Debug;  # Prevents the "used only once" warning.
+# Placeholder stored as -value by new() when the caller supplies none;
+# pretty_format() leaves out the VALUE line when it sees this placeholder.
+my $DEFAULT_VALUE = "__DUMMY__";  # Permits eval{} based handlers to work
+
+=head2 L<Bio::Root::Exception>
+
+ Purpose : A generic base class for all BioPerl exceptions.
+           By including a "catch Bio::Root::Exception" block, you
+           should be able to trap all BioPerl exceptions.
+ Example : throw Bio::Root::Exception("A generic exception", $!);
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::Exception::ISA = qw( Error );
+#---------------------------------------------------------
+
+=head2 Methods defined by Bio::Root::Exception
+
+=over 4
+
+=item L< new() >
+
+ Purpose : Guarantees that -value is set properly before
+           calling Error::new().
+
+ Arguments: key-value style arguments same as for Error::new()
+
+     You can also specify plain arguments as ($message, $value)
+     where $value is optional.
+
+     -value, if defined, must be non-zero and not an empty string 
+     in order for eval{}-based exception handlers to work. 
+     These require that if($@) evaluates to true, which will not 
+     be the case if the Error has no value (Error overloads 
+     numeric operations to the Error::value() method).
+
+     It is OK to create Bio::Root::Exception objects without
+     specifying -value. In this case, an invisible dummy value is used.
+
+     If you happen to specify a -value of zero (0), it will
+     be replaced by the string "The number zero (0)".
+
+     If you happen to specify a -value of empty string (""), it will
+     be replaced by the string "An empty string ("")".
+
+=cut
+
+sub new {
+    # Constructor. Accepts either named arguments (-text => ..., -value => ...)
+    # or the positional form ($message, $value), and makes sure the -value
+    # handed to the parent constructor is always a true value.
+    my ($class, @args) = @_; 
+    my ($value, %params);
+    # The 'defined' guard avoids an "uninitialized value" warning when the
+    # constructor is called without any arguments.
+    if( @args % 2 == 0 && defined $args[0] && $args[0] =~ /^-/) {
+        %params = @args;
+        $value = $params{'-value'};
+    }
+    else {
+        $params{-text} = $args[0];
+        $value = $args[1];
+    }
+
+    if( defined $value and not $value) {
+        # A defined-but-false value is either "" or some form of zero.
+        # Test for the empty string first: "" == 0 is also true
+        # numerically, so checking for zero first would mislabel an
+        # empty string as "The number zero (0)".
+        $value = $value eq ""
+            ? "An empty string (\"\")"
+            : "The number zero (0)";
+    }
+    else {
+        # No value supplied: fall back to the invisible placeholder.
+        $value ||= $DEFAULT_VALUE;
+    }
+    $params{-value} = $value;
+
+    my $self = $class->SUPER::new( %params );
+    return $self;
+}
+
+=item pretty_format()
+
+ Purpose : Get a nicely formatted string containing information about the 
+           exception. Format is similar to that produced by 
+           Bio::Root::Root::throw(), with the addition of the name of
+           the exception class in the EXCEPTION line and some other
+           data available via the Error object.
+ Example : print $error->pretty_format;
+
+=cut
+
+sub pretty_format {
+    # Build the multi-line report for this exception: a title line naming
+    # the exception class, the message, the value (unless it is the
+    # internal placeholder), the reformatted stack trace (only when
+    # $Error::Debug is set) and a closing rule as wide as the title.
+    my ($exception) = @_;
+    my $text  = $exception->text;
+    my $trace = '';
+    $trace = $exception->_reformat_stacktrace() if $Error::Debug;
+
+    my $value_line = "";
+    if( $exception->value ne $DEFAULT_VALUE ) {
+        $value_line = "VALUE: ".$exception->value."\n";
+    }
+
+    my $banner = "------------- EXCEPTION: " . ref($exception) . " -------------";
+    my $rule   = '-' x CORE::length($banner);
+
+    return "\n$banner\n" .
+       "MSG: $text\n". $value_line. $trace. "\n" . $rule . "\n";
+}
+
+
+# Reformatting of the stack performed by  _reformat_stacktrace:
+#   1. Shift the file:line data in line i to line i+1.
+#   2. change xxx::__ANON__() to "try{} block"
+#   3. skip the "require" and "Error::subs::try" stack entries (boring)
+# This means that the first line in the stack won't have any file:line data
+# But this isn't a big issue since it's for a Bio::Root::-based method 
+# that doesn't vary from exception to exception.
+
+sub _reformat_stacktrace {
+    # Turn the text returned by stacktrace() into one "STACK: ..." line
+    # per kept frame and return those lines joined with newlines.
+    my $self = shift;
+    my $msg = $self->text;
+    my $stack = $self->stacktrace();
+    # Drop the first occurrence of the message text from the trace.
+    $stack =~ s/\Q$msg//;
+    my @stack = split( /\n/, $stack);
+    my @new_stack = ();
+    my ($method, $file, $linenum, $prev_file, $prev_linenum);
+    my $stack_count = 0;
+    foreach my $i( 0..$#stack ) {
+        # print "STACK-ORIG: $stack[$i]\n";
+        # Keep only lines of the form "name(args) called at FILE line N"
+        # or "require 0 called at FILE line N"; anything else is skipped.
+        if( ($stack[$i] =~ /^\s*([^(]+)\s*\(.*\) called at (\S+) line (\d+)/) ||
+             ($stack[$i] =~ /^\s*(require 0) called at (\S+) line (\d+)/)) {
+            ($method, $file, $linenum) = ($1, $2, $3);
+            $stack_count++;
+        }
+        else{
+            next;
+        }
+        # The first kept frame is printed without a location; its
+        # file:line is remembered and attached to the following frame.
+        if( $stack_count == 1 ) {
+            push @new_stack, "STACK: $method";
+            ($prev_file, $prev_linenum) = ($file, $linenum);
+            next;
+        }
+
+        if( $method =~ /__ANON__/ ) {
+            $method = "try{} block";
+        }
+        # Stop at the Error.pm 'require' frame or at Error::subs::try.
+        if( ($method =~ /^require/ and $file =~ /Error\.pm/ ) ||
+            ($method =~ /^Error::subs::try/ ) )   {
+            last;
+        }
+        # Each frame is paired with the location taken from the frame
+        # before it (the one-line shift described above the sub).
+        push @new_stack, "STACK: $method $prev_file:$prev_linenum";
+        ($prev_file, $prev_linenum) = ($file, $linenum);
+    }
+    # Emit the location that is still pending after the last kept frame.
+    push @new_stack, "STACK: $prev_file:$prev_linenum";
+
+    return join "\n", @new_stack;
+}
+
+=item L<stringify()>
+
+ Purpose : Overrides Error::stringify() to call pretty_format(). 
+           This is called automatically when an exception object 
+           is placed between double quotes.
+ Example : catch Bio::Root::Exception with {
+              my $error = shift;
+              print "$error";
+           }
+
+See Also: L<pretty_format()|pretty_format>
+
+=cut
+
+sub stringify {
+    # String conversion hook: forwards every argument to pretty_format()
+    # and returns whatever it produces.
+    my ($exception, @extra) = @_;
+    return $exception->pretty_format( @extra );
+}
+
+
+
+=back
+
+=head1 Subclasses of Bio::Root::Exception 
+
+
+=head2 L<Bio::Root::NotImplemented>
+
+ Purpose : Indicates that a method has not been implemented.
+ Example : throw Bio::Root::NotImplemented( 
+               -text   => "Method \"foo\" not implemented in module FooBar.",
+               -value  => "foo" );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::NotImplemented::ISA = qw( Bio::Root::Exception );
+#---------------------------------------------------------
+
+=head2 L<Bio::Root::IOException>
+
+ Purpose : Indicates that some input/output-related trouble has occurred.
+ Example : throw Bio::Root::IOException( 
+               -text   => "Can't save data to file $file.",
+	       -value  => $! );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::IOException::ISA = qw( Bio::Root::Exception );
+#---------------------------------------------------------
+
+
+=head2 L<Bio::Root::FileOpenException>
+
+ Purpose : Indicates that a file could not be opened.
+ Example : throw Bio::Root::FileOpenException( 
+               -text   => "Can't open file $file for reading.",
+	       -value  => $! );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::FileOpenException::ISA = qw( Bio::Root::IOException );
+#---------------------------------------------------------
+
+
+=head2 L<Bio::Root::SystemException>
+
+ Purpose : Indicates that a system call failed.
+ Example : unlink($file) or throw Bio::Root::SystemException( 
+               -text   => "Can't unlink file $file.",
+	       -value  => $! );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::SystemException::ISA = qw( Bio::Root::Exception );
+#---------------------------------------------------------
+
+
+=head2 L<Bio::Root::BadParameter>
+
+ Purpose : Indicates that one or more parameters supplied to a method 
+           are invalid, unspecified, or conflicting.
+ Example : throw Bio::Root::BadParameter( 
+               -text   => "Required parameter \"-foo\" was not specified",
+               -value  => "-foo" );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::BadParameter::ISA = qw( Bio::Root::Exception );
+#---------------------------------------------------------
+
+
+=head2 L<Bio::Root::OutOfRange>
+
+ Purpose : Indicates that a specified (start,end) range or 
+           an index to an array is outside the permitted range.
+ Example : throw Bio::Root::OutOfRange( 
+               -text   => "Start coordinate ($start) cannot be less than zero.",
+               -value  => $start  );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::OutOfRange::ISA = qw( Bio::Root::Exception );
+#---------------------------------------------------------
+
+
+=head2 L<Bio::Root::NoSuchThing>
+
+ Purpose : Indicates that a requested thing cannot be located 
+           and therefore could possibly be bogus.
+ Example : throw Bio::Root::NoSuchThing( 
+               -text   => "Accession M000001 could not be found.",
+               -value  => "M000001"  );
+
+=cut
+
+#---------------------------------------------------------
+@Bio::Root::NoSuchThing::ISA = qw( Bio::Root::Exception );
+#---------------------------------------------------------
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/HTTPget.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/HTTPget.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/HTTPget.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,388 @@
+# $Id: HTTPget.pm,v 1.14.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for fallback HTTP get operations.
+# Module is proxy-aware 
+#
+#  Cared for by Chris Dagdigian <dag at sonsorol.org>
+#  but all of the good stuff was written by
+#  Lincoln Stein.
+# 
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Root::HTTPget - module for fallback HTTP get operations when 
+LWP:: is unavailable
+
+=head1 SYNOPSIS
+
+ use Bio::Root::HTTPget;
+ my $web = new Bio::Root::HTTPget;
+
+ my $response = $web->get('http://localhost');
+ $response    = $web->get('http://localhost/images');
+
+ $response    = eval { $web->get('http://fred:secret@localhost/ladies_only/')
+                     } or warn $@;
+
+ $response    = eval { $web->get('http://jeff:secret@localhost/ladies_only/')
+                     } or warn $@;
+
+ $response    = $web->get('http://localhost/images/navauthors.gif');
+ $response    = $web->get(-url=>'http://www.google.com',
+ 		                    -proxy=>'http://www.modperl.com');
+
+=head1 DESCRIPTION
+
+This is basically a last-chance module for doing network HTTP get
+requests in situations where more advanced external CPAN modules such
+as LWP:: are not installed.
+
+The particular reason this module was developed was so that the Open
+Bio Database Access code can fallback to fetching the default registry
+files from http://open-bio.org/registry/ without having to depend on
+external dependencies like Bundle::LWP for network HTTP access.
+
+The core of this module was written by Lincoln Stein. It can handle proxies
+and HTTP-based proxy authentication.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Lincoln Stein
+
+ Cared for by Chris Dagdigian <dag at sonsorol.org>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Root::HTTPget;
+
+use strict;
+use IO::Socket qw(:DEFAULT :crlf);
+
+use base qw(Bio::Root::Root);
+
+
+=head2 get
+
+ Title   : get
+ Usage   : my $resp = get(-url => $url);
+ Function: 
+ Returns : string
+ Args    : -url     => URL to HTTPGet
+           -proxy   => proxy to use
+           -user    => username for proxy or authentication
+           -pass    => password for proxy or authentication
+           -timeout => timeout
+
+=cut
+
+sub get {
+    # Fetch a URL with a plain HTTP/1.0 GET and return the response body
+    # as a string. Works both as an object method and as a plain function:
+    # an invocant is only recognised when the first argument is a reference.
+    # Throws on an unparsable URL, a failed connection, a malformed status
+    # line, a 401, or any status other than 200 (301/302 are followed).
+    my $self;
+    if( ref($_[0]) ) {
+        $self = shift;
+    }
+
+    my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
+        __PACKAGE__->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
+    # With an explicit proxy we connect to the proxy, not the target host.
+    my $dest  = $proxy || $url;
+
+    my ($host,$port,$path,$user,$pass)
+        = _http_parse_url($dest) or __PACKAGE__->throw("invalid URL $url");
+    # Explicit -user/-pass take precedence over credentials in the URL.
+    $auth_user ||= $user;
+    $auth_pass ||= $pass;
+    if ($self) {
+        # NOTE(review): the object's proxy is consulted only after $host
+        # and $port were parsed, so it changes the request path below but
+        # not the host we connect to -- confirm whether that is intended.
+        unless ($proxy) {
+            $proxy = $self->proxy;
+        }
+        unless ($auth_user) {
+            ($auth_user, $auth_pass) = $self->authentication;
+        }
+    }
+    # When a proxy is in play the request line carries the full URL.
+    $path = $url if $proxy;
+
+    # set up the connection; -timeout is forwarded so it actually applies
+    my $socket = _http_connect($host,$port,$timeout) or __PACKAGE__->throw("can't connect: $@");
+
+    # the request
+    print $socket "GET $path HTTP/1.0$CRLF";
+    print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
+    # Support virtual hosts
+    print $socket "HOST: $host$CRLF";
+
+    if ($auth_user && $auth_pass) { # authentication information
+        # An empty end-of-line string keeps the token on a single line;
+        # the encoder's default would append "\n" (and wrap long tokens),
+        # which corrupts the header.
+        my $token = _encode_base64("$auth_user:$auth_pass", '');
+        print $socket "Authorization: Basic $token$CRLF";
+    }
+    print $socket "$CRLF";
+
+    # read the response headers: everything up to the first blank line
+    my $response;
+    {
+        local $/ = "$CRLF$CRLF";
+        $response = <$socket>;
+    }
+    $response = '' unless defined $response; # connection closed early
+
+    my ($status_line,@other_lines) = split $CRLF,$response;
+    $status_line = '' unless defined $status_line;
+    my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
+        or __PACKAGE__->throw("invalid response from web server: got $response");
+
+    my %headers = map {/^(\S+): (.+)/} @other_lines;
+    if ($stat_code == 302 || $stat_code == 301) { # redirect
+        my $location = $headers{Location} or
+            __PACKAGE__->throw("invalid redirect: no Location header");
+        return get(-url => $location, -proxy => $proxy, -timeout => $timeout, -user => $auth_user, -pass => $auth_pass); # recursive call
+    }
+
+    elsif ($stat_code == 401) { # auth required
+        my $auth_required = $headers{'WWW-Authenticate'};
+        # The header may be absent; avoid matching against undef.
+        $auth_required = '' unless defined $auth_required;
+        $auth_required =~ /^Basic realm="([^\"]+)"/
+            or __PACKAGE__->throw("server requires unknown type of".
+                                  " authentication: $auth_required");
+        __PACKAGE__->throw("request failed: $status_line, realm = $1");
+    }
+
+    elsif ($stat_code != 200) {
+        __PACKAGE__->throw("request failed: $status_line");
+    }
+
+    # Slurp the body in 2048-byte chunks, appending to $response.
+    $response = '';
+    while (1) {
+        my $bytes = read($socket,$response,2048,length $response);
+        # read() gives 0 at end of file and undef on error; stop on both.
+        last unless $bytes;
+    }
+
+    $response;
+}
+
+=head2 getFH
+
+ Title   : getFH
+ Usage   : 
+ Function:
+ Example :
+ Returns : the open socket (filehandle), positioned after the response headers
+ Args    : 
+
+=cut
+
+sub getFH {
+  # Issue a plain HTTP/1.0 GET and, once the status line and headers have
+  # been read and checked, return the open socket so the caller can read
+  # the body as a filehandle. Throws on an unparsable URL, a failed
+  # connection, a malformed status line, a 401, or any status other than
+  # 200 (301/302 are followed).
+  my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
+    __PACKAGE__->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
+  # With a proxy we connect to the proxy, not the target host.
+  my $dest  = $proxy || $url;
+
+  my ($host,$port,$path,$user,$pass)
+    = _http_parse_url($dest) or __PACKAGE__->throw("invalid URL $url");
+  # Explicit -user/-pass take precedence over credentials in the URL.
+  $auth_user ||= $user;
+  $auth_pass ||= $pass;
+  # When a proxy is in play the request line carries the full URL.
+  $path = $url if $proxy;
+
+  # set up the connection; -timeout is forwarded so it actually applies
+  my $socket = _http_connect($host,$port,$timeout) or __PACKAGE__->throw("can't connect: $@");
+
+  # the request
+  print $socket "GET $path HTTP/1.0$CRLF";
+  print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
+  # Support virtual hosts
+  print $socket "HOST: $host$CRLF";
+
+  if ($auth_user && $auth_pass) {  # authentication information
+    # An empty end-of-line string keeps the token on a single line; the
+    # encoder's default would append "\n" (and wrap long tokens), which
+    # corrupts the header.
+    my $token = _encode_base64("$auth_user:$auth_pass", '');
+    print $socket "Authorization: Basic $token$CRLF";
+  }
+  print $socket "$CRLF";
+
+  # read the response headers: everything up to the first blank line
+  my $response;
+  {
+    local $/ = "$CRLF$CRLF";
+    $response = <$socket>;
+  }
+  $response = '' unless defined $response; # connection closed early
+
+  my ($status_line,@other_lines) = split $CRLF,$response;
+  $status_line = '' unless defined $status_line;
+  my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
+    or __PACKAGE__->throw("invalid response from web server: got $response");
+
+  my %headers = map {/^(\S+): (.+)/} @other_lines;
+  if ($stat_code == 302 || $stat_code == 301) {  # redirect
+    my $location = $headers{Location} or
+        __PACKAGE__->throw("invalid redirect: no Location header");
+    return getFH(-url => $location, -proxy => $proxy, -timeout => $timeout, -user => $auth_user, -pass => $auth_pass);  # recursive call
+  }
+
+  elsif ($stat_code == 401) { # auth required
+    my $auth_required = $headers{'WWW-Authenticate'};
+    # The header may be absent; avoid matching against undef.
+    $auth_required = '' unless defined $auth_required;
+    $auth_required =~ /^Basic realm="([^\"]+)"/
+      or __PACKAGE__->throw("server requires unknown type of ".
+                            "authentication: $auth_required");
+    __PACKAGE__->throw("request failed: $status_line, realm = $1");
+  }
+
+  elsif ($stat_code != 200) {
+    __PACKAGE__->throw("request failed: $status_line");
+  }
+
+  # Now that we are reasonably sure the socket and request
+  # are OK we pass the socket back as a filehandle so it can
+  # be processed by the caller...
+
+  $socket;
+
+}
+
+
+=head2 _http_parse_url
+
+ Title   :
+ Usage   : 
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _http_parse_url {
+  # Split an http:// URL into (host, port, path, user, password).
+  # Port defaults to 80 and path to '/'; user and password are undef when
+  # the URL carries no "user:pass@" prefix. Anything after '#' is dropped.
+  # Returns nothing when the URL does not start with http://.
+  my $url = shift;
+  # The credential parts must not contain '/', otherwise an '@' in the
+  # path or query of a "host:port/..." URL would be taken for credentials.
+  my ($user,$pass,$hostent,$path) =
+    $url =~ m!^http://(?:([^:/]+):([^:/]+)\@)?([^/]+)(/?[^\#]*)! or return;
+  $path ||= '/';
+  my ($host,$port) = split(':',$hostent);
+  return ($host,$port||80,$path,$user,$pass);
+}
+
+=head2 _http_connect
+
+ Title   :
+ Usage   : 
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _http_connect {
+  # Open a TCP stream socket to $host:$port with the given timeout and
+  # return the IO::Socket::INET object (a false value when it fails).
+  my ($host,$port,$timeout) = @_;
+  my %options = (
+      Proto     => 'tcp',
+      Type      => SOCK_STREAM,
+      PeerHost  => $host,
+      PeerPort  => $port,
+      Timeout   => $timeout,
+  );
+  my $connection = IO::Socket::INET->new(%options);
+  return $connection;
+}
+
+
+=head2 _encode_base64
+
+ Title   :
+ Usage   : 
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _encode_base64 {
+    # Base64-encode the first argument. The optional second argument is
+    # the line terminator appended after every 76 output characters
+    # (default "\n"); pass '' to get a single unbroken line.
+    my $eol = defined $_[1] ? $_[1] : "\n";
+    pos($_[0]) = 0;                          # ensure start at the beginning
+
+    # uuencode the input 45 bytes at a time, keeping only the payload of
+    # each uuencoded line (drop the leading length character and the
+    # trailing newline)
+    my $encoded = '';
+    foreach my $chunk ($_[0] =~ /(.{1,45})/gs) {
+        my ($payload) = pack('u', $chunk) =~ /^.(\S*)/;
+        $encoded .= $payload;
+    }
+
+    # translate the uuencode alphabet into the base64 alphabet
+    $encoded =~ tr|` -_|AA-Za-z0-9+/|;               # `# help emacs
+
+    # overwrite the trailing filler characters with '=' padding
+    my $pad_count = (3 - length($_[0]) % 3) % 3;
+    if ($pad_count) {
+        $encoded =~ s/.{$pad_count}$/'=' x $pad_count/e;
+    }
+
+    # break encoded string into lines of no more than 76 characters each
+    $encoded =~ s/(.{1,76})/$1$eol/g if length $eol;
+
+    return $encoded;
+}
+
+
+=head2 proxy
+
+ Title   : proxy
+ Usage   : $httpproxy = $db->proxy('http')  or 
+           $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy. Defaults to environment variable
+           http_proxy if present.
+ Returns : a string indicating the proxy
+ Args    : $protocol : an array ref of the protocol(s) to set/get
+           $proxyurl : url of the proxy to use for the specified protocol
+           $username : username (if proxy requires authentication)
+           $password : password (if proxy requires authentication)
+
+=cut
+
+sub proxy {
+    # Get or set the proxy URL for one protocol (default 'http') or for
+    # several protocols given as an array reference.
+    #   setter: proxy($protocol, $url [, $username, $password])
+    #   getter: proxy([$protocol]) returns the stored proxy, otherwise
+    #           falls back to $ENV{http_proxy} (and stores that).
+    # Returns the proxy URL, or nothing when no proxy is known.
+    my ($self,$protocol,$proxy,$username,$password) = @_;
+    $protocol ||= 'http';
+    # The documented call style allows an array ref of protocols; using the
+    # reference itself as a hash key would store under "ARRAY(0x...)".
+    my @protocols = ref($protocol) eq 'ARRAY' ? @$protocol : ($protocol);
+
+    unless ($proxy) {
+        # Getter: a proxy set earlier on this object wins. Without this
+        # the stored value was never returned and was overwritten by the
+        # environment on the next call.
+        if ($self->{'_proxy'}) {
+            foreach my $name (@protocols) {
+                my $stored = $self->{'_proxy'}->{$name};
+                return $stored if defined $stored;
+            }
+        }
+        if (defined $ENV{http_proxy}) {
+            $proxy = $ENV{http_proxy};
+            if ($proxy =~ /\@/) {
+                # Split "http://user:pass@host" into credentials and a
+                # credential-free proxy URL; leave $proxy untouched when
+                # the value does not have that shape.
+                if (my ($env_user, $env_pass, $rest) =
+                        $proxy =~ m{http://(\S+):(\S+)\@(\S+)}) {
+                    ($username, $password) = ($env_user, $env_pass);
+                    $proxy = 'http://'.$rest;
+                }
+            }
+        }
+    }
+    return unless (defined $proxy);
+    $self->authentication($username, $password) 
+        if ($username && $password);
+    foreach my $name (@protocols) {
+        $self->{'_proxy'}->{$name} = $proxy;
+    }
+    return $proxy;
+}
+
+=head2 authentication
+
+ Title   : authentication
+ Usage   : $db->authentication($user,$pass)
+ Function: Get/Set authentication credentials
+ Returns : Array of user/pass 
+ Args    : Array or user/pass
+
+
+=cut
+
+sub authentication{
+   # Combined getter/setter for the (user, password) pair. The pair is
+   # replaced only when both arguments are defined; the stored pair is
+   # returned as a list, or an empty list when nothing has been stored.
+   my ($self, $user, $password) = @_;
+
+   $self->{'_authentication'} = [ $user, $password ]
+       if defined $user && defined $password;
+
+   my $credentials = $self->{'_authentication'} || [];
+   return @{$credentials};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,952 @@
+# $Id: IO.pm,v 1.61.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::Root::IO
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Root::IO - module providing several methods often needed when dealing with file IO
+
+=head1 SYNOPSIS
+
+    # utilize stream I/O in your module
+    $self->{'io'} = Bio::Root::IO->new(-file => "myfile");
+    $self->{'io'}->_print("some stuff");
+    $line = $self->{'io'}->_readline();
+    $self->{'io'}->_pushback($line);
+    $self->{'io'}->close();
+
+    # obtain platform-compatible filenames
+    $path = Bio::Root::IO->catfile($dir, $subdir, $filename);
+    # obtain a temporary file (created in $TEMPDIR)
+    ($handle) = $io->tempfile();
+
+=head1 DESCRIPTION
+
+This module provides methods that will usually be needed for any sort
+of file- or stream-related input/output, e.g., keeping track of a file
+handle, transient printing and reading from the file handle, a close
+method, automatically closing the handle on garbage collection, etc.
+
+To use this for your own code you will either want to inherit from
+this module, or instantiate an object for every file or stream you are
+dealing with. In the first case this module will most likely not be
+the first class off which your class inherits; therefore you need to
+call _initialize_io() with the named parameters in order to set file
+handle, open file, etc automatically.
+
+Most methods start with an underscore, indicating they are private. In
+OO speak, they are not private but protected, that is, use them in
+your module code, but a client code of your module will usually not
+want to call them (except those not starting with an underscore).
+
+In addition this module contains a couple of convenience methods for
+cross-platform safe tempfile creation and similar tasks. There are
+some CPAN modules related that may not be available on all
+platforms. At present, File::Spec and File::Temp are attempted. This
+module defines $PATHSEP, $TEMPDIR, and $ROOTDIR, which will always be set, 
+and $OPENFLAGS, which will be set if either of File::Spec or File::Temp fails.
+
+The -noclose boolean (accessed via the noclose method) prevents a
+filehandle from being closed when the IO object is cleaned up.  This
+is special behavior when a object like a parser might share a
+filehandle with an object like an indexer where it is not proper to
+close the filehandle as it will continue to be reused until the end of the
+stream is reached.  In general you won't want to play with this flag.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Root::IO;
+use vars qw($FILESPECLOADED $FILETEMPLOADED $FILEPATHLOADED
+	    $TEMPDIR $PATHSEP $ROOTDIR $OPENFLAGS $VERBOSE $ONMAC
+            $HAS_LWP
+           );
+use strict;
+
+use Symbol;
+use POSIX qw(dup);
+use IO::Handle;
+use Bio::Root::HTTPget;
+
+use base qw(Bio::Root::Root);
+
+# File-private state; both variables are (re)initialised in the BEGIN
+# block below. $TEMPCOUNTER starts at 0 there; $HAS_WIN32 is set to 1
+# when the Win32 module loads on an mswin platform.
+my $TEMPCOUNTER;
+my $HAS_WIN32 = 0;
+# $HAS_LWP is a package variable (declared via 'use vars') set in BEGIN.
+#my $HAS_LWP = 1;
+
+# Compile-time setup: reset the package flags, probe the optional modules
+# (File::Path, LWP::Simple, Win32, File::Spec, File::Temp) and work out
+# the platform defaults $PATHSEP, $TEMPDIR, $ROOTDIR and $OPENFLAGS.
+BEGIN {
+    $TEMPCOUNTER = 0;
+    $FILESPECLOADED = 0;
+    $FILETEMPLOADED = 0;
+    $FILEPATHLOADED = 0;
+    $VERBOSE = 0;
+
+    # try to load those modules that may cause trouble on some systems
+    eval { 
+	require File::Path;
+	$FILEPATHLOADED = 1;
+    }; 
+    if( $@ ) {
+	print STDERR "Cannot load File::Path: $@" if( $VERBOSE > 0 );
+	# do nothing
+    }
+
+    # Record in $HAS_LWP whether LWP::Simple could be loaded.
+    eval {
+        require LWP::Simple;
+    };
+    if( $@ ) {
+	print STDERR "Cannot load LWP::Simple: $@" if( $VERBOSE > 0 );
+        $HAS_LWP = 0;
+    } else {
+        $HAS_LWP = 1;
+    }
+
+    # If on Win32, attempt to find Win32 package
+
+    if($^O =~ /mswin/i) {
+	eval {
+	    require Win32;
+	    $HAS_WIN32 = 1;
+	};
+    }
+
+    # Try to provide a path separator. Why doesn't File::Spec export this,
+    # or did I miss it?
+    if($^O =~ /mswin/i) {
+	$PATHSEP = "\\";
+    } elsif($^O =~ /macos/i) {
+	$PATHSEP = ":";
+    } else { # unix
+	$PATHSEP = "/";
+    }
+    # File::Spec is loaded before File::Temp, so $TEMPDIR and $ROOTDIR are
+    # already set if only File::Temp fails to load.
+    eval {
+	require File::Spec;
+	$FILESPECLOADED = 1;
+	$TEMPDIR = File::Spec->tmpdir();
+	$ROOTDIR = File::Spec->rootdir();
+	require File::Temp; # tempfile creation
+	$FILETEMPLOADED = 1;
+    };
+    if( $@ ) { 
+	if(! defined($TEMPDIR)) { # File::Spec failed
+	    # determine tempdir
+	    # Environment first (TEMPDIR, then TMPDIR), then a per-platform
+	    # default, and finally the current directory if the chosen
+	    # directory does not exist or is not writable.
+	    if (defined $ENV{'TEMPDIR'} && -d $ENV{'TEMPDIR'} ) {
+		$TEMPDIR = $ENV{'TEMPDIR'};
+	    } elsif( defined $ENV{'TMPDIR'} && -d $ENV{'TMPDIR'} ) {
+		$TEMPDIR = $ENV{'TMPDIR'};
+	    }
+	    if($^O =~ /mswin/i) {
+		$TEMPDIR = 'C:\TEMP' unless $TEMPDIR;
+		$ROOTDIR = 'C:';
+	    } elsif($^O =~ /macos/i) {
+		$TEMPDIR = "" unless $TEMPDIR; # what is a reasonable default on Macs?
+		$ROOTDIR = ""; # what is reasonable??
+	    } else { # unix
+		$TEMPDIR = "/tmp" unless $TEMPDIR;
+		$ROOTDIR = "/";
+	    }
+	    if (!( -d $TEMPDIR && -w $TEMPDIR )) {
+		$TEMPDIR = '.'; # last resort
+	    }
+	}
+	# File::Temp failed (alone, or File::Spec already failed)
+	#
+	# determine open flags for tempfile creation -- we'll have to do this
+	# ourselves
+	use Fcntl;
+	use Symbol;
+	$OPENFLAGS = O_CREAT | O_EXCL | O_RDWR;
+	# OR in every optional O_* flag that Fcntl provides here; the eval
+	# quietly skips flags whose Fcntl function cannot be called.
+	for my $oflag (qw/FOLLOW BINARY LARGEFILE EXLOCK NOINHERIT TEMPORARY/){
+	    my ($bit, $func) = (0, "Fcntl::O_" . $oflag);
+	    no strict 'refs';
+	    $OPENFLAGS |= $bit if eval { $bit = &$func(); 1 };
+	}
+    }
+    # True when "\n" is a carriage return (\015) on this platform.
+    $ONMAC = "\015" eq "\n";
+}
+
+=head2 new
+
+ Title   : new 
+ Usage   : 
+ Function: Overridden here to automatically call _initialize_io().
+ Example :
+ Returns : new instance of this class
+ Args    : named parameters
+
+
+=cut
+
+sub new {
+    my $caller = shift;
+    # construct via the parent class first, then set up the I/O state
+    my $self = $caller->SUPER::new(@_);
+    $self->_initialize_io(@_);
+    return $self;
+}
+
+=head2 _initialize_io
+
+ Title   : _initialize_io
+ Usage   : $self->_initialize_io(@params);
+ Function: Initializes filehandle and other properties from the parameters.
+
+           Currently recognizes the following named parameters:
+              -file     name of file to open
+              -url      name of URL to open
+              -input    name of file, or GLOB, or IO::Handle object
+              -fh       file handle (mutually exclusive with -file)
+              -flush    boolean flag to autoflush after each write
+              -noclose  boolean flag, when set to true will not close a
+                        filehandle (must explicitly call close($io->_fh))
+ Returns : TRUE
+ Args    : named parameters
+
+
+=cut
+
+sub _initialize_io {
+    my($self, @args) = @_;
+
+    $self->_register_for_cleanup(\&_io_cleanup);
+
+    my ($input, $noclose, $file, $fh, $flush, $url) = $self->_rearrange([qw(INPUT 
+							    NOCLOSE
+							    FILE FH 
+							    FLUSH URL)], @args);
+
+    if($url){
+      my $trymax = 5;
+
+      if($HAS_LWP){ #use LWP::Simple::getstore()
+	require LWP::Simple;
+        #$self->warn("has lwp");
+        my $http_result;
+        my($handle,$tempfile) = $self->tempfile();
+        close($handle); # only the name is needed; getstore() writes the file
+
+        for(my $try = 1 ; $try <= $trymax ; $try++){
+          $http_result = LWP::Simple::getstore($url, $tempfile);
+          $self->warn("[$try/$trymax] tried to fetch $url, but server threw $http_result.  retrying...") if $http_result != 200;
+          last if $http_result == 200;
+        }
+        $self->throw("failed to fetch $url, server threw $http_result") if $http_result != 200;
+
+        $input = $tempfile;
+        $file  = $tempfile;
+      } else { #use Bio::Root::HTTPget
+        #$self->warn("no lwp");
+
+        $fh = Bio::Root::HTTPget->getFH($url);
+      }
+    }
+
+    delete $self->{'_readbuffer'};
+    delete $self->{'_filehandle'};
+    $self->noclose( $noclose) if defined $noclose;
+    # determine whether the input is a file(name) or a stream
+    if($input) {
+	if(ref(\$input) eq "SCALAR") {
+	    # we assume that a scalar is a filename
+	    if($file && ($file ne $input)) {
+		$self->throw("input file given twice: $file and $input disagree");
+	    }
+	    $file = $input;
+	} elsif(ref($input) &&
+		((ref($input) eq "GLOB") || $input->isa('IO::Handle'))) {
+	    # input is a stream
+	    $fh = $input;
+	} else {
+	    # let's be strict for now
+	    $self->throw("unable to determine type of input $input: ".
+			 "not string and not GLOB");
+	}
+    }
+    if(defined($file) && defined($fh)) {
+	$self->throw("Providing both a file and a filehandle for reading - only one please!");
+    }
+
+    if(defined($file) && ($file ne '')) {
+	$fh = Symbol::gensym();
+	open ($fh,$file) || # two-arg open: a mode prefix inside $file is honoured
+	    $self->throw("Could not open $file: $!");
+	$self->file($file);
+    }
+    $self->_fh($fh) if $fh; # if not provided, defaults to STDIN and STDOUT
+
+    $self->_flush_on_write(defined $flush ? $flush : 1); # flushing is on unless -flush says otherwise
+
+    return 1;
+}
+
+=head2 _fh
+
+ Title   : _fh
+ Usage   : $obj->_fh($newval)
+ Function: Get/set the file handle for the stream encapsulated.
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+=cut
+
+sub _fh {
+    my $obj = shift;
+    my $value = shift;
+    # store the handle only when one was actually supplied
+    $obj->{'_filehandle'} = $value if defined $value;
+    return $obj->{'_filehandle'};
+}
+
+=head2 mode
+
+ Title   : mode
+ Usage   : $obj->mode()
+ Function:
+ Example :
+ Returns : mode of filehandle:
+           'r' for readable
+           'w' for writeable
+           '?' if mode could not be determined
+ Args    : -force (optional), see notes.
+ Notes   : once mode() has been called, the filehandle's mode is cached
+           for further calls to mode().  to override this behavior so
+           that mode() re-checks the filehandle's mode, call with arg
+           -force
+
+=cut
+
+sub mode {
+    my ($obj, @arg) = @_;
+	my %param = @arg;
+    return $obj->{'_mode'} if defined $obj->{'_mode'} and !$param{-force}; # cached answer unless -force
+    
+    # Previous system of:
+    #  my $iotest = new IO::Handle;
+    #  $iotest->fdopen( dup(fileno($fh)) , 'r' );
+    #  if ($iotest->error == 0) { ... }
+    # didn't actually seem to work under any platform, since there would be
+    # no error if the filehandle had been opened writable only. Couldn't be
+    # hacked around when dealing with unseekable (piped) filehandles.
+    #
+    # Just try and do a simple readline, turning io warnings off, instead:
+    
+    my $fh = $obj->_fh || return '?';
+    
+    no warnings "io"; # we expect a warning if this is writable only
+    my $line = <$fh>;
+    if (defined $line) {
+        $obj->_pushback($line); # give the probe line back so later reads still see it
+        $obj->{'_mode'} = 'r';
+    }
+    else {
+        $obj->{'_mode'} = 'w'; # NOTE(review): a readable handle at EOF also ends up here
+    }
+    
+    return $obj->{'_mode'};
+}
+
+=head2 file
+
+ Title   : file
+ Usage   : $obj->file($newval)
+ Function: Get/set the filename, if one has been designated.
+ Example :
+ Returns : value of file
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub file {
+    my $obj = shift;
+    my $value = shift;
+    # remember the filename only when one was actually supplied
+    $obj->{'_file'} = $value if defined $value;
+    return $obj->{'_file'};
+}
+
+=head2 _print
+
+ Title   : _print
+ Usage   : $obj->_print(@lines)
+ Function:
+ Example :
+ Returns : 1 on success, undef on failure
+
+=cut
+
+sub _print {
+    my ($self, @items) = @_;
+    # with no handle configured, output goes to STDOUT
+    my $out = $self->_fh() || \*STDOUT;
+    return print {$out} @items;
+}
+
+=head2 _readline
+
+ Title   : _readline
+ Usage   : $obj->_readline(%args)
+ Function: Reads a line of input.
+
+           Note that this method implicitly uses the value of $/ that is
+           in effect when called.
+
+           Note also that the current implementation does not handle pushed
+           back input correctly unless the pushed back input ends with the
+           value of $/.
+
+ Example :
+ Args    : Accepts a hash of arguments, currently only -raw is recognized
+           passing (-raw => 1) prevents \r\n sequences from being changed
+           to \n.  The default value of -raw is undef, allowing \r\n to be
+           converted to \n.
+ Returns : 
+
+=cut
+
+sub _readline {
+    my ($self, %param) = @_;
+    my $fh = $self->_fh or return;
+
+    # Lines handed back through _pushback are served first; the handle
+    # is only read once that buffer is empty.
+    my $buffer = $self->{'_readbuffer'} || [];
+    my $line = @$buffer ? shift @$buffer
+                        : <$fh>;
+
+    # Nothing read, or the caller asked for the raw line via -raw:
+    # hand the value back untouched.
+    return $line
+        if $param{-raw} || !defined $line;
+
+    # Normalise line endings (Dave Howorth's fix). The order matters:
+    # CR/LF pairs are collapsed before lone CRs are translated.
+    # 1. every CR/LF pair becomes a single LF
+    $line =~ s/\015\012/\012/g;
+    # 2. remaining lone CRs become NEWLINE, except where "\n" is CR
+    $line =~ tr/\015/\n/ unless $ONMAC;
+    return $line;
+}
+
+=head2 _pushback
+
+ Title   : _pushback
+ Usage   : $obj->_pushback($newvalue)
+ Function: puts a line previously read with _readline back into a buffer.
+           buffer can hold as many lines as system memory permits.
+ Example :
+ Returns :
+ Args    : newvalue
+
+=cut
+
+sub _pushback {
+    my ($obj, $value) = @_;
+    return unless defined $value && length $value; # a bare "0" line is data, keep it
+    push @{$obj->{'_readbuffer'}}, $value;
+}
+
+=head2 close
+
+ Title   : close
+ Usage   : $io->close()
+ Function: Closes the file handle associated with this IO instance.
+           Will not close the FH if  -noclose is specified
+ Returns : none
+ Args    : none
+
+=cut
+
+sub close {
+    my $self = shift;
+    return if $self->noclose;    # explicitly asked not to close
+    my $fh = $self->{'_filehandle'};
+    if( defined $fh ) {
+        $self->flush;
+        # the standard streams are never closed
+        for my $std (\*STDOUT, \*STDERR, \*STDIN) {
+            return if $std == $self->_fh;
+        }
+        unless( ref($fh) && $fh->isa('IO::String') ) {
+            close($fh);    # IO::String objects are deliberately left open
+        }
+    }
+    $self->{'_filehandle'} = undef;
+    delete $self->{'_readbuffer'};
+}
+
+
+=head2 flush
+
+ Title   : flush
+ Usage   : $io->flush()
+ Function: Flushes the filehandle
+ Returns : none
+ Args    : none
+
+=cut
+
+sub flush {
+    my $self = shift;
+    my $fh   = $self->{'_filehandle'};
+
+    $self->throw("Attempting to call flush but no filehandle active")
+        unless defined $fh;
+
+    # Handle objects flush themselves.
+    return $fh->flush() unless ref($fh) =~ /GLOB/;
+
+    # Plain globs: select the handle, turn on $|, restore the old one.
+    my $previous = select($fh);
+    $| = 1;
+    select($previous);
+}
+
+=head2 noclose
+
+ Title   : noclose
+ Usage   : $obj->noclose($newval)
+ Function: Get/Set the NOCLOSE flag - setting this to true will
+           prevent a filehandle from being closed
+           when an object is cleaned up or explicitly closed
+           This is a bit of hack 
+ Returns : value of noclose (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub noclose{
+    my ($self, @new) = @_;
+    # any argument, even undef, is stored as the new flag value
+    $self->{'_noclose'} = $new[0] if @new;
+    return $self->{'_noclose'};
+}
+
+sub _io_cleanup {
+    my $self = shift;
+
+    $self->close();
+    my $level = $self->verbose;
+    # $level > 0 makes the removals below announce themselves via warn
+
+    # Files registered by tempfile() are removed unconditionally.
+    my $files = $self->{'_rootio_tempfiles'};
+    if( exists($self->{'_rootio_tempfiles'}) && ref($files) =~ /array/i ) {
+        warn( "going to remove files ", join(",", @$files), "\n")
+            if $level > 0;
+        unlink(@$files);
+    }
+
+    # Directories recorded by tempdir() are removed only when its
+    # CLEANUP flag was stored in _cleanuptempdir, which happens solely
+    # on the code path that does not use File::Temp.
+    my $dirs = $self->{'_rootio_tempdirs'};
+    if( $self->{'_cleanuptempdir'} &&
+        exists($self->{'_rootio_tempdirs'}) &&
+        ref($dirs) =~ /array/i ) {
+        warn( "going to remove dirs ", join(",", @$dirs), "\n")
+            if $level > 0;
+        $self->rmtree($dirs);
+    }
+}
+
+=head2 exists_exe
+
+ Title   : exists_exe
+ Usage   : $exists = $obj->exists_exe('clustalw');
+           $exists = Bio::Root::IO->exists_exe('clustalw')
+           $exists = Bio::Root::IO::exists_exe('clustalw')
+ Function: Determines whether the given executable exists either as file
+           or within the path environment. The latter requires File::Spec
+           to be installed.
+           On Win32-based system, .exe is automatically appended to the program
+           name unless the program name already ends in .exe.
+ Example :
+ Returns : 1 if the given program is callable as an executable, and 0 otherwise
+ Args    : the name of the executable
+
+=cut
+
+sub exists_exe {
+    my ($self, $exe) = @_;
+    # called as a plain function: the first argument is the program name
+    $exe = $self unless ref($self) || $exe;
+    # on Win32 append .exe unless an executable extension is already there
+    if( $^O =~ /mswin/i && $exe !~ /\.(exe|com|bat|cmd)$/i ) {
+        $exe .= '.exe';
+    }
+    return $exe if -e $exe; # exists as given (e.g. a full path)
+
+    # Otherwise look through the directories of the path environment,
+    # which is only possible when File::Spec was loaded.
+    return 0 unless $FILESPECLOADED;
+    for my $dir (File::Spec->path()) {
+        my $candidate = Bio::Root::IO->catfile($dir, $exe);
+        # the first existing, executable match wins
+        return $candidate if -e $candidate && -x $candidate;
+    }
+    return 0;
+}
+
+=head2 tempfile
+
+ Title   : tempfile
+ Usage   : my ($handle,$tempfile) = $io->tempfile(); 
+ Function: Returns a temporary filename and a handle opened for writing
+           and reading.
+
+ Caveats : If you do not have File::Temp on your system you should avoid
+           specifying TEMPLATE and SUFFIX. (We don't want to recode
+           everything, okay?)
+ Returns : a 2-element array, consisting of temporary handle and temporary 
+           file name
+ Args    : named parameters compatible with File::Temp: DIR (defaults to
+           $Bio::Root::IO::TEMPDIR), TEMPLATE, SUFFIX.
+
+=cut
+
+#'
+sub tempfile {
+    my ($self, @args) = @_;
+    my ($tfh, $file);
+    my %params = @args;
+
+    # map between naming with and without dash
+    foreach my $key (keys(%params)) {
+	if( $key =~ /^-/  ) {
+	    my $v = $params{$key};
+	    delete $params{$key};
+	    $params{uc(substr($key,1))} = $v;
+	} else { 
+	    # this is to upper case
+	    my $v = $params{$key};
+	    delete $params{$key};	    
+	    $params{uc($key)} = $v;
+	}
+    }
+    $params{'DIR'} = $TEMPDIR if(! exists($params{'DIR'}));
+    unless (exists $params{'UNLINK'} && 
+	    defined $params{'UNLINK'} &&
+	    ! $params{'UNLINK'} ) {
+	$params{'UNLINK'} = 1;
+    } else { $params{'UNLINK'} = 0 }
+    # prefer File::Temp; otherwise create the file by hand with sysopen
+    if($FILETEMPLOADED) {
+	if(exists($params{'TEMPLATE'})) {
+	    my $template = $params{'TEMPLATE'};
+	    delete $params{'TEMPLATE'};
+	    ($tfh, $file) = File::Temp::tempfile($template, %params);
+	} else {
+	    ($tfh, $file) = File::Temp::tempfile(%params);
+	}
+    } else {
+	my $dir = $params{'DIR'};
+	$file = $self->catfile($dir,
+			       (exists($params{'TEMPLATE'}) ?
+				$params{'TEMPLATE'} :
+				sprintf( "%s.%s.%s",  
+					 $ENV{USER} || 'unknown', $$, 
+					 $TEMPCOUNTER++)));
+
+	# sneakiness for getting around long filenames on Win32?
+	if( $HAS_WIN32 ) {
+	    $file = Win32::GetShortPathName($file);
+	}
+
+	# taken from File::Temp
+	if ($] < 5.006) {
+	    $tfh = &Symbol::gensym;
+	}    
+	# Try to make sure this will be marked close-on-exec
+	# XXX: Win32 doesn't respect this, nor the proper fcntl,
+	#      but may have O_NOINHERIT. This may or may not be in Fcntl.
+	local $^F = 2; 
+	# Store callers umask
+	my $umask = umask();
+	# Set a known umask
+	umask(066);
+	# Attempt to open the file
+	my $opened = sysopen($tfh, $file, $OPENFLAGS, 0600);
+	my $err = $!; # capture the reason before umask() can disturb $!
+	# Always restore the caller's umask, even when the open failed
+	umask($umask);
+	$self->throw("Could not open tempfile $file: $err\n")
+	    unless $opened;
+    }
+
+    if(  $params{'UNLINK'} ) {
+	push @{$self->{'_rootio_tempfiles'}}, $file;
+    } 
+
+    # list context: (handle, filename); scalar context: the handle only
+    return wantarray ? ($tfh,$file) : $tfh;
+}
+
+=head2  tempdir
+
+ Title   : tempdir
+ Usage   : my ($tempdir) = $io->tempdir(CLEANUP=>1); 
+ Function: Creates and returns the name of a new temporary directory.
+
+           Note that you should not use this function for obtaining "the"
+           temp directory. Use $Bio::Root::IO::TEMPDIR for that. Calling this
+           method will in fact create a new directory.
+
+ Returns : The name of a new temporary directory.
+ Args    : args - ( key CLEANUP ) indicates whether or not to cleanup 
+           dir on object destruction, other keys as specified by File::Temp
+
+=cut
+
+sub tempdir {
+    my ( $self, @args ) = @_;
+    # File::Temp does the job whenever it is loaded and offers tempdir()
+    return File::Temp::tempdir(@args)
+        if $FILETEMPLOADED && File::Temp->can('tempdir');
+
+    # Manual fallback. The CLEANUP request is remembered on the object
+    # (true only when CLEANUP was passed as 1) and the new directory is
+    # recorded in _rootio_tempdirs.
+    my %params = @args;
+    my $cleanup = $params{CLEANUP};
+    $self->{'_cleanuptempdir'} = ( defined $cleanup && $cleanup == 1 );
+    # name: dir_<user>-<pid>-<counter>, created below $TEMPDIR
+    my $name = sprintf("dir_%s-%s-%s",
+                       $ENV{USER} || 'unknown', $$, $TEMPCOUNTER++);
+    my $tdir = $self->catfile($TEMPDIR, $name);
+    mkdir($tdir, 0755);
+    push @{$self->{'_rootio_tempdirs'}}, $tdir;
+    return $tdir;
+}
+
+=head2 catfile
+
+ Title   : catfile
+ Usage   : $path = Bio::Root::IO->catfile(@dirs,$filename);
+ Function: Constructs a full pathname in a cross-platform safe way.
+
+           If File::Spec exists on your system, this routine will merely
+           delegate to it. Otherwise it tries to make a good guess.
+
+           You should use this method whenever you construct a path name
+           from directory and filename. Otherwise you risk cross-platform
+           compatibility of your code.
+
+           You can call this method both as a class and an instance method.
+
+ Returns : a string
+ Args    : components of the pathname (directories and filename, NOT an
+           extension)
+
+=cut
+
+sub catfile {
+    my ($self, @parts) = @_;
+
+    # delegate to File::Spec whenever it could be loaded
+    if( $FILESPECLOADED ) {
+        return File::Spec->catfile(@parts);
+    }
+    # crude fallback: a leading '/' stands for the platform's root dir
+    $parts[0] = $ROOTDIR if $parts[0] eq '/';
+    return join($PATHSEP, @parts);
+}
+
+=head2 rmtree
+
+ Title   : rmtree
+ Usage   : Bio::Root::IO->rmtree($dirname );
+ Function: Remove a full directory tree
+
+           If File::Path exists on your system, this routine will merely
+           delegate to it. Otherwise it runs a local version of that code.
+
+           You should use this method to remove directories which contain 
+           files.
+
+           You can call this method both as a class and an instance method.
+
+ Returns : number of files successfully deleted
+ Args    : roots - rootdir to delete or reference to list of dirs
+
+           verbose - a boolean value, which if TRUE will cause
+                     C<rmtree> to print a message each time it
+                     examines a file, giving the name of the file, and
+                     indicating whether it's using C<rmdir> or
+                     C<unlink> to remove it, or that it's skipping it.
+                     (defaults to FALSE)
+
+           safe - a boolean value, which if TRUE will cause C<rmtree>
+                  to skip any files to which you do not have delete
+                  access (if running under VMS) or write access (if
+                  running under another OS).  This will change in the
+                  future when a criterion for 'delete permission'
+                  under OSs other than VMS is settled.  (defaults to
+                  FALSE)
+
+=cut
+
+# taken straight from File::Path VERSION = "1.0403"
+sub rmtree {
+    my($self,$roots, $verbose, $safe) = @_;
+    if( $FILEPATHLOADED ) { 
+	return File::Path::rmtree ($roots, $verbose, $safe);
+    }
+
+    my $force_writeable = ($^O eq 'os2' || $^O eq 'dos' || $^O eq 'MSWin32'
+		       || $^O eq 'amigaos' || $^O eq 'cygwin');
+    my $Is_VMS = $^O eq 'VMS';
+
+    my(@files);
+    my($count) = 0;
+    $verbose ||= 0;
+    $safe ||= 0;
+    if ( defined($roots) && length($roots) ) {
+	$roots = [$roots] unless ref $roots;
+    } else {
+	$self->warn("No root path(s) specified\n");
+	return 0;
+    }
+
+    my($root);
+    foreach $root (@{$roots}) {
+	$root =~ s#/\z##;
+	(undef, undef, my $rp) = lstat $root or next;
+	$rp &= 07777;	# don't forget setuid, setgid, sticky bits
+	if ( -d _ ) {
+	    # notabene: 0777 is for making readable in the first place,
+	    # it's also intended to change it to writable in case we have
+	    # to recurse in which case we are better than rm -rf for 
+	    # subtrees with strange permissions
+	    chmod(0777, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
+	      or $self->warn("Can't make directory $root read+writeable: $!")
+		unless $safe;
+	    if (opendir(DIR, $root) ){
+		@files = readdir DIR;
+		closedir(DIR);
+	    } else {
+	        $self->warn( "Can't read $root: $!");
+		@files = ();
+	    }
+
+	    # Deleting large numbers of files from VMS Files-11 filesystems
+	    # is faster if done in reverse ASCIIbetical order 
+	    @files = reverse @files if $Is_VMS;
+	    ($root = VMS::Filespec::unixify($root)) =~ s#\.dir\z## if $Is_VMS;
+	    @files = map("$root/$_", grep $_!~/^\.{1,2}\z/s,@files); # skip '.' and '..'
+	    $count += $self->rmtree([@files],$verbose,$safe);
+	    if ($safe &&
+		($Is_VMS ? !&VMS::Filespec::candelete($root) : !-w $root)) {
+		print "skipped $root\n" if $verbose;
+		next;
+	    }
+	    chmod 0777, $root
+	      or $self->warn( "Can't make directory $root writeable: $!")
+		if $force_writeable;
+	    print "rmdir $root\n" if $verbose;
+	    if (rmdir $root) {
+		++$count;
+	    }
+	    else {
+		$self->warn( "Can't remove directory $root: $!");
+		chmod($rp, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
+		    or $self->warn("and can't restore permissions to "
+		            . sprintf("0%o",$rp) . "\n");
+	    }
+	}
+	else {
+
+	    if ($safe &&
+		($Is_VMS ? !&VMS::Filespec::candelete($root)
+		         : !(-l $root || -w $root)))
+	    {
+		print "skipped $root\n" if $verbose;
+		next;
+	    }
+	    chmod 0666, $root
+	      or $self->warn( "Can't make file $root writeable: $!")
+		if $force_writeable;
+	    warn "unlink $root\n" if $verbose;
+	    # delete all versions under VMS
+	    for (;;) {
+		unless (unlink $root) {
+		    $self->warn( "Can't unlink file $root: $!");
+		    if ($force_writeable) {
+			chmod $rp, $root
+			    or $self->warn("and can't restore permissions to "
+			            . sprintf("0%o",$rp) . "\n");
+		    }
+		    last;
+		}
+		++$count;
+		last unless $Is_VMS && lstat $root;
+	    }
+	}
+    }
+
+    $count;
+}
+
+=head2 _flush_on_write
+
+ Title   : _flush_on_write
+ Usage   : $obj->_flush_on_write($newval)
+ Function: Boolean flag to indicate whether to flush 
+           the filehandle on writing when the end of 
+           a component is finished (Sequences,Alignments,etc)
+ Returns : value of _flush_on_write
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _flush_on_write {
+    my $self = shift;
+    my $value = shift;
+    # only a defined argument changes the stored flag
+    $self->{'_flush_on_write'} = $value if defined $value;
+    return $self->{'_flush_on_write'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/Root.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/Root.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/Root.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,445 @@
+package Bio::Root::Root;
+use strict;
+
+# $Id: Root.pm,v 1.35.6.2 2006/11/08 17:25:55 sendu Exp $
+
+=head1 NAME
+
+Bio::Root::Root - Hash-based implementation of Bio::Root::RootI
+
+=head1 SYNOPSIS
+
+  # Any Bioperl-compliant object is a RootI compliant object
+
+  # Here's how to throw and catch an exception using the eval-based syntax.
+
+  $obj->throw("This is an exception");
+
+  eval {
+      $obj->throw("This is catching an exception");
+  };
+
+  if( $@ ) {
+      print "Caught exception";
+  } else {
+      print "no exception";
+  }
+
+  # Alternatively, using the new typed exception syntax in the throw() call:
+
+  $obj->throw( -class => 'Bio::Root::BadParameter',
+               -text  => "Can not open file $file",
+               -value  => $file );
+
+  # Want to see debug() outputs for this object
+  
+  my $obj = Bio::Object->new(-verbose=>1);
+
+  my $obj = Bio::Object->new(%args);
+  $obj->verbose(2);
+
+  # Print debug messages which honour current verbosity setting
+  
+  $obj->debug("Boring output only to be seen if verbose > 0\n");
+
+=head1 DESCRIPTION
+
+This is a hashref-based implementation of the Bio::Root::RootI
+interface.  Most Bioperl objects should inherit from this.
+
+See the documentation for L<Bio::Root::RootI> for most of the methods
+implemented by this module.  Only overridden methods are described
+here.
+
+=head2 Throwing Exceptions
+
+One of the functionalities that L<Bio::Root::RootI> provides is the
+ability to L<throw()> exceptions with pretty stack traces. Bio::Root::Root
+enhances this with the ability to use L<Error> (available from CPAN)
+if it has also been installed. 
+
+If L<Error> has been installed, L<throw()> will use it. This causes an
+Error.pm-derived object to be thrown. This can be caught within a
+C<catch{}> block, from which you can extract useful bits of
+information. If L<Error> is not installed, it will use the 
+L<Bio::Root::RootI>-based exception throwing facility.
+
+=head2 Typed Exception Syntax 
+
+The typed exception syntax of L<throw()> has the advantage of plainly
+indicating the nature of the trouble, since the name of the class
+is included in the title of the exception output.
+
+To take advantage of this capability, you must specify arguments
+as named parameters in the L<throw()> call. Here are the parameters:
+
+=over 4
+
+=item -class
+
+name of the class of the exception.
+This should be one of the classes defined in L<Bio::Root::Exception>,
+or a custom error of yours that extends one of the exceptions
+defined in L<Bio::Root::Exception>.
+
+=item -text
+
+a sensible message for the exception
+
+=item -value
+
+the value causing the exception or $!, if appropriate.
+
+=back
+
+Note that Bio::Root::Exception does not need to be imported into
+your module (or script) namespace in order to throw exceptions
+via Bio::Root::Root::throw(), since Bio::Root::Root imports it.
+
+=head2 Try-Catch-Finally Support
+
+In addition to using an eval{} block to handle exceptions, you can
+also use a try-catch-finally block structure if L<Error> has been
+installed in your system (available from CPAN).  See the documentation
+for Error for more details.
+
+Here's an example. See the L<Bio::Root::Exception> module for 
+other pre-defined exception types:
+
+   try {
+    open( IN, $file) || $obj->throw( -class => 'Bio::Root::FileOpenException',
+                                     -text => "Cannot open file $file for reading",
+                                     -value => $!);
+   }
+   catch Bio::Root::FileOpenException with {
+       my $err = shift;   # get the Error object
+       # Perform specific exception handling code for the FileOpenException
+   }
+   catch Bio::Root::Exception with {
+       my $err = shift;   # get the Error object
+       # Perform general exception handling code for any Bioperl exception.
+   }
+   otherwise {
+       # A catch-all for any other type of exception
+   }
+   finally {
+       # Any code that you want to execute regardless of whether or not
+       # an exception occurred.
+   };  
+   # the ending semicolon is essential!
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Functions originally from Steve Chervitz. 
+Refactored by Ewan Birney.
+Re-refactored by Lincoln Stein.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+
+use vars qw($DEBUG $ID $VERBOSITY $ERRORLOADED);
+use strict;
+use Bio::Root::IO;
+
+use base qw(Bio::Root::RootI);
+
+BEGIN { 
+
+    $ID        = 'Bio::Root::Root';
+    $DEBUG     = 0;
+    $VERBOSITY = 0;
+    $ERRORLOADED = 0;
+
+    # Check whether or not Error.pm is available.
+
+    # $main::DONT_USE_ERROR is intended for testing purposes and also
+    # when you don't want to use the Error module, even if it is installed.
+    # Just put a INIT { $DONT_USE_ERROR = 1; } at the top of your script.
+    if( not $main::DONT_USE_ERROR ) {
+        if ( eval "require Error"  ) {
+            import Error qw(:try);
+            require Bio::Root::Exception;
+            $ERRORLOADED = 1; # throw() takes its Error.pm branch when this is set
+            $Error::Debug = 1; # enable verbose stack trace 
+        }
+    } 
+    if( !$ERRORLOADED ) {
+        require Carp; import Carp qw( confess ); # fallback when Error.pm is not in use
+    }    
+    $main::DONT_USE_ERROR;  # second mention only, so that perl -w won't warn "used only once"
+
+}
+
+
+
+=head2 new
+
+ Purpose   : generic instantiation function can be overridden if 
+             special needs of a module cannot be done in _initialize
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    # works both as a class method and when called on an existing object
+    my $self = bless {}, ref($class) || $class;
+
+    # A single argument cannot be a named parameter, so -verbose is only
+    # looked for when there are at least two.
+    return $self unless @args > 1;
+
+    # With an odd count (three or more) the first argument is dropped so
+    # that the remainder pairs up as key/value.
+    shift @args if @args % 2;
+    my %param = @args;
+    $self->verbose($param{'-VERBOSE'} || $param{'-verbose'});
+    return $self;
+}
+
+
+=head2 verbose
+
+ Title   : verbose
+ Usage   : $self->verbose(1)
+ Function: Sets verbose level for how ->warn behaves
+           -1 = no warning
+            0 = standard, small warning
+            1 = warning with stack trace
+            2 = warning becomes throw
+ Returns : The current verbosity setting (integer between -1 to 2)
+ Args    : -1,0,1 or 2
+
+
+=cut
+
+sub verbose {
+    my ($self, $value) = @_;
+    # a true package-wide $DEBUG overrides every per-object setting
+    return $DEBUG if $DEBUG;
+    # called on the class rather than an instance: report $VERBOSITY
+    return $VERBOSITY unless ref $self;
+    # store a new level, or initialise a still-unset one to 0
+    $self->{'_root_verbose'} = $value || 0
+        if defined $value || !defined $self->{'_root_verbose'};
+    return $self->{'_root_verbose'};
+}
+
+sub _register_for_cleanup {
+  my ($self, $callback) = @_;
+  if( $callback ) {
+    # create the list on first use, then append the callback
+    $self->{'_root_cleanup_methods'} = []
+      unless exists $self->{'_root_cleanup_methods'};
+    push @{ $self->{'_root_cleanup_methods'} }, $callback;
+  }
+}
+
+sub _unregister_for_cleanup {
+  my ($self, $method) = @_;
+  # keep every registered callback except the one being removed
+  $self->{'_root_cleanup_methods'} = [ grep { $_ ne $method } $self->_cleanup_methods ];
+}
+
+
+sub _cleanup_methods {
+  my ($self) = @_;
+  # only hash-based instances can carry a cleanup list
+  return unless ref $self && $self->isa('HASH');
+  my $registered = $self->{'_root_cleanup_methods'} or return;
+  return @$registered;
+}
+
+=head2 throw
+
+ Title   : throw
+ Usage   : $obj->throw("throwing exception message");
+           or
+           $obj->throw( -class => 'Bio::Root::Exception',
+                        -text  => "throwing exception message",
+                        -value => $bad_value  );
+ Function: Throws an exception, which, if not caught with an eval or
+           a try block will provide a nice stack trace to STDERR 
+           with the message.
+           If Error.pm is installed, and if a -class parameter is
+           provided, Error::throw will be used, throwing an error 
+           of the type specified by -class.
+           If Error.pm is installed and no -class parameter is provided
+           (i.e., a simple string is given), A Bio::Root::Exception 
+           is thrown.
+ Returns : n/a
+ Args    : A string giving a descriptive error message, optional
+           Named parameters:
+           '-class'  a string for the name of a class that derives 
+                     from Error.pm, such as any of the exceptions 
+                     defined in Bio::Root::Exception.
+                     Default class: Bio::Root::Exception
+           '-text'   a string giving a descriptive error message
+           '-value'  the value causing the exception, or $! (optional)
+
+           Thus, if only a string argument is given, and Error.pm is available,
+           this is equivalent to the arguments:
+                 -text  => "message",
+                 -class => Bio::Root::Exception
+ Comments : If Error.pm is installed, and you don't want to use it
+            for some reason, you can block the use of Error.pm by
+            Bio::Root::Root::throw() by defining a scalar named
+            $main::DONT_USE_ERROR (define it in your main script
+            and you don't need the main:: part) and setting it to 
+            a true value; you must do this within a BEGIN subroutine.
+            
+            Also note that if you use the string form, the string cannot
+            start with a dash, or the resulting throw message will be empty.
+
+=cut
+
+#'
+
+sub throw{
+   my ($self,@args) = @_;
+   
+   my ( $text, $class ) = $self->_rearrange( [qw(TEXT CLASS)], @args);
+
+   if( $ERRORLOADED ) {
+#       print STDERR "  Calling Error::throw\n\n";
+
+       # Enable re-throwing of Error objects.
+       # If the error is not derived from Bio::Root::Exception, 
+       # we can't guarantee that the Error's value was set properly
+       # and, ipso facto, that it will be catchable from an eval{}.
+       # But chances are, if you're re-throwing non-Bio::Root::Exceptions,
+       # you're probably using Error::try(), not eval{}.
+       # TODO: Fix the MSG: line of the re-thrown error. Has an extra line
+       # containing the '----- EXCEPTION -----' banner.
+       if( ref($args[0])) {
+           if( $args[0]->isa('Error')) {
+               my $class = ref $args[0];
+               $class->throw( @args );
+           } else {
+               my $text = "\nWARNING: Attempt to throw a non-Error.pm object: " . ref$args[0];
+               my $class = "Bio::Root::Exception";
+               $class->throw( '-text' => $text, '-value' => $args[0] ); 
+           }
+       } else {
+           $class ||= "Bio::Root::Exception";
+
+   	   my %args;
+	   if( @args % 2 == 0 && $args[0] =~ /^-/ ) {
+	       %args = @args;
+	       $args{-text} = $text;
+	       $args{-object} = $self;
+	   }
+
+           $class->throw( scalar keys %args > 0 ? %args : @args ); # (%args || @args) puts %args in scalar context!
+       }
+   }
+   else {
+#       print STDERR "  Not calling Error::throw\n\n";
+       $class ||= '';
+       my $std = $self->stack_trace_dump();
+       my $title = "------------- EXCEPTION $class -------------";
+       my $footer = "\n" . '-' x CORE::length($title);
+       $text ||= '';
+
+       my $out = "\n$title\n" .
+           "MSG: $text\n". $std . $footer . "\n";
+
+       die $out;
+   }
+}
+
+=head2 debug
+
+ Title   : debug
+ Usage   : $obj->debug("This is debugging output");
+ Function: Prints a debugging message when verbose is > 0
+ Returns : none
+ Args    : message string(s) to print to STDERR
+
+=cut
+
+sub debug{
+   my ($self,@msgs) = @_;
+   # messages go to STDERR only while this object's verbosity is above 0
+   if( defined $self->verbose && $self->verbose > 0 ) { 
+       print STDERR @msgs;
+   }   
+}
+
+=head2 _load_module
+
+ Title   : _load_module
+ Usage   : $self->_load_module("Bio::SeqIO::genbank");
+ Function: Loads up (like use) the specified module at run time on demand.
+ Example : 
+ Returns : TRUE on success. Throws an exception upon failure.
+ Args    : The module to load (_without_ the trailing .pm).
+
+=cut
+
+sub _load_module {
+    my ($self, $name) = @_;
+    # nothing to do when %main:: already has the "_<$name.pm" entry
+    return 1 if $main::{"_<$name.pm"};
+
+    # Untaint operation for safe web-based running: only word characters
+    # and colons are accepted, and the name is re-assigned from the
+    # capture.
+    if ($name =~ /^([\w:]+)$/) {
+	$name = $1;
+    }
+    else {
+	$self->throw("$name is an illegal perl package name");
+    }
+
+    # Split "Foo::Bar.pm" on '::' and let Bio::Root::IO's catfile join
+    # the pieces into the file path handed to require.
+    my $io = Bio::Root::IO->new();
+    my $path = $io->catfile(split(/::/, "$name.pm"));
+
+    eval {
+        require $path;
+    };
+    $self->throw("Failed to load module $name. ".$@) if $@;
+
+    return 1;
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # Invoke each registered cleanup callback on the dying object;
+    # objects with nothing registered return straight away.
+    my @callbacks = $self->_cleanup_methods or return;
+    for my $callback (@callbacks) { $callback->($self) }
+}
+
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/RootI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/RootI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/RootI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,575 @@
+# $Id: RootI.pm,v 1.69.4.4 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::Root::RootI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+# 
+# This was refactored to have chained calls to new instead
+# of chained calls to _initialize
+#
+# added debug and deprecated methods --Jason Stajich 2001-10-12
+# 
+
+=head1 NAME
+
+Bio::Root::RootI - Abstract interface to root object code
+
+=head1 SYNOPSIS
+
+  # any bioperl or bioperl compliant object is a RootI 
+  # compliant object
+
+  $obj->throw("This is an exception");
+
+  eval {
+      $obj->throw("This is catching an exception");
+  };
+
+  if( $@ ) {
+      print "Caught exception";
+  } else {
+      print "no exception";
+  }
+
+  # Using throw_not_implemented() within a RootI-based interface module:
+
+  package Foo;
+  use base qw(Bio::Root::RootI);
+
+  sub foo {
+      my $self = shift;
+      $self->throw_not_implemented;
+  }
+
+
+=head1 DESCRIPTION
+
+This is just a set of methods which do not assume B<anything> about the object
+they are on. The methods provide the ability to throw exceptions with nice
+stack traces.
+
+This is what should be inherited by all Bioperl compliant interfaces, even
+if they are exotic XS/CORBA/Other perl systems.
+
+=head2 Using throw_not_implemented()
+
+The method L<throw_not_implemented()|throw_not_implemented> should be
+called by all methods within interface modules that extend RootI so
+that if an implementation fails to override them, an exception will be
+thrown.
+
+For example, say there is an interface module called C<FooI> that
+provides a method called C<foo()>. Since this method is considered
+abstract within FooI and should be implemented by any module claiming to
+implement C<FooI>, the C<FooI::foo()> method should consist of the
+following:
+
+    sub foo {
+    	my $self = shift;
+    	$self->throw_not_implemented;
+    }
+
+So, if an implementer of C<FooI> forgets to implement C<foo()>
+and a user of the implementation calls C<foo()>, a
+L<Bio::Root::NotImplemented> exception will result.
+
+Unfortunately, failure to implement a method can only be determined at
+run time (i.e., you can't verify that an implementation is complete by
+running C<perl -wc> on it). So it should be standard practice for a test
+of an implementation to check each method and verify that it doesn't
+throw a L<Bio::Root::NotImplemented>.
+
+=head1 CONTACT
+
+Functions originally from Steve Chervitz. Refactored by Ewan
+Birney. Re-refactored by Lincoln Stein.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Root::RootI;
+
+use vars qw($DEBUG $ID $VERBOSITY);
+use strict;
+use Carp 'confess','carp';
+
+use Bio::Root::Version;
+
+BEGIN {
+    # Compile-time defaults for the package globals declared via "use vars".
+    ($ID, $DEBUG, $VERBOSITY) = ('Bio::Root::RootI', 0, 0);
+}
+
+sub new {
+  # Deprecated constructor: warns (unless BIOPERLDEBUG is set in the
+  # environment) and delegates to Bio::Root::Root->new with the same
+  # arguments.
+  my ($class, @args) = @_;
+  unless ( $ENV{'BIOPERLDEBUG'} ) {
+      carp("Use of new in Bio::Root::RootI is deprecated.  Please use Bio::Root::Root instead");
+  }
+  # Load at run time with a plain require: a load failure now surfaces
+  # with its real cause instead of being swallowed by a string eval and
+  # resurfacing as a confusing "can't locate object method" error below.
+  require Bio::Root::Root;
+  return Bio::Root::Root->new(@args);
+}
+
+# for backwards compatibility
+sub _initialize {
+    # No-op kept for backwards compatibility; accepts and ignores any
+    # arguments and always reports success.
+    # (The archived text had "@args" garbled into " at args".)
+    my($self,@args) = @_;
+    return 1;
+}
+
+
+=head2 throw
+
+ Title   : throw
+ Usage   : $obj->throw("throwing exception message")
+ Function: Throws an exception, which, if not caught with an eval brace
+           will provide a nice stack trace to STDERR with the message
+ Returns : nothing
+ Args    : A string giving a descriptive error message
+
+
+=cut
+
+sub throw{
+   # Die with a banner-framed message followed by the stack trace dump.
+   my ($self,$string) = @_;
+
+   my $trace  = $self->stack_trace_dump();
+   my $banner = "\n-------------------- EXCEPTION --------------------\n";
+   my $footer = "-------------------------------------------\n";
+
+   die $banner . "MSG: " . $string . "\n" . $trace . $footer;
+}
+
+=head2 warn
+
+ Title   : warn
+ Usage   : $object->warn("Warning message");
+ Function: Places a warning. What happens now is down to the
+           verbosity of the object  (value of $obj->verbose) 
+            verbosity 0 or not set => small warning
+            verbosity -1 => no warning
+            verbosity 1 => warning with stack trace
+            verbosity 2 => converts warnings into throw
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub warn{
+    # Report $string on STDERR according to the object's verbosity:
+    #   >= 2      escalate to throw()
+    #   <= -1     stay silent
+    #   == 1      warning plus stack trace
+    #   otherwise short warning with a closing rule
+    my ($self,$string) = @_;
+
+    # An unset verbosity is documented as equivalent to 0; normalise it so
+    # the numeric comparisons below never operate on undef.
+    my $verbose = $self->verbose;
+    $verbose = 0 unless defined $verbose;
+
+    if( $verbose >= 2 ) {
+        $self->throw($string);
+    } elsif( $verbose <= -1 ) {
+        return;
+    } elsif( $verbose == 1 ) {
+        my $out = "\n-------------------- WARNING ---------------------\n".
+                "MSG: ".$string."\n";
+        $out .= $self->stack_trace_dump;
+
+        print STDERR $out;
+        return;
+    }
+
+    my $out = "\n-------------------- WARNING ---------------------\n".
+       "MSG: ".$string."\n".
+           "---------------------------------------------------\n";
+    print STDERR $out;
+}
+
+=head2 deprecated
+
+ Title   : deprecated
+ Usage   : $obj->deprecated("Method X is deprecated");
+ Function: Prints a message about deprecation 
+           unless verbose is < 0 (which means be quiet)
+ Returns : none
+ Args    : Message string to print to STDERR
+
+=cut
+
+sub deprecated{
+   # Print a deprecation message plus stack trace on STDERR unless the
+   # object's verbosity is negative (quiet mode).
+   my ($self,$msg) = @_;
+
+   # Treat an unset verbosity as 0 so the comparison never sees undef.
+   my $verbose = $self->verbose;
+   $verbose = 0 unless defined $verbose;
+
+   if( $verbose >= 0 ) {
+       print STDERR $msg, "\n", $self->stack_trace_dump;
+   }
+}
+
+=head2 stack_trace_dump
+
+ Title   : stack_trace_dump
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub stack_trace_dump{
+   # Format the call stack as one "STACK <function> <file>:<line>" line
+   # per frame and return it as a single string.
+   my ($self) = @_;
+
+   my @stack = $self->stack_trace();
+
+   # Discard the three innermost entries so the dump starts further out
+   # than the trace machinery itself.
+   splice(@stack, 0, 3);
+
+   # Start from an empty string: callers concatenate the result, so an
+   # empty stack must yield '' rather than undef.
+   my $out = '';
+
+   foreach my $frame ( @stack ) {
+       my ($module,$file,$position,$function) = @{$frame};
+       $out .= "STACK $function $file:$position\n";
+   }
+
+   return $out;
+}
+
+
+=head2 stack_trace
+
+ Title   : stack_trace
+ Usage   : @stack = $self->stack_trace
+ Function: Returns a list of array references holding stack trace info,
+           each one derived from a caller(stack_number) call
+ Returns : list of array references
+ Args    : none
+
+
+=cut
+
+sub stack_trace{
+   # Walk the Perl call stack with caller() and return a list of array
+   # refs, innermost first, ending with an entry labelled 'toplevel'.
+   my ($self) = @_;
+
+   my $i = 0;
+   my @out = ();
+   # $prev is the entry collected on the previous pass; it starts as an
+   # empty placeholder so the first pass has something to label.
+   my $prev = [];
+   while( my @call = caller($i++)) {
+       # major annoyance that caller puts caller context as
+       # function name. Hence some monkeying around...
+       # i.e. the sub name (index 3) of each frame is moved onto the
+       # previous entry before that entry is emitted.
+       $prev->[3] = $call[3];
+       push(@out,$prev);
+       $prev = \@call;
+   }
+   # No further frame exists to name the last entry, so label it 'toplevel'.
+   $prev->[3] = 'toplevel';
+   push(@out,$prev);
+   return @out;
+}
+
+
+=head2 _rearrange
+
+ Usage     : $object->_rearrange( array_ref, list_of_arguments)
+ Purpose   : Rearranges named parameters to requested order.
+ Example   : $self->_rearrange([qw(SEQUENCE ID DESC)],@param);
+           : Where @param = (-sequence => $s,
+	       :                 -desc     => $d,
+	       :                 -id       => $i);
+ Returns   : @params - an array of parameters in the requested order.
+           : The above example would return ($s, $i, $d).
+           : Unspecified parameters will return undef. For example, if
+           :        @param = (-sequence => $s);
+           : the above _rearrange call would return ($s, undef, undef)
+ Argument  : $order : a reference to an array which describes the desired
+           :          order of the named parameters.
+           : @param : an array of parameters, either as a list (in
+           :          which case the function simply returns the list),
+           :          or as an associative array with hyphenated tags
+           :          (in which case the function sorts the values 
+           :          according to @{$order} and returns that new array.)
+	       :	      The tags can be upper, lower, or mixed case
+           :          but they must start with a hyphen (at least the
+           :          first one should be hyphenated.)
+ Source    : This function was taken from CGI.pm, written by Dr. Lincoln
+           : Stein, and adapted for use in Bio::Seq by Richard Resnick and
+           : then adapted for use in Bio::Root::Object.pm by Steve Chervitz,
+           : then migrated into Bio::Root::RootI.pm by Ewan Birney.
+ Comments  :
+           : Uppercase tags are the norm, 
+           : (SAC)
+           : This method may not be appropriate for method calls that are
+           : within in an inner loop if efficiency is a concern.
+           :
+           : Parameters can be specified using any of these formats:
+           :  @param = (-name=>'me', -color=>'blue');
+           :  @param = (-NAME=>'me', -COLOR=>'blue');
+           :  @param = (-Name=>'me', -Color=>'blue');
+           :  @param = ('me', 'blue');
+           : A leading hyphenated argument is used by this function to 
+           : indicate that named parameters are being used.
+           : Therefore, the ('me', 'blue') list will be returned as-is.
+           :
+	       : Note that Perl will confuse unquoted, hyphenated tags as 
+           : function calls if there is a function of the same name 
+           : in the current namespace:
+           :    -name => 'foo' is interpreted as -&name => 'foo'
+	       :
+           : For ultimate safety, put single quotes around the tag:
+	       : ('-name'=>'me', '-color' =>'blue');
+           : This can be a bit cumbersome and I find not as readable
+           : as using all uppercase, which is also fairly safe:
+	       : (-NAME=>'me', -COLOR =>'blue');
+	       :
+           : Personal note (SAC): I have found all uppercase tags to
+           : be more manageable: it involves less single-quoting,
+           : the key names stand out better, and there are no method naming 
+           : conflicts.
+           : The drawbacks are that it's not as easy to type as lowercase,
+           : and lots of uppercase can be hard to read.
+           :
+           : Regardless of the style, it greatly helps to line
+	       : the parameters up vertically for long/complex lists.
+           :
+           : Note that if @param is a single string that happens to start with
+           : a dash, it will be treated as a hash key and probably fail to
+           : match anything in the array_ref, so not be returned as normally
+           : happens when @param is a simple list and not an associative array.
+
+=cut
+
+sub _rearrange {
+    # Reorder named (-key => value) parameters into the order requested by
+    # the array ref $order; a plain positional list is returned untouched.
+    my $dummy = shift;
+    my $order = shift;
+
+    # Positional call: the first argument does not start with a dash.
+    return @_ unless (substr($_[0]||'',0,1) eq '-');
+
+    # Pad an odd-length list so every key has a value slot.
+    push @_,undef unless $#_ %2;
+    my %param;
+    while( @_ ) {
+        (my $key = shift) =~ tr/a-z\055/A-Z/d; #deletes all dashes!
+        $param{$key} = shift;
+    }
+
+    # Upper-case the requested names for the lookup (bug #1343) on a
+    # temporary copy, so the caller's $order array is left unmodified.
+    return @param{ map { uc($_) } @$order };
+}
+
+
+#----------------'
+sub _rearrange_old {
+#----------------
+    # Older implementation of _rearrange, kept alongside the current one.
+    # Takes the desired key order (array ref) and the parameter list, and
+    # returns the values in that order; positional lists pass through.
+    # (The archived text had "@param" garbled into " at param".)
+    my($self,$order,@param) = @_;
+
+    # JGRG -- This is wrong, because we don't want
+    # to assign empty string to anything, and this
+    # code is actually returning an array 1 less
+    # than the length of @param:
+
+    ## If there are no parameters, we simply wish to return
+    ## an empty array which is the size of the @{$order} array.
+    #return ('') x $#{$order} unless @param;
+
+    # ...all we need to do is return an empty array:
+    # return unless @param;
+
+    # If we've got parameters, we need to check to see whether
+    # they are named or simply listed. If they are listed, we
+    # can just return them.
+
+    # The mod test fixes bug where a single string parameter beginning with '-' gets lost.
+    # This tends to happen in error messages such as: $obj->throw("-id not defined")
+    return @param unless (defined($param[0]) && $param[0]=~/^-/o && ($#param % 2));
+
+    # Tester
+#    print "\n_rearrange() named parameters:\n";
+#    my $i; for ($i=0;$i<@param;$i+=2) { printf "%20s => %s\n", $param[$i],$param[$i+1]; }; <STDIN>;
+
+    # Now we've got to do some work on the named parameters.
+    # The next few lines strip out the '-' characters which
+    # precede the keys, and capitalize them.
+    for (my $i=0;$i<@param;$i+=2) {
+        $param[$i]=~s/^\-//;
+        $param[$i]=~tr/a-z/A-Z/;
+    }
+
+    # Now we'll convert the @param list into an associative array.
+    # local($^W) = 0;  # prevent "odd number of elements" warning with -w.
+    my(%param) = @param;
+
+    # my(@return_array);
+
+    # What we intend to do is loop through the @{$order} variable,
+    # and for each value, we use that as a key into our associative
+    # array, pushing the value at that key onto our return array.
+    # my($key);
+
+    #foreach (@{$order}) {
+        # my($value) = $param{$key};
+        # delete $param{$key};
+        #push(@return_array,$param{$_});
+    #}
+
+    return @param{@{$order}};
+
+#    print "\n_rearrange() after processing:\n";
+#    my $i; for ($i=0;$i<@return_array;$i++) { printf "%20s => %s\n", ${$order}[$i], $return_array[$i]; } <STDIN>;
+
+    # return @return_array;
+}
+
+=head2 _register_for_cleanup
+
+ Title   : _register_for_cleanup
+ Usage   : -- internal --
+ Function: Register a method to be called at DESTROY time. This is useful
+           and sometimes essential in the case of multiple inheritance for
+           classes coming second in the sequence of inheritance.
+ Returns : 
+ Args    : a code reference
+
+The code reference will be invoked with the object as the first
+argument, as per a method.  You may register an unlimited number of
+cleanup methods.
+
+=cut
+
+sub _register_for_cleanup {
+  # Abstract at this level: always defers to throw_not_implemented().
+  my $self   = shift;
+  my $method = shift;
+  return $self->throw_not_implemented();
+}
+
+=head2 _unregister_for_cleanup
+
+ Title   : _unregister_for_cleanup
+ Usage   : -- internal --
+ Function: Remove a method that has previously been registered to be called
+           at DESTROY time.
+           Has no effect if the code reference has not previously been registered.
+ Returns : nothing
+ Args    : a code reference
+
+=cut
+
+sub _unregister_for_cleanup {
+  # Abstract at this level: always defers to throw_not_implemented().
+  my @params = @_;
+  my $self   = $params[0];
+  my $method = $params[1];
+  return $self->throw_not_implemented();
+}
+
+=head2 _cleanup_methods
+
+ Title   : _cleanup_methods
+ Usage   : -- internal --
+ Function: Return current list of registered cleanup methods.
+ Returns : list of coderefs
+ Args    : none
+
+=cut
+
+sub _cleanup_methods {
+  # Interface-level stub: returns nothing, after issuing a deprecation
+  # notice unless BIOPERLDEBUG is set or the object's verbosity is positive.
+  my ($self) = @_;
+  if ( !$ENV{'BIOPERLDEBUG'} && !( $self->verbose  > 0 ) ) {
+      carp("Use of Bio::Root::RootI is deprecated.  Please use Bio::Root::Root instead");
+  }
+  return;
+}
+
+=head2 throw_not_implemented
+
+ Purpose : Throws a Bio::Root::NotImplemented exception.
+           Intended for use in the method definitions of 
+           abstract interface modules where methods are defined
+           but are intended to be overridden by subclasses.
+ Usage   : $object->throw_not_implemented();
+ Example : sub method_foo { 
+             $self = shift; 
+             $self->throw_not_implemented();
+           }
+ Returns : n/a
+ Args    : n/a
+ Throws  : A Bio::Root::NotImplemented exception.
+           The message of the exception contains
+             - the name of the method 
+             - the name of the interface 
+             - the name of the implementing class 
+
+  	   If this object has a throw() method, $self->throw will be used.
+           If the object doesn't have a throw() method, 
+           Carp::confess() will be used.
+
+
+=cut
+
+#'
+
+sub throw_not_implemented {
+    my ($self) = @_;
+
+    # The message is built first; it names the abstract method and the
+    # package of the object it was called on.
+    my $message = $self->_not_implemented_msg;
+
+    # Objects without a throw() method fall back to Carp::confess.
+    confess $message unless $self->can('throw');
+
+    # Bio::Root::Root::throw() knows how to check for Error.pm and will
+    # throw an Error-derived object of the specified class
+    # (Bio::Root::NotImplemented), which is defined in Bio::Root::Exception.
+    # If Error.pm is not available, the name of the class is just included
+    # in the error message.
+    $self->throw(-text=>$message,
+                 -class=>'Bio::Root::NotImplemented');
+}
+
+
+=head2 warn_not_implemented
+
+ Purpose : Generates a warning that a method has not been implemented.
+           Intended for use in the method definitions of 
+           abstract interface modules where methods are defined
+           but are intended to be overridden by subclasses.
+           Generally, throw_not_implemented() should be used,
+           but warn_not_implemented() may be used if the method isn't
+           considered essential and convenient no-op behavior can be 
+           provided within the interface.
+ Usage   : $object->warn_not_implemented( method-name-string );
+ Example : $self->warn_not_implemented( "get_foobar" );
+ Returns : Calls $self->warn on this object, if available.
+           If the object doesn't have a warn() method,
+           Carp::carp() will be used.
+ Args    : n/a
+
+
+=cut
+
+#'
+
+sub warn_not_implemented {
+    # Like throw_not_implemented(), but only warns: uses the object's own
+    # warn() when it has one, otherwise Carp::carp.
+    my ($self) = @_;
+    my $message = $self->_not_implemented_msg;
+    return $self->warn( $message ) if $self->can('warn');
+    carp $message ;
+}
+
+# Unify 'not implemented' message. -Juguang
+sub _not_implemented_msg {
+    my ($self) = @_;
+    my $package = ref $self;
+    # Frame 2 is the sub that invoked throw/warn_not_implemented, i.e. the
+    # abstract method itself (this sub must be called directly from them).
+    my $meth = (caller(2))[3];
+    return "Abstract method \"$meth\" is not implemented by package $package.\n"
+         . "This is not your fault - author of $package should be blamed!\n";
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/Storable.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/Storable.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/Storable.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,611 @@
+# $Id: Storable.pm,v 1.7.4.1 2006/10/02 23:10:23 sendu Exp $
+#
+# BioPerl module for Bio::Root::Storable
+#
+# Cared for by Will Spooner <whs at sanger.ac.uk>
+#
+# Copyright Will Spooner <whs at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Root::Storable - object serialisation methods
+
+=head1 SYNOPSIS
+
+  my $storable = Bio::Root::Storable->new();
+
+  # Store/retrieve using class retriever
+  my $token     = $storable->store();
+  my $storable2 = Bio::Root::Storable->retrieve( $token );
+
+  # Store/retrieve using object retriever
+  my $storable2 = $storable->new_retrievable();
+  $storable2->retrieve();
+
+
+=head1 DESCRIPTION
+
+Generic module that allows objects to be safely stored/retrieved from
+disk.  Can be inherited by any BioPerl object. As it will not usually
+be the first class in the inheritance list, _initialise_storable()
+should be called during object instantiation.
+
+Object storage is recursive; if the object being stored contains other
+storable objects, these will be stored separately, and replaced by a
+skeleton object in the parent hierarchy. When the parent is later
+retrieved, its children remain in the skeleton state until explicitly
+retrieved by the parent. This lazy-retrieve approach has obvious
+memory efficiency benefits for certain applications.
+
+
+By default, objects are stored in binary format (using the Perl
+Storable module). Earlier versions of Perl5 do not include Storable as
+a core module. If this is the case, ASCII object storage (using the
+Perl Data::Dumper module) is used instead.
+
+ASCII storage can be enabled by default by setting the value of
+$Bio::Root::Storable::BINARY to false.
+
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bio.perl.org
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Will Spooner
+
+Email whs at sanger.ac.uk
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+package Bio::Root::Storable;
+
+use strict;
+use Data::Dumper qw( Dumper );
+
+
+use Bio::Root::IO;
+
+use vars qw( $BINARY );
+use base qw(Bio::Root::Root);
+
+BEGIN{
+  # Prefer binary serialisation: when the Storable module can be loaded,
+  # import its freeze/thaw functions and switch $BINARY on.
+  my $have_storable = eval { require Storable };
+  if( $have_storable ){
+    Storable->import( 'freeze', 'thaw' );
+    $BINARY = 1;
+  }
+}
+
+#----------------------------------------------------------------------
+
+=head2 new
+
+  Arg [1]   : -workdir  => filesystem path,
+              -template => tmpfile template,
+              -suffix   => tmpfile suffix,
+  Function  : Builds a new Bio::Root::Storable inheriting object
+  Returntype: Bio::Root::Storable inheriting object
+  Exceptions: 
+  Caller    : 
+  Example   : $storable = Bio::Root::Storable->new()
+
+=cut
+
+sub new {
+  # Build the object through the parent constructor, then set up the
+  # storable-specific attributes.
+  my ($caller, @args) = @_;
+  my $self = $caller->SUPER::new(@args);
+  # Forward the constructor arguments: _initialise_storable reads
+  # -workdir, -template and -suffix from its own argument list, so calling
+  # it without them silently ignored the documented options.
+  $self->_initialise_storable(@args);
+  return $self;
+}
+
+#----------------------------------------------------------------------
+
+=head2 _initialise_storable
+
+  Arg [1]   : See 'new' method
+  Function  : Initialises storable-specific attributes
+  Returntype: boolean
+  Exceptions: 
+  Caller    : 
+  Example   : 
+
+=cut
+
+sub _initialise_storable {
+  # Pick -workdir, -template and -suffix out of the argument list and pass
+  # each true value to the accessor of the same name. Always returns 1.
+  my ($self, @named) = @_;
+  my @attributes = qw(workdir template suffix);
+  my %value_for;
+  @value_for{@attributes} =
+    $self->_rearrange([qw(WORKDIR TEMPLATE SUFFIX)], @named );
+  foreach my $attribute (@attributes) {
+    $self->$attribute( $value_for{$attribute} ) if $value_for{$attribute};
+  }
+  return 1;
+}
+
+
+
+#----------------------------------------------------------------------
+
+=head2 statefile
+
+  Arg [1]   : string (optional)
+  Function  : Accessor for the file to write state into.
+              Should not normally be used as a setter - let Root::IO
+              do this for you.
+  Returntype: string
+  Exceptions: 
+  Caller    : Bio::Root::Storable->store
+  Example   : my $statefile = $obj->statefile();
+
+=cut
+
+sub statefile{
+  # Get/set the path of the state file. When none is recorded yet, a
+  # temporary file is requested from Bio::Root::IO and its name remembered.
+  my ($self, @new_value) = @_;
+  my $slot = '_statefile';
+
+  $self->{$slot} = $new_value[0] if @new_value;
+  return $self->{$slot} if $self->{$slot};
+
+  my $workdir  = $self->workdir;
+  my $template = $self->template;
+  my $suffix   = $self->suffix;
+
+  # TODO: add cleanup and unlink methods. For now, we'll keep the
+  # statefile hanging around.
+  my @tempfile_args = ( CLEANUP=>0, UNLINK=>0 );
+  push( @tempfile_args, 'TEMPLATE' => $template ) if $template;
+  push( @tempfile_args, 'DIR'      => $workdir  ) if $workdir;
+  push( @tempfile_args, 'SUFFIX'   => $suffix   ) if $suffix;
+  my( $fh, $file ) = Bio::Root::IO->new->tempfile( @tempfile_args );
+
+  $self->{$slot} = $file;
+  return $self->{$slot};
+}
+
+#----------------------------------------------------------------------
+
+=head2 workdir
+
+  Arg [1]   : string (optional) (TODO - convert to array for x-platform)
+  Function  : Accessor for the statefile directory. Defaults to
+              $Bio::Root::IO::TEMPDIR
+  Returntype: string
+  Exceptions: 
+  Caller    : 
+  Example   : $obj->workdir('/tmp/foo');
+
+=cut
+
+sub workdir {
+  # Get/set the directory used for state files; falls back to
+  # $Bio::Root::IO::TEMPDIR when nothing (or a false value) is stored.
+  # The unused caller() lookup formerly done on every set was removed.
+  my $key = '_workdir';
+  my $self = shift;
+  if( @_ ){
+    # Replacing an existing workdir is reported through debug().
+    $self->{$key} && $self->debug("Overwriting workdir: probably bad!");
+    $self->{$key} = shift
+  }
+  $self->{$key} ||= $Bio::Root::IO::TEMPDIR;
+  return $self->{$key};
+}
+
+#----------------------------------------------------------------------
+
+=head2 template
+
+  Arg [1]   : string (optional)
+  Function  : Accessor for the statefile template. Defaults to XXXXXXXX
+  Returntype: string
+  Exceptions: 
+  Caller    : 
+  Example   : $obj->template('RES_XXXXXXXX');
+
+=cut
+
+sub template {
+  # Get/set the temp-file name template; defaults to 'XXXXXXXX'.
+  my ($self, @new_value) = @_;
+  $self->{'_template'} = $new_value[0] if @new_value;
+  $self->{'_template'} = 'XXXXXXXX' unless $self->{'_template'};
+  return $self->{'_template'};
+}
+
+#----------------------------------------------------------------------
+
+=head2 suffix
+
+  Arg [1]   : string (optional)
+  Function  : Accessor for the statefile suffix.
+  Returntype: string
+  Exceptions: 
+  Caller    : 
+  Example   : $obj->suffix('.state');
+
+=cut
+
+sub suffix {
+  # Get/set the temp-file suffix; there is no default.
+  my ($self, @new_value) = @_;
+  $self->{'_suffix'} = $new_value[0] if @new_value;
+  return $self->{'_suffix'};
+}
+
+#----------------------------------------------------------------------
+
+=head2 new_retrievable
+
+  Arg [1]   : Same as for 'new'
+  Function  : Similar to store, except returns a 'skeleton' of the calling
+              object, rather than the statefile.
+              The skeleton can be repopulated by calling 'retrieve'. This
+              will be a clone of the original object.
+  Returntype: Bio::Root::Storable inhereting object
+  Exceptions: 
+  Caller    : 
+  Example   : my $skel = $obj->new_retrievable(); # skeleton 
+              $skel->retrieve();                  # clone
+
+=cut
+
+sub new_retrievable{
+   # Store the object and hand back a lightweight skeleton of the same
+   # class that remembers where the state lives; an object that is itself
+   # still a skeleton is simply cloned.
+   my ($self, @args) = @_;
+
+   $self->_initialise_storable( @args );
+
+   return $self->clone if $self->retrievable; # Clone retrievable
+
+   my @skeleton = ( _statefile   => $self->store(@args),
+                    _workdir     => $self->workdir,
+                    _suffix      => $self->suffix,
+                    _template    => $self->template,
+                    _retrievable => 1 );
+   return bless( { @skeleton }, ref( $self ) );
+}
+
+#----------------------------------------------------------------------
+
+=head2 retrievable
+
+  Arg [1]   : none
+  Function  : Reports whether the object is in 'skeleton' state, and the
+              'retrieve' method can be called.
+  Returntype: boolean
+  Exceptions: 
+  Caller    : 
+  Example   : if( $obj->retrievable ){ $obj->retrieve }
+
+=cut
+
+sub retrievable {
+   # Get/set the flag marking the object as a not-yet-retrieved skeleton.
+   my ($self, @new_value) = @_;
+   $self->{_retrievable} = $new_value[0] if @new_value;
+   return $self->{_retrievable};
+}
+
+#----------------------------------------------------------------------
+
+=head2 token
+
+  Arg [1]   : None
+  Function  : Accessor for token attribute
+  Returntype: string. Whatever retrieve needs to retrieve.
+              This base implementation returns the statefile
+  Exceptions: 
+  Caller    : 
+  Example   : my $token = $obj->token();
+
+=cut
+
+sub token{
+  # The token handed to retrieve(); in this base class it is simply the
+  # state file name.
+  my ($self) = @_;
+  return $self->statefile;
+}
+
+
+#----------------------------------------------------------------------
+
+=head2 store
+
+  Arg [1]   : none
+  Function  : Saves a serialised representation of the object structure
+              to disk. Returns the name of the file that the object was
+              saved to.
+  Returntype: string
+
+  Exceptions: 
+  Caller    : 
+  Example   : my $token = $obj->store();
+
+=cut
+
+sub store{
+  # Serialise the object, write it to its state file and return that
+  # file's name.
+  my ($self) = @_;
+  my $statefile  = $self->statefile;
+  my $serialised = $self->serialise;
+  my $writer     = Bio::Root::IO->new( ">$statefile" );
+  $writer->_print( $serialised );
+  $self->debug( "STORING $self to $statefile\n" );
+  return $statefile;
+}
+
+#----------------------------------------------------------------------
+
+=head2 serialise
+
+  Arg [1]   : none
+  Function  : Prepares the serialised representation of the object.
+              Object attribute names starting with '__' are skipped.
+              This is useful for those that do not serialise too well
+              (e.g. filehandles).
+              Attributes are examined for other storable objects. If these
+              are found they are serialised separately using 'new_retrievable'
+  Returntype: string
+  Exceptions: 
+  Caller    : 
+  Example   : my $serialised = $obj->serialise();
+
+=cut
+
+sub serialise{
+  my ($self) = @_;
+
+  # The frozen output is built from a fresh object of the same class, not
+  # from $self itself.
+  my $store_obj = bless( {}, ref( $self ) );
+
+  my %retargs = ( -workdir =>$self->workdir,
+                  -suffix  =>$self->suffix,
+                  -template=>$self->template );
+
+  # Replace a Bio::Root::Storable object by its retrievable skeleton;
+  # anything else is passed through unchanged.
+  my $placeholder_for = sub {
+    my ($item) = @_;
+    return $item->new_retrievable( %retargs )
+      if ref($item) =~ /^Bio::/ and $item->isa('Bio::Root::Storable');
+    return $item;
+  };
+
+  # Assume that other storable bio objects held by this object are
+  # only 1-deep: direct values, array elements and hash values are
+  # examined, nothing nested further down.
+  foreach my $key( keys( %$self ) ){
+    next if $key =~ /^__/; # Ignore keys starting with '__'
+    my $value = $self->{$key};
+    my $type  = ref( $value );
+
+    if( $type eq 'ARRAY' ){
+      # Copy the array, swapping in skeletons where needed
+      $store_obj->{$key} = [ map { $placeholder_for->( $_ ) } @$value ];
+    }
+    elsif( $type eq 'HASH' ){
+      # Copy the hash, swapping in skeletons where needed
+      my %copy;
+      foreach my $inner_key( keys %$value ){
+        $copy{$inner_key} = $placeholder_for->( $value->{$inner_key} );
+      }
+      $store_obj->{$key} = \%copy;
+    }
+    else{
+      # Plain scalars, storable objects and any other reference
+      $store_obj->{$key} = $placeholder_for->( $value );
+    }
+  }
+  $store_obj->retrievable(0); # Once deserialised, obj not retrievable
+  return $self->_freeze( $store_obj );
+}
+
+
+#----------------------------------------------------------------------
+
+=head2 retrieve
+
+  Arg [1]   : string; filesystem location of the state file to be retrieved
+  Function  : Retrieves a stored object from disk.
+              Note that the retrieved object will be blessed into its original
+              class, and not the
+  Returntype: Bio::Root::Storable inhereting object
+  Exceptions: 
+  Caller    : 
+  Example   : my $obj = Bio::Root::Storable->retrieve( $token );
+
+=cut
+
+sub retrieve{
+  # Load a stored object from its state file. Works either as a class
+  # method given the token/state file name, or as an object method on a
+  # retrievable skeleton, which is then filled in from its own statefile.
+  # Throws if the file is missing, cannot be thawed, or yields no reference.
+  my( $caller, $statefile ) = @_;
+
+  my $self = {};
+  my $class = ref( $caller ) || $caller;
+
+  # Is this a call on a retrievable object?
+  if( ref( $caller ) and
+      $caller->retrievable ){
+    $self = $caller;
+    $statefile = $self->statefile;
+  }
+  bless( $self, $class );
+
+  # Recover serialised object
+  if( ! -f $statefile ){
+    $self->throw( "Token $statefile is not found" );
+  }
+  my $io = Bio::Root::IO->new( $statefile );
+  # Undefine the input record separator for the rest of this sub so the
+  # read below is not split into lines.
+  local $/ = undef();
+  my $state_str = $io->_readline('-raw'=>1);
+
+  # Dynamic-load modules required by stored object
+  # Up to 10 attempts: when thawing fails with a "Cannot restore
+  # overloading ... (package X)" error, X is required and the thaw retried;
+  # any other failure is rethrown immediately.
+  # NOTE(review): thawing (and the string-eval require below) acts on the
+  # contents of the state file - presumably only trusted files are expected.
+  my $stored_obj;
+  my $success;
+  for( my $i=0; $i<10; $i++ ){
+    eval{ $stored_obj = $self->_thaw( $state_str ) };
+    if( ! $@ ){ $success=1; last }
+    my $package;
+    if( $@ =~ /Cannot restore overloading(.*)/i ){
+      my $postmatch = $1; #'
+      if( $postmatch =~ /\(package +([\w\:]+)\)/ ) {
+        $package = $1;
+      }
+    }
+    if( $package ){
+      eval "require $package"; $self->throw($@) if $@;
+    }
+    else{ $self->throw($@) }
+  }
+  if( ! $success ){ $self->throw("maximum number of requires exceeded" ) }
+
+  if( ! ref( $stored_obj ) ){
+    $self->throw( "Token $statefile returned no data" );
+  }
+  map{ $self->{$_} = $stored_obj->{$_} } keys %$stored_obj; # Copy hash keys
+  $self->retrievable(0);
+
+  # Maintain class of stored obj
+  # NOTE(review): $self keeps the class it was blessed into above; only the
+  # stored object's hash entries are copied across.
+  return $self;
+}
+
+#----------------------------------------------------------------------
+
+
+=head2 clone
+
+  Arg [1]   : none
+  Function  : Returns a clone of the calling object
+  Returntype: Bio::Root::Storable inhereting object
+  Exceptions: 
+  Caller    : 
+  Example   : my $clone = $obj->clone();
+
+=cut
+
+sub clone {
+  # Copy the object by freezing it and thawing the result.
+  my ($self) = @_;
+  return $self->_thaw( scalar $self->_freeze( $self ) );
+}
+
+
+
+#----------------------------------------------------------------------
+
+=head2 remove
+
+  Arg [1]   : none
+  Function  : Clears the stored object from disk
+  Returntype: boolean
+  Exceptions: 
+  Caller    : 
+  Example   : $obj->remove();
+
+=cut
+
+sub remove {
+  # Delete the state file if it exists; always returns 1.
+  my ($self) = @_;
+  unlink( $self->statefile ) if -e $self->statefile;
+  return 1;
+}
+
+#----------------------------------------------------------------------
+
+=head2 _freeze
+
+  Arg [1]   : variable
+  Function  : Converts whatever is in the arg into a string.
+              Uses either Storable::freeze or Data::Dumper::Dump
+              depending on the value of $Bio::Root::Storable::BINARY
+  Returntype: 
+  Exceptions: 
+  Caller    : 
+  Example   : 
+
+=cut
+
+sub _freeze {
+  # Turn $data into a string: Storable's freeze() when $BINARY is true,
+  # otherwise a Data::Dumper dump of a reference to $data, named "code".
+  my $self = shift;
+  my $data = shift;
+  if( $BINARY ){
+    return freeze( $data );
+  }
+  else{
+    # Enable Purity only for this dump; "local" restores the caller's
+    # global Data::Dumper setting when this sub returns.
+    local $Data::Dumper::Purity = 1;
+    return Data::Dumper->Dump( [\$data],["*code"] );
+  }
+}
+
+#----------------------------------------------------------------------
+
+=head2 _thaw
+
+  Arg [1]   : string
+  Function  : Converts the string into a perl 'whatever'.
+              Uses either Storable::thaw or eval depending on the
+              value of $Bio::Root::Storable::BINARY.
+              Note; the string arg should have been created with 
+              the _freeze method, or strange things may occur!
+  Returntype: variable
+  Exceptions: 
+  Caller    : 
+  Example   : 
+
+=cut
+
+sub _thaw {
+  # Inverse of _freeze: Storable's thaw() when $BINARY is true, otherwise
+  # the string is evaluated as Perl and the resulting reference unwrapped.
+  # Throws if the eval fails or does not produce a reference to a reference.
+  my $self = shift;
+  my $data = shift;
+  if( $BINARY ){ return thaw( $data ) }
+  else{ 
+    # NOTE(review): string eval runs whatever Perl the string contains;
+    # presumably only strings produced by _freeze are expected here.
+    # $code is declared before the eval statement so it is in scope for the
+    # evaluated text (_freeze names its dump "code" - TODO confirm the text
+    # relies on this variable).
+    my $code; 
+    $code = eval( $data ) ;
+    if($@) {
+      $self->throw( "eval: $@" );
+    }   
+    ref( $code ) eq 'REF' || 
+      $self->throw( "Serialised string was not a scalar ref" );
+    return $$code;
+  }
+}
+
+
+
+
+#----------------------------------------------------------------------
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Root/Version.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Root/Version.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Root/Version.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,106 @@
+# $Id: Version.pm,v 1.9.4.7 2007/01/23 11:25:49 sendu Exp $
+#
+# BioPerl module for Bio::Root::Version
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Root::Version - provide global, distribution-level versioning
+
+=head1 SYNOPSIS
+
+  package Bio::Tools::NiftyFeature;
+  require Bio::Root::RootI;
+
+
+  # later, in client code:
+  package main;
+  use Bio::Tools::NiftyFeature 3.14;
+
+
+  ## alternative usage: NiftyFeature defines own $VERSION:
+  package Bio::Tools::NiftyFeature;
+  our $VERSION = 9.8;
+
+  # later in client code:
+  package main;
+
+  # ensure we're using an up-to-date BioPerl distribution
+  use Bio::Perl 3.14;
+
+  # NiftyFeature has its own versioning scheme:
+  use Bio::Tools::NiftyFeature 9.8;
+
+=head1 DESCRIPTION
+
+This module provides a mechanism by which all other BioPerl modules
+can share the same $VERSION, without manually synchronizing each file.
+
+Bio::Root::RootI itself uses this module, so any module that directly
+(or indirectly) uses Bio::Root::RootI will get a global $VERSION
+variable set if it's not already.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Root::Version;
+use strict;
+
+# The single version number shared by the distribution.
+our $VERSION = 1.005002_102;
+$VERSION = eval $VERSION;
+
+# Called on "use Bio::Root::Version". Walks up the call stack and gives
+# every Bio:: package found there a $VERSION, unless it already has one.
+sub import {
+    no strict 'refs';    # package variables are addressed by name below
+    for (my $depth = 0; my $pkg = caller($depth); $depth++) {
+        next unless $pkg =~ m/^Bio::/;
+        next if defined ${$pkg . "::VERSION"};
+        ${$pkg . "::VERSION"} = $VERSION;
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastStatistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastStatistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastStatistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+# 
+#
+# BioPerl module for wrapping Blast statistics
+#
+# Cared for by Chad Matsalla (bioinformatics1 at dieselwurks dot com)
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::BlastStatistics - An object for Blast statistics
+
+=head1 SYNOPSIS
+
+          # this is a wrapper to hold the statistics from a Blast report
+     my $bs = $result->get_statistics();
+          # you can get a statistic generically, like this:
+     my $kappa  = $bs->get_statistic("kappa");
+          # or specifically, like this:
+     my $kappa2 = $bs->get_kappa();
+
+
+=head1 DESCRIPTION
+
+This is a basic container to hold the statistics returned from a Blast.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks dot com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::BlastStatistics;
+use strict;
+
+# Object preamble - inherits from Bio::Root::RootI and Bio::Search::StatisticsI
+
+
+use base qw(Bio::Root::RootI Bio::Search::StatisticsI);
+
+
+
+
+
+sub new {
+    # Nothing is initialised here: the arguments are handed unchanged to
+    # the parent constructor and its result is returned as the object.
+    my $class  = shift;
+    my $object = $class->SUPER::new(@_);
+    return $object;
+}
+
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : $statistic_object->get_statistic($statistic_name);
+ Function: Get the value of a statistic named $statistic_name
+ Returns : A scalar that should be a string
+ Args    : A scalar that should be a string
+
+=cut
+
+sub get_statistic {
+    # Statistics live directly in the object hash, keyed by their name.
+    my $self   = shift;
+    my ($name) = @_;
+    return $self->{$name};
+}
+
+
+=head2 set_statistic
+
+ Title   : set_statistic
+ Usage   : $statistic_object->set_statistic($statistic_name => $statistic_value);
+ Function: Set the value of a statistic named $statistic_name to $statistic_value
+ Returns : Void
+ Args    : A hash containing name=>value pairs
+
+=cut
+
+sub set_statistic {
+    # Copy each name => value pair into the object hash, replacing any
+    # statistic already stored under the same name.
+    my $self     = shift;
+    my %value_of = @_;
+    for my $name (keys %value_of) {
+        $self->{$name} = $value_of{$name};
+    }
+}
+
+
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastStatistics.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastUtils.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastUtils.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/BlastUtils.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,528 @@
+=head1 NAME
+
+Bio::Search::BlastUtils - Utility functions for Bio::Search:: BLAST objects
+
+=head1 SYNOPSIS
+
+ # This module is just a collection of subroutines, not an object.
+
+See L<Bio::Search::Hit::BlastHit>.
+
+=head1 DESCRIPTION
+
+The BlastUtils.pm module is a collection of subroutines used primarily by
+Bio::Search::Hit::BlastHit objects for some of the additional
+functionality, such as HSP tiling. Right now, the BlastUtils is just a
+collection of methods, not an object, and it's tightly coupled to
+Bio::Search::Hit::BlastHit. A goal for the future is to generalize it
+to work based on the Bio::Search interfaces, then it can work with any
+objects that implement them.
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+=cut
+
+#'
+
+package Bio::Search::BlastUtils;
+use Bio::Root::Version;
+
+
+=head2 tile_hsps
+
+ Usage     : tile_hsps( $sbjct );
+           : This is called automatically by Bio::Search::Hit::BlastHit 
+           : during object construction or
+           : as needed by methods that rely on having tiled data.
+ Purpose   : Collect statistics about the aligned sequences in a set of HSPs.
+           : Calculates the following data across all HSPs: 
+           :    -- total alignment length 
+           :    -- total identical residues 
+           :    -- total conserved residues
+ Returns   : n/a
+ Argument  : A Bio::Search::Hit::BlastHit object 
+ Throws    : n/a
+ Comments  :
+ 	   : This method is *strongly* coupled to Bio::Search::Hit::BlastHit
+ 	   : (it accesses BlastHit data members directly).
+ 	   : TODO: Re-write this to the Bio::Search::Hit::HitI interface.
+ 	   :
+           : This method performs more careful summing of data across
+           : all HSPs in the Sbjct object. Only HSPs that are in the same strand 
+           : and frame are tiled. Simply summing the data from all HSPs
+           : in the same strand and frame will overestimate the actual 
+           : length of the alignment if there is overlap between different HSPs 
+           : (often the case).
+           :
+           : The strategy is to tile the HSPs and sum over the
+           : contigs, collecting data separately from overlapping and
+           : non-overlapping regions of each HSP. To facilitate this, the
+           : HSP.pm object now permits extraction of data from sub-sections
+           : of an HSP.
+           : 
+           : Additional useful information is collected from the results
+           : of the tiling. It is possible that sub-sequences in
+           : different HSPs will overlap significantly. In this case, it
+           : is impossible to create a single unambiguous alignment by
+           : concatenating the HSPs. The ambiguity may indicate the
+           : presence of multiple, similar domains in one or both of the
+           : aligned sequences. This ambiguity is recorded using the
+           : ambiguous_aln() method.
+           : 
+           : This method does not attempt to discern biologically
+           : significant vs. insignificant overlaps. The allowable amount of 
+           : overlap can be set with the overlap() method or with the -OVERLAP
+           : parameter used when constructing the Blast & Sbjct objects. 
+           : 
+           : For a given hit, both the query and the sbjct sequences are
+           : tiled independently.
+           : 
+           :    -- If only query sequence HSPs overlap, 
+           :          this may suggest multiple domains in the sbjct.
+           :    -- If only sbjct sequence HSPs overlap, 
+           :          this may suggest multiple domains in the query.
+           :    -- If both query & sbjct sequence HSPs overlap, 
+           :          this suggests multiple domains in both.
+           :    -- If neither query & sbjct sequence HSPs overlap, 
+           :          this suggests either no multiple domains in either
+           :          sequence OR that both sequences have the same
+           :          distribution of multiple similar domains.
+           : 
+           : This method can deal with the special case of when multiple
+           : HSPs exactly overlap.
+           : 
+           : Efficiency concerns:
+           :  Speed will be an issue for sequences with numerous HSPs.
+           : 
+ Bugs      : Currently, tile_hsps() does not properly account for
+           : the number of non-tiled but overlapping HSPs, which becomes a problem
+           : as overlap() grows. Large values of overlap() may thus lead to 
+           : incorrect statistics for some hits. For best results, keep overlap()
+           : below 5 (DEFAULT IS 2). For more about this, see the "HSP Tiling and
+           : Ambiguous Alignments" section in L<Bio::Search::Hit::BlastHit>.
+
+See Also   : L<_adjust_contigs>(), L<Bio::Search::Hit::BlastHit|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#--------------
+sub tile_hsps {
+#--------------
+    # Purpose : Tile the HSPs of one hit and store summary alignment
+    #           statistics directly in the hit's hash slots.
+    # Args    : $sbjct - hit object. Its hash is read and written here, and
+    #           its num_hsps(), hsp(), hsps(), ambiguous_aln() and
+    #           algorithm() methods are called.
+    # Returns : nothing meaningful; all results are stored on $sbjct.
+    my $sbjct = shift;
+
+    $sbjct->{'_tile_hsps'} = 1;
+    $sbjct->{'_gaps_query'} = 0;
+    $sbjct->{'_gaps_sbjct'} = 0;
+
+    ## Simple summation scheme. Valid if there is only one HSP.
+    if((defined($sbjct->{'_n'}) and $sbjct->{'_n'} == 1) or $sbjct->num_hsps == 1) {
+        my $hsp = $sbjct->hsp;
+        $sbjct->{'_length_aln_query'} = $hsp->length('query');
+        $sbjct->{'_length_aln_sbjct'} = $hsp->length('sbjct');
+        $sbjct->{'_length_aln_total'} = $hsp->length('total');
+        ($sbjct->{'_totalIdentical'},$sbjct->{'_totalConserved'}) = $hsp->matches();
+        $sbjct->{'_gaps_query'} = $hsp->gaps('query');
+        $sbjct->{'_gaps_sbjct'} = $hsp->gaps('sbjct');
+        return;
+    }
+
+    ## More than one HSP. Reset the running totals, then tile the HSPs.
+    $sbjct->{'_length_aln_query'} = 0;
+    $sbjct->{'_length_aln_sbjct'} = 0;
+    $sbjct->{'_length_aln_total'} = 0;
+    $sbjct->{'_totalIdentical'}   = 0;
+    $sbjct->{'_totalConserved'}   = 0;
+
+    my(@qcontigs, @scontigs);
+    my $qoverlap = 0;
+    my $soverlap = 0;
+    my $max_overlap = $sbjct->{'_overlap'};
+
+    foreach my $hsp ($sbjct->hsps()) {
+        my ($qstart, $qstop) = $hsp->range('query');
+        my ($sstart, $sstop) = $hsp->range('sbjct');
+        my $frame = $hsp->frame;
+        $frame = -1 unless defined $frame;
+        my ($qstrand, $sstrand) = $hsp->strand;
+
+        my ($qgaps, $sgaps)  = $hsp->gaps();
+        $sbjct->{'_gaps_query'} += $qgaps;
+        $sbjct->{'_gaps_sbjct'} += $sgaps;
+
+        $sbjct->{'_length_aln_total'} += $hsp->length;
+
+        ## Collect contigs in the query sequence.
+        # The overlap flags are accumulated over ALL HSPs. Plain assignment
+        # would let the last HSP alone decide whether the alignment is
+        # reported as ambiguous.
+        $qoverlap = 1
+            if _adjust_contigs('query', $hsp, $qstart, $qstop, \@qcontigs, $max_overlap, $frame, $qstrand);
+
+        ## Collect contigs in the sbjct sequence (needed for domain data and gapped Blast).
+        $soverlap = 1
+            if _adjust_contigs('sbjct', $hsp, $sstart, $sstop, \@scontigs, $max_overlap, $frame, $sstrand);
+
+        ## Collect overall start and stop data for query and sbjct over all HSPs.
+        if(not defined $sbjct->{'_queryStart'}) {
+            $sbjct->{'_queryStart'} = $qstart;
+            $sbjct->{'_queryStop'}  = $qstop;
+            $sbjct->{'_sbjctStart'} = $sstart;
+            $sbjct->{'_sbjctStop'}  = $sstop;
+        } else {
+            $sbjct->{'_queryStart'} = ($qstart < $sbjct->{'_queryStart'} ? $qstart : $sbjct->{'_queryStart'});
+            $sbjct->{'_queryStop'}  = ($qstop  > $sbjct->{'_queryStop'}  ? $qstop  : $sbjct->{'_queryStop'});
+            $sbjct->{'_sbjctStart'} = ($sstart < $sbjct->{'_sbjctStart'} ? $sstart : $sbjct->{'_sbjctStart'});
+            $sbjct->{'_sbjctStop'}  = ($sstop  > $sbjct->{'_sbjctStop'}  ? $sstop  : $sbjct->{'_sbjctStop'});
+        }
+    }
+
+    ## Collect data across the collected contigs.
+
+    # TODO: Account for strand/frame issue!
+    # Strategy: collect data on a per strand+frame basis and save the most significant one.
+    my (%qctg_dat);
+    foreach my $contig (@qcontigs) {
+        my ($frame, $strand) = ($contig->{'frame'}, $contig->{'strand'});
+        $qctg_dat{ "$frame$strand" }->{'length_aln_query'} += $contig->{'stop'} - $contig->{'start'} + 1;
+        $qctg_dat{ "$frame$strand" }->{'totalIdentical'}   += $contig->{'iden'};
+        $qctg_dat{ "$frame$strand" }->{'totalConserved'}   += $contig->{'cons'};
+        $qctg_dat{ "$frame$strand" }->{'qstrand'}   = $strand;
+    }
+
+    # Find the frame/strand group with the longest summed contig length.
+    my @sortedkeys = reverse sort { $qctg_dat{ $a }->{'length_aln_query'} <=> $qctg_dat{ $b }->{'length_aln_query'} } keys %qctg_dat;
+
+    # Save the largest to the sbjct:
+    my $longest = $sortedkeys[0];
+    $sbjct->{'_length_aln_query'} = $qctg_dat{ $longest }->{'length_aln_query'};
+    $sbjct->{'_totalIdentical'}   = $qctg_dat{ $longest }->{'totalIdentical'};
+    $sbjct->{'_totalConserved'}   = $qctg_dat{ $longest }->{'totalConserved'};
+    $sbjct->{'_qstrand'} = $qctg_dat{ $longest }->{'qstrand'};
+
+    ## Collect data for sbjct contigs. Important for gapped Blast.
+    ## The totalIdentical and totalConserved numbers will be the same
+    ## as determined for the query contigs.
+    my (%sctg_dat);
+    foreach my $contig (@scontigs) {
+        my ($frame, $strand) = ($contig->{'frame'}, $contig->{'strand'});
+        $sctg_dat{ "$frame$strand" }->{'length_aln_sbjct'}   += $contig->{'stop'} - $contig->{'start'} + 1;
+        $sctg_dat{ "$frame$strand" }->{'frame'}  = $frame;
+        $sctg_dat{ "$frame$strand" }->{'sstrand'}  = $strand;
+    }
+
+    @sortedkeys = reverse sort { $sctg_dat{ $a }->{'length_aln_sbjct'} <=> $sctg_dat{ $b }->{'length_aln_sbjct'} } keys %sctg_dat;
+
+    # Save the largest to the sbjct:
+    $longest = $sortedkeys[0];
+
+    $sbjct->{'_length_aln_sbjct'} = $sctg_dat{ $longest }->{'length_aln_sbjct'};
+    $sbjct->{'_frame'} = $sctg_dat{ $longest }->{'frame'};
+    $sbjct->{'_sstrand'} = $sctg_dat{ $longest }->{'sstrand'};
+
+    # Record which sequence(s) had overlapping HSPs.
+    if($qoverlap) {
+        if($soverlap) { $sbjct->ambiguous_aln('qs'); }
+        else          { $sbjct->ambiguous_aln('q'); }
+    } elsif($soverlap) {
+        $sbjct->ambiguous_aln('s');
+    }
+
+    # Adjust length based on BLAST flavor.
+    my $prog = $sbjct->algorithm;
+    if($prog eq 'TBLASTN') {
+        $sbjct->{'_length_aln_sbjct'} /= 3;
+    } elsif($prog eq 'BLASTX' ) {
+        $sbjct->{'_length_aln_query'} /= 3;
+    } elsif($prog eq 'TBLASTX') {
+        $sbjct->{'_length_aln_query'} /= 3;
+        $sbjct->{'_length_aln_sbjct'} /= 3;
+    }
+}
+
+
+
+=head2 _adjust_contigs
+
+ Usage     : n/a; called automatically during object construction.
+ Purpose   : Builds HSP contigs for a given BLAST hit.
+           : Utility method called by tile_hsps()
+ Returns   : 
+ Argument  : 
+ Throws    : Exceptions propagated from Bio::Search::Hit::BlastHSP::matches()
+           : for invalid sub-sequence ranges.
+ Status    : Experimental
+ Comments  : This method does not currently support gapped alignments.
+           : Also, it does not keep track of the number of HSPs that
+           : overlap within the amount specified by overlap().
+           : This will lead to significant tracking errors for large
+           : overlap values.
+
+See Also   : L<tile_hsps>(), L<Bio::Search::Hit::BlastHSP::matches|Bio::Search::Hit::BlastHSP>
+
+=cut
+
+#-------------------
+sub _adjust_contigs {
+#-------------------
+    # Purpose : Merge one HSP range into the list of contigs collected so far
+    #           for one sequence, or add it as a new contig.
+    # Args    : $seqType     - 'query' or 'sbjct', passed on to $hsp->matches
+    #           $hsp         - HSP object providing matches()
+    #           $start,$stop - range of the HSP on this sequence
+    #           $contigs_ref - array ref of contig hashes with the keys
+    #                          start, stop, iden, cons, strand, frame
+    #                          (modified in place)
+    #           $max_overlap - overlap tolerated before an HSP is merged
+    #           $frame,$strand - only contigs matching both are considered
+    # Returns : 1 if the HSP was nested in or merged with an existing contig,
+    #           0 if it was added as a new contig.
+    my ($seqType, $hsp, $start, $stop, $contigs_ref, $max_overlap, $frame, $strand) = @_;
+
+    my $overlap = 0;
+    my ($numID, $numCons);
+
+    foreach my $contig (@$contigs_ref) {
+
+        # Don't merge things unless they have matching strand/frame.
+        next unless ($contig->{'frame'} == $frame and $contig->{'strand'} == $strand);
+
+        ## Test special case of a nested HSP. Skip it.
+        if($start >= $contig->{'start'} and $stop <= $contig->{'stop'}) {
+            $overlap = 1;
+            next;
+        }
+
+        ## Test for overlap at beginning of contig.
+        if($start < $contig->{'start'} and $stop > ($contig->{'start'} + $max_overlap)) {
+            # Collect stats over the non-overlapping region.
+            eval {
+                ($numID, $numCons) = $hsp->matches(-SEQ   =>$seqType,
+                                                   -START =>$start,
+                                                   -STOP  =>$contig->{'start'}-1);
+            };
+            if($@) { warn "\a\n$@\n"; }
+            else {
+                $contig->{'start'} = $start; # Assign a new start coordinate to the contig
+                $contig->{'iden'} += $numID; # and add new data to #identical, #conserved.
+                $contig->{'cons'} += $numCons;
+                $overlap     = 1;
+            }
+        }
+
+        ## Test for overlap at end of contig.
+        if($stop > $contig->{'stop'} and $start < ($contig->{'stop'} - $max_overlap)) {
+            # Collect stats over the non-overlapping region. That region
+            # begins one position past the contig's current stop; starting
+            # at the stop itself would count that residue twice.
+            eval {
+                ($numID,$numCons) = $hsp->matches(-SEQ   =>$seqType,
+                                                  -START =>$contig->{'stop'}+1,
+                                                  -STOP  =>$stop);
+            };
+            if($@) { warn "\a\n$@\n"; }
+            else {
+                $contig->{'stop'}  = $stop;  # Assign a new stop coordinate to the contig
+                $contig->{'iden'} += $numID; # and add new data to #identical, #conserved.
+                $contig->{'cons'} += $numCons;
+                $overlap    = 1;
+            }
+        }
+
+        # Once the HSP has been accounted for, stop scanning contigs.
+        last if $overlap;
+    }
+
+    ## If there is no overlap, add the complete HSP data.
+    unless ($overlap) {
+        ($numID,$numCons) = $hsp->matches(-SEQ=>$seqType);
+        push @$contigs_ref, {'start'=>$start, 'stop'=>$stop,
+                             'iden'=>$numID,  'cons'=>$numCons,
+                             'strand'=>$strand, 'frame'=>$frame};
+    }
+    return $overlap;
+}
+
+=head2 get_exponent
+
+ Usage     : &get_exponent( number );
+ Purpose   : Determines the power of 10 exponent of an integer, float, 
+           : or scientific notation number.
+ Example   : &get_exponent("4.0e-206");
+           : &get_exponent("0.00032");
+           : &get_exponent("10.");
+           : &get_exponent("1000.0");
+           : &get_exponent("e+83");
+ Argument  : Float, Integer, or scientific notation number
+ Returns   : Integer representing the exponent part of the number (+ or -).
+           : If argument == 0 (zero), return value is "-999".
+ Comments  : Exponents are rounded up (less negative) if the mantissa is >= 5.
+           : Exponents are rounded down (more negative) if the mantissa is <= -5.
+
+=cut
+
+#------------------
+sub get_exponent {
+#------------------
+    # Purpose : Return the power-of-ten exponent of a number given as an
+    #           integer, a decimal, or in scientific notation.
+    # Returns : The exponent; -999 when the value is numerically zero.
+    # Notes   : For scientific notation the written exponent is adjusted by
+    #           one when the mantissa is >= 5 or <= -5. Plain integers and
+    #           decimals yield the exponent of their leading non-zero digit.
+    my $data = shift;
+
+    my($num, $exp) = split /[eE]/, $data;
+
+    if( defined $exp) {
+        # Scientific notation; a bare "e+83" has an implied mantissa of 1.
+        $num = 1 if not $num;
+        $num >= 5 and $exp++;
+        $num <= -5 and $exp--;
+    } elsif( $num == 0) {
+        $exp = -999;
+    } else {
+        # Work on the digits only, so that a sign, interior zeros or
+        # trailing zeros cannot distort the result.
+        my ($int, $frac) = $num =~ /^\s*[-+]?(\d*)(?:\.(\d*))?/;
+        $int  = '' unless defined $int;
+        $frac = '' unless defined $frac;
+        $int =~ s/^0+//;
+        if( CORE::length($int) ) {
+            # Value >= 1: the exponent is the count of integer digits - 1.
+            $exp = CORE::length($int) - 1;
+        } else {
+            # Value < 1: count the zeros between the point and the first
+            # significant digit.
+            my ($zeros) = $frac =~ /^(0*)/;
+            $exp = -( CORE::length($zeros) + 1 );
+        }
+    }
+    return $exp;
+}
+
+=head2 collapse_nums
+
+ Usage     : @cnums = collapse_nums( @numbers );
+ Purpose   : Collapses a list of numbers into a set of ranges of consecutive terms:
+           : Useful for condensing long lists of consecutive numbers.
+           :  EXPANDED:
+           :     1 2 3 4 5 6 10 12 13 14 15 17 18 20 21 22 24 26 30 31 32
+           :  COLLAPSED:
+           :     1-6 10 12-15 17 18 20-22 24 26 30-32
+ Argument  : List of numbers sorted numerically.
+ Returns   : List of numbers mixed with ranges of numbers (see above).
+ Throws    : n/a
+
+See Also   : L<Bio::Search::Hit::BlastHit::seq_inds()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#------------------
+sub collapse_nums {
+#------------------
+    # Purpose : Collapse a numerically sorted list into runs of consecutive
+    #           numbers.
+    # Args    : list of numbers, sorted ascending
+    # Returns : list in which a run of three or more consecutive numbers
+    #           becomes "first-last"; a run of two stays as two separate
+    #           numbers; isolated numbers are passed through.
+    my @a = @_;
+    my @ca;
+    return @ca unless @a;
+
+    # Format one run of consecutive numbers.
+    my $emit = sub {
+        my ($first, $last) = @_;
+        return ($first)        if $last == $first;
+        return ($first, $last) if $last == $first + 1;
+        return ("$first-$last");
+    };
+
+    # Run boundaries are tracked by value, so a 0 in the input is handled
+    # like any other number instead of being mistaken for "no run yet".
+    my ($from, $to) = ($a[0], $a[0]);
+    foreach my $n (@a[1 .. $#a]) {
+        if($n == $to + 1) {
+            $to = $n;
+            next;
+        }
+        push @ca, $emit->($from, $to);
+        ($from, $to) = ($n, $n);
+    }
+    push @ca, $emit->($from, $to);
+
+    return @ca;
+}
+
+
+=head2 strip_blast_html
+
+ Usage     : $boolean = &strip_blast_html( string_ref );
+           : This method is exported.
+ Purpose   : Removes HTML formatting from a supplied string.
+           : Attempts to restore the Blast report to enable
+           : parsing by Bio::SearchIO::blast.pm
+ Returns   : Boolean: true if string was stripped, false if not.
+ Argument  : string_ref = reference to a string containing the whole Blast
+           :              report containing HTML formatting.
+ Throws    : Croaks if the argument is not a scalar reference.
+ Comments  : Based on code originally written by Alex Dong Li
+           : (ali at genet.sickkids.on.ca).
+           : This method does some Blast-specific stripping 
+           : (adds back a '>' character in front of each HSP 
+           : alignment listing).
+           :   
+           : THIS METHOD IS VERY SENSITIVE TO BLAST FORMATTING CHANGES!
+           :
+           : Removal of the HTML tags and accurate reconstitution of the
+           : non-HTML-formatted report is highly dependent on structure of
+           : the HTML-formatted version. For example, it assumes that first 
+           : line of each alignment section (HSP listing) starts with a
+           : <a name=..> anchor tag. This permits the reconstruction of the 
+           : original report in which these lines begin with a ">".
+           : This is required for parsing.
+           :
+           : If the structure of the Blast report itself is not intended to
+           : be a standard, the structure of the HTML-formatted version
+           : is even less so. Therefore, the use of this method to
+           : reconstitute parsable Blast reports from HTML-format versions
+           : should be considered a temporary solution.
+
+=cut
+
+#--------------------
+sub strip_blast_html {
+#--------------------
+      # Purpose : Remove HTML markup, in place, from the Blast report held
+      #           in the referenced string.
+      # Args    : reference to a scalar containing the report text
+      # Returns : true if anything was removed or rewritten, false otherwise
+      # Throws  : croaks if the argument is not a SCALAR reference
+      #
+      # This may not be the best way to remove html tags. However, it is simple.
+      # It won't work under the following conditions:
+      #    1) if quoted > appears in a tag  (does this ever happen?)
+      #    2) if a tag is split over multiple lines and this method is
+      #       used to process one line at a time.
+
+    my ($string_ref) = shift;
+
+    unless (ref($string_ref) eq 'SCALAR') {
+        # Carp is loaded here because this package never imports croak.
+        require Carp;
+        Carp::croak ("Can't strip HTML: ".
+               "Argument is should be a SCALAR reference not a ${\ref $string_ref}\n");
+    }
+
+    my $str = $$string_ref;
+    my $stripped = 0;
+
+    # Removing "<a name =...>" and adding the '>' character for 
+    # HSP alignment listings.
+    $str =~ s/(\A|\n)<a name ?=[^>]+> ?/>/sgi and $stripped = 1;
+
+    # Removing all "<>" tags and non-breaking-space entities. The entity's
+    # terminating ';' is removed with it so no stray semicolons remain.
+    $str =~ s/<[^>]+>|&nbsp;?//sgi and $stripped = 1;
+
+    # Re-uniting any lone '>' characters.
+    $str =~ s/(\A|\n)>\s+/\n\n>/sgi and $stripped = 1;
+
+    $$string_ref = $str;
+    return $stripped;
+}
+
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/DatabaseI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/DatabaseI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/DatabaseI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,146 @@
+#-----------------------------------------------------------------
+# $Id: DatabaseI.pm,v 1.10.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::DatabaseI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::DatabaseI - Interface for a database used in a sequence search
+
+=head1 SYNOPSIS
+
+Bio::Search::DatabaseI objects should not be instantiated since this
+module defines a pure interface.
+
+Given an object that implements the Bio::Search::DatabaseI  interface,
+you can do the following things with it:
+
+    $name = $db->name();
+
+    $date = $db->date();
+
+    $num_letters = $db->letters();
+
+    $num_entries = $db->entries();
+
+=head1 DESCRIPTION
+
+This module defines methods for an object that provides metadata
+information about a database used for sequence searching.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/           
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::DatabaseI;
+
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 name
+
+ Usage     : $name = $db->name();
+ Purpose   : Get the name of the database searched.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+sub name {
+    # Interface stub: reports "not implemented" through the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 date
+
+ Usage     : $date = $db->date();
+ Purpose   : Get the creation date of the queried database.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+sub date {
+    # Interface stub: reports "not implemented" through the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 letters
+
+ Usage     : $num_letters = $db->letters();
+ Purpose   : Get the number of letters in the queried database.
+ Returns   : Integer
+ Argument  : n/a
+
+=cut
+
+sub letters {
+    # Interface stub: reports "not implemented" through the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 entries
+
+ Usage     : $num_entries = $db->entries();
+ Purpose   : Get the number of entries in the queried database.
+ Returns   : Integer
+ Argument  : n/a
+
+=cut
+
+sub entries {
+    # Interface stub: reports "not implemented" through the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericDatabase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericDatabase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericDatabase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,170 @@
+#-----------------------------------------------------------------
+# $Id: GenericDatabase.pm,v 1.9.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::GenericDatabase
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::GenericDatabase - Generic implementation of Bio::Search::DatabaseI
+
+=head1 SYNOPSIS
+
+    use Bio::Search::GenericDatabase;
+
+    $db = Bio::Search::GenericDatabase->new( -name => 'my Blast db',
+					     -date => '2001-03-13',
+					     -length => 2352511,
+					     -entries => 250000 );
+
+    $name = $db->name();
+    $date = $db->date();
+    $num_letters = $db->letters();
+    $num_entries = $db->entries();
+
+=head1 DESCRIPTION
+
+This module provides a basic implementation of L<Bio::Search::DatabaseI>.
+See documentation in that module for more information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/           
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object methods.
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::GenericDatabase;
+
+use strict;
+
+use base qw(Bio::Root::Root Bio::Search::DatabaseI);
+
+# Constructor.
+#
+# Named arguments (extracted through _rearrange):
+#   -NAME    => database name, stored via name()
+#   -DATE    => database date, stored via date()
+#   -LENGTH  => number of letters, stored via letters()
+#   -ENTRIES => number of entries, stored via entries()
+#
+# Returns the new object. Arguments that are not supplied (undef) leave
+# the corresponding attribute unset.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($name, $date, $length, $ents) =
+        $self->_rearrange( [qw(NAME DATE LENGTH ENTRIES)], @args);
+
+    # Test for definedness rather than truth so that legitimate false
+    # values (for example -entries => 0) are stored instead of being
+    # silently dropped.
+    $self->name($name)      if defined $name;
+    $self->date($date)      if defined $date;
+    $self->letters($length) if defined $length;
+    $self->entries($ents)   if defined $ents;
+
+    return $self;
+}
+
+=head2 name
+
+See L<Bio::Search::DatabaseI::name>() for documentation
+
+This implementation is a combined set/get accessor.
+
+=cut
+
+#---------------
+sub name {
+#---------------
+    my ($self, @new_name) = @_;
+
+    # Setter path: any argument triggers a store; leading and trailing
+    # whitespace is stripped from the stored copy.
+    if (@new_name) {
+        my $trimmed = $new_name[0];
+        $trimmed =~ s/(^\s+|\s+$)//g;
+        $self->{'_db'} = $trimmed;
+    }
+
+    return $self->{'_db'};
+}
+
+=head2 date
+
+See L<Bio::Search::DatabaseI::date>() for documentation
+
+This implementation is a combined set/get accessor.
+
+=cut
+
+#-----------------------
+sub date {
+#-----------------------
+    my ($self, @new_date) = @_;
+
+    # Any argument at all (even undef) counts as a request to set.
+    $self->{'_dbDate'} = $new_date[0] if @new_date;
+
+    return $self->{'_dbDate'};
+}
+
+
+=head2 letters
+
+See L<Bio::Search::DatabaseI::letters>() for documentation
+
+This implementation is a combined set/get accessor.
+
+=cut
+
+#----------------------
+sub letters {
+#----------------------
+    my ($self, @new_count) = @_;
+
+    # Any argument at all (even undef) counts as a request to set.
+    $self->{'_dbLetters'} = $new_count[0] if @new_count;
+
+    return $self->{'_dbLetters'};
+}
+
+
+=head2 entries
+
+See L<Bio::Search::DatabaseI::entries>() for documentation
+
+This implementation is a combined set/get accessor.
+
+=cut
+
+#------------------
+sub entries {
+#------------------
+    my ($self, @new_count) = @_;
+
+    # Any argument at all (even undef) counts as a request to set.
+    $self->{'_dbEntries'} = $new_count[0] if @new_count;
+
+    return $self->{'_dbEntries'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericStatistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericStatistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericStatistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+# 
+#
+# BioPerl module for wrapping statistics
+#
+# Cared for by Chad Matsalla (bioinformatics1 at dieselwurks dot com)
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::GenericStatistics - An  object for statistics
+
+=head1 SYNOPSIS
+
+  my $void   = $obj->set_statistic("statistic_name","statistic_value"); 
+  my $value  = $obj->get_statistic("statistic_name");
+
+=head1 DESCRIPTION
+
+This is a basic container to hold the statistics returned from a program.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks dot com
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::GenericStatistics;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Search::StatisticsI);
+
+sub new {
+    my $class = shift;
+
+    # Construction is delegated entirely to the parent class; all
+    # arguments are forwarded untouched.
+    my $self = $class->SUPER::new(@_);
+
+    return $self;
+}
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : $statistic_object->get_statistic($statistic_name);
+ Function: Get the value of a statistic named $statistic_name
+ Returns : A scalar that should be a string
+ Args    : A scalar that should be a string
+
+=cut
+
+sub get_statistic {
+    my $self      = shift;
+    my $stat_name = shift;
+
+    # Look the value up in the per-object 'stats' hash; an unknown name
+    # yields undef.
+    return $self->{stats}{$stat_name};
+}
+
+=head2 set_statistic
+
+ Title   : set_statistic
+ Usage   : $statistic_object->set_statistic($statistic_name => $statistic_value);
+ Function: Set the value of a statistic named $statistic_name to $statistic_value
+ Returns : Void
+ Args    : A statistic name and its value (a single name => value pair)
+
+=cut
+
+sub set_statistic {
+    my $self = shift;
+    my ($stat_name, $stat_value) = @_;
+
+    # Store under the per-object 'stats' hash. The assignment is the last
+    # expression evaluated, so its value is what the caller receives.
+    return ($self->{stats}{$stat_name} = $stat_value);
+}
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $statistic_object->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : list of available statistic names
+ Args    : none
+
+=cut
+
+sub available_statistics {
+    my ($self) = @_;
+
+    # The statistic names are simply the keys of the 'stats' hash.
+    return keys %{ $self->{stats} };
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/GenericStatistics.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/BlastHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/BlastHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/BlastHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1726 @@
+#-----------------------------------------------------------------
+# $Id: BlastHSP.pm,v 1.28.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::HSP::BlastHSP
+#
+# (This module was originally called Bio::Tools::Blast::HSP)
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Search::HSP::BlastHSP - Bioperl BLAST High-Scoring Pair object
+
+=head1 SYNOPSIS
+
+See L<Bio::Search::Hit::BlastHit>.
+
+=head1 DESCRIPTION
+
+A Bio::Search::HSP::BlastHSP object provides an interface to data
+obtained in a single alignment section of a Blast report (known as a
+"High-scoring Segment Pair"). This is essentially a pairwise
+alignment with score information.
+
+BlastHSP objects are accessed via L<Bio::Search::Hit::BlastHit>
+objects after parsing a BLAST report using the L<Bio::SearchIO>
+system.
+
+The construction of BlastHSP objects is performed by
+Bio::Factory::BlastHitFactory in a process that is
+orchestrated by the Blast parser (L<Bio::SearchIO::psiblast>).
+The resulting BlastHSPs are then accessed via
+L<Bio::Search::Hit::BlastHit>. Therefore, you do not need to
+use L<Bio::Search::HSP::BlastHSP> directly. If you need to construct
+BlastHSPs directly, see the new() function for details.
+
+For L<Bio::SearchIO> BLAST parsing usage examples, see the
+C<examples/search-blast> directory of the Bioperl distribution.
+
+=head2 Start and End coordinates
+
+Sequence endpoints are swapped so that start is always less than
+end. This affects TBLASTN/X hits on the minus strand. Strand
+information can be recovered using the strand() method. This
+normalization step is standard Bioperl practice. It also facilitates
+use of range information by methods such as match().
+
+=over 1
+
+=item * Supports BLAST versions 1.x and 2.x, gapped and ungapped.
+
+=back
+
+Bio::Search::HSP::BlastHSP.pm has the ability to extract a list of all
+residue indices for identical and conservative matches along both
+query and sbjct sequences. Since this degree of detail is not always
+needed, this behavior does not occur during construction of the BlastHSP
+object.  These data will automatically be collected as necessary as
+the BlastHSP.pm object is used.
+
+=head1 DEPENDENCIES
+
+Bio::Search::HSP::BlastHSP.pm is a concrete class that inherits from
+L<Bio::SeqFeature::SimilarityPair> and L<Bio::Search::HSP::HSPI>.
+L<Bio::Seq> and L<Bio::SimpleAlign> are employed for creating
+sequence and alignment objects, respectively.
+
+=head2 Relationship to SimpleAlign.pm & Seq.pm
+
+BlastHSP.pm can provide the query or sbjct sequence as a L<Bio::Seq>
+object via the L<seq()|seq> method. The BlastHSP.pm object can also create a
+two-sequence L<Bio::SimpleAlign> alignment object using the query
+and sbjct sequences via the L<get_aln()|get_aln> method. Creation of alignment
+objects is not automatic when constructing the BlastHSP.pm object since
+this level of functionality is not always required and would generate
+a lot of extra overhead when crunching many reports.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Steve Chervitz E<lt>sac-at-bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 ACKNOWLEDGEMENTS
+
+This software was originally developed in the Department of Genetics
+at Stanford University. I would also like to acknowledge my
+colleagues at Affymetrix for useful feedback.
+
+=head1 SEE ALSO
+
+ Bio::Search::Hit::BlastHit.pm          - Blast hit object.
+ Bio::Search::Result::BlastResult.pm    - Blast Result object.
+ Bio::Seq.pm                            - Biosequence object
+
+=head2 Links:
+
+ http://bio.perl.org/                       - Bioperl Project Homepage
+
+=head1 COPYRIGHT
+
+Copyright (c) 1996-2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# END of main POD documentation.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::HSP::BlastHSP;
+
+use strict;
+use Bio::SeqFeature::Similarity;
+
+use vars qw($GAP_SYMBOL %STRAND_SYMBOL);
+
+use overload
+    '""' => \&to_string;
+
+use base qw(Bio::SeqFeature::SimilarityPair Bio::Search::HSP::HSPI);
+
+$GAP_SYMBOL    = '-';          # Need a more general way to handle gap symbols.
+%STRAND_SYMBOL = ('Plus' => 1, 'Minus' => -1 );
+
+
+=head2 new
+
+ Usage     : $hsp = Bio::Search::HSP::BlastHSP->new( %named_params );
+           : Bio::Search::HSP::BlastHSP objects are constructed
+           : automatically by Bio::Factory::BlastHitFactory,
+           : so there is no need for direct instantiation.
+ Purpose   : Constructs a new BlastHSP object and Initializes key variables
+           : for the HSP.
+ Returns   : A Bio::Search::HSP::BlastHSP object
+ Argument  : Named parameters:
+           : Parameter keys are case-insensitive.
+           :      -RAW_DATA  => array ref containing raw BLAST report data
+           :                    for a single HSP. This includes all lines
+           :                    of the HSP alignment from a traditional BLAST
+           :                    or PSI-BLAST (non-XML) report,
+           :      -RANK         => integer (1..n).
+           :      -PROGRAM      => string ('TBLASTN', 'BLASTP', etc.).
+           :      -QUERY_NAME   => string, id of query sequence
+           :      -HIT_NAME     => string, id of hit sequence
+           :
+ Comments  : Having the raw data allows this object to do lazy parsing of
+           : the raw HSP data (i.e., not parsed until needed).
+           :
+           : Note that there is a fair amount of basic parsing that is
+           : currently performed in this module that would be more appropriate
+           : to do within a separate factory object.
+           : This parsing code will likely be relocated and more initialization
+           : parameters will be added to new().
+           :
+See Also   : L<Bio::SeqFeature::SimilarityPair::new()>, L<Bio::SeqFeature::Similarity::new()>
+
+=cut
+
+#----------------
+sub new {
+#----------------
+    my $class = shift;
+    my @args  = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    # Initialize the gap counters.
+    $self->{'_sbjctGaps'} = 0;
+    $self->{'_queryGaps'} = 0;
+
+    # NOTE(review): $qlen and $hlen are declared here but never assigned
+    # anywhere in this constructor, so the seqlength() calls further down
+    # are passed undef -- confirm whether lengths were meant to be supplied.
+    my ($raw_data, $qname, $hname, $qlen, $hlen);
+
+    ($self->{'_prog'}, $self->{'_rank'}, $raw_data, $qname, $hname) =
+        $self->_rearrange(
+            [qw( PROGRAM RANK RAW_DATA QUERY_NAME HIT_NAME )],
+            @args );
+
+    # _set_data() parses the raw report lines; the values read back
+    # below (start/end/strand/frame/bits/score) are fetched after it runs.
+    $self->_set_data( @{$raw_data} );
+
+    # Each of these is called in list context and yields a
+    # (query, hit) pair.
+    my ($query_start,  $hit_start)  = ($self->start());
+    my ($query_end,    $hit_end)    = ($self->end());
+    my ($query_strand, $hit_strand) = ($self->strand());
+    my ($query_frame,  $hit_frame)  = ($self->query->frame(),
+                                       $self->hit->frame);
+
+    # Store the aligned query as a sequence feature.
+    my $query_feature = Bio::SeqFeature::Similarity->new(
+        -start  => $query_start,
+        -end    => $query_end,
+        -strand => $query_strand,
+        -bits   => $self->bits,
+        -score  => $self->score,
+        -frame  => $query_frame,
+        -seq_id => $qname,
+        -source => $self->{'_prog'},
+    );
+    $self->query($query_feature);
+
+    # Likewise for the aligned hit (subject).
+    my $hit_feature = Bio::SeqFeature::Similarity->new(
+        -start  => $hit_start,
+        -end    => $hit_end,
+        -strand => $hit_strand,
+        -bits   => $self->bits,
+        -score  => $self->score,
+        -frame  => $hit_frame,
+        -seq_id => $hname,
+        -source => $self->{'_prog'},
+    );
+    $self->hit($hit_feature);
+
+    # Set lengths (see the review note above: both values are undef here).
+    $self->query->seqlength($qlen);    # query
+    $self->hit->seqlength($hlen);      # subject
+
+    # Copy the per-sequence identity fractions onto the two features.
+    $self->query->frac_identical( $self->frac_identical('query') );
+    $self->hit->frac_identical( $self->frac_identical('hit') );
+
+    return $self;
+}
+
+#sub DESTROY {
+#    my $self = shift;
+#    #print STDERR "--->DESTROYING $self\n";
+#}
+
+
+# Title   : _id_str;
+# Purpose : Intended for internal use only to provide a string for use
+#           within exception messages to help users figure out which
+#           query/hit caused the problem.
+# Returns : Short string with name of query and hit seq
+sub _id_str {
+    my ($self) = @_;
+
+    # Serve the cached string when one has already been built.
+    return $self->{'_id_str'} if defined $self->{'_id_str'};
+
+    # Otherwise build it from the query and hit sequence ids and cache it.
+    my $qname = $self->query->seq_id;
+    my $hname = $self->hit->seq_id;
+    $self->{'_id_str'} = "QUERY=\"$qname\" HIT=\"$hname\"";
+
+    return $self->{'_id_str'};
+}
+
+#=================================================
+# Begin Bio::Search::HSP::HSPI implementation
+#=================================================
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hsp->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hsp
+           For BLAST, the algorithm denotes what type of sequence was aligned
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated
+           dna-translated dna).
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+#----------------
+sub algorithm {
+#----------------
+    # Read-only accessor for the program name captured by the constructor
+    # (the -PROGRAM argument). Extra arguments are accepted but ignored.
+    #
+    # The parameter list here had been corrupted to "at args" (a mangled
+    # "@args"), which does not compile; the unused list is simply dropped.
+    my ($self) = @_;
+    return $self->{'_prog'};
+}
+
+
+
+
+=head2 signif()
+
+ Usage     : $hsp_obj->signif()
+ Purpose   : Get the P-value or Expect value for the HSP.
+ Returns   : Float (0.001 or 1.3e-43)
+           : Returns P-value if it is defined, otherwise, Expect value.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Provided for consistency with BlastHit::signif()
+           : Support for returning the significance data in different
+           : formats (e.g., exponent only), is not provided for HSP objects.
+           : This is only available for the BlastHit or Blast object.
+
+See Also   : L<p()|p>, L<expect()|expect>, L<Bio::Search::Hit::BlastHit::signif()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#-----------
+sub signif {
+#-----------
+    # Returns the P-value when one is defined, otherwise the Expect value.
+    # (The former "my $val ||= ..." on a freshly declared lexical was a
+    # no-op guard; the expression is now returned directly.)
+    my ($self) = @_;
+    return defined( $self->{'_p'} ) ? $self->{'_p'} : $self->{'_expect'};
+}
+
+
+
+=head2 evalue
+
+ Usage     : $hsp_obj->evalue()
+ Purpose   : Get the Expect value for the HSP.
+ Returns   : Float (0.001 or 1.3e-43)
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Support for returning the expectation data in different
+           : formats (e.g., exponent only), is not provided for HSP objects.
+           : This is only available for the BlastHit or Blast object.
+
+See Also   : L<p()|p>
+
+=cut
+
+#----------
+sub evalue { my ($self) = @_; return $self->{'_expect'}; }
+#----------
+
+
+=head2 p
+
+ Usage     : $hsp_obj->p()
+ Purpose   : Get the P-value for the HSP.
+ Returns   : Float (0.001 or 1.3e-43) or undef if not defined.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : P-value is not defined with NCBI Blast2 reports.
+           : Support for returning the expectation data in different
+           : formats (e.g., exponent only) is not provided for HSP objects.
+           : This is only available for the BlastHit or Blast object.
+
+See Also   : L<expect()|expect>
+
+=cut
+
+#-----
+sub p { my ($self) = @_; return $self->{'_p'}; }
+#-----
+
+# alias
+sub pvalue { my $self = shift; return $self->p(@_); }
+
+=head2 length
+
+ Usage     : $hsp->length( [seq_type] )
+ Purpose   : Get the length of the aligned portion of the query or sbjct.
+ Example   : $hsp->length('query')
+ Returns   : integer
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'  (default = 'total')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : 'total' length is the full length of the alignment
+           : as reported in the denominators in the alignment section:
+           : "Identical = 34/120 Positives = 67/120".
+
+See Also   : L<gaps()|gaps>
+
+=cut
+
+#-----------
+sub length {
+#-----------
+## Developer note: when using the built-in length function within
+##                 this module, call it as CORE::length().
+    my ($self, $seqType, $data) = @_;
+
+    # Normalize the requested sequence type.
+    $seqType = 'total' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # Per-sequence lengths need _set_seq_data() to have run once; the
+    # 'total' length does not.
+    if ( !$self->{'_set_seq_data'} && $seqType ne 'total' ) {
+        $self->_set_seq_data();
+    }
+
+    ## Sensitive to member name format: "_<lowercased type>Length".
+    my $length_key = '_' . lc($seqType) . 'Length';
+
+    # Optional setter behaviour when a defined value is supplied.
+    $self->{$length_key} = $data if defined $data;
+
+    return $self->{$length_key};
+}
+
+
+
+=head2 gaps
+
+ Usage     : $hsp->gaps( [seq_type] )
+ Purpose   : Get the number of gaps in the query, sbjct, or total alignment.
+           : Also can return query gaps and sbjct gaps as a two-element list
+           : when in array context.
+ Example   : $total_gaps      = $hsp->gaps();
+           : ($qgaps, $sgaps) = $hsp->gaps();
+           : $qgaps           = $hsp->gaps('query');
+ Returns   : scalar context: integer
+           : array context without args: (int, int) = ('queryGaps', 'sbjctGaps')
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or 'total'
+           :  ('sbjct' is synonymous with 'hit')
+           : (default = 'total', scalar context)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Throws    : n/a
+
+See Also   : L<length()|length>, L<matches()|matches>
+
+=cut
+
+#---------
+sub gaps {
+#---------
+    my ($self, $seqType) = @_;
+
+    # Gap counts come from the sequence data, which is gathered on demand.
+    unless ( $self->{'_set_seq_data'} ) {
+        $self->_set_seq_data();
+    }
+
+    # With no explicit type, the calling context picks the mode.
+    $seqType = ( wantarray ? 'list' : 'total' ) unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # (query gaps, sbjct gaps) pair.
+    return ( ($self->{'_queryGaps'} || 0), ($self->{'_sbjctGaps'} || 0) )
+        if $seqType =~ /list|array/i;
+
+    # Combined count across both sequences.
+    return ( $self->{'_queryGaps'} + $self->{'_sbjctGaps'} ) || 0
+        if $seqType eq 'total';
+
+    ## Sensitive to member name format: "_<lowercased type>Gaps".
+    my $gaps_key = '_' . lc($seqType) . 'Gaps';
+    return $self->{$gaps_key} || 0;
+}
+
+
+=head2 frac_identical
+
+ Usage     : $hsp_object->frac_identical( [seq_type] );
+ Purpose   : Get the fraction of identical positions within the given HSP.
+ Example   : $frac_iden = $hsp_object->frac_identical('query');
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or 'total'
+           :  ('sbjct' is synonymous with 'hit')
+           : default = 'total' (but see comments below).
+ Throws    : n/a
+ Comments  : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI-BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           : Therefore, when called without an argument or an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used.
+           :
+           : To get the fraction identical among only the aligned residues,
+           : ignoring the gaps, call this method with an argument of 'query'
+           : or 'sbjct' ('sbjct' is synonymous with 'hit').
+
+See Also   : L<frac_conserved()|frac_conserved>, L<num_identical()|num_identical>, L<matches()|matches>
+
+=cut
+
+#-------------------
+sub frac_identical {
+#-------------------
+# Computed on each call instead of being stored from the parsed report;
+# that avoids extra storage and lets the caller choose which sequence
+# (query or sbjct) the figure is calculated for.
+
+    my ($self, $seqType) = @_;
+
+    $seqType = 'total' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # Per-sequence lengths need _set_seq_data() to have run once.
+    $self->_set_seq_data()
+        if $seqType ne 'total' and not $self->{'_set_seq_data'};
+
+    ## Sensitive to member name format: "_<lowercased type>Length".
+    my $length_key = '_' . lc($seqType) . 'Length';
+
+    return sprintf( "%.2f", $self->{'_numIdentical'} / $self->{$length_key} );
+}
+
+
+=head2 frac_conserved
+
+ Usage     : $hsp_object->frac_conserved( [seq_type] );
+ Purpose   : Get the fraction of conserved positions within the given HSP.
+           : (Note: 'conservative' positions are called 'positives' in the
+	   : Blast report.)
+ Example   : $frac_cons = $hsp_object->frac_conserved('query');
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or 'total'
+           :  ('sbjct' is synonymous with 'hit')
+           : default = 'total' (but see comments below).
+ Throws    : n/a
+ Comments  : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI-BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           : Therefore, when called without an argument or an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used.
+           :
+           : To get the fraction conserved among only the aligned residues,
+           : ignoring the gaps, call this method with an argument of 'query'
+           : or 'sbjct'.
+
+See Also   : L<frac_identical()|frac_identical>, L<num_conserved()|num_conserved>, L<matches()|matches>
+
+=cut
+
+#--------------------
+sub frac_conserved {
+#--------------------
+# Computed on each call instead of being stored from the parsed report;
+# that avoids extra storage and lets the caller choose which sequence
+# (query or sbjct) the figure is calculated for.
+
+    my ($self, $seqType) = @_;
+
+    $seqType = 'total' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # Per-sequence lengths need _set_seq_data() to have run once.
+    $self->_set_seq_data()
+        if $seqType ne 'total' and not $self->{'_set_seq_data'};
+
+    ## Sensitive to member name format: "_<lowercased type>Length".
+    my $length_key = '_' . lc($seqType) . 'Length';
+
+    return sprintf( "%.2f", $self->{'_numConserved'} / $self->{$length_key} );
+}
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+#----------------
+sub query_string{ my ($self) = @_; return $self->seq_str('query'); }
+#----------------
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+#----------------
+sub hit_string{ my ($self) = @_; return $self->seq_str('hit'); }
+#----------------
+
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : none
+
+=cut
+
+#----------------
+sub homology_string{ my ($self) = @_; return $self->seq_str('match'); }
+#----------------
+
+#=================================================
+# End Bio::Search::HSP::HSPI implementation
+#=================================================
+
+# Older method delegating to method defined in HSPI.
+
+=head2 expect
+
+See L<Bio::Search::HSP::HSPI::expect()|Bio::Search::HSP::HSPI>
+
+=cut
+
+#----------
+sub expect { my $self = shift; return $self->evalue( @_ ); }
+#----------
+
+
+=head2 rank
+
+ Usage     : $hsp->rank();
+ Purpose   : Get the rank of the HSP within a given Blast hit.
+ Example   : $rank = $hsp->rank;
+ Returns   : Integer (1..n) corresponding to the order in which the HSP
+             appears in the BLAST report.
+
+=cut
+
+#'
+
+#----------
+sub rank { my ($self) = @_; return $self->{'_rank'}; }
+#----------
+
+# For backward compatibility
+#----------
+sub name { my ($self) = @_; return $self->rank; }
+#----------
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $hsp->to_string;
+ Function: Returns a string representation for the Blast HSP.
+           Primarily intended for debugging purposes.
+ Example : see usage
+ Returns : A string of the form:
+           [BlastHSP] <rank>
+           e.g.:
+           [BlastHSP] 1
+ Args    : None
+
+=cut
+
+#----------
+sub to_string {
+#----------
+    my ($self) = @_;
+
+    # Fixed tag followed by the HSP's rank.
+    my $rank = $self->rank();
+    return "[BlastHSP] " . $rank;
+}
+
+
+#=head2 _set_data (Private method)
+#
+# Usage     : called automatically during object construction.
+# Purpose   : Parses the raw HSP section from a flat BLAST report and
+#             sets the query sequence, sbjct sequence, and the "match" data
+#           : which consists of the symbols between the query and sbjct lines
+#           : in the alignment.
+# Argument  : Array (all lines for a single, complete HSP, from a raw,
+#             flat (i.e., non-XML) BLAST report)
+# Throws    : Propagates any exceptions from the methods called ("See Also")
+#
+#See Also   : L<_set_seq()|_set_seq>, L<_set_score_stats()|_set_score_stats>, L<_set_match_stats()|_set_match_stats>, L<_initialize()|_initialize>
+#
+#=cut
+
+#--------------
+sub _set_data {
+#--------------
+    # Work on a private copy of the lines: match rows may be padded in
+    # place below, and the caller's data must not be altered.
+    my ($self, @rows) = @_;
+
+    my @query_rows;             # 'Query' = sequence used to query the database.
+    my @sbjct_rows;             # 'Sbjct' = homologous sequence found in the database.
+    my @match_rows;             # Symbols between the Query and Sbjct rows.
+    my $expect_match_row = 0;   # True for the row directly after a Query row.
+    my $row_width;              # Length of the last Query row through its first residue run.
+
+    #print STDERR "BlastHSP: set_data()\n";
+
+    # Collect every alignment row now and keep the collections for
+    # possible processing later.
+    #
+    # "Match" rows are not always padded with spaces out to the width of
+    # the Query row above them, e.g.:
+    # Query: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVIXXXXX 1200
+    #             PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVI
+    # Sbjct: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVILSLKL 1200
+    # so short match rows are padded here.
+
+    for my $row (@rows) {
+        next if $row =~ /^\s*$/;
+
+        if ( $row =~ /^ ?Score/ ) {
+            $self->_set_score_stats( $row );
+        }
+        elsif ( $row =~ /^ ?(Identities|Positives|Strand)/ ) {
+            $self->_set_match_stats( $row );
+        }
+        elsif ( $row =~ /^ ?Frame = ([\d+-]+)/ ) {
+            # Version 2.0.8 has Frame information on a separate line.
+            # Storing frame according to SeqFeature::Generic::frame()
+            # which does not contain strand info (use strand()).
+            $self->frame( abs($1) - 1 );
+        }
+        elsif ( $row =~ /^(Query:?[\s\d]+)([^\s\d]+)/ ) {
+            my ($lead, $residues) = ($1, $2);
+            push @query_rows, $row;
+            $self->{'_match_indent'} = CORE::length($lead);
+            $row_width = CORE::length($lead) + CORE::length($residues);
+            $expect_match_row = 1;
+        }
+        elsif ( $expect_match_row ) {
+            # Pad the match row with spaces if it is shorter than the
+            # Query row; a zero or negative shortfall adds nothing.
+            my $shortfall = $row_width - CORE::length($row);
+            $row .= ' ' x $shortfall if $shortfall;
+            push @match_rows, $row;
+            $expect_match_row = 0;
+        }
+        elsif ( $row =~ /^Sbjct/ ) {
+            push @sbjct_rows, $row;
+        }
+    }
+
+    # Keep all three collections in case they are needed later.
+    # We could make this conditional to save memory.
+    $self->{'_queryList'} = \@query_rows;
+    $self->{'_sbjctList'} = \@sbjct_rows;
+    $self->{'_matchList'} = \@match_rows;
+
+    # The match statistics must have produced an identity count by now.
+    unless ( defined $self->{'_numIdentical'} ) {
+        my $id_str = $self->_id_str;
+        $self->throw( -text  => "Can't parse match statistics. Possibly a new or unrecognized Blast format. ($id_str)");
+    }
+
+    # Both alignment sides must have been seen.
+    unless ( @query_rows and @sbjct_rows ) {
+        my $id_str = $self->_id_str;
+        $self->throw( "Can't find query or sbjct alignment lines. Possibly unrecognized Blast format. ($id_str)");
+    }
+}
+
+
+#=head2 _set_score_stats (Private method)
+#
+# Usage     : called automatically by _set_data()
+# Purpose   : Sets various score statistics obtained from the HSP listing.
+# Argument  : String with any of the following formats:
+#           : blast2:  Score = 30.1 bits (66), Expect = 9.2
+#           : blast2:  Score = 158.2 bits (544), Expect(2) = e-110
+#           : blast1:  Score = 410 (144.3 bits), Expect = 1.7e-40, P = 1.7e-40
+#           : blast1:  Score = 55 (19.4 bits), Expect = 5.3, Sum P(3) = 0.99
+# Throws    : Exception if the stats cannot be parsed, probably due to a change
+#           : in the Blast report format.
+#
+#See Also   : L<_set_data()|_set_data>
+#
+#=cut
+
+#--------------------
+sub _set_score_stats {
+#--------------------
+    # Parse one "Score = ..." HSP header line and record the bit score, the
+    # raw score, the Expect value and, when the line carries them, the
+    # P value and the N value.
+    #   $data : the raw header line, in any of the blast1/blast2 layouts.
+    # Throws an exception when the line matches none of the known layouts.
+    my ($self, $data) = @_;
+
+    my ($expect, $p);
+    my @field;
+
+    if( @field = $data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect = +([\d.e+-]+)/ ) {
+        # blast2 layout, n = 1: bits first, raw score in parentheses.
+        $self->bits($field[0]);
+        $self->score($field[1]);
+        $expect = $field[2];
+
+    } elsif( @field = $data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect\((\d+)\) = +([\d.e+-]+)/ ) {
+        # blast2 layout, n > 1: N is given in parentheses after "Expect".
+        $self->bits($field[0]);
+        $self->score($field[1]);
+        $self->{'_n'} = $field[2];
+        $expect       = $field[3];
+
+    } elsif( @field = $data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), P = +([\d.e-]+)/ ) {
+        # blast1 layout, n = 1: raw score first, bits in parentheses.
+        $self->score($field[0]);
+        $self->bits($field[1]);
+        ($expect, $p) = @field[2, 3];
+
+    } elsif( @field = $data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), +Sum P\((\d+)\) = +([\d.e-]+)/ ) {
+        # blast1 layout, n > 1: N is given in parentheses after "Sum P".
+        $self->score($field[0]);
+        $self->bits($field[1]);
+        $expect       = $field[2];
+        $self->{'_n'} = $field[3];
+        $p            = $field[4];
+
+    } else {
+        my $id_str = $self->_id_str;
+        $self->throw(-class => 'Bio::Root::Exception',
+                     -text => "Can't parse score statistics: unrecognized format. ($id_str)",
+                     -value => $data);
+    }
+
+    # A bare exponent such as "e-110" is given a leading "1" so that it
+    # reads as a number.
+    $expect = "1$expect" if $expect =~ /^e/i;
+    if( defined $p and $p =~ /^e/i ) {
+        $p = "1$p";
+    }
+
+    # The P value takes precedence over Expect as the significance whenever
+    # it is present and true.
+    $self->{'_expect'} = $expect;
+    $self->{'_p'}      = $p ? $p : undef;
+    $self->significance( $p ? $p : $expect );
+}
+
+
+#=head2 _set_match_stats (Private method)
+#
+# Usage     : Private method; called automatically by _set_data()
+# Purpose   : Sets various matching statistics obtained from the HSP listing.
+# Argument  : blast2: Identities = 23/74 (31%), Positives = 29/74 (39%), Gaps = 17/74 (22%)
+#           : blast2: Identities = 57/98 (58%), Positives = 74/98 (75%)
+#           : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%)
+#           : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%), Frame = -3
+#           : WU-blast: Identities = 310/553 (56%), Positives = 310/553 (56%), Strand = Minus / Plus
+# Throws    : Exception if the stats cannot be parsed, probably due to a change
+#           : in the Blast report format.
+# Comments  : The "Gaps = " data in the HSP header has a different meaning depending
+#           : on the type of Blast: for BLASTP, this number is the total number of
+#           : gaps in query+sbjct; for TBLASTN, it is the number of gaps in the
+#           : query sequence only. Thus, it is safer to collect the data
+#           : separately by examining the actual sequence strings as is done
+#           : in _set_seq().
+#
+#See Also   : L<_set_data()|_set_data>, L<_set_seq()|_set_seq>
+#
+#=cut
+
+#--------------------
+sub _set_match_stats {
+#--------------------
+    # Extract the identity and positive counts, plus the frame and strand
+    # when the line carries them, from an HSP statistics line.
+    #   $data : raw "Identities = ..." line.
+    # Fields absent from the line leave the corresponding members untouched.
+    my ($self, $data) = @_;
+
+    # Identities and Positives share the same "matched/total" layout in
+    # blast1 and blast2 reports; both record the alignment length.
+    my ($matched, $total);
+    if( ($matched, $total) = $data =~ m!Identities = (\d+)/(\d+)! ) {
+        @{$self}{'_numIdentical', '_totalLength'} = ($matched, $total);
+    }
+    if( ($matched, $total) = $data =~ m!Positives = (\d+)/(\d+)! ) {
+        @{$self}{'_numConserved', '_totalLength'} = ($matched, $total);
+    }
+
+    # NOTE(review): the frame is handed to frame() verbatim here, whereas the
+    # separate "Frame = " line handled in _set_data() stores abs(frame) - 1.
+    # Confirm that frame() is meant to receive both forms.
+    if( my ($frame) = $data =~ m!Frame = ([\d+-]+)! ) {
+        $self->frame($frame);
+    }
+
+    # Strand data is not always present in this line.
+    # _set_seq() will also set strand information.
+    if( my ($query_strand, $sbjct_strand) = $data =~ m!Strand = (\w+) / (\w+)! ) {
+        $self->{'_queryStrand'} = $query_strand;
+        $self->{'_sbjctStrand'} = $sbjct_strand;
+    }
+
+    # "Gaps = n/m" is deliberately not parsed here; gap counts are derived
+    # from the sequence strings themselves in _set_seq().
+}
+
+
+
+#=head2 _set_seq_data (Private method)
+#
+# Usage     : called automatically when sequence data is requested.
+# Purpose   : Sets the HSP sequence data for both query and sbjct sequences.
+#           : Includes: start, stop, length, gaps, and raw sequence.
+# Argument  : n/a
+# Throws    : Propagates any exception thrown by _set_seq()
+# Comments  : Uses raw data stored by _set_data() during object construction.
+#           : These data are not always needed, so it is conditionally
+#           : executed only upon demand by methods such as gaps(), _set_residues(),
+#           : etc. _set_seq() does the dirty work.
+#
+#See Also   : L<_set_seq()|_set_seq>
+#
+#=cut
+
+#-----------------
+sub _set_seq_data {
+#-----------------
+    # Parse the stored raw query and sbjct alignment lines via _set_seq(),
+    # discard the raw lines, and flag the sequence data as set.
+    my ($self) = @_;
+
+    foreach my $type ( qw(query sbjct) ) {
+        $self->_set_seq($type, @{ $self->{"_${type}List"} });
+    }
+
+    # Liberate some memory: empty each raw-line array, then drop the
+    # reference held by the object.
+    foreach my $slot ( '_queryList', '_sbjctList' ) {
+        @{ $self->{$slot} } = ();
+        $self->{$slot} = undef;
+    }
+
+    $self->{'_set_seq_data'} = 1;
+}
+
+
+
+#=head2 _set_seq (Private method)
+#
+# Usage     : called automatically by _set_seq_data()
+#           : $hsp_obj->_set_seq($seq_type, @data);
+# Purpose   : Sets sequence information for both the query and sbjct sequences.
+#           : Directly counts the number of gaps in each sequence (if gapped Blast).
+# Argument  : $seq_type = 'query' or 'sbjct'
+#           : @data = all seq lines with the form:
+#           : Query: 61  SPHNVKDRKEQNGSINNAISPTATANTSGSQQINIDSALRDRSSNVAAQPSLSDASSGSN 120
+# Throws    : Exception if data strings cannot be parsed, probably due to a change
+#           : in the Blast report format.
+# Comments  : Uses first argument to determine which data members to set
+#           : making this method sensitive to data member name changes.
+#           : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
+# Warning   : Sequence endpoints are normalized so that start < end. This affects HSPs
+#           : for TBLASTN/X hits on the minus strand. Normalization facilitates use
+#           : of range information by methods such as match().
+#
+#See Also   : L<_set_seq_data()|_set_seq_data>, L<matches()|matches>, L<range()|range>, L<start()|start>, L<end()|end>
+#
+#=cut
+
+#-------------
+sub _set_seq {
+#-------------
+    # Record start, stop, length, strand, gap count and the sequence rows
+    # for one side of the alignment.
+    #   $seqType : 'query' or 'sbjct'; selects the "_query*" / "_sbjct*" members.
+    #   @data    : alignment lines of the form "Query: 61  SPHNV... 120".
+    # Warns about each line that cannot be parsed and throws when no line
+    # yields usable data.
+    my ($self, $seqType, @data) = @_;
+    my (@ranges, @sequence);
+
+    foreach my $row ( @data ) {
+        if( $row =~ m/(\d+) *([^\d\s]+) *(\d+)/ ) {
+            push @ranges, $1, $3;
+            push @sequence, $2;
+        } else {
+            $self->warn("Bad sequence data: $row");
+        }
+    }
+
+    unless( @sequence and @ranges ) {
+        my $id_str = $self->_id_str;
+        $self->throw("Can't set sequence: missing data. Possibly unrecognized Blast format. ($id_str) $seqType");
+    }
+
+    # Sensitive to member name changes.
+    my $member = "_\L$seqType\E";
+
+    # The first coordinate of the first row and the last coordinate of the
+    # last row delimit the whole HSP.
+    my ($first, $last) = @ranges[0, -1];
+    $self->{$member.'Start'}  = $first;
+    $self->{$member.'Stop'}   = $last;
+    $self->{$member.'Seq'}    = \@sequence;
+    $self->{$member.'Length'} = abs($last - $first) + 1;
+
+    # Adjust lengths for BLASTX, TBLASTN, TBLASTX sequences:
+    # converting nucl coords to amino acid coords.
+    my $prog = $self->algorithm;
+    my $is_translated =
+           ( $prog eq 'TBLASTN' and $member eq '_sbjct' )
+        || ( $prog eq 'BLASTX'  and $member eq '_query' )
+        || ( $prog eq 'TBLASTX' );
+    $self->{$member.'Length'} /= 3 if $is_translated;
+
+    if( $prog ne 'BLASTP' ) {
+        if( $prog =~ /BLASTN/ or ( $prog =~ /BLASTX/ and $member eq '_query' ) ) {
+            $self->{$member.'Strand'} = 'Plus';
+        }
+        # Normalize sequence endpoints so that start < end.
+        # Reverse complement or 'minus strand' HSPs get flipped here.
+        if( $self->{$member.'Start'} > $self->{$member.'Stop'} ) {
+            @{$self}{ $member.'Start', $member.'Stop' } =
+                @{$self}{ $member.'Stop', $member.'Start' };
+            $self->{$member.'Strand'} = 'Minus';
+        }
+    }
+
+    # Count the gaps: whatever the aligned string holds beyond the
+    # coordinate-derived length is taken to be gap characters.
+    (my $residues = join('', @sequence)) =~ s/\s//g;
+    my $num_gaps = CORE::length($residues) - $self->{$member.'Length'};
+    $self->{$member.'Gaps'} = $num_gaps if $num_gaps > 0;
+}
+
+
+#=head2 _set_residues (Private method)
+#
+# Usage     : called automatically when residue data is requested.
+# Purpose   : Sets the residue numbers representing the identical and
+#           : conserved positions. These data are obtained by analyzing the
+#           : symbols between query and sbjct lines of the alignments.
+# Argument  : n/a
+# Throws    : Propagates any exception thrown by _set_seq_data() and _set_match_seq().
+# Comments  : These data are not always needed, so it is conditionally
+#           : executed only upon demand by methods such as seq_inds().
+#           : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
+#
+#See Also   : L<_set_seq_data()|_set_seq_data>, L<_set_match_seq()|_set_match_seq>, seq_inds()
+#
+#=cut
+
+#------------------
+sub _set_residues {
+#------------------
+    # Build, for query and sbjct, the sets of residue numbers that are
+    # identical or conserved according to the match line of the alignment.
+    # Results are stored in the _identicalRes_* and _conservedRes_* members
+    # as hashes keyed by residue number.
+    # Propagates exceptions from _set_seq_data() and _set_match_seq().
+    my $self = shift;
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    # Using hashes to avoid saving duplicate residue numbers.
+    my (%identicalList_query, %identicalList_sbjct);
+    my (%conservedList_query, %conservedList_sbjct);
+
+    # Prepare the match sequence on first use. The call is kept separate
+    # from the declaration below because a "my" combined with a statement
+    # modifier has undefined behaviour (see perlsyn).
+    $self->_set_match_seq() unless ref $self->{'_matchSeq'};
+    my $seqString = join('', @{ $self->{'_matchSeq'} });
+
+    my $qseq = join('',@{$self->{'_querySeq'}});
+    my $sseq = join('',@{$self->{'_sbjctSeq'}});
+    my $resCount_query = $self->{'_queryStop'} || 0;
+    my $resCount_sbjct = $self->{'_sbjctStop'} || 0;
+
+    # Translated sequences: scale the nucleotide stop coordinate by 1/3.
+    my $prog = $self->algorithm;
+    if($prog !~ /^BLASTP|^BLASTN/) {
+        if($prog eq 'TBLASTN') {
+            $resCount_sbjct /= 3;
+        } elsif($prog eq 'BLASTX') {
+            $resCount_query /= 3;
+        } elsif($prog eq 'TBLASTX') {
+            $resCount_query /= 3;
+            $resCount_sbjct /= 3;
+        }
+    }
+
+    # Walk the three strings from their right-hand ends, counting residue
+    # numbers down from the stop coordinates. Looping on the remaining
+    # length (rather than on the truth of the chopped character) keeps a
+    # false character such as '0' from ending the walk early.
+    while( CORE::length $seqString ) {
+        my $mchar = chop($seqString);
+        my ($qchar, $schar) = (chop($qseq), chop($sseq));
+        if( $mchar eq '+' ) {
+            $conservedList_query{ $resCount_query } = 1;
+            $conservedList_sbjct{ $resCount_sbjct } = 1;
+        } elsif( $mchar ne ' ' ) {
+            $identicalList_query{ $resCount_query } = 1;
+            $identicalList_sbjct{ $resCount_sbjct } = 1;
+        }
+        # A gap consumes no residue number on the side where it occurs.
+        $resCount_query-- if $qchar ne $GAP_SYMBOL;
+        $resCount_sbjct-- if $schar ne $GAP_SYMBOL;
+    }
+    $self->{'_identicalRes_query'} = \%identicalList_query;
+    $self->{'_conservedRes_query'} = \%conservedList_query;
+    $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
+    $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
+
+}
+
+
+
+
+#=head2 _set_match_seq (Private method)
+#
+# Usage     : $hsp_obj->_set_match_seq()
+# Purpose   : Set the 'match' sequence for the current HSP (symbols in between
+#           : the query and sbjct lines.)
+# Returns   : Array reference holding the match sequences lines.
+# Argument  : n/a
+# Throws    : Exception if the _matchList field is not set.
+# Comments  : The match information is not always necessary. This method
+#           : allows it to be conditionally prepared.
+#           : Called by _set_residues() and seq_str().
+#
+#See Also   : L<_set_residues()|_set_residues>, L<seq_str()|seq_str>
+#
+#=cut
+
+#-------------------
+sub _set_match_seq {
+#-------------------
+    # Turn the raw match lines stored in _matchList into the match sequence:
+    # each line is chomped and has its leading indent removed.
+    # Returns the array reference stored in _matchSeq (one string per line).
+    # Throws an exception if _matchList does not hold a reference.
+    my $self = shift;
+
+    if( ! ref($self->{'_matchList'}) ) {
+        my $id_str = $self->_id_str;
+        $self->throw("Can't set HSP match sequence: No data ($id_str)");
+    }
+
+    # An unset indent strips nothing.
+    my $indent = $self->{'_match_indent'} || 0;
+
+    my @sequence;
+    foreach my $raw ( @{$self->{'_matchList'}} ) {
+        my $row = $raw;
+        chomp($row);
+        ## Remove exactly the indent; (note: aln may begin with a space
+        ## which is why we can't use s/^ +//).
+        $row =~ s/^ {$indent}//;
+        push @sequence, $row;
+    }
+
+    # Liberate some memory. The list is assigned an empty list: assigning
+    # undef would leave a one-element array behind.
+    @{$self->{'_matchList'}} = ();
+    $self->{'_matchList'} = undef;
+
+    $self->{'_matchSeq'} = \@sequence;
+
+    return $self->{'_matchSeq'};
+}
+
+
+=head2 n
+
+ Usage     : $hsp_obj->n()
+ Purpose   : Get the N value (num HSPs on which P/Expect is based).
+           : This value is not defined with NCBI Blast2 with gapping.
+ Returns   : Integer or null string if not defined.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : The 'N' value is listed in parenthesis with P/Expect value:
+           : e.g., P(3) = 1.2e-30  ---> (N = 3).
+           : Not defined in NCBI Blast2 with gaps.
+           : This typically is equal to the number of HSPs but not always.
+           : To obtain the number of HSPs, use Bio::Search::Hit::BlastHit::num_hsps().
+
+See Also   : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair>
+
+=cut
+
+#-----
+sub n {
+    # Return the stored N value, or the empty string when it is unset or false.
+    my ($self) = @_;
+    return $self->{'_n'} ? $self->{'_n'} : '';
+}
+#-----
+
+
+=head2 matches
+
+ Usage     : $hsp->matches([-SEQ => seq_type], [-START => start], [-STOP => stop]);
+ Purpose   : Get the total number of identical and conservative matches
+           : in the query or sbjct sequence for the given HSP. Optionally can
+           : report data within a defined interval along the seq.
+           : (Note: 'conservative' matches are called 'positives' in the
+           : Blast report.)
+ Example   : ($id,$cons) = $hsp_object->matches(-SEQ => 'hit');
+           : ($id,$cons) = $hsp_object->matches(-SEQ => 'query', -START => 300, -STOP => 400);
+ Returns   : 2-element array of integers
+ Argument  : Named parameters:
+           : -SEQ   = 'query' or 'hit' or 'sbjct' (default = query)
+           :  ('sbjct' is synonymous with 'hit')
+           : -START = Starting coordinate (optional)
+           : -STOP  = Ending coordinate (optional)
+ Throws    : Exception if the supplied coordinates are out of range.
+ Comments  : Relies on seq_str('match') to get the string of alignment symbols
+           : between the query and sbjct lines which are used for determining
+           : the number of identical and conservative matches.
+
+See Also   : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#-----------
+sub matches {
+#-----------
+    # Return (identical, conserved) match counts for the whole HSP or, when
+    # a start and/or stop is supplied, for that sub-section of the alignment.
+    #
+    # Arguments may be given by name or by position:
+    #   $hsp->matches(-SEQ => 'query', -START => 300, -STOP => 400);
+    #   $hsp->matches('query', 300, 400);
+    # The positional form is the one shown in the POD; previously only the
+    # named form was read, so positional arguments were silently ignored.
+    #
+    # Throws an exception when the requested sub-section yields no match string.
+    my( $self, @args ) = @_;
+    my(@data);
+
+    my($seqType, $beg, $end);
+    if( @args and defined $args[0] and $args[0] =~ /^-/ ) {
+        # Named parameters (keys begin with a dash).
+        my %param = @args;
+        ($seqType, $beg, $end) = @param{'-SEQ', '-START', '-STOP'};
+    } else {
+        ($seqType, $beg, $end) = @args;
+    }
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    my($start,$stop);
+
+    if(!defined $beg && !defined $end) {
+        ## Get data for the whole alignment.
+        push @data, ($self->{'_numIdentical'}, $self->{'_numConserved'});
+    } else {
+        ## Get the substring representing the desired sub-section of aln.
+        $beg ||= 0;
+        $end ||= 0;
+        ($start,$stop) = $self->range($seqType);
+        if($beg == 0) { $beg = $start; $end = $beg+$end; }
+        elsif($end == 0) { $end = $stop; $beg = $end-$beg; }
+
+        ## Clamp the requested interval to the HSP range; an end that lies
+        ## inside the range is bumped by one.
+        if($end >= $stop) { $end = $stop; }
+        else { $end += 1; }
+        if($beg < $start) { $beg = $start; }
+
+        ## Coordinates of a translated nucleotide sequence are divided by
+        ## three to index into the match string.
+        my $prog      = $self->algorithm;
+        my $match_str = $self->seq_str('match');
+        my $seq;
+        if( ($prog eq 'TBLASTN' and $seqType eq 'sbjct')
+            or ($prog eq 'BLASTX' and $seqType eq 'query') ) {
+            $seq = substr($match_str,
+                          int(($beg-$start)/3), int(($end-$beg+1)/3));
+        } else {
+            $seq = substr($match_str, $beg-$start, ($end-$beg));
+        }
+
+        if(!CORE::length($seq)) {
+            my $id_str = $self->_id_str;
+            $self->throw("Undefined $seqType sub-sequence ($beg,$end). Valid range = $start - $stop ($id_str)");
+        }
+
+        ## Every non-space symbol is a conserved-or-identical position;
+        ## dropping the '+' symbols leaves the identical ones.
+        $seq =~ s/ //g;  # remove space (no info).
+        my $len_cons = CORE::length $seq;
+        $seq =~ s/\+//g;  # remove '+' characters (conservative substitutions)
+        my $len_id = CORE::length $seq;
+        push @data, ($len_id, $len_cons);
+    }
+    @data;
+}
+
+
+=head2 num_identical
+
+ Usage     : $hsp_object->num_identical();
+ Purpose   : Get the number of identical positions within the given HSP.
+ Example   : $num_iden = $hsp_object->num_identical();
+ Returns   : integer
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<num_conserved()|num_conserved>, L<frac_identical()|frac_identical>
+
+=cut
+
+#-------------------
+sub num_identical {
+#-------------------
+    # Return the number of identical positions recorded for this HSP.
+    my ($self) = @_;
+    return $self->{'_numIdentical'};
+}
+
+
+=head2 num_conserved
+
+ Usage     : $hsp_object->num_conserved();
+ Purpose   : Get the number of conserved positions within the given HSP.
+ Example   : $num_cons = $hsp_object->num_conserved();
+ Returns   : integer
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<num_identical()|num_identical>, L<frac_conserved()|frac_conserved>
+
+=cut
+
+#-------------------
+sub num_conserved {
+#-------------------
+    # Return the number of conserved positions recorded for this HSP.
+    my ($self) = @_;
+    return $self->{'_numConserved'};
+}
+
+
+
+=head2 range
+
+ Usage     : $hsp->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($query_beg, $query_end) = $hsp->range('query');
+           : ($hit_beg, $hit_end) = $hsp->range('hit');
+ Returns   : Two-element array of integers
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<end()|end>
+
+=cut
+
+#----------
+sub range {
+#----------
+    # Return the (start, stop) coordinates of the query or sbjct sequence,
+    # parsing the stored alignment lines first if that has not happened yet.
+    #   $seqType : 'query' (default), 'hit' or 'sbjct'.
+    my ($self, $seqType) = @_;
+
+    if( not $self->{'_set_seq_data'} ) {
+        $self->_set_seq_data();
+    }
+
+    my $which = $seqType || 'query';
+    $which = 'sbjct' if $which eq 'hit';
+
+    ## Sensitive to member name changes.
+    my $prefix = '_' . lc($which);
+
+    return ($self->{$prefix.'Start'}, $self->{$prefix.'Stop'});
+}
+
+=head2 start
+
+ Usage     : $hsp->start( [seq_type] );
+ Purpose   : Gets the start coordinate for the query, sbjct, or both sequences
+           : in the HSP alignment.
+           : NOTE: Start will always be less than end.
+           : To determine strand, use $hsp->strand()
+ Example   : $query_beg = $hsp->start('query');
+           : $hit_beg = $hsp->start('hit');
+           : ($query_beg, $hit_beg) = $hsp->start();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default= 'query')
+           :  ('sbjct' is synonymous with 'hit')
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Throws    : n/a
+
+See Also   : L<end()|end>, L<range()|range>
+
+=cut
+
+#----------
+sub start {
+#----------
+    # Return the start coordinate of the query or sbjct sequence, or the
+    # pair (query start, sbjct start).
+    #   $seqType : 'query', 'hit' or 'sbjct' for a single value;
+    #              'list' or 'array' for the pair.
+    #              Without an argument the calling context decides.
+    my ($self, $seqType) = @_;
+
+    unless( $seqType ) {
+        $seqType = wantarray ? 'list' : 'query';
+    }
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    return ($self->{'_queryStart'}, $self->{'_sbjctStart'})
+        if $seqType =~ /list|array/i;
+
+    ## Sensitive to member name changes.
+    my $key = '_' . lc($seqType) . 'Start';
+    return $self->{$key};
+}
+
+=head2 end
+
+ Usage     : $hsp->end( [seq_type] );
+ Purpose   : Gets the end coordinate for the query, sbjct, or both sequences
+           : in the HSP alignment.
+           : NOTE: Start will always be less than end.
+           : To determine strand, use $hsp->strand()
+ Example   : $query_end = $hsp->end('query');
+           : $hit_end = $hsp->end('hit');
+           : ($query_end, $hit_end) = $hsp->end();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default= 'query')
+           :  ('sbjct' is synonymous with 'hit')
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<range()|range>, L<strand()|strand>
+
+=cut
+
+#----------
+sub end {
+#----------
+    # Return the end coordinate of the query or sbjct sequence, or the
+    # pair (query end, sbjct end).
+    #   $seqType : 'query', 'hit' or 'sbjct' for a single value;
+    #              'list' or 'array' for the pair.
+    #              Without an argument the calling context decides.
+    my ($self, $seqType) = @_;
+
+    unless( $seqType ) {
+        $seqType = wantarray ? 'list' : 'query';
+    }
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    return ($self->{'_queryStop'}, $self->{'_sbjctStop'})
+        if $seqType =~ /list|array/i;
+
+    ## Sensitive to member name changes.
+    my $key = '_' . lc($seqType) . 'Stop';
+    return $self->{$key};
+}
+
+
+
+=head2 strand
+
+ Usage     : $hsp_object->strand( [seq_type] )
+ Purpose   : Get the strand of the query or sbjct sequence.
+ Example   : print $hsp->strand('query');
+           : ($query_strand, $hit_strand) = $hsp->strand();
+ Returns   : -1, 0, or 1
+           : -1 = Minus strand, +1 = Plus strand
+           : Returns 0 if strand is not defined, which occurs
+           : for BLASTP reports, and the query of TBLASTN
+           : as well as the hit of BLASTX reports.
+           : In scalar context without arguments, returns queryStrand value.
+           : In array context without arguments, returns a two-element list
+           :    of strand values (queryStrand, sbjctStrand).
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or undef
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<_set_seq()>, L<_set_match_stats()>
+
+=cut
+
+#-----------
+sub strand {
+#-----------
+    # Return the strand of the query or sbjct sequence as looked up in
+    # %STRAND_SYMBOL, or 0 when no strand is recorded.
+    #   $seqType : 'query', 'hit' or 'sbjct' for a single value;
+    #              'list' or 'array' for the pair (query, sbjct).
+    #              Without an argument the calling context decides.
+    my( $self, $seqType ) = @_;
+
+    unless( $seqType ) {
+        $seqType = wantarray ? 'list' : 'query';
+    }
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    ## Sensitive to member name format.
+    # $seqType could be '_list'.
+    $seqType = '_' . lc($seqType);
+
+    # Parse the alignment lines only if neither the sequence data nor a
+    # query strand has been set so far.
+    unless( $self->{'_set_seq_data'} ) {
+        $self->_set_seq_data() unless $self->{'_queryStrand'};
+    }
+
+    my $prog = $self->algorithm;
+
+    if( $seqType =~ /list|array/i ) {
+        # BLASTP has no strands at all; TBLASTN only on the sbjct side and
+        # BLASTX only on the query side.
+        my ($qstr, $hstr);
+        if( $prog eq 'TBLASTN' ) {
+            $hstr = $STRAND_SYMBOL{$self->{'_sbjctStrand'}};
+        }
+        elsif( $prog eq 'BLASTX' ) {
+            $qstr = $STRAND_SYMBOL{$self->{'_queryStrand'}};
+        }
+        elsif( $prog ne 'BLASTP' ) {
+            $qstr = $STRAND_SYMBOL{$self->{'_queryStrand'}} if defined $self->{'_queryStrand'};
+            $hstr = $STRAND_SYMBOL{$self->{'_sbjctStrand'}} if defined $self->{'_sbjctStrand'};
+        }
+        return ($qstr || 0, $hstr || 0);
+    }
+
+    # Silence the warning for a strand member that was never set.
+    local $^W = 0;
+    return $STRAND_SYMBOL{$self->{$seqType.'Strand'}} || 0;
+}
+
+
+=head2 seq
+
+ Usage     : $hsp->seq( [seq_type] );
+ Purpose   : Get the query or sbjct sequence as a Bio::Seq.pm object.
+ Example   : $seqObj = $hsp->seq('query');
+ Returns   : Object reference for a Bio::Seq.pm object.
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query').
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : Propagates any exception that occurs during construction
+           : of the Bio::Seq.pm object.
+ Comments  : The Bio::Seq object is built from the string returned by
+           : seq_str(), i.e., the aligned sequence lines joined together
+           : with all whitespace removed.
+
+See Also   : L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<Bio::Seq>
+
+=cut
+
+#-------
+sub seq {
+#-------
+    # Wrap the query or sbjct sequence string of this HSP in a new Bio::Seq
+    # object whose ID is to_string() and whose description names the type.
+    #   $seqType : 'query' (default), 'hit' or 'sbjct'.
+    my ($self, $seqType) = @_;
+
+    $seqType = 'query' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    my $str = $self->seq_str($seqType);
+
+    require Bio::Seq;
+
+    return Bio::Seq->new(-ID   => $self->to_string,
+                         -SEQ  => $str,
+                         -DESC => "$seqType sequence",
+                         );
+}
+
+=head2 seq_str
+
+ Usage     : $hsp->seq_str( seq_type );
+ Purpose   : Get the full query, sbjct, or 'match' sequence as a string.
+           : The 'match' sequence is the string of symbols in between the
+           : query and sbjct sequences.
+ Example   : $str = $hsp->seq_str('query');
+ Returns   : String
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : Exception if the argument does not match an accepted seq_type.
+ Comments  : Calls _set_seq_data() if the sequence data has not been set
+           : already, and _set_match_seq() when the 'match' sequence is
+           : requested and has not been set already.
+
+See Also   : L<seq()|seq>, L<seq_inds()|seq_inds>, L<_set_match_seq()>
+
+=cut
+
+#------------
+sub seq_str {
+#------------
+    # Return the full query, sbjct or match sequence as a single string.
+    #   $seqType : 'query' (default), 'hit', 'sbjct' or 'match'.
+    # Query and sbjct strings have all whitespace removed; the match string
+    # is the plain concatenation of the match rows.
+    # Throws Bio::Root::BadParameter for any other type.
+    my($self,$seqType) = @_;
+
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    ## Sensitive to member name changes.
+    $seqType = "_\L$seqType\E";
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    if($seqType =~ /sbjct|query/) {
+        my $seq = join('',@{$self->{$seqType.'Seq'}});
+        $seq =~ s/\s+//g;
+        return $seq;
+
+    } elsif( $seqType =~ /match/i) {
+        # Only need to call _set_match_seq() if the match seq is requested.
+        # (No conditional "my" here: a "my" with a statement modifier has
+        # undefined behaviour, and _set_match_seq() stores its result in
+        # _matchSeq anyway.)
+        $self->_set_match_seq() unless ref $self->{'_matchSeq'};
+
+        return join('', @{ $self->{'_matchSeq'} });
+
+    } else {
+        my $id_str = $self->_id_str;
+        $self->throw(-class => 'Bio::Root::BadParameter',
+                     -text => "Invalid or undefined sequence type: $seqType ($id_str)\n" .
+                               "Valid types: query, sbjct, match",
+                     -value => $seqType);
+    }
+}
+
+=head2 seq_inds
+
+ Usage     : $hsp->seq_inds( seq_type, class, collapse );
+ Purpose   : Get a list of residue positions (indices) for all identical
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+           :  ('sbjct' is synonymous with 'hit')
+           : class     = 'identical' or 'conserved' (default = identical)
+           :              (can be shortened to 'id' or 'cons')
+           :              (actually, anything not 'id' will evaluate to 'conserved').
+           : collapse  = boolean, if true, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
+           :             collapses to "1-5 7 9-11". This is useful for
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : Calls _set_residues() to set the residue position data if it
+           : has not been set already.
+
+See Also   : L<seq()|seq>, L<_set_residues()>, L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::BlastHit::seq_inds()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#---------------
+sub seq_inds {
+#---------------
+    # Return the numerically sorted residue positions that are identical or
+    # conserved in the query or sbjct sequence.
+    #   $seqType  : anything starting with 'q' selects the query (default);
+    #               everything else selects the sbjct.
+    #   $class    : anything starting with 'id' selects identical (default);
+    #               everything else selects conserved.
+    #   $collapse : if true, the list is passed through
+    #               Bio::Search::BlastUtils::collapse_nums().
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    $self->_set_residues() unless defined $self->{'_identicalRes_query'};
+
+    my $side = ($seqType || 'query')     =~ /^q/i  ? 'query'     : 'sbjct';
+    my $kind = ($class   || 'identical') =~ /^id/i ? 'identical' : 'conserved';
+
+    ## Sensitive to member name changes.
+    my $slot = "_${kind}Res_${side}";
+
+    my @ary = sort { $a <=> $b } keys %{ $self->{$slot} };
+
+    return @ary unless $collapse;
+
+    require Bio::Search::BlastUtils;
+    return &Bio::Search::BlastUtils::collapse_nums(@ary);
+}
+
+
+=head2 get_aln
+
+ Usage     : $hsp->get_aln()
+ Purpose   : Get a Bio::SimpleAlign object constructed from the query + sbjct
+           : sequences of the present HSP object.
+ Example   : $aln_obj = $hsp->get_aln();
+ Returns   : Object reference for a Bio::SimpleAlign.pm object.
+ Argument  : n/a.
+ Throws    : Propagates any exception occurring during the construction of
+           : the Bio::SimpleAlign object.
+ Comments  : Requires Bio::SimpleAlign.
+           : The Bio::SimpleAlign object is constructed from the query + sbjct
+           : sequence objects obtained by calling seq().
+           : Gap residues are included (see $GAP_SYMBOL).
+
+See Also   : L<seq()|seq>, L<Bio::SimpleAlign>
+
+=cut
+
+#------------
+sub get_aln {
+#------------
+    # Build a Bio::SimpleAlign holding two rows, the query first and the hit
+    # second, from the aligned sequences of this HSP. Gap symbols are kept
+    # in the rows.
+    # Propagates any exception raised by seq() or while constructing the
+    # Bio::LocatableSeq and Bio::SimpleAlign objects.
+    my $self = shift;
+
+    require Bio::SimpleAlign;
+    require Bio::LocatableSeq;
+
+    my $aln = Bio::SimpleAlign->new();
+
+    foreach my $row ( ['query', 'query_'], ['sbjct', 'hit_'] ) {
+        my ($seq_type, $id_prefix) = @$row;
+        my $seq_obj = $self->seq($seq_type);
+        my $aligned = $seq_obj->seq();
+
+        # The end coordinate is derived from the sequence string. (The
+        # previous code took CORE::length() of the Bio::Seq object itself,
+        # which measures the stringified reference, not the sequence.)
+        # NOTE(review): gap symbols are excluded on the assumption that
+        # Bio::LocatableSeq coordinates count residues only - confirm.
+        (my $residues = $aligned) =~ s/\Q$GAP_SYMBOL\E//g;
+
+        $aln->add_seq(Bio::LocatableSeq->new(-seq   => $aligned,
+                                             -id    => $id_prefix.$seq_obj->display_id(),
+                                             -start => 1,
+                                             -end   => CORE::length($residues)));
+    }
+
+    return $aln;
+}
+
+
+1;
+__END__
+
+
+=head1 FOR DEVELOPERS ONLY
+
+=head2 Data Members
+
+Information about the various data members of this module is provided for those
+wishing to modify or understand the code. Two things to bear in mind:
+
+=over 4
+
+=item 1 Do NOT rely on these in any code outside of this module.
+
+All data members are prefixed with an underscore to signify that they are private.
+Always use accessor methods. If the accessor doesn't exist or is inadequate,
+create or modify an accessor (and let me know, too!).
+
+=item 2 This documentation may be incomplete and out of date.
+
+It is easy for these data member descriptions to become obsolete as
+this module is still evolving. Always double check this info and search
+for members not described here.
+
+=back
+
+An instance of Bio::Search::HSP::BlastHSP.pm is a blessed reference to a hash containing
+all or some of the following fields:
+
+ FIELD           VALUE
+ --------------------------------------------------------------
+ (member names are mostly self-explanatory)
+
+ _score              :
+ _bits               :
+ _p                  :
+ _n                  : Integer. The 'N' value listed in parenthesis with P/Expect value:
+                     : e.g., P(3) = 1.2e-30  ---> (N = 3).
+                     : Not defined in NCBI Blast2 with gaps.
+                     : To obtain the number of HSPs, use Bio::Search::Hit::BlastHit::num_hsps().
+ _expect             :
+ _queryLength        :
+ _queryGaps          :
+ _queryStart         :
+ _queryStop          :
+ _querySeq           :
+ _sbjctLength        :
+ _sbjctGaps          :
+ _sbjctStart         :
+ _sbjctStop          :
+ _sbjctSeq           :
+ _matchSeq           : Array ref of strings, one per alignment row. Contains the
+                       symbols between the query and sbjct lines which indicate
+                       identical (letter) and conserved ('+') matches
+                       or a mismatch (' ').
+ _numIdentical       :
+ _numConserved       :
+ _identicalRes_query :
+ _identicalRes_sbjct :
+ _conservedRes_query :
+ _conservedRes_sbjct :
+ _match_indent       : The number of leading space characters on each line containing
+                       the match symbols. _match_indent is 13 in this example:
+                         Query:   285 QNSAPWGLARISHRERLNLGSFNKYLYDDDAG
+                                      Q +APWGLARIS       G+ + Y YD+ AG
+                         ^^^^^^^^^^^^^
+
+
+=cut
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/FastaHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/FastaHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/FastaHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,186 @@
+# $Id: FastaHSP.pm,v 1.10.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::FastaHSP
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::FastaHSP - HSP object for FASTA specific data
+
+=head1 SYNOPSIS
+
+  # get a FastaHSP from a SearchIO stream
+  my $in = new Bio::SearchIO(-format => 'fasta', -file => 'filename.fasta');
+
+  while( my $r = $in->next_result) {
+      while( my $hit = $r->next_hit ) {
+           while( my $hsp = $hit->next_hsp ) {
+              print "smith-waterman score (if available): ", 
+                    $hsp->sw_score(),"\n";
+           }
+      }
+  }
+
+=head1 DESCRIPTION
+
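+A Bio::Search::HSP::GenericHSP subclass for HSPs parsed from FASTA
+reports.  It adds storage for the Smith-Waterman score (sw_score) and
+overrides get_aln() to strip the FASTA-specific padding and frameshift
+characters from the query and hit strings before building the alignment.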
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::FastaHSP;
+use strict;
+
+
+use base qw(Bio::Search::HSP::GenericHSP);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::FastaHSP();
+ Function: Builds a new Bio::Search::HSP::FastaHSP object 
+ Returns : Bio::Search::HSP::FastaHSP
+ Args    : -swscore => smith-waterman score
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  # Let the parent constructor consume the full argument list first.
+  my $self = $class->SUPER::new(@args);
+
+  # Pull out the one argument that is specific to this subclass.
+  my ($swscore) = $self->_rearrange([qw(SWSCORE)], @args);
+
+  # Only store a score the caller actually supplied; sw_score() itself
+  # falls back to 0 the first time it is read.
+  $self->sw_score($swscore) if defined $swscore;
+
+  return $self;
+}
+
+
+=head2 sw_score
+
+ Title   : sw_score
+ Usage   : $obj->sw_score($newval)
+ Function: Get/Set Smith-Waterman score
+ Returns : value of sw_score
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub sw_score {
+    my ($self, $value) = @_;
+
+    # An explicit value always wins; otherwise seed the slot with the
+    # default of 0 the first time it is read.
+    if (defined $value) {
+        $self->{'_sw_score'} = $value;
+    }
+    elsif (!defined $self->{'_sw_score'}) {
+        $self->{'_sw_score'} = 0;
+    }
+
+    return $self->{'_sw_score'};
+}
+
+
+# Build a two-sequence Bio::SimpleAlign (query first, then hit) from the
+# query and hit strings of this HSP, after removing the extra characters
+# that FASTA puts around and inside the aligned region.
+# Returns : Bio::SimpleAlign
+# Args    : none
+sub get_aln {
+    my ($self) = @_;
+    require Bio::LocatableSeq;
+    require Bio::SimpleAlign;
+    my $aln = Bio::SimpleAlign->new();
+    my $hs = $self->hit_string();
+    my $qs = $self->query_string();
+
+    # fasta reports some extra 'regional' sequence information
+    # we need to clear out first
+    # this seemed a bit insane to me at first, but it appears to
+    # work --jason
+
+    # we infer the end of the regional sequence where the first
+    # non space is in the homology string
+    # then we use the HSP->length to tell us how far to read
+    # to cut off the end of the sequence
+
+    my $start = 0;
+    if( $self->homology_string() =~ /^(\s+)/ ) {
+        $start = CORE::length($1);
+    }
+    $self->debug("hs seq is '$hs'\n");
+    $self->debug("qs seq is '$qs'\n");
+
+    $hs = substr($hs, $start,$self->length('total'));
+    $qs = substr($qs, $start,$self->length('total'));
+
+    # Remove every column in which either string holds a '\', '/' or ' '.
+    # $seq is an alias for $qs and then $hs, so it shrinks along with them.
+    foreach my $seq ( $qs,$hs)  {
+        foreach my $f ( '\\', '/', ' ') {
+            my $index = index($seq,$f);
+            while( $index >= 0 && CORE::length($seq) > 0 ) {
+                substr($hs,$index,1) = '';
+                substr($qs,$index,1) = '';
+                $self->debug( "$f, $index+1, for ".CORE::length($seq). " ($seq)\n");
+                # The column that followed the deleted one now sits at
+                # $index, so resume the search there; starting at
+                # $index+1 would skip it and miss adjacent occurrences.
+                $index = index($seq,$f,$index);
+            }
+        }
+    }
+
+    my $seqonly = $qs;
+    $seqonly =~ s/\s+//g;
+    my ($q_nm,$s_nm) = ($self->query->seq_id(),
+                        $self->hit->seq_id());
+    # Fall back to placeholder names when the features carry no id.
+    unless( defined $q_nm && CORE::length ($q_nm) ) {
+        $q_nm = 'query';
+    }
+    unless( defined $s_nm && CORE::length ($s_nm) ) {
+        $s_nm = 'hit';
+    }
+    my $query = Bio::LocatableSeq->new('-seq'   => $seqonly,
+                                       '-id'    => $q_nm,
+                                       '-start' => $self->query->start,
+                                       '-end'   => $self->query->end,
+                                       );
+    $seqonly = $hs;
+    $seqonly =~ s/\s+//g;
+    my $hit = Bio::LocatableSeq->new('-seq'   => $seqonly,
+                                     '-id'    => $s_nm,
+                                     '-start' => $self->hit->start,
+                                     '-end'   => $self->hit->end,
+                                     );
+    $aln->add_seq($query);
+    $aln->add_seq($hit);
+    return $aln;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/GenericHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/GenericHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/GenericHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1481 @@
+# $Id: GenericHSP.pm,v 1.68.4.5 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::GenericHSP
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair
+
+=head1 SYNOPSIS
+
+    my $hsp = new Bio::Search::HSP::GenericHSP( -algorithm => 'blastp',
+                                                -evalue    => '1e-30',
+                                                );
+
+    $r_type = $hsp->algorithm;
+
+    $pvalue = $hsp->p();
+
+    $evalue = $hsp->evalue();
+
+    $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+
+    $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+
+    $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+
+    $qseq = $hsp->query_string;
+
+    $hseq = $hsp->hit_string;
+
+    $homo_string = $hsp->homology_string;
+
+    $len = $hsp->length( ['query'|'hit'|'total'] );
+
+    $len = $hsp->length( ['query'|'hit'|'total'] );
+
+    $rank = $hsp->rank;
+
+# TODO: Describe how to configure a SearchIO stream so that it generates
+#       GenericHSP objects.
+
+
+=head1 DESCRIPTION
+
+This implementation is "Generic", meaning it is suitable for
+holding information about High Scoring pairs from most Search reports
+such as BLAST and FastA.  Specialized objects can be derived from
+this.
+
+Unless you're writing a parser, you won't ever need to create a
+GenericHSP or any other HSPI-implementing object. If you use
+the SearchIO system, HSPI objects are created automatically from
+a SearchIO stream which returns Bio::Search::Result::ResultI objects
+and you get the HSPI objects via the ResultI API.
+
+For documentation on what you can do with GenericHSP (and other HSPI
+objects), please see the API documentation in
+L<Bio::Search::HSP::HSPI|Bio::Search::HSP::HSPI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich and Steve Chervitz
+
+Email jason-at-bioperl.org
+Email sac-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::HSP::GenericHSP;
+use vars qw($GAP_SYMBOL);
+use strict;
+
+use Bio::Root::Root;
+use Bio::SeqFeature::Similarity;
+
+use base qw(Bio::Search::HSP::HSPI);
+
+BEGIN {
+    $GAP_SYMBOL = '-';
+}
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::GenericHSP();
+ Function: Builds a new Bio::Search::HSP::GenericHSP object
+ Returns : Bio::Search::HSP::GenericHSP
+ Args    : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
+           -evalue    => evalue
+           -pvalue    => pvalue
+           -bits      => bit value for HSP
+           -score     => score value for HSP (typically z-score but depends on
+                                              analysis)
+           -hsp_length=> Length of the HSP (including gaps)
+           -identical => # of residues that matched identically
+           -percent_identity => (optional) percent identity
+           -conserved => # of residues that matched conservatively
+                           (only protein comparisons;
+                            conserved == identical in nucleotide comparisons)
+           -hsp_gaps   => # of gaps in the HSP
+           -query_gaps => # of gaps in the query in the alignment
+           -hit_gaps   => # of gaps in the subject in the alignment
+           -query_name  => HSP Query sequence name (if available)
+           -query_start => HSP Query start (in original query sequence coords)
+           -query_end   => HSP Query end (in original query sequence coords)
+           -query_length=> total length of the query sequence
+           -query_seq   => query sequence portion of the HSP
+           -query_desc  => textual description of the query
+           -hit_name    => HSP Hit sequence name (if available)
+           -hit_start   => HSP Hit start (in original hit sequence coords)
+           -hit_end     => HSP Hit end (in original hit sequence coords)
+           -hit_length  => total length of the hit sequence
+           -hit_seq     => hit sequence portion of the HSP
+           -hit_desc    => textual description of the hit
+           -homology_seq=> homology sequence for the HSP
+           -hit_frame   => hit frame (only if hit is translated protein)
+           -query_frame => query frame (only if query is translated protein)
+           -rank        => HSP rank
+           -links       => HSP links information (WU-BLAST only)
+           -hsp_group   => HSP Group information (WU-BLAST only)
+=cut
+
+sub new {
+    my($class,@args) = @_;
+
+    # don't pass anything to SUPER; complex hierarchy results in lots of work
+    # for nothing
+    my $self = $class->SUPER::new();
+
+    # for speed, don't use _rearrange and just store all input data directly
+    # with no method calls and no work done. work can be carried
+    # out just-in-time later if desired
+    my %args = @args;
+    while (my ($arg, $value) = each %args) {
+        # upper-case the key and drop every '-' (octal 055), so that
+        # '-query_length' is stored under 'QUERY_LENGTH'
+        $arg =~ tr/a-z\055/A-Z/d;
+        $self->{$arg} = $value;
+    }
+
+    defined $self->{VERBOSE} && $self->verbose($self->{VERBOSE});
+
+    $self->{ALGORITHM} ||= 'GENERIC';
+
+    # both lengths are mandatory
+    if (! defined $self->{QUERY_LENGTH} || ! defined $self->{HIT_LENGTH}) {
+        $self->throw("Must define hit and query length");
+    }
+
+    # force _calculate_seq_positions to do its work on first use
+    $self->{'_sequenceschanged'} = 1;
+
+    # feature1()/feature2() check this flag to tell whether construction
+    # has completed
+    $self->{_finished_new} = 1;
+    return $self;
+}
+
+
+# Return length($type), divided by 3 when the algorithm name and the
+# requested $type match one of the patterns below.
+sub _logical_length {
+    my ($self, $type) = @_;
+    my $program = $self->algorithm;
+    my $raw     = $self->length($type);
+
+    # [PSI]TBLASTN / TFASTY style names: scale the hit and total lengths
+    if ($program =~ /^(PSI)?T(BLAST|FAST)[NY]/oi) {
+        return $type =~ /sbjct|hit|tot/i ? $raw/3 : $raw;
+    }
+
+    # BLASTX / FASTX / FASTY style names: scale the query and total lengths
+    if ($program =~ /^(BLAST|FAST)(X|Y|XY)/oi) {
+        return $type =~ /query|tot/i ? $raw/3 : $raw;
+    }
+
+    # TBLASTX / TFASTX style names: always scale
+    if ($program =~ /^T(BLAST|FAST)(X|Y|XY)/oi) {
+        return $raw/3;
+    }
+
+    return $raw;
+}
+
+=head2 L<Bio::Search::HSP::HSPI> methods
+
+Implementation of L<Bio::Search::HSP::HSPI> methods follow
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the HSP
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+sub algorithm {
+    my ($self, $value) = @_;
+    my $old = $self->{'ALGORITHM'};
+
+    if (defined $value) {
+        # setter: store the new value, hand back the one it replaced
+        $self->{'ALGORITHM'} = $value;
+    }
+    elsif (!defined $old) {
+        # first read with nothing stored: default to the empty string
+        $old = $self->{'ALGORITHM'} = '';
+    }
+
+    return $old;
+}
+
+=head2 pvalue
+
+ Title   : pvalue
+ Usage   : my $pvalue = $hsp->pvalue();
+ Function: Returns the P-value for this HSP or undef
+ Returns : float or exponential (2e-10)
+           P-value is not defined with NCBI Blast2 reports.
+ Args    : [optional] numeric to set value
+
+=cut
+
+sub pvalue {
+    my $self  = shift;
+    my $value = shift;
+
+    # Always report the value held before this call, even when setting.
+    my $old = $self->{'PVALUE'};
+    $self->{'PVALUE'} = $value if defined $value;
+    return $old;
+}
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : my $evalue = $hsp->evalue();
+ Function: Returns the e-value for this HSP
+ Returns : float or exponential (2e-10)
+ Args    : [optional] numeric to set value
+
+=cut
+
+sub evalue {
+    my $self = shift;
+    return $self->significance(@_);
+}
+
+# Override significance to return the e-value or, if this is
+# not defined (WU-BLAST), return the p-value.
+sub significance {
+    my $self = shift;
+    my $signif = $self->query->significance(@_);
+
+    # a defined, non-empty significance on the query feature wins
+    return $signif if defined $signif && $signif ne '';
+
+    return $self->pvalue(@_);
+}
+
+
+=head2 frac_identical
+
+ Title   : frac_identical
+ Usage   : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+ Function: Returns the fraction of identical positions for this HSP
+ Returns : Float in range 0.0 -> 1.0
+ Args    : arg 1:  'query' = num identical / length of query seq (without gaps)
+                   'hit'   = num identical / length of hit seq (without gaps)
+                             synonyms: 'sbjct', 'subject'
+                   'total' = num identical / length of alignment (with gaps)
+                             synonyms: 'hsp'
+                   default = 'total'
+           arg 2: [optional] frac identical value to set for the type requested
+
+=cut
+
+sub frac_identical {
+    my ($self, $type, $value) = @_;
+
+    $self->_pre_frac unless $self->{_did_prefrac};
+
+    # Normalise the requested type: 'subject'/'sbjct' mean 'hit';
+    # a missing type, 'hsp' or anything unrecognised means 'total'.
+    if (defined $type) {
+        $type = lc $type;
+        $type = 'hit' if $type =~ /subject|sbjct/;
+        $type = 'total'
+            if $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/;
+    }
+    else {
+        $type = 'total';
+    }
+
+    my $old = $self->{'_frac_identical'}->{$type};
+
+    # plain read of an already-stored value
+    return $old if defined $old && !defined $value;
+
+    # nothing supplied and nothing stored: default both to ''
+    $value = $old = '' unless defined $value;
+
+    # keep the matching query/hit feature in step with the HSP
+    if ($type eq 'hit' || $type eq 'query') {
+        $self->$type()->frac_identical($value);
+    }
+    $self->{'_frac_identical'}->{$type} = $value;
+
+    return $old;
+}
+
+=head2 frac_conserved
+
+ Title    : frac_conserved
+ Usage    : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+ Function : Returns the fraction of conserved positions for this HSP.
+            This is the fraction of symbols in the alignment with a
+            positive score.
+ Returns : Float in range 0.0 -> 1.0
+ Args    : arg 1: 'query' = num conserved / length of query seq (without gaps)
+                  'hit'   = num conserved / length of hit seq (without gaps)
+                             synonyms: 'sbjct', 'subject'
+                  'total' = num conserved / length of alignment (with gaps)
+                             synonyms: 'hsp'
+                  default = 'total'
+           arg 2: [optional] frac conserved value to set for the type requested
+
+=cut
+
+sub frac_conserved {
+    my ($self, $type, $value) = @_;
+
+    $self->_pre_frac unless $self->{_did_prefrac};
+
+    # Normalise the requested type: 'subject'/'sbjct' mean 'hit';
+    # a missing type, 'hsp' or anything unrecognised means 'total'.
+    if (defined $type) {
+        $type = lc $type;
+        $type = 'hit' if $type =~ /subject|sbjct/;
+        $type = 'total'
+            if $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/;
+    }
+    else {
+        $type = 'total';
+    }
+
+    my $old = $self->{'_frac_conserved'}->{$type};
+    if (defined $value) {
+        # setter: store the new value, hand back the one it replaced
+        $self->{'_frac_conserved'}->{$type} = $value;
+    }
+    elsif (!defined $old) {
+        # first read with nothing stored: default to the empty string
+        $old = $self->{'_frac_conserved'}->{$type} = '';
+    }
+
+    return $old;
+}
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+ Function : Get the number of gaps in the query, hit, or total alignment.
+ Returns  : Integer, number of gaps or 0 if none
+ Args     : arg 1: 'query' = num gaps in query seq
+                   'hit'   = num gaps in hit seq; synonyms: 'sbjct', 'subject'
+                   'total' = num gaps in whole alignment;  synonyms: 'hsp'
+                   default = 'total'
+            arg 2: [optional] integer gap value to set for the type requested
+
+=cut
+
+sub gaps {
+    my ($self, $type, $value) = @_;
+
+    $self->_pre_gaps unless $self->{_did_pregaps};
+
+    # a missing type, 'hsp' or anything unrecognised means 'total'
+    if (defined $type) {
+        $type = lc $type;
+        $type = 'total'
+            if $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/;
+    }
+    else {
+        $type = 'total';
+    }
+    # 'sbjct' and 'subject' are synonyms for 'hit'
+    $type = 'hit' if $type =~ /sbjct|subject/;
+
+    my $old = $self->{'_gaps'}->{$type};
+    if (defined $value) {
+        $self->{'_gaps'}->{$type} = $value;
+    }
+    elsif (!defined $old) {
+        $old = $self->{'_gaps'}->{$type} = '';
+    }
+
+    # report 0 rather than undef or '' when there is nothing to report
+    return $old || 0;
+}
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : [optional] string to set for query sequence
+
+
+=cut
+
+sub query_string {
+    my ($self, $value) = @_;
+    my $old = $self->{QUERY_SEQ};
+
+    # plain read of an already-stored sequence: nothing changes
+    return $old if defined $old && !defined $value;
+
+    # nothing supplied and nothing stored: default both to ''
+    $value = $old = '' unless defined $value;
+    $self->{QUERY_SEQ} = $value;
+
+    # do some housekeeping so we know when to
+    # re-run _calculate_seq_positions
+    $self->{'_sequenceschanged'} = 1;
+
+    return $old;
+}
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : [optional] string to set for hit sequence
+
+
+=cut
+
+sub hit_string {
+    my ($self, $value) = @_;
+    my $old = $self->{HIT_SEQ};
+
+    # plain read of an already-stored sequence: nothing changes
+    return $old if defined $old && !defined $value;
+
+    # nothing supplied and nothing stored: default both to ''
+    $value = $old = '' unless defined $value;
+    $self->{HIT_SEQ} = $value;
+
+    # do some housekeeping so we know when to
+    # re-run _calculate_seq_positions
+    $self->{'_sequenceschanged'} = 1;
+
+    return $old;
+}
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : [optional] string to set for homology sequence
+
+=cut
+
+sub homology_string {
+    my ($self, $value) = @_;
+    my $old = $self->{HOMOLOGY_SEQ};
+
+    # plain read of an already-stored string: nothing changes
+    return $old if defined $old && !defined $value;
+
+    # nothing supplied and nothing stored: default both to ''
+    $value = $old = '' unless defined $value;
+    $self->{HOMOLOGY_SEQ} = $value;
+
+    # do some housekeeping so we know when to
+    # re-run _calculate_seq_positions
+    $self->{'_sequenceschanged'} = 1;
+
+    return $old;
+}
+
+=head2 length
+
+ Title    : length
+ Usage    : my $len = $hsp->length( ['query'|'hit'|'total'] );
+ Function : Returns the length of the query or hit in the alignment
+            (without gaps)
+            or the aggregate length of the HSP (including gaps;
+            this may be greater than either hit or query )
+ Returns  : integer
+ Args     : arg 1: 'query' = length of query seq (without gaps)
+                   'hit'   = length of hit seq (without gaps) (synonyms: sbjct, subject)
+                   'total' = length of alignment (with gaps)
+                   default = 'total'
+            arg 2: [optional] integer length value to set for specific type
+
+=cut
+
+sub length {
+    my ($self, $type, $value) = @_;
+
+    $type = defined $type ? lc $type : 'total';
+
+    # query and hit lengths live on the corresponding features
+    return $self->query()->length($value) if $type =~ /^q/i;
+    return $self->hit()->length($value)   if $type =~ /^(hit|subject|sbjct)/;
+
+    # anything else is the length of the HSP itself
+    $self->{HSP_LENGTH} = $value if defined $value;
+    return $self->{HSP_LENGTH};
+}
+
+=head2 hsp_length
+
+ Title   : hsp_length
+ Usage   : my $len = $hsp->hsp_length()
+ Function: shortcut for length('hsp')
+ Returns : integer, length of the HSP alignment (including gaps)
+ Args    : [optional] integer length value to set
+
+=cut
+
+sub hsp_length {
+    my ($self, $value) = @_;
+    return $self->length('hsp', $value);
+}
+
+=head2 percent_identity
+
+ Title   : percent_identity
+ Usage   : my $percentid = $hsp->percent_identity()
+ Function: Returns the calculated percent identity for an HSP
+ Returns : floating point between 0 and 100
+ Args    : none
+
+
+=cut
+
+sub percent_identity {
+    my $self = shift;
+
+    # run the one-off _pre_pi step until _did_prepi is set
+    $self->_pre_pi unless $self->{_did_prepi};
+
+    return $self->SUPER::percent_identity(@_);
+}
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $hsp->frame($queryframe,$subjectframe)
+ Function: Set the Frame for both query and subject and insure that
+           they agree.
+           This overrides the frame() method implementation in
+           FeaturePair.
+ Returns : array of query and subjects if return type wants an array
+           or query frame if defined or subject frame
+ Args    : none
+ Note    : Frames are stored in the GFF way (0-2) not 1-3
+           as they are in BLAST (negative frames are deduced by checking
+                                 the strand of the query or hit)
+
+=cut
+
+# Convert the QUERY_FRAME / HIT_FRAME values stored on the object into
+# 0-2 form, push them onto the query and hit features, and return the
+# frame(s) in a shape that depends on calling context and algorithm.
+# No arguments are read; input comes from the object's own fields.
+sub frame {
+    my $self = shift;
+
+    # run the one-off _pre_frame step until _did_preframe is defined
+    unless (defined $self->{_did_preframe}) {
+        $self->_pre_frame;
+    }
+    my $qframe = $self->{QUERY_FRAME};
+    my $sframe = $self->{HIT_FRAME};
+
+    if( defined $qframe ) {
+        if( $qframe == 0 ) {
+            # numeric comparison, so any value that numifies to 0
+            # is normalised to plain 0
+            $qframe = 0;
+        } elsif( $qframe !~ /^([+-])?([1-3])/ ) {
+            $self->warn("Specifying an invalid query frame ($qframe)");
+            $qframe = undef;
+        } else {
+            # the regex above matched: $1 is the optional sign, $2 the digit
+            my $dir = $1;
+            $dir = '+' unless defined $dir;
+            # warn when the sign disagrees with the strand of the query
+            if( ($dir eq '-' && $self->query->strand >= 0) ||
+                ($dir eq '+' && $self->query->strand <= 0) ) {
+                $self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
+            }
+            # Set frame to GFF [0-2] -
+            # what if someone tries to put in a GFF frame!
+            $qframe = $2 - 1;
+        }
+        $self->query->frame($qframe);
+    }
+    # same treatment for the hit (subject) frame
+    if( defined $sframe ) {
+          if( $sframe == 0 ) {
+            $sframe = 0;
+          } elsif( $sframe !~ /^([+-])?([1-3])/ ) {
+            $self->warn("Specifying an invalid subject frame ($sframe)");
+            $sframe = undef;
+          } else {
+              my $dir = $1;
+              $dir = '+' unless defined $dir;
+              if( ($dir eq '-' && $self->hit->strand >= 0) ||
+                  ($dir eq '+' && $self->hit->strand <= 0) )
+              {
+                  $self->warn("Subject frame ($sframe) did not match strand of subject (". $self->hit->strand() . ")");
+              }
+
+              # Set frame to GFF [0-2]
+              $sframe = $2 - 1;
+          }
+          $self->hit->frame($sframe);
+      }
+    # List context with a TBLASTX/TFASTX-style algorithm name: both frames.
+    if (wantarray() && $self->algorithm =~ /^T(BLAST|FAST)(X|Y|XY)/oi)
+    {
+        return ($self->query->frame(), $self->hit->frame());
+    } elsif (wantarray())  {
+        # Other list-context calls: return whichever frame is true, in its
+        # own slot of a (query, hit) pair.
+        # NOTE(review): a frame of 0 is false, so when neither frame is
+        # true no explicit return is reached here - confirm that is intended.
+        ($self->query->frame() &&
+         return ($self->query->frame(), undef)) ||
+             ($self->hit->frame() &&
+              return (undef, $self->hit->frame()));
+    } else {
+        # Scalar context: the query frame if true, else the hit frame if true.
+        ($self->query->frame() &&
+         return $self->query->frame()) ||
+        ($self->hit->frame() &&
+         return $self->hit->frame());
+    }
+}
+
+
+=head2 get_aln
+
+ Title   : get_aln
+ Usage   : my $aln = $hsp->get_aln
+ Function: Returns a L<Bio::SimpleAlign> object representing the HSP alignment
+ Returns : L<Bio::SimpleAlign>
+ Args    : none
+
+=cut
+
+sub get_aln {
+    my ($self) = @_;
+    require Bio::LocatableSeq;
+    require Bio::SimpleAlign;
+    my $aln = Bio::SimpleAlign->new();
+
+    # FASTA specific stuff moved to the FastaHSP object
+    # The strings are handed to LocatableSeq exactly as stored.
+    my $hs = $self->hit_string();
+    my $qs = $self->query_string();
+
+    my ($q_nm,$s_nm) = ($self->query->seq_id(),
+                        $self->hit->seq_id());
+    # Fall back to placeholder names when the features carry no id.
+    unless( defined $q_nm && CORE::length ($q_nm) ) {
+        $q_nm = 'query';
+    }
+    unless( defined $s_nm && CORE::length ($s_nm) ) {
+        $s_nm = 'hit';
+    }
+    my $query = Bio::LocatableSeq->new('-seq'   => $qs,
+                                       '-id'    => $q_nm,
+                                       '-start' => $self->query->start,
+                                       '-end'   => $self->query->end,
+                                       );
+    my $hit = Bio::LocatableSeq->new('-seq'   => $hs,
+                                     '-id'    => $s_nm,
+                                     '-start' => $self->hit->start,
+                                     '-end'   => $self->hit->end,
+                                     );
+    # query first, then hit
+    $aln->add_seq($query);
+    $aln->add_seq($hit);
+    return $aln;
+}
+
+=head2 num_conserved
+
+ Title   : num_conserved
+ Usage   : $obj->num_conserved($newval)
+ Function: returns the number of conserved residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+sub num_conserved {
+    my ($self, $value) = @_;
+
+    # run the one-off _pre_similar_stats step until _did_presimilar is set
+    $self->_pre_similar_stats unless $self->{_did_presimilar};
+
+    $self->{CONSERVED} = $value if defined $value;
+    return $self->{CONSERVED};
+}
+
+=head2 num_identical
+
+ Title   : num_identical
+ Usage   : $obj->num_identical($newval)
+ Function: returns the number of identical residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+sub num_identical {
+    my ($self, $value) = @_;
+
+    # run the one-off _pre_similar_stats step until _did_presimilar is set
+    $self->_pre_similar_stats unless $self->{_did_presimilar};
+
+    $self->{IDENTICAL} = $value if defined $value;
+    return $self->{IDENTICAL};
+}
+
+=head2 rank
+
+ Usage     : $hsp->rank( [string] );
+ Purpose   : Get the rank of the HSP within a given Blast hit.
+ Example   : $rank = $hsp->rank;
+ Returns   : Integer (1..n) corresponding to the order in which the HSP
+             appears in the BLAST report.
+
+=cut
+
+sub rank {
+    my $self  = shift;
+    my $value = shift;
+
+    # a defined argument replaces the stored rank
+    $self->{RANK} = $value if defined $value;
+    return $self->{RANK};
+}
+
+
+=head2 seq_inds
+
+ Title   : seq_inds
+ Purpose   : Get a list of residue positions (indices) for all identical
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+           :  ('sbjct' is synonymous with 'hit')
+           : class     = 'identical' or 'conserved' or 'nomatch' or 'gap'
+           :              (default = identical)
+           :              (can be shortened to 'id' or 'cons')
+           :             or 'conserved-not-identical'
+           : collapse  = boolean, if true, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
+           :             collapses to "1-5 7 9-11". This is useful for
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  :
+
+See Also   : L<Bio::Search::SearchUtils::collapse_nums()|Bio::Search::SearchUtils>,
+             L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub seq_inds{
+   my ($self, $seqType, $class, $collapse) = @_;
+
+   # prepare the internal structures - this is cached so
+   # if the strings have not changed we're okay
+   $self->_calculate_seq_positions();
+
+   $seqType  ||= 'query';
+   $class ||= 'identical';
+   $collapse ||= 0;
+
+   # Only the first letter of the sequence type is significant:
+   # q => query, s or h => sbjct.
+   $seqType = 'sbjct' if $seqType eq 'hit';
+   my $t = lc(substr($seqType,0,1));
+   if( $t eq 'q' ) {
+       $seqType = 'query';
+   } elsif ( $t eq 's' || $t eq 'h' ) {
+       $seqType = 'sbjct';
+   } else {
+       $self->warn("unknown seqtype $seqType using 'query'");
+       $seqType = 'query';
+   }
+
+   # Likewise for the class, except that 'conserved-not-identical' is
+   # told apart from the other names starting with 'c'.
+   $t = lc(substr($class,0,1));
+
+   if( $t eq 'c' ) {
+       if( $class =~ /conserved\-not\-identical/ ) {
+           $class = 'conserved';
+       } else {
+           $class = 'conservedall';
+       }
+   } elsif( $t eq 'i' ) {
+       $class = 'identical';
+   } elsif( $t eq 'n' ) {
+       $class = 'nomatch';
+   } elsif( $t eq 'g' ) {
+       $class = 'gap';
+   } else {
+       $self->warn("unknown sequence class $class using 'identical'");
+       $class = 'identical';
+   }
+
+   ## Sensitive to member name changes.
+   $seqType  = "_\L$seqType\E";
+   $class = "_\L$class\E";
+   my @ary;
+
+   if( $class eq '_gap' ) {
+       # this means that we are remapping the gap length that is stored
+       # in the hash (for example $self->{'_gapRes_query'} )
+       # so we'll return an array which has the values of the position
+       # of the gap (the key in the hash) + the gap length (value in the
+       # hash for this key) - 1.
+       # NOTE(review): the test below looks at the position ($_ > 1), not
+       # at the stored gap length - confirm that this is intended.
+
+       @ary = map { $_ > 1 ?
+                        $_..($_ + $self->{"${class}Res$seqType"}->{$_} - 1) :
+                        $_ }
+              sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
+   } elsif( $class eq '_conservedall' ) {
+       # 'conserved' without the -not-identical suffix: the union of the
+       # conserved and identical position tables
+       @ary = sort { $a <=> $b }
+       keys %{ $self->{"_conservedRes$seqType"}},
+       keys %{ $self->{"_identicalRes$seqType"}},
+   }  else {
+       @ary = sort { $a <=> $b } keys %{ $self->{"${class}Res$seqType"}};
+   }
+
+   return @ary unless $collapse;
+
+   # Load the module that actually provides the function called below;
+   # the previous code required Bio::Search::BlastUtils instead.
+   require Bio::Search::SearchUtils;
+   return Bio::Search::SearchUtils::collapse_nums(@ary);
+}
+
+
+=head2 Inherited from L<Bio::SeqFeature::SimilarityPair>
+
+These methods come from L<Bio::SeqFeature::SimilarityPair>
+
+=head2 query
+
+ Title   : query
+ Usage   : my $query = $hsp->query
+ Function: Returns a SeqFeature representing the query in the HSP
+ Returns : L<Bio::SeqFeature::Similarity>
+ Args    : [optional] new value to set
+
+=cut
+
+sub query {
+    my $self = shift;
+
+    # run _query_seq_feature until _created_qff is set
+    $self->_query_seq_feature unless $self->{_created_qff};
+
+    return $self->SUPER::query(@_);
+}
+
+sub feature1 {
+    my $self = shift;
+
+    # While new() has not finished, or while _making_qff is set, answer
+    # with a cached Similarity feature, creating it on first request.
+    if (!$self->{_finished_new} || $self->{_making_qff}) {
+        $self->{_sim1} ||= Bio::SeqFeature::Similarity->new();
+        return $self->{_sim1};
+    }
+
+    $self->_query_seq_feature unless $self->{_created_qff};
+
+    return $self->SUPER::feature1(@_);
+}
+
+=head2 hit
+
+ Title   : hit
+ Usage   : my $hit = $hsp->hit
+ Function: Returns a SeqFeature representing the hit in the HSP
+ Returns : L<Bio::SeqFeature::Similarity>
+ Args    : [optional] new value to set
+
+=cut
+
+sub hit {
+    my $self = shift;
+    # The hit feature is built lazily: construct it on first access,
+    # then let the parent class do the actual get/set.
+    $self->_subject_seq_feature if !$self->{_created_sff};
+    return $self->SUPER::hit(@_);
+}
+
+sub feature2 {
+    my $self = shift;
+
+    # While the object is still being constructed, or while the subject
+    # feature itself is being assembled, hand back a (possibly fresh)
+    # placeholder Similarity object instead of triggering lazy creation.
+    my $still_building = !$self->{_finished_new} || $self->{_making_sff};
+    if ($still_building) {
+        $self->{_sim2} ||= Bio::SeqFeature::Similarity->new();
+        return $self->{_sim2};
+    }
+
+    # Normal path: make sure the subject feature exists, then delegate.
+    $self->{_created_sff} or $self->_subject_seq_feature;
+    return $self->SUPER::feature2(@_);
+}
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: Get/Set the significance value
+ Returns : numeric
+ Args    : [optional] new value to set
+
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $obj->score();
+           $obj->score($value);
+ Function: Get/Set the score value
+ Returns : numeric
+ Args    : [optional] new value to set
+
+
+=head2 bits
+
+ Title   : bits
+ Usage   : $bits = $obj->bits();
+           $obj->bits($value);
+ Function: Get/Set the bits value
+ Returns : numeric
+ Args    : [optional] new value to set
+
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $hsp->strand('query')
+ Function: Retrieves the strand for the HSP component requested
+ Returns : +1 or -1 (0 if unknown)
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
+           'query' to retrieve the query strand (default)
+
+=cut
+
+=head1 Private methods
+
+=cut
+
+=head2 _calculate_seq_positions
+
+ Title   : _calculate_seq_positions
+ Usage   : $self->_calculate_seq_positions
+ Function: Internal function
+ Returns :
+ Args    :
+
+
+=cut
+
+# Walk the alignment (homology, query and hit strings) from its last column
+# to its first and record, per sequence, which residue positions are
+# identical, conserved, unmatched, or followed by a gap.  The results are
+# stored as hash refs under $self->{'_<class>Res_query'} and
+# $self->{'_<class>Res_sbjct'}.  Does nothing (returns empty) unless
+# $self->{'_sequenceschanged'} is set; returns 1 after recomputing.
+sub _calculate_seq_positions {
+    my ($self, @args) = @_;
+    return unless ( $self->{'_sequenceschanged'} );
+    $self->{'_sequenceschanged'} = 0;
+    my ($mchar, $schar, $qchar);
+    my ($seqString, $qseq,$sseq) = ( $self->homology_string(),
+                                     $self->query_string(),
+                                     $self->hit_string() );
+
+    # Using hashes to avoid saving duplicate residue numbers.
+    my %identicalList_query = ();
+    my %identicalList_sbjct = ();
+    my %conservedList_query = ();
+    my %conservedList_sbjct = ();
+
+    my %gapList_query = ();
+    my %gapList_sbjct = ();
+    my %nomatchList_query = ();
+    my %nomatchList_sbjct = ();
+
+    # The strings are consumed from the end (chop), so each counter starts
+    # at the coordinate of the last alignment column and moves backwards.
+    my $qdir = $self->query->strand || 1;
+    my $sdir = $self->hit->strand || 1;
+    my $resCount_query = ($qdir >=0) ? $self->query->end : $self->query->start;
+    my $resCount_sbjct = ($sdir >=0) ? $self->hit->end : $self->hit->start;
+
+    my $prog = $self->algorithm;
+    if( $prog  =~ /FAST|SSEARCH|SMITH-WATERMAN/i ) {
+        # fasta reports some extra 'regional' sequence information
+        # we need to clear out first
+        # this seemed a bit insane to me at first, but it appears to
+        # work --jason
+
+        # we infer the end of the regional sequence where the first
+        # non space is in the homology string
+        # then we use the HSP->length to tell us how far to read
+        # to cut off the end of the sequence
+
+        my ($start) = (0);
+        if( $seqString =~ /^(\s+)/ ) {
+            $start = CORE::length($1);
+        }
+
+        # the total alignment length is the same for all three strings
+        my $aln_len = $self->length('total');
+        $seqString = substr($seqString, $start, $aln_len);
+        $qseq = substr($qseq, $start, $aln_len);
+        $sseq = substr($sseq, $start, $aln_len);
+
+        # strip the frameshift markers FASTA places in the sequences
+        $qseq =~ s![\\\/]!!g;
+        $sseq =~ s![\\\/]!!g;
+    }
+
+    # For translated searches the nucleotide side is reported in base
+    # coordinates; scale the counter down to residue units.
+    if($prog =~ /^(PSI)?T(BLAST|FAST)N/oi ) {
+        $resCount_sbjct = int($resCount_sbjct / 3);
+    } elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi  ) {
+        $resCount_query = int($resCount_query / 3);
+    } elsif($prog =~ /^T(BLAST|FAST)(X|Y|XY)/oi ) {
+        $resCount_query = int($resCount_query / 3);
+        $resCount_sbjct = int($resCount_sbjct / 3);
+    }
+    # NOTE: the loop ends when chop() yields a false value, i.e. when the
+    # homology string is exhausted (or, as in the original, on a literal '0').
+    while( $mchar = chop($seqString) ) {
+        ($qchar, $schar) = (chop($qseq), chop($sseq));
+        if( $mchar eq '+' || $mchar eq '.' || $mchar eq ':' ) {
+            $conservedList_query{ $resCount_query } = 1;
+            $conservedList_sbjct{ $resCount_sbjct } = 1;
+        } elsif( $mchar ne ' ' ) {
+            $identicalList_query{ $resCount_query } = 1;
+            $identicalList_sbjct{ $resCount_sbjct } = 1;
+        } elsif( $mchar eq ' ') {
+            $nomatchList_query{ $resCount_query } = 1;
+            $nomatchList_sbjct{ $resCount_sbjct } = 1;
+        }
+        # A gap does not consume a residue, so the counter only moves on
+        # non-gap characters; the gap length accumulates at the current key.
+        if( $qchar eq $GAP_SYMBOL ) {
+            $gapList_query{ $resCount_query } ++;
+        } else {
+            $resCount_query -= $qdir;
+        }
+        if( $schar eq $GAP_SYMBOL ) {
+            # keyed by the SUBJECT coordinate (was mistakenly the query one)
+            $gapList_sbjct{ $resCount_sbjct } ++;
+        } else {
+            $resCount_sbjct -=$sdir;
+        }
+    }
+    $self->{'_identicalRes_query'} = \%identicalList_query;
+    $self->{'_conservedRes_query'} = \%conservedList_query;
+    $self->{'_nomatchRes_query'}   = \%nomatchList_query;
+    $self->{'_gapRes_query'}       = \%gapList_query;
+
+    $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
+    $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
+    $self->{'_nomatchRes_sbjct'}   = \%nomatchList_sbjct;
+    $self->{'_gapRes_sbjct'}       = \%gapList_sbjct;
+    return 1;
+}
+
+=head2 n
+
+See documentation in L<Bio::Search::HSP::HSPI::n()|Bio::Search::HSP::HSPI>
+
+=cut
+
+sub n {
+    my ($self, @new_value) = @_;
+    # act as a setter only when an argument was actually supplied
+    $self->{'_n'} = $new_value[0] if @new_value;
+    my $n = $self->{'_n'};
+    # an unset value is reported as the empty string, never undef
+    return defined $n ? $n : '';
+}
+
+=head2 range
+
+See documentation in L<Bio::Search::HSP::HSPI::range()|Bio::Search::HSP::HSPI>
+
+=cut
+
+sub range {
+    my ($self, $seqType) = @_;
+
+    $seqType ||= 'query';
+
+    # Only the literal 'query' selects the query feature; every other
+    # value ('hit', 'sbjct', ...) falls through to the hit feature.
+    my $feature = $seqType eq 'query' ? $self->query : $self->hit;
+
+    my $start = $feature->start;
+    my $end   = $feature->end;
+    return ($start, $end);
+}
+
+
+=head2 links
+
+ Title   : links
+ Usage   : $obj->links($newval)
+ Function: Get/Set the Links value (from WU-BLAST)
+           Indicates the placement of the alignment in the group of HSPs
+ Returns : Value of links
+ Args    : On set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub links{
+    my ($self, @new_value) = @_;
+
+    # store the first argument (which may be undef) when one is given
+    if (@new_value) {
+        $self->{LINKS} = $new_value[0];
+    }
+    return $self->{LINKS};
+}
+
+=head2 hsp_group
+
+ Title   : hsp_group
+ Usage   : $obj->hsp_group($newval)
+ Function: Get/Set the Group value (from WU-BLAST)
+           Indicates a grouping of HSPs
+ Returns : Value of group
+ Args    : On set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub hsp_group {
+    my ($self, @new_value) = @_;
+
+    # store the first argument (which may be undef) when one is given
+    if (@new_value) {
+        $self->{HSP_GROUP} = $new_value[0];
+    }
+    return $self->{HSP_GROUP};
+}
+
+# The cigar string code is written by Juguang Xiao <juguang at fugu-sg.org>
+
+=head1 Brief introduction on cigar string
+
+NOTE: the concept is originally from EnsEMBL docs at
+http://may2005.archive.ensembl.org/Docs/wiki/html/EnsemblDocs/CigarFormat.html
+Please append to these docs if you have a better definition.
+
+Sequence alignment hits can be stored in a database as ungapped alignments.
+This imposes 2 major constraints on alignments:
+
+a) alignments for a single hit record require multiple rows in the database,
+and
+b) it is not possible to accurately retrieve the exact original alignment.
+
+Alternatively, sequence alignments can be stored as gapped alignments using
+the CIGAR line format (where CIGAR stands for Concise Idiosyncratic Gapped
+Alignment Report).
+
+In the cigar line format alignments are stored as follows:
+
+M: Match
+D: Deletion
+I: Insertion
+
+An example of an alignment for a hypothetical protein match is shown below:
+
+
+Query:   42 PGPAGLP----GSVGLQGPRGLRGPLP-GPLGPPL...
+
+            PG    P    G     GP   R      PLGP
+
+Sbjct: 1672 PGTP*TPLVPLGPWVPLGPSSPR--LPSGPLGPTD...
+
+
+Such an alignment would be stored in the protein_align_feature table as the following cigar line:
+
+7M4D12M2I2MD7M
+
+=head2 cigar_string
+
+  Name:     cigar_string
+  Usage:    $cigar_string = $hsp->cigar_string
+  Function: Generate and return cigar string for this HSP alignment
+  Args:     No input needed
+  Return:   a cigar string
+
+=cut
+
+
+sub cigar_string {
+    my ($self, $arg) = @_;
+
+    # read-only accessor: any argument is reported and otherwise ignored
+    if (defined $arg) {
+        $self->warn("this is not a setter");
+    }
+
+    # build the string on first use and cache it on the object
+    if (!defined $self->{_cigar_string}) {
+        $self->{_cigar_string} =
+            $self->generate_cigar_string($self->query_string, $self->hit_string);
+    }
+
+    return $self->{_cigar_string};
+}
+
+=head2 generate_cigar_string
+
+  Name:     generate_cigar_string
+  Usage:    my $cigar_string = $hsp->generate_cigar_string($qstr, $hstr);
+  Function: generate cigar string from a simple sequence of alignment.
+  Args:     the string of query and subject
+  Return:   cigar string
+
+=cut
+
+# Build a cigar string from two equal-length aligned strings.
+#   $qstr : aligned query string, $hstr : aligned hit string
+# Columns are classified as M (no gap), D (gap in query) or I (gap in hit)
+# and run-length encoded via _sub_cigar_string.  Throws if the strings
+# differ in length or if a column has a gap in both sequences.
+sub generate_cigar_string {
+    my ($self, $qstr, $hstr) = @_;
+    my @qchars = split //, $qstr;
+    my @hchars = split //, $hstr;
+
+    unless(scalar(@qchars) == scalar(@hchars)){
+        $self->throw("two sequences are not equal in lengths");
+    }
+
+    # reset the run-length accumulator kept on the object
+    $self->{_count_for_cigar_string} = 0;
+    $self->{_state_for_cigar_string} = 'M';
+
+    my $cigar_string = '';
+    for my $i (0 .. $#qchars) {
+        my $q_is_gap = $qchars[$i] eq $GAP_SYMBOL;
+        my $h_is_gap = $hchars[$i] eq $GAP_SYMBOL;
+
+        # The double-gap test must come first: previously it sat in a final
+        # else branch that could never be reached, so such columns were
+        # silently counted as deletions.
+        if($q_is_gap && $h_is_gap){
+            $self->throw("Impossible state that 2 gaps on each seq aligned");
+        }elsif($q_is_gap){ # Deletion
+            $cigar_string .= $self->_sub_cigar_string('D');
+        }elsif($h_is_gap){ # Insertion
+            $cigar_string .= $self->_sub_cigar_string('I');
+        }else{ # Match
+            $cigar_string .= $self->_sub_cigar_string('M');
+        }
+    }
+    # 'X' is a sentinel state that never occurs in the data; switching to
+    # it flushes the final pending run.
+    $cigar_string .= $self->_sub_cigar_string('X'); # not forget the tail.
+    return $cigar_string;
+}
+
+# an internal method to help generate cigar string
+
+# Run-length accumulator used while generating a cigar string.
+#   $new_state : state letter of the current alignment column
+# If the state is unchanged the pending run is extended and '' is returned.
+# Otherwise the pending run is returned as "<count><state>" (the count is
+# omitted when it is 1) and a new run of length 1 is started.
+sub _sub_cigar_string {
+    my ($self, $new_state) = @_;
+
+    if($self->{_state_for_cigar_string} eq $new_state){
+        $self->{_count_for_cigar_string} += 1; # Remain the state and increase the counter
+        return '';
+    }
+
+    my $sub_cigar_string = '';
+    my $count = $self->{_count_for_cigar_string};
+    # A count of 0 means nothing has been accumulated yet (the accumulator
+    # has just been initialised); emitting it would put a bogus "0M" at the
+    # front of alignments that start with a gap.
+    if($count){
+        $sub_cigar_string .= $count unless $count == 1;
+        $sub_cigar_string .= $self->{_state_for_cigar_string};
+    }
+    $self->{_count_for_cigar_string} = 1;
+    $self->{_state_for_cigar_string} = $new_state;
+    return $sub_cigar_string;
+}
+
+
+
+# needed before seqfeatures can be made
+sub _pre_seq_feature {
+    my $self = shift;
+    my $algo = $self->{ALGORITHM};
+
+    # Derive two 0/1 flags from the algorithm name.  The seq-feature
+    # builders in this file only assign a strand (and infer a frame) for a
+    # side whose flag is true.
+    # NOTE(review): in the last pattern of the third test '^' binds only to
+    # WABA and '$' only to SIM4, and it is case-sensitive; kept as is.
+    my ($queryfactor, $hitfactor) = (0, 0);
+    if ( $algo =~ /^(PSI)?T(BLAST|FAST|SW)[NY]/oi ) {
+        ($queryfactor, $hitfactor) = (0, 1);
+    }
+    elsif (    $algo =~ /^(FAST|BLAST)(X|Y|XY)/oi
+            || $algo =~ /^P?GENEWISE/oi ) {
+        ($queryfactor, $hitfactor) = (1, 0);
+    }
+    elsif (    $algo =~ /^T(BLAST|FAST|SW)(X|Y|XY)/oi
+            || $algo =~ /^(BLAST|FAST|SW)N/oi
+            || $algo =~ /^WABA|AXT|BLAT|BLASTZ|PSL|MEGABLAST|EXONERATE|SW|SMITH\-WATERMAN|SIM4$/ ) {
+        ($queryfactor, $hitfactor) = (1, 1);
+    }
+    elsif ( $algo =~ /^RPS-BLAST/ ) {
+        # only the BLASTX flavour of RPS-BLAST sets the query flag
+        $queryfactor = ($algo =~ /^RPS-BLAST\(BLASTX\)/) ? 1 : 0;
+        $hitfactor   = 0;
+    }
+    $self->{_query_factor} = $queryfactor;
+    $self->{_hit_factor} = $hitfactor;
+}
+
+# make query seq feature
+# Build the Bio::SeqFeature::Similarity for the query side of the HSP from
+# the raw values stored on the object (QUERY_START, QUERY_END, EVALUE, ...),
+# install it via SUPER::feature1, and settle QUERY_FRAME.
+# Throws if QUERY_START or QUERY_END is undefined.
+sub _query_seq_feature {
+    my $self = shift;
+    $self->{_making_qff} = 1;
+    my $qs = $self->{QUERY_START};
+    my $qe = $self->{QUERY_END};
+    unless (defined $self->{_query_factor}) {
+        $self->_pre_seq_feature;
+    }
+    my $queryfactor = $self->{_query_factor};
+
+    unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin"); }
+
+    # A strand is only assigned when the query factor is set; coordinates
+    # are always normalised so that start <= end.
+    my $strand;
+    if ($qe > $qs) {  # normal query: start < end
+        $strand = $queryfactor ? 1 : undef;
+    }
+    else {
+        $strand = $queryfactor ? -1 : undef;
+        ($qs,$qe) = ($qe,$qs);
+    }
+
+    # Note: many of these data are not query- and hit-specific.
+    # Only start, end, name, length are.
+    # We could be more efficient by only storing this info once.
+    # steve chervitz --- Sat Apr  5 00:55:07 2003
+
+    my $sim1 = $self->{_sim1} || Bio::SeqFeature::Similarity->new();
+    $sim1->start($qs);
+    $sim1->end($qe);
+    $sim1->significance($self->{EVALUE});
+    $sim1->bits($self->{BITS});
+    $sim1->score($self->{SCORE});
+    $sim1->strand($strand);
+    $sim1->seq_id($self->{QUERY_NAME});
+    $sim1->seqlength($self->{QUERY_LENGTH});
+    $sim1->source_tag($self->{ALGORITHM});
+    $sim1->seqdesc($self->{QUERY_DESC});
+
+    $self->SUPER::feature1($sim1);
+
+    # to determine frame from something like FASTXY which doesn't
+    # report the frame
+    my $qframe = $self->{QUERY_FRAME};
+    if (defined $strand && ! defined $qframe && $queryfactor) {
+        # Use the local start ($qs, the value just stored in $sim1).  Going
+        # through $self->query here would re-enter this method, because
+        # _created_qff is not set until further down.
+        $qframe = ( $qs % 3 ) * $strand;
+    }
+    elsif (! defined $strand) {
+        $qframe = 0;
+    }
+    $self->{QUERY_FRAME} = $qframe;
+
+    $self->{_created_qff} = 1;
+    $self->{_making_qff} = 0;
+    $self->_pre_frame;
+}
+
+# make subject seq feature
+sub _subject_seq_feature {
+    my $self = shift;
+    $self->{_making_sff} = 1;
+    my ($hs, $he) = @{$self}{qw(HIT_START HIT_END)};
+    $self->_pre_seq_feature unless defined $self->{_hit_factor};
+    my $hitfactor = $self->{_hit_factor};
+
+    $self->throw("Did not specify a Hit End or Hit Begin")
+        unless defined $he && defined $hs;
+
+    # A strand is only assigned when the hit factor is set; coordinates
+    # are always normalised so that start <= end.
+    my $is_forward = $he > $hs;
+    my $strand;
+    $strand = $is_forward ? 1 : -1 if $hitfactor;
+    ($hs, $he) = ($he, $hs) unless $is_forward; # reverse subject: start bigger than end
+
+    # Populate the hit-side Similarity feature, in this fixed order.
+    my $sim2 = $self->{_sim2} || Bio::SeqFeature::Similarity->new();
+    my @settings = (
+        [ start        => $hs ],
+        [ end          => $he ],
+        [ significance => $self->{EVALUE} ],
+        [ bits         => $self->{BITS} ],
+        [ score        => $self->{SCORE} ],
+        [ strand       => $strand ],
+        [ seq_id       => $self->{HIT_NAME} ],
+        [ seqlength    => $self->{HIT_LENGTH} ],
+        [ source_tag   => $self->{ALGORITHM} ],
+        [ seqdesc      => $self->{HIT_DESC} ],
+    );
+    for my $setting (@settings) {
+        my ($setter, $value) = @$setting;
+        $sim2->$setter($value);
+    }
+    $self->SUPER::feature2($sim2);
+
+    # Without a strand the frame is 0; otherwise infer it from the start
+    # coordinate when none was supplied and the hit factor is set.
+    my $hframe = $self->{HIT_FRAME};
+    if (! defined $strand) {
+        $hframe = 0;
+    }
+    elsif (! defined $hframe && $hitfactor) {
+        $hframe = ( $hs % 3 ) * $strand;
+    }
+    $self->{HIT_FRAME} = $hframe;
+
+    $self->{_created_sff} = 1;
+    $self->{_making_sff} = 0;
+    $self->_pre_frame;
+}
+
+# know the frame following seq feature creation
+sub _pre_frame {
+    my $self = shift;
+    # both seq features must exist before the frame can be worked out
+    $self->_query_seq_feature   unless $self->{_created_qff};
+    $self->_subject_seq_feature unless $self->{_created_sff};
+    $self->{_did_preframe} = 1;
+    return $self->frame;
+}
+
+# before calling the num_* methods
+sub _pre_similar_stats {
+    my $self = shift;
+    my ($identical, $conserved, $percent_id) =
+        @{$self}{qw(IDENTICAL CONSERVED PERCENT_IDENTITY)};
+
+    # No identical count given: derive it from the percent identity when
+    # available, otherwise warn and fall back to 0.
+    unless (defined $identical) {
+        if (defined $percent_id) {
+            $identical = int($percent_id * $self->{HSP_LENGTH});
+        }
+        else {
+            $self->warn("Did not defined the number of identical matches or overall percent identity in the HSP assuming 0");
+            $identical = 0;
+        }
+    }
+
+    # No conserved count given: reuse the identical count, warning unless
+    # the algorithm name matches the list below.
+    unless (defined $conserved) {
+        unless ( $self->{ALGORITHM} =~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi ) {
+            $self->warn("Did not defined the number of conserved matches in the HSP assuming conserved == identical ($identical)");
+        }
+        $conserved = $identical;
+    }
+
+    $self->{IDENTICAL} = $identical;
+    $self->{CONSERVED} = $conserved;
+    $self->{_did_presimilar} = 1;
+}
+
+# before calling the frac_* methods
+sub _pre_frac {
+    my $self = shift;
+    my ($hsp_len, $hit_len, $query_len) =
+        @{$self}{qw(HSP_LENGTH HIT_LENGTH QUERY_LENGTH)};
+
+    my $identical = $self->num_identical;
+    my $conserved = $self->num_conserved;
+
+    $self->{_did_prefrac} = 1;
+    my $logical;
+
+    # whole alignment: fractions are relative to the HSP length
+    if ($hsp_len) {
+        $self->length('total', $hsp_len);
+        $logical = $self->_logical_length('total');
+        $self->frac_identical('total', $identical / $hsp_len);
+        $self->frac_conserved('total', $conserved / $hsp_len);
+    }
+
+    # hit side: fractions are relative to the hit's logical length
+    if ($hit_len) {
+        $logical = $self->_logical_length('hit');
+        $self->frac_identical('hit', $identical / $logical);
+        $self->frac_conserved('hit', $conserved / $logical);
+    }
+
+    # query side: fractions are relative to the query's logical length
+    if ($query_len) {
+        $logical = $self->_logical_length('query');
+        $self->frac_identical('query', $identical / $logical);
+        $self->frac_conserved('query', $conserved / $logical);
+    }
+}
+
+# before calling gaps()
+sub _pre_gaps {
+    my $self = shift;
+    # snapshot every input before any gaps() call is made
+    my @sides = (
+        [ 'query', $self->{QUERY_GAPS}, $self->{QUERY_SEQ} ],
+        [ 'hit',   $self->{HIT_GAPS},   $self->{HIT_SEQ}   ],
+    );
+    my $gaps = $self->{HSP_GAPS};
+
+    $self->{_did_pregaps} = 1; # well, we're in the process; avoid recursion
+
+    # Per side: prefer the explicit gap count, otherwise count the '-'
+    # characters in the aligned sequence string (if there is one).
+    for my $side (@sides) {
+        my ($type, $count, $seq) = @$side;
+        if (defined $count) {
+            $self->gaps($type, $count);
+        }
+        elsif (defined $seq) {
+            $self->gaps($type, scalar ( $seq =~ tr/\-//));
+        }
+    }
+
+    # the total defaults to the sum of both sides
+    unless (defined $gaps) {
+        $gaps = $self->gaps("query") + $self->gaps("hit");
+    }
+    $self->gaps('total', $gaps);
+}
+
+# before percent_identity
+sub _pre_pi {
+    my $self = shift;
+    $self->{_did_prepi} = 1;
+    # only meaningful for a non-empty HSP; a supplied percent identity wins
+    # over the value derived from the identical fraction
+    if ( $self->{HSP_LENGTH} > 0 ) {
+        my $percent = $self->{PERCENT_IDENTITY} || $self->frac_identical('total')*100;
+        $self->percent_identity($percent);
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HMMERHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HMMERHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HMMERHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,385 @@
+# $Id: HMMERHSP.pm,v 1.8.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::HMMERHSP
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::HMMERHSP - A HSP object for HMMER results
+
+=head1 SYNOPSIS
+
+    use Bio::Search::HSP::HMMERHSP;
+    # use it just like a Bio::Search::HSP::GenericHSP object
+
+=head1 DESCRIPTION
+
+This object is a specialization of L<Bio::Search::HSP::GenericHSP>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::HMMERHSP;
+use strict;
+
+use base qw(Bio::Search::HSP::GenericHSP);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::HMMERHSP();
+ Function: Builds a new Bio::Search::HSP::HMMERHSP object 
+ Returns : Bio::Search::HSP::HMMERHSP
+ Args    :
+
+Plus Bio::Search::HSP::GenericHSP methods
+
+           -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
+           -evalue    => evalue
+           -pvalue    => pvalue
+           -bits      => bit value for HSP
+           -score     => score value for HSP (typically z-score but depends on
+					      analysis)
+           -hsp_length=> Length of the HSP (including gaps)
+           -identical => # of residues that matched identically
+           -conserved => # of residues that matched conservatively 
+                           (only protein comparisons; 
+			    conserved == identical in nucleotide comparisons)
+           -hsp_gaps   => # of gaps in the HSP
+           -query_gaps => # of gaps in the query in the alignment
+           -hit_gaps   => # of gaps in the subject in the alignment    
+           -query_name  => HSP Query sequence name (if available)
+           -query_start => HSP Query start (in original query sequence coords)
+           -query_end   => HSP Query end (in original query sequence coords)
+           -hit_name    => HSP Hit sequence name (if available)
+           -hit_start   => HSP Hit start (in original hit sequence coords)
+           -hit_end     => HSP Hit end (in original hit sequence coords)
+           -hit_length  => total length of the hit sequence
+           -query_length=> total length of the query sequence
+           -query_seq   => query sequence portion of the HSP
+           -hit_seq     => hit sequence portion of the HSP
+           -homology_seq=> homology sequence for the HSP
+           -hit_frame   => hit frame (only if hit is translated protein)
+           -query_frame => query frame (only if query is translated protein)
+
+=cut
+
+=head2 Bio::Search::HSP::HSPI methods
+
+Implementation of Bio::Search::HSP::HSPI methods follow
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the HSP
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+=head2 pvalue
+
+ Title   : pvalue
+ Usage   : my $pvalue = $hsp->pvalue();
+ Function: Returns the P-value for this HSP or undef 
+ Returns : float or exponential (2e-10)
+           P-value is not defined with NCBI Blast2 reports.
+ Args    : [optional] numeric to set value
+
+=cut
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : my $evalue = $hsp->evalue();
+ Function: Returns the e-value for this HSP
+ Returns : float or exponential (2e-10)
+ Args    : [optional] numeric to set value
+
+=cut
+
+=head2 frac_identical
+
+ Title   : frac_identical
+ Usage   : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+ Function: Returns the fraction of identical positions for this HSP 
+ Returns : Float in range 0.0 -> 1.0
+ Args    : arg 1:  'query' = num identical / length of query seq (without gaps)
+                   'hit'   = num identical / length of hit seq (without gaps)
+                   'total' = num identical / length of alignment (with gaps)
+                   default = 'total' 
+           arg 2: [optional] frac identical value to set for the type requested
+
+=cut
+
+=head2 frac_conserved
+
+ Title    : frac_conserved
+ Usage    : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+ Function : Returns the fraction of conserved positions for this HSP.
+            This is the fraction of symbols in the alignment with a 
+            positive score.
+ Returns : Float in range 0.0 -> 1.0
+ Args    : arg 1: 'query' = num conserved / length of query seq (without gaps)
+                  'hit'   = num conserved / length of hit seq (without gaps)
+                  'total' = num conserved / length of alignment (with gaps)
+                  default = 'total' 
+           arg 2: [optional] frac conserved value to set for the type requested
+
+=cut
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+ Function : Get the number of gaps in the query, hit, or total alignment.
+ Returns  : Integer, number of gaps or 0 if none
+ Args     : arg 1: 'query' = num gaps in query seq
+                   'hit'   = num gaps in hit seq
+                   'total' = num gaps in whole alignment 
+                   default = 'total' 
+            arg 2: [optional] integer gap value to set for the type requested
+
+=cut
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : [optional] string to set for query sequence
+
+
+=cut
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : [optional] string to set for hit sequence
+
+
+=cut
+
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the 
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : [optional] string to set for homology sequence
+
+=cut
+
+=head2 length
+
+ Title    : length
+ Usage    : my $len = $hsp->length( ['query'|'hit'|'total'] );
+ Function : Returns the length of the query or hit in the alignment 
+            (without gaps) 
+            or the aggregate length of the HSP (including gaps;
+            this may be greater than either hit or query )
+ Returns  : integer
+ Args     : arg 1: 'query' = length of query seq (without gaps)
+                   'hit'   = length of hit seq (without gaps)
+                   'total' = length of alignment (with gaps)
+                   default = 'total' 
+            arg 2: [optional] integer length value to set for specific type
+
+=cut
+
+=head2 percent_identity
+
+ Title   : percent_identity
+ Usage   : my $percentid = $hsp->percent_identity()
+ Function: Returns the calculated percent identity for an HSP
+ Returns : floating point between 0 and 100 
+ Args    : none
+
+
+=cut
+
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $hsp->frame($queryframe,$subjectframe)
+ Function: Set the Frame for both query and subject and insure that
+           they agree.
+           This overrides the frame() method implementation in
+           FeaturePair.
+ Returns : array of query and subjects if return type wants an array
+           or query frame if defined or subject frame
+ Args    : none
+ Note    : Frames are stored in the GFF way (0-2) not 1-3
+           as they are in BLAST (negative frames are deduced by checking 
+				 the strand of the query or hit)
+
+=cut
+
+
+=head2 get_aln
+
+ Title   : get_aln
+ Usage   : my $aln = $hsp->get_aln
+ Function: Returns a Bio::SimpleAlign representing the HSP alignment
+ Returns : Bio::SimpleAlign
+ Args    : none
+
+=cut
+
+sub get_aln {
+    my ($self, @ignored) = @_;    # arguments beyond the invocant are not used
+    $self->warn("Inappropriate to build a Bio::SimpleAlign from a HMMER HSP object");
+    return;                       # nothing is built: undef / empty list
+}
+
+=head2 num_conserved
+
+ Title   : num_conserved
+ Usage   : $obj->num_conserved($newval)
+ Function: returns the number of conserved residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+=head2 num_identical
+
+ Title   : num_identical
+ Usage   : $obj->num_identical($newval)
+ Function: returns the number of identical residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+=head2 seq_inds
+
+ Title   : seq_inds
+ Purpose   : Get a list of residue positions (indices) for all identical 
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers 
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+           :  ('sbjct' is synonymous with 'hit') 
+           : class     = 'identical' or 'conserved' or 'nomatch' or 'gap'
+           :              (default = identical)
+           :              (can be shortened to 'id' or 'cons')
+           :              
+           : collapse  = boolean, if true, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11" 
+           :             collapses to "1-5 7 9-11". This is useful for 
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : 
+
+See Also   : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
+
+=cut
+
+=head2 Inherited from Bio::SeqFeature::SimilarityPair
+
+These methods come from Bio::SeqFeature::SimilarityPair
+
+=head2 query
+
+ Title   : query
+ Usage   : my $query = $hsp->query
+ Function: Returns a SeqFeature representing the query in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 hit
+
+ Title   : hit
+ Usage   : my $hit = $hsp->hit
+ Function: Returns a SeqFeature representing the hit in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: Get/Set the significance value
+ Returns : numeric
+ Args    : [optional] new value to set
+
+
+=head2 score
+
+ Title   : score
+ Usage   : my $score = $hsp->score();
+ Function: Returns the score for this HSP or undef 
+ Returns : numeric           
+ Args    : [optional] numeric to set value
+
+=cut 
+
+=head2 bits
+
+ Title   : bits
+ Usage   : my $bits = $hsp->bits();
+ Function: Returns the bit value for this HSP or undef 
+ Returns : numeric
+ Args    : none
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: HSPFactory.pm,v 1.8.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::HSPFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::HSPFactory - A factory to create Bio::Search::HSP::HSPI objects 
+
+=head1 SYNOPSIS
+
+    use Bio::Search::HSP::HSPFactory;
+    my $factory = new Bio::Search::HSP::HSPFactory();
+    my $resultobj = $factory->create(@args);
+
+=head1 DESCRIPTION
+
+
+This is a general way of hiding the object creation process so that we
+can dynamically change the objects that are created by the SearchIO
+parser depending on what format report we are parsing.
+
+This object is for creating new HSPs.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::HSPFactory;
+use vars qw($DEFAULT_TYPE);
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);
+
+# Assign the package-level fallback class name at compile time. type()
+# returns this value whenever no explicit type has been stored on the
+# factory object.
+BEGIN { 
+    $DEFAULT_TYPE = 'Bio::Search::HSP::GenericHSP'; 
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::HSPFactory();
+ Function: Builds a new Bio::Search::HSP::HSPFactory object 
+ Returns : Bio::Search::HSP::HSPFactory
+ Args    :
+
+
+=cut
+
+sub new {
+  # Constructor: builds the object through the parent class and then
+  # applies the optional -type argument.
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # Only override the default creation class when -type was supplied;
+  # type() validates the module by trying to load it.
+  my ($type) = $self->_rearrange([qw(TYPE)], @args);
+  $self->type($type) if defined $type;
+  return $self;
+}
+
+=head2 create
+
+ Title   : create
+ Usage   : $factory->create(%args)
+ Function: Create a new L<Bio::Search::HSP::HSPI> object  
+ Returns : L<Bio::Search::HSP::HSPI>
+ Args    : hash of initialization parameters
+
+
+=cut
+
+sub create{
+   # Instantiate an object of the currently configured type, forwarding
+   # all arguments unchanged to that class's constructor.
+   my ($self, @args) = @_;
+   my $type = $self->type;
+
+   # Make sure the implementing module is loaded before calling new();
+   # report the underlying load error so failures can be diagnosed.
+   eval { $self->_load_module($type) };
+   if( $@ ) { $self->throw("Unable to load module $type: $@"); }
+   return $type->new(@args);
+}
+
+
+=head2 type
+
+ Title   : type
+ Usage   : $factory->type('Bio::Search::HSP::GenericHSP');
+ Function: Get/Set the HSP creation type
+ Returns : string
+ Args    : [optional] string to set 
+
+=cut
+
+sub type{
+    # Combined getter/setter for the class name used by create().
+    my $self = shift;
+    my $type = shift;
+
+    if( defined $type ) {
+        # create() loads the module again later; the duplicate load here
+        # lets a bad class name be rejected at set time instead.
+        eval { $self->_load_module($type) };
+        if( $@ ) {
+            # Keep whatever type was stored before and just warn.
+            $self->warn("Cannot find module $type, unable to set type. $@");
+        }
+        else {
+            $self->{'_type'} = $type;
+        }
+    }
+
+    # Fall back to the package default when nothing (true) is stored.
+    return $self->{'_type'} || $DEFAULT_TYPE;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HSPI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,745 @@
+#-----------------------------------------------------------------
+# $Id: HSPI.pm,v 1.36.4.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::HSPI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+# and Jason Stajich <jason at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::HSPI - Interface for a High Scoring Pair in a similarity search result
+
+=head1 SYNOPSIS
+
+    # Bio::Search::HSP::HSPI objects cannot be instantiated since this
+    # module defines a pure interface.
+
+    # Given an object that implements the Bio::Search::HSP::HSPI  interface,
+    # you can do the following things with it:
+
+    $r_type = $hsp->algorithm;
+
+    $pvalue = $hsp->pvalue();
+
+    $evalue = $hsp->evalue();
+
+    $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+
+    $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+
+    $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+
+    $qseq = $hsp->query_string;
+
+    $hseq = $hsp->hit_string;
+
+    $homology_string = $hsp->homology_string;
+
+    $len = $hsp->length( ['query'|'hit'|'total'] );
+
+    $rank = $hsp->rank;
+
+=head1 DESCRIPTION
+
+Bio::Search::HSP::HSPI objects cannot be instantiated since this
+module defines a pure interface.
+
+Given an object that implements the L<Bio::Search::HSP::HSPI> interface,
+you can do the following things with it:
+
+=head1 SEE ALSO
+
+This interface inherits methods from these other modules:
+
+L<Bio::SeqFeatureI>,
+L<Bio::SeqFeature::FeaturePair>
+L<Bio::SeqFeature::SimilarityPair>
+
+Please refer to these modules for documentation of the 
+many additional inherited methods.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Steve Chervitz, Jason Stajich
+
+Email sac-at-bioperl.org
+Email jason-at-bioperl.org
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz, Jason Stajich. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::HSPI;
+
+
+use strict;
+use Carp;
+
+use base qw(Bio::SeqFeature::SimilarityPair Bio::Root::RootI);
+
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the HSP
+ Returns : string (e.g., BLASTP)
+ Args    : none
+
+=cut
+
+sub algorithm{
+   # Interface stub: implementing classes must override this method.
+   my ($self, @args) = @_;
+   $self->throw_not_implemented;
+}
+
+=head2 pvalue
+
+ Title   : pvalue
+ Usage   : my $pvalue = $hsp->pvalue();
+ Function: Returns the P-value for this HSP or undef 
+ Returns : float or exponential (2e-10)
+           P-value is not defined with NCBI Blast2 reports.
+ Args    : none
+
+=cut
+
+sub pvalue {
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : my $evalue = $hsp->evalue();
+ Function: Returns the e-value for this HSP
+ Returns : float or exponential (2e-10)
+ Args    : none
+
+=cut
+
+sub evalue {
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 frac_identical
+
+ Title   : frac_identical
+ Usage   : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+ Function: Returns the fraction of identical positions for this HSP 
+ Returns : Float in range 0.0 -> 1.0
+ Args    : 'query' = num identical / length of query seq (without gaps)
+           'hit'   = num identical / length of hit seq (without gaps)
+           'total' = num identical / length of alignment (with gaps)
+           default = 'total' 
+
+=cut
+
+sub frac_identical {
+   # Interface stub: the optional type argument is ignored here;
+   # implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 frac_conserved
+
+ Title    : frac_conserved
+ Usage    : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+ Function : Returns the fraction of conserved positions for this HSP.
+            This is the fraction of symbols in the alignment with a 
+            positive score.
+ Returns : Float in range 0.0 -> 1.0
+ Args    : 'query' = num conserved / length of query seq (without gaps)
+           'hit'   = num conserved / length of hit seq (without gaps)
+           'total' = num conserved / length of alignment (with gaps)
+           default = 'total' 
+
+=cut
+
+sub frac_conserved {
+    # Interface stub: the optional type argument is ignored here;
+    # implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 num_identical
+
+ Title   : num_identical
+ Usage   : $obj->num_identical($newval)
+ Function: returns the number of identical residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+sub num_identical{
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 num_conserved
+
+ Title   : num_conserved
+ Usage   : $obj->num_conserved($newval)
+ Function: returns the number of conserved residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+
+=cut
+
+sub num_conserved{
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+ Function : Get the number of gaps in the query, hit, or total alignment.
+ Returns  : Integer, number of gaps or 0 if none
+ Args     : 'query' = number of gaps in the query seq
+            'hit'   = number of gaps in the hit seq
+            'total' = number of gaps in the whole alignment
+            default = 'total' 
+
+=cut
+
+sub gaps        {
+    # Interface stub: the optional type argument is ignored here;
+    # implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub query_string{
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub hit_string{
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the 
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : none
+
+=cut
+
+sub homology_string{
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 length
+
+ Title    : length
+ Usage    : my $len = $hsp->length( ['query'|'hit'|'total'] );
+ Function : Returns the length of the query or hit in the alignment (without gaps) 
+            or the aggregate length of the HSP (including gaps;
+            this may be greater than either hit or query )
+ Returns  : integer
+ Args     : 'query' = length of query seq (without gaps)
+            'hit'   = length of hit seq (without gaps)
+            'total' = length of alignment (with gaps)
+            default = 'total' 
+            (the argument is optional)
+
+=cut
+
+sub length{
+    # Interface stub: implementing classes must override this method.
+    # (Other code in this package calls CORE::length explicitly to reach
+    # the Perl builtin rather than this method.)
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 percent_identity
+
+ Title   : percent_identity
+ Usage   : my $percentid = $hsp->percent_identity()
+ Function: Returns the calculated percent identity for an HSP
+ Returns : floating point between 0 and 100 
+ Args    : none
+
+
+=cut
+
+sub percent_identity{
+   # Convert the identity fraction reported by frac_identical() into a
+   # percentage. The 'hsp' selector is passed through verbatim; how it is
+   # interpreted is up to the implementing class's frac_identical().
+   my $self = shift;
+   my $fraction = $self->frac_identical('hsp');
+   return $fraction * 100;
+}
+
+=head2 get_aln
+
+ Title   : get_aln
+ Usage   : my $aln = $hsp->get_aln
+ Function: Returns a Bio::SimpleAlign representing the HSP alignment
+ Returns : Bio::SimpleAlign
+ Args    : none
+
+=cut
+
+sub get_aln {
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 seq_inds
+
+ Title   : seq_inds
+ Purpose   : Get a list of residue positions (indices) for all identical 
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers 
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+              ('sbjct' is synonymous with 'hit') 
+             class     = 'identical' or 'conserved' or 'nomatch' or 'gap'
+                          (default = identical)
+                          (can be shortened to 'id' or 'cons')
+
+             collapse  = boolean, if true, consecutive positions are merged
+                         using a range notation, e.g., "1 2 3 4 5 7 9 10 11" 
+                         collapses to "1-5 7 9-11". This is useful for 
+                         consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : 
+
+See Also   : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub seq_inds {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 Inherited from L<Bio::SeqFeature::SimilarityPair>
+
+These methods come from L<Bio::SeqFeature::SimilarityPair>
+
+=head2 query
+
+ Title   : query
+ Usage   : my $query = $hsp->query
+ Function: Returns a SeqFeature representing the query in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 hit
+
+ Title   : hit
+ Usage   : my $hit = $hsp->hit
+ Function: Returns a SeqFeature representing the hit in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: Get/Set the significance value (see Bio::SeqFeature::SimilarityPair)
+ Returns : significance value (scientific notation string)
+ Args    : significance value (sci notation string)
+
+
+=head2 score
+
+ Title   : score
+ Usage   : my $score = $hsp->score();
+ Function: Returns the score for this HSP or undef 
+ Returns : numeric           
+ Args    : [optional] numeric to set value
+
+=head2 bits
+
+ Title   : bits
+ Usage   : my $bits = $hsp->bits();
+ Function: Returns the bit value for this HSP or undef 
+ Returns : numeric
+ Args    : none
+
+=cut
+
+# override 
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $hsp->strand('query')
+ Function: Retrieves the strand for the HSP component requested
+ Returns : +1 or -1 (0 if unknown)
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
+           'query' to retrieve the query strand (default)
+           'list' or 'array' to retrieve both query and hit together
+
+=cut
+
+sub strand {
+    # Dispatch a strand request to the query feature, the hit feature,
+    # or both, depending on the leading characters of the selector.
+    my ($self, $val, @rest) = @_;
+    $val = 'query' unless defined $val;
+    $val =~ s/^\s+//;
+
+    return $self->query->strand(@rest) if $val =~ /^q/i;
+    return $self->hit->strand(@rest)   if $val =~ /^hi|^s/i;
+
+    if( $val =~ /^list|array/i ) {
+        # Any remaining arguments are forwarded to both features.
+        return ( $self->query->strand(@rest),
+                 $self->hit->strand(@rest) );
+    }
+
+    # Unknown selector: warn and report strand 0.
+    $self->warn("unrecognized component '$val' requested\n");
+    return 0;
+}
+
+=head2 start
+
+ Title   : start
+ Usage   : $hsp->start('query')
+ Function: Retrieves the start for the HSP component requested
+ Returns : integer
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
+           'query' to retrieve the query start (default)
+
+=cut
+
+sub start {
+    # Dispatch a start-coordinate request to the query feature, the hit
+    # feature, or both, depending on the leading characters of the selector.
+    my ($self, $val, @rest) = @_;
+    $val = 'query' unless defined $val;
+    $val =~ s/^\s+//;
+
+    return $self->query->start(@rest) if $val =~ /^q/i;
+    return $self->hit->start(@rest)   if $val =~ /^(hi|s)/i;
+
+    if( $val =~ /^list|array/i ) {
+        # Any remaining arguments are forwarded to both features.
+        return ( $self->query->start(@rest),
+                 $self->hit->start(@rest) );
+    }
+
+    # Unknown selector: warn and report 0.
+    $self->warn("unrecognized component '$val' requested\n");
+    return 0;
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $hsp->end('query')
+ Function: Retrieves the end for the HSP component requested
+ Returns : integer
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
+           'query' to retrieve the query end (default)
+
+=cut
+
+sub end {
+    # Dispatch an end-coordinate request to the query feature, the hit
+    # feature, or both, depending on the leading characters of the selector.
+    my ($self, $val, @rest) = @_;
+    $val = 'query' unless defined $val;
+    $val =~ s/^\s+//;
+
+    return $self->query->end(@rest) if $val =~ /^q/i;
+    return $self->hit->end(@rest)   if $val =~ /^(hi|s)/i;
+
+    if( $val =~ /^list|array/i ) {
+        # Any remaining arguments are forwarded to both features.
+        return ( $self->query->end(@rest),
+                 $self->hit->end(@rest) );
+    }
+
+    # Unknown selector: warn and report 0.
+    $self->warn("unrecognized end component '$val' requested\n");
+    return 0;
+}
+
+=head2 seq
+
+ Usage     : $hsp->seq( [seq_type] );
+ Purpose   : Get the query or sbjct sequence as a Bio::LocatableSeq object.
+ Example   : $seqObj = $hsp->seq('query');
+ Returns   : Object reference for a Bio::LocatableSeq object.
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query').
+           :  ('sbjct' is synonymous with 'hit') 
+           : default is 'query'
+ Throws    : Propagates any exception that occurs during construction
+           : of the Bio::Seq.pm object.
+ Comments  : The sequence is returned in an array of strings corresponding
+           : to the strings in the original format of the Blast alignment.
+           : (i.e., same spacing).
+
+See Also   : L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<Bio::Seq>
+
+=cut
+
+#-------
+sub seq {
+#-------
+    # Build a Bio::LocatableSeq for the query or hit side of this HSP.
+    # Throws if the homology/match string is requested, since that is not
+    # a real sequence.
+    my($self,$seqType) = @_; 
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # Reject the homology line up front, before fetching any string data.
+    if( $seqType =~ /^(m|ho)/i ) {
+        $self->throw("cannot call seq on the homology match string, it isn't really a sequence, use get_aln to convert the HSP to a Bio::AlignIO and generate a consensus from that.");
+    }
+    my $str = $self->seq_str($seqType);
+
+    # Loaded lazily so the interface does not require it at compile time.
+    require Bio::LocatableSeq;
+    my $id = $seqType =~ /^q/i ? $self->query->seq_id : $self->hit->seq_id;
+    return Bio::LocatableSeq->new(-ID    => $id,
+                                  -SEQ   => $str,
+                                  -START => $self->start($seqType),
+                                  -END   => $self->end($seqType),
+                                  -STRAND=> $self->strand($seqType),
+                                  -DESC  => "$seqType sequence ",
+                                  );
+}
+
+=head2 seq_str
+
+ Usage     : $hsp->seq_str( seq_type );
+ Purpose   : Get the full query, sbjct, or 'match' sequence as a string.
+           : The 'match' sequence is the string of symbols in between the 
+           : query and sbjct sequences.
+ Example   : $str = $hsp->seq_str('query');
+ Returns   : String
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
+           :  ('sbjct' is synonymous with 'hit')
+           : default is 'query'
+ Throws    : Exception if the argument does not match an accepted seq_type.
+ Comments  : 
+
+See Also   : L<seq()|seq>, L<seq_inds()|seq_inds>, L<_set_match_seq()>
+
+=cut
+
+sub seq_str {  
+    # Return the query, hit or homology string selected by the leading
+    # characters of the type argument (default 'query'). Unknown types
+    # produce a warning and an empty string.
+    my $self = shift;
+    my $type = shift || 'query';
+
+    # Both alternatives are grouped so the ^ anchor applies to each of
+    # them, matching the selector handling in start() and end().
+    if( $type =~ /^q/i ) { return $self->query_string(@_) }
+    elsif( $type =~ /^(?:s|hi)/i ) { return $self->hit_string(@_) }
+    elsif( $type =~ /^(?:ho|ma)/i ) { return $self->homology_string(@_) }
+    else { 
+        $self->warn("unknown sequence type $type");
+    }
+    return '';
+}
+
+
+=head2 rank
+
+ Usage     : $hsp->rank( [string] );
+ Purpose   : Get the rank of the HSP within a given Blast hit.
+ Example   : $rank = $hsp->rank;
+ Returns   : Integer (1..n) corresponding to the order in which the HSP
+             appears in the BLAST report.
+
+=cut
+
+sub rank {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 matches
+
+ Usage     : $hsp->matches(-seq   => 'hit'|'query', 
+                           -start => $start, 
+                           -stop  => $stop);
+ Purpose   : Get the total number of identical and conservative matches 
+           : in the query or sbjct sequence for the given HSP. Optionally can
+           : report data within a defined interval along the seq.
+           : (Note: 'conservative' matches are called 'positives' in the
+           : Blast report.)
+ Example   : ($id,$cons) = $hsp_object->matches(-seq   => 'hit');
+           : ($id,$cons) = $hsp_object->matches(-seq   => 'query',
+                                                -start => 300,
+                                                -stop  => 400);
+ Returns   : 2-element array of integers 
+ Argument  : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query)
+           :  ('sbjct' is synonymous with 'hit') 
+           : (2) start = Starting coordinate (optional)
+           : (3) stop  = Ending coordinate (optional)
+ Throws    : Exception if the supplied coordinates are out of range.
+ Comments  : Relies on seq_str('match') to get the string of alignment symbols
+           : between the query and sbjct lines which are used for determining
+           : the number of identical and conservative matches.
+
+See Also   : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#-----------
+sub matches {
+#-----------
+    # Return (identical, conserved) match counts, either for the whole
+    # HSP or for the sub-range given by -start/-stop on the chosen
+    # sequence (-seq, default 'query').
+    my( $self, %param ) = @_;
+
+    # The POD documents lower-case named arguments (-seq, -start, -stop)
+    # whereas this method historically read only the upper-case keys.
+    # Fold every key to upper case so both spellings are honoured.
+    my %arg;
+    foreach my $key (sort keys %param) {
+        $arg{ uc $key } = $param{$key};
+    }
+    my($seqType, $beg, $end) = @arg{qw(-SEQ -START -STOP)};
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    my(@data);
+    if( (!defined $beg && !defined $end) || ! $self->seq_str('match') ) {
+        ## Get data for the whole alignment.
+        push @data, ($self->num_identical, $self->num_conserved);
+    } else {
+        ## Get the substring representing the desired sub-section of aln.
+        $beg ||= 0;
+        $end ||= 0;
+        my($start,$stop) = $self->range($seqType);
+        if($beg == 0) { $beg = $start; $end = $beg+$end; } # sane?
+        elsif($end == 0) { $end = $stop; $beg = $end-$beg; } # sane?
+        
+        # Clamp the requested interval to the HSP's own range.
+        if($end > $stop) { $end = $stop; }
+        if($beg < $start) { $beg = $start; }
+        
+        ## ML: START fix for substr out of range error ------------------
+        # For the TBLASTN/TBLASTX subject and the BLASTX/TBLASTX query the
+        # offsets are divided by 3 before indexing the match string
+        # (presumably nucleotide coordinates vs. a per-residue match line).
+        my $seq = "";
+        if (($self->algorithm =~ /TBLAST[NX]/) && ($seqType eq 'sbjct'))
+        {
+            $seq = substr($self->seq_str('match'),
+                          int(($beg-$start)/3), 
+                          int(($end-$beg+1)/3));
+
+        } elsif (($self->algorithm =~ /T?BLASTX/) && ($seqType eq 'query')) {
+            $seq = substr($self->seq_str('match'),
+                          int(($beg-$start)/3), int(($end-$beg+1)/3));
+        } else {
+            $seq = substr($self->seq_str('match'), 
+                          $beg-$start, ($end-$beg+1));
+        }
+        ## ML: End of fix for  substr out of range error -----------------
+        
+        if(!CORE::length $seq) {
+            $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
+        }
+        
+        $seq =~ s/ //g;  # remove space (no info).
+        my $len_cons = CORE::length $seq;
+        $seq =~ s/\+//g;  # remove '+' characters (conservative substitutions)
+        my $len_id = CORE::length $seq;
+        push @data, ($len_id, $len_cons);
+    }
+    return @data;
+}
+
+=head2 n
+
+ Usage     : $hsp_obj->n()
+ Purpose   : Get the N value (num HSPs on which P/Expect is based).
+           : This value is not defined with NCBI Blast2 with gapping.
+ Returns   : Integer or null string if not defined.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : The 'N' value is listed in parenthesis with P/Expect value:
+           : e.g., P(3) = 1.2e-30  ---> (N = 3).
+           : Not defined in NCBI Blast2 with gaps.
+           : This typically is equal to the number of HSPs but not always.
+           : To obtain the number of HSPs, use Bio::Search::Hit::HitI::num_hsps().
+
+See Also   : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair>
+
+=cut
+
+sub n {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+=head2 range
+
+ Usage     : $hsp->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($query_beg, $query_end) = $hsp->range('query');
+           : ($hit_beg, $hit_end) = $hsp->range('hit');
+ Returns   : Two-element array of integers 
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :  ('sbjct' is synonymous with 'hit') 
+ Throws    : n/a
+ Comments  : This is a convenience method for constructions such as
+             ($hsp->query->start, $hsp->query->end)
+
+=cut
+
+sub range {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+# Alias for evalue(); all arguments are forwarded unchanged.
+sub expect {
+    my $self = shift;
+    return $self->evalue(@_);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HmmpfamHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HmmpfamHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HmmpfamHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,342 @@
+# $Id: HmmpfamHSP.pm,v 1.1.2.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::HmmpfamHSP
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::HmmpfamHSP - A parser and HSP object for hmmpfam hsps
+
+=head1 SYNOPSIS
+
+    # generally we use Bio::SearchIO to build these objects
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer_pull',
+							   -file   => 'result.hmmer');
+
+    while (my $result = $in->next_result) {
+		while (my $hit = $result->next_hit) {
+			print $hit->name, "\n";
+			print $hit->score, "\n";
+			print $hit->significance, "\n";
+
+			while (my $hsp = $hit->next_hsp) {
+				# process HSPI objects
+			}
+		}
+    }
+
+=head1 DESCRIPTION
+
+This object implements a parser for hmmpfam hsp output, a program in the HMMER
+package.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::HSP::HmmpfamHSP;
+
+use strict;
+use base qw(Bio::Search::HSP::PullHSPI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::HmmpfamHSP();
+ Function: Builds a new Bio::Search::HSP::HmmpfamHSP object.
+ Returns : Bio::Search::HSP::HmmpfamHSP
+ Args    : -chunk  => [Bio::Root::IO, $start, $end] (required if no -parent)
+           -parent => Bio::PullParserI object (required if no -chunk)
+           -hsp_data => array ref with [rank query_start query_end hit_start
+										hit_end score evalue]
+
+           where the array ref provided to -chunk contains an IO object
+           for a filehandle to something representing the raw data of the
+           hsp, and $start and $end define the tell() position within the
+           filehandle that the hsp data starts and ends (optional; defaults
+           to start and end of the entire thing described by the filehandle)
+
+=cut
+
+sub new {
+    # Constructor: builds the object via the parent class, runs _setup()
+    # with the same arguments, seeds the field table and registers which
+    # fields depend on which discovery step.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    $self->_setup(@args);
+
+    # Register 'alignment' as a known field with no value yet.
+    my $fields = $self->_fields;
+    $fields->{alignment} = undef;
+
+    # When raw hsp data was supplied as an array ref, copy its leading
+    # values into the named fields (the caller's array is left untouched).
+    my $hsp_data = $self->_raw_hsp_data;
+    if ($hsp_data && ref($hsp_data) eq 'ARRAY') {
+        my @names = qw(rank query_start query_end hit_start hit_end score evalue);
+        @{$fields}{@names} = @{$hsp_data}[0 .. $#names];
+    }
+
+    # Map each derived field to the field whose discovery provides it.
+    my %dependency_of = (
+        ( map { $_ => 'alignment' }
+              qw(query_string hit_string homology_string) ),
+        ( map { $_ => 'seq_inds' }
+              qw(hit_identical_inds hit_conserved_inds
+                 hit_nomatch_inds hit_gap_inds
+                 query_identical_inds query_conserved_inds
+                 query_nomatch_inds query_gap_inds) ),
+    );
+    $self->_dependencies( \%dependency_of );
+
+    return $self;
+}
+
+#
+# PullParserI discovery methods so we can answer all HitI questions
+#
+
+# Locate this HSP's alignment text and derive the query, hit and homology
+# strings plus gap counts from it. Always marks the 'alignment' field as
+# resolved, whether or not an alignment was found.
+sub _discover_alignment {
+    my $self = shift;
+    my $alignments_hash = $self->get_field('alignments');
+	
+    # Alignments are keyed by "<name>~~~~<rank>". Keep asking the
+    # grandparent object for further alignments until ours shows up or
+    # _next_alignment returns false.
+    my $identifier = $self->get_field('name').'~~~~'.$self->get_field('rank');
+    while (! defined $alignments_hash->{$identifier}) {
+        last unless $self->parent->parent->_next_alignment;
+    }
+    my $alignment = $alignments_hash->{$identifier};
+    
+    if ($alignment) {
+        # work out query, hit and homology strings, and some stats
+        # (quicker to do this all at once instead of each method working on
+        # $alignment string itself)
+        
+        my ($query_string, $hit_string, $homology_string);
+        # Each match covers three consecutive lines: $1 is the token on the
+        # first line (used as the hit string), $2 the second line (homology)
+        # and $3 the sequence token on the third line (query).
+        # NOTE(review): presumably one hmmpfam alignment block - confirm.
+        while ($alignment =~ /\s+(\S+)\n\s+(\S.+)\n\s+\S+\s+\d+\s+(\S+)\s+\d/gm) {
+            my $hi = $1;
+            my $ho = $2;
+            $query_string .= $3;
+            
+            # Order matters: drop the leading '*->' marker, left-pad the
+            # homology line with spaces to the hit line's current length,
+            # and only then drop the trailing '<-*' marker.
+            $hi =~ s/\*\-\>//;
+            $ho = ' 'x(length($hi) - length($ho)).$ho;
+			$hi =~ s/\<\-\*//;
+            
+            $hit_string .= $hi;
+            $homology_string .= $ho;
+        }
+        
+        $self->_fields->{query_string} = $query_string;
+        $self->_fields->{hit_string} = $hit_string;
+        # Strip exactly three trailing spaces from the homology string, if present.
+        $homology_string =~ s/   $//;
+        $self->_fields->{homology_string} = $homology_string;
+        
+        # Gaps are counted as '-' in the query string and '.' in the hit string.
+        ($self->{_query_gaps}) = $query_string =~ tr/-//;
+        ($self->{_hit_gaps}) = $hit_string =~ tr/.//;
+        ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
+    }
+    
+    $self->_fields->{alignment} = 1; # stop this method being called again
+}
+
+# seq_inds related methods, all just need seq_inds field to have been gotten
+sub _discover_seq_inds {
+    my ($self) = @_;
+    my ($homology, $query_seq, $hit_seq) = ( $self->get_field('homology_string'),
+                                             $self->get_field('query_string'),
+                                             $self->get_field('hit_string') );
+
+    # (approach largely borrowed from GenericHSP)
+
+    # One set of residue numbers per match category and per sequence. Hash
+    # keys are used for the sets so a residue number is never stored twice;
+    # gap positions are kept in plain arrays (consecutive gaps repeat a number).
+    my %query_set = ( identical => {}, conserved => {}, nomatch => {} );
+    my %hit_set   = ( identical => {}, conserved => {}, nomatch => {} );
+    my (@query_gaps, @hit_gaps);
+
+    # Homology-line symbols that are counted as conserved positions.
+    my %is_conserved = ( '+' => 1, '.' => 1, ':' => 1 );
+
+    # The three strings are consumed from their last character backwards, so
+    # the residue counters start at the end coordinates and count down.
+    my $query_num = $self->get_field('query_end');
+    my $hit_num   = $self->get_field('hit_end');
+
+    while (my $symbol = chop($homology)) {
+        my $query_char = chop($query_seq);
+        my $hit_char   = chop($hit_seq);
+
+        # Classify the column by its homology symbol: a listed symbol is
+        # conserved, a space is a mismatch, anything else is an identity.
+        my $category;
+        if ($is_conserved{$symbol}) {
+            $category = 'conserved';
+        }
+        elsif ($symbol eq ' ') {
+            $category = 'nomatch';
+        }
+        else {
+            $category = 'identical';
+        }
+        $query_set{$category}{$query_num} = 1;
+        $hit_set{$category}{$hit_num}     = 1;
+
+        # A gap character is recorded at the current residue number and
+        # leaves the counter alone; a real residue moves the counter down.
+        if ($query_char eq '-') { push @query_gaps, $query_num }
+        else                    { $query_num -= 1 }
+        if ($hit_char eq '.')   { push @hit_gaps, $hit_num }
+        else                    { $hit_num -= 1 }
+    }
+
+    # Publish the results as numerically sorted lists; the gap arrays were
+    # filled back-to-front, so they are simply reversed.
+    my $fields = $self->_fields;
+    for my $category (qw(identical conserved nomatch)) {
+        $fields->{"hit_${category}_inds"}
+            = [ sort { $a <=> $b } keys %{ $hit_set{$category} } ];
+        $fields->{"query_${category}_inds"}
+            = [ sort { $a <=> $b } keys %{ $query_set{$category} } ];
+    }
+    $fields->{hit_gap_inds}   = [ reverse @hit_gaps ];
+    $fields->{query_gap_inds} = [ reverse @query_gaps ];
+
+    $fields->{seq_inds} = 1;    # mark the seq_inds field as done
+}
+
+=head2 query
+
+ Title   : query
+ Usage   : my $query = $hsp->query
+ Function: Returns a SeqFeature representing the query in the HSP
+ Returns : L<Bio::SeqFeature::Similarity>
+ Args    : none
+
+=cut
+
+sub query {
+    my $self = shift;
+    unless ($self->{_created_query}) {    # build the feature once, on first use
+        # Class->new rather than the ambiguous indirect-object form "new Class"
+        $self->SUPER::query( Bio::SeqFeature::Similarity->new(
+                   '-primary'  => $self->primary_tag,
+                   '-start'    => $self->get_field('query_start'),
+                   '-end'      => $self->get_field('query_end'),
+                   '-expect'   => $self->get_field('evalue'),
+                   '-score'    => $self->get_field('score'),
+                   '-strand'   => 1,
+                   '-seq_id'   => $self->get_field('query_name'),
+                   #'-seqlength'=> $self->get_field('query_length'),  (not known)
+                   '-source'   => $self->get_field('algorithm'),
+                   '-seqdesc'  => $self->get_field('query_description') ) );
+        $self->{_created_query} = 1;
+    }
+    return $self->SUPER::query(@_);    # any arguments go on to the parent accessor
+}
+
+=head2 hit
+
+ Title   : hit
+ Usage   : my $hit = $hsp->hit
+ Function: Returns a SeqFeature representing the hit in the HSP
+ Returns : L<Bio::SeqFeature::Similarity>
+ Args    : [optional] new value to set
+
+=cut
+
+sub hit {
+    my $self = shift;
+    unless ($self->{_created_hit}) {    # build the feature once, on first use
+        # Class->new rather than the ambiguous indirect-object form "new Class"
+        $self->SUPER::hit( Bio::SeqFeature::Similarity->new(
+                   '-primary'  => $self->primary_tag,
+                   '-start'    => $self->get_field('hit_start'),
+                   '-end'      => $self->get_field('hit_end'),
+                   '-expect'   => $self->get_field('evalue'),
+                   '-score'    => $self->get_field('score'),
+                   '-strand'   => 1,
+                   '-seq_id'   => $self->get_field('name'),
+                   '-seqlength'=> $self->get_field('length'),
+                   '-source'   => $self->get_field('algorithm'),
+                   '-seqdesc'  => $self->get_field('description') ) );
+        $self->{_created_hit} = 1;
+    }
+    return $self->SUPER::hit(@_);    # any arguments go on to the parent accessor
+}
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+ Function : Get the number of gaps in the query, hit, or total alignment.
+ Returns  : Integer, number of gaps or 0 if none
+ Args     : 'query' = number of gaps in the query sequence
+            'hit'   = number of gaps in the hit sequence
+            'total' = number of gaps in the whole alignment (query + hit)
+            default = 'total' 
+
+=cut
+
+sub gaps {
+    my ($self, $type) = @_;
+
+    # Normalise the selector: subject/sbjct are aliases for hit, and anything
+    # that is not exactly query, hit or total (including 'hsp') means total.
+    $type = defined $type ? lc $type : 'total';
+    $type = 'hit'   if $type =~ /sbjct|subject/;
+    $type = 'total' unless $type =~ /\A(?:query|hit|total)\z/;
+    $self->get_field('alignment'); # make sure gaps have been calculated
+    return $self->{'_'.$type.'_gaps'} || 0;    # 0, not undef, when nothing was counted
+}
+
+=head2 pvalue
+
+ Title   : pvalue
+ Usage   : my $pvalue = $hsp->pvalue();
+ Function: Returns the P-value for this HSP
+ Returns : undef (Hmmpfam reports do not have p-values)
+ Args    : none
+
+=cut
+
+sub pvalue {
+    undef;    # always undef: Hmmpfam reports do not have p-values
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/HmmpfamHSP.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PSLHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PSLHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PSLHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: PSLHSP.pm,v 1.4.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::PSLHSP
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::PSLHSP - A HSP for PSL output
+
+=head1 SYNOPSIS
+
+  # get a PSLHSP somehow (SearchIO::psl)
+
+=head1 DESCRIPTION
+
+This is a HSP for PSL output so we can handle seq_inds differently.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::PSLHSP;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Search::HSP::GenericHSP);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::PSLHSP();
+ Function: Builds a new Bio::Search::HSP::PSLHSP object 
+ Returns : an instance of Bio::Search::HSP::PSLHSP
+ Args    : -query_gapblocks => arrayref of query gap locations ([start,length])
+           -hit_gapblocks   => arrayref of hit gap locations ([start,length])
+           -mismatches      => number of mismatches
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Pick out the PSL-specific named arguments from the same argument list
+    # that was handed to the parent constructor.
+    my ($qgaplocs, $hgaplocs, $mismatches) =
+        $self->_rearrange([qw(QUERY_GAPBLOCKS HIT_GAPBLOCKS MISMATCHES)], @args);
+    # Each value is stored only when the caller actually supplied it.
+    $self->gap_blocks('query',$qgaplocs) if defined $qgaplocs;
+    $self->gap_blocks('hit',  $hgaplocs) if defined $hgaplocs;
+    $self->mismatches($mismatches) if defined $mismatches;
+    return $self;
+}
+
+=head2 gap_blocks
+
+ Title   : gap_blocks
+ Usage   : $obj->gap_blocks($seqtype,$blocks)
+ Function: Get/Set the gap blocks
+ Returns : value of gap_blocks (a scalar)
+ Args    : sequence type - 'query' or 'hit'
+           blocks - arrayref of block start,length
+
+
+=cut
+
+sub gap_blocks {
+    my ($self, $which, $new_blocks) = @_;
+
+    # Sequence type defaults to 'query'; it is lower-cased and 'sbjct' is
+    # treated as another spelling of 'hit'.
+    $which = lc( defined $which ? $which : 'query' );
+    $which = 'hit' if $which eq 'sbjct';
+    # An unrecognised type only triggers a warning; the lookup still proceeds.
+    $self->warn("Expect either 'query' or 'hit' as argument 1 for gap_blocks")
+        unless $which =~ /query|hit/i;
+
+    # Setter when blocks are given (returns the new value), getter otherwise.
+    return $self->{'_gap_blocks'}->{$which} = $new_blocks if defined $new_blocks;
+    return $self->{'_gap_blocks'}->{$which};
+}
+
+=head2 mismatches
+
+ Title   : mismatches
+ Usage   : $obj->mismatches($newval)
+ Function: Get/Set the number of mismatches
+ Returns : value of mismatches (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub mismatches{
+    my ($self, @new) = @_;
+    $self->{'mismatches'} = $new[0] if @new;    # set whenever an argument (even undef) is passed
+    return $self->{'mismatches'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PsiBlastHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PsiBlastHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PsiBlastHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1724 @@
+#-----------------------------------------------------------------
+# $Id: PsiBlastHSP.pm,v 1.7.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::HSP::PsiBlastHSP
+#
+# (This module was originally called Bio::Tools::Blast::HSP)
+#
+# Cared for by Steve Chervitz <sac@bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Search::HSP::PsiBlastHSP - Bioperl BLAST High-Scoring Pair object
+
+=head1 SYNOPSIS
+
+See L<Bio::Search::Hit::BlastHit>.
+
+=head1 DESCRIPTION
+
+A Bio::Search::HSP::PsiBlastHSP object provides an interface to data
+obtained in a single alignment section of a Blast report (known as a
+"High-scoring Segment Pair"). This is essentially a pairwise
+alignment with score information.
+
+PsiBlastHSP objects are accessed via L<Bio::Search::Hit::BlastHit>
+objects after parsing a BLAST report using the L<Bio::SearchIO>
+system.
+
+The construction of PsiBlastHSP objects is performed by
+Bio::Factory::BlastHitFactory in a process that is
+orchestrated by the Blast parser (L<Bio::SearchIO::psiblast>).
+The resulting PsiBlastHSPs are then accessed via
+L<Bio::Search::Hit::BlastHit>. Therefore, you do not need to
+use L<Bio::Search::HSP::PsiBlastHSP> directly. If you need to construct
+PsiBlastHSPs directly, see the new() function for details.
+
+For L<Bio::SearchIO> BLAST parsing usage examples, see the
+C<examples/search-blast> directory of the Bioperl distribution.
+
+
+=head2 Start and End coordinates
+
+Sequence endpoints are swapped so that start is always less than
+end. This affects TBLASTN/X hits on the minus strand. Strand
+information can be recovered using the strand() method. This
+normalization step is standard Bioperl practice. It also facilitates
+use of range information by methods such as match().
+
+=over 1
+
+=item * Supports BLAST versions 1.x and 2.x, gapped and ungapped.
+
+=back
+
+Bio::Search::HSP::PsiBlastHSP.pm has the ability to extract a list of all
+residue indices for identical and conservative matches along both
+query and sbjct sequences. Since this degree of detail is not always
+needed, this behavior does not occur during construction of the PsiBlastHSP
+object.  These data will automatically be collected as necessary as
+the PsiBlastHSP.pm object is used.
+
+=head1 DEPENDENCIES
+
+Bio::Search::HSP::PsiBlastHSP.pm is a concrete class that inherits from
+L<Bio::SeqFeature::SimilarityPair> and L<Bio::Search::HSP::HSPI>.
+L<Bio::Seq> and L<Bio::SimpleAlign> are employed for creating
+sequence and alignment objects, respectively.
+
+=head2 Relationship to L<Bio::SimpleAlign> and L<Bio::Seq>
+
+PsiBlastHSP.pm can provide the query or sbjct sequence as a L<Bio::Seq>
+object via the L<seq()|seq> method. The PsiBlastHSP.pm object can also create a
+two-sequence L<Bio::SimpleAlign> alignment object using the query
+and sbjct sequences via the L<get_aln()|get_aln> method. Creation of alignment
+objects is not automatic when constructing the PsiBlastHSP.pm object since
+this level of functionality is not always required and would generate
+a lot of extra overhead when crunching many reports.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Steve Chervitz E<lt>sac-at-bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 ACKNOWLEDGEMENTS
+
+This software was originally developed in the Department of Genetics
+at Stanford University. I would also like to acknowledge my
+colleagues at Affymetrix for useful feedback.
+
+=head1 SEE ALSO
+
+ Bio::Search::Hit::BlastHit.pm          - Blast hit object.
+ Bio::Search::Result::BlastResult.pm    - Blast Result object.
+ Bio::Seq.pm                            - Biosequence object
+
+=head2 Links:
+
+ http://bio.perl.org/                       - Bioperl Project Homepage
+
+=head1 COPYRIGHT
+
+Copyright (c) 1996-2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# END of main POD documentation.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::HSP::PsiBlastHSP;
+
+use strict;
+use Bio::SeqFeature::Similarity;
+
+use vars qw($GAP_SYMBOL %STRAND_SYMBOL);
+
+use overload
+    '""' => \&to_string;
+
+use base qw(Bio::SeqFeature::SimilarityPair Bio::Search::HSP::HSPI);
+
+$GAP_SYMBOL    = '-';          # Need a more general way to handle gap symbols.
+%STRAND_SYMBOL = ('Plus' => 1, 'Minus' => -1 );
+
+
+=head2 new
+
+ Usage     : $hsp = Bio::Search::HSP::PsiBlastHSP->new( %named_params );
+           : Bio::Search::HSP::PsiBlastHSP.pm objects are constructed
+           : automatically by Bio::SearchIO::BlastHitFactory.pm,
+           : so there is no need for direct instantiation.
+ Purpose   : Constructs a new PsiBlastHSP object and Initializes key variables
+           : for the HSP.
+ Returns   : A Bio::Search::HSP::PsiBlastHSP object
+ Argument  : Named parameters:
+           : Parameter keys are case-insensitive.
+           :      -RAW_DATA  => array ref containing raw BLAST report data
+           :                    for a single HSP. This includes all lines
+           :                    of the HSP alignment from a traditional BLAST
+           :                    or PSI-BLAST (non-XML) report,
+           :      -RANK         => integer (1..n).
+           :      -PROGRAM      => string ('TBLASTN', 'BLASTP', etc.).
+           :      -QUERY_NAME   => string, id of query sequence
+           :      -HIT_NAME     => string, id of hit sequence
+           :
+ Comments  : Having the raw data allows this object to do lazy parsing of
+           : the raw HSP data (i.e., not parsed until needed).
+           :
+           : Note that there is a fair amount of basic parsing that is
+           : currently performed in this module that would be more appropriate
+           : to do within a separate factory object.
+           : This parsing code will likely be relocated and more initialization
+           : parameters will be added to new().
+           :
+See Also   : L<Bio::SeqFeature::SimilarityPair::new()>, L<Bio::SeqFeature::Similarity::new()>
+
+=cut
+
+#----------------
+sub new {
+#----------------
+    my ($class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+    # Gap counters start at zero
+    $self->{'_queryGaps'} = $self->{'_sbjctGaps'} = 0;
+    my ($raw_data, $qname, $hname, $qlen, $hlen);
+    # Pull this class's named parameters out of the argument list
+    ($self->{'_prog'}, $self->{'_rank'}, $raw_data,
+     $qname, $hname) =
+      $self->_rearrange([qw( PROGRAM
+                             RANK
+                             RAW_DATA
+                             QUERY_NAME
+                             HIT_NAME
+                           )], @args );
+
+    # _set_data() does a fair amount of parsing.
+    # This will likely change (see comment above.)
+    $self->_set_data( @{$raw_data} );
+    # start(), end() and strand() are called in list context: (query, hit) values
+    my ($qb, $hb) = ($self->start());
+    my ($qe, $he) = ($self->end());
+    my ($qs, $hs) = ($self->strand());
+    my ($qf,$hf) = ($self->query->frame(),
+                    $self->hit->frame);
+    # Query-side feature, built from the values gathered above
+    $self->query( Bio::SeqFeature::Similarity->new (-start   =>$qb,
+                                                    -end     =>$qe,
+                                                    -strand  =>$qs,
+                                                    -bits    =>$self->bits,
+                                                    -score   =>$self->score,
+                                                    -frame   =>$qf,
+                                                    -seq_id  => $qname,
+                                                    -source  =>$self->{'_prog'} ));
+    # Hit-side feature, built the same way
+    $self->hit( Bio::SeqFeature::Similarity->new (-start   =>$hb,
+                                                  -end     =>$he,
+                                                  -strand  =>$hs,
+                                                  -bits    =>$self->bits,
+                                                  -score   =>$self->score,
+                                                  -frame   =>$hf,
+                                                  -seq_id  => $hname,
+                                                  -source  =>$self->{'_prog'} ));
+    # NOTE(review): $qlen and $hlen are declared above but never assigned in this
+    # constructor, so both seqlength() calls receive undef -- confirm the intent.
+    $self->query->seqlength($qlen); # query
+    $self->hit->seqlength($hlen); # subject
+    # Record the per-sequence identity fractions on the two features
+    $self->query->frac_identical($self->frac_identical('query'));
+    $self->hit->frac_identical($self->frac_identical('hit'));
+    return $self;
+}
+
+#sub DESTROY {
+#    my $self = shift;
+#    #print STDERR "--->DESTROYING $self\n";
+#}
+
+
+# Title   : _id_str;
+# Purpose : Intended for internal use only to provide a string for use
+#           within exception messages to help users figure out which
+#           query/hit caused the problem.
+# Returns : Short string with name of query and hit seq
+sub _id_str {
+    my ($self) = @_;
+    return $self->{'_id_str'} if defined $self->{'_id_str'};    # already built
+
+    # First call: build the label from the query and hit sequence names.
+    my $query_name = $self->query->seqname;
+    my $hit_name   = $self->hit->seqname;
+    return $self->{'_id_str'} = "QUERY=\"$query_name\" HIT=\"$hit_name\"";
+}
+
+#=================================================
+# Begin Bio::Search::HSP::HSPI implementation
+#=================================================
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hsp->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hsp
+           For BLAST, the algorithm denotes what type of sequence was aligned
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated
+           dna-translated dna).
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+#----------------
+sub algorithm {
+#----------------
+    my ($self) = @_;            # read-only accessor: extra arguments are ignored
+    return $self->{'_prog'};    # program name stored by new() from -PROGRAM
+}
+
+
+
+
+=head2 signif()
+
+ Usage     : $hsp_obj->signif()
+ Purpose   : Get the P-value or Expect value for the HSP.
+ Returns   : Float (0.001 or 1.3e-43)
+           : Returns P-value if it is defined, otherwise, Expect value.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Provided for consistency with BlastHit::signif()
+           : Support for returning the significance data in different
+           : formats (e.g., exponent only), is not provided for HSP objects.
+           : This is only available for the BlastHit or Blast object.
+
+See Also   : L<p()|p>, L<expect()|expect>, L<Bio::Search::Hit::BlastHit::signif()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#-----------
+sub signif {
+#-----------
+    my ($self) = @_;
+    # Prefer the P-value; fall back to the Expect value when no P-value is set.
+    return defined $self->{'_p'} ? $self->{'_p'} : $self->{'_expect'};
+}
+
+
+
+=head2 evalue
+
+ Usage     : $hsp_obj->evalue()
+ Purpose   : Get the Expect value for the HSP.
+ Returns   : Float (0.001 or 1.3e-43)
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Support for returning the expectation data in different
+           : formats (e.g., exponent only), is not provided for HSP objects.
+           : This is only available for the BlastHit or Blast object.
+
+See Also   : L<p()|p>
+
+=cut
+
+#----------
+sub evalue { my ($self) = @_; return $self->{'_expect'}; }    # stored Expect value
+#----------
+
+
+=head2 p
+
+ Usage     : $hsp_obj->p()
+ Purpose   : Get the P-value for the HSP.
+ Returns   : Float (0.001 or 1.3e-43) or undef if not defined.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : P-value is not defined with NCBI Blast2 reports.
+           : Support for returning the expectation data in different
+           : formats (e.g., exponent only) is not provided for HSP objects.
+           : This is only available for the BlastHit or Blast object.
+
+See Also   : L<expect()|expect>
+
+=cut
+
+#-----
+sub p { return $_[0]->{'_p'}; }    # stored P-value (undef when none was set)
+#-----
+
+# alias
+sub pvalue { my $self = shift; return $self->p(@_); }    # same value as p()
+
+=head2 length
+
+ Usage     : $hsp->length( [seq_type] )
+ Purpose   : Get the length of the aligned portion of the query or sbjct.
+ Example   : $hsp->length('query')
+ Returns   : integer
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'  (default = 'total')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : 'total' length is the full length of the alignment
+           : as reported in the denominators in the alignment section:
+           : "Identical = 34/120 Positives = 67/120".
+
+See Also   : L<gaps()|gaps>
+
+=cut
+
+#-----------
+sub length {
+#-----------
+## Developer note: whenever the built-in length function is needed inside
+##                 this module, it has to be called as CORE::length().
+    my ($self, $seq_type) = @_;
+    $seq_type ||= 'total';
+    $seq_type = 'sbjct' if $seq_type eq 'hit';
+
+    # _set_seq_data() is run (once) for any type other than 'total'.
+    $self->_set_seq_data() if !$self->{'_set_seq_data'} and $seq_type ne 'total';
+
+    ## The key is built from the lower-cased type, e.g. '_queryLength'.
+    return $self->{ '_' . lc($seq_type) . 'Length' };
+}
+
+
+
+=head2 gaps
+
+ Usage     : $hsp->gaps( [seq_type] )
+ Purpose   : Get the number of gaps in the query, sbjct, or total alignment.
+           : Also can return query gaps and sbjct gaps as a two-element list
+           : when in array context.
+ Example   : $total_gaps      = $hsp->gaps();
+           : ($qgaps, $sgaps) = $hsp->gaps();
+           : $qgaps           = $hsp->gaps('query');
+ Returns   : scalar context: integer
+           : array context without args: (int, int) = ('queryGaps', 'sbjctGaps')
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or 'total'
+           :  ('sbjct' is synonymous with 'hit')
+           : (default = 'total', scalar context)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Throws    : n/a
+
+See Also   : L<length()|length>, L<matches()|matches>
+
+=cut
+
+#---------
+sub gaps {
+#---------
+    my ($self, $seq_type) = @_;
+
+    $self->{'_set_seq_data'} or $self->_set_seq_data();
+
+    # With no argument the calling context picks the mode: list context gets
+    # the (query, sbjct) pair, scalar context gets the total.
+    $seq_type ||= (wantarray ? 'list' : 'total');
+    $seq_type = 'sbjct' if $seq_type eq 'hit';
+
+    # 'list' or 'array' (any case, anywhere in the argument) forces the pair.
+    return (($self->{'_queryGaps'} || 0), ($self->{'_sbjctGaps'} || 0))
+        if $seq_type =~ /list|array/i;
+
+    return ($self->{'_queryGaps'} + $self->{'_sbjctGaps'}) || 0
+        if $seq_type eq 'total';
+
+    ## Any other type selects a single counter, e.g. '_queryGaps'.
+    return $self->{ '_' . lc($seq_type) . 'Gaps' } || 0;
+}
+
+
+=head2 frac_identical
+
+ Usage     : $hsp_object->frac_identical( [seq_type] );
+ Purpose   : Get the fraction of identical positions within the given HSP.
+ Example   : $frac_iden = $hsp_object->frac_identical('query');
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or 'total'
+           :  ('sbjct' is synonymous with 'hit')
+           : default = 'total' (but see comments below).
+ Throws    : n/a
+ Comments  : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI-BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           : Therefore, when called without an argument or an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used.
+           :
+           : To get the fraction identical among only the aligned residues,
+           : ignoring the gaps, call this method with an argument of 'query'
+           : or 'sbjct' ('sbjct' is synonymous with 'hit').
+
+See Also   : L<frac_conserved()|frac_conserved>, L<num_identical()|num_identical>, L<matches()|matches>
+
+=cut
+
+#-------------------
+sub frac_identical {
+#-------------------
+# The fraction is computed on demand rather than stored from the report, which
+# saves space and lets the caller choose the denominator: the query length, the
+# sbjct length or the total alignment length.
+
+    my ($self, $seq_type) = @_;
+    $seq_type ||= 'total';
+    $seq_type = 'sbjct' if $seq_type eq 'hit';
+
+    # _set_seq_data() is run (once) for any type other than 'total'.
+    $self->_set_seq_data()
+        if $seq_type ne 'total' and not $self->{'_set_seq_data'};
+
+    ## The denominator key follows the member naming scheme, e.g. '_queryLength'.
+    my $denominator = $self->{ '_' . lc($seq_type) . 'Length' };
+    return sprintf( "%.2f", $self->{'_numIdentical'} / $denominator );
+}
+
+
+=head2 frac_conserved
+
+ Usage     : $hsp_object->frac_conserved( [seq_type] );
+ Purpose   : Get the fraction of conserved positions within the given HSP.
+           : (Note: 'conservative' positions are called 'positives' in the
+           : Blast report.)
+ Example   : $frac_cons = $hsp_object->frac_conserved('query');
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or 'total'
+           :  ('sbjct' is synonymous with 'hit')
+           : default = 'total' (but see comments below).
+ Throws    : n/a
+ Comments  : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI-BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           : Therefore, when called without an argument or an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used.
+           :
+           : To get the fraction conserved among only the aligned residues,
+           : ignoring the gaps, call this method with an argument of 'query'
+           : or 'sbjct'.
+
+See Also   : L<frac_identical()|frac_identical>, L<num_conserved()|num_conserved>, L<matches()|matches>
+
+=cut
+
+#--------------------
+sub frac_conserved {
+#--------------------
+# The fraction is computed on demand rather than stored from the report, which
+# saves space and lets the caller choose the denominator: the query length, the
+# sbjct length or the total alignment length.
+
+    my ($self, $seq_type) = @_;
+    $seq_type ||= 'total';
+    $seq_type = 'sbjct' if $seq_type eq 'hit';
+
+    # _set_seq_data() is run (once) for any type other than 'total'.
+    $self->_set_seq_data()
+        if $seq_type ne 'total' and not $self->{'_set_seq_data'};
+
+    ## The denominator key follows the member naming scheme, e.g. '_queryLength'.
+    my $denominator = $self->{ '_' . lc($seq_type) . 'Length' };
+
+    return sprintf( "%.2f", $self->{'_numConserved'} / $denominator );
+}
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+#----------------
+sub query_string{ my ($self) = @_; return $self->seq_str('query'); }
+#----------------
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+#----------------
+sub hit_string{ my ($self) = @_; return $self->seq_str('hit'); }
+#----------------
+
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : none
+
+=cut
+
+#----------------
+sub homology_string{ my ($self) = @_; return $self->seq_str('match'); }
+#----------------
+
+#=================================================
+# End Bio::Search::HSP::HSPI implementation
+#=================================================
+
+# Older method delegating to method defined in HSPI.
+
+=head2 expect
+
+See L<Bio::Search::HSP::HSPI::expect()|Bio::Search::HSP::HSPI>
+
+=cut
+
+#----------
+sub expect { my $self = shift; return $self->evalue(@_); }
+#----------
+
+
+=head2 rank
+
+ Usage     : $hsp->rank( [string] );
+ Purpose   : Get the rank of the HSP within a given Blast hit.
+ Example   : $rank = $hsp->rank;
+ Returns   : Integer (1..n) corresponding to the order in which the HSP
+             appears in the BLAST report.
+
+=cut
+
+#'
+
+#----------
+sub rank { my ($self) = @_; return $self->{'_rank'}; }
+#----------
+
+# For backward compatibility
+#----------
+sub name { my ($self) = @_; return $self->rank; }
+#----------
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $hsp->to_string;
+ Function: Returns a string representation for the Blast HSP.
+           Primarily intended for debugging purposes.
+ Example : see usage
+ Returns : A string of the form:
+           [PsiBlastHSP] <rank>
+           e.g.:
+           [PsiBlastHSP] 1
+ Args    : None
+
+=cut
+
+#----------
+sub to_string {
+#----------
+    my ($self) = @_;
+    return "[PsiBlastHSP] " . $self->rank();    # class tag followed by the HSP's rank
+}
+
+
+#=head2 _set_data (Private method)
+#
+# Usage     : called automatically during object construction.
+# Purpose   : Parses the raw HSP section from a flat BLAST report and
+#             sets the query sequence, sbjct sequence, and the "match" data
+#           : which consists of the symbols between the query and sbjct lines
+#           : in the alignment.
+# Argument  : Array (all lines for a single, complete HSP, from a raw,
+#             flat (i.e., non-XML) BLAST report)
+# Throws    : Propagates any exceptions from the methods called ("See Also")
+#
+#See Also   : L<_set_seq()|_set_seq>, L<_set_score_stats()|_set_score_stats>, L<_set_match_stats()|_set_match_stats>, L<_initialize()|_initialize>
+#
+#=cut
+
+#--------------
+sub _set_data {
+#--------------
+    # Purpose : Sorts the raw lines of a single HSP into score statistics,
+    #           match statistics and the Query / match / Sbjct alignment
+    #           rows. The alignment rows are stored unparsed in
+    #           _queryList, _matchList and _sbjctList.
+    # Args    : List of lines for one complete HSP.
+    # Throws  : Exception if no "Identities" count was parsed, or if no
+    #           Query or Sbjct alignment rows were found.
+    my $self = shift;
+    my @data = @_;
+    my @queryList  = ();  # 'Query' = SEQUENCE USED TO QUERY THE DATABASE.
+    my @sbjctList  = ();  # 'Sbjct' = HOMOLOGOUS SEQUENCE FOUND IN THE DATABASE.
+    my @matchList  = ();
+    my $matchLine  = 0;   # True right after a Query row: next line is 'match' data.
+
+    #print STDERR "PsiBlastHSP: set_data()\n";
+
+    my($line, $aln_row_len, $length_diff);
+    $length_diff = 0;
+
+    # Collecting data for all lines in the alignment
+    # and then storing the collections for possible processing later.
+    #
+    # Note that "match" lines may not be properly padded with spaces.
+    # This loop now properly handles such cases:
+    # Query: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVIXXXXX 1200
+    #             PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVI
+    # Sbjct: 1141 PSLVELTIRDCPRLEVGPMIRSLPKFPMLKKLDLAVANIIEEDLDVIGSLEELVILSLKL 1200
+
+    foreach $line( @data ) {
+        # Blank lines are separators -- except directly after a Query row,
+        # where a blank line is a match row without any match symbols.
+        # Skipping it there would make the following Sbjct row be taken
+        # for the match row and lost from @sbjctList.
+        next if $line =~ /^\s*$/ and not $matchLine;
+
+        if( $line =~ /^ ?Score/ ) {
+            $self->_set_score_stats( $line );
+        } elsif( $line =~ /^ ?(Identities|Positives|Strand)/ ) {
+            $self->_set_match_stats( $line );
+        } elsif( $line =~ /^ ?Frame = ([\d+-]+)/ ) {
+          # Version 2.0.8 has Frame information on a separate line.
+          # Storing frame according to SeqFeature::Generic::frame()
+          # which does not contain strand info (use strand()).
+          my $frame = abs($1) - 1;
+          $self->frame( $frame );
+        } elsif( $line =~ /^(Query:?[\s\d]+)([^\s\d]+)/ ) {
+            push @queryList, $line;
+            $self->{'_match_indent'} = CORE::length $1;
+            $aln_row_len = (CORE::length $1) + (CORE::length $2);
+            $matchLine = 1;
+        } elsif( $matchLine ) {
+            # An all-blank match row is rebuilt purely from padding so that
+            # no stray line terminator ends up inside the match string.
+            $line = '' if $line =~ /^\s*$/;
+            # Pad the match line with spaces if necessary.
+            $length_diff = $aln_row_len - CORE::length $line;
+            $length_diff > 0 and $line .= ' ' x $length_diff;
+            push @matchList, $line;
+            $matchLine = 0;
+        } elsif( $line =~ /^Sbjct/ ) {
+            push @sbjctList, $line;
+        }
+    }
+    # Storing the query and sbjct lists in case they are needed later.
+    # We could make this conditional to save memory.
+    $self->{'_queryList'} = \@queryList;
+    $self->{'_sbjctList'} = \@sbjctList;
+
+    # Storing the match list in case it is needed later.
+    $self->{'_matchList'} = \@matchList;
+
+    if(not defined ($self->{'_numIdentical'})) {
+        my $id_str = $self->_id_str;
+        $self->throw( -text  => "Can't parse match statistics. Possibly a new or unrecognized Blast format. ($id_str)");
+    }
+
+    if(!scalar @queryList or !scalar @sbjctList) {
+        my $id_str = $self->_id_str;
+        $self->throw( "Can't find query or sbjct alignment lines. Possibly unrecognized Blast format. ($id_str)");
+    }
+}
+
+
+#=head2 _set_score_stats (Private method)
+#
+# Usage     : called automatically by _set_data()
+# Purpose   : Sets various score statistics obtained from the HSP listing.
+# Argument  : String with any of the following formats:
+#           : blast2:  Score = 30.1 bits (66), Expect = 9.2
+#           : blast2:  Score = 158.2 bits (544), Expect(2) = e-110
+#           : blast1:  Score = 410 (144.3 bits), Expect = 1.7e-40, P = 1.7e-40
+#           : blast1:  Score = 55 (19.4 bits), Expect = 5.3, Sum P(3) = 0.99
+# Throws    : Exception if the stats cannot be parsed, probably due to a change
+#           : in the Blast report format.
+#
+#See Also   : L<_set_data()|_set_data>
+#
+#=cut
+
+#--------------------
+sub _set_score_stats {
+#--------------------
+    # Purpose : Parses the "Score = ..." header line of an HSP and stores
+    #           bits, raw score, expect, P (blast1 only) and N (when listed).
+    # Args    : the score line, in one of the four blast1/blast2 layouts
+    #           matched below.
+    # Throws  : Bio::Root::Exception if the line matches none of them.
+    my ($self, $data) = @_;
+
+    my ($expect, $p);
+
+    if($data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect = +([\d.e+-]+)/) {
+        # blast2 format n = 1
+        $self->bits($1);
+        $self->score($2);
+        $expect            = $3;
+    } elsif($data =~ /Score = +([\d.e+-]+) bits \(([\d.e+-]+)\), +Expect\((\d+)\) = +([\d.e+-]+)/) {
+        # blast2 format n > 1
+        $self->bits($1);
+        $self->score($2);
+        $self->{'_n'}      = $3;
+        $expect            = $4;
+
+    } elsif($data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), P = +([\d.e-]+)/) {
+        # blast1 format, n = 1
+        $self->score($1);
+        $self->bits($2);
+        $expect            = $3;
+        $p                 = $4;
+
+    } elsif($data =~ /Score = +([\d.e+-]+) \(([\d.e+-]+) bits\), +Expect = +([\d.e+-]+), +Sum P\((\d+)\) = +([\d.e-]+)/) {
+        # blast1 format, n > 1
+        $self->score($1);
+        $self->bits($2);
+        $expect            = $3;
+        $self->{'_n'}      = $4;
+        $p                 = $5;
+
+    } else {
+        my $id_str = $self->_id_str;
+        $self->throw(-class => 'Bio::Root::Exception',
+                     -text => "Can't parse score statistics: unrecognized format. ($id_str)",
+                     -value => $data);
+    }
+    # Values such as "e-110" lack a mantissa; make them valid numbers.
+    $expect = "1$expect" if $expect =~ /^e/i;
+    $p      = "1$p"      if defined $p and $p =~ /^e/i;
+
+    $self->{'_expect'} = $expect;
+    # Test definedness, not truth: a reported P-value of 0 is a real
+    # (maximally significant) value and must not be replaced by expect.
+    $self->{'_p'}      = $p;
+    $self->significance( defined $p ? $p : $expect );
+}
+
+
+#=head2 _set_match_stats (Private method)
+#
+# Usage     : Private method; called automatically by _set_data()
+# Purpose   : Sets various matching statistics obtained from the HSP listing.
+# Argument  : blast2: Identities = 23/74 (31%), Positives = 29/74 (39%), Gaps = 17/74 (22%)
+#           : blast2: Identities = 57/98 (58%), Positives = 74/98 (75%)
+#           : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%)
+#           : blast1: Identities = 87/204 (42%), Positives = 126/204 (61%), Frame = -3
+#           : WU-blast: Identities = 310/553 (56%), Positives = 310/553 (56%), Strand = Minus / Plus
+# Throws    : Exception if the stats cannot be parsed, probably due to a change
+#           : in the Blast report format.
+# Comments  : The "Gaps = " data in the HSP header has a different meaning depending
+#           : on the type of Blast: for BLASTP, this number is the total number of
+#           : gaps in query+sbjct; for TBLASTN, it is the number of gaps in the
+#           : query sequence only. Thus, it is safer to collect the data
+#           : separately by examining the actual sequence strings as is done
+#           : in _set_seq().
+#
+#See Also   : L<_set_data()|_set_data>, L<_set_seq()|_set_seq>
+#
+#=cut
+
+#--------------------
+sub _set_match_stats {
+#--------------------
+    # Picks the identity / positive counts, the reading frame and the
+    # strand names out of one statistics line of the HSP header. Fields
+    # that do not occur in the line leave their members untouched.
+    my $self  = shift;
+    my $stats = shift;
+
+    # blast1 or 2 format: "Identities = n/len"
+    $stats =~ m!Identities = (\d+)/(\d+)!
+        and @{$self}{'_numIdentical', '_totalLength'} = ($1, $2);
+
+    # blast1 or 2 format: "Positives = n/len"
+    $stats =~ m!Positives = (\d+)/(\d+)!
+        and @{$self}{'_numConserved', '_totalLength'} = ($1, $2);
+
+    $self->frame($1) if $stats =~ m!Frame = ([\d+-]+)!;
+
+    # Strand data is not always present in this line.
+    # _set_seq() will also set strand information.
+    $stats =~ m!Strand = (\w+) / (\w+)!
+        and @{$self}{'_queryStrand', '_sbjctStrand'} = ($1, $2);
+
+#    if($data =~ m!Gaps = (\d+)/(\d+)!) {
+#         $self->{'_totalGaps'} = $1;
+#    } else {
+#         $self->{'_totalGaps'} = 0;
+#    }
+}
+
+
+
+#=head2 _set_seq_data (Private method)
+#
+# Usage     : called automatically when sequence data is requested.
+# Purpose   : Sets the HSP sequence data for both query and sbjct sequences.
+#           : Includes: start, stop, length, gaps, and raw sequence.
+# Argument  : n/a
+# Throws    : Propagates any exception thrown by _set_seq()
+# Comments  : Uses raw data stored by _set_data() during object construction.
+#           : These data are not always needed, so it is conditionally
+#           : executed only upon demand by methods such as gaps(), _set_residues(),
+#           : etc. _set_seq() does the dirty work.
+#
+#See Also   : L<_set_seq()|_set_seq>
+#
+#=cut
+
+#-----------------
+sub _set_seq_data {
+#-----------------
+    # Parses the stored raw Query and Sbjct rows via _set_seq(), then
+    # discards the raw rows and flags the object as parsed.
+    my ($self) = @_;
+
+    $self->_set_seq('query', @{ $self->{'_queryList'} });
+    $self->_set_seq('sbjct', @{ $self->{'_sbjctList'} });
+
+    # Liberate some memory: empty both raw lists, then drop the references.
+    for my $member ('_sbjctList', '_queryList') {
+        @{ $self->{$member} } = ();
+    }
+    for my $member ('_queryList', '_sbjctList') {
+        undef $self->{$member};
+    }
+
+    $self->{'_set_seq_data'} = 1;
+}
+
+
+
+#=head2 _set_seq (Private method)
+#
+# Usage     : called automatically by _set_seq_data()
+#           : $hsp_obj->_set_seq($seq_type, @data);
+# Purpose   : Sets sequence information for both the query and sbjct sequences.
+#           : Directly counts the number of gaps in each sequence (if gapped Blast).
+# Argument  : $seq_type = 'query' or 'sbjct'
+#           : @data = all seq lines with the form:
+#           : Query: 61  SPHNVKDRKEQNGSINNAISPTATANTSGSQQINIDSALRDRSSNVAAQPSLSDASSGSN 120
+# Throws    : Exception if data strings cannot be parsed, probably due to a change
+#           : in the Blast report format.
+# Comments  : Uses first argument to determine which data members to set
+#           : making this method sensitive to data member name changes.
+#           : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
+# Warning   : Sequence endpoints are normalized so that start < end. This affects HSPs
+#           : for TBLASTN/X hits on the minus strand. Normalization facilitates use
+#           : of range information by methods such as match().
+#
+#See Also   : L<_set_seq_data()|_set_seq_data>, L<matches()|matches>, L<range()|range>, L<start()|start>, L<end()|end>
+#
+#=cut
+
+#-------------
+sub _set_seq {
+#-------------
+    # Purpose : Derives start, stop, length, strand, gap count and the raw
+    #           residue rows for one side ('query' or 'sbjct') of the HSP.
+    # Args    : sequence type, followed by that side's alignment rows.
+    # Throws  : Exception if no row could be parsed.
+    my ($self, $seqType, @rows) = @_;
+    my (@coords, @residues);
+
+    # A row reads "<start> <residues> <end>"; unparsable rows only warn.
+    for my $row ( @rows ) {
+        if( $row =~ m/(\d+) *([^\d\s]+) *(\d+)/ ) {
+            push @coords, $1, $3;
+            push @residues, $2;
+        #print STDERR "_set_seq found sequence \"$2\"\n";
+        } else {
+            $self->warn("Bad sequence data: $row");
+        }
+    }
+
+    unless( @residues and @coords ) {
+        my $id_str = $self->_id_str;
+        $self->throw("Can't set sequence: missing data. Possibly unrecognized Blast format. ($id_str)");
+    }
+
+    # Sensitive to member name changes.
+    my $key = "_\L$seqType\E";
+    my ($first, $last) = ($coords[0], $coords[ $#coords ]);
+    $self->{$key.'Start'}  = $first;
+    $self->{$key.'Stop'}   = $last;
+    $self->{$key.'Seq'}    = \@residues;
+    $self->{$key.'Length'} = abs($last - $first) + 1;
+
+    # Adjust lengths for BLASTX, TBLASTN, TBLASTX sequences
+    # Converting nucl coords to amino acid coords.
+    my $prog = $self->algorithm;
+    my $in_nucl_coords =
+           ($prog eq 'TBLASTN' and $key eq '_sbjct')
+        || ($prog eq 'BLASTX'  and $key eq '_query')
+        || ($prog eq 'TBLASTX');
+    $self->{$key.'Length'} /= 3 if $in_nucl_coords;
+
+    if( $prog ne 'BLASTP' ) {
+        if( $prog =~ /BLASTN/ or ($prog =~ /BLASTX/ and $key eq '_query') ) {
+            $self->{$key.'Strand'} = 'Plus';
+        }
+        # Normalize sequence endpoints so that start < end.
+        # Reverse complement or 'minus strand' HSPs get flipped here.
+        if( $first > $last ) {
+            $self->{$key.'Start'}  = $last;
+            $self->{$key.'Stop'}   = $first;
+            $self->{$key.'Strand'} = 'Minus';
+        }
+    }
+
+    ## Count number of gaps in each seq. Only need to do this for gapped Blasts.
+#    if($self->{'_gapped'}) {
+        (my $seqstr = join('', @residues)) =~ s/\s//g;
+        my $num_gaps = CORE::length($seqstr) - $self->{$key.'Length'};
+        $self->{$key.'Gaps'} = $num_gaps if $num_gaps > 0;
+#    }
+}
+
+
+#=head2 _set_residues (Private method)
+#
+# Usage     : called automatically when residue data is requested.
+# Purpose   : Sets the residue numbers representing the identical and
+#           : conserved positions. These data are obtained by analyzing the
+#           : symbols between query and sbjct lines of the alignments.
+# Argument  : n/a
+# Throws    : Propagates any exception thrown by _set_seq_data() and _set_match_seq().
+# Comments  : These data are not always needed, so it is conditionally
+#           : executed only upon demand by methods such as seq_inds().
+#           : Behavior is dependent on the type of BLAST analysis (TBLASTN, BLASTP, etc).
+#
+#See Also   : L<_set_seq_data()|_set_seq_data>, L<_set_match_seq()|_set_match_seq>, seq_inds()
+#
+#=cut
+
+#------------------
+sub _set_residues {
+#------------------
+    # Purpose : Records, for query and sbjct, which residue numbers are
+    #           identical or conserved, by walking the match line and both
+    #           sequences from their last character to their first.
+    # Args    : none
+    # Sets    : _identicalRes_query, _conservedRes_query,
+    #           _identicalRes_sbjct, _conservedRes_sbjct (hash refs keyed
+    #           by residue number).
+    # Throws  : Propagates exceptions from _set_seq_data() and
+    #           _set_match_seq().
+    my $self      = shift;
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    # Using hashes to avoid saving duplicate residue numbers.
+    my %identicalList_query = ();
+    my %identicalList_sbjct = ();
+    my %conservedList_query = ();
+    my %conservedList_sbjct = ();
+
+    # Plain conditional expression: "my ... if ..." is undefined behaviour
+    # in Perl and can leave a stale reference from an earlier call on a
+    # different object in $aref.
+    my $aref = ref $self->{'_matchSeq'}
+             ? $self->{'_matchSeq'}
+             : $self->_set_match_seq();
+    $aref  ||= $self->{'_matchSeq'};
+    my $seqString = join('', @$aref );
+
+    my $qseq = join('',@{$self->{'_querySeq'}});
+    my $sseq = join('',@{$self->{'_sbjctSeq'}});
+    my $resCount_query = $self->{'_queryStop'} || 0;
+    my $resCount_sbjct = $self->{'_sbjctStop'} || 0;
+
+    # Stop coordinates of translated sequences are in nucleotide units.
+    my $prog = $self->algorithm;
+    if($prog !~ /^BLASTP|^BLASTN/) {
+        if($prog eq 'TBLASTN') {
+            $resCount_sbjct /= 3;
+        } elsif($prog eq 'BLASTX') {
+            $resCount_query /= 3;
+        } elsif($prog eq 'TBLASTX') {
+            $resCount_query /= 3;
+            $resCount_sbjct /= 3;
+        }
+    }
+
+    my ($mchar, $schar, $qchar);
+    # Loop on the remaining length rather than on the chopped character,
+    # which would end the walk early at a "0" in the match line.
+    while( CORE::length($seqString) ) {
+        $mchar = chop($seqString);
+        ($qchar, $schar) = (chop($qseq), chop($sseq));
+        if( $mchar eq '+' ) {
+            $conservedList_query{ $resCount_query } = 1;
+            $conservedList_sbjct{ $resCount_sbjct } = 1;
+        } elsif( $mchar ne ' ' ) {
+            $identicalList_query{ $resCount_query } = 1;
+            $identicalList_sbjct{ $resCount_sbjct } = 1;
+        }
+        # A gap consumes no residue number on the gapped side.
+        $resCount_query-- if $qchar ne $GAP_SYMBOL;
+        $resCount_sbjct-- if $schar ne $GAP_SYMBOL;
+    }
+    $self->{'_identicalRes_query'} = \%identicalList_query;
+    $self->{'_conservedRes_query'} = \%conservedList_query;
+    $self->{'_identicalRes_sbjct'} = \%identicalList_sbjct;
+    $self->{'_conservedRes_sbjct'} = \%conservedList_sbjct;
+
+}
+
+
+
+
+#=head2 _set_match_seq (Private method)
+#
+# Usage     : $hsp_obj->_set_match_seq()
+# Purpose   : Set the 'match' sequence for the current HSP (symbols in between
+#           : the query and sbjct lines.)
+# Returns   : Array reference holding the match sequences lines.
+# Argument  : n/a
+# Throws    : Exception if the _matchList field is not set.
+# Comments  : The match information is not always necessary. This method
+#           : allows it to be conditionally prepared.
+#           : Called by _set_residues() and seq_str().
+#
+#See Also   : L<_set_residues()|_set_residues>, L<seq_str()|seq_str>
+#
+#=cut
+
+#-------------------
+sub _set_match_seq {
+#-------------------
+    # Purpose : Turns the raw match rows kept in _matchList into the final
+    #           match sequence rows (line end and leading indent removed)
+    #           and stores them in _matchSeq.
+    # Returns : Array reference holding the match sequence rows.
+    # Throws  : Exception if _matchList does not hold a reference.
+    my $self = shift;
+
+    if( ! ref($self->{'_matchList'}) ) {
+        my $id_str = $self->_id_str;
+        $self->throw("Can't set HSP match sequence: No data ($id_str)");
+    }
+
+    my @data   = @{$self->{'_matchList'}};
+    my $indent = $self->{'_match_indent'};
+
+    my(@sequence);
+    foreach my $row ( @data ) {
+        chomp($row);
+        ## Remove leading spaces; (note: aln may begin with a space
+        ## which is why we can't use s/^ +//).
+        $row =~ s/^ {$indent}//;
+        push @sequence, $row;
+    }
+    # Liberate some memory. Assign the empty list: assigning undef would
+    # leave a one-element list behind instead of emptying the array.
+    @{$self->{'_matchList'}} = ();
+    $self->{'_matchList'} = undef;
+
+    $self->{'_matchSeq'} = \@sequence;
+
+    return $self->{'_matchSeq'};
+}
+
+
+=head2 n
+
+ Usage     : $hsp_obj->n()
+ Purpose   : Get the N value (num HSPs on which P/Expect is based).
+           : This value is not defined with NCBI Blast2 with gapping.
+ Returns   : Integer or null string if not defined.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : The 'N' value is listed in parenthesis with P/Expect value:
+           : e.g., P(3) = 1.2e-30  ---> (N = 3).
+           : Not defined in NCBI Blast2 with gaps.
+           : This typically is equal to the number of HSPs but not always.
+           : To obtain the number of HSPs, use Bio::Search::Hit::BlastHit::num_hsps().
+
+See Also   : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair>
+
+=cut
+
+#-----
+sub n {
+    # Stored N value, or the empty string when it is unset or false.
+    my ($self) = @_;
+    return $self->{'_n'} || '';
+}
+#-----
+
+
+=head2 matches
+
+ Usage     : $hsp->matches([-SEQ => seq_type], [-START => start], [-STOP => stop]);
+ Purpose   : Get the total number of identical and conservative matches
+           : in the query or sbjct sequence for the given HSP. Optionally can
+           : report data within a defined interval along the seq.
+           : (Note: 'conservative' matches are called 'positives' in the
+           : Blast report.)
+ Example   : ($id,$cons) = $hsp_object->matches(-SEQ => 'hit');
+           : ($id,$cons) = $hsp_object->matches(-SEQ => 'query', -START => 300, -STOP => 400);
+ Returns   : 2-element array of integers
+ Argument  : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query)
+           :  ('sbjct' is synonymous with 'hit')
+           : (2) start = Starting coordinate (optional)
+           : (3) stop  = Ending coordinate (optional)
+ Throws    : Exception if the supplied coordinates are out of range.
+ Comments  : Relies on seq_str('match') to get the string of alignment symbols
+           : between the query and sbjct lines which are used for determining
+           : the number of identical and conservative matches.
+
+See Also   : L<length()|length>, L<gaps()|gaps>, L<seq_str()|seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#-----------
+sub matches {
+#-----------
+    # Purpose : Returns (identical, conserved) match counts, either for the
+    #           whole HSP or for a sub-range of the query or sbjct sequence.
+    # Args    : Named parameters read from the argument hash:
+    #             -SEQ   => 'query' (default), 'hit' or 'sbjct'
+    #             -START => start coordinate (optional)
+    #             -STOP  => stop coordinate (optional)
+    # Returns : Two-element list. With neither -START nor -STOP these are
+    #           the stored _numIdentical and _numConserved values; otherwise
+    #           they are counted from the selected part of the match line.
+    # Throws  : Exception if the selected part of the match line is empty.
+    my( $self, %param ) = @_;
+    my(@data);
+    my($seqType, $beg, $end) = ($param{-SEQ}, $param{-START}, $param{-STOP});
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    my($start,$stop);
+
+    if(!defined $beg && !defined $end) {
+        ## Get data for the whole alignment.
+        push @data, ($self->{'_numIdentical'}, $self->{'_numConserved'});
+    } else {
+        ## Get the substring representing the desired sub-section of aln.
+        $beg ||= 0;
+        $end ||= 0;
+        ($start,$stop) = $self->range($seqType);
+        # A missing (or zero) bound is replaced by the corresponding end of
+        # the HSP; the bound that was supplied is then applied as an offset
+        # from that end.
+        if($beg == 0) { $beg = $start; $end = $beg+$end; }
+        elsif($end == 0) { $end = $stop; $beg = $end-$beg; }
+
+        # Clamp the requested interval to the HSP's own range.
+        if($end >= $stop) { $end = $stop; } ##ML changed from if (end >stop)
+        else { $end += 1;}   ##ML moved from commented position below, makes
+                             ##more sense here
+#        if($end > $stop) { $end = $stop; }
+        if($beg < $start) { $beg = $start; }
+#        else { $end += 1;}
+
+#        my $seq = substr($self->seq_str('match'), $beg-$start, ($end-$beg));
+
+        ## ML: START fix for substr out of range error ------------------
+        my $seq = "";
+        my $prog = $self->algorithm;
+        # For the TBLASTN sbjct and the BLASTX query, offset and length are
+        # divided by 3 before being applied to the match string.
+        if (($prog eq 'TBLASTN') and ($seqType eq 'sbjct'))
+        {
+            $seq = substr($self->seq_str('match'),
+                          int(($beg-$start)/3), int(($end-$beg+1)/3));
+
+        } elsif (($prog eq 'BLASTX') and ($seqType eq 'query'))
+        {
+            $seq = substr($self->seq_str('match'),
+                          int(($beg-$start)/3), int(($end-$beg+1)/3));
+        } else {
+            $seq = substr($self->seq_str('match'),
+                          $beg-$start, ($end-$beg));
+        }
+        ## ML: End of fix for  substr out of range error -----------------
+
+
+        ## ML: debugging code
+        ## This is where we get our exception.  Try printing out the values going
+        ## into this:
+        ##
+#         print STDERR
+#             qq(*------------MY EXCEPTION --------------------\nSeq: ") ,
+#             $self->seq_str("$seqType"), qq("\n),$self->rank,",(  index:";
+#         print STDERR  $beg-$start, ", len: ", $end-$beg," ), (HSPRealLen:",
+#             CORE::length $self->seq_str("$seqType");
+#         print STDERR ", HSPCalcLen: ", $stop - $start +1 ," ),
+#             ( beg: $beg, end: $end ), ( start: $start, stop: stop )\n";
+         ## ML: END DEBUGGING CODE----------
+
+        if(!CORE::length $seq) {
+            my $id_str = $self->_id_str;
+            $self->throw("Undefined $seqType sub-sequence ($beg,$end). Valid range = $start - $stop ($id_str)");
+        }
+        ## Get data for a substring.
+#        printf "Collecting HSP subsection data: beg,end = %d,%d; start,stop = %d,%d\n%s<---\n", $beg, $end, $start, $stop, $seq;
+#        printf "Original match seq:\n%s\n",$self->seq_str('match');
+        # Every non-space symbol counts as conserved; of those, every symbol
+        # other than '+' also counts as identical.
+        $seq =~ s/ //g;  # remove space (no info).
+        my $len_cons = CORE::length $seq;
+        $seq =~ s/\+//g;  # remove '+' characters (conservative substitutions)
+        my $len_id = CORE::length $seq;
+        push @data, ($len_id, $len_cons);
+#        printf "  HSP = %s\n  id = %d; cons = %d\n", $self->rank, $len_id, $len_cons; <STDIN>;
+    }
+    @data;
+}
+
+
+=head2 num_identical
+
+ Usage     : $hsp_object->num_identical();
+ Purpose   : Get the number of identical positions within the given HSP.
+ Example   : $num_iden = $hsp_object->num_identical();
+ Returns   : integer
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<num_conserved()|num_conserved>, L<frac_identical()|frac_identical>
+
+=cut
+
+#-------------------
+sub num_identical {
+#-------------------
+    # Read-only accessor for the stored '_numIdentical' count.
+    my ($self) = @_;
+    return $self->{'_numIdentical'};
+}
+
+
+=head2 num_conserved
+
+ Usage     : $hsp_object->num_conserved();
+ Purpose   : Get the number of conserved positions within the given HSP.
+ Example   : $num_cons = $hsp_object->num_conserved();
+ Returns   : integer
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<num_identical()|num_identical>, L<frac_conserved()|frac_conserved>
+
+=cut
+
+#-------------------
+sub num_conserved {
+#-------------------
+    # Read-only accessor for the stored '_numConserved' count.
+    my ($self) = @_;
+    return $self->{'_numConserved'};
+}
+
+
+
+=head2 range
+
+ Usage     : $hsp->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($query_beg, $query_end) = $hsp->range('query');
+           : ($hit_beg, $hit_end) = $hsp->range('hit');
+ Returns   : Two-element array of integers
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<end()|end>
+
+=cut
+
+#----------
+sub range {
+#----------
+    # Returns the (start, stop) members of the query (default) or sbjct
+    # sequence, parsing the alignment rows first if not yet done.
+    my $self    = shift;
+    my $seqType = shift;
+
+    unless( $self->{'_set_seq_data'} ) {
+        $self->_set_seq_data();
+    }
+
+    $seqType = 'query' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    ## Sensitive to member name changes.
+    my $prefix = "_\L$seqType\E";
+
+    return ($self->{$prefix.'Start'}, $self->{$prefix.'Stop'});
+}
+
+=head2 start
+
+ Usage     : $hsp->start( [seq_type] );
+ Purpose   : Gets the start coordinate for the query, sbjct, or both sequences
+           : in the HSP alignment.
+           : NOTE: Start will always be less than end.
+           : To determine strand, use $hsp->strand()
+ Example   : $query_beg = $hsp->start('query');
+           : $hit_beg = $hsp->start('hit');
+           : ($query_beg, $hit_beg) = $hsp->start();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default= 'query')
+           :  ('sbjct' is synonymous with 'hit')
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Throws    : n/a
+
+See Also   : L<end()|end>, L<range()|range>
+
+=cut
+
+#----------
+sub start {
+#----------
+    # Start coordinate of the query or sbjct sequence. Without a seq_type
+    # the calling context decides: list context yields (query, sbjct)
+    # starts, scalar context the query start.
+    my $self    = shift;
+    my $seqType = shift;
+
+    unless( $seqType ) {
+        $seqType = wantarray ? 'list' : 'query';
+    }
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    return ($self->{'_queryStart'}, $self->{'_sbjctStart'})
+        if $seqType =~ /list|array/i;
+
+    ## Sensitive to member name changes.
+    my $prefix = "_\L$seqType\E";
+    return $self->{$prefix.'Start'};
+}
+
+=head2 end
+
+ Usage     : $hsp->end( [seq_type] );
+ Purpose   : Gets the end coordinate for the query, sbjct, or both sequences
+           : in the HSP alignment.
+           : NOTE: Start will always be less than end.
+           : To determine strand, use $hsp->strand()
+ Example   : $query_end = $hsp->end('query');
+           : $hit_end = $hsp->end('hit');
+           : ($query_end, $hit_end) = $hsp->end();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default= 'query')
+           :  ('sbjct' is synonymous with 'hit')
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<range()|range>, L<strand()|strand>
+
+=cut
+
+#----------
+sub end {
+#----------
+    # End coordinate of the query or sbjct sequence. Without a seq_type
+    # the calling context decides: list context yields (query, sbjct)
+    # ends, scalar context the query end.
+    my $self    = shift;
+    my $seqType = shift;
+
+    unless( $seqType ) {
+        $seqType = wantarray ? 'list' : 'query';
+    }
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    return ($self->{'_queryStop'}, $self->{'_sbjctStop'})
+        if $seqType =~ /list|array/i;
+
+    ## Sensitive to member name changes.
+    my $prefix = "_\L$seqType\E";
+    return $self->{$prefix.'Stop'};
+}
+
+
+
+=head2 strand
+
+ Usage     : $hsp_object->strand( [seq_type] )
+ Purpose   : Get the strand of the query or sbjct sequence.
+ Example   : print $hsp->strand('query');
+           : ($query_strand, $hit_strand) = $hsp->strand();
+ Returns   : -1, 0, or 1
+           : -1 = Minus strand, +1 = Plus strand
+           : Returns 0 if strand is not defined, which occurs
+           : for BLASTP reports, and the query of TBLASTN
+           : as well as the hit of BLASTX reports.
+           : In scalar context without arguments, returns queryStrand value.
+           : In array context without arguments, returns a two-element list
+           :    of integers (queryStrand, sbjctStrand).
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : seq_type: 'query' or 'hit' or 'sbjct' or undef
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<_set_seq()>, L<_set_match_stats()>
+
+=cut
+
+#-----------
+sub strand {
+#-----------
+    # Strand code of the query or sbjct sequence, looked up in
+    # %STRAND_SYMBOL; 0 when no strand applies or none is known. A
+    # seq_type of 'list'/'array' (or list context without argument)
+    # yields the (query, sbjct) pair.
+    my $self    = shift;
+    my $seqType = shift;
+
+    $seqType = (wantarray ? 'list' : 'query') unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    ## Sensitive to member name format.
+    my $member = "_\L$seqType\E";
+
+    # $member could be '_list'.
+    # Parse the alignment rows only if that has not happened yet and the
+    # query strand is not already known.
+    unless( $self->{'_set_seq_data'} ) {
+        $self->_set_seq_data() unless $self->{'_queryStrand'};
+    }
+
+    my $prog = $self->algorithm;
+
+    if( $member !~ /list|array/i ) {
+        local $^W = 0;
+        return $STRAND_SYMBOL{$self->{$member.'Strand'}} || 0;
+    }
+
+    my ($qstr, $hstr);
+    if( $prog eq 'BLASTP' ) {
+        ($qstr, $hstr) = (0, 0);
+    }
+    elsif( $prog eq 'TBLASTN' ) {
+        ($qstr, $hstr) = (0, $STRAND_SYMBOL{$self->{'_sbjctStrand'}});
+    }
+    elsif( $prog eq 'BLASTX' ) {
+        ($qstr, $hstr) = ($STRAND_SYMBOL{$self->{'_queryStrand'}}, 0);
+    }
+    else {
+        $qstr = $STRAND_SYMBOL{$self->{'_queryStrand'}} if defined $self->{'_queryStrand'};
+        $hstr = $STRAND_SYMBOL{$self->{'_sbjctStrand'}} if defined $self->{'_sbjctStrand'};
+    }
+    return ($qstr || 0, $hstr || 0);
+}
+
+
+=head2 seq
+
+ Usage     : $hsp->seq( [seq_type] );
+ Purpose   : Get the query or sbjct sequence as a Bio::Seq.pm object.
+ Example   : $seqObj = $hsp->seq('query');
+ Returns   : Object reference for a Bio::Seq.pm object.
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query').
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : Propagates any exception that occurs during construction
+           : of the Bio::Seq.pm object.
+ Comments  : The sequence is returned in an array of strings corresponding
+           : to the strings in the original format of the Blast alignment.
+           : (i.e., same spacing).
+
+See Also   : L<seq_str()|seq_str>, L<seq_inds()|seq_inds>, L<Bio::Seq>
+
+=cut
+
+#-------
+sub seq {
+#-------
+    # Purpose : Wraps the query (default) or sbjct sequence string of this
+    #           HSP in a new Bio::Seq object.
+    # Args    : seq_type = 'query', 'hit' or 'sbjct'
+    # Returns : Bio::Seq object whose id is to_string() and whose
+    #           description is "<seq_type> sequence".
+    # Throws  : Propagates exceptions from seq_str() and Bio::Seq->new().
+    my($self,$seqType) = @_;
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    my $str = $self->seq_str($seqType);
+
+    require Bio::Seq;
+
+    # Arrow call instead of indirect-object "new Bio::Seq (...)": the
+    # indirect form is parsed by heuristics, and Bio::Seq is only loaded
+    # at run time.
+    return Bio::Seq->new(-ID   => $self->to_string,
+                         -SEQ  => $str,
+                         -DESC => "$seqType sequence",
+                         );
+}
+
+=head2 seq_str
+
+ Usage     : $hsp->seq_str( seq_type );
+ Purpose   : Get the full query, sbjct, or 'match' sequence as a string.
+           : The 'match' sequence is the string of symbols in between the
+           : query and sbjct sequences.
+ Example   : $str = $hsp->seq_str('query');
+ Returns   : String
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
+           :  ('sbjct' is synonymous with 'hit')
+ Throws    : Exception if the argument does not match an accepted seq_type.
+ Comments  : Calls _set_match_seq() to set the 'match' sequence if it has
+           : not been set already.
+
+See Also   : L<seq()|seq>, L<seq_inds()|seq_inds>, L<_set_match_seq()>
+
+=cut
+
+#------------
+sub seq_str {
+#------------
+    # Purpose : Returns the query, sbjct or match sequence as one string.
+    # Args    : seq_type = 'query' (default), 'hit', 'sbjct' or 'match'
+    # Returns : String; whitespace is removed from query/sbjct sequences,
+    #           the match rows are joined unchanged.
+    # Throws  : Bio::Root::BadParameter for any other seq_type.
+    my($self,$seqType) = @_;
+
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    ## Sensitive to member name changes.
+    $seqType = "_\L$seqType\E";
+
+    $self->_set_seq_data() unless $self->{'_set_seq_data'};
+
+    if($seqType =~ /sbjct|query/) {
+        my $seq = join('',@{$self->{$seqType.'Seq'}});
+        $seq =~ s/\s+//g;
+        return $seq;
+
+    } elsif( $seqType =~ /match/i) {
+        # Only need to call _set_match_seq() if the match seq is requested.
+        # (No "my ... unless ..." here: a conditionally executed "my" is
+        # undefined behaviour in Perl.)
+        $self->_set_match_seq() unless ref $self->{'_matchSeq'};
+
+        return join('', @{$self->{'_matchSeq'}});
+
+    } else {
+        my $id_str = $self->_id_str;
+        $self->throw(-class => 'Bio::Root::BadParameter',
+                     -text => "Invalid or undefined sequence type: $seqType ($id_str)\n" .
+                               "Valid types: query, sbjct, match",
+                     -value => $seqType);
+    }
+}
+
+=head2 seq_inds
+
+ Usage     : $hsp->seq_inds( seq_type, class, collapse );
+ Purpose   : Get a list of residue positions (indices) for all identical
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+           :  ('sbjct' is synonymous with 'hit')
+           : class     = 'identical' or 'conserved' (default = identical)
+           :              (can be shortened to 'id' or 'cons')
+           :              (actually, anything not 'id' will evaluate to 'conserved').
+           : collapse  = boolean, if true, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
+           :             collapses to "1-5 7 9-11". This is useful for
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : Calls _set_residues() to set the 'match' sequence if it has
+           : not been set already.
+
+See Also   : L<seq()|seq>, L<_set_residues()>, L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::BlastHit::seq_inds()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+#---------------
+sub seq_inds {
+#---------------
+    # List the residue positions recorded for one side of the alignment
+    # (query or sbjct) and one class (identical or conserved), sorted
+    # numerically; optionally collapsed into ranges by collapse_nums().
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    $self->_set_residues() unless defined $self->{'_identicalRes_query'};
+
+    # Anything not starting with 'q' (including 'hit') means the sbjct;
+    # anything not starting with 'id' means conserved.  Missing arguments
+    # fall back to query / identical.
+    my $side = (!$seqType || $seqType =~ /^q/i)  ? 'query'     : 'sbjct';
+    my $kind = (!$class   || $class   =~ /^id/i) ? 'identical' : 'conserved';
+
+    ## Sensitive to member name changes.
+    my $member = "_${kind}Res_${side}";
+
+    my @ary = sort { $a <=> $b } keys %{ $self->{$member} };
+
+    return @ary unless $collapse;
+
+    # The collapsing helper is only loaded when it is actually needed.
+    require Bio::Search::BlastUtils;
+    return Bio::Search::BlastUtils::collapse_nums(@ary);
+}
+
+
+=head2 get_aln
+
+ Usage     : $hsp->get_aln()
+ Purpose   : Get a Bio::SimpleAlign object constructed from the query + sbjct
+           : sequences of the present HSP object.
+ Example   : $aln_obj = $hsp->get_aln();
+ Returns   : Object reference for a Bio::SimpleAlign.pm object.
+ Argument  : n/a.
+ Throws    : Propagates any exception occurring during the construction of
+           : the Bio::SimpleAlign object.
+ Comments  : Requires Bio::SimpleAlign.
+           : The Bio::SimpleAlign object is constructed from the query + sbjct
+           : sequence objects obtained by calling seq().
+           : Gap residues are included (see $GAP_SYMBOL).
+
+See Also   : L<seq()|seq>, L<Bio::SimpleAlign>
+
+=cut
+
+#------------
+sub get_aln {
+#------------
+    # Build a two-row Bio::SimpleAlign (query first, then sbjct) from the
+    # sequence objects returned by seq().
+    my $self = shift;
+
+    require Bio::SimpleAlign;
+    require Bio::LocatableSeq;
+    my $qseq = $self->seq('query');
+    my $sseq = $self->seq('sbjct');
+
+    my $aln = Bio::SimpleAlign->new();
+
+    foreach my $row ( [ 'query_', $qseq ], [ 'hit_', $sseq ] ) {
+        my ($prefix, $seq_obj) = @{$row};
+        my $residues = $seq_obj->seq();
+
+        # -end is the length of the sequence string itself.  Taking
+        # CORE::length() of the object would measure its stringified
+        # reference ("Class=HASH(0x...)") instead.
+        $aln->add_seq(Bio::LocatableSeq->new(
+                          -seq   => $residues,
+                          -id    => $prefix . $seq_obj->display_id(),
+                          -start => 1,
+                          -end   => CORE::length($residues)));
+    }
+
+    return $aln;
+}
+
+
+1;
+__END__
+
+
+=head1 FOR DEVELOPERS ONLY
+
+=head2 Data Members
+
+Information about the various data members of this module is provided for those
+wishing to modify or understand the code. Two things to bear in mind:
+
+=over 4
+
+=item 1 Do NOT rely on these in any code outside of this module.
+
+All data members are prefixed with an underscore to signify that they are private.
+Always use accessor methods. If the accessor doesn't exist or is inadequate,
+create or modify an accessor (and let me know, too!).
+
+=item 2 This documentation may be incomplete and out of date.
+
+It is easy for these data member descriptions to become obsolete as
+this module is still evolving. Always double check this info and search
+for members not described here.
+
+=back
+
+An instance of Bio::Search::HSP::PsiBlastHSP.pm is a blessed reference to a hash containing
+all or some of the following fields:
+
+ FIELD           VALUE
+ --------------------------------------------------------------
+ (member names are mostly self-explanatory)
+
+ _score              :
+ _bits               :
+ _p                  :
+ _n                  : Integer. The 'N' value listed in parenthesis with P/Expect value:
+                     : e.g., P(3) = 1.2e-30  ---> (N = 3).
+                     : Not defined in NCBI Blast2 with gaps.
+                     : To obtain the number of HSPs, use Bio::Search::Hit::BlastHit::num_hsps().
+ _expect             :
+ _queryLength        :
+ _queryGaps          :
+ _queryStart         :
+ _queryStop          :
+ _querySeq           :
+ _sbjctLength        :
+ _sbjctGaps          :
+ _sbjctStart         :
+ _sbjctStop          :
+ _sbjctSeq           :
+ _matchSeq           : String. Contains the symbols between the query and sbjct lines
+                       which indicate identical (letter) and conserved ('+') matches
+                       or a mismatch (' ').
+ _numIdentical       :
+ _numConserved       :
+ _identicalRes_query :
+ _identicalRes_sbjct :
+ _conservedRes_query :
+ _conservedRes_sbjct :
+ _match_indent       : The number of leading space characters on each line containing
+                       the match symbols. _match_indent is 13 in this example:
+                         Query:   285 QNSAPWGLARISHRERLNLGSFNKYLYDDDAG
+                                      Q +APWGLARIS       G+ + Y YD+ AG
+                         ^^^^^^^^^^^^^
+
+
+=cut
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PullHSPI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PullHSPI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PullHSPI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,933 @@
+# $Id: PullHSPI.pm,v 1.1.2.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::PullHSPI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::PullHSPI - Bio::Search::HSP::HSPI interface for pull parsers.
+
+=head1 SYNOPSIS
+
+	# This is an interface and cannot be instantiated
+
+    # generally we use Bio::SearchIO to build these objects
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer_pull',
+							   -file   => 'result.hmmer');
+
+    while (my $result = $in->next_result) {
+		while (my $hit = $result->next_hit) {
+			while (my $hsp = $hit->next_hsp) {
+                $r_type = $hsp->algorithm;
+                $pvalue = $hsp->p();
+                $evalue = $hsp->evalue();
+                $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+                $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+                $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+                $qseq = $hsp->query_string;
+                $hseq = $hsp->hit_string;
+                $homo_string = $hsp->homology_string;
+                $len = $hsp->length( ['query'|'hit'|'total'] );
+                $len = $hsp->length( ['query'|'hit'|'total'] );
+                $rank = $hsp->rank;
+            }
+        }
+    }
+
+=head1 DESCRIPTION
+
+PullHSP is for fast implementations that only do parsing work on the hsp
+data when you actually request information by calling one of the HSPI
+methods.
+
+Many methods of HSPI are implemented in a way suitable for inheriting classes
+that use Bio::PullParserI. It only really makes sense for PullHSP modules to be
+created by (and have as a -parent) PullHit modules.
+
+In addition to the usual -chunk and -parent, -hsp_data is all you should supply
+when making a PullHSP object. This will store that data and make it accessible
+via _raw_hsp_data, which you can access in your subclass. It would be best to
+simply provide the data as the input -chunk instead, if the raw data is large
+enough.
+
+=head1 SEE ALSO
+
+This module inherits methods from these other modules:
+
+L<Bio::SeqFeatureI>,
+L<Bio::SeqFeature::FeaturePair>
+L<Bio::SeqFeature::SimilarityPair>
+
+Please refer to these modules for documentation of the 
+many additional inherited methods.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 COPYRIGHT
+
+Copyright (c) 2006 Sendu Bala. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::HSP::PullHSPI;
+
+
+use strict;
+
+use base qw(Bio::Search::HSP::HSPI Bio::PullParserI);
+
+=head2 _setup
+
+ Title   : _setup
+ Usage   : $self->_setup(@args)
+ Function: Implementers should call this to setup common fields and deal with
+           common arguments to new().
+ Returns : n/a
+ Args    : @args received in new().
+
+=cut
+
+sub _setup {
+    my ($self, @args) = @_;
+
+    # Fields most subclasses probably will want; all start out undefined.
+    my @field_names = qw(
+        hsp_length identical percent_identity conserved
+        hsp_gaps query_gaps hit_gaps
+        evalue pvalue score
+        query_start query_end query_string
+        hit_start hit_end hit_string
+        homology_string rank seq_inds
+        hit_identical_inds hit_conserved_inds hit_nomatch_inds hit_gap_inds
+        query_identical_inds query_conserved_inds query_nomatch_inds
+        query_gap_inds
+    );
+    $self->_fields( { map { $_ => undef } @field_names } );
+
+    my ($parent, $chunk, $hsp_data) =
+        $self->_rearrange([qw(PARENT CHUNK HSP_DATA)], @args);
+
+    if (!defined $parent && !$chunk) {
+        $self->throw("Need -parent or -chunk to be defined");
+    }
+
+    $self->parent($parent) if $parent;
+
+    if ($chunk) {
+        # -chunk is either a bare handle/object, or an array ref of
+        # [io, start, end].
+        my ($io, $start, $end) = ref($chunk) eq 'ARRAY'
+                               ? @{$chunk}
+                               : ($chunk, 0, undef);
+        $self->chunk($io, -start => $start, -end => $end);
+    }
+
+    $self->_raw_hsp_data($hsp_data) if $hsp_data;
+
+    return $self;
+}
+
+sub _raw_hsp_data {
+    # Get/set accessor for the raw HSP data stored on the object.
+    my ($self, @new_value) = @_;
+    $self->{_raw_hsp_data} = $new_value[0] if @new_value;
+    return $self->{_raw_hsp_data};
+}
+
+#
+# Some of these methods are written explicitly to avoid HSPI throwing "not
+# implemented", or the wrong ancestor class being used to answer the method;
+# if HSPI didn't do that, PullParserI's AUTOLOAD would have caught them.
+#
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the HSP
+ Returns : string (e.g., BLASTP)
+ Args    : none
+
+=cut
+
+sub algorithm {
+    # Read-only: delegates to the 'algorithm' field.
+    my ($self) = @_;
+    return $self->get_field('algorithm');
+}
+
+=head2 pvalue
+
+ Title   : pvalue
+ Usage   : my $pvalue = $hsp->pvalue();
+ Function: Returns the P-value for this HSP or undef 
+ Returns : float or exponential (2e-10)
+ Args    : none
+
+=cut
+
+sub pvalue {
+    # Read-only: delegates to the 'pvalue' field.
+    my ($self) = @_;
+    return $self->get_field('pvalue');
+}
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : my $evalue = $hsp->evalue();
+ Function: Returns the e-value for this HSP
+ Returns : float or exponential (2e-10)
+ Args    : none
+
+=cut
+
+sub evalue {
+    # Read-only: delegates to the 'evalue' field.
+    my ($self) = @_;
+    return $self->get_field('evalue');
+}
+
+*expect = \&evalue;
+
+=head2 frac_identical
+
+ Title   : frac_identical
+ Usage   : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
+ Function: Returns the fraction of identical positions for this HSP
+ Returns : Float in range 0.0 -> 1.0
+ Args    : 'query' = num identical / length of query seq (without gaps)
+           'hit'   = num identical / length of hit seq (without gaps)
+           'total' = num identical / length of alignment (with gaps)
+           default = 'total' 
+
+=cut
+
+sub frac_identical {
+    my ($self, $type) = @_;
+
+    # Normalise the requested type: subject/sbjct become 'hit'; a missing,
+    # 'hsp' or unrecognised type becomes 'total'.
+    if (!defined $type) {
+        $type = 'total';
+    }
+    else {
+        $type = lc $type;
+        if ($type =~ /subject|sbjct/) {
+            $type = 'hit';
+        }
+        elsif ($type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/) {
+            $type = 'total';
+        }
+    }
+
+    # Identical count over the length for that type, to three decimals.
+    return sprintf( "%.3f", $self->num_identical($type) / $self->length($type) );
+}
+
+=head2 frac_conserved
+
+ Title    : frac_conserved
+ Usage    : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
+ Function : Returns the fraction of conserved positions for this HSP.
+            This is the fraction of symbols in the alignment with a 
+            positive score.
+ Returns : Float in range 0.0 -> 1.0
+ Args    : 'query' = num conserved / length of query seq (without gaps)
+           'hit'   = num conserved / length of hit seq (without gaps)
+           'total' = num conserved / length of alignment (with gaps)
+           default = 'total' 
+
+=cut
+
+sub frac_conserved {
+    my ($self, $type) = @_;
+
+    # Normalise the requested type: subject/sbjct become 'hit'; a missing,
+    # 'hsp' or unrecognised type becomes 'total'.
+    if (!defined $type) {
+        $type = 'total';
+    }
+    else {
+        $type = lc $type;
+        if ($type =~ /subject|sbjct/) {
+            $type = 'hit';
+        }
+        elsif ($type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/) {
+            $type = 'total';
+        }
+    }
+
+    # Conserved count over the length for that type, to three decimals.
+    return sprintf( "%.3f", $self->num_conserved($type) / $self->length($type) );
+}
+
+=head2 num_identical
+
+ Title   : num_identical
+ Usage   : $obj->num_identical($newval)
+ Function: returns the number of identical residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+=cut
+
+sub num_identical {
+    my ($self) = @_;
+    # Scalar assignment puts seq_inds() in scalar context, giving a count
+    # rather than the list of positions.
+    my $count = $self->seq_inds('hit', 'identical');
+    return $count;
+}
+
+=head2 num_conserved
+
+ Title   : num_conserved
+ Usage   : $obj->num_conserved($newval)
+ Function: returns the number of conserved residues in the alignment
+ Returns : integer
+ Args    : integer (optional)
+
+=cut
+
+sub num_conserved {
+    my ($self) = @_;
+    # Scalar assignment puts seq_inds() in scalar context, giving a count
+    # rather than the list of positions.
+    my $count = $self->seq_inds('hit', 'conserved-not-identical');
+    return $count;
+}
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
+ Function : Get the number of gaps in the query, hit, or total alignment.
+ Returns  : Integer, number of gaps or 0 if none
+ Args     : 'query', 'hit' or 'total'; default = 'total' 
+
+=cut
+
+sub gaps {
+    my ($self, $type) = @_;
+
+    # A missing, 'hsp' or unrecognised type means the whole alignment;
+    # sbjct/subject are synonyms for the hit.
+    $type = defined $type ? lc $type : 'total';
+    if ($type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/) {
+        $type = 'total';
+    }
+    $type = 'hit' if $type =~ /sbjct|subject/;
+
+    if ($type eq 'total') {
+        # Total = gap positions on the hit plus gap positions on the query.
+        my $hit_gaps   = $self->seq_inds('hit', 'gap');
+        my $query_gaps = $self->seq_inds('query', 'gap');
+        return $hit_gaps + $query_gaps;
+    }
+
+    my $side_gaps = $self->seq_inds($type, 'gap');
+    return $side_gaps;
+}
+
+=head2 query_string
+
+ Title   : query_string
+ Usage   : my $qseq = $hsp->query_string;
+ Function: Retrieves the query sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+=cut
+
+sub query_string {
+    # Read-only: delegates to the 'query_string' field.
+    my ($self) = @_;
+    return $self->get_field('query_string');
+}
+
+=head2 hit_string
+
+ Title   : hit_string
+ Usage   : my $hseq = $hsp->hit_string;
+ Function: Retrieves the hit sequence of this HSP as a string
+ Returns : string
+ Args    : none
+
+=cut
+
+sub hit_string {
+    # Read-only: delegates to the 'hit_string' field.
+    my ($self) = @_;
+    return $self->get_field('hit_string');
+}
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homo_string = $hsp->homology_string;
+ Function: Retrieves the homology sequence for this HSP as a string.
+         : The homology sequence is the string of symbols in between the 
+         : query and hit sequences in the alignment indicating the degree
+         : of conservation (e.g., identical, similar, not similar).
+ Returns : string
+ Args    : none
+
+=cut
+
+sub homology_string {
+    # Read-only: delegates to the 'homology_string' field.
+    my ($self) = @_;
+    return $self->get_field('homology_string');
+}
+
+=head2 length
+
+ Title    : length
+ Usage    : my $len = $hsp->length( ['query'|'hit'|'total'] );
+ Function : Returns the length of the query or hit in the alignment (without gaps) 
+            or the aggregate length of the HSP (including gaps;
+            this may be greater than either hit or query )
+ Returns  : integer
+ Args     : 'query' = length of query seq (without gaps)
+            'hit'   = length of hit seq (without gaps)
+            'total' = length of alignment (with gaps)
+            default = 'total' 
+ Args    : none
+
+=cut
+
+sub length {
+    my ($self, $type) = @_;
+
+    # No type given means the whole alignment.
+    my $which = lc( defined $type ? $type : 'total' );
+
+    return $self->query->length if $which =~ /^q/i;
+    return $self->hit->length   if $which =~ /^(hit|subject|sbjct)/;
+
+    # Anything else: hit length plus the gaps counted on the hit side.
+    return $self->hit->length + $self->gaps('hit');
+}
+
+=head2 hsp_length
+
+ Title   : hsp_length
+ Usage   : my $len = $hsp->hsp_length()
+ Function: Shortcut for length('total')
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub hsp_length {
+    # Convenience wrapper around length('total').
+    my ($self) = @_;
+    return $self->length('total');
+}
+
+=head2 percent_identity
+
+ Title   : percent_identity
+ Usage   : my $percentid = $hsp->percent_identity()
+ Function: Returns the calculated percent identity for an HSP
+ Returns : floating point between 0 and 100 
+ Args    : none
+
+=cut
+
+sub percent_identity {
+    my $self = shift;
+    # frac_identical() is a fraction; scale it to a percentage.
+    my $fraction = $self->frac_identical('hsp');
+    return $fraction * 100;
+}
+
+=head2 get_aln
+
+ Title   : get_aln
+ Usage   : my $aln = $hsp->get_aln
+ Function: Returns a Bio::SimpleAlign representing the HSP alignment
+ Returns : Bio::SimpleAlign
+ Args    : none
+
+=cut
+
+sub get_aln {
+    my ($self) = @_;
+
+    require Bio::LocatableSeq;
+    require Bio::SimpleAlign;
+
+    my $aln       = Bio::SimpleAlign->new;
+    my $hit_seq   = $self->seq('hit');
+    my $query_seq = $self->seq('query');
+
+    # Without both sequences there is no alignment to return.
+    return unless $hit_seq && $query_seq;
+
+    # Hit row first, then query row.
+    $aln->add_seq($hit_seq);
+    $aln->add_seq($query_seq);
+    return $aln;
+}
+
+=head2 seq_inds
+
+ Title   : seq_inds
+ Purpose   : Get a list of residue positions (indices) for all identical 
+           : or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hsp->seq_inds('query', 'identical');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved');
+           : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
+ Returns   : List of integers 
+           : May include ranges if collapse is true.
+ Argument  : seq_type  = 'query' or 'hit' or 'sbjct'  (default = query)
+              ('sbjct' is synonymous with 'hit') 
+             class     = 'identical' or 'conserved' or 'nomatch' or 'gap'
+                          (default = identical)
+                          (can be shortened to 'id' or 'cons')
+		                  Note that 'conserved' includes identical unless you
+		                  use 'conserved-not-identical'
+
+             collapse  = boolean, if true, consecutive positions are merged
+                         using a range notation, e.g., "1 2 3 4 5 7 9 10 11" 
+                         collapses to "1-5 7 9-11". This is useful for 
+                         consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : 
+
+See Also   : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub seq_inds {
+    # Return the positions recorded for one side of the alignment (query or
+    # hit) and one residue class, optionally collapsed into ranges.
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    $seqType ||= 'query';
+    $class ||= 'identical';
+    $collapse ||= 0;
+    $seqType = lc($seqType);
+    $class = lc($class);
+
+    # Only the first letter of seq_type matters: q(uery), h(it) or s(bjct).
+    my $t = substr($seqType,0,1);
+    if ($t eq 'q') {
+        $seqType = 'query';
+    }
+    elsif ($t eq 's' || $t eq 'h') {
+        $seqType = 'hit';
+    }
+    else {
+        $self->warn("unknown seqtype $seqType using 'query'");
+        $seqType = 'query';
+    }
+
+    # Likewise the class is chosen by first letter.  'conserved' alone means
+    # conserved plus identical; only the exact name
+    # 'conserved-not-identical' selects the conserved field on its own.
+    $t = substr($class,0,1);
+    if ($t eq 'c') {
+        $class = $class eq 'conserved-not-identical' ? 'conserved'
+                                                     : 'conservedall';
+    }
+    elsif ($t eq 'i') {
+        $class = 'identical';
+    }
+    elsif ($t eq 'n') {
+        $class = 'nomatch';
+    }
+    elsif ($t eq 'g') {
+        $class = 'gap';
+    }
+    else {
+        $self->warn("unknown sequence class $class using 'identical'");
+        $class = 'identical';
+    }
+
+    # Field names have the form <side>_<class>_inds.
+    $seqType .= '_';
+    $class .= '_inds';
+
+    my @ary;
+    if ($class eq 'conservedall_inds') {
+        # Union of the conserved and identical positions, de-duplicated
+        # through a hash and returned in numeric order.
+        my %tmp = map { $_, 1 } @{$self->get_field($seqType.'conserved_inds')},
+                                @{$self->get_field($seqType.'identical_inds')};
+        @ary = sort {$a <=> $b} keys %tmp;
+    }
+    else {
+        @ary = @{$self->get_field($seqType.$class)};
+    }
+
+    return @ary unless $collapse;
+
+    # Load the helper module on demand; nothing else in this file loads it,
+    # so calling collapse_nums() without this could die with
+    # "Undefined subroutine".
+    require Bio::Search::SearchUtils;
+    return Bio::Search::SearchUtils::collapse_nums(@ary);
+}
+
+=head2 Inherited from L<Bio::SeqFeature::SimilarityPair>
+
+These methods come from L<Bio::SeqFeature::SimilarityPair>
+
+=head2 query
+
+ Title   : query
+ Usage   : my $query = $hsp->query
+ Function: Returns a SeqFeature representing the query in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 hit
+
+ Title   : hit
+ Usage   : my $hit = $hsp->hit
+ Function: Returns a SeqFeature representing the hit in the HSP
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] new value to set
+
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: Get/Set the significance value (see Bio::SeqFeature::SimilarityPair)
+ Returns : significance value (scientific notation string)
+ Args    : significance value (sci notation string)
+
+=cut
+
+sub significance {
+    # Read-only here: the significance is the 'evalue' field.
+    my ($self) = @_;
+    return $self->get_field('evalue');
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : my $score = $hsp->score();
+ Function: Returns the score for this HSP or undef 
+ Returns : numeric           
+ Args    : [optional] numeric to set value
+
+=cut
+
+sub score {
+    # Read-only: delegates to the 'score' field.
+    my ($self) = @_;
+    return $self->get_field('score');
+}
+
+=head2 bits
+
+ Title   : bits
+ Usage   : my $bits = $hsp->bits();
+ Function: Returns the bit value for this HSP or undef 
+ Returns : numeric
+ Args    : none
+
+=cut
+
+sub bits {
+    # Read-only: delegates to the 'bits' field.
+    my ($self) = @_;
+    return $self->get_field('bits');
+}
+
+# override 
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $hsp->strand('query')
+ Function: Retrieves the strand for the HSP component requested
+ Returns : +1 or -1 (0 if unknown)
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
+           'query' to retrieve the query strand (default)
+           'list' or 'array' to retrieve both query and hit together
+
+=cut
+
+sub strand {
+    my ($self, $val, @rest) = @_;
+
+    # Default to the query; leading whitespace in the selector is ignored.
+    $val = 'query' unless defined $val;
+    $val =~ s/^\s+//;
+
+    # Any further arguments are handed on to the underlying strand() call.
+    return $self->query->strand(@rest) if $val =~ /^q/i;
+    return $self->hit->strand(@rest)   if $val =~ /^hi|^s/i;
+    return ($self->query->strand(@rest), $self->hit->strand(@rest) )
+        if $val =~ /^list|array/i;
+
+    $self->warn("unrecognized component '$val' requested\n");
+    return 0;
+}
+
+=head2 start
+
+ Title   : start
+ Usage   : $hsp->start('query')
+ Function: Retrieves the start for the HSP component requested
+ Returns : integer
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
+           'query' to retrieve the query start (default)
+
+=cut
+
+sub start {
+    my ($self, $val, @rest) = @_;
+
+    # Default to the query; leading whitespace in the selector is ignored.
+    $val = 'query' unless defined $val;
+    $val =~ s/^\s+//;
+
+    # Any further arguments are handed on to the underlying start() call.
+    return $self->query->start(@rest) if $val =~ /^q/i;
+    return $self->hit->start(@rest)   if $val =~ /^(hi|s)/i;
+    return ($self->query->start(@rest), $self->hit->start(@rest) )
+        if $val =~ /^list|array/i;
+
+    $self->warn("unrecognized component '$val' requested\n");
+    return 0;
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $hsp->end('query')
+ Function: Retrieves the end for the HSP component requested
+ Returns : integer
+ Args    : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
+           'query' to retrieve the query end (default)
+
+=cut
+
+sub end {
+    my ($self, $val, @rest) = @_;
+
+    # Default to the query; leading whitespace in the selector is ignored.
+    $val = 'query' unless defined $val;
+    $val =~ s/^\s+//;
+
+    # Any further arguments are handed on to the underlying end() call.
+    return $self->query->end(@rest) if $val =~ /^q/i;
+    return $self->hit->end(@rest)   if $val =~ /^(hi|s)/i;
+    return ($self->query->end(@rest), $self->hit->end(@rest) )
+        if $val =~ /^list|array/i;
+
+    $self->warn("unrecognized end component '$val' requested\n");
+    return 0;
+}
+
+=head2 seq
+
+ Usage     : $hsp->seq( [seq_type] );
+ Purpose   : Get the query or sbjct sequence as a Bio::LocatableSeq object.
+ Example   : $seqObj = $hsp->seq('query');
+ Returns   : Object reference for a Bio::LocatableSeq object.
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query').
+           : ('sbjct' is synonymous with 'hit') 
+           : default is 'query'
+
+=cut
+
+sub seq {
+    # Wrap the query or hit string of this HSP in a Bio::LocatableSeq that
+    # carries the matching start, end and strand.  Returns nothing when
+    # seq_str() yields no sequence for the requested type.
+    my ($self, $seqType) = @_;
+    $seqType ||= 'query';
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # The homology/match line is not a sequence.  The message points at
+    # get_aln(), which returns a Bio::SimpleAlign (not a Bio::AlignIO).
+    if ($seqType =~ /^(m|ho)/i ) {
+        $self->throw("cannot call seq on the homology match string, it isn't really a sequence, use get_aln to convert the HSP to a Bio::SimpleAlign and generate a consensus from that.");
+    }
+
+    my $str = $self->seq_str($seqType) || return;
+    require Bio::LocatableSeq;
+    my $id = $seqType =~ /^q/i ? $self->query->seq_id : $self->hit->seq_id;
+    return Bio::LocatableSeq->new(  -ID    => $id,
+                                    -SEQ   => $str,
+                                    -START => $self->start($seqType),
+                                    -END   => $self->end($seqType),
+                                    -STRAND=> $self->strand($seqType),
+                                    -DESC  => "$seqType sequence " );
+}
+
+=head2 seq_str
+
+ Usage     : $hsp->seq_str( seq_type );
+ Purpose   : Get the full query, sbjct, or 'match' sequence as a string.
+           : The 'match' sequence is the string of symbols in between the 
+           : query and sbjct sequences.
+ Example   : $str = $hsp->seq_str('query');
+ Returns   : String
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
+           :  ('sbjct' is synonymous with 'hit')
+           : default is 'query'
+ Throws    : Exception if the argument does not match an accepted seq_type.
+ Comments  : 
+
+See Also   : L<seq()|seq>, L<seq_inds()|seq_inds>, B<_set_match_seq()>
+
+=cut
+
+sub seq_str {
+    # Return the query, hit (alias sbjct) or homology/match string,
+    # selected by the leading letters of the type (default 'query').
+    # Warns and returns '' for an unrecognised type.
+    my $self = shift;
+    my $type = shift || 'query';
+
+    # The alternatives are grouped so that the ^ anchor applies to each of
+    # them; ungrouped, "hi" or "ma" anywhere in the string would match.
+    if ($type =~ /^q/i) {
+        return $self->query_string(@_);
+    }
+    elsif ($type =~ /^(?:s|hi)/i) {
+        return $self->hit_string(@_);
+    }
+    elsif ($type =~ /^(?:ho|ma)/i) {
+        return $self->homology_string(@_);
+    }
+    else {
+        $self->warn("unknown sequence type $type");
+    }
+    return '';
+}
+
+=head2 rank
+
+ Usage     : $hsp->rank( [string] );
+ Purpose   : Get the rank of the HSP within a given Blast hit.
+ Example   : $rank = $hsp->rank;
+ Returns   : Integer (1..n) corresponding to the order in which the HSP
+             appears in the BLAST report.
+
+=cut
+
+sub rank {
+    # Read-only: delegates to the 'rank' field.
+    my ($self) = @_;
+    return $self->get_field('rank');
+}
+
+=head2 matches
+
+ Usage     : $hsp->matches(-seq   => 'hit'|'query', 
+                           -start => $start, 
+                           -stop  => $stop);
+ Purpose   : Get the total number of identical and conservative matches 
+           : in the query or sbjct sequence for the given HSP. Optionally can
+           : report data within a defined interval along the seq.
+ Example   : ($id,$cons) = $hsp_object->matches(-seq   => 'hit');
+           : ($id,$cons) = $hsp_object->matches(-seq   => 'query',
+                                                -start => 300,
+                                                -stop  => 400);
+ Returns   : 2-element array of integers 
+ Argument  : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query)
+           :  ('sbjct' is synonymous with 'hit') 
+           : (2) start = Starting coordinate (optional)
+           : (3) stop  = Ending coordinate (optional)
+
+=cut
+
+sub matches {
+    my ($self, @args) = @_;
+    my ($seqType, $beg, $end) = $self->_rearrange([qw(SEQ START STOP)], @args);
+    $seqType = 'query' unless $seqType;
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # With no interval requested, or no match string to slice, report the
+    # counts for the whole HSP.
+    my $whole_hsp = (!defined $beg && !defined $end) || ! $self->seq_str('match');
+    if ($whole_hsp) {
+        my @totals = ($self->num_identical, $self->num_conserved);
+        return @totals;
+    }
+
+    $beg ||= 0;
+    $end ||= 0;
+    my ($start, $stop) = $self->range($seqType);
+
+    # A zero (missing) bound is derived from the HSP range.
+    if ($beg == 0) {
+        $beg = $start;
+        $end = $beg + $end;
+    }
+    elsif ($end == 0) {
+        $end = $stop;
+        $beg = $end - $beg;
+    }
+
+    # Clamp the interval to the HSP; an end inside the HSP is bumped by one.
+    $end = $end >= $stop ? $stop : $end + 1;
+    $beg = $start if $beg < $start;
+
+    my $seq = substr($self->seq_str('homology'), $beg - $start, $end - $beg);
+
+    unless (CORE::length $seq) {
+        $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
+    }
+
+    ## Get data for a substring.
+    # Whatever is left after dropping spaces counts as conserved; whatever
+    # is left after also dropping '+' counts as identical.
+    $seq =~ s/ //g;
+    my $len_cons = CORE::length $seq;
+    $seq =~ s/\+//g;
+    my $len_id = CORE::length $seq;
+
+    my @counts = ($len_id, $len_cons);
+    return @counts;
+}
+
+=head2 n
+
+ Usage     : $hsp_obj->n()
+ Purpose   : Get the N value (num HSPs on which P/Expect is based).
+ Returns   : Integer or null string if not defined.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : The 'N' value is listed in parenthesis with P/Expect value:
+           : e.g., P(3) = 1.2e-30  ---> (N = 3).
+           : Not defined in NCBI Blast2 with gaps.
+           : This typically is equal to the number of HSPs but not always.
+
+=cut
+
+sub n {
+    # Read-only: delegates to the 'num_hsps' field.
+    my ($self) = @_;
+    return $self->get_field('num_hsps');
+}
+
+=head2 range
+
+ Usage     : $hsp->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($query_beg, $query_end) = $hsp->range('query');
+           : ($hit_beg, $hit_end) = $hsp->range('hit');
+ Returns   : Two-element array of integers 
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :  ('sbjct' is synonymous with 'hit') 
+ Throws    : n/a
+ Comments  : This is a convenience method for constructions such as
+             ($hsp->query->start, $hsp->query->end)
+
+=cut
+
+sub range {
+    # Return (start, end) for the query or the hit side of the HSP.
+    my ($self, $seqType) = @_;
+    $seqType ||= 'query';
+
+    # Select the query by a case-insensitive leading 'q', as start(), end()
+    # and strand() do.  Comparing against the exact string 'query' would
+    # hand back the hit range for 'Query' or 'q'.  Anything else, including
+    # 'hit' and 'sbjct', selects the hit.
+    my $feature = $seqType =~ /^q/i ? $self->query : $self->hit;
+
+    my $start = $feature->start;
+    my $end   = $feature->end;
+    return ($start, $end);
+}
+
+#*** would want cigar stuff from GenericHSP - move to HSPI?
+
+1;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/PullHSPI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/WABAHSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/WABAHSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/HSP/WABAHSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+# $Id: WABAHSP.pm,v 1.9.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::HSP::WABAHSP
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::HSP::WABAHSP - HSP object suitable for describing WABA alignments
+
+=head1 SYNOPSIS
+
+# use this object as you would a GenericHSP
+# a few other methods have been added including state
+
+=head1 DESCRIPTION
+
+This object implements a few of the extra methods such as
+hmmstate_string which returns the HMM state representation for the
+WABA alignment.  We also must implement a method to calculate
+homology_string since it is not returned by the algorithm in the
+machine readable format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Search::HSP::WABAHSP;
+use strict;
+use Bio::Root::RootI;
+
+use base qw(Bio::Search::HSP::GenericHSP);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::HSP::WABAHSP();
+ Function: Builds a new Bio::Search::HSP::WABAHSP object 
+ Returns : Bio::Search::HSP::WABAHSP
+ Args    : -hmmstate_seq => the string representing the state output from WABA
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  # gotta do some preprocessing before we send the arguments to the superclass
+  my ($len,$qs,$hs) = Bio::Root::RootI->_rearrange([qw(HSP_LENGTH
+						       QUERY_SEQ 
+						       HIT_SEQ)],@args);  
+  if( $len != length($qs) ) {
+    Bio::Root::RootI->warn("HSP_LENGTH must equal length of query_seq string, using value from QUERY_SEQ\n");
+      $len = length($qs);
+  }
+  my( $homol_seq,$gapct,$identical) = ('',0,0);
+  
+  for(my $i=0;$i<$len;$i++) {
+      my $q = substr($qs,$i,1);
+      my $h = substr($hs,$i,1);
+      if( $q eq '-' || $h eq '-' ) {
+	  $homol_seq .= ' ';
+	  $gapct ++;
+      } elsif( $q eq $h ) { 
+	  $homol_seq .= '|';
+	  $identical++;
+      } else { 
+	  $homol_seq .= ' ';
+      }
+  }
+  my $self = $class->SUPER::new('-conserved' => $identical,
+				'-identical' => $identical,
+				'-gaps'      => $gapct,
+				'-homology_seq' => $homol_seq,
+				@args);
+    
+  my ($hmmst) = $self->_rearrange([qw(HMMSTATE_SEQ)],@args);
+  defined $hmmst && $self->hmmstate_string($hmmst);
+  
+  $self->add_tag_value('Target' , join(" ","Sequence:".$self->hit->seq_id, 
+				       $self->hit->start, $self->hit->end));
+
+  return $self;
+}
+
+=head2 hmmstate_string
+
+ Title   : hmmstate_string
+ Usage   : my $hmmseq = $wabahsp->hmmstate_string();
+ Function: Get/Set the WABA HMM stateseq
+ Returns : string
+ Args    : [optional] string
+
+
+=cut
+
+sub hmmstate_string{
+   my ($self,$val) = @_;
+   if( defined $val ) { 
+       $self->{'_hmmstate_string'} = $val;
+   }
+   return $self->{'_hmmstate_string'};
+}
+
+=head2 homology_string
+
+ Title   : homology_string
+ Usage   : my $homology_str = $hsp->homology_string();
+ Function: Homology string must be calculated for a WABA HSP so we can do
+           so here and cache the result so it is only done once
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub homology_string{
+   my ($self) = @_;
+   return '';
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/BlastHit.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/BlastHit.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/BlastHit.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+# $Id: BlastHit.pm,v 1.17.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Hit::BlastHit
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::BlastHit - Blast-specific subclass of Bio::Search::Hit::GenericHit
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Hit::BlastHit;
+    my $hit = new Bio::Search::Hit::BlastHit(-algorithm => 'blastp');
+
+# See Bio::Search::Hit::GenericHit for information about working with Hits.
+
+# TODO: Describe how to configure a SearchIO stream so that it generates
+#       GenericHit objects.
+
+=head1 DESCRIPTION
+
+This object is a subclass of Bio::Search::Hit::GenericHit
+and provides some operations that facilitate working with BLAST
+and PSI-BLAST Hits.
+
+For general information about working with Hits, see 
+Bio::Search::Hit::GenericHit.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich and Steve Chervitz
+
+Email jason at bioperl.org
+Email sac at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Hit::BlastHit;
+use strict;
+
+use Bio::Search::SearchUtils;
+
+use base qw(Bio::Search::Hit::GenericHit);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Hit::BlastHit();
+ Function: Builds a new Bio::Search::Hit::BlastHit object 
+ Returns : Bio::Search::Hit::BlastHit
+ Args    : See Bio::Search::Hit::GenericHit() for other args.
+           Here are the BLAST-specific args that can be used when
+           creating BlastHit objects:
+           -iteration    => integer for the PSI-Blast iteration number
+           -found_again  => boolean, true if hit appears in a 
+                            "previously found" section of a PSI-Blast report.
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($iter,$found) = $self->_rearrange([qw(ITERATION 
+                                            FOUND_AGAIN
+                                           )], @args);
+
+  defined $iter   && $self->iteration($iter);
+  defined $found  && $self->found_again($found);
+
+  return $self;
+}
+
+=head2 iteration
+
+ Usage     : $hit->iteration( $iteration_num );
+ Purpose   : Gets the iteration number in which the Hit was found.
+ Example   : $iteration_num = $sbjct->iteration();
+ Returns   : Integer greater than or equal to 1
+             Non-PSI-BLAST reports will report iteration as 1, but this number
+             is only meaningful for PSI-BLAST reports.
+ Argument  : iteration_num (optional, used when setting only)
+ Throws    : none
+
+See Also   : L<found_again()|found_again>
+
+=cut
+
+sub iteration{
+   my ($self,$value) = @_;
+   if( defined $value) {
+      $self->{'_psiblast_iteration'} = $value;
+    }
+    return $self->{'_psiblast_iteration'};
+}
+
+=head2 found_again
+
+ Title     : found_again
+ Usage     : $hit->found_again;
+             $hit->found_again(1);
+ Purpose   : Gets a boolean indicator whether or not the hit has
+             been found in a previous iteration.
+             This is only applicable to PSI-BLAST reports.
+
+              This method indicates if the hit was reported in the 
+              "Sequences used in model and found again" section of the
+              PSI-BLAST report or if it was reported in the
+              "Sequences not found previously or not previously below threshold"
+              section of the PSI-BLAST report. Only for hits in iteration > 1.
+
+ Example   : if( $hit->found_again()) { ... };
+ Returns   : Boolean, true (1) if the hit has been found in a 
+             previous PSI-BLAST iteration.
+             Returns false (0 or undef) for hits that have not occurred in a
+             previous PSI-BLAST iteration.
+ Argument  : Boolean (1 or 0). Only used for setting.
+ Throws    : none
+
+See Also   : L<iteration()|iteration>
+
+=cut
+
+sub found_again {
+   my $self = shift;
+   return $self->{'_found_again'} = shift if @_;
+   return $self->{'_found_again'};
+}
+
+
+sub expect { shift->significance(@_) }
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/Fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/Fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/Fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,123 @@
+# $Id: Fasta.pm,v 1.10.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Hit::Fasta
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::Fasta - Hit object specific for Fasta-generated hits
+
+=head1 SYNOPSIS
+
+  # You wouldn't normally create these manually; 
+  # instead they would be produced by Bio::SearchIO::fasta
+
+  use Bio::Search::Hit::Fasta;
+  my $hit = Bio::Search::Hit::Fasta->new(id=>'LBL_6321', desc=>'lipoprotein', e_val=>0.01);
+
+=head1 DESCRIPTION
+
+L<Bio::Search::Hit::HitI> objects are data structures that contain information
+about specific hits obtained during a library search.  Some information will
+be algorithm-specific, but others will be generally defined, such as the
+ability to obtain alignment objects corresponding to each hit.
+
+=head1 SEE ALSO
+
+L<Bio::Search::Hit::HitI>,
+L<Bio::Search::Hit::GenericHit>,
+L<Bio::SearchIO::fasta>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey-at-virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::Search::Hit::Fasta;
+
+use vars qw($AUTOLOAD);
+use strict;
+
+use base qw(Bio::Search::Hit::HitI);
+
+my @AUTOLOAD_OK = qw(_ID _DESC _SIZE _INITN _INIT1 _OPT _ZSC _E_VAL);
+
+my %AUTOLOAD_OK = ();
+@AUTOLOAD_OK{@AUTOLOAD_OK} = (1) x @AUTOLOAD_OK;
+
+=head2 _initialize
+
+ Function: where the heavy stuff will happen when new is called
+
+=cut
+
+sub _initialize {
+    my($self, %args) = @_;
+
+    my $make = $self->SUPER::_initialize(%args);
+
+    while (my ($key, $val) = each %args) {
+	$key = '_' . uc($key);
+	$self->$key($val);
+    }
+
+    return $make; # success - we hope!
+}
+
+=head2 AUTOLOAD
+
+ Function: Provide getter/setters for ID,DESC,SIZE,INITN,INIT1,OPT,ZSC,E_VAL
+
+=cut
+
+sub AUTOLOAD {
+    my ($self, $val) = @_;
+
+    $AUTOLOAD =~ s/.*:://;    # strip the package qualifier, keep the method name
+    return if $AUTOLOAD eq 'DESTROY';    # destruction is not an accessor call
+    if ( $AUTOLOAD_OK{$AUTOLOAD} ) {
+        $self->{$AUTOLOAD} = $val if defined $val;
+        return $self->{$AUTOLOAD};
+    } else {
+        $self->throw("Unallowed accessor: $AUTOLOAD !");
+    }
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/GenericHit.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/GenericHit.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/GenericHit.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1620 @@
+# $Id: GenericHit.pm,v 1.37.4.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Hit::GenericHit
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Hit::GenericHit;
+    my $hit = new Bio::Search::Hit::GenericHit(-algorithm => 'blastp');
+
+    # typically one gets HitI objects from a SearchIO stream via a ResultI
+    use Bio::SearchIO;
+    my $parser = new Bio::SearchIO(-format => 'blast', -file => 'result.bls');
+
+    my $result = $parser->next_result;
+    my $hit    = $result->next_hit;
+
+# TODO: Describe how to configure a SearchIO stream so that it generates
+#       GenericHit objects.
+
+=head1 DESCRIPTION
+
+This object handles the hit data from a Database Sequence Search such
+as FASTA or BLAST.
+
+Unless you're writing a parser, you won't ever need to create a
+GenericHit or any other HitI-implementing object. If you use
+the SearchIO system, HitI objects are created automatically from
+a SearchIO stream which returns Bio::Search::Hit::HitI objects.
+
+For documentation on what you can do with GenericHit (and other HitI
+objects), please see the API documentation in
+L<Bio::Search::Hit::HitI|Bio::Search::Hit::HitI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich and Steve Chervitz
+
+Email jason-at-bioperl-dot-org
+Email sac-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Hit::GenericHit;
+use strict;
+
+use Bio::Search::SearchUtils;
+
+use base qw(Bio::Root::Root Bio::Search::Hit::HitI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Hit::GenericHit();
+ Function: Builds a new Bio::Search::Hit::GenericHit object 
+ Returns : Bio::Search::Hit::GenericHit
+ Args    : -name         => Name of Hit (required)
+           -description  => Description (optional)
+           -accession    => Accession number (optional)
+           -length       => Length of the Hit (optional)
+           -score        => Raw Score for the Hit (optional)
+           -bits         => Bit Score for the Hit (optional)
+           -significance => Significance value for the Hit (optional)
+           -algorithm    => Algorithm used (BLASTP, FASTX, etc...)
+           -hsps         => Array ref of HSPs for this Hit. 
+           -found_again  => boolean, true if hit appears in a 
+                            "previously found" section of a PSI-Blast report.
+           -hsp_factory  => Bio::Factory::ObjectFactoryI able to create HSPI
+                            objects.
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
+      $score,$algo,$signif,$bits,
+      $rank, $hsp_factory) = $self->_rearrange([qw(HSPS
+                                     NAME 
+                                     QUERY_LEN
+                                     DESCRIPTION
+                                     ACCESSION
+                                     LOCUS
+                                     LENGTH SCORE ALGORITHM 
+                                     SIGNIFICANCE BITS
+                                     RANK
+                                     HSP_FACTORY)], @args);
+  
+  defined $query_len && $self->query_length($query_len);
+
+  if( ! defined $name ) { 
+      $self->throw("Must have defined a valid name for Hit");
+  } else { 
+      $self->name($name);
+  }  
+
+  defined $acc         && $self->accession($acc);
+  defined $locus       && $self->locus($locus);
+  defined $desc        && $self->description($desc);
+  defined $length      && $self->length($length);
+  defined $algo        && $self->algorithm($algo);
+  defined $signif      && $self->significance($signif);
+  defined $score       && $self->raw_score($score);
+  defined $bits        && $self->bits($bits);
+  defined $rank        && $self->rank($rank);
+  defined $hsp_factory && $self->hsp_factory($hsp_factory);
+
+  $self->{'_iterator'} = 0;
+  if( defined $hsps  ) {
+      if( ref($hsps) !~ /array/i ) {
+          $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
+      } else {
+          my $hspcount=0;
+          while( @{$hsps} ) { 
+              $hspcount++;
+              $self->add_hsp(shift @{$hsps} );
+          }
+          $self->{'_hsps'} = undef if $hspcount == 0;
+      }
+  } 
+  else {
+      $self->{'_hsps'} = undef;
+  }
+
+  return $self;
+}
+
+=head2 add_hsp
+
+ Title   : add_hsp
+ Usage   : $hit->add_hsp($hsp)
+ Function: Add a HSP to the collection of HSPs for a Hit
+ Returns : number of HSPs in the Hit
+ Args    : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
+           for creating a HSPI object (&hsp_factory must be set to get it back)
+
+=cut
+
+sub add_hsp {
+   my ($self,$hsp) = @_;
+   # eval guards isa(): calling it on an unblessed non-hash ref would die before throw().
+   if (!defined $hsp || (ref($hsp) ne 'HASH' && !eval { $hsp->isa('Bio::Search::HSP::HSPI') })) { 
+       $self->throw("Must provide a valid Bio::Search::HSP::HSPI object or hash ref to object: $self method: add_hsp value: $hsp");
+       return;
+   }
+   push @{$self->{'_hsps'}}, $hsp;
+   if (ref($hsp) eq 'HASH') {
+       $self->{_hashes}->{$#{$self->{'_hsps'}}} = 1;
+   }
+   return scalar @{$self->{'_hsps'}};
+}
+
+=head2 hsp_factory
+
+ Title   : hsp_factory
+ Usage   : $hit->hsp_factory($hsp_factory)
+ Function: Get/set the factory used to build HSPI objects if necessary.
+ Returns : Bio::Factory::ObjectFactoryI
+ Args    : Bio::Factory::ObjectFactoryI
+
+=cut
+
+sub hsp_factory {
+    my $self = shift;
+    if (@_) { $self->{_hsp_factory} = shift }
+    return $self->{_hsp_factory} || return;
+}
+
+=head2 Bio::Search::Hit::HitI methods
+
+Implementation of Bio::Search::Hit::HitI methods
+
+=head2 name
+
+ Title   : name
+ Usage   : $hit_name = $hit->name();
+ Function: returns the name of the Hit sequence
+ Returns : a scalar string
+ Args    : [optional] scalar string to set the name
+
+=cut
+
+sub name {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_name'};
+    if( defined $value || ! defined $previous ) {
+        $value = $previous = '' unless defined $value;
+        $self->{'_name'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $acc = $hit->accession();
+ Function: Retrieve the accession (if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+
+=cut
+
+sub accession {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_accession'};
+    if( defined $value || ! defined $previous ) { 
+        $value = $previous = '' unless defined $value;
+        $self->{'_accession'} = $value;
+    } 
+        return $previous;
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $desc = $hit->description();
+ Function: Retrieve the description for the hit
+ Returns : a scalar string
+ Args    : [optional] scalar string to set the description
+
+=cut
+
+sub description {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_description'};
+    if( defined $value || ! defined $previous ) { 
+        $value = $previous = '' unless defined $value;
+        $self->{'_description'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $len = $hit->length
+ Function: Returns the length of the hit 
+ Returns : integer
+ Args    : [optional] integer to set the length
+
+=cut
+
+sub length {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_length'};
+    if( defined $value || ! defined $previous ) { 
+        $value = $previous = 0 unless defined $value;
+        $self->{'_length'} = $value;
+    } 
+    return $previous;
+}
+
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hit->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hit
+           For BLAST, the algorithm denotes what type of sequence was aligned 
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated 
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated 
+           dna-translated dna).
+ Returns : a scalar string 
+ Args    : [optional] scalar string to set the algorithm
+
+=cut
+
+sub algorithm {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_algorithm'};
+    if( defined $value || ! defined $previous ) { 
+        $value = $previous = '' unless defined $value;
+        $self->{'_algorithm'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 raw_score
+
+ Title   : raw_score
+ Usage   : $score = $hit->raw_score();
+ Function: Gets the "raw score" generated by the algorithm.  What
+           this score is exactly will vary from algorithm to algorithm,
+           returning undef if unavailable.
+ Returns : a scalar value
+ Args    : [optional] scalar value to set the raw score
+
+=cut
+
+sub raw_score {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_score'};
+    if( defined $value || ! defined $previous ) { 
+        $value = $previous = '' unless defined $value;
+        $self->{'_score'} = $value;
+    } 
+    return $previous;
+}
+
+=head2 score
+
+Equivalent to L<raw_score()|raw_score>
+
+=cut
+
+sub score { shift->raw_score(@_); }
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $significance = $hit->significance();
+ Function: Used to obtain the E or P value of a hit, i.e. the probability that
+           this particular hit was obtained purely by random chance.  If
+           information is not available (nor calculatable from other
+           information sources), return undef.
+ Returns : a scalar value or undef if unavailable
+ Args    : [optional] scalar value to set the significance
+
+=cut
+
+sub significance {
+    my ($self,$value) = @_;
+    my $previous = $self->{'_significance'};
+    if( defined $value ) { 
+        $self->{'_significance'} = $value;
+    } elsif ( ! defined $previous ) {
+        # Default to the top HSP's value.  Test the array ref before indexing:
+        # ->[0] on an undef '_hsps' would autovivify it into an empty array.
+        unless( ref($self->{'_hsps'}) && defined $self->{'_hsps'}->[0] ) {
+            $self->warn("No HSPs for this Hit (".$self->name.")");
+            return;
+        }
+        $previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
+    }
+    return $previous;
+}
+
+=head2 bits
+
+ Usage     : $hit_object->bits();
+ Purpose   : Gets the bit score of the best HSP for the current hit.
+ Example   : $bits = $hit_object->bits();
+ Returns   : Integer or undef if bit score is not set
+ Argument  : n/a
+ Comments  : For BLAST1, the non-bit score is listed in the summary line.
+
+See Also   : L<score()|score>
+
+=cut
+
+sub bits {
+    my ($self,$value) = @_; 
+    my $previous = $self->{'_bits'};
+    if( defined $value ) { 
+        $self->{'_bits'} = $value;
+    } elsif ( ! defined $previous ) {
+        # Default to the top HSP's bits; check the ref so ->[0] cannot autovivify '_hsps'.
+        unless( ref($self->{'_hsps'}) && defined $self->{'_hsps'}->[0] ) {
+            $self->warn("No HSPs for this Hit (".$self->name.")");
+            return;
+        }
+        $previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
+    }    
+    return $previous;
+}
+
+=head2 next_hsp
+
+ Title    : next_hsp
+ Usage    : while( $hsp = $obj->next_hsp()) { ... }
+ Function : Returns the next available High Scoring Pair
+ Example  : 
+ Returns  : Bio::Search::HSP::HSPI object or null if finished
+ Args     : none
+
+=cut
+
+sub next_hsp {
+    my $self = shift;
+    $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
+    return unless
+        defined($self->{'_hsps'}) 
+        && $self->{'_iterator'} <= scalar @{$self->{'_hsps'}};
+    
+    my $iterator = $self->{'_iterator'}++;
+    my $hsp = $self->{'_hsps'}->[$iterator] || return;
+    if (ref($hsp) eq 'HASH') {
+        my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
+        $hsp = $factory->create_object(%{$hsp});
+        $self->{'_hsps'}->[$iterator] = $hsp;
+        delete $self->{_hashes}->{$iterator};
+    }
+    return $hsp;  
+}
+
+
+=head2 hsps
+
+ Usage     : $hit_object->hsps();
+ Purpose   : Get a list containing all HSP objects.
+           : Get the numbers of HSPs for the current hit.
+ Example   : @hsps = $hit_object->hsps();
+           : $num  = $hit_object->hsps();  # alternatively, use num_hsps()
+ Returns   : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
+           : Scalar context: integer (number of HSPs).
+           :                 (Equivalent to num_hsps()).
+ Argument  : n/a. Relies on wantarray
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsp()|hsp>, L<num_hsps()|num_hsps>
+
+=cut
+
+sub hsps {
+    my $self = shift;
+    foreach my $i (keys %{$self->{_hashes} || {}}) {
+        my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
+        $self->{'_hsps'}->[$i] = $factory->create_object(%{$self->{'_hsps'}->[$i]});
+        delete $self->{_hashes}->{$i};
+    }
+    
+    return wantarray() ? @{$self->{'_hsps'} || []} : scalar(@{$self->{'_hsps'} || []});
+}
+
+=head2 num_hsps
+
+ Usage     : $hit_object->num_hsps();
+ Purpose   : Get the number of HSPs for the present hit.
+ Example   : $nhsps = $hit_object->num_hsps();
+ Returns   : Integer or '-' if HSPs have not been collected
+ Argument  : n/a
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+sub num_hsps {
+    my $self = shift;
+    
+    unless ($self->{'_hsps'}) {
+        return '-';
+    }
+    
+    return scalar(@{$self->{'_hsps'}});
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $hit->rewind;
+ Function: Allow one to reset the HSP iterator to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind{
+   my ($self) = @_;
+   $self->{'_iterator'} = 0;
+}
+
+=head2 ambiguous_aln
+
+ Usage     : $ambig_code = $hit_object->ambiguous_aln();
+ Purpose   : Sets/Gets ambiguity code data member.
+ Example   : (see usage)
+ Returns   : String = 'q', 's', 'qs', '-'
+           :   'q'  = query sequence contains overlapping sub-sequences 
+           :          while sbjct does not.
+           :   's'  = sbjct sequence contains overlapping sub-sequences 
+           :          while query does not.
+           :   'qs' = query and sbjct sequences contain overlapping sub-sequences
+           :          relative to each other.
+           :   '-'  = query and sbjct sequences do not contain multiple domains 
+           :          relative to each other OR both contain the same distribution
+           :          of similar domains.
+ Argument  : n/a
+ Throws    : n/a
+ Comment   : Note: "sbjct" is synonymous with "hit"
+
+=cut
+
+sub ambiguous_aln {
+    my $self = shift;
+    if(@_) { $self->{'_ambiguous_aln'} = shift; }
+    $self->{'_ambiguous_aln'} || '-';
+}
+
+=head2 overlap
+
+See documentation in L<Bio::Search::Hit::HitI::overlap()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub overlap {
+    my $self = shift; 
+    if(@_) { $self->{'_overlap'} = shift; }
+    defined $self->{'_overlap'} ? $self->{'_overlap'} : 0;
+}
+
+
+=head2 n
+
+ Usage     : $hit_object->n();
+ Purpose   : Gets the N number for the current hit.
+           : This is the number of HSPs in the set which was ascribed
+           : the lowest P-value (listed on the description line).
+           : This number is not the same as the total number of HSPs.
+           : To get the total number of HSPs, use num_hsps().
+ Example   : $n = $hit_object->n();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set (BLAST2 reports).
+ Comments  : Note that the N parameter is not reported in gapped BLAST2.
+           : Calling n() on such reports will result in a call to num_hsps().
+           : The num_hsps() method will count the actual number of
+           : HSPs in the alignment listing, which may exceed N in
+           : some cases.
+
+See Also   : L<num_hsps()|num_hsps>
+
+=cut
+
+sub n {
+    my $self = shift; 
+
+    # The check for $self->{'_n'} is a remnant from the 'query' mode days
+    # in which the sbjct object would collect data from the description 
+    # line only.
+
+    my ($n);
+    if(not defined($self->{'_n'})) {
+	if( $self->hsp ) {
+	    $n = $self->hsp->n;
+	}
+    } else {
+        $n = $self->{'_n'}; 
+    } 
+    $n ||= $self->num_hsps;
+
+    return $n;
+}
+
+=head2 p
+
+ Usage     : $hit_object->p( [format] );
+ Purpose   : Get the P-value for the best HSP of the given BLAST hit.
+           : (Note that P-values are not provided with NCBI Blast2 reports).
+ Example   : $p =  $sbjct->p;
+           : $p =  $sbjct->p('exp');  # get exponent only.
+           : ($num, $exp) =  $sbjct->p('parts');  # split sci notation into parts
+ Returns   : Float or scientific notation number (the raw P-value, DEFAULT).
+           : Integer if format == 'exp' (the magnitude of the base 10 exponent).
+           : 2-element list (float, int) if format == 'parts' and P-value
+           :                is in scientific notation (See Comments).
+ Argument  : format: string of 'raw' | 'exp' | 'parts'
+           :    'raw' returns value given in report. Default. (1.2e-34)
+           :    'exp' returns exponent value only (34)
+           :    'parts' returns the decimal and exponent as a 
+           :            2-element list (1.2, -34) (See Comments).
+ Throws    : Warns if no P-value is defined. Uses expect instead.
+ Comments  : Using the 'parts' argument is not recommended since it will not
+           : work as expected if the P-value is not in scientific notation.
+           : That is, floats are not converted into sci notation before
+           : splitting into parts.
+
+See Also   : L<expect()|expect>, L<signif()|signif>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
+
+=cut
+
+sub p {
+# Some duplication of logic for p(), expect() and signif() for the sake of performance.
+    my ($self, $fmt) = @_;
+
+    my $val = $self->{'_p'};
+
+    # $val can be zero.
+    if(not defined $val) {
+        # P-value not defined, must be a NCBI Blast2 report.
+        # Use expect instead.
+        $self->warn( "P-value not defined. Using expect() instead.");
+        $val = $self->{'_expect'};
+    }
+
+    return $val if not $fmt or $fmt =~ /^raw/i;
+    ## Special formats: exponent-only or as list.
+    return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
+    # Split on a single 'e' or 'E'; the former /eE/ only matched the literal "eE".
+    return (split (/[eE]/, $val)) if $fmt =~ /^parts/i;
+    ## Default: return the raw P-value.
+    return $val;
+}
+
+=head2 hsp
+
+ Usage     : $hit_object->hsp( [string] );
+ Purpose   : Get a single HSPI object for the present HitI object.
+ Example   : $hspObj  = $hit_object->hsp;  # same as 'best'
+           : $hspObj  = $hit_object->hsp('best');
+           : $hspObj  = $hit_object->hsp('worst');
+ Returns   : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
+ Argument  : String (or no argument).
+           :   No argument (default) = highest scoring HSP (same as 'best').
+           :   'best' or 'first' = highest scoring HSP.
+           :   'worst' or 'last' = lowest scoring HSP.
+ Throws    : Exception if the HSPs have not been collected.
+           : Exception if an unrecognized argument is used.
+
+See Also   : L<hsps()|hsps>, L<num_hsps()|num_hsps>
+
+=cut
+
+sub hsp {
+    my ($self, $which) = @_;
+    $which = 'best' unless $which;
+
+    # The HSP list must already have been collected as a reference.
+    $self->throw("Can't get HSPs: data not collected.")
+        unless ref $self->{'_hsps'};
+
+    my @all_hsps = $self->hsps;
+    if ( $which =~ /best|first|1/i ) { return $all_hsps[0]; }
+    if ( $which =~ /worst|last/i )   { return $all_hsps[-1]; }
+
+    # Anything else is an unrecognized selector.
+    $self->throw("Can't get HSP for: $which\n" .
+                 "Valid arguments: 'best', 'worst'");
+}
+
+=head2 logical_length
+
+ Usage     : $hit_object->logical_length( [seq_type] );
+           : (mostly intended for internal use).
+ Purpose   : Get the logical length of the hit sequence.
+           : This is necessary since the number of identical/conserved residues 
+           : can be in terms of peptide sequence space, yet the query and/or hit
+           : sequence are in nucleotide space.
+ Example   : $len    = $hit_object->logical_length();
+ Returns   : Integer 
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  :
+           : In the case of BLAST flavors:
+           : For TBLASTN reports, the length of the aligned portion of the 
+           : nucleotide hit sequence is divided by 3; for BLASTX reports, 
+           : the length of the aligned portion of the nucleotide query 
+           : sequence is divided by 3. For TBLASTX reports, the length of 
+           : both hit and query sequence are converted.
+           :
+           : This is important for functions like frac_aligned_query()
+           : which need to operate in amino acid coordinate space when dealing
+           : with [T]BLAST[NX] type reports.
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+sub logical_length {
+    my ($self, $seqType) = @_;
+    $seqType ||= 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    my $algo = $self->algorithm;
+
+    # Raw length of whichever sequence was requested.
+    my $raw_len;
+    if ( $seqType ne 'sbjct' ) {
+        # Query side: a true query length is mandatory.
+        $raw_len = $self->query_length();
+        $self->throw("Must have defined query_len") unless $raw_len;
+    }
+    else {
+        $raw_len = $self->length;
+    }
+
+    # Let SearchUtils convert the length, then truncate to an integer.
+    return int( Bio::Search::SearchUtils::logical_length($algo, $seqType, $raw_len) );
+}
+
+=head2 length_aln
+
+ Usage     : $hit_object->length_aln( [seq_type] );
+ Purpose   : Get the total length of the aligned region for query or sbjct seq.
+           : This number will include all HSPs
+ Example   : $len    = $hit_object->length_aln(); # default = query
+           : $lenAln = $hit_object->length_aln('query');
+ Returns   : Integer 
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : Exception if the argument is not recognized.
+ Comments  : This method will report the logical length of the alignment,
+           : meaning that for TBLAST[NX] reports, the length is reported
+           : using amino acid coordinate space (i.e., nucleotides / 3).
+           : 
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : If you don't want the tiled data, iterate through each HSP
+           : calling length() on each (use hsps() to get all HSPs).
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+sub length_aln {
+    my ($self, $seqType, $num) = @_;
+
+    $seqType = 'query' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    my $slot = '_length_aln_' . $seqType;    # hash key holding the length
+
+    # Acting as a setter: store and hand back the new value.
+    return $self->{$slot} = $num if defined $num;
+
+    # Without HSPs there is nothing to report: warn, return placeholder.
+    if ( !$self->{'_hsps'} ) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    # HSPs are tiled (once) before the stored length is read.
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    my $aln_len = $self->{$slot};
+
+    ## If we don't have data, figure out what went wrong.
+    $self->throw("Can't get length aln for sequence type \"$seqType\". " . 
+                 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')")
+        unless $aln_len;
+
+    return $aln_len;
+}
+
+=head2 gaps
+
+ Usage     : $hit_object->gaps( [seq_type] );
+ Purpose   : Get the number of gaps in the aligned query, hit, or both sequences.
+           : Data is summed across all HSPs.
+ Example   : $qgaps = $hit_object->gaps('query');
+           : $hgaps = $hit_object->gaps('hit');
+           : $tgaps = $hit_object->gaps();    # default = total (query + hit)
+ Returns   : scalar context: integer
+           : array context without args: two-element list of integers  
+           :    (queryGaps, hitGaps)
+           : Array context can be forced by providing an argument of 'list' or 'array'.
+           :
+           : CAUTION: Calling this method within printf or sprintf is array context.
+           : So this function may not give you what you expect. For example:
+           :          printf "Total gaps: %d", $hit->gaps();
+           : Actually returns a two-element array, so what gets printed 
+           : is the number of gaps in the query, not the total
+           :
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list'  (default = 'total')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through each HSP object.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : Not relying on wantarray since that will fail in situations 
+           : such as printf "%d", $hit->gaps() in which you might expect to 
+           : be printing the total gaps, but evaluates to array context.
+
+See Also   : L<length_aln()|length_aln>
+
+=cut
+
+sub gaps {
+    my( $self, $seqType, $num ) = @_;
+    # No seq_type given: list context selects 'list', any other context 'total'.
+    $seqType ||= (wantarray ? 'list' : 'total');
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    # No HSPs: warn and return '-' placeholders shaped for the calling context.
+    unless ($self->{'_hsps'}) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return wantarray ? ('-','-') : '-';
+        #return '-';
+    }
+    # Tile the HSPs once; note this runs before the setter branch below as well.
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+    # Member names ('_gaps_query', '_gaps_sbjct') are lower case.
+    $seqType = lc($seqType);
+
+    if( defined $num ) {    # setter: only 'query' or 'sbjct' may be assigned
+        $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
+
+        return $self->{'_gaps_'.$seqType} = $num;
+    }
+    elsif($seqType =~ /list|array/i) {    # two-element list: (query gaps, hit gaps)
+        return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
+    }
+    elsif($seqType eq 'total') {    # sum of both counts; 0 when the sum is false
+        return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
+    } else {    # a single sequence type; 0 when nothing is stored
+        return $self->{'_gaps_'.$seqType} || 0;
+    }
+}    
+
+
+=head2 matches
+
+See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub matches {
+    my( $self, $arg1, $arg2) = @_;
+    my(@data,$data);
+    # No HSPs: warn and return '-' placeholders shaped for the calling context.
+    unless ($self->{'_hsps'}) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return wantarray ? ('-','-') : '-';
+    }
+    # Tile the HSPs once before reading (or overwriting) the totals.
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+    # No (true) first argument: return both totals as (identical, conserved).
+    unless( $arg1 ) {
+        @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
+
+        return @data;
+    } else {
+        # Two arguments act as a setter: (identical, conserved).
+        if( defined $arg2 ) {
+            $self->{'_totalIdentical'} = $arg1;
+            $self->{'_totalConserved'} = $arg2;
+            return ( $arg1, $arg2 );
+        }
+        elsif($arg1 =~ /^id/i) {    # 'id...' selects the identical total
+            $data = $self->{'_totalIdentical'};
+        } else {    # any other single argument selects the conserved total
+            $data = $self->{'_totalConserved'};
+        }
+        #print STDERR "\nmatches(): id=$self->{'_totalIdentical'}, cons=$self->{'_totalConserved'}\n\n";
+        return $data;
+    }
+    
+    ## If we make it to here, it is likely the case that
+    ## the parser constructed a minimal hit object from the summary line only.
+    ## It either deliberately skipped parsing the alignment section,
+    ## or was not able to because it was absent (due to blast executable parameter
+    ## setting such as -b 0 (B=0 for WU-BLAST) )
+    #$self->throw("Can't get identical or conserved data: no data.");
+}
+
+
+=head2 start
+
+ Usage     : $sbjct->start( [seq_type] );
+ Purpose   : Gets the start coordinate for the query, sbjct, or both sequences
+           : in the BlastHit object. If there is more than one HSP, the lowest start
+           : value of all HSPs is returned.
+ Example   : $qbeg = $sbjct->start('query');
+           : $sbeg = $sbjct->start('hit');
+           : ($qbeg, $sbeg) = $sbjct->start();
+ Returns   : scalar context: integer 
+           : array context without args: list of two integers (queryStart, sbjctStart)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If there is more than one
+           : HSP and they have not already been tiled, they will be tiled first automatically..
+           : Remember that the start and end coordinates of all HSPs are 
+           : normalized so that start < end. Strand information can be
+           : obtained by calling $hit->strand().
+
+See Also   : L<end()|end>, L<range()|range>, L<strand()|strand>, 
+             L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+sub start {
+    my ($self, $seqType, $num) = @_;
+    # No HSPs: warn and return '-' placeholders shaped for the calling context.
+    unless ($self->{'_hsps'}) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return wantarray ? ('-','-') : '-';
+    }
+    # No seq_type given: list context selects 'list', any other context 'query'.
+    $seqType ||= (wantarray ? 'list' : 'query');
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    # Setter: the key is '_' . lower-cased type . 'Start', e.g. '_queryStart'.
+    if( defined $num ) {
+        $seqType = "_\L$seqType\E";
+        return $self->{$seqType.'Start'} = $num;
+    }
+
+    # If there is only one HSP, defer this call to the solitary HSP.
+    if($self->num_hsps == 1) {
+        return $self->hsp->start($seqType);
+    } else {
+        &Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+	if($seqType =~ /list|array/i) {    # (query start, hit start)
+	    return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
+	} else {
+	    ## Sensitive to member name changes.
+	    $seqType = "_\L$seqType\E";
+	    return $self->{$seqType.'Start'};
+	}
+    }
+}
+
+
+=head2 end
+
+ Usage     : $sbjct->end( [seq_type] );
+ Purpose   : Gets the end coordinate for the query, sbjct, or both sequences
+           : in the BlastHit object. If there is more than one HSP, 
+             the largest end
+           : value of all HSPs is returned.
+ Example   : $qend = $sbjct->end('query');
+           : $send = $sbjct->end('hit');
+           : ($qend, $send) = $sbjct->end();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers 
+           : (queryEnd, sbjctEnd)
+           : Array context can be "induced" by providing an argument 
+           : of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'sbjct'
+           :  (case insensitive). If not supplied, 'query' is used.
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If there is 
+           : more than one HSP and they have not already been tiled, 
+           : they will be tiled first automatically..
+           : Remember that the start and end coordinates of all HSPs are 
+           : normalized so that start < end. Strand information can be
+           : obtained by calling $hit->strand().
+
+See Also   : L<start()|start>, L<range()|range>, L<strand()|strand>
+
+=cut
+
+sub end {
+    my ($self, $seqType, $num) = @_;
+    # NOTE(review): unlike start(), no warning is issued when there are no HSPs - confirm intent.
+    unless ($self->{'_hsps'}) {
+        return wantarray ? ('-','-') : '-';
+    }
+    # No seq_type given: list context selects 'list', any other context 'query'.
+    $seqType ||= (wantarray ? 'list' : 'query');
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    # Setter: the key is '_' . lower-cased type . 'Stop', e.g. '_queryStop'.
+    if( defined $num ) {
+        $seqType = "_\L$seqType\E";
+        return $self->{$seqType.'Stop'} = $num;
+    }
+
+    # If there is only one HSP, defer this call to the solitary HSP.
+    if($self->num_hsps == 1) {
+        return $self->hsp->end($seqType);
+    } else {
+        Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+        if($seqType =~ /list|array/i) {    # (query end, hit end)
+            return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
+        } else {
+            ## Sensitive to member name changes.
+            $seqType = "_\L$seqType\E";
+            return $self->{$seqType.'Stop'};
+        }
+    }
+}
+
+=head2 range
+
+ Usage     : $sbjct->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($qbeg, $qend) = $sbjct->range('query');
+           : ($sbeg, $send) = $sbjct->range('hit');
+ Returns   : Two-element array of integers 
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<end()|end>
+
+=cut
+
+sub range {
+    my $self    = shift;
+    my $seqType = shift || 'query';
+    if ( $seqType eq 'hit' ) { $seqType = 'sbjct'; }    # 'hit' is an alias of 'sbjct'
+    return ( $self->start($seqType), $self->end($seqType) );    # (start, end)
+}
+
+
+=head2 frac_identical
+
+ Usage     : $hit_object->frac_identical( [seq_type] );
+ Purpose   : Get the overall fraction of identical positions across all HSPs.
+           : The number refers to only the aligned regions and does not
+           : account for unaligned regions in between the HSPs, if any.
+ Example   : $frac_iden = $hit_object->frac_identical('query');
+ Returns   : Float (3-decimal precision, e.g., 0.750).
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
+           : default = 'query' (but see comments below).
+           : ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  :
+           : To compute the fraction identical, the logical length of the 
+           : aligned portion of the sequence is used, meaning that
+           : in the case of BLAST flavors, for TBLASTN reports, the length of 
+           : the aligned portion of the 
+           : nucleotide hit sequence is divided by 3; for BLASTX reports, 
+           : the length of the aligned portion of the nucleotide query 
+           : sequence is divided by 3. For TBLASTX reports, the length of 
+           : both hit and query sequence are converted.
+           : This is necessary since the number of identical residues is
+           : in terms of peptide sequence space.
+           :
+           : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           :
+           : Therefore, when called with an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used. Total does NOT take into account HSP
+           : tiling, so it should not be used.
+           :
+           : To get the fraction identical among only the aligned residues,
+           : ignoring the gaps, call this method without an argument or 
+           : with an argument of 'query' or 'hit'.
+           :
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
+
+=cut
+
+sub frac_identical {
+    my $self    = shift;
+    my $seqType = shift || 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    ## Sensitive to member name format, so normalize to lower case.
+    $seqType = lc $seqType;
+
+    if ( !$self->{'_hsps'} ) {    # no HSPs: warn, return placeholder
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    # Identical positions divided by the aligned length of the chosen sequence.
+    my $num_ident = $self->matches('id');
+    my $aln_len   = $self->length_aln($seqType);
+    my $fraction  = $num_ident / $aln_len;
+    my $rounded   = sprintf( "%.3f", $fraction );
+
+    # Round down iff normal rounding yields 1 (just like blast)
+    return 0.999 if $rounded == 1 && $fraction < 1;
+    return $rounded;
+}
+
+
+=head2 frac_conserved
+
+ Usage     : $hit_object->frac_conserved( [seq_type] );
+ Purpose   : Get the overall fraction of conserved positions across all HSPs.
+           : The number refers to only the aligned regions and does not
+           : account for unaligned regions in between the HSPs, if any.
+ Example   : $frac_cons = $hit_object->frac_conserved('hit');
+ Returns   : Float (3-decimal precision, e.g., 0.750).
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
+           : default = 'query' (but see comments below).
+           : ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  :
+           : To compute the fraction conserved, the logical length of the 
+           : aligned portion of the sequence is used, meaning that
+           : in the case of BLAST flavors, for TBLASTN reports, the length of 
+           : the aligned portion of the 
+           : nucleotide hit sequence is divided by 3; for BLASTX reports, 
+           : the length of the aligned portion of the nucleotide query 
+           : sequence is divided by 3. For TBLASTX reports, the length of 
+           : both hit and query sequence are converted.
+           : This is necessary since the number of conserved residues is
+           : in terms of peptide sequence space.
+           :
+           : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           :
+           : Therefore, when called with an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used. Total does NOT take into account HSP
+           : tiling, so it should not be used.
+           :
+           : To get the fraction conserved among only the aligned residues,
+           : ignoring the gaps, call this method without an argument or 
+           : with an argument of 'query' or 'hit'.
+           :
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
+
+=cut
+
+sub frac_conserved {
+    my $self    = shift;
+    my $seqType = shift || 'query';
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    ## Sensitive to member name format, so normalize to lower case.
+    $seqType = lc $seqType;
+
+    if ( !$self->{'_hsps'} ) {    # no HSPs: warn, return placeholder
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    # Conserved positions divided by the aligned length of the chosen sequence.
+    my $num_consv = $self->matches('cons');
+    my $aln_len   = $self->length_aln($seqType);
+    my $fraction  = $num_consv / $aln_len;
+    my $rounded   = sprintf( "%.3f", $fraction );
+
+    # Round down iff normal rounding yields 1 (just like blast)
+    return 0.999 if $rounded == 1 && $fraction < 1;
+    return $rounded;
+}
+
+
+
+
+=head2 frac_aligned_query
+
+ Usage     : $hit_object->frac_aligned_query();
+ Purpose   : Get the fraction of the query sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_query();
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>,  L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
+
+=cut
+
+sub frac_aligned_query {
+    my ($self) = @_;
+
+    if ( !$self->{'_hsps'} ) {    # no HSPs: warn, return placeholder
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    my $aligned = $self->length_aln('query');
+    my $whole   = $self->logical_length('query');
+    return sprintf( "%.2f", $aligned / $whole );
+}
+
+
+
+=head2 frac_aligned_hit
+
+ Usage     : $hit_object->frac_aligned_hit();
+ Purpose   : Get the fraction of the hit (sbjct) sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_hit();
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<logical_length()|logical_length>, L<length_aln()|length_aln>,  L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
+
+=cut
+
+sub frac_aligned_hit {
+    my ($self) = @_;
+
+    if ( !$self->{'_hsps'} ) {    # no HSPs: warn, return placeholder
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    my $aligned = $self->length_aln('sbjct');
+    return sprintf( "%.2f", $aligned / $self->logical_length('sbjct') );
+}
+
+
+## These methods are being maintained for backward compatibility. 
+
+=head2 frac_aligned_sbjct
+
+Same as L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+*frac_aligned_sbjct = \&frac_aligned_hit;    # backward-compatible alias (target name was misspelled 'fract_aligned_hit')
+
+=head2 num_unaligned_sbjct
+
+Same as L<num_unaligned_hit()|num_unaligned_hit>
+
+=cut
+
+*num_unaligned_sbjct = \&num_unaligned_hit;    # backward-compatible alias: both names call the same sub
+
+
+=head2 num_unaligned_hit
+
+ Usage     : $hit_object->num_unaligned_hit();
+ Purpose   : Get the number of the unaligned residues in the hit sequence.
+           : Sums across all HSPs.
+ Example   : $num_unaln = $hit_object->num_unaligned_hit();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : See notes regarding logical lengths in the comments for frac_aligned_hit().
+           : They apply here as well.
+           : If you need data for each HSP, use hsps() and then interate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+
+See Also   : L<num_unaligned_query()|num_unaligned_query>,  L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+sub num_unaligned_hit {
+    my ($self) = @_;
+
+    if ( !$self->{'_hsps'} ) {    # no HSPs: warn, return placeholder
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    # Logical hit length minus its aligned length, never below zero.
+    my $unaligned = $self->logical_length('sbjct') - $self->length_aln('sbjct');
+    return $unaligned < 0 ? 0 : $unaligned;
+}
+
+
+=head2 num_unaligned_query
+
+ Usage     : $hit_object->num_unaligned_query();
+ Purpose   : Get the number of the unaligned residues in the query sequence.
+           : Sums across all HSPs.
+ Example   : $num_unaln = $hit_object->num_unaligned_query();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : See notes regarding logical lengths in the comments for frac_aligned_query().
+           : They apply here as well.
+           : If you need data for each HSP, use hsps() and then interate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+
+See Also   : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>,  L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
+
+=cut
+
+sub num_unaligned_query {
+    my ($self) = @_;
+
+    if ( !$self->{'_hsps'} ) {    # no HSPs: warn, return placeholder
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    # Logical query length minus its aligned length, never below zero.
+    my $unaligned = $self->logical_length('query') - $self->length_aln('query');
+    return $unaligned < 0 ? 0 : $unaligned;
+}
+
+
+
+=head2 seq_inds
+
+ Usage     : $hit->seq_inds( seq_type, class, collapse );
+ Purpose   : Get a list of residue positions (indices) across all HSPs
+           : for identical or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hit->seq_inds('query', 'identical');
+           : @h_ind = $hit->seq_inds('hit', 'conserved');
+           : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
+ Returns   : Array of integers 
+           : May include ranges if collapse is non-zero.
+ Argument  : [0] seq_type  = 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :                 ('sbjct' is synonymous with 'hit')
+           : [1] class     = 'identical' or 'conserved' (default = 'identical')
+           :              (can be shortened to 'id' or 'cons')
+           :              (actually, anything not 'id' will evaluate to 'conserved').
+           : [2] collapse  = boolean, if non-zero, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11" 
+           :             collapses to "1-5 7 9-11". This is useful for 
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+
+See Also   : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+sub seq_inds {
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    $seqType  = 'query'     unless $seqType;
+    $class    = 'identical' unless $class;
+    $collapse = 0           unless $collapse;
+    $seqType  = 'sbjct' if $seqType eq 'hit';
+
+    # Pool the positions reported by every HSP.
+    my @positions;
+    for my $one_hsp ($self->hsps) {
+        push @positions, $one_hsp->seq_inds($seqType, $class);
+    }
+
+    # Drop duplicates and put the merged positions in numeric order.
+    if (@positions) {
+        my %seen = map { $_ => 1 } @positions;
+        @positions = sort { $a <=> $b } keys %seen;
+    }
+
+    return @positions unless $collapse;
+    return &Bio::Search::SearchUtils::collapse_nums(@positions);
+}
+
+
+=head2 strand
+
+See documentation in L<Bio::Search::Hit::HitI::strand()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub strand {
+    my ($self, $seqType, $strnd) = @_;
+    # No HSPs: warn and return '-' placeholders shaped for the calling context.
+    unless ($self->{'_hsps'}) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return wantarray ? ('-','-') : '-';
+        #return '-';
+    }
+    # Tile the HSPs once; note this runs before the setter branch below as well.
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+    # No seq_type given: list context selects 'list', any other context 'query'.
+    $seqType ||= (wantarray ? 'list' : 'query');
+    $seqType = 'sbjct' if $seqType eq 'hit';
+    # Member names ('_strand_query', '_strand_sbjct') are lower case.
+    $seqType = lc($seqType);
+
+    if( defined $strnd ) {    # setter: only 'query' or 'sbjct' may be assigned
+        $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
+
+        return $self->{'_strand_'.$seqType} = $strnd;
+    }
+
+    my ($qstr, $hstr);
+    # If there is only one HSP, defer this call to the solitary HSP.
+    if($self->num_hsps == 1) {
+        return $self->hsp->strand($seqType);
+    } 
+    elsif( defined $self->{'_strand_query'}) {
+        # Get the data computed during hsp tiling.
+        $qstr = $self->{'_strand_query'};
+        $hstr = $self->{'_strand_sbjct'}
+    }
+    else {
+        # otherwise, iterate through all HSPs collecting strand info.
+        # This will return the string "-1/1" if there are HSPs on different strands.
+        # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
+        # (unless the above elsif{} is commented out).
+        my (%qstr, %hstr);
+        foreach my $hsp( $self->hsps ) {
+            my ( $q, $h ) = $hsp->strand();
+            $qstr{ $q }++;
+            $hstr{ $h }++;
+        }
+        $qstr = join( '/', sort keys %qstr);    # distinct query strands, '/'-joined
+        $hstr = join( '/', sort keys %hstr);    # distinct hit strands, '/'-joined
+    }
+    # Shape the result: (query, hit) list, query only, or hit only.
+    if($seqType =~ /list|array/i) {
+        return ($qstr, $hstr);
+    } elsif( $seqType eq 'query' ) {
+        return $qstr;
+    } else {
+        return $hstr;
+    }
+}
+
+=head2 frame
+
+See documentation in L<Bio::Search::Hit::HitI::frame()|Bio::Search::Hit::HitI>
+
+=cut
+
+sub frame {
+    my ($self, $frm) = @_;
+
+    # No HSPs: warn and return the '-' placeholder.
+    if ( !$self->{'_hsps'} ) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return '-';
+    }
+
+    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
+
+    # Setter form: store the supplied frame and return it.
+    return $self->{'_frame'} = $frm if defined $frm;
+
+    # A stored '_frame' is a remnant from the 'query' mode days, in which the
+    # sbjct object would collect data from the description line only.
+    if ( defined $self->{'_frame'} ) {
+        return $self->{'_frame'};
+    }
+
+    # Otherwise use the frame of the best HSP. The intermediate variable keeps
+    # the frame() call in scalar context, whatever the caller's context is.
+    my $hsp_frame = $self->hsp->frame;
+
+    return $hsp_frame;
+}
+
+=head2 rank
+
+ Title   : rank
+ Usage   : $obj->rank($newval)
+ Function: Get/Set the rank of this Hit in the Query search list
+           i.e. this is the Nth hit for a specific query
+ Returns : value of rank
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub rank {
+    my ($self, @new_rank) = @_;
+
+    # Setter: any argument (even undef) replaces the stored rank.
+    if (@new_rank) {
+        return $self->{'_rank'} = $new_rank[0];
+    }
+
+    # Getter: an unset (or false) rank is reported as 1.
+    return $self->{'_rank'} || 1;
+}
+
+=head2 locus
+
+ Title   : locus
+ Usage   : $locus = $hit->locus();
+ Function: Retrieve the locus (if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : [optional] scalar string to set the locus
+
+=cut
+
+sub locus {
+    my ($self, $value) = @_;
+
+    # Whatever was stored before this call; this is what gets returned,
+    # also when a new value is being set.
+    my $previous = $self->{'_locus'};
+
+    if ( defined $value ) {
+        # Setter: store the new locus, return the old one.
+        $self->{'_locus'} = $value;
+    }
+    elsif ( !defined $previous ) {
+        # First read: derive the locus from a name of the form
+        # db|accession|locus (gb, emb, dbj and ref only), else use ''.
+        # The name is checked for definedness first so that a hit without
+        # a name does not trigger an "uninitialized value" warning.
+        my $name = $self->{'_name'};
+        if ( defined $name && $name =~ /(?:gb|emb|dbj|ref)\|.*\|(.*)/ ) {
+            $previous = $1;
+        }
+        else {
+            $previous = '';
+        }
+        # Cache the derived value so later calls skip the parsing.
+        $self->{'_locus'} = $previous;
+    }
+
+    return $previous;
+}
+
+=head2 each_accession_number
+
+ Title   : each_accession_number
+ Usage   : @each_accession_number = $hit->each_accession_number();
+ Function: Get each accession number listed in the description of the hit.
+           If there are no alternatives, then only the primary accession will 
+           be given
+ Returns : list of all accession numbers in the description
+ Args    : none
+
+=cut
+
+sub each_accession_number {
+    my ($self,$value) = @_;
+
+    # The primary accession always heads the list.
+    my @accnums = ( $self->{'_accession'} );
+
+    my $desc = $self->{'_description'};
+    return @accnums unless defined $desc;
+
+    # Walk over every db|field|field token found in the description.
+    while ( $desc =~ /(\b\S+\|\S*\|\S*\s?)/g ) {
+        my $id = $1;
+        my $acc;
+
+        # Identifier syntaxes, from ftp://ftp.ncbi.nih.gov/blast/db/README
+        #
+        #   Database Name                     Identifier Syntax
+        #   ============================      ========================
+        #   GenBank                           gb|accession|locus
+        #   EMBL Data Library                 emb|accession|locus
+        #   DDBJ, DNA Database of Japan       dbj|accession|locus
+        #   NBRF PIR                          pir||entry
+        #   Protein Research Foundation       prf||name
+        #   SWISS-PROT                        sp|accession|entry name
+        #   Brookhaven Protein Data Bank      pdb|entry|chain
+        #   Patents                           pat|country|number
+        #   GenInfo Backbone Id               bbs|number
+        #   General database identifier       gnl|database|identifier
+        #   NCBI Reference Sequence           ref|accession|locus
+        #   Local Sequence identifier         lcl|identifier
+        if ( $id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/ ) {
+            # accession is the second field; drop any ".version" suffix
+            ($acc) = split /\./, $2;
+        }
+        elsif ( $id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/ ) {
+            # accession is the third field; drop any ".version" suffix
+            ($acc) = split /\./, $3;
+        }
+        elsif ( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/ ) {
+            # numeric identifiers: keep the whole token
+            $acc = $id;
+        }
+        elsif ( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) {
+            # discontinued...
+            $acc = $2;
+        }
+        else {
+            # punt, not matching any of the db's listed above
+            $acc = $id;
+        }
+
+        push @accnums, $acc;
+    }
+
+    return @accnums;
+}
+
+=head2 tiled_hsps
+
+See documentation in L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
+
+=cut
+
+sub tiled_hsps {
+    my ($self, @flag) = @_;
+
+    # Setter: any argument (even undef) replaces the stored flag.
+    if (@flag) {
+        return $self->{'_tiled_hsps'} = $flag[0];
+    }
+
+    # Getter: the stored flag, undef if it was never set.
+    return $self->{'_tiled_hsps'};
+}
+
+=head2 query_length
+
+ Title   : query_length
+ Usage   : $obj->query_length($newval)
+ Function: Get/Set the query_length
+ Returns : value of query_length (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub query_length {
+    my ($self, @new_length) = @_;
+
+    # Setter: any argument (including undef) replaces the stored length.
+    if (@new_length) {
+        return $self->{'_query_length'} = $new_length[0];
+    }
+
+    # Getter: the stored length, undef if it was never set.
+    return $self->{'_query_length'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HMMERHit.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HMMERHit.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HMMERHit.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,297 @@
+# $Id: HMMERHit.pm,v 1.6.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Hit::HMMERHit
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::HMMERHit - A Hit module for HMMER hits
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Hit::HMMERHit;
+    my $hit = new Bio::Search::Hit::HMMERHit;
+    # use it in the same way as Bio::Search::Hit::GenericHit
+
+=head1 DESCRIPTION
+
+This is a specialization of L<Bio::Search::Hit::GenericHit>.  There
+are a few new methods, L<next_domain> and L<domains>.  Note that
+L<bits> and L<iteration> make no sense for this object and will
+return 0.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Hit::HMMERHit;
+use strict;
+
+
+use base qw(Bio::Search::Hit::GenericHit);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Hit::HMMERHit();
+ Function: Builds a new Bio::Search::Hit::HMMERHit object 
+ Returns : Bio::Search::Hit::HMMERHit
+ Args    : 
+
+ Plus the Bio::Search::Hit::GenericHit inherited params
+           -name         => Name of Hit (required)
+           -description  => Description (optional)
+           -accession    => Accession number (optional)
+           -length       => Length of the Hit (optional)
+           -score        => Raw Score for the Hit (optional)
+           -significance => Significance value for the Hit (optional)
+           -algorithm    => Algorithm used (BLASTP, FASTX, etc...)
+           -hsps         => Array ref of HSPs for this Hit. 
+
+
+=cut
+
+
+=head2 next_domain
+
+ Title   : next_domain 
+ Usage   : my $domain = $hit->next_domain();
+ Function: An alias for L<next_hsp()>, this will return the next HSP
+ Returns : L<Bio::Search::HSP::HSPI> object
+ Args    : none
+
+
+=cut
+
+sub next_domain {
+    # Alias: a HMMER "domain" is simply the next HSP of this hit.
+    my $self = shift;
+    return $self->next_hsp;
+}
+
+=head2 domains
+
+ Title   : domains
+ Usage   : my @domains = $hit->domains();
+ Function: An alias for L<hsps()>, this will return the full list of hsps
+ Returns : array of L<Bio::Search::HSP::HSPI> objects
+ Args    : none
+
+
+=cut
+
+sub domains {
+    # Alias: the domains of a HMMER hit are its HSPs.
+    my $self = shift;
+    return $self->hsps();
+}
+
+
+=head2 inherited Bio::Search::Hit::GenericHit methods
+
+=cut
+
+=head2 add_hsp
+
+ Title   : add_hsp
+ Usage   : $hit->add_hsp($hsp)
+ Function: Add a HSP to the collection of HSPs for a Hit
+ Returns : number of HSPs in the Hit
+ Args    : Bio::Search::HSP::HSPI object
+
+
+=cut
+
+=head2 Bio::Search::Hit::HitI methods
+
+=cut
+
+=head2 name
+
+ Title   : name
+ Usage   : $hit_name = $hit->name();
+ Function: returns the name of the Hit sequence
+ Returns : a scalar string
+ Args    : [optional] scalar string to set the name
+
+=cut
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $acc = $hit->accession();
+ Function: Retrieve the accession (if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+
+=cut
+
+=head2 description
+
+ Title   : description
+ Usage   : $desc = $hit->description();
+ Function: Retrieve the description for the hit
+ Returns : a scalar string
+ Args    : [optional] scalar string to set the description
+
+=cut
+
+=head2 length
+
+ Title   : length
+ Usage   : my $len = $hit->length
+ Function: Returns the length of the hit 
+ Returns : integer
+ Args    : [optional] integer to set the length
+
+=cut
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hit->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hit
+           For BLAST, the algorithm denotes what type of sequence was aligned 
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated 
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated 
+           dna-translated dna).
+ Returns : a scalar string 
+ Args    : [optional] scalar string to set the algorithm
+
+=cut
+
+=head2 raw_score
+
+ Title   : raw_score
+ Usage   : $score = $hit->raw_score();
+ Function: Gets the "raw score" generated by the algorithm.  What
+           this score is exactly will vary from algorithm to algorithm,
+           returning undef if unavailable.
+ Returns : a scalar value
+ Args    : [optional] scalar value to set the raw score
+
+=cut
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $significance = $hit->significance();
+ Function: Used to obtain the E or P value of a hit, i.e. the probability that
+           this particular hit was obtained purely by random chance.  If
+           information is not available (nor calculatable from other
+           information sources), return undef.
+ Returns : a scalar value or undef if unavailable
+ Args    : [optional] scalar value to set the significance
+
+=cut
+
+=head2 bits
+
+ Usage     : $hit_object->bits();
+ Purpose   : Gets the bit score of the best HSP for the current hit.
+ Example   : $bits = $hit_object->bits();
+ Returns   : 0 always (bit scores make no sense for a HMMERHit)
+ Argument  : n/a
+
+See Also   : L<score()|score>
+
+=cut
+
+sub bits {
+    # Bit scores make no sense for this object, so this is a constant.
+    return 0;
+}
+
+=head2 next_hsp
+
+ Title    : next_hsp
+ Usage    : while( $hsp = $obj->next_hsp()) { ... }
+ Function : Returns the next available High Scoring Pair
+ Example  : 
+ Returns  : Bio::Search::HSP::HSPI object or null if finished
+ Args     : none
+
+=cut
+
+=head2 hsps
+
+ Usage     : $hit_object->hsps();
+ Purpose   : Get a list containing all HSP objects.
+           : Get the numbers of HSPs for the current hit.
+ Example   : @hsps = $hit_object->hsps();
+           : $num  = $hit_object->hsps();  # alternatively, use num_hsps()
+ Returns   : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
+           : Scalar context: integer (number of HSPs).
+           :                 (Equivalent to num_hsps()).
+ Argument  : n/a. Relies on wantarray
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsp()|hsp>, L<num_hsps()|num_hsps>
+
+=cut
+
+=head2 num_hsps
+
+ Usage     : $hit_object->num_hsps();
+ Purpose   : Get the number of HSPs for the present Blast hit.
+ Example   : $nhsps = $hit_object->num_hsps();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $hit->rewind;
+ Function: Allow one to reset the HSP iteration to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+=head2 iteration
+
+ Title   : iteration
+ Usage   : $obj->iteration($newval)
+ Function: PSI-BLAST iteration
+ Returns : 0 always (iterations make no sense for a HMMERHit)
+ Args    : newvalue (optional)
+
+
+=cut
+
+
+sub iteration {
+    # Iterations make no sense for this object, so this is a constant.
+    return 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: HitFactory.pm,v 1.6.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Hit::HitFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::HitFactory - A factory to create Bio::Search::Hit::HitI objects 
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Hit::HitFactory;
+    my $factory = new Bio::Search::Hit::HitFactory();
+    my $resultobj = $factory->create(@args);
+
+=head1 DESCRIPTION
+
+This is a general way of hiding the object creation process so that we
+can dynamically change the objects that are created by the SearchIO
+parser depending on what format report we are parsing.
+
+This object is for creating new Hits.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Hit::HitFactory;
+use vars qw($DEFAULT_TYPE);
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);
+
+# Package-wide fallback class: type() returns this whenever no type has
+# been set on the factory instance.
+BEGIN { 
+    $DEFAULT_TYPE = 'Bio::Search::Hit::GenericHit'; 
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Hit::HitFactory();
+ Function: Builds a new Bio::Search::Hit::HitFactory object 
+ Returns : Bio::Search::Hit::HitFactory
+ Args    :
+
+
+=cut
+
+sub new {
+  # Constructor arguments are forwarded unchanged to the parent class.
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # Pick the optional -type argument out of the named parameters; when it
+  # is absent the factory keeps falling back to $DEFAULT_TYPE via type().
+  my ($type) = $self->_rearrange([qw(TYPE)], @args);
+  $self->type($type) if defined $type;
+
+  return $self;
+}
+
+=head2 create
+
+ Title   : create
+ Usage   : $factory->create(%args)
+ Function: Create a new L<Bio::Search::Hit::HitI> object  
+ Returns : L<Bio::Search::Hit::HitI>
+ Args    : hash of initialization parameters
+
+
+=cut
+
+sub create{
+   # @args are the initialization parameters for the object to be built.
+   my ($self, @args) = @_;
+
+   # Class to instantiate: the configured type or the package default.
+   my $type = $self->type;
+
+   # Load the class; on failure throw, keeping the underlying error text
+   # so the caller can see why the module could not be loaded.
+   eval { $self->_load_module($type) };
+   if( $@ ) { $self->throw("Unable to load module $type: $@"); }
+
+   return $type->new(@args);
+}
+
+
+=head2 type
+
+ Title   : type
+ Usage   : $factory->type('Bio::Search::Hit::GenericHit');
+ Function: Get/Set the Hit creation type
+ Returns : string
+ Args    : [optional] string to set 
+
+
+=cut
+
+sub type{
+    my ($self,$type) = @_;
+
+    if( defined $type ) {
+        # Loading the module here duplicates what create() does, but this
+        # object is not called often, so the extra check is kept.
+        eval { $self->_load_module($type) };
+        if( ! $@ ) {
+            # The class loaded fine: remember it.
+            $self->{'_type'} = $type;
+        }
+        else {
+            # Unloadable class: warn and leave the current type untouched.
+            $self->warn("Cannot find module $type, unable to set type");
+        }
+    }
+
+    # Report the configured type, or the package default if none is set.
+    return $self->{'_type'} || $DEFAULT_TYPE;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HitI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,683 @@
+#-----------------------------------------------------------------
+# $Id: HitI.pm,v 1.24.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::Hit::HitI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# Originally created by Aaron Mackey <amackey at virginia.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::HitI - Interface for a hit in a similarity search result
+
+=head1 SYNOPSIS
+
+# Bio::Search::Hit::HitI objects should not be instantiated since this
+# module defines a pure interface.
+
+# Given an object that implements the Bio::Search::Hit::HitI  interface,
+# you can do the following things with it:
+
+    # Get a HitI object from a SearchIO stream:
+    use Bio::SearchIO;
+    my $searchio = new Bio::SearchIO(-format => 'blast', -file => 'result.bls');
+    my $result = $searchio->next_result;
+    my $hit    = $result->next_hit;
+
+    $hit_name = $hit->name();
+
+    $desc = $hit->description();
+
+    $len = $hit->length;
+
+    $alg = $hit->algorithm();
+
+    $score = $hit->raw_score();
+
+    $significance = $hit->significance();
+
+    $rank = $hit->rank(); # the Nth hit for a specific query
+
+    while( $hsp = $obj->next_hsp()) { ... } # process in iterator fashion
+
+    for my $hsp ( $obj->hsps() ) { ... } # process in list fashion
+
+=head1 DESCRIPTION
+
+    Bio::Search::Hit::* objects are data structures that contain information
+about specific hits obtained during a library search.  Some information will
+be algorithm-specific, but others will be generally defined.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey, Steve Chervitz
+
+Email amackey at virginia.edu  (original author)
+Email sac at bioperl.org
+
+=head1 COPYRIGHT
+
+Copyright (c) 1999-2001 Aaron Mackey, Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::Hit::HitI;
+
+
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $hit_name = $hit->name();
+ Function: returns the name of the Hit sequence
+ Returns : a scalar string
+ Args    : none
+
+The B<name> of a hit is unique within a Result or within an Iteration.
+
+=cut
+
+sub name {
+    # Interface stub: implementing classes must override this method.
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $desc = $hit->description();
+ Function: Retrieve the description for the hit
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+sub description {
+    # Interface stub: implementing classes must override this method.
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $acc = $hit->accession();
+ Function: Retrieve the accession (if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+
+=cut
+
+sub accession {
+    # Interface stub: implementing classes must override this method.
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 locus
+
+ Title   : locus
+ Usage   : $acc = $hit->locus();
+ Function: Retrieve the locus(if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+
+=cut
+
+sub locus {
+    # Interface stub: implementing classes must override this method.
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $len = $hit->length
+ Function: Returns the length of the hit 
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub length {
+   # Interface stub: implementing classes must override this method.
+   my ($self, @args) = @_;
+   $self->throw_not_implemented;
+}
+
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hit->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hit
+           For BLAST, the algorithm denotes what type of sequence was aligned 
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated 
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated 
+           dna-translated dna).
+ Returns : a scalar string 
+ Args    : none
+
+=cut
+
+sub algorithm {
+    # Interface stub: implementing classes must override this method.
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 raw_score
+
+ Title   : raw_score
+ Usage   : $score = $hit->raw_score();
+ Function: Gets the "raw score" generated by the algorithm.  What
+           this score is exactly will vary from algorithm to algorithm,
+           returning undef if unavailable.
+ Returns : a scalar value
+ Args    : none
+
+=cut
+
+sub raw_score {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 score
+
+Equivalent to L<raw_score()|raw_score>
+
+=cut
+
+sub score {
+    # Pure alias: pass every argument straight through to raw_score().
+    my $self = shift;
+    return $self->raw_score(@_);
+}
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $significance = $hit->significance();
+ Function: Used to obtain the E or P value of a hit, i.e. the probability that
+           this particular hit was obtained purely by random chance.  If
+           information is not available (nor calculatable from other
+           information sources), return undef.
+ Returns : a scalar value or undef if unavailable
+ Args    : none
+
+=cut
+
+sub significance {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 bits
+
+ Usage     : $hit_object->bits();
+ Purpose   : Gets the bit score of the best HSP for the current hit.
+ Example   : $bits = $hit_object->bits();
+ Returns   : Integer or double for FASTA reports
+ Argument  : n/a
+ Comments  : For BLAST1, the non-bit score is listed in the summary line.
+
+See Also   : L<score()|score>
+
+=cut
+
+#---------
+sub bits {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 next_hsp
+
+ Title    : next_hsp
+ Usage    : while( $hsp = $obj->next_hsp()) { ... }
+ Function : Returns the next available High Scoring Pair
+ Example  : 
+ Returns  : L<Bio::Search::HSP::HSPI> object or null if finished
+ Args     : none
+
+=cut
+
+sub next_hsp {
+    # Interface stub: implementing classes must override this method.
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+
+=head2 hsps
+
+ Usage     : $hit_object->hsps();
+ Purpose   : Get a list containing all HSP objects.
+           : Get the numbers of HSPs for the current hit.
+ Example   : @hsps = $hit_object->hsps();
+           : $num  = $hit_object->hsps();  # alternatively, use num_hsps()
+ Returns   : Array context : list of L<Bio::Search::HSP::BlastHSP> objects.
+           : Scalar context: integer (number of HSPs).
+           :                 (Equivalent to num_hsps()).
+ Argument  : n/a. Relies on wantarray
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsp()|hsp>, L<num_hsps()|num_hsps>
+
+=cut
+
+#---------
+sub hsps {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+
+
+=head2 num_hsps
+
+ Usage     : $hit_object->num_hsps();
+ Purpose   : Get the number of HSPs for the present Blast hit.
+ Example   : $nhsps = $hit_object->num_hsps();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+#-------------
+sub num_hsps {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+
+=head2 seq_inds
+
+ Usage     : $hit->seq_inds( seq_type, class, collapse );
+ Purpose   : Get a list of residue positions (indices) across all HSPs
+           : for identical or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hit->seq_inds('query', 'identical');
+           : @h_ind = $hit->seq_inds('hit', 'conserved');
+           : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
+ Returns   : Array of integers 
+           : May include ranges if collapse is non-zero.
+ Argument  : [0] seq_type  = 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :                 ('sbjct' is synonymous with 'hit')
+           : [1] class     = 'identical' or 'conserved' (default = 'identical')
+           :              (can be shortened to 'id' or 'cons')
+           :              (actually, anything not 'id' will evaluate to 'conserved').
+           : [2] collapse  = boolean, if non-zero, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11" 
+           :             collapses to "1-5 7 9-11". This is useful for 
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+
+See Also   : L<Bio::Search::HSP::HSPI::seq_inds()|Bio::Search::HSP::HSPI>
+
+=cut
+
+#-------------
+sub seq_inds {
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    # Fill in defaults for missing (or false) arguments.
+    $seqType  = 'query'     unless $seqType;
+    $class    = 'identical' unless $class;
+    $collapse = 0           unless $collapse;
+
+    # 'hit' is just another spelling of 'sbjct'.
+    if ( $seqType eq 'hit' ) {
+        $seqType = 'sbjct';
+    }
+
+    # Pool the positions reported by every HSP of this hit.
+    my @inds;
+    for my $one_hsp ( $self->hsps ) {
+        push @inds, $one_hsp->seq_inds($seqType, $class);
+    }
+
+    # Drop duplicate positions and put the rest in numeric order.
+    if (@inds) {
+        my %seen;
+        @seen{@inds} = ();
+        @inds = sort { $a <=> $b } keys %seen;
+    }
+
+    # Optionally merge consecutive positions into ranges.
+    return $collapse ? &Bio::Search::BlastUtils::collapse_nums(@inds) : @inds;
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $hit->rewind;
+ Function: Allow one to reset the HSP iterator to the beginning
+           if possible
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind{
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+
+=head2 overlap
+
+ Usage     : $hit_object->overlap( [integer] );
+ Purpose   : Gets/Sets the allowable amount overlap between different HSP sequences.
+ Example   : $hit_object->overlap(5);
+           : $overlap = $hit_object->overlap;
+ Returns   : Integer.
+ Argument  : integer.
+ Throws    : n/a
+ Status    : Experimental
+ Comments  : Any two HSPs whose sequences overlap by less than or equal
+           : to the overlap() number of residues will be considered separate HSPs
+           : and will not get tiled by L<Bio::Search::BlastUtils::_adjust_contigs()>.
+
+See Also   : L<Bio::Search::BlastUtils::_adjust_contigs()|Bio::Search::BlastUtils>, L<BUGS | BUGS>
+
+=cut
+
+#-------------
+sub overlap {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+
+=head2 n
+
+ Usage     : $hit_object->n();
+ Purpose   : Gets the N number for the current Blast hit.
+           : This is the number of HSPs in the set which was ascribed
+           : the lowest P-value (listed on the description line).
+           : This number is not the same as the total number of HSPs.
+           : To get the total number of HSPs, use num_hsps().
+ Example   : $n = $hit_object->n();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set (BLAST2 reports).
+ Comments  : Note that the N parameter is not reported in gapped BLAST2.
+           : Calling n() on such reports will result in a call to num_hsps().
+           : The num_hsps() method will count the actual number of
+           : HSPs in the alignment listing, which may exceed N in
+           : some cases.
+
+See Also   : L<num_hsps()|num_hsps>
+
+=cut
+
+#-----
+sub n {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 p
+
+ Usage     : $hit_object->p( [format] );
+ Purpose   : Get the P-value for the best HSP of the given BLAST hit.
+           : (Note that P-values are not provided with NCBI Blast2 reports).
+ Example   : $p =  $sbjct->p;
+           : $p =  $sbjct->p('exp');  # get exponent only.
+           : ($num, $exp) =  $sbjct->p('parts');  # split sci notation into parts
+ Returns   : Float or scientific notation number (the raw P-value, DEFAULT).
+           : Integer if format == 'exp' (the magnitude of the base 10 exponent).
+           : 2-element list (float, int) if format == 'parts' and P-value
+           :                is in scientific notation (See Comments).
+ Argument  : format: string of 'raw' | 'exp' | 'parts'
+           :    'raw' returns value given in report. Default. (1.2e-34)
+           :    'exp' returns exponent value only (34)
+           :    'parts' returns the decimal and exponent as a 
+           :            2-element list (1.2, -34) (See Comments).
+ Throws    : Warns if no P-value is defined. Uses expect instead.
+ Comments  : Using the 'parts' argument is not recommended since it will not
+           : work as expected if the P-value is not in scientific notation.
+           : That is, floats are not converted into sci notation before
+           : splitting into parts.
+
+See Also   : L<expect()|expect>, L<signif()|signif>, L<Bio::Search::BlastUtils::get_exponent()|Bio::Search::BlastUtils>
+
+=cut
+
+#--------
+sub p {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 hsp
+
+ Usage     : $hit_object->hsp( [string] );
+ Purpose   : Get a single HSPI object for the present HitI object.
+ Example   : $hspObj  = $hit_object->hsp;  # same as 'best'
+           : $hspObj  = $hit_object->hsp('best');
+           : $hspObj  = $hit_object->hsp('worst');
+ Returns   : Object reference for a L<Bio::Search::HSP::HSPI> object.
+ Argument  : String (or no argument).
+           :   No argument (default) = highest scoring HSP (same as 'best').
+           :   'best' or 'first' = highest scoring HSP.
+           :   'worst' or 'last' = lowest scoring HSP.
+ Throws    : Exception if the HSPs have not been collected.
+           : Exception if an unrecognized argument is used.
+
+See Also   : L<hsps()|hsps>, L<num_hsps()|num_hsps>
+
+=cut
+
+#----------
+sub hsp {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 logical_length
+
+ Usage     : $hit_object->logical_length( [seq_type] );
+           : (mostly intended for internal use).
+ Purpose   : Get the logical length of the hit sequence.
+           : If the Blast is a TBLASTN or TBLASTX, the returned length 
+           : is the length of the would-be amino acid sequence (length/3).
+           : For all other BLAST flavors, this function is the same as length().
+ Example   : $len    = $hit_object->logical_length();
+ Returns   : Integer 
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This is important for functions like frac_aligned_query()
+           : which need to operate in amino acid coordinate space when dealing
+           : with [T]BLAST[NX] type reports.
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+#--------------------
+sub logical_length {
+    # Interface stub: implementing classes must override this method.
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 rank
+
+ Title   : rank
+ Usage   : $obj->rank($newval)
+ Function: Get/Set the rank of this Hit in the Query search list
+           i.e. this is the Nth hit for a specific query
+ Returns : value of rank
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub rank{
+   # Interface stub: implementing classes must override this method.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 each_accession_number
+
+ Title   : each_accession_number
+ Usage   : $obj->each_accession_number
+ Function: Get each accession number listed in the description of the hit.
+           If there are no alternatives, then only the primary accession will 
+           be given
+ Returns : list of all accession numbers in the description
+ Args    : none
+
+
+=cut
+
+sub each_accession_number {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+
+=head2 tiled_hsps
+
+ Usage     : $hit_object->tiled_hsps( [integer] );
+ Purpose   : Gets/Sets an indicator for whether or not the HSPs in this Hit 
+           : have been tiled.
+           : Methods that rely on HSPs being tiled should check this
+           : and then call SearchUtils::tile_hsps() if not.
+ Example   : $hit_object->tiled_hsps(1);
+           : if( $hit_object->tiled_hsps ) { # do something }
+ Returns   : Boolean (1 or 0) 
+ Argument  : integer (optional)
+ Throws    : n/a
+
+=cut
+
+sub tiled_hsps { my $self = shift; return $self->throw_not_implemented(); }
+
+
+=head2 strand
+
+ Usage     : $sbjct->strand( [seq_type] );
+ Purpose   : Gets the strand(s) for the query, sbjct, or both sequences
+           : in the best HSP of the BlastHit object after HSP tiling.
+           : Only valid for BLASTN, TBLASTX, BLASTX-query, TBLASTN-hit.
+ Example   : $qstrand = $sbjct->strand('query');
+           : $sstrand = $sbjct->strand('hit');
+           : ($qstrand, $sstrand) = $sbjct->strand();
+ Returns   : scalar context: integer '1', '-1', or '0'
+           : array context without args: list of two strings (queryStrand, sbjctStrand)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+           : If you don't want the tiled data, iterate through each HSP
+           : calling strand() on each (use hsps() to get all HSPs).
+           :
+           : Formerly (prior to 10/21/02), this method would return the
+           : string "-1/1" for hits with HSPs on both strands.
+           : However, now that strand and frame is properly being accounted
+           : for during HSP tiling, it makes more sense for strand()
+           : to return the strand data for the best HSP after tiling.
+           :
+           : If you really want to know about hits on opposite strands,
+           : you should be iterating through the HSPs using methods on the
+           : HSP objects.
+           :
+           : A possible use case where knowing whether a hit has HSPs 
+           : on both strands would be when filtering via SearchIO for hits with 
+           : this property. However, in this case it would be better to have a
+           : dedicated method such as $hit->hsps_on_both_strands(). Similarly
+           : for frame. This could be provided if there is interest.
+
+See Also   : L<Bio::Search::HSP::HSPI::strand>()
+
+=cut
+
+#---------'
+sub strand { my $self = shift; return $self->throw_not_implemented(); }
+
+
+=head2 frame
+
+ Usage     : $hit_object->frame();
+ Purpose   : Gets the reading frame for the best HSP after HSP tiling.
+           : This is only valid for BLASTX and TBLASTN/X type reports.
+ Example   : $frame = $hit_object->frame();
+ Returns   : Integer (-2 .. +2)
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set.
+ Comments  : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+           : If you don't want the tiled data, iterate through each HSP
+           : calling frame() on each (use hsps() to get all HSPs).
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+#---------'
+sub frame { my $self = shift; return $self->throw_not_implemented(); }
+
+
+=head2 matches
+
+ Usage     : $hit_object->matches( [class] );
+ Purpose   : Get the total number of identical or conserved matches 
+           : (or both) across all HSPs.
+           : (Note: 'conservative' matches are indicated as 'positives' 
+           :         in BLAST reports.)
+ Example   : ($id,$cons) = $hit_object->matches(); # no argument
+           : $id = $hit_object->matches('id');
+           : $cons = $hit_object->matches('cons'); 
+ Returns   : Integer or a 2-element array of integers 
+ Argument  : class = 'id' | 'cons' OR none. 
+           : If no argument is provided, both identical and conservative 
+           : numbers are returned in a two element list.
+           : (Other terms can be used to refer to the conservative
+           :  matches, e.g., 'positive'. All that is checked is whether or
+           :  not the supplied string starts with 'id'. If not, the 
+           : conservative matches are returned.)
+ Throws    : Exception if the requested data cannot be obtained.
+ Comments  : This method requires that all HSPs be tiled. If there is more than one
+           : HSP and they have not already been tiled, they will be tiled first automatically.
+           :
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : Does not rely on wantarray to return a list. Only checks for
+           : the presence of an argument (no arg = return list).
+
+See Also   : L<Bio::Search::HSP::GenericHSP::matches()|Bio::Search::HSP::GenericHSP>, L<hsps()|hsps>
+
+=cut
+
+sub matches { my $self = shift; return $self->throw_not_implemented(); }
+
+
+# aliasing for Steve's method names
+sub hit_description { my $self = shift; return $self->description(@_); }
+# aliasing for Steve's method names
+sub hit_length { my $self = shift; return $self->length(@_); }
+
+1;
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HmmpfamHit.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HmmpfamHit.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HmmpfamHit.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,366 @@
+# $Id: HmmpfamHit.pm,v 1.1.2.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Hit::HmmpfamHit
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::HmmpfamHit - A parser and hit object for hmmpfam hits
+
+=head1 SYNOPSIS
+
+    # generally we use Bio::SearchIO to build these objects
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer_pull',
+							   -file   => 'result.hmmer');
+
+    while (my $result = $in->next_result) {
+		while (my $hit = $result->next_hit) {
+			print $hit->name, "\n";
+			print $hit->score, "\n";
+			print $hit->significance, "\n";
+
+			while (my $hsp = $hit->next_hsp) {
+				# process HSPI objects
+			}
+		}
+    }
+
+=head1 DESCRIPTION
+
+This object implements a parser for the hit output of hmmpfam, a program in
+the HMMER package.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::Hit::HmmpfamHit;
+
+use strict;
+
+use Bio::Search::HSP::HmmpfamHSP;
+
+use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Hit::HmmpfamHit();
+ Function: Builds a new Bio::Search::Hit::HmmpfamHit object.
+ Returns : Bio::Search::Hit::HmmpfamHit
+ Args    : -chunk    => [Bio::Root::IO, $start, $end] (required if no -parent)
+           -parent   => Bio::PullParserI object (required if no -chunk)
+           -hit_data => array ref with [name description score significance
+		                                num_hsps rank]
+
+           where the array ref provided to -chunk contains an IO object
+           for a filehandle to something representing the raw data of the
+           hit, and $start and $end define the tell() position within the
+           filehandle that the hit data starts and ends (optional; defaults
+           to start and end of the entire thing described by the filehandle)
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    $self->_setup(@args);
+
+    # extra fields this class answers; they start out unset
+    my $fields = $self->_fields;
+    $fields->{$_} = undef for qw( next_domain domains hsp_data );
+
+    # Values are taken off the front of the array returned by
+    # _raw_hit_data() in this fixed order, removing them from that array.
+    my $hit_data = $self->_raw_hit_data;
+    if ($hit_data && ref($hit_data) eq 'ARRAY') {
+        for my $key (qw(name description score significance num_hsps rank)) {
+            $fields->{$key} = shift @{$hit_data};
+        }
+    }
+
+    $fields->{hit_start} = 1;
+    delete $self->_fields->{accession};
+
+    # register the pair length => hsp_data through _dependencies()
+    $self->_dependencies( { length => 'hsp_data' } );
+    return $self;
+}
+
+#
+# PullParserI discovery methods so we can answer all HitI questions
+#
+
+sub _discover_description {
+	# The description is normally set when this object is created. If it was
+	# left undef (which is possible) this _discover method gets called: there
+	# is nothing more to find, so just return and leave the value undef.
+	return;
+}
+
+sub _discover_hsp_data {
+    my $self  = shift;
+    my $table = $self->get_field('hsp_table');
+    my $entry = $table->{ $self->get_field('name') } || undef;
+    if ($entry) {
+        # the hit length is copied over only when the entry supplies one
+        $self->_fields->{length} = $entry->{hit_length}
+            if defined $entry->{hit_length};
+
+        # each row: rank query_start query_end hit_start hit_end score evalue
+        $self->_fields->{hsp_data} = $entry->{hsp_data};
+    }
+}
+
+sub _discover_query_start {
+    my $self = shift;
+    my $rows = $self->get_field('hsp_data') || return;
+    # lowest query start (column 1) among all HSP rows
+    my $leftmost = (sort { $a->[1] <=> $b->[1] } @{$rows})[0];
+    $self->_fields->{query_start} = $leftmost->[1];
+}
+
+sub _discover_query_end {
+    my $self = shift;
+    my $rows = $self->get_field('hsp_data') || return;
+    # highest query end (column 2) among all HSP rows
+    my $rightmost = (sort { $b->[2] <=> $a->[2] } @{$rows})[0];
+    $self->_fields->{query_end} = $rightmost->[2];
+}
+
+sub _discover_hit_start {
+    my $self = shift;
+    my $rows = $self->get_field('hsp_data') || return;
+    # lowest hit start (column 3) among all HSP rows
+    my $leftmost = (sort { $a->[3] <=> $b->[3] } @{$rows})[0];
+    $self->_fields->{hit_start} = $leftmost->[3];
+}
+
+sub _discover_hit_end {
+    my $self = shift;
+    my $rows = $self->get_field('hsp_data') || return;
+    # highest hit end (column 4) among all HSP rows
+    my $rightmost = (sort { $b->[4] <=> $a->[4] } @{$rows})[0];
+    $self->_fields->{hit_end} = $rightmost->[4];
+}
+
+sub _discover_next_hsp {
+    my $self = shift;    # discovery method for the 'next_hsp' field
+    my $hsp_data = $self->get_field('hsp_data') || return;
+    # start at row 0; an empty table counts as exhausted (-1), as in rewind()
+    unless (defined $self->{_next_hsp_index}) {
+        $self->{_next_hsp_index} = @{$hsp_data} > 0 ? 0 : -1;
+    }
+    return if $self->{_next_hsp_index} == -1;
+    my $row = $hsp_data->[ $self->{_next_hsp_index}++ ];
+    $self->_fields->{next_hsp} =
+        Bio::Search::HSP::HmmpfamHSP->new(-parent => $self, -hsp_data => $row);
+
+    # -1 marks the iterator as finished once the last row has been handed out
+    $self->{_next_hsp_index} = -1 if $self->{_next_hsp_index} > $#{$hsp_data};
+}
+
+=head2 next_hsp
+
+ Title    : next_hsp
+ Usage    : while( $hsp = $obj->next_hsp()) { ... }
+ Function : Returns the next available High Scoring Pair
+ Example  : 
+ Returns  : L<Bio::Search::HSP::HSPI> object or null if finished
+ Args     : none
+
+=cut
+
+sub next_hsp {
+    my ($self) = @_;
+    my $current = $self->get_field('next_hsp');
+    $self->_fields->{next_hsp} = undef;    # reset the slot after reading it
+    return $current;
+}
+
+=head2 next_domain
+
+ Title   : next_domain 
+ Usage   : my $domain = $hit->next_domain();
+ Function: An alias for L<next_hsp()>, this will return the next HSP
+ Returns : L<Bio::Search::HSP::HSPI> object
+ Args    : none
+
+=cut
+
+*next_domain = \&next_hsp;
+
+=head2 hsps
+
+ Usage     : $hit_object->hsps();
+ Purpose   : Get a list containing all HSP objects.
+ Example   : @hsps = $hit_object->hsps();
+ Returns   : list of L<Bio::Search::HSP::HmmpfamHSP> objects.
+ Argument  : none
+
+=cut
+
+sub hsps {
+    my ($self) = @_;
+    # remember the next_hsp() position so it can be put back afterwards
+    my $saved_index = $self->{_next_hsp_index} || 0;
+    $self->rewind;
+    my @all;
+    while (defined(my $hsp = $self->next_hsp)) { push @all, $hsp }
+    # with nothing collected the iterator is left marked as finished (-1)
+    $self->{_next_hsp_index} = @all ? $saved_index : -1;
+    return @all;
+}
+
+=head2 domains
+
+ Title   : domains
+ Usage   : my @domains = $hit->domains();
+ Function: An alias for L<hsps()>, this will return the full list of hsps
+ Returns : array of L<Bio::Search::HSP::HSPI> objects
+ Args    : none
+
+=cut
+
+*domains = \&hsps;
+
+=head2 hsp
+
+ Usage     : $hit_object->hsp( [string] );
+ Purpose   : Get a single HSPI object for the present HitI object.
+ Example   : $hspObj  = $hit_object->hsp;  # same as 'best'
+           : $hspObj  = $hit_object->hsp('best');
+           : $hspObj  = $hit_object->hsp('worst');
+ Returns   : Object reference for a L<Bio::Search::HSP::HSPI> object.
+ Argument  : String (or no argument).
+           :   No argument (default) = highest scoring HSP (same as 'best').
+           :   'best'  = highest scoring HSP.
+           :   'worst' = lowest scoring HSP.
+ Throws    : Exception if an unrecognized argument is used.
+
+See Also   : L<hsps()|hsps>, L<num_hsps>()
+
+=cut
+
+sub hsp {
+    my ($self, $type) = @_;
+    $type ||= 'best';
+    my $hsp_data = $self->get_field('hsp_data') || return;
+
+    # Rows are compared on column 6 (the E-value): 'best' wants the smallest
+    # value first, 'worst' the largest.
+    my %order_for = (
+        best  => sub { $a->[6] <=> $b->[6] },
+        worst => sub { $b->[6] <=> $a->[6] },
+    );
+    my $sort = $order_for{$type}
+        or $self->throw("Unknown arg '$type' given to hsp()");
+
+    my ($this_hsp) = sort $sort @{$hsp_data};
+    return unless defined $this_hsp;    # empty table: there is no HSP to wrap
+    return Bio::Search::HSP::HmmpfamHSP->new(-parent => $self,
+                                              -hsp_data => $this_hsp);
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $hit->rewind;
+ Function: Allow one to reset the HSP iterator to the beginning, so that
+           next_hsp() will subsequently return the first HSP and so on.
+ Returns : n/a
+ Args    : none
+
+=cut
+
+sub rewind {
+    my ($self) = @_;
+    my $rows = $self->get_field('hsp_data') || return;
+    $self->{_next_hsp_index} = @{$rows} ? 0 : -1;    # -1: nothing to iterate
+}
+
+sub p {    # synonym for significance()
+    my ($self) = @_;
+    return $self->significance;
+}
+
+=head2 strand
+
+ Usage     : $sbjct->strand( [seq_type] );
+ Purpose   : Gets the strand(s) for the query, sbjct, or both sequences.
+           : For hmmpfam, the answers are always 1 (forward strand).
+ Example   : $qstrand = $sbjct->strand('query');
+           : $sstrand = $sbjct->strand('hit');
+           : ($qstrand, $sstrand) = $sbjct->strand();
+ Returns   : scalar context: integer '1'
+           : array context without args: list of two strings (1, 1)
+           : Array context can be "induced" by providing an argument of 'list'
+		   : or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default
+           : = 'query') ('sbjct' is synonymous with 'hit')
+
+=cut
+
+sub strand {
+    my ($self, $type) = @_;
+    # with no argument the calling context picks the shape of the answer
+    $type = wantarray ? 'list' : 'query' unless $type;
+    my $wanted = lc $type;
+    # per the POD, hmmpfam answers are always 1 (forward strand)
+    return (1, 1) if $wanted eq 'list' or $wanted eq 'array';
+    return 1;
+}
+
+=head2 frac_aligned_query
+
+ Usage     : $hit_object->frac_aligned_query();
+ Purpose   : Get the fraction of the query sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_query();
+ Returns   : undef (the length of query sequences is unknown in Hmmpfam reports)
+ Argument  : none
+
+=cut
+
+sub frac_aligned_query {
+    return undef;    # always undef: Hmmpfam reports lack the query length
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/HmmpfamHit.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PsiBlastHit.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PsiBlastHit.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PsiBlastHit.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2002 @@
+#-----------------------------------------------------------------
+# $Id: PsiBlastHit.pm,v 1.6.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::Hit::PsiBlastHit
+#
+# (This module was originally called Bio::Tools::Blast::Sbjct)
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Search::Hit::PsiBlastHit - Bioperl BLAST Hit object
+
+=head1 SYNOPSIS
+
+See L<Bio::Search::Result::BlastResult>.
+
+=head1 DESCRIPTION
+
+The Bio::Search::Hit::PsiBlastHit.pm module encapsulates data and
+methods for manipulating "hits" from a BLAST report. A BLAST hit is a
+collection of HSPs along with other metadata such as sequence name and
+score information. Hit objects are accessed via
+L<Bio::Search::Result::BlastResult> objects after parsing a BLAST
+report using the L<Bio::SearchIO> system.
+
+In Blast lingo, the "sbjct" sequences are all the sequences in a
+target database which were compared against a "query" sequence.  The
+terms "sbjct" and "hit" will be used interchangeably in this module.
+All methods that take 'sbjct' as an argument also support 'hit' as a
+synonym.
+
+This module supports BLAST versions 1.x and 2.x, gapped and ungapped,
+and PSI-BLAST.
+
+The construction of PsiBlastHit objects is performed by
+Bio::SearchIO::blast::PsiBlastHitFactory in a process that is
+orchestrated by the Blast parser (L<Bio::SearchIO::blast>).
+The resulting PsiBlastHits are then accessed via
+L<Bio::Search::Result::BlastResult>. Therefore, you do not need to
+use L<Bio::Search::Hit::PsiBlastHit> directly. If you need to
+construct PsiBlastHits directly, see the C<new()> function for details.
+
+For L<Bio::SearchIO> BLAST parsing usage examples, see the
+C<examples/search-blast> directory of the Bioperl distribution.
+
+
+=head2 HSP Tiling and Ambiguous Alignments
+
+If a Blast hit has more than one HSP, the Bio::Search::Hit::PsiBlastHit.pm
+object has the ability to merge overlapping HSPs into contiguous
+blocks. This permits the PsiBlastHit object to sum data across all HSPs
+without counting data in the overlapping regions multiple times, which
+would happen if data from each overlapping HSP are simply summed.  HSP
+tiling is performed automatically when methods of the PsiBlastHit object
+that rely on tiled data are invoked. These include
+L<frac_identical()|frac_identical>, L<frac_conserved()|frac_conserved>, L<gaps()|gaps>,
+L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>,
+L<num_unaligned_query()|num_unaligned_query>, L<num_unaligned_hit()|num_unaligned_hit>.
+
+It also permits the assessment of an "ambiguous alignment" if the
+query (or sbjct) sequences from different HSPs overlap
+(see L<ambiguous_aln()|ambiguous_aln>). The existence
+of an overlap could indicate a biologically interesting region in the
+sequence, such as a repeated domain.  The PsiBlastHit object uses the
+C<-OVERLAP> parameter to determine when two sequences overlap; if this is
+set to 2 -- the default -- then any two sbjct or query HSP sequences
+must overlap by more than two residues to get merged into the same
+contig and counted as an overlap. See the L<BUGS | BUGS> section below for
+"issues" with HSP tiling.
+
+
+The results of the HSP tiling is reported with the following ambiguity codes:
+
+   'q' = Query sequence contains multiple sub-sequences matching
+         a single region in the sbjct sequence.
+
+   's' = Subject (PsiBlastHit) sequence contains multiple sub-sequences matching
+         a single region in the query sequence.
+
+   'qs' = Both query and sbjct sequences contain more than one
+          sub-sequence with similarity to the other sequence.
+
+
+For additional information about ambiguous BLAST alignments, see
+L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils> and
+
+ http://www-genome.stanford.edu/Sacch3D/help/ambig_aln.html
+
+=head1 DEPENDENCIES
+
+Bio::Search::Hit::PsiBlastHit.pm is a concrete class that inherits from
+L<Bio::Root::Root> and L<Bio::Search::Hit::HitI>, and relies on
+L<Bio::Search::HSP::BlastHSP>.
+
+
+=head1 BUGS
+
+One consequence of the HSP tiling is that methods that rely on HSP
+tiling such as L<frac_identical()|frac_identical>, L<frac_conserved()|frac_conserved>, L<gaps()|gaps>
+etc. may report misleading numbers when C<-OVERLAP> is set to a large
+number.  For example, say we have two HSPs and the query sequence tile
+as follows:
+
+            1      8             22      30        40             60
+ Full seq:  ------------------------------------------------------------
+                    *  ** *   **
+ HSP1:             ---------------                    (6 identical matches)
+                              **   **  **
+ HSP2:                        -------------           (6 identical matches)
+
+
+If C<-OVERLAP> is set to some number over 4, HSP1 and HSP2 will not be
+tiled into a single contig and their numbers of identical matches will
+be added, giving a total of 12, not 10 if they had been combined into
+one contig. This can lead to numbers greater than 1.0 for methods
+L<frac_identical()|frac_identical> and L<frac_conserved()|frac_conserved>. This is less of an issue
+with gapped Blast since it tends to combine HSPs that would be listed
+separately without gapping.  (Fractions E<gt>1.0 can be viewed as a
+signal for an interesting alignment that warrants further inspection,
+thus turning this bug into a feature :-).
+
+Using large values for C<-OVERLAP> can lead to incorrect numbers
+reported by methods that rely on HSP tiling but can be useful if you
+care more about detecting ambiguous alignments.  Setting C<-OVERLAP>
+to zero will lead to the most accurate numbers for the
+tiling-dependent methods but will be useless for detecting overlapping
+HSPs since all HSPs will appear to overlap.
+
+
+=head1 SEE ALSO
+
+ Bio::Search::HSP::BlastHSP.pm         - Blast HSP object.
+ Bio::Search::Result::BlastResult.pm   - Blast Result object.
+ Bio::Search::Hit::HitI.pm             - Interface implemented by PsiBlastHit.pm
+ Bio::Root::Root.pm                    - Base class for PsiBlastHit.pm
+
+Links:
+
+ http://bio.perl.org/                       - Bioperl Project Homepage
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+    http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 ACKNOWLEDGEMENTS
+
+This software was originally developed in the Department of Genetics
+at Stanford University. I would also like to acknowledge my
+colleagues at Affymetrix for useful feedback.
+
+=head1 COPYRIGHT
+
+Copyright (c) 1996-2001 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Search::Hit::PsiBlastHit;
+
+use strict;
+use Bio::Search::BlastUtils;
+use vars qw(%SUMMARY_OFFSET);
+
+use overload
+    '""' => \&to_string;
+
+use base qw(Bio::Root::Root Bio::Search::Hit::HitI);
+
+
+=head2 new
+
+ Usage     : $hit = Bio::Search::Hit::PsiBlastHit->new( %named_params );
+           : Bio::Search::Hit::PsiBlastHit.pm objects are constructed
+           : automatically by Bio::SearchIO::PsiBlastHitFactory.pm,
+           : so there is no need for direct instantiation.
+ Purpose   : Constructs a new PsiBlastHit object and Initializes key variables
+           : for the hit.
+ Returns   : A Bio::Search::Hit::PsiBlastHit object
+ Argument  : Named Parameters:
+           : Parameter keys are case-insensitive.
+           :     -RAW_DATA   => array reference holding raw BLAST report data
+           :                    for a single hit. This includes all lines
+           :                    within the HSP alignment listing section of a
+           :                    traditional BLAST or PSI-BLAST (non-XML) report,
+           :                    starting at (or just after) the leading '>'.
+           :         -HOLD_RAW_DATA => boolean, should -RAW_DATA be saved within the object.
+           :         -QUERY_LEN  => Length of the query sequence
+           :         -ITERATION  => integer (PSI-BLAST iteration number in which hit was found)
+           :         -OVERLAP    => integer (maximum overlap between adjacent
+           :                    HSPs when tiling)
+           :         -PROGRAM    => string (type of Blast: BLASTP, BLASTN, etc)
+           :         -SIGNIF     => significance
+           :         -IS_PVAL    => boolean, true if -SIGNIF contains a P-value
+           :         -SCORE      => raw BLAST score
+           :         -FOUND_AGAIN   => boolean, true if this was a hit from the
+           :                       section of a PSI-BLAST with iteration > 1
+           :                       containing sequences that were also found
+           :                       in iteration 1.
+ Comments  : This object accepts raw Blast report data not because it
+           : is required for parsing, but in order to retrieve it
+           : (only available if -HOLD_RAW_DATA is set to true).
+
+See Also   : L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>, L<Bio::Root::Root::new()|Bio::Root::Root>
+
+=cut
+
+#-------------------
+sub new {
+# Constructor: unpacks the named parameters and stores them on the object.
+    my ($class, @args ) = @_;
+    my $self = $class->SUPER::new( @args );
+
+    my ($raw_data, $signif, $is_pval, $hold_raw);
+    # The targets below are listed in the same order as the parameter names.
+    ($self->{'_blast_program'}, $self->{'_query_length'}, $raw_data, $hold_raw,
+     $self->{'_overlap'}, $self->{'_iteration'}, $signif, $is_pval,
+     $self->{'_score'}, $self->{'_found_again'} ) =
+       $self->_rearrange( [qw(PROGRAM
+                              QUERY_LEN
+                              RAW_DATA
+                              HOLD_RAW_DATA
+                              OVERLAP
+                              ITERATION
+                              SIGNIF
+                              IS_PVAL
+                              SCORE
+                              FOUND_AGAIN )], @args );
+
+    # TODO: Handle this in parser. Just pass in name parameter.
+    $self->_set_id( $raw_data->[0] );    # first raw line goes to _set_id()
+
+    if($is_pval) {    # -SIGNIF is kept as either a P-value or an Expect value
+        $self->{'_p'} = $signif;
+    } else {
+        $self->{'_expect'} = $signif;
+    }
+
+    if( $hold_raw ) {    # raw report lines are retained only on request
+        $self->{'_hit_data'} = $raw_data;
+    }
+
+    return $self;
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # debugging aid, normally disabled: print STDERR "-->DESTROYING $self\n";
+}
+
+
+#=================================================
+# Begin Bio::Search::Hit::HitI implementation
+#=================================================
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hit->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hit
+           For BLAST, the algorithm denotes what type of sequence was aligned
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated
+           dna-translated dna).
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+#----------------
+sub algorithm {
+    # Read-only accessor for the program name passed to new() as -PROGRAM.
+    my ($self, @args) = @_;
+    return $self->{'_blast_program'};
+}
+
+=head2 name
+
+ Usage     : $hit->name([string]);
+ Purpose   : Set/Get a string to identify the hit.
+ Example   : $name = $hit->name;
+           : $hit->name('M81707');
+ Returns   : String consisting of the hit's name or undef if not set.
+ Comments  : The name is parsed out of the "Query=" line as the first chunk of
+             non-whitespace text. If you want the rest of the line, use
+             $hit->description().
+
+See Also: L<accession()|accession>
+
+=cut
+
+#'
+
+#----------------
+sub name {
+    my ($self, @new) = @_;
+    if (@new) {
+        # trim leading whitespace plus trailing whitespace or a trailing comma
+        (my $trimmed = $new[0]) =~ s/^\s+|(\s+|,)$//g;
+        $self->{'_name'} = $trimmed;
+    }
+    # undef until a name has been stored
+    return $self->{'_name'};
+}
+
+=head2 description
+
+ Usage     : $hit_object->description( [integer] );
+ Purpose   : Get the description string for the hit (optionally truncated).
+             This is parsed out of the "Query=" line as everything after
+             the first chunk of non-whitespace text. Use $hit->name()
+             to get the first chunk (the ID of the sequence).
+ Example   : $description = $hit->description;
+           : $desc_60char = $hit->description(60);
+ Argument  : Integer (optional) indicating the desired length of the
+           : description string to be returned.
+ Returns   : String consisting of the hit's description or undef if not set.
+
+=cut
+
+#'
+
+#----------------
+sub description {
+    my ($self, $len) = @_;
+    my $text = $self->{'_description'};
+    # without an explicit length the whole description is returned
+    $len = CORE::length($text) unless defined $len;
+    return substr($text, 0, $len);
+}
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $acc = $hit->accession();
+ Function: Retrieve the accession (if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+ Comments: Accession numbers are extracted based on the assumption that they
+           are delimited by | characters (NCBI-style). If this is not the case,
+           use the name() method and parse it as necessary.
+
+See Also: L<name()|name>
+
+=cut
+
+#--------------------
+sub accession {
+    my ($self, @new) = @_;
+    # store the first argument when one is supplied
+    $self->{'_accession'} = $new[0] if @new;
+    return $self->{'_accession'} || '';    # never undef: falls back to ''
+}
+
+=head2 raw_score
+
+ Usage     : $hit_object->raw_score();
+ Purpose   : Gets the BLAST score of the best HSP for the current Blast hit.
+ Example   : $score = $hit_object->raw_score();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<bits()|bits>
+
+=cut
+
+#----------
+sub raw_score {
+    my ($self) = @_;
+
+    # A score already stored on the object is returned as is. Per the
+    # original author's note, that stored value is a remnant of the old
+    # 'query' mode, in which the sbjct object collected its data from the
+    # description line only.
+    my $stored = $self->{'_score'};
+    return $stored if defined $stored;
+
+    # Nothing stored: ask the HSP returned by hsp() (called without
+    # arguments) for its score instead.
+    my $score = $self->hsp->score;
+    return $score;
+}
+
+
+=head2 length
+
+ Usage     : $hit_object->length();
+ Purpose   : Get the total length of the hit sequence.
+ Example   : $len = $hit_object->length();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Developer note: when using the built-in length function within
+           : this module, call it as CORE::length().
+
+See Also   : L<logical_length()|logical_length>,  L<length_aln()|length_aln>
+
+=cut
+
+#-----------
+sub length {
+#-----------
+    # Plain reader for the '_length' slot.
+    # Inside this module the built-in must be called as CORE::length().
+    my ($self) = @_;
+    my $total = $self->{'_length'};
+    return $total;
+}
+
+=head2 significance
+
+Equivalent to L<signif()|signif>
+
+=cut
+
+#----------------
+sub significance {
+    # Alias: forwards all arguments to signif().
+    my $self = shift;
+    return $self->signif( @_ );
+}
+#----------------
+
+
+=head2 next_hsp
+
+ Title    : next_hsp
+ Usage    : $hsp = $obj->next_hsp();
+ Function : returns the next available High Scoring Pair object
+ Example  :
+ Returns  : Bio::Search::HSP::BlastHSP or undef if finished
+ Args     : none
+
+=cut
+
+#----------------
+sub next_hsp {
+#----------------
+    # Iterator over the hit's HSPs. On the first call the list returned
+    # by hsps() is copied into a private queue; every call then removes
+    # and returns the element at the END of that queue (undef once the
+    # queue is empty). The queue is not rebuilt after it runs dry.
+    my ($self) = @_;
+
+    if( not $self->{'_hsp_queue_started'} ) {
+        my @all_hsps = $self->hsps();
+        $self->{'_hsp_queue'}         = \@all_hsps;
+        $self->{'_hsp_queue_started'} = 1;
+    }
+
+    return pop @{$self->{'_hsp_queue'}};
+}
+
+#=================================================
+# End Bio::Search::Hit::HitI implementation
+#=================================================
+
+
+# Providing a more explicit method for getting name of hit
+# (corresponds with column name in HitTableWriter)
+#----------------
+sub hit_name {
+#----------------
+    # Explicitly named alias: forwards all arguments to name().
+    return shift->name( @_ );
+}
+
+# Older method Delegates to description()
+#----------------
+sub desc {
+#----------------
+    # Older spelling: forwards all arguments to description().
+    return shift->description( @_ );
+}
+
+# Providing a more explicit method for getting description of hit
+# (corresponds with column name in HitTableWriter)
+#----------------
+sub hit_description {
+#----------------
+    # Explicitly named alias: forwards all arguments to description().
+    return shift->description( @_ );
+}
+
+=head2 score
+
+Equivalent to L<raw_score()|raw_score>
+
+=cut
+
+#----------------
+sub score {
+    # Alias: forwards all arguments to raw_score().
+    my $self = shift;
+    return $self->raw_score( @_ );
+}
+#----------------
+
+
+=head2 hit_length
+
+Equivalent to L<length()|length>
+
+=cut
+
+# Providing a more explicit method for getting length of hit
+#----------------
+sub hit_length {
+    # Alias: forwards all arguments to this class's length() method.
+    my $self = shift;
+    return $self->length( @_ );
+}
+#----------------
+
+
+=head2 signif
+
+ Usage     : $hit_object->signif( [format] );
+ Purpose   : Get the P or Expect value for the best HSP of the given BLAST hit.
+           : The value returned is the one which is reported in the description
+           : section of the Blast report. For Blast1 and WU-Blast2, this
+           : is a P-value, for Blast2, it is an Expect value.
+ Example   : $obj->signif()        # returns 1.3e-34
+           : $obj->signif('exp')   # returns -34
+           : $obj->signif('parts') # returns (1.3, -34)
+ Returns   : Float or scientific notation number (the raw P/Expect value, DEFAULT).
+           : Integer if format == 'exp' (the magnitude of the base 10 exponent).
+           : 2-element list (float, int) if format == 'parts' and P/Expect value
+           :                is in scientific notation (see Comments).
+ Argument  : format: string of 'raw' | 'exp' | 'parts'
+           :    'raw' returns value given in report. Default. (1.2e-34)
+           :    'exp' returns exponent value only (34)
+           :    'parts' returns the decimal and exponent as a
+           :            2-element list (1.2, -34)  (see Comments).
+ Throws    : n/a
+ Comments  : The signif() method provides a way to deal with the fact that
+           : Blast1 and Blast2 formats (and WU- vs. NCBI-BLAST) differ in
+           : what is reported in the description lines of each hit in the
+           : Blast report. The signif() method frees any client code from
+           : having to know if this is a P-value or an Expect value,
+           : making it easier to write code that can process both
+           : Blast1 and Blast2 reports. This is not necessarily a good thing,
+           : since one should always know when one is working with P-values or
+           : Expect values (hence the deprecated status).
+           : Use of expect() is recommended since all hits will have an Expect value.
+           :
+           : Using the 'parts' argument is not recommended since it will not
+           : work as expected if the expect value is not in scientific notation.
+           : That is, floats are not converted into sci notation before
+           : splitting into parts.
+
+See Also   : L<p()|p>, L<expect()|expect>, L<Bio::Search::BlastUtils::get_exponent()|Bio::Search::BlastUtils>
+
+=cut
+
+#-------------
+sub signif {
+#-------------
+    # Returns the stored P-value when one is defined, otherwise the stored
+    # Expect value. $fmt selects the form of the result:
+    #   false or 'raw...' -> the value exactly as stored
+    #   'exp...'          -> Bio::Search::BlastUtils::get_exponent($val)
+    #   'parts...'        -> the value split at its 'e'/'E' exponent marker
+    #   anything else     -> the value exactly as stored
+    # Throws if neither '_p' nor '_expect' is defined.
+    #
+    # Some duplication of logic for p(), expect() and signif() for the sake of performance.
+    my ($self, $fmt) = @_;
+
+    my $val = defined($self->{'_p'}) ? $self->{'_p'} : $self->{'_expect'};
+
+    # $val can be zero.
+    defined($val) or $self->throw("Can't get P- or Expect value: HSPs may not have been set.");
+
+    return $val if not $fmt or $fmt =~ /^raw/i;
+    ## Special formats: exponent-only or as list.
+    return &Bio::Search::BlastUtils::get_exponent($val) if $fmt =~ /^exp/i;
+    # Split on either exponent marker. A character class is required here:
+    # the pattern /eE/ would only match the literal two-character string
+    # "eE", which never occurs in a number, so nothing would be split.
+    return (split (/[eE]/, $val)) if $fmt =~ /^parts/i;
+
+    ## Default: return the raw P/Expect-value.
+    return $val;
+}
+
+#----------------
+sub raw_hit_data {
+#----------------
+    # Rebuilds one string from the lines kept in '_hit_data'. The result
+    # starts with '>'. An element equal to 'end' contributes a newline;
+    # a line starting (after optional whitespace) with 'Score' or 'Query'
+    # gets a newline put back in front of it; every other line is
+    # appended unchanged.
+    my ($self) = @_;
+    my @chunks = ('>');
+
+    # Need to add blank lines where we've removed them.
+    for my $line ( @{$self->{'_hit_data'}} ) {
+        if( $line eq 'end' ) {
+            push @chunks, "\n";
+            next;
+        }
+        push @chunks, "\n" if $line =~ /^\s*(Score|Query)/;
+        push @chunks, $line;
+    }
+
+    return join( '', @chunks );
+}
+
+
+#=head2 _set_length
+#
+# Usage     : $hit_object->_set_length( "233" );
+# Purpose   : Set the total length of the hit sequence.
+# Example   : $hit_object->_set_length( $len );
+# Returns   : n/a
+# Argument  : Integer (only when setting). Any commas will be stripped out.
+# Throws    : n/a
+#
+#=cut
+
+#-----------
+sub _set_length {
+#-----------
+    # Stores the given length in '_length' after deleting every comma
+    # from it (e.g. "1,233" is stored as "1233").
+    my $self = shift;
+    my $len  = shift;
+
+    $len =~ s/,//g; # get rid of commas
+
+    return $self->{'_length'} = $len;
+}
+
+#=head2 _set_description
+#
+# Usage     : Private method; called automatically during construction
+# Purpose   : Sets the description of the hit sequence.
+#            : For sequence without descriptions, does not set any description.
+# Argument  : Array containing description (multiple lines).
+# Comments  : Processes the supplied description:
+#                1. Join all lines into one string.
+#                2. Remove sequence id at the beginning of description.
+#                3. Removes junk characters at the beginning and end of the description.
+#
+#=cut
+
+#--------------
+sub _set_description {
+#--------------
+    # Joins the supplied description lines with single spaces, strips the
+    # leading sequence identifier and surrounding junk, and stores the
+    # result in '_description'. Nothing is stored when the joined string
+    # is false (i.e. for a sequence without a description).
+    my( $self, @desc ) = @_;
+    my( $desc);
+
+#    print STDERR "PsiBlastHit: RAW DESC:\n@desc\n";
+
+    $desc = join(" ", @desc);
+
+    my $name = $self->name;
+
+    if($desc) {
+        $desc =~ s/^\s*\S+\s+//; # remove the sequence ID(s)
+                                 # This won't work if there's no description.
+        # The name must be matched literally. Identifiers commonly contain
+        # '|' (and '.'), which are regex metacharacters; unquoted, the '|'
+        # turns the name into an alternation, letting a fragment of the id
+        # be deleted from the middle of the description.
+        $desc =~ s/^\s*\Q$name\E//;  # ...but this should.
+        $desc =~ s/^[\s!]+//;    # leading whitespace and '!' characters
+        $desc =~ s/ \d+$//;      # trailing " <digits>"
+        $desc =~ s/\.+$//;       # trailing dots
+        $self->{'_description'} = $desc;
+    }
+
+#    print STDERR "PsiBlastHit: _set_description =  $desc\n";
+}
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $hit->to_string;
+ Function: Returns a string representation for the Blast Hit.
+           Primarily intended for debugging purposes.
+ Example : see usage
+ Returns : A string of the form:
+           [PsiBlastHit] <name> <description>
+           e.g.:
+           [PsiBlastHit] emb|Z46660|SC9725 S.cerevisiae chromosome XIII cosmid
+ Args    : None
+
+=cut
+
+#----------------
+sub to_string {
+#----------------
+    # Builds "[PsiBlastHit] <name> <description>" from the values
+    # returned by name() and description().
+    my ($self) = @_;
+    my $name = $self->name;
+    my $text = $self->description;
+    return "[PsiBlastHit] " . $name . " " . $text;
+}
+
+
+#=head2 _set_id
+#
+# Usage     : Private method; automatically called by new()
+# Purpose   : Sets the name of the PsiBlastHit sequence from the BLAST summary line.
+#           : The identifier is assumed to be the first
+#           : chunk of non-whitespace characters in the description line
+#           : Does not assume any semantics in the structure of the identifier
+#           : (Formerly, this method attempted to extract database name from
+#           : the seq identifiers, but this was prone to break).
+# Returns   : n/a
+# Argument  : String containing description line of the hit from Blast report
+#           : or first line of an alignment section (with or without the leading '>').
+# Throws    : Warning if cannot locate sequence ID.
+#
+#See Also   : L<new()|new>, L<accession()|accession>
+#
+#=cut
+
+#---------------
+sub _set_id {
+#---------------
+    # Takes a description line (optionally starting with '>') and treats
+    # its first run of non-whitespace characters as the hit name. The
+    # rest of the line is stored as the description, and the last
+    # '|'-delimited piece of the name is passed to accession().
+    # If no identifier is found, a warning is issued and the name is set
+    # to the line itself, or to 'Unknown sequence ID' if the line is false.
+    my( $self, $desc ) = @_;
+
+    # Failure case first: nothing that looks like an identifier.
+    unless( $desc =~ /^>?(\S+)\s*(.*)/ ) {
+        $self->warn("Can't locate sequence identifier in summary line.", "Line = $desc");
+        $desc = 'Unknown sequence ID' if not $desc;
+        return $self->name($desc);
+    }
+
+    # Assume only that the ID is the first white space delimited chunk.
+    # No attempt is made to extract a database name; clients will have
+    # to interpret the identifier as necessary.
+    my ($id, $rest) = ($1, $2);
+    $self->name($id);
+
+    # This description comes from the summary section of the BLAST report
+    # and so may be truncated. The full description will be set from the
+    # alignment section; it is stored here in case that section is not
+    # being parsed.
+    $self->{'_description'} = $rest;
+
+    # Assuming accession is delimited with | symbols (NCBI-style)
+    my @fields    = split(/\|/, $id);
+    my $accession = pop @fields;
+    return $self->accession( $accession );
+}
+
+
+=head2 ambiguous_aln
+
+ Usage     : $ambig_code = $hit_object->ambiguous_aln();
+ Purpose   : Sets/Gets ambiguity code data member.
+ Example   : (see usage)
+ Returns   : String = 'q', 's', 'qs', '-'
+           :   'q'  = query sequence contains overlapping sub-sequences
+           :          while sbjct does not.
+           :   's'  = sbjct sequence contains overlapping sub-sequences
+           :          while query does not.
+           :   'qs' = query and sbjct sequence contains overlapping sub-sequences
+           :          relative to each other.
+           :   '-'  = query and sbjct sequence do not contain multiple domains
+           :          relative to each other OR both contain the same distribution
+           :          of similar domains.
+ Argument  : n/a
+ Throws    : n/a
+ Status    : Experimental
+
+See Also   : L<Bio::Search::BlastUtils::tile_hsps>, L<HSP Tiling and Ambiguous Alignments>
+
+=cut
+
+#--------------------
+sub ambiguous_aln {
+#--------------------
+    # Combined getter/setter for the '_ambiguous_aln' slot.
+    # Any argument is stored first; a false stored value reads back as '-'.
+    my $self = shift;
+    $self->{'_ambiguous_aln'} = $_[0] if @_;
+
+    my $code = $self->{'_ambiguous_aln'};
+    return $code ? $code : '-';
+}
+
+
+
+=head2 overlap
+
+ Usage     : $blast_object->overlap( [integer] );
+ Purpose   : Gets/Sets the allowable amount of overlap between different HSP sequences.
+ Example   : $blast_object->overlap(5);
+           : $overlap = $blast_object->overlap;
+ Returns   : Integer.
+ Argument  : integer.
+ Throws    : n/a
+ Status    : Experimental
+ Comments  : Any two HSPs whose sequences overlap by less than or equal
+           : to the overlap() number of residues will be considered separate HSPs
+           : and will not get tiled by Bio::Search::BlastUtils::_adjust_contigs().
+
+See Also   : L<Bio::Search::BlastUtils::_adjust_contigs()|Bio::Search::BlastUtils>, L<BUGS | BUGS>
+
+=cut
+
+#-------------
+sub overlap {
+#-------------
+    # Combined getter/setter for the '_overlap' slot.
+    # Any argument is stored first; an undefined stored value reads back as 0.
+    my $self = shift;
+    $self->{'_overlap'} = $_[0] if @_;
+
+    return 0 unless defined $self->{'_overlap'};
+    return $self->{'_overlap'};
+}
+
+
+
+
+
+
+=head2 bits
+
+ Usage     : $hit_object->bits();
+ Purpose   : Gets the BLAST bit score of the best HSP for the current Blast hit.
+ Example   : $bits = $hit_object->bits();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if bit score is not set.
+ Comments  : For BLAST1, the non-bit score is listed in the summary line.
+
+See Also   : L<score()|score>
+
+=cut
+
+#---------
+sub bits {
+#---------
+    # Returns the bit score stored on the hit itself when one is defined
+    # (a leftover of the old 'query' mode, where data came from the
+    # description line only); otherwise asks the HSP returned by hsp()
+    # for its bits.
+    my ($self) = @_;
+
+    return $self->{'_bits'} if defined $self->{'_bits'};
+
+    my $hsp_bits = $self->hsp->bits;
+    return $hsp_bits;
+}
+
+
+
+=head2 n
+
+ Usage     : $hit_object->n();
+ Purpose   : Gets the N number for the current Blast hit.
+           : This is the number of HSPs in the set which was ascribed
+           : the lowest P-value (listed on the description line).
+           : This number is not the same as the total number of HSPs.
+           : To get the total number of HSPs, use num_hsps().
+ Example   : $n = $hit_object->n();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set (BLAST2 reports).
+ Comments  : Note that the N parameter is not reported in gapped BLAST2.
+           : Calling n() on such reports will result in a call to num_hsps().
+           : The num_hsps() method will count the actual number of
+           : HSPs in the alignment listing, which may exceed N in
+           : some cases.
+
+See Also   : L<num_hsps()|num_hsps>
+
+=cut
+
+#-----
+sub n {
+#-----
+    # Returns the N value stored on the hit when one is defined (a
+    # leftover of the old 'query' mode); otherwise the n() of the HSP
+    # returned by hsp(). If the chosen value is false, num_hsps() is
+    # used instead.
+    my ($self) = @_;
+
+    my $count = defined($self->{'_n'}) ? $self->{'_n'} : $self->hsp->n;
+    $count = $self->num_hsps unless $count;
+
+    return $count;
+}
+
+
+
+=head2 frame
+
+ Usage     : $hit_object->frame();
+ Purpose   : Gets the reading frame for the best HSP after HSP tiling.
+           : This is only valid for BLASTX and TBLASTN/X reports.
+ Example   : $frame = $hit_object->frame();
+ Returns   : Integer (-2 .. +2)
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set (BLAST2 reports).
+ Comments  : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : If you don't want the tiled data, iterate through each HSP
+           : calling frame() on each (use hsps() to get all HSPs).
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+#----------'
+sub frame {
+#----------
+    # Runs Bio::Search::BlastUtils::tile_hsps() on this hit unless the
+    # '_tile_hsps' flag is already set, then returns the frame stored on
+    # the hit when one is defined (a leftover of the old 'query' mode);
+    # otherwise the frame() of the HSP returned by hsp().
+    my ($self) = @_;
+
+    unless( $self->{'_tile_hsps'} ) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    return $self->{'_frame'} if defined $self->{'_frame'};
+
+    my $hsp_frame = $self->hsp->frame;
+    return $hsp_frame;
+}
+
+
+
+
+
+=head2 p
+
+ Usage     : $hit_object->p( [format] );
+ Purpose   : Get the P-value for the best HSP of the given BLAST hit.
+           : (Note that P-values are not provided with NCBI Blast2 reports).
+ Example   : $p =  $sbjct->p;
+           : $p =  $sbjct->p('exp');  # get exponent only.
+           : ($num, $exp) =  $sbjct->p('parts');  # split sci notation into parts
+ Returns   : Float or scientific notation number (the raw P-value, DEFAULT).
+           : Integer if format == 'exp' (the magnitude of the base 10 exponent).
+           : 2-element list (float, int) if format == 'parts' and P-value
+           :                is in scientific notation (See Comments).
+ Argument  : format: string of 'raw' | 'exp' | 'parts'
+           :    'raw' returns value given in report. Default. (1.2e-34)
+           :    'exp' returns exponent value only (34)
+           :    'parts' returns the decimal and exponent as a
+           :            2-element list (1.2, -34) (See Comments).
+ Throws    : Warns if no P-value is defined. Uses expect instead.
+ Comments  : Using the 'parts' argument is not recommended since it will not
+           : work as expected if the P-value is not in scientific notation.
+           : That is, floats are not converted into sci notation before
+           : splitting into parts.
+
+See Also   : L<expect()|expect>, L<signif()|signif>, L<Bio::Search::BlastUtils::get_exponent()|Bio::Search::BlastUtils>
+
+=cut
+
+#--------
+sub p {
+#--------
+    # Returns the stored P-value. When none is defined, a warning is
+    # issued and the stored Expect value is used instead.
+    # $fmt selects the form of the result:
+    #   false or 'raw...' -> the value exactly as stored
+    #   'exp...'          -> Bio::Search::BlastUtils::get_exponent($val)
+    #   'parts...'        -> the value split at its 'e'/'E' exponent marker
+    #   anything else     -> the value exactly as stored
+    #
+    # Some duplication of logic for p(), expect() and signif() for the sake of performance.
+    my ($self, $fmt) = @_;
+
+    my $val = $self->{'_p'};
+
+    # $val can be zero.
+    if(not defined $val) {
+        # P-value not defined, must be a NCBI Blast2 report.
+        # Use expect instead.
+        $self->warn( "P-value not defined. Using expect() instead.");
+        $val = $self->{'_expect'};
+    }
+
+    return $val if not $fmt or $fmt =~ /^raw/i;
+    ## Special formats: exponent-only or as list.
+    return &Bio::Search::BlastUtils::get_exponent($val) if $fmt =~ /^exp/i;
+    # Split on either exponent marker. A character class is required here:
+    # the pattern /eE/ would only match the literal two-character string
+    # "eE", which never occurs in a number, so nothing would be split.
+    return (split (/[eE]/, $val)) if $fmt =~ /^parts/i;
+
+    ## Default: return the raw P-value.
+    return $val;
+}
+
+
+
+=head2 expect
+
+ Usage     : $hit_object->expect( [format] );
+ Purpose   : Get the Expect value for the best HSP of the given BLAST hit.
+ Example   : $e =  $sbjct->expect;
+           : $e =  $sbjct->expect('exp');  # get exponent only.
+           : ($num, $exp) = $sbjct->expect('parts');  # split sci notation into parts
+ Returns   : Float or scientific notation number (the raw expect value, DEFAULT).
+           : Integer if format == 'exp' (the magnitude of the base 10 exponent).
+           : 2-element list (float, int) if format == 'parts' and Expect
+           :                is in scientific notation (see Comments).
+ Argument  : format: string of 'raw' | 'exp' | 'parts'
+           :    'raw' returns value given in report. Default. (1.2e-34)
+           :    'exp' returns exponent value only (34)
+           :    'parts' returns the decimal and exponent as a
+           :            2-element list (1.2, -34)  (see Comments).
+ Throws    : Exception if the Expect value is not defined.
+ Comments  : Using the 'parts' argument is not recommended since it will not
+           : work as expected if the expect value is not in scientific notation.
+           : That is, floats are not converted into sci notation before
+           : splitting into parts.
+
+See Also   : L<p()|p>, L<signif()|signif>, L<Bio::Search::BlastUtils::get_exponent()|Bio::Search::BlastUtils>
+
+=cut
+
+#-----------
+sub expect {
+#-----------
+    # Returns the Expect value. If none is stored yet but HSPs are
+    # present, the value is taken from the HSP returned by hsp() and
+    # cached in '_expect'. Throws if neither is available.
+    # $fmt selects the form of the result:
+    #   false or 'raw...' -> the value exactly as stored
+    #   'exp...'          -> Bio::Search::BlastUtils::get_exponent($val)
+    #   'parts...'        -> the value split at its 'e'/'E' exponent marker
+    #   anything else     -> the value exactly as stored
+    #
+    # Some duplication of logic for p(), expect() and signif() for the sake of performance.
+    my ($self, $fmt) = @_;
+
+    my $val;
+
+    # For Blast reports that list the P value on the description line,
+    # getting the expect value requires fully parsing the HSP data.
+    # For NCBI blast, there's no problem.
+    if(not defined($self->{'_expect'})) {
+        if( defined $self->{'_hsps'}) {
+            $self->{'_expect'} = $val = $self->hsp->expect;
+        } else {
+            # If _expect is not set and _hsps are not set,
+            # then this must be a P-value-based report that was
+            # run without setting the HSPs (shallow parsing).
+            $self->throw("Can't get expect value. HSPs have not been set.");
+        }
+    } else {
+        $val = $self->{'_expect'};
+    }
+
+    # $val can be zero.
+    defined($val) or $self->throw("Can't get Expect value.");
+
+    return $val if not $fmt or $fmt =~ /^raw/i;
+    ## Special formats: exponent-only or as list.
+    return &Bio::Search::BlastUtils::get_exponent($val) if $fmt =~ /^exp/i;
+    # Split on either exponent marker. A character class is required here:
+    # the pattern /eE/ would only match the literal two-character string
+    # "eE", which never occurs in a number, so nothing would be split.
+    return (split (/[eE]/, $val)) if $fmt =~ /^parts/i;
+
+    ## Default: return the raw Expect-value.
+    return $val;
+}
+
+
+=head2 hsps
+
+ Usage     : $hit_object->hsps();
+ Purpose   : Get a list containing all HSP objects.
+           : Get the numbers of HSPs for the current hit.
+ Example   : @hsps = $hit_object->hsps();
+           : $num  = $hit_object->hsps();  # alternatively, use num_hsps()
+ Returns   : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
+           : Scalar context: integer (number of HSPs).
+           :                 (Equivalent to num_hsps()).
+ Argument  : n/a. Relies on wantarray
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsp()|hsp>, L<num_hsps()|num_hsps>
+
+=cut
+
+#---------
+sub hsps {
+#---------
+    # List context: every element of the '_hsps' array.
+    # Any other context: the number of elements in it.
+    # Throws when '_hsps' does not hold a reference.
+    my ($self) = @_;
+
+    unless( ref $self->{'_hsps'} ) {
+        $self->throw("Can't get HSPs: data not collected.");
+    }
+
+    my $list = $self->{'_hsps'};
+
+    return @$list if wantarray;   # returning list containing all HSPs.
+    return scalar(@$list);        # returning number of HSPs.
+}
+
+
+
+=head2 hsp
+
+ Usage     : $hit_object->hsp( [string] );
+ Purpose   : Get a single BlastHSP.pm object for the present PsiBlastHit.pm object.
+ Example   : $hspObj  = $hit_object->hsp;  # same as 'best'
+           : $hspObj  = $hit_object->hsp('best');
+           : $hspObj  = $hit_object->hsp('worst');
+ Returns   : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
+ Argument  : String (or no argument).
+           :   No argument (default) = highest scoring HSP (same as 'best').
+           :   'best' or 'first' = highest scoring HSP.
+           :   'worst' or 'last' = lowest scoring HSP.
+ Throws    : Exception if the HSPs have not been collected.
+           : Exception if an unrecognized argument is used.
+
+See Also   : L<hsps()|hsps>, L<num_hsps>()
+
+=cut
+
+#----------
+sub hsp {
+#----------
+    # Returns one element of the '_hsps' array, chosen by $option
+    # (default 'best'):
+    #   matches /best|first|1/i -> the first element
+    #   matches /worst|last/i   -> the last element
+    # The patterns are unanchored, so any option containing one of those
+    # words is accepted. Throws when '_hsps' does not hold a reference,
+    # or when $option matches neither pattern.
+    my( $self, $option ) = @_;
+    $option = 'best' unless $option;
+
+    unless( ref $self->{'_hsps'} ) {
+        $self->throw("Can't get HSPs: data not collected.");
+    }
+
+    my $list = $self->{'_hsps'};
+
+    if( $option =~ /best|first|1/i ) {
+        return $list->[0];
+    }
+    if( $option =~ /worst|last/i ) {
+        return $list->[-1];
+    }
+
+    $self->throw("Can't get HSP for: $option\n" .
+                 "Valid arguments: 'best', 'worst'");
+}
+
+
+
+=head2 num_hsps
+
+ Usage     : $hit_object->num_hsps();
+ Purpose   : Get the number of HSPs for the present Blast hit.
+ Example   : $nhsps = $hit_object->num_hsps();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+#-------------
+sub num_hsps {
+#-------------
+    # Returns the number of elements in the '_hsps' array.
+    # Throws when '_hsps' is not defined.
+    my ($self) = @_;
+
+    defined $self->{'_hsps'}
+        or $self->throw("Can't get HSPs: data not collected.");
+
+    my $list = $self->{'_hsps'};
+    return scalar @$list;
+}
+
+
+
+=head2 logical_length
+
+ Usage     : $hit_object->logical_length( [seq_type] );
+           : (mostly intended for internal use).
+ Purpose   : Get the logical length of the hit sequence.
+           : For query sequence of BLASTX and TBLASTX reports and the hit
+           : sequence of TBLASTN and TBLASTX reports, the returned length
+           : is the length of the would-be amino acid sequence (length/3).
+           : For all other BLAST flavors, this function is the same as length().
+ Example   : $len = $hit_object->logical_length();
+ Returns   : Integer
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This is important for functions like frac_aligned_query()
+           : which need to operate in amino acid coordinate space when dealing
+           : with T?BLASTX type reports.
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+#--------------------
+sub logical_length {
+#--------------------
+    # Returns the logical length for the requested sequence type
+    # ('query' by default; 'hit' is treated as 'sbjct').
+    #   'sbjct'       -> '_logical_length' if true, else '_length'
+    #   anything else -> '_query_length', divided by 3 when
+    #                    '_blast_program' matches /T?BLASTX/
+    my ($self, $seqType) = @_;
+    $seqType = 'query' unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # For the sbjct, return logical sbjct length
+    if( $seqType eq 'sbjct' ) {
+        my $hit_len = $self->{'_logical_length'} || $self->{'_length'};
+        return $hit_len;
+    }
+
+    # Otherwise, return logical query length,
+    # adjusted according to the BLAST flavor.
+    my $query_len = $self->{'_query_length'};
+    $query_len /= 3 if $self->{'_blast_program'} =~ /T?BLASTX/;
+
+    return $query_len;
+}
+
+
+=head2 length_aln
+
+ Usage     : $hit_object->length_aln( [seq_type] );
+ Purpose   : Get the total length of the aligned region for query or sbjct seq.
+           : This number will include all HSPs
+ Example   : $len    = $hit_object->length_aln(); # default = query
+           : $lenAln = $hit_object->length_aln('query');
+ Returns   : Integer
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : Exception if the argument is not recognized.
+ Comments  : This method will report the logical length of the alignment,
+           : meaning that for TBLAST[NX] reports, the length is reported
+           : using amino acid coordinate space (i.e., nucleotides / 3).
+           :
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : If you don't want the tiled data, iterate through each HSP
+           : calling length() on each (use hsps() to get all HSPs).
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+#---------------'
+sub length_aln {
+#---------------
+    # Returns the value stored under '_length_aln_<type>', where <type>
+    # is the requested sequence type ('query' by default; 'hit' is
+    # treated as 'sbjct'). Bio::Search::BlastUtils::tile_hsps() is run
+    # on this hit first unless the '_tile_hsps' flag is already set.
+    # Throws when the stored value is false.
+    my( $self, $seqType ) = @_;
+
+    $seqType = 'query' unless $seqType;
+    if( $seqType eq 'hit' ) {
+        $seqType = 'sbjct';
+    }
+
+    unless( $self->{'_tile_hsps'} ) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    my $aln_len = $self->{'_length_aln_'.$seqType};
+    return $aln_len if $aln_len;
+
+    ## If we don't have data, figure out what went wrong.
+    $self->throw("Can't get length aln for sequence type \"$seqType\"" .
+                 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
+    return $aln_len;
+}
+
+
+=head2 gaps
+
+ Usage     : $hit_object->gaps( [seq_type] );
+ Purpose   : Get the number of gaps in the aligned query, sbjct, or both sequences.
+           : Data is summed across all HSPs.
+ Example   : $qgaps = $hit_object->gaps('query');
+           : $hgaps = $hit_object->gaps('hit');
+           : $tgaps = $hit_object->gaps();    # default = total (query + hit)
+ Returns   : scalar context: integer
+           : array context without args: two-element list of integers
+           :    (queryGaps, sbjctGaps)
+           : Array context can be forced by providing an argument of 'list' or 'array'.
+           :
+           : CAUTION: Calling this method within printf or sprintf is array context.
+           : So this function may not give you what you expect. For example:
+           :          printf "Total gaps: %d", $hit->gaps();
+           : Actually returns a two-element array, so what gets printed
+           : is the number of gaps in the query, not the total
+           :
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list'  (default = 'total')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through each HSP object.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : Not relying on wantarray since that will fail in situations
+           : such as printf "%d", $hit->gaps() in which you might expect to
+           : be printing the total gaps, but evaluates to array context.
+
+See Also   : L<length_aln()|length_aln>
+
+=cut
+
+#----------
+sub gaps {
+#----------
+    # Returns gap counts read from '_gaps_query' and '_gaps_sbjct'.
+    # With no argument, the type defaults to 'list' in list context and
+    # 'total' otherwise; 'hit' is treated as 'sbjct'.
+    #   type containing 'list' or 'array' -> (query gaps, sbjct gaps)
+    #   'total'                           -> sum of both, or 0 if false
+    #   anything else                     -> '_gaps_<type>', or 0 if false
+    # Bio::Search::BlastUtils::tile_hsps() is run on this hit first
+    # unless the '_tile_hsps' flag is already set.
+    my( $self, $seqType ) = @_;
+
+    unless( $seqType ) {
+        $seqType = wantarray ? 'list' : 'total';
+    }
+    # Note: the 'hit' alias is resolved before lower-casing.
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    unless( $self->{'_tile_hsps'} ) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    my $kind = lc($seqType);
+
+    return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'})
+        if $kind =~ /list|array/i;
+
+    return $self->{'_gaps_'.$kind} || 0
+        unless $kind eq 'total';
+
+    return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
+}
+
+
+
+=head2 matches
+
+ Usage     : $hit_object->matches( [class] );
+ Purpose   : Get the total number of identical or conserved matches
+           : (or both) across all HSPs.
+           : (Note: 'conservative' matches are indicated as 'positives'
+           :         in the Blast report.)
+ Example   : ($id,$cons) = $hit_object->matches(); # no argument
+           : $id = $hit_object->matches('id');
+           : $cons = $hit_object->matches('cons');
+ Returns   : Integer or a 2-element array of integers
+ Argument  : class = 'id' | 'cons' OR none.
+           : If no argument is provided, both identical and conservative
+           : numbers are returned in a two element list.
+           : (Other terms can be used to refer to the conservative
+           :  matches, e.g., 'positive'. All that is checked is whether or
+           :  not the supplied string starts with 'id'. If not, the
+           : conservative matches are returned.)
+ Throws    : Exception if the requested data cannot be obtained.
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : Does not rely on wantarray to return a list. Only checks for
+           : the presence of an argument (no arg = return list).
+
+See Also   : L<Bio::Search::HSP::BlastHSP::matches()|Bio::Search::HSP::BlastHSP>, L<hsps()|hsps>
+
+=cut
+
+#---------------
+sub matches {
+#---------------
+    # Returns match totals read from '_totalIdentical' and
+    # '_totalConserved'.
+    #   no (or false) argument  -> both values as a two-element array
+    #   argument starting 'id'  -> the identical total
+    #   any other argument      -> the conserved total
+    # With an argument, a false total leads to the exception below.
+    # Without one, the pair always holds two elements (even if they are
+    # undefined), so that branch always returns.
+    my( $self, $arg) = @_;
+
+    if( $arg ) {
+        my $slot  = ($arg =~ /^id/i) ? '_totalIdentical' : '_totalConserved';
+        my $total = $self->{$slot};
+        return $total if $total;
+    }
+    else {
+        my @pair = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
+        return @pair if @pair;
+    }
+
+    ## Something went wrong if we make it to here.
+    $self->throw("Can't get identical or conserved data: no data.");
+}
+
+
+=head2 start
+
+ Usage     : $sbjct->start( [seq_type] );
+ Purpose   : Gets the start coordinate for the query, sbjct, or both sequences
+           : in the PsiBlastHit object. If there is more than one HSP, the lowest start
+           : value of all HSPs is returned.
+ Example   : $qbeg = $sbjct->start('query');
+           : $sbeg = $sbjct->start('hit');
+           : ($qbeg, $sbeg) = $sbjct->start();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers (queryStart, sbjctStart)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If there is more than one
+           : HSP and they have not already been tiled, they will be tiled first automatically..
+           : Remember that the start and end coordinates of all HSPs are
+           : normalized so that start < end. Strand information can be
+           : obtained by calling $hit->strand().
+
+See Also   : L<end()|end>, L<range()|range>, L<strand()|strand>, L<HSP Tiling and Ambiguous Alignments>, L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+#----------
+sub start {
+#----------
+    # Returns start coordinate(s) for the hit. With no argument, the
+    # type defaults to 'list' in list context and 'query' otherwise;
+    # 'hit' is treated as 'sbjct'.
+    # When num_hsps() is exactly 1, the call is handed to that HSP's
+    # start() with the resolved type. Otherwise
+    # Bio::Search::BlastUtils::tile_hsps() is run (unless '_tile_hsps'
+    # is already set) and the values stored on the hit are returned:
+    #   type containing 'list' or 'array' -> ('_queryStart', '_sbjctStart')
+    #   anything else                     -> '_<lower-cased type>Start'
+    my ($self, $seqType) = @_;
+
+    $seqType = (wantarray ? 'list' : 'query') unless $seqType;
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    # If there is only one HSP, defer this call to the solitary HSP.
+    return $self->hsp->start($seqType) if $self->num_hsps == 1;
+
+    unless( $self->{'_tile_hsps'} ) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    if( $seqType =~ /list|array/i ) {
+        return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
+    }
+
+    ## Sensitive to member name changes.
+    my $slot = '_' . lc($seqType) . 'Start';
+    return $self->{$slot};
+}
+
+
+=head2 end
+
+ Usage     : $sbjct->end( [seq_type] );
+ Purpose   : Gets the end coordinate for the query, sbjct, or both sequences
+           : in the PsiBlastHit object. If there is more than one HSP, the largest end
+           : value of all HSPs is returned.
+ Example   : $qend = $sbjct->end('query');
+           : $send = $sbjct->end('hit');
+           : ($qend, $send) = $sbjct->end();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers (queryEnd, sbjctEnd)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'sbjct'
+           :  (case insensitive), or 'hit' as a synonym for 'sbjct'.
+           :  If not supplied, 'query' is used.
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If there is more than one
+           : HSP and they have not already been tiled, they will be tiled first automatically..
+           : Remember that the start and end coordinates of all HSPs are
+           : normalized so that start < end. Strand information can be
+           : obtained by calling $hit->strand().
+
+See Also   : L<start()|start>, L<range()|range>, L<strand()|strand>, L<HSP Tiling and Ambiguous Alignments>, L<Bio::Search::HSP::BlastHSP::end|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+#----------
+sub end {
+    # Returns the end coordinate(s) of this hit.
+    #   $seqType : 'query', 'hit' / 'sbjct', or 'list' / 'array'. When omitted
+    #              it defaults to 'list' in list context, 'query' otherwise.
+    # With exactly one HSP the request is forwarded to that HSP's end().
+    # Otherwise the '_queryStop' / '_sbjctStop' members of the hit are
+    # returned, after calling Bio::Search::BlastUtils::tile_hsps() if the
+    # '_tile_hsps' flag is not yet set.
+    my ($self, $seqType) = @_;
+
+    $seqType ||= (wantarray ? 'list' : 'query');
+
+    # 'hit' is an alias for 'sbjct'. Compare case-insensitively so that
+    # 'HIT' is aliased too instead of reading the non-existent '_hitStop'
+    # member after the lower-casing done below.
+    $seqType = 'sbjct' if lc($seqType) eq 'hit';
+
+    # If there is only one HSP, defer this call to the solitary HSP.
+    return $self->hsp->end($seqType) if $self->num_hsps == 1;
+
+    Bio::Search::BlastUtils::tile_hsps($self) if not $self->{'_tile_hsps'};
+
+    if ($seqType =~ /list|array/i) {
+        return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
+    }
+
+    ## Sensitive to member name changes.
+    return $self->{ '_' . lc($seqType) . 'Stop' };
+}
+
+=head2 range
+
+ Usage     : $sbjct->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($qbeg, $qend) = $sbjct->range('query');
+           : ($sbeg, $send) = $sbjct->range('hit');
+ Returns   : Two-element array of integers
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<end()|end>
+
+=cut
+
+#----------
+sub range {
+    # Returns the list (start, end) for one sequence of the hit by calling
+    # start() and end() on this object with the same sequence type.
+    #   $seqType : 'query' (default), 'hit' or 'sbjct'.
+    my ($self, $seqType) = @_;
+
+    $seqType ||= 'query';
+
+    # Alias 'hit' to 'sbjct' regardless of letter case, so 'HIT' reaches
+    # start()/end() as the member name they look up.
+    $seqType = 'sbjct' if lc($seqType) eq 'hit';
+
+    return ($self->start($seqType), $self->end($seqType));
+}
+
+
+=head2 frac_identical
+
+ Usage     : $hit_object->frac_identical( [seq_type] );
+ Purpose   : Get the overall fraction of identical positions across all HSPs.
+           : The number refers to only the aligned regions and does not
+           : account for unaligned regions in between the HSPs, if any.
+ Example   : $frac_iden = $hit_object->frac_identical('query');
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
+           : default = 'query' (but see comments below).
+           : ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           :
+           : Therefore, when called with an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used. Total does NOT take into account HSP
+           : tiling, so it should not be used.
+           :
+           : To get the fraction identical among only the aligned residues,
+           : ignoring the gaps, call this method without an argument or
+           : with an argument of 'query' or 'hit'.
+           :
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>
+
+=cut
+
+#------------------
+sub frac_identical {
+    # Returns '_totalIdentical' divided by the '_length_aln_<type>' member,
+    # formatted with two decimals ("%.2f").
+    #   $seqType : 'query' (default), 'hit' / 'sbjct', or 'total'.
+    # Bio::Search::BlastUtils::tile_hsps() is called first if the
+    # '_tile_hsps' flag is not yet set.
+    my ($self, $seqType) = @_;
+
+    ## Sensitive to member name format.
+    # Lower-case BEFORE aliasing: doing it the other way round turned 'Hit'
+    # into the unknown member '_length_aln_hit' and died with a division by
+    # zero.
+    $seqType = lc($seqType || 'query');
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    Bio::Search::BlastUtils::tile_hsps($self) if not $self->{'_tile_hsps'};
+
+    return sprintf("%.2f",
+        $self->{'_totalIdentical'} / $self->{ '_length_aln_' . $seqType });
+}
+
+
+
+=head2 frac_conserved
+
+ Usage     : $hit_object->frac_conserved( [seq_type] );
+ Purpose   : Get the overall fraction of conserved positions across all HSPs.
+           : The number refers to only the aligned regions and does not
+           : account for unaligned regions in between the HSPs, if any.
+ Example   : $frac_cons = $hit_object->frac_conserved('hit');
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
+           : default = 'query' (but see comments below).
+           : ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : Different versions of Blast report different values for the total
+           : length of the alignment. This is the number reported in the
+           : denominators in the stats section:
+           : "Identical = 34/120 Positives = 67/120".
+           : NCBI BLAST uses the total length of the alignment (with gaps)
+           : WU-BLAST uses the length of the query sequence (without gaps).
+           :
+           : Therefore, when called with an argument of 'total',
+           : this method will report different values depending on the
+           : version of BLAST used. Total does NOT take into account HSP
+           : tiling, so it should not be used.
+           :
+           : To get the fraction conserved among only the aligned residues,
+           : ignoring the gaps, call this method without an argument or
+           : with an argument of 'query' or 'hit'.
+           :
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>
+
+=cut
+
+#--------------------
+sub frac_conserved {
+    # Returns '_totalConserved' divided by the '_length_aln_<type>' member,
+    # formatted with two decimals ("%.2f").
+    #   $seqType : 'query' (default), 'hit' / 'sbjct', or 'total'.
+    # Bio::Search::BlastUtils::tile_hsps() is called first if the
+    # '_tile_hsps' flag is not yet set.
+    my ($self, $seqType) = @_;
+
+    ## Sensitive to member name format.
+    # Lower-case BEFORE aliasing: doing it the other way round turned 'Hit'
+    # into the unknown member '_length_aln_hit' and died with a division by
+    # zero.
+    $seqType = lc($seqType || 'query');
+    $seqType = 'sbjct' if $seqType eq 'hit';
+
+    Bio::Search::BlastUtils::tile_hsps($self) if not $self->{'_tile_hsps'};
+
+    return sprintf("%.2f",
+        $self->{'_totalConserved'} / $self->{ '_length_aln_' . $seqType });
+}
+
+
+
+
+=head2 frac_aligned_query
+
+ Usage     : $hit_object->frac_aligned_query();
+ Purpose   : Get the fraction of the query sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_query();
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : To compute the fraction aligned, the logical length of the query
+           : sequence is used, meaning that for [T]BLASTX reports, the
+           : full length of the query sequence is converted into amino acids
+           : by dividing by 3. This is necessary because of the way
+           : the lengths of aligned sequences are computed.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>,  L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>
+
+=cut
+
+#----------------------
+sub frac_aligned_query {
+    # Ratio of the '_length_aln_query' member to logical_length('query'),
+    # rendered with two decimals. HSPs are tiled first when the
+    # '_tile_hsps' flag is unset.
+    my ($self) = @_;
+
+    unless ($self->{'_tile_hsps'}) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    my $aligned_len = $self->{'_length_aln_query'};
+    my $logical_len = $self->logical_length('query');
+
+    return sprintf("%.2f", $aligned_len / $logical_len);
+}
+
+
+
+=head2 frac_aligned_hit
+
+ Usage     : $hit_object->frac_aligned_hit();
+ Purpose   : Get the fraction of the hit (sbjct) sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_hit();
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : To compute the fraction aligned, the logical length of the sbjct
+           : sequence is used, meaning that for TBLAST[NX] reports, the
+           : full length of the sbjct sequence is converted into amino acids
+           : by dividing by 3. This is necessary because of the way
+           : the lengths of aligned sequences are computed.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<logical_length()|logical_length>, L<length_aln()|length_aln>,  L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>
+
+=cut
+
+#--------------------
+sub frac_aligned_hit {
+    # Ratio of the '_length_aln_sbjct' member to logical_length('sbjct'),
+    # rendered with two decimals. HSPs are tiled first when the
+    # '_tile_hsps' flag is unset.
+    my ($self) = @_;
+
+    unless ($self->{'_tile_hsps'}) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    my $aligned_len = $self->{'_length_aln_sbjct'};
+    my $logical_len = $self->logical_length('sbjct');
+
+    return sprintf("%.2f", $aligned_len / $logical_len);
+}
+
+
+## These methods are being maintained for backward compatibility.
+
+=head2 frac_aligned_sbjct
+
+Same as L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+#----------------
+sub frac_aligned_sbjct {
+    # Backward-compatibility alias: passes any extra arguments on to
+    # frac_aligned_hit() and returns whatever that returns.
+    my ($self, @args) = @_;
+    return $self->frac_aligned_hit(@args);
+}
+#----------------
+
+=head2 num_unaligned_sbjct
+
+Same as L<num_unaligned_hit()|num_unaligned_hit>
+
+=cut
+
+#----------------
+sub num_unaligned_sbjct {
+    # Backward-compatibility alias: passes any extra arguments on to
+    # num_unaligned_hit() and returns whatever that returns.
+    my ($self, @args) = @_;
+    return $self->num_unaligned_hit(@args);
+}
+#----------------
+
+
+
+=head2 num_unaligned_hit
+
+ Usage     : $hit_object->num_unaligned_hit();
+ Purpose   : Get the number of the unaligned residues in the hit sequence.
+           : Sums across all HSPs.
+ Example   : $num_unaln = $hit_object->num_unaligned_hit();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : See notes regarding logical lengths in the comments for frac_aligned_hit().
+           : They apply here as well.
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<num_unaligned_query()|num_unaligned_query>,  L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>, L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+#---------------------
+sub num_unaligned_hit {
+    # logical_length('sbjct') minus the '_length_aln_sbjct' member, with
+    # negative results clamped to 0. HSPs are tiled first when the
+    # '_tile_hsps' flag is unset.
+    my ($self) = @_;
+
+    unless ($self->{'_tile_hsps'}) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    my $unaligned = $self->logical_length('sbjct') - $self->{'_length_aln_sbjct'};
+
+    return 0 if $unaligned < 0;
+    return $unaligned;
+}
+
+
+=head2 num_unaligned_query
+
+ Usage     : $hit_object->num_unaligned_query();
+ Purpose   : Get the number of the unaligned residues in the query sequence.
+           : Sums across all HSPs.
+ Example   : $num_unaln = $hit_object->num_unaligned_query();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : See notes regarding logical lengths in the comments for frac_aligned_query().
+           : They apply here as well.
+           : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+           : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically.
+
+See Also   : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>,  L<Bio::Search::BlastUtils::tile_hsps()|Bio::Search::BlastUtils>
+
+=cut
+
+#-----------------------
+sub num_unaligned_query {
+    # logical_length('query') minus the '_length_aln_query' member, with
+    # negative results clamped to 0. HSPs are tiled first when the
+    # '_tile_hsps' flag is unset.
+    my ($self) = @_;
+
+    unless ($self->{'_tile_hsps'}) {
+        Bio::Search::BlastUtils::tile_hsps($self);
+    }
+
+    my $unaligned = $self->logical_length('query') - $self->{'_length_aln_query'};
+
+    return 0 if $unaligned < 0;
+    return $unaligned;
+}
+
+
+
+=head2 seq_inds
+
+ Usage     : $hit->seq_inds( seq_type, class, collapse );
+ Purpose   : Get a list of residue positions (indices) across all HSPs
+           : for identical or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hit->seq_inds('query', 'identical');
+           : @h_ind = $hit->seq_inds('hit', 'conserved');
+           : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
+ Returns   : Array of integers
+           : May include ranges if collapse is non-zero.
+ Argument  : [0] seq_type  = 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :                 ('sbjct' is synonymous with 'hit')
+           : [1] class     = 'identical' or 'conserved' (default = 'identical')
+           :              (can be shortened to 'id' or 'cons')
+           :              (actually, anything not 'id' will evaluate to 'conserved').
+           : [2] collapse  = boolean, if non-zero, consecutive positions are merged
+           :             using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
+           :             collapses to "1-5 7 9-11". This is useful for
+           :             consolidating long lists. Default = no collapse.
+ Throws    : n/a.
+ Comments  : Note that HSPs are not tiled for this. This could be a problem
+           : for hits containing mutually exclusive HSPs.
+           : TODO: Consider tiling and then reporting seq_inds for the
+           : best HSP contig.
+
+See Also   : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+#-------------
+sub seq_inds {
+    # Collects seq_inds($seqType, $class) from every HSP of this hit,
+    # removes duplicates and sorts the positions numerically. When $collapse
+    # is true the sorted list is handed to
+    # Bio::Search::BlastUtils::collapse_nums() and its result is returned.
+    #   $seqType  : defaults to 'query'; 'hit' is rewritten to 'sbjct'.
+    #   $class    : defaults to 'identical'.
+    #   $collapse : defaults to 0.
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    $seqType  = 'query'     unless $seqType;
+    $class    = 'identical' unless $class;
+    $collapse = 0           unless $collapse;
+    $seqType  = 'sbjct'     if $seqType eq 'hit';
+
+    # This merges the data for all HSPs together.
+    my @inds = map { $_->seq_inds($seqType, $class) } $self->hsps;
+
+    # Drop duplicates via hash keys, then put the survivors in numeric order.
+    if (@inds) {
+        my %seen;
+        @seen{@inds} = (1) x @inds;
+        @inds = sort { $a <=> $b } keys %seen;
+    }
+
+    return @inds unless $collapse;
+    return &Bio::Search::BlastUtils::collapse_nums(@inds);
+}
+
+
+=head2 iteration
+
+ Usage     : $sbjct->iteration( );
+ Purpose   : Gets the iteration number in which the Hit was found.
+ Example   : $iteration_num = $sbjct->iteration();
+ Returns   : Integer greater than or equal to 1
+             Non-PSI-BLAST reports will report iteration as 1, but this number
+             is only meaningful for PSI-BLAST reports.
+ Argument  : none
+ Throws    : none
+
+See Also   : L<found_again()|found_again>
+
+=cut
+
+#----------------
+sub iteration {
+    # Plain reader for the '_iteration' member; takes no arguments.
+    my ($self) = @_;
+    return $self->{'_iteration'};
+}
+#----------------
+
+
+=head2 found_again
+
+ Usage     : $sbjct->found_again;
+ Purpose   : Gets a boolean indicator whether or not the hit has
+             been found in a previous iteration.
+             This is only applicable to PSI-BLAST reports.
+
+              This method indicates if the hit was reported in the
+              "Sequences used in model and found again" section of the
+              PSI-BLAST report or if it was reported in the
+              "Sequences not found previously or not previously below threshold"
+              section of the PSI-BLAST report. Only for hits in iteration > 1.
+
+ Example   : if( $sbjct->found_again()) { ... };
+ Returns   : Boolean (1 or 0) for PSI-BLAST report iterations greater than 1.
+             Returns undef for PSI-BLAST report iteration 1 and non PSI_BLAST
+             reports.
+ Argument  : none
+ Throws    : none
+
+See Also   : L<iteration()|iteration>
+
+=cut
+
+#----------------
+sub found_again {
+    # Plain reader for the '_found_again' member; takes no arguments.
+    my ($self) = @_;
+    return $self->{'_found_again'};
+}
+#----------------
+
+
+=head2 strand
+
+ Usage     : $sbjct->strand( [seq_type] );
+ Purpose   : Gets the strand(s) for the query, sbjct, or both sequences
+           : in the best HSP of the PsiBlastHit object after HSP tiling.
+           : Only valid for BLASTN, TBLASTX, BLASTX-query, TBLASTN-hit.
+ Example   : $qstrand = $sbjct->strand('query');
+           : $sstrand = $sbjct->strand('hit');
+           : ($qstrand, $sstrand) = $sbjct->strand();
+ Returns   : scalar context: integer '1', '-1', or '0'
+           : array context without args: list of two strings (queryStrand, sbjctStrand)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : If you don't want the tiled data, iterate through each HSP
+           : calling strand() on each (use hsps() to get all HSPs).
+           :
+           : Formerly (prior to 10/21/02), this method would return the
+           : string "-1/1" for hits with HSPs on both strands.
+           : However, now that strand and frame is properly being accounted
+           : for during HSP tiling, it makes more sense for strand()
+           : to return the strand data for the best HSP after tiling.
+           :
+           : If you really want to know about hits on opposite strands,
+           : you should be iterating through the HSPs using methods on the
+           : HSP objects.
+           :
+           : A possible use case where knowing whether a hit has HSPs
+           : on both strands would be when filtering via SearchIO for hits with
+           : this property. However, in this case it would be better to have a
+           : dedicated method such as $hit->hsps_on_both_strands(). Similarly
+           : for frame. This could be provided if there is interest.
+
+See Also   : L<Bio::Search::HSP::BlastHSP::strand>()
+
+=cut
+
+#----------
+sub strand {
+    # Returns the strand of the query, of the hit, or of both.
+    #   $seqType : 'query', 'hit' / 'sbjct', or 'list' / 'array'. When omitted
+    #              it defaults to 'list' in list context, 'query' otherwise.
+    # Bio::Search::BlastUtils::tile_hsps() is called first if the
+    # '_tile_hsps' flag is not yet set. With exactly one HSP the request is
+    # forwarded to that HSP's strand().
+    my ($self, $seqType) = @_;
+
+    Bio::Search::BlastUtils::tile_hsps($self) if not $self->{'_tile_hsps'};
+
+    $seqType ||= (wantarray ? 'list' : 'query');
+
+    # 'hit' is an alias for 'sbjct'; accept it in any letter case, like the
+    # case-insensitive 'list' / 'array' test further down.
+    $seqType = 'sbjct' if lc($seqType) eq 'hit';
+
+    # If there is only one HSP, defer this call to the solitary HSP.
+    return $self->hsp->strand($seqType) if $self->num_hsps == 1;
+
+    my ($qstr, $hstr);
+    if (defined $self->{'_qstrand'}) {
+        # Use the '_qstrand' / '_sstrand' members when '_qstrand' is set.
+        ($qstr, $hstr) = ($self->{'_qstrand'}, $self->{'_sstrand'});
+    }
+    else {
+        # Otherwise, iterate through all HSPs collecting strand info.
+        # This yields a string such as "-1/1" if there are HSPs on different
+        # strands.
+        # NOTE: This was the pre-10/21/02 procedure; it is only reached when
+        # '_qstrand' is undefined.
+        my (%qstr, %hstr);
+        foreach my $hsp ($self->hsps) {
+            my ($q, $h) = $hsp->strand();
+            $qstr{$q}++;
+            $hstr{$h}++;
+        }
+        $qstr = join('/', sort keys %qstr);
+        $hstr = join('/', sort keys %hstr);
+    }
+
+    return ($qstr, $hstr) if $seqType =~ /list|array/i;
+
+    # Case-insensitive: a case-sensitive 'eq' made 'QUERY' fall through to
+    # the hit strand without any warning.
+    return lc($seqType) eq 'query' ? $qstr : $hstr;
+}
+
+
+1;
+__END__
+
+#####################################################################################
+#                                END OF CLASS                                       #
+#####################################################################################
+
+
+=head1 FOR DEVELOPERS ONLY
+
+=head2 Data Members
+
+Information about the various data members of this module is provided for those
+wishing to modify or understand the code. Two things to bear in mind:
+
+=over 4
+
+=item 1 Do NOT rely on these in any code outside of this module.
+
+All data members are prefixed with an underscore to signify that they are private.
+Always use accessor methods. If the accessor doesn't exist or is inadequate,
+create or modify an accessor (and let me know, too!). (An exception to this might
+be for BlastHSP.pm which is more tightly coupled to PsiBlastHit.pm and
+may access PsiBlastHit data members directly for efficiency purposes, but probably
+should not).
+
+=item 2 This documentation may be incomplete and out of date.
+
+It is easy for these data member descriptions to become obsolete as
+this module is still evolving. Always double check this info and search
+for members not described here.
+
+=back
+
+An instance of Bio::Search::Hit::PsiBlastHit.pm is a blessed reference to a hash containing
+all or some of the following fields:
+
+ FIELD           VALUE
+ --------------------------------------------------------------
+ _hsps          : Array ref for a list of Bio::Search::HSP::BlastHSP.pm objects.
+                :
+ _db            : Database identifier from the summary line.
+                :
+ _desc          : Description data for the hit from the summary line.
+                :
+ _length        : Total length of the hit sequence.
+                :
+ _score         : BLAST score.
+                :
+ _bits          : BLAST score (in bits). Matrix-independent.
+                :
+ _p             : BLAST P value. Obtained from summary section. (Blast1/WU-Blast only)
+                :
+ _expect        : BLAST Expect value. Obtained from summary section.
+                :
+ _n             : BLAST N value (number of HSPs) (Blast1/WU-Blast2 only)
+                :
+ _frame         : Reading frame for TBLASTN and TBLASTX analyses.
+                :
+ _totalIdentical: Total number of identical aligned monomers.
+                :
+ _totalConserved: Total number of conserved aligned monomers (a.k.a. "positives").
+                :
+ _overlap       : Maximum number of overlapping residues between adjacent HSPs
+                : before considering the alignment to be ambiguous.
+                :
+ _ambiguous_aln : Boolean. True if the alignment of all HSPs is ambiguous.
+                :
+ _length_aln_query : Length of the aligned region of the query sequence.
+                   :
+ _length_aln_sbjct : Length of the aligned region of the sbjct sequence.
+
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PullHitI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PullHitI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PullHitI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1134 @@
+# $Id: PullHitI.pm,v 1.1.2.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::Hit::PullHitI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Hit::PullHitI - Bio::Search::Hit::HitI interface for pull parsers.
+
+=head1 SYNOPSIS
+
+	# This is an interface and cannot be instantiated
+
+    # typically one gets HitI objects from a SearchIO stream via a ResultI
+    use Bio::SearchIO;
+    my $parser = new Bio::SearchIO(-format => 'hmmer_pull',
+                                   -file => 't/data/hmmpfam.out');
+
+    my $result = $parser->next_result;
+    my $hit    = $result->next_hit;
+
+    $hit_name = $hit->name();
+
+    $desc = $hit->description();
+
+    $len = $hit->length();
+
+    $alg = $hit->algorithm();
+
+    $score = $hit->raw_score();
+
+    $significance = $hit->significance();
+
+    $rank = $hit->rank(); # the Nth hit for a specific query
+
+    while( $hsp = $hit->next_hsp()) { ... } # process in iterator fashion
+
+    for my $hsp ( $hit->hsps() ) { ... } # process in list fashion
+
+=head1 DESCRIPTION
+
+This object handles the hit data from a database sequence search.
+
+PullHitI is for fast implementations that only do parsing work on the hit
+data when you actually request information by calling one of the HitI
+methods.
+
+Many methods of HitI are implemented in a way suitable for inheriting classes
+that use Bio::PullParserI. It only really makes sense for PullHit modules to be
+created by (and have as a -parent) PullResult modules.
+
+In addition to the usual -chunk and -parent, -hit_data is all you should supply
+when making a PullHit object. This will store that data and make it accessible
+via _raw_hit_data, which you can access in your subclass. It would be best to
+simply provide the data as the input -chunk instead, if the raw data is large
+enough.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 COPYRIGHT
+
+Copyright (c) 2006 Sendu Bala. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::Hit::PullHitI;
+
+use Bio::Search::SearchUtils;
+
+use strict;
+
+use base qw(Bio::PullParserI Bio::Search::Hit::HitI);
+
+=head2 _setup
+
+ Title   : _setup
+ Usage   : $self->_setup(@args)
+ Function: Implementers should call this to setup common fields and deal with
+           common arguments to new().
+ Returns : n/a
+ Args    : @args received in new().
+
+=cut
+
+sub _setup {
+    # Common initialisation for implementing classes. In order:
+    #   1. registers a set of field names (all undef) through _fields();
+    #   2. extracts -parent, -chunk and -hit_data from @args via _rearrange();
+    #   3. throws unless at least one of -parent / -chunk is true;
+    #   4. passes -parent to parent() and -chunk to chunk();
+    #   5. stores -hit_data through _raw_hit_data() when it is true.
+    # -chunk is either a single value, or an array ref whose elements are
+    # used as (io, start, end).
+    my ($self, @args) = @_;
+
+    # fields most subclasses probably will want
+    my %common_fields = map { $_ => undef } qw(
+        next_hsp  num_hsps
+        hsps
+        hit_start query_start
+        hit_end   query_end
+        length    name
+        accession
+    );
+    $self->_fields(\%common_fields);
+
+    my ($parent, $chunk, $hit_data) =
+        $self->_rearrange([qw(PARENT CHUNK HIT_DATA)], @args);
+
+    unless ($parent || $chunk) {
+        $self->throw("Need -parent or -chunk to be defined");
+    }
+
+    $self->parent($parent) if $parent;
+
+    if ($chunk) {
+        # A bare chunk value gets start 0 and an undefined end.
+        my ($io, $start, $end) = ref($chunk) eq 'ARRAY'
+                               ? @{$chunk}
+                               : ($chunk, 0, undef);
+        $self->chunk($io, -start => $start, -end => $end);
+    }
+
+    $self->_raw_hit_data($hit_data) if $hit_data;
+}
+
+sub _raw_hit_data {
+    # Combined getter/setter for the '_raw_hit_data' member: stores the first
+    # argument when one is supplied (even an undefined one), then returns the
+    # current value.
+    my ($self, @value) = @_;
+    $self->{_raw_hit_data} = $value[0] if @value;
+    return $self->{_raw_hit_data};
+}
+
+#
+# Some of these methods are written explicitly to avoid HitI throwing not
+# implemented; if it didn't do that then PullParserI AUTOLOAD would have
+# caught them.
+#
+
+=head2 name
+
+ Title   : name
+ Usage   : $hit_name = $hit->name();
+ Function: returns the name of the Hit sequence
+ Returns : a scalar string
+ Args    : none
+
+The B<name> of a hit is unique within a Result or within an Iteration.
+
+=cut
+
+sub name {
+    # Returns whatever get_field('name') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('name');
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $desc = $hit->description();
+ Function: Retrieve the description for the hit
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+sub description {
+    # Returns whatever get_field('description') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('description');
+}
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $acc = $hit->accession();
+ Function: Retrieve the accession (if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+
+=cut
+
+sub accession {
+    # Returns whatever get_field('accession') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('accession');
+}
+
+=head2 locus
+
+ Title   : locus
+ Usage   : $acc = $hit->locus();
+ Function: Retrieve the locus(if available) for the hit
+ Returns : a scalar string (empty string if not set)
+ Args    : none
+
+=cut
+
+sub locus {
+    # Returns whatever get_field('locus') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('locus');
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $len = $hit->length
+ Function: Returns the length of the hit 
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub length {
+    # Returns whatever get_field('length') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('length');
+}
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : $alg = $hit->algorithm();
+ Function: Gets the algorithm specification that was used to obtain the hit
+           For BLAST, the algorithm denotes what type of sequence was aligned 
+           against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated 
+           dna-prt, TBLASTN prt-translated dna, TBLASTX translated 
+           dna-translated dna).
+ Returns : a scalar string 
+ Args    : none
+
+=cut
+
+sub algorithm {
+    # Returns whatever get_field('algorithm') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('algorithm');
+}
+
+=head2 raw_score
+
+ Title   : raw_score
+ Usage   : $score = $hit->raw_score();
+ Function: Gets the "raw score" generated by the algorithm.  What
+           this score is exactly will vary from algorithm to algorithm,
+           returning undef if unavailable.
+ Returns : a scalar value
+ Args    : none
+
+=cut
+
+sub raw_score {
+    # Returns whatever get_field('score') yields; note the field is named
+    # 'score', not 'raw_score'.
+    my ($self) = @_;
+    return $self->get_field('score');
+}
+
+=head2 score
+
+Equivalent to L<raw_score()|raw_score>
+
+=cut
+
+sub score {
+    # Returns whatever get_field('score') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('score');
+}
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $significance = $hit->significance();
+ Function: Used to obtain the E or P value of a hit, i.e. the probability that
+           this particular hit was obtained purely by random chance.  If
+           information is not available (nor calculatable from other
+           information sources), return undef.
+ Returns : a scalar value or undef if unavailable
+ Args    : none
+
+=cut
+
+sub significance {
+    # Returns whatever get_field('significance') yields for this object.
+    my ($self) = @_;
+    return $self->get_field('significance');
+}
+
+=head2 bits
+
+ Usage     : $hit_object->bits();
+ Purpose   : Gets the bit score of the best HSP for the current hit.
+ Example   : $bits = $hit_object->bits();
+ Returns   : Integer or double for FASTA reports
+ Argument  : n/a
+ Comments  : For BLAST1, the non-bit score is listed in the summary line.
+
+See Also   : L<score()|score>
+
+=cut
+
+sub bits {
+    # Thin accessor: asks get_field() for the 'bits' field.
+    my ($self) = @_;
+    return $self->get_field('bits');
+}
+
+=head2 next_hsp
+
+ Title    : next_hsp
+ Usage    : while( $hsp = $obj->next_hsp()) { ... }
+ Function : Returns the next available High Scoring Pair
+ Example  : 
+ Returns  : L<Bio::Search::HSP::HSPI> object or null if finished
+ Args     : none
+
+=cut
+
+sub next_hsp {
+    # Iteration is left to get_field(): every call requests 'next_hsp'.
+    my ($self) = @_;
+    return $self->get_field('next_hsp');
+}
+
+=head2 hsps
+
+ Usage     : $hit_object->hsps();
+ Purpose   : Get a list containing all HSP objects.
+           : Get the numbers of HSPs for the current hit.
+ Example   : @hsps = $hit_object->hsps();
+           : $num  = $hit_object->hsps();  # alternatively, use num_hsps()
+ Returns   : Array context : list of L<Bio::Search::HSP::BlastHSP> objects.
+           : Scalar context: integer (number of HSPs).
+           :                 (Equivalent to num_hsps()).
+ Argument  : n/a. Relies on wantarray
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsp()|hsp>, L<num_hsps()|num_hsps>
+
+=cut
+
+sub hsps {
+    # Whatever get_field('hsps') yields in the caller's context is passed
+    # straight back.
+    my ($self) = @_;
+    return $self->get_field('hsps');
+}
+
+=head2 num_hsps
+
+ Usage     : $hit_object->num_hsps();
+ Purpose   : Get the number of HSPs for the present Blast hit.
+ Example   : $nhsps = $hit_object->num_hsps();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if the HSPs have not been collected.
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+sub num_hsps {
+    # Thin accessor: asks get_field() for the 'num_hsps' field.
+    my ($self) = @_;
+    return $self->get_field('num_hsps');
+}
+
+#
+# HitI/ GenericHit methods that are unrelated to simply parsing information
+# directly out of a file, but need more complex calculation; mostly not
+# implemented here.
+#
+
+=head2 seq_inds
+
+ Usage     : $hit->seq_inds( seq_type, class, collapse );
+ Purpose   : Get a list of residue positions (indices) across all HSPs
+           : for identical or conserved residues in the query or sbjct sequence.
+ Example   : @s_ind = $hit->seq_inds('query', 'identical');
+           : @h_ind = $hit->seq_inds('hit', 'conserved');
+           : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
+ Returns   : Array of integers 
+           : May include ranges if collapse is non-zero.
+ Argument  : [0] seq_type  = 'query' or 'hit' or 'sbjct'  (default = 'query')
+           :                 ('sbjct' is synonymous with 'hit')
+           : [1] class = 'identical' or 'conserved' or 'nomatch' or 'gap'
+		   :         (default = 'identical')
+           :         (can be shortened to 'id' or 'cons')
+		   :         Note that 'conserved' includes identical unless you use
+		   :         'conserved-not-identical'
+           : [2] collapse = boolean, if non-zero, consecutive positions are
+           :             merged using a range notation, e.g.,
+           :             "1 2 3 4 5 7 9 10 11" collapses to "1-5 7 9-11". This
+           :             is useful for  consolidating long lists. Default = no
+           :             collapse.
+ Throws    : n/a.
+
+See Also   : L<Bio::Search::HSP::HSPI::seq_inds()|Bio::Search::HSP::HSPI>
+
+=cut
+
+sub seq_inds {
+    my ($self, $seqType, $class, $collapse) = @_;
+
+    # Any missing or false argument falls back to its default.
+    $seqType  = 'query'     unless $seqType;
+    $class    = 'identical' unless $class;
+    $collapse = 0           unless $collapse;
+
+    # 'sbjct' is accepted as another spelling of 'hit'.
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # The merged positions are cached on the object, one slot per
+    # (sequence type, class) pair exactly as spelled by the caller.
+    my $cache_key = '_seq_inds_' . $seqType . '_' . $class;
+    if (!defined $self->{$cache_key}) {
+        # Pool the positions reported by every HSP of this hit.
+        my @merged;
+        foreach my $hsp ($self->hsps) {
+            push @merged, $hsp->seq_inds($seqType, $class);
+        }
+
+        # For every class except 'gap', drop duplicate positions and put
+        # the remainder in ascending numeric order.
+        if (@merged && $class ne 'gap') {
+            my %seen;
+            @seen{@merged} = ();
+            @merged = sort { $a <=> $b } keys %seen;
+        }
+
+        $self->{$cache_key} = \@merged;
+    }
+
+    # Work on a copy so callers never get hold of the cached array.
+    my @positions = @{ $self->{$cache_key} };
+    return @positions unless $collapse;
+    return &Bio::Search::SearchUtils::collapse_nums(@positions);
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $hit->rewind;
+ Function: Allow one to reset the HSP iterator to the beginning if possible
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind {
+    # Nothing is reset here; the call goes straight to
+    # throw_not_implemented().
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 overlap
+
+ Usage     : $hit_object->overlap( [integer] );
+ Purpose   : Gets/Sets the allowable amount overlap between different HSP
+             sequences.
+ Example   : $hit_object->overlap(5);
+           : $overlap = $hit_object->overlap;
+ Returns   : Integer.
+ Argument  : integer.
+ Throws    : n/a
+ Status    : Deprecated
+ Comments  : This value isn't used for anything
+
+=cut
+
+sub overlap {
+    my ($self, @new_value) = @_;
+
+    # Store the first argument whenever one was supplied, even a false one.
+    $self->{_overlap} = $new_value[0] if @new_value;
+
+    # An unset or false stored value is reported as 0.
+    return $self->{_overlap} || 0;
+}
+
+=head2 n
+
+ Usage     : $hit_object->n();
+ Purpose   : Gets the N number for the current Blast hit.
+           : This is the number of HSPs in the set which was ascribed
+           : the lowest P-value (listed on the description line).
+           : This number is not the same as the total number of HSPs.
+           : To get the total number of HSPs, use num_hsps().
+ Example   : $n = $hit_object->n();
+ Returns   : Integer
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set (BLAST2 reports).
+ Comments  : Note that the N parameter is not reported in gapped BLAST2.
+           : Calling n() on such reports will result in a call to num_hsps().
+           : The num_hsps() method will count the actual number of
+           : HSPs in the alignment listing, which may exceed N in
+           : some cases.
+
+See Also   : L<num_hsps()|num_hsps>
+
+=cut
+
+sub n {
+    # Reads the 'num_hsps' field, the same one num_hsps() reads.
+    my ($self) = @_;
+    return $self->get_field('num_hsps');
+}
+
+=head2 p
+
+ Usage     : $hit_object->p( [format] );
+ Purpose   : Get the P-value for the best HSP of the given BLAST hit.
+           : (Note that P-values are not provided with NCBI Blast2 reports).
+ Example   : $p =  $sbjct->p;
+           : $p =  $sbjct->p('exp');  # get exponent only.
+           : ($num, $exp) =  $sbjct->p('parts'); # split sci notation into parts
+ Returns   : Float or scientific notation number (the raw P-value, DEFAULT).
+           : Integer if format == 'exp' (the magnitude of the base 10 exponent).
+           : 2-element list (float, int) if format == 'parts' and P-value
+           :                is in scientific notation (See Comments).
+ Argument  : format: string of 'raw' | 'exp' | 'parts'
+           :    'raw' returns value given in report. Default. (1.2e-34)
+           :    'exp' returns exponent value only (34)
+           :    'parts' returns the decimal and exponent as a 
+           :            2-element list (1.2, -34) (See Comments).
+ Throws    : Warns if no P-value is defined. Uses expect instead.
+ Comments  : Using the 'parts' argument is not recommended since it will not
+           : work as expected if the P-value is not in scientific notation.
+           : That is, floats are not converted into sci notation before
+           : splitting into parts.
+
+See Also   : L<expect()|expect>, L<signif()|signif>,
+             L<Bio::Search::BlastUtils::get_exponent()|Bio::Search::BlastUtils>
+
+=cut
+
+sub p {
+    # No P-value lookup happens here; the call goes straight to
+    # throw_not_implemented().
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 hsp
+
+ Usage     : $hit_object->hsp( [string] );
+ Purpose   : Get a single HSPI object for the present HitI object.
+ Example   : $hspObj  = $hit_object->hsp;  # same as 'best'
+           : $hspObj  = $hit_object->hsp('best');
+           : $hspObj  = $hit_object->hsp('worst');
+ Returns   : Object reference for a L<Bio::Search::HSP::HSPI> object.
+ Argument  : String (or no argument).
+           :   No argument (default) = highest scoring HSP (same as 'best').
+           :   'best'  = highest scoring HSP.
+           :   'worst' = lowest scoring HSP.
+ Throws    : Exception if an unrecognized argument is used.
+
+See Also   : L<hsps()|hsps>, L<num_hsps>()
+
+=cut
+
+sub hsp {
+    # No HSP selection happens here; the call goes straight to
+    # throw_not_implemented().
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 logical_length
+
+ Usage     : $hit_object->logical_length( [seq_type] );
+           : (mostly intended for internal use).
+ Purpose   : Get the logical length of the hit sequence.
+           : If the Blast is a TBLASTN or TBLASTX, the returned length 
+           : is the length of the would-be amino acid sequence (length/3).
+           : For all other BLAST flavors, this function is the same as length().
+ Example   : $len    = $hit_object->logical_length();
+ Returns   : Integer 
+ Argument  : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This is important for functions like frac_aligned_query()
+           : which need to operate in amino acid coordinate space when dealing
+           : with [T]BLAST[NX] type reports.
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>,
+             L<frac_aligned_hit()|frac_aligned_hit>
+
+=cut
+
+sub logical_length {
+    my ($self, $type) = @_;
+
+    # Default to the query, compare case-insensitively, and accept 'sbjct'
+    # as another spelling of 'hit'.
+    $type = lc($type || 'query');
+    $type = 'hit' if $type eq 'sbjct';
+
+    # Query length and hit length live in differently named fields.
+    return $self->get_field('query_length') if $type eq 'query';
+    return $self->get_field('length')       if $type eq 'hit';
+
+    # Unrecognised sequence type: return nothing explicitly (undef in scalar
+    # context, empty list in list context) instead of falling off the end of
+    # an if/elsif chain, whose result is unspecified.
+    return;
+}
+
+=head2 rank
+
+ Title   : rank
+ Usage   : $rank = $obj->rank()
+ Function: Get the rank of this Hit in the Query search list
+           i.e. this is the Nth hit for a specific query
+ Returns : value of rank
+ Args    : none (this implementation only reads the 'rank' field;
+           any argument passed is ignored)
+
+=cut
+
+sub rank {
+    # Read-only here: asks get_field() for 'rank' and ignores any argument.
+    my ($self) = @_;
+    return $self->get_field('rank');
+}
+
+=head2 each_accession_number
+
+ Title   : each_accession_number
+ Usage   : $obj->each_accession_number
+ Function: Get each accession number listed in the description of the hit.
+           If there are no alternatives, then only the primary accession will 
+           be given (if there is one).
+ Returns : list of all accession numbers in the description
+ Args    : none
+
+=cut
+
+sub each_accession_number {
+    my $self = shift;
+
+    # Declare first, then assign conditionally.  The former
+    # "my $x = ... if COND" form has undefined behaviour in Perl (see
+    # perlsyn, "Statement Modifiers").
+    my ($accession, $desc);
+    $accession = $self->get_field('accession')   if $self->has_field('accession');
+    $desc      = $self->get_field('description') if $self->has_field('description');
+
+    # Nothing to report when neither field holds a true value.
+    return unless $accession || $desc;
+
+    # The primary accession, when present, always comes first.
+    my @accnums;
+    push(@accnums, $accession) if $accession;
+
+    if (defined $desc) {
+        # Walk over every run of non-space characters in the description
+        # that contains at least two '|' separators.
+        while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
+            my $id = $1;
+            my $acc;
+            if ($id =~ /(?:gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(?:.*)/) {
+                # Keep the text between the database tag and the last '|',
+                # cut at the first '.'.
+                ($acc) = split /\./, $1;
+            }
+            elsif ($id =~ /(?:pir|prf|pat|gnl)\|(?:.*)\|(.*)/) {
+                # Keep the text after the last '|', cut at the first '.'.
+                ($acc) = split /\./, $1;
+            }
+            elsif ($id =~ /(?:gim|gi|bbm|bbs|lcl)\|(?:\d*)/) {
+                # These identifiers are reported whole.
+                $acc = $id;
+            }
+            elsif ($id =~ /(?:oth)\|(.*)\|(?:.*)\|(?:.*)/ ) { # discontinued...
+                $acc = $1;
+            }
+            else {
+                # Unknown database tag: report the identifier untouched.
+                $acc = $id;
+            }
+            push(@accnums, $acc);
+        }
+    }
+    return @accnums;
+}
+
+=head2 tiled_hsps
+
+ Usage     : $hit_object->tiled_hsps( [integer] );
+ Purpose   : Gets/Sets an indicator for whether or not the HSPs in this Hit 
+           : have been tiled.
+ Example   : $hit_object->tiled_hsps(1);
+           : if( $hit_object->tiled_hsps ) { # do something }
+ Returns   : Boolean (1 or 0) 
+ Argument  : integer (optional)
+ Throws    : n/a
+ Status    : Deprecated
+ Notes     : This value is not used for anything
+
+=cut
+
+sub tiled_hsps {
+    my ($self, @new_value) = @_;
+
+    # Store the first argument whenever one was supplied, even a false one.
+    $self->{_hsps_are_tiled} = $new_value[0] if @new_value;
+
+    # An unset or false stored value is reported as 0.
+    return $self->{_hsps_are_tiled} || 0;
+}
+
+=head2 strand
+
+ Usage     : $sbjct->strand( [seq_type] );
+ Purpose   : Gets the strand(s) for the query, sbjct, or both sequences
+           : in the best HSP of the BlastHit object after HSP tiling.
+           : Only valid for BLASTN, TBLASTX, BLASTX-query, TBLASTN-hit.
+ Example   : $qstrand = $sbjct->strand('query');
+           : $sstrand = $sbjct->strand('hit');
+           : ($qstrand, $sstrand) = $sbjct->strand();
+ Returns   : scalar context: integer '1', '-1', or '0'
+           : array context without args: list of two strings (queryStrand, sbjctStrand)
+           : Array context can be "induced" by providing an argument of 'list' or 'array'.
+ Argument  : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+ Comments  : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : If you don't want the tiled data, iterate through each HSP
+           : calling strand() on each (use hsps() to get all HSPs).
+           :
+           : Formerly (prior to 10/21/02), this method would return the
+           : string "-1/1" for hits with HSPs on both strands.
+           : However, now that strand and frame is properly being accounted
+           : for during HSP tiling, it makes more sense for strand()
+           : to return the strand data for the best HSP after tiling.
+           :
+           : If you really want to know about hits on opposite strands,
+           : you should be iterating through the HSPs using methods on the
+           : HSP objects.
+           :
+           : A possible use case where knowing whether a hit has HSPs 
+           : on both strands would be when filtering via SearchIO for hits with 
+           : this property. However, in this case it would be better to have a
+           : dedicated method such as $hit->hsps_on_both_strands(). Similarly
+           : for frame. This could be provided if there is interest.
+
+See Also   : L<Bio::Search::HSP::HSPI::strand>()
+
+=cut
+
+sub strand {
+    # No strand lookup happens here; the call goes straight to
+    # throw_not_implemented().
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 frame
+
+ Usage     : $hit_object->frame();
+ Purpose   : Gets the reading frame for the best HSP after HSP tiling.
+           : This is only valid for BLASTX and TBLASTN/X type reports.
+ Example   : $frame = $hit_object->frame();
+ Returns   : Integer (-2 .. +2)
+ Argument  : n/a
+ Throws    : Exception if HSPs have not been set.
+ Comments  : This method requires that all HSPs be tiled. If they have not
+           : already been tiled, they will be tiled first automatically..
+           : If you don't want the tiled data, iterate through each HSP
+           : calling frame() on each (use hsps() to get all HSPs).
+
+See Also   : L<hsps()|hsps>
+
+=cut
+
+sub frame {
+    # No frame lookup happens here; the call goes straight to
+    # throw_not_implemented().
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 length_aln
+
+ Usage     : $hit_object->length_aln( [seq_type] );
+ Purpose   : Get the total length of the aligned region for query or sbjct seq.
+           : This number will include all HSPs, and excludes gaps.
+ Example   : $len    = $hit_object->length_aln(); # default = query
+           : $lenAln = $hit_object->length_aln('query');
+ Returns   : Integer 
+ Argument  : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : Exception if the argument is not recognized.
+ Comments  : This method will report the logical length of the alignment,
+           : meaning that for TBLAST[NX] reports, the length is reported
+           : using amino acid coordinate space (i.e., nucleotides / 3).
+
+See Also   : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>,
+             L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>,
+             L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>,
+             L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
+
+=cut
+
+sub length_aln {
+    my ($self, $seqType) = @_;
+    $seqType = 'query' unless $seqType;
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # Every distinct position reported for either class counts exactly once.
+    # NOTE(review): the second class is spelled 'no_match' here, while the
+    # seq_inds() documentation lists 'nomatch' -- confirm that the HSP
+    # implementation accepts this spelling.
+    my %non_gap_positions;
+    for my $class ('conserved', 'no_match') {
+        $non_gap_positions{$_} = 1 for $self->seq_inds($seqType, $class);
+    }
+    return scalar(keys %non_gap_positions);
+}
+
+=head2 gaps
+
+ Usage     : $hit_object->gaps( [seq_type] );
+ Purpose   : Get the number of gaps in the aligned query, hit, or both sequences.
+           : Data is summed across all HSPs.
+ Example   : $qgaps = $hit_object->gaps('query');
+           : $hgaps = $hit_object->gaps('hit');
+           : $tgaps = $hit_object->gaps();    # default = total (query + hit)
+ Returns   : scalar context: integer
+           : array context without args: two-element list of integers  
+           :    (queryGaps, hitGaps)
+           : Array context can be forced by providing an argument of 'list' or
+		   : 'array'.
+           :
+           : CAUTION: Calling this method within printf or sprintf is array
+		   : context.
+           : So this function may not give you what you expect. For example:
+           :          printf "Total gaps: %d", $hit->gaps();
+           : Actually returns a two-element array, so what gets printed 
+           : is the number of gaps in the query, not the total
+           :
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list'
+           : (default = 'total') ('sbjct' is synonymous with 'hit')
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through each HSP object.
+
+=cut
+
+sub gaps {
+    my ($self, $seqType) = @_;
+
+    # With no explicit type the calling context decides: a list of both
+    # counts in list context, their sum otherwise.
+    unless ($seqType) {
+        $seqType = wantarray ? 'list' : 'total';
+    }
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # (query gaps, hit gaps) as a two-element list.
+    if ($seqType =~ /list|array/i) {
+        my $query_gaps = scalar($self->seq_inds('query', 'gap'));
+        my $hit_gaps   = scalar($self->seq_inds('hit', 'gap'));
+        return ($query_gaps, $hit_gaps);
+    }
+
+    # Sum over both sequences.
+    if ($seqType eq 'total') {
+        my $sum = scalar($self->seq_inds('query', 'gap')) + scalar($self->seq_inds('hit', 'gap'));
+        return $sum || 0;
+    }
+
+    # Anything else is handed to seq_inds() as the sequence type.
+    my $count = scalar($self->seq_inds($seqType, 'gap'));
+    return $count || 0;
+}
+
+=head2 matches
+
+ Usage     : $hit_object->matches( [class] );
+ Purpose   : Get the total number of identical or conserved matches 
+           : (or both) across all HSPs.
+           : (Note: 'conservative' matches are indicated as 'positives' 
+           :         in BLAST reports.)
+ Example   : ($id,$cons) = $hit_object->matches(); # no argument
+           : $id = $hit_object->matches('id');
+           : $cons = $hit_object->matches('cons'); 
+ Returns   : Integer or a 2-element array of integers 
+ Argument  : [0] class = 'id' | 'cons' OR none.
+           : [1] seq_type  = 'query' or 'hit' or 'sbjct'  (default = 'hit')
+           :                 ('sbjct' is synonymous with 'hit')
+           : If no argument is provided, both identical and conservative 
+           : numbers are returned in a two element list.
+           : (Other terms can be used to refer to the conservative
+           :  matches, e.g., 'positive'. All that is checked is whether or
+           :  not the supplied string starts with 'id'. If not, the 
+           : conservative matches are returned.)
+
+=cut
+
+sub matches {
+    my ($self, $class, $seqType) = @_;
+
+    # The counts differ between the two sequences, since HSPs can overlap
+    # on one sequence and not on the other.  When the caller does not
+    # choose, the hit sequence is assumed.
+    $seqType ||= 'hit';
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # Identical-match counts for both sequences, computed once and cached.
+    unless (exists $self->{_id_matches}) {
+        $self->{_id_matches}->{hit}   = scalar($self->seq_inds('hit', 'identical'));
+        $self->{_id_matches}->{query} = scalar($self->seq_inds('query', 'identical'));
+    }
+
+    # Conserved-match counts for both sequences, computed once and cached.
+    unless (exists $self->{_con_matches}) {
+        foreach my $type ('hit', 'query') {
+            # 'conserved-not-identical' can still report positions that are
+            # identical in another, overlapping HSP, so those are removed.
+            my %identicals = map { $_ => 1 } $self->seq_inds($type, 'identical');
+            my @conserved  = $self->seq_inds($type, 'conserved-not-identical');
+
+            # grep in scalar context yields a count, so a hit without any
+            # qualifying position is stored as 0 rather than undef.
+            $self->{_con_matches}->{$type} = scalar(grep { !exists $identicals{$_} } @conserved);
+        }
+    }
+
+    # No class requested: hand back (identical, conserved).
+    unless ($class) {
+        return ($self->{_id_matches}->{$seqType}, $self->{_con_matches}->{$seqType});
+    }
+
+    # A class starting with 'id' selects identical matches; anything else
+    # selects conserved matches.
+    return $class =~ /^id/i
+        ? $self->{_id_matches}->{$seqType}
+        : $self->{_con_matches}->{$seqType};
+}
+
+=head2 start
+
+ Usage     : $sbjct->start( [seq_type] );
+ Purpose   : Gets the start coordinate for the query, sbjct, or both sequences
+           : in the object. If there is more than one HSP, the lowest start
+           : value of all HSPs is returned.
+ Example   : $qbeg = $sbjct->start('query');
+           : $sbeg = $sbjct->start('hit');
+           : ($qbeg, $sbeg) = $sbjct->start();
+ Returns   : scalar context: integer 
+           : array context without args: list of two integers (queryStart,
+           : sbjctStart)
+           : Array context can be "induced" by providing an argument of 'list'
+           : or 'array'.
+ Argument  : 'query' or 'hit' or 'sbjct' (default = 'query') ('sbjct' is
+             synonymous with 'hit')
+
+=cut
+
+sub start {
+    my ($self, $seqType) = @_;
+
+    # A false 'num_hsps' field means there is nothing to report: notify
+    # through _warn_about_no_hsps() and return nothing.
+    if (!$self->get_field('num_hsps')) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return;
+    }
+
+    # With no explicit type the calling context decides between both
+    # coordinates (list context) and the query coordinate (scalar context).
+    $seqType = (wantarray ? 'list' : 'query') unless $seqType;
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    return ($self->get_field('query_start'), $self->get_field('hit_start'))
+        if $seqType =~ /list|array/i;
+    return $self->get_field('hit_start')   if $seqType eq 'hit';
+    return $self->get_field('query_start') if $seqType eq 'query';
+
+    # Any other value is rejected.
+    $self->throw("Unknown sequence type '$seqType'");
+}
+
+=head2 end
+
+ Usage     : $sbjct->end( [seq_type] );
+ Purpose   : Gets the end coordinate for the query, sbjct, or both sequences
+           : in the object. If there is more than one HSP, the largest end
+           : value of all HSPs is returned.
+ Example   : $qend = $sbjct->end('query');
+           : $send = $sbjct->end('hit');
+           : ($qend, $send) = $sbjct->end();
+ Returns   : scalar context: integer
+           : array context without args: list of two integers 
+           : (queryEnd, sbjctEnd)
+           : Array context can be "induced" by providing an argument 
+           : of 'list' or 'array'.
+ Argument  : 'query' or 'hit' or 'sbjct' (default = 'query') ('sbjct' is
+             synonymous with 'hit')
+
+=cut
+
+sub end {
+    my ($self, $seqType) = @_;
+
+    # A false 'num_hsps' field means there is nothing to report: notify
+    # through _warn_about_no_hsps() and return nothing.
+    if (!$self->get_field('num_hsps')) {
+        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
+        return;
+    }
+
+    # With no explicit type the calling context decides between both
+    # coordinates (list context) and the query coordinate (scalar context).
+    $seqType = (wantarray ? 'list' : 'query') unless $seqType;
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    return ($self->get_field('query_end'), $self->get_field('hit_end'))
+        if $seqType =~ /list|array/i;
+    return $self->get_field('hit_end')   if $seqType eq 'hit';
+    return $self->get_field('query_end') if $seqType eq 'query';
+
+    # Any other value is rejected.
+    $self->throw("Unknown sequence type '$seqType'");
+}
+
+=head2 range
+
+ Usage     : $sbjct->range( [seq_type] );
+ Purpose   : Gets the (start, end) coordinates for the query or sbjct sequence
+           : in the HSP alignment.
+ Example   : ($qbeg, $qend) = $sbjct->range('query');
+           : ($sbeg, $send) = $sbjct->range('hit');
+ Returns   : Two-element array of integers 
+ Argument  : seq_type = string, 'query' or 'hit' or 'sbjct'  (default = 'query')
+             ('sbjct' is synonymous with 'hit')
+ Throws    : n/a
+
+See Also   : L<start()|start>, L<end()|end>
+
+=cut
+
+sub range {
+    my $self    = shift;
+    my $seqType = shift || 'query';
+
+    # 'sbjct' is accepted as another spelling of 'hit'.
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # Both coordinates come from start() and end() for the same sequence.
+    return ($self->start($seqType), $self->end($seqType));
+}
+
+=head2 frac_identical
+
+ Usage     : $hit_object->frac_identical( [seq_type] );
+ Purpose   : Get the overall fraction of identical positions across all HSPs.
+           : The number refers to only the aligned regions and does not
+           : account for unaligned regions in between the HSPs, if any.
+ Example   : $frac_iden = $hit_object->frac_identical('query');
+ Returns : Float (3-decimal precision, e.g., 0.750).
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
+           : default = 'query' (but see comments below).
+           : ('sbjct' is synonymous with 'hit')
+
+=cut
+
+sub frac_identical {
+    my $self    = shift;
+    my $seqType = lc(shift || 'query');
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # Identical positions over aligned positions, both for the same sequence.
+    my $identical = $self->matches('id', $seqType);
+    my $aligned   = $self->length_aln($seqType);
+    my $fraction  = $identical / $aligned;
+    my $rounded   = sprintf( "%.3f", $fraction);
+
+    # A fraction below 1 must never be rounded up to 1 (just like blast).
+    return 0.999 if $rounded == 1 && $fraction < 1;
+    return $rounded;
+}
+
+=head2 frac_conserved
+
+ Usage     : $hit_object->frac_conserved( [seq_type] );
+ Purpose   : Get the overall fraction of conserved positions across all HSPs.
+           : The number refers to only the aligned regions and does not
+           : account for unaligned regions in between the HSPs, if any.
+ Example   : $frac_cons = $hit_object->frac_conserved('hit');
+ Returns : Float (3-decimal precision, e.g., 0.750).
+ Argument  : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
+           : default = 'query' (but see comments below).
+           : ('sbjct' is synonymous with 'hit')
+
+=cut
+
+sub frac_conserved {
+    my ($self, $seqType) = @_;
+
+    # Default to the query, compare case-insensitively, and accept 'sbjct'
+    # as another spelling of 'hit'.
+    $seqType ||= 'query';
+    $seqType = lc($seqType);
+    $seqType = 'hit' if $seqType eq 'sbjct';
+
+    # Numerator and denominator must refer to the same sequence, so the
+    # sequence type is passed on to matches() (which would otherwise fall
+    # back to 'hit'), exactly as frac_identical() does.
+    my $consv = $self->matches('cons', $seqType);
+    my $total = $self->length_aln($seqType);
+    my $ratio = $consv / $total;
+    my $ratio_rounded = sprintf( "%.3f", $ratio);
+
+    # Round down iff normal rounding yields 1 (just like blast)
+    $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
+    return $ratio_rounded;
+}
+
+=head2 frac_aligned_query
+
+ Usage     : $hit_object->frac_aligned_query();
+ Purpose   : Get the fraction of the query sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_query();
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : none
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+
+=cut
+
+sub frac_aligned_query {
+    my ($self) = @_;
+
+    # Aligned query positions over the logical query length, to 2 decimals.
+    my $aligned = $self->length_aln('query');
+    my $full    = $self->logical_length('query');
+    return sprintf("%.2f", $aligned / $full);
+}
+
+=head2 frac_aligned_hit
+
+ Usage     : $hit_object->frac_aligned_hit();
+ Purpose   : Get the fraction of the hit (sbjct) sequence which has been aligned
+           : across all HSPs (not including intervals between non-overlapping
+           : HSPs).
+ Example   : $frac_alnq = $hit_object->frac_aligned_hit();
+ Returns   : Float (2-decimal precision, e.g., 0.75).
+ Argument  : none
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+
+=cut
+
+sub frac_aligned_hit {
+    my ($self) = @_;
+
+    # Aligned hit positions over the logical hit length, to 2 decimals.
+    # Both helpers are asked for 'sbjct'.
+    my $aligned = $self->length_aln('sbjct');
+    my $full    = $self->logical_length('sbjct');
+    return sprintf( "%.2f", $aligned / $full);
+}
+
+=head2 num_unaligned_hit
+
+ Usage     : $hit_object->num_unaligned_hit();
+ Purpose   : Get the number of the unaligned residues in the hit sequence.
+           : Sums across all HSPs.
+ Example   : $num_unaln = $hit_object->num_unaligned_hit();
+ Returns   : Integer
+ Argument  : none
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+
+=cut
+
+sub num_unaligned_hit {
+    # Nothing is computed here: the value is exactly gaps('hit').
+    my ($self) = @_;
+    return $self->gaps('hit');
+}
+
+=head2 num_unaligned_query
+
+ Usage     : $hit_object->num_unaligned_query();
+ Purpose   : Get the number of the unaligned residues in the query sequence.
+           : Sums across all HSPs.
+ Example   : $num_unaln = $hit_object->num_unaligned_query();
+ Returns   : Integer
+ Argument  : none
+ Comments  : If you need data for each HSP, use hsps() and then iterate
+           : through the HSP objects.
+
+=cut
+
+sub num_unaligned_query {
+    # Nothing is computed here: the value is exactly gaps('query').
+    my ($self) = @_;
+    return $self->gaps('query');
+}
+
+# Aliasing for Steve's method names: each glob assignment makes the hit_*
+# name refer to the very same subroutine as the shorter name, so both
+# spellings can be called interchangeably.
+*hit_description = \&description;
+*hit_length = \&length;
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Hit/PullHitI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/GenericIteration.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/GenericIteration.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/GenericIteration.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,674 @@
+# $Id: GenericIteration.pm,v 1.6.4.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Iteration::GenericIteration
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# Copyright Steve Chervitz
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+# TODO: Consider calling this BlastIteration (strongly) and maybe simplifying IterationI.
+
+=head1 NAME
+
+Bio::Search::Iteration::GenericIteration - A generic implementation of the Bio::Search::Iteration::IterationI interface.
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Iteration::GenericIteration;
+    my $it = Bio::Search::Iteration::GenericIteration->new(
+                              -number => 1,
+                              -converged => 0,
+                              -newhits_unclassified => [@newhits_unclass],
+                              -newhits_below => [@newhits_below_threshold],
+                              -newhits_not_below => [@newhits_not_below_threshold],
+                              -oldhits_below => [@oldhits_below_threshold],
+                              -oldhits_newly_below => [@oldhits_newly_below_threshold],
+                              -oldhits_not_below => [@oldhits_not_below_threshold],
+                                        );
+
+# TODO: Describe how to configure a SearchIO stream so that it generates
+#       GenericIteration objects.
+
+
+=head1 DESCRIPTION
+
+This module acts as a container for Bio::Search::Hit::HitI objects,
+allowing a Search::Result::ResultI object to partition its hits based
+on which iteration the hit occurred in (e.g., a PSI-BLAST round).
+
+Unless you're writing a parser, you won't ever need to create a
+GenericIteration or any other IterationI-implementing object. If you use
+the SearchIO system, IterationI objects are created automatically from
+a SearchIO stream which returns Bio::Search::Result::ResultI objects
+and you get the IterationI objects via the ResultI API.
+
+For documentation on what you can do with GenericIteration (and other IterationI
+objects), please see the API documentation in
+L<Bio::Search::Iteration::IterationI|Bio::Search::Iteration::IterationI>.
+
+Bio::Search::Iteration::GenericIteration is similar in spirit to
+Bio::Tools::BPlite::Iteration, except that
+Bio::Search::Iteration::GenericIteration is a pure container, without
+any parsing functionality as is in Bio::Tools::BPlite::Iteration.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Steve Chervitz
+
+Email sac at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Iteration::GenericIteration;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Search::Iteration::IterationI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Search::Iteration::GenericIteration->new(%args);
+ Function: Builds a new Bio::Search::Iteration::GenericIteration object
+ Returns : Bio::Search::Iteration::GenericIteration object
+ Args    : -number => integer for the number of this iteration (required)
+           -converged => boolean value whether or not the iteration converged
+           -newhits_unclassified => array reference to hits that were not found
+                       in a previous iteration for the iteration and have not been 
+                       classified with regard to the inclusion threshold
+
+           # The following are only used for PSI-BLAST reports:
+
+           -newhits_below => array reference to hits that were not found in a
+                        previous iteration and are below the inclusion threshold.
+           -newhits_not_below => array reference to hits that were not found in a
+                        previous iteration and are not below
+                        the inclusion threshold.
+           -oldhits_below => array reference to hits that were found
+                        in a previous iteration below inclusion threshold and are
+                        still below threshold in the current iteration.
+           -oldhits_newly_below => array reference to hits that were found
+                        in a previous iteration above threshold but are below
+                        threshold in the current iteration.
+           -oldhits_not_below => array reference to hits that were found in a
+                        previous iteration above threshold and are still above
+                        the inclusion threshold.
+
+           -hit_factory => Bio::Factory::ObjectFactoryI capable of making
+                        Bio::Search::Hit::HitI objects
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # Let the parent constructor build the object, then pick out our own named arguments.
+  my $self = $class->SUPER::new(@args);
+  my ($number, $newhits_unclassified, $newhits_below, $newhits_not_below,
+      $oldhits_below, $oldhits_newly_below, $oldhits_not_below, $converged,
+      $h_f) =
+      $self->_rearrange([qw(NUMBER
+                            NEWHITS_UNCLASSIFIED
+                            NEWHITS_BELOW
+                            NEWHITS_NOT_BELOW
+                            OLDHITS_BELOW
+                            OLDHITS_NEWLY_BELOW
+                            OLDHITS_NOT_BELOW
+                            CONVERGED
+                            HIT_FACTORY
+                           )], @args);
+  # The iteration number is the only mandatory argument.
+  if( ! defined $number ) { 
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Iteration number not specified.");
+  } else { 
+      $self->number($number);
+  }
+
+  defined $converged && $self->converged($converged);
+
+  # TODO: Performance optimization test calling add_hit() vs. simple assignment:
+  #       push @{$self->{'_hits_new'}}, @{$newhits};
+  #             vs.
+  #       foreach(@{$newhits_below}) {$self->add_hit(-hit=>$_, -old=>0, -below=>1);}
+  # Each hit partition below must be passed as an array ref; an omitted one starts out empty.
+  if(defined $newhits_unclassified ) {
+    if( ref($newhits_unclassified) =~ /ARRAY/i) {
+         push @{$self->{'_newhits_unclassified'}}, @{$newhits_unclassified};
+    } else {
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Parameter NEWHITS_UNCLASSIFIED is not an array ref: $newhits_unclassified");
+    }
+  } else {
+      $self->{'_newhits_unclassified'} = [];
+  }
+
+  if(defined $newhits_below ) {
+    if( ref($newhits_below) =~ /ARRAY/i) {
+        push @{$self->{'_newhits_below_threshold'}}, @{$newhits_below};
+    } else {
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Parameter NEWHITS_BELOW is not an array ref: $newhits_below");
+    }
+  } else {
+      $self->{'_newhits_below_threshold'} = [];
+  }
+
+  if(defined $newhits_not_below ) {
+    if( ref($newhits_not_below) =~ /ARRAY/i) {
+         push @{$self->{'_newhits_not_below_threshold'}}, @{$newhits_not_below};
+    } else {
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Parameter NEWHITS_NOT_BELOW is not an array ref: $newhits_not_below");
+    }
+  } else {
+      $self->{'_newhits_not_below_threshold'} = [];
+  }
+
+  if(defined $oldhits_below ) {
+    if( ref($oldhits_below) =~ /ARRAY/i) {
+         push @{$self->{'_oldhits_below_threshold'}}, @{$oldhits_below};
+    } else {
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Parameter OLDHITS_BELOW is not an array ref: $oldhits_below");
+    }
+  } else {
+      $self->{'_oldhits_below_threshold'} = [];
+  }
+
+  if(defined $oldhits_newly_below ) {
+    if( ref($oldhits_newly_below) =~ /ARRAY/i) {
+         push @{$self->{'_oldhits_newly_below_threshold'}}, @{$oldhits_newly_below};
+    } else {
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Parameter OLDHITS_NEWLY_BELOW is not an array ref: $oldhits_newly_below");
+    }
+  } else {
+      $self->{'_oldhits_newly_below_threshold'} = [];
+  }
+
+  if(defined $oldhits_not_below ) {
+    if( ref($oldhits_not_below) =~ /ARRAY/i) {
+         push @{$self->{'_oldhits_not_below_threshold'}}, @{$oldhits_not_below};
+    } else {
+      $self->throw(-class=>'Bio::Root::BadParameter',
+                   -text=>"Parameter OLDHITS_NOT_BELOW is not an array ref: $oldhits_not_below");
+    }
+  } else {
+      $self->{'_oldhits_not_below_threshold'} = [];
+  }
+  
+  $self->hit_factory($h_f) if $h_f;
+  
+  return $self;
+}
+
+
+=head2 number
+
+See documentation in Bio::Search::Iteration::IterationI.
+
+=cut
+
+sub number {
+    my ($self, $new_value) = @_;
+    my $old_value = $self->{'_number'};
+    return $old_value if !defined $new_value && defined $old_value;    # plain read
+    # Reading a field that was never set initialises it to '' (and '' is what gets returned).
+    $new_value = $old_value = '' unless defined $new_value;
+    $self->{'_number'} = $new_value;
+    return $old_value;    # a setter call reports the value held before the update
+}
+
+=head2 converged
+
+See documentation in Bio::Search::Iteration::IterationI.
+
+=cut
+
+sub converged {
+    my ($self, $new_value) = @_;
+    my $old_value = $self->{'_converged'};
+    return $old_value if !defined $new_value && defined $old_value;    # plain read
+    # Reading a field that was never set initialises it to '' (and '' is what gets returned).
+    $new_value = $old_value = '' unless defined $new_value;
+    $self->{'_converged'} = $new_value;
+    return $old_value;    # a setter call reports the value held before the update
+}
+
+
+=head2 hit_factory
+
+ Title   : hit_factory
+ Usage   : $iteration->hit_factory($hit_factory)
+ Function: Get/set the factory used to build HitI objects if necessary.
+ Returns : Bio::Factory::ObjectFactoryI
+ Args    : Bio::Factory::ObjectFactoryI
+
+=cut
+
+sub hit_factory {
+    my ($self, @given) = @_;
+    $self->{_hit_factory} = $given[0] if @given;    # acts as a setter when an argument is passed
+    return $self->{_hit_factory} ? $self->{_hit_factory} : ();    # nothing when unset or false
+}
+
+=head2 next_hit
+
+This iterates through all old hits as returned by L<oldhits> 
+followed by all new hits as returned by L<newhits>.
+
+For more documentation see L<Bio::Search::Iteration::IterationI::next_hit()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub next_hit {
+   my $self = shift;
+   # Fill the queue on first use (or after rewind()): old hits first, then new hits.
+   if (not $self->{'_hit_queue_started'}) {
+       $self->{'_hit_queue'} = [ $self->oldhits(), $self->newhits() ];
+       $self->{'_hit_queue_started'} = 1;
+   }
+   return shift @{ $self->{'_hit_queue'} };
+}
+
+=head2 next_hit_new
+
+See documentation in L<Bio::Search::Iteration::IterationI::next_hit_new()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub next_hit_new {
+   my $self = shift;
+   # Fill the queue of new hits on first use (or after rewind()).
+   if (not $self->{'_hit_queue_new_started'}) {
+       $self->{'_hit_queue_new'} = [ $self->newhits() ];
+       $self->{'_hit_queue_new_started'} = 1;
+   }
+   return shift @{ $self->{'_hit_queue_new'} };
+}
+
+=head2 next_hit_old
+
+See documentation in L<Bio::Search::Iteration::IterationI::next_hit_old()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub next_hit_old {
+   my $self = shift;    # any further argument is ignored
+   # Fill the queue of old hits on first use (or after rewind()).
+   if (not $self->{'_hit_queue_old_started'}) {
+       $self->{'_hit_queue_old'} = [ $self->oldhits() ];
+       $self->{'_hit_queue_old_started'} = 1;
+   }
+   return shift @{ $self->{'_hit_queue_old'} };
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $iteration->rewind;
+ Function: Allow one to reset the Hit iterators to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind {
+   my ($self) = @_;
+   # Clear the "started" flags so every next_hit* iterator rebuilds its queue.
+   for my $flag (qw(_hit_queue_started _hit_queue_new_started _hit_queue_old_started)) {
+       $self->{$flag} = 0;
+   }
+   # Ask each contained hit to rewind itself as well.
+   $_->rewind for $self->hits;
+}
+
+
+=head2 num_hits
+
+See documentation in L<Bio::Search::Iteration::IterationI::num_hits()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub num_hits {
+   my ($self) = @_;
+   my $old_total = $self->num_hits_old;    # added to the new-hit count below
+   return $old_total + $self->num_hits_new;
+}
+
+=head2 num_hits_new
+
+See documentation in L<Bio::Search::Iteration::IterationI::num_hits_new()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub num_hits_new {
+   my ($self) = @_;
+   my $count = $self->newhits();    # scalar context: newhits() returns an array, so this is its size
+   return $count;
+}
+
+=head2 num_hits_old
+
+See documentation in L<Bio::Search::Iteration::IterationI::num_hits_old()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub num_hits_old {
+   my ($self) = @_;    # any further argument is ignored
+   my $count = $self->oldhits();    # scalar context: oldhits() returns an array, so this is its size
+   return $count;
+}
+
+=head2 add_hit
+
+See documentation in L<Bio::Search::Iteration::IterationI::add_hit()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub add_hit { 
+    my ($self,@args) = @_;
+    my( $hit, $old, $below, $newly_below ) = 
+        $self->_rearrange([qw(HIT
+                              OLD
+                              BELOW_THRESHOLD
+                              NEWLY_BELOW
+                             )], @args);
+    my $count = 0;
+    # Accept a raw HASH or a HitI object; eval{} makes undef/unblessed input fail the check instead of dying in isa().
+    unless( ref($hit) eq 'HASH' || eval { $hit->isa('Bio::Search::Hit::HitI') } ) { 
+        $self->throw(-class=>'Bio::Root::BadParameter',
+                     -text=>"Passed in " .ref($hit). 
+                    " as a Hit which is not a Bio::Search::Hit::HitI.");
+    }
+    # File the hit under the partition chosen by the flags; the return value is that partition's new size.
+    if($old) {
+        if ($newly_below) {
+            push @{$self->{'_oldhits_newly_below_threshold'}}, $hit;
+            $count = scalar @{$self->{'_oldhits_newly_below_threshold'}};
+        } elsif ($below) {
+            push @{$self->{'_oldhits_below_threshold'}}, $hit;
+            $count = scalar @{$self->{'_oldhits_below_threshold'}};
+        } else {
+            push @{$self->{'_oldhits_not_below_threshold'}}, $hit;
+            $count = scalar @{$self->{'_oldhits_not_below_threshold'}};
+        }
+    } elsif (defined $old) {
+        # -old is defined but false, so this is a new PSI-BLAST hit
+        if ($below) {
+            push @{$self->{'_newhits_below_threshold'}}, $hit;
+            $count = scalar @{$self->{'_newhits_below_threshold'}};
+        } elsif (defined $below) {
+            push @{$self->{'_newhits_not_below_threshold'}}, $hit;
+            $count = scalar @{$self->{'_newhits_not_below_threshold'}};
+        } else {
+            # -below not defined, PSI-BLAST threshold may not be known
+            push @{$self->{'_newhits_unclassified'}}, $hit;
+            $count = scalar @{$self->{'_newhits_unclassified'}};
+        }
+    } else {
+        # -old not defined, so it's non-PSI-BLAST
+        push @{$self->{'_newhits_unclassified'}}, $hit;
+        $count = scalar @{$self->{'_newhits_unclassified'}};
+    }
+    return $count;
+}
+
+=head2 hits
+
+See documentation in L<Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub hits  {
+    my ($self) = @_;
+    # New hits are listed first, followed by the old ones.
+    my @fresh    = $self->newhits;
+    my @previous = $self->oldhits;
+    return ( @fresh, @previous );
+}
+
+=head2 newhits
+
+Returns a list containing all newhits in this order:
+
+newhits_below_threshold
+newhits_not_below_threshold
+newhits_unclassified
+
+See more documentation in L<Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub newhits  {
+    my ($self) = @_;
+    # Concatenate the three new-hit partitions: below, not below, then unclassified.
+    my @all_new = ( $self->newhits_below_threshold, $self->newhits_not_below_threshold );
+    push @all_new, $self->newhits_unclassified;
+    return @all_new;
+}
+
+=head2 newhits_below_threshold
+
+See documentation in L<Bio::Search::Iteration::IterationI::newhits_below_threshold()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub newhits_below_threshold  {
+    my ($self) = @_;
+    my $hits = $self->{'_newhits_below_threshold'};
+    return unless ref $hits;
+    # With a hit factory available, replace raw HASH entries by factory-built objects in place.
+    if (my $factory = $self->hit_factory) {
+        for my $hit (@$hits) {
+            $hit = $factory->create_object(%$hit) if ref($hit) eq 'HASH';
+        }
+    }
+    return @$hits;
+}
+
+=head2 newhits_not_below_threshold
+
+See documentation in L<Bio::Search::Iteration::IterationI::newhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub newhits_not_below_threshold  {
+    my ($self) = @_;
+    my $hits = $self->{'_newhits_not_below_threshold'};
+    return unless ref $hits;
+    # With a hit factory available, replace raw HASH entries by factory-built objects in place.
+    if (my $factory = $self->hit_factory) {
+        for my $hit (@$hits) {
+            $hit = $factory->create_object(%$hit) if ref($hit) eq 'HASH';
+        }
+    }
+    return @$hits;
+}
+
+=head2 newhits_unclassified
+
+ Title   : newhits_unclassified
+ Usage   : foreach( $iteration->newhits_unclassified ) {...}
+ Function: Gets all newhits that have not been partitioned into
+           sets relative to the inclusion threshold.
+ Returns : Array of Bio::Search::Hit::HitI objects.
+ Args    : none
+
+=cut
+
+sub newhits_unclassified  {
+    my ($self) = @_;
+    my $hits = $self->{'_newhits_unclassified'};
+    return unless ref $hits;
+    # With a hit factory available, replace raw HASH entries by factory-built objects in place.
+    if (my $factory = $self->hit_factory) {
+        for my $hit (@$hits) {
+            $hit = $factory->create_object(%$hit) if ref($hit) eq 'HASH';
+        }
+    }
+    return @$hits;
+}
+
+=head2 oldhits
+
+Returns a list containing all oldhits in this order:
+
+oldhits_below_threshold
+oldhits_newly_below_threshold
+oldhits_not_below_threshold
+
+See more documentation in L<Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub oldhits  {
+    my ($self) = @_;
+    # Concatenate the three old-hit partitions: below, newly below, then not below.
+    my @all_old = ( $self->oldhits_below_threshold, $self->oldhits_newly_below_threshold );
+    push @all_old, $self->oldhits_not_below_threshold;
+    return @all_old;
+}
+
+=head2 oldhits_below_threshold
+
+See documentation in L<Bio::Search::Iteration::IterationI::oldhits_below_threshold()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub oldhits_below_threshold  {
+    my ($self) = @_;
+    my $hits = $self->{'_oldhits_below_threshold'};
+    return unless ref $hits;
+    # With a hit factory available, replace raw HASH entries by factory-built objects in place.
+    if (my $factory = $self->hit_factory) {
+        for my $hit (@$hits) {
+            $hit = $factory->create_object(%$hit) if ref($hit) eq 'HASH';
+        }
+    }
+    return @$hits;
+}
+
+=head2 oldhits_newly_below_threshold
+
+See documentation in L<Bio::Search::Iteration::IterationI::oldhits_newly_below_threshold()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub oldhits_newly_below_threshold  {
+    my ($self) = @_;
+    my $hits = $self->{'_oldhits_newly_below_threshold'};
+    return unless ref $hits;
+    # With a hit factory available, replace raw HASH entries by factory-built objects in place.
+    if (my $factory = $self->hit_factory) {
+        for my $hit (@$hits) {
+            $hit = $factory->create_object(%$hit) if ref($hit) eq 'HASH';
+        }
+    }
+    return @$hits;
+}
+
+=head2 oldhits_not_below_threshold
+
+See documentation in L<Bio::Search::Iteration::IterationI::oldhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub oldhits_not_below_threshold  {
+    my ($self) = @_;
+    my $hits = $self->{'_oldhits_not_below_threshold'};
+    return unless ref $hits;
+    # With a hit factory available, replace raw HASH entries by factory-built objects in place.
+    if (my $factory = $self->hit_factory) {
+        for my $hit (@$hits) {
+            $hit = $factory->create_object(%$hit) if ref($hit) eq 'HASH';
+        }
+    }
+    return @$hits;
+}
+
+=head2 hits_below_threshold
+
+See documentation in L<Bio::Search::Iteration::IterationI::hits_below_threshold()|Bio::Search::Iteration::IterationI>.
+
+=cut
+
+sub hits_below_threshold  {
+    my ($self) = @_;
+    # NOTE(review): IterationI's description also lists oldhits_below_threshold() in this union - confirm intent.
+    my @below = ( $self->newhits_below_threshold, $self->oldhits_newly_below_threshold );
+    return @below;
+}
+
+=head2 get_hit
+
+See documentation in L<Bio::Search::Iteration::IterationI::get_hit()|Bio::Search::Iteration::IterationI>.
+
+To free up the memory used by the get_hit() functionality, call free_hit_lookup().
+
+This functionality might be useful at the Result level, too.
+BlastResult::get_hit() would return a list of HitI objects for hits 
+that occur in multiple iterations.
+
+=cut
+
+sub get_hit {
+    my ($self, $name) = @_;
+    # Build the lookup table on first use; the key is the upper-cased name.
+    if (!defined $self->{'_hit_lookup'}) { $self->_create_hit_lookup() }
+    return $self->{'_hit_lookup'}{"\U$name"};
+}
+
+# Internal method.
+sub _create_hit_lookup {
+    my ($self) = @_;
+    for my $hit ($self->hits) {    # a later hit overwrites an earlier one stored under the same key
+        my $label = $hit->name;
+        $self->{'_hit_lookup'}{"\U$label"} = $hit;
+    }
+}
+
+=head2 free_hit_lookup
+
+ Purpose : Frees up the memory used by the get_hit() functionality.
+           For the memory-conscious.
+
+=cut
+
+sub free_hit_lookup {
+    my ($self) = @_;
+    $self->{'_hit_lookup'} = undef;    # get_hit() rebuilds the table when it finds this undefined
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/IterationI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/IterationI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Iteration/IterationI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,639 @@
+#-----------------------------------------------------------------
+# $Id: IterationI.pm,v 1.8.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::Iteration::IterationI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Iteration::IterationI - Abstract interface to an
+iteration from an iterated search result, such as PSI-BLAST.
+
+=head1 SYNOPSIS
+
+    # Bio::Search::Iteration::IterationI objects cannot be 
+    # instantiated since this module defines a pure interface.
+    # Given an object that implements the 
+    # Bio::Search::Iteration::IterationI interface, 
+    # you can do the following things with it:
+
+    # First, open up a SearchIO stream
+    use Bio::SearchIO;
+    my $file = shift or die "Usage: $0 <BLAST-report-file>\n";
+    my $in = new Bio::SearchIO(-format => 'blast',
+                               -file => $file # comment out this line to read STDIN
+                              );
+    # Iterate over all results in the input stream
+    while (my $result = $in->next_result) {
+
+        printf "Result #%d: %s\n", $in->result_count, $result->to_string;
+        printf "Total Iterations: %d\n", $result->num_iterations();
+
+        # Iterate over all iterations and process old and new hits
+        # separately.
+
+        while( my $it = $result->next_iteration) { 
+            printf "\nIteration %d\n", $it->number;
+            printf "Converged: %d\n", $it->converged;
+
+            # Print out the hits not found in previous iteration
+            printf "New hits: %d\n", $it->num_hits_new;
+            while( my $hit = $it->next_hit_new ) {
+                printf "  %s, Expect=%g\n", $hit->name, $hit->expect; 
+            }
+
+            # Print out the hits found in previous iteration
+            printf "Old hits: %d\n", $it->num_hits_old; 
+            while( my $hit = $it->next_hit_old ) {
+                printf "  %s, Expect=%g\n", $hit->name, $hit->expect; 
+            }
+        }
+        printf "%s\n\n", '-' x 50;
+    }
+
+    printf "Total Reports processed: %d\n", $in->result_count;
+
+    __END__
+
+    # NOTE: The following functionality is just proposed
+    # (does not yet exist but might, given sufficient hew and cry):
+
+    # Zero-in on the new hits found in last iteration.
+    # By default, iteration() returns the last one.
+
+    my $last_iteration = $result->iteration();
+    while( my $hit = $last_iteration->next_hit) {
+        # Do something with new hit...
+    }
+
+    # Get the first iteration
+
+    my $first_iteration = $result->iteration(1);
+
+
+=head1 DESCRIPTION
+
+Bio::Search::Result::ResultI objects are data structures containing
+the results from the execution of a search algorithm.  As such, it may
+contain various algorithm specific information as well as details of
+the execution, but will contain a few fundamental elements, including
+the ability to return Bio::Search::Hit::HitI objects.
+
+=head2 Classification of Hits
+
+Within a given iteration, the hits can be classified into a number of
+useful subsets based on whether or not the hit appeared in a previous
+iteration and whether or not the hit is below the threshold E-value
+for inclusion in the score matrix model.
+
+                           All hits
+                             (A)
+               _______________|_________________
+               |                               |
+            New hits                        Old hits
+              (B)                             (C)
+      _________|________                _______|_________
+      |                |                |               |
+    Below            Above             Below          Above
+  threshold        threshold         threshold      threshold
+     (D)              (E)              (F)             (G)
+                               _________|___________
+                               |                   |
+                         Occurred in a         Occurred in a
+                         previous iteration    previous iteration
+                         below threshold       above threshold
+                              (H)                  (I)
+
+Notes: The term I<threshold> in the diagram and descriptions below
+refers to this inclusion threshold. I<Below threshold> actually means
+I<at or below threshold>.
+
+The IterationI interface defines a number of methods for extracting
+these subsets of hits.
+
+=over 4
+
+=item * newhits_below_threshold() [subset D]
+
+Hits that did not appear in a previous iteration and are below
+threshold in the current iteration.
+
+=item * newhits_not_below_threshold() [subset E]
+
+Hits that did not appear in a previous iteration and are not below
+threshold in the current iteration.
+
+=item * newhits() [subset B]
+
+All newly found hits, below and above the inclusion threshold.  This
+is the union of newhits_below_threshold() + newhits_not_below_threshold()
+[subset D + subset E].
+
+=item * oldhits_below_threshold() [subset H]
+
+Hits that appeared in a previous iteration below threshold and are
+still below threshold in the current iteration.
+
+=item * oldhits_newly_below_threshold() [subset I]
+
+Hits that appeared in a previous iteration above threshold but are
+below threshold in the current iteration. (Not applicable to the first
+iteration.)
+
+=item * oldhits_not_below_threshold() [subset G]
+
+Hits that appeared in a previous iteration not below threshold and
+are still not below threshold in the current iteration.
+
+=item * oldhits()  [subset C]
+
+All hits that occurred in a previous iteration, whether below or above
+threshold in the current iteration. Union of oldhits_below_threshold()
++ oldhits_newly_below_threshold() + oldhits_not_below_threshold()
+[subset H + subset I + subset G]. (Not applicable to the first
+iteration.)
+
+=item * hits_below_threshold() [subset D + subset F]
+
+All hits, old and new, that are below the inclusion threshold in this
+iteration. This is the union of newhits_below_threshold() +
+oldhits_below_threshold() + oldhits_newly_below_threshold()
+[subset D + subset H + subset I].
+
+=item * hits() [subset A]
+
+The union of newhits() and oldhits() [subset B + subset C].
+
+=back
+
+For the first iteration, the methods L<oldhits>, L<oldhits_below_threshold>,
+L<oldhits_newly_below_threshold>, and oldhits_not_below_threshold()
+will return empty lists.
+
+Iterator and numbers-of-hit methods are provided for subsets A, B, and C:
+
+=over 4
+
+=item * next_hit_new(), num_hits_new() [subset B]
+
+=item * next_hit_old(), num_hits_old() [subset C]
+
+=item * next_hit(), num_hits() [subset A]
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003 Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::Search::Iteration::IterationI;
+
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 number
+
+ Title   : number
+ Usage   : $it_number = $iteration->number();
+ Purpose : returns the number of the iteration (a.k.a "round") 
+           within the Result.
+ Returns : integer
+ Args    : [optional] integer to set the number of the iteration
+
+=cut
+
+sub number {
+    my ($self,@args) = @_;
+    $self->throw_not_implemented;    # interface stub: the real accessor lives in the implementing class
+}
+
+=head2 converged
+
+ Title   : converged
+ Usage   : $it_converged = $iteration->converged();
+ Purpose : Indicates whether or not the iteration has converged 
+ Returns : boolean 
+ Args    : [optional] boolean value to set the converged of the iteration
+
+=cut
+
+sub converged {
+    my ($self,@args) = @_;
+    $self->throw_not_implemented;    # interface stub: the real accessor lives in the implementing class
+}
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $iteration->next_hit() ) { ... }
+ Purpose : Iterates through all of the HitI objects
+           including new hits and old hits found in a previous iteration
+           and both below and above the inclusion threshold.
+           Corresponds to subset A in the "Classification of Hits"
+           documentation section of this module.
+ Returns : A Bio::Search::Hit::HitI object or undef if there are no more.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<hits>, L<Classification of Hits>
+
+next_hit() iterates through all hits, including the new ones
+for this iteration and those found in previous iterations.
+You can interrogate each hit using L<Bio::Search::Hit::HitI::found_again>
+to determine whether it is new or old.
+
+To get just the new hits, use L<next_hit_new>.
+To get just the old hits, use L<next_hit_old>.
+
+=cut
+
+sub next_hit {
+    my ($self, @args) = @_;    # extra arguments are accepted but unused here
+    $self->throw_not_implemented;    # abstract stub: defers to throw_not_implemented()
+}
+
+=head2 next_hit_new
+
+ Title   : next_hit_new
+ Usage   : while( $hit = $iteration->next_hit_new() ) { ... }
+ Purpose : Iterates through all newly found hits (did not occur in a
+           previous iteration) and are either below or above the inclusion threshold.
+           Corresponds to subset B in the "Classification of Hits"
+           documentation section of this module.
+ Returns : A Bio::Search::Hit::HitI object or undef if there are no more.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<next_hit>, L<next_hit_old>, L<newhits>, L<Classification of Hits>
+
+=cut
+
+sub next_hit_new {
+    my ($self, @args) = @_;    # extra arguments are accepted but unused here
+    $self->throw_not_implemented;    # abstract stub: defers to throw_not_implemented()
+}
+
+=head2 next_hit_old
+
+ Title   : next_hit_old
+ Usage   : while( $hit = $iteration->next_hit_old() ) { ... }
+ Purpose : Iterates through the Hit objects representing just the
+           hits that have been found in a previous iteration, whether
+           below or above the inclusion threshold.
+           Corresponds to subset C in the "Classification of Hits"
+           documentation section of this module.
+ Returns : A Bio::Search::Hit::HitI object or undef if there are no more.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<next_hit>, L<next_hit_new>, L<oldhits>, L<Classification of Hits>
+
+=cut
+
+sub next_hit_old {
+    my ($self, @args) = @_;    # extra arguments are accepted but unused here
+    $self->throw_not_implemented;    # abstract stub: defers to throw_not_implemented()
+}
+
+=head2 num_hits
+
+ Title   : num_hits
+ Usage   : my $hitcount_total = $iteration->num_hits
+ Purpose : Returns the total number of hits for this query result, including new and old
+           below and above inclusion threshold.
+ Returns : integer
+ Args    : none
+
+See Also: L<num_hits_new>, L<num_hits_old>, L<Classification of Hits>
+
+=cut
+
+sub num_hits {
+   my ($self, @args) = @_;    # extra arguments are accepted but unused here
+   $self->throw_not_implemented();    # abstract stub: defers to throw_not_implemented()
+}
+
+=head2 num_hits_new
+
+ Title   : num_hits_new
+ Usage   : my $hitcount_new = $result->num_hits_new;
+         : my $hitcount_new_below_thresh = $result->num_hits_new( 1 );
+ Purpose : Returns the number of new hits in this iteration that were not
+           found in a previous iteration and are either below or above
+           the inclusion threshold.
+           Corresponds to subset B in the "Classification of Hits"
+           documentation section of this module.
+ Returns : integer
+ Args    : (optional) boolean, true if you want to get a count of just the new hits
+           that are below the inclusion threshold.
+
+
+See Also: L<num_hits>, L<num_hits_old>, L<Classification of Hits>
+
+=cut
+
+sub num_hits_new {
+   my ($self, @args) = @_;    # optional below-threshold flag is accepted but unused here
+   $self->throw_not_implemented();    # abstract stub: defers to throw_not_implemented()
+}
+
+=head2 num_hits_old
+
+ Title   : num_hits_old
+ Usage   : my $hitcount_old = $result->num_hits_old;
+         : my $hitcount_old_below_thresh = $result->num_hits_old( 1 );
+ Purpose : Returns the number of old hits in this iteration that were
+           found in a previous iteration and are either below or above
+           the inclusion threshold.
+           Corresponds to subset C in the "Classification of Hits"
+           documentation section of this module.
+ Returns : integer
+ Args    : (optional) boolean, true if you want to get a count of just the old hits
+           that are below the inclusion threshold.
+
+See Also: L<num_hits>, L<num_hits_new>, L<Classification of Hits>
+
+=cut
+
+sub num_hits_old {
+   my ($self, @args) = @_;    # optional below-threshold flag is accepted but unused here
+   $self->throw_not_implemented();    # abstract stub: defers to throw_not_implemented()
+}
+
+=head2 hits
+
+ Title    : hits
+ Usage    : foreach( $obj->hits() ) { ... };
+ Purpose  : Provides access to all hits, both new and old, and either
+            below or above the inclusion threshold.
+            Corresponds to subset A in the "Classification of Hits"
+            documentation section of this module.
+ Returns  : An array containing all HitI objects.
+            Hits will be ordered according to their occurrence in the report
+            unless otherwise specified.
+ Args     : none
+
+See Also: L<newhits>, L<oldhits>, L<Classification of Hits>
+
+=cut
+
+sub hits { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 newhits
+
+ Title    : newhits
+ Usage    : foreach( $obj->newhits() ) { ... };
+ Purpose  : Provides access to hits that were not found in a previous iteration
+            and may be either below or above the inclusion threshold.
+            Corresponds to subset B in the "Classification of Hits"
+            documentation section of this module.
+ Returns  : An array containing Bio::Search::Hit::HitI objects.
+            Hits will be ordered according to their occurrence in the report
+            unless otherwise specified.
+ Args     : none
+
+See Also: L<hits>, L<oldhits>, L<newhits_below_threshold>, L<newhits_not_below_threshold>, L<Classification of Hits>
+
+=cut
+
+sub newhits { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 oldhits
+
+ Title    : oldhits
+ Usage    : foreach( $obj->oldhits() ) { ... };
+ Purpose  : Provides access to hits that were found in a previous iteration
+            and are either below or above the inclusion threshold in the current iteration.
+            Corresponds to subset C in the "Classification of Hits"
+            documentation section of this module.
+ Returns  : An array containing Bio::Search::Hit::HitI objects.
+            Hits will be ordered according to their occurrence in the report
+            unless otherwise specified.
+ Args     : none
+
+See Also: L<hits>, L<newhits>, L<oldhits_below_threshold>, L<oldhits_newly_below_threshold>, L<oldhits_not_below_threshold>, L<Classification of Hits>
+
+=cut
+
+sub oldhits { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 newhits_below_threshold
+
+ Title   : newhits_below_threshold
+ Usage   : foreach( $obj->newhits_below_threshold() ) { ... };
+ Purpose : Provides access to hits that did not appear in a 
+           previous iteration and are below threshold.
+           Corresponds to subset D in the "Classification of Hits"
+           documentation section of this module.
+ Returns : An array containing Bio::Search::Hit::HitI objects.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<newhits_not_below_threshold>, L<oldhits_newly_below_threshold>, L<newhits>, L<Classification of Hits>
+
+=cut
+
+sub newhits_below_threshold { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 oldhits_below_threshold
+
+ Title   : oldhits_below_threshold
+ Usage   : foreach( $obj->oldhits_below_threshold() ) { ... };
+ Purpose : Provides access to hits that appeared in a 
+           previous iteration below inclusion threshold and are still below threshold.
+           Corresponds to subset H in the "Classification of Hits"
+           documentation section of this module.
+ Returns : An array containing Bio::Search::Hit::HitI objects.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<oldhits_not_below_threshold>, L<oldhits_newly_below_threshold>, L<oldhits>, L<Classification of Hits>
+
+=cut
+
+sub oldhits_below_threshold { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 oldhits_newly_below_threshold
+
+ Title   : oldhits_newly_below_threshold
+ Usage   : foreach( $obj->oldhits_newly_below_threshold() ) { ... };
+ Purpose : Provides access to hits that appeared in a previous
+           iteration above threshold but are below threshold in the 
+           current iteration. Not applicable to the first iteration.
+           Corresponds to subset I in the "Classification of Hits"
+           documentation section of this module.
+ Returns : An array containing Bio::Search::Hit::HitI objects.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<newhits_below_threshold>, L<oldhits>, L<Classification of Hits>
+
+=cut
+
+sub oldhits_newly_below_threshold { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 oldhits_not_below_threshold
+
+ Title   : oldhits_not_below_threshold
+ Usage   : foreach( $obj->oldhits_not_below_threshold() ) { ... };
+ Purpose : Provides access to hits that appeared in a previous iteration
+           not below threshold and are still not below threshold.
+           Corresponds to subset G in the "Classification of Hits"
+           documentation section of this module.
+ Returns : An array containing Bio::Search::Hit::HitI objects.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<oldhits_below_threshold>, L<oldhits>, L<Classification of Hits>
+
+=cut
+
+sub oldhits_not_below_threshold { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 newhits_not_below_threshold
+
+ Title   : newhits_not_below_threshold
+ Usage   : foreach( $obj->newhits_not_below_threshold() ) { ... };
+ Purpose : Provides access to hits that did not appear in a 
+           previous iteration and are not below threshold 
+           in the current iteration.
+           Corresponds to subset E in the "Classification of Hits"
+           documentation section of this module.
+ Returns : An array containing Bio::Search::Hit::HitI objects.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<newhits_below_threshold>, L<newhits>, L<Classification of Hits>
+
+=cut
+
+sub newhits_not_below_threshold { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+=head2 hits_below_threshold
+
+ Title   : hits_below_threshold
+ Usage   : foreach( $obj->hits_below_threshold() ) { ... };
+ Purpose : Provides access to all hits, old and new, that are below the inclusion threshold.
+           Corresponds to the union of subset D and subset F in the 
+           "Classification of Hits" documentation section of this module.
+ Returns : An array containing Bio::Search::Hit::HitI objects.
+           Hits will be returned in the order in which they occur in the report
+           unless otherwise specified.
+ Args    : none
+
+See Also: L<newhits_below_threshold>, L<oldhits_newly_below_threshold>, L<oldhits_below_threshold>, L<Classification of Hits>
+
+=cut
+
+sub hits_below_threshold { my ($self) = @_; return $self->throw_not_implemented(); }    # abstract stub
+
+
+=head2 add_hit
+
+ Title   : add_hit
+ Usage   : $report->add_hit(-hit             =>$hit_obj,
+                            -old             =>$boolean,
+                            -below_threshold =>$boolean,
+                            -newly_below     =>$boolean )
+ Purpose : Adds a HitI to the stored list of hits
+ Returns : Number of HitI currently stored for the class of the added hit.
+ Args    : Tagged values, the only required one is -hit. All others are used
+           only for PSI-BLAST reports.
+           -hit => Bio::Search::Hit::HitI object
+           -old => boolean, true indicates that the hit was found 
+                   in a previous iteration. Default=false.
+           -below_threshold => boolean, true indicates that the hit is below
+                   the inclusion threshold.
+           -newly_below => boolean, true indicates that the hit is below
+                   the inclusion threshold in this iteration but was above
+                   the inclusion threshold in a previous iteration. 
+                   Only appropriate for old hits. Default=false.
+ Throws  : Bio::Root::BadParameter if the hit is not a
+           Bio::Search::Hit::HitI.
+           Bio::Root::BadParameter if -old=>false and -newly_below=>true.
+
+=cut
+
+sub add_hit { my ($self) = @_; return $self->throw_not_implemented; }    # abstract stub
+
+
+
+=head2 get_hit
+
+ Title   : get_hit
+ Usage   : $hit = $report->get_hit( $hit_name )
+ Purpose : Gets a HitI object given its name 
+           if a hit with this name exists within this Iteration.
+ Returns : Bio::Search::Hit::HitI object or undef if there is no such hit.
+ Args    : $hit_name = string containing name of the hit
+ Throws  : n/a
+
+The name string must be the same as that returned by
+Bio::Search::Hit::HitI::name().
+
+The lookup should be case-insensitive.
+
+=cut
+
+sub get_hit { my ($self) = @_; return $self->throw_not_implemented; }    # abstract stub
+
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Processor.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Processor.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Processor.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+
+#
+# BioPerl module for Bio::Search::Processor
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Processor - factory for data processors of search algorithm results
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey at virginia.edu
+
+Describe contact details here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::Processor;
+
+use strict;
+
+use Bio::Root::Version;
+
+=head2 new
+
+ Title   : new
+ Usage   : $proc = new Bio::Search::Processor -file      => $filename,
+                                              -algorithm => 'Algorithm' ;
+ Function: Used to specify and initialize a data processor of search
+           algorithm results.
+ Returns : A processor specific to the algorithm type, if it exists.
+ Args    : -file => filename
+           -algorithm => algorithm specifier
+           -fh => filehandle to attach to (file or fh required)
+
+=cut
+
+sub new {
+    # Factory: load Bio::Search::Processor::<algorithm> on demand and hand
+    # the original argument list to that class's own new().  On failure a
+    # message is printed to STDERR and nothing (empty list/undef) is returned.
+    my $type = shift;
+    my %args = @_;
+
+    # Resolve the algorithm under either key spelling *before* validating;
+    # testing only for '-algorithm' used to reject callers using '-ALGORITHM'.
+    my $algorithm = $args{'-algorithm'} || $args{'-ALGORITHM'};
+    unless ( $algorithm ) {
+        print STDERR "Must supply an algorithm!";
+        return;
+    }
+
+    my $load   = "Bio/Search/Processor/$algorithm.pm";
+    my $module = "_<$load";
+
+    # Only require the file when main's symbol table has no "_<file" entry.
+    unless ( $main::{$module} ) {
+        eval { require $load; };
+        if ( $@ ) {
+            print STDERR <<"EOF";
+$load: $algorithm cannot be found
+Exception $@
+For more information about the Search/Processor system please see the
+Processor docs.  This includes ways of checking for processors at 
+compile time, not run time
+EOF
+            return;
+        }
+    }
+    my $proc = "Bio::Search::Processor::$algorithm"->new(@_);
+    return $proc;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/BlastResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/BlastResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/BlastResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,494 @@
+# $Id: BlastResult.pm,v 1.22.4.3 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Result::BlastResult
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# Copyright Steve Chervitz
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::BlastResult - Blast-specific subclass of Bio::Search::Result::GenericResult
+
+=head1 SYNOPSIS
+
+    # Working with iterations (PSI-BLAST results)
+
+    $result->next_iteration();
+    $result->num_iterations();
+    $result->iteration();
+    $result->iterations();
+
+# See Bio::Search::Result::GenericResult for information about working with Results.
+
+# See L<Bio::Search::Iteration::IterationI|Bio::Search::Iteration::IterationI>
+# for details about working with iterations.
+
+# TODO:
+#     * Show how to configure a SearchIO stream so that it generates
+#       BlastResult objects.
+
+
+=head1 DESCRIPTION
+
+This object is a subclass of Bio::Search::Result::GenericResult
+and provides some operations that facilitate working with BLAST
+and PSI-BLAST results.
+
+For general information about working with Results, see 
+Bio::Search::Result::GenericResult.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Steve Chervitz
+
+Email sac at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Result::BlastResult;
+use strict;
+
+use Bio::Search::BlastStatistics;
+
+use base qw(Bio::Search::Result::GenericResult);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::BlastResult();
+ Function: Builds a new Bio::Search::Result::BlastResult object
+ Returns : Bio::Search::Result::BlastResult
+ Args    : See Bio::Search::Result::GenericResult();
+           The following parameters are specific to BlastResult:
+             -iterations  => array ref of Bio::Search::Iteration::IterationI objects
+             -inclusion_threshold => e-value threshold for inclusion in the
+                                     PSI-BLAST score matrix model (blastpgp)
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+
+  # Iteration bookkeeping: the stored list, the next_hit() cursor and a count.
+  $self->{'_iterations'} = [];
+  $self->{'_iteration_index'} = 0;
+  $self->{'_iteration_count'} = 0;
+
+  my( $iters, $ithresh ) = $self->_rearrange([qw(ITERATIONS
+                                                 INCLUSION_THRESHOLD)],@args);
+
+  $self->{'_inclusion_threshold'} = $ithresh;  # This is a read-only variable
+
+  if( defined $iters  ) {
+      $self->throw("Must define arrayref of Iterations when initializing a $class\n") unless ref($iters) =~ /array/i;
+      foreach my $i ( @{$iters} ) {
+          $self->add_iteration($i);
+      }
+  } 
+  else {
+      # This shouldn't get called with the new SearchIO::blast.
+      # The flag makes hits/next_hit/num_hits defer to the parent class.
+      print STDERR "BlastResult::new(): Not adding iterations.\n";
+      $self->{'_no_iterations'} = 1;
+  }
+
+  return $self;
+}
+
+
+=head2 hits
+
+This method overrides L<Bio::Search::Result::GenericResult::hits> to take 
+into account the possibility of multiple iterations, as occurs in PSI-BLAST reports.
+
+If there are multiple iterations, all 'new' hits for all iterations are returned.
+These are the hits that did not occur in a previous iteration.
+
+See Also: L<Bio::Search::Result::GenericResult::hits>
+
+=cut
+
+sub hits {
+   my ($self) = @_;
+   # Without iteration objects, fall back to the parent class's hit list.
+   return $self->SUPER::hits if $self->{'_no_iterations'};
+
+   # Otherwise concatenate whatever each iteration's hits() yields, in order.
+   # NOTE(review): the POD above says only 'new' hits are returned; confirm
+   # against the iteration class's hits() implementation.
+   my @collected = map { $_->hits } $self->iterations;
+   return @collected;
+}
+
+=head2 next_hit
+
+This method overrides L<Bio::Search::Result::GenericResult::next_hit> to take 
+into account the possibility of multiple iterations, as occurs in PSI-BLAST reports.
+
+If there are multiple iterations, calling next_hit() traverses
+all of the hits, old and new, for each iteration, calling next_hit() on each iteration. 
+
+See Also: L<Bio::Search::Iteration::GenericIteration::next_hit>
+
+=cut
+
+sub next_hit {
+    my ($self,@args) = @_;
+    # No iteration objects: defer to the parent class's iterator.
+    if ($self->{'_no_iterations'}) {
+        return $self->SUPER::next_hit(@args);
+    }
+
+    # An undefined '_last_hit' means either a fresh start or that the current
+    # iteration just ran out, so move on to the next iteration index.
+    my $iter_index;
+    if (not defined $self->{'_last_hit'}) {
+        $iter_index = $self->{'_iter_index'} = $self->_next_iteration_index;
+    } else {
+        $iter_index = $self->{'_iter_index'};
+    }
+    return if $iter_index >= scalar @{$self->{'_iterations'}};   # all iterations consumed
+    my $it = $self->{'_iterations'}->[$iter_index];
+    my $hit = $self->{'_last_hit'} = $it->next_hit;
+    return defined($hit) ? $hit : $self->next_hit;   # recurse to try the next iteration
+}
+
+
+=head2 num_hits
+
+This method overrides L<Bio::Search::Result::GenericResult::num_hits> to take 
+into account the possibility of multiple iterations, as occurs in PSI-BLAST reports.
+
+If there are multiple iterations, calling num_hits() returns the number of
+'new' hits for each iteration. These are the hits that did not occur
+in a previous iteration.
+
+See Also: L<Bio::Search::Result::GenericResult::num_hits>
+
+=cut
+
+sub num_hits {
+   my ($self) = @_;
+   # No iteration objects were supplied: the parent class keeps the count.
+   return $self->SUPER::num_hits if $self->{'_no_iterations'};
+
+   $self->throw("Can't get Hits: data not collected.")
+       unless defined $self->{'_iterations'};
+   my $total = $self->hits;    # scalar context: hits() yields a count here
+   return $total;
+}
+
+=head2 add_hit
+
+ Title   : add_hit
+ Usage   : $report->add_hit($hit)
+ Function: Adds a HitI to the stored list of hits
+ Returns : Number of HitI currently stored
+ Args    : Bio::Search::Hit::HitI
+
+=cut
+
+sub add_hit {
+    my ($self,$hit) = @_;
+    # Validate first; eval keeps non-object arguments (undef, plain refs,
+    # strings) from dying inside ->isa before the intended exception is thrown.
+    unless( ref($hit) && eval { $hit->isa('Bio::Search::Hit::HitI') } ) {
+        $self->throw("Passed in a " .(ref($hit) || 'non-reference value').
+                     " as a Hit which is not a Bio::Search::Hit::HitI.");
+    }
+    # With no argument iteration() yields the last iteration; the hit goes there.
+    return $self->iteration->add_hit(-hit => $hit);
+}
+
+=head2 add_iteration
+
+ Title   : add_iteration
+ Usage   : $report->add_iteration($iteration)
+ Function: Adds a IterationI to the stored list of iterations
+ Returns : Number of IterationI currently stored
+ Args    : Bio::Search::Iteration::IterationI
+
+=cut
+
+sub add_iteration {
+    my ($self,$i) = @_;
+    # ref/eval guard: a non-object argument gets the exception below, not a die in ->isa.
+    unless( ref($i) && eval { $i->isa('Bio::Search::Iteration::IterationI') } ) {
+        $self->throw("Passed in a " .ref($i). 
+                     " as a Iteration which is not a Bio::Search::Iteration::IterationI.");
+    }
+    push @{$self->{'_iterations'}}, $i;
+    $self->{'_iteration_count'}++;
+    return scalar @{$self->{'_iterations'}};   # number of iterations now stored
+}
+
+
+=head2 next_iteration
+
+ Title   : next_iteration
+ Usage   : while( $it = $result->next_iteration()) { ... }
+ Function: Returns the next Iteration object, representing all hits
+           found within a given PSI-Blast iteration.
+ Returns : a Bio::Search::Iteration::IterationI object or undef if there are no more.
+ Args    : none
+
+=cut
+
+sub next_iteration {
+    my $self = shift;
+    # Build the queue from iterations() on the first call only; later calls consume it.
+    if( ! $self->{'_iter_queue_started'} ) {
+        $self->{'_iter_queue'} = [ $self->iterations() ];
+        $self->{'_iter_queue_started'} = 1;
+    }
+    return shift @{ $self->{'_iter_queue'} };
+}
+
+=head2 iteration
+
+ Usage     : $iteration = $blast->iteration( $number );
+ Purpose   : Get an IterationI object for the specified iteration
+             in the search result (PSI-BLAST).
+ Returns   : Bio::Search::Iteration::IterationI object
+ Throws    : Bio::Root::NoSuchThing exception if $number is not within 
+             range of the number of iterations in this report.
+ Argument  : integer (optional, if not specified get the last iteration)
+             First iteration = 1
+
+=cut
+
+sub iteration {
+    my ($self,$num) = @_;
+    $num = scalar @{$self->{'_iterations'}} if not defined $num;   # default: last one
+    if ($num < 1 or $num > $self->{'_iteration_count'}) {
+        $self->throw(-class=>'Bio::Root::NoSuchThing',
+                     -text=>"No such iteration number: $num. Valid range=1-$self->{'_iteration_count'}",
+                     -value=>$num);
+    }
+    return $self->{'_iterations'}->[$num-1];   # stored 0-based, numbered from 1
+}
+
+=head2 num_iterations
+
+ Usage     : $num_iterations = $blast->num_iterations; 
+ Purpose   : Get the number of iterations in the search result (PSI-BLAST).
+ Returns   : Total number of iterations in the report
+ Argument  : none (read-only)
+
+=cut
+
+sub num_iterations { my ($self) = @_; return $self->{'_iteration_count'}; }    # plain counter read
+
+
+# Methods provided for consistency with BPpsilite.pm
+
+=head2 number_of_iterations
+
+Same as L<num_iterations>.
+
+=cut
+
+sub number_of_iterations { my ($self) = @_; return $self->num_iterations; }    # alias
+
+=head2 round
+
+Same as L<iteration>.
+
+=cut
+
+sub round { my $self = shift; return $self->iteration(@_); }    # alias; arguments passed through
+
+
+=head2 iterations
+
+ Title   : iterations
+ Usage   : my @iterations = $result->iterations
+ Function: Returns the IterationI objects contained within this Result
+ Returns : Array of L<Bio::Search::Iteration::IterationI> objects
+ Args    : none
+
+=cut
+
+sub iterations { 
+    my ($self) = @_;
+    my $stored = $self->{'_iterations'};
+    # Anything other than an array reference counts as "no iterations".
+    # The list is copied, so callers cannot alter the stored array itself.
+    my @its = ref($stored) =~ /ARRAY/i ? @{$stored} : ();
+    return @its;
+}
+
+=head2 psiblast
+
+ Usage     : if( $blast->psiblast ) { ... }
+ Purpose   : Set/get a boolean indicator whether or not the report 
+             is a PSI-BLAST report.
+ Returns   : 1 if PSI-BLAST, undef if not.
+ Argument  : 1 (when setting)
+
+=cut
+
+#----------------
+sub psiblast {
+    my ($self, $val ) = @_;
+
+    # Set-only: a true argument stores 1. A false or missing argument
+    # leaves the stored value untouched.
+    $self->{'_psiblast'} = 1 if $val;
+    return $self->{'_psiblast'};
+}
+
+
+=head2 no_hits_found
+
+ Usage     : $nohits = $blast->no_hits_found( $iteration_number );
+ Purpose   : Get boolean indicator indicating whether or not any hits
+             were present in the report.
+
+             This is NOT the same as determining the number of hits via
+             the hits() method, which will return zero hits if there were no
+             hits in the report or if all hits were filtered out during the parse.
+
+             Thus, this method can be used to distinguish these possibilities
+             for hitless reports generated when filtering.
+
+ Returns   : Boolean
+ Argument  : (optional) integer indicating the iteration number (PSI-BLAST)
+             If iteration number is not specified and this is a PSI-BLAST result,
+             then this method will return true only if all iterations had
+             no hits found.
+
+=cut
+
+sub no_hits_found {
+    my ($self, $round) = @_;
+
+    # Watch the double negative!
+    # A true return value means "no hits were found" (for the indicated
+    # iteration or for all iterations); a false one means hits were found.
+
+    # Look up one round's flag without autovivifying "_iteration_N" entries.
+    my $flag_for = sub {
+        my $record = $self->{ '_iteration_' . $_[0] };
+        return ref($record) ? $record->{'_no_hits_found'} : undef;
+    };
+
+    # An explicit round asks about that single iteration only.
+    return $flag_for->($round) if defined $round;
+
+    # '_iterations' holds an array reference, so comparing it numerically (as
+    # this method used to) was always true and walked past the last iteration,
+    # which made the no-argument form return 0 every time.
+    # '_iteration_count' is the real number of stored iterations.
+    my $count = $self->{'_iteration_count'} || 0;
+
+    # Zero or one iteration: the answer is simply round 1's flag.
+    return $flag_for->(1) if $count <= 1;
+
+    # Several iterations: true only if every one of them was flagged.
+    foreach my $i ( 1 .. $count ) {
+        return 0 unless defined $flag_for->($i);
+    }
+    return 1;
+}
+
+
+=head2 set_no_hits_found
+
+ Usage     : $blast->set_no_hits_found( $iteration_number ); 
+ Purpose   : Set boolean indicator indicating whether or not any hits
+             were present in the report.
+ Returns   : n/a
+ Argument  : (optional) integer indicating the iteration number (PSI-BLAST)
+
+=cut
+
+sub set_no_hits_found {
+    my $self  = shift;
+    my $round = shift || 1;    # a missing (or otherwise false) round means round 1
+    $self->{"_iteration_$round"}->{'_no_hits_found'} = 1;
+}
+
+=head2 _next_iteration_index
+
+ Title   : _next_iteration_index
+ Usage   : private
+
+=cut
+
+sub _next_iteration_index{
+   my ($self,@args) = @_;    # @args is accepted but ignored
+   return $self->{'_iteration_index'}++;    # post-increment: hand back the current index, then advance
+}
+
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $result->rewind;
+ Function: Allow one to reset the Iteration iterator to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind {
+   my $self = shift;
+   $self->SUPER::rewind(@_);
+   $self->{'_iteration_index'} = 0;
+   # Also drop next_hit()/next_iteration() cursors so both restart from the top.
+   delete @{$self}{qw(_last_hit _iter_index _iter_queue _iter_queue_started)};
+   $_->rewind foreach $self->iterations;
+}
+
+
+=head2 inclusion_threshold
+
+ Title   : inclusion_threshold
+ Usage   : my $incl_thresh = $result->inclusion_threshold; (read-only)
+ Function: Gets the e-value threshold for inclusion in the PSI-BLAST 
+           score matrix model (blastpgp) that was used for generating the report
+           being parsed.
+ Returns : number (real) or undef if not a PSI-BLAST report.
+ Args    : none
+
+=cut
+
+sub inclusion_threshold {
+    my ($self) = @_;
+    return $self->{'_inclusion_threshold'};   # read-only accessor: any argument is ignored
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/GenericResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/GenericResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/GenericResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,752 @@
+# $Id: GenericResult.pm,v 1.23.2.7 2006/10/16 17:08:16 sendu Exp $
+#
+# BioPerl module for Bio::Search::Result::GenericResult
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::GenericResult - Generic Implementation of
+Bio::Search::Result::ResultI interface applicable to most search
+results.
+
+=head1 SYNOPSIS
+
+
+    # typically one gets Results from a SearchIO stream
+    use Bio::SearchIO;
+    my $io = new Bio::SearchIO(-format => 'blast',
+                                -file   => 't/data/HUMBETGLOA.tblastx');
+    while( my $result = $io->next_result ) {
+        # process all search results within the input stream
+        while( my $hit = $result->next_hit ) {  
+            # insert code here for hit processing
+        }
+    }
+
+    use Bio::Search::Result::GenericResult;
+    my @hits = (); # would be a list of Bio::Search::Hit::HitI objects
+    # typically these are created from a Bio::SearchIO stream
+    my $result = new Bio::Search::Result::GenericResult
+        ( -query_name        => 'HUMBETGLOA',
+          -query_accession   => '',
+          -query_description => 'Human haplotype C4 beta-globin gene, complete cds.',
+          -query_length      => 3002,
+          -database_name     => 'ecoli.aa',
+          -database_letters  => 4662239,
+          -database_entries  => 400,
+          -parameters        => { 'e' => '0.001' },
+          -statistics        => { 'kappa' => 0.731 },
+          -algorithm         => 'blastp',
+          -algorithm_version => '2.1.2',
+          );
+
+    my $id = $result->query_name();
+
+    my $desc = $result->query_description();
+
+    my $name = $result->database_name();
+
+    my $size = $result->database_letters();
+
+    my $num_entries = $result->database_entries();
+
+    my $gap_ext = $result->get_parameter('e');
+
+    my @params = $result->available_parameters;
+
+    my $kappa = $result->get_statistic('kappa');
+
+    my @statnames = $result->available_statistics;
+
+# TODO: Show how to configure a SearchIO stream so that it generates
+#       GenericResult objects.
+
+
+=head1 DESCRIPTION
+
+This object is an implementation of the Bio::Search::Result::ResultI
+interface and provides a generic place to store results from a
+sequence database search.
+
+Unless you're writing a parser, you won't ever need to create a
+GenericResult or any other ResultI-implementing object. If you use
+the SearchIO system, ResultI objects are created automatically from
+a SearchIO stream which returns Bio::Search::Result::ResultI objects.
+
+For documentation on what you can do with GenericResult (and other ResultI
+objects), please see the API documentation in
+L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich and Steve Chervitz
+
+Email jason at bioperl.org
+Email sac at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Result::GenericResult;
+use strict;
+
+use Bio::Search::GenericStatistics;
+use Bio::Tools::Run::GenericParameters;
+
+# bug #1420
+#use overload 
+#    '""' => \&to_string;
+
+use base qw(Bio::Root::Root Bio::Search::Result::ResultI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::GenericResult();
+ Function: Builds a new Bio::Search::Result::GenericResult object 
+ Returns : Bio::Search::Result::GenericResult
+ Args    : -query_name        => Name of query Sequence
+           -query_accession   => Query accession number (if available)
+           -query_description => Description of query sequence
+           -query_length      => Length of query sequence
+           -database_name     => Name of database
+           -database_letters  => Number of residues in database
+           -database_entries  => Number of entries in database
+           -hits              => array ref of Bio::Search::Hit::HitI objects
+           -parameters        => hash ref of search parameters (key => value)
+           -statistics        => hash ref of search statistics (key => value)
+           -algorithm         => program name (blastx)
+           -algorithm_version   => version of the algorithm (2.1.2)
+           -algorithm_reference => literature reference string for this algorithm
+           -hit_factory       => Bio::Factory::ObjectFactoryI capable of making
+                                 Bio::Search::Hit::HitI objects
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  # Start with no hits, a rewound hit cursor and empty stat/param containers.
+  $self->{'_hits'} = [];
+  $self->{'_hitindex'} = 0;
+  $self->{'_statistics'} = Bio::Search::GenericStatistics->new;
+  $self->{'_parameters'} = Bio::Tools::Run::GenericParameters->new;
+  # Extract the named arguments; the variable order mirrors the key order.
+  my ($qname,$qacc,$qdesc,$qlen,
+      $dbname,$dblet,$dbent,$params,
+      $stats, $hits, $algo, $algo_v,
+      $prog_ref, $algo_r, $hit_factory) = $self->_rearrange([qw(QUERY_NAME
+                                                  QUERY_ACCESSION
+                                                  QUERY_DESCRIPTION
+                                                  QUERY_LENGTH
+                                                  DATABASE_NAME
+                                                  DATABASE_LETTERS
+                                                  DATABASE_ENTRIES
+                                                  PARAMETERS
+                                                  STATISTICS
+                                                  HITS
+                                                  ALGORITHM
+                                                  ALGORITHM_VERSION
+                                                  PROGRAM_REFERENCE
+                                                  ALGORITHM_REFERENCE
+                                                  HIT_FACTORY
+                                                 )],@args);
+  # -program_reference is the older spelling; -algorithm_reference wins if both are given.
+  $algo_r ||= $prog_ref;
+  defined $algo   && $self->algorithm($algo);
+  defined $algo_v && $self->algorithm_version($algo_v);
+  defined $algo_r && $self->algorithm_reference($algo_r);
+
+  defined $qname && $self->query_name($qname);
+  defined $qacc  && $self->query_accession($qacc);
+  defined $qdesc && $self->query_description($qdesc);
+  defined $qlen  && $self->query_length($qlen);
+  defined $dbname && $self->database_name($dbname);
+  defined $dblet  && $self->database_letters($dblet);
+  defined $dbent  && $self->database_entries($dbent);
+
+  defined $hit_factory && $self->hit_factory($hit_factory);
+  # -parameters and -statistics must each be a hash ref of key => value pairs.
+  if( defined $params ) {
+      if( ref($params) !~ /hash/i ) {
+          $self->throw("Must specify a hash reference with the parameter '-parameters'");
+      }
+      while( my ($key,$value) = each %{$params} ) {
+          # Written straight to the container rather than via add_parameter().
+          $self->{'_parameters'}->set_parameter($key   =>   $value);
+      }
+  }
+  if( defined $stats ) {
+      if( ref($stats) !~ /hash/i ) {
+          $self->throw("Must specify a hash reference with the parameter '-statistics'");
+      }
+      while( my ($key,$value) = each %{$stats} ) {
+          # Written straight to the container rather than via add_statistic().
+          $self->{'_statistics'}->set_statistic($key   =>   $value);
+      }
+  }
+  # -hits must be an array ref; every element is validated by add_hit().
+  if( defined $hits  ) {
+      $self->throw("Must define arrayref of Hits when initializing a $class\n") unless ref($hits) =~ /array/i;
+
+      foreach my $s ( @$hits ) {
+          $self->add_hit($s);
+      }
+  }
+  return $self;
+}
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the Result
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+sub algorithm{
+    my ($self, $new) = @_;
+    my $old = $self->{'_algorithm'};
+    # Setter: store the new name.  Note the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_algorithm'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_algorithm'} = '' }
+    return $old;
+}
+
+=head2 algorithm_version
+
+ Title   : algorithm_version
+ Usage   : my $r_version = $hsp->algorithm_version
+ Function: Obtain the version of the algorithm used to obtain the Result
+ Returns : string (e.g., 2.1.2)
+ Args    : [optional] scalar string to set algorithm version value
+
+=cut
+
+sub algorithm_version{
+    my ($self, $new) = @_;
+    my $old = $self->{'_algorithm_version'};
+    # Setter: store the new version; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_algorithm_version'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_algorithm_version'} = '' }
+
+    return $old;
+}
+
+=head2 Bio::Search::Result::ResultI interface methods
+
+Bio::Search::Result::ResultI implementation
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $result->next_hit()) { ... }
+ Function: Returns the next available Hit object, representing potential
+           matches between the query and various entities from the database.
+ Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
+ Args    : none
+
+
+=cut
+
+sub next_hit {
+    my ($self) = @_;
+    my $index = $self->_nexthitindex;
+    # Valid indices are 0 .. count-1, so stop as soon as $index reaches the count.
+    return if $index >= scalar @{$self->{'_hits'}};
+    my $hit = $self->{'_hits'}->[$index];
+    if (ref($hit) eq 'HASH') {    # slot still holds raw constructor args: build the Hit lazily
+        my $factory = $self->hit_factory || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
+        $hit = $factory->create_object(%{$hit});
+        $self->{'_hits'}->[$index] = $hit;
+        delete $self->{_hashes}->{$index};
+    }
+    return $hit;
+}
+
+=head2 query_name
+
+ Title   : query_name
+ Usage   : $id = $result->query_name();
+ Function: Get the string identifier of the query used by the
+           algorithm that performed the search.
+ Returns : a string.
+ Args    : [optional] new string value for query name
+
+=cut
+
+sub query_name {
+    my ($self, $new) = @_;
+    my $old = $self->{'_queryname'};
+    # Setter: store the new name; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_queryname'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_queryname'} = '' }
+    return $old;
+}
+
+=head2 query_accession
+
+ Title   : query_accession
+ Usage   : $id = $result->query_accession();
+ Function: Get the accession (if available) for the query sequence
+ Returns : a string
+ Args    : [optional] new string value for accession
+
+=cut
+
+sub query_accession {
+    my ($self, $new) = @_;
+    my $old = $self->{'_queryacc'};
+    # Setter: store the new accession; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_queryacc'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_queryacc'} = '' }
+    return $old;
+}
+
+=head2 query_length
+
+ Title   : query_length
+ Usage   : $id = $result->query_length();
+ Function: Get the length of the query sequence
+           used in the search.
+ Returns : a number
+ Args    :  [optional] new integer value for query length
+
+=cut
+
+sub query_length {
+    my ($self, $new) = @_;
+    my $old = $self->{'_querylength'};
+    # Setter: store the new length; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_querylength'} = $new }
+    # Getter on a never-set field: initialise it to 0 (not '') and return that.
+    elsif (!defined $old) { $old = $self->{'_querylength'} = 0 }
+    return $old;
+}
+
+=head2 query_description
+
+ Title   : query_description
+ Usage   : $id = $result->query_description();
+ Function: Get the description of the query sequence
+           used in the search.
+ Returns : a string
+ Args    : [optional] new string for the query description
+
+=cut
+
+sub query_description {
+    my ($self, $new) = @_;
+    my $old = $self->{'_querydesc'};
+    # Setter: store the new description; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_querydesc'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_querydesc'} = '' }
+    return $old;
+}
+
+
+=head2 database_name
+
+ Title   : database_name
+ Usage   : $name = $result->database_name()
+ Function: Used to obtain the name of the database that the query was searched
+           against by the algorithm.
+ Returns : a scalar string
+ Args    : [optional] new string for the db name
+
+=cut
+
+sub database_name {
+    my ($self, $new) = @_;
+    my $old = $self->{'_dbname'};
+    # Setter: store the new database name; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_dbname'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_dbname'} = '' }
+    return $old;
+}
+
+=head2 database_letters
+
+ Title   : database_letters
+ Usage   : $size = $result->database_letters()
+ Function: Used to obtain the size of database that was searched against.
+ Returns : a scalar integer (units specific to algorithm, but probably the
+           total number of residues in the database, if available) or an
+           empty string if the value has never been set.
+ Args    : [optional] new scalar integer for number of letters in db 
+
+
+=cut
+
+sub database_letters {
+    my ($self, $new) = @_;
+    my $old = $self->{'_dbletters'};
+    # Setter: store the new letter count; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_dbletters'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_dbletters'} = '' }
+    return $old;
+}
+
+=head2 database_entries
+
+ Title   : database_entries
+ Usage   : $num_entries = $result->database_entries()
+ Function: Used to obtain the number of entries contained in the database.
+ Returns : a scalar integer representing the number of entities in the database
+           or an empty string if the value has never been set.
+ Args    : [optional] new integer for the number of sequence entries in the db
+
+
+=cut
+
+sub database_entries {
+    my ($self, $new) = @_;
+    my $old = $self->{'_dbentries'};
+    # Setter: store the new entry count; the PRIOR value is what gets returned.
+    if (defined $new)     { $self->{'_dbentries'} = $new }
+    # Getter on a never-set field: initialise it to '' and return that.
+    elsif (!defined $old) { $old = $self->{'_dbentries'} = '' }
+    return $old;
+}
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : my $gap_ext = $report->get_parameter('gapext')
+ Function: Returns the value for a specific parameter used
+           when running this report
+ Returns : string
+ Args    : name of parameter (string)
+
+=cut
+
+sub get_parameter {
+   my $self = shift;
+   return $self->{'_parameters'}->get_parameter(shift);    # look the name up in the parameter container
+}
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @params = $report->available_parameters
+ Function: Returns the names of the available parameters
+ Returns : Return list of available parameters used for this report
+ Args    : none
+
+=cut
+
+sub available_parameters{
+   my $store = $_[0]->{'_parameters'};    # container holding the search parameters
+   return $store->available_parameters;
+}
+
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : my $gap_ext = $report->get_statistic('kappa')
+ Function: Returns the value for a specific statistic available 
+           from this report
+ Returns : string
+ Args    : name of statistic (string)
+
+=cut
+
+sub get_statistic{
+   my $self = shift;
+   return $self->{'_statistics'}->get_statistic(shift);    # look the name up in the statistics container
+}
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $report->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : Return list of available statistics used for this report
+ Args    : none
+
+=cut
+
+sub available_statistics{
+   my $store = $_[0]->{'_statistics'};    # container holding the search statistics
+   return $store->available_statistics;
+}
+
+=head2 Bio::Search::Report 
+
+Bio::Search::Result::GenericResult specific methods
+
+=head2 add_hit
+
+ Title   : add_hit
+ Usage   : $report->add_hit($hit)
+ Function: Adds a HitI to the stored list of hits
+ Returns : Number of HitI currently stored
+ Args    : Bio::Search::Hit::HitI
+
+=cut
+
+sub add_hit {
+    my ($self,$s) = @_;
+    my $is_hash = ref($s) eq 'HASH';
+    # UNIVERSAL::isa() is safe on undef, plain scalars and unblessed refs, where
+    # calling $s->isa(...) would die before the intended exception is thrown.
+    unless ($is_hash || UNIVERSAL::isa($s, 'Bio::Search::Hit::HitI')) {
+        $self->throw("Passed in " .ref($s)." as a Hit which is not a Bio::Search::Hit::HitI.");
+    }
+    push @{$self->{'_hits'}}, $s;
+    # Remember which slots still hold raw hashes so they can be built lazily.
+    $self->{_hashes}->{$#{$self->{'_hits'}}} = 1 if $is_hash;
+
+    return scalar @{$self->{'_hits'}};
+}
+
+=head2 hit_factory
+
+ Title   : hit_factory
+ Usage   : $hit->hit_factory($hit_factory)
+ Function: Get/set the factory used to build HitI objects if necessary.
+ Returns : Bio::Factory::ObjectFactoryI
+ Args    : Bio::Factory::ObjectFactoryI
+
+=cut
+
+sub hit_factory {
+    my ($self, @new) = @_;
+    $self->{_hit_factory} = $new[0] if @new;    # setter form: any supplied argument, even undef, is stored
+    return $self->{_hit_factory} ? $self->{_hit_factory} : ();    # unset/false: undef or empty list by context
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $result->rewind;
+ Function: Allow one to reset the Hit iterator to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind{
+   my $self = shift;
+   return $self->{'_hitindex'} = 0;    # restart hit iteration from the first stored hit
+}
+
+
+=head2 _nexthitindex
+
+ Title   : _nexthitindex
+ Usage   : private
+
+=cut
+
+sub _nexthitindex{
+   my ($self) = @_;    # any further arguments are ignored
+   return $self->{'_hitindex'}++;    # post-increment: yield the current index, then advance
+}
+
+
+=head2 add_parameter
+
+ Title   : add_parameter
+ Usage   : $report->add_parameter('gapext', 11);
+ Function: Adds a parameter
+ Returns : none
+ Args    : key  - key value name for this parameter
+           value - value for this parameter
+
+=cut
+
+sub add_parameter {
+   my ($self, $name, $setting) = @_;
+   return $self->{'_parameters'}->set_parameter($name, $setting);    # delegate to the parameter container
+}
+
+
+=head2 add_statistic
+
+ Title   : add_statistic
+ Usage   : $report->add_statistic('lambda', 2.3);
+ Function: Adds a statistic
+ Returns : none
+ Args    : key  - key value name for this statistic
+           value - value for this statistic
+
+=cut
+
+sub add_statistic {
+   my ($self, $name, $number) = @_;
+   $self->{'_statistics'}->set_statistic($name, $number);    # delegate to the statistics container
+   return;    # deliberately returns nothing, whatever set_statistic() gave back
+}
+
+
+=head2 num_hits
+
+ Title   : num_hits
+ Usage   : my $hitcount= $result->num_hits
+ Function: returns the number of hits for this query result
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_hits{
+   my $self = shift;
+   my $hits = $self->{'_hits'};
+   # A missing hit list is reported as an error rather than counted as zero.
+   $self->throw("Can't get Hits: data not collected.") unless defined $hits;
+   return scalar(@{$hits});
+}
+
+
+=head2 hits
+
+ Title   : hits
+ Usage   : my @hits = $result->hits
+ Function: Returns the available hits for this Result
+ Returns : Array of L<Bio::Search::Hit::HitI> objects
+ Args    : none
+
+
+=cut
+
+sub hits {
+    my $self = shift;
+    my $pending = $self->{_hashes} || {};
+    # Build real Hit objects for every slot that still holds a raw hash.
+    for my $slot (keys %{$pending}) {
+        my $factory = $self->hit_factory || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
+        my $hit_args = $self->{'_hits'}->[$slot];
+        $self->{'_hits'}->[$slot] = $factory->create_object(%{$hit_args});
+        delete $self->{_hashes}->{$slot};
+    }
+    # Copy the list out; a non-reference '_hits' yields an empty list.
+    my $stored = $self->{'_hits'};
+    my @hits = ref $stored ? @{$stored} : ();
+    return @hits;
+}
+
+=head2 algorithm_reference
+
+ Title   : algorithm_reference
+ Usage   : $obj->algorithm_reference($newval)
+ Function: 
+ Returns : string containing literature reference for the algorithm
+ Args    : newvalue string (optional)
+ Comments: Formerly named program_reference(), which is still supported
+           for backwards compatibility.
+
+=cut
+
+sub algorithm_reference{
+   my ($self, $citation) = @_;
+   # Acts as a setter only for a defined argument.  Unlike the other fields
+   # in this class, the hash key carries no leading underscore.
+   $self->{'algorithm_reference'} = $citation if defined $citation;
+   return $self->{'algorithm_reference'};
+}
+
+
+sub program_reference { my $self = shift; return $self->algorithm_reference(@_); }    # backwards-compatible alias
+
+
+=head2 no_hits_found
+
+See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>
+
+=cut
+
+sub no_hits_found {
+    my ($self) = @_;
+
+    # Beware the double negative in this flag:
+    #   false (never set) => hits WERE found
+    #   true  (1)         => NO hits were found
+
+    return $self->{'_no_hits_found'};
+}
+
+
+=head2 set_no_hits_found
+
+See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>
+
+=cut
+
+sub set_no_hits_found {
+    my ($self) = @_;
+    return $self->{'_no_hits_found'} = 1;    # record that the search produced no hits
+}
+
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : print $blast->to_string;
+ Function: Returns a string representation for the Blast result. 
+           Primarily intended for debugging purposes.
+ Example : see usage
+ Returns : A string of the form:
+           <class>, algorithm= <algorithm>, query=<name> <description>, db=<database>
+           e.g.:
+           Bio::Search::Result::GenericResult, algorithm= BLASTP, query=YEL060C vacuolar protease B, db=PDBUNIQ
+ Args    : None
+
+=cut
+
+sub to_string {
+    my $self = shift;
+    my @fields = ("algorithm= " . $self->algorithm, "query=" . $self->query_name . " " . $self->query_description, "db=" . $self->database_name);
+    return join(", ", ref($self), @fields);    # class name first, then the comma-separated fields
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HMMERResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HMMERResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HMMERResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,436 @@
+# $Id: HMMERResult.pm,v 1.6.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Result::HMMERResult
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::HMMERResult - A Result object for HMMER results
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Result::HMMERResult;
+    my $result = new Bio::Search::Result::HMMERResult
+    ( -hmm_name => 'pfam',
+      -sequence_file => 'roa1.pep',
+      -hits => \@hits);
+
+    # generally we use Bio::SearchIO to build these objects
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer',
+			       -file   => 'result.hmmer');
+    while( my $result = $in->next_result ) {
+	print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
+    }
+
+=head1 DESCRIPTION
+
+This is a specialization of L<Bio::Search::Result::GenericResult>.
+There are a few extra methods, specifically L<sequence_file>,
+L<hmm_name>, L<next_model>, and L<models>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Result::HMMERResult;
+use strict;
+
+
+
+use base qw(Bio::Search::Result::GenericResult);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::HMMERResult();
+ Function: Builds a new Bio::Search::Result::HMMERResult object 
+ Returns : Bio::Search::Result::HMMERResult
+ Args    : -hmm_name => string, name of hmm file
+           -sequence_file => name of the sequence file
+
+plus Bio::Search::Result::GenericResult parameters
+
+           -query_name        => Name of query Sequence
+           -query_accession   => Query accession number (if available)
+           -query_description => Description of query sequence
+           -query_length      => Length of query sequence
+           -database_name     => Name of database
+           -database_letters  => Number of residues in database
+           -database_entries  => Number of entries in database
+           -parameters        => hash ref of search parameters (key => value)
+           -statistics        => hash ref of search statistics (key => value)
+           -algorithm         => program name (blastx)
+           -algorithm_version => version of the algorithm (2.1.2)
+           -program_reference => literature reference string for this algorithm
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+  # Only the two HMMER-specific options are picked out here; the full
+  # argument list has already been handed to the parent constructor above.
+  my ($hmm,$seqfile) = $self->_rearrange([qw(HMM_NAME SEQUENCE_FILE)], @args);
+  
+  defined( $seqfile) && $self->sequence_file($seqfile);
+  defined( $hmm) && $self->hmm_name($hmm);
+
+  return $self;
+}
+
+
+=head2 hmm_name
+
+ Title   : hmm_name
+ Usage   : $obj->hmm_name($newval)
+ Function: Get/Set the value of hmm_name
+ Returns : value of hmm_name
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub hmm_name{
+   my $self = shift;
+   my $name = shift;
+   # Only a defined argument overwrites the stored name; otherwise this is a getter.
+   $self->{'_hmm_name'} = $name if defined $name;
+   return $self->{'_hmm_name'};
+}
+
+
+=head2 sequence_file
+
+ Title   : sequence_file
+ Usage   : $obj->sequence_file($newval)
+ Function: Get/Set the value of sequence_file
+ Returns : value of sequence_file
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub sequence_file{
+   my $self = shift;
+   my $path = shift;
+   # Act as a setter only when a defined value is supplied.
+   $self->{'_sequence_file'} = $path if defined $path;
+
+   return $self->{'_sequence_file'};
+}
+
+
+=head2 next_model
+
+ Title   : next_model
+ Usage   : my $domain = $result->next_model
+ Function: Returns the next domain - this
+           is an alias for next_hit
+ Returns : L<Bio::Search::Hit::HitI> object
+ Args    : none
+
+
+=cut
+
+sub next_model{ my ($self) = @_; return $self->next_hit; }    # alias: extra arguments are not forwarded
+
+=head2 models
+
+ Title   : models
+ Usage   : my @domains = $result->models;
+ Function: Returns the list of HMM models seen - this
+           is an alias for hits()
+ Returns : Array of L<Bio::Search::Hit::HitI> objects
+ Args    : none
+
+
+=cut
+
+sub models{ my ($self) = @_; return $self->hits; }    # alias: extra arguments are not forwarded
+
+=head2 Bio::Search::Result::GenericResult inherited methods
+
+=cut
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $hsp->algorithm
+ Function: Obtain the name of the algorithm used to obtain the Result
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+=head2 algorithm_version
+
+ Title   : algorithm_version
+ Usage   : my $r_version = $hsp->algorithm_version
+ Function: Obtain the version of the algorithm used to obtain the Result
+ Returns : string (e.g., 2.1.2)
+ Args    : [optional] scalar string to set algorithm version value
+
+=cut
+
+=head2 Bio::Search::Result::ResultI interface methods
+
+Bio::Search::Result::ResultI implementation
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $result->next_hit()) { ... }
+ Function: Returns the next available Hit object, representing potential
+           matches between the query and various entities from the database.
+ Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
+ Args    : none
+
+
+=cut
+
+=head2 query_name
+
+ Title   : query_name
+ Usage   : $id = $result->query_name();
+ Function: Get the string identifier of the query used by the
+           algorithm that performed the search.
+ Returns : a string.
+ Args    : [optional] new string value for query name
+
+=cut
+
+=head2 query_accession
+
+ Title   : query_accession
+ Usage   : $id = $result->query_accession();
+ Function: Get the accession (if available) for the query sequence
+ Returns : a string
+ Args    : [optional] new string value for accession
+
+=cut
+
+=head2 query_length
+
+ Title   : query_length
+ Usage   : $id = $result->query_length();
+ Function: Get the length of the query sequence
+           used in the search.
+ Returns : a number
+ Args    :  [optional] new integer value for query length
+
+=cut
+
+=head2 query_description
+
+ Title   : query_description
+ Usage   : $id = $result->query_description();
+ Function: Get the description of the query sequence
+           used in the search.
+ Returns : a string
+ Args    : [optional] new string for the query description
+
+=cut
+
+=head2 database_name
+
+ Title   : database_name
+ Usage   : $name = $result->database_name()
+ Function: Used to obtain the name of the database that the query was searched
+           against by the algorithm.
+ Returns : a scalar string
+ Args    : [optional] new string for the db name
+
+=cut
+
+=head2 database_letters
+
+ Title   : database_letters
+ Usage   : $size = $result->database_letters()
+ Function: Used to obtain the size of database that was searched against.
+ Returns : a scalar integer (units specific to algorithm, but probably the
+           total number of residues in the database, if available) or undef if
+           the information was not available to the Processor object.
+ Args    : [optional] new scalar integer for number of letters in db 
+
+
+=cut
+
+=head2 database_entries
+
+ Title   : database_entries
+ Usage   : $num_entries = $result->database_entries()
+ Function: Used to obtain the number of entries contained in the database.
+ Returns : a scalar integer representing the number of entities in the database
+           or undef if the information was not available.
+ Args    : [optional] new integer for the number of sequence entries in the db
+
+
+=cut
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : my $gap_ext = $report->get_parameter('gapext')
+ Function: Returns the value for a specific parameter used
+           when running this report
+ Returns : string
+ Args    : name of parameter (string)
+
+=cut
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @params = $report->available_parameters
+ Function: Returns the names of the available parameters
+ Returns : Return list of available parameters used for this report
+ Args    : none
+
+=cut
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : my $gap_ext = $report->get_statistic('kappa')
+ Function: Returns the value for a specific statistic available 
+           from this report
+ Returns : string
+ Args    : name of statistic (string)
+
+=cut
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $report->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : Return list of available statistics used for this report
+ Args    : none
+
+=cut
+
+=head2 Bio::Search::Result::GenericResult specific methods
+
+=cut
+
+=head2 add_hit
+
+ Title   : add_hit
+ Usage   : $report->add_hit($hit)
+ Function: Adds a HitI to the stored list of hits
+ Returns : Number of HitI currently stored
+ Args    : Bio::Search::Hit::HitI
+
+=cut
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $result->rewind;
+ Function: Allow one to reset the Hit iteration to the beginning
+           Since this is an in-memory implementation
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind{
+   my $self = $_[0];
+   return $self->{'_hitindex'} = 0;    # put the hit cursor back at the start
+}
+
+
+=head2 add_parameter
+
+ Title   : add_parameter
+ Usage   : $report->add_parameter('gapext', 11);
+ Function: Adds a parameter
+ Returns : none
+ Args    : key  - key value name for this parameter
+           value - value for this parameter
+
+=cut
+
+=head2 add_statistic
+
+ Title   : add_statistic
+ Usage   : $report->add_statistic('lambda', 2.3);
+ Function: Adds a statistic
+ Returns : none
+ Args    : key  - key value name for this statistic
+           value - value for this statistic
+
+=cut
+
+=head2 num_hits
+
+ Title   : num_hits
+ Usage   : my $hitcount= $result->num_hits
+ Function: returns the number of hits for this query result
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+=head2 hits
+
+ Title   : hits
+ Usage   : my @hits = $result->hits
+ Function: Returns the available hits for this Result
+ Returns : Array of L<Bio::Search::Hit::HitI> objects
+ Args    : none
+
+
+=cut
+
+=head2 program_reference
+
+ Title   : program_reference
+ Usage   : $obj->program_reference($newval)
+ Function: 
+ Returns : value of the literature reference for the algorithm
+ Args    : newvalue (optional)
+
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HmmpfamResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HmmpfamResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HmmpfamResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,381 @@
+# $Id: HmmpfamResult.pm,v 1.1.2.4 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Result::HmmpfamResult
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::HmmpfamResult - A parser and result object for hmmpfam
+                                     results
+
+=head1 SYNOPSIS
+
+    # generally we use Bio::SearchIO to build these objects
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer_pull',
+							   -file   => 'result.hmmer');
+
+    while (my $result = $in->next_result) {
+		print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
+    }
+
+=head1 DESCRIPTION
+
+This object implements a parser for hmmpfam result output, a program in the HMMER
+package.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::Result::HmmpfamResult;
+
+use strict;
+
+use Bio::Search::Hit::HmmpfamHit;
+
+use base qw(Bio::Root::Root Bio::Search::Result::PullResultI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::HmmpfamResult();
+ Function: Builds a new Bio::Search::Result::HmmpfamResult object
+ Returns : Bio::Search::Result::HmmpfamResult
+ Args    : -chunk  => [Bio::Root::IO, $start, $end] (required if no -parent)
+           -parent => Bio::PullParserI object (required if no -chunk)
+           -parameters => hash ref of search parameters (key => value), optional
+           -statistics => hash ref of search statistics (key => value), optional
+
+		   where the array ref provided to -chunk contains an IO object
+           for a filehandle to something representing the raw data of the
+           result, and $start and $end define the tell() position within the
+           filehandle that the result data starts and ends (optional; defaults
+           to start and end of the entire thing described by the filehandle)
+
+=cut
+
+sub new {
+    my $class = shift;
+    my @args  = @_;
+
+    # Build the base object, then let _setup() deal with the arguments
+    # that are common to pull results.
+    my $self = $class->SUPER::new(@args);
+    $self->_setup(@args);
+
+    # Register the extra fields of this class, all initially undefined.
+    my @own_fields = qw( header hit_table hsp_table alignments next_model models query_length );
+    foreach my $name (@own_fields) {
+        $self->_fields->{$name} = undef;
+    }
+
+    # field => field pairs handed to _dependencies(); presumably each key
+    # needs the named field discovered first (see Bio::PullParserI).
+    my %depends_on = (
+        query_name        => 'header',
+        query_accession   => 'header',
+        query_description => 'header',
+        hit_table         => 'header',
+        num_hits          => 'hit_table',
+        no_hits_found     => 'hit_table',
+        hsp_table         => 'hit_table',
+        next_alignment    => 'hsp_table',
+    );
+    $self->_dependencies(\%depends_on);
+
+    return $self;
+}
+
+#
+# PullParserI discovery methods so we can answer all ResultI questions
+#
+
+sub _discover_header {
+    my ($self) = @_;
+
+    # The header is read from the start of the chunk up to and including
+    # the text "all domains):\n"; the position after it is remembered so
+    # the hit table can be read from there.
+    $self->_chunk_seek(0);
+    my $header_text = $self->_get_chunk_by_end("all domains):\n");
+    $self->{_after_header} = $self->_chunk_tell;
+
+    unless ($header_text) {
+        $self->throw("Could not find hmmer header, is file really hmmer format?");
+    }
+
+    # Query name, accession and description each come from their own
+    # header line; accession and description fall back to ''.
+    my ($name)        = $header_text =~ /^Query(?:\s+sequence)?:\s+(\S+)/m;
+    my ($accession)   = $header_text =~ /^Accession:\s+(\S+)/m;
+    my ($description) = $header_text =~ /^Description:\s+(\S.+)/m;
+
+    $self->_fields->{query_name}        = $name;
+    $self->_fields->{query_accession}   = $accession || '';
+    $self->_fields->{query_description} = $description || '';
+
+    $self->_fields->{header} = 1; # stop this method being called again
+}
+
+sub _discover_hit_table {
+    my ($self) = @_;
+
+    # The hit table text runs from the end of the header up to and
+    # including "for domains:\n".
+    $self->_chunk_seek($self->{_after_header});
+    my $text = $self->_get_chunk_by_end("for domains:\n");
+    $self->{_after_hit_table} = $self->_chunk_tell;
+
+    # A cutoff reported as '[unset]' is treated as no cutoff at all.
+    my %cutoff;
+    foreach my $type (qw(evalue score hsps)) {
+        my $value = $self->get_field($type.'_cutoff');
+        $cutoff{$type} = $value eq '[unset]' ? undef : $value;
+    }
+
+    # Each row is stored as [name, description, score, evalue, num_hsps].
+    # $no_hit stays true only when the table held no rows at all, which
+    # is different from every row having been filtered out by a cutoff.
+    my @rows;
+    my $no_hit = 1;
+    while ($text =~ /^(\S+)\s+(\S.+?)?\s+(\S+)\s+(\S+)\s+(\d+)\n/gm) {
+        my ($name, $desc, $score, $num_hsps) = ($1, $2, $3, $5);
+        my $evalue = abs($4); # consistency for tests under Windows
+        $no_hit = 0;
+
+        next if ($cutoff{evalue} && $evalue > $cutoff{evalue});
+        next if ($cutoff{score} && $score < $cutoff{score});
+        next if ($cutoff{hsps} && $num_hsps < $cutoff{hsps});
+
+        push(@rows, [$name, $desc, $score, $evalue, $num_hsps]);
+    }
+
+    $self->_fields->{hit_table} = \@rows;
+    $self->{_next_hit_index} = @rows ? 0 : -1;
+
+    $self->_fields->{no_hits_found} = $no_hit;
+    $self->_fields->{num_hits} = @rows;
+}
+
+sub _discover_hsp_table {
+    my ($self) = @_;
+
+    # Read on from where the hit table ended. If the first end marker
+    # yields nothing, fall back to the "//" end marker.
+    $self->_chunk_seek($self->{_after_hit_table});
+    my $text = $self->_get_chunk_by_end("top-scoring domains:\n");
+    $text ||= $self->_get_chunk_by_end("//\n"); # A0 reports
+    $self->{_after_hsp_table} = $self->_chunk_tell;
+
+    # can't save this regex work for the hsp object because the hit object
+    # needs its length, so all of the parsing is done here
+    my %hsps_of;
+    while ($text =~ /^(\S+)\s+(\d+)\/\d+\s+(\d+)\s+(\d+)\s+\S\S\s+(\d+)\s+(\d+)\s+\S(\S)\s+(\S+)\s+(\S+)/gm) {
+        my ($model, $rank, $query_start, $query_end, $hit_start, $hit_end, $end_mark, $score)
+            = ($1, $2, $3, $4, $5, $6, $7, $8);
+        my $evalue = abs($9); # consistency for tests under Windows
+
+        # rank query_start query_end hit_start hit_end score evalue
+        my $entry = $hsps_of{$model} ||= {};
+        push(@{$entry->{hsp_data}},
+             [$rank, $query_start, $query_end, $hit_start, $hit_end, $score, $evalue]);
+
+        # a closing ']' marker means this row's hit end is taken as the
+        # hit length
+        if ($end_mark eq ']') {
+            $entry->{hit_length} = $hit_end;
+        }
+    }
+    $self->_fields->{hsp_table} = \%hsps_of;
+}
+
+sub _discover_alignments {
+    my ($self) = @_;
+    # Start with an empty store; _next_alignment() fills it on demand.
+    $self->_fields->{alignments} = {};
+}
+
+# Reads the next alignment block from the chunk and stores its text in the
+# 'alignments' field hash under the key "<name>~~~~<domain number>".
+# Returns 1 when an alignment was stored, and nothing (empty list / undef)
+# once there are no more alignments to read.
+sub _next_alignment {
+    my $self = shift;
+    return if $self->{_no_more_alignments};
+
+    my $aligns = $self->_fields->{alignments};
+
+    unless (defined $self->{_after_previous_alignment}) {
+        # First call: read up to the first ": domain" so we know how the
+        # first alignment starts, then go on to read that alignment.
+        $self->_chunk_seek($self->{_after_hsp_table});
+        my $chunk = $self->_get_chunk_by_end(": domain");
+        unless ($chunk) {
+            $self->{_no_more_alignments} = 1;
+            return;
+        }
+
+        $self->{_after_previous_alignment} = $self->_chunk_tell;
+        $self->{_next_alignment_start_text} = $chunk;
+
+        # Pass the result on to the caller, so that a successfully stored
+        # first alignment is reported as success rather than as "no more".
+        return $self->_next_alignment;
+    }
+
+    # An alignment ends where the next one starts (": domain") or, for the
+    # last one, at the "//" terminator.
+    $self->_chunk_seek($self->{_after_previous_alignment});
+    my $chunk = $self->_get_chunk_by_end(": domain");
+    unless ($chunk) {
+        $self->_chunk_seek($self->{_after_previous_alignment});
+        $chunk = $self->_get_chunk_by_end("//");
+
+        unless ($chunk) {
+            $self->{_no_more_alignments} = 1;
+            return;
+        }
+    }
+
+    $self->{_after_previous_alignment} = $self->_chunk_tell;
+
+    if (defined $self->{_next_alignment_start_text}) {
+        $chunk = $self->{_next_alignment_start_text}.$chunk;
+    }
+
+    # The tail of this chunk is the start of the next alignment; move it
+    # over to be prepended next time. Only trust $1 when the substitution
+    # actually matched, otherwise it holds a stale capture.
+    if ($chunk =~ s/(\S+: domain)$//) {
+        $self->{_next_alignment_start_text} = $1;
+    }
+    else {
+        $self->{_next_alignment_start_text} = undef;
+    }
+
+    my ($name, $domain) = $chunk =~ /^(\S+): domain (\d+)/;
+    $aligns->{$name.'~~~~'.$domain} = $chunk;
+    return 1;
+}
+
+sub _discover_next_hit {
+    my ($self) = @_;
+    my @hit_table = @{$self->get_field('hit_table')};
+    return if $self->{_next_hit_index} == -1;
+
+    # Take the row at the current index and advance the index; the rank
+    # passed to the hit is the advanced (1-based) index.
+    my $index = $self->{_next_hit_index};
+    my $rank  = $index + 1;
+    $self->{_next_hit_index} = $rank;
+
+    #[name description score significance num_hsps rank]
+    my @hit_data = (@{$hit_table[$index]}, $rank);
+
+    $self->_fields->{next_hit} = Bio::Search::Hit::HmmpfamHit->new(-parent => $self,
+                                                                   -hit_data => \@hit_data);
+
+    # -1 marks the iterator as exhausted
+    if ($rank >= scalar(@hit_table)) {
+        $self->{_next_hit_index} = -1;
+    }
+}
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $result->next_hit()) { ... }
+ Function: Returns the next available Hit object, representing potential
+           matches between the query and various entities from the database.
+ Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
+ Args    : none
+
+=cut
+
+sub next_hit {
+    my ($self) = @_;
+    # Fetch the pending hit, then clear the field so that the following
+    # call has to discover the next one.
+    my $next = $self->get_field('next_hit');
+    $self->_fields->{next_hit} = undef;
+    return $next;
+}
+
+=head2 next_model
+
+ Title   : next_model
+ Usage   : my $domain = $result->next_model
+ Function: Returns the next domain - this is an alias for next_hit()
+ Returns : L<Bio::Search::Hit::HitI> object
+ Args    : none
+
+=cut
+
+*next_model = \&next_hit;
+
+=head2 hits
+
+ Title   : hits
+ Usage   : my @hits = $result->hits
+ Function: Returns the HitI objects contained within this Result
+ Returns : Array of Bio::Search::Hit::HitI objects
+ Args    : none
+
+See Also: L<Bio::Search::Hit::HitI>
+
+=cut
+
+sub hits {
+    my ($self) = @_;
+
+    # Remember where the iterator was, walk all hits from the start, then
+    # put the iterator back (or mark it exhausted if there were no hits).
+    my $saved_index = $self->{_next_hit_index} || 0;
+    $self->rewind;
+
+    my @collected;
+    while (1) {
+        my $hit = $self->next_hit;
+        last unless defined $hit;
+        push(@collected, $hit);
+    }
+
+    $self->{_next_hit_index} = @collected ? $saved_index : -1;
+    return @collected;
+}
+
+=head2 models
+
+ Title   : models
+ Usage   : my @domains = $result->models;
+ Function: Returns the list of HMM models seen - this is an alias for hits()
+ Returns : Array of L<Bio::Search::Hit::HitI> objects
+ Args    : none
+
+=cut
+
+*models = \&hits;
+
+=head2 sort_hits
+
+ Title		: sort_hits
+ Usage		: $result->sort_hits(\&sort_function)
+ Function	: Sorts the hits so that they come out in the desired order when
+              hits() or next_hit() is called.
+ Returns	: n/a
+ Args		: A coderef for the sort function. See the documentation on the Perl
+              sort() function for guidelines on writing sort functions.
+			  You will be sorting array references, not HitI objects. The
+			  references contain name as element 0, description as element 1,
+			  score as element 2, significance as element 3 and number of hsps
+			  as element 4.
+			  By default the sort order is ascending significance value (ie.
+			  most significant hits first).
+ Note		: To access the special variables $a and $b used by the Perl sort()
+              function the user function must access
+			  Bio::Search::Result::HmmpfamResult namespace. 
+              For example, use : 
+              $result->sort_hits(
+				sub{$Bio::Search::Result::HmmpfamResult::a->[2]
+				                         <=> 
+					$Bio::Search::Result::HmmpfamResult::b->[2]});
+              NOT $result->sort_hits($a->[2] <=> $b->[2]);
+
+=cut
+
+sub sort_hits {
+    my ($self, $code_ref) = @_;
+
+    # default: ascending significance (element 3 of each hit table row)
+    unless ($code_ref) {
+        $code_ref = sub { $a->[3] <=> $b->[3] };
+    }
+
+    # the rows of the hit table are sorted directly, so no hit objects
+    # have to be created just for sorting
+    my $table_ref = $self->get_field('hit_table');
+    return unless @{$table_ref} > 1;
+
+    my @sorted = sort $code_ref @{$table_ref};
+    unless (@sorted == @{$table_ref}) {
+        $self->throw("Your sort routine failed to give back all hits!");
+    }
+    $self->_fields->{hit_table} = \@sorted;
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $result->rewind;
+ Function: Allow one to reset the Hit iterator to the beginning, so that
+           next_hit() will subsequently return the first hit and so on.
+ Returns : n/a
+ Args    : none
+
+=cut
+
+sub rewind {
+    my ($self) = @_;
+
+    # make sure the hit table has been discovered before counting its rows
+    $self->get_field('hit_table') if !$self->_fields->{hit_table};
+
+    # index 0 restarts iteration; -1 means there is nothing to iterate over
+    my $hit_count = @{$self->_fields->{hit_table}};
+    $self->{_next_hit_index} = $hit_count > 0 ? 0 : -1;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/HmmpfamResult.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/PullResultI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/PullResultI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/PullResultI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,535 @@
+# $Id: PullResultI.pm,v 1.1.2.2 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::Result::PullResultI
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::PullResultI - Bio::Search::Result::ResultI interface for
+                                  'pull' parsers
+
+=head1 SYNOPSIS
+
+    # This is an interface and cannot be instantiated
+
+    # typically one gets Results from a SearchIO stream
+    use Bio::SearchIO;
+    my $io = new Bio::SearchIO(-format => 'hmmer_pull',
+                                -file   => 't/data/hmmpfam.out');
+
+    my $result = $io->next_result;
+
+    while( $hit = $result->next_hit()) {
+        # enter code here for hit processing
+    }
+
+    my $id = $result->query_name();
+    my $desc = $result->query_description();
+    my $dbname = $result->database_name();
+    my $size = $result->database_letters();
+    my $num_entries = $result->database_entries();
+    my $gap_ext = $result->get_parameter('gapext');
+    my @params = $result->available_parameters;
+    my $kappa = $result->get_statistic('kappa');
+    my @statnames = $result->available_statistics;
+
+=head1 DESCRIPTION
+
+Bio::Search::Result::ResultI objects are data structures containing
+the results from the execution of a search algorithm.  As such, it may
+contain various algorithm specific information as well as details of
+the execution, but will contain a few fundamental elements, including
+the ability to return Bio::Search::Hit::HitI objects.
+
+PullResultI is for fast implementations that only do parsing work on the result
+data when you actually request information by calling one of the ResultI
+methods.
+
+Many methods of ResultI are implemented in a way suitable for inheriting classes
+that use Bio::PullParserI. It only really makes sense for PullResult modules to
+be created by (and have as a -parent) SearchIO modules written using
+PullParserI.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 COPYRIGHT
+
+Copyright (c) 2006 Sendu Bala.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Search::Result::PullResultI;
+
+use strict;
+
+use Bio::Search::GenericStatistics;
+use Bio::Tools::Run::GenericParameters;
+
+use base qw(Bio::PullParserI Bio::Search::Result::ResultI);
+
+=head2 _setup
+
+ Title   : _setup
+ Usage   : $self->_setup(@args)
+ Function: Implementers should call this to setup common fields and deal with
+           common arguments to new().
+ Returns : n/a
+ Args    : @args received in new().
+
+=cut
+
+# Common constructor work for pull results: registers the standard result
+# fields, records the -parent and/or -chunk, and stores any -parameters and
+# -statistics given. Throws if neither -parent nor -chunk is supplied, or if
+# -parameters / -statistics is not a hash reference.
+sub _setup {
+    my ($self, @args) = @_;
+
+    # fields most subclasses probably will want
+    $self->_fields( { ( next_hit => undef,
+                        num_hits => undef,
+                        hits => undef,
+                        no_hits_found => undef,
+                        query_name => undef,
+                        query_accession => undef,
+                        query_length => undef,
+                        query_description => undef  ) } );
+
+    my ($parent, $chunk, $params, $stats) = $self->_rearrange([qw(PARENT
+                                                                  CHUNK
+                                                                  PARAMETERS
+                                                                  STATISTICS)],
+                                                              @args);
+    $self->throw("Need -parent or -chunk to be defined") unless $parent || $chunk;
+
+    $self->parent($parent) if $parent;
+
+    if ($chunk) {
+        # -chunk is either [io, start, end] or just the io object itself
+        my ($io, $start, $end) = (undef, 0, undef);
+        if (ref($chunk) eq 'ARRAY') {
+            ($io, $start, $end) = @{$chunk};
+        }
+        else {
+            $io = $chunk;
+        }
+        $self->chunk($io, -start => $start, -end => $end);
+    }
+
+    if (defined $params) {
+        if (ref($params) !~ /hash/i) {
+            $self->throw("Must specify a hash reference with the parameter '-parameters'");
+        }
+        while (my ($key,$value) = each %{$params}) {
+            $self->add_parameter($key, $value);
+        }
+    }
+    if (defined $stats) {
+        if (ref($stats) !~ /hash/i) {
+            $self->throw("Must specify a hash reference with the parameter '-statistics'");
+        }
+        while (my ($key,$value) = each %{$stats}) {
+            $self->add_statistic($key, $value);
+        }
+    }
+}
+
+#
+# Some of these methods are written explicitly to avoid ResultI throwing not
+# implemented; if it didn't do that then PullParserI AUTOLOAD would have
+# caught all of them.
+#
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $result->next_hit()) { ... }
+ Function: Returns the next available Hit object, representing potential
+           matches between the query and various entities from the database.
+ Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
+ Args    : none
+
+=cut
+
+sub next_hit {
+    my ($self) = @_;
+    # the value of the 'next_hit' field, as provided by get_field()
+    return $self->get_field('next_hit');
+}
+
+=head2 sort_hits
+
+ Title		: sort_hits
+ Usage		: $result->sort_hits(\&sort_function)
+ Function	: Sorts the available hit objects by a user-supplied function.
+              Defaults to sort by descending score.
+ Returns	: n/a
+ Args		: A coderef for the sort function. See the documentation on the Perl
+              sort() function for guidelines on writing sort functions.  
+ Note		: To access the special variables $a and $b used by the Perl sort()
+              function the user function must access Bio::Search::Result::ResultI namespace. 
+              For example, use : 
+              $result->sort_hits(sub{$Bio::Search::Result::ResultI::a->length <=> 
+					                 $Bio::Search::Result::ResultI::b->length});
+              NOT $result->sort_hits($a->length <=>$b->length);
+
+=cut
+
+# In ResultI. subclasses will probably want to override since sort_hits normally
+# calls hits().
+
+=head2 query_name
+
+ Title   : query_name
+ Usage   : $id = $result->query_name();
+ Function: Get the string identifier of the query used by the
+           algorithm that performed the search.
+ Returns : a string.
+ Args    : none
+
+=cut
+
+sub query_name {
+    my ($self) = @_;
+    # the value of the 'query_name' field, as provided by get_field()
+    return $self->get_field('query_name');
+}
+
+=head2 query_accession
+
+ Title   : query_accession
+ Usage   : $id = $result->query_accession();
+ Function: Get the accession (if available) for the query sequence
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub query_accession {
+    my ($self) = @_;
+    # the value of the 'query_accession' field, as provided by get_field()
+    return $self->get_field('query_accession');
+}
+
+=head2 query_length
+
+ Title   : query_length
+ Usage   : $id = $result->query_length();
+ Function: Get the length of the query sequence used in the search.
+ Returns : a number
+ Args    : none
+
+=cut
+
+sub query_length {
+    my ($self) = @_;
+    # the value of the 'query_length' field, as provided by get_field()
+    return $self->get_field('query_length');
+}
+
+=head2 query_description
+
+ Title   : query_description
+ Usage   : $id = $result->query_description();
+ Function: Get the description of the query sequence
+           used in the search.
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub query_description {
+    my ($self) = @_;
+    # the value of the 'query_description' field, as provided by get_field()
+    return $self->get_field('query_description');
+}
+
+=head2 database_name
+
+ Title   : database_name
+ Usage   : $name = $result->database_name()
+ Function: Used to obtain the name of the database that the query was searched
+           against by the algorithm.
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+sub database_name {
+    my ($self) = @_;
+    # the value of the 'database_name' field, as provided by get_field()
+    return $self->get_field('database_name');
+}
+
+=head2 database_letters
+
+ Title   : database_letters
+ Usage   : $size = $result->database_letters()
+ Function: Used to obtain the size of database that was searched against.
+ Returns : a scalar integer (units specific to algorithm, but probably the
+           total number of residues in the database, if available) or undef if
+           the information was not available to the Processor object.
+ Args    : none
+
+=cut
+
+sub database_letters {
+    my ($self) = @_;
+    # the value of the 'database_letters' field, as provided by get_field()
+    return $self->get_field('database_letters');
+}
+
+=head2 database_entries
+
+ Title   : database_entries
+ Usage   : $num_entries = $result->database_entries()
+ Function: Used to obtain the number of entries contained in the database.
+ Returns : a scalar integer representing the number of entities in the database
+           or undef if the information was not available.
+ Args    : none
+
+=cut
+
+sub database_entries {
+    my ($self) = @_;
+    # the value of the 'database_entries' field, as provided by get_field()
+    return $self->get_field('database_entries');
+}
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $result->algorithm
+ Function: Obtain the name of the algorithm used to obtain the Result
+ Returns : string (e.g., BLASTP)
+ Args    : none (this implementation is read-only; any argument is ignored)
+
+=cut
+
+sub algorithm {
+   my ($self) = @_;
+   # the value of the 'algorithm' field, as provided by get_field()
+   return $self->get_field('algorithm');
+}
+
+=head2 algorithm_version
+
+ Title   : algorithm_version
+ Usage   : my $r_version = $result->algorithm_version
+ Function: Obtain the version of the algorithm used to obtain the Result
+ Returns : string (e.g., 2.1.2)
+ Args    : none (this implementation is read-only; any argument is ignored)
+
+=cut
+
+sub algorithm_version {
+   my ($self) = @_;
+   # the value of the 'algorithm_version' field, as provided by get_field()
+   return $self->get_field('algorithm_version');
+}
+
+=head2 algorithm_reference
+
+ Title   : algorithm_reference
+ Usage   : $obj->algorithm_reference($newval)
+ Function: 
+ Returns : value of the literature reference for the algorithm
+ Args    : newvalue (optional)
+ Comments: The default implementation in ResultI returns an empty string
+           rather than throwing a NotImplemented exception, since
+           the ref may not always be available and is not critical.
+
+=cut
+
+sub algorithm_reference {
+   # No reference is available here, so the answer is always the empty
+   # string; arguments are ignored.
+   return '';
+}
+
+=head2 num_hits
+
+ Title   : num_hits
+ Usage   : my $hitcount= $result->num_hits
+ Function: returns the number of hits for this query result
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub num_hits {
+   my ($self) = @_;
+   # the value of the 'num_hits' field, as provided by get_field()
+   return $self->get_field('num_hits');
+}
+
+=head2 hits
+
+ Title   : hits
+ Usage   : my @hits = $result->hits
+ Function: Returns the HitI objects contained within this Result
+ Returns : Array of Bio::Search::Hit::HitI objects
+ Args    : none
+
+See Also: L<Bio::Search::Hit::HitI>
+
+=cut
+
+sub hits {
+   my ($self) = @_;
+   # the value of the 'hits' field, as provided by get_field()
+   return $self->get_field('hits');
+}
+
+=head2 no_hits_found
+
+ Usage     : $nohits = $blast->no_hits_found();
+ Function  : Get boolean indicator indicating whether or not any hits
+             were present in the report.
+
+             This is NOT the same as determining the number of hits via
+             the hits() method, which will return zero hits if there were no
+             hits in the report or if all hits were filtered out during the
+             parse.
+
+             Thus, this method can be used to distinguish these possibilities
+             for hitless reports generated when filtering.
+
+ Returns   : Boolean
+ Args      : none
+
+=cut
+
+sub no_hits_found {
+    my ($self) = @_;
+    # the value of the 'no_hits_found' field, as provided by get_field()
+    return $self->get_field('no_hits_found');
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $result->rewind;
+ Function: Allow one to reset the Hit iterator to the beginning.
+           Not implemented here: implementing classes must override this.
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind {
+   my ($self) = @_;
+   # no generic implementation exists; subclasses have to provide one
+   $self->throw_not_implemented();
+}
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : my $gap_ext = $result->get_parameter('gapext')
+ Function: Returns the value for a specific parameter used
+           when running this result
+ Returns : string
+ Args    : name of parameter (string)
+
+=cut
+
+sub get_parameter {
+    my $self  = shift;
+    my $param = shift;
+
+    # nothing to look up without a (true) name or before any parameter
+    # has been added
+    return unless $param;
+    my $store = $self->{_parameters};
+    return unless defined $store;
+
+    return $store->get_parameter($param);
+}
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @params = $result->available_parameters
+ Function: Returns the names of the available parameters
+ Returns : Return list of available parameters used for this result
+ Args    : none
+
+=cut
+
+sub available_parameters {
+    my ($self) = @_;
+    # an empty list until the first parameter has been added
+    my $store = $self->{_parameters};
+    return () unless defined $store;
+    return $store->available_parameters;
+}
+
+=head2 add_parameter
+
+ Title   : add_parameter
+ Usage   : $report->add_parameter('gapext', 11);
+ Function: Adds a parameter
+ Returns : none
+ Args    : key  - key value name for this parameter
+           value - value for this parameter
+
+=cut
+
+sub add_parameter {
+    my $self = shift;
+    my ($key, $value) = @_;
+
+    # the parameter store is only created when first needed
+    if (! exists $self->{_parameters}) {
+        $self->{_parameters} = Bio::Tools::Run::GenericParameters->new;
+    }
+    $self->{_parameters}->set_parameter($key => $value);
+}
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : my $kappa = $result->get_statistic('kappa')
+ Function: Returns the value for a specific statistic available 
+           from this result
+ Returns : string
+ Args    : name of statistic (string)
+
+=cut
+
+sub get_statistic {
+    my $self = shift;
+    my $stat = shift;
+
+    # nothing to look up without a (true) name or before any statistic
+    # has been added
+    return unless $stat;
+    my $store = $self->{_statistics};
+    return unless defined $store;
+
+    return $store->get_statistic($stat);
+}
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $result->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : Return list of available statistics used for this result
+ Args    : none
+
+=cut
+
+sub available_statistics {
+    my ($self) = @_;
+    # an empty list until the first statistic has been added
+    my $store = $self->{_statistics};
+    return () unless defined $store;
+    return $store->available_statistics;
+}
+
+=head2 add_statistic
+
+ Title   : add_statistic
+ Usage   : $report->add_statistic('lambda', 2.3);
+ Function: Adds a statistic
+ Returns : none
+ Args    : key  - key value name for this statistic
+           value - value for this statistic
+
+=cut
+
+sub add_statistic {
+    my $self = shift;
+    my ($key, $value) = @_;
+
+    # the statistics store is only created when first needed
+    if (! exists $self->{_statistics}) {
+        $self->{_statistics} = Bio::Search::GenericStatistics->new;
+    }
+    $self->{_statistics}->set_statistic($key => $value);
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/PullResultI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: ResultFactory.pm,v 1.6.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Result::ResultFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::ResultFactory - A factory to create Bio::Search::Result::ResultI objects 
+
+=head1 SYNOPSIS
+
+    use Bio::Search::Result::ResultFactory;
+    my $factory = new Bio::Search::Result::ResultFactory();
+    my $resultobj = $factory->create(@args);
+
+=head1 DESCRIPTION
+
+This is a general way of hiding the object creation process so that we
+can dynamically change the objects that are created by the SearchIO
+parser depending on what format report we are parsing.
+
+This object is for creating new Results.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Result::ResultFactory;
+use vars qw($DEFAULT_TYPE);
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Factory::ObjectFactoryI);
+
+BEGIN { 
+    $DEFAULT_TYPE = 'Bio::Search::Result::GenericResult'; 
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::ResultFactory();
+ Function: Builds a new Bio::Search::Result::ResultFactory object 
+ Returns : Bio::Search::Result::ResultFactory
+ Args    :
+
+
+=cut
+
+# Builds the factory; an optional -type argument selects the class of the
+# result objects that create() will produce.
+sub new {
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($type) = $self->_rearrange([qw(TYPE)], @args);
+  $self->type($type) if defined $type;
+  return $self;
+}
+
+=head2 create
+
+ Title   : create
+ Usage   : $factory->create(%args)
+ Function: Create a new L<Bio::Search::Result::ResultI> object  
+ Returns : L<Bio::Search::Result::ResultI>
+ Args    : hash of initialization parameters
+
+
+=cut
+
+# Loads the currently configured result class and returns a new instance of
+# it, built from the given arguments. Throws if the class cannot be loaded.
+sub create{
+   my ($self, @args) = @_;
+   my $type = $self->type;
+   eval { $self->_load_module($type) };
+   if( $@ ) { $self->throw("Unable to load module $type: $@"); }
+   return $type->new(@args);
+}
+
+
+=head2 type
+
+ Title   : type
+ Usage   : $factory->type('Bio::Search::Result::GenericResult');
+ Function: Get/Set the Result creation type
+ Returns : string
+ Args    : [optional] string to set 
+
+
+=cut
+
+sub type {
+    my $self = shift;
+    my $type = shift;
+
+    if (defined $type) {
+        # The module is loaded here as well as in create(); a type whose
+        # module cannot be loaded is not stored, only warned about.
+        eval { $self->_load_module($type) };
+        if ($@) {
+            $self->warn("Cannot find module $type, unable to set type");
+        }
+        else {
+            $self->{'_type'} = $type;
+        }
+    }
+
+    # fall back to the package default while no type has been set
+    return $self->{'_type'} || $DEFAULT_TYPE;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/ResultI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,494 @@
+#-----------------------------------------------------------------
+# $Id: ResultI.pm,v 1.23.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module Bio::Search::Result::ResultI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# Originally created by Aaron Mackey <amackey at virginia.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::ResultI - Abstract interface to Search Result objects
+
+=head1 SYNOPSIS
+
+# Bio::Search::Result::ResultI objects cannot be instantiated since this
+# module defines a pure interface.
+
+# Given an object that implements the Bio::Search::Result::ResultI  interface,
+# you can do the following things with it:
+
+    use Bio::SearchIO;
+    my $io = new Bio::SearchIO(-format => 'blast',
+                                -file   => 't/data/HUMBETGLOA.tblastx');
+    my $result = $io->next_result;
+    while( $hit = $result->next_hit()) { # enter code here for hit processing
+    }
+
+    my $id = $result->query_name();
+
+    my $desc = $result->query_description();
+
+    my $dbname = $result->database_name();
+
+    my $size = $result->database_letters();
+
+    my $num_entries = $result->database_entries();
+
+    my $gap_ext = $result->get_parameter('gapext');
+
+    my @params = $result->available_parameters;
+
+    my $kappa = $result->get_statistic('kappa');
+
+    my @statnames = $result->available_statistics;
+
+
+=head1 DESCRIPTION
+
+Bio::Search::Result::ResultI objects are data structures containing
+the results from the execution of a search algorithm.  As such, it may
+contain various algorithm specific information as well as details of
+the execution, but will contain a few fundamental elements, including
+the ability to return Bio::Search::Hit::HitI objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Aaron Mackey E<lt>amackey at virginia.eduE<gt>  (original author)
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 1999-2001 Aaron Mackey, Steve Chervitz. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::Search::Result::ResultI;
+
+use strict;
+
+
+use base qw(Bio::AnalysisResultI);
+
+
+=head2 next_hit
+
+ Title   : next_hit
+ Usage   : while( $hit = $result->next_hit()) { ... }
+ Function: Returns the next available Hit object, representing potential
+           matches between the query and various entities from the database.
+ Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
+ Args    : none
+
+
+=cut
+
+sub next_hit {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 sort_hits
+
+ Title		: sort_hits
+ Usage		: $result->sort_hits(\&sort_function)
+ Function	: Sorts the available hit objects by a user-supplied function. Defaults to sort
+                  by descending score.
+ Returns	: n/a
+ Args		: A coderef for the sort function.  See the documentation on the Perl sort() 
+                  function for guidelines on writing sort functions.  
+ Note		: To access the special variables $a and $b used by the Perl sort() function 
+                  the user function must access Bio::Search::Result::ResultI namespace. 
+                  For example, use : 
+                  $result->sort_hits( sub{$Bio::Search::Result::ResultI::a->length <=> 
+					      $Bio::Search::Result::ResultI::b->length});
+                   NOT $result->sort_hits($a->length <=>$b->length);
+
+=cut
+
+sub sort_hits {
+    my ($self, $coderef) = @_;
+
+    # No sort function given: fall back to descending score.
+    if (!$coderef) {
+        $coderef = \&_default_sort_hits;
+    }
+    elsif (ref($coderef) ne 'CODE') {
+        $self->throw('sort_hits requires a sort function passed as a subroutine reference');
+    }
+
+    my @hits = $self->hits();
+    my @sorted_hits;
+
+    # The comparison function is user code, so trap anything it dies with.
+    eval { @sorted_hits = sort $coderef @hits };
+
+    unless ($@) {
+        $self->{'_hits'} = \@sorted_hits;
+        # to bypass iteration checking in hits() method
+        $self->{'_no_iterations'} = 1;
+        return 1;
+    }
+
+    return $self->throw("Unable to sort hits: $@");
+}
+
+=head2 _default_sort_hits
+
+  Title	: _default_sort_hits
+  Usage	: Do not call directly.
+  Function: Sort hits in descending order by score
+  Args	: None
+  Returns: 1 on success
+  Note	: Used by $result->sort_hits()
+
+=cut
+
+sub _default_sort_hits {
+    # sort() called from this package places the two hits being compared
+    # in this package's $a and $b; comparing b to a gives descending order.
+    my $score_b = $Bio::Search::Result::ResultI::b->score;
+    my $score_a = $Bio::Search::Result::ResultI::a->score;
+    return $score_b <=> $score_a;
+}
+
+=head2 query_name
+
+ Title   : query_name
+ Usage   : $id = $result->query_name();
+ Function: Get the string identifier of the query used by the
+           algorithm that performed the search.
+ Returns : a string.
+ Args    : none
+
+=cut
+
+sub query_name {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 query_accession
+
+ Title   : query_accession
+ Usage   : $id = $result->query_accession();
+ Function: Get the accession (if available) for the query sequence
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub query_accession {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+
+=head2 query_length
+
+ Title   : query_length
+ Usage   : $len = $result->query_length();
+ Function: Get the length of the query sequence
+           used in the search.
+ Returns : a number
+ Args    : none
+
+=cut
+
+sub query_length {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+=head2 query_description
+
+ Title   : query_description
+ Usage   : $id = $result->query_description();
+ Function: Get the description of the query sequence
+           used in the search.
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub query_description {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+    $self->throw_not_implemented;
+}
+
+
+=head2 database_name
+
+ Title   : database_name
+ Usage   : $name = $result->database_name()
+ Function: Used to obtain the name of the database that the query was searched
+           against by the algorithm.
+ Returns : a scalar string
+ Args    : none
+
+=cut
+
+sub database_name {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+
+    $self->throw_not_implemented;
+}
+
+=head2 database_letters
+
+ Title   : database_letters
+ Usage   : $size = $result->database_letters()
+ Function: Used to obtain the size of database that was searched against.
+ Returns : a scalar integer (units specific to algorithm, but probably the
+           total number of residues in the database, if available) or undef if
+           the information was not available to the Processor object.
+ Args    : none
+
+
+=cut
+
+sub database_letters {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 database_entries
+
+ Title   : database_entries
+ Usage   : $num_entries = $result->database_entries()
+ Function: Used to obtain the number of entries contained in the database.
+ Returns : a scalar integer representing the number of entities in the database
+           or undef if the information was not available.
+ Args    : none
+
+
+=cut
+
+sub database_entries {
+    # Interface stub: always reports "not implemented".
+    # (The archived copy had the @args sigil garbled as " at args".)
+    my ($self, @args) = @_;
+
+    $self->throw_not_implemented();
+}
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : my $gap_ext = $result->get_parameter('gapext')
+ Function: Returns the value for a specific parameter used
+           when running this result
+ Returns : string
+ Args    : name of parameter (string)
+
+=cut
+
+sub get_parameter{
+   # Interface stub: always reports "not implemented".
+   # (The archived copy had the @args sigil garbled as " at args".)
+   my ($self, @args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @params = $result->available_parameters
+ Function: Returns the names of the available parameters
+ Returns : Return list of available parameters used for this result
+ Args    : none
+
+=cut
+
+sub available_parameters {
+   # Interface stub: always reports "not implemented".
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : my $kappa = $result->get_statistic('kappa')
+ Function: Returns the value for a specific statistic available 
+           from this result
+ Returns : string
+ Args    : name of statistic (string)
+
+=cut
+
+sub get_statistic{
+   # Interface stub: always reports "not implemented".
+   # (The archived copy had the @args sigil garbled as " at args".)
+   my ($self, @args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 available_statistics
+
+ Title   : available_statistics
+ Usage   : my @statnames = $result->available_statistics
+ Function: Returns the names of the available statistics
+ Returns : Return list of available statistics used for this result
+ Args    : none
+
+=cut
+
+sub available_statistics {
+   # Interface stub: always reports "not implemented".
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 algorithm
+
+ Title   : algorithm
+ Usage   : my $r_type = $result->algorithm
+ Function: Obtain the name of the algorithm used to obtain the Result
+ Returns : string (e.g., BLASTP)
+ Args    : [optional] scalar string to set value
+
+=cut
+
+sub algorithm {
+   # Interface stub: always reports "not implemented".
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 algorithm_version
+
+ Title   : algorithm_version
+ Usage   : my $r_version = $result->algorithm_version
+ Function: Obtain the version of the algorithm used to obtain the Result
+ Returns : string (e.g., 2.1.2)
+ Args    : [optional] scalar string to set algorithm version value
+
+=cut
+
+sub algorithm_version {
+   # Interface stub: always reports "not implemented".
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 algorithm_reference
+
+ Title   : algorithm_reference
+ Usage   : $obj->algorithm_reference($newval)
+ Function: 
+ Returns : value of the literature reference for the algorithm
+ Args    : newvalue (optional)
+ Comments: The default implementation in ResultI returns an empty string
+           rather than throwing a NotImplemented exception, since
+           the ref may not always be available and is not critical.
+
+=cut
+
+sub algorithm_reference {
+   # Default implementation: no reference known, so hand back an empty
+   # string instead of throwing.
+   my $self = shift;
+   my $no_reference = '';
+   return $no_reference;
+}
+
+=head2 num_hits
+
+ Title   : num_hits
+ Usage   : my $hitcount= $result->num_hits
+ Function: returns the number of hits for this query result
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub num_hits{
+   # Interface stub: always reports "not implemented".
+   # (The archived copy had the @args sigil garbled as " at args".)
+   my ($self, @args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 hits
+
+ Title   : hits
+ Usage   : my @hits = $result->hits
+ Function: Returns the HitI objects contained within this Result
+ Returns : Array of Bio::Search::Hit::HitI objects
+ Args    : none
+
+See Also: L<Bio::Search::Hit::HitI>
+
+=cut
+
+sub hits{
+   # Interface stub: always reports "not implemented".
+   # (The archived copy had the @args sigil garbled as " at args".)
+   my ($self, @args) = @_;
+   $self->throw_not_implemented();
+}
+
+
+=head2 no_hits_found
+
+ Usage     : $nohits = $blast->no_hits_found();
+ Purpose   : Get boolean indicator indicating whether or not any hits
+             were present in the report.
+
+             This is NOT the same as determining the number of hits via
+             the hits() method, which will return zero hits if there were no
+             hits in the report or if all hits were filtered out during the parse.
+
+             Thus, this method can be used to distinguish these possibilities
+             for hitless reports generated when filtering.
+
+ Returns   : Boolean
+ Argument  : none
+
+=cut
+
+#-----------
+sub no_hits_found {
+    # Interface stub: always reports "not implemented".
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+
+
+=head2 set_no_hits_found
+
+ Usage     : $blast->set_no_hits_found(); 
+ Purpose   : Set boolean indicator indicating whether or not any hits
+             were present in the report.
+ Returns   : n/a
+ Argument  : none
+
+=cut
+
+sub set_no_hits_found {
+    # Interface stub: always reports "not implemented".
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/WABAResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/WABAResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/Result/WABAResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+# $Id: WABAResult.pm,v 1.5.4.1 2006/10/02 23:10:24 sendu Exp $
+#
+# BioPerl module for Bio::Search::Result::WABAResult
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::Result::WABAResult - Result object for WABA alignment output
+
+=head1 SYNOPSIS
+
+# use this object exactly as you would a GenericResult
+# the only extra method is query_database which is the 
+# name of the file where the query sequence came from
+
+=head1 DESCRIPTION
+
+This object is for WABA result output, there is little difference
+between this object and a GenericResult save the addition of one
+method query_database.  Expect many of the fields for GenericResult to
+be empty however as WABA was not intended to provide a lot of extra
+information other than the alignment.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::Result::WABAResult;
+use strict;
+
+
+use base qw(Bio::Search::Result::GenericResult);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Search::Result::WABAResult();
+ Function: Builds a new Bio::Search::Result::WABAResult object 
+ Returns : Bio::Search::Result::WABAResult
+ Args    : -query_database => "name of the database where the query came from"
+
+
+=cut
+
+sub new {
+  # The archived copy had the array sigil garbled (" at args") on this
+  # line, which does not compile; the argument list is restored as @args.
+  my($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  # QUERY_DATABASE is the one argument this subclass handles itself.
+  my ($db) = $self->_rearrange([qw(QUERY_DATABASE)], @args);
+  defined $db && $self->query_database($db);
+  return $self;
+}
+
+=head2 query_database
+
+ Title   : query_database
+ Usage   : $obj->query_database($newval)
+ Function: Data field for the database filename where the 
+           query sequence came from
+ Returns : value of query_database
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub query_database{
+    my $self  = shift;
+    my $value = shift;
+
+    # Only a defined argument overwrites the stored value, so a plain
+    # getter call leaves it untouched.
+    $self->{'query_database'} = $value if defined $value;
+
+    return $self->{'query_database'};
+}
+
+
+=head2 All other methods are inherited from Bio::Search::Result::GenericResult
+
+See the L<Bio::Search::Result::GenericResult> for complete
+documentation of the rest of the methods that are available for this
+module.
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/SearchUtils.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/SearchUtils.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/SearchUtils.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,704 @@
+=head1 NAME
+
+Bio::Search::SearchUtils - Utility functions for Bio::Search:: objects
+
+=head1 SYNOPSIS
+
+  # This module is just a collection of subroutines, not an object.
+
+=head1 DESCRIPTION
+
+The SearchUtils.pm module is a collection of subroutines used
+primarily by Bio::Search::Hit::HitI objects for some of the additional
+functionality, such as HSP tiling. Right now, the SearchUtils is just
+a collection of methods, not an object.
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=cut
+
+package Bio::Search::SearchUtils;
+use Bio::Root::Version;
+
+use strict;
+
+=head2 tile_hsps
+
+ Usage     : tile_hsps( $sbjct );
+           : This is called automatically by methods in Bio::Search::Hit::GenericHit 
+           : that rely on having tiled data.
+           :
+           : If you are interested in getting data about the constructed HSP contigs:
+           : my ($qcontigs, $scontigs) = Bio::Search::SearchUtils::tile_hsps($hit);
+           : if (ref $qcontigs) {
+           :    print STDERR "Query contigs:\n";
+           :    foreach (@{$qcontigs}) {
+           :         print "contig start is $_->{'start'}\n";
+           :         print "contig stop is $_->{'stop'}\n";
+           :    }
+           : }
+           : See below for more information about the contig data structure.
+           :
+ Purpose   : Collect statistics about the aligned sequences in a set of HSPs.
+           : Calculates the following data across all HSPs: 
+           :    -- total alignment length 
+           :    -- total identical residues 
+           :    -- total conserved residues
+ Returns   : If there was only a single HSP (so no tiling was necessary)
+               tile_hsps() returns a list of two non-zero integers.
+             If there were multiple HSP, 
+               tile_hsps() returns a list of two array references containing HSP contig data.
+             The first array ref contains a list of HSP contigs on the query sequence.
+             The second array ref contains a list of HSP contigs on the subject sequence.
+             Each contig is a hash reference with the following data fields:
+               'start' => start coordinate of the contig
+               'stop'  => stop coordinate of the contig
+               'iden'  => number of identical residues in the contig
+               'cons'  => number of conserved residues in the contig
+               'strand'=> strand of the contig
+               'frame' => frame of the contig
+ Argument  : A Bio::Search::Hit::HitI object 
+ Throws    : n/a
+ Comments  :
+           : This method performs more careful summing of data across
+           : all HSPs in the Sbjct object. Only HSPs that are in the same strand 
+           : and frame are tiled. Simply summing the data from all HSPs
+           : in the same strand and frame will overestimate the actual 
+           : length of the alignment if there is overlap between different HSPs 
+           : (often the case).
+           :
+           : The strategy is to tile the HSPs and sum over the
+           : contigs, collecting data separately from overlapping and
+           : non-overlapping regions of each HSP. To facilitate this, the
+           : HSP.pm object now permits extraction of data from sub-sections
+           : of an HSP.
+           : 
+           : Additional useful information is collected from the results
+           : of the tiling. It is possible that sub-sequences in
+           : different HSPs will overlap significantly. In this case, it
+           : is impossible to create a single unambiguous alignment by
+           : concatenating the HSPs. The ambiguity may indicate the
+           : presence of multiple, similar domains in one or both of the
+           : aligned sequences. This ambiguity is recorded using the
+           : ambiguous_aln() method.
+           : 
+           : This method does not attempt to discern biologically
+           : significant vs. insignificant overlaps. The allowable amount of 
+           : overlap can be set with the overlap() method or with the -OVERLAP
+           : parameter used when constructing the Hit object.
+           : 
+           : For a given hit, both the query and the sbjct sequences are
+           : tiled independently.
+           : 
+           :    -- If only query sequence HSPs overlap, 
+           :          this may suggest multiple domains in the sbjct.
+           :    -- If only sbjct sequence HSPs overlap, 
+           :          this may suggest multiple domains in the query.
+           :    -- If both query & sbjct sequence HSPs overlap, 
+           :          this suggests multiple domains in both.
+           :    -- If neither query & sbjct sequence HSPs overlap, 
+           :          this suggests either no multiple domains in either
+           :          sequence OR that both sequences have the same
+           :          distribution of multiple similar domains.
+           : 
+           : This method can deal with the special case of when multiple
+           : HSPs exactly overlap.
+           : 
+           : Efficiency concerns:
+           :  Speed will be an issue for sequences with numerous HSPs.
+           : 
+ Bugs      : Currently, tile_hsps() does not properly account for
+           : the number of non-tiled but overlapping HSPs, which becomes a problem
+           : as overlap() grows. Large values of overlap() may thus lead to 
+           : incorrect statistics for some hits. For best results, keep overlap()
+           : below 5 (DEFAULT IS 2). For more about this, see the "HSP Tiling and
+           : Ambiguous Alignments" section in L<Bio::Search::Hit::GenericHit>.
+
+See Also   : L<_adjust_contigs>(), L<Bio::Search::Hit::GenericHit|Bio::Search::Hit::GenericHit>
+
+=cut
+
+#--------------
+sub tile_hsps {
+#--------------
+    # Tiles the HSPs of the hit object passed as the only argument and
+    # stores the summed statistics (aligned lengths, matches, gaps,
+    # start/end, strand, frame, ambiguity) back on that object.
+    # Returns (undef, undef) when there are no HSPs, (1, 1) when there is
+    # a single HSP, otherwise two array refs: query contigs, sbjct contigs.
+    my $sbjct = shift;
+
+    #print STDERR "Calling tile_hsps(): $sbjct\n";
+    #$sbjct->verbose(1);  # to activate debugging
+    # NOTE(review): presumably marks the hit as tiled so callers do not
+    # tile it again -- confirm against the Hit implementation.
+    $sbjct->tiled_hsps(1);
+
+    if( $sbjct->num_hsps == 0 || $sbjct->n == 0 ) { 		
+	#print STDERR "_tile_hsps(): no hsps, nothing to tile! (", $sbjct->num_hsps, ")\n";
+        _warn_about_no_hsps($sbjct);
+        return (undef, undef);
+
+    } elsif( $sbjct->n == 1 or $sbjct->num_hsps == 1) {
+        ## Simple summation scheme. Valid if there is only one HSP.
+	#print STDERR "_tile_hsps(): single HSP, easy stats.\n";
+	my $hsp = $sbjct->hsp;
+	$sbjct->length_aln('query', $hsp->length('query'));
+	$sbjct->length_aln('hit', $hsp->length('sbjct'));
+	$sbjct->length_aln('total', $hsp->length('total'));
+	$sbjct->matches( $hsp->matches() );
+	$sbjct->gaps('query', $hsp->gaps('query'));
+	$sbjct->gaps('sbjct', $hsp->gaps('sbjct'));
+
+        _adjust_length_aln($sbjct);
+	return (1, 1);
+    } else {
+	# Multiple HSPs: zero the accumulators before tiling below.
+	#print STDERR "Sbjct: _tile_hsps: summing multiple HSPs\n";
+	$sbjct->length_aln('query', 0);
+	$sbjct->length_aln('sbjct', 0);
+	$sbjct->length_aln('total', 0); 
+ 	$sbjct->matches( 0, 0);
+        $sbjct->gaps('query', 0);
+        $sbjct->gaps('hit', 0);
+    }
+
+    ## More than one HSP. Must tile HSPs.
+#    print "\nTiling HSPs for $sbjct\n";
+    my($hsp, $qstart, $qstop, $sstart, $sstop);
+    my($frame, $strand, $qstrand, $sstrand);
+    my(@qcontigs, @scontigs);
+    my $qoverlap = 0;
+    my $soverlap = 0;
+    my $max_overlap = $sbjct->overlap;
+    my $hit_qgaps = 0;
+    my $hit_sgaps = 0;
+    my $hit_len_aln = 0;
+    my %start_stop;
+    my $v = $sbjct->verbose;
+    foreach $hsp ( $sbjct->hsps() ) {
+	#$sbjct->debug( sprintf("  HSP: %s %d..%d\n",$hsp->query->seq_id, $hsp->query->start, $hsp->hit->end)) if $v > 0; #$hsp->str('query');
+#	printf "  Length = %d; Identical = %d; Conserved = %d; Conserved(1-10): %d",$hsp->length, $hsp->length(-TYPE=>'iden'), 
+#	$hsp->length(-TYPE=>'cons'),
+#	$hsp->length(-TYPE=>'cons',
+#		     -START=>0,-STOP=>10); 
+
+	($qstart, $qstop) = $hsp->range('query');
+	($sstart, $sstop) = $hsp->range('sbjct');
+	$frame = $hsp->frame;
+	# An undefined frame is replaced by -1 so it can be compared and
+	# used in the "$frame$strand" grouping keys further down.
+	$frame = -1 unless defined $frame;
+	
+	($qstrand, $sstrand) = ($hsp->query->strand,
+				$hsp->hit->strand);
+
+        # Note: No correction for overlap.
+	
+	my ($qgaps, $sgaps)  = ($hsp->gaps('query'), $hsp->gaps('hit'));
+	$hit_qgaps += $qgaps;
+	$hit_sgaps += $sgaps;
+	$hit_len_aln += $hsp->length;
+
+	# NOTE(review): $qoverlap and $soverlap are overwritten on every
+	# pass, so only the value returned for the last HSP reaches the
+	# ambiguous_aln() decision below -- confirm whether the flags were
+	# meant to accumulate across HSPs.
+	## Collect contigs in the query sequence.
+ 	$qoverlap = &_adjust_contigs('query', $hsp, $qstart, $qstop, 
+				     \@qcontigs, $max_overlap, $frame, 
+				     $qstrand);
+
+	## Collect contigs in the sbjct sequence 
+	#  (needed for domain data and gapped Blast).
+	$soverlap = &_adjust_contigs('sbjct', $hsp, $sstart, $sstop, 
+				     \@scontigs, $max_overlap, $frame, 
+				     $sstrand);
+
+	## Collect overall start and stop data for query and 
+	#  sbjct over all HSPs.
+	unless ( defined $start_stop{'qstart'} ) {
+            $start_stop{'qstart'}  = $qstart;
+            $start_stop{'qstop'}   = $qstop;
+            $start_stop{'sstart'}  = $sstart;
+            $start_stop{'sstop'}   = $sstop;
+	} else {
+	    # Keep the smallest start and the largest stop seen so far.
+	    $start_stop{'qstart'} = ($qstart < $start_stop{'qstart'} ? 
+				     $qstart : $start_stop{'qstart'} );
+	    $start_stop{'qstop'}  = ($qstop  > $start_stop{'qstop'} ? 
+				     $qstop  : $start_stop{'qstop'} );
+	    $start_stop{'sstart'} = ($sstart < $start_stop{'sstart'} ? 
+				     $sstart : $start_stop{'sstart'} );
+	    $start_stop{'sstop'}  = ($sstop  > $start_stop{'sstop'} ? 
+				     $sstop  : $start_stop{'sstop'} );
+	}	    
+    }
+
+    # Store the collected data in the Hit object
+    $sbjct->gaps('query', $hit_qgaps);
+    $sbjct->gaps('hit', $hit_sgaps);
+    $sbjct->length_aln('total', $hit_len_aln);
+    
+    $sbjct->start('query',$start_stop{'qstart'});
+    $sbjct->end('query', $start_stop{'qstop'});
+    $sbjct->start('hit', $start_stop{'sstart'});
+    $sbjct->end('hit', $start_stop{'sstop'});
+    ## Collect data across the collected contigs.
+
+    #$sbjct->debug( "\nQUERY CONTIGS:\n"."  gaps = $sbjct->{'_gaps_query'}\n");
+
+    # Account for strand/frame.
+    # Strategy: collect data on a per strand+frame basis and 
+    #           save the most significant one.
+    my (%qctg_dat);
+    foreach (@qcontigs) {
+        ($frame, $strand) = ($_->{'frame'}, $_->{'strand'});
+        
+        if( $v > 0 ) {
+            #$sbjct->debug(sprintf( "$frame/$strand len is getting %d for %d..%d\n", 
+            #			   ($_->{'stop'} - $_->{'start'} + 1), $_->{'start'}, $_->{'stop'}));
+        }
+        
+        # Group key is the frame and strand concatenated into one string.
+        $qctg_dat{ "$frame$strand" }->{'length_aln_query'} += $_->{'stop'} - $_->{'start'} + 1;
+        $qctg_dat{ "$frame$strand" }->{'totalIdentical'}   += $_->{'iden'};
+        $qctg_dat{ "$frame$strand" }->{'totalConserved'}   += $_->{'cons'};
+        $qctg_dat{ "$frame$strand" }->{'qstrand'}   = $strand;
+    }
+
+    # Find longest contig.
+    # (Sorts the frame+strand groups by summed query length, descending.)
+    my @sortedkeys = sort { $qctg_dat{$b}->{'length_aln_query'} 
+			    <=> $qctg_dat{$a}->{'length_aln_query'} }
+    keys %qctg_dat;
+
+    # Save the largest to the sbjct:
+    my $longest = $sortedkeys[0];
+    #$sbjct->debug( "longest is ". $qctg_dat{ $longest }->{'length_aln_query'}. "\n");
+    $sbjct->length_aln('query', $qctg_dat{ $longest }->{'length_aln_query'});
+    $sbjct->matches($qctg_dat{ $longest }->{'totalIdentical'},
+                    $qctg_dat{ $longest }->{'totalConserved'});
+    $sbjct->strand('query', $qctg_dat{ $longest }->{'qstrand'});
+
+    ## Collect data for sbjct contigs. Important for gapped Blast.
+    ## The totalIdentical and totalConserved numbers will be the same
+    ## as determined for the query contigs.
+
+    #$sbjct->debug( "\nSBJCT CONTIGS:\n"."  gaps = ". $sbjct->gaps('sbjct'). "\n");
+    my (%sctg_dat);
+    foreach(@scontigs) {
+	#$sbjct->debug("  sbjct contig: $_->{'start'} - $_->{'stop'}\n".
+	#	     "         iden = $_->{'iden'}; cons = $_->{'cons'}\n");
+	($frame, $strand) = ($_->{'frame'}, $_->{'strand'});
+	$sctg_dat{ "$frame$strand" }->{'length_aln_sbjct'}   += $_->{'stop'} - $_->{'start'} + 1;
+	$sctg_dat{ "$frame$strand" }->{'frame'}  = $frame;
+	$sctg_dat{ "$frame$strand" }->{'sstrand'}  = $strand;
+    }
+    
+    # Same descending sort as for the query groups, on sbjct length.
+    @sortedkeys = sort { $sctg_dat{ $b }->{'length_aln_sbjct'} 
+			 <=> $sctg_dat{ $a }->{'length_aln_sbjct'} 
+		     } keys %sctg_dat;
+
+    # Save the largest to the sbjct:
+    $longest = $sortedkeys[0];
+
+    $sbjct->length_aln('sbjct', $sctg_dat{ $longest }->{'length_aln_sbjct'});
+    $sbjct->frame( $sctg_dat{ $longest }->{'frame'} );
+    $sbjct->strand('hit', $sctg_dat{ $longest }->{'sstrand'});
+
+    # Record which side(s) overlapped: 'qs' both, 'q' query only,
+    # 's' sbjct only; nothing is recorded when neither flag is set.
+    if($qoverlap) {
+	if($soverlap) { $sbjct->ambiguous_aln('qs'); 
+			#$sbjct->debug("\n*** AMBIGUOUS ALIGNMENT: Query and Sbjct\n\n");
+		    }
+	else { $sbjct->ambiguous_aln('q');
+	       #$sbjct->debug( "\n*** AMBIGUOUS ALIGNMENT: Query\n\n");
+	   }
+    } elsif($soverlap) { 
+	$sbjct->ambiguous_aln('s'); 
+	#$sbjct->debug( "\n*** AMBIGUOUS ALIGNMENT: Sbjct\n\n");
+    }
+
+    _adjust_length_aln($sbjct);
+
+    # The contig lists are returned as fresh array refs (shallow copies).
+    return ( [@qcontigs], [@scontigs] );
+}
+
+
+
+# Title    : _adjust_length_aln
+# Usage    : n/a; internal use only; called by tile_hsps.
+# Purpose  : Adjust length of alignment based on BLAST flavor.
+# Comments : See comments in logical_length()
+sub _adjust_length_aln {
+    my ($sbjct) = @_;
+    my $algo = $sbjct->algorithm;
+
+    # Read both stored lengths first, then write the converted values back.
+    my $sbjct_len = $sbjct->length_aln('sbjct');
+    my $query_len = $sbjct->length_aln('query');
+
+    my $sbjct_logical = logical_length($algo, 'sbjct', $sbjct_len);
+    $sbjct->length_aln('sbjct', $sbjct_logical);
+
+    my $query_logical = logical_length($algo, 'query', $query_len);
+    $sbjct->length_aln('query', $query_logical);
+}
+
+=head2 logical_length
+
+ Usage     : logical_length( $alg_name, $seq_type, $length );
+ Purpose   : Determine the logical length of an aligned sequence based on 
+           : algorithm name and sequence type.
+ Returns   : integer representing the logical aligned length.
+ Argument  : $alg_name = name of algorithm (e.g., blastx, tblastn)
+           : $seq_type = type of sequence (e.g., query or hit)
+           : $length = physical length of the sequence in the alignment.
+ Throws    : n/a
+ Comments  : This function is used to account for the fact that number of identities 
+             and conserved residues is reported in peptide space while the query 
+             length (in the case of BLASTX and TBLASTX) and/or the hit length 
+             (in the case of TBLASTN and TBLASTX) are in nucleotide space.
+             The adjustment affects the values reported by the various frac_XXX 
+             methods in GenericHit and GenericHSP.
+
+=cut
+
+sub logical_length {
+    my ($algo, $type, $len) = @_;
+
+    # First matching algorithm family decides; a length is divided by 3
+    # only for the sequence types named in that family's pattern.
+    if ($algo =~ /^(?:PSI)?T(?:BLASTN|FAST(?:X|Y|XY))/oi ) {
+        return $type =~ /sbjct|hit|tot/i ? $len/3 : $len;
+    }
+
+    if ($algo =~ /^(?:BLASTX|FAST(?:X|Y|XY))/oi ) {
+        return $type =~ /query|tot/i ? $len/3 : $len;
+    }
+
+    # TBLASTX: every sequence type is divided.
+    return $len/3 if $algo =~ /^TBLASTX/oi;
+
+    # Any other algorithm: length is returned unchanged.
+    return $len;
+}
+
+
+#=head2 _adjust_contigs
+#
+# Usage     : n/a; internal function called by tile_hsps
+# Purpose   : Builds HSP contigs for a given BLAST hit.
+#           : Utility method called by tile_hsps()
+# Returns   : 
+# Argument  : 
+# Throws    : Exceptions propagated from Bio::Search::Hit::BlastHSP::matches()
+#           : for invalid sub-sequence ranges.
+# Status    : Experimental
+# Comments  : This method does not currently support gapped alignments.
+#           : Also, it does not keep track of the number of HSPs that
+#           : overlap within the amount specified by overlap().
+#           : This will lead to significant tracking errors for large
+#           : overlap values.
+#
+#See Also   : L<tile_hsps>(), L<Bio::Search::Hit::BlastHSP::matches|Bio::Search::Hit::BlastHSP>
+#
+#=cut
+
+sub _adjust_contigs {
+    # Folds one HSP (covering $start..$stop on the $seqType sequence) into the
+    # list of contigs held in @$contigs_ref, modifying that list in place.
+    # Each contig is a hash ref with the keys start, stop, iden, cons, strand,
+    # frame and hsps (array ref of the HSP objects merged into the contig).
+    # Returns 1 if the HSP was nested in, or merged into, an existing contig
+    # and 0 if it was appended as a brand new contig.
+    my ($seqType, $hsp, $start, $stop, $contigs_ref, 
+	$max_overlap, $frame, $strand) = @_;
+    my $overlap = 0;
+    my ($numID, $numCons);
+    
+    # Pass 1: try to attach the HSP to the first contig it overlaps.
+    foreach (@$contigs_ref) {
+        # Don't merge things unless they have matching strand/frame.
+        next unless ($_->{'frame'} == $frame && $_->{'strand'} == $strand);
+        
+        # Test special case of a nested HSP. Skip it.
+        # (The HSP adds no new coordinates, so the contig's counts and hsps
+        # list are left unchanged; only the overlap flag is raised.)
+        if ($start >= $_->{'start'} && $stop <= $_->{'stop'}) {
+            $overlap = 1; 
+            next;
+        }
+        
+        # Test for overlap at beginning of contig, or precedes consecutively
+        if ($start < $_->{'start'} && $stop >= ($_->{'start'} + $max_overlap - 1)) {
+            # Only count matches in the part of the HSP that lies before the
+            # contig, so positions already covered are not counted twice.
+            eval {
+                ($numID, $numCons) = $hsp->matches(-SEQ   =>$seqType, 
+                               -START => $start, 
+                               -STOP  => $_->{'start'} - 1); 
+            };
+            # A failure in matches() is reported but does not abort tiling;
+            # the contig is simply left unextended.
+            if($@) { warn "\a\n$@\n"; }
+            else {
+                $_->{'start'} = $start;	# Assign a new start coordinate to the contig
+                $_->{'iden'} += $numID;	# and add new data to #identical, #conserved.
+                $_->{'cons'} += $numCons;
+                push(@{$_->{hsps}}, $hsp);
+                $overlap     = 1; 
+            }
+        }
+        
+        # Test for overlap at end of contig, or follows consecutively
+        if ($stop > $_->{'stop'} and $start <= ($_->{'stop'} - $max_overlap + 1)) {
+            # Only count matches in the part of the HSP that lies beyond the
+            # contig's current stop.
+            eval {
+                ($numID,$numCons) = $hsp->matches(-SEQ   =>$seqType, 
+                              -START => $_->{'stop'} + 1, 
+                              -STOP  => $stop); 
+            };
+            if($@) { warn "\a\n$@\n"; }
+            else {
+                $_->{'stop'}  = $stop; # Assign a new stop coordinate to the contig
+                $_->{'iden'} += $numID;	# and add new data to #identical, #conserved.
+                $_->{'cons'} += $numCons;
+                push(@{$_->{hsps}}, $hsp);
+                $overlap    = 1; 
+            }
+        }
+        
+        # Stop at the first contig that absorbed (or contained) the HSP.
+        last if $overlap;
+    }
+    
+    # Pass 2: a contig that just grew may now overlap one of its neighbours.
+    if ($overlap && @$contigs_ref > 1) {
+        ## Merge any contigs that now overlap
+        my $max = $#{$contigs_ref};
+        for my $i (0..$max) {
+            # Slots set to undef below have already been merged away.
+            ${$contigs_ref}[$i] || next;
+            # NOTE(review): these bounds are read once per $i and are not
+            # refreshed after contig $i is extended in the inner loop -- confirm
+            # that this is intended.
+            my ($i_start, $i_stop) = (${$contigs_ref}[$i]->{start}, ${$contigs_ref}[$i]->{stop});
+            
+            for my $u ($i+1..$max) {
+                ${$contigs_ref}[$u] || next;
+                my ($u_start, $u_stop) = (${$contigs_ref}[$u]->{start}, ${$contigs_ref}[$u]->{stop});
+                
+                # NOTE(review): unlike pass 1, strand and frame are not compared
+                # here before two contigs are merged -- confirm.
+                if ($u_start < $i_start && $u_stop >= ($i_start + $max_overlap - 1)) {
+                    # find the hsps within the contig that have sequence
+                    # extending before $i_start
+                    my ($ids, $cons) = (0, 0);
+                    my $use_start = $i_start;
+                    # Walk contig $u's HSPs from the highest end downwards,
+                    # counting only the not-yet-counted stretch before $use_start.
+                    foreach my $hsp (sort { $b->end <=> $a->end } @{${$contigs_ref}[$u]->{hsps}}) {
+                        my $hsp_start = $hsp->start;
+                        $hsp_start < $use_start || next;
+                        
+                        my ($these_ids, $these_cons);
+                        eval {
+                            ($these_ids, $these_cons) = $hsp->matches(-SEQ => $seqType, -START => $hsp_start, -STOP => $use_start - 1);
+                        };
+                        if($@) { warn "\a\n$@\n"; }
+                        else {
+                            $ids  += $these_ids;
+                            $cons += $these_cons;
+                        }
+                        
+                        # Done once an HSP reaching contig $u's own start is seen.
+                        last if $hsp_start == $u_start;
+                        $use_start = $hsp_start;
+                    }
+                    # Contig $i absorbs contig $u on its left-hand side.
+                    ${$contigs_ref}[$i]->{start} = $u_start;
+                    ${$contigs_ref}[$i]->{'iden'} += $ids;
+                    ${$contigs_ref}[$i]->{'cons'} += $cons;
+                    push(@{${$contigs_ref}[$i]->{hsps}}, @{${$contigs_ref}[$u]->{hsps}});
+                    
+                    # Mark the absorbed contig for removal.
+                    ${$contigs_ref}[$u] = undef;
+                }
+                elsif ($u_stop > $i_stop && $u_start <= ($i_stop - $max_overlap + 1)) {
+                    # find the hsps within the contig that have sequence
+                    # extending beyond $i_stop
+                    my ($ids, $cons) = (0, 0);
+                    my $use_stop = $i_stop;
+                    # Walk contig $u's HSPs from the lowest start upwards,
+                    # counting only the not-yet-counted stretch after $use_stop.
+                    foreach my $hsp (sort { $a->start <=> $b->start } @{${$contigs_ref}[$u]->{hsps}}) {
+                        my $hsp_end = $hsp->end;
+                        $hsp_end > $use_stop || next;
+                        
+                        my ($these_ids, $these_cons);
+                        eval {
+                            ($these_ids, $these_cons) = $hsp->matches(-SEQ => $seqType, -START => $use_stop + 1, -STOP => $hsp_end);
+                        };
+                        if($@) { warn "\a\n$@\n"; }
+                        else {
+                            $ids  += $these_ids;
+                            $cons += $these_cons;
+                        }
+                        
+                        # Done once an HSP reaching contig $u's own stop is seen.
+                        last if $hsp_end == $u_stop;
+                        $use_stop = $hsp_end;
+                    }
+                    # Contig $i absorbs contig $u on its right-hand side.
+                    ${$contigs_ref}[$i]->{'stop'}  = $u_stop;
+                    ${$contigs_ref}[$i]->{'iden'} += $ids;
+                    ${$contigs_ref}[$i]->{'cons'} += $cons;
+                    push(@{${$contigs_ref}[$i]->{hsps}}, @{${$contigs_ref}[$u]->{hsps}});
+                    
+                    ${$contigs_ref}[$u] = undef;
+                }
+            }
+        }
+        
+        # Compact the list: drop the slots that were set to undef above.
+        my @merged;
+        foreach (@$contigs_ref) {
+            push(@merged, $_ || next);
+        }
+        @{$contigs_ref} = @merged;
+    }
+    elsif (! $overlap) {
+        ## If there is no overlap, add the complete HSP data.
+        # Note: matches() is not wrapped in eval here, so an exception
+        # propagates to the caller.
+        ($numID,$numCons) = $hsp->matches(-SEQ=>$seqType);
+        push @$contigs_ref, {'start' =>$start, 'stop' =>$stop,
+			     'iden'  =>$numID, 'cons' =>$numCons,
+			     'strand'=>$strand,'frame'=>$frame,'hsps'=>[$hsp]};
+    }
+    
+    return $overlap;
+}
+
+=head2 get_exponent
+
+ Usage     : &get_exponent( number );
+ Purpose   : Determines the power of 10 exponent of an integer, float, 
+           : or scientific notation number.
+ Example   : &get_exponent("4.0e-206");
+           : &get_exponent("0.00032");
+           : &get_exponent("10.");
+           : &get_exponent("1000.0");
+           : &get_exponent("e+83");
+ Argument  : Float, Integer, or scientific notation number
+ Returns   : Integer representing the exponent part of the number (+ or -).
+           : If argument == 0 (zero), return value is "-999".
+ Comments  : Exponents are rounded up (less negative) if the mantissa is >= 5.
+           : Exponents are rounded down (more negative) if the mantissa is <= -5.
+
+=cut
+
+sub get_exponent {
+    # Returns the power-of-10 exponent of $data, which may be written as an
+    # integer, a decimal number or in scientific notation. Zero yields -999.
+    my $data = shift;
+
+    # Split scientific notation into mantissa and exponent; $exp stays
+    # undefined when the input contains no 'e' or 'E'.
+    my($num, $exp) = split /[eE]/, $data;
+
+    if( defined $exp) { 
+	# Scientific notation. An empty or zero mantissa (e.g. "e+83") is
+	# treated as 1. A mantissa >= 5 increments the exponent and a
+	# mantissa <= -5 decrements it.
+	$num = 1 if not $num;
+	$num >= 5 and $exp++;
+	$num <= -5 and $exp--;
+    } elsif( $num == 0) {
+	# Sentinel value for zero, which has no meaningful exponent.
+	$exp = -999;
+    } elsif( not $num =~ /\./) {
+	# No decimal point: the exponent is the character count minus one.
+	# NOTE(review): a leading sign would be counted as a character here --
+	# confirm that callers only pass unsigned integers.
+	$exp = CORE::length($num) -1;
+    } else {
+	# Decimal number without an exponent part.
+	$exp = 0;
+	# Give a trailing '.' a digit so the loop below has something to chop.
+	$num .= '0' if $num =~ /\.$/;
+	my ($c);
+	my $rev = 0;
+	# Numbers that do not start with '0' are reversed, so that chop (which
+	# removes the LAST character) consumes the integer part; otherwise
+	# chop consumes the fractional part.
+	if($num !~ /^0/) {
+	    $num = reverse($num);
+	    $rev = 1;
+	}
+	# Chop characters up to and including the '.', counting every one that
+	# is numerically equal to 0. The '.' itself also compares == 0, so the
+	# terminating character is counted as well.
+	# NOTE(review): zero digits are counted wherever they occur among the
+	# chopped characters, and non-zero digits are never counted -- verify
+	# the result for inputs such as "0.0302" or "123.45".
+	do { $c = chop($num);
+	     $c == 0 && $exp++; 
+	 } while( $c ne '.');
+
+	# What is left of $num is the other side of the decimal point. For an
+	# un-reversed number whose remaining part is 0 the exponent is negative.
+	$exp = -$exp if $num == 0 and not $rev;
+	# Reversed numbers: take one off the count (presumably to compensate
+	# for the '.' counted by the loop above).
+	$exp -= 1 if $rev;
+    }
+    return $exp;
+}
+
+=head2 collapse_nums
+
+ Usage     : @cnums = collapse_nums( @numbers );
+ Purpose   : Collapses a list of numbers into a set of ranges of consecutive terms:
+           : Useful for condensing long lists of consecutive numbers.
+           :  EXPANDED:
+           :     1 2 3 4 5 6 10 12 13 14 15 17 18 20 21 22 24 26 30 31 32
+           :  COLLAPSED:
+           :     1-6 10 12-15 17 18 20-22 24 26 30-32
+ Argument  : List of numbers sorted numerically.
+ Returns   : List of numbers mixed with ranges of numbers (see above).
+ Throws    : n/a
+
+See Also   : L<Bio::Search::Hit::BlastHit::seq_inds()|Bio::Search::Hit::BlastHit>
+
+=cut
+
+sub collapse_nums {
+    # Collapses a numerically sorted list into single numbers and "from-to"
+    # ranges. A run of three or more consecutive numbers becomes one range
+    # string; a run of exactly two is emitted as two separate numbers.
+    #
+    # The start of the current run is tracked with defined() rather than by
+    # truth value, so that a 0 in the input is kept instead of being dropped.
+    my @nums = @_;
+    my ($from, $to, @ca);
+    my $consec = 0;    # how many numbers have extended the current run
+
+    # Emit the run that is currently being tracked in $from/$to/$consec.
+    my $flush = sub {
+        if    ($consec == 1) { push @ca, $from, $to; }
+        elsif ($consec >  1) { push @ca, "$from-$to"; }
+        else                 { push @ca, $from; }
+    };
+
+    for my $i (0 .. $#nums) {
+        # The first number seen opens the first run.
+        if (not defined $from) {
+            $from = $nums[$i];
+            next;
+        }
+        # A direct successor of the previous number extends the run.
+        if ($nums[$i] == $nums[$i-1] + 1) {
+            $to = $nums[$i];
+            $consec++;
+            next;
+        }
+        # Otherwise close the current run and open a new one here.
+        $flush->();
+        ($from, $to, $consec) = ($nums[$i], undef, 0);
+    }
+    # Close the final run, if there was any input at all.
+    $flush->() if defined $from;
+
+    return @ca;
+}
+
+
+=head2 strip_blast_html
+
+ Usage     : $boolean = &strip_blast_html( string_ref );
+           : This method is exported.
+ Purpose   : Removes HTML formatting from a supplied string.
+           : Attempts to restore the Blast report to enable
+           : parsing by Bio::SearchIO::blast.pm
+ Returns   : Boolean: true if string was stripped, false if not.
+ Argument  : string_ref = reference to a string containing the whole Blast
+           :              report containing HTML formatting.
+ Throws    : Croaks if the argument is not a scalar reference.
+ Comments  : Based on code originally written by Alex Dong Li
+           : (ali at genet.sickkids.on.ca).
+           : This method does some Blast-specific stripping 
+           : (adds back a '>' character in front of each HSP 
+           : alignment listing).
+           :   
+           : THIS METHOD IS VERY SENSITIVE TO BLAST FORMATTING CHANGES!
+           :
+           : Removal of the HTML tags and accurate reconstitution of the
+           : non-HTML-formatted report is highly dependent on structure of
+           : the HTML-formatted version. For example, it assumes that first 
+           : line of each alignment section (HSP listing) starts with a
+           : <a name=..> anchor tag. This permits the reconstruction of the 
+           : original report in which these lines begin with a ">".
+           : This is required for parsing.
+           :
+           : If the structure of the Blast report itself is not intended to
+           : be a standard, the structure of the HTML-formatted version
+           : is even less so. Therefore, the use of this method to
+           : reconstitute parsable Blast reports from HTML-format versions
+           : should be considered a temporary solution.
+
+=cut
+
+sub strip_blast_html {
+    # Strips HTML markup from the Blast report referenced by the argument,
+    # rewriting the referenced string in place.
+    # Returns 1 if any substitution was made and 0 otherwise; croaks if the
+    # argument is not a reference to a scalar.
+    #
+    # This may not be the best way to remove html tags. However, it is simple.
+    # It won't work under the following conditions:
+    #    1) if quoted > appears in a tag  (does this ever happen?)
+    #    2) if a tag is split over multiple lines and this method is
+    #       used to process one line at a time.
+
+    my ($string_ref) = shift;
+
+    ref($string_ref) eq 'SCALAR' or
+        croak("Can't strip HTML: ".
+              "Argument should be a SCALAR reference not a ${\ref $string_ref}\n");
+
+    my $str = $$string_ref;
+    my $stripped = 0;
+
+    # Removing "<a name =...>" and adding the '>' character for
+    # HSP alignment listings.
+    $str =~ s/(\A|\n)<a name ?=[^>]+> ?/>/sgi and $stripped = 1;
+
+    # Removing all "<>" tags and non-breaking-space entities. The optional
+    # ';' makes sure a complete "&nbsp;" entity leaves no stray semicolon.
+    $str =~ s/<[^>]+>|&nbsp;?//sgi and $stripped = 1;
+
+    # Re-uniting any lone '>' characters.
+    $str =~ s/(\A|\n)>\s+/\n\n>/sgi and $stripped = 1;
+
+    $$string_ref = $str;
+    return $stripped;
+}
+
+sub _warn_about_no_hsps {
+    # Emits a warning through the hit object, explaining that the method which
+    # invoked this helper needs HSP data that the hit does not have.
+    my ($hit) = @_;
+
+    # Fully qualified name of the sub that invoked this helper.
+    my $prev_func = (caller(1))[3];
+    my $hit_name  = $hit->name;
+
+    my $message = "There is no HSP data for hit '$hit_name'.\n";
+    $message .= "You have called a method ($prev_func)\n";
+    $message .= "that requires HSP data and there was no HSP data for this hit,\n";
+    $message .= "most likely because it was absent from the BLAST report.\n";
+    $message .= "Note that by default, BLAST lists alignments for the first 250 hits,\n";
+    $message .= "but it lists descriptions for 500 hits. If this is the case,\n";
+    $message .= "and you care about these hits, you should re-run BLAST using the\n";
+    $message .= "-b option (or equivalent if not using blastall) to increase the number\n";
+    $message .= "of alignments.\n";
+
+    $hit->warn($message);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Search/StatisticsI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Search/StatisticsI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Search/StatisticsI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,105 @@
+# 
+#
+# BioPerl module for wrapping runtime parameters
+#
+# Cared for by Chad Matsalla (bioinformatics1 at dieselwurks dot com)
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Search::StatisticsI - A Base object for statistics
+
+=head1 SYNOPSIS
+
+  # do not use this object directly, it provides the following methods
+  # for its subclasses
+
+  my $void   = $obj->set_statistic("statistic_name","statistic_value"); 
+  my $value  = $obj->get_statistic("statistic_name");
+
+=head1 DESCRIPTION
+
+This is a basic container to hold the statistics returned from a program.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks dot com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Search::StatisticsI;
+use strict;
+
+# Object preamble - inherits from Bio::Root::RootI
+
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 get_statistic
+
+ Title   : get_statistic
+ Usage   : $statistic_object->get_statistic($statistic_name);
+ Function: Get the value of a statistic named $statistic_name
+ Returns : A scalar that should be a string
+ Args    : A scalar that should be a string
+
+=cut
+
+sub get_statistic {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # $arg is the name of the statistic requested by the caller.
+    my $self = shift;
+    my ($arg) = @_;
+    return $self->throw_not_implemented;
+}
+
+
+=head2 set_statistic
+
+ Title   : set_statistic
+ Usage   : $statistic_object->set_statistic($statistic_name => $statistic_value);
+ Function: Set the value of a statistic named $statistic_name to $statistic_value
+ Returns : Void
+ Args    : A hash containing name=>value pairs
+
+=cut
+
+sub set_statistic {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # $name and $value are the statistic's name and the value to store.
+    my $self = shift;
+    my ($name, $value) = @_;
+    return $self->throw_not_implemented;
+}
+
+
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Search/StatisticsI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchDist.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchDist.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchDist.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,244 @@
+# $Id: SearchDist.pm,v 1.20.4.4 2006/10/02 23:10:12 sendu Exp $
+
+#
+# BioPerl module for Bio::SearchDist
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchDist - A perl wrapper around Sean Eddy's histogram object
+
+=head1 SYNOPSIS
+
+  $dis = Bio::SearchDist->new();
+  foreach $score ( @scores ) {
+     $dis->add_score($score);
+  }
+
+  if( $dis->fit_evd() ) {
+    foreach $score ( @scores ) {
+      $evalue = $dis->evalue($score);
+      print "Score $score had an evalue of $evalue\n";
+    }
+  } else {
+    warn("Could not fit histogram to an EVD!");
+  }
+
+=head1 DESCRIPTION
+
+The Bio::SearchDist object is a wrapper around Sean Eddy's excellent
+histogram object. The histogram object can basically take in a number
+of scores which are sensibly distributed somewhere around 0 that come
+from a supposed Extreme Value Distribution. Having added all the scores
+from a database search via the add_score method you can then fit an
+extreme value distribution using fit_evd(). Once fitted you can then
+get out the evalue for each score (or a new score) using
+evalue($score).
+
+The fitting procedure is better described in Sean Eddy's own code
+(available from http://hmmer.wustl.edu, or in the histogram.h header
+file in Compile/SW). Basically it fits an EVD via a maximum likelihood
+method with pruning of the top end of the distribution so that real
+positives are discarded in the fitting procedure. This comes from
+an original idea of Richard Mott's and the likelihood fitting
+is from a book by Lawless [should ref here].
+
+
+The object relies on the fact that the scores are sensibly distributed
+around about 0 and that integer bins are sensible for the
+histogram. Scores based on bits are often ideal for this (bits based
+scoring mechanisms is what this histogram object was originally
+designed for).
+
+
+=head1 CONTACT
+
+The original code this was based on comes from the histogram module as
+part of the HMMer2 package. Look at http://hmmer.wustl.edu/
+
+Its use in Bioperl is via the Compiled XS extension which is cared for
+by Ewan Birney (birney at ebi.ac.uk). Please contact Ewan first about
+the use of this module
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchDist;
+use strict;
+
+
+BEGIN {
+    # The histogram engine lives in the compiled Bio::Ext::Align extension.
+    # If it cannot be loaded, report the load error and terminate the
+    # program at compile time.
+    eval { require Bio::Ext::Align; };
+    my $load_error = $@;
+    if ($load_error) {
+        print $load_error;
+        print STDERR "\nThe C-compiled engine for histogram object (Bio::Ext::Align) has not been installed.\n Please install the bioperl-ext package\n\n";
+        exit(1);
+    }
+}
+
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor. Recognised named arguments are -MIN, -MAX and -LUMP,
+    # which are passed on to the histogram engine; they default to -100,
+    # +100 and 50 respectively.
+    #
+    # The argument list is unpacked into @args (the archived text read
+    # " at args", which does not compile and left @args undeclared below).
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($min, $max, $lump) =
+        $self->_rearrange([qw(MIN MAX LUMP)], @args);
+
+    # Any false value selects the default, exactly as before.
+    # NOTE(review): this means an explicit 0 for -MIN or -MAX is replaced by
+    # the default as well -- confirm that this is intended.
+    $min  ||= -100;
+    $max  ||= +100;
+    $lump ||= 50;
+
+    $self->_engine(&Bio::Ext::Align::new_Histogram($min,$max,$lump));
+
+    return $self;
+}
+
+=head2 add_score
+
+ Title   : add_score
+ Usage   : $dis->add_score(300);
+ Function: Adds a single score to the distribution
+ Returns : nothing
+ Args    :
+
+
+=cut
+
+sub add_score {
+    # Hands a single score to the underlying histogram engine's add().
+    my $self  = shift;
+    my $score = shift;
+    return $self->_engine()->add($score);
+}
+
+=head2 fit_evd
+
+ Title   : fit_evd
+ Usage   : $dis->fit_evd();
+ Function: fits an evd to the current distribution
+ Returns : 1 if it fits successfully, 0 if not
+ Args    :
+
+
+=cut
+
+sub fit_evd {
+    # Asks the histogram engine to fit an extreme value distribution, always
+    # calling fit_EVD(10000,1), and returns the engine's result.
+    # Extra arguments are accepted but not used. (The archived text read
+    # " at args" here, which does not compile.)
+    my ($self, @args) = @_;
+
+    return $self->_engine()->fit_EVD(10000,1);
+}
+
+=head2 fit_Gaussian
+
+ Title   : fit_Gaussian
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub fit_Gaussian {
+    # Asks the histogram engine to fit a Gaussian and returns its result.
+    # $high is passed through to the engine; it defaults to 10000 when
+    # it is not defined.
+    my ($self, $high) = @_;
+    $high = 10000 unless defined $high;
+    return $self->_engine()->fit_Gaussian($high);
+}
+
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : $eval = $dis->evalue($score)
+ Function: Returns the evalue of this score
+ Returns : float
+ Args    :
+
+
+=cut
+
+sub evalue {
+    # Looks up the E-value of $score in the underlying histogram engine.
+    my $self   = shift;
+    my $score  = shift;
+    my $engine = $self->_engine();
+    return $engine->evalue($score);
+}
+
+
+
+=head2 _engine
+
+ Title   : _engine
+ Usage   : $obj->_engine($newval)
+ Function: underlying bp_sw:: histogram engine
+ Returns : value of _engine
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _engine {
+    # Combined getter/setter for the histogram engine stored under the
+    # '_engine' key. A defined argument replaces the stored engine; the
+    # stored engine is returned in either case.
+    my $self = shift;
+    my ($value) = @_;
+    $self->{'_engine'} = $value if defined $value;
+    return $self->{'_engine'};
+}
+
+
+## End of Package
+
+1;
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/EventHandlerI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/EventHandlerI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/EventHandlerI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,257 @@
+# $Id: EventHandlerI.pm,v 1.13.4.1 2006/10/02 23:10:25 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::EventHandlerI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::EventHandlerI - An abstract Event Handler for Search Result parsing
+
+=head1 SYNOPSIS
+
+# do not use this object directly it is an interface
+# See Bio::SearchIO::SearchResultEventBuilder for an implementation
+
+    use Bio::SearchIO::SearchResultEventBuilder;
+    my $handler = new Bio::SearchIO::SearchResultEventBuilder();
+
+=head1 DESCRIPTION
+
+This interface describes the basic methods needed to handle Events
+thrown from parsing a Search Result such as FASTA, BLAST, or HMMer.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::EventHandlerI;
+use strict;
+use Carp;
+
+
+use base qw(Bio::Event::EventHandlerI);
+
+=head2 start_result
+
+ Title   : start_result
+ Usage   : $handler->start_result($data)
+ Function: Begins a result event cycle
+ Returns : none 
+ Args    : Type of Result
+
+=cut
+
+sub start_result {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 end_result
+
+ Title   : end_result
+ Usage   : $handler->end_result($data)
+ Function: Ends a result event cycle
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_result {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 start_hsp
+
+ Title   : start_hsp
+ Usage   : $handler->start_hsp($data)
+ Function: Start a HSP event cycle
+ Returns : none
+ Args    : type of element
+           associated hashref
+
+=cut
+
+sub start_hsp {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 end_hsp
+
+ Title   : end_hsp
+ Usage   : $handler->end_hsp()
+ Function: Ends a HSP event cycle
+ Returns : Bio::Search::HSP::HSPI object
+ Args    : type of event and associated hashref
+
+=cut
+
+sub end_hsp {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 start_hit
+
+ Title   : start_hit
+ Usage   : $handler->start_hit()
+ Function: Starts a Hit event cycle
+ Returns : none
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub start_hit {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 end_hit
+
+ Title   : end_hit
+ Usage   : $handler->end_hit()
+ Function: Ends a Hit event cycle
+ Returns : Bio::Search::Hit::HitI object
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub end_hit {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 start_iteration
+
+ Title   : start_iteration
+ Usage   : $handler->start_iteration()
+ Function: Starts an Iteration event cycle
+ Returns : none
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub start_iteration {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 end_iteration
+
+ Title   : end_iteration
+ Usage   : $handler->end_iteration()
+ Function: Ends an Iteration event cycle
+ Returns : Bio::Search::Iteration::IterationI object
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub end_iteration {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 register_factory
+
+ Title   : register_factory
+ Usage   : $handler->register_factory('TYPE',$factory);
+ Function: Register a specific factory for a object type class
+ Returns : none
+ Args    : string representing the class and
+           Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub register_factory {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 factory
+
+ Title   : factory
+ Usage   : my $f = $handler->factory('TYPE');
+ Function: Retrieves the associated factory for requested 'TYPE'
+ Returns : a Bio::Factory::ObjectFactoryI
+ Throws  : Bio::Root::BadParameter if none registered for the supplied type
+ Args    : name of factory class to retrieve
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub factory {
+    # Interface stub: delegates to throw_not_implemented on the invocant.
+    # Arguments are unpacked into @args (the archived text read " at args",
+    # which does not compile).
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 Bio::Event::EventHandlerI methods
+
+=cut
+
+=head2 will_handle
+
+ Title   : will_handle
+ Usage   : if( $handler->will_handle($event_type) ) { ... }
+ Function: Tests if this event builder knows how to process a specific event
+ Returns : boolean
+ Args    : event type name
+
+
+=cut
+
+=head2 SAX methods
+
+See L<Bio::Event::EventHandlerI> for the additional SAX methods.
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/FastHitEventBuilder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/FastHitEventBuilder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/FastHitEventBuilder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,296 @@
+# $Id: FastHitEventBuilder.pm,v 1.13.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::FastHitEventBuilder
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::FastHitEventBuilder - Event Handler for SearchIO events.
+
+=head1 SYNOPSIS
+
+  # Do not use this object directly, this object is part of the SearchIO
+  # event based parsing system.
+
+  # to use the FastHitEventBuilder do this
+
+  use Bio::SearchIO::FastHitEventBuilder;
+
+  my $searchio = new Bio::SearchIO(-format => $format, -file => $file);
+
+  $searchio->attach_EventHandler(new Bio::SearchIO::FastHitEventBuilder);
+
+  while( my $r = $searchio->next_result ) {
+   while( my $h = $r->next_hit ) {
+    # note that Hits will NOT have HSPs
+   }
+  }
+
+=head1 DESCRIPTION
+
+This object handles Search Events generated by the SearchIO classes
+and builds appropriate Bio::Search::* objects from them.  This object
+is intended for lightweight parsers which only want Hits and do not
+want the overhead of HSPs.  It is a lot faster than the standard
+parser event handler, but of course you are getting less information
+and fewer objects out.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::FastHitEventBuilder;
+use vars qw(%KNOWNEVENTS);
+use strict;
+
+use Bio::Search::HSP::HSPFactory;
+use Bio::Search::Hit::HitFactory;
+use Bio::Search::Result::ResultFactory;
+
+use base qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::FastHitEventBuilder();
+ Function: Builds a new Bio::SearchIO::FastHitEventBuilder object 
+ Returns : Bio::SearchIO::FastHitEventBuilder
+ Args    : -hit_factory    => Bio::Factory::ObjectFactoryI
+           -result_factory => Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub new {
+    my ($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($hitF,$resultF) = $self->_rearrange([qw(HIT_FACTORY
+                                                RESULT_FACTORY)],@args);
+
+    # A factory passed in by the caller wins; otherwise a generic default
+    # factory is registered for that object type.
+    # NOTE(review): Bio::Factory::ObjectFactory is not loaded by this
+    # module's own 'use' lines; presumably another module has already
+    # loaded it by the time new() runs -- confirm.
+    $self->register_factory('hit', $hitF ||
+                            Bio::Factory::ObjectFactory->new(
+                                      -type      => 'Bio::Search::Hit::GenericHit',
+                                      -interface => 'Bio::Search::Hit::HitI'));
+
+    $self->register_factory('result', $resultF ||
+                            Bio::Factory::ObjectFactory->new(
+                                      -type      => 'Bio::Search::Result::GenericResult',
+                                      -interface => 'Bio::Search::Result::ResultI'));
+
+    return $self;
+}
+
+# new (above) calls the superclass constructor, then registers the factories
+
+=head2 will_handle
+
+ Title   : will_handle
+ Usage   : if( $handler->will_handle($event_type) ) { ... }
+ Function: Tests if this event builder knows how to process a specific event
+ Returns : boolean
+ Args    : event type name
+
+
+=cut
+
+sub will_handle{
+   my ($self,$event) = @_;
+   # Only 'hit' and 'result' events are recognized by this builder.
+   return 1 if $event eq 'hit';
+   return $event eq 'result';
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_result
+
+ Title   : start_result
+ Usage   : $handler->start_result($resulttype)
+ Function: Begins a result event cycle
+ Returns : none 
+ Args    : Type of Report
+
+=cut
+
+sub start_result {
+   my $self        = shift;
+   my $report_type = shift;
+   # Remember the report type and begin the result with an empty hit list.
+   @{$self}{'_resulttype','_hits'} = ($report_type, []);
+   return;
+}
+
+=head2 end_result
+
+ Title   : end_result
+ Usage   : my @results = $parser->end_result
+ Function: Finishes a result handler cycle
+ Returns : A Bio::Search::Result::ResultI
+ Args    : none
+
+=cut
+
+sub end_result {
+    my ($self,$type,$data) = @_;
+    # A non-blank 'runid' supplies the query name (or, for lcl| ids, says
+    # that the name must be taken from the query description).
+    if( defined $data->{'runid'} &&
+        $data->{'runid'} !~ /^\s+$/ ) {
+
+        if( $data->{'runid'} !~ /^lcl\|/) {
+            $data->{"RESULT-query_name"}= $data->{'runid'};
+        } else {
+            # first word of the description becomes the name, the rest
+            # stays as the description
+            ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) =
+                split(/\s+/,$data->{"RESULT-query_description"},2);
+        }
+
+        if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
+            my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
+            # this is for |123|gb|ABC1.1|
+            $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
+            $data->{"RESULT-query_accession"}= $acc;
+        }
+        delete $data->{'runid'};
+    }
+    # Every 'RESULT-foo' entry becomes a '-foo' constructor argument.
+    my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
+               grep { /^RESULT/ } keys %{$data};
+
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || $type);
+    $args{'-hits'}      =  $self->{'_hits'};
+    # Build through create_object(), the same factory call that
+    # Bio::SearchIO::IteratedSearchResultEventBuilder makes.
+    # NOTE(review): confirm every factory registered here provides
+    # create_object() (register_factory requires Bio::Factory::ObjectFactoryI).
+    my $result = $self->factory('result')->create_object(%args);
+    # the collected hits now belong to the result; start over
+    $self->{'_hits'} = [];
+    return $result;
+}
+
+=head2 start_hit
+
+ Title   : start_hit
+ Usage   : $handler->start_hit()
+ Function: Starts a Hit event cycle
+ Returns : none
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub start_hit{
+    # Nothing is set up when a hit starts; the event type is ignored.
+    my $self = shift;
+    return;
+}
+
+
+=head2 end_hit
+
+ Title   : end_hit
+ Usage   : $handler->end_hit()
+ Function: Ends a Hit event cycle
+ Returns : Bio::Search::Hit::HitI object
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub end_hit{
+    my ($self,$type,$data) = @_;
+    # Every 'HIT-foo' entry becomes a '-foo' constructor argument.
+    my %args = map { my $v = $data->{$_}; s/HIT//; ($_ => $v); } grep { /^HIT/ } keys %{$data};
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || $type);
+    $args{'-query_len'} =  $data->{'RESULT-query_length'};
+    # rank is the 1-based position of this hit within the current result
+    my ($hitrank) = scalar @{$self->{'_hits'}} + 1;
+    $args{'-rank'} = $hitrank;
+    # Build through create_object(), the same factory call that
+    # Bio::SearchIO::IteratedSearchResultEventBuilder makes.
+    # NOTE(review): confirm every factory registered here provides
+    # create_object() (register_factory requires Bio::Factory::ObjectFactoryI).
+    my $hit = $self->factory('hit')->create_object(%args);
+    push @{$self->{'_hits'}}, $hit;
+    $self->{'_hsps'} = [];
+    return $hit;
+}
+
+=head2 Factory methods
+
+=cut
+
+=head2 register_factory
+
+ Title   : register_factory
+ Usage   : $handler->register_factory('TYPE',$factory);
+ Function: Register a specific factory for a object type class
+ Returns : none
+ Args    : string representing the class and
+           Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub register_factory{
+   my ($self, $type,$f) = @_;
+   require Scalar::Util;
+   # Only blessed objects can answer isa(); checking blessed() first keeps an
+   # unblessed reference (e.g. a plain hashref) from dying with a raw Perl
+   # "Can't call method" error instead of the exception thrown below.
+   if( ! defined $f || ! Scalar::Util::blessed($f) ||
+       ! $f->isa('Bio::Factory::ObjectFactoryI') ) {
+       # avoid an "uninitialized value" warning when no factory was passed
+       my $shown = defined $f ? $f : 'undef';
+       $self->throw("Cannot set factory to value $shown".ref($f)."\n");
+   }
+   # factories are stored under the lower-cased type name
+   $self->{'_factories'}->{lc($type)} = $f;
+}
+
+
+=head2 factory
+
+ Title   : factory
+ Usage   : my $f = $handler->factory('TYPE');
+ Function: Retrieves the associated factory for requested 'TYPE'
+ Returns : a Bio::Factory::ObjectFactoryI
+ Throws  : an exception if no factory is registered for the supplied type
+ Args    : name of factory class to retrieve
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+sub factory{
+   my ($self,$type) = @_;
+   # factories are stored under the lower-cased type name
+   my $registered = $self->{'_factories'}->{lc($type)};
+   return $registered if $registered;
+   return $self->throw("No factory registered for $type");
+}
+
+=head2 inclusion_threshold
+
+See L<Bio::SearchIO::blast::inclusion_threshold>.
+
+=cut
+
+sub inclusion_threshold {
+    my ($self,@value) = @_;
+    # with an argument this is a setter; either way the stored value is returned
+    $self->{'_inclusion_threshold'} = $value[0] if @value;
+    return $self->{'_inclusion_threshold'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/IteratedSearchResultEventBuilder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/IteratedSearchResultEventBuilder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/IteratedSearchResultEventBuilder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,563 @@
+#------------------------------------------------------------------
+# $Id: IteratedSearchResultEventBuilder.pm,v 1.10.4.2 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::IteratedSearchResultEventBuilder
+#
+# Cared for by Steve Chervitz <sac at bioperl.org> and Jason Stajich <jason at bioperl.org>
+#
+# Copyright Steve Chervitz
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::IteratedSearchResultEventBuilder - Event Handler for
+SearchIO events.
+
+=head1 SYNOPSIS
+
+# Do not use this object directly, this object is part of the SearchIO
+# event based parsing system.
+
+=head1 DESCRIPTION
+
+This object handles Search Events generated by the SearchIO classes
+and builds appropriate Bio::Search::* objects from them.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Steve Chervitz
+
+Email sac-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Parts of code based on SearchResultEventBuilder by Jason Stajich
+jason at bioperl.org
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::IteratedSearchResultEventBuilder;
+use vars qw(%KNOWNEVENTS             $DEFAULT_INCLUSION_THRESHOLD
+            $MAX_HSP_OVERLAP
+);
+
+use strict;
+
+use Bio::Factory::ObjectFactory;
+
+use base qw(Bio::SearchIO::SearchResultEventBuilder);
+
+# e-value threshold for inclusion in the PSI-BLAST score matrix model (blastpgp)
+# NOTE: Executing `blastpgp -` incorrectly reports that the default is 0.005.
+#       (version 2.2.2 [Jan-08-2002])
+$DEFAULT_INCLUSION_THRESHOLD = 0.001;
+
+
+$MAX_HSP_OVERLAP  = 2;  # Used when tiling multiple HSPs.
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::IteratedSearchResultEventBuilder();
+ Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object 
+ Returns : Bio::SearchIO::IteratedSearchResultEventBuilder
+ Args    : -hsp_factory    => Bio::Factory::ObjectFactoryI
+           -hit_factory    => Bio::Factory::ObjectFactoryI
+           -result_factory => Bio::Factory::ObjectFactoryI
+           -iteration_factory => Bio::Factory::ObjectFactoryI
+           -inclusion_threshold => e-value threshold for inclusion in the
+                                   PSI-BLAST score matrix model (blastpgp)
+           -signif      => float or scientific notation number to be used
+                           as a P- or Expect value cutoff
+           -score       => integer or scientific notation number to be used
+                           as a blast score value cutoff
+           -bits        => integer or scientific notation number to be used
+                           as a bit score value cutoff
+           -hit_filter  => reference to a function to be used for
+                           filtering hits based on arbitrary criteria.
+
+
+See L<Bio::SearchIO::SearchResultEventBuilder> for more information
+
+=cut
+
+sub new {
+    my ($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($hitF, $resultF, $hspF, $iterationF) =
+        $self->_rearrange([qw(
+                              HIT_FACTORY
+                              RESULT_FACTORY
+                              HSP_FACTORY
+                              ITERATION_FACTORY
+                             )],@args);
+
+    # thresholds and filters (-inclusion_threshold, -signif, -score, ...)
+    $self->_init_parse_params(@args);
+
+    # Note that we need to override the setting of result and factories here
+    # so that we can set different default factories than are set by the super class.
+    # A factory passed in by the caller always takes precedence.
+    $self->register_factory('result', $resultF ||
+                            Bio::Factory::ObjectFactory->new(
+                                 -type      => 'Bio::Search::Result::BlastResult',
+                                 -interface => 'Bio::Search::Result::ResultI'));
+
+    $self->register_factory('hit', $hitF ||
+                            Bio::Factory::ObjectFactory->new(
+                                 -type      => 'Bio::Search::Hit::BlastHit',
+                                 -interface => 'Bio::Search::Hit::HitI'));
+
+    $self->register_factory('hsp', $hspF ||
+                            Bio::Factory::ObjectFactory->new(
+                                 -type      => 'Bio::Search::HSP::GenericHSP',
+                                 -interface => 'Bio::Search::HSP::HSPI'));
+
+    # TODO: Change this to BlastIteration (maybe)
+    $self->register_factory('iteration', $iterationF ||
+                            Bio::Factory::ObjectFactory->new(
+                                 -type      => 'Bio::Search::Iteration::GenericIteration',
+                                 -interface => 'Bio::Search::Iteration::IterationI'));
+    return $self;
+}
+
+
+#Initializes parameters used during parsing of Blast reports.
+sub _init_parse_params {
+
+    my ($self, @args) = @_;
+    # -FILT_FUNC has been replaced by -HIT_FILTER.
+    # Leaving -FILT_FUNC in place for backward compatibility
+    my($ithresh, $signif, $score, $bits, $hit_filter, $filt_func) =
+           $self->_rearrange([qw(INCLUSION_THRESHOLD
+                                 SIGNIF SCORE BITS HIT_FILTER FILT_FUNC
+                                )], @args);
+
+    # fall back to the package default when no threshold was supplied
+    $self->inclusion_threshold( defined($ithresh) ? $ithresh : $DEFAULT_INCLUSION_THRESHOLD);
+    # -HIT_FILTER takes precedence; the legacy -FILT_FUNC is used otherwise.
+    my $hit_filt = $hit_filter || $filt_func;
+    # Install the filter when either spelling was supplied.  Testing only
+    # $hit_filter here left a lone -FILT_FUNC silently ignored.
+    $self->hit_filter($hit_filt)
+        if ( defined($hit_filter) || defined($filt_func) );
+    # each cutoff is only activated when its parameter was supplied
+    defined $signif     && $self->max_significance($signif);
+    defined $score      && $self->min_score($score);
+    defined $bits       && $self->min_bits($bits);
+}
+
+=head2 will_handle
+
+ Title   : will_handle
+ Usage   : if( $handler->will_handle($event_type) ) { ... }
+ Function: Tests if this event builder knows how to process a specific event
+ Returns : boolean
+ Args    : event type name
+
+
+=cut
+
+sub will_handle{
+   my ($self,$event) = @_;
+   # the event names this builder recognizes, tested in turn
+   for my $known (qw(hsp hit result iteration newhits)) {
+       return 1 if $event eq $known;
+   }
+   return $event eq 'oldhits';
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_result
+
+ Title   : start_result
+ Usage   : $handler->start_result($resulttype)
+ Function: Begins a result event cycle
+ Returns : none 
+ Args    : Type of Report
+
+=cut
+
+sub start_result {
+   my ($self,@event_args) = @_;
+   $self->SUPER::start_result(@event_args);
+   # no iterations have been collected for this result yet
+   $self->{'_iterations'}      = [];
+   $self->{'_iteration_count'} = 0;
+   # hit-name lookups used to classify hits as old/new and below/above threshold
+   @{$self}{'_old_hit_names','_hit_names_below'} = (undef, undef);
+   return;
+}
+
+=head2 end_result
+
+ Title   : end_result
+ Usage   : my @results = $parser->end_result
+ Function: Finishes a result handler cycle 
+ Returns : A Bio::Search::Result::ResultI
+ Args    : none
+
+=cut
+
+sub end_result {
+    my ($self,$type,$data) = @_;
+    ## How is runid getting set? Purpose?
+    my $runid = $data->{'runid'};
+    if( defined $runid && $runid !~ /^\s+$/ ) {
+
+        if( $runid =~ /^lcl\|/ ) {
+            # first word of the description becomes the name, the rest
+            # stays as the description
+            ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) =
+                split(/\s+/,$data->{"RESULT-query_description"},2);
+        } else {
+            $data->{"RESULT-query_name"} = $runid;
+        }
+
+        my @fields = split(/\|/,$data->{'RESULT-query_name'});
+        if( @fields ) {
+            my $acc = pop @fields; # this is for accession |1234|gb|AAABB1.1|AAABB1
+            # this is for |123|gb|ABC1.1|
+            if( ! defined $acc || $acc =~ /^\s+$/ ) {
+                $acc = pop @fields;
+            }
+            $data->{"RESULT-query_accession"} = $acc;
+        }
+        delete $data->{'runid'};
+    }
+
+    # Every 'RESULT-foo' entry becomes a '-foo' constructor argument.
+    my %args;
+    for my $key ( grep { /^RESULT/ } keys %{$data} ) {
+        (my $arg = $key) =~ s/RESULT//;
+        $args{$arg} = $data->{$key};
+    }
+
+    $args{'-algorithm'}  = uc( $args{'-algorithm_name'} ||
+                               $data->{'RESULT-algorithm_name'} || $type);
+    $args{'-iterations'} = $self->{'_iterations'};
+
+    my $result = $self->factory('result')->create_object(%args);
+    $result->hit_factory($self->factory('hit'));
+    # the collected iterations now belong to the result; start over
+    $self->{'_iterations'} = [];
+    return $result;
+}
+
+
+# Title   : _add_hit (private function for internal use only)
+# Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
+# Argument: Bio::Search::Hit::HitI object 
+
+sub _add_hit {
+    my ($self, $hit) = @_;
+
+    # Read name and significance first: $hit is replaced by an object below
+    # when a custom filter is in use.
+    my $hit_name   = uc($hit->{-name});
+    my $hit_signif = $hit->{-significance};
+    my $ithresh    = $self->{'_inclusion_threshold'};
+
+    my $keep = 1;
+
+    if( my $filter = $self->{'_hit_filter'} ) {
+        # The custom filter is out of our control and would expect a HitI
+        # object, so one has to be built for it.
+        $hit  = $self->factory('hit')->create_object(%{$hit});
+        $keep = 0 unless $filter->($hit);
+    }
+    else {
+        # Built-in cutoffs; every enabled check is evaluated on its own.
+        if( $self->{'_confirm_significance'} &&
+            !( $hit_signif <= $self->{'_max_significance'} ) ) {
+            $keep = 0;
+        }
+        if( $self->{'_confirm_score'} ) {
+            # fall back to the first HSP when the hit carries no score
+            my $score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
+            $keep = 0 unless $score >= $self->{'_min_score'};
+        }
+        if( $self->{'_confirm_bits'} ) {
+            # fall back to the first HSP when the hit carries no bit score
+            my $bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
+            $keep = 0 unless $bits >= $self->{'_min_bits'};
+        }
+    }
+
+    $self->_store_hit($hit, $hit_name, $hit_signif) if $keep;
+    # Building hit lookup hashes for determining if the hit is old/new and
+    # above/below threshold.  Done for every hit, stored or not.
+    $self->{'_old_hit_names'}->{$hit_name}++;
+    $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
+}
+
+
+# Title   : _store_hit (private function for internal use only)
+# Purpose : Collects hit objects into defined sets that are useful for 
+#           analyzing PSI-blast results.
+#           These are ultimately added to the iteration object in end_iteration().
+#
+# Strategy:
+#   Primary split = old vs. new
+#   Secondary split = below vs. above threshold
+#   1. Has this hit occurred in a previous iteration?
+#   1.1. If yes, was it below threshold?
+#   1.1.1. If yes, ---> [oldhits_below] 
+#   1.1.2. If no, is it now below threshold?
+#   1.1.2.1. If yes, ---> [oldhits_newly_below] 
+#   1.1.2.2. If no, ---> [oldhits_not_below] 
+#   1.2. If no, is it below threshold?
+#   1.2.1. If yes, ---> [newhits_below] 
+#   1.2.2. If no, ---> [newhits_not_below] 
+#   1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified] 
+#   Note: As long as there's a default inclusion threshold, 
+#         there won't be an unclassified set.
+#
+# For the first iteration, it might be nice to detect non-PSI blast reports
+# and put the hits in the unclassified set.
+# However, it shouldn't matter where the hits get put for the first iteration
+# for non-PSI blast reports since they'll get flattened out in the
+# result and iteration search objects.
+#
+sub _store_hit {
+    my ($self, $hit, $hit_name, $hit_signif) = @_;
+
+    my $ithresh = $self->{'_inclusion_threshold'};
+
+    # Choose the set this hit belongs to: old vs. new first, then
+    # below vs. not below the inclusion threshold.
+    my $set;
+    if (exists $self->{'_old_hit_names'}->{$hit_name}) {
+        $set = (exists $self->{'_hit_names_below'}->{$hit_name}) ? '_oldhits_below'
+             : ($hit_signif <= $ithresh)                         ? '_oldhits_newly_below'
+             :                                                     '_oldhits_not_below';
+    } else {
+        $set = ($hit_signif <= $ithresh) ? '_newhits_below'
+             :                             '_newhits_not_below';
+    }
+    push @{$self->{$set}}, $hit;
+    $self->{'_hitcount'}++;
+}
+
+=head2 start_iteration
+
+ Title   : start_iteration
+ Usage   : $handler->start_iteration()
+ Function: Starts an Iteration event cycle
+ Returns : none
+ Args    : type of event and associated hashref
+
+=cut
+
+sub start_iteration {
+    my ($self,$type) = @_;
+
+    $self->{'_iteration_count'}++;
+
+    # Empty the per-iteration hit sets (there is no unclassified set).
+    for my $set (qw(_newhits_below
+                    _newhits_not_below
+                    _oldhits_below
+                    _oldhits_newly_below
+                    _oldhits_not_below)) {
+        $self->{$set} = [];
+    }
+    $self->{'_hitcount'} = 0;
+    return;
+}
+
+
+=head2 end_iteration
+
+ Title   : end_iteration
+ Usage   : $handler->end_iteration()
+ Function: Ends an Iteration event cycle
+ Returns : Bio::Search::Iteration object
+ Args    : type of event and associated hashref
+
+
+=cut
+
+sub end_iteration {
+    my ($self,$type,$data) = @_;
+
+    # Every 'ITERATION-foo' entry becomes a '-foo' constructor argument.
+    my %args;
+    for my $key ( grep { /^ITERATION/ } keys %{$data} ) {
+        (my $arg = $key) =~ s/ITERATION//;
+        $args{$arg} = $data->{$key};
+    }
+
+    $args{'-number'} = $self->{'_iteration_count'};
+    # hand over the hit sets collected during this iteration
+    for my $set (qw(oldhits_below
+                    oldhits_newly_below
+                    oldhits_not_below
+                    newhits_below
+                    newhits_not_below)) {
+        $args{"-$set"} = $self->{"_$set"};
+    }
+    $args{'-hit_factory'} = $self->factory('hit');
+
+    my $iteration = $self->factory('iteration')->create_object(%args);
+    push @{$self->{'_iterations'}}, $iteration;
+    return $iteration;
+}
+
+=head2 max_significance
+
+ Usage     : $obj->max_significance();
+ Purpose   : Set/Get the P or Expect value used as significance screening cutoff.
+             This is the value of the -signif parameter supplied to new().
+             Hits with P or E-value above this are skipped.
+ Returns   : Scientific notation number with this format: 1.0e-05.
+ Argument  : Number (sci notation, float, integer) (when setting)
+ Throws    : Bio::Root::BadParameter exception if the supplied argument is
+           : not a valid number.
+ Comments  : Screening of significant hits uses the data provided on the
+           : description line. For NCBI BLAST1 and WU-BLAST, this data 
+           : is P-value. for NCBI BLAST2 it is an Expect value.
+
+=cut
+
+sub max_significance {
+    my ($self,@new) = @_;
+    if (@new) {
+        my $sig = $new[0];
+        # only digits, '.', 'e' and '-' are accepted, and the value must be positive
+        $self->throw(-class => 'Bio::Root::BadParameter',
+                     -text => "Invalid significance value: $sig\n".
+                     "Must be a number greater than zero.",
+                     -value=>$sig)
+            if ( $sig =~ /[^\d.e-]/ || $sig <= 0 );
+        # setting a cutoff also switches the significance check on
+        $self->{'_confirm_significance'} = 1;
+        $self->{'_max_significance'} = $sig;
+    }
+    # always reported in scientific notation
+    return sprintf("%.1e", $self->{'_max_significance'});
+}
+
+
+=head2 signif
+
+Synonym for L<max_significance()|max_significance>
+
+=cut
+
+# Forward any arguments so that signif() can set as well as get the cutoff,
+# as a synonym for max_significance() should.
+sub signif {
+    my $self = shift;
+    return $self->max_significance(@_);
+}
+
+=head2 min_score
+
+ Usage     : $obj->min_score();
+ Purpose   : Sets/gets the Blast score used as screening cutoff.
+             This is the value of the -score parameter supplied to new().
+             Hits with scores below this are skipped.
+ Returns   : Integer (or undef if not set)
+ Argument  : Integer (when setting)
+ Throws    : Bio::Root::BadParameter exception if the supplied argument is
+           : not a valid number.
+ Comments  : Screening of significant hits uses the data provided on the
+           : description line. 
+
+=cut
+
+sub min_score {
+    my ($self,@new) = @_;
+    if (@new) {
+        my $score = $new[0];
+        # only digits, 'e' and '+' are accepted, and the value must be positive
+        my $invalid = ( $score =~ /[^\de+]/ ) || ( $score <= 0 );
+        $self->throw(-class => 'Bio::Root::BadParameter',
+                     -text => "Invalid score value: $score\n".
+                              "Must be an integer greater than zero.",
+                    -value => $score)
+            if $invalid;
+        # setting a cutoff also switches the score check on
+        $self->{'_confirm_score'} = 1;
+        $self->{'_min_score'} = $score;
+    }
+    return $self->{'_min_score'};
+}
+
+
+=head2 min_bits
+
+ Usage     : $obj->min_bits();
+ Purpose   : Sets/gets the Blast bit score used as screening cutoff.
+             This is the value of the -bits parameter supplied to new().
+             Hits with bits score below this are skipped.
+ Returns   : Integer (or undef if not set)
+ Argument  : Integer (when setting)
+ Throws    : Bio::Root::BadParameter exception if the supplied argument is
+           : not a valid number.
+ Comments  : Screening of significant hits uses the data provided on the
+           : description line. 
+
+=cut
+
+sub min_bits {
+    my ($self,@new) = @_;
+    # plain getter when called without an argument
+    return $self->{'_min_bits'} unless @new;
+
+    my $bits = $new[0];
+    # only digits, 'e' and '+' are accepted, and the value must be positive
+    if( $bits =~ /[^\de+]/ || $bits <= 0 ) {
+        $self->throw(-class => 'Bio::Root::BadParameter',
+                     -text => "Invalid bits value: $bits\n".
+                              "Must be an integer greater than zero.",
+                    -value => $bits);
+    }
+    # setting a cutoff also switches the bit-score check on
+    $self->{'_confirm_bits'} = 1;
+    $self->{'_min_bits'} = $bits;
+    return $self->{'_min_bits'};
+}
+
+
+=head2 hit_filter
+
+ Usage     : $obj->hit_filter();
+ Purpose   : Set/Get a function reference used for filtering out hits.
+             This is the value of the -hit_filter parameter supplied to new().
+             Hits that fail to pass the filter are skipped.
+ Returns   : Function ref (or undef if not set)
+ Argument  : Function ref (when setting)
+ Throws    : Bio::Root::BadParameter exception if the supplied argument is
+           : not a function reference.
+
+=cut
+
+sub hit_filter {
+    my ($self,@new) = @_;
+    # plain getter when called without an argument
+    return $self->{'_hit_filter'} unless @new;
+
+    my $func = $new[0];
+    # only a code reference is accepted as a filter
+    unless( ref($func) eq 'CODE' ) {
+        $self->throw(-class=>'Bio::Root::BadParameter',
+                     -text=>"Not a function reference: $func\n".
+                            "The -hit_filter parameter must be function reference.",
+                     -value=> $func);
+    }
+    $self->{'_hit_filter'} = $func;
+    return $self->{'_hit_filter'};
+}
+
+=head2 inclusion_threshold
+
+See L<Bio::SearchIO::blast::inclusion_threshold>.
+
+=cut
+
+sub inclusion_threshold {
+    my ($self,@new) = @_;
+    # store the new threshold when one is given, then report the current one
+    if (@new) {
+        $self->{'_inclusion_threshold'} = $new[0];
+    }
+    return $self->{'_inclusion_threshold'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchResultEventBuilder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchResultEventBuilder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchResultEventBuilder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,434 @@
+# $Id: SearchResultEventBuilder.pm,v 1.41.4.2 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::SearchResultEventBuilder
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::SearchResultEventBuilder - Event Handler for SearchIO events.
+
+=head1 SYNOPSIS
+
+# Do not use this object directly, this object is part of the SearchIO
+# event based parsing system.
+
+=head1 DESCRIPTION
+
+This object handles Search Events generated by the SearchIO classes
+and builds the appropriate Bio::Search::* objects from them.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::SearchResultEventBuilder;
+use vars qw(%KNOWNEVENTS);
+use strict;
+
+use Bio::Factory::ObjectFactory;
+
+use base qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::SearchResultEventBuilder();
+ Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object 
+ Returns : Bio::SearchIO::SearchResultEventBuilder
+ Args    : -hsp_factory    => Bio::Factory::ObjectFactoryI
+           -hit_factory    => Bio::Factory::ObjectFactoryI
+           -result_factory => Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+# Constructor.  Base construction is delegated to the superclass; the
+# three optional factory arguments (-hsp_factory, -hit_factory,
+# -result_factory) are then extracted and registered.  Whenever a
+# factory is not supplied, a Bio::Factory::ObjectFactory producing the
+# corresponding Generic* class is registered instead.
+sub new { 
+    # NOTE: the archived copy of this file had "@args" rewritten as
+    # " at args" by e-mail address obfuscation, which does not compile.
+    my ($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY
+                                                      HIT_FACTORY
+                                                      RESULT_FACTORY)],@args);
+    $self->register_factory('hsp', $hspF || 
+                            Bio::Factory::ObjectFactory->new(
+                                     -type      => 'Bio::Search::HSP::GenericHSP',
+                                     -interface => 'Bio::Search::HSP::HSPI'));
+
+    $self->register_factory('hit', $hitF ||
+                            Bio::Factory::ObjectFactory->new(
+                                      -type      => 'Bio::Search::Hit::GenericHit',
+                                      -interface => 'Bio::Search::Hit::HitI'));
+
+    $self->register_factory('result', $resultF ||
+                            Bio::Factory::ObjectFactory->new(
+                                      -type      => 'Bio::Search::Result::GenericResult',
+                                      -interface => 'Bio::Search::Result::ResultI'));
+
+    return $self;
+}
+
+# new() is defined above; it delegates base construction to SUPER::new
+
+=head2 will_handle
+
+ Title   : will_handle
+ Usage   : if( $handler->will_handle($event_type) ) { ... }
+ Function: Tests if this event builder knows how to process a specific event
+ Returns : boolean
+ Args    : event type name
+
+
+=cut
+
+# Reports whether $type names one of the three event types this builder
+# processes ('hsp', 'hit' or 'result'); the comparison is case-sensitive.
+sub will_handle{
+   my ($self,$type) = @_;
+   # these are the events we recognize
+   foreach my $known (qw(hsp hit result)) {
+       return 1 if $type eq $known;
+   }
+   return !1;   # Perl's canonical false value, as a failed 'eq' yields
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_result
+
+ Title   : start_result
+ Usage   : $handler->start_result($resulttype)
+ Function: Begins a result event cycle
+ Returns : none 
+ Args    : Type of Report
+
+=cut
+
+# Opens a result cycle: remembers the report type and resets the
+# per-result hit list, HSP list and hit counter.  Returns nothing.
+sub start_result {
+   my $self = shift;
+   my $type = shift;
+   @{$self}{qw(_resulttype _hits _hsps _hitcount)} = ($type, [], [], 0);
+   return;
+}
+
+=head2 end_result
+
+ Title   : end_result
+ Usage   : my @results = $parser->end_result
+ Function: Finishes a result handler cycle 
+ Returns : A Bio::Search::Result::ResultI
+ Args    : none
+
+=cut
+
+# this is overridden by IteratedSearchResultEventBuilder
+# so keep that in mind when debugging
+
+# Closes a result cycle and builds the result object.
+#   $type : event type; used as the algorithm name of last resort
+#   $data : hash ref of collected values; keys starting with 'RESULT'
+#           become constructor arguments with that prefix removed
+#           (e.g. 'RESULT-query_name' => '-query_name')
+# A non-blank 'runid' entry is folded into the query name, description
+# and accession first, and is then removed from $data.
+# Returns whatever the registered 'result' factory creates; the hits
+# accumulated so far are handed over and the internal list is emptied.
+sub end_result {
+    my ($self,$type,$data) = @_;
+
+    my $runid = $data->{'runid'};
+    if( defined $runid && $runid !~ /^\s+$/ ) {
+        if( $runid =~ /^lcl\|/ ) {
+            # local id: the real name is the first word of the description
+            my ($name,$desc) =
+                split(/\s+/,$data->{"RESULT-query_description"},2);
+            $data->{"RESULT-query_name"}        = $name;
+            $data->{"RESULT-query_description"} = $desc;
+        } else {
+            $data->{"RESULT-query_name"} = $runid;
+        }
+
+        my @parts = split(/\|/,$data->{'RESULT-query_name'});
+        if( @parts ) {
+            # accession is the last field, e.g. |1234|gb|AAABB1.1|AAABB1
+            my $acc = pop @parts;
+            # ... or the one before it when the last is blank: |123|gb|ABC1.1|
+            $acc = pop @parts if( ! defined $acc || $acc =~ /^\s+$/);
+            $data->{"RESULT-query_accession"} = $acc;
+        }
+        delete $data->{'runid'};
+    }
+
+    # RESULT-xxx => -xxx
+    my %args;
+    foreach my $key ( grep { /^RESULT/ } keys %{$data} ) {
+        (my $arg = $key) =~ s/RESULT//;
+        $args{$arg} = $data->{$key};
+    }
+
+    $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
+                              $data->{'RESULT-algorithm_name'} || $type);
+    $args{'-hits'}      = $self->{'_hits'};
+
+    my $result = $self->factory('result')->create_object(%args);
+    $result->hit_factory($self->factory('hit'));
+    $self->{'_hits'} = [];
+    return $result;
+}
+
+=head2 start_hsp
+
+ Title   : start_hsp
+ Usage   : $handler->start_hsp($name,$data)
+ Function: Begins processing a HSP event
+ Returns : none
+ Args    : type of element 
+           associated data (hashref)
+
+=cut
+
+# Start-of-HSP event.  This builder needs no set-up per HSP, so the
+# arguments are accepted and ignored; nothing is returned.
+sub start_hsp {
+    # NOTE: the archived copy of this file had "@args" rewritten as
+    # " at args" by e-mail address obfuscation, which does not compile.
+    my ($self,@args) = @_;
+    return;
+}
+
+=head2 end_hsp
+
+ Title   : end_hsp
+ Usage   : $handler->end_hsp()
+ Function: Finish processing a HSP event
+ Returns : hash reference of the collected HSP arguments
+ Args    : type of event and associated hashref
+
+
+=cut
+
+# Closes an HSP event.
+#   $type : event type; used as the algorithm name of last resort
+#   $data : hash ref of collected values; keys starting with 'HSP' become
+#           arguments with that prefix removed ('HSP-bits' => '-bits')
+# No HSP object is created here: the argument hash is appended to the
+# current hit's HSP list ($self->{'_hsps'}) and a reference to it is
+# returned.  Note that $data itself is updated in place (coordinate
+# swap, default frames/lengths, runid handling).
+sub end_hsp {
+    my ($self,$type,$data) = @_;
+
+    if( defined $data->{'runid'} &&
+        $data->{'runid'} !~ /^\s+$/ ) {        
+
+        if( $data->{'runid'} !~ /^lcl\|/) { 
+            $data->{"RESULT-query_name"}= $data->{'runid'};
+        } else { 
+            ($data->{"RESULT-query_name"},
+             $data->{"RESULT-query_description"}) = 
+                 split(/\s+/,$data->{"RESULT-query_description"},2);
+        }
+        
+        if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
+            my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
+            # this is for |123|gb|ABC1.1|
+            $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
+            $data->{"RESULT-query_accession"}= $acc;
+        }
+        delete $data->{'runid'};
+    }
+
+    # this code is to deal with the fact that Blast XML data
+    # always has start < end and one has to infer strandedness
+    # from the frame which is a problem for the Search::HSP object
+    # which expect to be able to infer strand from the order of 
+    # of the begin/end of the query and hit coordinates.
+    # The same rule applies to the query and to the hit: a negative
+    # frame must have start > end, a positive frame start < end.
+    foreach my $side (qw(query hit)) {
+        my $frame = $data->{"HSP-${side}_frame"};
+        next unless defined $frame;    # protect from undefs
+
+        my ($start_key,$end_key) = ("HSP-${side}_start","HSP-${side}_end");
+        if( ( $frame < 0 && $data->{$start_key} < $data->{$end_key} ) ||
+            ( $frame > 0 && $data->{$start_key} > $data->{$end_key} ) ) {
+            # swap
+            ($data->{$start_key},
+             $data->{$end_key}) = ($data->{$end_key},
+                                   $data->{$start_key});
+        }
+    }
+    $data->{'HSP-query_frame'} ||= 0;
+    $data->{'HSP-hit_frame'} ||= 0;
+
+    # missing lengths fall back to the enclosing result/hit value and
+    # then to the length of the aligned sequence string
+    $data->{'HSP-query_length'} ||= $data->{'RESULT-query_length'};
+    $data->{'HSP-query_length'} ||= length ($data->{'HSP-query_seq'} || '');
+    $data->{'HSP-hit_length'}   ||= $data->{'HIT-length'};
+    $data->{'HSP-hit_length'}   ||= length ($data->{'HSP-hit_seq'} || '');
+    
+    # handle Blast 2.1.2 which did not support data member: hsp_align-len
+    $data->{'HSP-hsp_length'}   ||= length ($data->{'HSP-homology_seq'} || '');
+    
+    # HSP-xxx => -xxx
+    my %args;
+    foreach my $key ( grep { /^HSP/ } keys %{$data} ) {
+        (my $arg = $key) =~ s/HSP//;
+        $args{$arg} = $data->{$key};
+    }
+    
+    $args{'-algorithm'} =  uc( $args{'-algorithm_name'} || 
+                               $data->{'RESULT-algorithm_name'} || $type);
+    # copy this over from result
+    $args{'-query_name'} = $data->{'RESULT-query_name'};
+    $args{'-hit_name'}   = $data->{'HIT-name'};
+    # 1-based position of this HSP within the current hit
+    $args{'-rank'}       = scalar(@{$self->{'_hsps'} || []}) + 1;
+    
+    $args{'-hit_desc'}   = $data->{'HIT-description'};
+    $args{'-query_desc'} = $data->{'RESULT-query_description'};
+    
+    my $hsp = \%args;
+    push @{$self->{'_hsps'}}, $hsp;
+    
+    return $hsp;
+}
+
+
+=head2 start_hit
+
+ Title   : start_hit
+ Usage   : $handler->start_hit()
+ Function: Starts a Hit event cycle
+ Returns : none
+ Args    : type of event and associated hashref
+
+
+=cut
+
+# Opens a hit cycle by discarding any HSPs collected so far.
+# The event type argument is accepted but not used.  Returns nothing.
+sub start_hit{
+    my $self = shift;
+    $self->{'_hsps'} = [];
+    return;
+}
+
+
+=head2 end_hit
+
+ Title   : end_hit
+ Usage   : $handler->end_hit()
+ Function: Ends a Hit event cycle
+ Returns : hash reference of the collected hit arguments
+ Args    : type of event and associated hashref
+
+
+=cut
+
+# Closes a hit cycle.
+#   $type : event type; used as the algorithm name of last resort
+#   $data : hash ref of collected values; keys starting with 'HIT' become
+#           arguments with that prefix removed ('HIT-name' => '-name')
+# No hit object is created here: the argument hash (including the HSP
+# argument hashes gathered since start_hit and the registered 'hsp'
+# factory) is queued through _add_hit() and a reference to it returned.
+sub end_hit{
+    my ($self,$type,$data) = @_;
+
+    # HIT-xxx => -xxx
+    my %args;
+    foreach my $key ( grep { /^HIT/ } keys %{$data} ) {
+        (my $arg = $key) =~ s/HIT//;
+        $args{$arg} = $data->{$key};
+    }
+    #print STDERR "SREB: end_hit\n";
+
+    # I hate special cases, but this is here because NCBI BLAST XML
+    # doesn't play nice and is undergoing mutation -jason
+    # (for BL_ORD_ID names the first word of the description is the name)
+    if( $args{'-name'} =~ /BL_ORD_ID/ ) {
+        my ($name,$desc) = split(/\s+/,$args{'-description'},2);
+        $args{'-name'}        = $name;
+        $args{'-description'} = $desc;
+    }
+
+    $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
+                              $data->{'RESULT-algorithm_name'} || $type);
+    $args{'-hsps'}      = $self->{'_hsps'};
+    $args{'-query_len'} = $data->{'RESULT-query_length'};
+    $args{'-rank'}      = $self->{'_hitcount'} + 1;
+
+    # without an explicit significance, borrow the first HSP's e-value
+    if( ! defined $args{'-significance'} &&
+        defined $args{'-hsps'} && $args{'-hsps'}->[0] ) {
+        $args{'-significance'} = $args{'-hsps'}->[0]->{'-evalue'};
+    }
+
+    my $hit = \%args;
+    $hit->{'-hsp_factory'} = $self->factory('hsp');
+    $self->_add_hit($hit);
+    $self->{'_hsps'} = [];
+    return $hit;
+}
+
+# TODO: Optionally impose hit filtering here
+# Appends $hit to the current result's hit list and refreshes the
+# cached hit count (which is also the value returned).
+sub _add_hit {
+    my $self = shift;
+    my $hit  = shift;
+    push @{ $self->{'_hits'} }, $hit;
+    my $count = @{ $self->{'_hits'} };
+    $self->{'_hitcount'} = $count;
+}
+
+=head2 Factory methods
+
+=cut
+
+=head2 register_factory
+
+ Title   : register_factory
+ Usage   : $handler->register_factory('TYPE',$factory);
+ Function: Register a specific factory for a object type class
+ Returns : none
+ Args    : string representing the class and
+           Bio::Factory::ObjectFactoryI
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+# Stores factory $f under the lower-cased $type.  $f must be defined,
+# be a reference, and answer true to isa('Bio::Factory::ObjectFactoryI');
+# otherwise the problem is reported through throw().
+sub register_factory{
+   my ($self, $type,$f) = @_;
+   my $usable = defined $f && ref($f) &&
+                $f->isa('Bio::Factory::ObjectFactoryI');
+   unless( $usable ) {
+       $self->throw("Cannot set factory to value $f".ref($f)."\n");
+   }
+   $self->{'_factories'}->{lc($type)} = $f;
+}
+
+
+=head2 factory
+
+ Title   : factory
+ Usage   : my $f = $handler->factory('TYPE');
+ Function: Retrieves the associated factory for requested 'TYPE'
+ Returns : a Bio::Factory::ObjectFactoryI 
+ Throws  : Bio::Root::BadParameter if none registered for the supplied type
+ Args    : name of factory class to retrieve
+
+See L<Bio::Factory::ObjectFactoryI> for more information
+
+=cut
+
+# Looks up the factory registered for $type (case-insensitive).
+# When nothing is registered, throw() is called with a
+# Bio::Root::BadParameter class and its result is passed back.
+sub factory{
+   my ($self,$type) = @_;
+   my $registered = $self->{'_factories'}->{lc($type)};
+   return $registered if $registered;
+   return $self->throw(-class=>'Bio::Root::BadParameter',
+                       -text=>"No factory registered for $type");
+}
+
+=head2 inclusion_threshold
+
+See L<Bio::SearchIO::blast::inclusion_threshold>.
+
+=cut
+
+# Get/set accessor for the '_inclusion_threshold' slot.
+# Any supplied argument (even undef) replaces the stored value; the
+# current value is always returned.
+sub inclusion_threshold {
+    my ($self, @value) = @_;
+    $self->{'_inclusion_threshold'} = $value[0] if @value;
+    return $self->{'_inclusion_threshold'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchWriterI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchWriterI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/SearchWriterI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,136 @@
+#-----------------------------------------------------------------
+# $Id: SearchWriterI.pm,v 1.10.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module Bio::SearchIO::SearchWriterI
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+=head1 NAME
+
+Bio::SearchIO::SearchWriterI - Interface for outputting parsed Search results
+
+=head1 SYNOPSIS
+
+Bio::SearchIO::SearchWriterI objects cannot be instantiated since this
+module defines a pure interface.
+
+Given an object that implements the Bio::SearchIO::SearchWriterI interface,
+you can do the following things with it:
+
+    print $writer->to_string( $result_obj, @args );
+
+=head1 DESCRIPTION
+
+This module defines abstract methods that all subclasses must implement
+to be used for outputting results from L<Bio::Search::Result::ResultI>
+objects.
+
+=head1 AUTHOR
+
+Steve Chervitz E<lt>sac-at-bioperl.orgE<gt>
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+
+=cut
+
+package Bio::SearchIO::SearchWriterI;
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 to_string
+
+ Purpose   : Produces data for each Search::Result::ResultI in a string.
+           : This is an abstract method. For some useful implementations,
+           : see ResultTableWriter.pm, HitTableWriter.pm, 
+           : and HSPTableWriter.pm.
+ Usage     : print $writer->to_string( $result_obj, @args );
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : @args = any additional arguments used by your implementation.
+ Returns   : String containing data for each search Result or any of its
+           : sub-objects (Hits and HSPs).
+ Throws    : n/a
+
+=cut
+
+# Interface placeholder: implementations receive ($result, @args) and
+# return a string.  This version only reports, through
+# throw_not_implemented(), that the method has not been overridden.
+sub to_string {
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+=head2 start_report
+
+ Title   : start_report
+ Usage   : $self->start_report()
+ Function: The method to call when starting a report. You can override it
+           to make a custom header
+ Returns : string
+ Args    : none
+
+=cut
+
+# Default report header: the empty string.
+sub start_report {
+    return '';
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document (</BODY></HTML>)
+           for HTML
+ Returns : string
+ Args    : none
+
+
+=cut
+
+# Default report footer: the empty string.
+sub end_report {
+    return '';
+}
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+# yes this is an implementation in the interface, 
+# yes it assumes that the underlying class is hash-based
+# yes that might not be a good idea, but until people
+# start extending the SearchWriterI interface I think
+# this is an okay way to go
+
+# Get/set a filter callback for one of the levels HSP, HIT or RESULT.
+#   $method : level name, matched case-insensitively
+#   $code   : optional callback; ref($code) must match /CODE/
+# The callback is kept directly in the object hash under the upper-cased
+# level name.  Returns the callback stored for that level, or nothing
+# when no level is given or the level is unknown (a warning is issued
+# for the latter).
+sub filter {
+    my ($self,$method,$code) = @_;
+    return unless $method;
+
+    my $level = uc($method);
+    unless( $level eq 'HSP' || $level eq 'HIT' || $level eq 'RESULT' ) {
+        $self->warn("Unknown method $level");
+        return;
+    }
+
+    if( $code ) {
+        unless( ref($code) =~ /CODE/ ) {
+            $self->throw("Must provide a valid code reference");
+        }
+        $self->{$level} = $code;
+    }
+    return $self->{$level};
+}
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/BSMLResultWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/BSMLResultWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/BSMLResultWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,342 @@
+# $Id: BSMLResultWriter.pm,v 1.4.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::Writer::BSMLResultWriter
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::Writer::BSMLResultWriter - BSML output writer
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  my $in = new Bio::SearchIO(-file   => 'result.blast',
+                             -format => 'blast');
+  my $out = new Bio::SearchIO(-output_format  => 'BSMLResultWriter',
+                              -file           => ">result.bsml");
+  while( my $r = $in->next_result ) {
+    $out->write_result($r);
+  }
+
+=head1 DESCRIPTION
+
+This is a writer to produce BSML for a search result.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::Writer::BSMLResultWriter;
+use strict;
+
+use XML::Writer;
+use IO::String;
+
+use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::Writer::BSMLResultWriter();
+ Function: Builds a new Bio::SearchIO::Writer::BSMLResultWriter object 
+ Returns : an instance of Bio::SearchIO::Writer::BSMLResultWriter
+ Args    :
+
+
+=cut
+
+# Constructor: takes no arguments of its own and simply delegates to
+# the superclass constructor.
+sub new {
+  # NOTE: the archived copy of this file had "@args" rewritten as
+  # " at args" by e-mail address obfuscation, which does not compile.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+=head2 to_string
+
+ Purpose   : Produces a BSML document, as a string, for a single
+           : Search::Result::ResultI object.
+ Usage     : print $writer->to_string( $result_obj, @args );
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : @args = any additional arguments used by your implementation.
+ Returns   : String containing data for each search Result or any of its
+           : sub-objects (Hits and HSPs).
+ Throws    : n/a
+
+=cut
+
+# this implementation is largely adapted from the Incogen XSLT stylesheet
+# to convert NCBI BLAST XML to BSML
+
+# Renders one search result as a BSML document and returns it as a string.
+#   $result : result object; the accessors used below (hits, algorithm,
+#             query_name, database_name, ...) must be available on it
+#   $num    : accepted for interface compatibility, not used
+# The document has three parts: the query Sequence with one Feature per
+# HSP, a Sequence-search-table with one Seq-pair-alignment per HSP, and
+# a Research/Analysis section describing the search run.
+#
+# A result without hits (or whose first hit has no HSPs) is written
+# without the 'molecule' attribute on the query Sequence instead of
+# dying on an undefined object, and the result's next_hit iterator is
+# no longer advanced as a side effect of writing.
+sub to_string {
+    my ($self,$result,$num) = @_;
+    my $str    = IO::String->new();
+    my $writer = XML::Writer->new(OUTPUT      => $str,
+                                  DATA_INDENT => 1,
+                                  DATA_MODE   => 1);
+
+    # Emits <Attribute name="..." content="..."/>.  The content is taken
+    # as a list so that accessor calls at the call sites are evaluated in
+    # list context exactly as if they were written inline in emptyTag().
+    my $attribute = sub {
+        my ($name, @content) = @_;
+        $writer->emptyTag('Attribute',
+                          'name'    => $name,
+                          'content' => @content);
+    };
+
+    $writer->xmlDecl('UTF-8');
+    $writer->doctype('Bsml','-//EBI//Labbook, Inc. BSML DTD//EN',
+                     'http://www.labbook.com/dtd/bsml3_1.dtd');
+    $writer->startTag('Bsml');
+    $writer->startTag('Definitions');
+    $writer->startTag('Sequences');
+    my $reporttype = $result->algorithm;
+
+    # The query molecule type is inferred from the first HSP: strand 0
+    # is treated as protein ('aa'), anything else as nucleotide ('nt').
+    my ($first_hit) = $result->hits;
+    my ($first_hsp) = $first_hit ? $first_hit->hsps : ();
+    my @molecule;
+    if( $first_hsp ) {
+        @molecule = ('molecule' =>
+                     ( $first_hsp->query->strand == 0 ? 'aa' : 'nt' ));
+    }
+
+    $writer->startTag('Sequence',
+                      'length' => $result->query_length,
+                      'title'  => $result->query_name . " ". $result->query_description,
+                      @molecule,
+                      'representation' => 'virtual',
+                      'id'     => $result->query_name
+                      );
+    # Here we're annotating the Query sequence with hits
+    # hence the Feature-table
+    $writer->startTag('Feature-tables');
+    $writer->startTag('Feature-table',
+                      'title' => "$reporttype Result", 
+                      'class' => $reporttype);
+    my ($hitnum,$hspnum) = (1,1);
+    foreach my $hit ( $result->hits ) {
+        $hspnum = 1;
+        foreach my $hsp ( $hit->hsps ) {
+            $writer->startTag('Feature',
+                              'class'  => $reporttype,
+                              'value-type' => 'alignment',
+                              'title'  => $hit->name. " ". $hit->description,
+                              );
+
+            $writer->emptyTag('Interval-loc',
+                              'startpos' => $hsp->query->start,
+                              'endpos'   => $hsp->query->end);
+            $writer->emptyTag('Qualifier',
+                              'value-type' => 'score',
+                              'value'      => $hsp->score,
+                              );
+
+            $writer->emptyTag('Qualifier',
+                              'value-type' => 'target-start',
+                              'value'      => $hsp->hit->start,
+                              );
+            $writer->emptyTag('Qualifier',
+                              'value-type' => 'target-end',
+                              'value'      => $hsp->hit->end,
+                              );
+            # points at the matching Seq-pair-alignment id written below
+            $writer->emptyTag('Link',
+                              'title' => 'alignment',
+                              'href'  => sprintf("#SPA%d.%d",$hitnum,$hspnum)
+                              );
+
+            if( $hsp->hit->strand < 0 ) {
+                $writer->emptyTag('Qualifier',
+                                  'value-type' => 'target-on-complement',
+                                  'value'      => 1,
+                                  );
+            }
+            $hspnum++;
+            $writer->endTag('Feature');
+        }
+        $hitnum++;
+    }
+    $writer->endTag('Feature-table');
+    $writer->endTag('Feature-tables');
+    $writer->endTag('Sequence');
+    $writer->endTag('Sequences');
+
+    $writer->startTag('Tables');
+    $writer->startTag('Sequence-search-table',
+                      'search-type' => $reporttype,
+                      'query-length' => $result->query_length);
+    $hitnum = $hspnum = 1;
+    foreach my $hit ( $result->hits ) {
+        $hspnum = 1;
+        foreach my $hsp ( $hit->hsps ) {
+            # NOTE(review): the ref*/comp* attributes are not consistent
+            # about which side is the query (refseq/refcaption use the
+            # hit, refstart/reflength use the query) - kept as found,
+            # confirm against the BSML DTD before changing.
+            $writer->startTag('Seq-pair-alignment',
+                              'id' => sprintf("SPA%d.%d",$hitnum,$hspnum),
+                              'method'       => join(' ',$result->algorithm), 
+                              'compxref'     => sprintf("%s:%s",
+                                                        '',$result->query_name),
+                              'refxref'      => sprintf("%s:%s",
+                                                        $result->database_name,
+                                                        $hit->name),
+                              'refseq'       => $hit->name,
+                              'title'        => $result->query_name,
+                              'compseq'      => $result->query_name,
+                              'compcaption'  => $result->query_name . ' ' .
+                                                $result->query_description,
+                              'refcaption'   => $hit->name . " ". 
+                                                $hit->description,
+                              'totalscore'   => $hsp->score,
+                              'refstart'     => $hsp->query->start,
+                              'refend'       => $hsp->query->end,
+                              'compstart'    => $hsp->hit->start,
+                              'compend'      => $hsp->hit->end,
+                              'complength'   => $hit->length,
+                              'reflength'    => $result->query_length);
+
+            $attribute->('hit-num',       $hitnum);
+            $attribute->('hit-id',        $hit->name);
+            $attribute->('hsp-num',       $hspnum);
+            $attribute->('hsp-bit-score', $hsp->bits);
+            $attribute->('hsp-evalue',    $hsp->evalue);
+            $attribute->('pattern-from',  0);
+            $attribute->('pattern-to',    0);
+            $attribute->('query-frame',   $hsp->query->frame);
+            $attribute->('hit-frame',     $hsp->hit->frame * $hsp->hit->strand);
+            $attribute->('percent_identity',
+                         sprintf("%.2f",$hsp->percent_identity));
+            $attribute->('percent_similarity',
+                         sprintf("%.2f",$hsp->frac_conserved('total') * 100));
+
+            # absolute counts derived from the fractions over the
+            # total alignment length
+            my $cons  = $hsp->frac_conserved('total') * $hsp->length('total');
+            my $ident = $hsp->frac_identical('total') * $hsp->length('total');
+
+            $attribute->('identity',  $ident);
+            $attribute->('positive',  $cons);
+            $attribute->('gaps',      $hsp->gaps('total'));
+            $attribute->('align-len', $hsp->length('total'));
+            $attribute->('density',   0);
+            $attribute->('hit-len',   $hit->length);
+
+            $writer->emptyTag('Seq-pair-run',
+                              'runlength'     => $hsp->hit->length,
+                              'comprunlength' => $hsp->hsp_length,
+                              'complength'    => $hsp->hit->length,
+                              'compcomplement'=> $hsp->hit->strand < 0 ? 1 :0,
+                              'refcomplement' => $hsp->query->strand < 0 ? 1 :0,
+                              'refdata'       => $hsp->query_string,
+                              'compdata'      => $hsp->hit_string,
+                              'alignment'     => $hsp->homology_string,
+                              );
+            $hspnum++;
+            $writer->endTag('Seq-pair-alignment');
+        }
+        $hitnum++;
+    }
+    $writer->endTag('Sequence-search-table');
+    $writer->endTag('Tables');
+
+    $writer->startTag('Research');
+    $writer->startTag('Analyses');
+    $writer->startTag('Analysis');
+    $attribute->('program',   $reporttype);
+    $attribute->('version',   join(' ',$reporttype,
+                                   $result->algorithm_version));
+    $attribute->('reference', $result->algorithm_reference);
+    $attribute->('db',        $result->database_name);
+    $attribute->('db-size',   $result->database_entries);
+    $attribute->('db-length', $result->database_letters);
+    # $attribute->('iter-num', $result->iteration_num);
+    foreach my $attr ( $result->available_parameters ) {
+        $attribute->($attr, $result->get_parameter($attr));
+    }
+    foreach my $attr ( $result->available_statistics ) {
+        $attribute->($attr, $result->get_statistic($attr));
+    }
+    $writer->endTag('Analysis');
+    $writer->endTag('Analyses');
+    $writer->endTag('Research');
+
+    $writer->endTag('Definitions');
+    $writer->endTag('Bsml');
+    $writer->end();
+    return ${$str->string_ref};
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/GbrowseGFF.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/GbrowseGFF.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/GbrowseGFF.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,444 @@
+#-----------------------------------------------------------------
+# $Id: GbrowseGFF.pm,v 1.15.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module Bio::SearchIO::Writer::GbrowseGFF.pm
+#
+# Cared for by Mark Wilkinson <markw at illuminae.com>
+#
+# You may distribute this module under the same terms as perl itself
+#-----------------------------------------------------------------
+
+=head1 NAME
+
+Bio::SearchIO::Writer::GbrowseGFF - Interface for outputting parsed search results in Gbrowse GFF format
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  my $in = new Bio::SearchIO(-file   => 'result.blast',      
+                             -format => 'blast');
+  my $out = new Bio::SearchIO(-output_format  => 'GbrowseGFF',
+                              -output_cigar   => 1,
+                              -output_signif  => 1,
+                              -file           => ">result.gff");
+  while( my $r = $in->next_result ) {
+    $out->write_result($r);
+  }
+
+=head1 DESCRIPTION
+
+This writer produces Gbrowse flavour GFF from a Search::Result object.
+
+=head1 AUTHOR  Mark Wilkinson
+
+Email markw-at-illuminae-dot-com
+
+=head1 CONTRIBUTORS
+
+Susan Miller sjmiller at email-DOT-arizon-DOT-edu
+Jason Stajich jason at bioperl-dot-org
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::SearchIO::Writer::GbrowseGFF;
+use vars qw(%Defaults);
+use strict;
+
+# Class-wide fallbacks, consulted when neither the constructor nor
+# to_string() is given an explicit value.
+$Defaults{'Prefix'}   = 'EST';    # prefix placed before the name in the Target tag
+$Defaults{'HSPTag'}   = 'HSP';    # primary tag used for individual HSP features
+$Defaults{'MatchTag'} = 'match';  # primary tag used for the whole-hit feature
+
+use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::Writer::GbrowseGFF(@args);
+ Function: Builds a new Bio::SearchIO::Writer::GbrowseGFF object 
+ Returns : an instance of Bio::SearchIO::Writer::GbrowseGFF
+ Args    :  -e_value => 10   : set e_value parsing cutoff (default undef)
+            (note the -e_value flag is deprecated.)
+
+=cut
+
+# Constructor.  Named arguments (parsed with _rearrange):
+#   -e_value        deprecated significance cutoff; to_string() applies it
+#                   to both hits and HSPs
+#   -output_cigar   if true, HSP lines carry a Gap tag with the CIGAR string
+#   -prefix         prefix for the Target name (default $Defaults{'Prefix'})
+#   -output_signif  if true, hit lines report the hit significance instead
+#                   of the raw score
+# Returns the new writer with its hit/HSP ID counters reset to zero.
+sub new {
+    my($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    # The significance flag is stored under '_signif' because that is the
+    # key to_string() reads.
+    ($self->{'_evalue'},
+     $self->{'_cigar'},
+     $self->{'_prefix'},
+     $self->{'_signif'} ) = $self->_rearrange([qw(E_VALUE OUTPUT_CIGAR PREFIX
+                                                 OUTPUT_SIGNIF)], @args);
+    # The separators are double-quoted so the newline and tab are real
+    # characters; the single-quoted pieces keep $writer and \&code literal.
+    if ( $self->{'_evalue'} ) {
+        warn( 'use of the -e_value argument is deprecated.  In future, use $writer->filter("type", \&code)  instead.'
+              . "\n\t"
+              . 'parsing will proceed correctly with this e_value'
+              . "\n" );
+    }
+    $self->{Gbrowse_HSPID} = 0;
+    $self->{Gbrowse_HITID} = 0;
+    $self->{'_prefix'} ||= $Defaults{'Prefix'};
+    return $self;
+}
+
+# Advance this writer's HSP counter and return the new value.
+sub _incrementHSP {
+    my $writer = shift;
+    my $next_id = ++$writer->{Gbrowse_HSPID};
+    return $next_id;
+}
+
+# Advance this writer's hit counter and return the new value.
+sub _incrementHIT {
+    my $writer = shift;
+    my $next_id = ++$writer->{Gbrowse_HITID};
+    return $next_id;
+}
+# according to the GFF3 spec:
+#"match".  In addition to the generic "match"
+#type, there are the subclasses "cDNA_match," "EST_match,"
+#"translated_nucleotide_match," "nucleotide_to_protein_match," and
+#"nucleotide_motif."
+
+=head2 to_string
+
+ Purpose   : Produce the Gbrowse format GFF lines for a Result
+ Usage     : print $writer->to_string( $result_obj, @args);
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+             -version => 1|2|2.5|3  ; the GFF format you want to output (default 3)
+             -match_tag => match|cDNA_match|EST_match|translated_nucleotide_match
+                           nucleotide_to_protein_match|nucleotide_motif
+                           This is the SO term to be placed in GFF column 3.
+             -prefix => String to prefix the group by, default is EST 
+                        (see %Defaults class variable) A default can also
+                        be set on object init
+ Returns   : String containing data for each search Result or any of its
+           : sub-objects (Hits and HSPs).
+ Throws    : n/a
+
+=cut
+
+             #-reference => 'hit'|'query' ; whether the hit sequence name or the
+             #                              query sequence name is used as the
+             #                              'reference' sequence (GFF column 1)
+
+# Render one search result as Gbrowse-flavoured GFF.
+#
+# Arguments: the result object, followed by optional named arguments
+# parsed with _rearrange: -version (GFF version, default 3), -reference
+# ('hit' or 'query', default 'hit'), -match_tag, -hsp_tag and -prefix.
+#
+# Returns a string with one line per strand of each hit that has surviving
+# HSPs, followed by one line per HSP (plus strand first).  Returns the
+# empty string, never undef, when nothing is emitted.
+#
+# Throws if -reference is not 'hit' or 'query', if a hit has no reference
+# sequence name or no algorithm name, or if the algorithm is not BLASTN.
+#
+# NOTE(review): this module does not itself load Bio::Tools::GFF or
+# Bio::SeqFeature::Generic -- confirm that the caller has loaded them.
+sub to_string {
+    my ($self, $result, @args) = @_;
+    my ($format, $reference,
+        $match_tag, $hsp_tag,
+        $prefix) = $self->_rearrange([qw(VERSION
+                                         REFERENCE
+                                         MATCH_TAG HSP_TAG
+                                         PREFIX)], @args);
+    warn $reference if $reference;
+    $reference ||= 'hit';                  # by default the hit (db) sequence is the reference
+    $match_tag ||= $Defaults{'MatchTag'};  # generic 'match' tag
+    $hsp_tag   ||= $Defaults{'HSPTag'};    # generic 'HSP' tag
+    $prefix    ||= $self->{'_prefix'};
+    # Validate the value itself; a plain truth test can never fail once the
+    # default above has been applied.
+    $self->throw("$reference must be one of 'query', or 'hit'\n")
+        unless $reference eq 'hit' || $reference eq 'query';
+
+    $format ||= '3';
+    # One formatter serves every feature written below.
+    my $gffio = Bio::Tools::GFF->new(-gff_version => $format);
+
+    # At the moment an invalid format throws, but should it ever return
+    # undef instead, hand back an empty string so the caller can print the
+    # result without testing it.
+    return "" unless defined $gffio;
+
+    my $GFF = '';
+    my ($resultfilter,$hitfilter,$hspfilter) = (
+        $self->filter('RESULT'),
+        $self->filter('HIT'),
+        $self->filter('HSP'));
+    $result->can('rewind') && $result->rewind(); # ensure we're at the beginning
+    # We are not inside a loop here, so a rejected result ends the call.
+    return $GFF if defined $resultfilter && ! &{$resultfilter}($result);
+
+    # Fill in the Target information of a tag hash in the layout the
+    # requested GFF version expects.
+    my $add_target = sub {
+        my ($tags, $seqname, $tstart, $tend) = @_;
+        if ($format == 2.5) {
+            $tags->{'Target'} = "$prefix:$seqname";
+            $tags->{'tstart'} = $tstart;
+            $tags->{'tend'}   = $tend;
+        }
+        else {
+            $tags->{'Target'} = "$prefix:$seqname $tstart $tend";
+        }
+        return;
+    };
+
+    # Build one feature and return its GFF line, newline included.
+    # HitI objects do not implement SeqFeatureI, so the feature has to be
+    # assembled by hand rather than by calling ->gff_string on the hit.
+    my $gff_line = sub {
+        my ($seq_id, $source, $type, $start, $end, $score, $strand, $tags) = @_;
+        my $feat = Bio::SeqFeature::Generic->new(
+            -seq_id      => $seq_id,
+            -source_tag  => $source,
+            -primary_tag => $type,
+            -start       => $start,
+            -end         => $end,
+            -score       => $score,
+            -strand      => $strand,
+            -frame       => '.',
+            -tag         => $tags
+        );
+        return $feat->gff_string($gffio) . "\n";
+    };
+
+    my %strand_symbol = ( 'plus' => '+', 'minus' => '-' );
+
+    while( my $hit = $result->next_hit ) {
+
+        if (defined $self->{_evalue}){
+            next unless ($hit->significance < $self->{_evalue});
+        }
+        next if( defined $hitfilter && ! &{$hitfilter}($hit) ); # test against filter code
+
+        my $refseq  = $reference eq 'hit' ? $hit->name : $result->query_name;
+        my $seqname = $reference eq 'hit' ? $result->query_name : $hit->name;  # hopefully a simple identifier without a full description line
+        my $score   = $self->{_signif} ? $hit->significance : $hit->raw_score;
+
+        $self->throw("No reference sequence name found in hit; required for GFF (this may not be your fault if your report type does not include reference sequence names)\n") unless $refseq;
+        my $source = $hit->algorithm;
+        $self->throw("No algorithm name found in hit; required for GFF (this may not be your fault if your report type does not include algorithm names)\n") unless $source;
+        $self->throw("This module only works on BLASTN reports at this time.  Sorry.\n") unless $source eq "BLASTN";
+
+        # Pre-process the HSPs: the hit-level lines need the full extent
+        # (minimum start, maximum end) of the plus- and minus-strand HSPs
+        # on both the query and the subject, tracked per strand.
+        my %hsps   = ( 'plus' => [], 'minus' => [] );
+        my %extent;
+        $hit->can('rewind') && $hit->rewind(); # ensure we're at the first HSP
+        while( my $hsp = $hit->next_hsp ) {
+            if ( defined $self->{_evalue} ) {
+                # for backward compatibility only
+                next unless ($hsp->significance < $self->{_evalue});
+            }
+            next if( defined $hspfilter && ! &{$hspfilter}($hsp) ); # test against HSP filter
+
+            my $strand = $hsp->hit->strand >= 0 ? 'plus' : 'minus';
+            push @{ $hsps{$strand} }, $hsp;
+
+            my ($qstart, $qend) = ($hsp->query->start, $hsp->query->end);
+            my ($sstart, $send) = ($hsp->hit->start,   $hsp->hit->end);
+            if ( my $ext = $extent{$strand} ) {
+                $ext->{'qmin'} = $qstart if $qstart < $ext->{'qmin'};
+                $ext->{'qmax'} = $qend   if $qend   > $ext->{'qmax'};
+                $ext->{'smin'} = $sstart if $sstart < $ext->{'smin'};
+                $ext->{'smax'} = $send   if $send   > $ext->{'smax'};
+            }
+            else {
+                $extent{$strand} = { 'qmin' => $qstart, 'qmax' => $qend,
+                                     'smin' => $sstart, 'smax' => $send };
+            }
+        }
+        next unless ( @{ $hsps{'plus'} } + @{ $hsps{'minus'} } );  # no HSPs survived
+        my $ID = $self->_incrementHIT();
+
+        # Index line(s) for the entire hit, one per strand that has HSPs.
+        # Each strand reports its own query extent; on the minus strand the
+        # target coordinates are written end-first.  No Gap tag is written
+        # at this level because no hit-wide CIGAR string is computed.
+        foreach my $strand ('plus', 'minus') {
+            next unless @{ $hsps{$strand} };
+            my $ext  = $extent{$strand};
+            my %tags = ( 'ID' => "match_sequence$ID" );
+            my ($tstart, $tend) = $strand eq 'plus'
+                ? ( $ext->{'qmin'}, $ext->{'qmax'} )
+                : ( $ext->{'qmax'}, $ext->{'qmin'} );
+            $add_target->(\%tags, $seqname, $tstart, $tend);
+            $GFF .= $gff_line->($refseq, $source, $match_tag,
+                                $ext->{'smin'}, $ext->{'smax'}, $score,
+                                $strand_symbol{$strand}, \%tags);
+        }
+
+        # One line per HSP, plus strand first, each parented to the hit.
+        foreach my $strand ('plus', 'minus') {
+            foreach my $hsp ( @{ $hsps{$strand} } ) {
+                my $hspID  = $self->_incrementHSP();
+                my $qstart = $hsp->query->start;
+                my $qend   = $hsp->query->end;
+
+                my %tags = ( 'ID'     => "match_hsp$hspID",
+                             'Parent' => "match_sequence$ID" );
+                my ($tstart, $tend) = $strand eq 'plus'
+                    ? ( $qstart, $qend )
+                    : ( $qend, $qstart );
+                $add_target->(\%tags, $seqname, $tstart, $tend);
+                if ( $self->{'_cigar'} ) {
+                    $tags{'Gap'} = $hsp->cigar_string;
+                }
+
+                $GFF .= $gff_line->($refseq, $source, $hsp_tag,
+                                    $hsp->hit->start, $hsp->hit->end,
+                                    $hsp->score,
+                                    $strand_symbol{$strand}, \%tags);
+            }
+        }
+    }
+    return $GFF;
+}
+
+# Get, and optionally set, the callback stored for one filter level.
+# $method names the level (HSP, HIT or RESULT, in any case); $code, when
+# true, must be a code reference and replaces the stored callback.
+# Returns the stored callback; returns nothing when $method is false or
+# names an unknown level (the latter also warns).
+sub significance_filter {
+    my ($self, $method, $code) = @_;
+    return unless $method;
+
+    my $level = uc $method;
+    unless ( grep { $level eq $_ } qw(HSP HIT RESULT) ) {
+        $self->warn("Unknown method $level");
+        return;
+    }
+
+    if ($code) {
+        ref($code) =~ /CODE/
+            or $self->throw("Must provide a valid code reference");
+        $self->{$level} = $code;
+    }
+    return $self->{$level};
+}
+
+=head2 start_report
+
+ Title   : start_report
+ Usage   : $self->start_report()
+ Function: has no function, returns nothing
+ Returns : empty string
+ Args    : none
+
+=cut
+
+sub start_report {
+    # This format has no report header.
+    my $header = '';
+    return $header;
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: has no function, returns nothing
+ Returns : empty string
+ Args    : none
+
+
+=cut
+
+sub end_report {
+    # This format has no report footer.
+    my $footer = '';
+    return $footer;
+}
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+ Note    : GbrowseGFF.pm makes no changes to the default filter code
+
+
+=cut
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HSPTableWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HSPTableWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HSPTableWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,287 @@
+# $Id: HSPTableWriter.pm,v 1.17.4.1 2006/10/02 23:10:27 sendu Exp $
+
+=head1 NAME
+
+Bio::SearchIO::Writer::HSPTableWriter - Tab-delimited data for Bio::Search::HSP::HSPI objects
+
+=head1 SYNOPSIS
+
+=head2 Example 1: Using the default columns
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::HSPTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::HSPTableWriter->new();
+
+    my $out = Bio::SearchIO->new( -writer => $writer );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Example 2: Specifying a subset of columns 
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::HSPTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::HSPTableWriter->new( 
+                                  -columns => [qw(
+                                                  query_name
+                                                  query_length
+                                                  hit_name
+                                                  hit_length
+                                                  rank
+                                                  frac_identical_query
+                                                  expect
+                                                  )]  );
+
+    my $out = Bio::SearchIO->new( -writer => $writer,
+				  -file   => ">searchio.out" );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Custom Labels
+
+You can also specify different column labels if you don't want to use
+the defaults.  Do this by specifying a C<-labels> hash reference
+parameter when creating the HSPTableWriter object.  The keys of the
+hash should be the column number (left-most column = 1) for the label(s)
+you want to specify. Here's an example:
+
+    my $writer = Bio::SearchIO::Writer::HSPTableWriter->new( 
+                               -columns => [qw( query_name 
+                                                query_length
+                                                hit_name
+                                                hit_length  )],
+                               -labels  => { 1 => 'QUERY_GI',
+  	                                     3 => 'HIT_IDENTIFIER' } );
+
+
+=head1 DESCRIPTION
+
+Bio::SearchIO::Writer::HSPTableWriter generates output at the finest
+level of granularity for data within a search result. Data for each HSP
+within each hit in a search result is output in tab-delimited format,
+one row per HSP.
+
+=head2 Available Columns
+
+Here are the columns that can be specified in the C<-columns>
+parameter when creating a HSPTableWriter object.  If a C<-columns> parameter
+is not specified, this list, in this order, will be used as the default.
+
+    query_name             # Sequence identifier of the query.
+    query_length           # Full length of the query sequence
+    hit_name               # Sequence identifier of the hit
+    hit_length             # Full length of the hit sequence
+    round                  # Round number for hit (PSI-BLAST)
+    rank
+    expect                 # Expect value for the alignment
+    score                  # Score for the alignment (e.g., BLAST score)
+    bits                   # Bit score for the alignment
+    frac_identical_query   # fraction of identical substitutions in query
+    frac_identical_hit     # fraction of identical substitutions in hit
+    frac_conserved_query   # fraction of conserved substitutions in query
+    frac_conserved_hit     # fraction of conserved substitutions in hit
+    length_aln_query       # Length of the aligned portion of the query sequence
+    length_aln_hit         # Length of the aligned portion of the hit sequence
+    gaps_query             # Number of gaps in the aligned query sequence
+    gaps_hit               # Number of gaps in the aligned hit sequence
+    gaps_total             # Number of gaps in the aligned query and hit sequences
+    start_query            # Starting coordinate of the aligned portion of the query sequence
+    end_query              # Ending coordinate of the aligned portion of the query sequence
+    start_hit              # Starting coordinate of the aligned portion of the hit sequence
+    end_hit                # Ending coordinate of the aligned portion of the hit sequence
+    strand_query           # Strand of the aligned query sequence
+    strand_hit             # Strand of the aligned hit sequence
+    frame                  # Reading frame of the aligned query sequence 
+    hit_description        # Full description of the hit sequence
+    query_description      # Full description of the query sequence
+
+For more details about these columns, see the documentation for the
+corresponding method in Bio::Search::HSP::HSPI.
+
+=head1 TODO
+
+Figure out the best way to incorporate algorithm-specific score columns.
+The best route is probably to have algorithm-specific subclasses 
+(e.g., BlastHSPTableWriter, FastaHSPTableWriter).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/           
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
+and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+    Bio::SearchIO::Writer::HitTableWriter
+    Bio::SearchIO::Writer::ResultTableWriter
+
+=head1 METHODS
+
+=cut
+
+package Bio::SearchIO::Writer::HSPTableWriter;
+
+use strict;
+
+use base qw(Bio::SearchIO::Writer::ResultTableWriter);
+
+
+# Table of the columns this writer can produce, keyed by column name.
+# Array fields: column, object, method[/argument], printf format, column label
+#   column          - default position of the column (left-most = 1)
+#   object          - which object supplies the value: result, hit or hsp
+#   method/argument - method called on that object, optionally followed by
+#                     '/' and an argument for it
+# Methods for result object are defined in Bio::Search::Result::ResultI.
+# Methods for hit object are defined in Bio::Search::Hit::HitI.
+# Methods for hsp object are defined in Bio::Search::HSP::HSPI.
+# Tech note: If a bogus method is supplied, it will result in all values to be zero.
+#            Don't know why this is.
+# NOTE(review): the 'round' entry carries a sixth element ('hit') that the
+#               field list above does not describe -- confirm whether it is used.
+# TODO (maybe): Allow specification of signif_format (i.e., separate mantissa/exponent)
+my %column_map = (
+                  'query_name'            => ['1', 'result', 'query_name', 's', 'QUERY' ],
+                  'query_length'          => ['2', 'result', 'query_length', 'd', 'LEN_Q'],
+                  'hit_name'              => ['3', 'hit', 'name', 's', 'HIT'],
+                  'hit_length'            => ['4', 'hit', 'hit_length', 'd', 'LEN_H'],
+                  'round'                 => ['5', 'hit', 'iteration', 'd', 'ROUND', 'hit'],
+                  'rank'                  => ['6', 'hsp', 'rank', 'd', 'RANK'],
+                  'expect'                => ['7', 'hsp', 'expect', '.1e', 'EXPCT'],
+                  'score'                 => ['8', 'hsp', 'score', 'd', 'SCORE'],
+                  'bits'                  => ['9', 'hsp', 'bits', 'd', 'BITS'],
+                  'frac_identical_query'  => ['10', 'hsp', 'frac_identical/query', '.2f', 'FR_IDQ'],
+                  'frac_identical_hit'    => ['11', 'hsp', 'frac_identical/hit', '.2f', 'FR_IDH'],
+                  'frac_conserved_query'  => ['12', 'hsp', 'frac_conserved/query', '.2f', 'FR_CNQ'],
+                  'frac_conserved_hit'    => ['13', 'hsp', 'frac_conserved/hit', '.2f', 'FR_CNH'],
+                  'length_aln_query'      => ['14', 'hsp', 'length/query', 'd', 'LN_ALQ'],
+                  'length_aln_hit'        => ['15', 'hsp', 'length/hit', 'd', 'LN_ALH'],
+                  'gaps_query'            => ['16', 'hsp', 'gaps/query', 'd', 'GAPS_Q'],
+                  'gaps_hit'              => ['17', 'hsp', 'gaps/hit', 'd', 'GAPS_H'],
+                  'gaps_total'            => ['18', 'hsp', 'gaps/total', 'd', 'GAPS_QH'],
+                  'start_query'           => ['19', 'hsp', 'start/query', 'd', 'START_Q'],
+                  'end_query'             => ['20', 'hsp', 'end/query', 'd', 'END_Q'],
+                  'start_hit'             => ['21', 'hsp', 'start/hit', 'd', 'START_H'],
+                  'end_hit'               => ['22', 'hsp', 'end/hit', 'd', 'END_H'],
+                  'strand_query'          => ['23', 'hsp', 'strand/query', 'd', 'STRND_Q'],
+                  'strand_hit'            => ['24', 'hsp', 'strand/hit', 'd', 'STRND_H'],
+                  'frame'                 => ['25', 'hsp', 'frame', 's', 'FRAME'],
+                  'hit_description'       => ['26', 'hit', 'hit_description', 's', 'DESC_H'],
+                  'query_description'     => ['27', 'result', 'query_description', 's', 'DESC_Q'],
+                 );
+
+# Accessor returning the column table above as a flat key/value list.
+sub column_map { return %column_map }
+
+
+=head2 to_string()
+
+Note: this method is not intended for direct use. 
+The SearchIO::write_result() method calls it automatically 
+if the writer is hooked up to a SearchIO object as illustrated in
+L<the SYNOPSIS section | SYNOPSIS>.
+
+ Title     : to_string()
+           :
+ Usage     : print $writer->to_string( $result_obj, [$include_labels] );
+           :
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : $include_labels = boolean, if true column labels are included (default: false)
+           :
+ Returns   : String containing tab-delimited set of data for each HSP
+           : in each Hit of the supplied ResultI object. 
+           :
+ Throws    : n/a
+
+=cut
+
+sub to_string {
+    my ($self, $result, $include_labels) = @_;
+
+    # Start with the header row only when the caller asked for labels.
+    my $str = '';
+    $str = $self->column_labels() if $include_labels;
+
+    my ($resultfilter, $hitfilter, $hspfilter) = (
+        $self->filter('RESULT'),
+        $self->filter('HIT'),
+        $self->filter('HSP'),
+    );
+
+    # A result rejected by its filter contributes no rows at all.
+    unless ( defined $resultfilter && !$resultfilter->($result) ) {
+        my $row_builder = $self->row_data_func;
+        my $row_format  = $self->printf_fmt;
+
+        # Make sure iteration starts at the first hit.
+        $result->rewind() if $result->can('rewind');
+
+        HIT:
+        while ( my $hit = $result->next_hit ) {
+            next HIT if defined $hitfilter && !$hitfilter->($hit);
+
+            # Likewise start at the first HSP of this hit.
+            $hit->rewind if $hit->can('rewind');
+
+            HSP:
+            while ( my $hsp = $hit->next_hsp ) {
+                next HSP if defined $hspfilter && !$hspfilter->($hsp);
+                my @fields = $row_builder->($result, $hit, $hsp);
+                $str .= sprintf "$row_format\n", @fields;
+            }
+        }
+    }
+
+    # Drop the tab left dangling in front of each line end.
+    $str =~ s/\t\n/\n/gs;
+    return $str;
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document.  Nothing for
+           a text message.
+ Returns : string
+ Args    : none
+
+=cut
+
+sub end_report {
+    # Tab-delimited output needs no closing text.
+    my $footer = '';
+    return $footer;
+}
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HTMLResultWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HTMLResultWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HTMLResultWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,895 @@
+# $Id: HTMLResultWriter.pm,v 1.38.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::Writer::HTMLResultWriter
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# Changes 2003-07-31 (jason)
+# Gary has cleaned up the code a lot to produce better looking HTML
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::Writer::HTMLResultWriter - write a Bio::Search::ResultI in HTML
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  use Bio::SearchIO::Writer::HTMLResultWriter;
+
+  my $in = new Bio::SearchIO(-format => 'blast',
+			     -file   => shift @ARGV);
+
+  my $writer = new Bio::SearchIO::Writer::HTMLResultWriter();
+  my $out = new Bio::SearchIO(-writer => $writer);
+  $out->write_result($in->next_result);
+
+
+  # to filter your output
+  my $MinLength = 100; # need a variable with scope outside the method
+  sub hsp_filter { 
+      my $hsp = shift;
+      return 1 if $hsp->length('total') > $MinLength;
+  }
+  sub result_filter { 
+      my $result = shift;
+      return $result->num_hits > 0;
+  }
+
+  my $writer = new Bio::SearchIO::Writer::HTMLResultWriter
+                     (-filters => { 'HSP' => \&hsp_filter} );
+  my $out = new Bio::SearchIO(-writer => $writer);
+  $out->write_result($in->next_result);
+
+  # can also set the filter via the writer object
+  $writer->filter('RESULT', \&result_filter);
+
+=head1 DESCRIPTION
+
+This object implements the SearchWriterI interface which will produce
+a set of HTML for a specific L<Bio::Search::Result::ResultI> interface.
+
+See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Gary Williams G.Williams at hgmp.mrc.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::SearchIO::Writer::HTMLResultWriter;
+use strict;
+use vars qw(%RemoteURLDefault
+            $MaxDescLen $DATE $AlignmentLineWidth $Revision);
+
+# Object preamble - inherits from Bio::Root::RootI
+
+BEGIN {
+    $Revision = '$Id: HTMLResultWriter.pm,v 1.38.4.1 2006/10/02 23:10:27 sendu Exp $';
+    $DATE = localtime(time);
+    %RemoteURLDefault = ( 
+      'PROTEIN' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=protein&cmd=search&term=%s',			  
+      'NUCLEOTIDE' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=nucleotide&cmd=search&term=%s'
+    );
+    $MaxDescLen = 60;
+    $AlignmentLineWidth = 60;
+}
+
+
+use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::Writer::HTMLResultWriter();
+ Function: Builds a new Bio::SearchIO::Writer::HTMLResultWriter object 
+ Returns : Bio::SearchIO::Writer::HTMLResultWriter
+ Args    : -filters => hashref with any or all of the keys (HSP HIT RESULT)
+           which have values pointing to a subroutine reference
+           which will expect to get the matching HSP, Hit or Result object
+           -nucleotide_url => URL sprintf string base for the nt sequences
+           -protein_url => URL sprintf string base for the aa sequences
+           -no_wublastlinks => boolean. Do not display WU-BLAST lines 
+                               even if they are parsed out.
+                               Links = (1) 
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # Let the root class build the object, then pick out our named options.
+  my $self = $class->SUPER::new(@args);
+  my ($p,$n,$filters,
+      $nowublastlinks) = $self->_rearrange([qw(PROTEIN_URL
+					       NUCLEOTIDE_URL
+					       FILTERS
+					       NO_WUBLASTLINKS)],@args);
+  $self->remote_database_url('p',$p || $RemoteURLDefault{'PROTEIN'});
+  $self->remote_database_url('n',$n || $RemoteURLDefault{'NUCLEOTIDE'});
+  # Stored inverted: to_string() reads this flag as "show the Links line".
+  $self->no_wublastlinks(! $nowublastlinks);
+  if( defined $filters ) {
+      if( ref($filters) !~ /HASH/i ) {
+	  $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
+      } else {
+	  while( my ($type,$code) = each %{$filters} ) {
+	      $self->filter($type,$code);
+	  }
+      }
+  }
+  return $self;
+}
+
+=head2 remote_database_url
+
+ Title   : remote_database_url
+ Usage   : $obj->remote_database_url($type,$newval)
+ Function: This should return or set a string that contains a %s which can be
+           filled in with sprintf.
+ Returns : value of remote_database_url
+ Args    : $type - 'PROTEIN' or 'P' for protein URLS
+                   'NUCLEOTIDE' or 'N' for nucleotide URLS
+           $value - new value to set [optional]
+
+
+=cut
+
+sub remote_database_url{
+   my ($self,$type,$value) = @_;
+   # Only the first letter matters: P(rotein) or N(ucleotide), any case.
+   my ($initial) = defined $type ? ( $type =~ /^(P|N)/i ) : ();
+   unless( defined $initial ) {
+       $self->warn("Must provide a type (PROTEIN or NUCLEOTIDE)");
+       return '';
+   }
+   my $slot = uc $initial;
+   $self->{'remote_database_url'}->{$slot} = $value if defined $value;
+   return $self->{'remote_database_url'}->{$slot};
+}
+
+=head2 to_string
+
+ Purpose   : Produces data for each Search::Result::ResultI in a string.
+           : This implementation renders the result as an HTML page:
+           : a hit summary table, the HSP alignments, and the
+           : search parameters and statistics.
+ Usage     : print $writer->to_string( $result_obj, @args );
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : @args = any additional arguments used by your implementation.
+ Returns   : String containing data for each search Result or any of its
+           : sub-objects (Hits and HSPs).
+ Throws    : n/a
+
+=cut
+
+sub to_string { # Render one search result as HTML: summary table, alignments, parameters, statistics.
+    my ($self,$result,$num) = @_; 
+    $num ||= 0; # report index; the page header is emitted only when it is 0 or 1
+    return unless defined $result;
+    my $links = $self->no_wublastlinks; # true when WU-BLAST "Links" lines should be shown (new() stores the option inverted)
+    my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'),
+						  $self->filter('HIT'),
+						  $self->filter('HSP') );
+    return '' if( defined $resultfilter && ! &{$resultfilter}($result) );    
+    # Classify the search by algorithm name; the outcome drives %baselens and the "Positives" line.
+    my ($qtype,$dbtype,$dbseqtype,$type);
+    my $alg = $result->algorithm;
+
+    # This is actually wrong for the FASTAs I think
+    if(  $alg =~ /T(FAST|BLAST)([XY])/i ) {
+	$qtype      = $dbtype = 'translated';
+	$dbseqtype = $type       = 'PROTEIN';
+    } elsif( $alg =~ /T(FAST|BLAST)N/i ) {
+	$qtype      = '';
+	$dbtype     = 'translated';
+	$type       = 'PROTEIN';
+	$dbseqtype  = 'NUCLEOTIDE';
+    } elsif( $alg =~ /(FAST|BLAST)N/i || 
+	     $alg =~ /(WABA|EXONERATE)/i ) {
+	$qtype      = $dbtype = '';
+	$type = $dbseqtype  = 'NUCLEOTIDE';
+    } elsif( $alg =~ /(FAST|BLAST)P/  || 
+	     $alg =~ /SSEARCH|HMM(PFAM|SEARCH)/i ) {
+	$qtype      = $dbtype = '';
+	$type = $dbseqtype  = 'PROTEIN';
+    } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) {
+	$qtype      = 'translated';
+        $dbtype     = 'PROTEIN';
+	$dbseqtype  = $type      = 'PROTEIN';
+    } else { 
+	$self->warn("algorithm was ", $result->algorithm, " couldn't match\n");
+    }
+    
+    # A translated side advances 3 coordinate units per aligned column, an untranslated side 1.
+    my %baselens = ( 'Sbjct:'   => ( $dbtype eq 'translated' )  ? 3 : 1,
+		     'Query:'   => ( $qtype  eq 'translated' )  ? 3 : 1);
+
+    my $str;
+    if( ! defined $num || $num <= 1 ) { # $num was defaulted above, so only the <= 1 test can fail
+	$str = &{$self->start_report}($result);
+    }
+
+    $str .= &{$self->title}($result);
+
+    $str .= $result->algorithm_reference || $self->algorithm_reference($result);
+    $str .= &{$self->introduction}($result);
+
+    $str .= "<table border=0>
+            <tr><th>Sequences producing significant alignments:</th>
+            <th>Score<br>(bits)</th><th>E<br>value</th></tr>";
+
+    my $hspstr = '<p><p>'; # alignment section, appended after the summary table is closed
+    if( $result->can('rewind')) {
+        $result->rewind(); # support stream based parsing routines
+    }
+
+    while( my $hit = $result->next_hit ) {
+	next if( $hitfilter && ! &{$hitfilter}($hit) );
+	my $nm = $hit->name();
+	
+	$self->debug( "no $nm for name (".$hit->description(). "\n") 
+	    unless $nm;
+	my ($gi,$acc) = &{$self->id_parser}($nm); # $acc doubles as the HTML anchor name below
+	my $p = "%-$MaxDescLen". "s";
+	my $descsub;
+	if( length($hit->description) > ($MaxDescLen - 3) ) {
+	    $descsub = sprintf($p,
+		substr($hit->description,0,$MaxDescLen-3) . "...");
+	} else { 
+	    $descsub = sprintf($p,$hit->description);
+	}
+
+	my $url_desc  = &{$self->hit_link_desc()}($self,$hit, $result);
+	my $url_align = &{$self->hit_link_align()}($self,$hit, $result);
+
+	my @hsps = $hit->hsps;
+	
+	if( ! @hsps ) {
+	    # no HSPs so no link 
+	    $str .= sprintf('<tr><td>%s %s</td><td>%s</td><td>%.2g</td></tr>'."\n",
+			    $url_desc, $descsub, 
+			    ($hit->raw_score ? $hit->raw_score : 
+			     (defined $hsps[0] ? $hsps[0]->score : ' ')),
+			    ( $hit->significance ? $hit->significance :
+			      (defined $hsps[0] ? $hsps[0]->evalue : ' ')) 
+			    );
+	} else { 
+	    # failover to first HSP if the data does not contain a 
+	    # bitscore/significance value for the Hit (NCBI XML data for one)
+
+	    $str .= sprintf('<tr><td>%s %s</td><td>%s</td><td><a href="#%s">%.2g</a></td></tr>'."\n",
+			    $url_desc, $descsub, 
+			    ($hit->raw_score ? $hit->raw_score : 
+			     (defined $hsps[0] ? $hsps[0]->score : ' ')),
+			    $acc,
+			    ( $hit->significance ? $hit->significance :
+			      (defined $hsps[0] ? $hsps[0]->evalue : ' ')) 
+			    );
+	    $hspstr .= "<a name=\"$acc\">\n".
+		sprintf("><b>%s</b> %s\n<dd>Length = %s</dd><p>\n\n", $url_align, 
+			defined $hit->description ? $hit->description : '', 
+			&_numwithcommas($hit->length));
+	    my $ct = 0;
+	    foreach my $hsp (@hsps ) {
+		next if( $hspfilter && ! &{$hspfilter}($hsp) );
+		$hspstr .= sprintf(" Score = %s bits (%s), Expect = %s",
+				   $hsp->bits || $hsp->score, 
+				   $hsp->score || $hsp->bits, 
+				   $hsp->evalue || '');
+		if( defined $hsp->pvalue ) {
+		    $hspstr .= ", P = ".$hsp->pvalue;
+		}
+		$hspstr .= "<br>\n";
+		$hspstr .= sprintf(" Identities = %d/%d (%d%%)",
+				   ( $hsp->frac_identical('total') * 
+				     $hsp->length('total')),
+				   $hsp->length('total'),
+				   $hsp->frac_identical('total') * 100);
+
+		if( $type eq 'PROTEIN' ) {
+		    $hspstr .= sprintf(", Positives = %d/%d (%d%%)",
+				       ( $hsp->frac_conserved('total') * 
+					 $hsp->length('total')),
+				       $hsp->length('total'),
+				       $hsp->frac_conserved('total') * 100);
+		}
+		if( $hsp->gaps ) {
+		    $hspstr .= sprintf(", Gaps = %d/%d (%d%%)",
+				       $hsp->gaps('total'),
+				       $hsp->length('total'),
+				       (100 * $hsp->gaps('total') / 
+					$hsp->length('total')));
+		}
+
+		my ($hframe,$qframe)   = ( $hsp->hit->frame, $hsp->query->frame);
+		my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand);
+		# so TBLASTX will have Query/Hit frames
+		#    BLASTX  will have Query frame
+		#    TBLASTN will have Hit frame
+		if( $hstrand || $qstrand ) {
+		    $hspstr .= ", Frame = ";
+		    my ($signq, $signh);
+		    unless( $hstrand ) {
+			$hframe = undef;
+			# if strand is null or 0 then it is protein
+			# and thus has no frame
+		    } else { 
+			$signh = $hstrand < 0 ? '-' : '+';
+		    }
+		    unless( $qstrand  ) {
+			$qframe = undef;
+			# if strand is null or 0 then it is protein
+		    } else { 
+			$signq =$qstrand < 0 ? '-' : '+';
+		    }
+		    # remember bioperl stores frames as 0,1,2 (GFF way)
+		    # BLAST reports frames as 1,2,3 so
+		    # we have to add 1 to the frame values
+		    if( defined $hframe && ! defined $qframe) {  
+			$hspstr .= "$signh".($hframe+1);
+		    } elsif( defined $qframe && ! defined $hframe) {  
+			$hspstr .= "$signq".($qframe+1);
+		    } else { 
+			$hspstr .= sprintf(" %s%d / %s%d",
+					   $signq,$qframe+1,
+					   $signh, $hframe+1);
+		    }
+		}
+		if($links && 
+		   $hsp->can('links') && defined(my $lnks = $hsp->links) ) {
+		    $hspstr .= sprintf("<br>\nLinks = %s\n",$lnks);
+		}
+
+		$hspstr .= "</a><p>\n<pre>";
+		# Three display rows per alignment block: query, match line, subject.
+		my @hspvals = ( {'name' => 'Query:',
+				 'seq'  => $hsp->query_string,
+				 'start' => ($qstrand >= 0 ? 
+					     $hsp->query->start : 
+					     $hsp->query->end),
+					     'end'   => ($qstrand >= 0 ? 
+							 $hsp->query->end : 
+							 $hsp->query->start),
+							 'index' => 0,
+							 'direction' => $qstrand || 1
+						     },
+				{ 'name' => ' 'x6,
+				  'seq'  => $hsp->homology_string,
+				  'start' => undef,
+				  'end'   => undef,
+				  'index' => 0,
+				  'direction' => 1
+				  },
+				{ 'name'  => 'Sbjct:',
+				  'seq'   => $hsp->hit_string,
+				  'start' => ($hstrand >= 0 ? 
+					      $hsp->hit->start : 
+					      $hsp->hit->end),
+					      'end'   => ($hstrand >= 0 ? 
+							  $hsp->hit->end : 
+							  $hsp->hit->start),
+							  'index' => 0, 
+							  'direction' => $hstrand || 1
+						      }
+				);	    
+
+
+		# let's set the expected length (in chars) of the starting number
+		# in an alignment block so we can have things line up
+		# Just going to try and set to the largest
+
+		my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}),
+						    length($hspvals[0]->{'end'}),
+						    length($hspvals[2]->{'start'}),
+						    length($hspvals[2]->{'end'}));
+		my $count = 0;
+		while ( $count <= $hsp->length('total') ) { # one pass per $AlignmentLineWidth-column block
+		    foreach my $v ( @hspvals ) {
+			my $piece = substr($v->{'seq'}, $v->{'index'} + $count,
+					   $AlignmentLineWidth);
+			my $cp = $piece;
+			my $plen = scalar ( $cp =~ tr/\-//); # number of '-' gap characters in this piece
+			my ($start,$end) = ('','');
+			if( defined $v->{'start'} ) { 
+			    $start = $v->{'start'};
+			    # since strand can be + or - use the direction
+			    # to decide whether to add or subtract from the end
+			    my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )*
+				$baselens{$v->{'name'}};
+			    if( length($piece) < $AlignmentLineWidth ) {
+				$d = (length($piece) - $plen) * $v->{'direction'} * 
+				    $baselens{$v->{'name'}};
+			    }
+			    $end   = $v->{'start'} + $d - $v->{'direction'};
+			    $v->{'start'} += $d;
+			}
+			$hspstr .= sprintf("%s %-".$numwidth."s %s %s\n",
+					   $v->{'name'},
+					   $start,
+					   $piece,
+					   $end
+					   );
+		    }
+		    $count += $AlignmentLineWidth;
+		    $hspstr .= "\n\n";
+		}
+		$hspstr .= "</pre>\n";
+	    }
+	}
+#	$hspstr .= "</pre>\n";
+    }
+
+
+    # make table of search statistics and end the web page
+    $str .= "</table><p>\n".$hspstr."<p><p><hr><h2>Search Parameters</h2><table border=1><tr><th>Parameter</th><th>Value</th>\n";
+        
+    foreach my $param ( sort $result->available_parameters ) {
+	$str .= "<tr><td>$param</td><td>". $result->get_parameter($param) ."</td></tr>\n";
+	
+    }
+    $str .= "</table><p><h2>Search Statistics</h2><table border=1><tr><th>Statistic</th><th>Value</th></tr>\n";
+    foreach my $stat ( sort $result->available_statistics ) {
+	$str .= "<tr><td>$stat</td><td>". $result->get_statistic($stat). "</td></tr>\n";
+    }
+    $str .=  "</table><P>".$self->footer() . "<P>\n";
+    return $str;
+}
+
+=head2 hit_link_desc
+
+ Title   : hit_link_desc
+ Usage   : $self->hit_link_desc(\&link_function);
+ Function: Get/Set the function which provides an HTML 
+           link(s) for the given hit to be used
+           within the description section at the top of the BLAST report.
+           This allows a person reading the report within
+           a web browser to go to one or more database entries for
+           the given hit from the description section.
+ Returns : Function reference
+ Args    : Function reference
+ See Also: L<default_hit_link_desc()>
+
+=cut
+
+sub hit_link_desc{
+    my( $self, $callback ) = @_;
+    # A true argument replaces the stored description-link generator.
+    $self->{'_hit_link_desc'} = $callback if $callback;
+    my $current = $self->{'_hit_link_desc'};
+    return $current ? $current : \&default_hit_link_desc;
+}
+
+=head2 default_hit_link_desc
+
+ Title   : default_hit_link_desc
+ Usage   : $self->default_hit_link_desc($hit, $result)
+ Function: Provides an HTML link(s) for the given hit to be used
+           within the description section at the top of the BLAST report.
+           This allows a person reading the report within
+           a web browser to go to one or more database entries for
+           the given hit from the description section.
+ Returns : string containing HTML markup "<a href...")
+
+           The default implementation returns an HTML link to the
+           URL supplied by the remote_database_url() method
+           and using the identifier supplied by the id_parser() method.
+           It will use the NCBI GI if present, and the accession if not.
+
+ Args    : First argument is a Bio::Search::Hit::HitI
+           Second argument is a Bio::Search::Result::ResultI
+
+See Also: L<hit_link_align>, L<remote_database>, L<id_parser>
+
+=cut
+
+sub default_hit_link_desc {
+    my($self, $hit, $result) = @_;
+    # Algorithms whose name ends in P, X or Y are linked to the protein database.
+    my $db = 'NUCLEOTIDE';
+    $db = 'PROTEIN' if $result->algorithm =~ /(P|X|Y)$/i;
+    my ($gi,$acc) = $self->id_parser->($hit->name);
+    # Without a URL template the bare hit name is returned unlinked.
+    return scalar($hit->name()) unless length($self->remote_database_url($db)) > 0;
+    # Prefer the GI as the search term, falling back to the accession.
+    my $href = sprintf($self->remote_database_url($db),$gi || $acc);
+    return sprintf('<a href="%s">%s</a>', $href, $hit->name());
+}
+
+
+=head2 hit_link_align
+
+ Title   : hit_link_align
+ Usage   : $self->hit_link_align(\&link_function);
+ Function: Get/Set the function which provides an HTML link(s) 
+           for the given hit to be used
+           within the HSP alignment section of the BLAST report.
+           This allows a person reading the report within
+           a web browser to go to one or more database entries for
+           the given hit from the alignment section.
+ Returns : string containing HTML markup "<a href...")
+
+           The default implementation delegates to hit_link_desc().
+
+ Args    : First argument is a Bio::Search::Hit::HitI
+           Second argument is a Bio::Search::Result::ResultI
+
+See Also: L<hit_link_desc>, L<remote_database>, L<id_parser>
+
+=cut
+
+sub hit_link_align {
+    my ($self,$callback) = @_;
+    $self->{'_hit_link_align'} = $callback if $callback;
+    # With nothing stored, the description-link generator is reused.
+    return $self->{'_hit_link_align'}
+        || \&default_hit_link_desc;
+}
+
+=head2 start_report
+
+  Title   : start_report
+  Usage   : $index->start_report( CODE )
+  Function: Stores or returns the code to
+            write the start of the <HTML> block, the <TITLE> block
+            and the start of the <BODY> block of HTML.   Useful
+            for (for instance) specifying alternative
+            HTML if you are embedding the output in
+            an HTML page which you have already started.
+            (For example a routine returning a null string).
+            Returns \&default_start_report (see below) if not
+            set. 
+  Example : $index->start_report( \&my_start_report )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub start_report {
+    my( $self, $generator ) = @_;
+    # Remember a caller-supplied page-header routine, if one was given.
+    $generator and $self->{'_start_report'} = $generator;
+    my $stored = $self->{'_start_report'};
+    return $stored || \&default_start_report;
+}
+
+=head2 default_start_report
+
+ Title   : default_start_report
+ Usage   : $self->default_start_report($result)
+ Function: The default method to call when starting a report.
+ Returns : string
+ Args    : First argument is a Bio::Search::Result::ResultI
+
+=cut
+
+sub default_start_report {
+    my ($result) = @_;
+    # Page prologue; the two %s slots take the algorithm name and the module revision.
+    my $template = qq{<HTML>
+      <HEAD> <CENTER><TITLE>Bioperl Reformatted HTML of %s output with Bioperl Bio::SearchIO system</TITLE></CENTER></HEAD>
+      <!------------------------------------------------------------------->
+      <!-- Generated by Bio::SearchIO::Writer::HTMLResultWriter          -->
+      <!-- %s -->
+      <!-- http://bioperl.org                                            -->
+      <!------------------------------------------------------------------->
+      <BODY BGCOLOR="WHITE">
+    };
+    return sprintf($template, $result->algorithm, $Revision);
+}
+
+=head2 title
+
+ Title   : title
+ Usage   : $self->title($CODE)
+
+  Function: Stores or returns the code to provide HTML for the given
+            BLAST report that will appear at the top of the BLAST report
+            HTML output.  Useful for (for instance) specifying
+            alternative routines to write your own titles.
+            Returns \&default_title (see below) if not
+            set. 
+  Example : $index->title( \&my_title )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub title {
+    my( $self, $generator ) = @_;
+    # Fall back to default_title until a replacement has been registered.
+    $self->{'_title'} = $generator if $generator;
+    return $self->{'_title'} if $self->{'_title'};
+    return \&default_title;
+}
+
+=head2 default_title
+
+ Title   : default_title
+ Usage   : $self->default_title($result)
+ Function: Provides HTML for the given BLAST report that will appear
+           at the top of the BLAST report HTML output.
+ Returns : string containing HTML markup
+           The default implementation returns <CENTER> <H1> HTML
+           containing text such as:
+           "Bioperl Reformatted HTML of BLASTP Search Report
+                     for gi|1786183|gb|AAC73113.1|"
+ Args    : First argument is a Bio::Search::Result::ResultI
+
+=cut
+
+sub default_title {
+    my ($result) = @_;
+    # Centered top-level heading naming the algorithm and the query.
+    my $heading =
+        qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>};
+    my @fields = ( $result->algorithm, $result->query_name() );
+    return sprintf($heading, @fields);
+}
+
+
+=head2 introduction
+
+ Title   : introduction
+ Usage   : $self->introduction($CODE)
+
+  Function: Stores or returns the code to provide HTML for the given
+            BLAST report detailing the query and the
+            database information.
+            Useful for (for instance) specifying
+            routines returning alternative introductions.
+            Returns \&default_introduction (see below) if not
+            set. 
+  Example : $index->introduction( \&my_introduction )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub introduction {
+    my( $self, $generator ) = @_;
+    if ($generator) { $self->{'_introduction'} = $generator }
+    # Until overridden, the query/database summary comes from default_introduction.
+    my $active = $self->{'_introduction'} || \&default_introduction;
+    return $active;
+}
+
+=head2 default_introduction
+
+ Title   : default_introduction
+ Usage   : $self->default_introduction($result)
+ Function: Outputs HTML to provide the query
+           and the database information
+ Returns : string containing HTML
+ Args    : First argument is a Bio::Search::Result::ResultI
+           Second argument is string holding literature citation
+
+=cut
+
+sub default_introduction {
+    my ($result) = @_;
+    my $layout = qq{
+    <b>Query=</b> %s %s<br><dd>(%s letters)</dd>
+    <p>
+    <b>Database:</b> %s<br><dd>%s sequences; %s total letters<p></dd>
+    <p>
+  };
+    my @values = (    # the three counts are shown with thousands separators
+        $result->query_name,
+        $result->query_description,
+        _numwithcommas($result->query_length),
+        $result->database_name(),
+        _numwithcommas($result->database_entries()),
+        _numwithcommas($result->database_letters()),
+    );
+    return sprintf($layout, @values);
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document (</BODY></HTML>)
+           for HTML
+ Returns : string
+ Args    : none
+
+=cut
+
+sub end_report { # closes the <BODY> and <HTML> elements opened by default_start_report
+    return join('', "</BODY>\n", "</HTML>\n");
+}
+
+# copied from Bio::Index::Fasta
+# useful here as well
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of FASTA file. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect a fasta
+            description line.  An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+    my( $self, $parser ) = @_;
+    # Store a replacement parser when given one; a false value is ignored.
+    $self->{'_id_parser'} = $parser if $parser;
+    # Otherwise default_id_parser, which understands gi|...|db|acc names, is used.
+    my $chosen = $self->{'_id_parser'};
+    return $chosen ? $chosen : \&default_id_parser;
+}
+
+
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default ID parser, adapted from Bio::Index::Fasta.
+            Splits an NCBI-style name into its GI number and
+            accession; other names are returned whitespace-trimmed.
+  Returns : ID string
+            The default implementation checks for NCBI-style
+            identifiers in the given string ('gi|12345|AA54321').
+            For these IDs, it extracts the GI and accession and
+            returns a two-element list of strings (GI, acc).
+  Args    : a fasta header line string
+
+=cut
+
+sub default_id_parser {
+    my ($string) = @_;
+    $string = '' unless defined $string;    # tolerate a missing name
+    my ($gi,$acc);
+    # Peel off a leading NCBI "gi|12345|" token; the GI doubles as the
+    # accession until something better is found in the remainder.
+    if( $string =~ s/gi\|(\d+)\|?// ) { $gi = $acc = $1; }
+    if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) {
+	$acc = $2;                  # db|ACCESSION[|locus] form
+    } elsif( $string =~ /\S/ || ! defined $gi ) {
+	($acc = $string) =~ s/^\s+(\S+)/$1/;   # otherwise the trimmed string itself
+	$acc =~ s/(\S+)\s+$/$1/;
+    }
+    return ($gi,$acc);
+}
+	
+sub MIN { my ($x,$y) = @_ ? @_ : ($a,$b); return $x < $y ? $x : $y; } # smaller of two numbers; uses $a/$b when called without arguments
+sub MAX { my ($x,$y) = @_ ? @_ : ($a,$b); return $x > $y ? $x : $y; } # larger of two numbers; uses $a/$b when called without arguments
+
+sub footer {
+    my ($self) = @_;
+    my $module = ref($self);    # class name of the writer that produced the page
+    return "<hr><h5>Produced by Bioperl module ".$module." on $DATE<br>Revision: $Revision</h5>\n";
+}
+
+=head2 algorithm_reference
+
+ Title   : algorithm_reference
+ Usage   : my $reference = $writer->algorithm_reference($result);
+ Function: Returns the appropriate Bibliographic reference for the 
+           algorithm format being produced
+ Returns : String
+ Args    : L<Bio::Search::Result::ResultI> to reference
+
+
+=cut
+
+sub algorithm_reference {
+   my ($self,$result) = @_;
+   # Only genuine ResultI objects can be asked for their algorithm;
+   # anything else yields an empty citation.
+   return '' unless defined $result && ref($result)
+       && $result->isa('Bio::Search::Result::ResultI');
+   unless( $result->algorithm =~ /BLAST/i ) {
+       # FASTA family gets the Pearson citation; everything else gets nothing.
+       return '' unless $result->algorithm =~ /FAST/i;
+       return $result->algorithm . " " . $result->algorithm_version . "<br>" .
+	   "\n<b>Reference:</b> Pearson et al, Genomics (1997) 46:24-36<p>";
+   }
+   # BLAST family: program name and version, then the matching citation.
+   my $heading = $result->algorithm . ' ' . $result->algorithm_version . "<p>";
+   my $is_wublast = $result->algorithm_version =~ /WashU/i;
+   # WU-BLAST carries its own copyright notice and reference.
+   return $heading . ( $is_wublast ?
+"Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.<br>
+All Rights Reserved.<p>
+<b>Reference:</b>  Gish, W. (1996-2000) <a href=\"http://blast.wustl.edu\">http://blast.wustl.edu</a><p>"
+   :
+"<b>Reference:</b> Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,<br>
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),<br>
+\"Gapped BLAST and PSI-BLAST: a new generation of protein database search<br>
+programs\",  Nucleic Acids Res. 25:3389-3402.<p>" );
+}
+
+# from Perl Cookbook 2.17
+sub _numwithcommas {
+    # Commas are inserted on the reversed string; digits after a '.' are left alone.
+    (my $backwards = reverse $_[0]) =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;
+    return scalar reverse $backwards;
+}
+
+=head2 Methods Bio::SearchIO::SearchWriterI
+
+L<Bio::SearchIO::SearchWriterI> inherited methods.
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+
+=head2 no_wublastlinks
+
+ Title   : no_wublastlinks
+ Usage   : $obj->no_wublastlinks($newval)
+ Function: Get/Set boolean value regarding whether or not to display
+           Link = (1) 
+           type output in the report output (WU-BLAST only)
+ Returns : boolean
+ Args    : on set, new boolean value (a scalar or undef, optional)
+
+
+=cut
+
+sub no_wublastlinks{
+    my ($self, @newval) = @_;
+    # Any argument, even undef, overwrites the stored flag.
+    $self->{'no_wublastlinks'} = $newval[0] if @newval;
+    return $self->{'no_wublastlinks'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HitTableWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HitTableWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/HitTableWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,306 @@
+# $Id: HitTableWriter.pm,v 1.20.4.1 2006/10/02 23:10:27 sendu Exp $
+
+=head1 NAME
+
+Bio::SearchIO::Writer::HitTableWriter - Tab-delimited data for Bio::Search::Hit::HitI objects
+
+=head1 SYNOPSIS
+
+=head2 Example 1: Using the default columns
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::HitTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::HitTableWriter->new();
+
+    my $out = Bio::SearchIO->new( -writer => $writer );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Example 2: Specifying a subset of columns 
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::HitTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::HitTableWriter->new( 
+                                  -columns => [qw(
+                                                  query_name
+                                                  query_length
+                                                  hit_name
+                                                  hit_length
+                                                  frac_identical_query
+                                                  expect
+                                                  )]  );
+
+    my $out = Bio::SearchIO->new( -writer => $writer,
+				  -file   => ">searchio.out" );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Custom Labels
+
+You can also specify different column labels if you don't want to use
+the defaults.  Do this by specifying a C<-labels> hash reference
+parameter when creating the HitTableWriter object. The keys of the
+hash should be the column number (left-most column = 1) for the label(s)
+you want to specify. Here's an example:
+
+    my $writer = Bio::SearchIO::Writer::HitTableWriter->new( 
+                               -columns => [qw( query_name 
+                                                query_length
+                                                hit_name
+                                                hit_length  )],
+                               -labels  => { 1 => 'QUERY_GI',
+  	                                     3 => 'HIT_IDENTIFIER' } );
+
+
+=head1 DESCRIPTION
+
+Bio::SearchIO::Writer::HitTableWriter outputs summary data 
+for each Hit within a search result. Output is in tab-delimited format,
+one row per Hit. 
+
+The reason why this is considered summary data is that if a hit
+contains multiple HSPs, the HSPs will be tiled and 
+the data represents a summary across all HSPs.
+See below for which columns are affected.
+See the docs in L<Bio::Search::Hit::BlastHit|Bio::Search::Hit::BlastHit>
+ for more details on HSP tiling.
+
+=head2 Available Columns
+
+Here are the columns that can be specified in the C<-columns>
+parameter when creating a HitTableWriter object.  If a C<-columns> parameter
+is not specified, this list, in this order, will be used as the default.
+
+    query_name             # Sequence identifier of the query.
+    query_length           # Full length of the query sequence
+    hit_name               # Sequence identifier of the hit
+    hit_length             # Full length of the hit sequence
+    round                  # Round number for hit (PSI-BLAST)
+    expect                 # Expect value for the alignment
+    score                  # Score for the alignment (e.g., BLAST score)
+    bits                   # Bit score for the alignment
+    num_hsps               # Number of HSPs (not the "N" value)
+    frac_identical_query*  # fraction of identical substitutions in query
+    frac_identical_hit*    # fraction of identical substitutions in hit
+    frac_conserved_query*  # fraction of conserved substitutions in query
+    frac_conserved_hit*    # fraction of conserved substitutions in hit
+    frac_aligned_query*    # fraction of the query sequence that is aligned
+    frac_aligned_hit*      # fraction of the hit sequence that is aligned
+    length_aln_query*      # Length of the aligned portion of the query sequence
+    length_aln_hit*        # Length of the aligned portion of the hit sequence
+    gaps_query*            # Number of gaps in the aligned query sequence
+    gaps_hit*              # Number of gaps in the aligned hit sequence
+    gaps_total*            # Number of gaps in the aligned query and hit sequences
+    start_query*           # Starting coordinate of the aligned portion of the query sequence
+    end_query*             # Ending coordinate of the aligned portion of the query sequence
+    start_hit*             # Starting coordinate of the aligned portion of the hit sequence
+    end_hit*               # Ending coordinate of the aligned portion of the hit sequence
+    strand_query           # Strand of the aligned query sequence
+    strand_hit             # Strand of the aligned hit sequence
+    frame                  # Frame of the alignment (0,1,2)
+    ambiguous_aln          # Ambiguous alignment indicator ('qs', 'q', 's')
+    hit_description        # Full description of the hit sequence
+    query_description      # Full description of the query sequence
+    rank                   # The rank order of the hit
+    num_hits               # Number of hits for the query finding this hit
+
+Items marked with a C<*> report data summed across all HSPs
+after tiling them to avoid counting data from overlapping regions
+multiple times.
+
+For more details about these columns, see the documentation for the
+corresponding method in Bio::Search::Hit::BlastHit.
+
+=head1 TODO
+
+Figure out the best way to incorporate algorithm-specific score columns.
+The best route is probably to have algorithm-specific subclasses 
+(e.g., BlastHitTableWriter, FastaHitTableWriter).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
+and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001, 2002 Steve Chervitz. All Rights Reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+L<Bio::SearchIO::Writer::HSPTableWriter>, 
+L<Bio::SearchIO::Writer::ResultTableWriter>
+
+=head1 METHODS
+
+=cut
+
+package Bio::SearchIO::Writer::HitTableWriter;
+
+use strict;
+
+use base qw(Bio::SearchIO::Writer::ResultTableWriter);
+
+
+# Column specification table.  Each key is a column name accepted by the
+# -columns option; each value is an array ref with these fields:
+#   [0] default column position (orders the default column set)
+#   [1] object the value is read from ('result' or 'hit')
+#   [2] method to call on that object, optionally as method/argument
+#   [3] printf conversion for the value (without the leading '%')
+#   [4] default column label
+# Methods for the result object are defined in Bio::Search::Result::ResultI.
+# Methods for the hit object are defined in Bio::Search::Hit::HitI.
+# Tech note (original author): if a bogus method is supplied, all values
+# come out as zero; the cause was not known.
+
+# TODO (maybe): Allow specification of separate mantissa/exponent for
+# significance data.
+
+my %column_map = (
+                  'query_name'            => ['1', 'result', 'query_name', 's', 'QUERY' ],
+                  'query_length'          => ['2', 'result', 'query_length', 'd', 'LEN_Q'],
+                  'hit_name'              => ['3', 'hit', 'name', 's', 'HIT'],
+                  'hit_length'            => ['4', 'hit', 'length', 'd', 'LEN_H'],
+                  'round'                 => ['5', 'hit', 'iteration', 'd', 'ROUND'],
+                  'expect'                => ['6', 'hit', 'significance', '.1e', 'EXPCT'],
+                  'score'                 => ['7', 'hit', 'raw_score', 'd', 'SCORE'],
+                  'bits'                  => ['8', 'hit', 'bits', 'd', 'BITS'],
+                  'num_hsps'              => ['9', 'hit', 'num_hsps', 'd', 'HSPS'],
+                  'frac_identical_query'  => ['10', 'hit', 'frac_identical/query', '.2f', 'FR_IDQ'],
+                  'frac_identical_hit'    => ['11', 'hit', 'frac_identical/hit', '.2f', 'FR_IDH'],
+                  'frac_conserved_query'  => ['12', 'hit', 'frac_conserved/query', '.2f', 'FR_CNQ'],
+                  'frac_conserved_hit'    => ['13', 'hit', 'frac_conserved/hit', '.2f', 'FR_CNH'],
+                  'frac_aligned_query'    => ['14', 'hit', 'frac_aligned_query', '.2f', 'FR_ALQ'],
+                  'frac_aligned_hit'      => ['15', 'hit', 'frac_aligned_hit', '.2f', 'FR_ALH'],
+                  'length_aln_query'      => ['16', 'hit', 'length_aln/query', 'd', 'LN_ALQ'],
+                  'length_aln_hit'        => ['17', 'hit', 'length_aln/hit', 'd', 'LN_ALH'],
+                  'gaps_query'            => ['18', 'hit', 'gaps/query', 'd', 'GAPS_Q'],
+                  'gaps_hit'              => ['19', 'hit', 'gaps/hit', 'd', 'GAPS_H'],
+                  'gaps_total'            => ['20', 'hit', 'gaps/total', 'd', 'GAPS_QH'],
+                  'start_query'           => ['21', 'hit', 'start/query', 'd', 'START_Q'],
+                  'end_query'             => ['22', 'hit', 'end/query', 'd', 'END_Q'],
+                  'start_hit'             => ['23', 'hit', 'start/hit', 'd', 'START_H'],
+                  'end_hit'               => ['24', 'hit', 'end/hit', 'd', 'END_H'],
+                  'strand_query'          => ['25', 'hit', 'strand/query', 's', 'STRND_Q'],
+                  'strand_hit'            => ['26', 'hit', 'strand/hit', 's', 'STRND_H'],
+                  'frame'                 => ['27', 'hit', 'frame', 'd', 'FRAME'],
+                  'ambiguous_aln'         => ['28', 'hit', 'ambiguous_aln', 's', 'AMBIG'],
+                  'hit_description'       => ['29', 'hit', 'description', 's', 'DESC_H'],
+                  'query_description'     => ['30', 'result', 'query_description', 's', 'DESC_Q'],
+                  'rank'                  => ['31', 'hit', 'rank', 's', 'RANK'],
+                  'num_hits'              => ['32', 'result', 'num_hits', 's', 'NUM_HITS'],
+                 );
+
+# Returns the column specification table as a flat key/value list.
+sub column_map { return %column_map }
+
+
+=head2 to_string()
+
+Note: this method is not intended for direct use. The
+SearchIO::write_result() method calls it automatically if the writer
+is hooked up to a SearchIO object as illustrated in 
+L<the SYNOPSIS section | SYNOPSIS>.
+
+ Title     : to_string()
+           :
+ Usage     : print $writer->to_string( $result_obj, [$include_labels] );
+           :
+ Argument  : $result_obj = A Bio::Search::Result::BlastResult object
+           : $include_labels = boolean, if true column labels are included (default: false)
+           :
+ Returns   : String containing tab-delimited set of data for each hit 
+           : in a BlastResult object. Some data is summed across multiple HSPs.
+           :
+ Throws    : n/a
+
+=cut
+
+#----------------
+sub to_string {
+    my ($self, $result, $include_labels) = @_;
+
+    # Start with the label rows when requested, otherwise with nothing.
+    my $out = '';
+    $out = $self->column_labels() if $include_labels;
+
+    my $row_builder = $self->row_data_func;
+    my $row_format  = $self->printf_fmt;
+
+    my ($keep_result, $keep_hit) = ( $self->filter('RESULT'),
+                                     $self->filter('HIT') );
+
+    # A missing filter accepts everything.
+    if ( !defined $keep_result || $keep_result->($result) ) {
+        # make sure we are at the beginning of the hit list
+        $result->rewind() if $result->can('rewind');
+
+        HIT:
+        foreach my $hit ( $result->hits ) {
+            if ( defined $keep_hit ) {
+                next HIT unless $keep_hit->($hit);
+            }
+            # Undefined values are written as 0.
+            my @fields = map { defined $_ ? $_ : 0 } $row_builder->($result, $hit);
+            $out .= sprintf "$row_format\n", @fields;
+        }
+    }
+
+    # The row format ends in a tab; drop the tab that precedes each newline.
+    $out =~ s/\t\n/\n/gs;
+    return $out;
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document.  Nothing for
+           a text message.
+ Returns : string
+ Args    : none
+
+=cut
+
+sub end_report {
+    # Nothing is appended at the end of a tab-delimited report.
+    my $trailer = '';
+    return $trailer;
+}
+
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/ResultTableWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/ResultTableWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/ResultTableWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,457 @@
+# $Id: ResultTableWriter.pm,v 1.19.4.1 2006/10/02 23:10:27 sendu Exp $
+
+=head1 NAME
+
+Bio::SearchIO::Writer::ResultTableWriter - Outputs tab-delimited data for each Bio::Search::Result::ResultI object.
+
+=head1 SYNOPSIS
+
+=head2 Example 1: Using the default columns
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::ResultTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::ResultTableWriter->new();
+
+    my $out = Bio::SearchIO->new( -writer => $writer );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Example 2: Specifying a subset of columns 
+
+    use Bio::SearchIO;
+    use Bio::SearchIO::Writer::ResultTableWriter;
+
+    my $in = Bio::SearchIO->new();
+
+    my $writer = Bio::SearchIO::Writer::ResultTableWriter->new( 
+                                  -columns => [qw(
+                                                  query_name
+                                                  query_length
+                                                  num_hits
+                                                  )]  );
+
+    my $out = Bio::SearchIO->new( -writer => $writer,
+				  -file   => ">result.out" );
+
+    while ( my $result = $in->next_result() ) {
+        $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
+    }
+
+=head2 Custom Labels
+
+You can also specify different column labels if you don't want to use
+the defaults.  Do this by specifying a C<-labels> hash reference
+parameter when creating the ResultTableWriter object.  The keys of the
+hash should be the column number (left-most column = 1) for the label(s)
+you want to specify. Here's an example:
+
+    my $writer = Bio::SearchIO::Writer::ResultTableWriter->new( 
+                               -columns => [qw( query_name 
+                                                query_length
+                                                query_description 
+                                                num_hits)],
+                               -labels  => { 1 => 'QUERY_GI',
+  	                                     2 => 'QUERY_LENGTH' } );
+
+
+=head1 DESCRIPTION
+
+Bio::SearchIO::Writer::ResultTableWriter outputs data in tab-delimited
+format for each search result, one row per search result. This is a very
+coarse-grain level of information since it only includes data
+stored in the Bio::Search::Result::ResultI object itself and does not
+include any information about hits or HSPs.
+
+You most likely will never use this object but instead will use one of
+its subclasses: Bio::SearchIO::Writer::HitTableWriter or
+Bio::SearchIO::Writer::HSPTableWriter.
+
+=head2 Available Columns
+
+Here are the columns that can be specified in the C<-columns>
+parameter when creating a ResultTableWriter object.  If a C<-columns> parameter
+is not specified, this list, in this order, will be used as the default.
+
+    query_name
+    query_length
+    query_description
+    num_hits
+
+For more details about these columns, see the documentation for the
+corresponding method in L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/           
+
+=head1 AUTHOR 
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+See L<the FEEDBACK section | FEEDBACK> for where to send bug reports
+and comments.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+L<Bio::SearchIO::Writer::HitTableWriter>,
+L<Bio::SearchIO::Writer::HSPTableWriter>
+
+=head1 METHODS
+
+=cut
+
+
+package Bio::SearchIO::Writer::ResultTableWriter;
+
+use strict;
+
+use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
+
+# Column specification table.  Each key is a column name accepted by the
+# -columns option; each value is an array ref with these fields:
+#   [0] default column position (orders the default column set)
+#   [1] object the value is read from (always 'result' in this table)
+#   [2] method to call on that object, optionally as method/argument
+#   [3] printf conversion for the value (without the leading '%')
+#   [4] default column label
+# Methods are defined in Bio::Search::Result::ResultI.
+# Tech note (original author): if a bogus method is supplied, all values
+#            come out as zero; the cause was not known.
+my %column_map = (
+                  'query_name'        => ['1', 'result', 'query_name', 's', 'QUERY' ],
+                  'query_length'      => ['2', 'result', 'query_length', 'd', 'LEN_Q'],
+                  'query_description' => ['3', 'result', 'query_description', 's', 'DESC_Q'],
+                  'num_hits'          => ['4', 'result', 'num_hits', 'd', 'NUM_HITS'],
+                 );
+
+# Returns the column specification table as a flat key/value list.
+sub column_map { return %column_map }
+
+# Constructor.
+#   Args    : -columns => array ref of column names (optional)
+#             -labels  => hash ref of column number => custom label (optional)
+#             -filters => hash ref of filter type => CODE ref (optional)
+#   Returns : the new writer object
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($col_spec, $label_spec, $filters) =
+        $self->_rearrange( [qw(COLUMNS LABELS FILTERS)], @args );
+
+    # The column list must be set first: the printf format, the row-data
+    # function and the label rows are all derived from it.
+    $self->_set_cols( $col_spec );
+    $self->_set_labels( $label_spec ) if $label_spec;
+    $self->_set_printf_fmt();
+    $self->_set_row_data_func();
+    $self->_set_column_labels();
+
+    if ( defined $filters ) {
+        # Match against ref() itself.  Writing "!ref(...) =~ /HASH/i"
+        # negates first and then matches '' or 1, which is never true.
+        if ( ref($filters) !~ /HASH/i ) {
+            $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
+        }
+        else {
+            while ( my ($type, $code) = each %{$filters} ) {
+                $self->filter($type, $code);
+            }
+        }
+    }
+
+    return $self;
+}
+
+
+# Purpose : Stores the column spec internally. Also performs QC on the
+#           user-supplied column specification.
+# Args    : array ref of column names (matched case-insensitively); when
+#           undef, every column in column_map() is used, ordered by its
+#           position field.
+# Throws  : Bio::Root::BadParameter for an unknown, empty or undefined
+#           column name, or when the argument is not an ARRAY ref.
+#
+sub _set_cols {
+    my ($self, $col_spec_ref) = @_;
+    return if defined $self->{'_cols'};  # only set columns once
+
+    my %map = $self->column_map;
+
+    if( not defined $col_spec_ref) {
+        print STDERR "\nUsing default column map.\n";
+        $col_spec_ref = [ sort { $map{$a}->[0] <=> $map{$b}->[0] } keys %map ];
+    }
+
+    if( ref($col_spec_ref) eq 'ARRAY') {
+        # Visit every entry.  A loop that tests the truth of each name
+        # would stop at '' or '0' and silently drop the columns after it.
+        foreach my $raw ( @{$col_spec_ref} ) {
+            my $item = defined $raw ? lc $raw : '';
+            if( not defined ($map{$item}) ) {
+                $self->throw(-class =>'Bio::Root::BadParameter',
+                             -text => "Unknown column name: $item"
+                            );
+            }
+            push @{$self->{'_cols'}}, $item;
+        }
+    }
+    else {
+        $self->throw(-class =>'Bio::Root::BadParameter',
+                     -text => "Can't set columns: not a ARRAY ref",
+                     -value => $col_spec_ref
+                    );
+    }
+}
+
+# Build the printf format for one data row: a "%<conversion>" for each
+# selected column, each followed by a tab.  The result is stored in
+# $self->{'_printf_fmt'}.
+sub _set_printf_fmt {
+    my ($self) = @_;
+
+    my @selected = $self->columns();
+    my %spec_for = $self->column_map;
+
+    my $row_format = join '', map { '%' . $spec_for{$_}->[3] . "\t" } @selected;
+
+    # NOTE(review): this pattern matches a literal backslash followed by
+    # "t", not a tab character, so the trailing tab stays in the format.
+    # The to_string() methods strip the tab that precedes the newline.
+    $row_format =~ s/\\t$//;
+
+    $self->{'_printf_fmt'} = $row_format;
+}
+
+# Returns the printf format string built by _set_printf_fmt().
+sub printf_fmt {
+    my ($self) = @_;
+    return $self->{'_printf_fmt'};
+}
+
+# Record user-supplied column labels, keyed by column number, in
+# $self->{'_custom_labels'}.
+#   Args   : hash ref of column number => label
+#   Throws : Bio::Root::BadParameter when the argument is not a HASH ref
+sub _set_labels {
+    my ($self, $label_spec) = @_;
+
+    if( ref($label_spec) ne 'HASH' ) {
+        $self->throw(-class =>'Bio::Root::BadParameter',
+                     -text => "Can't set labels: not a HASH ref: $label_spec"
+                    );
+    }
+    else {
+        my @positions = sort { $a <=> $b } keys %{$label_spec};
+        for my $position (@positions) {
+            $self->{'_custom_labels'}->{$position} = $label_spec->{$position};
+        }
+    }
+}
+
+# Build the two header rows (the labels, then a dashed underline of the
+# same width as each label) and store them in $self->{'_column_labels'}.
+# A custom label registered for a column number (left-most column = 1)
+# overrides the default label from column_map().
+sub _set_column_labels {
+    my $self = shift;
+
+    my @cols   = $self->columns;
+    my %map    = $self->column_map;
+    my $custom = $self->{'_custom_labels'} || {};
+
+    my (@labels, @underbars);
+    my $position = 0;
+    foreach my $col ( @cols ) {
+        $position++;
+        my $label = defined $custom->{$position}
+                  ? $custom->{$position}
+                  : $map{$col}->[4];
+        push @labels, $label;
+        push @underbars, '-' x length($label);
+    }
+
+    # Join on tabs rather than appending a tab to every field and
+    # stripping the last one afterwards.  The strip ran twice over the
+    # label row, so an empty last label lost its column there but not in
+    # the underline row.
+    $self->{'_column_labels'} =
+        join("\t", @labels) . "\n" . join("\t", @underbars) . "\n";
+}
+
+# Purpose : Generate a function that will call the appropriate
+# methods on the result, hit, and hsp objects to retrieve the column data 
+# specified in the column spec.
+#
+# We should only have to go through the column spec once
+# for a given ResultTableWriter. To permit this, we'll generate code 
+# for a method that returns an array of the data for a row of output
+# given a result, hit, and hsp object as arguments.
+#
+# The generated function is stored in $self->{'_row_data_func'}.
+# NOTE(review): the source string is built once here, but it is passed
+# to a string eval on every call of the generated function.
+#
+sub _set_row_data_func {
+    my $self = shift;
+
+    # Now we need to generate a string that can be eval'd to get the data.
+    my @cols = $self->columns();
+    my %map = $self->column_map;
+    my @data;
+    while( my $col = shift @cols ) {
+	my $object = $map{$col}->[1];
+	my $method = $map{$col}->[2];
+        my $arg = '';
+        # A "method/argument" entry becomes a call with one quoted argument.
+        if( $method =~ m!(\w+)/(\w+)! ) {
+            $method = $1;
+            $arg = "\"$2\"";
+        }
+        # Produces source text such as: $hit->gaps("query")
+        push @data, "\$$object->$method($arg)";
+    }
+    # One comma-separated list expression, evaluated in list context below.
+    my $code = join( ",", @data);
+
+    if( $self->verbose > 0 ) {
+## Begin Debugging	
+	$self->debug( "Data to print:\n");
+	foreach( 0..$#data) { $self->debug( " [". ($_+ 1) . "] $data[$_]\n");}
+	$self->debug( "CODE:\n$code\n");
+	$self->debug("Printf format: ". $self->printf_fmt. "\n");
+## End Debugging
+    }
+
+    # The lexical names below must not be renamed: the eval'd text refers
+    # to them by name through the object field of the column map ('result'
+    # and 'hit' in the maps visible here; presumably 'hsp' in a subclass).
+    # NOTE(review): the closure captures $self and is stored on $self, so
+    # the two reference each other.
+    my $func = sub {
+        my ($result, $hit, $hsp) = @_;
+        my @r = eval $code;
+        # This should reduce the occurrence of those opaque "all zeros" bugs.
+	if( $@ ) { $self->throw("Trouble in ResultTableWriter::_set_row_data_func() eval: $@\n\n"); 
+               }
+	return @r;
+    };
+    $self->{'_row_data_func'} = $func;
+}
+
+# Returns the CODE ref built by _set_row_data_func().
+sub row_data_func {
+    my ($self) = @_;
+    return $self->{'_row_data_func'};
+}
+
+
+=head2 to_string()
+
+Note: this method is not intended for direct use. The
+SearchIO::write_result() method calls it automatically if the writer
+is hooked up to a SearchIO object as illustrated in L<the SYNOPSIS section | SYNOPSIS>.
+
+ Title     : to_string()
+           :
+ Usage     : print $writer->to_string( $result_obj, [$include_labels] );
+           :
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : $include_labels = boolean, if true column labels are included (default: false)
+           :
+ Returns   : String containing one tab-delimited row of data for the
+           : ResultI object (preceded by the column labels if requested).
+           :
+ Throws    : n/a
+
+=cut
+
+#----------------
+sub to_string {
+    my ($self, $result, $include_labels) = @_;
+
+    # Start with the label rows when requested, otherwise with nothing.
+    my $text = '';
+    $text = $self->column_labels() if $include_labels;
+
+    # A result rejected by the RESULT filter contributes no data row.
+    my $accept = $self->filter('RESULT');
+    return $text if defined $accept && !$accept->($result);
+
+    my @fields = $self->{'_row_data_func'}->( $result );
+    $text .= sprintf "$self->{'_printf_fmt'}\n", @fields;
+
+    # The row format ends in a tab; drop the tab that precedes the newline.
+    $text =~ s/\t\n/\n/gs;
+    return $text;
+}
+
+
+
+# Returns the list of selected column names.  Before a column spec has
+# been stored, every column in column_map() is returned, ordered by its
+# position field.
+sub columns {
+    my ($self) = @_;
+
+    if( ref $self->{'_cols'} ) {
+        my @chosen = @{ $self->{'_cols'} };
+        return @chosen;
+    }
+
+    my %spec_for = $self->column_map;
+    my @ordered  = sort { $spec_for{$a}->[0] <=> $spec_for{$b}->[0] } keys %spec_for;
+    return @ordered;
+}
+
+
+=head2 column_labels
+
+ Usage     : print $writer->column_labels();
+ Purpose   : Get column labels for to_string().
+ Returns   : String containing column labels. Tab-delimited.
+ Argument  : n/a
+ Throws    : n/a
+
+=cut
+
+sub column_labels {
+    my ($self) = @_;
+    return $self->{'_column_labels'};
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document.  Nothing for
+           a text message.
+ Returns : string
+ Args    : none
+
+=cut
+
+sub end_report {
+    # Nothing is appended at the end of a tab-delimited report.
+    my $trailer = '';
+    return $trailer;
+}
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+
+# Is this really needed?
+#=head2 signif_format
+#
+# Usage     : $writer->signif_format( [FMT] );
+# Purpose   : Allows retrieval of the P/Expect exponent values only
+#           : or as a two-element list (mantissa, exponent).
+# Usage     : $writer->signif_format('exp');
+#           : $writer->signif_format('parts');
+# Returns   : String or '' if not set.
+# Argument  : String, FMT = 'exp' (return the exponent only)
+#           :             = 'parts'(return exponent + mantissa in 2-elem list)
+#           :              = undefined (return the raw value)
+# Comments  : P/Expect values are still stored internally as the full,
+#           : scientific notation value.
+#
+#=cut
+#
+##-------------
+#sub signif_format {
+##-------------
+#    my $self = shift;
+#    if(@_) { $self->{'_signif_format'} = shift; }
+#    return $self->{'_signif_format'};
+#}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/TextResultWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/TextResultWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/Writer/TextResultWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,796 @@
+# $Id: TextResultWriter.pm,v 1.18.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::Writer::TextResultWriter
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::Writer::TextResultWriter - Object to implement writing
+a Bio::Search::ResultI in Text.
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  use Bio::SearchIO::Writer::TextResultWriter;
+
+  my $in = new Bio::SearchIO(-format => 'blast',
+			     -file   => shift @ARGV);
+
+  my $writer = new Bio::SearchIO::Writer::TextResultWriter();
+  my $out = new Bio::SearchIO(-writer => $writer);
+  $out->write_result($in->next_result);
+
+=head1 DESCRIPTION
+
+This object implements the SearchWriterI interface which will produce
+a set of Text for a specific Bio::Search::Report::ReportI interface.
+
+You can also provide the argument -filters =E<gt> \%hash to filter at
+the hsp, hit, or result level.  %hash is an associative array which
+contains any or all of the keys (HSP, HIT, RESULT).  The values
+pointed to by these keys would be references to a subroutine which
+expects to be passed an object - one of Bio::Search::HSP::HSPI,
+Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively.
+Each function needs to return a boolean value as to whether or not the
+passed element should be included in the output report - true if it is
+to be included, false if it is to be omitted.
+
+For example to filter on sequences in the database which are too short
+for your criteria you would do the following.
+
+Define a hit filter method 
+
+  sub hit_filter { 
+      my $hit = shift;
+      return $hit->length > 100; # test if length of the hit sequence
+                                 # long enough    
+  }
+  my $writer = new Bio::SearchIO::Writer::TextResultWriter(
+       -filters => { 'HIT' => \&hit_filter }  
+      );
+
+Another example would be to filter HSPs on percent identity, let's
+only include HSPs which are 75% identical or better.
+
+   sub hsp_filter {
+       my $hsp = shift;
+       return $hsp->percent_identity > 75;
+   }
+   my $writer = new Bio::SearchIO::Writer::TextResultWriter(
+       -filters => { 'HSP' => \&hsp_filter }  
+      );
+
+See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
+
+
+This module will use the module Text::Wrap if it is installed to wrap
+the Query description line.  If you do not have Text::Wrap installed
+this module will work fine but you won't have the Query line wrapped.
+You will see a warning about this when you first instantiate a
+TextResultWriter - to avoid these warnings from showing up, simply set
+the verbosity upon initialization to -1 like this: my $writer = new
+Bio::SearchIO::Writer::TextResultWriter(-verbose =E<gt> -1);
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::Writer::TextResultWriter;
+use vars qw($MaxNameLen $MaxDescLen $AlignmentLineWidth 	    $DescLineLen $TextWrapLoaded);
+use strict;
+
+# Object preamble - inherits from Bio::Root::RootI
+
+BEGIN {
+    # Layout defaults: width of the description column in the hit table
+    # and number of alignment characters printed per line.
+    ( $MaxDescLen, $AlignmentLineWidth ) = ( 65, 60 );
+
+    # Text::Wrap is optional; remember whether it could be loaded.
+    $TextWrapLoaded = 0;
+    if ( eval { require Text::Wrap; 1 } ) {
+        $TextWrapLoaded = 1;
+    }
+}
+
+use POSIX;
+
+use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::Writer::TextResultWriter();
+ Function: Builds a new Bio::SearchIO::Writer::TextResultWriter object 
+ Returns : Bio::SearchIO::Writer::TextResultWriter
+ Args    : -filters => hashref with any or all of the keys (HSP HIT RESULT)
+           which have values pointing to a subroutine reference
+           which will expect to get a Hit,HSP, Result object respectively
+           -no_wublastlinks => boolean. Do not display WU-BLAST lines even if 
+                               they are parsed out
+                               Links = (1) 
+
+=cut
+
+sub new {
+    my ( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    my ( $filters, $nowublastlinks ) =
+        $self->_rearrange( [qw(FILTERS NO_WUBLASTLINKS)], @args );
+
+    if ( defined $filters ) {
+        # The negation has to apply to the match itself: "!ref(...) =~ /HASH/i"
+        # negates first and then matches '' or 1, which can never succeed.
+        if ( ref($filters) !~ /HASH/i ) {
+            $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
+        }
+        else {
+            # register every (type => CODE) pair through the filter() method
+            while ( my ( $type, $code ) = each %{$filters} ) {
+                $self->filter( $type, $code );
+            }
+        }
+    }
+
+    # Note the inversion: the accessor is handed the negated option, and
+    # to_string() prints the Links lines when the accessor returns true.
+    $self->no_wublastlinks( !$nowublastlinks );
+
+    unless ($TextWrapLoaded) {
+        $self->warn("Could not load Text::Wrap - the Query Description will not be line wrapped\n");
+    }
+    else {
+        # wrap the query description at the description column width
+        $Text::Wrap::columns = $MaxDescLen;
+    }
+    return $self;
+}
+
+
+=head2 to_string
+
+ Purpose   : Produces data for each Search::Result::ResultI in a string.
+           : This class implements the method as a BLAST-like plain text
+           : report. For other implementations, see ResultTableWriter.pm,
+           : HitTableWriter.pm, and HSPTableWriter.pm.
+ Usage     : print $writer->to_string( $result_obj, @args );
+ Argument  : $result_obj = A Bio::Search::Result::ResultI object
+           : @args = any additional arguments used by your implementation.
+ Returns   : String containing data for each search Result or any of its
+           : sub-objects (Hits and HSPs).
+ Throws    : n/a
+
+=cut
+
+sub to_string {
+    my ( $self, $result, $num ) = @_;
+    $num ||= 0;
+    return unless defined $result;
+
+    # new() hands the negated -no_wublastlinks option to this accessor, so a
+    # true value here means "do print the WU-BLAST Links lines".
+    my $links = $self->no_wublastlinks;
+    my ( $resultfilter, $hitfilter, $hspfilter ) = ( $self->filter('RESULT'),
+                                                     $self->filter('HIT'),
+                                                     $self->filter('HSP') );
+    return '' if ( defined $resultfilter && !&{$resultfilter}($result) );
+
+    # Start from empty strings so the string comparisons further down stay
+    # well defined when the algorithm name is not recognised.
+    my ( $qtype, $dbtype, $dbseqtype, $type ) = ( '', '', '', '' );
+    my $alg = $result->algorithm;
+
+    # This is actually wrong for the FASTAs I think
+    if ( $alg =~ /T(FAST|BLAST)([XY])/i ) {
+        $qtype     = $dbtype = 'translated';
+        $dbseqtype = $type   = 'PROTEIN';
+    }
+    elsif ( $alg =~ /T(FAST|BLAST)N/i ) {
+        $qtype     = '';
+        $dbtype    = 'translated';
+        $type      = 'PROTEIN';
+        $dbseqtype = 'NUCLEOTIDE';
+    }
+    elsif ( $alg =~ /(FAST|BLAST)N/i || $alg =~ /(WABA|EXONERATE)/i ) {
+        $qtype = $dbtype    = '';
+        $type  = $dbseqtype = 'NUCLEOTIDE';
+    }
+    # matched case-insensitively, like every other branch of this chain
+    elsif ( $alg =~ /(FAST|BLAST)P/i || $alg =~ /SSEARCH|(HMM|SEARCH|PFAM)/i ) {
+        $qtype = $dbtype    = '';
+        $type  = $dbseqtype = 'PROTEIN';
+    }
+    elsif ( $alg =~ /(FAST|BLAST)[XY]/i ) {
+        $qtype     = 'translated';
+        $dbtype    = 'PROTEIN';
+        $dbseqtype = $type = 'PROTEIN';
+    }
+    else {
+        print STDERR "algorithm was ", $result->algorithm, " couldn't match\n";
+    }
+
+    # residues/bases of sequence coordinate covered by one alignment column
+    my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1,
+                     'Query:' => ( $qtype  eq 'translated' ) ? 3 : 1 );
+
+    my $str;
+    if ( !defined $num || $num <= 1 ) {
+        $str = &{ $self->start_report }($result);
+    }
+
+    $str .= &{ $self->title }($result);
+
+    $str .= $result->algorithm_reference || $self->algorithm_reference($result);
+    $str .= &{ $self->introduction }($result);
+
+    $str .= qq{
+                                                                 Score       E
+Sequences producing significant alignments:                      (bits)    value
+};
+
+    # $str collects the one-line-per-hit table, $hspstr the alignments that
+    # are appended after the table.
+    my $hspstr = '';
+    if ( $result->can('rewind') ) {
+        $result->rewind();    # support stream based parsing routines
+    }
+    while ( my $hit = $result->next_hit ) {
+        next if ( defined $hitfilter && !&{$hitfilter}($hit) );
+        my $nm = $hit->name();
+        $self->debug( "no $nm for name (" . $hit->description() . "\n" )
+            unless $nm;
+        my ( $gi, $acc ) = &{ $self->id_parser }($nm);
+        my $p = "%-$MaxDescLen" . "s";
+        my $descsub;
+        my $desc = sprintf( "%s %s", $nm, $hit->description );
+        if ( length($desc) - 3 > $MaxDescLen ) {
+            # too long for the column: cut and mark with an ellipsis
+            $descsub = sprintf( $p, substr( $desc, 0, $MaxDescLen - 3 ) . "..." );
+        }
+        else {
+            $descsub = sprintf( $p, $desc );
+        }
+
+        $str .= sprintf( "%s   %-4s  %s\n",
+                         $descsub,
+                         defined $hit->raw_score    ? $hit->raw_score    : ' ',
+                         defined $hit->significance ? $hit->significance : '?' );
+        my @hsps = $hit->hsps;
+        if (@hsps) {
+            $hspstr .= sprintf( ">%s %s\n%9sLength = %d\n\n",
+                                $hit->name,
+                                defined $hit->description ? $hit->description : '',
+                                '',    # empty is for the %9s in the str formatting
+                                $hit->length );
+
+            foreach my $hsp (@hsps) {
+                next if ( defined $hspfilter && !&{$hspfilter}($hsp) );
+                $hspstr .= sprintf( " Score = %4s bits (%s), Expect = %s",
+                                    $hsp->bits, $hsp->score, $hsp->evalue );
+                if ( $hsp->pvalue ) {
+                    $hspstr .= ", P = " . $hsp->pvalue;
+                }
+                $hspstr .= "\n";
+                $hspstr .= sprintf( " Identities = %d/%d (%d%%)",
+                                    ( $hsp->frac_identical('total') *
+                                      $hsp->length('total') ),
+                                    $hsp->length('total'),
+                                    POSIX::floor( $hsp->frac_identical('total') * 100 ) );
+
+                if ( $type eq 'PROTEIN' ) {
+                    $hspstr .= sprintf( ", Positives = %d/%d (%d%%)",
+                                        ( $hsp->frac_conserved('total') *
+                                          $hsp->length('total') ),
+                                        $hsp->length('total'),
+                                        POSIX::floor( $hsp->frac_conserved('total') * 100 ) );
+
+                }
+                if ( $hsp->gaps ) {
+                    $hspstr .= sprintf( ", Gaps = %d/%d (%d%%)",
+                                        $hsp->gaps('total'),
+                                        $hsp->length('total'),
+                                        POSIX::floor( 100 * $hsp->gaps('total') /
+                                                      $hsp->length('total') ) );
+                }
+                $hspstr .= "\n";
+                my ( $hframe, $qframe ) = ( $hsp->hit->frame,
+                                            $hsp->query->frame );
+                my ( $hstrand, $qstrand ) = ( $hsp->hit->strand, $hsp->query->strand );
+                # so TBLASTX will have Query/Hit frames
+                #    BLASTX  will have Query frame
+                #    TBLASTN will have Hit frame
+                if ( $hstrand || $qstrand ) {
+                    $hspstr .= " Frame = ";
+                    my ( $signq, $signh );
+                    unless ($hstrand) {
+                        # if strand is null or 0 then it is protein
+                        # and thus has no frame
+                        $hframe = undef;
+                    }
+                    else {
+                        $signh = $hstrand < 0 ? '-' : '+';
+                    }
+                    unless ($qstrand) {
+                        # if strand is null or 0 then it is protein
+                        $qframe = undef;
+                    }
+                    else {
+                        $signq = $qstrand < 0 ? '-' : '+';
+                    }
+                    # remember bioperl stores frames as 0,1,2 (GFF way)
+                    # BLAST reports them as 1,2,3 so
+                    # we have to add 1 to the frame values
+                    if ( defined $hframe && !defined $qframe ) {
+                        $hspstr .= "$signh" . ( $hframe + 1 );
+                    }
+                    elsif ( defined $qframe && !defined $hframe ) {
+                        $hspstr .= "$signq" . ( $qframe + 1 );
+                    }
+                    else {
+                        $hspstr .= sprintf( " %s%d / %s%d",
+                                            $signq, $qframe + 1,
+                                            $signh, $hframe + 1 );
+                    }
+                }
+
+                if ( $links &&
+                     $hsp->can('links') && defined( my $lnks = $hsp->links ) ) {
+                    $hspstr .= sprintf( " Links = %s\n", $lnks );
+                }
+                $hspstr .= "\n\n";
+
+                # One record per printed row of an alignment block.  For a
+                # negative strand the row starts at the feature's end and
+                # counts downwards.
+                my @hspvals = (
+                    {   'name'      => 'Query:',
+                        'seq'       => $hsp->query_string,
+                        'start'     => ( $qstrand >= 0 ?
+                                         $hsp->query->start :
+                                         $hsp->query->end ),
+                        'end'       => ( $qstrand >= 0 ?
+                                         $hsp->query->end :
+                                         $hsp->query->start ),
+                        'index'     => 0,
+                        'direction' => $qstrand || 1
+                    },
+                    {   'name'      => ' ' x 6,    # this might need to adjust for long coordinates??
+                        'seq'       => $hsp->homology_string,
+                        'start'     => undef,
+                        'end'       => undef,
+                        'index'     => 0,
+                        'direction' => 1
+                    },
+                    {   'name'      => 'Sbjct:',
+                        'seq'       => $hsp->hit_string,
+                        'start'     => ( $hstrand >= 0 ?
+                                         $hsp->hit->start : $hsp->hit->end ),
+                        'end'       => ( $hstrand >= 0 ?
+                                         $hsp->hit->end : $hsp->hit->start ),
+                        'index'     => 0,
+                        'direction' => $hstrand || 1
+                    }
+                );
+
+                # let's set the expected length (in chars) of the starting number
+                # in an alignment block so we can have things line up
+                # Just going to try and set to the largest
+
+                my ($numwidth) = sort { $b <=> $a } ( length( $hspvals[0]->{'start'} ),
+                                                      length( $hspvals[0]->{'end'} ),
+                                                      length( $hspvals[2]->{'start'} ),
+                                                      length( $hspvals[2]->{'end'} ) );
+                my $count = 0;
+                while ( $count <= $hsp->length('total') ) {
+                    foreach my $v (@hspvals) {
+                        my $piece = substr( $v->{'seq'}, $v->{'index'} + $count,
+                                            $AlignmentLineWidth );
+                        my $cp = $piece;
+                        # number of gap characters in this piece; they do
+                        # not consume sequence coordinates
+                        my $plen = scalar( $cp =~ tr/\-// );
+                        my ( $start, $end ) = ( '', '' );
+                        if ( defined $v->{'start'} ) {
+                            $start = $v->{'start'};
+                            # since strand can be + or - use the direction
+                            # to signify whether to add or subtract from end
+                            my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen ) *
+                                $baselens{ $v->{'name'} };
+                            if ( length($piece) < $AlignmentLineWidth ) {
+                                $d = ( length($piece) - $plen ) * $v->{'direction'} *
+                                    $baselens{ $v->{'name'} };
+                            }
+                            $end = $v->{'start'} + $d - $v->{'direction'};
+                            $v->{'start'} += $d;
+                        }
+                        $hspstr .= sprintf( "%s %-" . $numwidth . "s %s %s\n",
+                                            $v->{'name'},
+                                            $start,
+                                            $piece,
+                                            $end );
+                    }
+                    $count += $AlignmentLineWidth;
+                    $hspstr .= "\n";
+                }
+            }
+            $hspstr .= "\n";
+        }
+    }
+    $str .= "\n\n" . $hspstr;
+
+    # Footer.  The argument order follows the labels: letters first, then
+    # sequences (database_entries is the sequence count, as used in
+    # default_introduction).
+    $str .= sprintf( qq{  Database: %s
+    Posted date:  %s
+  Number of letters in database: %s
+  Number of sequences in database: %s
+
+Matrix: %s
+},
+                     $result->database_name(),
+                     $result->get_statistic('posted_date') ||
+                     POSIX::strftime( "%b %d, %Y %I:%M %p", localtime ),
+                     &_numwithcommas( $result->database_letters() ),
+                     &_numwithcommas( $result->database_entries() ),
+                     $result->get_parameter('matrix') || '' );
+
+    if ( defined( my $open = $result->get_parameter('gapopen') ) ) {
+        $str .= sprintf( "Gap Penalties Existence: %d, Extension: %d\n",
+                         $open || 0, $result->get_parameter('gapext') || 0 );
+    }
+
+    # skip those params we've already output
+    foreach my $param ( grep { !/matrix|gapopen|gapext/i }
+                        $result->available_parameters ) {
+        $str .= "$param: " . $result->get_parameter($param) . "\n";
+
+    }
+    $str .= "Search Statistics\n";
+    # skip posted date, we already output it
+    foreach my $stat ( sort grep { !/posted_date/ }
+                       $result->available_statistics ) {
+        my $expect = $result->get_parameter('expect');
+        my $v      = $result->get_statistic($stat);
+        if ( $v =~ /^\d+$/ ) {
+            $v = &_numwithcommas($v);
+        }
+        if ( defined $expect &&
+             $stat eq 'seqs_better_than_cutoff' ) {
+            $str .= "seqs_better_than_$expect: $v\n";
+        }
+        else {
+            $str .= "$stat: $v\n";
+        }
+    }
+    $str .= "\n\n";
+    return $str;
+}
+
+
+=head2 start_report
+
+  Title   : start_report
+  Usage   : $index->start_report( CODE )
+  Function: Stores or returns the code to
+            write the start of the <HTML> block, the <TITLE> block
+            and the start of the <BODY> block of HTML.   Useful
+            for (for instance) specifying alternative
+            HTML if you are embedding the output in
+            an HTML page which you have already started.
+            (For example a routine returning a null string).
+            Returns \&default_start_report (see below) if not
+            set. 
+  Example : $index->start_report( \&my_start_report )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub start_report {
+    my ( $self, $code ) = @_;
+
+    # a true argument replaces the stored callback
+    $self->{'_start_report'} = $code if $code;
+
+    # fall back to the built-in routine when nothing has been stored
+    my $stored = $self->{'_start_report'};
+    return $stored ? $stored : \&default_start_report;
+}
+
+=head2 default_start_report
+
+ Title   : default_start_report
+ Usage   : $self->default_start_report($result)
+ Function: The default method to call when starting a report.
+ Returns : string
+ Args    : First argument is a Bio::Search::Result::ResultI
+
+=cut
+
+sub default_start_report {
+    # The result argument is accepted but not used: the text report
+    # has no preamble.
+    my ($result) = @_;
+    my $preamble = "";
+    return $preamble;
+}
+
+=head2 title
+
+ Title   : title
+ Usage   : $self->title($CODE)
+
+  Function: Stores or returns the code to provide HTML for the given
+            BLAST report that will appear at the top of the BLAST report
+            HTML output.  Useful for (for instance) specifying
+            alternative routines to write your own titles.
+            Returns \&default_title (see below) if not
+            set. 
+  Example : $index->title( \&my_title )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub title {
+    my ( $self, $code ) = @_;
+
+    # a true argument replaces the stored callback
+    $self->{'_title'} = $code if $code;
+
+    # fall back to the built-in routine when nothing has been stored
+    my $stored = $self->{'_title'};
+    return $stored ? $stored : \&default_title;
+}
+
+=head2 default_title
+
+ Title   : default_title
+ Usage   : $self->default_title($result)
+ Function: Provides HTML for the given BLAST report that will appear
+           at the top of the BLAST report output.
+ Returns : empty for text implementation
+ Args    : First argument is a Bio::Search::Result::ResultI
+
+=cut
+
+sub default_title {
+    # The result argument is accepted but not used: the text report
+    # carries no title.
+    my ($result) = @_;
+    my $heading = "";
+    return $heading;
+# The HTML implementation
+#    return sprintf(
+#        qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>},
+#		    $result->algorithm,
+#		    $result->query_name());
+}
+
+
+=head2 introduction
+
+ Title   : introduction
+ Usage   : $self->introduction($CODE)
+
+  Function: Stores or returns the code to provide HTML for the given
+            BLAST report detailing the query and the
+            database information.
+            Useful for (for instance) specifying
+            routines returning alternative introductions.
+            Returns \&default_introduction (see below) if not
+            set. 
+  Example : $index->introduction( \&my_introduction )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub introduction {
+    my ( $self, $code ) = @_;
+
+    # a true argument replaces the stored callback
+    $self->{'_introduction'} = $code if $code;
+
+    # fall back to the built-in routine when nothing has been stored
+    my $stored = $self->{'_introduction'};
+    return $stored ? $stored : \&default_introduction;
+}
+
+=head2 default_introduction
+
+ Title   : default_introduction
+ Usage   : $self->default_introduction($result)
+ Function: Outputs HTML to provide the query
+           and the database information
+ Returns : string containing HTML
+ Args    : First argument is a Bio::Search::Result::ResultI
+           Second argument is string holding literature citation
+
+=cut
+
+sub default_introduction {
+    my ($result) = @_;
+
+    # Plain text header naming the query and the database searched
+    my $template = qq{
+Query= %s
+       (%s letters)
+
+Database: %s
+           %s sequences; %s total letters
+};
+
+    # Values in the order the template consumes them; the name and
+    # description are joined and wrapped, the counts get thousands
+    # separators.
+    my @fields = (
+        _linewrap( $result->query_name . " " . $result->query_description ),
+        _numwithcommas( $result->query_length ),
+        $result->database_name(),
+        _numwithcommas( $result->database_entries() ),
+        _numwithcommas( $result->database_letters() ),
+    );
+
+    return sprintf( $template, @fields );
+}
+
+=head2 end_report
+
+ Title   : end_report
+ Usage   : $self->end_report()
+ Function: The method to call when ending a report, this is
+           mostly for cleanup for formats which require you to 
+           have something at the end of the document (</BODY></HTML>)
+           for HTML
+ Returns : string
+ Args    : none
+
+=cut
+
+sub end_report {
+    # The text report has no closing section.
+    my $trailer = "";
+    return $trailer;
+}
+
+
+# copied from Bio::Index::Fasta
+# useful here as well
+
+=head2 id_parser
+
+  Title   : id_parser
+  Usage   : $index->id_parser( CODE )
+  Function: Stores or returns the code used by record_id to
+            parse the ID for record from a string.  Useful
+            for (for instance) specifying a different
+            parser for different flavours of FASTA file. 
+            Returns \&default_id_parser (see below) if not
+            set. If you supply your own id_parser
+            subroutine, then it should expect a fasta
+            description line.  An entry will be added to
+            the index for each string in the list returned.
+  Example : $index->id_parser( \&my_id_parser )
+  Returns : ref to CODE if called without arguments
+  Args    : CODE
+
+=cut
+
+sub id_parser {
+    my ( $self, $code ) = @_;
+
+    # a true argument replaces the stored parser
+    $self->{'_id_parser'} = $code if $code;
+
+    # fall back to the built-in parser when nothing has been stored
+    my $stored = $self->{'_id_parser'};
+    return $stored ? $stored : \&default_id_parser;
+}
+
+
+
+=head2 default_id_parser
+
+  Title   : default_id_parser
+  Usage   : $id = default_id_parser( $header )
+  Function: The default Fasta ID parser for Fasta.pm
+            Returns $1 from applying the regexp /^>\s*(\S+)/
+            to $header.
+  Returns : ID string
+  Args    : a fasta header line string
+
+=cut
+
+sub default_id_parser {
+    my ($header) = @_;
+    my ( $gi, $acc );
+
+    # Remove the first "gi|<digits>|" token from the working copy and
+    # remember the number; it doubles as a provisional accession.
+    if ( $header =~ s/gi\|(\d+)\|?// ) {
+        $gi  = $1;
+        $acc = $gi;
+    }
+
+    if ( $header =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) {
+        # "db|accession[|locus]" form: prefer the accession field
+        my ( $db, $id ) = ( $1, $2 );
+        $acc = defined $id ? $id : $db;
+    }
+    else {
+        # no database token: use what is left, minus surrounding whitespace
+        ( $acc = $header ) =~ s/^\s+(\S+)/$1/;
+        $acc =~ s/(\S+)\s+$/$1/;
+    }
+    return ( $gi, $acc );
+}
+	
+# Two-value minimum / maximum helpers.  When called with two arguments
+# those are compared; otherwise the package variables $a and $b are used,
+# as before.  The earlier versions tested the truth of "$a <=> $b", which
+# is true for any unequal pair, so they did not pick the smaller/larger
+# value.
+sub MIN { my ( $x, $y ) = @_ >= 2 ? @_[ 0, 1 ] : ( $a, $b ); return $x < $y ? $x : $y; }
+sub MAX { my ( $x, $y ) = @_ >= 2 ? @_[ 0, 1 ] : ( $a, $b ); return $x > $y ? $x : $y; }
+
+
+=head2 algorithm_reference
+
+ Title   : algorithm_reference
+ Usage   : my $reference = $writer->algorithm_reference($result);
+ Function: Returns the appropriate Bibliographic reference for the 
+           algorithm format being produced
+ Returns : String
+ Args    : L<Bio::Search::Result::ResultI> to reference
+
+
+=cut
+
+sub algorithm_reference{
+    my ( $self, $result ) = @_;
+
+    # Anything that is not a ResultI object gets no reference at all
+    return '' unless defined $result
+        && ref($result)
+        && $result->isa('Bio::Search::Result::ResultI');
+
+    # BLAST family: program/version banner followed by the citation that
+    # matches the flavour (WU-BLAST versus the 1997 Altschul et al. paper)
+    if ( $result->algorithm =~ /BLAST/i ) {
+        my $banner = $result->algorithm . ' ' . $result->algorithm_version . "\n";
+        my $citation = $result->algorithm_version =~ /WashU/i
+            ? qq{
+Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+ 
+Reference:  Gish, W. (1996-2000) http://blast.wustl.edu
+}
+            : qq{
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+};
+        return $banner . $citation;
+    }
+
+    # FASTA family
+    if ( $result->algorithm =~ /FAST/i ) {
+        return $result->algorithm . " " . $result->algorithm_version . "\n"
+            . "\nReference: Pearson et al, Genomics (1997) 46:24-36\n";
+    }
+
+    # nothing known for other algorithms
+    return '';
+}
+
+# from Perl Cookbook 2.17
+sub _numwithcommas {
+    my ($value) = @_;
+
+    # Work on the reversed string so digit groups can be counted from the
+    # right; the second lookahead leaves digits after a decimal point alone.
+    my $flipped = scalar reverse $value;
+    $flipped =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;
+
+    my $grouped = reverse $flipped;
+    return $grouped;
+}
+
+sub _linewrap {
+    my ($str) = @_;
+
+    # cannot wrap: hand the text back untouched
+    return $str unless $TextWrapLoaded;
+
+    # use Text::Wrap, with no indentation on first or following lines
+    return Text::Wrap::wrap( "", "", $str );
+}
+=head2 Methods Bio::SearchIO::SearchWriterI
+
+L<Bio::SearchIO::SearchWriterI> inherited methods.
+
+=head2 filter
+
+ Title   : filter
+ Usage   : $writer->filter('hsp', \&hsp_filter);
+ Function: Filter out either at HSP,Hit,or Result level
+ Returns : none
+ Args    : string => data type,
+           CODE reference
+
+
+=cut
+
+=head2 no_wublastlinks
+
+ Title   : no_wublastlinks
+ Usage   : $obj->no_wublastlinks($newval)
+ Function: Get/Set boolean value regarding whether or not to display
+           Link = (1) 
+           type output in the report output (WU-BLAST only)
+ Returns : boolean
+ Args    : on set, new boolean value (a scalar or undef, optional)
+
+
+=cut
+
+sub no_wublastlinks{
+    my ( $self, @newval ) = @_;
+
+    # any argument, even undef, is stored as the new value
+    if (@newval) {
+        $self->{'no_wublastlinks'} = $newval[0];
+    }
+    return $self->{'no_wublastlinks'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/axt.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/axt.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/axt.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,494 @@
+# $Id: axt.pm,v 1.8.4.2 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::axt
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::axt - a parser for axt format reports
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  my $parser = new Bio::SearchIO(-format => 'axt',
+                                 -file   => 't/data/report.blastz');
+  while( my $result = $parser->next_result ) {
+    while( my $hit = $result->next_hit ) {
+      while( my $hsp = $hit->next_hsp ) {
+      }
+    }
+  }
+
+=head1 DESCRIPTION
+
+This is a parser and event-generator for AXT format reports.  BLASTZ
+reports (Schwartz et al,(2003) Genome Research, 13:103-107) are normally
+in LAV format but are commonly post-processed to AXT format; many precomputed
+BLASTZ reports, such as those found in the UCSC Genome
+Browser, are in AXT format.   This parser will also parse any
+AXT format produced from any lav report and directly out of BLAT.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::axt;
+use vars qw(%MODEMAP %MAPPING @STATES $GAPCHAR);
+use strict;
+
+use Bio::Search::Result::ResultFactory;
+use Bio::Search::HSP::HSPFactory;
+use base qw(Bio::SearchIO);
+
+use POSIX;
+
+BEGIN {
+    # Container element names and the event type each one maps to
+    %MODEMAP = (
+        'AXTOutput' => 'result',
+        'Hit'       => 'hit',
+        'Hsp'       => 'hsp',
+    );
+
+    # Character counted as a gap in the aligned sequence lines
+    $GAPCHAR = '-';
+
+    # mapping of NCBI Blast-style element names to Bioperl hash keys
+    %MAPPING = (
+        # HSP level
+        'Hsp_score'      => 'HSP-score',
+        'Hsp_query-from' => 'HSP-query_start',
+        'Hsp_query-to'   => 'HSP-query_end',
+        'Hsp_hit-from'   => 'HSP-hit_start',
+        'Hsp_hit-to'     => 'HSP-hit_end',
+        'Hsp_positive'   => 'HSP-conserved',
+        'Hsp_identity'   => 'HSP-identical',
+        'Hsp_gaps'       => 'HSP-hsp_gaps',
+        'Hsp_hitgaps'    => 'HSP-hit_gaps',
+        'Hsp_querygaps'  => 'HSP-query_gaps',
+        'Hsp_qseq'       => 'HSP-query_seq',
+        'Hsp_hseq'       => 'HSP-hit_seq',
+        'Hsp_midline'    => 'HSP-homology_seq',    # ignoring this for now
+        'Hsp_align-len'  => 'HSP-hsp_length',
+
+        # hit and result level
+        'Hit_id'              => 'HIT-name',
+        'AXTOutput_query-def' => 'RESULT-query_name',
+    );
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::axt();
+ Function: Builds a new Bio::SearchIO::axt object 
+ Returns : an instance of Bio::SearchIO::axt
+ Args    :
+
+
+=cut
+
+sub new {
+    # All construction work is delegated to the parent class; the argument
+    # list is passed through unchanged.
+    my ( $class, @args ) = @_;
+    my $self = $class->SUPER::new(@args);
+    return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $hit = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result{
+    my ($self) = @_;
+    local $/ = "\n";
+    local $_;
+
+    # Query and hit names of the record handled last; a change in either
+    # closes the corresponding open element.
+    my ( $curquery, $curhit );
+    $self->start_document();
+    while ( defined( $_ = $self->_readline ) ) {
+        next if (/^\s+$/);
+        if ( m/^(\d+)\s+      # alignment number - we'll throw this away anyways
+            (\S+)\s+         # Query name
+            (\d+)\s+(\d+)\s+ # Query start Query end (always + strand, 0 based)
+            (\S+)\s+         # Hit name
+            (\d+)\s+(\d+)\s+ # Hit start Hit end (0 based)
+            ([\-\+])\s+      # Hit strand
+            ([\d\.\-]+)\s+   # Score
+            /ox ) {
+            my ( $alnnum, $qname, $qstart, $qend, $hname,
+                 $hstart, $hend, $hstrand, $score ) = ( $1, $2, $3, $4, $5,
+                                                        $6, $7, $8, $9 );
+            $self->{'_reporttype'} = 'AXT';
+            # Jim's code is 0 based
+            $qstart++; $qend++; $hstart++; $hend++;
+            if ( defined $curquery &&
+                 $curquery ne $qname ) {
+                # A new query starts here: finish the current result and
+                # push the summary line back so the next call sees it.
+                $self->end_element( { 'Name' => 'Hit' } );
+                $self->_pushback($_);
+                $self->end_element( { 'Name' => 'AXTOutput' } );
+                return $self->end_document();
+            }
+
+            if ( defined $curhit &&
+                 $curhit ne $hname ) {
+                # slight duplication here -- keep these in SYNC
+                $self->end_element( { 'Name' => 'Hit' } );
+                $self->start_element( { 'Name' => 'Hit' } );
+                $self->element( { 'Name' => 'Hit_id',
+                                  'Data' => $hname } );
+            }
+            elsif ( !defined $curquery ) {
+                # very first record of this result
+                $self->start_element( { 'Name' => 'AXTOutput' } );
+                $self->{'_result_count'}++;
+                $self->element( { 'Name' => 'AXTOutput_query-def',
+                                  'Data' => $qname } );
+
+                $self->start_element( { 'Name' => 'Hit' } );
+                $self->element( { 'Name' => 'Hit_id',
+                                  'Data' => $hname } );
+            }
+            $self->start_element( { 'Name' => 'Hsp' } );
+
+            # the two lines after the summary line are the aligned query
+            # and hit sequences
+            my $queryalign = $self->_readline;
+            my $hitalign   = $self->_readline;
+            chomp($queryalign);
+            chomp($hitalign);
+            my $alnlen = length($queryalign);
+
+            # Count gap characters without touching the strings.  Counting
+            # the matches always yields an integer (0 when there are none),
+            # whereas the return value of s///g is the empty string then.
+            my $qgapnum   = () = $queryalign =~ /\Q$GAPCHAR\E/g;
+            my $hgapnum   = () = $hitalign   =~ /\Q$GAPCHAR\E/g;
+            my $totalgaps = ( $qgapnum + $hgapnum );
+
+            if ( $hstrand eq '-' ) {    # strand gets inferred by start/end
+                ( $hstart, $hend ) = ( $hend, $hstart );
+            }
+            $self->element( { 'Name' => 'Hsp_score',
+                              'Data' => $score } );
+            $self->element( { 'Name' => 'Hsp_query-from',
+                              'Data' => $qstart } );
+            $self->element( { 'Name' => 'Hsp_query-to',
+                              'Data' => $qend } );
+            $self->element( { 'Name' => 'Hsp_hit-from',
+                              'Data' => $hstart } );
+            $self->element( { 'Name' => 'Hsp_hit-to',
+                              'Data' => $hend } );
+            $self->element( { 'Name' => 'Hsp_gaps',
+                              'Data' => $qgapnum + $hgapnum } );
+            $self->element( { 'Name' => 'Hsp_querygaps',
+                              'Data' => $qgapnum } );
+            $self->element( { 'Name' => 'Hsp_hitgaps',
+                              'Data' => $hgapnum } );
+
+            # identical and conserved counts are both reported as the
+            # alignment length minus all gap characters
+            $self->element( { 'Name' => 'Hsp_identity',
+                              'Data' => $alnlen - $totalgaps } );
+            $self->element( { 'Name' => 'Hsp_positive',
+                              'Data' => $alnlen - $totalgaps } );
+            $self->element( { 'Name' => 'Hsp_qseq',
+                              'Data' => $queryalign } );
+            $self->element( { 'Name' => 'Hsp_hseq',
+                              'Data' => $hitalign } );
+
+            $self->end_element( { 'Name' => 'Hsp' } );
+            $curquery = $qname;
+            $curhit   = $hname;
+        }
+    }
+    # fence post
+    if ( defined $curquery ) {
+        $self->end_element( { 'Name' => 'Hit' } );
+        $self->end_element( { 'Name' => 'AXTOutput' } );
+        return $self->end_document();
+    }
+    return;
+}
+
+sub _initialize {
+    my ( $self, @args ) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # Register the factories the event handler uses to build result and
+    # HSP objects: the generic implementations of each.
+    $self->_eventHandler->register_factory( 'result',
+        Bio::Search::Result::ResultFactory->new( -type => 'Bio::Search::Result::GenericResult' ) );
+
+    $self->_eventHandler->register_factory( 'hsp',
+        Bio::Search::HSP::HSPFactory->new( -type => 'Bio::Search::HSP::GenericHSP' ) );
+}
+
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+    my ($self,$data) = @_;
+
+    # Attributes are passed through to the handler untouched; only the
+    # element name is interpreted here.
+    my $name = $data->{'Name'};
+    my $mode = $MODEMAP{$name};
+
+    if( $mode ) {
+        # A structural element: remember the new mode, notify the handler
+        # if it is interested, then push the mode onto the front of the
+        # open-element stack.
+        $self->_mode($mode);
+        if( $self->_eventHandler->will_handle($mode) ) {
+            my $callback = 'start_' . lc $mode;
+            $self->_eventHandler->$callback($data->{'Attributes'});
+        }
+        unshift @{$self->{'_elements'}}, $mode;
+    }
+
+    # Opening the top-level element resets the per-report state.
+    if( $name eq 'AXTOutput' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        $self->{'_mode'}   = '';
+    }
+
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    my ($self,$data) = @_;
+    my $name = $data->{'Name'};
+    my $result;
+
+    # Hsp are sort of weird, in that they end when another
+    # object begins so have to detect this in end_element for now
+
+    if( my $mode = $MODEMAP{$name} ) {
+        # Structural element: let the handler build its object from the
+        # values collected so far, then pop the open-element stack.
+        if( $self->_eventHandler->will_handle($mode) ) {
+            my $callback = 'end_' . lc $mode;
+            $result = $self->_eventHandler->$callback($self->{'_reporttype'},
+                                                      $self->{'_values'});
+        }
+        shift @{$self->{'_elements'}};
+
+    } elsif( my $target = $MAPPING{$name} ) {
+        # Data element: file the last character data under its mapped key.
+        if ( ref($target) =~ /hash/i ) {
+            # A hash mapping names a two-level slot: { outer => inner }.
+            my ($outer) = keys %{$target};
+            $self->{'_values'}->{$outer}->{$target->{$outer}} = $self->{'_last_data'};
+        } else {
+            $self->{'_values'}->{$target} = $self->{'_last_data'};
+        }
+    } else {
+        $self->warn( "unknown nm $name ignoring\n");
+    }
+
+    # The element is finished, so its character data is consumed.
+    $self->{'_last_data'} = '';
+
+    # Closing the top-level element yields the finished result.
+    if( $name eq 'AXTOutput' ) {
+        $self->{'_result'} = $result;
+    }
+    return $result;
+
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+    my $self = shift;
+    my $data = shift;
+
+    # Emit a complete leaf element: open it, deliver its character data,
+    # close it.  The value of the closing call is what the caller gets back.
+    $self->start_element($data);
+    $self->characters($data);
+    return $self->end_element($data);
+}
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character events
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+    my ($self,$data) = @_;
+
+    my $text = $data->{'Data'};
+    return unless defined $text;
+
+    # Whitespace-only data is dropped, except for the alignment lines
+    # where blank stretches are part of the data.
+    return if $text =~ /^\s+$/
+        && $data->{'Name'} !~ /Hsp\_(midline|qseq|hseq)/;
+
+    # Alignment lines seen while an HSP is the current element are
+    # accumulated per element name.
+    if( $self->in_element('hsp') ) {
+        if( $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
+            $self->{'_last_hspdata'}->{$data->{'Name'}} .= $text;
+        }
+    }
+
+    # Keep the data for end_element to file away.
+    $self->{'_last_data'} = $text;
+}
+
+=head2 _mode
+
+ Title   : _mode
+ Usage   : $obj->_mode($newval)
+ Function: Get or set the current parsing mode
+ Example : 
+ Returns : value of _mode
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _mode{
+    my ($self,$newval) = @_;
+    # Only a defined argument replaces the stored mode; calling without
+    # one (or with undef) is a plain read.
+    $self->{'_mode'} = $newval if defined $newval;
+    return $self->{'_mode'};
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub within_element{
+   my ($self,$name) = @_;
+   # Nothing can match when there is no name to look for, no element
+   # stack, or an empty stack.  Each condition must short-circuit on its
+   # own: the previous '&&' / '||' mix grouped the first two tests
+   # together, so a missing stack was still dereferenced and an
+   # undefined name still reached the string comparison below.
+   return 0 if ( ! defined $name ||
+		 ! defined $self->{'_elements'} ||
+		 scalar @{$self->{'_elements'}} == 0) ;
+   # Unlike in_element, every open element is examined, not just the
+   # innermost one.
+   foreach my $open ( @{$self->{'_elements'}} ) {
+       return 1 if $open eq $name;
+   }
+   return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within' because 'in' only tests
+           the most recently opened element.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub in_element{
+   my ($self,$name) = @_;
+   # The front of the stack is the most recently opened element.
+   my $current = $self->{'_elements'}->[0];
+   return 0 unless defined $current;
+   return ( $current eq $name );
+}
+
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handles a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    my $self = shift;
+    # Reset all per-document parse state: the string-valued fields go
+    # back to the empty string, the result is forgotten, and the value
+    # table and open-element stack start out empty.
+    $self->{$_} = '' for ( '_lasttype', '_mode' );
+    $self->{'_result'}   = undef;
+    $self->{'_values'}   = {};
+    $self->{'_elements'} = [];
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+   # Extra arguments are accepted and ignored.  The archived copy of this
+   # line had the array sigil rewritten to " at ", which does not parse.
+   my ($self,@args) = @_;
+   # Hand back whatever end_element stored when the top-level element closed.
+   return $self->{'_result'};
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+    # Plain read of the stored counter; nothing in this method updates it.
+    return $self->{'_result_count'};
+}
+
+# Alias for result_count.
+sub report_count {
+    my $self = shift;
+    return $self->result_count;
+}
+
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2502 @@
+# $Id: blast.pm,v 1.99.4.4 2006/10/11 19:55:17 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::blast
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+# 20030409 - sac
+#          PSI-BLAST full parsing support. Rollout of new
+#          model which will remove Steve's old psiblast driver
+# 20030424 - jason
+#          Megablast parsing fix as reported by Neil Saunders
+# 20030427 - jason
+#          Support bl2seq parsing
+# 20031124 - jason
+#          Parse more blast statistics, lambda, entropy, etc
+#          from WU-BLAST in frame-specific manner
+# 20060216 - cjf - fixed blast parsing for BLAST v2.2.13 output
+
+=head1 NAME
+
+Bio::SearchIO::blast - Event generator for event based parsing of
+blast reports
+
+=head1 SYNOPSIS
+
+   # Do not use this object directly - it is used as part of the
+   # Bio::SearchIO system.
+
+    use Bio::SearchIO;
+    my $searchio = new Bio::SearchIO(-format => 'blast',
+                                     -file   => 't/data/ecolitst.bls');
+    while( my $result = $searchio->next_result ) {
+        while( my $hit = $result->next_hit ) {
+            while( my $hsp = $hit->next_hsp ) {
+                # ...
+            }
+        }
+    }
+
+=head1 DESCRIPTION
+
+This object encapsulates the necessary methods for generating events
+suitable for building Bio::Search objects from a BLAST report file.
+Read the L<Bio::SearchIO> documentation for more information about
+how to use this.
+
+This driver can parse:
+
+=over 4
+
+=item * 
+
+NCBI produced plain text BLAST reports from blastall, this also
+includes PSIBLAST, PSITBLASTN, RPSBLAST, and bl2seq reports.  NCBI XML
+BLAST output is parsed with the blastxml SearchIO driver
+
+=item *
+
+WU-BLAST all reports
+
+=item *
+
+Jim Kent's BLAST-like output from his programs (BLASTZ, BLAT)
+
+=item *
+
+BLAST-like output from Paracel BTK output
+
+=back
+
+=head2 bl2seq parsing
+
+Since I cannot differentiate between BLASTX and TBLASTN since bl2seq
+doesn't report the algorithm used - I assume it is BLASTX by default -
+you can supply the program type with -report_type in the SearchIO
+constructor i.e.
+
+  my $parser = new Bio::SearchIO(-format => 'blast',
+                                 -file => 'bl2seq.tblastn.report',
+                                 -report_type => 'tblastn');
+
+This only really affects where the frame and strand information are
+put - they will always be on the $hsp-E<gt>query instead of on the
+$hsp-E<gt>hit part of the feature pair for blastx and tblastn bl2seq
+produced reports.  Hope that's clear...
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email Jason Stajich jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Steve Chervitz sac-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...'
+
+package Bio::SearchIO::blast;
+
+use Bio::SearchIO::IteratedSearchResultEventBuilder;
+use strict;
+use vars qw(%MAPPING %MODEMAP
+  $DEFAULT_BLAST_WRITER_CLASS
+  $MAX_HSP_OVERLAP
+  $DEFAULT_SIGNIF
+  $DEFAULT_SCORE
+  $DEFAULTREPORTTYPE
+);
+
+
+use base qw(Bio::SearchIO);
+
+BEGIN {
+
+    # Populate the package lookup tables and defaults at compile time.
+
+    # mapping of NCBI Blast terms to Bioperl hash keys
+    %MODEMAP = (
+        'BlastOutput' => 'result',
+        'Iteration'   => 'iteration',
+        'Hit'         => 'hit',
+        'Hsp'         => 'hsp'
+    );
+
+    # This should really be done more intelligently, like with
+    # XSLT
+
+    # A plain string value names a single slot; a one-pair hash value
+    # names a two-level slot ( outer key => inner key ).
+    %MAPPING = (
+        'Hsp_bit-score'   => 'HSP-bits',
+        'Hsp_score'       => 'HSP-score',
+        'Hsp_evalue'      => 'HSP-evalue',
+        'Hsp_pvalue'      => 'HSP-pvalue',
+        'Hsp_query-from'  => 'HSP-query_start',
+        'Hsp_query-to'    => 'HSP-query_end',
+        'Hsp_hit-from'    => 'HSP-hit_start',
+        'Hsp_hit-to'      => 'HSP-hit_end',
+        'Hsp_positive'    => 'HSP-conserved',
+        'Hsp_identity'    => 'HSP-identical',
+        'Hsp_gaps'        => 'HSP-hsp_gaps',
+        'Hsp_hitgaps'     => 'HSP-hit_gaps',
+        'Hsp_querygaps'   => 'HSP-query_gaps',
+        'Hsp_qseq'        => 'HSP-query_seq',
+        'Hsp_hseq'        => 'HSP-hit_seq',
+        'Hsp_midline'     => 'HSP-homology_seq',
+        'Hsp_align-len'   => 'HSP-hsp_length',
+        'Hsp_query-frame' => 'HSP-query_frame',
+        'Hsp_hit-frame'   => 'HSP-hit_frame',
+        'Hsp_links'       => 'HSP-links',
+        'Hsp_group'       => 'HSP-hsp_group',
+
+        'Hit_id'        => 'HIT-name',
+        'Hit_len'       => 'HIT-length',
+        'Hit_accession' => 'HIT-accession',
+        'Hit_def'       => 'HIT-description',
+        'Hit_signif'    => 'HIT-significance',
+
+        # For NCBI blast, the description line contains bits.
+        # For WU-blast, the  description line contains score.
+        'Hit_score' => 'HIT-score',
+        'Hit_bits'  => 'HIT-bits',
+
+        'Iteration_iter-num'  => 'ITERATION-number',
+        'Iteration_converged' => 'ITERATION-converged',
+
+        'BlastOutput_program'             => 'RESULT-algorithm_name',
+        'BlastOutput_version'             => 'RESULT-algorithm_version',
+        'BlastOutput_query-def'           => 'RESULT-query_name',
+        'BlastOutput_query-len'           => 'RESULT-query_length',
+        'BlastOutput_query-acc'           => 'RESULT-query_accession',
+        'BlastOutput_querydesc'           => 'RESULT-query_description',
+        'BlastOutput_db'                  => 'RESULT-database_name',
+        'BlastOutput_db-len'              => 'RESULT-database_entries',
+        'BlastOutput_db-let'              => 'RESULT-database_letters',
+        'BlastOutput_inclusion-threshold' => 'RESULT-inclusion_threshold',
+
+        'Parameters_matrix'      => { 'RESULT-parameters' => 'matrix' },
+        'Parameters_expect'      => { 'RESULT-parameters' => 'expect' },
+        'Parameters_include'     => { 'RESULT-parameters' => 'include' },
+        'Parameters_sc-match'    => { 'RESULT-parameters' => 'match' },
+        'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' },
+        'Parameters_gap-open'    => { 'RESULT-parameters' => 'gapopen' },
+        'Parameters_gap-extend'  => { 'RESULT-parameters' => 'gapext' },
+        'Parameters_filter'      => { 'RESULT-parameters' => 'filter' },
+        'Parameters_allowgaps'   => { 'RESULT-parameters' => 'allowgaps' },
+        'Parameters_full_dbpath' => { 'RESULT-parameters' => 'full_dbpath' },
+        'Statistics_db-len'      => { 'RESULT-statistics' => 'dbentries' },
+        'Statistics_db-let'      => { 'RESULT-statistics' => 'dbletters' },
+        'Statistics_hsp-len'     =>
+          { 'RESULT-statistics' => 'effective_hsplength' },
+        'Statistics_query-len' => { 'RESULT-statistics' => 'querylength' },
+        'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace' },
+        'Statistics_eff-spaceused' =>
+          { 'RESULT-statistics' => 'effectivespaceused' },
+        'Statistics_eff-dblen' =>
+          { 'RESULT-statistics' => 'effectivedblength' },
+        'Statistics_kappa'         => { 'RESULT-statistics' => 'kappa' },
+        'Statistics_lambda'        => { 'RESULT-statistics' => 'lambda' },
+        'Statistics_entropy'       => { 'RESULT-statistics' => 'entropy' },
+        'Statistics_gapped_kappa'  => { 'RESULT-statistics' => 'kappa_gapped' },
+        'Statistics_gapped_lambda' =>
+          { 'RESULT-statistics' => 'lambda_gapped' },
+        'Statistics_gapped_entropy' =>
+          { 'RESULT-statistics' => 'entropy_gapped' },
+
+        'Statistics_framewindow' =>
+          { 'RESULT-statistics' => 'frameshiftwindow' },
+        'Statistics_decay' => { 'RESULT-statistics' => 'decayconst' },
+
+        'Statistics_hit_to_db' => { 'RESULT-statistics' => 'Hits_to_DB' },
+        'Statistics_num_suc_extensions' =>
+          { 'RESULT-statistics' => 'num_successful_extensions' },
+
+        # WU-BLAST stats
+        'Statistics_DFA_states' => { 'RESULT-statistics' => 'num_dfa_states' },
+        'Statistics_DFA_size'   => { 'RESULT-statistics' => 'dfa_size' },
+        'Statistics_noprocessors' =>
+          { 'RESULT-statistics' => 'no_of_processors' },
+        'Statistics_neighbortime' =>
+          { 'RESULT-statistics' => 'neighborhood_generate_time' },
+        'Statistics_starttime' => { 'RESULT-statistics' => 'start_time' },
+        'Statistics_endtime'   => { 'RESULT-statistics' => 'end_time' },
+    );
+
+    # add WU-BLAST Frame-Based Statistics: one entry for every
+    # combination of frame (0..3), strand (+/-) and statistic name
+    my @frame_fields = qw(length efflength E S W T X X_gapped E2
+      E2_gapped S2);
+    my @frame_stats = qw(lambda kappa entropy );
+    my @stat_kinds  = qw(used computed gapped);
+
+    for my $frame ( 0 .. 3 ) {
+        for my $strand ( '+', '-' ) {
+            for my $field (@frame_fields) {
+                $MAPPING{"Statistics_frame$strand$frame\_$field"} =
+                  { 'RESULT-statistics' => "Frame$strand$frame\_$field" };
+            }
+            for my $stat (@frame_stats) {
+                for my $kind (@stat_kinds) {
+                    $MAPPING{"Statistics_frame$strand$frame\_$stat\_$kind"} =
+                      { 'RESULT-statistics' =>
+                          "Frame$strand$frame\_$stat\_$kind" };
+                }
+            }
+        }
+    }
+
+    # add Statistics whose element suffix and stored name are identical
+    for my $name (
+        qw(T A X1 X2 X3 S1 S2 X1_bits X2_bits X3_bits
+        S1_bits S2_bits  num_extensions
+        num_successful_extensions
+        seqs_better_than_cutoff
+        posted_date
+        search_cputime total_cputime
+        search_actualtime total_actualtime
+        no_of_processors ctxfactor)
+      )
+    {
+        $MAPPING{"Statistics_$name"} = { 'RESULT-statistics' => $name };
+    }
+
+    # add WU-BLAST Parameters, again stored under their own name
+    for my $name (
+        qw(span span1 span2 links warnings notes hspsepsmax
+        hspsepqmax topcomboN topcomboE postsw cpus wordmask
+        filter sort_by_pvalue sort_by_count sort_by_highscore
+        sort_by_totalscore sort_by_subjectlength noseqs gi qtype
+        qres V B Z Y M N)
+      )
+    {
+        $MAPPING{"Parameters_$name"} = { 'RESULT-parameters' => $name };
+    }
+
+    $DEFAULT_BLAST_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
+    $MAX_HSP_OVERLAP   = 2;           # Used when tiling multiple HSPs.
+    $DEFAULTREPORTTYPE = 'BLASTP';    # for bl2seq
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::blast(%args);
+ Function: Builds a new Bio::SearchIO::blast object 
+ Returns : Bio::SearchIO::blast
+ Args    : Key-value pairs:
+           -fh/-file => filehandle/filename to BLAST file
+           -format   => 'blast'
+           -report_type => 'blastx', 'tblastn', etc -- only for bl2seq
+                           reports when you want to distinguish between
+                           tblastn and blastx reports (this only controls
+                           where the frame information is put - on the query
+                           or subject object).
+           -inclusion_threshold => e-value threshold for inclusion in the
+                                   PSI-BLAST score matrix model (blastpgp)
+           -signif      => float or scientific notation number to be used
+                           as a P- or Expect value cutoff
+           -score       => integer or scientific notation number to be used
+                           as a blast score value cutoff
+           -bits        => integer or scientific notation number to be used
+                           as a bit score value cutoff
+           -hit_filter  => reference to a function to be used for
+                           filtering hits based on arbitrary criteria.
+                           All hits of each BLAST report must satisfy 
+                           this criteria to be retained. 
+                           If a hit fails this test, it is ignored.
+                           This function should take a
+                           Bio::Search::Hit::BlastHit.pm object as its first
+                           argument and return true
+                           if the hit should be retained.
+                           Sample filter function:
+                              -hit_filter => sub { $hit = shift;
+                                                   $hit->gaps == 0; },
+                           (Note: -filt_func is synonymous with -hit_filter)
+           -overlap     => integer. The amount of overlap to permit between
+                           adjacent HSPs when tiling HSPs. A reasonable value is 2.
+                           Default = $Bio::SearchIO::blast::MAX_HSP_OVERLAP.
+
+            The following criteria are not yet supported:
+            (these are probably best applied within this module rather than in the 
+             event handler since they would permit the parser to take some shortcuts.)
+
+           -check_all_hits => boolean. Check all hits for significance against
+                              significance criteria.  Default = false.
+                              If false, stops processing hits after the first
+                              non-significant hit or the first hit that fails
+                              the hit_filter call. This speeds parsing,
+                              taking advantage of the fact that the hits
+                              are processed in the order they appear in the report.
+           -min_query_len => integer to be used as a minimum for query sequence length.
+                             Reports with query sequences below this length will
+                             not be processed. Default = no minimum length.
+           -best        => boolean. Only process the best hit of each report;
+                           default = false.
+
+=cut
+
+sub _initialize {
+    my ( $self, @args ) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # Blast reports require a specialized version of the SREB due to the
+    # possibility of iterations (PSI-BLAST), so every constructor argument
+    # is forwarded to it.  An issue here is that we want to set new default
+    # object factories if none are supplied.
+    my $handler =
+      Bio::SearchIO::IteratedSearchResultEventBuilder->new(@args);
+
+    # attach_EventHandler in this module also caches the handler
+    # (moved there 2006-04-26 so that the handler can really be reset).
+    $self->attach_EventHandler($handler);
+
+    # Pick out the options handled by the parser itself.
+    # NOTE(review): the POD above documents -min_query_len, but the key
+    # read here is MIN_LENGTH -- confirm which spelling is intended.
+    # NOTE(review): the OVERLAP value is extracted but not used in this
+    # method.
+    my @wanted = qw(MIN_LENGTH CHECK_ALL_HITS
+      OVERLAP BEST
+      REPORT_TYPE);
+    my ( $min_qlen, $check_all, $overlap, $best, $rpttype ) =
+      $self->_rearrange( \@wanted, @args );
+
+    # Only options that were actually supplied are applied.
+    $self->min_query_length($min_qlen) if defined $min_qlen;
+    $self->best_hit_only($best)        if defined $best;
+    $self->check_all_hits($check_all)  if defined $check_all;
+    $self->{'_reporttype'} = $rpttype  if defined $rpttype;
+}
+
+sub attach_EventHandler {
+    my $self    = shift;
+    my $handler = shift;
+
+    # Let the parent class do the actual attaching ...
+    $self->SUPER::attach_EventHandler($handler);
+
+    # ... and keep our own reference as well.  Optimization: the
+    # EventHandler is used a lot during the parse.
+    $self->{'_handler_cache'} = $handler;
+
+    return;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+    my $v      = $self->verbose;
+    my $data   = '';
+    my $flavor = '';
+    $self->{'_seentop'} = 0;     # start next report at top
+    my ( $reporttype, $seenquery, $reportline );  
+    my ( $seeniteration, $found_again );
+    my $incl_threshold = $self->inclusion_threshold;
+    my $bl2seq_fix;
+    $self->start_document();  # let the fun begin...
+    my (@hit_signifs);
+    my $gapped_stats = 0;    # for switching between gapped/ungapped
+                             # lambda, K, H
+    local $_ = "\n";   #consistency
+    while ( defined( $_ = $self->_readline ) ) {
+        next if (/^\s+$/);       # skip empty lines
+        next if (/CPU time:/);
+        next if (/^>\s*$/);
+        if (
+               /^([T]?BLAST[NPX])\s*(.+)$/i               # NCBI BLAST
+            || /^(PSITBLASTN)\s+(.+)$/i                   # PSIBLAST
+            || /^(RPS-BLAST)\s*(.+)$/i                    # RPSBLAST
+            || /^(MEGABLAST)\s*(.+)$/i                    # MEGABLAST
+            || /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i    #Paracel BTK
+          )
+        {
+            $self->debug("blast.pm: Start of new report: $1 $2\n");
+            if ( $self->{'_seentop'} ) { 
+                # This handles multi-result input streams
+                $self->_pushback($_);
+                $self->in_element('hsp')
+                  && $self->end_element( { 'Name' => 'Hsp' } );
+                $self->in_element('hit')
+                  && $self->end_element( { 'Name' => 'Hit' } );
+                $self->within_element('iteration')
+                  && $self->end_element( { 'Name' => 'Iteration' } );
+                $self->end_element( { 'Name' => 'BlastOutput' } );
+                return $self->end_document();
+            }
+            $self->_start_blastoutput;
+            $reporttype = $1;
+            if ($reporttype =~ /RPS-BLAST/) {
+                $reporttype .= '(BLASTP)'; # default RPS-BLAST type
+            }
+            $reportline = $_;   # to fix the fact that RPS-BLAST output is wrong
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_program',
+                    'Data' => $reporttype
+                }
+            );
+
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_version',
+                    'Data' => $2
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_inclusion-threshold',
+                    'Data' => $incl_threshold
+                }
+            );
+        }
+        # added Windows workaround for bug 1985
+        elsif (/^(Searching|Results from round)/) { 
+            next unless $1 =~ /Results from round/; 
+            $self->debug("blast.pm: Possible psi blast iterations found...\n");
+            
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+            $self->in_element('hit')
+              && $self->end_element( { 'Name' => 'Hit' } );
+            if ( defined $seeniteration ) {
+                $self->within_element('iteration')
+                  && $self->end_element( { 'Name' => 'Iteration' } );
+                $self->_start_iteration;
+            }
+            else {
+                $self->_start_iteration;
+            }
+            $seeniteration = 1;
+        }
+        elsif (/^Query=\s*(.*)$/) {
+            $self->debug("blast.pm: Query= found...$_\n");
+            my $q    = $1;
+            my $size = 0;
+            if ( defined $seenquery ) {
+                $self->_pushback($reportline) if $reportline;
+                $self->_pushback($_);
+                $self->in_element('hsp')
+                  && $self->end_element( { 'Name' => 'Hsp' } );
+                $self->in_element('hit')
+                  && $self->end_element( { 'Name' => 'Hit' } );
+                $self->within_element('iteration')
+                  && $self->end_element( { 'Name' => 'Iteration' } );
+                if ($bl2seq_fix) {
+                    $self->element(
+                        {
+                            'Name' => 'BlastOutput_program',
+                            'Data' => $reporttype
+                        }
+                    );
+                }
+                $self->end_element( { 'Name' => 'BlastOutput' } );
+                return $self->end_document();
+            }
+            else {
+                if ( !defined $reporttype ) {
+                    $self->_start_blastoutput;
+                    if ( defined $seeniteration ) {
+                        $self->in_element('iteration')
+                          && $self->end_element( { 'Name' => 'Iteration' } );
+                        $self->_start_iteration;
+                    }
+                    else {
+                        $self->_start_iteration;
+                    }
+                    $seeniteration = 1;
+                }
+            }
+            $seenquery = $q;
+            $_ = $self->_readline;
+            while ( defined($_) ) {
+                if (/^Database:/) {
+                    $self->_pushback($_);
+                    last;
+                }
+                chomp;
+                # below line fixes length issue with BLAST v2.2.13; still works 
+                # with BLAST v2.2.12
+                if ( /\((\-?[\d,]+)\s+letters.*\)/ || /^Length=(\-?[\d,]+)/ ) {
+                    $size = $1;
+                    $size =~ s/,//g;
+                    last;
+                }
+                else {
+                    $q .= " $_";
+                    $q =~ s/ +/ /g;
+                    $q =~ s/^ | $//g;
+                }
+
+                $_ = $self->_readline;
+            }
+            chomp($q);
+            my ( $nm, $desc ) = split( /\s+/, $q, 2 );
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_query-def',
+                    'Data' => $nm
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_query-len',
+                    'Data' => $size
+                }
+            );
+            defined $desc && $desc =~ s/\s+$//;
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_querydesc',
+                    'Data' => $desc
+                }
+            );
+            my ( $acc, $version ) = &_get_accession_version($nm);
+            $version = defined($version) && length($version) ? ".$version" : "";
+            $acc = '' unless defined($acc);
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_query-acc',
+                    'Data' => "$acc$version"
+                }
+            );
+        }
+        elsif (/Sequences producing significant alignments:/) {
+            $self->debug("blast.pm: Processing NCBI-BLAST descriptions\n");
+            $flavor = 'ncbi';
+            
+            # PSI-BLAST parsing needs to be fixed to specifically look
+            # for old vs new per iteration, as sorting based on duplication
+            # leads to bugs, see bug 1986
+            
+            # The next line is not necessarily whitespace in psiblast reports.
+            # Also note that we must look for the end of this section by testing
+            # for a line with a leading >. Blank lines occur with this section
+            # for psiblast.
+            if ( !$self->in_element('iteration') ) {
+                $self->_start_iteration;
+            }
+          descline:
+            while ( defined( $_ = $self->_readline() ) ) {
+                if (/^>/ 
+                    || /^\s+Database:\s+?/
+                    || /^Parameters:/
+                    || /^\s+Subset/
+                    || /^\s*Lambda/
+                    || /^\s*Histogram/
+                    ) {
+                    $self->_pushback($_); # Catch leading > (end of section)
+                    last descline;
+                }
+                elsif (/([\d\.\+\-eE]+)\s+([\d\.\+\-eE]+)(\s+\d+)?\s*$/) {
+
+                    # the last match is for gapped BLAST output
+                    # which will report the number of HSPs for the Hit
+                    my ( $score, $evalue ) = ( $1, $2 );
+
+                    # Some data clean-up so e-value will appear numeric to perl
+                    $evalue =~ s/^e/1e/i;
+
+                    # This to handle no-HSP case
+                    my @line = split;
+                    
+                    # we want to throw away the score, evalue
+                    pop @line, pop @line;
+
+                    # and N if it is present (of course they are not
+                    # really in that order, but it doesn't matter
+                    if ($3) { pop @line }
+
+                    # add the last 2 entries s.t. we can reconstruct
+                    # a minimal Hit object at the end of the day
+                    push @hit_signifs,
+                      [ $evalue, $score, shift @line, join( ' ', @line ) ];
+                    
+                }
+                elsif (/^CONVERGED/i) {
+                    $self->element(
+                        {
+                            'Name' => 'Iteration_converged',
+                            'Data' => 1
+                        }
+                    );
+                }
+                @hit_signifs = sort {$a->[0] <=> $b->[0]} @hit_signifs;
+            }
+        }
+        elsif (/Sequences producing High-scoring Segment Pairs:/) {
+
+     # This block is for WU-BLAST, so we don't have to check for psi-blast stuff
+     # skip the next line
+            $self->debug("blast.pm: Processing WU-BLAST descriptions\n");
+            $_      = $self->_readline();
+            $flavor = 'wu';
+
+            if ( !$self->in_element('iteration') ) {
+                $self->_start_iteration;
+            }
+
+            while ( defined( $_ = $self->_readline() )
+                && !/^\s+$/ )
+            {
+                my @line = split;
+                pop @line;    # throw away first number which is for 'N'col
+
+                # add the last 2 entries to array s.t. we can reconstruct
+                # a minimal Hit object at the end of the day
+                push @hit_signifs,
+                  [ pop @line, pop @line, shift @line, join( ' ', @line ) ];
+            }
+            
+        }
+        elsif (/^Database:\s*(.+)$/) {
+
+            $self->debug("blast.pm: Database: $1\n");
+            my $db = $1;
+            while ( defined( $_ = $self->_readline ) ) {
+                if (
+                    /^\s+(\-?[\d\,]+|\S+)\s+sequences\;
+                   \s+(\-?[\d,]+|\S+)\s+ # Deal with NCBI 2.2.8 OSX problems
+                   total\s+letters/ox
+                  )
+                {
+                    my ( $s, $l ) = ( $1, $2 );
+                    $s =~ s/,//g;
+                    $l =~ s/,//g;
+                    $self->element(
+                        {
+                            'Name' => 'BlastOutput_db-len',
+                            'Data' => $s
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'BlastOutput_db-let',
+                            'Data' => $l
+                        }
+                    );
+                    last;
+                }
+                else {
+                    chomp;
+                    $db .= $_;
+                }
+            }
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_db',
+                    'Data' => $db
+                }
+            );
+        }
+        # bypasses this NCBI blast 2.2.13 extra output for now...
+		# Features in/flanking this part of subject sequence:
+        elsif (/^\sFeatures\s\w+\sthis\spart\sof\ssubject\ssequence:/) {
+        	# junk following lines up to start of HSP
+			while($_ !~ /^\sScore\s=/) {
+				$self->debug("Bypassing features line: $_");
+        		$_ = $self->_readline;
+        	}
+			$self->_pushback($_);
+        }
+        
+        # move inside of a hit
+        elsif (/^>\s*(\S+)\s*(.*)?/) {
+            chomp;
+
+            $self->debug("blast.pm: Hit: $1\n");
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+            $self->in_element('hit')
+              && $self->end_element( { 'Name' => 'Hit' } );
+
+            # special case when bl2seq reports don't have a leading
+            # Query=
+            if ( !$self->within_element('result') ) {
+                $self->_start_blastoutput;
+                $self->_start_iteration;
+            }
+            elsif ( !$self->within_element('iteration') ) {
+                $self->_start_iteration;
+            }
+            $self->start_element( { 'Name' => 'Hit' } );
+            my $id         = $1;
+            my $restofline = $2;
+
+            $self->debug("Starting a hit: $1 $2\n");
+            $self->element(
+                {
+                    'Name' => 'Hit_id',
+                    'Data' => $id
+                }
+            );
+            my ( $acc, $version ) = &_get_accession_version($id);
+            $self->element(
+                {
+                    'Name' => 'Hit_accession',
+                    'Data' => $acc
+                }
+            );
+ 
+            # add hit significance (from the hit table)
+            # this is where Bug 1986 goes awry
+            
+            my $v = shift @hit_signifs;
+            if ( defined $v ) {
+                $self->element(
+                    {
+                        'Name' => 'Hit_signif',
+                        'Data' => $v->[0]
+                    }
+                );
+                $self->element(
+                    {
+                        'Name' => 'Hit_score',
+                        'Data' => $v->[1]
+                    }
+                );
+            }
+            while ( defined( $_ = $self->_readline() ) ) {
+                next if (/^\s+$/);
+                chomp;
+                if (/Length\s*=\s*([\d,]+)/) {
+                    my $l = $1;
+                    $l =~ s/\,//g;
+                    $self->element(
+                        {
+                            'Name' => 'Hit_len',
+                            'Data' => $l
+                        }
+                    );
+                    last;
+                }
+                else {
+                    $restofline .= $_;
+                }
+            }
+            $restofline =~ s/\s+/ /g;
+            $self->element(
+                {
+                    'Name' => 'Hit_def',
+                    'Data' => $restofline
+                }
+            );
+        }
+        elsif (/\s+(Plus|Minus) Strand HSPs:/i) {
+            next;
+        }
+        elsif (
+            ( $self->in_element('hit') || $self->in_element('hsp') )
+            &&    # paracel genewise BTK
+            m/Score\s*=\s*(\S+)\s*bits\s* # Bit score
+                (?:\((\d+)\))?,                 # Raw score
+		\s+Log\-Length\sScore\s*=\s*(\d+) # Log-Length score
+                /ox
+          )
+        {
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+            $self->start_element( { 'Name' => 'Hsp' } );
+
+            $self->debug( "Got paracel genewise HSP score=$1\n");
+
+            # Some data clean-up so e-value will appear numeric to perl
+            my ( $bits, $score, $evalue ) = ( $1, $2, $3 );
+            $evalue =~ s/^e/1e/i;
+            $self->element(
+                {
+                    'Name' => 'Hsp_score',
+                    'Data' => $score
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_bit-score',
+                    'Data' => $bits
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_evalue',
+                    'Data' => $evalue
+                }
+            );
+        }
+        elsif (
+            ( $self->in_element('hit') || $self->in_element('hsp') )
+            &&    # paracel hframe BTK
+            m/Score\s*=\s*([^,\s]+),     # Raw score
+		\s*Expect\s*=\s*([^,\s]+),  # E-value
+                \s*P(?:\(\S+\))?\s*=\s*([^,\s]+) # P-value
+                /ox
+          )
+        {
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+            $self->start_element( { 'Name' => 'Hsp' } );
+
+            $self->debug( "Got paracel hframe HSP score=$1\n");
+
+            # Some data clean-up so e-value will appear numeric to perl
+            my ( $score, $evalue, $pvalue ) = ( $1, $2, $3 );
+            $evalue = "1$evalue" if $evalue =~ /^e/;
+            $pvalue = "1$pvalue" if $pvalue =~ /^e/;
+
+            $self->element(
+                {
+                    'Name' => 'Hsp_score',
+                    'Data' => $score
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_evalue',
+                    'Data' => $evalue
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_pvalue',
+                    'Data' => $pvalue
+                }
+            );
+        }
+        elsif (
+            ( $self->in_element('hit') || $self->in_element('hsp') )
+            &&    # wublast
+            m/Score\s*=\s*(\S+)\s*         # Bit score
+                \(([\d\.]+)\s*bits\),         # Raw score
+                \s*Expect\s*=\s*([^,\s]+),    # E-value
+                \s*(?:Sum)?\s*                # SUM
+                P(?:\(\d+\))?\s*=\s*([^,\s]+) # P-value
+                (?:\s*,\s+Group\s*\=\s*(\d+))?    # HSP Group
+                /ox
+          )
+        {         # wu-blast HSP parse
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+            $self->start_element( { 'Name' => 'Hsp' } );
+
+            # Some data clean-up so e-value will appear numeric to perl
+            my ( $score, $bits, $evalue, $pvalue, $group ) =
+              ( $1, $2, $3, $4, $5 );
+            $evalue =~ s/^e/1e/i;
+            $pvalue =~ s/^e/1e/i;
+
+            $self->element(
+                {
+                    'Name' => 'Hsp_score',
+                    'Data' => $score
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_bit-score',
+                    'Data' => $bits
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_evalue',
+                    'Data' => $evalue
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_pvalue',
+                    'Data' => $pvalue
+                }
+            );
+
+            if ( defined $group ) {
+                $self->element(
+                    {
+                        'Name' => 'Hsp_group',
+                        'Data' => $group
+                    }
+                );
+            }
+
+        }
+        elsif (
+            ( $self->in_element('hit') || $self->in_element('hsp') )
+            &&    # ncbi blast
+            m/Score\s*=\s*(\S+)\s*bits\s* # Bit score
+                (?:\((\d+)\))?,            # Missing for BLAT pseudo-BLAST fmt 
+                \s*Expect(?:\(\d+\+?\))?\s*=\s*(\S+) # E-value
+                /ox
+          )
+        {         # parse NCBI blast HSP
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+
+            # Some data clean-up so e-value will appear numeric to perl
+            my ( $bits, $score, $evalue ) = ( $1, $2, $3 );
+            $evalue =~ s/^e/1e/i;
+
+            $self->start_element( { 'Name' => 'Hsp' } );
+            $self->element(
+                {
+                    'Name' => 'Hsp_score',
+                    'Data' => $score
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_bit-score',
+                    'Data' => $bits
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_evalue',
+                    'Data' => $evalue
+                }
+            );
+            $score = '' unless defined $score;    # deal with BLAT which
+                                                  # has no score only bits
+            $self->debug("Got NCBI HSP score=$score, evalue $evalue\n");
+        }
+        elsif (
+            $self->in_element('hsp')
+            && m/Identities\s*=\s*(\d+)\s*\/\s*(\d+)\s*[\d\%\(\)]+\s*
+                (?:,\s*Positives\s*=\s*(\d+)\/(\d+)\s*[\d\%\(\)]+\s*)? # pos only valid for Protein alignments
+                (?:\,\s*Gaps\s*=\s*(\d+)\/(\d+))? # Gaps
+                /oxi
+          )
+        {
+            $self->element(
+                {
+                    'Name' => 'Hsp_identity',
+                    'Data' => $1
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'Hsp_align-len',
+                    'Data' => $2
+                }
+            );
+            if ( defined $3 ) {
+                $self->element(
+                    {
+                        'Name' => 'Hsp_positive',
+                        'Data' => $3
+                    }
+                );
+            }
+            else {
+                $self->element(
+                    {
+                        'Name' => 'Hsp_positive',
+                        'Data' => $1
+                    }
+                );
+            }
+            if ( defined $6 ) {
+                $self->element(
+                    {
+                        'Name' => 'Hsp_gaps',
+                        'Data' => $5
+                    }
+                );
+            }
+
+            $self->{'_Query'} = { 'begin' => 0, 'end' => 0 };
+            $self->{'_Sbjct'} = { 'begin' => 0, 'end' => 0 };
+
+            if (/(Frame\s*=\s*.+)$/) {
+
+                # handle wu-blast Frame listing on same line
+                $self->_pushback($1);
+            }
+        }
+        elsif ( $self->in_element('hsp')
+            && /Strand\s*=\s*(Plus|Minus)\s*\/\s*(Plus|Minus)/i )
+        {
+
+            # consume this event ( we infer strand from start/end)
+            unless ($reporttype) {
+                $self->{'_reporttype'} = $reporttype = 'BLASTN';
+                $bl2seq_fix = 1;    # special case to resubmit the algorithm
+                                    # reporttype
+            }
+            next;
+        }
+        elsif ( $self->in_element('hsp')
+            && /Links\s*=\s*(\S+)/ox )
+        {
+            $self->element(
+                {
+                    'Name' => 'Hsp_links',
+                    'Data' => $1
+                }
+            );
+        }
+        elsif ( $self->in_element('hsp')
+            && /Frame\s*=\s*([\+\-][1-3])\s*(\/\s*([\+\-][1-3]))?/ )
+        {
+
+            # this is for bl2seq only
+            unless ( defined $reporttype ) {
+                $bl2seq_fix = 1;
+                if ( $1 && $2 ) { $reporttype = 'TBLASTX' }
+                else {
+                    $reporttype = 'BLASTX';
+
+    # we can't distinguish between BLASTX and TBLASTN straight from the report
+                }
+                $self->{'_reporttype'} = $reporttype;
+            }
+
+            my ( $queryframe, $hitframe );
+            if ( $reporttype eq 'TBLASTX' ) {
+                ( $queryframe, $hitframe ) = ( $1, $2 );
+                $hitframe =~ s/\/\s*//g;
+            }
+            elsif ( $reporttype eq 'TBLASTN' || $reporttype eq 'PSITBLASTN') {
+                ( $hitframe, $queryframe ) = ( $1, 0 );
+            }
+            elsif ( $reporttype eq 'BLASTX' || $reporttype eq 'RPS-BLAST(BLASTP)') {
+                ( $queryframe, $hitframe ) = ( $1, 0 );
+                # though NCBI doesn't report it, this is a special BLASTX-like
+                # RPS-BLAST; should be handled differently
+                if ($reporttype eq 'RPS-BLAST(BLASTP)') {
+                    $self->element(
+                        {
+                            'Name' => 'BlastOutput_program',
+                            'Data' => 'RPS-BLAST(BLASTX)'
+                        }
+                    );
+                }
+            }
+            $self->element(
+                {
+                    'Name' => 'Hsp_query-frame',
+                    'Data' => $queryframe
+                }
+            );
+
+            $self->element(
+                {
+                    'Name' => 'Hsp_hit-frame',
+                    'Data' => $hitframe
+                }
+            );
+        }
+        elsif (/^Parameters:/
+            || /^\s+Database:\s+?/
+            || /^\s+Subset/
+            || /^\s*Lambda/
+            || /^\s*Histogram/
+            || ( $self->in_element('hsp') && /WARNING|NOTE/ ) )
+        {
+
+            # Note: Lambda check was necessary to parse
+            # t/data/ecoli_domains.rpsblast AND to parse bl2seq
+            $self->debug("blast.pm: found parameters section \n");
+
+            $self->in_element('hsp')
+              && $self->end_element( { 'Name' => 'Hsp' } );
+            $self->in_element('hit')
+              && $self->end_element( { 'Name' => 'Hit' } );
+
+            # This is for the case when we specify -b 0 (or B=0 for WU-BLAST)
+            # and still want to construct minimal Hit objects
+            while ( my $v = shift @hit_signifs ) {
+                next unless defined $v;
+                $self->start_element( { 'Name' => 'Hit' } );
+                my $id   = $v->[2];
+                my $desc = $v->[3];
+                $self->element(
+                    {
+                        'Name' => 'Hit_id',
+                        'Data' => $id
+                    }
+                );
+                my ( $acc, $version ) = &_get_accession_version($id);
+                $self->element(
+                    {
+                        'Name' => 'Hit_accession',
+                        'Data' => $acc
+                    }
+                );
+
+                if ( defined $v ) {
+                    $self->element(
+                        {
+                            'Name' => 'Hit_signif',
+                            'Data' => $v->[0]
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_score',
+                            'Data' => $v->[1]
+                        }
+                    );
+                }
+                $self->element(
+                    {
+                        'Name' => 'Hit_def',
+                        'Data' => $desc
+                    }
+                );
+                $self->end_element( { 'Name' => 'Hit' } );
+            }
+
+            $self->within_element('iteration')
+              && $self->end_element( { 'Name' => 'Iteration' } );
+
+            next if /^\s+Subset/;
+            my $blast = (/^(\s+Database\:)|(\s*Lambda)/) ? 'ncbi' : 'wublast';
+            if (/^\s*Histogram/) {
+                $blast = 'btk';
+            }
+
+            my $last = '';
+
+            # default is that gaps are allowed
+            $self->element(
+                {
+                    'Name' => 'Parameters_allowgaps',
+                    'Data' => 'yes'
+                }
+            );
+            while ( defined( $_ = $self->_readline ) ) {
+                if (
+                       /^(PSI)?([T]?BLAST[NPX])\s*(.+)/i
+                    || /^MEGABLAST\s*(.+)/i
+                    || /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i    #Paracel BTK
+                  )
+                {
+                    $self->_pushback($_);
+
+                    # let's handle this in the loop
+                    last;
+                }
+                elsif (/^Query=/) {
+                    $self->_pushback($reportline) if $reportline;
+                    $self->_pushback($_);
+
+                    # -- Superfluous I think, but adding nonetheless
+                    $self->in_element('hsp')
+                      && $self->end_element( { 'Name' => 'Hsp' } );
+                    $self->in_element('hit')
+                      && $self->end_element( { 'Name' => 'Hit' } );
+
+                    # --
+                    if ($bl2seq_fix) {
+                        $self->element(
+                            {
+                                'Name' => 'BlastOutput_program',
+                                'Data' => $reporttype
+                            }
+                        );
+                    }
+                    $self->end_element( { 'Name' => 'BlastOutput' } );
+                    return $self->end_document();
+                }
+
+                # here is where difference between wublast and ncbiblast
+                # is better handled by different logic
+                if (   /Number of Sequences:\s+([\d\,]+)/i
+                    || /of sequences in database:\s+(\-?[\d,]+)/i )
+                {
+                    my $c = $1;
+                    $c =~ s/\,//g;
+                    $self->element(
+                        {
+                            'Name' => 'Statistics_db-len',
+                            'Data' => $c
+                        }
+                    );
+                }
+                elsif (/letters in database:\s+(\-?[\d,]+)/i) {
+                    my $s = $1;
+                    $s =~ s/,//g;
+                    $self->element(
+                        {
+                            'Name' => 'Statistics_db-let',
+                            'Data' => $s
+                        }
+                    );
+                }
+                elsif ( $blast eq 'btk' ) {
+                    next;
+                }
+                elsif ( $blast eq 'wublast' ) {
+
+                    #		   warn($_);
+                    if (/E=(\S+)/) {
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_expect',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (/nogaps/) {
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_allowgaps',
+                                'Data' => 'no'
+                            }
+                        );
+                    }
+                    elsif (/ctxfactor=(\S+)/) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_ctxfactor',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (
+/(postsw|links|span[12]?|warnings|notes|gi|noseqs|qres|qype)/
+                      )
+                    {
+                        $self->element(
+                            {
+                                'Name' => "Parameters_$1",
+                                'Data' => 'yes'
+                            }
+                        );
+                    }
+                    elsif (/(\S+)=(\S+)/) {
+                        $self->element(
+                            {
+                                'Name' => "Parameters_$1",
+                                'Data' => $2
+                            }
+                        );
+                    }
+                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Matrix name/i ) {
+                        my $firstgapinfo = 1;
+                        my $frame        = undef;
+                        while ( defined($_) && !/^\s+$/ ) {
+                            s/^\s+//;
+                            s/\s+$//;
+                            if ( $firstgapinfo
+                                && s/Q=(\d+),R=(\d+)\s+//x )
+                            {
+                                $firstgapinfo = 0;
+
+                                $self->element(
+                                    {
+                                        'Name' => 'Parameters_gap-open',
+                                        'Data' => $1
+                                    }
+                                );
+                                $self->element(
+                                    {
+                                        'Name' => 'Parameters_gap-extend',
+                                        'Data' => $2
+                                    }
+                                );
+                                my @fields = split;
+
+                                for my $type (
+                                    qw(lambda_gapped
+                                    kappa_gapped
+                                    entropy_gapped)
+                                  )
+                                {
+                                    next if $type eq 'n/a';
+                                    if ( !@fields ) {
+                                        warn "fields is empty for $type\n";
+                                        next;
+                                    }
+                                    $self->element(
+                                        {
+                                            'Name' =>
+                                              "Statistics_frame$frame\_$type",
+                                            'Data' => shift @fields
+                                        }
+                                    );
+                                }
+                            }
+                            else {
+                                my ( $frameo, $matid, $matrix, @fields ) =
+                                  split;
+                                if ( !defined $frame ) {
+
+                                    # keep some sort of default feature I guess
+                                    # even though this is sort of wrong
+                                    $self->element(
+                                        {
+                                            'Name' => 'Parameters_matrix',
+                                            'Data' => $matrix
+                                        }
+                                    );
+                                    $self->element(
+                                        {
+                                            'Name' => 'Statistics_lambda',
+                                            'Data' => $fields[0]
+                                        }
+                                    );
+                                    $self->element(
+                                        {
+                                            'Name' => 'Statistics_kappa',
+                                            'Data' => $fields[1]
+                                        }
+                                    );
+                                    $self->element(
+                                        {
+                                            'Name' => 'Statistics_entropy',
+                                            'Data' => $fields[2]
+                                        }
+                                    );
+                                }
+                                $frame = $frameo;
+                                my $ii = 0;
+                                for my $type (
+                                    qw(lambda_used
+                                    kappa_used
+                                    entropy_used
+                                    lambda_computed
+                                    kappa_computed
+                                    entropy_computed)
+                                  )
+                                {
+                                    my $f = $fields[$ii];
+                                    next unless defined $f;    # deal with n/a
+                                    if ( $f eq 'same' ) {
+                                        $f = $fields[ $ii - 3 ];
+                                    }
+                                    $ii++;
+                                    $self->element(
+                                        {
+                                            'Name' =>
+                                              "Statistics_frame$frame\_$type",
+                                            'Data' => $f
+                                        }
+                                    );
+
+                                }
+                            }
+
+                            # get the next line
+                            $_ = $self->_readline;
+                        }
+                        $last = $_;
+                    }
+                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Length/i ) {
+                        my $frame = undef;
+                        while ( defined($_) && !/^\s+/ ) {
+                            s/^\s+//;
+                            s/\s+$//;
+                            my @fields = split;
+                            if ( @fields <= 3 ) {
+                                for my $type (qw(X_gapped E2_gapped S2)) {
+                                    last unless @fields;
+                                    $self->element(
+                                        {
+                                            'Name' =>
+                                              "Statistics_frame$frame\_$type",
+                                            'Data' => shift @fields
+                                        }
+                                    );
+                                }
+                            }
+                            else {
+
+                                for my $type (
+                                    qw(length
+                                    efflength
+                                    E S W T X E2 S2)
+                                  )
+                                {
+                                    $self->element(
+                                        {
+                                            'Name' =>
+                                              "Statistics_frame$frame\_$type",
+                                            'Data' => shift @fields
+                                        }
+                                    );
+                                }
+                            }
+                            $_ = $self->_readline;
+                        }
+                        $last = $_;
+                    }
+                    elsif (/(\S+\s+\S+)\s+DFA:\s+(\S+)\s+\((.+)\)/) {
+                        if ( $1 eq 'states in' ) {
+                            $self->element(
+                                {
+                                    'Name' => 'Statistics_DFA_states',
+                                    'Data' => "$2 $3"
+                                }
+                            );
+                        }
+                        elsif ( $1 eq 'size of' ) {
+                            $self->element(
+                                {
+                                    'Name' => 'Statistics_DFA_size',
+                                    'Data' => "$2 $3"
+                                }
+                            );
+                        }
+                    }
+                    elsif (
+                        m/^\s+Time to generate neighborhood:\s+
+			    (\S+\s+\S+\s+\S+)/x
+                      )
+                    {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_neighbortime',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (/processors\s+used:\s+(\d+)/) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_noprocessors',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (
+                        m/^\s+(\S+)\s+cpu\s+time:\s+# cputype
+			    (\S+\s+\S+\s+\S+)           # cputime
+			    \s+Elapsed:\s+(\S+)/x
+                      )
+                    {
+                        my $cputype = lc($1);
+                        $self->element(
+                            {
+                                'Name' => "Statistics_$cputype\_cputime",
+                                'Data' => $2
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => "Statistics_$cputype\_actualtime",
+                                'Data' => $3
+                            }
+                        );
+                    }
+                    elsif (/^\s+Start:/) {
+                        my ( $junk, $start, $stime, $end, $etime ) =
+                          split( /\s+(Start|End)\:\s+/, $_ );
+                        chomp($stime);
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_starttime',
+                                'Data' => $stime
+                            }
+                        );
+                        chomp($etime);
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_endtime',
+                                'Data' => $etime
+                            }
+                        );
+                    }
+                    elsif (/^\s+Database:\s+(.+)$/) {
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_full_dbpath',
+                                'Data' => $1
+                            }
+                        );
+
+                    }
+                    elsif (/^\s+Posted:\s+(.+)/) {
+                        my $d = $1;
+                        chomp($d);
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_posted_date',
+                                'Data' => $d
+                            }
+                        );
+                    }
+                }
+                elsif ( $blast eq 'ncbi' ) {
+
+                    if (m/^Matrix:\s+(.+)\s*$/oxi) {
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_matrix',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (/^Gapped/) {
+                        $gapped_stats = 1;
+                    }
+                    elsif (/^Lambda/) {
+                        $_ = $self->_readline;
+                        s/^\s+//;
+                        my ( $lambda, $kappa, $entropy ) = split;
+                        if ($gapped_stats) {
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_gapped_lambda",
+                                    'Data' => $lambda
+                                }
+                            );
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_gapped_kappa",
+                                    'Data' => $kappa
+                                }
+                            );
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_gapped_entropy",
+                                    'Data' => $entropy
+                                }
+                            );
+                        }
+                        else {
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_lambda",
+                                    'Data' => $lambda
+                                }
+                            );
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_kappa",
+                                    'Data' => $kappa
+                                }
+                            );
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_entropy",
+                                    'Data' => $entropy
+                                }
+                            );
+                        }
+                    }
+                    elsif (m/effective\s+search\s+space\s+used:\s+(\d+)/ox) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_eff-spaceused',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (m/effective\s+search\s+space:\s+(\d+)/ox) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_eff-space',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (
+                        m/Gap\s+Penalties:\s+Existence:\s+(\d+)\,
+			    \s+Extension:\s+(\d+)/ox
+                      )
+                    {
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_gap-open',
+                                'Data' => $1
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_gap-extend',
+                                'Data' => $2
+                            }
+                        );
+                    }
+                    elsif (/effective\s+HSP\s+length:\s+(\d+)/) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_hsp-len',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (/effective\s+length\s+of\s+query:\s+([\d\,]+)/) {
+                        my $c = $1;
+                        $c =~ s/\,//g;
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_query-len',
+                                'Data' => $c
+                            }
+                        );
+                    }
+                    elsif (/effective\s+length\s+of\s+database:\s+([\d\,]+)/) {
+                        my $c = $1;
+                        $c =~ s/\,//g;
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_eff-dblen',
+                                'Data' => $c
+                            }
+                        );
+                    }
+                    elsif (
+/^(T|A|X1|X2|X3|S1|S2):\s+(\d+(\.\d+)?)\s+(?:\(\s*(\d+\.\d+) bits\))?/
+                      )
+                    {
+                        my $v = $2;
+                        chomp($v);
+                        $self->element(
+                            {
+                                'Name' => "Statistics_$1",
+                                'Data' => $v
+                            }
+                        );
+                        if ( defined $4 ) {
+                            $self->element(
+                                {
+                                    'Name' => "Statistics_$1_bits",
+                                    'Data' => $4
+                                }
+                            );
+                        }
+                    }
+                    elsif (
+                        m/frameshift\s+window\,
+			    \s+decay\s+const:\s+(\d+)\,\s+([\.\d]+)/x
+                      )
+                    {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_framewindow',
+                                'Data' => $1
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_decay',
+                                'Data' => $2
+                            }
+                        );
+                    }
+                    elsif (m/^Number\s+of\s+Hits\s+to\s+DB:\s+(\S+)/ox) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_hit_to_db',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (m/^Number\s+of\s+extensions:\s+(\S+)/ox) {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_num_extensions',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (
+                        m/^Number\s+of\s+successful\s+extensions:\s+
+			    (\S+)/ox
+                      )
+                    {
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_num_suc_extensions',
+                                'Data' => $1
+                            }
+                        );
+                    }
+                    elsif (
+                        m/^Number\s+of\s+sequences\s+better\s+than\s+
+			    (\S+):\s+(\d+)/ox
+                      )
+                    {
+                        $self->element(
+                            {
+                                'Name' => 'Parameters_expect',
+                                'Data' => $1
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_seqs_better_than_cutoff',
+                                'Data' => $2
+                            }
+                        );
+                    }
+                    elsif (/^\s+Posted\s+date:\s+(.+)/) {
+                        my $d = $1;
+                        chomp($d);
+                        $self->element(
+                            {
+                                'Name' => 'Statistics_posted_date',
+                                'Data' => $d
+                            }
+                        );
+                    }
+                    elsif ( !/^\s+$/ ) {
+                        #$self->debug( "unmatched stat $_");
+                    }
+                }
+                $last = $_;
+            }
+        } elsif ( $self->in_element('hsp') ) {
+            $self->debug("blast.pm: Processing HSP\n");
+            # let's read 3 lines at a time;
+            # bl2seq hackiness... Not sure I like
+            $self->{'_reporttype'} ||= $DEFAULTREPORTTYPE;
+            my %data = (
+                'Query' => '',
+                'Mid'   => '',
+                'Hit'   => ''
+            );
+            my $len;
+            for ( my $i = 0 ; defined($_) && $i < 3 ; $i++ ) {
+                # $self->debug("$i: $_") if $v;
+                if ( ( $i == 0 && /^\s+$/) || 
+		     /^\s*(?:Lambda|Minus|Plus|Score)/i )
+                {
+                    $self->_pushback($_) if defined $_;
+                    $self->end_element( { 'Name' => 'Hsp' } );
+                    last;
+                }
+                chomp;
+                if (/^((Query|Sbjct):?\s+(\-?\d+)\s*)(\S+)\s+(\-?\d+)/) {
+                    my ( $full, $type, $start, $str, $end ) =
+                      ( $1, $2, $3, $4, $5 );
+
+                    if ( $str eq '-' ) {
+                        $i = 3 if $type eq 'Sbjct';
+                    }
+                    else {
+                        $data{$type} = $str;
+                    }
+                    $len = length($full);
+                    $self->{"\_$type"}->{'begin'} = $start
+                      unless $self->{"_$type"}->{'begin'};
+                    $self->{"\_$type"}->{'end'} = $end;
+                } else {
+                    $self->throw("no data for midline $_")
+                      unless ( defined $_ && defined $len );
+                    $data{'Mid'} = substr( $_, $len );
+                }
+                $_ = $self->_readline();
+            }
+            $self->characters(
+                {
+                    'Name' => 'Hsp_qseq',
+                    'Data' => $data{'Query'}
+                }
+            );
+            $self->characters(
+                {
+                    'Name' => 'Hsp_hseq',
+                    'Data' => $data{'Sbjct'}
+                }
+            );
+            $self->characters(
+                {
+                    'Name' => 'Hsp_midline',
+                    'Data' => $data{'Mid'}
+                }
+            );
+        }
+        else {
+            #$self->debug("blast.pm: unrecognized line $_");
+        }
+    }
+
+    $self->debug("blast.pm: End of BlastOutput\n");
+    if ( $self->{'_seentop'} ) {
+        $self->within_element('hsp')
+          && $self->end_element( { 'Name' => 'Hsp' } );
+        $self->within_element('hit')
+          && $self->end_element( { 'Name' => 'Hit' } );
+        $self->within_element('iteration')
+          && $self->end_element( { 'Name' => 'Iteration' } );
+        if ($bl2seq_fix) {
+            $self->element(
+                {
+                    'Name' => 'BlastOutput_program',
+                    'Data' => $reporttype
+                }
+            );
+        }
+        $self->end_element( { 'Name' => 'BlastOutput' } );
+    }
+    return $self->end_document();
+}
+
+# Private helper: opens the top-level BlastOutput element and updates the
+# per-report bookkeeping stored on the parser object.
+sub _start_blastoutput {
+    my ($self) = @_;
+
+    my %event = ( 'Name' => 'BlastOutput' );
+    $self->start_element( \%event );
+
+    # '_seentop' is checked at the end of parsing before BlastOutput is closed.
+    $self->{'_seentop'} = 1;
+
+    # One more result has been started on this stream.
+    $self->{'_result_count'}++;
+
+    # Discard whatever return code the start event stored.
+    $self->{'_handler_rc'} = undef;
+}
+
+# Private helper: emits the start event for an Iteration element.
+sub _start_iteration {
+    my ($self) = @_;
+    my %event = ( 'Name' => 'Iteration' );
+
+    # Resetting '_hit_info' here is disabled:
+    #   $self->{'_hit_info'} = undef;
+    return $self->start_element( \%event );
+}
+
+=head2 _will_handle
+
+ Title   : _will_handle
+ Usage   : Private method. For internal use only.
+              if( my $handler = $self->_will_handle($type) ) { ... }
+ Function: Cheap check of whether elements of a given type are to be
+           handled by the event handler.
+ Returns : Reference to the cached EventHandler object if the element
+           type is to be handled.
+           undef if the element type is not to be handled.
+ Args    : string containing the type of element.
+
+Optimizations:
+
+=over 2
+
+=item 1
+
+The EventHandler is read from a cached pointer, which minimizes
+repeated lookups.
+
+=item 2
+
+The will_handle status of every type encountered is cached, so
+handler-E<gt>will_handle($type) only needs to be called once per
+type (as long as it returns a defined value).
+
+=back
+
+This does not lead to a major savings by itself (only 5-10%).  In
+combination with other optimizations, or for large parse jobs, the
+savings could be significant.
+
+To test against the unoptimized version, bypass both caches: fetch the
+handler with $self-E<gt>_eventHandler() and call its will_handle($type)
+on every invocation.
+
+=cut
+
+sub _will_handle {
+    my ( $self, $type ) = @_;
+    my $handler = $self->{'_handler_cache'};
+
+    # Consult the per-type cache first; only ask the handler on a miss.
+    my $status = $self->{'_will_handle_cache'}->{$type};
+    unless ( defined $status ) {
+        $status = $handler->will_handle($type);
+        $self->{'_will_handle_cache'}->{$type} = $status;
+    }
+
+    return $handler if $status;
+    return undef;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+=cut
+
+sub start_element {
+    my ( $self, $data ) = @_;
+
+    # Attributes are passed through untouched; only names listed in
+    # %MODEMAP produce a start event.
+    my $type = $MODEMAP{ $data->{'Name'} };
+    return unless $type;
+
+    # Forward the event when the handler wants this type. When it does not,
+    # stay silent (changed 4/29/2006 to play nice with other event handlers).
+    if ( my $handler = $self->_will_handle($type) ) {
+        my $func = 'start_' . lc $type;
+        $self->{'_handler_rc'} = $handler->$func( $data->{'Attributes'} );
+    }
+
+    # Push the new element onto the front of the element stack.
+    unshift @{ $self->{'_elements'} }, $type;
+
+    # A new result starts with a clean slate.
+    if ( $type eq 'result' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        return;
+    }
+
+    # For any other type, drop values left over from a previous element of
+    # the same type (keys prefixed with the upper-cased type name).
+    return unless defined $self->{'_values'};
+    my @stale = grep { /^\U$type\-/ } keys %{ $self->{'_values'} };
+    delete $self->{'_values'}->{$_} for @stale;
+    return;
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    # Handles the end of an element named $data->{'Name'}:
+    #   * 'BlastOutput_program' records the report type,
+    #   * 'Hsp' first flushes the accumulated alignment strings and
+    #     coordinates as ordinary elements,
+    #   * names found in %MODEMAP are dispatched to the handler's
+    #     end_<type> method and popped off the element stack,
+    #   * names found in %MAPPING have the last character data stored
+    #     in $self->{'_values'}.
+    # Returns the handler's return value ($rc), or undef when no handler
+    # method was called.
+    my ( $self, $data ) = @_;
+    
+    my $nm   = $data->{'Name'};
+    my $type = $MODEMAP{$nm};
+    my $rc;
+    if ( $nm eq 'BlastOutput_program' ) {
+        # Take the report type from the text last seen by characters(),
+        # upper-cased; otherwise fall back to the default report type.
+        if ( $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
+            $self->{'_reporttype'} = uc $1;
+        }
+        $self->{'_reporttype'} ||= $DEFAULTREPORTTYPE;
+    }
+
+    # Hsps are sort of weird, in that they end when another
+    # object begins so have to detect this in end_element for now
+    if ( $nm eq 'Hsp' ) {
+        # Emit the strings accumulated in '_last_hspdata'. Each element()
+        # call re-enters end_element() and clears '_last_data' again.
+        foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
+            $self->element(
+                {
+                    'Name' => $_,
+                    'Data' => $self->{'_last_hspdata'}->{$_}
+                }
+            );
+        }
+        # Reset the accumulator for the next HSP.
+        $self->{'_last_hspdata'} = {};
+        # Emit the query and hit coordinates kept in '_Query' / '_Sbjct'.
+        $self->element(
+            {
+                'Name' => 'Hsp_query-from',
+                'Data' => $self->{'_Query'}->{'begin'}
+            }
+        );
+        $self->element(
+            {
+                'Name' => 'Hsp_query-to',
+                'Data' => $self->{'_Query'}->{'end'}
+            }
+        );
+
+        $self->element(
+            {
+                'Name' => 'Hsp_hit-from',
+                'Data' => $self->{'_Sbjct'}->{'begin'}
+            }
+        );
+        $self->element(
+            {
+                'Name' => 'Hsp_hit-to',
+                'Data' => $self->{'_Sbjct'}->{'end'}
+            }
+        );
+
+        #    } elsif( $nm eq 'Iteration' ) {
+        # Nothing special needs to be done here.
+    }
+    if ( $type = $MODEMAP{$nm} ) {
+        # Structural element: hand the report type and the collected values
+        # to the handler (if it handles this type), then pop the stack.
+        my $handler = $self->_will_handle($type);
+        if ($handler) {
+            my $func = sprintf( "end_%s", lc $type );
+            $rc = $handler->$func( $self->{'_reporttype'}, $self->{'_values'} );
+        }
+        shift @{ $self->{'_elements'} };
+
+    }
+    elsif ( $MAPPING{$nm} ) {
+
+        # Plain datum: store the last character data under the mapped key.
+        if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
+
+            # this is where we shove in the data from the
+            # hashref info about params or statistics
+            # (first key of the mapping hash selects the group, its value
+            # selects the entry inside that group)
+            my $key = ( keys %{ $MAPPING{$nm} } )[0];
+            $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
+              $self->{'_last_data'};
+        }
+        else {
+            $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
+        }
+    }
+    else {
+        #$self->debug("blast.pm: unknown nm $nm, ignoring\n");
+    }
+    $self->{'_last_data'} = '';    # remove read data if we are at
+                                   # end of an element
+    # Remember the handler's return value for a finished result; it is what
+    # end_document() hands back.
+    $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
+    return $rc;
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element {
+    my ( $self, $event ) = @_;
+
+    # Fire the complete start / characters / end sequence for one datum.
+    for my $phase (qw(start_element characters)) {
+        $self->$phase($event);
+    }
+
+    # The end event's return value is the result of the whole call.
+    return $self->end_element($event);
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Sends a character event
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters {
+    my ( $self, $data ) = @_;
+    my $text = $data->{'Data'};
+
+    # Alignment strings arrive in pieces while inside an HSP, so they are
+    # appended to a per-HSP accumulator keyed by element name.
+    my $is_hsp_string = $self->in_element('hsp')
+      && $data->{'Name'} =~ /^Hsp\_(qseq|hseq|midline)$/;
+    if ( $is_hsp_string && defined $text ) {
+        $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $text;
+    }
+
+    # Missing or whitespace-only data never replaces the last seen data.
+    return if !defined $text || $text =~ /^\s+$/;
+    $self->{'_last_data'} = $text;
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+See Also: L<in_element>
+
+=cut
+
+sub within_element {
+    my ( $self, $name ) = @_;
+    my $stack = $self->{'_elements'};
+
+    # Nothing can match when there is no name to look for or no open
+    # element. The original guard read (A && B) || C by precedence, so an
+    # undefined stack was dereferenced and an undefined name was compared.
+    return 0 unless defined $name && defined $stack && @{$stack};
+
+    # Unlike in_element(), any level of the element stack counts.
+    foreach my $open ( @{$stack} ) {
+        return 1 if $open eq $name;
+    }
+    return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within_element' because within
+           can be tested for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+See Also: L<within_element>
+
+=cut
+
+sub in_element {
+    my ( $self, $name ) = @_;
+
+    # Only the front of the element stack (the current element) counts.
+    my $current = $self->{'_elements'}->[0];
+    return 0 unless defined $current;
+    return ( $current eq $name );
+}
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handle a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document {
+    my $self = shift;
+
+    # Reset all per-document parser state before a new report is read.
+    @{$self}{qw(_lasttype _values _result)} = ( '', {}, undef );
+    $self->{'_elements'} = [];
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document {
+    my $self = shift;
+
+    # Any further arguments are accepted and ignored.
+    #$self->debug("blast.pm: end_document\n");
+
+    # Hand back whatever end_element() stored for the finished result.
+    return $self->{'_result'};
+}
+
+# Writes a result through the configured writer, installing a default
+# writer (with a warning) when none has been set.
+sub write_result {
+    my ( $self, $blast, @args ) = @_;
+
+    unless ( defined $self->writer ) {
+        $self->warn("Writer not defined. Using a $DEFAULT_BLAST_WRITER_CLASS");
+        $self->writer( $DEFAULT_BLAST_WRITER_CLASS->new() );
+    }
+
+    # The actual writing is done by the parent class.
+    return $self->SUPER::write_result( $blast, @args );
+}
+
+# Returns the number of results started so far (the counter is incremented
+# each time a BlastOutput element is opened).
+sub result_count {
+    my ($self) = @_;
+    return $self->{'_result_count'};
+}
+
+# Alias for result_count(); any arguments are ignored.
+sub report_count {
+    my $self = shift;
+    return $self->result_count;
+}
+
+=head2 inclusion_threshold
+
+ Title   : inclusion_threshold
+ Usage   : my $incl_thresh = $isreb->inclusion_threshold;
+         : $isreb->inclusion_threshold(1e-5);
+ Function: Get/Set the e-value threshold for inclusion in the PSI-BLAST 
+           score matrix model (blastpgp) that was used for generating the reports
+           being parsed.
+ Returns : number (real) 
+           Default value: $Bio::SearchIO::IteratedSearchResultEventBuilder::DEFAULT_INCLUSION_THRESHOLD
+ Args    : number (real)  (e.g., 0.0001 or 1e-4 )
+
+=cut
+
+# Get/set accessor that simply forwards to the event handler.
+sub inclusion_threshold {
+    my ( $self, @args ) = @_;
+    return $self->_eventHandler->inclusion_threshold(@args);
+}
+
+=head2 max_significance
+
+ Usage     : $obj->max_significance();
+ Purpose   : Set/Get the P or Expect value used as significance screening cutoff.
+             This is the value of the -signif parameter supplied to new().
+             Hits with P or E-value above this are skipped.
+ Returns   : Scientific notation number with this format: 1.0e-05.
+ Argument  : Scientific notation number or float (when setting)
+ Comments  : Screening of significant hits uses the data provided on the
+           : description line. For NCBI BLAST1 and WU-BLAST, this data 
+           : is P-value. for NCBI BLAST2 it is an Expect value.
+
+=cut
+
+# Get/set accessor that forwards to the cached event handler.
+sub max_significance {
+    my ( $self, @args ) = @_;
+    return $self->{'_handler_cache'}->max_significance(@args);
+}
+
+=head2 signif
+
+Synonym for L<max_significance()|max_significance>
+
+=cut
+
+# Synonym: passes all arguments on to max_significance().
+sub signif {
+    my ( $self, @args ) = @_;
+    return $self->max_significance(@args);
+}
+
+=head2 min_score
+
+ Usage     : $obj->min_score();
+ Purpose   : Set/Get the Blast score used as screening cutoff.
+             This is the value of the -score parameter supplied to new().
+             Hits with scores below this are skipped.
+ Returns   : Integer or scientific notation number.
+ Argument  : Integer or scientific notation number (when setting)
+ Comments  : Screening of significant hits uses the data provided on the
+           : description line. 
+
+=cut
+
+# Get/set accessor for the minimum-score cutoff, forwarded to the cached
+# event handler. The previous body called max_significance() on the
+# handler, so setting a score silently overwrote the significance cutoff.
+# NOTE(review): assumes the cached handler implements min_score() -- confirm
+# against the event builder classes in use.
+sub min_score {
+    my ( $self, @args ) = @_;
+    return $self->{'_handler_cache'}->min_score(@args);
+}
+
+=head2 min_query_length
+
+ Usage     : $obj->min_query_length();
+ Purpose   : Set/Get the query sequence length used as screening criteria.
+             This is the value of the -min_query_len parameter supplied to new().
+             Hits with sequence length below this are skipped.
+ Returns   : Integer
+ Argument  : Integer > 0 (when setting)
+
+=cut
+
+sub min_query_length {
+    my ( $self, @args ) = @_;
+
+    # Plain getter when called without a value.
+    return $self->{'_min_query_length'} unless @args;
+
+    # Only strings made of digits with a value above zero are accepted.
+    my $min_qlen = $args[0];
+    my $is_valid = $min_qlen !~ /\D/ && $min_qlen > 0;
+    unless ($is_valid) {
+        $self->throw(
+            -class => 'Bio::Root::BadParameter',
+            -text  => "Invalid minimum query length value: $min_qlen\n"
+              . "Value must be an integer > 0. Value not set.",
+            -value => $min_qlen
+        );
+    }
+
+    # Setting a length also sets the '_confirm_qlength' flag.
+    $self->{'_confirm_qlength'}  = 1;
+    $self->{'_min_query_length'} = $min_qlen;
+
+    return $self->{'_min_query_length'};
+}
+
+=head2 best_hit_only
+
+ Title     : best_hit_only
+ Usage     : print "only getting best hit.\n" if $obj->best_hit_only;
+ Purpose   : Set/Get the indicator for whether or not to process only 
+           : the best BlastHit.
+ Returns   : Boolean (1 | 0)
+ Argument  : Boolean (1 | 0) (when setting)
+
+=cut
+
+sub best_hit_only {
+    my ( $self, @args ) = @_;
+
+    # Store the flag when one is supplied, then report the current value.
+    $self->{'_best'} = $args[0] if @args;
+    return $self->{'_best'};
+}
+
+=head2 check_all_hits
+
+ Title     : check_all_hits
+ Usage     : print "checking all hits.\n" if $obj->check_all_hits;
+ Purpose   : Set/Get the indicator for whether or not to process all hits.
+           : If false, the parser will stop processing hits after the
+           : the first non-significance hit or the first hit that fails 
+           : any hit filter.
+ Returns   : Boolean (1 | 0)
+ Argument  : Boolean (1 | 0) (when setting)
+
+=cut
+
+sub check_all_hits {
+    my ( $self, @args ) = @_;
+
+    # Store the flag when one is supplied, then report the current value.
+    $self->{'_check_all'} = $args[0] if @args;
+    return $self->{'_check_all'};
+}
+
+=head2 _get_accession_version
+
+ Title   : _get_accession_version
+ Usage   : my ($acc,$ver) = &_get_accession_version($id)
+ Function:Private function to get an accession,version pair
+           for an ID (if it is in NCBI format)
+ Returns : 2-tuple of accession, version
+ Args    : ID string to process
+
+
+=cut
+
+# Splits an NCBI-style identifier into (accession, version).
+#
+# Identifier layouts listed at ftp://ftp.ncbi.nih.gov/blast/db/README:
+#
+#   Database Name                     Identifier Syntax
+#   ============================      ========================
+#   GenBank                           gb|accession|locus
+#   EMBL Data Library                 emb|accession|locus
+#   DDBJ, DNA Database of Japan       dbj|accession|locus
+#   NBRF PIR                          pir||entry
+#   Protein Research Foundation       prf||name
+#   SWISS-PROT                        sp|accession|entry name
+#   Brookhaven Protein Data Bank      pdb|entry|chain
+#   Patents                           pat|country|number
+#   GenInfo Backbone Id               bbs|number
+#   General database identifier       gnl|database|identifier
+#   NCBI Reference Sequence           ref|accession|locus
+#   Local Sequence identifier         lcl|identifier
+#
+# Identifiers matching none of the patterns below are returned whole as the
+# accession, with an undefined version.
+sub _get_accession_version {
+    my $id = shift;
+
+    # Tolerate being called accidentally as a method on a SearchIO object:
+    # the real ID is then the next argument.
+    $id = shift if ref($id) && $id->isa('Bio::SearchIO');
+    return unless defined $id;
+
+    # Pick the field that carries "accession.version".
+    my $field;
+    if ( $id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|lcl)\|(.*)\|(.*)/ ) {
+        $field = $2;    # second field of the triple
+    }
+    elsif ( $id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/ ) {
+        $field = $3;    # third field of the triple
+    }
+    else {
+        # punt, not matching any of the database layouts above
+        return ( $id, undef );
+    }
+
+    my ( $acc, $version ) = split /\./, $field;
+    return ( $acc, $version );
+}
+
+1;
+
+__END__
+
+Developer Notes
+---------------
+
+The following information is added in hopes of increasing the
+maintainability of this code. It runs the risk of becoming obsolete as
+the code gets updated. As always, double check against the actual
+source. If you find any discrepancies, please correct them.
+[ This documentation added on 3 Jun 2003. ]
+
+The logic is the brainchild of Jason Stajich, documented by Steve
+Chervitz. Jason: please check it over and modify as you see fit.
+
+Question:
+Elmo wants to know: How does this module unmarshall data from the input stream?
+(i.e., how does information from a raw input file get added to 
+the correct Bioperl object?)
+
+Answer:
+
+This answer is specific to SearchIO::blast, but may apply to other
+SearchIO.pm subclasses as well. The following description gives the
+basic idea. The actual processing is a little more complex for
+certain types of data (HSP, Report Parameters).
+
+You can think of blast::next_result() as faking a SAX XML parser,
+making a non-XML document behave like it is XML. The overhead to do this
+is quite substantial (~650 lines of code instead of ~80 in
+blastxml.pm).
+
+0. First, add a key => value pair for the datum of interest to %MAPPING
+    Example:
+           'Foo_bar'   => 'Foo-bar',
+
+1. next_result() collects the datum of interest from the input stream, 
+   and calls element(). 
+    Example:
+            $self->element({ 'Name' => 'Foo_bar',
+                             'Data' => $foobar});
+
+2. The element() method is a convenience method that calls start_element(),
+   characters(), and end_element(). 
+
+3. start_element() checks to see if the event handler can handle a start_xxx(),
+   where xxx = the 'Name' parameter passed into element(), and calls start_xxx()
+   if so. Otherwise, start_element() does not do anything.
+
+   Data that will have such an event handler are defined in %MODEMAP.
+   Typically, there are only handler methods for the main parts of
+   the search result (e.g., Result, Iteration, Hit, HSP),
+   which have corresponding Bioperl modules. So in this example,
+   there was an earlier call such as $self->element({'Name'=>'Foo'})
+   and the Foo_bar datum is meant to ultimately go into a Foo object.
+
+   The start_foo() method in the handler will typically do any
+   data initialization necessary to prepare for creating a new Foo object.
+   Example: SearchResultEventBuilder::start_result()
+
+4. characters() takes the value of the 'Data' key from the hashref argument in
+   the element() call and saves it in a local data member:
+   Example:
+   $self->{'_last_data'} = $data->{'Data'};
+
+5. end_element() is like start_element() in that it does the check for whether
+   the event handler can handle end_xxx() and if so, calls it, passing in 
+   the data collected from all of the characters() calls that occurred
+   since the start_xxx() call.
+
+   If there isn't any special handler for the data type specified by 'Name', 
+   end_element() will place the data saved by characters() into another
+   local data member that saves it in a hash with a key defined by %MAPPING.
+   Example:
+           $nm = $data->{'Name'};
+           $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
+
+   In this case, $MAPPING{$nm} is 'Foo-bar'.
+
+   end_element() finishes by resetting the local data member used by 
+   characters(). (i.e., $self->{'_last_data'} = '';)
+
+6. When the next_result() method encounters the end of the Foo element in the
+   input stream, it will invoke $self->end_element({'Name'=>'Foo'}).
+   end_element() then sends all of the data in the $self->{'_values'} hash
+   to the event handler.
+   Note that $self->{'_values'} is cleaned out during start_element(),
+   keeping it at a reasonable size.
+
+   In the event handler, the end_foo() method takes the hash from end_element()
+   and creates a new hash containing the same data, but having keys lacking
+   the 'Foo' prefix (e.g., 'Foo-bar' becomes '-bar'). The handler's end_foo()
+   method then creates the Foo object, passing in this new hash as an argument.
+   Example: SearchResultEventBuilder::end_result()
+
+7. Objects created from the data in the search result are managed by 
+   the event handler which adds them to a ResultI object (using API methods
+   for that object). The ResultI object gets passed back to
+   SearchIO::end_element() when it calls end_result().
+
+   The ResultI object is then saved in an internal data member of the 
+   SearchIO object, which returns it at the end of next_result()
+   by calling end_document().
+
+   (Technical Note: All objects created by end_xxx() methods in the event 
+    handler are returned to SearchIO::end_element(), but the SearchIO object
+    only cares about the ResultI objects.)
+
+(Sesame Street aficionados note: This answer was NOT given by Mr. Noodle ;-P)
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blasttable.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blasttable.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blasttable.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,554 @@
+# $Id: blasttable.pm,v 1.7.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::blasttable
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::blasttable - Driver module for SearchIO for parsing NCBI -m 8/9 format
+
+=head1 SYNOPSIS
+
+  # do not use this module directly
+  use Bio::SearchIO;
+  my $parser = new Bio::SearchIO(-file   => $file,
+                                 -format => 'blasttable');
+
+  while( my $result = $parser->next_result ) {
+  }
+
+=head1 DESCRIPTION
+
+This module will support parsing NCBI -m 8 or -m 9 tabular output.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::SearchIO::blasttable;
+use vars qw(%MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $DefaultProgramName);
+use strict;
+use Bio::Search::Result::ResultFactory;
+use Bio::Search::Hit::HitFactory;
+use Bio::Search::HSP::HSPFactory;
+
+# Program name reported for a result when the caller does not supply
+# -program_name (see _initialize and program_name).
+$DefaultProgramName = 'BLASTN';
+# NOTE(review): not referenced anywhere in this module; the writer classes
+# in this distribution appear to live under Bio::SearchIO::Writer -- confirm
+# the class name before relying on it.
+$DEFAULT_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
+
+# mapping of terms to Bioperl hash keys
+# %MODEMAP: element names that open/close an object, mapped to the type
+# string handed to the event handler (start_<type> / end_<type>).
+%MODEMAP = (
+	    'Result'             => 'result',
+	    'Hit'                => 'hit',
+	    'Hsp'                => 'hsp'
+	    );
+
+# %MAPPING: leaf element names mapped to the key under which end_element()
+# stores the element's data in $self->{'_values'}.
+%MAPPING = ( 
+	     'Hsp_bit-score'  => 'HSP-bits',
+	     'Hsp_score'      => 'HSP-score',
+	     'Hsp_evalue'     => 'HSP-evalue',
+	     'Hsp_query-from' => 'HSP-query_start',
+	     'Hsp_query-to'   => 'HSP-query_end',
+	     'Hsp_hit-from'   => 'HSP-hit_start',
+	     'Hsp_hit-to'     => 'HSP-hit_end',
+	     'Hsp_positive'   => 'HSP-conserved',
+	     'Hsp_identity'   => 'HSP-identical',
+	     'Hsp_mismatches' => 'HSP-mismatches',
+	     'Hsp_qgapblocks' => 'HSP-query_gapblocks',
+	     'Hsp_hgapblocks' => 'HSP-hit_gapblocks',
+	     'Hsp_gaps'       => 'HSP-hsp_gaps',
+	     'Hsp_hitgaps'    => 'HSP-hit_gaps',
+	     'Hsp_querygaps'  => 'HSP-query_gaps',
+	     'Hsp_align-len'  => 'HSP-hsp_length',
+	     'Hsp_query-frame'=> 'HSP-query_frame',
+	     'Hsp_hit-frame'  => 'HSP-hit_frame',
+
+	     'Hit_id'        => 'HIT-name',
+	     'Hit_len'       => 'HIT-length',
+	     'Hit_accession' => 'HIT-accession',
+	     'Hit_def'       => 'HIT-description',
+	     'Hit_signif'    => 'HIT-significance',
+	     'Hit_score'     => 'HIT-score',
+	     'Hit_bits'      => 'HIT-bits',
+
+	     'Result_program'  => 'RESULT-algorithm_name',
+	     'Result_version'  => 'RESULT-algorithm_version',
+	     'Result_query-def'=> 'RESULT-query_name',
+	     'Result_query-len'=> 'RESULT-query_length',
+	     'Result_query-acc'=> 'RESULT-query_accession',
+	     'Result_querydesc'=> 'RESULT-query_description',
+	     'Result_db'       => 'RESULT-database_name',
+	     'Result_db-len'   => 'RESULT-database_entries',
+	     'Result_db-let'   => 'RESULT-database_letters',
+	     );
+
+use base qw(Bio::SearchIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::blasttable();
+ Function: Builds a new Bio::SearchIO::blasttable object 
+ Returns : an instance of Bio::SearchIO::blasttable
+ Args    :
+
+
+=cut
+
+sub _initialize {
+    # Object initialisation, chained through the superclass.  Besides the
+    # arguments the superclass consumes, -program_name sets the algorithm
+    # name reported for each result (default: $DefaultProgramName).
+    my ($self,@args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)],
+                                    @args);
+    $self->program_name($pname || $DefaultProgramName);
+
+    # Register the factories the event handler uses for each object type.
+    $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult'));
+    $self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit'));
+    $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::GenericHSP'));
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $parser->next_result
+ Function: Parse the next result from the data stream
+ Returns : L<Bio::Search::Result::ResultI>
+ Args    : none
+
+
+=cut
+
+sub next_result{
+   # Reads data lines until the query name changes (or input ends) and
+   # replays them as Result/Hit/Hsp events.  Returns whatever
+   # end_document() returns for the completed result, or nothing when no
+   # data lines were left to read.
+   my ($self) = @_;
+   my ($lastquery,$lasthit);
+   local $/ = "\n";
+   local $_;
+
+   # Opens a new Hit element.  The first HSP line of a hit supplies the
+   # hit-level bit score and significance.
+   my $start_hit = sub {
+       my ($name,$bitscore,$signif) = @_;
+       $self->start_element({'Name' => 'Hit'});
+       $self->element({'Name' => 'Hit_id',
+                       'Data' => $name});
+       $self->element({'Name' => 'Hit_bits',
+                       'Data' => $bitscore});
+       $self->element({'Name' => 'Hit_signif',
+                       'Data' => $signif});
+   };
+
+   while( defined ($_ = $self->_readline) ) {
+       # skip comment lines and blank lines
+       next if /^\#/ || /^\s*$/;
+       # $percent_id is not used below; it only keeps the columns aligned
+       my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
+           $qstart,$qend,$hstart,$hend,$evalue,$bits) = split;
+
+       if( defined $lastquery &&
+           $lastquery ne $qname ) {
+           # This line belongs to the next query: close the current result
+           # and push the line back so the next call starts with it.
+           $self->end_element({'Name' => 'Hit'});
+           $self->end_element({'Name' => 'Result'});
+           $self->_pushback($_);
+           return $self->end_document;
+       } elsif( ! defined $lastquery ) {
+           # first data line of this result
+           $self->{'_result_count'}++;
+           $self->start_element({'Name' => 'Result'});
+           $self->element({'Name' => 'Result_program',
+                           'Data' => $self->program_name});
+           $self->element({'Name' => 'Result_query-def',
+                           'Data' => $qname});
+           $start_hit->($hname,$bits,$evalue);
+       } elsif( $lasthit ne $hname ) {
+           # same query, different subject: close the previous hit first
+           if( $self->in_element('hit') ) {
+               $self->end_element({'Name' => 'Hit'});
+           }
+           $start_hit->($hname,$bits,$evalue);
+       }
+
+       # Every data line is one HSP.  The identical count is derived from
+       # the alignment length; the same figure is reported as conserved.
+       my $identical = $hsp_len - $mismatches - $gapsm;
+       $self->start_element({'Name' => 'Hsp'});
+       $self->element({'Name' => 'Hsp_evalue',
+                       'Data' => $evalue});
+       $self->element({'Name' => 'Hsp_bit-score',
+                       'Data' => $bits});
+       $self->element({'Name' => 'Hsp_identity',
+                       'Data' => $identical});
+       $self->element({'Name' => 'Hsp_positive',
+                       'Data' => $identical});
+       $self->element({'Name' => 'Hsp_gaps',
+                       'Data' => $gapsm});
+       $self->element({'Name' => 'Hsp_query-from',
+                       'Data' => $qstart});
+       $self->element({'Name' => 'Hsp_query-to',
+                       'Data' => $qend});
+
+       $self->element({'Name' => 'Hsp_hit-from',
+                       'Data' => $hstart });
+       $self->element({'Name' => 'Hsp_hit-to',
+                       'Data' => $hend });
+       $self->element({'Name' => 'Hsp_align-len',
+                       'Data' => $hsp_len});
+       $self->end_element({'Name' => 'Hsp'});
+       $lastquery = $qname;
+       $lasthit   = $hname;
+   }
+   # fencepost: input ended while a result was still open
+   if( defined $lasthit && defined $lastquery ) {
+       if( $self->in_element('hit') ) {
+           $self->end_element({'Name' => 'Hit'});
+       }
+       $self->end_element({'Name' => 'Result'});
+       return $self->end_document;
+   }
+   # nothing left to parse
+   return;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+    # Handles the opening of an element.  Only names listed in %MODEMAP
+    # trigger a handler call; attributes are passed through untouched.
+    my ($self,$data) = @_;
+    my $nm   = $data->{'Name'};
+    my $type = $MODEMAP{$nm};
+
+    if( $type ) {
+        $self->_mode($type);
+        if( $self->_will_handle($type) ) {
+            my $method = 'start_' . lc $type;
+            $self->_eventHandler->$method($data->{'Attributes'});
+        }
+        # remember which element we are now inside (innermost first)
+        unshift @{$self->{'_elements'}}, $type;
+    }
+
+    # a new result starts with a clean slate
+    if( $nm eq 'Result' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        $self->{'_mode'}   = '';
+    }
+
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    # Handles the closing of an element.  For %MODEMAP names the matching
+    # end_<type> handler method is called with the collected values; for
+    # %MAPPING names the last character data is stored under the mapped key.
+    # Returns the handler's return value (undef when no handler was called).
+    my ($self,$data) = @_;
+    my $nm     = $data->{'Name'};
+    my $target = $MAPPING{$nm};
+    my $rc;
+
+    if( my $type = $MODEMAP{$nm} ) {
+        if( $self->_will_handle($type) ) {
+            my $method = 'end_' . lc $type;
+            $rc = $self->_eventHandler->$method($self->{'_reporttype'},
+                                                $self->{'_values'});
+        }
+        # leave the innermost open element
+        shift @{$self->{'_elements'}};
+    }
+    elsif( $target ) {
+        if( ref($target) =~ /hash/i ) {
+            # nested mapping: { outer key => inner key }
+            my $outer = (keys %{$target})[0];
+            $self->{'_values'}->{$outer}->{$target->{$outer}} = $self->{'_last_data'};
+        }
+        else {
+            $self->{'_values'}->{$target} = $self->{'_last_data'};
+        }
+    }
+    else {
+        $self->warn( "unknown nm $nm ignoring\n");
+    }
+
+    # character data never carries over past the end of an element
+    $self->{'_last_data'} = '';
+    if( $nm eq 'Result' ) {
+        $self->{'_result'} = $rc;
+    }
+    return $rc;
+
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+   # Replays a complete element: start, character data, end.
+   # The value of end_element() is passed back to the caller.
+   my ($self,$data) = @_;
+   for my $phase (qw(start_element characters)) {
+       $self->$phase($data);
+   }
+   return $self->end_element($data);
+}
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a characters event
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+   # Records the character data of the current element in
+   # $self->{'_last_data'}.  Undefined data is ignored; whitespace-only
+   # data is ignored unless the element is an HSP sequence/midline line.
+   my ($self,$data) = @_;
+   my $text = $data->{'Data'};
+
+   return unless defined $text;
+   return if $text =~ /^\s+$/
+          && $data->{'Name'} !~ /Hsp\_(midline|qseq|hseq)/;
+
+   # alignment lines seen inside an HSP are additionally accumulated per name
+   if( $self->in_element('hsp')
+       && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
+       $self->{'_last_hspdata'}->{$data->{'Name'}} .= $text;
+   }
+
+   $self->{'_last_data'} = $text;
+}
+
+=head2 _mode
+
+ Title   : _mode
+ Usage   : $obj->_mode($newval)
+ Function: 
+ Example : 
+ Returns : value of _mode
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _mode{
+    # Combined getter/setter for the current parse mode; an undefined
+    # argument leaves the stored value untouched.
+    my ($self,$value) = @_;
+    $self->{'_mode'} = $value if defined $value;
+    return $self->{'_mode'};
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub within_element{
+   # Returns 1 if $name matches any currently open element (at any depth
+   # of $self->{'_elements'}), 0 otherwise.  An undefined name or a missing
+   # or empty element stack yields 0.
+   my ($self,$name) = @_;
+   my $stack = $self->{'_elements'};
+
+   # Each condition alone is a reason to answer "no".  The original guard
+   # mixed && and || without grouping, so it rejected neither an undefined
+   # name nor an undefined stack before using them.
+   return 0 if ! defined $name
+            || ! defined $stack
+            || scalar @{$stack} == 0;
+
+   foreach my $open ( @{$stack} ) {
+       return 1 if $open eq $name;
+   }
+   return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within' because 'in' only tests
+           the innermost (most recently opened) element.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub in_element{
+   # True when $name is the innermost open element, i.e. the first entry
+   # of $self->{'_elements'}; 0 when no element is open.
+   my ($self,$name) = @_;
+   my $current = $self->{'_elements'}->[0];
+   return defined $current ? ( $current eq $name ) : 0;
+}
+
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handles a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    # Resets all per-document parser state.
+    my $self = shift;
+    $self->{$_} = '' for qw(_lasttype _mode);
+    $self->{'_values'}   = {};
+    $self->{'_result'}   = undef;
+    $self->{'_elements'} = [];
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+   # Returns the result stored by end_element() when the 'Result' element
+   # closed; extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+   return $self->{'_result'};
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub result_count {
+    # Number of results counted so far (undef before the first one).
+    my ($self) = @_;
+    return $self->{'_result_count'};
+}
+
+# Alias for result_count.
+sub report_count {
+    my ($self) = @_;
+    return $self->result_count;
+}
+
+
+=head2 program_name
+
+ Title   : program_name
+ Usage   : $obj->program_name($newval)
+ Function: Get/Set the program name
+ Returns : value of program_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub program_name{
+    # Getter/setter for the program name.  Any argument (even undef) is
+    # stored; a false stored value falls back to $DefaultProgramName.
+    my ($self,@newval) = @_;
+
+    if( @newval ) {
+        $self->{'program_name'} = $newval[0];
+    }
+    return $self->{'program_name'} || $DefaultProgramName;
+}
+
+
+=head2 _will_handle
+
+ Title   : _will_handle
+ Usage   : Private method. For internal use only.
+              if( $self->_will_handle($type) ) { ... }
+ Function: Provides an optimized way to check whether or not an element of a 
+           given type is to be handled.
+ Returns : Reference to EventHandler object if the element type is to be handled.
+           undef if the element type is not to be handled.
+ Args    : string containing type of element.
+
+Optimizations:
+
+=over 2
+
+=item 1
+
+Using the cached pointer to the EventHandler to minimize repeated
+lookups.
+
+=item 2
+
+Caching the will_handle status for each type that is encountered so
+that it only need be checked by calling
+handler-E<gt>will_handle($type) once.
+
+=back
+
+This does not lead to a major savings by itself (only 5-10%).  In
+combination with other optimizations, or for large parse jobs, the
+savings could be significant.
+
+To test against the unoptimized version, remove the parentheses from
+around the third term in the ternary " ? : " operator and add two
+calls to $self-E<gt>_eventHandler().
+
+=cut
+
+sub _will_handle {
+    # Returns the cached handler object when it handles elements of $type,
+    # undef otherwise.  The handler's answer is cached per type, so
+    # will_handle() is only asked again while no defined answer is stored.
+    my ($self,$type) = @_;
+    my $handler = $self->{'_handler'};
+    my $cache   = $self->{'_will_handle_cache'} ||= {};
+
+    unless( defined $cache->{$type} ) {
+        $cache->{$type} = $handler->will_handle($type);
+    }
+
+    return $cache->{$type} ? $handler : undef;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blastxml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blastxml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/blastxml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,473 @@
+# $Id: blastxml.pm,v 1.36.4.4 2006/11/30 09:23:59 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::blastxml
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::blastxml - A SearchIO implementation of NCBI Blast XML parsing. 
+
+=head1 SYNOPSIS
+
+    use Bio::SearchIO;
+    my $searchin = new Bio::SearchIO(-format => 'blastxml',
+				     -file   => 't/data/plague_yeast.bls.xml');
+    while( my $result = $searchin->next_result ) {
+    }
+
+    # one can also request that the parser NOT keep the XML data in memory
+    # by using the tempfile initialization flag.
+    my $searchin = new Bio::SearchIO(-tempfile => 1,
+				     -format => 'blastxml',
+				     -file   => 't/data/plague_yeast.bls.xml');
+    while( my $result = $searchin->next_result ) {
+    }
+
+=head1 DESCRIPTION
+
+This object implements a NCBI Blast XML parser.  It requires XML::SAX; it is
+also recommended (for faster parsing) that XML::SAX::ExpatXS be installed and
+set as the default parser in ParserDetails.ini.  This file is located in the
+SAX subdirectory of XML in your local perl library (normally in the 'site'
+directory).  Currently, XML::SAX::Expat will NOT work as expected if set as
+default; you must have local copies of the NCBI DTDs if using XML::SAX::Expat.
+
+There is one additional initialization flag from the SearchIO defaults
+- that is the -tempfile flag.  If specified as true, then the parser
+will write out each report to a temporary filehandle rather than
+holding the entire report as a string in memory.  The reason this is
+done in the first place is NCBI reports have an unnecessary E<lt>?xml
+version="1.0"?E<gt> at the beginning of each report and RPS-BLAST reports
+have an additional unnecessary RPS-BLAST tag at the top of each report.
+So we currently have implemented the work around by preparsing the
+file (yes it makes the process slower, but it works).
+
+=head1 DEPENDENCIES
+
+In addition to parts of the Bio:: hierarchy, this module uses:
+
+ XML::SAX
+
+It is also recommended that XML::SAX::ExpatXS be installed and made the default
+XML::SAX parser (see ParserDetails.ini in the DESCRIPTION above), along with the
+Expat library, for faster parsing.  XML::SAX::Expat is not recommended; 
+XML::SAX::ExpatXS is considered the current replacement for XML::SAX::Expat
+and is actively being considered to replace XML::SAX::Expat.  XML::SAX::Expat
+will work, but only if you have local copies of the NCBI BLAST DTDs. This is
+due to issues with NCBI's BLAST XML format.  The DTDs and the web address to
+obtain them are:
+
+  NCBI_BlastOutput.dtd	    
+  NCBI_BlastOutput.mod.dtd
+
+  http://www.ncbi.nlm.nih.gov/data_specs/dtd/
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SearchIO::blastxml;
+use vars qw($DTD %MAPPING %MODEMAP $DEBUG);
+use strict;
+
+$DTD = 'ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NCBI_BlastOutput.dtd';
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use XML::SAX;
+use HTML::Entities;
+use IO::File;
+
+# Populates the package lookup tables at compile time and probes for
+# Time::HiRes.
+BEGIN {
+    # uncomment only for testing; trying to get XML::SAX::Expat to play nice...
+    #$XML::SAX::ParserPackage = 'XML::SAX::Expat';
+    # mapping of NCBI Blast terms to Bioperl hash keys
+    # %MODEMAP: XML elements that open/close an object, mapped to the type
+    # string used to build the start_<type>/end_<type> handler method names.
+    %MODEMAP = ('BlastOutput' => 'result',
+		'Hit'         => 'hit',
+		'Hsp'         => 'hsp'
+		);
+
+    # %MAPPING: leaf XML elements mapped to the key under which
+    # end_element() stores their text in $self->{'_values'}.  A hash-ref
+    # value { outer => inner } stores the text at _values->{outer}{inner}.
+    %MAPPING = ( 
+		 # HSP specific fields
+		 'Hsp_bit-score'  => 'HSP-bits',
+		 'Hsp_score'      => 'HSP-score',
+		 'Hsp_evalue'     => 'HSP-evalue',
+		 'Hsp_query-from' => 'HSP-query_start',
+		 'Hsp_query-to'   => 'HSP-query_end',
+		 'Hsp_hit-from'   => 'HSP-hit_start',
+		 'Hsp_hit-to'     => 'HSP-hit_end',
+		 'Hsp_positive'   => 'HSP-conserved',
+		 'Hsp_identity'   => 'HSP-identical',
+		 # NOTE(review): the blasttable driver in this same commit maps
+		 # 'Hsp_gaps' to 'HSP-hsp_gaps' -- confirm which key the event
+		 # handler expects.
+		 'Hsp_gaps'       => 'HSP-gaps',
+		 'Hsp_hitgaps'    => 'HSP-hit_gaps',
+		 'Hsp_querygaps'  => 'HSP-query_gaps',
+		 'Hsp_qseq'       => 'HSP-query_seq',
+		 'Hsp_hseq'       => 'HSP-hit_seq',
+		 'Hsp_midline'    => 'HSP-homology_seq',
+		 'Hsp_align-len'  => 'HSP-hsp_length',
+		 'Hsp_query-frame'=> 'HSP-query_frame',
+		 'Hsp_hit-frame'  => 'HSP-hit_frame',
+
+		 # these are ignored for now
+		 # NOTE(review): pattern-from maps to 'patternend' and pattern-to
+		 # to 'patternstart' -- this looks swapped; confirm before any
+		 # consumer starts reading these keys.
+		 'Hsp_num'          => 'HSP-order',
+		 'Hsp_pattern-from' => 'patternend',
+		 'Hsp_pattern-to'   => 'patternstart',
+		 'Hsp_density'      => 'hspdensity',
+
+		 # Hit specific fields
+		 'Hit_id'               => 'HIT-name',
+		 'Hit_len'              => 'HIT-length',
+		 'Hit_accession'        => 'HIT-accession',
+		 'Hit_def'              => 'HIT-description',
+		 'Hit_num'              => 'HIT-order',
+		 'Iteration_iter-num'   => 'HIT-iteration',
+		 'Iteration_stat'       => 'HIT-iteration_statistic',
+		 
+		 # Result (whole report) fields
+		 'BlastOutput_program'   => 'RESULT-algorithm_name',
+		 'BlastOutput_version'   => 'RESULT-algorithm_version',
+		 'BlastOutput_query-def' => 'RESULT-query_description',
+		 'BlastOutput_query-len' => 'RESULT-query_length',
+		 'BlastOutput_db'        => 'RESULT-database_name',
+		 'BlastOutput_reference' => 'RESULT-program_reference',
+		 'BlastOutput_query-ID'  => 'runid',
+		 
+		 # Search parameters and statistics
+		 'Parameters_matrix'    => { 'RESULT-parameters' => 'matrix'},
+		 'Parameters_expect'    => { 'RESULT-parameters' => 'expect'},
+		 'Parameters_include'   => { 'RESULT-parameters' => 'include'},
+		 'Parameters_sc-match'  => { 'RESULT-parameters' => 'match'},
+		 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
+		 'Parameters_gap-open'  => { 'RESULT-parameters' => 'gapopen'},
+		 'Parameters_gap-extend'=> { 'RESULT-parameters' => 'gapext'},
+		 'Parameters_filter'    => {'RESULT-parameters' => 'filter'},
+		 'Statistics_db-num'    => 'RESULT-database_entries',
+		 'Statistics_db-len'    => 'RESULT-database_letters',
+		 'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength'},
+		 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
+		 'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
+		 'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
+		 'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy'},
+		 );
+    # Time::HiRes is only needed by the (currently commented-out) timing
+    # code in next_result; if it cannot be loaded, debugging is forced off.
+    eval {  require Time::HiRes };	
+    if( $@ ) { $DEBUG = 0; }
+}
+
+
+use base qw(Bio::SearchIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $searchio = new Bio::SearchIO(-format => 'blastxml',
+					    -file   => 'filename',
+					    -tempfile => 1);
+ Function: Initializes the object - this is chained through new in SearchIO
+ Returns : Bio::SearchIO::blastxml object
+ Args    : One additional argument from the format and file/fh parameters.
+           -tempfile => boolean.  Defaults to false.  Write out XML data
+                                  to a temporary filehandle to send to 
+                                  PerlSAX parser.
+=cut
+
+=head2 _initialize
+
+ Title   : _initialize
+ Usage   : private
+ Function: Initializes the object - this is chained through new in SearchIO
+
+=cut
+
+sub _initialize{
+    # Object initialisation, chained through the superclass.  Handles the
+    # extra -tempfile flag, creates the SAX parser with this object as its
+    # handler, and refuses to run on the XML::SAX::Expat backend.
+    my ($self,@args) = @_;
+    $self->SUPER::_initialize(@args);
+    my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)],@args);
+    defined $usetempfile && $self->use_tempfile($usetempfile);
+    $self->{'_xmlparser'} = XML::SAX::ParserFactory->parser(Handler => $self);
+    my $local_parser = ref($self->{'_xmlparser'});
+    if ($local_parser eq 'XML::SAX::Expat') {
+        $self->throw('XML::SAX::Expat not supported as it is no '.
+                     'longer maintained.  Please use any other XML::SAX '.
+                     'backend (such as XML::SAX::ExpatXS or XML::LibXML)');
+    }
+    # turn debugging on for verbose objects unless BEGIN already decided
+    $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0);
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $hit = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+# Collects the lines of one XML report from the input stream (into a string
+# or, with use_tempfile, into a temporary file), hands them to the SAX
+# parser and returns what the parser returns.  Returns nothing when no
+# report lines were read, and undef when parsing died.
+sub next_result {
+    my ($self) = @_;
+    local $/ = "\n";
+    local $_;
+ 
+    my $data = '';
+    my $firstline = 1;
+    my ($tfh);
+    # spool the report to an anonymous temp file instead of memory if asked
+    if( $self->use_tempfile ) {
+	$tfh = IO::File->new_tmpfile or $self->throw("Unable to open temp file: $!");	
+	$tfh->autoflush(1);
+    }
+   
+    my ($sawxmlheader,$okaytoprocess,$sawdoctype);
+    while( defined( $_ = $self->_readline) ) {
+	# RPS-BLAST marker line: remember the report type and drop the line
+	if( /^RPS-BLAST/i ) {
+	    $self->{'_type'} = 'RPS-BLAST';
+	    next;
+	}
+	# an XML declaration that is not the first line starts the next
+	# report: push it back for the next call and stop here
+	if( /^<\?xml version/ ) {
+	    if( ! $firstline ) {
+		$self->_pushback($_);
+		last;
+	    }
+	    $sawxmlheader = 1;
+	} 
+	# for the non xml version prefixed in each section
+	if( /DOCTYPE/ ) { #|| /<BlastOutput>/
+	    # a second DOCTYPE also marks the start of the next report
+	    if(  $sawdoctype ) {
+		if( ! $sawxmlheader ) { 
+		    $self->_pushback("<?xml version=\"1.0\"?>\n");
+		}
+		$self->_pushback($_);
+		last;
+	    }
+	    $sawdoctype = 1;
+	    # report without an XML declaration: synthesize one and re-queue
+	    # the DOCTYPE line behind it.
+	    # NOTE(review): when the DOCTYPE line is read again $sawdoctype is
+	    # already set, so it looks like it would take the "next report"
+	    # branch above -- confirm against _pushback/_readline ordering.
+	    unless( $sawxmlheader ) {
+		$self->debug( "matched here\n");
+		$self->_pushback("<?xml version=\"1.0\"?>\n");
+		$self->_pushback($_);
+		next;
+	    }
+	}
+	$okaytoprocess = 1;
+	if( defined $tfh ) {
+	    print $tfh $_;
+	} else {
+	    $data .= $_;
+	}
+	$firstline = 0;
+    }
+    # nothing collected: end of input
+    return unless( $okaytoprocess);
+    
+    my %parser_args;
+    if( defined $tfh ) {
+	# rewind so the parser reads the spooled report from the start
+	seek($tfh,0,0);
+	%parser_args = ('Source' => { 'ByteStream' => $tfh });
+    } else {
+	%parser_args = ('Source' => { 'String' => $data });
+    }
+    my $result;
+    # $starttime is only used by the commented-out timing code below
+    my $starttime;
+    #if(  $DEBUG ) {  $starttime = [ Time::HiRes::gettimeofday() ]; }
+
+    # a parse failure is reported as a warning, not thrown; the result
+    # counter is only incremented when parse() did not die
+    eval { 
+	$result = $self->{'_xmlparser'}->parse(%parser_args);
+        $self->{'_result_count'}++;
+    };
+    if( $@ ) {
+	$self->warn("error in parsing a report:\n $@");
+	$result = undef;
+    }    
+    #if( $DEBUG ) {
+	#$self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
+    #}
+    # parsing magic here - but we call event handlers rather than 
+    # instantiating things 
+    return $result;
+}
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $parser->start_document;
+ Function: SAX method to indicate starting to parse a new document
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    # SAX callback: clears the state left over from a previous document.
+    my $self = shift;
+    @{$self}{'_lasttype','_values'} = ('', {});
+    $self->{'_result'} = undef;
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $parser->end_document;
+ Function: SAX method to indicate finishing parsing a new document
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub end_document{
+   # SAX callback: returns the result stored when the 'BlastOutput' element
+   # closed; extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+   return $self->{'_result'};
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $parser->start_element($data)
+ Function: SAX method to indicate starting a new element
+ Returns : none
+ Args    : hash ref for data
+
+=cut
+
+sub start_element{
+    # SAX callback for an opening tag.  Elements listed in %MODEMAP are
+    # forwarded to the event handler's start_<type> method (if it handles
+    # that type); 'BlastOutput' additionally resets the collected values.
+    my ($self,$data) = @_;
+    # we currently don't care about attributes
+    my $nm   = $data->{'Name'};
+    my $type = $MODEMAP{$nm};
+
+    if( $type ) {
+        if( $self->_eventHandler->will_handle($type) ) {
+            my $method = 'start_' . lc $type;
+            $self->_eventHandler->$method($data->{'Attributes'});
+        }
+    }
+
+    if( $nm eq 'BlastOutput' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+    }
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $parser->end_element($data)
+ Function: Signals finishing an element
+ Returns : Bio::Search object depending on what type of element
+ Args    : hash ref for data
+
+=cut
+
+sub end_element{
+    # SAX callback for a closing tag.  %MODEMAP elements call the handler's
+    # end_<type> method with the collected values; %MAPPING elements store
+    # the last character data under the mapped key; a fixed set of
+    # structural elements is silently skipped.  Returns the handler's
+    # return value (undef when no handler was called).
+    my ($self,$data) = @_;
+
+    my $nm = $data->{'Name'};
+    my $rc;
+
+    # the program name also determines the report type passed to handlers
+    if( $nm eq 'BlastOutput_program' ) {
+        if( $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
+            $self->{'_type'} = uc $1;
+        }
+    }
+
+    my $target = $MAPPING{$nm};
+    if( my $type = $MODEMAP{$nm} ) {
+        if( $self->_eventHandler->will_handle($type) ) {
+            my $method = 'end_' . lc $type;
+            $rc = $self->_eventHandler->$method($self->{'_type'},
+                                                $self->{'_values'});
+        }
+    }
+    elsif( $target ) {
+        if( ref($target) =~ /hash/i ) {
+            # nested mapping: { outer key => inner key }
+            my $outer = (keys %{$target})[0];
+            $self->{'_values'}->{$outer}->{$target->{$outer}} = $self->{'_last_data'};
+        }
+        else {
+            $self->{'_values'}->{$target} = $self->{'_last_data'};
+        }
+    }
+    elsif( $nm eq 'Iteration' || $nm eq 'Hit_hsps' || $nm eq 'Parameters' ||
+           $nm eq 'BlastOutput_param' || $nm eq 'Iteration_hits' ||
+           $nm eq 'Statistics' || $nm eq 'BlastOutput_iterations' ) {
+        # ignores these elements for now; no iteration parsing
+    }
+    else {
+        $self->debug("ignoring unrecognized element type $nm\n");
+    }
+
+    # character data never carries over past the end of an element
+    $self->{'_last_data'} = '';
+    if( $nm eq 'BlastOutput' ) {
+        $self->{'_result'} = $rc;
+    }
+    return $rc;
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $parser->characters($data)
+ Function: Signals new characters to be processed
+ Returns : characters read
+ Args    : hash ref with the key 'Data'
+
+
+=cut
+
+sub characters{
+   # SAX callback for character data.  A SAX parser may deliver the text of
+   # one element in several chunks, so each chunk is appended to
+   # $self->{'_last_data'} (which end_element() clears) instead of replacing
+   # it.  Whitespace-only chunks are skipped while nothing has been
+   # collected yet, as before.
+   # Returns the decoded chunk, or nothing when the chunk was skipped.
+   my ($self,$data) = @_;
+   my $chunk = $data->{'Data'};
+   return unless defined $chunk;
+
+   my $collected = defined $self->{'_last_data'}
+                && length $self->{'_last_data'};
+   return if ! $collected && $chunk =~ /^\s+$/;
+
+   my $decoded = decode_entities($chunk);
+   $self->{'_last_data'} .= $decoded;
+   return $decoded;
+}
+
+=head2 use_tempfile
+
+ Title   : use_tempfile
+ Usage   : $obj->use_tempfile($newval)
+ Function: Get/Set boolean flag on whether or not use a tempfile
+ Example : 
+ Returns : value of use_tempfile
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub use_tempfile{
+   # Combined getter/setter for the temp-file flag; an undefined argument
+   # leaves the stored value untouched.
+   my ($self,$value) = @_;
+   $self->{'_use_tempfile'} = $value if defined $value;
+   return $self->{'_use_tempfile'};
+}
+
+sub result_count {
+    # Number of reports parsed so far (undef before the first one).
+    my ($self) = @_;
+    return $self->{'_result_count'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/exonerate.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/exonerate.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/exonerate.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,792 @@
+# $Id: exonerate.pm,v 1.17.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::exonerate
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::exonerate - parser for Exonerate
+
+=head1 SYNOPSIS
+
+  # do not use this module directly, it is a driver for SearchIO
+
+  use Bio::SearchIO;
+  my $searchio = new Bio::SearchIO(-file => 'file.exonerate',
+                                   -format => 'exonerate');
+
+
+  while( my $r = $searchio->next_result ) {
+    print $r->query_name, "\n";
+  }
+
+=head1 DESCRIPTION
+
+This is a driver for the SearchIO system for parsing Exonerate (Guy
+Slater) output.  You can get Exonerate at
+http://cvsweb.sanger.ac.uk/cgi-bin/cvsweb.cgi/exonerate/?cvsroot=Ensembl
+(until Guy puts up a Web reference or publication for it).
+
+An optional parameter -min_intron is supported by the L<new>
+initialization method.  If you ran Exonerate with a minimum intron
+length other than the default (30), pass that value here so the
+parser can tell an ordinary deletion from an intron.  This heuristic
+has not been fully tested or explored, so some alignments may still
+be misinterpreted.
+
+The VULGAR and CIGAR formats should be parsed okay now creating HSPs
+where appropriate (so merging match states where appropriate rather
+than breaking an HSP at each indel as it may have done in the past).
+The GFF that comes from exonerate is still probably a better way to go
+if you are doing protein2genome or est2genome mapping.
+For example you can see this script:
+
+http://fungal.genome.duke.edu/~jes12/software/scripts/process_exonerate_gff3.perl.txt
+
+If your report contains both CIGAR and VULGAR lines only the first one
+will be processed for a given Query/Target pair.  If you preferentially
+want to use VULGAR or CIGAR add one of these options when initializing
+the SearchIO object.
+
+    -cigar  => 1
+OR
+    -vulgar => 1
+
+Or set them via these methods.
+
+    $parser->cigar(1)
+OR
+    $parser->vulgar(1)
+
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::exonerate;
+use strict;
+use vars qw(@STATES %MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $MIN_INTRON);
+
+use base qw(Bio::SearchIO);
+
+# Element names that open a nesting level; the value is the event type
+# used to build the start_<type>/end_<type> handler method names.
+%MODEMAP = ( 'ExonerateOutput' => 'result',
+    'Hit'             => 'hit',
+    'Hsp'             => 'hsp'
+    );
+
+# Leaf element names and the key under which end_element() files their
+# data in the '_values' hash.
+%MAPPING =
+    (
+    'Hsp_query-from'=>  'HSP-query_start',
+    'Hsp_query-to'  =>  'HSP-query_end',
+    'Hsp_hit-from'  =>  'HSP-hit_start',
+    'Hsp_hit-to'    =>  'HSP-hit_end',
+    'Hsp_qseq'      =>  'HSP-query_seq',
+    'Hsp_hseq'      =>  'HSP-hit_seq',
+    'Hsp_midline'   =>  'HSP-homology_seq',
+    'Hsp_score'     =>  'HSP-score',
+    'Hsp_qlength'   =>  'HSP-query_length',
+    'Hsp_hlength'   =>  'HSP-hit_length',
+    'Hsp_align-len' =>  'HSP-hsp_length',
+    'Hsp_identity'  =>  'HSP-identical',
+    'Hsp_gaps'       => 'HSP-hsp_gaps',
+    'Hsp_hitgaps'    => 'HSP-hit_gaps',
+    'Hsp_querygaps'  => 'HSP-query_gaps',
+
+    'Hit_id'        => 'HIT-name',
+    'Hit_desc'      => 'HIT-description',
+    'Hit_len'       => 'HIT-length',
+    'Hit_score'     => 'HIT-score',
+
+    'ExonerateOutput_program'   => 'RESULT-algorithm_name',
+    'ExonerateOutput_query-def' => 'RESULT-query_name',
+    'ExonerateOutput_query-desc'=> 'RESULT-query_description',
+    'ExonerateOutput_query-len' => 'RESULT-query_length',
+    );
+
+# Writer class used by write_result() when no writer has been set.
+# The writer classes of this distribution live under Bio::SearchIO::Writer
+# (there is no Bio::Search::Writer namespace).
+# NOTE(review): nothing in this file loads the class - confirm it is
+# loaded before write_result() falls back to it.
+$DEFAULT_WRITER_CLASS = 'Bio::SearchIO::Writer::HitTableWriter';
+
+# Default minimum intron size.  A -min_intron argument to new() replaces
+# this package-wide value, not just the setting of one parser object.
+$MIN_INTRON=30;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::exonerate();
+ Function: Builds a new Bio::SearchIO::exonerate object
+ Returns : an instance of Bio::SearchIO::exonerate
+ Args    : -min_intron => somewhat obsolete option, how to determine if
+                          an indel is an intron or a local gap.  Use VULGAR
+                          rather than CIGAR to avoid this heuristic, default 30.
+           -cigar       => 1   set this to 1 if you want to parse
+                               CIGAR exclusively.
+           -vulgar      => 1   set this to 1 if you want to parse VULGAR
+                               exclusively, setting both to 1 will revert
+                               to the default behavior of just parsing the
+                               first line that it sees.
+
+=cut
+
+sub new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+
+    # pull the options this driver understands out of the named arguments
+    my ($min_intron, $cigar, $vulgar) =
+	$self->_rearrange([qw(MIN_INTRON CIGAR VULGAR)], @_);
+
+    # a true -min_intron replaces the package-wide threshold
+    $MIN_INTRON = $min_intron if $min_intron;
+
+    # asking for both line types is treated like asking for neither
+    if( $cigar && $vulgar ) {
+	$self->warn("cannot get HSPs from both CIGAR and VULGAR lines, will just choose whichever comes first (same as if you had chosen neither");
+	($cigar, $vulgar) = (0, 0);
+    }
+    $self->cigar($cigar);
+    $self->vulgar($vulgar);
+    return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $hit = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result{
+   my ($self) = @_;
+   local $/ = "\n";
+   local $_;
+
+   # Reads lines until one result can be returned.  A result is closed
+   # either by the first vulgar:/cigar: line that gets processed or by
+   # the next Query: line (which is pushed back for the following call).
+   # Returns the result held by end_document(), or a false value when
+   # the input ends without a Query: line having been seen.
+
+   $self->{'_last_data'} = '';
+   $self->start_document();
+   my $seentop;
+   # Alignment strings for the CIGAR HSPs.  Nothing in this method fills
+   # these arrays, so the shifts below hand undef to element().
+   my (@q_ex, @m_ex, @h_ex); ## gc addition
+   while( defined($_ = $self->_readline) ) {
+       # warn( "Reading $_");
+       if( /^Query:\s+(\S+)\s*(.+)?/ ) {
+           if( $seentop ) {
+               # second Query: line: close the current result and leave
+               # the line for the next call
+               $self->end_element({'Name' => 'ExonerateOutput'});
+               $self->_pushback($_);
+               return $self->end_document();
+           }
+           $seentop = 1;
+           my ($nm,$desc) = ($1,$2);
+           chomp($desc) if defined $desc;
+           $self->{'_result_count'}++;
+           $self->start_element({'Name' => 'ExonerateOutput'});
+           $self->element({'Name' => 'ExonerateOutput_query-def',
+                           'Data' => $nm });
+           $self->element({'Name' => 'ExonerateOutput_query-desc',
+                           'Data' => $desc });
+           $self->element({'Name' => 'ExonerateOutput_program',
+                           'Data' => 'Exonerate' });
+           # new query: forget which kind of alignment line was seen.
+           # Only the '_seen*' markers are reset; the '_cigar'/'_vulgar'
+           # slots belong to the cigar()/vulgar() accessors.
+           $self->{'_seencigar'}  = 0;
+           $self->{'_seenvulgar'} = 0;
+
+       } elsif ( /^Target:\s+(\S+)\s*(.+)?/ ) {
+           my ($nm,$desc) = ($1,$2);
+           chomp($desc) if defined $desc;
+           $self->start_element({'Name' => 'Hit'});
+           $self->element({'Name' => 'Hit_id',
+                           'Data' => $nm});
+           $self->element({'Name' => 'Hit_desc',
+                           'Data' => $desc});
+           $self->{'_seencigar'}  = 0;
+           $self->{'_seenvulgar'} = 0;
+       } elsif(  s/^vulgar:\s+(\S+)\s+         # query sequence id
+                 (\d+)\s+(\d+)\s+([\-\+])\s+   # query start-end-strand
+                 (\S+)\s+                      # target sequence id
+                 (\d+)\s+(\d+)\s+([\-\+])\s+   # target start-end-strand
+                 (\d+)\s+                      # score
+                 //ox ) {
+           # copy the captures right away so later calls cannot disturb them
+           my ($qid,$qs,$qe,$qstrand,
+               $hid,$hs,$he,$hstrand,$score) = ($1,$2,$3,$4,$5,$6,$7,$8,$9);
+
+           next if( $self->cigar || $self->{'_seencigar'});
+           $self->{'_seenvulgar'}++;
+           #
+           # Note from Ewan. This is ugly - copy and paste from
+           # cigar line parsing. Should unify somehow...
+           #
+           if( ! $self->within_element('result') ) {
+               $self->start_element({'Name' => 'ExonerateOutput'});
+               $self->element({'Name' => 'ExonerateOutput_query-def',
+                               'Data' => $qid });
+           }
+           if( ! $self->within_element('hit') ) {
+               $self->start_element({'Name' => 'Hit'});
+               $self->element({'Name' => 'Hit_id',
+                               'Data' => $hid});
+           }
+
+           ## gc note:
+           ## $qe and $he are no longer used for calculating the ends,
+           ## just the $qs and $hs values and the alignment and insert lengths
+#          $self->element({'Name' => 'ExonerateOutput_query-len',
+#                          'Data' => $qe});
+#          $self->element({'Name' => 'Hit_len',
+#                          'Data' => $he});
+
+           ## gc note:
+           ## add one because these values are zero-based
+           ## this calculation was originally done lower in the code,
+           ## but it's clearer to do it just once at the start
+           my @rest = split;   # what is left of the line: label/len/len triples
+           my ($qbegin,$qend) = ('query-from', 'query-to');
+
+           if( $qstrand eq '-' ) {
+               $qstrand = -1; $qe++;
+           } else {
+               $qstrand = 1;
+               $qs++;
+           }
+           my ($hbegin,$hend) = ('hit-from', 'hit-to');
+
+           if( $hstrand eq '-' ) {
+               $hstrand = -1;
+               $he++;
+           } else {
+               $hstrand = 1;
+               $hs++;
+           }
+           # okay let's do this right and generate a set of HSPs
+           # from the vulgar line.  Example command line:
+           # exonerate --model est2genome --bestn 1 t/data/exonerate_cdna.fa t/data/exonerate_genomic_rev.fa
+
+           my $laststate = '';
+           my (@events,$gaps);
+           while( @rest >= 3 ) {
+               my ($state,$len1,$len2) = (shift @rest, shift @rest, shift @rest);
+               #
+               # HSPs are only the Match cases; otherwise we just
+               # move the coordinates on by the correct amount
+               #
+
+               if( $state eq 'M' ) {
+                   if( $laststate eq 'G' && @events ) {
+                       # merge gaps across Match states so the HSP
+                       # goes across.  (@events is checked so that a
+                       # gap before the first match starts a new HSP
+                       # instead of writing to $events[-1] of an empty
+                       # array, which is fatal.)
+                       $events[-1]->{$qend} = $qs + $len1*$qstrand - $qstrand;
+                       $events[-1]->{$hend}   = $hs + $len2*$hstrand - $hstrand;
+                       $events[-1]->{'gaps'} = $gaps;
+                   } else {
+                       push @events,
+                       { 'score'     => $score,
+                         'align-len' => $len1,
+                         $qbegin => $qs,
+                         $qend  => ($qs + $len1*$qstrand - $qstrand),
+                         $hbegin => $hs,
+                         $hend   => ($hs + $len2*$hstrand - $hstrand),
+                     };
+                   }
+                   $gaps = 0;
+               } else {
+                   $gaps = $len1 + $len2 if $state eq 'G';
+               }
+               $qs += $len1*$qstrand;
+               $hs += $len2*$hstrand;
+               $laststate= $state;
+           }
+           for my $event ( @events ) {
+               $self->start_element({'Name' => 'Hsp'});
+               while( my ($key,$val) = each %$event ) {
+                   $self->element({'Name' => "Hsp_$key",
+                                   'Data' => $val});
+               }
+               $self->element({'Name' => 'Hsp_identity',
+                               'Data' => 0});
+               $self->end_element({'Name' => 'Hsp'});
+           }
+
+           # end of hit
+           $self->element({'Name' => 'Hit_score',
+                           'Data' => $score});
+           # issued end...
+           $self->end_element({'Name' => 'Hit'});
+           $self->end_element({'Name' => 'ExonerateOutput'});
+
+           return $self->end_document();
+
+       } elsif(  s/^cigar:\s+(\S+)\s+          # query sequence id
+                 (\d+)\s+(\d+)\s+([\-\+])\s+   # query start-end-strand
+                 (\S+)\s+                      # target sequence id
+                 (\d+)\s+(\d+)\s+([\-\+])\s+   # target start-end-strand
+                 (\d+)\s+                      # score
+                 //ox ) {
+           # copy the captures right away so later calls cannot disturb them
+           my ($qid,$qs,$qe,$qstrand,
+               $hid,$hs,$he,$hstrand,$score) = ($1,$2,$3,$4,$5,$6,$7,$8,$9);
+
+           next if( $self->vulgar || $self->{'_seenvulgar'});
+           $self->{'_seencigar'}++;
+
+           if( ! $self->within_element('result') ) {
+               $self->start_element({'Name' => 'ExonerateOutput'});
+               $self->element({'Name' => 'ExonerateOutput_query-def',
+                               'Data' => $qid });
+           }
+           if( ! $self->within_element('hit') ) {
+               $self->start_element({'Name' => 'Hit'});
+               $self->element({'Name' => 'Hit_id',
+                               'Data' => $hid});
+           }
+           ## gc note:
+           ## $qe and $he are no longer used for calculating the ends,
+           ## just the $qs and $hs values and the alignment and insert lengths
+#          $self->element({'Name' => 'ExonerateOutput_query-len',
+#                          'Data' => $qe});
+#          $self->element({'Name' => 'Hit_len',
+#                          'Data' => $he});
+
+           my @rest = split;   # what is left of the line: state/length pairs
+           if( $qstrand eq '-' ) {
+               $qstrand = -1;
+               ($qs,$qe) = ($qe,$qs); # flip-flop if we're on opp strand
+               $qs--; $qe++;
+           } else { $qstrand = 1; }
+           if( $hstrand eq '-' ) {
+               $hstrand = -1;
+               ($hs,$he) = ($he,$hs); # flip-flop if we're on opp strand
+               $hs--; $he++;
+           } else { $hstrand = 1; }
+           # okay let's do this right and generate a set of HSPs
+           # from the cigar line
+
+           ## gc note:
+           ## add one because these values are zero-based
+           ## this calculation was originally done lower in the code,
+           ## but it's clearer to do it just once at the start
+           $qs++; $hs++;
+
+           my ($aln_len,$inserts,$deletes) = (0,0,0);
+           while( @rest >= 2 ) {
+               my ($state,$len) = (shift @rest, shift @rest);
+               if( $state eq 'I' ) {
+                   $inserts+=$len;
+               } elsif( $state eq 'D' ) {
+                   # a deletion of at least $MIN_INTRON is taken to be an
+                   # intron: emit what was collected so far as one HSP
+                   if( $len >= $MIN_INTRON ) {
+                       $self->start_element({'Name' => 'Hsp'});
+
+                       $self->element({'Name' => 'Hsp_score',
+                                       'Data' => $score});
+
+                       # HSP ends where the other begins
+                       $self->element({'Name' => 'Hsp_query-from',
+                                       'Data' => $qs});
+                       ## gc note:
+                       ## $qs is now the start of the next hsp
+                       ## the end of this hsp is 1 before this position
+                       ## (or 1 after in case of reverse strand)
+                       $qs += $aln_len*$qstrand;
+                       $self->element({'Name' => 'Hsp_query-to',
+                                       'Data' => $qs - ($qstrand*1)});
+
+                       $hs += $deletes*$hstrand;
+                       $self->element({'Name' => 'Hsp_hit-from',
+                                       'Data' => $hs});
+                       $hs += $aln_len*$hstrand;
+                       $self->element({'Name' => 'Hsp_hit-to',
+                                       'Data' => $hs-($hstrand*1)});
+
+                       # An earlier pair of Hsp_align-len/Hsp_identity
+                       # elements used to be sent before these; the
+                       # values below replaced them in '_values', so
+                       # only these are sent now.
+                       $self->element({'Name' => 'Hsp_align-len',
+                                       'Data' => $aln_len + $inserts
+                                           + $deletes});
+                       $self->element({'Name' => 'Hsp_identity',
+                                       'Data' => $aln_len });
+
+                       $self->element({'Name' => 'Hsp_gaps',
+                                       'Data' => $inserts + $deletes});
+                       $self->element({'Name' => 'Hsp_querygaps',
+                                       'Data' => $inserts});
+                       $self->element({'Name' => 'Hsp_hitgaps',
+                                       'Data' => $deletes});
+
+## gc addition start
+
+                       $self->element({'Name' => 'Hsp_qseq',
+                                       'Data' => shift @q_ex,
+                                   });
+                       $self->element({'Name' => 'Hsp_hseq',
+                                       'Data' => shift @h_ex,
+                                   });
+                       $self->element({'Name' => 'Hsp_midline',
+                                       'Data' => shift @m_ex,
+                                   });
+## gc addition end
+                       $self->end_element({'Name' => 'Hsp'});
+
+                       $aln_len = $inserts = $deletes = 0;
+                   }
+                   $deletes+=$len;
+               } else {
+                   $aln_len += $len;
+               }
+           }
+           # whatever is left after the last intron forms the final HSP
+           $self->start_element({'Name' => 'Hsp'});
+
+## gc addition start
+
+           $self->element({'Name' => 'Hsp_qseq',
+                           'Data' => shift @q_ex,
+                       });
+           $self->element({'Name' => 'Hsp_hseq',
+                           'Data' => shift @h_ex,
+                       });
+           $self->element({'Name' => 'Hsp_midline',
+                           'Data' => shift @m_ex,
+                       });
+## gc addition end
+
+           $self->element({'Name' => 'Hsp_score',
+                           'Data' => $score});
+
+           $self->element({'Name' => 'Hsp_query-from',
+                           'Data' => $qs});
+
+           $qs += $aln_len*$qstrand;
+           $self->element({'Name' => 'Hsp_query-to',
+                           'Data' => $qs - ($qstrand*1)});
+
+           $hs += $deletes*$hstrand;
+           $self->element({'Name' => 'Hsp_hit-from',
+                           'Data' => $hs});
+           $hs += $aln_len*$hstrand;
+           $self->element({'Name' => 'Hsp_hit-to',
+                           'Data' => $hs -($hstrand*1)});
+
+           # NOTE(review): align-len and identity are computed differently
+           # here than for the HSPs emitted inside the loop - confirm
+           # which form is intended.
+           $self->element({'Name' => 'Hsp_align-len',
+                           'Data' => $aln_len});
+
+           $self->element({'Name' => 'Hsp_identity',
+                           'Data' => $aln_len - ($inserts + $deletes)});
+
+           $self->element({'Name' => 'Hsp_gaps',
+                           'Data' => $inserts + $deletes});
+
+           $self->element({'Name' => 'Hsp_querygaps',
+                           'Data' => $inserts});
+           $self->element({'Name' => 'Hsp_hitgaps',
+                           'Data' => $deletes});
+           $self->end_element({'Name' => 'Hsp'});
+
+           $self->element({'Name' => 'Hit_score',
+                           'Data' => $score});
+
+           $self->end_element({'Name' => 'Hit'});
+           $self->end_element({'Name' => 'ExonerateOutput'});
+
+           return $self->end_document();
+       } else {
+           # skipping this line
+       }
+   }
+   return $self->end_document() if( $seentop );
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+   my ($self,$data) = @_;
+   # we currently don't care about attributes
+   my $mode = $MODEMAP{ $data->{'Name'} };
+
+   # names that are not in %MODEMAP do not open a level
+   return unless $mode;
+
+   # let the event handler know, if it handles this kind of event
+   if( $self->_eventHandler->will_handle($mode) ) {
+       my $method = 'start_' . lc $mode;
+       $self->_eventHandler->$method($data->{'Attributes'});
+   }
+
+   # the innermost open level is kept at the front of the list
+   unshift @{$self->{'_elements'}}, $mode;
+
+   # a new result starts with no collected values
+   if( $mode eq 'result' ) {
+       $self->{'_values'} = {};
+       $self->{'_result'} = undef;
+   }
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    my ($self,$data) = @_;
+    my $nm   = $data->{'Name'};
+    my $type = $MODEMAP{$nm};
+    my $rc;
+
+    if( $type ) {
+	# closing a result/hit/hsp level: hand the collected values to
+	# the event handler and drop the level from the stack
+	if( $self->_eventHandler->will_handle($type) ) {
+	    my $method = 'end_' . lc $type;
+	    $rc = $self->_eventHandler->$method($self->{'_reporttype'},
+						$self->{'_values'});
+	}
+	shift @{$self->{'_elements'}};
+
+    } elsif( my $target = $MAPPING{$nm} ) {
+	# a data element: file the last characters read under its key
+	if ( ref($target) =~ /hash/i ) {
+	    # hash entries name a sub-key one level further down
+	    my ($key) = keys %{$target};
+	    $self->{'_values'}->{$key}->{$target->{$key}} = $self->{'_last_data'};
+	} else {
+	    $self->{'_values'}->{$target} = $self->{'_last_data'};
+	}
+    } else {
+	$self->debug( "unknown nm $nm, ignoring\n");
+    }
+
+    # the buffered data belongs to the element that just ended
+    $self->{'_last_data'} = '';
+    # keep the handler's return value for end_document
+    if( defined $type && $type eq 'result' ) {
+	$self->{'_result'} = $rc;
+    }
+    return $rc;
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+   my $self = shift;
+   my ($event) = @_;
+   # send the complete start / characters / end sequence for one element
+   for my $step (qw(start_element characters)) {
+       $self->$step($event);
+   }
+   return $self->end_element($event);
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters({'Data' => $str})
+ Function: Handles a characters event
+ Returns : none
+ Args    : hash ref with the key 'Data'
+
+
+=cut
+
+sub characters{
+   my $self  = shift;
+   my $event = shift;
+   my $text  = $event->{'Data'};
+
+   # missing data and whitespace-only data leave the buffer untouched
+   return if ( ! defined $text || $text =~ /^\s+$/ );
+
+   $self->{'_last_data'} = $text;
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name
+
+
+=cut
+
+sub within_element{
+   my ($self,$name) = @_;
+   # Nothing can match without a name or without any open element.
+   # Each test has to short-circuit on its own ('||'): with the former
+   # '&&' between the first two, an undefined element list was still
+   # dereferenced and an undefined name was still compared below.
+   return 0 if ( ! defined $name ||
+		 ! defined $self->{'_elements'} ||
+		 scalar @{$self->{'_elements'}} == 0 );
+   # look through every open level, innermost first
+   foreach my $open ( @{$self->{'_elements'}} ) {
+       return 1 if( $open eq $name );
+   }
+   return 0;
+}
+
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within' because 'in' only tests
+           the innermost (most recently started) element.
+ Returns : boolean
+ Args    : string element name
+
+
+=cut
+
+sub in_element{
+   my $self = shift;
+   my $name = shift;
+   # the innermost open level sits at the front of the list
+   my $current = $self->{'_elements'}->[0];
+   return defined $current ? ( $current eq $name ) : 0;
+}
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handle a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    my $self = shift;
+    # wipe everything collected for the previous document
+    @{$self}{'_lasttype', '_values', '_result', '_elements'} =
+	('', {}, undef, []);
+    $self->{'_reporttype'} = 'exonerate';
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+   # extra arguments are accepted but not used
+   my ($self,@args) = @_;
+   # the result stored by end_element when the result element closed
+   return $self->{'_result'};
+}
+
+
+sub write_result {
+   my $self = shift;
+   my ($blast, @args) = @_;
+
+   # fall back to the default writer class when no writer was set.
+   # NOTE(review): nothing in this file loads $DEFAULT_WRITER_CLASS -
+   # confirm the class is available when this branch runs.
+   unless( defined($self->writer) ) {
+       $self->warn("Writer not defined. Using a $DEFAULT_WRITER_CLASS");
+       my $writer = $DEFAULT_WRITER_CLASS->new();
+       $self->writer( $writer );
+   }
+   $self->SUPER::write_result( $blast, @args );
+}
+
+sub result_count {
+    my ($self) = @_;
+    # number of results started so far (counted in next_result)
+    return $self->{'_result_count'};
+}
+
+# alias: same value as result_count
+sub report_count { my $self = shift; return $self->result_count; }
+
+=head2 vulgar
+
+ Title   : vulgar
+ Usage   : $obj->vulgar($newval)
+ Function: Get/Set flag, do you want to build HSPs from VULGAR string?
+ Returns : value of vulgar (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub vulgar{
+    my $self = shift;
+    # Setter: any argument (including undef) is a new value.  The
+    # argument used to be shifted away before being looked at, so the
+    # flag could never be changed.
+    if( @_ ) {
+	my $value = shift;
+	# vulgar and cigar exclude each other; asking for both turns
+	# both preferences off
+	if( $value && $self->{'_cigar'} ) {
+	    $self->warn("Trying to set vulgar and cigar both to 1, must be either or");
+	    $self->{'_cigar'}  = 0;
+	    return $self->{'_vulgar'} = 0;
+	}
+	$self->{'_vulgar'} = $value;
+    }
+    return $self->{'_vulgar'};
+}
+
+=head2 cigar
+
+ Title   : cigar
+ Usage   : $obj->cigar($newval)
+ Function: Get/Set boolean flag do you want to build HSPs from CIGAR strings?
+ Returns : value of cigar (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub cigar{
+    my $self = shift;
+    # Setter: any argument (including undef) is a new value.  The
+    # argument used to be shifted away before being looked at, so the
+    # flag could never be changed.
+    if( @_ ) {
+	my $value = shift;
+	# vulgar and cigar exclude each other; asking for both turns
+	# both preferences off
+	if( $value && $self->{'_vulgar'} ) {
+	    $self->warn("Trying to set vulgar and cigar both to 1, must be either or");
+	    $self->{'_vulgar'}  = 0;
+	    return $self->{'_cigar'} = 0;
+	}
+	$self->{'_cigar'} = $value;
+    }
+    return $self->{'_cigar'};
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1099 @@
+# $Id: fasta.pm,v 1.50.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::fasta
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::fasta - A SearchIO parser for FASTA results
+
+=head1 SYNOPSIS
+
+  # Do not use this object directly, use it through the SearchIO system
+   use Bio::SearchIO;
+   my $searchio = new Bio::SearchIO(-format => 'fasta',
+				    -file   => 'report.FASTA');
+   while( my $result = $searchio->next_result ) {
+	# ... do what you would normally do with Bio::SearchIO.
+   }
+
+=head1 DESCRIPTION
+
+This object contains the event based parsing code for FASTA format
+reports.  It creates L<Bio::Search::HSP::FastaHSP> objects instead of
+L<Bio::Search::HSP::GenericHSP> for the HSP objects. 
+
+This module will parse -m 9 -d 0 output as well as default m 1 output
+from FASTA as well as SSEARCH.
+
+Also see the SearchIO HOWTO:
+L<http://bioperl.open-bio.org/wiki/HOWTO:SearchIO>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich, Aaron Mackey
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::fasta;
+use vars qw(%MODEMAP %MAPPING $IDLENGTH);
+use strict;
+
+# Object preamble - inherits from Bio::SearchIO (see 'use base' below)
+
+use Bio::Factory::ObjectFactory;
+use POSIX;
+
+BEGIN { 
+    # Package tables and defaults, filled in at compile time.
+    #
+    # Set IDLENGTH to a new value if you have
+    # compiled FASTA with a different ID length
+    # (actually newest FASTA allows the setting of this
+    #  via -C parameter, default is 6)
+    $IDLENGTH = 6;
+
+    # mapping of NCBI Blast terms to Bioperl hash keys:
+    # element names that start/end a level => the event type
+    %MODEMAP = ('FastaOutput' => 'result',
+		'Hit'         => 'hit',
+		'Hsp'         => 'hsp'
+		);
+
+    # This should really be done more intelligently, like with
+    # XSLT
+
+    # Data element names => key the element's data is stored under.
+    # Plain string values are the key itself; hash-ref values name a
+    # sub-key (e.g. 'matrix') below the given key.
+    # NOTE(review): the code that consumes this table is outside the
+    # part of the file reviewed here - confirm against end_element.
+    %MAPPING = 
+	( 
+	  # per-HSP values
+	  'Hsp_bit-score' => 'HSP-bits',
+	  'Hsp_score'     => 'HSP-score',
+	  'Hsp_sw-score'  => 'HSP-swscore',
+	  'Hsp_evalue'    => 'HSP-evalue',
+	  'Hsp_query-from'=> 'HSP-query_start',
+	  'Hsp_query-to'  => 'HSP-query_end',
+	  'Hsp_hit-from'  => 'HSP-hit_start',
+	  'Hsp_hit-to'    => 'HSP-hit_end',
+	  'Hsp_positive'  => 'HSP-conserved',
+	  'Hsp_identity'  => 'HSP-identical',
+	  'Hsp_gaps'      => 'HSP-hsp_gaps',
+	  'Hsp_hitgaps'   => 'HSP-hit_gaps',
+	  'Hsp_querygaps' => 'HSP-query_gaps',
+	  'Hsp_qseq'      => 'HSP-query_seq',
+	  'Hsp_hseq'      =>  'HSP-hit_seq',
+	  'Hsp_midline'   =>  'HSP-homology_seq',
+	  'Hsp_align-len' =>  'HSP-hsp_length',
+	  'Hsp_query-frame'=> 'HSP-query_frame',
+	  'Hsp_hit-frame'  => 'HSP-hit_frame',
+
+	  # per-hit values
+	  'Hit_id'        => 'HIT-name',
+	  'Hit_len'       => 'HIT-length',
+	  'Hit_accession' => 'HIT-accession',
+	  'Hit_def'       => 'HIT-description',
+	  'Hit_signif'    => 'HIT-significance',
+	  'Hit_score'     => 'HIT-score',
+
+	  # per-result values
+	  'FastaOutput_program'  => 'RESULT-algorithm_name',
+	  'FastaOutput_version'  => 'RESULT-algorithm_version',
+	  'FastaOutput_query-def'=> 'RESULT-query_name',
+	  'FastaOutput_querydesc'=> 'RESULT-query_description',
+	  'FastaOutput_query-len'=> 'RESULT-query_length',
+	  'FastaOutput_db'       => 'RESULT-database_name',
+	  'FastaOutput_db-len'   => 'RESULT-database_entries',
+	  'FastaOutput_db-let'   => 'RESULT-database_letters',
+
+	  # search parameters and statistics, grouped under one result key
+	  'Parameters_matrix'    => { 'RESULT-parameters' => 'matrix'},
+	  'Parameters_expect'    => { 'RESULT-parameters' => 'expect'},
+	  'Parameters_include'   => { 'RESULT-parameters' => 'include'},
+	  'Parameters_sc-match'  => { 'RESULT-parameters' => 'match'},
+	  'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
+	  'Parameters_gap-open'  => { 'RESULT-parameters' => 'gapopen'},
+	  'Parameters_gap-ext'   => { 'RESULT-parameters' => 'gapext'},
+	  'Parameters_word-size' => { 'RESULT-parameters' => 'wordsize'},
+	  'Parameters_ktup'      => { 'RESULT-parameters' => 'ktup'},
+	  'Parameters_filter'    => {'RESULT-parameters' => 'filter'},
+	  'Statistics_db-num'    => { 'RESULT-statistics' => 'dbentries'},
+	  'Statistics_db-len'    => { 'RESULT-statistics' => 'dbletters'},
+	  'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength'},
+	  'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
+	  'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
+	  'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
+	  'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy'},
+	  );
+}
+
+
+use base qw(Bio::SearchIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::fasta();
+ Function: Builds a new Bio::SearchIO::fasta object 
+ Returns : Bio::SearchIO::fasta
+ Args    : -idlength - set ID length to something other 
+                       than the default (6), this is only
+                       necessary if you have compiled FASTA
+                       with a new default id length to display
+                       in the HSP alignment blocks
+
+=cut
+
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  # without arguments nothing below runs: no id length is set and no
+  # HSP factory is registered
+  return unless @args;
+  my ($idlength) = $self->_rearrange([qw(IDLENGTH)],@args);
+  # fall back to the package default when -idlength is absent (or false)
+  $self->idlength($idlength || $IDLENGTH);
+  # have the event handler build FastaHSP objects for 'hsp' events
+  $self->_eventHandler->register_factory('hsp', 
+                                         Bio::Factory::ObjectFactory->new(
+                                            -type      => 'Bio::Search::HSP::FastaHSP',
+                                            -interface => 'Bio::Search::HSP::HSPI'));
+  return 1;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $hit = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result{
+   my ($self) = @_;
+   local $/ = "\n";
+   local $_;
+
+   my $data = '';
+   my $seentop = 0;
+   my $current_hsp;
+   $self->start_document();
+   my @hit_signifs;
+   while( defined ($_ = $self->_readline )) {
+       next if( ! $self->in_element('hsp')  &&
+		/^\s+$/); # skip empty lines
+       if( m/(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?sequence/oxi || 
+	   /(\S+)\s+compares\s+a/ ||
+	   ( m/^\#\s+/ && 
+	     ($_ = $self->_readline) &&
+	     /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?sequence/oxi ||
+	     /(\S+)\s+compares\s+a/
+	   )
+	 ) {
+	   if( $seentop ) {
+	       $self->_pushback($_);
+	       $self->end_element({ 'Name' => 'FastaOutput'});
+	       return $self->end_document();
+	   }
+	   $self->{'_reporttype'} = $1;
+	   $self->start_element({ 'Name' => 'FastaOutput' } );
+	   $self->{'_result_count'}++;
+	   $seentop = 1;
+	   
+	   $self->element({ 'Name' => 'FastaOutput_program',
+			    'Data' => $self->{'_reporttype'}});
+	   $_ = $self->_readline();
+	   my ($version) = (/version\s+(\S+)/);
+	   $version = '' unless defined $version;
+	   $self->{'_version'} = $version;
+	   $self->element({ 'Name' => 'FastaOutput_version',
+			    'Data' => $version});
+
+	   my ($last, $leadin, $type, $querylen, $querytype, $querydef);
+
+	   while( defined($_ = $self->_readline()) ) {
+	       if( /^ (
+                       (?:\s+>) |             # fa33 lead-in
+                       (?:\s*\d+\s*>>>)       # fa34 mlib lead-in
+                      )
+                      (.*)
+                   /x
+		 ) {
+		   ($leadin, $querydef) = ($1, $2);
+		   if ($leadin =~ m/>>>/) {
+		       if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) {
+			   ($querydef, $querylen, $querytype) = ($1, $2, $3);
+			   last;
+		       }
+		   } else {
+		       if( $last =~ /(\S+)[:,]\s*(\d+)\s+(aa|nt)/ ) {
+			   ($querylen, $querytype) = ($2, $3);
+			   $querydef ||= $1;
+			   last;
+		       }
+		   }
+	       } elsif ( m/^\s*vs\s+\S+/o ) {
+		   if ( $last =~ /(\S+)[,:]\s+(\d+)\s+(aa|nt)/o) {
+		       ($querydef, $querylen, $querytype) = ($1, $2, $3);
+		       last;
+		   }
+	       } 
+	       $last = $_;
+	   }
+	   
+	   if( $self->{'_reporttype'} &&
+	       $self->{'_reporttype'} eq 'FASTA'
+	       ) {
+	       if( $querytype eq 'nt') {
+		   $self->{'_reporttype'} = 'FASTN' ;
+	       } elsif( $querytype eq 'aa' ) {
+		   $self->{'_reporttype'} = 'FASTP' ;
+	       }
+	   }
+	   my ($name, $descr) = $querydef =~ m/^(\S+)\s*(.*?)\s*$/o;
+	   $self->element({'Name' => 'FastaOutput_query-def',
+			   'Data' => $name});
+	   $self->element({'Name' => 'FastaOutput_querydesc',
+			   'Data' => $descr});
+	   if ($querylen) {
+	       $self->element({'Name' => 'FastaOutput_query-len',
+			       'Data' => $querylen});
+	   } else {
+	       $self->warn("unable to find and set query length");
+	   }
+	   if( $last =~ /^\s*vs\s+(\S+)/ || 
+	       ( $last =~ /^searching\s+(\S+)\s+library/ ) ||
+	       (defined $_ && /^\s*vs\s+(\S+)/) ||
+	       (defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/)
+	     ) {
+	       $self->element({'Name' => 'FastaOutput_db',
+                           'Data' => $1});
+	   } elsif (m/^\s+opt(?:\s+E\(\))?$/o) {
+	       # histogram ... read over it more rapidly than the larger outer loop:
+	       while (defined($_ = $self->_readline)) {
+		   last if m/^>\d+/;
+	       }
+	   }
+
+       } elsif( /(\d+) residues in\s+(\d+)\s+sequences/ ) {
+	   $self->element({'Name' => 'FastaOutput_db-let',
+			   'Data' => $1});
+	   $self->element({'Name' => 'FastaOutput_db-len',
+			   'Data' => $2});
+	   $self->element({'Name' => 'Statistics_db-len',
+			   'Data' => $1});
+	   $self->element({'Name' => 'Statistics_db-num',
+			   'Data' => $2});	   
+       } elsif( /Lambda=\s*(\S+)/ ) {
+	   $self->element({'Name' => 'Statistics_lambda',
+			   'Data' => $1});	  
+       } elsif (/K=\s*(\S+)/) {
+	   $self->element({'Name' => 'Statistics_kappa',
+			   'Data' => $1});
+       } elsif( /^\s*(Smith-Waterman).+(\S+)\s*matrix [^\]]*?(xS)?\]/ ) {	   
+	   $self->element({'Name' => 'Parameters_matrix',
+			   'Data' => $2});
+	   $self->element({'Name' => 'Parameters_filter',
+			   'Data' => defined $3 ? 1 : 0,
+			  });
+	   $self->{'_reporttype'} = $1;
+
+	   $self->element({ 'Name' => 'FastaOutput_program',
+			    'Data' => $self->{'_reporttype'}});
+	   
+       } elsif( /The best( related| unrelated)? scores are:/ ) {
+	   my $rel = $1;
+	   my @labels = split;
+	   @labels = map {
+	       if ($_ =~ m/^E\((\d+)\)$/o) {
+		   $self->element({'Name' => 'Statistics_eff-space', 'Data' => $1});
+		   "evalue";
+	       } else {
+		   $_;
+	       }
+	   } @labels[$rel ? 5 : 4 .. $#labels];
+
+	   while( defined ($_ = $self->_readline() ) && 
+		  ! /^\s+$/ ) {
+	       my @line = split;
+
+	       if ($line[-1] =~ m/\=/o && $labels[-1] eq 'fs') {
+		   # unlabelled alignment hit;
+		   push @labels, "aln_code";
+	       }
+
+	       my %data;
+	       @data{@labels} = splice(@line, @line - @labels);
+	       if ($line[-1] =~ m/\[([1-6rf])\]/o) {
+		   my $fr = $1;
+		   $data{lframe} = ($fr =~ /\d/o ?
+				    ($fr <= 3   ? "+$fr" : "-@{[$fr-3]}") :
+				    ($fr eq 'f' ? '+1'  : '-1')
+				    );
+		   pop @line;
+	       } else {
+		   $data{lframe} = '0';
+	       }
+
+	       if ($line[-1] =~ m/^\(?(\d+)\)$/) {
+		   $data{hit_len} = $1;
+		   pop @line;
+		   if ($line[-1] =~ m/^\($/) {
+		       pop @line;
+		   }
+	       } else {
+		   $data{hit_len} = 0;
+	       }
+
+	       # rebuild the first part of the line, preserving spaces:
+	       ($_) = m/^(\S+(?:\s+\S+){$#line})/;
+
+	       my ($id, $desc) = split(/\s+/,$_,2);
+	       my @pieces = split(/\|/,$id);
+	       my $acc = pop @pieces;
+	       $acc =~ s/\.\d+$//;
+
+	       @data{qw(id desc acc)} = ($id, $desc, $acc);
+
+	       push @hit_signifs, \%data;
+	   }
+       } elsif( /^\s*([T]?FAST[XYAF]).+,\s*(\S+)\s*matrix[^\]]+?(xS)?\]\s*ktup:\s*(\d+)/ ) {
+	   $self->element({'Name' => 'Parameters_matrix',
+			   'Data' => $2});
+	   $self->element({'Name' => 'Parameters_filter',
+			   'Data' => defined $3 ? 1 : 0,
+			  });
+	   $self->element({'Name' => 'Parameters_ktup',
+			   'Data' => $4});
+	   $self->{'_reporttype'} = $1 if( $self->{'_reporttype'} !~ /FAST[PN]/i ) ;
+
+	   $self->element({ 'Name' => 'FastaOutput_program',
+			    'Data' => $self->{'_reporttype'}});
+	   
+       } elsif( /(?:gap\-pen|open\/ext):\s+([\-\+]?\d+)\s*\/\s*([\-\+]?\d+).+width:\s+(\d+)/ ) {
+	   $self->element({'Name' => 'Parameters_gap-open',
+			   'Data' => $1});
+	   $self->element({'Name' => 'Parameters_gap-ext',
+			   'Data' => $2});
+	   $self->element({'Name' => 'Parameters_word-size',
+			   'Data' => $3});
+       } elsif( /^>>(.+?)\s+\((\d+)\s*(aa|nt)\)$/ ) {
+	   if( $self->in_element('hsp') ) {
+	       $self->end_element({ 'Name' => 'Hsp'});
+	   }
+	   if( $self->in_element('hit') ) {
+	       $self->end_element({ 'Name' => 'Hit'});
+	   }
+	   
+	   $self->start_element({'Name' => 'Hit'});
+	   $self->element({ 'Name' => 'Hit_len',
+			    'Data' => $2});  
+	   my ($id,$desc) = split(/\s+/,$1,2);
+	   $self->element({ 'Name' => 'Hit_id',
+			    'Data' => $id}); 	   
+	   my @pieces = split(/\|/,$id);
+	   my $acc = pop @pieces;
+	   $acc =~ s/\.\d+$//;
+	   $self->element({ 'Name' =>  'Hit_accession',
+			    'Data'  => $acc});	
+	   $self->element({ 'Name' => 'Hit_def',
+			    'Data' => $desc});	   
+
+	   $_ = $self->_readline();
+	   my ($score,$bits,$e) = /Z-score: \s* (\S+) \s*
+                               (?: bits: \s* (\S+) \s+ )?
+                               (?: E|expect ) \s* \(\) :? \s*(\S+)/ox;
+	   $bits = $score unless defined $bits;
+
+	   my $v = shift @hit_signifs;
+	   if( defined $v ) {
+	       @{$v}{qw(evalue bits z-sc)} = ($e, $bits, $score);
+	   }
+	   $self->element({'Name' => 'Hit_signif',
+			   'Data' => $v ? $v->{evalue} : $e });
+	   $self->element({'Name' => 'Hit_score',
+			   'Data' => $v ? $v->{bits} : $bits });
+	   $self->start_element({'Name' => 'Hsp'});
+
+	   $self->element({'Name' => 'Hsp_score',
+			   'Data' => $v ? $v->{'z-sc'} : $score });
+	   $self->element({'Name' => 'Hsp_evalue',
+			   'Data' => $v ? $v->{evalue} : $e });
+	   $self->element({'Name' => 'Hsp_bit-score',
+			   'Data' => $v ? $v->{bits} : $bits });
+	   $_ = $self->_readline();
+	   if( s/Smith-Waterman score:\s*(\d+)\;?// ) {
+	       $self->element({'Name' => 'Hsp_sw-score',
+			       'Data' => $1});
+	   }
+	   if( / (\d*\.?\d+)\% \s* identity
+                 (?:\s* \(\s*(\S+)\% \s* (?:ungapped|similar) \) )?
+                 \s* in \s* (\d+) \s+ (?:aa|nt) \s+ overlap \s*
+                 \( (\d+) \- (\d+) : (\d+) \- (\d+) \)
+               /x ) {
+	       my ($identper,$gapper,$len,$querystart,
+		   $queryend,$hitstart,$hitend) = ($1,$2,$3,$4,$5,$6,$7);
+	       my $ident = POSIX::ceil(($identper/100) * $len);
+	       my $gaps = ( defined $gapper ) ? POSIX::ceil ( ($gapper/100) * $len) : undef;
+	       
+	       $self->element({'Name' => 'Hsp_gaps',
+			       'Data' => $gaps});
+	       $self->element({'Name' => 'Hsp_identity',
+			       'Data' => $ident});
+	       $self->element({'Name' => 'Hsp_positive',
+			       'Data' => $ident});
+	       $self->element({'Name' => 'Hsp_align-len',
+			       'Data' => $len});
+	       
+	       $self->debug( "query_start = $querystart, query_end = $queryend\n");
+	       $self->element({'Name' => 'Hsp_query-from',
+			       'Data' => $querystart});
+	       $self->element({'Name' => 'Hsp_query-to',
+			       'Data' => $queryend});
+	       $self->element({'Name' => 'Hsp_hit-from',
+			       'Data' => $hitstart});
+	       $self->element({'Name' => 'Hsp_hit-to',
+			       'Data' => $hitend});
+	       
+	       }
+
+	   if ($v) {
+	       $self->element({'Name' => 'Hsp_querygaps', 'Data' => $v->{qgaps} }) if exists $v->{qgaps};
+	       $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $v->{lgaps} }) if exists $v->{lgaps};
+
+	       if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
+		   if( 8 == scalar grep { exists $v->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
+		       if ($v->{ax0} < $v->{an0}) {
+			   $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($v->{px0} - $v->{ax0}) % 3) + 1]}" });
+		       } else {
+			   $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($v->{an0} - $v->{pn0}) % 3) + 1]}" });
+		       }
+		       if ($v->{ax1} < $v->{an1}) {
+			   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($v->{px1} - $v->{ax1}) % 3) + 1]}" });
+		       } else {
+			   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($v->{an1} - $v->{pn1}) % 3) + 1]}" });
+		       }
+		   } else {
+		       $self->element({'Name' => 'Hsp_query-frame', 'Data' => $v->{lframe} });
+		       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
+		   }
+	       } else {
+		   $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
+		   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $v->{lframe} });
+	       }
+
+	   } else {
+	       $self->warn( "unable to parse FASTA score line: $_");
+	   }
+       } elsif( /\d+\s*residues\s*in\s*\d+\s*query\s*sequences/ ) {
+	   if( $self->in_element('hsp') ) {
+	       $self->end_element({'Name' => 'Hsp'});
+	   } 
+	   if( $self->in_element('hit') ) {
+	       $self->end_element({'Name' => 'Hit'});
+	   }
+	   
+#	   $_ = $self->_readline();
+#	   my ( $liblen,$libsize) = /(\d+)\s+residues\s*in(\d+)\s*library/;
+	   # fast forward to the end of the file as there is 
+	   # nothing else left to do with this file and want to be sure and
+	   # reset it
+	   while(defined($_ = $self->_readline() ) ) { 
+	       last if( /^Function used was/);
+	       if( /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?
+		   sequence/oxi ||
+		   /(\S+)\s+compares\s+a/oi ) {
+		   $self->_pushback($_);
+	       }
+	   }
+
+           if (@hit_signifs) {
+	       # process remaining best hits
+	       for my $h (@hit_signifs) {
+		   # Hsp_score Hsp_evalue Hsp_bit-score
+		   # Hsp_sw-score Hsp_gaps Hsp_identity Hsp_positive
+		   # Hsp_align-len Hsp_query-from Hsp_query-to
+		   # Hsp_hit-from Hsp_hit-to Hsp_qseq Hsp_midline
+
+		   $self->start_element({'Name' => 'Hit'});
+		   $self->element({ 'Name' => 'Hit_len',
+				    'Data' => $h->{hit_len}
+				  }) if exists $h->{hit_len};
+		   $self->element({ 'Name' => 'Hit_id',
+				    'Data' => $h->{id}
+				  }) if exists $h->{id};
+		   $self->element({ 'Name' =>  'Hit_accession',
+				    'Data'  => $h->{acc}
+				  }) if exists $h->{acc};
+		   $self->element({ 'Name' => 'Hit_def',
+				    'Data' => $h->{desc}
+				  }) if exists $h->{desc};
+		   $self->element({'Name' => 'Hit_signif',
+				   'Data' => $h->{evalue}
+				  }) if exists $h->{evalue};
+		   $self->element({'Name' => 'Hit_score',
+				   'Data' => $h->{bits}
+				  }) if exists $h->{bits};
+
+		   $self->start_element({'Name' => 'Hsp'});
+		   $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'};
+		   $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue};
+		   $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits};
+		   $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw};
+		   $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'};
+		   $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
+		       if (exists $h->{'%_id'} && exists $h->{alen});
+		   if( exists $h->{'%_gid'} ) { 
+		       $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_gid'} * $h->{alen})}) if exists $h->{'%_gid'} && exists $h->{alen};
+		   } else { 
+		       $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
+			   if (exists $h->{'%_id'} && exists $h->{alen});
+		   }
+		   $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen};
+		   $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0};
+		   $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0};
+		   $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1};
+		   $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1};
+
+		   $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps};
+		   $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps};
+
+		   if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
+		       if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
+			   if ($h->{ax0} < $h->{an0}) {
+			       $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" });
+			   } else {
+			       $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" });
+			   }
+			   if ($h->{ax1} < $h->{an1}) {
+			       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" });
+			   } else {
+			       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" });
+			   }
+		       } else {
+			   $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} });
+			   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
+		       }
+		   } else {
+		       $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
+		       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} });
+		   }
+
+		   $self->end_element({'Name' => 'Hsp'});
+		   $self->end_element({'Name' => 'Hit'});
+	       }
+	   }
+
+	   $self->end_element({ 'Name' => 'FastaOutput'});
+	   return $self->end_document();
+       } elsif( /^\s*\d+\s*>>>/) {
+	   if ($self->within_element('FastaOutput')) {
+	       if( $self->in_element('hsp') ) {
+		   $self->end_element({'Name' => 'Hsp'});
+	       } 
+	       if( $self->in_element('hit') ) {
+		   $self->end_element({'Name' => 'Hit'});
+	       }
+
+	       if (@hit_signifs) {
+		   # process remaining best hits
+		   for my $h (@hit_signifs) {
+		       $self->start_element({'Name' => 'Hit'});
+		       $self->element({ 'Name' => 'Hit_len',
+					'Data' => $h->{hit_len}
+				      }) if exists $h->{hit_len};
+		       $self->element({ 'Name' => 'Hit_id',
+					'Data' => $h->{id}
+				      }) if exists $h->{id};
+		       $self->element({ 'Name' =>  'Hit_accession',
+					'Data'  => $h->{acc}
+				      }) if exists $h->{acc};
+		       $self->element({ 'Name' => 'Hit_def',
+					'Data' => $h->{desc}
+				      }) if exists $h->{desc};
+		       $self->element({'Name' => 'Hit_signif',
+				       'Data' => $h->{evalue}
+				      }) if exists $h->{evalue};
+		       $self->element({'Name' => 'Hit_score',
+				       'Data' => $h->{bits}
+				      }) if exists $h->{bits};
+
+		       $self->start_element({'Name' => 'Hsp'});
+		       $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'};
+		       $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue};
+		       $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits};
+		       $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw};
+		       $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'};
+		       $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
+			   if (exists $h->{'%_id'} && exists $h->{alen});
+		       if( exists $h->{'%_gid'} ) { 
+			   $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_gid'} * $h->{alen})}) if exists $h->{'%_gid'} && exists $h->{alen};
+		       } else { 
+			   $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
+			   if (exists $h->{'%_id'} && exists $h->{alen});
+		       }
+		       $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen};
+		       $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0};
+		       $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0};
+		       $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1};
+		       $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1};
+
+		       $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps};
+		       $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps};
+		       
+		       if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
+			   if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
+			       if ($h->{ax0} < $h->{an0}) {
+				   $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" });
+			       } else {
+				   $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" });
+			       }
+			       if ($h->{ax1} < $h->{an1}) {
+				   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" });
+			       } else {
+				   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" });
+			       }
+			   } else {
+			       $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} });
+			       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
+			   }
+		       } else {
+			   $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
+			   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} });
+		       }
+
+		       $self->end_element({'Name' => 'Hsp'});
+		       $self->end_element({'Name' => 'Hit'});
+		   }
+	       }
+	       $self->end_element({ 'Name' => 'FastaOutput' });
+	       $self->_pushback($_);
+	       return $self->end_document();
+	   } else {
+	       $self->start_element({ 'Name' => 'FastaOutput' });
+	       $self->{'_result_count'}++;
+	       $seentop = 1;
+	       $self->element({ 'Name' => 'FastaOutput_program',
+				'Data' => $self->{'_reporttype'} });
+	       $self->element({ 'Name' => 'FastaOutput_version',
+				'Data' => $self->{'_version'} });
+
+	       my ($type, $querylen, $querytype, $querydef);
+
+	       if( /^\s*\d+\s*>>>(.*)/ ) {
+		   $querydef = $1;
+		   if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) {
+		       ($querydef, $querylen, $querytype) = ($1, $2, $3);
+		   }
+	       }
+
+	       if( $self->{'_reporttype'} &&
+		   $self->{'_reporttype'} eq 'FASTA' 
+		   ) {
+		   if( $querytype eq 'nt') {
+		       $self->{'_reporttype'} = 'FASTN' ;
+		   } elsif( $querytype eq 'aa' ) {
+		       $self->{'_reporttype'} = 'FASTP' ;
+		   }
+	       }
+	       my ($name,$descr) = ($querydef =~ m/^(\S+)(?:\s+(.*))?\s*$/o);
+	       $self->element({'Name' => 'FastaOutput_query-def',
+			       'Data' => $name});
+	       $self->element({'Name' => 'FastaOutput_querydesc',
+			       'Data' => $descr});
+	       if ($querylen) {
+		   $self->element({'Name' => 'FastaOutput_query-len',
+				   'Data' => $querylen});
+	       } else {
+		   $self->warn("unable to find and set query length");
+	       }
+
+
+	       if( defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/ ) {
+		   $self->element({'Name' => 'FastaOutput_db',
+				   'Data' => $1});
+	       }
+	   }
+       } elsif( $self->in_element('hsp' ) ) {
+	   
+	   my @data = ( [],[],[]);
+	   my $count = 0;
+	   my $len = $self->idlength + 1;
+	   my ($seq1_id);
+	   while( defined($_ ) ) {
+	       chomp;
+	       $self->debug( "$count $_\n");
+	       
+	       if( /residues in \d+\s+query\s+sequences/o) {
+		   $self->_pushback($_);
+		   last;
+	       } elsif (/^>>>\*\*\*/o) {
+		   $self->end_element({Name => "Hsp"});
+		   last;
+	       } elsif (/^>>/o) {
+		   $self->_pushback($_);
+		   last;
+	       } elsif (/^\s*\d+\s*>>>/o) {
+		   $self->_pushback($_);
+		   last;
+	       }
+	       if( $count == 0 ) { 
+		   if( /^(\S+)\s+/ ) {
+		       $self->_pushback($_);
+		       $count = 2;
+		   } elsif( /^\s+\d+/ || /^\s+$/ ) { 
+		       # do nothing, this is really a 0 line
+		   } elsif( length($_) == 0 ) { 
+		       $count = -1;
+		   } else { 
+		       $self->_pushback($_);
+		       $count = 0;
+		   }
+	       } elsif( $count == 1 || $count == 3 ) {
+		   if( /^(\S+)\s+/ ) {
+		       $len = CORE::length($1) if $len < CORE::length($1);
+		       s/\s+$//; # trim trailing spaces,we don't want them 
+		       push @{$data[$count-1]},substr($_,$len);
+		   } elsif( /^\s+(\d+)/ ) {
+		       $count = -1;
+		       $self->_pushback($_);
+		   } elsif( /^\s+$/ || length($_) == 0) {
+		       $count = 5;  
+		       # going to skip these
+		   } else {
+		       $self->warn("Unrecognized alignment line ($count) '$_'");
+		   }
+	       } elsif( $count == 2 ) {
+		   if( /^\s+\d+\s+/ ) {
+		       $self->warn("$_\n") if $self->verbose > 0;
+		       # we are on a Subject part of the alignment
+		       # but we THOUGHT we were on the Query
+		       # move that last line to the proper place
+		       push @{$data[2]}, pop @{$data[0]};
+		       $count = 4;
+		   } else {
+		       # toss the first IDLENGTH characters of the line
+		       if( length($_) >= $len ) {
+			   push @{$data[$count-1]}, substr($_,$len);
+		       }
+		   }
+	       } 
+	       last if( $count++ >= 5);
+	       $_ = $self->_readline();	       
+	   }
+	   if( @{$data[0]} || @{$data[2]}) {
+	       $self->characters({'Name' => 'Hsp_qseq',
+				  'Data' => join('',@{$data[0]}) });
+	       $self->characters({'Name' => 'Hsp_midline',
+				  'Data' => join('',@{$data[1]}) });
+	       $self->characters({'Name' => 'Hsp_hseq',
+				  'Data' => join('',@{$data[2]}) });
+	   }
+       } else {
+	   if( ! $seentop ) {
+	       $self->debug($_);
+	       $self->warn("unrecognized FASTA Family report file!");
+	       return;
+	   }
+       }
+   }
+}
+
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+    my ($self, $event) = @_;
+    # Only the element name drives the dispatch; any 'Attributes' entry is
+    # handed to the event handler untouched.
+    my $name = $event->{'Name'};
+    my $type = $MODEMAP{$name};
+    if( $type ) {
+        # Structural element (one with a %MODEMAP entry): record the mode,
+        # notify the handler if it wants this type, and push the type onto
+        # the front of the open-element stack.
+        $self->_mode($type);
+        if( $self->_eventHandler->will_handle($type) ) {
+            my $method = 'start_' . lc($type);
+            $self->_eventHandler->$method($event->{'Attributes'});
+        }
+        unshift @{$self->{'_elements'}}, $type;
+    }
+    if( $name eq 'FastaOutput' ) {
+        # A new report is starting: discard the values collected so far,
+        # the previous result and the current mode.
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        $self->{'_mode'}   = '';
+    }
+
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    my ($self, $event) = @_;
+    my $nm = $event->{'Name'};
+    my $result;
+
+    # An Hsp is closed implicitly when the next object begins, so the
+    # sequence/midline text accumulated by characters() is flushed here,
+    # just before the Hsp itself is ended.
+    if( $nm eq 'Hsp' ) {
+        for my $field ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
+            $self->element({'Name' => $field,
+                            'Data' => $self->{'_last_hspdata'}->{$field}});
+        }
+        $self->{'_last_hspdata'} = {};
+    }
+
+    my $type = $MODEMAP{$nm};
+    if( $type ) {
+        # Structural element: let the handler finish it (if it handles this
+        # type) and pop it off the open-element stack.
+        if( $self->_eventHandler->will_handle($type) ) {
+            my $method = 'end_' . lc($type);
+            $result = $self->_eventHandler->$method($self->{'_reporttype'},
+                                                    $self->{'_values'});
+        }
+        shift @{$self->{'_elements'}};
+    }
+    elsif( $MAPPING{$nm} ) {
+        # Data element: file the last character data under its mapped key.
+        # A hash-valued mapping names a two-level slot (outer key => inner key).
+        my $target = $MAPPING{$nm};
+        if( ref($target) =~ /hash/i ) {
+            my $key = (keys %{$target})[0];
+            $self->{'_values'}->{$key}->{$target->{$key}} = $self->{'_last_data'};
+        }
+        else {
+            $self->{'_values'}->{$target} = $self->{'_last_data'};
+        }
+    }
+    else {
+        $self->warn( "unknown nm $nm, ignoring\n");
+    }
+
+    # The data belonged to the element that just ended; clear it.
+    $self->{'_last_data'} = '';
+    # Ending the report stores the handler's return value as the result.
+    $self->{'_result'} = $result if( $nm eq 'FastaOutput' );
+    return $result;
+
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters and end_element in turn
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+    my $self  = shift;
+    my $event = shift;
+    # Emit a complete element: open it, deliver its data, close it.
+    # The value of end_element is passed back to the caller.
+    $self->start_element($event);
+    $self->characters($event);
+    return $self->end_element($event);
+}
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Sends a character event
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+    my ($self, $event) = @_;
+
+    my $text = $event->{'Data'};
+    return unless defined $text;
+
+    my $name = $event->{'Name'};
+    # Whitespace-only data is dropped, except for the Hsp sequence and
+    # midline elements, where blanks are kept.
+    return if( $text =~ /^\s+$/ &&
+               $name !~ /Hsp\_(midline|qseq|hseq)/ );
+
+    # While an Hsp is open, alignment text arrives in pieces; append each
+    # piece so end_element can emit the whole string.
+    if( $self->in_element('hsp') &&
+        $name =~ /Hsp\_(qseq|hseq|midline)/ ) {
+        $self->{'_last_hspdata'}->{$name} .= $text;
+    }
+
+    # Remember the data for the end_element call that follows.
+    $self->{'_last_data'} = $text;
+}
+
+=head2 _mode
+
+ Title   : _mode
+ Usage   : $obj->_mode($newval)
+ Function: Get/Set the current mode (the '_mode' attribute)
+ Example : 
+ Returns : value of _mode
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _mode{
+    my $self = shift;
+    my ($new_mode) = @_;
+    # Acts as a setter only when a defined value is supplied; always
+    # returns the mode currently stored.
+    $self->{'_mode'} = $new_mode if defined $new_mode;
+    return $self->{'_mode'};
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub within_element{
+    my ($self, $name) = @_;
+    # Returns 1 if $name, or the type %MODEMAP maps it to, appears anywhere
+    # on the open-element stack; returns 0 otherwise.
+    #
+    # Nothing can match when there is no name, no stack, or an empty stack.
+    # The three tests are OR-ed: the earlier '&&' between the first two let
+    # an undefined stack reach the array dereference and an undefined name
+    # reach the string comparisons below.
+    my $stack = $self->{'_elements'};
+    return 0 if ( ! defined $name  ||
+                  ! defined $stack ||
+                  scalar @{$stack} == 0 );
+
+    # Only compare against the mapped type when %MODEMAP has one, as
+    # in_element does; this avoids comparing against undef.
+    my $mapped = exists $MODEMAP{$name} ? $MODEMAP{$name} : undef;
+
+    foreach my $open ( @{$stack} ) {
+        return 1 if $open eq $name;
+        return 1 if defined $mapped && $open eq $mapped;
+    }
+    return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           Unlike within_element, only the innermost (most recently
+           started) element is checked.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub in_element{
+    my ($self, $name) = @_;
+    # Only the front of the open-element stack is inspected.
+    my $current = $self->{'_elements'}->[0];
+    return 0 if ! defined $current;
+
+    # Match either the name as given or the type %MODEMAP maps it to.
+    my $has_mapping = exists $MODEMAP{$name};
+    return ( $current eq $name ||
+             ( $has_mapping && $current eq $MODEMAP{$name} )
+           );
+}
+
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handles a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    my $self = shift;
+    # Reset all per-document parser state before a report is read.
+    $self->{'_lasttype'} = $self->{'_mode'} = '';
+    $self->{'_result'}   = undef;
+    $self->{'_values'}   = {};
+    # Empty the open-element stack.
+    $self->{'_elements'} = [];
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+   # Returns the result stored by end_element when the 'FastaOutput'
+   # element was closed (undef if none has been stored).
+   # Extra arguments are accepted but not used.
+   my ($self,@args) = @_;
+   return $self->{'_result'};
+}
+
+=head2 idlength
+
+ Title   : idlength
+ Usage   : $obj->idlength($newval)
+ Function: Internal storage of the length of the ID desc
+           in the HSP alignment blocks.  Defaults to
+           $IDLENGTH class variable value
+ Returns : value of idlength
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub idlength{
+    my $self = shift;
+    my ($width) = @_;
+    # Store a new width only when a defined value is passed.
+    $self->{'_idlength'} = $width if defined $width;
+    # A false stored value (unset, 0 or '') falls back to $IDLENGTH.
+    return $self->{'_idlength'} || $IDLENGTH;
+}
+
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+    # Read-only accessor for the '_result_count' counter, which the
+    # parser increments each time it opens a new FastaOutput report.
+    return $self->{'_result_count'};
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1280 @@
+# $Id: hmmer.pm,v 1.34.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::hmmer
+#
+# Cared for by Jason Stajich <jason@bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::hmmer - A parser for HMMER output (hmmpfam, hmmsearch)
+
+=head1 SYNOPSIS
+
+    # do not use this class directly; it is available through Bio::SearchIO
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer',
+                               -file   => 't/data/L77119.hmmer');
+    while( my $result = $in->next_result ) {
+        # this is a Bio::Search::Result::HMMERResult object
+        print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
+        while( my $hit = $result->next_hit ) {
+            print $hit->name(), "\n";
+            while( my $hsp = $hit->next_hsp ) {
+                print "length is ", $hsp->length(), "\n";
+            }
+        }
+    }
+
+=head1 DESCRIPTION
+
+This object implements a parser for HMMER output.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SearchIO::hmmer;
+
+use strict;
+
+use Bio::Factory::ObjectFactory;
+
+use vars qw(%MAPPING %MODEMAP
+);
+
+use base qw(Bio::SearchIO);
+
+BEGIN {
+
+    # element names given to start_element/end_element => handler type names
+    %MODEMAP = (
+        'HMMER_Output' => 'result',
+        'Hit'          => 'hit',
+        'Hsp'          => 'hsp'
+    );
+
+    %MAPPING = (    # element() names => 'HSP-'/'HIT-'/'RESULT-' prefixed keys
+        'Hsp_bit-score'   => 'HSP-bits',
+        'Hsp_score'       => 'HSP-score',
+        'Hsp_evalue'      => 'HSP-evalue',
+        'Hsp_query-from'  => 'HSP-query_start',
+        'Hsp_query-to'    => 'HSP-query_end',
+        'Hsp_hit-from'    => 'HSP-hit_start',
+        'Hsp_hit-to'      => 'HSP-hit_end',
+        'Hsp_positive'    => 'HSP-conserved',
+        'Hsp_identity'    => 'HSP-identical',
+        'Hsp_gaps'        => 'HSP-hsp_gaps',
+        'Hsp_hitgaps'     => 'HSP-hit_gaps',
+        'Hsp_querygaps'   => 'HSP-query_gaps',
+        'Hsp_qseq'        => 'HSP-query_seq',
+        'Hsp_hseq'        => 'HSP-hit_seq',
+        'Hsp_midline'     => 'HSP-homology_seq',
+        'Hsp_align-len'   => 'HSP-hsp_length',
+        'Hsp_query-frame' => 'HSP-query_frame',
+        'Hsp_hit-frame'   => 'HSP-hit_frame',
+
+        'Hit_id'        => 'HIT-name',
+        'Hit_len'       => 'HIT-length',
+        'Hit_accession' => 'HIT-accession',
+        'Hit_desc'      => 'HIT-description',
+        'Hit_signif'    => 'HIT-significance',
+        'Hit_score'     => 'HIT-score',
+
+        'HMMER_program'   => 'RESULT-algorithm_name',
+        'HMMER_version'   => 'RESULT-algorithm_version',
+        'HMMER_query-def' => 'RESULT-query_name',
+        'HMMER_query-len' => 'RESULT-query_length',
+        'HMMER_query-acc' => 'RESULT-query_accession',
+        'HMMER_querydesc' => 'RESULT-query_description',
+        'HMMER_hmm'       => 'RESULT-hmm_name',
+        'HMMER_seqfile'   => 'RESULT-sequence_file',
+        'HMMER_db'        => 'RESULT-database_name',
+    );
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::hmmer();
+ Function: Builds a new Bio::SearchIO::hmmer object 
+ Returns : Bio::SearchIO::hmmer
+ Args    : -fh/-file => HMMER filename
+           -format   => 'hmmer'
+
+=cut
+
+sub _initialize {
+    my ( $self, @args ) = @_;
+    $self->SUPER::_initialize(@args);
+    my $handler = $self->_eventHandler;
+
+    # One [ handler type, concrete class, required interface ] triple per
+    # kind of object the event handler builds; the factories are registered
+    # in result, hit, hsp order.
+    my @factory_specs = (
+        [ 'result', 'Bio::Search::Result::HMMERResult',
+            'Bio::Search::Result::ResultI' ],
+        [ 'hit', 'Bio::Search::Hit::HMMERHit', 'Bio::Search::Hit::HitI' ],
+        [ 'hsp', 'Bio::Search::HSP::HMMERHSP', 'Bio::Search::HSP::HSPI' ],
+    );
+    for my $spec (@factory_specs) {
+        my ( $type, $class, $interface ) = @{$spec};
+        $handler->register_factory(
+            $type,
+            Bio::Factory::ObjectFactory->new(
+                -type      => $class,
+                -interface => $interface
+            )
+        );
+    }
+
+    # Default banner line; next_result replaces it with the banner actually
+    # read from the report and pushes it back for multi-query reports.
+    $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)';
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+    my $seentop = 0;
+    my $reporttype;
+    my ( $last, @hitinfo, @hspinfo, %hspinfo, %hitinfo );
+    local $/ = "\n";
+    local $_;
+
+    my $verbose = $self->verbose;    # cache for speed?
+    $self->start_document();
+    local ($_);
+    while ( defined( $_ = $self->_readline ) ) {
+        my $lineorig = $_;
+        chomp;
+        if (/^HMMER\s+(\S+)\s+\((.+)\)/o) {
+            my ( $prog, $version ) = split;
+            if ($seentop) {
+                $self->_pushback($_);
+                $self->end_element( { 'Name' => 'HMMER_Output' } );
+                return $self->end_document();
+            }
+            $self->{'_hmmidline'} = $_;
+            $self->start_element( { 'Name' => 'HMMER_Output' } );
+            $self->{'_result_count'}++;
+            $seentop = 1;
+            if ( defined $last ) {
+                ($reporttype) = split( /\s+/, $last );
+                $self->element(
+                    {
+                        'Name' => 'HMMER_program',
+                        'Data' => uc($reporttype)
+                    }
+                );
+            }
+            $self->element(
+                {
+                    'Name' => 'HMMER_version',
+                    'Data' => $version
+                }
+            );
+        }
+        elsif (s/^HMM file:\s+//o) {
+            $self->{'_hmmfileline'} = $lineorig;
+            $self->element(
+                {
+                    'Name' => 'HMMER_hmm',
+                    'Data' => $_
+                }
+            );
+        }
+        elsif (s/^Sequence\s+(file|database):\s+//o) {
+            $self->{'_hmmseqline'} = $lineorig;
+            if ( $1 eq 'database' ) {
+                $self->element(
+                    {
+                        'Name' => 'HMMER_db',
+                        'Data' => $_
+                    }
+                );
+            }
+            $self->element(
+                {
+                    'Name' => 'HMMER_seqfile',
+                    'Data' => $_
+                }
+            );
+        }
+        elsif (s/^Query(\s+(sequence|HMM))?(?:\s+\d+)?:\s+//o) {
+            if ( !$seentop ) {
+
+                # we're in a multi-query report
+                $self->_pushback( $self->{'_hmmidline'} );
+                $self->_pushback( $self->{'_hmmfileline'} );
+                $self->_pushback( $self->{'_hmmseqline'} );
+                $self->_pushback($lineorig);
+                next;
+            }
+            s/\s+$//;
+            $self->element(
+                {
+                    'Name' => 'HMMER_query-def',
+                    'Data' => $_
+                }
+            );
+        }
+        elsif (s/^Accession:\s+//o) {
+            s/\s+$//;
+            $self->element(
+                {
+                    'Name' => 'HMMER_query-acc',
+                    'Data' => $_
+                }
+            );
+        }
+        elsif (s/^Description:\s+//o) {
+            s/\s+$//;
+            $self->element(
+                {
+                    'Name' => 'HMMER_querydesc',
+                    'Data' => $_
+                }
+            );
+        }
+        elsif ( defined $self->{'_reporttype'}
+            && $self->{'_reporttype'} eq 'HMMSEARCH' )
+        {
+
+            # PROCESS HMMSEARCH RESULTS HERE
+            if (/^Scores for complete sequences/o) {
+                while ( defined( $_ = $self->_readline ) ) {
+                    last if (/^\s+$/);
+                    next if ( /^Sequence\s+Description/o || /^\-\-\-/o );
+                    my @line = split;
+                    my ( $name, $n, $evalue, $score ) =
+                      ( shift @line, pop @line, pop @line, pop @line );
+                    my $desc = join( ' ', @line );
+                    push @hitinfo, [ $name, $desc, $evalue, $score ];
+                    $hitinfo{$name} = $#hitinfo;
+                }
+            }
+            elsif (/^Parsed for domains:/o) {
+                @hspinfo = ();
+
+                while ( defined( $_ = $self->_readline ) ) {
+                    last if (/^\s+$/);
+                    if (m!^//!) {
+                        $self->_pushback($_);
+                        last;
+                    }
+                    next if ( /^(Model|Sequence)\s+Domain/ || /^\-\-\-/ );
+
+                    chomp;
+                    if (
+                        my ( $n, $domainnum, $domainct, @vals ) = (
+                            m!^(\S+)\s+      # host name
+			(\d+)/(\d+)\s+   # num/num (ie 1 of 2) 
+			(\d+)\s+(\d+).+? # sequence start and end
+			(\d+)\s+(\d+)\s+ # hmm start and end
+			\S+\s+           # []
+			(\S+)\s+         # score
+			(\S+)            # evalue
+			\s*$!ox
+                        )
+                      )
+                    {
+
+                        # array lookup so that we can get rid of things
+                        # when they've been processed
+                        my $info = $hitinfo[ $hitinfo{$n} ];
+                        if ( !defined $info ) {
+                            $self->warn(
+"Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"
+                            );
+                            next;
+                        }
+                        push @hspinfo, [ $n, @vals ];
+                    }
+                }
+            }
+            elsif (/^Alignments of top/o) {
+                my ( $prelength, $lastdomain, $count, $width );
+                $count = 0;
+                my %domaincounter;
+                my $second_tier = 0;
+                while ( defined( $_ = $self->_readline ) ) {
+                    next if ( /^Align/o
+                        || /^\s+RF\s+[x\s]+$/o );
+                    if ( /^Histogram/o || m!^//!o ) {
+                        if ( $self->in_element('hsp') ) {
+                            $self->end_element( { 'Name' => 'Hsp' } );
+                        }
+                        if ( $self->within_element('hit') ) {
+                            $self->end_element( { 'Name' => 'Hit' } );
+                        }
+                        last;
+                    }
+                    chomp;
+
+                    if (
+                        m/^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+
+                        from\s+(\d+)\s+to\s+(\d+)/x
+                      )
+                    {
+                        my ( $name, $domainct, $domaintotal, $from, $to ) =
+                          ( $1, $2, $3, $4, $5 );
+                        $domaincounter{$name}++;
+                        if ( $self->within_element('hit') ) {
+                            if ( $self->within_element('hsp') ) {
+                                $self->end_element( { 'Name' => 'Hsp' } );
+                            }
+                            $self->end_element( { 'Name' => 'Hit' } );
+                        }
+
+                        $self->start_element( { 'Name' => 'Hit' } );
+                        my $info = [
+                            @{
+                                $hitinfo[ $hitinfo{$name} ] || $self->throw(
+"Could not find hit info for $name: Insure that your database contains only unique sequence names"
+                                )
+                              }
+                        ];
+                        if ( $info->[0] ne $name ) {
+                            $self->throw(
+"Somehow the Model table order does not match the order in the domains (got "
+                                  . $info->[0]
+                                  . ", expected $name)" );
+                        }
+                        $self->element(
+                            {
+                                'Name' => 'Hit_id',
+                                'Data' => shift @{$info}
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hit_desc',
+                                'Data' => shift @{$info}
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hit_signif',
+                                'Data' => shift @{$info}
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hit_score',
+                                'Data' => shift @{$info}
+                            }
+                        );
+
+                        $self->start_element( { 'Name' => 'Hsp' } );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_identity',
+                                'Data' => 0
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_positive',
+                                'Data' => 0
+                            }
+                        );
+                        my $HSPinfo = shift @hspinfo;
+                        my $id      = shift @$HSPinfo;
+
+                        if ( $id ne $name ) {
+                            $self->throw(
+"Somehow the domain list details do not match the table (got $id, expected $name)"
+                            );
+                        }
+                        if ( $domaincounter{$name} == $domaintotal ) {
+                            $hitinfo[ $hitinfo{$name} ] = undef;
+                        }
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_hit-from',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_hit-to',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_query-from',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_query-to',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_score',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_evalue',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $lastdomain = $name;
+                    }
+                    else {
+
+                        # Might want to change this so that it
+                        # accumulates all the of the alignment lines into
+                        # three array slots and then tests for the
+                        # end of the line
+                        if (/^(\s+\*\-\>)(\S+)/o) {    # start of domain
+                            $prelength = CORE::length($1);
+                            $width     = 0;
+
+                            # $width = CORE::length($2);
+                            $self->element(
+                                {
+                                    'Name' => 'Hsp_qseq',
+                                    'Data' => $2
+                                }
+                            );
+                            $count       = 0;
+                            $second_tier = 0;
+                        }
+                        elsif (/^(\s+)(\S+)\<\-\*\s*$/o) {    #end of domain
+                            $self->element(
+                                {
+                                    'Name' => 'Hsp_qseq',
+                                    'Data' => $2
+                                }
+                            );
+                            $width = CORE::length($2);
+                            $count = 0;
+                        }
+                        elsif (( $count != 1 && /^\s+$/o )
+                            || CORE::length($_) == 0
+                            || /^\s+\-?\*\s*$/ )
+                        {
+                            next;
+                        }
+                        elsif ( $count == 0 ) {
+                            $prelength -= 3 unless ( $second_tier++ );
+                            unless ( defined $prelength ) {
+
+                                # $self->warn("prelength not set");
+                                next;
+                            }
+                            $self->element(
+                                {
+                                    'Name' => 'Hsp_qseq',
+                                    'Data' => substr( $_, $prelength )
+                                }
+                            );
+                        }
+                        elsif ( $count == 1 ) {
+                            if ( !defined $prelength ) {
+                                $self->warn("prelength not set");
+                            }
+                            if ($width) {
+                                $self->element(
+                                    {
+                                        'Name' => 'Hsp_midline',
+                                        'Data' =>
+                                          substr( $_, $prelength, $width )
+                                    }
+                                );
+                            }
+                            else {
+                                $self->debug("midline is $_\n")
+                                  if ( $verbose > 0
+                                    && CORE::length($_) <= $prelength );
+                                $self->element(
+                                    {
+                                        'Name' => 'Hsp_midline',
+                                        'Data' => substr( $_, $prelength )
+                                    }
+                                );
+                            }
+                        }
+                        elsif ( $count == 2 ) {
+                            if (/^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o) {
+                                $self->element(
+                                    {
+                                        'Name' => 'Hsp_hseq',
+                                        'Data' => $3
+                                    }
+                                );
+                            }
+                            else {
+                                $self->warn("unrecognized line: $_\n");
+                            }
+                        }
+                        $count = 0 if $count++ >= 2;
+                    }
+                }
+            }
+            elsif ( /^Histogram/o || m!^//!o ) {
+                while ( my $HSPinfo = shift @hspinfo ) {
+                    my $id   = shift @$HSPinfo;
+                    my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
+                    next unless defined $info;
+                    $self->start_element( { 'Name' => 'Hit' } );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_id',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_desc',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_signif',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_score',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->start_element( { 'Name' => 'Hsp' } );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_query-from',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_query-to',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_hit-from',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_hit-to',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_score',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_evalue',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_identity',
+                            'Data' => 0
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_positive',
+                            'Data' => 0
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_positive',
+                            'Data' => 0
+                        }
+                    );
+                    $self->end_element( { 'Name' => 'Hsp' } );
+                    $self->end_element( { 'Name' => 'Hit' } );
+                }
+                @hitinfo = ();
+                %hitinfo = ();
+                last;
+            }
+        }
+        elsif ( defined $self->{'_reporttype'}
+            && $self->{'_reporttype'} eq 'HMMPFAM' )
+        {
+            # process HMMPFAM results here
+            if (/^Scores for sequence family/o) {
+                while ( defined( $_ = $self->_readline ) ) {
+                    last if (/^\s+$/);
+                    next if ( /^Model\s+Description/o || /^\-\-\-/o );
+                    chomp;
+                    my @line = split;
+                    my ( $model, $n, $evalue, $score ) =
+                      ( shift @line, pop @line, pop @line, pop @line );
+                    my $desc = join( ' ', @line );
+                    push @hitinfo, [ $model, $desc, $score, $evalue, $n ];
+                    $hitinfo{$model} = $#hitinfo;
+                }
+            }
+            elsif (/^Parsed for domains:/o) {
+                @hspinfo = ();
+                while ( defined( $_ = $self->_readline ) ) {
+                    last if (/^\s+$/);
+                    if (m!^//!) {
+                        $self->_pushback($_);
+                        last;
+                    }
+                    next if ( /^Model\s+Domain/o || /^\-\-\-/o );
+                    chomp;
+                    if (
+                        my ( $n, $domainnum, $domainct, @vals ) = (
+                            m!^(\S+)\s+         # domain name
+                            (\d+)/(\d+)\s+      # domain num out of num
+                            (\d+)\s+(\d+).+?    # seq start, end
+                            (\d+)\s+(\d+)\s+    # hmm start, end
+                            \S+\s+              # []
+                            (\S+)\s+            # score       
+                            (\S+)               # evalue
+                            \s*$!ox
+                        )
+                      )
+                    {
+                        my $hindex = $hitinfo{$n};
+                        if ( !defined $hindex ) {
+                            push @hitinfo,
+                              [ $n, '', $vals[5], $vals[6], $domainct ];
+                            $hitinfo{$n} = $#hitinfo;
+                            $hindex = $#hitinfo;
+                        }
+                        my $info = $hitinfo[$hindex];
+                        if ( !defined $info ) {
+                            $self->warn(
+"incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"
+                            );
+                            next;
+                        }
+                        push @hspinfo, [ $n, @vals ];
+                    }
+                }
+            }
+            elsif (/^Alignments of top/o) {
+                my ( $prelength, $lastdomain, $count, $width );
+                $count = 0;
+                my $second_tier = 0;
+                while ( defined( $_ = $self->_readline ) ) {
+                    next
+                      if (
+                        /^Align/o
+                        || ( $count != 1
+                            && /^\s+RF\s+[x\s]+$/o )
+                      );
+                    $self->debug("$count $_") if $verbose > 0;
+                    if ( /^Histogram/o || m!^//!o || /^Query sequence/o ) {
+                        if ( $self->in_element('hsp') ) {
+                            $self->end_element( { 'Name' => 'Hsp' } );
+                        }
+                        if ( $self->in_element('hit') ) {
+                            $self->end_element( { 'Name' => 'Hit' } );
+                        }
+                        $self->_pushback($_);
+                        last;
+                    }
+                    chomp;
+                    if (m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o) {
+                        my ( $name, $from, $to ) = ( $1, $2, $3 );
+
+                        if ( $self->within_element('hit') ) {
+                            if ( $self->in_element('hsp') ) {
+                                $self->end_element( { 'Name' => 'Hsp' } );
+                            }
+                            $self->end_element( { 'Name' => 'Hit' } );
+                        }
+                        my $info = [ @{ $hitinfo[ $hitinfo{$name} ] } ];
+                        if ( !defined $info
+                            || $info->[0] ne $name )
+                        {
+                            $self->warn(
+"Somehow the Model table order does not match the order in the domains (got "
+                                  . $info->[0]
+                                  . ", expected $name). We're back loading this from the alignment information instead"
+                            );
+                            $info = [
+                                $name, '',
+                                /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox
+                            ];
+                            push @hitinfo, $info;
+                            $hitinfo{$name} = $#hitinfo;
+                        }
+                        $self->start_element( { 'Name' => 'Hit' } );
+
+                        $self->element(
+                            {
+                                'Name' => 'Hit_id',
+                                'Data' => shift @{$info}
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hit_desc',
+                                'Data' => shift @{$info}
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hit_score',
+                                'Data' => shift @{$info}
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hit_signif',
+                                'Data' => shift @{$info}
+                            }
+                        );
+
+                        $self->start_element( { 'Name' => 'Hsp' } );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_identity',
+                                'Data' => 0
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_positive',
+                                'Data' => 0
+                            }
+                        );
+                        my $HSPinfo = shift @hspinfo;
+                        my $id      = shift @$HSPinfo;
+
+                        if ( $id ne $name ) {
+                            $self->throw(
+"Somehow the domain list details do not match the table (got $id, expected $name)"
+                            );
+                        }
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_query-from',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_query-to',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_hit-from',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_hit-to',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_score',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $self->element(
+                            {
+                                'Name' => 'Hsp_evalue',
+                                'Data' => shift @$HSPinfo
+                            }
+                        );
+                        $lastdomain = $name;
+                    }
+                    else {
+                        if (/^(\s+\*\-\>)(\S+)/o) {
+
+                            # start of domain
+                            $prelength = CORE::length($1);
+                            $width     = 0;
+
+                            # $width = CORE::length($2);
+                            $self->element(
+                                {
+                                    'Name' => 'Hsp_hseq',
+                                    'Data' => $2
+                                }
+                            );
+                            $count       = 0;
+                            $second_tier = 0;
+
+                        }
+                        elsif (/^(\s+)(\S+)\<\-?\*?\s*$/o) {
+
+                            #end of domain
+                            $prelength -= 3 unless ( $second_tier++ );
+                            $self->element(
+                                {
+                                    'Name' => 'Hsp_hseq',
+                                    'Data' => $2
+                                }
+                            );
+                            $width = CORE::length($2);
+                            $count = 0;
+                        }
+                        elsif (CORE::length($_) == 0
+                            || ( $count != 1 && /^\s+$/o )
+                            || /^\s+\-?\*\s*$/ )
+                        {
+                            next;
+                        }
+                        elsif ( $count == 0 ) {
+                            $prelength -= 3 unless ( $second_tier++ );
+                            unless ( defined $prelength ) {
+
+                                # $self->warn("prelength not set");
+                                next;
+                            }
+                            $self->element(
+                                {
+                                    'Name' => 'Hsp_hseq',
+                                    'Data' => substr( $_, $prelength )
+                                }
+                            );
+                        }
+                        elsif ( $count == 1 ) {
+                            if ( !defined $prelength ) {
+                                $self->warn("prelength not set");
+                            }
+                            if ($width) {
+                                $self->element(
+                                    {
+                                        'Name' => 'Hsp_midline',
+                                        'Data' =>
+                                          substr( $_, $prelength, $width )
+                                    }
+                                );
+                            }
+                            else {
+                                $self->element(
+                                    {
+                                        'Name' => 'Hsp_midline',
+                                        'Data' => substr( $_, $prelength )
+                                    }
+                                );
+                            }
+                        }
+                        elsif ( $count == 2 ) {
+                            if (   /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o
+                                || /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o )
+                            {
+                                $self->element(
+                                    {
+                                        'Name' => 'Hsp_qseq',
+                                        'Data' => $3
+                                    }
+                                );
+                            }
+                            else {
+                                $self->throw(
+                                    "unrecognized line ($count): $_\n");
+                            }
+                        }
+                        $count = 0 if $count++ >= 2;
+                    }
+                }
+            }
+            elsif ( /^Histogram/o || m!^//!o ) {
+
+                while ( my $HSPinfo = shift @hspinfo ) {
+                    my $id   = shift @$HSPinfo;
+                    my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
+                    next unless defined $info;
+                    $self->start_element( { 'Name' => 'Hit' } );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_id',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_desc',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_signif',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hit_score',
+                            'Data' => shift @{$info}
+                        }
+                    );
+                    $self->start_element( { 'Name' => 'Hsp' } );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_query-from',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_query-to',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_hit-from',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_hit-to',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_score',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_evalue',
+                            'Data' => shift @$HSPinfo
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_identity',
+                            'Data' => 0
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_positive',
+                            'Data' => 0
+                        }
+                    );
+                    $self->element(
+                        {
+                            'Name' => 'Hsp_positive',
+                            'Data' => 0
+                        }
+                    );
+                    $self->end_element( { 'Name' => 'Hsp' } );
+                    $self->end_element( { 'Name' => 'Hit' } );
+                }
+                @hitinfo = ();
+                %hitinfo = ();
+                last;
+            }
+            else {
+                $self->debug($_) if $verbose > 0;
+            }
+        }
+        $last = $_;
+    }
+    $self->end_element( { 'Name' => 'HMMER_Output' } ) unless !$seentop;
+    return $self->end_document();
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element {
+    my ( $self, $data ) = @_;
+
+    # Attributes are not interpreted here; they are only forwarded to the
+    # event handler.
+    my $element_name = $data->{'Name'};
+    my $mode         = $MODEMAP{$element_name};
+
+    if ($mode) {
+
+        # Structural element: tell the event handler about it (when the
+        # handler accepts this kind of event) and push it on the stack of
+        # currently open elements.
+        if ( $self->_eventHandler->will_handle($mode) ) {
+            my $callback = 'start_' . lc($mode);
+            $self->_eventHandler->$callback( $data->{'Attributes'} );
+        }
+        unshift @{ $self->{'_elements'} }, $mode;
+    }
+
+    # Opening a result discards whatever was collected for the last one.
+    if ( defined $mode && $mode eq 'result' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+    }
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    my ( $self, $data ) = @_;
+    my $element_name = $data->{'Name'};
+    my $mode         = $MODEMAP{$element_name};
+    my $handler_result;
+
+    # Remember the report type named in the program element, upper-cased.
+    if (   $element_name eq 'HMMER_program'
+        && $self->{'_last_data'} =~ /(HMM\S+)/i )
+    {
+        $self->{'_reporttype'} = uc $1;
+    }
+
+    # An Hsp only ends when the next object begins, so the alignment
+    # strings gathered while it was open are emitted here, just before
+    # the Hsp itself is closed.
+    if ( $element_name eq 'Hsp' ) {
+        for my $piece (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
+            $self->element(
+                {
+                    'Name' => $piece,
+                    'Data' => $self->{'_last_hspdata'}->{$piece}
+                }
+            );
+        }
+        $self->{'_last_hspdata'} = {};
+    }
+
+    if ($mode) {
+
+        # Structural element: hand the collected values to the event
+        # handler and pop the element off the stack.
+        if ( $self->_eventHandler->will_handle($mode) ) {
+            my $callback = 'end_' . lc($mode);
+            $handler_result =
+              $self->_eventHandler->$callback( $self->{'_reporttype'},
+                $self->{'_values'} );
+        }
+        shift @{ $self->{'_elements'} };
+    }
+    elsif ( my $target = $MAPPING{$element_name} ) {
+
+        # Data element: file the most recent character data under the
+        # mapped key (one level deeper when the mapping is a hash).
+        if ( ref($target) =~ /hash/i ) {
+            my $outer_key = ( keys %{$target} )[0];
+            $self->{'_values'}->{$outer_key}->{ $target->{$outer_key} } =
+              $self->{'_last_data'};
+        }
+        else {
+            $self->{'_values'}->{$target} = $self->{'_last_data'};
+        }
+    }
+    else {
+        $self->debug("unknown nm $element_name, ignoring\n");
+    }
+
+    # Character data belongs to the element that just ended; clear it.
+    $self->{'_last_data'} = '';
+    $self->{'_result'} = $handler_result
+      if ( defined $mode && $mode eq 'result' );
+    return $handler_result;
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element {
+    my $self = shift;
+    my ($event) = @_;
+
+    # One complete element: open it, deliver its data, close it.  The
+    # value of end_element is passed back to the caller.
+    $self->start_element($event);
+    $self->characters($event);
+    return $self->end_element($event);
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character events
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters {
+    my ( $self, $data ) = @_;
+    my $text = $data->{'Data'};
+
+    # Alignment strings arrive in several pieces while an Hsp is open;
+    # append each piece to what has been gathered for that element name.
+    if (   $self->in_element('hsp')
+        && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
+        && defined $text )
+    {
+        $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $text;
+    }
+
+    # Missing or whitespace-only data never replaces the last real data.
+    return if ( !defined $text || $text =~ /^\s+$/o );
+
+    $self->{'_last_data'} = $text;
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub within_element {
+    my ( $self, $name ) = @_;
+
+    # Nothing to search without a name or without any open element.
+    return 0 unless defined $name;
+    return 0 unless defined $self->{'_elements'};
+    return 0 unless @{ $self->{'_elements'} };
+
+    # True when the name appears anywhere on the stack of open elements.
+    return ( grep { $_ eq $name } @{ $self->{'_elements'} } ) ? 1 : 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within' because 'in' only 
+           tests its immediate parent.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub in_element {
+    my ( $self, $name ) = @_;
+
+    # Only the most recently opened element (top of the stack) counts.
+    my $innermost = $self->{'_elements'}->[0];
+    return 0 unless defined $innermost;
+    return ( $innermost eq $name );
+}
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handle a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document {
+    my $self = shift;
+
+    # Reset the per-document parsing state in one go ...
+    @{$self}{qw(_lasttype _values _result)} = ( '', {}, undef );
+
+    # ... and start with an empty stack of open elements.
+    $self->{'_elements'} = [];
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document {
+    my $self = shift;
+
+    # The result is whatever end_element stored when the result element
+    # was closed.
+    return $self->{'_result'};
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+
+    # Plain read of the stored counter; nothing is computed here.
+    return $self->{'_result_count'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer_pull.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer_pull.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer_pull.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,271 @@
+# $Id: hmmer_pull.pm,v 1.1.2.4 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::hmmer_pull
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::hmmer_pull - A parser for HMMER output
+
+=head1 SYNOPSIS
+
+    # do not use this class directly it is available through Bio::SearchIO
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'hmmer_pull',
+                               -file   => 't/data/hmmpfam.bigout');
+    while (my $result = $in->next_result) {
+        # this is a Bio::Search::Result::HmmpfamResult object
+        print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
+        while (my $hit = $result->next_hit) {
+            print $hit->name(), "\n";
+            while (my $hsp = $hit->next_hsp) {
+                print "length is ", $hsp->length(), "\n";
+            }
+        }
+    }
+
+=head1 DESCRIPTION
+
+This object implements a pull-parser for HMMER output. It is fast since it
+only does work on request (hence 'pull').
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SearchIO::hmmer_pull;
+
+use strict;
+
+
+use base qw(Bio::SearchIO Bio::PullParserI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::hmmer_pull();
+ Function: Builds a new Bio::SearchIO::hmmer_pull object 
+ Returns : Bio::SearchIO::hmmer_pull
+ Args    : -fh/-file => HMMER output filename
+           -format   => 'hmmer_pull'
+           -evalue   => float or scientific notation number to be used
+                        as an evalue cutoff for hits
+           -score    => integer or scientific notation number to be used
+                        as a score value cutoff for hits
+           -hsps     => integer minimum number of hsps (domains) a hit must have
+           -piped_behaviour => 'temp_file'|'memory'|'sequential_read'
+
+           -piped_behaviour defines what the parser should do if the input is
+            an unseekable filehandle (eg. piped input), see
+            Bio::PullParserI::chunk for details. Default is 'sequential_read'.
+
+=cut
+
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # The normal SearchIO initialization is deliberately not run here.
+    my ($writer, $file, $fh, $piped_behaviour, $evalue, $score, $hsps) =
+        $self->_rearrange([qw(WRITER FILE FH PIPED_BEHAVIOUR
+                              EVALUE SCORE HSPS)], @args);
+    if ($writer) {
+        $self->writer($writer);
+    }
+
+    # All fields this parser knows about; the cutoffs start out as the
+    # '[unset]' marker.
+    my %fields = (
+        header              => undef,
+        algorithm           => undef,
+        algorithm_version   => undef,
+        algorithm_reference => '',
+        hmm_file            => undef,
+        hmm_name            => undef,
+        sequence_file       => undef,
+        sequence_database   => undef,
+        database_name       => undef,
+        database_letters    => undef,
+        database_entries    => undef,
+        next_result         => undef,
+        evalue_cutoff       => '[unset]',
+        score_cutoff        => '[unset]',
+        hsps_cutoff         => '[unset]',
+    );
+    $self->_fields(\%fields);
+
+    # A cutoff is only recorded when the caller supplied a true value.
+    my @cutoffs = ( [ evalue_cutoff => $evalue ],
+                    [ score_cutoff  => $score ],
+                    [ hsps_cutoff   => $hsps ] );
+    for my $cutoff (@cutoffs) {
+        my ($field, $value) = @{$cutoff};
+        $self->_fields->{$field} = $value if $value;
+    }
+
+    # Each of these fields is discovered by parsing the header.
+    my %dependencies = map { $_ => 'header' }
+        qw(algorithm algorithm_version hmm_file hmm_name
+           sequence_file sequence_database);
+    $self->_dependencies(\%dependencies);
+
+    my $source = $file || $fh
+        || $self->throw("-file or -fh must be supplied");
+    $self->chunk($source,
+                 -piped_behaviour => $piped_behaviour || 'sequential_read');
+}
+
+sub _discover_header {
+    my ($self) = @_;
+
+    # Read the header from the very start of the input (presumably the
+    # first 8 lines - confirm against _get_chunk_by_nol) and remember
+    # where it ends.
+    $self->_chunk_seek(0);
+    my $header_text = $self->_get_chunk_by_nol(8);
+    $self->{_after_header} = $self->_chunk_tell;
+
+    # Pull the individual facts out of the header text.
+    my ($program)  = $header_text =~ /^(hmm\S+) - search/m;
+    my ($version)  = $header_text =~ /^HMMER\s+?(\S+)/m;
+    my ($hmm_file) = $header_text =~ /^HMM file:\s.+?(\S+)$/m;
+    my ($seq_file) = $header_text =~ /^Sequence (?:file|database):\s.+?(\S+)$/m;
+
+    $self->_fields->{algorithm}         = uc $program;
+    $self->_fields->{algorithm_version} = $version;
+
+    # The HMM name and the sequence database are simply the file names.
+    $self->_fields->{hmm_file}          = $hmm_file;
+    $self->_fields->{hmm_name}          = $hmm_file;
+    $self->_fields->{sequence_file}     = $seq_file;
+    $self->_fields->{sequence_database} = $seq_file;
+
+    # Mark the header as parsed.
+    $self->_fields->{header} = 1;
+}
+
+sub _discover_database_name {
+    my ($self) = @_;
+    my $algorithm = $self->get_field('algorithm');
+
+    # Which field holds the database depends on the program: the HMM file
+    # for hmmpfam, the sequence file for hmmsearch.
+    my $source_field;
+    if    ($algorithm eq 'HMMPFAM')   { $source_field = 'hmm_file' }
+    elsif ($algorithm eq 'HMMSEARCH') { $source_field = 'sequence_file' }
+
+    # Any other program leaves database_name untouched.
+    return unless defined $source_field;
+    $self->_fields->{database_name} = $self->get_field($source_field);
+}
+
+sub _discover_next_result {
+    my ($self) = @_;
+
+    # Asking for the algorithm also sets _after_header if it is not set.
+    my $algorithm = $self->get_field('algorithm');
+
+    # Only hmmpfam reports can be split into results so far.
+    if ($algorithm ne 'HMMPFAM') {
+        return $self->throw($algorithm eq 'HMMSEARCH'
+                            ? "Can't handle hmmsearch yet\n"
+                            : "Unknown report type");
+    }
+
+    use Bio::Search::Result::HmmpfamResult;
+
+    if ($self->_sequential) {
+        # Chunks read sequentially are deliberately not cached, which
+        # means rewind won't work; caching them would be no better than
+        # choosing the 'memory' option of -piped_behaviour.
+        my $text = $self->_get_chunk_by_end("//\n");
+        return unless $text;
+        $self->_fields->{next_result} =
+            Bio::Search::Result::HmmpfamResult->new(-chunk  => [$text],
+                                                    -parent => $self);
+    }
+    else {
+        # Continue after the previous result, or after the header when
+        # this is the first one.
+        my $resume_at = $self->{_end_of_previous_result}
+                     || $self->{_after_header};
+        $self->_chunk_seek($resume_at);
+
+        # Each result ends with a "//" line; a zero-length chunk means
+        # there are no more results.
+        my ($from, $to) = $self->_find_chunk_by_end("//\n");
+        return if $from == $to;
+        $self->_fields->{next_result} =
+            Bio::Search::Result::HmmpfamResult->new(
+                -chunk  => [ $self->chunk, $from, $to ],
+                -parent => $self);
+
+        $self->{_end_of_previous_result} = $to;
+    }
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+
+    # Nothing (left) to return when no further result can be discovered.
+    my $result = $self->get_field('next_result');
+    return unless $result;
+
+    # Clear the field so the following call has to discover a new result.
+    $self->_fields->{next_result} = undef;
+
+    $self->{'_result_count'} += 1;
+    return $result;
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed.
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+
+    # The counter is incremented by next_result for every result returned.
+    return $self->{'_result_count'};
+}
+
+=head2 rewind
+
+ Title   : rewind
+ Usage   : $searchio->rewind;
+ Function: Allow one to reset the Result iterator to the beginning, so that
+           next_result() will subsequently return the first result and so on.
+
+           NB: result objects are not cached, so you will get new result objects
+           each time you rewind. Also, note that result_count() counts the
+           number of times you have called next_result(), so it will not be able
+           to tell you how many results there were in the file if you use rewind().
+
+ Returns : n/a
+ Args    : none
+
+=cut
+
+sub rewind {
+    my ($self) = @_;
+
+    # Sequentially read input cannot be revisited; say so.
+    $self->warn("rewind has no effect on piped input when you have chosen 'sequential_read' mode")
+        if $self->_sequential;
+
+    # Forgetting where the previous result ended makes the next discovery
+    # start again right after the header.
+    delete $self->{_end_of_previous_result};
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/hmmer_pull.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/megablast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/megablast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/megablast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,515 @@
+# $Id: megablast.pm,v 1.7.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::megablast
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::megablast - a driver module for Bio::SearchIO to parse
+megablast reports (format 0)
+
+=head1 SYNOPSIS
+
+# do not use this module directly
+
+  use Bio::SearchIO;
+  # for default format output from megablast
+  my $in = new Bio::SearchIO(-file   => 'file.mbl',
+                             -format => 'megablast',
+                             -report_format => 0);
+
+  while( my $r = $in->next_result ) {
+    while( my $hit = $r->next_hit ) {
+      while( my $hsp = $hit->next_hsp ) {
+      }
+    }
+  }
+
+=head1 DESCRIPTION
+
+Beware!
+
+Because of the way megablast report format 0 is coded, realize that score
+means # gaps + # mismatches for a HSP.
+
+The docs from NCBI regarding FORMAT 0
+#   0: Produce one-line output for each alignment, in the form
+#
+#   'subject-id'=='[+-]query-id' (s_off q_off s_end q_end) score
+#
+#   Here subject(query)-id is a gi number, an accession or some other type of
+#   identifier found in the FASTA definition line of the respective sequence.
+#
+#   + or - corresponds to same or different strand alignment.
+#
+#   Score for non-affine gapping parameters means the total number of
+#   differences (mismatches + gaps). For affine case it is the actual (raw)
+#   score of the alignment.
+
+FORMAT 1 parsing has not been implemented
+FORMAT 2 parsing should work with the SearchIO 'blast' parser
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::megablast;
+use strict;
+use vars qw(%MAPPING %MODEMAP $DEFAULT_BLAST_WRITER_CLASS);
+
+use base qw(Bio::SearchIO);
+
+BEGIN {
+    # Report elements that open and close a result, hit or HSP, with the
+    # event type each one stands for.
+    %MODEMAP = (
+        'MegaBlastOutput' => 'result',
+        'Hit'             => 'hit',
+        'Hsp'             => 'hsp',
+    );
+
+    # MegaBlast data element names and the Bioperl hash keys they map to.
+    # (Ideally this translation would be done more cleverly, e.g. XSLT.)
+    %MAPPING = (
+        # result level
+        'MegaBlastOutput_program'   => 'RESULT-algorithm_name',
+        'MegaBlastOutput_query-def' => 'RESULT-query_name',
+
+        # hit level
+        'Hit_id'                    => 'HIT-name',
+        'Hit_score'                 => 'HIT-score',
+
+        # HSP level
+        'Hsp_score'                 => 'HSP-score',
+        'Hsp_query-from'            => 'HSP-query_start',
+        'Hsp_query-to'              => 'HSP-query_end',
+        'Hsp_hit-from'              => 'HSP-hit_start',
+        'Hsp_hit-to'                => 'HSP-hit_end',
+        'Hsp_identity'              => 'HSP-identical',
+        'Hsp_positive'              => 'HSP-conserved',
+    );
+
+    $DEFAULT_BLAST_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
+}
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::megablast(-report_format => 0);
+ Function: Builds a new Bio::SearchIO::megablast object
+ Returns : Bio::SearchIO::megablast
+ Args    : -fh/-file      => filehandle/filename to megablast report file
+           -format        => 'megablast'
+           -report_format => megablast report format (required)
+
+=cut
+
+sub _initialize {
+    # Runs the parent class initialization with the same arguments, then
+    # records the -report_format argument.  Throws when -report_format was
+    # not supplied; returns 1 otherwise.
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    my ($fmt) = $self->_rearrange([qw(REPORT_FORMAT)], @args);
+
+    # The report format is mandatory; an undefined value is an error.
+    $self->throw("Must have provided a value for -report_format when initializing a megablast parser") unless defined $fmt ;
+    $self->report_format($fmt);
+    return 1;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result{
+    my $self = shift;
+
+    # Read line by line and leave the caller's $/ and $_ alone.
+    local $/ = "\n";
+    local $_;
+
+    my $format = $self->report_format;
+    my ( $prev_query, $prev_hit );
+
+  LINE:
+    while ( defined( $_ = $self->_readline ) ) {
+
+        # Only report format 0 is parsed; with any other format the lines
+        # are read and dropped.
+        next LINE unless $format == 0;
+
+        # Format 0 has one line per alignment (NCBI docs):
+        #   'subject-id'=='[+-]query-id' (s_off q_off s_end q_end) score
+        # The ids come from the FASTA definition lines, and +/- tells
+        # whether the alignment is on the same or on different strands.
+        unless( /^\'(\S+)\'\=\=\'(\+|\-)(\S+)\'\s+
+	       \((\d+)\s+(\d+)\s+(\d+)\s+(\d+)\)\s+
+	       (\d+)/ox )
+        {
+            $self->debug("Unknown line in fmt0 parsing: $_");
+            next LINE;
+        }
+        my ( $hit, $strand, $query ) = ( $1, $2, $3 );
+        my ( $h_start, $q_start, $h_end, $q_end ) = ( $4, $5, $6, $7 );
+        my $score = $8;
+
+        if ( defined $prev_query ) {
+
+            # A different query means the current result is complete:
+            # put the line back for the next call and finish up.
+            if ( $prev_query ne $query ) {
+                $self->_pushback($_);
+                $self->end_element( { 'Name' => 'Hit' } )
+                  if defined $prev_hit;
+                $self->end_element( { 'Name' => 'MegaBlastOutput' } );
+                return $self->end_document();
+            }
+        }
+        else {
+
+            # First alignment of this call: open the result.
+            $self->start_element( { 'Name' => 'MegaBlastOutput' } );
+            $self->element(
+                {
+                    'Name' => 'MegaBlastOutput_program',
+                    'Data' => 'MEGABLAST'
+                }
+            );
+            $self->element(
+                {
+                    'Name' => 'MegaBlastOutput_query-def',
+                    'Data' => $query
+                }
+            );
+        }
+
+        # Open a new hit whenever the subject id changes.
+        my $same_hit = defined $prev_hit && $prev_hit eq $hit;
+        unless ($same_hit) {
+            $self->end_element( { 'Name' => 'Hit' } ) if defined $prev_hit;
+            $self->start_element( { 'Name' => 'Hit' } );
+            $self->element( { 'Name' => 'Hit_id', 'Data' => $hit } );
+        }
+
+        # Strand is inferred from coordinate order.  According to the
+        # NCBI description all queries would be on the forward strand,
+        # yet reverse strand lines (seen with MBL 2.2.4) look like
+        #   'Contig634'=='-503384' (1 7941 321 7620) 19
+        # i.e. the query is reversed and the subject is forward, so both
+        # coordinate pairs are flipped whenever a '-' is seen.
+        ( $h_start, $h_end, $q_start, $q_end ) =
+          ( $h_end, $h_start, $q_end, $q_start )
+          if $strand eq '-';
+
+        # For non-affine gapping the score is the number of differences
+        # (mismatches + gaps); for the affine case it is the raw score.
+        # The identity count might not be quite right -- the real HSP
+        # length would be needed.
+        my $numid = ( abs( $q_end - $q_start ) - $score );
+
+        $self->start_element( { 'Name' => 'Hsp' } );
+        my @hsp_fields = (
+            [ 'Hsp_score'      => $score ],
+            [ 'Hsp_hit-from'   => $h_start ],
+            [ 'Hsp_hit-to'     => $h_end ],
+            [ 'Hsp_query-from' => $q_start ],
+            [ 'Hsp_query-to'   => $q_end ],
+            [ 'Hsp_identity'   => $numid ],
+            [ 'Hsp_positive'   => $numid ],
+        );
+        for my $field (@hsp_fields) {
+            $self->element(
+                { 'Name' => $field->[0], 'Data' => $field->[1] } );
+        }
+        $self->end_element( { 'Name' => 'Hsp' } );
+
+        $prev_hit   = $hit;
+        $prev_query = $query;
+    }
+
+    # Input exhausted: without any parsed alignment there is no result.
+    return 0 unless ( defined $prev_query && $format == 0 );
+
+    $self->end_element( { 'Name' => 'Hit' } ) if defined $prev_hit;
+    $self->end_element( { 'Name' => 'MegaBlastOutput' } );
+    return $self->end_document();
+}
+
+=head2 report_format
+
+ Title   : report_format
+ Usage   : $obj->report_format($newval)
+ Function: Accessor for the value kept under the '_report_format'
+           key of the object
+ Returns : value of report_format (a scalar); on set, the value
+           that was just stored
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub report_format {
+    my ($self, @newval) = @_;
+    if (@newval) {
+        # an explicitly passed undef still counts as a set
+        $self->{'_report_format'} = $newval[0];
+    }
+    return $self->{'_report_format'};
+}
+
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event.  For element names listed
+           in %MODEMAP the mode is updated, the event handler is
+           notified (if it handles that type) and the type is put on
+           the front of the open-element stack.  Starting
+           'MegaBlastOutput' additionally resets the per-result state.
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+=cut
+
+sub start_element {
+    my ($self, $event) = @_;
+
+    # attributes are not interpreted here, merely handed on
+    my $nm   = $event->{'Name'};
+    my $type = $MODEMAP{$nm};
+
+    if ($type) {
+        $self->_mode($type);
+        my $handler = $self->_eventHandler;
+        if ( $handler->will_handle($type) ) {
+            my $func = 'start_' . lc $type;
+            $handler->$func( $event->{'Attributes'} );
+        }
+        unshift @{ $self->{'_elements'} }, $type;
+    }
+
+    # a new top-level element wipes whatever the last result left behind
+    if ( $nm eq 'MegaBlastOutput' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        $self->{'_mode'}   = '';
+    }
+
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event.  Elements listed in %MODEMAP
+           are closed through the event handler; elements listed in
+           %MAPPING store the most recently seen character data in the
+           '_values' hash; anything else triggers a warning.
+ Returns : whatever the event handler's end_* method returned when a
+           %MODEMAP element was closed, undef otherwise
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+=cut
+
+sub end_element {
+    my ($self, $event) = @_;
+    my $nm = $event->{'Name'};
+    my $rc;
+
+    if ( my $type = $MODEMAP{$nm} ) {
+        my $handler = $self->_eventHandler;
+        if ( $handler->will_handle($type) ) {
+            my $func = 'end_' . lc $type;
+            $rc = $handler->$func( $self->{'_reporttype'},
+                                   $self->{'_values'} );
+        }
+        shift @{ $self->{'_elements'} };
+    }
+    elsif ( my $target = $MAPPING{$nm} ) {
+        my $value = $self->{'_last_data'};
+        if ( ref($target) =~ /hash/i ) {
+            # a hash entry maps to a nested slot: { outer => inner }
+            my ($key) = keys %{$target};
+            $self->{'_values'}->{$key}->{ $target->{$key} } = $value;
+        }
+        else {
+            $self->{'_values'}->{$target} = $value;
+        }
+    }
+    else {
+        $self->warn( "unknown nm $nm ignoring\n");
+    }
+
+    # character data belongs to one element only, so forget it now
+    $self->{'_last_data'} = '';
+    if ( $nm eq 'MegaBlastOutput' ) {
+        $self->{'_result'} = $rc;
+    }
+    return $rc;
+
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters
+           and end_element, in that order, with the same hash ref
+ Returns : the return value of the end_element call
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+=cut
+
+sub element {
+    my ($self, $event) = @_;
+    $self->start_element($event);
+    $self->characters($event);
+    return $self->end_element($event);
+}
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character event: remembers the 'Data' value of the
+           event as the last seen data.  Events without defined
+           'Data' are ignored.
+ Returns : none
+ Args    : hash ref with the key 'Data'
+
+=cut
+
+sub characters {
+    my ($self, $event) = @_;
+    my $text = $event->{'Data'};
+    return unless defined $text;
+    return $self->{'_last_data'} = $text;
+}
+
+=head2 _mode
+
+ Title   : _mode
+ Usage   : $obj->_mode($newval)
+ Function: Get/Set the current mode; an undefined argument leaves
+           the stored value untouched
+ Example :
+ Returns : value of _mode
+ Args    : newvalue (optional)
+
+=cut
+
+sub _mode {
+    my $self  = shift;
+    my $value = shift;
+    $self->{'_mode'} = $value if defined $value;
+    return $self->{'_mode'};
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element, i.e. whether
+           it appears anywhere on the stack of currently open
+           elements.  This is different than 'in' because within can
+           be tested for a whole block.
+ Returns : boolean (1 or 0); 0 when no name is given or no element
+           is open
+ Args    : string element name
+
+=cut
+
+sub within_element {
+    my ($self, $name) = @_;
+    my $open = $self->{'_elements'};
+
+    # Each condition must bail out on its own.  The earlier
+    # "A && B || C" form dereferenced an undefined stack and went on
+    # to compare against an undefined name.
+    return 0 if !defined $name
+             || !defined $open
+             || !@{$open};
+
+    foreach my $element ( @{$open} ) {
+        return 1 if $element eq $name;
+    }
+    return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element.  Only the first
+           entry of the open-element stack (the most recently
+           started element) is compared; enclosing elements are not
+           considered, which is what within_element is for.
+ Returns : boolean
+ Args    : string element name
+
+=cut
+
+sub in_element {
+    my ($self, $name) = @_;
+    my $current = $self->{'_elements'}->[0];
+    return defined $current ? ( $current eq $name ) : 0;
+}
+
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handles a start document event by resetting the parser
+           state: last type, collected values, result, mode and the
+           stack of open elements
+ Returns : none
+ Args    : none
+
+=cut
+
+sub start_document {
+    my $self = shift;
+
+    # plain scalar state
+    foreach my $key (qw(_lasttype _mode)) {
+        $self->{$key} = '';
+    }
+    $self->{'_result'} = undef;
+
+    # containers; the element stack stays the final statement
+    $self->{'_values'}   = {};
+    $self->{'_elements'} = [];
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object (whatever is stored
+           under '_result'; undef if no result was completed)
+ Args    : none
+
+=cut
+
+sub end_document {
+    # further arguments are accepted but not used
+    my ($self, @args) = @_;
+    return $self->{'_result'};
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed, as kept
+           under the '_result_count' key
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+    my $count = $self->{'_result_count'};
+    return $count;
+}
+
+# report_count simply forwards to result_count
+sub report_count {
+    my $self = shift;
+    return $self->result_count;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/psl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/psl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/psl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,558 @@
+# $Id: psl.pm,v 1.13.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::psl
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::psl - A parser for PSL output (UCSC)
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  my $parser = new Bio::SearchIO(-file   => 'file.psl',
+                                 -format => 'psl');
+  while( my $result = $parser->next_result ) {
+  }
+
+=head1 DESCRIPTION
+
+This is a SearchIO driver for PSL format.
+PSL format is documented here:
+http://genome.ucsc.edu/goldenPath/help/customTrack.html#PSL
+
+By default it assumes the PSL output came from BLAT; you can override
+that by specifying -program_name =E<gt> 'BLASTZ' when initializing the
+SearchIO object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::psl;
+use vars qw(%MAPPING %MODEMAP $DEFAULT_WRITER_CLASS $DefaultProgramName);
+
+use strict;
+use Bio::Search::HSP::HSPFactory;
+use Bio::Search::Hit::HitFactory;
+use Bio::Search::Result::ResultFactory;
+
+# program name reported when the caller does not pass -program_name
+$DefaultProgramName = 'BLAT';
+
+# the writer classes live under Bio::SearchIO::Writer
+$DEFAULT_WRITER_CLASS = 'Bio::SearchIO::Writer::HitTableWriter';
+
+# element names that open/close an object, and the kind of object
+# (used to build the start_*/end_* handler method names)
+%MODEMAP = (
+    'PSLOutput' => 'result',
+    'Result'    => 'result',
+    'Hit'       => 'hit',
+    'Hsp'       => 'hsp'
+);
+
+# mapping of terms to Bioperl hash keys
+%MAPPING = (
+    'Hsp_bit-score'   => 'HSP-bits',
+    'Hsp_score'       => 'HSP-score',
+    'Hsp_evalue'      => 'HSP-evalue',
+    'Hsp_query-from'  => 'HSP-query_start',
+    'Hsp_query-to'    => 'HSP-query_end',
+    'Hsp_hit-from'    => 'HSP-hit_start',
+    'Hsp_hit-to'      => 'HSP-hit_end',
+    'Hsp_positive'    => 'HSP-conserved',
+    'Hsp_identity'    => 'HSP-identical',
+    'Hsp_mismatches'  => 'HSP-mismatches',
+    'Hsp_qgapblocks'  => 'HSP-query_gapblocks',
+    'Hsp_hgapblocks'  => 'HSP-hit_gapblocks',
+    'Hsp_gaps'        => 'HSP-hsp_gaps',
+    'Hsp_hitgaps'     => 'HSP-hit_gaps',
+    'Hsp_querygaps'   => 'HSP-query_gaps',
+    'Hsp_align-len'   => 'HSP-hsp_length',
+    'Hsp_query-frame' => 'HSP-query_frame',
+    'Hsp_hit-frame'   => 'HSP-hit_frame',
+
+    'Hit_id'        => 'HIT-name',
+    'Hit_len'       => 'HIT-length',
+    'Hit_accession' => 'HIT-accession',
+    'Hit_def'       => 'HIT-description',
+    'Hit_signif'    => 'HIT-significance',
+    'Hit_score'     => 'HIT-score',
+    'Hit_bits'      => 'HIT-bits',
+
+    'PSLOutput_program'   => 'RESULT-algorithm_name',
+    'PSLOutput_version'   => 'RESULT-algorithm_version',
+    'PSLOutput_query-def' => 'RESULT-query_name',
+    'PSLOutput_query-len' => 'RESULT-query_length',
+    'PSLOutput_query-acc' => 'RESULT-query_accession',
+    'PSLOutput_querydesc' => 'RESULT-query_description',
+    'PSLOutput_db'        => 'RESULT-database_name',
+    'PSLOutput_db-len'    => 'RESULT-database_entries',
+    'PSLOutput_db-let'    => 'RESULT-database_letters',
+);
+
+use base qw(Bio::SearchIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::psl();
+ Function: Builds a new Bio::SearchIO::psl object 
+ Returns : an instance of Bio::SearchIO::psl
+ Args    : -program_name => name of the program that produced the
+                            PSL output (optional, defaults to
+                            $DefaultProgramName)
+
+=cut
+
+# Object setup: records the program name and registers the result,
+# hit and HSP factories with the event handler.
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    my ($pname) = $self->_rearrange([qw(PROGRAM_NAME)],
+                                    @args);
+    $self->program_name($pname || $DefaultProgramName);
+    $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::GenericResult'));
+
+    $self->_eventHandler->register_factory('hit', Bio::Search::Hit::HitFactory->new(-type => 'Bio::Search::Hit::GenericHit'));
+    $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::PSLHSP'));
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $parser->next_result
+ Function: Parse the next result from the data stream.  All
+           consecutive lines sharing one query name form one result;
+           consecutive lines sharing one target name form one hit;
+           every line is one HSP.
+ Returns : L<Bio::Search::Result::ResultI>, or a false value once the
+           input is exhausted
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+    my ($lastquery, $lasthit);
+    local $/ = "\n";
+    local $_;
+
+  LINE:
+    while ( defined( $_ = $self->_readline ) ) {
+
+        # clear header if exists: pass over the header lines until the
+        # first data line, but stop at end of input rather than
+        # looping forever on a file that only contains a header
+        if (/^psLayout/) {
+            while ( defined $_ && !/^\d+\s+\d+\s+/ ) {
+                $_ = $self->_readline;
+            }
+            last LINE unless defined $_;
+        }
+
+        # a blank line carries no fields (and would otherwise lead to
+        # a division by an undefined query length below)
+        next LINE unless /\S/;
+
+        # the 21 whitespace separated PSL columns, in file order
+        my (
+            $matches,      $mismatches,    $rep_matches,  $n_count,
+            $q_num_insert, $q_base_insert, $t_num_insert, $t_base_insert,
+            $strand,       $q_name,        $q_length,     $q_start,
+            $q_end,        $t_name,        $t_length,     $t_start,
+            $t_end,        $block_count,   $block_sizes,  $q_starts,
+            $t_starts
+        ) = split;
+
+        # percentage of the query covered by aligned bases
+        my $score = sprintf "%.2f",
+            ( 100 * ( $matches + $mismatches + $rep_matches ) / $q_length );
+
+        # Remember Jim's code is 0 based
+        if ( defined $lastquery && $lastquery ne $q_name ) {
+            # this line belongs to the next result: finish the current
+            # one and leave the line for the next call
+            $self->end_element({'Name' => 'Hit'});
+            $self->end_element({'Name' => 'PSLOutput'});
+            $self->_pushback($_);
+            return $self->end_document;
+        }
+        elsif ( !defined $lastquery ) {
+            # first line of this result
+            $self->{'_result_count'}++;
+            $self->start_element({'Name' => 'PSLOutput'});
+            $self->element({'Name' => 'PSLOutput_program',
+                            'Data' => $self->program_name});
+            $self->element({'Name' => 'PSLOutput_query-def',
+                            'Data' => $q_name});
+            $self->element({'Name' => 'PSLOutput_query-len',
+                            'Data' => $q_length});
+            $self->start_element({'Name' => 'Hit'});
+            $self->element({'Name' => 'Hit_id',
+                            'Data' => $t_name});
+            $self->element({'Name' => 'Hit_len',
+                            'Data' => $t_length});
+            $self->element({'Name' => 'Hit_score',
+                            'Data' => $score});
+        }
+        elsif ( $lasthit ne $t_name ) {
+            # same query, new target: close the old hit, open a new one
+            $self->end_element({'Name' => 'Hit'});
+            $self->start_element({'Name' => 'Hit'});
+            $self->element({'Name' => 'Hit_id',
+                            'Data' => $t_name});
+            $self->element({'Name' => 'Hit_len',
+                            'Data' => $t_length});
+            $self->element({'Name' => 'Hit_score',
+                            'Data' => $score});
+        }
+
+        my $identical = $matches + $rep_matches;
+        $self->start_element({'Name' => 'Hsp'});
+        $self->element({'Name' => 'Hsp_score',
+                        'Data' => $score});
+        $self->element({'Name' => 'Hsp_identity',
+                        'Data' => $identical});
+        $self->element({'Name' => 'Hsp_positive',
+                        'Data' => $identical});
+        $self->element({'Name' => 'Hsp_mismatches',
+                        'Data' => $mismatches});
+        $self->element({'Name' => 'Hsp_gaps',
+                        'Data' => $q_base_insert + $t_base_insert});
+        # query gaps are the number of target inserts and vice-versa
+        $self->element({'Name' => 'Hsp_querygaps',
+                        'Data' => $t_base_insert});
+        $self->element({'Name' => 'Hsp_hitgaps',
+                        'Data' => $q_base_insert});
+        # PSL starts are 0 based, hence the + 1; on the minus strand
+        # the two query coordinates swap roles
+        if ( $strand eq '+' ) {
+            $self->element({'Name' => 'Hsp_query-from',
+                            'Data' => $q_start + 1});
+            $self->element({'Name' => 'Hsp_query-to',
+                            'Data' => $q_end});
+        }
+        else {
+            $self->element({'Name' => 'Hsp_query-to',
+                            'Data' => $q_start + 1});
+            $self->element({'Name' => 'Hsp_query-from',
+                            'Data' => $q_end});
+        }
+        my $hsplen = $q_base_insert + $t_base_insert +
+            abs( $t_end - $t_start) + abs( $q_end - $q_start);
+        $self->element({'Name' => 'Hsp_hit-from',
+                        'Data' => $t_start + 1 });
+        $self->element({'Name' => 'Hsp_hit-to',
+                        'Data' => $t_end});
+        $self->element({'Name' => 'Hsp_align-len',
+                        'Data' => $hsplen});
+        # cleanup trailing commas in some output
+        $block_sizes =~ s/\,$//;
+        $q_starts    =~ s/\,$//;
+        $t_starts    =~ s/\,$//;
+        my @blocksizes = split(/,/,$block_sizes); # block sizes
+        my @qstarts = split(/,/,$q_starts); # starting position of each block
+                                            # in query
+        my @tstarts = split(/,/,$t_starts); # starting position of each block
+                                            # in target
+        # one [ start, size ] pair per aligned block, for query and hit
+        my (@qgapblocks, @hgapblocks);
+        for ( my $i = 0; $i < $block_count; $i++ ) {
+            if ( $strand eq '+' ) {
+                push @qgapblocks, [ $qstarts[$i] + 1, $blocksizes[$i]];
+            }
+            else {
+                push @qgapblocks, [ $q_length - $qstarts[$i], $blocksizes[$i]];
+            }
+            push @hgapblocks, [ $tstarts[$i] + 1, $blocksizes[$i]];
+        }
+        $self->element({'Name' => 'Hsp_qgapblocks',
+                        'Data' => \@qgapblocks});
+        $self->element({'Name' => 'Hsp_hgapblocks',
+                        'Data' => \@hgapblocks});
+        $self->end_element({'Name' => 'Hsp'});
+        $lastquery = $q_name;
+        $lasthit   = $t_name;
+    }
+
+    # end of input: hand back the result that was being built, if any
+    if ( defined $lasthit || defined $lastquery ) {
+        $self->end_element({'Name' => 'Hit'});
+        $self->end_element({'Name' => 'Result'});
+        return $self->end_document;
+    }
+    return;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event.  For element names listed
+           in %MODEMAP the mode is updated, the event handler is
+           notified (if it handles that type) and the type is put on
+           the front of the open-element stack.  Starting 'PSLOutput'
+           additionally resets the per-result state.
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+=cut
+
+sub start_element {
+    my ($self, $event) = @_;
+
+    # attributes are not interpreted here, merely handed on
+    my $nm   = $event->{'Name'};
+    my $type = $MODEMAP{$nm};
+
+    if ($type) {
+        $self->_mode($type);
+        my $handler = $self->_eventHandler;
+        if ( $handler->will_handle($type) ) {
+            my $func = 'start_' . lc $type;
+            $handler->$func( $event->{'Attributes'} );
+        }
+        unshift @{ $self->{'_elements'} }, $type;
+    }
+
+    # a new top-level element wipes whatever the last result left behind
+    if ( $nm eq 'PSLOutput' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        $self->{'_mode'}   = '';
+    }
+
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event.  Elements listed in %MODEMAP
+           are closed through the event handler; elements listed in
+           %MAPPING store the most recently seen character data in the
+           '_values' hash; anything else triggers a warning.
+ Returns : whatever the event handler's end_* method returned when a
+           %MODEMAP element was closed, undef otherwise
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+=cut
+
+sub end_element {
+    my ($self, $event) = @_;
+    my $nm = $event->{'Name'};
+    my $rc;
+
+    # Hsp are sort of weird, in that they end when another
+    # object begins so have to detect this in end_element for now
+    if ( my $type = $MODEMAP{$nm} ) {
+        my $handler = $self->_eventHandler;
+        if ( $handler->will_handle($type) ) {
+            my $func = 'end_' . lc $type;
+            $rc = $handler->$func( $self->{'_reporttype'},
+                                   $self->{'_values'} );
+        }
+        shift @{ $self->{'_elements'} };
+    }
+    elsif ( my $target = $MAPPING{$nm} ) {
+        my $value = $self->{'_last_data'};
+        if ( ref($target) =~ /hash/i ) {
+            # a hash entry maps to a nested slot: { outer => inner }
+            my ($key) = keys %{$target};
+            $self->{'_values'}->{$key}->{ $target->{$key} } = $value;
+        }
+        else {
+            $self->{'_values'}->{$target} = $value;
+        }
+    }
+    else {
+        $self->warn( __PACKAGE__."::end_element: unknown nm '$nm', ignoring\n");
+    }
+
+    # character data belongs to one element only, so forget it now
+    $self->{'_last_data'} = '';
+
+    # both 'PSLOutput' and 'Result' close a result; keep what the
+    # handler returned so end_document can hand it out
+    my $closed = defined $nm ? $MODEMAP{$nm} : undef;
+    if ( defined $closed && $closed eq 'result' ) {
+        $self->{'_result'} = $rc;
+    }
+    return $rc;
+
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters
+           and end_element, in that order, with the same hash ref
+ Returns : the return value of the end_element call
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+=cut
+
+sub element {
+    my ($self, $event) = @_;
+    $self->start_element($event);
+    $self->characters($event);
+    return $self->end_element($event);
+}
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character event: remembers the 'Data' value of the
+           event as the last seen data.  Events without defined
+           'Data' are ignored, as is whitespace-only data unless the
+           element is one of Hsp_midline, Hsp_qseq or Hsp_hseq.  Data
+           of those three elements is also accumulated per element
+           name while inside an HSP.
+ Returns : none
+ Args    : hash ref with the keys 'Name' and 'Data'
+
+=cut
+
+sub characters {
+    my ($self, $event) = @_;
+    my $text = $event->{'Data'};
+    return unless defined $text;
+
+    # pure whitespace only matters for the alignment lines
+    return if $text =~ /^\s+$/
+           && $event->{'Name'} !~ /Hsp\_(midline|qseq|hseq)/;
+
+    if ( $self->in_element('hsp') ) {
+        my $name = $event->{'Name'};
+        $self->{'_last_hspdata'}->{$name} .= $text
+            if $name =~ /Hsp\_(qseq|hseq|midline)/;
+    }
+
+    return $self->{'_last_data'} = $text;
+}
+
+=head2 _mode
+
+ Title   : _mode
+ Usage   : $obj->_mode($newval)
+ Function: Get/Set the current mode; an undefined argument leaves
+           the stored value untouched
+ Example : 
+ Returns : value of _mode
+ Args    : newvalue (optional)
+
+=cut
+
+sub _mode {
+    my $self  = shift;
+    my $value = shift;
+    $self->{'_mode'} = $value if defined $value;
+    return $self->{'_mode'};
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element, i.e. whether
+           it appears anywhere on the stack of currently open
+           elements.  This is different than 'in' because within can
+           be tested for a whole block.
+ Returns : boolean (1 or 0); 0 when no name is given or no element
+           is open
+ Args    : string element name 
+
+=cut
+
+sub within_element {
+    my ($self, $name) = @_;
+    my $open = $self->{'_elements'};
+
+    # Each condition must bail out on its own.  The earlier
+    # "A && B || C" form dereferenced an undefined stack (this parser
+    # never calls start_document, so the stack can be missing) and
+    # went on to compare against an undefined name.
+    return 0 if !defined $name
+             || !defined $open
+             || !@{$open};
+
+    foreach my $element ( @{$open} ) {
+        return 1 if $element eq $name;
+    }
+    return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element.  Only the first
+           entry of the open-element stack (the most recently
+           started element) is compared; enclosing elements are not
+           considered, which is what within_element is for.
+ Returns : boolean
+ Args    : string element name 
+
+=cut
+
+sub in_element {
+    my ($self, $name) = @_;
+    my $current = $self->{'_elements'}->[0];
+    return defined $current ? ( $current eq $name ) : 0;
+}
+
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handles a start document event by resetting the parser
+           state: last type, collected values, result, mode and the
+           stack of open elements
+ Returns : none
+ Args    : none
+
+=cut
+
+sub start_document {
+    my $self = shift;
+
+    # plain scalar state
+    foreach my $key (qw(_lasttype _mode)) {
+        $self->{$key} = '';
+    }
+    $self->{'_result'} = undef;
+
+    # containers; the element stack stays the final statement
+    $self->{'_values'}   = {};
+    $self->{'_elements'} = [];
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object (whatever is stored
+           under '_result'; undef if no result was completed)
+ Args    : none
+
+=cut
+
+sub end_document {
+    # further arguments are accepted but not used
+    my ($self, @args) = @_;
+    return $self->{'_result'};
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed, as kept
+           under the '_result_count' key
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+    my $count = $self->{'_result_count'};
+    return $count;
+}
+
+# report_count simply forwards to result_count
+sub report_count {
+    my $self = shift;
+    return $self->result_count;
+}
+
+
+=head2 program_name
+
+ Title   : program_name
+ Usage   : $obj->program_name($newval)
+ Function: Get/Set the program name
+ Returns : value of program_name (a scalar); $DefaultProgramName
+           whenever the stored value is false
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub program_name {
+    my ($self, @newval) = @_;
+    if (@newval) {
+        $self->{'program_name'} = $newval[0];
+    }
+    my $name = $self->{'program_name'};
+    return $name ? $name : $DefaultProgramName;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/sim4.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/sim4.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/sim4.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,646 @@
+# $Id: sim4.pm,v 1.13.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::sim4
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::sim4 - parser for Sim4 alignments
+
+=head1 SYNOPSIS
+
+  # do not use this module directly, it is a driver for SearchIO
+  use Bio::SearchIO;
+  my $searchio = new Bio::SearchIO(-file => 'results.sim4',
+                                   -format => 'sim4');
+
+  while ( my $result = $searchio->next_result ) {
+      while ( my $hit = $result->next_hit ) {
+	  while ( my $hsp = $hit->next_hsp ) {
+              # ...
+	  }
+      }
+  }
+
+=head1 DESCRIPTION
+
+This is a driver for the SearchIO system for parsing Sim4.
+http://globin.cse.psu.edu/html/docs/sim4.html
+
+Cannot parse LAV or 'exon file' formats (A=2 or A=5)
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Luc Gauthier (lgauthie at hotmail.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::sim4;
+
+use strict;
+use vars qw($DEFAULTFORMAT %ALIGN_TYPES
+            %MAPPING %MODEMAP $DEFAULT_WRITER_CLASS);
+
+use POSIX;
+use Bio::SearchIO::SearchResultEventBuilder;
+
+use base qw(Bio::SearchIO);
+
+$DEFAULTFORMAT = 'SIM4';
+
+# the writer classes live under Bio::SearchIO::Writer
+$DEFAULT_WRITER_CLASS = 'Bio::SearchIO::Writer::HitTableWriter';
+
+# names for the four lines of an alignment block, by position
+%ALIGN_TYPES = (
+    0 => 'Ruler',
+    1 => 'Query', 
+    2 => 'Mid', 
+    3 => 'Sbjct'
+);
+
+# element names that open/close an object, and the kind of object
+%MODEMAP = (
+    'Sim4Output' => 'result',
+    'Hit'        => 'hit',
+    'Hsp'        => 'hsp'
+);
+
+# mapping of element names to Bioperl hash keys
+%MAPPING = (
+    'Hsp_query-from'=>  'HSP-query_start',
+    'Hsp_query-to'  =>  'HSP-query_end',
+    'Hsp_qseq'      =>  'HSP-query_seq',
+    'Hsp_qlength'   =>  'HSP-query_length',
+    'Hsp_querygaps'  => 'HSP-query_gaps',
+    'Hsp_hit-from'  =>  'HSP-hit_start',
+    'Hsp_hit-to'    =>  'HSP-hit_end',
+    'Hsp_hseq'      =>  'HSP-hit_seq',
+    'Hsp_hlength'   =>  'HSP-hit_length',
+    'Hsp_hitgaps'    => 'HSP-hit_gaps',
+    'Hsp_midline'   =>  'HSP-homology_seq',
+    'Hsp_score'     =>  'HSP-score',
+    'Hsp_align-len' =>  'HSP-hsp_length',
+    'Hsp_identity'  =>  'HSP-identical',
+
+    'Hit_id'        => 'HIT-name',
+    'Hit_desc'      => 'HIT-description',
+    'Hit_len'       => 'HIT-length',
+
+    'Sim4Output_program'   => 'RESULT-algorithm_name',
+    'Sim4Output_query-def' => 'RESULT-query_name',
+    'Sim4Output_query-desc'=> 'RESULT-query_description',
+    'Sim4Output_query-len' => 'RESULT-query_length',
+);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::sim4();
+ Function: Builds a new Bio::SearchIO::sim4 object
+ Returns : an instance of Bio::SearchIO::sim4
+ Args    :
+
+
+=cut
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $result = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+    local $/ = "\n";
+    local $_;
+
+    # Declare/adjust needed variables
+    $self->{'_last_data'} = '';
+    my ($seentop, $qfull, @hsps, %alignment, $format);
+    my $hit_direction = 1;
+
+    # Start document and main element
+    $self->start_document();
+    $self->start_element({'Name' => 'Sim4Output'});
+    my $lastquery = '';
+    # Read output report until EOF
+    while( defined($_ = $self->_readline) ) {       
+        # Skip empty lines, chomp filled ones
+	next if( /^\s+$/); chomp;
+
+        # Make sure sim4 output format is not 2 or 5
+        if (!$seentop) {
+	    if ( /^\#:lav/ ) { $format = 2; }
+            elsif ( /^<|>/ ) { $format = 5; }
+            $self->throw("Bio::SearchIO::sim4 module cannot parse 'type $format' outputs.") if $format;
+	}
+
+        # This line indicates the start of a new hit
+	if( /^seq1\s*=\s*(\S+),\s+(\d+)/ ) {
+	    my ($nm,$desc) = ($1,$2);
+            # First hit? Adjust some parameters if so
+	    if ( ! $seentop ) {
+	        $self->element( {'Name' => 'Sim4Output_query-def', 
+				 'Data' => $nm} );
+	        $self->element( {'Name' => 'Sim4Output_query-len', 
+				 'Data' => $desc} );
+                $seentop = 1;
+	    } elsif( $nm ne $lastquery ) {
+		$self->_pushback($_);
+		last;
+	    }
+	    $lastquery = $nm;
+            # A previous HSP may need to be ended
+            $self->end_element({'Name' => 'Hsp'}) if ( $self->in_element('hsp') );
+            # A previous hit exists? End it and reset needed variables
+            if ( $self->in_element('hit') ) {
+	        foreach (@hsps) {
+                    $self->start_element({'Name' => 'Hsp'});
+                    while (my ($name, $data) = each %$_) {
+                        $self->{'_currentHSP'}{$name} = $data;
+		    }
+		    $self->end_element({'Name' => 'Hsp'});
+                    $self->{'_currentHSP'} = {};
+	        }
+                $format = 0 if @hsps;
+                @hsps = ();
+                %alignment = ();
+                $qfull = 0;
+                $hit_direction = 1;
+                $self->end_element({'Name' => 'Hit'});
+	    }
+
+        # This line describes the current hit... so let's start it
+	} elsif( /^seq2\s*=\s*(\S+)\s+\(>?(\S+)\s*\),\s*(\d+)/ ) {
+            $self->start_element({'Name' => 'Hit'});
+	    $self->element( {'Name' => 'Hit_id', 'Data' => $2} );
+	    $self->element( {'Name' => 'Hit_desc', 'Data' => $1} );
+	    $self->element( {'Name' => 'Hit_len', 'Data' => $3} );
+
+        # This line may give additional details about query or subject
+	} elsif( /^>(\S+)\s*(.*)?/ ) {
+            # Previous line was query details... this time subject details
+	    if( $qfull )  {
+                $format = 4 if !$format;
+		$self->element({'Name' => 'Hit_desc', 'Data' => $2});
+            # First line of this type is always query details for a given hit
+	    } else { 
+		$self->element({'Name' => 'Sim4Output_query-desc', 'Data' => $2});
+		$qfull = 1;
+	    }
+
+        # This line indicates that subject is on reverse strand
+	} elsif( /^\(complement\)/ ) {
+	    $hit_direction = -1;
+
+        # This line describes the current HSP... so add it to @hsps array
+	} elsif( /^\(?(\d+)\-(\d+)\)?\s+\(?(\d+)\-(\d+)\)?\s+(\d+)/ ) {
+		my ($qs,$qe,$hs,$he,$pid) = ($1,$2,$3,$4,$5);
+                push @hsps, {
+                    'Hsp_query-from' => $qs,
+                    'Hsp_query-to' => $qe,
+                    'Hsp_hit-from' => $hit_direction >= 0 ? $hs : $he,
+                    'Hsp_hit-to' => $hit_direction >= 0 ? $he : $hs,
+                    'Hsp_identity' => 0, #can't determine correctly from raw pct
+                    'Hsp_qlength' => abs($qe - $qs) + 1,
+                    'Hsp_hlength' => abs($he - $hs) + 1,
+                    'Hsp_align-len' => abs($qe - $qs) + 1,
+	        };
+
+        # This line indicates the start of an alignment block
+        } elsif( /^\s+(\d+)\s/ ) {
+            # Store the current alignment block in a hash
+	    for( my $i = 0; defined($_) && $i < 4; $i++ ) {
+                my ($start, $string) = /^\s+(\d*)\s(.*)/;
+                $alignment{$ALIGN_TYPES{$i}} = { start => $start, string => $i != 2
+                    ? $string
+                    : (' ' x (length($alignment{$ALIGN_TYPES{$i-1}}{string}) - length($string))) . $string
+                };
+                $_ = $self->_readline();
+	    }
+
+            # 'Ruler' line indicates the start of a new HSP
+            if ($alignment{Ruler}{start} == 0) {
+                $format = @hsps ? 3 : 1 if !$format;
+                # A previous HSP may need to be ended
+                $self->end_element({'Name' => 'Hsp'}) if ( $self->in_element('hsp') );
+                # Start the new HSP and fill the '_currentHSP' property with available details
+     	        $self->start_element({'Name' => 'Hsp'});
+                $self->{'_currentHSP'} = @hsps ? shift @hsps : {
+                    'Hsp_query-from' => $alignment{Query}{start},
+                    'Hsp_hit-from' => $alignment{Sbjct}{start},
+     	        }
+	    }
+
+            # Midline indicates a boundary between two HSPs
+	    if ( $alignment{Mid}{string} =~ /<|>/g ) {
+                my ($hsp_start, $hsp_end);
+                # Are we currently in an open HSP?
+    	        if ( $self->in_element('hsp') ) {
+                    # Find end pos, adjust 'gaps', 'seq' and 'midline' properties... then close HSP
+                    $hsp_end = (pos $alignment{Mid}{string}) - 1;
+                    $self->{'_currentHSP'}{'Hsp_querygaps'} +=
+                        ($self->{'_currentHSP'}{'Hsp_qseq'} .= substr($alignment{Query}{string}, 0, $hsp_end)) =~ s/ /-/g;
+                    $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
+                        ($self->{'_currentHSP'}{'Hsp_hseq'} .= substr($alignment{Sbjct}{string}, 0, $hsp_end)) =~ s/ /-/g;
+                    ($self->{'_currentHSP'}{'Hsp_midline'} .= substr($alignment{Mid}{string}, 0, $hsp_end)) =~ s/-/ /g;
+                    $self->end_element({'Name' => 'Hsp'});
+
+                    # Does a new HSP start in the current alignment block?
+                    if ( $alignment{Mid}{string} =~ /\|/g ) {
+                        # Find start pos, start new HSP and fill it with available details
+                        $hsp_start = (pos $alignment{Mid}{string}) - 1;
+                        $self->start_element({'Name' => 'Hsp'});
+                        $self->{'_currentHSP'} = @hsps ? shift @hsps : {};
+                        $self->{'_currentHSP'}{'Hsp_querygaps'} +=
+                            ($self->{'_currentHSP'}{'Hsp_qseq'} = substr($alignment{Query}{string}, $hsp_start)) =~ s/ /-/g;
+                        $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
+                            ($self->{'_currentHSP'}{'Hsp_hseq'} = substr($alignment{Sbjct}{string}, $hsp_start)) =~ s/ /-/g;
+                        ($self->{'_currentHSP'}{'Hsp_midline'} = substr($alignment{Mid}{string}, $hsp_start)) =~ s/-/ /g;
+		    }
+		}
+                # No HSP is currently open...
+                else {
+                    # Find start pos, start new HSP and fill it with available
+                    # details then skip to next alignment block
+		    $hsp_start = index($alignment{Mid}{string}, '|');
+	            $self->start_element({'Name' => 'Hsp'});
+                    $self->{'_currentHSP'} = @hsps ? shift @hsps : {
+                        'Hsp_query-from' => $alignment{Query}{start},
+    	            };
+                    $self->{'_currentHSP'}{'Hsp_querygaps'} +=
+                        ($self->{'_currentHSP'}{'Hsp_qseq'} = substr($alignment{Query}{string}, $hsp_start)) =~ s/ /-/g;
+                    $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
+                        ($self->{'_currentHSP'}{'Hsp_hseq'} = substr($alignment{Sbjct}{string}, $hsp_start)) =~ s/ /-/g;
+                    ($self->{'_currentHSP'}{'Hsp_midline'} = substr($alignment{Mid}{string}, $hsp_start)) =~ s/-/ /g;
+                    next;
+		}
+	    }
+            # Current alignment block does not contain HSPs boundary
+            else {
+                # Start a new HSP if none is currently open
+	        # (Happens if last boundary finished at the very end of previous block)
+	        if ( !$self->in_element('hsp') ) {
+     	            $self->start_element({'Name' => 'Hsp'});
+                    $self->{'_currentHSP'} = @hsps ? shift @hsps : {
+                        'Hsp_query-from' => $alignment{Query}{start},
+                        'Hsp_hit-from' => $alignment{Sbjct}{start},
+     	            }
+		}
+                # Adjust details of the current HSP
+                $self->{'_currentHSP'}{'Hsp_query-from'} ||= 
+		    $alignment{Query}{start} - 
+		    length($self->{'_currentHSP'}{'Hsp_qseq'} || '');
+                $self->{'_currentHSP'}{'Hsp_hit-from'} ||= 
+		    $alignment{Sbjct}{start} - 
+		    length($self->{'_currentHSP'}{'Hsp_hseq'} || '');
+                $self->{'_currentHSP'}{'Hsp_querygaps'} +=
+                    ($self->{'_currentHSP'}{'Hsp_qseq'} .= 
+		     $alignment{Query}{string}) =~ s/ /-/g;
+                $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
+                    ($self->{'_currentHSP'}{'Hsp_hseq'} .= 
+		     $alignment{Sbjct}{string}) =~ s/ /-/g;
+                ($self->{'_currentHSP'}{'Hsp_midline'} .= 
+		 $alignment{Mid}{string}) =~ s/-/ /g;
+	    }
+	}
+    }
+
+    # We are done reading the sim4 report, end everything and return
+    if( $seentop ) {
+        # end HSP if needed
+        $self->end_element({'Name' => 'Hsp'}) if ( $self->in_element('hsp') );
+        # end Hit if needed
+        if ( $self->in_element('hit') ) {
+            foreach (@hsps) {
+                $self->start_element({'Name' => 'Hsp'});
+                while (my ($name, $data) = each %$_) {
+                    $self->{'_currentHSP'}{$name} = $data;
+    	        }
+    	        $self->end_element({'Name' => 'Hsp'});
+            }
+            $self->end_element({'Name' => 'Hit'});
+	}
+        # adjust result's algorithm name, end output and return
+        $self->element({'Name' => 'Sim4Output_program',
+                        'Data' => $DEFAULTFORMAT . ' (A=' . (defined $format ? $format : '?') . ')'});
+	$self->end_element({'Name' => 'Sim4Output'});
+	return $self->end_document();
+    } 
+    return;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+    my ($self, $event) = @_;
+    # Only element names listed in %MODEMAP (result / hit / hsp) open a
+    # new level; every other name is ignored here.  Attributes are passed
+    # to the handler untouched.
+    my $mode = $MODEMAP{ $event->{'Name'} };
+    return unless $mode;
+
+    # Notify the event handler, if it wants events of this type.
+    if ( $self->_will_handle($mode) ) {
+        my $method = 'start_' . lc $mode;
+        $self->_eventHandler->$method( $event->{'Attributes'} );
+    }
+
+    # The newest element is kept at the front of the stack.
+    unshift @{ $self->{'_elements'} }, $mode;
+
+    # A new result starts with a clean slate.
+    if ( $mode eq 'result' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+    }
+    return;
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+# Handle the end of an element.  For 'Hsp' the details still missing from
+# the pending HSP are derived first and every stored detail is emitted as
+# its own element; then the element is either closed on the event handler
+# (names in %MODEMAP) or its last character data is stored in '_values'
+# (names in %MAPPING).  Returns whatever the handler's end_* method returned.
+sub end_element {
+    my ($self,$data) = @_;
+    my $nm = $data->{'Name'};
+    my $type = $MODEMAP{$nm};
+    my $rc;
+    
+    if( $nm eq 'Hsp' ) {
+        # Fill in only what is not already set (||=), in dependency order:
+        # end coordinates first, then the lengths and score based on them.
+        $self->{'_currentHSP'}{'Hsp_midline'} ||= '';
+	$self->{'_currentHSP'}{'Hsp_query-to'} ||=
+            $self->{'_currentHSP'}{'Hsp_query-from'} + length($self->{'_currentHSP'}{'Hsp_qseq'}) - 1 - $self->{'_currentHSP'}{'Hsp_querygaps'};
+        $self->{'_currentHSP'}{'Hsp_hit-to'} ||=
+            $self->{'_currentHSP'}{'Hsp_hit-from'} + length($self->{'_currentHSP'}{'Hsp_hseq'}) - 1 - $self->{'_currentHSP'}{'Hsp_hitgaps'};
+        # Identities are the '|' characters of the midline.
+        $self->{'_currentHSP'}{'Hsp_identity'} ||= 
+	    ($self->{'_currentHSP'}{'Hsp_midline'} =~ tr/\|//);
+        $self->{'_currentHSP'}{'Hsp_qlength'} ||= abs($self->{'_currentHSP'}{'Hsp_query-to'} - $self->{'_currentHSP'}{'Hsp_query-from'}) + 1;
+        $self->{'_currentHSP'}{'Hsp_hlength'} ||= abs($self->{'_currentHSP'}{'Hsp_hit-to'} - $self->{'_currentHSP'}{'Hsp_hit-from'}) + 1;
+        $self->{'_currentHSP'}{'Hsp_align-len'} ||= abs($self->{'_currentHSP'}{'Hsp_query-to'} - $self->{'_currentHSP'}{'Hsp_query-from'}) + 1;
+        # Score defaults to the integer percentage of identities.
+        $self->{'_currentHSP'}{'Hsp_score'} ||= int(100 * ($self->{'_currentHSP'}{'Hsp_identity'} / $self->{'_currentHSP'}{'Hsp_align-len'}));
+        # Emit every detail as an element (this re-enters end_element via
+        # element()) and empty the pending HSP as we go.
+        foreach (keys %{$self->{'_currentHSP'}}) {
+            $self->element({'Name' => $_, 'Data' => delete ${$self->{'_currentHSP'}}{$_}});
+	}
+    }
+
+    if( $type = $MODEMAP{$nm} ) {
+	# Structural element: tell the handler and pop it off the stack.
+	if( $self->_will_handle($type) ) {
+	    my $func = sprintf("end_%s",lc $type);
+	    $rc = $self->_eventHandler->$func($self->{'_reporttype'},
+					      $self->{'_values'});
+	}
+	shift @{$self->{'_elements'}};
+
+    } elsif( $MAPPING{$nm} ) {
+
+	# Data element: file the last character data under its mapped key
+	# (one level deeper when the mapping itself is a hash reference).
+	if ( ref($MAPPING{$nm}) =~ /hash/i ) {
+	    my $key = (keys %{$MAPPING{$nm}})[0];
+	    $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
+	} else {
+	    $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
+	}
+    } else {
+	$self->debug( "unknown nm $nm, ignoring\n");
+    }
+    $self->{'_last_data'} = ''; # remove read data if we are at
+				# end of an element
+    # Keep the finished result so end_document() can return it.
+    $self->{'_result'} = $rc if( defined $type && $type eq 'result' );
+    return $rc;
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+    my ($self, $event) = @_;
+    # Open the element, feed it its character data, then close it again.
+    # The value handed back is whatever end_element() returns.
+    $self->start_element($event);
+    $self->characters($event);
+    return $self->end_element($event);
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character events
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+    my ($self, $event) = @_;
+    my ($name, $text) = @{$event}{qw(Name Data)};
+
+    # Missing or whitespace-only data is dropped entirely.
+    return if !defined $text || $text =~ /^\s+$/;
+
+    # Alignment strings seen while an HSP is open are accumulated per name.
+    if ( $self->in_element('hsp') &&
+         $name =~ /Hsp\_(qseq|hseq|midline)/ ) {
+        $self->{'_last_hspdata'}->{$name} .= $text;
+    }
+
+    # Remember the data so end_element() can store it.
+    $self->{'_last_data'} = $text;
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name
+
+
+=cut
+
+sub within_element{
+    my ($self,$name) = @_;
+    # Nothing can match without a name or without any open element.  The
+    # three guards are OR-ed: mixing && and || here let an undefined
+    # '_elements' be dereferenced and an undefined $name reach 'eq'.
+    return 0 if ( ! defined $name ||
+                  ! defined $self->{'_elements'} ||
+                  scalar @{$self->{'_elements'}} == 0 );
+    # Unlike in_element(), every open element is checked, not just the
+    # innermost one.
+    foreach my $open ( @{$self->{'_elements'}} ) {
+        return 1 if $open eq $name;
+    }
+    return 0;
+}
+
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           Only the innermost (most recently started) element is
+           compared, unlike within_element, which checks every open one.
+ Returns : boolean
+ Args    : string element name
+
+
+=cut
+
+sub in_element{
+    my ($self, $name) = @_;
+    # The innermost open element sits at the front of the stack.
+    my $innermost = $self->{'_elements'}->[0];
+    return 0 unless defined $innermost;
+    return ( $innermost eq $name );
+}
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handle a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    my ($self) = @_;
+    # Reset the parser state before a new report is read: no last type,
+    # no collected values, no result, and an empty element stack.
+    @{$self}{ '_lasttype', '_values', '_result', '_elements' } =
+        ( '', {}, undef, [] );
+    $self->{'_reporttype'} = $DEFAULTFORMAT;
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+    # Extra arguments are accepted and ignored.  The list below had been
+    # mangled into "at args" (e-mail address obfuscation), which does not
+    # compile.
+    my ($self,@args) = @_;
+    # The result was stored by end_element() when the result element closed.
+    return $self->{'_result'};
+}
+
+
+sub write_result {
+    my ($self, $result, @rest) = @_;
+
+    # Fall back to the default writer class (with a warning) when the
+    # caller has not supplied a writer.
+    unless ( defined $self->writer ) {
+        $self->warn("Writer not defined. Using a $DEFAULT_WRITER_CLASS");
+        $self->writer( $DEFAULT_WRITER_CLASS->new() );
+    }
+
+    # The actual writing is done by the parent class.
+    return $self->SUPER::write_result( $result, @rest );
+}
+
+sub result_count {
+    my ($self) = @_;
+    # Always 1.  Whether a sim4 report can contain more than one result
+    # is an open question.
+    return 1;
+}
+
+# Alias for result_count().
+sub report_count {
+    my ($self) = @_;
+    return $self->result_count;
+}
+
+=head2 _will_handle
+
+ Title   : _will_handle
+ Usage   : Private method. For internal use only.
+              if( $self->_will_handle($type) ) { ... }
+ Function: Provides an optimized way to check whether or not an element of a 
+           given type is to be handled.
+ Returns : Reference to EventHandler object if the element type is to be handled.
+           undef if the element type is not to be handled.
+ Args    : string containing type of element.
+
+Optimizations:
+
+  1. Using the cached pointer to the EventHandler to minimize repeated lookups.
+  2. Caching the will_handle status for each type that is encountered
+     so that it only need be checked by calling handler->will_handle($type) once.
+
+This does not lead to a major savings by itself (only 5-10%).
+In combination with other optimizations, or for large parse jobs, the
+savings could be significant.
+
+To test against the unoptimized version, remove the parentheses from
+around the third term in the ternary " ? : " operator and add two
+calls to $self-E<gt>_eventHandler().
+
+=cut
+
+sub _will_handle {
+    my ($self,$type) = @_;
+    # Look the event handler up once and keep it on the object.
+    my $handler = $self->{'_handler_cache'} ||= $self->_eventHandler;
+
+    # Ask the handler only the first time a type is seen; afterwards the
+    # cached answer is reused.  (An undefined answer is not cached, so the
+    # handler is asked again in that case.)
+    my $will_handle = defined($self->{'_will_handle_cache'}->{$type})
+                             ? $self->{'_will_handle_cache'}->{$type}
+                             : ($self->{'_will_handle_cache'}->{$type} =
+                               $handler->will_handle($type));
+
+    # Hand back the handler itself as the true value.
+    return $will_handle ? $handler : undef;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/waba.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/waba.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/waba.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,516 @@
+# $Id: waba.pm,v 1.16.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::waba
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::waba - SearchIO parser for Jim Kent WABA program
+alignment output
+
+=head1 SYNOPSIS
+
+    # do not use this object directly, rather through Bio::SearchIO
+
+    use Bio::SearchIO;
+    my $in = new Bio::SearchIO(-format => 'waba',
+			       -file   => 'output.wab');
+    while( my $result = $in->next_result ) {
+	while( my $hit = $result->next_hit ) {
+	    while( my $hsp = $hit->next_hsp ) {
+
+	    }
+	}
+    }
+
+=head1 DESCRIPTION
+
+This parser will process the waba output (NOT the human readable format).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::waba;
+use vars qw(%MODEMAP %MAPPING @STATES);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Search::Result::ResultFactory;
+use Bio::Search::HSP::HSPFactory;
+
+use POSIX;
+
+BEGIN { 
+    # Element names that open/close a level of the report, mapped to the
+    # element type used to build the start_<type>/end_<type> handler calls.
+    %MODEMAP = ('WABAOutput' => 'result',
+		'Hit'         => 'hit',
+		'Hsp'         => 'hsp'
+		);
+    # Element names given, in this order, to the lines that follow each
+    # alignment header line in next_result().
+    @STATES = qw(Hsp_qseq Hsp_hseq Hsp_stateseq);
+    # Data element names mapped to the keys under which end_element()
+    # stores their value in '_values'.
+    %MAPPING = 
+	( 
+	  'Hsp_query-from'=>  'HSP-query_start',
+	  'Hsp_query-to'  =>  'HSP-query_end',
+	  'Hsp_hit-from'  =>  'HSP-hit_start',
+	  'Hsp_hit-to'    =>  'HSP-hit_end',
+	  'Hsp_qseq'      =>  'HSP-query_seq',
+	  'Hsp_hseq'      =>  'HSP-hit_seq',
+	  'Hsp_midline'   =>  'HSP-homology_seq',
+	  'Hsp_stateseq'  =>  'HSP-hmmstate_seq',
+	  'Hsp_align-len' =>  'HSP-hsp_length',
+	  
+	  'Hit_id'        => 'HIT-name',
+	  'Hit_accession' => 'HIT-accession',
+
+	  'WABAOutput_program'  => 'RESULT-algorithm_name',
+	  'WABAOutput_version'  => 'RESULT-algorithm_version',
+	  'WABAOutput_query-def'=> 'RESULT-query_name',
+	  'WABAOutput_query-db' => 'RESULT-query_database',
+ 	  'WABAOutput_db'       => 'RESULT-database_name',
+	  );
+}
+
+
+use base qw(Bio::SearchIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::waba();
+ Function: Builds a new Bio::SearchIO::waba object 
+ Returns : Bio::SearchIO::waba
+ Args    : see Bio::SearchIO
+
+=cut
+
+sub _initialize {
+    # The argument list had been mangled into "at args" (e-mail address
+    # obfuscation), which does not compile; @args is what the next line uses.
+    my ($self,@args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # Results and HSPs of this parser are built as WABA-specific classes.
+    $self->_eventHandler->register_factory('result', Bio::Search::Result::ResultFactory->new(-type => 'Bio::Search::Result::WABAResult'));
+
+    $self->_eventHandler->register_factory('hsp', Bio::Search::HSP::HSPFactory->new(-type => 'Bio::Search::HSP::WABAHSP'));
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $hit = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result{
+    my ($self) = @_;
+    local $/ = "\n";
+    local $_;
+
+    # Each alignment is one header line followed by one line per entry of
+    # @STATES.  $state is -1 while a header line is expected, otherwise the
+    # index into @STATES of the line expected next.
+    my ($curquery,$curhit);
+    my $state = -1;
+    $self->start_document();
+    while( defined ($_ = $self->_readline )) {
+
+	if( $state == -1 ) {
+	    my ($qid, $qhspid,$qpercent, $junk,
+		$alnlen,$qdb,$qacc,$qstart,$qend,$qstrand,
+		$hitdb,$hacc,$hstart,$hend,
+		$hstrand) =
+		    ( /^(\S+)\.(\S+)\s+align\s+ # get the queryid
+		      (\d+(\.\d+)?)\%\s+     # get the percentage
+		      of\s+(\d+)\s+  # get the length of the alignment
+		      (\S+)\s+           # this is the query database
+		      (\S+):(\-?\d+)\-(\-?\d+) # The accession:start-end for query
+		      \s+([\-\+])        # query strand
+		      \s+(\S+)\.         # hit db
+		      (\S+):(\-?\d+)\-(\-?\d+) # The accession:start-end for hit
+		      \s+([\-\+])\s*$    # hit strand
+		      /ox );
+
+	    # Give up on an unparsable header before touching any of the
+	    # captures: they are all undefined when the match failed.
+	    if( ! defined $alnlen ) {
+		$self->warn("Unable to parse the rest of the WABA alignment info for: '$_'");
+		last;
+	    }
+
+	    # Curses.  Jim's code is 0 based, the following is to readjust
+	    if( $hstart < 0 ) { $hstart *= -1}
+	    if( $hend   < 0 ) { $hend   *= -1}
+	    if( $qstart < 0 ) { $qstart *= -1}
+	    if( $qend   < 0 ) { $qend   *= -1}
+	    $hstart++; $hend++; $qstart++; $qend++;
+
+	    $self->{'_reporttype'} = 'WABA'; # hardcoded - only
+	                                     # one type of WABA AFAIK
+
+	    # A different query starts the next result: close this one and
+	    # push the header back so the next call reads it again.
+	    if( defined $curquery &&
+		$curquery ne $qid ) {
+		$self->end_element({'Name' => 'Hit'});
+		$self->_pushback($_);
+		$self->end_element({'Name' => 'WABAOutput'});
+		return $self->end_document();
+	    }
+
+	    if( defined $curhit &&
+		$curhit ne $hacc) {
+		# Same query, different hit: close the old hit, open a new one.
+		# slight duplication here -- keep these in SYNC
+		$self->end_element({'Name' => 'Hit'});
+		$self->start_element({'Name' => 'Hit'});
+		$self->element({'Name' => 'Hit_id',
+				'Data' => $hacc});
+		$self->element({'Name' => 'Hit_accession',
+				'Data' => $hacc});
+
+	    } elsif ( ! defined $curquery ) {
+		# First header of this result: open the result and first hit.
+		$self->start_element({'Name' => 'WABAOutput'});
+		$self->{'_result_count'}++;
+		$self->element({'Name' => 'WABAOutput_query-def',
+				'Data' => $qid });
+		$self->element({'Name' => 'WABAOutput_program',
+				'Data' => 'WABA'});
+		$self->element({'Name' => 'WABAOutput_query-db',
+				'Data' => $qdb});
+		$self->element({'Name' => 'WABAOutput_db',
+				'Data' => $hitdb});
+
+		# slight duplication here -- keep these N'SYNC ;-)
+		$self->start_element({'Name' => 'Hit'});
+		$self->element({'Name' => 'Hit_id',
+				'Data' => $hacc});
+		$self->element({'Name' => 'Hit_accession',
+				'Data' => $hacc});
+	    }
+
+	    # strand is inferred by start,end values
+	    # in the Result Builder
+	    if( $qstrand eq '-' ) {
+		($qstart,$qend) = ($qend,$qstart);
+	    }
+	    if( $hstrand eq '-' ) {
+		($hstart,$hend) = ($hend,$hstart);
+	    }
+
+	    $self->start_element({'Name' => 'Hsp'});
+	    $self->element({'Name' => 'Hsp_query-from',
+			    'Data' => $qstart});
+	    $self->element({'Name' => 'Hsp_query-to',
+			    'Data' => $qend});
+	    $self->element({'Name' => 'Hsp_hit-from',
+			    'Data' => $hstart});
+	    $self->element({'Name' => 'Hsp_hit-to',
+			    'Data' => $hend});
+	    $self->element({'Name' => 'Hsp_align-len',
+			    'Data' => $alnlen});
+
+	    $curquery = $qid;
+	    $curhit   = $hacc;
+	    $state = 0;
+	} elsif( ! defined $curquery ) {
+	    $self->warn("skipping because no Hit begin line was recognized\n$_") if( $_ !~ /^\s+$/ );
+	    next;
+	} else {
+	    # Data line of the current HSP; after the last one the HSP is
+	    # closed and a header line is expected again.
+	    chomp;
+	    $self->element({'Name' => $STATES[$state++],
+			    'Data' => $_});
+	    if( $state >= scalar @STATES ) {
+		$state = -1;
+		$self->end_element({'Name' => 'Hsp'});
+	    }
+	}
+    }
+    # End of input (or unparsable header): close whatever result is open.
+    if( defined $curquery  ) {
+	$self->end_element({'Name' => 'Hit'});
+	$self->end_element({'Name' => 'WABAOutput'});
+	return $self->end_document();
+    }
+    return;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+    my ($self, $event) = @_;
+    # we currently don't care about attributes
+    my $nm   = $event->{'Name'};
+    my $type = $MODEMAP{$nm};
+
+    # Names listed in %MODEMAP open a new level: record the mode, notify
+    # the handler if it wants this type, and push the type on the stack.
+    if ( $type ) {
+        $self->_mode($type);
+        if ( $self->_eventHandler->will_handle($type) ) {
+            my $method = 'start_' . lc $type;
+            $self->_eventHandler->$method( $event->{'Attributes'} );
+        }
+        unshift @{ $self->{'_elements'} }, $type;
+    }
+
+    # A new result starts with a clean slate.
+    if ( $nm eq 'WABAOutput' ) {
+        $self->{'_values'} = {};
+        $self->{'_result'} = undef;
+        $self->{'_mode'}   = '';
+    }
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    my ($self, $event) = @_;
+    my $nm = $event->{'Name'};
+    my $rc;
+
+    # When an HSP closes, the alignment strings accumulated by
+    # characters() are emitted as elements of their own, then forgotten.
+    if ( $nm eq 'Hsp' ) {
+        for my $field ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
+            $self->element({ 'Name' => $field,
+                             'Data' => $self->{'_last_hspdata'}->{$field} });
+        }
+        $self->{'_last_hspdata'} = {};
+    }
+
+    my $type   = $MODEMAP{$nm};
+    my $target = $MAPPING{$nm};
+    if ( $type ) {
+        # Structural element: tell the handler and pop it off the stack.
+        if ( $self->_eventHandler->will_handle($type) ) {
+            my $method = 'end_' . lc $type;
+            $rc = $self->_eventHandler->$method( $self->{'_reporttype'},
+                                                 $self->{'_values'} );
+        }
+        shift @{ $self->{'_elements'} };
+    }
+    elsif ( $target ) {
+        # Data element: file the last character data under its mapped key
+        # (one level deeper when the mapping itself is a hash reference).
+        if ( ref($target) =~ /hash/i ) {
+            my ($outer) = keys %{$target};
+            $self->{'_values'}->{$outer}->{ $target->{$outer} } = $self->{'_last_data'};
+        }
+        else {
+            $self->{'_values'}->{$target} = $self->{'_last_data'};
+        }
+    }
+    else {
+        $self->warn( "unknown nm $nm ignoring\n");
+    }
+
+    # Data read so far belongs to the element that just ended.
+    $self->{'_last_data'} = '';
+    # Keep the finished result so end_document() can return it.
+    $self->{'_result'} = $rc if $nm eq 'WABAOutput';
+    return $rc;
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+    my ($self, $event) = @_;
+    # Open the element, feed it its character data, then close it again.
+    # The value handed back is whatever end_element() returns.
+    $self->start_element($event);
+    $self->characters($event);
+    return $self->end_element($event);
+}
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character events
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+    my ($self, $event) = @_;
+    my ($name, $text) = @{$event}{qw(Name Data)};
+
+    return unless defined $text;
+    # Whitespace-only data is kept only for the alignment strings.
+    return if $text =~ /^\s+$/ && $name !~ /Hsp\_(midline|qseq|hseq)/;
+
+    # Alignment strings seen while an HSP is open are accumulated per name.
+    if ( $self->in_element('hsp') &&
+         $name =~ /Hsp\_(qseq|hseq|midline)/ ) {
+        $self->{'_last_hspdata'}->{$name} .= $text;
+    }
+
+    # Remember the data so end_element() can store it.
+    $self->{'_last_data'} = $text;
+}
+
+=head2 _mode
+
+ Title   : _mode
+ Usage   : $obj->_mode($newval)
+ Function: 
+ Example : 
+ Returns : value of _mode
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _mode{
+    my ($self, $value) = @_;
+    # Combined getter/setter: a defined argument replaces the stored mode.
+    $self->{'_mode'} = $value if defined $value;
+    return $self->{'_mode'};
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub within_element{
+    my ($self,$name) = @_;
+    # Nothing can match without a name or without any open element.  The
+    # three guards are OR-ed: mixing && and || here let an undefined
+    # '_elements' be dereferenced and an undefined $name reach 'eq'.
+    return 0 if ( ! defined $name ||
+                  ! defined $self->{'_elements'} ||
+                  scalar @{$self->{'_elements'}} == 0 );
+    # Unlike in_element(), every open element is checked, not just the
+    # innermost one.
+    foreach my $open ( @{$self->{'_elements'}} ) {
+        return 1 if $open eq $name;
+    }
+    return 0;
+}
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           Only the innermost (most recently started) element is
+           compared, unlike within_element, which checks every open one.
+ Returns : boolean
+ Args    : string element name 
+
+
+=cut
+
+sub in_element{
+    my ($self, $name) = @_;
+    # The innermost open element sits at the front of the stack.
+    my $innermost = $self->{'_elements'}->[0];
+    return 0 unless defined $innermost;
+    return ( $innermost eq $name );
+}
+
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handles a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    my ($self) = @_;
+    # Reset the parser state before a new report is read: no last type,
+    # no collected values, no result, no mode, and an empty element stack.
+    @{$self}{ '_lasttype', '_values', '_result', '_mode' } =
+        ( '', {}, undef, '' );
+    $self->{'_elements'} = [];
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+    # Extra arguments are accepted and ignored.  The list below had been
+    # mangled into "at args" (e-mail address obfuscation), which does not
+    # compile.
+    my ($self,@args) = @_;
+    # The result was stored by end_element() when the result element closed.
+    return $self->{'_result'};
+}
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : my $count = $searchio->result_count
+ Function: Returns the number of results we have processed
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+    # Counter incremented by next_result() each time a result is opened.
+    return $self->{'_result_count'};
+}
+
+# Alias for result_count().
+sub report_count {
+    my ($self) = @_;
+    return $self->result_count;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/wise.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/wise.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO/wise.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,491 @@
+# $Id: wise.pm,v 1.6.4.1 2006/10/02 23:10:26 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO::wise
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO::wise - Parsing of wise output as alignments
+
+=head1 SYNOPSIS
+
+  use Bio::SearchIO;
+  my $parser = new Bio::SearchIO(-file    => 'file.genewise', 
+                                 -format  => 'wise',
+                                 -wisetype=> 'genewise');
+
+  while( my $result = $parser->next_result ) {}
+
+=head1 DESCRIPTION
+
+This object parses Wise output using Bio::Tools::Genewise or
+Bio::Tools::Genomewise as a helper.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO::wise;
+use vars qw(%MAPPING %MODEMAP $DEFAULT_WRITER_CLASS);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use base qw(Bio::SearchIO);
+
+# Element names that open/close a level of the report, mapped to the
+# element type (result, hit or hsp).
+%MODEMAP = ('WiseOutput' => 'result',
+	    'Hit'             => 'hit',
+	    'Hsp'             => 'hsp'
+    );
+# Data element names emitted by the parser, mapped to HSP-/HIT-/RESULT-
+# prefixed keys.  Commented-out entries are currently not mapped.
+%MAPPING =
+    (
+     'Hsp_query-from'=>  'HSP-query_start',
+     'Hsp_query-to'  =>  'HSP-query_end',
+     'Hsp_hit-from'  =>  'HSP-hit_start',
+     'Hsp_hit-to'    =>  'HSP-hit_end',
+     'Hsp_qseq'      =>  'HSP-query_seq',
+     'Hsp_hseq'      =>  'HSP-hit_seq',
+     'Hsp_midline'   =>  'HSP-homology_seq',
+     'Hsp_score'     =>  'HSP-score',
+     'Hsp_qlength'   =>  'HSP-query_length',
+     'Hsp_hlength'   =>  'HSP-hit_length',
+     'Hsp_align-len' =>  'HSP-hsp_length',
+     'Hsp_positive'   => 'HSP-conserved',
+     'Hsp_identity'   => 'HSP-identical',
+     #'Hsp_gaps'       => 'HSP-hsp_gaps',
+     #'Hsp_hitgaps'    => 'HSP-hit_gaps',
+     #'Hsp_querygaps'  => 'HSP-query_gaps',
+     
+     'Hit_id'        => 'HIT-name',
+#    'Hit_desc'      => 'HIT-description',
+#    'Hit_len'       => 'HIT-length',
+     'Hit_score'     => 'HIT-score',
+
+     'WiseOutput_program'   => 'RESULT-algorithm_name',
+     'WiseOutput_query-def' => 'RESULT-query_name',
+     'WiseOutput_query-desc'=> 'RESULT-query_description',
+     'WiseOutput_query-len' => 'RESULT-query_length',
+    );
+
+# Name of the writer class kept as this module's default.
+$DEFAULT_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
+
+
+use Bio::Tools::Genewise;
+use Bio::Tools::Genomewise;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO::wise();
+ Function: Builds a new Bio::SearchIO::wise object 
+ Returns : an instance of Bio::SearchIO::wise
+ Args    : -wise => a Bio::Tools::Genewise or Bio::Tools::Genomewise object
+
+
+=cut
+
+sub _initialize {
+    # The argument list had been mangled into "at args" (e-mail address
+    # obfuscation), which does not compile; @args is used right below.
+    my ($self,@args) = @_;
+    my ( $wisetype, $file,$fh ) =
+	$self->_rearrange([qw(WISETYPE FILE FH)], @args);
+
+    # The input is read by the Genewise/Genomewise helper created below,
+    # so the file / filehandle arguments are withheld from the parent class.
+    my @newargs;
+    while( @args ) {
+	my $key = shift @args;   # not $a: that would shadow sort's variable
+	if( $key =~ /FILE|FH/i ) {
+	    shift @args;
+	    next;
+	}
+	push @newargs, $key, shift @args;
+    }
+    $self->SUPER::_initialize(@newargs);
+
+    # Optimization: caching the EventHandler
+    # since it is used a lot during the parse.
+    $self->{'_handler_cache'} = $self->_eventHandler;
+
+    $self->wisetype($wisetype);
+    my @ioargs;
+    if( $fh ) {
+	push @ioargs, ('-fh' => $fh);
+    } elsif( $file ) {
+	push @ioargs, ('-file' => $file);
+    }
+
+    # A missing -wisetype must reach the throw() below without first
+    # triggering an "uninitialized value" warning in the pattern matches.
+    my $requested = defined $wisetype ? $wisetype : '';
+    if( $requested =~ /genewise/i ) {
+	$self->wise(Bio::Tools::Genewise->new(@ioargs));
+    } elsif( $requested =~ /genomewise/i ) {
+	$self->wise(Bio::Tools::Genomewise->new(@ioargs));
+    } else {
+	$self->throw("Must supply a -wisetype to ".ref($self)." which is one of 'genomewise' 'genewise'\n");
+    }
+    return $self;
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $hit = $searchio->next_result;
+ Function: Returns the next Result from a search
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+=cut
+
+sub next_result{
+    my ($self) = @_;
+    local $/ = "\n";
+    local $_;
+
+    # Nothing to report without a helper parser or once it has run out of
+    # predictions.
+    my $wise = $self->wise;
+    return unless $wise;
+    my $prediction = $wise->next_prediction;
+    return unless $prediction;
+
+    $self->{'_reporttype'} = uc $self->wisetype;
+    $self->start_element({'Name' => 'WiseOutput'});
+    $self->element({'Name' => 'WiseOutput_program',
+                    'Data' => $self->wisetype});
+    $self->element({'Name' => 'WiseOutput_query-def',
+                    'Data' => $wise->_target_id});
+
+    # One hit per transcript, one HSP per exon.
+    for my $transcript ( $prediction->transcripts ) {
+        my @exons = $transcript->exons;
+        my $first = $exons[0];
+        $self->start_element({'Name' => 'Hit'});
+
+        # The hit is named after the sequence of the feature supporting
+        # the first exon, when there is one.
+        if ( $first->has_tag('supporting_feature') ) {
+            my ($support) = $first->get_tag_values('supporting_feature');
+            my $protid = $support->feature2->seq_id;
+            $self->element({'Name' => 'Hit_id',
+                            'Data' => $protid});
+        }
+        $self->element({'Name' => 'Hit_score',
+                        'Data' => $first->score});
+
+        for my $exon ( @exons ) {
+            $self->start_element({'Name' => 'Hsp'});
+
+            # On the reverse strand "from" is the end and "to" the start.
+            my ($qfrom, $qto) = $exon->strand < 0
+                ? ( $exon->end,   $exon->start )
+                : ( $exon->start, $exon->end   );
+            $self->element({'Name' => 'Hsp_query-from',
+                            'Data' => $qfrom});
+            $self->element({'Name' => 'Hsp_query-to',
+                            'Data' => $qto});
+            $self->element({'Name' => 'Hsp_score',
+                            'Data' => $exon->score});
+
+            # Hit coordinates come from the exon's supporting feature.
+            if ( $exon->has_tag('supporting_feature') ) {
+                my ($sf) = $exon->get_tag_values('supporting_feature');
+                my $protein = $sf->feature2;
+                my ($hfrom, $hto) = $protein->strand < 0
+                    ? ( $protein->end,   $protein->start )
+                    : ( $protein->start, $protein->end   );
+                $self->element({'Name' => 'Hsp_hit-from',
+                                'Data' => $hfrom});
+                $self->element({'Name' => 'Hsp_hit-to',
+                                'Data' => $hto});
+            }
+
+            # Identity and positive counts are always reported as 0 here.
+            $self->element({'Name' => 'Hsp_identity',
+                            'Data' => 0});
+            $self->element({'Name' => 'Hsp_positive',
+                            'Data' => 0});
+            $self->end_element({'Name' => 'Hsp'});
+        }
+        $self->end_element({'Name' => 'Hit'});
+    }
+    $self->end_element({'Name' => 'WiseOutput'});
+    return $self->end_document();
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : $eventgenerator->start_element
+ Function: Handles a start element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub start_element{
+   my ($self, $event) = @_;
+   # we currently don't care about attributes
+
+   # Only element names listed in %MODEMAP open a new parsing mode
+   my $mode = $MODEMAP{ $event->{'Name'} };
+   return unless $mode;
+
+   # Forward the event to the handler when it wants this mode
+   my $handler = $self->_eventHandler;
+   if( $handler->will_handle($mode) ) {
+       my $method = 'start_' . lc $mode;
+       $handler->$method($event->{'Attributes'});
+   }
+
+   # Innermost open element is kept at the front of the stack
+   unshift @{$self->{'_elements'}}, $mode;
+
+   # A new result starts with a clean slate
+   if( $mode eq 'result' ) {
+       @{$self}{'_values', '_result'} = ( {}, undef );
+   }
+   return;
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : $eventgenerator->end_element
+ Function: Handles an end element event
+ Returns : none
+ Args    : hashref with at least 2 keys 'Data' and 'Name'
+
+
+=cut
+
+sub end_element {
+    my ($self, $event) = @_;
+    my $name = $event->{'Name'};
+    my $mode = $MODEMAP{$name};
+    my $rc;
+
+    if( $mode ) {
+        # Closing a mode element: let the handler build its object
+        my $handler = $self->_eventHandler;
+        if( $handler->will_handle($mode) ) {
+            my $method = 'end_' . lc $mode;
+            $rc = $handler->$method($self->{'_reporttype'},
+                                    $self->{'_values'});
+        }
+        shift @{$self->{'_elements'}};
+
+    } elsif( my $slot = $MAPPING{$name} ) {
+        # Closing a data element: file the collected text under its
+        # mapped key (one level deeper when the mapping is a hash)
+        if ( ref($slot) =~ /hash/i ) {
+            my ($outer) = keys %{$slot};
+            $self->{'_values'}->{$outer}->{ $slot->{$outer} } = $self->{'_last_data'};
+        } else {
+            $self->{'_values'}->{$slot} = $self->{'_last_data'};
+        }
+    } else {
+        $self->debug( "unknown nm $name, ignoring\n");
+    }
+
+    # remove read data if we are at end of an element
+    $self->{'_last_data'} = '';
+    if( defined $mode && $mode eq 'result' ) {
+        $self->{'_result'} = $rc;
+    }
+    return $rc;
+}
+
+=head2 element
+
+ Title   : element
+ Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str});
+ Function: Convenience method that calls start_element, characters, end_element
+ Returns : none
+ Args    : Hash ref with the keys 'Name' and 'Data'
+
+
+=cut
+
+sub element{
+   my $self = shift;
+   my ($event) = @_;
+   # A complete element is simply start, characters, end -- in that order
+   foreach my $phase ( qw(start_element characters) ) {
+       $self->$phase($event);
+   }
+   return $self->end_element($event);
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $eventgenerator->characters($str)
+ Function: Send a character events
+ Returns : none
+ Args    : string
+
+
+=cut
+
+sub characters{
+   my ($self, $event) = @_;
+   my $text = $event->{'Data'};
+
+   # Missing or whitespace-only data leaves the previous value untouched
+   return if ! defined $text;
+   return if $text =~ /^\s+$/;
+
+   $self->{'_last_data'} = $text;
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   : if( $eventgenerator->within_element($element) ) {}
+ Function: Test if we are within a particular element
+           This is different than 'in' because within can be tested
+           for a whole block.
+ Returns : boolean
+ Args    : string element name
+
+
+=cut
+
+sub within_element{
+   my ($self,$name) = @_;
+   # Nothing can match without a name to look for or without any open
+   # elements.  Each condition must short-circuit on its own: mixing
+   # && and || here let an undefined element stack be dereferenced.
+   return 0 if ( ! defined $name ||
+                 ! defined $self->{'_elements'} ||
+                 scalar @{$self->{'_elements'}} == 0 );
+   # Unlike in_element(), every open element is considered, not just
+   # the innermost one.
+   foreach my $open ( @{$self->{'_elements'}} ) {
+       return 1 if $open eq $name;
+   }
+   return 0;
+}
+
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   : if( $eventgenerator->in_element($element) ) {}
+ Function: Test if we are in a particular element
+           This is different than 'within' because 'in' only tests
+           the innermost (most recently opened) element.
+ Returns : boolean
+ Args    : string element name
+
+
+=cut
+
+sub in_element{
+   my ($self, $name) = @_;
+   # Only the innermost open element (front of the stack) counts
+   my $innermost = $self->{'_elements'}->[0];
+   return 0 unless defined $innermost;
+   return $innermost eq $name;
+}
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $eventgenerator->start_document
+ Function: Handle a start document event
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub start_document{
+    my $self = shift;
+    # Reset all per-document parsing state in one go
+    @{$self}{'_lasttype', '_values', '_result', '_elements'} =
+        ( '', {}, undef, [] );
+    $self->{'_reporttype'} = 'exonerate';
+}
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : $eventgenerator->end_document
+ Function: Handles an end document event
+ Returns : Bio::Search::Result::ResultI object
+ Args    : none
+
+
+=cut
+
+sub end_document{
+   # Extra arguments are accepted but not used.
+   my ($self,@args) = @_;
+   # The result was stored when the 'result' element was closed.
+   return $self->{'_result'};
+}
+
+
+sub write_result {
+   my ($self, $result, @extra) = @_;
+
+   # Fall back to the default writer class when none has been set
+   unless( defined $self->writer ) {
+       $self->warn("Writer not defined. Using a $DEFAULT_WRITER_CLASS");
+       $self->writer( $DEFAULT_WRITER_CLASS->new() );
+   }
+   return $self->SUPER::write_result( $result, @extra );
+}
+
+sub result_count {
+    my ($self) = @_;
+    # Plain read of the stored counter
+    return $self->{'_result_count'};
+}
+
+sub report_count {
+    # Alias for result_count()
+    my $self = shift;
+    return $self->result_count;
+}
+
+
+=head2 wise
+
+ Title   : wise
+ Usage   : $obj->wise($newval)
+ Function: Get/Set the Wise object parser
+ Returns : value of wise (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub wise{
+    my ($self, @value) = @_;
+    # Any argument, even undef, replaces the stored parser
+    $self->{'wise'} = $value[0] if @value;
+    return $self->{'wise'};
+}
+
+=head2 wisetype
+
+ Title   : wisetype
+ Usage   : $obj->wisetype($newval)
+ Function: Wise program type
+ Returns : value of wisetype (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub wisetype{
+    my ($self, @value) = @_;
+
+    # Any argument, even undef, replaces the stored program type
+    $self->{'wisetype'} = $value[0] if @value;
+    return $self->{'wisetype'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SearchIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,474 @@
+# $Id: SearchIO.pm,v 1.39.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::SearchIO
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SearchIO - Driver for parsing Sequence Database Searches 
+(BLAST, FASTA, ...)
+
+=head1 SYNOPSIS
+
+   use Bio::SearchIO;
+   # format can be 'fasta', 'blast', 'exonerate', ...
+   my $searchio = new Bio::SearchIO( -format => 'blastxml',
+                                     -file   => 'blastout.xml' );
+   while ( my $result = $searchio->next_result() ) {
+       while( my $hit = $result->next_hit ) {
+        # process the Bio::Search::Hit::HitI object
+           while( my $hsp = $hit->next_hsp ) { 
+            # process the Bio::Search::HSP::HSPI object
+           }
+       }
+   }
+
+
+=head1 DESCRIPTION
+
+This is a driver for instantiating a parser for report files from
+sequence database searches. This object serves as a wrapper for the
+format parsers in Bio::SearchIO::* - you should not need to ever
+use those format parsers directly. (For people used to the SeqIO
+system, we are deliberately using the same pattern).
+
+Once you get a SearchIO object, calling next_result() gives you back
+a L<Bio::Search::Result::ResultI> compliant object, which is an object that
+represents one Blast/Fasta/HMMER whatever report.
+
+A list of module names and formats is below:
+
+  blast      BLAST (WUBLAST, NCBIBLAST,bl2seq)   
+  fasta      FASTA -m9 and -m0
+  blasttable BLAST -m9 or -m8 output (NCBI not WUBLAST tabular)
+  megablast  MEGABLAST
+  psl        UCSC PSL format
+  waba       WABA output
+  axt        AXT format
+  sim4       Sim4
+  hmmer      HMMER hmmpfam and hmmsearch
+  exonerate  Exonerate CIGAR and VULGAR format
+  blastxml   NCBI BLAST XML
+  wise       Genewise -genesf format
+
+Also see the SearchIO HOWTO:
+http://bioperl.open-bio.org/wiki/HOWTO:SearchIO
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich & Steve Chervitz
+
+Email jason-at-bioperl.org
+Email sac-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SearchIO;
+use strict;
+
+# Object preamble - inherits from Bio::Root::IO
+
+use Bio::SearchIO::SearchResultEventBuilder;
+
+# Special exception class for exceptions during parsing.
+# End users should not ever see these.
+# For an example of usage, see blast.pm.
+@Bio::SearchIO::InternalParserError::ISA = qw(Bio::Root::Exception);
+
+use Symbol();
+
+use base qw(Bio::Root::IO Bio::Event::EventGeneratorI Bio::AnalysisParserI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SearchIO();
+ Function: Builds a new Bio::SearchIO object 
+ Returns : Bio::SearchIO initialized with the correct format
+ Args    : -file           => $filename
+           -format         => format
+           -fh             => filehandle to attach to
+           -result_factory => Object implementing Bio::Factory::ObjectFactoryI
+           -hit_factory    => Object implementing Bio::Factory::ObjectFactoryI
+           -hsp_factory    => Object implementing Bio::Factory::ObjectFactoryI
+           -writer         => Object implementing Bio::SearchIO::SearchWriterI
+           -output_format  => output format, which will dynamically load writer
+
+See L<Bio::Factory::ObjectFactoryI>, L<Bio::SearchIO::SearchWriterI>
+
+Any factory objects in the arguments are passed along to the
+SearchResultEventBuilder object which holds these factories and sets
+default ones if none are supplied as arguments.
+
+=cut
+
+sub new {
+  my($caller,@args) = @_;
+  my $class = ref($caller) || $caller;
+    
+  # or do we want to call SUPER on an object if $caller is an
+  # object?
+  if( $class =~ /Bio::SearchIO::(\S+)/ ) {
+    # Called on a concrete format parser: build and initialize it directly.
+    my ($self) = $class->SUPER::new(@args);        
+    $self->_initialize(@args);
+    return $self;
+  } else { 
+    # Called on the driver class: work out the format, load the matching
+    # Bio::SearchIO::<format> module and delegate construction to it.
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    my $format = $param{'-format'} ||
+      $class->_guess_format( $param{'-file'} || $ARGV[0] ) || 'blast';
+
+    my $output_format = $param{'-output_format'};
+    my $writer = undef;
+
+    if( defined $output_format ) {
+        # -writer and -output_format are mutually exclusive
+        if( defined $param{'-writer'} ) {
+            my $dummy = Bio::Root::Root->new();
+            $dummy->throw("Both writer and output format specified - not good");
+        }
+
+        if( $output_format =~ /^blast$/i ) {
+            $output_format = 'TextResultWriter';
+        }
+        # Load the writer class on demand and pass it on as -writer
+        my $output_module = "Bio::SearchIO::Writer::".$output_format;
+        $class->_load_module($output_module);
+        $writer = $output_module->new(@args);
+        push(@args,"-writer",$writer);
+    }
+
+
+    # normalize capitalization to lower case
+    $format = "\L$format";
+    
+    return unless( $class->_load_format_module($format) );
+    return "Bio::SearchIO::${format}"->new(@args);
+  }
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::SearchIO->newFh(-file=>$filename,
+                                      -format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::SearchIO->newFh(-file=>$filename,
+                                      -format=>'Format')
+           $result = <$fh>;   # read a ResultI object
+           print $fh $result; # write a ResultI object
+ Returns : filehandle tied to the Bio::SearchIO::Fh class
+ Args    :
+
+=cut
+
+sub newFh {
+  my ($class, @params) = @_;
+  # Construct the parser first; give up quietly if that fails
+  my $self = $class->new(@params);
+  return unless $self;
+  return $self->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $result = <$fh>;     # read a ResultI object
+           print $fh $result;   # write a ResultI object
+ Returns : filehandle tied to the Bio::SearchIO::Fh class
+ Args    :
+
+=cut
+
+
+sub fh {
+  my ($self) = @_;
+  # Tie a fresh anonymous glob to this object's class, with the object
+  # itself passed on to TIEHANDLE
+  my $tie_class = ref($self) || $self;
+  my $glob = Symbol::gensym();
+  tie $$glob, $tie_class, $self;
+  return $glob;
+}
+
+=head2 attach_EventHandler
+
+ Title   : attach_EventHandler
+ Usage   : $parser->attach_EventHandler($handler)
+ Function: Adds an event handler to listen for events
+ Returns : none
+ Args    : Bio::SearchIO::EventHandlerI
+
+See L<Bio::SearchIO::EventHandlerI>
+
+=cut
+
+sub attach_EventHandler{
+    my ($self,$handler) = @_;
+    # Nothing to attach
+    return if( ! $handler );
+    if( ! $handler->isa('Bio::SearchIO::EventHandlerI') ) {
+        $self->warn("Ignoring request to attatch handler ".ref($handler). ' because it is not a Bio::SearchIO::EventHandlerI');
+        # Actually ignore the request, as the warning says, instead of
+        # installing a handler of the wrong type.
+        return;
+    }
+    $self->{'_handler'} = $handler;
+    return;
+}
+
+=head2 _eventHandler
+
+ Title   : _eventHandler
+ Usage   : private
+ Function: Get the EventHandler
+ Returns : Bio::SearchIO::EventHandlerI
+ Args    : none
+
+See L<Bio::SearchIO::EventHandlerI>
+
+=cut
+
+sub _eventHandler{
+   my $self = shift;
+   # Whatever attach_EventHandler last stored (may be undef)
+   return $self->{'_handler'};
+}
+
+sub _initialize {
+    my($self, @args) = @_;
+    $self->{'_handler'} = undef;
+    # not really necessary unless we put more in RootI
+    #$self->SUPER::_initialize(@args);
+
+    # initialize the IO part
+    $self->_initialize_io(@args);
+
+    # The default event handler receives the same arguments, so any
+    # factory objects among them are passed along to it.
+    $self->attach_EventHandler(
+        Bio::SearchIO::SearchResultEventBuilder->new(@args) );
+    $self->{'_reporttype'} = '';
+    $self->{_notfirsttime} = 0;
+
+    # Install a writer only when one was supplied
+    my ( $writer ) = $self->_rearrange([qw(WRITER)], @args);
+
+    $self->writer( $writer ) if $writer;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : $result = stream->next_result
+ Function: Reads the next ResultI object from the stream and returns it.
+
+           Certain driver modules may encounter entries in the stream that
+           are either misformatted or that use syntax not yet understood
+           by the driver. If such an incident is recoverable, e.g., by
+           dismissing a feature of a feature table or some other non-mandatory
+           part of an entry, the driver will issue a warning. In the case
+           of a non-recoverable situation an exception will be thrown.
+           Do not assume that you can resume parsing the same stream after
+           catching the exception. Note that you can always turn recoverable
+           errors into exceptions by calling $stream->verbose(2) (see
+           Bio::Root::RootI POD page).
+ Returns : A Bio::Search::Result::ResultI object
+ Args    : n/a
+
+See L<Bio::Root::RootI>
+
+=cut
+
+sub next_result {
+   my $self = shift;
+   # Abstract here: each format parser supplies its own implementation
+   return $self->throw_not_implemented;
+}
+
+=head2 write_result
+
+ Title   : write_result
+ Usage   : $stream->write_result($result_result, @other_args)
+ Function: Writes data from the $result_result object into the stream.
+         : Delegates to the to_string() method of the associated 
+         : WriterI object.
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Search::Result::ResultI object,
+         : plus any other arguments for the Writer
+ Throws  : Bio::Root::Exception if a Writer has not been set.
+
+See L<Bio::Root::Exception>
+
+=cut
+
+sub write_result {
+   my ($self, $result, @writer_args) = @_;
+
+   $self->throw("ResultWriter not defined.")
+       unless ref($self->{'_result_writer'});
+
+   # Without explicit arguments the writer is told whether this is the
+   # first result being written
+   push @writer_args, $self->{'_notfirsttime'} if ! @writer_args;
+
+   my $report = $self->writer->to_string( $result, @writer_args );
+   $self->{'_notfirsttime'} = 1;
+   if( defined $report ) {
+       $self->_print( "$report" );
+   }
+   return 1;
+}
+
+
+=head2 writer
+
+ Title   : writer
+ Usage   : $writer = $stream->writer;
+ Function: Sets/Gets a SearchWriterI object to be used for this searchIO.
+ Returns : The Bio::SearchIO::SearchWriterI object currently set (if any)
+ Args    : Bio::SearchIO::SearchWriterI object (when setting)
+ Throws  : Bio::Root::Exception if a non-Bio::SearchIO::SearchWriterI object
+           is passed in.
+
+=cut
+
+sub writer {
+    my ($self, $candidate) = @_;
+    # Called without a (defined) argument this is a plain getter
+    if( defined $candidate ) {
+        if( ref($candidate) && $candidate->isa( 'Bio::SearchIO::SearchWriterI' ) ) {
+            $self->{'_result_writer'} = $candidate;
+        }
+        else {
+            $self->throw("Can't set ResultWriter. Not a Bio::SearchIO::SearchWriterI: $candidate");
+        }
+    }
+    return $self->{'_result_writer'};
+}
+
+
+=head2 result_count
+
+ Title   : result_count
+ Usage   : $num = $stream->result_count;
+ Function: Gets the number of Blast results that have been parsed.
+ Returns : integer
+ Args    : none
+ Throws  : none
+
+=cut
+
+sub result_count {
+    my ($self) = @_;
+    # Abstract here: each format parser keeps its own count
+    return $self->throw_not_implemented;
+}
+
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL SearchIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example : 
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _load_format_module {
+  my ($self, $format) = @_;
+  my $module = "Bio::SearchIO::${format}";
+
+  # Trap load failures so a diagnostic can be printed; $ok stays
+  # undefined when the module could not be loaded
+  my $ok = eval { $self->_load_module($module) };
+  if ( $@ ) {
+      print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the SearchIO system please see the SearchIO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+  }
+  return $ok;
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+sub _guess_format {
+   # A lexical is used for the file name so that the caller's $_ is
+   # left untouched.
+   my ($class, $filename) = @_;
+   return unless $filename;
+
+   # Formats are recognised by file name extension, tested in this order
+   return 'blast' if $filename =~ /\.(blast|t?bl\w)$/i;
+   return 'fasta' if $filename =~ /\.
+                      (?: t? fas (?:ta)? |
+                       m\d+ |
+                       (?: t? (?: fa |  fx |  fy |  ff |  fs ) ) |
+                       (?: (?:ss | os | ps) (?:earch)? ))
+                      $/ix;
+   return 'blastxml'  if $filename =~ /\.(blast)?xml$/i;
+   return 'exonerate' if $filename =~ /\.exon(erate)?/i;
+
+   # No extension recognised: the caller falls back to its default
+   return;
+}
+
+sub close {
+    my ($self, @args) = @_;
+
+    # Let an attached writer emit its report footer, then detach it
+    my $writer = $self->writer;
+    if( $writer ) {
+        $self->_print($writer->end_report());
+        $self->{'_result_writer'} = undef;
+    }
+    return $self->SUPER::close(@args);
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # Close the stream first, but only when a filehandle is still set
+    if( defined $self->_fh ) {
+        $self->close();
+    }
+    $self->SUPER::DESTROY;
+}
+
+sub TIEHANDLE {
+  my ($class, $processor) = @_;
+  # The tied object only remembers the parser it delegates to
+  my %state = ( processor => $processor );
+  return bless \%state, $class;
+}
+
+sub READLINE {
+  my ($self) = @_;
+  my $processor = $self->{'processor'};
+
+  # Scalar context: just the next result
+  if( ! wantarray ) {
+      return $processor->next_result();
+  }
+
+  # List context: drain the stream
+  my @results;
+  while( my $result = $processor->next_result() ) {
+      push @results, $result;
+  }
+  return @results;
+}
+
+sub PRINT {
+  my ($self, @items) = @_;
+  # Printing to the tied handle writes through the wrapped parser
+  return $self->{'processor'}->write_result(@items);
+}
+
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/BaseSeqProcessor.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/BaseSeqProcessor.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/BaseSeqProcessor.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,310 @@
+# $Id: BaseSeqProcessor.pm,v 1.7.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::BaseSeqProcessor
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::BaseSeqProcessor - Base implementation for a SequenceProcessor
+
+=head1 SYNOPSIS
+
+    # you need to derive your own processor from this one
+
+=head1 DESCRIPTION
+
+This provides just a basic framework for implementations of
+L<Bio::Factory::SequenceProcessorI>.
+
+Essentially what it does is support a parameter to new() to set
+sequence factory and source stream, and a next_seq() implementation
+that will use a queue to be filled by a class overriding
+process_seq().
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::BaseSeqProcessor;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::Factory::SequenceProcessorI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Seq::BaseSeqProcessor();
+ Function: Builds a new Bio::Seq::BaseSeqProcessor object 
+ Returns : an instance of Bio::Seq::BaseSeqProcessor
+ Args    : Named parameters. Currently supported are
+             -seqfactory  the Bio::Factory::SequenceFactoryI object to use
+             -source_stream the Bio::Factory::SequenceStreamI object to
+                          which we are chained
+
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+    
+    my $self = $class->SUPER::new(@args);
+
+    my ($stream,$fact) =
+        $self->_rearrange([qw(SOURCE_STREAM SEQFACTORY)], @args);
+
+    # Queue of processed sequences waiting to be handed out by next_seq()
+    $self->{'_queue'} = [];
+    # Set the factory first: source_stream() only adopts the stream's
+    # factory when none has been set yet.
+    $self->sequence_factory($fact) if $fact;
+    $self->source_stream($stream) if $stream;
+    
+    return $self;
+}
+
+=head1 L<Bio::Factory::SequenceProcessorI> methods
+
+=cut
+
+=head2 source_stream
+
+ Title   : source_stream
+ Usage   : $obj->source_stream($newval)
+ Function: Get/set the source sequence stream for this sequence
+           processor.
+
+ Example : 
+ Returns : A Bio::Factory::SequenceStreamI compliant object
+ Args    : on set, new value (a Bio::Factory::SequenceStreamI compliant
+           object)
+
+
+=cut
+
+sub source_stream{
+    my ($self, @value) = @_;
+
+    # Getter
+    return $self->{'source_stream'} unless @value;
+
+    # Setter: adopt the stream's sequence factory unless we already
+    # have one of our own (or the stream has none)
+    my $stream  = $value[0];
+    my $factory = $stream->sequence_factory();
+    if( ! $self->sequence_factory() && $factory ) {
+        $self->sequence_factory($factory);
+    }
+    return $self->{'source_stream'} = $stream;
+}
+
+=head1 L<Bio::Factory::SequenceStreamI> methods
+
+=cut
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = stream->next_seq
+ Function: Reads the next sequence object from the stream and returns it.
+
+           This implementation will obtain objects from the source
+           stream as necessary and pass them to process_seq() for
+           processing. This method will return the objects one at a
+           time that process_seq() returns.
+
+ Returns : a Bio::Seq sequence object
+ Args    : none
+
+See L<Bio::Factory::SequenceStreamI::next_seq>
+
+=cut
+
+sub next_seq{
+    my ($self) = @_;
+
+    # Refill an empty queue: pull from the source until processing
+    # yields at least one sequence or the source runs dry
+    unless( @{$self->{'_queue'}} ) {
+        while( my $raw = $self->source_stream->next_seq() ) {
+            my @processed = $self->process_seq($raw);
+            # we may get zero seqs returned
+            next unless @processed;
+            push @{$self->{'_queue'}}, @processed;
+            last;
+        }
+    }
+    # hand out the next queued sequence (undef once everything is used up)
+    my $next = shift @{$self->{'_queue'}};
+    return $next;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: Writes the result(s) of processing the sequence object into
+           the stream.
+
+           You need to override this method in order not to alter
+           (process) sequence objects before output.
+
+ Returns : 1 for success and 0 for error. The method stops attempting
+           to write objects after the first error returned from the
+           source stream. Otherwise the return value is the value
+           returned from the source stream from writing the last
+           object resulting from processing the last sequence object
+           given as argument.
+
+ Args    : Bio::SeqI object, or an array of such objects
+
+=cut
+
+sub write_seq{
+    my ($self, @seqs) = @_;
+    my $ret;
+    foreach my $seq (@seqs) {
+        # Write what processing produced, not the unprocessed input:
+        # process_seq() may return zero, one or several objects per
+        # input sequence.
+        foreach my $processed ($self->process_seq($seq)) {
+            $ret = $self->source_stream->write_seq($processed);
+            # stop at the first write error reported by the source stream
+            return unless $ret;
+        }
+    }
+    return $ret;
+}
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $seqio->sequence_factory($seqfactory)
+ Function: Get/set the Bio::Factory::SequenceFactoryI
+ Returns : Bio::Factory::SequenceFactoryI
+ Args    : on set, new value (a Bio::Factory::SequenceFactoryI object)
+
+
+=cut
+
+sub sequence_factory{
+    my ($self, @value) = @_;
+
+    # Any argument, even undef, replaces the stored factory
+    $self->{'sequence_factory'} = $value[0] if @value;
+    return $self->{'sequence_factory'};
+}
+
+=head2 object_factory
+
+ Title   : object_factory
+ Usage   : $obj->object_factory($newval)
+ Function: This is an alias to sequence_factory with a more generic name.
+ Example : 
+ Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
+ Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI> 
+           compliant object or undef, optional)
+
+
+=cut
+
+sub object_factory{
+    # Pure alias: all arguments go straight to sequence_factory()
+    my ($self, @args) = @_;
+    return $self->sequence_factory(@args);
+}
+
+=head2 close
+
+ Title   : close
+ Usage   :
+ Function: Closes the stream. We override this here in order to cascade
+           to the source stream.
+ Example :
+ Returns : 
+ Args    : none
+
+
+=cut
+
+sub close{
+    my ($self, @args) = @_;
+    # Without a source stream there is nothing to close
+    my $stream = $self->source_stream();
+    return 1 unless $stream;
+    return $stream->close(@args);
+}
+
+=head1 To be overridden by a derived class
+
+=cut
+
+=head2 process_seq
+
+ Title   : process_seq
+ Usage   :
+ Function: This is the method that is supposed to do the actual
+           processing. It needs to be overridden to do what you want
+           it to do.
+
+           Generally, you do not have to override or implement any other
+           method to derive your own sequence processor.
+
+           The implementation provided here just returns the unaltered
+           input sequence and hence is not very useful other than
+           serving as a neutral default processor.
+
+ Example :
+ Returns : An array of zero or more Bio::PrimarySeqI (or derived
+           interface) compliant object as the result of processing the
+           input sequence.
+ Args    : A Bio::PrimarySeqI (or derived interface) compliant object
+           to be processed.
+
+
+=cut
+
+sub process_seq{
+    my $self = shift;
+    my ($seq) = @_;
+
+    # Neutral default: hand the input back unchanged
+    return $seq;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/EncodedSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/EncodedSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/EncodedSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,593 @@
+# $Id: EncodedSeq.pm,v 1.11.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::EncodedSeq
+#
+# Cared for by Aaron Mackey
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::EncodedSeq - subtype of L<Bio::LocatableSeq|Bio::LocatableSeq> to store DNA that encodes a protein
+
+=head1 SYNOPSIS
+
+  $obj = new Bio::Seq::EncodedSeq(-seq => $dna,
+                                  -encoding => "CCCCCCCIIIIICCCCC",
+                                  -start => 1,
+                                  -strand => 1,
+                                  -length => 17);
+
+  # splice out (and possibly revcomp) the coding sequence
+  $cds = $obj->cds;
+
+  # obtain the protein translation of the sequence
+  $prot = $obj->translate;
+
+  # other access/inspection routines as with Bio::LocatableSeq and
+  # Bio::SeqI; note that coordinates are relative only to the DNA
+  # sequence, not its implicit encoded protein sequence.
+
+=head1 DESCRIPTION
+
+Bio::Seq::EncodedSeq is a L<Bio::LocatableSeq|Bio::LocatableSeq>
+object that holds a DNA sequence as well as information about the
+coding potential of that DNA sequence.  It is meant to be useful in an
+alignment context, where the DNA may contain frameshifts, gaps and/or
+introns, or in describing the transcript of a gene.  An EncodedSeq
+provides the ability to access the "spliced" coding sequence, meaning
+that all introns and gaps are removed, and any frameshifts are
+adjusted to provide a "clean" CDS.
+
+In order to make simultaneous use of either the DNA or the implicit
+encoded protein sequence coordinates, please see
+L<Bio::Coordinate::EncodingPair>.
+
+=head1 ENCODING
+
+We use the term "encoding" here to refer to the series of symbols that
+we use to identify which residues of a DNA sequence are protein-coding
+(i.e. part of a codon), intronic, part of a 5' or 3' UTR, frameshift
+"mutations", etc.  From this information, a Bio::Seq::EncodedSeq is
+able to "figure out" its translational CDS.  There are two sets of
+coding characters, one termed "implicit" and one termed "explicit".
+
+The "implicit" encoding is a bit simpler than the "explicit" encoding:
+'C' is used for any nucleotide that's part of a codon, 'U' for any
+UTR, etc.  The full list is shown below:
+
+ Code  Meaning
+ ----  -------
+  C    coding
+  I    intronic
+  U    untranslated
+  G    gapped (for use in alignments)
+  F    a "forward", +1 frameshift
+  B    a "backward", -1 frameshift
+
+The "explicit" encoding is just an expansion of the "implicit"
+encoding, to denote phase:
+
+ Code  Meaning
+ ----  -------
+  C    coding, 1st codon position
+  D    coding, 2nd codon position
+  E    coding, 3rd codon position
+
+  I    intronic, phase 0 (relative to intron begin)
+  J    intronic, phase 1
+  K    intronic, phase 2
+
+  U    untranslated 5'UTR (seen before any coding/intronic symbol)
+  V    untranslated 3'UTR (seen after a coding/intronic symbol)
+
+  G    gapped (for use in alignments)
+  F    a "forward", +1 frameshift
+  B    a "backward", -1 frameshift
+
+Note that the explicit coding is meant to provide easy access to
+position/phase specific nucleotides:
+
+  $obj = new Bio::Seq::EncodedSeq (-seq => "ACAATCAGACTACG...",
+                                   -encoding => "CCCCCCIII..."
+                                  );
+
+  # fetch arrays of nucleotides at each codon position:
+  my @pos1 = $obj->dnaseq(encoding => 'C', explicit => 1);
+  my @pos2 = $obj->dnaseq(encoding => 'D');
+  my @pos3 = $obj->dnaseq(encoding => 'E');
+
+  # fetch arrays of "3-1" codon dinucleotides, useful for genomic
+  # signature analyses without compounding influences of codon bias:
+  my @pairs = $obj->dnaseq(encoding => 'EC');
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Seq::EncodedSeq;
+
+use strict;
+
+
+use base qw(Bio::LocatableSeq);
+
+=head2 new
+
+ Title   : new
+ Usage   : $obj = Bio::Seq::EncodedSeq->new(-seq      => "AGTACGTGTCATG",
+                                            -encoding => "CCCCCCFCCCCCC",
+                                            -id       => "myseq",
+                                            -start    => 1,
+                                            -end      => 13,
+                                            -strand   => 1
+                                      );
+ Function: creates a new Bio::Seq::EncodedSeq object from a supplied DNA
+           sequence
+ Returns : a new Bio::Seq::EncodedSeq object
+
+ Args    : seq      - primary nucleotide sequence used to encode the
+                      protein; note that any positions involved in a
+                      gap ('G') or backward frameshift ('B') should
+                      have one or more gap characters; if the encoding
+                      specifies G or B, but no (or not enough) gap
+                      characters exist, *they'll be added*; similarly,
+                      if there are gap characters without a
+                      corresponding G or B encoding, G's will be
+                      inserted into the encoding.  This allows some
+                      flexibility in specifying your sequence and
+                      coding without having to calculate a lot of the
+                      encoding for yourself.
+
+           encoding - a string of characters (see Encoding Table)
+                      describing each position of the sequence;
+                      backwards frameshifts implied by the encoding
+                      but not present in the sequence will be added
+                      (as '-'s) to the sequence.  If not
+                      supplied, it will be assumed that all positions
+                      are coding (C).  Encoding may include either
+                      implicit phase encoding characters (i.e. "CCC")
+                      and/or explicit encoding characters (i.e. "CDE").
+                      Additionally, prefixed numbers may be used to
+                      denote repetition (i.e. "27C3I28C").
+
+                      Alternatively, encoding may be a hashref
+                      datastructure, with encoding characters as keys
+                      and Bio::LocationI objects (or arrayrefs of
+                      Bio::LocationI objects) as values, e.g.:
+
+                      { C => [ Bio::Location::Simple->new(1,9),
+                               Bio::Location::Simple->new(11,13) ],
+                        F => Bio::Location::Simple->new(10,10),
+                      } # same as "CCCCCCCCCFCCC"
+
+                      Note that if the location ranges overlap, the
+                      behavior of the encoding will be undefined
+                      (well, it will be defined, but only according to
+                      the order in which the hash keys are read, which
+                      is basically undefined ... just don't do that).
+
+           id, start, end, strand - as with Bio::LocatableSeq; note
+                      that the coordinates are relative to the
+                      encoding DNA sequence, not the implicit protein
+                      sequence.  If strand is reversed, then the
+                      encoding is assumed to be relative to the
+                      reverse strand as well.
+
+=cut
+
+#'
+
+sub new {
+    my ($class, @args) = @_;
+
+    # The parent constructor does the generic work; the alphabet is
+    # always forced to DNA for this class.
+    my $self = $class->SUPER::new(@args, -alphabet => 'dna');
+
+    my ($encoding) = $self->_rearrange([qw(ENCODING)], @args);
+
+    # set the real encoding: install the one supplied, or let
+    # _recheck_encoding work one out when none was given.
+    if (!$encoding) {
+        $self->_recheck_encoding;
+    }
+    else {
+        $self->encoding($encoding);
+    }
+
+    return $self;
+}
+
+=head2 encoding
+
+ Title   : encoding
+ Usage   : $obj->encoding("CCCCCC");
+           $obj->encoding( -encoding => { I => $location } );
+           $enc = $obj->encoding(-explicit => 1);
+           $enc = $obj->encoding("CCCCCC", -explicit => 1);
+           $enc = $obj->encoding(-location => $location,
+                                 -explicit => 1,
+                                 -absolute => 1 );
+ Function: get/set the object's encoding, either globally or by location(s).
+ Returns : the (possibly new) encoding string.
+ Args    : encoding - see the encoding argument to the new() function.
+
+           explicit - whether or not to return explicit phase
+                      information in the coding (i.e. "CCC" becomes
+                      "CDE", "III" becomes "IJK", etc); defaults to 0.
+
+           location - optional; location to get/set the encoding.
+                      Defaults to the entire sequence.
+
+           absolute - whether or not the locational elements (either
+                      in the encoding hashref or the location
+                      argument) are relative to the absolute start/end
+                      of the Bio::LocatableSeq, or to the internal,
+                      relative coordinate system (beginning at 1);
+                      defaults to 0 (i.e. relative)
+
+=cut
+
+sub encoding {
+
+    # Get/set the encoding string, either for the whole sequence or for
+    # the stretch covered by -location.  Returns the encoding in
+    # explicit form when -explicit is true (or when the supplied
+    # encoding already uses explicit symbols), otherwise collapsed to
+    # implicit form via _convert2implicit.
+    my ($self, @args) = @_;
+    # NOTE(review): $abs (-absolute) is unpacked but never consulted below.
+    my ($enc, $loc, $exp, $abs) = $self->_rearrange([qw(ENCODING LOCATION EXPLICIT ABSOLUTE)], @args);
+
+    if (!$enc) {
+        # do nothing; _recheck_encoding will fix for us, if necessary
+    } elsif (ref $enc eq 'HASH') {
+        $self->throw( -class => 'Bio::Root::NotImplemented',
+                      -text  => "Hashref functionality not yet implemented;\nplease email me if you really need this.");
+        #TODO: finish all this
+        # (presumably never reached while the throw above is in place)
+        while (my ($char, $locs) = each %$enc) {
+            if (ref $locs eq 'ARRAY') {
+            } elsif (UNIVERSAL::isa($locs, "Bio::LocationI")) {
+            } else {
+                $self->throw("Only a scalar or a ref to a hash; not a ref to a @{[ lc ref $enc ]}");
+            }
+        }
+    } elsif (! ref $enc) {
+        $enc = uc $enc;
+        # explicit-only symbols in the input switch on explicit output
+        $exp = 1 if (!defined $exp && $enc =~ m/[DEJKV]/o);
+
+        if ($enc =~ m/\d/o) { # numerically "enhanced" encoding
+            my $numenc = $enc;
+            $enc = '';
+            # /c keeps pos() where the last successful match ended, so
+            # that an unparsable remainder can be detected afterwards
+            while ($numenc =~ m/\G(\d*)([CDEIJKUVGFB])/gc) {
+                my ($num, $char) = ($1, $2);
+                $num = 1 unless $num;
+                $enc .= $char x $num;
+            }
+            # refuse to silently drop whatever follows the first symbol
+            # that could not be parsed (bad character or dangling count)
+            my $parsed = pos($numenc) || 0;
+            $self->throw("Unable to parse encoding '$numenc' at offset $parsed")
+                if $parsed < length $numenc;
+        }
+
+        # both patterns capture the offending character for the message
+        if (defined $exp && $exp == 0 && $enc =~ m/([^CIUGFB])/) {
+            $self->throw("Unrecognized character '$1' in implicit encoding");
+        } elsif ($enc =~ m/([^CDEIJKUVGFB])/) {
+            $self->throw("Unrecognized character '$1' in explicit encoding");
+        }
+
+        if ($loc) { # a global location, over which to apply the specified encoding.
+
+            my $between = $loc->location_type eq 'IN-BETWEEN';
+
+            # balk if too many non-gap characters present in encoding:
+            my ($ct) = $enc =~ tr/GB/GB/;
+            $ct = length($enc) - $ct;
+            $self->throw("Location length doesn't match number of coding chars in encoding!")
+                if ($loc->location_type eq 'EXACT' &&  $loc->length != $ct);
+
+            my $start = $loc->start;
+            my $end = $loc->end;
+
+            # strip any encoding that hangs off the ends of the sequence:
+            if ($start < $self->start) {
+                my $diff = $self->start - $start;
+                $start = $self->start;
+                $enc = substr($enc, $diff);
+            }
+            if ($end > $self->end) {
+                my $diff = $end - $self->end;
+                $end = $self->end;
+                # drop the overhanging tail (keep everything before it)
+                $enc = substr($enc, 0, -$diff);
+            }
+
+            my $currenc = $self->{_encoding};
+            my $currseq = $self->seq;
+
+            my ($spanstart, $spanend) = ($self->column_from_residue_number($start),
+                                         $self->column_from_residue_number($end) );
+
+            if ($currseq) {
+                # strip any gaps in sequence spanned by this location:
+                ($spanstart, $spanend) = ($spanend, $spanstart) if $self->strand < 0;
+                my $lead = $spanstart - ($between ? 0 : 1);
+                my $span = $spanend - $spanstart + ($between ? -1 : 1);
+                my ($before, $in, $after) = $currseq =~ m/(.{$lead})(.{$span})(.*)/x;
+                $in ||= '';
+                $in =~ s/[\.\-]+//g;
+                $currseq = ($before||'') . $in. ($after||'');
+                # change seq without changing the alphabet
+                $self->seq($currseq,$self->alphabet());
+            }
+
+            # splice the new encoding into the current one, working on a
+            # reversed copy when the sequence is on the reverse strand
+            # NOTE(review): $spanstart appears to be a 1-based column yet is
+            # used as a substr offset here and below -- confirm intent.
+            $currenc = reverse $currenc if $self->strand < 0;
+            substr($currenc, $spanstart, $spanend - $spanstart + ($between ? -1 : 1),
+                   $self->strand >= 0 ? $enc : reverse $enc);
+            $currenc = reverse $currenc if $self->strand < 0;
+
+            $self->{_encoding} = $currenc;
+            $self->_recheck_encoding;
+
+            # read back the (rechecked) encoding over the same span
+            $currenc = $self->{_encoding};
+            $currenc = reverse $currenc if $self->strand < 0;
+            $enc = substr($currenc, $spanstart, length $enc);
+            $enc = reverse $enc if $self->strand < 0;
+
+            return $exp ? $enc: $self->_convert2implicit($enc);
+
+        } else {
+            # presume a global redefinition; strip any current gap
+            # characters in the sequence so they don't corrupt the
+            # encoding
+            my $dna = $self->seq;
+            $dna =~ s/[\.\-]//g;
+            $self->seq($dna, 'dna');
+            $self->{_encoding} = $enc;
+        }
+    } else {
+        $self->throw("Only a scalar or a ref to a hash; not a ref to a @{[ lc ref $enc ]}");
+    }
+
+    $self->_recheck_encoding();
+
+    return $exp ? $self->{_encoding} : $self->_convert2implicit($self->{_encoding});
+}
+
+sub _convert2implicit {
+    my ($self, $explicit) = @_;
+
+    # Collapse the phase-specific symbols onto their implicit
+    # counterparts in a single pass: D,E -> C; J,K -> I; V -> U.
+    (my $implicit = $explicit) =~ tr/DEJKV/CCIIU/;
+
+    return $implicit;
+}
+
+sub _recheck_encoding {
+
+    # Reconcile $self->{_encoding} with the stored sequence and rewrite
+    # both: gap characters and G/B symbols are made to line up, a missing
+    # or short encoding is filled in, and the encoding is converted to
+    # explicit phase symbols (CDE / IJK / V).
+    my $self = shift;
+
+    my @enc = split //, ($self->{_encoding} || '');
+
+    # work on the parent's raw sequence; flipped for the reverse strand
+    # here and flipped back before it is stored again below
+    my @nt = split(//, $self->SUPER::seq);
+    @nt = reverse @nt if $self->strand && $self->strand < 0;
+
+    # make sure an encoding exists!
+    # (default: one 'C' per non-gap character of the sequence)
+    @enc = ('C') x scalar grep { !/[\.\-]/o } @nt
+	unless @enc;
+
+    # check for gaps to be truly present in the sequence
+    # and vice versa
+    my $i;
+    for ($i = 0 ; $i < @nt && $i < @enc ; $i++) {
+	if ($nt[$i] =~ /[\.\-]/o && $enc[$i] !~ m/[GB]/o) {
+	    # gap in the sequence without G/B in the encoding: insert a 'G'
+	    splice(@enc, $i, 0, 'G');
+	} elsif ($nt[$i] !~ /[\.\-]/o && $enc[$i] =~ m/[GB]/o) {
+	    # G/B in the encoding without a gap in the sequence: insert a '-'
+	    splice(@nt, $i, 0, '-');
+	}
+    }
+    if ($i < @enc) {
+	# extra encoding; presumably all gaps?
+	for (  ; $i < @enc ; $i++) {
+	    if ($enc[$i] =~ m/[GB]/o) {
+		push @nt, '-';
+	    } else {
+		$self->throw("Extraneous encoding info: " . join('', @enc[$i..$#enc]));
+	    }
+	}
+    } elsif ($i < @nt) {
+	# extra sequence: encode trailing gaps as 'G', everything else as 'C'
+	for (  ; $i < @nt ; $i++) {
+	    if ($nt[$i] =~ m/[\.\-]/o) {
+		push @enc, 'G';
+	    } else {
+		push @enc, 'C';
+	    }
+	}
+    }
+
+    my @cde_array = qw(C D E);
+    my @ijk_array = qw(I J K);
+    # convert any leftover implicit coding into explicit coding
+    # $Cct / $Ict : running counts used (mod 3) to pick the phase symbol
+    # $Uct        : set to 1 once a coding or intronic symbol was seen
+    # $Vct        : set to 1 once a 'V' has been written
+    # $Vwarned    : starts undefined; limits the warning to one emission
+    my ($Cct, $Ict, $Uct, $Vct, $Vwarned) = (0, 0, 0, 0);
+    for ($i = 0 ; $i < @enc ; $i++) {
+	if ($enc[$i] =~ m/[CDE]/o) {
+	    my  $temp_index = $Cct %3;
+	    $enc[$i] = $cde_array[$temp_index];
+	    # a coding symbol restarts the intron phase count
+	    $Cct++; $Ict = 0; $Uct = 1;
+	    $self->warn("3' untranslated encoding (V) seen prior to other coding symbols")
+		if ($Vct && !$Vwarned++);
+	} elsif ($enc[$i] =~ m/[IJK]/o) {
+	    $enc[$i] = $ijk_array[$Ict % 3];
+	    $Ict++; $Uct = 1;
+	    $self->warn("3' untranslated encoding (V) seen before other coding symbols")
+		if ($Vct && !$Vwarned++);
+	} elsif ($enc[$i] =~ m/[UV]/o) {
+	    # U/V met after a coding/intronic symbol is rewritten as 'V';
+	    # before that point the symbol is left exactly as supplied
+	    if ($Uct == 1) {
+		$enc[$i] = 'V';
+		$Vct = 1;
+	    }
+	} elsif ($enc[$i] eq 'B') {
+	    # a backward frameshift advances both phase counters
+	    $Cct++; $Ict++
+	} elsif ($enc[$i] eq 'G') {
+	    # gap; leave alone
+	}
+    }
+
+    @nt = reverse @nt if $self->strand && $self->strand < 0;
+
+    # the sequence is written straight into the object hash rather than
+    # through the seq() accessor (see the disabled call below)
+    $self->{'seq'} = join('', @nt);
+    # $self->seq(join('', @nt), 'dna');
+    $self->{_encoding} = join '', @enc;
+}
+
+=head2 cds
+
+ Title   : cds
+ Usage   : $cds = $obj->cds(-nogaps => 1);
+ Function: obtain the "spliced" DNA sequence, by removing any
+           nucleotides that participate in an UTR, forward frameshift
+           or intron, and replacing any unknown nucleotide implied by
+           a backward frameshift or gap with N's.
+ Returns : a Bio::Seq::EncodedSeq object, with an encoding consisting only
+           of "CCCC..".
+ Args    : nogaps - strip any gap characters (resulting from 'G' or 'B'
+           encodings), rather than replacing them with N's.
+
+=cut
+
+sub cds {
+
+    # Build a new object holding the "spliced" sequence: positions
+    # encoded I, U or F are dropped, positions encoded G or B become 'N'
+    # (or are dropped as well when -nogaps is true).  An optional
+    # -location restricts the window that is processed and returned.
+    my ($self, @args) = @_;
+
+    my ($nogaps, $loc) = $self->_rearrange([qw(NOGAPS LOCATION)], @args);
+    $nogaps = 0 unless defined $nogaps;
+
+    # reverse-strand objects are processed on their reverse complement
+    my @nt = split //, $self->strand < 0 ? $self->revcom->seq : $self->seq;
+    my @enc = split //, $self->_convert2implicit($self->{_encoding});
+
+    # default window: the whole sequence ($end is an exclusive bound)
+    my ($start, $end) = (0, scalar @nt);
+
+    if ($loc) {
+	$start = $loc->start;
+	$start++ if $loc->location_type eq 'IN-BETWEEN';
+	$start = $self->column_from_residue_number($start);
+	$start--;
+
+	$end = $loc->end;
+	$end = $self->column_from_residue_number($end);
+
+	($start, $end) = ($end, $start) if $self->strand < 0;
+	# NOTE(review): $start is decremented a second time here, on either
+	# strand -- confirm this is intended and not an off-by-one.
+	$start--;
+    }
+
+    for (my $i = $start ; $i < $end ; $i++) {
+	if ($enc[$i] eq 'I' || $enc[$i] eq 'U' || $enc[$i] eq 'F') {
+	    # remove introns, untranslated and forward frameshift nucleotides
+	    $nt[$i] = undef;
+	} elsif ($enc[$i] eq 'G' || $enc[$i] eq 'B') {
+	    # replace gaps and backward frameshifts with N's, unless asked not to.
+	    $nt[$i] = $nogaps ? undef : 'N';
+	}
+    }
+
+    # --$end turns the exclusive bound into the last index of the slice;
+    # undef entries (the removed positions) are filtered out by the grep.
+    # The new object is given this object's start/end and strand 1.
+    return ($self->can_call_new ? ref($self) : __PACKAGE__)->new
+	(-seq => join('', grep { defined } @nt[$start..--$end]),
+	 -start => $self->start,
+	 -end => $self->end,
+	 -strand => 1, -alphabet => 'dna');
+}
+
+=head2 translate
+
+ Title   : translate
+ Usage   : $prot = $obj->translate(@args);
+ Function: obtain the protein sequence encoded by the underlying DNA
+           sequence; same as $obj->cds()->translate(@args).
+ Returns : a Bio::PrimarySeq object.
+ Args    : same as the translate() function of Bio::PrimarySeqI
+
+=cut
+
+sub translate {
+    my ($self, @args) = @_;
+
+    # Splice out the gap-free CDS first, then let the inherited
+    # translate() work on it; @args are passed to both calls.
+    my $cds = $self->cds(-nogaps => 1, @args);
+    return $cds->SUPER::translate(@args);
+}
+
+=head2 protseq
+
+ Title   : protseq
+ Usage   : $protseq = $obj->protseq();
+ Function: obtain the raw protein sequence encoded by the underlying
+           DNA sequence; This is the same as calling
+           $obj->translate()->seq();
+ Returns : a string of single-letter amino acid codes
+ Args :    same as the seq() function of Bio::PrimarySeq; note that this
+           function may not be used to set the protein sequence; see
+           the dnaseq() function for that.
+
+=cut
+
+sub protseq {
+    my ($self, @args) = @_;
+
+    # Same pipeline as translate(), but the raw residue string of the
+    # translation is returned instead of the sequence object.
+    my $cds     = $self->cds(-nogaps => 1, @args);
+    my $protein = $cds->SUPER::translate(@args);
+    return $protein->seq;
+}
+
+=head2 dnaseq
+
+ Title   : dnaseq
+ Usage   : $dnaseq = $obj->dnaseq();
+           $obj->dnaseq("ACGTGTCGT", "CCCCCCCCC");
+           $obj->dnaseq(-seq      => "ATG",
+                        -encoding => "CCC",
+                        -location => $loc );
+           @introns = $obj->dnaseq(-encoding => 'I')
+ Function: get/set the underlying DNA sequence; will overwrite any
+           current DNA and/or encoding information present.
+ Returns : a string of single-letter nucleotide codes, including any
+           gaps implied by the encoding.
+ Args    : seq      - the DNA sequence to be used as a replacement
+           encoding - the encoding of the DNA sequence (see the new()
+                      constructor); defaults to all 'C' if setting a
+                      new DNA sequence.  If no new DNA sequence is
+                      being provided, then the encoding is used as a
+                      "filter" for which to return fragments of
+                      non-overlapping DNA that match the encoding.
+           location - optional, the location of the DNA sequence to
+                      get/set; defaults to the entire sequence.
+
+=cut
+
+sub dnaseq {
+    my $self = shift;
+
+    # The arguments are unpacked but not acted upon: as written, this
+    # method is a stub that simply hands back the object itself.
+    my ($seq, $enc, $loc) = $self->_rearrange([qw(DNASEQ ENCODING LOCATION)], @_);
+
+    return $self;
+}
+
+# need to overload this so that we truncate both the seq and the encoding!
+sub trunc {
+    my ($self, $start, $end) = @_;
+
+    # Let the parent class cut the sequence itself.
+    my $truncated = $self->SUPER::trunc($start, $end);
+
+    # Cut the matching window out of the encoding; on the reverse
+    # strand the encoding is flipped before and after the slice.
+    my $is_reverse = $self->strand < 0;
+    my $offset     = $start - 1;
+    my $encoding   = $self->{_encoding};
+    $encoding = reverse $encoding if $is_reverse;
+    $encoding = substr($encoding, $offset, $end - $offset);
+    $encoding = reverse $encoding if $is_reverse;
+
+    $truncated->encoding($encoding);
+    return $truncated;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeLocatableSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeLocatableSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeLocatableSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,360 @@
+
+# BioPerl module for Bio::Seq::LargeLocatableSeq
+#
+# Cared for by Albert Vilella
+#
+#	based on the Bio::LargePrimarySeq module
+#       by Ewan Birney <birney at sanger.ac.uk>
+#
+#       and the Bio::LocatableSeq module
+#       by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Albert Vilella
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
+files in the tempdir
+
+=head1 SYNOPSIS
+
+  # normal primary seq usage
+    use Bio::Seq::LargeLocatableSeq;
+    my $seq = new Bio::Seq::LargeLocatableSeq(-seq => "CAGT-GGT",
+				              -id  => "seq1",
+				              -start => 1,
+				              -end   => 7);
+
+=head1 DESCRIPTION
+
+Bio::Seq::LargeLocatableSeq - object with start/end points on it that
+can be projected into a MSA or have coordinates relative to another
+seq.
+
+This object, unlike Bio::LocatableSeq, stores a sequence as a series
+of files in a temporary directory. The aim is to allow someone the
+ability to store very large sequences (eg, E<gt> 100MBases) in a file
+system without running out of memory (eg, on a 64 MB real memory
+machine!).
+
+Of course, to actually make use of this functionality, the programs
+which use this object B<must> not call $primary_seq-E<gt>seq otherwise
+the entire sequence will come out into memory and probably crash your
+machine. However, calls like $primary_seq-E<gt>subseq(10,100) will cause
+only 91 characters to be brought into real memory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Albert Vilella
+
+Email avilella-AT-gmail-DOT-com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Seq::LargeLocatableSeq;
+use vars qw($AUTOLOAD);
+use strict;
+
+
+use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Seq::LargeLocatableSeq();
+ Function: Builds a new Bio::Seq::LargeLocatableSeq object
+ Returns : an instance of Bio::Seq::LargeLocatableSeq
+ Args    :
+
+
+=cut
+
+sub new {
+    my ($class, %params) = @_;
+
+    # don't let PrimarySeq set seq until we have
+    # opened filehandle: pull the sequence out of the parameters first
+    my $seq = $params{'-seq'} || $params{'-SEQ'};
+    delete @params{ '-seq', '-SEQ' } if $seq;
+
+    my $self = $class->SUPER::new(%params);
+    $self->_initialize_io(%params);
+
+    # Backing store: a temp file inside a self-cleaning temp directory.
+    my $tempdir = $self->tempdir( CLEANUP => 1 );
+    my ($tfh, $file) = $self->tempfile( DIR => $tempdir );
+    $self->_fh($tfh)        if $tfh;
+    $self->_filename($file) if $file;
+
+    # Start empty, then load the sequence (if any) through seq().
+    $self->length(0);
+    $self->seq($seq) if $seq;
+
+    return $self;
+}
+
+
+=head2 length
+
+ Title   : length
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub length {
+   my $self       = shift;
+   my $new_length = shift;
+
+   # Setter: store any defined value passed in.
+   $self->{'length'} = $new_length if defined $new_length;
+
+   # Getter: a length that was never set reads as 0.
+   return defined $self->{'length'} ? $self->{'length'} : 0;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub seq {
+   # Get the whole sequence as a string, or set it once.  Data may only
+   # be supplied while the object is still empty; a later attempt is
+   # refused with a warning and the stored sequence is left untouched.
+   my ($self, $data) = @_;
+   if( defined $data ) {
+       if( $self->length() == 0) {
+           $self->add_sequence_as_string($data);
+       } else {
+           $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
+       }
+   }
+
+   # An empty object has nothing to read; asking subseq for 1..0 would
+   # be rejected as "end less than start", so answer directly.
+   return '' if $self->length() == 0;
+
+   return $self->subseq(1,$self->length);
+}
+
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub subseq{
+   # Return part of the stored sequence as a string.  Accepts either a
+   # start/end pair (1-based, inclusive) or a single Bio::LocationI
+   # object; split locations are read piece by piece and joined, and
+   # reverse-strand (sub)locations are reverse-complemented.
+   my ($self,$start,$end) = @_;
+   my $fh = $self->_fh();
+
+   # Seek to residue $from and read $count characters; $from/$to are
+   # only used to label the error should either step fail.
+   my $read_span = sub {
+       my ($from, $to, $count) = @_;
+       if(! seek($fh,$from - 1,0)) {
+           $self->throw("Unable to seek on file $from:$to $!");
+       }
+       my $chunk;
+       my $ret = read($fh, $chunk, $count);
+       if( !defined $ret ) {
+           $self->throw("Unable to read $from:$to $!");
+       }
+       return $chunk;
+   };
+
+   if( ref($start) && $start->isa('Bio::LocationI') ) {
+       my $loc = $start;
+       if( $loc->length == 0 ) {
+           $self->warn("Expect location lengths to be > 0");
+           return '';
+       } elsif( $loc->end < $loc->start ) {
+           # what about circular seqs
+           $self->warn("Expect location start to come before location end");
+       }
+       my $seq = '';
+       if( $loc->isa('Bio::Location::SplitLocationI') ) {
+           foreach my $subloc ( $loc->sub_Location ) {
+               # report the sub-location's own coordinates on failure
+               my $piece = $read_span->($subloc->start(), $subloc->end(),
+                                        $subloc->length());
+               if( $subloc->strand < 0 ) {
+                   $piece = Bio::Seq::LargePrimarySeq->new(-seq => $piece)->revcom()->seq();
+               }
+               $seq .= $piece;
+           }
+       } else {
+           $seq = $read_span->($loc->start(), $loc->end(), $loc->length());
+       }
+       # the location as a whole may be on the reverse strand as well
+       if( defined $loc->strand &&
+           $loc->strand < 0 ) {
+           $seq = Bio::Seq::LargePrimarySeq->new(-seq => $seq)->revcom()->seq();
+       }
+       return $seq;
+   }
+   if( $start <= 0 || $end > $self->length ) {
+       $self->throw("Attempting to get a subseq out of range $start:$end vs ".
+                    $self->length);
+   }
+   if( $end < $start ) {
+       $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
+   }
+
+   return $read_span->($start, $end, $end-$start+1);
+}
+
+
+=head2 add_sequence_as_string
+
+ Title   : add_sequence_as_string
+ Usage   : $seq->add_sequence_as_string("CATGAT");
+ Function: Appends additional residues to an existing LargeLocatableSeq object.
+           This allows one to build up a large sequence without storing
+           entire object in memory.
+ Returns : Current length of sequence
+ Args    : string to append
+
+=cut
+
+sub add_sequence_as_string {
+   my ($self, $residues) = @_;
+
+   # Work out the new total before touching the file.
+   my $new_total = $self->length() + CORE::length($residues);
+
+   # Move to end-of-file (whence 2) so the residues are appended.
+   my $fh = $self->_fh();
+   seek($fh, 0, 2)
+       or $self->throw("Unable to seek end of file: $!");
+   $self->_print($residues);
+
+   # Record the new length; length() hands the stored value back.
+   return $self->length($new_total);
+}
+
+
+=head2 _filename
+
+ Title   : _filename
+ Usage   : $obj->_filename($newval)
+ Function:
+ Example :
+ Returns : value of _filename
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _filename {
+   my $self     = shift;
+   my $new_name = shift;
+
+   # Combined accessor: a defined argument replaces the stored name,
+   # and the (possibly new) stored name is always returned.
+   $self->{'_filename'} = $new_name if defined $new_name;
+
+   return $self->{'_filename'};
+}
+
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : $obj->alphabet($newval)
+ Function:
+ Example :
+ Returns : value of alphabet
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub alphabet {
+   my $self         = shift;
+   my $new_alphabet = shift;
+
+   # Setter: delegate a defined value to the parent class.
+   $self->SUPER::alphabet($new_alphabet) if defined $new_alphabet;
+
+   # Getter: fall back to 'dna' when the parent reports nothing true.
+   return $self->SUPER::alphabet() || 'dna';
+}
+
+
+=head2 end
+
+ Title   : end
+ Usage   : $obj->end($newval)
+ Function:
+ Returns : value of end
+ Args    : newvalue (optional)
+
+=cut
+
+sub end {
+   # Get/set the end coordinate.  On set, when a sequence is present,
+   # the value is checked against the ungapped residue count; with
+   # verbose > 0 a mismatch is reported and the count is used instead
+   # (provided warn() returns a true value).  On get, an unset end
+   # falls back to the ungapped length.
+   my $self = shift;
+   if( @_ ) {
+      my $value = shift;
+      # seq() pulls the whole sequence in from the temp file, so read
+      # it once and test that copy instead of calling seq() again
+      my $string = $self->seq;
+      if ($string) {
+          my $len = $self->_ungapped_len;
+          my $id = $self->id;
+          $self->warn("In sequence $id residue count gives end value $len.
+Overriding value [$value] with value $len for Bio::LargeLocatableSeq::end().")
+              and $value = $len if $len != $value and $self->verbose > 0;
+      }
+
+      $self->{'end'} = $value;
+    }
+
+   return $self->{'end'} || $self->_ungapped_len;
+}
+
+
+sub DESTROY {
+    my $self = shift;
+
+    # Release the temp-file handle first, if there is one.
+    my $fh = $self->_fh();
+    close($fh) if defined $fh;
+
+    # this should be handled by Tempfile removal, but we'll unlink anyways.
+    my $file = $self->_filename();
+    unlink $file if defined $file && -e $file;
+
+    $self->SUPER::DESTROY();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargePrimarySeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargePrimarySeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargePrimarySeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,302 @@
+# $Id: LargePrimarySeq.pm,v 1.32.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::LargePrimarySeq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+#
+# updated to utilize File::Temp - jason 2000-12-12
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as
+files in the tempdir (as found by File::Temp) or the default method in
+Bio::Root::Root
+
+=head1 SYNOPSIS
+
+  # normal primary seq usage
+
+=head1 DESCRIPTION
+
+This object stores a sequence as a series of files in a temporary
+directory. The aim is to allow someone the ability to store very large
+sequences (eg, E<gt> 100MBases) in a file system without running out
+of memory (eg, on a 64 MB real memory machine!).
+
+Of course, to actually make use of this functionality, the programs
+which use this object B<must> not call $primary_seq-E<gt>seq otherwise
+the entire sequence will come out into memory and probably paste your
+machine. However, calls $primary_seq-E<gt>subseq(10,100) will cause
+only 90 characters to be brought into real memory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, Jason Stajich
+
+Email birney at ebi.ac.uk
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::LargePrimarySeq;
+use vars qw($AUTOLOAD);
+use strict;
+
+
+use base qw(Bio::PrimarySeq Bio::Root::IO Bio::Seq::LargeSeqI);
+
+sub new {
+    my ($class, %params) = @_;
+
+    # Hold the sequence back from the parent constructor: it can only be
+    # written once the temporary file handle exists.
+    my $seq = $params{'-seq'} || $params{'-SEQ'};
+    delete @params{ '-seq', '-SEQ' } if $seq;
+
+    my $self = $class->SUPER::new(%params);
+    $self->_initialize_io(%params);
+
+    # Backing file, created inside a temp dir requested with CLEANUP => 1.
+    my $tempdir = $self->tempdir( CLEANUP => 1 );
+    my ($tfh, $file) = $self->tempfile( DIR => $tempdir );
+    $self->_fh($tfh)        if $tfh;
+    $self->_filename($file) if $file;
+
+    # Start empty, then store the residues that were held back above.
+    $self->length(0);
+    $self->seq($seq) if $seq;
+
+    return $self;
+}
+
+
+=head2 length
+
+ Title   : length
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub length {
+    my ($self, $new_length) = @_;
+    $self->{'length'} = $new_length if defined $new_length;
+
+    # Report 0 until a length has been recorded.
+    return 0 unless defined $self->{'length'};
+    return $self->{'length'};
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub seq {
+    my ($self, $data) = @_;
+    if ( defined $data ) {
+        # Residues may be written only while the object is still empty.
+        if ( $self->length() != 0 ) {
+            $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object");
+        }
+        else { $self->add_sequence_as_string($data) }
+    }
+    return $self->subseq(1, $self->length);    # the whole stored sequence
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub subseq{
+   my ($self,$start,$end) = @_;
+   my $string;
+   my $fh = $self->_fh();
+   # Location form: $start is a Bio::LocationI object and $end is not used.
+   if( ref($start) && $start->isa('Bio::LocationI') ) {
+       my $loc = $start;
+       if( $loc->length == 0 ) {
+           $self->warn("Expect location lengths to be > 0");
+           return '';
+       } elsif( $loc->end < $loc->start ) {
+           # what about circular seqs
+           $self->warn("Expect location start to come before location end");
+       }
+       my $seq = '';
+       if( $loc->isa('Bio::Location::SplitLocationI') ) {
+           foreach my $subloc ( $loc->sub_Location ) {
+               # Errors report this piece's own range: $start holds the
+               # location object here and $end is undefined.
+               if(! seek($fh,$subloc->start() - 1,0)) {
+                   $self->throw("Unable to seek on file ".$subloc->start.":".$subloc->end." $!");
+               }
+               my $ret = read($fh, $string, $subloc->length());
+               if( !defined $ret ) {
+                   $self->throw("Unable to read ".$subloc->start.":".$subloc->end." $!");
+               }
+               if( $subloc->strand < 0 ) {
+                   $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
+               }
+               $seq .= $string;
+           }
+       } else {
+           if(! seek($fh,$loc->start()-1,0)) {
+               $self->throw("Unable to seek on file ".$loc->start.":".$loc->end." $!");
+           }
+           my $ret = read($fh, $string, $loc->length());
+           if( !defined $ret ) {
+               $self->throw("Unable to read ".$loc->start.":".$loc->end." $!");
+           }
+           $seq = $string;
+       }
+       # A negative strand on the location as a whole reverse-complements the result.
+       if( defined $loc->strand && $loc->strand < 0 ) {
+           $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq();
+       }
+       return $seq;
+   }
+   # Coordinate form: $start and $end are 1-based and inclusive.
+   if( $start <= 0 || $end > $self->length ) {
+       $self->throw("Attempting to get a subseq out of range $start:$end vs ".
+                    $self->length);
+   }
+   if( $end < $start ) {
+       $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
+   }
+   if(! seek($fh,$start-1,0)) {
+       $self->throw("Unable to seek on file $start:$end $!");
+   }
+   my $ret = read($fh, $string, $end-$start+1);
+   if( !defined $ret ) {
+       $self->throw("Unable to read $start:$end $!");
+   }
+   return $string;
+}
+
+=head2 add_sequence_as_string
+
+ Title   : add_sequence_as_string
+ Usage   : $seq->add_sequence_as_string("CATGAT");
+ Function: Appends additional residues to an existing LargePrimarySeq object.
+           This allows one to build up a large sequence without storing
+           entire object in memory.
+ Returns : Current length of sequence
+ Args    : string to append
+
+=cut
+
+sub add_sequence_as_string {
+    my ($self, $str) = @_;
+    my $new_length = $self->length + CORE::length($str);
+
+    # Move to end-of-file (whence 2) before writing.
+    seek( $self->_fh(), 0, 2 )
+        or $self->throw("Unable to seek end of file: $!");
+    $self->_print($str);
+    return $self->length($new_length);    # length() hands back the new total
+}
+
+
+=head2 _filename
+
+ Title   : _filename
+ Usage   : $obj->_filename($newval)
+ Function: 
+ Example : 
+ Returns : value of _filename
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _filename {
+    my ($self, $path) = @_;
+
+    # Acts as a setter only when a defined path is passed in.
+    if ( defined $path ) { $self->{'_filename'} = $path }
+
+    return $self->{'_filename'};
+}
+
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : $obj->alphabet($newval)
+ Function: 
+ Example : 
+ Returns : value of alphabet
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub alphabet {
+    my ($self, $requested) = @_;
+
+    # A defined argument is forwarded to the parent class setter.
+    if ( defined $requested ) { $self->SUPER::alphabet($requested) }
+
+    return $self->SUPER::alphabet() || 'dna';    # 'dna' when the parent has none
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    my $handle = $self->_fh();
+    close($handle) if defined $handle;
+    # Tempfile cleanup should already remove the file; unlink it regardless.
+    my $path = $self->_filename();
+    unlink $path if defined $path && -e $path;
+    $self->SUPER::DESTROY();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,138 @@
+# $Id: LargeSeq.pm,v 1.15.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::LargeSeq
+#
+# Cared for by Ewan Birney, Jason Stajich
+#
+# Copyright Ewan Birney, Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::LargeSeq - SeqI compliant object that stores sequence as
+files in /tmp
+
+=head1 SYNOPSIS
+
+  # normal primary seq usage
+
+=head1 DESCRIPTION
+
+This object stores a sequence as a series of files in a temporary
+directory. The aim is to allow someone the ability to store very large
+sequences (eg, E<gt> 100MBases) in a file system without running out
+of memory (eg, on a 64 MB real memory machine!).
+
+Of course, to actually make use of this functionality, the programs
+which use this object B<must> not call $primary_seq-E<gt>seq otherwise
+the entire sequence will come out into memory and probably paste your
+machine. However, calls $primary_seq-E<gt>subseq(10,100) will cause
+only 90 characters to be brought into real memory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::LargeSeq;
+use vars qw($AUTOLOAD);
+use strict;
+
+# Object preamble
+
+use Bio::Seq::LargePrimarySeq;
+
+use base qw(Bio::Seq Bio::Seq::LargeSeqI);
+
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Use the caller's -primaryseq object when given; otherwise build a
+    # file-backed primary sequence from the same constructor arguments.
+    my ($pseq) = $self->_rearrange( ['PRIMARYSEQ'], @args );
+    $pseq = Bio::Seq::LargePrimarySeq->new(@args) unless defined $pseq;
+
+    $self->primary_seq($pseq);
+
+    return $self;
+}
+
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence,
+
+ Example :
+ Returns : a fresh Bio::SeqI object
+ Args    :
+
+=cut
+
+sub trunc {
+    my ($self, $s, $e) = @_;    # $s/$e: bounds handed to primary_seq->trunc
+    return Bio::Seq::LargeSeq->new(
+        '-display_id'       => $self->display_id,
+        '-accession_number' => $self->accession_number,
+        '-desc'             => $self->desc,
+        '-alphabet'         => $self->alphabet,
+        -primaryseq         => $self->primary_seq->trunc($s, $e) );
+}
+
+=head2 Bio::Seq::LargePrimarySeq methods
+
+=cut
+
+=head2 add_sequence_as_string
+
+ Title   : add_sequence_as_string
+ Usage   : $seq->add_sequence_as_string("CATGAT");
+ Function: Appends additional residues to an existing LargePrimarySeq object.
+           This allows one to build up a large sequence without storing
+           entire object in memory.
+ Returns : Current length of sequence
+ Args    : string to append
+
+=cut
+
+sub add_sequence_as_string {
+    my ($self, $residues) = @_;    # forwarded unchanged to the primary seq
+    return $self->primary_seq->add_sequence_as_string($residues);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/LargeSeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+# $Id $
+#
+# BioPerl module for Bio::Seq::LargeSeqI
+#
+# Cared for by Albert Vilella
+#
+# Copyright Albert Vilella
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::LargeSeqI - Interface class for sequences that cache their
+residues in a temporary file
+
+=head1 SYNOPSIS
+
+ #
+
+=head1 DESCRIPTION
+
+The interface class defines a group of sequence classes that do not
+keep their sequence information in memory but store it in a file. This
+makes it possible to work with very large files even with limited RAM.
+
+The most important consequence of file caching for sequences is that
+you do not want to inspect the sequence unless absolutely
+necessary. These sequences typically override the length() method not
+to check the sequence.
+
+The seq() method is not resettable. If you want to add to the end of the
+sequence you have to use add_sequence_as_string(); for any other sequence
+changes you'll have to create a new object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Albert Vilella
+
+Email avilella-AT-gmail-DOT-com
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::LargeSeqI;
+use strict;
+
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 add_sequence_as_string
+
+ Title   : add_sequence_as_string
+ Usage   : $seq->add_sequence_as_string("CATGAT");
+ Function: Appends additional residues to an existing  object.
+           This allows one to build up a large sequence without
+           storing entire object in memory.
+ Returns : Current length of sequence
+ Args    : string to append
+
+=cut
+
+sub add_sequence_as_string {
+    my $self = shift;    # interface stub: the call below always raises
+    $self->throw_not_implemented();
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta/Array.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta/Array.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta/Array.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,671 @@
+# $Id: Array.pm,v 1.13.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::Meta::Array
+#
+# Cared for by Heikki Lehvaslaiho
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::Meta::Array - array-based generic implementation of a
+sequence class with residue-based meta information
+
+=head1 SYNOPSIS
+
+  use Bio::LocatableSeq;
+  use Bio::Seq::Meta::Array;
+
+  my $seq = Bio::LocatableSeq->new(-id=>'test',
+                                   -seq=>'ACTGCTAGCT',
+                                   -start=>2434,
+                                   -end=>2443,
+                                   -strand=>1,
+                                   -verbose=>1, # to see warnings
+                                  );
+  bless $seq, Bio::Seq::Meta::Array;
+  # the existing sequence object can be a Bio::PrimarySeq, too
+
+  # to test this is a meta seq object
+  $seq->isa("Bio::Seq::Meta::Array")
+      || $seq->throw("$seq is not a Bio::Seq::Meta::Array");
+
+  $seq->meta('1 2 3 4 5 6 7 8 9 10');
+
+  # or you could create the Meta object directly
+  $seq = Bio::Seq::Meta::Array->new(-id=>'test',
+                                    -seq=>'ACTGCTAGCT',
+                                    -start=>2434,
+                                    -end=>2443,
+                                    -strand=>1,
+                                    -meta=>'1 2 3 4 5 6 7 8 9 10',
+                                    -verbose=>1, # to see warnings
+                                   );
+
+
+  # accessors
+  $arrayref   = $seq->meta();
+  $string     = $seq->meta_text();
+  $substring  = $seq->submeta_text(2,5);
+  $unique_key = $seq->accession_number();
+
+=head1 DESCRIPTION
+
+This class implements generic methods for sequences with residue-based
+meta information. Meta sequences with meta data are Bio::LocatableSeq
+objects with additional methods to store that meta information. See
+L<Bio::LocatableSeq> and L<Bio::Seq::MetaI>.
+
+The meta information in this class can be a string of variable length
+and can be a complex structure.  Blank values are undef or zero.
+
+Application specific implementations should inherit from this class to
+override and add to these methods.
+
+This class can be used for storing sequence quality values but
+Bio::Seq::Quality has named methods that make it easier.
+
+=head1 SEE ALSO
+
+L<Bio::LocatableSeq>, 
+L<Bio::Seq::MetaI>, 
+L<Bio::Seq::Meta>, 
+L<Bio::Seq::Quality>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Chad Matsalla, bioinformatics at dieselwurks.com
+Aaron Mackey, amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::Meta::Array;
+use vars qw(@ISA $DEFAULT_NAME $GAP $META_GAP);
+use strict;
+
+use Data::Dumper;
+
+#use overload '""' => \&to_string;
+
+use base qw(Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI);
+
+BEGIN {
+    # Package defaults, assigned at compile time.
+    $META_GAP     = 0;            # filler used when meta sets are padded
+    $GAP          = '-';          # gap character
+    $DEFAULT_NAME = 'DEFAULT';    # key of the unnamed meta set
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $metaseq = Bio::Seq::Meta::Array->new
+	        ( -meta => 'aaaaaaaabbbbbbbb',
+                  -seq =>  'TKLMILVSHIVILSRM',
+	          -id  => 'human_id',
+	          -accession_number => 'S000012',
+	        );
+ Function: Constructor for Bio::Seq::Meta::Array class, meta data being in a
+           string. Note that you can provide an empty quality string.
+ Returns : a new Bio::Seq::Meta::Array object
+
+=cut
+
+sub new {
+    my ($class, %args) = @_;
+
+    # The parent classes are re-assigned on every call: an explicit
+    # -baseclass replaces the default Bio::LocatableSeq/Bio::Seq pair.
+    # NOTE(review): @ISA is a package variable, so this looks like it also
+    # changes inheritance for objects created earlier - confirm.
+    @ISA = defined $args{'-baseclass'}
+         ? ( $args{'-baseclass'}, "Bio::Seq::MetaI" )
+         : qw( Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI );
+
+    my $self = $class->SUPER::new(%args);
+
+    my ($meta, $forceflush) =
+        $self->_rearrange( [qw(META FORCE_FLUSH)], %args );
+
+    # Start with an empty default meta set, then apply optional arguments.
+    $self->{'_meta'}->{$DEFAULT_NAME} = [];
+
+    $self->meta($meta)              if $meta;
+    $self->force_flush($forceflush) if $forceflush;
+
+    return $self;
+}
+
+
+=head2 meta
+
+ Title   : meta
+ Usage   : $meta_values  = $obj->meta($values_string);
+ Function:
+
+           Get and set method for the meta data starting from residue
+           position one. Since it is dependent on the length of the
+           sequence, it needs to be manipulated after the sequence.
+
+           The length of the returned value always matches the length
+           of the sequence.
+
+ Returns : reference to an array of meta data
+ Args    : new value, string or array ref, optional
+
+=cut
+
+sub meta {
+    my ($self, $value) = @_;    # $value is optional
+    return $self->named_meta($DEFAULT_NAME, $value);
+}
+
+=head2 meta_text
+
+ Title   : meta_text
+ Usage   : $meta_values  = $obj->meta_text($values_arrayref);
+ Function: Variant of meta() guaranteed to return a string
+           representation of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, string or array ref, optional
+
+=cut
+
+sub meta_text {
+    my ($self, $value) = @_;
+    # False entries (undef, '', 0) are rendered as 0.
+    return join ' ', map { $_ || 0 } @{ $self->meta($value) };
+}
+
+=head2 named_meta
+
+ Title   : named_meta()
+ Usage   : $meta_values  = $obj->named_meta($name, $values_arrayref);
+ Function: A more general version of meta(). Each meta data set needs
+           to be named. See also L<meta_names>.
+ Returns : reference to an array of meta data
+ Args    : scalar, name of the meta data set
+           new value, string or array ref, optional
+
+=cut
+
+sub named_meta {
+   my ($self, $name, $value) = @_;
+
+   $name ||= $DEFAULT_NAME;
+
+   if (defined $value) {
+       my ($arrayref);
+
+       if (ref $value eq 'ARRAY' ) { # array ref: stored as given, not copied
+           $arrayref = $value;
+       }
+       elsif (not ref($value)) { # scalar: split on whitespace
+           $arrayref = [split /\s+/, $value];
+       } else {
+           $self->throw("I need a scalar or array ref, not [". ref($value). "]");
+       }
+
+       # pad short input with $META_GAP up to the sequence length;
+       # longer input is stored untouched
+       my $diff = $self->length - @{$arrayref};
+       push @{$arrayref}, ($META_GAP) x $diff if $diff > 0;
+
+       $self->{'_meta'}->{$name} = $arrayref;
+
+       #$self->_test_gap_positions($name) if $self->verbose > 0;
+   }
+
+   $self->_do_flush if $self->force_flush;
+
+   # A set that was never stored is reported as an array ref of gap values
+   # (not a string), so callers can always dereference the result.
+   return $self->{'_meta'}->{$name} || [ ($META_GAP) x $self->length ];
+}
+
+=head2 _test_gap_positions
+
+ Title   : _test_gap_positions
+ Usage   : $meta_values  = $obj->_test_gap_positions($name);
+ Function: Internal test for correct position of gap characters.
+           Gap being only '-' this time.
+
+           This method is called from named_meta() when setting meta
+           data but only if verbose is positive as this can be an
+           expensive process on very long sequences. Set verbose(1) to
+           see warnings when gaps do not align in sequence and meta
+           data and turn them into errors by setting verbose(2).
+
+ Returns : true on success, prints warnings
+ Args    : none
+
+=cut
+
+sub _test_gap_positions {
+    my $self = shift;
+    my $name = shift;
+    my $success = 1;
+
+    $self->seq || return $success;
+    my $len = CORE::length($self->seq);
+
+    # Meta data is held as an array ref in this class (one element per
+    # residue), so it is indexed instead of being read with substr().
+    # A missing set or element cannot be a gap and is skipped.
+    my $meta = $self->{_meta}->{$name};
+    $meta = [] unless ref $meta eq 'ARRAY';
+
+    for (my $i=0; $i < $len; $i++) {
+        my $s = substr $self->{seq}, $i, 1;
+        my $m = $meta->[$i];
+        next unless defined $m && $m eq '-';
+        # $success is cleared only when warn() returns true
+        $self->warn("Gap mismatch in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]")
+            and $success = 0
+                if $s ne $m;
+    }
+    return $success;
+}
+
+=head2 named_meta_text
+
+ Title   : named_meta_text()
+ Usage   : $meta_values  = $obj->named_meta_text($name, $values_arrayref);
+ Function: Variant of named_meta() guaranteed to return a textual
+           representation of the named meta data.
+           For details, see L<meta>.
+ Returns : a string
+ Args    : scalar, name of the meta data set
+           new value, string or array ref, optional
+
+=cut
+
+sub named_meta_text {
+    my ($self, @args) = @_;    # @args: set name and optional new value
+    return join ' ', @{ $self->named_meta(@args) };
+}
+
+=head2 submeta
+
+ Title   : submeta
+ Usage   : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
+           $subset_of_meta_values = $obj->submeta(10, undef, $value_string);
+ Function:
+
+           Get and set method for meta data for subsequences.
+
+           Numbering starts from 1 and the number is inclusive, ie 1-2
+           are the first two residue of the sequence. Start cannot be
+           larger than end but can be equal.
+
+           If the second argument is missing the returned values
+           should extend to the end of the sequence.
+
+           The return value may be a string or an array reference,
+           depending on the implementation. If in doubt, use
+           submeta_text() which is a variant guaranteed to return a
+           string.  See L<submeta_text>.
+
+ Returns : A reference to an array or a string
+ Args    : integer, start position
+           integer, end position, optional when a third argument present
+           new value, string or array ref, optional
+
+=cut
+
+sub submeta {
+    my ($self, @range_and_value) = @_;    # start, end, optional new value
+    return $self->named_submeta($DEFAULT_NAME, @range_and_value);
+}
+
+=head2 submeta_text
+
+ Title   : submeta_text
+ Usage   : $meta_values  = $obj->submeta_text(20, $value_string);
+ Function: Variant of submeta() guaranteed to return a textual
+           representation of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, string or array ref, optional
+
+
+=cut
+
+sub submeta_text {
+    my ($self, @range_and_value) = @_;    # start, end, optional new value
+    return join ' ', @{ $self->named_submeta($DEFAULT_NAME, @range_and_value) };
+}
+
+=head2 named_submeta
+
+ Title   : named_submeta
+ Usage   : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
+           $subset_of_meta_values = $obj->named_submeta($name, 10);
+ Function: A more general version of submeta(): gets or sets a range of
+           values in the named meta data set. See also L<submeta>.
+ Returns : A reference to an array or a string
+ Args    : scalar, name of the meta data set
+           integer, start position
+           integer, end position, optional when a third argument present (can be undef)
+           new value, string or array ref, optional
+
+=cut
+
+
+sub named_submeta {
+    my ($self, $name, $start, $end, $value) = @_;
+
+    $name ||= $DEFAULT_NAME;
+    $start ||=1;
+    $start =~ /^[+]?\d+$/ and $start > 0 or
+        $self->throw("Need at least a positive integer start value");
+    $start--;    # from here on $start is a 0-based array index
+
+    if (defined $value) {
+        my $arrayref;
+
+        if (ref $value eq 'ARRAY' ) { # array ref
+            $arrayref = $value;
+        }
+        elsif (not ref($value)) { # scalar
+            $arrayref = [split /\s+/, $value];
+        } else {
+            $self->throw("I need a space separated scalar or array ref, not [". ref($value). "]");
+        }
+
+        # the values occupy indices $start .. $start + count - 1, which runs
+        # past the sequence as soon as $start + count exceeds its length
+        $self->warn("You are setting meta values beyond the length of the sequence\n".
+                    "[$start > ". length($self->seq)."] in sequence ". $self->id)
+            if $start + scalar @{$arrayref} > $self->length;
+
+        $end or $end = @{$arrayref} + $start;
+        $end--;    # 0-based and inclusive
+
+        # test for length; pad if needed (the range holds $end - $start + 1 slots)
+        my $diff = $end - $start + 1 - scalar @{$arrayref};
+        if ($diff > 0) {
+            foreach (1..$diff) { push @{$arrayref}, $META_GAP}
+        }
+
+        @{$self->{_meta}->{$name}}[$start..$end] = @{$arrayref};
+
+        $self->_do_flush if $self->force_flush;
+
+        return $arrayref;
+
+    } else {
+        # getter: a missing or too large end is clipped to the sequence length
+        $end or $end = $self->length;
+        $end = $self->length if $end > $self->length;
+        $end--;
+        return [@{$self->{_meta}->{$name}}[$start..$end]];
+
+    }
+}
+
+
+=head2 named_submeta_text
+
+ Title   : named_submeta_text
+ Usage   : $meta_values  = $obj->named_submeta_text($name, 20, $value_string);
+ Function: Variant of named_submeta() guaranteed to return a textual
+           representation of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : scalar, name of the meta data
+ Args    : integer, start position, optional
+           integer, end position, optional
+           new value, string or array ref, optional
+
+=cut
+
+sub named_submeta_text {
+    my ($self, @args) = @_;    # set name, start, end, optional new value
+    return join ' ', @{ $self->named_submeta(@args) };
+}
+
+=head2 meta_names
+
+ Title   : meta_names
+ Usage   : @meta_names  = $obj->meta_names()
+ Function: Retrieves an array of meta data set names. The default
+           (unnamed) set name is guaranteed to be the first name if it
+           contains any data.
+ Returns : an array of names
+ Args    : none
+
+=cut
+
+sub meta_names {
+    my ($self) = @_;
+
+    # Named sets in sorted order ...
+    my @names = grep { $_ ne $DEFAULT_NAME } sort keys %{ $self->{'_meta'} };
+
+    # ... with the default set, when it holds a true value, placed first.
+    unshift @names, $DEFAULT_NAME if $self->{'_meta'}->{$DEFAULT_NAME};
+    return @names;
+}
+
+
+=head2 meta_length
+
+ Title   : meta_length()
+ Usage   : $meta_len  = $obj->meta_length();
+ Function: return the number of elements in the meta set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub meta_length {
+    my $self = shift;
+    return $self->named_meta_length($DEFAULT_NAME);    # size of the default set
+}
+
+
+=head2 named_meta_length
+
+ Title   : named_meta_length()
+ Usage   : $meta_len  = $obj->named_meta_length($name);
+ Function: return the number of elements in the named meta set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub named_meta_length {
+    my ($self, $name) = @_;
+    $name = $DEFAULT_NAME unless $name;    # false/missing name: default set
+    return scalar @{ $self->{'_meta'}->{$name} };
+}
+
+
+
+=head2 force_flush
+
+ Title   : force_flush()
+ Usage   : $force_flush = $obj->force_flush(1);
+ Function: Automatically pad with empty values or truncate meta values
+           to sequence length. Not done by default.
+ Returns : boolean 1 or 0
+ Args    : optional boolean value
+
+Note that if you turn this forced padding off, the previously padded
+values are not changed.
+
+=cut
+
+sub force_flush {
+    my ($self, $value) = @_;
+
+    if ( defined $value ) {
+        # Normalise the flag to 1/0; switching it on flushes immediately.
+        $self->{force_flush} = $value ? 1 : 0;
+        $self->_do_flush if $value;
+    }
+
+    return $self->{force_flush};
+}
+
+
+=head2 _do_flush
+
+ Title   : _do_flush
+ Usage   : 
+ Function: internal method that forces the meta values to the same
+           length as the sequence. Called from L<force_flush>
+ Returns : 
+ Args    : 
+
+=cut
+
+
+sub _do_flush {
+    my ($self) = @_;
+
+    foreach my $name ($self->meta_names) {
+        my $seq_len  = $self->length;
+        my $meta_len = $self->named_meta_length($name);
+
+        if ( $seq_len > $meta_len ) {
+            # too short: append gap values up to the sequence length
+            push @{ $self->{'_meta'}->{$name} }, ($META_GAP) x ($seq_len - $meta_len);
+        }
+        elsif ( $seq_len < $meta_len ) {
+            # too long: keep only the first $seq_len values (in a new array)
+            $self->{_meta}->{$name} = [ @{ $self->{_meta}->{$name} }[ 0 .. $seq_len - 1 ] ];
+        }
+    }
+}
+
+
+
+=head2 is_flush
+
+ Title   : is_flush
+ Usage   : $is_flush  = $obj->is_flush()
+           or  $is_flush = $obj->is_flush($my_meta_name)
+ Function: Boolean to tell if all meta values are in
+           flush with the sequence length.
+           Returns true if force_flush() is set
+           Set verbosity to a positive value to see failed meta sets
+ Returns : boolean 1 or 0
+ Args    : optional name of the meta set
+
+=cut
+
+sub is_flush {
+
+    # Unpack both arguments; $name is optional.
+    my ($self, $name) = @_;
+
+    return 1 if $self->force_flush;
+
+    my $sticky = '';    # space-separated names of sets whose length differs
+
+    if ($name) {
+        $sticky .= "$name " if $self->length != $self->named_meta_length($name);
+    } else {
+        foreach my $m ($self->meta_names) {
+            $sticky .= "$m " if $self->length != $self->named_meta_length($m);
+        }
+    }
+
+    if ($sticky) {
+        print "These meta set are not flush: $sticky\n" if $self->verbose; 
+        return 0;
+    }
+
+    return 1;
+}
+
+
+=head1 Bio::PrimarySeqI methods
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $newseq = $seq->revcom();
+ Function: Produces a new Bio::Seq::MetaI implementing object where
+           the order of residues and their meta information is reversed.
+ Returns : A new (fresh) Bio::Seq::Meta object
+ Args    : none
+ Throws  : if the object returns false on is_flush()
+
+Note: The method does nothing to meta values, it reorders them, only.
+
+=cut
+
+sub revcom {
+    my ($self) = @_;
+
+    unless ( $self->is_flush ) {
+        $self->throw("Can not get a reverse complement. The object is not flush.");
+    }
+
+    my $new  = $self->SUPER::revcom;
+    my $last = $self->length - 1;
+
+    # Copy every meta set in reversed order; the values are not altered.
+    foreach my $name ( keys %{ $self->{_meta} } ) {
+        my @values = @{ $self->{_meta}->{$name} }[ 0 .. $last ];
+        $new->{_meta}->{$name} = [ reverse @values ];
+    }
+
+    return $new;
+}
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $seq->trunc(10,100);
+ Function: Provides a truncation of a sequence together with meta data
+ Returns : a fresh Bio::Seq::Meta implementing object
+ Args    : Two integers denoting first and last residue of the sub-sequence.
+
+=cut
+
+sub trunc {
+    my ($self, $start, $end) = @_;
+
+    # Validate the 1-based, inclusive coordinates first.
+    unless ( $start =~ /^[+]?\d+$/ and $start > 0 ) {
+        $self->throw("Need at least a positive integer start value as start");
+    }
+    unless ( $end =~ /^[+]?\d+$/ and $end > 0 ) {
+        $self->throw("Need at least a positive integer start value as end");
+    }
+    unless ( $end >= $start ) {
+        $self->throw("End position has to be larger or equal to start");
+    }
+    unless ( $end <= $self->length ) {
+        $self->throw("End position can not be larger than sequence length");
+    }
+
+    my $new = $self->SUPER::trunc($start, $end);
+
+    # Slice every meta set with the equivalent 0-based indices.
+    my ($first, $last) = ($start - 1, $end - 1);
+    foreach my $name ( keys %{ $self->{_meta} } ) {
+        $new->{_meta}->{$name} = [ @{ $self->{_meta}->{$name} }[ $first .. $last ] ];
+    }
+    return $new;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Meta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,709 @@
+# $Id: Meta.pm,v 1.12.4.2 2006/11/08 19:00:33 cjfields Exp $
+#
+# BioPerl module for Bio::Seq::Meta
+#
+# Cared for by Heikki Lehvaslaiho
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::Meta - Generic superclass for sequence objects with
+residue-based meta information
+
+=head1 SYNOPSIS
+
+  use Bio::LocatableSeq;
+  use Bio::Seq::Meta;
+  use Bio::Tools::OddCodes;
+  use Bio::SeqIO;
+
+  my $seq = Bio::LocatableSeq->new(-id=>'test',
+                                   -seq=>'ACTGCTAGCT',
+                                   -start=>2434,
+                                   -end=>2443,
+                                   -strand=>1,
+                                   -verbose=>1, # to see warnings
+                                  );
+
+  bless $seq, Bio::Seq::Meta;
+  # the existing sequence object can be a Bio::PrimarySeq, too
+
+  # to test this is a meta seq object
+  $seq->isa("Bio::Seq::Meta")
+      || $seq->throw("$seq is not a Bio::Seq::Meta");
+
+
+  $seq->meta('1234567890');
+  $seq = Bio::Seq::Meta->new(-id=>'test',
+                             -seq=>'HACILMIFGT',
+                             -start=>2434,
+                             -end=>2443,
+                             -strand=>1,
+                             -meta=>'1234567890',
+                             -verbose=>1, # to see warnings
+                            );
+
+  # accessors
+  $string     = $seq->meta_text();
+  $substring  = $seq->submeta_text(2,5);
+  $unique_key = $seq->accession_number();
+
+  # storing output from Bio::Tools::OddCodes as meta data
+  my $protcodes = Bio::Tools::OddCodes->new(-seq => $seq);
+  my @codes = qw(structural chemical functional charge hydrophobic);
+  map { $seq->named_meta($_, ${$protcodes->$_($seq) } )} @codes;
+
+  my $out = Bio::SeqIO->new(-format=>'metafasta');
+  $out->write_seq($seq);
+
+
+=head1 DESCRIPTION
+
+This class implements generic methods for sequences with residue-based
+meta information. Meta sequences with meta data are Bio::LocatableSeq
+objects with additional methods to store that meta information. See
+L<Bio::LocatableSeq> and L<Bio::Seq::MetaI>.
+
+The meta information in this class is always one character per residue
+long and blank values are space characters (ASCII 32).
+
+After the latest rewrite, the meta information no longer covers all
+the residues automatically. Methods to check the length of meta
+information (L<meta_length>) and to see if the ends are flushed to the
+sequence have been added (L<is_flush>). To force the old
+functionality, set L<force_flush> to true.
+
+It is assumed that meta data values do not depend on the nucleotide
+sequence strand value.
+
+Application specific implementations should inherit from this class to
+override and add to these methods.
+
+L<Bio::Seq::Meta::Array> allows for more complex meta values (scalars
+or objects) to be used.
+
+=head2 Method naming
+
+Character based meta data is read and set by method meta() and its
+variants. These are the suffixes and prefixes used in the variants:
+
+    [named_] [sub] meta [_text]
+
+=over 3
+
+=item _text
+
+Suffix B<_text> guarantees that output is a string. Note that it does
+not limit the input.
+
+In this implementation, the output is always text, so these methods
+are redundant.
+
+=item sub
+
+Prefix B<sub>, like in subseq(), means that the method applies to sub
+region of the sequence range and takes start and end as arguments.
+Unlike subseq(), these methods are able to set values.  If the range
+is not defined, it defaults to the complete sequence.
+
+=item named
+
+Prefix B<named_> in method names allows the user to attach multiple
+meta strings to one sequence by explicitly naming them. The name is
+always the first argument to the method. The "unnamed" methods use the
+class wide default name for the meta data and are thus special cases
+of the "named" methods.
+
+Note that internally names are keys in a hash and any misspelling of a
+name will silently store the data under a wrong name. The used names
+(keys) can be retrieved using method meta_names(). See L<meta_names>.
+
+=back
+
+=head1 SEE ALSO
+
+L<Bio::LocatableSeq>, 
+L<Bio::Seq::MetaI>, 
+L<Bio::Seq::Meta::Array>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Chad Matsalla, bioinformatics at dieselwurks.com
+
+Aaron Mackey, amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::Meta;
+use vars qw($DEFAULT_NAME $GAP $META_GAP);
+use strict;
+
+#use overload '""' => \&to_string;
+
+use base qw(Bio::LocatableSeq Bio::Seq::MetaI);
+
+
+BEGIN {
+
+    # Key under which the "unnamed" meta data set is stored in
+    # $self->{_meta}; used by all the non-"named_" accessors.
+    $DEFAULT_NAME = 'DEFAULT';
+    # NOTE(review): presumably the gap character of the sequence itself;
+    # it is not referenced by any code visible in this module.
+    $GAP = '-';
+    # Blank meta value (a single space); _do_flush() pads meta strings
+    # with it.
+    $META_GAP = ' ';
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $metaseq = Bio::Seq::Meta->new
+	        ( -meta => 'aaaaaaaabbbbbbbb',
+                  -seq =>  'TKLMILVSHIVILSRM',
+	          -id  => 'human_id',
+	          -accession_number => 'S000012',
+	        );
+ Function: Constructor for Bio::Seq::Meta class, meta data being in a
+           string. Note that you can provide an empty meta string.
+ Returns : a new Bio::Seq::Meta object
+
+=cut
+
+
+sub new {
+    my ($class, @args) = @_;
+
+    # Let the parent class build the sequence object first: meta data is
+    # padded relative to the sequence length, so the sequence must exist.
+    my $self = $class->SUPER::new(@args);
+
+    my($meta, $forceflush) =
+        $self->_rearrange([qw(META
+                              FORCE_FLUSH
+                              )],
+                          @args);
+
+    # The default (unnamed) meta set always exists and starts out empty.
+    $self->{'_meta'}->{$DEFAULT_NAME} = "";
+
+    # Test for a non-empty string rather than for truth, so that the
+    # perfectly valid meta string '0' is not silently discarded.
+    $self->meta($meta)
+        if defined $meta && CORE::length($meta);
+    $forceflush && $self->force_flush($forceflush);
+
+    return $self;
+}
+
+
+=head2 meta
+
+ Title   : meta
+ Usage   : $meta_values  = $obj->meta($values_string);
+ Function:
+
+           Get and set method for the meta data starting from residue
+           position one. Since it is dependent on the length of the
+           sequence, it needs to be manipulated after the sequence.
+
+           The length of the returned value always matches the length
+           of the sequence, if force_flush() is set. See L<force_flush>.
+
+ Returns : meta data in a string
+ Args    : new value, string, optional
+
+=cut
+
+sub meta {
+    my ($self, $value) = @_;
+
+    # The unnamed accessor is named_meta() applied to the default set.
+    return $self->named_meta($DEFAULT_NAME, $value);
+}
+
+=head2 meta_text
+
+ Title   : meta_text
+ Usage   : $meta_values  = $obj->meta_text($values_arrayref);
+ Function: Variant of meta() guarantied to return a textual
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, optional
+
+=cut
+
+sub meta_text {
+    my ($self, $value) = @_;
+
+    # Plain delegation to meta().
+    return $self->meta($value);
+}
+
+=head2 named_meta
+
+ Title   : named_meta()
+ Usage   : $meta_values  = $obj->named_meta($name, $values_arrayref);
+ Function: A more general version of meta(). Each meta data set needs
+           to be named. See also L<meta_names>.
+ Returns : a string
+ Args    : scalar, name of the meta data set
+           new value, optional
+
+=cut
+
+sub named_meta {
+    my $self  = shift;
+    my $name  = shift || $DEFAULT_NAME;
+    my $value = shift;
+
+    # Setter part: only entered when a new value was supplied.
+    if ( defined $value ) {
+
+        if ( ref $value ) {
+            $self->throw("I need a scalar value, not [". ref($value). "]");
+        }
+
+        # Right-pad with blanks when the value is shorter than the
+        # sequence; longer values are stored unchanged at this point.
+        my $missing = $self->length - CORE::length($value);
+        $value .= " " x $missing if $missing > 0;
+
+        $self->{'_meta'}->{$name} = $value;
+
+        # Gap position check is disabled:
+        #$self->_test_gap_positions($name) if $self->verbose > 0;
+    }
+
+    # With force_flush on, a meta set that was never stored reads as a
+    # string of blanks as long as the sequence.
+    if ( $self->force_flush && !defined $self->{'_meta'}->{$name} ) {
+        return " " x $self->length;
+    }
+
+    # With force_flush on, bring the stored sets in line with the
+    # sequence length before handing the value back.
+    if ( $self->force_flush ) {
+        $self->_do_flush;
+    }
+
+    return $self->{'_meta'}->{$name};
+}
+
+=head2 _test_gap_positions
+
+ Title   : _test_gap_positions
+ Usage   : $meta_values  = $obj->_test_gap_positions($name);
+ Function: Internal test for correct position of gap characters.
+           Gap being only '-' this time.
+
+           This method was written to be called from named_meta() when
+           setting meta data, but only if verbose is positive as this
+           can be an expensive process on very long sequences; that
+           call is currently commented out. Set verbose(1) to see
+           warnings when gaps do not align in sequence and meta data
+           and turn them into errors by setting verbose(2).
+
+ Returns : true on success, prints warnings
+ Args    : none
+
+=cut
+
+sub _test_gap_positions {
+    my $self = shift;
+    my $name = shift;
+    my $success = 1;
+
+    # Nothing to compare against when the sequence is empty.
+    $self->seq || return $success;
+    my $len = CORE::length($self->seq);
+    # Walk the sequence and the named meta string column by column.
+    for (my $i=0; $i < $len; $i++) {
+        my $s = substr $self->{seq}, $i, 1;
+        my $m = substr $self->{_meta}->{$name}, $i, 1;
+        # NOTE(review): the residue is compared with $META_GAP (a space),
+        # although the POD says the gap is '-' ($GAP) -- confirm which one
+        # is intended. Also note that $success is only cleared when
+        # warn() returns a true value.
+        $self->warn("Gap mismatch [$m/$s] in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]")
+            and $success = 0
+                if ($s eq $META_GAP) && $s ne $m;
+    }
+    return $success;
+}
+
+=head2 named_meta_text
+
+ Title   : named_meta_text()
+ Usage   : $meta_values  = $obj->named_meta_text($name, $values_arrayref);
+ Function: Variant of named_meta() guarantied to return a textual
+           representation  of the named meta data.
+           For details, see L<meta>.
+ Returns : a string
+ Args    : scalar, name of the meta data set
+           new value, optional
+
+=cut
+
+sub named_meta_text {
+    my ($self, @args) = @_;
+
+    # Plain delegation to named_meta().
+    return $self->named_meta(@args);
+}
+
+=head2 submeta
+
+ Title   : submeta
+ Usage   : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
+           $subset_of_meta_values = $obj->submeta(10, undef, $value_string);
+ Function:
+
+           Get and set method for meta data for subsequences.
+
+           Numbering starts from 1 and the number is inclusive, ie 1-2
+           are the first two residue of the sequence. Start cannot be
+           larger than end but can be equal.
+
+           If the second argument is missing the returned values
+           should extend to the end of the sequence.
+
+           The return value may be a string or an array reference,
+           depending on the implementation. If in doubt, use
+           submeta_text() which is a variant guarantied to return a
+           string.  See L<submeta_text>.
+
+ Returns : A reference to an array or a string
+ Args    : integer, start position
+           integer, end position, optional when a third argument present
+           new value, optional
+
+=cut
+
+sub submeta {
+    my ($self, @range_and_value) = @_;
+
+    # The unnamed accessor is named_submeta() applied to the default set.
+    return $self->named_submeta($DEFAULT_NAME, @range_and_value);
+}
+
+=head2 submeta_text
+
+ Title   : submeta_text
+ Usage   : $meta_values  = $obj->submeta_text(20, $value_string);
+ Function: Variant of submeta() guarantied to return a textual 
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, optional
+
+
+=cut
+
+sub submeta_text {
+    my ($self, @args) = @_;
+
+    # Plain delegation to submeta().
+    return $self->submeta(@args);
+}
+
+=head2 named_submeta
+
+ Title   : named_submeta
+ Usage   : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
+           $subset_of_meta_values = $obj->named_submeta($name, 10);
+ Function: A more general version of submeta(). Each meta data set
+           needs to be named. For details, see L<submeta>.
+ Returns : A reference to an array or a string
+ Args    : scalar, name of the meta data set
+           integer, start position
+           integer, end position, optional when a third argument present
+           new value, optional
+
+=cut
+
+sub named_submeta {
+    my ($self, $name, $start, $end, $value) = @_;
+
+    $name  ||= $DEFAULT_NAME;
+    $start ||= 1;
+
+    $start =~ /^[+]?\d+$/ and $start > 0 or
+        $self->throw("Need at least a positive integer start value");
+
+    # 0-based string offset of the 1-based $start. Kept in its own
+    # variable so that $start stays 1-based for the length arithmetic.
+    my $offset = $start - 1;
+
+    # Setter. Test for a non-empty string instead of truth so that the
+    # meta value '0' can be stored.
+    if (defined $value && length $value) {
+        $end ||= $start+length($value)-1;
+        $self->warn("You are setting meta values beyond the length of the sequence\n".
+                    "[$start > ". length($self->seq)."] in sequence ". $self->id)
+            if $start > length $self->seq;
+
+        # Make sure there is a string to work on, then pad it with blanks
+        # up to the position just before $start.
+        $self->{_meta}->{$name} = '' unless defined $self->{_meta}->{$name};
+        my $gap = $offset - length($self->{_meta}->{$name});
+        $self->{_meta}->{$name} .= " " x $gap if $gap > 0;
+
+        # Overwrite length($value) characters in place; anything stored
+        # after the replaced stretch is preserved.
+        substr($self->{_meta}->{$name}, $offset, length($value)) = $value;
+
+        # Return exactly the inclusive range $start..$end.
+        return substr($self->{_meta}->{$name}, $offset, $end - $start + 1);
+
+    } else {
+
+        $end or $end = length $self->seq;
+
+        # pad meta data if needed
+        # NOTE(review): the pad amount is computed from $start although the
+        # condition tests $end, so in most cases nothing is added. Left
+        # unchanged because padding to $end would make a read alter
+        # is_flush() results -- confirm the intended behaviour.
+        if (length($self->{_meta}->{$name}) < $end) {
+            $self->{'_meta'}->{$name} .=  " " x ( $start - length($self->{'_meta'}->{$name}));
+        }
+
+        return substr ($self->{_meta}->{$name}, $offset, $end - $start + 1)
+    }
+}
+
+
+=head2 named_submeta_text
+
+ Title   : named_submeta_text
+ Usage   : $meta_values  = $obj->named_submeta_text($name, 20, $value_string);
+ Function: Variant of submeta() guarantied to return a textual
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : scalar, name of the meta data
+ Args    : integer, start position, optional
+           integer, end position, optional
+           new value, optional
+
+=cut
+
+sub named_submeta_text {
+    my ($self, @args) = @_;
+
+    # Plain delegation to named_submeta().
+    return $self->named_submeta(@args);
+}
+
+=head2 meta_names
+
+ Title   : meta_names
+ Usage   : @meta_names  = $obj->meta_names()
+ Function: Retrieves an array of meta data set names. The default
+           (unnamed) set name is guarantied to be the first name.
+ Returns : an array of names
+ Args    : none
+
+=cut
+
+sub meta_names {
+    my ($self) = @_;
+
+    # All explicitly named sets, in alphabetical order.
+    my @names = grep { $_ ne $DEFAULT_NAME } sort keys %{$self->{'_meta'}};
+
+    # The default set goes first, but only when it actually holds data.
+    # Test the length rather than truth so that a default meta string of
+    # '0' is still reported.
+    my $default = $self->{'_meta'}->{$DEFAULT_NAME};
+    unshift @names, $DEFAULT_NAME
+        if defined $default && CORE::length($default);
+
+    return @names;
+}
+
+
+=head2 meta_length
+
+ Title   : meta_length()
+ Usage   : $meta_len  = $obj->meta_length();
+ Function: return the number of elements in the meta set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub meta_length {
+    my $self = shift;
+
+    # Length of the default (unnamed) meta set.
+    return $self->named_meta_length($DEFAULT_NAME);
+}
+
+
+=head2 named_meta_length
+
+ Title   : named_meta_length()
+ Usage   : $meta_len  = $obj->named_meta_length($name);
+ Function: return the number of elements in the named meta set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub named_meta_length {
+    my $self = shift;
+    my $name = shift || $DEFAULT_NAME;
+
+    # One character per element, so the string length is the element count.
+    my $stored = $self->{'_meta'}->{$name};
+    return length($stored);
+}
+
+
+=head2 force_flush
+
+ Title   : force_flush()
+ Usage   : $force_flush = $obj->force_flush(1);
+ Function: Automatically pad with empty values or truncate meta values
+           to sequence length. Not done by default.
+ Returns : boolean 1 or 0
+ Args    : optional boolean value
+
+Note that if you turn this forced padding off, the previously padded
+values are not changed.
+
+=cut
+
+sub force_flush {
+    my ($self, $value) = @_;
+
+    # Pure getter when no defined value is passed in.
+    return $self->{force_flush} unless defined $value;
+
+    if ( !$value ) {
+        # Switching off leaves previously padded values as they are.
+        $self->{force_flush} = 0;
+    }
+    else {
+        # Switching on flushes all meta sets right away.
+        $self->{force_flush} = 1;
+        $self->_do_flush;
+    }
+
+    return $self->{force_flush};
+}
+
+
+=head2 _do_flush
+
+ Title   : _do_flush
+ Usage   : 
+ Function: Internal method that forces all meta values to be the same
+           length as the sequence. Called from L<force_flush> and L<named_meta>
+ Returns : 
+ Args    : 
+
+=cut
+
+
+sub _do_flush {
+    my ($self) = @_;
+
+    # The default set is always processed, even when meta_names() leaves
+    # it out because it is empty; %seen avoids handling it twice when it
+    # is listed.
+    my %seen;
+    foreach my $name ( grep { !$seen{$_}++ } ($DEFAULT_NAME, $self->meta_names) ) {
+
+        my $seq_len  = $self->length;
+        my $meta_len = $self->named_meta_length($name) || 0;
+
+        # elongation: pad with blank meta values up to the sequence length
+        if ($seq_len > $meta_len) {
+            $self->{'_meta'}->{$name} .= $META_GAP x ($seq_len - $meta_len);
+        }
+        # truncation: keep exactly $seq_len characters (the third argument
+        # of substr is a length, not an end index)
+        elsif ($seq_len < $meta_len) {
+            $self->{_meta}->{$name} = substr($self->{_meta}->{$name}, 0, $seq_len);
+        }
+    }
+
+}
+
+
+=head2 is_flush
+
+ Title   : is_flush
+ Usage   : $is_flush  = $obj->is_flush()
+           or  $is_flush = $obj->is_flush($my_meta_name)
+ Function: Boolean to tell if all meta values are in
+           flush with the sequence length.
+           Returns true if force_flush() is set
+           Set verbosity to a positive value to see failed meta sets
+ Returns : boolean 1 or 0
+ Args    : optional name of the meta set
+
+=cut
+
+
+sub is_flush {
+
+    # List assignment from @_ so that the optional meta set name is
+    # actually picked up ("= shift" would only ever fill $self).
+    my ($self, $name) = @_;
+
+    # With force_flush on, every set is flush by definition.
+    return 1 if $self->force_flush;
+
+    # Collects the names of the sets whose length differs from the sequence.
+    my $sticky = '';
+
+    if ($name) {
+        # Only the requested set is checked.
+        $sticky .= "$name " if $self->length != $self->named_meta_length($name);
+    } else {
+        # No name given: check every set reported by meta_names().
+        foreach my $m ($self->meta_names) {
+            $sticky .= "$m " if $self->length != $self->named_meta_length($m);
+        }
+    }
+
+    if ($sticky) {
+        print "These meta set are not flush: $sticky\n" if $self->verbose; 
+        return 0;
+    }
+
+    return 1;
+}
+
+
+=head1 Bio::PrimarySeqI methods
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $newseq = $seq->revcom();
+ Function: Produces a new Bio::Seq::MetaI implementing object where
+           the order of residues and their meta information is reversed.
+ Returns : A new (fresh) Bio::Seq::Meta object
+ Args    : none
+ Throws  : if the object returns false on is_flush()
+
+Note: The method does nothing to meta values, it reorders them, only.
+
+=cut
+
+sub revcom {
+    my ($self) = @_;
+
+    # Refuse to continue unless is_flush() reports true.
+    if ( !$self->is_flush ) {
+        $self->throw("Can not get a reverse complement. The object is not flush.");
+    }
+
+    my $revcom = $self->SUPER::revcom;
+
+    # Store every meta string on the new object in reversed order; the
+    # characters themselves are not changed.
+    for my $set_name ( keys %{ $self->{_meta} } ) {
+        my $reversed = scalar reverse $self->{_meta}->{$set_name};
+        $revcom->named_meta($set_name, $reversed);
+    }
+
+    return $revcom;
+}
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $seq->trunc(10,100);
+ Function: Provides a truncation of a sequence together with meta data
+ Returns : a fresh Bio::Seq::Meta implementing object
+ Args    : Two integers denoting first and last residue of the sub-sequence.
+
+=cut
+
+sub trunc {
+    my ($self, $start, $end) = @_;
+
+    # Validate the requested 1-based, inclusive range before doing any work.
+    unless ( $start =~ /^[+]?\d+$/ && $start > 0 ) {
+        $self->throw("Need at least a positive integer start value as start");
+    }
+    unless ( $end =~ /^[+]?\d+$/ && $end > 0 ) {
+        $self->throw("Need at least a positive integer start value as end");
+    }
+    unless ( $end >= $start ) {
+        $self->throw("End position has to be larger or equal to start");
+    }
+    unless ( $end <= $self->length ) {
+        $self->throw("End position can not be larger than sequence length");
+    }
+
+    my $subseq = $self->SUPER::trunc($start, $end);
+
+    # 0-based offset and number of characters covering $start..$end.
+    my $offset = $start - 1;
+    my $span   = $end - $offset;
+    for my $set_name ( keys %{ $self->{_meta} } ) {
+        my $piece = substr($self->{_meta}->{$set_name}, $offset, $span);
+        $subseq->named_meta($set_name, $piece);
+    }
+    return $subseq;
+}
+
+
+sub to_string {
+    my $self = shift;
+
+    # Write this object through a 'metafasta' Bio::SeqIO stream; the
+    # return value is always 1, not the formatted text.
+    my $writer = Bio::SeqIO->new(-format=>'metafasta');
+    $writer->write_seq($self);
+
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/MetaI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/MetaI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/MetaI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,410 @@
+# $Id: MetaI.pm,v 1.10.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::MetaI
+#
+# Cared for by Heikki Lehvaslaiho
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::MetaI - Interface for sequence objects with residue-based
+meta information
+
+=head1 SYNOPSIS
+
+  # get a Bio::Seq::MetaI compliant object somehow
+
+  # to test this is a meta seq object
+  $obj->isa("Bio::Seq::MetaI")
+     || $obj->throw("$obj not a Bio::Seq::MetaI");
+
+  # accessors
+  $string     = $obj->meta;
+  $string     = $obj->meta_text;
+  $substring  = $obj->submeta(12,50);
+  $unique_key = $obj->accession_number();
+
+
+=head1 DESCRIPTION
+
+This class defines an abstract interface for basic residue-based meta
+information. Examples of this kind of meta data are secondary
+structures (RNA and protein), protein hydrophobicity assignments, or
+other alternative alphabets for polypeptides, sequence quality data
+and nucleotide alignments with translations.
+
+The length of the meta data sequence is not dependent on the amount of
+the meta information. The meta information always covers all the
+residues, but a blank value is used to denote unavailable
+information. If necessary the implementation quietly truncates or
+extends meta information with blank values. Definition of blank is
+implementation dependent. Gaps in MSAs should not have meta
+information.
+
+At this point a residue in a sequence object can have only one meta
+value. If you need more, use multiple copies of the sequence object.
+
+Meta data storage can be implemented in various ways, e.g: string,
+array of scalars, array of hashes, array of objects.
+
+If the implementation so chooses, there can be more than one meta
+value associated with each residue. See L<named_meta> and
+L<named_submeta>. Note that use of arbitrary names is very prone to
+typos leading to creation of additional copies of meta data sets.
+
+Bio::Seq::Meta provides basic, pure perl implementation of sequences
+with meta information. See L<Bio::Seq::Meta>. Application specific
+implementations will override and add to these methods.
+
+=head2 Method naming
+
+Character based meta data is read and set by method meta() and its
+variants. These are the suffixes and prefixes used in the variants:
+
+    [named_] [sub] meta [_text]
+
+=over 3
+
+=item _text
+
+Suffix B<_text> guarantees that output is a string. Note that it does
+not limit the input.
+
+=item sub
+
+Prefix B<sub>, like in subseq(), means that the method applies to sub
+region of the sequence range and takes start and end as arguments.
+Unlike subseq(), these methods are able to set values.  If the range
+is not defined, it defaults to the complete sequence.
+
+=item named_
+
+Prefix B<named_> in method names allows the user to attach multiple
+meta strings to one sequence by explicitly naming them. The name is
+always the first argument to the method. The "unnamed" methods use the
+class wide default name for the meta data and are thus special cases
+of the "named" methods.
+
+Note that internally names are keys in a hash and any misspelling of a
+name will silently store the data under a wrong name. The used names
+(keys) can be retrieved using method meta_names(). See L<meta_names>.
+
+=back
+
+
+=head1 SEE ALSO
+
+L<Bio::Seq::Meta>, 
+L<Bio::Seq::Meta::Array>, 
+L<Bio::Seq::EncodedSeq>, 
+L<Bio::Tools::OddCodes>, 
+L<Bio::Seq::Quality>
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Chad Matsalla, bioinformatics at dieselwurks.com;
+Aaron Mackey, amackey at virginia.edu;
+Peter Schattner schattner at alum.mit.edu;
+Richard Adams, Richard.Adams at ed.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::MetaI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+=head2 meta
+
+ Title   : meta
+ Usage   : $meta_values  = $obj->meta($values_string);
+ Function:
+
+           Get and set method for the unnamed meta data starting from
+           residue position one. Since it is dependent on the length
+           of the sequence, it needs to be manipulated after the
+           sequence.
+
+           The implementation may choose to accept argument values in
+           a string or in an array (reference) or in a hash
+           (reference).
+
+           The return value may be a string or an array reference,
+           depending on the implementation. If in doubt, use meta_text()
+           which is a variant guaranteed to return a string.  See
+           L<meta_text>.
+
+           The length of the returned value always matches the length
+           of the sequence.
+
+ Returns : A reference to an array or a string
+ Args    : new value, optional
+
+=cut
+
+sub meta {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 meta_text
+
+ Title   : meta_text()
+ Usage   : $meta_values  = $obj->meta_text($values_arrayref);
+ Function: Variant of meta() guarantied to return a textual
+           representation of the meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, optional
+
+=cut
+
+sub meta_text {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 named_meta
+
+ Title   : named_meta()
+ Usage   : $meta_values  = $obj->named_meta($name, $values_arrayref);
+ Function: A more general version of meta(). Each meta data set needs
+           to be named. See also L<meta_names>.
+ Returns : a string
+ Args    : scalar, name of the meta data set
+           new value, optional
+
+=cut
+
+sub named_meta {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 named_meta_text
+
+ Title   : named_meta_text()
+ Usage   : $meta_values  = $obj->named_meta_text($name, $values_arrayref);
+ Function: Variant of named_meta() guarantied to return a textual
+           representation  of the named meta data.
+           For details, see L<meta>.
+ Returns : a string
+ Args    : scalar, name of the meta data set
+           new value, optional
+
+=cut
+
+sub named_meta_text {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 submeta
+
+ Title   : submeta
+ Usage   : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
+           $subset_of_meta_values = $obj->submeta(10, undef, $value_string);
+ Function:
+
+           Get and set method for meta data for subsequences.
+
+           Numbering starts from 1 and the number is inclusive, ie 1-2
+           are the first two residue of the sequence. Start cannot be
+           larger than end but can be equal.
+
+           If the second argument is missing the returned values
+           should extend to the end of the sequence.
+
+           If implementation tries to set values beyond the current
+           sequence, they should be ignored.
+
+           The return value may be a string or an array reference,
+           depending on the implementation. If in doubt, use
+           submeta_text() which is a variant guaranteed to return a
+           string.  See L<submeta_text>.
+
+ Returns : A reference to an array or a string
+ Args    : integer, start position, optional
+           integer, end position, optional
+           new value, optional
+
+=cut
+
+sub submeta {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 submeta_text
+
+ Title   : submeta_text
+ Usage   : $meta_values  = $obj->submeta_text(20, $value_string);
+ Function: Variant of submeta() guarantied to return a textual
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : integer, start position, optional
+           integer, end position, optional
+           new value, optional
+
+=cut
+
+sub submeta_text {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 named_submeta
+
+ Title   : named_submeta
+ Usage   : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
+           $subset_of_meta_values = $obj->named_submeta($name, 10);
+ Function: A more general version of submeta(). Each meta data set
+           needs to be named. For details, see L<submeta>.
+ Returns : A reference to an array or a string
+ Args    : scalar, name of the meta data set
+           integer, start position
+           integer, end position, optional when a third argument present
+           new value, optional
+
+=cut
+
+sub named_submeta {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 named_submeta_text
+
+ Title   : named_submeta_text
+ Usage   : $meta_values  = $obj->named_submeta_text($name, 20, $value_string);
+ Function: Variant of submeta() guarantied to return a textual
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : scalar, name of the meta data
+ Args    : integer, start position, optional
+           integer, end position, optional
+           new value, optional
+
+=cut
+
+sub named_submeta_text {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 meta_names
+
+ Title   : meta_names
+ Usage   : @meta_names  = $obj->meta_names()
+ Function: Retrieves an array of meta data set names. The default (unnamed)
+           set name is guaranteed to be the first name.
+ Returns : an array of names
+ Args    : none
+
+=cut
+
+sub meta_names {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+
+=head2 force_flush
+
+ Title   : force_flush()
+ Usage   : $force_flush = $obj->force_flush(1);
+ Function: Automatically pad with empty values or truncate meta values to
+            sequence length
+ Returns : boolean 1 or 0
+ Args    : optional boolean value
+
+=cut
+
+sub force_flush {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+
+=head2 is_flush
+
+ Title   : is_flush
+ Usage   : $is_flush  = $obj->is_flush()
+           or  $is_flush = $obj->is_flush($my_meta_name)
+ Function: Boolean to tell if all meta values are in
+           flush with the sequence length.
+           Returns true if force_flush() is set
+           Set verbosity to a positive value to see failed meta sets
+ Returns : boolean 1 or 0
+ Args    : optional name of the meta set
+
+=cut
+
+sub is_flush {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+
+=head2 meta_length
+
+ Title   : meta_length()
+ Usage   : $meta_len  = $obj->meta_length();
+ Function: return the number of elements in the meta set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub meta_length {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 named_meta_length
+
+ Title   : named_meta_length()
+ Usage   : $meta_len  = $obj->named_meta_length($name);
+ Function: return the number of elements in the named meta set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub named_meta_length {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+
+=head1 Bio::PrimarySeqI methods
+
+Implementing classes will need to rewrite these Bio::PrimarySeqI methods.
+
+=cut
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $newseq = $seq->revcom();
+ Function: Produces a new Bio::Seq::MetaI implementing object where
+           the order of residues and their meta information is reversed.
+ Returns : A new (fresh) Bio::Seq::MetaI object
+ Args    : none
+
+=cut
+
+sub revcom {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence
+ Returns : a fresh Bio::Seq::MetaI implementing object
+ Args    : Two integers denoting first and last residue of the sub-sequence.
+
+=cut
+
+sub trunc {
+    my ($self) = @_;
+
+    # Interface stub: no implementation here, see implementing classes.
+    return $self->throw_not_implemented;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimaryQual.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimaryQual.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimaryQual.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,479 @@
+# $Id: PrimaryQual.pm,v 1.24.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# bioperl module for Bio::PrimaryQual
+#
+# Cared for by Chad Matsalla <bioinformatics at dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
+
+=head1 SYNOPSIS
+
+ use Bio::Seq::PrimaryQual;
+
+ # you can use either a space-delimited string for quality
+
+ my $string_quals = "10 20 30 40 50 40 30 20 10";
+ my $qualobj = Bio::Seq::PrimaryQual->new
+ 	      ( '-qual' => $string_quals,
+ 		'-id'  => 'QualityFragment-12',
+ 		'-accession_number' => 'X78121',
+ 		);
+
+ # _or_ you can use an array of quality values
+
+ my @q2 = split/ /,$string_quals;
+ $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
+       '-primary_id'     =>      'chads primary_id',
+       '-desc'           =>      'chads desc',
+       '-accession_number' => 'chads accession_number',
+      '-id'             =>      'chads id'
+      );
+
+ # to get the quality values out:
+
+ my @quals = @{$qualobj->qual()};
+
+ # to give _new_ quality values
+
+ my $newqualstring = "50 90 1000 20 12 0 0";
+ $qualobj->qual($newqualstring);
+
+
+=head1 DESCRIPTION
+
+This module provides a mechanism for storing quality
+values. Much more useful as part of
+Bio::Seq::SeqWithQuality where these quality values
+are associated with the sequence information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics at dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Seq::PrimaryQual;
+use vars qw(%valid_type);
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Seq::QualI);
+
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $qual = Bio::Seq::PrimaryQual->new
+	( -qual => '10 20 30 40 50 50 20 10',
+	  -id  => 'human_id',
+	  -accession_number => 'AL000012',
+	);
+
+ Function: Returns a new Bio::Seq::PrimaryQual object from basic 
+	constructors, being a string _or_ a reference to an array for the
+	sequence and strings for id and accession_number. Note that you
+	can provide an empty quality string.
+ Returns : a new Bio::Seq::PrimaryQual object
+
+=cut
+
+
+
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Pick the recognised named parameters out of the argument list.
+    # -display_id and -id are two spellings for the same attribute.
+    my ($qual, $id, $acc, $pid, $desc, $given_id, $header) =
+        $self->_rearrange(
+            [qw(QUAL DISPLAY_ID ACCESSION_NUMBER PRIMARY_ID DESC ID HEADER)],
+            @args
+        );
+
+    # Supplying both spellings is only tolerated when they agree.
+    if (defined $id && defined $given_id && $id ne $given_id) {
+        $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");   
+    }
+    $id = $given_id if defined $given_id;
+
+    # The quality may be absent; fall back to an empty array reference.
+    $self->qual(defined($qual) ? $qual : []);
+
+    # The remaining attributes are stored only when they hold a true value.
+    $self->header($header)        if $header;
+    $self->display_id($id)        if $id;
+    $self->accession_number($acc) if $acc;
+    $self->primary_id($pid)       if $pid;
+    $self->desc($desc)            if $desc;
+
+    return $self;
+}
+
+=head2 qual()
+
+ Title   : qual()
+ Usage   : @quality_values  = @{$obj->qual()};
+ Function: Returns the quality as a reference to an array containing the
+           quality values. The individual elements of the quality array are
+           not validated and can be any numeric value.
+ Returns : A reference to an array.
+
+=cut
+
+sub qual {
+    my ($self, $value) = @_;
+
+    # CORE::length is spelled out because this package defines its own
+    # length() method further down.
+    my $have_value = defined $value && CORE::length($value) != 0;
+
+    if (!$have_value) {
+        # Getter call (or empty input): make sure there is always an array.
+        $self->{'qual'} ||= [];
+    }
+    elsif (ref($value) =~ /ARRAY/i) {
+        # An array reference is stored as-is, without copying or validation.
+        $self->{'qual'} = $value;
+    }
+    elsif ($self->validate_qual($value)) {
+        # A string: drop leading whitespace, then split on whitespace runs.
+        $value =~ s/^\s+//;
+        $self->{'qual'} = [ split(/\s+/, $value) ];
+    }
+    else {
+        $self->throw("Attempting to set the quality to [$value] which does not look healthy");	    
+    }
+
+    return $self->{'qual'};
+}
+
+=head2 validate_qual($qualstring)
+
+ Title	 : validate_qual($qualstring)
+ Usage	 : print("Valid.") if $obj->validate_qual($qualities);
+ Function: Make sure that the quality, if it has length > 0, contains at
+	least one digit. Note that quality strings are parsed into arrays
+	using split/\s+/,$quality_string, so make sure that your quality
+	scalar looks like this if you want it to be parsed properly.
+ Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
+ Args    : a scalar (any scalar, why PrimarySeq author?) and a scalar
+	containing the string to validate.
+
+=cut
+
+sub validate_qual {
+    my ($self, $qualstr) = @_;
+
+    # An undefined or empty string is never valid.  CORE::length is used
+    # explicitly because this package has a length() method of its own.
+    return 0 unless defined $qualstr && CORE::length($qualstr) > 0;
+
+    # The only requirement is that at least one digit is present.
+    return ($qualstr =~ /\d/) ? 1 : 0;
+}
+
+=head2 subqual($start,$end)
+
+ Title   : subqual($start,$end)
+ Usage   : @subset_of_quality_values = @{$obj->subqual(10,40)};
+ Function: returns the quality values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+
+sub subqual {
+    # Return a reference to a new array holding the quality values from
+    # $start to $end (1-based, inclusive).  Throws when $start > $end,
+    # when $start is not positive, or when $end exceeds the stored length.
+    my ($self, $start, $end) = @_;
+
+    if ($start > $end) {
+        # The previous message stated the inverse of this condition.
+        $self->throw("in subqual, start [$start] has to be less than or equal to end [$end]");
+    }
+
+    my $total = $self->length;
+    if ($start <= 0 || $end > $total) {
+        $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$total."");
+    }
+
+    # Convert the 1-based inclusive range into 0-based array indices.
+    my @sub_qual_array = @{ $self->{qual} }[ $start - 1 .. $end - 1 ];
+
+    return \@sub_qual_array;
+}
+
+=head2 display_id()
+
+ Title   : display_id()
+ Usage   : $id_string = $obj->display_id();
+ Function: returns the display id, aka the common name of the Quality
+        object.
+        The semantics of this is that it is the most likely string to be
+        used as an identifier of the quality sequence, and likely to have
+        "human" readability.  The id is equivalent to the ID field of the
+        GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
+        database. In fasta format, the >(\S+) is presumed to be the id,
+        though some people overload the id to embed other information.
+        Bioperl does not use any embedded information in the ID field,
+        and people are encouraged to use other mechanisms (accession
+	field for example, or extending the sequence object) to solve
+	this. Notice that $seq->id() maps to this function, mainly for
+        legacy/convenience issues
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub display_id {
+    # Combined getter/setter: a defined argument replaces the stored id.
+    my ($self, $new_id) = @_;
+    $self->{'display_id'} = $new_id if defined $new_id;
+    return $self->{'display_id'};
+}
+
+=head2 header()
+
+ Title   : header()
+ Usage   : $header = $obj->header();
+ Function: Get/set the header that the user wants printed for this
+     quality object.
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub header {
+    # Combined getter/setter: a defined argument replaces the stored header.
+    my ($self, $new_header) = @_;
+    $self->{'header'} = $new_header if defined $new_header;
+    return $self->{'header'};
+}
+
+=head2 accession_number()
+
+ Title   : accession_number()
+ Usage   : $unique_biological_key = $obj->accession_number();
+ Function: Returns the unique biological id for a sequence, commonly
+        called the accession_number. For sequences from established
+        databases, the implementors should try to use the correct
+        accession number. Notice that primary_id() provides the unique id
+        for the implementation, allowing multiple objects to have the same
+        accession number in a particular implementation. For sequences
+        with no accession number, this method should return "unknown".
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub accession_number {
+    my ($self, $acc) = @_;
+
+    # Setter: store the new value and hand it straight back.
+    if (defined $acc) {
+        $self->{'accession_number'} = $acc;
+        return $acc;
+    }
+
+    # Getter: report 'unknown' when nothing has been stored yet.
+    my $stored = $self->{'accession_number'};
+    return defined $stored ? $stored : 'unknown';
+}
+
+=head2 primary_id()
+
+ Title   : primary_id()
+ Usage   : $unique_implementation_key = $obj->primary_id();
+ Function: Returns the unique id for this object in this implementation.
+        This allows implementations to manage their own object ids in a
+        way the implementation can control; clients can expect one id to
+        map to one object. For sequences with no accession number, this
+        method should return a stringified memory location.
+ Returns : A string
+ Args    : None
+
+=cut
+
+sub primary_id {
+    # Combined getter/setter: a defined argument replaces the stored id.
+    my ($self, $new_id) = @_;
+    $self->{'primary_id'} = $new_id if defined $new_id;
+    return $self->{'primary_id'};
+}
+
+=head2 desc()
+
+ Title   : desc()
+ Usage   : $qual->desc($newval);
+           $description = $qual->desc();
+ Function: Get/set description text for a qual object
+ Example :
+ Returns : Value of desc
+ Args    : newvalue (optional)
+
+=cut
+
+sub desc {
+    # Combined getter/setter: a defined argument replaces the description.
+    my ($self, $new_desc) = @_;
+    $self->{'desc'} = $new_desc if defined $new_desc;
+    return $self->{'desc'};
+}
+
+=head2 id()
+
+ Title   : id()
+ Usage   : $id = $qual->id();
+ Function: Return the ID of the quality. This should normally be (and
+	actually is in the implementation provided here) just a synonym
+	for display_id().
+ Returns : A string.
+ Args    : None.
+
+=cut
+
+sub id {
+    # Synonym for display_id(); the argument is forwarded only when defined.
+    my ($self, $value) = @_;
+    return defined $value
+        ? $self->display_id($value)
+        : $self->display_id();
+}
+
+=head2 length()
+
+ Title	 : length()
+ Usage	 : $length = $qual->length();
+ Function: Return the length of the array holding the quality values.
+	Under most circumstances, this should match the number of quality
+	values but no validation is done when the PrimaryQual object is
+	constructed and non-digits could be put into this array. Is this
+	a bug? Just enough rope...
+ Returns : A scalar (the number of elements in the quality array).
+ Args    : None.
+
+=cut
+
+sub length {
+    my $self = shift;
+
+    # Warn (but carry on) when the stored quality is not a plain array ref.
+    my $kind = ref($self->{qual});
+    unless ($kind eq "ARRAY") {
+        $self->warn("{qual} is not an array here. Why? It appears to be ".$kind."(".$self->{qual}."). Good thing this can _never_ happen.");
+    }
+
+    # Number of elements in the quality array.
+    return scalar(@{ $self->{qual} });
+}
+
+=head2 qualat($position)
+
+ Title   : qualat($position)
+ Usage   : $quality = $obj->qualat(10);
+ Function: Return the quality value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the first
+        two bases of the sequence. Start cannot be larger than end but can
+        be equal.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub qualat {
+    # Fetch the single quality value at a 1-based position by asking
+    # subqual() for a one-element range.
+    my ($self, $val) = @_;
+    my $slice = $self->subqual($val, $val);
+
+    return $slice->[0] if scalar(@$slice) == 1;
+
+    $self->throw("AAAH! qualat provided more then one quality.");
+}
+
+=head2 to_string()
+
+ Title   : to_string()
+ Usage   : $quality = $obj->to_string();
+ Function: Return a textual representation of what the object contains.
+	For this module, this function will return:
+		qual
+		display_id
+		accession_number
+		primary_id
+		desc
+		id
+		length
+ Returns : A scalar.
+ Args    : None.
+
+=cut
+
+sub to_string {
+    # Build a multi-line textual summary of the object: the quality values
+    # followed by one "name: value" line per identifying attribute.
+    # Attributes whose value is false are reported as "<unset>".
+    my ($self) = @_;
+
+    # The qual line is newline-terminated like every other field; without
+    # the newline it ran straight into the display_id line.
+    my $out = "qual: ".join(',',@{$self->qual()})."\n";
+
+    # A named loop variable is used rather than $_, which the called
+    # accessors would be free to clobber.
+    foreach my $field (qw(display_id accession_number primary_id desc id)) {
+        my $result = $self->$field();
+        if (!$result) { $result = "<unset>"; }
+        $out .= "$field: $result\n";
+    }
+    return $out;
+}
+
+
+# Experimental dump of the object: walks every key of the underlying hash
+# in sorted order and tries to call a method of the same name.
+# Progress messages are printed to STDOUT as a side effect.
+# Returns a string with one "key: value" line per hash key.
+sub to_string_automatic {
+    # only $self receives a value here; $sub_result and $out start undefined
+    my ($self,$sub_result,$out) = shift;
+    foreach (sort keys %$self) {
+	print("Working on $_\n");
+	# probe whether a method named after this key can be called
+	eval { $self->$_(); };
+	# $_ holds a hash key (a plain string), so ref($_) is the empty string;
+	# when the probe succeeded the method is called a second time for its value
+	if ($@) { $sub_result = ref($_); }
+	elsif (!($sub_result = $self->$_())) {
+	    $sub_result = "<unset>";
+	}
+	# array references are flattened to a comma-separated list
+	if (ref($sub_result) eq "ARRAY") {
+	    print("This thing ($_) is an array!\n");
+	    $sub_result = join(',',@$sub_result);	
+	}
+	$out .= "$_: ".$sub_result."\n";
+    }
+    return $out;
+} 
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimedSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimedSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/PrimedSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,466 @@
+# $Id: PrimedSeq.pm,v 1.23.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# This is the original copyright statement. I have relied on Chad's module
+# extensively for this module.
+#
+# Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself. 
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+#
+# But I have modified lots of it, so I guess I should add:
+#
+# Copyright (c) 2003 bioperl, Rob Edwards. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself. 
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+
+=head1 NAME
+
+Bio::Seq::PrimedSeq - A representation of a sequence and two primers 
+flanking a target region
+
+=head1 SYNOPSIS
+
+The easiest way to use this is probably either, (i), get the output 
+from Bio::Tools::Run::Primer3, Bio::Tools::Primer3, or 
+Bio::Tools::PCRSimulation:
+
+      # For example, start with a fasta file
+      use Bio::SeqIO;
+      use Bio::Tools::Run::Primer3;
+
+      my $file = shift || die "need a file to read";
+      my $seqin = Bio::SeqIO->new(-file => $file);
+      my $seq = $seqin->next_seq;
+
+      # use primer3 to design some primers
+      my $primer3run = Bio::Tools::Run::Primer3->new(-seq => $seq);
+      $primer3run -> run; # run it with the default parameters
+
+      # create a file to write the results to
+      my $seqout = Bio::SeqIO->new(-file => ">primed_sequence.gbk", 
+                                   -format => 'genbank');
+
+      # now just get all the results and write them out.
+      while (my $results = $primer3run->next_primer) {
+         $seqout->write_seq($results->annotated_seq);
+      }
+
+Or, (ii), to create a genbank file for a sequence and its cognate primers:
+
+     use Bio::SeqIO;
+     use Bio::Seq::PrimedSeq;
+
+     # have a sequence file ($file) with the template, and two primers
+     # that match it, in fasta format
+
+     my $file = shift || die "$0 <file>";
+     my $seqin = new Bio::SeqIO(-file => $file);
+
+     # read three sequences
+     my ($template, $leftprimer, $rightprimer) =
+           ($seqin->next_seq, $seqin->next_seq, $seqin->next_seq);
+     # set up the primed sequence object
+     my $primedseq = Bio::Seq::PrimedSeq->new(-seq => $template, 
+                                              -left_primer => $leftprimer,
+                                              -right_primer => $rightprimer);
+     # open a file for output
+     my $seqout = Bio::SeqIO->new(-file => ">primed_sequence.gbk",
+                                  -format => 'genbank');
+     # print the sequence out
+     $seqout->write_seq($primedseq->annotated_sequence);
+
+This should output a genbank file with the two primers labeled.
+
+=head1 DESCRIPTION
+
+This module is a slightly glorified capsule containing a primed sequence. 
+It was created to address the fact that a primer is more than a seqfeature 
+and there need to be ways to represent the primer-sequence complex and 
+the behaviors and attributes that are associated with the complex.
+
+The primers are represented as Bio::SeqFeature::Primer objects, and should
+be instantiated first.
+
+A simple way to create a PrimedSeq object is as follows:
+
+  my $primedseq = Bio::Seq::PrimedSeq->new(
+          -seq          => $seq,  # Bio::Seq object,
+          -left_primer  => $left, # Bio::SeqFeature::Primer object,
+          -right_primer => $right # Bio::SeqFeature::Primer object,
+  );
+
+From the PrimedSeq object you should be able to retrieve
+information about melting temperatures and what not on each of the primers 
+and the amplicon.
+
+This is based on the PrimedSeq.pm module started by Chad Matsalla, with 
+additions/improvements by Rob Edwards.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Rob Edwards, redwards at utmem.edu
+
+Based on a module written by Chad Matsalla, bioinformatics1 at dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Seq::PrimedSeq;
+
+use strict;
+use Bio::SeqFeature::Primer;
+
+use vars qw ($AUTOLOAD @RES %OK_FIELD $ID);
+
+use base qw(Bio::Root::Root Bio::SeqFeature::Generic);
+
+# Build the lookup table of attribute names that AUTOLOAD accepts.
+# @RES is empty at present, so %OK_FIELD ends up empty as well.
+BEGIN {
+ @RES = qw(); # nothing here yet, not sure what we want!
+ foreach my $attr (@RES) {$OK_FIELD{$attr}++}
+}
+
+# NOTE(review): $ID is not referenced in the visible part of this module.
+$ID = 'Bio::Tools::Analysis::Nucleotide::PrimedSeq';
+
+sub AUTOLOAD {
+    # Generic accessor for any attribute whitelisted in %OK_FIELD;
+    # every other method name is rejected with an exception.
+    my ($self, @values) = @_;
+
+    # Strip the package qualifier from the requested method name.
+    (my $attr = $AUTOLOAD) =~ s/.*:://;
+
+    unless ($OK_FIELD{$attr}) {
+        $self->throw("Unallowed parameter: $attr !");
+    }
+
+    # Act as a setter when a value was supplied.
+    if (@values) {
+        $self->{$attr} = $values[0];
+    }
+    return $self->{$attr};
+}
+
+
+=head2 new
+
+ Title   : new()
+ Usage   : $primed_sequence = Bio::Seq::PrimedSeq->new(
+                                     -seq => $sequence,
+                                     -left_primer => $left_primer,
+                                     -right_primer => $right_primer);
+ Function: A constructor for an object representing a primed sequence 
+ Returns : A Bio::Seq::PrimedSeq object
+ Args    :  -seq => a Bio::Seq object (required)
+            -left_primer => a Bio::SeqFeature::Primer object (required)
+            -right_primer => a Bio::SeqFeature::Primer object (required)
+
+           Many other parameters can be included including all of the output
+           parameters from the primer3 program. At the moment most of these
+           parameters will not do anything.
+
+=cut
+
+sub new {
+    # Constructor.  Every argument except the template sequence is stored
+    # on the object under its own name with the leading dash removed, and
+    # that name is recorded in the 'arguments' list.
+    my ($class, %args) = @_;
+    my $self = $class->SUPER::new(%args);
+
+    foreach my $key (keys %args) {
+        my $value = $args{$key};
+
+        # Any key beginning with -seq supplies the template sequence.
+        if ($key =~ /^-seq/i) {
+            $self->{target_sequence} = $value;
+            next;
+        }
+
+        (my $okey = $key) =~ s/^-//;
+
+        # A primer handed over as a Bio::SeqI object is wrapped in a
+        # Bio::SeqFeature::Primer object.
+        my $is_primer_key = ($okey eq "left_primer" || $okey eq "right_primer");
+        if ($is_primer_key && ref($value) && $value->isa('Bio::SeqI')) {
+            $value = Bio::SeqFeature::Primer->new(-seq => $value);
+        }
+
+        $self->{$okey} = $value;
+        push @{$self->{'arguments'}}, $okey;
+    }
+
+    # The three mandatory components must all be present ...
+    unless ($self->{target_sequence} && $self->{left_primer} && $self->{right_primer}) {
+        $self->throw("You must provide a -seq, -left_primer, and -right_primer to create this object.");
+    }
+
+    # ... and of the expected classes.
+    unless (ref($self->{target_sequence}) && $self->{target_sequence}->isa('Bio::SeqI')) {
+        $self->throw("The target_sequence must be a Bio::Seq to create this object.");
+    }
+    foreach my $slot (qw(left_primer right_primer)) {
+        next if ref($self->{$slot}) && $self->{$slot}->isa("Bio::SeqFeature::Primer");
+        $self->throw("You must provide a left_primer and right_primer, both as Bio::SeqFeature::Primer to create this object.");
+    }
+
+    # With all the sequences in hand, locate the primers on the template.
+    $self->_place_seqs();
+    return $self;
+}
+
+
+=head2 get_primer
+
+ Title   : get_primer();
+ Usage   : $primer = $primedseq->get_primer(l, left, left_primer, 
+           -left_primer) to return the left primer or 
+	        $primer = $primedseq->get_primer(r, right, right_primer, 
+           -right_primer) to return the right primer or
+	        $primer = $primedseq->get_primer(b, both, both_primers, 
+           -both_primers)
+           to return the left primer, right primer array
+ Function: A getter for the left and/or right primer in this PrimedSeq object.
+ Returns : A Bio::SeqFeature::Primer object, or a list of two of them
+           (left, right) when both primers are requested
+ Args    : Either of (l, left, left_primer, -left_primer) to get left 
+           primer.
+           Either of (r, right, right_primer, -right_primer) to get 
+           right primer
+	        Either of (b, both, both_primers, -both_primers) to get 
+           both primers. 
+           Note that this is plural. [default]
+
+=cut
+
+sub get_primer {
+    # Return the left primer, the right primer, or both, depending on the
+    # selector.  Only the first letter of the selector (after an optional
+    # leading dash) is examined: l = left, r = right, b = both.
+    # No selector returns both primers.  An unrecognised selector returns
+    # nothing (empty list / undef).
+    #
+    # The empty prototype "()" that used to be declared here was wrong for
+    # a sub taking arguments; it was only harmless because prototypes are
+    # ignored on method calls.
+    my ($self, $arg) = @_;
+
+    if (!defined $arg) {
+        return ($self->{'left_primer'}, $self->{'right_primer'});
+    }
+    if ($arg =~ /^-?l/) {
+        return $self->{'left_primer'};
+    }
+    if ($arg =~ /^-?r/) {
+        return $self->{'right_primer'};
+    }
+    if ($arg =~ /^-?b/) {
+        return ($self->{'left_primer'}, $self->{'right_primer'});
+    }
+    return;
+}
+
+
+
+=head2 annotated_sequence
+
+ Title   : annotated_sequence
+ Usage   : $annotated_sequence_object = $primedseq->annotated_sequence()
+ Function: Get an annotated sequence object containing the left and right 
+           primers
+ Returns : An annotated sequence object or 0 if not defined.
+ Args    : 
+ Note    : Use this method to return a sequence object that you can write
+           out (e.g. in GenBank format). See the example above.
+
+=cut
+
+sub annotated_sequence {
+  # Hand back the annotated sequence if one has been stored, else 0.
+  my ($self) = @_;
+  return exists $self->{annotated_sequence}
+      ? $self->{annotated_sequence}
+      : 0;
+}
+
+=head2 amplicon
+
+ Title   : amplicon
+ Usage   : my $amplicon = $primedseq->amplicon()
+ Function: Retrieve the amplicon as a sequence object
+ Returns : A seq object. To get the sequence use $amplicon->seq
+ Args    : None
+ Note    : 
+
+=cut
+
+sub amplicon {
+ # Build and return a new Bio::Seq object holding the amplicon sequence
+ # computed by _place_seqs(), with an id derived from the template's id.
+ #
+ # "@args" had been garbled to " at args" by the mail archive's address
+ # obfuscation, which is a syntax error; it is restored here.
+ my ($self, @args) = @_;
+
+ # new() stores the -seq argument under 'target_sequence', so that is
+ # where the template's id is read from.  The key '-seq' is never set.
+ my $id = $self->{'target_sequence'}->display_id;
+ unless ($id) {$id=""}
+ # this just prevents a warning when the template has no id
+ $id = "Amplicon from ".$id;
+ 
+ my $seqobj=Bio::Seq->new(-id=>$id, -seq=>$self->{'amplicon_sequence'});
+ return $seqobj;
+}
+
+
+=head2 seq
+
+ Title   : seq
+ Usage   : my $seqobj = $primedseq->seq()
+ Function: Retrieve the target sequence as a sequence object
+ Returns : A seq object. To get the sequence use $seqobj->seq
+ Args    : None
+ Note    : 
+
+=cut
+
+sub seq {
+ # Accessor for the template (target) sequence object.
+ my ($self) = @_;
+ return $self->{target_sequence};
+}
+
+=head2 _place_seqs
+
+ Title   : _place_seqs
+ Usage   : $self->_place_seqs()
+ Function: An internal method to place the primers on the sequence and 
+           set up the ranges of the sequences
+ Returns : Nothing
+ Args    : None
+ Note    : Internal use only
+
+=cut
+
+sub _place_seqs {
+	# Locate both primers on the target sequence, record their
+	# "start,length" positions and the product size (checking them against
+	# any values already present), store the amplicon sequence, and then
+	# create the sequence features via _set_seqfeature().
+	# Throws if the primers cannot be placed on the target.
+	my $self = shift;
+
+	# Pull out the target and primer sequences, upper-cased so that the
+	# matching below is not affected by case.  The right primer is matched
+	# as its reverse complement.
+	my $target_sequence = uc($self->{'target_sequence'}->seq());
+	my $left_seq        = uc($self->{'left_primer'}->seq()->seq());
+	my $right_seq       = uc($self->{'right_primer'}->seq()->revcom()->seq());
+
+	# Capture the flanking and intervening sequence directly from the
+	# match instead of reading $1..$3 afterwards.
+	my ($before, $middle, $after) =
+		$target_sequence =~ /(.*)$left_seq(.*)$right_seq(.*)/;
+
+	unless (defined $before) {
+		unless ($target_sequence =~ /$left_seq/) {$self->throw("Can't place left sequence on target!")}
+		unless ($target_sequence =~ /$right_seq/) {$self->throw("Can't place right sequence on target!")}
+		# Both primers occur, but not as left primer followed by right
+		# primer.  Previously this case fell through and carried on with
+		# undefined or stale capture values.
+		$self->throw("Can't place primers on target: the right primer does not lie downstream of the left primer!");
+	}
+
+	# Positions use the "start,length" form with a 0-based start; for the
+	# right primer the start is the index of its last base on the target.
+	my $left_location = length($before). ",". length($left_seq);
+	my $right_location = (length($target_sequence)-length($after)-1).",".length($right_seq);
+	my $amplicon_size = length($left_seq)+length($middle)+length($right_seq);
+
+	if (exists $self->{'left_primer'}->{'PRIMER_LEFT'}) {
+		# a left primer position was supplied (primer3 input); cross-check it
+		unless ($self->{'left_primer'}->{'PRIMER_LEFT'} eq $left_location) {
+			$self->warn("Note got |".$self->{'left_primer'}->{'PRIMER_LEFT'}."| from primer3 and |$left_location| for the left primer. You should email redwards\@utmem.edu about this.");
+		}
+	}
+	else {
+		$self->{'left_primer'}->{'PRIMER_LEFT'}=$left_location;
+	}
+
+	if (exists $self->{'right_primer'}->{'PRIMER_RIGHT'}) {
+		# a right primer position was supplied (primer3 input); cross-check it
+		unless ($self->{'right_primer'}->{'PRIMER_RIGHT'} eq $right_location) {
+			$self->warn("Note got |".$self->{'right_primer'}->{'PRIMER_RIGHT'}."| from primer3 and |$right_location| for the right primer. You should email redwards\@utmem.edu about this.");
+		}
+	}
+	else {
+		$self->{'right_primer'}->{'PRIMER_RIGHT'}=$right_location;
+	}
+
+	if (exists $self->{'PRIMER_PRODUCT_SIZE'}) {
+		# a product size was supplied (primer3 input); cross-check it
+		unless ($self->{'PRIMER_PRODUCT_SIZE'} eq $amplicon_size) {
+			$self->warn("Note got |".$self->{'PRIMER_PRODUCT_SIZE'}."| from primer3 and |$amplicon_size| for the size. You should email redwards\@utmem.edu about this.");
+		}
+	}
+	else {
+		$self->{'PRIMER_PRODUCT_SIZE'} = $amplicon_size;
+	}
+
+	# primers in lower case, intervening sequence in upper case
+	$self->{'amplicon_sequence'}= lc($left_seq).uc($middle).lc($right_seq);
+
+	$self->_set_seqfeature;
+}
+
+=head2 _set_seqfeature
+
+ Title   : _set_seqfeature
+ Usage   : $self->_set_seqfeature()
+ Function: An internal method to create Bio::SeqFeature::Generic objects
+           for the primed seq
+ Returns : Nothing
+ Args    : None
+ Note    : Internal use only. Should only call this once left and right 
+           primers have been placed on the sequence. This will then set 
+           them as sequence features so hopefully we can get a nice output 
+           with write_seq.
+
+=cut
+
+
+sub _set_seqfeature {
+	# Turn the recorded primer positions into two Bio::SeqFeature::Generic
+	# objects and attach them to the target sequence, which is then also
+	# stored as the annotated sequence.  Warns and returns 0 when the
+	# primer positions have not been recorded yet.
+	my $self = shift;
+	my $left_primer  = $self->{'left_primer'};
+	my $right_primer = $self->{'right_primer'};
+
+	unless ($left_primer->{'PRIMER_LEFT'} && $right_primer->{'PRIMER_RIGHT'}) {
+		$self->warn("hmmm. Haven't placed primers, but trying to make annotated sequence");
+		return 0;
+	}
+
+	# Left primer: the stored start is 0-based, feature coordinates are 1-based.
+	my ($start, $length) = split /,/, $left_primer->{'PRIMER_LEFT'};
+	my $tm = $left_primer->{'PRIMER_LEFT_TM'} || $left_primer->Tm || 0;
+
+	# Class->new(...) is used rather than the indirect-object form
+	# "new Class(...)", which is ambiguous in a package that defines its
+	# own new().
+	my $seqfeatureL = Bio::SeqFeature::Generic->new(
+		-start => $start+1, -end => $start+$length, -strand => 1,
+		-primary_tag => 'left_primer', -source => 'primer3',
+		-tag    => {new => 1, author => 'Bio::Seq::PrimedSeq', Tm => $tm}
+	);
+
+	# Right primer: the stored start is the 0-based index of its last base
+	# on the target, so the feature extends backwards from there.
+	($start, $length) = split /,/, $right_primer->{'PRIMER_RIGHT'};
+	$tm = $right_primer->{'PRIMER_RIGHT_TM'} || $right_primer->Tm || 0;
+
+	my $seqfeatureR = Bio::SeqFeature::Generic->new(
+		-start => $start-$length+2, -end => $start+1, -strand => -1,
+		-primary_tag => 'right_primer', -source => 'primer3',
+		-tag    => {new => 1, author => 'Bio::Seq::PrimedSeq', Tm => $tm}
+	);
+
+	# now add the features to the annotated sequence (the same object as
+	# the target sequence)
+	$self->{annotated_sequence} = $self->{target_sequence};
+	$self->{annotated_sequence}->add_SeqFeature($seqfeatureL);
+	$self->{annotated_sequence}->add_SeqFeature($seqfeatureR);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/QualI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/QualI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/QualI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,567 @@
+# $Id: QualI.pm,v 1.11.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::QualI
+#
+# Cared for by Chad Matsalla <bioinformatics at dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::QualI - Interface definition for a Bio::Seq::Qual
+
+=head1 SYNOPSIS
+
+    # get a Bio::Seq::Qual compliant object somehow
+
+    # to test this is a seq object
+
+    $obj->isa("Bio::Seq::QualI") 
+       || $obj->throw("$obj does not implement the Bio::Seq::QualI interface");
+
+    # accessors
+
+    $string    = $obj->qual();
+    $substring = $obj->subqual(12,50);
+    $display   = $obj->display_id(); # for human display
+    $id        = $obj->primary_id(); # unique id for this object,
+                                     # implementation defined
+    $unique_key= $obj->accession_number();
+                       # unique biological id
+
+
+
+=head1 DESCRIPTION
+
+This object defines an abstract interface to basic quality
+information. PrimaryQual is an object just for the quality and its
+name(s), nothing more. There is a pure perl implementation of this in
+Bio::Seq::PrimaryQual. If you just want to use Bio::Seq::PrimaryQual
+objects, then please read that module first. This module defines the
+interface, and is of more interest to people who want to wrap their
+own Perl Objects/RDBs/FileSystems etc in way that they "are" bioperl
+quality objects, even though it is not using Perl to store the
+sequence etc.
+
+This interface defines what bioperl considers necessary to "be" a
+sequence of qualities, without providing an implementation of
+this. (An implementation is provided in Bio::Seq::PrimaryQual). If you
+want to provide a Bio::Seq::PrimaryQual 'compliant' object which in
+fact wraps another object/database/out-of-perl experience, then this
+is the correct thing to wrap, generally by providing a wrapper class
+which would inherit from your object and this Bio::Seq::QualI
+interface. The wrapper class then would have methods listed in the
+"Implementation Specific Functions" which would provide these methods
+for your object.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+This module is heavily based on Bio::Seq::PrimarySeq and is modeled after
+or outright copies sections of it. Thanks Ewan!
+
+Email bioinformatics at dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::QualI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 qual()
+
+ Title   : qual()
+ Usage   : @quality_values  = @{$obj->qual()};
+ Function: Returns the quality as a reference to an array containing the
+	quality values. The individual elements of the quality array are
+	not validated and can be any numeric value.
+ Returns : A reference to an array.
+ Status  : 
+
+=cut
+
+sub qual {
+    # Interface stub: reaching this body means the implementing class did
+    # not override qual(). The complaint goes through the object's own
+    # throw() when it has one, and through Carp's confess() otherwise.
+    my $self = shift;
+    my $complaint = "Bio::Seq::QualI definition of qual - implementing class did not provide this method";
+    return $self->can('throw') ? $self->throw($complaint) : confess($complaint);
+}
+
+=head2 subqual($start,$end)
+
+ Title   : subqual($start,$end)
+ Usage   : @subset_of_quality_values = @{$obj->subqual(10,40)};
+ Function: returns the quality values from $start to $end, where the
+	first value is 1 and the number is inclusive, ie 1-2 are the first
+	two bases of the sequence. Start cannot be larger than end but can
+	be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+
+=cut
+
+sub subqual {
+    # Interface stub: reaching this body means the implementing class did
+    # not override subqual(). Any start/end arguments are ignored here.
+    my $self = shift;
+    my $complaint = "Bio::Seq::QualI definition of subqual - implementing class did not provide this method";
+    return $self->can('throw') ? $self->throw($complaint) : confess($complaint);
+}
+
+=head2 display_id()
+
+ Title   : display_id()
+ Usage   : $id_string = $obj->display_id() _or_
+	   $id_string = $obj->display_id($new_display_id);
+ Function: Returns the display id, aka the common name of the Quality 
+	object.
+	The semantics of this is that it is the most likely string to be
+	used as an identifier of the quality sequence, and likely to have
+	"human" readability.  The id is equivalent to the ID field of the
+	GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
+	database. In fasta format, the >(\S+) is presumed to be the id,
+	though some people overload the id to embed other information.
+	Bioperl does not use any embedded information in the ID field,
+	and people are encouraged to use other mechanisms (accession field
+	for example, or extending the sequence object) to solve this.
+	Notice that $seq->id() maps to this function, mainly for 
+	legacy/convenience issues
+ Returns : A string
+ Args    : If an arg is provided, it will replace the existing display_id
+	in the object.
+
+
+=cut
+
+sub display_id {
+    # Interface stub: reaching this body means the implementing class did
+    # not override display_id(). The message names display_id (the method
+    # actually missing) rather than its synonym id().
+    my ($self) = @_;
+
+    my $msg = "Bio::Seq::QualI definition of display_id - implementing class did not provide this method";
+    if( $self->can('throw') ) {
+        $self->throw($msg);
+    } else {
+        confess($msg);
+    }
+
+}
+
+
+=head2 accession_number()
+
+ Title   : accession_number()
+ Usage   : $unique_biological_key = $obj->accession_number(); _or_
+	   $unique_biological_key = $obj->accession_number($new_acc_num);
+ Function: Returns the unique biological id for a sequence, commonly 
+	called the accession_number. For sequences from established 
+	databases, the implementors should try to use the correct 
+	accession number. Notice that primary_id() provides the unique id 
+	for the implementation, allowing multiple objects to have the same
+	accession number in a particular implementation. For sequences
+	with no accession number, this method should return "unknown".
+ Returns : A string.
+ Args    : If an arg is provided, it will replace the existing
+	accession_number in the object.
+
+=cut
+
+sub accession_number {
+    # Interface stub: reaching this body means the implementing class did
+    # not override accession_number(). Arguments are accepted but unused.
+    # (The argument list reads @args; the " at args" spelling was an
+    # e-mail-obfuscation artifact and is not valid Perl.)
+    my ($self,@args) = @_;
+
+    my $msg = "Bio::Seq::QualI definition of accession_number - implementing class did not provide this method";
+    if( $self->can('throw') ) {
+        $self->throw($msg);
+    } else {
+        confess($msg);
+    }
+
+}
+
+
+
+=head2 primary_id()
+
+ Title   : primary_id()
+ Usage   : $unique_implementation_key = $obj->primary_id(); _or_
+	   $unique_implementation_key = $obj->primary_id($new_prim_id);
+ Function: Returns the unique id for this object in this implementation.
+	This allows implementations to manage their own object ids in a
+	way the implementation can control; clients can expect one id to
+	map to one object. For sequences with no accession number, this
+	method should return a stringified memory location.
+ Returns : A string
+ Args    : If an arg is provided, it will replace the existing
+	primary_id in the object.
+
+=cut
+
+sub primary_id {
+    # Interface stub: reaching this body means the implementing class did
+    # not override primary_id(). Arguments are accepted but unused.
+    # (The argument list reads @args; the " at args" spelling was an
+    # e-mail-obfuscation artifact and is not valid Perl.)
+    my ($self,@args) = @_;
+
+    my $msg = "Bio::Seq::QualI definition of primary_id - implementing class did not provide this method";
+    if( $self->can('throw') ) {
+        $self->throw($msg);
+    } else {
+        confess($msg);
+    }
+
+}
+
+
+=head2 can_call_new()
+
+ Title   : can_call_new()
+ Usage   : if( $obj->can_call_new ) {
+             $newobj = $obj->new( %param );
+	 }
+ Function: can_call_new returns 1 or 0 depending on whether an
+	implementation allows new constructor to be called. If a new
+	constructor is allowed, then it should take the followed hashed
+	constructor list.
+           $myobject->new( -qual => $quality_as_string,
+			   -display_id  => $id,
+			   -accession_number => $accession,
+			   );
+ Example :
+ Returns : 1 or 0
+ Args    :
+
+
+=cut
+
+sub can_call_new{
+    # Tells revcom()/trunc() whether new() may be called on the object's
+    # own class. The interface default is 0 (no); implementations that
+    # accept the documented constructor arguments override this.
+    # (The argument list reads @args; the " at args" spelling was an
+    # e-mail-obfuscation artifact and is not valid Perl.)
+    my ($self,@args) = @_;
+    # we default to 0 here
+    return 0;
+}
+
+=head2 qualat($position)
+
+ Title   : qualat($position)
+ Usage   : $quality = $obj->qualat(10);
+ Function: Return the quality value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the first
+        two bases of the sequence. Start cannot be larger than end but can
+        be equal.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub qualat {
+    # Interface default: unlike the throwing stubs, a missing qualat()
+    # only produces a warning, and the empty string is returned. The
+    # position argument is not consulted.
+    my ($self, $position) = @_;
+    my $notice = "Bio::Seq::QualI definition of qualat - implementing class did not provide this method";
+    $self->can('warn') ? $self->warn($notice) : warn($notice);
+    return '';
+}
+
+=head1 Optional Implementation Functions
+
+The following functions rely on the above functions. A implementing
+class does not need to provide these functions, as they will be
+provided by this class, but is free to override these functions.
+
+All of revcom(), trunc(), and translate() create new sequence
+objects. They will call new() on the class of the sequence object
+instance passed as argument, unless can_call_new() returns FALSE. In
+the latter case a Bio::PrimarySeq object will be created. Implementors
+which really want to control how objects are created (eg, for object
+persistence over a database, or objects in a CORBA framework), they
+are encouraged to override these methods
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : @rev = @{$qual->revcom()};
+ Function: Produces a new Bio::Seq::QualI implementing object which
+	is reversed from the original quality array.
+	The id is the same id as the original sequence, and the accession number
+	is also identical. If someone wants to track that this sequence has
+	been reversed, it needs to define its own extensions
+
+	To do an inplace edit of an object you can go:
+
+	$qual = $qual->revcom();
+
+	This of course, causes Perl to handle the garbage collection of the old
+	object, but it is roughly speaking as efficient as an inplace edit.
+ Returns : A new (fresh) Bio::Seq::PrimaryQualI object
+ Args    : none
+
+=cut
+
+sub revcom{
+    # Builds a new quality object whose values are those of this object in
+    # reverse order; display id, accession number and description are
+    # carried over unchanged.
+    my ($self) = @_;
+
+    # Reverse a copy so the array behind qual() is left untouched.
+    my @qualities = @{$self->qual()};
+    my @reversed_qualities = reverse(@qualities);
+
+    # Construct the result with the object's own class when it allows
+    # that, otherwise fall back to Bio::Seq::PrimaryQual.
+    my $seqclass;
+    if($self->can_call_new()) {
+        $seqclass = ref($self);
+    } else {
+        # The fallback class must be loaded before new() is called on it;
+        # otherwise an implementation that never loaded it would die with
+        # an unhelpful "can't locate object method" error.
+        $self->_attempt_to_load_Seq();
+        $seqclass = 'Bio::Seq::PrimaryQual';
+    }
+
+    my $out = $seqclass->new( '-qual' => \@reversed_qualities,
+                              '-display_id'  => $self->display_id,
+                              '-accession_number' => $self->accession_number,
+                              '-desc' => $self->desc()
+                              );
+    return $out;
+}
+
+=head2 trunc()
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence,
+ Returns : a fresh Bio::Seq::QualI implementing object
+ Args    : Two integers denoting first and last base of the sub-sequence.
+
+
+=cut
+
+sub trunc {
+    # Builds a new quality object holding the values from $start to $end
+    # (as returned by subqual()); display id, accession number and
+    # description are carried over unchanged.
+    my ($self,$start,$end) = @_;
+
+    # An end position is mandatory.
+    if( !$end ) {
+        if( $self->can('throw')  ) {
+            $self->throw("trunc start,end");
+        } else {
+            confess("[$self] trunc start,end");
+        }
+    }
+
+    # Reversed ranges are rejected rather than silently reversed.
+    if( $end < $start ) {
+        if( $self->can('throw')  ) {
+            $self->throw("$end is smaller than $start. if you want to truncated and reverse complement, you must call trunc followed by revcom. Sorry.");
+        } else {
+            confess("[$self] $end is smaller than $start. If you want to truncated and reverse complement, you must call trunc followed by revcom. Sorry.");
+        }
+    }
+
+    my $r_qual = $self->subqual($start,$end);
+
+    # Construct the result with the object's own class when it allows
+    # that, otherwise fall back to Bio::Seq::PrimaryQual.
+    my $seqclass;
+    if($self->can_call_new()) {
+        $seqclass = ref($self);
+    } else {
+        # The fallback class must be loaded before new() is called on it;
+        # otherwise an implementation that never loaded it would die with
+        # an unhelpful "can't locate object method" error.
+        $self->_attempt_to_load_Seq();
+        $seqclass = 'Bio::Seq::PrimaryQual';
+    }
+    my $out = $seqclass->new( '-qual' => $r_qual,
+                              '-display_id'  => $self->display_id,
+                              '-accession_number' => $self->accession_number,
+                              '-desc' => $self->desc()
+                              );
+    return $out;
+}
+
+
+=head2 translate()
+
+ Title   : translate()
+ Usage   : $protein_seq_obj = $dna_seq_obj->translate
+           #if full CDS expected:
+           $protein_seq_obj = $cds_seq_obj->translate(undef,undef,undef,undef,1);
+ Function: Completely useless in this interface.
+ Returns : Nothing.
+ Args    : Nothing.
+
+=cut
+
+
+sub translate {
+    # Translation has no meaning for quality values: this stub ignores
+    # whatever it is given and always yields 0.
+    0;
+}
+
+
+=head2 id()
+
+ Title   : id()
+ Usage   : $id = $qual->id()
+ Function: ID of the quality. This should normally be (and actually is in
+           the implementation provided here) just a synonym for display_id().
+ Example :
+ Returns : A string.
+ Args    :
+
+
+=cut
+
+sub  id {
+    # Synonym for display_id(): the call is forwarded without arguments.
+    my $self = shift;
+    return $self->display_id();
+}
+
+=head2 length()
+
+ Title   : length()
+ Usage   : $length = $qual->length();
+ Function: Return the length of the array holding the quality values.
+        Under most circumstances, this should match the number of quality
+        values but no validation is done when the PrimaryQual object is
+        constructed and non-digits could be put into this array. Is this a
+        bug? Just enough rope...
+ Returns : A scalar (the number of elements in the quality array).
+ Args    : None.
+
+=cut
+
+sub length {
+    # Interface stub: reaching this body means the implementing class did
+    # not override length(). The complaint goes through the object's own
+    # throw() when it has one, and through Carp's confess() otherwise.
+    my $self = shift;
+    my $complaint = "Bio::Seq::QualI definition of length - implementing class did not provide this method";
+    return $self->can('throw') ? $self->throw($complaint) : confess($complaint);
+}
+
+
+=head2 desc()
+
+ Title   : desc()
+ Usage   : $qual->desc($newval);
+           $description = $seq->desc();
+ Function: Get/set description text for a qual object
+ Example :
+ Returns : value of desc
+ Args    : newvalue (optional)
+
+=cut
+
+sub desc {
+    # Interface default: a missing desc() only produces a warning, and
+    # the empty string is returned. A new value, if passed, is not stored.
+    my ($self, $new_desc) = @_;
+    my $notice = "Bio::Seq::QualI definition of desc - implementing class did not provide this method";
+    $self->can('warn') ? $self->warn($notice) : warn($notice);
+    return '';
+}
+
+#  These methods are here for backward compatibility with the old, 0.5
+#  Seq objects. They all throw warnings that someone is using a
+#  deprecated method, and may eventually be removed completely from
+#  this object. However, they are important to ease the transition from
+#  the old system.
+
+=head1 Private functions
+
+These are some private functions for the Bio::Seq::QualI interface. You do not
+need to implement these functions
+
+=head2 _attempt_to_load_Seq
+
+ Title   : _attempt_to_load_Seq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _attempt_to_load_Seq{
+    # Makes sure Bio::Seq::PrimaryQual (the fallback implementation class)
+    # is available. Returns 1 when it is already usable or was loaded
+    # successfully; on a load failure the error is raised through throw()
+    # or confess(), and 0 is returned only if that call itself returns.
+    my ($self) = @_;
+
+    # Ask the class itself whether it has a constructor. A lookup of the
+    # full package name as a key in %main:: never matches a nested
+    # package, so it cannot be used to detect an already-loaded module.
+    return 1 if 'Bio::Seq::PrimaryQual'->can('new');
+
+    eval {
+        require Bio::Seq::PrimaryQual;
+    };
+    # Copy $@ immediately: it is a global that later evals may clobber.
+    if( my $load_error = $@ ) {
+        my $msg = "Bio::Seq::PrimaryQual could not be loaded for $self\nThis indicates that you are using Bio::Seq::QualI without Bio::Seq::PrimaryQual loaded and without providing a complete solution\nThe most likely problem is that there has been a misconfiguration of the bioperl environment\nActual exception\n\n$load_error\n";
+        if( $self->can('throw') ) {
+            $self->throw($msg);
+        } else {
+            confess($msg);
+        }
+        return 0;
+    }
+    return 1;
+
+}
+
+
+=head2 qualtype()
+
+ Title   : qualtype()
+ Usage   : if( $obj->qualtype eq 'phd' ) { /Do Something/ }
+ Function: At this time, this function is not used for 
+	Bio::Seq::PrimaryQual objects. In fact, now it is a month later and
+	I just completed the Bio::Seq::SeqWithQuality object and this is
+	definitely deprecated.
+ Returns : Nothing. (not implemented)
+ Args    : none
+ Status  : Virtual
+
+
+=cut
+
+sub qualtype {
+    # Deprecated and unsupported: every call raises an error, through the
+    # object's throw() when it has one and Carp's confess() otherwise.
+    # (The argument list reads @args; the " at args" spelling was an
+    # e-mail-obfuscation artifact and is not valid Perl.)
+    my ($self,@args) = @_;
+
+    # Both branches report the same text; the throw branch previously
+    # misspelled the method name as "qualtypetype".
+    my $msg = "qualtype is not used with quality objects.";
+    if( $self->can('throw') ) {
+        $self->throw($msg);
+    } else {
+        confess($msg);
+    }
+
+
+}
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Quality.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Quality.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/Quality.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,475 @@
+# $Id: Quality.pm,v 1.10.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::Quality
+#
+# Cared for by Heikki Lehvaslaiho
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::Quality - Implementation of sequence with residue quality and trace values
+
+=head1 SYNOPSIS
+
+  use Bio::Seq::Quality;
+
+  # input can be space delimited string or array ref
+  my $qual = '0 1 2 3 4 5 6 7 8 9 11 12';
+  my $trace = '0 5 10 15 20 25 30 35 40 45 50 55';
+
+  my $seq = Bio::Seq::Quality->new
+      ( -qual => $qual,
+        -trace_indices => $trace,
+        -seq =>  'atcgatcgatcg',
+        -id  => 'human_id',
+        -accession_number => 'S000012',
+        -verbose => -1   # to silence deprecated methods
+  );
+
+  my $quals = $seq->qual; # array ref
+  my $traces = $seq->trace;  # array ref
+
+  my $quals = $seq->qual_text; # string
+  my $traces = $seq->trace_text; # string
+
+
+  # get sub values
+  $quals = $seq->subqual(2, 3);  # array ref
+  $traces = $seq->subtrace(2, 3); # array ref
+  $quals = $seq->subqual_text(2, 3); # string
+  $traces = $seq->subtrace_text(2, 3); # string
+
+  # set sub values 
+  $seq->subqual(2, 3, "9 9");
+  $seq->subtrace(2, 3, "9 9");
+
+
+
+=head1 DESCRIPTION
+
+This object stores base quality values together with the sequence
+string.
+
+It is a reimplementation of Chad Matsalla's Bio::Seq::SeqWithQuality
+module using Bio::Seq::MetaI. 
+
+The implementation is based on Bio::Seq::Meta::Array. qual() and
+trace() are base methods to store and retrieve information that have
+extensions to retrieve values as a scalar (e.g. qual_text() ), or get
+or set subvalues (e.g. subqual() ). See L<Bio::Seq::MetaI> for more details.
+
+All the functional code is in Bio::Seq::Meta::Array.
+
+There are deprecated methods that are included for compatibility with
+Bio::Seq::SeqWithQuality. These will print a warning unless verbosity
+of the object is set to be less than zero.
+
+=head2 Differences from Bio::Seq::SeqWithQuality
+
+It is not possible to fully follow the interface of
+Bio::Seq::SeqWithQuality since internally a Bio::Seq::SeqWithQuality
+object is a composite of two independent objects: a Bio::PrimarySeq
+object and Bio::Seq::PrimaryQual object. Both of these objects can be
+created separately and merged into Bio::Seq::SeqWithQuality.
+
+This implementation is based on Bio::Seq::Meta::Array that is a subclass
+of Bio::PrimarySeq that stores any number of meta information in
+unnamed arrays.
+
+Here we assume that two meta sets, called 'qual' and 'trace_indices' are
+attached to a sequence. (But there is nothing that prevents you to add
+as many named meta sets as you need using normal meta() methods).
+
+qual() is an alias to meta(), qualat($loc) is an alias to
+submeta($loc,$loc).
+
+trace_indices() in Bio::Seq::SeqWithQuality has been abbreviated to
+trace() and is an alias to named_meta('trace').
+
+You can create an object without passing any arguments to the
+constructor (Bio::Seq::SeqWithQuality fails without alphabet). It will
+warn about not being able to set alphabet unless you set verbosity of
+the object to a negative value.
+
+After the latest rewrite, the meta information sets (quality and
+trace) no longer cover all the residues automatically. Methods to
+check the length of meta information (L<quality_length>,
+L<trace_length>)and to see if the ends are flushed to the sequence
+have been added (L<quality_is_flush>, L<trace_is_flush>). To force
+the old functionality, set L<force_flush> to true.
+
+qual_obj() and seq_obj() methods do not exist!
+
+Finally, there is only one set of descriptors (primary_id, display_id,
+accession_number) for the object.
+
+
+=head1 SEE ALSO
+
+L<Bio::Seq::MetaI>, 
+L<Bio::Seq::Meta::Array>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Chad Matsalla, bioinformatics at dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::Quality;
+use vars qw($DEFAULT_NAME $GAP $META_GAP);
+use strict;
+
+#use overload '""' => \&to_string;
+
+use base qw(Bio::LocatableSeq Bio::Seq::Meta::Array);
+
+
+BEGIN {
+
+    # Package-wide settings, assigned at compile time:
+    #   $DEFAULT_NAME - name of the meta set that holds the quality values
+    #   $GAP          - '-'
+    #   $META_GAP     - a single space
+    ($DEFAULT_NAME, $GAP, $META_GAP) = ('DEFAULT', '-', ' ');
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : $metaseq = Bio::Seq::Quality->new
+	        ( -qual => '0 1 2 3 4 5 6 7 8 9 11 12',
+                  -trace => '0 5 10 15 20 25 30 35 40 45 50 55',
+                  -seq =>  'atcgatcgatcg',
+	          -id  => 'human_id',
+	          -accession_number => 'S000012',
+	        );
+ Function: Constructor for Bio::Seq::Quality class.
+           Note that you can provide an empty quality and trace strings.
+ Returns : a new Bio::Seq::Quality object
+
+=cut
+
+
+sub new {
+    # Constructor. Besides whatever the parent constructor accepts, the
+    # named arguments -meta, -qual, -trace and -trace_indices are
+    # recognised; -trace and -trace_indices both fill the 'trace' meta
+    # set, with -trace_indices applied last.
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    my($meta, $qual, $trace, $trace_indices) =
+        $self->_rearrange([qw(META
+                              QUAL
+                              TRACE
+                              TRACE_INDICES
+                              )],
+                          @args);
+
+    # Start from empty quality and trace sets.
+    $self->{'_meta'}->{$DEFAULT_NAME} = [];
+    $self->{'_meta'}->{'trace'} = [];
+
+    # Apply each value that was actually supplied. A plain truth test
+    # would silently discard the legitimate value "0" (for instance a
+    # single quality score of zero), so test for a defined, non-empty
+    # value instead; undef and the empty string are still skipped.
+    $self->meta($meta)
+        if defined($meta) && length($meta);
+    $self->qual($qual)
+        if defined($qual) && length($qual);
+    $self->named_meta('trace', $trace)
+        if defined($trace) && length($trace);
+    $self->named_meta('trace', $trace_indices)
+        if defined($trace_indices) && length($trace_indices);
+
+    return $self;
+}
+
+
+=head2 qual
+
+ Title   : qual
+ Usage   : $qual_values  = $obj->qual($values_string);
+ Function:
+
+           Get and set method for the meta data starting from residue
+           position one. Since it is dependent on the length of the
+           sequence, it needs to be manipulated after the sequence.
+
+           The length of the returned value always matches the length
+           of the sequence.
+
+ Returns : reference to an array of meta data
+ Args    : new value, string or array ref or Bio::Seq::PrimaryQual, optional
+
+=cut
+
+sub qual {
+    # Get/set accessor for the quality values, stored as the meta set
+    # named by $DEFAULT_NAME.
+    my ($self, $value) = @_;
+
+    # A Bio::Seq::PrimaryQual object is unwrapped to its own qual()
+    # value; array refs and plain strings are passed through as given.
+    my $kind = ref($value);
+    if ($kind and $kind ne 'ARRAY' and $value->isa('Bio::Seq::PrimaryQual')) {
+        $value = $value->qual;
+    }
+
+    return $self->named_meta($DEFAULT_NAME, $value);
+}
+
+=head2 qual_text
+
+ Title   : qual_text
+ Usage   : $qual_values  = $obj->qual_text($values_arrayref);
+ Function: Variant of meta() and qual()  guaranteed to return a string
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, optional
+
+=cut
+
+sub qual_text {
+    # String form of the quality values: whatever submeta() returns for
+    # the given arguments, joined with single spaces.
+    my $self = shift;
+    my $values = $self->submeta(@_);
+    return join ' ', @{$values};
+}
+
+=head2 subqual
+
+ Title   : subqual
+ Usage   : $subset_of_qual_values = $obj->subqual(10, 20, $value_string);
+           $subset_of_qual_values = $obj->subqual(10, undef, $value_string);
+ Function:
+
+           Get and set method for meta data for subsequences.
+
+           Numbering starts from 1 and the number is inclusive, ie 1-2
+           are the first two residue of the sequence. Start cannot be
+           larger than end but can be equal.
+
+           If the second argument is missing the returned values
+           should extend to the end of the sequence.
+
+ Returns : A reference to an array
+ Args    : integer, start position
+           integer, end position, optional when a third argument present
+           new value, optional
+
+=cut
+
+sub subqual {
+    # Get/set accessor for a sub-range of the quality values: forwards
+    # all arguments to named_submeta() for the $DEFAULT_NAME set.
+    my $self = shift;
+    return $self->named_submeta($DEFAULT_NAME, @_);
+}
+
+=head2 subqual_text
+
+ Title   : subqual_text
+ Usage   : $meta_values  = $obj->subqual_text(20, $value_string);
+ Function: Variant of subqual() returning a stringified
+           representation  of meta data. For details, see L<Bio::Seq::MetaI>.
+ Returns : a string
+ Args    : new value, optional
+
+=cut
+
+sub subqual_text {
+    # String form of a sub-range of quality values: whatever submeta()
+    # returns for the given arguments, joined with single spaces.
+    my $self = shift;
+    my $values = $self->submeta(@_);
+    return join ' ', @{$values};
+}
+
+
+=head2 quality_length
+
+ Title   : quality_length()
+ Usage   : $qual_len  = $obj->quality_length();
+ Function: return the number of elements in the quality array
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub quality_length {
+    # Number of elements in the quality set. The set is addressed through
+    # $DEFAULT_NAME, as in qual() and subqual(), instead of repeating the
+    # literal name here.
+    my ($self) = @_;
+    return $self->named_meta_length($DEFAULT_NAME);
+}
+
+
+
+=head2 quality_is_flush
+
+ Title   : quality_is_flush
+ Usage   : $quality_is_flush  = $obj->quality_is_flush()
+ Function: Boolean to tell if the quality length equals the sequence length.
+           Returns true if force_flush() is set.
+ Returns : boolean 1 or 0
+ Args    : none
+
+=cut
+
+sub quality_is_flush {
+    # Asks is_flush() about the quality set. This class stores quality
+    # values under $DEFAULT_NAME (see new() and qual()); no set named
+    # 'quality' is ever created here, so that name would refer to a set
+    # that does not exist.
+    # NOTE(review): is_flush() is defined outside this file -- confirm it
+    # accepts a meta set name as its argument.
+    return shift->is_flush($DEFAULT_NAME);
+}
+
+
+=head2 trace
+
+ Title   : trace
+ Usage   : $trace_values  = $obj->trace($values_string);
+ Function:
+
+           Get and set method for the meta data starting from residue
+           position one. Since it is dependent on the length of the
+           sequence, it needs to be manipulated after the sequence.
+
+           The length of the returned value always matches the length
+           of the sequence.
+
+ Returns : reference to an array of meta data
+ Args    : new value, string or array ref, optional
+
+=cut
+
+sub trace {
+    # Get/set accessor for the 'trace' meta set; the (possibly undefined)
+    # first argument is handed to named_meta() as the new value.
+    my ($self, $value) = @_;
+    return $self->named_meta('trace', $value);
+}
+
+=head2 trace_text
+
+ Title   : trace_text
+ Usage   : $trace_values  = $obj->trace_text($values_arrayref);
+ Function: Variant of meta() and trace()  guaranteed to return a string
+           representation  of meta data. For details, see L<meta>.
+ Returns : a string
+ Args    : new value, optional
+
+=cut
+
+sub trace_text {
+    # String form of the trace values: whatever named_submeta() returns
+    # for the 'trace' set and the given arguments, joined with spaces.
+    my $self = shift;
+    my $values = $self->named_submeta('trace', @_);
+    return join ' ', @{$values};
+}
+
+=head2 subtrace
+
+ Title   : subtrace
+ Usage   : $subset_of_trace_values = $obj->subtrace(10, 20, $value_string);
+           $subset_of_trace_values = $obj->subtrace(10, undef, $value_string);
+ Function:
+
+           Get and set method for meta data for subsequences.
+
+           Numbering starts from 1 and the number is inclusive, ie 1-2
+           are the first two residue of the sequence. Start cannot be
+           larger than end but can be equal.
+
+           If the second argument is missing the returned values
+           should extend to the end of the sequence.
+
+ Returns : A reference to an array
+ Args    : integer, start position
+           integer, end position, optional when a third argument present
+           new value, optional
+
+
+=cut
+
+sub subtrace {
+    # Get/set accessor for a sub-range of the trace values: forwards all
+    # arguments to named_submeta() for the 'trace' set.
+    my $self = shift;
+    return $self->named_submeta('trace', @_);
+}
+
+=head2 subtrace_text
+
+ Title   : subtrace_text
+ Usage   : $meta_values  = $obj->subtrace_text(20, $value_string);
+ Function: Variant of subtrace() returning a stringified
+           representation  of meta data. For details, see L<Bio::Seq::MetaI>.
+ Returns : a string
+ Args    : new value, optional
+
+=cut
+
+sub subtrace_text {
+    # String form of a sub-range of trace values: whatever named_submeta()
+    # returns for the 'trace' set, joined with single spaces.
+    my $self = shift;
+    my $values = $self->named_submeta('trace', @_);
+    return join ' ', @{$values};
+}
+
+
+=head2 trace_length
+
+ Title   : trace_length()
+ Usage   : $trace_len  = $obj->trace_length();
+ Function: return the number of elements in the trace set
+ Returns : integer
+ Args    : -
+
+=cut
+
+sub trace_length {
+    # Number of elements in the 'trace' meta set, as reported by
+    # named_meta_length().
+    my $self = shift;
+    return $self->named_meta_length('trace');
+}
+
+=head2 trace_is_flush
+
+ Title   : trace_is_flush
+ Usage   : $trace_is_flush  = $obj->trace_is_flush()
+ Function: Boolean to tell if the trace length equals the sequence length.
+           Returns true if force_flush() is set.
+ Returns : boolean 1 or 0
+ Args    : none
+
+=cut
+
+sub trace_is_flush {
+    # Asks is_flush() about the 'trace' meta set and returns its answer.
+    my $self = shift;
+    return $self->is_flush('trace');
+}
+
+
+################## deprecated methods ##################
+
+
+sub trace_indices {
+    # Compatibility name for reading the 'trace' meta set. Read-only:
+    # any further arguments are ignored.
+    my ($self) = @_;
+    return $self->named_meta('trace');
+}
+
+sub trace_index_at {
+    # Compatibility accessor: the single trace value at the given
+    # position, taken as the first element of the one-position range
+    # returned by named_submeta().
+    my $self = shift;
+    my $position = shift;
+    my $values = $self->named_submeta('trace', $position, $position);
+    return shift @{$values};
+}
+
+
+sub sub_trace_index {
+    # Compatibility name for subtrace(): forwards all arguments to
+    # named_submeta() for the 'trace' set.
+    my ($self, @range_and_value) = @_;
+    return $self->named_submeta('trace', @range_and_value);
+}
+
+
+sub qualat {
+    # Compatibility accessor: the single quality value at the given
+    # position, taken as the first element of the one-position range
+    # returned by submeta().
+    my $self = shift;
+    my $position = shift;
+    my $values = $self->submeta($position, $position);
+    return shift @{$values};
+}
+
+
+sub baseat {
+    # Compatibility accessor: the residue at the given position, obtained
+    # as the one-position subsequence from subseq().
+    my $self = shift;
+    my $position = shift;
+    return $self->subseq($position, $position);
+}
+
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,465 @@
+# $Id: RichSeq.pm,v 1.19.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::RichSeq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::RichSeq - Module implementing a sequence created from a rich
+sequence database entry
+
+=head1 SYNOPSIS
+
+See L<Bio::Seq::RichSeqI> and documentation of methods.
+
+=head1 DESCRIPTION
+
+This module implements Bio::Seq::RichSeqI, an interface for sequences
+created from or created for entries from/of rich sequence databanks,
+like EMBL, GenBank, and SwissProt. Methods added to the Bio::SeqI
+interface therefore focus on databank-specific information. Note that
+not every rich databank format may use all of the properties provided.
+
+=head1 Implemented Interfaces
+
+This class implements the following interfaces.
+
+=over 4
+
+=item Bio::Seq::RichSeqI
+
+Note that this includes implementing Bio::PrimarySeqI and Bio::SeqI.
+
+=item Bio::IdentifiableI
+
+=item Bio::DescribableI
+
+=item Bio::AnnotatableI
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::RichSeq;
+use vars qw($AUTOLOAD);
+use strict;
+
+
+
+use base qw(Bio::Seq Bio::Seq::RichSeqI);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : $seq    = Bio::Seq::RichSeq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
+                                             -id  => 'human_id',
+				             -accession_number => 'AL000012',
+				            );
+
+ Function: Returns a new seq object from
+           basic constructors, being a string for the sequence
+           and strings for id and accession_number
+ Returns : a new Bio::Seq::RichSeq object
+
+=cut
+
+sub new {
+    # standard new call..
+    my($caller,@args) = @_;
+    my $self = $caller->SUPER::new(@args);
+    
+    $self->{'_dates'} = [];
+    $self->{'_secondary_accession'} = [];
+
+    my ($dates, $xtra, $sv,
+	$keywords, $pid, $mol, 
+	$division ) = $self->_rearrange([qw(DATES 
+					    SECONDARY_ACCESSIONS
+					    SEQ_VERSION 
+					    KEYWORDS
+					    PID
+					    MOLECULE
+					    DIVISION
+					    )],
+					@args);
+    defined $division && $self->division($division);
+    defined $mol && $self->molecule($mol);
+    if(defined($keywords)) {
+	if(ref($keywords) && (ref($keywords) eq "ARRAY")) {
+	    $self->add_keyword(@$keywords);
+	} else {
+	    # got a string - use the old API
+	    $self->keywords($keywords);
+	}
+    }
+    defined $sv && $self->seq_version($sv);
+    defined $pid && $self->pid($pid);
+
+    if( defined $dates ) {
+	if( ref($dates) eq "ARRAY" ) {
+	    foreach ( @$dates) {
+		$self->add_date($_);
+	    } 
+	} else { 
+	    $self->add_date($dates);
+	}
+    }
+
+    if( defined $xtra ) {
+	if( ref($xtra) eq "ARRAY" ) {
+	    foreach ( @$xtra) {
+		$self->add_secondary_accession($_);
+	    } 
+	} else { 
+	    $self->add_secondary_accession($xtra);
+	}
+    }
+    
+    return $self;
+}
+
+
+=head2 division
+
+ Title   : division
+ Usage   : $obj->division($newval)
+ Function: 
+ Returns : value of division
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub division {
+   my $obj = shift;
+   if( @_ ) {
+      my $value = shift;
+      $obj->{'_division'} = $value;
+    }
+    return $obj->{'_division'};
+
+}
+
+=head2 molecule
+
+ Title   : molecule
+ Usage   : $obj->molecule($newval)
+ Function: 
+ Returns : type of molecule (DNA, mRNA)
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub molecule {
+   my $obj = shift;
+   if( @_ ) {
+      my $value = shift;
+      $obj->{'_molecule'} = $value;
+    }
+    return $obj->{'_molecule'};
+
+}
+
+=head2 add_date
+
+ Title   : add_date
+ Usage   : $self->add_date($datestr)
+ Function: adds one or more dates
+
+           This implementation stores dates as keyed annotation, the
+           key being 'date_changed'. You can take advantage of this
+           fact when accessing the annotation collection directly.
+
+ Example :
+ Returns : 
+ Args    : a date string or an array of such strings
+
+
+=cut
+
+sub add_date {
+    return shift->_add_annotation_value('date_changed',@_);
+}
+
+=head2 get_dates
+
+ Title   : get_dates
+ Usage   : my @dates = $seq->get_dates;
+ Function: Get the dates of the sequence (usually, when it was created and
+           changed).
+ Returns : an array of date strings
+ Args    :
+
+
+=cut
+
+sub get_dates{
+    return shift->_get_annotation_values('date_changed');
+}
+
+
+=head2 pid
+
+ Title   : pid
+ Usage   : my $pid = $seq->pid();
+ Function: Get (and set, depending on the implementation) the PID property
+           for the sequence.
+ Returns : a string
+ Args    :
+
+
+=cut
+
+sub pid{
+    my $self = shift;
+
+    return $self->{'_pid'} = shift if @_;
+    return $self->{'_pid'};
+}
+
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $obj->accession($newval)
+ Function: The underlying sequence object does not 
+           have an accession, so we need one here.
+
+           In this implementation this is merely a synonym for
+           accession_number().
+ Example : 
+ Returns : value of accession
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub accession {
+   my ($obj,@args) = @_;
+   return $obj->accession_number(@args);
+}
+
+=head2 add_secondary_accession
+
+ Title   : add_secondary_accession
+ Usage   : $self->add_secondary_accession($acc)
+ Function: adds a secondary_accession
+
+           This implementation stores secondary accession numbers as
+           keyed annotation, the key being 'secondary_accession'. You
+           can take advantage of this fact when accessing the
+           annotation collection directly.
+
+ Example :
+ Returns : 
+ Args    : a string or an array of strings
+
+
+=cut
+
+sub add_secondary_accession {
+    return shift->_add_annotation_value('secondary_accession',@_);
+}
+
+=head2 get_secondary_accessions
+
+ Title   : get_secondary_accessions
+ Usage   : my @acc = $seq->get_secondary_accessions();
+ Function: Get the secondary accession numbers as strings.
+ Returns : An array of strings
+ Args    : none
+
+
+=cut
+
+sub get_secondary_accessions{
+    return shift->_get_annotation_values('secondary_accession');
+}
+
+=head2 seq_version
+
+ Title   : seq_version
+ Usage   : $obj->seq_version($newval)
+ Function: Get/set the sequence version
+ Returns : value of seq_version (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub seq_version{
+    my $self = shift;
+
+    return $self->{'_seq_version'} = shift if @_;
+    return $self->{'_seq_version'};
+}
+
+
+=head2 add_keyword
+
+ Title   : add_keyword
+ Usage   : $obj->add_keyword($newval)
+ Function: Add a new keyword to the annotation of the sequence.
+
+           This implementation stores keywords as keyed annotation,
+           the key being 'keyword'. You can take advantage of this
+           fact when accessing the annotation collection directly.
+
+ Returns : 
+ Args    : value to be added (optional) (a string)
+
+
+=cut
+
+sub add_keyword {
+    return shift->_add_annotation_value('keyword',@_);
+}
+
+=head2 get_keywords
+
+ Title   : get_keywords
+ Usage   : my @keywords = $obj->get_keywords();
+ Function: Get the keywords for this sequence as an array of strings.
+ Returns : an array of strings
+ Args    : 
+
+
+=cut
+
+sub get_keywords {
+    return shift->_get_annotation_values('keyword');
+}
+
+=head1 Private methods and synonyms for backward compatibility
+
+=cut
+
+=head2 _add_annotation_value
+
+ Title   : _add_annotation_value
+ Usage   :
+ Function: Adds a value to the annotation collection under the specified
+           key. Note that this is not a public method.
+ Returns : 
+ Args    : key (a string), value(s) (one or more scalars)
+
+
+=cut
+
+sub _add_annotation_value{
+    my $self = shift;
+    my $key  = shift;
+
+    foreach my $val (@_) {
+	$self->annotation->add_Annotation(
+			Bio::Annotation::SimpleValue->new(-tagname => $key,
+							  -value => $val)
+					  );
+    }
+}
+
+=head2 _get_annotation_values
+
+ Title   : _get_annotation_values
+ Usage   :
+ Function: Gets the values of a specific annotation as identified by the
+           key from the annotation collection. Note that this is not a
+           public method.
+ Example :
+ Returns : an array of strings
+ Args    : the key (a string)
+
+
+=cut
+
+sub _get_annotation_values{
+    my $self = shift;
+
+    return map { $_->value(); } $self->annotation->get_Annotations(shift);
+}
+
+#
+##
+### Deprecated methods kept for ease of transition
+##
+#
+
+sub keywords {
+    my $self = shift;
+
+    # have we been called in set mode?
+    if(@_) {
+	# yes; translate to the new API
+	foreach my $kwd (@_) {
+	    $self->add_keyword(split(/\s*;\s*/,$kwd));
+	}
+    } else {
+	# no; translate read-only to the new API
+	return join("; ",$self->get_keywords());
+    }
+}
+
+sub each_date {
+   my ($self) = @_;
+   $self->warn("Deprecated method... please use get_dates");
+   return $self->get_dates;
+}
+
+
+sub each_secondary_accession {
+   my ($self) = @_;
+   $self->warn("each_secondary_accession - deprecated method. use get_secondary_accessions");
+   return $self->get_secondary_accessions;
+
+}
+
+sub sv {
+   my ($obj,$value) = @_;
+   $obj->warn("sv - deprecated method. use seq_version");
+   $obj->seq_version($value);
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/RichSeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,229 @@
+# $Id: RichSeqI.pm,v 1.14.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::RichSeqI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::RichSeqI - interface for sequences from rich data sources, mostly databases
+
+=head1 SYNOPSIS
+
+    @secondary   = $richseq->get_secondary_accessions;
+    $division    = $richseq->division;
+    $mol         = $richseq->molecule;
+    @dates       = $richseq->get_dates;
+    $seq_version = $richseq->seq_version;
+    $pid         = $richseq->pid;
+    @keywords    = $richseq->get_keywords;
+
+=head1 DESCRIPTION
+
+This interface extends the Bio::SeqI interface to give additional functionality
+to sequences with richer data sources, in particular from database sequences 
+(EMBL, GenBank and Swissprot).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::RichSeqI;
+use strict;
+
+use base qw(Bio::SeqI);
+
+
+=head2 get_secondary_accessions
+
+ Title   : get_secondary_accessions
+ Usage   : 
+ Function: Get the secondary accessions for a sequence.
+
+           An implementation that allows modification of this array
+           property should provide the methods add_secondary_accession
+           and remove_secondary_accessions, with obvious purpose.
+
+ Example :
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+sub get_secondary_accessions{
+   my ($self,@args) = @_;
+
+   $self->throw("hit get_secondary_accessions in interface definition - error");
+
+}
+
+
+=head2 division
+
+ Title   : division
+ Usage   :
+ Function: Get (and set, depending on the implementation) the division for
+           a sequence.
+
+           Examples from GenBank are PLN (plants), PRI (primates), etc.
+ Example :
+ Returns : a string
+ Args    :
+
+
+=cut
+
+sub division{
+   my ($self,@args) = @_;
+
+   $self->throw("hit division in interface definition - error");
+
+}
+
+
+=head2 molecule
+
+ Title   : molecule
+ Usage   :
+ Function: Get (and set, depending on the implementation) the molecule
+           type for the sequence.
+
+           This is not necessarily the same as Bio::PrimarySeqI::alphabet(),
+           because it is databank-specific.
+ Example :
+ Returns : a string
+ Args    :
+
+
+=cut
+
+sub molecule{
+   my ($self,@args) = @_;
+
+   $self->throw("hit molecule in interface definition - error");
+}
+
+=head2 pid
+
+ Title   : pid
+ Usage   :
+ Function: Get (and set, depending on the implementation) the PID property
+           for the sequence.
+ Example :
+ Returns : a string
+ Args    :
+
+
+=cut
+
+sub pid {
+   my ($self,@args) = @_;
+
+   $self->throw("hit pid in interface definition - error");
+}
+
+=head2 get_dates
+
+ Title   : get_dates
+ Usage   :
+ Function: Get (and set, depending on the implementation) the dates the
+           databank entry specified for the sequence
+
+           An implementation that allows modification of this array
+           property should provide the methods add_date and
+           remove_dates, with obvious purpose.
+
+ Example :
+ Returns : an array of strings
+ Args    :
+
+
+=cut
+
+sub get_dates{
+   my ($self,@args) = @_;
+
+   $self->throw("hit get_dates in interface definition - error");
+
+}
+
+
+=head2 seq_version
+
+ Title   : seq_version
+ Usage   :
+ Function: Get (and set, depending on the implementation) the version string
+           of the sequence.
+ Example :
+ Returns : a string
+ Args    :
+
+
+=cut
+
+sub seq_version{
+   my ($self,@args) = @_;
+
+   $self->throw("hit seq_version in interface definition - error");
+
+}
+
+=head2 get_keywords
+
+ Title   : get_keywords
+ Usage   : $obj->get_keywords()
+ Function: Get the keywords for this sequence object.
+
+           An implementation that allows modification of this array
+           property should provide the methods add_keyword and
+           remove_keywords, with obvious purpose.
+
+ Returns : an array of strings
+ Args    : 
+
+
+=cut
+
+sub get_keywords {
+   my ($self) = @_;
+   $self->throw("hit keywords in interface definition - error");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqBuilder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqBuilder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqBuilder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,651 @@
+# $Id: SeqBuilder.pm,v 1.9.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::SeqBuilder
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers
+
+=head1 SYNOPSIS
+
+   use Bio::SeqIO;
+
+   # usually you won't instantiate this yourself - a SeqIO object
+   # will have one already
+   my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank");
+   my $builder = $seqin->sequence_builder();
+
+   # if you need only sequence, id, and description (e.g. for 
+   # conversion to FASTA format):
+   $builder->want_none();
+   $builder->add_wanted_slot('display_id','desc','seq');
+
+   # if you want everything except the sequence and features
+   $builder->want_all(1); # this is the default if it's untouched
+   $builder->add_unwanted_slot('seq','features');
+
+   # if you want only human sequences shorter than 5kb and skip all
+   # others
+   $builder->add_object_condition(sub {
+       my $h = shift;
+       return 0 if $h->{'-length'} > 5000;
+       return 0 if exists($h->{'-species'}) &&
+                   ($h->{'-species'}->binomial() ne "Homo sapiens");
+       return 1;
+   });
+
+   # when you are finished with configuring the builder, just use
+   # the SeqIO API as you would normally
+   while(my $seq = $seqin->next_seq()) {
+       # do something
+   }
+
+=head1 DESCRIPTION
+
+This is an implementation of L<Bio::Factory::ObjectBuilderI> used by
+parsers of rich sequence streams. It provides for a relatively
+easy-to-use configurator of the parsing flow.
+
+Configuring the parsing process may be for you if you need much less
+information, or much less sequence, than the stream actually
+contains. Configuration can in both cases speed up the parsing time
+considerably, because unwanted sections or the rest of unwanted
+sequences are skipped over by the parser. This configuration could
+also conserve memory if you're running out of available RAM.
+
+See the methods of the class-specific implementation section for
+further documentation of what can be configured.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::SeqBuilder;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::Factory::ObjectBuilderI);
+
+my %slot_param_map = ("add_SeqFeature" => "features",
+		      );
+my %param_slot_map = ("features"       => "add_SeqFeature",
+		      );
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Seq::SeqBuilder();
+ Function: Builds a new Bio::Seq::SeqBuilder object 
+ Returns : an instance of Bio::Seq::SeqBuilder
+ Args    :
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    $self->{'wanted_slots'} = [];
+    $self->{'unwanted_slots'} = [];
+    $self->{'object_conds'} = [];
+    $self->{'_objhash'} = {};
+    $self->want_all(1);
+
+    return $self;
+}
+
+=head1 Methods for implementing L<Bio::Factory::ObjectBuilderI>
+
+=cut
+
+=head2 want_slot
+
+ Title   : want_slot
+ Usage   :
+ Function: Whether or not the object builder wants to populate the
+           specified slot of the object to be built.
+
+           The slot can be specified either as the name of the
+           respective method, or the initialization parameter that
+           would be otherwise passed to new() of the object to be
+           built.
+
+           Note that usually only the parser will call this
+           method. Use add_wanted_slot and add_unwanted_slot for
+           configuration.
+
+ Example :
+ Returns : TRUE if the object builder wants to populate the slot, and
+           FALSE otherwise.
+ Args    : the name of the slot (a string)
+
+
+=cut
+
+sub want_slot{
+	my ($self,$slot) = @_;
+	my $ok = 0;
+
+	$slot = substr($slot,1) if substr($slot,0,1) eq '-';
+	if($self->want_all()) {
+	foreach ($self->get_unwanted_slots()) {
+		# this always overrides in want-all mode
+		return 0 if($slot eq $_);
+	}
+	if(! exists($self->{'_objskel'})) {
+		$self->{'_objskel'} = $self->sequence_factory->create_object();
+	}
+	if(exists($param_slot_map{$slot})) {
+		$ok = $self->{'_objskel'}->can($param_slot_map{$slot});
+	} else {
+		$ok = $self->{'_objskel'}->can($slot);
+	}
+	return $ok if $ok;
+	# even if the object 'cannot' do this slot, it might have been
+	# added to the list of wanted slot, so carry on
+}
+	foreach ($self->get_wanted_slots()) {
+		if($slot eq $_) {
+			$ok = 1;
+			last;
+		}
+	}
+	return $ok;
+}
+
+=head2 add_slot_value
+
+ Title   : add_slot_value
+ Usage   :
+ Function: Adds one or more values to the specified slot of the object
+           to be built.
+
+           Naming the slot is the same as for want_slot().
+
+           The object builder may further filter the content to be
+           set, or even completely ignore the request.
+
+           If this method reports failure, the caller should not add
+           more values to the same slot. In addition, the caller may
+           find it appropriate to abandon the object being built
+           altogether.
+
+           This implementation will allow the caller to overwrite the
+           return value from want_slot(), because the slot is not
+           checked against want_slot().
+
+           Note that usually only the parser will call this method,
+           but you may call it from anywhere if you know what you are
+           doing. A derived class may be used to further manipulate
+           the value to be added.
+
+ Example :
+ Returns : TRUE on success, and FALSE otherwise
+ Args    : the name of the slot (a string)
+           parameters determining the value to be set
+
+                 OR
+
+           alternatively, a list of slotname/value pairs in the style
+           of named parameters as they would be passed to new(), where
+           each element at an even index is the parameter (slot) name
+           starting with a dash, and each element at an odd index is
+           the value of the preceding name.
+
+=cut
+
+sub add_slot_value{
+	my ($self,$slot,@args) = @_;
+
+	my $h = $self->{'_objhash'};
+	return unless $h;
+	# multiple named parameter variant of calling?
+	if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) {
+		unshift(@args, $slot);
+		while(@args) {
+			my $key = shift(@args);
+			$h->{$key} = shift(@args);
+		}
+	} else {
+		if($slot eq 'add_SeqFeature') {
+			$slot = '-'.$slot_param_map{$slot};
+			$h->{$slot} = [] unless $h->{$slot};
+			push(@{$h->{$slot}}, @args);
+		} else {
+			$slot = '-'.$slot unless substr($slot,0,1) eq '-';
+			$h->{$slot} = $args[0];
+		}
+	}
+	return 1;
+}
+
+=head2 want_object
+
+ Title   : want_object
+ Usage   :
+ Function: Whether or not the object builder is still interested in
+           continuing with the object being built.
+
+           If this method returns FALSE, the caller should not add any
+           more values to slots, or otherwise risks that the builder
+           throws an exception. In addition, make_object() is likely
+           to return undef after this method returned FALSE.
+
+           Note that usually only the parser will call this
+           method. Use add_object_condition for configuration.
+
+ Example :
+ Returns : TRUE if the object builder wants to continue building
+           the present object, and FALSE otherwise.
+ Args    : none
+
+=cut
+
+sub want_object{
+	my $self = shift;
+
+	my $ok = 1;
+	foreach my $cond ($self->get_object_conditions()) {
+		$ok = &$cond($self->{'_objhash'});
+		last unless $ok;
+	}
+	delete $self->{'_objhash'} unless $ok;
+	return $ok;
+}
+
+=head2 make_object
+
+ Title   : make_object
+ Usage   :
+ Function: Get the built object.
+
+           This method is allowed to return undef if no value has ever
+           been added since the last call to make_object(), or if
+           want_object() returned FALSE (or would have returned FALSE)
+           before calling this method.
+
+           For an implementation that allows consecutive building of
+           objects, a caller must call this method once, and only
+           once, between subsequent objects to be built. I.e., a call
+           to make_object implies 'end_object.'
+
+ Example :
+ Returns : the object that was built
+ Args    : none
+
+=cut
+
+sub make_object{
+	my $self = shift;
+
+	my $obj;
+	if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) {
+		$obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}});
+	}
+	$self->{'_objhash'} = {}; # reset
+	return $obj;
+}
+
+=head1 Implementation specific methods
+
+These methods allow to conveniently configure this sequence object
+builder as to which slots are desired, and under which circumstances a
+sequence object should be abandoned altogether. The default mode is
+want_all(1), which means the builder will report all slots as wanted
+that the object created by the sequence factory supports.
+
+You can add specific slots you want through add_wanted_slot(). In
+most cases, you will want to call want_none() before in order to relax
+zero acceptance through a list of wanted slots.
+
+Alternatively, you can add specific unwanted slots through
+add_unwanted_slot(). In this case, you will usually want to call
+want_all(1) before (which is the default if you never touched the
+builder) to restrict unrestricted acceptance.
+
+I.e., want_all(1) means want all slots except for the unwanted, and
+want_none() means only those explicitly wanted.
+
+If a slot is in both the unwanted and the wanted list, the following
+rules hold. In want-all mode, the unwanted list overrules. In
+want-none mode, the wanted list overrides the unwanted list. If this
+is confusing to you, just try to avoid having slots at the same time
+in the wanted and the unwanted lists.
+
+=cut
+
+=head2 get_wanted_slots
+
+ Title   : get_wanted_slots
+ Usage   : my @slots = $obj->get_wanted_slots();
+ Function: Get the list of wanted slots
+ Example : 
+ Returns : a list of strings
+ Args    : 
+
+
+=cut
+
+sub get_wanted_slots{
+	my $self = shift;
+
+	return @{$self->{'wanted_slots'}};
+}
+
+=head2 add_wanted_slot
+
+ Title   : add_wanted_slot
+ Usage   :
+ Function: Adds the specified slots to the list of wanted slots.
+ Example :
+ Returns : TRUE
+ Args    : an array of slot names (strings)
+
+=cut
+
+sub add_wanted_slot{
+	my ($self,@slots) = @_;
+
+	my $myslots = $self->{'wanted_slots'};
+	foreach my $slot (@slots) {
+		if(! grep { $slot eq $_; } @$myslots) {
+			push(@$myslots, $slot);
+		}
+	}
+	return 1;
+}
+
+=head2 remove_wanted_slots
+
+ Title   : remove_wanted_slots
+ Usage   :
+ Function: Removes all wanted slots added previously through
+           add_wanted_slot().
+ Example :
+ Returns : the previous list of wanted slot names
+ Args    : none
+
+=cut
+
+sub remove_wanted_slots{
+	my $self = shift;
+	my @slots = $self->get_wanted_slots();
+	$self->{'wanted_slots'} = [];
+	return @slots;
+}
+
+=head2 get_unwanted_slots
+
+ Title   : get_unwanted_slots
+ Usage   : my @slots = $obj->get_unwanted_slots();
+ Function: Get the list of unwanted slots.
+ Example : 
+ Returns : a list of strings
+ Args    : none
+
+=cut
+
+sub get_unwanted_slots{
+	my $self = shift;
+
+	return @{$self->{'unwanted_slots'}};
+}
+
+=head2 add_unwanted_slot
+
+ Title   : add_unwanted_slot
+ Usage   :
+ Function: Adds the specified slots to the list of unwanted slots.
+ Example :
+ Returns : TRUE
+ Args    : an array of slot names (strings)
+
+=cut
+
+sub add_unwanted_slot{
+	my ($self,@slots) = @_;
+
+	my $myslots = $self->{'unwanted_slots'};
+	foreach my $slot (@slots) {
+		if(! grep { $slot eq $_; } @$myslots) {
+			push(@$myslots, $slot);
+		}
+	}
+	return 1;
+}
+
+=head2 remove_unwanted_slots
+
+ Title   : remove_unwanted_slots
+ Usage   :
+ Function: Removes the list of unwanted slots added previously through
+           add_unwanted_slot().
+ Example :
+ Returns : the previous list of unwanted slot names
+ Args    : none
+
+=cut
+
+sub remove_unwanted_slots{
+	my $self = shift;
+	my @slots = $self->get_unwanted_slots();
+	$self->{'unwanted_slots'} = [];
+	return @slots;
+}
+
+=head2 want_none
+
+ Title   : want_none
+ Usage   :
+ Function: Disables all slots. After calling this method, want_slot()
+           will return FALSE regardless of slot name.
+
+           This is different from remove_wanted_slots() in that it
+           also sets want_all() to FALSE. Note that it also resets the
+           list of unwanted slots in order to avoid slots being in
+           both lists.
+
+ Example :
+ Returns : TRUE
+ Args    : none
+
+=cut
+
+sub want_none{
+	my $self = shift;
+
+	$self->want_all(0);
+	$self->remove_wanted_slots();
+	$self->remove_unwanted_slots();
+	return 1;
+}
+
+=head2 want_all
+
+ Title   : want_all
+ Usage   : $obj->want_all($newval)
+ Function: Whether or not this sequence object builder wants to
+           populate all slots that the object has. Whether an object
+           supports a slot is generally determined by what can()
+           returns. You can add additional 'virtual' slots by calling
+           add_wanted_slot.
+
+           This will be ON by default. Call $obj->want_none() to
+           disable all slots.
+
+ Example : 
+ Returns : TRUE if this builder wants to populate all slots, and
+           FALSE otherwise.
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub want_all{
+	my $self = shift;
+
+	return $self->{'want_all'} = shift if @_;
+	return $self->{'want_all'};
+}
+
+=head2 get_object_conditions
+
+ Title   : get_object_conditions
+ Usage   :
+ Function: Get the list of conditions an object must meet in order to
+           be 'wanted.' See want_object() for where this is used.
+
+           Conditions in this implementation are closures (anonymous
+           functions) which are passed one parameter, a hash reference
+           the keys of which are equal to initialization
+           parameters. The closure must return TRUE to make the object
+           'wanted.'
+
+           Conditions will be implicitly ANDed.
+
+ Example :
+ Returns : a list of closures
+ Args    : none
+
+=cut
+
+sub get_object_conditions{
+	my $self = shift;
+
+	return @{$self->{'object_conds'}};
+}
+
+=head2 add_object_condition
+
+ Title   : add_object_condition
+ Usage   :
+ Function: Adds a condition an object must meet in order to be 'wanted.'
+           See want_object() for where this is used.
+
+           Conditions in this implementation must be closures
+           (anonymous functions). These will be passed one parameter,
+           which is a hash reference with the sequence object
+           initialization parameters being the keys.
+
+           Conditions are implicitly ANDed. If you want other
+           operators, perform those tests inside of one closure
+           instead of multiple.  This will also be more efficient.
+
+ Example :
+ Returns : TRUE
+ Args    : the list of conditions
+
+=cut
+
+sub add_object_condition{
+	my ($self,@conds) = @_;
+
+	if(grep { ref($_) ne 'CODE'; } @conds) {
+		$self->throw("conditions against which to validate an object ".
+						 "must be anonymous code blocks");
+	}
+	push(@{$self->{'object_conds'}}, @conds);
+	return 1;
+}
+
+=head2 remove_object_conditions
+
+ Title   : remove_object_conditions
+ Usage   :
+ Function: Removes the conditions an object must meet in order to be
+           'wanted.'
+ Example :
+ Returns : The list of previously set conditions (an array of closures)
+ Args    : none
+
+=cut
+
+sub remove_object_conditions{
+	my $self = shift;
+	my @conds = $self->get_object_conditions();
+	$self->{'object_conds'} = [];
+	return @conds;
+}
+
+=head1 Methods to control what type of object is built
+
+=cut
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $obj->sequence_factory($newval)
+ Function: Get/set the sequence factory to be used by this object
+           builder.
+ Example : 
+ Returns : the Bio::Factory::SequenceFactoryI implementing object to use
+ Args    : on set, new value (a Bio::Factory::SequenceFactoryI
+           implementing object or undef, optional)
+
+=cut
+
+sub sequence_factory{
+	my $self = shift;
+
+	if(@_) {
+		delete $self->{'_objskel'};
+		return $self->{'sequence_factory'} = shift;
+	}
+	return $self->{'sequence_factory'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,146 @@
+# $Id: SeqFactory.pm,v 1.11.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::SeqFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::SeqFactory - Instantiates a new Bio::PrimarySeqI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Seq::SeqFactory;
+    my $factory = new Bio::Seq::SeqFactory;
+    my $seq = $factory->create(-seq => 'WYRAVLC',
+			       -id  => 'name');
+
+    # If you want the factory to create Bio::Seq objects instead
+    # of the default Bio::PrimarySeq objects, use the -type parameter:
+
+    my $factory = new Bio::Seq::SeqFactory(-type => 'Bio::Seq');
+
+
+=head1 DESCRIPTION
+
+This object will build L<Bio::PrimarySeqI> and L<Bio::SeqI> objects
+generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::SeqFactory;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::Factory::SequenceFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Seq::SeqFactory();
+ Function: Builds a new Bio::Seq::SeqFactory object 
+ Returns : Bio::Seq::SeqFactory
+ Args    : -type => string, name of a PrimarySeqI derived class
+                    This is optional. Default=Bio::PrimarySeq.
+
+=cut
+
+sub new {
+  # Builds the factory through the parent constructor and records the
+  # class to instantiate (-type), defaulting to Bio::PrimarySeq.
+  my($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my ($type) = $self->_rearrange([qw(TYPE)], @args);
+  if( ! defined $type ) { 
+      $type = 'Bio::PrimarySeq';
+  }
+  # type() loads and validates the class before storing its name.
+  $self->type($type);
+  return $self;
+}
+
+
+=head2 create
+
+ Title   : create
+ Usage   : my $seq = $seqbuilder->create(-seq => 'CAGT', -id => 'name');
+ Function: Instantiates new Bio::SeqI (or one of its child classes)
+           This object allows us to genericize the instantiation of sequence
+           objects.
+ Returns : Bio::PrimarySeq object (default)
+           The return type is configurable using new(-type =>"...").
+ Args    : initialization parameters specific to the type of sequence
+           object we want.  Typically 
+           -seq        => $str,
+           -display_id => $name
+
+=cut
+
+sub create {
+   # Delegates to the constructor of the configured class, passing the
+   # factory's verbosity first and then the caller's own parameters.
+   my ($self, @args) = @_;
+   return $self->type->new(-verbose => $self->verbose, @args);
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $obj->type($newval)
+ Function: 
+ Returns : value of type
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub type{
+   # Get/set the name of the class this factory instantiates. On set, the
+   # class is loaded and must be (or inherit from) Bio::PrimarySeqI or
+   # Bio::Seq::QualI; otherwise an exception is thrown via throw().
+   my ($self,$value) = @_;
+   if( defined $value) {
+       # NOTE(review): $value is interpolated into a string eval, so the
+       # type name must never come from untrusted input.
+       eval "require $value";
+       if( $@ ) { $self->throw("$@: Unrecognized Sequence type for SeqFactory '$value'");}
+       
+       # Test inheritance on the class name itself instead of blessing a
+       # throw-away empty hash into the class: that dummy object would run
+       # the class's DESTROY (if it has one) on an uninitialised instance.
+       unless( UNIVERSAL::isa($value, 'Bio::PrimarySeqI') ||
+	       UNIVERSAL::isa($value, 'Bio::Seq::QualI') ) {
+	   $self->throw("Must provide a valid Bio::PrimarySeqI or Bio::Seq::QualI or child class to SeqFactory Not $value");
+       }
+      $self->{'type'} = $value;
+    }
+    return $self->{'type'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFastaSpeedFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFastaSpeedFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqFastaSpeedFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,137 @@
+# $Id: SeqFastaSpeedFactory.pm,v 1.7.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::SeqFastaSpeedFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::SeqFastaSpeedFactory - Instantiates a new Bio::PrimarySeqI (or derived class) through a factory
+
+=head1 SYNOPSIS
+
+    use Bio::Seq::SeqFastaSpeedFactory;
+    my $factory = new Bio::Seq::SeqFastaSpeedFactory;
+    my $seq = $factory->create(-seq => 'WYRAVLC',
+			       -id  => 'name');
+
+    # If you want the factory to create Bio::Seq objects instead
+    # of the default Bio::PrimarySeq objects, use the -type parameter:
+
+    my $factory = new Bio::Seq::SeqFastaSpeedFactory(-type => 'Bio::Seq');
+
+
+=head1 DESCRIPTION
+
+This object will build Bio::Seq objects generically.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::SeqFastaSpeedFactory;
+use strict;
+
+use Bio::Seq;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root Bio::Factory::SequenceFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Seq::SeqFastaSpeedFactory();
+ Function: Builds a new Bio::Seq::SeqFastaSpeedFactory object 
+ Returns : Bio::Seq::SeqFastaSpeedFactory
+ Args    : -type => string, name of a PrimarySeqI derived class
+                    This is optional. Default=Bio::PrimarySeq.
+
+=cut
+
+sub new {
+  # Plain pass-through to the parent constructor; this factory keeps no
+  # state of its own.
+  my($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+
+=head2 create
+
+ Title   : create
+ Usage   : my $seq = $seqbuilder->create(-seq => 'CAGT', -id => 'name');
+ Function: Instantiates a new Bio::Seq object, correctly built but very
+           fast, knowing stuff about Bio::PrimarySeq and Bio::Seq
+ Returns : Bio::Seq
+
+ Args    : initialization parameters specific to the type of sequence
+           object we want.  Typically 
+           -seq        => $str,
+           -id         => $name
+
+=cut
+
+sub create {
+    # Builds a Bio::Seq wrapping a Bio::PrimarySeq by blessing hashes
+    # directly (no constructors are run), filling in only the sequence,
+    # description, id and alphabet fields.
+    my ($self, @args) = @_;
+    
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+    
+    my $sequence = $param{'-seq'};
+    my $fulldesc = $param{'-desc'};
+    my $id       = $param{'-id'} || $param{'-primary_id'};
+    my $alphabet = $param{'-alphabet'};
+
+    my $seq = bless {}, "Bio::Seq";
+    my $t_pseq = $seq->{'primary_seq'} = bless {}, "Bio::PrimarySeq";
+    $t_pseq->{'seq'}  = $sequence;
+    $t_pseq->{'desc'} = $fulldesc;
+    $t_pseq->{'display_id'} = $id;
+    $t_pseq->{'primary_id'} = $id;
+    $seq->{'primary_id'} = $id; # currently Bio::Seq does not delegate this
+    # The alphabet is only set when there is a sequence: guessed from the
+    # sequence when not supplied, taken as given otherwise.
+    if( $sequence and !$alphabet ) {
+	$t_pseq->_guess_alphabet();
+    } elsif ( $sequence and $alphabet ) {
+        $t_pseq->{'alphabet'} = $alphabet;
+    }
+
+    return $seq;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqWithQuality.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqWithQuality.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SeqWithQuality.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,916 @@
+# $Id: SeqWithQuality.pm,v 1.27.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::SeqWithQuality
+#
+# Cared for by Chad Matsalla <bioinformatics at dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::SeqWithQuality - Bioperl object packaging a sequence with its quality
+
+=head1 SYNOPSIS
+
+	use Bio::PrimarySeq;
+	use Bio::Seq::PrimaryQual;
+        use Bio::Seq::SeqWithQuality;
+
+		# make from memory
+	my $qual = Bio::Seq::SeqWithQuality->new
+		( -qual => '10 20 30 40 50 50 20 10',
+		  -seq => 'ATCGATCG',
+		  -id  => 'human_id',
+		  -accession_number => 'AL000012',
+		);
+
+		# make from objects
+		# first, make a PrimarySeq object
+	my $seqobj = Bio::PrimarySeq->new
+		( -seq => 'atcgatcg',
+		  -id  => 'GeneFragment-12',
+		  -accession_number => 'X78121',
+		  -alphabet => 'dna'
+		);
+
+		# now make a PrimaryQual object
+	my $qualobj = Bio::Seq::PrimaryQual->new
+		( -qual => '10 20 30 40 50 50 20 10',
+		  -id  => 'GeneFragment-12',
+		  -accession_number => 'X78121',
+		  -alphabet => 'dna'
+		);
+
+		# now make the SeqWithQuality object
+	my $swqobj = Bio::Seq::SeqWithQuality->new
+		( -seq  => $seqobj,
+		  -qual => $qualobj
+		);
+		# done!
+
+	$swqobj->id(); # the id of the SeqWithQuality object
+			# may not match the id of the sequence or
+			# of the quality (check the pod, luke)
+	$swqobj->seq(); # the sequence of the SeqWithQuality object
+	$swqobj->qual(); # the quality of the SeqWithQuality object
+
+         # to get out parts of the sequence.
+
+         print "Sequence ", $seqobj->id(), " with accession ",
+		$seqobj->accession, " and desc ", $seqobj->desc, "\n";
+
+         $string2 = $seqobj->subseq(1,40);
+
+=head1 DESCRIPTION
+
+This object stores base quality values together with the sequence string.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics at dieselwurks.com
+
+=head1 CONTRIBUTORS 
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::Seq::SeqWithQuality;
+
+
+use strict;
+use Bio::PrimarySeq;
+use Bio::Seq::PrimaryQual;
+
+use base qw(Bio::Root::Root Bio::PrimarySeqI Bio::Seq::QualI);
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $qual = Bio::Seq::SeqWithQuality ->new
+		( -qual => '10 20 30 40 50 50 20 10',
+		  -seq => 'ATCGATCG',
+		  -id  => 'human_id',
+		  -accession_number => 'AL000012',
+                  -trace_indices    => '0 5 10 15 20 25 30 35'
+		);
+ Function: Returns a new Bio::Seq::SeqWithQuality object from basic
+           constructors.
+ Returns : a new Bio::Seq::SeqWithQuality object
+ Notes   : Arguments:
+	-qual can be a quality string (see Bio::Seq::PrimaryQual for more
+	information on this) or a reference to a Bio::Seq::PrimaryQual
+	object.
+	-seq can be a sequence string (see Bio::PrimarySeq for more
+	information on this) or a reference to a Bio::PrimarySeq object.
+	-seq, -id, -accession_number, -primary_id, -desc, -id behave like
+	this:
+	1. if they are provided on construction of the
+	Bio::Seq::SeqWithQuality they will be set as the descriptors for
+	the object unless changed by one of the following mechanisms:
+	a) $obj->set_common_descriptors() is used and both the -seq and
+	  the -qual object have the same descriptors. These common
+	  descriptors will then become the descriptors for the
+	  Bio::Seq::SeqWithQual object.
+	b) the descriptors are manually set using the seq(), id(),
+		desc(), or accession_number(), primary_id(),
+	2. if no descriptors are provided, the new() constructor will see
+		if the descriptor used in the PrimarySeq and in the
+		PrimaryQual objects match. If they do, they will become
+		the descriptors for the SeqWithQuality object.
+
+	To eliminate ambiguity, I strongly suggest you set the
+	descriptors manually on construction of the object. Really.
+     -trace_indices : a space_delimited list of trace indices
+         (where would the peaks be drawn if this list of qualities
+          was to be plotted?)
+
+=cut
+
+sub new {
+    # Builds a SeqWithQuality from a sequence (string or object) and a
+    # quality (string or Bio::Seq::PrimaryQual object), creating the
+    # embedded PrimarySeq/PrimaryQual objects where strings or nothing
+    # were supplied, then derives the descriptors for the wrapper itself.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+	# default: turn OFF the warnings
+	$self->{supress_warnings} = 1;
+    my($qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet,$trace_indices) =
+	$self->_rearrange([qw(
+			      QUAL
+			      SEQ
+			      DISPLAY_ID
+			      ACCESSION_NUMBER
+			      PRIMARY_ID
+			      DESC
+			      ID
+			      ALPHABET
+                              TRACE_INDICES
+			      )],
+			  @args);
+    # first, deal with the sequence and quality information
+    # -display_id and -id are synonyms; they may both be given only if
+    # they agree.
+    if ( defined $id && defined $given_id ) {
+	if( $id ne $given_id ) {
+	    $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
+	}
+    } 
+    if( defined $given_id ) {
+	$self->display_id($given_id);
+	$id = $given_id;
+    } 
+    if (!$seq) {
+	# No sequence given: build an empty PrimarySeq. The id parsed above
+	# is passed through (it used to be hidden by a fresh, undefined
+	# lexical of the same name declared in this branch).
+	unless ($self->{supress_warnings} == 1) {
+		$self->warn("You did not provide sequence information during the construction of a Bio::Seq::SeqWithQuality object. Sequence components for this object will be empty.");
+	}
+	if (!$alphabet) {
+	    $self->throw("If you want me to create a PrimarySeq object for your empty sequence <boggle> you must specify a -alphabet to satisfy the constructor requirements for a Bio::PrimarySeq object with no sequence. Read the POD for it, luke.");		
+	}
+	$self->{seq_ref} = Bio::PrimarySeq->new
+	    (
+	     -seq		=>  "",
+	     -accession_number	=>  $acc,
+	     -primary_id	=>  $pid,
+	     -desc		=>  $desc,
+	     -display_id	=>  $id,
+	     -alphabet          =>  $alphabet
+	     );
+    }
+    elsif (UNIVERSAL::isa($seq,"Bio::PrimarySeq") ||  UNIVERSAL::isa($seq,"Bio::Seq")) {
+	# A ready-made sequence object is stored as is.
+	$self->{seq_ref} = $seq;
+    }
+	elsif (ref($seq)) {
+		$self->throw("You passed a seq argument into a SeqWithQUality object and it was a reference ($seq) which did not inherit from Bio::Seq or Bio::PrimarySeq. I don't know what to do with this.");
+	}
+
+    else {
+	# A plain sequence string: wrap it in a new PrimarySeq.
+	my $seqobj = Bio::PrimarySeq->new
+	    (
+	     -seq		=>	$seq,
+	     -accession_number	=>	$acc,
+	     -primary_id	=>	$pid,
+	     -desc		=>	$desc,
+	     -display_id	=>	$id,
+	     );
+	$self->{seq_ref} = $seqobj;
+    }
+
+    if (!defined($qual)) {
+	# No quality given: build an empty PrimaryQual.
+	$self->{qual_ref} = Bio::Seq::PrimaryQual->new
+	    (
+	     -qual		=>	"",
+	     -accession_number	=>	$acc,
+	     -primary_id	=>	$pid,
+	     -desc		=>	$desc,
+	     -display_id	=>	$id,
+	     );
+    }
+    elsif (ref($qual) eq "Bio::Seq::PrimaryQual") {
+	$self->{qual_ref} = $qual;
+    }
+    else {
+	# Anything else is handed to PrimaryQual as the quality value;
+	# trace indices are only passed along in this branch.
+	my $qualobj = Bio::Seq::PrimaryQual->new
+	    (
+	     -qual		=>	$qual,
+	     -accession_number	=>	$acc,
+	     -primary_id	=>	$pid,
+	     -desc		=>	$desc,
+	     -display_id	=>	$id,
+          -trace_indices =>   $trace_indices
+	     );
+	$self->{qual_ref} = $qualobj;
+    }
+
+    # now try to set the descriptors for this object
+    $self->_set_descriptors($qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet);
+    # length() caches the length (or "DIFFERENT") on the object.
+    $self->length();
+    $self->deprecated("deprecated class- use Bio::Seq::Quality instead");
+
+    return $self;
+}
+
+=head2 _common_id()
+
+ Title   : _common_id()
+ Usage   : $common_id = $self->_common_id();
+ Function: Compare the display_id of {qual_ref} and {seq_ref}.
+ Returns : Nothing if they don't match. If they do return
+	   {seq_ref}->display_id()
+ Args    : None.
+
+=cut
+
+#'
+sub _common_id {
+	my ($self) = @_;
+	# Both embedded objects are needed before anything can be compared.
+	my ($seq_obj, $qual_obj) = @{$self}{qw(seq_ref qual_ref)};
+	return unless ($seq_obj && $qual_obj);
+	# An empty or missing id on either side means there is no common id.
+	my $seq_id = $seq_obj->display_id();
+	return unless ($seq_id);
+	return unless ($qual_obj->display_id());
+	# Identical ids: hand back the sequence's one. Otherwise fall through
+	# without an explicit return (no warning is issued on a mismatch).
+	return $seq_id if ($seq_id eq $qual_obj->display_id());
+}
+
+=head2 _common_display_id()
+
+ Title   : _common_display_id()
+ Usage   : $common_id = $self->_common_display_id();
+ Function: Compare the display_id of {qual_ref} and {seq_ref}.
+ Returns : Nothing if they don't match. If they do return
+	   {seq_ref}->display_id()
+ Args    : None.
+
+=cut
+
+#'
+sub _common_display_id {
+	# Synonym for _common_id(): the display id shared by the embedded
+	# sequence and quality objects, if any. (The helper's name carries a
+	# leading underscore; no 'common_id' method is defined in this class.)
+	my $self = shift;
+	return $self->_common_id();
+}
+
+=head2 _common_accession_number()
+
+ Title   : _common_accession_number()
+ Usage   : $common_id = $self->_common_accession_number();
+ Function: Compare the accession_number() of {qual_ref} and {seq_ref}.
+ Returns : Nothing if they don't match. If they do return
+	   {seq_ref}->accession_number()
+ Args    : None.
+
+=cut
+
+#'
+sub _common_accession_number {
+	# Returns the accession number shared by the embedded sequence and
+	# quality objects. Returns nothing when either object is missing,
+	# the sequence has no accession number, or the two differ.
+	my $self = shift;
+	# Both objects are required for a comparison (same guard as
+	# _common_id()); the test used to be inverted.
+	return if (!$self->{seq_ref} || !$self->{qual_ref});
+	my $acc = $self->{seq_ref}->accession_number();
+	return if (!$acc);
+	return $acc if ($acc eq $self->{qual_ref}->accession_number());
+		# should this become a warning?
+	return;
+}
+
+=head2 _common_primary_id()
+
+ Title   : _common_primary_id()
+ Usage   : $common_primard_id = $self->_common_primary_id();
+ Function: Compare the primary_id of {qual_ref} and {seq_ref}.
+ Returns : Nothing if they don't match. If they do return
+	   {seq_ref}->primary_id()
+ Args    : None.
+
+=cut
+
+#'
+sub _common_primary_id {
+	# Returns the primary id shared by the embedded sequence and quality
+	# objects. Returns nothing when either object is missing, the
+	# sequence has no primary id, or the two differ.
+	my $self = shift;
+	# Both objects are required for a comparison (same guard as
+	# _common_id()); the test used to be inverted.
+	return if (!$self->{seq_ref} || !$self->{qual_ref});
+	my $pid = $self->{seq_ref}->primary_id();
+	return if (!$pid);
+	return $pid if ($pid eq $self->{qual_ref}->primary_id());
+		# should this become a warning?
+	return;
+}
+
+=head2 _common_desc()
+
+ Title   : _common_desc()
+ Usage   : $common_desc = $self->_common_desc();
+ Function: Compare the desc of {qual_ref} and {seq_ref}.
+ Returns : Nothing if they don't match. If they do return
+	   {seq_ref}->desc()
+ Args    : None.
+
+=cut
+
+#'
+sub _common_desc {
+	# Returns the description shared by the embedded sequence and quality
+	# objects. Returns nothing when either object is missing, the
+	# sequence has no description, or the two differ.
+	my $self = shift;
+	# Both objects are required for a comparison (same guard as
+	# _common_id()); the test used to be inverted.
+	return if (!$self->{seq_ref} || !$self->{qual_ref});
+	my $des = $self->{seq_ref}->desc();
+	return if (!$des);
+	return $des if ($des eq $self->{qual_ref}->desc());
+		# should this become a warning?
+	return;
+}
+
+=head2 set_common_descriptors()
+
+ Title   : set_common_descriptors()
+ Usage   : $self->set_common_descriptors();
+ Function: Compare the descriptors (id,accession_number,display_id,
+	primary_id, desc) for the PrimarySeq and PrimaryQual objects
+	within the SeqWithQuality object. If they match, make that
+	descriptor the descriptor for the SeqWithQuality object.
+ Returns : Nothing.
+ Args    : None.
+
+=cut
+
+sub set_common_descriptors {
+	# For each descriptor (display id, accession number, primary id,
+	# description): if the embedded sequence and quality objects agree
+	# on a value, adopt it as this object's own. Returns nothing.
+	my $self = shift;
+	# Nothing can be compared unless both embedded objects are present.
+	return if (!$self->{seq_ref} || !$self->{qual_ref});
+	# The helpers are methods: they must be invoked on $self, and their
+	# results stored through the matching accessor.
+	if (my $c_id = $self->_common_id()) {
+		$self->display_id($c_id);
+	}
+	if (my $c_acc = $self->_common_accession_number()) {
+		$self->accession_number($c_acc);
+	}
+	if (my $c_pid = $self->_common_primary_id()) {
+		$self->primary_id($c_pid);
+	}
+	if (my $c_desc = $self->_common_desc()) {
+		$self->desc($c_desc);
+	}
+	return;
+}
+
+=head2 alphabet()
+
+ Title   : alphabet();
+ Usage   : $molecule_type = $obj->alphabet();
+ Function: Get the molecule type from the PrimarySeq object.
+ Returns : What what PrimarySeq says the type of the sequence is.
+ Args    : None.
+
+=cut
+
+sub alphabet {
+	my ($self) = @_;
+	# The alphabet is not stored on this object; ask the embedded
+	# sequence object for it.
+	my $seq_obj = $self->{seq_ref};
+	return $seq_obj->alphabet();
+}
+
+=head2 display_id()
+
+ Title   : display_id()
+ Usage   : $id_string = $obj->display_id();
+ Function: Returns the display id, aka the common name of the Quality
+        object.
+        The semantics of this is that it is the most likely string to be
+        used as an identifier of the quality sequence, and likely to have
+        "human" readability.  The id is equivalent to the ID field of the
+        GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
+        database. In fasta format, the >(\S+) is presumed to be the id,
+        though some people overload the id to embed other information.
+        Bioperl does not use any embedded information in the ID field,
+        and people are encouraged to use other mechanisms (accession
+	field for example, or extending the sequence object) to solve
+	this. Notice that $seq->id() maps to this function, mainly for
+        legacy/convience issues.
+	This method sets the display_id for the SeqWithQuality object.
+ Returns : A string
+ Args    : If a scalar is provided, it is set as the new display_id for
+	the SeqWithQuality object.
+ Status  : Virtual
+
+=cut
+
+sub display_id {
+   my ($obj,$value) = @_;
+   # Store the new id only when a defined value was passed in.
+   $obj->{'display_id'} = $value if defined $value;
+   return $obj->{'display_id'};
+}
+
+=head2 accession_number()
+
+ Title   : accession_number()
+ Usage   : $unique_biological_key = $obj->accession_number();
+ Function: Returns the unique biological id for a sequence, commonly
+        called the accession_number. For sequences from established
+        databases, the implementors should try to use the correct
+        accession number. Notice that primary_id() provides the unique id
+        for the implemetation, allowing multiple objects to have the same
+        accession number in a particular implementation. For sequences
+        with no accession number, this method should return "unknown".
+	This method sets the accession_number for the SeqWithQuality
+	object. 
+ Returns : A string (the value of accession_number)
+ Args    : If a scalar is provided, it is set as the new accession_number
+	for the SeqWithQuality object.
+ Status  : Virtual
+
+
+=cut
+
+sub accession_number {
+    my( $obj, $acc ) = @_;
+
+    # Setter: store the value and hand it straight back.
+    if (defined $acc) {
+        $obj->{'accession_number'} = $acc;
+        return $acc;
+    }
+
+    # Getter: report 'unknown' when nothing has been stored.
+    my $stored = $obj->{'accession_number'};
+    return defined $stored ? $stored : 'unknown';
+}
+
+=head2 primary_id()
+
+ Title   : primary_id()
+ Usage   : $unique_implementation_key = $obj->primary_id();
+ Function: Returns the unique id for this object in this implementation.
+        This allows implementations to manage their own object ids in a
+        way the implementaiton can control clients can expect one id to
+        map to one object. For sequences with no accession number, this
+        method should return a stringified memory location.
+	This method sets the primary_id for the SeqWithQuality
+	object.
+ Returns : A string. (the value of primary_id)
+ Args    : If a scalar is provided, it is set as the new primary_id for
+	the SeqWithQuality object.
+
+=cut
+
+sub primary_id {
+   # Get/set the primary id of this object. A new value is stored whenever
+   # a defined argument is passed, so ids such as "0" are accepted, the
+   # same way display_id() and desc() treat their argument.
+   my ($obj,$value) = @_;
+   if (defined $value) {
+      $obj->{'primary_id'} = $value;
+    }
+   return $obj->{'primary_id'};
+
+}
+
+=head2 desc()
+
+ Title   : desc()
+ Usage   : $qual->desc($newval); _or_ 
+           $description = $qual->desc();
+ Function: Get/set description text for this SeqWithQuality object.
+ Returns : A string. (the value of desc)
+ Args    : If a scalar is provided, it is set as the new desc for the
+	SeqWithQuality object.
+
+=cut
+
+sub desc {
+   # Accessor for the description text kept on the SeqWithQuality object
+   # itself (not on the embedded sequence or quality objects).
+   my ($obj,$value) = @_;
+   $obj->{'desc'} = $value if defined $value;
+   return $obj->{'desc'};
+}
+
+=head2 id()
+
+ Title   : id()
+ Usage   : $id = $qual->id();
+ Function: Return the ID of the quality. This should normally be (and
+        actually is in the implementation provided here) just a synonym
+        for display_id().
+ Returns : A string. (the value of id)
+ Args    : If a scalar is provided, it is set as the new id for the
+	SeqWithQuality object.
+
+=cut
+
+sub id {
+   # Synonym for display_id(): sets the id when a defined value is given
+   # and returns the current id either way.
+   my ($self,$value) = @_;
+   # Without an invocant there is no object to call throw() on, so the
+   # misuse is reported with a plain die.
+   if (!$self) { die "id() must be called on a SeqWithQuality object"; }
+   if( defined $value ) {
+       return $self->display_id($value);
+   }
+   return $self->display_id();
+}
+
+=head2 seq
+
+ Title   : seq()
+ Usage   : $string    = $obj->seq(); _or_
+	$obj->seq("atctatcatca");
+ Function: Returns the sequence that is contained in the imbedded in the
+	PrimarySeq object within the SeqWithQuality object
+ Returns : A scalar (the seq() value for the imbedded PrimarySeq object.)
+ Args    : If a scalar is provided, the SeqWithQuality object will
+	attempt to set that as the sequence for the imbedded PrimarySeq
+	object. Otherwise, the value of seq() for the PrimarySeq object
+	is returned.
+ Notes   : This is probably not a good idea because you then should call
+	length() to make sure that the sequence and quality are of the
+	same length. Even then, how can you make sure that this sequence
+	belongs with that quality? I provided this to give you rope to
+	hang yourself with. Tie it to a strong device and use a good
+	knot.
+
+=cut
+
+sub seq {
+	my ($self,$value) = @_;
+	# Getter: just relay the embedded sequence object's string.
+	return $self->{seq_ref}->seq() unless defined $value;
+
+	# Setter: push the new string into the embedded object, then let
+	# length() re-check it against the quality length.
+	$self->{seq_ref}->seq($value);
+	$self->length();
+	return $self->{seq_ref}->seq();
+}
+
+=head2 qual()
+
+ Title   : qual()
+ Usage   : @quality_values  = @{$obj->qual()}; _or_
+	$obj->qual("10 10 20 40 50");
+ Function: Returns the quality as imbedded in the PrimaryQual object
+	within the SeqWithQuality object.
+ Returns : A reference to an array containing the quality values in the 
+	PrimaryQual object.
+ Args    : If a scalar is provided, the SeqWithQuality object will
+	attempt to set that as the quality for the imbedded PrimaryQual
+	object. Otherwise, the value of qual() for the PrimaryQual
+	object is returned.
+ Notes   : This is probably not a good idea because you then should call
+	length() to make sure that the sequence and quality are of the
+	same length. Even then, how can you make sure that this sequence
+	belongs with that quality? I provided this to give you a strong
+	board with which to flagellate yourself. 
+
+=cut
+
+sub qual {
+	my ($self,$value) = @_;
+
+	# Getter: relay whatever the embedded quality object reports.
+	return $self->{qual_ref}->qual() unless defined $value;
+
+	# Setter: hand the value to the embedded object, then refresh the
+	# cached length comparison.
+	$self->{qual_ref}->qual($value);
+	$self->length();
+	return $self->{qual_ref}->qual();
+}
+
+
+
+=head2 trace_indices()
+
+ Title   : trace_indices()
+ Usage   : @trace_indice_values  = @{$obj->trace_indices()}; _or_
+	$obj->trace_indices("10 10 20 40 50");
+ Function: Returns the trace_indices as imbedded in the Primaryqual object
+	within the SeqWithQualiity object.
+ Returns : A reference to an array containing the trace_indice values in the 
+	PrimaryQual object.
+ Args    : If a scalar is provided, the SeqWithuQuality object will
+	attempt to set that as the trace_indices for the imbedded PrimaryQual
+	object. Otherwise, the value of trace_indices() for the PrimaryQual
+	object is returned.
+ Notes   : This is probably not a good idea because you then should call
+	length() to make sure that the sequence and trace_indices are of the
+	same length. Even then, how can you make sure that this sequence
+	belongs with that trace_indicex? I provided this to give you a strong
+	board with which to flagellate yourself. 
+
+=cut
+
+sub trace_indices {
+	my ($self,$value) = @_;
+
+	# Getter: relay whatever the embedded quality object reports.
+	return $self->{qual_ref}->trace_indices() unless defined $value;
+
+	# Setter: hand the value to the embedded object, then refresh the
+	# cached length comparison.
+	$self->{qual_ref}->trace_indices($value);
+	$self->length();
+	return $self->{qual_ref}->trace_indices();
+}
+
+
+
+
+=head2 length()
+
+ Title   : length()
+ Usage   : $length = $seqWqual->length();
+ Function: Get the length of the SeqWithQuality sequence/quality.
+ Returns : Returns the length of the sequence and quality if they are
+	both the same. Returns "DIFFERENT" if they differ.
+ Args    : None.
+
+=cut
+
+sub length {
+    my ($self) = @_;
+
+    # Without both embedded objects there is nothing to measure; warn
+    # (unless warnings are suppressed) and return nothing.
+    unless ($self->{seq_ref}) {
+	if ($self->{supress_warnings} != 1) {
+	    $self->warn("Can't find {seq_ref} here in length().");
+	}
+	return;
+    }
+    unless ($self->{qual_ref}) {
+	if ($self->{supress_warnings} != 1) {
+	    $self->warn("Can't find {qual_ref} here in length().");
+	}
+	return;
+    }
+
+    # Cache the sequence length when it agrees with the quality length,
+    # and the marker string "DIFFERENT" when it does not.
+    my $seql = $self->{seq_ref}->length();
+    if ($seql == $self->{qual_ref}->length()) {
+	$self->{'length'} = $seql;
+    }
+    else {
+	if ($self->{supress_warnings} != 1) {
+	    $self->warn("Sequence length (".$seql.") is different from quality length (".$self->{qual_ref}->length().") in the SeqWithQuality object. This can only lead to problems later.");		
+	}
+	$self->{'length'} = "DIFFERENT";
+    }
+    return $self->{'length'};
+}
+
+
+=head2 qual_obj
+
+ Title   : qual_obj($different_obj)
+ Usage   : $qualobj = $seqWqual->qual_obj(); _or_
+	$qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj);
+ Function: Get the PrimaryQual object that is imbedded in the
+	SeqWithQuality object or if a reference to a PrimaryQual object
+	is provided, set this as the PrimaryQual object imbedded in the
+	SeqWithQuality object.
+ Returns : A reference to a Bio::Seq::PrimaryQual object.
+
+=cut
+
+sub qual_obj {
+    my ($self,$value) = @_;
+
+    # Getter: nothing to replace, hand back the embedded quality object.
+    return $self->{qual_ref} unless defined($value);
+
+    # Setter: only an object whose class is exactly Bio::Seq::PrimaryQual
+    # is accepted; anything else is reported via debug() and ignored.
+    if (ref($value) ne "Bio::Seq::PrimaryQual") {
+	$self->debug("You tried to change the PrimaryQual object within a SeqWithQuality object but you passed a reference to an object that was not a Bio::Seq::PrimaryQual object. Thus your change failed. Sorry.\n");
+    }
+    else {
+	$self->{qual_ref} = $value;
+	$self->debug("You successfully changed the PrimaryQual object within a SeqWithQuality object. ID's for the SeqWithQuality object may now not be what you expect. Use something like set_common_descriptors() to fix them if you care,");
+    }
+    return $self->{qual_ref};
+}
+
+
+=head2 seq_obj
+
+ Title   : seq_obj()
+ Usage   : $seqobj = $seqWqual->seq_obj(); _or_
+	$seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj);
+ Function: Get the PrimarySeq object that is imbedded in the
+	SeqWithQuality object or if a reference to a PrimarySeq object is
+	provided, set this as the PrimarySeq object imbedded in the
+	SeqWithQuality object.
+ Returns : A reference to a Bio::PrimarySeq object.
+
+=cut
+
+sub seq_obj {
+    # Get the embedded sequence object, or replace it when an object whose
+    # class is exactly Bio::PrimarySeq is passed. Any other defined
+    # argument is reported via debug() and ignored. Always returns the
+    # (possibly new) embedded sequence object.
+    my ($self,$value) = @_;
+    if( defined $value) {
+	if (ref($value) eq "Bio::PrimarySeq") {
+	    # Actually store the object; without this the success message
+	    # below was reported while the old object stayed in place.
+	    $self->{seq_ref} = $value;
+	    $self->debug("You successfully changed the PrimarySeq object within a SeqWithQuality object. ID's for the SeqWithQuality object may now not be what you expect. Use something like set_common_descriptors() to fix them if you care,");
+	} else {
+	    $self->debug("You tried to change the PrimarySeq object within a SeqWithQuality object but you passed a reference to an object that was not a Bio::PrimarySeq object. Thus your change failed. Sorry.\n");
+	}
+    }
+    return $self->{seq_ref};
+}
+
+=head2 _set_descriptors
+
+ Title   : _set_descriptors()
+ Usage   : $seqWqual->_set_descriptors($qual,$seq,$id,$acc,$pid,$desc,$given_id,
+	$alphabet);
+ Function: Set the descriptors for the SeqWithQuality object. Try to
+	match the descriptors in the PrimarySeq object and in the
+	PrimaryQual object if descriptors were not provided with
+	construction.
+ Returns : Nothing.
+ Args    : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found
+	in the new() method.
+ Notes   : Really only intended to be called by the new() method. If
+	you want to invoke a similar function try
+	set_common_descriptors().
+
+=cut
+
+
+sub _set_descriptors {
+    # Populate display_id, accession_number, primary_id and desc.
+    # An explicitly supplied $acc/$pid/$desc wins; otherwise the matching
+    # _common_*() lookup is consulted and used when it yields a true value.
+    # $qual, $seq, $id, $given_id and $alphabet are accepted but not read.
+    my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_;
+
+    # The display id is only touched when none is set yet.
+    if (!$self->display_id()) {
+        my $shared_id = $self->_common_id();
+        if ($shared_id) {
+            $self->display_id($shared_id);
+        }
+        elsif ($self->{seq_ref}) {
+            # print("Using seq_ref to set id to ".$self->{seq_ref}->display_id()."\n");
+            # ::dumpValue($self->{seq_ref});
+            $self->display_id($self->{seq_ref}->id());
+        }
+        elsif ($self->{qual_ref}) {
+            $self->display_id($self->{qual_ref}->id());
+        }
+    }
+
+    # Remaining descriptors, handled in the same order as before.
+    my %given = (
+        accession_number => $acc,
+        primary_id       => $pid,
+        desc             => $desc,
+    );
+    for my $field (qw(accession_number primary_id desc)) {
+        my $chosen = $given{$field};
+        if (!$chosen) {
+            my $finder = "_common_$field";
+            $chosen = $self->$finder();
+        }
+        $self->$field($chosen) if $chosen;
+    }
+}
+
+=head2 subseq($start,$end)
+
+ Title   : subseq($start,$end)
+ Usage   : $subsequence = $obj->subseq($start,$end);
+ Function: Returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence.
+ Returns : A string.
+ Args    : Two positions.
+
+=cut
+
+sub subseq {
+    # Forward to subseq() on the embedded sequence object (seq_ref).
+    # Every argument after the invocant is passed through unchanged.
+    # (The '@' sigil of @args had been turned into " at " by the list
+    # archiver, which left the parameter list syntactically invalid.)
+    my ($self,@args) = @_;
+    # does a single value work?
+    return $self->{seq_ref}->subseq(@args);
+}
+
+=head2 baseat($position)
+
+ Title   : baseat($position)
+ Usage   : $base_at_position_6 = $obj->baseat("6");
+ Function: Returns a single base at the given position, where the first
+	base is 1 and the number is inclusive, ie 1-2 are the first two
+	bases of the sequence.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub baseat {
+    # Single-position lookup: asks the embedded sequence object (seq_ref)
+    # for the subsequence running from $val to $val.
+    my ($self,$val) = @_;
+    my $primary_seq = $self->{seq_ref};
+    return $primary_seq->subseq($val,$val);
+}
+
+=head2 subqual($start,$end)
+
+ Title   : subqual($start,$end)
+ Usage   : @qualities = @{$obj->subqual(10,20)};
+ Function: returns the quality values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+sub subqual {
+    # Forward to subqual() on the embedded quality object (qual_ref);
+    # all arguments after the invocant are passed through unchanged.
+    # (Restores the '@' sigil on @args that the list archiver mangled.)
+    my ($self,@args) = @_;
+    return $self->{qual_ref}->subqual(@args);
+}
+
+=head2 qualat($position)
+
+ Title   : qualat($position)
+ Usage   : $quality = $obj->qualat(10);
+ Function: Return the quality value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub qualat {
+    # Delegates the single-position quality lookup to the embedded
+    # quality object (qual_ref).
+    my ($self,$val) = @_;
+    my $primary_qual = $self->{qual_ref};
+    return $primary_qual->qualat($val);
+}
+
+=head2 sub_trace_index($start,$end)
+
+ Title   : sub_trace_index($start,$end)
+ Usage   : @trace_indices = @{$obj->sub_trace_index(10,20)};
+ Function: returns the trace index values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+sub sub_trace_index {
+    # Forward to sub_trace_index() on the embedded quality object
+    # (qual_ref); all arguments after the invocant pass through unchanged.
+    # (Restores the '@' sigil on @args that the list archiver mangled.)
+    my ($self,@args) = @_;
+    return $self->{qual_ref}->sub_trace_index(@args);
+}
+
+=head2 trace_index_at($position)
+
+ Title   : trace_index_at($position)
+ Usage   : $trace_index = $obj->trace_index_at(10);
+ Function: Return the trace_index value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub trace_index_at {
+    # Delegates the single-position trace index lookup to the embedded
+    # quality object (qual_ref).
+    my ($self,$val) = @_;
+    my $primary_qual = $self->{qual_ref};
+    return $primary_qual->trace_index_at($val);
+}
+
+=head2 to_string()
+
+ Title   : to_string()
+ Usage   : $quality = $obj->to_string();
+ Function: Return a textual representation of what the object contains.
+	For this module, this function will return:
+                qual
+		seq
+                display_id
+                accession_number
+                primary_id
+                desc
+                id
+                length_sequence
+		length_quality
+ Returns : A scalar.
+ Args    : None.
+
+=cut
+
+sub to_string {
+        # Build a multi-line text dump: first a "qual:" line holding the
+        # comma-joined quality values, then one "name: value" line for each
+        # of seq, display_id, accession_number, primary_id, desc and id.
+        # Any accessor that yields a false value is reported as "<unset>".
+        my $self = shift;
+        my $out = "qual: ".join(',',@{$self->qual()})."\n";
+        for my $field (qw(seq display_id accession_number primary_id desc id)) {
+                my $result = $self->$field() || "<unset>";
+                $out .= "$field: $result\n";
+        }
+        return $out;
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SequenceTrace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SequenceTrace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SequenceTrace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1099 @@
+# $Id: SequenceTrace.pm,v 1.14.4.1 2006/10/02 23:10:27 sendu Exp $
+#
+# BioPerl module for Bio::Seq::SequenceTrace
+#
+# Cared for by Chad Matsalla <bioinformatics at dieselwurks.com
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace
+
+=head1 SYNOPSIS
+
+  # example code here
+
+=head1 DESCRIPTION
+
+This object stores a sequence with its trace.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics at dieselwurks.com
+
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::Seq::SequenceTrace;
+
+
+use strict;
+use Bio::Seq::QualI;
+use Bio::PrimarySeqI;
+use Bio::PrimarySeq;
+use Bio::Seq::PrimaryQual;
+use Dumpvalue();
+
+my $dumper = new Dumpvalue();
+
+use base qw(Bio::Root::Root Bio::Seq::Quality Bio::Seq::TraceI);
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $st = Bio::Seq::SequenceTrace->new
+     (    -swq =>   Bio::Seq::SequenceWithQuality,
+          -trace_a  =>   \@trace_values_for_a_channel,
+          -trace_t  =>   \@trace_values_for_t_channel,
+          -trace_g  =>   \@trace_values_for_g_channel,
+          -trace_c  =>   \@trace_values_for_c_channel,
+          -accuracy_a    =>   \@a_accuracies,
+          -accuracy_t    =>   \@t_accuracies,
+          -accuracy_g    =>   \@g_accuracies,
+          -accuracy_c    =>   \@c_accuracies,
+          -peak_indices    => '0 5 10 15 20 25 30 35'
+     );
+ Function: Returns a new Bio::Seq::SequenceTrace object from basic
+        constructors.
+ Returns : a new Bio::Seq::SequenceTrace object
+Arguments: I think that these are all described in the usage above.
+
+=cut
+
+sub new {
+    # Constructor. Requires -swq to be exactly a Bio::Seq::Quality object
+    # (checked with ref()); throws otherwise. When -accuracy_a is absent
+    # the accuracies are derived via set_accuracies(); when -trace_a is
+    # absent the traces are produced by _synthesize_traces(). Finally the
+    # id is copied from the wrapped object.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # default: turn OFF the warnings
+    $self->{supress_warnings} = 1;
+
+    my @channels = qw(a t g c);
+    my ($swq, $peak_indices, %trace_for, %accuracy_for);
+    ($swq, $peak_indices, @trace_for{@channels}, @accuracy_for{@channels}) =
+        $self->_rearrange([qw(
+            SWQ
+            PEAK_INDICES
+            TRACE_A
+            TRACE_T
+            TRACE_G
+            TRACE_C
+            ACCURACY_A
+            ACCURACY_T
+            ACCURACY_G
+            ACCURACY_C )], @args);
+
+    # first, deal with the sequence and quality information
+    if ($swq && ref($swq) eq "Bio::Seq::Quality") {
+        $self->{swq} = $swq;
+    }
+    else {
+        $self->throw("A Bio::Seq::SequenceTrace object must be created with a
+               Bio::Seq::Quality object. You provided this type of object: "
+               .ref($swq));
+    }
+
+    if ($accuracy_for{a}) {
+        for my $base (@channels) {
+            $self->accuracies($base, $accuracy_for{$base});
+        }
+    }
+    else {
+        # this means that you probably did not provide traces and accuracies
+        # and that they need to be synthesized
+        $self->set_accuracies();
+    }
+
+    if ($trace_for{a}) {
+        for my $base (@channels) {
+            $self->trace($base, $trace_for{$base});
+        }
+        $self->peak_indices($peak_indices);
+    }
+    else {
+        $self->_synthesize_traces();
+    }
+
+    $self->id($self->swq_obj()->id());
+    return $self;
+}
+
+sub swq_obj {
+     # Accessor for the object stored under the 'swq' key (set in new()).
+     my ($self) = @_;
+     return $self->{swq};
+}
+
+
+
+=head2 trace($base,\@new_values)
+
+ Title   : trace($base,\@new_values)
+ Usage   : @trace_Values  = @{$obj->trace($base,\@new_values)};
+ Function: Returns the trace values as a reference to an array containing the
+     trace values. The individual elements of the trace array are not validated
+     and can be any numeric value.
+ Returns : A reference to an array.
+ Status  : 
+Arguments: $base : which color channel would you like the trace values for?
+               - $base must be one of "A","T","G","C"
+          \@new_values : a reference to an array of values containing trace
+               data for this base
+
+=cut
+
+sub trace {
+    # Get or set the trace values for one colour channel.
+    #   $base_channel : one of a, c, g, t (case-insensitive). Required.
+    #   $values       : optional. An array reference is stored as-is; any
+    #                   other true value is split on whitespace into a
+    #                   new array.
+    # Returns the stored array reference for the channel, or nothing
+    # (empty list / undef) when the channel has no data.
+    # Throws when the channel is missing or is not a single valid letter.
+    my ($self,$base_channel,$values) = @_;
+    if (!$base_channel) {
+        $self->throw('You must provide a valid base channel (atgc) to use trace()');
+    }
+    $base_channel = lc $base_channel;
+    # Anchored match: the earlier unanchored /[acgt]/ let through any
+    # string that merely contained one of the letters (e.g. "cat").
+    if ($base_channel !~ /\A[acgt]\z/) {
+        $self->throw('You must provide a valid base channel (atgc) to use trace()');
+    }
+    if ($values) {
+        if (ref($values) eq "ARRAY") {
+            $self->{trace}->{$base_channel} = $values;
+        }
+        else {
+            my @trace = split(' ',$values);
+            $self->{trace}->{$base_channel} = \@trace;
+        }
+    }
+    if ($self->{trace}->{$base_channel}) {
+        return $self->{trace}->{$base_channel};
+    }
+    return;
+}
+
+
+=head2 peak_indices($new_indices)
+
+ Title   : peak_indices($new_indices)
+ Usage   : $indices = $obj->peak_indices($new_indices);
+ Function: Return the trace index points for this object.
+ Returns : A scalar
+ Args    : If used, the trace indices will be set to the provided value.
+
+=cut
+
+sub peak_indices {
+    # Get or set the list of peak indices.
+    # A true argument replaces the stored list: an array reference is kept
+    # as-is, anything else is split on whitespace. If nothing is stored
+    # yet, an empty array is created so callers always get a reference.
+    my ($self,$peak_indices) = @_;
+    if ($peak_indices) {
+        $self->{peak_indices} = ref($peak_indices) eq "ARRAY"
+            ? $peak_indices
+            : [ split(' ',$peak_indices) ];
+    }
+    $self->{peak_indices} = [] if !$self->{peak_indices};
+    return $self->{peak_indices};
+}
+
+
+=head2 _reset_peak_indices()
+
+ Title   : _reset_peak_indices()
+ Usage   : $obj->_reset_peak_indices();
+ Function: Reset the peak indices.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : When you create a sub_trace_object, the peak indices
+     will still be pointing to the appropriate location _in the
+     original trace_. In order to fix this, the initial value must
+     be subtracted from each value here. ie. The first peak index
+     must be "1".
+
+=cut
+
+sub _reset_peak_indices {
+    # Rebase the peak indices on the first one: position 1 is reset via
+    # the "null" marker understood by peak_index_at(), and every later
+    # position has the original first index subtracted from it.
+    my ($self) = @_;
+    my $last_position = $self->length();
+    my $offset        = $self->peak_index_at(1);
+    $self->peak_index_at(1,"null");
+    for my $position (2 .. $last_position) {
+        my $rebased = $self->peak_index_at($position) - $offset;
+        $self->peak_index_at($position,$rebased);
+    }
+    return;
+}
+
+
+
+
+
+=head2 peak_index_at($position)
+
+ Title   : peak_index_at($position)
+ Usage   : $peak_index = $obj->peak_index_at($position);
+ Function: Return the trace index point at this position
+ Returns : A scalar
+ Args    : If used, the trace index at this position will be 
+     set to the provided value.
+
+=cut
+
+sub peak_index_at {
+    # Get or set the peak index at a 1-based position.
+    #   $position : 1-based position into the peak index list.
+    #   $value    : optional new value. The string "null" is still
+    #               accepted and stored as "0", as before.
+    # Returns the value stored at that position.
+    my ($self,$position,$value) = @_;
+    # Test definedness rather than truth so that a plain 0 can be stored;
+    # the truth test silently ignored 0.
+    if (defined $value) {
+        $self->peak_indices()->[$position-1] = ($value eq "null") ? "0" : $value;
+    }
+    return $self->peak_indices()->[$position-1];
+}
+
+=head2 alphabet()
+
+ Title   : alphabet();
+ Usage   : $molecule_type = $obj->alphabet();
+ Function: Get the molecule type from the PrimarySeq object.
+ Returns : What PrimarySeq says the type of the sequence is.
+ Args    : None.
+
+=cut
+
+sub alphabet {
+	# Forwards any arguments to alphabet() on the object found under the
+	# wrapped object's 'seq_ref' key.
+	# NOTE(review): this reaches into the internals of the wrapped object
+	# and presumes it has a seq_ref slot -- confirm against its class.
+	my ($self,@rest) = @_;
+	my $inner_seq = $self->{swq}->{seq_ref};
+	return $inner_seq->alphabet(@rest);
+}
+
+=head2 display_id()
+
+ Title   : display_id()
+ Usage   : $id_string = $obj->display_id();
+ Function: Returns the display id, aka the common name of the Quality
+        object.
+        The semantics of this is that it is the most likely string to be
+        used as an identifier of the quality sequence, and likely to have
+        "human" readability.  The id is equivalent to the ID field of the
+        GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
+        database. In fasta format, the >(\S+) is presumed to be the id,
+        though some people overload the id to embed other information.
+        Bioperl does not use any embedded information in the ID field,
+        and people are encouraged to use other mechanisms (accession
+	field for example, or extending the sequence object) to solve
+	this. Notice that $seq->id() maps to this function, mainly for
+        legacy/convenience issues.
+	This method sets the display_id for the Quality object.
+ Returns : A string
+ Args    : If a scalar is provided, it is set as the new display_id for
+	the Quality object.
+ Status  : Virtual
+
+=cut
+
+sub display_id {
+   # Get/set the display id by delegating to the wrapped object (swq).
+   # A defined argument is written through first; the current value is
+   # then read back and returned.
+   my ($self,$value) = @_;
+   my $wrapped = $self->{swq};
+   $wrapped->display_id($value) if defined $value;
+   return $wrapped->display_id();
+}
+
+=head2 accession_number()
+
+ Title   : accession_number()
+ Usage   : $unique_biological_key = $obj->accession_number();
+ Function: Returns the unique biological id for a sequence, commonly
+        called the accession_number. For sequences from established
+        databases, the implementors should try to use the correct
+        accession number. Notice that primary_id() provides the unique id
+        for the implementation, allowing multiple objects to have the same
+        accession number in a particular implementation. For sequences
+        with no accession number, this method should return "unknown".
+	This method sets the accession_number for the Quality
+	object. 
+ Returns : A string (the value of accession_number)
+ Args    : If a scalar is provided, it is set as the new accession_number
+	for the Quality object.
+ Status  : Virtual
+
+
+=cut
+
+sub accession_number {
+    # Get/set the accession number on the wrapped object (swq).
+    # Setter: writes the value through and returns it.
+    # Getter: returns the stored value, or 'unknown' when it is undefined.
+    my ($self,$acc) = @_;
+    if (defined $acc) {
+        $self->{swq}->accession_number($acc);
+        return $acc;
+    }
+    my $stored = $self->{swq}->accession_number();
+    return defined $stored ? $stored : 'unknown';
+}
+
+=head2 primary_id()
+
+ Title   : primary_id()
+ Usage   : $unique_implementation_key = $obj->primary_id();
+ Function: Returns the unique id for this object in this implementation.
+        This allows implementations to manage their own object ids in a
+        way the implementation can control; clients can expect one id to
+        map to one object. For sequences with no accession number, this
+        method should return a stringified memory location.
+	This method sets the primary_id for the Quality
+	object.
+ Returns : A string. (the value of primary_id)
+ Args    : If a scalar is provided, it is set as the new primary_id for
+	the Quality object.
+
+=cut
+
+sub primary_id {
+   # Get/set the primary id by delegating to the wrapped object (swq).
+   # Only a true argument is written through (a false value such as 0 or
+   # '' is treated as "no argument").
+   my ($self,$value) = @_;
+   my $wrapped = $self->{swq};
+   $wrapped->primary_id($value) if $value;
+   return $wrapped->primary_id();
+}
+
+=head2 desc()
+
+ Title   : desc()
+ Usage   : $qual->desc($newval); _or_ 
+           $description = $qual->desc();
+ Function: Get/set description text for this Quality object.
+ Returns : A string. (the value of desc)
+ Args    : If a scalar is provided, it is set as the new desc for the
+	   Quality object.
+
+=cut
+
+sub desc {
+   # Get/set the description by delegating to the wrapped object (swq).
+   # A defined argument is written through before the value is read back.
+   # probably will be used most often by set_common_features()
+   my ($self,$value) = @_;
+   my $wrapped = $self->{swq};
+   $wrapped->desc($value) if defined $value;
+   return $wrapped->desc();
+}
+
+=head2 id()
+
+ Title   : id()
+ Usage   : $id = $qual->id();
+ Function: Return the ID of the quality. This should normally be (and
+        actually is in the implementation provided here) just a synonym
+        for display_id().
+ Returns : A string. (the value of id)
+ Args    : If a scalar is provided, it is set as the new id for the
+	   Quality object.
+
+=cut
+
+sub id {
+   # Synonym for display_id(): a defined argument is written to the
+   # wrapped object's display_id, and the current display_id is returned.
+   my ($self,$value) = @_;
+   $self->throw("no value for self in $value") unless $self;
+   my $wrapped = $self->{swq};
+   $wrapped->display_id($value) if defined $value;
+   return $wrapped->display_id();
+}
+
+=head2 seq
+
+ Title   : seq()
+ Usage   : $string    = $obj->seq(); _or_
+	$obj->seq("atctatcatca");
+ Function: Returns the sequence that is contained in the imbedded
+	PrimarySeq object within the Quality object
+ Returns : A scalar (the seq() value for the imbedded PrimarySeq object.)
+ Args    : If a scalar is provided, the Quality object will
+	attempt to set that as the sequence for the imbedded PrimarySeq
+	object. Otherwise, the value of seq() for the PrimarySeq object
+	is returned.
+ Notes   : This is probably not a good idea because you then should call
+	length() to make sure that the sequence and quality are of the
+	same length. Even then, how can you make sure that this sequence
+	belongs with that quality? I provided this to give you rope to
+	hang yourself with. Tie it to a strong device and use a good
+	knot.
+
+=cut
+
+sub seq {
+	# Get/set the sequence string by delegating to the wrapped object
+	# (swq). A defined argument is written through before reading back.
+	my ($self,$value) = @_;
+	my $wrapped = $self->{swq};
+	$wrapped->seq($value) if defined $value;
+	return $wrapped->seq();
+}
+
+=head2 qual()
+
+ Title   : qual()
+ Usage   : @quality_values  = @{$obj->qual()}; _or_
+	$obj->qual("10 10 20 40 50");
+ Function: Returns the quality as imbedded in the PrimaryQual object
+	within the Quality object.
+ Returns : A reference to an array containing the quality values in the 
+	PrimaryQual object.
+ Args    : If a scalar is provided, the Quality object will
+	attempt to set that as the quality for the imbedded PrimaryQual
+	object. Otherwise, the value of qual() for the PrimaryQual
+	object is returned.
+ Notes   : This is probably not a good idea because you then should call
+	length() to make sure that the sequence and quality are of the
+	same length. Even then, how can you make sure that this sequence
+	belongs with that quality? I provided this to give you a strong
+	board with which to flagellate yourself. 
+
+=cut
+
+sub qual {
+	# Get/set the quality values by delegating to the wrapped object
+	# (swq). A defined argument is written through before reading back.
+	my ($self,$value) = @_;
+	my $wrapped = $self->{swq};
+	$wrapped->qual($value) if defined $value;
+	return $wrapped->qual();
+}
+
+
+
+
+=head2 length()
+
+ Title   : length()
+ Usage   : $length = $seqWqual->length();
+ Function: Get the length of the Quality sequence/quality.
+ Returns : Returns the length of the sequence and quality
+ Args    : None.
+
+=cut
+
+sub length {
+    # Length as reported by the object that seq_obj() returns.
+    my ($self) = @_;
+    my $holder = $self->seq_obj();
+    return $holder->length();
+}
+
+
+=head2 qual_obj
+
+ Title   : qual_obj($different_obj)
+ Usage   : $qualobj = $seqWqual->qual_obj(); _or_
+	$qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj);
+ Function: Get the PrimaryQual object that is imbedded in the
+	Quality object or if a reference to a PrimaryQual object
+	is provided, set this as the PrimaryQual object imbedded in the
+	Quality object.
+ Returns : A reference to a Bio::Seq::Quality object.
+
+=cut
+
+sub qual_obj {
+    # Returns the wrapped object stored under 'swq'.
+    # Any argument passed in is ignored: nothing is set here.
+#    return $self->{swq}->qual_obj($value);
+    my $self = shift;
+    return $self->{swq};
+}
+
+
+=head2 seq_obj
+
+ Title   : seq_obj()
+ Usage   : $seqobj = $seqWqual->seq_obj(); _or_
+	$seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj);
+ Function: Get the PrimarySeq object that is imbedded in the
+	Quality object or if a reference to a PrimarySeq object is
+	provided, set this as the PrimarySeq object imbedded in the
+	Quality object.
+ Returns : A reference to a Bio::PrimarySeq object.
+
+=cut
+
+sub seq_obj {
+    # Returns the wrapped object stored under 'swq'.
+    # Any argument passed in is ignored: nothing is set here.
+#    return $self->{swq}->seq_obj($value);
+    my $self = shift;
+    return $self->{swq};
+}
+
+=head2 _set_descriptors
+
+ Title   : _set_descriptors()
+ Usage   : $seqWqual->_set_descriptors($qual,$seq,$id,$acc,$pid,$desc,$given_id,
+	$alphabet);
+ Function: Set the descriptors for the Quality object. Try to
+	match the descriptors in the PrimarySeq object and in the
+	PrimaryQual object if descriptors were not provided with
+	construction.
+ Returns : Nothing.
+ Args    : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found
+	in the new() method.
+ Notes   : Really only intended to be called by the new() method. If
+	you want to invoke a similar function try
+	set_common_descriptors().
+
+=cut
+
+
+sub _set_descriptors {
+    # Hands the eight descriptor arguments ($qual, $seq, $id, $acc, $pid,
+    # $desc, $given_id, $alphabet) to _seq_descriptors() on the wrapped
+    # object. Missing trailing arguments are passed as undef.
+    # NOTE(review): the callee is spelled _seq_descriptors, not
+    # _set_descriptors -- confirm the wrapped class provides that method.
+    my $self = shift;
+    my @descriptors = @_[0 .. 7];
+    $self->{swq}->_seq_descriptors(@descriptors);
+}
+
+=head2 subseq($start,$end)
+
+ Title   : subseq($start,$end)
+ Usage   : $subsequence = $obj->subseq($start,$end);
+ Function: Returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence.
+ Returns : A string.
+ Args    : Two positions.
+
+=cut
+
+sub subseq {
+    # Forward to subseq() on the wrapped object (swq); every argument
+    # after the invocant is passed through unchanged.
+    # (Restores the '@' sigil on @args that the list archiver mangled
+    # into " at ", which left the parameter list syntactically invalid.)
+    my ($self,@args) = @_;
+    # does a single value work?
+    return $self->{swq}->subseq(@args);
+}
+
+=head2 baseat($position)
+
+ Title   : baseat($position)
+ Usage   : $base_at_position_6 = $obj->baseat("6");
+ Function: Returns a single base at the given position, where the first
+	base is 1 and the number is inclusive, ie 1-2 are the first two
+	bases of the sequence.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub baseat {
+    # Single-position lookup: asks the wrapped object (swq) for the
+    # subsequence running from $val to $val.
+    my ($self,$val) = @_;
+    my $wrapped = $self->{swq};
+    return $wrapped->subseq($val,$val);
+}
+
+=head2 subqual($start,$end)
+
+ Title   : subqual($start,$end)
+ Usage   : @qualities = @{$obj->subqual(10,20)};
+ Function: returns the quality values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+sub subqual {
+    # Forward to subqual() on the wrapped object (swq); every argument
+    # after the invocant is passed through unchanged.
+    # (Restores the '@' sigil on @args that the list archiver mangled.)
+    my ($self,@args) = @_;
+    return $self->{swq}->subqual(@args);
+}
+
+=head2 qualat($position)
+
+ Title   : qualat($position)
+ Usage   : $quality = $obj->qualat(10);
+ Function: Return the quality value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A scalar.
+ Args    : A position.
+
+=cut
+
+sub qualat {
+    # Delegates the single-position quality lookup to the wrapped
+    # object (swq).
+    my ($self,$val) = @_;
+    my $wrapped = $self->{swq};
+    return $wrapped->qualat($val);
+}
+
+=head2 sub_peak_index($start,$end)
+
+ Title   : sub_peak_index($start,$end)
+ Usage   : @peak_indices = @{$obj->sub_peak_index(10,20)};
+ Function: returns the trace index values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two trace indices for this channel.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+sub sub_peak_index {
+   # Return a copy of the peak indices for the 1-based, inclusive range
+   # $start..$end as a new array reference.
+   # Throws when $start is greater than $end, when $start is not
+   # positive, or when $end exceeds $self->length.
+   my ($self,$start,$end) = @_;
+   if( $start > $end ){
+       # Message corrected: it used to claim start "has to be greater
+       # than" end, the opposite of what is enforced here.
+       $self->throw("in sub_peak_index, start [$start] has to be less than or equal to end [$end]");
+   }
+
+   if( $start <= 0 || $end > $self->length ) {
+       $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
+   }
+
+   # Positions are 1-based, the array is 0-based. Going through the
+   # accessor guarantees an array reference even if none was stored yet.
+   my @sub_peak_index_array = @{$self->peak_indices()}[$start-1 .. $end-1];
+
+   return \@sub_peak_index_array;
+}
+
+=head2 sub_trace($start,$end)
+
+ Title   : sub_trace($base_channel,$start,$end)
+ Usage   : @trace_values = @{$obj->sub_trace('a',10,20)};
+ Function: returns the trace values from $start to $end, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the
+	first two bases of the sequence. Start cannot be larger than
+	end but can be equal.
+ Returns : A reference to an array.
+ Args    : a start position and an end position
+
+=cut
+
+sub sub_trace {
+   # Return a copy of one channel's trace values for the 1-based,
+   # inclusive range $start..$end as a new array reference.
+   #   $base_channel : channel name handed on to trace().
+   # Throws when $start is greater than $end, when $start is not
+   # positive, or when $end exceeds trace_length().
+   my ($self,$base_channel,$start,$end) = @_;
+   if( $start > $end ){
+       # Message corrected: it used to claim start "has to be greater
+       # than" end, the opposite of what is enforced here.
+       $self->throw("in sub_trace, start [$start] has to be less than or equal to end [$end]");
+   }
+
+   if( $start <= 0 || $end > $self->trace_length() ) {
+       $self->throw("You have to have start positive and length less than the total length of traces [$start:$end] Total ".$self->trace_length."");
+   }
+
+   # Positions are 1-based, the array is 0-based.
+   my @sub_trace_array = @{$self->trace($base_channel)}[$start-1 .. $end-1];
+
+   return \@sub_trace_array;
+}
+
+=head2 trace_length()
+
+ Title   : trace_length()
+ Usage   : $trace_length = $obj->trace_length();
+ Function: Return the length of the trace if all four traces (atgc)
+     are the same. Otherwise, throw an error.
+ Returns : A scalar.
+ Args    : none
+
+=cut
+
+sub trace_length {
+    # Return the common length of the four trace channels (a, t, g, c).
+    # When a channel is missing, or the channels differ in length, a
+    # warning is issued and nothing (undef / empty list) is returned.
+    my $self = shift;
+    my %length_of;
+    for my $base (qw(a t g c)) {
+        my $channel = $self->trace($base);
+        if (!$channel) {
+            $self->warn("One or more of the trace channels are missing. Cannot give you a length.");
+            # Stop here: carrying on would dereference the missing
+            # channel and die right after the warning.
+            return;
+        }
+        $length_of{$base} = scalar @{$channel};
+    }
+    if (!grep { $length_of{$_} != $length_of{a} } qw(t g c)) {
+        return $length_of{a};
+    }
+    $self->warn("Not all of the trace indices are the same length".
+         " Here are their lengths: a: $length_of{a} t:$length_of{t} ".
+         " g: $length_of{g} c: $length_of{c}");
+    # Explicit empty return instead of leaking whatever warn() returns.
+    return;
+}
+
+
+
+=head2 sub_trace_object($start,$end)
+
+ Title   : sub_trace_object($start,$end)
+ Usage   : $smaller_object = $object->sub_trace_object('1','100');
+ Function: Get a subset of the sequence, its quality, and its trace.
+ Returns : A reference to a Bio::Seq::SequenceTrace object
+ Args    : a start position and an end position
+ Notes   : 
+     - the start and end position refer to the positions of _bases_.
+     - for example, to get a sub SequenceTrace for bases 5-10,
+          use this routine.
+          - you will get the bases, qualities, and the trace values
+          - you can then use this object to synthesize a new scf
+               using seqIO::scf.
+
+=cut
+
+sub sub_trace_object {
+     # Build a new Bio::Seq::SequenceTrace covering bases $start..$end
+     # (1-based, inclusive): the sub-sequence, its qualities, the slice
+     # of each trace channel lying between the first and last peak index
+     # of the range, and the peak indices themselves.
+     # Returns the new object after recomputing its accuracies and
+     # rebasing its peak indices.
+     my ($self,$start,$end) = @_;
+     my $peaks = $self->sub_peak_index($start,$end);
+     # Read both ends by index. The earlier shift-then-pop left the end
+     # undefined whenever the range held a single base.
+     my ($start2,$end2) = ($peaks->[0], $peaks->[-1]);
+     my $new_object = Bio::Seq::SequenceTrace->new(
+               -swq => Bio::Seq::Quality->new(
+                             -seq  => $self->subseq($start,$end),
+                             -qual => $self->subqual($start,$end),
+                             -id   => $self->id()
+                         ),
+             -trace_a  => $self->sub_trace('a',$start2,$end2),
+             -trace_t  => $self->sub_trace('t',$start2,$end2),
+             -trace_g  => $self->sub_trace('g',$start2,$end2),
+             -trace_c  => $self->sub_trace('c',$start2,$end2),
+             # $peaks is already a private copy made by sub_peak_index()
+             -peak_indices => $peaks
+        );
+     $new_object->set_accuracies();
+     $new_object->_reset_peak_indices();
+     return $new_object;
+}
+
+=head2 _synthesize_traces()
+
+ Title   : _synthesize_traces()
+ Usage   : $obj->_synthesize_traces();
+ Function: Synthesize false traces for this object.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : This method is intended to be invoked when this
+     object is created with a SWQ object- that is to say that
+     there is a sequence and a set of qualities but there was
+     no actual trace data.
+
+=cut
+
+sub _synthesize_traces {
+     # Fabricate trace data from the sequence and its quality values: for
+     # every base a fixed 13-point ramp, scaled by that base's quality, is
+     # written into the channel of that base, and a peak index is recorded.
+     # Takes no arguments besides the invocant; the return value is not
+     # meaningful.
+     my ($self) = shift;
+     # NOTE(review): qw() is an empty list, so no value is passed here;
+     # per peak_indices() this only ensures the index array exists and
+     # does not clear indices that are already stored.
+     $self->peak_indices(qw());
+#ml     my $version = 2;
+          # the user should be warned if traces already exist
+          #
+          #
+#ml     ( my $sequence = $self->seq() ) =~ tr/a-z/A-Z/;
+#ml     my @quals = @{$self->qual()};
+#ml     my $info;
+         # build the ramp for the first base.
+         # a ramp looks like this "1 4 13 29 51 75 80 75 51 29 13 4 1" times the quality score.
+         # REMEMBER: A C G T
+         # note to self-> smooth this thing out a bit later
+     my $ramp_data;
+    @{$ramp_data->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 );
+         # the width of the ramp
+    $ramp_data->{'ramp_width'} = scalar(@{$ramp_data->{'ramp'}});
+         # how far should the peaks overlap?
+    $ramp_data->{'ramp_overlap'} = 1;
+          # where should the peaks be located?
+          # (peak_at is stored but not read again within this sub)
+    $ramp_data->{'peak_at'} = 7;
+         # total span: one ramp per base, minus the overlap per base
+    $ramp_data->{'ramp_total_length'} =
+          $self->seq_obj()->length() * $ramp_data->{'ramp_width'}
+          - $self->seq_obj()->length() * $ramp_data->{'ramp_overlap'};
+    my $pos;
+    my $total_length = $ramp_data->{ramp_total_length};
+         # fill every channel with "0", two entries longer than the span
+     $self->initialize_traces("0",$total_length+2);
+         # now populate them
+    my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position);
+#ml    my $sequence_length = $self->length();
+    my $half_ramp = int($ramp_data->{'ramp_width'}/2);
+         # $pos is 0-based; subseq() and qualat() are called with $pos+1
+    for ($pos = 0; $pos<$self->length();$pos++) {
+          $current_base = uc $self->seq_obj()->subseq($pos+1,$pos+1);
+               # print("Synthesizing the ramp for $current_base\n");
+          my $all_bases = "ATGC";
+          $peak_quality = $self->qual_obj()->qualat($pos+1);
+                    # where should the peak for this base be placed? Modeled after a mktrace scf
+          $place_base_at = ($pos * $ramp_data->{'ramp_width'}) -
+                      ($pos * $ramp_data->{'ramp_overlap'}) -
+                   $half_ramp + $ramp_data->{'ramp_width'} - 1;
+               # print("Placing this base at this position: $place_base_at\n");
+          push @{$self->peak_indices()},$place_base_at;
+               # the ramp starts half a ramp width before the peak
+          $ramp_position = $place_base_at - $half_ramp;
+               # any base matching "N" has its ramp written to the A channel
+          if ($current_base =~ "N" ) {
+               $current_base = "A";
+          }
+               # write the scaled ramp, one point at a time, via trace_value_at()
+               # (not visible in this section; presumably a per-position
+               # setter -- TODO confirm)
+          for ($current_ramp = 0; $current_ramp < $ramp_data->{'ramp_width'};  $current_ramp++) {
+                    # print("Placing a trace value here: $current_base ".($ramp_position+$current_ramp+1)." ".$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]."\n");
+             $self->trace_value_at($current_base,$ramp_position+$current_ramp+1,$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]);
+          }
+               # store $place_base_at+1 at position $pos+1; if the index list
+               # was empty on entry this replaces the value pushed above
+          $self->peak_index_at($pos+1,
+              $place_base_at+1
+          );
+#ml          my $other_bases = $self->_get_other_bases($current_base);
+          # foreach ( split('',$other_bases) ) {
+          #          push @{$self->{'text'}->{"v3_base_accuracy"}->{$_}},0;
+          #}
+    }
+}
+
+
+
+
+
+=head2 _dump_traces($transformed)
+
+ Title   : _dump_traces("transformed")
+ Usage   : &_dump_traces($ra,$rc,$rg,$rt);
+ Function: Used in debugging. Prints all traces one beside each other.
+ Returns : Nothing.
+ Args    : References to the arrays containing the traces for A,C,G,T.
+ Notes   : Beats using dumpValue, I'll tell ya. Much better than using
+           join' ' too.
+     - if a scalar is included as an argument (any scalar), this
+     procedure will dump the _delta'd trace. If you don't know what
+     that means you should not be using this.
+
+=cut
+
+#'
+sub _dump_traces {
+    my ($self) = @_;
+    # Debug helper: print a header row, then one tab-separated row per
+    # trace point: a zero-based counter plus the a, c, g and t values.
+    print ("Count\ta\tc\tg\tt\n");
+    my $length = $self->trace_length();
+    for my $curr (1 .. $length) {
+        # trace_value_at() is queried with positions 1 .. trace_length()
+        print(join("\t", $curr-1,
+                   map { $self->trace_value_at($_,$curr) } qw(a c g t))."\n");
+    }
+    return;
+}
+
+=head2 initialize_traces($value,$length)
+
+ Title   : initialize_traces($value,$length)
+ Usage   : $trace_object->initialize_traces($value,$length);
+ Function: Fills each trace channel (a,t,g,c) with $length copies of $value.
+ Returns : Nothing.
+ Args    : The fill value and the number of positions in each trace.
+
+=cut
+
+sub initialize_traces {
+     my ($self,$value,$length) = @_;
+     # Give each of the four channels (a, t, g, c) its own array of
+     # $length entries, every entry set to $value, via trace().
+     for my $channel (qw(a t g c)) {
+          # a fresh list per channel, so the channels never share storage
+          my @filled = ($value) x $length;
+          $self->trace($channel,\@filled);
+     }
+}
+
+=head2 trace_value_at($channel,$position)
+
+ Title   : trace_value_at($channel,$position)
+ Usage   : $value = $trace_object->trace_value_at($channel,$position);
+ Function: What is the value of the trace for this base at this position?
+ Returns : A scalar representing the trace value here.
+ Args    : a base channel (a,t,g,c)
+           a position ( < $trace_object->trace_length() )
+
+=cut
+
+sub trace_value_at {
+     my ($self,$channel,$position,$value) = @_;
+     # only a true $value is stored; 0, '' and undef leave the trace as is
+     $self->trace($channel)->[$position] = $value if $value;
+     # read back through sub_trace() over the one-point range at $position
+     return $self->sub_trace($channel,$position,$position)->[0];
+}
+
+sub _deprecated_get_scf_version_2_base_structure {
+          # this sub is deprecated- check inside SeqIO::scf
+     # Purpose : flatten the base calls into one list holding five entries
+     #           per base: the peak index, then the a, t, g and c values.
+     # Returns : a reference to that list.
+     # The called base's channel carries its quality value; every other
+     # channel carries "0".
+     my $self = shift;
+     my @structure;
+     my $length = $self->length();
+     for my $current (1 .. $length) {
+          my $base_here = lc($self->seq_obj()->subseq($current,$current));
+          # Start all four channels at "0", then record the quality under
+          # the called base. Using the base as a hash key (not as a regex)
+          # keeps characters such as "." or "*" from matching or aborting;
+          # a base outside atgc (e.g. "n") leaves all four channels at "0".
+          my %probabilities = map { $_ => "0" } qw(a t g c);
+          $probabilities{$base_here} = $self->qual_obj()->qualat($current);
+          # push appends in place rather than rebuilding the list each pass
+          push @structure,
+              $self->peak_index_at($current),
+              $probabilities{'a'},
+              $probabilities{'t'},
+              $probabilities{'g'},
+              $probabilities{'c'};
+     }
+     return \@structure;
+}
+
+sub _deprecated_get_scf_version_3_base_structure {
+     # Deprecated: joins whatever peak_indices() returns into one string.
+     # NOTE(review): peak_indices() looks like it returns an array ref - confirm.
+     my ($self) = @_;
+     return join('',$self->peak_indices());
+}
+
+
+=head2 accuracies($channel,$value)
+
+ Title   : accuracies($channel,$value)
+ Usage   : $accuracies = $trace_object->accuracies($channel);
+ Function: Get or set the base accuracies for one channel.
+ Returns : A reference to an array of accuracy values for $channel.
+ Args    : a base channel (a,t,g,c)
+           optionally an array ref or a space-separated string of values
+
+=cut
+
+
+sub accuracies {
+     my ($self,$channel,$value) = @_;
+     # Acts as a plain getter unless a true $value is passed in.
+     # The per-channel data lives in $self->{accuracies}->{$channel}.
+     if ($value) {
+          # an array ref is kept as given; any other value is taken to be a
+          # whitespace-separated string and is split into a new array
+          my $is_list = ref($value) eq "ARRAY";
+          $self->{accuracies}->{$channel} =
+               $is_list ? $value : [ split(' ',$value) ];
+     }
+     return $self->{accuracies}->{$channel};
+}
+
+
+=head2 set_accuracies()
+
+ Title   : set_accuracies()
+ Usage   : $trace_object->set_accuracies();
+ Function: Take a sequence's quality and synthesize proper scf-style
+     base accuracies that can then be accessed with
+     accuracies("a") or something like it.
+ Returns : Nothing.
+ Args    : None.
+
+=cut
+
+sub set_accuracies {
+     my ($self) = @_;
+     # Walk the sequence one base at a time (positions are 1-based).
+     my $last = $self->length();
+     for my $position (1 .. $last) {
+          my $called_base = $self->seq_obj()->subseq($position,$position);
+          my $quality = $self->qual_obj()->qualat($position);
+          # the called base's channel receives the quality value ...
+          $self->accuracy_at($called_base,$position,$quality);
+          # ... and every remaining channel is zeroed via the "null" marker
+          $self->accuracy_at($_,$position,"null")
+               for split('',$self->_get_other_bases($called_base));
+     }
+}
+
+
+=head2 scf_dump()
+
+ Title   : scf_dump()
+ Usage   : $trace_object->scf_dump();
+ Function: Prints out the contents of the structures representing
+     the SequenceTrace in a manner similar to io_lib's scf_dump.
+ Returns : Nothing. Prints out the contents of the structures
+     used to represent the sequence and its trace.
+ Args    : None.
+ Notes   : Used in debugging, obviously.
+
+=cut
+
+sub scf_dump {
+     my ($self) = @_;
+     # one line per base: the base, its peak index, then a/c/g/t accuracies
+     for (my $position = 1; $position <= $self->length(); $position++) {
+          my $base = lc($self->seq_obj()->subseq($position,$position));
+          print(sprintf("%s %05d\t",$base,$self->peak_index_at($position)));
+          for my $channel (qw(a c g t)) {   # already in sorted order
+               print(sprintf("%03d\t",$self->accuracy_at($channel,$position)));
+          }
+          print("\n");
+     }
+     $self->_dump_traces();   # finish with the raw trace table
+}
+
+=head2 _get_other_bases($this_base)
+
+ Title   : _get_other_bases($this_base)
+ Usage   : $other_bases = $trace_object->_get_other_bases($this_base);
+ Function: A utility routine to return bases other than the one provided.
+     I was doing this over and over so I put it here.
+ Returns : Three of a,t,g and c.
+ Args    : A base (atgc)
+ Notes   : $obj->_get_other_bases("a") returns "tgc"
+
+=cut
+
+sub _get_other_bases {
+     my ($self,$this_base) = @_;
+     $this_base = lc($this_base);
+     my $all_bases = "atgc";
+     my $found = index($all_bases,$this_base);   # literal match, never a regex
+     substr($all_bases,$found,length($this_base),'') if $found >= 0;
+     return $all_bases;   # unknown, empty, "." or "*" bases return all four
+}
+
+
+=head2 accuracy_at($base,$position)
+
+ Title   : accuracy_at($base,$position)
+ Usage   : $accuracy = $trace_object->accuracy_at($base,$position);
+ Function: Get or set the accuracy for $base at $position.
+ Returns : Returns the accuracy of finding $base at $position.
+ Args    : 1. a base channel (atgc) 2. a position 3. a value to _set_ the accuracy
+ Notes   : Positions start at 1. A value of "null" is stored as "0".
+
+=cut
+
+
+sub accuracy_at {
+     my ($self,$base,$position,$value) = @_;
+     $base = lc($base);
+     # Setter when $value is defined and non-empty, getter otherwise.
+     # Testing for that rather than for truth lets a quality of 0 be
+     # stored; the legacy "null" marker is still accepted and stored as "0".
+     if (defined $value && length $value) {
+          $self->{accuracies}->{$base}->[$position-1] =
+               $value eq "null" ? "0" : $value;
+     }
+     # positions are 1-based, the stored arrays are 0-based
+     return $self->{accuracies}->{$base}->[$position-1];
+}
+
+1;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/SequenceTrace.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/TraceI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq/TraceI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq/TraceI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,236 @@
+# BioPerl module for Bio::Seq::TraceI
+#
+# Cared for by Chad Matsalla <bioinformatics at dieselwurks.com>
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq::TraceI - Interface definition for a Bio::Seq::Trace
+
+=head1 SYNOPSIS
+
+    # get a Bio::Seq::Qual compliant object somehow
+          $st = &get_object_somehow();
+
+    # to test this is a seq object
+          $st->isa("Bio::Seq::TraceI") 
+               || $obj->throw("$obj does not implement the Bio::Seq::TraceI interface");
+
+          # set the trace for T to be @trace_points
+     my $arrayref = $st->trace("T",\@trace_points);
+          # get the trace points for "C"
+     my $arrayref = $st->trace("C");
+          # get a subtrace for "G" from 10 to 100 
+     $arrayref = $st->subtrace("G",10,100);
+          # what is the trace value for "A" at position 355?
+     my $trace_value = $st->traceat("A",355);
+          # create a false trace for "A" with $accuracy
+     $arrayref = $st->false_trace("A",Bio::Seq::Quality, $accuracy);
+          # does this trace have entries for each base?
+     $bool = $st->is_complete();
+          # how many entries are there in this trace?
+     $length = $st->length();
+
+
+
+=head1 DESCRIPTION
+
+This object defines an abstract interface to basic trace information. This
+information may have come from an ABI- or scf- formatted file or may have been
+made up.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics at dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Seq::TraceI;
+use strict;
+use Carp;
+use Dumpvalue;
+use Bio::Root::RootI;
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 trace($base,\@new_values)
+
+ Title   : trace($base,\@new_values)
+ Usage   : @trace_Values  = @{$obj->trace($base,\@new_values)};
+ Function: Returns the trace values as a reference to an array containing the
+     trace values. The individual elements of the trace array are not validated
+     and can be any numeric value.
+ Returns : A reference to an array.
+ Status  : 
+Arguments: $base : which color channel would you like the trace values for?
+               - $base must be one of "A","T","G","C"
+          \@new_values : a reference to an array of values containing trace
+               data for this base
+
+=cut
+
+sub trace {
+   my ($self) = @_;
+   # abstract stub: always fails; implementing classes must override it
+   my $message = "Bio::Seq::TraceI definition of trace - implementing class did not provide this method";
+   # prefer the object's own throw() and fall back to Carp's confess()
+   return $self->throw($message) if $self->can('throw');
+   confess($message);
+}
+
+=head2 subtrace($base,$start,$end)
+
+ Title   : subtrace($base,$start,$end)
+ Usage   : @subset_of_traces = @{$obj->subtrace("A",10,40)};
+ Function: returns the trace values from $start to $end, where the
+	first value is 1 and the number is inclusive, ie 1-2 are the first
+	two trace values of this base. Start cannot be larger than end but can
+	be equal.
+ Returns : A reference to an array.
+ Args    : $base: "A","T","G" or "C"
+          $start: a start position
+          $end  : an end position
+
+=cut
+
+sub subtrace {
+   my ($self) = @_;
+   # Abstract stub: reaching this method means the implementing class did
+   # not override subtrace(), so it always raises an error.
+   my $message = "Bio::Seq::TraceI definition of subtrace - implementing class did not provide this method";
+
+   # use the object's own throw() when available, else Carp's confess()
+   return $self->throw($message) if $self->can('throw');
+   confess($message);
+}
+
+=head2 can_call_new()
+
+ Title   : can_call_new()
+ Usage   : if( $obj->can_call_new ) {
+             $newobj = $obj->new( %param );
+	 }
+ Function: can_call_new returns 1 or 0 depending on whether an
+	implementation allows new constructor to be called. If a new
+	constructor is allowed, then it should take the followed hashed
+	constructor list.
+           $myobject->new( -qual => $quality_as_string,
+			   -display_id  => $id,
+			   -accession_number => $accession,
+			   );
+ Example :
+ Returns : 1 or 0
+ Args    :
+
+
+=cut
+
+sub can_call_new{
+   my ($self,@args) = @_;   # any extra arguments are accepted but unused
+   # we default to 0 here: this interface offers no new() constructor
+   return 0;
+}
+
+=head2 traceat($channel,$position)
+
+ Title   : traceat($channel,$position)
+ Usage   : $trace = $obj->traceat("A",500);
+ Function: Return the trace value at the given location, where the
+        first value is 1 and the number is inclusive, ie 1-2 are the first
+        two bases of the sequence. Start cannot be larger than end but can
+        be equal.
+ Returns : A scalar.
+ Args    : A base and a position.
+
+=cut
+
+sub traceat {
+   my ($self,$value) = @_;
+   # Stub: unlike the other interface methods this one only warns, then
+   # hands back an empty string instead of raising an error.
+   my $message = "Bio::Seq::TraceI definition of traceat - implementing class did not provide this method";
+   if ( $self->can('warn') ) { $self->warn($message); }
+   else                      { warn($message); }
+   return '';
+} 
+
+=head2 length()
+
+ Title   : length()
+ Usage   : $length = $obj->length("A");
+ Function: Return the length of the array holding the trace values for the "A"
+     channel. A check should be done to make sure that this Trace object
+     is_complete() before doing this to prevent hazardous results.
+ Returns : A scalar (the number of elements in the trace array).
+ Args    : If used, get the traces from that channel. Default to "A"
+
+=cut
+
+sub length {
+   my ($self)= @_;
+   # abstract stub: implementing classes must supply their own length()
+   my $message = "Bio::Seq::TraceI definition of length - implementing class did not provide this method";
+   # report through the object's throw() if it has one, else via confess()
+   return $self->throw($message) if $self->can('throw');
+   confess($message);
+}
+
+=head2 trace_indices($new_indices)
+
+ Title   : trace_indices($new_indices)
+ Usage   : $indices = $obj->trace_indices($new_indices);
+ Function: Return the trace index points for this object.
+ Returns : A scalar
+ Args    : If used, the trace indices will be set to the provided value.
+
+=cut
+
+sub trace_indices {
+   my ($self)= @_;
+   # abstract stub: there is no default storage for trace indices here
+   my $message = "Bio::Seq::TraceI definition of trace_indices - implementing class did not provide this method";
+   # hand the error to the object's throw() if present, else to confess()
+   return $self->throw($message) if $self->can('throw');
+   confess($message);
+}
+
+
+
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Seq/TraceI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Seq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Seq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Seq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1252 @@
+# $Id: Seq.pm,v 1.91.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Seq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq - Sequence object, with features
+
+=head1 SYNOPSIS
+
+    # This is the main sequence object in Bioperl
+
+    # gets a sequence from a file
+    $seqio  = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat');
+    $seqobj = $seqio->next_seq();
+
+    # SeqIO can both read and write sequences; see Bio::SeqIO
+    # for more information and examples
+
+    # get from database
+    $db = Bio::DB::GenBank->new();
+    $seqobj = $db->get_Seq_by_acc('X78121');
+
+    # make from strings in script
+    $seqobj = Bio::Seq->new( -display_id => 'my_id',
+			     -seq => $sequence_as_string);
+
+    # gets sequence as a string from sequence object
+    $seqstr   = $seqobj->seq(); # actual sequence as a string
+    $seqstr   = $seqobj->subseq(10,50); # slice in biological coordinates
+
+    # retrieves information from the sequence
+    # features must implement Bio::SeqFeatureI interface
+
+    @features = $seqobj->get_SeqFeatures(); # just top level
+    foreach my $feat ( @features ) {
+	print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ",
+	$feat->end," strand ",$feat->strand,"\n";
+
+        # features retain link to underlying sequence object
+        print "Feature sequence is ",$feat->seq->seq(),"\n"
+    }
+
+    # sequences may have a species
+
+    if( defined $seq->species ) {
+	print "Sequence is from ",$species->binomial," [",$species->common_name,"]\n";
+    }
+
+    # annotation objects are Bio::AnnotationCollectionI's
+    $ann      = $seqobj->annotation(); # annotation object
+
+    # references is one type of annotations to get. Also get
+    # comment and dblink. Look at Bio::AnnotationCollection for
+    # more information
+
+    foreach my $ref ( $ann->get_Annotations('reference') ) {
+	print "Reference ",$ref->title,"\n";
+    }
+
+    # you can get truncations, translations and reverse complements, these
+    # all give back Bio::Seq objects themselves, though currently with no
+    # features transferred
+
+    my $trunc = $seqobj->trunc(100,200);
+    my $rev   = $seqobj->revcom();
+
+    # there are many options to translate - check out the docs
+    my $trans = $seqobj->translate();
+
+    # these functions can be chained together
+
+    my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate();
+
+
+
+=head1 DESCRIPTION
+
+A Seq object is a sequence with sequence features placed on it. The
+Seq object contains a PrimarySeq object for the actual sequence and
+also implements its interface.
+
+In Bioperl we have 3 main players that people are going to use frequently
+
+  Bio::PrimarySeq  - just the sequence and its names, nothing else.
+  Bio::SeqFeatureI - a feature on a sequence, potentially with a sequence
+                     and a location and annotation.
+  Bio::Seq         - A sequence and a collection of sequence features
+                     (an aggregate) with its own annotation.
+
+Although Bioperl is not tied heavily to file formats these distinctions do
+map to file formats sensibly and for some bioinformaticians this might help
+
+  Bio::PrimarySeq  - Fasta file of a sequence
+  Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table
+  Bio::Seq         - A single EMBL/GenBank/DDBJ entry
+
+By having this split we avoid a lot of nasty circular references
+(sequence features can hold a reference to a sequence without the sequence
+holding a reference to the sequence feature). See L<Bio::PrimarySeq> and
+L<Bio::SeqFeatureI> for more information.
+
+Ian Korf really helped in the design of the Seq and SeqFeature system.
+
+=head2 Examples
+
+A simple and fundamental block of code:
+
+  use Bio::SeqIO;
+
+  my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object
+  my $seqobj = $seqIOobj->next_seq;              # get a Seq object
+
+With the Seq object in hand one has access to a powerful set of Bioperl
+methods and related Bioperl objects. This next script will take a file of sequences
+in EMBL format and create a file of the reverse-complemented sequences
+in Fasta format using Seq objects. It also prints out details about the
+exons it finds as sequence features in GFF (General Feature Format).
+
+  use Bio::Seq;
+  use Bio::SeqIO;
+
+  $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+  $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+
+  while((my $seqobj = $seqin->next_seq())) {
+      print "Seen sequence ",$seqobj->display_id,", start of seq ",
+            substr($seqobj->seq,1,10),"\n";
+      if( $seqobj->alphabet eq 'dna') {
+	    $rev = $seqobj->revcom;
+	    $id  = $seqobj->display_id();
+            $id  = "$id.rev";
+            $rev->display_id($id);
+            $seqout->write_seq($rev);
+      }
+
+      foreach $feat ( $seqobj->get_SeqFeatures() ) {
+           if( $feat->primary_tag eq 'exon' ) {
+              print STDOUT "Location ",$feat->start,":",
+                    $feat->end," GFF[",$feat->gff_string,"]\n";
+	   }
+      }
+  }
+
+Let's examine the script. The lines below import the Bioperl modules.
+Seq is the main Bioperl sequence object and SeqIO is the Bioperl support
+for reading sequences from files and to files
+
+  use Bio::Seq;
+  use Bio::SeqIO;
+
+These two lines create two SeqIO streams: one for reading in sequences
+and one for outputting sequences:
+
+  $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+  $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+
+Notice that in the "$seqout" case there is a greater-than sign,
+indicating the file is being opened for writing.
+
+Using the
+
+  '-argument' => value
+
+syntax is common in Bioperl. The file argument is like an argument
+to open() . You can also pass in filehandles or FileHandle objects by
+using the -fh argument (see L<Bio::SeqIO> documentation for details).
+Many formats in Bioperl are handled, including Fasta, EMBL, GenBank,
+Swissprot (swiss), PIR, and GCG.
+
+  $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+  $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+
+This is the main loop which will loop progressively through sequences
+in a file, and each call to $seqin-E<gt>next_seq() provides a new Seq
+object from the file:
+
+  while((my $seqobj = $seqin->next_seq())) {
+
+This print line below accesses fields in the Seq object directly. The
+$seqobj-E<gt>display_id is the way to access the display_id attribute
+of the Seq object. The $seqobj-E<gt>seq method gets the actual
+sequence out as string. Then you can do manipulation of this if
+you want to (there are however easy ways of doing truncation,
+reverse-complement and translation).
+
+  print "Seen sequence ",$seqobj->display_id,", start of seq ",
+               substr($seqobj->seq,1,10),"\n";
+
+Bioperl has to guess the alphabet of the sequence, being either 'dna',
+'rna', or 'protein'. The alphabet attribute is one of these three
+possibilities.
+
+  if( $seqobj->alphabet eq 'dna') {
+
+The $seqobj-E<gt>revcom method provides the reverse complement of the Seq
+object as another Seq object. Thus, the $rev variable is a reference to
+another Seq object. For example, one could repeat the above print line
+for this Seq object (putting $rev in place of $seqobj). In this
+case we are going to output the object into the file stream we built
+earlier on.
+
+  $rev = $seqobj->revcom;
+
+When we output it, we want the id of the outputted object
+to be changed to "$id.rev", ie, with .rev on the end of the name. The
+following lines retrieve the id of the sequence object, add .rev
+to this and then set the display_id of the rev sequence object to
+this. Notice that to set the display_id attribute you just need
+call the same method, display_id(), with the new value as an argument.
+Getting and setting values with the same method is common in Bioperl.
+
+  $id  = $seqobj->display_id();
+  $id  = "$id.rev";
+  $rev->display_id($id);
+
+The write_seq method on the SeqIO output object, $seqout, writes the
+$rev object to the filestream we built at the top of the script.
+The filestream knows that it is outputting in fasta format, and
+so it provides fasta output.
+
+  $seqout->write_seq($rev);
+
+This block of code loops over sequence features in the sequence
+object, trying to find ones who have been tagged as 'exon'.
+Features have start and end attributes and can be outputted
+in General Feature Format, GFF, a standardized format for sequence
+features.
+
+  foreach $feat ( $seqobj->get_SeqFeatures() ) {
+      if( $feat->primary_tag eq 'exon' ) {
+          print STDOUT "Location ",$feat->start,":",
+             $feat->end," GFF[",$feat->gff_string,"]\n";
+      }
+  }
+
+The code above shows how a few Bio::Seq methods suffice to read, parse,
+reformat and analyze sequences from a file. A full list of methods
+available to Bio::Seq objects is shown below. Bear in mind that some of
+these methods come from PrimarySeq objects, which are simpler
+than Seq objects, stripped of features (see L<Bio::PrimarySeq> for
+more information).
+
+  # these methods return strings, and accept strings in some cases:
+
+  $seqobj->seq();              # string of sequence
+  $seqobj->subseq(5,10);       # part of the sequence as a string
+  $seqobj->accession_number(); # when there, the accession number
+  $seqobj->alphabet();         # one of 'dna','rna',or 'protein'
+  $seqobj->seq_version()       # when there, the version
+  $seqobj->keywords();         # when there, the Keywords line
+  $seqobj->length()            # length
+  $seqobj->desc();             # description
+  $seqobj->primary_id();       # a unique id for this sequence regardless
+                               # of its display_id or accession number
+  $seqobj->display_id();       # the human readable id of the sequence
+
+Some of these values map to fields in common formats. For example, The
+display_id() method returns the LOCUS name of a Genbank entry,
+the (\S+) following the E<gt> character in a Fasta file, the ID from
+a SwissProt file, and so on. The desc() method will return the DEFINITION
+line of a Genbank file, the description following the display_id in a
+Fasta file, and the DE field in a SwissProt file.
+
+  # the following methods return new Seq objects, but
+  # do not transfer features across to the new object:
+
+  $seqobj->trunc(5,10)  # truncation from 5 to 10 as new object
+  $seqobj->revcom       # reverse complements sequence
+  $seqobj->translate    # translation of the sequence
+
+  # if new() can be called this method returns 1, else 0
+
+  $seqobj->can_call_new
+
+  # the following method determines if the given string will be accepted
+  # by the seq() method - if the string is acceptable then validate()
+  # returns 1, or 0 if not
+
+  $seqobj->validate_seq($string)
+
+  # the following method returns or accepts a Species object:
+
+  $seqobj->species();
+
+Please see L<Bio::Species> for more information on this object.
+
+  # the following method returns or accepts an Annotation object
+  # which in turn allows access to Annotation::Reference
+  # and Annotation::Comment objects:
+
+  $seqobj->annotation();
+
+These annotations typically refer to entire sequences, unlike
+features.  See L<Bio::AnnotationCollectionI>,
+L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and
+L<Bio::Annotation::Comment> for details.
+
+It is also important to be able to describe defined portions of a
+sequence. The combination of some description and the corresponding
+sub-sequence is called a feature - an exon and its coordinates within
+a gene is an example of a feature, or a domain within a protein.
+
+  # the following methods return an array of SeqFeatureI objects:
+
+  $seqobj->get_SeqFeatures # The 'top level' sequence features
+  $seqobj->get_all_SeqFeatures # All sequence features, including sub-seq
+                               # features, such as features in an exon
+
+  # to find out the number of features use:
+
+  $seqobj->feature_count
+
+Here are just some of the methods available to SeqFeatureI objects:
+
+  # these methods return numbers:
+
+  $feat->start          # start position (1 is the first base)
+  $feat->end            # end position (2 is the second base)
+  $feat->strand         # 1 means forward, -1 reverse, 0 not relevant
+
+  # these methods return or accept strings:
+
+  $feat->primary_tag    # the name of the sequence feature, eg
+                        # 'exon', 'glycosylation site', 'TM domain'
+  $feat->source_tag     # where the feature comes from, eg, 'EMBL_GenBank',
+                        # or 'BLAST'
+
+  # this method returns the more austere PrimarySeq object, not a
+  # Seq object - the main difference is that PrimarySeq objects do not
+  # themselves contain sequence features
+
+  $feat->seq            # the sequence between start,end on the
+                        # correct strand of the sequence
+
+See L<Bio::PrimarySeq> for more details on PrimarySeq objects.
+
+  # useful methods for feature comparisons, for start/end points
+
+  $feat->overlaps($other)  # do $feat and $other overlap?
+  $feat->contains($other)  # is $other completely within $feat?
+  $feat->equals($other)    # do $feat and $other completely agree?
+
+  # one can also add features
+
+  $seqobj->add_SeqFeature($feat)     # returns 1 if successful
+  $seqobj->add_SeqFeature(@features) # returns 1 if successful
+
+  # sub features. For complex join() statements, the feature
+  # is one sequence feature with many sub SeqFeatures
+
+  $feat->sub_SeqFeature  # returns array of sub seq features
+
+Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>,
+for more information on sequence features.
+
+It is worth mentioning that one can also retrieve the start and end
+positions of a feature using a Bio::LocationI object:
+
+  $location = $feat->location # $location is a Bio::LocationI object
+  $location->start;           # start position
+  $location->end;             # end position
+
+This is useful because one needs a Bio::Location::SplitLocationI object
+in order to retrieve the coordinates inside the Genbank or EMBL join()
+statements (e.g. "CDS    join(51..142,273..495,1346..1474)"):
+
+  if ( $feat->location->isa('Bio::Location::SplitLocationI') &&
+	       $feat->primary_tag eq 'CDS' )  {
+    foreach $loc ( $feat->location->sub_Location ) {
+      print $loc->start . ".." . $loc->end . "\n";
+    }
+  }
+
+See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more
+information.
+
+=head1 Implemented Interfaces
+
+This class implements the following interfaces.
+
+=over 4
+
+=item Bio::SeqI
+
+Note that this includes implementing Bio::PrimarySeqI.
+
+=item Bio::IdentifiableI
+
+=item Bio::DescribableI
+
+=item Bio::AnnotatableI
+
+=item Bio::FeatureHolderI
+
+=back
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects
+
+Email birney at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::Seq;
+use strict;
+
+use Bio::Annotation::Collection;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root Bio::SeqI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::FeatureHolderI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
+                                 -id  => 'human_id',
+				 -accession_number => 'AL000012',
+			       );
+
+ Function: Returns a new Seq object from
+           basic constructors, being a string for the sequence
+           and strings for id and accession_number
+ Returns : a new Bio::Seq object
+
+=cut
+
+sub new {
+    my ($caller, @args) = @_;
+
+    # Accept both Bio::Seq->new(...) and $seq_object->new(...); in the
+    # latter case construct into the class of the calling object.
+    my $class = ref($caller) || $caller;
+
+    # We know our inheritance hierarchy: build the root object directly
+    # and rebless it into the requested class.
+    my $self = Bio::Root::Root->new(@args);
+    bless $self, $class;
+
+    # Construction of the sequence data is delegated to PrimarySeq, which
+    # is handed the same argument list. The object was created just above,
+    # so storing it straight into the hash slot (bypassing the primary_seq()
+    # setter and its feature re-attachment loop) is safe.
+    $self->{'primary_seq'} = Bio::PrimarySeq->new(@args);
+
+    # The '_as_feat' feature array is deliberately not created here; it is
+    # set up lazily on first use, which speeds up feature-less sequences.
+
+    # Called as a plain function (no '&' sigil, which would only serve to
+    # bypass prototypes) on the RootI implementation, as before.
+    my ($ann, $pid, $feat, $species) =
+        Bio::Root::RootI::_rearrange( $self,
+            [qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args );
+
+    # In many cases - e.g. reading fasta files - none of these is set, so
+    # returning early is a cheap shortcut around the tests below.
+    return $self
+        unless defined $ann || defined $pid || defined $feat || defined $species;
+
+    $self->primary_id($pid)  if $pid;
+    $self->species($species) if $species;
+    $self->annotation($ann)  if $ann;
+
+    if ( defined $feat ) {
+        # Compare the reference type exactly: the old /ARRAY/i pattern match
+        # also matched blessed objects whose class name contains "array".
+        if ( ref($feat) eq 'ARRAY' ) {
+            foreach my $feature (@$feat) {
+                $self->add_SeqFeature($feature);
+            }
+        }
+        elsif ( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) {
+            $self->add_SeqFeature($feat);
+        }
+        else {
+            $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self));
+        }
+    }
+
+    return $self;
+}
+
+=head1 PrimarySeq interface
+
+
+The PrimarySeq interface provides the basic sequence getting
+and setting methods for all sequences.
+
+These methods implement the Bio::PrimarySeq interface by delegating
+to the primary_seq inside the object. This means that you
+can use a Seq object wherever there is a PrimarySeq, and
+of course, you are free to use these functions anyway.
+
+=cut
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string = $obj->seq()
+ Function: Get/Set the sequence as a string of letters. The
+           case of the letters is left up to the implementer.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard),
+           but implementations are suggested to keep an open mind about
+           case (some users... want mixed case!)
+ Returns : A scalar
+ Args    : Optionally on set the new value (a string). An optional second
+           argument presets the alphabet (otherwise it will be guessed).
+           Both parameters may also be given in named parameter style
+           with -seq and -alphabet being the names.
+
+=cut
+
+sub seq {
+    return shift->primary_seq()->seq(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+
+=head2 validate_seq
+
+ Title   : validate_seq
+ Usage   : if(! $seq->validate_seq($seq_str) ) {
+                print "sequence $seq_str is not valid for an object of type ",
+		      ref($seq), "\n";
+	   }
+ Function: Validates a given sequence string. A validating sequence string
+           must be accepted by seq(). A string that does not validate will
+           lead to an exception if passed to seq().
+
+           The implementation provided here does not take alphabet() into
+           account. Allowed are all letters (A-Z), '-','.','*','=', and '~'.
+
+ Example :
+ Returns : 1 if the supplied sequence string is valid for the object, and
+           0 otherwise.
+ Args    : The sequence string to be validated.
+
+
+=cut
+
+sub validate_seq {
+    return shift->primary_seq()->validate_seq(@_);    # the held PrimarySeq object does the validation
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $seq->length()
+ Function:
+ Example :
+ Returns : Integer representing the length of the sequence.
+ Args    : None
+
+=cut
+
+sub length {
+    return shift->primary_seq()->length(@_);    # delegated to the held PrimarySeq object
+}
+
+=head1 Methods from the Bio::PrimarySeqI interface
+
+=cut
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+ Function: Returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence
+
+           Start cannot be larger than end but can be equal
+
+ Returns : A string
+ Args    : 2 integers
+
+
+=cut
+
+sub subseq {
+    return shift->primary_seq()->subseq(@_);    # start/end arguments are forwarded unchanged
+}
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : $id = $obj->display_id or $obj->display_id($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+
+=cut
+
+sub display_id {
+   return shift->primary_seq->display_id(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+
+           Can also be used to set the accession number.
+ Example : $key = $seq->accession_number or $seq->accession_number($key)
+ Returns : A string
+ Args    : None or an accession number
+
+
+=cut
+
+sub accession_number {
+   return shift->primary_seq->accession_number(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+
+=cut
+
+sub desc {
+   return shift->primary_seq->desc(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage
+           their own object ids in a way the implementation can control.
+           Clients can expect one id to map to one object.
+
+           For sequences with no natural id, this method should return
+           a stringified memory location.
+
+           Can also be used to set the primary_id (or unset to undef).
+
+           [Note this method name is likely to change in 1.3]
+
+ Example : $id = $seq->primary_id or $seq->primary_id($id)
+ Returns : A string
+ Args    : None or an id, or undef to unset the primary id.
+
+
+=cut
+
+sub primary_id {
+    # Note: this used to not delegate to the primary seq. This is
+    # really bad in very subtle ways. E.g., if you created the object
+    # with a primary id given to the constructor and then later you
+    # changed the primary id, if this method didn't delegate you'd
+    # have different values for primary id in the PrimarySeq object
+    # compared to this instance. Not good.
+
+    # I can't remember why not delegating was ever deemed
+    # advantageous, but I hereby claim that its problems far outweigh
+    # its advantages, if there are any. Convince me otherwise if you
+    # disagree. HL 2004/08/05
+
+    return shift->primary_seq->primary_id(@_);    # single source of truth: the held PrimarySeq object
+}
+
+=head2 can_call_new
+
+ Title   : can_call_new
+ Usage   : if ( $obj->can_call_new ) {
+             $newobj = $obj->new( %param );
+	   }
+ Function: can_call_new returns 1 or 0 depending
+           on whether an implementation allows new
+           constructor to be called. If a new constructor
+           is allowed, then it should take the following hashed
+           constructor list.
+
+           $myobject->new( -seq => $sequence_as_string,
+			   -display_id  => $id,
+			   -accession_number => $accession,
+			   -alphabet => 'dna',
+			 );
+ Example :
+ Returns : 1 or 0
+ Args    : None
+
+
+=cut
+
+sub can_call_new {
+    return 1;    # this implementation always reports that new() may be called
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if ( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Get/Set the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : A string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : optional string to set : 'dna' | 'rna' | 'protein'
+
+
+=cut
+
+sub alphabet {
+   my ($seq, @new) = @_;
+   @new = () unless @new && defined $new[0];    # undef first argument: plain getter call
+   return $seq->primary_seq->alphabet(@new);
+}
+
+=head2 is_circular
+
+ Title   : is_circular
+ Usage   : if( $obj->is_circular) { /Do Something/ }
+ Function: Returns true if the molecule is circular
+ Returns : Boolean value
+ Args    : none
+
+=cut
+
+sub is_circular {
+    return shift->primary_seq()->is_circular(@_);    # delegated to the held PrimarySeq object
+}
+
+
+=head1 Methods for Bio::IdentifiableI compliance
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+
+           This is aliased to accession_number().
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    return shift->accession_number(@_);    # alias: forwards to accession_number() of this object
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described by
+           the same identifier should represent the same concept
+
+ Returns : A number
+
+=cut
+
+sub version{
+    return shift->primary_seq->version(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for
+           organisation (eg, wormbase.org)
+
+ Returns : A scalar
+
+=cut
+
+sub authority {
+    return shift->primary_seq()->authority(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection
+
+ Returns : A scalar
+
+
+=cut
+
+sub namespace{
+    return shift->primary_seq()->namespace(@_);    # get/set is delegated to the held PrimarySeq object
+}
+
+=head1 Methods for Bio::DescribableI compliance
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user
+           the string should have no spaces (ideally, though a cautious
+           user of this interface would not assume this) and should be
+           less than thirty characters (though again, double checking
+           this is a good idea)
+
+           This is aliased to display_id().
+ Returns : A scalar
+
+=cut
+
+sub display_name {
+    return shift->display_id(@_);    # alias: forwards to display_id() of this object
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display
+
+           This is aliased to desc().
+ Returns : A scalar
+
+=cut
+
+sub description {
+    return shift->desc(@_);    # alias: forwards to desc() of this object
+}
+
+=head1 Methods for implementing Bio::AnnotatableI
+
+=cut
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $ann = $seq->annotation or 
+           $seq->annotation($ann)
+ Function: Gets or sets the annotation
+ Returns : Bio::AnnotationCollectionI object
+ Args    : None or Bio::AnnotationCollectionI object
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information
+
+=cut
+
+sub annotation {
+    my ($obj,$value) = @_;
+    if( defined $value ) {    # setter; ref() check first, as primary_seq() does
+        $obj->throw("object of class ".ref($value)." does not implement ".
+                    "Bio::AnnotationCollectionI. Too bad.")
+            unless ref($value) && $value->isa("Bio::AnnotationCollectionI");
+        $obj->{'_annotation'} = $value;
+    } elsif( ! defined $obj->{'_annotation'}) {    # first read: create an empty collection
+        $obj->{'_annotation'} = Bio::Annotation::Collection->new;
+    }
+    return $obj->{'_annotation'};
+}
+
+=head1 Methods to implement Bio::FeatureHolderI
+
+This includes methods for retrieving, adding, and removing features.
+
+=cut
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   :
+ Function: Get the feature objects held by this feature holder.
+
+           Features which are not top-level are subfeatures of one or
+           more of the returned feature objects, which means that you
+           must traverse the subfeature arrays of each top-level
+           feature object in order to traverse all features associated
+           with this sequence.
+
+           Use get_all_SeqFeatures() if you want the feature tree
+           flattened into one single array.
+
+ Example :
+ Returns : an array of Bio::SeqFeatureI implementing objects
+ Args    : none
+
+At some day we may want to expand this method to allow for a feature
+filter to be passed in.
+
+=cut
+
+sub get_SeqFeatures{
+   my ($seq) = @_;
+
+   # The feature array is created on demand, so make sure it exists
+   # before dereferencing it.
+   $seq->{'_as_feat'} = [] unless defined $seq->{'_as_feat'};
+
+   return @{ $seq->{'_as_feat'} };
+}
+
+=head2 get_all_SeqFeatures
+
+ Title   : get_all_SeqFeatures
+ Usage   : @feat_ary = $seq->get_all_SeqFeatures();
+ Function: Returns the tree of feature objects attached to this
+           sequence object flattened into one single array. Top-level
+           features will still contain their subfeature-arrays, which
+           means that you will encounter subfeatures twice if you
+           traverse the subfeature tree of the returned objects.
+
+           Use get_SeqFeatures() if you want the array to contain only
+           the top-level features.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : None
+
+
+=cut
+
+# this implementation is inherited from FeatureHolderI
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : $seq->feature_count()
+ Function: Return the number of SeqFeatures attached to a sequence
+ Returns : integer representing the number of SeqFeatures
+ Args    : None
+
+
+=cut
+
+sub feature_count {
+    my $seq = shift;
+
+    # No feature array has been created yet, hence nothing to count.
+    return 0 unless defined $seq->{'_as_feat'};
+
+    # Number of elements in the top-level feature array.
+    return scalar @{ $seq->{'_as_feat'} };
+}
+
+=head2 add_SeqFeature
+
+ Title   : add_SeqFeature
+ Usage   : $seq->add_SeqFeature($feat);
+           $seq->add_SeqFeature(@feat);
+ Function: Adds the given feature object (or each of an array of feature
+           objects) to the feature array of this
+           sequence. The object passed is required to implement the
+           Bio::SeqFeatureI interface.
+ Returns : 1 on success
+ Args    : A Bio::SeqFeatureI implementing object, or an array of such objects.
+
+
+=cut
+
+sub add_SeqFeature {
+   my ($self, @feat) = @_;
+
+   $self->{'_as_feat'} ||= [];    # the feature array is created lazily
+   # our sequence is the same for every feature, so fetch it only once
+   my $aseq = $self->primary_seq;
+
+   foreach my $feat ( @feat ) {
+       # ref() guard: undef or a plain scalar gets our exception, not a Perl method-call error
+       if( !ref($feat) || !$feat->isa("Bio::SeqFeatureI") ) {
+           $self->throw("$feat is not a SeqFeatureI and that's what we expect...");
+       }
+       # make sure we attach ourselves to the feature if the feature wants it
+       $feat->attach_seq($aseq) if $aseq;
+       push(@{$self->{'_as_feat'}},$feat);
+   }
+   return 1;
+}
+
+=head2 remove_SeqFeatures
+
+ Title   : remove_SeqFeatures
+ Usage   : $seq->remove_SeqFeatures();
+ Function: Flushes all attached SeqFeatureI objects.
+
+           To remove individual feature objects, delete those from the returned
+           array and re-add the rest.
+ Example :
+ Returns : The array of Bio::SeqFeatureI objects removed from this seq.
+ Args    : None
+
+
+=cut
+
+sub remove_SeqFeatures {
+    my ($seq) = @_;
+
+    # nothing was ever attached: hand back an empty list
+    my $attached = $seq->{'_as_feat'} or return ();
+    $seq->{'_as_feat'} = [];    # start over with a fresh, empty array
+    return @$attached;          # the detached features
+}
+
+=head1 Methods provided in the Bio::PrimarySeqI interface
+
+
+These methods are inherited from the PrimarySeq interface
+and work as one expects, building new Bio::Seq objects
+or other information as expected. See L<Bio::PrimarySeq>
+for more information.
+
+Sequence Features are B<not> transferred to the new objects.
+This is possibly a mistake. Anyone who feels the urge in
+dealing with this is welcome to give it a go.
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::Seq object which
+           is the reversed complement of the sequence. For protein
+           sequences this throws an exception of "Sequence is a protein.
+           Cannot revcom"
+
+           The id is the same id as the original sequence, and the
+           accession number is also identical. If someone wants to track
+           that this sequence has been reversed, it needs to define its own
+           extensions
+
+           To do an in-place edit of an object you can go:
+
+           $seq = $seq->revcom();
+
+           This of course, causes Perl to handle the garbage collection of
+           the old object, but it is roughly speaking as efficient as an
+           in-place edit.
+
+ Returns : A new (fresh) Bio::Seq object
+ Args    : None
+
+
+=cut
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence
+
+ Example :
+ Returns : A fresh Seq object
+ Args    : Two integers, start and end, as in the Usage line above
+
+
+=cut
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $seq->id()
+ Function: This is mapped on display_id
+ Returns : value of display_id()
+ Args    : [optional] value to update display_id
+
+
+=cut
+
+sub  id {
+    return shift->display_id(@_);    # alias: forwards to display_id() of this object
+}
+
+
+=head1 Seq only methods
+
+
+These methods are specific to the Bio::Seq object, and not
+found on the Bio::PrimarySeq object
+
+=head2 primary_seq
+
+ Title   : primary_seq
+ Usage   : $seq->primary_seq or $seq->primary_seq($newval)
+ Function: Get or set a PrimarySeq object
+ Example :
+ Returns : PrimarySeq object
+ Args    : None or PrimarySeq object
+
+
+=cut
+
+sub primary_seq {
+   my ($seq, $pseq) = @_;
+
+   # Getter: nothing to store, just hand back what we hold.
+   return $seq->{'primary_seq'} unless defined $pseq;
+
+   # Setter: only references to Bio::PrimarySeqI implementations pass.
+   unless( ref $pseq && $pseq->isa('Bio::PrimarySeqI') ) {
+       $seq->throw("$pseq is not a Bio::PrimarySeq compliant object");
+   }
+   $seq->{'primary_seq'} = $pseq;
+
+   # Pass the new sequence on to every top-level feature, so that each
+   # one can attach to it.
+   foreach my $feature ( $seq->get_SeqFeatures() ) {
+       $feature->attach_seq($pseq);
+   }
+
+   return $seq->{'primary_seq'};
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $species = $seq->species() or $seq->species($species)
+ Function: Gets or sets the species
+ Returns : L<Bio::Species> object
+ Args    : None or L<Bio::Species> object
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    my ($self, $species) = @_;
+
+    # Setter when a (true) value is supplied, plain getter otherwise.
+    $self->{'species'} = $species if $species;
+
+    return $self->{'species'};    # explicit on both paths, not an implicit last-value
+}
+
+=head1 Internal methods
+
+=cut
+
+# keep AUTOLOAD happy: an explicit, empty destructor (presumably so that
+sub DESTROY { }    # DESTROY is never looked up via AUTOLOAD - confirm)
+
+############################################################################
+# aliases due to name changes or to compensate for our lack of consistency #
+############################################################################
+
+# in all other modules we use the object in the singular, so both the
+# singular and the plural spelling are aliased to remove_SeqFeatures()
+*flush_SeqFeature = \&remove_SeqFeatures;
+*flush_SeqFeatures = \&remove_SeqFeatures;
+
+# this is now get_SeqFeatures() (from FeatureHolderI)
+*top_SeqFeatures = \&get_SeqFeatures;
+
+# this is now get_all_SeqFeatures() in FeatureHolderI
+sub all_SeqFeatures{
+    return shift->get_all_SeqFeatures(@_);    # old name, kept as a forwarding wrapper
+}
+
+sub accession {
+    my ($seq, @args) = @_;    # deprecated alias of accession_number()
+    $seq->warn(join '', ref($seq), "::accession is deprecated, ",
+               "use accession_number() instead");
+    return $seq->accession_number(@args);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqAnalysisParserI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqAnalysisParserI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqAnalysisParserI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,101 @@
+# $Id: SeqAnalysisParserI.pm,v 1.16.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::SeqAnalysisParserI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>,
+# and Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Jason Stajich, Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqAnalysisParserI - Sequence analysis output parser interface  
+
+=head1 SYNOPSIS
+
+    # get a SeqAnalysisParserI somehow, e.g. by
+    my $parser = Bio::Factory::SeqAnalysisParserFactory->get_parser(
+                            '-input' => 'inputfile', '-method' => 'genscan');
+    while( my $feature = $parser->next_feature() ) {
+	print "Feature from ", $feature->start, " to ", $feature->end, "\n";
+    }
+
+=head1 DESCRIPTION
+
+SeqAnalysisParserI is a generic interface for describing sequence analysis
+result parsers. Sequence analysis in this sense is a search for similarities
+or the identification of features on the sequence, like a databank search or a
+gene prediction result.
+
+The concept behind this interface is to have a generic interface in sequence
+annotation pipelines (as used e.g. in high-throughput automated
+sequence annotation). This interface enables plug-and-play for new analysis
+methods and their corresponding parsers without the necessity for modifying
+the core of the annotation pipeline. In this concept the annotation pipeline
+has to rely on only a list of methods for which to process the results, and a
+factory from which it can obtain the corresponding parser implementing this
+interface.
+
+See Bio::Factory::SeqAnalysisParserFactoryI and
+Bio::Factory::SeqAnalysisParserFactory for interface and an implementation
+of the corresponding factory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Jason Stajich
+
+Email Hilmar Lapp E<lt>hlapp at gmx.netE<gt>, Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqAnalysisParserI;
+use strict;
+use Carp;
+use base qw(Bio::Root::RootI);
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none    
+
+=cut
+
+sub next_feature {
+    my ($parser) = @_;    # interface stub; implementing classes supply the real method
+    $parser->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Annotated.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Annotated.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Annotated.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,984 @@
+# $Id: Annotated.pm,v 1.34.4.3 2006/10/17 09:12:57 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Annotated
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Annotated - PLEASE PUT SOMETHING HERE
+
+=head1 SYNOPSIS
+
+    # none yet, complain to authors
+
+=head1 DESCRIPTION
+
+None yet, complain to authors.
+
+=head1 Implemented Interfaces
+
+This class implements the following interfaces.
+
+=over 4
+
+=item Bio::SeqFeatureI
+
+Note that this includes implementing Bio::RangeI.
+
+=item Bio::AnnotatableI
+
+=item Bio::FeatureHolderI
+
+Features held by a feature are essentially sub-features.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via 
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::SeqFeature::Annotated;
+
+use strict;
+
+use Bio::Annotation::Collection;
+use Bio::Annotation::OntologyTerm;
+use Bio::Annotation::Target;
+use Bio::LocatableSeq;
+use Bio::Location::Simple;
+use Bio::Ontology::OntologyStore;
+use Bio::Tools::GFF;
+
+use URI::Escape;
+
+use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::AnnotatableI Bio::FeatureHolderI);
+
+######################################
+#get_SeqFeatures
+#display_name
+#primary_tag
+#source_tag                       x with warning
+#has_tag
+#get_tag_values
+#get_tagset_values
+#get_all_tags
+#attach_seq
+#seq                              x
+#entire_seq                       x
+#seq_id
+#gff_string
+#_static_gff_handler
+#start                            x
+#end                              x
+#strand                           x
+#location
+#primary_id
+
+sub new {
+    my ($class_or_obj, @params) = @_;
+    # Let the parent class build the object, then apply our own named
+    # parameters to it; both steps are given the same argument list.
+    my ($obj) = $class_or_obj->SUPER::new(@params);
+    $obj->_initialize(@params);
+    return $obj;
+}
+
+sub _initialize {
+  my ($self, @args) = @_;
+
+  # Pull the recognised named parameters out of the argument list; the
+  # variables below are listed in the same order as the keys.
+  my ($start, $end, $strand, $frame, $phase, $score,
+      $name, $annot, $location,
+      $display_name,    # deprecated spelling of -name
+      $seq_id, $type, $source, $feature
+     ) = $self->_rearrange(
+            [qw(START END STRAND FRAME PHASE SCORE
+                NAME ANNOTATION LOCATION
+                DISPLAY_NAME
+                SEQ_ID TYPE SOURCE FEATURE)],
+            @args);
+
+  # Hand every supplied value to its accessor. The calls are kept in
+  # their established order.
+  $self->start($start)          if defined $start;
+  $self->end($end)              if defined $end;
+  $self->strand($strand)        if defined $strand;
+  $self->frame($frame)          if defined $frame;
+  $self->phase($phase)          if defined $phase;
+  $self->score($score)          if defined $score;
+  $self->source($source)        if defined $source;
+  $self->type($type)            if defined $type;
+  $self->location($location)    if defined $location;
+  $self->annotation($annot)     if defined $annot;
+  $self->from_feature($feature) if defined $feature;
+
+  # -name and -display_name are two spellings of one attribute, so
+  # supplying both is rejected.
+  if( defined($display_name) && defined($name) ){
+      $self->throw('Cannot define (-id and -seq_id) or (-name and -display_name) attributes');
+  }
+  $self->seq_id($seq_id) if defined $seq_id;
+
+  my $label = $name || $display_name;    # a false -name falls back to -display_name
+  $self->name($label) if defined $label;
+}
+
+=head1 ATTRIBUTE ACCESSORS FOR Bio::SeqFeature::Annotated
+
+=cut
+
+=head2 from_feature
+
+  Usage: $obj->from_feature($myfeature);
+  Desc : initialize this object with the contents of another feature
+         object.  Useful for converting objects like
+         L<Bio::SeqFeature::Generic> to this class
+  Ret  : nothing meaningful
+  Args : a single object of some other feature type,
+  Side Effects: throws error on failure
+  Example:
+
+=cut
+
+sub from_feature {
+  my ($self,$feat,%opts) = @_;
+
+  # only objects of a known annotation-collection or feature type are accepted
+  ref($feat) && ($feat->isa('Bio::AnnotationCollectionI') || $feat->isa('Bio::SeqFeatureI'))
+    or $self->throw('invalid arguments to from_feature');
+
+  #TODO: add overrides in opts for these values, so people don't have to screw up their feature object
+  #if they don't want to
+
+  ### set most of the data: each value is read from $feat through the
+  ### accessor of that name and written through our own (calling a method
+  ### by name is allowed under strict, so no "no strict 'refs'" is needed)
+  foreach my $fieldname (qw/ start end strand frame score location seq_id source_tag primary_tag/) {
+    $self->$fieldname( $feat->$fieldname );
+  }
+
+  ### now pick up the annotations of the other feature, if it carries any
+  return unless $feat->isa('Bio::AnnotatableI');
+  my $annotations = $feat->annotation;
+  foreach my $key ( $annotations->get_all_annotation_keys() ) {
+    # upgrade plain values to Bio::Annotation objects before storing them
+    my @values = _aggregate_scalar_annotations(\%opts, $key, $annotations->get_Annotations($key));
+    foreach my $val (@values) { $self->add_Annotation($key, $val); }
+  }
+  return;
+}
+# Given an annotation key and its values, turn the values into
+# Bio::Annotation::* objects, choosing the class from the key.
+sub _aggregate_scalar_annotations {
+  my ($opts,$key,@values) = @_;    # $opts is accepted but not consulted here
+
+  #anything that's not an object, make it a SimpleValue
+  @values = map { ref($_) ? $_ : Bio::Annotation::SimpleValue->new(-value => $_) } @values;
+
+  #try to make a Target object from 3 or 4 simple values: id, start, end[, strand]
+  if($key eq 'Target' && (@values == 3 || @values == 4)
+     && @values == grep {$_->isa('Bio::Annotation::SimpleValue')} @values
+    ) {
+    my ($target_id,$start,$end,$strand) = map {$_->value} @values;
+    #make a strand if it doesn't have one, enforcing start <= end
+    if(@values == 3) {
+      $strand = '+';
+      if($start > $end) {
+        ($start,$end) = ($end,$start);
+        $strand = '-';
+      }
+    }
+    return ( Bio::Annotation::Target->new( -target_id => $target_id,
+                                           -start     => $start,
+                                           -end       => $end,
+                                           -strand    => $strand,
+                                         )
+           );
+  }
+  #try to make DBLink objects
+  elsif($key eq 'dblink' || $key eq 'Dbxref') {
+    return map {
+      #test the wrapped value: a bare /:/ would match the object's stringification instead
+      if( $_->isa('Bio::Annotation::SimpleValue') && $_->value =~ /:/ ) {
+        my ($db,$id) = split /:/,$_->value,2;    #first colon only, ids may contain colons
+        Bio::Annotation::DBLink->new( -database   => $db,
+                                      -primary_id => $id,
+                                    );
+      } else { #otherwise leave the object as it is
+        $_
+      }
+    } @values;
+  }
+  #make OntologyTerm objects
+  elsif($key eq 'Ontology_term') {
+    return map { Bio::Annotation::OntologyTerm->new(-identifier => $_->value) } @values;
+  }
+  #make Comment objects
+  elsif($key eq 'comment') {
+    return map { Bio::Annotation::Comment->new( -text => $_->value ) } @values;
+  }
+
+  return @values;
+}
+
+
+=head2 seq_id()
+
+ Usage   : $obj->seq_id($newval)
+ Function: holds a string corresponding to the unique
+           seq_id of the sequence underlying the feature
+           (e.g. database accession or primary key).
+ Returns : a Bio::Annotation::SimpleValue object representing the seq_id.
+ Args    : on set, some string or a Bio::Annotation::SimpleValue object.
+
+=cut
+
+sub seq_id {
+  # Get/set the 'seq_id' annotation; see the POD above for the contract.
+  my $self = shift;
+  my $val  = shift;
+
+  if (defined $val) {
+    my $term;
+    if (ref $val) {
+      # only SimpleValue objects are acceptable as-is
+      $term = $val if $val->isa('Bio::Annotation::SimpleValue');
+    }
+    else {
+      # plain strings are URI-unescaped and wrapped
+      $term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
+    }
+
+    # reject unusable arguments and ids starting with '>'
+    $self->throw('give seq_id() a scalar or Bio::Annotation::SimpleValue object, not '.$val)
+      if !defined($term) || $term->value =~ /^>/;
+
+    # replace whatever was stored before
+    $self->remove_Annotations('seq_id');
+    $self->add_Annotation('seq_id', $term);
+  }
+
+  # make sure we always have something
+  if (!$self->get_Annotations('seq_id')) {
+    $self->seq_id('.');
+  }
+
+  return $self->get_Annotations('seq_id');
+}
+
+=head2 name()
+
+ Usage   : $obj->name($newval)
+ Function: human-readable name for the feature.
+ Returns : value of name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub name {
+  # Plain get/set accessor on the 'name' slot; an undefined argument
+  # leaves the stored value alone.
+  my $self = shift;
+  my $val  = shift;
+
+  if (defined $val) {
+    $self->{'name'} = $val;
+  }
+  return $self->{'name'};
+}
+
+=head2 type()
+
+ Usage   : $obj->type($newval)
+ Function: a SOFA type for the feature.
+ Returns : Bio::Annotation::OntologyTerm object representing the type.
+ Args    : on set, a SOFA name, identifier, or Bio::Annotation::OntologyTerm object.
+
+=cut
+
+use constant MAX_TYPE_CACHE_MEMBERS => 20;
+sub type {
+  my ($self, $val) = @_;
+
+  # no argument: act as getter for the 'type' annotation
+  return $self->get_Annotations('type') unless defined($val);
+
+  my $term;
+  if (!ref($val)) {
+    # plain text coming in; try to map it to a SOFA term
+
+    our %__type_cache; # small cache of plaintext types already resolved
+
+    # start over with an empty cache once it has grown too big
+    %__type_cache = ()
+      if scalar(keys %__type_cache) > MAX_TYPE_CACHE_MEMBERS;
+
+    # use the cached term, or look one up (throwing if there is none)
+    $term = $__type_cache{$val} ||= do {
+      my $sofa = Bio::Ontology::OntologyStore->get_instance->get_ontology('Sequence Ontology Feature Annotation');
+
+      # something like 'XX:123' is looked up by identifier, all else by name
+      my $lookup_by = $val =~ /^\D+:\d+$/ ? '-identifier' : '-name';
+      my ($soterm)  = ($sofa->find_terms($lookup_by => $val))[0];
+
+      $self->throw("couldn't find a SOFA term matching type '$val'.")
+        unless $soterm;
+
+      my $wrapped = Bio::Annotation::OntologyTerm->new;
+      $wrapped->term($soterm);
+      $wrapped;
+    };
+  }
+  elsif ($val->isa('Bio::Annotation::OntologyTerm')) {
+    $term = $val;
+  }
+  else {
+    # some other kind of object: refuse it
+    $self->throw('give type() a SOFA term name, identifier, or Bio::Annotation::OntologyTerm object, not '.$val);
+  }
+
+  # replace whatever type was stored before
+  $self->remove_Annotations('type');
+  return $self->add_Annotation('type',$term);
+}
+
+=head2 source()
+
+ Usage   : $obj->source($newval)
+ Function: holds a string corresponding to the source of the feature.
+ Returns : a Bio::Annotation::SimpleValue object representing the source.
+ Args    : on set, some scalar or a Bio::Annotation::SimpleValue object.
+
+=cut
+
+sub source {
+  my $self = shift;
+  my $val  = shift;
+
+  unless (defined $val) {
+    # getter: store '.' first if nothing has been set yet
+    $self->source('.') if !$self->get_Annotations('source');
+    return $self->get_Annotations('source');
+  }
+
+  # setter: wrap plain scalars, accept SimpleValue objects, refuse the rest
+  my $term;
+  if (ref $val) {
+    $val->isa('Bio::Annotation::SimpleValue')
+      or $self->throw('give source() a scalar or Bio::Annotation::SimpleValue object, not '.$val);
+    $term = $val;
+  }
+  else {
+    $term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
+  }
+
+  # replace whatever source was stored before
+  $self->remove_Annotations('source');
+  return $self->add_Annotation('source', $term);
+}
+
+=head2 score()
+
+ Usage   : $score = $feat->score()
+           $feat->score($score)
+ Function: holds a value corresponding to the score of the feature.
+ Returns : a Bio::Annotation::SimpleValue object representing the score.
+ Args    : on set, a scalar or a Bio::Annotation::SimpleValue object.
+
+=cut
+
+sub score {
+  # Get/set the 'score' annotation.
+  #   arg     : optional scalar or Bio::Annotation::SimpleValue
+  #   returns : result of get_Annotations('score')
+  #   throws  : "'<val>' is not a valid score" for any other argument or
+  #             for a value that is neither '.' nor number-like
+  my $self = shift;
+  my $val = shift;
+
+  if(defined($val)){
+      my $term = undef;
+      if (!ref($val)) {
+          $term = Bio::Annotation::SimpleValue->new(-value => $val);
+      } elsif ($val->isa('Bio::Annotation::SimpleValue')) {
+          $term = $val;
+      }
+
+      # Test for a missing term *before* calling methods on it, so that an
+      # object of the wrong class gets the intended error message instead
+      # of a "can't call method on undefined value" failure.
+      # NOTE(review): the pattern is only anchored at the start, so values
+      # with trailing junk still pass - left as is, confirm if intended.
+      if (!defined($term) ||
+          ($term->value ne '.' &&
+           $term->value !~ /^[+-]?\d+\.?\d*(e-\d+)?/)) {
+          $self->throw("'$val' is not a valid score");
+      }
+      $self->remove_Annotations('score');
+      $self->add_Annotation('score', $term);
+  }
+
+  $self->score('.') unless ($self->get_Annotations('score')); # make sure we always have something
+
+  return $self->get_Annotations('score');
+}
+
+=head2 phase()
+
+ Usage   : $phase = $feat->phase()
+           $feat->phase($phase)
+ Function: get/set on phase information
+ Returns : a Bio::Annotation::SimpleValue object holding one of 0,1,2,'.'
+           as its value.
+ Args    : on set, one of 0,1,2,'.' or a Bio::Annotation::SimpleValue
+           object holding one of 0,1,2,'.' as its value.
+
+=cut
+
+sub phase {
+  # Get/set the 'phase' annotation.
+  #   arg     : optional; one of 0,1,2,'.' or a Bio::Annotation::SimpleValue
+  #             holding one of those
+  #   returns : result of get_Annotations('phase')
+  #   throws  : "'<val>' is not a valid phase" otherwise
+  my $self = shift;
+  my $val = shift;
+
+  if(defined($val)){
+      my $term = undef;
+      if (!ref($val)) {
+          $term = Bio::Annotation::SimpleValue->new(-value => $val);
+      } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
+          $term = $val;
+      }
+      if (!defined($term) || ($term->value !~ /^[0-2.]$/)) {
+          $self->throw("'$val' is not a valid phase");
+      }
+      $self->remove_Annotations('phase');
+      $self->add_Annotation('phase', $term);
+  }
+
+  # make sure we always have something.  The test is a plain truth test,
+  # the same way frame(), score() and seq_id() do it; wrapping the call in
+  # defined() would presumably always succeed (a list-returning call in
+  # scalar context) and so never install the default - TODO confirm
+  # against get_Annotations().
+  $self->phase('.') unless ($self->get_Annotations('phase'));
+
+  return $self->get_Annotations('phase');
+}
+
+
+=head2 frame()
+
+ Usage   : $frame = $feat->frame()
+           $feat->frame($frame)
+ Function: get/set on frame information
+ Returns : a Bio::Annotation::SimpleValue object holding one of 0,1,2,'.'
+           as its value.
+ Args    : on set, one of 0,1,2,'.' or a Bio::Annotation::SimpleValue
+           object holding one of 0,1,2,'.' as its value.
+
+=cut
+
+sub frame {
+  my ($self, $val) = @_;
+
+  if (defined $val) {
+    # wrap plain scalars; take SimpleValue objects as they are
+    my $term;
+    if (ref $val) {
+      $term = $val if $val->isa('Bio::Annotation::SimpleValue');
+    }
+    else {
+      $term = Bio::Annotation::SimpleValue->new(-value => $val);
+    }
+
+    # only 0, 1, 2 and '.' are acceptable values
+    unless (defined($term) && $term->value =~ /^[0-2.]$/) {
+      $self->throw("'$val' is not a valid frame");
+    }
+
+    $self->remove_Annotations('frame');
+    $self->add_Annotation('frame', $term);
+  }
+
+  # make sure we always have something
+  if (!$self->get_Annotations('frame')) {
+    $self->frame('.');
+  }
+
+  return $self->get_Annotations('frame');
+}
+
+############################################################
+
+=head1 SHORTCUT METHODS TO ACCESS Bio::AnnotatableI INTERFACE METHODS
+
+=cut
+
+=head2 add_Annotation()
+
+ Usage   :
+ Function: $obj->add_Annotation() is a shortcut to $obj->annotation->add_Annotation
+ Returns : 
+ Args    :
+
+=cut
+
+sub add_Annotation {
+  # Shortcut: forwards all arguments to $self->annotation->add_Annotation
+  # and returns whatever that call returns.
+  my ($self,@args) = @_;
+  return $self->annotation->add_Annotation(@args);
+}
+
+=head2 remove_Annotations()
+
+ Usage   :
+ Function: $obj->remove_Annotations() is a shortcut to $obj->annotation->remove_Annotations
+ Returns : 
+ Args    :
+
+=cut
+
+sub remove_Annotations {
+  # Shortcut: forwards all arguments to
+  # $self->annotation->remove_Annotations and returns its result.
+  my ($self,@args) = @_;
+  return $self->annotation->remove_Annotations(@args);
+}
+
+############################################################
+
+=head1 INTERFACE METHODS FOR Bio::SeqFeatureI
+
+=cut
+
+=head2 display_name()
+
+ Deprecated, use L<Bio::SeqFeatureI/name()>.  Will raise a warning.
+
+=cut
+
+sub display_name {
+  # Alias for name(); every argument is passed straight through.
+  my ($self, @args) = @_;
+
+  #1.6
+  #$self->warn('display_name() is deprecated, use name()');
+
+  return $self->name(@args);
+}
+
+=head2 primary_tag()
+
+ Deprecated, use L<Bio::SeqFeatureI/type()>.  Will raise a warning.
+
+=cut
+
+sub primary_tag {
+  # Delegates to type(); when that yields an object its name() is
+  # returned, otherwise the value itself.
+  my ($self, @args) = @_;
+
+  #1.6
+  #$self->warn('primary_tag() is deprecated, use type()');
+  my $t = $self->type(@args);
+
+  return $t->name if ref($t);
+  return $t;
+}
+
+=head2 source_tag()
+
+ Deprecated, use L<Bio::SeqFeatureI/source()>.  Will raise a warning.
+
+=cut
+
+sub source_tag {
+  # Alias for source(); every argument is passed straight through.
+  my ($self, @args) = @_;
+
+  #1.6
+  #$self->warn('source_tag() is deprecated, use source()');
+
+  return $self->source(@args);
+}
+
+
+=head2 attach_seq()
+
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+           Bio::Seq object is for the *entire* sequence: ie
+           from 1 to 10000
+ Returns : TRUE on success
+ Args    : a Bio::PrimarySeqI compliant object
+
+=cut
+
+sub attach_seq {
+   my $self = shift;
+   my $seq  = shift;
+
+   # only Bio::PrimarySeqI objects may be attached
+   $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures")
+       unless $seq && ref($seq) && $seq->isa("Bio::PrimarySeqI");
+
+   $self->{'seq'} = $seq;
+
+   # hand the same sequence down to every sub feature
+   for my $subfeat ( $self->get_SeqFeatures() ) {
+       $subfeat->attach_seq($seq);
+   }
+
+   return 1;
+}
+
+=head2 seq()
+
+ Usage   : $tseq = $sf->seq()
+ Function: returns a truncated version of seq() with bounds matching this feature
+ Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
+           bounded by start & end, or undef if there is no sequence attached
+ Args    : none
+
+=cut
+
+sub seq {
+  my $self = shift;
+
+  # nothing to cut from when no sequence is attached
+  if (!defined($self->entire_seq())) {
+    return;
+  }
+
+  # cut out the stretch between this feature's start and end
+  my $subseq = $self->entire_seq->trunc($self->start(), $self->end());
+
+  # features on the minus strand get the reverse complement
+  $subseq = $subseq->revcom
+    if defined $self->strand && $self->strand == -1;
+
+  return $subseq;
+}
+
+=head2 entire_seq()
+
+ Usage   : $whole_seq = $sf->entire_seq()
+ Function: gives the entire sequence that this seqfeature is attached to
+ Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
+           sequence attached
+ Args    : none
+
+=cut
+
+sub entire_seq {
+  # Returns whatever is stored in the 'seq' slot (set by attach_seq()).
+  my ($self) = @_;
+  return $self->{'seq'};
+}
+
+=head2 has_tag()
+
+ See Bio::AnnotatableI::has_tag().
+
+=cut
+
+#implemented in Bio::AnnotatableI
+
+# sub has_tag {
+#   return shift->annotation->has_tag(@_);
+# }
+
+=head2 add_tag_value()
+
+ See Bio::AnnotatableI::add_tag_value().
+
+=cut
+
+#implemented in Bio::AnnotatableI
+
+# sub add_tag_value {
+#   return shift->annotation->add_tag_value(@_);
+# }
+
+=head2 get_tag_values()
+
+ See Bio::AnnotationCollectionI::get_tag_values().
+
+=cut
+
+#implemented in Bio::AnnotatableI
+
+# sub get_tag_values {
+#   return shift->annotation->get_tag_values(@_);
+# }
+
+=head2 get_all_tags()
+
+ See Bio::AnnotationCollectionI::get_all_annotation_keys().
+
+=cut
+
+#implemented in Bio::AnnotatableI
+
+# sub get_all_tags {
+#   return shift->annotation->get_all_annotation_keys(@_);
+# }
+
+=head2 remove_tag()
+
+ See Bio::AnnotationCollectionI::remove_tag().
+
+=cut
+
+#implemented in Bio::AnnotatableI
+
+# sub remove_tag {
+#   return shift->annotation->remove_tag(@_);
+# }
+
+
+############################################################
+
+=head1 INTERFACE METHODS FOR Bio::RangeI
+
+ as inherited via Bio::SeqFeatureI
+
+=cut
+
+=head2 length()
+
+ Usage   : $feature->length()
+ Function: Get the feature length computed as $feat->end - $feat->start + 1
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub length {
+  # end - start + 1, i.e. both end points count
+  my ($self) = @_;
+  my $span = $self->end() - $self->start();
+  return $span + 1;
+}
+
+=head2 start()
+
+ Usage   : $obj->start($newval)
+ Function: Get/set on the start coordinate of the feature
+ Returns : integer
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub start {
+  # Delegates to the location object's start(); the first argument is
+  # always passed on, even when it is undef.
+  my $self  = shift;
+  my $value = shift;
+  return $self->location->start($value);
+}
+
+=head2 end()
+
+ Usage   : $obj->end($newval)
+ Function: Get/set on the end coordinate of the feature
+ Returns : integer
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub end {
+  # Delegates to the location object's end(); the first argument is
+  # always passed on, even when it is undef.
+  my $self  = shift;
+  my $value = shift;
+  return $self->location->end($value);
+}
+
+=head2 strand()
+
+ Usage   : $strand = $feat->strand($newval)
+ Function: get/set on strand information, being 1,-1 or 0
+ Returns : -1,1 or 0
+ Args    : ???
+
+=cut
+
+sub strand {
+  # Delegates to the location object's strand() with all arguments.
+  my ($self, @args) = @_;
+  return $self->location->strand(@args);
+}
+
+
+############################################################
+
+=head1 INTERFACE METHODS FOR Bio::FeatureHolderI
+
+This includes methods for retrieving, adding, and removing
+features. Since this is already a feature, features held by this
+feature holder are essentially sub-features.
+
+=cut
+
+=head2 get_SeqFeatures
+
+ Usage   : @feats = $feat->get_SeqFeatures();
+ Function: Returns an array of Bio::SeqFeatureI objects
+ Returns : An array
+ Args    : none
+
+=cut
+
+sub get_SeqFeatures {
+  # Returns the stored sub features as a list (empty when none are set).
+  my ($self) = @_;
+  my $subfeats = $self->{'sub_array'} || [];
+  return @$subfeats;
+}
+
+=head2 add_SeqFeature()
+
+ Usage   : $feat->add_SeqFeature($subfeat);
+           $feat->add_SeqFeature($subfeat,'EXPAND')
+ Function: adds a SeqFeature into the subSeqFeature array.
+           With no 'EXPAND' qualifier, subfeat will be tested
+           as to whether it lies inside the parent; if it does
+           not, a warning is issued and the subfeature is ignored.
+
+           If EXPAND is used, the parent's start/end/strand will
+           be adjusted so that it grows to accommodate the new
+           subFeature
+ Example :
+ Returns : nothing
+ Args    : a Bio::SeqFeatureI object
+
+=cut
+
+sub add_SeqFeature {
+  my $self   = shift;
+  my $val    = shift;
+  my $expand = shift;
+
+  # silently do nothing when no sub feature was given
+  return unless $val;
+
+  # the sub feature has to be a Bio::SeqFeatureI object
+  unless (ref($val) && $val->isa('Bio::SeqFeatureI')) {
+      $self->throw((ref($val) ? ref($val) : $val)
+                   ." does not implement Bio::SeqFeatureI.");
+  }
+
+  if ($expand && $expand eq 'EXPAND') {
+      # grow this feature so that it covers the new sub feature
+      $self->_expand_region($val);
+  }
+  elsif ( !$self->contains($val) ) {
+      # without EXPAND, a sub feature outside the parent is not stored
+      $self->warn("$val is not contained within parent feature, and expansion is not valid, ignoring.");
+      return;
+  }
+
+  push(@{$self->{'sub_array'}},$val);
+}
+
+=head2 remove_SeqFeatures()
+
+ Usage   : $obj->remove_SeqFeatures
+ Function: Removes all sub SeqFeatures.  If you want to remove only a subset,
+           remove that subset from the returned array, and add back the rest.
+ Returns : The array of Bio::SeqFeatureI implementing sub-features that was
+           deleted from this feature.
+ Args    : none
+
+=cut
+
+sub remove_SeqFeatures {
+  my $self = shift;
+
+  # remember what was stored, then start over with an empty array
+  my $old      = $self->{'sub_array'} || [];
+  my @subfeats = @$old;
+  $self->{'sub_array'} = [];
+
+  return @subfeats;
+}
+
+############################################################
+
+=head1 INTERFACE METHODS FOR Bio::AnnotatableI
+
+=cut
+
+=head2 annotation()
+
+ Usage   : $obj->annotation($annot_obj)
+ Function: Get/set the annotation collection object for annotating this
+           feature.
+ Returns : A Bio::AnnotationCollectionI object
+ Args    : newvalue (optional)
+
+=cut
+
+sub annotation {
+    # Get/set the annotation collection of this feature.
+    #   $value  : optional new collection object
+    #   returns : the stored collection; one is created on first access
+    #             when none has been set
+    my ($obj,$value) = @_;
+
+    # we are smart if someone references the object and there hasn't been
+    # one set yet
+    if(defined $value || ! defined $obj->{'annotation'} ) {
+        # explicit class-method call instead of the indirect object form
+        # 'new Bio::Annotation::Collection', whose parsing depends on
+        # which subs are already known to the compiler
+        $value = Bio::Annotation::Collection->new() unless ( defined $value );
+        $obj->{'annotation'} = $value;
+    }
+    return $obj->{'annotation'};
+}
+
+############################################################
+
+=head2 location()
+
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location 
+           of feature on sequence or parent feature  
+ Returns : Bio::LocationI object
+ Args    : [optional] Bio::LocationI object to set the value to.
+
+=cut
+
+sub location {
+  my $self  = shift;
+  my $value = shift;
+
+  if (defined $value) {
+    # setter: only Bio::LocationI objects are accepted
+    $self->throw("object $value pretends to be a location but ".
+                 "does not implement Bio::LocationI")
+      unless ref($value) && $value->isa('Bio::LocationI');
+    $self->{'location'} = $value;
+  }
+  else {
+    # guarantees a real location object is returned every time
+    $self->{'location'} ||= Bio::Location::Simple->new();
+  }
+
+  return $self->{'location'};
+}
+
+=head2 add_target()
+
+ Usage   : $seqfeature->add_target(Bio::LocatableSeq->new(...));
+ Function: adds a target location on another reference sequence for this feature
+ Returns : true on success
+ Args    : a Bio::LocatableSeq object
+
+=cut
+
+sub add_target {
+  # Append a target to this feature's list of targets.
+  #   $seq    : a Bio::LocatableSeq object
+  #   returns : $seq
+  #   throws  : when $seq is not a Bio::LocatableSeq object
+  my ($self,$seq) = @_;
+  # the isa() test has to be made on the argument ($seq); without the
+  # sigil it would be a class-method call on a package named 'seq'
+  $self->throw("$seq is not a Bio::LocatableSeq, bailing out")
+    unless ref($seq) and $seq->isa('Bio::LocatableSeq');
+  push @{ $self->{'targets'} }, $seq;
+  return $seq;
+}
+
+=head2 each_target()
+
+ Usage   : @targets = $seqfeature->each_target();
+ Function: Returns a list of Bio::LocatableSeqs which are the locations of this object.
+           To obtain the "primary" location, see L</location()>.
+ Returns : a list of 0..N Bio::LocatableSeq objects
+ Args    : none
+
+=cut
+
+sub each_target {
+  # Returns the list of targets stored by add_target(), or an empty
+  # list when there are none.
+  my $self    = shift;
+  my $targets = $self->{'targets'};
+
+  return () unless $targets;
+  return @$targets;
+}
+
+=head2 _expand_region
+
+ Title   : _expand_region
+ Usage   : $self->_expand_region($feature);
+ Function: Expand the total region covered by this feature to
+           accommodate the given feature.
+
+           May be called whenever any kind of subfeature is added to this
+           feature. add_SeqFeature() already does this.
+ Returns : 
+ Args    : A Bio::SeqFeatureI implementing object.
+
+=cut
+
+sub _expand_region {
+    my $self = shift;
+    my $feat = shift;
+
+    # only a warning - processing carries on regardless
+    $self->warn("$feat does not implement Bio::SeqFeatureI")
+        unless $feat->isa('Bio::SeqFeatureI');
+
+    my $has_bounds = defined($self->start()) || defined($self->end());
+
+    if ($has_bounds) {
+        # grow to the union of our own range and the other feature's range
+        my $range = $self->union($feat);
+        $self->start($range->start);
+        $self->end($range->end);
+        $self->strand($range->strand);
+    } else {
+        # neither start nor end set yet: simply take over the other
+        # feature's coordinates (and its strand if we have none)
+        $self->start($feat->start());
+        $self->end($feat->end());
+        $self->strand($feat->strand) unless defined($self->strand());
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/AnnotationAdaptor.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/AnnotationAdaptor.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/AnnotationAdaptor.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,487 @@
+# $Id: AnnotationAdaptor.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::AnnotationAdaptor
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::AnnotationAdaptor - integrates SeqFeatureIs annotation
+
+=head1 SYNOPSIS
+
+   use Bio::SeqFeature::Generic;
+   use Bio::SeqFeature::AnnotationAdaptor;
+
+   # obtain a SeqFeatureI implementing object somehow
+   my $feat = Bio::SeqFeature::Generic->new(-start => 10, -end => 20);
+
+   # add tag/value annotation
+   $feat->add_tag_value("mytag", "value of tag mytag");
+   $feat->add_tag_value("mytag", "another value of tag mytag");
+
+   # Bio::SeqFeature::Generic also provides annotation(), which returns a
+   # Bio::AnnotationCollectionI compliant object
+   $feat->annotation->add_Annotation("dbxref", $dblink);
+
+   # to integrate tag/value annotation with AnnotationCollectionI
+   # annotation, use this adaptor, which also implements 
+   # Bio::AnnotationCollectionI
+   my $anncoll = Bio::SeqFeature::AnnotationAdaptor->new(-feature => $feat);
+
+   # this will now return tag/value pairs as 
+   # Bio::Annotation::SimpleValue objects
+   my @anns = $anncoll->get_Annotations("mytag");
+   # other added before annotation is available too
+   my @dblinks = $anncoll->get_Annotations("dbxref");
+
+   # also supports transparent adding of tag/value pairs in 
+   # Bio::AnnotationI flavor
+   my $tagval = Bio::Annotation::SimpleValue->new(-value => "some value",
+                                                  -tagname => "some tag");
+   $anncoll->add_Annotation($tagval);
+   # this is now also available from the feature's tag/value system
+   my @vals = $feat->each_tag_value("some tag");
+
+=head1 DESCRIPTION
+
+L<Bio::SeqFeatureI> defines light-weight annotation of features
+through tag/value pairs. Conversely, L<Bio::AnnotationCollectionI>
+together with L<Bio::AnnotationI> defines an annotation bag, which is
+better typed, but more heavy-weight because it contains every single
+piece of annotation as objects. The frequently used base
+implementation of Bio::SeqFeatureI, Bio::SeqFeature::Generic, defines
+an additional slot for AnnotationCollectionI-compliant annotation.
+
+This adaptor provides a L<Bio::AnnotationCollectionI> compliant,
+unified, and integrated view on the annotation of L<Bio::SeqFeatureI>
+objects, including tag/value pairs, and annotation through the
+annotation() method, if the object supports it. Code using this
+adaptor does not need to worry about the different ways of possibly
+annotating a SeqFeatureI object, but can instead assume that it
+strictly follows the AnnotationCollectionI scheme. The price to pay is
+that retrieving and adding annotation will always use objects instead
+of light-weight tag/value pairs.
+
+In other words, this adaptor allows us to keep the best of both
+worlds. If you create tens of thousands of feature objects, and your
+only annotation is tag/value pairs, you are best off using the
+features' native tag/value system. If you create a smaller number of
+features, but with rich and typed annotation mixed with tag/value
+pairs, this adaptor may be for you. Since its implementation is by
+double-composition, you only need to create one instance of the
+adaptor. In order to transparently annotate a feature object, set the
+feature using the feature() method. Every annotation you add will be
+added to the feature object, and hence will not be lost when you set
+feature() to the next object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+#' Let the code begin...
+
+
+package Bio::SeqFeature::AnnotationAdaptor;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Annotation::SimpleValue;
+use Data::Dumper;
+
+use base qw(Bio::Root::Root Bio::AnnotationCollectionI Bio::AnnotatableI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SeqFeature::AnnotationAdaptor();
+ Function: Builds a new Bio::SeqFeature::AnnotationAdaptor object 
+ Returns : an instance of Bio::SeqFeature::AnnotationAdaptor
+ Args    : Named parameters
+            -feature    the Bio::SeqFeatureI implementing object to adapt
+                        (mandatory to be passed here, or set via feature()
+                        before calling other methods)
+            -annotation the Bio::AnnotationCollectionI implementing object
+                        for storing richer annotation (this will default to
+                        the $feature->annotation() if it supports it)
+            -tagvalue_factory the object factory to use for creating tag/value
+                        pair representing objects
+
+
+=cut
+
+sub new {
+  # Constructor; understands the named parameters -feature, -annotation
+  # and -tagvalue_factory (see the POD above).
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  my ($feat,$anncoll,$fact) =
+      $self->_rearrange([qw(FEATURE
+                            ANNOTATION
+                            TAGVALUE_FACTORY)], @args);
+
+  $self->feature($feat) if $feat;
+  # store the annotation collection whenever one was passed in, whether
+  # or not a feature was given as well
+  $self->annotation($anncoll) if $anncoll;
+  $self->tagvalue_object_factory($fact) if $fact;
+
+  return $self;
+}
+
+=head2 feature
+
+ Title   : feature
+ Usage   : $obj->feature($newval)
+ Function: Get/set the feature that this object adapts to an
+           AnnotationCollectionI.
+ Example : 
+ Returns : value of feature (a Bio::SeqFeatureI compliant object)
+ Args    : new value (a Bio::SeqFeatureI compliant object, optional)
+
+
+=cut
+
+sub feature{
+    # Plain get/set accessor on the 'feature' slot; an undefined
+    # argument leaves the stored value alone.
+    my $self  = shift;
+    my $value = shift;
+
+    $self->{'feature'} = $value if defined $value;
+    return $self->{'feature'};
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($newval)
+ Function: Get/set the AnnotationCollectionI implementing object used by
+           this adaptor to store additional annotation that cannot be stored
+           by the SeqFeatureI itself.
+
+           If requested before having been set, the value will default to the
+           annotation object of the feature if it has one.
+ Example : 
+ Returns : value of annotation (a Bio::AnnotationCollectionI compliant object)
+ Args    : new value (a Bio::AnnotationCollectionI compliant object, optional)
+
+
+=cut
+
+sub annotation{
+    # Get/set the annotation collection used by this adaptor.
+    #   $value  : optional new collection object
+    #   returns : the stored collection; when none has been stored, the
+    #             adapted feature's own annotation() if it offers one
+    my ($self,$value) = @_;
+
+    if( defined $value) {
+        $self->{'annotation'} = $value;
+    }
+    if(! exists($self->{'annotation'})) {
+        # fall back to the feature's collection - but only when a feature
+        # has been set, so that no method is called on an undefined value
+        my $feat = $self->feature();
+        if($feat && $feat->can('annotation')) {
+            return $feat->annotation();
+        }
+    }
+    return $self->{'annotation'};
+}
+
+=head1 AnnotationCollectionI implementing methods
+
+=cut
+
+=head2 get_all_annotation_keys
+
+ Title   : get_all_annotation_keys
+ Usage   : $ac->get_all_annotation_keys()
+ Function: gives back a list of annotation keys, which are simple text strings
+ Returns : list of strings
+ Args    : none
+
+=cut
+
+sub get_all_annotation_keys{
+    my $self = shift;
+
+    # start with the tags from the feature object
+    my @keys = $self->feature()->all_tags();
+
+    # then add the keys of the annotation implementation, leaving out
+    # those that are tags of the feature as well
+    if($self->annotation()) {
+        foreach my $ann_key ($self->annotation()->get_all_annotation_keys()) {
+            push(@keys, $ann_key) unless $self->feature->has_tag($ann_key);
+        }
+    }
+
+    return @keys;
+}
+
+
+=head2 get_Annotations
+
+ Title   : get_Annotations
+ Usage   : my @annotations = $collection->get_Annotations('key')
+ Function: Retrieves all the Bio::AnnotationI objects for a specific key
+ Returns : list of Bio::AnnotationI - empty if no objects stored for a key
+ Args    : string which is key for annotations
+
+=cut
+
+sub get_Annotations{
+    my $self = shift;
+    my @keys = @_;
+
+    # fetch the annotation object factory (not used by the code below)
+    my $fact = $self->tagvalue_object_factory();
+
+    # without explicit keys, fall back to all tags of the feature
+    if (!@keys) {
+        @keys = $self->feature->all_tags();
+    }
+
+    # Tag/value pairs are no longer turned into SimpleValue objects here:
+    # don't bother.  SeqFeatureI now inherits AnnotatableI so
+    # $self->annotation() will give you all you need
+
+    # collect what is in the annotation implementation, if any
+    my @anns;
+    if($self->annotation()) {
+        push(@anns, $self->annotation->get_Annotations(@keys));
+    }
+
+    return @anns;
+}
+
+=head2 get_num_of_annotations
+
+ Title   : get_num_of_annotations
+ Usage   : my $count = $collection->get_num_of_annotations()
+ Function: Returns the count of all annotations stored in this collection 
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub get_num_of_annotations{
+  my $self = shift;
+
+  # the feature's own tags are not counted separately; only the
+  # annotation implementation (if there is one) contributes
+  my $total = 0;
+  if ($self->annotation()) {
+    $total += $self->annotation()->get_num_of_annotations();
+  }
+
+  return $total;
+}
+
+=head1 Implementation specific functions - to allow adding
+
+=cut
+
+=head2 add_Annotation
+
+ Title   : add_Annotation
+ Usage   : $self->add_Annotation('reference',$object);
+           $self->add_Annotation($object,'Bio::MyInterface::DiseaseI');
+           $self->add_Annotation($object);
+           $self->add_Annotation('disease',$object,'Bio::MyInterface::DiseaseI');
+ Function: Adds an annotation for a specific key.
+
+           If the key is omitted, the object to be added must provide a value
+           via its tagname().
+
+           If the archetype is provided, this and future objects added under
+           that tag have to comply with the archetype and will be rejected
+           otherwise.
+
+           This implementation will add all Bio::Annotation::SimpleValue
+           objects to the adapted features as tag/value pairs. Caveat: this
+           may potentially result in information loss if a derived object
+           is supplied.
+
+ Returns : none
+ Args    : annotation key ('disease', 'dblink', ...)
+           object to store (must be Bio::AnnotationI compliant)
+           [optional] object archetype to map future storage of object 
+                      of these types to
+
+=cut
+
+sub add_Annotation{
+    my ($self,$key,$object,$archetype) = @_;
+
+    # Key-less call forms -- add_Annotation($obj) or
+    # add_Annotation($obj, $archetype) -- are recognised by the first
+    # argument being an AnnotationI object while the second one is not a
+    # reference.  The key is then derived from the object's tagname().
+    my $key_omitted = ref($key) && $key->isa("Bio::AnnotationI")
+        && !($object && ref($object));
+    if( $key_omitted ) {
+        $archetype = $object if $object;
+        $object    = $key;
+        $key       = $object->tagname();
+        # the tagname may itself be an object (OntologyTermI); use its name
+        $key = $key->name() if $key && ref($key);
+        $key or $self->throw("Annotation object must have a tagname if key omitted");
+    }
+
+    defined $object
+        or $self->throw("Must have at least key and object in add_Annotation");
+
+    unless( ref($object) && $object->isa("Bio::AnnotationI") ) {
+        $self->throw("object must be a Bio::AnnotationI compliant object, otherwise we wont add it!");
+    }
+
+    # SimpleValue objects become tag/value pairs on the adapted feature,
+    # provided the feature supports adding tags.
+    if( $object->isa("Bio::Annotation::SimpleValue") &&
+        $self->feature()->can('add_tag_value') ) {
+        return $self->feature()->add_tag_value($key, $object->value());
+    }
+
+    # Everything else goes to the annotation collection implementation,
+    # which is created on demand.
+    my $delegate = $self->annotation();
+    unless( $delegate ) {
+        $delegate = Bio::Annotation::Collection->new();
+        $self->annotation($delegate);
+    }
+    return $delegate->add_Annotation($key,$object,$archetype)
+        if $delegate->can('add_Annotation');
+
+    $self->throw("Annotation implementation does not allow adding!");
+}
+
+=head2 remove_Annotations
+
+ Title   : remove_Annotations
+ Usage   :
+ Function: Remove the annotations for the specified key from this
+           collection.
+
+           If the key happens to be a tag, then the tag is removed
+           from the feature.
+
+ Example :
+ Returns : an array Bio::AnnotationI compliant objects which were stored
+           under the given key(s)
+ Args    : the key(s) (tag name(s), one or more strings) for which to
+           remove annotations (optional; if none given, flushes all
+           annotations)
+
+
+=cut
+
+sub remove_Annotations{
+    my ($self, @keys) = @_;
+
+    # no keys given means: flush everything
+    unless( @keys ) {
+        @keys = $self->get_all_annotation_keys();
+    }
+
+    # remember what is about to be removed so it can be handed back
+    my @removed = $self->get_Annotations(@keys);
+
+    for my $tag (@keys) {
+        # a key that is a feature tag is removed from the feature
+        if( $self->feature->has_tag($tag) ) {
+            $self->feature->remove_tag($tag);
+        }
+
+        # the annotation implementation, if there is one, gets the same
+        # request; implementations without remove support only warn
+        my $delegate = $self->annotation();
+        next unless $delegate;
+        if( $delegate->can('remove_Annotations') ) {
+            $delegate->remove_Annotations($tag);
+        } else {
+            $self->warn("Annotation bundle implementation ".ref($delegate).
+                        " does not allow remove!");
+        }
+    }
+    return @removed;
+}
+
+=head1 Additional methods
+
+=cut
+
+=head2 tagvalue_object_factory
+
+ Title   : tagvalue_object_factory
+ Usage   : $obj->tagvalue_object_factory($newval)
+ Function: Get/set the object factory to use for creating objects that
+           represent tag/value pairs (e.g.,
+           Bio::Annotation::SimpleValue).
+
+           The object to be created is expected to follow
+           Bio::Annotation::SimpleValue in terms of supported
+           arguments at creation time, and the methods.
+
+ Example : 
+ Returns : A Bio::Factory::ObjectFactoryI compliant object
+ Args    : new value (a Bio::Factory::ObjectFactoryI compliant object, 
+           optional)
+
+
+=cut
+
+sub tagvalue_object_factory{
+    my $self    = shift;
+    my $factory = shift;
+
+    # Combined getter/setter: a defined argument replaces the stored
+    # factory; the currently stored factory is returned either way.
+    $self->{'tagval_object_factory'} = $factory if defined $factory;
+    return $self->{'tagval_object_factory'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Collection.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Collection.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Collection.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,536 @@
+# $Id: Collection.pm,v 1.18.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Collection
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Collection - A container class for SeqFeatures
+suitable for performing operations such as finding features within a
+range, that match a certain feature type, etc.
+
+=head1 SYNOPSIS
+
+  use Bio::SeqFeature::Collection;
+  use Bio::Location::Simple;
+  use Bio::Tools::GFF;
+  use Bio::Root::IO;
+  # let's first input some features
+  my $gffio = Bio::Tools::GFF->new(-file => Bio::Root::IO->catfile
+  				 ("t","data","myco_sites.gff"),
+  				 -gff_version => 2);
+  my @features = ();
+  # loop over the input stream
+  while(my $feature = $gffio->next_feature()) {
+      # do something with feature
+      push @features, $feature;
+  }
+  $gffio->close();
+  # build the Collection object
+  my $col = new Bio::SeqFeature::Collection();
+  # add these features to the object
+  my $totaladded = $col->add_features(\@features);
+
+  my @subset = $col->features_in_range(-start => 1,
+  				     -end => 25000,
+  				     -strand => 1,
+  				     -contain => 0);
+  # subset should have 18 entries for this dataset
+  print "size is ", scalar @subset, "\n";
+  @subset = $col->features_in_range(-range => Bio::Location::Simple->new
+  				  (-start => 70000,
+  				   -end => 150000,
+  				   -strand => -1),
+  				  -contain => 1,
+  				  -strandmatch => 'strong');
+
+  # subset should have 22 entries for this dataset
+  print "size is ", scalar @subset, "\n";
+  print "total number of features in collection is ",
+         $col->feature_count(),"\n";
+
+=head1 DESCRIPTION
+
+This object will efficiently allow one to query subsets of ranges
+within a large collection of sequence features (in fact the objects
+just have to be Bio::RangeI compliant).  This is done by the creation
+of bins which are stored in order in a B-Tree data structure as
+provided by the DB_File interface to the Berkeley DB.
+
+This is based on work done by Lincoln for storage in a mysql instance
+- this is intended to be an embedded in-memory implementation for
+easily querying for subsets of a large range set.
+
+Collections can be made persistent by keeping the indexfile and
+passing in the -keep flag like this:
+
+  my $collection = new Bio::SeqFeature::Collection(-keep => 1,
+                                                   -file => 'col.idx');
+  $collection->add_features(\@features);
+  undef $collection;
+
+  # To reuse this collection, next time you initialize a Collection object
+  # specify the filename and the index will be reused.
+  $collection = new Bio::SeqFeature::Collection(-keep => 1,
+                                                -file => 'col.idx');
+
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Using code and strategy developed by Lincoln Stein (lstein at cshl.org)
+in Bio::DB::GFF implementation.  Credit also to Lincoln for suggesting
+using Storable to serialize features rather than my previous implementation
+which kept the features in memory.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Collection;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::DB::GFF::Util::Binning;
+use DB_File;
+use Bio::Location::Simple;
+use Bio::SeqFeature::Generic;
+use Storable qw(freeze thaw);
+
+use base qw(Bio::Root::Root);
+
+
+# This may need to get re-optimized for BDB usage as these
+# numbers were derived empirically by Lincoln on a mysql srv
+# running on his laptop
+
+# this is the largest that any reference sequence can be (100 megabases)
+use constant MAX_BIN    => 100_000_000;
+
+# this is the smallest bin (1 K)
+use constant MIN_BIN    => 1_000;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SeqFeature::Collection();
+ Function: Builds a new Bio::SeqFeature::Collection object
+ Returns : Bio::SeqFeature::Collection
+ Args    :
+
+           -minbin        minimum value to use for binning
+                          (default is 1,000)
+           -maxbin        maximum value to use for binning
+                          (default is 100,000,000)
+           -file          filename to store/read the
+                          BTREE from rather than an in-memory structure
+                          (default is false and in-memory).
+           -keep          boolean, will not remove index file on
+                          object destruction.
+           -features      Array ref of features to add initially
+
+=cut
+
+# Constructor.  Parses the named arguments, configures the DB_File B-tree
+# (duplicate keys allowed, keys ordered by _compare), ties the index and
+# only then loads any features passed in via -features.
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($maxbin,$minbin, $file, $keep,
+      $features) = $self->_rearrange([qw(MAXBIN MINBIN FILE KEEP
+					 FEATURES)],@args);
+
+  defined $maxbin && $self->max_bin($maxbin);
+  defined $minbin && $self->min_bin($minbin);
+
+  # several features can share one bin, so duplicate keys must be allowed
+  $DB_BTREE->{'flags'} = R_DUP ;
+  $DB_BTREE->{'compare'} = \&_compare;
+  $self->{'_btreehash'} = {};
+  if( $file ) {
+      $self->debug("using file $file");
+      $self->indexfile($file);
+  }
+  $self->keep($keep);
+  $self->{'_btree'} = tie %{$self->{'_btreehash'}},
+  'DB_File', $self->indexfile, O_RDWR|O_CREAT, 0640, $DB_BTREE;
+  # fail loudly here instead of dying later on an undefined handle
+  $self->{'_btree'} || $self->throw("Unable to tie DB_File handle: $!");
+
+  # add_features() writes through the tied handle, so it must only be
+  # called once the tie above has succeeded
+  defined $features && $self->add_features($features);
+  return $self;
+}
+
+
+=head2 add_features
+
+ Title   : add_features
+ Usage   : $collection->add_features(\@features);
+ Function:
+ Returns : number of features added
+ Args    : arrayref of Bio::SeqFeatureI objects to index
+
+
+=cut
+
+sub add_features{
+   my ($self,$feats) = @_;
+
+   # anything but an array reference is refused with a warning
+   unless( ref($feats) =~ /ARRAY/i ) {
+       $self->warn("Must provide a valid Array reference to add_features");
+       return 0;
+   }
+
+   my $added = 0;
+   foreach my $f ( @$feats ) {
+       # skip (with a warning) everything that is not a Bio::RangeI object
+       unless( $f && ref($f) && $f->isa('Bio::RangeI') ) {
+	   $self->warn("Must provide valid Bio::RangeI objects to add_features, skipping object '$f'\n");
+	   next;
+       }
+
+       # the bin number is the B-tree key, the serialized feature its value
+       my $bin = bin($f->start,$f->end,$self->min_bin);
+       $self->{'_btree'}->put($bin, feature_freeze($f));
+
+       $f->isa('Bio::SeqFeature::Generic')
+	   and $self->debug( "$bin for ". $f->location->to_FTstring(). " matches ".$#{$self->{'_features'}}. "\n");
+       $added++;
+   }
+   return $added;
+}
+
+
+=head2 features_in_range
+
+ Title   : features_in_range
+ Usage   : my @features = $collection->features_in_range($range)
+ Function: Retrieves a list of features which are contained in or overlap
+           the requested range (see Args for the way to specify overlap
+           or only those contained)
+ Returns : List of Bio::SeqFeatureI objects
+ Args    : -range => Bio::RangeI object defining range to search,
+           OR
+           -start  => start,
+           -end    => end,
+           -strand  => strand
+
+           -contain => boolean - true if feature must be completely
+                       contained with range
+                       OR false if should include features that simply overlap
+                       the range. Default: true.
+           -strandmatch =>  'strong',  ranges must have the same strand
+                            'weak',    ranges must have the same
+                                           strand or no strand
+                            'ignore', ignore strand information
+                           Default. 'ignore'.
+
+=cut
+
+# Returns the stored features that are contained in, or overlap, a query
+# range.  Accepts either a single Bio::RangeI object or the named
+# arguments -start/-end/-strand/-range/-contain/-strandmatch.  Returns an
+# empty list (after a warning) when the strand or the range is invalid.
+sub features_in_range{
+   my $self = shift;
+   my (@args) = @_;
+   my ($range, $contain, $strandmatch,$start,$end,$strand);
+   if( @args == 1 ) {
+       # NOTE(review): in this single-argument form $contain stays
+       # undefined, so overlap matching is used although the POD gives
+       # "true" as the default -- confirm which one is intended.
+       $range = shift @args;
+   } else {
+       ($start,$end,$strand,$range,
+	$contain,$strandmatch) = $self->_rearrange([qw(START END
+						       STRAND
+						       RANGE CONTAIN
+						       STRANDMATCH)],
+						   @args);
+       $contain = 1 unless defined $contain;
+   }
+   $strand = 1 unless defined $strand;
+   if( $strand =~ /^[\-\+]$/ ) {
+       # a bare sign is turned into "+1" / "-1"
+       $strand .= 1;
+   } elsif( $strand !~ /^[\-\+]?1$/ ) {
+       $self->warn("must provide a valid numeric or +/- for strand");
+       return ();
+   }
+
+   if( !defined $start && !defined $end ) {
+       if( ! defined $range || !ref($range) || ! $range->isa("Bio::RangeI") )
+       {
+	   $self->warn("Must defined a valid Range for the method feature_in_range");
+	   return ();
+       }
+       ($start,$end,$strand) = ($range->start,$range->end,$range->strand);
+   }
+   my $r = Bio::Location::Simple->new(-start => $start,
+				      -end   => $end,
+				      -strand => $strand);
+
+   my $maxbin = $self->max_bin;
+   my $minbin = $self->min_bin;
+   my $tier = $maxbin;
+   my ($k,$v,@bins) = ("",undef);
+   # walk the bin tiers from the coarsest to the finest, each tier being
+   # ten times smaller than the one before
+   while ($tier >= $minbin) {
+       my ($tier_start,$tier_stop) = (bin_bot($tier,$start),
+				       bin_top($tier,$end));
+       if( $tier_start == $tier_stop ) {
+	   # one single bin on this tier: fetch all its duplicates at once
+	   my @vals = $self->{'_btree'}->get_dup($tier_start);
+	   push @bins, map { thaw($_) } @vals;
+       } else {
+	   # a run of bins: position a cursor at the first key and step
+	   # forward until the key leaves the tier's bin interval
+	   $k = $tier_start;
+	   for( my $rc = $self->{'_btree'}->seq($k,$v,R_CURSOR);
+	        $rc == 0;
+	        $rc = $self->{'_btree'}->seq($k,$v, R_NEXT) ) {
+	       last if( $k > $tier_stop || $k < $tier_start);
+	       push @bins, thaw($v);
+	   }
+       }
+       $tier /= 10;
+   }
+
+   # Load the class of every thawed object, once per class.
+   # NOTE(review): the class name comes from the stored data and goes into
+   # a string eval; only use index files from a trusted source.
+   my %seen = ();
+   foreach my $t ( map { ref($_) } @bins) {
+       next if $seen{$t}++;
+       eval "require $t";
+
+       if( $@ ) {
+	   $self->warn("Trying to thaw a stored feature $t which does not appear in your Perl library. $@");
+	   next;
+       }
+   }
+   $strandmatch = 'ignore' unless defined $strandmatch;
+   return ( $contain ) ? grep { $r->contains($_,$strandmatch) } @bins :
+       grep { $r->overlaps($_,$strandmatch)} @bins;
+}
+
+=head2 remove_features
+
+ Title   : remove_features
+ Usage   : $collection->remove_features(\@array)
+ Function: Removes the requested sequence features (based on features
+	   which have the same location)
+ Returns : Number of features removed
+ Args    : Arrayref of Bio::RangeI objects
+
+
+=cut
+
+# Removes from the index every stored entry whose serialized form is
+# identical to that of one of the given features.  Returns the number of
+# entries removed, or 0 (after a warning) when not given an array ref.
+sub remove_features{
+   my ($self,$feats) = @_;
+   if( ref($feats) !~ /ARRAY/i ) {
+       $self->warn("Must provide a valid Array reference to remove_features");
+       return 0;
+   }
+   my $countprocessed = 0;
+
+   foreach my $f ( @$feats ) {
+       next if ! ref($f) || ! $f->isa('Bio::RangeI');
+       my $bin = bin($f->start,$f->end,$self->min_bin);
+       my @vals = $self->{'_btree'}->get_dup($bin);
+       my $vcount = scalar @vals;
+
+       # Serialize the feature once per feature rather than once per
+       # stored value, and only if the bin holds anything to compare with.
+       my $frozen = @vals ? feature_freeze($f) : undef;
+
+       foreach my $v ( @vals )  {
+	   # Once we have uniquely identifiable field
+	   # I think it will work better.
+	   if( $v eq $frozen ) {
+	       $self->{'_btree'}->del_dup($bin,$v);
+	       $vcount--;
+	       $countprocessed++;
+	   }
+       }
+       # nothing is left under this key: drop the key itself
+       if( $vcount == 0 ) {
+	   $self->{'_btree'}->del($bin);
+       }
+   }
+   return $countprocessed;
+}
+
+=head2 get_all_features
+
+ Title   : get_all_features
+ Usage   : my @f = $col->get_all_features()
+ Function: Return all the features stored in this collection (Could be large)
+ Returns : Array of Bio::RangeI objects
+ Args    : None
+
+
+=cut
+
+sub get_all_features{
+   my $self = shift;
+   my @found;
+   my ($key,$value);
+
+   # walk the whole B-tree with a cursor, thawing every defined value
+   my $status = $self->{'_btree'}->seq($key, $value, R_FIRST);
+   while( $status == 0 ) {
+       push @found, thaw($value) if defined $value;
+       $status = $self->{'_btree'}->seq($key, $value, R_NEXT);
+   }
+
+   # cross-check against the independently computed feature count
+   unless( scalar @found == $self->feature_count() ) {
+       $self->warn("feature count does not match actual count\n");
+   }
+   return @found;
+}
+
+
+=head2 min_bin
+
+ Title   : min_bin
+ Usage   : my $minbin= $self->min_bin;
+ Function: Get/Set the minimum value to use for binning
+ Returns : integer
+ Args    : [optional] minimum bin value
+
+
+=cut
+
+sub min_bin {
+  my $self = shift;
+  my $min  = shift;
+
+  # store a new minimum when one is supplied
+  $self->{'_min_bin'} = $min if defined $min;
+
+  # an unset (or false) stored value falls back to the MIN_BIN constant
+  return $self->{'_min_bin'}  || MIN_BIN;
+}
+
+=head2 max_bin
+
+ Title   : max_bin
+ Usage   : my $maxbin= $self->max_bin;
+ Function: Get/Set the maximum value to use for binning
+ Returns : integer
+ Args    : [optional] maximum bin value
+
+
+=cut
+
+# Get/set the maximum value to use for binning.  An unset (or false)
+# stored value falls back to the MAX_BIN constant.
+sub max_bin {
+  my ($self,$max) = @_;
+  if( defined $max ) {
+      $self->{'_max_bin'} = $max;
+  }
+  # read the same slot the setter writes to, so that a configured
+  # maximum is actually honoured
+  return $self->{'_max_bin'} || MAX_BIN;
+}
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : my $c = $col->feature_count()
+ Function: Retrieve the total number of features in the collection
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub feature_count {
+    my ($self) = @_;
+
+    # count the entries of the tied hash whose value is defined
+    return scalar grep { defined $self->{'_btreehash'}->{$_} }
+                  keys %{$self->{'_btreehash'}};
+}
+
+=head2 indexfile
+
+ Title   : indexfile
+ Usage   : $obj->indexfile($newval)
+ Function: Get/set the filename where index is kept
+ Returns : value of indexfile (a filename string)
+ Args    : on set, new value (a filename string )
+
+
+=cut
+
+sub indexfile{
+    my ($self, @newval) = @_;
+
+    # any argument at all (even undef) replaces the stored filename
+    $self->{'indexfile'} = $newval[0] if @newval;
+    return $self->{'indexfile'};
+}
+
+=head2 keep
+
+ Title   : keep
+ Usage   : $obj->keep($newval)
+ Function: Get/set boolean flag to keep the indexfile after
+           exiting program
+ Example :
+ Returns : value of keep (boolean)
+ Args    : on set, new value (boolean)
+
+
+=cut
+
+sub keep{
+    my ($self, @newval) = @_;
+
+    # any argument at all (even undef) replaces the stored flag
+    $self->{'keep'} = $newval[0] if @newval;
+    return $self->{'keep'};
+}
+
+# Key comparison callback installed into $DB_BTREE: a defined key sorts
+# before an undefined one, otherwise keys are compared numerically.
+sub _compare{
+    my ($left_defined, $right_defined) = (defined $_[0], defined $_[1]);
+
+    return -1 if $left_defined  && ! $right_defined;
+    return  1 if $right_defined && ! $left_defined;
+    return $_[0] <=> $_[1];
+}
+
+# Serializes a feature with Storable.  Every cleanup method registered on
+# the object is unregistered first, so the passed-in object is modified.
+sub feature_freeze {
+    my ($feature) = @_;
+
+    $feature->_unregister_for_cleanup($_) for $feature->_cleanup_methods;
+
+    return freeze($feature);
+}
+
+# Counterpart of feature_freeze: rebuilds an object from its Storable
+# serialization.
+sub feature_thaw {
+    my ($frozen) = @_;
+    return thaw($frozen);
+}
+
+# Destructor: runs the parent destructor, releases the DB_File handle,
+# unties the hash, and deletes the index file unless the keep flag is set.
+sub DESTROY {
+    my ($self) = @_;
+    $self->SUPER::DESTROY();
+
+    # drop the object handle before untying the hash it belongs to
+    undef $self->{'_btree'};
+    untie %{$self->{'_btreehash'}};
+
+    # an index the caller asked to keep is left alone
+    return if $self->keep;
+
+    # without an index filename there is nothing to delete
+    my $path = $self->indexfile;
+    return unless $path;
+
+    $self->debug( "unlinking ".$path. "\n");
+    unlink($path);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/CollectionI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/CollectionI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/CollectionI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,135 @@
+# $Id: CollectionI.pm,v 1.5.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::CollectionI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::CollectionI - An interface for a collection of SeqFeatureI objects. 
+
+=head1 SYNOPSIS
+
+
+# get a Bio::SeqFeature::CollectionI somehow
+# perhaps a Bio::SeqFeature::Collection
+
+
+    use Bio::SeqFeature::Collection;
+    my $collection = new Bio::SeqFeature::Collection;
+    $collection->add_features(\@featurelist);
+
+
+    $collection->features(-attributes => 
+			  [ { 'location' => new Bio::Location::Simple
+				  (-start=> 1, -end => 300) ,
+				  'overlaps' }]);
+
+=head1 DESCRIPTION
+
+This interface describes the basic methods needed for a collection of Sequence Features.  
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::CollectionI;
+use strict;
+use Carp;
+
+use base qw(Bio::Root::RootI);
+
+
+=head2 add_features
+
+ Title   : add_features
+ Usage   : $collection->add_features(\@features);
+ Function:
+ Returns : number of features added
+ Args    : arrayref of Bio::SeqFeatureI objects to index
+
+=cut
+
+sub add_features{
+    # interface stub: implementing classes have to override this method
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+
+=head2 features
+
+ Title   : features
+ Usage   : my @f = $collection->features(@args);
+ Returns : a list of Bio::SeqFeatureI objects
+ Args    : see below
+ Status  : public
+
+This routine will retrieve features associated with this collection
+object.  It can be used to return all features, or a subset based on
+their type, location, or attributes.
+
+  -types     List of feature types to return.  Argument is an array
+             of Bio::Das::FeatureTypeI objects or a set of strings
+             that can be converted into FeatureTypeI objects.
+
+  -callback   A callback to invoke on each feature.  The subroutine
+              will be passed to each Bio::SeqFeatureI object in turn.
+
+  -attributes A hash reference containing attributes to match.
+
+The -attributes argument is a hashref containing one or more attributes
+to match against:
+
+  -attributes => { Gene => 'abc-1',
+                   Note => 'confirmed' }
+
+Attribute matching is simple exact string matching, and multiple
+attributes are ANDed together.  See L<Bio::DB::ConstraintsI> for a
+more sophisticated take on this.
+
+If one provides a callback, it will be invoked on each feature in
+turn.  If the callback returns a false value, iteration will be
+interrupted.  When a callback is provided, the method returns undef.
+
+=cut
+
+sub features{
+    # interface stub: implementing classes have to override this method
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Computation.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Computation.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Computation.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,565 @@
+# $Id: Computation.pm,v 1.16.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Computation
+#
+# Cared for by mark Fiers <m.w.e.j.fiers at plant.wag-ur.nl>
+#
+# Copyright Ewan Birney, Mark Fiers
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Computation - Computation SeqFeature
+
+=head1 SYNOPSIS
+
+   $feat = new Bio::SeqFeature::Computation (
+                                -start => 10, -end => 100,
+				-strand => -1, -primary => 'repeat',
+				-program_name => 'GeneMark',
+				-program_date => '12-5-2000',
+				-program_version => 'x.y',
+				-database_name => 'Arabidopsis',
+				-database_date => '12-dec-2000',
+				-computation_id => 2231,
+				-score    => { no_score => 334 } );
+
+
+=head1 DESCRIPTION
+
+Bio::SeqFeature::Computation extends the Generic seqfeature object with
+a set of computation related fields and a more flexible set of storing
+more types of score and subseqfeatures. It is compatible with the Generic
+SeqFeature object.
+
+The new way of storing score values is similar to the tag structure in the 
+Generic object. For storing sets of subseqfeatures the array containing the
+subseqfeatures is now a hash which contains arrays of seqfeatures
+Both the score and subSeqfeature methods can be called in exactly the same
+way, the value's will be stored as a 'default' score or subseqfeature.
+
+=cut
+
+#'
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, Mark Fiers
+
+Ewan Birney E<lt>birney at sanger.ac.ukE<gt>
+Mark Fiers E<lt>m.w.e.j.fiers at plant.wag-ur.nlE<gt>
+
+=head1 DEVELOPERS
+
+This class has been written with an eye out for inheritance. The fields
+of the actual object hash are:
+
+   _gsf_sub_hash  = reference to a hash containing sets of sub arrays
+   _gsf_score_hash= reference to a hash for the score values
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqFeature::Computation;
+use strict;
+
+use Bio::Root::Root;
+
+use base qw(Bio::SeqFeature::Generic);
+						     
+# Constructor.  Everything Bio::SeqFeature::Generic understands is handled
+# by the parent constructor; the computation-specific named arguments are
+# then applied through their accessors.
+sub new {
+    my ( $class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    my ( $computation_id,
+	$program_name, $program_date, $program_version,
+	$database_name, $database_date, $database_version) =
+	    $self->_rearrange([qw(COMPUTATION_ID
+				  PROGRAM_NAME
+				  PROGRAM_DATE
+				  PROGRAM_VERSION
+				  DATABASE_NAME
+				  DATABASE_DATE
+				  DATABASE_VERSION
+				  )],@args);
+
+    # Test for definedness, not truth, so that values such as 0 (for
+    # instance a computation id or version of 0) are not silently dropped.
+    defined $program_name     && $self->program_name($program_name);
+    defined $program_date     && $self->program_date($program_date);
+    defined $program_version  && $self->program_version($program_version);
+    defined $database_name    && $self->database_name($database_name);
+    defined $database_date    && $self->database_date($database_date);
+    defined $database_version && $self->database_version($database_version);
+    defined $computation_id   && $self->computation_id($computation_id);
+
+    return $self;
+}
+
+=head2 has_score
+
+ Title   : has_score
+ Usage   : $value = $self->has_score('some_score')
+ Function: Tests whether a feature contains a score
+ Returns : TRUE if the SeqFeature has the score,
+           and FALSE otherwise.
+ Args    : The name of a score
+
+=cut
+
+sub has_score {
+    my ($self, $score) = @_;
+
+    # a defined name is looked up in the score hash ...
+    if( defined $score ) {
+        return exists $self->{'_gsf_score_hash'}->{$score};
+    }
+    # ... an undefined name yields an empty/undef (false) result
+    return;
+}
+
+=head2 add_score_value
+
+ Title   : add_score_value
+ Usage   : $self->add_score_value('P_value',224);
+ Returns : TRUE on success
+ Args    : score (string) and value (any scalar)
+
+=cut
+
+# Appends a value to the list of values stored under a score name.
+# Returns the new number of values stored under that name (always true),
+# or 0 after a warning when the name or the value is undefined.
+sub add_score_value {
+   my ($self, $score, $value) = @_;
+   if( ! defined $score || ! defined $value ) {
+       # do not interpolate the arguments here: at least one of them is
+       # undefined, which would only garble the message
+       $self->warn("must specify a defined score name and value to add_score_value");
+       return 0;
+   }
+
+   # the value list for a score name is created on first use
+   $self->{'_gsf_score_hash'}->{$score} ||= [];
+
+   return push(@{$self->{'_gsf_score_hash'}->{$score}},$value);
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $value = $comp_obj->score()
+           $comp_obj->score($value)
+ Function: Returns the 'default' score or sets the 'default' score
+           This method exists for compatibility reasons.
+	   It would equal ($comp_obj->each_score_value('default'))[0];
+ Returns : A value
+ Args    : (optional) a new value for the 'default' score 
+
+=cut
+
+sub score {
+    my ($self, $value) = @_;
+    my $result;
+
+    if( ! defined $value ) {
+        # getter: the first value stored under the 'default' score
+        ($result) = $self->each_score_value('default');
+    } elsif( ref($value) =~ /HASH/i ) {
+        # a hash reference adds one score value per key/value pair;
+        # nothing is returned in this case
+        while( my ($t,$val) = each %{ $value } ) {
+            $self->add_score_value($t,$val);
+        }
+    } else {
+        # a plain value is appended to the 'default' score values and
+        # returned as is
+        $self->add_score_value('default', $value);
+        $result = $value;
+    }
+    return $result;
+}
+
+=head2 each_score_value
+
+ Title   : each_score_value
+ Usage   : @values = $gsf->each_score_value('note');
+ Function: Returns a list of all the values stored
+           under a particular score.
+ Returns : A list of scalars
+ Args    : The name of the score
+
+=cut
+
+sub each_score_value {
+   my ($self, $score) = @_;
+
+   # known score name: hand back all of its values
+   if ( exists $self->{'_gsf_score_hash'}->{$score} ) {
+       return @{$self->{'_gsf_score_hash'}->{$score}};
+   }
+
+   # unknown score name: warn and return nothing
+   $self->warn("asking for score value that does not exist $score");
+   return;
+}
+
+
+=head2 all_scores
+
+ Title   : all_scores
+ Usage   : @scores = $feat->all_scores()
+ Function: Get a list of all the scores in a feature
+ Returns : An array of score names
+ Args    : none
+
+
+=cut
+
+sub all_scores {
+   my $self = shift;
+
+   # the score names are the keys of the score hash
+   return keys %{$self->{'_gsf_score_hash'}};
+}
+
+
+=head2 remove_score
+
+ Title   : remove_score
+ Usage   : $feat->remove_score('some_score')
+ Function: removes a score from this feature
+ Returns : nothing
+ Args    : score (string)
+
+
+=cut
+
+sub remove_score {
+   my ($self, $score) = @_;
+
+   # removing an unknown score is only worth a warning
+   exists $self->{'_gsf_score_hash'}->{$score}
+       or $self->warn("trying to remove a score that does not exist: $score");
+
+   # the result of delete (the removed value list, if any) is the result
+   return delete $self->{'_gsf_score_hash'}->{$score};
+}
+
+=head2 computation_id
+
+ Title   : computation_id
+ Usage   : $computation_id = $feat->computation_id()
+           $feat->computation_id($computation_id)
+ Function: get/set on computation id information
+ Returns : string
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub computation_id {
+  my $self  = shift;
+  my $value = shift;
+
+  # a defined argument replaces the stored computation id
+  $self->{'_gsf_computation_id'} = $value if defined $value;
+
+  return $self->{'_gsf_computation_id'};
+}
+
+
+
+
+=head2 program_name
+
+ Title   : program_name
+ Usage   : $program_name = $feat->program_name()
+           $feat->program_name($program_name)
+ Function: get/set on program name information
+ Returns : string
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub program_name {
+  my $self  = shift;
+  my $value = shift;
+
+  # a defined argument replaces the stored program name
+  $self->{'_gsf_program_name'} = $value if defined $value;
+
+  return $self->{'_gsf_program_name'};
+}
+
+=head2 program_date
+
+ Title   : program_date
+ Usage   : $program_date = $feat->program_date()
+           $feat->program_date($program_date)
+ Function: get/set on program date information
+ Returns : date (string)
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub program_date {
+  my ($self,$value) = @_;
+
+  # Only a defined argument overwrites the stored date; otherwise this is a get.
+  $self->{'_gsf_program_date'} = $value if defined($value);
+
+  return $self->{'_gsf_program_date'};
+}
+
+
+=head2 program_version
+
+ Title   : program_version
+ Usage   : $program_version = $feat->program_version()
+           $feat->program_version($program_version)
+ Function: get/set on program version information
+ Returns : version (string)
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub program_version {
+  my ($self,$value) = @_;
+
+  # Only a defined argument overwrites the stored version; otherwise this is a get.
+  $self->{'_gsf_program_version'} = $value if defined($value);
+
+  return $self->{'_gsf_program_version'};
+}
+
+=head2 database_name
+
+ Title   : database_name
+ Usage   : $database_name = $feat->database_name()
+           $feat->database_name($database_name)
+ Function: get/set on database name information
+ Returns : string
+ Args    : none if get, the new value if set
+
+=cut
+
+sub database_name {
+  my ($self,$value) = @_;
+
+  # Only a defined argument overwrites the stored name; otherwise this is a get.
+  $self->{'_gsf_database_name'} = $value if defined($value);
+
+  return $self->{'_gsf_database_name'};
+}
+
+=head2 database_date
+
+ Title   : database_date
+ Usage   : $database_date = $feat->database_date()
+           $feat->database_date($database_date)
+ Function: get/set on database date information
+ Returns : date (string)
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub database_date {
+  my ($self,$value) = @_;
+
+  # Only a defined argument overwrites the stored date; otherwise this is a get.
+  $self->{'_gsf_database_date'} = $value if defined($value);
+
+  return $self->{'_gsf_database_date'};
+}
+
+
+=head2 database_version
+
+ Title   : database_version
+ Usage   : $database_version = $feat->database_version()
+           $feat->database_version($database_version)
+ Function: get/set on database version information
+ Returns : version (string)
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub database_version {
+  my ($self,$value) = @_;
+
+  # Only a defined argument overwrites the stored version; otherwise this is a get.
+  $self->{'_gsf_database_version'} = $value if defined($value);
+
+  return $self->{'_gsf_database_version'};
+}
+
+=head2 sub_SeqFeature_type
+
+ Title   : sub_SeqFeature_type
+ Usage   : $sub_SeqFeature_type = $feat->sub_SeqFeature_type()
+           $feat->sub_SeqFeature_type($sub_SeqFeature_type)
+ Function: sub_SeqFeature_type is automatically set when adding
+           a sub_computation (sub_SeqFeature) to a computation object
+ Returns : sub_SeqFeature_type (string)
+ Args    : none if get, the new value if set
+
+=cut
+
+sub sub_SeqFeature_type {
+  my ($self, $value) = @_;
+
+  # Only a defined argument overwrites the stored type; otherwise this is a get.
+  $self->{'_gsf_sub_SeqFeature_type'} = $value if defined($value);
+
+  return $self->{'_gsf_sub_SeqFeature_type'};
+}
+
+=head2 all_sub_SeqFeature_types
+
+ Title   : all_sub_SeqFeature_types
+ Usage   : @all_sub_seqfeature_types = $comp->all_sub_SeqFeature_types();
+ Function: Returns an array with all subseqfeature types
+ Returns : An array
+ Args    : none
+
+=cut
+
+sub all_sub_SeqFeature_types {
+   my $self = shift;
+
+   # The type names are the keys of the hash that groups sub features by type.
+   return keys %{ $self->{'gsf_sub_hash'} };
+}
+
+=head2 sub_SeqFeature
+
+ Title   : sub_SeqFeature('sub_feature_type')
+ Usage   : @feats = $feat->sub_SeqFeature();
+           @feats = $feat->sub_SeqFeature('sub_feature_type');           
+ Function: Returns an array of sub Sequence Features of a specific
+           type or, if the type is omitted, all sub Sequence Features
+ Returns : An array
+ Args    : (optional) a sub_SeqFeature type (ie exon, pattern)
+
+=cut
+
+sub sub_SeqFeature {
+   my ($self, $ssf_type) = @_;
+
+   # No type given (undef or empty string): collect the sub SeqFeatures of
+   # every type. Testing defined() first avoids an "uninitialized value"
+   # warning when the method is called without an argument.
+   if (!defined($ssf_type) || $ssf_type eq '') {
+       my @all_feats = ();
+       foreach my $type (keys %{$self->{'gsf_sub_hash'}}) {
+           push @all_feats, @{$self->{'gsf_sub_hash'}->{$type}};
+       }
+       return @all_feats;
+   }
+
+   # A specific type was requested and is known: return just that list.
+   if (defined($self->{'gsf_sub_hash'}->{$ssf_type})) {
+       return @{$self->{'gsf_sub_hash'}->{$ssf_type}};
+   }
+
+   # Unknown type: warn and return nothing. Without the explicit return the
+   # sub would hand back whatever warn() evaluated to, which a caller in
+   # list context would mistake for a sub SeqFeature.
+   $self->warn("$ssf_type is not a valid sub SeqFeature type");
+   return;
+}
+
+=head2 add_sub_SeqFeature
+
+ Title   : add_sub_SeqFeature
+ Usage   : $feat->add_sub_SeqFeature($subfeat);
+           $feat->add_sub_SeqFeature($subfeat,'sub_seqfeature_type')
+           $feat->add_sub_SeqFeature($subfeat,'EXPAND')
+           $feat->add_sub_SeqFeature($subfeat,'EXPAND','sub_seqfeature_type')
+ Function: adds a SeqFeature into a specific subSeqFeature array.
+           with no 'EXPAND' qualifier, subfeat will be tested
+           as to whether it lies inside the parent, and throw
+           an exception if not.
+           If EXPAND is used, the parents start/end/strand will
+           be adjusted so that it grows to accommodate the new
+           subFeature,
+	   optionally a sub_seqfeature type can be defined.
+ Returns : nothing
+ Args    : An object which has the SeqFeatureI interface
+         : (optional) 'EXPAND'
+	 : (optional) 'sub_SeqFeature_type'
+
+=cut
+
+sub add_sub_SeqFeature{
+   my ($self,$feat,$var1, $var2) = @_;
+
+   # The two optional arguments ('EXPAND' and the type name) may come in
+   # either order, so they are glued together and the literal EXPAND is
+   # then cut out of the combined string; whatever remains is the type.
+   my $ssf_type = (defined($var1) ? $var1 : '') . (defined($var2) ? $var2 : '');
+   my $expand   = ($ssf_type =~ s/EXPAND//) ? 'EXPAND' : '';
+
+   unless ( $feat->isa('Bio::SeqFeatureI') ) {
+       $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
+   }
+
+   # Without EXPAND the sub feature has to lie inside the parent;
+   # with EXPAND the parent is grown to accommodate it.
+   if ($expand ne 'EXPAND') {
+       unless ( $self->contains($feat) ) {
+           $self->throw("$feat is not contained within parent feature, and expansion is not valid");
+       }
+   } else {
+       $self->_expand_region($feat);
+   }
+
+   # Features added without an explicit type are filed under 'default'.
+   $ssf_type = 'default' if ($ssf_type eq '');
+
+   unless (defined($self->{'gsf_sub_hash'}->{$ssf_type})) {
+       $self->{'gsf_sub_hash'}->{$ssf_type} = [];
+   }
+   $feat->sub_SeqFeature_type($ssf_type);
+   push @{$self->{'gsf_sub_hash'}->{$ssf_type}}, $feat;
+}
+
+=head2 flush_sub_SeqFeature
+
+ Title   : flush_sub_SeqFeature
+ Usage   : $sf->flush_sub_SeqFeature
+           $sf->flush_sub_SeqFeature('sub_SeqFeature_type');	
+ Function: Removes all sub SeqFeature or all sub SeqFeatures
+ 	   of a specified type 
+           (if you want to remove a more specific subset, take
+	    an array of them all, flush them, and add
+            back only the guys you want)
+ Example :
+ Returns : none
+ Args    : (optional) a sub_SeqFeature type
+
+
+=cut
+
+sub flush_sub_SeqFeature {
+   my ($self, $ssf_type) = @_;
+
+   # A type counts as "given" when it is defined and non-empty, so that a
+   # type literally named '0' is not mistaken for "flush everything".
+   if (defined($ssf_type) && $ssf_type ne '') {
+       # Flush only the requested type, if there is such a type.
+       if (defined($self->{'gsf_sub_hash'}->{$ssf_type})) {
+           delete $self->{'gsf_sub_hash'}->{$ssf_type};
+       } else {
+           $self->warn("$ssf_type is not a valid sub SeqFeature type");
+       }
+   } else {
+       # Zap the complete hash. The key must be 'gsf_sub_hash' (no leading
+       # underscore), the one all other sub SeqFeature methods read and
+       # write; resetting '_gsf_sub_hash' left every sub feature in place.
+       $self->{'gsf_sub_hash'} = {};
+   }
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/FeaturePair.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/FeaturePair.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/FeaturePair.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,560 @@
+# $Id: FeaturePair.pm,v 1.22.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::FeaturePair
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::FeaturePair - hold pair feature information e.g. blast hits
+
+=head1 SYNOPSIS
+
+    my $feat  = new Bio::SeqFeature::FeaturePair(-feature1 => $f1,
+						 -feature2 => $f2,
+					      );
+
+    # Bio::SeqFeatureI methods can be used
+
+    my $start = $feat->start;
+    my $end   = $feat->end;
+
+    # Bio::FeaturePair methods can be used
+    my $hstart = $feat->hstart;
+    my $hend   = $feat->hend;
+
+   my $feature1 = $feat->feature1;  # returns feature1 object
+
+=head1 DESCRIPTION
+
+A sequence feature object where the feature is itself a feature on
+another sequence - e.g. a blast hit where residues 1-40 of a protein
+sequence SW:HBA_HUMAN has hit to bases 100 - 220 on a genomic sequence
+HS120G22.  The genomic sequence coordinates are used to create one
+sequence feature $f1 and the protein coordinates are used to create
+feature $f2.  A FeaturePair object can then be made
+
+    my $fp = new Bio::SeqFeature::FeaturePair(-feature1 => $f1,   # genomic
+					      -feature2 => $f2,   # protein
+					      );
+
+This object can be used as a standard Bio::SeqFeatureI in which case
+
+    my $gstart = $fp->start  # returns start coord on feature1 - genomic seq.
+    my $gend   = $fp->end    # returns end coord on feature1.
+
+In general standard Bio::SeqFeatureI method calls return information
+in feature1.
+
+Data in the feature 2 object are generally obtained using the standard
+methods prefixed by h (for hit!)
+
+    my $pstart = $fp->hstart # returns start coord on feature2 = protein seq.
+    my $pend   = $fp->hend   # returns end coord on feature2.
+
+If you wish to swap feature1 and feature2 around :
+
+    $feat->invert
+
+so... 
+
+    $feat->start # etc. returns data in $feature2 object
+
+
+No sub_SeqFeatures or tags can be stored in this object directly.  Any
+features or tags are expected to be stored in the contained objects
+feature1, and feature2.
+
+=head1 CONTACT
+
+Ewan Birney E<lt>birney at sanger.ac.ukE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::FeaturePair;
+use vars qw($AUTOLOAD);
+use strict;
+
+use Bio::SeqFeatureI;
+use Bio::Factory::ObjectFactory;
+
+use base qw(Bio::SeqFeature::Generic);
+
+=head2 new
+
+ Title   : new
+ Usage   :
+ Function: Constructor for this module. Accepts the following parameters:
+
+             -feature1   Bio::SeqFeatureI-compliant object
+             -feature2   Bio::SeqFeatureI-compliant object
+             -feature_factory  Bio::Factory::ObjectFactoryI compliant
+                         object to be used when feature1 and/or feature2
+                         are accessed without explicitly set before. This
+                         is mostly useful for derived classes who want to
+                         set their preferred class for feature objects.
+
+ Example :
+ Returns : 
+ Args    : see above
+
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+
+    #
+    # We've got a certain problem here that somewhat relates to chicken and
+    # eggs. The problem is, we override a lot of SeqFeatureI methods here
+    # to delegate them to either feature1 or feature2. If we pass along
+    # those attributes right away, we need feature1 or feature2 or the feature
+    # factory in place, or there is no way around the dreaded default, which
+    # is ugly too (as it necessitates subsequent copying if you wanted a
+    # different feature object class).
+    #
+    # So I decided to go with the lesser of two evils here: we need to assume
+    # here that we can set all attributes through set_attributes(), which we
+    # assume is no different from setting them through the constructor. This
+    # gives us a window to set the feature objects and the factory, such that
+    # any derived class doesn't have to worry about this any more.
+    #
+    # I'm happy to hear a better solution, but I think this one isn't so bad.
+    #
+    # The parent constructor is deliberately called without arguments; the
+    # attributes are applied further down via set_attributes().
+    my $self = $class->SUPER::new();
+    my ($feature1,$feature2,$featfact) =
+        $self->_rearrange([qw(FEATURE1
+                              FEATURE2
+                              FEATURE_FACTORY
+                              )],@args);
+
+    $self->_register_for_cleanup(\&cleanup_fp);
+    # initialize the feature object factory if not provided
+    if(! $featfact) {
+        $featfact = Bio::Factory::ObjectFactory->new(
+                                   -type => "Bio::SeqFeature::Generic",
+                                   -interface => "Bio::SeqFeatureI");
+    }
+    $self->feature_factory($featfact);
+    # Store the features in the object
+    $feature1 && $self->feature1($feature1);
+    $feature2 && $self->feature2($feature2);
+
+    # OK. Now we're setup to store all the attributes, and they'll go right
+    # away into the right objects.
+    $self->set_attributes(@args);
+
+    # done - we hope
+    return $self;
+}
+
+=head2 feature1
+
+ Title   : feature1
+ Usage   : $f = $featpair->feature1
+           $featpair->feature1($feature)
+ Function: Get/set for the query feature
+ Returns : Bio::SeqFeatureI
+ Args    : Bio::SeqFeatureI
+
+
+=cut
+
+sub feature1 {
+    my ($self,$arg) = @_;
+
+    # Plain get of an already stored feature: nothing to create or validate.
+    return $self->{'feature1'}
+        if ( !defined($arg) && defined($self->{'feature1'}) );
+
+    # Either a set, or a first get that has to build a feature via the factory.
+    my $factory = $self->feature_factory;
+    $self->throw("internal error: feature factory not set!") unless $factory;
+    $arg = $factory->create_object() unless defined($arg);
+    unless ( ref($arg) && $arg->isa("Bio::SeqFeatureI") ) {
+        $self->throw("Argument [$arg] must be a Bio::SeqFeatureI");
+    }
+    $self->{'feature1'} = $arg;
+
+    return $self->{'feature1'};
+}
+
+=head2 feature2
+
+ Title   : feature2
+ Usage   : $f = $featpair->feature2
+           $featpair->feature2($feature)
+ Function: Get/set for the hit feature
+ Returns : Bio::SeqFeatureI
+ Args    : Bio::SeqFeatureI
+
+
+=cut
+
+sub feature2 {
+    my ($self,$arg) = @_;
+
+    # Plain get of an already stored feature: nothing to create or validate.
+    return $self->{'feature2'}
+        if ( !defined($arg) && defined($self->{'feature2'}) );
+
+    # Either a set, or a first get that has to build a feature via the factory.
+    my $factory = $self->feature_factory;
+    $self->throw("internal error: feature factory not set!") unless $factory;
+    $arg = $factory->create_object() unless defined($arg);
+    unless ( ref($arg) && $arg->isa("Bio::SeqFeatureI") ) {
+        $self->throw("Argument [$arg] must be a Bio::SeqFeatureI");
+    }
+    $self->{'feature2'} = $arg;
+
+    return $self->{'feature2'};
+}
+
+=head2 start
+
+ Title   : start
+ Usage   : $start = $featpair->start
+           $featpair->start(20)
+ Function: Get/set on the start coordinate of feature1
+ Returns : integer
+ Args    : [optional] beginning of feature
+
+=cut
+
+sub start {
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->start(@_);
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $end = $featpair->end
+           $featpair->end($end)
+ Function: get/set on the end coordinate of feature1
+ Returns : integer
+ Args    : [optional] ending point of feature
+
+
+=cut
+
+sub end{
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->end(@_);
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $strand = $feat->strand()
+           $feat->strand($strand)
+ Function: get/set on strand information, being 1,-1 or 0
+ Returns : -1,1 or 0
+ Args    : [optional] strand information to set
+
+
+=cut
+
+sub strand{
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->strand(@_);
+}
+
+=head2 location
+
+ Title   : location
+ Usage   : $location = $featpair->location
+           $featpair->location($location)
+ Function: Get/set location object (using feature1)
+ Returns : Bio::LocationI object
+ Args    : [optional] LocationI to store
+
+=cut
+
+sub location {
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->location(@_);
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $feat->score()
+           $feat->score($score)
+ Function: get/set on score information
+ Returns : float
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub score {
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->score(@_);
+}
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $frame = $feat->frame()
+           $feat->frame($frame)
+ Function: get/set on frame information
+ Returns : 0,1,2
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub frame {
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->frame(@_);
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $ptag = $featpair->primary_tag
+ Function: get/set on the primary_tag of feature1
+ Returns : a string
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub primary_tag{
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->primary_tag(@_);
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $tag = $feat->source_tag()
+           $feat->source_tag('genscan');
+ Function: Returns the source tag for a feature,
+           eg, 'genscan' 
+ Returns : a string 
+ Args    : none
+
+
+=cut
+
+sub source_tag{
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->source_tag(@_);
+}
+
+=head2 seq_id
+
+ Title   : seq_id
+ Usage   : $obj->seq_id($newval)
+ Function: There are many cases when you make a feature that you
+           do know the sequence name, but do not know its actual
+           sequence. This is an attribute such that you can store 
+           the seqname.
+
+           This attribute should *not* be used in GFF dumping, as
+           that should come from the collection in which the seq
+           feature was found.
+ Returns : value of seqname
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub seq_id{
+    my $self = shift;
+    # Delegate to feature1, forwarding any remaining arguments.
+    return $self->feature1->seq_id(@_);
+}
+
+=head2 hseq_id
+
+ Title   : hseq_id
+ Usage   : $featpair->hseq_id($newval)
+ Function: Get/set method for the name of
+           feature2.
+ Returns : value of $feature2->seq_id
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub hseq_id {
+    my $self = shift;
+    # Delegate to feature2's seq_id, forwarding any remaining arguments.
+    return $self->feature2->seq_id(@_);
+}
+
+
+=head2 hstart
+
+ Title   : hstart
+ Usage   : $start = $featpair->hstart
+           $featpair->hstart(20)
+ Function: Get/set on the start coordinate of feature2
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub hstart {
+    my $self = shift;
+    # Delegate to feature2's start, forwarding any remaining arguments.
+    return $self->feature2->start(@_);
+}
+
+=head2 hend
+
+ Title   : hend
+ Usage   : $end = $featpair->hend
+           $featpair->hend($end)
+ Function: get/set on the end coordinate of feature2
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub hend{
+    my $self = shift;
+    # Delegate to feature2's end, forwarding any remaining arguments.
+    return $self->feature2->end(@_);
+}
+
+
+=head2 hstrand
+
+ Title   : hstrand
+ Usage   : $strand = $feat->hstrand()
+           $feat->hstrand($strand)
+ Function: get/set on strand information, being 1,-1 or 0
+ Returns : -1,1 or 0
+ Args    : none
+
+
+=cut
+
+sub hstrand{
+    my $self = shift;
+    # Delegate to feature2's strand, forwarding any remaining arguments.
+    return $self->feature2->strand(@_);
+}
+
+=head2 hscore
+
+ Title   : hscore
+ Usage   : $score = $feat->hscore()
+           $feat->hscore($score)
+ Function: get/set on score information
+ Returns : float
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub hscore {
+    my $self = shift;
+    # Delegate to feature2's score, forwarding any remaining arguments.
+    return $self->feature2->score(@_);
+}
+
+=head2 hframe
+
+ Title   : hframe
+ Usage   : $frame = $feat->hframe()
+           $feat->hframe($frame)
+ Function: get/set on frame information
+ Returns : 0,1,2
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub hframe {
+    my $self = shift;
+    # Delegate to feature2's frame, forwarding any remaining arguments.
+    return $self->feature2->frame(@_);
+}
+
+=head2 hprimary_tag
+
+ Title   : hprimary_tag
+ Usage   : $ptag = $featpair->hprimary_tag
+ Function: Get/set on the primary_tag of feature2
+ Returns : a string
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub hprimary_tag{
+    my $self = shift;
+    # Delegate to feature2's primary_tag, forwarding any remaining arguments.
+    return $self->feature2->primary_tag(@_);
+}
+
+=head2 hsource_tag
+
+ Title   : hsource_tag
+ Usage   : $tag = $feat->hsource_tag()
+           $feat->hsource_tag('genscan');
+ Function: Returns the source tag for a feature,
+           eg, 'genscan' 
+ Returns : a string 
+ Args    : none
+
+
+=cut
+
+sub hsource_tag{
+    my $self = shift;
+    # Delegate to feature2's source_tag, forwarding any remaining arguments.
+    return $self->feature2->source_tag(@_);
+}
+
+=head2 invert
+
+ Title   : invert
+ Usage   : $feat->invert
+ Function: Swaps feature1 and feature2 around
+ Returns : Nothing
+ Args    : none
+
+
+=cut
+
+sub invert {
+    my $self = shift;
+
+    # Fetch both features up front (feature1 first), then store them swapped.
+    my ($first, $second) = ($self->feature1, $self->feature2);
+    $self->feature1($second);
+    $self->feature2($first);
+
+    return;
+}
+
+=head2 feature_factory
+
+ Title   : feature_factory
+ Usage   : $obj->feature_factory($newval)
+ Function: Get/set the feature object factory for this feature pair.
+
+           The feature object factory will be used to create a feature
+           object if feature1() or feature2() is called in get mode
+           without having been set before.
+
+           The default is an instance of Bio::Factory::ObjectFactory
+           and hence allows the type to be changed dynamically at any
+           time.
+
+ Example : 
+ Returns : The feature object factory in use (a 
+           Bio::Factory::ObjectFactoryI compliant object)
+ Args    : on set, a Bio::Factory::ObjectFactoryI compliant object
+
+
+=cut
+
+sub feature_factory{
+    my ($self, @new_value) = @_;
+
+    # Any argument at all, even undef, counts as a set.
+    if (@new_value) {
+        $self->{'feature_factory'} = $new_value[0];
+    }
+    return $self->{'feature_factory'};
+}
+
+#################################################################
+# aliases for backwards compatibility                           #
+#################################################################
+
+# seqname() is already aliased in Generic.pm, and we overwrite seq_id
+
+sub hseqname {
+    my $self = shift;
+    # Deprecated alias for hseq_id(). The warning names this method and its
+    # real replacement: pointing callers at seq_id() would have them read
+    # feature1's sequence id instead of feature2's.
+    $self->warn("FeaturePair::hseqname() is deprecated. Please use hseq_id() instead.");
+    return $self->hseq_id(@_);
+}
+
+sub cleanup_fp {
+    my ($self) = @_;
+    # Drop the references to both contained features; new() registers this
+    # sub through _register_for_cleanup().
+    $self->{'feature2'} = undef;
+    $self->{'feature1'} = undef;
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Exon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Exon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Exon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,222 @@
+# $Id: Exon.pm,v 1.15.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::Exon
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::Exon - a feature representing an exon
+
+=head1 SYNOPSIS
+
+    # obtain an exon instance $exon somehow
+    print "exon from ", $exon->start(), " to ", $exon->end(),
+          " on seq ", $exon->seq_id(), ", strand ", $exon->strand(),
+          ", encodes the peptide sequence ", 
+          $exon->cds()->translate()->seq(), "\n";
+
+=head1 DESCRIPTION
+
+This module implements a feature representing an exon by implementing
+the Bio::SeqFeature::Gene::ExonI interface. By default an Exon is
+coding. Supply -is_coding =E<gt> 0 to the constructor or call
+$exon-E<gt>is_coding(0) otherwise.
+
+Apart from that, this class also implements Bio::SeqFeatureI by
+inheriting off Bio::SeqFeature::Generic.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::Exon;
+use strict;
+
+
+use base qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::ExonI);
+
+#
+# A list of allowed exon types. See primary_tag().
+#
+my @valid_exon_types = ('initial', 'internal', 'terminal');
+
+sub new {
+    my ($caller, @args) = @_;
+    my $self = $caller->SUPER::new(@args);
+
+    my ($is_coding) =
+        $self->_rearrange([qw(IS_CODING)],@args);
+    # Defaults: primary tag 'exon', coding unless -is_coding says otherwise,
+    # and strand 0 when the parent constructor left it undefined.
+    $self->primary_tag('exon') unless $self->primary_tag();
+    $self->is_coding(defined($is_coding) ? $is_coding : 1);
+    $self->strand(0) if(! defined($self->strand()));
+    return $self;
+}
+
+
+=head2 is_coding
+
+ Title   : is_coding
+ Usage   : if($exon->is_coding()) {
+                   # do something
+           }
+           if($is_utr) {
+               $exon->is_coding(0);
+           }
+ Function: Get/set whether or not the exon codes for amino acid.
+ Returns : TRUE if the object represents a feature translated into protein,
+           and FALSE otherwise.
+ Args    : A boolean value on set.
+
+
+=cut
+
+sub is_coding {
+    my ($self,$val) = @_;
+
+    # Only a defined argument overwrites the flag; otherwise this is a get.
+    $self->{'_iscoding'} = $val if defined($val);
+
+    return $self->{'_iscoding'};
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+           $feat->primary_tag('exon')
+ Function: Get/set the primary tag for the exon feature.
+
+           This method is overridden here in order to allow only for
+           tag values following a certain convention. For consistency reasons,
+           the tag value must either contain the string 'exon' or the string
+           'utr' (both case-insensitive). In the case of 'exon', a string
+           describing the type of exon may be appended or prefixed. Presently,
+           the following types are allowed: initial, internal, and terminal
+           (all case-insensitive). 
+
+           If the supplied tag value matches 'utr' (case-insensitive),
+           is_coding() will automatically be set to FALSE, and to TRUE
+           otherwise.
+
+ Returns : A string.
+ Args    : A string on set.
+
+
+=cut
+
+# sub primary_tag {
+#    my ($self,$value) = @_;
+
+#    if(defined($value)) {
+#        if((lc($value) =~ /utr/i) || (lc($value) eq "exon") ||
+# 	  ((lc($value) =~ /exon/i) &&
+# 	   (grep { $value =~ /$_/i; } @valid_exon_types))) {
+# 	   $self->is_coding($value =~ /utr/i ? 0 : 1);
+#        } else {
+# 	   $self->throw("primary tag $value is invalid for object of class ".
+# 			ref($self));
+#        }
+#    }
+#    return $self->SUPER::primary_tag($value);
+# }
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $exon->location()
+ Function: Returns a location object suitable for identifying the location 
+	   of the exon on the sequence or parent feature.
+
+           This method is overridden here to restrict allowed location types
+           to non-compound locations.
+
+ Returns : Bio::LocationI object
+ Args    : none
+
+
+=cut
+
+sub location {
+   my ($self,$value) = @_;
+
+   # Reject split locations on set; everything else, including a plain
+   # get, is handed on to the parent class implementation.
+   my $is_split = defined($value)
+       && $value->isa('Bio::Location::SplitLocationI');
+   if ($is_split) {
+       $self->throw("split or compound location is not allowed ".
+                    "for an object of type " . ref($self));
+   }
+
+   return $self->SUPER::location($value);
+}
+
+=head2 cds
+
+ Title   : cds()
+ Usage   : $cds = $exon->cds();
+ Function: Get the coding sequence of the exon as a sequence object.
+
+           The sequence of the returned object is prefixed by Ns (lower case)
+           if the frame of the exon is defined and different from zero. The
+           result is that the first base starts a codon (frame 0).
+
+           This implementation returns undef if the particular exon is
+           not translated to protein, i.e., is_coding() returns FALSE. Undef
+           will also be returned if no sequence is attached to this exon
+           feature.
+
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+
+=cut
+
+sub cds {
+    my $self = shift;
+
+    # UTR is not translated
+    return unless $self->is_coding();
+
+    my $seq = $self->seq();
+    # Nothing to pad when no sequence is attached.
+    return $seq unless defined($seq);
+
+    # A defined, non-zero frame is compensated by prepending that many
+    # lower-case 'n' characters to the sequence string.
+    my $frame = $self->frame();
+    if (defined($frame) && ($frame != 0)) {
+        $seq->seq(("n" x $frame) . $seq->seq());
+    }
+    return $seq;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/ExonI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/ExonI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/ExonI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,110 @@
+# $Id: ExonI.pm,v 1.11.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::ExonI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::ExonI - Interface for a feature representing an exon
+
+=head1 SYNOPSIS
+
+See documentation of methods.
+
+=head1 DESCRIPTION
+
+A feature representing an exon. An exon in this definition is
+transcribed and at least for one particular transcript not spliced out
+of the pre-mRNA. However, it does not necessarily code for amino acid.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::ExonI;
+use strict;
+
+use base qw(Bio::SeqFeatureI);
+
+
+=head2 is_coding
+
+ Title   : is_coding
+ Usage   : if($exon->is_coding()) {
+                   # do something
+           }
+ Function: Whether or not the exon codes for amino acid.
+ Returns : TRUE if the object represents a feature translated into protein,
+           and FALSE otherwise.
+ Args    : 
+
+
+=cut
+
+sub is_coding {
+    my $self = shift;
+    # Interface stub: implementing classes are expected to override this.
+    $self->throw_not_implemented();
+}
+
+=head2 cds
+
+ Title   : cds()
+ Usage   : $cds = $exon->cds();
+ Function: Get the coding sequence of the exon as a sequence object.
+
+           The returned sequence object must be in frame 0, i.e., the first
+           base starts a codon.
+
+           An implementation may return undef, indicating that a coding
+           sequence does not exist, e.g. for a UTR (untranslated region).
+
+ Returns : A L<Bio::PrimarySeqI> implementing object.
+ Args    : 
+
+
+=cut
+
+sub cds {
+    my $self = shift;
+    # Interface stub: implementing classes are expected to override this.
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructure.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructure.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructure.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,406 @@
+# $Id: GeneStructure.pm,v 1.21.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::GeneStructure
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::GeneStructure - A feature representing an arbitrarily complex structure of a gene
+
+=head1 SYNOPSIS
+
+  # See documentation of methods.
+
+=head1 DESCRIPTION
+
+A feature representing a gene structure. As of now, a gene structure
+really is only a collection of transcripts. See
+L<Bio::SeqFeature::Gene::TranscriptI> (interface) and
+L<Bio::SeqFeature::Gene::Transcript> (implementation) for the features
+of such objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp-at-gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::GeneStructure;
+use vars qw($WeakRefs);
+use strict;
+
+BEGIN {
+    # Probe for Scalar::Util::weaken at compile time and record in the
+    # package flag $WeakRefs whether the import succeeded (1) or not (0).
+    eval "use Scalar::Util qw(weaken);";
+    $Bio::SeqFeature::Gene::GeneStructure::WeakRefs = $@ ? 0 : 1;
+}
+
+
+use base qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::GeneStructureI);
+
+
+sub new {
+    my ($caller, @args) = @_;
+    my $self = $caller->SUPER::new(@args);
+    # gene_cleanup() flushes the transcripts and their back-pointers.
+    $self->_register_for_cleanup(\&gene_cleanup);
+    my ($primary) = $self->_rearrange([qw(PRIMARY)], @args);
+
+    # Default the primary tag, and the strand when it is still undefined.
+    $primary = 'genestructure' unless $primary;
+    $self->primary_tag($primary);
+    $self->strand(0) if(! defined($self->strand()));
+    return $self;
+}
+
+=head2 transcripts
+
+ Title   : transcripts
+ Usage   : @transcripts = $gene->transcripts();
+ Function: Get the transcripts of this gene structure. Many gene structures
+           will have only one transcript.
+
+ Returns : An array of Bio::SeqFeature::Gene::TranscriptI implementing objects.
+ Args    : 
+
+
+=cut
+
+sub transcripts {
+    return @{ $_[0]{'_transcripts'} || [] };    # empty list when none were added
+}
+
+=head2 add_transcript
+
+ Title   : add_transcript()
+ Usage   : $gene->add_transcript($transcript);
+ Function: Add a transcript to this gene structure.
+ Returns : 
+ Args    : A Bio::SeqFeature::Gene::TranscriptI implementing object.
+
+
+=cut
+
+sub add_transcript {    # append a validated TranscriptI feature to this gene's transcript list
+    my ($self, $fea) = @_;
+
+    if(!$fea || ! $fea->isa('Bio::SeqFeature::Gene::TranscriptI') ) {    # false values and non-TranscriptI objects are rejected
+	$self->throw("$fea does not implement Bio::SeqFeature::Gene::TranscriptI");
+    }
+    unless( exists $self->{'_transcripts'}  ) {    # create the list on first use
+	$self->{'_transcripts'} = [];
+    }
+    $self->_expand_region($fea);    # presumably grows this feature's span to include $fea - TODO confirm
+    if( $Bio::SeqFeature::Gene::GeneStructure::WeakRefs ) {    # flag set in BEGIN when Scalar::Util::weaken was imported
+	$fea->parent(weaken $self);    # NOTE(review): weaken() is used for its side effect; confirm it returns $self, otherwise parent() is called without a value
+    } else {
+	$fea->parent($self);    # strong back-reference; undone again by flush_transcripts()
+    }
+    push(@{$self->{'_transcripts'}}, $fea);
+}
+
+=head2 flush_transcripts
+
+ Title   : flush_transcripts()
+ Usage   : $gene->flush_transcripts();
+ Function: Remove all transcripts from this gene structure.
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub flush_transcripts {
+    my $self = shift;
+    if ( defined $self->{'_transcripts'} ) {
+        for my $tr ( grep { defined $_ } @{ $self->{'_transcripts'} || [] } ) {
+            $tr->parent(undef);    # drop the back-pointer to this gene
+            undef $tr;             # grep aliases the list slots, so this clears them
+        }
+        delete $self->{'_transcripts'};
+    }
+}
+
+=head2 add_transcript_as_features
+
+ Title   : add_transcript_as_features
+ Usage   : $gene->add_transcript_as_features(@featurelist);
+ Function: take a list of Bio::SeqFeatureI objects and turn them into a
+           Bio::SeqFeature::Gene::Transcript object.  Add that transcript to the gene.
+ Returns : nothing
+ Args    : a list of Bio::SeqFeatureI compliant objects
+
+
+=cut
+
+sub add_transcript_as_features {
+    my ($self, @features) = @_;
+    # This file does not load the Transcript class itself, so make sure
+    # it is available before constructing one (require is idempotent).
+    require Bio::SeqFeature::Gene::Transcript;
+    my $transcript = Bio::SeqFeature::Gene::Transcript->new;
+    # Route each feature by its primary tag, matched case-insensitively;
+    # anything that is not a UTR, promoter or poly-A site counts as an exon.
+    foreach my $fea (@features) {
+        my $tag = $fea->primary_tag;
+        if    ($tag =~ /utr/i)     { $transcript->add_utr($fea); }      # UTR, 3' utr, utr5 ...
+        elsif ($tag =~ /promot/i)  { $transcript->add_promoter($fea); } # allow for spelling differences
+        elsif ($tag =~ /poly.*A/i) { $transcript->poly_A_site($fea); }  # polyA, POLY_A, etc.
+        else                       { $transcript->add_exon($fea); }     # assume the rest are exons
+    }
+    $self->add_transcript($transcript);
+}
+
+
+=head2 promoters
+
+ Title   : promoters
+ Usage   : @prom_sites = $gene->promoters();
+ Function: Get the promoter features of this gene structure.
+
+           This method basically merges the promoters returned by transcripts.
+
+           Note that OO-modeling of regulatory elements is not stable yet.
+           This means that this method might change or even disappear in a
+           future release. Be aware of this if you use it.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : 
+
+
+=cut
+
+sub promoters {
+    my $self = shift;
+
+    # Collect the promoters of each transcript in turn; the result is a
+    # flat list that follows the order of $self->transcripts().
+    my @merged = map {
+        $_->promoters();
+    } $self->transcripts();
+    return @merged;
+}
+
+
+=head2 exons
+
+ Title   : exons()
+ Usage   : @exons = $gene->exons();
+           @initial_exons = $gene->exons('Initial');
+ Function: Get all exon features or all exons of a specified type of this gene
+           structure.
+
+           Exon type is treated as a case-insensitive regular expression and 
+           optional. For consistency, use only the following types: 
+           initial, internal, terminal, utr, utr5prime, and utr3prime. 
+           A special and virtual type is 'coding', which refers to all types
+           except utr.
+
+           This method basically merges the exons returned by transcripts.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
+ Args    : An optional string specifying the type of exon.
+
+
+=cut
+
+sub exons {
+    my ($self, @args) = @_;
+
+    # Ask every transcript for its exons, handing the optional type
+    # argument through unchanged, and return them as one flat list.
+    my @merged = map {
+        $_->exons(@args);
+    } $self->transcripts();
+    return @merged;
+}
+
+=head2 introns
+
+ Title   : introns()
+ Usage   : @introns = $gene->introns();
+ Function: Get all introns of this gene structure.
+
+           Note that this class currently generates these features on-the-fly,
+           that is, it simply treats all regions between exons as introns.
+           It assumes that the exons in the transcripts do not overlap.
+
+           This method basically merges the introns returned by transcripts.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : 
+
+
+=cut
+
+sub introns {
+    my $self = shift;
+
+    # Collect the introns of each transcript in turn; the result is a
+    # flat list that follows the order of $self->transcripts().
+    my @merged = map {
+        $_->introns();
+    } $self->transcripts();
+    return @merged;
+}
+
+=head2 poly_A_sites
+
+ Title   : poly_A_sites()
+ Usage   : @polyAsites = $gene->poly_A_sites();
+ Function: Get the poly-adenylation sites of this gene structure.
+
+           This method basically merges the poly-adenylation sites returned by
+           transcripts.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : 
+
+
+=cut
+
+sub poly_A_sites {
+    my $self = shift;
+
+    # Each transcript is asked via its singular poly_A_site() accessor;
+    # whatever the accessors return is gathered into one flat list.
+    my @merged = map {
+        $_->poly_A_site();
+    } $self->transcripts();
+    return @merged;
+}
+
+=head2 utrs
+
+ Title   : utrs()
+ Usage   : @utr_sites = $gene->utrs('3prime');
+           @utr_sites = $gene->utrs('5prime');
+           @utr_sites = $gene->utrs();
+ Function: Get the features representing untranslated regions (UTR) of this
+           gene structure.
+
+           You may provide an argument specifying the type of UTR. Currently
+           the following types are recognized: 5prime 3prime for UTR on the
+           5' and 3' end of the CDS, respectively.
+
+           This method basically merges the UTRs returned by transcripts.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
+           representing the UTR regions or sites.
+ Args    : Optionally, either 3prime, or 5prime for the type of UTR
+           feature.
+
+
+=cut
+
+sub utrs {
+    my ($self, @args) = @_;
+
+    # Ask every transcript for its UTRs, handing the optional type
+    # argument through unchanged, and return them as one flat list.
+    my @merged = map {
+        $_->utrs(@args);
+    } $self->transcripts();
+    return @merged;
+}
+
+=head2 sub_SeqFeature
+
+ Title   : sub_SeqFeature
+ Usage   : @feats = $gene->sub_SeqFeature();
+ Function: Returns an array of all subfeatures.
+
+           This method is defined in Bio::SeqFeatureI. We override this here
+           to include the transcripts.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : none
+
+
+=cut
+
+sub sub_SeqFeature {
+    my $self = shift;
+
+    # Start from the subfeatures the superclass holds, then append the
+    # transcripts so that callers get both kinds in a single list.
+    my @all = ( $self->SUPER::sub_SeqFeature(), $self->transcripts() );
+
+    return @all;
+}
+
+=head2 flush_sub_SeqFeature
+
+ Title   : flush_sub_SeqFeature
+ Usage   : $gene->flush_sub_SeqFeature();
+           $gene->flush_sub_SeqFeature(1);
+ Function: Removes all subfeatures.
+
+           This method is overridden from Bio::SeqFeature::Generic to flush
+           all additional subfeatures, i.e., transcripts, which is
+           almost certainly not what you want. To remove only features added
+           through $gene->add_sub_SeqFeature($feature) pass any
+           argument evaluating to TRUE.
+
+ Example :
+ Returns : none
+ Args    : Optionally, an argument evaluating to TRUE will suppress flushing
+           of all gene structure-specific subfeatures (transcripts).
+
+
+=cut
+
+sub flush_sub_SeqFeature {
+   my $self     = shift;
+   my $keep_trs = shift;    # TRUE: leave the transcripts in place
+
+   # Generic subfeatures always go; transcripts only when not suppressed.
+   $self->SUPER::flush_sub_SeqFeature();
+   $self->flush_transcripts() if !$keep_trs;
+}
+
+sub gene_cleanup {
+    my ($self) = @_;    # hook handed to _register_for_cleanup() in new()
+    $self->flush_transcripts();
+}
+
+1;
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/GeneStructureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,189 @@
+# $Id: GeneStructureI.pm,v 1.12.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::GeneStructureI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::GeneStructureI - A feature representing an arbitrarily
+           complex structure of a gene
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+A feature representing a gene structure.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::GeneStructureI;
+use strict;
+
+use Carp;
+
+use base qw(Bio::SeqFeatureI);
+
+=head2 transcripts
+
+ Title   : transcripts()
+ Usage   : @transcripts = $gene->transcripts();
+ Function: Get the transcript features/sites of this gene structure.
+
+           See Bio::SeqFeature::Gene::TranscriptI for properties of the
+           returned objects.
+
+ Returns : An array of Bio::SeqFeature::Gene::TranscriptI implementing objects
+           representing the transcripts of this gene structure.
+ Args    : 
+
+
+=cut
+
+sub transcripts {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 promoters
+
+ Title   : promoters()
+ Usage   : @prom_sites = $gene->promoters();
+ Function: Get the promoter features/sites of this gene structure.
+
+           Note that OO-modeling of regulatory elements is not stable yet.
+           This means that this method might change or even disappear in a
+           future release. Be aware of this if you use it.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects representing the
+           promoter regions or sites.
+ Args    : 
+
+=cut
+
+sub promoters {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 exons
+
+ Title   : exons()
+ Usage   : @exons = $gene->exons();
+           @initial = $gene->exons('Initial');
+ Function: Get all exon features or all exons of specified type of this gene
+           structure.
+
+           Refer to the documentation of the class that produced this gene
+           structure object for information about the possible types.
+
+           See Bio::SeqFeature::Gene::ExonI for properties of the
+           returned objects.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
+           representing the exon regions.
+ Args    : An optional string specifying the type of the exon.
+
+=cut
+
+sub exons {
+    my $self = shift;    # interface stub; the optional exon type is not used here
+    return $self->throw_not_implemented();
+}
+
+=head2 introns
+
+ Title   : introns()
+ Usage   : @introns = $gene->introns();
+ Function: Get all introns of this gene structure.
+ Returns : An array of Bio::SeqFeatureI implementing objects representing the
+           introns.
+ Args    : 
+
+
+=cut
+
+sub introns {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 poly_A_sites
+
+ Title   : poly_A_sites()
+ Usage   : @polyAsites = $gene->poly_A_sites();
+ Function: Get the poly-adenylation features/sites of this gene structure.
+ Returns : An array of Bio::SeqFeatureI implementing objects representing the
+           poly-adenylation regions or sites.
+ Args    : 
+
+
+=cut
+
+sub poly_A_sites {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 utrs
+
+ Title   : utrs()
+ Usage   : @utr_sites = $gene->utrs();
+ Function: Get the UTR features/sites of this gene structure.
+
+           See Bio::SeqFeature::Gene::ExonI for properties of the
+           returned objects.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
+           representing the UTR regions or sites.
+ Args    : 
+
+
+=cut
+
+sub utrs {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Intron.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Intron.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Intron.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,291 @@
+# $Id: Intron.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::Intron
+#
+# Cared for by David Block <dblock at gene.pbi.nrc.ca>
+#
+# Copyright David Block
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::Intron - An intron feature
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - David Block
+
+Email dblock at gene.pbi.nrc.ca
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::Intron;
+use strict;
+
+use Bio::SeqFeature::Gene::Exon;
+
+use base qw(Bio::SeqFeature::Gene::NC_Feature);
+
+sub new {
+    my($class, @args) = @_;
+
+    # introns are non-coding by default: append the flag only when the
+    # caller did not pass -is_coding (matched case-insensitively).
+    if(! grep { lc($_) eq '-is_coding'; } @args) {
+        push(@args, '-is_coding', 0);
+    }
+    my $self = $class->SUPER::new(@args);
+    # Fall back to the 'intron' tag unless -primary or -primary_tag was given.
+    my ($primary, $prim) =
+        $self->_rearrange([qw(PRIMARY PRIMARY_TAG)], @args);
+    $self->primary_tag('intron') unless $primary || $prim;
+    return $self;
+}
+
+=head2 upstream_Exon 
+
+  Title   : upstream_Exon 
+  Usage   : $intron->upstream_Exon()
+  Function: exon upstream of the intron 
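+  Returns : Bio::SeqFeature::Gene::ExonI implementing object (undef if unset)
+  Args    : optionally, the Bio::SeqFeature::Gene::ExonI object to set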
+
+=cut
+
+sub upstream_Exon {
+    my $self = shift;
+    my $exon = shift;
+    if ($exon) {
+        $self->{'_intron_location'} = undef;    # cached span is now stale
+        $exon->isa('Bio::SeqFeature::Gene::ExonI')
+            or $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI");
+        $self->{'_upstream_exon'} = $exon;
+    }
+    return $self->{'_upstream_exon'};
+}
+
+
+=head2 downstream_Exon 
+
+  Title   : downstream_Exon 
+  Usage   : $intron->downstream_Exon()
+  Function: exon downstream of the intron 
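+  Returns : Bio::SeqFeature::Gene::ExonI implementing object (undef if unset)
+  Args    : optionally, the Bio::SeqFeature::Gene::ExonI object to set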
+
+=cut
+
+sub downstream_Exon {
+    my $self = shift;
+    my $exon = shift;
+    if ($exon) {
+        $self->{'_intron_location'} = undef;    # cached span is now stale
+        $exon->isa('Bio::SeqFeature::Gene::ExonI')
+            or $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI");
+        $self->{'_downstream_exon'} = $exon;
+    }
+    return $self->{'_downstream_exon'};
+}
+
+=head2 phase 
+
+  Title   : phase 
+  Usage   : $intron->phase()
+  Function: returns the phase of the intron(where it interrupts the codon)  
+  Returns : int(0,1,2)
+  Args    : 
+
+=cut
+
+sub phase {
+  my $next_exon = $_[0]->downstream_Exon;    # the phase is read from the downstream exon
+  return $next_exon->phase;
+}
+
+
+=head2 acceptor_splice_site 
+
+  Title   : acceptor_splice_site 
+  Usage   : $intron->acceptor_splice_site(21,3)
+  Function: returns a feature covering the sequence around the
+            consensus acceptor splice site. If start and
+            end are provided, they give the number of base pairs
+            left and right of the canonical AG. Here 21 means
+            21 bp into intron and 3 means 3 bp into the exon.
+            --Intron--21----|AG|-3-----Exon
+            Defaults to 21,3
+
+  Returns : Bio::SeqFeature::Generic with the entire sequence attached
+  Args    : start and end
+
+=cut
+
+sub acceptor_splice_site {
+  my ($self,$ss_start,$ss_end) = @_;
+  $ss_start = 21 unless defined $ss_start;   # bases taken from the intron
+  $ss_end   = 3 unless defined $ss_end;      # bases taken from the exon
+
+  # On the reverse strand the downstream exon lies at the lower coordinates,
+  # so the two window sizes trade places before they are applied.
+  ($ss_start, $ss_end) = ($ss_end, $ss_start) if $self->strand < 0;
+
+  my $down_exon = $self->downstream_Exon;
+  my $acceptor;
+  if($self->strand < 0){
+      # Clamp the window to the exon length and to the intron length minus 2.
+      $ss_start = $ss_start > $down_exon->length ? $down_exon->length : $ss_start;
+      $ss_end   = $ss_end > $self->length-2 ? $self->length-2 : $ss_end;
+      $acceptor = Bio::SeqFeature::Generic->new(-start=>$self->start - ($ss_start),
+                                                -end=>$self->start + ($ss_end+1),
+                                                -strand=>$self->strand,
+                                                -primary_tag=>"acceptor splice site");
+  }
+  else {
+    $ss_start = $ss_start > $self->length-2 ? $self->length-2 : $ss_start;
+    $ss_end   = $ss_end > $down_exon->length ? $down_exon->length : $ss_end;
+    $acceptor = Bio::SeqFeature::Generic->new(-start=>$self->end - ($ss_start + 1),
+                                              -end=>$self->end + $ss_end,
+                                              -strand=>$self->strand,
+                                              -primary_tag=>"acceptor splice site");
+  }
+  # The primary tag names the acceptor site, matching what this method computes.
+  # The intron's entire sequence is attached to the feature that is returned.
+  $acceptor->attach_seq($self->entire_seq);
+
+  return $acceptor;
+
+}
+
+=head2 donor_splice_site 
+
+  Title   : donor_splice_site 
+  Usage   : $intron->donor_splice_site(3,6)
+  Function: returns a feature covering the sequence around the
+            consensus donor splice site. If start and
+            end are provided, they give the number of base pairs
+            left and right of the canonical GT. Here 3 means
+            3 bp into exon and 6 means 6 bp into the intron.
+            --Exon-3--|GT|-6----Intron-
+            Defaults to 3,10
+
+  Returns : Bio::SeqFeature::Generic with the entire sequence attached
+  Args    : start and end
+
+=cut
+
+sub donor_splice_site {
+  my ($self,$ss_start,$ss_end) = @_;
+  $ss_start = 3 unless defined $ss_start;    # bases taken from the exon
+  $ss_end   = 10 unless defined $ss_end;     # bases taken from the intron
+
+  # On the reverse strand the upstream exon lies at the higher coordinates,
+  # so the two window sizes trade places before they are applied.
+  ($ss_start, $ss_end) = ($ss_end, $ss_start) if $self->strand < 0;
+
+  my $up_exon = $self->upstream_Exon;
+  my $donor;
+  if($self->strand < 0){
+    # Clamp the window to the exon length and to the intron length minus 2.
+    $ss_end   = $ss_end > $up_exon->length ? $up_exon->length : $ss_end;
+    $ss_start = $ss_start > $self->length -2 ? $self->length -2 : $ss_start;
+    $donor = Bio::SeqFeature::Generic->new(-start=>$self->end - ($ss_start+1),
+                                           -end  => $self->end + ($ss_end),
+                                           -strand=>$self->strand,
+                                           -primary_tag=>"donor splice site");
+  }
+  else {
+    $ss_start = $ss_start > $up_exon->length ? $up_exon->length : $ss_start;
+    $ss_end   = $ss_end > $self->length -2 ? $self->length -2 : $ss_end;
+    $donor = Bio::SeqFeature::Generic->new(-start=>$self->start - $ss_start,
+                                           -end  => $self->start +($ss_end+1),
+                                           -strand=>$self->strand,
+                                           -primary_tag=>"donor splice site");
+  }
+  $donor->attach_seq($self->entire_seq);    # attach the intron's entire sequence
+  return $donor;
+}
+
+sub location {
+    my $self = shift;
+
+    # The span is derived from the flanking exons once and then cached;
+    # upstream_Exon()/downstream_Exon() reset the cache when given an exon.
+    if ( !$self->{'_intron_location'} ) {
+        my $span = Bio::Location::Simple->new;
+
+        my $five_prime  = $self->upstream_Exon;
+        my $three_prime = $self->downstream_Exon;
+
+        # Compare the sequences both exons are attached to.
+        # NOTE(review): ref() only yields the class name, so this verifies the
+        # type of the two sequence objects, not that they are the same object.
+        my $up_seq   = $five_prime->entire_seq;
+        my $down_seq = $three_prime->entire_seq;
+        if ( ref($up_seq) ne ref($down_seq) ) {
+            $self->throw("upstream and downstream exons are attached to different sequences\n'$up_seq' and '$down_seq'");
+        }
+
+        # Both exons have to share one strand, which the intron inherits.
+        my $up_strand   = $five_prime->strand;
+        my $down_strand = $three_prime->strand;
+        if ( !( $up_strand == $down_strand ) ) {
+            $self->throw("upstream and downstream exons are on different strands "
+                . "('$up_strand' and '$down_strand')");
+        }
+        $span->strand($up_strand);
+
+        # In genomic coordinates the intron sits between the end of the
+        # lower exon and the start of the higher one; which exon is which
+        # depends on the strand (only strand 1 keeps upstream first).
+        my ( $low_exon, $high_exon ) =
+            $up_strand == 1 ? ( $five_prime,  $three_prime )
+                            : ( $three_prime, $five_prime );
+        my $exon_end   = $low_exon->end;
+        my $exon_start = $high_exon->start;
+        if ( !( $exon_end < $exon_start ) ) {
+            $self->throw("Intron gap begins after '$exon_end' and ends before '$exon_start'");
+        }
+        $span->start( $exon_end + 1 );
+        $span->end( $exon_start - 1 );
+
+        $self->{'_intron_location'} = $span;
+    }
+    return $self->{'_intron_location'};
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/NC_Feature.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/NC_Feature.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/NC_Feature.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,118 @@
+# $Id: NC_Feature.pm,v 1.11.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::NC_Feature.pm
+#
+# Cared for by David Block <dblock at gene.pbi.nrc.ca>
+#
+# Copyright David Block
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::NC_Feature.pm - superclass for non-coding features
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - David Block
+
+Email dblock at gnf.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqFeature::Gene::NC_Feature;
+use strict;
+
+# Object preamble - inherits from Bio::SeqFeature::Generic
+
+
+use base qw(Bio::SeqFeature::Generic);
+
+sub new {
+    my($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # Pick -is_coding out of the arguments; when the caller did not
+    # supply it, the feature is recorded as non-coding (0).
+    my ($is_coding) = $self->_rearrange([qw(IS_CODING)], @args);
+    $self->is_coding(defined($is_coding) ? $is_coding : 0);
+
+    return $self;
+}
+
+
+
+=head2 is_coding
+
+ Title   : is_coding
+ Usage   : if ($feature->is_coding()) {
+                     #do something
+            }
+ Function: Whether or not the feature codes for amino acid.
+ Returns : The stored flag; FALSE unless set otherwise via -is_coding or this method
+ Args    : optional new value for the flag
+
+=cut
+
+sub is_coding{
+    my ($self, @value) = @_;
+    # With an argument this acts as a setter; the stored flag is returned either way.
+    $self->{'is_coding'} = $value[0] if @value;
+    return $self->{'is_coding'};
+}
+
+=head2 cds
+
+ Title   : cds
+ Usage   : $cds=$feature->cds();
+ Function: get the coding sequence of this feature
+ Returns : undef
+ Args    : none
+
+
+=cut
+
+sub cds {
+   my ($self, @args) = @_;    # arguments are accepted but ignored
+   # No coding sequence is ever produced: undef (empty list in list context).
+   return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Poly_A_site.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Poly_A_site.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Poly_A_site.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,74 @@
+# $Id: Poly_A_site.pm,v 1.7.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::Poly_A_site
+#
+# Cared for by David Block <dblock at gene.pbi.nrc.ca>
+#
+# Copyright David Block
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::Poly_A_site - poly A feature
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Inherits from L<Bio::SeqFeature::Gene::NC_Feature>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - David Block
+
+Email dblock at gene.pbi.nrc.ca
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::Poly_A_site;
+use strict;
+
+# Object preamble - inherits from Bio::SeqFeature::Gene::NC_Feature
+
+
+use base qw(Bio::SeqFeature::Gene::NC_Feature);
+
+sub new {
+  my($class, @args) = @_;
+  # Construction is delegated entirely to the superclass constructor.
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Promoter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Promoter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Promoter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,75 @@
+# $Id: Promoter.pm,v 1.8.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::Promoter
+#
+# Cared for by David Block <dblock at gene.pbi.nrc.ca>
+#
+# Copyright David Block
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::Promoter - Describes a promoter      
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the object here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - David Block
+
+Email dblock at gene.pbi.nrc.ca
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::Promoter;
+use strict;
+
+# Object preamble - inherits from Bio::SeqFeature::Gene::NC_Feature
+
+
+use base qw(Bio::SeqFeature::Gene::NC_Feature);
+
+sub new {
+  my($class, @args) = @_;
+  # Construction is delegated entirely to the superclass constructor.
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Transcript.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Transcript.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/Transcript.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,788 @@
+# $Id: Transcript.pm,v 1.38.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::Transcript
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::Transcript - A feature representing a transcript
+
+=head1 SYNOPSIS
+
+  # See documentation of methods.
+
+=head1 DESCRIPTION
+
+A feature representing a transcript.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqFeature::Gene::Transcript;
+use strict;
+
+
+use Bio::PrimarySeq;
+
+use base qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::TranscriptI);
+
+sub new {
+    my ($caller, @args) = @_;
+    my $self = $caller->SUPER::new(@args);
+    # register transcript_destroy() as this object's cleanup callback
+    $self->_register_for_cleanup(\&transcript_destroy);
+    my ($primary) = $self->_rearrange([qw(PRIMARY)], @args);
+    $primary = 'transcript' unless $primary;   # default primary tag
+    $self->primary_tag($primary);
+    $self->strand(0) if(! defined($self->strand()));
+    return $self;
+}
+
+
+=head2 promoters
+
+ Title   : promoters()
+ Usage   : @proms = $transcript->promoters();
+ Function: Get the promoter features/sites of this transcript. 
+
+           Note that OO-modeling of regulatory elements is not stable yet.
+           This means that this method might change or even disappear in a
+           future release. Be aware of this if you use it.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects representing the
+           promoter regions or sites.
+ Args    : 
+
+
+=cut
+
+sub promoters {
+    my $self = shift;    # the result is sorted by get_feature_type()
+    return $self->get_feature_type('Bio::SeqFeature::Gene::Promoter');
+}
+
+=head2 add_promoter
+
+ Title   : add_promoter()
+ Usage   : $transcript->add_promoter($feature);
+ Function: Add a promoter feature/site to this transcript.
+
+
+           Note that OO-modeling of regulatory elements is not stable yet.
+           This means that this method might change or even disappear in a
+           future release. Be aware of this if you use it.
+
+ Returns : 
+ Args    : A Bio::SeqFeatureI implementing object.
+
+
+=cut
+
+sub add_promoter {
+    my ($self, $promoter) = @_;    # re-blessed by _add() if it is no Promoter
+    return $self->_add($promoter, 'Bio::SeqFeature::Gene::Promoter');
+}
+
+=head2 flush_promoters
+
+ Title   : flush_promoters()
+ Usage   : $transcript->flush_promoters();
+ Function: Remove all promoter features/sites from this transcript.
+
+           Note that OO-modeling of regulatory elements is not stable yet.
+           This means that this method might change or even disappear in a
+           future release. Be aware of this if you use it.
+
+ Returns : the removed features as a list
+ Args    : none
+
+
+=cut
+
+sub flush_promoters {
+    my $self = shift;    # the removed promoters are handed back to the caller
+    return $self->_flush('Bio::SeqFeature::Gene::Promoter');
+}
+
+=head2 exons
+
+ Title   : exons()
+ Usage   : @exons = $gene->exons();
+           ($initial_exon) = $gene->exons('Initial');
+ Function: Get all exon features or all exons of specified type of this 
+           transcript.
+
+           Exon type is treated as a case-insensitive regular expression and 
+           is optional. For consistency, use only the following types: 
+           initial, internal, terminal.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
+ Args    : An optional string specifying the primary_tag of the feature.
+
+
+=cut
+
+sub exons {
+    my ($self, $tag_pattern) = @_;    # the pattern is optional
+    my @criteria = ('Bio::SeqFeature::Gene::ExonI', $tag_pattern);
+    return $self->get_unordered_feature_type(@criteria);
+}
+
+=head2 exons_ordered
+
+ Title   : exons_ordered
+ Usage   : @exons = $gene->exons_ordered();
+           @exons = $gene->exons_ordered("Internal");
+ Function: Get an ordered list of all exon features or all exons of specified
+           type of this transcript.
+
+           Exon type is an optional, case-insensitive regular expression. For
+           consistency, use only these types: initial, internal, terminal.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
+ Args    : An optional string specifying the primary_tag of the feature.
+
+=cut
+
+sub exons_ordered {
+    my ($self, $tag_pattern) = @_;    # the pattern is optional
+    return $self->get_feature_type('Bio::SeqFeature::Gene::ExonI', $tag_pattern);
+}
+
+=head2 add_exon
+
+ Title   : add_exon()
+ Usage   : $transcript->add_exon($exon,'initial');
+ Function: Add an exon feature to this transcript.
+
+           The second argument denotes the type of exon. Mixing exons with and
+           without a type is likely to cause trouble in exons(). Either
+           leave out the type for all exons or for none.
+
+           Presently, the following types are known: initial, internal, 
+           terminal, utr, utr5prime, and utr3prime (all case-insensitive).
+           UTR should better be added through utrs()/add_utr().
+
+           If you wish to use other or additional types, you will almost
+           certainly have to call exon_type_sortorder() in order to replace
+           the default sort order, or mrna(), cds(), protein(), and exons()
+           may yield unexpected results.
+
+ Returns : 
+ Args    : A Bio::SeqFeature::Gene::ExonI implementing object.
+           A string indicating the type of the exon (optional).
+
+
+=cut
+
+sub add_exon {
+    my ($self, $exon, $type) = @_;
+    # only objects honouring the exon interface may be added as exons
+    $self->throw("$exon does not implement Bio::SeqFeature::Gene::ExonI")
+        unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
+    return $self->_add($exon, 'Bio::SeqFeature::Gene::Exon', $type);
+}
+
+=head2 flush_exons
+
+ Title   : flush_exons()
+ Usage   : $transcript->flush_exons();
+           $transcript->flush_exons('terminal');
+ Function: Remove all or a certain type of exon features from this transcript.
+
+           See add_exon() for documentation about types.
+
+           Calling without a type will not flush UTRs. Call flush_utrs() for
+           this purpose.
+ Returns : the deleted features as a list
+ Args    : A string indicating the type of the exon (optional).
+
+
+=cut
+
+sub flush_exons {
+    my ($self, $tag_pattern) = @_;    # the pattern is optional
+    return $self->_flush('Bio::SeqFeature::Gene::Exon', $tag_pattern);
+}
+
+=head2 introns
+
+ Title   : introns()
+ Usage   : @introns = $gene->introns();
+ Function: Get all intron features of this gene structure.
+
+           Note that this implementation generates these features
+           on-the-fly, that is, it simply treats all regions between
+           exons as introns, assuming that exons do not overlap. A
+           consequence is that a consistent correspondence between the
+           elements in the returned array and the array that exons()
+           returns will exist only if the exons are properly sorted
+           within their types (forward for plus- strand and reverse
+           for minus-strand transcripts). To ensure correctness the
+           elements in the array returned will always be sorted.
+
+ Returns : An array of Bio::SeqFeature::Gene::Intron objects representing
+           the intron regions.
+ Args    : 
+
+
+=cut
+
+sub introns {
+    my $self = shift;
+    my @exons = $self->exons();
+    # with fewer than two exons there is no gap that could be an intron
+    return () if @exons < 2;
+
+    # Record the strand (the first non-zero one wins) and whether the first
+    # two exons are stacked in reverse order (likely on the minus strand).
+    my $strand;
+    for my $exon (@exons) {
+        $strand = $exon->strand();
+        last if $strand;
+    }
+    my $rev_order = $exons[0]->end() < $exons[1]->start() ? 0 : 1;
+
+    # Make sure exons are sorted. Because we assume they don't overlap, we
+    # simply sort by start position.
+    my @keyed;
+    if (defined($strand) && ($strand == -1) && $rev_order) {
+        # negative strand and found in reverse order: sort descending
+        @keyed = sort { $b->[1] <=> $a->[1] }
+                 map { [ $_, $_->start() ] } @exons;
+    } else {
+        # everything else: sort ascending on the strand-signed start
+        @keyed = sort { $a->[1] <=> $b->[1] }
+                 map { [ $_, $_->start * ($_->strand || 1) ] } @exons;
+    }
+    @exons = map { $_->[0] } @keyed;
+    # build one intron for each pair of neighbouring exons
+    my @introns;
+    for my $i (0 .. $#exons - 1) {
+        my $intron = Bio::SeqFeature::Gene::Intron->new(-primary=>'intron');
+        $intron->upstream_Exon($exons[$i]);
+        $intron->downstream_Exon($exons[$i + 1]);
+        $intron->attach_seq($self->entire_seq) if $self->entire_seq;
+        push @introns, $intron;
+    }
+    return @introns;
+}
+
+=head2 poly_A_site
+
+ Title   : poly_A_site()
+ Usage   : $polyAsite = $transcript->poly_A_site();
+ Function: Get/set the poly-adenylation feature/site of this transcript.
+ Returns : A Bio::SeqFeatureI implementing object representing the
+           poly-adenylation region.
+ Args    : A Bio::SeqFeatureI implementing object on set, or FALSE to flush
+           a previously set object.
+
+
+=cut
+
+sub poly_A_site {
+    my ($self, $fea) = @_;
+    # on set, drop the previous site first; a FALSE value thus only flushes
+    $self->_flush('Bio::SeqFeature::Gene::Poly_A_site') if @_ > 1;
+    $self->_add($fea,'Bio::SeqFeature::Gene::Poly_A_site') if $fea;
+    return ($self->get_feature_type('Bio::SeqFeature::Gene::Poly_A_site'))[0];
+}
+
+=head2 utrs
+
+ Title   : utrs()
+ Usage   : @utr_sites = $transcript->utrs('utr3prime');
+           @utr_sites = $transcript->utrs('utr5prime');
+           @utr_sites = $transcript->utrs();
+ Function: Get the features representing untranslated regions (UTR) of this
+           transcript.
+
+           You may provide an argument specifying the type of UTR. Currently
+           the following types are recognized: utr5prime utr3prime for UTR on the
+           5' and 3' end of the CDS, respectively.
+
+ Returns : An array of Bio::SeqFeature::Gene::UTR objects
+           representing the UTR regions or sites.
+ Args    : Optionally, either utr3prime, or utr5prime for the type of UTR
+           feature.
+
+
+=cut
+
+sub utrs {
+    my $self     = shift;
+    my $utr_type = shift;    # optional: 'utr5prime' or 'utr3prime'
+    return $self->get_feature_type('Bio::SeqFeature::Gene::UTR', $utr_type);
+}
+
+=head2 add_utr
+
+ Title   : add_utr()
+ Usage   : $transcript->add_utr($utrobj, 'utr3prime');
+           $transcript->add_utr($utrobj);
+ Function: Add a UTR feature/site to this transcript.
+
+           The second parameter is optional and denotes the type of the UTR
+           feature. Presently recognized types include 'utr5prime' and 'utr3prime'
+           for UTR on the 5' and 3' end of a gene, respectively.
+
+           Calling this method is the same as calling 
+           add_exon($utrobj, 'utr'.$type). In this sense a UTR object is a
+           special exon object, which is transcribed, not spliced out, but
+           not translated.
+
+           Note that the object supplied should return FALSE for is_coding().
+           Otherwise cds() and friends will become confused.
+
+ Returns : 
+ Args    : A Bio::SeqFeature::Gene::UTR implementing object.
+
+
+=cut
+
+sub add_utr {
+    my ($self, $utr, $utr_type) = @_;    # the type is optional
+    return $self->_add($utr, 'Bio::SeqFeature::Gene::UTR', $utr_type);
+}
+
+=head2 flush_utrs
+
+ Title   : flush_utrs()
+ Usage   : $transcript->flush_utrs();
+           $transcript->flush_utrs('utr3prime');
+ Function: Remove all or a specific type of UTR features/sites from this
+           transcript.
+
+           Cf. add_utr() for documentation about recognized types.
+ Returns : a list of the removed features
+ Args    : Optionally a string denoting the type of UTR feature.
+
+
+=cut
+
+sub flush_utrs {
+    my ($self, $utr_type) = @_;    # the type is optional
+    return $self->_flush('Bio::SeqFeature::Gene::UTR', $utr_type);
+}
+
+=head2 sub_SeqFeature
+
+ Title   : sub_SeqFeature
+ Usage   : @feats = $transcript->sub_SeqFeature();
+ Function: Returns an array of all subfeatures.
+
+           This method is defined in Bio::SeqFeatureI. We override this here
+           to include the exon etc features.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : none
+
+
+=cut
+
+sub sub_SeqFeature {
+   my $self = shift;
+   # Start from what the parent already has, then add the features we have
+   # in addition: exons (this includes UTR features), promoters and, when
+   # one is set, the poly-A site.
+   my @subfeatures = ($self->SUPER::sub_SeqFeature(),
+                      $self->exons(),
+                      $self->promoters());
+   my $poly_a = $self->poly_A_site();
+   push(@subfeatures, $poly_a) if $poly_a;
+   return @subfeatures;
+}
+
+=head2 flush_sub_SeqFeature
+
+ Title   : flush_sub_SeqFeature
+ Usage   : $transcript->flush_sub_SeqFeature();
+           $transcript->flush_sub_SeqFeature(1);
+ Function: Removes all subfeatures.
+
+           This method is overridden from Bio::SeqFeature::Generic to flush
+           all additional subfeatures like exons, promoters, etc., which is
+           almost certainly not what you want. To remove only features added
+           through $transcript->add_sub_SeqFeature($feature) pass any
+           argument evaluating to TRUE.
+
+ Example :
+ Returns : none
+ Args    : Optionally, an argument evaluating to TRUE will suppress flushing
+           of all transcript-specific subfeatures (exons etc.).
+
+
+=cut
+
+sub flush_sub_SeqFeature {
+   my $self = shift;
+   my $generic_only = shift;   # TRUE keeps the transcript-specific features
+   $self->SUPER::flush_sub_SeqFeature();
+   if(! $generic_only) {       # also drop the transcript-specific features
+       $self->flush_promoters();
+       $self->flush_exons();
+       $self->flush_utrs();
+       $self->poly_A_site(0);
+   }
+}
+
+
+=head2 cds
+
+ Title   : cds
+ Usage   : $seq = $transcript->cds();
+ Function: Returns the CDS (coding sequence) as defined by the exons
+           of this transcript and the attached sequence.
+
+           If no sequence is attached this method will return false.
+
+           Note that the implementation provided here returns a
+           concatenation of all coding exons, thereby assuming that
+           exons do not overlap.
+
+           Note also that you cannot set the CDS via this method. Set
+           a single CDS feature as a single exon, or derive your own
+           class if you want to store a predicted CDS.
+
+ Example :
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+=cut
+
+sub cds {
+    my $self = shift;
+    # exons_ordered() already sorts the exons according to strand
+    my @ordered = $self->exons_ordered();
+    return unless @ordered;
+
+    # Take the strand from the first exon that has a non-zero one, and
+    # refuse exons that lie on the opposite strand.
+    my $strand;
+    for my $exon (@ordered) {
+        my $exon_strand = $exon->strand();
+        $strand = $exon_strand if defined($exon_strand) && ! $strand;
+        if ($exon_strand && ($exon_strand * $strand) < 0) {
+            $self->throw("Transcript mixes coding exons on plus and minus ".
+                         "strand. This makes no sense.");
+        }
+    }
+    my $cds = $self->_make_cds(@ordered);
+    return unless $cds;
+    return Bio::PrimarySeq->new('-id' => $self->seq_id(),
+                                '-seq' => $cds,
+                                '-alphabet' => "dna");
+}
+
+=head2 protein
+
+ Title   : protein()
+ Usage   : $protein = $transcript->protein();
+ Function: Get the protein encoded by the transcript as a sequence object.
+
+           The implementation provided here simply calls translate() on the
+           object returned by cds().
+
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+
+=cut
+
+sub protein {
+    my $self = shift;
+
+    # translate the CDS when there is one; otherwise return nothing
+    my $cds = $self->cds();
+    return unless $cds;
+    return $cds->translate();
+}
+
+=head2 mrna
+
+ Title   : mrna()
+ Usage   : $mrna = $transcript->mrna();
+ Function: Get the mRNA of the transcript as a sequence object.
+
+           The difference to cds() is that the sequence object returned by
+           this method will also include UTR and the poly-adenylation site,
+           but not promoter sequence (TBD).
+
+           HL: do we really need this method?
+
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+
+=cut
+
+sub mrna {
+    my $self = shift;
+
+    # Start from the coding part; when no CDS can be built, fall back to
+    # an empty RNA sequence object.
+    my $mrna = $self->cds()
+        || Bio::PrimarySeq->new('-id' => $self->seq_id(),
+                                '-alphabet' => "rna",
+                                '-seq' => "");
+
+    # put the concatenated 5' UTR sequences in front of it ...
+    my $utr5 = join("", map { $_->seq()->seq() } $self->utrs('utr5prime'));
+    $mrna->seq($utr5 . $mrna->seq());
+
+    # ... and the concatenated 3' UTR sequences behind it
+    my $utr3 = join("", map { $_->seq()->seq() } $self->utrs('utr3prime'));
+    $mrna->seq($mrna->seq() . $utr3);
+
+    # the poly-adenylation site, if there is one, comes last
+    my $poly_a = $self->poly_A_site();
+    if ($poly_a) {
+        $mrna->seq($mrna->seq() . $poly_a->seq()->seq());
+    }
+
+    # an empty result means there is no mRNA to report
+    return if $mrna->length() == 0;
+    return $mrna;
+}
+
+sub _get_typed_keys {
+    my ($self, $keyprefix, $type) = @_;
+
+    # Normalise the optional type: lower-case it, or use the empty string
+    # so that every key carrying the prefix matches.
+    $type = $type ? lc($type) : "";
+
+    # Return the object's hash keys that start with "_<prefix><type>"
+    # (matched case-insensitively).
+    return grep { /^_$keyprefix$type/i } keys %{$self};
+}
+
+sub _make_cds {
+    my ($self, @exons) = @_;
+    my $cds = "";
+    # concatenate the coding exons, cropping or padding to keep the frame
+    foreach my $exon (@exons) {
+        next if((! defined($exon->seq())) || (! $exon->is_coding()));
+        my $phase = length($cds) % 3;
+        # let's check the simple case 
+        if((! defined($exon->frame())) || ($phase == $exon->frame())) {
+            # this one fits exactly, or frame of the exon is undefined (should
+            # we warn about that?); we bypass the $exon->cds() here (hmm,
+            # not very clean style, but I don't see where this screws up)
+            $cds .= $exon->seq()->seq();
+        } else {
+            # this one is probably from exon shuffling and needs some work
+            my $seq = $exon->cds(); # now $seq is guaranteed to be in frame 0
+            next if(! $seq);
+            $seq = $seq->seq();
+            # adjustment needed?
+            if($phase > 0) {
+                # how many Ns can we chop off the piece to be added?
+                my $n_crop = 0;
+                if($seq =~ /^(n+)/i) {
+                    $n_crop = length($1);
+                }
+                if($n_crop >= $phase) {
+                    # chop off to match the phase
+                    $seq = substr($seq, $phase);
+                } else {
+                    # fill in Ns
+                    $seq = ("n" x (3-$phase)) . $seq;
+                }
+            }
+            $cds .= $seq;
+        }
+    }
+    return $cds;
+}
+
+=head2 features
+
+ Title   : features
+ Usage   : my @features=$transcript->features;
+ Function: returns all the features associated with this transcript
+ Returns : a list of SeqFeatureI implementing objects
+ Args    : none
+
+
+=cut
+
+
+sub features {
+    my ($self) = @_;    # undefined slots are left out of the result
+    return grep { defined($_) } @{ $self->{'_features'} || [] };
+}
+
+=head2 features_ordered
+
+ Title   : features_ordered
+ Usage   : my @features=$transcript->features_ordered;
+ Function: returns all the features associated with this transcript,
+           in order by feature start, according to strand
+ Returns : a list of SeqFeatureI implementing objects
+ Args    : none
+
+
+=cut
+
+sub features_ordered{
+   my ($self) = @_;   # go through features() so undefined slots are skipped
+   return $self->_stranded_sort($self->features);
+}
+
+
+sub get_unordered_feature_type{
+    my ($self, $type, $pri)=@_;
+
+    # Keep every feature that is of class $type. When a primary tag
+    # pattern $pri is given, the feature's primary_tag must match it as
+    # well (it is used as a case-insensitive regular expression).
+    my @matches = grep {
+        $_->isa($type) && ! ($pri && $_->primary_tag !~ /$pri/i)
+    } $self->features;
+
+    # the matches keep the order in which features() delivered them;
+    # no sorting is applied here
+    return @matches;
+}
+
+sub get_feature_type {
+    my ($self, @criteria) = @_;    # class name and optional tag pattern
+    return $self->_stranded_sort($self->get_unordered_feature_type(@criteria));
+}
+
+#This was fixed by Gene Cutler - the indexing on the list being reversed
+#fixed a bad bug.  Thanks Gene!
+sub _flush {
+     my ($self, $type, $pri)=@_;
+     my @list=$self->features;
+     my @cut;
+     for (reverse (0..$#list)) {   # backwards, so splice keeps indices valid
+         if (defined $list[$_] &&
+             $list[$_]->isa($type)) {
+             if ($pri && $list[$_]->primary_tag !~ /$pri/i) {
+                 next;
+             }
+             push @cut, splice @list, $_, 1;  #move the match from @list to @cut
+         }
+     }
+     $self->{'_features'}=\@list;
+     @cut = reverse @cut;   # back to the stored order
+     return @cut;           # an array, so scalar context yields the count
+}
+
+sub _add {
+    my ($self, $fea, $type, $pri)=@_;
+    # the feature classes are loaded at run time, on first use
+    require Bio::SeqFeature::Gene::Promoter;
+    require Bio::SeqFeature::Gene::UTR;
+    require Bio::SeqFeature::Gene::Exon;
+    require Bio::SeqFeature::Gene::Intron;
+    require Bio::SeqFeature::Gene::Poly_A_site;
+
+    $self->throw("$fea does not implement Bio::SeqFeatureI")
+        unless $fea->isa('Bio::SeqFeatureI');
+    # re-bless into the requested class if the feature is not of that
+    # class yet, or if a new primary tag was asked for
+    $fea = $self->_new_of_type($fea,$type,$pri) if ! $fea->isa($type) || $pri;
+
+    # adopt the feature's strand if we have none; otherwise the two
+    # strands must not be opposite
+    if (! $self->strand) {
+        $self->strand($fea->strand);
+    } elsif ($self->strand * $fea->strand == -1) {
+        $self->throw("$fea is on opposite strand from $self");
+    }
+    $self->_expand_region($fea);
+    # hand our sequence down to a feature that has none attached
+    my $seq = $self->entire_seq();
+    if (defined($seq) && ! defined($fea->entire_seq()) && $fea->can('attach_seq')) {
+        $fea->attach_seq($seq);
+    }
+    # the parent, if any, has to cover the new feature too
+    $self->parent->_expand_region($fea) if defined $self->parent;
+    push(@{$self->{'_features'}}, $fea);
+    return 1;
+}
+
+sub _stranded_sort {
+    my ($self, @list)=@_;
+    my $strand;     # common strand of the list; undef if none or mixed
+    foreach my $fea (@list) {
+        if($fea->strand()) {
+            # defined and != 0
+            $strand = $fea->strand() if(! $strand);
+            if(($fea->strand() * $strand) < 0) {
+                $strand = undef;   # mixed strands: use the forward order
+                last;
+            }
+        }
+    }
+    if (defined $strand && $strand == - 1) {  #reverse strand
+        return map { $_->[0] } sort {$b->[1] <=> $a->[1]} map { [$_, $_->start] } @list;
+    } else {               #undef or forward strand
+        return map { $_->[0] } sort {$a->[1] <=> $b->[1]} map { [$_, $_->start] } @list;
+    }
+}
+
+sub _new_of_type {
+    my ($self, $fea, $type, $pri)= @_;
+
+    # The primary tag is the one asked for or, failing that, the last
+    # component of the class name.
+    my ($primary) = $pri ? ($pri) : ($type =~ /.*::(.+)/);
+
+    # Note that the feature is re-blessed in place; no copy is made.
+    bless $fea,$type;
+    $fea->primary_tag($primary);
+    return $fea;
+}
+
+sub transcript_destroy {
+    my ($self) = @_;
+    # We're going to be really explicit to ensure memory leaks
+    # don't occur: undefine each feature handed out by features() ...
+    $_ = undef foreach $self->features;
+
+    # ... and reset the parent to undef.
+    $self->parent(undef);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/TranscriptI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/TranscriptI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/TranscriptI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,252 @@
+# $Id: TranscriptI.pm,v 1.13.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::TranscriptI
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::TranscriptI - Interface for a feature representing a
+         transcript of exons, promoter(s), UTR, and a poly-adenylation site.
+
+=head1 SYNOPSIS
+
+  #documentation needed
+
+=head1 DESCRIPTION
+
+A feature representing a transcript.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::TranscriptI;
+use strict;
+
+use Carp;
+
+use base qw(Bio::SeqFeatureI);
+
+=head2 promoters
+
+ Title   : promoters()
+ Usage   : @proms = $transcript->promoters();
+ Function: Get the promoter features of this transcript.
+
+           Note that OO-modeling of regulatory elements is not stable yet.
+           This means that this method might change or even disappear in a
+           future release. Be aware of this if you use it.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects representing the
+           promoter regions or sites.
+ Args    : 
+
+=cut
+
+sub promoters {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 exons
+
+ Title   : exons()
+ Usage   : @exons = $transcript->exons();
+           @initial = $transcript->exons('Initial');
+ Function: Get the individual exons this transcript comprises of, or all exons
+           of a specified type.
+
+           Refer to the documentation of the class that produced this 
+           transcript object for information about the possible types.
+
+           See Bio::SeqFeature::Gene::ExonI for properties of the
+           returned objects.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
+ Args    : An optional string specifying the type of the exon.
+
+=cut
+
+sub exons {
+    my $self = shift;    # interface stub: the optional type is not used here
+    return $self->throw_not_implemented();
+}
+
+=head2 introns
+
+ Title   : introns()
+ Usage   : @introns = $transcript->introns();
+ Function: Get all introns this transcript comprises of.
+ Returns : An array of Bio::SeqFeatureI implementing objects representing the
+           introns.
+ Args    : 
+
+
+=cut
+
+sub introns {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 poly_A_site
+
+ Title   : poly_A_site()
+ Usage   : $polyAsite = $transcript->poly_A_site();
+ Function: Get the poly-adenylation site of this transcript.
+ Returns : A Bio::SeqFeatureI implementing object.
+ Args    : 
+
+
+=cut
+
+sub poly_A_site {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 utrs
+
+ Title   : utrs()
+ Usage   : @utr_sites = $transcript->utrs();
+ Function: Get the UTR regions this transcript comprises of.
+
+           See Bio::SeqFeature::Gene::ExonI for properties of the
+           returned objects.
+
+ Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
+ Args    : 
+
+
+=cut
+
+sub utrs {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 mrna
+
+ Title   : mrna()
+ Usage   : $mrna = $transcript->mrna();
+ Function: Get the mRNA of the transcript as a sequence object.
+
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+
+=cut
+
+sub mrna {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 cds
+
+ Title   : cds()
+ Usage   : $cds = $transcript->cds();
+ Function: Get the CDS (coding sequence) of the transcript as a sequence
+           object.
+
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+
+=cut
+
+sub cds {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 protein
+
+ Title   : protein()
+ Usage   : $protein = $transcript->protein();
+ Function: Get the protein encoded by the transcript as a sequence object.
+
+ Returns : A Bio::PrimarySeqI implementing object.
+ Args    : 
+
+
+=cut
+
+sub protein {
+    my $self = shift;    # interface stub: implementing classes override this
+    return $self->throw_not_implemented();
+}
+
+=head2 parent
+
+ Title   : parent
+ Usage   : $obj->parent($newval)
+ Function: get the parent gene of the transcript
+ Returns : value of parent - a Bio::SeqFeature::Gene::GeneStructureI-compliant object
+ Args    : a Bio::SeqFeature::Gene::GeneStructureI-compliant object (optional)
+
+
+=cut
+
+sub parent{
+   my ($self, @args) = @_;
+   if( @args ) {
+       my $value = $args[0];
+       # Anything defined must be a gene structure. An undefined value is
+       # accepted on purpose, to allow re-setting the parent to undef.
+       if (defined $value &&
+           ! $value->isa("Bio::SeqFeature::Gene::GeneStructureI")) {
+           $self->throw("$value must be a Bio::SeqFeature::Gene::GeneStructureI")
+       } else {
+           $self->{'_parent'} = $value;
+       }
+   }
+   return $self->{'_parent'};
+}
+
+
+1;
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/UTR.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/UTR.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Gene/UTR.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,131 @@
+# $Id: UTR.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Gene::UTR
+#
+# Cared for by David Block <dblock at gene.pbi.nrc.ca>
+#
+# Copyright David Block
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Gene::UTR - A feature representing an untranslated region
+          that is part of a transcriptional unit
+
+=head1 SYNOPSIS
+
+See documentation of methods
+
+=head1 DESCRIPTION
+
+A UTR is a Bio::SeqFeature::Gene::ExonI compliant object that is
+non-coding, and can be either 5' or 3' in a transcript.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - David Block
+
+Email dblock at gene.pbi.nrc.ca
+
+=head1 CONTRIBUTORS
+
+This is based on the Gene Structure scaffolding erected by Hilmar Lapp
+(hlapp at gmx.net).
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Gene::UTR;
+use strict;
+
+# Object preamble - inherits from Bio::SeqFeature::Gene::Exon
+
+
+use base qw(Bio::SeqFeature::Gene::Exon);
+
+=head2 new
+
+ Title   : new
+ Usage   :
+ Function: We override the constructor here to set is_coding to false
+           unless explicitly overridden.
+
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub new{
+    my ($caller, @args) = @_;
+    # a UTR is non-coding unless the caller explicitly says otherwise
+    if(! grep { lc($_) eq '-is_coding'; } @args) {
+        push(@args, '-is_coding', 0);
+    }
+    my $self = $caller->SUPER::new(@args);
+
+    my ($primary, $prim) = 
+        $self->_rearrange([qw(PRIMARY PRIMARY_TAG)], @args);
+    # fall back to the generic 'utr' tag when no primary tag was given
+    $self->primary_tag('utr') unless $primary || $prim;
+
+    return $self;
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+ Function: Returns the primary tag for a feature,
+           eg 'utr5prime'.  This method ensures that 5prime/3prime information
+           is uniformly stored
+ Returns : a string 
+ Args    : on set, the new primary tag (optional)
+
+=cut
+
+sub primary_tag{
+    my ($self, @args) = @_;
+    if (@args && defined($args[0])) {
+        # On set, normalise any tag mentioning 3 or 5 to the uniform
+        # spelling; other tags are kept as given, with a warning.
+        if ($args[0] =~ /(3|5)/) {
+            $args[0] = "utr$1prime";
+        } else {
+            $self->warn("Primary tag should indicate if this is 3 or 5'. ".
+                        "Preferred text is 'utr3prime' or 'utr5prime'.");
+        }
+    }
+    return $self->SUPER::primary_tag(@args);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Generic.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Generic.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Generic.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,956 @@
+# $Id: Generic.pm,v 1.103.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Generic
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Generic - Generic SeqFeature
+
+=head1 SYNOPSIS
+
+   $feat = new Bio::SeqFeature::Generic ( 
+            -start        => 10, 
+            -end          => 100,
+            -strand       => -1, 
+            -primary      => 'repeat', # -primary_tag is a synonym
+            -source_tag   => 'repeatmasker',
+            -display_name => 'alu family',
+            -score        => 1000,
+            -tag          => { new => 1,
+                               author => 'someone',
+                               sillytag => 'this is silly!' } );
+
+   $feat = new Bio::SeqFeature::Generic ( -gff_string => $string );
+   # if you want explicitly GFF1
+   $feat = new Bio::SeqFeature::Generic ( -gff1_string => $string );
+
+   # add it to an annotated sequence
+
+   $annseq->add_SeqFeature($feat);
+
+=head1 DESCRIPTION
+
+Bio::SeqFeature::Generic is a generic implementation for the
+Bio::SeqFeatureI interface, providing a simple object to provide all
+the information for a feature on a sequence.
+
+For many Features, this is all you will need to use (for example, this
+is fine for Repeats in DNA sequence or Domains in protein
+sequence). For other features, which have more structure, this is a
+good base class to extend using inheritance to have new things: this
+is what is done in the L<Bio::SeqFeature::Gene>,
+L<Bio::SeqFeature::Transcript> and L<Bio::SeqFeature::Exon>, which provide
+well coordinated classes to represent genes on DNA sequence (for
+example, you can get the protein sequence out from a transcript
+class).
+
+For many Features, you want to add some piece of information, for
+example a common one is that this feature is 'new' whereas other
+features are 'old'.  The tag system, which here is implemented using a
+hash can be used here.  You can use the tag system to extend the
+L<Bio::SeqFeature::Generic> programmatically: that is, you know that you have
+read in more information into the tag 'mytag' which you can then
+retrieve. This means you do not need to know how to write inherited
+Perl to provide more complex information on a feature, and/or, if you
+do know but you do not want to write a new class every time you need
+some extra piece of information, you can use the tag system to easily
+store and then retrieve information.
+
+The tag system can be written in/out of GFF format, and also into EMBL
+format via the L<Bio::SeqIO> system
+
+=head1 Implemented Interfaces
+
+This class implements the following interfaces.
+
+=over 4
+
+=item L<Bio::SeqFeatureI>
+
+Note that this includes implementing Bio::RangeI.
+
+=item L<Bio::AnnotatableI>
+
+=item L<Bio::FeatureHolderI>
+
+Features held by a feature are essentially sub-features.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Ewan Birney E<lt>birney at sanger.ac.ukE<gt>
+
+=head1 DEVELOPERS
+
+This class has been written with an eye out for inheritance. The fields
+in the actual object hash are:
+
+   _gsf_tag_hash  = reference to a hash for the tags
+   _gsf_sub_array = reference to an array for subfeatures
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Generic;
+use strict;
+
+use Bio::AnnotatableI;
+use Bio::Annotation::Collection;
+use Bio::Location::Simple;
+use Bio::Location::Split;
+use Bio::Tools::GFF;
+#use Tie::IxHash;
+
+use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::FeatureHolderI);
+
+sub new {
+    my ( $class, @params) = @_;
+    # list assignment keeps SUPER::new in list context
+    my ($feat) = $class->SUPER::new(@params);
+    $feat->_register_for_cleanup(\&cleanup_generic);
+    # start out with empty parse-hint and tag hashes
+    @{$feat}{'_parse_h', '_gsf_tag_hash'} = ({}, {});
+
+    # apply all recognised named parameters in one go
+    $feat->set_attributes(@params);
+
+    return $feat;
+}
+
+
+=head2 set_attributes
+
+ Title   : set_attributes
+ Usage   :
+ Function: Sets a whole array of parameters at once.
+ Example :
+ Returns : none
+ Args    : Named parameters, in the form as they would otherwise be passed
+           to new(). Currently recognized are:
+
+                    -start          start position
+                    -end            end position
+                    -strand         strand
+                    -primary_tag    primary tag 
+                    -primary        (synonym for -primary_tag)
+                    -source         source tag
+                    -frame          frame
+                    -score          score value
+                    -tag            a reference to a tag/value hash
+                    -gff_string     GFF v.2 string to initialize from
+                    -gff1_string    GFF v.1 string to initialize from
+                    -seq_id         the display name of the sequence
+                    -annotation     the AnnotationCollectionI object
+                    -location       the LocationI object
+
+=cut
+
+sub set_attributes {
+    # Bulk setter used by new(): unpack the named parameters and apply each
+    # supplied one through its accessor.
+    my ($self,@args) = @_;
+    my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
+        $source, $frame, $score, $tag, $gff_string, $gff1_string,
+        $seqname, $seqid, $annot, $location,$display_name) =
+            $self->_rearrange([qw(START END STRAND
+                                  PRIMARY_TAG SOURCE_TAG PRIMARY SOURCE
+                                  FRAME SCORE TAG
+                                  GFF_STRING GFF1_STRING
+                                  SEQNAME SEQ_ID
+                                  ANNOTATION LOCATION DISPLAY_NAME
+                                  )], @args);
+
+    # A location object or GFF text is applied first; the individual values
+    # handled further down are set afterwards.
+    $location    && $self->location($location);
+    $gff_string  && $self->_from_gff_string($gff_string);
+    $gff1_string  && do {
+        # GFF1: install a version 1 interpreter on this instance, then parse
+        # with _from_gff_string(); no _from_gff_stream() exists in this class.
+        $self->gff_format(Bio::Tools::GFF->new('-gff_version' => 1));
+        $self->_from_gff_string($gff1_string);
+    };
+    # Tags are only set when true; -primary and -source are applied after
+    # -primary_tag and -source_tag, so they win if both spellings are given.
+    $primary_tag            && $self->primary_tag($primary_tag);
+    $source_tag             && $self->source_tag($source_tag);
+    $primary                && $self->primary_tag($primary);
+    $source                 && $self->source_tag($source);
+    # These only have to be defined, so 0 and '' are passed on as well.
+    defined $start          && $self->start($start);
+    defined $end            && $self->end($end);
+    defined $strand         && $self->strand($strand);
+    defined $frame          && $self->frame($frame);
+    defined $display_name   && $self->display_name($display_name);
+    defined $score          && $self->score($score);
+    $annot                  && $self->annotation($annot);
+    # -seqname is the deprecated spelling; -seq_id wins when both are given.
+    if($seqname) {
+        $self->warn("-seqname is deprecated. Please use -seq_id instead.");
+        $seqid = $seqname unless $seqid;
+    }
+    $seqid          && $self->seq_id($seqid);
+    $tag            && do {
+        foreach my $t ( keys %$tag ) {
+            $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ? @{$tag->{$t}} : $tag->{$t});
+        }
+    };
+}
+
+
+=head2 direct_new
+
+ Title   : direct_new
+ Usage   : my $obj = Bio::SeqFeature::Generic->direct_new
+ Function: create a blessed hash - for performance improvement in 
+           object creation
+ Returns : Bio::SeqFeature::Generic object
+ Args    : none
+
+
+=cut
+
+sub direct_new {
+    # Fast path: hand back a bare blessed hash. No superclass constructor
+    # is called and none of the slots that new() fills are created.
+    my $class = shift;
+
+    my $feat = bless {}, $class;
+    return $feat;
+}
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location 
+           of feature on sequence or parent feature  
+ Returns : Bio::LocationI object
+ Args    : [optional] Bio::LocationI object to set the value to.
+
+
+=cut
+
+sub location {
+    my ($feat, $loc) = @_;
+
+    if (! defined $loc) {
+        # get: create an empty location on first use so undef is never returned
+        $feat->{'_location'} ||= Bio::Location::Simple->new();
+        return $feat->{'_location'};
+    }
+    # set: only objects implementing Bio::LocationI are accepted
+    if (! ref($loc) || ! $loc->isa('Bio::LocationI')) {
+        $feat->throw("object $loc pretends to be a location but ".
+                     "does not implement Bio::LocationI");
+    }
+    $feat->{'_location'} = $loc;
+    return $feat->{'_location'};
+}
+
+
+=head2 start
+
+ Title   : start
+ Usage   : $start = $feat->start
+           $feat->start(20)
+ Function: Get/set on the start coordinate of the feature
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub start {
+   my ($feat, $pos) = @_;   # $pos is undef on a plain get and still passed on
+   return $feat->location()->start($pos);
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $end = $feat->end
+           $feat->end($end)
+ Function: get/set on the end coordinate of the feature
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub end {
+   my ($feat, $pos) = @_;   # $pos is undef on a plain get and still passed on
+   return $feat->location()->end($pos);
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : my $len = $feature->length
+ Function: Get the feature length computed as 
+           $feat->end - $feat->start + 1
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub length {
+   my ($feat) = @_;
+   return 1 + $feat->end() - $feat->start();   # start and end are both counted
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $strand = $feat->strand()
+           $feat->strand($strand)
+ Function: get/set on strand information, being 1,-1 or 0
+ Returns : -1,1 or 0
+ Args    : none
+
+
+=cut
+
+sub strand {
+   my ($feat, @new_strand) = @_;   # empty on a plain get
+   return $feat->location()->strand(@new_strand);
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $feat->score()
+           $feat->score($score)
+ Function: get/set on score information
+ Returns : float
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub score {
+  my $self = shift;
+  # Combined getter/setter; the value lives in $self->{'_gsf_score'}.
+  if (@_) {   # any argument, including an explicit undef, is a set
+      my $value = shift;
+      if ( defined $value && $value &&   # undef, 0 and '' are stored unchecked
+	   $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ and $value != 0) {   # NOTE(review): low-precedence 'and' - text that numifies to 0 never throws; confirm intended
+	  $self->throw(-class=>'Bio::Root::BadParameter',
+		       -text=>"'$value' is not a valid score",
+		       -value=>$value);
+      }
+      return $self->{'_gsf_score'} = $value;   # a set returns the value just stored
+  }
+  return $self->{'_gsf_score'};
+}
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $frame = $feat->frame()
+           $feat->frame($frame)
+ Function: get/set on frame information
+ Returns : 0,1,2, '.'
+ Args    : none if get, the new value if set
+
+
+=cut
+
+sub frame {
+  my ($feat, @new_frame) = @_;
+
+  # plain get
+  return $feat->{'_gsf_frame'} unless @new_frame;
+
+  # set: undef is stored as-is, anything else must pass the 0/1/2/'.' check
+  my $frame = $new_frame[0];
+  if ( defined $frame && $frame !~ /^[0-2.]$/ ) {
+      $feat->throw("'$frame' is not a valid frame");
+  }
+  $feat->{'_gsf_frame'} = $frame;
+  return $feat->{'_gsf_frame'};
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+           $feat->primary_tag('exon')
+ Function: get/set on the primary tag for a feature,
+           eg 'exon'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub primary_tag {
+    my ($feat, @new_tag) = @_;
+    $feat->{'_primary_tag'} = $new_tag[0] if @new_tag;   # even undef overwrites
+    return $feat->{'_primary_tag'};
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $tag = $feat->source_tag()
+           $feat->source_tag('genscan');
+ Function: Returns the source tag for a feature,
+           eg, 'genscan'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub source_tag {
+    my ($feat, @new_tag) = @_;
+    $feat->{'_source_tag'} = $new_tag[0] if @new_tag;   # even undef overwrites
+    return $feat->{'_source_tag'};
+}
+
+=head2 attach_seq
+
+ Title   : attach_seq
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+           Bio::Seq object is for the *entire* sequence: ie
+           from 1 to 10000
+ Example :
+ Returns : TRUE on success
+ Args    : a Bio::PrimarySeqI compliant object
+
+
+=cut
+
+sub attach_seq {
+   my ($feat, $whole_seq) = @_;
+
+   # anything that is not a Bio::PrimarySeqI object is rejected
+   $feat->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures")
+       unless $whole_seq && ref($whole_seq) && $whole_seq->isa("Bio::PrimarySeqI");
+
+   $feat->{'_gsf_seq'} = $whole_seq;
+
+   # hand the same sequence object down to every sub-feature
+   for my $sub_feat ( $feat->sub_SeqFeature() ) {
+       $sub_feat->attach_seq($whole_seq);
+   }
+   return 1;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $tseq = $sf->seq()
+ Function: returns the truncated sequence (if there) for this
+ Example :
+ Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
+           bounded by start & end, or undef if there is no sequence attached
+ Args    : none
+
+
+=cut
+
+sub seq {
+   # Returns the stretch of the attached sequence between start and end,
+   # or nothing when no sequence has been attached.
+   my ($feat, $unexpected) = @_;
+
+   # read-only accessor: the sequence itself is supplied via attach_seq()
+   $feat->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq")
+       if defined $unexpected;
+
+   # the '_gsf_seq' slot only exists once something was attached
+   return unless exists $feat->{'_gsf_seq'};
+
+   # assuming our seq object is sensible, it should not have to yank
+   # the entire sequence out here.
+
+   my $sub_seq = $feat->{'_gsf_seq'}->trunc($feat->start(), $feat->end());
+
+   # A strand of -1 yields the reverse complement; an undefined strand
+   # is left alone.
+   # (the original author's remark here: "this does not work well (?)")
+   if ( defined($feat->strand) && $feat->strand == -1 ) {
+
+       $sub_seq = $sub_seq->revcom;
+
+   }
+
+   return $sub_seq;
+}
+
+=head2 entire_seq
+
+ Title   : entire_seq
+ Usage   : $whole_seq = $sf->entire_seq()
+ Function: gives the entire sequence that this seqfeature is attached to
+ Example :
+ Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
+           sequence attached
+ Args    :
+
+
+=cut
+
+sub entire_seq {
+   return $_[0]->{'_gsf_seq'};   # the whole attached sequence, undef if there is none
+}
+
+
+=head2 seq_id
+
+ Title   : seq_id
+ Usage   : $obj->seq_id($newval)
+ Function: There are many cases when you make a feature that you
+           do know the sequence name, but do not know its actual
+           sequence. This is an attribute such that you can store
+           the ID (e.g., display_id) of the sequence.
+
+           This attribute should *not* be used in GFF dumping, as
+           that should come from the collection in which the seq
+           feature was found.
+ Returns : value of seq_id
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub seq_id {
+    my ($obj, @new_id) = @_;
+    $obj->{'_gsf_seq_id'} = $new_id[0] if @new_id;   # even undef overwrites
+    return $obj->{'_gsf_seq_id'};
+}
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $featname = $obj->display_name
+ Function: Implements the display_name() method, which is a human-readable
+           name for the feature. 
+ Returns : value of display_name (a string)
+ Args    : Optionally, on set the new value or undef 
+
+=cut
+
+sub display_name{
+    my ($feat, @new_name) = @_;
+    $feat->{'display_name'} = $new_name[0] if @new_name;   # even undef overwrites
+    return $feat->{'display_name'};
+}
+
+=head1 Methods for implementing Bio::AnnotatableI
+
+=cut
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($annot_obj)
+ Function: Get/set the annotation collection object for annotating this
+           feature.
+
+ Example : 
+ Returns : A Bio::AnnotationCollectionI object
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub annotation {
+    my ($obj, $collection) = @_;
+    if (defined $collection) {
+        # explicit set
+        $obj->{'annotation'} = $collection;
+    } elsif (! defined $obj->{'annotation'}) {
+        # first access with nothing stored yet: create an empty collection
+        $obj->{'annotation'} = Bio::Annotation::Collection->new();
+    }
+    return $obj->{'annotation'};
+}
+
+=head1 Methods to implement Bio::FeatureHolderI
+
+This includes methods for retrieving, adding, and removing
+features. Since this is already a feature, features held by this
+feature holder are essentially sub-features.
+
+=cut
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   : @feats = $feat->get_SeqFeatures();
+ Function: Returns an array of sub Sequence Features
+ Returns : An array
+ Args    : none
+
+
+=cut
+
+sub get_SeqFeatures {
+    return @{ $_[0]->{'_gsf_sub_array'} || [] };   # empty list when none were added
+}
+
+=head2 add_SeqFeature
+
+ Title   : add_SeqFeature
+ Usage   : $feat->add_SeqFeature($subfeat);
+           $feat->add_SeqFeature($subfeat,'EXPAND')
+ Function: adds a SeqFeature into the subSeqFeature array.
+           with no 'EXPAND' qualifier, subfeat will be tested
+           as to whether it lies inside the parent, and throw
+           an exception if not.
+
+           If EXPAND is used, the parent's start/end/strand will
+           be adjusted so that it grows to accommodate the new
+           subFeature
+ Returns : nothing
+ Args    : An object which has the SeqFeatureI interface
+
+
+=cut
+
+#'
+sub add_SeqFeature{
+    my ($parent, $child, $mode) = @_;
+    if (! defined $child) {
+        $parent->warn("Called add_SeqFeature with no feature, ignoring");
+        return;
+    }
+    # foreign objects are tolerated, but the caller is told about it
+    $parent->warn("$child does not implement Bio::SeqFeatureI. Will add it anyway, but beware...")
+        unless $child->isa('Bio::SeqFeatureI');
+
+    if ($mode && $mode eq 'EXPAND') {
+        # grow the parent region so that it covers the new sub-feature
+        $parent->_expand_region($child);
+    }
+    elsif (! $parent->contains($child)) {
+        # without EXPAND a sub-feature outside the parent is an error
+        $parent->throw("$child is not contained within parent feature, and expansion is not valid");
+    }
+
+    $parent->{'_gsf_sub_array'} ||= [];
+    push(@{$parent->{'_gsf_sub_array'}}, $child);
+}
+
+=head2 remove_SeqFeatures
+
+ Title   : remove_SeqFeatures
+ Usage   : $sf->remove_SeqFeatures
+ Function: Removes all sub SeqFeatures
+
+           If you want to remove only a subset, remove that subset from the
+           returned array, and add back the rest.
+
+ Example :
+ Returns : The array of Bio::SeqFeatureI implementing sub-features that was
+           deleted from this feature.
+ Args    : none
+
+
+=cut
+
+sub remove_SeqFeatures {
+   my ($feat) = @_;
+   # copy out the current sub-features, then start over with a fresh array
+   my @removed = @{ $feat->{'_gsf_sub_array'} || [] };
+   $feat->{'_gsf_sub_array'} = [];
+
+   return @removed;
+}
+
+=head1 GFF-related methods
+
+=cut
+
+=head2 gff_format
+
+ Title   : gff_format
+ Usage   : # get:
+           $gffio = $feature->gff_format();
+           # set (change the default version of GFF2):
+           $feature->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
+ Function: Get/set the GFF format interpreter. This object is supposed to 
+           format and parse GFF. See Bio::Tools::GFF for the interface.
+
+           If this method is called as class method, the default for all
+           newly created instances will be changed. Otherwise only this
+           instance will be affected.
+ Example : 
+ Returns : a Bio::Tools::GFF compliant object
+ Args    : On set, an instance of Bio::Tools::GFF or a derived object.
+
+
+=cut
+
+sub gff_format {
+    my ($invocant, $formatter) = @_;
+    my $is_instance = ref($invocant);
+    # set: an instance keeps its own formatter, a class call sets the shared one
+    if (defined $formatter && $is_instance) {
+        $invocant->{'_gffio'} = $formatter;
+    } elsif (defined $formatter) {
+        $Bio::SeqFeatureI::static_gff_formatter = $formatter;
+    }
+    # get: the instance formatter when one exists, else the static default
+    return $invocant->{'_gffio'} if $is_instance && exists $invocant->{'_gffio'};
+    return $invocant->_static_gff_formatter;
+}
+
+=head2 gff_string
+
+ Title   : gff_string
+ Usage   : $str = $feat->gff_string;
+           $str = $feat->gff_string($gff_formatter);
+ Function: Provides the feature information in GFF format.
+
+           We override this here from Bio::SeqFeatureI in order to use the
+           formatter returned by gff_format().
+
+ Returns : A string
+ Args    : Optionally, an object implementing gff_string().
+
+
+=cut
+
+sub gff_string{
+   my ($feat, $writer) = @_;
+   # no formatter passed in: use whatever gff_format() reports for this feature
+   $writer ||= $feat->gff_format();
+   return $writer->gff_string($feat);
+}
+
+=head2 slurp_gff_file
+
+ Title   : slurp_gff_file
+ Usage   : @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
+ Function: Sneaky function to load an entire file as in memory objects.
+           Beware of big files.
+
+           This method is deprecated. Use Bio::Tools::GFF instead, which can
+           also handle large files.
+
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub slurp_gff_file {
+   # Deprecated plain function (not a method): reads a whole GFF handle.
+   my ($f) = @_;
+   Bio::Root::Root->throw("Must have a filehandle") if !defined $f;
+
+   Bio::Root::Root->warn("deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.");
+
+   # One feature per input line. The line goes into a lexical because a
+   # bare while(<$f>) would overwrite the caller's global $_.
+   my @out;
+   while (my $line = <$f>) {
+       push(@out, Bio::SeqFeature::Generic->new('-gff_string' => $line));
+   }
+
+   return @out;
+}
+
+=head2 _from_gff_string
+
+ Title   : _from_gff_string
+ Usage   :
+ Function: Set feature properties from GFF string. 
+
+           This method uses the object returned by gff_format() for the
+           actual interpretation of the string. Set a different GFF format
+           interpreter first if you need a specific version, like GFF1. (The
+           default is GFF2.)
+ Example :
+ Returns : 
+ Args    : a GFF-formatted string
+
+
+=cut
+
+sub _from_gff_string {
+   my ($feat, $gff_line) = @_;
+   my $parser = $feat->gff_format();   # instance formatter or the static default
+   $parser->from_gff_string($feat, $gff_line);
+}
+
+
+=head2 _expand_region
+
+ Title   : _expand_region
+ Usage   : $self->_expand_region($feature);
+ Function: Expand the total region covered by this feature to
+           accommodate the given feature.
+
+           May be called whenever any kind of subfeature is added to this
+           feature. add_sub_SeqFeature() already does this.
+ Returns : 
+ Args    : A Bio::SeqFeatureI implementing object.
+
+
+=cut
+
+sub _expand_region {
+    my ($parent, $child) = @_;
+    $parent->warn("$child does not implement Bio::SeqFeatureI")
+        unless $child->isa('Bio::SeqFeatureI');
+    if (defined($parent->start) || defined($parent->end)) {
+        # parent already has a coordinate: take the union of both ranges
+        my ($lo, $hi, $dir) = $parent->union($child);
+        $parent->start($lo);
+        $parent->end($hi);
+        $parent->strand($dir);
+    } else {
+        # neither start nor end is set yet: adopt the child's range
+        $parent->start($child->start);
+        $parent->end($child->end);
+        $parent->strand($child->strand) unless $parent->strand;
+    }
+}
+
+=head2 _parse
+
+ Title   : _parse
+ Usage   :
+ Function: Parsing hints
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _parse {
+   my $feat = shift;
+   return $feat->{'_parse_h'};   # the hash reference that new() initialises to {}
+}
+
+=head2 _tag_value
+
+ Title   : _tag_value
+ Usage   : 
+ Function: For internal use only. Convenience method for those tags that
+           may only have a single value.
+ Returns : The first value under the given tag as a scalar (string)
+ Args    : The tag as a string. Optionally, the value on set.
+
+
+=cut
+
+sub _tag_value {
+    my ($feat, $name, @values) = @_;
+    # On a set, or when the tag is missing, the tag is (re)created; on a get
+    # of a missing tag add_tag_value() is called with no values at all.
+    if (@values || ! $feat->has_tag($name)) {
+        $feat->remove_tag($name) if $feat->has_tag($name);
+        $feat->add_tag_value($name, @values);
+    }
+    return ($feat->get_tag_values($name))[0];
+}
+
+#######################################################################
+# aliases for methods that changed their names in an attempt to make  #
+# bioperl names more consistent                                       #
+#######################################################################
+
+sub seqname {
+    my ($feat, @args) = @_;   # deprecated alias: warn, then act like seq_id()
+    $feat->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
+    return $feat->seq_id(@args);
+}
+
+sub display_id {
+    my ($feat, @args) = @_;   # deprecated alias: warn, then act like display_name()
+    $feat->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
+    return $feat->display_name(@args);
+}
+
+# # this is towards consistent naming
+sub each_tag_value { my $feat = shift; return $feat->get_tag_values(@_); }
+sub all_tags { my $feat = shift; return $feat->get_all_tags(@_); }
+
+# Sub-feature handling now implements Bio::FeatureHolderI; the older method
+# names are kept as typeglob aliases for the subs defined in this package.
+*sub_SeqFeature = \&get_SeqFeatures;
+*add_sub_SeqFeature = \&add_SeqFeature;
+*flush_sub_SeqFeatures = \&remove_SeqFeatures;
+# the singular spelling is kept as well, because of inconsistent naming
+*flush_sub_SeqFeature = \&remove_SeqFeatures;
+
+
+sub cleanup_generic {
+    my $self = shift;   # handler that new() passes to _register_for_cleanup()
+    foreach my $f ( @{$self->{'_gsf_sub_array'} || []} ) {   # $f aliases each array slot
+	$f = undef;   # so this clears the slot inside the sub-feature array
+    }
+    $self->{'_gsf_seq'} = undef;   # drop the attached sequence; the key itself remains
+    foreach my $t ( keys %{$self->{'_gsf_tag_hash'} || {}} ) {   # empty the tag hash entry by entry
+	$self->{'_gsf_tag_hash'}->{$t} = undef;
+	delete($self->{'_gsf_tag_hash'}->{$t}); # bug 1720 fix
+    }
+}
+
+=head1 INHERITED METHODS FOR L<Bio::AnnotatableI> VIA L<Bio::SeqFeatureI>
+
+=head2 has_tag()
+
+=cut
+
+=head2 add_tag_value()
+
+=cut
+
+=head2 get_tag_values()
+
+=cut
+
+=head2 get_all_tags()
+
+=cut
+
+=head2 remove_tag()
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/PositionProxy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/PositionProxy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/PositionProxy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,447 @@
+# $Id: PositionProxy.pm,v 1.9.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::PositionProxy
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::PositionProxy - handle features when truncation/revcom sequences span a feature
+
+=head1 SYNOPSIS
+
+   $proxy = new Bio::SeqFeature::PositionProxy ( -loc => $loc,
+                                                 -parent => $basefeature);
+
+   $seq->add_SeqFeature($proxy);
+
+
+
+=head1 DESCRIPTION
+
+PositionProxy is a Proxy Sequence Feature to handle truncation
+and revcomp without duplicating all the data within the sequence features.
+It holds a new location for a sequence feature and the original feature
+it came from to provide the additional annotation information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Ewan Birney E<lt>birney at sanger.ac.ukE<gt>
+
+=head1 DEVELOPERS
+
+This class has been written with an eye out for inheritance. The fields
+in the actual object hash are:
+
+   _gsf_tag_hash  = reference to a hash for the tags
+   _gsf_sub_array = reference to an array for sub arrays
+   _gsf_start     = scalar of the start point
+   _gsf_end       = scalar of the end point
+   _gsf_strand    = scalar of the strand
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::PositionProxy;
+use strict;
+
+use Bio::Tools::GFF;
+
+
+use base qw(Bio::Root::Root Bio::SeqFeatureI);
+
+sub new {
+    my ($caller, @args) = @_;
+    my $self = $caller->SUPER::new(@args);
+
+    my ($feature,$location) = $self->_rearrange([qw(PARENT LOC)],@args);
+
+    if( !defined $feature || !ref $feature || !$feature->isa('Bio::SeqFeatureI') ) {
+      $self->throw("Must have a parent feature, not a [$feature]");
+    }
+    # never chain proxies: point straight at the proxied feature instead
+    if( $feature->isa("Bio::SeqFeature::PositionProxy") ) {
+      $feature = $feature->parent();
+    }
+    if( !defined $location || !ref $location || !$location->isa('Bio::LocationI') ) {
+      $self->throw("Must have a location, not a [$location]");
+    }
+    # store both, so that parent() and location() return what was passed in
+    $self->parent($feature);
+    $self->location($location);
+    return $self;
+}
+
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location 
+	   of feature on sequence or parent feature  
+ Returns : Bio::LocationI object
+ Args    : none
+
+
+=cut
+
+sub location {
+    my ($proxy, $loc) = @_;
+    if (! defined $loc) {
+        # get: create an empty location on first use so undef is never returned
+        # NOTE(review): no 'use Bio::Location::Simple' is visible in this
+        # package - confirm the class is loaded by the time this runs
+        $proxy->{'_location'} ||= Bio::Location::Simple->new();
+    } else {
+        # set: only objects implementing Bio::LocationI are accepted
+        $proxy->throw("object $loc pretends to be a location but ".
+                      "does not implement Bio::LocationI")
+            unless ref($loc) and $loc->isa('Bio::LocationI');
+        $proxy->{'_location'} = $loc;
+    }
+    return $proxy->{'_location'};
+}
+
+
+=head2 parent
+
+ Title   : parent
+ Usage   : my $sf = $proxy->parent()
+ Function: returns the seqfeature parent of this proxy
+ Returns : Bio::SeqFeatureI object
+ Args    : none
+
+
+=cut
+
+sub parent {
+    my ($self, $value) = @_;
+
+    # set: only objects implementing Bio::SeqFeatureI are accepted
+    if (defined($value)) {
+        unless (ref($value) and $value->isa('Bio::SeqFeatureI')) {
+            $self->throw("object $value pretends to be a seqfeature but ".
+                         "does not implement Bio::SeqFeatureI");
+        }
+        $self->{'_parent'} = $value;
+    }
+    return $self->{'_parent'};
+}
+
+
+
+=head2 start
+
+ Title   : start
+ Usage   : $start = $feat->start
+           $feat->start(20)
+ Function: Get
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub start {
+   my ($proxy, $pos) = @_;   # $pos is undef on a plain get and still passed on
+   return $proxy->location()->start($pos);
+}
+
+=head2 end
+
+ Title   : end
+ Usage   : $end = $feat->end
+           $feat->end($end)
+ Function: get
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+sub end {
+   my ($proxy, $pos) = @_;   # $pos is undef on a plain get and still passed on
+   return $proxy->location()->end($pos);
+}
+
+=head2 length
+
+ Title   : length
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub length {
+   my $proxy = shift;
+   return 1 + $proxy->end() - $proxy->start();   # start and end are both counted
+}
+
+=head2 strand
+
+ Title   : strand
+ Usage   : $strand = $feat->strand()
+           $feat->strand($strand)
+ Function: get/set on strand information, being 1,-1 or 0
+ Returns : -1,1 or 0
+ Args    : none
+
+
+=cut
+
+sub strand {
+   my ($proxy, $dir) = @_;   # $dir is undef on a plain get and still passed on
+   return $proxy->location()->strand($dir);
+}
+
+
+=head2 attach_seq
+
+ Title   : attach_seq
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+           Bio::Seq object is for the *entire* sequence: ie
+           from 1 to 10000
+ Example :
+ Returns : TRUE on success
+ Args    :
+
+
+=cut
+
+sub attach_seq {
+   my ($proxy, $whole_seq) = @_;
+
+   # anything that is not a Bio::PrimarySeqI object is rejected
+   $proxy->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures")
+       unless defined $whole_seq && ref($whole_seq) && $whole_seq->isa("Bio::PrimarySeqI");
+
+   $proxy->{'_gsf_seq'} = $whole_seq;
+
+   # pass the same sequence on to each sub-feature, skipping any that
+   # does not offer an attach_seq() method
+   for my $sub_feat ( $proxy->sub_SeqFeature() ) {
+       next unless $sub_feat->can("attach_seq");
+       $sub_feat->attach_seq($whole_seq);
+   }
+
+   return 1;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $tseq = $sf->seq()
+ Function: returns the truncated sequence (if there) for this feature
+ Example :
+ Returns : sub seq on attached sequence bounded by start & end
+ Args    : none
+
+
+=cut
+
+sub seq {
+   my ($self, $arg) = @_;
+
+   # This accessor is read-only; attaching a sequence goes through attach_seq().
+   $self->throw("Calling SeqFeature::PositionProxy->seq with an argument. You probably want attach_seq")
+       if defined $arg;
+
+   # Nothing attached, nothing to return.
+   return unless exists $self->{'_gsf_seq'};
+
+   # Assuming the attached sequence object is sensible, trunc() should not
+   # have to pull the entire sequence out here.
+   my $subseq = $self->{'_gsf_seq'}->trunc($self->start(), $self->end());
+
+   # Features on the minus strand are returned reverse-complemented.
+   $subseq = $subseq->revcom if $self->strand == -1;
+
+   return $subseq;
+}
+
+=head2 entire_seq
+
+ Title   : entire_seq
+ Usage   : $whole_seq = $sf->entire_seq()
+ Function: gives the entire sequence that this seqfeature is attached to
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub entire_seq {
+   my $self = shift;
+
+   # Yields nothing (undef / empty list) until a sequence has been attached.
+   return exists $self->{'_gsf_seq'} ? $self->{'_gsf_seq'} : ();
+}
+
+
+=head2 seqname
+
+ Title   : seqname
+ Usage   : $obj->seqname($newval)
+ Function: There are many cases when you make a feature that you
+           do know the sequence name, but do not know its actual
+           sequence. This is an attribute such that you can store
+           the seqname.
+
+           This attribute should *not* be used in GFF dumping, as
+           that should come from the collection in which the seq
+           feature was found.
+ Returns : value of seqname
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub seqname {
+    my $obj   = shift;
+    my $value = shift;
+    # Only a defined argument replaces the stored name.
+    $obj->{'_gsf_seqname'} = $value if defined $value;
+    return $obj->{'_gsf_seqname'};
+}
+
+
+
+=head2 Proxies
+
+These functions chain back to the parent for all non sequence related stuff.
+
+
+=cut
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+ Function: Returns the primary tag for a feature,
+           eg 'exon'
+ Returns : a string 
+ Args    : none
+
+
+=cut
+
+sub primary_tag{
+   # Pure getter: extra arguments are accepted but ignored, and the answer
+   # always comes from the parent feature.
+   my ($self,@args) = @_;
+
+   return $self->parent->primary_tag();
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $tag = $feat->source_tag()
+ Function: Returns the source tag for a feature,
+           eg, 'genscan' 
+ Returns : a string 
+ Args    : none
+
+
+=cut
+
+sub source_tag{
+   my $self = shift;
+
+   # Forwarded to the parent feature.
+   my $parent = $self->parent;
+   return $parent->source_tag();
+}
+
+
+=head2 has_tag
+
+ Title   : has_tag
+ Usage   : $tag_exists = $self->has_tag('some_tag')
+ Function: 
+ Returns : TRUE if the specified tag exists, and FALSE otherwise
+ Args    :
+
+
+=cut
+
+sub has_tag{
+   my $self = shift;
+   my $tag  = shift;
+
+   # Forwarded to the parent feature.
+   return $self->parent->has_tag($tag);
+}
+
+=head2 each_tag_value
+
+ Title   : each_tag_value
+ Usage   : @values = $self->each_tag_value('some_tag')
+ Function: 
+ Returns : An array comprising the values of the specified tag.
+ Args    :
+
+
+=cut
+
+sub each_tag_value {
+   my $self = shift;
+   my $tag  = shift;
+
+   # Forwarded to the parent feature.
+   return $self->parent->each_tag_value($tag);
+}
+
+=head2 all_tags
+
+ Title   : all_tags
+ Usage   : @tags = $feat->all_tags()
+ Function: gives all tags for this feature
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+sub all_tags{
+   my $self = shift;
+
+   # Forwarded to the parent feature.
+   my $parent = $self->parent;
+   return $parent->all_tags();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Primer.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Primer.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Primer.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,501 @@
+# $Id: Primer.pm,v 1.21.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Primer
+#
+# This is the original copyright statement. I have relied on Chad's module
+# extensively for this module.
+#
+# Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself. 
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+#
+# But I have modified lots of it, so I guess I should add:
+#
+# Copyright (c) 2003 bioperl, Rob Edwards. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself. 
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Primer - Primer Generic SeqFeature
+
+=head1 SYNOPSIS
+
+ # set up a single primer that can be used in a PCR reaction
+
+ use Bio::SeqFeature::Primer;
+
+ # initiate a primer with raw sequence
+ my $primer=Bio::SeqFeature::Primer->new(-seq=>'CTTTTCATTCTGACTGCAACG');
+
+ # get the primary tag for the primer # should return Primer
+ my $tag=$primer->primary_tag;
+
+ # get or set the location that the primer binds to the target at
+ $primer->location(500);
+ my $location=$primer->location(500);
+
+ # get or set the 5' end of the primer homology, as the primer doesn't 
+ # have to be the same as the target sequence
+ $primer->start(2);
+ my $start=$primer->start;
+
+ # get or set the 3' end of the primer homology
+ $primer->end(19);
+ my $end = $primer->end;
+
+ # get or set the strand of the primer. Strand should be 1, 0, or -1
+ $primer->strand(-1);
+ my $strand=$primer->strand;
+
+ # get or set the id of the primer
+ $primer->display_id('test_id');
+ my $id=$primer->display_id;
+
+ # get the tm of the primer. This is calculated for you by the software.
+ # however, see the docs.
+ my $tm = $primer->Tm;
+
+ print "These are the details of the primer:\n\tTag:\t\t$tag\n\tLocation\t$location\n\tStart:\t\t$start\n";
+ print "\tEnd:\t\t$end\n\tStrand:\t\t$strand\n\tID:\t\t$id\n\tTm:\t\t$tm\n";
+
+
+
+=head1 DESCRIPTION
+
+Handle primer sequences. This will allow you to generate a primer
+object required for a Bio::Seq::PrimedSeq object. This module is
+designed to integrate with Bio::Tools::Primer3 and
+Bio::Seq::PrimedSeq.
+
+In addition, you can calculate the melting temperature of the primer.
+
+This module is supposed to implement location and range, presumably
+through generic.pm, but does not do so yet. However, it does allow you
+to set primers, and use those objects as the basis for
+Bio::Seq::PrimedSeq objects.
+
+See also the POD for Bio::Seq::PrimedSeq and
+Bio::Tools::Nucleotide::Analysis::Primer3
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR 
+
+Rob Edwards, redwards at utmem.edu
+
+The original concept and much of the code was written by
+Chad Matsalla, bioinformatics1 at dieselwurks.com
+
+=head1 APPENDIX
+
+	The rest of the documentation details each of the object
+	methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Primer;
+use strict;
+
+use Bio::Seq;
+use Bio::Tools::SeqStats;
+
+
+use vars qw ($AUTOLOAD @RES %OK_FIELD $ID);
+
+BEGIN {
+ # @RES lists the attribute names registered in %OK_FIELD, which AUTOLOAD
+ # consults before acting as a generic get/set accessor. The list is
+ # currently empty, so the loop below registers nothing.
+ @RES=qw(); # nothing here yet, not sure what we want!
+
+ foreach my $attr (@RES) {$OK_FIELD{$attr}++}
+}
+
+use base qw(Bio::Root::Root Bio::SeqFeature::Generic);
+
+$ID = 'Bio::SeqFeature::Primer';
+
+sub AUTOLOAD {
+ my ($self, @rest) = @_;
+ # Strip the package qualifier to get the bare method name.
+ (my $attr = $AUTOLOAD) =~ s/.*:://;
+ # Only names registered in %OK_FIELD may be used as get/set accessors.
+ $OK_FIELD{$attr} or $self->throw("Unallowed parameter: $attr !");
+ $self->{$attr} = $rest[0] if @rest;
+ return $self->{$attr};
+}
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $primer = Bio::SeqFeature::Primer->new(-seq=>sequence_object);
+ Function: Instantiate a new object
+ Returns : A SeqPrimer object
+ Args    : You must pass either a sequence object (preferable) or a sequence. 
+
+=cut
+
+
+sub new {
+  # Simplified from Chad Matsalla's original constructor. Some of the
+  # Bio::SeqFeature::Generic handling was removed and can be put back.
+
+  my ($class, %args) = @_;  
+  my $self = $class->SUPER::new(%args);
+  # Keep an array ($self->{arguments}) of the things that have been passed
+  # into the object on construction. This will aid retrieval of these
+  # things later.
+  foreach my $argument (keys %args) {
+   if ($argument eq "-SEQUENCE" || $argument eq "-sequence" || $argument eq "-seq") {
+    my $given = $args{$argument};
+    # Accept Bio::Seq and any subclass of it. Comparing the ref() string
+    # would send subclass objects down the raw-string branch below.
+    if (ref($given) && UNIVERSAL::isa($given, "Bio::Seq")) {$self->{seq} = $given}
+    else {
+     # Raw sequence string: wrap it in a Bio::Seq, with a default id if none given.
+     unless ($args{-id}) {$args{-id}="SeqFeature Primer object"}
+     $self->{seq} = Bio::Seq->new( -seq => $given, -id => $args{-id});
+    }
+    $self->{$argument} = $self->{seq};
+    push (@{$self->{arguments}}, "seq");
+   }
+   else {
+    $self->{$argument} = $args{$argument};
+    push (@{$self->{arguments}}, $argument); # note need to check the BioPerl way of doing this.
+   }
+  }
+
+  # now error check and make sure that we at least got a sequence
+  if (!$self->{seq}) {$self->throw("You must pass in a sequence to construct this object.")}
+
+   # a bunch of things now need to be set for this SeqFeature
+   # things like:
+   # TARGET=513,26
+   # PRIMER_FIRST_BASE_INDEX=1
+   # PRIMER_LEFT=484,20
+
+   # these can be added in, and we won't demand them, but provide a mechanism to check that they exist
+
+   # Tm() stores its result in $self->{Tm} as a side effect.
+   $self->Tm();
+   return $self;
+}
+
+
+=head2 seq()
+
+ Title   : seq()
+ Usage   : $seq = $primer->seq();
+ Function: Return the sequence associated with this Primer. 
+ Returns : A Bio::Seq object
+ Args    : None.
+
+=cut
+
+sub seq {
+     # Read-only accessor for the sequence object stored under the 'seq' key.
+     my ($self) = @_;
+     return $self->{seq};
+}
+
+sub primary_tag {
+     # Constant for every primer, whatever arguments are supplied.
+     my $tag = "Primer";
+     return $tag;
+}
+
+=head2 source_tag()
+
+ Title   : source_tag()
+ Usage   : $tag = $feature->source_tag();
+ Function: Returns the source of this tag.
+ Returns : A string.
+ Args    : If an argument is provided, the source of this SeqFeature
+           is set to that argument.
+
+=cut
+
+sub source_tag {
+     my $self     = shift;
+     my $insource = shift;
+     # Only a true argument replaces the stored source.
+     $self->{source} = $insource if $insource;
+     return $self->{source};
+}
+
+=head2 location()
+
+ Title   : location()
+ Usage   : $tag = $primer->location();
+ Function: Gets or sets the location of the primer on the sequence  
+ Returns : If the location is set, returns that, if not returns 0. 
+           Note: At the moment I am using the primer3 notation of location
+	   (although you can set whatever you want). 
+	   In this form, both primers are given from their 5' ends and a length.
+	   In this case, the left primer is given from the leftmost end, but
+	   the right primer is given from the rightmost end.
+	   You can use start() and end() to get the leftmost and rightmost base
+	   of each sequence.
+ Args    : If supplied will set a location
+
+=cut
+
+sub location {
+     my $self     = shift;
+     my $location = shift;
+     # Only a true argument replaces the stored location.
+     $self->{location} = $location if $location;
+     # An unset (or false) location is reported as 0.
+     return $self->{location} || 0;
+}
+
+=head2 start()
+
+ Title   : start()
+ Usage   : $start_position = $primer->start($new_position);
+ Function: Return the start position of this Primer.
+           This is the leftmost base, regardless of whether it is a left or right primer.
+ Returns : The start position of this primer or 0 if not set.
+ Args    : If supplied will set a start position.
+
+=cut
+
+sub start {
+     my $self  = shift;
+     my $start = shift;
+     # Only a true argument replaces the stored start position.
+     $self->{start_position} = $start if $start;
+     # An unset (or false) start is reported as 0.
+     return $self->{start_position} || 0;
+}
+
+=head2 end()
+
+ Title   : end()
+ Usage   : $end_position = $primer->end($new_position);
+ Function: Return the end position of this primer.
+           This is the rightmost base, regardless of whether it is a left or right primer.
+ Returns : The end position of this primer.
+ Args    : If supplied will set an end position.
+
+=cut
+
+sub end {
+     my $self = shift;
+     my $end  = shift;
+     # Only a true argument replaces the stored end position.
+     $self->{end_position} = $end if $end;
+     # An unset (or false) end is reported as 0.
+     return $self->{end_position} || 0;
+}
+
+=head2 strand()
+
+ Title   : strand()
+ Usage   : $strand=$primer->strand()
+ Function: Get or set the strand.
+ Returns : The strand that the primer binds to.
+ Args    :  If an argument is supplied will set the strand, otherwise will return it. Should be 1, 0 (not set), or -1
+
+=cut
+
+sub strand {
+     my ($self, $strand) = @_;
+     # Test definedness rather than truth, so that strand(0) really resets the
+     # strand to "not set" as documented; a truth test silently ignores 0.
+     if (defined $strand) {
+      unless ($strand == -1 || $strand == 0 ||$strand == 1) {$self->throw("Strand must be either 1, 0, or -1 not $strand")}
+      $self->{strand}=$strand;
+     }
+     # An unset strand is reported as 0.
+     return $self->{strand} || 0;
+}
+
+=head2 display_id()
+
+ Title   : display_id()
+ Usage   : $id = $primer->display_id($new_id)
+ Function: Returns the display ID for this Primer feature
+ Returns : A scalar.
+ Args    : If an argument is provided, the display_id of this primer is set to that value.
+
+=cut
+
+sub display_id {
+     my $self  = shift;
+     my $newid = shift;
+     # The ID is kept on the underlying sequence object; only a true value sets it.
+     $self->seq()->display_id($newid) if $newid;
+     return $self->seq()->display_id();
+}
+
+
+=head2 Tm()
+
+  Title   : Tm()
+  Usage   : $tm = $primer->Tm(-salt=>'0.05', -oligo=>'0.0000001')
+  Function: Calculates and returns the Tm (melting temperature) of the primer
+  Returns : A scalar containing the Tm.
+  Args    : -salt set the Na+ concentration on which to base the calculation (default=0.05 molar).
+          : -oligo set the oligo concentration on which to base the calculation (default=0.00000025 molar).
+  Notes   : Calculation of Tm as per Allawi et al., Biochemistry 1997 36:10581-10594.  Also see
+            documentation at http://biotools.idtdna.com/analyzer/ as they use this formula and
+            have a couple of nice help pages.  These Tm values will be about 0.5-3 degrees
+            off from those of the idtdna web tool.  I don't know why.
+
+            This was suggested by Barry Moore (thanks!). See the discussion on the bioperl-l
+            with the subject "Bio::SeqFeature::Primer Calculating the PrimerTM"
+
+=cut
+
+sub Tm  {
+     my ($self, %args) = @_;
+     # Concentrations are in molar units; a missing or false value falls back
+     # to the default.
+     my $salt_conc  = $args{'-salt'}  || 0.05;
+     my $oligo_conc = $args{'-oligo'} || 0.00000025;
+     my $sequence   = uc $self->seq()->seq();
+
+     # Thermodynamic values keyed by dinucleotide, plus the initiation
+     # parameters keyed by a single (terminal) base.
+     my %thermo_values = (
+         'AA' => {'enthalpy' => -7.9,  'entropy' => -22.2},
+         'AC' => {'enthalpy' => -8.4,  'entropy' => -22.4},
+         'AG' => {'enthalpy' => -7.8,  'entropy' => -21},
+         'AT' => {'enthalpy' => -7.2,  'entropy' => -20.4},
+         'CA' => {'enthalpy' => -8.5,  'entropy' => -22.7},
+         'CC' => {'enthalpy' => -8,    'entropy' => -19.9},
+         'CG' => {'enthalpy' => -10.6, 'entropy' => -27.2},
+         'CT' => {'enthalpy' => -7.8,  'entropy' => -21},
+         'GA' => {'enthalpy' => -8.2,  'entropy' => -22.2},
+         'GC' => {'enthalpy' => -9.8,  'entropy' => -24.4},
+         'GG' => {'enthalpy' => -8,    'entropy' => -19.9},
+         'GT' => {'enthalpy' => -8.4,  'entropy' => -22.4},
+         'TA' => {'enthalpy' => -7.2,  'entropy' => -21.3},
+         'TC' => {'enthalpy' => -8.2,  'entropy' => -22.2},
+         'TG' => {'enthalpy' => -8.5,  'entropy' => -22.7},
+         'TT' => {'enthalpy' => -7.9,  'entropy' => -22.2},
+         'A'  => {'enthalpy' => 2.3,   'entropy' => 4.1},
+         'C'  => {'enthalpy' => 0.1,   'entropy' => -2.8},
+         'G'  => {'enthalpy' => 0.1,   'entropy' => -2.8},
+         'T'  => {'enthalpy' => 2.3,   'entropy' => 4.1},
+     );
+
+     # Terms to sum, in order: every overlapping dinucleotide of the sequence,
+     # then the first and the last base for the initiation parameters.
+     my @terms;
+     while ($sequence =~ /(.)(?=(.))/g) {
+         push @terms, $1.$2;
+     }
+     push @terms, substr($sequence, 0, 1), substr($sequence, -1, 1);
+
+     my $enthalpy = 0;
+     my $entropy  = 0;
+     for my $term (@terms) {
+         # Symbols with no entry in the table (e.g. N or U) contribute nothing;
+         # skipping them avoids arithmetic on undefined values.
+         my $values = $thermo_values{$term} or next;
+         $enthalpy += $values->{enthalpy};
+         $entropy  += $values->{entropy};
+     }
+     #Symmetry correction
+     $entropy -= 1.4;
+     my $r = 1.987; #molar gas constant
+     my $tm = ($enthalpy * 1000 / ($entropy + ($r * log($oligo_conc))) - 273.15 + (12* (log($salt_conc)/log(10))));
+     # The result is also cached on the object.
+     $self->{'Tm'}=$tm;
+     return $tm;
+}
+
+=head2 Tm_estimate
+
+ Title   : Tm_estimate
+ Usage   : $tm = $primer->Tm_estimate(-salt=>'0.05')
+ Function: Calculates and returns the Tm (melting temperature) of the primer
+ Returns : A scalar containing the Tm.
+ Args    : -salt set the Na+ concentration on which to base the calculation.
+ Notes   : This is an estimate of the Tm that is kept in for comparative reasons.
+           You should probably use Tm instead!
+
+	   These Tm calculations are taken from the Primer3 docs: They are
+	   based on Bolton and McCarthy, PNAS 84:1390 (1962) 
+	   as presented in Sambrook, Fritsch and Maniatis,
+	   Molecular Cloning, p 11.46 (1989, CSHL Press).
+
+	   Tm = 81.5 + 16.6(log10([Na+])) + .41*(%GC) - 600/length
+
+	   where [Na+] is the molar sodium concentration, %GC is the
+	   %G+C of the sequence, and length is the length of the sequence.
+
+	   However.... I can never get this calculation to give me the same result
+	   as primer3 does. Don't ask why, I never figured it out. But I did 
+	   want to include a Tm calculation here because I use these modules for 
+	   other things besides reading primer3 output.
+
+	   The primer3 calculation is saved as 'PRIMER_LEFT_TM' or 'PRIMER_RIGHT_TM'
+	   and this calculation is saved as $primer->Tm so you can get both and
+	   average them!
+
+=cut
+
+sub Tm_estimate {
+
+ # NOTE: this arguably belongs in Bio::Tools::SeqStats, as it is more generic.
+
+ my ($self, %args) = @_;
+ # Sodium concentration; a missing or false value falls back to 0.2.
+ my $salt = $args{'-salt'} ? $args{'-salt'} : 0.2;
+ my $seqobj = $self->seq();
+ my $length = $seqobj->length();
+ my $counts = Bio::Tools::SeqStats->count_monomers($seqobj);
+ my $gc = $counts->{'G'} + $counts->{'C'};
+ my $percent_gc = ($gc/$length)*100;
+
+ # Tm = 81.5 + 16.6(log10([Na+])) + .41*(%GC) - 600/length
+ my $tm= 81.5+(16.6*(log($salt)/log(10)))+(0.41*$percent_gc) - (600/$length);
+
+ # A comparison against the primer3 values stored under 'PRIMER_LEFT_TM' /
+ # 'PRIMER_RIGHT_TM' used to warn here. It never agreed, so it is disabled;
+ # both values can still be retrieved separately.
+
+ # if ($self->{'PRIMER_LEFT_TM'}) {
+ #  unless ($self->{'PRIMER_LEFT_TM'} == $tm) {
+ #   $self->warn("Calculated $tm for Left primer but received ".$self->{'PRIMER_LEFT_TM'}." from primer3\n");
+ #  }
+ # }
+ # elsif ($self->{'PRIMER_RIGHT_TM'}) {
+ #  unless ($self->{'PRIMER_RIGHT_TM'} == $tm) {
+ #    $self->warn("Calculated $tm for Right primer but received ".$self->{'PRIMER_RIGHT_TM'}." from primer3\n");
+ #  }
+ # }
+
+ # The estimate is stored under the same 'Tm' key that Tm() uses.
+ $self->{'Tm'}=$tm;
+ return $tm; 
+} 
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Oligo.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Oligo.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Oligo.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,175 @@
+# $Id: Oligo.pm,v 1.6.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::SiRNA::Oligo
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Donald Jackson
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::SiRNA::Oligo - Perl object for small inhibitory RNAs.
+
+=head1 SYNOPSIS
+
+  use Bio::SeqFeature::SiRNA::Oligo;
+
+  my $oligo = Bio::SeqFeature::SiRNA::Oligo->
+      new( -seq		=> 'AUGCCGAUUGCAAGUCAGATT',
+	   -start 	=> 10,
+	   -end		=> 31,
+	   -strand	=> -1,
+	   -primary	=> 'SiRNA::Oligo',
+	   -source_tag	=> 'Bio::Tools::SiRNA',
+	   -tag		=> { note => 'A note' }, );
+
+  # normally two complementary Oligos are combined in an SiRNA::Pair
+  # object
+  $pair->antisense($oligo);
+
+
+=head1 DESCRIPTION
+
+Object methods for single SiRNA oligos - inherits
+L<Bio::SeqFeature::Generic>.  Does B<not> include methods for designing
+SiRNAs - see L<Bio::Tools::SiRNA> for that.
+
+=head1 SEE ALSO
+
+L<Bio::Tools::SiRNA>, L<Bio::SeqFeature::SiRNA::Pair>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Donald Jackson (donald.jackson at bms.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqFeature::SiRNA::Oligo;
+
+require 5.005_62;
+use strict;
+use warnings;
+
+
+use base qw(Bio::SeqFeature::Generic);
+
+our @ARGNAMES = qw(SEQ START END STRAND PRIMARY SOURCE_TAG SCORE TAG 
+                   SEQ_ID ANNOTATION LOCATION);
+
+=head2 new
+
+  Title		: new
+  Usage		: my $sirna_oligo = Bio::SeqFeature::SiRNA::Oligo->new();
+  Function	: Create a new SiRNA::Oligo object
+  Returns	: Bio::SeqFeature::SiRNA::Oligo object
+  Args    	: -seq		  sequence of the RNAi oligo.  Should be in RNA alphabet
+                                  except for the final TT overhang.
+                  -start          start position
+ 	 	  -end            end position
+ 	 	  -strand         strand
+ 	 	  -primary        primary tag - defaults to 'SiRNA::Oligo'
+ 	 	  -source_tag     source tag
+ 	 	  -score          score value
+ 	 	  -tag            a reference to a tag/value hash
+ 	 	  -seq_id         the display name of the sequence
+ 	 	  -annotation     the AnnotationCollectionI object
+ 	 	  -location       the LocationI object
+
+Currently passing arguments in gff_string or gff1_string is not
+supported.  SiRNA::Oligo objects are typically created by a design
+algorithm such as Bio::Tools::SiRNA
+
+=cut
+
+sub new {
+    my ($proto, @args) = @_;
+
+    # Works as both a class and an instance method.
+    my $pkg = ref($proto) || $proto;
+
+    my (%args);
+
+    my $self = $pkg->SUPER::new();
+
+    # Map the named parameters onto the keys listed in @ARGNAMES.
+    @args{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args); 
+    # default primary tag
+    $args{'PRIMARY'} ||= 'SiRNA::Oligo';
+
+    $args{'PRIMARY'}		&& $self->primary_tag($args{'PRIMARY'});
+    $args{'SOURCE_TAG'}		&& $self->source_tag($args{'SOURCE_TAG'});
+    # @ARGNAMES parses -seq_id; it has no SEQNAME entry, so reading
+    # $args{'SEQNAME'} here would always find nothing and drop the value.
+    $args{'SEQ_ID'}		&& $self->seq_id($args{'SEQ_ID'});
+    $args{'SEQ'}		&& $self->seq($args{'SEQ'});
+    $args{'ANNOTATION'}		&& $self->annotation($args{'ANNOTATION'});
+    $args{'LOCATION'}		&& $self->location($args{'LOCATION'});
+    # For the remaining values 0 is meaningful, so test definedness.
+    defined($args{'START'})	&& $self->start($args{'START'});
+    defined($args{'END'})	&& $self->end($args{'END'});
+    defined($args{'STRAND'})	&& $self->strand($args{'STRAND'});
+    defined($args{'SCORE'})	&& $self->score($args{'SCORE'});
+
+    # -tag is a hash reference of tag name => value.
+    if ($args{'TAG'}) {	
+	foreach my $t ( keys %{ $args{'TAG'} } ) {
+	    $self->add_tag_value($t, $args{'TAG'}->{$t});
+	}
+    }
+
+    return $self;
+}
+
+=head2 seq
+
+  Title		: seq
+  Usage		: my $oligo_sequence = $sirna_oligo->seq();
+  Purpose	: Get/set the sequence of the RNAi oligo
+  Returns 	: Sequence for the RNAi oligo
+  Args		: Sequence of the RNAi oligo (optional)
+  Note		: Overloads Bio::SeqFeature::Generic seq method - the oligo and 
+                  parent sequences are different. 
+                  Note that all but the last 2 nucleotides are RNA (per Tuschl and colleagues).
+                  SiRNA::Oligo objects are typically created by a design algorithm such as
+                  Bio::Tools::SiRNA.
+
+=cut
+
+sub seq {
+    my ($self, $seq) = @_;
+
+    # Getter path: nothing (or a false value) was supplied.
+    return $self->{'seq'} unless $seq;
+
+    # Only the letters A, C, G, T and U (either case) are accepted; anything
+    # else is refused and the stored sequence is left untouched.
+    if ($seq =~ /[^ACGTUacgtu]/ ) {
+	warn "Sequence contains illegal characters";
+	return;
+    }
+
+    $self->{'seq'} = $seq;
+    return $self->{'seq'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Pair.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Pair.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SiRNA/Pair.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,297 @@
+# $Id: Pair.pm,v 1.6.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::SiRNA::Pair
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Donald Jackson
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::SiRNA::Pair - Perl object for small inhibitory RNA
+(SiRNA) oligo pairs
+
+=head1 SYNOPSIS
+
+  use Bio::SeqFeature::SiRNA::Pair;
+  my $pair = Bio::SeqFeature::SiRNA::Pair->
+      new( -sense       => $bio_seqfeature_sirna_oligo, # strand=1
+           -antisense	=> $bio_seqfeature_sirna_oligo, # strand= -1
+	   -primary	=> 'SiRNA::Pair',
+	   -source_tag 	=> 'Bio::Tools::SiRNA',
+	   -start	=> 8,
+	   -end		=> 31,
+	   -rank	=> 1,
+	   -fxgc	=> 0.5,
+	   -tag		=> { note => 'a note' } );
+
+  $target_sequence->add_SeqFeature($pair);					
+
+=head1 DESCRIPTION
+
+Object methods for (complementary) pairs of L<Bio::SeqFeature::SiRNA::Oligo> 
+objects - inherits L<Bio::SeqFeature::Generic>. See that package for information
+on inherited methods.
+
+Does B<not> include methods for designing SiRNAs -- see L<Bio::Tools::SiRNA>
+
+=head1 SEE ALSO
+
+L<Bio::SeqFeature::SiRNA::Oligo>, L<Bio::Tools::SiRNA>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Donald Jackson (donald.jackson at bms.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqFeature::SiRNA::Pair;
+
+require 5.005_62;
+use strict;
+use warnings;
+
+
+use base qw(Bio::SeqFeature::Generic);
+
+# arguments to new().  Taken from Bio::SeqFeature Generic.
+# Omit frame (not relevant), GFF_STRING and GFF1_STRING 
+# because I'm not sure how to handle them.  Add RANK, FXGC, SENSE, ANTISENSE
+our @ARGNAMES = qw(RANK FXGC SENSE ANTISENSE START END STRAND PRIMARY SOURCE_TAG
+		   SCORE TAG SEQNAME ANNOTATION LOCATION);
+
+=head1 METHODS
+
+=head2 new
+
+  Title		: new
+  Usage		: my $sirna_pair = Bio::SeqFeature::SiRNA::Pair->new();
+  Purpose	: Create a new SiRNA::Pair object
+  Returns	: Bio::SeqFeature::SiRNA::Pair object
+  Args		: -start 	10
+                  -end		31
+                  -rank		1 #  'Rank' in Tuschl group's rules
+                  -fxgc		0.5 # GC fraction for target sequence
+		  -primary	'SiRNA::Pair', # default value
+		  -source_tag	'Bio::Tools::SiRNA'
+		  -tag		{ note => 'A note' }
+                  -sense	a Bio::SeqFeature::SiRNA::Oligo object
+                                with strand = 1
+                  -antisense	a Bio::SeqFeature::SiRNA::Oligo object
+                                with strand = -1
+);
+
+  Note		: SiRNA::Pair objects are typically created by a design 
+                  algorithm such as Bio::Tools::SiRNA
+
+=cut
+
+sub new {
+    my ($proto, @args) = @_;
+
+    # Works as both a class and an instance method.
+    my $pkg  = ref($proto) || $proto;
+    my $self = $pkg->SUPER::new();
+
+    # Map the named parameters onto the keys listed in @ARGNAMES.
+    my %args;
+    @args{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args); 
+    # default primary tag
+    $args{'PRIMARY'} ||= 'SiRNA::Pair';
+
+    # Setters invoked only for true values, in this order.
+    foreach my $spec ( [ 'PRIMARY',    'primary_tag' ],
+                       [ 'SOURCE_TAG', 'source_tag'  ],
+                       [ 'SEQNAME',    'seqname'     ],
+                       [ 'ANNOTATION', 'annotation'  ],
+                       [ 'LOCATION',   'location'    ],
+                       [ 'SENSE',      'sense'       ],
+                       [ 'ANTISENSE',  'antisense'   ] ) {
+        my ($key, $setter) = @$spec;
+        $self->$setter($args{$key}) if $args{$key};
+    }
+
+    # Setters for which 0 is meaningful: invoked for any defined value.
+    foreach my $spec ( [ 'START',  'start'  ],
+                       [ 'END',    'end'    ],
+                       [ 'STRAND', 'strand' ],
+                       [ 'SCORE',  'score'  ],
+                       [ 'RANK',   'rank'   ],
+                       [ 'FXGC',   'fxGC'   ] ) {
+        my ($key, $setter) = @$spec;
+        $self->$setter($args{$key}) if defined $args{$key};
+    }
+
+    # -tag is a hash reference of tag name => value.
+    if ($args{'TAG'}) {
+        my $tags = $args{'TAG'};
+        foreach my $name (keys %$tags) {
+            $self->add_tag_value($name, $tags->{$name});
+        }
+    }
+
+    return $self;
+}
+
+=head2 rank
+
+  Title		: rank
+  Usage		: my $pair_rank = $sirna_pair->rank()
+  Purpose	: Get/set the 'quality rank' for this pair.
+                  See Bio::Tools::SiRNA for a description of ranks.
+  Returns	: scalar
+  Args		: scalar (optional) indicating pair rank
+
+=cut
+
+sub rank {
+    my ($self, $rank) = @_;
+
+    # Setter: drop any previous value first so only one rank tag remains.
+    if (defined $rank) {
+        $self->remove_tag('rank') if ( $self->has_tag('rank') );
+        return $self->add_tag_value('rank', $rank);
+    }
+
+    # Getter: an unset rank is reported through throw().
+    unless ($self->has_tag('rank')) {
+        $self->throw("Rank not defined for this Pair\n");
+        return;
+    }
+    my ($first) = $self->each_tag_value('rank');
+    return $first;
+}
+
+=head2 fxGC
+
+  Title		: fxGC
+  Usage		: my $fxGC = $sirna_pair->fxGC();
+  Purpose 	: Get/set the fraction of GC for this pair - based on TARGET sequence, not oligos.
+  Returns 	: scalar between 0-1
+  Args		: scalar between 0-1 (optional)
+
+=cut
+
+
+sub fxGC {
+    my ($self, $fxGC) = @_;
+
+    if (defined $fxGC) {
+        # Accept only a plain non-negative decimal number no greater than 1.
+        # A bare character-class test would let through strings such as "",
+        # "." or "0.1.2", which are not numbers at all.
+        unless ( $fxGC =~ /\A(?:\d+\.?\d*|\.\d+)\z/ and $fxGC <= 1 ) {
+            $self->throw( -class => 'Bio::Root::BadParameter',
+                          -text  => "Fraction GC must be a number between 0, 1 - NOT <$fxGC>",
+                          -value => $fxGC
+                          );
+        }
+
+        #  clear out old tags so only one fxGC value is ever stored
+        $self->remove_tag('fxGC') if ( $self->has_tag('fxGC') );
+        $self->add_tag_value('fxGC', $fxGC)
+            or $self->throw("Unable to set fxGC");
+    }
+    else {
+        # Getter: an unset value is reported through throw().
+        if ($self->has_tag('fxGC')) {
+            my @fxGCs = $self->each_tag_value('fxGC');
+            return shift @fxGCs;
+        }
+        else {
+            $self->throw("FxGC not defined for this Pair");
+        }
+    }
+}
+
+=head2 sense
+
+  Title		: sense
+  Usage		: my $sense_oligo = $sirna_pair->sense()
+  Purpose	: Get/set the SiRNA::Oligo object corresponding to the sense strand
+  Returns 	: Bio::SeqFeature::SiRNA::Oligo object
+  Args		: Bio::SeqFeature::SiRNA::Oligo object
+
+=cut
+
+
+sub sense {
+    my ($self, $soligo) = @_;
+
+    # No (true) argument: look up the oligo stored with strand 1.
+    return $self->_get_oligo(1) unless $soligo;
+
+    # Otherwise store the given oligo with strand 1.
+    $self->_add_oligo($soligo, 1) or return;
+}
+
+=head2 antisense
+
+  Title		: antisense
+  Usage		: my $antisense_oligo = $sirna_pair->antisense()
+  Purpose	: Get/set the SiRNA::Oligo object corresponding to the antisense strand
+  Returns 	: Bio::SeqFeature::SiRNA::Oligo object
+  Args		: Bio::SeqFeature::SiRNA::Oligo object
+
+=cut
+
+sub antisense {
+    my ($self, $asoligo) = @_;
+
+    # No (true) argument: look up the oligo stored with strand -1.
+    return $self->_get_oligo(-1) unless $asoligo;
+
+    # Otherwise store the given oligo with strand -1.
+    $self->_add_oligo($asoligo, -1) or return;
+}
+	
+sub _add_oligo {
+    my ($self, $oligo, $strand) = @_;
+
+    # Require a reference first, so a bare class-name string is rejected, and
+    # call isa() inside eval so an unblessed reference is reported through
+    # throw() instead of dying with a raw "unblessed reference" error.
+    my $is_oligo = do {
+        local $@;
+        ref($oligo) && eval { $oligo->isa('Bio::SeqFeature::SiRNA::Oligo') };
+    };
+    unless ($is_oligo) {
+        $self->throw( -class => 'Bio::Root::BadParameter',
+                      -text  =>  "Oligos must be passed as Bio::SeqFeature::SiRNA::Oligo objects\n");
+    }
+
+    # The oligo's strand is overwritten with the one requested by the caller
+    # before it is added as a sub feature.
+    $oligo->strand($strand);
+    return $self->add_sub_SeqFeature($oligo, 'EXPAND');
+}
+
+sub _get_oligo {
+    my ($self, $strand) = @_;
+
+    # The first sub feature tagged 'SiRNA::Oligo' on the requested strand wins.
+    for my $candidate ($self->sub_SeqFeature) {
+        return $candidate
+            if $candidate->primary_tag eq 'SiRNA::Oligo'
+            && $candidate->strand == $strand;
+    }
+
+    # No matching oligo.
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Similarity.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Similarity.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Similarity.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,194 @@
+# $Id: Similarity.pm,v 1.16.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::Similarity
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Similarity - A sequence feature based on similarity
+
+=head1 SYNOPSIS
+
+    # obtain a similarity feature somehow
+    print "significance: ", $sim_fea->significance(), "\n";
+    print "bit score: ", $sim_fea->bits(), "\n";
+    print "score: ", $sim_fea->score(), "\n";
+    print "fraction of identical residues: ", $sim_fea->frac_identical(), "\n";
+
+=head1 DESCRIPTION
+
+This module is basically a sequence feature based on similarity, and therefore
+has support for measures assessing the similarity.
+
+Everything else is inherited from L<Bio::SeqFeature::Generic>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net or hilmar.lapp at pharma.novartis.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Similarity;
+use strict;
+
+
+use base qw(Bio::SeqFeature::Generic);
+
+sub new {
+    my ( $caller, @args) = @_;
+    my ($self) = $caller->SUPER::new(@args);
+    # pick the similarity-specific named arguments out of @args
+    my ($primary,$evalue, $bits, $frac,$seqlen,$seqdesc) =
+	$self->_rearrange([qw(PRIMARY
+			      EXPECT
+			      BITS
+			      FRAC
+			      SEQLENGTH
+			      SEQDESC
+			      )],@args);
+    # a setter is only called when its argument was actually supplied
+    defined $evalue && $self->significance($evalue);
+    defined $bits   && $self->bits($bits);
+    defined $frac   && $self->frac_identical($frac);
+    defined $seqlen && $self->seqlength($seqlen);
+    defined $seqdesc && $self->seqdesc($seqdesc);
+    $primary  = 'similarity' unless defined $primary;
+    $self->primary_tag($primary) unless( defined $self->primary_tag() );
+    $self->strand(0) unless( defined $self->strand() );
+
+    return $self;
+}
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub significance {
+    return shift->_tag_value('signif', @_);
+}
+
+=head2 bits
+
+ Title   : bits
+ Usage   : $bits = $obj->bits();
+           $obj->bits($value);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub bits {
+    return shift->_tag_value('Bits', @_);
+}
+
+=head2 frac_identical
+
+ Title   : frac_identical
+ Usage   : $fracid = $obj->frac_identical();
+           $obj->frac_identical($value);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub frac_identical {
+    return shift->_tag_value('FracId', @_);
+}
+
+=head2 seqlength
+
+ Title   : seqlength
+ Usage   : $len = $obj->seqlength();
+           $obj->seqlength($len);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub seqlength {
+    return shift->_tag_value('SeqLength', @_);
+}
+
+=head2 seqdesc
+
+ Title   : seqdesc
+ Usage   : $desc = $obj->seqdesc();
+           $obj->seqdesc($desc);
+ Function: At present this method is a shorthand for 
+           $obj->annotation()->description().
+
+           Note that this is not stored in the tag system and hence will
+           not be included in the return value of gff_string().
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub seqdesc {
+    my ($self, $value) = @_;
+
+    if( defined $value ) { 
+	my $v = Bio::Annotation::SimpleValue->new();
+	$v->value($value);
+	$self->annotation->add_Annotation('description',$v);
+    }
+    my ($v) = $self->annotation()->get_Annotations('description');
+    return $v ? $v->value : undef;
+}
+
+#
+# Everything else is just inherited from SeqFeature::Generic.
+#
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SimilarityPair.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SimilarityPair.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/SimilarityPair.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,294 @@
+# $Id: SimilarityPair.pm,v 1.30.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeature::SimilarityPair
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::SimilarityPair - Sequence feature based on the similarity
+                  of two sequences.
+
+=head1 SYNOPSIS
+
+  $sim_pair = Bio::SeqFeature::SimilarityPair->from_searchResult($blastHit);
+
+  $sim = $sim_pair->query(); # a Bio::SeqFeature::Similarity object - the query
+  $sim = $sim_pair->hit();   # dto - the hit.
+
+  # some properties for the similarity pair
+  $expect = $sim_pair->significance();
+  $score = $sim_pair->score();
+  $bitscore = $sim_pair->bits();
+
+  # this will not write the description for the sequence (only its name)
+  print $sim_pair->query()->gff_string(), "\n";
+
+=head1 DESCRIPTION
+
+Lightweight similarity search result as a pair of Similarity
+features. This class inherits off Bio::SeqFeature::FeaturePair and
+therefore implements Bio::SeqFeatureI, whereas the two features of the
+pair are descendants of Bio::SeqFeature::Generic, with better support
+for representing similarity search results in a cleaner way.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net or hilmar.lapp at pharma.novartis.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::SimilarityPair;
+use strict;
+
+use Bio::SeqFeature::Similarity;
+use Bio::Factory::ObjectFactory;
+
+use base qw(Bio::SeqFeature::FeaturePair);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $similarityPair = new Bio::SeqFeature::SimilarityPair
+                                 (-hit   => $hit,
+                                  -query => $query,
+                                  -source => 'blastp');
+ Function: Initializes a new SimilarityPair object
+ Returns : Bio::SeqFeature::SimilarityPair
+ Args    : -query => The query in a Feature pair 
+           -hit   => (formerly '-subject') the subject/hit in a Feature pair
+
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+
+    if(! grep { lc($_) eq "-feature_factory"; } @args) {
+	# if no overriding factory is provided, provide our preferred one
+	my $fact = Bio::Factory::ObjectFactory->new(
+                                    -type => "Bio::SeqFeature::Similarity",
+				    -interface => "Bio::SeqFeatureI");
+	push(@args, '-feature_factory', $fact);
+    }
+    my $self = $class->SUPER::new(@args);
+
+    my ($primary, $hit, $query, $fea1, $source,$sbjct) =
+        $self->_rearrange([qw(PRIMARY
+                              HIT
+                              QUERY
+                              FEATURE1
+                              SOURCE
+                              SUBJECT
+                              )],@args);
+    # -subject is the older spelling of -hit; -hit wins if both are given
+    if( $sbjct ) {
+        # undeprecated by Jason before 1.1 release
+        # $self->deprecated("use of -subject deprecated: SimilarityPair now uses 'hit'");
+        if(! $hit) { $hit = $sbjct }
+        else {
+            $self->warn("-hit and -subject were specified, using -hit and ignoring -subject");
+        }
+    }
+
+    # set the query and subject feature if provided
+    $self->query( $query) if $query && ! $fea1;
+    $hit && $self->hit($hit);
+
+    # the following refer to feature1, which is guaranteed to exist
+    if( defined $primary || ! defined $self->primary_tag) {
+        $primary = 'similarity' unless defined $primary;
+        $self->primary_tag($primary);
+    }
+
+    $source && $self->source_tag($source);
+
+    return $self;
+}
+
+#
+# Everything else is just inherited from SeqFeature::FeaturePair.
+#
+
+=head2 query
+
+ Title   : query
+ Usage   : $query_feature = $obj->query();
+           $obj->query($query_feature);
+ Function: The query object for this similarity pair
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] Bio::SeqFeature::Similarity
+
+See L<Bio::SeqFeature::Similarity>, L<Bio::SeqFeature::FeaturePair>
+
+=cut
+
+sub query {
+    return shift->feature1(@_);
+}
+
+
+
+
+=head2 subject
+
+ Title   : subject
+ Usage   : $sbjct_feature = $obj->subject();
+           $obj->subject($sbjct_feature);
+ Function: Get/Set Subject for a SimilarityPair 
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] Bio::SeqFeature::Similarity
+ Notes   : Deprecated.  Use the method 'hit' instead
+
+=cut
+
+sub subject { 
+    my $self = shift;
+#    $self->deprecated("Method subject deprecated: use hit() instead");
+    $self->hit(@_); 
+}
+
+=head2 hit
+
+ Title   : hit
+ Usage   : $sbjct_feature = $obj->hit();
+           $obj->hit($sbjct_feature);
+ Function: Get/Set Hit for a SimilarityPair 
+ Returns : Bio::SeqFeature::Similarity
+ Args    : [optional] Bio::SeqFeature::Similarity
+
+
+=cut
+
+sub hit {
+    return shift->feature2(@_);
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $source = $obj->source_tag(); # i.e., program
+           $obj->source_tag($source);
+ Function: Gets the source tag (program name typically) for a feature 
+ Returns : string
+ Args    : [optional] string
+
+
+=cut
+
+sub source_tag {
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->hit()->source_tag(@args);
+    }
+    return $self->query()->source_tag(@args);
+}
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub significance {
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->hit()->significance(@args);
+    }
+    return $self->query()->significance(@args);
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $obj->score();
+           $obj->score($value);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub score {
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->hit()->score(@args);
+    }
+    # Note: You might think it's only getting set on the hit object.
+    # Actually, it's getting set on both hit and query.
+
+    return $self->query()->score(@args);
+}
+
+=head2 bits
+
+ Title   : bits
+ Usage   : $bits = $obj->bits();
+           $obj->bits($value);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub bits {
+    my ($self, @args) = @_;
+
+    if(@args) {
+        $self->hit()->bits(@args);
+    }
+    return $self->query()->bits(@args);
+}
+
+#################################################################
+# aliases for backwards compatibility or convenience            #
+#################################################################
+
+*sbjct = \&subject;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/FeatureNamer.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/FeatureNamer.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/FeatureNamer.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,239 @@
+# $Id: FeatureNamer.pm,v 1.6.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# bioperl module for Bio::SeqFeature::Tools::FeatureNamer
+#
+# Cared for by Chris Mungall <cjm at fruitfly.org>
+#
+# Copyright Chris Mungall
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Tools::FeatureNamer - generates unique persistent names for features
+
+=head1 SYNOPSIS
+
+  use Bio::SeqIO;
+  use Bio::SeqFeature::Tools::FeatureNamer;
+
+  # first fetch a genbank SeqI object
+  $seqio =
+    Bio::SeqIO->new(-file=>'AE003644.gbk',
+                    -format=>'GenBank');
+  $seq = $seqio->next_seq();
+
+  $namer = Bio::SeqFeature::Tools::FeatureNamer->new;
+  my @features = $seq->get_SeqFeatures;
+  foreach my $feature (@features) {
+    $namer->name_feature($feature) unless $feature->display_name;
+  }  
+
+=head1 DESCRIPTION
+
+This is a helper class for providing names for SeqFeatures
+
+The L<Bio::SeqFeatureI> class provides a display_name
+method. Typically the display_name is not set when parsing formats
+such as genbank - instead properties such as B<label>, B<product> or
+B<gene> are set in a somewhat inconsistent manner.
+
+In addition, when generating subfeatures (for example, exons that are
+subfeatures of a transcript feature), it is often desirable to name
+these subfeatures before either exporting to another format or
+reporting to the user.
+
+This module is intended to help give uniform display_names to
+features and their subfeatures.
+
+=head1 TODO
+
+Currently the naming policy is hardcoded. It may be desirable to allow
+plugging in variations on naming policies; this could be done either
+by subclassing, anonymous subroutines (closures) or
+parameterization. Contact the author if you feel you have need for a
+different naming policy
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Mungall
+
+Email:  cjm AT fruitfly DOT org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqFeature::Tools::FeatureNamer;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $unflattener = Bio::SeqFeature::Tools::FeatureNamer->new();
+ Function: constructor
+ Example : 
+ Returns : a new Bio::SeqFeature::Tools::FeatureNamer
+ Args    : see below
+
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # no FeatureNamer-specific arguments are parsed yet (see disabled code)
+#    my($typemap) =
+#	$self->_rearrange([qw(TYPEMAP
+#			     )],
+#                          @args);#
+
+#    $typemap  && $self->typemap($typemap);
+    return $self; # success - we hope!
+}
+
+=head2 name_feature
+
+ Title   : name_feature
+ Usage   : $namer->name_feature($sf);
+ Function: sets display_name
+ Example :
+ Returns : 
+ Args    : L<Bio::SeqFeatureI>
+
+This method calls generate_feature_name() and uses the returned value
+to set the display_name of the feature
+
+=cut
+
+sub name_feature {
+    my ($self, $sf) = @_;
+    my $name = $self->generate_feature_name($sf);
+    $sf->display_name($name);
+}
+
+=head2 name_contained_features
+
+ Title   : name_contained_features
+ Usage   : $namer->name_contained_features($sf);
+ Function: sets display_name for all features contained by sf
+ Example :
+ Returns : 
+ Args    : L<Bio::SeqFeatureI>
+
+iterates through all subfeatures of a certain feature (using
+get_all_SeqFeatures) and names each subfeatures, based on the
+generated name for the holder feature
+
+A subfeature is named by concatenating the generated name of the
+container feature with the type and a number.
+
+For example, if the containing feature is a gene with display name
+B<dpp>, subfeatures will be named dpp-mRNA-1 dpp-mRNA-2 dpp-exon-1
+dpp-exon-2 etc
+
+=cut
+
+sub name_contained_features{
+   my ($self,$sf) = @_;
+   my $cname = $self->generate_feature_name($sf);
+   my @subsfs = $sf->get_all_SeqFeatures;
+   my %num_by_type = ();
+   foreach my $ssf (@subsfs) {
+       my $type = $ssf->primary_tag;
+       my $num = $num_by_type{$type} || 0;
+       $num++;
+       $num_by_type{$type} = $num;
+       $ssf->display_name("$cname-$type-$num");
+   }
+   return;
+}
+
+=head2 generate_feature_name
+
+ Title   : generate_feature_name
+ Usage   : $name = $namer->generate_feature_name($sf);
+ Function: derives a sensible human readable name for a $sf
+ Example :
+ Returns : str
+ Args    : L<Bio::SeqFeatureI>
+
+returns a generated name (but does not actually set display_name).
+
+If display_name is already set, the method will return this
+
+Otherwise, the name will depend on the property:
+
+=over
+
+=item label
+
+=item product
+
+=item gene
+
+=item locus_tag
+
+=back
+
+(in order of priority)
+
+=cut
+
+sub generate_feature_name {
+    my ($self, $sf) = @_;
+
+    my $name = $sf->display_name;
+    if (!$name) {
+	if ($sf->has_tag("label")) {
+	    ($name) = $sf->get_tag_values("label");
+	}
+	elsif ($sf->has_tag("product")) {
+	    ($name) = $sf->get_tag_values("product");
+	}
+	elsif ($sf->primary_tag eq 'gene' &&
+	       $sf->has_tag("gene")) {
+	    ($name) = $sf->get_tag_values("gene");
+	}
+	elsif ($sf->primary_tag eq 'gene' &&
+	       $sf->has_tag("locus_tag")) {
+	    ($name) = $sf->get_tag_values("locus_tag");
+	}
+	else {
+	    $name =  $sf->display_name;
+	}
+    }
+    return $name;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/IDHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/IDHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/IDHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,263 @@
+# $Id: IDHandler.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# bioperl module for Bio::SeqFeature::Tools::IDHandler
+#
+# Cared for by Chris Mungall <cjm at fruitfly.org>
+#
+# Copyright Chris Mungall
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Tools::IDHandler - maps $seq_feature-E<gt>primary_tag
+
+=head1 SYNOPSIS
+
+  use Bio::SeqIO;
+  use Bio::SeqFeature::Tools::IDHandler;
+
+
+=head1 DESCRIPTION
+
+Class to map $seq_feature-E<gt>primary_tag
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Mungall
+
+Email:  cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqFeature::Tools::IDHandler;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $unflattener = Bio::SeqFeature::Tools::IDHandler->new();
+ Function: constructor
+ Example : 
+ Returns : a new Bio::SeqFeature::Tools::IDHandler
+ Args    : see below
+
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # NOTE: -generate_id_sub is parsed here but not stored or used below
+    my($generate_id_sub) =
+	$self->_rearrange([qw(GENERATE_ID_SUB
+			     )],
+                          @args);
+
+    return $self; # success - we hope!
+}
+
+=head2 set_ParentIDs_from_hierarchy()
+
+ Title   : set_ParentIDs_from_hierarchy()
+ Usage   : $idhandler->set_ParentIDs_from_hierarchy($fholder)
+ Function: populates tags Parent and ID via holder hierarchy
+ Example :
+ Returns : 
+ Args    : Bio::FeatureHolderI (either a SeqFeature or a Seq)
+
+This is mainly for GFF3 export
+
+GFF3 uses the tags ID and Parent to represent the feature containment
+hierarchy; it does NOT use the feature holder tree
+
+This method sets Parent (and ID for any parents not set) based on
+feature holder/containment hierarchy, ready for GFF3 output
+
+=cut
+
+# method author: cjm at fruitfly.org
+sub set_ParentIDs_from_hierarchy {
+   my $self = shift;
+   my ($featholder) = @_;
+
+   # we will traverse the tree of contained seqfeatures
+   # (a seqfeature is itself a holder)
+
+   # start with the top-level features
+   my @sfs = $featholder->get_SeqFeatures;
+
+   # clear existing parent tags
+   # (we assume this is the desired behaviour)
+   my @all_sfs = $featholder->get_all_SeqFeatures;
+   foreach (@all_sfs) {
+       if ($_->has_tag('Parent')) {
+           $_->remove_tag('Parent');
+       }
+   }
+
+
+   # iterate until entire tree traversed
+   while (@sfs) {
+       my $sf = shift @sfs;
+       my @subsfs = $sf->get_SeqFeatures;
+
+       # use the feature's primary_id if it is set, else generate one
+       my $id = $sf->primary_id;
+       if (!$id) {
+           # the skolem function feature(seq,start,end,type)
+           # is presumed to uniquely identify this feature, and
+           # to also be persistent
+           $id = $sf->generate_unique_persistent_id;
+       }
+       foreach my $subsf (@subsfs) {
+           $subsf->add_tag_value('Parent', $id);
+       }
+
+       # push children on to end of queue (breadth first search)
+       push(@sfs, @subsfs);
+   }
+   return;
+}
+
+=head2 create_hierarchy_from_ParentIDs
+
+ Title   : create_hierarchy_from_ParentIDs
+ Usage   : $idhandler->create_hierarchy_from_ParentIDs($fholder)
+ Function: inverse of set_ParentIDs_from_hierarchy
+ Example :
+ Returns : list of top SeqFeatures
+ Args    :
+
+
+=cut
+
+sub create_hierarchy_from_ParentIDs{
+   my ($self,$featholder,@args) = @_;
+
+   my @sfs = $featholder->get_all_SeqFeatures;
+   my %sf_by_ID = ();
+   foreach (@sfs) {
+       my $id = $_->primary_id;
+       next unless $id;
+       if ($sf_by_ID{$id}) {
+           $featholder->throw("DUPLICATE ID: $id");
+       }
+       $sf_by_ID{$id} = $_;
+       $_->remove_SeqFeatures; # clear existing hierarchy (assume this is desired)
+   }
+   if (!%sf_by_ID) {
+       # warn??
+       # this is actually expected behaviour for some kinds of data;
+       # eg lists of STSs - no containment hierarchy
+       return;
+   }
+   # attach each feature to its known parents; keep the parentless ones
+   my @topsfs = 
+     grep {
+         my @parents = $_->get_tagset_values('Parent');
+         foreach my $parent (@parents) {
+             $sf_by_ID{$parent}->add_SeqFeature($_)
+		 if exists $sf_by_ID{$parent};
+         }
+         !@parents;
+     } @sfs;
+   $featholder->remove_SeqFeatures;
+   $featholder->add_SeqFeature($_) foreach @topsfs;
+   return @topsfs;
+}
+
+
+=head2 generate_unique_persistent_id
+
+ Title   : generate_unique_persistent_id
+ Usage   :
+ Function: generates a unique and persistent identifier for this
+ Example :
+ Returns : value of primary_id (a scalar)
+ Args    :
+
+Will generate an ID, B<and> set primary_id() (see above)
+
+The ID is a string generated from 
+
+  seq_id
+  primary_tag
+  start
+  end
+
+There are three underlying assumptions: that all the above accessors
+are set; that seq_id is a persistent and unique identifier for the
+sequence containing this feature; and that 
+
+  (seq_id, primary_tag, start, end) 
+
+is a "unique constraint" over features
+
+The ID is persistent, so long as none of these values change - if they
+do, it is considered a separate entity
+
+=cut
+
+# method author: cjm at fruitfly.org
+sub generate_unique_persistent_id{
+   my ($self,$sf,@args) = @_;
+
+   my $id;
+   if (!$sf->isa("Bio::SeqFeatureI")) {
+       $sf->throw("not a Bio::SeqFeatureI");
+   }
+   my $seq_id = $sf->seq_id || $sf->throw("seq_id must be set");
+   #my $seq_id = $sf->seq_id || 'unknown_seq';
+   if ($sf->has_tag('transcript_id')) {
+       ($id) = $sf->get_tag_values('transcript_id');
+   }
+   elsif ($sf->has_tag('protein_id')) {
+       ($id) = $sf->get_tag_values('protein_id');
+   }
+   else {
+       my $source = $sf->source_tag || $sf->throw("source tag must be set");
+       #my $source = $sf->source_tag || 'unknown_source';
+       my $start = $sf->start || $sf->throw("start must be set");
+       my $end = $sf->end || $sf->throw("end must be set");
+       my $type = $sf->primary_tag || $sf->throw("primary_tag must be set");
+       # no transcript_id/protein_id tag: build the ID from the fields above
+       $id = "$source:$type:$seq_id:$start:$end";
+   }
+   $sf->primary_id($id);
+   return $id;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/TypeMapper.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/TypeMapper.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/TypeMapper.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,352 @@
+# $Id: TypeMapper.pm,v 1.14.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# bioperl module for Bio::SeqFeature::Tools::TypeMapper
+#
+# Cared for by Chris Mungall <cjm at fruitfly.org>
+#
+# Copyright Chris Mungall
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Tools::TypeMapper - maps $seq_feature-E<gt>primary_tag
+
+=head1 SYNOPSIS
+
+  use Bio::SeqIO;
+  use Bio::SeqFeature::Tools::TypeMapper;
+
+  # first fetch a genbank SeqI object
+  $seqio =
+    Bio::SeqIO->new(-file=>'AE003644.gbk',
+                    -format=>'GenBank');
+  $seq = $seqio->next_seq();
+
+  $tm = Bio::SeqFeature::Tools::TypeMapper->new;
+
+  # map all the types in the sequence
+  $tm->map_types(-seq=>$seq,
+		 -type_map=>{CDS=>'ORF',
+		  variation=>sub {
+		      my $f = shift;
+		      $f->length > 1 ?
+			'variation' : 'SNP'
+		  },
+		 });
+
+   # alternatively, use the hardcoded SO mapping
+   $tm->map_types_to_SO(-seq=>$seq);
+
+=head1 DESCRIPTION
+
+This class implements an object for mapping between types; for
+example, the types in a genbank feature table, and the types specified
+in the Sequence Ontology.
+
+You can specify your own mapping, either as a simple hash index, or by
+providing your own subroutines.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Mungall
+
+Email:  cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqFeature::Tools::TypeMapper;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $unflattener = Bio::SeqFeature::Tools::TypeMapper->new();
+ Function: constructor
+ Example : 
+ Returns : a new Bio::SeqFeature::Tools::TypeMapper
+ Args    : see below
+
+
+=cut
+
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # optional -typemap argument is stored via the typemap() accessor
+    my($typemap) =
+	$self->_rearrange([qw(TYPEMAP
+			     )],
+                          @args);
+
+    $typemap  && $self->typemap($typemap);
+    return $self; # success - we hope!
+}
+
+=head2 typemap
+
+ Title   : typemap
+ Usage   : $obj->typemap($newval)
+ Function: 
+ Example : 
+ Returns : value of typemap (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub typemap{
+    my $self = shift;
+
+    return $self->{'typemap'} = shift if @_;
+    return $self->{'typemap'};
+}
+
+=head2 map_types
+
+ Title   : map_types
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub map_types{
+   my ($self,@args) = @_;
+   # named args: -feature or -seq (one is required), optional -type_map
+   my($sf, $seq, $type_map) =
+     $self->_rearrange([qw(FEATURE
+                           SEQ
+			   TYPE_MAP
+                          )],
+                          @args);
+   if (!$sf && !$seq) {
+       $self->throw("you need to pass in either -feature or -seq");
+   }
+   # a -seq argument replaces the single feature with all its features
+   my @sfs = ($sf);
+   if ($seq) {
+       $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
+       @sfs = $seq->get_all_SeqFeatures;
+   }
+   $type_map = $type_map || $self->typemap; # fall back to the stored map
+   foreach my $sf (@sfs) {
+
+       $sf->isa("Bio::SeqFeatureI") || $self->throw("$sf NOT A SeqFeatureI");
+       $sf->isa("Bio::FeatureHolderI") || $self->throw("$sf NOT A FeatureHolderI");
+
+       my $type = $sf->primary_tag;
+       my $mtype = $type_map->{$type};
+       if ($mtype) {
+	   if (ref($mtype)) {
+	       if (ref($mtype) eq 'CODE') {
+		   $mtype = $mtype->($sf); # callback computes the new type
+	       }
+	       else {
+		   $self->throw('must be scalar or CODE ref');
+	       }
+	   }
+	   $sf->primary_tag($mtype);
+       }
+   }
+   return;
+}
+
+=head2 map_types_to_SO
+
+ Title   : map_types_to_SO
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+hardcodes the genbank to SO mapping
+
+Based on revision 1.22 of SO
+
+Please see the actual code for the mappings
+
+Taken from
+
+L<http://song.sourceforge.net/FT_SO_map.txt>
+
+=cut
+
+sub map_types_to_SO{
+   my ($self,@args) = @_;
+
+   # note : some of the FT_SO mappings are commented out and overridden...
+   push(@args,
+	(-type_map=>{
+
+                     "FT term" => "SO term",
+                     "-" => "located_sequence_feature",
+                     "-10_signal" => "minus_10_signal",
+                     "-35_signal" => "minus_35_signal",
+                     "3'UTR" => "three_prime_UTR",
+                     "3'clip" => "three_prime_clip",
+                     "5'UTR" => "five_prime_UTR",
+                     "5'clip" => "five_prime_clip",
+                     "CAAT_signal" => "CAAT_signal",
+                     "CDS" => "CDS",
+                     "C_region" => "undefined",
+                     "D-loop" => "D_loop",
+                     "D_segment" => "D_gene",
+                     "GC_signal" => "GC_rich_region",
+                     "J_segment" => "undefined",
+                     "LTR" => "long_terminal_repeat",
+                     "N_region" => "undefined",
+                     "RBS" => "ribosome_entry_site",
+                     "STS" => "STS",
+                     "S_region" => "undefined",
+                     "TATA_signal" => "TATA_box",
+                     "V_region" => "undefined",
+                     "V_segment" => "undefined",
+                     "attenuator" => "attenuator",
+                     "conflict" => "undefined",
+                     "enhancer" => "enhancer",
+                     "exon" => "exon",
+                     "gap" => "gap",
+                     "gene" => "gene",
+                     "iDNA" => "iDNA",
+                     "intron" => "intron",
+                     "mRNA" => "mRNA",
+                     "mat_peptide" => "mature_peptide",
+#                     "misc_RNA" => "transcript",
+                     "misc_binding" => "binding_site",
+                     "misc_difference" => "sequence_difference",
+                     "misc_feature" => "region",
+                     "misc_recomb" => "recombination_feature",
+                     "misc_signal" => "regulatory_region",
+                     "misc_structure" => "sequence_secondary_structure",
+                     "modified_base" => "modified_base_site",
+                     "old_sequence" => "undefined",
+                     "operon" => "operon",
+                     "oriT" => "origin_of_transfer",
+                     "polyA_signal" => "polyA_signal_sequence",
+                     "polyA_site" => "polyA_site",
+                     "precursor_RNA" => "primary_transcript",
+                     "prim_transcript" => "primary_transcript",
+                     "primer_bind" => "primer_binding_site",
+                     "promoter" => "promoter",
+                     "protein_bind" => "protein_binding_site",
+                     "rRNA" => "rRNA",
+                     "repeat_region" => "repeat_region",
+                     "repeat_unit" => "repeat_unit",
+                     "satellite" => "satellite_DNA",
+                     "scRNA" => "scRNA",
+                     "sig_peptide" => "signal_peptide",
+                     "snRNA" => "snRNA",
+                     "snoRNA" => "snoRNA",
+#                     "source" => "databank_entry",
+                     "stem_loop" => "stem_loop",
+                     "tRNA" => "tRNA",
+                     "terminator" => "terminator",
+                     "transit_peptide" => "transit_peptide",
+                     "unsure" => "undefined",
+                     "variation" => "sequence_variant",
+
+		     # this is the most generic form for RNAs;
+		     # we always represent the processed form of
+		     # the transcript
+		     misc_RNA=>'processed_transcript',
+
+		     # not sure about this one...
+		     source=>'contig',
+
+		     rep_origin=>'origin_of_replication',
+
+
+                     Protein=>'protein',
+		     
+		    }));
+   return $self->map_types(@args);
+
+}
+
+=head2 get_relationship_type_by_parent_child
+
+ Title   : get_relationship_type_by_parent_child
+ Usage   : $type = $tm->get_relationship_type_by_parent_child($parent_sf, $child_sf);
+ Usage   : $type = $tm->get_relationship_type_by_parent_child('mRNA', 'protein');
+ Function: given two features where the parent contains the child,
+           will determine the type of the relationship between them
+ Example :
+ Returns : the relationship type, a string ('part_of' or 'derives_from')
+ Args    : parent SeqFeature, child SeqFeature OR
+           parent type string, child type string
+
+bioperl Seq::FeatureHolderI hierarchies are equivalent to unlabeled
+graphs (where parent nodes are the containers, and child nodes are the
+features being contained). For example, a feature of type mRNA can
+contain features of type exon.
+
+Some external representations (eg chadoxml or chaosxml) require that
+the edges in the feature relationship graph are labeled. For example,
+the type between mRNA and exon would be B<part_of>. Although it
+stretches the bioperl notion of containment, we could have a CDS
+contained by an mRNA (for example, the
+L<Bio::SeqFeature::Tools::Unflattener> module takes genbank records
+and makes these kinds of links). The relationship here would be
+B<produced_by>.
+
+In chado speak, the child is the B<subject> feature and the parent is
+the B<object> feature
+
+=cut
+
+sub get_relationship_type_by_parent_child {
+   my ($self, $parent, $child) = @_;
+
+   # Either argument may be a feature object or a plain type string;
+   # objects are reduced to their primary_tag.
+   $parent = $parent->primary_tag if ref($parent);
+   $child  = $child->primary_tag  if ref($child);
+
+   # TODO - do this with metadata, or infer via SO itself
+
+   # proteins/polypeptides are derived from their container; every
+   # other child type falls back to the default, part_of
+   return 'derives_from'
+       if lc($child) eq 'protein' || lc($child) eq 'polypeptide';
+   return 'part_of';
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/Unflattener.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/Unflattener.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/Tools/Unflattener.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2798 @@
+# $Id: Unflattener.pm,v 1.37.4.2 2006/10/02 23:10:28 sendu Exp $
+#
+# bioperl module for Bio::SeqFeature::Tools::Unflattener
+#
+# Cared for by Chris Mungall <cjm at fruitfly.org>
+#
+# Copyright Chris Mungall
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Tools::Unflattener - turns flat list of genbank-sourced features into a nested SeqFeatureI hierarchy
+
+=head1 SYNOPSIS
+
+  # standard / generic use - unflatten a genbank record
+  use Bio::SeqIO;
+  use Bio::SeqFeature::Tools::Unflattener;
+
+  # generate an Unflattener object
+  $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
+
+  # first fetch a genbank SeqI object
+  $seqio =
+    Bio::SeqIO->new(-file=>'AE003644.gbk',
+                    -format=>'GenBank');
+  my $out =
+    Bio::SeqIO->new(-format=>'asciitree');
+  while ($seq = $seqio->next_seq()) {
+
+    # get top level unflattened SeqFeatureI objects
+    $unflattener->unflatten_seq(-seq=>$seq,
+                                -use_magic=>1);
+    $out->write_seq($seq);
+
+    @top_sfs = $seq->get_SeqFeatures;
+    foreach my $sf (@top_sfs) {
+	# do something with top-level features (eg genes)
+    }
+  }
+
+
+=head1 DESCRIPTION
+
+Most GenBank entries for annotated genomic DNA contain a B<flat> list
+of features. These features can be parsed into an equivalent flat list
+of L<Bio::SeqFeatureI> objects using the standard L<Bio::SeqIO>
+classes. However, it is often desirable to B<unflatten> this list into
+something resembling actual B<gene models>, in which genes, mRNAs and CDSs
+are B<nested> according to the nature of the gene model.
+
+The BioPerl object model allows us to store these kind of associations
+between SeqFeatures in B<containment hierarchies> -- any SeqFeatureI
+object can contain nested SeqFeatureI objects. The
+Bio::SeqFeature::Tools::Unflattener object facilitates construction of
+these hierarchies from the underlying GenBank flat-feature-list
+representation.
+
+For example, if you were to look at a typical GenBank DNA entry, say,
+B<AE003644>, you would see a flat list of features:
+
+  source
+
+  gene CG4491
+  mRNA CG4491-RA
+  CDS CG4491-PA
+
+  gene tRNA-Pro
+  tRNA tRNA-Pro
+
+  gene CG32954
+  mRNA CG32954-RA
+  mRNA CG32954-RC
+  mRNA CG32954-RB
+  CDS CG32954-PA
+  CDS CG32954-PB
+  CDS CG32954-PC
+
+These features have sequence locations, but it is not immediately
+clear how to write code such that each mRNA is linked to the
+appropriate CDS (other than relying on IDs which is very bad)
+
+We would like to convert the above list into the B<containment
+hierarchy>, shown below:
+
+  source
+  gene
+    mRNA CG4491-RA
+      CDS CG4491-PA
+      exon
+      exon
+  gene
+    tRNA tRNA-Pro
+      exon
+  gene
+    mRNA CG32954-RA
+      CDS CG32954-PA
+      exon
+      exon
+    mRNA CG32954-RC
+      CDS CG32954-PC
+      exon
+      exon
+    mRNA CG32954-RB
+      CDS CG32954-PB
+      exon
+      exon
+
+Where each feature is nested underneath its container. Note that exons
+have been automatically inferred (even for tRNA genes).
+
+We do this using a call on a L<Bio::SeqFeature::Tools::Unflattener>
+object
+
+  @sfs = $unflattener->unflatten_seq(-seq=>$seq);
+
+This would return a list of the B<top level> (i.e. container)
+SeqFeatureI objects - in this case, genes. Other top level features
+are possible; for instance, the B<source> feature which is always
+present, and other features such as B<variation> or B<misc_feature>
+types.
+
+The containment hierarchy can be accessed using the get_SeqFeatures()
+call on any feature object - see L<Bio::FeatureHolderI>.
+The following code will traverse the containment hierarchy for a
+feature:
+
+  sub traverse {
+    $sf = shift;   #  $sf isa Bio::SeqFeatureI
+
+    # ...do something with $sf!
+
+    # depth first traversal of containment tree
+    @contained_sfs = $sf->get_SeqFeatures;
+    traverse($_) foreach @contained_sfs;
+  }
+
+Once you have built the hierarchy, you can do neat stuff like turn the
+features into 'rich' feature objects (eg
+L<Bio::SeqFeature::Gene::GeneStructure>) or convert to a suitable
+format such as GFF3 or chadoxml (after mapping to the Sequence
+Ontology); this step is not described here.
+
+=head1 USING MAGIC
+
+Due to the quixotic nature of how features are stored in
+GenBank/EMBL/DDBJ, there is no guarantee that the default behaviour of
+this module will produce perfect results. Sometimes it is hard or
+impossible to build a correct containment hierarchy if the information
+provided is simply too lossy, as is often the case. If you care deeply
+about your data, you should always manually inspect the resulting
+containment hierarchy; you may have to customise the algorithm for
+building the hierarchy, or even manually tweak the resulting
+hierarchy. This is explained in more detail further on in the document.
+
+However, if you are satisfied with the default behaviour, then you do
+not need to read any further. Just make sure you set the parameter
+B<use_magic> - this will invoke incantations which will magically
+produce good results no matter what the idiosyncrasies of the
+particular GenBank record in question.
+
+For example
+
+  $unflattener->unflatten_seq(-seq=>$seq,
+                              -use_magic=>1);
+
+The success of this depends on the phase of the moon at the time the
+entry was submitted to GenBank. Note that the magical recipe is being
+constantly improved, so the results of invoking magic may vary
+depending on the bioperl release.
+
+If you are skeptical of magic, or you wish to exact fine grained
+control over how the entry is unflattened, or you simply wish to
+understand more about how this crazy stuff works, then read on!
+
+=head1 PROBLEMATIC DATA AND INCONSISTENCIES
+
+Occasionally the Unflattener will have problems with certain
+records. For example, the record may contain inconsistent data - maybe
+there is an B<exon> entry that has no corresponding B<mRNA> location. 
+
+The default behaviour is to throw an exception reporting the problem,
+if the problem is relatively serious - for example, inconsistent data.
+
+You can exert more fine grained control over this - perhaps you want
+the Unflattener to do the best it can, and report any problems. This
+can be done - refer to the methods.
+
+  error_threshold()
+
+  get_problems()
+
+  report_problems()
+
+  ignore_problems()
+
+=head1 ALGORITHM
+
+This is the default algorithm; you should be able to override any part
+of it to customise.
+
+The core of the algorithm is in two parts
+
+=over
+
+=item Partitioning the flat feature list into groups
+
+=item Resolving the feature containment hierarchy for each group
+
+=back
+
+There are other optional steps after the completion of these two
+steps, such as B<inferring exons>; we now describe in more detail what
+is going on.
+
+=head2 Partitioning into groups
+
+First of all the flat feature list is partitioned into B<group>s.
+
+The default way of doing this is to use the B<gene> attribute; if we
+look at two features from GenBank accession AE003644.3:
+
+     gene            20111..23268
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /note="last curated on Thu Dec 13 16:51:32 PST 2001"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0005771"
+     mRNA            join(20111..20584,20887..23268)
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /product="CG4491-RA"
+                     /db_xref="FLYBASE:FBgn0005771"
+
+Both these features share the same /gene tag which is "noc", so they
+correspond to the same gene model (the CDS feature is not shown, but
+this also has a tag-value /gene="noc").
+
+Not all groups need to correspond to gene models, but this is the most
+common use case; later on we shall describe how to customise the
+grouping.
+
+Sometimes other tags have to be used; for instance, if you look at the
+entire record for AE003644.3 you will see you actually need to use the
+/locus_tag attribute. This attribute is actually B<not present> in
+most records!
+
+You can override this:
+
+  $unflattener->unflatten_seq(-seq=>$seq, -group_tag=>'locus_tag');
+
+Alternatively, if you B<-use_magic>, the object will try and make a
+guess as to what the correct group_tag should be.
+
+At the end of this step, we should have a list of groups - there is no
+structure within a group; the group just serves to partition the flat
+features. For the example data above, we would have the following groups.
+
+  [ source ]
+  [ gene mRNA CDS ]
+  [ gene mRNA CDS ]
+  [ gene mRNA CDS ]
+  [ gene mRNA mRNA mRNA CDS CDS CDS ]
+
+=head3 Multicopy Genes
+
+Multicopy genes are usually rRNAs or tRNAs that are duplicated across
+the genome. Because they are functionally equivalent, and usually have
+the same sequence, they usually have the same group_tag (ie gene
+symbol); they often have a /note tag giving copy number. This means
+they will end up in the same group. This is undesirable, because they
+are spatially disconnected.
+
+There is another step, which involves splitting spatially disconnected
+groups into distinct groups
+
+this would turn this
+
+ [gene-rrn3 rRNA-rrn3 gene-rrn3 rRNA-rrn3]
+
+into this
+
+ [gene-rrn3 rRNA-rrn3] [gene-rrn3 rRNA-rrn3]
+
+based on the coordinates
+
+=head3 What next?
+
+The next step is to add some structure to each group, by making
+B<containment hierarchies>, trees that represent how the features
+interrelate
+
+=head2 Resolving the containment hierarchy
+
+After the grouping is done, we end up with a list of groups which
+probably contain features of type 'gene', 'mRNA', 'CDS' and so on.
+
+Singleton groups (eg the 'source' feature) are ignored at this stage.
+
+Each group is itself flat; we need to add an extra level of
+organisation. Usually this is because different spliceforms
+(represented by the 'mRNA' feature) can give rise to different
+protein products (indicated by the 'CDS' feature). We want to correctly
+associate mRNAs to CDSs.
+
+We want to go from a group like this:
+
+  [ gene mRNA mRNA mRNA CDS CDS CDS ]
+
+to a containment hierarchy like this:
+
+  gene
+    mRNA
+      CDS
+    mRNA
+      CDS
+    mRNA
+      CDS
+
+In which each CDS is nested underneath the correct corresponding mRNA.
+
+For entries that contain no alternate splicing, this is simple; we
+know that the group
+
+  [ gene mRNA CDS ]
+
+Must resolve to the tree
+
+  gene
+    mRNA
+      CDS
+
+How can we do this in entries with alternate splicing? The bad
+news is that there is no guaranteed way of doing this correctly for
+any GenBank entry. Occasionally the submission will have been done in
+such a way as to reconstruct the containment hierarchy. However, this
+is not consistent across databank entries, so no generic solution can
+be provided by this object. This module does provide the framework
+within which you can customise a solution for the particular dataset
+you are interested in - see later.
+
+The good news is that there is an inference we can do that should
+produce pretty good results the vast majority of the time. It uses
+splice coordinate data - this is the default behaviour of this module,
+and is described in detail below.
+
+=head2 Using splice site coordinates to infer containment
+
+If an mRNA is to be the container for a CDS, then the splice site
+coordinates (or intron coordinates, depending on how you look at it)
+of the CDS must fit inside the splice site coordinates of the mRNA.
+
+Ambiguities can still arise, but the results produced should still be
+reasonable and consistent at the sequence level. Look at this fake
+example:
+
+  mRNA    XXX---XX--XXXXXX--XXXX         join(1..3,7..8,11..16,19..23)
+  mRNA    XXX-------XXXXXX--XXXX         join(1..3,11..16,19..23)
+  CDS                 XXXX--XX           join(13..16,19..20)
+  CDS                 XXXX--XX           join(13..16,19..20)
+
+[obviously the positions have been scaled down]
+
+We cannot unambiguously match mRNA with CDS based on splice sites,
+since both CDS share the splice site locations 16^17 and
+18^19. However, the consequences of making a wrong match are probably
+not very severe. Any annotation data attached to the first CDS is
+probably identical to the second CDS, other than identifiers.
+
+The default behaviour of this module is to make an arbitrary call
+where it is ambiguous (the mapping will always be bijective; i.e. one
+mRNA -E<gt> one CDS).
+
+[TODO: NOTE: not tested on EMBL data, which may not be bijective; ie two
+mRNAs can share the same CDS??]
+
+This completes the building of the containment hierarchy; other
+optional steps follow.
+
+=head1 POST-GROUPING STEPS
+
+=head2 Inferring exons from mRNAs
+
+This step always occurs if B<-use_magic> is invoked.
+
+In a typical GenBank entry, the exons are B<implicit>. That is they
+can be inferred from the mRNA location.
+
+For example:
+
+     mRNA            join(20111..20584,20887..23268)
+
+This tells us that this particular transcript has two exons. In
+bioperl, the mRNA feature will have a 'split location'.
+
+If we call
+
+  $unflattener->feature_from_splitloc(-seq=>$seq);
+
+This will generate the necessary exon features, and nest them under
+the appropriate mRNAs. Note that the mRNAs will no longer have split
+locations - they will have simple locations spanning the extent of the
+exons. This is intentional, to avoid redundancy.
+
+Occasionally a GenBank entry will have both implicit exons (from the
+mRNA location) B<and> explicit exon features.
+
+In this case, exons will still be transferred. Tag-value data from the
+explicit exon will be transferred to the implicit exon. If exons are
+shared between mRNAs these will be represented by different
+objects. Any inconsistencies between implicit and explicit will be
+reported.
+
+=head3 tRNAs and other noncoding RNAs
+
+exons will also be generated from these features
+
+=head2 Inferring mRNAs from CDS
+
+Some GenBank entries represent gene models using features of type
+gene, mRNA and CDS; some entries just use gene and CDS.
+
+If we only have gene and CDS, then the containment hierarchies will
+look like this:
+
+  gene
+    CDS
+
+If we want the containment hierarchies to be uniform, like this
+
+  gene
+    mRNA
+      CDS
+
+Then we must create an mRNA feature. This will have identical
+coordinates to the CDS. The assumption is that there is either no
+untranslated region, or it is unknown.
+
+To do this, we can call
+
+   $unflattener->infer_mRNA_from_CDS(-seq=>$seq);
+
+This is taken care of automatically, if B<-use_magic> is invoked.
+
+=head1 ADVANCED
+
+=head2 Customising the grouping of features
+
+The default behaviour is suited mostly to building models of protein
+coding genes and noncoding genes from genbank genomic DNA submissions.
+
+You can change the tag used to partition the feature by passing in a
+different group_tag argument - see the unflatten_seq() method
+
+Other behaviour may be desirable. For example, even though SNPs
+(features of type 'variation' in GenBank) are not actually part of the
+gene model, it may be desirable to group SNPs that overlap or are
+nearby gene models.
+
+It should certainly be possible to extend this module to do
+this. However, I have yet to code this part!!! If anyone would find
+this useful let me know.
+
+In the meantime, you could write your own grouping subroutine, and
+feed the results into unflatten_groups() [see the method documentation
+below]
+
+=head2 Customising the resolution of the containment hierarchy
+
+Once the flat list of features has been partitioned into groups, the
+method unflatten_group() is called on each group to build a tree.
+
+The algorithm for doing this is described above; ambiguities are
+resolved by using splice coordinates. As discussed, this can be
+ambiguous.
+
+Some submissions may contain information in tags/attributes that hint
+as to the mapping that needs to be made between the features.
+
+For example, with the Drosophila Melanogaster release 3 submission, we
+see that CDS features in alternately spliced mRNAs have a form like
+this:
+
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RA"
+                                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+                     /codon_start=1
+                     /product="CG32954-PA"
+                     /protein_id="AAF53403.1"
+                     /db_xref="GI:7298167"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP..."
+
+Here the /note tag provides the clue we need to link CDS to mRNA
+(highlighted with ^^^^). We just need to find the mRNA with the tag
+
+  /product="CG32954-RA"
+
+I have no idea how consistent this practice is across submissions; it
+is consistent for the fruitfly genome submission.
+
+We can customise the behaviour of unflatten_group() by providing our
+own resolver method. This obviously requires a bit of extra
+programming, but there is no way to get around this.
+
+Here is an example of how to pass in your own resolver; this example
+basically checks the parent (container) /product tag to see if it
+matches the required string in the child (contained) /note tag.
+
+       $unflattener->unflatten_seq(-seq=>$seq,
+                                 -group_tag=>'locus_tag',
+                                 -resolver_method=>sub {
+                                     my $self = shift;
+                                     my ($sf, @candidate_container_sfs) = @_;
+                                     if ($sf->has_tag('note')) {
+                                         my @notes = $sf->get_tag_values('note');
+                                         my @trnames = map {/from transcript\s+(.*)/;
+                                                            $1} @notes;
+                                         @trnames = grep {$_} @trnames;
+                                         my $trname;
+                                         if (@trnames == 0) {
+                                             $self->throw("UNRESOLVABLE");
+                                         }
+                                         elsif (@trnames == 1) {
+                                             $trname = $trnames[0];
+                                         }
+                                         else {
+                                             $self->throw("AMBIGUOUS: @trnames");
+                                         }
+                                         my @container_sfs =
+                                           grep {
+                                               my ($product) =
+                                                 $_->has_tag('product') ?
+                                                   $_->get_tag_values('product') :
+                                                     ('');
+                                               $product eq $trname;
+                                           } @candidate_container_sfs;
+                                         if (@container_sfs == 0) {
+                                             $self->throw("UNRESOLVABLE");
+                                         }
+                                         elsif (@container_sfs == 1) {
+                                             # we got it!
+                                             return $container_sfs[0];
+                                         }
+                                         else {
+                                             $self->throw("AMBIGUOUS");
+                                         }
+                                     }
+                                 });
+
+the resolver method is only called when there is more than one spliceform.
+
+=head2 Parsing mRNA records
+
+Some of the entries in sequence databanks are for mRNA sequences as
+well as genomic DNA. We may want to build models from these too.
+
+NOT YET DONE - IN PROGRESS!!!
+
+Open question - what would these look like?
+
+Ideally we would like a way of combining a mRNA record with the
+corresponding SeqFeature entry from the appropriate genomic DNA
+record. This could be problematic in some cases - for example, the
+mRNA sequences may not match 100% (due to differences in strain,
+assembly problems, sequencing problems, etc). What then...?
+
+=head1 SEE ALSO
+
+Feature table description
+
+  http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Mungall
+
+Email:  cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqFeature::Tools::Unflattener;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Location::Simple;
+use Bio::SeqFeature::Generic;
+use Bio::Range;
+
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $unflattener = Bio::SeqFeature::Tools::Unflattener->new();
+           $unflattener->unflatten_seq(-seq=>$seq);
+ Function: constructor
+ Example : 
+ Returns : a new Bio::SeqFeature::Tools::Unflattener
+ Args    : see below
+
+Arguments
+
+  -seq       : A L<Bio::SeqI> object (optional)
+               the sequence to unflatten; this can also be passed in
+               when we call unflatten_seq()
+
+  -group_tag : a string representing the /tag used to partition flat features
+               (see discussion above)
+
+=cut
+
+
+sub new {
+    my($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # pick the optional -seq and -group_tag named arguments out of @args
+    my($seq, $group_tag) =
+        $self->_rearrange([qw(SEQ
+                              GROUP_TAG
+                             )],
+                          @args);
+
+    # the setters are only called for values that were supplied (and true)
+    $seq  && $self->seq($seq);
+    $group_tag  && $self->group_tag($group_tag);
+    return $self; # success - we hope!
+}
+
+sub DESTROY {
+    my $self = shift;
+
+    # Destructor: warn when problems were collected but the object is
+    # going away without them having been reported or explicitly ignored.
+    return if $self->{_reported_problems};
+    return if $self->{_ignore_problems};
+    my @probs = $self->get_problems;
+    # NOTE(review): two spellings of the "reported" flag are consulted
+    # (_reported_problems above, _problems_reported below); confirm which
+    # one is actually set elsewhere in this module.
+    if (!$self->{_problems_reported} &&
+        scalar(@probs)) {
+        # The message is ONE concatenated string; a comma between the
+        # pieces would hand the last line to warn() as a separate argument.
+        $self->warn(
+            "WARNING: There are UNREPORTED PROBLEMS.\n".
+            "You may wish to use the method report_problems(), \n".
+            "or ignore_problems() on the Unflattener object\n");
+    }
+    return;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $unflattener->seq($newval)
+ Function: 
+ Example : 
+ Returns : value of seq (a Bio::SeqI)
+ Args    : on set, new value (a Bio::SeqI, optional)
+
+The Bio::SeqI object should hold a flat list of Bio::SeqFeatureI
+objects; this is the list that will be unflattened.
+
+The sequence object can also be set when we call unflatten_seq()
+
+=cut
+
+sub seq {
+    my ($self, @value) = @_;
+
+    # plain getter unless a value (possibly undef) was passed in
+    return $self->{'seq'} unless @value;
+
+    $self->{'seq'} = $value[0];
+    return $self->{'seq'};
+}
+
+=head2 group_tag
+
+ Title   : group_tag
+ Usage   : $unflattener->group_tag($newval)
+ Function: 
+ Example : 
+ Returns : value of group_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+This is the tag that will be used to collect elements from the flat
+feature list into groups; for instance, if we look at two typical
+GenBank features:
+
+     gene            20111..23268
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /note="last curated on Thu Dec 13 16:51:32 PST 2001"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0005771"
+     mRNA            join(20111..20584,20887..23268)
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /product="CG4491-RA"
+                     /db_xref="FLYBASE:FBgn0005771"
+
+We can see that these comprise the same gene model because they share
+the same /gene attribute; we want to collect these together in groups.
+
+Setting group_tag is optional. The default is to use 'gene'. In the
+example above, we could also use /locus_tag
+
+=cut
+
+sub group_tag {
+    my ($self, @value) = @_;
+
+    # with an argument (even undef) act as setter and return the new value
+    if (@value) {
+        $self->{'group_tag'} = $value[0];
+    }
+    return $self->{'group_tag'};
+}
+
+=head2 partonomy
+
+ Title   : partonomy
+ Usage   : $unflattener->partonomy({mRNA=>'gene', CDS=>'mRNA'})
+ Function: 
+ Example : 
+ Returns : value of partonomy (a hash reference)
+ Args    : on set, new value (a hash reference or undef, optional)
+
+A hash representing the containment structure that the seq_feature
+nesting should conform to; each key represents the contained (child)
+type; each value represents the container (parent) type.
+
+=cut
+
+sub partonomy {
+    my ($self, @value) = @_;
+
+    # setter: store and hand back exactly what was given (no defaulting)
+    if (@value) {
+        $self->{'partonomy'} = $value[0];
+        return $self->{'partonomy'};
+    }
+
+    # getter: lazily fall back to the default partonomy
+    $self->{'partonomy'} ||= $self->_default_partonomy;
+    return $self->{'partonomy'};
+}
+
+sub _default_partonomy {
+    # child (contained) type => parent (container) type
+    my %container_of;
+
+    # transcript-level features sit directly under a gene
+    $container_of{$_} = 'gene'
+        for qw(mRNA tRNA rRNA scRNA snRNA snoRNA misc_RNA);
+
+    # CDS, exon and intron are nested under an mRNA
+    $container_of{$_} = 'mRNA'
+        for qw(CDS exon intron);
+
+    # pseudo* features are nested under a pseudogene
+    $container_of{$_} = 'pseudogene'
+        for qw(pseudoexon pseudointron pseudotranscript);
+
+    return \%container_of;
+}
+
+=head2 structure_type
+
+ Title   : structure_type
+ Usage   : $unflattener->structure_type($newval)
+ Function: 
+ Example : 
+ Returns : value of structure_type (a scalar)
+ Args    : on set, new value (an int or undef, optional)
+
+GenBank entries conform to different flavours, or B<structure
+types>. Some have mRNAs, some do not.
+
+Right now there are only two base structure types defined. If you set
+the structure type, then appropriate unflattening action will be
+taken.  The presence or absence of explicit exons does not affect the
+structure type.
+
+If you invoke B<-use_magic> then this will be set automatically, based
+on the content of the record.
+
+=over
+
+=item Type 0 (DEFAULT)
+
+typically contains
+
+  source
+  gene
+  mRNA
+  CDS
+
+with this structure type, we want the seq_features to be nested like this
+
+  gene
+    mRNA
+    CDS
+      exon
+
+exons and introns are implicit from the mRNA 'join' location
+
+to get exons from the mRNAs, you will need this call (see below)
+
+  $unflattener->feature_from_splitloc(-seq=>$seq);
+
+=item Type 1
+
+typically contains
+
+  source
+  gene
+  CDS
+  exon [optional]
+  intron [optional]
+
+there are no mRNA features
+
+with this structure type, we want the seq_features to be nested like this
+
+  gene
+    CDS
+      exon
+      intron
+
+exon and intron may or may not be present; they may be implicit from
+the CDS 'join' location
+
+=back
+
+=cut
+
+sub structure_type{
+    my ($self, @new_value) = @_;
+
+    # With an argument (even undef) this is a setter that returns the
+    # newly stored value; without one it is a plain getter.
+    if (@new_value) {
+        $self->{'structure_type'} = $new_value[0];
+    }
+    return $self->{'structure_type'};
+}
+
+=head2 get_problems
+
+ Title   : get_problems
+ Usage   : @probs = $unflattener->get_problems();
+ Function: Get the list of problem(s) for this object.
+ Example :
+ Returns : An array of [severity, description] pairs
+ Args    :
+
+In the course of unflattening a record, problems may occur. Some of
+these problems are non-fatal, and can be ignored.
+
+Problems are represented as arrayrefs containing a pair [severity,
+description]
+
+severity is a number, the higher, the more severe the problem
+
+the description is a text string
+
+=cut
+
+sub get_problems{
+    my ($self) = @_;
+
+    # Nothing has been recorded (or cleared) yet: report an empty list
+    # without creating the slot.
+    unless (exists($self->{'_problems'})) {
+        return ();
+    }
+
+    # Otherwise return the recorded [severity, description] pairs.
+    return @{$self->{'_problems'}};
+}
+
+=head2 clear_problems
+
+ Title   : clear_problems
+ Usage   :
+ Function: resets the problem list to empty
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub clear_problems{
+   # Resets the recorded problem list to an empty list.
+   # Takes no arguments besides the object; returns nothing.
+   # (The argument list had been garbled into "at args" by mail
+   # obfuscation, which does not compile; it was never used.)
+   my ($self) = @_;
+   $self->{'_problems'} = [];
+   return;
+}
+
+
+# PRIVATE
+# see get_problems
+sub add_problem{
+    my ($self, @new_problems) = @_;
+
+    # make sure there is a list to append to
+    unless (exists($self->{'_problems'})) {
+        $self->{'_problems'} = [];
+    }
+
+    # in verbose mode every problem is echoed as a warning as it arrives
+    if ($self->verbose > 0) {
+        foreach my $new_problem (@new_problems) {
+            warn( "PROBLEM: $new_problem\n");
+        }
+    }
+
+    # the push is the last statement, so its result is the return value
+    push(@{$self->{'_problems'}}, @new_problems);
+}
+
+# PRIVATE
+# see get_problems
+sub problem {
+    my ($self, $severity, $desc, @sfs) = @_;
+
+    # Append one detail line per offending feature: its type followed by
+    # whichever of the /gene, /product and /label values it carries.
+    foreach my $feature (@sfs) {
+        my @details = ($feature->primary_tag);
+        foreach my $tag (qw(gene product label)) {
+            next unless $feature->has_tag($tag);
+            push(@details, $feature->get_tag_values($tag));
+        }
+        $desc .= sprintf("\nSF [$feature]: %s\n", join('; ', @details));
+    }
+
+    # Anything above the threshold is fatal; mark problems as reported
+    # first, since the exception itself carries the message.
+    my $limit = $self->error_threshold;
+    if ($severity > $limit) {
+        $self->{_problems_reported} = 1;
+        $self->throw("PROBLEM, SEVERITY==$severity\n$desc");
+    }
+
+    # Otherwise just remember it as a [severity, description] pair.
+    $self->add_problem([$severity, $desc]);
+    return;
+}
+
+=head2 report_problems
+
+ Title   : report_problems
+ Usage   : $unflattener->report_problems(\*STDERR);
+ Function:
+ Example :
+ Returns : 
+ Args    : FileHandle (defaults to STDERR)
+
+
+=cut
+
+sub report_problems{
+   # Writes every recorded problem to $fh (STDERR when no handle is
+   # given) and marks the problems as reported. Returns nothing.
+   my ($self, $fh) = @_;
+
+   if (!$fh) {
+       $fh = \*STDERR;
+   }
+   foreach my $problem ($self->get_problems) {
+       my ($sev, $desc) = @$problem;
+       # print, not printf: the description is data and may contain '%'
+       # characters, which printf would interpret as format conversions
+       print {$fh} "PROBLEM, SEVERITY==$sev\n$desc\n";
+   }
+   $self->{_problems_reported} = 1;
+   return;
+}
+
+=head2 ignore_problems
+
+ Title   : ignore_problems
+ Usage   : $obj->ignore_problems();
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+Unflattener is very particular about problems it finds along the
+way. If you have set the error_threshold such that less severe
+problems do not cause exceptions, Unflattener still expects you to
+report_problems() at the end, so that the user of the module is aware
+of any inconsistencies or problems with the data. In fact, a warning
+will be produced if there are unreported problems. To silence this
+warning, call the ignore_problems() method before the Unflattener
+object is destroyed.
+
+=cut
+
+sub ignore_problems{
+   my $self = shift;
+
+   # Raise the flag that records the caller's decision to ignore
+   # unreported problems. Returns nothing.
+   $self->{_ignore_problems} = 1;
+   return;
+}
+
+
+=head2 error_threshold
+
+ Title   : error_threshold
+ Usage   : $obj->error_threshold($severity)
+ Function: 
+ Example : 
+ Returns : value of error_threshold (a scalar)
+ Args    : on set, new value (an integer)
+
+Sets the threshold above which errors cause this module to throw an
+exception. The default is 0; all problems with a severity E<gt> 0 will
+cause an exception.
+
+If you raise the threshold to 1, then the unflattening process will be
+more lax; problems of severity==1 are generally non-fatal, but may
+indicate that the results should be inspected, for example, to make
+sure there is no data loss.
+
+=cut
+
+sub error_threshold{
+    my ($self, @new_value) = @_;
+
+    # Setter: store the value as given and return it unmodified.
+    if (@new_value) {
+        $self->{'error_threshold'} = $new_value[0];
+        return $self->{'error_threshold'};
+    }
+
+    # Getter: an unset (or otherwise false) threshold reads as 0.
+    my $threshold = $self->{'error_threshold'};
+    return $threshold ? $threshold : 0;
+}
+
+
+
+# PRIVATE
+#
+# given a type (eg mRNA), will return the container type (eg gene)
+sub get_container_type{
+   my ($self, $type) = @_;
+
+   # a root of the partonomy has no parent/container at all
+   foreach my $root ($self->_get_partonomy_roots) {
+       return if $root eq $type;
+   }
+
+   # look the type up directly; when there is no (true) entry fall back
+   # to the '*' entry, which acts as a wild card
+   my $container_of = $self->partonomy;
+   return $container_of->{$type} || $container_of->{'*'};
+}
+
+# get root node of partonomy hierarchy (usually gene)
+sub _get_partonomy_roots {
+    my ($self) = @_;
+    my $container_of = $self->partonomy;
+
+    # A root is a container type that is not itself contained by
+    # anything. The result has one entry per child that points at a
+    # root, so the same root may appear more than once.
+    return grep { !$container_of->{$_} } values %$container_of;
+}
+
+
+
+=head2 unflatten_seq
+
+ Title   : unflatten_seq
+ Usage   : @sfs = $unflattener->unflatten_seq($seq);
+ Function: turns a flat list of features into a list of holder features
+ Example :
+ Returns : list of Bio::SeqFeatureI objects
+ Args    : see below
+
+partitions a list of features then arranges them in a nested tree; see
+above for full explanation.
+
+note - the Bio::SeqI object passed in will be modified
+
+Arguments
+
+  -seq   :          a Bio::SeqI object; must contain Bio::SeqFeatureI objects
+                    (this is optional if seq has already been set)
+
+  -use_magic:       if TRUE (ie non-zero) then magic will be invoked;
+                    see discussion above.
+
+  -resolver_method: a CODE reference
+                    see the documentation above for an example of
+                    a subroutine that can be used to resolve hierarchies
+                    within groups.
+
+                    this is optional - if nothing is supplied, a default
+                    subroutine will be used (see below)
+
+  -group_tag:       a string
+                    [ see the group_tag() method ]
+                    this overrides the default group_tag which is 'gene'
+
+
+
+=cut
+
+sub unflatten_seq{
+   # Overview of the steps performed below:
+   #   1. unpack the named arguments and validate their combination
+   #   2. partition the flat feature list into groups using $group_tag
+   #   3. split spatially disconnected groups, remove duplicate genes
+   #   4. retype /pseudo features
+   #   5. (magic only) guess structure type, drop explicit exons/introns
+   #   6. nest each group via unflatten_groups()
+   #   7. replace the features of $seq with the new top-level features
+   #   8. optionally infer mRNAs and exons, and reconcile inferred exons
+   #      with the explicit exons that were removed in step 5
+   my ($self, @args) = @_;
+
+    my($seq, $resolver_method, $group_tag, $partonomy,
+       $structure_type, $resolver_tag, $use_magic) =
+        $self->_rearrange([qw(SEQ
+                              RESOLVER_METHOD
+                              GROUP_TAG
+                              PARTONOMY
+                              STRUCTURE_TYPE
+                              RESOLVER_TAG
+                              USE_MAGIC
+                             )],
+                          @args);
+
+   # seq we want to unflatten
+   $seq = $seq || $self->seq;
+
+
+   # prevent bad argument combinations
+   if ($partonomy &&
+       defined($structure_type)) {
+       $self->throw("You cannot set both -partonomy and -structure_type\n".
+                    "(the former is implied by the latter)");
+   }
+
+   # remember the current value of partonomy, to reset later
+   my $old_partonomy = $self->partonomy;
+   $self->partonomy($partonomy) if defined $partonomy;
+
+   # remember old structure_type
+   my $old_structure_type = $self->structure_type;
+   $self->structure_type($structure_type) if defined $structure_type;
+
+   # if we are sourcing our data from genbank, all the
+   # features should be flat (eq no sub_SeqFeatures)
+   my @flat_seq_features = $seq->get_SeqFeatures;
+   my @all_seq_features = $seq->get_all_SeqFeatures;
+
+   # sanity checks
+   if (@all_seq_features > @flat_seq_features) {
+       $self->throw("It looks as if this sequence has already been unflattened");
+   }
+   if (@all_seq_features < @flat_seq_features) {
+       $self->throw("ASSERTION ERROR: something is seriously wrong with your features");
+   }
+
+   # tag for ungrouping; usually /gene or /locus_tag
+   #     for example:        /gene="foo"
+   $group_tag = $group_tag || $self->group_tag;
+   if ($use_magic) {
+       # use magic to guess the group tag
+       my @sfs_with_locus_tag =
+         grep {$_->has_tag("locus_tag")} @flat_seq_features;
+       my @sfs_with_gene_tag =
+         grep {$_->has_tag("gene")} @flat_seq_features;
+       my @sfs_with_product_tag =
+         grep {$_->has_tag("product")} @flat_seq_features;
+       if (@sfs_with_locus_tag) {
+           if ($group_tag && $group_tag ne 'locus_tag') {
+               $self->throw("You have explicitly set group_tag to be '$group_tag'\n".
+                            "However, I detect that some features use /locus_tag\n".
+                            "I believe that this is the correct group_tag to use\n".
+                            "You can resolve this by either NOT setting -group_tag\n".
+                            "OR you can unset -use_magic to regain control");
+           }
+
+           # use /locus_tag instead of /gene tag for grouping
+           # see GenBank entry AE003677 (version 3) for an example
+           $group_tag = 'locus_tag';
+           if ($self->verbose > 0) {
+               warn "Set group tag to: $group_tag\n";
+           }
+       }
+
+       # on rare occasions, records will have no /gene or /locus_tag
+       # but it WILL have /product tags. These serve the same purpose
+       # for grouping. For an example, see AY763288 (also in t/data)
+       if (@sfs_with_locus_tag==0 &&
+           @sfs_with_gene_tag==0 &&
+           @sfs_with_product_tag>0 &&
+           !$group_tag) {
+           $group_tag = 'product';
+           if ($self->verbose > 0) {
+               warn "Set group tag to: $group_tag\n";
+           }
+
+       }
+   }
+   if (!$group_tag) {
+       $group_tag = 'gene';
+   }
+
+   # ------------------------------
+   # GROUP FEATURES using $group_tag
+   #     collect features into unstructured groups
+   # ------------------------------
+
+   # -------------
+   # we want to generate a list of groups;
+   # each group is a list of SeqFeatures; this
+   # group probably (but not necessarily)
+   # corresponds to a gene model.
+   #
+   # this array will look something like this:
+   # ([$f1], [$f2, $f3, $f4], ...., [$f97, $f98, $f99])
+   #
+   # there are also 'singleton' groups, with one member.
+   # for instance, the 'source' feature is in a singleton group;
+   # the same with others such as 'misc_feature'
+   my @groups = ();
+   # -------------
+
+   # --------------------
+   # we hope that the genbank record allows us to group by some grouping
+   # tag.
+   # for instance, most of the time a gene model can be grouped using
+   # the gene tag - that is where you see
+   #                    /gene="foo"
+   # in a genbank record
+   # --------------------
+
+   # keep an index of groups by their
+   # grouping tag
+   my %group_by_tag = ();
+
+
+   # iterate through all features, putting them into groups
+   foreach my $sf (@flat_seq_features) {
+       if (!$sf->has_tag($group_tag)) {
+           # SINGLETON
+           # this is an ungroupable feature;
+           # add it to a group of its own
+           push(@groups, [$sf]);
+       }
+       else {
+           # NON-SINGLETON
+           my @group_tagvals = $sf->get_tag_values($group_tag);
+           if (@group_tagvals > 1) {
+               # sanity check:
+               # currently something can only belong to one group
+               $self->problem(2,
+                              ">1 value for /$group_tag: @group_tagvals\n".
+                              "At this time this module is not equipped to handle this adequately", $sf);
+           }
+           # get value of group tag
+           my $gtv = shift @group_tagvals;
+           # only a missing or empty value is an error; test emptiness
+           # rather than truth so that a value of "0" is accepted
+           (defined($gtv) && length($gtv))
+             || $self->throw("Empty /$group_tag vals not allowed!");
+
+           # is this a new group?
+           my $group = $group_by_tag{$gtv};
+           if ($group) {
+               # this group has been encountered before - add current
+               # sf to the end of the group
+               push(@$group, $sf);
+           }
+           else {
+               # new group; add to index and create new group
+               $group = [$sf];  # currently one member; probably more to come
+               $group_by_tag{$gtv} = $group;
+               push(@groups, $group);
+           }
+       }
+   }
+
+   # as well as having the same group_tag, a group should be spatially
+   # connected. if not, then the group should be split into subgroups.
+   # this turns out to be necessary in the case of multicopy genes.
+   # the standard way to represent these is as spatially disconnected
+   # gene models (usually a 'gene' feature and some kind of RNA feature)
+   # with the same group tag; the code below will split these into
+   # separate groups, one per copy.
+   @groups = map { $self->_split_group_if_disconnected($_) } @groups;
+
+   # remove any duplicates; most of the time the method below has
+   # no effect. there are some unusual genbank records for which
+   # duplicate removal is necessary. see the comments in the
+   # _remove_duplicates_from_group() method if you want to know
+   # the ugly details
+   foreach my $group (@groups) {
+       $self->_remove_duplicates_from_group($group);
+   }
+
+   # -
+
+   # PSEUDOGENES, PSEUDOEXONS AND PSEUDOINTRONS
+   # these are indicated with the /pseudo tag
+   # these are mapped to a different type; they should NOT
+   # be treated as normal genes
+   foreach my $sf (@all_seq_features) {
+       if ($sf->has_tag('pseudo')) {
+           my $type = $sf->primary_tag;
+           # SO type is typically the same as the normal
+           # type but preceded by "pseudo"
+           if ($type eq 'misc_RNA') {
+               $sf->primary_tag("pseudotranscript");
+           }
+           else {
+               $sf->primary_tag("pseudo$type");
+           }
+       }
+   }
+   # now some of the post-processing that follows which applies to
+   # genes will NOT be applied to pseudogenes; this is deliberate
+   # for example, gene models are normalised to be gene-transcript-exon
+   # for pseudogenes we leave them as pseudogene-pseudoexon
+
+   # --- MAGIC ---
+   my $need_to_infer_exons = 0;
+   my $need_to_infer_mRNAs = 0;
+   my @removed_exons = ();
+   if ($use_magic) {
+       if (defined($structure_type)) {
+           $self->throw("Can't combine use_magic AND setting structure_type");
+       }
+       # (this count used to match 'exon' - a copy/paste slip)
+       my $n_introns =
+         scalar(grep {$_->primary_tag eq 'intron'} @flat_seq_features);
+       my $n_exons =
+         scalar(grep {$_->primary_tag eq 'exon'} @flat_seq_features);
+       my $n_mrnas =
+         scalar(grep {$_->primary_tag eq 'mRNA'} @flat_seq_features);
+       my $n_mrnas_attached_to_gene =
+         scalar(grep {$_->primary_tag eq 'mRNA' &&
+                        $_->has_tag($group_tag)} @flat_seq_features);
+       my $n_cdss =
+         scalar(grep {$_->primary_tag eq 'CDS'} @flat_seq_features);
+       my $n_rnas =
+         scalar(grep {$_->primary_tag =~ /RNA/} @flat_seq_features);
+       # Are there any CDS features in the record?
+       if ($n_cdss > 0) {
+           # YES
+
+           # - a pc gene model should contain at the least a CDS
+
+           # Are there any mRNA features in the record?
+           if ($n_mrnas == 0) {
+               # NO mRNAs:
+               # looks like structure_type == 1
+               $structure_type = 1;
+               $need_to_infer_mRNAs = 1;
+           }
+           elsif ($n_mrnas_attached_to_gene == 0) {
+               # $n_mrnas > 0
+               # $n_mrnas_attached_to_gene = 0
+               #
+               # The entries _do_ contain mRNA features,
+               # but none of them are part of a group/gene, i.e. they
+               # are 'floating'
+
+               # this is an annoying weird file that has some floating
+               # mRNA features;
+               # eg ftp.ncbi.nih.gov/genomes/Schizosaccharomyces_pombe/
+
+               if ($self->verbose) {
+                   my @floating_mrnas =
+                     grep {$_->primary_tag eq 'mRNA' &&
+                             !$_->has_tag($group_tag)} @flat_seq_features;
+                   printf STDERR "Unattached mRNAs:\n";
+                   foreach my $mrna (@floating_mrnas) {
+                       $self->_write_sf_detail($mrna);
+                   }
+                   printf STDERR "Don't know how to deal with these; filter at source?\n";
+               }
+
+               foreach (@flat_seq_features) {
+                   if ($_->primary_tag eq 'mRNA') {
+                       # what should we do??
+
+                       # I think for pombe we just have to filter
+                       # out bogus mRNAs prior to starting
+                   }
+               }
+
+               # looks like structure_type == 2
+               # NOTE(review): the partonomy setup further down in this
+               # sub throws for any structure_type other than 1, so
+               # records taking this branch currently end in an
+               # exception - confirm whether that is intended
+               $structure_type = 2;
+               $need_to_infer_mRNAs = 1;
+           }
+           else {
+           }
+
+           # we always infer exons in magic mode
+           $need_to_infer_exons = 1;
+       }
+       else {
+           # this doesn't seem to be any kind of protein coding gene model
+           if ( $n_rnas > 0 ) {
+               $need_to_infer_exons = 1;
+           }
+       }
+
+       if ($need_to_infer_exons) {
+           # remove exons and introns from group -
+           # we will infer exons later, and we
+           # can always infer introns from exons
+           foreach my $group (@groups) {
+               @$group =
+                 grep {
+                     my $type = $_->primary_tag();
+                     if ($type eq 'exon') {
+                         # keep track of all removed exons,
+                         # so we can do a sanity check later
+                         push(@removed_exons, $_);
+                     }
+                     $type ne 'exon' && $type ne 'intron'
+                 } @$group;
+           }
+           # get rid of any groups that have zero members
+           @groups = grep {scalar(@$_)} @groups;
+       }
+   }
+   # --- END OF MAGIC ---
+
+   # LOGICAL ASSERTION
+   if (grep {!scalar(@$_)} @groups) {
+       $self->throw("ASSERTION ERROR: empty group");
+   }
+
+   # LOGGING
+   if ($self->verbose > 0) {
+       printf STDERR "GROUPS:\n";
+       foreach my $group (@groups) {
+           $self->_write_group($group, $group_tag);
+       }
+   }
+   # -
+
+   # --------- FINISHED GROUPING -------------
+
+
+   # TYPE CONTAINMENT HIERARCHY (aka partonomy)
+   # set the containment hierarchy if desired
+   # see docs for structure_type() method
+   if ($structure_type) {
+       if ($structure_type == 1) {
+           $self->partonomy(
+                            {CDS => 'gene',
+                             exon => 'CDS',
+                             intron => 'CDS',
+                            }
+                           );
+       }
+       else {
+           $self->throw("structure_type $structure_type is currently unknown");
+       }
+   }
+
+   # see if we have an obvious resolver_tag
+   if ($use_magic) {
+       foreach my $sf (@all_seq_features) {
+           if ($sf->has_tag('derived_from')) {
+               $resolver_tag = 'derived_from';
+           }
+       }
+   }
+
+   if ($use_magic) {
+       # point all feature types without a container type to the root type.
+       #
+       # for example, if we have an unanticipated feature_type, say
+       # 'aberration', this should by default point to the parent 'gene'
+       foreach my $group (@groups) {
+           my @sfs = @$group;
+           if (@sfs > 1) {
+               foreach my $sf (@sfs) {
+                   my $type = $sf->primary_tag;
+                   next if $type eq 'gene';
+                   my $container_type = $self->get_container_type($type);
+                   if (!$container_type) {
+                       $self->partonomy->{$type} = 'gene';
+                   }
+               }
+           }
+       }
+   }
+
+   # we have done the first part of the unflattening.
+   # we now have a list of groups; each group is a list of seqfeatures.
+   # the actual group itself is flat; we may want to unflatten this further;
+   # for instance, a gene model can contain multiple mRNAs and CDSs. We may want
+   # to link the correct mRNA to the correct CDS via the bioperl sub_SeqFeature tree.
+   #
+   # what we would end up with would be
+   #  gene1
+   #    mRNA-a
+   #      CDS-a
+   #    mRNA-b
+   #      CDS-b
+   my @top_sfs = $self->unflatten_groups(-groups=>\@groups,
+                                         -resolver_method=>$resolver_method,
+                                         -resolver_tag=>$resolver_tag);
+
+   # restore settings
+   $self->partonomy($old_partonomy);
+
+   # restore settings
+   $self->structure_type($old_structure_type);
+
+   # modify the original Seq object - the top seqfeatures are now
+   # the top features from each group
+   $seq->remove_SeqFeatures;
+   $seq->add_SeqFeature(@top_sfs);
+
+   # --------- FINISHED UNFLATTENING -------------
+
+   # lets see if there are any post-unflattening tasks we need to do
+
+
+
+   # INFERRING mRNAs
+   if ($need_to_infer_mRNAs) {
+       if ($self->verbose > 0) {
+           printf STDERR "** INFERRING mRNA from CDS\n";
+       }
+       $self->infer_mRNA_from_CDS(-seq=>$seq);
+   }
+
+   # INFERRING exons
+   if ($need_to_infer_exons) {
+
+       # infer exons, one group/gene at a time
+       foreach my $sf (@top_sfs) {
+           my @sub_sfs = ($sf, $sf->get_all_SeqFeatures);
+           $self->feature_from_splitloc(-features=>\@sub_sfs);
+       }
+
+       # some exons are stated explicitly; ie there is an "exon" feature
+       # most exons are inferred; ie there is a "mRNA" feature with
+       # split locations
+       #
+       # if there were exons explicitly stated in the entry, we need to
+       # do two things:
+       #
+       # make sure these exons are consistent with the inferred exons
+       #  (you never know)
+       #
+       # transfer annotation (tag-vals) from the explicit exon to the
+       # new inferred exon
+       if (@removed_exons) {
+           my @allfeats = $seq->get_all_SeqFeatures;
+
+           # find all the inferred exons that are children of mRNA
+           my @mrnas =  grep {$_->primary_tag eq 'mRNA'} @allfeats;
+           my @exons =
+             grep {$_->primary_tag eq 'exon'}
+               map {$_->get_SeqFeatures} @mrnas;
+
+           my %exon_h = ();        # index of exons by location;
+
+           # there CAN be >1 exon at a location; we can represent these redundantly
+           # (ie as a tree, not a graph)
+           push(@{$exon_h{$self->_locstr($_)}}, $_) foreach @exons;
+           my @problems = ();      # list of problems;
+                                   # each problem is a
+                                   # [$severity, $description] pair
+           my $problem = '';
+           my ($n_exons, $n_removed_exons) =
+             (scalar(keys %exon_h), scalar(@removed_exons));
+           foreach my $removed_exon (@removed_exons) {
+               my $locstr = $self->_locstr($removed_exon);
+               my $inferred_exons = $exon_h{$locstr};
+               delete $exon_h{$locstr};
+               if ($inferred_exons) {
+                   my %exons_done = ();
+                   foreach my $exon (@$inferred_exons) {
+
+                       # make sure we don't move stuff twice
+                       next if $exons_done{$exon};
+                       $exons_done{$exon} = 1;
+
+                       # we need to transfer any tag-values from the explicit
+                       # exon to the implicit exon
+                       foreach my $tag ($removed_exon->get_all_tags) {
+                           my @vals = $removed_exon->get_tag_values($tag);
+                           if (!$exon->can("add_tag_value")) {
+                               # I'm puzzled as to what should be done here;
+                               # SeqFeatureIs are not necessarily mutable,
+                               # but we know that in practice the implementing
+                               # class is mutable
+                               $self->throw("The SeqFeature object does not ".
+                                            "implement add_tag_value()");
+                           }
+                           $exon->add_tag_value($tag, @vals);
+                       }
+                   }
+               }
+               else {
+                   # no exons inferred at $locstr
+                   push(@problems,
+                        [1,
+                         "there is a conflict with exons; there was an explicitly ".
+                         "stated exon with location $locstr, yet I cannot generate ".
+                         "this exon from the supplied mRNA locations\n"]);
+               }
+           }
+           # do we have any inferred exons left over, that were not
+           # covered in the explicit exons?
+           if (keys %exon_h) {
+               # TODO - we ignore this problem for now
+               # (plain concatenation: the location strings are data and
+               # must not be used as an sprintf format)
+               push(@problems,
+                    [1,
+                     "There are some inferred exons that are not in the ".
+                     "explicit exon list; they are the exons at locations:\n".
+                     join("\n", keys %exon_h)."\n"]);
+           }
+
+           # report any problems
+           if (@problems) {
+               my $thresh = $self->error_threshold;
+               my @bad_problems = grep {$_->[0] > $thresh} @problems;
+               if (@bad_problems) {
+                   printf STDERR "PROBLEM:\n";
+                   $self->_write_hier(\@top_sfs);
+                   # TODO - allow more fine grained control over this
+                   $self->{_problems_reported} = 1;
+                   $self->throw(join("\n",
+                                     map {"@$_"} @bad_problems));
+               }
+               $self->problem(@$_) foreach @problems;
+           }
+       }
+   }
+   # --- end of inferring exons --
+
+   # return new top level features; this can also
+   # be retrieved via
+   #   $seq->get_SeqFeatures();
+#   return @top_sfs;
+   return $seq->get_SeqFeatures;
+}
+
+# _split_group_if_disconnected([@sfs])
+#
+# as well as having the same group_tag, a group should be spatially
+# connected. if not, then the group should be split into subgroups.
+# this turns out to be necessary in the case of multicopy genes.
+# the standard way to represent these is as spatially disconnected
+# gene models (usually a 'gene' feature and some kind of RNA feature)
+# with the same group tag; the code below will split these into 
+# separate groups, one per copy.
+
+sub _split_group_if_disconnected {
+    my ($self, $group) = @_;
+
+    # ask Bio::Range for the disconnected ranges spanned by the members
+    my @members = @$group;
+    my @ranges = Bio::Range->disconnected_ranges(@members);
+
+    my @subgroups;
+    if (!@ranges) {
+        # no range at all should be impossible
+        $self->throw("ASSERTION ERROR");
+    }
+    elsif (@ranges == 1) {
+        # one range only: hand back the original group unchanged
+        push(@subgroups, $group);
+    }
+    else {
+        # several ranges: build one subgroup per range
+        if ($self->verbose > 0) {
+            printf STDERR "GROUP PRE-SPLIT:\n";
+            $self->_write_group($group, $self->group_tag);
+        }
+        foreach my $range (@ranges) {
+            # a member belongs to the subgroup of every range for which
+            # its intersection() with that range is true
+            my @overlapping = grep { $_->intersection($range) } @members;
+            push(@subgroups, \@overlapping);
+        }
+        if ($self->verbose > 0) {
+            printf STDERR "SPLIT GROUPS:\n";
+            foreach my $subgroup (@subgroups) {
+                $self->_write_group($subgroup, $self->group_tag);
+            }
+        }
+    }
+    return @subgroups;
+}
+
+sub _remove_duplicates_from_group {
+    my ($self, $group) = @_;
+
+    # ::: WEIRD BOUNDARY CASE CODE :::
+    # some gb records carry two 'gene' features for a single gene
+    # (see ATF14F8.gbk in the t/data directory); when that happens
+    # the group is rewritten in place so that it keeps only one of them
+
+    my @candidates = grep {$_->primary_tag eq 'gene'} @$group;
+
+    # zero or one gene: nothing to clean up
+    return unless @candidates > 1;
+
+    # In ATF14F8.gbk the same gene shows up once with a plain location
+    # and once with a multi-part split location, eg
+    #
+    #     gene            16790..26395
+    #                     /gene="F14F8_60"
+    #     ...
+    #     gene            complement(join(16790..19855,20136..20912,21378..21497,
+    #                     21654..21876,22204..22400,22527..23158,23335..23448,
+    #                     23538..23938,24175..24536,24604..24715,24889..24984,
+    #                     25114..25171,25257..25329,25544..25589,25900..26018,
+    #                     26300..26395))
+    #                     /gene="F14F8_60"
+    #
+    # the plain form is the 'standard' genbank representation; the split
+    # form is redundant with the CDS entry, so genes with a split
+    # location of more than one part are filtered out
+
+    if ($self->verbose > 0) {
+        printf STDERR "REMOVING DUPLICATES:\n";
+    }
+
+    my @genes;
+    foreach my $candidate (@candidates) {
+        my $loc = $candidate->location;
+        if ($loc->isa("Bio::Location::SplitLocationI")) {
+            my @parts = $loc->each_Location;
+            next if @parts > 1;
+        }
+        push(@genes, $candidate);
+    }
+
+    # still more than one left: fall back to keeping just the first one
+    if (@genes > 1) {
+        @genes = ($genes[0]);
+    }
+
+    # if a gene survived, put it first, followed by every non-gene
+    # member; if none survived the group is left exactly as it was
+    if (@genes) {
+        @genes == 1 || $self->throw("ASSERTION ERROR");
+        my @others = grep {$_->primary_tag ne 'gene'} @$group;
+        @$group = ($genes[0], @others);
+    }
+    return;
+}
+
+
+=head2 unflatten_groups
+
+ Title   : unflatten_groups
+ Usage   :
+ Function: iterates over groups, calling unflatten_group() [see below]
+ Example :
+ Returns : list of Bio::SeqFeatureI objects that are holders
+ Args    : see below
+
+Arguments
+
+  -groups:          list of list references; inner list is of Bio::SeqFeatureI objects
+                    e.g.  ( [$sf1], [$sf2, $sf3, $sf4], [$sf5, ...], ...)
+
+  -resolver_method: a CODE reference
+                    see the documentation above for an example of
+                    a subroutine that can be used to resolve hierarchies
+                    within groups.
+
+                    this is optional - a default subroutine will be used
+
+
+NOTE: You should not need to call this method, unless you want
+fine-grained control over the unflattening process.
+
+=cut
+
+sub unflatten_groups{
+   # Thin wrapper: calls unflatten_group() once per group, passing the
+   # resolver settings through unchanged, and returns the concatenated
+   # results in group order.
+   # (The argument list had been garbled into "at args" by mail
+   # obfuscation, which does not compile; restored to @args.)
+   my ($self, @args) = @_;
+   my($groups, $resolver_method, $resolver_tag) =
+     $self->_rearrange([qw(GROUPS
+                           RESOLVER_METHOD
+                           RESOLVER_TAG
+                          )],
+                          @args);
+
+   # this is just a simple wrapper for unflatten_group()
+   return
+     map {
+         $self->unflatten_group(-group=>$_,
+                                -resolver_method=>$resolver_method,
+                                -resolver_tag=>$resolver_tag)
+     } @$groups;
+}
+
+=head2 unflatten_group
+
+ Title   : unflatten_group
+ Usage   :
+ Function: nests a group of features into a feature containment hierarchy
+ Example :
+ Returns : Bio::SeqFeatureI objects that holds other features
+ Args    : see below
+
+Arguments
+
+  -group:           reference to list of Bio::SeqFeatureI objects
+
+  -resolver_method: a CODE reference
+                    see the documentation above for an example of
+                    a subroutine that can be used to resolve hierarchies
+                    within groups
+
+                    this is optional - a default subroutine will be used
+
+
+NOTE: You should not need to call this method, unless you want
+fine-grained control over the unflattening process.
+
+=cut
+
+sub unflatten_group{
+    # Builds the containment (child -> parent) hierarchy for ONE group of
+    # seqfeatures and nests every child inside its container feature.
+    #
+    # Named arguments (decoded with _rearrange):
+    #   -group            arrayref of the seqfeatures making up the group
+    #   -resolver_method  optional CODE ref, invoked as
+    #                       $code->($self, $sf, @possible_container_sfs)
+    #                     and expected to return a flat list of
+    #                     (container => score) pairs
+    #   -resolver_tag     optional tag name; when given, children carrying
+    #                     this tag are paired with the containers whose
+    #                     product/symbol/label value equals the tag value
+    #
+    # Returns the features that ended up without a container; a group
+    # holding a single feature is returned as that feature, untouched.
+    my ($self, @args) = @_;
+
+    my ($group, $resolver_method, $resolver_tag) =
+      $self->_rearrange([qw(GROUP
+                            RESOLVER_METHOD
+                            RESOLVER_TAG
+                           )],
+                        @args);
+
+    if ($self->verbose > 0) {
+        printf STDERR "UNFLATTENING GROUP:\n";
+        $self->_write_group($group, $self->group_tag);
+    }
+
+    my @sfs = @$group;
+
+    # we can safely ignore singletons (e.g. [source])
+    return $sfs[0] if @sfs == 1;
+
+    # NOTE(review): the partonomy is fetched but never consulted below;
+    # the call is retained in case the accessor has side effects - confirm
+    my $partonomy = $self->partonomy;
+
+    # $resolver_method is a reference to a SUB that will resolve
+    # ambiguous parent/child containment; for example, determining
+    # which mRNAs go with which CDSs
+    $resolver_method = $resolver_method || \&_resolve_container_for_sf;
+
+    # TAG BASED RESOLVING OF HIERARCHIES
+    #
+    # if the user specifies $resolver_tag, then we use this tag
+    # to pair up ambiguous parents and children;
+    #
+    # for example, the CDS feature may have a resolver tag of /derives_from
+    # which is a 'foreign key' into the /label tag of the mRNA feature
+    #
+    # this kind of tag-based resolution is possible for a certain subset
+    # of genbank records
+    #
+    # if no resolver tag is specified, we revert to the normal
+    # resolver_method
+    if ($resolver_tag) {
+        my $backup_resolver_method = $resolver_method;
+        # closure: $resolver_tag and $backup_resolver_method are
+        # remembered by this sub
+        $resolver_method =
+          sub {
+              my ($self, $sf, @possible_container_sfs) = @_;
+
+              # a feature without the resolver tag is handed to the backup
+              # resolver; it must receive the same argument list as this
+              # sub does (unflattener first), because every resolver is
+              # invoked as $resolver->($self, $sf, @containers)
+              if (!$sf->has_tag($resolver_tag)) {
+                  return $backup_resolver_method->($self, $sf,
+                                                   @possible_container_sfs);
+              }
+
+              my ($resolver_tagval) = $sf->get_tag_values($resolver_tag);
+              # if a feature has a resolver_tag (e.g. /derives_from)
+              # this specifies the /product, /symbol or /label for the
+              # parent feature
+              my @container_sfs =
+                grep {
+                    my $candidate = $_;
+                    my $match = 0;
+                    $self->_write_sf($candidate) if $self->verbose > 0;
+                    foreach my $tag (qw(product symbol label)) {
+                        next unless $candidate->has_tag($tag);
+                        my @vals = $candidate->get_tag_values($tag);
+                        if (grep {$_ eq $resolver_tagval} @vals) {
+                            $match = 1;
+                            last;
+                        }
+                    }
+                    $match;
+                } @possible_container_sfs;
+
+              # tag matches carry no preference among themselves: score 0
+              return map { ($_ => 0) } @container_sfs;
+          };
+    }
+    # we have now set $resolver_method to a subroutine for
+    # disambiguating parent/child relationships. we will
+    # now build the whole containment hierarchy for this group
+
+
+    # FIND TOP/ROOT SEQFEATURES
+    #
+    # find all the features for which there is no
+    # containing feature type (eg genes)
+    my @top_sfs =
+      grep {
+          !$self->get_container_type($_->primary_tag);
+      } @sfs;
+
+    # CONDITION: there must be at most one root
+    if (@top_sfs > 1) {
+        $self->_write_group($group, $self->group_tag);
+        printf STDERR "TOP SFS:\n";
+        $self->_write_sf($_) foreach @top_sfs;
+        $self->throw("multiple top-sfs in group");
+    }
+    my $top_sf = $top_sfs[0];
+
+    # CREATE INDEX OF SEQFEATURES BY TYPE
+    my %sfs_by_type = ();
+    foreach my $sf (@sfs) {
+        push(@{$sfs_by_type{$sf->primary_tag}}, $sf);
+    }
+
+    # containment index; keyed by child; lookup parent
+    # note: this index uses the stringified object reference of
+    # the object as a surrogate lookup key
+
+    my %container = ();   # child -> parent
+
+    # ALGORITHM: build containment graph
+    #
+    # find all possible containers for each SF;
+    # for instance, for a CDS, the possible containers are all
+    # the mRNAs in the same group. For a mRNA, the possible
+    # containers are any SFs of type 'gene' (should only be 1).
+    # (these container-type mappings can be overridden)
+    #
+    # contention is resolved by checking coordinates of splice sites
+    # (this is the default, but can be overridden)
+    #
+    # most of the time, there is no problem identifying a unique
+    # parent for every child; this can be ambiguous when constructing
+    # CDS to mRNA relationships with lots of alternate splicing
+    #
+    # a hash of child->parent relationships is constructed (%container)
+    # any mappings that need further resolution (eg CDS to mRNA) are
+    # placed in %unresolved
+
+    # %unresolved index
+    # (keyed by stringified object reference of child seqfeature)
+    my %unresolved = ();    # child -> [parent,score] to be resolved
+
+    # index of seqfeatures by their stringified object reference;
+    # this is essentially a way of 'reviving' an object from its stringified
+    # reference
+    # (see NOTE ON USING OBJECTS AS KEYS IN HASHES, below)
+    my %idxsf = map {$_=>$_} @sfs;
+
+    foreach my $sf (@sfs) {
+        my $type = $sf->primary_tag;
+
+        # container type (e.g. the container type for CDS is usually mRNA);
+        # a type without a container type is a root/top node (eg gene)
+        my $container_type = $self->get_container_type($type);
+        next unless $container_type;
+
+        # list of possible containers
+        # (eg for a CDS in an alternately spliced gene, this
+        #  would be a list of all the mRNAs for this gene)
+        my @possible_container_sfs =
+          @{$sfs_by_type{$container_type} || []};
+
+        # no candidate in this group: $sf stays at the root of the hierarchy
+        next unless @possible_container_sfs;
+
+        if (@possible_container_sfs == 1) {
+            # this is the easy situation, whereby the containment
+            # hierarchy is unambiguous. this will probably be the
+            # case if the genbank record has no alternate splicing
+            # within it
+
+            # ONE OPTION ONLY - resolved!
+            $container{$sf} = $possible_container_sfs[0];
+            next;
+        }
+
+        # MULTIPLE CONTAINER CHOICES
+        #
+        # push this onto the %unresolved graph, and deal with it later
+
+        # for now we hardcode things such that the only type
+        # with ambiguous parents is a CDS; if this is violated,
+        # it has a weak problem class of '1' so the API user
+        # can easily set things to ignore these
+        if ($type ne 'CDS') {
+            $self->problem(1,
+                           "multiple container choice for non-CDS; ".
+                           "CDS to mRNA should be the only ".
+                           "relationships requiring resolving",
+                           $sf);
+        }
+
+        # $resolver_method will assign scores to
+        # parent/child combinations; later on we
+        # will use these scores to find the optimal
+        # parent/child pairings
+
+        # the default $resolver_method uses splice sites to
+        # score possible parent/child matches
+        my %container_sfh =
+          $resolver_method->($self, $sf, @possible_container_sfs);
+        if (!%container_sfh) {
+            $self->problem(2,
+                           "no containers possible for SeqFeature of ".
+                           "type: $type; this SF is being placed at ".
+                           "root level",
+                           $sf);
+            # RESOLVED! (sort of - placed at root/gene level)
+            $container{$sf} = $top_sf;
+
+            # this sort of thing happens if the record is
+            # badly messed up and there is absolutely no indication
+            # of where to put the CDS. Perhaps we should just
+            # place it with a random mRNA?
+        }
+        foreach my $jsf (keys %container_sfh) {
+            # add [parent, score] pairs to the %unresolved
+            # lookup table/graph
+            push(@{$unresolved{$sf}},
+                 [$idxsf{$jsf}, $container_sfh{$jsf} || 0]);
+        }
+    }
+
+    # NOT IMPLEMENTED: enforcing a 1:1 mapping between mRNAs and CDSs by
+    # creating artificial duplicate features.
+    #
+    # 99.99% of the time in genbank genomic record of structure type 0, we
+    # see one CDS for every mRNA; one exception is the S Pombe
+    # genome, which is all CDS, bar a few spurious mRNAs; we have to
+    # filter out the spurious mRNAs in this case
+    #
+    # another strange case is in the mouse genome, NT_078847.1
+    # for Pcdh13 you will notice there is 4 mRNAs and 5 CDSs.
+    # most unusual!
+    # I'm at a loss for a really clever thing to do here. I think the
+    # best thing is to create duplicate features to preserve the 1:1 mapping
+
+    # DEBUGGING CODE
+    if ($self->verbose > 0 && %unresolved) {
+        printf STDERR "UNRESOLVED PAIRS:\n";
+        foreach my $child_key (keys %unresolved) {
+            my @poss = @{$unresolved{$child_key}};
+            my $childsf = $idxsf{$child_key};
+            my @clabels =
+              ($childsf->get_tagset_values(qw(protein_id label product)), "?");
+            foreach my $p (@poss) {
+                my $parentsf = $p->[0];
+                my @plabels =
+                  ($parentsf->get_tagset_values(qw(transcript_id label product)), "?");
+                # labels are passed as arguments, never as part of the
+                # format, so a '%' inside a tag value is printed literally
+                printf STDERR "  PAIR: %s => %s  (of %d)\n",
+                  $clabels[0], $plabels[0], scalar(@poss);
+            }
+        }
+    } # -- end of verbose
+
+    # Now we have to fully resolve the containment hierarchy; remember,
+    # the graph %container has the fully resolved child->parent links;
+    #
+    # the graph %unresolved is keyed by children missing parents; we
+    # need to put all these orphans in the %container graph
+    #
+    # we do this using the scores in %unresolved, with the
+    # find_best_matches() algorithm
+    my $unresolved_problem_reported = 0;
+    if (%unresolved) {
+        my $new_pairs =
+          $self->find_best_matches(\%unresolved, []);
+        if (!$new_pairs) {
+            my ($g) = $sfs[0]->get_tagset_values($self->group_tag || 'gene');
+            $g = '?' unless defined $g;
+            $self->problem(2,
+                           "Could not resolve hierarchy for $g");
+            $new_pairs = [];
+            $unresolved_problem_reported = 1;
+        }
+        foreach my $pair (@$new_pairs) {
+            if ($self->verbose > 0) {
+                printf STDERR "  resolved pair %s\n", "@$pair";
+            }
+            $container{$pair->[0]} = $pair->[1];
+            delete $unresolved{$pair->[0]};
+        }
+    }
+
+    # CONDITION: containment hierarchy resolved
+    if (%unresolved && !$unresolved_problem_reported) {
+        # a hash does not interpolate inside a string, so list the
+        # offending children explicitly
+        $self->throw("UNRESOLVED: " . join(", ", sort keys %unresolved));
+    }
+
+    # make nested SeqFeature hierarchy from the %container index
+    # ie put child SeqFeatures into parent SeqFeatures
+    my @top = ();
+    foreach my $sf (@sfs) {
+        my $container_sf = $container{$sf};
+        if (!$container_sf) {
+            push(@top, $sf);
+            next;
+        }
+
+        # make $sf nested inside $container_sf;
+        # first check if the container spatially contains the containee
+        if ($container_sf->contains($sf)) {
+            # add containee
+            $container_sf->add_SeqFeature($sf);
+        }
+        else {
+            # weird case - the container does NOT spatially
+            # contain the containee;
+            # we expand and throw a warning
+            #
+            # for an example of this see ZFP91-CNTF dicistronic gene
+            # in NCBI chrom 11 build 34.3
+            $self->problem(1,
+                           "Container feature does not spatially contain ".
+                           "subfeature. Perhaps this is a dicistronic gene? ".
+                           "I am expanding the parent feature",
+                           $container_sf,
+                           $sf);
+            $container_sf->add_SeqFeature($sf, 'EXPAND');
+        }
+    }
+    return @top;
+} # -- end of unflatten_group
+
+# -------
+# A NOTE ON USING OBJECTS AS KEYS IN HASHES (stringified objects)
+#
+# Often we wish to use seqfeatures as keys in a hashtable; because seqfeatures
+# in bioperl have no unique ID, we use a surrogate ID in the form of the
+# stringified object references - this is just what you get if you say
+#
+#  print "$sf\n";
+#
+# this is guaranteed to be unique (within a particular perl execution)
+#
+# often we want to 'revive' the objects used as keys in a hash - once the
+# objects are used as keys, remember it is the *strings* used as keys and
+# not the object itself, so the object needs to be revived using another
+# hashtable that looks like this
+#
+#    %sfidx = map { $_ => $_ } @sfs
+#
+# -------
+
+
+# recursively finds the best set of pairings from a matrix of possible pairings
+#
+# tries to make sure nothing is unpaired
+#
+# given a matrix of POSSIBLE matches
+#  (matrix expressed as hash/lookup; keyed by child object; val = [parent, score]
+#
+# 
+sub find_best_matches {
+    # Recursive search for a child -> parent assignment.
+    #
+    #   $matrix : hashref; child key => arrayref of [parent, score] candidates
+    #   $pairs  : arrayref of the [child, parent] pairs selected so far
+    #
+    # Returns an arrayref of [child, parent] pairs, or 0 when none of the
+    # candidates of the most constrained child leads to a solution.
+    my ($self, $matrix, $pairs) = @_;
+
+    my $verbose = $self->verbose;
+    printf STDERR "find_best_matches: (/%d)\n", scalar(@$pairs)
+      if $verbose > 0;
+
+    # what the pairs chosen so far have already used up
+    my (%child_taken, %parent_taken);
+    foreach my $chosen (@$pairs) {
+        $child_taken{$chosen->[0]}  = 1;
+        $parent_taken{$chosen->[1]} = 1;
+    }
+
+    # cut the matrix down to the part that is still to be resolved
+    my %open;           # child key => [ remaining [parent, score] candidates ]
+    my %open_parents;   # non-empty iff an open child still has a candidate
+    foreach my $child (keys %$matrix) {
+        if ($verbose > 0) {
+            printf STDERR "  $child : %s\n",
+              join("; ", map {"[@$_]"} @{$matrix->{$child}});
+        }
+        next if $child_taken{$child};
+
+        my @remaining =
+          grep { !$parent_taken{$_->[0]} } @{$matrix->{$child}};
+        $open_parents{$_} = 1 foreach @remaining;
+        $open{$child} = [@remaining];
+    }
+
+    my @queue = keys %open;
+
+    # no candidate parent left anywhere: nothing more can be paired
+    return $pairs unless scalar(keys %open_parents);
+
+    # no child left to place: every child has a parent
+    return $pairs unless scalar(@queue);
+
+    # the child with the fewest choices is handled first
+    @queue =
+      sort { scalar(@{$open{$a}}) <=> scalar(@{$open{$b}}) } @queue;
+    my $csf = shift @queue;
+
+    # its candidates as [parent, score], highest score first
+    my @candidates = sort { $b->[1] <=> $a->[1] } @{$open{$csf}};
+
+    foreach my $candidate (@candidates) {
+        my $psf = $candidate->[0];
+
+        # would selecting ($csf, $psf) take away the only remaining
+        # parent choice of some other open child?
+        my $starves_other = 0;
+        foreach my $other (@queue) {
+            my @alternatives = grep { $_->[0] ne $psf } @{$open{$other}};
+            next if @alternatives;
+            $starves_other = 1;
+            last;
+        }
+        next if $starves_other;
+
+        # tentatively accept the pair and try to complete the assignment
+        my $solution =
+          $self->find_best_matches($matrix, [@$pairs, [$csf, $psf]]);
+        return $solution if $solution;
+    }
+
+    # every candidate failed
+    return 0;
+}
+
+# ----------------------------------------------
+# writes a one-line summary of a group to STDERR
+# (nothing is written unless verbose > 0)
+#
+# mostly for logging/debugging
+#
+#   $group     : arrayref of seqfeatures
+#   $group_tag : tag whose first value labels the group;
+#                defaults to 'gene'
+# ----------------------------------------------
+sub _write_group {
+    my ($self, $group, $group_tag) = @_;
+    $group_tag ||= 'gene';
+
+    # nothing is printed unless verbose, so skip the tag lookup too
+    return unless $self->verbose > 0;
+
+    # the label is taken from the first feature of the group; an empty
+    # group (or a first feature lacking the tag) is labelled '?'
+    my $label = '?';
+    my $f = $group->[0];
+    if ($f && $f->has_tag($group_tag)) {
+        ($label) = $f->get_tag_values($group_tag);
+    }
+    printf STDERR ("  GROUP [%s]:%s\n",
+                   $label,
+                   join(' ',
+                        map { $_->primary_tag } @$group));
+    return;
+}
+
+sub _write_sf {
+    # debugging aid: prints the primary tag of one seqfeature to STDERR
+    my ($self, $sf) = @_;
+    my $type = $sf->primary_tag;
+    printf STDERR "TYPE:%s\n", $type;
+    return;
+}
+
+sub _write_sf_detail {
+    # debugging aid: prints the primary tag of a seqfeature to STDERR,
+    # followed by one "start,end [strand]" line per sub-location
+    my ($self, $sf) = @_;
+    printf STDERR "TYPE:%s\n", $sf->primary_tag;
+    foreach my $loc ($sf->location->each_Location) {
+        printf STDERR "  %s,%s [%s]\n", $loc->start, $loc->end, $loc->strand;
+    }
+    return;
+}
+
+sub _write_hier {
+    # debugging aid: prints a containment hierarchy to STDERR, one feature
+    # per line as "<primary_tag> <product>", indented two spaces per level;
+    # prints nothing unless verbose > 0
+    #
+    #   $sfs    : arrayref of seqfeatures at this level (may be undef)
+    #   $indent : nesting depth of this level (default 0)
+    my ($self, $sfs, $indent) = @_;
+    $indent ||= 0;
+    return unless $self->verbose > 0;
+
+    foreach my $sf (@{$sfs || []}) {
+        my $label = '?';
+        if ($sf->has_tag('product')) {
+            ($label) = $sf->get_tag_values('product');
+        }
+        # the label is passed as an argument, never as part of the format:
+        # product names may legitimately contain '%'
+        printf STDERR "%s%s %s\n", '  ' x $indent, $sf->primary_tag, $label;
+        my @sub_sfs = $sf->sub_SeqFeature;
+        $self->_write_hier(\@sub_sfs, $indent+1);
+    }
+    return;
+}
+
+# -----------------------------------------------
+#
+# returns all possible containers for an SF based
+# on splice site coordinates; splice site coords
+# must be contained
+#
+# args:    the seqfeature, followed by the list of
+#          candidate container seqfeatures
+# returns: flat list ( $container1 => $score1,
+#          $container2 => $score2, ... ) holding only
+#          the candidates that can contain the SF
+# -----------------------------------------------
+sub _resolve_container_for_sf{
+    my ($self, $sf, @possible_container_sfs) = @_;
+
+    my @coords = $self->_get_splice_coords_for_sf($sf);
+    my $start = $sf->start;
+    my $end = $sf->end;
+    my $splice_uniq_str = "@coords";
+
+    my @sf_score_pairs = ();
+    # a CDS is contained by a mRNA if the locations of the splice
+    # coordinates are identical
+    foreach my $container_sf (@possible_container_sfs) {
+        my @container_coords =
+          $self->_get_splice_coords_for_sf($container_sf);
+
+        # the child's splice coordinates must occur as a run of WHOLE
+        # coordinates inside the container's list; both strings are
+        # padded with the separator so that e.g. "100 200" is not
+        # found inside "1100 2001"
+        my $inside =
+          !$splice_uniq_str ||
+            index(" @container_coords ", " $splice_uniq_str ") > -1;
+        if ($inside) {
+            # the container cannot be smaller than the thing contained
+            if ($container_sf->start > $start || $container_sf->end < $end) {
+                $inside = 0;
+            }
+        }
+        if ($self->verbose > 0) {
+            printf STDERR "    Checking containment:[$inside] (@container_coords) IN ($splice_uniq_str)\n";
+        }
+        if ($inside) {
+            # SCORE: matching (ss-scoords+2)/(n-container-ss-coords+2)
+            my $score =
+              (scalar(@coords)+2)/(scalar(@container_coords)+2);
+            push(@sf_score_pairs,
+                 $container_sf=>$score);
+        }
+    }
+    # return array ( $sf1=>$score1, $sf2=>$score2, ...)
+    return @sf_score_pairs;
+}
+
+sub _get_splice_coords_for_sf {
+    # Returns the internal boundary coordinates of a seqfeature: the
+    # (start, end) values of its location(s) in the order given, minus
+    # the very first start and the very last end.
+    my ($self, $sf) = @_;
+
+    # a split location contributes one (start, end) pair per sub-location;
+    # any other location contributes a single pair
+    my $location = $sf->location;
+    my @locs = $location->isa("Bio::Location::SplitLocationI")
+      ? $location->each_Location
+      : ($location);
+
+    # coordinates are taken as (start, end) irrespective of strand
+    my @coords;
+    foreach my $loc (@locs) {
+        push(@coords, $loc->start, $loc->end);
+    }
+
+    # remove first and last leaving only splice sites
+    shift @coords;
+    pop @coords;
+    return @coords;
+}
+
+=head2 feature_from_splitloc
+
+ Title   : feature_from_splitloc
+ Usage   : $unflattener->feature_from_splitloc(-features=>$sfs);
+ Function:
+ Example :
+ Returns : 
+ Args    : see below
+
+At this time all this method does is generate exons for mRNA or other RNA features
+
+Arguments:
+
+  -feature:    a Bio::SeqFeatureI object (that conforms to Bio::FeatureHolderI)
+  -seq:        a Bio::SeqI object that contains Bio::SeqFeatureI objects
+  -features:   an arrayref of Bio::SeqFeatureI objects
+
+
+=cut
+
+sub feature_from_splitloc{
+    # Gives every RNA feature (primary tag matching /RNA/) one 'exon'
+    # subfeature per sub-location of its location, then replaces the
+    # feature's own location so that the exons define its extent.
+    #
+    # Named arguments (decoded with _rearrange):
+    #   -feature   a single feature to process
+    #   -features  arrayref of features to process
+    #   -seq       a Bio::SeqI; when given, ALL of its features are
+    #              processed and -feature/-features are ignored
+    #
+    # Returns nothing; the features are modified in place.
+    my ($self, @args) = @_;
+
+    my ($sf, $seq, $sfs) =
+      $self->_rearrange([qw(FEATURE
+                            SEQ
+                            FEATURES
+                           )],
+                        @args);
+    my @sfs = (@{$sfs || []});
+    push(@sfs, $sf) if $sf;
+    if ($seq) {
+        $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
+        @sfs = $seq->get_all_SeqFeatures;
+    }
+    my @exons = grep {$_->primary_tag eq 'exon'} @sfs;
+    if (@exons) {
+        # NOTE(review): if problem() returns instead of throwing, the
+        # inference below still runs despite this message - confirm intent
+        $self->problem(2,
+                       "There are already exons, so I will not infer exons");
+    }
+
+    # index of features by type+location
+    my %loc_h = ();
+
+    # infer for every feature
+    foreach my $sf (@sfs) {
+
+        $sf->isa("Bio::SeqFeatureI") || $self->throw("$sf NOT A SeqFeatureI");
+        $sf->isa("Bio::FeatureHolderI") || $self->throw("$sf NOT A FeatureHolderI");
+
+        # mRNA and every other *RNA type
+        my $type = $sf->primary_tag;
+        next unless $type =~ /RNA/;
+
+        # an mRNA from genbank will have a discontinuous location,
+        # with each sub-location being equivalent to an exon
+        my @locs = $sf->location;
+
+        if ($sf->location->isa("Bio::Location::SplitLocationI")) {
+            @locs = $sf->location->each_Location;
+        }
+
+        if (!@locs) {
+            # dump the offending feature on STDERR, where all the other
+            # diagnostics of this module go, before giving up
+            use Data::Dumper;
+            print STDERR Dumper($sf);
+            $self->throw("ASSERTION ERROR: sf has no location objects");
+        }
+
+        # make exons from locations
+        my @subsfs = ();
+        foreach my $loc (@locs) {
+            my $subsf = Bio::SeqFeature::Generic->new(-location=>$loc,
+                                                      -primary_tag=>'exon');
+            ## Provide seq_id to new feature:
+            $subsf->seq_id($sf->seq_id) if $sf->seq_id;
+            $subsf->source_tag($sf->source_tag) if $sf->source_tag;
+            ## Transfer /locus_tag and /gene tag values to inferred
+            ## features.  TODO: Perhaps? this should not be done
+            ## indiscriminantly but rather by virtue of the setting
+            ## of group_tag.
+            foreach my $tag (grep /gene|locus_tag/, $sf->get_all_tags) {
+                my @vals = $sf->get_tag_values($tag);
+                $subsf->add_tag_value($tag, @vals);
+            }
+
+            my $locstr = 'exon::'.$self->_locstr($subsf);
+
+            # re-use feature if type and location the same
+            if ($loc_h{$locstr}) {
+                $subsf = $loc_h{$locstr};
+            }
+            else {
+                $loc_h{$locstr} = $subsf;
+            }
+            push(@subsfs, $subsf);
+        }
+
+        # PARANOID CHECK
+        $self->_check_order_is_consistent($sf->location->strand, @subsfs);
+        #----
+
+        $sf->location(Bio::Location::Simple->new());
+
+        # we allow the exons to define the boundaries of the transcript
+        $sf->add_SeqFeature($_, 'EXPAND') foreach @subsfs;
+
+        if (!$sf->location->strand) {
+            # correct weird bioperl bug in previous versions;
+            # strand was not being set correctly
+            $sf->location->strand($subsfs[0]->location->strand);
+        }
+    }
+    return;
+}
+
+#sub merge_features_with_same_loc {
+#   my ($self, at args) = @_;
+
+#   my($sfs, $seq) =
+#     $self->_rearrange([qw(FEATURES
+#                           SEQ
+#                          )],
+#                          @args);
+#   my @sfs = (@$sfs);
+#   if ($seq) {
+#       $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
+#       @sfs = $seq->get_all_SeqFeatures;
+#   }
+
+   
+#   my %loc_h = ();
+#   foreach my $sf (@sfs) {
+#       my $type = $sf->primary_tag;
+#       my $locstr = $self->_locstr($sf);
+##       $loc_h{$type.$locstr}
+#       push(@{$exon_h{$self->_locstr($_)}}, $_) foreach @exons;
+#   }
+#}
+
+=head2 infer_mRNA_from_CDS
+
+ Title   : infer_mRNA_from_CDS
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+given a "type 1" containment hierarchy
+
+  gene
+    CDS
+      exon
+
+this will infer the uniform "type 0" containment hierarchy
+
+  gene
+    mRNA
+      CDS
+      exon
+
+all the children of the CDS will be moved to the mRNA
+
+a "type 2" containment hierarchy is mixed type "0" and "1" (for
+example, see ftp.ncbi.nih.gov/genomes/Schizosaccharomyces_pombe/)
+
+=cut
+
+sub infer_mRNA_from_CDS{
+    # For every feature that directly holds CDS subfeatures, wraps each
+    # CDS in a newly created mRNA feature with the same sub-locations and
+    # moves the children of the CDS to that mRNA.
+    #
+    # Named arguments (decoded with _rearrange):
+    #   -feature  a single feature to process
+    #   -seq      a Bio::SeqI; when given, all of its features are
+    #             processed instead
+    #
+    # Returns nothing; the features are modified in place.
+    my ($self, @args) = @_;
+
+    my ($sf, $seq) =
+      $self->_rearrange([qw(FEATURE
+                            SEQ
+                           )],
+                        @args);
+    # with neither -feature nor -seq there is nothing to process
+    my @sfs = defined $sf ? ($sf) : ();
+    if ($seq) {
+        $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
+        @sfs = $seq->get_all_SeqFeatures;
+    }
+
+    foreach my $sf (@sfs) {
+
+        $sf->isa("Bio::SeqFeatureI") || $self->throw("$sf NOT A SeqFeatureI");
+        $sf->isa("Bio::FeatureHolderI") || $self->throw("$sf NOT A FeatureHolderI");
+        if ($self->verbose > 0) {
+            printf STDERR "    Checking %s %s\n", $sf, $sf->primary_tag;
+        }
+
+        if ($sf->primary_tag eq 'mRNA') {
+            $self->problem(2,
+                           "Inferring mRNAs when there are already mRNAs present");
+        }
+
+        my @cdsl = grep {$_->primary_tag eq 'CDS' } $sf->get_SeqFeatures;
+        next unless @cdsl;
+
+        my @children = grep {$_->primary_tag ne 'CDS'} $sf->get_SeqFeatures;
+        my @mrnas = ();
+        foreach my $cds (@cdsl) {
+
+            if ($self->verbose > 0) {
+                # diagnostics go to STDERR, like everywhere else here
+                print STDERR "    Inferring mRNA from CDS $cds\n";
+            }
+            $self->_check_order_is_consistent($cds->location->strand,
+                                              $cds->location->each_Location);
+
+            # the mRNA gets its own copy of every CDS sub-location
+            my $loc = Bio::Location::Split->new;
+            foreach my $cdsexonloc ($cds->location->each_Location) {
+                my $subloc =
+                  Bio::Location::Simple->new(-start=>$cdsexonloc->start,
+                                             -end=>$cdsexonloc->end,
+                                             -strand=>$cdsexonloc->strand);
+                $loc->add_sub_Location($subloc);
+            }
+            my $mrna =
+              Bio::SeqFeature::Generic->new(-location=>$loc,
+                                            -primary_tag=>'mRNA');
+
+            ## Provide seq_id to new feature:
+            $mrna->seq_id($cds->seq_id) if $cds->seq_id;
+            $mrna->source_tag($cds->source_tag) if $cds->source_tag;
+
+            $self->_check_order_is_consistent($mrna->location->strand,
+                                              $mrna->location->each_Location);
+
+            # make the mRNA hold the CDS; no EXPAND option,
+            # the CDS cannot be wider than the mRNA
+            $mrna->add_SeqFeature($cds);
+
+            # mRNA steals children of CDS
+            foreach my $subsf ($cds->get_SeqFeatures) {
+                $mrna->add_SeqFeature($subsf);
+            }
+            $cds->remove_SeqFeatures;
+            push(@mrnas, $mrna);
+        }
+        # change gene/CDS to gene/mRNA
+        $sf->remove_SeqFeatures;
+        $sf->add_SeqFeature($_) foreach (@mrnas, @children);
+    }
+    return;
+}
+
+=head2 remove_types
+
+ Title   : remove_types
+ Usage   : $unf->remove_types(-seq=>$seq, -types=>["mRNA"]);
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+removes features of a set type
+
+useful for pre-filtering a genbank record; eg to get rid of STSs
+
+also, there is no way to unflatten
+ftp.ncbi.nih.gov/genomes/Schizosaccharomyces_pombe/ UNLESS the bogus
+mRNAs in these records are removed (or changed to a different type) -
+they just confuse things too much
+
+=cut
+
+sub remove_types{
+    # Removes from a sequence every feature whose primary tag is listed.
+    #
+    # Named arguments (decoded with _rearrange):
+    #   -seq    the Bio::SeqI object to filter
+    #   -types  arrayref of primary tags to remove; a missing list
+    #           removes nothing
+    #
+    # The sequence's feature list is rebuilt from get_all_SeqFeatures
+    # minus the removed types. Returns nothing.
+    my ($self, @args) = @_;
+
+    my ($seq, $types) =
+      $self->_rearrange([qw(
+                            SEQ
+                            TYPES
+                           )],
+                        @args);
+    $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
+
+    my %rh = map { ($_ => 1) } @{$types || []};
+    my @kept = grep { !$rh{$_->primary_tag} } $seq->get_all_SeqFeatures;
+    $seq->remove_SeqFeatures;
+    $seq->add_SeqFeature($_) foreach @kept;
+    return;
+}
+
+
+# _check_order_is_consistent($strand,$ranges) RETURNS BOOL
+#
+# note: the value of this test is moot - there are many valid,
+# if unusual cases where it would flag an anomaly. for example
+# transpliced genes such as mod(mdg4) in dmel on AE003744, and
+# the following spliced gene on NC_001284:
+#
+#     mRNA            complement(join(20571..20717,21692..22086,190740..190761,
+#                     140724..141939,142769..142998))
+#                     /gene="nad5"
+#                     /note="trans-splicing, RNA editing"
+#                     /db_xref="GeneID:814567"
+#
+# note how the exons are not in order
+#  this will flag a level-3 warning, the user of this module
+#  can ignore this and deal appropriately with the resulting
+#  unordered exons
+sub _check_order_is_consistent {
+    # Checks a list of ranges for a consistent strand and ordering.
+    #
+    #   $parent_strand : accepted for call compatibility, never consulted
+    #   @ranges        : objects providing start, end and strand
+    #
+    # Returns nothing for an empty list; 1 when the ranges pass (mixed
+    # strands are reported as a level-1 problem and pass automatically);
+    # 0 when the ranges are neither consistently ascending nor
+    # consistently descending (level-3 problem).
+    my ($self, $parent_strand, @ranges) = @_;
+    return unless @ranges;
+
+    my $rangestr =
+      join(" ", map { sprintf("[%s,%s]", $_->start, $_->end) } @ranges);
+
+    # every range must be on the strand of the first one
+    my $strand = $ranges[0]->strand;
+    foreach my $range (@ranges[1 .. $#ranges]) {
+        next if $range->strand == $strand;
+        # mixed ranges - autopass
+        # some mRNAs have exons on both strands; for
+        # example, the dmel mod(mdg4) gene which is
+        # trans-spliced (in actual fact two mRNAs)
+        $self->problem(1,"inconsistent strands. Trans-spliced gene? Range: $rangestr");
+        return 1;
+    }
+
+    # neighbouring ranges as [previous, current]
+    my @adjacent = map { [ $ranges[$_ - 1], $ranges[$_] ] } 1 .. $#ranges;
+
+    # first chance: ascending order
+    my $ascending = 1;
+    foreach my $pair (@adjacent) {
+        my ($prev, $curr) = @$pair;
+        next unless $prev->start > $curr->end;
+        $ascending = 0;
+        $self->problem(2,"Ranges not in correct order. Strange ensembl genbank entry? Range: $rangestr");
+        last;
+    }
+    return 1 if $ascending;
+
+    # second chance: sometimes (eg ensembl flavour genbank files)
+    # exons on reverse strand are listed in reverse order
+    # eg join(complement(R1),...,complement(Rn))
+    # where R1 > R2
+    foreach my $pair (@adjacent) {
+        my ($prev, $curr) = @$pair;
+        if ($prev->end < $curr->start) {
+            $self->problem(3,"inconsistent order. Range: $rangestr");
+            return 0;
+        }
+    }
+    return 1; # pass
+}
+
+# PRIVATE METHOD: _locstr($sf)
+#
+# returns a location string for a feature; just the outer boundaries
+sub _locstr {
+    # formats the outer boundaries of a feature as "<start>..<end>"
+    my ($self, $sf) = @_;
+    my ($from, $to) = ($sf->start, $sf->end);
+    return sprintf("%d..%d", $from, $to);
+}
+
+sub iterate_containment_tree {
+    # Depth-first, parent-first walk over a containment tree.
+    #
+    #   $feature_holder : object providing get_SeqFeatures
+    #   $sub            : CODE ref called once per node with the node as
+    #                     its only argument
+    #
+    # Returns nothing.
+    my ($self, $feature_holder, $sub) = @_;
+
+    $sub->($feature_holder);
+
+    # the callback must travel down with the recursion, otherwise the
+    # first child visit dies on an undefined code reference
+    foreach my $child ($feature_holder->get_SeqFeatures) {
+        $self->iterate_containment_tree($child, $sub);
+    }
+    return;
+}
+
+# find_best_pairs($matrix, $size, $i)
+#
+# UNFINISHED STUB: walks row $i (default 0) of $matrix, a reference to
+# an array of arrays, over columns 0 .. $size-1 and skips the undefined
+# entries - but nothing is done with the defined scores and there is no
+# explicit return value.
+#
+# Unlike its neighbours it does not shift off an invocant, so it reads
+# its arguments as a plain function would.
+# NOTE(review): presumably an abandoned precursor of find_best_matches;
+# confirm it has no callers before relying on it or removing it.
+sub find_best_pairs {
+    my $matrix = shift;
+    my $size = shift;
+    my $i = shift || 0;
+
+    for (my $j=0; $j < $size; $j++) {
+	my $score = $matrix->[$i][$j];
+	if (!defined($score)) {
+	    # no score recorded for this cell
+	    next;
+	}
+	
+    }
+    
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/TypedSeqFeatureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/TypedSeqFeatureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeature/TypedSeqFeatureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,114 @@
+
+#
+# BioPerl module for Bio::SeqFeature::TypedSeqFeatureI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::TypedSeqFeatureI - a strongly typed SeqFeature
+
+=head1 SYNOPSIS
+
+
+   # get Sequence Features in some manner, eg
+   # from a Sequence object
+
+    foreach $sf ( $seq->get_SeqFeatures() ) {
+        # all sequence features must have primary_tag() return a string
+        $type_as_string = $sf->primary_tag();
+	if( $sf->isa("Bio::SeqFeature::TypedSeqFeatureI") ) {
+            $ot = $sf->ontology_term();
+            print "Ontology identifier:",$ot->identifier(),
+                  " name:",$ot->name(),
+                  " Description:",$ot->description(),"\n";
+
+        } else {
+            print "Sequence Feature does not have an ontology type\n";
+	}
+
+    }
+
+=head1 DESCRIPTION
+
+This interface describes the extension of SeqFeatureI 
+to being a strongly typed SeqFeature.
+
+Bio::SeqFeature::TypedSeqFeatureI extends the Bio::SeqFeatureI
+interface (ie, a TypedSeqFeatureI feature must also implement
+all the Bio::SeqFeatureI interface as well). 
+
+It is suggested that the primary_tag() method of SeqFeatureI
+return the same as the ontology_term()-E<gt>name() of the TypedSeqFeatureI
+(ie, the "string" name of the ontology type is used as the primary
+tag), but this should not be assumed by client code as there
+are scenarios where one would like to maintain the difference.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email - please email the BioPerl mailing list above.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::TypedSeqFeatureI;
+
+use strict;
+use Carp;
+use Bio::Root::RootI;
+
+use base qw(Bio::SeqFeatureI);
+
+
+=head2 ontology_term
+
+  Title   : ontology_term
+  Usage   : my $ot = $seqfeature->ontology_term()
+  Returns : a Bio::Ontology::TermI compliant object
+  Args    : none
+  Status  : public
+
+This method returns the ontology term for a 
+strongly typed sequence feature. 
+
+=cut
+
+sub ontology_term {
+    # Interface stub: hands off to throw_not_implemented(), so a class
+    # implementing this interface has to supply its own ontology_term().
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeatureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeatureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqFeatureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,637 @@
+# $Id: SeqFeatureI.pm,v 1.66.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::SeqFeatureI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeatureI - Abstract interface of a Sequence Feature
+
+=head1 SYNOPSIS
+
+    # get a seqfeature somehow, eg, from a Sequence with Features attached
+
+    foreach $feat ( $seq->get_SeqFeatures() ) {
+       print "Feature from ", $feat->start, "to ",
+	       $feat->end, " Primary tag  ", $feat->primary_tag,
+	          ", produced by ", $feat->source_tag(), "\n";
+
+       if( $feat->strand == 0 ) {
+		    print "Feature applicable to either strand\n";
+       } else {
+          print "Feature on strand ", $feat->strand,"\n"; # -1,1
+       }
+
+       print "feature location is ",$feat->start, "..",
+          $feat->end, " on strand ", $feat->strand, "\n";
+       print "easy utility to print locations in GenBank/EMBL way ",
+          $feat->location->to_FTstring(), "\n";
+
+       foreach $tag ( $feat->get_all_tags() ) {
+		    print "Feature has tag ", $tag, " with values, ",
+		      join(' ',$feat->get_tag_values($tag)), "\n";
+       }
+	    print "new feature\n" if $feat->has_tag('new');
+	    # features can have sub features
+	    my @subfeat = $feat->get_SeqFeatures();
+	 }
+
+=head1 DESCRIPTION
+
+This interface defines the functions one can expect for any Sequence
+Feature, whatever its implementation or whether it is a more complex
+type (eg, a Gene). This object does not actually provide any
+implementation; it just provides the definitions of what methods one can
+call. See Bio::SeqFeature::Generic for a good standard implementation
+of this object.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeatureI;
+use vars qw($HasInMemory);
+use strict;
+
+BEGIN {
+    # Probe for the optional Bio::DB::InMemoryCache module at compile time
+    # and record the outcome (1 = loaded, 0 = unavailable) in $HasInMemory.
+    eval { require Bio::DB::InMemoryCache };
+    $HasInMemory = $@ ? 0 : 1;
+}
+
+use Bio::Seq;
+
+use Carp;
+
+use base qw(Bio::RangeI Bio::AnnotatableI);
+
+=head1 Bio::SeqFeatureI specific methods
+
+New method interfaces.
+
+=cut
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   : @feats = $feat->get_SeqFeatures();
+ Function: Returns an array of sub Sequence Features
+ Returns : An array
+ Args    : none
+
+=cut
+
+sub get_SeqFeatures{
+   # Interface stub: implementing classes must override this to return
+   # their sub-features.  Any extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+
+   $self->throw_not_implemented();
+}
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $name = $feat->display_name()
+ Function: Returns the human-readable name of the feature for displays.
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub display_name {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes supply the real display name.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+ Function: Returns the primary tag for a feature,
+           eg 'exon'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub primary_tag{
+   # Interface stub: implementing classes must override this to return the
+   # feature's primary tag.  Any extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+
+   $self->throw_not_implemented();
+
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $tag = $feat->source_tag()
+ Function: Returns the source tag for a feature,
+           eg, 'genscan'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub source_tag{
+   # Interface stub: implementing classes must override this to return the
+   # feature's source tag.  Any extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+
+   $self->throw_not_implemented();
+}
+
+
+=head2 attach_seq
+
+ Title   : attach_seq
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+           Bio::Seq object is for the *entire* sequence: ie
+           from 1 to 10000
+
+           Note that it is not guaranteed that if you obtain a feature from
+           an object in bioperl, it will have a sequence attached. Also,
+           implementors of this interface can choose to provide an empty
+           implementation of this method. I.e., there is also no guarantee
+           that if you do attach a sequence, seq() or entire_seq() will not
+           return undef.
+
+           The reason that this method is here on the interface is to enable
+           you to call it on every SeqFeatureI compliant object, and
+           that it will be implemented in a useful way and set to a useful
+           value for the great majority of use cases. Implementors who choose
+           to ignore the call are encouraged to specifically state this in
+           their documentation.
+
+ Example :
+ Returns : TRUE on success
+ Args    : a Bio::PrimarySeqI compliant object
+
+
+=cut
+
+sub attach_seq {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes decide how (or whether) a sequence is attached.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $tseq = $sf->seq()
+ Function: returns the truncated sequence (if there is a sequence attached)
+           for this feature
+ Example :
+ Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
+           bounded by start & end, or undef if there is no sequence attached
+ Args    : none
+
+
+=cut
+
+sub seq {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes return the feature's own stretch of sequence.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 entire_seq
+
+ Title   : entire_seq
+ Usage   : $whole_seq = $sf->entire_seq()
+ Function: gives the entire sequence that this seqfeature is attached to
+ Example :
+ Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
+           sequence attached
+ Args    : none
+
+
+=cut
+
+sub entire_seq {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes return the whole attached sequence.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+
+=head2 seq_id
+
+ Title   : seq_id
+ Usage   : $obj->seq_id($newval)
+ Function: There are many cases when you make a feature that you
+           do know the sequence name, but do not know its actual
+           sequence. This is an attribute such that you can store
+           the ID (e.g., display_id) of the sequence.
+
+           This attribute should *not* be used in GFF dumping, as
+           that should come from the collection in which the seq
+           feature was found.
+ Returns : value of seq_id
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub seq_id {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes provide the seq_id accessor.
+    my ($self) = @_;
+    return $self->throw_not_implemented();
+}
+
+=head2 gff_string
+
+ Title   : gff_string
+ Usage   : $str = $feat->gff_string;
+           $str = $feat->gff_string($gff_formatter);
+ Function: Provides the feature information in GFF format.
+
+           The implementation provided here returns GFF2 by default. If you
+           want a different version, supply an object implementing a method
+           gff_string() accepting a SeqFeatureI object as argument. E.g., to
+           obtain GFF1 format, do the following:
+
+                my $gff1io = Bio::Tools::GFF->new(-gff_version => 1);
+                $gff1str = $feat->gff_string($gff1io);
+
+ Returns : A string
+ Args    : Optionally, an object implementing gff_string().
+
+
+=cut
+
+sub gff_string{
+   # Render this feature through $formatter->gff_string(); when no (or a
+   # false) formatter is passed, the one from _static_gff_formatter() is used.
+   my $self      = shift;
+   my $formatter = shift;
+
+   $formatter ||= $self->_static_gff_formatter;
+   return $formatter->gff_string($self);
+}
+
+my $static_gff_formatter = undef;
+
+=head2 _static_gff_formatter
+
+ Title   : _static_gff_formatter
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _static_gff_formatter{
+   # Return the file-wide default GFF formatter (a Bio::Tools::GFF built
+   # with -gff_version 2), creating it on first use.  Any extra arguments
+   # are accepted and ignored.
+   my ($self,@args) = @_;
+
+   if( !defined $static_gff_formatter ) {
+       # This module has no use/require for Bio::Tools::GFF, so load it
+       # lazily here before calling its constructor.
+       require Bio::Tools::GFF;
+       $static_gff_formatter = Bio::Tools::GFF->new('-gff_version' => 2);
+   }
+   return $static_gff_formatter;
+}
+
+
+=head1 Decorating methods
+
+These methods have an implementation provided by Bio::SeqFeatureI,
+but can be validly overwritten by subclasses
+
+=head2 spliced_seq
+
+  Title   : spliced_seq
+
+  Usage   : $seq = $feature->spliced_seq()
+            $seq = $feature_with_remote_locations->spliced_seq($db_for_seqs)
+
+  Function: Provides a sequence of the feature which is the most
+            semantically "relevant" feature for this sequence. A default
+            implementation is provided which for simple cases returns just
+            the sequence, but for split cases, loops over the split location
+            to return the sequence. In the case of split locations with
+            remote locations, eg
+
+            join(AB000123:5567-5589,80..1144)
+
+            in the case when a database object is passed in, it will attempt
+            to retrieve the sequence from the database object, and "Do the right thing",
+            however if no database object is provided, it will generate the correct
+            number of N's (DNA) or X's (protein, though this is unlikely).
+
+            This function is deliberately "magical" attempting to second guess
+            what a user wants as "the" sequence for this feature.
+
+            Implementing classes are free to override this method with their
+            own magic if they have a better idea what the user wants.
+
+  Args    : [optional]
+            -db        A L<Bio::DB::RandomAccessI> compliant object if
+                       one needs to retrieve remote seqs.
+            -nosort    boolean if the locations should not be sorted
+                       by start location.  This may occur, for instance,
+                       in a circular sequence where a gene span starts
+                       before the end of the sequence and ends after the
+                       sequence start. Example : join(15685..16260,1..207)
+  Returns : A L<Bio::PrimarySeqI> object
+
+=cut
+
+sub spliced_seq {
+    # Assemble the sequence covered by this feature.
+    #
+    # A feature whose location is not a Bio::Location::SplitLocationI is
+    # answered directly by seq().  Otherwise each sub-location is cut from
+    # the attached sequence -- or from a sequence fetched through -db when
+    # the sub-location names a different seq_id -- and the pieces are
+    # joined into a new Bio::Seq whose id is the parent display_id plus
+    # "_spliced_feat".
+    #
+    # Args    : -db     => database handle used for remote sub-locations
+    #           -nosort => true to keep the sub-locations in input order
+    #           (the old positional form ($db, $nosort) still works, with a
+    #           warning)
+    # Returns : a Bio::Seq object (or whatever seq() returns for a
+    #           non-split location)
+    # Throws  : if a sub-location is not a Bio::Location::Atomic, or if a
+    #           sub-location ends beyond the sequence it is cut from
+    my $self = shift;
+    my @args = @_;
+    my ($db,$nosort) = $self->_rearrange([qw(DB NOSORT)], @args);
+
+    # (added 7/7/06 to allow use of the old API, with warnings): the old
+    # positional form is assumed when no argument mentions 'db' or 'nosort'.
+    my $old_api = (!(grep {$_ =~ /(?:nosort|db)/} @args)) ? 1 : 0;
+    if (@args && $old_api) {
+        $self->warn(q(API has changed; please use '-db' or '-nosort' ).
+                     qq(for args. See POD for more details.));
+        $db = shift @args if @args;
+        $nosort = shift @args if @args;
+    };
+
+    if( $db && ref($db) && ! $db->isa('Bio::DB::RandomAccessI') ) {
+        $self->warn("Must pass in a valid Bio::DB::RandomAccessI object".
+                    " for access to remote locations for spliced_seq");
+        $db = undef;
+    } elsif( defined $db && $HasInMemory &&
+            $db->isa('Bio::DB::InMemoryCache') ) {
+        # NOTE(review): this wraps a handle that already is an
+        # InMemoryCache; the intent may have been "! $db->isa(...)".
+        # Behaviour deliberately left unchanged -- confirm before altering.
+        $db = Bio::DB::InMemoryCache->new(-seqdb => $db);
+    }
+
+    if( ! $self->location->isa("Bio::Location::SplitLocationI") ) {
+        return $self->seq(); # nice and easy!
+    }
+
+    my $seqstr = '';
+    my $seqid = $self->entire_seq->display_id;
+    # This is to deal with reverse strand features
+    # so we are really sorting features 5' -> 3' on their strand
+    # i.e. rev strand features will be sorted largest to smallest
+    # as this how revcom CDSes seem to be annotated in genbank.
+    # Might need to eventually allow this to be programable?
+    # (can I mention how much fun this is NOT! --jason)
+
+    my ($mixed,$mixedloc, $fstrand) = (0);
+
+    if( $self->isa('Bio::Das::SegmentI') &&
+        ! $self->absolute ) {
+        $self->warn("Calling spliced_seq with a Bio::Das::SegmentI which does not have absolute set to 1 -- be warned you may not be getting things on the correct strand");
+    }
+
+    my @locset = $self->location->each_Location;
+    my @locs;
+    if( ! $nosort ) {
+        @locs = map { $_->[0] }
+        # sort so that most negative is first basically to order
+        # the features on the opposite strand 5'->3' on their strand
+        # rather than they way most are input which is on the fwd strand
+
+        sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation
+        map {
+            $fstrand = $_->strand unless defined $fstrand;
+            $mixed = 1 if defined $_->strand && $fstrand != $_->strand;
+            if( defined $_->seq_id ) {
+                $mixedloc = 1 if( $_->seq_id ne $seqid );
+            }
+            [ $_, $_->start * ($_->strand || 1)];
+        } @locset;
+
+        if ( $mixed ) {
+            $self->warn("Mixed strand locations, spliced seq using the input order rather than trying to sort");
+            @locs = @locset;
+        }
+    } else {
+        # use the original order instead of trying to sort
+        @locs = @locset;
+        $fstrand = $locs[0]->strand;
+    }
+
+    foreach my $loc ( @locs ) {
+        if( ! $loc->isa("Bio::Location::Atomic") ) {
+            $self->throw("Can only deal with one level deep locations");
+        }
+        my $called_seq;
+        if( $fstrand != $loc->strand ) {
+            $self->warn("feature strand is different from location strand!");
+        }
+        # deal with remote sequences
+
+        if( defined $loc->seq_id &&
+            $loc->seq_id ne $seqid ) {
+            if( defined $db ) {
+                my $sid = $loc->seq_id;
+                # strip a trailing ".<digits>" version suffix before lookup
+                $sid =~ s/\.\d+$//g;
+                eval {
+                    $called_seq = $db->get_Seq_by_acc($sid);
+                };
+                if( $@ ) {
+                    $self->warn("In attempting to join a remote location, sequence $sid was not in database. Will provide padding N's. Full exception \n\n$@");
+                    $called_seq = undef;
+                }
+            } else {
+                $self->warn( "cannot get remote location for ".$loc->seq_id ." without a valid Bio::DB::RandomAccessI database handle (like Bio::DB::GenBank)");
+                $called_seq = undef;
+            }
+            if( !defined $called_seq ) {
+                # Pad with one N per base of the piece that could not be
+                # fetched (not the span of the whole feature).
+                $seqstr .= 'N' x $loc->length;
+                next;
+            }
+        } else {
+            $called_seq = $self->entire_seq;
+        }
+
+        # does the called sequence make sense? Bug 1780
+        if ($called_seq->length < $loc->end) {
+            my $accession = $called_seq->accession;
+            my $end = $loc->end;
+            my $length = $called_seq->length;
+            my $orig_id = $self->seq_id; # originating sequence
+            my ($locus) = $self->get_tagset_values("locus_tag");
+            $self->throw("Location end ($end) exceeds length ($length) of ".
+                         "called sequence $accession.\nCheck sequence version used in ".
+                         "$locus locus-tagged SeqFeature in $orig_id.");
+        }
+
+        if( $self->isa('Bio::Das::SegmentI') ) {
+            my ($s,$e) = ($loc->start,$loc->end);
+            $seqstr .= $called_seq->subseq($s,$e)->seq();
+        } else {
+            # This is dumb, subseq should work on locations...
+            if( $loc->strand == 1 ) {
+                $seqstr .= $called_seq->subseq($loc->start,$loc->end);
+            } else {
+                if( $nosort ) {
+                    # unsorted input: later reverse-complemented pieces go in front
+                    $seqstr = $called_seq->trunc($loc->start,$loc->end)->revcom->seq() . $seqstr;
+                } else {
+                    $seqstr .= $called_seq->trunc($loc->start,$loc->end)->revcom->seq();
+                }
+            }
+        }
+    }
+    my $out = Bio::Seq->new( -id => $self->entire_seq->display_id
+                                    . "_spliced_feat",
+                             -seq => $seqstr);
+
+    return $out;
+}
+
+=head2 location
+
+ Title   : location
+ Usage   : my $location = $seqfeature->location()
+ Function: returns a location object suitable for identifying location
+	   of feature on sequence or parent feature
+ Returns : Bio::LocationI object
+ Args    : none
+
+
+=cut
+
+sub location {
+   # Interface stub: hands off to throw_not_implemented(); implementing
+   # classes return the feature's location object.
+   my $self = shift;
+
+   return $self->throw_not_implemented();
+}
+
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $obj->primary_id($newval)
+ Function:
+ Example :
+ Returns : value of primary_id (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+Primary ID is a synonym for the tag 'ID'
+
+=cut
+
+sub primary_id{
+    # Get/set accessor stored in the 'ID' tag rather than in a
+    # $self->{'primary_id'} slot (change by cjm at fruitfly.org; see
+    # http://bioperl.org/pipermail/bioperl-l/2003-December/014150.html
+    # for the discussion that led to it).
+    my ($self, @new_value) = @_;
+
+    if (@new_value) {
+        # a set replaces whatever 'ID' value was there before
+        $self->remove_tag('ID') if $self->has_tag('ID');
+        $self->add_tag_value('ID', $new_value[0]);
+    }
+
+    # only the first 'ID' value is reported
+    my ($id) = $self->get_tagset_values('ID');
+    return $id;
+}
+
+sub generate_unique_persistent_id {
+    # DEPRECATED - use Bio::SeqFeature::Tools::IDHandler directly.
+    # Loads the handler on demand and forwards this feature to its
+    # generate_unique_persistent_id() method.
+    my ($self) = @_;
+    require "Bio/SeqFeature/Tools/IDHandler.pm";
+    my $handler = Bio::SeqFeature::Tools::IDHandler->new;
+    $handler->generate_unique_persistent_id($self);
+}
+
+=head1 Bio::RangeI methods
+
+These methods are inherited from RangeI and can be used
+directly from a SeqFeatureI interface. Remember that a
+SeqFeature is-a RangeI, and so wherever you see RangeI you
+can use a feature ($r in the below documentation).
+
+=cut
+
+=head2 start()
+
+ See L<Bio::RangeI>
+
+=head2 end()
+
+ See L<Bio::RangeI>
+
+=head2 strand()
+
+ See L<Bio::RangeI>
+
+=head2 overlaps()
+
+ See L<Bio::RangeI>
+
+=head2 contains()
+
+ See L<Bio::RangeI>
+
+=head2 equals()
+
+ See L<Bio::RangeI>
+
+=head2 intersection()
+
+ See L<Bio::RangeI>
+
+=head2 union()
+
+ See L<Bio::RangeI>
+
+=head1 Bio::AnnotatableI methods
+
+=cut
+
+=head2 has_tag()
+
+ B<Deprecated>.  See L<Bio::AnnotatableI>
+
+=head2 remove_tag()
+
+ B<Deprecated>.  See L<Bio::AnnotatableI>
+
+=head2 add_tag_value()
+
+ B<Deprecated>.  See L<Bio::AnnotatableI>
+
+=head2 get_tag_values()
+
+ B<Deprecated>.  See L<Bio::AnnotatableI>
+
+=head2 get_tagset_values()
+
+ B<Deprecated>.  See L<Bio::AnnotatableI>
+
+=head2 get_all_tags()
+
+ B<Deprecated>.  See L<Bio::AnnotatableI>
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,238 @@
+# $Id: SeqI.pm,v 1.29.4.4 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::SeqI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqI [Developers] - Abstract Interface of Sequence (with features)
+
+=head1 SYNOPSIS
+
+    # Bio::SeqI is the interface class for sequences.
+
+    # If you are a newcomer to bioperl, you should
+    # start with Bio::Seq documentation. This
+    # documentation is mainly for developers using
+    # Bioperl.
+
+    # Bio::SeqI implements Bio::PrimarySeqI
+    $seq      = $seqobj->seq(); # actual sequence as a string
+    $seqstr   = $seqobj->subseq(10,50);
+
+    # Bio::SeqI has annotationcollections
+
+    $ann      = $seqobj->annotation(); # annotation object
+
+    # Bio::SeqI has sequence features
+    # features must implement Bio::SeqFeatureI
+
+    @features = $seqobj->get_SeqFeatures(); # just top level
+    @features = $seqobj->get_all_SeqFeatures(); # descend into sub features
+
+
+
+=head1 DESCRIPTION
+
+Bio::SeqI is the abstract interface of annotated Sequences. These
+methods are those which you can be guaranteed to get for any Bio::SeqI
+- for most users of the package the documentation (and methods) in
+this class are not that useful - this is a developers only class which
+defines what methods have to be implemented by other Perl objects to
+comply with the Bio::SeqI interface. Go "perldoc Bio::Seq" or "man
+Bio::Seq" for more information.
+
+
+There aren't many here, because too many complicated functions here
+prevent implementations which are just wrappers around a database or
+similar delayed mechanisms.
+
+Most of the clever stuff happens inside the SeqFeatureI system.
+
+A good reference implementation is Bio::Seq which is a pure perl
+implementation of this class with a lot of extra pieces for extra
+manipulation.  However, if you want to be able to use any sequence
+object in your analysis, if you can do it just using these methods,
+then you know you will be future proof and compatible with other
+implementations of Seq.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::SeqI;
+use strict;
+
+
+# Object preamble - inherits from Bio::PrimarySeqI
+
+use base qw(Bio::PrimarySeqI Bio::AnnotatableI Bio::FeatureHolderI);
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   : my @feats = $seq->get_SeqFeatures();
+ Function: retrieve just the toplevel sequence features attached to this seq
+ Returns : array of Bio::SeqFeatureI objects
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> and L<Bio::SeqFeatureI> for more information.
+
+=cut
+
+=head2 get_all_SeqFeatures
+
+ Title   : get_all_SeqFeatures
+ Usage   : @features = $annseq->get_all_SeqFeatures()
+ Function: returns all SeqFeatures, including sub SeqFeatures
+ Returns : an array of Bio::SeqFeatureI objects
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> and L<Bio::SeqFeatureI> for more information.
+
+=cut
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : $seq->feature_count()
+ Function: Return the number of SeqFeatures attached to a sequence
+ Returns : integer representing the number of SeqFeatures
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> for more information.
+
+=cut
+
+=head2 seq
+
+ Title   : seq
+ Usage   : my $string = $seq->seq();
+ Function: Retrieves the sequence string for the sequence object
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub seq{
+   # Interface stub: hands off to throw_not_implemented(); implementing
+   # classes return the sequence string.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 write_GFF
+
+ Title   : write_GFF
+ Usage   : $seq->write_GFF(\*FILEHANDLE);
+ Function: Convenience method to write out all the sequence features
+           in GFF format to the provided filehandle (STDOUT by default)
+ Returns : none
+ Args    : [optional] filehandle to write to (default is STDOUT)
+
+
+=cut
+
+sub write_GFF{
+   # Print the gff_string() of every feature from get_all_SeqFeatures(),
+   # one per line, to $fh; STDOUT is used when no filehandle is given.
+   my $self = shift;
+   my $fh   = shift;
+
+   $fh = \*STDOUT unless $fh;
+
+   for my $sf ( $self->get_all_SeqFeatures() ) {
+       print {$fh} $sf->gff_string, "\n";
+   }
+
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($seq_obj)
+ Function: retrieve the attached annotation object
+ Returns : Bio::AnnotationCollectionI or none;
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information. This method comes through extension from
+L<Bio::AnnotatableI>.
+
+=cut
+
+=head2 species
+
+ Title   : species
+ Usage   :
+ Function: Gets or sets the species
+ Example : $species = $self->species();
+ Returns : Bio::Species object
+ Args    : Bio::Species object or none;
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes provide the species accessor.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 primary_seq
+
+ Title   : primary_seq
+ Usage   : $obj->primary_seq($newval)
+ Function: Retrieve the underlying Bio::PrimarySeqI object if available.
+           This is in the event one has a sequence with lots of features
+           but want to be able to narrow the object to just one with
+           the basics of a sequence (no features or annotations).
+ Returns : Bio::PrimarySeqI
+ Args    : Bio::PrimarySeqI or none;
+
+See L<Bio::PrimarySeqI> for more information
+
+=cut
+
+sub primary_seq {
+    # Interface stub: hands off to throw_not_implemented(); implementing
+    # classes provide the primary_seq accessor.
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/FTHelper.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/FTHelper.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/FTHelper.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,314 @@
+# $Id: FTHelper.pm,v 1.61.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::FTHelper
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::FTHelper - Helper class for Embl/Genbank feature tables
+
+=head1 SYNOPSIS
+
+Used by Bio::SeqIO::EMBL,Bio::SeqIO::genbank, and Bio::SeqIO::swiss to
+help process the Feature Table
+
+=head1 DESCRIPTION
+
+Represents one particular Feature with the following fields
+
+      key - the key of the feature
+      loc - the location string of the feature
+      <other fields> - other fields
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqIO::FTHelper;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::Location::Simple;
+use Bio::Location::Fuzzy;
+use Bio::Location::Split;
+
+
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    my ($class, @args) = @_;
+
+    # Deliberately no chained constructor call: very many of these
+    # helper objects get created.  @args is accepted but not used.
+    # The object starts out with an empty qualifier table in '_field'.
+    return bless { '_field' => {} }, $class;
+}
+
+=head2 _generic_seqfeature
+
+ Title   : _generic_seqfeature
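+ Usage   : $sf = $fthelper->_generic_seqfeature($locfac, $seqid, "GenBank")
+ Function: processes fthelper into a generic seqfeature
+ Returns : the new Bio::SeqFeature::Generic object on success; nothing
+           (undef/empty list) if the location string could not be parsed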
+ Args    : The Bio::Factory::LocationFactoryI object to use for parsing
+           location strings. The ID (e.g., display_id) of the sequence on which
+           this feature is located, optionally a string indicating the source
+           (GenBank/EMBL/SwissProt)
+
+
+=cut
+
+sub _generic_seqfeature {
+    my ($fth, $locfac, $seqid, $source) = @_;
+
+    # Fall back to a catch-all source label when the caller gave none.
+    $source = "EMBL/GenBank/SwissProt" unless defined $source;
+
+    # Create the (still empty) feature object up front.
+    my $feature = Bio::SeqFeature::Generic->direct_new();
+
+    # Turn the location string into a location object.  The factory may
+    # throw; the eval traps that so a bad location only costs this feature.
+    my $location;
+    eval { $location = $locfac->from_string($fth->loc); };
+
+    unless ($location) {
+        # Report what was being parsed (plus whatever the eval left in $@)
+        # and hand back nothing.
+        $fth->warn("exception while parsing location line [" . $fth->loc .
+                   "] in reading $source, ignoring feature " .
+                   $fth->key() . " (seqid=" . $seqid . "): " . $@);
+        return;
+    }
+
+    # Stamp the sequence ID onto non-remote locations only
+    # (propagates if it is a split location).
+    $location->seq_id($seqid) if $seqid && !$location->is_remote();
+
+    # Core attributes of the feature.
+    $feature->location($location);
+    $feature->primary_tag($fth->key);
+    $feature->source_tag($source);
+    $feature->seq_id($seqid);
+
+    # Copy every stored qualifier value across as a tag/value pair.
+    my $qualifiers = $fth->field;
+    for my $tag ( keys %{$qualifiers} ) {
+        for my $tag_value ( @{ $qualifiers->{$tag} } ) {
+            $feature->add_tag_value($tag, $tag_value);
+        }
+    }
+
+    return $feature;
+}
+
+
+=head2 from_SeqFeature
+
+ Title   : from_SeqFeature
+ Usage   : @fthelperlist = Bio::SeqIO::FTHelper::from_SeqFeature($sf,
+						     $context_annseq);
+ Function: constructor of fthelpers from SeqFeatures
+         :
+         : The additional annseq argument is to allow the building of FTHelper
+         : lines relevant to particular sequences (ie, when features are spread over
+         : entries, knowing how to build this)
+ Returns : an array of FThelpers
+ Args    : seq features
+
+
+=cut
+
+# Build the list of FTHelper objects that represent one sequence feature.
+#
+# Called as a plain function (not a method):
+#   $sf             - the feature to convert
+#   $context_annseq - passed through to $sf->to_FTHelper when the feature
+#                     provides that method; otherwise only used for error
+#                     reporting
+# Returns whatever $sf->to_FTHelper returns when available, otherwise a
+# one-element list holding a freshly built Bio::SeqIO::FTHelper.
+sub from_SeqFeature {
+  my ($sf, $context_annseq) = @_;
+  my @ret;
+
+  #
+  # If this object knows how to make FThelpers, then let it
+  # - this allows us to store *really* weird objects that can write
+  # themselves to the EMBL/GenBank...
+  #
+
+  if ( $sf->can("to_FTHelper") ) {
+      return $sf->to_FTHelper($context_annseq);
+  }
+
+  my $fth = Bio::SeqIO::FTHelper->new();
+  my $key = $sf->primary_tag();
+  my $locstr = $sf->location->to_FTstring;
+
+  # ES 25/06/01 Commented out this code, Jason to double check
+  #The location FT string for all simple subseqfeatures is already
+  #in the Split location FT string
+
+  # going into sub features
+  #foreach my $sub ( $sf->sub_SeqFeature() ) {
+  #my @subfth = &Bio::SeqIO::FTHelper::from_SeqFeature($sub);
+  #push(@ret, @subfth);
+  #}
+
+  $fth->loc($locstr);
+  $fth->key($key);
+
+  # The 'note' qualifier always exists (possibly empty); a true score
+  # and a true frame are recorded in it as "name=value" strings.
+  $fth->field->{'note'} = [];
+  #$sf->source_tag && do { push(@{$fth->field->{'note'}},"source=" . $sf->source_tag ); };
+
+  ($sf->can('score') && $sf->score) && do { push(@{$fth->field->{'note'}},
+                                                 "score=" . $sf->score ); };
+  ($sf->can('frame') && $sf->frame) && do { push(@{$fth->field->{'note'}},
+                                                 "frame=" . $sf->frame ); };
+  #$sf->strand && do { push(@{$fth->field->{'note'}},"strand=" . $sf->strand ); };
+
+  foreach my $tag ( $sf->get_all_tags ) {
+    # Tags which begin with underscores are considered
+    # private, and are therefore not printed
+    next if $tag =~ /^_/;
+    if ( !defined $fth->field->{$tag} ) {
+      $fth->field->{$tag} = [];
+    }
+    foreach my $val ( $sf->get_Annotations($tag) ) {
+      push(@{$fth->field->{$tag}},$val);
+    }
+  }
+  push(@ret, $fth);
+
+  # Sanity checks on the result list.
+  unless (@ret) {
+      $context_annseq->throw("Problem in processing seqfeature $sf - no fthelpers. Error!");
+  }
+  foreach my $ft (@ret) {
+      if ( !$ft->isa('Bio::SeqIO::FTHelper') ) {
+          # Report the offending list element itself (the message
+          # previously interpolated $fth rather than the loop variable).
+          $sf->throw("Problem in processing seqfeature $sf - made a $ft!");
+      }
+  }
+
+  return @ret;
+}
+
+
+=head2 key
+
+ Title   : key
+ Usage   : $obj->key($newval)
+ Function:
+ Example :
+ Returns : value of key
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub key {
+   my ($obj, $value) = @_;
+
+   # Only a defined argument overwrites the stored key, so this
+   # accessor cannot be used to reset the key to undef.
+   $obj->{'key'} = $value if defined $value;
+
+   return $obj->{'key'};
+}
+
+=head2 loc
+
+ Title   : loc
+ Usage   : $obj->loc($newval)
+ Function:
+ Example :
+ Returns : value of loc
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub loc {
+   my ($obj, $value) = @_;
+
+   # Only a defined argument overwrites the stored location string, so
+   # this accessor cannot be used to reset it to undef.
+   $obj->{'loc'} = $value if defined $value;
+
+   return $obj->{'loc'};
+}
+
+
+=head2 field
+
+ Title   : field
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub field {
+   my $self = shift;
+
+   # Hands back the stored reference itself (no copy is made), so
+   # callers that modify the hash modify this object's qualifiers.
+   return $self->{'_field'};
+}
+
+=head2 add_field
+
+ Title   : add_field
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub add_field {
+   my ($self, $key, $val) = @_;
+
+   my $qualifiers = $self->field;
+
+   # First value seen for this qualifier: start its value list.
+   $qualifiers->{$key} = [] unless exists $qualifiers->{$key};
+
+   # Append the value; push stays the last statement, so the sub still
+   # returns what push returns.
+   push( @{ $qualifiers->{$key} }, $val );
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/MultiFile.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/MultiFile.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/MultiFile.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,245 @@
+# $Id: MultiFile.pm,v 1.12.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::MultiFile
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::MultiFile - Treating a set of files as a single input stream
+
+=head1 SYNOPSIS
+
+   $seqin = Bio::SeqIO::MultiFile->new( '-format' => 'Fasta',
+                                        '-files'  => ['file1','file2'] );
+   while((my $seq = $seqin->next_seq)) {
+       # do something with $seq
+   }
+
+=head1 DESCRIPTION
+
+Bio::SeqIO::MultiFile provides a simple way of bundling a whole
+set of identically formatted sequence input files as a single stream.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqIO::MultiFile;
+use strict;
+
+use base qw(Bio::SeqIO);
+
+
+# _initialize is where the heavy stuff will happen when new is called
+
+# Constructor hook: validates the -files / -format arguments, queues the
+# file names and opens the first file.
+# Throws if -files is missing or not a reference, if -format is missing,
+# or if no first file could be loaded.
+sub _initialize {
+  # The archived text had the array sigil mangled to "at args"; restored.
+  my ($self, @args) = @_;
+
+  $self->SUPER::_initialize(@args);
+
+  my ($file_array,$format) = $self->_rearrange([qw(
+					 FILES
+					 FORMAT
+					)],
+				     @args,
+				     );
+  if( !defined $file_array || ! ref $file_array ) {
+      $self->throw("Must have an array files for MultiFile");
+  }
+
+  if( !defined $format ) {
+      $self->throw("Must have a format for MultiFile");
+  }
+
+  # Queue of file names still to be read; _load_file shifts from it.
+  $self->{'_file_array'} = [];
+
+  $self->_set_file(@$file_array);
+  $self->_format($format);
+
+  # _load_file returns 0 when the queue is empty.
+  if( $self->_load_file() == 0 ) {
+     $self->throw("Unable even to initialise the first file");
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Returns the next sequence from the current file, moving on to the next
+# queued file whenever the current one is exhausted.  Returns nothing
+# once no files are left.
+sub next_seq{
+   # The archived text had the array sigil mangled to "at args"; restored.
+   my ($self,@args) = @_;
+
+   # Loop instead of recursing, so a long run of empty files cannot
+   # build up a deep call stack.
+   while (1) {
+       my $seq = $self->_current_seqio->next_seq();
+       return $seq if defined $seq;
+
+       # Current file exhausted; _load_file returns 0 when none remain.
+       return if $self->_load_file() == 0;
+   }
+
+}
+
+=head2 next_primary_seq
+
+ Title   : next_primary_seq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Returns the next primary sequence from the current file, moving on to
+# the next queued file whenever the current one is exhausted.  Returns
+# nothing once no files are left.
+sub next_primary_seq{
+   # The archived text had the array sigil mangled to "at args"; restored.
+   my ($self,@args) = @_;
+
+   # Loop instead of recursing, so a long run of empty files cannot
+   # build up a deep call stack.
+   while (1) {
+       my $seq = $self->_current_seqio->next_primary_seq();
+       return $seq if defined $seq;
+
+       # Current file exhausted; _load_file returns 0 when none remain.
+       return if $self->_load_file() == 0;
+   }
+
+}
+
+=head2 _load_file
+
+ Title   : _load_file
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Takes the next file name off the queue and makes a Bio::SeqIO reader
+# for it the current stream.
+# Returns 0 when the queue is empty, 1 after a reader has been installed.
+sub _load_file{
+   # The archived text had the array sigil mangled to "at args"; restored.
+   my ($self,@args) = @_;
+
+   my $file = shift(@{$self->{'_file_array'}});
+   if( !defined $file ) {
+       return 0;
+   }
+   my $seqio = Bio::SeqIO->new( '-format' => $self->_format(), -file => $file);
+   # should throw an exception - but if not...
+   if( !defined $seqio) {
+       $self->throw("no seqio built for $file!");
+   }
+
+   $self->_current_seqio($seqio);
+   return 1;
+}
+
+=head2 _set_file
+
+ Title   : _set_file
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Appends the given file names to the queue of files still to be read.
+# push is the last statement, so the sub returns what push returns.
+sub _set_file{
+   # The archived text had the array sigil mangled to "at files" here and
+   # in the push below; both restored.
+   my ($self,@files) = @_;
+
+   push(@{$self->{'_file_array'}},@files);
+
+}
+
+=head2 _current_seqio
+
+ Title   : _current_seqio
+ Usage   : $obj->_current_seqio($newval)
+ Function:
+ Example :
+ Returns : value of _current_seqio
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _current_seqio{
+   my ($obj,$value) = @_;
+
+   # Only a defined argument replaces the stored stream object.
+   $obj->{'_current_seqio'} = $value if defined $value;
+
+   return $obj->{'_current_seqio'};
+}
+
+=head2 _format
+
+ Title   : _format
+ Usage   : $obj->_format($newval)
+ Function:
+ Example :
+ Returns : value of _format
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _format{
+   my ($obj,$value) = @_;
+
+   # Only a defined argument replaces the stored format name.
+   $obj->{'_format'} = $value if defined $value;
+
+   return $obj->{'_format'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/abi.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/abi.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/abi.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: abi.pm,v 1.14.4.1 2006/10/02 23:10:28 sendu Exp $
+# BioPerl module for Bio::SeqIO::abi
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::abi - abi trace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from abi trace
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Aaron Mackey
+
+Email: amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::abi;
+use vars qw(@ISA $READ_AVAIL);
+use strict;
+
+use Bio::SeqIO;
+use Bio::Seq::SeqFactory;
+
+push @ISA, qw( Bio::SeqIO );
+
+sub BEGIN {
+    # Try to load the optional staden reader.  $READ_AVAIL records
+    # whether the require left an error in $@; on success the reader
+    # class is also appended to @ISA.
+    eval { require Bio::SeqIO::staden::read; };
+    $READ_AVAIL = $@ ? 0 : 1;
+    push @ISA, "Bio::SeqIO::staden::read" if $READ_AVAIL;
+}
+
+# Constructor hook: installs a default sequence factory (building
+# Bio::Seq::Quality objects) when none is set, then throws a
+# Bio::Root::SystemException if Bio::SeqIO::staden::read could not be
+# loaded at compile time.
+sub _initialize {
+  # The archived text had the array sigil mangled to "at args"; restored.
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      # Direct method call instead of indirect-object "new Class(...)".
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::Seq::Quality'));
+  }
+  unless ($READ_AVAIL) {
+      Bio::Root::Root->throw( -class => 'Bio::Root::SystemException',
+			      -text  => "Bio::SeqIO::staden::read is not available; make sure the bioperl-ext package has been installed successfully!"
+			    );
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::Quality object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # read_trace is called on our filehandle with the 'abi' format tag;
+    # its four return values are used below as sequence string, id,
+    # description and quality values, in that order.
+    my ($bases, $name, $description, $quality) =
+        $self->read_trace($self->_fh, 'abi');
+
+    # Build the sequence object through the configured factory; the id
+    # doubles as the primary id.
+    my $seq_obj = $self->sequence_factory->create(
+        -seq        => $bases,
+        -id         => $name,
+        -primary_id => $name,
+        -desc       => $description,
+        -alphabet   => 'DNA',
+        -qual       => $quality
+    );
+
+    return $seq_obj;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+# Writes each given sequence object to the stream via write_trace with
+# the 'abi' format tag.  Always returns 1.
+sub write_seq {
+    # The archived text had the array sigil mangled to "at seq"; restored.
+    my ($self,@seq) = @_;
+
+    my $fh = $self->_fh;
+    foreach my $seq (@seq) {
+	$self->write_trace($fh, $seq, 'abi');
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ace.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ace.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ace.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,192 @@
+# $Id: ace.pm,v 1.17.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::ace
+#
+# Cared for by James Gilbert <jgrg at sanger.ac.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::ace - ace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and
+from ace file format.  It only parses DNA or
+Peptide objects contained in the ace file,
+producing PrimarySeq objects from them.  All
+other objects in the files will be ignored.  It
+doesn't attempt to parse any annotation attached
+to the containing Sequence or Protein objects,
+which would probably be impossible, since
+everyone's ACeDB schema can be different.
+
+It won't parse ace files containing Timestamps
+correctly either.  This can easily be added if
+considered necessary.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - James Gilbert
+
+Email: jgrg at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+package Bio::SeqIO::ace;
+use strict;
+
+use Bio::Seq;
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::SeqIO);
+
+# Constructor hook: installs a default sequence factory (building
+# Bio::PrimarySeq objects) when none has been set.
+sub _initialize {
+  # The archived text had the array sigil mangled to "at args"; restored.
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      # Direct method call instead of indirect-object "new Class(...)".
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::PrimarySeq'));
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+{
+    # Maps the lower-cased ace class name found in the file onto the
+    # alphabet name handed to the sequence factory.
+    my %bio_mol_type = (
+        'dna'       => 'dna',
+        'peptide'   => 'protein',
+    );
+
+    # Reads forward to the next DNA or Peptide paragraph and returns a
+    # sequence object built from it; returns nothing when no such
+    # paragraph is left.  Throws if the class name has no known alphabet.
+    sub next_seq {
+        my( $self ) = @_;
+        local $/ = "";  # Split input on blank lines
+
+        my $fh = $self->_filehandle;
+        my( $type, $id, $residues );
+
+        # Read each paragraph into a lexical rather than the global $_,
+        # so the caller's $_ is no longer clobbered.
+        while (defined( my $record = <$fh> )) {
+            if (($type, $id) = $record =~ /^(DNA|Peptide)[\s:]+(.+?)\s*\n/si) {
+                $record =~ s/^.+$//m;  # Remove first line
+                $record =~ s/\s+//g;   # Remove whitespace
+                $residues = $record;
+                last;
+            }
+        }
+        # Return if there weren't any DNA or peptide objects
+        return unless $type;
+
+        # Choose the molecule type
+        my $mol_type = $bio_mol_type{lc $type}
+            or $self->throw("Can't get Bio::Seq molecule type for '$type'");
+
+        # Remove quotes from $id
+        $id =~ s/^"|"$//g;
+
+        # Un-escape forward slashes, double quotes, percent signs,
+        # semi-colons, tabs, and backslashes (if you're mad enough
+        # to have any of these as part of object names in your acedb
+        # database).
+        $id =~ s/\\([\/"%;\t\\])/$1/g;
+#"
+        # Called as next_seq(), so give back a Bio::Seq
+        return $self->sequence_factory->create(
+					       -seq        => $residues,
+					       -primary_id => $id,
+					       -display_id => $id,
+					       -alphabet    => $mol_type,
+					       );
+    }
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object(s)
+
+
+=cut
+
+# Writes each given sequence to the stream in ace format: a header pair
+# for the DNA or Protein object followed by the residues in lines of at
+# most 60 characters.  Throws for arguments that are not
+# Bio::PrimarySeqI objects and for alphabets other than 'dna'/'protein'.
+# Always returns 1 otherwise.
+sub write_seq {
+    my ($self, @seq) = @_;
+
+    foreach my $seq (@seq) {
+	$self->throw("Did not provide a valid Bio::PrimarySeqI object")
+	    unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+        my $mol_type = $seq->alphabet;
+        my $id = $seq->display_id;
+
+        # Escape special characters in id
+        $id =~ s/([\/"%;\t\\])/\\$1/g;
+#"
+        # Print header for DNA or Protein object
+        if ($mol_type eq 'dna') {
+            $self->_print(
+                qq{\nSequence : "$id"\nDNA "$id"\n},
+                qq{\nDNA : "$id"\n},
+            );
+        }
+        elsif ($mol_type eq 'protein') {
+            $self->_print(
+                qq{\nProtein : "$id"\nPeptide "$id"\n},
+                qq{\nPeptide : "$id"\n},
+            );
+        }
+        else {
+            $self->throw("Don't know how to produce ACeDB output for '$mol_type'");
+        }
+
+        # Print the sequence.  Start from an empty string so that an
+        # empty sequence does not hand undef to _print.
+        my $str = $seq->seq;
+        my $formatted_seq = '';
+        while ($str =~ /(.{1,60})/g) {
+            $formatted_seq .= "$1\n";
+        }
+        $self->_print($formatted_seq, "\n");
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/agave.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/agave.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/agave.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1782 @@
+# BioPerl module: Bio::SeqIO::agave
+#
+# AGAVE: Architecture for Genomic Annotation, Visualization and Exchange.
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+#
+# The original version of the module can be found here:
+# http://www.lifecde.com/products/agave/agave.pm
+#
+# The DTD for AGAVE XML can be located here:
+# http://www.lifecde.com/products/agave/schema/v2_3/agave.dtd
+#
+#
+=head1 NAME
+
+Bio::SeqIO::agave - AGAVE sequence output stream.
+
+=head1 SYNOPSIS
+
+It is probably best not to use this object directly, but
+rather go through the SeqIO handler system. Go:
+
+  $in  = Bio::SeqIO->new('-file'   => "$file_in",
+                         '-format' => 'EMBL');
+
+  $out = Bio::SeqIO->new('-file'   => ">$file_out",
+                         '-format' => 'AGAVE');
+
+  while (my $seq = $in->next_seq){
+        $out->write_seq($seq);
+  }
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to agave xml file and
+vice-versa.  I (Simon) coded up this module because I needed a parser
+to extract data from AGAVE xml to be utilized by the GenQuire genome
+annotation system (See http://www.bioinformatics.org/Genquire).
+
+***NOTE*** At the moment, not all of the tags are implemented.  In
+general, I followed the output format for the XEMBL project
+http://www.ebi.ac.uk/xembl/
+
+=cut
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Simon K. Chan
+
+Email:
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# ===================
+
+
+# Let the code begin...
+package Bio::SeqIO::agave;
+use strict;
+
+use IO::File;
+
+
+use lib '/home/skchan/checkout/bioperl-live';
+use Bio::SeqFeature::Generic;
+use Bio::Seq;
+use Bio::PrimarySeq;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Reference;
+use Bio::Species;
+
+use lib '/home/skchan/gq/BIO_SUPPORT/XML-Writer/XML-Writer-0.510/blib/lib';
+use XML::Writer;
+
+use Data::Dumper;
+
+use base qw(Bio::SeqIO);
+
+# ==================================================================================
+# Constructor hook: remembers the -file argument and, unless that
+# argument starts with '>' (an output file), parses the AGAVE XML
+# straight away via _process.
+sub _initialize {
+
+    # The archived text had the array sigil mangled to "at args"; restored.
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args); # Run the constructor of the parent class.
+
+    # The arguments are treated as a flat key/value list to find -file.
+    my %tmp = @args ;
+    $self->{'file'} = $tmp{'-file'};
+
+    if ($self->{'file'} !~ /^>/) {
+        $self->_process;
+        # Parse the thing, but only if it is the input file (ie not
+        # outputing agave file, but reading it).
+        $self->{'parsed'} = 1;
+        # Set the flag to let the code know that the agave xml file
+        # has been parsed.
+    }
+    $self->{'seqs_stored'} = 0;
+
+}
+# ==================================================================================
+
+=head2 _process
+
+  Title    : _process
+  Usage    : $self->_process
+  Function : Parses the agave xml file.
+  Args     : None.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : _initialize
+             Method(s) that this method calls   : _process_sciobj
+             FIRST/START sub.
+
+=cut
+
+# Top-level parse loop: reads the input line by line, checks the DOCTYPE,
+# hands every <sciobj ...> element to _process_sciobj (collecting the
+# results in $self->{'sciobj'}) and stops at </sciobj> or at end of input.
+# Throws if a DOCTYPE line names anything other than sciobj / sciobj.dtd.
+sub _process {
+    my ($self) = @_;
+
+    while (1) {
+
+        my $line = $self->_readline;
+
+        # End of input without a closing </sciobj>: stop here.  Without
+        # this check the loop would keep reading undef forever.
+        last unless defined $line;
+
+        next unless $line;
+        next if $line =~ /^\s*$/;
+
+        if ($line =~ /<\?xml version/o) {
+
+            # do nothing
+
+        } elsif ($line =~ /\<!DOCTYPE (\w+) SYSTEM "([\w\.]+)"\>/) {
+
+            $self->throw("Error: This xml file is not in AGAVE format! DOCTYPE: $1 , SYSTEM: $2\n\n")
+                if $1 ne 'sciobj' || $2 ne 'sciobj.dtd';
+
+        } elsif ($line =~ /<sciobj (.*)>/) {
+
+            push @{$self->{'sciobj'}}, $self->_process_sciobj($1);
+
+        } elsif ($line =~ /<\/sciobj>/) {
+
+            last;               # It is finished.
+
+        } else {
+
+            # throw an error message.  The above conditions should
+            # take care all of the possible options...?
+            # $self->throw("Error: Do not recognize this AGAVE xml
+            # line: $line\n\n");
+
+        }
+
+
+    }                           # close while loop
+
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _process_sciobj
+
+  Title    : _process_sciobj
+  Usage    : $self->_process_sciobj
+  Function : Parses the data between the <sciobj></sciobj> tags.
+  Args     : The string that holds the attributes for <sciobj>.
+  Returns  : Data structure holding the values parsed between
+             the <sciobj></sciobj> tags.
+  Note     : Method(s) that call(s) this method : _process
+             Method(s) that this method calls   :
+             _helper_store_attribute_list , _process_contig
+
+=cut
+
+sub _process_sciobj {
+
+    my ($self, $attribute_line) = @_;
+
+    # Record the attributes of the <sciobj> tag itself.
+    my $sciobj;
+    $self->_helper_store_attribute_list($attribute_line, \$sciobj);
+
+    my $current_line = $self->_readline;
+
+    # Zero or more <contig>.  _process_contig receives a reference to
+    # $current_line and advances it, so the loop condition is re-tested
+    # against whatever line the contig parser stopped on.
+    while ($current_line =~ /<contig\s?(.*?)\s?>/) {
+        my $parsed_contig = $self->_process_contig(\$current_line, $1);
+        push @{ $sciobj->{'contig'} }, $parsed_contig;
+    }
+
+    return $sciobj;
+}
+# ==================================================================================
+
+=head2 _process_contig
+
+  Title    : _process_contig
+  Usage    : $self->_process_contig
+  Function : Parses the data between the <contig></contig> tags.
+  Args     : 2 scalars:
+             - reference to a scalar holding the line to be parsed.
+             - scalar holding the attributes for the <contig> tag
+               to be parsed.
+  Returns  : Data structure holding the values parsed between
+             the <contig></contig> tags.
+  Note     : Method(s) that call(s) this method : _process_sciobj
+             Method(s) that this method calls   :
+             _helper_store_attribute_list, _one_tag , _process_fragment_order
+
+=cut
+
+sub _process_contig {
+
+    my ($self, $line, $attribute_line) = @_;
+
+    # Record the attributes of the <contig> tag, then advance the
+    # caller's line (shared through the $line reference).
+    my $contig_data;
+    $self->_helper_store_attribute_list($attribute_line, \$contig_data);
+    ${$line} = $self->_readline;
+
+    # One <db_id>, followed by zero or more <fragment_order>; both
+    # helpers work on the shared line and fill in $contig_data.
+    $self->_one_tag($line, \$contig_data, 'db_id');
+    $self->_process_fragment_order($line, \$contig_data);
+
+    return $contig_data;
+
+}
+# ==================================================================================
+
+=head2 _process_fragment_order
+
+  Title    : _process_fragment_order
+  Usage    : $self->_process_fragment_order
+  Function : Parses the data between the <fragment_order></fragment_order> tags.
+  Args     : 2 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the <fragment_order> data.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : _process_contig
+             Method(s) that this method calls   :
+             _helper_store_attribute_list , _process_fragment_orientation
+
+=cut
+
+sub _process_fragment_order {
+
+    # $line is a reference to the caller's current line; $data_structure
+    # is a reference to the structure being filled, so nothing needs to
+    # be handed back.
+    my ($self, $line, $data_structure) = @_;
+
+    while (${$line} =~ /<fragment_order\s?(.*?)\s?>/) {
+
+        # Attributes of this <fragment_order> tag.
+        my $order_entry;
+        $self->_helper_store_attribute_list($1, \$order_entry);
+
+        # Advance the shared line before parsing the children.
+        ${$line} = $self->_readline;
+
+        # One or more <fragment_orientation>
+        $self->_process_fragment_orientation($line, \$order_entry);
+
+        # Append everything found between <fragment_order> and
+        # </fragment_order> to the parent structure.
+        push @{ ${$data_structure}->{'fragment_order'} }, $order_entry;
+
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _process_fragment_orientation
+
+  Title    : _process_fragment_orientation
+  Usage    : $self->_process_fragment_orientation
+  Function : Parses the data between the <fragment_orientation> and
+             </fragment_orientation> tags.
+  Args     : 2 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the <fragment_orientation> data.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : _process_fragment_order
+
+Method(s) that this method calls : _helper_store_attribute_list ,
+_process_bio_sequence
+
+=cut
+
+sub _process_fragment_orientation {
+
+
+    # $line is a reference to the caller's current line and is advanced
+    # here; $data_structure is a reference to the structure that receives
+    # the parsed 'fragment_orientation' entries.
+    my ($self, $line, $data_structure) = @_;
+
+    # counter to determine the number of iterations within this while loop.
+    my $count = 0;
+
+    # One or more <fragment_orientation>
+    while ($$line =~ /<fragment_orientation\s?(.*?)\s?>/) {
+
+        # Store the attributes of this <fragment_orientation> tag, then
+        # move the shared line forward.
+        my $fragment_orientation;
+        $self->_helper_store_attribute_list($1, \$fragment_orientation);
+        $$line = $self->_readline;
+
+        # One <bio_sequence>
+        # NOTE(review): the result of this match is not checked; if the
+        # line does not contain <bio_sequence ...>, the $1 passed below
+        # is not the <bio_sequence> attribute string -- confirm whether
+        # that case should throw instead.
+        $$line =~ /<bio_sequence\s?(.*?)\s?>/;
+        # Process the data between <bio_sequence></bio_sequence>
+        my $bio_sequence = $self->_process_bio_sequence($line, $1);
+        $fragment_orientation->{'bio_sequence'} = $bio_sequence;
+
+        # Append this entry to the parent structure.
+        push @{$$data_structure->{'fragment_orientation'}}, $fragment_orientation;
+
+        ++$count;
+    }
+
+
+    # At least one <fragment_orientation> is required.
+    $self->throw("Error: Missing <fragment_orientation> tag.  Got this: $$line\n\n")
+        if $count == 0;
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _process_bio_sequence
+
+  Title    : _process_bio_sequence
+  Usage    : $self->_process_bio_sequence
+  Function : Parses the data between the <bio_sequence></bio_sequence> tags.
+  Args     : 2 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - scalar holding the value of the attributes for <bio_sequence>
+  Returns  : data structure holding the values between <bio_sequence></bio_sequence>
+  Note     : Method(s) that call(s) this method : _process_fragment_orientation
+
+Method(s) that this method calls : _helper_store_attribute_list ,
+_one_tag , _question_mark_tag , _star_tag , _process_alt_ids ,
+_process_xrefs , _process_sequence_map
+
+=cut
+
+sub _process_bio_sequence {
+
+    my ($self, $line, $attribute_line) = @_;
+
+    # Record built from the tag's attributes and its child elements.
+    my $record;
+    $self->_helper_store_attribute_list($attribute_line, \$record);
+
+    # Step onto the first child line.
+    $$line = $self->_readline;
+
+    # Exactly one <db_id>.
+    $self->_one_tag($line, \$record, 'db_id');
+
+    # <note> and <description> are each handled as zero-or-one tags.
+    for my $optional_tag ('note', 'description') {
+        $self->_question_mark_tag($line, \$record, $optional_tag);
+    }
+
+    # Zero or more <keyword>.
+    $self->_star_tag($line, \$record, 'keyword');
+
+    # Zero or one <sequence>.
+    $self->_question_mark_tag($line, \$record, 'sequence');
+
+    # <alt_ids> (zero or one) is NOT IMPLEMENTED: nothing is parsed for it.
+
+    # Zero or one <xrefs>.  A false result from _process_xrefs is recorded
+    # as the string 'null'.
+    if ($$line =~ /<xrefs\s?(.*?)\s?>/) {
+        $record->{'xrefs'} = $self->_process_xrefs($line, \$record) || 'null';
+    }
+
+    # <sequence_map>: a single call is made here and its result is appended
+    # to the 'sequence_map' list.
+    if ($$line =~ /<sequence_map\s?(.*?)\s?>/) {
+        push @{$record->{'sequence_map'}},
+            scalar $self->_process_sequence_map($line);
+    }
+
+    return $record;
+
+}
+# ==================================================================================
+
+=head2 _process_xrefs
+
+  Title    : _process_xrefs
+  Usage    : $self->_process_xrefs
+  Function : Parse the data between the <xrefs></xrefs> tags.
+  Args     : reference to a scalar holding the value of the line to be parsed.
+  Return   : Data structure holding the parsed <db_id> and <xref> entries.
+  Note     : Method(s) that call(s) this method: _process_bio_sequence
+             Method(s) that this method calls: _one_tag , _process_xref
+
+=cut
+
+sub _process_xrefs {
+
+    my ($self, $line) = @_;
+
+    # Holds the lists stored under the keys 'db_id' and 'xref'.
+    my $xrefs;
+
+    $$line = $self->_readline;
+
+    # One or more <db_id> or <xref> within <xrefs></xrefs>: make sure there
+    # is at least one.  The alternation has to be grouped; written as
+    # /<db_id|xref.../ it also accepts any line that merely contains
+    # "xref...>", including the closing </xrefs> tag.
+    unless ($$line =~ /<(?:db_id|xref)\s?(.*?)\s?>/) {
+        $self->throw("Error: Missing <db_id> or <xref> tag.  Got this: $$line\n\n");
+    }
+
+    while ($$line =~ /<(db_id|xref)\s?(.*?)\s?>/) {
+
+        # Copy the capture before calling anything else.
+        my $tag_type = $1;
+
+        if ($tag_type eq 'db_id') {
+
+            my $db_id;
+            $self->_one_tag($line, \$db_id, 'db_id');
+            push @{$xrefs->{'db_id'}}, $db_id;
+
+        } else {
+
+            # The pattern only admits db_id or xref, so this is an <xref>.
+            my $xref;
+            $self->_process_xref($line, \$xref);
+            push @{$xrefs->{'xref'}}, $xref;
+
+        }
+
+    }                           # close while loop
+
+    unless ($$line =~ /<\/xrefs>/) {
+        $self->throw("Error: Missing </xrefs> tag.  Got this: $$line\n\n");
+    }
+
+    $$line = $self->_readline; # get the next line to be _processed by the next sub.
+
+    return $xrefs;
+
+}
+# ==================================================================================
+
+=head2 _process_xref
+
+  Title    : _process_xref
+  Usage    : $self->_process_xref
+  Function : Parses the data between the <xref></xref> tags.
+  Args     : 2 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the <xref> data.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : _process_xrefs (note the 's' in 'xrefs')
+             Method(s) that this method calls   : _helper_store_attribute_list , _star_tag
+
+=cut
+
+sub _process_xref {
+
+    my ($self, $line, $xref) = @_;
+
+    # Step from the <xref> line onto its first child.
+    $$line = $self->_readline;
+
+    # One <db_id>: its attributes are stored directly in $$xref.
+    if ($$line =~ /<db_id\s?(.*?)\s?>/) {
+        $self->_helper_store_attribute_list($1, $xref);
+        # Move past the <db_id> line.  Without this the caller's loop sees
+        # the same <db_id> again and records it as a sibling of the <xref>.
+        $$line = $self->_readline;
+    } else {
+        $self->throw("Error:  Missing <db_id> tag.  Got this: $$line\n\n");
+    }
+
+
+    # Zero or more <xref_property>
+    $self->_star_tag($line, $xref, 'xref_property');
+
+    # Consume the closing tag so the caller continues with whatever follows
+    # this <xref>.
+    if ($$line =~ /<\/xref>/) {
+        $$line = $self->_readline;
+    } else {
+        $self->throw("Error:  Missing </xref> tag.  Got this: $$line\n\n");
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _process_sequence_map
+
+  Title    : _process_sequence_map
+  Usage    : $self->_process_sequence_map
+  Function : Parses the data between the <sequence_map></sequence_map> tags.
+  Args     : Reference to scalar holding the line to be parsed.
+  Returns  : Data structure that holds the values that were parsed.
+  Note     : Method(s) that call(s) this method : _process_bio_sequence
+             Method(s) that this method calls   : _helper_store_attribute_list ,
+                _question_mark_tag , _process_annotations
+
+=cut
+
+sub _process_sequence_map {
+
+    my ($self, $line) = @_;
+
+    # Accumulates attributes, the optional note and the annotations.
+    my $map_record;
+
+    # Runs for as long as the current line opens a <sequence_map>.
+    while ($$line =~ /<sequence_map\s?(.*?)\s?>/) {
+
+        my $attributes = $1;
+        if (defined $attributes) {
+            $self->_helper_store_attribute_list($attributes, \$map_record);
+        }
+        $$line = $self->_readline;
+
+        # Zero or one <note>
+        $self->_question_mark_tag($line, \$map_record, 'note');
+
+        # <computations> is NOT IMPLEMENTED: nothing is parsed for it.
+
+        # Zero or one <annotations>
+        if ($$line =~ /<annotations\s?(.*?)\s?>/) {
+            $map_record->{'annotations'} = $self->_process_annotations($line);
+        }
+
+    }
+
+    # The line left over must be the closing tag.  It is not consumed here.
+    unless ($$line =~ /<\/sequence_map>/) {
+        $self->throw("Error:  Missing </sequence_map> tag.  Got this: $$line\n\n");
+    }
+
+    return $map_record;
+
+}
+# ==================================================================================
+
+=head2 _process_annotations
+
+  Title    : _process_annotations
+  Usage    : $self->_process_annotations
+  Function : Parse the data between the <annotations></annotations> tags.
+  Args     : Reference to scalar holding the line to be parsed.
+  Returns  : Data structure that holds the values that were parsed.
+  Note     : Method(s) that call(s) this method : _process_sequence_map
+             Method(s) that this method calls   : _process_seq_feature
+
+=cut
+
+sub _process_annotations {
+
+    my ($self, $line) = @_;
+    # ( seq_feature | gene | comp_result )+
+
+    # Holds the list of parsed features under the key 'seq_feature'.
+    my $annotations;
+
+    $$line = $self->_readline;
+
+    my $count = 0;              # number of child elements handled.
+
+    # One or more of these:
+    while ($$line =~ /<(seq_feature|gene|comp_result)\s?(.*?)\s?>/) {
+
+        if ($$line =~ /<seq_feature\s?(.*?)\s?>/) {
+
+            my $seq_feature = $self->_process_seq_feature($line, $1);
+            push @{$annotations->{'seq_feature'}}, $seq_feature;
+
+        } elsif ($$line =~ /<gene\s?(.*?)\s?>/) {
+
+            # Parsing <gene> is not implemented.  Nothing here consumes the
+            # line, so carrying on would loop forever on it; fail with a
+            # clear message instead.
+            $self->throw("Error:  <gene> within <annotations> is not supported.  Got: $$line\n\n");
+
+        } elsif ($$line =~ /<comp_result\s?(.*?)\s?>/) {
+
+            # Same situation as <gene>: not implemented, so stop rather
+            # than spin on the same line.
+            $self->throw("Error:  <comp_result> within <annotations> is not supported.  Got: $$line\n\n");
+
+        }
+
+        ++$count;
+
+    }                           # closes the while loop.
+
+    $self->throw("Error:  Missing <seq_feature> tag.  Got: $$line\n\n") if $count == 0;
+
+    # Match closing tag:
+    if ($$line =~ /<\/annotations/) {
+
+        $$line = $self->_readline; # get the next line to be _processed by the next sub.
+        return $annotations;
+
+    } else {
+        $self->throw("Error:  Missing </annotations> tag.  Got this: $$line\n\n");
+    }
+
+
+}
+# ==================================================================================
+
+=head2 _process_seq_feature
+
+  Title    : _process_seq_feature
+  Usage    : $self->_process_seq_feature
+  Function : Parses the data between the <seq_feature></seq_feature> tag.
+  Args     : 2 scalars:
+             - Reference to scalar holding the line to be parsed.
+             - Scalar holding the attributes for <seq_feature>.
+  Returns  : Data structure holding the values parsed.
+  Note     : Method(s) that call(s) this method: _process_annotations
+
+Method(s) that this method calls: _helper_store_attribute_list ,
+_process_classification , _question_mark_tag , _one_tag ,
+_process_evidence , _process_qualifier , _process_seq_feature ,
+_process_related_annot
+
+=cut
+
+sub _process_seq_feature {
+
+    my ($self, $line, $attribute_line) = @_;
+
+    # Record for this feature: tag attributes plus parsed child elements.
+    my $seq_feature;
+    $self->_helper_store_attribute_list($attribute_line, \$seq_feature);
+
+    $$line = $self->_readline;
+
+    # Zero or more <classification>
+    $self->_process_classification($line, \$seq_feature);
+
+    # Zero or one <note>
+    $self->_question_mark_tag($line, \$seq_feature, 'note');
+
+    # One <seq_location>
+    $self->_one_tag($line, \$seq_feature, 'seq_location');
+
+    # Zero or one <xrefs>
+    # NOTE(review): _question_mark_tag only matches an element that opens
+    # and closes on one line; confirm whether _process_xrefs was intended.
+    $self->_question_mark_tag($line, \$seq_feature, 'xrefs');
+
+    # Zero or one <evidence>
+    $self->_process_evidence($line, \$seq_feature);
+
+    # Zero or more <qualifier>
+    $self->_process_qualifier($line, \$seq_feature);
+
+    # Zero or more nested <seq_feature>.  The recursive call already leaves
+    # $$line on the line after the nested </seq_feature>, so no extra read
+    # is done here (one would skip a line).  The nested record is kept
+    # under the 'seq_feature' key rather than thrown away.
+    while ($$line =~ /<seq_feature\s?(.*?)\s?>/) {
+        my $nested_feature = $self->_process_seq_feature($line, $1);
+        push @{$seq_feature->{'seq_feature'}}, $nested_feature;
+    }
+
+    # Zero or more <related_annot>.  _process_related_annot stores into the
+    # structure it is given, so it receives this feature's record (a hash
+    # reference by now, since <seq_location> was stored above) rather than
+    # the tag's attribute string.  It leaves $$line on </related_annot>,
+    # hence the read afterwards.
+    while ($$line =~ /<related_annot\s?(.*?)\s?>/) {
+        $self->_process_related_annot($line, $seq_feature);
+        $$line = $self->_readline;
+    }
+
+    # Match the closing tag:
+    if ($$line =~ /<\/seq_feature>/) {
+
+        $$line = $self->_readline; # for the next sub...
+        return $seq_feature;
+
+    } else {
+
+        $self->throw("Error.  Missing </seq_feature> tag.  Got this: $$line\n");
+
+    }
+
+}
+# ==================================================================================
+
+=head2 _process_qualifier
+
+  Title    : _process_qualifier
+  Usage    : $self->_process_qualifier
+  Function : Parse the data between the <qualifier></qualifier> tags.
+  Args     : 2 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the <qualifier> data.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : _process_seq_feature
+             Method(s) that this method calls   : _star_tag
+
+=cut
+
+sub _process_qualifier {
+
+    my ($self, $line, $data_structure) = @_;
+
+    # Gather every consecutive <qualifier> line into one record.
+    my $collected;
+    $self->_star_tag($line, \$collected, 'qualifier');
+
+    # The record is appended even when no <qualifier> line was seen, in
+    # which case it is still undef.
+    push @{ $$data_structure->{'qualifier'} }, $collected;
+
+    # Nothing to return: the caller's structure was modified through the
+    # reference.
+    return;
+
+}
+# ==================================================================================
+
+=head2 _process_classification
+
+  Title   : _process_classification
+  Usage   : $self->_process_classification
+  Function: Parse the data between the <classification></classification> tags.
+  Args    :   2 scalars:
+            - reference to a scalar holding the value of the line to be parsed.
+            - reference to a data structure to store the <classification> data.
+  Returns : Nothing.
+  Note    : Method(s) that call(s) this method: _process_seq_feature
+
+  Method(s) that this method calls: _helper_store_attribute_list ,
+  _question_mark_tag , _star_tag, _process_evidence
+
+=cut
+
+sub _process_classification { # NOT IN USE.
+
+    my ($self, $line, $data_structure) = @_;
+
+    while ($$line =~ /<classification\s?(.*?)\s?>/) {
+
+        my $attributes = $1;
+
+        # A self-closing <classification .../> has no children and no
+        # separate closing tag.
+        my $is_empty_element = $attributes =~ m{/\z};
+
+        # Every <classification> is merged into the one record kept under
+        # the 'classification' key.
+        my $classification = $$data_structure->{'classification'};
+        $self->_helper_store_attribute_list($attributes, \$classification);
+
+        # Move off the opening tag.  Without this read the loop condition
+        # keeps matching the same line and never terminates.
+        $$line = $self->_readline;
+
+        unless ($is_empty_element) {
+
+            # Zero or one <description>
+            $self->_question_mark_tag($line, \$classification, 'description');
+
+            # Zero or more <id_alias>
+            $self->_star_tag($line, \$classification, 'id_alias');
+
+            # Zero or one <evidence>
+            $self->_process_evidence($line, \$classification);
+
+            # Consume the closing tag so the caller sees what follows.
+            if ($$line =~ /<\/classification>/) {
+                $$line = $self->_readline;
+            } else {
+                $self->throw("Error:  Missing </classification> tag.  Got this: $$line\n\n");
+            }
+        }
+
+        # Write the record back: when the key did not exist yet, the hash
+        # was created in the local variable only.
+        $$data_structure->{'classification'} = $classification
+            if defined $classification;
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+# Parses an optional <evidence> block.  $line is a reference to the current
+# line; $data_structure is a reference to the scalar holding the record that
+# receives the 'element_id' and 'comp_result' lists.
+sub _process_evidence { # NOT done.
+
+    my ($self, $line, $data_structure) = @_;
+
+    if ($$line =~ /<evidence>/) {
+
+        $$line = $self->_readline;
+
+        # One or more <element_id> OR one or more <comp_result>
+        while ($$line =~ /<(element_id|comp_result)\s?(.*?)\s?>/) {
+
+            if ($$line =~ /<element_id\s?(.*?)\s?>/) {
+
+                my $ids;
+                $self->_plus_tag($line, \$ids, 'element_id');
+                push @{ $$data_structure->{'element_id'} }, $ids;
+
+            } elsif (my ($attributes) = $$line =~ /<comp_result\s?(.*?)\s?>/) {
+
+                my $result;
+                $self->_process_comp_result($line, \$result, $attributes);
+                push @{ $$data_structure->{'comp_result'} }, $result;
+
+            }
+
+            # One more line is read after each child, whichever branch ran.
+            $$line = $self->_readline;
+        }
+
+    }
+
+
+}
+# ==================================================================================
+
+# Parses the children of a <comp_result> element.  $comp_result is a
+# reference to the scalar holding the record; $attribute_line is the
+# attribute text of the opening tag.
+sub _process_comp_result { # NOT IN USE.
+
+    my ($self, $line, $comp_result, $attribute_line) = @_;
+
+    # Store the opening tag's attributes, then step onto the first child.
+    $self->_helper_store_attribute_list($attribute_line, $comp_result);
+    $$line = $self->_readline;
+
+    # <note>, <match_desc> and <match_align>: zero or one each, in this
+    # order.
+    for my $optional_tag (qw(note match_desc match_align)) {
+        $self->_question_mark_tag($line, $comp_result, $optional_tag);
+    }
+
+    # Zero or one <query_region>, then zero or one <match_region>
+    $self->_process_query_region($line, $comp_result);
+    $self->_process_match_region($line, $comp_result);
+
+    # Zero or more <result_property>
+    $self->_star_tag($line, $comp_result, 'result_property');
+
+    # Zero or more <result_group>
+    $self->_process_result_group($line, $comp_result);
+
+    # Zero or more <related_annot>
+    $self->_process_related_annot($line, $comp_result);
+
+}
+# ==================================================================================
+
+# Parses <related_annot> elements and appends each one to the
+# 'related_annot' list of $data_structure, which may be either a hash
+# reference or a reference to a scalar holding one.  On return $$line is
+# still on the closing </related_annot> tag; it is not consumed here.
+sub _process_related_annot { # NOT IN USE.
+
+    my ($self, $line, $data_structure) = @_;
+
+    while ($$line =~ /<related_annot\s?(.*?)\s?>/) {
+
+        my $related_annot;
+        # Attributes of the <related_annot> tag itself.
+        $self->_helper_store_attribute_list($1, \$related_annot);
+        $$line = $self->_readline;
+
+        # One or more <element_id>
+        my $element_id_count = 0;
+        while ($$line =~ /<element_id\s?(.*?)\s?>/) {
+            my $element_id;
+            $self->_helper_store_attribute_list($1, \$element_id);
+            push @{$related_annot->{'element_id'}}, $element_id;
+            $$line = $self->_readline;
+            ++$element_id_count;
+        }
+
+        if ($element_id_count == 0) {
+            $self->throw("Error.  Missing <element_id> tag.  Got: $$line");
+        }
+
+        # Zero or more <sci_property>
+        $self->_star_tag($line, \$related_annot, 'sci_property');
+
+        # The other helpers in this parser are handed a reference to the
+        # scalar holding the record, so accept that form as well as a plain
+        # hash reference.
+        if (ref($data_structure) eq 'HASH') {
+            push @{$data_structure->{'related_annot'}}, $related_annot;
+        } else {
+            push @{$$data_structure->{'related_annot'}}, $related_annot;
+        }
+
+        unless ($$line =~ /<\/related_annot>/){
+            $self->throw("Error.  Missing </related_annot>. Got: $$line\n");
+        }
+
+    }
+
+
+}
+# ==================================================================================
+
+# Parses zero or more <result_group> elements, each holding one or more
+# <comp_result>.  $line is a reference to the current line;
+# $data_structure is a reference to the scalar holding the record that
+# receives the 'result_group' entry.
+sub _process_result_group { # NOT IN USE.
+
+    my ($self, $line, $data_structure) = @_;
+
+    while ($$line =~ /<result_group\s?(.*?)\s?>/) {
+        my $result_group = $$data_structure->{'result_group'};
+        $self->_helper_store_attribute_list($1, \$result_group);
+
+        my $count = 0;
+        $$line = $self->_readline;
+        while ($$line =~ /<comp_result\s?(.*?)\s?>/) {
+            # one or more <comp_result>
+            # $line is already a reference to the line; pass it as is.
+            # Passing \$line would make the callee overwrite this sub's
+            # reference with the text it reads.
+            $self->_process_comp_result($line, \$result_group, $1);
+            $$line = $self->_readline;
+            ++$count;
+        }
+
+        $self->throw("Error.  No <comp_result></comp_result> tag! Got this: $$line")
+            if $count == 0;
+
+        # Write the record back: when the key did not exist yet, the hash
+        # was created in the local variable only.
+        $$data_structure->{'result_group'} = $result_group;
+
+        # After the inner loop the line should be the closing tag of
+        # 'result_group'.  Match against the line's text ($$line), not
+        # against the reference.
+        if ($$line =~ /<\/result_group>/) {
+            $$line = $self->_readline;
+        } else {
+            $self->throw("Error.  No </result_group>!  Got this: $$line");
+        }
+
+
+    }
+
+
+}
+# ==================================================================================
+
+# Parses an optional <match_region> element.  $line is a reference to the
+# current line; $data_structure is a reference to the scalar holding the
+# record that receives the 'match_region' entry.
+sub _process_match_region { # NOT IN USE.
+
+    my ($self, $line, $data_structure) = @_;
+
+    # $data_structure is a reference to a scalar (the caller passes the
+    # same value to _question_mark_tag), so it takes two dereferences.
+    my $match_region = $$data_structure->{'match_region'};
+
+    # Only the opening tag is matched here; its children and closing tag
+    # are read from the following lines.
+    if ($$line =~ /<match_region\s?(.*?)\s?>/) {
+
+        $self->_helper_store_attribute_list($1, \$match_region);
+        $$line = $self->_readline;
+
+        # Zero or one db_id | element_id | bio_sequence
+        if ($$line =~ /<db_id\s?(.*?)\s?>(.*?)<\/db_id>/) {
+            $self->_question_mark_tag($line, \$match_region, 'db_id');
+        } elsif ($$line =~ /<element_id\s?(.*?)\s?>/) { # empty...
+            $self->_question_mark_tag($line, \$match_region, 'element_id');
+        } elsif ($$line =~ /<bio_sequence\s?(.*?)\s?>/) {
+            $match_region->{'bio_sequence'} = $self->_process_bio_sequence($line, $1);
+        }
+
+        # Write the record back: when the key did not exist yet, the hash
+        # was created in the local variable only.
+        $$data_structure->{'match_region'} = $match_region;
+
+        # Some branches above have already moved on to the closing tag and
+        # some have not; read one more line only when it is still needed,
+        # so the closing tag is never skipped.
+        $$line = $self->_readline unless $$line =~ /<\/match_region>/;
+
+        if ($$line =~ /<\/match_region>/) {
+            $$line = $self->_readline; # get the next line to be _processed by the next sub
+            return;
+        } else {
+            $self->throw("No closing tag </match_region>!  Got this: $$line\n");
+        }
+
+    }
+
+    return;
+}
+# ==================================================================================
+
+# Parses an optional <query_region> element.  $line is a reference to the
+# current line; $data_structure is a reference to the scalar holding the
+# record that receives the 'query_region' entry.
+sub _process_query_region { # NOT IN USE.
+
+    my ($self, $line, $data_structure) = @_;
+
+    # $data_structure is a reference to a scalar (the caller passes the
+    # same value to _question_mark_tag), so it takes two dereferences.
+    my $query_region = $$data_structure->{'query_region'};
+
+    if ($$line =~ /<query_region\s?(.*?)\s?>/) {
+        $self->_helper_store_attribute_list($1, \$query_region);
+        $$line = $self->_readline;
+
+        # Zero or one <db_id>
+        $self->_question_mark_tag($line, \$query_region, 'db_id');
+
+        # Write the record back: when the key did not exist yet, the hash
+        # was created in the local variable only.
+        $$data_structure->{'query_region'} = $query_region;
+
+        if ($$line =~ /<\/query_region>/) {
+            $$line = $self->_readline; # get the next line to _process.
+            return;
+        } else {
+            $self->throw("No closing tag </query_region>.  Got this: $$line\n");
+        }
+
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _tag_processing_helper
+
+  Title    : _tag_processing_helper
+  Usage    : $self->_tag_processing_helper
+  Function : Stores the tag value within the data structure.
+             Also calls _helper_store_attribute_list to store the 
+             attributes and their values in the data structure.
+  Args     : 5 scalars:
+             - Scalar holding the value of the attributes
+             - Reference to a data structure to store the data for <$tag_name>
+             - Scalar holding the tag name.
+             - Scalar holding the value of the tag.
+             - Scalar holding the value of either 'star', 'plus', 
+               or 'question mark' which specifies what type of method
+               called this method.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method: _one_tag , _question_mark_tag ,
+             _star_tag , _plus_tag
+             Method(s) that this method calls: _helper_store_attribute_list
+
+=cut
+
+sub _tag_processing_helper {
+
+    my ($self, $attribute_list, $data_structure, $tag_name, $tag_value, $caller) = @_;
+
+    # Add the attributes to $$data_structure if the tag carried any.
+    if (defined $attribute_list) {
+        $self->_helper_store_attribute_list($attribute_list, $data_structure);
+    }
+
+
+    if ($caller eq 'star' || $caller eq 'plus') {
+        # There's either zero or more tags (*) or one or more (+), so the
+        # values are collected in a list.
+        push @{$$data_structure->{$tag_name}}, $tag_value;
+    } else {
+        # A tag that occurs at most once keeps a single value.  Only a
+        # missing or empty value becomes the placeholder 'null'; a plain
+        # truth test would also replace a real value of "0".
+        $$data_structure->{$tag_name}
+            = (defined $tag_value && length $tag_value) ? $tag_value : 'null';
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _one_tag
+
+  Title    : _one_tag
+  Usage    : $self->_one_tag
+  Function : A method to store data from tags that occurs just once.
+  Args     : 3 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the data for <$tag_name>
+             - scalar holding the name of the tag.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : many
+             Method(s) that this method calls   : _tag_processing_helper
+
+=cut
+
+sub _one_tag {
+
+    my ($self, $line, $data_structure, $tag_name) = @_;
+
+    # The tag is mandatory: refuse a line that does not even open it.
+    unless ($$line =~ /\<$tag_name/) {
+        $self->throw("Error:  Missing <$tag_name></$tag_name>.  Got: $$line\n\n");
+    }
+
+    my ($attributes, $value);
+
+    if ($$line =~ /<$tag_name\s?(.*?)\s?\/?>(.*?)<\/$tag_name>/) {
+
+        # Opening and closing tag on the same line: keep the enclosed text.
+        ($attributes, $value) = ($1, $2);
+
+    } elsif ($$line =~ /<$tag_name\s?(.*?)\s?\/?>/) {
+
+        # Opening (or self-closing) tag only: the value is empty.
+        ($attributes, $value) = ($1, '');
+
+    } else {
+        $self->throw("Error:  Cannot parse this line: $$line\n\n");
+    }
+
+    # 'one' tells _tag_processing_helper that a single value is stored.
+    $self->_tag_processing_helper($attributes, $data_structure, $tag_name, $value, 'one');
+
+    $$line = $self->_readline;  # get the next line.
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _question_mark_tag
+
+  Title    : _question_mark_tag
+  Usage    : $self->_question_mark_tag
+  Function : Parses values from tags that occurs zero or one time. ie: tag_name?
+  Args     : 3 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the data for <$tag_name>
+             - scalar holding the name of the tag.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : many.
+             Method(s) that this method calls   : _tag_processing_helper
+
+
+=cut
+
+sub _question_mark_tag {
+
+    my ($self, $line, $data_structure, $tag_name) = @_;
+
+    # The tag is optional: a line that does not hold a complete
+    # <$tag_name>...</$tag_name> element is left untouched for the caller.
+    my ($attributes, $value)
+        = $$line =~ /<$tag_name\s?(.*?)\s?>(.*?)<\/$tag_name>/;
+
+    if (defined $attributes) {
+        $self->_tag_processing_helper($attributes, $data_structure, $tag_name, $value, 'question mark');
+        $$line = $self->_readline;
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _star_tag
+
+  Title    : _star_tag
+  Usage    : $self->_star_tag
+  Function : Parses values from tags that occur zero or more times. ie: tag_name*
+  Args     : 3 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the data for <$tag_name>
+             - scalar holding the name of the tag.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : many.
+             Method(s) that this method calls   : _tag_processing_helper
+
+=cut
+
+sub _star_tag {
+
+    my ($self, $line, $data_structure, $tag_name) = @_;
+
+    # Keep consuming lines for as long as each one holds a complete
+    # <$tag_name>...</$tag_name> element.  The tag and attribute values are
+    # stored in $$data_structure by _tag_processing_helper.
+    while (my ($attributes, $value)
+               = $$line =~ /<$tag_name\s?(.*?)\s?>(.*?)<\/$tag_name>/) {
+
+        $self->_tag_processing_helper($attributes, $data_structure, $tag_name, $value, 'star');
+        $$line = $self->_readline;
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _plus_tag
+
+  Title    : _plus_tag
+  Usage    : $self->_plus_tag
+  Function : Handles 'plus' tags (tags that occur one or more times).  tag_name+
+  Args     : 3 scalars:
+             - reference to a scalar holding the value of the line to be parsed.
+             - reference to a data structure to store the data for <$tag_name>
+             - scalar holding the name of the tag.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : many.
+             Method(s) that this method calls   : _star_tag
+
+=cut
+
+sub _plus_tag {
+
+    my ($self, $line, $data_structure, $tag_name) = @_;
+
+    my ($attributes, $value)
+        = $$line =~ /<$tag_name\s?(.*?)\s?>(.*?)<\/$tag_name>/;
+
+    # At least one occurrence is mandatory.
+    unless (defined $attributes) {
+        $self->throw("Error:  Missing <$tag_name></$tag_name>.  Got: $$line\n\n");
+    }
+
+    # Store the first occurrence here ...
+    $self->_tag_processing_helper($attributes, $data_structure, $tag_name, $value, 'plus');
+
+    # ... then treat whatever follows as "zero or more" of the same tag
+    # and let _star_tag collect the rest.
+    $$line = $self->_readline;
+    $self->_star_tag($line, $data_structure, $tag_name);
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _helper_store_attribute_list
+
+  Title    : _helper_store_attribute_list
+  Usage    : $self->_helper_store_attribute_list
+  Function : A helper method used to store the attributes from
+             the tags into the data structure.
+  Args     : 2 scalars:
+             - scalar holding the attribute values to be parsed.
+             - reference to a data structure to store the data between the 2 tags.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : Many.
+             Method(s) that this method call(s) : None.
+
+=cut
+
+sub _helper_store_attribute_list {
+
+    my ($self, $attribute_line, $data_structure) = @_;
+
+    # A tag without attribute text has nothing to store; returning early
+    # also avoids matching against an undefined value.
+    return unless defined $attribute_line;
+
+    # Collect name="value" pairs.  A name repeated within one attribute
+    # line keeps its last value only.
+    my %attribs = ($attribute_line =~ /(\w+)\s*=\s*"([^"]*)"/g);
+
+    for my $key (keys %attribs) {
+        # Values are stored in an array because there may be > 1 tag, thus
+        # > 1 attribute of the same name.  Readers therefore index the
+        # value, ie: $bio_sequence->{'molecule_type'}->[0]
+        push @{$$data_structure->{$key}}, $attribs{$key};
+    }
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 _store_seqs
+
+  Title    : _store_seqs
+  Usage    : $self->_store_seqs
+  Function : This method is called once in the life time of the script.
+             It stores the data parsed from the agave xml file into
+             the Bio::Seq object.
+  Args     : None.
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method : next_seq
+             Method(s) that this method calls   : None.
+
+=cut
+
+sub _store_seqs {
+
+    my ($self) = @_;
+
+
+    for my $sciobj (@{$self->{'sciobj'}}) {
+
+        ### $sciobj = $self->{'sciobj'};                # The root node.
+
+
+        for my $contig (@{$sciobj->{'contig'}}) { # Each contig has a fragment order.
+
+            for my $fragment_order (@{$contig->{'fragment_order'}}) { # Each fragment order has a fragment_orientation.
+
+                for my $fragment_orientation (@{$fragment_order->{'fragment_orientation'}}) {
+                    # Each fragment_orientation contain 1 bio sequence.
+
+                    my $bio_sequence = $fragment_orientation->{'bio_sequence'}; # <bio_sequence> contains all the
+                    # interesting stuff:
+
+                    my $sequence         = $bio_sequence->{'sequence'};
+                    my $accession_number = $bio_sequence->{'sequence_id'}->[0]; # also use for primary_id
+                    my $organism         = $bio_sequence->{'organism'};
+                    my $description      = $bio_sequence->{'description'};
+                    my $molecule_type    = $bio_sequence->{'molecule_type'}->[0];
+
+                    my $primary_seq = Bio::PrimarySeq->new(
+                                                           -id       => $accession_number,
+                                                           -alphabet => $molecule_type,
+                                                           -seq      => $sequence,
+                                                           -desc     => $description,
+                                                          );
+
+                    my $seq = Bio::Seq->new (
+                                             -display_id       => $accession_number,
+                                             -accession_number => $accession_number,
+                                             -primary_seq      => $primary_seq,
+                                             -seq              => $sequence,
+                                             -description      => $description,
+                                            );
+
+                    my $organism_name = $bio_sequence->{organism_name}->[0];
+                    if (defined $organism_name) {
+
+                        my @classification = split(' ', $organism_name);
+                        my $species = Bio::Species->new();
+                        $species->classification(@classification);
+                        $seq->species($species);
+                    }
+                    # Pull out the keywords: $keywords is an array ref.
+
+                    my $keywords = $bio_sequence->{keyword};
+                    my %key_to_value;
+
+                    for my $keywords (@$keywords) {
+                        # print "keywords: $keywords\n";
+                        my @words = split(':', $keywords);
+                        for (my $i = 0; $i < scalar @words - 1; $i++) {
+                            if ($i % 2 == 0) {
+                                my $j = $i; $j++;
+                                # print "$words[$i] , $words[$j]\n";
+                                $key_to_value{$words[$i]} = $words[$j];
+                            }
+                        }
+                        # print Data::Dumper->Dump([%key_to_value]);
+                        my $reference = Bio::Annotation::Reference->
+                            new(-authors => $key_to_value{authors},
+                                -title => $key_to_value{title},
+                                -database => $key_to_value{database},
+                                -pubmed => $key_to_value{pubmed},
+                               );
+                        $seq->annotation->add_Annotation('reference', $reference);
+
+                    }           # close for my $keywords
+
+
+                    #  print Data::Dumper->Dump([$bio_sequence]); print "here\n"; exit;
+                    if (defined $bio_sequence->{'sequence_map'}) {
+
+                        for my $sequence_map (@{$bio_sequence->{'sequence_map'}}) {
+
+                            # print Data::Dumper->Dump([$sequence_map]); print "here\n"; exit;
+
+                            my $label = $sequence_map->{label};
+
+                            if (defined $sequence_map->{annotations} &&
+                                ref($sequence_map->{annotations}) eq 'HASH') {
+
+                                # Get the sequence features (ie genes, exons, etc) from this $sequence_map
+                                for my $seq_feature (@{$sequence_map->{'annotations'}->{'seq_feature'}}) {
+
+                                    # print Data::Dumper->Dump([$seq_feature]); exit;
+                                    my $seq_location     = $seq_feature->{'seq_location'};
+                                    my $start_coord      = $seq_feature->{'least_start'}->[0];
+                                    my $feature_type     = $seq_feature->{'feature_type'}->[0];
+                                    my $end_coord        = $seq_feature->{'greatest_end'}->[0];
+                                    my $is_on_complement = $seq_feature->{'is_on_complement'}->[0];
+
+                                    # Specify the coordinates and the tag for this seq feature.
+                                    # print "Primary Tag for this SeqFeature: $feature_type\n";
+                                    my $feat = Bio::SeqFeature::Generic->
+                                        new(
+                                            -start       => $start_coord,
+                                            -end         => $end_coord,
+                                            -primary_tag => $feature_type,
+                                           );
+
+
+                                    if (defined $seq_feature->{'qualifier'} &&
+                                        ref($seq_feature->{'qualifier'}) eq 'ARRAY') {
+
+                                        for my $feature (@{$seq_feature->{'qualifier'}}) {
+
+                                            my $value = $feature->{'qualifier'};
+                                            my $feature_type = $feature->{'qualifier_type'};
+
+                                            for (my $i = 0;
+                                                 $i < scalar @{$value};
+                                                 $i++) {
+                                                $feat->add_tag_value(
+                                                                     $feature_type->[$i] => $value->[$i]
+                                                                    );
+                                            } # close the for loop
+
+                                        }
+
+                                    } # close if (defined $seq_feature->...
+
+
+                                    $seq->add_SeqFeature($feat);
+
+
+                                } # close for my $seq_feature (@{$sequence_map->...
+
+
+                            }   # close if (defined $sequence_map->{annotations} &&
+
+
+                        }       # close for my $sequence_map (@{$bio_sequence->{'sequence_map'}}){
+
+                    }           # close if (defined $bio_sequence->{'sequence_map'}){
+
+
+                    # This is where the Bio::Seq objects are stored:
+                    push @{$self->{'sequence_objects'}}, $seq;
+
+
+                }               # close for my $fragment_orientation
+
+
+            }                   # close for my $fragment_order
+
+
+        }                       # close for my $contig
+
+
+    }                           # close for my $sciobj
+
+    # Flag is set so that we know that the sequence objects are now stored in $self.
+    $self->{'seqs_stored'} = 1;
+
+    return;
+
+}
+# ==================================================================================
+
+=head2 next_seq
+
+        Title    : next_seq
+        Usage    : $seq = $stream->next_seq()
+        Function : Returns the next sequence in the stream.
+        Args     : None.
+        Returns  : Bio::Seq object
+
+Method is called from the script.  Method(s) that this method calls:
+_store_seqs (only once throughout the life time of script execution).
+
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # The whole AGAVE document is turned into Bio::Seq objects by
+    # _store_seqs on the first call; later calls only drain that queue.
+    if ($self->{'seqs_stored'} == 0) {
+        $self->_store_seqs;
+    }
+
+    my $queue = $self->{'sequence_objects'};
+    unless (defined $queue) {
+        # Not expected once _store_seqs has run.
+        $self->throw("Error: No Bio::Seq objects stored yet!\n\n");
+    }
+
+    # An exhausted queue is reported with 0 (nothing more to parse).
+    return 0 if @{$queue} == 0;
+
+    # Otherwise hand out the oldest stored object and remove it.
+    return shift @{$queue};
+}
+# ==================================================================================
+
+=head2 next_primary_seq
+
+  Title   : next_primary_seq
+  Usage   : $seq = $stream->next_primary_seq()
+  Function: returns the next primary sequence (ie no seq_features) in the stream
+  Returns : Bio::PrimarySeq object
+  Args    : NONE
+
+=cut
+
+sub next_primary_seq {
+    my ($self) = @_;    # unused; the method has no real implementation here
+    return 0;           # constant answer on every call
+}
+# ==================================================================================
+
+=head2 write_seq
+
+  Title   : write_seq
+  Usage   : Not Yet Implemented! $stream->write_seq(@seq)
+  Function: writes the $seq object into the stream
+  Returns : 1 for success and 0 for error
+  Args    : Bio::Seq object
+
+=cut
+
+sub write_seq {
+
+    # Convert the Bio::Seq object(s) to AGAVE xml file.
+
+    my ($self, @seqs) = @_;
+
+    foreach my $seq ( @seqs ) {
+        # _write_each_record is where most of the work actually takes place.
+        $self->_write_each_record( $seq );
+    }
+
+    return;
+}
+# ==================================================================================
+
+=head2 _write_each_record
+
+  Title   : _write_each_record
+  Usage   : $agave->_write_each_record( $seqI )
+  Function: change data into agave format
+  Returns : NONE
+  Args    : Bio::SeqI object
+
+=cut
+
+sub _write_each_record {
+    my ($self, $seq) = @_;
+
+    # NOTE(review): single-string open mode; if $self->{'file'} already begins with '>' the mode becomes '>>' - confirm.
+    my $output = IO::File->new(">" . $self->{'file'});
+    my $writer = XML::Writer->new(OUTPUT => $output,
+                                  NAMESPACES => 0,
+                                  DATA_MODE => 1,
+                                  DATA_INDENT => 2 ) ;
+
+    $writer->xmlDecl("UTF-8");
+    $writer->doctype("sciobj", '', "sciobj.dtd");
+    $writer ->startTag('sciobj',
+                       'version', '2',
+                       'release', '2');
+
+    $writer->startTag('contig', 'length', $seq->length);
+    my $annotation = $seq ->annotation;
+    # get_Annotations() returns a list: test it for emptiness, then take the first dblink by list assignment.
+    if ( $annotation->get_Annotations('dblink') ) {
+        # used to be $annotation->each_DBLink, but Bio::Annotation::Collection::each_DBLink
+        # is now replaced with get_Annotations('dblink')
+        my ($dblink) = $annotation->get_Annotations('dblink');
+
+        $writer ->startTag('db_id',
+                           'id', $dblink->primary_id ,
+                           'db_code', $dblink->database );
+    } else {
+        $writer ->startTag('db_id',
+                           'id', $seq->display_id ,
+                           'db_code', 'default' );
+    }
+    $writer ->endTag('db_id') ;
+
+
+    $writer->startTag('fragment_order');
+    $writer->startTag('fragment_orientation');
+
+    ##start bio_sequence
+    ####my $organism = $seq->species->genus . " " . $seq->species->species;
+    $writer ->startTag('bio_sequence',
+                       'sequence_id', $seq->display_id,
+                       'seq_length', $seq->length,
+                       # 'molecule_type', $seq->moltype, # deprecated
+                       'molecule_type', $self->alphabet,
+                       #'organism_name', $organism
+                      );
+
+    # The element carries the display id, the sequence length and the
+    # alphabet reported by this stream object ($self->alphabet, not $seq).
+    # NOTE(review): $seq->alphabet may have been intended - confirm.
+    ##start db_id under bio_sequence
+    $annotation = $seq ->annotation;
+    # Same db_id logic as for the contig above: first dblink if any, else the display id.
+    if ( $annotation->get_Annotations('dblink') ) {
+        # used to be $annotation->each_DBLink, but Bio::Annotation::Collection::each_DBLink
+        # is now replaced with get_Annotations('dblink')
+        my ($dblink) = $annotation->get_Annotations('dblink');
+
+        $writer ->startTag('db_id',
+                           'id', $dblink->primary_id ,
+                           'db_code', $dblink->database );
+    } else {
+        $writer ->startTag('db_id',
+                           'id', $seq->display_id ,
+                           'db_code', 'default' );
+    }
+    $writer ->endTag('db_id') ;
+
+    ##start note
+    my $note = "" ;
+    foreach my $comment ( $annotation->get_Annotations('comment') ) {
+        # used to be $annotations->each_Comment(), but that's now been replaced
+        # with get_Annotations()
+        # $comment is a Bio::Annotation::Comment object
+        $note .= $comment->text() . "\n";
+    }
+
+    $writer ->startTag('note');
+    $writer ->characters( $note ) ;
+    $writer ->endTag('note');
+
+    ##start description
+    $writer ->startTag('description');
+
+    # $writer ->characters( $annotation->get_Annotations('description') ) ;
+    # used to be $annotations->each_description(), but that's now been
+    # replaced with get_Annotations.
+    # Simon added this: this is the primary_seq's desc (the DEFINITION tag in a genbank file)
+    $writer->characters($seq->{primary_seq}->{desc});
+    $writer ->endTag('description');
+
+    ##start keywords
+    foreach my $genename ( $annotation->get_Annotations('gene_name') ) {
+        # used to be $annotations->each_gene_name, but that's now been
+        # replaced with get_Annotations()
+        $writer ->startTag('keyword');
+        $writer ->characters( $genename ) ;
+        $writer ->endTag('keyword');
+    }
+
+
+    foreach my $ref ( $annotation->get_Annotations('reference') ) {
+        # used to be $annotation->each_Reference, but
+        # that's now been replaced with get_Annotations('reference');
+        # link is a Bio::Annotation::Reference object
+        $writer ->startTag('keyword');
+        # Missing fields are written as the literal string 'null'.
+        my $medline  = $ref->medline || 'null';
+        my $pubmed   = $ref->pubmed || 'null';
+        my $database = $ref->database || 'null';
+        my $authors  = $ref->authors || 'null';
+        my $title    = $ref->title || 'null';
+
+
+        $writer ->characters( 'medline:' . "$medline" . ':' . 'pubmed:' .
+                              "$pubmed" . ':' . 'database:' . "$database" .
+                              ':' .'authors:' . "$authors" . ':' . 'title:' . "$title" ) ;
+        $writer ->endTag('keyword');
+    }
+
+    ## start sequence
+    $writer ->startTag('sequence');
+    $writer ->characters( $seq->seq ) ;
+    $writer ->endTag('sequence');
+
+    ## start xrefs
+    $writer ->startTag('xrefs');
+    foreach my $link ( $annotation->get_Annotations('dblink') ) {
+        # link is a Bio::Annotation::DBLink object
+        $writer ->startTag('db_id',
+                           'db_code', $link->database,
+                           'id', $link->primary_id);
+        $writer ->characters( $link->comment ) ;
+        $writer ->endTag('db_id');
+    }
+    $writer ->endTag('xrefs') ;
+
+    ##start sequence map
+    ##we can not use :  my @feats = $seq->all_SeqFeatures;
+    ##rather, we use top_SeqFeatures() to keep the tree structure
+    my @feats = $seq->top_SeqFeatures ;
+
+    ##features sharing a source_tag end up in the same sequence_map element
+
+    ##now we need cluster top level seqfeature by algorithm
+    my $maps;
+    foreach my $feature (@feats) {
+        my $map_type = $feature ->source_tag;
+        push (@{$maps->{ $map_type }}, $feature);
+    }
+
+    ##now we enter each sequence_map
+    foreach my $map_type (keys  %$maps ) {
+        $writer->startTag('sequence_map',
+                          'label', $map_type );
+        $writer->startTag('annotations');
+        # the original author accidently entered 'annotation' instead of 'annotations'
+
+        foreach my $feature ( @{$maps->{ $map_type }} ) {
+            $self->_write_seqfeature( $feature, $writer ) ;
+        }
+
+        $writer->endTag('annotations');
+        $writer->endTag('sequence_map');
+    }
+
+    $writer->endTag('bio_sequence');
+    $writer->endTag('fragment_orientation');
+    $writer->endTag('fragment_order');
+    $writer->endTag('contig');
+    $writer->endTag('sciobj');
+
+}
+# ==================================================================================
+
+=head2 _write_seqfeature
+
+  Usage   : $agave->_write_seqfeature( $seqfeature, $writer )
+  Function: change seqfeature data into agave format
+  Returns : NONE
+  Args    : Bio::SeqFeature object and XML::writer object
+
+=cut
+
+sub _write_seqfeature{
+
+    my ($self,$seqf, $writer) = @_;
+
+    ##now enter seq feature
+    $writer ->startTag('seq_feature',
+                       'feature_type', $seqf->primary_tag() );
+
+    # An undefined strand is treated like 0 (not on the complement).
+    my $strand = $seqf->strand();
+    $strand = 0 if !defined $strand;
+    # is_on_complement: is the feature found on the complementary
+    # strand (true) or not (false)?  Only strand -1 is the complement.
+    my $is_on_complement;
+    if ($strand == -1) {
+        $is_on_complement = 'true';
+    } else {
+        $is_on_complement = 'false';
+    }
+
+    # Attribute names match what the reader in this module looks up
+    # ('least_start', 'greatest_end', 'is_on_complement').
+    $writer ->startTag('seq_location',
+                       'least_start', $seqf->start(),
+                       'greatest_end', $seqf->end(),
+                       'is_on_complement' , $is_on_complement);
+    $writer ->endTag('seq_location');
+
+    ##enter qualifier
+    foreach my $tag ( $seqf->all_tags() ) {
+        $writer ->startTag('qualifier',
+                           'qualifier_type', $tag);
+        # NOTE(review): every value of the tag is passed as a separate argument - confirm characters() writes them all.
+        $writer ->characters( $seqf->each_tag_value($tag) ) ;
+        $writer ->endTag('qualifier');
+    }
+
+    ##now recursively travel the seqFeature
+    foreach my $subfeat ( $seqf->sub_SeqFeature ) {
+        $self->_write_seqfeature( $subfeat, $writer ) ;
+    }
+
+    $writer->endTag('seq_feature');
+
+    return;
+}
+# ==================================================================================
+
+=head2 _filehandle
+
+  Title   : _filehandle
+  Usage   : $obj->_filehandle($newval)
+  Function:
+  Example :
+  Returns : value of _filehandle
+  Args    : newvalue (optional)
+
+=cut
+
+sub _filehandle {
+    my $obj       = shift;
+    my $new_value = shift;
+
+    # Combined getter/setter: an undef or missing argument leaves the slot alone.
+    $obj->{'_filehandle'} = $new_value if defined $new_value;
+
+    return $obj->{'_filehandle'};
+}
+# ==================================================================================
+
+=head2 throw
+
+  Title    : throw
+  Usage    : $self->throw;
+  Function : Throw's error message.  Calls SeqIO's throw method.
+  Args     : Array of string(s), holding error message(s).
+  Returns  : Nothing.
+  Note     : Method(s) that call(s) this method: many.
+             Method(s) that this method calls: Bio::SeqIO's throw method.
+
+=cut
+
+sub throw {
+    my $self = shift;
+    # The message parts are concatenated, prefixed with Perl's current
+    # input line number in square brackets, and passed to the parent class.
+    $self->SUPER::throw("[$.]" . join('', @_));
+
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/alf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/alf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/alf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: alf.pm,v 1.12.4.1 2006/10/02 23:10:28 sendu Exp $
+# BioPerl module for Bio::SeqIO::alf
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::alf - alf trace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from alf trace
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Aaron Mackey
+
+Email: amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::alf;
+use vars qw(@ISA $READ_AVAIL);
+use strict;
+
+use Bio::SeqIO;
+use Bio::Seq::SeqFactory;
+
+push @ISA, qw( Bio::SeqIO );
+
+sub BEGIN {
+    # Probe for the optional trace reader and remember whether it loaded.
+    eval { require Bio::SeqIO::staden::read; };
+    my $loaded = $@ ? 0 : 1;
+
+    # When it is present, this class inherits from it as well.
+    push @ISA, "Bio::SeqIO::staden::read" if $loaded;
+    $READ_AVAIL = $loaded;
+}
+
+sub _initialize {
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+  # Install a default factory only when none is defined after the parent initializer has run.
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::Seq'));
+  }
+  # $READ_AVAIL is set in BEGIN; it is false when Bio::SeqIO::staden::read could not be loaded.
+  Bio::Root::Root->throw( -class => 'Bio::Root::SystemException',
+                          -text  => "Bio::SeqIO::staden::read is not available; make sure the bioperl-ext package has been installed successfully!"
+                        ) unless $READ_AVAIL;
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::SeqWithQuality object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # read_trace is not defined in this package; its results are unpacked
+    # here as sequence, identifier, description and quality, in that order.
+    my ($residues, $id, $desc, $qual) = $self->read_trace($self->_fh, 'alf');
+
+    # The identifier is used for both -id and -primary_id.
+    my $seq_obj = $self->sequence_factory->create(
+        -seq        => $residues, -id   => $id,
+        -primary_id => $id,       -desc => $desc,
+        -alphabet   => 'DNA',     -qual => $qual
+    );
+
+    return $seq_obj;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+sub write_seq {
+    my ($self, @seq) = @_;
+    # Every object is written to the stream's handle with the 'alf' format tag.
+    my $fh = $self->_fh;
+    foreach my $seq (@seq) {
+        $self->write_trace($fh, $seq, 'alf');
+    }
+    # Flush only when the stream asks for it and a handle exists.
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/asciitree.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/asciitree.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/asciitree.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,221 @@
+# $Id: asciitree.pm,v 1.4.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::asciitree
+#
+# Cared for by Chris Mungall <cjm at fruitfly.org>
+#
+# Copyright Chris Mungall
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::asciitree - asciitree sequence input/output stream
+
+=head1 SYNOPSIS
+
+  # It is probably best not to use this object directly, but
+  # rather go through the SeqIO handler system. Go:
+
+    $instream  = Bio::SeqIO->new(-file => $filename,
+                                 -format => 'chadoxml');
+    $outstream = Bio::SeqIO->new(-file => $filename,
+                                 -format => 'asciitree');
+
+    while ( my $seq = $instream->next_seq() ) {
+	    $outstream->write_seq($seq);
+    }
+
+
+=head1 DESCRIPTION
+
+This is a WRITE-ONLY SeqIO module. It writes a Bio::SeqI object
+containing nested SeqFeature objects in such a way that the SeqFeature
+containment hierarchy is visible as a tree structure
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Mungall
+
+Email cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::asciitree;
+use strict;
+
+
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # No format-specific state here: all arguments go to the parent initializer.
+    $self->SUPER::_initialize(@args);
+}
+
+=head2 show_detail
+
+ Title   : show_detail
+ Usage   : $obj->show_detail($newval)
+ Function:
+ Example :
+ Returns : value of show_detail (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub show_detail {
+    my ($self, @new_value) = @_;
+    # Any argument, even an explicit undef, overwrites the stored flag.
+    $self->{'show_detail'} = $new_value[0] if @new_value;
+    return $self->{'show_detail'};
+}
+
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    my ($self, @args) = @_;    # arguments are accepted but ignored
+    $self->throw("This is a WRITE-ONLY adapter");
+}
+
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object (must be seq) to the stream
+ Returns : 1 for success and 0 for error
+ Args    : array of 1 to n Bio::SeqI objects
+
+=cut
+
+sub write_seq {
+    my ($self, @seqs) = @_;
+    foreach my $seq ( @seqs ) {
+        $self->throw("Attempting to write with no seq!") unless defined $seq;
+        # Anything that is not a Bio::SeqI only triggers a warning; the dump is still attempted.
+        if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
+            $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
+        }
+        $self->_print("Seq: ".$seq->accession_number);
+        $self->_print("\n");
+        my @top_sfs = $seq->get_SeqFeatures;
+        $self->write_indented_sf(1, @top_sfs);
+    }
+    return 1;    # success value promised by the method's POD
+}
+
+sub write_indented_sf {
+    my $self = shift;
+    my $indent = shift;
+    my @sfs = @_;
+    foreach my $sf (@sfs) {
+        my $label = '';
+        if ($sf->has_tag('standard_name')) {
+            ($label) = $sf->get_tag_values('standard_name');
+        }
+        if ($sf->has_tag('product')) {    # checked last, so it overrides standard_name
+            ($label) = $sf->get_tag_values('product');
+        }
+        my $COLS = 60;
+        my $tab = ' ' x 10;
+        my @lines = ();
+        if ($self->show_detail) {
+            my @tags = $sf->all_tags;
+            foreach my $tag (@tags) {
+                my @vals = $sf->get_tag_values($tag);
+                foreach my $val (@vals) {
+                    $val = "\"$val\"";
+                    # Start a "/tag=" line, then move the quoted value onto it in pieces sized to fill each line up to $COLS.
+                    push(@lines, "$tab/$tag=");
+                    while (my $cut =
+                           substr($val, 0, $COLS - length($lines[-1]), '')) {
+                        $lines[-1] .= "$cut";
+                        if ($val) {
+                            push(@lines, $tab);
+                        }
+                    }
+                }
+            }
+        }
+        my $detail = join("\n", @lines);
+        # Only leaf features (those without sub-features) get a location column.
+        my @sub_sfs = $sf->get_SeqFeatures;
+        my $locstr = '';
+        if (!@sub_sfs) {
+            $locstr = $self->_locstr($sf);
+        }
+        # The label is passed as a sprintf argument so a '%' inside it is printed literally.
+        my $col1 = sprintf("%s%s %s", '  ' x $indent, $sf->primary_tag, $label);
+        my $line = sprintf("%-50s %s\n",
+                           substr($col1, 0, 50), $locstr);
+        $self->_print($line);
+        if ($detail) {
+            $self->_print($detail."\n");
+        }
+        $self->write_indented_sf($indent+1, @sub_sfs);
+    }
+    return;
+}
+
+sub _locstr {
+    my ($self, $sf) = @_;
+
+    # Strand symbol: '+' for positive values, '-' for negative, '.' otherwise.
+    my $strand = $sf->strand || 0;
+    my $ss = $strand > 0 ? '+'
+           : $strand < 0 ? '-'
+           :               '.';
+
+    # Only Bio::SeqFeatureI objects are inspected for a split location;
+    # the recursive call below is made on the individual sub-locations.
+    my $splitlocstr = '';
+    if ($sf->isa("Bio::SeqFeatureI")) {
+        my $location = $sf->location;
+        if ($location->isa("Bio::Location::SplitLocationI")) {
+            my @parts = map { $self->_locstr($_) } $location->each_Location;
+            $splitlocstr = "; SPLIT: " . join(" ", @parts);
+        }
+    }
+
+    return sprintf("%d..%d[%s] %s", $sf->start, $sf->end, $ss, $splitlocstr);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1464 @@
+#
+# BioPerl module for Bio::SeqIO::bsml
+#
+# Cared for by Charles Tilford (tilfordc at bms.com)
+# Copyright (C) Charles Tilford 2001
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+# Also at:   http://www.gnu.org/copyleft/lesser.html
+
+
+# Much of the basic documentation in this module has been
+# cut-and-pasted from the embl.pm (Ewan Birney) SeqIO module.
+
+
+=head1 NAME
+
+Bio::SeqIO::bsml - BSML sequence input/output stream
+
+=head1 SYNOPSIS
+
+ It is probably best not to use this object directly, but rather go
+ through the SeqIO handler system. To read a BSML file:
+
+    $stream = Bio::SeqIO->new( -file => $filename, -format => 'bsml');
+
+    while ( my $bioSeqObj = $stream->next_seq() ) {
+   	# do something with $bioSeqObj
+    }
+
+ To write a Seq object to the current file handle in BSML XML format:
+
+    $stream->write_seq( -seq => $seqObj);
+
+ If instead you would like a XML::DOM object containing the BSML, use:
+
+    my $newXmlObject = $stream->to_bsml( -seq => $seqObj);
+
+=head1 DEPENDENCIES
+
+ In addition to parts of the Bio:: hierarchy, this module uses:
+
+ XML::DOM
+
+=head1 DESCRIPTION
+
+ This object can transform Bio::Seq objects to and from BSML (XML)
+ flatfiles.
+
+=head2 NOTE:
+
+ 2/1/02 - I have changed the API to more closely match argument
+ passing used by other BioPerl methods ( -tag => value ). Internal
+ methods are using the same API, but you should not be calling those
+ anyway...
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+ Report bugs to the Bioperl bug tracking system to help us keep track
+ of the bugs and their resolution.
+ Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head2 Things Still to Do
+
+ * The module now uses the new Collection.pm system. However,
+   Annotations associated with a Feature object still seem to use the
+   old system, so parsing with the old methods are included..
+
+ * Generate Seq objects with no sequence data but an assigned
+   length. This appears to be an issue with Bio::Seq. It is possible
+   (and reasonable) to make a BSML document with features but no
+   sequence data.
+
+ * Support <Seq-data-import>. Do not know how commonly this is used.
+
+ * Some features are awaiting implementation in later versions of
+   BSML. These include:
+
+       * Nested feature support
+
+       * Complex feature (ie joins)
+
+       * Unambiguity in strand (ie -1,0,1, not just  'complement' )
+
+       * More friendly dblink structures
+
+ * Location.pm (or RangeI::union?) appears to have a bug when 'expand'
+   is used.
+
+ * More intelligent hunting for sequence and feature titles? It is not
+   terribly clear where the most appropriate field is located, better
+   grepping (eg looking for a reasonable count for spaces and numbers)
+   may allow for titles better than "AE008041".
+
+=head1 AUTHOR - Charles Tilford
+
+Bristol-Myers Squibb Bioinformatics
+
+Email tilfordc at bms.com
+
+I have developed the BSML specific code for this package, but have used
+code from other SeqIO packages for much of the nuts-and-bolts. In particular
+I have used code from the embl.pm module either directly or as a framework
+for many of the subroutines that are common to SeqIO modules.
+
+=cut
+
+package Bio::SeqIO::bsml;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use XML::DOM;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+
+use base qw(Bio::SeqIO);
+
+my $idcounter = {};  # Used to generate unique id values
+my $nvtoken = ": ";  # The token used if a name/value pair has to be stuffed
+                     # into a single line
+
+=head1 METHODS
+
+=cut
+
+# LS: this seems to get overwritten on line 1317, generating a redefinition error.  Dead code?
+# CAT: This was inappropriately added in revision 1.10 - I added the check for existence of a sequence factory to the actual _initialize
+# sub _initialize {
+#   my($self, @args) = @_;
+#   $self->SUPER::_initialize(@args);
+#   if( ! defined $self->sequence_factory ) {
+#       $self->sequence_factory(new Bio::Seq::SeqFactory(-verbose => $self->verbose(), -type => 'Bio::Seq::RichSeq'));
+#   }
+# }
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : my $bioSeqObj = $stream->next_seq
+ Function: Retrieves the next sequence from a SeqIO::bsml stream.
+ Returns : A reference to a Bio::Seq::RichSeq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # Overview: reads the <Sequence> element at index $self->{'current_node'}
+    # of the parsed DOM tree ($self->{'domtree'}), converts it into a
+    # sequence object built by the sequence factory, advances the index and
+    # returns the object.  Returns nothing once every <Sequence> has been
+    # consumed.
+
+    unless (exists $self->{'domtree'}) {
+        $self->throw("A BSML document has not yet been parsed.");
+        return;
+    }
+    my $dom = $self->{'domtree'};
+    my $seqElements = $dom->getElementsByTagName("Sequence");
+    if ($self->{'current_node'} == $seqElements->getLength) {
+        # There are no more <Sequence>s to process
+        return;
+    }
+    my $xmlSeq = $seqElements->item($self->{'current_node'});
+
+    # Only build a sequence object once we know a <Sequence> is left
+    my $bioSeq = $self->sequence_factory->create(-verbose => $self->verbose());
+
+    # Assume that title attribute contains the best display id
+    if (my $val = $xmlSeq->getAttribute("title")) {
+        $bioSeq->display_id($val);
+    }
+
+    # Set the molecule type
+    if (my $val = $xmlSeq->getAttribute("molecule")) {
+        my %mol = ('dna' => 'DNA', 'rna' => 'RNA', 'aa' => 'protein');
+        $bioSeq->molecule($mol{ lc($val) });
+    }
+
+    # Set the accession number
+    if (my $val = $xmlSeq->getAttribute("ic-acckey")) {
+        $bioSeq->accession_number($val);
+    }
+
+    # Get the sequence data for the element
+    if (my $seqData = FIRSTDATA($xmlSeq->getElementsByTagName("Seq-data")
+                                ->item(0))) {
+        # Sequence data exists, transfer to the Seq object
+        # Remove white space and CRs (not necessary?)
+        $seqData =~ s/[\s\n\r]//g;
+        $bioSeq->seq($seqData);
+    } elsif (my $import = $xmlSeq->getElementsByTagName("Seq-dataimport")
+             ->item(0)) {
+#>>>>  # What about <Seq-data-import> ??
+
+    } elsif (my $val = $xmlSeq->getAttribute("length")) {
+        # No sequence defined, set the length directly
+
+#>>>>   # This does not appear to work - length is apparently calculated
+        # from the sequence. How to make a "virtual" sequence??? Such
+        # creatures are common in BSML...
+        $bioSeq->length($val);
+    }
+
+    my $species = Bio::Species->new();
+    my @classification = ();
+
+    # Peruse the generic <Attributes> - those that are direct children of
+    # the <Sequence> or the <Feature-tables> element
+    # Sticky wicket here - data not controlled by schema, could be anything
+    my @seqDesc = ();
+    # Attribute names that map directly onto Bio::Species methods
+    my %specs = ('common_name' => 'y',
+                 'genus' => 'y',
+                 'species' => 'y',
+                 'sub_species' => 'y', );
+    # Sequence method name => patterns that an attribute name may match
+    my %seqMap = (
+                  'add_date' => [ 'date' ],
+                  'keywords' => [ 'keyword', ],
+                  'seq_version' => [ 'version' ],
+                  'division' => [ 'division' ],
+                  'add_secondary_accession' => ['accession'],
+                  'pid' => ['pid'],
+                  'primary_id' => [ 'primary.id', 'primary_id' ],
+                  );
+    my $floppies = GETFLOPPIES($xmlSeq);
+    foreach my $attr (@{$floppies}) {
+        # Don't want to get attributes from <Feature> or <Table> elements yet
+        my $parent = $attr->getParentNode->getNodeName;
+        next unless ($parent eq "Sequence" || $parent eq "Feature-tables");
+
+        my ($name, $content) = FLOPPYVALS($attr);
+        # <Qualifier>s may legitimately come back without a name
+        $name = defined $name ? lc($name) : "";
+        if (exists $specs{$name}) { # It looks like part of species...
+            $species->$name($content);
+            next;
+        }
+        my $value = "";
+        # Cycle through the Seq methods:
+        foreach my $method (keys %seqMap) {
+            # Cycle through potential matching attributes:
+            foreach my $match (@{$seqMap{$method}}) {
+                # If the <Attribute> name matches one of the keys,
+                # set $value, unless it has already been set
+                $value ||= $content if ($name =~ /$match/i);
+            }
+            $value = "" unless defined $value;
+            if ($value ne "") {
+                $bioSeq->$method($value);
+                last;
+            }
+        }
+        next if ($value ne "");
+
+        # NOTE(review): a name of exactly 'species' is already consumed by
+        # the %specs check above, so this branch looks unreachable - confirm
+        # which of the two treatments is the intended one.
+        if ($name =~ /^species$/i) {   # Uh, it's the species designation?
+            if ($content =~ / /) {
+                # Assume that a full species name has been provided
+                # This will screw up if the last word is the subspecies...
+                my @break = split " ", $content;
+                @classification = reverse @break;
+            } else {
+                $classification[0] = $content;
+            }
+            next;
+        }
+        if ($name =~ /sub[_ ]?species/i) {  # Should be the subspecies...
+            $species->sub_species( $content );
+            next;
+        }
+        if ($name =~ /classification/i) {  # Should be species classification
+            # We will assume that there are spaces separating the terms, and
+            # keep only the first run of word characters of each term so that
+            # other cruft (eg semi-colons) is dropped.  Terms without any
+            # word character are skipped rather than inheriting a stale $1.
+            my @bits;
+            foreach my $bit (split " ", $content) {
+                push @bits, $1 if ($bit =~ /(\w+)/);
+            }
+            $species->classification( @bits );
+            next;
+        }
+        if ($name =~ /comment/) {
+            my $com = Bio::Annotation::Comment->new('-text' => $content);
+            $bioSeq->annotation->add_Annotation('comment', $com);
+            next;
+        }
+        # Description line - collect all descriptions for later assembly
+        if ($name =~ /descr/) {
+            push @seqDesc, $content;
+            next;
+        }
+        # Ok, we have no idea what this attribute is. Dump to SimpleValue
+        # (loaded here because the file's visible 'use' lines do not list it)
+        require Bio::Annotation::SimpleValue;
+        my $simp = Bio::Annotation::SimpleValue->new( -value => $content);
+        $bioSeq->annotation->add_Annotation($name, $simp);
+    }
+    if (@seqDesc) {
+        $bioSeq->desc( join "; ", @seqDesc);
+    }
+
+#>>>>  This should be modified so that any IDREF associated with the
+    # <Reference> is then used to associate the reference with the
+    # appropriate Feature
+
+    # Extract out <Reference>s associated with the sequence
+    my @refs;
+    # Reference constructor argument => child element holding its text
+    my %tags = (
+                -title => "RefTitle",
+                -authors => "RefAuthors",
+                -location => "RefJournal",
+                );
+    # Reference method name => patterns that an attribute name may match
+    my %refMap = (
+                  comment => [ 'comment', 'remark' ],
+                  medline => [ 'medline', ],
+                  pubmed => [ 'pubmed' ],
+                  start => [ 'start', 'begin' ],
+                  end => [ 'stop', 'end' ],
+                  );
+    foreach my $ref ( $xmlSeq->getElementsByTagName("Reference") ) {
+        my %refVals;
+        foreach my $tag (keys %tags) {
+            my $rt = FIRSTDATA($ref->getElementsByTagName($tags{$tag})
+                               ->item(0));
+            next unless defined $rt;   # element absent or without text
+            $rt =~ s/^[\s\r\n]+//;     # Kill leading space
+            $rt =~ s/[\s\r\n]+$//;     # Kill trailing space
+            $rt =~ s/[\s\r\n]+/ /g;    # Collapse ALL internal space runs
+            $refVals{$tag} = $rt;
+        }
+        my $reference = Bio::Annotation::Reference->new( %refVals );
+
+        # Pull out any <Reference> information hidden in <Attributes>
+        my @refCom = ();
+        my $refFloppies = GETFLOPPIES($ref);
+        foreach my $attr (@{$refFloppies}) {
+            my ($name, $content) = FLOPPYVALS($attr);
+            $name = "" unless defined $name;
+            my $value = "";
+            # Cycle through the Reference methods:
+            foreach my $method (keys %refMap) {
+                # Cycle through potential matching attributes:
+                foreach my $match (@{$refMap{$method}}) {
+                    # If the <Attribute> name matches one of the keys,
+                    # set $value, unless it has already been set
+                    $value ||= $content if ($name =~ /$match/i);
+                }
+                $value = "" unless defined $value;
+                if ($value ne "") {
+                    # Call the accessor directly: no string eval, so free
+                    # text values are stored intact, and stop at the first
+                    # matching method so the value is not also pushed into
+                    # every remaining one.
+                    $reference->$method($value);
+                    last;
+                }
+            }
+            next if ($value ne "");
+            # Don't know what the <Attribute> is, dump it to comments:
+            push @refCom, $name . $nvtoken .
+                (defined $content ? $content : "");
+        }
+        if (@refCom) {
+            # Random stuff was found, tack it to the comment field
+            my $exist = $reference->comment;
+            my @parts = (defined $exist && $exist ne "") ? ($exist) : ();
+            $reference->comment( join ", ", @parts, @refCom );
+        }
+        push @refs, $reference;
+    }
+    $bioSeq->annotation->add_Annotation('reference'=>$_) foreach @refs;
+
+    # Extract the <Feature>s for this <Sequence>
+    foreach my $feat ( $xmlSeq->getElementsByTagName("Feature") ) {
+        $bioSeq->add_SeqFeature( $self->_parse_bsml_feature($feat) );
+    }
+
+    $species->classification( @classification );
+    $bioSeq->species( $species );
+
+# $seq->annotation->add_DBLink(@links);    ->
+
+    $self->{'current_node'}++;
+    return $bioSeq;
+}
+#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Gather the <Attribute> and then the <Qualifier> elements that
+# getElementsByTagName reports for the given DOM node, and return them
+# together as one array reference (<Attribute>s first).
+# (nicknamed 'floppy' because these elements have poor/no schema control)
+sub GETFLOPPIES {
+    my ($node) = @_;
+
+    my @found;
+    foreach my $tagname ("Attribute", "Qualifier") {
+        my $nodelist = $node->getElementsByTagName($tagname);
+        # An empty list yields the range 0 .. -1, i.e. no iterations
+        foreach my $idx (0 .. $nodelist->getLength - 1) {
+            push @found, $nodelist->item($idx);
+        }
+    }
+    return \@found;
+}
+#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Translate a DOM <Attribute> or <Qualifier> element into a (name, value)
+# list.  Any other node type yields (undef, undef).
+sub FLOPPYVALS {
+    my ($node) = @_;
+
+    my ($name, $value);
+    my $kind = $node->getNodeName;
+    if ($kind eq "Attribute") {
+        $name  = $node->getAttribute('name');
+        $value = $node->getAttribute('content');
+    } elsif ($kind eq "Qualifier") {
+        # Whereas <Attribute>s require both 'name' and 'content' attributes,
+        # a <Qualifier> may leave either one blank (and sometimes does);
+        # a blank attribute is reported as undef rather than "".
+        my $type = $node->getAttribute('value-type');
+        my $text = $node->getAttribute('value');
+        $name  = $type unless ($type eq "");
+        $value = $text unless ($text eq "");
+    }
+    return ($name, $value);
+}
+#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Return the value of the first TEXT_NODE found among the direct children
+# of an element (undef when there is none, nothing when no element given).
+# Rationale - avoid grabbing a comment rather than the PCDATA. Not foolproof...
+sub FIRSTDATA {
+    my ($element) = @_;
+    return unless ($element);
+
+    my $children = $element->getChildNodes;
+    my $text;
+    foreach my $idx (0 .. $children->getLength - 1) {
+        my $child = $children->item($idx);
+        next unless ($child->getNodeType == XML::DOM::Node::TEXT_NODE());
+        $text = $child->getNodeValue;
+        last;
+    }
+    return $text;
+}
+#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Replace every run of whitespace in the given string with one space and
+# return the result (leading/trailing runs become a single space as well).
+sub STRIP {
+    my ($text) = @_;
+    $text =~ s/[\s\r\n]+/ /g;
+    return $text;
+}
+
+=head2 to_bsml
+
+ Title   : to_bsml
+ Usage   : my $domDoc = $obj->to_bsml(@args)
+ Function: Generates an XML structure for one or more Bio::Seq objects.
+           If $seqref is an array ref, the XML tree generated will include
+           all the sequences in the array.
+ Returns : A reference to the XML DOM::Document object generated / modified
+ Args    : Argument array in form of -key => val. Recognized keys:
+
+      -seq A Bio::Seq reference, or an array reference of many of them
+
+   -xmldoc Specifies an existing XML DOM document to add the sequences
+           to. If included, then only data (no page formatting) will
+           be added. If not, a new XML::DOM::Document will be made,
+           and will be populated with both <Sequence> data, as well as
+           <Page> display elements.
+
+   -nodisp Do not generate <Display> elements, or any children
+           thereof, even if -xmldoc is not set.
+
+ -skipfeat If set to 'all', all <Feature>s will be skipped.  If it is
+           a hash reference, any <Feature> with a class matching a key
+           in the hash will be skipped - for example, to skip 'source'
+           and 'score' features, use:
+
+               -skipfeat => { source => 'Y', score => 'Y' }
+
+ -skiptags As above: if set to 'all', no tags are included, and if a
+           hash reference, those specific tags will be ignored.
+
+           Skipping some or all tags and features can result in
+           noticeable speed improvements.
+
+   -nodata If true, then <Seq-data> will not be included.  This may be
+           useful if you just want annotations and do not care about
+           the raw ACTG information.
+
+   -return Default is 'xml', which will return a reference to the BSML
+           XML object. If set to 'seq' will return an array ref of the
+           <Sequence> objects added (rather than the whole XML object)
+
+    -close Early BSML browsers will crash if an element *could* have
+           children but does not, and is closed as an empty element
+           e.g. <Styles/>. If -close is true, then such tags are given
+           a comment child to explicitly close them e.g.  <Styles><!--
+           --></Styles>. This is default true, set to "0" if you do
+           not want this behavior.
+
+ Examples : my $domObj = $stream->to_bsml( -seq => \@fourCoolSequenceObjects,
+					   -skipfeat => { source => 1 },
+					   );
+
+            # Or add sequences to an existing BSML document:
+            $stream->to_bsml( -seq => \@fourCoolSequenceObjects,
+			      -skipfeat => { source => 1 },
+			      -xmldoc => $myBsmlDocumentInProgress,  );
+
+=cut
+
+sub to_bsml {
+    my $self = shift;
+
+    # Overview: builds (or extends, when -xmldoc is given) an XML DOM
+    # document, adding one <Sequence> element per Bio::Seq object passed in
+    # -seq.  Returns the document, or an array ref of the new <Sequence>
+    # elements when -return matches /seq/i.
+    my $args = $self->_parseparams( -close => 1,
+                                    -return => 'xml',
+                                    @_);
+    $args->{NODISP} ||= $args->{NODISPLAY};
+
+    # -seq may be one object or an array ref of them.  Undefined entries
+    # (eg -seq not supplied at all) are skipped rather than dereferenced.
+    my $seqref = $args->{SEQ};
+    my @bioSeqs = (ref($seqref) eq 'ARRAY') ? @{$seqref} : ($seqref);
+    @bioSeqs = grep { defined $_ } @bioSeqs;
+
+    # -skipfeat: either the string 'all' or a hash ref keyed by feature
+    # class.  Work on a lower-cased private copy so the caller's hash is
+    # left untouched and the work is done once, not once per sequence.
+    my %skipFeat;
+    my $sf = $args->{SKIPFEAT};
+    if (defined $sf && !ref($sf) && $sf eq 'all') {
+        %skipFeat = (all => 1);
+    } elsif (ref($sf) eq 'HASH') {
+        foreach my $class (keys %{$sf}) {
+            $skipFeat{lc($class)} = $sf->{$class};
+        }
+    }
+
+    # -skiptags: either a string matching /all/i or a hash ref keyed by tag
+    my $st = $args->{SKIPTAGS};
+    my $skipAllTags = (defined $st && !ref($st) && $st =~ /all/i) ? 1 : 0;
+    my $skipTag = (ref($st) eq 'HASH') ? $st : {};
+
+    #############################
+    # Basic BSML XML Components #
+    #############################
+
+    my $xml;
+    my ($bsmlElem, $defsElem, $seqsElem, $dispElem);
+    if ($args->{XMLDOC}) {
+        # The user has provided an existing XML DOM object
+        $xml = $args->{XMLDOC};
+        # eval guards against -xmldoc being an unblessed value
+        unless (eval { $xml->isa("XML::DOM::Document") }) {
+            # $myDoc below is literal text, hence the single quotes
+            $self->throw("SeqIO::bsml.pm error:\n" .
+                 'When calling ->to_bsml( { xmldoc => $myDoc }), $myDoc ' .
+                 "\n" .
+                 "should be an XML::DOM::Document object, or an object that\n" .
+                 'inherits from that class (like BsmlHelper.pm)');
+        }
+    } else {
+        # The user has not provided a new document, make one from scratch
+        $xml = XML::DOM::Document->new();
+        $xml->setXMLDecl( $xml->createXMLDecl("1.0") );
+        my $url = "http://www.labbook.com/dtd/bsml2_2.dtd";
+        my $doc = $xml->createDocumentType("Bsml",$url);
+        $xml->setDoctype($doc);
+        $bsmlElem = $self->_addel( $xml, 'Bsml');
+        $defsElem = $self->_addel( $bsmlElem, 'Definitions');
+        $seqsElem = $self->_addel( $defsElem, 'Sequences');
+        unless ($args->{NODISP}) {
+            $dispElem = $self->_addel( $bsmlElem, 'Display');
+            my $stylElem = $self->_addel( $dispElem, 'Styles');
+            my $style = $self->_addel( $stylElem, 'Style', {
+                type => "text/css" });
+            my $styleText =
+                qq(Interval-widget { display : "1"; }\n) .
+                    qq(Feature { display-auto : "1"; });
+            $style->appendChild( $xml->createTextNode($styleText) );
+        }
+    }
+
+    # Establish fundamental BSML elements: reuse the first one found in the
+    # document, otherwise create it so that a user-supplied document without
+    # <Sequences> does not end in a method call on undef.
+    $bsmlElem ||= $xml->getElementsByTagName("Bsml")->item(0)
+        || $self->_addel( $xml, 'Bsml');
+    $defsElem ||= $xml->getElementsByTagName("Definitions")->item(0)
+        || $self->_addel( $bsmlElem, 'Definitions');
+    $seqsElem ||= $xml->getElementsByTagName("Sequences")->item(0)
+        || $self->_addel( $defsElem, 'Sequences');
+
+    ###############
+    # <Sequences> #
+    ###############
+
+    # Map over Bio::Seq to BSML
+    my %mol = ('dna' => 'DNA', 'rna' => 'RNA', 'protein' => 'AA');
+    my @xmlSequences;
+
+    foreach my $bioSeq (@bioSeqs) {
+        my $xmlSeq = $xml->createElement("Sequence");
+        my $FTs    = $xml->createElement("Feature-tables");
+
+        # Array references to hold <Reference> objects:
+        my $seqRefs = []; my $featRefs = [];
+        # Array references to hold <Attribute> values (not objects).
+        # Each accessor is wrapped in eval{} so that sequence classes
+        # lacking the method simply contribute nothing.
+        my $seqDesc = [];
+        push @{$seqDesc}, ["comment" , "This file generated to BSML 2.2 standards - joins will be collapsed to a single feature enclosing all members of the join"];
+        push @{$seqDesc}, ["description" , eval{$bioSeq->desc}];
+        foreach my $kwd ( eval{$bioSeq->get_keywords} ) {
+            push @{$seqDesc}, ["keyword" , $kwd];
+        }
+        push @{$seqDesc}, ["keyword" , eval{$bioSeq->keywords}];
+        push @{$seqDesc}, ["version" , eval{$bioSeq->seq_version}];
+        push @{$seqDesc}, ["division" , eval{$bioSeq->division}];
+        push @{$seqDesc}, ["pid" , eval{$bioSeq->pid}];
+#       push @{$seqDesc}, ["bio_object" , ref($bioSeq)];
+        push @{$seqDesc}, ["primary_id" , eval{$bioSeq->primary_id}];
+        foreach my $dt (eval{$bioSeq->get_dates()} ) {
+            push @{$seqDesc}, ["date" , $dt];
+        }
+        foreach my $ac (eval{$bioSeq->get_secondary_accessions()} ) {
+            push @{$seqDesc}, ["secondary_accession" , $ac];
+        }
+
+        # Determine the accession number and a unique identifier
+        my $acc = $bioSeq->accession_number;
+        $acc = "" if (!defined $acc || $acc eq "unknown");
+        my $id;
+        my $pi = $bioSeq->primary_id;
+        if ($pi && $pi !~ /Bio::/) {
+            # Not sure I understand what primary_id is... It sometimes
+            # is a string describing a reference to a BioSeq object...
+            $id = "SEQ" . $pi;
+        } else {
+            # Nothing useful found, make a new unique ID
+            $id = $acc || ("SEQ-io" . $idcounter->{Sequence}++);
+        }
+        # An id field with spaces is interpreted as an idref - kill the spaces
+        $id =~ s/ /-/g;
+        # Map over <Sequence> attributes
+        my %attr = ( 'title'         => $bioSeq->display_id,
+                     'length'        => $bioSeq->length,
+                     'ic-acckey'     => $acc,
+                     'id'            => $id,
+                     'representation' => 'raw',
+                     );
+        if ($bioSeq->can('molecule')) {
+            my $molecule = $bioSeq->molecule;
+            $attr{molecule} = $mol{ lc($molecule) } if (defined $molecule);
+        }
+
+        # Empty or undefined values are not written out
+        foreach my $a (keys %attr) {
+            $xmlSeq->setAttribute($a, $attr{$a}) if (defined $attr{$a} &&
+                                                     $attr{$a} ne "");
+        }
+        # Orphaned Attributes:
+        $xmlSeq->setAttribute('topology', 'circular')
+            if ($bioSeq->is_circular);
+        # <Sequence> strand, locus
+
+        $self->_add_page($xml, $xmlSeq) if ($dispElem);
+        ################
+        # <Attributes> #
+        ################
+
+        # Check for Bio::Annotations on the * <Sequence> *.
+        $self->_parse_annotation( -xml => $xml, -obj => $bioSeq,
+                                  -desc => $seqDesc, -refs => $seqRefs);
+
+        # Incorporate species data
+        if (ref($bioSeq->species) eq 'Bio::Species') {
+            # Need to peer into Bio::Species ...
+            my @specs = ('common_name', 'genus', 'species', 'sub_species');
+            foreach my $sp (@specs) {
+                next unless (my $val = $bioSeq->species()->$sp());
+                push @{$seqDesc}, [$sp , $val];
+            }
+            push @{$seqDesc}, ['classification',
+                               (join " ", $bioSeq->species->classification) ];
+            # Species::binomial will return "genus species sub_species" ...
+        } elsif (my $val = $bioSeq->species) {
+            # Ok, no idea what it is, just dump it in there...
+            push @{$seqDesc}, ["species", $val];
+        }
+
+        # Add the description <Attribute>s for the <Sequence>
+        foreach my $seqD (@{$seqDesc}) {
+            $self->_addel($xmlSeq, "Attribute", {
+                name => $seqD->[0], content => $seqD->[1]}) if ($seqD->[1]);
+        }
+
+        # If sequence references were added, make a Feature-table for them
+        if (@{$seqRefs}) {
+            my $seqFT = $self->_addel($FTs, "Feature-table", {
+                title => "Sequence References", });
+            foreach my $feat (@{$seqRefs}) {
+                $seqFT->appendChild($feat);
+            }
+        }
+
+        # This is the appropriate place to add <Feature-tables>
+        $xmlSeq->appendChild($FTs);
+
+        #############
+        # <Feature> #
+        #############
+
+#>>>>   # Perhaps it is better to loop through top_Seqfeatures?...
+#>>>>   # ...however, BSML does not have a hierarchy for Features
+
+        # Loop through all the features
+        my @features = $bioSeq->all_SeqFeatures();
+        if (@features && !$skipFeat{all}) {
+            my $ft = $self->_addel($FTs, "Feature-table", {
+                title => "Features", });
+            foreach my $bioFeat (@features ) {
+                my $featDesc = [];
+                my $class = lc($bioFeat->primary_tag);
+                # The user may have specified to ignore this type of feature
+                next if ($skipFeat{$class});
+                my $id = "FEAT-io" . $idcounter->{Feature}++;
+                my $xmlFeat = $self->_addel( $ft, 'Feature', {
+                    'id' => $id,
+                    'class' => $class ,
+                    'value-type' => $bioFeat->source_tag });
+                # Check for Bio::Annotations on the * <Feature> *.
+                $self->_parse_annotation( -xml => $xml, -obj => $bioFeat,
+                                          -desc => $featDesc, -id => $id,
+                                          -refs =>$featRefs, );
+                # Add the description stuff for the <Feature>
+                foreach my $de (@{$featDesc}) {
+                    $self->_addel($xmlFeat, "Attribute", {
+                        name => $de->[0], content => $de->[1]}) if ($de->[1]);
+                }
+                $self->_parse_location($xml, $xmlFeat, $bioFeat);
+
+                # loop through the tags, add them as <Qualifiers>
+                # Tags can consume a lot of CPU cycles, and can often be
+                # rather non-informative, so -skiptags can allow total or
+                # selective omission of tags.
+                next if ($skipAllTags);
+                foreach my $tag ($bioFeat->all_tags()) {
+                    next if (exists $skipTag->{$tag});
+                    foreach my $val ($bioFeat->each_tag_value($tag)) {
+                        $self->_addel( $xmlFeat, 'Qualifier', {
+                            'value-type' => $tag ,
+                            'value' => $val });
+                    }
+                }
+            }
+        }
+
+        ##############
+        # <Seq-data> #
+        ##############
+
+        # Add sequence data
+        if ( (my $data = $bioSeq->seq) && !$args->{NODATA} ) {
+            my $d = $self->_addel($xmlSeq, 'Seq-data');
+            $d->appendChild( $xml->createTextNode($data) );
+        }
+
+        # If references were added, make a Feature-table for them
+        if (@{$featRefs}) {
+            my $seqFT = $self->_addel($FTs, "Feature-table", {
+                title => "Feature References", });
+            foreach my $feat (@{$featRefs}) {
+                $seqFT->appendChild($feat);
+            }
+        }
+
+        # Place the completed <Sequence> tree as a child of <Sequences>
+        $seqsElem->appendChild($xmlSeq);
+        push @xmlSequences, $xmlSeq;
+    }
+
+    # Prevent browser crashes by explicitly closing empty elements:
+    if ($args->{CLOSE}) {
+        my @problemChild = ('Sequences', 'Sequence', 'Feature-tables',
+                            'Feature-table', 'Screen', 'View',);
+        foreach my $kid (@problemChild) {
+            foreach my $prob ($xml->getElementsByTagName($kid)) {
+                unless ($prob->hasChildNodes) {
+                    $prob->appendChild(
+                        $xml->createComment(" Must close <$kid> explicitly "));
+                }
+            }
+        }
+    }
+
+    if (defined $args->{RETURN} &&
+        $args->{RETURN} =~ /seq/i) {
+        return \@xmlSequences;
+    } else {
+        return $xml;
+    }
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $obj->write_seq(@args)
+ Function: Prints out an XML structure for one or more Bio::Seq objects.
+           If $seqref is an array ref, the XML tree generated will include
+           all the sequences in the array. This method is fairly simple,
+           most of the processing is performed within to_bsml.
+ Returns : A reference to the XML object generated / modified
+ Args    : Argument array. Recognized keys:
+
+      -seq A Bio::Seq reference, or an array reference of many of them
+
+           Alternatively, the method may be called simply as...
+
+           $obj->write_seq( $bioseq )
+
+           ... if only a single argument is passed, it is assumed that
+           it is the sequence object (can also be an array ref of
+           many Seq objects )
+
+-printmime If true prints "Content-type: $mimetype\n\n" at top of
+           document, where $mimetype is the value designated by this
+           key. For generic XML use text/xml, for BSML use text/x-bsml
+
+   -return This option will be suppressed, since the nature of this
+           method is to print out the XML document. If you wish to
+           retrieve the <Sequence> objects generated, use the to_bsml
+           method directly.
+
+=cut
+
+sub write_seq {
+    my ($self, @params) = @_;
+
+    # Options are parsed from the arguments exactly as they were passed in
+    my $opts = $self->_parseparams(@params);
+
+    # A lone argument is taken to be the sequence object (or array ref)
+    unshift @params, "-seq" if (@params == 1);
+
+    # Let to_bsml build the DOM document; -return is forced to undef so
+    # that the document itself comes back.
+    my $dom = $self->to_bsml(@params, -return => undef);
+
+    # Serialise, then break the line after every '>' for readability
+    my $text = $dom->toString;
+    $text =~ s/>/>\n/g;
+
+    if ($opts->{PRINTMIME}) {
+        $self->_print("Content-type: " . $opts->{PRINTMIME} . "\n\n");
+    }
+    $self->_print($text);
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    # Hand back the DOM tree in case the caller wants to use it further
+    return $dom;
+}
+
+=head1 INTERNAL METHODS
+#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-
+
+ The following methods are used for internal processing, and should probably
+ not be accessed by the user.
+
+=head2 _parse_location
+
+ Title   : _parse_location
+ Usage   : $obj->_parse_location($xmlDocument, $parentElem, $SeqFeatureObj)
+ Function: Adds <Interval-loc> and <Site-loc> children to <$parentElem> based
+           on locations / sublocations found in $SeqFeatureObj. If
+           sublocations exist, the original location will be ignored.
+ Returns : An array ref containing the elements added to the parent.
+           These will have already been added to <$parentElem>
+ Args    : 0 The DOM::Document being modified
+           1 The DOM::Element parent that you want to add to
+           2 Reference to the Bio::SeqFeature being analyzed
+
+=cut
+
+    ###############################
+    # <Interval-loc> & <Site-loc> #
+    ###############################
+
+sub _parse_location {
+    my ($self, $xml, $xmlFeat, $bioFeat) = @_;
+    my $bioLoc = $bioFeat->location;
+
+    my @locations = (ref($bioLoc) =~ /Split/)
+        ? $bioLoc->sub_Location
+        : ($bioLoc);
+    # BSML 2.2 does not recognize / support joins, so only the upper-level
+    # location is used.  The line below can be deleted or commented out
+    # if/when BSML 3 supports complex interval definitions:
+    @locations = ($bioLoc);
+
+    # Elements created below, in the order they were added to $xmlFeat
+    my @added = ();
+
+    foreach my $loc (@locations) {
+        my ($start, $end) = ($loc->start, $loc->end);
+        my %locAttr;
+        # Strand information is not well described in BSML
+        $locAttr{complement} = 1 if ($loc->strand == -1);
+
+        if ($start eq "") {
+            # Without a start position nothing can be written
+            warn "Failure to parse SeqFeature location. Start = '$start' & End = '$end'";
+            next;
+        }
+
+        if ($start == $end || $end eq "") {
+            # A single position: <Site-loc>
+            $locAttr{sitepos} = $start;
+            push @added, $self->_addel($xmlFeat,'Site-loc',\%locAttr);
+            next;
+        }
+
+        # A range: <Interval-loc>, always written low-to-high
+        if ($start > $end) {
+            # The feature is on the complementary strand
+            ($start, $end) = ($end, $start);
+            $locAttr{complement} = 1;
+        }
+        $locAttr{startpos} = $start;
+        $locAttr{endpos} = $end;
+        push @added, $self->_addel($xmlFeat,'Interval-loc',\%locAttr);
+    }
+    return \@added;
+}
+
+=head2 _parse_bsml_feature
+
+ Title   : _parse_bsml_feature
+ Usage   : $obj->_parse_bsml_feature($xmlFeature )
+ Function: Will examine the <Feature> element provided by $xmlFeature and
+           return a generic seq feature.
+ Returns : Bio::SeqFeature::Generic
+ Args    : 0 XML::DOM::Element <Feature> being analyzed.
+
+=cut
+
+sub _parse_bsml_feature {
+    my ($self, $feat) = @_;
+
+    my $basegsf = Bio::SeqFeature::Generic->new;
+       # score
+       # frame
+       # source_tag
+
+    # The class attribute, when present, becomes the primary tag
+    my $class = $feat->getAttribute("class");
+    $basegsf->primary_tag($class) if ($class);
+
+    # Positional information is in <Interval-loc>s or <Site-loc>s.
+    # Collect them in document order, to try to recreate joins...
+    my @locations = grep {
+        my $nodeName = $_->getNodeName;
+        $nodeName eq "Interval-loc" || $nodeName eq "Site-loc";
+    } $feat->getChildNodes;
+
+    if (@locations == 1) {
+        # A single location is written straight onto the feature itself
+        $self->_parse_bsml_location($locations[0], $basegsf);
+    } elsif (@locations > 1) {
+#>>>>   # This is not working, I think the error is somewhere downstream
+        # of add_sub_SeqFeature, probably in RangeI::union ?
+        # The sub features are added fine, but the EXPANDed parent feature
+        # location has a messed up start - Bio::SeqFeature::Generic ref
+        # instead of an integer - and an incorrect end  - the end of the first
+        # sub feature added, not of the union of all of them.
+
+        # Also, the SeqIO::genbank.pm output is odd - the sub features appear
+        # to be listed with the *previous* feature, not this one.
+
+        foreach my $location (@locations) {
+            my $subgsf = $self->_parse_bsml_location($location);
+            $basegsf->add_sub_SeqFeature($subgsf, 'EXPAND');
+        }
+    }
+    # Nothing is done when there are no locations at all.
+
+    # Every <Attribute> / <Qualifier> found is stored as a tag on the
+    # feature, under its lower-cased name.
+    # (Names matching /xref/i might one day go into DBLinks instead.)
+    foreach my $attr (@{ GETFLOPPIES($feat) }) {
+        my ($name, $content) = FLOPPYVALS($attr);
+        $basegsf->add_tag_value(lc($name), $content);
+    }
+
+    # Mostly this helps with debugging, but may be of utility...
+    # Add a tag holding the BSML id value
+    if ( (my $val = $feat->getAttribute('id')) &&
+         !$basegsf->has_tag('bsml-id')) {
+        # Decided that this got a little sloppy...
+#       $basegsf->add_tag_value("bsml-id", $val);
+    }
+    return $basegsf;
+}
+
+=head2 _parse_bsml_location
+
+ Title   : _parse_bsml_location
+ Usage   : $obj->_parse_bsml_location( $intOrSiteLoc, $gsfObject )
+ Function: Will examine the <Interval-loc> or <Site-loc> element provided
+ Returns : Bio::SeqFeature::Generic
+ Args    : 0 XML::DOM::Element <Interval/Site-loc> being analyzed.
+           1 Optional SeqFeature::Generic to use
+
+=cut
+
+sub _parse_bsml_location {
+    my ($self, $loc, $gsf) = @_;
+
+    # Reads start/end/strand from an <Interval-loc> or <Site-loc> element
+    # and stores them on $gsf (a new Bio::SeqFeature::Generic is created
+    # when none is supplied).  Returns the feature, or nothing - after a
+    # warning - for any other element type.
+    my $type = $loc->getNodeName;
+    my ($start, $end);
+    if ($type eq 'Interval-loc') {
+        $start = $loc->getAttribute('startpos');
+        $end = $loc->getAttribute('endpos');
+    } elsif ($type eq 'Site-loc') {
+        # A site is a one-position range
+        $start = $end = $loc->getAttribute('sitepos');
+    } else {
+        warn "Unknown location type '$type', could not make GSF\n";
+        return;
+    }
+
+    # Only allocate a feature once the element type is known to be usable
+    $gsf ||= Bio::SeqFeature::Generic->new;
+    $gsf->start($start);
+    $gsf->end($end);
+
+    # BSML does not have an explicit method to set undefined strand, so:
+    #   complement present and true  -> minus strand
+    #   complement present and false -> plus strand (eg complement="0")
+    #   complement absent / empty    -> strand nonspecific
+    my $complement = $loc->getAttribute("complement");
+    if (defined $complement && $complement ne "") {
+        $gsf->strand($complement ? -1 : 1);
+    } else {
+        # We're setting "strand nonspecific" here - bad idea?
+        # In most cases the user likely meant it to be on the + strand
+        $gsf->strand(0);
+    }
+
+    return $gsf;
+}
+
+=head2 _parse_reference
+
+ Title   : _parse_reference
+ Usage   : $obj->_parse_reference(@args )
+ Function: Makes a new <Reference> object from a ::Reference, which is
+           then stored in an array provided by -refs. It will be
+           appended to the XML tree later.
+ Returns :
+ Args    : Argument array. Recognized keys:
+
+      -xml The DOM::Document being modified
+
+   -refobj The Annotation::Reference Object
+
+     -refs An array reference to hold the new <Reference> DOM object
+
+       -id Optional. If the XML id for the 'calling' element is
+           provided, it will be placed in any <Reference> refs
+           attribute.
+
+=cut
+
+sub _parse_reference {
+    my $self = shift;
+    my $args = $self->_parseparams( @_);
+    my $xml    = $args->{XML};
+    my $ref    = $args->{REFOBJ};
+    my $refRef = $args->{REFS};
+
+    ###############
+    # <Reference> #
+    ###############
+
+    my $xmlRef = $xml->createElement("Reference");
+#>> This may not be the right way to make a BSML dbxref...
+    my $medline = $ref->medline;
+    $xmlRef->setAttribute('dbxref', $medline) if ($medline);
+
+    # Characteristics that become <Attribute> children; false (empty,
+    # undefined or zero) values are left out.
+    my %stuff = ( start => $ref->start,
+                  end => $ref->end,
+                  rp => $ref->rp,
+                  comment => $ref->comment,
+                  pubmed => $ref->pubmed,
+                  );
+    foreach my $s (keys %stuff) {
+        next unless ($stuff{$s});
+        $self->_addel($xmlRef, "Attribute", {
+            name => $s, content => $stuff{$s} });
+    }
+
+    # Link back to the calling element when its id was supplied
+    $xmlRef->setAttribute('refs', $args->{ID}) if ($args->{ID});
+
+    # Add the basic information, each as a child element holding one
+    # whitespace-collapsed text node.
+    # Should probably check for content before creation...
+    foreach my $pair ( ["RefAuthors", 'authors'],
+                       ["RefTitle",   'title'],
+                       ["RefJournal", 'location'] ) {
+        my ($tagname, $getter) = @{$pair};
+        my $holder = $self->_addel($xmlRef, $tagname);
+        $holder->appendChild( $xml->createTextNode(STRIP($ref->$getter)) );
+    }
+
+    # References will be added later in a <Feature-Table>
+    push @{$refRef}, $xmlRef;
+}
+
+=head2 _parse_annotation
+
+ Title   : _parse_annotation
+ Usage   : $obj->_parse_annotation(@args )
+ Function: Will examine any Annotations found in -obj. Data found in
+           ::Comment and ::DBLink structures, as well as Annotation
+           description fields are stored in -desc for later
+           generation of <Attribute>s. <Reference> objects are generated
+           from ::References, and are stored in -refs - these will
+           be appended to the XML tree later.
+ Returns :
+ Args    : Argument array. Recognized keys:
+
+      -xml The DOM::Document being modified
+
+      -obj Reference to the Bio object being analyzed
+
+     -desc An array reference for holding description text items
+
+     -refs An array reference to hold <Reference> DOM objects
+
+       -id Optional. If the XML id for the 'calling' element is
+           provided, it will be placed in any <Reference> refs
+           attribute.
+
+=cut
+
+sub _parse_annotation {
+    # Collects annotation data of -obj: descriptive items are pushed onto the
+    # -desc array as [name, value] pairs, references go to _parse_reference.
+    my $self = shift;
+    my $params = $self->_parseparams( @_);
+    my $bio_obj = $params->{OBJ};
+    my $desc_list = $params->{DESC};
+    my $collection = $bio_obj->annotation;
+    return  unless ($collection);
+
+    # Old style annotation (anything that is not a Collection). It seems
+    # that Features still use this form of object
+    if (ref($collection) !~ /Collection/) {
+        $self->_parse_annotation_old(@_);
+        return;
+    }
+
+    # No good place to put any of this (except for references). Most stuff
+    # just gets dumped to <Attribute>s
+    foreach my $key ($collection->get_all_annotation_keys()) {
+        foreach my $thing ($collection->get_Annotations($key)) {
+            if ($key eq 'reference') {
+                $self->_parse_reference( @_, -refobj => $thing );
+            } elsif ($key eq 'description') {
+                push @{$desc_list}, ["description", $thing->value];
+            } elsif ($key eq 'comment') {
+                push @{$desc_list}, ["comment", $thing->text];
+            } elsif ($key eq 'dblink') {
+                # DBLinks get dumped to attributes, too
+                my $xref = $thing->database . ":" . $thing->primary_id;
+                push @{$desc_list}, ["db_xref", $xref];
+                my $note = $thing->comment;
+                push @{$desc_list}, ["link", $note->text] if ($note);
+            } elsif (ref($thing) =~ /SimpleValue/) {
+                push @{$desc_list}, [$key, $thing->value];
+            } else {
+                # What is this??
+                push @{$desc_list}, ["error", "bsml.pm did not understand ".
+                                     "'$key' = '$thing'" ];
+            }
+        }
+    }
+}
+
+=head2 _parse_annotation_old
+
+ Title   : _parse_annotation_old
+ Usage   : $obj->_parse_annotation_old(@args)
+ Function: As above, but for the old Annotation system.
+           Apparently needed because Features are still using the old-style
+           annotations?
+ Returns :
+ Args    : Argument array. Recognized keys:
+
+      -xml The DOM::Document being modified
+
+      -obj Reference to the Bio object being analyzed
+
+     -desc An array reference for holding description text items
+
+     -refs An array reference to hold <Reference> DOM objects
+
+       -id Optional. If the XML id for the 'calling' element is
+           provided, it will be placed in any <Reference> refs
+           attribute.
+
+=cut
+
+    ###############
+    # <Reference> #
+    ###############
+
+sub _parse_annotation_old {
+    # Old-annotation counterpart of _parse_annotation: pushes [name, value]
+    # pairs onto the -desc array and hands each reference of -obj's
+    # annotation to _parse_reference.
+    my $self = shift;
+    my $params = $self->_parseparams( @_);
+    my ($bio_obj, $desc_list) = ($params->{OBJ}, $params->{DESC});
+    # No good place to put any of this (except for references). Most stuff
+    # just gets dumped to <Attribute>s
+    if (my $old_ann = $bio_obj->annotation) {
+        push @{$desc_list}, ["annotation", $old_ann->description];
+
+        # Comments
+        push @{$desc_list},
+            map { ["comment", $_->text] } $old_ann->each_Comment;
+
+        # Gene names just get dumped to <Attribute name="gene">
+        push @{$desc_list}, map { ["gene", $_] } $old_ann->each_gene_name;
+
+        # DBLinks get dumped to attributes, too
+        foreach my $xref ($old_ann->each_DBLink) {
+            my $label = $xref->database . ":" . $xref->primary_id;
+            push @{$desc_list}, ["db_xref", $label];
+            my $note = $xref->comment;
+            push @{$desc_list}, ["link", $note->text] if ($note);
+        }
+
+        # References get produced and temporarily held
+        foreach my $old_ref ($old_ann->each_Reference) {
+            $self->_parse_reference( @_, -refobj => $old_ref );
+        }
+    }
+}
+
+=head2 _add_page
+
+ Title   : _add_page
+ Usage   : $obj->_add_page($xmlDocument, $xmlSequenceObject)
+ Function: Adds a simple <Page> and <View> structure for a <Sequence>
+ Returns : a reference to the newly created <Page>
+ Args    : 0 The DOM::Document being modified
+           1 Reference to the <Sequence> object
+
+=cut
+
+sub _add_page {
+    # Appends a <Page> with a <Screen> and a <View> of the given <Sequence>
+    # element to the document's first <Display>; returns the new <Page>.
+    my ($self, $xml, $seq) = @_;
+    my %size = ( width => 7.8, height => 5.5 );
+    my $display = $xml->getElementsByTagName("Display")->item(0);
+    my $page = $self->_addel($display, "Page");
+    $self->_addel($page, "Screen", {
+        width => $size{width}, height => $size{height}, });
+    my %view_attr = (
+        seqref => $seq->getAttribute('id'),
+        title => $seq->getAttribute('title'),
+        title1 => "{NAME}",
+        title2 => "{LENGTH} {UNIT}",
+    );
+    my $view = $self->_addel($page, "View", \%view_attr);
+    # The line widget is sized and centred from the screen width
+    $self->_addel($view, "View-line-widget", {
+        shape => 'horizontal', hcenter => $size{width}/2 + 0.7,
+        'linear-length' => $size{width} - 2, });
+    $self->_addel($view, "View-axis-widget");
+    return $page;
+}
+
+
+=head2 _addel
+
+ Title   : _addel
+ Usage   : $obj->_addel($parentElem, 'ChildName',
+			{ anAttr => 'someValue', anotherAttr => 'aValue',})
+ Function: Add an element with attribute values to a DOM tree
+ Returns : a reference to the newly added element
+ Args    : 0 The DOM::Element parent that you want to add to
+           1 The name of the new child element
+           2 Optional hash reference containing
+             attribute name => attribute value assignments
+
+=cut
+
+sub _addel {
+    # Creates element $tag, sets the attributes given in the optional hash
+    # reference, appends the element to $parent and returns it.
+    my ($self, $parent, $tag, $attr_of) = @_;
+
+    # Use the parent's owner document, or the parent itself if it has none
+    my $owner = $parent->getOwnerDocument;
+    $owner = $parent unless ($owner);
+    my $child = $owner->createElement($tag);
+    $child->setAttribute($_, $attr_of->{$_}) foreach (keys %{$attr_of});
+    $parent->appendChild($child);
+    return $child;
+}
+
+=head2 _show_dna
+
+ Title   : _show_dna
+ Usage   : $obj->_show_dna($newval)
+ Function: (cut-and-pasted directly from embl.pm)
+ Returns : value of _show_dna
+ Args    : newvalue (optional)
+
+=cut
+
+sub _show_dna {
+   # Combined getter/setter for the '_show_dna' slot of the object hash.
+   # Any argument that is passed, even a false or undefined one, is stored
+   # first; the slot's current value is then returned.
+   my ($obj, @newval) = @_;
+   $obj->{'_show_dna'} = $newval[0] if( @newval );
+   return $obj->{'_show_dna'};
+}
+
+=head2 _initialize
+
+ Title   : _initialize
+ Usage   : $dom = $obj->_initialize(@args)
+ Function: Copied from embl.pm, and augmented with initialization of the
+           XML DOM tree
+ Returns :
+ Args    : -file => the XML file to be parsed
+
+=cut
+
+sub _initialize {
+  # Runs the inherited initialisation, then parses the file named by -file
+  # into a DOM tree (skipped when the name begins with '>').
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  # hash for functions for decoding keys.
+  $self->{'_func_ftunit_hash'} = {};
+  $self->_show_dna(1); # sets this to one by default. People can change it
+  my %param = @args;  # From SeqIO.pm
+  @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+  if ( exists $param{-file} && $param{-file} !~ /^>/) {
+      # Is it blasphemy to add your own keys to an object in another package?
+      # domtree => the parsed DOM tree returned by XML::DOM
+      $self->{'domtree'} = $self->_parse_xml( $param{-file} );
+      # current_node => the <Sequence> node next in line for next_seq
+      $self->{'current_node'} = 0;
+  }
+
+  $self->sequence_factory( Bio::Seq::SeqFactory->new(
+                               -verbose => $self->verbose(),
+                               -type => 'Bio::Seq::RichSeq'))
+      if( ! defined $self->sequence_factory );
+}
+
+
+=head2 _parseparams
+
+ Title   : _parseparams
+ Usage   : my $paramHash = $obj->_parseparams(@args)
+ Function: Borrowed from Bio::Parse.pm, who borrowed it from CGI.pm
+           Lincoln Stein -> Richard Resnick -> here
+ Returns : A hash reference of the parameter keys (uppercase) pointing to
+           their values.
+ Args    : An array of key, value pairs. Easiest to pass values as:
+           -key1 => value1, -key2 => value2, etc
+           Leading "-" are removed.
+
+=cut
+
+sub _parseparams {
+    # Turns a flat (-key => value, ...) list into a hash reference whose
+    # keys have one leading '-' removed and a-z folded to upper case.
+    my ($self, @pairs) = @_;
+    my %hash = ();
+
+    # A trailing key without a value (odd-sized list) is discarded
+    pop @pairs if (@pairs % 2);
+    while (@pairs) {
+        my ($key, $value) = splice(@pairs, 0, 2);
+        $key =~ s/^\-//;
+        $key =~ tr/a-z/A-Z/;
+        $hash{$key} = $value;
+    }
+    return \%hash;
+}
+
+=head2 _parse_xml
+
+ Title   : _parse_xml
+ Usage   : $dom = $obj->_parse_xml($filename)
+ Function: uses XML::DOM to construct a DOM tree from the BSML document
+ Returns : a reference to the parsed DOM tree
+ Args    : 0 Path to the XML file needing to be parsed
+
+=cut
+
+sub _parse_xml {
+    # Parses the XML file at the given path with XML::DOM::Parser and returns
+    # the resulting document; a path that does not exist is reported via throw.
+    my ($self, $file) = @_;
+    if (! -e $file) {
+        $self->throw("Could not parse non-existant XML file '$file'.");
+        return;
+    }
+    my $dom_parser = XML::DOM::Parser->new;
+    my $tree = $dom_parser->parsefile($file);
+    return $tree;
+}
+
+sub DESTROY {
+    # Reports off the net imply that DOM::Parser will memory leak if you
+    # do not explicitly dispose of it:
+    # http://aspn.activestate.com/ASPN/Mail/Message/perl-xml/788458
+    # NOTE(review): no parent DESTROY is chained here - confirm that is intended.
+    my ($self) = @_;
+    # For some reason the domtree can get undef-ed somewhere...
+    if (my $tree = $self->{'domtree'}) { $tree->dispose; }
+}
+
+
+=head1 TESTING SCRIPT
+
+ The following script may be used to test the conversion process. You
+ will need a file of the format you wish to test. The script will
+ convert the file to BSML, store it in /tmp/bsmltemp, read that file
+ into a new SeqIO stream, and write it back as the original
+ format. Comparison of this second file to the original input file
+ will allow you to track where data may be lost or corrupted. Note
+ that you will need to specify $readfile and $readformat.
+
+ use Bio::SeqIO;
+ # Tests preservation of details during round-trip conversion:
+ # $readformat -> BSML -> $readformat
+ my $tempspot = "/tmp/bsmltemp";  # temp folder to hold generated files
+ my $readfile = "rps4y.embl";     # The name of the file you want to test
+ my $readformat = "embl";         # The format of the file being tested
+
+ system "mkdir $tempspot" unless (-d $tempspot);
+ # Make Seq object from the $readfile
+ my $biostream = Bio::SeqIO->new( -file => "$readfile" );
+ my $seq = $biostream->next_seq();
+
+ # Write BSML from SeqObject
+ my $bsmlout = Bio::SeqIO->new( -format => 'bsml',
+				   -file => ">$tempspot/out.bsml");
+ warn "\nBSML written to $tempspot/out.bsml\n";
+ $bsmlout->write_seq($seq);
+ # Need to kill object for following code to work... Why is this so?
+ $bsmlout = "";
+
+ # Make Seq object from BSML
+ my $bsmlin = Bio::SeqIO->new( -file => "$tempspot/out.bsml",
+				  -format => 'bsml');
+ my $seq2 = $bsmlin->next_seq();
+
+ # Write format back from Seq Object
+ my $genout = Bio::SeqIO->new( -format => $readformat,
+				   -file => ">$tempspot/out.$readformat");
+ $genout->write_seq($seq2);
+ warn "$readformat  written to $tempspot/out.$readformat\n";
+
+ # BEING LOST:
+ # Join information (not possible in BSML 2.2)
+ # Sequence type (??)
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml_sax.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml_sax.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/bsml_sax.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,272 @@
+# $Id: bsml_sax.pm,v 1.4.4.1 2006/10/02 23:10:28 sendu Exp $
+# BioPerl module for Bio::SeqIO::bsml_sax
+#
+# Cared for by Jason Stajich
+#
+
+=head1 NAME
+
+Bio::SeqIO::bsml_sax - BSML sequence input/output stream using SAX
+
+=head1 SYNOPSIS
+
+ It is probably best not to use this object directly, but rather go
+ through the SeqIO handler system. To read a BSML file:
+
+    $stream = Bio::SeqIO->new( -file => $filename, -format => 'bsml_sax');
+
+    while ( my $bioSeqObj = $stream->next_seq() ) {
+	# do something with $bioSeqObj
+    }
+
+ To write a Seq object to the current file handle in BSML XML format:
+
+    $stream->write_seq( -seq => $seqObj);
+
+ If instead you would like a XML::DOM object containing the BSML, use:
+
+    my $newXmlObject = $stream->to_bsml( -seq => $seqObj);
+
+=head1 DEPENDENCIES
+
+ In addition to parts of the Bio:: hierarchy, this module uses:
+
+ XML::SAX
+
+=head1 DESCRIPTION
+
+ This object can transform Bio::Seq objects to and from BSML (XML)
+ flatfiles.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+ Report bugs to the Bioperl bug tracking system to help us keep track
+ of the bugs and their resolution. Bug reports can be submitted via the
+ web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=cut
+
+package Bio::SeqIO::bsml_sax;
+use vars qw($Default_Source);
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use XML::SAX;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+
+use base qw(Bio::SeqIO XML::SAX::Base);
+
+$Default_Source = 'BSML';
+
+sub _initialize {
+    # Runs the inherited set-up, creates the SAX parser that reports to this
+    # object and installs a RichSeq factory when none has been set.
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    $self->{'_parser'} = XML::SAX::ParserFactory->parser('Handler' => $self);
+    return if (defined $self->sequence_factory);
+    $self->sequence_factory(Bio::Seq::SeqFactory->new(
+        -verbose => $self->verbose(), -type => 'Bio::Seq::RichSeq'));
+    return;
+}
+
+=head1 METHODS
+
+=cut
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : my $bioSeqObj = $stream->next_seq
+ Function: Retrieves the next sequence from a SeqIO::bsml stream.
+ Returns : A reference to a Bio::Seq::RichSeq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    # Parse the stream only when no sequence is queued and input remains
+    my ($self) = @_;
+    my $queued = $self->{'_seendata'}->{'_seqs'} || [];
+    unless (@{$queued} || eof($self->_fh)) {
+        $self->{'_parser'}->parse_file($self->_fh);
+    }
+    return shift @{$self->{'_seendata'}->{'_seqs'}};
+}
+
+# XML::SAX::Base methods
+
+sub start_document {
+    # Start every document with empty stores, then pass the event on
+    my ($self, $document) = @_;
+    my %fresh = ('_seqs' => [], '_authors' => [], '_feats' => []);
+    $self->{'_seendata'} = \%fresh;
+    return $self->SUPER::start_document($document);
+}
+
+sub end_document {
+    my ($self, $document) = @_;    # pure pass-through to the parent handler
+    return $self->SUPER::end_document($document);
+}
+
+
+sub start_element {
+    # SAX callback for an opening tag: upper-cases the element name and the
+    # attribute keys, updates the sequence, feature or reference being built,
+    # pushes the name onto the '_state' stack and forwards the event.
+    my ($self,$ele) = @_;
+    my $name = uc($ele->{'LocalName'});
+    my $attr = $ele->{'Attributes'};
+    for my $k ( keys %$attr ) {
+        # A key that is already upper case must be skipped: storing it under
+        # "uc $k" and then deleting $k would remove the attribute altogether.
+        my $upper = uc $k;
+        next if( $upper eq $k );
+        $attr->{$upper} = delete $attr->{$k};
+    }
+    if( $name eq 'BSML' ) {
+    } elsif( $name eq 'DEFINITIONS' ) {
+    } elsif( $name eq 'SEQUENCES' ) {
+    } elsif( $name eq 'SEQUENCE' ) {
+        my ($id,$acc,$title,
+            $desc,$length,$topology,
+            $mol) =  map { $attr->{'{}'.$_}->{'Value'} } qw(ID IC-ACCKEY
+                                                            TITLE COMMENT
+                                                            LENGTH TOPOLOGY
+                                                            MOLECULE);
+        # Only an explicit non-linear topology means circular; an absent
+        # attribute must not be read as circular.
+        my $circular = (defined $topology && $topology !~ /^linear$/i) ? 1 : 0;
+        push @{$self->{'_seendata'}->{'_seqs'}},
+        $self->sequence_factory->create
+            (
+             -display_id          => $id,
+             -accession_number    => $acc,
+             -description         => $desc,
+             -length              => $length,
+             -is_circular         => $circular,
+             -molecule            => $mol,
+             );
+    } elsif( $name eq 'FEATURE-TABLES' ) {
+    } elsif( $name eq 'ATTRIBUTE' ) {
+        my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
+        my ($attname,$content) = map { $attr->{'{}'.$_}->{'Value'} } qw(NAME CONTENT);
+        if( ! defined $curseq || ! defined $attname ) {
+            # No sequence to annotate yet, or an unnamed attribute: ignore it
+        } elsif($attname =~ /^version$/i ) {
+            my ($version);
+            if($content =~ /^[^\.]+\.(\d+)/) {
+                $version = $1;
+            } else { $version = $content }
+            $curseq->seq_version($version);
+        } elsif( $attname eq 'organism-species') {
+            my ($genus,$species,$subsp) = split(/\s+/,$content,3);
+            $curseq->species(Bio::Species->new(-sub_species => $subsp,
+                                               -classification =>
+                                               [$species,$genus]));
+        } elsif( $attname eq 'organism-classification' ) {
+            my (@class) =(split(/\s*;\s*/,$content),$curseq->species->species);
+            $curseq->species->classification([reverse @class]);
+        } elsif( $attname eq 'database-xref' ) {
+            # Split on the first colon only, so an id containing ':' survives
+            my ($db,$id) = split(/:/,$content,2);
+            $curseq->annotation->add_Annotation('dblink',
+                                              Bio::Annotation::DBLink->new
+                                                ( -database  => $db,
+                                                  -primary_id=> $id));
+        } elsif( $attname eq 'date-created' ||
+                 $attname eq 'date-last-updated' ) {
+            $curseq->add_date($content);
+        }
+    } elsif( $name eq 'FEATURE' ) {
+        my ($id,$class,$type,$title,$display_auto)
+            =  map { $attr->{'{}'.$_}->{'Value'} } qw(ID CLASS VALUE-TYPE
+                                                      TITLE DISPLAY-AUTO);
+        push @{$self->{'_seendata'}->{'_feats'}},
+        Bio::SeqFeature::Generic->new
+            ( -seq_id      => $self->{'_seendata'}->{'_seqs'}->[-1]->display_id,
+              -source_tag  => $Default_Source,
+              -primary_tag => $type,
+              -tag => {'ID'    => $id,
+                   });
+    } elsif( $name eq 'QUALIFIER') {
+        my ($type,$value) =  map { $attr->{'{}'.$_}->{'Value'} } qw(VALUE-TYPE
+                                                                    VALUE);
+        my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];
+        $curfeat->add_tag_value($type,$value);
+    } elsif( $name eq 'INTERVAL-LOC' ) {
+        my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];
+        my ($start,$end,$strand) =
+            map { $attr->{'{}'.$_}->{'Value'} } qw(STARTPOS ENDPOS COMPLEMENT);
+        $curfeat->start($start);
+        $curfeat->end($end);
+        $curfeat->strand(-1) if($strand);
+    } elsif( $name eq 'REFERENCE' ) {
+        push @{$self->{'_seendata'}->{'_annot'}},
+        Bio::Annotation::Reference->new();
+    }
+    push @{$self->{'_state'}}, $name;
+    $self->SUPER::start_element($ele);
+}
+
+sub end_element {
+    my ($self, $ele) = @_;    # closing tag: unwind '_state', attach finished item
+    pop @{$self->{'_state'}};
+    my $tag = uc $ele->{'LocalName'};
+    my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
+    if ($tag eq 'FEATURE') {
+        my $done_feat = pop @{$self->{'_seendata'}->{'_feats'}};
+        $curseq->add_SeqFeature($done_feat);
+    } elsif ($tag eq 'REFERENCE') {
+        my $done_ref = pop @{$self->{'_seendata'}->{'_annot'}};
+        $curseq->annotation->add_Annotation('reference', $done_ref);
+    }
+    return $self->SUPER::end_element($ele);
+}
+
+sub characters {
+    # SAX callback for text. A parser may split one element's text into several
+    # chunks, so every chunk is appended to what was collected, not assigned.
+    my ($self,$data) = @_;
+    if( ! @{$self->{'_state'} || []} ) {
+        $self->warn("Calling characters with no previous start_element call. Ignoring data");
+    } else {
+        my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
+        my $curannot = $self->{'_seendata'}->{'_annot'}->[-1];
+        my $name = $self->{'_state'}->[-1];
+        my %field_for = (REFAUTHORS => 'authors', REFTITLE => 'title', REFJOURNAL => 'location');
+        if( my $field = $field_for{$name} ) {
+            my $sofar = $curannot->$field();
+            $curannot->$field((defined $sofar ? $sofar : '') . $data->{'Data'});
+        } elsif( $name eq 'SEQ-DATA') {
+            $data->{'Data'} =~ s/\s+//g;
+            my $sofar = $curseq->seq;
+            $curseq->seq((defined $sofar ? $sofar : '') . $data->{'Data'});
+        }
+    }
+    $self->SUPER::characters($data);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chadoxml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chadoxml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chadoxml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1470 @@
+# $Id: chadoxml.pm,v 1.19.4.1 2006/10/02 23:10:28 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::chadoxml
+#
+# Peili Zhang   <peili at morgan.harvard.edu>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::chadoxml - chadoxml sequence output stream
+
+=head1 SYNOPSIS
+
+It is probably best not to use this object directly, but
+rather go through the SeqIO handler system:
+
+    $writer = Bio::SeqIO->new(-file => ">chado.xml",
+                              -format => 'chadoxml');
+
+    # assume you already have a Sequence object
+    $writer->write_seq($seq_obj);
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to chadoxml flat
+file databases (for chadoxml DTD, see
+http://gmod.cvs.sourceforge.net/gmod/schema/chado/dat/chado.dtd).
+
+This is currently a write-only module.
+
+    $seqio = Bio::SeqIO->new(-file => '>outfile.xml',
+                             -format => 'chadoxml');
+
+    # we have a Bio::Seq object $seq which is a gene located on
+    # chromosome arm 'X', to be written out to chadoxml
+    # before converting to chadoxml, $seq object B<must> be transformed
+    # so that all the coordinates in $seq are against the source
+    # feature to be passed into Bio::SeqIO::chadoxml->write_seq()
+    # -- chromosome arm X in the example below.
+
+    $seqio->write_seq(-seq=>$seq,
+                      -seq_so_type=>'gene',
+                      -src_feature=>'X',
+                      -src_feat_type=>'chromosome_arm',
+		                -nounflatten=>1,
+                      -is_analysis=>'true',
+                      -data_source=>'GenBank');
+
+The chadoxml output of Bio::SeqIO::chadoxml-E<gt>write_seq() method can be
+passed to the loader utility in XORT package
+(http://gmod.cvs.sourceforge.net/gmod/schema/XMLTools/XORT/)
+to be loaded into chado.
+
+This object is currently implemented to work with sequence and
+annotation data from whole genome projects deposited in GenBank. It
+may not be able to handle all different types of data from all
+different sources.
+
+In converting a Bio::Seq object into chadoxml, a top-level feature is
+created to represent the object and all sequence features inside the
+Bio::Seq object are treated as subfeatures of the top-level
+feature. The Bio::SeqIO::chadoxml object calls
+Bio::SeqFeature::Tools::Unflattener to unflatten the flat feature list
+contained in the subject Bio::Seq object, to build gene model
+containment hierarchy conforming to chado central dogma model: gene
+--E<gt> mRNA --E<gt> exons and protein.
+
+Destination of data in the subject Bio::Seq object $seq is as following:
+
+	*$seq->display_id:  name of the top-level feature;
+
+	*$seq->accession_number: if defined, uniquename and
+				 feature_dbxref of the top-level
+				 feature if not defined,
+				 $seq->display_id is used as the
+				 uniquename of the top-level feature;
+
+	*$seq->molecule: transformed to SO type, used as the feature
+			type of the top-level feature if -seq_so_type
+			argument is supplied, use the supplied SO type
+			as the feature type of the top-level feature;
+
+	*$seq->species: organism of the top-level feature;
+
+	*$seq->seq: residues of the top-level feature;
+
+	*$seq->is_circular, $seq->division: feature_cvterm;
+
+	*$seq->keywords, $seq->desc, comments: featureprop;
+
+	*references: pub and feature_pub;
+		medline/pubmed ids: pub_dbxref;
+		comments: pubprop;
+
+	*feature "source" span: featureloc for top-level feature;
+
+	*feature "source" db_xref: feature_dbxref for top-level feature;
+
+	*feature "source" other tags: featureprop for top-level feature;
+
+	*subfeature 'symbol' or 'label' tag: feature uniquename, if
+                     none of these is present, the chadoxml object
+                     generates feature uniquenames as:
+                     <gene>-<feature_type>-<span>
+                     (e.g. foo-mRNA--1000..3000);
+
+	*gene model: feature_relationship built based on the
+                     containment hierarchy;
+
+	*feature span: featureloc;
+
+	*feature accession numbers: feature_dbxref;
+
+	*feature tags (except db_xref, symbol and gene): featureprop;
+
+Things to watch out for:
+
+	*chado schema change: this version works with the chado
+                               version tagged chado_1_01 in GMOD CVS.
+
+	*feature uniquenames: especially important if using XORT
+                              loader to do incremental load into
+                              chado. may need pre-processing of the
+                              source data to put the correct
+                              uniquenames in place.
+
+	*pub uniquenames: chadoxml->write_seq() has the FlyBase policy
+                          on pub uniquenames hard-coded, it assigns
+                          pub uniquenames in the following way: for
+                          journals and books, use ISBN number; for
+                          published papers, use MEDLINE ID; for
+                          everything else, use FlyBase unique
+                          identifier FBrf#. need to modify the code to
+                          implement your policy. look for the comments
+                          in the code.
+
+	*for pubs possibly existing in chado but with no knowledge of
+         its uniquename:put "op" as "match", then need to run the
+                        output chadoxml through a special filter that
+                        talks to chado database and tries to find the
+                        pub by matching with the provided information
+                        instead of looking up by the unique key. after
+                        matching, the filter also resets the "match"
+                        operation to either "force" (default), or
+                        "lookup", or "insert", or "update". the
+                        "match" operation is for a special FlyBase use
+                        case. please modify to work according to your
+                        rules.
+
+	*chado initialization for loading:
+
+		cv & cvterm: in the output chadoxml, all cv's and
+                             cvterm's are lookup only. Therefore,
+                             before using XORT loader to load the
+                             output into chado, chado must be
+                             pre-loaded with all necessary CVs and
+                             CVterms, including "SO" , "property
+                             type", "relationship type", "pub type",
+                             "pubprop type", "pub relationship type",
+                             "sequence topology", "GenBank feature
+                             qualifier", "GenBank division". A pub by
+                             the uniquename 'nullpub' of type 'null
+                             pub' needs to be inserted.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHOR - Peili Zhang
+
+Email peili at morgan.harvard.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::chadoxml;
+use strict;
+use English;
+
+use lib $ENV{CodeBase};
+use XML::Writer;
+use IO::File;
+use IO::Handle;
+use Bio::Seq;
+use Bio::Seq::RichSeq;
+use Bio::SeqIO::FTHelper;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Factory::SequenceStreamI;
+use Bio::SeqFeature::Generic;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+use Bio::SeqFeature::Tools::Unflattener;
+
+#global variables (lexicals scoped to this file)
+my %finaldatahash; #data from Bio::Seq object stored in a hash
+my %datahash;      #data from Bio::Seq object stored in a hash
+my $chadotables = join ' ', qw(feature featureprop feature_relationship
+    featureloc feature_cvterm cvterm cv feature_pub pub pub_dbxref pub_author
+    author pub_relationship pubprop feature_dbxref dbxref db);
+# presumably "table.column" => table that the column points to; confirm at use
+my %fkey = (
+    'cvterm.cv_id'                    => 'cv',
+    'dbxref.db_id'                    => 'db',
+    'feature.type_id'                 => 'cvterm',
+    'feature.organism_id'             => 'organism',
+    'feature.dbxref_id'               => 'dbxref',
+    'featureprop.type_id'             => 'cvterm',
+    'feature_pub.pub_id'              => 'pub',
+    'feature_cvterm.cvterm_id'        => 'cvterm',
+    'feature_cvterm.pub_id'           => 'pub',
+    'feature_dbxref.dbxref_id'        => 'dbxref',
+    'feature_relationship.object_id'  => 'feature',
+    'feature_relationship.subject_id' => 'feature',
+    'feature_relationship.type_id'    => 'cvterm',
+    'featureloc.srcfeature_id'        => 'feature',
+    'pub.type_id'                     => 'cvterm',
+    'pub_dbxref.dbxref_id'            => 'dbxref',
+    'pub_author.author_id'            => 'author',
+    'pub_relationship.obj_pub_id'     => 'pub',
+    'pub_relationship.subj_pub_id'    => 'pub',
+    'pub_relationship.type_id'        => 'cvterm',
+    'pubprop.type_id'                 => 'cvterm',
+);
+# presumably feature type argument => SO term used for it; confirm at use
+my %feattype_args2so = (
+    'aberr'           => 'aberration_junction',
+#   'conflict'        => 'sequence_difference',
+#   'polyA_signal'    => 'polyA_signal_sequence',
+    'variation'       => 'sequence_variant',
+    'mutation1'       => 'point_mutation',        #for single-base mutation
+    'mutation2'       => 'sequence_variant',      #for multi-base mutation
+    'rescue'          => 'rescue_fragment',
+#   'rfrag'           => 'restriction_fragment',
+    'protein_bind'    => 'protein_binding_site',
+    'misc_feature'    => 'region',
+#   'prim_transcript' => 'primary_transcript',
+    'CDS'             => 'protein',
+    'reg_element'     => 'regulatory_region',
+    'seq_variant'     => 'sequence_variant',
+    'mat_peptide'     => 'mature_peptide',
+    'sig_peptide'     => 'signal_peptide',
+);
+my %organism;
+
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+    # Runs the inherited initialisation and, when no sequence factory has
+    # been set, installs a Bio::Seq::SeqFactory of type 'Bio::Seq::RichSeq'.
+    my($self,@args) = @_;
+
+    $self->SUPER::_initialize(@args);
+    unless( defined $self->sequence_factory ) {
+        $self->sequence_factory(Bio::Seq::SeqFactory->new
+                                (-verbose => $self->verbose(),
+                                 -type => 'Bio::Seq::RichSeq'));
+    }
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(-seq=>$seq, -seq_so_type=>$seqSOtype,
+			      -src_feature=>$srcfeature,
+			      -src_feat_type=>$srcfeattype,
+			      -nounflatten=>0 or 1,
+			      -is_analysis=>'true' or 'false',
+			      -data_source=>$datasource)
+ Function: writes the $seq object (must be seq) into chadoxml.
+	   Current implementation:
+	   1. for non-mRNA records,
+	   a top-level feature of type $seq->alphabet is
+	   generated for the whole GenBank record, features listed
+           are unflattened for DNA records to build gene model
+	   feature graph, and for the other types of records all
+	   features in $seq are treated as subfeatures of the top-level
+	   feature.
+	   2. for mRNA records,
+	   if a 'gene' feature is present, it B<must> have a /symbol
+	   or /label tag to contain the uniquename of the gene. a top-
+	   level feature of type 'gene' is generated. the mRNA is written
+	   as a subfeature of the top-level gene feature, and the other
+	   sequence features listed in $seq are treated as subfeatures
+	   of the mRNA feature.
+ Returns : 1 for success and 0 for error
+
+
+ Args     : A Bio::Seq object $seq, optional $seqSOtype, $srcfeature,
+	         $srcfeattype, $nounflatten, $is_analysis and $data_source.
+           when $srcfeature (a string, the uniquename of the source
+           feature) is given, the location and strand information of
+           the top-level feature against the source feature will be
+           derived from the sequence feature called 'source' of the
+           $seq object, a featureloc record is generated for the top
+           -level feature on $srcfeature. when $srcfeature is given,
+           $srcfeattype must also be present. All feature coordinates
+           in $seq should be against $srcfeature.  $seqSOtype is the
+           optional SO term to use as the type of the top-level feature.
+           For example, a GenBank data file for a Drosophila melanogaster
+           genome scaffold has the molecule type of "DNA", when
+           converting to chadoxml, a $seqSOtype argument of
+           "golden_path_region" can be supplied to save the scaffold
+           as a feature of type "golden_path_region" in chadoxml, instead
+           of "DNA".  a feature with primary tag of 'source' must be
+           present in the sequence feature list of $seq, to describe the
+           whole sequence record.
+
+
+=cut
+
+sub write_seq {
+	# Builds a nested hash describing the top-level chado feature for $seq
+	# (properties, dbxrefs, publications, sub-features) and hands it to
+	# _hash2xml() for serialisation.  Returns 1; argument and data problems
+	# are reported through $self->throw.
+
+	# usage text thrown when -src_feature / -src_feat_type are unusable
+	my $usage = <<EOUSAGE;
+Bio::SeqIO::chadoxml->write_seq()
+Usage   : \$stream->write_seq(-seq=>\$seq,
+			      -seq_so_type=>\$SOtype,
+			      -src_feature=>\$srcfeature,
+			      -src_feat_type=>\$srcfeattype,
+			      -nounflatten=>0 or 1,
+                              -is_analysis=>'true' or 'false',
+                              -data_source=>\$datasource)
+Args    : \$seq		: a Bio::Seq object
+	  \$SOtype	: the SO term to use as the feature type of
+	                  the \$seq record, optional
+	  \$srcfeature	: unique name of the source feature, a string
+			  containing at least one alphabetical letter
+			  (a-z, A-Z), optional
+	  \$srcfeattype	: feature type of \$srcfeature. one of SO terms.
+			  optional
+	  when \$srcfeature is given, \$srcfeattype becomes mandatory,
+	  \$datasource	: source of the sequence annotation data,
+			  e.g. 'GenBank' or 'GFF'.
+EOUSAGE
+
+	my ($self, @args) = @_;
+
+	my ($seq, $seq_so_type, $srcfeature, $srcfeattype, $nounflatten, $isanalysis, $datasource) =
+	   $self->_rearrange([qw(SEQ
+				 SEQ_SO_TYPE
+				 SRC_FEATURE
+				 SRC_FEAT_TYPE
+				 NOUNFLATTEN
+				 IS_ANALYSIS
+				 DATA_SOURCE
+				 )],
+			      @args);
+	#print "$seq_so_type, $srcfeature, $srcfeattype\n";
+
+	if( !defined $seq ) {
+	    $self->throw("Attempting to write with no seq!");
+	}
+
+	if( ! ref $seq || ! $seq->isa('Bio::Seq::RichSeqI') ) {
+	    $self->warn(" $seq is not a RichSeqI compliant module. Attempting to dump, but may fail!");
+	}
+
+	#$srcfeature, when provided, should contain at least one alphabetical letter
+	if (defined $srcfeature)
+	{
+	    if ($srcfeature =~ /[a-zA-Z]/)
+	    {
+		chomp($srcfeature);
+	    } else {
+		$self->throw( $usage );
+	    }
+
+	    #check for mandatory $srcfeattype
+	    if (! defined $srcfeattype)
+	    {
+		$self->throw( $usage );
+		#$srcfeattype must be a string of non-whitespace characters
+	    } else {
+		if ($srcfeattype =~ /\S+/) {
+		    chomp($srcfeattype);
+		} else {
+		    $self->throw( $usage );
+		}
+	    }
+	}
+
+	# the final hash is file-scoped; clear it so that a record written by
+	# an earlier call cannot satisfy the "%finaldatahash" test further down
+	%finaldatahash = ();
+
+	# variables local to write_seq()
+	my $div;
+	my @top_featureprops;
+	my $name = $seq->display_id;
+	my @feature_cvterms;
+	my %sthash;
+	my %dvhash;
+	my $temp;
+	my $ann;
+	my @references;
+	my @feature_pubs;
+	my $ref;
+	my $location;
+	my $fbrf;
+	my $journal;
+	my $issue;
+	my $volume;
+	my $volumeissue;
+	my $pages;
+	my $year;
+	my $pubtype;
+#	my $miniref= undef;
+	my $uniquename;
+	my $refhash;
+	my $feat;
+	my $tag;
+	my $tag_cv;
+	my $ftype;
+	my @top_featrels;
+	my %srcfhash;
+
+	local($^W) = 0; # suppressing warnings about uninitialized fields.
+
+	# uniquename: accession number, else accession, else the display id
+	if ($seq->can('accession_number') && defined $seq->accession_number && $seq->accession_number ne 'unknown') {
+		$uniquename = $seq->accession_number;
+	} elsif ($seq->can('accession') && defined $seq->accession && $seq->accession ne 'unknown') {
+		$uniquename = $seq->accession;
+	} else {
+		$uniquename = $name;
+	}
+        my $len = $seq->length();
+	if ($len == 0) {
+		$len = undef;
+	}
+
+	my $gb_type;
+	if (!$seq->can('molecule') || ! defined ($gb_type = $seq->molecule()) ) {
+		$gb_type = $seq->alphabet || 'DNA';
+	}
+	# normalise the lower-case alphabet spellings; this has to look at
+	# $gb_type because $ftype is only assigned below
+	$gb_type = 'DNA' if $gb_type eq 'dna';
+	$gb_type = 'RNA' if $gb_type eq 'rna';
+
+	if (defined $seq_so_type) {
+		$ftype = $seq_so_type;
+	}
+	else {
+		$ftype = $gb_type;
+	}
+
+	my %ftype_hash = ( "name" => $ftype, "cv_id" => {"name" => 'SO'});
+
+	my $spec = $seq->species();
+	if (!defined $spec) {
+		$self->throw("$seq does not know what organism it is from, which is required by chado. cannot proceed!\n");
+	} else {
+		%organism = ("genus"=>$spec->genus(), "species" => $spec->species());
+	}
+
+        my $residues = $seq->seq || '';
+
+	#set is_analysis flag for gene model features
+	my $isanal;
+	if ($ftype eq 'gene' || $ftype eq 'mRNA' || $ftype eq 'exon' || $ftype eq 'protein') {
+		$isanal = $isanalysis;
+		$isanal = 'false' if !defined $isanal;
+	}
+
+	%datahash = (
+		"name"		=> $name,
+		"uniquename"	=> $uniquename,
+		"seqlen"	=> $len,
+		"residues"	=> $residues,
+		"type_id"	=> \%ftype_hash,
+		"organism_id"	=> \%organism,
+		"is_analysis"	=> $isanal,
+		);
+
+	#if $srcfeature is not given, use the Bio::Seq object itself as the srcfeature for featureloc's
+	if (!defined $srcfeature) {
+		$srcfeature = $uniquename;
+		$srcfeattype = $ftype;
+	}
+
+	#default data source is 'GenBank'
+	if (!defined $datasource) {
+		$datasource = 'GenBank';
+	}
+
+	if ($datasource =~ /GenBank/i) {
+		#sequence topology as feature_cvterm
+		%sthash = (
+			"cvterm_id"	=> {'name' => ($seq->is_circular ? 'circular' : 'linear'),
+					    'cv_id' => {
+						'name' => 'sequence topology',
+					    },
+					},
+			"pub_id"	=> {'uniquename' => 'nullpub',
+					    'type_id' => {
+						'name' => 'null pub',
+						'cv_id' => {
+						    'name'=> 'pub type',
+						},
+					    },
+					},
+			);
+		push(@feature_cvterms, \%sthash);
+
+		#division as feature_cvterm
+		if ($seq->can('division') && defined $seq->division()) {
+			$div = $seq->division();
+			%dvhash = (
+				"cvterm_id"	=> {'name' => $div,
+						    'cv_id' => {
+							'name' => 'GenBank division'}},
+				"pub_id"	=> {'uniquename' => 'nullpub',
+						    'type_id' => {
+							'name' => 'null pub',
+							'cv_id' => {
+							    'name'=> 'pub type'},
+							}},
+				);
+			push(@feature_cvterms, \%dvhash);
+		}
+
+		$datahash{'feature_cvterm'} = \@feature_cvterms;
+	}
+
+	#featureprop's
+	#DEFINITION
+	if ($seq->can('desc') && defined $seq->desc()) {
+		$temp = $seq->desc();
+
+		my %prophash = (
+			"type_id" 	=> {'name' => 'description',
+					    'cv_id' => {
+						'name' => 'property type'}},
+			"value"		=> $temp,
+			);
+
+		push(@top_featureprops, \%prophash);
+        }
+
+	#KEYWORDS
+	if ($seq->can('keywords')) {
+	    $temp = $seq->keywords();
+
+	    if (defined $temp && $temp ne '.' && $temp ne '') {
+		my %prophash = (
+				"type_id"       => {'name' => 'keywords',
+						    'cv_id' =>
+						    {'name' => 'property type'}
+						},
+				"value"          => $temp,
+                        	);
+
+		push(@top_featureprops, \%prophash);
+	    }
+        }
+
+	#COMMENT
+	if ($seq->can('annotation')) {
+		$ann = $seq->annotation();
+		foreach my $comment ($ann->get_Annotations('comment')) {
+			$temp = $comment->as_text();
+			#print "fcomment: $temp\n";
+			my %prophash = (
+				"type_id"	=> {'name' => 'comment',
+						    'cv_id' =>
+						    {'name' => 'property type'}},
+				"value"		=> $temp,
+				);
+
+			push(@top_featureprops, \%prophash);
+		}
+	}
+
+	#accession and version as feature_dbxref
+	my @top_dbxrefs = ();
+	if ($seq->can('accession_number') && defined $seq->accession_number && $seq->accession_number ne 'unknown') {
+	    my $db = $self->_guess_acc_db($seq, $seq->accession_number);
+	    my %acchash = (
+			   "db_id"	=> {'name' => $db},
+			   "accession"	=> $seq->accession_number,
+			   "version"	=> $seq->seq_version,
+			   );
+	    my %fdbx = ('dbxref_id' => \%acchash);
+	    push(@top_dbxrefs, \%fdbx);
+	}
+
+	if( $seq->isa('Bio::Seq::RichSeqI') && defined $seq->get_secondary_accessions() ) {
+		my @secacc = $seq->get_secondary_accessions();
+		foreach my $acc (@secacc) {
+			my %acchash = (
+				"db_id"         => {'name' => 'GB'},
+				"accession"	=> $acc,
+				);
+			my %fdbx = ('dbxref_id' => \%acchash);
+			push(@top_dbxrefs, \%fdbx);
+		}
+	}
+
+	#GI number
+	if( $seq->isa('Bio::Seq::RichSeqI') && defined ($seq->pid)) {
+		my $id = $seq->pid;
+		#print "reftype: ", ref($id), "\n";
+
+		#if (ref($id) eq 'HASH') {
+		my %acchash = (
+			"db_id"		=> {'name' => 'GI'},
+			"accession"	=> $id,
+			);
+		my %fdbx = ('dbxref_id' => \%acchash);
+		push (@top_dbxrefs, \%fdbx);
+	}
+
+	#REFERENCES as feature_pub
+	if (defined $ann) {
+	    #get the references
+	    @references = $ann->get_Annotations('reference');
+	    foreach $ref (@references) {
+		my %pubhash;
+		# the FBrf id belongs to one reference only; forget the one
+		# found on the previous reference
+		$fbrf = undef;
+		$refhash = $ref->hash_tree();
+		$location = $ref->location || $refhash->{'location'};
+		#print "location: $location\n";
+
+		#get FBrf#, special for FlyBase SEAN loading
+		if (index($location, ' ==') >= 0) {
+		    $location =~ /\s==/;
+				#print "match: $MATCH\n";
+				#print "prematch: $PREMATCH\n";
+				#print "postmatch: $POSTMATCH\n";
+		    $fbrf = $PREMATCH;
+		    $location = $POSTMATCH;
+		    $location =~ s/^\s//;
+		}
+
+		#print "location: $location\n";
+		#unpublished reference
+		if ($location =~ /Unpublished/) {
+		    $pubtype = 'unpublished';
+		    %pubhash = (
+				"title"		=> $ref->title || $refhash->{'title'},
+				#"miniref"	=> substr($location, 0, 255),
+				#"uniquename"	=> $fbrf,
+				"type_id"	=> {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}}
+				);
+		}
+		#submitted
+		elsif ($location =~ /Submitted/) {
+		    $pubtype = 'submitted';
+
+		    %pubhash = (
+				"title"		=> $ref->title || $refhash->{'title'},
+				#"miniref"	=> substr($location, 0, 255),
+				#"uniquename"	=> $fbrf,
+				"type_id"	=> {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}}
+				);
+
+		    my $pyear = $self->_getSubmitYear($location);
+		    if (defined $pyear) {
+			$pubhash{'pyear'} = $pyear;
+		    }
+		}
+
+		#published journal paper
+		elsif ($location =~ /\D+\s\d+\s\((\d+|\d+-\d+)\),\s(\d+-\d+|\d+--\d+)\s\(\d\d\d\d\)$/) {
+		    $pubtype = 'paper';
+
+				#parse location to get journal, volume, issue, pages & year
+				#the string is peeled from the right: year, pages, volume(issue), journal
+		    $location =~ /\(\d\d\d\d\)$/;
+
+		    $year = $MATCH;
+		    my $stuff = $PREMATCH;
+		    $year =~ s/\(//; #remove the leading parenthesis
+		    $year =~ s/\)//; #remove the trailing parenthesis
+
+		    $stuff =~ /,\s(\d+-\d+|\d+--\d+)\s$/;
+
+		    $pages = $MATCH;
+		    $stuff = $PREMATCH;
+		    $pages =~ s/^, //; #remove the leading comma and space
+		    $pages =~ s/ $//; #remove the last space
+
+		    $stuff =~ /\s\d+\s\((\d+|\d+-\d+)\)$/;
+
+		    $volumeissue = $MATCH;
+		    $journal = $PREMATCH;
+		    $volumeissue =~ s/^ //; #remove the leading space
+		    $volumeissue =~ /\((\d+|\d+-\d+)\)$/;
+		    $issue = $MATCH;
+		    $volume = $PREMATCH;
+		    $issue =~ s/^\(//; #remove the leading parentheses
+		    $issue =~ s/\)$//; #remove the last parentheses
+		    $volume =~ s/^\s//;	#remove the leading space
+		    $volume =~ s/\s$//;	#remove the last space
+
+		    %pubhash = (
+				"title"		=> $ref->title || $refhash->{'title'},
+				"volume"	=> $volume,
+				"issue"		=> $issue,
+				"pyear"		=> $year,
+				"pages"		=> $pages,
+				#"miniref"	=> substr($location, 0, 255),
+				#"miniref"	=> ' ',
+				#"uniquename"	=> $fbrf,
+				"type_id"	=> {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}},
+				"pub_relationship" => {
+				    'obj_pub_id' => {
+					'uniquename' => $journal,
+					'title' => $journal,
+					#'miniref' => substr($journal, 0, 255),
+					'type_id' =>{'name' => 'journal',
+						     'cv_id' =>
+						     {'name' => 'pub type'
+						      },
+						 },
+						     #'pubprop' =>{'value'=> $journal,
+						     #	     'type_id'=>{'name' => 'abbreviation', 'cv_id' => {'name' => 'pubprop type'}},
+						     #	    },
+						 },
+					   'type_id' => {
+					       'name' => 'published_in',
+					       'cv_id' => {
+						   'name' => 'pub relationship type'},
+					   },
+				},
+				);
+		}
+
+		#other references
+		else {
+		    $pubtype = 'other';
+		    %pubhash = (
+				"title"		=> $ref->title || $refhash->{'title'},
+				#"miniref"	=> $fbrf,
+				"type_id"	=> {
+				    'name' => $pubtype,
+				    'cv_id' => {'name' =>'pub type'}
+				}
+				);
+		}
+
+		#pub_author
+		my $autref = $self->_getRefAuthors($ref);
+		if (defined $autref) {
+		    $pubhash{'pub_author'} = $autref;
+		}
+		# if no author and is type 'submitted' and has submitter address, use the first 100 characters of submitter address as the author lastname.
+		else {
+		    if ($pubtype eq 'submitted') {
+			my $autref = $self->_getSubmitAddr($ref);
+			if (defined $autref) {
+			    $pubhash{'pub_author'} = $autref;
+			}
+		    }
+		}
+
+		#$ref->comment as pubprop
+		#print "ref comment: ", $ref->comment, "\n";
+		#print "ref comment: ", $refhash->{'comment'}, "\n";
+		if (defined $ref->comment || defined $refhash->{'comment'}) {
+		    my $comnt = $ref->comment || $refhash->{'comment'};
+				#print "remark: ", $comnt, "\n";
+		    $pubhash{'pubprop'} = {
+			"type_id"       => {'name' => 'comment', 'cv_id' => {'name' => 'pubprop type'}},
+			"value"		=> $comnt,
+		    };
+		}
+
+		#pub_dbxref
+		my @pub_dbxrefs;
+		if (defined $fbrf) {
+		    push(@pub_dbxrefs, {dbxref_id => {accession => $fbrf, db_id => {'name' => 'FlyBase'}}});
+		}
+		if (defined ($temp = $ref->medline)) {
+		    push(@pub_dbxrefs, {dbxref_id => {accession => $temp, db_id => {'name' => 'MEDLINE'}}});
+				#use medline # as the pub's uniquename
+		    $pubhash{'uniquename'} = $temp;
+		}
+		if (defined ($temp = $ref->pubmed)) {
+		    push(@pub_dbxrefs, {dbxref_id => {accession => $temp, db_id => {'name' => 'PUBMED'}}});
+		}
+		$pubhash{'pub_dbxref'} = \@pub_dbxrefs;
+
+		#if the pub uniquename is not defined or blank, put its FBrf# as its uniquename
+		#this is unique to FlyBase
+		#USERS OF THIS MODULE: PLEASE MODIFY HERE TO IMPLEMENT YOUR POLICY
+		# ON PUB UNIQUENAME!!!
+		if (!defined $pubhash{'uniquename'} || $pubhash{'uniquename'} eq '') {
+		    if (defined $fbrf) {
+			$pubhash{'uniquename'} = $fbrf;
+		    }
+				#else {
+				#	$pubhash{'uniquename'} = $self->_CreatePubUname($ref);
+				#}
+		}
+
+		#add to collection of references
+		#if the pub covers the entire sequence of the top-level feature, add it to feature_pubs
+		if (($ref->start == 1 && $ref->end == $len) || (!defined $ref->start && !defined $ref->end)) {
+		    push(@feature_pubs, {"pub_id" => \%pubhash});
+		}
+		#the pub is about a sub-sequence of the top-level feature
+		#create a feature for the sub-sequence and add pub as its feature_pub
+		#featureloc of this sub-sequence is against the top-level feature, in interbase coordinates.
+		else {
+		    my %parf = (
+				'uniquename'	=> $uniquename . ':' . $ref->start . "\.\." . $ref->end,
+				'organism_id'	=>\%organism,
+				'type_id'	=>{'name' =>'region', 'cv_id' => {'name' => 'SO'}},
+				);
+		    my %parfsrcf = (
+				    'uniquename'	=> $uniquename,
+				    'organism_id'	=>\%organism,
+				    );
+		    my %parfloc = (
+				   'srcfeature_id'	=> \%parfsrcf,
+				   'fmin'		=> $ref->start - 1,
+				   'fmax'		=> $ref->end,
+				   );
+		    $parf{'featureloc'} = \%parfloc;
+		    $parf{'feature_pub'} = {'pub_id' => \%pubhash};
+		    my %ffr = (
+			       'subject_id'	=> \%parf,
+			       'type_id'		=> { 'name' => 'partof', 'cv_id' => { 'name' => 'relationship type'}},
+			       );
+		    push(@top_featrels, \%ffr);
+		}
+	    }
+	    $datahash{'feature_pub'} = \@feature_pubs;
+	}
+
+	##construct srcfeature hash for use in featureloc
+	if (defined $srcfeature) {
+		%srcfhash = ('uniquename' 	=> $srcfeature,
+				'organism_id'   => \%organism,
+				'type_id' 	=> {'name' => $srcfeattype, 'cv_id' => {'name' => 'SO'}},
+			);
+
+	#	my %fr = (
+	#		"object_id"	=> \%srcfhash,
+	#		"type_id"	=> { 'name' => 'partof', 'cv_id' => { 'name' => 'relationship type'}},
+	#		);
+
+	#	push (@top_featrels, \%fr);
+	}
+
+	#unflatten the seq features in $seq if $seq is a gene or a DNA sequence
+	if (($gb_type eq 'gene' || $gb_type eq 'DNA') &&
+	    !$nounflatten) {
+		my $u = Bio::SeqFeature::Tools::Unflattener->new;
+		$u->unflatten_seq(-seq=>$seq, -use_magic=>1);
+	}
+
+	my @top_sfs = $seq->get_SeqFeatures;
+	#print $#top_sfs, "\n";
+
+	#SUBFEATURES
+
+	if ($datasource =~ /GenBank/i) {
+		$tag_cv = 'GenBank feature qualifier';
+	} elsif ($datasource =~ /GFF/i) {
+		$tag_cv = 'GFF tag';
+	} else {
+		$tag_cv = 'property type';
+	}
+
+	# Fold every 'source' feature into the top-level feature and collect
+	# the remaining features for the sub-feature pass.  The remainder is
+	# built in a separate list: removing elements from @top_sfs while
+	# foreach is walking it makes the loop skip the following element.
+	my @other_sfs;
+	foreach $feat (@top_sfs) {
+		my $prim_tag = $feat->primary_tag;
+		#print $prim_tag, "\n";
+
+		if ($prim_tag ne 'source') {
+			push(@other_sfs, $feat);
+			next;
+		}
+
+		# get all qualifiers of the 'source' feature, load these as top_featureprops of the top level feature
+		foreach $tag ($feat->all_tags()) {
+			#db_xref
+			if ($tag eq 'db_xref')   {
+				my @t1 = $feat->each_tag_value($tag);
+				foreach $temp (@t1) {
+				   # "<db>:<accession>", split at the first colon
+				   my ($db, $xref) = split(/:/, $temp, 2);
+				   my %acchash = (
+					"db_id"		=> {'name' => $db},
+					"accession"	=> $xref,
+					);
+				   my %fdbx = ('dbxref_id' => \%acchash);
+				   push (@top_dbxrefs, \%fdbx);
+				}
+			#other tags as featureprops
+			} elsif ($tag ne 'gene') {
+				my %prophash = (
+					"type_id"       => {'name' => $tag, 'cv_id' => {'name' => $tag_cv}},
+					"value"		=> join(' ',$feat->each_tag_value($tag)),
+					);
+				push(@top_featureprops, \%prophash);
+			}
+		}
+
+		#featureloc for the top-level feature, in interbase coordinates
+		my %fl = (
+			"srcfeature_id"	=> \%srcfhash,
+			"fmin"		=> $feat->start - 1,
+			"fmax"		=> $feat->end,
+			"strand"	=> $feat->strand,
+			);
+
+		$datahash{'featureloc'} = \%fl;
+	}
+	@top_sfs = @other_sfs;
+
+	#the top-level features other than 'source'
+	foreach $feat (@top_sfs) {
+		#print $feat->primary_tag, "\n";
+
+		my $r = $self->_subfeat2featrelhash($name, $ftype, $feat, \%srcfhash, $tag_cv, $isanalysis);
+
+		if (!($ftype eq 'mRNA' && $feat->primary_tag eq 'gene')) {
+			my %fr = %$r;
+			push(@top_featrels, \%fr);
+		} else {
+			# the gene of an mRNA record becomes the top-level feature
+			%finaldatahash = %$r;
+		}
+	}
+
+	if (@top_dbxrefs) {
+		$datahash{'feature_dbxref'} = \@top_dbxrefs;
+	}
+
+	if (@top_featureprops) {
+		$datahash{'featureprop'} = \@top_featureprops;
+	}
+
+	if (@top_featrels) {
+		$datahash{'feature_relationship'} = \@top_featrels;
+	}
+
+	if ($ftype eq 'mRNA' && %finaldatahash) {
+		$finaldatahash{'feature_relationship'} = {
+						'subject_id'	=> \%datahash,
+						'type_id'	=> { 'name' => 'partof', 'cv_id' => { 'name' => 'relationship type'}},
+							 };
+	} else {
+		%finaldatahash = %datahash;
+	}
+
+	my $mainTag = 'feature';
+	$self->_hash2xml(undef, $mainTag, \%finaldatahash);
+
+	return 1;
+}
+
+# Recursively write the hash $ref as the XML element $ult.
+#
+# Args    : $isMatch - true when this element and its children are written
+#                      with op="match"
+#           $ult     - element (table) name to write
+#           $ref     - hash reference: keys named in $chadotables are nested
+#                      tables (hash ref or array ref of hash refs), all
+#                      other keys are written as column elements
+#           $writer  - XML::Writer to use; when omitted a writer on
+#                      $self->_fh is created and the XML declaration, a
+#                      comment and the enclosing <chado> element are written
+#           $root    - 1 when this call owns the enclosing <chado> element
+#                      (set automatically when $writer is omitted)
+sub _hash2xml {
+    my ($self, $isMatch, $ult, $ref, $writer, $root) = @_;
+    my %mh = %$ref;
+
+    #print "ult: $ult\n";
+    if (!defined $writer) {
+	$root = 1;
+	$writer = XML::Writer->new(OUTPUT => $self->_fh,
+				   DATA_MODE => 1,
+				   DATA_INDENT => 3);
+
+	#print header
+	$writer->xmlDecl("UTF-8");
+	$writer->comment("created by Peili Zhang, Flybase, Harvard University");
+
+	#start chadoxml
+	$writer->startTag('chado');
+    }
+
+    # A key is a nested table when it appears in the $chadotables list with
+    # a space on either side.  A hit at offset 0 counts as well, so that
+    # every key is handled by exactly one of the two loops below.
+    my $is_table = sub {
+	my $k = shift;
+	return index($chadotables, ' ' . $k) >= 0
+	    || index($chadotables, $k . ' ') >= 0;
+    };
+
+    #start opening tag
+    #if pub record of type 'journal', form the 'ref' attribute for special pub lookup
+    #requires that the journal name itself is also stored as a pubprop record for the journal with value equal
+    #to the journal name and type of 'abbreviation'.
+    if ($ult eq 'pub' && $mh{'type_id'}->{'name'} eq 'journal') {
+	$writer->startTag($ult, 'ref' => $mh{'title'} . ':journal:abbreviation');
+    }
+
+    #special pub match if pub uniquename not known
+    elsif ($ult eq 'pub' && !defined $mh{'uniquename'}) {
+	$writer->startTag($ult, 'op' => 'match');
+	#set the match flag, all the sub tags should also have "op"="match"
+	$isMatch = 1;
+    }
+
+    #if cvterm or cv, lookup only
+    elsif (($ult eq 'cvterm') || ($ult eq 'cv')) {
+	$writer->startTag($ult, 'op' => 'lookup');
+    }
+
+    #if nested tables of match table, match too
+    elsif ($isMatch) {
+	$writer->startTag($ult, 'op' => 'match');
+    }
+
+    else {
+	$writer->startTag($ult);
+    }
+
+    #first loop to produce xml for all the table columns
+    foreach my $key (keys %mh)
+    {
+	next if $is_table->($key);
+
+	if ($isMatch) {
+	    $writer->startTag($key, 'op' => 'match');
+	} else {
+	    $writer->startTag($key);
+	}
+
+	#the column is a foreign key: its value is the hash of the row it points to
+	my $x = $ult . '.' . $key;
+	if (defined $fkey{$x})
+	{
+	    $self->_hash2xml($isMatch, $fkey{$x}, $mh{$key}, $writer, 0);
+	} else
+	{
+	    #print "$key: $mh{$key}\n";
+	    $writer->characters($mh{$key});
+	}
+	$writer->endTag($key);
+    }
+
+    #second loop to produce xml for all the nested tables
+    foreach my $key (keys %mh)
+    {
+	next unless $is_table->($key);
+
+	my $ntref = $mh{$key};
+	#print "$key: ", ref($ntref), "\n";
+	if (ref($ntref) eq 'HASH') {
+	    $self->_hash2xml($isMatch, $key, $ntref, $writer, 0);
+	} elsif (ref($ntref) eq 'ARRAY') {
+	    foreach my $row (@$ntref) {
+		$self->_hash2xml($isMatch, $key, $row, $writer, 0);
+	    }
+	}
+    }
+
+    #end tag
+    $writer->endTag($ult);
+
+    if (defined $root && $root == 1) {
+	$writer->endTag('chado');
+    }
+}
+
+# Guess the database an accession belongs to from its format.
+#
+# Args    : $seq - sequence object; its molecule() is consulted, when the
+#                  object provides one, to recognise protein accessions
+#           $acc - accession string
+# Returns : one of "RefSeq", "GB", "PIR", "PRF", "LocusID", "FlyBase" or
+#           "unknown".  The tests run in that order and the first hit wins.
+sub _guess_acc_db {
+	my $self = shift;
+	my $seq = shift;
+	my $acc = shift;
+	#print "acc: $acc\n";
+
+	# evaluated only when a protein-specific pattern is reached; objects
+	# without a molecule() method are treated as non-protein instead of
+	# dying on the method call
+	my $is_protein = sub {
+		return 0 unless ref $seq && $seq->can('molecule');
+		my $mol = $seq->molecule();
+		return (defined $mol && $mol eq 'protein') ? 1 : 0;
+	};
+
+	if ($acc =~ /^NM_\d{6}/ || $acc =~ /^NP_\d{6}/ || $acc =~ /^NT_\d{6}/ || $acc =~ /^NC_\d{6}/) {
+		return "RefSeq";
+	} elsif ($acc =~ /^XM_\d{6}/ || $acc =~ /^XP_\d{6}/ || $acc =~ /^XR_\d{6}/) {
+		return "RefSeq";
+	} elsif ($acc =~ /^[a-zA-Z]{1,2}\d{5,6}/) {
+		return "GB";
+	# letters only: the range A-z would also admit [ \ ] ^ _ and `
+	} elsif ($acc =~ /^[a-zA-Z]\d{5}/ && $is_protein->()) {
+		return "PIR";
+	} elsif ($acc =~ /^\d{6,7}[a-zA-Z]/ && $is_protein->()) {
+		return "PRF";
+	} elsif ($acc =~ /\d+/ && $acc !~ /[a-zA-Z]/) {
+		return "LocusID";
+	} elsif ($acc =~ /^CG\d+/ || $acc =~ /^FB[a-z][a-z]\d+/) {
+		return "FlyBase";
+	} else {
+		return "unknown";
+	}
+}
+
+# Convert a sequence feature, and recursively its sub-features, into the
+# nested hash that _hash2xml() writes.
+#
+# Args    : $genename   - gene name used when a unique name has to be
+#                         generated for the feature
+#           $seqtype    - feature type of the parent record/feature
+#           $feat       - the sequence feature to convert
+#           $r          - reference to the srcfeature hash used as
+#                         featureloc.srcfeature_id
+#           $tag_cv     - cv name under which qualifier tags are stored
+#           $isanalysis - is_analysis value for gene model features
+# Returns : reference to a feature_relationship hash whose subject_id is the
+#           feature hash; for the 'gene' feature of an mRNA record the
+#           feature hash itself is returned instead.
+sub _subfeat2featrelhash {
+	my $self = shift;
+	my $genename = shift;
+	my $seqtype = shift;
+	my $feat = shift;
+	my $r = shift;
+	my %srcf = %$r;		#srcfeature hash for featureloc.srcfeature_id
+	my $tag_cv = shift;
+	my $isanalysis = shift;
+
+	my $prim_tag = $feat->primary_tag;
+
+	my $sfunique;		#subfeature uniquename
+	my $sfname;		#subfeature name
+	my $sftype;		#subfeature type
+
+	if ($feat->has_tag('symbol')) {
+		($sfunique) = $feat->each_tag_value("symbol");
+	} elsif ($feat->has_tag('label')) {
+		($sfunique) = $feat->each_tag_value("label");
+	} else {
+		#$self->throw("$prim_tag at " . $feat->start . "\.\." . $feat->end . " does not have symbol or label! To convert into chadoxml, a seq feature must have a /symbol or /label tag holding its unique name.");
+		#generate feature unique name as <genename>-<feature-type>-<span>
+		$sfunique = $self->_genFeatUniqueName($genename, $feat);
+	}
+
+	if ($feat->has_tag('Name')) {
+		($sfname) = $feat->each_tag_value("Name");
+	}
+
+	#feature type translation
+	if (defined $feattype_args2so{$prim_tag}) {
+		$sftype = $feattype_args2so{$prim_tag};
+	} else {
+		$sftype = $prim_tag;
+	}
+
+	#a mutation is typed by its span: single base or multi-base
+	if ($prim_tag eq 'mutation') {
+		if ($feat->start == $feat->end) {
+			$sftype = $feattype_args2so{'mutation1'};
+		} else {
+			$sftype = $feattype_args2so{'mutation2'};
+		}
+	}
+
+	#set is_analysis flag for gene model features
+	my $isanal;
+	if ($sftype eq 'gene' || $sftype eq 'mRNA' || $sftype eq 'exon' || $sftype eq 'protein') {
+		$isanal = $isanalysis;
+	}
+
+	my %sfhash = (
+		"name"			=> $sfname,
+		"uniquename"		=> $sfunique,
+		"organism_id"		=> \%organism,
+		"type_id"		=> { 'name' => $sftype, 'cv_id' => { 'name' => 'SO'}},
+		"is_analysis"           => $isanal,
+		);
+
+	#make a copy of %sfhash for passing to this method when recursively called
+	#my %srcfeat = (
+        #        "name"                  => $sfname,
+        #        "uniquename"            => $sfunique,
+        #        "organism_id"           => \%organism,
+        #        "type_id"               => { 'name' => $sftype, 'cv_id' => { 'name' => 'SO'}},
+        #        );
+
+	#featureloc for subfeatures, in interbase coordinates
+	my $sfmin = $feat->start - 1;
+	my $sfmax = $feat->end;
+	my $sfstrand = $feat->strand();
+	my $is_sfmin_partial;
+	my $is_sfmax_partial;
+
+	#if the gene feature in an mRNA record, cannot use its coordinates, omit featureloc
+	unless ($seqtype eq 'mRNA' && $sftype eq 'gene') {
+		if ($feat->location->isa('Bio::Location::FuzzyLocationI')) {
+			if ($feat->location->start_pos_type() ne 'EXACT') {
+				$is_sfmin_partial = 'true';
+			}
+			if ($feat->location->end_pos_type() ne 'EXACT') {
+				$is_sfmax_partial = 'true';
+			}
+		}
+
+		my %sfl = (
+			"srcfeature_id"	=> \%srcf,
+			"fmin"		=> $sfmin,
+			"is_fmin_partial" => $is_sfmin_partial || '',
+			"fmax"		=> $sfmax,
+			"is_fmax_partial" => $is_sfmax_partial || '',
+			"strand"	=> $sfstrand,
+			);
+
+		$sfhash{'featureloc'} = \%sfl;
+	}
+
+
+	#subfeature tags
+	my @sfdbxrefs;		#subfeature dbxrefs
+	my @sub_featureprops;	#subfeature props
+	foreach my $tag ($feat->all_tags()) {
+		#feature_dbxref for features
+		if ($tag eq 'db_xref')   {
+			my @t1 = $feat->each_tag_value($tag);
+			#print "# of dbxref: @t1\n";
+			for my $temp (@t1) {
+			   # "<db>:<accession>", split at the first colon; a value
+			   # without a colon gives the whole value as db name
+			   my ($db, $xref) = split(/:/, $temp, 2);
+			   #print "db: $db; xref: $xref\n";
+			   my %acchash = (
+				"db_id"		=> {'name' => $db},
+				"accession"	=> $xref,
+				);
+			   my %sfdbx = ('dbxref_id' => \%acchash);
+			   push (@sfdbxrefs, \%sfdbx);
+			}
+		#featureprop for features, excluding GFF Name & Parent tags
+		} elsif ($tag ne 'gene' && $tag ne 'symbol' && $tag ne 'Name' && $tag ne 'Parent') {
+			foreach my $val ($feat->each_tag_value($tag)) {
+				my %prophash = (
+					"type_id"       => {'name' => $tag, 'cv_id' => {'name' => $tag_cv}},
+					"value"		=> $val,
+				);
+				push(@sub_featureprops, \%prophash);
+			}
+		}
+	}
+	if (@sub_featureprops) {
+		$sfhash{'featureprop'} = \@sub_featureprops;
+	}
+	if (@sfdbxrefs) {
+		$sfhash{'feature_dbxref'} = \@sfdbxrefs;
+	}
+
+	#gene name handed down to the sub-features: locus_tag, else gene tag,
+	#else the name this call received
+	if ($feat->has_tag('locus_tag')) {
+		($genename)= $feat->each_tag_value('locus_tag');
+	} elsif ($feat->has_tag('gene')) {
+		($genename)= $feat->each_tag_value('gene');
+	}
+
+	my @ssfeatrel;
+	foreach my $sf ($feat->get_SeqFeatures()) {
+		#print $sf->primary_tag, "\n";
+		my $rref = $self->_subfeat2featrelhash($genename, $sftype, $sf, \%srcf, $tag_cv, $isanalysis);
+		if (defined $rref) {
+			push(@ssfeatrel, $rref);
+		}
+	}
+
+	if (@ssfeatrel) {
+		$sfhash{'feature_relationship'} = \@ssfeatrel;
+	}
+
+	#subj-obj relationship type
+	my $reltypename = ($sftype eq 'protein') ? 'producedby' : 'partof';
+
+	my %fr = (
+		"subject_id"	=> \%sfhash,
+		"type_id"		=> { 'name' => $reltypename, 'cv_id' => { 'name' => 'relationship type'}},
+		);
+
+	if ($seqtype eq 'mRNA' && $sftype eq 'gene') {
+		return \%sfhash;
+	} else {
+		return \%fr;
+	}
+
+}
+
+# Build a unique name for a feature in the form
+# <genename>-<feature-type>-<span>, e.g. foo-mRNA-10..1000.
+#
+# Args    : $genename - fallback gene name; replaced by the feature's own
+#                       locus_tag, or failing that gene, tag when present
+#           $feat     - the sequence feature
+# Returns : the generated unique name
+sub _genFeatUniqueName {
+	my ($self, $genename, $feat) = @_;
+
+	# the feature's own locus_tag/gene tag takes precedence over the name passed in
+	for my $tagname ('locus_tag', 'gene') {
+		next unless $feat->has_tag($tagname);
+		($genename) = $feat->each_tag_value($tagname);
+		last;
+	}
+
+	my $span = $feat->start . "\.\." . $feat->end;
+
+	return join('-', $genename, $feat->primary_tag, $span);
+}
+
+#create uniquename for pubs with no medline id and no FBrf#
+#use "<authors>, <year>, <type>" as the uniquename (same as miniref)
+#<authors> is <sole-author-surname>    if one author,
+#  or <first-author-surname> and <second-author-surname>   if two,
+#  or <first-author-surname> et al.   if more
+#sub _CreatePubUname {
+#	my $self = shift;
+#	my $pub = shift;
+#	undef(my $pubuname);
+#
+#	return $pubuname;
+#}
+
+# Get the authors of a reference.
+#
+# Args    : $ref - reference object; only its authors() string is read
+# Returns : reference to an array of hashes
+#           { author_id => { surname, givennames }, arank => N },
+#           ranked in citation order; nothing when the reference has no
+#           author string (undefined, empty or '.'), which lets the caller
+#           fall back to the submitter address.
+sub _getRefAuthors {
+	my $self = shift;
+	my $ref = shift;
+
+	my $temp = $ref->authors;
+
+	#no authors, Bio::SeqIO::genbank doesn't pick up 'CONSRTM' line.
+	return if !defined $temp || $temp eq '' || $temp eq '.';
+
+	#"A, B and C": the last author follows the first ' and '
+	my @authors;
+	my $and_pos = index($temp, ' and ');
+	if ($and_pos > 0) {
+		@authors = split(/\, /, substr($temp, 0, $and_pos));
+		push (@authors, substr($temp, $and_pos + length(' and ')));
+	} else {
+		@authors = split(/\, /, $temp);
+	}
+
+	my @aut;
+	my $rank = 0;
+	foreach my $author (@authors) {
+		$rank ++;
+		#parse the author lastname and givennames
+		my ($last, $given);
+		if (index($author, ',') > 0) {		#genbank format, last,f.m.
+			($last, $given) = split(/\,/, $author);
+		} elsif (index($author, ' ') > 0) {	#embl format, last f.m.
+			($last, $given) = split(/ /, $author);
+		} else {				#a single name, kept as the surname
+			$last = $author;
+		}
+		my %au = (
+			'surname'	=> $last,
+			'givennames'	=> $given,
+			);
+		push(@aut, {author_id => \%au, arank => $rank});
+	}
+
+	return \@aut;
+}
+
+# Extract the submission year from the citation of a submitted reference.
+# genbank format for the submitted citation:
+#   JOURNAL   Submitted (DD-MON-YYYY) submitter address
+#
+# Args    : $citation - the citation (location) string
+# Returns : the four-digit year; nothing when the citation is not a
+#           "Submitted" one (a warning is issued) or carries no date in the
+#           expected format.
+sub _getSubmitYear {
+    my $self = shift;
+    my $citation = shift;
+
+    if ($citation !~ /Submitted/) {
+	$self->warn("not citation for a submitted reference. cannot extract submission year.");
+	return;
+    }
+
+    # capture the year directly, so that a citation without a well-formed
+    # date yields nothing rather than the text of some earlier match
+    if ($citation =~ /Submitted \(\d\d-[a-zA-Z]{3}-(\d{4})\)/) {
+	return $1;
+    }
+
+    return;
+}
+
+# Extract the submitter address from the citation of a submitted reference
+# and wrap it as an author record.
+#
+# Args    : $ref - reference object; its location() string is read
+# Returns : reference to a hash { author_id => { surname => <address> } }
+#           holding at most the first 100 characters of the text that
+#           follows "Submitted (DD-MON-YYYY)"; nothing when the citation is
+#           not a "Submitted" one (a warning is issued) or carries no date
+#           in the expected format.
+sub _getSubmitAddr {
+    my $self = shift;
+    my $ref = shift;
+
+    my $citation = $ref->location;
+    if ($citation !~ /Submitted/) {
+	$self->warn("not citation for a submitted reference. cannot extract submitter address.");
+	return;
+    }
+
+    # capture what follows the date, so that a citation without a
+    # well-formed date yields nothing rather than the text of some earlier match
+    if ($citation =~ /Submitted \(\d\d-[a-zA-Z]{3}-\d{4}\)(.*)\z/s) {
+	my $addr = $1;
+	$addr =~ s/^\s//;
+	my %author = (
+		       'author_id'	=> {'surname'	=> substr($addr, 0, 100)},
+		       );
+	return \%author;
+    }
+
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaos.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaos.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaos.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,711 @@
+# $Id: chaos.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $
+# $Date: 2006/10/02 23:10:28 $
+#
+# BioPerl module for Bio::SeqIO::chaos
+#
+# Chris Mungall <cjm at fruitfly.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::chaos - chaos sequence input/output stream
+
+=head1 SYNOPSIS
+
+    #In general you will not want to use this module directly;
+    #use the chaosxml format via SeqIO
+
+    $outstream = Bio::SeqIO->new(-file => $filename,
+                                 -format => 'chaosxml');
+
+    while ( my $seq = $instream->next_seq() ) {
+       $outstream->write_seq($seq);
+    }
+
+=head1 DESCRIPTION
+
+This is the guts of L<Bio::SeqIO::chaosxml> - please refer to the
+documentation for this module
+
+B<CURRENTLY WRITE ONLY>
+
+ChaosXML is an XML mapping of the chado relational database; for more
+information, see http://www.fruitfly.org/chaos-xml
+
+chaos can be represented in various syntaxes - XML, S-Expressions or
+indented text. You should see the relevant SeqIO file. You will
+probably want to use L<Bio::SeqIO::chaosxml>, which is a wrapper to
+this module.
+
+=head2 USING STAG OBJECTS
+
+B<non-standard bioperl stuff you don't necessarily need to know follows>
+
+This module (in write mode) is an B<event producer> - it generates XML
+events via the L<Data::Stag> module. If you only care about the final
+end-product xml, use L<Bio::SeqIO::chaosxml>
+
+You can treat the resulting chaos-xml stream as stag XML objects;
+
+    $outstream = Bio::SeqIO->new(-file => $filename, -format => 'chaos');
+
+    while ( my $seq = $instream->next_seq() ) {
+       $outstream->write_seq($seq);
+    }
+    my $chaos = $outstream->handler->stag;
+    # stag provides get/set methods for xml elements
+    # (these are chaos objects, not bioperl objects)
+    my @features = $chaos->get_feature;
+    my @feature_relationships = $chaos->get_feature_relationship;
+    # stag objects can be queried with functional-programming
+    # style queries
+    my @features_in_range =
+      $chaos->where('feature',
+                    sub {
+                         my $featureloc = shift->get_featureloc;
+                         $featureloc->strand == 1 &&
+                         $featureloc->nbeg > 10000 &&
+                         $featureloc->nend < 20000;
+                    });
+    foreach my $feature (@features_in_range) {
+      my $featureloc = $feature->get_featureloc;
+      printf "%s [%d->%d on %s]\n",
+        $feature->sget_name,
+        $featureloc->sget_nbeg,
+        $featureloc->sget_nend,
+        $featureloc->sget_srcfeature_id;
+    }
+
+=head1 MODULES REQUIRED
+
+L<Data::Stag>
+
+Downloadable from CPAN; see also http://stag.sourceforge.net
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHOR - Chris Mungall
+
+Email cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::chaos;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+use Bio::SeqFeature::Tools::TypeMapper;
+use Bio::SeqFeature::Tools::FeatureNamer;
+use Bio::SeqFeature::Tools::IDHandler;
+use Data::Stag qw(:all);
+
+use base qw(Bio::SeqIO);
+
+# Class names of the helper modules loaded above.  They are kept in package
+# variables and used as invocants for class-method calls below (for example
+# $FNAMER->name_feature and $IDH->generate_unique_persistent_id).
+our $TM = 'Bio::SeqFeature::Tools::TypeMapper';
+our $FNAMER = 'Bio::SeqFeature::Tools::FeatureNamer';
+our $IDH = 'Bio::SeqFeature::Tools::IDHandler';
+
+# Set up the writer: install a default sequence factory and event handler,
+# open the top-level "chaos" element and emit a chaos_metadata event that
+# describes this export.
+#
+# Args    : the constructor arguments, passed on to SUPER::_initialize
+# Returns : nothing
+sub _initialize {
+    my ($self, @args) = @_;
+
+    $self->SUPER::_initialize(@args);
+    if ( !defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
+                                      -type    => 'Bio::Seq::RichSeq'));
+    }
+
+    # default_handler_class may be overridden by subclasses to choose the
+    # output syntax
+    my $wclass = $self->default_handler_class;
+    $self->handler($wclass);
+    if ($self->_fh) {
+        $self->handler->fh($self->_fh);
+    }
+    $self->{_end_of_data} = 0;
+    $self->_type_by_id_h({});
+
+    my $t = time;
+    my $ppt = localtime $t;    # scalar context: human-readable timestamp
+
+    # start the enclosing element; end_of_data() emits the matching end
+    $self->handler->S("chaos");
+    $self->handler->ev(chaos_metadata=>[
+                                        [chaos_version=>1],
+                                        [chaos_flavour=>'bioperl'],
+                                        [feature_unique_key=>'feature_id'],
+                                        [equiv_chado_release=>'chado_1_01'],
+                                        [export_unixtime=>$t],
+                                        [export_localtime=>$ppt],
+                                        [export_host=>$ENV{HOST}],
+                                        [export_user=>$ENV{USER}],
+                                        [export_perl5lib=>$ENV{PERL5LIB}],
+                                        [export_program=>$0],
+                                        [export_module=>'Bio::SeqIO::chaos'],
+                                        [export_module_cvs_id=>'$Id: chaos.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $'],
+                                       ]);
+
+    return;
+}
+
+# Destructor: emit the closing "chaos" event if that has not happened yet
+# (end_of_data does nothing on a second call), then run the inherited
+# destructor.
+sub DESTROY {
+    my ($self) = @_;
+    $self->end_of_data;
+    $self->SUPER::DESTROY;
+}
+
+# Close the top-level "chaos" element exactly once; later calls return
+# without touching the handler.
+sub end_of_data {
+    my ($self) = @_;
+
+    if ( $self->{_end_of_data} ) {
+        return;
+    }
+    $self->{_end_of_data} = 1;
+    return $self->handler->E("chaos");
+}
+
+# Returns the value that _initialize installs as this stream's event
+# handler: whatever Data::Stag->makehandler gives back.
+sub default_handler_class {
+    return Data::Stag->makehandler;
+}
+
+=head2 context_namespace
+
+ Title   : context_namespace
+ Usage   : $obj->context_namespace($newval)
+ Function:
+ Example :
+ Returns : value of context_namespace (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+IDs will be preceded with the context namespace
+
+=cut
+
+sub context_namespace {
+    my ($self, @value) = @_;
+
+    # setter form: store and return the first extra argument (may be undef)
+    if (@value) {
+        return $self->{'context_namespace'} = $value[0];
+    }
+
+    # getter form
+    return $self->{'context_namespace'};
+}
+
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    my ($self, @args) = @_;
+
+    # Nothing is read from the stream here: the factory is called without
+    # arguments, so the object it returns carries no data.  The
+    # commented-out entries below are kept from the original code.
+    my $seq = $self->sequence_factory->create
+        (
+         #         '-verbose' =>$self->verbose(),
+         #         %params,
+         #         -seq => $seqc,
+         #         -annotation => $annotation,
+         #         -features => \@features
+        );
+    return $seq;
+}
+
+# Get or set the event handler that receives the events produced while
+# writing.
+sub handler {
+    my ($self, @value) = @_;
+
+    if (@value) {
+        $self->{_handler} = $value[0];
+    }
+    return $self->{_handler};
+}
+
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object (must be seq) to the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq
+
+
+=cut
+
+sub write_seq {
+    my ($self,$seq) = @_;
+
+    # Emits one "feature" event for the sequence record itself (type
+    # databank_entry), then one event per contained feature through
+    # write_sf.  Returns 1; problems are reported by throwing.
+
+    if( !defined $seq ) {
+        $self->throw("Attempting to write with no seq!");
+    }
+
+    if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
+        $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
+    }
+
+    # get a handler - must inherit from Data::Stag::BaseHandler;
+    my $w = $self->handler;
+
+    # start of data
+    ###    $w->S("chaos_block");
+
+    my $seq_chaos_feature_id;
+
+    # different seq objects have different version accessors -
+    # weird but true
+    my $version = $seq->can('seq_version') ? $seq->seq_version : $seq->version;
+
+    my $accversion = $seq->accession_number;
+    if ($version) {
+        $accversion .= ".$version";
+    }
+
+    if ($accversion) {
+        $seq_chaos_feature_id = $accversion;
+    }
+    else {
+        $seq_chaos_feature_id = $self->get_chaos_feature_id($seq);
+        $accversion = $seq_chaos_feature_id;
+    }
+
+    # All ids must have a namespace prefix
+    if ($seq_chaos_feature_id !~ /:/) {
+        $seq_chaos_feature_id = "GenericSeqDB:$seq_chaos_feature_id";
+    }
+
+#    if ($seq->accession_number eq 'unknown') {
+#        $seq_chaos_feature_id = $self->get_chaos_feature_id('contig', $seq);
+#    }
+
+    my $haplotype;
+    if ($seq->desc =~ /haplotype(.*)/i) {
+        # yikes, no consistent way to specify haplotype in gb
+        $haplotype = $1;
+        $haplotype =~ s/\s+/_/g;
+        $haplotype =~ s/\W+//g;
+    }
+
+    my $OS;
+    # Organism lines
+    if (my $spec = $seq->species) {
+        my ($species, $genus, @class) = $spec->classification();
+        $OS = "$genus $species";
+        if (my $ssp = $spec->sub_species) {
+            $OS .= " $ssp";
+        }
+        $self->genus_species($OS);
+        if( $spec->common_name ) {
+            my $common = $spec->common_name;
+            # genbank parser sets species->common_name to
+            # be "Genus Species (common name)" which is wrong;
+            # we will correct for this; if common_name is set
+            # correctly then carry on
+            if ($common =~ /\((.*)\)/) {
+                $common = $1;
+            }
+            $OS .= " (".$common.")";
+        }
+    }
+    if ($OS) {
+        $self->organismstr($OS);
+    }
+    if ($haplotype) {
+        # genus_species is part of uniquename - add haplotype
+        # to make it genuinely unique.
+        # Plain concatenation: the accessor result is not an lvalue, so
+        # the former ".=" tried to modify a method-call return value.
+        $self->genus_species($self->genus_species . " $haplotype");
+    }
+
+    my $uname = $self->make_uniquename($self->genus_species, $accversion);
+
+    # data structure representing the core sequence for this record
+    my $seqnode =
+      Data::Stag->new(feature=>[
+                                [feature_id=>$seq_chaos_feature_id],
+                                [dbxrefstr=>'SEQDB:'.$accversion],
+                                [name=>$seq->display_name],
+                                [uniquename=>$uname],
+                                [residues=>$seq->seq],
+                               ]);
+
+    # soft properties
+    # NOTE(review): %prop is filled here but never added to $seqnode in
+    # this sub - confirm whether these properties were meant to be emitted.
+    my %prop = ();
+
+    $seqnode->set_type('databank_entry');
+
+    foreach my $accessor (qw(desc keywords division molecule is_circular)) {
+        $prop{$accessor} = $seq->$accessor() if $seq->can($accessor);
+    }
+    $prop{dates} = join("; ", $seq->get_dates) if $seq->can("get_dates");
+
+    local($^W) = 0;   # suppressing warnings about uninitialized fields.
+
+    # Reference lines
+    foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
+        # TODO
+    }
+    # Comment lines
+
+    $seqnode->add_featureprop([[type=>'haplotype'],[value=>$haplotype]])
+      if $haplotype;
+    foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
+        $seqnode->add_featureprop([[type=>'comment'],[value=>$comment->text]]);
+    }
+    if ($OS) {
+        $seqnode->set_organismstr($OS);
+    }
+
+    my @sfs = $seq->get_SeqFeatures;
+
+    # genbank usually includes a 'source' feature - we just
+    # migrate the data from this to the actual source feature
+    my @sources = grep {$_->primary_tag eq 'source'} @sfs;
+    @sfs = grep {$_->primary_tag ne 'source'} @sfs;
+    $self->throw(">1 source types") if @sources > 1;
+    my $source = shift @sources;
+    if ($source) {
+
+        # write the source feature into a throw-away handler, then copy
+        # only its featureprop and feature_dbxref nodes onto the
+        # sequence node
+        my $tempw = Data::Stag->makehandler;
+        $self->write_sf($source, $seq_chaos_feature_id, $tempw);
+        my $snode = $tempw->stag;
+        $seqnode->add($_->name, $_->data)
+          foreach ($snode->get_featureprop,
+                   $snode->get_feature_dbxref);
+
+    }
+
+
+    # throw the writer an event
+    $w->ev(@$seqnode);
+
+    $seqnode = undef;      # free memory
+
+    # make events for all the features within the record
+    foreach my $sf ( @sfs ) {
+        $FNAMER->name_feature($sf);
+        $FNAMER->name_contained_features($sf);
+        $self->write_sf($sf, $seq_chaos_feature_id);
+    }
+
+    # data end
+    ### $w->E("chaos_block");
+    return 1;
+}
+
+
+# Get or set the organism string remembered by this stream (write_seq
+# stores it; write_sf uses it as the default organism of a feature).
+sub organismstr {
+    my ($self, @value) = @_;
+
+    if (@value) {
+        return $self->{'organismstr'} = $value[0];
+    }
+    return $self->{'organismstr'};
+}
+
+
+# Get or set the "Genus species" string that is used as the prefix of
+# unique names (see make_uniquename).
+sub genus_species {
+    my ($self, @value) = @_;
+
+    if (@value) {
+        return $self->{'genus_species'} = $value[0];
+    }
+    return $self->{'genus_species'};
+}
+
+
+# Get or set the hash reference that maps a feature ID to its type.
+sub _type_by_id_h {
+    my ($self, @value) = @_;
+
+    if (@value) {
+        $self->{_type_by_id_h} = $value[0];
+    }
+    return $self->{_type_by_id_h};
+}
+
+
+
+# ----
+# writes a seq feature
+#
+# Emits a "feature" event for $sf, recursively writes its subfeatures and
+# emits a "feature_relationship" event for each subfeature (or, when there
+# are none, for each value of the feature's "parent" tag).
+#
+# Args    : $sf                   - the feature to write
+#           $seq_chaos_feature_id - ID used as srcfeature_id for locations
+#                                   that are not remote
+#           $w                    - optional handler; defaults to
+#                                   $self->handler
+# Returns : the chaos feature_id of the written feature
+# ----
+
+sub write_sf {
+    my $self = shift;
+    my $sf = shift;
+    my $seq_chaos_feature_id = shift;
+    my $w = shift || $self->handler;
+
+    # tag name (lower-cased) => array ref of all values of that tag
+    my %props =
+      map {
+          lc($_)=>[$sf->each_tag_value($_)]
+      } $sf->all_tags;
+
+    my $loc = $sf->location;
+    my $name = $FNAMER->generate_feature_name($sf);
+    my $type = $sf->primary_tag;
+
+    # The CDS (eg in a genbank feature) implicitly represents
+    # the protein
+    $type =~ s/CDS/polypeptide/;
+
+    my @subsfs = $sf->sub_SeqFeature;
+    my @locnodes = ();
+    my $sid = $loc->is_remote ? $loc->seq_id : $seq_chaos_feature_id;
+
+    # hard-wired off: the first branch below is currently never taken
+    my $CREATE_SPLIT_SFS = 0;
+
+    if($CREATE_SPLIT_SFS &&
+       $loc->isa("Bio::Location::SplitLocationI") ) {
+        # turn splitlocs into subfeatures
+        my $n = 1;
+        push(@subsfs,
+             map {
+                 my $ssf =
+                   Bio::SeqFeature::Generic->new(
+
+                                                 -start=>$_->start,
+                                                 -end=>$_->end,
+                                                 -strand=>$_->strand,
+                                                 -primary=>$self->subpartof($type),
+                                              );
+                 if ($_->is_remote) {
+                     $ssf->location->is_remote(1);
+                     $ssf->location->seq_id($_->seq_id);
+                 }
+                 $ssf;
+               } $loc->each_Location);
+    }
+    elsif( $loc->isa("Bio::Location::RemoteLocationI") ) {
+        # turn splitlocs into subfeatures
+        my $n = 1;
+        push(@subsfs,
+             map {
+                 Bio::SeqFeature::Generic->new(
+#                                               -name=>$name.'.'.$n++,
+                                               -start=>$_->start,
+                                               -end=>$_->end,
+                                               -strand=>$_->strand,
+                                               -primary=>$self->subpartof($type),
+                                              )
+               } $loc->each_Location);
+    }
+    else {
+        # plain location: a single featureloc in interbase coordinates
+        my ($beg, $end, $strand) = $self->bp2ib($loc);
+        # NOTE(review): bp2ib returns "$str || 1" for the strand, so this
+        # guard does not look reachable - confirm before relying on it.
+        if (!$strand) {
+            use Data::Dumper;
+            print Dumper $sf, $loc;
+            $self->throw("($beg, $end, $strand) - no strand\n");
+        }
+        @locnodes = (
+                     [featureloc=>[
+                                   [nbeg=>$beg],
+                                   [nend=>$end],
+                                   [strand=>$strand],
+                                   [srcfeature_id=>$sid],
+                                   [locgroup=>0],
+                                   [rank=>0],
+                                  ]
+                     ]
+                    );
+    }
+    my $feature_id = $self->get_chaos_feature_id($sf);
+
+    delete $props{id} if $props{id};
+    # do something with genbank stuff
+    my $pid = $props{'protein_id'};
+    my $tn = $props{'translation'};
+    my @xrefs = @{$props{'db_xref'} || []};
+    if ($pid) {
+        push(@xrefs, "protein:$pid->[0]");
+    }
+
+    my $org = $props{organism} ? $props{organism}->[0] : undef;
+    if (!$org && $self->organismstr) {
+        $org = $self->organismstr;
+    }
+    my $uname = $name ? $name.'/'.$feature_id : $feature_id;
+    if ($self->genus_species && $name) {
+        $uname = $self->make_uniquename($self->genus_species, $name);
+    }
+    if (!$uname) {
+        $self->throw("cannot make uniquename for $feature_id $name");
+    }
+    # remember the type so that later features naming this one in a
+    # "parent" tag can look it up
+    $self->_type_by_id_h->{$feature_id} = $type;
+    my $fnode =
+      [feature=>[
+                 [feature_id=>$feature_id],
+                 $name ? ([name=>$name]) : (),
+                 [uniquename=>$uname],
+                 [type=>$type],
+                 $tn ? ([residues=>$tn->[0]],
+                        [seqlen=>length($tn->[0])],
+                        #####[md5checksum=>md5checksum($tn->[0])],
+                       ) :(),
+                 $org ? ([organismstr=>$org]) : (),
+                 @locnodes,
+                 (map {
+                     [feature_dbxref=>[
+                                       [dbxrefstr=>$_]
+                                      ]
+                     ]
+                 } @xrefs),
+                 (map {
+                     my $k = $_;
+                     my $rank=0;
+                     map { [featureprop=>[[type=>$k],[value=>$_],[rank=>$rank++]]] } @{$props{$k}}
+                 } keys %props),
+                ]];
+    $w->ev(@$fnode);
+
+    my $rank = 0;
+    if (@subsfs) {
+        # strand is always determined by FIRST feature listed
+        # (see genbank entry for trans-spliced mod(mdg4) AE003734)
+        # Take the strand VALUE of that feature: comparing strands with
+        # the feature object itself put every subfeature on the "other"
+        # strand and gave the sort a meaningless direction.
+        my $strand = $subsfs[0]->strand;
+
+        # almost all the time, all features are on same strand
+        my @sfs_on_main_strand = grep {$_->strand == $strand} @subsfs;
+        my @sfs_on_other_strand = grep {$_->strand != $strand} @subsfs;
+
+        sort_by_strand($strand, \@sfs_on_main_strand);
+        sort_by_strand(0-$strand, \@sfs_on_other_strand);
+        @subsfs = (@sfs_on_main_strand, @sfs_on_other_strand);
+
+        foreach my $ssf (@subsfs) {
+            # NOTE(review): $w is not passed on, so subfeatures always go
+            # to $self->handler even when a handler was supplied - confirm
+            # this is intended.
+            my $ssfid = $self->write_sf($ssf, $sid);
+            #my $rtype = 'part_of';
+            my $rtype =
+              $TM->get_relationship_type_by_parent_child($sf,$ssf);
+            if ($ssf->primary_tag eq 'CDS') {
+                $rtype = 'derives_from';
+            }
+            $w->ev(feature_relationship=>[
+                                          [subject_id=>$ssfid],
+                                          [object_id=>$feature_id],
+                                          [type=>$rtype],
+                                          [rank=>$rank++],
+                                         ]
+                  );
+        }
+    }
+    else {
+        # parents not stored as bioperl containment hierarchy
+        my @parent_ids = @{$props{parent} || []};
+        foreach my $parent_id (@parent_ids) {
+            my $ptype =
+              $self->_type_by_id_h->{$parent_id} || 'unknown';
+            my $rtype =
+              $TM->get_relationship_type_by_parent_child($ptype,$type);
+            $w->ev(feature_relationship=>[
+                                          [subject_id=>$feature_id],
+                                          [object_id=>$parent_id],
+                                          [type=>$rtype],
+                                          [rank=>$rank++],
+                                         ]
+                  );
+        }
+    }
+    return $feature_id;
+}
+
+# Sort the features in the referenced array in place by start coordinate:
+# ascending for a positive strand, descending for a negative one.  A false
+# strand (0 or undef) is treated as 1.
+#
+# Args    : $strand - number used to multiply the comparison result
+#           $sfs    - array ref of objects with a start() method
+# Returns : nothing
+sub sort_by_strand {
+    my ($strand, $sfs) = @_;
+    $strand ||= 1;
+
+    my @ordered = sort { ($a->start <=> $b->start) * $strand } @$sfs;
+    @$sfs = @ordered;
+    return;
+}
+
+# Build a unique name of the form "<organism>:<name>".  In the organism
+# part, whitespace runs and parentheses become underscores, repeated
+# underscores collapse to one, and leading/trailing underscores are
+# removed.
+#
+# Args    : $org  - organism string, e.g. "Genus species"
+#           $name - name or accession to qualify
+# Returns : the combined string
+sub make_uniquename {
+    my ($self, $org, $name) = @_;
+
+    (my $os = $org) =~ s/\s+/_/g;
+    $os =~ tr/()/__/;    # both kinds of parenthesis become "_"
+    $os =~ tr/_//s;      # squeeze runs of "_" down to a single one
+    $os =~ s/^_+//;
+    $os =~ s/_+$//;
+
+    return join(':', $os, $name);
+}
+
+
+# Work out the chaos feature ID for a sequence or a sequence feature.
+#
+# For a Bio::SeqI object the ID is "<accession>.<version>".  For a
+# Bio::SeqFeatureI object it is the primary_id when that is set, otherwise
+# an ID generated by $IDH, with the stringified object as a last resort if
+# generation dies.  A context namespace, when set, is prepended as
+# "<namespace>:".
+#
+# Args    : $ob - Bio::SeqI or Bio::SeqFeatureI object
+# Returns : the ID string
+sub get_chaos_feature_id {
+    my ($self, $ob) = @_;
+
+    my $id;
+    if ($ob->isa("Bio::SeqI")) {
+        my $accession = $ob->accession_number;
+        my $version   = $ob->can('seq_version') ? $ob->seq_version : $ob->version;
+        $id = join('.', $accession, $version);
+    }
+    else {
+        unless ($ob->isa("Bio::SeqFeatureI")) {
+            $self->throw("$ob must be either SeqI or SeqFeatureI");
+        }
+
+        if ($ob->primary_id) {
+            $id = $ob->primary_id;
+        }
+        else {
+            eval {
+                $id = $IDH->generate_unique_persistent_id($ob);
+            };
+            if ($@) {
+                $self->warn($@);
+                # last resort - use memory pointer ref
+                # will not be persistent, but will be unique
+                $id = "$ob";
+            }
+        }
+    }
+
+    unless ($id) {
+        if ($ob->isa("Bio::SeqFeatureI")) {
+            $id = $IDH->generate_unique_persistent_id($ob);
+        }
+        else {
+            $self->throw("Cannot generate a unique persistent ID for a Seq without either primary_id or accession");
+        }
+    }
+
+    if ($id && $self->context_namespace) {
+        $id = $self->context_namespace . ":" . $id;
+    }
+    return $id;
+}
+
+# Convert a base-oriented location into interbase coordinates with
+# directional semantics: the start is decremented by one and, on the
+# negative strand, begin and end are swapped.
+#
+# Args    : $loc - either an array ref [start, end, strand] or an object
+#           with start(), end() and strand() methods
+# Returns : list (begin, end, strand); a false strand is reported as 1
+sub bp2ib {
+    my ($self, $loc) = @_;
+
+    my @triple = ref($loc) eq "ARRAY"
+      ? (@$loc)
+      : ($loc->start, $loc->end, $loc->strand);
+    my ($begin, $finish, $strand) = @triple;
+
+    $begin--;
+    ($begin, $finish) = ($finish, $begin) if $strand < 0;
+
+    return ($begin, $finish, $strand || 1);
+}
+
+# Derive the type name for a part of a feature of the given type.  The
+# name starts out as "partof_<type>"; the rewrite rules below are then
+# applied in order (CDS, protein and polypeptide parts become "CDS_exon",
+# parts of any "...RNA" type become "exon").
+#
+# Args    : $parent_type - type of the containing feature
+# Returns : the type string for the part
+sub subpartof {
+    my ($self, $parent_type) = @_;
+
+    my $type = 'partof_' . $parent_type;
+    my @rules = (
+        [ qr/partof_CDS/,         'CDS_exon' ],
+        [ qr/partof_protein/,     'CDS_exon' ],
+        [ qr/partof_polypeptide/, 'CDS_exon' ],
+        [ qr/partof_\w*RNA/,      'exon'     ],
+    );
+    foreach my $rule (@rules) {
+        my ($pattern, $replacement) = @$rule;
+        $type =~ s/$pattern/$replacement/;
+    }
+    return $type;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaosxml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaosxml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/chaosxml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,86 @@
+# $Id: chaosxml.pm,v 1.3.6.1 2006/10/02 23:10:29 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::chaosxml
+#
+# Chris Mungall <cjm at fruitfly.org>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::chaosxml - chaosxml sequence input/output stream
+
+=head1 SYNOPSIS
+
+    #In general you will not want to use this module directly;
+    #use the chaosxml format via SeqIO
+
+    $outstream = Bio::SeqIO->new(-file => $filename, -format => 'chaosxml');
+
+    while ( my $seq = $instream->next_seq() ) {
+       $outstream->write_seq($seq);
+    }
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from chaos files.
+
+B<CURRENTLY WRITE ONLY>
+
+ChaosXML is an XML mapping of the chado relational database; for more
+information, see http://www.fruitfly.org/chaos-xml
+
+Chaos can have other syntaxes than XML (eg S-Expressions, Indented text)
+
+See L<Bio::SeqIO::chaos> for a full description
+
+
+=head1 VERY VERY IMPORTANT
+
+!!!!!!!!!!!CHADO AND CHAOS USE INTERBASE COORDINATES!!!!!!!!!!!!!!!!
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHOR - Chris Mungall
+
+Email cjm at fruitfly.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::chaosxml;
+use strict;
+
+use Data::Stag::XMLWriter;
+
+use base qw(Bio::SeqIO::chaos);
+
+# Overrides the method of the same name in the base class Bio::SeqIO::chaos:
+# returns whatever Data::Stag->getformathandler('xml') gives back.
+sub default_handler_class {
+    return Data::Stag->getformathandler('xml');
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ctf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ctf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ctf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: ctf.pm,v 1.15.4.1 2006/10/02 23:10:29 sendu Exp $
+# BioPerl module for Bio::SeqIO::ctf
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::ctf - ctf trace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from ctf trace
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Aaron Mackey
+
+Email: amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::ctf;
+use vars qw(@ISA $READ_AVAIL);
+use strict;
+
+use Bio::SeqIO;
+use Bio::Seq::SeqFactory;
+
+push @ISA, qw( Bio::SeqIO );
+
+# At compile time, try to load Bio::SeqIO::staden::read.  On success it is
+# appended to @ISA and $READ_AVAIL is set to 1; on failure $READ_AVAIL is
+# set to 0 and _initialize later refuses to construct a stream.
+sub BEGIN {
+    eval { require Bio::SeqIO::staden::read; };
+    $READ_AVAIL = $@ ? 0 : 1;
+    push @ISA, "Bio::SeqIO::staden::read" if $READ_AVAIL;
+}
+
+# Set up the stream: install a default sequence factory that creates
+# Bio::Seq::Quality objects, and throw a Bio::Root::SystemException when
+# Bio::SeqIO::staden::read could not be loaded.
+#
+# Args    : the constructor arguments, passed on to SUPER::_initialize
+sub _initialize {
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(
+          Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
+                                    -type    => 'Bio::Seq::Quality'));
+  }
+  # $READ_AVAIL is set in the BEGIN block of this package
+  unless ($READ_AVAIL) {
+      Bio::Root::Root->throw( -class => 'Bio::Root::SystemException',
+                              -text  => "Bio::SeqIO::staden::read is not available; make sure the bioperl-ext package has been installed successfully!"
+                            );
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::Quality object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # read_trace is expected to come from Bio::SeqIO::staden::read, which
+    # the BEGIN block adds to @ISA when it is available
+    my ($bases, $id, $desc, $qual) = $self->read_trace($self->_fh, 'ctf');
+
+    # build the sequence object; the id doubles as primary_id
+    my %seq_args = (
+        -seq        => $bases,
+        -id         => $id,
+        -primary_id => $id,
+        -desc       => $desc,
+        -alphabet   => 'DNA',
+        -qual       => $qual,
+    );
+    my $seq_obj = $self->sequence_factory->create(
+        map { $_ => $seq_args{$_} }
+            qw(-seq -id -primary_id -desc -alphabet -qual)
+    );
+    return $seq_obj;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+sub write_seq {
+    my ($self, @seq) = @_;
+
+    # write every sequence passed in as a ctf trace to the stream's
+    # filehandle
+    my $fh = $self->_fh;
+    foreach my $seq (@seq) {
+        $self->write_trace($fh, $seq, 'ctf');
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/embl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/embl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/embl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1497 @@
+# $Id: embl.pm,v 1.92.4.5 2006/10/31 15:48:58 cjfields Exp $
+#
+# BioPerl module for Bio::SeqIO::EMBL
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::embl - EMBL sequence input/output stream
+
+=head1 SYNOPSIS
+
+It is probably best not to use this object directly, but
+rather go through the SeqIO handler system. Go:
+
+    $stream = Bio::SeqIO->new(-file => $filename, -format => 'EMBL');
+
+    while ( (my $seq = $stream->next_seq()) ) {
+	    # do something with $seq
+    }
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from EMBL flat
+file databases.
+
+There is a lot of flexibility here about how to dump things which
+should be documented more fully.
+
+There should be a common object that this and Genbank share (probably
+with Swissprot). Too much of the magic is identical.
+
+=head2 Optional functions
+
+=over 3
+
+=item _show_dna()
+
+(output only) shows the dna or not
+
+=item _post_sort()
+
+(output only) provides a sorting func which is applied to the FTHelpers
+before printing
+
+=item _id_generation_func()
+
+This is a function which is called as
+
+   print "ID   ", $func($annseq), "\n";
+
+To generate the ID line. If it is not there, it generates a sensible ID
+line using a number of tools.
+
+If you want to output annotations in EMBL format they need to be
+stored in a Bio::Annotation::Collection object which is accessible
+through the Bio::SeqI interface method L<annotation()|annotation>.
+
+The following are the names of the keys which are polled from a
+L<Bio::Annotation::Collection> object.
+
+ reference  - Should contain Bio::Annotation::Reference objects
+ comment    - Should contain Bio::Annotation::Comment objects
+ dblink     - Should contain Bio::Annotation::DBLink objects
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqIO::embl;
+use vars qw(%FTQUAL_NO_QUOTE);
+use strict;
+use Bio::SeqIO::FTHelper;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+
+use base qw(Bio::SeqIO);
+
+# Lookup table keyed by feature-table qualifier name; every listed name
+# maps to 1.
+# NOTE(review): judging by the name, these are the qualifiers whose values
+# are written without surrounding quotes - confirm against the code that
+# reads this hash.
+%FTQUAL_NO_QUOTE=(
+  'anticodon'=>1,
+  'citation'=>1,
+  'codon'=>1,
+  'codon_start'=>1,
+  'cons_splice'=>1,
+  'direction'=>1,
+  'evidence'=>1,
+  'label'=>1,
+  'mod_base'=> 1,
+  'number'=> 1,
+  'rpt_type'=> 1,
+  'rpt_unit'=> 1,
+  'transl_except'=> 1,
+  'transl_table'=> 1,
+  'usedin'=> 1,
+);
+
+# Set up the stream: reset the feature-table decoding hash, switch DNA
+# output on by default and install a default sequence factory that creates
+# Bio::Seq::RichSeq objects.
+#
+# Args    : the constructor arguments, passed on to SUPER::_initialize
+sub _initialize {
+  my ($self, @args) = @_;
+
+  $self->SUPER::_initialize(@args);
+  # hash for functions for decoding keys.
+  $self->{'_func_ftunit_hash'} = {};
+  $self->_show_dna(1); # sets this to one by default. People can change it
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(
+          Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
+                                    -type    => 'Bio::Seq::RichSeq'));
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+=cut
+
+# Read the next EMBL entry from the stream and build a sequence object.
+#
+# Args    : none used
+# Returns : the object produced by the stream's sequence factory, or
+#           nothing (bare return) at end of stream
+# Throws  : if the first non-blank line is not an ID line
+#
+# The entry is consumed in four stages: the ID line, the annotation lines
+# up to the feature table, the FT/CO feature lines, and finally the
+# sequence block terminated by "//".
+sub next_seq {
+    my ($self, @args) = @_;
+    my ($line, $name, $desc, $mol, $div, $comment);
+
+    my $annotation = Bio::Annotation::Collection->new;
+    my (%params, @features);
+
+    $line = $self->_readline;
+
+    # End of stream is not an error: return quietly.
+    return unless defined $line;
+
+    # Skip any run of blank lines in front of the entry.
+    if ( $line =~ /^\s+$/ ) {
+        while ( defined( $line = $self->_readline ) ) {
+            last if $line =~ /^\S/;
+        }
+        # return without error if the whole next sequence was just a single
+        # blank line and then eof
+        return unless $line;
+    }
+
+    # no ID as 1st non-blank line, need short circuit and exit routine
+    $self->throw("EMBL stream with no ID. Not embl in my book")
+        unless $line =~ /^ID\s+\S+/;
+
+    # At this point we are sure that $line contains an ID header line
+    my $alphabet;
+    if ( ( $line =~ tr/;/;/ ) == 6 ) {
+        # New style header (EMBL Release >= 87, after June 2006): it
+        # contains exactly six semicolons, e.g.
+        # ID   DQ299383; SV 1; linear; mRNA; STD; MAM; 431 BP.
+        # This regexp comes from the new2old.pl conversion script, from EBI
+        my ($sv, $topology);
+        if ( $line =~ m/^ID   (\w+);\s+SV (\d+); (\w+); ([^;]+); (\w{3}); (\w{3}); (\d+) BP./ ) {
+            ($name, $sv, $topology, $mol, $div) = ($1, $2, $3, $4, $6);
+        }
+
+        if ( defined $sv ) {
+            $params{'-seq_version'} = $sv;
+            $params{'-version'}     = $sv;
+        }
+
+        # $topology stays undefined when the header did not parse.
+        if ( defined $topology && $topology eq "circular" ) {
+            $params{'-is_circular'} = 1;
+        }
+    }
+    else {
+        # Old style header (EMBL Release < 87, before June 2006)
+        # NOTE(review): (\S+) captures only the first word of the molecule
+        # field, so "circular DNA" yields "circular" -- confirm intent.
+        ($name, $mol, $div) =
+            ( $line =~ /^ID\s+(\S+)[^;]*;\s+(\S+)[^;]*;\s+(\S+)[^;]*;/ );
+
+        if ( $mol && $mol =~ /circular/ ) {
+            $params{'-is_circular'} = 1;
+            $mol =~ s|circular ||;
+        }
+    }
+
+    # Guess the alphabet from the molecule type, for either header style.
+    if ( defined $mol ) {
+        if ( $mol =~ /DNA/ ) {
+            $alphabet = 'dna';
+        }
+        elsif ( $mol =~ /RNA/ ) {
+            $alphabet = 'rna';
+        }
+        elsif ( $mol =~ /AA/ ) {
+            $alphabet = 'protein';
+        }
+    }
+
+    unless ( defined $name && length($name) ) {
+        $name = "unknown_id";
+    }
+
+    my $buffer = $line;
+    local $_;
+  BEFORE_FEATURE_TABLE:
+    while ( defined $buffer ) {
+        $_ = $buffer;
+
+        # Exit at start of Feature table
+        if ( /^(F[HT]|SQ)/ ) {
+            $self->_pushback($_) if ( $1 eq 'SQ' || $1 eq 'FT' );
+            last;
+        }
+
+        # Description line(s)
+        if ( /^DE\s+(\S.*\S)/ ) {
+            $desc .= $desc ? " $1" : $1;
+        }
+
+        # accession number
+        if ( /^AC\s+(.*)?/ ) {
+            my @accs = split( /[; ]+/, $1 );    # allow space in addition
+            $params{'-accession_number'} = shift @accs
+                unless defined $params{'-accession_number'};
+            push @{ $params{'-secondary_accessions'} }, @accs;
+        }
+
+        # version number
+        if ( /^SV\s+\S+\.(\d+);?/ ) {
+            my $sv = $1;
+            $params{'-seq_version'} = $sv;
+            $params{'-version'}     = $sv;
+        }
+
+        # date (every DT line contributes a date)
+        if ( /^DT\s+(.+)$/ ) {
+            my $dt_text = $1;
+            my ($date, $version) = split( ' ', $dt_text, 2 );
+            $date =~ tr/,//d;    # remove comma if new version
+
+            # A DT line may carry only a date, leaving $version undefined.
+            # The patterns must not use /x: it would make the literal
+            # spaces insignificant, so real DT lines would never match.
+            if ( defined $version ) {
+                if ( $version =~ /\(Rel\. (\d+), Created\)/ms ) {
+                    my $created_in = $1;    # copy before calling out
+                    $annotation->add_Annotation(
+                        Bio::Annotation::SimpleValue->new(
+                            -tagname => 'creation_release',
+                            -value   => $created_in,
+                        )
+                    );
+                }
+                elsif ( $version =~ /\(Rel\. (\d+), Last updated, Version (\d+)\)/ms ) {
+                    my ($updated_in, $update_no) = ($1, $2);
+                    $annotation->add_Annotation(
+                        Bio::Annotation::SimpleValue->new(
+                            -tagname => 'update_release',
+                            -value   => $updated_in,
+                        )
+                    );
+                    $annotation->add_Annotation(
+                        Bio::Annotation::SimpleValue->new(
+                            -tagname => 'update_version',
+                            -value   => $update_no,
+                        )
+                    );
+                }
+            }
+            push @{ $params{'-dates'} }, $date;
+        }
+
+        # keywords
+        if ( /^KW   (.*)\S*$/ ) {
+            my @kw = split( /\s*\;\s*/, $1 );
+            push @{ $params{'-keywords'} }, @kw;
+        }
+
+        # Organism name and phylogenetic information
+        elsif ( /^O[SC]/ ) {
+            # pass the accession number so we can give an informative
+            # throw message if necessary
+            my $species = $self->_read_EMBL_Species( \$buffer,
+                $params{'-accession_number'} );
+            $params{'-species'} = $species;
+        }
+
+        # References
+        elsif ( /^R/ ) {
+            my @refs = $self->_read_EMBL_References( \$buffer );
+            foreach my $ref (@refs) {
+                $annotation->add_Annotation( 'reference', $ref );
+            }
+        }
+
+        # DB Xrefs
+        elsif ( /^DR/ ) {
+            my @links = $self->_read_EMBL_DBLink( \$buffer );
+            foreach my $dblink (@links) {
+                $annotation->add_Annotation( 'dblink', $dblink );
+            }
+        }
+
+        # Comments: consecutive CC lines are joined into one comment.
+        elsif ( /^CC\s+(.*)/ ) {
+            $comment .= $1;
+            $comment .= " ";
+            while ( defined( $_ = $self->_readline ) ) {
+                if ( /^CC\s+(.*)/ ) {
+                    $comment .= $1;
+                    $comment .= " ";
+                }
+                else {
+                    last;
+                }
+            }
+            my $commobj = Bio::Annotation::Comment->new();
+            $commobj->text($comment);
+            $annotation->add_Annotation( 'comment', $commobj );
+            $comment = "";
+        }
+
+        # Get next line.
+        $buffer = $self->_readline;
+    }
+
+    # Advance to the first feature, contig or sequence line.
+    while ( defined( $_ = $self->_readline ) ) {
+        last if /^FT\s{3}\w/;
+        last if /^SQ /;
+        last if /^CO /;
+    }
+    $buffer = $_;
+
+    if ( defined($buffer) && $buffer =~ /^FT / ) {
+        while ( defined $buffer ) {
+            my $ftunit = $self->_read_FTHelper_EMBL( \$buffer );
+
+            # process ftunit
+            my $feat =
+                $ftunit->_generic_seqfeature( $self->location_factory(), $name );
+
+            # add taxon_id from source if available
+            if (   $params{'-species'}
+                && ( $feat->primary_tag eq 'source' )
+                && $feat->has_tag('db_xref')
+                && ( !$params{'-species'}->ncbi_taxid() ) )
+            {
+                foreach my $tagval ( $feat->get_tag_values('db_xref') ) {
+                    if ( index( $tagval, "taxon:" ) == 0 ) {
+                        $params{'-species'}->ncbi_taxid( substr( $tagval, 6 ) );
+                        last;
+                    }
+                }
+            }
+
+            # add feature to list of features
+            push( @features, $feat );
+
+            # $buffer is undefined once the stream is exhausted.
+            last if !defined $buffer || $buffer !~ /^FT/;
+        }
+    }
+
+    # skip comments
+    while ( defined($buffer) && $buffer =~ /^XX/ ) {
+        $buffer = $self->_readline();
+    }
+
+    if ( defined($buffer) && $buffer =~ /^CO/ ) {
+        while ( defined $buffer ) {
+            my $ftunit = $self->_read_FTHelper_EMBL( \$buffer );
+
+            # process ftunit
+            push( @features,
+                $ftunit->_generic_seqfeature( $self->location_factory(), $name ) );
+
+            last if !defined $buffer || $buffer !~ /^CO/;
+        }
+    }
+
+    # Make sure the SQ header line has been consumed.
+    if ( !defined($buffer) || $buffer !~ /^SQ/ ) {
+        while ( defined( $_ = $self->_readline ) ) {
+            last if /^SQ/;
+        }
+    }
+
+    # Collect the sequence up to the "//" terminator, keeping letters only.
+    my $seq_str = "";
+    while ( defined( $_ = $self->_readline ) ) {
+        last if m{^//};
+        $_ = uc($_);
+        s/[^A-Za-z]//g;
+        $seq_str .= $_;
+    }
+
+    my $seq = $self->sequence_factory->create(
+        -verbose    => $self->verbose(),
+        -division   => $div,
+        -seq        => $seq_str,
+        -desc       => $desc,
+        -display_id => $name,
+        -annotation => $annotation,
+        -molecule   => $mol,
+        -alphabet   => $alphabet,
+        -features   => \@features,
+        %params
+    );
+    return $seq;
+}
+
+
+
+=head2 _write_ID_line
+
+ Title   : _write_ID_line
+ Usage   : $self->_write_ID_line($seq);
+ Function: Writes the EMBL Release 87 format ID line to the stream, unless
+         : there is a user-supplied ID line generation function in which
+         : case that is used instead.
+         : ( See Bio::SeqIO::embl::_id_generation_function(). )
+ Returns : nothing
+ Args    : Bio::Seq object
+
+=cut
+
+# Write the ID line (followed by an XX spacer line) for $seq.
+#
+# Args    : a sequence object
+# Returns : whatever _print returns for the ID line
+#
+# A user-supplied ID generation function, when set, provides the text
+# after the "ID   " prefix.  Otherwise an EMBL release 87 (June 2006)
+# style line is assembled from the sequence's own attributes.  In both
+# cases the line is printed here.
+sub _write_ID_line {
+    my ($self, $seq) = @_;
+
+    my $id_line;
+
+    # If there is a user-supplied ID generation function, use it.
+    if ( my $id_func = $self->_id_generation_func ) {
+        $id_line = "ID   " . $id_func->($seq) . "\nXX\n";
+    }
+    # Otherwise, generate a standard EMBL release 87 (June 2006) ID line.
+    else {
+        # The sequence name is supposed to be the primary accession number,
+        # but if it is not present, use the sequence ID.
+        my $name = $seq->accession_number();
+        if ( !$name ) {
+            $name = $seq->id();
+        }
+
+        $self->warn("No whitespace allowed in EMBL id [". $name. "]") if $name =~ /\s/;
+
+        # Use the sequence version, or default to 1.
+        my $version = $seq->version() || 1;
+
+        my $len = $seq->length();
+
+        # Taxonomic division; 'UNC' is the EMBL division code for
+        # 'unclassified' and is used when no valid division is set.
+        my $div = 'UNC';
+        if (   $seq->can('division')
+            && defined( $seq->division )
+            && $self->_is_valid_division( $seq->division ) )
+        {
+            $div = $seq->division();
+        }
+
+        my $mol;
+        # If the molecule type is a valid EMBL type, use it.
+        if (   $seq->can('molecule')
+            && defined( $seq->molecule )
+            && $self->_is_valid_molecule_type( $seq->molecule ) )
+        {
+            $mol = $seq->molecule();
+        }
+        # Otherwise, choose unassigned DNA or RNA based on the alphabet.
+        elsif ( $seq->can('primary_seq') && defined $seq->primary_seq->alphabet ) {
+            my $alphabet = $seq->primary_seq->alphabet;
+            if ( $alphabet eq 'dna' ) {
+                $mol = 'unassigned DNA';
+            }
+            elsif ( $alphabet eq 'rna' ) {
+                $mol = 'unassigned RNA';
+            }
+            elsif ( $alphabet eq 'protein' ) {
+                $self->warn("Protein sequence found; EMBL is a nucleotide format.");
+                $mol = 'AA';    # AA is not a valid EMBL molecule type.
+            }
+        }
+
+        my $topology = $seq->is_circular ? 'circular' : 'linear';
+
+        $mol ||= '';    # 'unassigned'; ?
+        $id_line = "ID   $name; SV $version; $topology; $mol; STD; $div; $len BP.\nXX\n";
+    }
+
+    # Print in both cases: a custom ID line must reach the stream too.
+    $self->_print($id_line);
+}
+
+=head2 _is_valid_division
+
+ Title   : _is_valid_division
+ Usage   : $self->_is_valid_division($div)
+ Function: tests division code for validity
+ Returns : true if $div is a valid EMBL release 87 taxonomic division.
+ Args    : taxonomic division code string
+
+=cut
+
+# Report whether $division is one of the EMBL release 87 taxonomic
+# division codes listed below.  The comparison is case-sensitive.
+sub _is_valid_division {
+    my ($self, $division) = @_;
+
+    my %is_known_division = map { $_ => 1 } (
+        'PHG',    # Bacteriophage
+        'ENV',    # Environmental Sample
+        'FUN',    # Fungal
+        'HUM',    # Human
+        'INV',    # Invertebrate
+        'MAM',    # Other Mammal
+        'VRT',    # Other Vertebrate
+        'MUS',    # Mus musculus
+        'PLN',    # Plant
+        'PRO',    # Prokaryote
+        'ROD',    # Other Rodent
+        'SYN',    # Synthetic
+        'UNC',    # Unclassified
+        'VRL',    # Viral
+    );
+
+    return exists $is_known_division{$division};
+}
+
+=head2 _is_valid_molecule_type
+
+ Title   : _is_valid_molecule_type
+ Usage   : $self->_is_valid_molecule_type($mol)
+ Function: tests molecule type for validity
+ Returns : true if $mol is a valid EMBL release 87 molecule type.
+ Args    : molecule type string
+
+=cut
+
+# Report whether $moltype is one of the EMBL release 87 molecule types
+# listed below.  The comparison is exact (case and spacing matter).
+sub _is_valid_molecule_type {
+    my ($self, $moltype) = @_;
+
+    my %is_known_moltype = map { $_ => 1 } (
+        'genomic DNA',
+        'genomic RNA',
+        'mRNA',
+        'tRNA',
+        'rRNA',
+        'snoRNA',
+        'snRNA',
+        'scRNA',
+        'pre-RNA',
+        'other RNA',
+        'other DNA',
+        'unassigned DNA',
+        'unassigned RNA',
+    );
+
+    return exists $is_known_moltype{$moltype};
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object (must be seq) to the stream
+ Returns : 1 for success and undef for error
+ Args    : array of 1 to n Bio::SeqI objects
+
+
+=cut
+
+# Write one or more sequence objects to the stream in EMBL format.
+#
+# Args    : list of sequence objects (Bio::SeqI, or at least
+#           Bio::PrimarySeqI)
+# Returns : 1 once every sequence has been written; nothing (bare return)
+#           as soon as a checked print fails
+# Throws  : on an undefined sequence, on an object that is not a
+#           Bio::PrimarySeqI, and on a reference with only one of
+#           start/end set
+#
+# Sections are written in this order: ID, AC, DT, DE, KW, OS/OC/OG,
+# references, DR, CC, feature table, and the sequence block.
+sub write_seq {
+    my ($self, @seqs) = @_;
+
+    foreach my $seq (@seqs) {
+        $self->throw("Attempting to write with no seq!") unless defined $seq;
+        unless ( ref $seq && $seq->isa('Bio::SeqI') ) {
+            $self->warn("$seq is not a SeqI compliant sequence object!")
+                if $self->verbose >= 0;
+            unless ( ref $seq && $seq->isa('Bio::PrimarySeqI') ) {
+                $self->throw("$seq is not a PrimarySeqI compliant sequence object!");
+            }
+        }
+        my $str = $seq->seq || '';
+
+        # Write the ID line.
+        $self->_write_ID_line($seq);
+
+        # Write the accession line if present
+        {
+            my $acc;
+            if ( my $func = $self->_ac_generation_func ) {
+                $acc = $func->($seq);
+            }
+            elsif ( $seq->isa('Bio::Seq::RichSeqI')
+                && defined( $seq->accession_number ) )
+            {
+                $acc = $seq->accession_number;
+                $acc = join( "; ", $acc, $seq->get_secondary_accessions );
+            }
+            elsif ( $seq->can('accession_number') ) {
+                $acc = $seq->accession_number;
+            }
+
+            if ( defined $acc ) {
+                $self->_print( "AC   $acc;\n", "XX\n" ) || return;
+            }
+        }
+
+        # Date lines
+        if ( $seq->can('get_dates') ) {
+            my @dates = $seq->get_dates();
+            my ($cr) = $seq->get_Annotations("creation_release");
+            my ($ur) = $seq->get_Annotations("update_release");
+            my ($uv) = $seq->get_Annotations("update_version");
+
+            # The release-style DT lines need all three annotations.
+            my $have_release_info = ( $cr && $ur && $uv ) ? 1 : 0;
+
+            # Use the annotation's value where it offers one, so that the
+            # DT line does not depend on how the object stringifies.
+            my ($cr_text, $ur_text, $uv_text);
+            if ($have_release_info) {
+                ($cr_text, $ur_text, $uv_text) =
+                    map { ( ref($_) && UNIVERSAL::can( $_, 'value' ) ) ? $_->value : "$_" }
+                    ( $cr, $ur, $uv );
+            }
+
+            my $wrote_date = 0;
+            my $ct         = 1;
+            foreach my $dt (@dates) {
+                if ($have_release_info) {
+                    # Only the first two dates get a release-style line.
+                    if ( $ct == 1 ) {
+                        $self->_write_line_EMBL_regex( "DT   ", "DT   ",
+                            $dt . " (Rel. $cr_text, Created)",
+                            '\s+|$', 80 );
+                    }
+                    elsif ( $ct == 2 ) {
+                        $self->_write_line_EMBL_regex( "DT   ", "DT   ",
+                            $dt . " (Rel. $ur_text, Last updated, Version $uv_text)",
+                            '\s+|$', 80 );
+                    }
+                }
+                else {    # other formats?
+                    $self->_write_line_EMBL_regex( "DT   ", "DT   ",
+                        $dt, '\s+|$', 80 );
+                }
+                $wrote_date = 1;
+                $ct++;
+            }
+            if ($wrote_date) {
+                $self->_print("XX\n") || return;
+            }
+        }
+
+        # Description lines
+        $self->_write_line_EMBL_regex( "DE   ", "DE   ", $seq->desc(), '\s+|$', 80 ) || return;
+        $self->_print("XX\n") || return;
+
+        # if there, write the kw line
+        {
+            my $kw;
+            if ( my $func = $self->_kw_generation_func ) {
+                $kw = $func->($seq);
+            }
+            elsif ( $seq->can('keywords') ) {
+                $kw = $seq->keywords;
+            }
+            if ( defined $kw ) {
+                $self->_write_line_EMBL_regex( "KW   ", "KW   ", $kw, '\s+|$', 80 ) || return;
+                $self->_print("XX\n") || return;
+            }
+        }
+
+        # Organism lines
+        if ( $seq->can('species') && ( my $spec = $seq->species ) ) {
+            my @class = $spec->classification();
+            shift @class;    # get rid of species name. Some embl files include
+                             # the species name in the OC lines, but this seems
+                             # more like an error than something we need to
+                             # emulate
+            my $OS = $spec->scientific_name;
+            if ( $spec->common_name ) {
+                $OS .= ' (' . $spec->common_name . ')';
+            }
+            $self->_print("OS   $OS\n") || return;
+            my $OC = join( '; ', reverse(@class) ) . '.';
+            $self->_write_line_EMBL_regex( "OC   ", "OC   ", $OC, '; |$', 80 ) || return;
+            if ( $spec->organelle ) {
+                $self->_write_line_EMBL_regex( "OG   ", "OG   ", $spec->organelle, '; |$', 80 ) || return;
+            }
+            $self->_print("XX\n") || return;
+        }
+
+        # Reference lines
+        my $t = 1;
+        if ( $seq->can('annotation') && defined $seq->annotation ) {
+            foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
+                $self->_print("RN   [$t]\n") || return;
+
+                # Having no RP line is legal, but we need both
+                # start and end for a valid location.
+                my $start = $ref->start;
+                my $end   = $ref->end;
+                if ( $start and $end ) {
+                    $self->_print("RP   $start-$end\n") || return;
+                }
+                elsif ( $start or $end ) {
+                    $self->throw("Both start and end are needed for a valid RP line.  Got: start='$start' end='$end'");
+                }
+
+                if ( my $med = $ref->medline ) {
+                    $self->_print("RX   MEDLINE; $med.\n") || return;
+                }
+                if ( my $pm = $ref->pubmed ) {
+                    $self->_print("RX   PUBMED; $pm.\n") || return;
+                }
+                $self->_write_line_EMBL_regex( "RA   ", "RA   ",
+                    $ref->authors . ";",
+                    '\s+|$', 80 ) || return;
+
+                # If there is no title to the reference, it appears
+                # as a single semi-colon.  All titles must end in
+                # a semi-colon.
+                my $ref_title = $ref->title || '';
+                $ref_title =~ s/[\s;]*$/;/;
+                $self->_write_line_EMBL_regex( "RT   ", "RT   ", $ref_title,     '\s+|$', 80 ) || return;
+                $self->_write_line_EMBL_regex( "RL   ", "RL   ", $ref->location, '\s+|$', 80 ) || return;
+                if ( $ref->comment ) {
+                    $self->_write_line_EMBL_regex( "RC   ", "RC   ", $ref->comment, '\s+|$', 80 ) || return;
+                }
+                $self->_print("XX\n") || return;
+                $t++;
+            }
+
+            # DB Xref lines
+            if ( my @db_xref = $seq->annotation->get_Annotations('dblink') ) {
+                for my $dr (@db_xref) {
+                    my $db_name = $dr->database;
+                    my $prim    = $dr->primary_id;
+
+                    my $opt  = $dr->optional_id || '';
+                    my $line = $opt ? "$db_name; $prim; $opt." : "$db_name; $prim.";
+                    $self->_write_line_EMBL_regex( "DR   ", "DR   ", $line, '\s+|$', 80 ) || return;
+                }
+                $self->_print("XX\n") || return;
+            }
+
+            # Comment lines
+            foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
+                $self->_write_line_EMBL_regex( "CC   ", "CC   ", $comment->text, '\s+|$', 80 ) || return;
+                $self->_print("XX\n") || return;
+            }
+        }
+
+        ## FEATURE TABLE
+
+        $self->_print("FH   Key             Location/Qualifiers\n") || return;
+        $self->_print("FH\n") || return;
+
+        my @feats = $seq->can('top_SeqFeatures') ? $seq->top_SeqFeatures : ();
+        if ( $feats[0] ) {
+            if ( defined $self->_post_sort ) {
+                # we need to read things into an array.
+                # Process. Sort them. Print 'em
+                my $post_sort_func = $self->_post_sort();
+                my @fth;
+
+                foreach my $sf (@feats) {
+                    push( @fth, Bio::SeqIO::FTHelper::from_SeqFeature( $sf, $seq ) );
+                }
+
+                @fth = sort { &$post_sort_func( $a, $b ) } @fth;
+
+                foreach my $fth (@fth) {
+                    $self->_print_EMBL_FTHelper($fth) || return;
+                }
+            }
+            else {
+                # not post sorted. And so we can print as we get them.
+                # lower memory load...
+                foreach my $sf (@feats) {
+                    my @fth = Bio::SeqIO::FTHelper::from_SeqFeature( $sf, $seq );
+                    foreach my $fth (@fth) {
+                        # A CONTIG entry is written without a sequence block.
+                        if ( $fth->key eq 'CONTIG' ) {
+                            $self->_show_dna(0);
+                        }
+                        $self->_print_EMBL_FTHelper($fth) || return;
+                    }
+                }
+            }
+        }
+
+        # NOTE(review): this leaves the sub (returning nothing) after the
+        # first sequence whenever sequence output is off, even if more
+        # sequences were passed in -- confirm that this is intended.
+        if ( $self->_show_dna() == 0 ) {
+            $self->_print("//\n") || return;
+            return;
+        }
+        $self->_print("XX\n") || return;
+
+        # finished printing features.
+
+        $str =~ tr/A-Z/a-z/;
+
+        # Count each nucleotide
+        my $alen = $str =~ tr/a/a/;
+        my $clen = $str =~ tr/c/c/;
+        my $glen = $str =~ tr/g/g/;
+        my $tlen = $str =~ tr/t/t/;
+
+        my $len  = $seq->length();
+        my $olen = $seq->length() - ( $alen + $tlen + $clen + $glen );
+        if ( $olen < 0 ) {
+            $self->warn("Weird. More atgc than bases. Problem!");
+        }
+
+        $self->_print("SQ   Sequence $len BP; $alen A; $clen C; $glen G; $tlen T; $olen other;\n") || return;
+
+        my $nuc       = 60;          # Number of nucleotides per line
+        my $whole_pat = 'a10' x 6;   # Pattern for unpacking a whole line
+        my $out_pat   = 'A11' x 6;   # Pattern for packing a line
+        my $length    = length($str);
+
+        # Calculate the number of nucleotides which fit on whole lines
+        my $whole = int( $length / $nuc ) * $nuc;
+
+        # Print the whole lines.  The packed blocks are passed as a
+        # sprintf argument rather than used as part of the format.
+        my $i;
+        for ( $i = 0; $i < $whole; $i += $nuc ) {
+            my $blocks = pack $out_pat,
+                unpack $whole_pat,
+                substr( $str, $i, $nuc );
+            $self->_print( sprintf( "     %s%9d\n", $blocks, $i + $nuc ) ) || return;
+        }
+
+        # Print the last line
+        if ( my $last = substr( $str, $i ) ) {
+            my $last_len = length($last);
+            my $last_pat = 'a10' x int( $last_len / 10 ) . 'a' . $last_len % 10;
+            my $blocks   = pack $out_pat,
+                unpack( $last_pat, $last );
+            # Add the length to the end
+            $self->_print( sprintf( "     %s%9d\n", $blocks, $length ) ) || return;
+        }
+
+        $self->_print("//\n") || return;
+
+        $self->flush if $self->_flush_on_write && defined $self->_fh;
+    }
+    return 1;
+}
+
+=head2 _print_EMBL_FTHelper
+
+ Title   : _print_EMBL_FTHelper
+ Usage   :
+ Function: Internal function
+ Returns : 1 if writing succeeded, otherwise undef
+ Args    :
+
+
+=cut
+
+# Write one feature (an FTHelper object) to the stream.
+#
+# Args    : a Bio::SeqIO::FTHelper object
+# Returns : 1 if every line was written, nothing (bare return) as soon
+#           as a write fails
+#
+# A feature whose key is 'CONTIG' is written as CO lines; any other
+# feature is written as an FT key/location line followed by one FT line
+# per qualifier value.
+sub _print_EMBL_FTHelper {
+    my ($self, $fth) = @_;
+
+    # Warn through the stream object: $fth is exactly the thing that has
+    # just failed the check, so it cannot be trusted to have a warn method.
+    if ( !ref($fth) || !$fth->isa('Bio::SeqIO::FTHelper') ) {
+        $self->warn("$fth is not a FTHelper class. Attempting to print, but there could be tears!");
+    }
+
+    if ( $fth->key eq 'CONTIG' ) {
+        $self->_print("XX\n") || return;
+        $self->_write_line_EMBL_regex( "CO   ",
+            "CO   ", $fth->loc,
+            '\,|$', 80 ) || return;
+        return 1;
+    }
+
+    $self->_write_line_EMBL_regex( sprintf( "FT   %-15s ", $fth->key ),
+        "FT                   ", $fth->loc,
+        '\,|$', 80 ) || return;
+
+    foreach my $tag ( keys %{ $fth->field } ) {
+        next unless defined $fth->field->{$tag};
+
+        foreach my $stored ( @{ $fth->field->{$tag} } ) {
+            # Double any embedded quotes on a copy: the loop variable
+            # aliases the stored value, which must stay untouched.
+            ( my $value = $stored ) =~ s/\"/\"\"/g;
+
+            if ( $value eq "_no_value" ) {
+                $self->_write_line_EMBL_regex( "FT                   ",
+                    "FT                   ",
+                    "/$tag", '.|$', 80 ) || return;
+            }
+            # there are almost 3x more quoted qualifier values and they
+            # are more common too so we take quoted ones first
+            elsif ( !$FTQUAL_NO_QUOTE{$tag} ) {
+                my $pat = $value =~ /\s/ ? '\s|\-|$' : '.|\-|$';
+                $self->_write_line_EMBL_regex( "FT                   ",
+                    "FT                   ",
+                    "/$tag=\"$value\"", $pat, 80 ) || return;
+            }
+            else {
+                $self->_write_line_EMBL_regex( "FT                   ",
+                    "FT                   ",
+                    "/$tag=$value", '.|$', 80 ) || return;
+            }
+        }
+    }
+
+    return 1;
+}
+
+#'
+=head2 _read_EMBL_References
+
+ Title   : _read_EMBL_References
+ Usage   :
+ Function: Reads references from EMBL format. Internal function really
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Read the remaining R* lines of one reference block and turn them into
+# a Bio::Annotation::Reference.
+#
+# Args    : reference to the scalar holding the current line (expected to
+#           be the RN line of the block)
+# Returns : a list holding the single reference object that was built
+# Effects : consumes lines from the stream; on return $$buffer holds the
+#           first line that did not start with "R" (undefined at end of
+#           stream)
+sub _read_EMBL_References {
+    my ($self, $buffer) = @_;
+    my @refs;
+
+    # assume things are starting with RN
+    if ( $$buffer !~ /^RN/ ) {
+        warn("Not parsing line '$$buffer' which maybe important");
+    }
+
+    my ($b1, $b2, $title, $loc, $au, $med, $pm, $com);
+
+    while ( defined( $_ = $self->_readline ) ) {
+        last unless /^R/;
+
+        if ( /^RP   (\d+)-(\d+)/ ) {
+            ($b1, $b2) = ($1, $2);
+        }
+        elsif ( /^RX   MEDLINE;\s+(\d+)/ ) {
+            $med = $1;
+        }
+        elsif ( /^RX   PUBMED;\s+(\d+)/ ) {
+            $pm = $1;
+        }
+        elsif ( /^RA   (.*)/ ) {
+            $au = $self->_concatenate_lines( $au, $1 );
+        }
+        elsif ( /^RT   (.*)/ ) {
+            $title = $self->_concatenate_lines( $title, $1 );
+        }
+        elsif ( /^RL   (.*)/ ) {
+            $loc = $self->_concatenate_lines( $loc, $1 );
+        }
+        elsif ( /^RC   (.*)/ ) {
+            $com = $self->_concatenate_lines( $com, $1 );
+        }
+    }
+
+    # Strip the trailing semicolon; a block may lack RA or RT lines, in
+    # which case the value is still undefined and is left alone.
+    $au    =~ s/;\s*$//g if defined $au;
+    $title =~ s/;\s*$//g if defined $title;
+
+    my $ref = Bio::Annotation::Reference->new;
+    $ref->start($b1);
+    $ref->end($b2);
+    $ref->authors($au);
+    $ref->title($title);
+    $ref->location($loc);
+    $ref->medline($med);
+    $ref->comment($com);
+    $ref->pubmed($pm);
+
+    push( @refs, $ref );
+    $$buffer = $_;
+
+    return @refs;
+}
+
+=head2 _read_EMBL_Species
+
+ Title   : _read_EMBL_Species
+ Usage   :
+ Function: Reads the EMBL Organism species and classification
+           lines.
+ Example :
+ Returns : A Bio::Species object
+ Args    : a reference to the current line buffer, accession number
+
+=cut
+
+# Read the OS / OC / OG lines and build a Bio::Species object from them.
+#
+# Args    : reference to the scalar holding the current line, and the
+#           accession number (used only in error messages)
+# Returns : a Bio::Species object; nothing if no OS line was seen or the
+#           genus is "Unknown" or "None"
+# Throws  : if the same name occurs twice, non-adjacently, in the
+#           classification
+# Effects : consumes lines from the stream; on return $$buffer holds the
+#           first line that is not an OS, OC or OG line
+sub _read_EMBL_Species {
+    my ( $self, $buffer, $acc ) = @_;
+    my $org;
+
+    $_ = $$buffer;
+    my ( $sub_species, $species, $genus, $common, $sci_name, $class_lines );
+    while ( defined( $_ ||= $self->_readline ) ) {
+        if (/^OS\s+(.+)/) {
+            $sci_name .= ($sci_name) ? ' ' . $1 : $1;
+        }
+        elsif (s/^OC\s+(.+)$//) {
+            $class_lines .= $1;
+        }
+        elsif (/^OG\s+(.*)/) {
+            $org = $1;
+        }
+        else {
+            last;
+        }
+
+        $_ = undef;    # Empty $_ to trigger read of next line
+    }
+
+    $$buffer = $_;
+
+    $sci_name || return;
+
+    # Convert data in classification lines into classification array.
+    # only split on ';' or '.' so that classification that is 2 or more words
+    # will still get matched, use map() to remove trailing/leading/intervening
+    # spaces
+    my @class = map { s/^\s+//; s/\s+$//; s/\s{2,}/ /g; $_; } split /[;\.]+/, $class_lines;
+
+    # do we have a genus?  The last one or two classification entries are
+    # tried as a prefix of the scientific name.  They are literal text, so
+    # they are quoted before being interpolated into the pattern.
+    my $possible_genus = quotemeta( defined $class[-1] ? $class[-1] : '' );
+    $possible_genus .= '|' . quotemeta( $class[-2] ) if $class[-2];
+    if ( $sci_name =~ /^($possible_genus)/ ) {
+        $genus = $1;
+        ($species) = $sci_name =~ /^\Q$genus\E\s+(.+)/;
+    }
+    else {
+        $species = $sci_name;
+    }
+
+    # Don't make a species object if it is "Unknown" or "None"
+    if ($genus) {
+        return if $genus =~ /^(Unknown|None)$/i;
+    }
+
+    # is this organism of rank species or is it lower?
+    # (doesn't catch everything, but at least the guess isn't dangerous)
+    if ( $species =~ /subsp\.|var\./ ) {
+        ( $species, $sub_species ) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
+    }
+
+    # sometimes things have common name in brackets, like
+    # Schizosaccharomyces pombe (fission yeast), so get rid of the common
+    # name bit. Probably dangerous if real scientific species name ends in
+    # bracketed bit.
+    # NOTE(review): when there is no bracketed part the match fails and
+    # this list assignment leaves $species undefined -- confirm intended.
+    unless ( $class[-1] eq 'Viruses' ) {
+        ( $species, $common ) = $species =~ /^(.+)\s+\((.+)\)$/;
+        $sci_name =~ s/\s+\(.+\)$// if $common;
+    }
+
+    # Bio::Species array needs array in Species -> Kingdom direction
+    unless ( $class[-1] eq $sci_name ) {
+        push( @class, $sci_name );
+    }
+    @class = reverse @class;
+
+    # do minimal sanity checks before we hand off to Bio::Species which won't
+    # be able to give informative throw messages if it has to throw because
+    # of problems here
+    $self->throw("$acc seems to be missing its OS line: invalid.") unless $sci_name;
+    my %names;
+    foreach my $i ( 0 .. $#class ) {
+        my $name = $class[$i];
+        $names{$name}++;
+        if ( $names{$name} > 1 && $name ne $class[ $i - 1 ] ) {
+            $self->throw("$acc seems to have an invalid species classification.");
+        }
+    }
+
+    my $make = Bio::Species->new();
+    $make->scientific_name($sci_name);
+    $make->classification(@class);
+    unless ( $class[-1] eq 'Viruses' ) {
+        $make->genus($genus)             if $genus;
+        $make->species($species)         if $species;
+        $make->sub_species($sub_species) if $sub_species;
+        $make->common_name($common)      if $common;
+    }
+    $make->organelle($org) if $org;
+    return $make;
+}
+
+=head2 _read_EMBL_DBLink
+
+ Title   : _read_EMBL_DBLink
+ Usage   :
+ Function: Reads the EMBL database cross reference ("DR") lines
+ Example :
+ Returns : A list of Bio::Annotation::DBLink objects
+ Args    :
+
+=cut
+
+# Collect consecutive DR (database cross reference) lines, starting with
+# the line held in $$buffer, as Bio::Annotation::DBLink objects.  On
+# return $$buffer holds the first line that is not a well-formed DR line.
+sub _read_EMBL_DBLink {
+    my ( $self, $buffer ) = @_;
+    my @links;
+
+    $_ = $$buffer;
+    while ( defined( $_ ||= $self->_readline ) ) {
+        # Stop at the first line that is not a well-formed DR line.
+        last unless /^DR   ([^\s;]+);\s*([^\s;]+);?\s*([^\s;]+)?\.$/;
+
+        my ( $db_name, $primary, $optional ) = ( $1, $2, $3 );
+        push @links,
+            Bio::Annotation::DBLink->new(
+                -database    => $db_name,
+                -primary_id  => $primary,
+                -optional_id => $optional
+            );
+
+        $_ = undef;    # Empty $_ to trigger read of next line
+    }
+
+    $$buffer = $_;
+
+    return @links;
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function:
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the '_filehandle' slot: a defined argument
+# is stored first, and the current value is returned either way.
+sub _filehandle{
+    my ($obj, $value) = @_;
+
+    $obj->{'_filehandle'} = $value if defined $value;
+
+    return $obj->{'_filehandle'};
+}
+
+=head2 _read_FTHelper_EMBL
+
+ Title   : _read_FTHelper_EMBL
+ Usage   : _read_FTHelper_EMBL($buffer)
+ Function: reads the next FT key line
+ Example :
+ Returns : Bio::SeqIO::FTHelper object
+ Args    : reference to a scalar holding the current line
+
+
+=cut
+
+# Parse one feature, starting at the line held in $$buffer, into a
+# Bio::SeqIO::FTHelper object.  Two kinds of input are recognised: an FT
+# key line with its continuation lines, and a run of CO lines (stored
+# under the key 'CONTIG').  Returns nothing if $$buffer is neither.
+# On success $$buffer is set to the first line that does not belong to
+# this feature (undefined at end of stream).
+sub _read_FTHelper_EMBL {
+    my ($self,$buffer) = @_;
+
+    my ($key,   # The key of the feature
+        $loc,   # The location line from the feature
+        @qual,  # An array of lines making up the qualifiers
+	);
+
+    if ($$buffer =~ /^FT\s{3}(\S+)\s+(\S+)/ ) {
+        $key = $1;
+        $loc = $2;
+        # Read all the lines up to the next feature
+        while ( defined($_ = $self->_readline) ) {
+            if (/^FT(\s+)(.+?)\s*$/) {
+                # Lines inside features are preceded by 19 spaces
+                # A new feature is preceded by 3 spaces
+                if (length($1) > 4) {
+                    # Add to qualifiers if we're in the qualifiers
+                    if (@qual) {
+                        push(@qual, $2);
+                    }
+                    # Start the qualifier list if it's the first qualifier
+                    elsif (substr($2, 0, 1) eq '/') {
+                        @qual = ($2);
+                    }
+                    # We're still in the location line, so append to location
+                    else {
+                        $loc .= $2;
+                    }
+                } else {
+                    # We've reached the start of the next feature
+                    last;
+                }
+            } else {
+                # We're at the end of the feature table
+                last;
+            }
+        }
+    } elsif( $$buffer =~ /^CO\s+(\S+)/) {
+	$key = 'CONTIG';
+	$loc = $1;
+	# Read all the lines up to the next feature
+	while ( defined($_ = $self->_readline) ) {
+	    if (/^CO\s+(\S+)\s*$/) {
+		$loc .= $1;
+	    } else {
+		# We've reached the start of the next feature
+		last;
+	    }
+	}
+    } else {
+        # No feature key
+        return;
+    }
+
+    # Put the first line of the next feature into the buffer
+    $$buffer = $_;
+
+    # Make the new FTHelper object
+    my $out = new Bio::SeqIO::FTHelper();
+    $out->verbose($self->verbose());
+    $out->key($key);
+    $out->loc($loc);
+
+    # Now parse and add any qualifiers.  (@qual is kept
+    # intact to provide informative error messages.)
+    # A C-style loop is used because a quoted value may span several
+    # entries of @qual, which the inner loop consumes by advancing $i.
+  QUAL: for (my $i = 0; $i < @qual; $i++) {
+        $_ = $qual[$i];
+        my( $qualifier, $value ) = m{^/([^=]+)(?:=(.+))?}
+            or $self->throw("Can't see new qualifier in: $_\nfrom:\n"
+                . join('', map "$_\n", @qual));
+        if (defined $value) {
+            # Do we have a quoted value?
+            if (substr($value, 0, 1) eq '"') {
+                # Keep adding to value until we find the trailing quote
+                # and the quotes are balanced
+                QUOTES:
+                while ($value !~ /"$/ or $value =~ tr/"/"/ % 2) { #"
+                    $i++;
+                    my $next = $qual[$i];
+                    if (!defined($next)) {
+                        # Ran out of qualifier lines before the quote closed
+                        $self->warn("Unbalanced quote in:\n".join("\n", @qual).
+                            "\nAdding quote to close...".
+                            "Check sequence quality!");
+                        $value .= '"';
+                        last QUOTES;
+                    }
+
+                    # Join to value with space if value or next line contains a space
+                    $value .= (grep /\s/, ($value, $next)) ? " $next" : $next;
+                }
+                # Trim leading and trailing quotes
+                $value =~ s/^"|"$//g;
+                # Undouble internal quotes
+                $value =~ s/""/"/g; #"
+            }
+        } else {
+            # A qualifier without "=value" is stored with this marker
+            $value = '_no_value';
+        }
+
+        # Store the qualifier
+        $out->field->{$qualifier} ||= [];
+        push(@{$out->field->{$qualifier}},$value);
+    }
+
+    return $out;
+}
+
+=head2 _write_line_EMBL
+
+ Title   : _write_line_EMBL
+ Usage   :
+ Function: internal function
+ Example :
+ Returns : 1 if writing succeeded, else undef
+ Args    :
+
+
+=cut
+
+sub _write_line_EMBL {
+    # Write $line as fixed-width output: the first physical line is prefixed
+    # with $pre1, every continuation line with $pre2, and the text is cut at
+    # fixed offsets (not at word boundaries) so that prefix plus text never
+    # exceeds $length characters.
+    #
+    # Returns 1 on success; returns nothing as soon as a _print call fails.
+    # Throws when called without a length, or when the continuation prefix
+    # leaves no room for text (that case used to loop forever).
+    my ($self,$pre1,$pre2,$line,$length) = @_;
+
+    $length || $self->throw("Miscalled write_line_EMBL without length. Programming error!");
+
+    my $first_width = $length - length($pre1);
+    my $cont_width  = $length - length($pre2);
+    my $line_length = length($line);
+
+    $self->_print($pre1 . substr($line,0,$first_width) . "\n") || return;
+
+    my $offset = $first_width;
+    if ($offset < $line_length && $cont_width <= 0) {
+        # With a zero or negative width $offset would never advance below.
+        $self->throw("Miscalled write_line_EMBL: length $length leaves no room "
+                     . "for text after the continuation prefix. Programming error!");
+    }
+    while ($offset < $line_length) {
+        $self->_print($pre2 . substr($line,$offset,$cont_width) . "\n") || return;
+        $offset += $cont_width;
+    }
+
+    return 1;
+}
+
+=head2 _write_line_EMBL_regex
+
+ Title   : _write_line_EMBL_regex
+ Usage   :
+ Function: internal function for writing lines of specified
+           length, with different first and the next line
+           left hand headers and split at specific points in the
+           text
+ Example :
+ Returns : 1 if writing succeeded, else undef
+ Args    : first header, second header, text-line, regex for line breaks, total line length
+
+
+=cut
+
+sub _write_line_EMBL_regex {
+    # Break $line into chunks and print them, the first chunk prefixed with
+    # $pre1 and the remaining ones with $pre2.  A chunk ends where $regex
+    # matches; if that is not possible within the allowed width, punctuation
+    # followed by whitespace and then bare punctuation are accepted as
+    # break points too.
+    #
+    # Returns 1 on success; returns nothing as soon as a _print call fails.
+    # Throws when called without a length.
+    my ($self,$pre1,$pre2,$line,$regex,$length) = @_;
+
+    #print STDOUT "Going to print with $line!\n";
+
+    $length || $self->throw("Programming error - called write_line_EMBL_regex without length.");
+
+    # Text width per line; note it is derived from $pre1 only.
+    my $subl = $length - (length $pre1) -1 ;
+    my( @lines );
+
+    # Test the length rather than the truth of $line, so that a remaining
+    # text of "0" is still written out.
+  CHUNK: while(defined($line) && length($line)) {
+        foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) {
+            if($line =~ m/^(.{1,$subl})($pat)(.*)/ ) {
+                my $l = $1.$2;
+                $line = substr($line,length($l));
+                # be strict about not padding spaces according to
+                # genbank format
+                $l =~ s/\s+$//;
+                push(@lines, $l);
+                next CHUNK;
+            }
+        }
+        # if we get here none of the patterns matched $subl or less chars
+        $self->warn("trouble dissecting \"$line\"\n     into chunks ".
+                    "of $subl chars or less - this tag won't print right");
+        # insert a space char to prevent infinite loops
+        $line = substr($line,0,$subl) . " " . substr($line,$subl);
+    }
+
+    # The first chunk is skipped only when it is missing or empty.
+    my $first = shift @lines;
+    if (defined($first) && length($first)) {
+        $self->_print("$pre1$first\n") || return;
+    }
+    foreach my $s ( @lines ) {
+        $self->_print("$pre2$s\n") || return;
+    }
+
+    return 1;
+}
+
+=head2 _post_sort
+
+ Title   : _post_sort
+ Usage   : $obj->_post_sort($newval)
+ Function:
+ Returns : value of _post_sort
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _post_sort {
+    # Combined getter/setter for the '_post_sort' slot of the object hash.
+    # Any argument, including an explicit undef, is stored before the
+    # current value is returned.
+    my ($self, @new_value) = @_;
+    $self->{'_post_sort'} = $new_value[0] if @new_value;
+    return $self->{'_post_sort'};
+}
+
+=head2 _show_dna
+
+ Title   : _show_dna
+ Usage   : $obj->_show_dna($newval)
+ Function:
+ Returns : value of _show_dna
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _show_dna {
+    # Combined getter/setter for the '_show_dna' slot of the object hash.
+    # Any argument, including an explicit undef, is stored before the
+    # current value is returned.
+    my ($self, @new_value) = @_;
+    $self->{'_show_dna'} = $new_value[0] if @new_value;
+    return $self->{'_show_dna'};
+}
+
+=head2 _id_generation_func
+
+ Title   : _id_generation_func
+ Usage   : $obj->_id_generation_func($newval)
+ Function:
+ Returns : value of _id_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _id_generation_func {
+    # Combined getter/setter for the '_id_generation_func' slot of the
+    # object hash.  Any argument, including an explicit undef, is stored
+    # before the current value is returned.
+    my ($self, @new_value) = @_;
+    $self->{'_id_generation_func'} = $new_value[0] if @new_value;
+    return $self->{'_id_generation_func'};
+}
+
+=head2 _ac_generation_func
+
+ Title   : _ac_generation_func
+ Usage   : $obj->_ac_generation_func($newval)
+ Function:
+ Returns : value of _ac_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _ac_generation_func {
+    # Combined getter/setter for the '_ac_generation_func' slot of the
+    # object hash.  Any argument, including an explicit undef, is stored
+    # before the current value is returned.
+    my ($self, @new_value) = @_;
+    $self->{'_ac_generation_func'} = $new_value[0] if @new_value;
+    return $self->{'_ac_generation_func'};
+}
+
+=head2 _sv_generation_func
+
+ Title   : _sv_generation_func
+ Usage   : $obj->_sv_generation_func($newval)
+ Function:
+ Returns : value of _sv_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _sv_generation_func {
+    # Combined getter/setter for the '_sv_generation_func' slot of the
+    # object hash.  Any argument, including an explicit undef, is stored
+    # before the current value is returned.
+    my ($self, @new_value) = @_;
+    $self->{'_sv_generation_func'} = $new_value[0] if @new_value;
+    return $self->{'_sv_generation_func'};
+}
+
+=head2 _kw_generation_func
+
+ Title   : _kw_generation_func
+ Usage   : $obj->_kw_generation_func($newval)
+ Function:
+ Returns : value of _kw_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _kw_generation_func {
+    # Combined getter/setter for the '_kw_generation_func' slot of the
+    # object hash.  Any argument, including an explicit undef, is stored
+    # before the current value is returned.
+    my ($self, @new_value) = @_;
+    $self->{'_kw_generation_func'} = $new_value[0] if @new_value;
+    return $self->{'_kw_generation_func'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/entrezgene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/entrezgene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/entrezgene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,931 @@
+# $Id: entrezgene.pm,v 1.22.4.4 2006/11/23 15:02:02 sendu Exp $
+# BioPerl module for Bio::SeqIO::entrezgene
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::entrezgene - Entrez Gene ASN1 parser
+
+=head1 SYNOPSIS
+
+   use Bio::SeqIO;
+
+   # don't instantiate directly - instead do
+   my $seqio = Bio::SeqIO->new(-format => 'entrezgene',
+                               -file => $file);
+   my $gene = $seqio->next_seq;
+
+=head1 DESCRIPTION
+
+This is the BioPerl parser for Entrez Gene ASN.1 records. It is built on top of 
+L<Bio::ASN1::EntrezGene>, a low level ASN parser built by Mingyi Liu 
+(L<http://sourceforge.net/projects/egparser>). The easiest way to 
+use it is shown above.
+
+You will get most of the Entrez Gene annotation such as gene symbol, 
+gene name and description, accession numbers associated 
+with the gene, etc. Almost all of these are given as  L<Bio::AnnotationI> objects.
+
+If you need all the data do:
+
+   my $seqio = Bio::SeqIO->new(-format => 'entrezgene',
+                               -file => $file,
+                               -debug => 'on' );
+   my ($gene,$genestructure,$uncaptured) = $seqio->next_seq;
+
+The second variable returned, C<$genestructure>, is a L<Bio::Cluster::SequenceFamily>
+object. It contains all Refseqs and the genomic contigs that are associated 
+with the particular gene. The third variable, C<$uncaptured>, is a reference 
+to a plain array.
+
+You can also modify the output to allow back compatibility with the old 
+LocusLink parser:
+
+   my $seqio = Bio::SeqIO->new(-format => 'entrezgene',
+                               -file => $file,
+                               -locuslink => 'convert');
+
+The C<-debug> and C<-locuslink> options slow down the parser.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Stefan Kirov
+
+Email skirov at utk.edu
+
+=head1 CONTRIBUTORS
+
+Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+This parser is based on Bio::ASN1::EntrezGene module.
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::entrezgene;
+
+use strict;
+use Bio::ASN1::EntrezGene;
+use Bio::Seq;
+use Bio::Species;
+use Bio::Annotation::SimpleValue;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Comment;
+use Bio::SeqFeature::Generic;
+use Bio::Annotation::Reference;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::GeneStructure;
+use Bio::Cluster::SequenceFamily;
+#use Bio::Ontology::Ontology; Relationships.... later
+use Bio::Ontology::Term;
+use Bio::Annotation::OntologyTerm;
+
+use base qw(Bio::SeqIO);
+
+# Entrez Gene tag name => LocusLink-style tag name.
+# NOTE(review): presumably consumed by the -locuslink => 'convert' code path,
+# which is outside this part of the file - confirm.
+%main::eg_to_ll =('Official Full Name' => 'OFFICIAL_GENE_NAME',
+                  'chromosome' => 'CHR',
+                  'cyto' => 'MAP',
+                  'Official Symbol' => 'OFFICIAL_SYMBOL');
+# The Entrez Gene tag names that have a LocusLink counterpart.
+@main::egonly = keys %main::eg_to_ll;
+# We define $xval and some other variables so we don't have 
+# to pass them as arguments
+my ($seq,$ann,$xval,%seqcollection,$buf);
+
+sub _initialize {
+	# Set up the reader: run the parent class initialisation, remember the
+	# -debug, -locuslink and -service_record options (defaults 'off', 'no'
+	# and 'no') and create the low level Bio::ASN1::EntrezGene parser on the
+	# file given with -file.  Option names are matched case-insensitively.
+	my($self,@args) = @_;
+	$self->SUPER::_initialize(@args);
+	my %param = @args;
+	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+	$self->{_debug} = $param{-debug} || 'off';
+	$self->{_locuslink} = $param{-locuslink}||'no';
+	$self->{_service_record} = $param{-service_record}||'no';
+	$self->{_parser} = Bio::ASN1::EntrezGene->new(file=>$param{-file});
+	#Instantiate the low level parser here (it is -file in Bioperl
+   #-should tell M.)
+	#$self->{_parser}->next_seq; #First empty record- bug in Bio::ASN::Parser
+}
+
+
+sub next_seq {
+    # Parse the next Entrez Gene record delivered by the low level parser.
+    #
+    # Returns : in scalar context a Bio::Seq carrying the gene annotation;
+    #           in list context ($seq, $cluster, \@alluncaptured), where
+    #           $cluster is a Bio::Cluster::SequenceFamily built from the
+    #           sequences collected in %seqcollection, and @alluncaptured
+    #           holds what the helper subs handed back plus the remains of
+    #           the raw record.
+    #           Returns nothing when the low level parser yields no record.
+    #           A placeholder Bio::Seq with id 'Generif service record' is
+    #           returned for generif service records unless the reader was
+    #           created with -service_record => 'yes'.
+    my $self=shift;
+    my $value = $self->{_parser}->next_seq(1);
+    # $value contains data structure for the
+    # record being parsed. 2 indicates the recommended
+    # trimming mode of the data structure
+    #I use 1 as I prefer not to descend into size 0 arrays
+    return unless ($value);
+    my $debug=$self->{_debug};
+    $self->{_ann} = Bio::Annotation::Collection->new();
+    $self->{_currentann} = Bio::Annotation::Collection->new();
+    my @alluncaptured;
+    # parse the entry
+    #my @keys=keys %{$value}; obsolete
+    $xval=$value->[0];
+    return Bio::Seq->new(-id=>'Generif service record', -seq=>'') if (($self->{_service_record} ne 'yes')&&
+                    ($xval->{gene}->{desc} =~ /record to support submission of generifs for a gene not in entrez/i));
+    #Basic data
+    #$xval->{summary}=~s/\n//g; 
+    my $seq = Bio::Seq->new(
+                        -display_id  => $xval->{gene}{locus},
+                        -accession_number =>$xval->{'track-info'}{geneid},
+                        -desc=>$xval->{summary}
+                   );
+    #Source data here
+    $self->_add_to_ann($xval->{'track-info'}->{status},'Entrez Gene Status');
+    # Turn the ';' separated lineage string into a list, most specific
+    # component first.
+    my $lineage=$xval->{source}{org}{orgname}{lineage};
+    $lineage=~s/[\s\n]//g;
+    my ($comp,@lineage);
+    while ($lineage) {
+        ($comp,$lineage)=split(/;/,$lineage,2);
+        unshift @lineage,$comp;
+    }
+    unless (exists($xval->{source}{org}{orgname}{name}{binomial})) {
+        shift @lineage;
+        my ($gen,$sp)=split(/\s/, $xval->{source}{org}{taxname});
+        if (($sp)&&($sp ne '')) {
+            if ($gen=~/plasmid/i) {
+                $sp=$gen.$sp;
+            }
+            unshift @lineage,$sp;
+        }
+        else {
+            unshift @lineage,'unknown';
+        }
+    }
+    else {
+        my $sp=$xval->{source}{org}{orgname}{name}{binomial}{species};
+        if (($sp)&&($sp ne '')) {
+            # Split on the literal text "sp."; an unescaped dot would also
+            # cut epithets that merely contain "sp" (e.g. "cuspidata").
+            my ($spc,$strain)=split(/sp\./,$sp);#Do we need strain?
+            $spc=~s/\s//g;
+            if (($spc)&&($spc ne '')) {
+                unshift @lineage,$spc;
+            }
+            else {
+                unshift @lineage,'unknown';
+            }
+        }
+        else {
+            unshift @lineage,'unknown';
+        }
+    }
+    my $specie=Bio::Species->new(-classification=>[@lineage],
+                                -ncbi_taxid=>$xval->{source}{org}{db}{tag}{id});
+    $specie->common_name($xval->{source}{org}{common});
+    if (exists($xval->{source}->{subtype}) && ($xval->{source}->{subtype})) {
+        if (ref($xval->{source}->{subtype}) eq 'ARRAY') {
+            foreach my $subtype (@{$xval->{source}->{subtype}}) {
+                $self->_add_to_ann($subtype->{name},$subtype->{subtype});
+            }
+        }
+        else {
+            $self->_add_to_ann($xval->{source}->{subtype}->{name},$xval->{source}->{subtype}->{subtype});
+        }
+    }
+    #Synonyms
+    if (ref($xval->{gene}->{syn}) eq 'ARRAY') {
+        foreach my $symsyn (@{$xval->{gene}->{syn}}) {
+            $self->_add_to_ann($symsyn,'ALIAS_SYMBOL');
+        }
+    }
+    else {
+        $self->_add_to_ann($xval->{gene}->{syn},'ALIAS_SYMBOL') if ($xval->{gene}->{syn});
+    }
+
+
+    #COMMENTS (STS not dealt with yet)
+    if (ref($xval->{comments}) eq 'ARRAY') {
+        for my $i (0..$#{$xval->{comments}}) {
+            $self->{_current}=$xval->{comments}->[$i];
+            push @alluncaptured,$self->_process_all_comments();
+        }
+    }
+    else {
+        $self->{_current}=$xval->{comments};
+        push @alluncaptured,$self->_process_all_comments();
+    }
+    #Gene
+    if (exists($xval->{gene}->{db})) {
+        if (ref($xval->{gene}->{db}) eq 'ARRAY') {
+            foreach my $genedb (@{$xval->{gene}->{db}}) {
+                my $id=exists($genedb->{tag}->{id})?$genedb->{tag}->{id}:$genedb->{tag}->{str};
+                $self->_add_to_ann($id,$genedb->{db});
+            }
+        }
+        else {
+            my $id=($xval->{gene}->{db}->{tag}->{id})?
+                $xval->{gene}->{db}->{tag}->{id}:$xval->{gene}->{db}->{tag}->{str};
+            $self->_add_to_ann($id,$xval->{gene}->{db}->{db});
+        }
+        $self->_add_to_ann($xval->{gene}->{'locus-tag'},'LOCUS_SYNONYM');
+        delete $xval->{gene}->{db} unless ($debug eq 'off');
+    }
+    #LOCATION To do: uncaptured stuff
+    if (exists($xval->{location})) {
+        if (ref($xval->{location}) eq 'ARRAY') {
+            foreach my $loc (@{$xval->{location}}) {
+                $self->_add_to_ann($loc->{'display-str'},$loc->{method}->{'map-type'});
+            }
+        }
+        else {
+            $self->_add_to_ann($xval->{location}->{'display-str'},$xval->{location}->{method}->{'map-type'});
+        }
+        delete $xval->{location} unless ($debug eq 'off');
+    }
+    #LOCUS
+    if (ref($xval->{locus}) eq 'ARRAY') {
+        foreach my $locus (@{$xval->{locus}}) {
+            $self->{_current}=$locus;
+            push @alluncaptured,$self->_process_locus();
+        }
+    }
+    else {
+        # _process_locus reads $self->{_current} and ignores its arguments,
+        # so the single locus has to be made current first; otherwise the
+        # last comment record would be processed instead.
+        $self->{_current}=$xval->{locus};
+        push @alluncaptured,$self->_process_locus();
+    }
+    #Homology
+    my ($uncapt,$hom,$anchor)=_process_src($xval->{homology}->{source});
+    foreach my $homann (@$hom) {
+        $self->{_ann}->add_Annotation('dblink',$homann);
+    }
+    push @alluncaptured,$uncapt;
+    #Index terms
+    if ((exists($xval->{'xtra-index-terms'}))&&($xval->{'xtra-index-terms'})) {
+        if (ref($xval->{'xtra-index-terms'}) eq 'ARRAY') {
+            foreach my $term (@{$xval->{'xtra-index-terms'}}) {
+                $self->_add_to_ann($term,'Index terms');
+            }
+        }
+        else {
+            $self->_add_to_ann($xval->{'xtra-index-terms'},'Index terms');
+        }
+    }
+    #PROPERTIES
+    if (exists($xval->{properties})) {
+        if (ref($xval->{properties}) eq 'ARRAY') {
+            foreach my $property (@{$xval->{properties}}) {
+                push @alluncaptured,$self->_process_prop($property);
+            }
+        }
+        else {
+            push @alluncaptured,$self->_process_prop($xval->{properties});
+        }
+    }
+    $seq->annotation($self->{_ann});
+    $seq->species($specie);
+    my @seqs;
+    foreach my $key (keys %seqcollection) {#Optimize this, no need to go through hash?
+        push @seqs,@{$seqcollection{$key}};
+    }
+    my $cluster = Bio::Cluster::SequenceFamily->new(-family_id=>$seq->accession_number,
+                                             -description=>"Entrez Gene " . $seq->accession_number,
+                                           -members=>\@seqs);#Our EntrezGene object
+    #clean
+    # With debugging on, strip what has been captured so that the raw record
+    # pushed below only shows what is left.
+    unless ($debug eq 'off') {
+        delete $xval->{homology}->{source};
+        delete($xval->{summary});
+        delete($xval->{'track-info'});
+        delete($xval->{gene}{locus});
+        delete($xval->{source}{org}{orgname}{lineage});
+        delete $xval->{source}{org}{orgname}{name}{binomial}{species};
+        delete $xval->{gene}{syn};
+        delete $xval->{source}->{subtype};
+        delete $xval->{comments};
+        delete $xval->{properties};
+        delete $xval->{'xtra-index-terms'};
+        delete $xval->{status};
+    }
+    push @alluncaptured,$xval;
+    # Reset the file-scoped state for the next record.
+    undef %seqcollection;
+    undef $xval;
+    #print 'x';
+    $seq->annotation(_backcomp_ll($self->{_ann})) if ($self->{_locuslink} eq 'convert');#Fix this!
+    return wantarray ? ($seq,$cluster,\@alluncaptured):$seq;#Hilmar's suggestion
+  }
+
+sub _process_refseq {
+    # Build a Bio::Seq for every product that has a gi or an accession and
+    # add it to $seqcollection{seq}.
+    #
+    # Args    : $products - a product hash ref or an array ref of them
+    #           $ns       - namespace for the new sequences (optional)
+    # Returns : (\@uncaptured, $pid), $pid being the accession of the last
+    #           product that was turned into a sequence
+    my $self=shift;
+    my $products=shift;
+    my $ns=shift;
+    my $pid;
+    my (@uncaptured,@products);
+    if (ref($products) eq 'ARRAY') { @products=@{$products}; }
+    else {push @products,$products ;}
+    foreach my $product (@products) {
+        if (($product->{seqs}->{whole}->{gi})||($product->{accession})){#Minimal data required
+            my $cann=Bio::Annotation::Collection->new();
+            $pid=$product->{accession};
+            my $nseq = Bio::Seq->new(
+                            -accession_number => $product->{seqs}->{whole}->{gi},
+                            -display_id=>$product->{accession},
+                            -authority=> $product->{heading}, -namespace=>$ns
+                       );
+            if ($product->{source}) {
+                unless ($nseq->authority) {$nseq->authority($product->{source}->{src}->{db})};
+                my ($uncapt,$allann)=_process_src($product->{source});
+                push @uncaptured,$uncapt;
+                foreach my $annotation (@{$allann}) {
+                    $cann->add_Annotation('dblink',$annotation);
+                }
+            }
+            # Remove what has been captured; 'source' goes here whether or
+            # not it was processed above.
+            delete $product->{seqs}->{whole}->{gi};
+            delete $product->{accession};
+            delete $product->{source};
+            delete $product->{heading};
+            my ($uncapt,$ann,$cfeat)=$self->_process_comments($product->{comment});
+            push @uncaptured,$uncapt;
+            foreach my $feat (@{$cfeat}) {
+                $nseq->add_SeqFeature($feat);
+            }
+            if ($product->{products}) {
+                # Nested products become sequences of their own; only the
+                # accession is kept here, as a 'product' annotation.
+                my ($uncapt,$prodid)=$self->_process_refseq($product->{products});
+                push @uncaptured,$uncapt;
+                my $simann=Bio::Annotation::SimpleValue->new(-value=>$prodid,-tagname=>'product');
+                $cann->add_Annotation($simann);
+            }
+            foreach my $key (keys %$ann) {
+                foreach my $val (@{$ann->{$key}}) {
+                    $cann->add_Annotation($key,$val);
+                }
+            }
+            $nseq->annotation($cann);
+            push @{$seqcollection{seq}},$nseq;
+        }
+    }
+    return \@uncaptured,$pid;
+}
+
+sub _process_links {
+    # Add a 'dblink' annotation to $self->{_ann} for every link source.
+    #
+    # Args    : $links - a link hash ref or an array ref of them; each link
+    #           is expected to carry its data under the 'source' key
+    # Returns : list with one _process_src leftover per link
+    my $self=shift;
+    my $links=shift;
+    my (@annot,@uncapt);
+    # A single link is handled exactly like a one element list.
+    my @links=(ref($links) eq 'ARRAY') ? @$links : ($links);
+    foreach my $link (@links) {
+        my ($uncapt,$annot)=_process_src($link->{source});
+        push @uncapt,$uncapt;
+        foreach my $annotation (@$annot) {
+            $self->{_ann}->add_Annotation('dblink',$annotation);
+        }
+    }
+    return @uncapt;
+}
+
+sub _add_to_ann {#Highest level only
+    # Add $val as a Bio::Annotation::SimpleValue with tag name $tag to
+    # $self->{_ann}.  Without a tag name nothing is added and a warning is
+    # issued instead.
+    my ($self,$val,$tag)=@_;
+    #  $val=~s/\n//g;#Low level EG parser leaves this so we take care of them here
+    unless ($tag) {
+        # The file-scoped $seq is not assigned anywhere in the visible code
+        # (next_seq uses a lexical of its own), so only ask it for an id when
+        # it really holds an object; otherwise this warning would die.
+        my $id=(defined($seq) && ref($seq)) ? $seq->id : '';
+        $val='' unless defined($val);
+        $tag='' unless defined($tag);
+        $id='' unless defined($id);
+        warn "No tagname for value $val, tag $tag ",$id,"\n";
+        return;
+    }
+    my $simann=Bio::Annotation::SimpleValue->new(-value=>$val,-tagname=>$tag);
+    $self->{_ann}->add_Annotation($simann);
+}
+
+sub _process_comments {
+    # Turn a comment structure into annotation and sequence features.
+    #
+    # Args    : $prod - a comment hash ref or an array ref of them; a hash
+    #           with a 'comment' key is replaced by that key's value first
+    # Returns : (\@uncaptured, \%cann, \@feat)
+    #           %cann - 'dblink' and 'simple' => array ref of annotations
+    #           @feat - Bio::SeqFeature::Generic objects built from entries
+    #                   whose text matches /Location/i
+    # Side effect: $self->{_current_heading} is cleared before returning.
+    my $self=shift;
+    my $prod=shift;
+    my (%cann,@feat,@uncaptured,@comments,@sfann);
+    if ((ref($prod) eq 'HASH') && (exists($prod->{comment}))) {
+        $prod=$prod->{comment};
+    }
+    if (ref($prod) eq 'ARRAY') { @comments=@{$prod}; }
+    else {push @comments,$prod;}
+    for my $i (0..$#comments) {#Each comments is a
+        my ($desc,$nfeat,$add,@ann,@comm);
+        my $comm=$comments[$i];
+        # next unless (exists($comm->{comment}));#Should be more careful when calling _process_comment:To do
+        my $heading=$comm->{heading} || 'description';
+        unless (exists($comm->{comment})) {
+            # Leaf entry: a typed text with a source becomes a dblink.
+            if (($comm->{type})&&($self->{_current_heading})) {
+                $comm->{type}=$self->{_current_heading};
+            }
+            if ((exists($comm->{type})) && (exists($comm->{text}))&& ($comm->{type} ne 'comment')) {
+                my ($uncapt,$annot,$anchor)=_process_src($comm->{source});
+                my $cann=shift (@$annot);
+                if ($cann) {
+                    $cann->optional_id($comm->{text});
+                    $cann->authority($comm->{type});
+                    $cann->version($comm->{version});
+                    push @sfann,$cann;
+                }
+            }
+            undef $comm->{comment}; $add=1;#Trick in case we miss something
+        }
+        # Descend through nested 'comment' levels.
+        # NOTE(review): @comm is not reset between passes of this loop, so
+        # entries of an outer level are visited again - confirm intended.
+        while ((exists($comm->{comment})&&$comm->{comment})) {
+            if ($comm->{source}) {
+                my ($uncapt,$allann,$anchor) = _process_src($comm->{source});
+                if ($allann) {
+                    delete $comm->{source};
+                    push @uncaptured,$uncapt;
+                    foreach my $annotation (@{$allann}) {
+                        if ($annotation->{_anchor}) {$desc.=$annotation->{_anchor}.' ';}
+                        $annotation->optional_id($heading);
+                        push @sfann,$annotation;
+                        push @{$cann{'dblink'}},$annotation;
+                    }
+                }
+            }
+            $comm=$comm->{comment};#DOES THIS NEED TO BE REC CYCLE?
+            if (ref($comm) eq 'ARRAY') {
+                @comm=@{$comm};
+            }
+            else {
+                push @comm,$comm;
+            }
+            foreach my $ccomm (@comm) {
+                next unless ($ccomm);
+                if (exists($ccomm->{source})) {
+                    my ($uncapt,$allann,$anchor) = _process_src($ccomm->{source});
+                    if ($allann) {
+                        @sfann=@{$allann};
+                        delete $ccomm->{source};
+                        push @uncaptured,$uncapt;
+                    }
+                }
+                $ccomm=$ccomm->{comment} if (exists($ccomm->{comment}));#Alice in Wonderland
+                my @loc;
+                if (ref($ccomm) eq 'ARRAY') {
+                    @loc=@{$ccomm};
+                }
+                else {
+                    push @loc,$ccomm;
+                }
+                foreach my $loc (@loc) {
+                    if ((exists($loc->{text}))&&($loc->{text}=~/Location/i)){
+                        # The text is taken apart as "<start> - <end>
+                        # <score source>:<score>".
+                        my ($l1,$rest)=split(/-/,$loc->{text});
+                        $l1=~s/\D//g;
+                        $rest=~s/^\s//;
+                        my ($l2,$scorestr)=split(/\s/,$rest,2);
+                        my ($scoresrc,$score)=split(/:/,$scorestr);
+                        $score=~s/\D//g;
+                        # %tags is never filled here, so -strand below is
+                        # always undef.
+                        my (%tags,$tag);
+                        unless ($l1) {
+                            next;
+                        }
+                        $nfeat=Bio::SeqFeature::Generic->new(-start=>$l1,
+                                                             -end=>$l2,
+                                                             -strand=>$tags{strand},
+                                                             -source=>$loc->{type},
+                                                             -seq_id=>$desc,
+                                                             -primary=>$heading,
+                                                             -score=>$score,
+                                                             -tag    => {score_src=>$scoresrc});
+                        my $sfeatann=Bio::Annotation::Collection->new;
+                        foreach my $sfann (@sfann) {
+                            $sfeatann->add_Annotation('dblink',$sfann);
+                        }
+                        undef @sfann;
+                        $nfeat->annotation($sfeatann);#Thus the annotation will be available both in the seq and seqfeat?
+                        push @feat,$nfeat;
+                        delete $loc->{text};
+                        delete $loc->{type};
+                    }
+                    elsif (exists($loc->{label})) {
+                        my $simann=Bio::Annotation::SimpleValue->new(-value=>$loc->{text},-tagname=>$loc->{label});
+                        delete $loc->{text};
+                        delete $loc->{label};
+                        push @{$cann{'simple'}},$simann;
+                        push @uncaptured,$loc;
+                    }
+                    elsif (exists($loc->{text})) {
+                        my $simann=Bio::Annotation::SimpleValue->new(-value=>$loc->{text},-tagname=>$heading);
+                        delete $loc->{text};
+                        push @{$cann{'simple'}},$simann;
+                        push @uncaptured,$loc;
+                    }
+
+                }
+            }#Bit clumsy but that's what we get from the low level parser
+        }
+    }
+    if (@sfann) {push @{$cann{'dblink'}},@sfann;}#Annotation that is not location specific, for example phenotype
+    undef $self->{_current_heading};
+    return \@uncaptured,\%cann,\@feat;
+}
+
+
+sub _process_src {
+    # Plain function (not a method): convert one 'source' structure into
+    # Bio::Annotation::DBLink objects.  The keys that are consumed (db,
+    # anchor, url, tag) are removed from the structure.
+    #
+    # Returns : nothing if the structure has no {src}{tag}; otherwise
+    #           ($src, \@links, $anchor), $src being what is left of the
+    #           input.  A string tag yields one link per ',' or ';'
+    #           separated id, an id tag yields a single link.
+    my ($src)=@_;
+    return unless (exists($src->{src}->{tag}));
+
+    my @links;
+    my $db=delete $src->{src}->{db};
+    my $anchor=(delete $src->{anchor})||'';
+    my $url;
+    if ($src->{url}) {
+        ($url=delete $src->{url})=~s/\n//g;
+    }
+
+    if ($src->{src}->{tag}->{str}) {
+        my @ids=split(/[,;]/,$src->{src}->{tag}->{str});
+        delete $src->{src}->{tag};
+        for my $id (@ids) {
+            $id=~s/\n//g;
+            undef $anchor if ($anchor eq 'id');
+            my $link=Bio::Annotation::DBLink->new(-database => $db,
+                                                  -primary_id => $id,
+                                                  -authority=>$src->{heading});
+            $link->url($url) if ($url);#DBLink should have URL!
+            push @links,$link;
+        }
+    }
+    else {
+        my $id=$src->{src}->{tag}->{id}||'';
+        delete $src->{src}->{tag};
+        undef $anchor if ($anchor eq 'id');
+        $id=~s/\n//g;
+        my $link=Bio::Annotation::DBLink->new(-database => $db,
+                                              -primary_id => $id,
+                                              -authority=>$src->{heading});
+        # Only this branch records the anchor on the link itself.
+        if ($anchor) {
+            $link->{_anchor}=$anchor;
+            $link->optional_id($anchor);
+        }
+        $link->url($url) if ($url);#DBLink should have URL!
+        push @links,$link;
+    }
+    return $src,\@links,$anchor;
+}
+
+sub _add_references {
+    # Add one 'Reference' annotation (database 'Pubmed') to $self->{_ann}
+    # for the given id, or for each id when an array ref is passed.
+    my ($self,$refs)=@_;
+    if (ref($refs) eq 'ARRAY') {
+        for my $pmid (@$refs) {
+            $self->{_ann}->add_Annotation(
+                'Reference',
+                Bio::Annotation::Reference->new(-database => 'Pubmed',
+                                                -primary_id => $pmid));
+        }
+    }
+    else {
+        $self->{_ann}->add_Annotation(
+            'Reference',
+            Bio::Annotation::Reference->new(-database => 'Pubmed',
+                                            -primary_id => $refs));
+    }
+}
+
+#Should we do this at all if no seq coord are present?
+sub _process_locus {
+    # Turn the locus held in $self->{_current} into a Bio::Seq with a
+    # 'gene location' feature (when coordinates are present) and a gene
+    # structure holding one transcript per product, and add that sequence
+    # to $seqcollection{genestructure}.
+    #
+    # Args    : none - the locus is read from $self->{_current}
+    # Returns : list of the leftovers reported for each product; nothing
+    #           when the locus has no accession
+    my $self=shift;
+    my @uncapt;
+    # Return an empty list here: the caller pushes the result onto its list
+    # of uncaptured data, which is no place for the parser object.
+    return unless (exists($self->{_current}->{accession})&&($self->{_current}->{accession}));
+    my $locus=$self->{_current};
+    my $gseq=Bio::Seq->new(-display_id=>$locus->{accession},-version=>$locus->{version},
+                -accession_number=>$locus->{seqs}->{'int'}->{id}->{gi},
+                -authority=>$locus->{type}, -namespace=>$locus->{heading});
+    delete $locus->{accession};
+    delete $locus->{version};
+    # The gi lives under {seqs}{int}{id}, which is where it was read from.
+    delete $locus->{seqs}->{'int'}->{id}->{gi};
+    my ($start,$end,$strand);
+    if (exists($locus->{seqs}->{'int'}->{from})) {
+        my $interval=$locus->{seqs}->{'int'};
+        $start=delete $interval->{from};
+        #unless ($start) {print $locus->{seqs}->{'int'}->{from},"\n",$locus,"\n";}
+        $end=delete $interval->{to};
+        # Read the strand before removing it; it stays undefined when the
+        # record does not give one.
+        if (exists($interval->{strand})) {
+            $strand=(delete $interval->{strand}) eq 'minus' ? -1 : 1;
+        }
+        my $nfeat=Bio::SeqFeature::Generic->new(-start=>$start, -end=>$end, -strand=>$strand, -primary=>'gene location');
+        $gseq->add_SeqFeature($nfeat);
+    }
+    my @products;
+    if (ref($locus->{products}) eq 'ARRAY') {
+        @products=@{$locus->{products}};
+    }
+    else {
+        push @products,$locus->{products};
+    }
+    delete $locus->{products};
+    my $gstruct=Bio::SeqFeature::Gene::GeneStructure->new;
+    foreach my $product (@products) {
+        my ($tr,$uncapt)=_process_products_coordinates($product,$start,$end,$strand);
+        if ($tr) {
+            $gstruct->add_transcript($tr);
+            undef $tr->{parent}; #Because of a cycle
+        }
+        push @uncapt,$uncapt;
+    }
+    $gseq->add_SeqFeature($gstruct);
+    push @{$seqcollection{genestructure}},$gseq;
+    return @uncapt;
+}
+
+=head1 _process_products_coordinates
+
+To do:
+
+=cut
+
+
+sub _process_products_coordinates {
+    # Plain function (not a method): build a transcript feature for one
+    # product and add an exon for every genomic interval that has a 'from'
+    # coordinate.  A nested 'products' entry is processed the same way and
+    # attached to the transcript as a sub feature.
+    #
+    # Args    : $coord  - product hash ref
+    #           $start, $end, $strand - location of the enclosing gene;
+    #           default to 0, 1 and 1 when missing or false
+    # Returns : nothing if the product has no 'accession' key; otherwise
+    #           ($transcript, \@uncapt), @uncapt holding what is left of
+    #           the intervals
+    my $coord=shift;
+    my $start=shift||0;#In case it is not known: should there be an entry at all?
+    my $end=shift||1;
+    my $strand=shift||1;
+    my (@coords,@uncapt);
+    return unless (exists($coord->{accession}));
+    my $transcript=Bio::SeqFeature::Gene::Transcript->new(-primary=>$coord->{accession}, #Desc is actually non functional...
+                                              -start=>$start,-end=>$end,-strand=>$strand, -desc=>$coord->{type});
+
+    if ((exists($coord->{'genomic-coords'}->{mix}->{'int'}))||(exists($coord->{'genomic-coords'}->{'packed-int'}))) {
+        @coords=exists($coord->{'genomic-coords'}->{mix}->{'int'})?@{$coord->{'genomic-coords'}->{mix}->{'int'}}:
+                                        @{$coord->{'genomic-coords'}->{'packed-int'}};
+        foreach my $exon (@coords) {
+            next unless (exists($exon->{from}));
+            # Exons take the strand of the gene, not one of their own.
+            my $exonobj=Bio::SeqFeature::Gene::Exon->new(-start=>$exon->{from},-end=>$exon->{to},-strand=>$strand);
+            $transcript->add_exon($exonobj);
+            delete $exon->{from};
+            delete $exon->{to};
+            delete $exon->{strand};
+            push @uncapt,$exon;
+        }
+    }
+    if (exists($coord->{products})) {
+        my ($prot,$uncapt)=_process_products_coordinates($coord->{products},$start,$end,$strand);
+        # The nested call returns nothing for a product without accession.
+        $transcript->add_SeqFeature($prot) if ($prot);
+        push @uncapt,$uncapt;
+    }
+    return $transcript,\@uncapt;
+}
+
+=head1 _process_prop
+
+To do: process GO
+
+=cut
+
+sub _process_prop {
+    # Process one property record, descending into nested 'properties'.
+    # Records whose heading is not 'GeneOntology' have their text added as
+    # an annotation tagged with their label; for GeneOntology records the
+    # comment is handed to _process_go (defined outside this part of the
+    # file).
+    #
+    # Returns : array ref with the leftovers, for either kind of record
+    my $self=shift;
+    my $prop=shift;
+    my @uncapt;
+    if (exists($prop->{properties})) {#Iterate
+        if (ref($prop->{properties}) eq 'ARRAY') {
+            foreach my $propn (@{$prop->{properties}}) {
+                push @uncapt,$self->_process_prop($propn);
+            }
+        }
+        else {
+            push @uncapt,$self->_process_prop($prop->{properties});
+        }
+    }
+    unless ((exists($prop->{heading})) && ($prop->{heading} eq 'GeneOntology')) {
+        $self->_add_to_ann($prop->{text},$prop->{label}) if (exists($prop->{text}));
+        delete $prop->{text};
+        delete $prop->{label};
+        push @uncapt,$prop;
+        return \@uncapt;
+    }
+    #Will do GO later
+    if (exists($prop->{comment})) {
+        push @uncapt,$self->_process_go($prop->{comment});
+    }
+    # Return the collected leftovers here as well, rather than whatever the
+    # last statement happened to evaluate to.
+    return \@uncapt;
+}
+
+
+# Process the comment block currently stored in $self->{_current}.
+#
+# Dispatches on the block's 'heading' (when present) to the matching
+# handler, then deals with generif entries, references and nested comments.
+#
+# Returns : list of the structures that were not turned into annotation
+# Args    : none (works on $self->{_current} in place, deleting what it uses)
+sub _process_all_comments {
+    my $self=shift;
+    my $product=$self->{_current};#Better without copying
+    my @alluncaptured;
+    # Declare and assign separately: "my $x = ... if COND" has undefined
+    # behaviour in Perl (perlsyn, "Statement Modifiers").
+    my $heading;
+    $heading=$product->{heading} if (exists($product->{heading}));
+    if ($heading) {
+        delete $product->{heading};
+        # Headings are matched as patterns, not compared with eq, in case
+        # NCBI slightly changes the spacing. The first matching branch wins.
+        CLASS: {
+            if ($heading =~ 'RefSeq Status') {
+                $self->_add_to_ann($product->{label},'RefSeq status');
+                last CLASS;
+            }
+            if ($heading =~ 'NCBI Reference Sequences') {
+                my @uncaptured=$self->_process_refseq($product->{products},'refseq');
+                push @alluncaptured,@uncaptured;
+                last CLASS;
+            }
+            if ($heading =~ 'Related Sequences') {
+                my @uncaptured=$self->_process_refseq($product->{products});
+                push @alluncaptured,@uncaptured;
+                last CLASS;
+            }
+            # NOTE(review): 'Tagges' looks like a typo for 'Tagged'. It is
+            # kept verbatim because correcting it would shadow the
+            # 'Sequence Tagged Sites' branch below and change behaviour.
+            if ($heading =~ 'Sequence Tagges Sites') {
+                my @uncaptured=$self->_process_links($product);
+                push @alluncaptured,@uncaptured;
+                last CLASS;
+            }
+            if ($heading =~ 'Additional Links') {
+                push @alluncaptured,$self->_process_links($product->{comment});
+                last CLASS;
+            }
+            if ($heading =~ 'LocusTagLink') {
+                $self->_add_to_ann($product->{source}->{src}->{tag}->{id},$product->{source}->{src}->{db});
+                last CLASS;
+            }
+            if ($heading =~ 'Sequence Tagged Sites') {
+                push @alluncaptured,$self->_process_STS($product->{comment});
+                delete $product->{comment};
+                last CLASS;
+            }
+            if ($heading =~ 'Pathways') {
+                $self->{_current_heading}='Pathways';
+                last CLASS;
+            }
+        }
+    }
+    if (exists($product->{type})&&($product->{type} eq 'generif')) {
+        push @alluncaptured,$self->_process_grif($product);
+        return @alluncaptured;#Maybe still process the comments?
+    }
+    if (exists($product->{refs})) {
+        $self->_add_references($product->{refs}->{pmid});
+        delete $product->{refs}->{pmid};
+        push @alluncaptured,$product;
+    }
+    if (exists($product->{comment})) {
+        my ($uncapt,$allan,$allfeat)=$self->_process_comments($product->{comment});
+        foreach my $key (keys %$allan) {
+            foreach my $val (@{$allan->{$key}}) {
+                $self->{_ann}->add_Annotation($key,$val);
+            }
+        }
+        # NOTE(review): this deletes {refs}{comment}, not {comment};
+        # possibly unintended, left unchanged.
+        delete $product->{refs}->{comment};
+        push @alluncaptured,$uncapt;
+    }
+    #if (exists($product->{source})) {
+    #    my ($uncapt,$ann,$anchor)=_process_src($product->{source});
+    #    foreach my $dbl (@$ann) {
+    #        $self->{_ann}->add_Annotation('dblink',$dbl);
+    #    }
+    #}
+    return @alluncaptured;
+}
+
+# Convert 'Sequence Tagged Sites' comment entries into ontology-term
+# annotations stored under the 'OntologyTerm' key of $self->{_ann}.
+#
+# Returns : nothing (empty list)
+# Args    : a single STS record (hash ref) or an array ref of them
+sub _process_STS {
+    my $self=shift;
+    my $comment=shift;
+    my @comm;
+    push @comm,( ref($comment) eq 'ARRAY')? @{$comment}:$comment;
+    foreach my $product (@comm) {
+        # Plain Class->new: the original "new Bio::Ontology::Term->new("
+        # mixed indirect-object and arrow syntax.
+        my $sts=Bio::Ontology::Term->new(
+            -identifier  => $product->{source}->{src}->{tag}->{id},
+            -name        => $product->{source}->{anchor},
+            -comment     => $product->{source}->{'post-text'});
+        $sts->namespace($product->{source}->{src}->{db});
+        $sts->authority('STS marker');
+        my @alt;
+        push @alt, ( ref($product->{comment}) eq 'ARRAY') ? @{$product->{comment}}:$product->{comment};
+        foreach my $alt (@alt) {
+            # A record without alternative names yields an undef entry
+            # here; skip it instead of registering an undef synonym.
+            next unless (ref($alt) eq 'HASH' && defined($alt->{text}));
+            $sts->add_synonym($alt->{text});
+        }
+        my $annterm = Bio::Annotation::OntologyTerm->new();
+        $annterm->term($sts);
+        $self->{_ann}->add_Annotation('OntologyTerm',$annterm);
+    }
+    # Callers push the result onto a list, so return an explicit empty list.
+    return;
+}
+
+# Register every Gene Ontology record found in a GeneOntology comment as
+# an 'OntologyTerm' annotation on $self->{_ann}.
+#
+# Args : one category record (hash ref) or an array ref of them; each
+#        record has a 'label' (the category) and a 'comment' holding one
+#        GO record or an array ref of GO records
+sub _process_go {
+    my ($self,$comm)=@_;
+    my @categories=(ref($comm) eq 'ARRAY') ? @{$comm} : ($comm);
+    foreach my $entry (@categories) {
+        my $category=$entry->{label};
+        # Normalise the single-record case to a one-element list
+        my @records=(ref($entry->{comment}) eq 'ARRAY')
+                   ? @{$entry->{comment}}
+                   : ($entry->{comment});
+        foreach my $record (@records) {
+            my $term=_get_go_term($record,$category);
+            my $annterm=Bio::Annotation::OntologyTerm->new(-tagname => 'Gene Ontology');
+            $annterm->term($term);
+            $self->{_ann}->add_Annotation('OntologyTerm',$annterm);
+        }
+    }
+}
+
+# Turn a generif record into annotation on $self->{_ann}.
+#
+# A record with a PubMed id becomes a 'dblink' annotation (database
+# 'generif') carrying the text as its comment; without one the text is
+# stored as a plain 'generif' value.
+#
+# Returns : the remainder of the record (hash ref) after the captured keys
+#           were deleted, or an array ref of such remainders when the
+#           record's 'comment' is an array of records
+# Args    : the generif record (hash ref)
+sub _process_grif {
+    my $self=shift;
+    my $grif=shift;
+    if (ref($grif->{comment}) eq 'ARRAY') {#Insane isn't it?
+        my @uncapt;
+        foreach my $product (@{$grif->{comment}}) {
+            next unless (exists($product->{text}));
+            my $uproduct=$self->_process_grif($product);
+            push @uncapt,$uproduct;
+        }
+        return \@uncapt;
+    }
+    # Only look inside 'comment' when it really is a hash: a bare exists()
+    # on the nested key would autovivify a missing 'comment' and die on a
+    # plain string.
+    if (ref($grif->{comment}) eq 'HASH' && exists($grif->{comment}->{comment})) {
+        $grif=$grif->{comment};
+    }
+    my $ref= (ref($grif->{refs}) eq 'ARRAY') ? shift @{$grif->{refs}}:$grif->{refs};
+    my $refergene='';
+    my $refdb='';
+    my ($obj,$type);
+    if ($ref->{pmid}) {
+        if (exists($grif->{source})) { #unfortunately we cannot put everything in yet
+            $refergene=$grif->{source}->{src}->{tag}->{id};
+            $refdb=$grif->{source}->{src}->{db};
+        }
+        my $grifobj=Bio::Annotation::Comment->new(-text=>$grif->{text});
+        $obj = Bio::Annotation::DBLink->new(-database => 'generif',
+                                            -primary_id => $ref->{pmid}, #The pubmed id (at least the first one) which is a base for the conclusion
+                                            -version=>$grif->{version},
+                                            -optional_id=>$refergene,
+                                            -authority=>$refdb
+                    );
+        $obj->comment($grifobj);
+        $type='dblink';
+    }
+    else {
+        # Named arguments, as used for SimpleValue elsewhere in this file;
+        # positional arguments are misread when the text starts with '-'.
+        $obj=Bio::Annotation::SimpleValue->new(-value=>$grif->{text},-tagname=>'generif');
+        $type='generif';
+    }
+    delete $grif->{text};
+    delete $grif->{version};
+    delete $grif->{type};
+    delete $grif->{refs};
+    $self->{_ann}->add_Annotation($type,$obj);
+    return $grif;
+}
+
+# Build a Bio::Ontology::Term from one Gene Ontology record.
+#
+# Returns : the term, with one reference attached and its namespace set
+#           to the given category
+# Args    : the GO record (hash ref) and the category
+sub _get_go_term {
+    my ($go,$category)=@_;
+    #We expect one ref per GO
+    my $refan=Bio::Annotation::Reference->new(
+        -medline => $go->{refs}->{pmid},
+        -title   => 'no title',
+    );
+    my $term=Bio::Ontology::Term->new(
+        -identifier  => $go->{source}->{src}->{tag}->{id},
+        -name        => $go->{source}->{anchor},
+        -definition  => $go->{source}->{anchor},
+        -comment     => $go->{source}->{'post-text'},
+        -version     => $go->{version},
+    );
+    $term->add_reference($refan);
+    $term->namespace($category);
+    return $term;
+}
+
+
+# Build a second annotation collection from an existing one, with tags
+# renamed according to %main::eg_to_ll and tags matching @main::egonly
+# left out.
+#
+# Returns : the new Bio::Annotation::Collection
+# Args    : the source annotation collection
+#
+# NOTE(review): this works directly on the collections' internal hashes,
+# and the type map is shared with (not copied from) the source collection,
+# so the deletions below also affect $ann.
+sub _backcomp_ll {
+    my $ann=shift;
+    my $newann=Bio::Annotation::Collection->new();
+    #$newann->{_annotation}->{ALIAS_SYMBOL}=$ann->{_annotation}->{ALIAS_SYMBOL};
+    # $newann->{_annotation}->{CHR}=$ann->{_annotation}->{chromosome};
+    # $newann->{_annotation}->{MAP}=$ann->{_annotation}->{cyto};
+    foreach my $tagmap (keys %{$ann->{_typemap}->{_type}}) {
+        # The tag is used as a pattern against each @main::egonly entry
+        next if (grep(/$tagmap/,@main::egonly));
+        $newann->{_annotation}->{$tagmap}=$ann->{_annotation}->{$tagmap};
+    }
+    #$newann->{_annotation}->{Reference}=$ann->{_annotation}->{Reference};
+    #$newann->{_annotation}->{generif}=$ann->{_annotation}->{generif};
+    #$newann->{_annotation}->{comment}=$ann->{_annotation}->{comment};
+    # $newann->{_annotation}->{OFFICIAL_GENE_NAME}=$ann->{_annotation}->{'Official Full Name'};
+    $newann->{_typemap}->{_type}=$ann->{_typemap}->{_type};
+    foreach my $ftype (keys %main::eg_to_ll) {
+        my $newkey=$main::eg_to_ll{$ftype};
+        $newann->{_annotation}->{$newkey}=$ann->{_annotation}->{$ftype};
+        $newann->{_typemap}->{_type}->{$newkey}='Bio::Annotation::SimpleValue';
+        delete $newann->{_typemap}->{_type}->{$ftype};
+        $newann->{_annotation}->{$newkey}->[0]->{tagname}=$newkey;
+    }
+    # Expose the URL of every dblink that has one as a 'URL' value
+    foreach my $dblink (@{$newann->{_annotation}->{dblink}}) {
+        next unless ($dblink->{_url});
+        my $simann=Bio::Annotation::SimpleValue->new(-value=>$dblink->{_url},-tagname=>'URL');
+        $newann->add_Annotation($simann);
+    }
+
+#        my $simann=new Bio::Annotation::SimpleValue(-value=>$seq->desc,-tagname=>'comment');
+#        $newann->add_Annotation($simann);
+
+    return $newann;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/excel.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/excel.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/excel.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,301 @@
+# $Id: excel.pm,v 1.4.4.1 2006/10/02 23:10:29 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::excel
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2005.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2005.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::excel - sequence input/output stream from a
+                    MSExcel-formatted table
+
+=head1 SYNOPSIS
+
+  #It is probably best not to use this object directly, but
+  #rather go through the SeqIO handler system. Go:
+
+  $stream = Bio::SeqIO->new(-file => $filename, -format => 'excel');
+
+  while ( my $seq = $stream->next_seq() ) {
+	# do something with $seq
+  }
+
+=head1 DESCRIPTION
+
+This class transforms records in a MS Excel workbook file into
+Bio::Seq objects. It is derived from the table format module and
+merely defines additional properties and overrides the way to get data
+from the file and advance to the next record.
+
+The module permits specifying which columns hold which type of
+annotation. The semantics of certain attributes, if present, are
+pre-defined, e.g., accession number and sequence. Additional
+attributes may be added to the annotation bundle. See
+L<Bio::SeqIO::table> for a complete list of parameters and
+capabilities.
+
+You may also specify the worksheet from which to obtain the data, and
+after finishing one worksheet you may change the name to keep reading
+from another worksheet (in the same file).
+
+This module depends on Spreadsheet::ParseExcel to parse the underlying
+Excel file.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+
+Bug reports can be submitted via email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::excel;
+use strict;
+
+use Bio::SeqIO;
+use Spreadsheet::ParseExcel;
+#use Spreadsheet::ParseExcel::Workbook;
+
+use base qw(Bio::SeqIO::table);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::SeqIO->new(-file => $filename, -format => 'excel')
+ Function: Returns a new seqstream
+ Returns : A Bio::SeqIO stream for a MS Excel format
+
+ Args    : Supports the same named parameters as Bio::SeqIO::table,
+           except -delim, which obviously does not apply to a binary
+           format. In addition, the following parameters are supported.
+
+             -worksheet the name of the worksheet holding the table;
+                        if unspecified the first worksheet will be
+                        used
+
+
+=cut
+
+# Set up the stream: run the inherited initialization, then pick up the
+# -worksheet parameter (the first worksheet, index 0, when unspecified).
+sub _initialize {
+    my($self,@args) = @_;
+
+    # chained initialization
+    $self->SUPER::_initialize(@args);
+
+    # our own parameters
+    my ($worksheet) = $self->_rearrange([qw(WORKSHEET)], @args);
+
+    # store options and apply defaults
+    $self->worksheet($worksheet || 0);
+
+}
+
+=head2 worksheet
+
+ Title   : worksheet
+ Usage   : $obj->worksheet($newval)
+ Function: Get/set the name of the worksheet holding the table. The
+           worksheet name may also be a numeric index.
+
+           You may change the value during parsing at any time in
+           order to start reading from a different worksheet (in the
+           same file).
+
+ Example :
+ Returns : value of worksheet (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub worksheet{
+    my ($self, @newval) = @_;
+
+    # plain getter when called without an argument
+    return $self->{'worksheet'} unless @newval;
+
+    my $name = $newval[0];
+    # Selecting a worksheet drops the cached worksheet object, so that
+    # several worksheets can be read one after the other.
+    $self->_worksheet(undef) if defined($name);
+    $self->{'worksheet'} = $name;
+    return $name;
+}
+
+=head2 close
+
+ Title   : close
+ Usage   :
+ Function: Close and/or release the resources used by this parser instance.
+
+           We override this here in order to free up the worksheet and
+           other related objects.
+
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub close{
+    my ($self, @rest) = @_;
+
+    # release the cached worksheet object first ...
+    $self->_worksheet(undef);
+    # ... then let the inherited method do the actual closing
+    return $self->SUPER::close(@rest);
+}
+
+=head1 Internal methods
+
+All methods with a leading underscore are not meant to be part of the
+'official' API. They are for use by this module only, consider them
+private unless you are a developer trying to modify this module.
+
+=cut
+
+=head2 _worksheet
+
+ Title   : _worksheet
+ Usage   : $obj->_worksheet($newval)
+ Function: Get/set the worksheet object to be used for accessing cells.
+ Example :
+ Returns : value of _worksheet (a Spreadsheet::ParseExcel::Worksheet object)
+ Args    : on set, new value (a Spreadsheet::ParseExcel::Worksheet
+           object or undef, optional)
+
+
+=cut
+
+sub _worksheet{
+    my ($self, @newval) = @_;
+
+    # setter: store whatever was passed, including undef
+    if (@newval) {
+        $self->{'_worksheet'} = $newval[0];
+    }
+    return $self->{'_worksheet'};
+}
+
+=head2 _next_record
+
+ Title   : _next_record
+ Usage   :
+ Function: Navigates the underlying file to the next record.
+
+           We override this here in order to adapt navigation to data
+           in an Excel worksheet.
+
+ Example :
+ Returns : TRUE if the navigation was successful and FALSE
+           otherwise. Unsuccessful navigation will usually be treated
+           as an end-of-file condition.
+ Args    :
+
+
+=cut
+
+sub _next_record{
+    my $self = shift;
+
+    my $wsheet = $self->_worksheet();
+    if (! defined($wsheet)) {
+        # worksheet hasn't been initialized yet, do so now
+        my $wbook = Spreadsheet::ParseExcel::Workbook->Parse($self->_fh);
+        # Fail with a clear message rather than with "Can't call method
+        # on an undefined value" when no workbook object came back.
+        $self->throw("unable to parse input as an Excel workbook")
+            unless defined($wbook);
+        $wsheet = $wbook->Worksheet($self->worksheet);
+        # store the result
+        $self->_worksheet($wsheet);
+        # re-initialize the current row
+        $self->{'_row'} = -1;
+    }
+
+    # we need a valid worksheet to continue
+    return unless defined($wsheet);
+
+    # check whether we are at or beyond the last defined row
+    my ($minrow, $maxrow) = $wsheet->RowRange();
+    return if $self->{'_row'} >= $maxrow;
+
+    # we don't check for empty rows here as in order to do that we'd
+    # have to know in which column to look
+    # so, just advance to the next row
+    $self->{'_row'}++;
+
+    # done
+    return 1;
+}
+
+=head2 _get_row_values
+
+ Title   : _get_row_values
+ Usage   :
+ Function: Get the values for the current line (or row) as an array in
+           the order of columns.
+
+           We override this here in order to adapt access to column
+           values to data contained in an Excel worksheet.
+
+ Example :
+ Returns : An array of column values for the current row.
+ Args    :
+
+
+=cut
+
+sub _get_row_values{
+    my $self = shift;
+
+    my $sheet   = $self->_worksheet();
+    my $current = $self->{'_row'};
+
+    # every column the worksheet reports as defined is used
+    my ($first, $last) = $sheet->ColRange();
+
+    # collect the cell values of the current row from left to right;
+    # a cell that does not exist contributes undef
+    my @values;
+    my $col = $first;
+    while ($col <= $last) {
+        my $cell = $sheet->Cell($current, $col);
+        if (defined($cell)) {
+            push(@values, $cell->Value);
+        } else {
+            push(@values, $cell);
+        }
+        $col++;
+    }
+    return @values;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/exp.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/exp.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/exp.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: exp.pm,v 1.15.4.1 2006/10/02 23:10:29 sendu Exp $
+# BioPerl module for Bio::SeqIO::exp
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::exp - exp trace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from exp trace
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Aaron Mackey
+
+Email: amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::exp;
+use vars qw(@ISA $READ_AVAIL);
+use strict;
+
+use Bio::SeqIO;
+use Bio::Seq::SeqFactory;
+
+push @ISA, qw( Bio::SeqIO );
+
+# Probe at compile time for the optional Bio::SeqIO::staden::read module.
+# $READ_AVAIL records whether it loaded; when it did, it is added to @ISA.
+sub BEGIN {
+    my $loaded = eval { require Bio::SeqIO::staden::read; 1 };
+    $READ_AVAIL = $loaded ? 1 : 0;
+    push @ISA, "Bio::SeqIO::staden::read" if $READ_AVAIL;
+}
+
+# Set up the stream: run the inherited initialization, install a
+# Bio::Seq::Quality factory unless one is already set, and throw a
+# Bio::Root::SystemException when Bio::SeqIO::staden::read could not be
+# loaded.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::Seq::Quality'));
+  }
+  unless ($READ_AVAIL) {
+      Bio::Root::Root->throw( -class => 'Bio::Root::SystemException',
+			      -text  => "Bio::SeqIO::staden::read is not available; make sure the bioperl-ext package has been installed successfully!"
+			    );
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::Quality object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # read_trace() supplies the pieces the sequence object is built from
+    my ($bases, $name, $description, $quality) =
+        $self->read_trace($self->_fh, 'exp');
+
+    # the id doubles as primary id; the alphabet is always DNA
+    my $record = $self->sequence_factory->create(
+        -seq        => $bases,
+        -id         => $name,
+        -primary_id => $name,
+        -desc       => $description,
+        -alphabet   => 'DNA',
+        -qual       => $quality,
+    );
+    return $record;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+sub write_seq {
+    my ($self,@seq) = @_;
+
+    # hand each sequence, in order, to write_trace() with format 'exp'
+    my $fh = $self->_fh;
+    foreach my $seq (@seq) {
+	$self->write_trace($fh, $seq, 'exp');
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,266 @@
+# $Id: fasta.pm,v 1.58.4.1 2006/10/02 23:10:29 sendu Exp $
+# BioPerl module for Bio::SeqIO::fasta
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#          and Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Ewan Birney & Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# October 18, 1999  Largely rewritten by Lincoln Stein
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::fasta - fasta sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from fasta flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Ewan Birney & Lincoln Stein
+
+Email: birney at ebi.ac.uk
+       lstein at cshl.org
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::fasta;
+use vars qw($WIDTH @SEQ_ID_TYPES $DEFAULT_SEQ_ID_TYPE);
+use strict;
+
+use Bio::Seq::SeqFactory;
+use Bio::Seq::SeqFastaSpeedFactory;
+
+use base qw(Bio::SeqIO);
+
+# Identifier kinds accepted by preferred_id_type(), and the one used when
+# none has been chosen.
+@SEQ_ID_TYPES = qw(accession accession.version display primary);
+$DEFAULT_SEQ_ID_TYPE = 'display';
+
+# Default line width; width() falls back to it when no width is set.
+BEGIN { $WIDTH = 60}
+
+# Set up the stream: run the inherited initialization, apply the -width
+# parameter when one was given, and install a SeqFastaSpeedFactory unless
+# a sequence factory is already set.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  my ($width) = $self->_rearrange([qw(WIDTH)], @args);
+  $width && $self->width($width);
+  unless ( defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+# Read one record from the stream and build a sequence object from it.
+# Returns the created object, or nothing when no further entry can be read.
+sub next_seq {
+    my( $self ) = @_;
+    my $seq;
+    my $alphabet;
+    # Records are delimited by a newline followed by '>'; presumably
+    # _readline honours $/ and so returns one whole entry per call.
+    local $/ = "\n>";
+    return unless my $entry = $self->_readline;
+
+    # chomp strips the trailing "\n>" record separator
+    chomp($entry);
+    if ($entry =~ m/\A\s*\Z/s)  { # very first one
+	# nothing but whitespace was read, so fetch the next chunk
+	return unless $entry = $self->_readline;
+	chomp($entry);
+    }
+    # drop the leading '>' if it is still there
+    $entry =~ s/^>//;
+
+    # the first line is the header, everything after it is sequence data
+    my ($top,$sequence) = split(/\n/,$entry,2);
+    defined $sequence && $sequence =~ s/>//g;
+#    my ($top,$sequence) = $entry =~ /^>?(.+?)\n+([^>]*)/s
+#	or $self->throw("Can't parse fasta entry");
+
+    # header = id (first run of non-whitespace) followed by the description
+    my ($id,$fulldesc);
+    if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
+	($id,$fulldesc) = ($1,$2);
+    }
+    
+    if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
+                                                   # between > and name \AE
+    defined $sequence && $sequence =~ tr/ \t\n\r//d;	# Remove whitespace
+
+    # for empty sequences we need to know the mol.type
+    $alphabet = $self->alphabet();
+    if(defined $sequence && length($sequence) == 0) {
+	if(! defined($alphabet)) {
+	    # let's default to dna
+	    $alphabet = "dna";
+	}
+    } else {
+	# we don't need it really, so disable
+	# we want to keep this if SeqIO alphabet was set by user
+	# not sure if this could break something
+	#$alphabet = undef;
+    }
+
+    $seq = $self->sequence_factory->create(
+					   -seq         => $sequence,
+					   -id          => $id,
+					   # Ewan's note - I don't think this healthy
+					   # but obviously to taste.
+					   #-primary_id  => $id,
+					   -desc        => $fulldesc,
+					   -alphabet    => $alphabet,
+					   -direct      => 1,
+					   );
+
+
+
+
+    # if there wasn't one before, set the guessed type
+    #unless ( defined $alphabet ) {
+	# don't assume that all our seqs are the same as the first one found
+	#$self->alphabet($seq->alphabet());
+    #}
+    return $seq;
+
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: Writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Array of 1 or more Bio::PrimarySeqI objects
+
+=cut
+
+sub write_seq {
+   my ($self,@seq) = @_;
+   my $width = $self->width;
+   foreach my $seq (@seq) {
+		$self->throw("Did not provide a valid Bio::PrimarySeqI object")
+		  unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+
+		my $str = $seq->seq;
+		my $top;
+
+		# Allow for different ids
+		my $id_type = $self->preferred_id_type;
+		if( $id_type =~ /^acc/i ) {
+			$top = $seq->accession_number();
+			if( $id_type =~ /vers/i ) {
+				$top .= "." . $seq->version();
+			}
+		} elsif($id_type =~ /^displ/i ) {
+			$top = $seq->display_id();
+			$top = '' unless defined $top;
+			# Warn once per sequence; the same check used to be
+			# made twice, giving two identical warnings.
+			$self->warn("No whitespace allowed in FASTA ID [". $top. "]")
+			  if $top =~ /\s/;
+		} elsif($id_type =~ /^pri/i ) {
+			$top = $seq->primary_id();
+		}
+
+		# the description, with newlines removed, follows the id
+		if ($seq->can('desc') and my $desc = $seq->desc()) {
+			$desc =~ s/\n//g;
+			$top .= " $desc";
+		}
+		# break the sequence into lines of at most $width characters
+		if(defined $str && length($str) > 0) {
+			$str =~ s/(.{1,$width})/$1\n/g;
+		} else {
+			$str = "\n";
+		}
+		$self->_print (">",$top,"\n",$str) or return;
+   }
+
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return 1;
+}
+
+
+=head2 width
+
+ Title   : width
+ Usage   : $obj->width($newval)
+ Function: Get/Set the line width for FASTA output
+ Returns : value of width
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub width{
+   my $self = shift;
+   my $new_width = shift;
+
+   # only a defined argument replaces the stored width
+   $self->{'width'} = $new_width if defined $new_width;
+
+   # an unset (or false) width falls back to the package default
+   return $self->{'width'} || $WIDTH;
+}
+
+=head2 preferred_id_type
+
+ Title   : preferred_id_type
+ Usage   : $obj->preferred_id_type('accession')
+ Function: Get/Set the preferred type of identifier to use in the ">ID" position
+           for FASTA output.
+ Returns : string, one of values defined in @Bio::SeqIO::fasta::SEQ_ID_TYPES.
+           Default = $Bio::SeqIO::fasta::DEFAULT_SEQ_ID_TYPE ('display').
+ Args    : string when setting. This must be one of values defined in 
+           @Bio::SeqIO::fasta::SEQ_ID_TYPES. Allowable values:
+           accession, accession.version, display, primary
+ Throws  : fatal exception if the supplied id type is not in @SEQ_ID_TYPES.
+
+=cut
+
+sub preferred_id_type {
+    my $self = shift;
+    my $type = shift;
+
+    if( defined $type ) {
+	# the comparison is case-insensitive; the lower-cased form is stored
+	my $wanted = lc($type);
+	my $known  = grep { $wanted eq $_ } @SEQ_ID_TYPES;
+	$self->throw(-class=>'Bio::Root::BadParameter',
+		     -text=>"Invalid ID type \"$type\". Must be one of: @SEQ_ID_TYPES")
+	    unless $known;
+	$self->{'_seq_id_type'} = $wanted;
+    }
+    return $self->{'_seq_id_type'} || $DEFAULT_SEQ_ID_TYPE;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fastq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fastq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/fastq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,286 @@
+# BioPerl module for Bio::SeqIO::fastq
+#
+# Cared for by Tony Cox <avc at sanger.ac.uk>
+#
+# Copyright Tony Cox
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# October 29, 2001  incept data
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::fastq - fastq sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq and Bio::Seq::Quality
+objects to and from fastq flat file databases.
+
+Fastq is a file format used frequently at the Sanger Centre to bundle
+a fasta sequence and its quality data. A typical fastq entry takes
+the form:
+
+  @HCDPQ1D0501
+  GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT.....
+  +HCDPQ1D0501
+  !''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65.....
+
+Fastq files have sequence and quality data on a single line and the
+quality values are single-byte encoded. To retrieve the decimal values
+for qualities you need to subtract 33 (or Octal 41) from each byte and
+then convert to a '2 digit + 1 space' integer. You can check if 33 is
+the right number because the first byte which is always '!'
+corresponds to a quality value of 0.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Tony Cox
+
+Email: avc at sanger.ac.uk
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::fastq;
+use strict;
+
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::SeqIO);
+
+# Set up the stream via the parent class, then make sure a sequence
+# factory is available for next_seq.
+#
+# Args    : the constructor arguments, passed through to the parent class
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # Unless a factory was already set, build Bio::Seq::Quality objects so
+    # that the quality values read from the stream have somewhere to go.
+    if ( !defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(
+                -verbose => $self->verbose(),
+                -type    => 'Bio::Seq::Quality',
+            )
+        );
+    }
+}
+
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::Quality object
+ Args    : NONE
+
+=cut
+
+# Read one fastq record from the stream and turn it into a sequence object.
+#
+# Returns : the object built by the sequence factory, or nothing at end of input
+# Throws  : if the record or its header line cannot be parsed
+sub next_seq {
+    my ($self) = @_;
+
+    # A record ends where the next header starts, i.e. at "\n@".
+    # NOTE(review): a quality line that itself begins with '@' would also
+    # terminate the record early -- confirm whether inputs can contain one.
+    local $/ = "\n\@";
+
+    return unless my $entry = $self->_readline;
+
+    if ( $entry eq '@' ) {    # very first one
+        return unless $entry = $self->_readline;
+    }
+
+    # Header, sequence, repeated "+" header (ignored) and quality string.
+    my ($top, $sequence, undef, $qualsequence) = $entry =~ /^
+                                                        \@?(.+?)\n
+                                                        ([^\@]*?)\n
+                                                        \+?(.+?)\n
+                                                        (.*)\n
+                                                      /xs
+        or $self->throw("Can't parse fastq entry");
+    my ($id, $fulldesc) = $top =~ /^\s*(\S+)\s*(.*)/
+        or $self->throw("Can't parse fastq header");
+
+    $sequence     =~ s/\s//g;    # Remove whitespace
+    $qualsequence =~ s/\s//g;
+
+    if ( length($sequence) != length($qualsequence) ) {
+        $self->warn("Fastq sequence/quality data length mismatch error\n");
+
+        # Build one message string: extra arguments to warn() would not be
+        # part of the reported text.
+        $self->warn( "Sequence: $top, seq length: " . length($sequence)
+                   . " Qual length: " . length($qualsequence) . " \n" );
+        $self->warn("$sequence\n");
+        $self->warn("$qualsequence\n");
+        $self->warn("FROM ENTRY: \n\n$entry\n");
+    }
+
+    # Decode each quality byte (value = byte - 33) into a space-separated
+    # string; $qual stays undefined when there are no quality characters.
+    my $qual;
+    foreach my $char ( split( '', $qualsequence ) ) {
+        $qual .= ( unpack( "C", $char ) - 33 ) . " ";
+    }
+
+    # for empty sequences we need to know the mol.type
+    my $alphabet = $self->alphabet();
+    if ( length($sequence) == 0 ) {
+        # let's default to dna
+        $alphabet = "dna" unless defined $alphabet;
+    }
+    else {
+        # we don't need it really, so disable
+        $alphabet = undef;
+    }
+
+    # create the Quality object
+    my $seq = $self->sequence_factory->create(
+        -qual       => $qual,
+        -seq        => $sequence,
+        -id         => $id,
+        -primary_id => $id,
+        -desc       => $fulldesc,
+        -alphabet   => $alphabet,
+    );
+
+    # if there wasn't one before, set the guessed type
+    $self->alphabet( $seq->alphabet() );
+
+    return $seq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq::Quality or Bio::Seq object
+
+
+=cut
+
+# Write each sequence as a ">"-headed record with the residues wrapped at
+# 60 characters per line.
+#
+# Args    : one or more sequence objects
+# Returns : 1 on success; nothing as soon as a print fails
+sub write_seq {
+    my ($self, @seq) = @_;
+
+    foreach my $seq (@seq) {
+        my $str = $seq->seq;
+        my $top = $seq->display_id();
+
+        # Append the description to the header line, newlines removed.
+        if ( $seq->can('desc') and my $desc = $seq->desc() ) {
+            $desc =~ s/\n//g;
+            $top .= " $desc";
+        }
+
+        # An undefined or empty sequence still gets its (empty) data line.
+        if ( defined $str && length($str) > 0 ) {
+            $str =~ s/(.{1,60})/$1\n/g;
+        }
+        else {
+            $str = "\n";
+        }
+
+        $self->_print( ">", $top, "\n", $str ) or return;
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 write_qual
+
+ Title   : write_qual
+ Usage   : $stream->write_qual(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq::Quality object
+
+
+=cut
+
+# Write each sequence's quality values as a ">"-headed record of
+# space-separated numbers.
+#
+# Args    : one or more Bio::Seq::Quality objects; anything else is
+#           skipped with a warning
+# Returns : 1 on success; nothing as soon as a print fails
+sub write_qual {
+    my ($self, @seq) = @_;
+
+    foreach my $seq (@seq) {
+        unless ( $seq->isa("Bio::Seq::Quality") ) {
+            warn("You can't write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n");
+            next;
+        }
+        my @qual = @{ $seq->qual };
+        my $top  = $seq->display_id();
+
+        # Append the description to the header line, newlines removed.
+        if ( $seq->can('desc') and my $desc = $seq->desc() ) {
+            $desc =~ s/\n//g;
+            $top .= " $desc";
+        }
+
+        my $qual = "";
+        if ( @qual > 0 ) {
+            # Break the line each time the accumulated text grows past
+            # another 60 characters.
+            my $max = 60;
+            for my $value (@qual) {
+                $qual .= $value . " ";
+                if ( length($qual) > $max ) {
+                    $qual .= "\n";
+                    $max += 60;
+                }
+            }
+        }
+        else {
+            $qual = "\n";
+        }
+
+        $self->_print( ">", $top, "\n", $qual, "\n" ) or return;
+    }
+
+    # Same flush policy as write_seq.
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 write_fastq
+
+ Title   : write_fastq
+ Usage   : $stream->write_fastq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq::Quality object
+
+
+=cut
+
+# Write each sequence as a four-line fastq record: "@" header, sequence,
+# "+" header, and the quality values encoded as chr(value + 33).
+#
+# Args    : one or more Bio::Seq::Quality objects; anything else is
+#           skipped with a warning
+# Returns : 1 on success; nothing as soon as a print fails
+sub write_fastq {
+    my ($self, @seq) = @_;
+
+    foreach my $seq (@seq) {
+        unless ( $seq->isa("Bio::Seq::Quality") ) {
+            warn("You can't write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n");
+            next;
+        }
+        my $str  = $seq->seq;
+        my @qual = @{ $seq->qual };
+        my $top  = $seq->display_id();
+
+        # Append the description to the header line, newlines removed.
+        if ( $seq->can('desc') and my $desc = $seq->desc() ) {
+            $desc =~ s/\n//g;
+            $top .= " $desc";
+        }
+
+        # An empty sequence or quality list is written as an empty line, so
+        # every record is exactly four lines.
+        $str = '' unless defined $str;
+        my $qual = join '', map { chr( $_ + 33 ) } @qual;
+
+        $self->_print( "\@", $top, "\n", $str,  "\n" ) or return;
+        $self->_print( "+",  $top, "\n", $qual, "\n" ) or return;
+    }
+
+    # Same flush policy as write_seq.
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/featHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/featHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/featHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,801 @@
+# $Id: featHandler.pm,v 1.15.4.1 2006/10/02 23:10:30 sendu Exp $
+# 
+#
+# Helper module for Bio::SeqIO::game::featHandler
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::game::featHandler -- a class for handling feature elements
+
+=head1 SYNOPSIS
+
+This module is not used directly
+
+=head1 DESCRIPTION
+
+Bio::SeqIO::game::featHandler converts game XML E<lt>annotationE<gt>
+elements into flattened Bio::SeqFeature::Generic objects to be added
+to the sequence
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::game::featHandler;
+
+use Bio::SeqFeature::Generic;
+use Bio::Location::Split;
+use Data::Dumper;
+use strict;
+
+use vars qw {};                                                                                
+
+use base qw(Bio::SeqIO::game::gameSubs);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $featHandler = Bio::SeqIO::game::featHandler->new($seq, $seq_h, $ann_l)
+ Function: creates an object to deal with sequence features 
+ Returns : a handler object
+ Args    : $seq   -- a Bio::SeqI compliant object
+           $seq_h -- ref. to a hash of other sequences associated 
+                     with the main sequence (proteins, etc)
+           $ann_l -- ref. to a list of annotations
+
+=cut
+
+sub new {
+    my ($caller, $seq, $seq_h, $ann_l) = @_;
+
+    # Initial handler state: the sequence and lookup structures supplied by
+    # the caller, plus empty accumulators for the feature being built.
+    my %state = (
+        seq         => $seq,
+        curr_feats  => [],
+        curr_coords => [],
+        seq_h       => $seq_h,
+        ann_l       => $ann_l,
+    );
+
+    # Works both as a class method and when called on an existing object.
+    return bless \%state, ref($caller) || $caller;
+}
+
+=head2 add_source
+
+ Title   : add_source
+ Usage   : $featHandler->add_source($seq->length, \%tags);
+ Function: creates a source feature
+ Returns : a Bio::SeqFeature::Generic object 
+ Args    : sequence length and a ref. to a hash of tag/value attributes
+
+=cut
+
+sub add_source {
+    my ($self, $length, $tags) = @_;
+
+    # The source feature spans the sequence from 1 to $length.
+    my $source = Bio::SeqFeature::Generic->new( -primary => 'source',
+                                                -start   => 1,
+                                                -end     => $length,
+                                              );
+
+    # Each tag maps to a list of values; add them one at a time.
+    foreach my $tag ( keys %{$tags} ) {
+        $source->add_tag_value( $tag => $_ ) for @{ $tags->{$tag} };
+    }
+
+    return $source;
+}
+
+=head2 has_gene
+
+ Title   : has_gene
+ Usage   : my $gene = $self->has_gene($gene, $gname, $id)
+ Function: method to get/set the current gene feature
+ Returns : a Bio::SeqFeature::Generic object (if there is a gene)
+ Args    : (optional)
+           $gene  -- an XML element for the annotation
+           $gname -- gene name
+           $id    -- gene ID (not always the same as the name)
+
+=cut
+
+sub has_gene {
+    my ($self, $gene, $gname, $id) = @_;
+
+    # use name preferentially over id. We can't edit IDs in Apollo
+    # AFAIK, and this will create an orphan CDS for newly created
+    # transcripts -- I think this needs more work
+    #$id = $gname if $id && $gname;
+
+    # Getter mode: no annotation element was passed in.
+    if ( !$gene ) {
+        return defined $self->{curr_gene} ? $self->{curr_gene} : 0;
+    }
+
+    # Setter mode: remember the id and name unless already recorded.
+    $self->{curr_ltag}  = $id    if $id    && !$self->{curr_ltag};
+    $self->{curr_gname} = $gname if $gname && !$self->{curr_gname};
+
+    # dbxref children are the only ones that contribute tag values.
+    my @dbxrefs;
+    foreach my $element ( @{ $gene->{Children} } ) {
+        my $name = $element->{Name};
+
+        if ( $name eq 'dbxref' ) {
+            push @dbxrefs, $self->dbxref($element);
+        }
+        elsif ( $name !~ /name/ ) {
+            $self->complain("Unrecognized element '$name'. I don't " .
+                            "know what to do with $name elements");
+        }
+    }
+
+    my $feat = Bio::SeqFeature::Generic->new( -primary => 'gene' );
+
+    # Add each distinct dbxref value once.
+    my %seen;
+    foreach my $val (@dbxrefs) {
+        next if $seen{$val}++;
+        $feat->add_tag_value( dbxref => $val );
+    }
+
+    $self->{curr_gene} = $feat;
+    return $feat;
+}
+
+=head2 _has_CDS
+
+ Title   : _has_CDS
+ Usage   : my $cds = $self->_has_CDS
+ Function: internal getter/setter for CDS features
+ Returns : a Bio::SeqFeature::Generic transcript object (or nothing)
+ Args    : a Bio::SeqFeature::Generic transcript feature
+
+=cut
+
+sub _has_CDS {
+    my ($self, $transcript) = @_;
+
+    # Getter mode: report the current CDS, or 0 when none has been stored.
+    unless ($transcript) {
+        return defined $self->{curr_cds} ? $self->{curr_cds} : 0;
+    }
+
+    # Setter mode: derive a CDS from the transcript and the tags gathered
+    # so far, store it and hand it back.
+    my $cds = $self->_add_CDS( $transcript, $self->{curr_tags} );
+    $self->{curr_cds} = $cds;
+    return $cds;
+}
+
+=head2 add_annotation
+
+ Title   : add_annotation
+ Usage   : $featHandler->add_annotation($seq, $type, $id, $tags, $feats)
+ Function: converts a containment hierarchy into an ordered list of flat features
+ Returns : nothing
+ Args    : $seq   -- a Bio::SeqI compliant object
+           $type  -- the annotation type
+           $id    -- the annotation ID
+           $tags  -- ref. to a hash of tag/value attributes
+           $feats -- ref to an array of Bio::SeqFeature::Generic objects
+
+=cut
+
+# Flatten the annotation currently being processed into a list of features
+# and add that list to $self->{ann_l}, then reset the per-annotation state.
+# Returns 0 when there is no current gene (the work is handed to
+# _add_generic_annotation); otherwise returns whatever $self->flush returns.
+sub add_annotation {
+    my ($self, $seq, $type, $id, $tags, $feats) = @_;
+
+    # is this a generic feature?
+    unless ( $self->has_gene ) {
+	# drop $self from @_ so the remaining arguments can be forwarded as-is
+	shift;
+	$self->_add_generic_annotation(@_);
+	return 0;
+    }
+
+    my $feat;
+
+    if ( $type eq 'gene' ) {
+	# the annotation is the gene itself: reuse the current gene feature
+	$feat = $self->has_gene;
+	$feat->add_tag_value( gene => ($self->{curr_gname} || $id) )
+	    unless $feat->has_tag('gene');
+    }
+    else {
+	# any other type gets its own feature; both it and the current gene
+	# feature are labelled with the gene name (or the id as a fallback)
+	$feat = Bio::SeqFeature::Generic->new;
+	$feat->primary_tag($type);
+	my $gene = $self->has_gene;
+	$gene->add_tag_value( gene => ($self->{curr_gname} || $id) )
+	    unless $gene->has_tag('gene');
+	$feat->add_tag_value( gene => ($self->{curr_gname} || $id) )
+	    unless $feat->has_tag('gene');;
+    }
+    for ( keys %{$tags} ) {
+	# or else add simple tag/value pairs
+	if ( $_ eq 'name' && $tags->{type}->[0] eq 'gene' ) {
+	    # for gene-typed tags the name becomes the 'gene' tag and is then
+	    # removed from the caller's tag hash
+	    $feat->add_tag_value( gene => $tags->{name}->[0] )
+		unless $feat->has_tag( 'gene' );
+	    delete $tags->{name};
+	}
+	else {
+	    # skip a 'type' of gene, and skip 'gene' if already tagged
+	    next if $_ eq 'type' && $tags->{$_}->[0] eq 'gene';
+	    next if $_ eq 'gene' && $feat->has_tag( 'gene' );
+	    for my $val ( @{$tags->{$_}} ) {
+		$feat->add_tag_value( $_ => $val );
+	    }
+	}
+    }
+
+
+    # position the feature using the strand and extent recorded in $self
+    $feat->strand( $self->{curr_strand} );
+    $feat->start( $self->{curr_coords}->[0] );
+    $feat->end( $self->{curr_coords}->[1] );
+
+    # create an array of features for the annotation (order matters)
+    my @annotations = ( $feat );
+
+    # add the gene feature if the annotation is not a gene
+    if ( $self->has_gene && $type ne 'gene') {
+	my $gene = $self->has_gene;
+	$gene->strand( $self->{curr_strand} );
+	$gene->start( $self->{curr_coords}->[0] );
+        $gene->end( $self->{curr_coords}->[-1] );
+	push @annotations, $gene;
+	$self->{curr_gene} = '';
+    }
+
+    # add the subfeatures
+    for ( @{$feats} ) {
+	# complain about (but still keep) anything whose ref() lacks 'Bio'
+	$self->complain("bad feature $_") unless ref($_) =~ /Bio/;
+	push @annotations, $_;
+    }
+    
+    # add the annotation array to the list for this sequence
+    # NOTE(review): $seqid is assigned here but not used again in this sub
+    my $seqid = $seq->id;
+    my $list = $self->{ann_l};
+    
+    # make sure the feature_sets appear in ascending order
+    # (only the first element already in the list is compared)
+    if ( $list->[0] && $annotations[0]->start < $list->[0]->start ) {
+	    unshift @{$list}, @annotations;
+       }
+    else {
+        push @{$list}, @annotations;
+    }
+
+    # garbage collection
+    $self->{curr_gene}   = '';
+    $self->{curr_ltag}   = '';
+    $self->{curr_gname}  = '';
+    $self->{curr_coords} = [];
+    $self->{curr_feats}  = [];
+    $self->{curr_strand} = 0;
+    $self->{ann_seq}     = $seq;    
+    $self->flush;
+}
+
+
+=head2 _add_generic_annotation
+
+ Title   : _add_generic_annotation
+ Usage   : $self->_add_generic_annotation($seq, $type, $id, $tags, $feats)
+ Function: an internal method to handle non-gene annotations
+ Returns : nothing
+ Args    : $seq   -- a Bio::SeqI compliant object
+           $type  -- the annotation type
+           $id    -- the annotation ID
+           $tags  -- ref. to a hash of tag/value attributes
+           $feats -- ref to an array of Bio::SeqFeature::Generic objects
+
+=cut
+
+sub _add_generic_annotation {
+    my ($self, $seq, $type, $id, $tags, $feats) = @_;
+
+    # Every sub-feature takes the annotation's type as its primary tag and
+    # is appended to the annotation list unchanged otherwise.
+    foreach my $feature (@$feats) {
+        $feature->primary_tag($type);
+    }
+    push @{ $self->{ann_l} }, @$feats;
+
+    # Reset the per-annotation state and remember the annotated sequence.
+    @{$self}{qw(curr_coords curr_feats)} = ( [], [] );
+    $self->{curr_strand} = 0;
+    $self->{ann_seq}     = $seq;
+
+    return $self->flush;
+}
+
+
+=head2 feature_set
+
+ Title   : feature_set
+ Usage   : push @feats, $featHandler->feature_set($id, $gname, $set, $anntype);
+ Function: handles <feature_span> hierarchies (usually a transcript)
+ Returns : a list of Bio::SeqFeature::Generic objects
+ Args    : $id      -- ID of the feature set
+           $gname   -- name of the gene
+           $set     -- the <feature_set> object
+           $anntype -- type of the parent annotation
+
+
+=cut
+
+
+# Turn one <feature_set> element into a list of flat features.
+# A set with exactly one feature_span yields that single feature; any other
+# set yields a parent feature (plus a CDS when _has_CDS supplies one) and the
+# collected sub-features.  Also widens $self->{curr_coords} to cover the
+# returned features and flushes the set element.
+sub feature_set {
+    my ($self, $id, $gname, $set, $anntype) = @_;
+    my $stype = $set->{_type}->{Characters};
+    # reset the per-set accumulators filled in by _build_feature_set
+    $self->{curr_loc}      = [];
+    $self->{curr_tags}     = {};
+    $self->{curr_subfeats} = [];
+    $self->{curr_strand}   = 0;
+    my @feats = ();
+    my $tags = $self->{curr_tags};
+    # set name, falling back to the id attribute
+    my $sname = $set->{_name}->{Characters} ||
+        $set->{Attributes}->{id};
+
+    if ( $set->{Attributes}->{problem} ) {
+        $tags->{problem} = [$set->{Attributes}->{problem}];
+    }
+
+    my @fcount = grep { $_->{Name} eq 'feature_span' } @{$set->{Children}};
+    
+    # exactly one span: keep it as a sub-feature and return it on its own
+    if ( @fcount == 1 ) {
+	$self->_build_feature_set($set, 1);
+	# NOTE(review): $feat is undef if no sub-feature was collected
+	my ($feat) = @{$self->{curr_subfeats}};
+	# a lone exon is reported as a transcript
+	$feat->primary_tag('transcript') if $feat->primary_tag eq 'exon';
+	if ( $feat->primary_tag eq 'transcript' ) {
+	    $feat->add_tag_value( gene => ($gname || $id) )
+		unless $feat->has_tag('gene');
+	}
+        
+        # copy the collected tags, skipping false values and repeats
+        my %seen_tag;
+	for my $tag ( keys %{$tags} ) {
+	    for my $val ( @{$tags->{$tag}} ) {
+		$feat->add_tag_value( $tag => $val ) 
+		    if $val && ++$seen_tag{$tag.$val} < 2;
+	    }
+	}
+	@feats = ($feat);
+    }
+    else {
+	$self->{curr_ltag}     = $id;
+	$self->{curr_cds}      = '';
+	# a gene name that is literally 'gene' is replaced by the id
+	$gname = $id if $gname eq 'gene';
+	$self->{curr_gname} = $gname;
+
+	# within a gene, 'transcript' becomes 'mRNA' unless the parent
+	# annotation type mentions RNA
+	if ( $self->has_gene ) {
+	    unless ( $anntype =~/RNA/i ) {
+		$stype =~ s/transcript/mRNA/;
+	    }
+	}
+	
+	$self->{curr_feat}  = Bio::SeqFeature::Generic->new(
+							    -primary => $stype,
+							    -id      => $id,
+							    );
+	my $feat = $self->{curr_feat};
+	$self->_build_feature_set($set);
+	
+	my $gene = $gname || $self->{curr_ltag};
+	
+	$feat->add_tag_value( gene => $gene )
+	    unless $feat->has_tag('gene');
+
+	# if there is an annotated protein product
+	my $cds = $self->_has_CDS( $feat );
+
+	if ( $cds ) {
+            $feat->primary_tag('mRNA');
+
+	    # we really just want one value here
+	    $cds->remove_tag('standard_name') if $cds->has_tag('standard_name');
+	    $cds->add_tag_value( standard_name => $sname );
+	    $cds->remove_tag('gene') if $cds->has_tag('gene');
+	    $cds->add_tag_value( gene => $gene );
+	    
+            # catch empty protein ids
+            if ( $cds->has_tag('protein_id' ) && !$cds->get_tag_values('protein_id') ) {
+		my $pid = $self->protein_id($cds, $sname);
+		$cds->remove_tag('protein_id');
+		$cds->add_tag_value( protein_id => $pid );
+	    }
+
+	    # make sure other subfeats are tied to the transcript
+            # via a 'standard_name' qualifier and the gene via a 'gene' qualifier
+	    my @subfeats = @{$self->{curr_subfeats}};
+            for my $sf ( @ subfeats ) {
+                $sf->add_tag_value( standard_name => $sname )
+		    unless $sf->has_tag('standard_name');
+                $sf->add_tag_value( gene => $gene )
+		    unless $sf->has_tag('gene');
+            }
+	    
+	    $feat->add_tag_value( standard_name => $sname )
+		unless $feat->has_tag('standard_name');
+	    $feat->add_tag_value( gene => $gene )
+		unless $feat->has_tag('gene');
+
+            # if the mRNA and CDS are the same length, the mRNA is redundant
+            # lose the mRNA, steal its tags and give them to the CDS
+            # NOTE(review): %seen is declared but never used in this sub
+            my %seen;
+	    if ( $feat->length == $cds->length ) {
+		for my $t ( $feat->all_tags ) {
+		    next if $t =~ /gene|standard_name/;
+		    # NOTE(review): get_tag_values may return several values;
+		    # confirm add_tag_value keeps all of them
+		    $cds->add_tag_value( $t => $feat->get_tag_values($t) );
+		}
+		undef $feat;
+	    }
+
+	    # CDS and sub-features in ascending start order, mRNA (if kept) first
+	    @feats = sort { $a->start <=> $b->start } ($cds, @subfeats);
+	    unshift @feats, $feat if $feat;
+	}
+	else {
+	    # no CDS: build the parent feature's location from the exons
+	    if ( @{$self->{curr_loc}} > 1 ) {
+		my $loc = Bio::Location::Split->new( -splittype => 'JOIN' );
+		
+		# sort the exons in ascending start order
+		my @loc = sort { $a->start <=> $b->start } @{$self->{curr_loc}};
+		
+		# then add them to the transcript location
+		for ( @loc ) {
+		    $loc->add_sub_Location( $_ ) 
+		}
+		$feat->location( $loc );
+	    }
+	    else {
+		$feat->location( $self->{curr_loc}->[0] );
+	    }	
+ 	    
+	    for ( keys %$tags ) {
+		# expunge duplicate gene attributes
+		next if /gene/ && $feat->has_tag('gene');
+		for my $v ( @{$tags->{$_}} ) {
+		    $feat->add_tag_value( $_ => $v );
+		}
+	    }
+
+	    # make sure other subfeats are tied to the transcript
+	    my @subfeats = @{$self->{curr_subfeats}};
+	    for my $sf ( @ subfeats ) {
+		$sf->add_tag_value( standard_name => $sname )
+		    unless $sf->has_tag('standard_name');
+		$sf->add_tag_value( gene => $gene )
+		    unless $sf->has_tag('gene');
+	    }
+
+	    @feats = ( $feat, @subfeats );
+	}
+    }
+    
+    # adjust the maximum extent of the annotated feature 
+    # if req'd (ie the <annotation> element)
+    # (unset bounds start at large sentinels so any feature replaces them)
+    $self->{curr_coords}->[0] ||= 1000000000000;
+    $self->{curr_coords}->[1] ||= -1000000000000;
+    for ( @feats ) {
+        if ( $self->{curr_coords}->[0] > $_->start ) {
+	    $self->{curr_coords}->[0] = $_->start;
+	}
+	if ( $self->{curr_coords}->[1] < $_->end ) {
+	    $self->{curr_coords}->[1] = $_->end;
+	}
+    }
+    
+    $self->flush( $set );
+
+    return @feats;
+}
+
+
+=head2 _build_feature_set
+
+ Title   : _build_feature_set
+ Usage   : $self->_build_feature_set($set, 1) # 1 flag means retain the exon as a subfeat
+ Function: an internal method to process attributes and subfeats of a feature set
+ Returns : nothing
+ Args    : $set -- a <feature_set> element
+           1    -- optional flag to retain exons as subfeats.  Otherwise, they will
+                   be converted to sublocations of a parent CDS feature
+
+=cut
+
+
+sub _build_feature_set {
+    my ($self, $set, $keep_subfeat) = @_;
+
+    # these elements require special handling: each one is passed to the
+    # method that shares its name
+    my %own_method = map { $_ => 1 } qw(date comment evidence property);
+
+  CHILD:
+    foreach my $element ( @{ $set->{Children} } ) {
+        my $name = $element->{Name};
+
+        if ( $own_method{$name} ) {
+            $self->$name($element);
+            next CHILD;
+        }
+
+        if ( $name eq 'feature_span' ) {
+            $self->_add_feature_span( $element, $keep_subfeat );
+            next CHILD;
+        }
+
+        # need to add the db_xref tags to the gene?
+        # otherwise, simple tag/value pairs
+        if ( $name =~ /synonym|author|description/ ) {
+            $self->{curr_tags}->{$name} = [ $element->{Characters} ];
+            next CHILD;
+        }
+
+        # name, type and seq elements are skipped without comment
+        next CHILD if $name =~ /name|type|seq/;
+
+        $self->complain("Unrecognized element '$name'. I don't " .
+                        "know what to do with $name elements");
+    }
+}
+
+=head2 _add_feature_span
+
+ Title   : _add_feature_span
+ Usage   : $self->_add_feature_span($el, 1)
+ Function: an internal method to process <feature_span> elements
+ Returns : nothing
+ Args    : $el -- a <feature_span> element
+           1   -- an optional flag to retain exons as subfeatures
+
+
+=cut
+
+
+sub _add_feature_span {
+    my ($self, $el, $keep_subfeat) = @_;
+
+    my $type = $el->{_type}->{Characters} || $el->{Name};
+
+    # Evaluated as before although the value is not used further down.
+    my $id   = $el->{Attributes}->{id} || $el->{_name}->{Characters};
+
+    my $relationship = $el->{_seq_relationship};
+    my $start        = int $relationship->{_span}->{_start}->{Characters};
+    my $end          = int $relationship->{_span}->{_end}->{Characters};
+    my $seqid        = $relationship->{Attributes}->{seq};
+
+    push @{ $self->{seq_l} }, $self->{seq_h}->{$seqid};
+
+    # A span whose start lies beyond its end is on the minus strand;
+    # coordinates are stored in ascending order either way.
+    my $strand = 1;
+    if ( $start > $end ) {
+        $strand = -1;
+        ( $start, $end ) = ( $end, $start );
+    }
+    $self->{curr_strand} = $strand;
+
+    # add exons to the transcript
+    if ( $type eq 'exon' ) {
+        push @{ $self->{curr_loc} },
+            Bio::Location::Simple->new( -start  => $start,
+                                        -end    => $end,
+                                        -strand => $self->{curr_strand} );
+    }
+
+    # apollo and gadfly use different tags for the same thing
+    if ( $type =~ /start_codon|translate offset/ ) {
+        $self->{curr_tags}->{codon_start} = [$start];
+    }
+    else {
+        # exons only become sub-features when the caller asked for that
+        return if $type eq 'exon' && !$keep_subfeat;
+
+        push @{ $self->{curr_subfeats} },
+            Bio::SeqFeature::Generic->new( -start   => $start,
+                                           -end     => $end,
+                                           -strand  => $self->{curr_strand},
+                                           -primary => $type );
+    }
+
+    # identify the translation product
+    my $product_id = $el->{Attributes}->{produces_seq};
+    if ( $product_id && $product_id ne 'null' ) {
+        my $subseq = $self->{seq_h}->{$product_id};
+        $self->{curr_tags}->{product}     = [$product_id];
+        $self->{curr_tags}->{translation} = [ $subseq->seq ] if $subseq;
+    }
+
+    return $self->flush($el);
+}
+
+=head2 _add_CDS
+
+ Title   : _add_CDS
+ Usage   : my $cds = $self->_add_CDS($transcript, $tags)
+ Function: an internal method to create a CDS feature from a transcript feature
+ Returns : a Bio::SeqFeature::Generic object
+ Args    : $transcript -- a Bio::SeqFeature::Generic object for a transcript
+           $tags       -- ref. to a hash of tag/value attributes
+
+=cut
+
+# Build a CDS feature for a transcript from the exon locations collected in
+# $self->{curr_loc} and the protein sequence found through the tags.
+#
+# Args    : $feat -- the transcript feature; its location is set here
+#           $tags -- ref. to the hash of tag/value lists; CDS-specific tags
+#                    are moved out of it
+# Returns : the CDS feature, or nothing (after a warning) when no protein
+#           sequence can be found for the transcript
+sub _add_CDS {
+    my ($self, $feat, $tags) = @_;
+    my $loc;
+    my $single = 0;
+
+    if ( @{$self->{curr_loc}} > 1 ) {
+        $loc = Bio::Location::Split->new;
+
+        # sort the exons in ascending start order
+        my @loc = sort { $a->start <=> $b->start } @{$self->{curr_loc}};
+
+        # then add them to the location object
+        for ( @loc ) {
+            $loc->add_sub_Location( $_ );
+        }
+    }
+    else {
+        $loc = $self->{curr_loc}->[0];
+        $single++;
+    }
+
+    # create a CDS
+    my @exons = $single ? $loc : $loc->sub_Location(1);
+
+    $feat->location($loc);
+
+    # try to find a peptide
+    my $seq = $self->{seq_h}->{ $tags->{protein_id}->[0] };
+    $seq  ||= $self->{seq_h}->{ $tags->{product}->[0] } ||
+              $self->{seq_h}->{ $tags->{gene}->[0] } ||
+              $self->{seq_h}->{ $tags->{standard_name}->[0] };
+
+    # Without a protein there is nothing to build a CDS from.  Stop here so
+    # the caller gets a false value instead of a fatal method call on undef
+    # further down.
+    if ( !$seq ) {
+        $self->warn("I did not find a protein sequence for " . $feat->display_name);
+        return;
+    }
+
+    # Can we count on the description format being consistent?
+    # Why is CDS coordinate info saved as description text not
+    # specified in the DTD?  Anyone have a better idea? Aww,
+    # who am I kidding, I'm the only one who will ever read this!
+    my ($start, $stop);
+    my $desc = $seq->description || '';
+    $desc =~ s/,|\n//g;
+    $desc =~ s/\)(\w)/\) $1/g;
+
+    if ( $desc =~ /cds_boundaries:.+?(\d+)\.\.(\d+)/ ) {
+        ($start, $stop) = ($1 - $self->{offset}, $2 - $self->{offset});
+    }
+    else {
+        # OK, I guess the transcript must be the CDS then
+        $start = $loc->start;
+        $stop  = $loc->end;
+    }
+
+    delete $tags->{transcript};
+
+    # now chop off the UTRs to create a CDS
+    my @exons_to_add = ();
+    for ( @exons ) {
+        my $exon = Bio::Location::Simple->new;
+
+        # exon lies entirely outside the coding region
+        if ( $_->end < $start || $_->start > $stop ) {
+            next;
+        }
+        # chop off the left UTR
+        if ( $_->start < $start && $_->end > $start ) {
+            $exon->start( $start );
+        }
+        # chop off the right UTR
+        if ( $_->end > $stop && $_->start < $stop ) {
+            $exon->end( $stop );
+        }
+
+        # ends that were not trimmed keep the exon's own coordinates
+        $exon->start( $_->start ) unless $exon->start;
+        $exon->end( $_->end ) unless $exon->end;
+        $exon->strand( $self->{curr_strand} );
+        push @exons_to_add, $exon;
+    }
+
+    my $cds_loc;
+    if ( @exons_to_add > 1 ) {
+        $cds_loc = Bio::Location::Split->new( -splittype => 'JOIN' );
+        for ( @exons_to_add ) {
+            $cds_loc->add_sub_Location( $_ );
+        }
+    }
+    else {
+        $cds_loc = $exons_to_add[0];
+    }
+
+    # try not to steal too many mRNA attributes for the CDS
+    my $cds_tags = {};
+    for my $k ( keys %$tags ) {
+        if ( $k =~ /product|protein|translation|codon_start/ ) {
+            $cds_tags->{$k} = $tags->{$k};
+            delete $tags->{$k};
+        }
+    }
+
+    # whatever is left belongs to the transcript feature
+    for ( keys %$tags ) {
+        for my $v ( @{$tags->{$_}} ) {
+            $feat->add_tag_value( $_ => $v )
+                unless $feat->has_tag($_);
+        }
+    }
+
+    if ( $self->{curr_gname} ) {
+        $cds_tags->{gene} = [$self->{curr_gname}];
+    }
+
+    my $cds = Bio::SeqFeature::Generic->new(
+        -primary  => 'CDS',
+        -location => $cds_loc,
+    );
+
+    $cds_tags->{translation} = [$seq->seq];
+
+    # add each distinct value of every CDS tag once
+    for ( keys %{$cds_tags} ) {
+        my %seen;
+        for my $val ( @{$cds_tags->{$_}} ) {
+            next if ++$seen{$val} > 1;
+            $cds->add_tag_value( $_ => $val );
+        }
+    }
+
+    return $cds;
+}
+
+1;
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,222 @@
+# $Id: 
+#
+# BioPerl module for Bio::SeqIO::game::gameHandler
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::game::gameHandler -- PerlSAX handler for game-XML
+
+=head1 SYNOPSIS
+
+This module is not used directly
+
+=head1 DESCRIPTION
+
+Bio::SeqIO::game::gameHandler is the top-level XML handler invoked by PerlSAX
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::game::gameHandler;
+
+use Bio::SeqIO::game::seqHandler;
+use strict;
+use vars qw {};
+
+use base qw(Bio::SeqIO::game::gameSubs);
+
+=head2 start_document
+
+ Title   : start_document
+ Function: begin parsing the document
+
+=cut
+
+sub start_document {
+    my ($self, $doc) = @_;
+
+    # give the parent class its turn before adding our own state
+    $self->SUPER::start_document($doc);
+
+    # fresh, empty per-document containers; the e_* handlers in this
+    # package fill several of them, keyed by the running game counter
+    $self->{$_} = {} for qw(sequences annotations computations map_position);
+    return $self->{focus} = {};
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Function: finish parsing the document
+
+=cut
+
+sub end_document {
+    my $self  = shift;
+    my ($doc) = @_;
+
+    # parent-class cleanup; whatever it returns is discarded
+    $self->SUPER::end_document($doc);
+
+    # the handler object itself is the result of the parse
+    return $self;
+}
+
+=head2 load
+
+ Title   : load
+ Usage   : $seqs = $handler->load
+ Function: start parsing
+ Returns : a ref to a list of sequence objects
+ Args    : an optional flag to suppress <computation_analysis> elements (not used yet)
+
+=cut
+
+sub load {
+    my ($self, $suppress_comps) = @_;    # $suppress_comps is accepted but not used
+    my @converted;
+
+    # one pass per <game> element counted by s_game
+    for my $game_no ( 1 .. $self->{game} ) {
+        my $seq_els = $self->{sequences}->{$game_no}
+            or $self->throw("No sequences defined");
+        my $ann_els  = $self->{annotations}->{$game_no};
+        my $comp_els = $self->{computations}->{$game_no};
+        my $map_el   = $self->{map_position}->{$game_no};
+
+        # the focus element is not handed on; it merely has to be present
+        $self->{focus}->{$game_no}
+            or $self->throw("No main sequence defined");
+
+        my $converter = Bio::SeqIO::game::seqHandler->new(
+            $seq_els, $ann_els, $comp_els, $map_el, $self->{has_source}
+        );
+        push @converted, $converter->convert;
+    }
+
+    return \@converted;
+}
+
+=head2 s_game
+
+ Title   : s_game
+ Function: begin parsing game element
+
+=cut
+
+sub s_game {
+    my ($self) = @_;
+    my $game_el = $self->curr_element;
+    $self->{game}++;    # running count of <game> elements seen
+
+    my $version = $game_el->{Attributes}->{version};
+    if ( !defined $version ) {
+        $self->complain("No GAME-xml version specified -- guessing v1.2\n");
+        $version = 1.2;
+    }
+
+    # $version is always defined from here on; 1.2 is the only version
+    # that gets an origin offset of 1 and no warning
+    my $is_v12 = ( $version == 1.2 );
+    $self->{origin_offset} = $is_v12 ? 1 : 0;
+
+    if ( !$is_v12 ) {
+        $self->complain("GAME version $version is not supported\n",
+                        "I'll try anyway but I may fail!\n");
+    }
+}
+
+=head2 e_game
+
+ Title   : e_game
+ Function: process the game element
+
+=cut
+
+sub e_game {
+    my $self = shift;
+    my ($game_el) = @_;
+
+    # nothing to collect from <game> itself; just hand it to flush
+    return $self->flush($game_el);
+}
+
+=head2 e_seq
+
+ Title   : e_seq
+ Function: process the sequence element
+
+=cut
+
+sub e_seq {
+    my ($self) = @_;
+    my $seq_el  = $self->curr_element();
+    my $game_no = $self->{game};
+
+    # a <seq> carrying a focus attribute is remembered as the focus
+    # sequence of the current game
+    $self->{focus}->{$game_no} = $seq_el
+        if defined $seq_el->{Attributes}->{focus};
+
+    # every <seq> is queued under the current game number
+    push @{ $self->{sequences}->{$game_no} ||= [] }, $seq_el;
+
+    return $self->flush;
+}
+
+=head2 e_map_position
+
+ Title   : e_map_position
+ Function: process the map_position element
+
+=cut
+
+sub e_map_position {
+    my ($self) = @_;
+    my $map_el = $self->curr_element;
+
+    # one map_position per game: a later one replaces an earlier one
+    return $self->{map_position}->{ $self->{game} } = $map_el;
+}
+
+=head2 e_annotation
+
+ Title   : e_annotation
+ Function: process the annotation
+
+=cut
+
+sub e_annotation {
+    # Unpack from @_ like the sibling handlers.  The previous
+    # "my ($self, $e) = shift;" only ever filled $self and left $e undef.
+    my ($self, $e) = @_;
+
+    # the element being closed is the one on top of the node stack
+    my $el = $self->curr_element;
+
+    # queue it under the current game number for later conversion
+    $self->{annotations}->{$self->{game}} ||= [];
+    my $anns = $self->{annotations}->{$self->{game}};
+    push @{$anns}, $el;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameSubs.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameSubs.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameSubs.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,481 @@
+# $Id: gameSubs.pm,v 1.9.4.1 2006/10/02 23:10:30 sendu Exp $
+# some of the following code was pillaged from the CPAN module
+# XML::Handler::Subs
+#
+# Copyright (C) 1999 Ken MacLeod
+# XML::Handler::XMLWriter is free software; you can redistribute it and/or
+# modify it under the same terms as Perl itself.
+
+#
+# BioPerl module for Bio::SeqIO::game::gameSubs
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::game::gameSubs -- a base class for game-XML parsing
+
+=head1 SYNOPSIS
+
+Not used directly
+
+=head1 DESCRIPTION
+
+A bag of tricks for game-XML parsing.  The PerlSAX handler methods were
+stolen from Chris Mungall's XML base class, which he stole from Ken MacLeod's
+XML::Handler::Subs
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::game::gameSubs;
+use XML::Parser::PerlSAX;
+use UNIVERSAL;
+use strict;
+
+use vars qw {};
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : not used directly
+ Returns : a gameHandler object
+ Args    : an XML filename
+
+=cut
+
+sub new {
+    my ($type, $file, @settings) = @_;
+
+    # a single extra argument is a hash ref whose contents are copied;
+    # anything else is taken as a flat key/value list
+    my %self = @settings == 1 ? %{ $settings[0] } : @settings;
+
+    # only a true file name is recorded
+    $self{file} = $file if $file;
+
+    return bless \%self, $type;
+}
+
+
+=head2 go
+
+ Title   : go
+ Usage   : not used directly
+ Function: starts PerlSAX XML parsing
+
+=cut
+
+sub go {
+    my ($self) = @_;
+
+    # this object doubles as the SAX handler for its own file
+    my $parser = XML::Parser::PerlSAX->new;
+    return $parser->parse(
+        Source  => { SystemId => "$self->{file}" },
+        Handler => $self,
+    );
+}
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : not used directly 
+
+=cut
+
+sub start_document {
+    my ($self) = @_;
+
+    # two parallel stacks: names of the open elements, and the element
+    # hashes themselves
+    $self->{Names} = [];
+    return $self->{Nodes} = [];
+}
+
+
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : not used directly
+
+=cut
+
+sub end_document {
+    my ($self) = @_;
+
+    # drop both parser stacks in one go
+    delete @{$self}{qw(Names Nodes)};
+
+    return;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   : not used directly
+
+=cut
+
+sub start_element {
+    my ($self, $element) = @_;
+
+    $element->{Children} = [];
+    $element->{Name} =~ tr/A-Z/a-z/;    # element names are handled lower-cased
+
+    # the element is now the innermost open one
+    push @{ $self->{Names} }, $element->{Name};
+    push @{ $self->{Nodes} }, $element;
+
+    # handler name: "s_" + element name, non-word characters mapped to "_"
+    ( my $handler = "s_" . $element->{Name} ) =~ s/[^a-zA-Z0-9_]/_/g;
+    print STDERR "xml_subs:$handler\n" if $ENV{DEBUG_XML_SUBS};
+
+    # report 0 when no such handler exists, otherwise call it and report 1
+    return 0 unless $self->can($handler);
+    $self->$handler($element);
+    return 1;
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : not used directly
+
+=cut
+
+sub end_element {
+    my ($self, $element) = @_;
+
+    # handler name: "e_" + lower-cased element name, non-word chars as "_"
+    $element->{Name} =~ tr/A-Z/a-z/;
+    ( my $handler = "e_" . $element->{Name} ) =~ s/[^a-zA-Z0-9_]/_/g;
+    print STDERR "xml_subs:$handler\n" if $ENV{DEBUG_XML_SUBS};
+
+    my ($handled, $status) = (0, 0);
+    if ( $self->can($handler) ) {
+        $status  = $self->$handler($element) || 0;
+        $handled = 1;
+    }
+
+    # close the element: take it and its name off the stacks
+    pop @{ $self->{Names} };
+    my $closed = pop @{ $self->{Nodes} };
+
+    # An element with no handler, or whose handler returned -1, is kept
+    # in the tree: appended to the parent's Children and also reachable
+    # as $parent->{"_<name>"} (a later sibling of the same name replaces
+    # that shortcut).
+    if ( ( $status eq -1 || !$handled ) && @{ $self->{Nodes} } ) {
+        my $parent = $self->{Nodes}->[-1];
+        push @{ $parent->{Children} }, $closed;
+        $parent->{ "_" . $closed->{Name} } = $closed;
+    }
+
+    return $handled;
+}
+
+
+
+=head2 characters
+
+ Title   : characters
+ Usage   : not used directly
+
+=cut
+
+sub characters {
+    my ($self, $chunk) = @_;
+
+    # trim the chunk, then append it to the text of the open element
+    my $text = $self->strip_characters( $chunk->{Data} );
+    $self->curr_element()->{Characters} .= $text;
+
+    return 0;
+}
+
+=head2 strip_characters
+
+ Title   : strip_characters
+ Usage   : not used directly
+ Function: cleans up XML element contents
+
+=cut
+
+sub strip_characters {
+    my ($self, $str) = @_;
+
+    # Remove any mix of spaces, newlines and tabs from both ends; text in
+    # the middle of the string is left alone.
+    #
+    # The trailing pattern used to be / *[\n\t]*$/, which only matched
+    # "spaces, then newlines/tabs".  The common "text\n  " (newline
+    # followed by indentation) therefore kept its newline.  Both ends now
+    # use the same character class.
+    $str =~ s/\A[ \n\t]+//;
+    $str =~ s/[ \n\t]+\z//;
+
+    return $str;
+}
+
+=head2 curr_element
+
+ Title   : curr_element
+ Usage   : not used directly
+ Function: returns the currently open element
+
+=cut
+
+sub curr_element {
+    my ($self) = @_;
+
+    # top of the node stack, i.e. the innermost element still open
+    return $self->{Nodes}[-1];
+}
+
+=head2 flush
+
+ Title   : flush
+ Usage   : $self->flush($element) # or $element->flush
+ Function: prune a branch from the XML tree
+ Returns : true if successful
+ Args    : an element object (optional)
+
+=cut
+
+sub flush {
+    my ($self, $victim) = @_;
+    $victim ||= $self->curr_element;
+
+    # NOTE(review): this only points the local variable at a new empty
+    # hash.  The element held by the caller (and by the tree) is left
+    # untouched, so nothing is actually pruned.  Bio::SeqIO::game::gameHandler's
+    # e_seq stores the current element and then calls flush on it, so
+    # making this destructive would empty data that is still needed --
+    # check every caller before changing it.
+    $victim = {};
+
+    return 1;
+}
+
+# throw a non-fatal warning
+
+=head2 complain
+
+ Title   : complain
+ Usage   : $self->complain("This is terrible; I am not happy")
+ Function: throw a non-fatal warning, formats message for pretty-printing
+ Returns : nothing
+ Args    : a list of strings
+
+=cut
+
+sub complain {
+    my ($self, @parts) = @_;
+
+    # silent unless the object was created with a true 'verbose' setting
+    return 0 unless $self->{verbose};
+
+    # join all pieces and treat embedded newlines as plain spaces
+    ( my $text = join '', @parts ) =~ s/\n/ /g;
+
+    # re-wrap word by word: a new line is started whenever the word would
+    # push the current (last) line past 45 characters
+    my $wrapped = '';
+    for my $word ( split /\s+/, $text ) {
+        my ($last_line) = $wrapped =~ /\n?(.+)$/;
+        my $used = $last_line ? length $last_line : 0;
+        $wrapped .= "\n" if length($word) + $used > 45;
+        $wrapped .= $word . ' ';
+    }
+
+    return $self->warn($wrapped);
+}
+
+=head2 dbxref
+
+ Title   : dbxref
+ Usage   : $self->dbxref($el, $tags)
+ Function: an internal method to flatten dbxref elements
+ Returns : the db_xref (eg wormbase:C02D5.1)
+ Args    : an element object (reqd) and a hash ref of tag/values (optional)
+
+=cut
+
+sub dbxref {
+    my ($self, $el, $tags) = @_;
+    $tags = $self->{curr_tags} unless $tags;
+
+    my $db = $el->{_xref_db}->{Characters};
+
+    # the accession lives in the first of these child elements present
+    my $acc_el = $el->{_unique_id}
+              || $el->{_db_xref_id}
+              || $el->{_xref_db_id};
+    my $id = $acc_el->{Characters};
+    return 0 unless $id;
+
+    $self->flush($el);
+
+    # an id made only of word characters gets the database name as a
+    # prefix; anything else is stored as it is
+    $id = "$db:$id" if $id =~ /^\w+$/;
+
+    push @{ $tags->{dbxref} ||= [] }, $id;
+    return $id;
+}
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $self->comment($comment_element)
+ Function: a method to flatten comment elements
+ Returns : a string
+ Args    : a comment element (reqd) and a hash ref of tag/values (optional)
+ Note    : The hope here is that we can unflatten structured comments
+           if game-derived annotations happen to make a return trip
+
+=cut
+
+sub comment {
+    my ($self, $el, $tags) = @_;
+
+    # fall back to the tag hash of the feature currently being built
+    $tags ||= $self->{curr_tags};
+    my $text = $el->{_text}->{Characters};
+    my $pers = $el->{_person}->{Characters};
+    my $date = $el->{_date}->{Characters};
+    my $int  = $el->{_internal}->{Characters};
+    $self->flush( $el );
+
+    # Declare the variable unconditionally.  The previous
+    # 'my $comment = "..." if $pers;' made the declaration conditional,
+    # which perlsyn documents as undefined behaviour: when $pers was empty
+    # the variable could keep its value from the previous call, so text
+    # from earlier comments leaked into later ones.
+    # The result stays undef when none of the four parts is present.
+    my $comment;
+    $comment    = "person=$pers; "  if $pers;
+    $comment   .= "date=$date; "    if $date;
+    $comment   .= "internal=$int; " if $int;
+    $comment   .= "text=$text"      if $text;
+
+    # store the flattened comment as one more 'comment' tag value
+    $tags->{comment} ||= [];
+    push @{$tags->{comment}}, $comment;
+    $comment;
+}
+
+=head2 property
+
+ Title   : property
+ Usage   : $self->property($property_element)
+ Function: an internal method to flatten property elements
+ Returns : a hash reference
+ Args    : a property/output element (reqd) and a hash ref of tag/values (optional)
+ Note: This method is aliased to 'output' to handle structurally identical output elements
+
+=cut
+
+*output = \&property;
+sub property {
+    my ($self, $el, $tags) = @_;
+    $tags = $self->{curr_tags} unless $tags;
+
+    # <type> names the tag, <value> supplies the value
+    my ($key, $value) = map { $el->{$_}->{Characters} } qw(_type _value);
+    $self->flush($el);
+
+    # add the value to whatever is already stored under that tag
+    push @{ $tags->{$key} ||= [] }, $value;
+
+    return $tags;
+}
+
+=head2 evidence
+
+ Title   : evidence
+ Usage   : $self->evidence($evidence_element)
+ Function: a method to flatten evidence elements
+ Returns : a string
+ Args    : an evidence element
+
+=cut
+
+sub evidence {
+    my ($self, $el) = @_;
+    my $tags = $self->{curr_tags};
+
+    # an evidence element without text is ignored
+    my $text = $el->{Characters} or return 0;
+    my $type = $el->{Attributes}->{type};
+    my $res  = $el->{Attributes}->{result};
+    $self->flush( $el );
+
+    # Start from an empty string on every call.  The previous
+    # 'my $evidence = "..." if $type;' made the declaration conditional,
+    # which perlsyn documents as undefined behaviour: when $type was empty
+    # the variable could keep its value from the previous call.
+    my $evidence = '';
+    $evidence   .= "type=$type; "  if $type;
+    $evidence   .= "result=$res; " if $res;
+    $evidence   .= "evidence=$text";
+
+    # store the flattened string as one more 'evidence' tag value
+    $tags->{evidence} ||= [];
+    push @{$tags->{evidence}}, $evidence;
+    $evidence;
+}
+
+=head2 date
+
+ Title   : date
+ Usage   : $self->date($date_element)
+ Function: a method to flatten date elements
+ Returns : true if successful
+ Args    : a date element
+
+=cut
+
+sub date {
+    my ($self, $el) = @_;
+    my $tags = $self->{curr_tags};
+
+    # a date element without text is ignored
+    my $date = $el->{Characters};
+    return 0 unless $date;
+
+    my $stamp = $el->{Attributes}->{timestamp};
+    $self->flush($el);
+
+    # date and timestamp are pushed together, so the two lists stay in
+    # step (the timestamp may be undef)
+    push @{ $tags->{date}      ||= [] }, $date;
+    push @{ $tags->{timestamp} ||= [] }, $stamp;
+
+    return 1;
+}
+
+
+=head2 protein_id
+
+ Title   : protein_id
+ Usage   : $pid = $self->protein_id($cds, $standard_name)
+ Function: a method to search for a protein name
+ Returns : a string
+ Args    : the CDS object plus the transcript's 'standard_name'
+
+=cut
+
+sub protein_id {
+    my ($self, $cds, $sn) = @_;
+    my $psn;
+
+    # sources for the name, in order of preference: the CDS tags
+    # protein_id, product and gene, then the transcript's standard_name
+    if ( $cds->has_tag('protein_id') ) {
+        ($psn) = $cds->get_tag_values('protein_id');
+    }
+    elsif ( $cds->has_tag('product') ) {
+        ($psn) = $cds->get_tag_values('product');
+
+        # Keep only the last whitespace-separated word of the product.
+        # The previous pattern /.+?(\S+)$/ always consumed at least one
+        # character, so a single-word product lost its first letter
+        # ("CG1-RA" became "G1-RA").  Requiring a real whitespace
+        # separator leaves single-word values untouched.
+        $psn =~ s/.*\s(\S+)$/$1/;
+    }
+    elsif ( $cds->has_tag('gene') ) {
+        ($psn) = $cds->get_tag_values('gene');
+    }
+    elsif ( $sn ) {
+        $psn = $sn;
+    }
+    else {
+        $self->complain("Could not find an ID for the protein");
+        return '';
+    }
+
+    # change the first "-R" in the name to "-P"
+    $psn =~ s/-R/-P/;
+    return $psn;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameWriter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameWriter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/gameWriter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1272 @@
+# $Id: gameWriter.pm,v 1.13.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::game::gameWriter
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::game::gameWriter -- a class for writing game-XML
+
+=head1 SYNOPSIS
+
+  use Bio::SeqIO;
+
+  my $in  = Bio::SeqIO->new( -format => 'genbank',
+                             -file => 'myfile.gbk' );
+  my $out = Bio::SeqIO->new( -format => 'game',
+                             -file => 'myfile.xml' );
+
+  # get a sequence object
+  my $seq = $in->next_seq;
+
+  #write it in GAME format
+  $out->write_seq($seq);
+
+=head1 DESCRIPTION
+
+Bio::SeqIO::game::gameWriter writes GAME-XML (v. 1.2) that is readable
+by Apollo.  It is best not used directly.  It is accessed via
+Bio::SeqIO.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::game::gameWriter;
+
+use strict;
+use IO::String;
+use XML::Writer;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::Tools::Unflattener;
+
+use base qw(Bio::SeqIO::game::gameSubs);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $writer = Bio::SeqIO::game::gameWriter->new($seq);
+ Function: constructor method for gameWriter 
+ Returns : a game writer object 
+ Args    : a Bio::SeqI implementing object
+           optionally, an argument to switch map_position on
+           ( map => 1 ).  This will create a map_position element
+           that will cause the feature coordinates to be remapped to
+           a parent sequence.  A sequence name in the format seq:xxx-xxx
+           is expected to determine the offset for the map_position.
+           The default behavior is to have features mapped relative to 
+           the sequence contained in the GAME-XML file
+
+=cut
+
+sub new {
+    my ($caller, $seq, %arg) = @_;
+
+    # works as a class method and as an instance method
+    my $class = ref($caller) || $caller;
+
+    my %self = ( seq => $seq );
+
+    # a <map_position> element is written only when asked for
+    $self{map} = 1 if $arg{map};
+
+    # counters for numbering anonymous result and feature sets
+    $self{anon_set_counters} = {};
+
+    return bless \%self, $class;
+}
+
+=head2 write_to_game
+
+ Title   : write_to_game
+ Usage   : $writer->write_to_game
+ Function: writes the sequence object to game-XML 
+ Returns : xml as a multiline string
+ Args    : none
+
+=cut
+
+sub write_to_game {
+    # Serialise the sequence held in $self->{seq} as a GAME-XML string.
+    # Steps, in order: detach all features; re-attach the flat ones and
+    # run the Unflattener over them; re-attach the features that were
+    # already nested; then write the <game> document into an in-memory
+    # string with XML::Writer and return that string.
+    # NOTE(review): the feature list of the sequence object itself is
+    # modified (remove_SeqFeatures / add_SeqFeature below).
+    my $self   = shift;
+    my $seq    = $self->{seq};
+    my @feats  = $seq->remove_SeqFeatures;
+
+    # intercept nested features 
+    # (features that already have subfeatures are kept away from the
+    # unflattener and added back afterwards)
+    my @nested_feats = grep { $_->get_SeqFeatures } @feats;
+    @feats = grep { !$_->get_SeqFeatures } @feats;
+    map { $seq->add_SeqFeature($_) } @feats;
+
+# NB -- Maybe this belongs in Bio::SeqFeature::Tools::Unflattener
+
+#    # intercept non-coding RNAs and transposons with contained genes
+#    # GAME-XML has these features as top level annotations which contain
+#    # gene elements
+#    my @gene_containers = ();
+     
+#    for ( @feats ) {
+#	if ( $_->primary_tag =~ /[^m]RNA|repeat_region|transpos/ && 
+#	     $_->has_tag('gene') ) {
+#	    my @genes = $_->get_tag_values('gene');
+#	    my ($min, $max) = (10000000000000,-10000000000000);
+#	    for my $g ( @genes ) {
+#		my $gene;
+#		for my $item ( @feats ) {
+#		    next unless $item->primary_tag eq 'gene';
+#		    my ($n) = $item->get_tag_values('gene');
+#		    next unless $n =~ /$g/;
+#		    $gene = $item;
+#		    last;
+#		}
+#		next unless $gene && ref $gene;
+#		$max = $gene->end if $gene->end > $max;
+#		$min = $gene->start if $gene->start < $min;
+#	    }
+#	    
+#	    push @gene_containers, $_ if $_->length >= ($max - $min);
+#	}
+#	else {
+#	    $seq->add_SeqFeature($_);
+#	}
+#    }
+	
+    # unflatten 
+    my $uf = Bio::SeqFeature::Tools::Unflattener->new;
+    $uf->unflatten_seq( -seq => $seq, use_magic => 1 );
+    
+    # rearrange snRNA and transposon hierarchies
+    # $self->_rearrange_hierarchies($seq, @gene_containers);
+
+    # add back nested feats
+    $seq->add_SeqFeature( @nested_feats  );
+    
+    my $atts  = {};
+    my $xml = '';
+    
+    # write the XML to a string
+    my $xml_handle = IO::String->new($xml);
+    my $writer = XML::Writer->new(OUTPUT      => $xml_handle,
+				  DATA_MODE   => 1,
+				  DATA_INDENT => 2,
+				  NEWLINE     => 1
+				  );
+    # the writer is kept on the object for the other writing methods
+    $self->{writer} = $writer;
+#    $writer->xmlDecl("UTF-8");
+#    $writer->doctype("game", 'game', "http://www.fruitfly.org/annot/gamexml.dtd.txt");
+    $writer->comment("GAME-XML generated by Bio::SeqIO::game::gameWriter");
+    $writer->comment("Created " . localtime);
+    $writer->comment('Questions: mckays at cshl.edu');
+    $writer->startTag('game', version => 1.2);
+    
+    # candidate "source" features: primary tag contains source, origin or
+    # region (case-insensitive)
+    my @sources = grep { $_->primary_tag =~ /source|origin|region/i } $seq->get_SeqFeatures;
+    
+    # only a source feature as long as the whole sequence contributes
+    # sequence-level attributes; the first value of each listed tag is used
+    for my $source ( @sources ) {
+	next unless $source->length == $seq->length;
+	for ( qw{ name description db_xref organism md5checksum } ) {
+	    if ( $source->has_tag($_) ) {
+		$self->{has_organism} = 1 if /organism/;
+		($atts->{$_}) = $source->get_tag_values($_);
+	    }
+	}
+    }
+    
+
+    #set a name in the attributes if none was given
+    # (accession number, unless that is the literal 'unknown'; otherwise
+    # the display name)
+    $atts->{name} ||= $seq->accession_number ne 'unknown'
+      ? $seq->accession_number : $seq->display_name;
+
+    $self->_seq($seq, $atts);
+    
+    # make a map_position element if req'd
+    if ( $self->{map} ) {
+	my $seqtype;
+	# NOTE(review): nothing in this method sets $atts->{mol_type};
+	# presumably _seq may add it to the shared hash -- confirm
+	if ( $atts->{mol_type} || $seq->alphabet ) {
+	    $seqtype = $atts->{mol_type} || $seq->alphabet;
+	}
+	else {
+	    $seqtype = 'unknown';
+	}    
+	
+	$writer->startTag(
+			  'map_position', 
+			  seq => $atts->{name},
+			  type => $seqtype
+			  );
+	
+	# the name is expected to look like arm:start..end or arm:start-end
+	# NOTE(review): when the name does not match, $start and $end are
+	# undef and are still passed to _span
+	my ($arm, $start, undef, $end) = $atts->{name} =~ /(\S+):(-?\d+)(\.\.|-)(-?\d+)/;
+	$self->_element('arm', $arm) if $arm;
+	$self->_span($start, $end);
+	$writer->endTag('map_position');
+    }
+
+    # write every top-level feature with the method that fits its kind
+    for ( $seq->top_SeqFeatures ) {
+
+      if($_->isa('Bio::SeqFeature::Computation')) {
+	$self->_comp_analysis($_);
+      }
+      else {
+        # if the feature has subfeatures, we will assume it is a gene
+	# (hope this is safe!)
+	if ( $_->get_SeqFeatures ) {
+	  $self->_write_gene($_);
+	} else {
+	  # non-gene stuff only
+	  # (gene parts left over at top level are skipped)
+	  next if $_->primary_tag =~ /CDS|mRNA|exon|UTR/;
+	  $self->_write_feature($_);
+	}
+      }
+    }    
+    
+    $writer->endTag('game');
+    $writer->end;
+    # $xml is the string the IO::String handle was writing into
+    $xml;
+}
+
+=head2 _rearrange_hierarchies
+
+ Title   : _rearrange_hierarchies
+ Usage   : $self->_rearrange_hierarchies($seq)
+ Function: internal method to rearrange gene containment hierarchies
+           so that snRNA or transposon features contain their genes
+           rather than the other way around
+ Returns : nothing
+ Args    : a Bio::RichSeq object
+ Note    : Not currently used, may be removed
+
+=cut
+
+sub _rearrange_hierarchies { #renamed to not conflict with Bio::Root::_rearrange
+    my ($self, $seq, @containers) = @_;
+
+    # detach every feature and sort it into genes and everything else
+    my (@genes, @others);
+    for my $feat ( $seq->remove_SeqFeatures ) {
+        if ( $feat->primary_tag eq 'gene' ) {
+            push @genes, $feat;
+        }
+        else {
+            push @others, $feat;
+        }
+    }
+
+    # move each gene into the first container that names it in a 'gene' tag
+    for my $container ( @containers ) {
+        for my $wanted ( $container->get_tag_values('gene') ) {
+            for my $gene ( @genes ) {
+                next unless $gene;    # slot already claimed
+                my ($gname) = $gene->get_tag_values('gene');
+                next unless $gname eq $wanted;
+                $container->add_SeqFeature($gene);
+
+                # $gene aliases the array element, so this empties the slot
+                undef $gene;
+            }
+        }
+    }
+
+    # re-attach: non-genes, then the containers, then the unclaimed genes
+    return $seq->add_SeqFeature( @others, @containers, grep { defined $_ } @genes );
+}
+
+
+=head2 _write_feature
+
+ Title   : _write_feature
+ Usage   : $self->_write_feature($feat, 1)
+ Function: internal method for writing generic features as <annotation> elements
+ Returns : nothing
+ Args    : a Bio::SeqFeature::Generic object and an optional flag to write a
+           bare feature set with no annotation wrapper
+
+=cut
+
+sub _write_feature {
+    my ($self, $feat, $bare) = @_;
+    my $writer = $self->{writer};
+    my $type   = $feat->primary_tag;
+
+    # id: the first name found under 'standard_name', the feature's own
+    # primary tag, or 'ID' ...
+    my $id;
+    for my $tag ( 'standard_name', $type, 'ID' ) {
+        $id = $self->_find_name($feat, $tag);
+        last if $id;
+    }
+
+    # ... otherwise "<type>_<n>", numbered by a per-type counter that is
+    # kept on the writer object under the type's own name
+    $id ||= $type . '_' . ++$self->{$type}->{id};
+
+    # the <annotation> wrapper is left out for a bare feature set
+    if ( !$bare ) {
+        $writer->startTag('annotation', id => $id);
+        $self->_element('name', $id);
+        $self->_element('type', $type);
+    }
+
+    $writer->startTag('feature_set', id => $id);
+    $self->_element('name', $id);
+    $self->_element('type', $type);
+    $self->_render_tags(
+        $feat,
+        \&_render_date_tags,
+        \&_render_comment_tags,
+        \&_render_tags_as_properties,
+    );
+    $self->_feature_span($id, $feat);
+    $writer->endTag('feature_set');
+
+    $writer->endTag('annotation') unless $bare;
+}
+
+=head2 _write_gene
+
+ Title   : _write_gene
+ Usage   : $self->_write_gene($feature)
+ Function: internal method for rendering gene containment hierarchies into 
+           a nested <annotation> element 
+ Returns : nothing
+ Args    : a nested Bio::SeqFeature::Generic gene feature
+ Note    : A nested gene hierarchy (gene->mRNA->CDS->exon) is expected.  If other gene 
+           subfeatures occur as level one subfeatures (same level as mRNA subfeats) 
+           an attempt will be made to link them to transcripts via the 'standard_name'
+           qualifier
+
+=cut
+
+sub _write_gene {
+    my ($self, $feat) = @_;
+    my $writer = $self->{writer};
+    my $str = $feat->strand;
+    my $id = $self->_find_name($feat, 'standard_name')
+          || $self->_find_name($feat, 'gene')
+	  || $self->_find_name($feat, $feat->primary_tag)
+	  || $self->_find_name($feat, 'locus_tag') 
+	  || $self->_find_name($feat, 'symbol')
+          || $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
+Could not find a gene/feature ID, feature must have a primary tag or a tag
+with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
+EOM
+    my $gid = $self->_find_name($feat, 'gene') || $id;
+
+    $writer->startTag('annotation', id => $id);
+    $self->_element('name', $gid);
+    $self->_element('type', $feat->primary_tag);
+    $self->_render_tags( $feat,
+			 \&_render_date_tags,
+			 \&_render_dbxref_tags,
+			 \&_render_comment_tags,
+			 \&_render_tags_as_properties,
+		       );
+    
+    my @genes;
+    
+    if ( $feat->primary_tag eq 'gene' ) {
+	@genes = ($feat);
+    }
+    else {
+	# we are in a gene container; gene must then be one level down
+	@genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
+    }
+
+    for my $g ( @genes ) {
+	my $id ||= $self->_find_name($g, 'standard_name')
+               || $self->_find_name($g, 'gene') 
+	       || $self->_find_name($feat, 'locus_tag')
+               || $self->_find_name($feat, 'symbol')
+               || $self->throw("Could not find a gene ID");
+	my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);
+
+	$writer->startTag('gene', association => 'IS');
+        $self->_element('name', $gid);
+        $writer->endTag('gene');
+
+        my $proteins;
+	my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
+	my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
+	my @variants = ('A' .. 'Z');
+
+	for my $mRNA (@mRNAs) {
+	    my ($sn, @units);
+            # if the mRNA is a generic transcript, it must be a non-spliced RNA gene
+            # Make a synthetic exon to help build a hierarchy.  We have to assume that
+            # the location is not segmented (otherwise it should be a mRNA)
+	    if ( $mRNA->primary_tag eq 'transcript') {
+		my $exon = Bio::SeqFeature::Generic->new ( -primary => 'exon' );
+		$exon->location($mRNA->location);
+		$mRNA->add_SeqFeature($exon);
+	    }
+
+            # no subfeats? Huh? revert to generic feature
+	    unless ( $mRNA->get_SeqFeatures ) {
+		$self->_write_feature($mRNA, 1); # 1 flag writes the bare feature
+                                                 # with no annotation wrapper
+		next;
+	    }
+
+	    my $name = $self->_find_name($mRNA, $mRNA->primary_tag) 
+                     || $self->_find_name($mRNA, 'standard_name');
+
+	    my %attributes;
+            my ($cds) = grep { $_->primary_tag eq 'CDS' } $mRNA->get_SeqFeatures;
+
+	    # make sure we have the right CDS for alternatively spliced genes
+	    # This is meant to deal with sequences from flattened game annotations, 
+	    # where both the mRNA and CDS have split locations
+	    if ( $cds && @mRNAs > 1 && $name ) {
+		$cds = $self->_check_cds($cds, $name);
+	    }
+	    elsif ( $cds && @mRNAs == 1 ) {
+		# The mRNA/CDS pairing must be right. Get the transcript name from the CDS
+		if ( $cds->has_tag('standard_name') ) {
+		    ($name) = $cds->get_tag_values('standard_name');
+                }
+	    }
+	    
+	    if ( !$name ) {
+		# assign a name to the transcript if it has no 'standard_name' binder
+		$name = $id . '-R' . (shift @variants);
+	    }
+
+            my $pname;
+
+	    if ( $cds ) {
+		($sn) = $cds->get_tag_values('standard_name')
+		    if $cds->has_tag('standard_name');
+		($sn) ||= $cds->get_tag_values('mRNA')
+		   if $cds->has_tag('mRNA');
+
+		# the protein needs a name
+		my $psn = $self->protein_id($cds, $sn);
+                $self->{curr_pname} = $psn;
+
+		# the mRNA need to know the name of its protein
+		unless ( $feat->has_tag('protein_id') ) {
+		    $feat->add_tag_value('protein_id', $psn);
+		}
+
+                # define the translation offset
+		my ($c_start, $c_end);
+		if ( $cds->has_tag('codon_start') ){
+		    ($c_start) = $cds->get_tag_values('codon_start');
+		    $cds->remove_tag('codon_start');
+		}
+		else {
+		    $c_start = 1;
+		}
+		my $cs  = Bio::SeqFeature::Generic->new;
+		if ( $c_start == 1 ) {
+		    $c_start = $cds->strand > 0 ? $cds->start : $cds->end;
+		}
+		if ( $cds->strand < 1 ) {
+		    $c_end = $c_start;
+		    $c_start = $c_start - 2;
+		}
+		else {
+		    $c_end = $c_start + 2;
+		}
+		$cs->start($c_start);
+		$cs->end($c_end);
+		$cs->strand($cds->strand);
+		$cs->primary_tag('start_codon');
+		$cs->add_tag_value( 'standard_name' => $name );
+		push @units, $cs;
+
+
+		if ( $cds->has_tag('problem') ) {
+		    my ($val) = $cds->get_tag_values('problem');
+		    $cds->remove_tag('problem');
+		    $attributes{problem} = $val;
+		}
+		
+		my ($aa) = $cds->get_tag_values('translation')
+		    if $cds->has_tag('translation');
+		
+		if ( $aa && $psn ) {
+		    $cds->remove_tag('translation');
+		    my %add_seq = ();
+		    $add_seq{residues} = $aa;
+		    $add_seq{header} = ['seq',
+					id     => $psn,
+					length => length $aa,
+					type   => 'aa' ];
+		    
+		    if ( $cds->has_tag('product_desc') ) {
+			($add_seq{desc}) = $cds->get_tag_values('product_desc');
+			$cds->remove_tag('product_desc');
+		    }
+		    
+		    unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
+			my $start = $cds->start;
+			my $end   = $cds->end;
+			my $str   = $cds->strand;
+			my $acc   = $self->{seq}->accession || $self->{seq}->display_id;
+			$str = $str < 0 ? '[-]' : '';
+			$add_seq{desc}  = "translation from_gene[$gid] " .
+			    "cds_boundaries:(" . $acc . 
+			    ":$start..$end$str) transcript_info:[$name]";
+		    }
+		    $self->{add_seqs} ||= [];
+		    push @{$self->{add_seqs}}, \%add_seq;
+		}
+	    }
+
+	    
+	    $writer->startTag('feature_set', id => $name);
+	    $self->_element('name', $name);
+	    $self->_element('type', 'transcript');
+	    $self->_render_tags($_,
+				\&_render_date_tags,
+				\&_render_comment_tags,
+				\&_render_tags_as_properties,
+			       ) for ( $mRNA, ($cds) || () );
+	     
+	    # any UTR's, etc associated with this transcript?
+	    for my $thing ( @other_stuff ) {
+		if ( $thing->has_tag('standard_name') ) {
+		    my ($v)  = $thing->get_tag_values('standard_name');
+		    if ( $v eq $sn ) {
+			push @units, $thing;
+		    }
+		}
+	    }
+	    
+	    # add the exons
+	    push @units, grep { $_->primary_tag eq 'exon' } $mRNA->get_SeqFeatures;
+	    @units = sort { $a->start <=> $b->start } @units;
+
+	    my $count  = 0;
+	    
+	    if ( $str < 0 ) {
+		@units = reverse @units;
+	    }
+            
+	    for my $unit ( @units ) {
+		if ( $unit->primary_tag eq 'exon' ) {
+		    my $ename = $id;
+		    $ename .= ':' . ++$count;
+		    $self->_feature_span($ename, $unit);
+		}
+		elsif ( $unit->primary_tag eq 'start_codon' ) {
+		    $self->_feature_span(($sn || $gid), $unit, $self->{curr_pname});
+		}
+		else {
+		    my $uname = $unit->primary_tag . ":$id";
+		    $self->_feature_span($uname, $unit);
+		}
+	    }
+	    $self->{curr_pname} = '';
+	    $writer->endTag('feature_set');
+	}
+	
+	$self->{other_stuff} = \@other_stuff;
+    }    
+    
+    $writer->endTag('annotation');
+
+    # add the protein sequences
+    for ( @{$self->{add_seqs}} ) {
+	my %h = %$_;
+	$writer->startTag(@{$h{header}});
+	my @desc = split /\s+/, $h{desc};
+	my $desc = '';
+	for my $word (@desc) {
+	    my ($lastline) = $desc =~ /.*^(.+)$/sm;
+	    $lastline ||= '';
+	    $desc .= length $lastline < 50 ? " $word " : "\n      $word ";
+	}
+        $self->_element('description', "\n     $desc\n    ");
+
+	my $aa = $h{residues};
+	$aa =~ s/(\w{60})/$1\n      /g;
+	$aa =~ s/\n\s+$//m;
+	$aa = "\n      " . $aa . "\n    ";
+	$self->_element('residues', $aa);
+	$writer->endTag('seq');
+	$self->{add_seqs} = [];
+    }
+    
+    # Is there anything else associated with the gene?  We have to write other
+    # features as stand-alone annotations or apollo will assume they are
+    # transcripts
+    for my $thing ( @{$self->{other_stuff}} ) {
+	next if $thing->has_tag('standard_name');
+	$self->_write_feature($thing);
+    }
+    $self->{other_stuff} = [];
+}
+
+
+=head2 _check_cds
+
+ Title   : _check_cds
+ Usage   : $self->_check_cds($cds, $name)
+ Function: internal method to check if the CDS associated with an mRNA is
+           the correct alternative splice variant
+ Returns : a Bio::SeqFeature::Generic CDS object
+ Args    : the CDS object plus the transcript's 'standard_name'
+ Note    : this method only works if alternatively spliced transcripts are bound
+           together by a 'standard_name' or 'mRNA' qualifier.  If none is present, 
+           we will hope that the exons were derived from a segmented RNA or a CDS 
+           with no associated mRNA feature.  Neither of these two cases would be 
+           confounded by alternative splice variants.
+
+=cut
+
+
+sub _check_cds {
+    # Return the CDS that belongs to the transcript called $name.
+    #   $cds  -- the CDS found among the mRNA's subfeatures
+    #   $name -- the transcript's name
+    # Returns $cds itself when it carries no name or already carries $name;
+    # otherwise the first CDS in $self->{feats} whose name equals $name, or
+    # '' when no such CDS exists.
+    my ($self, $cds, $name) = @_;
+    my $cname = $self->_find_name( $cds, 'standard_name' )
+             || $self->_find_name( $cds, 'mRNA' );
+
+    # an unnamed CDS cannot be checked; a matching one is already correct
+    return $cds if !$cname || $cname eq $name;
+
+    # Wrong splice variant: search the other CDS features for the right one.
+    # _find_name is a method of this writer, not of the feature objects, so
+    # it must be invoked on $self.
+    for my $candidate ( grep { $_->primary_tag eq 'CDS' } @{ $self->{feats} } ) {
+        my $sname = $self->_find_name( $candidate, 'standard_name' )
+                 || $self->_find_name( $candidate, $candidate->primary_tag );
+        return $candidate if $sname eq $name;
+    }
+    return '';
+}
+
+=head2 _comp_analysis
+
+  Usage:
+  Desc :
+  Ret  :
+  Args :
+  Side Effects:
+  Example:
+
+=cut
+
+sub _comp_analysis {
+  # Write one <computational_analysis> element for $feat: the program name
+  # (or 'unknown program'), then database, version and type when the
+  # feature supplies them, then its date/property tags and its results.
+  my ($self, $feat) = @_;
+  my $writer = $self->{writer};
+
+  $writer->startTag('computational_analysis');
+  $self->_element('program', $feat->program_name || 'unknown program');
+
+  # optional children: written only when the accessor returns a true value
+  my @optional = (
+    [ database => 'database_name'   ],
+    [ version  => 'program_version' ],
+    [ type     => 'primary_tag'     ],
+  );
+  for my $entry (@optional) {
+    my ($element, $accessor) = @$entry;
+    my $value = $feat->$accessor;
+    $self->_element($element, $value) if $value;
+  }
+
+  $self->_render_tags($feat, \&_render_date_tags, \&_render_tags_as_properties);
+  $self->_comp_result($feat);
+  $writer->endTag('computational_analysis');
+}
+
+=head2 _comp_result
+
+  Usage:
+  Desc : recursively render a feature and its subfeatures as
+         <result_set> and <result_span> elements
+  Ret  : nothing meaningful
+  Args : a feature
+
+=cut
+
+
+sub _comp_result {
+  # Render $feat as computational results.  A feature with subfeatures or
+  # tags becomes a <result_set> holding its recursively rendered
+  # subfeatures plus a <result_span> of its own; any other feature becomes
+  # a bare <result_span>.
+  my ($self, $feat) = @_;
+
+  my @subfeats = $feat->get_SeqFeatures;
+  if ( @subfeats or $feat->get_all_tags ) {
+    my $writer = $self->{writer};
+
+    # only features exposing a defined computation_id get an id attribute
+    my @id_att;
+    if ( $feat->can('computation_id') && defined($feat->computation_id) ) {
+      @id_att = ( id => $feat->computation_id );
+    }
+    $writer->startTag('result_set', @id_att);
+
+    # unnamed result sets are numbered per primary tag: <tag>_1, <tag>_2, ...
+    my $fakename = $feat->primary_tag || 'no_name';
+    my $label    = $feat->display_name;
+    unless ( $label ) {
+      $label = $fakename . '_' . ++$self->{anon_result_set_counters}{$fakename};
+    }
+    $self->_element('name', $label);
+
+    $self->_seq_relationship('query', $feat);
+    $self->_render_tags($feat, \&_render_output_tags);
+
+    #render the subfeats, if any
+    $self->_comp_result($_) for @subfeats;
+
+    #also have a span to hold this info
+    $self->_comp_result_span($feat);
+    $writer->endTag('result_set');
+  }
+  else {
+    #just write result spans for simple things
+    $self->_comp_result_span($feat);
+  }
+}
+
+=head2 _comp_result_span
+
+  Usage: $self->_comp_result_span($feature);
+  Desc : write a GAME XML <result_span> element for a
+         Bio::SeqFeature::Computation feature (subfeatures, if any,
+         are rendered separately by _comp_result)
+  Ret  : nothing meaningful
+  Args : the SeqFeature object to put into this span
+  Side Effects:
+  Example:
+
+=cut
+
+sub _comp_result_span {
+  # Emit one <result_span> for $feat: optional id attribute, then name,
+  # type and score, any 'output' tags, the query relationship and finally
+  # any 'Target' tags as subject relationships.
+  my ($self, $feat) = @_;
+  my $writer = $self->{writer};
+
+  # only features exposing a defined computation_id get an id attribute
+  my @id_att;
+  if ( $feat->can('computation_id') && defined($feat->computation_id) ) {
+    @id_att = ( id => $feat->computation_id );
+  }
+  $writer->startTag('result_span', @id_att);
+
+  $self->_element('name', $feat->display_name) if $feat->display_name;
+  $self->_element('type', $feat->primary_tag) if $feat->primary_tag;
+
+  # prefer the feature's own has_score test when it offers one
+  my $has_score = $feat->can('has_score') ? $feat->has_score
+                                          : defined($feat->score);
+  $self->_element('score', $feat->score) if $has_score;
+
+  $self->_render_tags($feat, \&_render_output_tags);
+  $self->_seq_relationship('query', $feat);
+  $self->_render_tags($feat, \&_render_target_tags);
+  $writer->endTag('result_span');
+}
+
+=head2 _render_tags
+
+  Usage:
+  Desc :
+  Ret  :
+  Args :
+  Side Effects:
+  Example:
+
+=cut
+
+sub _render_tags {
+  # Run the feature's tag names through a chain of rendering handlers.
+  #   $feat         -- feature whose tags are to be written
+  #   @render_funcs -- code refs, each called as $self->$func($feat, @tags)
+  # Each handler writes the tags it recognises and returns the names it did
+  # not handle; those are what the next handler in the chain receives.
+  # (The array sigils here had been mangled to " at " by the list archiver.)
+  my ($self, $feat, @render_funcs) = @_;
+
+  my @tagnames = $feat->get_all_tags;
+
+  #do a chain-of-responsibility down the allowed
+  #tag handlers types for the context in which this is
+  #called
+  foreach my $func (@render_funcs) {
+    @tagnames = $self->$func($feat, @tagnames);
+  }
+  return;
+}
+
+=head2 _render_output_tags
+
+  Usage:
+  Desc : print out <output> elements, with contents
+         taken from the SeqFeature::Computation's 'output' tag
+  Ret  : array of tag names this did not render
+  Args : feature object, list of tag names to maybe render
+
+  In game xml, only <result_span> and <result_set> elements can
+  have <output> elements.
+
+=cut
+
+sub _render_output_tags {
+  # Write an <output> element (with <type> and <value> children) for each
+  # consecutive pair of values held by a tag named 'output' (any case).
+  # Returns the tag names that were not rendered here.
+  my ($self, $feat, @tagnames) = @_;
+  my $writer = $self->{writer};
+  my @passed_up;
+
+  for my $tag (@tagnames) {
+    unless ( lc($tag) eq 'output' ) {
+      push @passed_up, $tag;
+      next;
+    }
+
+    # the values form a flat list: type, value, type, value, ...
+    my @outputs = $feat->get_tag_values($tag);
+    while ( my ($type, $val) = splice @outputs, 0, 2 ) {
+      $writer->startTag('output');
+      $self->_element('type',  $type);
+      $self->_element('value', $val);
+      $writer->endTag('output');
+    }
+  }
+  return @passed_up;
+}
+
+=head2 _render_tags_as_properties
+
+  Usage:
+  Desc :
+  Ret  : empty array
+  Args : feature object, array of tag names
+  Side Effects:
+  Example:
+
+  In game xml, <annotation>, <computational_analysis>,
+  and <feature_set> elements can have properties.
+
+=cut
+
+sub _render_tags_as_properties {
+  # Write every value of every remaining tag as a <property> element and
+  # return an empty list, so nothing is passed on to a later handler.
+  #   $feat     -- feature object holding the tags
+  #   @tagnames -- names of the tags still to be rendered
+  # (The array sigil here had been mangled to " at " by the list archiver.)
+  my ($self, $feat, @tagnames) = @_;
+
+  foreach my $tag (@tagnames) {
+    # a tag named after the feature's own primary tag is not written
+    next if $tag eq $feat->primary_tag;
+    $self->_property($tag, $_) for $feat->get_tag_values($tag);
+  }
+  return ();
+}
+
+=head2 _render_comment_tags
+
+  Usage:
+  Desc :
+  Ret  : names of tags that were not comment tags
+  Args : feature object, tag names available for us to render
+  Side Effects: writes XML
+  Example:
+
+  In game xml, <annotation> and <feature_set> elements can
+  have comments.
+
+=cut
+
+sub _render_comment_tags {
+  # Write the values of any tag named 'comment' (any case) as <comment>
+  # elements and return the names of all other tags.
+  #   $feat     -- feature object holding the tags
+  #   @tagnames -- names of the tags available for rendering
+  # (The array sigil here had been mangled to " at " by the list archiver.)
+  my ($self, $feat, @tagnames) = @_;
+  my $writer = $self->{writer};
+  my @passed_up;
+
+  for my $tag ( @tagnames ) {
+    if ( lc($tag) ne 'comment' ) {
+      push @passed_up, $tag;
+      next;
+    }
+
+    for my $val ( $feat->get_tag_values($tag) ) {
+      if ( $val =~ /=.+?;.+=/ ) {
+        # looks like flattened 'key=value; key=value' pairs: expand them
+        $self->_unflatten_attribute('comment', $val);
+      }
+      else {
+        $writer->startTag('comment');
+        $self->_element('text', $val);
+        $writer->endTag('comment');
+      }
+    }
+  }
+  return @passed_up;
+}
+
+=head2 _render_date_tags
+
+  Usage:
+  Desc :
+  Ret  : names of tags that were not date tags
+  Args : feature, list of tag names available for us to render
+  Side Effects: writes XML for <date> elements
+  Example:
+
+  In game xml, <annotation>, <computational_analysis>,
+  <transaction>, <comment>, and <feature_set> elements
+  can have <date>s.
+
+=cut
+
+sub _render_date_tags {
+  # Write a single <date> element from the feature's 'date' tag, carrying
+  # the 'timestamp' tag (if any) as an attribute, and return the names of
+  # all other tags.  Only the first value of each tag is used, and a
+  # timestamp without a date is not written at all.
+  #   $feat     -- feature object holding the tags
+  #   @tagnames -- names of the tags available for rendering
+  # (The array sigil here had been mangled to " at " by the list archiver.)
+  my ($self, $feat, @tagnames) = @_;
+  my @passed_up;
+  my $date;
+  my %timestamp;
+
+  foreach my $tag (@tagnames) {
+    if ( lc($tag) eq 'date' ) {
+      ($date) = $feat->get_tag_values($tag);
+    }
+    elsif ( lc($tag) eq 'timestamp' ) {
+      # not rendered on its own: folded into the <date> element below
+      ($timestamp{'timestamp'}) = $feat->get_tag_values($tag);
+    }
+    else {
+      push @passed_up, $tag;
+    }
+  }
+  $self->_element('date', $date, \%timestamp) if defined($date);
+  return @passed_up;
+}
+
+=head2 _render_dbxref_tags
+
+  Desc : look for xref tags and render them if they are there
+  Ret  : tag names that we didn't render
+  Args : feature object, list of tag names to render
+  Side Effects: writes a <dbxref> element if a tag with name
+                matching /xref$/i is present
+
+
+  In game xml, <annotation> and <seq> elements can have dbxrefs.
+
+=cut
+
+#TODO: can't sequences also have database xrefs?  how to find those?
+sub _render_dbxref_tags {
+  # Write a <dbxref> element for every 'db:id' value of every tag whose
+  # name ends in 'xref' (case-insensitive); values without that shape are
+  # dropped.  Returns the names of the tags that were not xref tags.
+  my ($self, $feat, @tagnames) = @_;
+  my @passed_up;
+
+  TAG:
+  for my $tag ( @tagnames ) {
+    if ( $tag !~ /xref$/i ) {
+      push @passed_up, $tag;
+      next TAG;
+    }
+
+    my $writer = $self->{writer};
+    # NOTE(review): get_all_tag_values is not defined in this chunk, and the
+    # other handlers here call get_tag_values -- confirm the feature class
+    # provides this method.
+    for my $val ( $feat->get_all_tag_values($tag) ) {
+      my @parts = $val =~ /(\S+):(\S+)/;
+      next unless @parts;
+      my ($db, $dbid) = @parts;
+
+      # -> ontology, like GO: the id keeps its database prefix
+      $dbid = $val if $db =~ /^[A-Z]O$/;
+
+      $writer->startTag('dbxref');
+      $self->_element('xref_db', $db);
+      $self->_element('db_xref_id', $dbid);
+      $writer->endTag('dbxref');
+    }
+  }
+  return @passed_up;
+}
+
+
+=head2 _render_target_tags
+
+  Usage:
+  Desc : process any 'Target' tags that would indicate a sequence alignment subject
+  Ret  : array of tag names that we didn't render
+  Args : feature object
+  Side Effects: writes a <seq_relationship> of type 'subject' if it finds
+                any properly formed tags named 'Target'
+  Example:
+
+  In game xml, <result_span>, <feature_span>, and <result_set> can have
+  <seq_relationship>s.  <result_set> can only have one, a 'query' relation.
+
+=cut
+
+sub _render_target_tags {
+  # Write a 'subject' <seq_relationship> for a 'Target' tag that holds at
+  # least three values, read as (sequence name, start, end, alignment);
+  # the fourth value is optional.  Returns the names of all other tags,
+  # including a 'Target' tag with fewer than three values.
+  #   $feat     -- feature object holding the tags
+  #   @tagnames -- names of the tags available for rendering
+  # (The array sigil here had been mangled to " at " by the list archiver.)
+  my ($self, $feat, @tagnames) = @_;
+  my @passed_up;
+
+  foreach my $tag (@tagnames) {
+    if ( $tag eq 'Target' && (my @alignment = $feat->get_tag_values('Target')) >= 3 ) {
+      my ($subject, $start, $end, $aln) = @alignment;
+      $self->_seq_relationship('subject',
+                               Bio::Location::Simple->new( -start => $start,
+                                                           -end   => $end,
+                                                         ),
+                               $subject,
+                               $aln,
+                              );
+    }
+    else {
+      push @passed_up, $tag;
+    }
+  }
+  return @passed_up;
+}
+
+
+=head2 _property
+
+ Title   : _property
+ Usage   : $self->_property($tag => $value); 
+ Function: an internal method to write property XML elements
+ Returns : nothing
+ Args    : a tag/value pair
+
+=cut
+
+sub _property { # write <property><type>$tag</type><value>$val</value></property>
+    my ($self, $tag, $val) = @_;
+    my $writer = $self->{writer};
+    # values longer than 45 characters are re-wrapped word by word
+    if ( length $val > 45 ) {
+	my @val = split /\s+/, $val;
+	$val = '';
+	# rebuild the value, starting a new indented line once the current one reaches 45 chars
+	for my $word (@val) {
+	    my ($lastline) = $val =~ /.*^(.+)$/sm; # text of the last line built so far
+	    $lastline ||= '';
+	    $val .= length $lastline < 45 ? " $word " : "\n          $word";
+	}
+	$val = "\n         $val\n        ";
+	$val =~ s/(\S)\s{2}(\S)/$1 $2/g; # collapse the double spaces left between words
+    }
+    $writer->startTag('property');
+    $self->_element('type', $tag);
+    $self->_element('value', $val);
+    $writer->endTag('property');
+}
+
+=head2 _unflatten_attribute
+
+ Title   : _unflatten_attribute
+ Usage   : $self->_unflatten_attribute($name, $value)
+ Function: an internal method to unflatten and write comment or evidence elements
+ Returns : nothing
+ Args    : a list of strings
+
+=cut
+
+sub _unflatten_attribute {
+    # Write a flattened 'key=value; key=value' string as one <$name>
+    # element with one child element per key.
+    #   $name -- name of the enclosing element (e.g. 'comment')
+    #   $val  -- the flattened string
+    # A key that occurs more than once keeps its last value.
+    my ($self, $name, $val) = @_;
+    my $writer = $self->{writer};
+    my %pair;
+
+    for my $p ( split ';', $val ) {
+        # limit the split so a value that itself contains '=' stays whole
+        my ($key, $value) = split /=/, $p, 2;
+        next unless defined $key;
+        $key =~ s/^\s+|\s+$//g;
+        next unless length $key;    # stray separator: nothing to name an element after
+        $value = '' unless defined $value;
+        $value =~ s/^\s+|\s+$//g;
+        $pair{$key} = $value;
+    }
+
+    $writer->startTag($name);
+    for my $key ( keys %pair ) {
+        $self->_element($key, $pair{$key});
+    }
+    $writer->endTag($name);
+    return;
+}
+
+=head2 _xref
+
+ Title   : _xref
+ Usage   : $self->_xref($value) 
+ Function: an internal method to write db_xref elements
+ Returns : nothing 
+ Args    : a list of strings
+
+=cut
+
+sub _xref {
+    # Write one <dbxref> element per 'db:accession' string in @xrefs.
+    # Strings that do not have that shape are skipped, as in
+    # _render_dbxref_tags, instead of producing an empty <dbxref>.
+    my ($self, @xrefs) = @_;
+    my $writer = $self->{writer};
+
+    for my $xref ( @xrefs ) {
+        next unless defined $xref;
+        my ($db, $acc) = $xref =~ /(\S+):(\S+)/;
+        next unless defined $db;
+
+        # GO ids keep their database prefix
+        $acc = $xref if $db eq 'GO';
+
+        $writer->startTag('dbxref');
+        $self->_element('xref_db', $db);
+        $self->_element('db_xref_id', $acc);
+        $writer->endTag('dbxref');
+    }
+    return;
+}
+
+=head2 _feature_span
+
+ Title   : _feature_span
+ Usage   : $self->_feature_span($name, $feat, $pname)
+ Function: an internal method to write a feature_span element
+          (the actual feature with coordinates)
+ Returns : nothing 
+ Args    : a feature name, a Bio::SeqFeatureI-compliant object and
+           (optional) the name of the protein sequence it produces
+
+=cut
+
+sub _feature_span {
+    # Write a <feature_span> element for $feat with id and <name> set to
+    # $name and <type> set to the feature's primary tag.  When $pname is
+    # true it is written as the produces_seq attribute, with its first
+    # '-R' replaced by '-P'.
+    my ($self, $name, $feat, $pname) = @_;
+    my $writer = $self->{writer};
+    my $type   = $feat->primary_tag;
+
+    my %atts = ( id => $name );
+    if ( $pname ) {
+        ( my $protein = $pname ) =~ s/-R/-P/;
+        $atts{produces_seq} = $protein;
+    }
+
+    $writer->startTag('feature_span', %atts );
+    $self->_element('name', $name);
+    $self->_element('type', $type);
+    $self->_seq_relationship('query', $feat);
+    $writer->endTag('feature_span');
+}
+
+=head2 _seq_relationship
+
+ Title   : _seq_relationship
+ Usage   : $self->_seq_relationship($type, $loc)
+ Function: an internal method to handle feature_span sequence relationships
+ Returns : nothing
+ Args    : feature type, a Bio::LocationI-compliant object,
+           (optional) sequence name (defaults to the query seq)
+           and (optional) alignment string
+
+=cut
+
+sub _seq_relationship {
+    # Write a <seq_relationship> element of the given type containing the
+    # span of $loc, followed by an <alignment> element when $alignment is
+    # true.
+    #   $type      -- relationship type, e.g. 'query' or 'subject'
+    #   $loc       -- location object handed on to _span
+    #   $seqname   -- (optional) sequence name; defaults to the annotated
+    #                 sequence's accession number (unless it is 'unknown'),
+    #                 then its display_id, then 'unknown'
+    #   $alignment -- (optional) alignment string
+    my ($self, $type, $loc, $seqname, $alignment) = @_;
+    my $writer = $self->{'writer'};
+
+    $seqname ||= #if no seqname passed in, use the name of our annotating seq
+      $self->{seq}->accession_number ne 'unknown' && $self->{seq}->accession_number
+        || $self->{seq}->display_id || 'unknown';
+    $writer->startTag(
+                      'seq_relationship',
+                      type => $type,
+                      seq  => $seqname,
+                     );
+    $self->_span($loc);
+    # _element is this class's helper, not a method of the XML writer object
+    $self->_element('alignment', $alignment) if $alignment;
+    $writer->endTag('seq_relationship');
+}
+
+=head2 _element
+
+ Title   : _element
+ Usage   : $self->_element($name, $chars, $atts)
+ Function: an internal method to generate 'generic' XML elements
+ Example : 
+ my $name = 'foo';
+ my $content = 'bar';
+ my $attributes = { baz => 1 }; 
+ # print the element
+ $self->_element($name, $content, $attributes);
+ Returns : nothing 
+ Args    : the element name and content plus a ref to an attribute hash
+
+=cut
+
+sub _element {
+    # Write <$name ...>$chars</$name>; $atts, when true, is a hash ref
+    # whose pairs become the element's attributes.
+    my ($self, $name, $chars, $atts) = @_;
+    my $writer = $self->{writer};
+
+    $writer->startTag($name, $atts ? %$atts : ());
+    $writer->characters($chars);
+    $writer->endTag($name);
+}
+
+=head2 _span
+
+ Title   : _span
+ Usage   : $self->_span($loc)
+ Function: an internal method to write the 'span' element
+ Returns : nothing
+ Args    : a Bio::LocationI-compliant object
+
+=cut
+
+sub _span {
+    # Write a <span> element with <start> and <end> children.  Accepts one
+    # location object (start and end are swapped on the minus strand) or an
+    # explicit start/end pair; with neither, the span covers the whole of
+    # $self->{seq}.
+    my ($self, @loc) = @_;
+    my ($start, $end);
+
+    my $loc = @loc == 1 ? $loc[0] : undef;
+    ($start, $end) = @loc if @loc == 2;
+
+    if ( $loc ) {
+        ($start, $end) = $loc->strand < 0 ? ($loc->end,   $loc->start)
+                                          : ($loc->start, $loc->end);
+    }
+    elsif ( !$start ) {
+        ($start, $end) = (1, $self->{seq}->length);
+    }
+
+    my $writer = $self->{writer};
+    $writer->startTag('span');
+    $self->_element('start', $start);
+    $self->_element('end', $end);
+    $writer->endTag('span');
+}
+
+=head2 _seq
+
+ Title   : _seq
+ Usage   : $self->_seq($seq, $atts) 
+ Function: an internal method to print the 'sequence' element
+ Returns : nothing
+ Args    : a Bio::SeqI-compliant object and a reference to an attribute hash
+
+=cut
+
+sub _seq {
+    # Write the <seq> element for $seq, marked focus="true".  $atts is a
+    # hash ref: 'name' supplies the id attribute, 'md5checksum' (when true)
+    # becomes an attribute and is deleted from the hash, keys containing
+    # 'xref' go through _xref, and every remaining key is written as a
+    # child element.  The organism (unless has_organism is set) and the
+    # residues follow.
+    my ($self, $seq, $atts) = @_;
+    my $writer = $self->{'writer'};
+
+    # game moltypes
+    my $moltype = $seq->alphabet;
+    if ( $seq->can('mol_type') ) {
+        $moltype ||= $seq->mol_type;
+    }
+    $moltype =~ s/protein/aa/;
+    $moltype =~ s/rna/cdna/;
+
+    my @tag = ( 'seq',
+                id     => $atts->{name},
+                length => $seq->length,
+                type   => $moltype,
+                focus  => "true"
+              );
+    if ( $atts->{md5checksum} ) {
+        push @tag, ( md5checksum => delete $atts->{md5checksum} );
+    }
+    $writer->startTag(@tag);
+
+    for my $key ( keys %{$atts} ) {
+        if ( $key =~ /xref/ ) {
+            $self->_xref( $atts->{$key} );
+        }
+        else {
+            $self->_element( $key, $atts->{$key} );
+        }
+    }
+
+    if ( $seq->species && !$self->{has_organism} ) {
+        $self->_element('organism', $seq->species->binomial);
+    }
+
+    # add leading spaces and line breaks for
+    # nicer xml formatting/indentation
+    my $indent   = ' ' x 6;
+    my $residues = $seq->seq;
+    $residues =~ s/(\w{60})/$1\n$indent/g;
+    $self->_element('residues', "\n$indent" . $residues . "\n    ");
+
+    $writer->endTag('seq');
+}
+
+=head2 _find_name
+
+ Title   : _find_name
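+ Usage   : my $name = $self->_find_name($feature, $tag)
+ Function: an internal method to look up a name stored in a feature tag
+ Returns : the first value of the tag, or an empty string if the tag is absent
+ Args    : a Bio::SeqFeatureI-compliant object and the name of the tag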
+
+=cut
+
+sub _find_name {
+    # Return the first value of tag $key on $feat, or '' when no key was
+    # given or the feature lacks that tag.
+    my ($self, $feat, $key) = @_;
+
+    return '' unless $key && $feat->has_tag($key);
+
+    my ($name) = $feat->get_tag_values($key);
+    return $name;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/seqHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/seqHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game/seqHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,517 @@
+# $Id: seqHandler.pm,v 1.27.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::game::seqHandler
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::game::seqHandler -- a class for handling game-XML sequences
+
+=head1 SYNOPSIS
+
+This module is not used directly
+
+=head1 DESCRIPTION
+
+Bio::SeqIO::game::seqHandler processes all of the sequences associated with a game record
+and, via feature handlers, processes the associated annotations
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::game::seqHandler;
+
+use Data::Dumper;
+
+use Bio::SeqIO::game::featHandler;
+use Bio::SeqFeature::Generic;
+use Bio::Seq::RichSeq;
+use Bio::Species;
+use strict;
+
+use vars qw {};
+
+use base qw(Bio::SeqIO::game::gameSubs);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $seqHandler = Bio::SeqIO::game::seqHandler->new($seq, $ann, $comp, $map, $src )
+ Function: constructor method to create a sequence handler
+ Returns : a sequence handler object
+ Args    : $seq  -- an XML sequence element
+           $ann  -- a ref. to a list of <annotation> elements
+           $comp -- a ref. to a list of <computational_analysis> elements (not used yet)
+           $map  -- a <map_position> element
+           $src  -- a flag to indicate that the sequence already has a source feature
+
+=cut
+
+sub new {
+    # Build a sequence handler that stores the parsed elements it is given
+    # and starts with an empty sequence hash and an empty annotation list.
+    my ($caller, $seq, $ann, $comp, $map, $src ) = @_;
+
+    my %handler = (
+        seqs       => $seq,
+        anns       => $ann,
+        comps      => $comp,
+        map_pos    => $map,
+        has_source => $src,
+        seq_h      => {},
+        ann_l      => [],
+    );
+
+    # usable as a class method or as an instance method
+    return bless \%handler, ref($caller) || $caller;
+}
+
+=head2 convert
+
+ Title   : convert
+ Usage   : @seqs = $seqHandler->convert
+ Function: converts the main XML sequence element and associated annotations to Bio::
+ Returns : a ref. to an array containing the sequence object and a ref. to a list of features
+ Args    : none
+
+ Note    : The features and sequence are kept apart to facilitate downstream filtering of features 
+
+=cut
+
+sub convert {
+    # Process every stored <seq> element, then every stored <annotation>
+    # element, and return the result of _order_feats (an array ref holding
+    # the main sequence object and a ref to the ordered feature list).
+    my $self = shift;
+
+    # A plain conditional expression replaces the original
+    # "my @ann = ... if defined ..." form, whose behaviour is undefined
+    # when the condition is false.
+    my @ann = defined $self->{anns} ? @{ $self->{anns} } : ();
+    my @seq = @{ $self->{seqs} };
+
+    # not used yet
+    my @comp = $self->{comps} ? @{ $self->{comps} } : ();
+
+    # process the sequence elements
+    $self->_add_seq($_) for @seq;
+
+    # process the annotation elements
+    $self->_annotation($_) for @ann;
+
+    return $self->_order_feats( $self->{seq_h} );
+}
+
+=head2 _order_feats
+
+ Title   : _order_feats
+ Usage   : $self->_order_feats( $self->{seq_h} )
+ Function: an internal method to ensure the source feature comes first
+           and keep gene, mRNA and CDS features together 
+ Returns : a ref. to an array containing the sequence object and a ref. to a list of  features 
+ Args    : a ref. to a hash of sequences
+
+=cut
+
+sub _order_feats {
+    # Return [ $main_seq, \@features ] with the features of $self->{ann_l}
+    # ordered as: source-like features, then gene/CDS/RNA/transcript
+    # features in their original order, then everything else sorted by
+    # ascending start and descending end.
+    #
+    # Every feature lands in exactly one group.  The original built the
+    # last group with a narrower pattern (gene|mRNA|CDS) than the gene
+    # group, so features such as tRNA or transcript were returned twice.
+    my ($self, $seqs) = @_;
+    my $seq = $self->{main_seq};
+    # NOTE(review): $id is never used; the call is kept because it dies
+    # early when no main sequence was recorded.
+    my $id  = $seq->id;
+    my $ann = $self->{ann_l};
+
+    my $source_re = qr/source|origin|\bregion\b/;
+    my $gene_re   = qr/gene|CDS|[a-z]+RNA|transcript/;
+
+    my (@src, @genes, @other);
+    for my $feat ( @$ann ) {
+        my $tag = $feat->primary_tag;
+        if ( $tag =~ $source_re ) {
+            # make sure source(s) come first
+            push @src, $feat;
+        }
+        elsif ( $tag =~ $gene_re ) {
+            # preserve gene->mRNA->CDS or ncRNA->gene->transcript order
+            push @genes, $feat;
+        }
+        else {
+            push @other, $feat;
+        }
+    }
+    @other = sort { $a->start <=> $b->start || $b->end <=> $a->end } @other;
+
+    return [$seq, [@src, @genes, @other]];
+}
+
+=head2 _add_seq
+
+ Title   : _add_seq
+ Usage   : $self->_add_seq($seq_element)
+ Function: an internal method to process the sequence elements
+ Returns : nothing
+ Args    : a sequence element
+
+=cut
+
+sub _add_seq { # populate a sequence object from one parsed <seq> element ($el)
+    my ($self, $el) = @_;
+    my $residues = '';
+    # an element without residues is ignored (returns 0)
+    if ($el->{_residues}) {
+        $residues = $el->{_residues}->{Characters};
+        $residues =~ s/[ \n\r]//g; # strip spaces and line breaks
+        $residues =~ s/\!//g;
+        $residues =~ tr/a-z/A-Z/; # upper-case the residues
+    } 
+    else {
+	return 0;
+    }
+
+    my $id   = $el->{Attributes}->{id};
+    my $ver  = $el->{Attributes}->{version};
+    my $name = $el->{_name}->{Characters};
+    # a differing name is only reported; the id is what gets used below
+    if ($name && $name ne $id) {
+        $self->complain("The sequence name and unique ID do not match.  Using ID");
+    }
+    
+    # get/set the sequence object
+    my $seq = $self->_seq($id);
+    
+    # get/set the feature handler
+    my $featHandler = $self->_feat_handler;
+    
+    # populate the sequence object
+    $seq->seq($residues);
+    $seq->seq_version($ver) if $ver;
+    
+    # assume the id is the accession number (only when it is a single word)
+    if ( $id =~ /^\w+$/ ) {
+	$seq->accession($id);
+    }
+    
+    # A focus attribute marks the main sequence.
+    # NOTE(review): any defined value counts, even focus="false" -- confirm
+    my $focus = 0;
+    if ( defined $el->{Attributes}->{focus} ) {
+        $self->{main_seq} = $seq;
+        $focus++;
+    }
+
+    # make sure real and annotated lengths match
+    my $length = $el->{Attributes}->{'length'};
+    $length && $seq->length(int($length));
+    if ( $seq->seq && defined($length) && $seq->length != int($length) ) {
+        $self->complain("The specified sequence has length ", $seq->length(),
+                        " but the length attribute= ", $length);
+        $seq->seq( undef ); # on a mismatch the residues are discarded ...
+        $seq->length( int($length) ); # ... and the declared length is kept
+    }
+
+    # deal with top-level annotations
+    my $tags = {};
+    if ( $el->{Attributes}->{md5checksum} ) {
+	$tags->{md5checksum} = [$el->{Attributes}->{md5checksum}];
+    }
+    if ($el->{_dbxref}) {
+	$tags->{dbxref} ||= [];
+        push @{$tags->{dbxref}}, $self->dbxref( $el->{_dbxref} );
+    }
+    if ($el->{_description}) {
+        my $desc = $el->{_description}->{Characters};
+        $seq->description( $desc );
+    } 
+    if ($el->{_organism}) {
+        my @organism = split /\s+/, $el->{_organism}->{Characters};
+        if (@organism < 2) {
+	    $self->complain("Species name should have at least two words");
+	}
+	else {
+	    my $species = Bio::Species->new( -classification => [reverse @organism] );
+	    $seq->species($species);
+	}
+    }
+    if ( defined($seq->species) ) {
+	$tags->{organism} = [$seq->species->binomial];
+    }
+#    elsif ($seq eq $self->{main_seq}) {
+#	$self->warn("The source organism for this sequence was\n" .
+#		    "not specified.  I will guess Drosophila melanogaster.\n" .
+#		    "Otherwise, add <organism>Genus species</organism>\n" .
+#		    "to the main sequence element");
+#	my @class = qw/ Eukaryota Metazoa Arthropoda Insecta Pterygota
+#	                Neoptera Endopterygota Diptera Brachycera 
+#	                Muscomorpha Ephydroidea Drosophilidae Drosophila melanogaster/;
+#	my $species = Bio::Species->new( -classification => [ reverse @class ],
+#					 -common_name    => 'fruit fly' );
+#	$seq->species( $species );
+#    }
+    
+    # convert GAME to bioperl molecule types
+    my $alphabet = $el->{Attributes}->{type};
+    if ( $alphabet ) {
+        $alphabet =~ s/aa/protein/;
+	$alphabet =~ s/cdna/rna/;
+	$seq->alphabet($alphabet);
+    }
+
+    # add a source feature if req'd (currently a no-op: the call below is disabled)
+    if ( !$self->{has_source} && $focus ) {
+	#$self->{source} = $featHandler->add_source($seq->length, $tags);
+    }
+    
+    if ( $focus ) {
+        # add the map position
+        $self->_map_position( $self->{map_pos}, $seq ); # _map_position reads only its first argument
+        $featHandler->{offset} = $self->{offset};
+    }
+    
+    # prune the sequence from the parse tree
+    $self->flush;
+}
+
+=head2 _map_position
+
+ Title   : _map_position
+ Usage   : $self->_map_position($map_posn_element)
+ Function: an internal method to process the <map_position> element
+ Returns : nothing
+ Args    : a map_position element
+
+=cut
+
+sub _map_position { # record the coordinate offset from a <map_position> element ($el)
+    my ($self, $el) = @_;
+
+    # we can live without it: no element means an offset of 0
+    if ( !$el ) {
+	$self->{offset}= 0;
+	return 0;
+    }
+
+
+    # chromosome and coordinates
+    my $arm   = $el->{_arm}->{Characters};
+    my $type  = $el->{Attributes}->{type};
+    my $loc   = $el->{_span};
+    my $start = $loc->{_start}->{Characters};
+    my $end   = $loc->{_end}->{Characters};
+    
+    # define the offset (may be a partial sequence)
+    # The coordinates will be relative but the CDS description
+    # coordinates may be absolute if the game-XML comes from apollo 
+    # or gadfly
+    $self->{offset} = $start - 1;
+
+    my $seq_id = $el->{Attributes}->{seq};
+    my $seq = $self->{seq_h}->{$seq_id};
+    # the map position must name a known sequence, and that must be the main one
+    unless ( $seq ) {
+        $self->throw("Map position with no corresponding sequence object");
+    }
+    unless ($seq eq $self->{main_seq}){
+        $self->throw("Map position does not correspond to the main sequence");
+    }
+    
+    my $species = '';
+    
+    # create/update the top-level sequence feature if req'd
+    if ( $self->{source} ) {
+	my $feat = $self->{source};
+    
+	unless ($feat->has_tag('organism')) {
+	    $species = eval {$seq->species->binomial} || 'unknown species'; # eval guards against a missing species object
+	    $feat->add_tag_value( organism => $species );
+	}
+    
+	my %tags = ( mol_type   => "genomic dna",
+		     chromosome => $arm,
+		     location   => "$start..$end",
+		     type       => $type
+		     );
+    
+	for (keys %tags) {
+	    $feat->add_tag_value( $_ => $tags{$_} );
+	}
+        
+	$seq->add_SeqFeature($feat);
+    }
+    # $species is only set in the source-feature branch above, so the
+    # description below is generated only when that branch filled it in
+    my $desc = $seq->description;
+    if ( $species && $arm && $start && $end && !$desc) {
+	$seq->description("$species chromosome $arm $start..$end " .
+	                  "segment of complete sequence");
+    }
+    
+    $self->flush;
+}
+
+=head2 _annotation
+
+ Title   : _annotation
+ Usage   : $self->_annotation($annotation_element)
+ Function: an internal method to process <annotation> elements
+ Returns : nothing
+ Args    : an annotation element
+
+=cut
+
+sub _annotation {
+    my ($self, $el) = @_;
+
+    my $id        = $el->{Attributes}->{id};
+    my $type      = $el->{_type}->{Characters};
+    my $tags      = {};
+    my $name_text = $el->{_name}->{Characters};
+
+    # A name that merely repeats the id carries no extra information.
+    my $gname = $name_text eq $id ? '' : $name_text;
+
+    # 'transposable element' is too long (breaks Bio::SeqIO::GenBank)
+    # $type =~ s/transposable_element/repeat_region/;
+
+    # Annotations must be on the main sequence.  The id itself is not
+    # used below; the call is kept so the method is still invoked.
+    my $seqid       = $self->{main_seq}->id;
+    my $featHandler = $self->_feat_handler;
+
+    my @feats = ();
+
+    for my $child ( @{ $el->{Children} } ) {
+        my $name = $child->{Name};
+
+        # Elements that need special handling come first.
+        if ( $name eq 'dbxref' ) {
+            push @{ $tags->{dbxref} ||= [] }, $self->dbxref($child);
+        }
+        elsif ( $name eq 'aspect' ) {
+            push @{ $tags->{dbxref} ||= [] },
+                $self->dbxref( $child->{_dbxref} );
+        }
+        elsif ( $name eq 'feature_set' ) {
+            push @feats,
+                $featHandler->feature_set( $id, $gname, $child, $type );
+        }
+        elsif ( $name eq 'comment' ) {
+            $tags->{comment} = [ $self->comment($child) ];
+        }
+        elsif ( $name eq 'property' ) {
+            $self->property( $child, $tags );
+        }
+        elsif ( $name eq 'gene' ) {
+            # We may be dealing with an annotation that is not a gene,
+            # so the gene has to be nested inside it.
+            $featHandler->has_gene( $child, $gname, $id );
+        }
+
+        # Everything else is a tag/value pair -- mild dtd enforcement.
+        # synonym is not in the dtd but shows up in gadfly annotations.
+        elsif ( $name =~ /type|synonym/ ) {
+            $tags->{$name} = [ $child->{Characters} ];
+        }
+        elsif ( $name ne 'name' ) {
+            $self->complain("Unrecognized element '$name'. I don't " .
+                            "know what to do with $name elements in " .
+                            "top-level sequence annotations." );
+        }
+    }
+
+    # Add a gene annotation if required: the annotation is typed as a
+    # gene but the handler has not recorded one.
+    if ( !$featHandler->has_gene && $type eq 'gene' ) {
+        $featHandler->has_gene( $el, $gname, $id );
+    }
+
+    # A 'symbol' tag stands in for a missing 'gene' tag and is then
+    # removed either way.
+    if ( $tags->{symbol} ) {
+        $tags->{gene} ||= $tags->{symbol};
+        delete $tags->{symbol};
+    }
+
+    $featHandler->add_annotation( $self->{main_seq}, $type, $id, $tags,
+                                  \@feats );
+    return $self->flush;
+}
+
+# get/set the sequence object
+
+=head2 _seq
+
+ Title   : _seq
+ Usage   : my $seq = $self->_seq
+ Function: an internal sequence getter/setter
+ Returns : a Bio::Seq::RichSeq object
+ Args    : a sequence ID
+
+=cut
+
+sub _seq {
+    my ($self, $id) = @_;
+
+    # An id is mandatory: it is the key the sequence is stored under.
+    $self->throw("A unique id must be provided for the sequence")
+        unless $id;
+
+    # Reuse the sequence object already stored for this id, if any.
+    return $self->{seq_h}->{$id} if defined $self->{seq_h}->{$id};
+
+    # Otherwise build a new one and store it for later lookups.
+    my $fresh = Bio::Seq::RichSeq->new( -id => $id );
+    $self->{seq_h}->{$id} = $fresh;
+
+    return $fresh;
+}
+
+#get/set the feature handler
+
+=head2 _feat_handler
+
+ Title   : _feat_handler
+ Usage   : my $featHandler = $self->_feat_handler
+ Function: an internal getter/setter for feature handling objects 
+ Returns : a Bio::SeqIO::game::featHandler object
+ Args    : none
+
+=cut
+
+sub _feat_handler {
+    my $self = shift;
+
+    # The handler is created once and then reused on every later call.
+    return $self->{feat_handler} if defined $self->{feat_handler};
+
+    # First call: build the handler from the main sequence, the
+    # sequence hash and the annotation list held by this object.
+    my @ctor_args = ( $self->{main_seq}, $self->{seq_h}, $self->{ann_l} );
+    my $created   = Bio::SeqIO::game::featHandler->new(@ctor_args);
+    $self->{feat_handler} = $created;
+
+    return $created;
+}
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/game.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,178 @@
+# $Id: game.pm,v 1.38.4.1 2006/10/02 23:10:29 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::game
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::game -- a class for parsing and writing game-XML
+
+=head1 SYNOPSIS
+
+This module is not used directly, use SeqIO.
+
+ use Bio::SeqIO;
+
+ my $in = Bio::SeqIO->new ( -file    => 'file.xml', 
+                            -format  =>  'game',
+                            -verbose => 1 );
+
+ my $seq = $in->next_seq;
+
+=head1 DESCRIPTION
+
+Bio::SeqIO::game will parse game XML (version 1.2) or write game XML from 
+a Bio::SeqI implementing object.  The XML is readable by the genome 
+annotation editor 'Apollo' (www.gmod.org).  It is not backwards compatible 
+with the previous version of game XML.  The XML format currently used by 
+Apollo contains a single 'main' annotated sequence, so we will only get a 
+single annotated sequence in the stream when parsing a game-XML record.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::game;
+
+use Bio::SeqIO::game::gameHandler;
+use Bio::SeqIO::game::gameWriter;
+
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+    # Nothing format-specific to set up: hand every argument straight
+    # to the parent class initializer.
+    my ($self, @rest) = @_;
+    return $self->SUPER::_initialize(@rest);
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : my $seq = $seqio->next_seq;
+ Function: get the main sequence object
+ Returns : a Bio::Seq::RichSeq object
+ Args    : none
+
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # Take the next [ sequence, features ] pair off the parsed list.
+    my $pending = $self->_getseqs;
+    my $entry   = shift @{$pending};
+    my ($seq, $feats) = ( $entry->[0], $entry->[1] );
+
+    # Attach the features that were collected for this sequence.
+    $seq->add_SeqFeature($_) for @{$feats};
+
+    return $seq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $seqio->write_seq($seq)
+ Function: writes a sequence object as game XML
+ Returns : nothing
+ Args    : a Bio::SeqI compliant object
+
+=cut
+
+sub write_seq {
+    my ($self, $seq) = @_;
+
+    # Render the sequence as game XML with the writer class, then send
+    # the resulting text to the output stream.
+    my $xml = Bio::SeqIO::game::gameWriter->new($seq)->write_to_game;
+    return $self->_print($xml);
+}
+
+=head2 _getseqs
+
+ Title   : _getseqs
+ Usage   : $self->_getseqs
+ Function: An internal method to invoke the PerlSAX XML handler and get
+           the sequence objects
+ Returns : a reference to an array with sequence object and annotations
+ Args    : none
+
+=cut
+
+sub _getseqs {
+    my $self = shift;
+
+    # The parsed result is cached: later calls get the stored list back.
+    return $self->{seq_l} if defined $self->{seq_l};
+
+    # Read the whole input in one go; the parser is given a string.
+    my $fh  = $self->_fh;
+    my $xml = join '', <$fh>;
+    $self->throw("Input file is empty or does not exist") unless $xml;
+
+    # Note up front whether any <type> element names a source, origin
+    # or region, so the handler can be told before parsing starts.
+    my $has_source =
+        ( $xml =~ /type>(source|origin|\bregion\b)<\/type/gm ) ? 1 : 0;
+
+    my $handler = Bio::SeqIO::game::gameHandler->new;
+    $handler->{has_source} = $has_source if $has_source;
+    $handler->{verbose}    = 1           if $self->verbose;
+
+    my $parser = XML::Parser::PerlSAX->new( Handler => $handler );
+    my $game   = $parser->parse($xml);
+
+    # Store the loaded list and return it in the same step.
+    return $self->{seq_l} = $game->load;
+}
+
+=head2 _hide_dna
+
+ Title   : _hide_dna
+ Usage   : $seqio->_hide_dna
+ Function: Hide the DNA for really huge sequences
+ Returns : nothing 
+ Args    : none
+
+=cut
+
+sub _hide_dna {
+    my $self = shift;
+
+    # Each entry is a [ sequence, features ] pair; blank the residues of
+    # every sequence object in the parsed list.
+    for my $entry ( @{ $self->_getseqs } ) {
+        $entry->[0]->seq('');
+    }
+
+    return 0;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/gcg.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/gcg.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/gcg.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,296 @@
+# $Id: gcg.pm,v 1.26.4.1 2006/10/02 23:10:29 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::gcg
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#          and Lincoln Stein <lstein at cshl.org>
+#
+# Copyright Ewan Birney & Lincoln Stein
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# October 18, 1999  Largely rewritten by Lincoln Stein
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::gcg - GCG sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from GCG flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Ewan Birney & Lincoln Stein
+
+Email: E<lt>birney at ebi.ac.ukE<gt>
+       E<lt>lstein at cshl.orgE<gt>
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::gcg;
+use strict;
+
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+  # Run the generic SeqIO initialisation, then install a default
+  # sequence factory producing Bio::Seq::RichSeq objects when the
+  # caller did not supply one.
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      # Direct method call instead of indirect-object "new Class (...)",
+      # which Perl can mis-parse.
+      $self->sequence_factory(Bio::Seq::SeqFactory->new
+                              (-verbose => $self->verbose(),
+                               -type => 'Bio::Seq::RichSeq'));
+   }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+=cut
+
+sub next_seq {
+   # Parse one GCG record from the stream and build a sequence object.
+   # Returns nothing when no header line (one ending in '..') is found.
+   # Everything after the header is read as sequence up to end of
+   # input; a second header line raises an exception.
+   my ($self, @args) = @_;
+   my ($id, $type, $desc, $chksum, $sequence, $date, $len);
+
+   # A lexical line buffer is used so the caller's $_ is left alone.
+   my $line;
+
+   while( defined($line = $self->_readline()) ) {
+
+       ## Get the descriptive info (anything before the line with '..')
+       if( $line !~ /\.\.$/ ) {
+           $desc .= $line;
+           next;
+       }
+
+       ## Pull ID, Checksum & Type from the line containing '..'
+       chomp $line;
+       if( $line =~ /Check\:\s(\d+)\s/ ) { $chksum = $1; }
+       if( $line =~ /Type:\s(\w)\s/ )    { $type   = $1; }
+       if( $line =~ /(\S+)\s+Length/ )   { $id     = $1; }
+       if( $line =~ /Length:\s+(\d+)\s+(\S.+\S)\s+Type/ ) {
+           $len  = $1;
+           $date = $2;
+       }
+       last;
+   }
+   return if ( !defined $line );
+
+   # Remove the last "\n"; there is no description at all when the
+   # header is the very first line.
+   chomp($desc) if defined $desc;
+
+   while( defined($line = $self->_readline()) ) {
+
+       ## This is where we grab the sequence info.
+
+       if( $line =~ /\.\.$/ ) {
+        $self->throw("Looks like start of another sequence. See documentation. ");
+       }
+
+       next if($line eq "\n");       ## skip whitespace lines in formatted seq
+       $line =~ s/[^a-zA-Z]//g;      ## remove anything that is not alphabet char
+       # The case of the residues is deliberately kept as read.
+       $sequence .= $line;
+   }
+
+   ## If we parsed out a checksum, we might as well test it
+   if(defined $chksum) {
+       unless(_validate_checksum($sequence,$chksum)) {
+           $self->throw("Checksum failure on parsed sequence.");
+       }
+   }
+
+   ## Remove whitespace from identifier because the constructor
+   ## will throw a warning otherwise...
+   if(defined $id) { $id =~ s/\s+//g;}
+
+   ## Turn our parsed "Type: N" or "Type: P" (if found) into the appropriate
+   ## keyword that the constructor expects...
+   if(defined $type) {
+       if($type eq "N") { $type = "dna";      }
+       if($type eq "P") { $type = "prot";    }
+   }
+
+   return $self->sequence_factory->create(-seq  => $sequence,
+                                          -id   => $id,
+                                          -desc => $desc,
+                                          -type => $type,
+                                          -dates => [ $date ]
+                                          );
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the formatted $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : array of Bio::PrimarySeqI object
+
+
+=cut
+
+sub write_seq {
+    # Write each given sequence as a GCG record: a description line, a
+    # header line (id, length, timestamp, type, checksum, '..') and the
+    # residues in numbered lines of five 10-residue groups.
+    # Returns 1 on success, nothing if printing a record fails.
+    my ($self, @seq) = @_;
+    for my $seq (@seq) {
+        $self->throw("Did not provide a valid Bio::PrimarySeqI object")
+            unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+
+        $self->warn("No whitespace allowed in GCG ID [". $seq->display_id. "]")
+            if $seq->display_id =~ /\s/;
+
+        my $str         = $seq->seq;
+        my $comment     = $seq->desc || '';
+        my $id          = $seq->id;
+        my $type        = ( $seq->alphabet() =~ /[dr]na/i ) ? 'N' : 'P';
+        my $timestamp;
+
+        # Use the first stored date when the object can supply dates,
+        # otherwise stamp the record with the current local time.
+        if( $seq->can('get_dates') ) {
+            ($timestamp) = $seq->get_dates;
+        } else {
+            $timestamp = localtime(time);
+        }
+        my ($sum, $offset, $len, $i, $j, @out);
+
+        $len = length($str);
+        ## Set the offset if we have any non-standard numbering going on
+        $offset=1;
+        # checksum
+        $sum = $self->GCG_checksum($seq);
+
+        #Output the sequence header info
+        push(@out,"$comment\n");
+        push(@out,"$id  Length: $len  $timestamp  Type: $type  Check: $sum  ..\n\n");
+
+        #Format the sequence: $i indexes the output line being built,
+        #$j is the position in the residue string.
+        $i = $#out + 1;
+        for($j = 0 ; $j < $len ; ) {
+            if( $j % 50 == 0) {
+                $out[$i] = sprintf("%8d  ",($j+$offset)); #numbering
+            }
+            $out[$i] .= sprintf("%s",substr($str,$j,10));
+            $j += 10;
+            if( $j < $len && $j % 50 != 0 ) {
+                $out[$i] .= " ";
+            }elsif($j % 50 == 0 ) {
+                $out[$i++] .= "\n\n";
+            }
+        }
+        # Warnings are switched off from here: $out[$i] may not exist
+        # yet when the last line was exactly full.
+        local($^W) = 0;
+        if($j % 50 != 0 ) {
+            $out[$i] .= "\n";
+        }
+        $out[$i] .= "\n";
+        return unless $self->_print(@out);
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 GCG_checksum
+
+ Title     : GCG_checksum
+ Usage     : $cksum = $gcgio->GCG_checksum($seq);
+ Function  : returns a gcg checksum for the sequence specified
+
+             This method can also be called as a class method.
+ Example   :
+ Returns   : a GCG checksum string
+ Argument  : a Bio::PrimarySeqI implementing object
+
+=cut
+
+sub GCG_checksum {
+    my ($self,$seqobj) = @_;
+
+    # Work on an upper-cased copy of the residues.
+    my $residues = $seqobj->seq();
+    $residues =~ tr/a-z/A-Z/;
+
+    # Each character is weighted by its position, which cycles 1..57.
+    # The split pattern yields single characters and treats runs of
+    # '.' and '-' as separators.
+    my $position = 0;
+    my $total    = 0;
+    for my $residue ( split(/[\.\-]*/, $residues) ) {
+        $position = ( $position % 57 ) + 1;
+        $total += $position * ( unpack("c",$residue) || 0 );
+    }
+
+    return ($total % 10000);
+}
+
+=head2 _validate_checksum
+
+ Title   : _validate_checksum
+ Usage   : n/a - internal method
+ Function: if parsed gcg sequence contains a checksum field
+         : we compare it to a value computed here on the parsed
+         : sequence. A checksum mismatch would indicate some
+         : type of parsing failure occurred.
+         :
+ Returns : 1 for success, 0 for failure
+ Args    : string containing parsed seq, value of parsed checksum
+
+
+=cut
+
+sub _validate_checksum {
+    # Compare a checksum parsed from a GCG header with one computed
+    # from the parsed residues.
+    # Args    : residue string, parsed checksum value
+    # Returns : 1 when the two values agree, 0 otherwise
+    my ($seq, $parsed_sum) = @_;
+
+    # Nothing parsed counts as an empty sequence.
+    $seq = '' unless defined $seq;
+
+    # GCG_checksum in this module upper-cases the residues before
+    # summing, while next_seq keeps the case as read.  Normalise here so
+    # a lower-case sequence written by this module validates on
+    # read-back.
+    $seq =~ tr/a-z/A-Z/;
+
+    # Generate the GCG checksum value: each character code is weighted
+    # by its position, which cycles 1..57.
+    my $position     = 0;
+    my $computed_sum = 0;
+    for my $residue ( split //, $seq ) {
+        $position = ( $position % 57 ) + 1;
+        $computed_sum += $position * ord($residue);
+    }
+    $computed_sum %= 10000;
+
+    ## Compare and decide if success or failure
+    return ( $parsed_sum == $computed_sum ) ? 1 : 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/genbank.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/genbank.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/genbank.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1686 @@
+# $Id: genbank.pm,v 1.140.2.6 2006/11/23 18:58:19 cjfields Exp $
+#
+# BioPerl module for Bio::SeqIO::GenBank
+#
+# Cared for by Bioperl project bioperl-l(at)bioperl.org
+#
+# Copyright Elia Stupka and contributors see AUTHORS section
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::genbank - GenBank sequence input/output stream
+
+=head1 SYNOPSIS
+
+It is probably best not to use this object directly, but
+rather go through the SeqIO handler:
+
+    $stream = Bio::SeqIO->new(-file => $filename,
+                              -format => 'GenBank');
+
+    while ( my $seq = $stream->next_seq() ) {
+	    # do something with $seq
+    }
+
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from GenBank flat
+file databases.
+
+There is some flexibility here about how to write GenBank output
+that is not fully documented.
+
+=head2 Optional functions
+
+=over 3
+
+=item _show_dna()
+
+(output only) shows the dna or not
+
+=item _post_sort()
+
+(output only) provides a sorting func which is applied to the FTHelpers
+before printing
+
+=item _id_generation_func()
+
+This is a function which is called as
+
+   print "ID   ", $func($seq), "\n";
+
+To generate the ID line. If it is not there, it generates a sensible ID
+line using a number of tools.
+
+If you want to output annotations in Genbank format they need to be
+stored in a Bio::Annotation::Collection object which is accessible
+through the Bio::SeqI interface method L<annotation()|annotation>.
+
+The following are the names of the keys which are pulled from a
+L<Bio::Annotation::Collection> object:
+
+ reference       - Should contain Bio::Annotation::Reference objects
+ comment         - Should contain Bio::Annotation::Comment objects
+ dblink          - Should contain a Bio::Annotation::DBLink object
+ segment         - Should contain a Bio::Annotation::SimpleValue object
+ origin          - Should contain a Bio::Annotation::SimpleValue object
+ wgs             - Should contain a Bio::Annotation::SimpleValue object
+
+=back
+
+=head1 Where does the data go?
+
+Data parsed in Bio::SeqIO::genbank is stored in a variety of data
+fields in the sequence object that is returned. Here is a partial list
+of fields.
+
+Items listed as RichSeq or Seq or PrimarySeq and then NAME() tell you
+the top level object which defines a function called NAME() which
+stores this information.
+
+Items listed as Annotation 'NAME' tell you the data is stored the
+associated Bio::AnnotationCollectionI object which is associated with
+Bio::Seq objects.  If it is explicitly requested that no annotations
+should be stored when parsing a record of course they will not be
+available when you try and get them.  If you are having this problem
+look at the type of SeqBuilder that is being used to construct your
+sequence object.
+
+ Comments             Annotation 'comment'
+ References           Annotation 'reference'
+ Segment              Annotation 'segment'
+ Origin               Annotation 'origin'
+ Dbsource             Annotation 'dblink'
+
+ Accessions           PrimarySeq accession_number()
+ Secondary accessions RichSeq get_secondary_accessions()
+ GI number            PrimarySeq primary_id()
+ LOCUS                PrimarySeq display_id()
+ Keywords             RichSeq get_keywords()
+ Dates                RichSeq get_dates()
+ Molecule             RichSeq molecule()
+ Seq Version          RichSeq seq_version()
+ PID                  RichSeq pid()
+ Division             RichSeq division()
+ Features             Seq get_SeqFeatures()
+ Alphabet             PrimarySeq alphabet()
+ Definition           PrimarySeq description() or desc()
+ Version              PrimarySeq version()
+
+ Sequence             PrimarySeq seq()
+
+There is more information in the Feature-Annotation HOWTO about each
+field and how it is mapped to the Sequence object
+L<http://bioperl.open-bio.org/wiki/HOWTO:Feature-Annotation>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Bioperl Project
+
+bioperl-l at bioperl.org
+
+Original author Elia Stupka, elia -at- tigem.it
+
+=head1 CONTRIBUTORS
+
+Ewan Birney birney at ebi.ac.uk
+Jason Stajich jason at bioperl.org
+Chris Mungall cjm at fruitfly.bdgp.berkeley.edu
+Lincoln Stein lstein at cshl.org
+Heikki Lehvaslaiho, heikki at ebi.ac.uk
+Hilmar Lapp, hlapp at gmx.net
+Donald G. Jackson, donald.jackson at bms.com
+James Wasmuth, james.wasmuth at ed.ac.uk
+Brian Osborne, bosborne at alum.mit.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::genbank;
+use vars qw(%FTQUAL_NO_QUOTE);
+use strict;
+
+use Bio::SeqIO::FTHelper;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+
+use base qw(Bio::SeqIO);
+
+# Lookup table of feature-table qualifier names, each mapped to 1.
+%FTQUAL_NO_QUOTE = map { $_ => 1 } qw(
+    anticodon
+    citation
+    codon
+    codon_start
+    cons_splice
+    direction
+    evidence
+    label
+    mod_base
+    number
+    rpt_type
+    rpt_unit
+    transl_except
+    transl_table
+    usedin
+);
+
+sub _initialize {
+	# Run the generic SeqIO initialisation, then set the GenBank
+	# defaults: an empty decoder table, DNA output switched on, and a
+	# Bio::Seq::RichSeq factory when the caller supplied none.
+	my ($self, @args) = @_;
+
+	$self->SUPER::_initialize(@args);
+	# hash for functions for decoding keys.
+	$self->{'_func_ftunit_hash'} = {};
+	$self->_show_dna(1); # sets this to one by default. People can change it
+	if( ! defined $self->sequence_factory ) {
+		# Direct method call instead of indirect-object
+		# "new Class (...)", which Perl can mis-parse.
+		$self->sequence_factory(Bio::Seq::SeqFactory->new
+										(-verbose => $self->verbose(),
+										 -type => 'Bio::Seq::RichSeq'));
+	}
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    my ($self, at args) = @_;
+    my $builder = $self->sequence_builder();
+    my $seq;
+    my %params;
+
+  RECORDSTART:
+    while (1) {
+	my $buffer;
+	my (@acc, @features);
+	my ($display_id, $annotation);
+	my $species;
+
+	# initialize; we may come here because of starting over
+	@features = ();
+	$annotation = undef;
+	@acc = ();
+	$species = undef;
+	%params = (-verbose => $self->verbose);	# reset hash
+	local($/) = "\n";
+	while(defined($buffer = $self->_readline())) {
+	    last if index($buffer,'LOCUS       ') == 0;
+	}
+	return unless defined $buffer; # end of file
+	$buffer =~ /^LOCUS\s+(\S.*)$/o ||
+	    $self->throw("GenBank stream with bad LOCUS line. Not GenBank in my book. Got '$buffer'");
+
+	my @tokens = split(' ', $1);
+
+	# this is important to have the id for display in e.g. FTHelper,
+	# otherwise you won't know which entry caused an error
+	$display_id = shift(@tokens);
+	$params{'-display_id'} = $display_id;
+	# may still be useful if we don't want the seq
+	$params{'-length'} = shift(@tokens);
+	# the alphabet of the entry
+	$params{'-alphabet'} = (lc(shift @tokens) eq 'bp') ? 'dna' : 'protein';
+	# for aa there is usually no 'molecule' (mRNA etc)
+	if (($params{'-alphabet'} eq 'dna') || (@tokens > 2)) {
+	    $params{'-molecule'} = shift(@tokens);
+	    my $circ = shift(@tokens);
+	    if ($circ eq 'circular') {
+		$params{'-is_circular'} = 1;
+		$params{'-division'} = shift(@tokens);
+	    } else {
+				# 'linear' or 'circular' may actually be omitted altogether
+		$params{'-division'} =
+		    (CORE::length($circ) == 3 ) ? $circ : shift(@tokens);
+	    }
+	} else {
+	    $params{'-molecule'} = 'PRT' if($params{'-alphabet'} eq 'aa');
+	    $params{'-division'} = shift(@tokens);
+	}
+	my $date = join(' ', @tokens); # we lump together the rest
+
+	# this is per request bug #1513
+	# we can handle
+	# 9-10-2003
+	# 9-10-03
+	# 09-10-2003
+	# 09-10-03
+	if($date =~ s/\s*((\d{1,2})-(\w{3})-(\d{2,4})).*/$1/) {
+	    if( length($date) < 11 ) {
+		# improperly formatted date
+		# But we'll be nice and fix it for them
+		my ($d,$m,$y) = ($2,$3,$4);
+		if( length($d) == 1 ) {
+		    $d = "0$d";
+		}
+		# guess the century here
+		if( length($y) == 2 ) {
+		    if( $y > 60 ) { # arbitrarily guess that '60' means 1960
+			$y = "19$y";
+		    } else {
+			$y = "20$y";
+		    }
+		    $self->warn("Date was malformed, guessing the century for $date to be $y\n");
+		}
+		$params{'-dates'} = [join('-',$d,$m,$y)];
+	    } else {
+		$params{'-dates'} = [$date];
+	    }
+	}
+	# set them all at once
+	$builder->add_slot_value(%params);
+	%params = ();
+
+	# parse the rest if desired, otherwise start over
+	if(! $builder->want_object()) {
+	    $builder->make_object();
+	    next RECORDSTART;
+	}
+
+	# set up annotation depending on what the builder wants
+	if($builder->want_slot('annotation')) {
+	    $annotation = new Bio::Annotation::Collection;
+	}
+	$buffer = $self->_readline();
+	until( !defined ($buffer) ) {
+	    $_ = $buffer;
+	    # Description line(s)
+	    if (/^DEFINITION\s+(\S.*\S)/) {
+		my @desc = ($1);
+		while ( defined($_ = $self->_readline) ) {
+		    if( /^\s+(.*)/ ) { push (@desc, $1); next };
+		    last;
+		}
+		$builder->add_slot_value(-desc => join(' ', @desc));
+				# we'll continue right here because DEFINITION always comes
+				# at the top of the entry
+		$buffer= $_;
+	    }
+	    # accession number (there can be multiple accessions)
+	    if( /^ACCESSION\s+(\S.*\S)/ ) {
+		push(@acc, split(/\s+/,$1));
+		while( defined($_ = $self->_readline) ) {
+		    /^\s+(.*)/ && do { push (@acc, split(/\s+/,$1)); next };
+		    last;
+		}
+		$buffer = $_;
+		next;
+	    }
+	    # PID
+	    elsif( /^PID\s+(\S+)/ ) {
+		$params{'-pid'} = $1;
+	    }
+	    # Version number
+	    elsif( /^VERSION\s+(.+)$/ ) {
+		my ($acc,$gi) = split(' ',$1);
+		if($acc =~ /^\w+\.(\d+)/) {
+		    $params{'-version'} = $1;
+		    $params{'-seq_version'} = $1;
+		}
+		if($gi && (index($gi,"GI:") == 0)) {
+		    $params{'-primary_id'} = substr($gi,3);
+		}
+	    }
+	    # Keywords
+	    elsif( /^KEYWORDS\s+(.*)/ ) {
+		my @kw = split(/\s*\;\s*/,$1);
+		while( defined($_ = $self->_readline) ) {
+		    chomp;
+		    /^\s+(.*)/ && do { push (@kw, split(/\s*\;\s*/,$1)); next };
+		    last;
+		}
+
+		@kw && $kw[-1] =~ s/\.$//;
+		$params{'-keywords'} = \@kw;
+		$buffer = $_;
+		next;
+	    }
+	    # Organism name and phylogenetic information
+	    elsif (/^SOURCE/) {
+		if($builder->want_slot('species')) {
+		    $species = $self->_read_GenBank_Species(\$buffer);
+		    $builder->add_slot_value(-species => $species);
+		} else {
+		    while(defined($buffer = $self->_readline())) {
+			last if substr($buffer,0,1) ne ' ';
+		    }
+		}
+		next;
+	    }
+	    # References
+	    elsif (/^REFERENCE/) {
+		if($annotation) {
+		    my @refs = $self->_read_GenBank_References(\$buffer);
+		    foreach my $ref ( @refs ) {
+			$annotation->add_Annotation('reference',$ref);
+		    }
+		} else {
+		    while(defined($buffer = $self->_readline())) {
+			last if substr($buffer,0,1) ne ' ';
+		    }
+		}
+		next;
+	    }
+	    # Comments
+	    elsif (/^COMMENT\s+(.*)/) {
+		if($annotation) {
+		    my $comment = $1;
+		    while (defined($_ = $self->_readline)) {
+			last if (/^\S/);
+			$comment .= $_;
+		    }
+		    $comment =~ s/\n/ /g;
+		    $comment =~ s/  +/ /g;
+		    $annotation->add_Annotation('comment',
+						Bio::Annotation::Comment->new(-text => $comment,
+									      -tagname => 'comment'));
+		    $buffer = $_;
+		} else {
+		    while(defined($buffer = $self->_readline())) {
+			last if substr($buffer,0,1) ne ' ';
+		    }
+		}
+		next;
+	    }
+	    # Corresponding Genbank nucleotide id, Genpept only
+	    elsif( /^DBSOURCE\s+(.+)/ ) {
+		if ($annotation) {
+		    my $dbsource = $1;
+		    while (defined($_ = $self->_readline)) {
+			last if (/^\S/);
+			$dbsource .= $_;
+		    }
+				# deal with swissprot dbsources
+		    if( $dbsource =~ s/swissprot:\s+locus\s+(\S+)\,.+\n// ) {
+			$annotation->add_Annotation
+			    ('dblink',
+			     Bio::Annotation::DBLink->new
+			     (-primary_id => $1,
+			      -database => 'swissprot',
+			      -tagname => 'dblink'));
+			if( $dbsource =~ s/\s+created:\s+([^\.]+)\.\n// ) {
+			    $annotation->add_Annotation
+				('swissprot_dates',
+				 Bio::Annotation::SimpleValue->new
+				 (-tagname => 'date_created',
+				  -value => $1));
+			}
+			while( $dbsource =~ s/\s+(sequence|annotation)\s+updated:\s+([^\.]+)\.\n//g ) {
+			    $annotation->add_Annotation
+				('swissprot_dates',
+				 Bio::Annotation::SimpleValue->new
+				 (-tagname => 'date_updated',
+				  -value => $1));
+			}
+			$dbsource =~ s/\n/ /g;
+			if( $dbsource =~ s/\s+xrefs:\s+((?:\S+,\s+)+\S+)\s+xrefs/xrefs/ ) {
+			    # will use $i to determine even or odd
+			    # for swissprot the accessions are paired
+			    my $i = 0;
+			    for my $dbsrc ( split(/,\s+/,$1) ) {
+				if( $dbsrc =~ /(\S+)\.(\d+)/ ||
+				    $dbsrc =~ /(\S+)/ ) {
+				    my ($id,$version) = ($1,$2);
+				    $version ='' unless defined $version;
+				    my $db;
+				    if( $id =~ /^\d\S{3}/) {
+					$db = 'PDB';
+				    } else {
+					$db = ($i++ % 2 ) ? 'GenPept' : 'GenBank';
+				    }
+				    $annotation->add_Annotation
+					('dblink',
+					 Bio::Annotation::DBLink->new
+					 (-primary_id => $id,
+					  -version => $version,
+					  -database => $db,
+					  -tagname => 'dblink'));
+				}
+			    }
+			} elsif( $dbsource =~ s/\s+xrefs:\s+(.+)\s+xrefs/xrefs/i ) {
+			    # download screwed up and ncbi didn't put acc in for gi numbers
+			    my $i = 0;
+			    for my $id ( split(/\,\s+/,$1) ) {
+				my ($acc,$db);
+				if( $id =~ /gi:\s+(\d+)/ ) {
+				    $acc= $1;
+				    $db = ($i++ % 2 ) ? 'GenPept' : 'GenBank';
+				} elsif( $id =~ /pdb\s+accession\s+(\S+)/ ) {
+				    $acc= $1;
+				    $db = 'PDB';
+				} else {
+				    $acc= $id;
+				    $db = '';
+				}
+				$annotation->add_Annotation
+				    ('dblink',
+				     Bio::Annotation::DBLink->new
+				     (-primary_id => $acc,
+				      -database => $db,
+				      -tagname => 'dblink'));
+			    }
+			} else {
+			    $self->debug("Cannot match $dbsource\n");
+			}
+			if( $dbsource =~ s/xrefs\s+\(non\-sequence\s+databases\):\s+
+			    ((?:\S+,\s+)+\S+)//x ) {
+			    for my $id ( split(/\,\s+/,$1) ) {
+				my $db;
+				# this is because GenBank dropped the spaces!!!
+				# I'm sure we're not going to get this right
+				if( $id =~ s/^(EchoBASE|IntAct|SWISS-2DPAGE|ECO2DBASE|ECOGENE|TIGRFAMs|TIGR|GO|InterPro|Pfam|PROSITE|SGD|GermOnline|HSSP|PhosSite)://i ) {
+				    $db = $1;
+				}
+				$annotation->add_Annotation
+				    ('dblink',
+				     Bio::Annotation::DBLink->new
+				     (-primary_id => $id,
+				      -database => $db,
+				      -tagname => 'dblink'));
+			    }
+			}
+
+		    } else {
+			if( $dbsource =~ /(\S+)\.(\d+)/ ) {
+			    my ($id,$version) = ($1,$2);
+			    $annotation->add_Annotation
+				('dblink',
+				 Bio::Annotation::DBLink->new
+				 (-primary_id => $id,
+				  -version => $version,
+				  -database => 'GenBank',
+				  -tagname => 'dblink'));
+			}
+		    }
+
+		    $buffer = $_;
+		} else {
+		    while(defined($buffer = $self->_readline())) {
+			last if substr($buffer,0,1) ne ' ';
+		    }
+		}
+		next;
+	    }
+	    # Exit at start of Feature table, or start of sequence
+	    last if( /^(FEATURES|ORIGIN)/ );
+	    # Get next line and loop again
+	    $buffer = $self->_readline;
+	}
+	return unless defined $buffer;
+
+	# add them all at once for efficiency
+	$builder->add_slot_value(-accession_number => shift(@acc),
+				 -secondary_accessions => \@acc,
+				 %params);
+	$builder->add_slot_value(-annotation => $annotation) if $annotation;
+	%params = (); # reset before possible re-use to avoid setting twice
+
+	# start over if we don't want to continue with this entry
+	if(! $builder->want_object()) {
+	    $builder->make_object();
+	    next RECORDSTART;
+	}
+	# some "minimal" formats may not necessarily have a feature table
+	if($builder->want_slot('features') && defined($_) && /^FEATURES/o) {
+	    # need to read the first line of the feature table
+	    $buffer = $self->_readline;
+	    # DO NOT read lines in the while condition -- this is done as a side
+	    # effect in _read_FTHelper_GenBank!
+	    while( defined($buffer) ) {
+				# check immediately -- not at the end of the loop
+				# note: GenPept entries obviously do not have a BASE line
+		last if( $buffer =~ /^BASE|ORIGIN|CONTIG|WGS/o);
+
+				# slurp in one feature at a time -- at return, the start of
+				# the next feature will have been read already, so we need
+				# to pass a reference, and the called method must set this
+				# to the last line read before returning
+
+		my $ftunit = $self->_read_FTHelper_GenBank(\$buffer);
+
+				# fix suggested by James Diggans
+
+		if( !defined $ftunit ) {
+		    # GRRRR. We have fallen over. Try to recover
+		    $self->warn("Unexpected error in feature table for ".$params{'-display_id'}." Skipping feature, attempting to recover");
+		    unless( ($buffer =~ /^\s{5,5}\S+/o) or
+			    ($buffer =~ /^\S+/o)) {
+			$buffer = $self->_readline();
+		    }
+		    next;	# back to reading FTHelpers
+		}
+
+				# process ftunit
+		my $feat =
+		    $ftunit->_generic_seqfeature($self->location_factory(),
+						 $display_id);
+				# add taxon_id from source if available
+		if($species && ($feat->primary_tag eq 'source') &&
+		   $feat->has_tag('db_xref') && (! $species->ncbi_taxid() ||
+						 ($species->ncbi_taxid && $species->ncbi_taxid =~ /^list/))) {
+		    foreach my $tagval ($feat->get_tag_values('db_xref')) {
+			if(index($tagval,"taxon:") == 0) {
+			    $species->ncbi_taxid(substr($tagval,6));
+			    last;
+			}
+		    }
+		}
+				# add feature to list of features
+		push(@features, $feat);
+	    }
+	    $builder->add_slot_value(-features => \@features);
+	    $_ = $buffer;
+	}
+	if( defined ($_) ) {
+	    if( /^CONTIG/o ) {
+		my @contig;
+		while($_ !~ m{^//}) { # end of file
+		    $_ =~ /^(?:CONTIG)?\s+(.*)/;
+		    $annotation->add_Annotation(
+						Bio::Annotation::SimpleValue->new(-value   => $1,
+										  -tagname => 'CONTIG'));
+		    $_ = $self->_readline;
+		}
+		$self->_pushback($_);
+	    } elsif( /^WGS|WGS_SCAFLD\s+/o ) { # catch WGS/WGS_SCAFLD lines
+		while($_ =~ s/(^WGS|WGS_SCAFLD)\s+//){ # gulp lines
+		    chomp;
+		    $annotation->add_Annotation(
+						Bio::Annotation::SimpleValue->new(-value => $_,
+										  -tagname => $1));
+		    $_ = $self->_readline;
+		}
+	    } elsif(! m{^(ORIGIN|//)} ) { # advance to the sequence, if any
+		while (defined( $_ = $self->_readline) ) {
+		    last if m{^(ORIGIN|//)};
+		}
+	    }
+	}
+	if(! $builder->want_object()) {
+	    $builder->make_object(); # implicit end-of-object
+	    next RECORDSTART;
+	}
+	if($builder->want_slot('seq')) {
+	    # the fact that we want a sequence does not necessarily mean that
+	    # there also is a sequence ...
+	    if(defined($_) && s/^ORIGIN\s+//) {
+		chomp;
+		if( $annotation && length($_) > 0 ) {
+		    $annotation->add_Annotation('origin',
+						Bio::Annotation::SimpleValue->new(-tagname => 'origin',
+										  -value => $_));
+		}
+		my $seqc = '';
+		while( defined($_ = $self->_readline) ) {
+		    m{^//} && last;
+		    $_ = uc($_);
+		    s/[^A-Za-z]//g;
+		    $seqc .= $_;
+		}
+		$self->debug("sequence length is ". length($seqc) ."\n");
+		$builder->add_slot_value(-seq => $seqc);
+	    }
+	} elsif ( defined($_) && (substr($_,0,2) ne '//')) {
+	    # advance to the end of the record
+	    while( defined($_ = $self->_readline) ) {
+		last if substr($_,0,2) eq '//';
+	    }
+	}
+	# Unlikely, but maybe the sequence is so weird that we don't want it
+	# anymore. We don't want to return undef if the stream's not exhausted
+	# yet.
+	$seq = $builder->make_object();
+	next RECORDSTART unless $seq;
+	last RECORDSTART;
+    }				# end while RECORDSTART
+
+    return $seq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object (must be seq) to the stream
+ Returns : 1 for success and 0 for error
+ Args    : array of 1 to n Bio::SeqI objects
+
+
+=cut
+
+# Writes sequence objects to the output stream as GenBank flat-file records.
+#
+# For each sequence the record is emitted in this order: LOCUS (or the line
+# produced by the _id_generation_func callback), DEFINITION, ACCESSION, PID,
+# VERSION, DBSOURCE, KEYWORDS, SEGMENT, SOURCE/ORGANISM, REFERENCE blocks,
+# COMMENT, FEATURES, WGS/CONTIG annotations and, unless _show_dna is 0,
+# BASE COUNT (not for protein), ORIGIN and the sequence, closed by "//".
+#
+# Args    : one or more sequence objects (Bio::SeqI expected; anything else
+#           only triggers a warning before the attempt to write it)
+# Returns : 1 once every sequence in the argument list has been written
+# Throws  : if an element of the argument list is undefined
+#
+# Side effect: _show_dna is switched to 0 on this object whenever a record
+# carries WGS or CONTIG annotations or has length 0; it is not switched
+# back afterwards.
+sub write_seq {
+    my ($self, @seqs) = @_;
+
+    # break pattern handed to _write_line_GenBank_regex: wrap at whitespace
+    # or at the end of the text
+    my $ws_break = '\s+|$';
+
+    foreach my $seq ( @seqs ) {
+        $self->throw("Attempting to write with no seq!") unless defined $seq;
+
+        if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
+            $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
+        }
+
+        my $str = $seq->seq;
+
+        my ($div, $mol);
+        my $len = $seq->length();
+
+        if ( $seq->can('division') ) {
+            $div = $seq->division;
+        }
+        if( !defined $div || ! $div ) { $div = 'UNK'; }
+        my $alpha = $seq->alphabet;
+        if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) ) {
+            $mol = $alpha || 'DNA';
+        }
+
+        my $circular = 'linear  ';
+        $circular = 'circular' if $seq->is_circular;
+
+        local($^W) = 0;    # suppressing warnings about uninitialized fields.
+
+        # LOCUS line (or whatever the user-supplied callback produces)
+        my $temp_line;
+        if( $self->_id_generation_func ) {
+            $temp_line = $self->_id_generation_func->($seq);
+        } else {
+            my $date = '';
+            if( $seq->can('get_dates') ) {
+                ($date) = $seq->get_dates();
+            }
+
+            $self->warn("No whitespace allowed in GenBank display id [". $seq->display_id. "]")
+                if $seq->display_id =~ /\s/;
+
+            $temp_line = sprintf ("%-12s%-15s%13s %s%4s%-8s%-8s %3s %-s",
+                                  'LOCUS', $seq->id(),$len,
+                                  (lc($alpha) eq 'protein') ? ('aa','', '') :
+                                  ('bp', '',$mol),$circular,
+                                  $div,$date);
+        }
+
+        $self->_print("$temp_line\n");
+        $self->_write_line_GenBank_regex("DEFINITION  ", "            ",
+                                         $seq->desc(), $ws_break, 80);
+
+        # if there, write the accession line
+
+        if( $self->_ac_generation_func ) {
+            $temp_line = $self->_ac_generation_func->($seq);
+            $self->_print("ACCESSION   $temp_line\n");
+        } else {
+            my @acc = ();
+            push(@acc, $seq->accession_number());
+            if( $seq->isa('Bio::Seq::RichSeqI') ) {
+                push(@acc, $seq->get_secondary_accessions());
+            }
+            $self->_print("ACCESSION   ", join(" ", @acc), "\n");
+            # otherwise - cannot print <sigh>
+        }
+
+        # if PID defined, print it
+        if($seq->isa('Bio::Seq::RichSeqI') && $seq->pid()) {
+            $self->_print("PID         ", $seq->pid(), "\n");
+        }
+
+        # if there, write the version line
+
+        if( defined $self->_sv_generation_func() ) {
+            $temp_line = $self->_sv_generation_func->($seq);
+            if( $temp_line ) {
+                $self->_print("VERSION     $temp_line\n");
+            }
+        } else {
+            if($seq->isa('Bio::Seq::RichSeqI') && defined($seq->seq_version)) {
+                my $id = $seq->primary_id(); # this may be a GI number
+                $self->_print("VERSION     ",
+                              $seq->accession_number(), ".", $seq->seq_version,
+                              ($id && ($id =~ /^\d+$/) ? "  GI:".$id : ""),
+                              "\n");
+            }
+        }
+
+        # if there, write the DBSOURCE line
+        foreach my $ref ( $seq->annotation->get_Annotations('dblink') ) {
+            # if ($ref->comment eq 'DBSOURCE') {
+            $self->_print('DBSOURCE    accession ',
+                          $ref->primary_id, "\n");
+            # }
+        }
+
+        # if there, write the keywords line
+        if( defined $self->_kw_generation_func() ) {
+            $temp_line = $self->_kw_generation_func->($seq);
+            $self->_print("KEYWORDS    $temp_line\n");
+        } else {
+            if( $seq->can('keywords') ) {
+                my $kw = $seq->keywords;
+                $kw .= '.' if( $kw !~ /\.$/ );
+                $self->_print("KEYWORDS    $kw\n");
+            }
+        }
+
+        # SEGMENT if it exists
+        foreach my $ref ( $seq->annotation->get_Annotations('segment') ) {
+            $self->_print(sprintf ("%-11s %s\n",'SEGMENT',
+                                   $ref->value));
+        }
+
+        # Organism lines
+        if (my $spec = $seq->species) {
+            my ($on, $sn, $cn) = ($spec->organelle,
+                                  $spec->scientific_name,
+                                  $spec->common_name);
+
+            my $abname = $spec->name('abbreviated') ? # from genbank file
+                $spec->name('abbreviated')->[0] : $sn;
+            my $sl = $on ? "$on "            : '';
+            $sl   .= $cn ? $abname." ($cn)." : "$abname.";
+
+            $self->_write_line_GenBank_regex("SOURCE      ", ' 'x12, $sl, $ws_break, 80);
+            $self->_print("  ORGANISM  ", $spec->scientific_name, "\n");
+            my @classification = $spec->classification;
+            # first element is dropped before the lineage is printed
+            shift(@classification);
+            my $OC = join('; ', (reverse(@classification))) .'.';
+            $self->_write_line_GenBank_regex(' 'x12,' 'x12,
+                                             $OC, $ws_break, 80);
+        }
+
+        # Reference lines
+        my $count = 1;
+        foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
+            $temp_line = "REFERENCE   $count";
+            if ($ref->start) {
+                $temp_line .= sprintf ("  (%s %d to %d)",
+                                       ($seq->alphabet() eq "protein" ?
+                                        "residues" : "bases"),
+                                       $ref->start,$ref->end);
+            } elsif ($ref->gb_reference) {
+                $temp_line .= sprintf ("  (%s)", $ref->gb_reference);
+            }
+            $self->_print("$temp_line\n");
+            $self->_write_line_GenBank_regex("  AUTHORS   ",' 'x12,
+                                             $ref->authors, $ws_break, 80);
+            $self->_write_line_GenBank_regex("  CONSRTM   ",' 'x12,
+                                             $ref->consortium, $ws_break, 80) if $ref->consortium;
+            $self->_write_line_GenBank_regex("  TITLE     "," "x12,
+                                             $ref->title, $ws_break, 80);
+            $self->_write_line_GenBank_regex("  JOURNAL   "," "x12,
+                                             $ref->location, $ws_break, 80);
+            if( $ref->medline) {
+                $self->_write_line_GenBank_regex("  MEDLINE   "," "x12,
+                                                 $ref->medline, $ws_break, 80);
+                # I am assuming that pubmed entries only exist when there
+                # are also MEDLINE entries due to the indentation
+            }
+            # This could be a wrong assumption
+            if( $ref->pubmed ) {
+                $self->_write_line_GenBank_regex("   PUBMED   "," "x12,
+                                                 $ref->pubmed, $ws_break,
+                                                 80);
+            }
+            # put remark at the end
+            if ($ref->comment) {
+                $self->_write_line_GenBank_regex("  REMARK    "," "x12,
+                                                 $ref->comment, $ws_break, 80);
+            }
+            $count++;
+        }
+
+        # Comment lines
+        foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
+            $self->_write_line_GenBank_regex("COMMENT     "," "x12,
+                                             $comment->text, $ws_break, 80);
+        }
+        $self->_print("FEATURES             Location/Qualifiers\n");
+
+        if( defined $self->_post_sort ) {
+            # we need to read things into an array. Process. Sort them. Print 'em
+
+            my $post_sort_func = $self->_post_sort();
+            my @fth;
+
+            foreach my $sf ( $seq->top_SeqFeatures ) {
+                push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq));
+            }
+
+            @fth = sort { $post_sort_func->($a,$b) } @fth;
+
+            foreach my $fth ( @fth ) {
+                $self->_print_GenBank_FTHelper($fth);
+            }
+        } else {
+            # not post sorted. And so we can print as we get them.
+            # lower memory load...
+
+            foreach my $sf ( $seq->top_SeqFeatures ) {
+                my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq);
+                foreach my $fth ( @fth ) {
+                    if( ! $fth->isa('Bio::SeqIO::FTHelper') ) {
+                        $sf->throw("Cannot process FTHelper... $fth");
+                    }
+                    $self->_print_GenBank_FTHelper($fth);
+                }
+            }
+        }
+
+        # deal with WGS; WGS_SCAFLD present only if WGS is also present
+        if($seq->annotation->get_Annotations('WGS')) {
+            foreach my $wgs
+                (map {$seq->annotation->get_Annotations($_)} qw(WGS WGS_SCAFLD)) {
+                    $self->_print(sprintf ("%-11s %s\n",$wgs->tagname,
+                                           $wgs->value));
+                }
+            $self->_show_dna(0);
+        }
+        if($seq->annotation->get_Annotations('CONTIG')) {
+            my $ct = 0;
+            my $cline;
+            foreach my $contig ($seq->annotation->get_Annotations('CONTIG')) {
+                # only the first CONTIG line carries the tag; the rest are
+                # continuation lines indented to the same column
+                unless ($ct) {
+                    $cline = $contig->tagname."      ".$contig->value."\n";
+                } else {
+                    $cline = "            ".$contig->value."\n";
+                }
+                $self->_print($cline);
+                $ct++;
+            }
+            $self->_show_dna(0);
+        }
+        if( $seq->length == 0 ) { $self->_show_dna(0) }
+
+        if( $self->_show_dna() == 0 ) {
+            # sequence output is suppressed: close the record right away and
+            # carry on with the next sequence instead of leaving the method
+            $self->_print("\n//\n");
+        } else {
+            # finished printing features.
+
+            $str =~ tr/A-Z/a-z/;
+
+            # Count each nucleotide
+            unless(  $mol eq 'protein' ) {
+                my $alen = $str =~ tr/a/a/;
+                my $clen = $str =~ tr/c/c/;
+                my $glen = $str =~ tr/g/g/;
+                my $tlen = $str =~ tr/t/t/;
+
+                my $olen = $len - ($alen + $tlen + $clen + $glen);
+                if( $olen < 0 ) {
+                    $self->warn("Weird. More atgc than bases. Problem!");
+                }
+
+                my $base_count = sprintf("BASE COUNT %8s a %6s c %6s g %6s t%s\n",
+                                         $alen,$clen,$glen,$tlen,
+                                         ( $olen > 0 ) ?
+                                         sprintf("%6s others",$olen) : '');
+                $self->_print($base_count);
+            }
+
+            my ($o) = $seq->annotation->get_Annotations('origin');
+            $self->_print(sprintf("%-12s%s\n",
+                                  'ORIGIN', $o ? $o->value : ''));
+            # print out the sequence
+            my $nuc = 60;              # Number of nucleotides per line
+            my $whole_pat = 'a10' x 6; # Pattern for unpacking a whole line
+            my $out_pat   = 'A11' x 6; # Pattern for packing a line
+            my $length = length($str);
+
+            # Calculate the number of nucleotides which fit on whole lines
+            my $whole = int($length / $nuc) * $nuc;
+
+            # Print the whole lines
+            my $i;
+            for ($i = 0; $i < $whole; $i += $nuc) {
+                my $blocks = pack $out_pat,
+                    unpack $whole_pat,
+                    substr($str, $i, $nuc);
+                chop $blocks;
+                # the sequence text goes in as an argument, never as part of
+                # the format string
+                $self->_print(sprintf("%9d %s\n", $i + $nuc - 59, $blocks));
+            }
+
+            # Print the last line
+            if (my $last = substr($str, $i)) {
+                my $last_len = length($last);
+                my $last_pat = 'a10' x int($last_len / 10) .
+                    'a'. $last_len % 10;
+                my $blocks = pack $out_pat,
+                    unpack($last_pat, $last);
+                $blocks =~ s/ +$//;
+                $self->_print(sprintf("%9d %s\n",
+                                      $length - $last_len + 1, $blocks));
+            }
+
+            $self->_print("//\n");
+        }
+
+        $self->flush if $self->_flush_on_write && defined $self->_fh;
+    }
+    return 1;
+}
+
+=head2 _print_GenBank_FTHelper
+
+ Title   : _print_GenBank_FTHelper
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+# Prints one feature-table entry: the feature key and its location first,
+# then one qualifier line ("/tag", "/tag=value" or "/tag="value"") per
+# stored value.
+#
+# Args    : $fth - the feature helper to print; it must provide key(), loc()
+#           and field() (a hash ref mapping a qualifier name to an array
+#           ref of values).  Anything that is not a Bio::SeqIO::FTHelper
+#           only triggers a warning before printing is attempted.
+# Returns : nothing meaningful
+sub _print_GenBank_FTHelper {
+    my ($self,$fth) = @_;
+
+    if( ! ref $fth || ! $fth->isa('Bio::SeqIO::FTHelper') ) {
+        # warn through $self: $fth may be a plain scalar or a foreign object
+        # here, so it cannot be relied upon to provide warn()
+        $self->warn("$fth is not a FTHelper class. Attempting to print, but there could be tears!");
+    }
+    $self->_write_line_GenBank_regex(sprintf("     %-16s",$fth->key),
+                                     " "x21,
+                                     $fth->loc, ',|$', 80);
+    foreach my $tag ( keys %{$fth->field} ) {
+        foreach my $value ( @{$fth->field->{$tag}} ) {
+            # work on a copy: $value aliases the element stored in the
+            # helper, and escaping it in place would double the quotes again
+            # every time the same helper is printed
+            my $text = $value;
+            $text =~ s/\"/\"\"/g;
+            if ($text eq "_no_value") {
+                # qualifier without a value
+                $self->_write_line_GenBank_regex(" "x21,
+                                                 " "x21,
+                                                 "/$tag", '.|$', 80);
+            }
+            # there are almost 3x more quoted qualifier values and they
+            # are more common too so we take quoted ones first
+            elsif (!$FTQUAL_NO_QUOTE{$tag}) {
+                # wrap at whitespace when the value has any, otherwise
+                # allow a break after any character
+                my ($pat) = ($text =~ /\s/ ? '\s|$' : '.|$');
+                $self->_write_line_GenBank_regex(" "x21,
+                                                 " "x21,
+                                                 "/$tag=\"$text\"",$pat,80);
+
+            } else {
+                $self->_write_line_GenBank_regex(" "x21,
+                                                 " "x21,
+                                                 "/$tag=$text", '.|$', 80);
+            }
+        }
+    }
+
+}
+
+
+=head2 _read_GenBank_References
+
+ Title   : _read_GenBank_References
+ Usage   :
+ Function: Reads references from GenBank format. Internal function really
+ Returns : a list of Bio::Annotation::Reference objects
+ Args    : a reference to the current line buffer
+
+
+=cut
+
+# Parses the REFERENCE section of a GenBank record.
+#
+# Args    : $buffer - reference to a scalar holding the current input line
+#           (expected to be a REFERENCE line); on return it holds the line
+#           at which parsing stopped, or undef when the input ran out
+# Returns : list of Bio::Annotation::Reference objects, one per REFERENCE
+#           line seen, in input order
+#
+# Note: the sub works on the global $_ and leaves it modified.
+sub _read_GenBank_References {
+    my ($self,$buffer) = @_;
+    my (@refs);
+    my $ref;
+
+    # assume things are starting with a REFERENCE line
+
+    if( $$buffer !~ /^REFERENCE/ ) {
+        warn("Not parsing line '$$buffer' which maybe important");
+    }
+
+    $_ = $$buffer;
+
+    # text collected for the reference currently being read
+    my (@title, @loc, @authors, @consort, @com, @medline, @pubmed);
+
+  REFLOOP: while( defined($_) || defined($_ = $self->_readline) ) {
+        # AUTHORS, CONSRTM and TITLE fall through to the following tests
+        # with the first non-continuation line in $_; the remaining fields
+        # restart the loop body on that line via redo
+        if (/^\s{2}AUTHORS\s+(.*)/o) {
+            push (@authors, $1);
+            while ( defined($_ = $self->_readline) ) {
+                /^\s{9,}(.*)/o && do { push (@authors, $1);next;};
+                last;
+            }
+            $ref->authors(join(' ', @authors));
+        }
+        if (/^\s{2}CONSRTM\s+(.*)/o) {
+            push (@consort, $1);
+            while ( defined($_ = $self->_readline) ) {
+                /^\s{9,}(.*)/o && do { push (@consort, $1);next;};
+                last;
+            }
+            $ref->consortium(join(' ', @consort));
+        }
+        if (/^\s{2}TITLE\s+(.*)/o)  {
+            push (@title, $1);
+            while ( defined($_ = $self->_readline) ) {
+                /^\s{9,}(.*)/o && do { push (@title, $1);
+                                       next;
+                                   };
+                last;
+            }
+            $ref->title(join(' ', @title));
+        }
+        if (/^\s{2}JOURNAL\s+(.*)/o) {
+            push(@loc, $1);
+            while ( defined($_ = $self->_readline) ) {
+                # a continuation line is recognised by at least 9 leading
+                # whitespace characters; there is probably a better way to
+                # match this as it assumes that the describing tag is
+                # short enough
+                /^\s{9,}(.*)/o && do { push(@loc, $1);
+                                       next;
+                                   };
+                last;
+            }
+            $ref->location(join(' ', @loc));
+            redo REFLOOP;
+        }
+        if (/^\s{2}REMARK\s+(.*)/o) {
+            push (@com, $1);
+            while ( defined($_ = $self->_readline) ) {
+                /^\s{9,}(.*)/o && do { push(@com, $1);
+                                       next;
+                                   };
+                last;
+            }
+            $ref->comment(join(' ', @com));
+            redo REFLOOP;
+        }
+        if( /^\s{2}MEDLINE\s+(.*)/ ) {
+            push(@medline,$1);
+            while ( defined($_ = $self->_readline) ) {
+                /^\s{9,}(.*)/ && do { push(@medline, $1);
+                                      next;
+                                  };
+                last;
+            }
+            $ref->medline(join(' ', @medline));
+            redo REFLOOP;
+        }
+        if( /^\s{3}PUBMED\s+(.*)/ ) {
+            push(@pubmed,$1);
+            while ( defined($_ = $self->_readline) ) {
+                /^\s{9,}(.*)/ && do { push(@pubmed, $1);
+                                      next;
+                                  };
+                last;
+            }
+            $ref->pubmed(join(' ', @pubmed));
+            redo REFLOOP;
+        }
+
+        /^REFERENCE/o && do {
+            # store current reference
+            $self->_add_ref_to_array(\@refs,$ref) if $ref;
+            # reset everything collected for the previous reference,
+            # including the consortium lines, so nothing leaks into the
+            # next one
+            @authors = ();
+            @consort = ();
+            @title = ();
+            @loc = ();
+            @com = ();
+            @pubmed = ();
+            @medline = ();
+            # create the new reference object
+            $ref = Bio::Annotation::Reference->new(-tagname => 'reference');
+            # check whether start and end base is given
+            if (/^REFERENCE\s+\d+\s+\([a-z]+ (\d+) to (\d+)\)/){
+                $ref->start($1);
+                $ref->end($2);
+            } elsif (/^REFERENCE\s+\d+\s+\((.*)\)/) {
+                $ref->gb_reference($1);
+            }
+        };
+
+        # the reference section ends at the FEATURES or COMMENT line; both
+        # alternatives are anchored to the start of the line
+        /^(?:FEATURES|COMMENT)/o && last;
+
+        $_ = undef;        # Empty $_ to trigger read of next line
+    }
+
+    # store last reference
+    $self->_add_ref_to_array(\@refs,$ref) if $ref;
+
+    $$buffer = $_;
+
+    #print "\nnumber of references found: ", $#refs+1,"\n";
+
+    return @refs;
+}
+
+#
+# Kept out of the POD because nothing but _read_GenBank_References is
+# meant to call it. For those who still want to know:
+#
+# Purpose: appends a Reference object to an array of Reference objects
+#     after tidying it up (currently this only means stripping a trailing
+#     semicolon, and any whitespace after it, from the authors and the
+#     title).
+# Parameters:
+#     a reference to an array of Reference objects
+#     the Reference object to be added
+# Returns: nothing callers should rely on
+#
+sub _add_ref_to_array {
+    my ($self, $refs, $ref) = @_;
+
+    # polish both text fields the same way; each one is written back even
+    # when it was empty or undefined to begin with
+    foreach my $field (qw(authors title)) {
+        my $text = $ref->$field();
+        $text =~ s/;\s*$//g if $text;
+        $ref->$field($text);
+    }
+
+    # the rest should be clean already, so go ahead and add it
+    push(@{$refs}, $ref);
+}
+
+=head2 _read_GenBank_Species
+
+ Title   : _read_GenBank_Species
+ Usage   :
+ Function: Reads the GenBank Organism species and classification
+           lines. Able to deal with unconventional Organism naming
+           formats, and varietas in plants
+ Example : ORGANISM  unknown marine gamma proteobacterium NOR5
+           $genus = undef
+           $species = unknown marine gamma proteobacterium NOR5
+
+           ORGANISM  Drosophila sp. 'white tip scutellum'
+           $genus = Drosophila
+           $species = sp. 'white tip scutellum'
+           (yes, this really is a species and that is its name)
+           $subspecies = undef
+
+           ORGANISM  Ajellomyces capsulatus var. farciminosus
+           $genus = Ajellomyces
+           $species = capsulatus
+           $subspecies = var. farciminosus
+
+           ORGANISM  Hepatitis delta virus
+           $genus = undef (though this virus has a genus in its lineage, we
+                           cannot know that without a database lookup)
+           $species = Hepatitis delta virus
+
+ Returns : A Bio::Species object
+ Args    : A reference to the current line buffer
+
+=cut
+
+# Reads the SOURCE / ORGANISM block of a GenBank record (source line,
+# organism name and the classification lines that follow) and turns it
+# into a Bio::Species object.
+#
+# Args    : $buffer - reference to a scalar holding the current input line
+#           (the SOURCE line on entry); on return it holds the first line
+#           that does not belong to the block, or undef at end of input
+# Returns : a Bio::Species object, or nothing when no organism name was
+#           read, when neither a species nor a genus could be derived from
+#           it, or when the SOURCE text is one of the "unknown organism"
+#           placeholders listed in @unkn_names
+#
+# Note: the sub works on the global $_ and leaves it modified.
+sub _read_GenBank_Species {
+    my ($self, $buffer) = @_;
+
+    # SOURCE texts for which no species object is made
+    my @unkn_names = ('other', 'unknown organism', 'not specified', 'not shown',
+                      'Unspecified', 'Unknown', 'None', 'unclassified',
+                      'unidentified organism', 'not supplied');
+
+    $_ = $$buffer;
+
+    my( $sub_species, $species, $genus, $sci_name, $common, $class_lines,
+        $source_flag, $abbr_name, $organelle, $sl );
+    # upon first entering the loop, we must not read a new line -- the SOURCE
+    # line is already in the buffer (HL 05/10/2000)
+    while (defined($_) || defined($_ = $self->_readline())) {
+        # de-HTMLify (links that may be encountered here don't contain
+        # escaped '>', so a simple-minded approach suffices)
+        s/<[^>]+>//g;
+        if ( /^SOURCE\s+(.*)/o ) {
+            $sl = $1;
+            $sl =~ s/\.$//;    # remove trailing dot
+            $source_flag = 1;
+        } elsif ( /^\s{2}ORGANISM/o ) {
+            $source_flag = 0;
+            ($sci_name) = $_ =~ /\w+\s+(.*)/o;
+        } elsif ($source_flag) {
+            # the line right after SOURCE that is not ORGANISM continues
+            # the source text; only one such line is accepted
+            $sl .= $_;
+            $sl =~ s/\n//g;
+            $sl =~ s/\s+/ /g;
+            $source_flag = 0;
+        } elsif ( /^\s+(.+)/o ) {
+            my $line = $1;
+            # if first line doesn't end in ; or ., it is part of a long
+            # organism line
+            if ($line !~ /[;\.]$/) {
+                $sci_name .= ' '.$line;
+            }
+            else {
+                $class_lines .= $line;
+            }
+        } else {
+            last;
+        }
+
+        $_ = undef;        # Empty $_ to trigger read of next line
+    }
+    $$buffer = $_;
+
+    # parse out organelle, common name, abbreviated name if present;
+    # this should catch everything, but falls back to
+    # entire SOURCE line just in case
+    if ($sl =~ m{^
+                 (mitochondrion|chloroplast|plastid)?
+                 \s*(.*?)
+                 \s*(?: \( (.*?) \) )?\.?
+                 $
+             }xms ){
+        ($organelle, $abbr_name, $common) = ($1, $2, $3); # optional
+    } else {
+        $abbr_name = $sl;    # nothing caught; this is a backup!
+    }
+
+    $sci_name || return;
+
+    # Convert data in classification lines into classification array.
+    # only split on ';' or '.' so that classification that is 2 or more words will
+    # still get matched, use map() to remove trailing/leading/intervening spaces
+    # (a record without classification lines yields an empty array)
+    my @class = map { s/^\s+//; s/\s+$//; s/\s{2,}/ /g; $_; }
+        split /[;\.]+/, (defined $class_lines ? $class_lines : '');
+
+    # do we have a genus?  The last two lineage entries are the candidates;
+    # they are data, not patterns, so they are quoted before being used in
+    # the match
+    my $possible_genus = defined $class[-1] ? quotemeta($class[-1]) : '';
+    $possible_genus .= '|' . quotemeta($class[-2]) if $class[-2];
+    if ($sci_name =~ /^($possible_genus)/) {
+        $genus = $1;
+        ($species) = $sci_name =~ /^\Q$genus\E\s+(.+)/;
+    }
+    else {
+        $species = $sci_name;
+    }
+
+    # is this organism of rank species or is it lower?
+    # (we don't catch everything lower than species, but it doesn't matter -
+    # this is just so we abide by previous behaviour whilst not calling a
+    # species a subspecies)
+    if ($species =~ /subsp\.|var\./) {
+        ($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
+    }
+
+    # Don't make a species object if it's empty or "Unknown" or "None"
+    # return unless $genus and  $genus !~ /^(Unknown|None)$/oi;
+    # Don't make a species object if it belongs to taxid 32644
+#    my $unkn = grep { $_ =~ /^\Q$sl\E$/; } @unkn_names;
+    my $unkn = grep { $_ eq $sl } @unkn_names;
+    return unless ($species || $genus) and $unkn == 0;
+
+    # Bio::Species array needs array in Species -> Kingdom direction
+    push(@class, $sci_name);
+    @class = reverse @class;
+
+    my $make = Bio::Species->new();
+    $make->scientific_name($sci_name);
+    $make->classification(@class) if @class > 0;
+    $make->common_name( $common ) if $common;
+    $make->name('abbreviated', $abbr_name) if $abbr_name;
+    $make->organelle($organelle) if $organelle;
+    #$make->sub_species( $sub_species ) if $sub_species;
+    return $make;
+}
+
+=head2 _read_FTHelper_GenBank
+    
+ Title   : _read_FTHelper_GenBank
+ Usage   : _read_FTHelper_GenBank($buffer)
+ Function: reads the next FT key line
+ Example :
+ Returns : Bio::SeqIO::FTHelper object, or nothing if the buffer does
+           not hold a feature key line
+ Args    : reference to a scalar holding the current line
+
+=cut
+
+# Reads one feature (key, location and qualifiers) from the feature table.
+#
+# Args    : $buffer - reference to a scalar holding the feature's first
+#           line; on return it holds the first line after the feature
+#           (undef at end of input).  When the buffer does not look like a
+#           feature key line, one more line is read into it so that the
+#           caller cannot loop forever on the same line.
+# Returns : a Bio::SeqIO::FTHelper with key, loc and the qualifier fields
+#           set, or nothing when the buffer held no feature key
+#
+# Note: the sub works on the global $_ and leaves it modified.
+sub _read_FTHelper_GenBank {
+    my ($self,$buffer) = @_;
+
+    my ($key,      # The key of the feature
+        $loc       # The location line from the feature
+       );
+    my @qual = ();    # An array of lines making up the qualifiers
+
+    if ($$buffer =~ /^\s{5}(\S+)\s+(.+?)\s*$/o) {
+        $key = $1;
+        $loc = $2;
+        # Read all the lines up to the next feature
+        while ( defined($_ = $self->_readline) ) {
+            if (/^(\s+)(.+?)\s*$/o) {
+                # Lines inside features are preceded by 21 spaces
+                # A new feature is preceded by 5 spaces
+                # (anything indented by more than 6 counts as "inside")
+                if (length($1) > 6) {
+                    # Add to qualifiers if we're in the qualifiers, or if it's
+                    # the first qualifier
+                    if (@qual || (index($2,'/') == 0)) {
+                        push(@qual, $2);
+                    }
+                    # We're still in the location line, so append to location
+                    else {
+                        $loc .= $2;
+                    }
+                } else {
+                    # We've reached the start of the next feature
+                    last;
+                }
+            } else {
+                # We're at the end of the feature table
+                last;
+            }
+        }
+    } else {
+        # No feature key
+        $self->debug("no feature key!\n");
+        # change suggested by JDiggans to avoid infinite loop-
+        # see bugreport 1062.
+        # reset buffer to prevent infinite loop
+        $$buffer = $self->_readline();
+        return;
+    }
+
+    # Put the first line of the next feature into the buffer
+    $$buffer = $_;
+
+    # Make the new FTHelper object
+    my $out = Bio::SeqIO::FTHelper->new();
+    $out->verbose($self->verbose());
+    $out->key($key);
+    $out->loc($loc);
+
+    # Now parse and add any qualifiers.  (@qual is kept
+    # intact to provide informative error messages.)
+  QUAL:
+    for (my $i = 0; $i < @qual; $i++) {
+        $_ = $qual[$i];
+        my( $qualifier, $value ) = (m{^/([^=]+)(?:=(.+))?})
+            or $self->warn("cannot see new qualifier in feature $key: ".
+                           $qual[$i]);
+        $qualifier = '' unless( defined $qualifier);
+        if (defined $value) {
+            # Do we have a quoted value?
+            if (substr($value, 0, 1) eq '"') {
+                # Keep adding to value until we find the trailing quote
+                # and the quotes are balanced
+                while ($value !~ /\"$/ or $value =~ tr/"/"/ % 2) {
+                    if($i >= $#qual) {
+                        $self->warn("Unbalanced quote in:\n" .
+                                    join("\n", @qual) .
+                                    "\nNo further qualifiers will " .
+                                    "be added for this feature");
+                        last QUAL;
+                    }
+                    $i++; # modifying a for-loop variable inside of the loop
+                    # is not the best programming style ...
+                    my $next = $qual[$i];
+
+                    # add to value with a space unless the value appears
+                    # to be a sequence (translation for example)
+                    # if(($value.$next) =~ /[^A-Za-z\"\-]/o) {
+                    # changed to explicitly look for translation tag - cjf 06/8/29
+                    if ($qualifier ne 'translation') {
+                        $value .= " ";
+                    }
+                    $value .= $next;
+                }
+                # Trim leading and trailing quotes
+                $value =~ s/^"|"$//g;
+                # Undouble internal quotes
+                $value =~ s/""/\"/g;
+            } elsif ( $value =~ /^\(/ ) { # values quoted by ()s
+                # Keep adding to value until we find the trailing bracket
+                # and the ()s are balanced
+                my $left = ($value =~ tr/\(/\(/); # count left parens
+                my $right = ($value =~ tr/\)/\)/); # count right parens
+
+                while( $left != $right ) { # was "$value !~ /\)$/ or $left != $right"
+                    if( $i >= $#qual) {
+                        $self->warn("Unbalanced parens in:\n".
+                                    join("\n", @qual).
+                                    "\nNo further qualifiers will ".
+                                    "be added for this feature");
+                        last QUAL;
+                    }
+                    $i++;
+                    my $next = $qual[$i];
+                    $value .= $next;
+                    $left += ($next =~ tr/\(/\(/);
+                    $right += ($next =~ tr/\)/\)/);
+                }
+            }
+        } else {
+            # marker for a qualifier that carries no value
+            $value = '_no_value';
+        }
+        # Store the qualifier
+        $out->field->{$qualifier} ||= [];
+        push(@{$out->field->{$qualifier}},$value);
+    }
+    return $out;
+}
+
+=head2 _write_line_GenBank
+
+ Title   : _write_line_GenBank
+ Usage   :
+ Function: internal function
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+# Prints $line cut into fixed-width pieces: the first piece follows $pre1,
+# every further piece follows $pre2, and each printed line (prefix
+# included) is at most $length characters wide.  The text is cut purely by
+# position.
+#
+# Args    : $pre1   - prefix of the first line
+#           $pre2   - prefix of every continuation line
+#           $line   - text to print
+#           $length - total line width; a false value is reported via throw
+# Returns : nothing callers should rely on
+sub _write_line_GenBank {
+    my ($self,$pre1,$pre2,$line,$length) = @_;
+
+    $self->throw("Miscalled write_line_GenBank without length. Programming error!")
+        unless $length;
+
+    my $first_width = $length - length $pre1;
+    my $rest_width  = $length - length $pre2;
+    my $total       = length $line;
+
+    # the first line is always printed, even for an empty text
+    $self->_print($pre1 . substr($line, 0, $first_width) . "\n");
+
+    my $offset = $first_width;
+    while ($offset < $total) {
+        $self->_print($pre2 . substr($line, $offset, $rest_width) . "\n");
+        $offset += $rest_width;
+    }
+
+}
+
+=head2 _write_line_GenBank_regex
+
+ Title   : _write_line_GenBank_regex
+ Usage   :
+ Function: internal function for writing lines of specified
+           length, with different first and the next line
+           left hand headers and split at specific points in the
+           text
+ Example :
+ Returns : nothing
+ Args    : first header,
+           second header,
+           text-line,
+           regex for line breaks,
+           total line length
+
+
+=cut
+
+# Prints $line wrapped into several lines: the first one prefixed with
+# $pre1, the following ones with $pre2.  Each chunk holds at most
+# ($length - length($pre1) - 2) characters and ends where $regex matches;
+# if that is impossible, a break after one of , ; . / - is tried, and as a
+# last resort a space is forced into the text at the width limit (with a
+# warning).  Trailing whitespace is removed from every chunk.
+#
+# Args    : $pre1   - prefix of the first line
+#           $pre2   - prefix of every continuation line
+#           $line   - text to print; nothing is printed for an empty or
+#                     undefined text
+#           $regex  - pattern (as a string) at which a line may be broken
+#           $length - total line width; a false value is reported via throw
+# Returns : nothing callers should rely on
+sub _write_line_GenBank_regex {
+    my ($self,$pre1,$pre2,$line,$regex,$length) = @_;
+
+    #print STDOUT "Going to print with $line!\n";
+
+    $length || $self->throw( "Miscalled write_line_GenBank without length. Programming error!");
+
+    my $subl = $length - (length $pre1) - 2;
+    my @lines = ();
+
+    # test for remaining text by length, not by truth, so that a text of
+    # "0" is written out like any other
+  CHUNK: while(defined $line && length $line) {
+        foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) {
+            if($line =~ m/^(.{1,$subl})($pat)(.*)/ ) {
+                my $l = $1.$2;
+                $line = substr($line,length($l));
+                # be strict about not padding spaces according to
+                # genbank format
+                $l =~ s/\s+$//;
+                push(@lines, $l);
+                next CHUNK;
+            }
+        }
+        # if we get here none of the patterns matched $subl or less chars
+        $self->warn("trouble dissecting \"$line\"\n     into chunks ".
+                    "of $subl chars or less - this tag won't print right");
+        # insert a space char to prevent infinite loops
+        $line = substr($line,0,$subl) . " " . substr($line,$subl);
+    }
+    my $s = shift @lines;
+    # an empty first chunk is still skipped, a chunk of "0" is not
+    $self->_print("$pre1$s\n") if defined $s && length $s;
+    foreach my $s ( @lines ) {
+        $self->_print("$pre2$s\n");
+    }
+}
+
+=head2 _post_sort
+
+ Title   : _post_sort
+ Usage   : $obj->_post_sort($newval)
+ Function:
+ Returns : value of _post_sort
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the value kept under the '_post_sort' key.
+# A defined argument replaces the stored value; the stored value is
+# returned either way.  write_seq uses a defined value as the comparison
+# callback when sorting the feature helpers.
+sub _post_sort {
+    my $obj = shift;
+    my ($value) = @_;
+
+    $obj->{'_post_sort'} = $value if defined $value;
+
+    return $obj->{'_post_sort'};
+}
+
+    
+=head2 _show_dna
+
+ Title   : _show_dna
+ Usage   : $obj->_show_dna($newval)
+ Function:
+ Returns : value of _show_dna
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the value kept under the '_show_dna' key.
+# A defined argument replaces the stored value; the stored value is
+# returned either way.  write_seq leaves out the sequence section of a
+# record when this value is 0.
+sub _show_dna {
+    my $obj = shift;
+    my ($value) = @_;
+
+    $obj->{'_show_dna'} = $value if defined $value;
+
+    return $obj->{'_show_dna'};
+}
+
+=head2 _id_generation_func
+
+ Title   : _id_generation_func
+ Usage   : $obj->_id_generation_func($newval)
+ Function:
+ Returns : value of _id_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the value kept under the
+# '_id_generation_func' key.  A defined argument replaces the stored
+# value; the stored value is returned either way.  When it is set,
+# write_seq calls it with the sequence object and prints the result as
+# the first line of the record instead of building a LOCUS line.
+sub _id_generation_func {
+    my $obj = shift;
+    my ($value) = @_;
+
+    $obj->{'_id_generation_func'} = $value if defined $value;
+
+    return $obj->{'_id_generation_func'};
+}
+
+    
+=head2 _ac_generation_func
+
+ Title   : _ac_generation_func
+ Usage   : $obj->_ac_generation_func($newval)
+ Function:
+ Returns : value of _ac_generation_func
+ Args    : newvalue (optional)
+
+=cut
+
+# Combined getter/setter for the value kept under the
+# '_ac_generation_func' key.  A defined argument replaces the stored
+# value; the stored value is returned either way.  When it is set,
+# write_seq calls it with the sequence object and prints the result on
+# the ACCESSION line.
+sub _ac_generation_func {
+    my $obj = shift;
+    my ($value) = @_;
+
+    $obj->{'_ac_generation_func'} = $value if defined $value;
+
+    return $obj->{'_ac_generation_func'};
+}
+
+    
+=head2 _sv_generation_func
+
+ Title   : _sv_generation_func
+ Usage   : $obj->_sv_generation_func($newval)
+ Function:
+ Returns : value of _sv_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Combined getter/setter for the value kept under the
+# '_sv_generation_func' key.  A defined argument replaces the stored
+# value; the stored value is returned either way.  When it is defined,
+# write_seq calls it with the sequence object and prints a VERSION line
+# from the result if that result is true.
+sub _sv_generation_func {
+    my $obj = shift;
+    my ($value) = @_;
+
+    $obj->{'_sv_generation_func'} = $value if defined $value;
+
+    return $obj->{'_sv_generation_func'};
+}
+
+    
+=head2 _kw_generation_func
+
+ Title   : _kw_generation_func
+ Usage   : $obj->_kw_generation_func($newval)
+ Function:
+ Returns : value of _kw_generation_func
+ Args    : newvalue (optional)
+
+=cut
+
+	
+# Combined getter/setter for the value kept under the
+# '_kw_generation_func' key.  A defined argument replaces the stored
+# value; the stored value is returned either way.  When it is defined,
+# write_seq calls it with the sequence object and prints the result on
+# the KEYWORDS line.
+sub _kw_generation_func {
+    my $obj = shift;
+    my ($value) = @_;
+
+    $obj->{'_kw_generation_func'} = $value if defined $value;
+
+    return $obj->{'_kw_generation_func'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/interpro.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/interpro.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/interpro.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,295 @@
+# $Id: interpro.pm,v 1.9.4.2 2006/10/16 17:08:16 sendu Exp $
+#
+# BioPerl module for interpro
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::interpro - InterProScan XML input/output stream 
+
+=head1 SYNOPSIS
+
+  # do not call this module directly, use Bio::SeqIO
+
+  use strict;
+  use Bio::SeqIO;
+
+  my $io = Bio::SeqIO->new(-format => "interpro",
+                           -file   => $interpro_file);
+
+  while (my $seq = $io->next_seq) {
+    # use the Sequence object
+  }
+
+=head1 DESCRIPTION
+
+L<Bio::SeqIO::interpro> will parse Interpro scan XML (version 1.2) and
+create L<Bio::SeqFeature::Generic> objects based on the contents of the
+XML document.
+
+L<Bio::SeqIO::interpro> will also attach the annotation given in the XML
+file to the L<Bio::SeqFeature::Generic> objects that it creates.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jared Fox
+
+Email jaredfox at ucla.edu
+
+=head1 CONTRIBUTORS
+
+Allen Day allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::interpro;
+use strict;
+use Bio::SeqFeature::Generic;
+use XML::DOM;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::DBLink;
+use base qw(Bio::SeqIO);
+
+my $idcounter = {};  # Used to generate unique id values
+my $nvtoken = ": ";  # The token used if a name/value pair has to be stuffed
+                     # into a single line
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : my $seqobj = $stream->next_seq
+ Function: Retrieves the next sequence from a SeqIO::interpro stream.
+ Returns : A Bio::Seq::RichSeq object
+ Args    : 
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+    my $bioSeq = $self->_sequence_factory->create(-verbose =>$self->verbose());
+
+    # Collect the text of one <protein>...</protein> element.
+    my $xml_fragment = undef;
+    while( defined(my $line = $self->_readline()) ){
+
+        # The InterPro XML is not well formed, so bare ampersands and the
+        # known stray double quotes are converted into XML entities.
+        # All repairs are applied cumulatively to the same copy of the
+        # line.  (Earlier code restarted every repair from the raw line,
+        # so only the last repair that matched survived; its ampersand
+        # repair also dropped everything after a second '&', and a match
+        # at column 0 was ignored because index() was tested with "> 0".)
+        my $finishedline = $line;
+
+        # Escape every '&' that does not already begin an entity or a
+        # character reference, so existing entities are not double-escaped.
+        $finishedline =~
+          s/&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)/&amp;/g;
+
+        # Escape the quotes in the two phrases known to be emitted raw.
+        # NOTE(review): the earlier code also located "zinc finger" but
+        # never rewrote it; that is kept unchanged here.
+        $finishedline =~ s/\("zincins"\)/(&quot;zincins&quot;)/g;
+        $finishedline =~ s/"Winged helix"/&quot;Winged helix&quot;/g;
+
+        $xml_fragment .= $finishedline;
+        last if $finishedline =~ m!</protein>!;
+    }
+
+    # Nothing (more) to parse: end of stream or no protein element seen.
+    return unless defined $xml_fragment && $xml_fragment =~ /<protein/;
+
+    $self->_parse_xml($xml_fragment);
+
+    my $dom = $self->_dom;
+
+    my ($protein_node) = $dom->findnodes('/protein');
+    my @interproNodes = $protein_node->findnodes('/protein/interpro');
+    my @DBNodes = $protein_node->findnodes('/protein/interpro/match');
+    for(my $interpn=0; $interpn<scalar(@interproNodes); $interpn++){
+        # NOTE(review): this XPath addresses the interpro element itself,
+        # not its match children, although the result is used as the list
+        # of matches below -- confirm this is intended.
+        my $ipnlevel = join "", "/protein/interpro[", $interpn + 1, "]";
+        my @matchNodes = $protein_node->findnodes($ipnlevel);
+        for(my $match=0; $match<scalar(@matchNodes); $match++){
+            my $matlevel = join "", "/protein/interpro[", $interpn+1, "]/match[",
+              $match+1, "]/location";
+            my @locNodes = $protein_node->findnodes($matlevel);
+
+            # One feature per <location> element of the current match.
+            my @seqFeatures = map { Bio::SeqFeature::Generic->new(
+                  -start => $_->getAttribute('start'),
+                  -end => $_->getAttribute('end'),
+                  -score => $_->getAttribute('score'),
+                  -source_tag => 'IPRscan',
+                  -primary_tag => 'region',
+                  -display_name => $interproNodes[$interpn]->getAttribute('name'),
+                  -seq_id => $protein_node->getAttribute('id') ),
+                } @locNodes;
+            foreach my $seqFeature (@seqFeatures){
+                # Link built from the attributes of $matchNodes[$match].
+                my $annotation1 = Bio::Annotation::DBLink->new;
+                $annotation1->database($matchNodes[$match]->getAttribute('dbname'));
+                $annotation1->primary_id($matchNodes[$match]->getAttribute('id'));
+                $annotation1->comment($matchNodes[$match]->getAttribute('name'));
+                $seqFeature->annotation->add_Annotation('dblink',$annotation1);
+
+                # Link to the enclosing InterPro entry.
+                my $annotation2 = Bio::Annotation::DBLink->new;
+                $annotation2->database('INTERPRO');
+                $annotation2->primary_id($interproNodes[$interpn]->getAttribute('id'));
+                $annotation2->comment($interproNodes[$interpn]->getAttribute('name'));
+                $seqFeature->annotation->add_Annotation('dblink',$annotation2);
+
+                # Bug 1908 (enhancement)
+                my $annotation3  = Bio::Annotation::DBLink->new;
+                $annotation3->database($DBNodes[$interpn]->getAttribute('dbname'));
+                $annotation3->primary_id($DBNodes[$interpn]->getAttribute('id'));
+                $annotation3->comment($DBNodes[$interpn]->getAttribute('name'));
+                $seqFeature->annotation->add_Annotation('dblink',$annotation3);
+            }
+            $bioSeq->add_SeqFeature(@seqFeatures);
+        }
+    }
+    my $accession = $protein_node->getAttribute('id');
+    my $displayname = $protein_node->getAttribute('name');
+    $bioSeq->accession($accession);
+    $bioSeq->display_name($displayname);
+    return $bioSeq;
+}
+
+=head2 _initialize
+
+ Title   : _initialize
+ Usage   : 
+ Function: 
+ Returns :
+ Args    :
+
+=cut
+
+sub _initialize {
+  # Signature restored: the archived text read ", at args" (an e-mail
+  # obfuscation artefact), which is not valid Perl.
+  my($self,@args) = @_;
+
+  $self->SUPER::_initialize(@args);
+  # hash for functions for decoding keys.
+  $self->{'_func_ftunit_hash'} = {};
+
+  # fast forward to first <protein/> record and push that line back so
+  # that next_seq() sees it again.
+  while( defined(my $line = $self->_readline()) ){
+    if($line =~ /<protein/){
+      $self->_pushback($line);
+      last;
+    }
+  }
+
+  $self->_xml_parser( XML::DOM::Parser->new() );
+
+  # NOTE(review): the guard consults sequence_factory() while the value is
+  # stored through this module's own _sequence_factory() accessor --
+  # confirm the two are meant to differ.
+  $self->_sequence_factory( Bio::Seq::SeqFactory->new
+                           ( -verbose => $self->verbose(),
+                             -type => 'Bio::Seq::RichSeq'))
+    if ( ! defined $self->sequence_factory );
+}
+
+=head2 _sequence_factory
+
+ Title   : _sequence_factory
+ Usage   : 
+ Function: 
+ Returns :
+ Args    :
+
+=cut
+
+sub _sequence_factory {
+  # Getter/setter for the 'sequence_factory' slot; an undefined argument
+  # never overwrites a value that is already stored.
+  my ($self, $factory) = @_;
+
+  if ( defined $factory ) {
+    $self->{'sequence_factory'} = $factory;
+  }
+  return $self->{'sequence_factory'};
+}
+
+=head2 _xml_parser
+
+ Title   : _xml_parser
+ Usage   : 
+ Function: 
+ Returns :
+ Args    :
+
+=cut
+
+sub _xml_parser {
+  # Getter/setter for the 'xml_parser' slot; an undefined argument never
+  # overwrites a value that is already stored.
+  my ($self, $parser) = @_;
+
+  if ( defined $parser ) {
+    $self->{'xml_parser'} = $parser;
+  }
+  return $self->{'xml_parser'};
+}
+
+=head2 _parse_xml
+
+ Title   : _parse_xml
+ Usage   : 
+ Function: 
+ Returns :
+ Args    :
+
+=cut
+
+sub _parse_xml {
+  # Run the stored XML parser over $xml, keep the result via _dom(), and
+  # report success with a constant 1.
+  my $self = shift;
+  my $xml  = shift;
+
+  my $document = $self->_xml_parser->parse($xml);
+  $self->_dom($document);
+  return 1;
+}
+
+=head2 _dom
+
+ Title   : _dom
+ Usage   : 
+ Function: 
+ Returns :
+ Args    :
+
+=cut
+
+sub _dom {
+  # Getter/setter for the 'dom' slot; an undefined argument never
+  # overwrites a value that is already stored.
+  my ($self, $document) = @_;
+
+  if ( defined $document ) {
+    $self->{'dom'} = $document;
+  }
+  return $self->{'dom'};
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/kegg.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/kegg.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/kegg.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,298 @@
+# $Id: kegg.pm,v 1.18.4.1 2006/10/02 23:10:29 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::kegg
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Allen Day
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::kegg - KEGG sequence input/output stream
+
+=head1 SYNOPSIS
+
+  # It is probably best not to use this object directly, but
+  # rather go through the SeqIO handler system. Go:
+
+  use Bio::SeqIO;
+
+  $stream = Bio::SeqIO->new(-file => $filename, -format => 'KEGG');
+
+  while ( my $seq = $stream->next_seq() ) {
+	# do something with $seq
+  }
+
+=head1 DESCRIPTION
+
+This class transforms KEGG gene records into Bio::Seq objects.
+
+=head2 Mapping of record properties to object properties
+
+This section is supposed to document which sections and properties of
+a KEGG databank record end up where in the Bioperl object model. It
+is far from complete and presently focuses only on those mappings
+which may be non-obvious. $seq in the text refers to the
+Bio::Seq::RichSeqI implementing object returned by the parser for each
+record.
+
+=over 4
+
+=item 'ENTRY'
+
+ $seq->primary_id
+
+=item 'NAME'
+
+ $seq->display_id
+
+=item 'DEFINITION'
+
+ $seq->annotation->get_Annotations('description');
+
+=item 'ORTHOLOG'
+
+ grep {$_->database eq 'KO'} $seq->annotation->get_Annotations('dblink')
+
+=item 'CLASS'
+
+ grep {$_->database eq 'PATH'}
+          $seq->annotation->get_Annotations('dblink')
+
+=item 'POSITION'
+
+FIXME, NOT IMPLEMENTED
+
+=item 'PATHWAY'
+
+ for my $pathway ( $seq->annotation->get_Annotations('pathway') ) {
+    #
+ }
+
+=item 'DBLINKS'
+
+ $seq->annotation->get_Annotations('dblink')
+
+=item 'CODON_USAGE'
+
+FIXME, NOT IMPLEMENTED
+
+=item 'AASEQ'
+
+ $seq->translate->seq
+
+=item 'NTSEQ'
+
+ $seq->seq
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::kegg;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::DBLink;
+
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+    # Signature restored: the archived text read ", at args" (an e-mail
+    # obfuscation artefact), which is not valid Perl.
+    my($self,@args) = @_;
+
+    $self->SUPER::_initialize(@args);
+    # hash for functions for decoding keys.
+    $self->{'_func_ftunit_hash'} = {};
+
+    # Install a RichSeq-producing factory only when none is set yet.
+    if( ! defined $self->sequence_factory ) {
+        $self->sequence_factory(Bio::Seq::SeqFactory->new
+                                (-verbose => $self->verbose(),
+                                 -type => 'Bio::Seq::RichSeq'));
+    }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::RichSeq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    # Signature restored: the archived text read ", at args" (an e-mail
+    # obfuscation artefact), which is not valid Perl.
+    my ($self,@args) = @_;
+    my $builder = $self->sequence_builder();
+    my %params  = (-verbose => $self->verbose);
+
+    # The record separator is set to the "///" terminator line before
+    # reading, presumably so that _readline() hands back one whole record.
+    local($/) = "///\n";
+
+    my $buffer = $self->_readline();
+
+    return if( !defined $buffer ); # end of file
+    $buffer =~ /^ENTRY/ ||
+      $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");
+
+    # Cut the record into sections: a new section starts at every line
+    # whose first character is not whitespace, and each section is stored
+    # under its leading keyword.
+    my %FIELDS;
+    my @chunks = split /\n(?=\S)/, $buffer;
+
+    foreach my $chunk (@chunks){
+        my($key) = $chunk =~ /^(\S+)/;
+        $FIELDS{$key} = $chunk;
+    }
+
+    # changing to split method to get entry_ids that include
+    # sequence version like Whatever.1
+    my(undef,$entry_id,$entry_seqtype,$entry_species) =
+      split(' ',$FIELDS{ENTRY});
+
+    my($name);
+    if ($FIELDS{NAME}) {
+        ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
+    }
+
+    my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );
+
+    # The captures stay undefined when a section header is present but
+    # its body does not match, so whitespace is only squeezed on defined
+    # values.
+    if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
+        ($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
+        $definition =~ s/\s+/ /gs if defined $definition;
+    }
+    if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
+        ($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
+        $aa_seq =~ s/\s+//g if defined $aa_seq;
+    }
+    if(( exists  $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
+        ($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
+        $nt_seq =~ s/\s+//g if defined $nt_seq;
+    }
+
+    my $annotation = Bio::Annotation::Collection->new();
+
+    $annotation->add_Annotation('description',
+                Bio::Annotation::Comment->new(-text => $definition));
+
+    $annotation->add_Annotation('aa_seq',
+                Bio::Annotation::Comment->new(-text => $aa_seq));
+
+    my($ortholog_db,$ortholog_id,$ortholog_desc);
+    if ($FIELDS{ORTHOLOG}) {
+        ($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
+          =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;
+
+        $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
+                     -database => $ortholog_db,
+                     -primary_id => $ortholog_id,
+                     -comment => $ortholog_desc) );
+    }
+
+    # MOTIF: every "db: id id ..." line yields one dblink per id.
+    if($FIELDS{MOTIF}){
+        $FIELDS{MOTIF} =~ s/^MOTIF\s+//;
+        while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
+            my $db = $1;
+            my $ids = $2;
+            foreach my $id (split(/\s+/, $ids)){
+
+                $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
+                         -database =>$db,
+                         -primary_id => $id,
+                         -comment => "")   );
+            }
+        }
+    }
+
+    # PATHWAY: the text after every "PATH:" becomes a 'pathway' comment.
+    if($FIELDS{PATHWAY}) {
+        $FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
+        while($FIELDS{PATHWAY} =~ /\s*PATH:\s+(.+)$/mg){
+            $annotation->add_Annotation('pathway',
+               Bio::Annotation::Comment->new(-text => "$1"));
+        }
+    }
+
+    # CLASS: each "description [db:id]" item yields a 'pathway' comment
+    # holding the description and a dblink holding db and id.
+    if ($FIELDS{CLASS}) {
+        $FIELDS{CLASS} =~ s/^CLASS\s+//;
+        $FIELDS{'CLASS'} =~ s/\n//g;
+        while($FIELDS{CLASS} =~ /(.*?)\[(\S+):(\S+)\]/g){
+            my ($pathway,$db,$id) = ($1,$2,$3);
+            $pathway =~ s/\s+/ /g;
+            $pathway =~ s/\s$//g;
+            $pathway =~ s/^\s+//;
+            $annotation->add_Annotation('pathway',
+                    Bio::Annotation::Comment->new(-text => $pathway));
+
+            $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
+                        -database => $db, -primary_id => $id));
+        }
+    }
+
+    # DBLINKS: the keyword is blanked out so the first "db: id" pair is
+    # preceded by whitespace like all the following ones.
+    if($FIELDS{DBLINKS}) {
+        $FIELDS{DBLINKS} =~ s/^DBLINKS/       /;
+        while($FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n?/gs){ ### modified
+            $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
+                     -database => $1, -primary_id => $2)) if $1;
+        }
+    }
+
+    $params{'-alphabet'}         = 'dna';
+    $params{'-seq'}              = $nt_seq;
+    $params{'-display_id'}       = $name;
+    $params{'-accession_number'} = $entry_id;
+    $params{'-species'}          = Bio::Species->new(
+                                      -common_name => $entry_species);
+    $params{'-annotation'}       = $annotation;
+
+    $builder->add_slot_value(%params);
+    my $seq = $builder->make_object();
+
+    return $seq;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/largefasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/largefasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/largefasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,166 @@
+# $Id: largefasta.pm,v 1.24.4.1 2006/10/02 23:10:29 sendu Exp $
+# BioPerl module for Bio::SeqIO::largefasta
+#
+# Cared for by Jason Stajich
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# 
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::largefasta - method i/o on very large fasta sequence files
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from fasta flat
+file databases.
+
+This module handles very large sequence files by using the
+Bio::Seq::LargePrimarySeq module to store all the sequence data in
+a file.  This can be a problem if you have limited disk space on your
+computer because this will effectively cause 2 copies of the sequence
+file to reside on disk for the life of the
+Bio::Seq::LargePrimarySeq object.  The default location for this is
+specified by the L<File::Spec>-E<gt>tmpdir routine which is usually /tmp
+on UNIX.  If a sequence file is larger than the swap space (capacity
+of the /tmp dir) this could cause problems for the machine.  It is
+possible to set the directory where the temporary file is located by
+adding the following line to your code BEFORE calling next_seq. See
+L<Bio::Seq::LargePrimarySeq> for more information.
+
+    $Bio::Seq::LargePrimarySeq::DEFAULT_TEMP_DIR = 'newdir';
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Jason Stajich
+
+Email: jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::largefasta;
+use vars qw($FASTALINELEN);
+use strict;
+
+use Bio::Seq::SeqFactory;
+
+$FASTALINELEN = 60;
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+  # Signature restored: the archived text read ", at args" (an e-mail
+  # obfuscation artefact), which is not valid Perl.
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+
+  # Install a LargePrimarySeq-producing factory only when none is set yet.
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new
+                              (-verbose => $self->verbose(),
+                               -type => 'Bio::Seq::LargePrimarySeq'));
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    my $self = shift;
+
+    # The target object is created up front and filled line by line.
+    my $largeseq = $self->sequence_factory->create();
+    my ($id, $fulldesc);
+    my $lines_done  = 0;
+    my $have_header = 0;
+
+  LINE:
+    while( defined (my $entry = $self->_readline) ) {
+
+        # A header met after this record has started belongs to the next
+        # record: hand the line back and finish.
+        if( $have_header && $entry =~ /^\s*>/ ) {
+            $self->_pushback($entry);
+            return $largeseq;
+        }
+
+        # A lone '>' only marks the start of a record (not counted below).
+        if( $entry eq '>' ) {
+            $have_header = 1;
+            next LINE;
+        }
+
+        if( $entry =~ /\s*>(.+?)$/ ) {
+            # Header line: first word is the id, the rest the description.
+            $have_header = 1;
+            ($id,$fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
+                or $self->warn("Can't parse fasta header");
+            $largeseq->display_id($id);
+            $largeseq->primary_id($id);
+            $largeseq->desc($fulldesc);
+        }
+        else {
+            # Sequence line: strip all whitespace and append.
+            $entry =~ s/\s+//g;
+            $largeseq->add_sequence_as_string($entry);
+        }
+
+        # Progress report every 1000 lines when verbose.
+        $lines_done++;
+        if( $lines_done % 1000 == 0 && $self->verbose() > 0 ) {
+            print "line $lines_done\n";
+        }
+    }
+
+    # End of stream: deliver the record only if one was started.
+    return $largeseq if $have_header;
+    return;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+sub write_seq {
+   # Signature restored: the archived text read ", at seq" (an e-mail
+   # obfuscation artefact), which is not valid Perl.
+   my ($self,@seq) = @_;
+   foreach my $seq (@seq) {
+     # Header line: ">id" plus the description with newlines removed.
+     my $top = $seq->id();
+     if ($seq->can('desc') and my $desc = $seq->desc()) {
+         $desc =~ s/\n//g;
+         $top .= " $desc";
+     }
+     $self->_print (">",$top,"\n");
+
+     # Emit the sequence in chunks of $FASTALINELEN residues, fetched
+     # with subseq() so the whole sequence is never held at once.
+     my $end = $seq->length();
+     my $start = 1;
+     # "<=" rather than "<": with "<" a final chunk holding exactly one
+     # residue (length 1, $FASTALINELEN+1, ...) was silently dropped.
+     while( $start <= $end ) {
+         my $stop = $start + $FASTALINELEN - 1;
+         $stop = $end if( $stop > $end );
+         $self->_print($seq->subseq($start,$stop), "\n");
+         $start += $FASTALINELEN;
+     }
+   }
+
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/lasergene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/lasergene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/lasergene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+#-----------------------------------------5~------------------------------------
+# PACKAGE : Bio::SeqIO::lasergene
+# AUTHOR  : Malcolm Cook <mec at stowers-institute.org>
+# CREATED : Feb 16 1999
+# REVISION: $Id: lasergene.pm,v 1.5.4.1 2006/10/02 23:10:29 sendu Exp $
+#
+# _History_
+#
+# This code is based on the Bio::SeqIO::raw module with
+# the minor tweaks necessary to get it to read (only)
+# Lasergene formatted sequences
+#
+# Cleaned up by Torsten Seemann June 2006
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::lasergene - Lasergene sequence file input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::SeqIO> class.
+
+=head1 DESCRIPTION
+
+This object can produce Bio::Seq objects from Lasergene sequence files.
+
+IT DOES NOT PARSE ANY ATTRIBUTE VALUE PAIRS IN THE HEADER OF THE LASERGENE FORMATTED FILE.
+
+IT DOES NOT WRITE THESE FILES EITHER.
+
+=head1 REFERENCES
+
+  https://www.dnastar.com/products/lasergene.php
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHORS
+
+  Torsten Seemann - torsten.seemann AT infotech.monash.edu.au
+  Malcolm Cook  - mec AT stowers-institute.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqIO::lasergene;
+
+use strict;
+
+use base qw(Bio::SeqIO);
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : none
+
+=cut
+
+use Bio::Seq;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+
+sub next_seq {
+   my $self = shift;
+
+   # Reading phase: 0 = nothing read yet, 1 = collecting comment lines,
+   # 2 = collecting sequence lines, 3 = record finished.  Every '^^' line
+   # moves on to the next phase.
+   my $phase = 0;
+   my (@comment_lines, @sequence_lines);
+
+ LINE:
+   while (my $line = $self->_readline) {
+     $phase ||= 1;
+     chomp $line;
+     next LINE if $line =~ m/^\s*$/; # skip blank lines
+
+     if ($line eq '^^') {  # end of a comment or sequence
+       last LINE if ++$phase > 2; # we have comment and sequence so exit
+       next LINE;
+     }
+     if ($phase == 1) {    # another piece of comment
+       push @comment_lines, $line;
+       next LINE;
+     }
+     if ($phase == 2) {    # another piece of sequence
+       push @sequence_lines, $line;
+       next LINE;
+     }
+     $self->throw("unreachable state reached, probable bug!");
+   }
+
+   # return quietly if there was nothing in the file
+   return if $phase == 0;
+
+   # the stream ended before the first '^^' separator was seen
+   $self->throw("unexpected end of file") if $phase < 2;
+
+   my $residues = join('', @sequence_lines);
+   $self->throw("empty sequence in lasergene file") unless $residues;
+   my $seq = Bio::Seq->new(-seq => $residues);
+
+   # All comment lines go into one 'comment' annotation, joined by '; '.
+   my $anno = Bio::Annotation::Collection->new;
+   $anno->add_Annotation('comment',
+       Bio::Annotation::Comment->new(-text => join('; ', @comment_lines)) );
+   $seq->annotation($anno);
+
+   return $seq;
+}
+
+=head2 write_seq (NOT IMPLEMENTED)
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Array of Bio::PrimarySeqI objects
+
+=cut
+
+sub write_seq {
+  # Writing is not supported for this format: always raises through
+  # throw(), whatever sequences are passed in.
+  my $self = shift;
+  $self->throw("write_seq() is not implemented for the lasergene format.");
+}
+
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/locuslink.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/locuslink.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/locuslink.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,590 @@
+# $Id: locuslink.pm,v 1.12.4.1 2006/10/02 23:10:29 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::locuslink
+#
+# Cared for by Keith Ching <kching at gnf.org>
+#
+# Copyright Keith Ching
+#
+# You may distribute this module under the same terms as perl itself
+
+#
+# (c) Keith Ching, kching at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+# 
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::locuslink - LocusLink input/output stream
+
+=head1 SYNOPSIS
+
+   # don't instantiate directly - instead do
+   my $seqio = Bio::SeqIO->new(-format => "locuslink", -file => \STDIN);
+
+=head1 DESCRIPTION
+
+This module parses LocusLink into Bio::SeqI objects with rich
+annotation, but no sequence.
+
+The input file has to be in the LL_tmpl format - the tabular format
+will not work.
+
+The way the current implementation populates the object is rather a
+draft work than a finished work of art. Note that at this stage the
+LocusLink entries cannot be round-tripped, because the parser loses
+certain information. For instance, most of the alternative transcript
+descriptions are not retained. The parser also misses any element
+that deals with visual representation (e.g., 'button') except for the
+URLs. Almost all of the pieces of the annotation are kept in a
+Bio::Annotation::Collection object, see L<Bio::Annotation::Collection>
+for more information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via 
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Keith Ching
+
+Email kching at gnf.org
+
+=head1 CONTRIBUTORS
+
+Hilmar Lapp, hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::locuslink;
+
+use strict;
+
+use Bio::Seq::SeqFactory;
+use Bio::Species;
+use Bio::Annotation::DBLink;
+#use Bio::Annotation::Reference;
+use Bio::Annotation::Comment;
+use Bio::Annotation::SimpleValue;
+use Bio::Annotation::OntologyTerm;
+use Bio::Annotation::Collection;
+
+use base qw(Bio::SeqIO);
+
+# List of all the field names in locuslink.  Every name listed here
+# gets a default search pattern of the form "^NAME: (.+)\n" when
+# %feature_pat_map is built from this list further down in the file.
+my @locuslink_keys = qw(
+		       ACCNUM
+		       ALIAS_PROT
+		       ALIAS_SYMBOL
+		       ASSEMBLY
+		       BUTTON
+		       CDD
+		       CHR
+		       COMP
+		       CONTIG
+		       CURRENT_LOCUSID
+		       DB_DESCR
+		       DB_LINK
+		       ECNUM
+		       EVID
+		       EXTANNOT
+		       GO
+		       GRIF
+		       LINK
+		       LOCUSID
+		       LOCUS_CONFIRMED
+		       LOCUS_TYPE
+		       MAP
+		       MAPLINK
+		       NC
+		       NG
+		       NM
+		       NP
+		       NR
+		       OFFICIAL_GENE_NAME
+		       OFFICIAL_SYMBOL
+		       OMIM
+		       ORGANISM
+		       PHENOTYPE
+		       PHENOTYPE_ID
+		       PMID
+		       PREFERRED_GENE_NAME
+		       PREFERRED_PRODUCT
+		       PREFERRED_SYMBOL
+		       PRODUCT
+		       PROT
+		       RELL
+		       STATUS
+		       STS
+		       SUMFUNC
+		       SUMMARY
+		       TRANSVAR
+		       TYPE
+		       UNIGENE
+		       XG
+		       XM
+		       XP
+		       XR
+		       );
+
+# List of fields to make simple annotations from, grouped by the kind
+# of annotation named in the hash key (presumably the matching
+# Bio::Annotation::* class -- confirm against the code that reads this).
+# Fields not listed here or as a key in the feature hash are ignored (lost).
+my %anntype_map = (
+		   SimpleValue => [qw(
+				      ALIAS_PROT
+				      ALIAS_SYMBOL
+				      CDD
+				      CHR
+				      CURRENT_LOCUSID
+				      ECNUM
+				      EXTANNOT
+				      MAP
+				      NC
+				      NR
+				      OFFICIAL_GENE_NAME
+				      OFFICIAL_SYMBOL
+				      PHENOTYPE
+				      PREFERRED_GENE_NAME
+				      PREFERRED_PRODUCT
+				      PREFERRED_SYMBOL
+				      PRODUCT
+				      RELL
+				      SUMFUNC
+				      )
+				   ],
+		   Comment     => [qw(
+				      SUMMARY
+				      )
+				   ],
+		   );
+
+
+# Certain fields are not named the same as the symgene database list;
+# this table renames them:
+#   key   = field name in locuslink
+#   value = database name in symgene
+my %dbname_map = (
+		  pfam    => 'Pfam',
+		  smart   => 'SMART',
+		  NM      => 'RefSeq',
+		  NP      => 'RefSeq',
+		  XP      => 'RefSeq',
+		  XM      => 'RefSeq',
+		  NG      => 'RefSeq',
+		  XG      => 'RefSeq',
+		  XR      => 'RefSeq',
+		  PROT    => 'GenBank',
+		  ACCNUM  => 'GenBank',
+		  CONTIG  => 'GenBank',
+		  # (the entries below follow the same key/value convention;
+		  # the GO mapping is deliberately left disabled)
+		  #GO      => 'GO',
+		  OMIM    => 'MIM',
+		  GRIF    => 'GRIF',
+		  STS     => 'STS',
+		  UNIGENE => 'UniGene',
+		  );
+
+# Certain CDD entries use the wrong prefix for the accession number;
+# cddprefix will replace the key w/ the value for these entries.
+# The keys also drive the special "^CDD: ..." search patterns that
+# _initialize installs in %feature_pat_map.
+my %cddprefix = (
+		 pfam     => 'PF',
+		 smart    => 'SM',
+		 );
+
+# Alternate mappings if one field does not exist: the key names a
+# field and the value names the field listed as its alternate
+# (the lookup itself happens outside this table).
+my %alternate_map = (
+		  OFFICIAL_GENE_NAME => 'PREFERRED_GENE_NAME',
+		  OFFICIAL_SYMBOL    => 'PREFERRED_SYMBOL',
+		    );
+
+# for these field names, we only care about the first value X in value X|Y|Z
+# (_initialize replaces their search pattern with one that stops at the
+# first '|')
+my @ll_firstelements = qw(
+                          NM
+			  NP
+			  NG
+			  XG
+			  XM
+			  XP
+			  XR
+			  PROT
+			  STS
+			  ACCNUM
+			  CONTIG
+			  GRIF
+			  );
+
+# these fields need to be flattened into a single string, using the given
+# join string (key = field name, value = separator passed to join)
+my %flatten_tags = (
+		    ASSEMBLY            => ',',
+		    ORGANISM            => '',  # this should occur only once
+		    OFFICIAL_SYMBOL     => '',  # this should occur only once
+		    OFFICIAL_GENE_NAME  => '',  # this should occur only once
+		    LOCUSID             => '',  # this should occur only once
+		    PMID                => ',',
+		    PREFERRED_SYMBOL    => ', ',
+		    PREFERRED_GENE_NAME => ', '
+);
+
+# set the default search pattern for all the field names:
+# "<FIELD>: <rest of line>", capturing the rest of the line.
+# _initialize later overrides the entries that need a special pattern.
+my %feature_pat_map = map { ($_ , "^$_: (.+)\n"); } @locuslink_keys;
+
+# Set up the parser: delegate to the parent initializer, install the
+# field-specific search patterns and make sure a sequence factory exists.
+#
+# The patterns live in the file-level %feature_pat_map, so the assignments
+# below are repeated (with identical values) every time a parser is created.
+sub _initialize {
+  my($self,@args) = @_;
+
+  $self->SUPER::_initialize(@args);
+
+  # overwrite the search pattern w/ the first value pattern
+  # (captures everything up to the first '|')
+  foreach my $key(@ll_firstelements){
+      $feature_pat_map{$key}="^$key: ([^|]+)";
+  }
+
+  # special search pattern for cdd entries: capture the numeric part of
+  # the accession that follows the (wrong) prefix
+  foreach my $key(keys %cddprefix) {
+      $feature_pat_map{$key}='^CDD: .+\|'.$key.'(\d+)';
+  }
+
+  # special patterns for specific fields
+  $feature_pat_map{MAP}      = '^MAP: (.+?)\|';
+  $feature_pat_map{MAPHTML}  = '^MAP: .+\|(<.+>)\|';
+  $feature_pat_map{GO}       = '^GO: .+\|.+\|\w+\|(GO:\d+)\|';
+  $feature_pat_map{GO_DESC}  = '^GO: .+\|(.+)\|\w+\|GO:\d+\|';
+  $feature_pat_map{GO_CAT}   = '^GO: (.+)\|.+\|\w+\|GO:\d+\|';
+  $feature_pat_map{EXTANNOT} = '^EXTANNOT: (.+)\|(.+)\|\w+\|.+\|\d+';
+
+  # set the sequence factory if none has been set already
+  if(! $self->sequence_factory()) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(
+					      -type => 'Bio::Seq::RichSeq'));
+  }
+}
+
+
+#########################
+# search_pattern
+#
+# Apply $searchpattern to $entry (multi-line, global) and return every
+# match/capture as a list.
+#
+# Unless $searchconfirm is the literal string "FALSE", it is used as a
+# second pattern that each result must match: a warning is issued when
+# nothing was found, and an exception is thrown for any result that does
+# not match. $searchtype is only used in those messages.
+#########################
+sub search_pattern {
+    my ($self, $entry, $searchconfirm, $searchpattern, $searchtype) = @_;
+
+    my @hits = ($entry =~ /$searchpattern/gm);
+
+    # no confirmation requested: hand back whatever was found
+    return(@hits) if $searchconfirm eq "FALSE";
+
+    @hits or $self->warn("No $searchtype found\n$entry\n");
+    for my $hit (@hits) {
+        next if $hit =~ /$searchconfirm/;
+        $self->throw("error\n$entry\n$searchtype parse $hit does not match $searchconfirm\n");
+    }
+    return(@hits);
+}
+############
+# read_species
+#
+# Build a Bio::Species object from an organism line: the first two
+# space-separated words are taken as genus and species and stored as the
+# classification (species first, then genus).
+############
+sub read_species {
+    my ($spline) = @_;
+
+    my ($genus, $species) = ($spline =~ /([^ ]+) ([^ ]+)/);
+
+    my $taxon = Bio::Species->new();
+    $taxon->classification($species, $genus);
+    return $taxon;
+}
+################
+# read_dblink
+#
+# Add one 'dblink' annotation (Bio::Annotation::DBLink) to the collection
+# $ann for every true value in the array referenced by $ref, using $db as
+# the database name. A false $ref adds nothing. Returns the collection.
+################
+sub read_dblink {
+    my ($ann, $db, $ref) = @_;
+
+    return($ann) unless $ref;
+
+    # empty/false identifiers are skipped
+    for my $id (grep { $_ } @$ref) {
+        my $link = Bio::Annotation::DBLink->new(-database   => $db,
+                                                -primary_id => $id);
+        $ann->add_Annotation('dblink', $link);
+    }
+    return($ann);
+}
+
+################
+# read_reference
+#
+# Split the comma-separated id string $results and add each id to the
+# collection $ann as a dblink for database $db (via read_dblink).
+# A false $results leaves the collection untouched. Returns the collection.
+################
+sub read_reference {
+    my ($ann, $db, $results) = @_;
+
+    return $ann unless $results;
+
+    chomp($results);
+    my @ids = split(/,/, $results);
+    if (@ids) {
+        $ann = read_dblink($ann, $db, \@ids);
+    }
+    return $ann;
+}
+
+################
+# add_annotation
+#
+# Create a Bio::Annotation::<anntype> object for $text and add it to the
+# collection $ac. $anntype defaults to 'SimpleValue'.
+#   SimpleValue : value = $text, tagname = $type
+#   Comment     : text  = $text, tagname = 'comment'
+# Any other type is constructed without arguments. Returns the collection.
+################
+sub add_annotation {
+    my ($ac, $type, $text, $anntype) = @_;
+
+    $anntype ||= 'SimpleValue';
+
+    my @args;
+    if ($anntype eq 'SimpleValue') {
+        @args = (-value => $text, -tagname => $type);
+    }
+    elsif ($anntype eq 'Comment') {
+        @args = (-text => $text, -tagname => 'comment');
+    }
+
+    my $class = "Bio::Annotation::$anntype";
+    $ac->add_Annotation($class->new(@args));
+    return($ac);
+}
+
+################
+# add_annotation_ref
+#
+# Add every element of the array referenced by $textref to the collection
+# $ann as a Bio::Annotation::SimpleValue under the key $type.
+# A false $textref adds nothing. Returns the collection.
+################
+sub add_annotation_ref {
+    my ($ann, $type, $textref) = @_;
+
+    return($ann) unless $textref;
+
+    for my $value (@$textref) {
+        $ann->add_Annotation($type,
+                             Bio::Annotation::SimpleValue->new(-value => $value));
+    }
+    return($ann);
+}
+
+################
+# make_unique
+#
+# Remove all annotations stored under $key from the collection $ann and
+# add back only the first one seen for each distinct as_text() value.
+# Returns the collection.
+################
+sub make_unique {
+    my ($ann, $key) = @_;
+
+    my %seen;
+    for my $item ($ann->remove_Annotations($key)) {
+        next if exists $seen{$item->as_text()};
+        $seen{$item->as_text()} = 1;
+        $ann->add_Annotation($item);
+    }
+    return $ann;
+}
+
+################
+# next_seq
+#
+# Read the next LocusLink record that has not been obsoleted and return it
+# as a sequence object created by the sequence factory. The record's
+# fields are extracted with the patterns in %feature_pat_map and turned
+# into annotations (dblinks, GO ontology terms, URLs, simple values and
+# comments); LOCUSID, OFFICIAL_GENE_NAME, OFFICIAL_SYMBOL and ORGANISM
+# populate the sequence object itself.
+#
+# Returns nothing when the input is exhausted.
+# Throws if an entry does not contain LOCUSID.
+################
+sub next_seq{
+    my ($self, @args)=@_;
+    my (@results,$search);
+
+    # LOCUSLINK entries begin w/ >>
+    local $/="\n>>";
+
+    # slurp in a whole entry and return if no more entries
+    return unless my $entry = $self->_readline;
+
+    # strip the leading '>>' if it's the first entry
+    if (index($entry,'>>') == 0) { #first entry
+        $entry = substr($entry,2);
+    }
+
+    # we aren't interested in obsoleted entries, so we need to loop
+    # and skip those until we've found the next not obsoleted
+    my %record = ();
+    while($entry && ($entry =~ /\w/)) {
+        if (!($entry=~/LOCUSID/)){
+            $self->throw("No LOCUSID in first line of record. ".
+                         "Not LocusLink in my book.");
+        }
+        # see whether it's an obsoleted entry, and if so jump to the next
+        # one entry right away
+        if($entry =~ /^CURRENT_LOCUSID:/m) {
+            # read next entry and continue
+            $entry = $self->_readline;
+            %record = ();
+            next;
+        }
+        # loop through list of features and get field values
+        # place into record hash as array refs
+        foreach my $key (keys %feature_pat_map){
+            $search=$feature_pat_map{$key};
+            @results=$self->search_pattern($entry,'FALSE',$search,$search);
+            $record{$key} = @results ? [@results] : undef;
+        }#endfor
+        # terminate loop as this one hasn't been obsoleted
+        last;
+    }
+
+    # we have reached the end-of-file ...
+    return unless %record;
+
+    # special processing for CDD entries like pfam and smart
+    my ($PRESENT,@keep);
+    foreach my $key(keys %cddprefix){
+        if($record{$key}) {
+            @keep=();
+            foreach my $list (@{$record{$key}}) {
+                # replace AC with correct AC number
+                push(@keep,$cddprefix{$key}.$list);
+            }
+            # replace CDD ref with correctly prefixed AC number
+            $record{$key} = [@keep];
+        }
+    }
+    # modify CDD references: drop the ones that mention a prefix handled
+    # above, since those are already stored under their own key
+    if($record{CDD}) {
+        @keep=();
+        foreach my $cdd (@{$record{CDD}}) {
+            $PRESENT = undef;
+            foreach my $key (keys %cddprefix) {
+                if ($cdd=~/$key/){
+                    $PRESENT = 1;
+                    last;
+                }
+            }
+            push(@keep,$cdd) if(! $PRESENT);
+        }
+        $record{CDD} = [@keep];
+    }
+
+    # create annotation collection - we'll need it now
+    my $ann = Bio::Annotation::Collection->new();
+
+    foreach my $field(keys %dbname_map){
+        $ann=read_dblink($ann,$dbname_map{$field},$record{$field});
+    }
+
+    # add GO link as an OntologyTerm annotation; GO, GO_DESC and GO_CAT
+    # are indexed in parallel
+    if($record{GO}) {
+        for(my $j = 0; $j < @{$record{GO}}; $j++) {
+            my $goann = Bio::Annotation::OntologyTerm->new(
+                                       -identifier => $record{GO}->[$j],
+                                       -name => $record{GO_DESC}->[$j],
+                                       -ontology => $record{GO_CAT}->[$j]);
+            $ann->add_Annotation($goann);
+        }
+    }
+
+    $ann=add_annotation_ref($ann,'URL',$record{LINK});
+    $ann=add_annotation_ref($ann,'URL',$record{DB_LINK});
+
+    # everything else gets a simple tag or comment value annotation
+    foreach my $anntype (keys %anntype_map) {
+        foreach my $key (@{$anntype_map{$anntype}}){
+            if($record{$key}){
+                foreach (@{$record{$key}}){
+                    $ann=add_annotation($ann,$key,$_,$anntype);
+                }
+            }
+        }
+    }
+
+    # flatten designated attributes into a scalar value
+    foreach my $field (keys %flatten_tags) {
+        if($record{$field}) {
+            $record{$field} = join($flatten_tags{$field},
+                                   @{$record{$field}});
+        }
+    }
+
+    # annotation that expects the array flattened out
+    $ann=read_reference($ann,'PUBMED',$record{PMID});
+    if($record{ASSEMBLY}) {
+        my @assembly=split(/,/,$record{ASSEMBLY});
+        $ann=read_dblink($ann,'GenBank',\@assembly);
+    }
+
+    # replace fields w/ alternate if original does not exist
+    foreach my $fieldval (keys %alternate_map){
+        if((! $record{$fieldval}) && ($record{$alternate_map{$fieldval}})){
+            $record{$fieldval}=$record{$alternate_map{$fieldval}};
+        }
+    }
+
+    # presently we can't store types or context of dblinks - therefore
+    # we need to remove duplicates that only differ in context
+    make_unique($ann,'dblink');
+
+    # create sequence object (i.e., let seq.factory create one)
+    my $seq = $self->sequence_factory->create(
+                         -verbose => $self->verbose(),
+                         -accession_number => $record{LOCUSID},
+                         -desc => $record{OFFICIAL_GENE_NAME},
+                         -display_id => $record{OFFICIAL_SYMBOL},
+                         -species => read_species($record{ORGANISM}),
+                         -annotation => $ann);
+
+    # dump out object contents
+    # show_obj([$seq]);
+
+    return($seq);
+}
+
+################
+# show_obj
+#
+# Debugging helper: for every sequence in the array referenced by
+# $seqlistref, write it to STDOUT in genbank format and then print each
+# of its annotations as "Annotation <key>\t\t<text>".
+################
+sub show_obj{
+    my ($seqlistref)=@_;
+    my @list=@$seqlistref;
+    my $out = Bio::SeqIO->new('-fh' => \*STDOUT, -format => 'genbank' );
+    my ($ann,@values,$val);
+
+    foreach my $seq(@list){
+        $out->write_seq($seq);
+        $ann=$seq->annotation;
+        foreach my $key ( $ann->get_all_annotation_keys() ) {
+            @values = $ann->get_Annotations($key);
+            foreach my $value ( @values ) {
+                # value is an Bio::AnnotationI, and defines a "as_text" method
+                $val=$value->as_text;
+                print "Annotation ",$key,"\t\t",$val,"\n";
+            }
+        }
+    }
+}#endsub
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/metafasta.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/metafasta.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/metafasta.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,245 @@
+# $Id: metafasta.pm,v 1.9.4.1 2006/10/02 23:10:30 sendu Exp $
+# BioPerl module for Bio::SeqIO::metafasta
+#
+# Cared for by Heikki Lehvaslaiho
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::metafasta - metafasta sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+  use Bio::SeqIO;
+
+  # read the metafasta file
+  $io = Bio::SeqIO->new(-file => "test.metafasta",
+                        -format => "metafasta" );
+
+  $seq = $io->next_seq;
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq::Meta objects to and from metafasta
+flat file databases.
+
+For sequence part the code is an exact copy of Bio::SeqIO::fasta
+module. The only added bits deal with meta data IO.
+
+The format of a metafasta file is
+
+  >test
+  ABCDEFHIJKLMNOPQRSTUVWXYZ
+  &charge
+  NBNAANCNJCNNNONNCNNUNNXNZ
+  &chemical
+  LBSAARCLJCLSMOIMCHHULRXRZ
+
+where the sequence block is followed by one or several meta blocks.
+Each meta block starts with the ampersand character '&' in the first
+column and is immediately followed by the name of the meta data which
+continues until the new line. The meta data follows it. All
+characters, except new line, are important in meta data.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::metafasta;
+use vars qw($WIDTH);
+use strict;
+
+use Bio::Seq::SeqFactory;
+use Bio::Seq::SeqFastaSpeedFactory;
+use Bio::Seq::Meta;
+
+use base qw(Bio::SeqIO);
+
+BEGIN { $WIDTH = 60}
+
+# Set up the stream: delegate to the parent initializer, honour an
+# optional -width argument (output line width) and install the fast
+# FASTA sequence factory unless a factory has already been set.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  my ($width) = $self->_rearrange([qw(WIDTH)], @args);
+  $width && $self->width($width);
+  unless ( defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+# Read the next metafasta record and return it as a Bio::Seq::Meta
+# object, or nothing at end of input. The record is split into a header
+# line, the sequence block and zero or more "&name" meta blocks; each meta
+# block is attached to the sequence with named_meta().
+sub next_seq {
+    my( $self ) = @_;
+    my $seq;
+    my $alphabet;
+    local $/ = "\n>";
+    return unless my $entry = $self->_readline;
+
+    chomp($entry);
+    if ($entry =~ m/\A\s*\Z/s)  { # very first one
+        return unless $entry = $self->_readline;
+        chomp($entry);
+    }
+    $entry =~ s/^>//;
+
+    my ($top,$sequence) = split(/\n/,$entry,2);
+    defined $sequence && $sequence =~ s/>//g;
+
+    # everything after the first "\n&" belongs to the meta blocks;
+    # a header-only record has neither sequence nor meta data
+    my @metas;
+    ($sequence, @metas) = split /\n&/, $sequence if defined $sequence;
+
+    my ($id,$fulldesc);
+    if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
+        ($id,$fulldesc) = ($1,$2);
+    }
+
+    if (defined $id && $id eq '') {$id=$fulldesc;} # FIX in case no space
+                                                   # between > and name
+    defined $sequence && $sequence =~ s/\s//g;     # Remove whitespace
+
+    # for empty sequences we need to know the mol.type
+    $alphabet = $self->alphabet();
+    if(defined $sequence && length($sequence) == 0) {
+        if(! defined($alphabet)) {
+            # let's default to dna
+            $alphabet = "dna";
+        }
+    } else {
+        # we don't need it really, so disable
+        $alphabet = undef;
+    }
+
+    $seq = $self->sequence_factory->create(
+                       -seq         => $sequence,
+                       -id          => $id,
+                       # Ewan's note - I don't think this healthy
+                       # but obviously to taste.
+                       #-primary_id  => $id,
+                       -desc        => $fulldesc,
+                       -alphabet    => $alphabet,
+                       -direct      => 1,
+                       );
+
+    $seq = $seq->primary_seq;
+    bless $seq, 'Bio::Seq::Meta';
+
+    foreach my $meta (@metas) {
+        # Split off the name only: the meta string itself may span several
+        # lines (write_seq wraps it at the line width), so the remainder
+        # must be kept whole instead of being cut at its first newline.
+        my ($name,$string) = split /\n/, $meta, 2;
+        # Remove newlines, spaces are important
+        $string =~ s/\n//g if defined $string;
+        $seq->named_meta($name, $string);
+    }
+
+    # if there wasn't one before, set the guessed type
+    unless ( defined $alphabet ) {
+        $self->alphabet($seq->alphabet());
+    }
+    return $seq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : array of 1 to n Bio::PrimarySeqI objects
+
+=cut
+
+# Write each given sequence as a metafasta record: a ">id desc" header,
+# the sequence wrapped at the configured width and, for objects that
+# implement Bio::Seq::MetaI, one "&name" block per named meta string
+# (wrapped the same way). Throws on anything that is not a
+# Bio::PrimarySeqI object. Returns 1 on success and nothing when writing
+# a record's header/sequence fails.
+sub write_seq {
+   my ($self,@seq) = @_;
+   my $width = $self->width;
+   foreach my $seq (@seq) {
+       $self->throw("Did not provide a valid Bio::PrimarySeqI object")
+           unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+
+       my $str = $seq->seq;
+       my $top = $seq->display_id();
+       if ($seq->can('desc') and my $desc = $seq->desc()) {
+           $desc =~ s/\n//g;
+           $top .= " $desc";
+       }
+       if(length($str) > 0) {
+           $str =~ s/(.{1,$width})/$1\n/g;
+       } else {
+           # an empty sequence still gets its (empty) sequence line
+           $str = "\n";
+       }
+       $self->_print (">",$top,"\n",$str) or return;
+       if ($seq->isa('Bio::Seq::MetaI')) {
+           foreach my $meta ($seq->meta_names) {
+               my $meta_str = $seq->named_meta($meta);
+               $meta_str =~ s/(.{1,$width})/$1\n/g;
+               $self->_print ("&",$meta,"\n",$meta_str);
+           }
+       }
+   }
+
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return 1;
+}
+
+=head2 width
+
+ Title   : width
+ Usage   : $obj->width($newval)
+ Function: Get/Set the line width for METAFASTA output
+ Returns : value of width
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Get/set the output line width. A defined argument is stored on the
+# object; the stored value is returned, falling back to the package
+# default $WIDTH when it is unset or false.
+sub width {
+    my ($self, $value) = @_;
+    $self->{'width'} = $value if defined $value;
+    return $self->{'width'} || $WIDTH;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/phd.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/phd.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/phd.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,278 @@
+# $Id: phd.pm,v 1.21.4.2 2006/10/10 18:42:50 sendu Exp $
+#
+# Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself.
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::phd - .phd file input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::SeqIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform .phd files (from Phil Green's phred basecaller)
+to and from Bio::Seq::Quality objects
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR Chad Matsalla
+
+Chad Matsalla
+bioinformatics at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# 'Let the code begin...
+
+package Bio::SeqIO::phd;
+use strict;
+use Bio::Seq::SeqFactory;
+use Dumpvalue();
+my $dumper = new Dumpvalue();
+
+use base qw(Bio::SeqIO);
+
+# Set up the stream: delegate to the parent initializer and, unless a
+# sequence factory has already been set, install one that creates
+# Bio::Seq::Quality objects.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new
+			      (-verbose => $self->verbose(),
+			       -type => 'Bio::Seq::Quality'));
+  }
+}
+
+=head2 next_seq()
+
+ Title   : next_seq()
+ Usage   : $swq = $stream->next_seq()
+ Function: returns the next phred sequence in the stream
+ Returns : Bio::Seq::Quality object
+ Args    : NONE
+ Notes   : This is really redundant because AFAIK there is no such thing as
+  	   a .phd file that contains more than one sequence. It is included as
+	   an interface thing and for consistency.
+
+=cut
+
+# Parse a phd record from the stream and return the object built by the
+# sequence factory, or nothing if the stream is already exhausted.
+#
+# The id comes from the BEGIN_SEQUENCE line (or, failing that, from a
+# BEGIN_CHROMAT line). "name: value" lines between BEGIN_COMMENT and
+# END_COMMENT are collected into a hash stored under $swq->{comments}.
+# Lines between BEGIN_DNA and END_DNA supply base, quality and (optional)
+# trace index. Reading continues to the end of the input.
+sub next_seq {
+    my ($self,@args) = @_;
+    my ($id,@bases,@qualities,@trace_indices) = ('');
+
+    my $entry = $self->_readline;
+    return unless $entry;
+    if ($entry =~ /^BEGIN_SEQUENCE\s+(\S+)/) {
+        $id = $1;
+    }
+
+    my $in_dna   = 0;
+    my $comments = {};
+
+  LINE:
+    while (defined($entry = $self->_readline)) {
+        chomp($entry);
+
+        if ($entry =~ /^BEGIN_COMMENT/) {
+            # consume the comment section; stop at END_COMMENT or, for a
+            # truncated file, at end of input (instead of looping forever)
+            while (defined($entry = $self->_readline)) {
+                chomp($entry);
+                next unless $entry;
+                last if $entry =~ /^END_COMMENT/;
+                # limit the split so values containing ':' (e.g. TIME)
+                # are kept whole
+                my ($name,$content) = split(/:/,$entry,2);
+                if ($content) { $content =~ s/^\s//; }
+                $comments->{$name} = $content;
+            }
+            last LINE unless defined $entry;
+        }
+        if ($entry =~ /^BEGIN_CHROMAT:\s+(\S+)/) {
+            # this is where I used to grab the ID
+            $id = $1 if !$id;
+            $entry = $self->_readline();
+            last LINE unless defined $entry;
+        }
+        if ($entry =~ /^BEGIN_DNA/) {
+            $in_dna = 1;
+            $entry = $self->_readline();
+            last LINE unless defined $entry;
+        }
+        if ($entry =~ /^END_DNA/) {
+            $in_dna = 0;
+        }
+        next LINE unless $in_dna;
+
+        # "base quality [trace_index]"; lines that do not have at least
+        # two fields are skipped rather than re-using stale captures
+        if ($entry =~ /(\S+)\s+(\S+)(?:\s+(\S+))?/) {
+            push @bases,$1;
+            push @qualities,$2;
+            # Not sure that trace index values are required for a phd file
+            push(@trace_indices,$3) if defined $3;
+        }
+    }
+
+    my $swq = $self->sequence_factory->create
+        (-seq        => join('',@bases),
+         -qual       => \@qualities,
+         -trace      => \@trace_indices,
+         -id         => $id,
+         -primary_id => $id,
+         -display_id => $id,
+         );
+    # this should be an actual object to assist in serialization
+    # but I don't have time for this now.
+    if ($comments) { $swq->{comments} = $comments; }
+    return $swq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq(-Quality => $swq, <comments>)
+ Usage   : $obj->write_seq(     -Quality => $swq,);
+ Function: Write out a phd file.
+ Returns : 1 on success.
+ Args    : Requires: a reference to a Quality object to form the
+           basis for the phd file. The other named arguments listed below
+           are written into the comments section of the phd file. Read the
+           specifications for phd to decide what might be good to put in here.
+ Notes   : These are the comments that reside in the header of a phd file
+	   at the present time. If not provided in the parameter list for
+	   write_seq(), the following default values will be used:
+	CHROMAT_FILE: undefined in write_phd
+	ABI_THUMBPRINT: 0
+	PHRED_VERSION: 0.980904.e
+	CALL_METHOD: phred
+	QUALITY_LEVELS: 99
+	TIME: <current time>
+	TRACE_ARRAY_MIN_INDEX: 0
+	TRACE_ARRAY_MAX_INDEX: 10000
+	CHEM: unknown
+	DYE: unknown
+     IMPORTANT: This method does not write the trace index where this
+          call was made. All base calls are placed at index 10.
+
+
+=cut
+
+# Write a Bio::Seq::Quality object (passed as -Quality or
+# -SeqWithQuality) to the stream in phd format: a BEGIN_SEQUENCE line, a
+# comment section built from the named arguments (with defaults for the
+# ones not supplied), one "BASE QUALITY 10" line per position, and the
+# closing END_DNA / END_SEQUENCE lines.
+# Throws if no quality object is given or if its length() reports
+# "DIFFERENT" sequence and quality lengths. Returns 1.
+sub write_seq {
+    my ($self,@args) = @_;
+
+    my ($swq, $swq2, $chromatfile, $abithumb,
+        $phredversion, $callmethod,
+        $qualitylevels,$time,
+        $trace_min_index,
+        $trace_max_index,
+        $chem, $dye
+        ) = $self->_rearrange([qw(QUALITY
+                                  SEQWITHQUALITY
+                                  CHROMAT_FILE
+                                  ABI_THUMBPRINT
+                                  PHRED_VERSION
+                                  CALL_METHOD
+                                  QUALITY_LEVELS
+                                  TIME
+                                  TRACE_ARRAY_MIN_INDEX
+                                  TRACE_ARRAY_MAX_INDEX
+                                  CHEM
+                                  DYE
+                                  )], @args);
+
+    $swq = $swq2 if not $swq and $swq2;
+    # accept Bio::Seq::Quality and anything derived from it
+    unless (ref($swq) && UNIVERSAL::isa($swq, 'Bio::Seq::Quality')) {
+        $self->throw("You must pass a Bio::Seq::Quality object to write_seq as a parameter named \"Quality\"");
+    }
+    my $id = $swq->id();
+    if (!$id) { $id = "UNDEFINED in Quality Object"; }
+
+    # header comment fields in output order: label, supplied value, default
+    my @header = (
+        [ 'CHROMAT_FILE',          $chromatfile,     'undefined in write_phd' ],
+        [ 'ABI_THUMBPRINT',        $abithumb,        0                        ],
+        [ 'PHRED_VERSION',         $phredversion,    "0.980904.e"             ],
+        [ 'CALL_METHOD',           $callmethod,      'phred'                  ],
+        [ 'QUALITY_LEVELS',        $qualitylevels,   99                       ],
+        [ 'TIME',                  $time,            scalar localtime()       ],
+        [ 'TRACE_ARRAY_MIN_INDEX', $trace_min_index, 0                        ],
+        [ 'TRACE_ARRAY_MAX_INDEX', $trace_max_index, '10000'                  ],
+        [ 'CHEM',                  $chem,            'unknown'                ],
+        [ 'DYE',                   $dye,             'unknown'                ],
+    );
+
+    my @phredstack = ("BEGIN_SEQUENCE $id","","BEGIN_COMMENT","");
+    foreach my $field (@header) {
+        my ($label, $value, $default) = @$field;
+        $value = $default unless defined $value;
+        push @phredstack, "$label: $value";
+    }
+    push @phredstack,("END_COMMENT","","BEGIN_DNA");
+
+    foreach my $line (@phredstack) {  $self->_print($line."\n"); }
+
+    my $length = $swq->length();
+    if ($length eq "DIFFERENT") {
+        $self->throw("Can't create the phd because the sequence and the quality in the Quality object are of different lengths.");
+    }
+    # the trace index is not preserved: every base call is written with
+    # the constant index 10
+    for (my $curr = 1; $curr<=$length; $curr++) {
+        $self->_print (uc($swq->baseat($curr))." ".
+                       $swq->qualat($curr)." 10".
+                       "\n");
+    }
+    $self->_print ("END_DNA\n\nEND_SEQUENCE\n");
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pir.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pir.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pir.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,158 @@
+# $Id: pir.pm,v 1.23.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::PIR
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# October 18, 1999  Largely rewritten by Lincoln Stein
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::pir - PIR sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from pir flat
+file databases.
+
+Note: This does not completely preserve the PIR format - quality
+information about sequence is currently discarded since bioperl
+does not have a mechanism for handling these encodings in sequence
+data.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Aaron Mackey E<lt>amackey at virginia.eduE<gt>
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::pir;
+use strict;
+
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::SeqIO);
+
+# Set up the stream: delegate to the parent initializer and, unless a
+# sequence factory has already been set, install one that creates
+# Bio::Seq objects.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new
+			      (-verbose => $self->verbose(),
+			       -type => 'Bio::Seq'));
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+# Read the next PIR entry and return the sequence object created by the
+# sequence factory (alphabet 'protein'), or nothing at end of input.
+# An entry is a ">P1;id" (or ">F1;id") line, a description line and the
+# sequence text; '*', the punctuation characters ( ) . / = , and all
+# whitespace are removed from the sequence.
+# Throws if the entry or its header line cannot be parsed.
+sub next_seq {
+    my ($self) = @_;
+    local $/ = "\n>";
+
+    my $record = $self->_readline;
+    return unless $record;
+    if ($record eq '>') {	# handle the very first one having no comment
+        $record = $self->_readline;
+        return unless $record;
+    }
+
+    my ($top, $desc, $residues) = ($record =~ /^(.+?)\n(.+?)\n([^>]*)/s);
+    defined $top
+        or $self->throw("Cannot parse entry PIR entry [$record]");
+
+    # P - indicates complete protein
+    # F - indicates protein fragment
+    # not sure how to stuff these into a Bio object
+    # suitable for writing out.
+    my ($type, $id) = ($top =~ /^>?([PF])1;(\S+)\s*$/);
+    defined $type
+        or $self->throw("PIR stream read attempted without leading '>P1;' [ $record ]");
+
+    # strip terminator, PIR punctuation and whitespace in one pass
+    $residues =~ s/[\s\*\(\)\.\/\=\,]//g;
+
+    # TODO - not processing SFS data
+    return $self->sequence_factory->create
+        (-seq        => $residues,
+         -primary_id => $id,
+         -id         => $id,
+         -desc       => $desc,
+         -alphabet   => 'protein'
+         );
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Array of Bio::PrimarySeqI objects
+
+
+=cut
+
+# Write each given sequence as a PIR entry: ">P1;<id>", the description
+# line, then the sequence followed by '*'. Throws on anything that is
+# not a Bio::PrimarySeqI object and warns when the display id contains
+# whitespace. Returns 1 on success, nothing if printing fails.
+sub write_seq {
+    my ($self, @seq) = @_;
+
+    foreach my $entry (@seq) {
+        unless (defined $entry && ref($entry) && $entry->isa('Bio::PrimarySeqI')) {
+            $self->throw("Did not provide a valid Bio::PrimarySeqI object");
+        }
+
+        if ($entry->display_id =~ /\s/) {
+            $self->warn("No whitespace allowed in PIR ID [". $entry->display_id. "]");
+        }
+
+        my $residues = $entry->seq();
+        my $written  = $self->_print(">P1;".$entry->id(),
+                                     "\n", $entry->desc(), "\n",
+                                     $residues, "*\n");
+        return unless $written;
+    }
+
+    if ($self->_flush_on_write && defined $self->_fh) {
+        $self->flush;
+    }
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pln.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pln.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/pln.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+# $Id: pln.pm,v 1.12.4.1 2006/10/02 23:10:30 sendu Exp $
+# BioPerl module for Bio::SeqIO::pln
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::pln - pln trace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from pln trace
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Aaron Mackey
+
+Email: amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::pln;
+use vars qw(@ISA $READ_AVAIL);
+use strict;
+
+use Bio::SeqIO;
+use Bio::Seq::SeqFactory;
+
+push @ISA, qw( Bio::SeqIO );
+
+# Compile-time probe for the optional Bio::SeqIO::staden::read module.
+# When it loads, it is appended to @ISA and $READ_AVAIL is set to 1;
+# otherwise $READ_AVAIL is 0 (and _initialize will refuse to proceed).
+sub BEGIN {
+    my $loaded = eval { require Bio::SeqIO::staden::read; 1 };
+    if ($loaded) {
+        push @ISA, "Bio::SeqIO::staden::read";
+        $READ_AVAIL = 1;
+    } else {
+        $READ_AVAIL = 0;
+    }
+}
+
+# Set up the stream: delegate to the parent initializer, install a
+# Bio::Seq factory unless one has already been set, and throw a
+# Bio::Root::SystemException when the staden::read backend could not be
+# loaded at compile time.
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::Seq'));
+  }
+  unless ($READ_AVAIL) {
+      Bio::Root::Root->throw( -class => 'Bio::Root::SystemException',
+			      -text  => "Bio::SeqIO::staden::read is not available; make sure the bioperl-ext package has been installed successfully!"
+			    );
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::SeqWithQuality object
+ Args    : NONE
+
+=cut
+
+# Read a pln trace from the stream's filehandle via read_trace() and
+# return the object the sequence factory builds from the sequence, id,
+# description and quality values it yields (alphabet 'DNA').
+sub next_seq {
+    my ($self) = @_;
+
+    my ($bases, $id, $desc, $qual) = $self->read_trace($self->_fh, 'pln');
+
+    # create the seq object
+    my %params = (
+        -seq        => $bases,
+        -id         => $id,
+        -primary_id => $id,
+        -desc       => $desc,
+        -alphabet   => 'DNA',
+        -qual       => $qual,
+    );
+    my $obj = $self->sequence_factory->create(%params);
+    return $obj;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+# Write each given sequence to the stream's filehandle in pln format via
+# write_trace(), flushing afterwards when flush-on-write is enabled.
+# Returns 1.
+sub write_seq {
+    my ($self,@seq) = @_;
+
+    my $fh = $self->_fh;
+    foreach my $seq (@seq) {
+	$self->write_trace($fh, $seq, 'pln');
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/qual.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/qual.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/qual.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,238 @@
+# $Id: qual.pm,v 1.30.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# Copyright (c) 1997-9 bioperl, Chad Matsalla. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself.
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::qual - .qual file input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class
+(see L<Bio::SeqIO> for details).
+
+  my $in_qual = Bio::SeqIO->new(-file    => $qualfile,
+                                -format  => 'qual',
+                                -verbose => $verbose);
+
+=head1 DESCRIPTION
+
+This object can transform .qual (similar to fasta) objects to and from
+Bio::Seq::Quality objects. See L<Bio::Seq::Quality> for details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR Chad Matsalla
+
+Chad Matsalla
+bioinformatics at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::qual;
+use strict;
+use Bio::Seq::SeqFactory;
+use Dumpvalue();
+my $dumper = new Dumpvalue();
+
+use base qw(Bio::SeqIO);
+
+# Initialise the stream through the parent class and, when the caller has not
+# supplied a sequence factory, install one that builds
+# Bio::Seq::PrimaryQual objects.
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    if ( !defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(
+                -verbose => $self->verbose(),
+                -type    => 'Bio::Seq::PrimaryQual',
+            )
+        );
+    }
+}
+
+=head2 next_seq()
+
+ Title   : next_seq()
+ Usage   : $qual = $stream->next_seq()
+ Function: returns the next quality record in the stream
+ Returns : Bio::Seq::PrimaryQual object
+ Notes   : Get the next quality sequence from the stream.
+
+=cut
+
+sub next_seq {
+    my ($self, @args) = @_;
+
+    # Records are delimited by a newline followed by '>', as in fasta files.
+    local $/ = "\n>";
+
+    return unless my $entry = $self->_readline;
+
+    if ($entry eq '>') {    # very first one: only the leading '>' was read
+        return unless $entry = $self->_readline;
+    }
+
+    # First line is the header; everything up to the next '>' is the data.
+    my ($top, $sequence) = $entry =~ /^(.+?)\n([^>]*)/s
+        or $self->throw("Can't parse entry [$entry]");
+    my ($id, $fulldesc) = $top =~ /^\s*(\S+)\s*(.*)/
+        or $self->throw("Can't parse fasta header");
+    $id =~ s/^>//;
+
+    # Fold the multi-line quality listing into one space-separated string.
+    $sequence =~ s/\n+/ /g;
+
+    # create the seq object
+    return $self->sequence_factory->create
+        (-qual       => $sequence,
+         -id         => $id,
+         -primary_id => $id,
+         -display_id => $id,
+         -desc       => $fulldesc
+        );
+}
+
+=head2 _next_qual
+
+ Title   : _next_qual
+ Usage   : $seq = $stream->_next_qual() (but do not do
+      	  that. Use $stream->next_seq() instead)
+ Function: returns the next quality in the stream
+ Returns : Bio::Seq::PrimaryQual object
+ Args    : NONE
+ Notes	: An internal method. Gets the next quality in
+	        the stream.
+
+=cut
+
+sub _next_qual {
+    my ($self) = @_;
+
+    # Plain function call (not a method call), evaluated in scalar context,
+    # with the "build the object" flag switched on.
+    return scalar next_primary_qual( $self, 1 );
+}
+
+=head2 next_primary_qual()
+
+ Title   : next_primary_qual()
+ Usage   : $seq = $stream->next_primary_qual()
+ Function: returns the next sequence in the stream
+ Returns : Bio::PrimaryQual object
+ Args    : NONE
+
+=cut
+
+sub next_primary_qual {
+    # $as_next_qual is still accepted so existing callers keep working, but
+    # it is no longer required: previously a call without it consumed a
+    # record from the stream and then returned undef.
+    my ($self, $as_next_qual) = @_;
+
+    # Records are delimited by a newline followed by '>', as in fasta files.
+    local $/ = "\n>";
+
+    return unless my $entry = $self->_readline;
+
+    if ($entry eq '>') {    # very first one: only the leading '>' was read
+        return unless $entry = $self->_readline;
+    }
+
+    # First line is the header; everything up to the next '>' is the data.
+    my ($top, $sequence) = $entry =~ /^(.+?)\n([^>]*)/s
+        or $self->throw("Can't parse entry [$entry]");
+    my ($id, $fulldesc) = $top =~ /^\s*(\S+)\s*(.*)/
+        or $self->throw("Can't parse fasta header");
+    $id =~ s/^>//;
+
+    # Fold the multi-line quality listing into one space-separated string.
+    $sequence =~ s/\n+/ /g;
+
+    # The class is instantiated directly here, so make sure it is loaded.
+    require Bio::Seq::PrimaryQual;
+    return Bio::Seq::PrimaryQual->new(-qual       => $sequence,
+                                      -id         => $id,
+                                      -primary_id => $id,
+                                      -display_id => $id,
+                                      -desc       => $fulldesc
+                                     );
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $obj->write_seq( -source => $source,
+		            -header => "some information");
+ Function: Write out a list of quality values to a fasta-style file.
+ Returns : 1 on success.
+ Args    : Requires a reference to a Bio::Seq::Quality object or a
+	        PrimaryQual object as the -source. Optional: information
+	        for the header.
+ Notes   : If no -header is provided, $obj->id() will be used where
+	        $obj is a reference to either a Quality object or a
+	        PrimaryQual object. If $source->id() fails, "unknown" will be
+	        the header. If the Quality object has $source->length()
+           of "DIFFERENT" (read the pod, luke), write_seq will use the
+           length of the PrimaryQual object within the Quality
+           object.
+
+=cut
+
+sub write_seq {
+    my ($self, @args) = @_;
+    my ($source, $head) = $self->_rearrange([qw(SOURCE HEADER)], @args);
+    if (!$source || ( !$source->isa('Bio::Seq::Quality') &&
+                      !$source->isa('Bio::Seq::PrimaryQual') )) {
+        $self->throw("You must pass a Bio::Seq::Quality or a Bio::Seq::PrimaryQual object to write_seq() as a parameter named \"source\"");
+    }
+
+    # Header precedence: an explicit -header argument (documented above but
+    # previously ignored), then the object's own header, then its id.
+    my $header = (defined $head && length $head)              ? $head
+               : ($source->can("header") && $source->header) ? $source->header
+               : ($source->can("id") && $source->id)         ? $source->id
+               :                                               "unknown";
+    $self->_print (">$header \n");
+
+    # Emit the quality values fifty per line; subqual() takes 1-based,
+    # inclusive coordinates.
+    my $length = $source->length();
+    for (my $count = 1; $count <= $length; $count += 50) {
+        my $max = ($count + 50 > $length) ? $length : $count + 49;
+        my @slice = @{ $source->subqual($count, $max) };
+        $self->_print (join(' ', @slice), "\n");
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/raw.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/raw.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/raw.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,173 @@
+#-----------------------------------------------------------------------------
+# PACKAGE : Bio::SeqIO::raw
+# AUTHOR  : Ewan Birney <birney at ebi.ac.uk>
+# CREATED : Feb 16 1999
+# REVISION: $Id: raw.pm,v 1.21.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# Copyright (c) 1997-9 bioperl, Ewan Birney. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself.
+#
+# _History_
+#
+# Ewan Birney <birney at ebi.ac.uk> developed the SeqIO
+# schema and the first prototype modules.
+#
+# This code is based on his Bio::SeqIO::Fasta module with
+# the necessary minor tweaks necessary to get it to read
+# and write raw formatted sequences made by
+# chris dagdigian <dag at sonsorol.org>
+#
+# October 18, 1999  Largely rewritten by Lincoln Stein
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::raw - raw sequence file input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::SeqIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from raw flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+  Ewan Birney   E<lt>birney at ebi.ac.ukE<gt>
+  Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+=head1 CONTRIBUTORS
+
+  Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqIO::raw;
+use strict;
+
+use Bio::Seq::SeqFactory;
+
+use base qw(Bio::SeqIO);
+
+# Initialise the stream through the parent class and, when the caller has not
+# supplied a sequence factory, install one that builds Bio::Seq objects.
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    if ( !defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(
+                -verbose => $self->verbose(),
+                -type    => 'Bio::Seq',
+            )
+        );
+    }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+
+=cut
+
+sub next_seq {
+    my ($self, @args) = @_;
+
+    # The format is one sequence per line with no formatting at all, so a
+    # single line read yields a whole record.
+    my $nextline = $self->_readline();
+    return unless defined $nextline;
+
+    # Upper-case the line and strip every non-word character (line ending,
+    # whitespace, punctuation) so that only the residues remain.
+    my $sequence = uc($nextline);
+    $sequence =~ s/\W//g;
+
+    return $self->sequence_factory->create(-seq => $sequence);
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Array of Bio::PrimarySeqI objects
+
+
+=cut
+
+sub write_seq {
+    my ($self, @seq) = @_;
+    foreach my $seq (@seq) {
+        $self->throw("Must provide a valid Bio::PrimarySeqI object")
+            unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+
+        # One sequence per line; a failed print aborts with a false return.
+        $self->_print($seq->seq, "\n") or return;
+    }
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 write_qual
+
+ Title   : write_qual
+ Usage   : $stream->write_qual($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq::Quality object
+
+
+=cut
+
+sub write_qual {
+    my ($self, @seq) = @_;
+
+    # A named loop variable is used instead of the global $_, which the
+    # method calls below would otherwise be free to clobber.
+    foreach my $seq (@seq) {
+        unless ($seq->isa("Bio::Seq::Quality")){
+            warn("You cannot write raw qualities without supplying a Bio::Seq::Quality object! You passed a ", ref($seq), "\n");
+            next;
+        }
+        my @qual = @{$seq->qual};
+
+        # An object without quality values still produces an output line.
+        if (scalar(@qual) == 0) {
+            $qual[0] = "\n";
+        }
+
+        # The newline is joined like any other element, so every line ends
+        # with a space before the newline; kept for output compatibility.
+        $self->_print (join " ", @qual,"\n") or return;
+    }
+
+    # Flush like write_seq does, so both writers behave the same way.
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/scf.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/scf.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/scf.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1293 @@
+# $Id: scf.pm,v 1.36.4.2 2006/11/08 17:25:55 sendu Exp $
+#
+# Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself.
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::scf - .scf file input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly. Use it via the Bio::SeqIO class, see
+L<Bio::SeqIO> for more information.
+
+=head1 DESCRIPTION
+
+This object can transform .scf files to and from Bio::Seq::SequenceTrace
+objects.  Mechanisms are present to retrieve trace data from scf
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR Chad Matsalla
+
+Chad Matsalla
+bioinformatics at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+Tony Cox, avc at sanger.ac.uk
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+Nancy Hansen, nhansen at mail.nih.gov
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::scf;
+use vars qw($DEFAULT_QUALITY);
+use strict;
+use Bio::Seq::SeqFactory;
+use Bio::Seq::SequenceTrace;
+use Dumpvalue();
+
+my $dumper = new Dumpvalue();
+$dumper->veryCompact(1);
+
+BEGIN {
+    $DEFAULT_QUALITY= 10;
+}
+
+use base qw(Bio::SeqIO);
+
+# Initialise the stream through the parent class and, when the caller has not
+# supplied a sequence factory, install one that builds Bio::Seq::Quality
+# objects.
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    if ( !defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(
+                -verbose => $self->verbose(),
+                -type    => 'Bio::Seq::Quality',
+            )
+        );
+    }
+}
+
+=head2 next_seq()
+
+ Title   : next_seq()
+ Usage   : $scf = $stream->next_seq()
+ Function: returns the next scf sequence in the stream
+ Returns : a Bio::Seq::SequenceTrace object
+ Args    : NONE
+ Notes   : Fills the interface specification for SeqIO.
+	        The SCF specification does not provide for having more than
+           one sequence in a given scf. So once the filehandle has been opened
+           and passed to SeqIO do not expect to run this function more than
+           once on a given scf unless you embraced and extended the SCF
+  	   standard.  SCF comments are accessible through the Bio::SeqI
+           interface method annotation().
+
+=cut
+
+#'
+sub next_seq {
+    my ($self) = @_;
+    my ($fh, $buffer, $offset, $length, @read);
+
+    # set up a filehandle to read in the scf
+    $fh = $self->_filehandle();
+    unless ($fh) {		# simulate the <> function
+        # NOTE(review): @_ still holds $self at this point, so the shift
+        # below yields the object rather than a file name -- confirm intent.
+        if ( !fileno(ARGV) or eof(ARGV) ) {
+            return unless my $ARGV = shift;
+            open(ARGV,$ARGV) or
+                $self->throw("Could not open $ARGV for SCF stream reading $!");
+        }
+        $fh = \*ARGV;
+    }
+    binmode $fh;		# for the Win32/Mac crowds
+    return unless read $fh, $buffer, 128; # no exception; probably end of file
+
+    # The master data structure: collects everything parsed from the file.
+    my $creator;
+
+    # The first thing to do is parse the header, which is common to all
+    # versions of scf. The layout of the rest differs between versions.
+    $creator->{header} = $self->_get_header($buffer);
+    if ($creator->{header}->{'version'} lt "3.00") {
+        $self->debug("scf.pm is working with a version 2 scf.\n");
+
+        # first gather the trace information
+        $length = $creator->{header}->{'samples'} *
+            $creator->{header}->{sample_size}*4;
+        # _get_header stores the trace position under 'sample_offset'; the
+        # former 'samples_offset' key never existed, so no offset was passed.
+        $buffer = $self->read_from_buffer($fh, $buffer, $length,
+                                          $creator->{header}->{sample_offset});
+        # the traces come back split per channel, as a reference to a hash
+        $creator->{traces} = $self->_parse_v2_traces(
+            $buffer,$creator->{header}->{sample_size});
+
+        # now go and get the base information (12 bytes per base)
+        $offset = $creator->{header}->{bases_offset};
+        $length = ($creator->{header}->{bases} * 12);
+        seek $fh,$offset,0;
+        $buffer = $self->read_from_buffer($fh,$buffer,$length,$creator->{header}->{bases_offset});
+
+        # now distill the information into its fractions.
+        ($creator->{peak_indices},
+         $creator->{qualities},
+         $creator->{sequence},
+         $creator->{accuracies}) = $self->_parse_v2_bases($buffer);
+
+    } else {
+        $self->debug("scf.pm is working with a version 3+ scf.\n");
+        my $transformed_read;
+        my $current_read_position = $creator->{header}->{sample_offset};
+        $length = $creator->{header}->{'samples'}*
+            $creator->{header}->{sample_size};
+
+        # The unpack letter depends only on the sample size: "c" for one
+        # byte per sample, "n" otherwise.
+        my $byte = "n";
+        if ($creator->{header}->{sample_size} == 1) {
+            $byte = "c";
+        }
+
+        # The four channels are read one after another, in this order.
+        foreach my $base (qw(a c g t)) {
+            $buffer = $self->read_from_buffer($fh,$buffer,$length,$current_read_position);
+            @read = unpack "${byte}${length}",$buffer;
+
+            # The trace values are given in the binary file as unsigned
+            # shorts but they really are signed deltas. 30000 is an
+            # arbitrary threshold above which a value is taken as negative.
+            foreach my $value (@read) {
+                if ($value > 30000) {
+                    $value -= 65536;
+                }
+            }
+
+            # once the read is read, it must be changed from relative
+            $transformed_read = $self->_delta(\@read,"backward");
+
+            # For 8-bit data we need to emulate a signed/unsigned
+            # cast that is implicit in the C implementations.....
+            if ($creator->{header}->{sample_size} == 1) {
+                foreach my $value (@{$transformed_read}) {
+                    $value += 256 if ($value < 0);
+                }
+            }
+            $current_read_position += $length;
+            $creator->{'traces'}->{$base} = join(' ',@{$transformed_read});
+        }
+
+        # now go and get the peak index information
+        $offset = $creator->{header}->{bases_offset};
+        $length = ($creator->{header}->{bases} * 4);
+        $buffer = $self->read_from_buffer($fh,$buffer,$length,$offset);
+        $creator->{peak_indices} = $self->_get_v3_peak_indices($buffer);
+        $offset += $length;
+
+        # now go and get the accuracy information
+        $buffer = $self->read_from_buffer($fh,$buffer,$length,$offset);
+        $creator->{accuracies} = $self->_get_v3_base_accuracies($buffer);
+
+        # OK, now go and get the base information.
+        $offset += $length;
+        $length = $creator->{header}->{bases};
+        $buffer = $self->read_from_buffer($fh,$buffer,$length,$offset);
+        $creator->{'sequence'} = unpack("a$length",$buffer);
+
+        # now, finally, extract the calls from the accuracy information.
+        $creator->{qualities} = $self->_get_v3_quality(
+            $creator->{'sequence'},$creator->{accuracies});
+    }
+
+    # now go and get the comment information
+    $offset = $creator->{header}->{comments_offset};
+    seek $fh,$offset,0;
+    $length = $creator->{header}->{comment_size};
+    $buffer = $self->read_from_buffer($fh,$buffer,$length);
+    $creator->{comments} = $self->_get_comments($buffer);
+
+    # The sequence id comes from the NAME comment. A file without one now
+    # yields an undefined id instead of dying on a method call on undef.
+    my @name_comments = grep {$_->tagname() eq 'NAME'}
+        $creator->{comments}->get_Annotations('comment');
+    my $name_comment;
+    if (@name_comments) {
+        $name_comment = $name_comments[0]->as_text();
+        $name_comment =~ s/^Comment:\s+//;
+    }
+
+    my $swq = Bio::Seq::Quality->new(
+        -seq  => $creator->{'sequence'},
+        -qual => $creator->{'qualities'},
+        -id   => $name_comment
+    );
+    my $returner = Bio::Seq::SequenceTrace->new(
+        -swq          => $swq,
+        -trace_a      => $creator->{'traces'}->{'a'},
+        -trace_t      => $creator->{'traces'}->{'t'},
+        -trace_g      => $creator->{'traces'}->{'g'},
+        -trace_c      => $creator->{'traces'}->{'c'},
+        -accuracy_a   => $creator->{'accuracies'}->{'a'},
+        -accuracy_t   => $creator->{'accuracies'}->{'t'},
+        -accuracy_g   => $creator->{'accuracies'}->{'g'},
+        -accuracy_c   => $creator->{'accuracies'}->{'c'},
+        -peak_indices => $creator->{'peak_indices'}
+    );
+
+    $returner->annotation($creator->{'comments'}); # add SCF comments
+    return $returner;
+}
+
+
+=head2 _get_v3_quality()
+
+ Title   : _get_v3_quality()
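+ Usage   : $self->_get_v3_quality($sequence, $accuracies)
+ Function: Pick, for each called base, the accuracy value of that base
+ Returns : Reference to an array of qualities ("unknown" for non-ACGT calls)
+ Args    : The base-call string and the hash of per-base accuracy arrays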
+ Notes   :
+
+=cut
+
+#'
+sub _get_v3_quality {
+    my ($self, $sequence, $accuracies) = @_;
+    my @called = split //, $sequence;
+    my @qualities;
+
+    # The quality of a call is the accuracy recorded for that same base at
+    # that same position; anything other than a/c/g/t has no such value.
+    foreach my $position (0 .. $#called) {
+        my $base = lc $called[$position];
+        my $quality = $base =~ /\A[acgt]\z/
+            ? $accuracies->{$base}->[$position]
+            : "unknown";
+        push @qualities, $quality;
+    }
+    return \@qualities;
+}
+
+=head2 _get_v3_peak_indices($buffer)
+
+ Title   : _get_v3_peak_indices($buffer)
+ Usage   : $self->_get_v3_peak_indices($buffer);
+ Function: Unpacks the peak indices for version3 scf
+ Returns : A string of the peak indices separated by single spaces
+ Args    : A scalar containing binary data.
+ Notes   :
+
+=cut
+
+sub _get_v3_peak_indices {
+    my ($self, $buffer) = @_;
+
+    # The buffer's byte length is used as the repeat count, which is more
+    # than the number of 32-bit big-endian values it holds; unpack simply
+    # stops when the data runs out.
+    my $length = length($buffer);
+    my @read = unpack "N$length", $buffer;
+    return join(' ', @read);
+}
+
+=head2 _get_v3_base_accuracies($buffer)
+
+ Title   : _get_v3_base_accuracies($buffer)
+ Usage   : $self->_get_v3_base_accuracies($buffer)
+ Function: Unpack the base accuracies for version 3 scf's
+ Returns : Reference to a hash (keys a, c, g, t) of accuracy arrays
+ Args    : A scalar containing binary data.
+ Notes   :
+
+=cut
+
+#'
+sub _get_v3_base_accuracies {
+    my ($self, $buffer) = @_;
+
+    # The buffer is treated as four equal runs of one-byte values, one run
+    # per base, in the order a, c, g, t.
+    my $qlength = length($buffer) / 4;
+    my $offset  = 0;
+    my $accuracies;
+    foreach my $currbase (qw(a c g t)) {
+        # An empty buffer yields no entries, so undef is returned.
+        last unless $qlength > 0;
+        # 'C' (unsigned char) is deliberate: the original author noted that
+        # an earlier change to 'n' was believed to be wrong.
+        my @read = unpack "C$qlength", substr($buffer, $offset, $qlength);
+        $accuracies->{$currbase} = \@read;
+        $offset += $qlength;
+    }
+    return $accuracies;
+}
+
+
+=head2 _get_comments($buffer)
+
+ Title   : _get_comments($buffer)
+ Usage   : $self->_get_comments($buffer);
+ Function: Gather the comments section from the scf and parse it into its
+	        components.
+ Returns : a Bio::Annotation::Collection object
+ Args    : The buffer. It is expected that the buffer contains a binary
+	        string for the comments section of an scf file according to
+	        the scf file specifications.
+ Notes   :
+
+=cut
+
+sub _get_comments {
+    my ($self, $buffer) = @_;
+    my $comments = Bio::Annotation::Collection->new();
+    my $size = length($buffer);
+    my $comments_retrieved = unpack "a$size", $buffer;
+
+    # Drop every NUL byte (terminator or padding), not just the first one.
+    $comments_retrieved =~ s/\0//g;
+
+    # Each line has the form TAG=text.
+    foreach my $line (split /\n/, $comments_retrieved) {
+        # Take the captures straight from this match: $1/$2 are not reset
+        # by a failed match, so reading them unconditionally is unsafe.
+        my ($tagname, $text) = $line =~ /(\w+)=(.*)/
+            or next;
+        # Skip tags without a value, but keep values such as "0" that a
+        # plain truth test would discard.
+        next unless length $text;
+
+        my $comment_obj = Bio::Annotation::Comment->new(
+            -text    => $text,
+            -tagname => $tagname);
+
+        $comments->add_Annotation('comment', $comment_obj);
+    }
+
+    # Keep a copy on the object for get_comments().
+    $self->{'comments'} = $comments;
+    return $comments;
+}
+
+=head2 _get_header()
+
+ Title   : _get_header($buffer)
+ Usage   : $self->_get_header($buffer);
+ Function: Gather the header section from the scf and parse it into its
+           components.
+ Returns : Reference to a hash containing the header components.
+ Args    : The buffer. It is expected that the buffer contains a binary
+           string for the header section of an scf file according to the
+           scf file specifications.
+ Notes   : None.
+
+=cut
+
+sub _get_header {
+    my ($self, $buffer) = @_;
+
+    # Field names in the order the values appear in the unpack template:
+    # a 4-character magic string, eight 32-bit integers, a 4-character
+    # version string, two more 32-bit integers, then twenty spare integers.
+    my @fields = qw(
+        scf samples sample_offset bases bases_left_clip bases_right_clip
+        bases_offset comment_size comments_offset version sample_size
+        code_set
+    );
+    my @values = unpack "a4 NNNNNNNN a4 NN N20", $buffer;
+
+    my %header;
+    @header{@fields} = splice @values, 0, scalar @fields;
+    $header{'header_spare'} = [@values];    # whatever is left over
+
+    # Keep a copy on the object for get_header().
+    $self->{'header'} = \%header;
+    return \%header;
+}
+
+=head2 _parse_v2_bases($buffer)
+
+ Title   : _parse_v2_bases($buffer)
+ Usage   : $self->_parse_v2_bases($buffer);
+ Function: Gather the bases section from the scf and parse it into its
+           components.
+ Returns : (\@indices, \@qualities, $sequence, \%accuracies)
+ Args    : The buffer. It is expected that the buffer contains a binary
+           string for the bases section of an scf file according to the
+           scf file specifications.
+ Notes   : None.
+
+=cut
+
+sub _parse_v2_bases {
+    my ($self, $buffer) = @_;
+    my $length = length($buffer);
+    my ($sequence, @qualities, @indices, $accuracies);
+
+    # Each base record is 12 bytes: a 32-bit peak index, four one-byte
+    # accuracy values (a, c, g, t), the called base and three more bytes.
+    for (my $offset2 = 0; $offset2 < $length; $offset2 += 12) {
+        # Only this record is extracted, rather than copying the whole
+        # remainder of the buffer on every iteration.
+        my @read = unpack "N C C C C a C3", substr($buffer, $offset2, 12);
+        push @indices, $read[0];
+
+        # The quality of a call is the accuracy value of the called base.
+        my $currbase = lc($read[5]);
+        my $currqual;
+        if    ($currbase eq "a") { $currqual = $read[1]; }
+        elsif ($currbase eq "c") { $currqual = $read[2]; }
+        elsif ($currbase eq "g") { $currqual = $read[3]; }
+        elsif ($currbase eq "t") { $currqual = $read[4]; }
+        else                     { $currqual = "UNKNOWN"; }
+
+        push @{$accuracies->{"a"}}, $read[1];
+        push @{$accuracies->{"c"}}, $read[2];
+        push @{$accuracies->{"g"}}, $read[3];
+        push @{$accuracies->{"t"}}, $read[4];
+
+        # The sequence is built from the lower-cased base calls.
+        $sequence .= $currbase;
+        push @qualities, $currqual;
+    }
+    return (\@indices, \@qualities, $sequence, $accuracies);
+}
+
+=head2 _parse_v2_traces(\@traces_array)
+
+ Title   : _parse_v2_traces($buffer, $sample_size)
+ Usage   : $self->_parse_v2_traces($buffer, $sample_size);
+ Function: Parses an scf Version2 trace section into its base components.
+ Returns : Reference to a hash of arrays of samples, keyed a, t, g and c.
+ Args    : The binary traces section of an scf version2 file and the
+           sample size in bytes from the header.
+
+=cut
+
+sub _parse_v2_traces {
+    my ($self, $buffer, $sample_size) = @_;
+
+    # One-byte samples are unpacked with "c", anything else with "n". The
+    # byte length serves as the repeat count; unpack stops at end of data.
+    my $template = ($sample_size == 1 ? "c" : "n") . CORE::length($buffer);
+    my @samples = unpack $template, $buffer;
+
+    # The samples are interleaved in groups of four.
+    # NOTE(review): the group is mapped as a, t, c, g. The SCF v2 format
+    # describes the order as A, C, G, T -- confirm against the writer code
+    # before changing anything here.
+    my $traces;
+    my $start = 0;
+    while ($start < scalar(@samples)) {
+        my ($first, $second, $third, $fourth) = @samples[$start .. $start + 3];
+        push @{$traces->{'a'}}, $first;
+        push @{$traces->{'t'}}, $second;
+        push @{$traces->{'g'}}, $fourth;
+        push @{$traces->{'c'}}, $third;
+        $start += 4;
+    }
+    return $traces;
+}
+
+
+# Deprecated stub: the whole body is commented out, so calling this does
+# nothing. As the name says, use the Bio::Seq::SequenceTrace object instead.
+# The old implementation is kept below for reference only.
+sub get_trace_deprecated_use_the_sequencetrace_object_instead {
+    # my ($self,$base_channel,$traces) = @_;
+    # $base_channel =~ tr/a-z/A-Z/;
+    # if ($base_channel !~ /A|T|G|C/) {
+    # 	$self->throw("You tried to ask for a base channel that wasn't A,T,G, or C. Ask for one of those next time.");
+    ##} elsif ($base_channel) {
+     #	my @temp = split(' ',$self->{'traces'}->{$base_channel});
+	#return \@temp;
+    #}
+}
+
+# Deprecated: returns a reference to an array holding the whitespace-separated
+# peak indices stored under $self->{'parsed'}->{'peak_indices'}.
+sub _deprecated_get_peak_indices_deprecated_use_the_sequencetrace_object_instead {
+    my $self = shift;
+    return [ split(' ', $self->{'parsed'}->{'peak_indices'}) ];
+}
+
+
+=head2 get_header()
+
+ Title   : get_header()
+ Usage   : %header = %{$obj->get_header()};
+ Function: Return the header for this scf.
+ Returns : A reference to a hash containing the header for this scf.
+ Args    : None.
+ Notes   :
+
+=cut
+
+sub get_header {
+    # Hand back whatever _get_header last stored on the object.
+    my $self = $_[0];
+    return $self->{'header'};
+}
+
+=head2 get_comments()
+
+ Title   : get_comments()
+ Usage   : %comments = %{$obj->get_comments()};
+ Function: Return the comments for this scf.
+ Returns : A Bio::Annotation::Collection object
+ Args    : None.
+ Notes   :
+
+=cut
+
+sub get_comments {
+    # Hand back whatever _get_comments last stored on the object.
+    my $self = $_[0];
+    return $self->{'comments'};
+}
+
+# Deprecated debugging aid: prints the four sample arrays held under
+# $self->{'text'} to STDOUT as a tab-separated table. A true $transformed
+# selects the 't_samples_*' arrays, otherwise the 'samples_*' arrays.
+sub _dump_traces_outgoing_deprecated_use_the_sequencetrace_object {
+    my ($self,$transformed) = @_;
+    my (@sA, @sT, @sG, @sC);
+    if ($transformed) {
+        @sA = @{$self->{'text'}->{'t_samples_a'}};
+        @sC = @{$self->{'text'}->{'t_samples_c'}};
+        @sG = @{$self->{'text'}->{'t_samples_g'}};
+        @sT = @{$self->{'text'}->{'t_samples_t'}};
+    }
+    else {
+        @sA = @{$self->{'text'}->{'samples_a'}};
+        @sC = @{$self->{'text'}->{'samples_c'}};
+        @sG = @{$self->{'text'}->{'samples_g'}};
+        @sT = @{$self->{'text'}->{'samples_t'}};
+    }
+    print ("Count\ta\tc\tg\tt\n");
+    # The number of rows is taken from the g channel.
+    for (my $curr=0; $curr < scalar(@sG); $curr++) {
+        print("$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n");
+    }
+    return;
+}
+
+# Deprecated stub: the whole body is commented out, so calling this does
+# nothing. The old implementation is kept below for reference only.
+sub _dump_traces_incoming_deprecated_use_the_sequencetrace_object {
+    # my ($self) = @_;
+    # my (@sA, @sT, @sG, @sC);
+    # @sA = @{$self->{'traces'}->{'A'}};
+    # @sC = @{$self->{'traces'}->{'C'}};
+    # @sG = @{$self->{'traces'}->{'G'}};
+    # @sT = @{$self->{'traces'}->{'T'}};
+    # @sA = @{$self->get_trace('A')};
+    # @sC = @{$self->get_trace('C')};
+    # @sG = @{$self->get_trace('G')};
+    # @sT = @{$self->get_trace('t')};
+    # print ("Count\ta\tc\tg\tt\n");
+    # for (my $curr=0; $curr < scalar(@sG); $curr++) {
+    # 	print("$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n");
+    #}
+    #return;
+}
+
+=head2 write_seq
+
+ Title   : write_seq(-Quality => $swq, <comments>)
+ Usage   : $obj->write_seq(
+               -target => $swq,
+			-version => 2,
+			-CONV => "Bioperl-Chads Mighty SCF writer.");
+ Function: Write out an scf.
+ Returns : Nothing.
+ Args    : Requires: a reference to a Bio::Seq::Quality object to form the
+           basis for the scf.
+	   if -version is provided, it should be "2" or "3". A SCF of that
+	   version will be written.
+	   Any other arguments are assumed to be comments and are put into
+	   the comments section of the scf. Read the specifications for scf
+	   to decide what might be good to put in here.
+
+ Notes   :
+          For best results, use a SequenceTrace object.
+          The things that you need to write an scf:
+          a) sequence
+          b) quality
+          c) peak indices
+          d) traces
+          - You _can_ write an scf with just a and b by passing in a
+               SequenceWithQuality object- false traces will be synthesized
+               for you.
+
+=cut
+
+sub write_seq {
+    # Writes the object passed as -target to the stream as a version 2 or
+    # version 3 scf and then closes the stream.
+    #
+    #   -target  : a Bio::Seq::SequenceTrace, or a Bio::Seq::Quality (traces
+    #              are then synthesized).  Anything else throws.
+    #   -version : 2 or 3 (default 3).  Any other true value warns and a
+    #              version 2 scf is written.
+    #   others   : stored as comments (leading "-" removed from the label).
+    #
+    # Returns whatever $self->close() returns.
+    my ($self,%args) = @_;
+    my ($swq) = $self->_rearrange([qw(TARGET)], %args);
+    my $writer_fodder = { comments => {} };
+
+    unless (ref($swq) =~ /Bio::Seq::SequenceTrace|Bio::Seq::Quality/) {
+        $self->throw("You must pass a Bio::Seq::Quality or a Bio::Seq::SequenceTrace object to write_seq as a parameter named \"target\"");
+    }
+    if (ref($swq) eq "Bio::Seq::Quality") {
+        # this means that the object *has no trace data*
+        # we might as well synthesize some now, ok?
+        my $swq2 = Bio::Seq::SequenceTrace->new( -swq => $swq );
+        $swq2->_synthesize_traces();
+        $swq2->set_accuracies();
+        $swq = $swq2;
+    }
+
+    # all of the rest of the arguments are comments for the scf
+    my $comments = $writer_fodder->{comments};
+    foreach my $arg (sort keys %args) {
+        next if ($arg =~ /target/i);
+        (my $label = $arg) =~ s/^\-//;
+        $comments->{$label} = $args{$arg};
+    }
+    # The defaults must be checked against (and stored in) the comments that
+    # are actually written.  Testing a separate, empty hash meant that NAME
+    # was never written and a caller supplied CONV was always overwritten.
+    $comments->{'NAME'} = $swq->id() unless $comments->{'NAME'};
+    $comments->{'CONV'} = "Bioperl-Chads Mighty SCF writer."
+        unless defined $comments->{'CONV'};
+
+    # now deal with the version of scf they want to write
+    my $major_version = 3;
+    if ($comments->{version}) {
+        if ($comments->{version} != 2 && $comments->{version} != 3) {
+            $self->warn("This module can only write version 2.0 or 3.0 scf's. Writing a version 2.0 scf by default.");
+            $major_version = 2;
+        }
+        elsif ($comments->{version} == 2) {
+            $major_version = 2;
+        }
+    }
+
+    # set a few things in the header
+    # The header always carries the four character "N.00" form; the helper
+    # methods are handed the plain major version ("2" or "3") so that the
+    # fallback to version 2 takes exactly the same path as an explicit 2.
+    my $header = $writer_fodder->{'header'} = {};
+    $header->{'version'} = ($major_version == 2) ? "2.00" : "3.00";
+    $header->{'magic'} = ".scf";
+    $header->{'sample_size'} = "2";
+    $header->{'bases'} = CORE::length($swq->seq());
+    $header->{'bases_left_clip'} = "0";
+    $header->{'bases_right_clip'} = "0";
+    $header->{'code_set'} = "9";
+    $header->{'spare'} = [ (0) x 20 ];
+    $header->{'samples_offset'} = "128";
+    $header->{'samples'} = $swq->trace_length();
+
+    # create the binary for the comments and file it in writer_fodder
+    $writer_fodder->{comments} = $self->_get_binary_comments($comments);
+    # create the binary and the strings for the traces
+    $writer_fodder->{traces} = $self->_get_binary_traces(
+        $major_version, $swq, $header->{'sample_size'});
+
+    my $samples_size  = CORE::length($writer_fodder->{'traces'}->{'binary'});
+    my $comments_size = CORE::length($writer_fodder->{'comments'}->{'binary'});
+
+    # NOTE(review): write failures below are only reported with print(),
+    # i.e. on the currently selected output handle, and do not abort.
+    if ($major_version == 2) {
+        #
+        # version 2: header, samples, one record per base, comments
+        #
+        $writer_fodder->{bases} = $self->_get_binary_bases(
+            2, $swq, $header->{'sample_size'});
+        my $bases_size = CORE::length($writer_fodder->{bases}->{binary});
+        $header->{'bases_offset'} = 128 + $samples_size;
+        $header->{'comments_offset'} = 128 + $samples_size + $bases_size;
+        $header->{'comments_size'} = $comments_size;
+        $header->{'private_size'} = "0";
+        $header->{'private_offset'} = 128 + $samples_size + $bases_size +
+            $header->{'comments_size'};
+        $header->{'binary'} = $self->_get_binary_header($header);
+        $dumper->dumpValue($writer_fodder) if $self->verbose > 0;
+        $self->_print ($writer_fodder->{'header'}->{'binary'})
+            or print("Could not write binary header...\n");
+        $self->_print ($writer_fodder->{'traces'}->{'binary'})
+            or print("Could not write binary traces...\n");
+        $self->_print ($writer_fodder->{'bases'}->{'binary'})
+            or print("Could not write binary base structures...\n");
+        $self->_print ($writer_fodder->{'comments'}->{'binary'})
+            or print("Could not write binary comments...\n");
+    }
+    else {
+        #
+        # version 3: header, samples, peak indices, accuracies, called
+        # bases, reserved bytes, comments
+        #
+        ($writer_fodder->{peak_indices},
+         $writer_fodder->{accuracies},
+         $writer_fodder->{bases},
+         $writer_fodder->{reserved} ) =
+            $self->_get_binary_bases(3, $swq, $header->{'sample_size'});
+        $header->{'bases_offset'} = 128 + $samples_size;
+        $header->{'comments_size'} = $comments_size;
+        $header->{'private_size'} = "0";
+        # this is:
+        # bases_offset + base_offsets + accuracies + called_bases + reserved
+        $header->{'comments_offset'} =
+            128 + $samples_size +
+            CORE::length($writer_fodder->{'peak_indices'}->{'binary'}) +
+            CORE::length($writer_fodder->{'accuracies'}->{'binary'}) +
+            CORE::length($writer_fodder->{'bases'}->{'binary'}) +
+            CORE::length($writer_fodder->{'reserved'}->{'binary'});
+        $header->{'private_offset'} =
+            $header->{'comments_offset'} + $header->{'comments_size'};
+        # the second spare slot receives the offset just past the comments
+        $header->{'spare'}->[1] =
+            $header->{'comments_offset'} + $comments_size;
+        $header->{'binary'} = $self->_get_binary_header($header);
+        $self->_print ($writer_fodder->{'header'}->{'binary'})
+            or print("Couldn't write header\n");
+        $self->_print ($writer_fodder->{'traces'}->{'binary'})
+            or print("Couldn't write samples\n");
+        $self->_print ($writer_fodder->{'peak_indices'}->{'binary'})
+            or print("Couldn't write peak offsets\n");
+        $self->_print ($writer_fodder->{'accuracies'}->{'binary'})
+            or print("Couldn't write accuracies\n");
+        $self->_print ($writer_fodder->{'bases'}->{'binary'})
+            or print("Couldn't write called_bases\n");
+        $self->_print ($writer_fodder->{'reserved'}->{'binary'})
+            or print("Couldn't write reserved\n");
+        $self->_print ($writer_fodder->{'comments'}->{'binary'})
+            or print ("Couldn't write comments\n");
+    }
+
+    # kinda unnecessary, given the close() below, but maybe that'll go
+    # away someday.
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+
+    return $self->close();
+}
+
+
+
+
+
+=head2 _get_binary_header()
+
+ Title   : _get_binary_header($header);
+ Usage   : $self->_get_binary_header($header);
+ Function: Provide the binary string that will be used as the header for
+	        an scf document.
+ Returns : A binary string.
+ Args    : A reference to a hash holding the header fields: magic,
+	        samples, samples_offset, bases, bases_left_clip,
+	        bases_right_clip, bases_offset, comments_size,
+	        comments_offset, version, sample_size, code_set and spare
+	        (a reference to an array of 20 values).
+ Notes   :
+
+=cut
+
+sub _get_binary_header {
+    # Packs the header hash into the binary scf header: a 4 character
+    # magic, eight 32-bit big-endian integers, a 4 character version, two
+    # more integers and finally the 20 spare integers.
+    my ($self,$header) = @_;
+    my @leading_numbers = qw(samples samples_offset bases bases_left_clip
+                             bases_right_clip bases_offset comments_size
+                             comments_offset);
+    my @trailing_fields = qw(version sample_size code_set);
+    return pack(
+        "a4 NNNNNNNN a4 NN N20",
+        $header->{'magic'},
+        @{$header}{@leading_numbers},
+        @{$header}{@trailing_fields},
+        @{$header->{'spare'}}
+    );
+}
+
+=head2 _get_binary_traces($version,$ref)
+
+ Title   : _get_binary_traces($version,$ref,$sample_size)
+ Usage   : $self->_get_binary_traces($version,$ref,$sample_size);
+ Function: Constructs the binary trace (sample) section for all scfs
+ Returns : A reference to a hash with the keys version, string (a
+           reference to the array of trace values), binary (the packed
+           trace values) and length (the length of the packed string).
+ Args    : $version - "2" or "3"
+	   $ref - a reference to either a SequenceTrace or a
+          Bio::Seq::Quality object.
+	   $sample_size - the sample size recorded in the scf header
+ Notes   : This is a really complicated thing.
+
+=cut
+
+sub _get_binary_traces {
+    # Builds the sample (trace) section of an scf.
+    #
+    #   $version     : 2 (also "2.00") or 3 (also "3.00"); compared
+    #                  numerically.  Any other value yields no samples.
+    #   $ref         : a Bio::Seq::SequenceTrace; a Bio::Seq::Quality is
+    #                  wrapped in a SequenceTrace with synthesized traces,
+    #                  the same way write_seq does it.
+    #   $sample_size : when 1, negative version 3 deltas are shifted by 256.
+    #
+    # Returns a hash reference with the keys version, string (array
+    # reference of the sample values), binary (the packed samples) and
+    # length (length of the packed string).
+    my ($self,$version,$ref,$sample_size) = @_;
+    my $traceobj = $ref;
+    if ( ref($ref) eq "Bio::Seq::Quality" ) {
+        # no trace data available: synthesize some
+        $traceobj = Bio::Seq::SequenceTrace->new( -swq => $ref );
+        $traceobj->_synthesize_traces();
+    }
+    my @traces;
+    if ($version == 2) {
+        # version 2 interleaves the channels: a,c,g,t for every position
+        my $trace_length = $traceobj->trace_length();
+        for (my $position = 1; $position <= $trace_length; $position++) {
+            foreach my $base (qw(a c g t)) {
+                push @traces, $traceobj->trace_value_at($base,$position);
+            }
+        }
+    }
+    elsif ($version == 3) {
+        # version 3 stores each channel in full, delta-delta encoded
+        foreach my $base (qw(a c g t)) {
+            my @trace = @{$traceobj->trace($base)};
+            foreach my $value (@trace) {
+                # values above 30000 are moved into the negative range
+                $value -= 65536 if ($value > 30000);
+            }
+            my $transformed = $self->_delta(\@trace,"forward");
+            if ($sample_size == 1) {
+                foreach my $value (@{$transformed}) {
+                    $value += 256 if ($value < 0);
+                }
+            }
+            push @traces, @{$transformed};
+        }
+    }
+    my $returner = {
+        version => $version,
+        string  => \@traces,
+    };
+    my $length_of_traces = scalar(@traces);
+    # NOTE(review): samples are always packed as 16-bit big-endian values
+    # ("n"), even when $sample_size is 1 -- confirm whether 1-byte samples
+    # were intended for that case.
+    $returner->{binary} = pack "n${length_of_traces}", @traces;
+    $returner->{length} = CORE::length($returner->{binary});
+    return $returner;
+}
+
+
+sub _get_binary_bases {
+    # Builds the base related sections of an scf from a trace object.
+    #
+    # Version 2: returns ONE hash reference with the keys version, string
+    #   (array reference of all record fields) and binary; one record per
+    #   base: peak index, the a/c/g/t accuracies, the base, 3 spare bytes.
+    # Any other version: returns a LIST of four hash references --
+    #   peak indices, accuracies, called bases and reserved bytes -- each
+    #   with a 'binary' entry.
+    #
+    # $sample_size is accepted for interface compatibility; it is not used.
+    my ($self,$version,$trace,$sample_size) = @_;
+    if ($version == 2) {
+        my $returner = { 'version' => "2", 'string' => [], 'binary' => '' };
+        my $length = $trace->length();
+        for (my $current_base = 1; $current_base <= $length; $current_base++) {
+            my @current_row = (
+                $trace->peak_index_at($current_base),
+                $trace->accuracy_at("a",$current_base),
+                $trace->accuracy_at("c",$current_base),
+                $trace->accuracy_at("g",$current_base),
+                $trace->accuracy_at("t",$current_base),
+                $trace->baseat($current_base),
+                0,0,0,
+            );
+            push @{$returner->{string}}, @current_row;
+            $returner->{binary} .= pack "N C C C C a C3", @current_row;
+        }
+        return $returner;
+    }
+
+    # version 3: every kind of value is stored as its own contiguous run
+    my %peak_indices = ( string => $trace->peak_indices() );
+    my $peak_count = scalar(@{$peak_indices{string}});
+    $peak_indices{binary} = pack "N$peak_count", @{$peak_indices{string}};
+    $peak_indices{length} = CORE::length($peak_indices{binary});
+
+    my (%accuracies, @all_accuracies);
+    foreach my $base (qw(a c g t)) {
+        $accuracies{$base} = $trace->accuracies($base);
+        push @all_accuracies, @{$accuracies{$base}};
+    }
+    my $accuracy_count = scalar(@all_accuracies);
+        # this really is "c" for samplesize == 2
+    $accuracies{binary} = pack "c${accuracy_count}", @all_accuracies;
+    $accuracies{length} = CORE::length($accuracies{binary});
+
+    # three reserved zero bytes for every base
+    my $reserved_count = 3 * $trace->seq_obj()->length();
+    my %reserved = ( string => [ (0) x $reserved_count ] );
+        # this _must_ be "c"
+    $reserved{binary} = pack "c$reserved_count", @{$reserved{string}};
+    $reserved{length} = CORE::length($reserved{binary});
+
+    # the called bases are written as the plain sequence string
+    my %bases = ( binary => $trace->seq() );
+
+    return (\%peak_indices, \%accuracies, \%bases, \%reserved);
+}
+
+
+=head2 _make_trace_string($version)
+
+ Title   : _make_trace_string($version)
+ Usage   : $self->_make_trace_string($version)
+ Function: Merges trace data for the four bases to produce an scf
+	   trace string. _requires_ $version
+ Returns : Nothing. Alters $self.
+ Args    : $version - a version number. "2" or "3"
+ Notes   :
+
+=cut
+
+sub _make_trace_string {
+    # Merges the four sample arrays held in $self->{'text'} into a single
+    # array and stores a reference to it in $self->{'text'}->{'samples_all'}.
+    #   version 2: interleaved a,c,g,t for every position of the 'a' array;
+    #              missing values are replaced by $DEFAULT_QUALITY
+    #   version 3: all a, then all c, then all g, then all t
+    # Any other version throws.
+    my ($self,$version) = @_;
+    my @as = @{$self->{'text'}->{'samples_a'}};
+    my @cs = @{$self->{'text'}->{'samples_c'}};
+    my @gs = @{$self->{'text'}->{'samples_g'}};
+    my @ts = @{$self->{'text'}->{'samples_t'}};
+    my @traces;
+    if ($version == 2) {
+        foreach my $curr (0 .. $#as) {
+            foreach my $channel (\@as, \@cs, \@gs, \@ts) {
+                push @traces, defined $channel->[$curr]
+                    ? $channel->[$curr]
+                    : $DEFAULT_QUALITY;
+            }
+        }
+    }
+    elsif ($version == 3) {
+        @traces = (@as,@cs,@gs,@ts);
+    }
+    else {
+        $self->throw("No idea what version required to make traces here. You gave #$version#  Bailing.");
+    }
+    $self->{'text'}->{'samples_all'} = \@traces;
+
+}
+
+=head2 _get_binary_comments(\%comments)
+
+ Title   : _get_binary_comments(\%comments)
+ Usage   : $self->_get_binary_comments(\%comments);
+ Function: Provide a binary string that will be the comments section of
+	   the scf file. See the scf specifications for detailed
+	   specifications for the comments section of an scf file. Hint:
+	   CODE=something\nBODE=something\n\0
+ Returns : A reference to a hash with the keys length, string and binary.
+ Args    : A reference to a hash containing comments (label => value).
+ Notes   : None.
+
+=cut
+
+sub _get_binary_comments {
+    # Serializes a hash of comments as "KEY=value\n" lines (sorted by key),
+    # followed by "\n\0".  Undefined values are written as empty strings;
+    # defined false values such as 0 are written as they are.
+    # Returns a hash reference with the keys length, string and binary.
+    # The hash passed in is not modified.
+    my ($self,$rcomments) = @_;
+    my $comments_string = '';
+    foreach my $key (sort keys %{$rcomments}) {
+        my $value = $rcomments->{$key};
+        $value = '' unless defined $value;
+        $comments_string .= "$key=$value\n";
+    }
+    $comments_string .= "\n\0";
+    my $length = CORE::length($comments_string);
+    my $returner = {
+        length => $length,
+        string => $comments_string,
+        binary => pack("A$length",$comments_string),
+    };
+    return $returner;
+}
+
+#=head2 _fill_missing_data($swq)
+#
+# Title   : _fill_missing_data($swq)
+# Usage   : $self->_fill_missing_data($swq);
+# Function: If the $swq with quality has no qualities, set all qualities
+#	   to 0.
+#	   If the $swq has no sequence, set the sequence to N's.
+# Returns : Nothing. Modifies the Bio::Seq::Quality that was passed as an
+#	   argument.
+# Args    : A reference to a Bio::Seq::Quality
+# Notes   : None.
+#
+#=cut
+#
+##'
+#sub _fill_missing_data {
+#    my ($self,$swq) = @_;
+#    my $qual_obj = $swq->qual_obj();
+#    my $seq_obj = $swq->seq_obj();
+#    if ($qual_obj->length() == 0 && $seq_obj->length() != 0) {
+#	my $fake_qualities = ("$DEFAULT_QUALITY ")x$seq_obj->length();
+#	$swq->qual($fake_qualities);
+#    }
+#    if ($seq_obj->length() == 0 && $qual_obj->length != 0) {
+#	my $sequence = ("N")x$qual_obj->length();
+#	$swq->seq($sequence);
+#    }
+#}
+
+=head2 _delta(\@trace_data,$direction)
+
+ Title   : _delta(\@trace_data,$direction)
+ Usage   : $self->_delta(\@trace_data,$direction);
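+ Function: Converts trace samples into delta-delta values ("forward") or
+	   converts delta-delta values back into samples ("backward").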
+ Returns : A reference to an array containing modified trace values.
+ Args    : A reference to an array containing trace data and a string
+	   indicating the direction of conversion. ("forward" or
+	   "backward").
+ Notes   : This code is taken from the specification for SCF3.2.
+	   http://www.mrc-lmb.cam.ac.uk/pubseq/manual/formats_unix_4.html
+
+=cut
+
+
+sub _delta {
+    # Delta-delta transformation of a series of samples.
+    #
+    #   "forward"  : samples -> delta-delta values, i.e. in two steps
+    #                  delta       = current_value - previous_value
+    #                  delta_delta = delta - previous_delta
+    #                which is d[i] = s[i] - 2*s[i-1] + s[i-2], taking the
+    #                values before the first sample as 0
+    #   "backward" : the reverse
+    #
+    # Any other direction warns and returns the samples unchanged.  The
+    # input array is never modified; a reference to a new array is
+    # returned.  Inputs with fewer than two samples are returned as they
+    # are (no extra element is appended).
+    my ($self,$rsamples,$direction) = @_;
+    my @samples = @$rsamples;
+    my $num_samples = scalar(@samples);
+
+    if ( $direction eq "forward" ) {
+        # walk from the end so that the untouched originals are still
+        # available to the left of the current position
+        for (my $i = $num_samples-1; $i > 1; $i--) {
+            $samples[$i] = $samples[$i] - 2*$samples[$i-1] + $samples[$i-2];
+        }
+        $samples[1] = $samples[1] - 2*$samples[0] if $num_samples > 1;
+    }
+    elsif ($direction eq "backward") {
+        my $running_delta   = 0;   # sum of the delta-deltas seen so far
+        my $previous_sample = 0;   # last reconstructed sample
+        foreach my $sample (@samples) {
+            $running_delta  += $sample;
+            $sample          = $running_delta + $previous_sample;
+            $previous_sample = $sample;
+        }
+    }
+    else {
+        $self->warn("Bad direction. Use \"forward\" or \"backward\".");
+    }
+    return \@samples;
+}
+
+=head2 _unpack_magik($buffer)
+
+ Title   : _unpack_magik($buffer)
+ Usage   : $self->_unpack_magik($buffer)
+ Function: What unpack specification should be used? Try them all.
+ Returns : Nothing.
+ Args    : A buffer containing arbitrary binary data.
+ Notes   : Eliminate the ambiguity and the guesswork. Used in the
+	   adaptation of _delta(), mostly.
+
+=cut
+
+sub _unpack_magik {
+    # Debugging aid: unpacks the buffer with each of a dozen integer
+    # templates in turn and prints the first 20 values of every attempt.
+    my ($self,$buffer) = @_;
+    my $buffer_length = length($buffer);
+    for my $template (qw(c C s S i I l L n N v V)) {
+        my @values = unpack "$template$buffer_length", $buffer;
+        print("$values[$_]\n") for (0 .. 19);
+    }
+}
+
+=head2 read_from_buffer($filehandle,$buffer,$length)
+
+ Title   : read_from_buffer($filehandle,$buffer,$length,$start_position)
+ Usage   : $self->read_from_buffer($filehandle,$buffer,$length,$start_position);
+ Function: Read from the buffer.
+ Returns : $buffer, containing a read of $length
+ Args    : a filehandle, a buffer, a read length and, optionally, a
+	   position in the file to seek to before reading
+ Notes   : I just got tired of typing
+	   "unless (length($buffer) == $length)" so I put it here.
+
+=cut
+
+sub read_from_buffer {
+	my ($self,$fh,$buffer,$length,$start_position) = @_;
+          # print("Reading from a buffer!!! length($length) ");
+     if ($start_position) {
+               # print(" startposition($start_position)(".sprintf("%X", $start_position).")\n");
+     }
+          # print("\n");
+     if ($start_position) {
+               # print("seeking to this position in the file: (".$start_position.")\n");
+          seek ($fh,$start_position,0);
+               # print("done. here is where I am now: (".tell($fh).")\n");
+     }
+     else {
+          # print("You did not specify a start position. Going from this position (the current position) (".tell($fh).")\n");
+     }
+	read $fh, $buffer, $length;
+	unless (length($buffer) == $length) {
+		$self->warn("The read was incomplete! Trying harder.");
+		my $missing_length = $length - length($buffer);
+		my $buffer2;
+		read $fh,$buffer2,$missing_length;
+		$buffer .= $buffer2;
+		if (length($buffer) != $length) {
+			$self->throw("Unexpected end of file while reading from SCF file. I should have read $length but instead got ".length($buffer)."! Current file position is ".tell($fh).".");
+		}
+	}
+
+	return $buffer;
+}
+
+=head2 _dump_keys()
+
+ Title   : _dump_keys()
+ Usage   : &_dump_keys($a_reference_to_some_hash)
+ Function: Dump out the keys in a hash.
+ Returns : Nothing.
+ Args    : A reference to a hash.
+ Notes   : A debugging method.
+
+=cut
+
+sub _dump_keys {
+    # Debugging aid, called as a plain function: prints the sorted keys of
+    # the hash reference it is given, or a complaint when the argument does
+    # not look like a hash reference.
+    my $rhash = shift;
+    unless ($rhash =~ /HASH/) {
+        print("_dump_keys: that was not a hash.\nIt was #$rhash# which was this reference:".ref($rhash)."\n");
+        return;
+    }
+    print("_dump_keys: The keys for $rhash are:\n");
+    for my $key (sort keys %$rhash) {
+        print("$key\n");
+    }
+}
+
+=head2 _dump_base_accuracies()
+
+ Title   : _dump_base_accuracies()
+ Usage   : $self->_dump_base_accuracies();
+ Function: Dump out the v3 base accuracies in an easy to read format.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : A debugging method.
+
+=cut
+
+sub _dump_base_accuracies {
+    # Debugging aid: prints the element counts of the four version 3
+    # accuracy arrays held in $self->{'text'} and then one tab separated
+    # row (index, a, c, g, t) for every element of the 'a' array.
+    my $self = shift;
+    print("Dumping base accuracies! for v3\n");
+    print("There are this many elements in a,c,g,t:\n");
+    my @columns = map { $self->{'text'}->{"v3_base_accuracy_$_"} } qw(a c g t);
+    print(join(",", map { scalar(@{$_}) } @columns)."\n");
+    my $number_traces = scalar(@{$columns[0]});
+    foreach my $counter (0 .. $number_traces - 1) {
+        print("$counter\t");
+        foreach my $column (@columns) {
+            print($column->[$counter]."\t");
+        }
+        print("\n");
+    }
+}
+
+=head2 _dump_peak_indices_incoming()
+
+ Title   : _dump_peak_indices_incoming()
+ Usage   : $self->_dump_peak_indices_incoming();
+ Function: Dump out the v3 peak indices in an easy to read format.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : A debugging method.
+
+=cut
+
+sub _dump_peak_indices_incoming {
+    # Debugging aid: prints $self->{'bases'} and then that many rows of
+    # "index<TAB>peak index" taken from $self->{parsed}->{peak_indices}.
+    my $self = shift;
+    print("Dump peak indices incoming!\n");
+    my $base_count = $self->{'bases'};
+    print("The length is $base_count\n");
+    my $index = 0;
+    while ($index < $base_count) {
+        print("$index\t$self->{parsed}->{peak_indices}->[$index]\n");
+        $index++;
+    }
+}
+
+=head2 _dump_base_accuracies_incoming()
+
+ Title   : _dump_base_accuracies_incoming()
+ Usage   : $self->_dump_base_accuracies_incoming();
+ Function: Dump out the v3 base accuracies in an easy to read format.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : A debugging method.
+
+=cut
+
+sub _dump_base_accuracies_incoming {
+    # Debugging aid: prints $self->{'bases'} rows, each holding the row
+    # index followed by the parsed accuracies in the order A, T, G, C.
+    my $self = shift;
+    print("Dumping base accuracies! for v3\n");
+    my $row_count = $self->{'bases'};
+    my $row = 0;
+    while ($row < $row_count) {
+        print("$row\t");
+        for my $base (qw(A T G C)) {
+            print($self->{'parsed'}->{'base_accuracies'}->{$base}->[$row]."\t");
+        }
+        print("\n");
+        $row++;
+    }
+}
+
+
+=head2 _dump_comments()
+
+ Title   : _dump_comments()
+ Usage   : $self->_dump_comments();
+ Function: Debug dump the comments section from the scf.
+ Returns : Nothing.
+ Args    : Nothing.
+ Notes   : None.
+
+=cut
+
+sub _dump_comments {
+    # Debugging aid: emits every entry of $self->{'comments'} through
+    # warn(), one "{key} ==> value" line per comment.
+    my ($self) = @_;
+    warn ("SCF comments:\n");
+    my @names = keys %{$self->{'comments'}};
+    for my $name (@names) {
+        warn ("\t {$name} ==> ", $self->{'comments'}->{$name}, "\n");
+    }
+}
+
+
+
+1;
+__END__
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/strider.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/strider.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/strider.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,230 @@
+# $Id: strider.pm,v 1.6.4.2 2006/10/02 23:10:30 sendu Exp $
+# BioPerl module for Bio::SeqIO::strider
+#
+# Cared for by Malcolm Cook <mec at stowers-institute.org>
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# April 7th, 2005  Malcolm Cook authored
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::strider - DNA strider sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from strider
+'binary' format, as documented in the strider manual, in which the
+first 112 bytes are a header, followed by the sequence, followed by a
+sequence description.
+
+Note: it does NOT assign any sequence identifier, since they are not
+contained in the byte stream of the file; the Strider application
+simply displays the name of the file on disk as the name of the
+sequence. The caller should set the id, probably based on the name of
+the file (after possibly cleaning up whitespace, which ought not to be
+used as the id in most applications).
+
+Note: the strider 'comment' is mapped to the BioPerl 'description'
+(since there is no other text field, and description maps to defline
+text).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Malcolm Cook
+
+Email: mec at stowers-institute.org
+
+=head1 CONTRIBUTORS
+
+Modelled after Bio::SeqIO::fasta by Ewan Birney E<lt>birney at ebi.ac.ukE<gt> and
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::strider;
+use strict;
+use warnings;
+
+
+use Bio::Seq::SeqFactory;
+use Convert::Binary::C;
+
+use base qw(Bio::SeqIO);
+
+# Converter used to pack and unpack the binary strider header.
+my $c = Convert::Binary::C->new(
+    ByteOrder => 'BigEndian',
+    Alignment => 2,
+);
+
+# See this file's __DATA__ section for the c structure definitions
+# for strider binary header data.  Here we slurp it all into $headerdef.
+my $headerdef = do { local $/; <DATA> };
+
+$c->parse($headerdef);
+
+my $size_F_HEADER = 112;
+
+# Refuse to load when the parsed structure does not have the expected size.
+die "expected strider header structure size of $size_F_HEADER" unless $size_F_HEADER eq $c->sizeof('F_HEADER');
+
+# Map between BioPerl alphabet and strider sequence type code.
+#
+# From Strider Documentation: the sequence type:
+# 1, 2, 3 and 4 for DNA, DNA Degenerate, RNA and
+# Protein sequence files, respectively.
+#
+# TODO: determine 'DNA Degenerate' based on
+# sequence alphabet?
+my %alphabet2type = (
+    dna     => 1,
+    rna     => 3,
+    protein => 4,
+);
+
+# The same mapping in the other direction: type code => alphabet.
+my %type2alphabet = reverse %alphabet2type;
+
+sub _initialize {
+    # Runs the parent class initialization with the constructor arguments
+    # and installs a default sequence factory (creating Bio::Seq::RichSeq
+    # objects) when none has been set.
+    my ($self,@args) = @_;
+    $self->SUPER::_initialize(@args);
+    unless ( defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(
+                -verbose => $self->verbose(),
+                -type    => 'Bio::Seq::RichSeq',
+            )
+        );
+    }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    # Reads one strider record from the stream: the fixed size header, then
+    # the sequence and the comment whose lengths are given in the header.
+    # Returns the sequence object built by the sequence factory, or nothing
+    # when no header could be read or unpacked.
+    my ($self) = @_;
+    my $fh = $self->_fh;
+
+    my $header;
+    # don't die here with "could not read header: $@" !!!
+    read($fh, $header, $size_F_HEADER) or return;
+    if ($size_F_HEADER != length($header)) {
+        $self->throw("required $size_F_HEADER bytes while reading strider header in " . $self->{'_file'} . " but found: " . length($header));
+    }
+    my $headerdata = $c->unpack('F_HEADER',$header) or return;
+
+    my ($sequence,$fulldesc);
+    read($fh, $sequence, $headerdata->{nLength});
+    read($fh, $fulldesc, $headerdata->{com_length});
+    # gratuitous replacement of mac linefeed with space.
+    $fulldesc =~ s/\cM/ /g;
+
+    # -id is deliberately not set; the caller might want to set it
+    # (for example to $ARGV).
+    my @factory_args = (
+        -seq      => $sequence,
+        -desc     => $fulldesc,
+        -alphabet => $type2alphabet{$headerdata->{type}} || 'dna',
+    );
+    my $seq = $self->sequence_factory->create(@factory_args);
+
+    return $seq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : array of 1 to n Bio::PrimarySeqI objects
+
+
+=cut
+
+sub write_seq {
+    # Writes each sequence as a strider record: the packed header, the
+    # sequence and the description (used as the strider comment).
+    # Output goes to the stream's filehandle, or to STDOUT when there is
+    # none.  Throws when an argument is not a Bio::PrimarySeqI.
+    # Returns 1 for success and 0 when a record could not be printed.
+    my ($self,@seq) = @_;
+    my $fh = $self->_fh() || *STDOUT; #die "could not determine filehandle in strider.pm";
+    foreach my $seq (@seq) {
+        $self->throw("Did not provide a valid Bio::PrimarySeqI object")
+            unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
+        # fetched once so that com_length always matches the text written
+        my $description = $seq->desc || "";
+        my $headerdata = $c->pack('F_HEADER',{
+            versionNb   => 0,
+            type        => $alphabet2type{$seq->alphabet} || $alphabet2type{dna},
+            topology    => $seq->is_circular ? 1 : 0,
+            nLength     => $seq->length,
+            nMinus      => 0,
+            com_length  => length($description),
+        });
+        print {$fh} ($headerdata, ($seq->seq() || ""), $description)
+            or return 0;
+    }
+    # same flush-on-write handling as the other writer in this distribution
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+1;
+
+__DATA__
+
+//The following was taken from the strider 1.4 release notes Appendix (with
+//some comments gleaned from other parts of manual)
+
+struct F_HEADER
+{
+char versionNb;  // the format version number, currently it is set to 0
+char type;       // 1=DNA, 2=DNA Degenerate, 3=RNA or 4=Protein
+char topology;   // linear or circular - 0 for a linear sequence, 1 for a circular one
+char reserved1;
+int reserved2;
+int reserved3;
+int reserved4;
+char reserved5;
+char filler1;
+short filler2;
+int filler3;
+int reserved6;
+int nLength; // Sequence length -  the length the Sequence field (the number of char in the text, each being a base or an aa)
+int nMinus; // nb of "negative" bases, i.e. the number of bases numbered with negative numbers
+int reserved7;
+int reserved8;
+int reserved9;
+int reserved10;
+int reserved11;
+char reserved12[32];
+short reserved13;
+short filler4;
+char reserved14;
+char reserved15;
+char reserved16;
+char filler5;
+int com_length; //  the length the Comment field (the number of char in the text).
+int reserved17;
+int filler6;
+int filler7;
+};

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/swiss.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/swiss.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/swiss.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1354 @@
+# $Id: swiss.pm,v 1.92.4.5 2006/11/09 05:55:14 cjfields Exp $
+#
+# BioPerl module for Bio::SeqIO::swiss
+#
+# Copyright Elia Stupka
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::swiss - Swissprot sequence input/output stream
+
+=head1 SYNOPSIS
+
+It is probably best not to use this object directly, but
+rather go through the SeqIO handler system:
+
+    use Bio::SeqIO;
+
+    $stream = Bio::SeqIO->new(-file => $filename, 
+                              -format => 'swiss');
+
+    while ( my $seq = $stream->next_seq() ) {
+       # do something with $seq
+    }
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from Swissprot flat
+file databases.
+
+There is a lot of flexibility here about how to dump things which needs
+to be documented.
+
+
+=head2 Optional functions
+
+=over 3
+
+=item _show_dna()
+
+(output only) shows the dna or not
+
+=item _post_sort()
+
+(output only) provides a sorting func which is applied to the FTHelpers
+before printing
+
+=item _id_generation_func()
+
+This is a function which is called as
+
+   print "ID   ", $func($seq), "\n";
+
+To generate the ID line. If it is not there, it generates a sensible ID
+line using a number of tools.
+
+If you want to output annotations in Swissprot format they need to be
+stored in a Bio::Annotation::Collection object which is accessible
+through the Bio::SeqI interface method L<annotation()|annotation>.  
+
+The following are the names of the keys which are polled from a
+L<Bio::Annotation::Collection> object.
+
+ reference   - Should contain Bio::Annotation::Reference objects
+ comment     - Should contain Bio::Annotation::Comment objects
+ dblink      - Should contain Bio::Annotation::DBLink objects
+ gene_name   - Should contain Bio::Annotation::SimpleValue object
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions, 
+preferably to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Elia Stupka
+
+Email elia at tll.org.sg
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::swiss;
+use vars qw(@Unknown_names @Unknown_genus);
+use strict;
+use Bio::SeqIO::FTHelper;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Tools::SeqStats;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::Comment;
+use Bio::Annotation::Reference;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::SimpleValue;
+use Bio::Annotation::StructuredValue;
+
+use base qw(Bio::SeqIO);
+
+# this is for doing species name parsing: organism names that carry no
+# real species information (compared against the OS name in
+# _read_swissprot_Species)
+@Unknown_names=('other', 'unidentified',
+                     'unknown organism', 'not specified', 
+                     'not shown', 'Unspecified', 'Unknown', 
+                     'None', 'unclassified', 'unidentified organism', 
+                     'not supplied'
+                    );
+# dictionary of synonyms for taxid 32644
+# all above can be part of valid species name
+@Unknown_genus = qw(unknown unclassified uncultured unidentified);
+
+sub _initialize {
+  # Per-object setup: forwards all constructor arguments to the parent
+  # class, then installs the swiss-specific defaults (empty feature
+  # decoding table, sequence output switched on, and a RichSeq-producing
+  # sequence factory unless one has already been set).
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+
+  # hash for functions for decoding keys.
+  $self->{'_func_ftunit_hash'} = {};
+  # sets this to one by default. People can change it
+  $self->_show_dna(1);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new
+                  (-verbose => $self->verbose(),
+                   -type => 'Bio::Seq::RichSeq'));
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+=cut
+
+sub next_seq {
+   # Reads one entry from the stream and builds a sequence object from it
+   # through the configured sequence factory.
+   #
+   # Returns nothing when the next non-blank line is not an "ID" line
+   # (including end of input); throws when an ID line is present but does
+   # not match the expected layout. Annotation lines before the feature
+   # table are collected into a Bio::Annotation::Collection, FT lines into
+   # generic sequence features, and everything between SQ and // into the
+   # sequence string.
+   my ($self,@args) = @_;
+   my ($name,$desc,$seqc,$seq_div,$comment);
+   my $genename = "";
+   my ($annotation, %params, @features) = ( Bio::Annotation::Collection->new );
+
+   local $_;
+
+   # skip blank lines in front of the entry
+   while( defined($_ = $self->_readline) && /^\s+$/ ) {
+   }
+   return unless defined $_ && /^ID\s/;
+
+   # fixed to allow _DIVISION to be optional for bug #946
+   # see bug report for more information
+   # 
+   # 9/6/06 Note: Swiss/TrEMBL sequences have no division acc. to UniProt
+   # release notes; this is fixed to simplify the regex parsing
+   # STANDARD (SwissProt) and PRELIMINARY (TrEMBL) added to namespace()
+   unless(  m{^
+                ID              \s+     #
+                (\S+)           \s+     #  $1  entryname
+                ([^\s;]+);      \s+     #  $2  DataClass
+                (?:PRT;)?       \s+     #  Molecule Type (optional)
+                [0-9]+[ ]AA     \.      #  Sequencelength (capture?)
+                $
+                }ox ) {
+            # I couldn't find any new current UniProt sequences
+            # that matched this format:
+            # || m/^ID\s+(\S+)\s+(_([^\s_]+))? /ox ) { 
+       $self->throw("swissprot stream with no ID. Not swissprot in my book");
+   }
+   ($name, $seq_div) = ($1, $2);
+   $params{'-namespace'} =
+      ($seq_div eq 'Reviewed'   || $seq_div eq 'STANDARD')     ? 'Swiss-Prot' :
+      ($seq_div eq 'Unreviewed' || $seq_div eq 'PRELIMINARY')  ? 'TrEMBL'     :
+       $seq_div;
+   # we shouldn't be setting the division, but for now...
+   # (the division is whatever follows the first '_' of the entry name)
+   my ($junk, $division) = split q(_), $name;
+   $params{'-division'} = $division;
+   $params{'-alphabet'} = 'protein';
+   # this is important to have the id for display in e.g. FTHelper, otherwise
+   # you won't know which entry caused an error
+   $params{'-display_id'} = $name;
+
+   BEFORE_FEATURE_TABLE :
+   while( defined($_ = $self->_readline) ) {
+
+       # Exit at start of Feature table and at the sequence at the
+       # latest HL 05/11/2000
+       last if( /^(FT|SQ)/ );
+
+       # Description line(s)
+       if (/^DE\s+(\S.*\S)/) {
+           $desc .= $desc ? " $1" : $1;
+       }
+       #Gene name
+       elsif(/^GN\s+(.*)/) {
+           $genename .= " " if $genename;
+           $genename .= $1;
+       }
+       #accession number(s)
+       elsif( /^AC\s+(.+)/) {
+           my @accs = split(/[; ]+/, $1); # allow space in addition
+           # the very first accession is the primary one, the rest are
+           # secondary
+           $params{'-accession_number'} = shift @accs 
+               unless defined $params{'-accession_number'};
+           push @{$params{'-secondary_accessions'}}, @accs;
+       }
+       #date and sequence version
+       elsif( /^DT\s+(.*)/ ) {
+           my $line = $1;
+           my ($date, $version) = split(' ', $line, 2);
+           $date =~ tr/,//d; # remove comma if new version    
+           # In both tests the second pattern is matched against $_ (the
+           # whole DT line), not against $version.
+           if ($version =~ /\(Rel\. (\d+), Last sequence update\)/ || # old
+                           /sequence version (\d+)\./) { #new
+               my $update = Bio::Annotation::SimpleValue->new(
+                                           -tagname    => 'seq_update',
+                                           -value      => $1
+                                           );
+               $annotation->add_Annotation($update);
+           } elsif ($version =~ /\(Rel\. (\d+), Last annotation update\)/ || #old
+                                /entry version (\d+)\./) { #new
+               $params{'-version'} = $1;
+           }
+           push @{$params{'-dates'}}, $date;
+       }
+       # Organism name and phylogenetic information
+       elsif (/^O[SCG]/) {
+           my $species = $self->_read_swissprot_Species($_);
+           $params{'-species'}= $species;
+           # now we are one line ahead -- so continue without reading the next
+           # line   HL 05/11/2000
+       }
+       # References
+       elsif (/^R/) {
+           my $refs = $self->_read_swissprot_References($_);
+           foreach my $r (@$refs) {
+               $annotation->add_Annotation('reference',$r);
+           }
+       } 
+       # Comments
+       elsif (/^CC\s{3}(.*)/) {
+           $comment .= $1;
+           $comment .= "\n";
+           while (defined ($_ = $self->_readline) && /^CC\s{3}(.*)/ ) {
+               $comment .= $1 . "\n";
+           }
+           my $commobj = Bio::Annotation::Comment->new();
+           # note: don't try to process comments here -- they may contain
+           # structure. LP 07/30/2000
+           $commobj->text($comment);
+           $annotation->add_Annotation('comment',$commobj);
+           $comment = "";
+           # the line that ended the CC run still has to be parsed
+           $self->_pushback($_);
+       }
+       #DBLinks
+       # old regexp
+       # /^DR\s+(\S+)\;\s+(\S+)\;\s+(\S+)[\;\.](.*)$/) {
+       # new regexp from Andreas Kahari  bug #1584
+       elsif (/^DR\s+(\S+)\;\s+(\S+)\;\s+([^;]+)[\;\.](.*)$/) {
+           my ($database,$primaryid,$optional,$comment) = ($1,$2,$3,$4);
+
+           # drop leading and trailing spaces and trailing .
+           $comment =~ s/\.\s*$//;
+           $comment =~ s/^\s+//;
+
+           my $dblinkobj =  Bio::Annotation::DBLink->new
+               (-database    => $database,
+                -primary_id  => $primaryid,
+                -optional_id => $optional,
+                -comment     => $comment,
+               );
+
+           $annotation->add_Annotation('dblink',$dblinkobj);
+       }
+       #keywords
+       elsif( /^KW\s+(.*)$/ ) {
+           my @kw = split(/\s*\;\s*/,$1);
+           defined $kw[-1] && $kw[-1] =~ s/\.$//;
+           push @{$params{'-keywords'}}, @kw;   
+       }
+   }
+   # process and parse the gene name line if there was one (note: we
+   # can't do this above b/c GN may be multi-line and we can't
+   # unequivocally determine whether we've seen the last GN line in
+   # the new format)
+   if ($genename && ($genename =~ s/[\.; ]+$//)) {
+       my $gn = Bio::Annotation::StructuredValue->new();
+       if ($genename =~ /Name=/) {
+           # new format (e.g., Name=RCHY1; Synonyms=ZNF363, CHIMP)
+           my $j = 0;
+           foreach my $genes (split(/; and /, $genename)) {
+               foreach my $names (split(/;\s+/, $genes)) {
+                   $names =~ s/^\s*([A-Za-z]+)=//;
+                   $gn->add_value([$j,-1], split(/, /, $names));
+               }
+               $j++;
+           }
+       } else {
+           # old format
+           foreach my $gene (split(/ AND /, $genename)) {
+               $gene =~ s/^\(//;
+               $gene =~ s/\)$//;
+               $gn->add_value([-1,-1], split(/ OR /, $gene));
+           }
+       }
+       $annotation->add_Annotation('gene_name', $gn,
+                                   "Bio::Annotation::SimpleValue");
+   }
+
+   FEATURE_TABLE :
+   # if there is no feature table, or if we've got beyond, exit loop or don't
+   # even enter    HL 05/11/2000
+   while (defined $_ && /^FT/ ) {
+       my $ftunit = $self->_read_FTHelper_swissprot($_);
+
+       # process ftunit
+       # when parsing of the line fails we get undef returned
+       if($ftunit) {
+           # NOTE(review): $params{'-seqid'} is never assigned in this
+           # method, so undef is passed here -- confirm whether
+           # '-display_id' was intended.
+           push(@features,
+                $ftunit->_generic_seqfeature($self->location_factory(),
+                                             $params{'-seqid'}, "SwissProt"));
+       } else {
+           $self->warn("failed to parse feature table line for seq " .
+                       $params{'-display_id'}. "\n$_");
+       }
+       $_ = $self->_readline;
+   }
+   # skip anything left between the feature table and the SQ line
+   while( defined($_) && ! /^SQ/ ) { 
+       $_ = $self->_readline;
+   }
+   # sequence lines: keep letters only, upper-cased, until the // terminator
+   $seqc = "";  
+   while( defined ($_ = $self->_readline) ) {
+       last if m{^//};
+       s/[^A-Za-z]//g;       
+       $seqc .= uc($_);
+   }
+
+   my $seq=  $self->sequence_factory->create
+       (-verbose  => $self->verbose,
+        %params,
+        -seq      => $seqc,
+        -desc     => $desc,
+        -features => \@features,
+        -annotation => $annotation,
+       );
+
+   # The annotation doesn't get added by the constructor
+   $seq->annotation($annotation);
+
+   return $seq;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object (must be seq) to the stream
+ Returns : 1 for success and 0 for error
+ Args    : array of 1 to n Bio::SeqI objects
+
+
+=cut
+
+sub write_seq {
+    # Writes every sequence passed in as a Swiss-Prot style entry
+    # (ID, AC, DT, DE, GN, O*, R*, CC, DR, KW, FT, SQ, //) to the stream.
+    #
+    # Args    : one or more sequence objects
+    # Returns : 1 once all sequences have been written; an empty return if
+    #           a sequence has features and _show_dna() is 0 (see the note
+    #           at that point)
+    # Throws  : if one of the arguments is undefined
+    my ($self,@seqs) = @_;
+    foreach my $seq ( @seqs ) {
+        $self->throw("Attempting to write with no seq!") unless defined $seq;
+
+        if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
+            $self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
+        }
+
+        my $str = $seq->seq;
+
+        my $div;
+        my $ns = ($seq->can('namespace')) && $seq->namespace();
+        my $len = $seq->length();
+
+        if ( !$seq->can('division') || ! defined ($div = $seq->division()) ) {
+            $div = 'UNK';
+        }
+
+        # namespace dictates database, takes precedent over division. Sorry!
+        if (defined($ns)) {
+            $div = ($ns eq 'Swiss-Prot') ? 'Reviewed'    :
+                   ($ns eq 'TrEMBL')     ? 'Unreviewed' :
+                   $ns;
+        } else {
+            $ns = 'Swiss-Prot';
+            # division not reset; acts as fallback
+        }
+
+        $self->warn("No whitespace allowed in SWISS-PROT display id [". $seq->display_id. "]")
+            if $seq->display_id =~ /\s/;
+
+        my $temp_line;
+        if( $self->_id_generation_func ) {
+            $temp_line = &{$self->_id_generation_func}($seq);
+        } else {
+            #$temp_line = sprintf ("%10s     STANDARD;      %3s;   %d AA.",
+            #            $seq->primary_id()."_".$div,$mol,$len);
+            # Reconstructing the ID relies heavily upon the input source having
+            # been in a format that is parsed as this routine expects it -- that is,
+            # by this module itself. This is bad, I think, and immediately breaks
+            # if e.g. the Bio::DB::GenPept module is used as input.
+            # Hence, switch to display_id(); _every_ sequence is supposed to have
+            # this. HL 2000/09/03
+            # Changed to reflect ID line changes in UniProt
+            # Oct 2006 - removal of molecule type - see bug 2134 
+            $temp_line = sprintf ("%-24s%-12s%9d AA.",
+                      $seq->display_id(), $div.';', $len);
+        }
+
+        $self->_print( "ID   $temp_line\n");
+
+        # if there, write the accession line
+        local($^W) = 0; # suppressing warnings about uninitialized fields
+
+        if( $self->_ac_generation_func ) {
+            $temp_line = &{$self->_ac_generation_func}($seq);
+            $self->_print( "AC   $temp_line\n");
+        } else {
+            if ($seq->can('accession_number') ) {
+                $self->_print("AC   ",$seq->accession_number,";");
+                if ($seq->can('get_secondary_accessions') ) {
+                    foreach my $sacc ($seq->get_secondary_accessions) {
+                        $self->_print(" ",$sacc,";");
+                    }
+                }
+                $self->_print("\n");
+            }
+            # otherwise - cannot print <sigh>
+        }
+
+        # Date lines and sequence versions (changed 6/15/2006)
+        # This is rebuilt from scratch using the current SwissProt/UniProt format
+        # Only the first three dates are written: creation, sequence update
+        # and entry (annotation) update, in that order.
+        if( $seq->can('get_dates') ) {
+            my @dates =  $seq->get_dates();
+            my $ct = 1;
+            my $seq_version = $seq->version;
+            my ($update_version) = $seq->get_Annotations("seq_update");
+            foreach my $dt (@dates){
+                $self->_write_line_swissprot_regex("DT   ","DT   ",
+                        $dt.', integrated into UniProtKB/'.$ns,
+                        "\\s\+\|\$",80) if $ct == 1;
+                $self->_write_line_swissprot_regex("DT   ","DT   ",
+                        $dt.", sequence version $update_version.",
+                        "\\s\+\|\$",80) if $ct == 2;
+                $self->_write_line_swissprot_regex("DT   ","DT   ",
+                        $dt.", entry version $seq_version.",
+                        "\\s\+\|\$",80) if $ct == 3;
+                $ct++;
+            }
+        }
+
+        #Definition lines
+        $self->_write_line_swissprot_regex("DE   ","DE   ",$seq->desc(),"\\s\+\|\$",80);
+
+        #Gene name
+        if ((my @genes = $seq->annotation->get_Annotations('gene_name') ) ) {
+            $self->_print("GN   ",
+                  join(' OR ',
+                       map {
+                       $_->isa("Bio::Annotation::StructuredValue") ?
+                           $_->value(-joins => [" AND ", " OR "]) :
+                           $_->value();
+                       } @genes),
+                  ".\n");
+        }
+
+        # Organism lines
+        if ($seq->can('species') && (my $spec = $seq->species)) {
+            my @class = $spec->classification();
+            shift(@class);
+            my $species = $spec->species;
+            my $genus = $spec->genus;
+            my $OS = $spec->scientific_name;
+            if ($class[-1] =~ /viruses/i) {
+                $OS = $species;
+                $OS .=  " ". $spec->sub_species if $spec->sub_species;
+            }
+            foreach (($spec->variant, $spec->common_name)) {
+                $OS .= " ($_)" if $_;
+            }
+            $self->_print( "OS   $OS.\n");
+            my $OC = join('; ', reverse(@class)) .'.';
+            $self->_write_line_swissprot_regex("OC   ","OC   ",$OC,"\; \|\$",80);
+            if ($spec->organelle) {
+                $self->_write_line_swissprot_regex("OG   ","OG   ",$spec->organelle,"\; \|\$",80);
+            }
+            if ($spec->ncbi_taxid) {
+                $self->_print("OX   NCBI_TaxID=".$spec->ncbi_taxid.";\n");
+            }
+        }
+
+        # Reference lines
+        my $t = 1;
+        foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
+            $self->_print( "RN   [$t]\n");
+            # changed by lorenz 08/03/00
+            # j.gilbert and h.lapp agreed that the rp line in swissprot seems 
+            # more like a comment than a parseable value, so print it as is
+            if ($ref->rp) {
+                $self->_write_line_swissprot_regex("RP   ","RP   ",$ref->rp,
+                                   "\\s\+\|\$",80);
+            }
+            if ($ref->comment) {
+                $self->_write_line_swissprot_regex("RC   ","RC   ",$ref->comment,
+                                   "\\s\+\|\$",80);
+            }
+            if ($ref->medline) {
+                # new RX format in swissprot LP 09/17/00
+                if ($ref->pubmed) {
+                    $self->_write_line_swissprot_regex("RX   ","RX   ",
+                                       "MEDLINE=".$ref->medline.
+                                       "; PubMed=".$ref->pubmed.";",
+                                       "\\s\+\|\$",80);
+                } else {
+                    $self->_write_line_swissprot_regex("RX   MEDLINE; ","RX   MEDLINE; ",
+                                       $ref->medline.".","\\s\+\|\$",80);
+                }
+            }
+            # Plain ternaries instead of "my $x = ... if COND": the latter
+            # has undefined behaviour and could carry a value over from
+            # the previous reference.
+            my $author = $ref->authors ? $ref->authors . ';' : undef;
+            my $title  = $ref->title   ? $ref->title   . ';' : undef;
+            my $rg     = $ref->rg      ? $ref->rg      . ';' : undef;
+
+            $self->_write_line_swissprot_regex("RG   ","RG   ",$rg,"\\s\+\|\$",80) if $rg;
+            $self->_write_line_swissprot_regex("RA   ","RA   ",$author,"\\s\+\|\$",80) if $author;
+            $self->_write_line_swissprot_regex("RT   ","RT   ",$title,"\\s\+\|\$",80) if $title;
+            $self->_write_line_swissprot_regex("RL   ","RL   ",$ref->location,"\\s\+\|\$",80);
+            $t++;
+        }
+
+        # Comment lines
+
+        foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
+            foreach my $cline (split ("\n", $comment->text)) {
+                # hard-wrap at 74 characters of text per CC line
+                while (length $cline > 74) {
+                    $self->_print("CC   ",(substr $cline,0,74),"\n");
+                    $cline = substr $cline,74;
+                }
+                $self->_print("CC   ",$cline,"\n");
+            }
+        }
+
+        foreach my $dblink ( $seq->annotation->get_Annotations('dblink') ) 
+        {
+            my ($primary_id) = $dblink->primary_id;
+
+            if (defined($dblink->comment) && ($dblink->comment) ) {
+                $self->_print("DR   ",$dblink->database,"; ",$primary_id,"; ",
+                              $dblink->optional_id,"; ",$dblink->comment,".\n");
+            } elsif($dblink->optional_id) {
+                $self->_print("DR   ",$dblink->database,"; ",
+                              $primary_id,"; ",
+                              $dblink->optional_id,".\n");
+            }
+            else {
+                $self->_print("DR   ",$dblink->database,
+                              "; ",$primary_id,"; ","-.\n");
+            }
+        }
+
+        # if there, write the kw line
+        {
+            my( $kw );
+            if( my $func = $self->_kw_generation_func ) {
+                $kw = &{$func}($seq);
+            } elsif( $seq->can('keywords') ) {      
+                $kw = $seq->keywords;
+                if( ref($kw) =~ /ARRAY/i ) {
+                    $kw = join("; ", @$kw);
+                }
+                $kw .= '.' if( $kw !~ /\.$/ );
+            }       
+            $self->_write_line_swissprot_regex("KW   ","KW   ",
+                               $kw, "\\s\+\|\$",80);           
+        }
+
+        #Check if there is seqfeatures before printing the FT line
+        my @feats = $seq->can('top_SeqFeatures') ? $seq->top_SeqFeatures : ();
+        if ($feats[0]) {
+            if( defined $self->_post_sort ) {
+
+                # we need to read things into an array. Process. Sort them. Print 'em
+
+                my $post_sort_func = $self->_post_sort();
+                my @fth;
+
+                foreach my $sf ( @feats ) {
+                    push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq));
+                }
+                @fth = sort { &$post_sort_func($a,$b) } @fth;
+
+                foreach my $fth ( @fth ) {
+                    $self->_print_swissprot_FTHelper($fth);
+                }
+            } else {
+                # not post sorted. And so we can print as we get them.
+                # lower memory load...
+
+                foreach my $sf ( @feats ) {
+                    my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq);
+                    foreach my $fth ( @fth ) {
+                        if( ! $fth->isa('Bio::SeqIO::FTHelper') ) {
+                            $sf->throw("Cannot process FTHelper... $fth");
+                        }
+
+                        $self->_print_swissprot_FTHelper($fth);
+                    }
+                }
+            }
+
+            # NOTE(review): this early exit is only reached when the
+            # sequence has features; it skips the SQ block and the //
+            # terminator and abandons any remaining sequences. Kept as is
+            # -- confirm the intended behaviour.
+            if( $self->_show_dna() == 0 ) {
+                return;
+            }
+        }
+        # finished printing features.
+
+        # molecular weight
+        my $mw = ${Bio::Tools::SeqStats->get_mol_wt($seq->primary_seq)}[0];
+        # checksum
+        # was crc32 checksum, changed it to crc64 
+        my $crc64 = $self->_crc64(\$str); 
+        $self->_print( sprintf("SQ   SEQUENCE  %4d AA;  %d MW;  %16s CRC64;\n",
+                       $len,$mw,$crc64));
+        $self->_print( "     ");
+        # blocks of 10 residues, 6 blocks (60 residues) per line
+        for (my $i = 0; $i < length($str); $i += 10) {
+            $self->_print( substr($str,$i,10), " ");
+            if( ($i+10)%60 == 0 && (($i+10) < length($str))) {
+                $self->_print( "\n     ");
+            }
+        }
+        $self->_print( "\n//\n");
+
+        $self->flush if $self->_flush_on_write && defined $self->_fh;
+    }
+    # Report success only after the whole list has been processed;
+    # returning from inside the loop wrote just the first sequence.
+    return 1;
+}
+
+# Thanks to James Gilbert for the following two. LP 08/01/2000
+
+=head2 _generateCRCTable
+
+ Title   : _generateCRCTable
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _generateCRCTable {
+  # Fills $self->{'_crcTable'} with the 256-entry lookup table used by
+  # _crc32(): entry N is the byte value N pushed through eight
+  # shift-right steps, XOR-ing in the polynomial whenever a set bit
+  # drops off the low end.
+  my $self = shift;
+  # 10001000001010010010001110000100
+  # 32
+  my $polynomial = 0xEDB88320;
+
+  my $table = $self->{'_crcTable'} = [];
+  for my $byte (0..255) {
+    my $value = $byte;
+    for my $step (1..8) {
+      my $low_bit_set = $value & 1;
+      $value >>= 1;
+      $value ^= $polynomial if $low_bit_set;
+    }
+    $table->[$byte] = $value;
+  }
+}
+
+
+=head2 _crc32
+
+ Title   : _crc32
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _crc32 {
+  # Table-driven 32-bit checksum over the upper-cased characters of the
+  # string referenced by $str. Starts from 0xFFFFFFFF and returns the
+  # running register as is (no final inversion is applied).
+  # Throws unless $str is a reference to a scalar.
+  my( $self, $str ) = @_;
+
+  unless (ref($str) eq 'SCALAR') {
+    $self->throw("Argument to crc32() must be ref to scalar");
+  }
+
+  # build the lookup table on first use
+  if (! exists $self->{'_crcTable'}) {
+    $self->_generateCRCTable();
+  }
+  my $table = $self->{'_crcTable'};
+
+  my $register = 0xFFFFFFFF;
+  foreach my $char (split //, $$str) {
+    # Get upper case value of each letter
+    my $code  = ord uc $char;
+    my $index = ($register ^ $code) & 0xFF;
+    $register = (($register >> 8) & 0x00FFFFFF) ^ $table->[$index];
+  }
+  return $register;
+}
+
+=head2 _crc64
+
+ Title   : _crc64
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _crc64{
+    # Computes the 64-bit checksum printed on the SQ line.
+    #
+    # Args    : reference to the sequence string
+    # Returns : the checksum as 16 upper-case hex digits
+    #
+    # The 64-bit register and the lookup table are each kept as two
+    # 32-bit halves (h = high word, l = low word). Only the high word of
+    # the polynomial is non-zero.
+    my ($self, $sequence) = @_;
+    my $POLY64REVh = 0xd8000000;
+    my (@CRCTableh, @CRCTablel);
+
+    # The table is rebuilt on every call, exactly as before: the old
+    # "$initialized" flag was a fresh lexical each time and so never
+    # skipped this step.
+    for my $i (0..255) {
+        my $partl = $i;
+        my $parth = 0;
+        for my $bit (1..8) {
+            my $rflag = $partl & 1;
+            $partl >>= 1;
+            # carry the lowest bit of the high word into the low word
+            $partl |= (1 << 31) if $parth & 1;
+            $parth >>= 1;
+            $parth ^= $POLY64REVh if $rflag;
+        }
+        $CRCTableh[$i] = $parth;
+        $CRCTablel[$i] = $partl;
+    }
+
+    my $crcl = 0;
+    my $crch = 0;
+    foreach my $char (split '', $$sequence) {
+        # shift the 64-bit register right by one byte ...
+        my $shr = ($crch & 0xFF) << 24;
+        my $temp1h = $crch >> 8;
+        my $temp1l = ($crcl >> 8) | $shr;
+        # ... and fold in the table entry selected by the outgoing byte
+        my $tableindex = ($crcl ^ (unpack "C", $char)) & 0xFF;
+        $crch = $temp1h ^ $CRCTableh[$tableindex];
+        $crcl = $temp1l ^ $CRCTablel[$tableindex];
+    }
+
+    return sprintf("%08X%08X", $crch, $crcl);
+}
+
+=head2 _print_swissprot_FTHelper
+
+ Title   : _print_swissprot_FTHelper
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _print_swissprot_FTHelper {
+   # Prints one feature as FT line(s): key (cut to 8 characters), start,
+   # end and a description taken from the first of the description, gene,
+   # note or product tags that is present.
+   #
+   # Args : $fth          - a Bio::SeqIO::FTHelper object
+   #        $always_quote - accepted and defaulted to 0, not used further
+   my ($self,$fth,$always_quote) = @_;
+   $always_quote ||= 0;
+   my ($start,$end) = ('?', '?');
+
+   if( ! ref $fth || ! $fth->isa('Bio::SeqIO::FTHelper') ) {
+       # warn through $self: $fth may not be an object at all here, so
+       # calling a method on it would die before the warning is shown
+       $self->warn("$fth is not a FTHelper class. ".
+          "Attempting to print, but there could be tears!");
+   }
+   my $desc = "";
+
+   for my $tag ( qw(description gene note product) ) {
+       if( exists $fth->field->{$tag} ) {
+           $desc = @{$fth->field->{$tag}}[0].".";
+           last;
+       }
+   }
+   # normalise to no trailing dot; exactly one is appended when printing
+   $desc =~ s/\.$//;
+
+   my $key =substr($fth->key,0,8);
+   my $loc = $fth->loc;
+   if( $loc =~ /(\?|\d+|\>\d+|<\d+)?\.\.(\?|\d+|<\d+|>\d+)?/ ) {
+       # "start..end"; a missing side keeps its '?' placeholder
+       $start = $1 if defined $1;
+       $end = $2 if defined $2;
+
+       # to_FTString only returns one value when start == end, #JB955
+       # so if no match is found, assume it is both start and end #JB955
+   } elsif ( $loc =~ /join\((\d+)((?:,\d+)+)?\)/) {
+       # join of single positions: one FT line per position
+       my @y = ($1);
+       if( defined( my $m = $2) ) {
+           $m =~ s/^\,//;
+           push @y, split(/,/,$m);
+       }
+       for my $x ( @y ) {
+           $self->_write_line_swissprot_regex(
+               sprintf("FT   %-8s %6s %6s       ",
+                   $key,
+                   $x ,$x),
+               "FT                                ",
+               $desc.'.','\s+|$',80);
+       }
+       return;
+
+   } else {
+       $start = $end = $fth->loc; 
+   }
+
+   $self->_write_line_swissprot_regex(sprintf("FT   %-8s %6s %6s       ",
+                          $key,
+                          $start ,$end),
+                      "FT                                ",
+                      $desc.'.','\s+|$',80);
+}
+#'
+
+=head2 _read_swissprot_References
+
+ Title   : _read_swissprot_References
+ Usage   :
+ Function: Reads references from swissprot format. Internal function really
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _read_swissprot_References{
+   # Consumes a run of reference lines, starting with $line (already read
+   # by the caller) and continuing via _readline. Returns an array
+   # reference of Bio::Annotation::Reference objects. The first line that
+   # does not start with 'R' is pushed back onto the stream.
+   my ($self,$line) = @_;
+   my ($b1, $b2, $rp, $rg, $title, $loc, $au, $med, $com, $pubmed);
+   my @refs;
+   local $_ = $line;
+   while( defined $_ ) {
+       # A non-R line or the next RN line closes the reference collected
+       # so far.
+       if( /^[^R]/ || /^RN/ ) { 
+       # Nothing is stored unless an RP line was seen for this reference.
+       if( $rp ) { 
+           $rg =~ s/;\s*$//g if defined($rg);
+               if (defined($au)) {
+                   $au =~ s/;\s*$//;
+               } else {
+                   # no RA lines: the RG text doubles as the author string
+                   $au = $rg;
+               }
+               $title =~ s/;\s*$//g if defined($title);
+           push @refs, Bio::Annotation::Reference->new(-title   => $title,
+                               -start   => $b1,
+                               -end     => $b2,
+                               -authors => $au,
+                               -location=> $loc,
+                               -medline => $med,
+                               -pubmed  => $pubmed,
+                               -comment => $com,
+                               -rp      => $rp,
+                                                           -rg      => $rg);
+               # reset state for the next reference
+           $rp = '';
+       }
+           if (index($_,'R') != 0) {
+               $self->_pushback($_); # want this line to go back on the list
+               last; # may be the safest exit point HL 05/11/2000
+           }
+           # don't forget to reset the state for the next reference
+           $b1 = $b2 = $rg = $med = $com = $pubmed = undef;
+           $title = $loc = $au = undef;
+       # RP line carrying a "... OF <from>-<to>" range: keep the text and
+       # use the two numbers as -start / -end
+       } elsif ( /^RP\s{3}(.+? OF (\d+)-(\d+).*)/) { 
+       $rp  .= $1;
+       $b1   = $2;
+       $b2   = $3; 
+       # any other RP line: text only, space-joined to what is there
+       } elsif ( /^RP\s{3}(.*)/) {
+       if($rp) { $rp .= " ".$1 }
+       else    { $rp = $1 }
+       # NOTE(review): "(?!<;)" is a negative lookahead for the literal
+       # text "<;"; a lookbehind "(?<!;)" may have been intended -- confirm.
+       } elsif( /^RX\s{3}MEDLINE;\s+(\d+)(?!<;)/ )  {
+       $med  = $1;
+       } elsif( /^RX\s{3}MEDLINE=(\d+);\s+PubMed=(\d+);/ ) { 
+       $med   = $1;
+       $pubmed= $2;
+       } elsif( /^RX\s{3}PubMed=(\d+);/ ) { # can start with pubmed only
+       $pubmed = $1;
+       # RA / RG / RL / RC continuation lines are joined with one space
+       } elsif( /^RA\s{3}(.*)/ ) { 
+       $au .= $au ? " $1" : $1;
+       } elsif( /^RG\s{3}(.*)/ ) { 
+       $rg .= $rg ? " $1" : $1;
+       } elsif ( /^RT\s{3}(.*)/ ) { 
+           if ($title) {
+               # continuation line: a space is inserted only when the
+               # title so far ends in a word character or one of ; , : ? !
+               my $tline = $1;
+               $title .= ($title =~ /[\w;,:\?!]$/) ? " $tline" : $tline;
+           } else {
+               $title = $1;
+           }
+       } elsif (/^RL\s{3}(.*)/ ) { 
+       $loc .= $loc ? " $1" : $1;
+       } elsif ( /^RC\s{3}(.*)/ ) {
+       $com .= $com ? " $1" : $1;
+       } 
+       #/^CC/ && last;
+       #/^SQ/ && last; # there may be sequences without CC lines! HL 05/11/2000
+       $_ = $self->_readline;
+   }
+   return \@refs;
+}
+
+
+=head2 _read_swissprot_Species
+
+ Title   : _read_swissprot_Species
+ Usage   :
+ Function: Reads the swissprot Organism species and classification
+           lines.
+             Able to deal with unconventional species names.
+ Example : OS Unknown prokaryotic organism
+             $genus = undef ; $species = Unknown prokaryotic organism
+ Returns : A Bio::Species object
+ Args    :
+
+=cut
+
+sub _read_swissprot_Species {
+    # Parses the block of OS/OC/OG/OX lines beginning with $line.  Further
+    # lines are fetched with _readline until one that does not start with
+    # OS, OC, OG or OX is met; that line is pushed back for the caller.
+    # Returns a Bio::Species object, or nothing if no scientific name was
+    # found or the name is listed in @Unknown_names.
+    my( $self,$line ) = @_;
+    my $org;
+    local $_ = $line;
+
+    my( $sub_species, $species, $genus, $common, $variant, $ncbi_taxid, $sci_name, $class_lines, $descr );
+    my $osline = "";
+    while ( defined $_ ) {
+        last unless /^O[SCGX]/;
+        # believe it or not, but OS may come multiple times -- at this time
+        # we can't capture multiple species
+        if(/^OS\s+(\S.+)/ && (! defined($sci_name))) {
+            # OS text is accumulated across lines and only evaluated once
+            # the accumulated text ends in ',', ', and' or '.'
+            $osline .= " " if $osline;
+            $osline .= $1;
+            if($osline =~ s/(,|, and|\.)$//) {
+                # everything before the first '(' is the scientific name,
+                # the remainder holds the parenthesised items
+                ($sci_name, $descr) = $osline =~ /(\S[^\(]+)(.*)/;
+                $sci_name =~ s/\s+$//;
+
+                while($descr =~ /\(([^\)]+)\)/g) {
+                    my $item = $1;
+                    # strain etc may not necessarily come first (yes, swissprot
+                    # is messy)
+                    if((! defined($variant)) &&
+                        (($item =~ /(^|[^\(\w])([Ss]train|isolate|serogroup|serotype|subtype|clone)\b/) ||
+                        ($item =~ /^(biovar|pv\.|type\s+)/))) {
+                        $variant = $item;
+                    }
+                    elsif($item =~ s/^subsp\.\s+//) {
+                        if(! $sub_species) {
+                            $sub_species = $item;
+                        }
+                        elsif(! $variant) {
+                            $variant = $item;
+                        }
+                    }
+                    elsif(! defined($common)) {
+                        # we're only interested in the first common name
+                        $common = $item;
+                        # re-close a nested parenthesis that the [^\)]+
+                        # capture above cut off
+                        if((index($common, '(') >= 0) &&
+                           (index($common, ')') < 0)) {
+                            $common .= ')';
+                        }
+                    }
+                }
+            }
+        }
+        elsif (s/^OC\s+(\S.+)$//) {
+            $class_lines .= $1;
+        }
+        elsif (/^OG\s+(.*)/) {
+            $org = $1;
+        }
+        elsif (/^OX\s+(.*)/ && (! defined($ncbi_taxid))) {
+            my $taxstring = $1;
+            # we only keep the first one and ignore all others
+            if ($taxstring =~ /NCBI_TaxID=([\w\d]+)/) {
+                $ncbi_taxid = $1;
+            }
+            else {
+                $self->throw("$taxstring doesn't look like NCBI_TaxID");
+            }
+        }
+        $_ = $self->_readline;
+    }
+    $self->_pushback($_); # pushback the last line because we need it
+
+    $sci_name || return;
+
+    #if the organism belongs to taxid 32644 then no Bio::Species object.
+#    return if grep { /^\Q$sci_name$/ } @Unknown_names;
+    return if grep { $_ eq $sci_name } @Unknown_names;
+
+    # Convert data in classification lines into classification array.
+    # only split on ';' or '.' so that classification that is 2 or more words
+    # will still get matched, use map() to remove trailing/leading/intervening
+    # spaces.  An entry without OC lines simply yields an empty list.
+    my @class = map { s/^\s+//; s/\s+$//; s/\s{2,}/ /g; $_; }
+                split /[;\.]+/, (defined($class_lines) ? $class_lines : '');
+
+    if (@class && $class[0] =~ /viruses/i) {
+        # viruses have different OS/OC syntax
+        my @virusnames = split(/\s+/, $sci_name);
+        $species = (@virusnames > 1) ? pop(@virusnames) : '';
+        $genus = join(" ", @virusnames);
+        $sub_species = $descr;
+    }
+    else {
+        # do we have a genus?  The last two classification entries are the
+        # candidates; they are literal names, so escape them before using
+        # them in a pattern.
+        my @genus_candidates;
+        push(@genus_candidates, quotemeta($class[-1]))
+            if defined($class[-1]) && length($class[-1]);
+        push(@genus_candidates, quotemeta($class[-2])) if $class[-2];
+        my $possible_genus = join('|', @genus_candidates);
+        if (@genus_candidates && $sci_name =~ /^($possible_genus)/) {
+            $genus = $1;
+            ($species) = $sci_name =~ /^\Q$genus\E\s+(.+)/;
+        }
+        else {
+            $species = $sci_name;
+        }
+
+        # is this organism of rank species or is it lower?
+        # (doesn't catch everything, but at least the guess isn't dangerous)
+        # Only overwrite $species/$sub_species when the split really works,
+        # so a sub-species parsed from the OS parentheses is not lost.
+        if (defined($species) &&
+            $species =~ /(.+)\s+((?:subsp\.|var\.).+)/) {
+            ($species, $sub_species) = ($1, $2);
+        }
+    }
+
+    # Bio::Species array needs array in Species -> Kingdom direction
+    unless (@class && $class[-1] eq $sci_name) {
+        push(@class, $sci_name);
+    }
+    @class = reverse @class;
+
+    my $taxon = Bio::Species->new();
+    $taxon->scientific_name($sci_name);
+    $taxon->classification(@class);
+    $taxon->common_name($common) if $common;
+    $taxon->sub_species($sub_species) if $sub_species;
+    $taxon->organelle($org) if $org;
+    $taxon->ncbi_taxid($ncbi_taxid) if $ncbi_taxid;
+    $taxon->variant($variant) if $variant;
+
+    # done
+    return $taxon;
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function: 
+ Example : 
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+# inherited from SeqIO.pm ! HL 05/11/2000
+
+=head2 _read_FTHelper_swissprot
+
+ Title   : _read_FTHelper_swissprot
+ Usage   : _read_FTHelper_swissprot(\$buffer)
+ Function: reads the next FT key line
+ Example :
+ Returns : Bio::SeqIO::FTHelper object 
+ Args    : 
+
+
+=cut
+
+sub _read_FTHelper_swissprot {
+    # Builds a Bio::SeqIO::FTHelper from the FT key line in $line plus any
+    # continuation lines read via _readline.  The first line that is not a
+    # continuation is pushed back.  Warns and returns nothing when $line
+    # carries no feature key.
+    my ($self,$line ) = @_;
+    # initial version implemented by HL 05/10/2000
+    # FIXME this may not be perfect, so please review
+    # lots of cleaning up by JES 2004/07/01, still may not be perfect =)
+
+    local $_ = $line;
+    my ($key,   # The key of the feature
+        $loc,   # The location line from the feature
+        $desc,  # The descriptive text
+        );
+    if( m/^FT\s{3}(\w+)\s+([\d\?\<]+)\s+([\d\?\>]+)\s*(.*)$/ox) {
+        $key  = $1;
+        $loc  = "$2..$3";
+        # keep the text as is (even a literal "0"); it may be empty
+        $desc = defined($4) ? $4 : "";
+        chomp($desc);
+    }
+
+    # continuation lines: "FT" followed by at least 20 whitespace characters
+    while ( defined($_ = $self->_readline) &&
+            /^FT\s{20,}(\S.*)$/ ) {
+        if( defined($desc) && length($desc) ) { $desc .= " $1" }
+        else { $desc = $1 }
+        chomp($desc);
+    }
+    $self->_pushback($_);
+    unless( $key ) {
+        # No feature key. What's this?
+        $self->warn("No feature key in putative feature table line: $line");
+        return;
+    }
+
+    # Make the new FTHelper object
+    my $out = Bio::SeqIO::FTHelper->new(-verbose => $self->verbose());
+    $out->key($key);
+    $out->loc($loc);
+
+    # store the description if there is one, without its trailing period
+    if( defined($desc) && length($desc) ) {
+        $desc =~ s/\.$//;
+        push(@{$out->field->{"description"}}, $desc);
+    }
+    return $out;
+}
+
+
+=head2 _write_line_swissprot
+
+ Title   : _write_line_swissprot
+ Usage   :
+ Function: internal function
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub _write_line_swissprot{
+   # Prints $line through _print, hard-wrapped so that no output line is
+   # longer than $length characters: the first line is prefixed with $pre1,
+   # every following line with $pre2.  Throws if $length is false, or if
+   # the prefixes leave no room for the text that has to be written.
+   my ($self,$pre1,$pre2,$line,$length) = @_;
+
+   $length || $self->throw( "Miscalled write_line_swissprot without length. Programming error!");
+   my $first_width = $length - length $pre1;   # text room on the first line
+   my $next_width  = $length - length $pre2;   # text room on later lines
+   my $linel       = length $line;
+
+   # A continuation width of zero or less would never advance through the
+   # text (endless loop); a negative first width would index from the end.
+   if( $first_width < 0 || ($first_width < $linel && $next_width <= 0) ) {
+       $self->throw( "Cannot wrap line to length $length: prefix '$pre1' or '$pre2' leaves no room for text");
+   }
+
+   $self->_print( $pre1 . substr($line,0,$first_width) . "\n");
+
+   for(my $i = $first_width; $i < $linel; $i += $next_width) {
+       $self->_print( $pre2 . substr($line,$i,$next_width) . "\n");
+   }
+
+}
+
+=head2 _write_line_swissprot_regex
+
+ Title   : _write_line_swissprot_regex
+ Usage   :
+ Function: internal function for writing lines of specified
+           length, with different first and the next line 
+           left hand headers and split at specific points in the
+           text
+ Example :
+ Returns : nothing
+ Args    : first header, second header, text-line, regex for line breaks, total line length
+
+
+=cut
+
+sub _write_line_swissprot_regex {
+   # Splits $line into pieces of at most ($length - prefix length - 1)
+   # characters, each piece ending where $regex matches, and prints them
+   # through _print: the first piece after $pre1, all others after $pre2.
+   # Both prefixes must have the same length.
+   my ($self,$pre1,$pre2,$line,$regex,$length) = @_;
+
+   $self->throw( "Miscalled write_line_swissprot without length. Programming error!")
+       unless $length;
+
+   my ($len1, $len2) = (length($pre1), length($pre2));
+   unless( $len1 == $len2 ) {
+       $self->warn( "len 1 is ". $len1 . " len 2 is ". $len2 . "\n");
+       $self->throw( "Programming error - cannot called write_line_swissprot_regex with different length \npre1 ($pre1) and \npre2 ($pre2) tags!");
+   }
+
+   # room for text on each line
+   my $width = $length - $len1 - 1;
+
+   # gather each piece together with the break text that follows it
+   my @chunks;
+   push(@chunks, $1.$2) while $line =~ m/(.{1,$width})($regex)/g;
+
+   # the first piece goes out under the first header ...
+   my $first = shift @chunks;
+   $self->_print( "$pre1$first\n");
+
+   # ... the remaining ones under the second header
+   for my $chunk ( @chunks ) {
+       $self->_print( "$pre2$chunk\n");
+   }
+}
+
+=head2 _post_sort
+
+ Title   : _post_sort
+ Usage   : $obj->_post_sort($newval)
+ Function: 
+ Returns : value of _post_sort
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _post_sort{
+   # Combined getter/setter for the '_post_sort' slot of the object hash.
+   # Any extra argument (even undef) is stored; the current value is
+   # returned either way.
+   my ($obj, @new_value) = @_;
+   $obj->{'_post_sort'} = $new_value[0] if @new_value;
+   return $obj->{'_post_sort'};
+}
+
+=head2 _show_dna
+
+ Title   : _show_dna
+ Usage   : $obj->_show_dna($newval)
+ Function: 
+ Returns : value of _show_dna
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _show_dna{
+   # Combined getter/setter for the '_show_dna' slot of the object hash.
+   # Any extra argument (even undef) is stored; the current value is
+   # returned either way.
+   my ($obj, @new_value) = @_;
+   $obj->{'_show_dna'} = $new_value[0] if @new_value;
+   return $obj->{'_show_dna'};
+}
+
+=head2 _id_generation_func
+
+ Title   : _id_generation_func
+ Usage   : $obj->_id_generation_func($newval)
+ Function: 
+ Returns : value of _id_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _id_generation_func{
+   # Combined getter/setter for the '_id_generation_func' slot of the
+   # object hash.  Any extra argument (even undef) is stored; the current
+   # value is returned either way.
+   my ($obj, @new_value) = @_;
+   $obj->{'_id_generation_func'} = $new_value[0] if @new_value;
+   return $obj->{'_id_generation_func'};
+}
+
+=head2 _ac_generation_func
+
+ Title   : _ac_generation_func
+ Usage   : $obj->_ac_generation_func($newval)
+ Function: 
+ Returns : value of _ac_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _ac_generation_func{
+   # Combined getter/setter for the '_ac_generation_func' slot of the
+   # object hash.  Any extra argument (even undef) is stored; the current
+   # value is returned either way.
+   my ($obj, @new_value) = @_;
+   $obj->{'_ac_generation_func'} = $new_value[0] if @new_value;
+   return $obj->{'_ac_generation_func'};
+}
+
+=head2 _sv_generation_func
+
+ Title   : _sv_generation_func
+ Usage   : $obj->_sv_generation_func($newval)
+ Function: 
+ Returns : value of _sv_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _sv_generation_func{
+   # Combined getter/setter for the '_sv_generation_func' slot of the
+   # object hash.  Any extra argument (even undef) is stored; the current
+   # value is returned either way.
+   my ($obj, @new_value) = @_;
+   $obj->{'_sv_generation_func'} = $new_value[0] if @new_value;
+   return $obj->{'_sv_generation_func'};
+}
+
+=head2 _kw_generation_func
+
+ Title   : _kw_generation_func
+ Usage   : $obj->_kw_generation_func($newval)
+ Function: 
+ Returns : value of _kw_generation_func
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _kw_generation_func{
+   # Combined getter/setter for the '_kw_generation_func' slot of the
+   # object hash.  Any extra argument (even undef) is stored; the current
+   # value is returned either way.
+   my ($obj, @new_value) = @_;
+   $obj->{'_kw_generation_func'} = $new_value[0] if @new_value;
+   return $obj->{'_kw_generation_func'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tab.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tab.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tab.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+#-----------------------------------------------------------------------------
+# PACKAGE : Bio::SeqIO::tab
+# AUTHOR  : Philip Lijnzaad <p.lijnzaad at med.uu.nl>
+# CREATED : Feb 6 2003
+# REVISION: $Id: tab.pm,v 1.6.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# Copyright (c) This module is free software; you can redistribute it
+# and/or modify it under the same terms as Perl itself.
+#
+# _History_
+#
+# Ewan Birney <birney at ebi.ac.uk> developed the SeqIO
+# schema and the first prototype modules.
+#
+# This code is based on his Bio::SeqIO::raw
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::tab - nearly raw sequence file input/output
+stream. Reads/writes id"\t"sequence"\n"
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the L<Bio::SeqIO> class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from tabbed flat
+file databases.
+
+It is very useful when doing large scale stuff using the Unix command
+line utilities (grep, sort, awk, sed, split, you name it). Imagine
+that you have a format converter 'seqconvert' along the following
+lines:
+
+  my $in  = Bio::SeqIO->newFh(-fh => \*STDIN , '-format' => $from);
+  my $out = Bio::SeqIO->newFh(-fh=> \*STDOUT, '-format' => $to);
+  print $out $_ while <$in>;
+
+then you can very easily filter sequence files for duplicates as:
+
+  $ seqconvert < foo.fa -from fasta -to tab | sort -u |\
+       seqconvert -from tab -to fasta > foo-unique.fa
+
+Or grep [-v] for certain sequences with:
+
+  $ seqconvert < foo.fa -from fasta -to tab | grep -v '^S[a-z]*control' |\
+       seqconvert -from tab -to fasta > foo-without-controls.fa
+
+Or chop up a huge file with sequences into smaller chunks with:
+
+  $ seqconvert < all.fa -from fasta -to tab | split -l 10 - chunk-
+  $ for i in chunk-*; do seqconvert -from tab -to fasta < $i > $i.fa; done
+  # (this creates files chunk-aa.fa, chunk-ab.fa, ..., each containing 10
+  # sequences)
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHORS
+
+Philip Lijnzaad, p.lijnzaad at med.uu.nl
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SeqIO::tab;
+use strict;
+
+use Bio::Seq;
+
+use base qw(Bio::SeqIO);
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    :
+
+
+=cut
+
+sub next_seq{
+   # Reads one line of the form id<TAB>sequence and returns it as a
+   # Bio::Seq.  Returns nothing at end of input; throws if the line
+   # contains no TAB.
+   my ($self,@args) = @_;
+   ## When its 1 sequence per line with no formatting at all,
+   ## grabbing it should be easy :)
+
+   my $nextline = $self->_readline();
+   return unless defined $nextline;
+   chomp($nextline);
+
+   if ($nextline =~ /^([^\t]*)\t(.*)/) {
+       # the sequence is upper-cased and stripped of all non-word
+       # characters (whitespace, punctuation)
+       my ($id, $seq)=($1, uc($2));
+       $seq =~ s/\W//g;
+       return  Bio::Seq->new(-display_id=> $id, -seq => $seq);
+   }
+
+   $self->throw("Can't parse tabbed sequence entry:'$nextline' around line $.");
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+sub write_seq {
+   # Writes every given sequence object as display_id<TAB>sequence<NEWLINE>
+   # through _print.  Throws if a display_id contains a TAB, returns a
+   # false value as soon as _print fails, and 1 once all are written.
+   my ($self,@seq) = @_;
+   foreach my $seq (@seq) {
+       my $id = $seq->display_id();
+       if ($id =~ /\t/) {
+           $self->throw("display_id [".$id."] contains TAB -- illegal in tab format");
+       }
+       $self->_print($id, "\t",$seq->seq, "\n") or return;
+   }
+   return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/table.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/table.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/table.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,824 @@
+# $Id: table.pm,v 1.4.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::table
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+
+#
+# (c) Hilmar Lapp, hlapp at gmx.net, 2005.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2005.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::table - sequence input/output stream from a delimited table
+
+=head1 SYNOPSIS
+
+  #It is probably best not to use this object directly, but
+  #rather go through the SeqIO handler system. Go:
+
+  $stream = Bio::SeqIO->new(-file => $filename, -format => 'table');
+
+  while ( my $seq = $stream->next_seq() ) {
+	# do something with $seq
+  }
+
+=head1 DESCRIPTION
+
+This class transforms records in a table-formatted text file into
+Bio::Seq objects.
+
+A table-formatted text file of sequence records for the purposes of
+this module is defined as a text file with each row corresponding to a
+sequence, and the attributes of the sequence being in different
+columns. Columns are delimited by a common delimiter, for instance tab
+or comma.
+
+The module permits specifying which columns hold which type of
+annotation. The semantics of certain attributes, if present, are
+pre-defined, e.g., accession number and sequence. Additional
+attributes may be added to the annotation bundle.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.
+
+Bug reports can be submitted via email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::table;
+use strict;
+
+use Bio::Species;
+use Bio::Seq::SeqFactory;
+use Bio::Annotation::Collection;
+use Bio::Annotation::SimpleValue;
+
+use base qw(Bio::SeqIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::SeqIO->new(-file => $filename, -format => 'table')
+ Function: Returns a new seqstream
+ Returns : A Bio::SeqIO stream for a table format
+ Args    : Named parameters:
+
+             -file    name of file to read
+             -fh      filehandle to attach to
+             -comment leading character(s) introducing a comment line
+             -header  the number of header lines to skip; the first
+                      non-comment header line will be used to obtain
+                      column names; column names will be used as the
+                      default tags for attaching annotation.
+             -delim   the delimiter for columns as a regular expression;
+                      consecutive occurrences of the delimiter will
+                      not be collapsed.
+             -display_id the one-based index of the column containing
+                      the display ID of the sequence
+             -accession_number the one-based index of the column
+                      containing the accession number of the sequence
+             -seq     the one-based index of the column containing
+                      the sequence string of the sequence
+             -species the one-based index of the column containing the
+                      species for the sequence record; if not a
+                      number, will be used as the static species
+                      common to all records
+             -annotation if provided and a scalar (but see below), a
+                      flag whether or not all additional columns are
+                      to be preserved as annotation, the tags used
+                      will either be 'colX' if there is no column
+                      header and where X is the one-based column
+                      index, and otherwise the column headers will be
+                      used as tags;
+
+                      if a reference to an array, or a square
+                      bracket-enclosed string of comma-delimited
+                      values, only those columns (one-based index)
+                      will be preserved as annotation, tags as before;
+
+                      if a reference to a hash, or a curly
+                      braces-enclosed string of comma-delimited key
+                      and value pairs in alternating order, the keys
+                      are one-based column indexes to be preserved,
+                      and the values are the tags under which the
+                      annotation is to be attached; if not provided or
+                      supplied as undef, no additional annotation will
+                      be preserved.
+             -colnames a reference to an array of column labels, or a
+                      string of comma-delimited labels, denoting the
+                      columns to be converted into annotation; this is
+                      an alternative to -annotation and will be
+                      ignored if -annotation is also supplied with a
+                      valid value.
+             -trim    flag determining whether or not all values should
+                      be trimmed of leading and trailing white space
+                      and double quotes
+
+           Additional arguments may be used to e.g. set factories and
+           builders involved in the sequence object creation (see the
+           POD of Bio::SeqIO).
+
+
+=cut
+
+sub _initialize {
+    # Initializes the stream from the named arguments passed to new():
+    # runs the parent class initialization, stores the table options
+    # (comment character, delimiter, header, trimming), builds the
+    # attribute and annotation column maps, and installs a default
+    # sequence factory if none is set.
+    my($self,@args) = @_;
+
+    # chained initialization
+    $self->SUPER::_initialize(@args);
+
+    # our own parameters
+    my ($cmtchars,
+        $header,
+        $delim,
+        $display_id,
+        $accnr,
+        $seq,
+        $taxon,
+        $useann,
+        $colnames,
+        $trim) =
+            $self->_rearrange([qw(COMMENT
+                                  HEADER
+                                  DELIM
+                                  DISPLAY_ID
+                                  ACCESSION_NUMBER
+                                  SEQ
+                                  SPECIES
+                                  ANNOTATION
+                                  COLNAMES
+                                  TRIM)
+                              ], @args);
+
+    # store options and apply defaults ("#" for comments, TAB as
+    # delimiter) unless a value is already set and none was passed in
+    $self->comment_char(defined($cmtchars) ? $cmtchars : "#")
+        if (!defined($self->comment_char)) || defined($cmtchars);
+    $self->delimiter(defined($delim) ? $delim : "\t")
+        if (!defined($self->delimiter)) || defined($delim);
+    $self->header($header) if defined($header);
+    $self->trim_values($trim) if defined($trim);
+
+    # attribute columns
+    my $attrs = {};
+    $attrs->{-display_id} = $display_id if defined($display_id);
+    $attrs->{-accession_number} = $accnr if defined($accnr);
+    $attrs->{-seq} = $seq if defined($seq);
+    if (defined($taxon)) {
+        if (ref($taxon) || ($taxon =~ /^\d+$/)) {
+            # either a static object, or a column reference
+            $attrs->{-species} = $taxon;
+        } else {
+            # static species as a string
+            $attrs->{-species} = Bio::Species->new(
+                -classification => [reverse(split(' ',$taxon))]);
+        }
+    }
+    $self->attribute_map($attrs);
+
+    # annotation columns, if any
+    if ($useann && !ref($useann)) {
+        # it's a scalar; check whether this is in fact an array or
+        # hash as a string rather than just a flag
+        if ($useann =~ /^\[(.*)\]$/) {
+            $useann = [split(/[,;]/,$1)];
+        } elsif ($useann =~ /^\{(.*)\}$/) {
+            $useann = {split(/[,;]/,$1)};
+        } # else it is probably indeed just a flag
+    }
+    if (ref($useann)) {
+        my $ann_map;
+        if (ref($useann) eq "ARRAY") {
+            # header may never have been set, so guard against undef
+            my $has_header = (($self->header || 0) > 0);
+            $ann_map = {};
+            foreach my $i (@$useann) {
+                # with a header the tag is left undefined here; without
+                # one, fall back to 'colX'
+                $ann_map->{$i} = $has_header ? undef : "col$i";
+            }
+        } else {
+            # no special handling necessary
+            $ann_map = $useann;
+        }
+        $self->annotation_map($ann_map);
+    } else {
+        $self->keep_annotation($useann || $colnames);
+        # annotation columns, if any
+        if ($colnames && !ref($colnames)) {
+            # an array as a string
+            $colnames =~ s/^\[(.*)\]$/$1/;
+            $colnames = [split(/[,;]/,$colnames)];
+        }
+        $self->annotation_columns($colnames) if ref($colnames);
+    }
+
+    # make sure we have a factory defined
+    if(!defined($self->sequence_factory)) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
+                                      -type => 'Bio::Seq::RichSeq'));
+    }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::RichSeq object
+ Args    :
+
+=cut
+
+sub next_seq {
+    # Reads the next record (skipping the header on the first call if one
+    # is configured), splits it into columns and hands the mapped
+    # attribute and annotation values to the sequence builder.  Returns
+    # the object made by the builder, or nothing at end of input.
+    my $self = shift;
+
+    # skip until not a comment and not an empty line
+    my $line_ok = $self->_next_record();
+
+    # if there is a header but we haven't read past it yet then do so now
+    if ($line_ok && (! $self->_header_skipped) && $self->header) {
+        $line_ok = $self->_parse_header();
+        $self->_header_skipped(1);
+    }
+
+    # return if we reached end-of-file
+    return unless $line_ok;
+
+    # otherwise, parse the record
+
+    # split into columns
+    my @cols = $self->_get_row_values();
+    # trim leading and trailing whitespace and quotes if desired
+    if ($self->trim_values) {
+        foreach my $value (@cols) {
+            next unless defined($value) && length($value);
+            # trim off whitespace
+            $value =~ s/^\s+//;
+            $value =~ s/\s+$//;
+            # trim off double quotes
+            $value =~ s/^"//;
+            $value =~ s/"$//;
+        }
+    }
+
+    # assign values for columns in the attribute map
+    my $attrmap = $self->_attribute_map;
+    my %params = ();
+    foreach my $attr (keys %$attrmap) {
+        my $source = $attrmap->{$attr};
+        if ((!ref($source)) && ($source =~ /^\d+$/)) {
+            # this is a column index, add to instantiation parameters
+            $params{$attr} = $cols[$source];
+        } else {
+            # not a column index; we assume it's a static value
+            $params{$attr} = $source;
+        }
+    }
+
+    # add annotation columns to the annotation bundle
+    my $annmap = $self->_annotation_map;
+    if ($annmap && %$annmap) {
+        my $anncoll = Bio::Annotation::Collection->new();
+        foreach my $col (keys %$annmap) {
+            # skip empty columns, but keep a literal "0" -- it is a value
+            next unless defined($cols[$col]) && length($cols[$col]);
+            $anncoll->add_Annotation(
+                Bio::Annotation::SimpleValue->new(-value  => $cols[$col],
+                                                  -tagname=> $annmap->{$col}));
+        }
+        $params{'-annotation'} = $anncoll;
+    }
+
+    # ask the object builder to add the slots that we've gathered
+    my $builder = $self->sequence_builder();
+    $builder->add_slot_value(%params);
+    # and instantiate the object
+    my $seq = $builder->make_object();
+
+    # done!
+    return $seq;
+}
+
+=head2 comment_char
+
+ Title   : comment_char
+ Usage   : $obj->comment_char($newval)
+ Function: Get/set the leading character(s) designating a line as
+           a comment-line.
+ Example :
+ Returns : value of comment_char (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub comment_char{
+    # Getter/setter for the 'comment_char' slot: any argument given (even
+    # undef) is stored, and the current value is returned.
+    my ($self, @new_value) = @_;
+
+    $self->{'comment_char'} = $new_value[0] if @new_value;
+    return $self->{'comment_char'};
+}
+
+=head2 header
+
+ Title   : header
+ Usage   : $obj->header($newval)
+ Function: Get/set the number of header lines to skip before the
+           rows containing actual sequence records.
+
+           If set to zero or undef, means that there is no header and
+           therefore also no column headers.
+
+ Example :
+ Returns : value of header (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub header{
+    # Getter/setter for the 'header' slot: any argument given (even undef)
+    # is stored, and the current value is returned.
+    my ($self, @new_value) = @_;
+
+    $self->{'header'} = $new_value[0] if @new_value;
+    return $self->{'header'};
+}
+
+=head2 delimiter
+
+ Title   : delimiter
+ Usage   : $obj->delimiter($newval)
+ Function: Get/set the column delimiter. This will in fact be
+           treated as a regular expression. Consecutive occurrences
+           will not be collapsed to a single one.
+
+ Example :
+ Returns : value of delimiter (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub delimiter{
+    # Getter/setter for the 'delimiter' slot: any argument given (even
+    # undef) is stored, and the current value is returned.
+    my ($self, @new_value) = @_;
+
+    $self->{'delimiter'} = $new_value[0] if @new_value;
+    return $self->{'delimiter'};
+}
+
+=head2 attribute_map
+
+ Title   : attribute_map
+ Usage   : $obj->attribute_map($newval)
+ Function: Get/set the map of sequence object initialization
+           attributes (keys) to one-based column index.
+
+           Attributes will usually need to be prefixed by a dash, just
+           as if they were passed to the new() method of the sequence
+           class.
+
+ Example :
+ Returns : value of attribute_map (a reference to a hash)
+ Args    : on set, new value (a reference to a hash or undef, optional)
+
+
+=cut
+
+sub attribute_map{
+    # Public view of the attribute map uses one-based column indexes, the
+    # stored map zero-based ones.  Only plain (non-reference) values made
+    # up entirely of digits count as column indexes; everything else is
+    # passed through untouched.
+    my ($self, @args) = @_;
+
+    my $is_index = sub {
+        my $candidate = shift;
+        return (!ref($candidate)) && ($candidate =~ /^\d+$/);
+    };
+
+    if (@args) {
+        my $one_based = $args[0];
+        # undef clears the map and hands back whatever was stored
+        return delete $self->{'_attribute_map'} unless defined($one_based);
+        # build a fresh hash so the caller's map is left alone
+        my %zero_based;
+        foreach my $attr (keys %$one_based) {
+            my $value = $one_based->{$attr};
+            $value-- if $is_index->($value);
+            $zero_based{$attr} = $value;
+        }
+        $self->{'_attribute_map'} = \%zero_based;
+    }
+
+    # nothing to report if no map has been set
+    return unless exists($self->{'_attribute_map'});
+
+    # hand out a converted copy, never the stored map itself
+    my $stored = $self->{'_attribute_map'};
+    my %public;
+    foreach my $attr (keys %$stored) {
+        my $value = $stored->{$attr};
+        $value++ if $is_index->($value);
+        $public{$attr} = $value;
+    }
+    return \%public;
+}
+
+=head2 annotation_map
+
+ Title   : annotation_map
+ Usage   : $obj->annotation_map($newval)
+ Function: Get/set the mapping between one-based column indexes
+           (keys) and annotation tags (values).
+
+           Note that the map returned by this method may change after
+           the first next_seq() call if the file contains a column
+           header and no annotation keys have been predefined in the
+           map, because upon reading the column header line the tag
+           names will be set automatically.
+
+           Note also that the map may reference columns that are used
+           as well in the sequence attribute map.
+
+ Example :
+ Returns : value of annotation_map (a reference to a hash)
+ Args    : on set, new value (a reference to a hash or undef, optional)
+
+
+=cut
+
+sub annotation_map{
+    # Public view of the annotation map is keyed by one-based column
+    # index, the stored map by zero-based index.  Setting a map also
+    # switches keep_annotation on.
+    my ($self, @args) = @_;
+
+    if (@args) {
+        my $one_based = $args[0];
+        # undef clears the map and hands back whatever was stored
+        return delete $self->{'_annotation_map'} unless defined($one_based);
+        # re-key into a fresh hash; the caller's map is left alone and no
+        # entry can overwrite a neighbour that has yet to be moved
+        my %zero_based;
+        $zero_based{$_ - 1} = $one_based->{$_} for keys(%$one_based);
+        $self->{'_annotation_map'} = \%zero_based;
+        # also make a note that we want to keep annotation
+        $self->keep_annotation(1);
+    }
+
+    # nothing to report if no map has been set
+    return unless exists($self->{'_annotation_map'});
+
+    # hand out a re-keyed copy, never the stored map itself
+    my $stored = $self->{'_annotation_map'};
+    my %public;
+    $public{$_ + 1} = $stored->{$_} for keys(%$stored);
+    return \%public;
+}
+
+=head2 keep_annotation
+
+ Title   : keep_annotation
+ Usage   : $obj->keep_annotation($newval)
+ Function: Get/set flag whether or not to keep values from
+           additional columns as annotation.
+
+           Additional columns are all those columns in the input file
+           that aren't referenced in the attribute map.
+
+ Example :
+ Returns : value of keep_annotation (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub keep_annotation{
+    # Getter/setter for the 'keep_annotation' slot: any argument given
+    # (even undef) is stored, and the current value is returned.
+    my ($self, @new_value) = @_;
+
+    $self->{'keep_annotation'} = $new_value[0] if @new_value;
+    return $self->{'keep_annotation'};
+}
+
+=head2 annotation_columns
+
+ Title   : annotation_columns
+ Usage   : $obj->annotation_columns($newval)
+ Function: Get/set the names (labels) of the columns to be used for
+           annotation.
+
+           This is an alternative to using annotation_map. In order to
+           have any effect, it must be set before the first call of
+           next_seq(), and obviously there must be a header line (or
+           row) too giving the column labels.
+
+ Example :
+ Returns : value of annotation_columns (a reference to an array)
+ Args    : on set, new value (a reference to an array or undef, optional)
+
+
+=cut
+
+sub annotation_columns{
+    my ($self, @args) = @_;
+
+    # store the supplied value (normally an array reference) as given
+    if (@args) {
+        $self->{'annotation_columns'} = $args[0];
+    }
+    return $self->{'annotation_columns'};
+}
+
+=head2 trim_values
+
+ Title   : trim_values
+ Usage   : $obj->trim_values($newval)
+ Function: Get/set whether or not to trim leading and trailing
+           whitespace off all column values.
+ Example :
+ Returns : value of trim_values (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub trim_values{
+    my ($self, @args) = @_;
+
+    # any argument, even undef, replaces the current flag value
+    if (@args) {
+        $self->{'trim_values'} = $args[0];
+    }
+    return $self->{'trim_values'};
+}
+
+=head1 Internal methods
+
+All methods with a leading underscore are not meant to be part of the
+'official' API. They are for use by this module only, consider them
+private unless you are a developer trying to modify this module.
+
+=cut
+
+=head2 _attribute_map
+
+ Title   : _attribute_map
+ Usage   : $obj->_attribute_map($newval)
+ Function: Get only. Same as attribute_map, but zero-based indexes.
+
+           Note that any changes made to the returned map will change
+           the map used by this instance. You should know what you are
+           doing if you modify the returned value (or if you call this
+           method in the first place).
+
+ Example :
+ Returns : value of _attribute_map (a reference to a hash)
+ Args    : none
+
+
+=cut
+
+sub _attribute_map{
+    my ($self) = @_;
+
+    # read-only access: the stored reference itself is returned, not a copy
+    return $self->{'_attribute_map'};
+}
+
+=head2 _annotation_map
+
+ Title   : _annotation_map
+ Usage   : $obj->_annotation_map($newval)
+ Function: Get only. Same as annotation_map, but with zero-based indexes.
+
+           Note that any changes made to the returned map will change
+           the map used by this instance. You should know what you are
+           doing if you modify the returned value (or if you call this
+           method in the first place).
+
+ Example :
+ Returns : value of _annotation_map (a reference to a hash)
+ Args    : none
+
+
+=cut
+
+sub _annotation_map{
+    my ($self) = @_;
+
+    # read-only access: the stored reference itself is returned, not a copy
+    return $self->{'_annotation_map'};
+}
+
+=head2 _header_skipped
+
+ Title   : _header_skipped
+ Usage   : $obj->_header_skipped($newval)
+ Function: Get/set the flag whether the header was already
+           read (and skipped) or not.
+ Example :
+ Returns : value of _header_skipped (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub _header_skipped{
+    my ($self, @args) = @_;
+
+    # any argument, even undef, replaces the current flag value
+    if (@args) {
+        $self->{'_header_skipped'} = $args[0];
+    }
+    return $self->{'_header_skipped'};
+}
+
+=head2 _next_record
+
+ Title   : _next_record
+ Usage   :
+ Function: Navigates the underlying file to the next record.
+
+           For row-based records in delimited text files, this will
+           skip all empty lines and lines with a leading comment
+           character.
+
+           This method is here to serve as a hook for other formats
+           that conceptually also represent tables but aren't
+           formatted as row-based text files.
+
+ Example :
+ Returns : TRUE if the navigation was successful and FALSE
+           otherwise. Unsuccessful navigation will usually be treated
+           as an end-of-file condition.
+ Args    :
+
+
+=cut
+
+sub _next_record{
+    my ($self) = @_;
+
+    my $comment_char = $self->comment_char;
+
+    # Keep reading until end of input or a line that carries data.
+    my $line;
+    while (defined($line = $self->_readline())) {
+        # comment line (only checked if a comment character is set)
+        next if $comment_char && ($line =~ /^\s*$comment_char/);
+        # empty or whitespace-only line
+        next if $line =~ /^\s*$/;
+        last;
+    }
+
+    # Remember the line as the current record; undef means end of input.
+    return $self->{'_line'} = $line;
+}
+
+=head2 _parse_header
+
+ Title   : _parse_header
+ Usage   :
+ Function: Parse the table header and navigate past it.
+
+           This method is called if the number of header rows has been
+           specified equal to or greater than one, and positioned at
+           the first header line (row). By default the first header
+           line (row) is used for setting column names, but additional
+           lines (rows) may be skipped too. Empty lines and comment
+           lines do not count as header lines (rows).
+
+           This method will call _next_record() to navigate to the
+           next header line (row), if there is more than one header
+           line (row). Upon return, the file is presumed to be
+           positioned at the first record after the header.
+
+           This method is here to serve as a hook for other formats
+           that conceptually also represent tables but aren't
+           formatted as row-based text files.
+
+           Note however that the only methods used to access file
+           content or navigate the position are _get_row_values() and
+           _next_record(), so it should usually suffice to override
+           those.
+
+ Example :
+ Returns : TRUE if navigation past the header was successful and FALSE
+           otherwise. Unsuccessful navigation will usually be treated
+           as an end-of-file condition.
+ Args    :
+
+
+=cut
+
+sub _parse_header{
+    my ($self) = @_;
+
+    # The first header line holds the column labels; they only matter
+    # if annotation is to be kept.
+    if ($self->keep_annotation) {
+        my @labels = $self->_get_row_values();
+        if ($self->trim_values) {
+            for my $label (@labels) {
+                # strip surrounding whitespace first ...
+                $label =~ s/^\s+//;
+                $label =~ s/\s+$//;
+                # ... then one enclosing double quote on either side
+                $label =~ s/^"//;
+                $label =~ s/"$//;
+            }
+        }
+        # one-based annotation map as set so far (possibly empty)
+        my $annmap = $self->annotation_map || {};
+        my $last = $#labels;
+        if (%$annmap) {
+            # a map exists: only fill in labels for those columns whose
+            # tag name was left undefined
+            for my $i (0 .. $last) {
+                my $colno = $i + 1;
+                next unless exists($annmap->{$colno});
+                next if defined($annmap->{$colno});
+                $annmap->{$colno} = $labels[$i];
+            }
+        } elsif (my $anncols = $self->annotation_columns) {
+            # columns were requested by label: every requested label
+            # has to be present in the header
+            my %in_header = map { ($_ => 1) } @labels;
+            for my $wanted (@$anncols) {
+                next if exists($in_header{$wanted});
+                $self->throw("no such column labeled '$wanted'");
+            }
+            # translate the requested labels into column numbers
+            my %requested = map { ($_ => 1) } @$anncols;
+            for my $i (0 .. $last) {
+                next unless exists($requested{$labels[$i]});
+                $annmap->{$i + 1} = $labels[$i];
+            }
+        } else {
+            # nothing specified: start out with every column ...
+            for my $i (0 .. $last) {
+                $annmap->{$i + 1} = $labels[$i];
+            }
+            # ... and drop those that the attribute map refers to by
+            # a plain column number
+            for my $attrcol (values %{$self->attribute_map}) {
+                next if ref($attrcol);
+                next unless $attrcol =~ /^\d+$/;
+                delete $annmap->{$attrcol};
+            }
+        }
+        $self->annotation_map($annmap);
+    }
+
+    # Move past the header: advance one record per header line, and
+    # stop early if a record cannot be obtained.
+    my $remaining = $self->header;
+    my $line_ok = 1;
+    while (defined($line_ok) && ($remaining > 0)) {
+        $line_ok = $self->_next_record();
+        $remaining--;
+    }
+
+    return $line_ok;
+}
+
+=head2 _get_row_values
+
+ Title   : _get_row_values
+ Usage   :
+ Function: Get the values for the current line (or row) as an array in
+           the order of columns.
+
+           This method is here to serve as a hook for other formats
+           that conceptually also represent tables but aren't
+           formatted as row-based text files.
+
+ Example :
+ Returns : An array of column values for the current row.
+ Args    :
+
+
+=cut
+
+sub _get_row_values{
+    my ($self) = @_;
+
+    my $separator = $self->delimiter;
+    # work on a copy of the current line, minus its line terminator
+    my $record = $self->{'_line'};
+    chomp($record);
+    # the delimiter is used as a pattern; note that split() without a
+    # limit does not return trailing empty fields
+    my @fields = split(/$separator/, $record);
+    return @fields;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigr.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigr.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigr.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1343 @@
+# $Id: tigr.pm,v 1.12.4.1 2006/10/02 23:10:30 sendu Exp $
+# BioPerl module for Bio::SeqIO::tigr
+#
+# Cared for by Josh Lauricha (laurichj at bioinfo.ucr.edu)
+#
+# Copyright Josh Lauricha
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::tigr - TIGR XML sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from efa flat
+file databases.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHORS - Josh Lauricha
+
+Email: laurichj at bioinfo.ucr.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# TODO:
+#  - Clean up code
+#  - Find and fix bugs ;)
+
+# Let the code begin...
+package Bio::SeqIO::tigr;
+use strict;
+
+use Bio::Seq::RichSeq;
+use Bio::Species;
+use Bio::Annotation::Comment;
+use Bio::SeqFeature::Generic;
+use Bio::Seq::SeqFactory;
+use Bio::Seq::RichSeq;
+use Data::Dumper;
+use Error qw/:try/;
+
+use base qw(Bio::SeqIO);
+
+# Set up the stream: run the parent class initialisation, install a
+# sequence factory for Bio::Seq::RichSeq objects and parse the whole
+# document right away.
+sub _initialize
+{
+	my ($self, @args) = @_;
+
+	$self->SUPER::_initialize(@args);
+
+	my $factory = Bio::Seq::SeqFactory->new(-type => 'Bio::Seq::RichSeq');
+	$self->sequence_factory($factory);
+
+	# The document is parsed up front; next_seq() serves from the result
+	$self->_process();
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq object
+ Args    : NONE
+
+=cut
+
+sub next_seq
+{
+	my ($self) = @_;
+
+	# Nothing parsed, or everything already handed out
+	return if !defined($self->{_sequences}) or scalar(@{$self->{_sequences}}) < 1;
+
+	# Take the next sequence off the queue
+	my $seq = shift(@{ $self->{_sequences} } );
+
+	# The 'source' feature carries the 5' and 3' end coordinates
+	my ($source) = grep { $_->primary_tag() eq 'source' } $seq->get_SeqFeatures();
+	my ($end5) = $source->get_tag_values('end5');
+	my ($end3) = $source->get_tag_values('end3');
+
+	# Order the two ends so that $start <= $end
+	my ($start, $end) = ( $end5 < $end3  ? ( $end5, $end3 ) : ( $end3, $end5 ) );
+
+	# make the start a perl (zero-based) index
+	$start -= 1;
+
+	# Number of bases to cut out of the assembly sequence
+	my $length = $end - $start;
+
+	# The region has to lie within the assembly sequence
+	my $assembly_length = length($self->{_assembly}->{seq});
+	if($start < 0) {
+		Bio::Root::OutOfRange->throw("the sequence start is $start < 0");
+	} elsif($end > $assembly_length) {
+		# the message used to print '<' although the test is '>'
+		Bio::Root::OutOfRange->throw("the sequence end is $end > $assembly_length");
+	} elsif($start >= $end) {
+		Bio::Root::OutOfRange->throw("the sequence start is after end $start >= $end");
+	}
+
+	# Replace the placeholder sequence with the real one
+	$seq->seq(substr($self->{_assembly}->{seq}, $start, $length));
+
+	if( $end5 > $end3 ) {
+		# 5' end lies after the 3' end: reverse complement
+		$seq->seq( $seq->primary_seq()->revcom()->seq() );
+	}
+
+	# add the translation to each CDS
+	foreach my $feat ($seq->get_SeqFeatures()) {
+		next if $feat->primary_tag() ne "CDS";
+
+		# A CDS that cannot be translated is reported (when verbose)
+		# but does not abort the sequence
+		try {
+			# Get the subsequence covered by the CDS
+			my $cds = Bio::PrimarySeq->new(
+				-strand => 1,
+				-id  => $seq->accession_number(),
+				-seq => $seq->subseq($feat->location())
+			);
+
+			# Translate it
+			my $trans = $cds->translate(undef, undef, undef, undef, 1, 1)->seq();
+
+			# Add the tag
+			$feat->add_tag_value(translation => $trans);
+		} catch Bio::Root::Exception with {
+			print STDERR 'TIGR strikes again, the CDS is not a valid protein: ', $seq->accession_number(), "\n"
+				if $self->verbose() > 0;
+		};
+	}
+
+	# Set the display id to the accession number if there
+	# is no display id
+	$seq->display_id( $seq->accession_number() ) unless $seq->display_id();
+
+	return $seq;
+}
+
+# Top-level parse loop: reads the stream line by line and dispatches on
+# the XML declaration, the DOCTYPE, <TIGR> and <ASSEMBLY>. Throws on any
+# other line. Returns once </TIGR> is seen or the input is exhausted.
+sub _process
+{
+	my($self) = @_;
+	my $line;
+
+	$line = $self->_readline();
+	do {
+		if($line =~ /<\?xml\s+version\s+=\s+"\d+\.\d+"\?>/o) {
+			# do nothing
+		} elsif ($line =~ /<!DOCTYPE (\w+) SYSTEM "[\w\.]+">/o) {
+			$self->throw("DOCTYPE of $1, not TIGR!")
+				if $1 ne "TIGR" ;
+		} elsif ($line =~ /<TIGR>/o) {
+			$self->_pushback($line);
+			$self->_process_tigr();
+		} elsif ($line =~ /<ASSEMBLY.*?>/o) {
+			$self->_pushback($line);
+			$self->_process_assembly();
+		} elsif ($line =~ /<\/TIGR>/o) {
+			$self->{'eof'}     = 1;
+			return;
+		} else {
+			# throw() takes ONE message string; a second positional
+			# argument would be taken as the exception class
+			$self->throw("Unknown or Invalid process directive: " .
+				join('', ($line =~ /^\s*(<[^>]+>)/o)));
+		}
+		$line = $self->_readline();
+	} while( defined( $line ) );
+}
+
+# Handle the <TIGR> root element: expects <TIGR> as the next line and
+# then hands over to the parser for <PSEUDOCHROMOSOME> or <ASSEMBLY>,
+# whichever comes next. Any other following line is silently dropped.
+sub _process_tigr
+{
+	my($self) = @_;
+	my $line;
+
+	$line = $self->_readline();
+	if($line !~ /<TIGR>/o) {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_tigr called but no " .
+		             "<TIGR> found in stream");
+	}
+
+	$line = $self->_readline();
+	if($line =~ /<PSEUDOCHROMOSOME>/o) {
+		$self->_pushback($line);
+		$self->_process_pseudochromosome();
+	} elsif ($line =~ /<ASSEMBLY.*?>/o) {
+		$self->_pushback($line);
+		$self->_process_assembly();
+	}
+}
+
+# Parse a <PSEUDOCHROMOSOME> element: an optional <SCAFFOLD> (its
+# absence only triggers a warning) followed by a mandatory <ASSEMBLY>
+# and the closing tag. Returns quietly if the next line is not
+# <PSEUDOCHROMOSOME>.
+sub _process_pseudochromosome
+{
+	my ($self) = @_;
+
+	my $line = $self->_readline();
+	return unless $line =~ /<PSEUDOCHROMOSOME>/o;
+
+	$line = $self->_readline();
+
+	# <SCAFFOLD>: consumed if present, otherwise warn and carry on
+	# with the same line
+	if($line !~ /<SCAFFOLD>/o) {
+		$self->warn( "No Scaffold found in <PSUEDOCHROMOSOME> this " .
+		             "is a violation of the TIGR dtd, but we ignore " .
+		             "it so we are ignoring the error\n"
+		);
+	} else {
+		$self->_pushback($line);
+		$self->_process_scaffold();
+		$line = $self->_readline();
+	}
+
+	# <ASSEMBLY> is mandatory
+	if($line !~ /<ASSEMBLY.*>/o) {
+		$self->throw("Missing required ASSEMBLY in <PSEUDOCHROMOSOME>");
+	} else {
+		$self->_pushback($line);
+		$self->_process_assembly();
+		$line = $self->_readline();
+	}
+
+	# the closing tag has to follow
+	return if $line =~ /<\/PSEUDOCHROMOSOME>/;
+
+	$self->throw("Reached end of _process_psuedochromosome");
+}
+
+# Parse an <ASSEMBLY> element. The sub-elements are expected in a fixed
+# order: ASMBL_ID, COORDSET, HEADER, [TILING_PATH], GENE_LIST,
+# [MISC_INFO], [REPEAT_LIST], ASSEMBLY_SEQUENCE (bracketed ones are
+# optional). Results are stored under $self->{_assembly}. Throws if a
+# required element or the closing tag is missing.
+sub _process_assembly
+{
+	my($self) = @_;
+	my $line;
+
+	$line = $self->_readline();
+	if($line !~ /<ASSEMBLY([^>]*)>/o) {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_assembly called " .
+		             "but no <ASSEMBLY> found in stream");
+	}
+
+	# attributes of the <ASSEMBLY> tag itself ($1 from the match above)
+	my %attribs = ($1 =~ /(\w+)\s*=\s+"(.*?)"/og);
+	$self->{_assembly}->{date}       = $attribs{CURRENT_DATE};
+	$self->{_assembly}->{db}         = $attribs{DATABASE};
+	$self->{_assembly}->{chromosome} = $attribs{CHROMOSOME};
+
+	$line = $self->_readline();
+	my($attr, $val); 
+	if(($attr, $val) = ($line =~ /<ASMBL_ID([^>]*)>([^<]*)<\/ASMBL_ID>/o)) {
+		%attribs = ($attr =~ /(\w+)\s*=\s+"(.*?)"/og);
+		$self->{_assembly}->{clone_name} = $attribs{CLONE_NAME};
+		$self->{_assembly}->{clone} = $val;
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <ASMBL_ID> missing");
+	}
+
+	if($line =~ /<COORDSET>/o) {
+		$self->_pushback($line);
+		my $cs = $self->_process_coordset();
+
+		$self->{_assembly}->{end5} = $cs->{end5};
+		$self->{_assembly}->{end3} = $cs->{end3};
+
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <COORDSET> missing");
+	}
+
+	if($line =~ /<HEADER>/o) {
+		$self->_pushback($line);
+		$self->_process_header();
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <HEADER> missing");
+	}
+
+	# optional
+	if($line =~ /<TILING_PATH>/o) {
+		$self->_pushback($line);
+		$self->_process_tiling_path();
+		$line = $self->_readline();
+	}
+
+	if($line =~ /<GENE_LIST>/o) {
+		$self->_pushback($line);
+		$self->_process_gene_list();
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <GENE_LIST> missing");
+	}
+
+	# optional
+	if($line =~ /<MISC_INFO>/o) {
+		$self->_pushback($line);
+		$self->_process_misc_info();
+		$line = $self->_readline();
+	}
+
+	# optional
+	if($line =~ /<REPEAT_LIST>/o) {
+		$self->_pushback($line);
+		$self->_process_repeat_list();
+		$line = $self->_readline();
+	}
+
+	if($line =~ /<ASSEMBLY_SEQUENCE>/o) {
+		$self->_pushback($line);
+		$self->_process_assembly_seq();
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <ASSEMBLY_SEQUENCE> missing");
+	}
+
+	if($line =~ /<\/ASSEMBLY>/o) {
+		return;
+	}
+	$self->throw("Reached the end of <ASSEMBLY>");
+}
+
+# Read the <ASSEMBLY_SEQUENCE> element and store the concatenated
+# sequence in $self->{_assembly}->{seq}. Throws if the opening tag is
+# not the next line, or if the input ends (or an empty value is read)
+# before the closing tag.
+sub _process_assembly_seq
+{
+	my ($self) = @_;
+
+	my $line = $self->_readline();
+	if($line !~ /<ASSEMBLY_SEQUENCE>/o) {
+		$self->throw("Bio::SeqIO::tigr::_process_assembly_seq called ".
+			     "with no <ASSEMBLY_SEQUENCE> in the stream");
+	}
+
+	# Collect the pieces and join them once at the end; this protects
+	# against lots of smaller lines
+	my @chunks;
+
+	# A real while loop: the former do {} while with 'last' inside did
+	# not work, because do BLOCK is not a loop that 'last' can leave.
+	while( $line = $self->_readline() ) {
+		if ($line =~ /^\s*(\w+)\s*$/o) {
+			# a line holding nothing but sequence
+			push(@chunks, $1);
+		} elsif ($line =~ /^\s*(\w+)<\/ASSEMBLY_SEQUENCE>\s*$/o) {
+			# the final piece, followed by the closing tag
+			push(@chunks, $1);
+			$self->{_assembly}->{seq} = join('', @chunks);
+			return;
+		}
+	}
+
+	$self->throw("Reached end of _proces_assembly");
+}
+
+# Parse a <COORDSET> element and return a hash reference with the keys
+# 'end5' and 'end3'. Throws if <COORDSET> is not next in the stream or
+# if the two coordinates cannot be extracted.
+sub _process_coordset
+{
+	my ($self) = @_;
+	my $line;
+	my $h;
+
+	$line = $self->_readline();
+	if($line =~ /<COORDSET>/o) {
+		# put the line back and re-read it through _readtag()
+		$self->_pushback($line);
+		$line = $self->_readtag();
+		($h->{end5}, $h->{end3}) = ($line =~ /<COORDSET>\s*<END5>\s*(\d+)\s*<\/END5>\s*<END3>\s*(\d+)\s*<\/END3>/os);
+		if(!defined($h->{end5}) or !defined($h->{end3})) {
+			$self->throw("Invalid <COORDSET>: $line");
+		}
+		return $h;
+	} else {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_coordset() called " .
+		             "but no <COORDSET> found in stream");
+	}
+}
+
+# Parse the <HEADER> element of an assembly. The sub-elements are
+# expected in a fixed order; clone name, GenBank accession, species
+# (with lineage), keywords, GenBank descriptions and comments are
+# stored under $self->{_assembly}, the remaining ones are read and
+# ignored. Throws if a required element or </HEADER> is missing.
+sub _process_header
+{
+	my ($self) = @_;
+	my $line = $self->_readline();
+
+	if($line !~ /<HEADER>/o) {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_header called " .
+		             "but no <HEADER> found in stream");
+	}
+
+	$line = $self->_readtag();
+	if($line =~ /<CLONE_NAME>([^>]+)<\/CLONE_NAME>/o) {
+		$self->{_assembly}->{clone_name} = $1;
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <CLONE_NAME> missing");
+	}
+
+	if($line =~ /<SEQ_LAST_TOUCHED>/o) {
+		# Ignored for now
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Reqired <SEQ_LAST_TOUCHED> missing");
+	}
+
+	if($line =~ /<GB_ACCESSION>([^<]*)<\/GB_ACCESSION>/o) {
+		$self->{_assembly}->{gb} = $1;
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <GB_ACCESSION> missing");
+	}
+
+	if($line =~ /<ORGANISM>\s*(.+)\s*<\/ORGANISM>/o) {
+		# first word is the genus, second the species, anything
+		# further is taken as the sub-species
+		my( $genus, $species, @ss ) = split(/\s+/o, $1);
+		$self->{_assembly}->{species} = Bio::Species->new();
+		$self->{_assembly}->{species}->genus($genus);
+		$self->{_assembly}->{species}->species($species);
+		$self->{_assembly}->{species}->sub_species(join(' ', @ss)) if scalar(@ss) > 0;
+
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <ORGANISM> missing");
+	}
+
+	if($line =~ /<LINEAGE>([^<]*)<\/LINEAGE>/o) {
+		# species first, then the lineage entries in reverse order
+		$self->{_assembly}->{species}->classification(
+			$self->{_assembly}->{species}->species(),
+			reverse(split(/\s*;\s*/o, $1))
+		);
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <LINEAGE> missing");
+	}
+
+	if($line =~ /<SEQ_GROUP>([^<]*)<\/SEQ_GROUP>/o) {
+		# ignored
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <SEQ_GROUP> missing");
+	}
+
+	# The pattern needs its own capture group; without one, $1 still
+	# held the value captured by an earlier match.
+	while($line =~ /<KEYWORDS>([^<]*)<\/KEYWORDS>/o) {
+		push(@{$self->{_assembly}->{keywords}}, $1);
+		$line = $self->_readtag();
+	}
+
+	while($line =~ /<GB_DESCRIPTION>([^<]+)<\/GB_DESCRIPTION>/o) {
+		push(@{$self->{_assembly}->{gb_desc}},$1);
+		$line = $self->_readtag();
+	}
+
+	while($line =~ /<GB_COMMENT>([^<]+)<\/GB_COMMENT>/o) {
+		push(@{$self->{_assembly}->{gb_comment}}, $1);
+		$line = $self->_readtag();
+	}
+
+	if($line =~ /<AUTHOR_LIST(?:\s*(\w+)\s*=\s*"([^"]+)"\s*)*>/o) {
+		# The author list is ignored; skip ahead to its closing tag.
+		# $line has to be updated here, or the loop can never end.
+		while($line !~ /<\/AUTHOR_LIST>/o) {
+			$line = $self->_readtag();
+			$self->throw("Required </AUTHOR_LIST> missing")
+				unless defined($line);
+		}
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <AUTHOR_LIST> missing");
+	}
+
+	if($line =~ /<\/HEADER>/o) {
+		return;
+	}
+
+	$self->throw("Reached end of header\n");
+}
+
+# Parse a <GENE_LIST> element: <PROTEIN_CODING> followed by <RNA_GENES>,
+# both required, then the closing tag. Throws if any of them is missing.
+sub _process_gene_list
+{
+	my($self) = @_;
+	my $line;
+
+	$line = $self->_readline();
+	if($line !~ /<GENE_LIST>/o) {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_gene_list called " .
+		             "but no <GENE_LIST> in the stream");
+	}
+
+	$line = $self->_readline();
+	if($line =~ /<PROTEIN_CODING>/o) {
+		$self->_pushback($line);
+		$self->_process_protein_coding();
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <PROTEIN_CODING> missing");
+	}
+
+	if($line =~ /<RNA_GENES>/o) {
+		$self->_pushback($line);
+		$self->_process_rna_genes();
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <RNA_GENES> missing");
+	}
+
+	if($line =~ /<\/GENE_LIST>/o) {
+		return;
+	}
+
+	$self->throw("Reached end of _process_gene_list");
+}
+
+# Parse a <PROTEIN_CODING> element: every <TU> in it is handed to
+# _process_tu(), after which the collected sequences are sorted by the
+# smaller of their two end coordinates. Throws if the opening or the
+# closing tag is missing.
+sub _process_protein_coding
+{
+	my ($self) = @_;
+	my $line = $self->_readline();
+
+	if($line !~ /<PROTEIN_CODING>/o) {
+		# the message used to lack a space ("calledbut") and named
+		# <GENE_LIST> instead of the tag actually checked for
+		$self->throw("Bio::SeqIO::tigr::_process_protein_coding called "
+		             . "but no <PROTEIN_CODING> in the stream");
+	}
+
+	$line = $self->_readline();
+	while($line and $line =~ /<TU>/o) {
+		$self->_pushback($line);
+		$self->_process_tu();
+		$line = $self->_readline();
+	}
+
+	# Sort the sequences
+	@{$self->{_sequences}} = sort {
+		my($one, $two) = ( $a, $b );
+		# compare by the 'source' features; sequences lacking one
+		# compare as equal
+		($one) = grep { $_->primary_tag() eq 'source' } $one->get_SeqFeatures();
+		($two) = grep { $_->primary_tag() eq 'source' } $two->get_SeqFeatures();
+		return 0 unless defined $one and defined $two;
+		# the smaller of end5/end3 is the sort key
+		($one) = sort { $a <=> $b } $one->get_tagset_values(qw/end5 end3/);
+		($two) = sort { $a <=> $b } $two->get_tagset_values(qw/end5 end3/);
+		return $one <=> $two;
+	} @{$self->{_sequences}};
+
+	if($line =~ /<\/PROTEIN_CODING>/o) {
+		return;
+	}
+
+	$self->throw("Reached end of _process_protein_coding");
+}
+
+
+# Skip over a <RNA_GENES> element; its content is not parsed. Throws if
+# <RNA_GENES> is not the next line, or if the input ends before the
+# closing tag.
+sub _process_rna_genes
+{
+	my ($self) = @_;
+	my $line = $self->_readline();
+
+	if(!defined($line) or $line !~ /<RNA_GENES>/o) {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_rna_genes called " .
+		             "but no <RNA_GENES> in the stream");
+	}
+
+	while($line !~ /<\/RNA_GENES>/o) {
+		$line = $self->_readline();
+		# without this check the loop would spin forever at end of input
+		$self->throw("Reached end of stream inside <RNA_GENES>")
+			unless defined($line);
+	}
+}
+
+# Skip over a <MISC_INFO> element; its content is not parsed. Throws if
+# <MISC_INFO> is not the next line, or if the input ends before the
+# closing tag.
+sub _process_misc_info
+{
+	my ($self) = @_;
+	my $line = $self->_readline();
+
+	if(!defined($line) or $line !~ /<MISC_INFO>/o) {
+		# throw() takes ONE message string; a second positional
+		# argument would be taken as the exception class
+		$self->throw("Bio::SeqIO::tigr::_process_misc_info called " .
+		             "but no <MISC_INFO> in the stream");
+	}
+
+	while($line !~ /<\/MISC_INFO>/o) {
+		$line = $self->_readline();
+		# without this check the loop would spin forever at end of input
+		$self->throw("Reached end of stream inside <MISC_INFO>")
+			unless defined($line);
+	}
+}
+
+# Skip over a <REPEAT_LIST> element; its content is not parsed. Throws
+# if <REPEAT_LIST> is not the next line, or if the input ends before
+# the closing tag.
+sub _process_repeat_list
+{
+	my ($self) = @_;
+	my $line = $self->_readline();
+
+	if(!defined($line) or $line !~ /<REPEAT_LIST>/o) {
+		# one message string only (a second positional argument would
+		# be taken as the exception class); the message now names the
+		# tag that is actually checked for
+		$self->throw("Bio::SeqIO::tigr::_process_repeat_list called " .
+		             "but no <REPEAT_LIST> in the stream");
+	}
+
+	while($line !~ /<\/REPEAT_LIST>/o) {
+		$line = $self->_readline();
+		# without this check the loop would spin forever at end of input
+		$self->throw("Reached end of stream inside <REPEAT_LIST>")
+			unless defined($line);
+	}
+}
+
+# Skip over a <TILING_PATH> element; its content is not parsed. Throws
+# if <TILING_PATH> is not the next line, or if the input ends before
+# the closing tag.
+sub _process_tiling_path
+{
+	my($self) = @_;
+	my $line = $self->_readline();
+
+	if(!defined($line) or $line !~ /<TILING_PATH>/o) {
+		# one message string only (a second positional argument would
+		# be taken as the exception class); the message now names this
+		# sub and the tag that is actually checked for
+		$self->throw("Bio::SeqIO::tigr::_process_tiling_path called " .
+		             "but no <TILING_PATH> in the stream");
+	}
+
+	while($line !~ /<\/TILING_PATH>/o) {
+		$line = $self->_readline();
+		# without this check the loop would spin forever at end of input
+		$self->throw("Reached end of stream inside <TILING_PATH>")
+			unless defined($line);
+	}
+}
+
+# Skip over a <SCAFFOLD> element; its content is not parsed. Returns
+# right away if the next line is not <SCAFFOLD>.
+sub _process_scaffold
+{
+	my ($self) = @_;
+
+	my $line = $self->_readline();
+	return unless $line =~ /<SCAFFOLD>/o;
+
+	# consume lines up to and including the closing tag, or until the
+	# input runs out
+	while (defined($line = $self->_readline())) {
+		last if $line =~ /<\/SCAFFOLD>/o;
+	}
+}
+
+# Parse one <TU> (transcriptional unit) element into a
+# Bio::Seq::RichSeq and append it to $self->{_sequences}. The sequence
+# itself is only a filler of 'X' characters of the right length here.
+# A Bio::Root::OutOfRange exception raised while parsing is turned
+# into a warning, and the rest of the <TU> is skipped.
+sub _process_tu
+{
+	my($self) = @_;
+	my $line = $self->_readline();
+
+	try {
+		my $tu = Bio::Seq::RichSeq->new(-strand => 1);
+		$tu->species( $self->{_assembly}->{species} );
+
+		# Add the source tag, so we can add the GO annotations to it
+		$tu->add_SeqFeature(Bio::SeqFeature::Generic->new(-source_tag => 'TIGR', -primary_tag => 'source'));
+
+		if($line !~ /<TU>/o) {
+			$self->throw("Process_tu called when no <TU> tag");
+		}
+
+		$line = $self->_readtag();
+		if ($line =~ /<FEAT_NAME>([\w\.]+)<\/FEAT_NAME>/o) {
+			$tu->accession_number($1);
+			$tu->add_secondary_accession($1);
+			$line = $self->_readtag();
+		} else {
+			$self->throw("Invalid Feat_Name");
+		}
+
+		while($line =~ /<GENE_SYNONYM>/o) {
+			# ignore
+			$line = $self->_readtag();
+		}
+
+		while($line =~ /<CHROMO_LINK>\s*([\w\.]+)\s*<\/CHROMO_LINK>/o) {
+			$tu->add_secondary_accession($1);
+			$line = $self->_readtag();
+		}
+
+		if ($line =~ /<DATE>([^>]*)<\/DATE>/o) {
+			# blank dates are not recorded
+			$tu->add_date($1) if $1 and $1 !~ /^\s*$/o;
+			$line = $self->_readline();
+		} else {
+			#$self->throw("Invalid Date: $line");
+		}
+
+		if ($line =~ /<GENE_INFO>/o) {
+			$self->_pushback($line);
+			$self->_process_gene_info($tu);
+			$line = $self->_readline();
+		} else {
+			$self->throw("Invalid Gene_Info");
+		}
+
+		my $end5;
+		my $end3;
+		if($line =~ /<COORDSET>/o) {
+			$self->_pushback($line);
+			my $cs = $self->_process_coordset();
+
+			$end5 = $cs->{end5};
+			$end3 = $cs->{end3};
+
+			# absolute distance between the ends; the sign of the
+			# difference gives the strand
+			my $length = $end3 - $end5;
+			my $strand = $length <=> 0;
+			$length = $length * $strand;
+			$length++; # Correct for starting at 1, not 0
+
+			# Add X filler sequence
+			$tu->seq('X' x $length);
+
+			# Get the source tag:
+			my($source) = grep { $_->primary_tag() eq 'source' } $tu->get_SeqFeatures();
+
+			# Set the start and end values
+			$source->start(1);
+			$source->end($length);
+			$source->strand(1);
+
+			# Add a bunch of tags to it
+			$source->add_tag_value(clone      => $self->{_assembly}->{clone});
+			$source->add_tag_value(clone_name => $self->{_assembly}->{clone_name});
+			$source->add_tag_value(end5       => $end5);
+			$source->add_tag_value(end3       => $end3);
+			$source->add_tag_value(chromosome => $self->{_assembly}->{chromosome});
+			$source->add_tag_value(strand     => ( $strand == 1 ? 'positive' : 'negative' ));
+
+			$line = $self->_readline();
+		} else {
+			$self->throw("Invalid Coordset");
+		}
+
+		if($line =~ /<MODEL[^>]*>/o) {
+			# one or more models in a row
+			do {
+				$self->_pushback($line);
+				$self->_process_model($tu, $end5, $end3);
+				$line = $self->_readline();
+			} while($line =~ /<MODEL[^>]*>/o);
+			# re-read the line after the last model via _readtag()
+			$self->_pushback($line);
+			$line = $self->_readtag();
+		} else {
+			$self->throw("Expected <MODEL> not found");
+		}
+
+		if($line =~ /<TRANSCRIPT_SEQUENCE>/o) {
+			# the transcript sequence is read, but not used further
+			my @chunks;
+			$line = $self->_readline();
+			while ($line =~ /^\s*([ACGT]+)\s*$/o) {
+				push( @chunks, $1 );
+				$line = $self->_readline();
+			}
+			#	$line = $self->_readline();
+		}
+
+		if($line =~ /<GENE_EVIDENCE>/o) {
+			$line = $self->_readtag();
+		}
+
+		while($line =~ /<URL[^>]*>[^<]*<\/URL>/o) {
+			$line = $self->_readtag();
+		}
+
+		if($line =~ /<\/TU>/o) {
+			push(@{$self->{_sequences}}, $tu);
+			return;
+		} else {
+			$self->throw("Expected </TU> not found: $line");
+		}
+	} catch Bio::Root::OutOfRange with {
+		my $E = shift;
+		$self->warn(sprintf("One sub location of a sequence is invalid near line $.\: %s", $E->text()));
+		# Skip the rest of the broken <TU>. The defined() check stops
+		# the loop if the input ends before </TU> is found; without
+		# it the loop would never terminate in that case.
+		while (defined($line) && $line !~ /<\/TU>/o) {
+			$line = $self->_readline();
+		}
+		return;
+	};
+}
+
+# Parse a <GENE_INFO> element for the given sequence object ($tu).
+# Locus identifiers become the accession number and secondary
+# accessions, the common name becomes the description, comments are
+# added as annotation, and GO entries are added as 'GO' tags on the
+# 'source' feature. Other elements are read and ignored. Throws on a
+# missing <GENE_INFO>, <COM_NAME>, <IS_PSEUDOGENE> or closing tag.
+sub _process_gene_info
+{
+	my ($self, $tu) = @_;
+
+	# every locus-like identifier is recorded in the same two ways
+	my $record_accession = sub {
+		my ($acc) = @_;
+		$tu->accession_number($acc);
+		$tu->add_secondary_accession($acc);
+	};
+
+	my $line = $self->_readline();
+	unless ($line =~ /<GENE_INFO>/o) {
+		$self->throw("Invalid Gene Info: $line");
+	}
+	$line = $self->_readline();
+
+	if ($line =~ /<LOCUS>\s*([\w\.]+)\s*<\/LOCUS>/o) {
+		$record_accession->($1);
+		$line = $self->_readline();
+	} elsif ($line =~ /<LOCUS>.*<\/LOCUS>/o) {
+		# Strictly an error, but the input does not always adhere
+		# to the dtd here, so the element is just skipped
+		$line = $self->_readtag();
+	}
+
+	if ($line =~ /<ALT_LOCUS>\s*([\w\.]+)\s*<\/ALT_LOCUS>/o) {
+		$record_accession->($1);
+		$line = $self->_readline();
+	}
+
+	if ($line =~ /<PUB_LOCUS>\s*([\w\.]+)\s*<\/PUB_LOCUS>/o) {
+		$record_accession->($1);
+		$line = $self->_readtag();
+	} elsif ($line =~ /<PUB_LOCUS>.*<\/PUB_LOCUS>/o) {
+		# present but not usable: skipped without complaint
+		$line = $self->_readtag();
+	}
+
+	# the gene name is skipped
+	$line = $self->_readtag() if $line =~ /<GENE_NAME.*>.*<\/GENE_NAME>/o;
+
+	# the common name is required; its attributes are not used
+	if ($line =~ /<COM_NAME([^>]*)>([^>]+)<\/COM_NAME>/o) {
+		my $common_name = $2;
+		$tu->desc($common_name);
+		$line = $self->_readtag();
+	} else {
+		$self->throw("invalid com_name: $line");
+	}
+
+	# comments and public comments both end up as 'comment' annotation
+	while ($line =~ /<COMMENT>([^<]+)<\/COMMENT>/o) {
+		my $note = Bio::Annotation::Comment->new(-text => $1);
+		$tu->annotation()->add_Annotation('comment', $note);
+		$line = $self->_readtag();
+	}
+
+	while ($line =~ /<PUB_COMMENT>([^<]+)<\/PUB_COMMENT>/o) {
+		my $note = Bio::Annotation::Comment->new(-text => $1);
+		$tu->annotation()->add_Annotation('comment', $note);
+		$line = $self->_readtag();
+	}
+
+	# EC number and gene symbol are read, but not stored
+	$line = $self->_readtag() if $line =~ /<EC_NUM>([\w\-\\\.]+)<\/EC_NUM>/o;
+	$line = $self->_readtag() if $line =~ /<GENE_SYM>\s*([^<]+)\s*<\/GENE_SYM>/o;
+
+	# the pseudogene flag is required, but not stored either
+	if ($line =~ /<IS_PSEUDOGENE>([^>]+)<\/IS_PSEUDOGENE>/o) {
+		$line = $self->_readtag();
+	} else {
+		$self->throw("invalid is_pseudogene: $line");
+	}
+
+	# optional elements that are skipped
+	$line = $self->_readtag() if $line =~ /<FUNCT_ANNOT_EVIDENCE/o;
+	$line = $self->_readtag() if $line =~ /<DATE>([^>]+)<\/DATE>/o;
+
+	while ($line =~ /<GENE_ONTOLOGY>/o) {
+		# GO entries are attached to the source feature
+		my ($source) = grep { $_->primary_tag() eq 'source' } $tu->get_SeqFeatures();
+
+		for my $go ( $line =~ /(<GO_ID.*?<\/GO_ID>)/gso ) {
+			my ($assignment) = ($go =~ /<GO_ID\s+ASSIGNMENT\s+=\s+"GO:(\d+)">/os);
+			my ($term)       = ($go =~ /<GO_TERM>([^<]+)<\/GO_TERM>/os);
+			my ($type)       = ($go =~ /<GO_TYPE>([^<]+)<\/GO_TYPE>/os);
+
+			# incomplete entries are dropped
+			next unless defined $type and defined $assignment and defined $term;
+
+			$source->add_tag_value(
+				GO => "ID: $assignment; Type: $type; $term"
+			);
+		}
+		$line = $self->_readtag();
+	}
+
+	return if $line =~ /<\/GENE_INFO/o;
+
+	$self->throw("unexpected end of gene_info");
+}
+
+# Build a Bio::Location::Simple (strand 1) for the coordinate set $cs
+# (hash reference with 'end5' and 'end3'), expressed relative to $end5.
+# $length is the upper bound for the location's end. Throws
+# Bio::Root::OutOfRange if the resulting location fails the checks.
+sub _build_location
+{
+	my ($self, $end5, $end3, $length, $cs) = @_;
+
+	# distance of each coordset end from $end5, counted from 1
+	my $start = 1 + abs( $end5 - $cs->{end5} );
+	my $end   = 1 + abs( $end5 - $cs->{end3} );
+
+	# Bounds checking; only the first failing check is reported
+	my $problem;
+	if( $start < 1 ) {
+		$problem = "locations' start( $start) must be >= 1";
+	} elsif( $end > $length ) {
+		$problem = "locations' end( $end ) must be <= length( $length )";
+	} elsif( $start > $end ) {
+		$problem = "locations' start ( $start ) must be < end ( $end ) $end5, $end3, $cs->{end5}, $cs->{end3}";
+	}
+	Bio::Root::OutOfRange->throw( -text => $problem ) if defined $problem;
+
+	return Bio::Location::Simple->new( -start => $start, -end => $end, -strand => 1 );
+}
+
+# Parse one <MODEL> element from the input stream and attach the MODEL,
+# EXON, CDS and UTR features it describes to the transcriptional unit.
+#
+# Args:
+#   $tu   - sequence object the features are added to
+#   $end5 - passed through to _build_location() and _process_exon()
+#   $end3 - passed through to _build_location() and _process_exon()
+# Returns: nothing.
+# Throws:  when a required child element is missing, or when the input
+#          ends before the closing </MODEL> tag.
+sub _process_model
+{
+	my($self, $tu, $end5, $end3) = @_;
+	my $model = Bio::SeqFeature::Generic->new(
+		-source_tag  => 'TIGR',
+		-primary_tag => 'MODEL',
+	);
+
+	my $line = $self->_readline();
+	if($line !~ /<MODEL ([^>]+)>/o) {
+		$self->throw("Invalid Model: $line")
+	}
+	# TODO: Add the MODEL attributes (e.g. CURATED) as tags on the model
+	$line = $self->_readline();
+
+	if($line =~ /<FEAT_NAME>\s*([\w\.]+)\s*<\/FEAT_NAME>/o) {
+		# Copy the capture before calling out, so that a regex run
+		# inside a callee cannot change the value being passed.
+		my $feat_name = $1;
+		$model->add_tag_value( feat_name => $feat_name );
+		$tu->add_secondary_accession($feat_name);
+		$line = $self->_readline();
+	} else {
+		$self->throw("Invalid Feature Name: $line");
+	}
+
+	# <PUB_LOCUS> is optional; its absence is not an error.
+	if($line =~ /<PUB_LOCUS>\s*([\w\.]+)\s*<\/PUB_LOCUS>/o) {
+		my $pub_locus = $1;
+		$model->add_tag_value( pub_locus => $pub_locus );
+		$tu->add_secondary_accession($pub_locus);
+		$line = $self->_readline();
+	}
+
+	if($line =~ /<CDNA_SUPPORT>/o) {
+		$self->_pushback($line);
+		$self->_process_cdna_support( $model );
+		$line = $self->_readline();
+	}
+
+	while($line =~ /<CHROMO_LINK>([^>]+)<\/CHROMO_LINK>/o) {
+		$model->add_tag_value( chromo_link => $1 );
+		$line = $self->_readline();
+	}
+
+	# The date is required but its value is not stored.
+	if($line =~ /<DATE>([^>]+)<\/DATE>/o) {
+		$line = $self->_readline();
+	} else {
+		$self->throw("Invalid Date: $line");
+	}
+
+	if($line =~ /<COORDSET>/o) {
+		$self->_pushback($line);
+		my $cs = $self->_process_coordset();
+		my $loc = $self->_build_location($end5, $end3, $tu->length(), $cs);
+
+		$model->start( $loc->start() );
+		$model->end(   $loc->end()   );
+		$line = $self->_readline();
+	} else {
+		$self->throw("Invalid Coordset: $line");
+	}
+
+	# One split-location feature each collects all exons / all CDS
+	# segments of this model.
+	my $exon = Bio::SeqFeature::Generic->new(
+		-source_tag  => 'TIGR',
+		-primary_tag => 'EXON',
+		-location => Bio::Location::Split->new(),
+		-tags => [ locus => $tu->accession_number() ],
+	);
+	$exon->add_tag_value( model => $model->get_tag_values('feat_name') );
+
+	my $cds  = Bio::SeqFeature::Generic->new(
+		-source_tag  => 'TIGR',
+		-primary_tag => 'CDS',
+		-location => Bio::Location::Split->new(),
+		-tags => [ locus => $tu->accession_number() ],
+	);
+	$cds->add_tag_value( model => $model->get_tag_values('feat_name') );
+	my $utr = [];
+
+	if($line =~ /<EXON>/o) {
+		do {
+			$self->_pushback($line);
+			$self->_process_exon( $tu, $exon, $cds, $utr, $end5, $end3 );
+			$line = $self->_readline();
+		} while($line =~ /<EXON>/o);
+	} else {
+		$self->throw("Required <EXON> missing");
+	}
+
+	# Skip whatever is left of the model.  _readline() yields undef at the
+	# end of the input, which can never match, so fail explicitly instead
+	# of looping forever on a truncated file.
+	until(defined $line and $line =~ /<\/MODEL>/o) {
+		$line = $self->_readline();
+		$self->throw("Reached end of input before </MODEL>")
+			unless defined $line;
+	}
+
+	foreach my $feature (@$utr) {
+		$feature->add_tag_value( model => $model->get_tag_values('feat_name') );
+	}
+
+	# Add the model, EXONs, CDS, and UTRs
+	$tu->add_SeqFeature($model) if $model and $model->start() >= 1;
+	$tu->add_SeqFeature($exon)  if $exon  and scalar($exon->location()->each_Location()) >= 1;
+	$tu->add_SeqFeature($cds)   if $cds   and scalar($cds->location()->each_Location()) >= 1;
+	$tu->add_SeqFeature(@$utr);
+
+	return;
+}
+
+# Consume a <CDNA_SUPPORT> element from the stream and record every
+# <ACCESSION> child as a 'cdna_support' tag on $model.
+#
+# Throws when the stream is not positioned on <CDNA_SUPPORT>, or when the
+# line following the last <ACCESSION> is not </CDNA_SUPPORT>.
+sub _process_cdna_support
+{
+	my($self, $model) = @_;
+
+	my $line = $self->_readline();
+	$self->throw("Bio::SeqIO::tigr::_process_cdna_support called ",
+	             "but no <CDNA_SUPPORT> in the stream")
+		if $line !~ /<CDNA_SUPPORT>/o;
+
+	$line = $self->_readline();
+
+	# Each iteration handles one accession: the first capture is the raw
+	# attribute text of the element, the second its character data.
+	while(my($attr_text, $desc) = ($line =~ /<ACCESSION([^>]+)>(.*)<\/ACCESSION>/o)) {
+		my %attribs = ($attr_text =~ /(\w+)\s*=\s*"([^"]*)"/og);
+
+		$model->add_tag_value(
+			cdna_support => "DBXRef: $attribs{DBXREF}; $desc"
+		);
+
+		$line = $self->_readline();
+	}
+
+	$self->throw("reached end of _process_cdna_support")
+		unless $line =~ /<\/CDNA_SUPPORT>/o;
+	return;
+}
+
+
+# Consume one <EXON> element from the stream.
+#
+# The exon's coordset is appended to the split location of $exon; nested
+# <CDS> and <UTRS> elements, when present, are handed to _process_cds()
+# and _process_utrs().  <FEAT_NAME> and <DATE> must be present but their
+# values are not kept.
+#
+# Args: $tu (sequence, used for its length), $exon and $cds (features
+#       with split locations), $utr (array reference collecting UTR
+#       features), $end5 / $end3 (passed on to _build_location()).
+# Throws when a required child is missing or </EXON> is not found.
+sub _process_exon
+{
+	my($self, $tu, $exon, $cds, $utr, $end5, $end3 ) = @_;
+
+	my $line = $self->_readline();
+	$self->throw("Bio::SeqIO::tigr::_process_exon called ",
+	             "but no <EXON> in the stream")
+		if $line !~ /<EXON>/o;
+
+	$line = $self->_readtag();
+	$self->throw("Required <FEAT_NAME> missing")
+		unless $line =~ /<FEAT_NAME>([^<]+)<\/FEAT_NAME>/o;
+
+	$line = $self->_readtag();
+	$self->throw("Required <DATE> missing")
+		unless $line =~ /<DATE>([^<]+)<\/DATE>/o;
+
+	$line = $self->_readtag();
+	$self->throw("Required <COORDSET> missing")
+		unless $line =~ /<COORDSET>/o;
+
+	# Hand the coordset back to the stream so the dedicated parser can
+	# read it from the beginning.
+	$self->_pushback($line);
+	my $coords = $self->_process_coordset();
+	my $where  = $self->_build_location($end5, $end3, $tu->length(), $coords);
+	$exon->location()->add_sub_Location($where);
+	$line = $self->_readline();
+
+	if($line =~ /<CDS>/o) {
+		$self->_pushback($line);
+		$self->_process_cds($tu, $end5, $end3, $cds);
+		$line = $self->_readline();
+	}
+
+	if($line =~ /<UTRS>/o) {
+		$self->_pushback($line);
+		$self->_process_utrs($tu, $end5, $end3, $utr);
+		$line = $self->_readline();
+	}
+
+	return if $line =~ /<\/EXON>/o;
+
+	$self->throw("Reached End of Bio::SeqIO::tigr::_process_exon");
+}
+
+# Consume one <CDS> element from the stream and append its coordset to
+# the split location of the $cds feature.
+#
+# <FEAT_NAME> and <DATE> must be present but their values are not kept.
+#
+# Args: $tu (sequence, used for its length), $end5 / $end3 (passed on to
+#       _build_location()), $cds (feature with a split location).
+# Throws when a required child is missing or </CDS> is not found.
+sub _process_cds
+{
+	my($self, $tu, $end5, $end3, $cds) = @_;
+	my $line = $self->_readline();
+
+	if($line !~ /<CDS>/o) {
+		# The message used to name a different (non-existent) method.
+		$self->throw("Bio::SeqIO::tigr::_process_cds called ",
+		             "but no <CDS> in the stream");
+	}
+
+	$line = $self->_readtag();
+	if($line =~ /<FEAT_NAME>([^<]+)<\/FEAT_NAME>/o) {
+		#$cds->{'FEAT_NAME'} = $1;
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <FEAT_NAME> missing");
+	}
+
+	if($line =~ /<DATE>([^<]+)<\/DATE>/o) {
+		#$cds->{'DATE'} = $1;
+		$line = $self->_readtag();
+	} else {
+		$self->throw("Required <DATE> missing");
+	}
+
+	if($line =~ /<COORDSET>/o) {
+		# Give the coordset back to the stream for the dedicated parser.
+		$self->_pushback($line);
+		my $cs = $self->_process_coordset();
+		my $loc = $self->_build_location($end5, $end3, $tu->length(), $cs);
+		$cds->location()->add_sub_Location($loc);
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <COORDSET> missing");
+	}
+
+	if($line =~ /<\/CDS>/o) {
+		return;
+	}
+
+	$self->throw("Reached end of Bio::SeqIO::tigr::_process_cds");
+}
+
+# Consume a <UTRS> element from the stream, dispatching each child to the
+# parser for its kind (<LEFT_UTR>, <RIGHT_UTR> or <EXTENDED_UTR>).
+#
+# Args: $tu, $end5, $end3 are passed through to the child parsers; $utrs
+#       is the array reference they append features to.
+# Returns: $utrs.
+# Throws when the stream is not positioned on <UTRS or a child is of an
+#        unknown kind.
+sub _process_utrs
+{
+	my($self, $tu, $end5, $end3, $utrs) = @_;
+
+	my $line = $self->_readline();
+	$self->throw("Bio::SeqIO::tigr::_process_utrs called but no ",
+	             "<UTRS> found in stream")
+		unless $line =~ /<UTRS/o;
+
+	for($line = $self->_readline();
+	    $line !~ /<\/UTRS>/o;
+	    $line = $self->_readline()) {
+		# Every child parser expects to read its own opening line.
+		$self->_pushback($line);
+
+		my $handler = $line =~ /<LEFT_UTR>/o     ? '_process_left_utr'
+		            : $line =~ /<RIGHT_UTR>/o    ? '_process_right_utr'
+		            : $line =~ /<EXTENDED_UTR>/o ? '_process_ext_utr'
+		            :                              undef;
+		$self->throw("Unexpected tag") unless defined $handler;
+
+		$self->$handler($tu, $end5, $end3, $utrs);
+	}
+
+	# The loop above can only be left once </UTRS> has been read.
+	return $utrs;
+}
+
+# Consume one <LEFT_UTR> element from the stream and append a LEFT_UTR
+# feature (strand 1, located via _build_location()) to @$utrs.
+#
+# Returns nothing.  Throws when the stream is not positioned on
+# <LEFT_UTR>, the <COORDSET> child is missing, or </LEFT_UTR> does not
+# follow it.
+sub _process_left_utr
+{
+	my($self, $tu, $end5, $end3, $utrs) = @_;
+
+	my $line = $self->_readline();
+	$self->throw("Bio::SeqIO::tigr::_process_left_utr called but ",
+	             "no <LEFT_UTR> found in stream")
+		unless $line =~ /<LEFT_UTR>/o;
+
+	$line = $self->_readtag();
+	$self->throw("Required <COORDSET> missing")
+		unless $line =~ /<COORDSET>/o;
+
+	# Give the coordset back to the stream for the dedicated parser.
+	$self->_pushback($line);
+	my $coords = $self->_process_coordset();
+	my $where  = $self->_build_location($end5, $end3, $tu->length(), $coords);
+
+	push(@$utrs, Bio::SeqFeature::Generic->new(
+		-source_tag  => 'TIGR',
+		-primary_tag => 'LEFT_UTR',
+		-strand      => 1,
+		-start       => $where->start(),
+		-end         => $where->end()
+	));
+
+	$line = $self->_readline();
+	return if $line =~ /<\/LEFT_UTR>/o;
+
+	$self->throw("Reached end of Bio::SeqIO::tigr::_process_left_utr");
+}
+
+# Consume one <RIGHT_UTR> element from the stream and append a RIGHT_UTR
+# feature (strand 1, located via _build_location()) to @$utrs.
+#
+# Returns: whatever _process_coordset() returned for the element's
+#          <COORDSET>.
+# Throws when the stream is not positioned on <RIGHT_UTR>, the <COORDSET>
+#        child is missing, or </RIGHT_UTR> does not follow it.
+sub _process_right_utr
+{
+	my($self, $tu, $end5, $end3, $utrs) = @_;
+	my $line = $self->_readline();
+
+	if($line !~ /<RIGHT_UTR>/o) {
+		$self->throw("Bio::SeqIO::tigr::_process_right_utr called but ",
+		             "no <RIGHT_UTR> found in stream");
+	}
+
+	my $coordset;
+	$line = $self->_readtag();
+	if($line =~ /<COORDSET>/o) {
+		# Parse the coordset exactly once, as the sibling UTR parsers do;
+		# it used to be pushed back and parsed a second time.
+		$self->_pushback($line);
+		$coordset = $self->_process_coordset();
+		my $loc = $self->_build_location($end5, $end3, $tu->length(), $coordset);
+
+		push(@$utrs, Bio::SeqFeature::Generic->new(
+			-source_tag  => 'TIGR',
+			-primary_tag => 'RIGHT_UTR',
+			-strand      => 1,
+			-start       => $loc->start(),
+			-end         => $loc->end()
+		));
+
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <COORDSET> missing");
+	}
+
+	if($line =~ /<\/RIGHT_UTR>/o) {
+		return $coordset;
+	}
+	$self->throw("Reached end of Bio::SeqIO::tigr::_process_right_utr");
+}
+
+# Consume one <EXTENDED_UTR> element from the stream and append an
+# EXTENDED_UTR feature (strand 1, located via _build_location()) to
+# @$utrs.
+#
+# Returns: whatever _process_coordset() returned for the element's
+#          <COORDSET>, matching _process_right_utr().  Previously the
+#          returned variable was never assigned, so the result was
+#          always undef.
+# Throws when the stream is not positioned on <EXTENDED_UTR>, the
+#        <COORDSET> child is missing, or </EXTENDED_UTR> does not follow.
+sub _process_ext_utr
+{
+	my($self, $tu, $end5, $end3, $utrs) = @_;
+	my $line = $self->_readline();
+
+	if($line !~ /<EXTENDED_UTR>/o) {
+		$self->throw("Bio::SeqIO::tigr::_process_ext_utr called but ",
+		             "no <EXTENDED_UTR> found in stream");
+	}
+
+	my $coordset;
+	$line = $self->_readtag();
+	if($line =~ /<COORDSET>/o) {
+		# Give the coordset back to the stream for the dedicated parser.
+		$self->_pushback($line);
+		$coordset = $self->_process_coordset();
+		my $loc = $self->_build_location($end5, $end3, $tu->length(), $coordset);
+
+		push(@$utrs, Bio::SeqFeature::Generic->new(
+			-source_tag  => 'TIGR',
+			-primary_tag => 'EXTENDED_UTR',
+			-strand      => 1,
+			-start       => $loc->start(),
+			-end         => $loc->end()
+		));
+
+		$line = $self->_readline();
+	} else {
+		$self->throw("Required <COORDSET> missing");
+	}
+
+	if($line =~ /<\/EXTENDED_UTR>/o) {
+		return $coordset;
+	}
+	$self->throw("Reached end of Bio::SeqIO::tigr::_process_ext_utr");
+}
+
+# Read the next tag from the stream.
+#
+# Returns one of:
+#   - "</NAME>" when the first '<' on the next non-blank line starts a
+#     closing tag; any text following that closing tag on the line is
+#     pushed back onto the stream;
+#   - otherwise the text from the first opening tag through its closing
+#     tag.  Lines without an opening tag are skipped, and further lines
+#     are appended until the closing tag has been seen.  Trailing text
+#     made up of word characters and whitespace is pushed back.
+#
+# Throws when the input ends before a complete tag has been read (the
+# read loops used to spin forever in that case).
+sub _readtag
+{
+	my($self) = @_;
+	my $line = $self->_readline();
+	$self->throw("Reached end of input while looking for a tag")
+		unless defined $line;
+	chomp($line);
+
+	my $tag;
+	if(($tag) = ($line =~ /^[^<]*<\/(\w+)/o)) {
+		# $1 below is the capture of the condition on the same line.
+		$self->_pushback($1) if $line =~ /<\/$tag>(.+)$/;
+		return "</$tag>";
+	}
+
+	# Skip lines until one contains an opening tag.
+	until(($tag) = ($line =~ /<(\w+)[^>]*>/o)) {
+		$line = $self->_readline();
+		$self->throw("Reached end of input while looking for a tag")
+			unless defined $line;
+		chomp $line;
+	}
+
+	# Accumulate lines until the element is closed.
+	until($line =~ /<\/$tag>/) {
+		my $more = $self->_readline();
+		$self->throw("Reached end of input before </$tag>")
+			unless defined $more;
+		$line .= $more;
+	}
+
+	if(my ($val) = ($line =~ /(<$tag.*>.*?<\/$tag>)/s)) {
+		if($line =~ /<\/$tag>\s*(\w+[\s\w]*?)\s*$/s) {
+			$self->_pushback($1)
+		}
+		return $val;
+	}
+	$self->throw("summerror");
+}
+
+# Return the next line of input that is not empty or whitespace-only.
+# Returns whatever the parent class's _readline() returns once it stops
+# yielding defined lines (undef).
+sub _readline
+{
+	my($self) = @_;
+
+	my $line = $self->SUPER::_readline();
+	$line = $self->SUPER::_readline()
+		while defined($line) and $line =~ /^\s*$/o;
+
+	return $line;
+}
+
+# Raise an exception through the parent class.  All message fragments
+# are concatenated and prefixed with the current value of $. (the input
+# line number of the last-read filehandle) in square brackets.
+sub throw
+{
+	my $self = shift;
+	$self->SUPER::throw(join('', "[$.]", @_));
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigrxml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigrxml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tigrxml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,533 @@
+# $Id: tigrxml.pm,v 1.8.4.1 2006/10/02 23:10:30 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO::tigrxml
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::tigrxml - Parse TIGR (new) XML 
+
+=head1 SYNOPSIS
+
+  use Bio::SeqIO;
+  my $in = new Bio::SeqIO(-format => 'tigrxml',
+                          -file   => 'file.xml');
+
+  while( my $seq = $in->next_seq ) {
+     # do something...
+  }
+
+=head1 DESCRIPTION
+
+This is a parser for TIGR Coordset XML for their in-progress
+annotation dbs.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqIO::tigrxml;
+use vars qw($Default_Source);
+use strict;
+use XML::SAX;
+use XML::SAX::Writer;
+use Data::Dumper;
+use Bio::Seq::SeqFactory;
+use Bio::Species;
+use Bio::SeqFeature::Generic;
+use Bio::Annotation::Reference;
+use Bio::Annotation::Comment;
+use Bio::Annotation::DBLink;
+use List::Util qw(min max);
+
+use base qw(Bio::SeqIO XML::SAX::Base);
+
+
+$Default_Source = 'TIGR';
+
+# Set up the stream: run the parent initialisation, create a SAX parser
+# that uses this object as its handler, and install a default sequence
+# factory producing Bio::Seq::RichSeq objects if none is set yet.
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    $self->{'_parser'} = XML::SAX::ParserFactory->parser('Handler' => $self);
+
+    unless( defined $self->sequence_factory ) {
+        $self->sequence_factory(
+            Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
+                                      -type    => 'Bio::Seq::RichSeq'));
+    }
+    return;
+}
+
+# Return the next sequence, or undef when none is left.
+#
+# The input is only parsed when no sequence is queued and the filehandle
+# is not at end-of-file; parsing goes through the SAX parser, whose
+# handlers fill the queue of sequences.
+sub next_seq {
+    my $self = shift;
+
+    my $queued = @{$self->{'_seendata'}->{'_seqs'} || []};
+    unless( $queued || eof($self->_fh) ) {
+        $self->{'_parser'}->parse_file($self->_fh);
+    }
+    return shift @{$self->{'_seendata'}->{'_seqs'}};
+}
+
+# XML::SAX::Base methods
+
+# SAX handler: a new document starts.  Resets the per-document state
+# (queued sequences, collected authors, stack of open features) and then
+# chains to the parent handler.
+sub start_document {
+    my ($self,$doc) = @_;
+    my %fresh_state = (
+        '_seqs'    => [],
+        '_authors' => [],
+        '_feats'   => [],
+    );
+    $self->{'_seendata'} = \%fresh_state;
+    return $self->SUPER::start_document($doc);
+}
+
+# SAX handler: the document has ended.  Nothing to do locally; the call
+# is forwarded to the parent handler and its result returned.
+sub end_document {
+    my $self = shift;
+    my ($doc) = @_;
+    return $self->SUPER::end_document($doc);
+}
+
+# Split the COORDS attribute ("N-M") of an element into start, end and
+# strand.  When N > M the two numbers are swapped and the strand is -1,
+# otherwise the strand is 1.  Start and end are undef when the attribute
+# does not contain two dash-separated numbers.
+#
+# Args:    $attr - the 'Attributes' hash of a SAX element
+# Returns: the list (start, end, strand)
+sub _coords_to_range {
+    my ($self,$attr) = @_;
+    my ($s,$e) = ($attr->{'{}COORDS'}->{'Value'} =~ /(\d+)\-(\d+)/);
+    my $strand = 1;
+    if( $s > $e) {
+        ($s,$e,$strand) = ( $e,$s,-1);
+    }
+    return ($s,$e,$strand);
+}
+
+# SAX handler: an element opens.
+#
+# ASSEMBLY creates a new sequence; TU, MODEL, EXON, CDS, RNA-EXON,
+# PRE-TRNA, TRNA and REPEAT each create a feature on the current
+# sequence.  TU, MODEL, PRE-TRNA, TRNA and REPEAT features are also
+# pushed on the stack of open features so that nested elements can find
+# their parent.  The upper-cased element name is pushed on the state
+# stack used by characters(), and the event is forwarded to the parent
+# handler.
+sub start_element {
+    my ($self,$ele) = @_;
+    # attributes
+    my $name   = uc $ele->{'LocalName'};
+    my $attr   = $ele->{'Attributes'};
+    my $seen   = ($self->{'_seendata'} ||= {});
+    my $curseq = $seen->{'_seqs'}->[-1];
+    my $seqid  = defined $curseq ? $curseq->display_id : undef;
+
+    # we're going to try and be SO-nice here
+    if( $name eq 'ASSEMBLY' ) { # New sequence
+        my ($len) = $attr->{'{}COORDS'}->{'Value'} =~ /\d+\-(\d+)/;
+        push @{$seen->{'_seqs'}},
+            $self->sequence_factory->create
+            (-display_id => $attr->{'{}ASMBL_ID'}->{'Value'},
+             -length     => $len,
+             );
+    } elsif( $name eq 'AUTHOR_LIST' ) {
+        $seen->{'_authors'} = [];
+    } elsif( $name eq 'TU' ) { # gene feature
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $fname = $attr->{'{}FEAT_NAME'}->{'Value'};
+        my $f = Bio::SeqFeature::Generic->new
+            (-seq_id      => $seqid,
+             -start       => $s,
+             -end         => $e,
+             -strand      => $strand,
+             -primary_tag => 'gene', # what does this really map to?
+             -source_tag  => $Default_Source,
+             -tag         => {
+                 'Note'         => $attr->{'{}COM_NAME'}->{'Value'},
+                 'ID'           => $fname,
+                 'locus'        => $attr->{'{}LOCUS'}->{'Value'},
+                 'pub_locus'    => $attr->{'{}PUB_LOCUS'}->{'Value'},
+                 'alt_locus'    => $attr->{'{}ALT_LOCUS'}->{'Value'},
+                 'pub_comment'  => $attr->{'{}PUB_COMMENT'}->{'Value'},
+             }
+             );
+        push @{$seen->{'_feats'}}, $f;
+        # add this feature to the current sequence
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'MODEL' ) { # mRNA/transcript
+        # reset the UTRs
+        $seen->{"five_prime_UTR"}  = undef;
+        $seen->{"three_prime_UTR"} = undef;
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $parent = $seen->{'_feats'}->[-1];
+        my ($parentid) = $parent->get_tag_values('ID');
+        my $f = Bio::SeqFeature::Generic->new
+            (-primary_tag => 'transcript',
+             -source_tag  => $Default_Source,
+             -start       => $s,
+             -end         => $e,
+             -strand      => $strand,
+             -seq_id      => $seqid,
+             -tag         => {
+                 'ID'     => $attr->{'{}FEAT_NAME'}->{'Value'},
+                 'Parent' => $parentid,
+                 'Note'   => $attr->{'{}COMMENT'}->{'Value'},
+             });
+        $parent->add_SeqFeature($f);
+        push @{$seen->{'_feats'}}, $f;
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'EXON' ) { # exon feature
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $parent = $seen->{'_feats'}->[-1];
+        my ($parentid) = $parent->get_tag_values('ID');
+        my $f = Bio::SeqFeature::Generic->new
+            (-primary_tag => 'exon',
+             -source_tag  => $Default_Source,
+             -seq_id      => $seqid,
+             -start       => $s,
+             -end         => $e,
+             -strand      => $strand,
+             -tag         => {
+                 'ID'     => $attr->{'{}FEAT_NAME'}->{'Value'},
+                 'Parent' => $parentid,
+             });
+        # we'll still just add exons to the transcript
+        $parent->add_SeqFeature($f,'EXPAND');
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'CDS' ) {
+        # CDS will be the translation of the transcript
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $parent = $seen->{'_feats'}->[-1];
+        my ($parentid) = $parent->get_tag_values('ID');
+        $self->assert($parent->primary_tag eq 'transcript', 'Testing for primary tag equivalent to mRNA');
+        $self->assert($parent->strand == $strand || abs($s-$e) == 0, 'Testing that parent feature and current feature strand are equal '. $parentid. ' '.$attr->{'{}FEAT_NAME'}->{'Value'});
+        my $f = Bio::SeqFeature::Generic->new
+            (-primary_tag => 'CDS',
+             -source_tag  => $Default_Source,
+             -seq_id      => $seqid,
+             -start       => $s,
+             -end         => $e,
+             -strand      => $parent->strand,
+             -tag         => {
+                 'ID'     => $attr->{'{}FEAT_NAME'}->{'Value'},
+                 'Parent' => $parentid, # should be the mRNA
+             });
+        $parent->add_SeqFeature($f);
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'RNA-EXON' ) {
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $parent = $seen->{'_feats'}->[-1];
+        my ($parentid) = $parent->get_tag_values('ID');
+        my $f = Bio::SeqFeature::Generic->new
+            (-primary_tag => 'tRNA_exon', # tRNA_exon?
+             -source_tag  => $Default_Source,
+             -seq_id      => $seqid,
+             -start       => $s,
+             -end         => $e,
+             -strand      => $strand,
+             -tag         => {
+                 'ID'     => $attr->{'{}FEAT_NAME'}->{'Value'},
+                 'Parent' => $parentid,
+             }
+             );
+        $parent->add_SeqFeature($f);
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'PRE-TRNA' ) { # tRNA gene
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $f = Bio::SeqFeature::Generic->new
+            ( -primary_tag => 'tRNA_coding_gene',
+              -source_tag  => $Default_Source,
+              -seq_id      => $seqid,
+              -start       => $s,
+              -end         => $e,
+              -strand      => $strand,
+              -tag         => {'ID' => $attr->{'{}FEAT_NAME'}->{'Value'},
+                           }
+              );
+        push @{$seen->{'_feats'}}, $f;
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'TRNA' ) { # tRNA transcript
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $parent = $seen->{'_feats'}->[-1];
+        my ($parentid) = $parent->get_tag_values('ID');
+        my $f = Bio::SeqFeature::Generic->new
+            (-primary_tag => 'tRNA_primary_transcript',
+             -source_tag  => $Default_Source,
+             -start       => $s,
+             -end         => $e,
+             -strand      => $strand,
+             -seq_id      => $seqid,
+             -tag         => {
+                 'ID'     => $attr->{'{}FEAT_NAME'}->{'Value'},
+                 'Parent' => $parentid,
+                 'Note'   => $attr->{'{}COM_NAME'}->{'Value'},
+                 'anticodon' => $attr->{'{}ANTICODON'}->{'Value'},
+                 'pub_locus' => $attr->{'{}PUB_LOCUS'}->{'Value'},
+             });
+        $parent->add_SeqFeature($f);
+        push @{$seen->{'_feats'}}, $f;
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'REPEAT' ) {
+        my ($s,$e,$strand) = $self->_coords_to_range($attr);
+        my $f = Bio::SeqFeature::Generic->new
+            (-primary_tag => 'simple_repeat',
+             -source_tag  => $Default_Source,
+             -seq_id      => $seqid,
+             -start       => $s,
+             -end         => $e,
+             # was misspelled "-stand", so the strand was never set
+             -strand      => $strand,
+             -tag         => {
+                 'ID'     => $attr->{'{}FEAT_NAME'}->{'Value'},
+             });
+
+        push @{$seen->{'_feats'}}, $f;
+        $curseq->add_SeqFeature($f);
+    } elsif( $name eq 'HEADER'         || $name eq 'CLONE_NAME'  ||
+             $name eq 'ORGANISM'       || $name eq 'PROTEIN_SEQ' ||
+             $name eq 'REPEAT_LIST'    || $name eq 'AUTHOR'      ||
+             $name eq 'GB_DESCRIPTION' || $name eq 'GB_COMMENT'  ||
+             $name eq 'LINEAGE' ) {
+        # Known elements that need no work when they open; their text,
+        # if any, is handled in characters().
+    } else {
+        $self->warn("Unknown element $name, ignored\n");
+    }
+    push @{$self->{'_state'}}, $name;
+    $self->SUPER::start_element($ele);
+}
+
+# Derive UTR features for a finished MODEL (transcript) feature.
+#
+# Exons that start before the first CDS segment yield UTRs on the left,
+# exons that end after the last CDS segment yield UTRs on the right.
+# Which side is called five_prime_UTR or three_prime_UTR depends on the
+# exon's strand.  Every UTR is added both to $model and to $curseq.
+# Warns and returns early when the model has no CDS or no exon
+# sub-features.
+sub _add_model_utrs {
+    my ($self,$model,$curseq) = @_;
+    my ($parentid) = $model->get_tag_values('Parent');
+
+    # Snapshot of the sub-features, sorted smallest to largest; the UTRs
+    # added below do not show up in these lists.
+    my @features = $model->get_SeqFeatures();
+    my @exons    = sort { $a->start <=> $b->start }
+                   grep { $_->primary_tag eq 'exon' } @features;
+    my @cdsexons = sort { $a->start <=> $b->start }
+                   grep { $_->primary_tag eq 'CDS' } @features;
+
+    if( ! @cdsexons ) {
+        $self->warn( "no CDS exons $parentid!");
+        return;
+    } elsif( ! @exons ) {
+        $self->warn("no exons $parentid!" );
+        return;
+    }
+
+    # Left side: exons starting before the first CDS segment.
+    my $cdsexon = $cdsexons[0];
+    my @pending = @exons;
+    my $exon    = shift @pending;
+    # NOTE(review): the counter restarts for the right side below, so a
+    # left and a right UTR of one transcript can get the same ID --
+    # confirm whether that is intended.
+    my $utrct = 1;
+    while( defined $exon && $exon->start < $cdsexon->start ) {
+        my ($pid) = $exon->get_tag_values('Parent');
+        $self->debug("LeftPhase: tu-id $parentid mrna-id $pid exon is ".
+                     $exon->location->to_FTstring.
+                     " CDSexon is ".$cdsexon->location->to_FTstring."\n");
+
+        my $utr = Bio::SeqFeature::Generic->new
+            (-seq_id      => $exon->seq_id,
+             -strand      => $exon->strand,
+             -primary_tag => $exon->strand > 0 ? "five_prime_UTR" : "three_prime_UTR",
+             -source_tag  => $Default_Source,
+             -tag         => {
+                 'ID'     => "$pid.UTR".$utrct++,
+                 'Parent' => $pid },
+             );
+        # The extent is the same for either UTR type on this side: from
+        # the exon start to the exon end or the base before the CDS,
+        # whichever comes first.
+        $utr->start( $exon->start );
+        $utr->end( min ( $exon->end, $cdsexon->start - 1) );
+        $model->add_SeqFeature($utr);
+        $curseq->add_SeqFeature($utr);
+        $exon = shift @pending;
+    }
+
+    # Right side: exons ending after the last CDS segment.
+    $cdsexon = $cdsexons[-1];
+    @pending = @exons;
+    $exon    = pop @pending;
+    $utrct   = 1;
+    while( defined $exon && $exon->end > $cdsexon->end ) {
+        my ($pid) = $exon->get_tag_values('Parent');
+        $self->debug("RightPhase: tu-id $parentid mrna-id $pid exon is ".
+                     $exon->location->to_FTstring.
+                     " CDSexon is ".$cdsexon->location->to_FTstring."\n");
+
+        my $utr = Bio::SeqFeature::Generic->new
+            (-seq_id      => $exon->seq_id,
+             -strand      => $exon->strand,
+             -primary_tag => $exon->strand < 0 ? "five_prime_UTR" : "three_prime_UTR",
+             -source_tag  => $Default_Source,
+             -tag         => {
+                 'Parent' => $pid,
+                 'ID'     => "$pid.UTR".$utrct++,
+             }
+             );
+        my ($ns,$ne);
+        if( $utr->primary_tag eq 'three_prime_UTR' ) {
+            $ns = max ( $exon->start, $cdsexon->end + 1);
+            $ne = $exon->end;
+        } else {
+            # NOTE(review): unlike the branch above, the start here is
+            # not clamped to the exon start, so an exon lying wholly
+            # beyond the CDS gets a UTR starting right after the CDS --
+            # confirm whether that is intended.
+            $ns = $cdsexon->end+1;
+            $ne = max ( $exon->end, $cdsexon->start + 1);
+        }
+        $utr->start($ns); $utr->end($ne);
+
+        $model->add_SeqFeature($utr);
+        $curseq->add_SeqFeature($utr);
+        $exon = pop @pending;
+    }
+    return;
+}
+
+# SAX handler: an element closes.
+#
+# Pops the element from the state stack; turns the authors collected for
+# an AUTHOR_LIST into a reference annotation on the current sequence;
+# pops finished TU, TRNA, PRE-TRNA, REPEAT and MODEL features from the
+# stack of open features (deriving UTRs for a MODEL); and always forwards
+# the event to the parent handler.
+sub end_element {
+    my ($self,$ele) = @_;
+    pop @{$self->{'_state'}};
+    # start_element() compares upper-cased names, so do the same here.
+    my $name = uc $ele->{'LocalName'};
+    my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
+    if( $name eq 'AUTHOR_LIST' ) {
+        if( $curseq->can('annotation') ) {
+            $curseq->annotation->add_Annotation
+                ('reference',Bio::Annotation::Reference->new
+                 (-authors => join(',',@{$self->{'_seendata'}->{'_authors'}}))
+                 );
+        }
+        $self->{'_seendata'}->{'_authors'} = [];
+    } elsif( $name eq 'ASSEMBLY' ) {
+        if( @{$self->{'_seendata'}->{'_feats'} || []} ) {
+            $self->warn("Leftover features which were not finished!");
+        }
+        $self->debug("end element for ASSEMBLY ". $curseq->display_id. "\n");
+    } elsif( $name eq 'TU' ||
+             $name eq 'TRNA'  || $name eq 'PRE-TRNA' ||
+             $name eq 'REPEAT' ) {
+        pop @{$self->{'_seendata'}->{'_feats'}};
+    } elsif( $name eq 'MODEL' ) {
+        # The UTR work lives in a helper so that its early exits no
+        # longer skip the call to the parent handler below.
+        my $model = pop @{$self->{'_seendata'}->{'_feats'}};
+        $self->_add_model_utrs($model, $curseq);
+    }
+    $self->SUPER::end_element($ele);
+}
+
+# SAX handler: character data inside the innermost open element.
+#
+# Depending on that element, the text becomes the display id
+# (CLONE_NAME), the species (ORGANISM), the classification (LINEAGE) or
+# an author (AUTHOR) of the current sequence, or a 'translation'
+# (PROTEIN_SEQ) or 'Note' (REPEAT) tag of the innermost open feature.
+# The event is always forwarded to the parent handler.
+sub characters {
+    my ($self,$data) = @_;
+    # '|| []' keeps the dereference from dying under strict when no
+    # element has been opened yet, which is the case the warning is for.
+    if( ! @{$self->{'_state'} || []} ) {
+        $self->warn("Calling characters with no previous start_element call. Ignoring data");
+    } else {
+        my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
+        my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];
+        my $name = $self->{'_state'}->[-1];
+        if( defined $curseq ) {
+            if( $name eq 'CLONE_NAME' ) {
+                $self->debug("Clone name is ",$data->{'Data'}, "\n");
+                $curseq->display_id($data->{'Data'});
+            } elsif( $name eq 'ORGANISM' ) {
+                # At most three parts: genus, species, and everything
+                # after them as the sub-species.
+                my ($genus,$species,$subspec) = split(/\s+/,$data->{Data},3);
+                # The species name used to be passed as the sub-species,
+                # leaving the parsed $subspec unused.
+                $curseq->species(Bio::Species->new(
+                                                   -classification =>
+                                                   [$species,$genus],
+                                                   -sub_species => $subspec));
+            } elsif( $name eq 'LINEAGE' ) {
+                # Species and genus first, then the lineage entries
+                # (split on ';' or '.', trimmed) in reverse order.
+                $curseq->species->classification(
+                                  [
+                                    $curseq->species->species,
+                                    $curseq->species->genus,
+                                    reverse  (map { s/^\s+//;
+                                                    s/\s+$//; $_; }
+                                              split /[;\.]+/,$data->{'Data'} ),
+                                    ]
+                                  );
+            } elsif( $name eq 'AUTHOR' ) {
+                push @{$self->{'_seendata'}->{'_authors'}}, $data->{'Data'};
+            }
+        }
+        if( defined $curfeat ) {
+            if( $name eq 'PROTEIN_SEQ' ) {
+                $curfeat->add_tag_value('translation',$data->{'Data'});
+            } elsif( $name eq 'REPEAT' ) {
+                $curfeat->add_tag_value('Note',$data->{'Data'});
+            } elsif( $name eq 'GB_COMMENT' ) {
+                # NOTE(review): GB_COMMENT and GB_DESCRIPTION update the
+                # sequence, yet are only handled while a feature is
+                # open -- confirm whether they belong in the sequence
+                # branch above.
+                $curseq->annotation->add_Annotation
+                    ('comment',
+                     Bio::Annotation::Comment->new(-text => $data->{'Data'}));
+            } elsif( $name eq 'GB_DESCRIPTION' ) {
+                $curseq->description($data->{'Data'});
+            }
+            # Text inside EXON, RNA-EXON, CDS, PRE-TRNA, TRNA and
+            # REPEAT_LIST is deliberately ignored.
+        }
+    }
+    $self->SUPER::characters($data);
+}
+
+
+# Throw an exception carrying $msg unless $test is true.
+sub assert {
+    my ($self,$test,$msg) = @_;
+    $test or $self->throw($msg);
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq/tinyseqHandler.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq/tinyseqHandler.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq/tinyseqHandler.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,268 @@
+# BioPerl module for Bio::SeqIO::tinyseqHandler
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Bristol-Myers Squibb
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::tinyseq::tinyseqHandler - XML event handlers to support NCBI TinySeq XML parsing
+
+=head1 SYNOPSIS
+
+Do not use this module directly; use the SeqIO handler system:
+
+  $stream = Bio::SeqIO->new( -file => $filename, -format => 'tinyseq' );
+
+  while ( my $seq = $stream->next_seq ) {
+    ....
+  }
+
+=head1 DESCRIPTION
+
+This object provides event handler methods for parsing sequence files
+in the NCBI TinySeq XML format.  A TinySeq is a lightweight XML file
+of sequence information on one or more sequences, analogous to FASTA
+format.
+
+See L<http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.mod.dtd> for the DTD.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 SEE ALSO
+
+L<Bio::SeqIO>, L<Bio::Seq>.
+
+=head1 AUTHOR
+
+Donald Jackson, E<lt>donald.jackson at bms.comE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::tinyseq::tinyseqHandler;
+
+use strict;
+use warnings;
+
+
+use vars qw(%ATTMAP);
+
+use base qw(Bio::Root::Root);
+
+# %ATTMAP says which TinySeq elements are kept and under which key.
+# Layout: element_name => { key_in_element_hash => sequence_attribute_key }
+# The TSeq() handler copies $element->{key_in_element_hash} into the
+# attribute hash under sequence_attribute_key; elements without an entry
+# here are dropped.
+%ATTMAP = (
+    TSeq_sequence => { Data => '-seq' },
+    TSeq_gi       => { Data => '-primary_id' },
+    TSeq_defline  => { Data => '-desc' },
+    TSeq_sid      => { Data => '-sid' },
+    TSeq_accver   => { Data => '-accver' },
+    TSeq_taxid    => { Data => '-taxid' },
+    TSeq_orgname  => { Data => '-organism' },
+);
+
+=head2 new
+
+  Title		: new
+  Usage		: $handler = Bio::SeqIO::tinyseq::tinyseqHandler->new()
+  Function	: instantiates a tinyseqHandler for use by
+                  XML::Parser::PerlSAX
+  Returns	: Bio::SeqIO::tinyseq::tinyseqHandler object
+  Args		: NONE
+
+=cut
+
+sub new {
+    # Constructor: returns an empty hash-based handler object.  May be
+    # invoked on a class name or on an existing instance; any further
+    # arguments are accepted but ignored.
+    my $invocant = shift;
+    return bless {}, (ref($invocant) || $invocant);
+}
+
+#######################################
+# Event handling methods for PerlSax  #
+#######################################
+
+sub doctype_decl {
+    # SAX doctype_decl handler: refuse any document whose DOCTYPE system
+    # identifier is not the NCBI TinySeq DTD.
+    #   $doctype - hashref for the declaration; only 'SystemId' is read
+    # Throws when the identifier is missing or different.
+    my ($self, $doctype) = @_;
+
+    my $expected  = 'http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd';
+    my $system_id = $doctype->{'SystemId'};
+
+    # make sure we have a tinyseq
+    unless (defined $system_id and $system_id eq $expected) {
+	# Hand throw() ONE string so the offending identifier is part of the
+	# reported message instead of being passed as a separate argument.
+	my $found = defined $system_id ? $system_id
+	                               : 'document without a system identifier';
+	$self->throw("This document doesn't use the NCBI TinySeq dtd; it's a " . $found);
+    }
+
+}
+
+=head2 start_document
+
+  Title		: start_document
+  Usage		: NONE
+  Function	: start_document handler for use by XML::Parser::PerlSAX
+  Returns	: NONE
+  Args		: NONE
+
+=cut
+
+sub start_document {
+    # SAX start_document handler: reset the per-document state.
+    #   _seqatts  - list of attribute hashes, one per finished TSeq
+    #   _elements - stack of element hashes seen so far
+    my $self = $_[0];
+
+    $self->{'_seqatts'} = [];
+    return $self->{'_elements'} = [];
+}
+
+=head2 end_document
+
+  Title		: end_document
+  Usage		: NONE
+  Function	: end_document handler for use by XML::Parser::PerlSAX
+  Returns	: NONE
+  Args		: NONE
+
+=cut
+
+sub end_document {
+    # SAX end_document handler: hand back the accumulated list of
+    # per-sequence attribute hashes (the arrayref stored in _seqatts).
+    my $self = shift;
+    return $self->{'_seqatts'};
+}
+
+=head2 start_element
+
+  Title		: start_element
+  Usage		: NONE
+  Function	: start_element handler for use by XML::Parser::PerlSAX
+  Returns	: NONE
+  Args		: NONE
+
+=cut
+
+sub start_element {
+    # SAX start_element handler: remember the element hash by pushing it
+    # onto the _elements stack, where characters() and TSeq() find it.
+    my ($self, $element) = @_;
+
+    return push @{ $self->{'_elements'} }, $element;
+}
+
+=head2 end_element
+
+  Title		: end_element
+  Usage		: NONE
+  Function	: end_element handler for use by XML::Parser::PerlSAX
+  Returns	: NONE
+  Args		: NONE
+
+=cut
+
+sub end_element {
+    # SAX end_element handler: if this object has a method named exactly
+    # like the closing element, call it (with no arguments).
+    # NOTE(review): dispatch is purely by name, so an element whose name
+    # equals ANY method this object can() will invoke that method.
+    my ($self, $ending) = @_;
+
+    my $method = $ending->{'Name'};
+    return $self->can($method) && $self->$method;
+}
+
+=head2 characters
+
+  Title		: characters
+  Usage		: NONE
+  Function	: characters handler for use by XML::Parser::PerlSAX
+  Returns	: NONE
+  Args		: NONE
+
+=cut
+
+sub characters {
+    # SAX characters handler: store text content on the element that was
+    # started most recently.
+    #   $characters - hashref; only its 'Data' entry is read
+    my ($self, $characters) = @_;
+
+    my $data = $characters->{'Data'};
+
+    # Skip missing or whitespace-only chunks.
+    return unless (defined($data) and $data =~ /\S/);
+
+    my $current = $self->_current_element;
+    return unless ($current);	# text seen before any element was started
+
+    # Append rather than overwrite: a parser may report the text of one
+    # element in several characters events, and overwriting would keep
+    # only the final chunk (e.g. the tail of a long sequence).
+    $current->{'Data'} .= $data;
+}
+
+
+###########################################
+# Element-specific handlers
+# called at END of element name
+##########################################
+
+=head2 TSeq
+
+  Title		: TSeq
+  Usage		: NONE
+  Function	: event handler for END of a TSeq element
+  Returns	: loh of parsed sequence atts for Bio::SeqIO::tinyseq
+  Args		: NONE
+
+=cut
+
+sub TSeq {
+    # Called at the END of a TSeq element.  Unwinds the element stack down
+    # to (and including) the TSeq start element, translating every element
+    # listed in %ATTMAP into an entry of a fresh attribute hash, then
+    # appends that hash to _seqatts.
+    my $self = shift;
+
+    my %attributes;
+
+    ELEMENT:
+    while (my $node = pop @{ $self->{'_elements'} }) {
+	my $tag = $node->{'Name'};
+	last ELEMENT if ($tag eq 'TSeq');
+
+	# elements without a mapping are ignored
+	my $mapping = $ATTMAP{$tag};
+	next ELEMENT unless ($mapping);
+
+	foreach my $source_key (keys %$mapping) {
+	    $attributes{ $mapping->{$source_key} } = $node->{$source_key};
+	}
+    }
+
+    push(@{ $self->{'_seqatts'} }, \%attributes);
+
+}
+
+#############################################
+# Utility method to return current element info
+##############################################
+
+=head2 _current_element
+
+  Title		: _current_element
+  Usage		: Internal method
+  Function	: Utility method to return current element info
+  Returns	: XML::Parser::PerlSAX hash for current element
+  Args		: NONE
+
+=cut
+
+sub _current_element {
+    # Return the element hash on top of the _elements stack, i.e. the one
+    # most recently passed to start_element (undef when the stack is empty).
+    my $self = shift;
+    my $stack = $self->{'_elements'};
+    return $self->{'_elements'}->[-1];
+}
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/tinyseq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,400 @@
+# BioPerl module for Bio::SeqIO::tinyseq
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Bristol-Myers Squibb
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::tinyseq - reading/writing sequences in NCBI TinySeq format
+
+=head1 SYNOPSIS
+
+Do not use this module directly; use the SeqIO handler system:
+
+  $stream = Bio::SeqIO->new( -file => $filename, -format => 'tinyseq' );
+
+  while ( my $seq = $stream->next_seq ) {
+    ....
+  }
+
+=head1 DESCRIPTION
+
+This object reads and writes Bio::Seq objects to and from TinySeq XML
+format.  A TinySeq is a lightweight XML file of sequence information,
+analogous to FASTA format.
+
+See L<http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.mod.dtd> for the DTD.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.bioperl.org
+
+=head1 SEE ALSO
+
+L<Bio::SeqIO>, L<Bio::Seq>.
+
+=head1 AUTHOR
+
+Donald Jackson, E<lt>donald.jackson at bms.comE<gt>
+
+Parts of this module and the test script were patterned after Sheldon
+McKay's L<Bio::SeqIO::game>.  If it breaks, however, it's my fault not
+his ;).
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::SeqIO::tinyseq;
+
+use strict;
+use Bio::Root::Root;
+use Bio::Seq::SeqFastaSpeedFactory;
+use Bio::Species;
+use Bio::SeqIO::tinyseq::tinyseqHandler;
+use XML::Parser::PerlSAX;
+use XML::Writer;
+use Bio::Root::Version;
+
+our $VERSION = ${Bio::Root::Version::VERSION};
+
+use base qw(Bio::SeqIO);
+
+sub _initialize {
+    # Finish construction: run the parent initialisation, make sure a
+    # sequence factory exists, and set up the species cache and the
+    # "input already parsed" flag.
+    my ($self, @params) = @_;
+
+    $self->SUPER::_initialize(@params);
+
+    # fall back to the fast FASTA-style factory when none was supplied
+    if (!defined $self->sequence_factory) {
+	$self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
+    }
+
+    $self->{'_species_objects'} = {};	# organism name => Bio::Species
+    $self->{_parsed} = 0;		# set to 1 by _get_seqs()
+}
+
+=head2 next_seq
+
+  Title		: next_seq
+  Usage		: $seq = $stream->next_seq()
+  Function	: returns the next sequence in the stream
+  Returns	: Bio::Seq object
+  Args		: NONE
+
+=cut
+
+sub next_seq {
+    # Return the next sequence object.  The first call parses the whole
+    # input via _get_seqs(); afterwards objects are handed out one at a
+    # time from the front of _seqlist.
+    my $self = shift;
+
+    if (!$self->{_parsed}) {
+	$self->_get_seqs();
+    }
+
+    return shift @{$self->{_seqlist}};
+}
+
+=head2 write_seq
+
+  Title		: write_seq
+  Usage		: $seq = $stream->write_seq(@sequence_objects); undef $stream
+  Function	: outputs one or more sequence objects as TinySeq XML
+  Returns	: 1 on success
+  Args		: one or more Bio::SeqI or Bio::PrimarySeqI compliant objects
+
+Because the TSeq dtd includes closing tags after all sets are written,
+the output will not be complete until the program terminates or the
+object is forced out of scope (see close_writer()).  May not perfectly
+reproduce TSeq_sid element for all sequences
+
+=cut
+
+sub write_seq {
+    # Write each given sequence object as one <TSeq> element.
+    # Only the FIRST argument is type-checked; the closing TSeqSet tag is
+    # not written here (see close_writer()).  Returns 1.
+    my ($self, @seqobjs) = @_;
+
+    my $first_is_seq = @seqobjs
+	&& ( $seqobjs[0]->isa('Bio::SeqI') || $seqobjs[0]->isa('Bio::PrimarySeqI') );
+    $self->throw('write_seq must be called with at least one  Bio::SeqI or Bio::PrimarySeqI compliant object')
+	unless ($first_is_seq);
+
+    my $xml = $self->_get_writer;
+
+    for my $seq (@seqobjs) {
+	# element name is TSeq_sid or TSeq_accver, decided by _get_idstring
+	my ($id_tag, $id_text) = $self->_get_idstring($seq);
+
+	$xml->startTag('TSeq');
+	$xml->emptyTag('TSeq_seqtype', value => $self->_convert_seqtype($seq));
+	$xml->dataElement('TSeq_gi', $seq->primary_id || '');
+	$xml->dataElement($id_tag, $id_text);
+	$xml->dataElement('TSeq_defline', $seq->desc);
+	$xml->dataElement('TSeq_length', $seq->length);
+	$xml->dataElement('TSeq_sequence', $seq->seq);
+
+	# organism name / taxon id are written only when a species is attached
+	if ($seq->can('species') && $seq->species) {
+	    $self->_write_species($xml, $seq->species);
+	}
+
+	$xml->endTag('TSeq');
+    }
+
+    return 1;
+}
+
+=head2 _get_seqs
+
+  Title		: _get_seqs
+  Usage		: Internal function - use next_seq() instead
+  Function	: parses the XML and creates Bio::Seq objects
+  Returns	: 1 on success
+  Args		: NONE
+
+Currently stores all sequence objects in memory.  I will work on a
+more stream-based approach.
+
+=cut
+
+sub _get_seqs {
+    # Parse the complete input with the TinySeq SAX handler and queue one
+    # sequence object per parsed attribute hash in _seqlist.  Marks the
+    # stream as parsed so next_seq() does not parse again.
+    my ($self) = @_;
+
+    my $fh      = $self->_fh;
+    my $handler = Bio::SeqIO::tinyseq::tinyseqHandler->new();
+    my $parser  = XML::Parser::PerlSAX->new( Handler => $handler );
+
+    # parse() is called in list context; each returned item is itself a
+    # list (arrayref) of attribute hashes
+    my @parsed = $parser->parse( Source => { ByteStream => $fh });
+
+    my $factory = $self->sequence_factory;
+
+    $self->{_seqlist} ||= [];
+    my $queue = $self->{_seqlist};
+
+    foreach my $batch (@parsed) {
+	foreach my $atts (@$batch) {
+	    my $seqobj = $factory->create(%$atts);
+	    $self->_assign_identifier($seqobj, $atts);
+
+	    # attach a (cached) species object when the class supports it
+	    if ($seqobj->can('species')) {
+		my $species = $self->_get_species($atts->{'-organism'}, $atts->{'-taxid'});
+		$seqobj->species($species) if ($species);
+	    }
+
+	    push(@$queue, $seqobj);
+	}
+    }
+    $self->{_parsed} = 1;
+}
+
+=head2 _get_species
+
+  Title		: _get_species
+  Usage		: Internal function
+  Function	: gets a Bio::Species object from cache or creates as needed
+  Returns	: a Bio::Species object on success, undef on failure
+  Args		: a classification string (eg 'Homo sapiens') and
+                  a NCBI taxon id (optional)
+
+Objects are cached for parsing multiple sequence files.
+
+=cut
+
+sub _get_species {
+     my ($self, $orgname, $taxid) = @_;
+
+     unless ($self->{'_species_objects'}->{$orgname}) {
+	 my $species = $self->_create_species($orgname, $taxid);
+	 $self->{'_species_objects'}->{$orgname} = $species;
+     }
+     return $self->{'_species_objects'}->{$orgname};
+}
+
+=head2 _create_species
+
+  Title		: _create_species
+  Usage		: Internal function
+  Function	: creates a Bio::Species object
+  Returns	: a Bio::Species object on success, undef on failure
+  Args		: a classification string (eg 'Homo sapiens') and
+                  a NCBI taxon id (optional)
+
+=cut
+
+sub _create_species {
+    # Build a Bio::Species from an organism name such as 'Homo sapiens'.
+    # The name is split on single spaces and reversed to form the
+    # classification list; the taxon id is passed on only when true.
+    # Returns nothing when no name is given or construction fails.
+    my ($self, $orgname, $taxid) = @_;
+
+    # not required in TinySeq dtd so don't throw an error
+    return unless ($orgname);
+
+    my @ctor_args = ('-classification' => [ reverse(split(/ /, $orgname)) ]);
+    push(@ctor_args, '-ncbi_taxid' => $taxid) if ($taxid);
+
+    my $species = Bio::Species->new(@ctor_args);
+    return $species if ($species);
+    return;
+}
+
+
+=head2 _assign_identifier
+
+  Title		: _assign_identifier
+  Usage		: Internal function
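+  Function	: sets the accession number and version of a sequence
+                  object from its TSeq_accver or TSeq_sid value
+  Returns	: nothing meaningful; throws if neither value is present
+  Args		: a sequence object and a hashref of parsed TSeq attributes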
+
+NCBI puts refseq accessions in TSeq_sid, others in TSeq_accver.
+
+=cut
+
+sub _assign_identifier {
+    # Derive accession and version from the parsed attributes and set them
+    # on $seqobj (each only if the object has the corresponding method).
+    # '-accver' is preferred; otherwise '-sid' is used after removing the
+    # leading 'xxx|' tag and the first following '|...' part.
+    # Throws when neither attribute holds a true value.
+    my ($self, $seqobj, $atts) = @_;
+
+    my $idstring;
+    if ($atts->{'-accver'}) {
+	$idstring = $atts->{'-accver'};
+    }
+    elsif ($atts->{'-sid'}) {
+	$idstring = $atts->{'-sid'};
+	$idstring =~ s/^.+?\|//;
+	$idstring =~ s/\|[^\|]*//;
+    }
+    else {
+	$self->throw('NO accession information found for this sequence');
+    }
+
+    # "ACCESSION.VERSION" -> two fields
+    my ($accession, $version) = split(/\./, $idstring);
+
+    $seqobj->accession_number($accession) if ($seqobj->can('accession_number'));
+    $seqobj->version($version) if ($seqobj->can('version'));
+
+}
+
+=head2 _convert_seqtype
+
+  Title		: _convert_seqtype
+  Usage		: Internal function
+  Function	: maps Bio::Seq::alphabet() values [dna/rna/protein] onto
+                  TSeq_seqtype values [protein/nucleotide]
+
+=cut
+
+sub _convert_seqtype {
+    # Translate the sequence's alphabet into a TSeq_seqtype value:
+    # 'protein' stays 'protein', 'dna' and 'rna' become 'nucleotide'.
+    # Any other (or missing) alphabet is an error.
+    my ($self, $seqobj) = @_;
+
+    my %seqtype_for = (
+	protein => 'protein',
+	dna     => 'nucleotide',
+	rna     => 'nucleotide',
+    );
+
+    my $alphabet = $seqobj->alphabet;
+    $alphabet = '' unless (defined $alphabet);
+    return $seqtype_for{$alphabet} if (exists $seqtype_for{$alphabet});
+
+    # if we get here there's a problem!
+    $self->throw("Alphabet not defined, can't assign type for $seqobj");
+}
+
+=head2 _get_idstring
+
+  Title		: _get_idstring
+  Usage		: Internal function
+  Function	: builds the identifier element name (TSeq_sid or
+                  TSeq_accver) and its value from a sequence object's
+                  accession number and version
+
+=cut
+
+sub _get_idstring {
+    # Build the (element name, value) pair used to write a sequence id.
+    # The value is "accession" or "accession.version"; accessions starting
+    # with a RefSeq-style prefix go into TSeq_sid as 'ref|ID|', all others
+    # into TSeq_accver unchanged.
+    # NCBI puts refseq ids in TSeq_sid, others in TSeq_accver.  No idea why.
+    my ($self, $seqobj) = @_;
+
+    my $id = $seqobj->accession_number;
+    if ($seqobj->can('version')) {
+	my $version = $seqobj->version;
+	$id .= '.' . $version if ($version);
+    }
+
+    return ($id =~ /^(NM_|NP_|XM_|XP_|NT_|NC_|NG_)/)
+	? ('TSeq_sid', join('|', 'ref', $id, ''))
+	: ('TSeq_accver', $id);
+}
+
+=head2 _get_writer
+
+  Title		: _get_writer
+  Usage		: Internal function
+  Function	: instantiate XML::Writer object if needed,
+                  output initial XML
+
+=cut
+
+sub _get_writer {
+    # Return the XML::Writer for this stream, creating it on first use.
+    # Creation also emits the DOCTYPE, a generator comment and the opening
+    # <TSeqSet> tag, so write_seq can work one sequence at a time.
+    my $self = shift;
+
+    return $self->{_writer} if ($self->{_writer});
+
+    my $fh = $self->_fh;
+    my %writer_options = (
+	OUTPUT      => $fh,
+	DATA_MODE   => 1,
+	DATA_INDENT => 2,
+	NEWLINE     => 1,
+    );
+    my $writer = XML::Writer->new(%writer_options);
+
+    $writer->doctype('TSeqSet', '-//NCBI//NCBI TSeq/EN', 'http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd');
+    $writer->comment("Generated by Bio::SeqIO::tinyseq VERSION $VERSION");
+    $writer->startTag('TSeqSet');
+
+    $self->{_writer} = $writer;
+    return $self->{_writer};
+}
+
+=head2 close_writer
+
+  Title		: close_writer
+  Usage		: $self->close_writer()
+  Function	: terminate XML output
+  Args		: NONE
+  Returns	: 1 on success
+
+Called automatically by DESTROY when object goes out of scope
+
+=cut
+
+sub close_writer {
+    # Finish any XML output in progress and close the filehandle.
+    # Safe to call more than once: the writer is removed from the object
+    # before it is finished, so a later call (for instance the one made by
+    # DESTROY after an explicit close_writer()) does not try to close the
+    # TSeqSet element a second time.
+    # Returns 1.
+    my ($self) = @_;
+
+    # delete, not just undef a local copy, so the object forgets the writer
+    if (my $writer = delete $self->{_writer}) {
+	$writer->endTag('TSeqSet');
+	$writer->end;
+    }
+
+    close($self->_fh) if ($self->_fh);
+    1;
+}
+
+sub _write_species {
+    # Emit organism information for one sequence: always the binomial name
+    # as TSeq_orgname, and the NCBI taxon id as TSeq_taxid when it is set.
+    my $self = shift;
+    my ($writer, $species) = @_;
+
+    $writer->dataElement('TSeq_orgname', $species->binomial);
+
+    if ($species->ncbi_taxid) {
+	$writer->dataElement('TSeq_taxid', $species->ncbi_taxid);
+    }
+}
+
+sub DESTROY {
+    # Destructor: delegates to close_writer() so that output still being
+    # written gets its closing tag and the filehandle is closed.
+    my $self = shift;
+    $self->close_writer;
+    undef $self;
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ztr.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ztr.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO/ztr.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+# $Id: ztr.pm,v 1.14.4.1 2006/10/02 23:10:30 sendu Exp $
+# BioPerl module for Bio::SeqIO::ztr
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO::ztr - ztr trace sequence input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::SeqIO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Seq objects to and from ztr trace
+files.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Aaron Mackey
+
+Email: amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::SeqIO::ztr;
+use vars qw(@ISA $READ_AVAIL);
+use strict;
+
+use Bio::SeqIO;
+use Bio::Seq::SeqFactory;
+
+push @ISA, qw( Bio::SeqIO );
+
+sub BEGIN {
+    # Try to load the optional Staden reader at compile time.  On success
+    # it is added to @ISA; either way $READ_AVAIL records the outcome
+    # (1 = loaded, 0 = not available).
+    eval { require Bio::SeqIO::staden::read; };
+    $READ_AVAIL = $@ ? 0 : 1;
+    push @ISA, "Bio::SeqIO::staden::read" if $READ_AVAIL;
+}
+
+sub _initialize {
+  # Finish construction of a ztr stream:
+  #   - run the parent initialisation,
+  #   - default the sequence factory to one producing Bio::Seq::Quality,
+  #   - read the optional -compression argument (default 2),
+  #   - throw if Bio::SeqIO::staden::read could not be loaded.
+  # The parameter list is "@args"; the archived text had it mangled to
+  # " at args", which does not compile.
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  if( ! defined $self->sequence_factory ) {
+      $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(), -type => 'Bio::Seq::Quality'));
+  }
+
+  my ($compression) = $self->_rearrange([qw[COMPRESSION]], @args);
+  $compression = 2 unless defined $compression;
+  $self->compression($compression);
+
+  unless ($READ_AVAIL) {
+      Bio::Root::Root->throw( -class => 'Bio::Root::SystemException',
+			      -text  => "Bio::SeqIO::staden::read is not available; make sure the bioperl-ext package has been installed successfully!"
+			    );
+  }
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = $stream->next_seq()
+ Function: returns the next sequence in the stream
+ Returns : Bio::Seq::Quality object
+ Args    : NONE
+
+=cut
+
+sub next_seq {
+    # Read one trace from the filehandle with read_trace() and wrap the
+    # returned sequence, id, description and quality values in an object
+    # built by the sequence factory.  The id is used both as display id
+    # and as primary id.
+    my $self = shift;
+
+    my ($bases, $name, $description, $quality) = $self->read_trace($self->_fh, 'ztr');
+
+    # create the seq object
+    my %fields = (
+	-seq        => $bases,
+	-id         => $name,
+	-primary_id => $name,
+	-desc       => $description,
+	-alphabet   => 'DNA',
+	-qual       => $quality,
+    );
+    my $seqobj = $self->sequence_factory->create(%fields);
+    return $seqobj;
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq(@seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+
+=cut
+
+sub write_seq {
+    # Write every given sequence object to the stream as a ZTR trace.
+    # The format string passed to write_trace() is 'ztr' followed by the
+    # current compression level.  Returns 1.
+    # The parameter list is "@seq"; the archived text had it mangled to
+    # " at seq", which does not compile.
+    my ($self,@seq) = @_;
+
+    my $fh = $self->_fh;
+    foreach my $seq (@seq) {
+	$self->write_trace($fh, $seq, 'ztr' . $self->compression);
+    }
+
+    $self->flush if $self->_flush_on_write && defined $self->_fh;
+    return 1;
+}
+
+=head2 compression
+
+ Title   : compression
+ Usage   : $stream->compression(3);
+ Function: determines the level of ZTR compression
+ Returns : the current (or newly set) value.
+ Args    : 1, 2 or 3 - any other (defined) value will cause the compression
+           to be reset to the default of 2.
+
+
+=cut
+
+sub compression {
+    # Get or set the ZTR compression level.
+    #   $val - optional new level; exactly 1, 2 or 3 is stored as given,
+    #          any other defined value resets the level to the default 2.
+    # Returns the current (possibly just changed) level.
+    my ($self, $val) = @_;
+
+    if (defined $val) {
+	# The whole value must be a single digit 1-3.  The former pattern
+	# /^1|2|3$/ anchored only its first and last alternative, so values
+	# such as "12", "20" or "100" were accepted as well.
+	if ($val =~ m/\A[123]\z/) {
+	    $self->{_compression} = $val;
+	} else {
+	    $self->{_compression} = 2;
+	}
+    }
+
+    return $self->{_compression};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,767 @@
+# $Id: SeqIO.pm,v 1.92.4.5 2006/12/05 20:54:39 sendu Exp $
+#
+# BioPerl module for Bio::SeqIO
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#       and Lincoln Stein  <lstein at cshl.org>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# October 18, 1999  Largely rewritten by Lincoln Stein
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqIO - Handler for SeqIO Formats
+
+=head1 SYNOPSIS
+
+    use Bio::SeqIO;
+
+    $in  = Bio::SeqIO->new(-file => "inputfilename" ,
+                           -format => 'Fasta');
+    $out = Bio::SeqIO->new(-file => ">outputfilename" ,
+                           -format => 'EMBL');
+
+    while ( my $seq = $in->next_seq() ) {
+	    $out->write_seq($seq);
+    }
+
+  # Now, to actually get at the sequence object, use the standard Bio::Seq
+  # methods (look at Bio::Seq if you don't know what they are)
+
+    use Bio::SeqIO;
+
+    $in  = Bio::SeqIO->new(-file => "inputfilename" ,
+                           -format => 'genbank');
+
+    while ( my $seq = $in->next_seq() ) {
+       print "Sequence ",$seq->id, " first 10 bases ",
+             $seq->subseq(1,10), "\n";
+    }
+
+
+  # The SeqIO system does have a filehandle binding. Most people find this
+  # a little confusing, but it does mean you can write the world's
+  # smallest reformatter
+
+    use Bio::SeqIO;
+
+    $in  = Bio::SeqIO->newFh(-file => "inputfilename" ,
+                             -format => 'Fasta');
+    $out = Bio::SeqIO->newFh(-format => 'EMBL');
+
+    # World's shortest Fasta<->EMBL format converter:
+    print $out $_ while <$in>;
+
+
+=head1 DESCRIPTION
+
+Bio::SeqIO is a handler module for the formats in the SeqIO set (eg,
+Bio::SeqIO::fasta). It is the officially sanctioned way of getting at
+the format objects, which most people should use.
+
+The Bio::SeqIO system can be thought of like biological file handles.
+They are attached to filehandles with smart formatting rules (eg,
+genbank format, or EMBL format, or binary trace file format) and
+can either read or write sequence objects (Bio::Seq objects, or
+more correctly, Bio::SeqI implementing objects, of which Bio::Seq is
+one such object). If you want to know what to do with a Bio::Seq
+object, read L<Bio::Seq>.
+
+The idea is that you request a stream object for a particular format.
+All the stream objects have a notion of an internal file that is read
+from or written to. A particular SeqIO object instance is configured
+for either input or output. A specific example of a stream object is
+the Bio::SeqIO::fasta object.
+
+Each stream object has functions
+
+   $stream->next_seq();
+
+and
+
+   $stream->write_seq($seq);
+
+As an added bonus, you can recover a filehandle that is tied to the
+SeqIO object, allowing you to use the standard E<lt>E<gt> and print
+operations to read and write sequence objects:
+
+    use Bio::SeqIO;
+
+    $stream = Bio::SeqIO->newFh(-format => 'Fasta',
+                                -fh     => \*ARGV);
+    # read from standard input or the input filenames
+
+    while ( $seq = <$stream> ) {
+	  # do something with $seq
+    }
+
+and
+
+    print $stream $seq; # when stream is in output mode
+
+This makes the simplest ever reformatter
+
+    #!/usr/bin/perl
+
+    $format1 = shift;
+    $format2 = shift || die
+       "Usage: reformat format1 format2 < input > output";
+
+    use Bio::SeqIO;
+
+    $in  = Bio::SeqIO->newFh(-format => $format1, -fh => \*ARGV );
+    $out = Bio::SeqIO->newFh(-format => $format2 );
+    # Note: you might want to quote -format to keep older
+    # perl's from complaining.
+
+    print $out $_ while <$in>;
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::SeqIO-E<gt>new()
+
+   $seqIO = Bio::SeqIO->new(-file => 'filename',   -format=>$format);
+   $seqIO = Bio::SeqIO->new(-fh   => \*FILEHANDLE, -format=>$format);
+   $seqIO = Bio::SeqIO->new(-format => $format);
+
+The new() class method constructs a new Bio::SeqIO object.  The
+returned object can be used to retrieve or print Seq objects. new()
+accepts the following parameters:
+
+=over 5
+
+=item -file
+
+A file path to be opened for reading or writing.  The usual Perl
+conventions apply:
+
+   'file'       # open file for reading
+   '>file'      # open file for writing
+   '>>file'     # open file for appending
+   '+<file'     # open file read/write
+   'command |'  # open a pipe from the command
+   '| command'  # open a pipe to the command
+
+=item -fh
+
+You may provide new() with a previously-opened filehandle.  For
+example, to read from STDIN:
+
+   $seqIO = Bio::SeqIO->new(-fh => \*STDIN);
+
+Note that you must pass filehandles as references to globs.
+
+If neither a filehandle nor a filename is specified, then the module
+will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
+semantics.
+
+A string filehandle is handy if you want to modify the output in the
+memory, before printing it out. The following program reads in EMBL
+formatted entries from a file and prints them out in fasta format with
+some HTML tags:
+
+  use Bio::SeqIO;
+  use IO::String;
+  my $in  = Bio::SeqIO->new(-file => "emblfile",
+  			                   -format => 'EMBL');
+  while ( my $seq = $in->next_seq() ) {
+      # the output handle is reset for every file
+      my $stringio = IO::String->new($string);
+      my $out = Bio::SeqIO->new(-fh => $stringio,
+  			                       -format => 'fasta');
+      # output goes into $string
+      $out->write_seq($seq);
+      # modify $string
+      $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
+      # print into STDOUT
+      print $string;
+  }
+
+=item -format
+
+Specify the format of the file.  Supported formats include fasta,
+genbank, embl, swiss (SwissProt), Entrez Gene and tracefile formats
+such as abi (ABI) and scf. There are many more, for a complete listing
+see the SeqIO HOWTO (L<http://bioperl.open-bio.org/wiki/HOWTO:SeqIO>).
+
+If no format is specified and a filename is given then the module
+will attempt to deduce the format from the filename suffix. If
+there is no suffix that Bioperl understands then it will attempt
+to guess the format based on file content. If this is unsuccessful
+then Fasta format is assumed.
+
+The format name is case-insensitive: 'FASTA', 'Fasta' and 'fasta' are
+all valid.
+
+Currently, the tracefile formats (except for SCF) require installation
+of the external Staden "io_lib" package, as well as the
+Bio::SeqIO::staden::read package available from the bioperl-ext
+repository.
+
+=item -alphabet
+
+Sets the alphabet ('dna', 'rna', or 'protein'). When the alphabet is
+set then Bioperl will not attempt to guess what the alphabet is. This
+may be important because Bioperl does not always guess correctly.
+
+=item -flush
+
+By default, all files (or filehandles) opened for writing sequences
+will be flushed after each write_seq() (making the file immediately
+usable).  If you do not need this facility and would like to marginally
+improve the efficiency of writing multiple sequences to the same file
+(or filehandle), pass the -flush option '0' or any other value that
+evaluates as defined but false:
+
+  my $gb = new Bio::SeqIO -file   => "<gball.gbk",
+                          -format => "gb";
+  my $fa = new Bio::SeqIO -file   => ">gball.fa",
+                          -format => "fasta",
+                          -flush  => 0; # go as fast as we can!
+  while($seq = $gb->next_seq) { $fa->write_seq($seq) }
+
+
+=back
+
+=head2 Bio::SeqIO-E<gt>newFh()
+
+   $fh = Bio::SeqIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
+   $fh = Bio::SeqIO->newFh(-format => $format);
+   # etc.
+
+This constructor behaves like new(), but returns a tied filehandle
+rather than a Bio::SeqIO object.  You can read sequences from this
+object using the familiar E<lt>E<gt> operator, and write to it using
+print().  The usual array and $_ semantics work.  For example, you can
+read all sequence objects into an array like this:
+
+  @sequences = <$fh>;
+
+Other operations, such as read(), sysread(), write(), close(), and
+printf() are not supported.
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $sequence = $seqIO-E<gt>next_seq()
+
+Fetch the next sequence from the stream.
+
+=head2 $seqIO-E<gt>write_seq($sequence [,$another_sequence,...])
+
+Write the specified sequence(s) to the stream.
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These provide the tie interface.  See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+to one of the Bioperl mailing lists.
+
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, Lincoln Stein
+
+Email birney at ebi.ac.uk
+      lstein at cshl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#' Let the code begin...
+
+package Bio::SeqIO;
+
+use strict;
+
+use Bio::Factory::FTLocationFactory;
+use Bio::Seq::SeqBuilder;
+use Bio::Tools::GuessSeqFormat;
+use Symbol();
+
+use base qw(Bio::Root::Root Bio::Root::IO Bio::Factory::SequenceStreamI);
+
+BEGIN {
+    # Best-effort load of the optional Staden reader at compile time;
+    # a failure is deliberately ignored here.
+    eval { require Bio::SeqIO::staden::read; };
+}
+
+# file-scoped cache; not used in the code visible here
+my %valid_alphabet_cache;
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::SeqIO->new(-file => $filename,
+                                     -format => 'Format')
+ Function: Returns a new sequence stream
+ Returns : A Bio::SeqIO stream initialised with the appropriate format
+ Args    : Named parameters:
+             -file => $filename
+             -fh => filehandle to attach to
+             -format => format
+
+           Additional arguments may be used to set factories and
+           builders involved in the sequence object creation. None of
+           these must be provided, they all have reasonable defaults.
+             -seqfactory   the Bio::Factory::SequenceFactoryI object
+             -locfactory   the Bio::Factory::LocationFactoryI object
+             -objbuilder   the Bio::Factory::ObjectBuilderI object
+
+See L<Bio::SeqIO::Handler>
+
+=cut
+
+my $entry = 0;
+
+# Factory constructor.
+#
+# Called on a concrete driver class (Bio::SeqIO::<format>) it builds and
+# initialises that driver directly.  Called on Bio::SeqIO itself it works
+# out the format (explicit -format, then file suffix, then content
+# sniffing via Bio::Tools::GuessSeqFormat), loads the matching driver
+# module and delegates construction to it.
+#
+# Returns the new stream object, or nothing if the driver module could not
+# be loaded.  Throws if -file / -fh is passed with an undefined value or if
+# no format can be determined.
+sub new {
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if ( $class =~ /Bio::SeqIO::(\S+)/ ) {
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    }
+
+    my %param = @args;
+    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys
+
+    if ( !defined( $param{-file} ) && !defined( $param{-fh} ) ) {
+        $class->throw("file argument provided, but with an undefined value")
+            if exists $param{'-file'};
+        $class->throw("fh argument provided, but with an undefined value")
+            if exists $param{'-fh'};
+    }
+
+    # Explicit format first, then a guess from the file name suffix.
+    my $format = $param{'-format'}
+        || $class->_guess_format( $param{-file} || $ARGV[0] );
+
+    # Last resort: let GuessSeqFormat look at the content itself.  Each
+    # branch is only entered when its parameter is true, so no further
+    # fallback value is needed here.
+    if ( !$format ) {
+        if ( $param{-file} ) {
+            $format = Bio::Tools::GuessSeqFormat->new( -file => $param{-file} )->guess;
+        }
+        elsif ( $param{-fh} ) {
+            $format = Bio::Tools::GuessSeqFormat->new( -fh => $param{-fh} )->guess;
+        }
+    }
+
+    $format = "\L$format";    # normalize capitalization to lower case
+    $class->throw("Unknown format given or could not determine it [$format]")
+        unless $format;
+
+    return unless $class->_load_format_module($format);
+    return "Bio::SeqIO::$format"->new(@args);
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
+           $sequence = <$fh>;   # read a sequence object
+           print $fh $sequence; # write a sequence object
+ Returns : filehandle tied to the Bio::SeqIO::Fh class
+ Args    :
+
+See L<Bio::SeqIO::Fh>
+
+=cut
+
+# Build a stream with new() and hand back its tied filehandle; returns
+# nothing when new() does not produce a stream.
+sub newFh {
+    my ($class, @params) = @_;
+    my $stream = $class->new(@params);
+    return unless $stream;
+    return $stream->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $sequence = <$fh>;   # read a sequence object
+           print $fh $sequence; # write a sequence object
+ Returns : filehandle tied to Bio::SeqIO class
+ Args    : none
+
+=cut
+
+
+# Create an anonymous glob and tie it to this object's class, passing the
+# object itself to TIEHANDLE; the glob reference is returned.
+sub fh {
+    my ($self) = @_;
+    my $tie_class = ref($self) || $self;
+    my $glob      = Symbol::gensym;
+    tie ${$glob}, $tie_class, $self;
+    return $glob;
+}
+
+# _initialize is chained for all SeqIO classes
+
+# Shared initialisation for all SeqIO classes: installs the location
+# factory and sequence builder (defaults are created when none are given),
+# then the optional sequence factory and alphabet, and finally sets up the
+# IO layer.
+sub _initialize {
+    my ($self, @args) = @_;
+
+    # flush is initialized by the Root::IO init
+
+    my ($seqfact, $locfact, $objbuilder, $alphabet) = $self->_rearrange(
+        [qw(SEQFACTORY
+            LOCFACTORY
+            OBJBUILDER
+            ALPHABET)],
+        @args
+    );
+
+    $locfact    ||= Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose);
+    $objbuilder ||= Bio::Seq::SeqBuilder->new(-verbose => $self->verbose);
+    $self->sequence_builder($objbuilder);
+    $self->location_factory($locfact);
+
+    # note that this should come last because it propagates the sequence
+    # factory to the sequence builder
+    $self->sequence_factory($seqfact) if $seqfact;
+
+    # bug 2160
+    $self->alphabet($alphabet) if $alphabet;
+
+    # initialize the IO part
+    $self->_initialize_io(@args);
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $seq = stream->next_seq
+ Function: Reads the next sequence object from the stream and returns it.
+
+           Certain driver modules may encounter entries in the stream
+           that are either misformatted or that use syntax not yet
+           understood by the driver. If such an incident is
+           recoverable, e.g., by dismissing a feature of a feature
+           table or some other non-mandatory part of an entry, the
+           driver will issue a warning. In the case of a
+           non-recoverable situation an exception will be thrown.  Do
+           not assume that you can resume parsing the same stream
+           after catching the exception. Note that you can always turn
+           recoverable errors into exceptions by calling
+           $stream->verbose(2).
+
+ Returns : a Bio::Seq sequence object
+ Args    : none
+
+See L<Bio::Root::RootI>, L<Bio::Factory::SequenceStreamI>, L<Bio::Seq>
+
+=cut
+
+# Placeholder on the generic class: always throws.
+sub next_seq {
+    my $self = shift;
+    $self->throw("Sorry, you cannot read from a generic Bio::SeqIO object.");
+}
+
+=head2 write_seq
+
+ Title   : write_seq
+ Usage   : $stream->write_seq($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Seq object
+
+=cut
+
+# Placeholder on the generic class: always throws.
+sub write_seq {
+    my $self = shift;
+    $self->throw("Sorry, you cannot write to a generic Bio::SeqIO object.");
+}
+
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : $self->alphabet($newval)
+ Function: Set/get the molecule type for the Seq objects to be created.
+ Example : $seqio->alphabet('protein')
+ Returns : value of alphabet: 'dna', 'rna', or 'protein'
+ Args    : newvalue (optional)
+ Throws  : Exception if the argument is not one of 'dna', 'rna', or 'protein'
+
+=cut
+
+# Get/set the alphabet.  A new value is lower-cased and validated once by
+# constructing a throw-away Bio::PrimarySeq with it; accepted values are
+# remembered in %valid_alphabet_cache so they are not re-validated.
+sub alphabet {
+    my ($self, $value) = @_;
+
+    # plain getter
+    return $self->{'alphabet'} unless defined $value;
+
+    $value = lc $value;
+    if ( !$valid_alphabet_cache{$value} ) {
+        # instead of hard-coding the allowed values once more, we check by
+        # creating a dummy sequence object
+        eval {
+            require Bio::PrimarySeq;
+            my $probe = Bio::PrimarySeq->new(
+                '-verbose'  => $self->verbose,
+                '-alphabet' => $value
+            );
+        };
+        if ($@) {
+            $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
+        }
+        $valid_alphabet_cache{$value} = 1;
+    }
+
+    $self->{'alphabet'} = $value;
+    return $self->{'alphabet'};
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL SeqIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+# Load the driver module Bio::SeqIO::<format> on demand.  Returns whatever
+# _load_module() returned; if loading raised an exception a diagnostic is
+# printed to STDERR and the (undefined) result is returned.
+sub _load_format_module {
+    my ($self, $format) = @_;
+    my $module = "Bio::SeqIO::" . $format;
+
+    my $ok;
+    eval { $ok = $self->_load_module($module); };
+    if ($@) {
+        print STDERR "$self: $format cannot be found\n"
+            . "Exception $@\n"
+            . "For more information about the SeqIO system please see the SeqIO docs.\n"
+            . "This includes ways of checking for formats at compile time, not run time\n";
+    }
+    return $ok;
+}
+
+=head2 _concatenate_lines
+
+ Title   : _concatenate_lines
+ Usage   : $s = _concatenate_lines($line, $continuation_line)
+ Function: Private. Concatenates two strings assuming that the second stems
+           from a continuation line of the first. Adds a space between both
+           unless the first ends with a dash.
+
+           Takes care of either arg being empty.
+ Example :
+ Returns : A string.
+ Args    :
+
+=cut
+
+# Join a line and its continuation.  A single space is inserted between
+# them unless the first part ends with a dash; a false part (undef, '' or
+# '0') contributes nothing.
+sub _concatenate_lines {
+    my ($self, $head, $tail) = @_;
+
+    my $joined = $head ? $head : "";
+    $joined .= " "   if $head && $tail && $head !~ /-$/;
+    $joined .= $tail if $tail;
+    return $joined;
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function: This method is deprecated. Call _fh() instead.
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+=cut
+
+# Deprecated alias: forwards all arguments to _fh() and returns its result.
+sub _filehandle {
+    my ($self, @args) = @_;
+    return $self->_fh(@args);
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function: guess format based on file suffix
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+ Notes   : formats that _guess_format() will guess include fasta,
+           genbank, scf, pir, embl, raw, gcg, ace, bsml, swissprot,
+           fastq and phd/phred
+
+=cut
+
+# Guess a sequence format from a file name suffix (case-insensitive).
+#
+# Args    : the file name
+# Returns : the lower-case format name, or nothing (undef in scalar
+#           context) when the name is false or no suffix is recognised.
+#
+# The file name is matched through a lexical variable so that the
+# caller's $_ is left untouched.
+sub _guess_format {
+    my ($class, $filename) = @_;
+    return unless $filename;
+
+    # Checked in order; the first matching suffix wins.
+    my @suffix_rules = (
+        [ 'abi'        => qr/\.ab[i1]$/i ],
+        [ 'ace'        => qr/\.ace$/i ],
+        [ 'alf'        => qr/\.alf$/i ],
+        [ 'bsml'       => qr/\.(bsm|bsml)$/i ],
+        [ 'ctf'        => qr/\.ctf$/i ],
+        [ 'embl'       => qr/\.(embl|ebl|emb|dat)$/i ],
+        [ 'entrezgene' => qr/\.asn$/i ],
+        [ 'exp'        => qr/\.exp$/i ],
+        [ 'fasta'      => qr/\.(fasta|fast|fas|seq|fa|fsa|nt|aa|fna|faa)$/i ],
+        [ 'fastq'      => qr/\.fastq$/i ],
+        [ 'gcg'        => qr/\.gcg$/i ],
+        [ 'genbank'    => qr/\.(gb|gbank|genbank|gbk|gbs)$/i ],
+        [ 'phd'        => qr/\.(phd|phred)$/i ],
+        [ 'pir'        => qr/\.pir$/i ],
+        [ 'pln'        => qr/\.pln$/i ],
+        [ 'raw'        => qr/\.(txt)$/i ],
+        [ 'scf'        => qr/\.scf$/i ],
+        [ 'swiss'      => qr/\.(swiss|sp)$/i ],
+
+        # from Strider 1.4 Release Notes: The file name extensions used by
+        # Strider 1.4 are ".xdna", ".xdgn", ".xrna" and ".xprt" for DNA,
+        # DNA Degenerate, RNA and Protein Sequence Files, respectively
+        [ 'strider'    => qr/\.(xdna|xdgn|xrna|xprt)$/i ],
+
+        [ 'ztr'        => qr/\.ztr$/i ],
+    );
+
+    for my $rule (@suffix_rules) {
+        my ($format, $pattern) = @{$rule};
+        return $format if $filename =~ $pattern;
+    }
+    return;
+}
+
+# Destructor: calls close() on the object.
+sub DESTROY {
+    my ($self) = @_;
+    $self->close();
+}
+
+# tie() hook: wraps the given stream in a small hash blessed into $class.
+sub TIEHANDLE {
+    my $class  = shift;
+    my $stream = shift;
+    my %state  = ( 'seqio' => $stream );
+    return bless \%state, $class;
+}
+
+# <$fh> hook: in scalar context returns the next sequence; in list context
+# keeps calling next_seq() until it returns a false value and returns
+# everything collected.
+sub READLINE {
+    my ($self) = @_;
+
+    if (wantarray) {
+        my @all;
+        while ( my $seq = $self->{'seqio'}->next_seq() ) {
+            push @all, $seq;
+        }
+        return @all;
+    }
+    return $self->{'seqio'}->next_seq();
+}
+
+# print {$fh} hook: passes the printed items on to write_seq() of the
+# wrapped stream and returns its result.
+sub PRINT {
+    my $stream = shift->{'seqio'};
+    return $stream->write_seq(@_);
+}
+
+=head2 sequence_factory
+
+ Title   : sequence_factory
+ Usage   : $seqio->sequence_factory($seqfactory)
+ Function: Get/Set the Bio::Factory::SequenceFactoryI
+ Returns : Bio::Factory::SequenceFactoryI
+ Args    : [optional] Bio::Factory::SequenceFactoryI
+
+=cut
+
+# Get/set the sequence factory.  On set, the object must be a
+# Bio::Factory::SequenceFactoryI; it is also handed to the sequence
+# builder when that builder supports sequence_factory() and does not have
+# one yet.
+sub sequence_factory{
+    my ($self, $obj) = @_;
+    return $self->{'_seqio_seqfactory'} unless defined $obj;
+
+    unless ( ref($obj) && $obj->isa('Bio::Factory::SequenceFactoryI') ) {
+        $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()");
+    }
+    $self->{'_seqio_seqfactory'} = $obj;
+
+    my $builder = $self->sequence_builder();
+    if (   $builder
+        && $builder->can('sequence_factory')
+        && !$builder->sequence_factory() )
+    {
+        $builder->sequence_factory($obj);
+    }
+    return $self->{'_seqio_seqfactory'};
+}
+
+=head2 object_factory
+
+ Title   : object_factory
+ Usage   : $obj->object_factory($newval)
+ Function: This is an alias to sequence_factory with a more generic name.
+ Example :
+ Returns : value of object_factory (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+# Alias for sequence_factory(): same arguments, same return value.
+sub object_factory{
+    my $self = shift;
+    return $self->sequence_factory(@_);
+}
+
+=head2 sequence_builder
+
+ Title   : sequence_builder
+ Usage   : $seqio->sequence_builder($objbuilder)
+ Function: Get/Set the Bio::Factory::ObjectBuilderI used to build sequence
+           objects.
+
+           If you do not set the sequence object builder yourself, it
+           will in fact be an instance of L<Bio::Seq::SeqBuilder>, and
+           you may use all methods documented there to configure it.
+
+ Returns : a Bio::Factory::ObjectBuilderI compliant object
+ Args    : [optional] a Bio::Factory::ObjectBuilderI compliant object
+
+=cut
+
+# Get/set the object builder.  On set, the object must be a
+# Bio::Factory::ObjectBuilderI, otherwise an exception is thrown.
+sub sequence_builder{
+    my ($self, $obj) = @_;
+    return $self->{'_object_builder'} unless defined $obj;
+
+    unless ( ref($obj) && $obj->isa('Bio::Factory::ObjectBuilderI') ) {
+        $self->throw("Must provide a valid Bio::Factory::ObjectBuilderI object to ".ref($self)."::sequence_builder()");
+    }
+    $self->{'_object_builder'} = $obj;
+    return $self->{'_object_builder'};
+}
+
+=head2 location_factory
+
+ Title   : location_factory
+ Usage   : $seqio->location_factory($locfactory)
+ Function: Get/Set the Bio::Factory::LocationFactoryI object to be used for
+           location string parsing
+ Returns : a Bio::Factory::LocationFactoryI implementing object
+ Args    : [optional] on set, a Bio::Factory::LocationFactoryI implementing
+           object.
+
+=cut
+
+# Get/set the location factory.  On set, the object must be a
+# Bio::Factory::LocationFactoryI, otherwise an exception is thrown.
+sub location_factory{
+    my ($self, $obj) = @_;
+    return $self->{'_seqio_locfactory'} unless defined $obj;
+
+    unless ( ref($obj) && $obj->isa('Bio::Factory::LocationFactoryI') ) {
+        $self->throw("Must provide a valid Bio::Factory::LocationFactoryI" .
+                     " object to ".ref($self)."->location_factory()");
+    }
+    $self->{'_seqio_locfactory'} = $obj;
+    return $self->{'_seqio_locfactory'};
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SeqUtils.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SeqUtils.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SeqUtils.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,775 @@
+# $Id: SeqUtils.pm,v 1.34.2.2 2006/11/16 19:05:59 cjfields Exp $
+#
+# BioPerl module for Bio::SeqUtils
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqUtils - Additional methods for PrimarySeq objects
+
+=head1 SYNOPSIS
+
+    use Bio::SeqUtils;
+    # get a Bio::PrimarySeqI compliant object, $seq, somehow
+    $util = Bio::SeqUtils->new;
+    $polypeptide_3char = $util->seq3($seq);
+    # or
+    $polypeptide_3char = Bio::SeqUtils->seq3($seq);
+
+    # set the sequence string (stored in one char code in the object)
+    Bio::SeqUtils->seq3($seq, $polypeptide_3char);
+
+    # translate a sequence in all six frames
+    @seqs = Bio::SeqUtils->translate_6frames($seq);
+
+    # inplace editing of the sequence
+    Bio::SeqUtils->mutate($seq,
+                          Bio::LiveSeq::Mutation->new(-seq => 'c',
+                                                      -pos => 3
+                                                     ));
+    # mutate a sequence to desired similarity%
+    $newseq = Bio::SeqUtils-> evolve
+        ($seq, $similarity, $transition_transversion_rate);
+
+
+    # concatenate two or more sequences with annotations and features,
+    # the first sequence will be modified
+    Bio::SeqUtils->cat(@seqs);
+
+    # truncate a sequence, retaining features and adjusting their
+    # coordinates if necessary
+    my $truncseq = Bio::SeqUtils->trunc_with_features($seq, 100, 200);
+
+    # reverse complement a sequence and its features
+    my $revcomseq = Bio::SeqUtils->revcom_with_features($seq);
+
+=head1 DESCRIPTION
+
+This class is a holder of methods that work on Bio::PrimarySeqI-
+compliant sequence objects, e.g. Bio::PrimarySeq and
+Bio::Seq. These methods are not part of the Bio::PrimarySeqI
+interface and should in general not be essential to the primary function
+of sequence objects. If you are thinking of adding essential
+functions, it might be better to create your own sequence class.
+See L<Bio::PrimarySeqI>, L<Bio::PrimarySeq>, and L<Bio::Seq> for more.
+
+The methods take as their first argument a sequence object. It is
+possible to use the methods without first creating a SeqUtils object,
+i.e. to call them as class methods, as in Bio::SeqUtils-E<gt>seq3($seq).
+
+The first two methods, seq3() and seq3in(), give out or read in protein
+sequences coded in three letter IUPAC amino acid codes.
+
+The next two methods, translate_3frames() and translate_6frames(), wrap
+around the standard translate method to give back an array of three
+forward or all six frame translations.
+
+The mutate() method mutates the sequence string with a mutation
+description object.
+
+The cat() method concatenates two or more sequences. The first sequence 
+is modified by addition of the remaining sequences. All annotations and 
+sequence features will be transferred.
+
+The revcom_with_features() and trunc_with_features() methods are similar
+to the revcom() and trunc() methods from Bio::Seq, but also adjust any
+features associated with the sequence as appropriate.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Roy Chaudhuri, roy at colibase d bham d ac d uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqUtils;
+use vars qw(%ONECODE %THREECODE);
+use strict;
+use Carp;
+
+use base qw(Bio::Root::Root);
+# new inherited from RootI
+
+# Build the amino acid code tables at compile time: %ONECODE maps three
+# letter codes to one letter codes and %THREECODE is its exact inverse.
+BEGIN {
+    # Note : Ambiguity code 'J' = I/L (used for ambiguities in mass-spec data)
+    %ONECODE = (
+        'Ala' => 'A', 'Asx' => 'B', 'Cys' => 'C',
+        'Asp' => 'D', 'Glu' => 'E', 'Phe' => 'F',
+        'Gly' => 'G', 'His' => 'H', 'Ile' => 'I',
+        'Lys' => 'K', 'Leu' => 'L', 'Met' => 'M',
+        'Asn' => 'N', 'Pro' => 'P', 'Gln' => 'Q',
+        'Arg' => 'R', 'Ser' => 'S', 'Thr' => 'T',
+        'Val' => 'V', 'Trp' => 'W', 'Xaa' => 'X',
+        'Tyr' => 'Y', 'Glx' => 'Z', 'Ter' => '*',
+        'Sec' => 'U', 'Pyl' => 'O', 'Xle' => 'J',
+    );
+
+    # Every one letter code above is unique, so swapping keys and values
+    # yields the complete reverse table.
+    %THREECODE = reverse %ONECODE;
+}
+
+=head2 seq3
+
+ Title   : seq3
+ Usage   : $string = Bio::SeqUtils->seq3($seq)
+ Function: Read only method that returns the amino acid sequence as a
+           string of three letter codes. alphabet has to be
+           'protein'. Output follows the IUPAC standard plus 'Ter' for
+           terminator. Any unknown character, including the default
+           unknown character 'X', is changed into 'Xaa'. The non-standard
+           amino acid selenocysteine is recognized (Sec, U).
+
+ Returns : A scalar
+ Args    : sequence object
+           optional character used for stop in the protein sequence,
+              defaults to '*'
+           optional string used to separate the output amino acid codes,
+              defaults to ''
+
+=cut
+
+# Return the protein sequence of $seq as a string of three letter codes.
+#
+# Args    : $seq  - Bio::PrimarySeqI object with alphabet 'protein'
+#           $stop - optional single character to be read as a terminator
+#                   in addition to the built-in '*'
+#           $sep  - optional separator placed between codes (default '')
+# Returns : the three letter string; residues without a known code become
+#           'Xaa'.  An empty sequence gives an empty string.
+# Throws  : if $seq is not a protein Bio::PrimarySeqI or $stop is not
+#           exactly one character long.
+sub seq3 {
+    my ($self, $seq, $stop, $sep) = @_;
+
+    $seq->isa('Bio::PrimarySeqI')
+        || $self->throw("Not a Bio::PrimarySeqI object but [$seq]");
+    $seq->alphabet eq 'protein'
+        || $self->throw('Not a protein sequence');
+
+    # Work on a private copy so that a custom stop character given in one
+    # call does not leak into the package-wide table used by later calls.
+    my %three_for = %THREECODE;
+    if ( defined $stop ) {
+        length($stop) != 1
+            and $self->throw("One character stop needed, not [$stop]");
+        $three_for{$stop} = 'Ter';
+    }
+    $sep ||= '';
+
+    return join $sep,
+           map { $three_for{$_} || 'Xaa' }
+           split //, uc $seq->seq;
+}
+
+=head2 seq3in
+
+ Title   : seq3in
+ Usage   : $seq = Bio::SeqUtils->seq3in($seq, 'MetGlyTer')
+ Function: Method for changing of the sequence of a
+           Bio::PrimarySeqI sequence object. The three letter amino
+           acid input string is converted into one letter code.  Any
+           unknown character triplet, including the default 'Xaa', is
+           converted into 'X'.
+
+ Returns : Bio::PrimarySeq object
+ Args    : sequence string
+           optional character to be used for stop in the protein sequence,
+              defaults to '*'
+           optional character to be used for unknown in the protein sequence,
+              defaults to 'X'
+
+=cut
+
+# Set the sequence of $seq from a string of three letter amino acid codes.
+#
+# Args    : $seq     - Bio::PrimarySeqI object with alphabet 'protein'
+#           $string  - three letter codes, read case-insensitively in
+#                      consecutive triplets (a trailing partial triplet
+#                      is ignored)
+#           $stop    - optional single character to emit for 'Ter'
+#           $unknown - optional single character to emit for 'Xaa' and for
+#                      any unrecognised triplet
+# Returns : $seq, after its sequence has been replaced
+# Throws  : if $seq is not a protein Bio::PrimarySeqI or $stop / $unknown
+#           is not exactly one character long.
+sub seq3in {
+    my ($self, $seq, $string, $stop, $unknown) = @_;
+
+    $seq->isa('Bio::PrimarySeqI')
+        || $self->throw("Not a Bio::PrimarySeqI object but [$seq]");
+    $seq->alphabet eq 'protein'
+        || $self->throw('Not a protein sequence');
+
+    # Private copy: per-call stop/unknown overrides must not alter the
+    # package-wide table that other methods read.
+    my %one_for = %ONECODE;
+    if ( defined $stop ) {
+        length($stop) != 1
+            and $self->throw("One character stop needed, not [$stop]");
+        $one_for{'Ter'} = $stop;
+    }
+    if ( defined $unknown ) {
+        length($unknown) != 1
+            and $self->throw("One character unknown needed, not [$unknown]");
+        $one_for{'Xaa'} = $unknown;
+    }
+
+    my $aas;
+    my $length = length($string) - 2;
+    for ( my $i = 0 ; $i < $length ; $i += 3 ) {
+        my $aa3 = ucfirst( lc( substr( $string, $i, 3 ) ) );
+
+        # exists, not truth: a replacement character of '0' is legitimate
+        $aas .= exists $one_for{$aa3} ? $one_for{$aa3} : $one_for{'Xaa'};
+    }
+    $seq->seq($aas);
+    return $seq;
+}
+
+=head2 translate_3frames
+
+ Title   : translate_3frames
+ Usage   : @prots = Bio::SeqUtils->translate_3frames($seq)
+ Function: Translate a nucleotide sequence in three forward frames.
+           The IDs of the sequences are appended with '-0F', '-1F', '-2F'.
+ Returns : An array of seq objects
+ Args    : sequence object
+           same arguments as to Bio::PrimarySeqI::translate
+
+=cut
+
+# Translate $seq in the three forward frames.
+#
+# Args    : $seq  - object providing translate()
+#           @args - positional arguments as for translate(); the frame
+#                   argument (third position) is ignored because every
+#                   frame 0..2 is translated in turn
+# Returns : list of three translations whose ids are the id of $seq
+#           followed by '-0F', '-1F' and '-2F'
+# Throws  : if $seq has no translate() method
+sub translate_3frames {
+    my ($self, $seq, @args) = @_;
+
+    $self->throw("Object [$seq] of class [" . ref($seq) . ']  can not be translated.')
+        unless $seq->can('translate');
+
+    my ($stop, $unknown, undef, $tableid, $fullCDS, $throw) = @args;
+    my @seqs;
+    for my $frame ( 0 .. 2 ) {
+        my $translation =
+            $seq->translate($stop, $unknown, $frame, $tableid, $fullCDS, $throw);
+        $translation->id( $seq->id . "-" . $frame . "F" );
+        push @seqs, $translation;
+    }
+
+    return @seqs;
+}
+
+=head2 translate_6frames
+
+ Title   : translate_6frames
+ Usage   : @prots = Bio::SeqUtils->translate_6frames($seq)
+ Function: translate a nucleotide sequence in all six frames
+           The IDs of the sequences are appended with '-0F', '-1F', '-2F',
+           '-0R', '-1R', '-2R'.
+ Returns : An array of seq objects
+ Args    : sequence object
+           same arguments as to Bio::PrimarySeqI::translate
+
+=cut
+
+# Translate $seq in all six frames: the three forward frames of the
+# sequence followed by the three frames of its reverse complement, whose
+# ids get their trailing 'F' replaced by 'R'.
+sub translate_6frames {
+    my ($self, $seq, @args) = @_;
+
+    my @forward  = $self->translate_3frames($seq, @args);
+    my @backward = $self->translate_3frames($seq->revcom, @args);
+
+    for my $protein (@backward) {
+        my ($id) = $protein->id;
+        $id =~ s/F$/R/g;
+        $protein->id($id);
+    }
+    return @forward, @backward;
+}
+
+
+=head2 valid_aa
+
+ Title   : valid_aa
+ Usage   : my @aa = $table->valid_aa
+ Function: Retrieves a list of the valid amino acid codes.
+           The list is ordered so that the first 23 codes are for unique
+           amino acids. The rest are ['B', 'Z', 'X', '*'].
+ Returns : array of all the valid amino acid codes
+ Args    : [optional] $code => [0 -> return list of 1 letter aa codes,
+				1 -> return list of 3 letter aa codes,
+				2 -> return associative array of both ]
+
+=cut
+
+# List the valid amino acid codes.
+#   $code false : sorted one letter codes, with B, Z, X and * moved to the end
+#   $code == 1  : sorted three letter codes, with Asx, Glx, Xaa, Ter at the end
+#   $code == 2  : hash mapping each code to its counterpart, both directions
+# Any other value warns and returns an empty list.
+sub valid_aa{
+    my ($self, $code) = @_;
+
+    if ( !$code ) {
+        my @codes = grep { !/[BZX\*]/ } sort values %ONECODE;
+        push @codes, qw(B Z X *);    # so they are in correct order ?
+        return @codes;
+    }
+    elsif ( $code == 1 ) {
+        my @codes = grep { !/(Asx|Glx|Xaa|Ter)/ } sort keys %ONECODE;
+        push @codes, ( 'Asx', 'Glx', 'Xaa', 'Ter' );
+        return @codes;
+    }
+    elsif ( $code == 2 ) {
+        my %codes = %ONECODE;
+        while ( my ($three, $one) = each %ONECODE ) {
+            $codes{$one} = $three;
+        }
+        return %codes;
+    }
+    else {
+        $self->warn("unrecognized code in ".ref($self)." method valid_aa()");
+        return ();
+    }
+}
+
+=head2 mutate
+
+ Title   : mutate
+ Usage   : Bio::SeqUtils->mutate($seq,$mutation1, $mutation2);
+ Function: Inplace editing of the sequence.
+
+           The second argument can be a Bio::LiveSeq::Mutation object
+           or an array of them. The mutations are applied sequentially
+           checking only that their position is within the current
+           sequence.  Insertions are inserted before the given
+           position.
+
+ Returns : boolean
+ Args    : sequence object
+           mutation, a Bio::LiveSeq::Mutation object, or an array of them
+
+See L<Bio::LiveSeq::Mutation>.
+
+=cut
+
+# Apply one or more mutations to $seq in place.
+#
+# Args    : $seq       - Bio::PrimarySeqI object, modified in place
+#           @mutations - Bio::LiveSeq::Mutation objects, applied in order
+# Returns : 1
+# Throws  : if $seq or any mutation has the wrong type, or if a mutation
+#           starts beyond the end of the sequence as it stands when that
+#           mutation is applied.
+#
+# Every mutation is type-checked before the first one is applied, so a bad
+# entry later in the list cannot leave the sequence half edited.  An empty
+# list of mutations is a no-op.
+sub mutate {
+    my ($self, $seq, @mutations) = @_;
+
+    $self->throw("Object [$seq] of class [" . ref($seq)
+                 . '] should be a Bio::PrimarySeqI ')
+        unless $seq->isa('Bio::PrimarySeqI');
+
+    for my $mutation (@mutations) {
+        # eval: the entry may be undef or an unblessed value
+        next if eval { $mutation->isa('Bio::LiveSeq::Mutation') };
+        my $shown = defined $mutation ? $mutation : 'undef';
+        $self->throw("Object [$shown] of class [" . ref($mutation)
+                     . '] should be a Bio::LiveSeq::Mutation');
+    }
+
+    foreach my $mutation (@mutations) {
+        $self->throw('Attempting to mutate sequence beyond its length')
+            unless $mutation->pos - 1 <= $seq->length;
+
+        my $string = $seq->seq;
+        substr $string, $mutation->pos - 1, $mutation->len, $mutation->seq;
+        $seq->seq($string);
+    }
+    1;
+}
+
+
+=head2 cat
+
+  Title   : cat
+  Usage   : my $catseq = Bio::SeqUtils->cat(@seqs)
+  Function: Concatenates an array of Bio::Seq objects, using the first sequence
+            as a target. Annotations and sequence features are copied over 
+            from any additional objects. Adjusts the coordinates of copied 
+            features.
+  Returns : a boolean
+  Args    : array of sequence objects
+
+Note that annotations have no sequence locations. If you concatenate
+sequences with the same annotations they will all be added.
+
+=cut
+
+# Append the sequences in @seqs to $seq, which is modified in place.
+#
+# Args    : $seq  - target Bio::PrimarySeqI object
+#           @seqs - Bio::PrimarySeqI objects to append, in order
+# Returns : 1
+# Throws  : if any argument is not a Bio::PrimarySeqI or if an appended
+#           sequence has a different alphabet from the target.
+#
+# Annotations are copied when both objects are Bio::AnnotatableI, and
+# features (shifted by the target's length before the append) when both
+# are Bio::SeqI.
+sub cat {
+    my ($self, $seq, @seqs) = @_;
+    $self->throw("Object [$seq] of class [" . ref($seq)
+                 . '] should be a Bio::PrimarySeqI ')
+        unless $seq->isa('Bio::PrimarySeqI');
+
+    for my $catseq (@seqs) {
+        $self->throw("Object [$catseq] of class [" . ref($catseq)
+                     . '] should be a Bio::PrimarySeqI ')
+            unless $catseq->isa('Bio::PrimarySeqI');
+
+        $self->throw('Trying to concatenate sequences with different alphabets: '.
+                     $seq->display_id. '('. $seq->alphabet. ') and '. $catseq->display_id.
+                     '('. $catseq->alphabet. ')')
+            unless $catseq->alphabet eq $seq->alphabet;
+
+        # offset for the appended features: target length before appending
+        my $length = $seq->length;
+        $seq->seq( $seq->seq . $catseq->seq );
+
+        # move annotations
+        if ( $seq->isa("Bio::AnnotatableI") and $catseq->isa("Bio::AnnotatableI") ) {
+            foreach my $key ( $catseq->annotation->get_all_annotation_keys() ) {
+                foreach my $value ( $catseq->annotation->get_Annotations($key) ) {
+                    $seq->annotation->add_Annotation($key, $value);
+                }
+            }
+        }
+
+        # move SeqFeatures
+        if ( $seq->isa('Bio::SeqI') and $catseq->isa('Bio::SeqI') ) {
+            for my $feat ( $catseq->get_SeqFeatures ) {
+                $seq->add_SeqFeature( $self->_coord_adjust($feat, $length) );
+            }
+        }
+    }
+    1;
+}
+
+
+=head2 trunc_with_features
+
+ Title   : trunc_with_features
+ Usage   : $trunc=Bio::SeqUtils->trunc_with_features($seq, $start, $end);
+ Function: Like Bio::Seq::trunc, but keeps features (adjusting coordinates
+           where necessary). Features that partially overlap the region have
+           their location changed to a Bio::Location::Fuzzy.
+ Returns : A new sequence object
+ Args    : A sequence object, start coordinate, end coordinate (inclusive)
+
+
+=cut
+
+# Truncate $seq to [$start, $end] (inclusive), keeping its annotations and
+# every feature that overlaps the region.
+#
+# Args    : $seq - Bio::SeqI object; $start, $end - region coordinates
+# Returns : the new, truncated sequence object
+# Throws  : if $seq is not a Bio::SeqI
+#
+# Kept features are rebuilt by _coord_adjust() with their coordinates
+# shifted so that $start becomes position 1.
+sub trunc_with_features{
+    use Bio::Range;
+    my ($self, $seq, $start, $end) = @_;
+    $self->throw("Object [$seq] of class [" . ref($seq)
+                 . '] should be a Bio::SeqI ')
+        unless $seq->isa('Bio::SeqI');
+
+    my $trunc      = $seq->trunc($start, $end);
+    my $truncrange = Bio::Range->new(-start => $start, -end => $end, -strand => 0);
+
+    # move annotations
+    foreach my $key ( $seq->annotation->get_all_annotation_keys() ) {
+        foreach my $value ( $seq->annotation->get_Annotations($key) ) {
+            $trunc->annotation->add_Annotation($key, $value);
+        }
+    }
+
+    # move features: keep the overlapping ones, with adjusted coordinates
+    my @kept = map {
+        $_->overlaps($truncrange)
+            ? $self->_coord_adjust($_, 1 - $start, $end + 1 - $start)
+            : ()
+    } $seq->get_SeqFeatures;
+    $trunc->add_SeqFeature(@kept);
+
+    return $trunc;
+}
+
+
+
+=head2 _coord_adjust
+
+  Title   : _coord_adjust
+  Usage   : my $newfeat=Bio::SeqUtils->_coord_adjust($feature, 100, $seq->length);
+  Function: Recursive subroutine to adjust the coordinates of a feature
+            and all its subfeatures. If a sequence length is specified, then
+            any adjusted features that have locations beyond the boundaries
+            of the sequence are converted to Bio::Location::Fuzzy objects.
+
+  Returns : A Bio::SeqFeatureI compliant object.
+  Args    : A Bio::SeqFeatureI compliant object,
+            the number of bases to add to the coordinates
+            (optional) the length of the parent sequence
+
+
+=cut
+
+# Build a copy of $feat, and recursively of its subfeatures, with every
+# coordinate shifted by $add.
+#
+# Args    : $feat   - Bio::SeqFeatureI object
+#           $add    - number to add to each coordinate
+#           $length - optional length of the parent sequence
+# Returns : a new Bio::SeqFeature::Generic carrying the primary tag,
+#           annotations, adjusted location(s) and adjusted subfeatures
+# Throws  : if $feat is not a Bio::SeqFeatureI
+#
+# A shifted coordinate below 1 becomes '<1'; when $length is given, one
+# above $length becomes ">$length".
+# NOTE(review): subfeatures are obtained with remove_SeqFeatures(), which
+# by its name detaches them from $feat - confirm that callers expect this.
+sub _coord_adjust {
+    my ($self, $feat, $add, $length) = @_;
+    $self->throw("Object [$feat] of class [" . ref($feat)
+                 . '] should be a Bio::SeqFeatureI ')
+        unless $feat->isa('Bio::SeqFeatureI');
+
+    my @adjsubfeat;
+    for my $subfeat ( $feat->remove_SeqFeatures ) {
+        push @adjsubfeat, $self->_coord_adjust($subfeat, $add, $length);
+    }
+
+    my @loc;
+    for my $part ( $feat->location->each_Location ) {
+        my @coords = ( $part->start, $part->end );
+        my $strand = $part->strand;
+        my $type   = $part->location_type;
+
+        # Shift the numeric part of each coordinate, clamping to the
+        # fuzzy markers when the result leaves the sequence.
+        for my $coord (@coords) {
+            $coord =~ s/(\d+)/$add + $1 < 1 ? '<1' : (defined $length and $add + $1 > $length) ? ">$length" : $add + $1/ge;
+        }
+        my ($newstart, $newend) = @coords;
+
+        unless ( $type eq 'IN-BETWEEN' ) {
+            push @loc, Bio::Location::Fuzzy->new(-start         => $newstart,
+                                                 -end           => $newend,
+                                                 -strand        => $strand,
+                                                 -location_type => $type
+                                                );
+        } else {
+            push @loc, Bio::Location::Simple->new(-start         => $newstart,
+                                                  -end           => $newend,
+                                                  -strand        => $strand,
+                                                  -location_type => $type
+                                                 );
+        }
+    }
+
+    my $newfeat = Bio::SeqFeature::Generic->new(-primary => $feat->primary_tag);
+    foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
+        foreach my $value ( $feat->annotation->get_Annotations($key) ) {
+            $newfeat->annotation->add_Annotation($key, $value);
+        }
+    }
+
+    # a single location is used as is, several are wrapped in a Split
+    if ( @loc == 1 ) {
+        $newfeat->location( $loc[0] );
+    } else {
+        my $loc = Bio::Location::Split->new;
+        $loc->add_sub_Location(@loc);
+        $newfeat->location($loc);
+    }
+    $newfeat->add_SeqFeature($_) for @adjsubfeat;
+    return $newfeat;
+}
+
+
+=head2 revcom_with_features
+
+ Title   : revcom_with_features
+ Usage   : $revcom=Bio::SeqUtils->revcom_with_features($seq);
+ Function: Like Bio::Seq::revcom, but keeps features (adjusting coordinates
+           as appropriate).
+ Returns : A new sequence object
+ Args    : A sequence object
+
+
+=cut
+
+# Reverse complement $seq, keeping its annotations and features.
+#
+# Args    : $seq - Bio::SeqI object
+# Returns : the new, reverse-complemented sequence object
+# Throws  : if $seq is not a Bio::SeqI
+#
+# Each feature is rebuilt by _feature_revcom() using the length of $seq.
+sub revcom_with_features{
+    my ($self, $seq) = @_;
+    $self->throw("Object [$seq] of class [" . ref($seq)
+                 . '] should be a Bio::SeqI ')
+        unless $seq->isa('Bio::SeqI');
+    my $revcom = $seq->revcom;
+
+    # move annotations
+    foreach my $key ( $seq->annotation->get_all_annotation_keys() ) {
+        foreach my $value ( $seq->annotation->get_Annotations($key) ) {
+            $revcom->annotation->add_Annotation($key, $value);
+        }
+    }
+
+    # move features; the parent length is the same for every feature
+    my $length = $seq->length;
+    $revcom->add_SeqFeature(
+        map { $self->_feature_revcom($_, $length) } $seq->get_SeqFeatures
+    );
+    return $revcom;
+}
+
+=head2 _feature_revcom
+
+  Title   : _feature_revcom
+  Usage   : my $newfeat=Bio::SeqUtils->_feature_revcom($feature, $seq->length);
+  Function: Recursive subroutine to reverse complement a feature and
+            all its subfeatures. The length of the parent sequence must be
+            specified.
+
+  Returns : A Bio::SeqFeatureI compliant object.
+  Args    : A Bio::SeqFeatureI compliant object,
+            the length of the parent sequence
+
+
+=cut
+
+sub _feature_revcom {
+    my ($self, $feat, $length) = @_;
+
+    if (!$feat->isa('Bio::SeqFeatureI')) {
+        $self->throw('Object [$feat] '. 'of class ['. ref($feat).
+                     '] should be a Bio::SeqFeatureI ');
+    }
+
+    # Convert the subfeatures first (depth-first recursion).
+    # NOTE(review): remove_SeqFeatures presumably detaches the
+    # subfeatures from the input feature - confirm callers expect that.
+    my @adjsubfeat;
+    foreach my $child ($feat->remove_SeqFeatures) {
+        push @adjsubfeat, $self->_feature_revcom($child, $length);
+    }
+
+    # Mirror every sub-location: the strand is flipped and the old start
+    # becomes the new end (and vice versa).
+    my @loc;
+    foreach my $part ($feat->location->each_Location) {
+        my $type      = $part->location_type;
+        my $oldstrand = $part->strand;
+        my $strand    = $oldstrand == -1 ?  1
+                      : $oldstrand ==  1 ? -1
+                      :                    $oldstrand;
+        my $newend   = $self->_coord_revcom($part->start,
+                                            $part->start_pos_type,
+                                            $length);
+        my $newstart = $self->_coord_revcom($part->end,
+                                            $part->end_pos_type,
+                                            $length);
+
+        # 'IN-BETWEEN' locations are built as Simple, everything else as
+        # Fuzzy.
+        my $loc_class = $type eq 'IN-BETWEEN' ? 'Bio::Location::Simple'
+                                              : 'Bio::Location::Fuzzy';
+        push @loc, $loc_class->new(-start         => $newstart,
+                                   -end           => $newend,
+                                   -strand        => $strand,
+                                   -location_type => $type);
+    }
+
+    # Build the replacement feature and copy the annotations across.
+    my $newfeat = Bio::SeqFeature::Generic->new(-primary=>$feat->primary_tag);
+    foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
+        foreach my $value ( $feat->annotation->get_Annotations($key) ) {
+            $newfeat->annotation->add_Annotation($key, $value);
+        }
+    }
+
+    # A single location is used directly; anything else is wrapped in a
+    # split location.
+    if (@loc == 1) {
+        $newfeat->location($loc[0]);
+    } else {
+        my $split = Bio::Location::Split->new;
+        $split->add_sub_Location(@loc);
+        $newfeat->location($split);
+    }
+
+    foreach my $sub (@adjsubfeat) {
+        $newfeat->add_SeqFeature($sub);
+    }
+    return $newfeat;
+}
+
+# Internal helper: map one coordinate string onto the reverse strand of a
+# sequence of length $length (position x becomes $length+1-x).
+#
+# Args    : coordinate string, its position type (e.g. 'BETWEEN',
+#           'WITHIN', 'BEFORE', 'AFTER'), and the parent sequence length
+# Returns : the mirrored coordinate string
+sub _coord_revcom {
+    my ($self, $coord, $type, $length)=@_;
+    if ($type eq 'BETWEEN' or $type eq 'WITHIN') {
+        # Two-number coordinates: mirror both numbers and swap them so
+        # the smaller one still comes first.  The parentheses are
+        # required because '+', '-' and '.' share one precedence level,
+        # and (\D+) keeps the separator from swallowing digits of the
+        # second number.
+        my $swapped =
+            ($coord =~ s/(\d+)(\D+)(\d+)/($length+1-$3).$2.($length+1-$1)/ge);
+        # A lone number is simply mirrored.
+        $coord =~ s/(\d+)/$length+1-$1/e unless $swapped;
+    } else {
+        $coord=~s/(\d+)/$length+1-$1/ge;
+        # "before" on one strand is "after" on the other, so the marker
+        # is inverted.
+        $coord='>'.$coord if $type eq 'BEFORE';
+        $coord='<'.$coord if $type eq 'AFTER';
+    }
+    return $coord;
+}
+
+=head2 evolve
+
+  Title   : evolve
+  Usage   : my $newseq = Bio::SeqUtils->
+                evolve($seq, $similarity, $transition_transversion_rate);
+  Function: Mutates the sequence by point mutations until the similarity of
+            the new sequence has decreased to the required level. 
+            Transition/transversion rate is adjustable.
+  Returns : A new Bio::PrimarySeq object
+  Args    : sequence object
+            percentage similarity (e.g. 80)
+            tr/tv rate, optional, defaults to 1 (= 1:1)
+
+Set the verbosity of the Bio::SeqUtils object to positive integer to
+see the mutations as they happen.
+
+This method works only on nucleotide sequences. It prints a warning if
+you set the target similarity to be less than 25%.
+
+Transition/transversion ratio is an observed attribute of a sequence
+comparison. We are dealing here with the transition/transversion rate
+that we set for our model of sequence evolution.
+
+=cut
+
+sub evolve {
+    my ($self, $seq, $sim, $rate) = @_;
+    $rate ||= 1;
+
+    $self->throw('Object [$seq] '. 'of class ['. ref($seq).
+                     '] should be a Bio::PrimarySeqI ')
+            unless $seq->isa('Bio::PrimarySeqI');
+
+    $self->throw("[$sim] ". ' should be a positive integer or float under 100')
+            unless $sim =~ /^[+\d.]+$/ and $sim <= 100;
+
+    $self->warn("Nucleotide sequences are 25% similar by chance.
+        Do you really want to set similarity to [$sim]%?\n")
+            unless $sim >25 ;
+
+    $self->throw('Only nucleotide sequences are supported')
+            if $seq->alphabet eq 'protein';
+
+    # Possible substitutions for each nucleotide.  The transition
+    # (purine<->purine a<->g, pyrimidine<->pyrimidine c<->t) comes first,
+    # followed by the two transversions.
+    my %changes = (
+        'a' => ['g', 'c', 't'],
+        'g' => ['a', 'c', 't'],
+        'c' => ['t', 'a', 'g'],
+        't' => ['c', 'a', 'g'],
+    );
+
+    # given the desired rate, find out where cut off points need to be
+    # when random numbers are generated from 0 to 100
+    # we are ignoring identical mutations (e.g. A->A) to speed things up
+    my $bin_size = 100/($rate + 2);
+    my $transition = 100 - (2*$bin_size);
+    my $first_transversion = $transition + $bin_size;
+
+    # unify the look of sequence strings: lower case, and every u
+    # (not just the first one) treated as t
+    my $string = lc $seq->seq;
+    $string =~ tr/u/t/;
+    # store the original sequence string
+    my $oristring = $string;
+    my $length = $seq->length;
+
+    # Only a/c/g/t have an entry in %changes.  Other symbols (gaps,
+    # ambiguity codes) are never mutated, so make sure the requested
+    # similarity can be reached at all; otherwise the loop below would
+    # never terminate.
+    my $mutable = ($string =~ tr/acgt//);
+    $self->throw("Similarity [$sim]% cannot be reached: only $mutable of ".
+                 "$length residues are unambiguous nucleotides")
+            if !$length or 100 * ($length - $mutable) / $length > $sim;
+
+    while (1) {
+        # find the location in the string to change
+        my $loc = int (rand $length) + 1;
+
+        # nucleotide to change; symbols we cannot mutate are skipped
+        my $oldnuc = substr $string, $loc-1, 1;
+        next unless exists $changes{$oldnuc};
+
+        # nucleotide it is changed to
+        my $choose = rand(100);
+        my $newnuc = $choose < $transition         ? $changes{$oldnuc}[0]
+                   : $choose < $first_transversion ? $changes{$oldnuc}[1]
+                   :                                 $changes{$oldnuc}[2];
+
+        # do the change
+        substr $string, $loc-1, 1 , $newnuc;
+
+        $self->debug("$loc$oldnuc>$newnuc\n");
+
+        # stop evolving if the limit has been reached
+        last if $self->_get_similarity($oristring, $string) <= $sim;
+    }
+
+    return Bio::PrimarySeq->new(-id => $seq->id. "-$sim",
+                                -description => $seq->description,
+                                -seq => $string
+                               );
+}
+
+
+# Internal helper: percentage of positions (0-100) at which $seq carries
+# the same character as $oriseq.  The comparison runs over the length of
+# $oriseq.
+sub _get_similarity  {
+    my ($self, $oriseq, $seq) = @_;
+
+    my $len = length($oriseq);
+    # Start at 0 so that "no identical position" is a proper number
+    # rather than undef.
+    my $matches = 0;
+
+    for my $i (0 .. $len - 1) {
+        $matches++ if substr($oriseq, $i, 1) eq substr($seq, $i, 1);
+    }
+    return 100 * $matches/$len;
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAlign.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAlign.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAlign.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2540 @@
+# $Id: SimpleAlign.pm,v 1.108.2.6 2006/11/17 09:32:42 sendu Exp $
+# BioPerl module for SimpleAlign
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+#
+#  History:
+#	11/3/00 Added threshold feature to consensus and consensus_aa  - PS
+#	May 2001 major rewrite - Heikki Lehvaslaiho
+
+=head1 NAME
+
+Bio::SimpleAlign - Multiple alignments held as a set of sequences
+
+=head1 SYNOPSIS
+
+  # Use Bio::AlignIO to read in the alignment
+  $str = Bio::AlignIO->new(-file => 't/data/testaln.pfam');
+  $aln = $str->next_aln();
+
+  # Describe
+  print $aln->length;
+  print $aln->no_residues;
+  print $aln->is_flush;
+  print $aln->no_sequences;
+  print $aln->score;
+  print $aln->percentage_identity;
+  print $aln->consensus_string(50);
+
+  # Find the position in the alignment for a sequence location
+  $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
+
+  # Extract sequences and check values for the alignment column $pos
+  foreach $seq ($aln->each_seq) {
+      $res = $seq->subseq($pos, $pos);
+      $count{$res}++;
+  }
+  foreach $res (keys %count) {
+      printf "Res: %s  Count: %2d\n", $res, $count{$res};
+  }
+
+  # Manipulate
+  $aln->remove_seq($seq);
+  $mini_aln = $aln->slice(20,30);  # get a block of columns
+  $mini_aln = $aln->select_noncont(1,3,5,7,11); # get single columns
+  $new_aln = $aln->remove_columns([20,30]); # remove by position
+  $new_aln = $aln->remove_columns(['mismatch']); # remove by property
+
+  # Analyze
+  $str = $aln->consensus_string($threshold_percent);
+  $str = $aln->match_line();
+  $str = $aln->cigar_line();
+  $id = $aln->percentage_identity;
+
+See the module documentation for details and more methods.
+
+=head1 DESCRIPTION
+
+SimpleAlign is an object that handles a multiple sequence alignment
+(MSA). It is very permissive of types (it does not insist on sequences
+being all same length, for example). Think of it as a set of sequences
+with a whole series of built-in manipulations and methods for reading and
+writing alignments.
+
+SimpleAlign uses L<Bio::LocatableSeq>, a subclass of L<Bio::PrimarySeq>,
+to store its sequences. These are subsequences with a start and end
+positions in the parent reference sequence. Each sequence in the
+SimpleAlign object is a Bio::LocatableSeq.
+
+SimpleAlign expects the combination of name, start, and end for a
+given sequence to be unique in the alignment, and this is the key for the
+internal hashes (name, start, end are abbreviated C<nse> in the code).
+However, in some cases people do not want the name/start-end to be displayed:
+either multiple names in an alignment or names specific to the alignment
+(ROA1_HUMAN_1, ROA1_HUMAN_2 etc). These names are called
+C<displayname>, and are generally what is used to print out the
+alignment. They default to name/start-end.
+
+The SimpleAlign Module is derived from the Align module by Ewan Birney.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Ewan Birney, birney at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Allen Day, allenday-at-ucla.edu,
+Richard Adams, Richard.Adams-at-ed.ac.uk,
+David J. Evans, David.Evans-at-vir.gla.ac.uk,
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org,
+Allen Smith, allens-at-cpan.org,
+Jason Stajich, jason-at-bioperl.org,
+Anthony Underwood, aunderwood-at-phls.org.uk,
+Xintao Wei & Giri Narasimhan, giri-at-cs.fiu.edu
+Brian Osborne, bosborne at alum.mit.edu
+
+=head1 SEE ALSO
+
+L<Bio::LocatableSeq>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# 'Let the code begin...
+
+package Bio::SimpleAlign;
+use vars qw(%CONSERVATION_GROUPS);
+use strict;
+
+use Bio::LocatableSeq;  # uses Seq's as list
+
+use Bio::Seq;
+use Bio::SeqFeature::Generic;
+
+BEGIN {
+    # This data should probably be in a more centralized module...
+    # it is taken from Clustalw documentation.
+    # These are all the positively scoring groups that occur in the
+    # Gonnet Pam250 matrix. The strong and weak groups are
+    # defined as strong score >0.5 and weak score =<0.5 respectively.
+
+    my @strong_groups = qw(STA NEQK NHQK NDEQ QHRK MILV MILF HY FYW);
+    my @weak_groups   = qw(CSA ATV SAG STNK STPA SGND
+                           SNDEQK NDEQHK NEQHRK FVLIM HFY);
+
+    %CONSERVATION_GROUPS = (
+        'strong' => \@strong_groups,
+        'weak'   => \@weak_groups,
+    );
+}
+
+use base qw(Bio::Root::Root Bio::Align::AlignI Bio::AnnotatableI);
+
+=head2 new
+
+ Title     : new
+ Usage     : my $aln = new Bio::SimpleAlign();
+ Function  : Creates a new simple align object
+ Returns   : Bio::SimpleAlign
+ Args      : -source => string representing the source program
+                        where this alignment came from
+
+=cut
+
+
+sub new {
+  # The archived copy had "$class, at args" here - "@args" mangled by
+  # the mail archiver.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # Optional named arguments: -source and -score.
+  my ($src,$score) = $self->_rearrange([qw(SOURCE SCORE)], @args);
+  $src && $self->source($src);
+  defined $score && $self->score($score);
+  # we need to set up internal hashs first!
+
+  $self->{'_seq'} = {};              # "name/start-end" => sequence object
+  $self->{'_order'} = {};            # order number     => "name/start-end"
+  $self->{'_start_end_lists'} = {};  # id               => [sequence objects]
+  $self->{'_dis_name'} = {};
+  $self->{'_id'} = 'NoName';
+  $self->{'_symbols'} = {};          # every character seen in any sequence
+  # maybe we should automatically read in from args. Hmmm...
+
+  return $self; # success - we hope!
+}
+
+=head1 Modifier methods
+
+These methods modify the MSA by adding, removing or shuffling complete
+sequences.
+
+=head2 add_seq
+
+ Title     : add_seq
+ Usage     : $myalign->add_seq($newseq);
+ Function  : Adds another sequence to the alignment. *Does not* align
+             it - just adds it to the hashes.
+ Returns   : nothing
+ Args      : a Bio::LocatableSeq object
+             order (optional)
+
+See L<Bio::LocatableSeq> for more information
+
+=cut
+
+# Deprecated camelCase alias: warns, then delegates to add_seq().
+sub addSeq {
+    my ($self, @args) = @_;
+    $self->deprecated("addSeq - deprecated method. Use add_seq() instead.");
+    return $self->add_seq(@args);
+}
+
+sub add_seq {
+    my ($self, $seq, $order) = @_;
+
+    unless ( ref $seq && $seq->isa('Bio::LocatableSeq') ) {
+        $self->throw("Unable to process non locatable sequences [". ref($seq). "]");
+    }
+
+    my $id    = $seq->id() ||$seq->display_id || $seq->primary_id;
+    my $start = $seq->start();
+    my $end   = $seq->end();
+
+    # Record every character of this sequence in the symbol table; gap
+    # and missing/match characters are pruned later, when the symbol
+    # list is actually asked for.
+    if ( $seq->seq ) {
+        foreach my $char ( split(//,$seq->seq) ) {
+            $self->{'_symbols'}->{$char} = 1;
+        }
+    }
+
+    # Without an explicit order the sequence is appended: the next free
+    # slot is the number of sequences stored so far.
+    $order = keys %{$self->{'_seq'}} unless defined $order;
+
+    # Key of the internal hashes: name/start-end.
+    my $name = sprintf("%s/%d-%d",$id,$start,$end);
+
+    if ( $self->{'_seq'}->{$name} ) {
+        # Same key as an existing entry: only the stored object below is
+        # replaced, order and per-id list are left as they are.
+        $self->warn("Replacing one sequence [$name]\n") unless $self->verbose < 0;
+    }
+    else {
+        $self->debug( "Assigning $name to $order\n");
+        $self->{'_order'}->{$order} = $name;
+        push @{ $self->{'_start_end_lists'}->{$id} }, $seq;
+    }
+
+    $self->{'_seq'}->{$name} = $seq;
+}
+
+
+=head2 remove_seq
+
+ Title     : remove_seq
+ Usage     : $aln->remove_seq($seq);
+ Function  : Removes a single sequence from an alignment
+ Returns   :
+ Argument  : a Bio::LocatableSeq object
+
+=cut
+
+# Deprecated camelCase alias: warns, then delegates to remove_seq().
+sub removeSeq {
+    my ($self, @args) = @_;
+    $self->deprecated("removeSeq - deprecated method. Use remove_seq() instead.");
+    return $self->remove_seq(@args);
+}
+
+sub remove_seq {
+    my $self = shift;
+    my $seq = shift;
+
+    $self->throw("Need Bio::Locatable seq argument ")
+        unless ref $seq && $seq->isa( 'Bio::LocatableSeq');
+
+    # Derive the id exactly as add_seq() does, so that a sequence stored
+    # under a fallback id can be found again.
+    my $id    = $seq->id() || $seq->display_id || $seq->primary_id;
+    my $start = $seq->start();
+    my $end   = $seq->end();
+    my $name  = sprintf("%s/%d-%d",$id,$start,$end);
+
+    if( !exists $self->{'_seq'}->{$name} ) {
+        $self->throw("Sequence $name does not exist in the alignment to remove!");
+    }
+
+    delete $self->{'_seq'}->{$name};
+
+    # we need to remove this seq from the start_end_lists hash
+    if (exists $self->{'_start_end_lists'}->{$id}) {
+        my $list = $self->{'_start_end_lists'}->{$id};
+
+        # Locate this very object in the per-id list (references compare
+        # by their stringified address).
+        my ($index) = grep { $list->[$_] eq $seq } 0 .. $#{$list};
+        if (defined $index) {
+            splice @{$list}, $index, 1;
+        }
+        else {
+            $self->throw("Could not find the sequence to remove from the start-end list");
+        }
+    }
+    else {
+        $self->throw("There is no seq list for the name $id");
+    }
+
+    # we need to shift order hash: every later entry moves up one slot
+    # and the now-duplicated last slot is dropped
+    my %rev_order = reverse %{$self->{'_order'}};
+    my $no = $rev_order{$name};
+    my $no_sequences = $self->no_sequences;
+    for (; $no < $no_sequences; $no++) {
+        $self->{'_order'}->{$no} = $self->{'_order'}->{$no+1};
+    }
+    delete $self->{'_order'}->{$no};
+    return 1;
+}
+
+
+=head2 purge
+
+ Title   : purge
+ Usage   : $aln->purge(0.7);
+ Function: Removes sequences above given sequence similarity
+           This function will grind on large alignments. Beware!
+ Example :
+ Returns : An array of the removed sequences
+ Args    : float, threshold for similarity
+
+=cut
+
+sub purge {
+    my ($self, $perc) = @_;
+    my (%is_duplicate, @removed);
+
+    my @seqs = $self->each_seq();
+
+    # Compare every sequence with each sequence that follows it.
+    for my $i (0 .. $#seqs - 1) {
+        my $reference = $seqs[$i];
+
+        # skip if already in duplicate hash
+        next if exists $is_duplicate{$reference->display_id};
+
+        # one residue per array element
+        my @one = split '', $reference->seq();
+
+        for my $j ($i + 1 .. $#seqs) {
+            my $candidate = $seqs[$j];
+
+            # skip if already in duplicate hash
+            next if exists $is_duplicate{$candidate->display_id};
+
+            my @two = split '', $candidate->seq();
+
+            # $count: identical residues; $res: columns in which both
+            # sequences carry a residue rather than a gap ('.' or '-').
+            my ($count, $res) = (0, 0);
+            for my $k (0 .. $#one) {
+                my $first = $one[$k];
+                next if $first eq '.' or $first eq '-';
+                my $second = $two[$k];
+                next unless defined $second;
+                $count++ if $first eq $second;
+                $res++ unless $second eq '.' or $second eq '-';
+            }
+
+            my $ratio = $res == 0 ? 0 : $count/$res;
+
+            # if above threshold put in duplicate hash and push onto
+            # duplicate array for returning to get_unique
+            if ( $ratio > $perc ) {
+                $self->warn("duplicate: ", $candidate->display_id) if $self->verbose > 0;
+                $is_duplicate{$candidate->display_id} = 1;
+                push @removed, $candidate;
+            }
+        }
+    }
+
+    foreach my $dup (@removed) {
+        $self->remove_seq($dup);
+    }
+    return @removed;
+}
+
+=head2 sort_alphabetically
+
+ Title     : sort_alphabetically
+ Usage     : $ali->sort_alphabetically
+ Function  : Changes the order of the alignment to alphabetical on name
+             followed by numerical by number.
+ Returns   :
+ Argument  :
+
+=cut
+
+sub sort_alphabetically {
+    my $self = shift;
+
+    # Index the sequences by their name/start-end string.  (The archived
+    # declaration read "$nse, at arr" - "@arr" mangled by the mail
+    # archiver; that array was never used and is dropped.)
+    my %hash;
+    foreach my $seq ( $self->each_seq() ) {
+        $hash{ $seq->get_nse } = $seq;
+    }
+
+    %{$self->{'_order'}} = (); # reset the hash;
+
+    # Re-number the alignment in sorted order, starting from 0.
+    my $count = 0;
+    foreach my $nse ( sort _alpha_startend keys %hash) {
+        $self->{'_order'}->{$count} = $nse;
+        $count++;
+    }
+    return 1;
+}
+
+=head2 set_new_reference
+
+ Title     : set_new_reference
+ Usage     : $aln->set_new_reference(3 or 'B31'):  Select the 3rd sequence, or
+             the sequence whose name is "B31" (full, exact, and case-sensitive),
+             as the reference (1st) sequence
+ Function  : Change/Set a new reference (i.e., the first) sequence
+ Returns   : a new Bio::SimpleAlign object.
+             Throws an exception if designated sequence not found
+ Argument  : a positive integer of sequence order, or a sequence name
+             in the original alignment
+
+=cut
+
+sub set_new_reference {
+    my ($self, $seqid) = @_;
+    my $aln = $self->new;
+
+    my @seq = $self->each_seq();
+    my @ids = map { $_->display_id } @seq;
+
+    # A purely numeric argument is a sequence position, anything else a
+    # sequence name.
+    my $is_num = ($seqid =~ /^\d+$/) ? 1 : 0;
+    if ($is_num) {
+        $self->throw("The new reference sequence number has to be a positive integer >1 and <= no_sequences ") if ($seqid <= 1 || $seqid > $self->no_sequences);
+    }
+    else {
+        $self->throw("The new reference sequence not in alignment ") unless _in_aln($seqid, \@ids);
+    }
+
+    # Selected sequences go to the front, all others keep their relative
+    # order behind them.
+    my (@front, @rest);
+    for my $i (0 .. $#seq) {
+        my $pos = $i + 1;
+        if ( ($is_num && $pos == $seqid) || ($seqid eq $seq[$i]->display_id) ) {
+            unshift @front, $seq[$i];
+        }
+        else {
+            push @rest, $seq[$i];
+        }
+    }
+
+    foreach my $member (@front, @rest) {
+        $aln->add_seq($member);
+    }
+    return $aln;
+}
+
+# Plain function (not a method): returns 1 if the name $str occurs in
+# the list referenced by $ref, 0 otherwise.
+sub _in_aln {
+    my ($str, $ref) = @_;
+    my $hits = grep { $str eq $_ } @$ref;
+    return $hits ? 1 : 0;
+}
+
+
+=head2 uniq_seq
+
+ Title     : uniq_seq
+ Usage     : $aln->uniq_seq():  Remove identical sequences in
+             the alignment.  Ambiguous base ("N", "n") and
+             leading and ending gaps ("-") are NOT counted as
+             differences.
+ Function  : Make a new alignment of unique sequence types (STs)
+ Returns   : 1. a new Bio::SimpleAlign object (all sequences renamed as "ST")
+             2. ST of each sequence in STDERR
+ Argument  : None
+
+=cut
+
+sub uniq_seq {
+    my ($self, $seqid) = @_;
+    my $aln = $self->new;
+    my (%member, %order, @uniq_str);
+    my $order = 0;
+    my $len = $self->length();
+
+    # Pass 1: build masked copies in which everything that must not
+    # count as a difference is a "?": n/N (DNA strings only) and the
+    # leading and ending gaps.
+    my @masked;
+    foreach my $orig ( $self->each_seq() ) {
+        my $str = $orig->seq();
+        $str =~ s/n/\?/gi if $str =~ /^[atcgn-]+$/i;
+        $str = _convert_leading_ending_gaps($str, '-', '?');
+        push @masked, Bio::LocatableSeq->new(-id    => $orig->id(),
+                                             -seq   => $str,
+                                             -start => 1,
+                                             -end   => $len);
+    }
+
+    # Pass 2: group the masked sequences into sequence types, numbering
+    # each type in order of first appearance.
+    foreach my $seq (@masked) {
+        my ($seen, $key) = _check_uniq($seq->seq(), \@uniq_str, $len);
+        if ($seen) {
+            push @{ $member{$key} }, $seq;
+        }
+        else {
+            push @uniq_str, $key;
+            $member{$key} = [ $seq ];
+            $order{$key}  = ++$order;
+        }
+    }
+
+    # Pass 3: emit one "ST<n>" sequence per type, in input order.
+    foreach my $str (sort {$order{$a} <=> $order{$b}} keys %order) {
+        # convert leading/ending "?" back into "-" and middle "?" back
+        # into "N" ("?" throws errors by SimpleAlign)
+        my $restored = _convert_leading_ending_gaps($str, '?', '-');
+        $restored =~ s/\?/N/g if $restored =~ /^[atcg\-\?]+$/i;
+        $aln->add_seq( Bio::LocatableSeq->new(-id    => "ST".$order{$str},
+                                              -seq   => $restored,
+                                              -start => 1,
+                                              -end   => length($str)) );
+
+        # report which input sequence belongs to which type
+        foreach my $memb (@{$member{$str}}) {
+            $self->debug($memb->id(), "\t", "ST", $order{$str}, "\n");
+        }
+    }
+    return $aln;
+}
+
+# Plain function: check whether $str1 equals one of the strings in @$ref,
+# comparing the first $length columns and treating "?" in either string
+# as a wildcard.
+# Returns (1, matching stored string) when seen before, (0, $str1)
+# otherwise.
+sub _check_uniq {
+    my ($str1, $ref, $length) = @_;
+    my @query = split //, $str1;
+
+    foreach my $known (@$ref) {
+        my @stored = split //, $known;
+        my $diff = grep {
+               $query[$_]  ne '?'
+            && $stored[$_] ne '?'
+            && $query[$_]  ne $stored[$_]
+        } 0 .. $length - 1;
+        return (1, $known) if $diff == 0;  # seen before
+    }
+
+    return (0, $str1); # not seen (also the case for an empty list)
+}
+
+# Plain function: replace the run of $sym1 characters at the start and
+# at the end of string $s with $sym2; occurrences in the middle are left
+# alone.  Returns the new string.
+sub _convert_leading_ending_gaps {
+    my ($s, $sym1, $sym2) = @_;
+    my @chars = split //, $s;
+
+    # leading run
+    my $head = 0;
+    while ($head <= $#chars && $chars[$head] eq $sym1) {
+        $chars[$head] = $sym2;
+        $head++;
+    }
+
+    # ending run
+    my $tail = $#chars;
+    while ($tail >= 0 && $chars[$tail] eq $sym1) {
+        $chars[$tail] = $sym2;
+        $tail--;
+    }
+
+    return join '', @chars;
+}
+
+=head1 Sequence selection methods
+
+Methods returning one or more sequences objects.
+
+=head2 each_seq
+
+ Title     : each_seq
+ Usage     : foreach $seq ( $align->each_seq() )
+ Function  : Gets a Seq object from the alignment
+ Returns   : Seq object
+ Argument  :
+
+=cut
+
+# Deprecated camelCase alias: warns, then delegates to each_seq().
+sub eachSeq {
+    my ($self) = @_;
+    $self->deprecated("eachSeq - deprecated method. Use each_seq() instead.");
+    return $self->each_seq();
+}
+
+sub each_seq {
+    my $self = shift;
+
+    # Walk the order numbers numerically, translate each into its
+    # name/start-end key and keep only keys that still have a sequence.
+    my @seqs = map  { $self->{'_seq'}->{$_} }
+               grep { exists $self->{'_seq'}->{$_} }
+               map  { $self->{'_order'}->{$_} }
+               sort { $a <=> $b } keys %{$self->{'_order'}};
+
+    return @seqs;
+}
+
+
+=head2 each_alphabetically
+
+ Title     : each_alphabetically
+ Usage     : foreach $seq ( $ali->each_alphabetically() )
+ Function  : Returns a sequence object, but the objects are returned
+             in alphabetically sorted order.
+             Does not change the order of the alignment.
+ Returns   : Seq object
+ Argument  :
+
+=cut
+
+sub each_alphabetically {
+    my $self = shift;
+
+    # Index the sequences by their name/start-end string.  (The archived
+    # declaration read "$nse, at arr" - "@arr" mangled by the mail
+    # archiver.)
+    my %hash;
+    foreach my $seq ( $self->each_seq() ) {
+        $hash{ $seq->get_nse } = $seq;
+    }
+
+    # Return the objects in sorted key order; the alignment's own order
+    # is not touched.
+    my @arr = map { $hash{$_} } sort _alpha_startend keys %hash;
+    return @arr;
+}
+
+# Sort comparator (uses the package variables $a and $b) for
+# "name/start-end" keys: alphabetical on the name, then numerical on
+# the start, then on the end.
+#
+# The key is parsed from the right, so names that themselves contain
+# "-" or "/" are handled and the start is compared as a number.  Keys
+# that are not in name/start-end form are compared as plain names.
+sub _alpha_startend {
+    my @akey = $a =~ m{\A(.*)/(-?\d+)-(-?\d+)\z}s ? ($1, $2, $3) : ($a, 0, 0);
+    my @bkey = $b =~ m{\A(.*)/(-?\d+)-(-?\d+)\z}s ? ($1, $2, $3) : ($b, 0, 0);
+
+    return $akey[0] cmp $bkey[0]
+        || $akey[1] <=> $bkey[1]
+        || $akey[2] <=> $bkey[2];
+}
+
+=head2 each_seq_with_id
+
+ Title     : each_seq_with_id
+ Usage     : foreach $seq ( $align->each_seq_with_id() )
+ Function  : Gets a Seq objects from the alignment, the contents
+             being those sequences with the given name (there may be
+             more than one)
+ Returns   : Seq object
+ Argument  : a seq name
+
+=cut
+
+# Deprecated camelCase alias: warns, then delegates to
+# each_seq_with_id().
+sub eachSeqWithId {
+    my ($self, @args) = @_;
+    $self->deprecated("eachSeqWithId - deprecated method. Use each_seq_with_id() instead.");
+    return $self->each_seq_with_id(@args);
+}
+
+sub each_seq_with_id {
+    my ($self, $id) = @_;
+
+    if (!defined $id) {
+        $self->throw("Method each_seq_with_id needs a sequence name argument");
+    }
+
+    # All sequences stored under this id; empty when the id is unknown.
+    my @matches;
+    @matches = @{$self->{'_start_end_lists'}->{$id}}
+        if exists($self->{'_start_end_lists'}->{$id});
+
+    return @matches;
+}
+
+=head2 get_seq_by_pos
+
+ Title     : get_seq_by_pos
+ Usage     : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment
+ Function  : Gets a sequence based on its position in the alignment.
+             Numbering starts from 1.  Sequence positions larger than
+             no_sequences() will throw an error.
+ Returns   : a Bio::LocatableSeq object
+ Args      : positive integer for the sequence position
+
+=cut
+
+sub get_seq_by_pos {
+    my ($self, $pos) = @_;
+
+    if ( !( $pos =~ /^\d+$/ and $pos > 0 ) ) {
+        $self->throw("Sequence position has to be a positive integer, not [$pos]");
+    }
+    if ( $pos > $self->no_sequences ) {
+        $self->throw("No sequence at position [$pos]");
+    }
+
+    # Positions are 1-based for callers, the order hash is 0-based.
+    my $nse = $self->{'_order'}->{$pos - 1};
+    return $self->{'_seq'}->{$nse};
+}
+
+=head2 seq_with_features
+
+ Title   : seq_with_features
+ Usage   : $seq = $aln->seq_with_features(-pos => 1,
+                                          -consensus => 60
+                                          -mask =>
+           sub { my $consensus = shift;
+
+                 for my $i (1..5){
+                    my $n = 'N' x $i;
+                    my $q = '\?' x $i;
+                    while($consensus =~ /[^?]$q[^?]/){
+                       $consensus =~ s/([^?])$q([^?])/$1$n$2/;
+                    }
+                  }
+                 return $consensus;
+               }
+                                         );
+ Function: produces a Bio::Seq object by first splicing gaps from -pos
+           (by means of a splice_by_seq_pos() call), then creating
+           features using non-? chars (by means of a consensus_string()
+           call with stringency -consensus).
+ Returns : a Bio::Seq object
+ Args    : -pos : required. sequence from which to build the Bio::Seq
+             object
+           -consensus : optional, defaults to consensus_string()'s
+             default cutoff value
+           -mask : optional, a coderef to apply to consensus_string()'s
+             output before building features.  this may be useful for
+             closing gaps of 1 bp by masking over them with N, for
+             instance
+
+=cut
+
+sub seq_with_features{
+   my ($self,%arg) = @_;
+
+   #first do the preparatory splice
+   $self->throw("must provide a -pos argument") unless $arg{-pos};
+   $self->splice_by_seq_pos($arg{-pos});
+
+   my $consensus_string = $self->consensus_string($arg{-consensus});
+   $consensus_string = $arg{-mask}->($consensus_string)
+     if defined($arg{-mask});
+
+   # 1-based begin and end columns of every run of non-"?" characters.
+   # (The archived declaration read "@bs, at es" - "@es" mangled by the
+   # mail archiver.)
+   my (@bs, @es);
+
+   push @bs, 1 if $consensus_string =~ /^[^?]/;
+
+   # After matching "?X", pos() is the 1-based column of X: a run begins.
+   while($consensus_string =~ /\?[^?]/g){
+     push @bs, pos($consensus_string);
+   }
+   # After matching "X?", pos() is the 1-based column of the "?", so the
+   # run ends one column earlier.
+   while($consensus_string =~ /[^?]\?/g){
+     push @es, pos($consensus_string) - 1;
+   }
+
+   push @es, length($consensus_string) if $consensus_string =~ /[^?]$/;
+
+   my $seq = Bio::Seq->new();
+
+#   my $rootfeature = Bio::SeqFeature::Generic->new(
+#                -source_tag => 'location',
+#                -start      => $self->get_seq_by_pos($arg{-pos})->start,
+#                -end        => $self->get_seq_by_pos($arg{-pos})->end,
+#                                                  );
+#   $seq->add_SeqFeature($rootfeature);
+
+   # One feature per run, shifted from alignment columns into the
+   # coordinates of the reference sequence.
+   while(defined(my $run_begin = shift @bs)){
+     my $run_end = shift @es;
+     $seq->add_SeqFeature(
+       Bio::SeqFeature::Generic->new(
+         -start => $run_begin - 1 + $self->get_seq_by_pos($arg{-pos})->start,
+         -end   => $run_end   - 1 + $self->get_seq_by_pos($arg{-pos})->start,
+         -source_tag => $self->source || 'MSA',
+       )
+     );
+   }
+
+   return $seq;
+}
+
+
+=head1 Create new alignments
+
+The result of these methods are horizontal or vertical subsets of the
+current MSA.
+
+=head2 select
+
+ Title     : select
+ Usage     : $aln2 = $aln->select(1, 3) # three first sequences
+ Function  : Creates a new alignment from a continuous subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than no_sequences() will throw an error.
+ Returns   : a Bio::SimpleAlign object
+ Args      : positive integer for the first sequence
+             positive integer for the last sequence to include (optional)
+
+=cut
+
+sub select {
+    my $self = shift;
+    my ($start, $end) = @_;
+
+    # Both bounds are validated as positive integers.
+    # NOTE(review): the POD calls the end argument optional, but an
+    # undefined end is rejected here - confirm which is intended.
+    $self->throw("Select start has to be a positive integer, not [$start]")
+        unless $start =~ /^\d+$/ and $start > 0;
+    $self->throw("Select end has to be a positive integer, not [$end]")
+        unless $end  =~ /^\d+$/ and $end > 0;
+    # The message used to interpolate $start where the word "start" was
+    # meant.
+    $self->throw("Select start [$start] has to be smaller than or equal to end [$end]")
+        unless $start <= $end;
+
+    # Copy the requested range of sequences into a fresh alignment that
+    # keeps this alignment's id.
+    my $aln = $self->new;
+    foreach my $pos ($start .. $end) {
+        $aln->add_seq($self->get_seq_by_pos($pos));
+    }
+    $aln->id($self->id);
+    return $aln;
+}
+
+=head2 select_noncont
+
+ Title     : select_noncont
+ Usage     : $aln2 = $aln->select_noncont(1, 3) # 1st and 3rd sequences
+ Function  : Creates a new alignment from a subset of
+             sequences.  Numbering starts from 1.  Sequence positions
+             larger than no_sequences() will throw an error.
+ Returns   : a Bio::SimpleAlign object
+ Args      : array of integers for the sequences
+
+=cut
+
+sub select_noncont {
+    my $self = shift;
+    my (@pos) = @_;
+    my $end = $self->no_sequences;
+
+    # Validate first, so that only integers reach the numeric sort.
+    foreach ( @pos ) {
+        $self->throw("position must be a positive integer, > 0 and <= $end not [$_]")
+          unless( /^\d+$/ && $_ > 0 && $_ <= $end );
+    }
+
+    # Numeric sort: the default string sort would put 10 before 2.
+    @pos = sort { $a <=> $b } @pos;
+
+    # Copy the selected sequences into a fresh alignment that keeps this
+    # alignment's id.
+    my $aln = $self->new;
+    foreach my $p (@pos) {
+        $aln->add_seq($self->get_seq_by_pos($p));
+    }
+    $aln->id($self->id);
+    return $aln;
+}
+
+=head2 slice
+
+ Title     : slice
+ Usage     : $aln2 = $aln->slice(20,30)
+ Function  : Creates a slice from the alignment inclusive of start and
+             end columns, and the first column in the alignment is denoted 1.
+             Sequences with no residues in the slice are excluded from the
+             new alignment and a warning is printed. Slice beyond the length of
+             the sequence does not do padding.
+ Returns   : A Bio::SimpleAlign object
+ Args      : Positive integer for start column, positive integer for end column,
+             optional boolean which if true will keep gap-only columns in the newly
+             created slice. Example:
+
+             $aln2 = $aln->slice(20,30,1)
+
+=cut
+
+sub slice {
+    # Cut columns $start..$end (1-based, inclusive) out of every sequence and
+    # return them as a new alignment with the same id.
+    #
+    # Args   : $start         - positive integer, first column
+    #          $end           - positive integer, last column
+    #          $keep_gap_only - if true, sequences whose slice contains no
+    #                           residues are kept instead of being dropped
+    # Returns: a new alignment object
+    # Throws : on non-positive/non-integer bounds, $start > $end, or
+    #          $start beyond the alignment length
+    my $self = shift;
+    my ($start, $end, $keep_gap_only) = @_;
+
+    $self->throw("Slice start has to be a positive integer, not [$start]")
+        unless $start =~ /^\d+$/ and $start > 0;
+    $self->throw("Slice end has to be a positive integer, not [$end]")
+        unless $end =~ /^\d+$/ and $end > 0;
+    # The message used to interpolate $start where the word "start" was meant.
+    $self->throw("Slice start [$start] has to be smaller than or equal to end [$end]")
+        unless $start <= $end;
+    $self->throw("This alignment has only ". $self->length . " residues. Slice start " .
+                 "[$start] is too big.") if $start > $self->length;
+
+    my $aln = $self->new;
+    $aln->id($self->id);
+    foreach my $seq ( $self->each_seq() ) {
+        my $new_seq = Bio::LocatableSeq->new(-id      => $seq->id,
+                                             -strand  => $seq->strand,
+                                             -verbose => $self->verbose);
+
+        # Clamp the slice end to this sequence's own length; no padding.
+        my $seq_end = $end;
+        $seq_end = $seq->length if( $end > $seq->length );
+
+        my $slice_seq = $seq->subseq($start, $seq_end);
+        $new_seq->seq( $slice_seq );
+
+        # From here on $slice_seq holds only word characters, so its
+        # length is the number of residues in the slice.
+        $slice_seq =~ s/\W//g;
+
+        if ($start > 1) {
+            # Count the residues that precede the slice to shift the
+            # sequence coordinates accordingly.
+            my $pre_start_seq = $seq->subseq(1, $start - 1);
+            $pre_start_seq =~ s/\W//g;
+            my $strand = $seq->strand;
+            if (defined $strand and $strand < 0) {
+                # Reverse strand: coordinates run down from the end.
+                $new_seq->start( $seq->end - CORE::length($pre_start_seq) - CORE::length($slice_seq) + 1);
+            } else {
+                # Forward or undefined strand.
+                $new_seq->start( $seq->start + CORE::length($pre_start_seq) );
+            }
+        } else {
+            $new_seq->start( $seq->start);
+        }
+        $new_seq->end( $new_seq->start + CORE::length($slice_seq) - 1 );
+
+        if ($new_seq->start and $new_seq->end >= $new_seq->start) {
+            $aln->add_seq($new_seq);
+        } elsif ( $keep_gap_only ) {
+            $aln->add_seq($new_seq);
+        } else {
+            my $nse = $seq->get_nse();
+            $self->warn("Slice [$start-$end] of sequence [$nse] contains no residues.".
+                        " Sequence excluded from the new alignment.");
+        }
+    }
+
+    return $aln;
+}
+
+=head2 remove_columns
+
+ Title     : remove_columns
+ Usage     : $aln2 = $aln->remove_columns(['mismatch','weak']) or
+             $aln2 = $aln->remove_columns([0,0],[6,8])
+ Function  : Creates an alignment with columns removed corresponding to
+             the specified type or by specifying the columns by number.
+ Returns   : Bio::SimpleAlign object
+ Args      : Array ref of types ('match'|'weak'|'strong'|'mismatch'|'gaps'|
+             'all_gaps_columns') or array ref where the referenced array
+             contains a pair of integers that specify a range.
+             The first column is 0.
+
+=cut
+
+sub remove_columns {
+    # Dispatch column removal either by column type (array ref of type
+    # names such as 'mismatch', 'gaps', 'all_gaps_columns') or by number
+    # (one or more [start, end] array refs, 0-based).
+    #
+    # Returns: $self unchanged when called without arguments, otherwise the
+    #          alignment produced by the selected helper
+    # Throws : when the first argument is not an array ref whose first
+    #          element is a type name or an integer
+    my ($self, @args) = @_;
+    @args || return $self;
+    my $aln;
+
+    # Only look inside the first argument when it really is an array ref;
+    # otherwise fall through to the explanatory throw below.
+    my $first = ref($args[0]) eq 'ARRAY' ? $args[0][0] : undef;
+
+    # The underscore is needed for the documented 'all_gaps_columns' type.
+    if (defined $first and $first =~ /^[a-z_]+$/i) {
+        $aln = $self->_remove_columns_by_type($args[0]);
+    } elsif (defined $first and $first =~ /^\d+$/) {
+        $aln = $self->_remove_columns_by_num(\@args);
+    } else {
+        $self->throw("You must pass array references to remove_columns(), not @args");
+    }
+    $aln;
+}
+
+
+=head2 remove_gaps
+
+ Title     : remove_gaps
+ Usage     : $aln2 = $aln->remove_gaps
+ Function  : Creates an alignment with gaps removed
+ Returns   : a Bio::SimpleAlign object
+ Args      : a gap character(optional) if none specified taken
+                from $self->gap_char,
+             [optional] $all_gaps_columns flag (1 or 0, default is 0)
+                        indicates that only all-gaps columns should be deleted
+
+Used from method L<remove_columns> in most cases. Set gap character
+using L<gap_char()|gap_char>.
+
+=cut
+
+sub remove_gaps {
+    # Remove gap columns and return the resulting alignment.
+    #   $gapchar          - gap character; falls back to $self->gap_char
+    #   $all_gaps_columns - if true, only columns reported by all_gap_line()
+    #                       are removed; otherwise those from gap_line()
+    # Returns $self itself (not a copy) when no column has to be removed.
+    my ($self,$gapchar,$all_gaps_columns) = @_;
+    my $gap_line;
+    if ($all_gaps_columns) {
+        $gap_line = $self->all_gap_line($gapchar);
+    } else {
+        $gap_line = $self->gap_line($gapchar);
+    }
+    my $aln = $self->new;
+
+    my @remove;
+    # running count of columns already scheduled for removal
+    my $length = 0;
+    my $del_char = $gapchar || $self->gap_char;
+    # Do the matching to get the segments to remove
+    # NOTE(review): $del_char is interpolated into a character class
+    # unescaped; characters such as ']' or '^' would change the pattern.
+    while ($gap_line =~ m/[$del_char]/g) {
+        # 0-based index of the first character of this run
+        my $start = pos($gap_line)-1;
+        # consume the rest of the run; /c keeps pos() if nothing more matches
+        $gap_line=~/\G[$del_char]+/gc;
+        my $end = pos($gap_line)-1;
+
+        #have to offset the start and end for subsequent removes
+        $start-=$length;
+        $end  -=$length;
+        $length += ($end-$start+1);
+        push @remove, [$start,$end];
+    }
+
+    #remove the segments
+    $aln = $#remove >= 0 ? $self->_remove_col($aln,\@remove) : $self;
+    return $aln;
+}
+
+
+sub _remove_col {
+    # Copy every sequence of $self into $aln with the column ranges in
+    # $remove cut out.
+    #   $aln    - alignment object that receives the new sequences
+    #   $remove - array ref of [start, end] pairs (0-based string offsets,
+    #             inclusive), applied in the order given
+    # Returns $aln.
+    my ($self,$aln,$remove) = @_;
+    my @new;
+
+    # splice out the segments and create new seq
+    foreach my $seq($self->each_seq){
+        my $new_seq = new Bio::LocatableSeq(
+						 -id      => $seq->id,
+					    -strand  => $seq->strand,
+					    -verbose => $self->verbose);
+        my $sequence = $seq->seq;
+        foreach my $pair(@{$remove}){
+            my $start = $pair->[0];
+            my $end   = $pair->[1];
+            # NOTE(review): a false $sequence (empty string or "0") is
+            # replaced by the full original sequence again - confirm intent.
+            $sequence = $seq->seq unless $sequence;
+            my $spliced;
+            $spliced .= $start > 0 ? substr($sequence,0,$start) : '';
+            # the length argument overshoots; substr() stops at the end of
+            # the string, so this takes everything after $end
+            $spliced .= substr($sequence,$end+1,$seq->length-$end+1);
+            $sequence = $spliced;
+            # NOTE(review): start/end are overwritten on every pair, and
+            # column offsets are compared with sequence coordinates here;
+            # verify the resulting coordinates against callers.
+            if ($start == 1) {
+              $new_seq->start($end);
+            }
+            else {
+              $new_seq->start( $seq->start);
+            }
+            # end
+            if($end >= $seq->end){
+             $new_seq->end( $start);
+            }
+            else {
+             $new_seq->end($seq->end);
+            }
+        }
+        # a sequence reduced to nothing is added without a seq string
+        $new_seq->seq($sequence) if $sequence;
+		  push @new, $new_seq;
+    }
+    # add the new seqs to the alignment
+    foreach my $new(@new){
+        $aln->add_seq($new);
+    }
+    return $aln;
+}
+
+sub _remove_columns_by_type {
+    # Remove the columns whose match-line symbol belongs to one of the
+    # requested types, then optionally strip gap columns.
+    #
+    # Args   : $type - array ref of type names: 'match', 'weak', 'strong',
+    #                  'mismatch', 'gaps', 'all_gaps_columns'
+    # Returns: the reduced alignment, or $self when nothing was removed
+    my ($self,$type) = @_;
+    my $aln = $self->new;
+    my @remove;
+
+    # "my $x = ... if COND" has undefined behaviour in Perl (the variable
+    # may keep a value from an earlier call), so use a ternary instead.
+    my $gap = scalar(grep { $_ eq 'gaps' } @{$type})
+        ? $self->gap_char : undef;
+    my $all_gaps_columns = scalar(grep { /all_gaps_columns/ } @{$type})
+        ? $self->gap_char : undef;
+
+    # Match-line symbol for each type; the gap types are handled by
+    # remove_gaps() below and contribute no symbol.
+    my %matchchars = ( 'match'            => '\*',
+                       'weak'             => '\.',
+                       'strong'           => ':',
+                       'mismatch'         => ' ',
+                       'gaps'             => '',
+                       'all_gaps_columns' => ''
+                     );
+    # get the characters to delete against; unknown type names add nothing
+    my $del_char = '';
+    foreach my $name (@{$type}) {
+        $del_char .= $matchchars{$name} if defined $matchchars{$name};
+    }
+
+    my $length = 0;    # columns already scheduled for removal
+    my $match_line = $self->match_line;
+    # do the matching to get the segments to remove
+    if ($del_char) {
+        while ($match_line =~ m/[$del_char]/g) {
+            my $start = pos($match_line)-1;
+            # consume the rest of this run of matching symbols
+            $match_line =~ /\G[$del_char]+/gc;
+            my $end = pos($match_line)-1;
+
+            # have to offset the start and end for subsequent removes
+            $start -= $length;
+            $end   -= $length;
+            $length += ($end-$start+1);
+            push @remove, [$start,$end];
+        }
+    }
+
+    # remove the segments
+    $aln = @remove ? $self->_remove_col($aln,\@remove) : $self;
+    $aln = $aln->remove_gaps() if $gap;
+    $aln = $aln->remove_gaps('', 1) if $all_gaps_columns;
+
+    return $aln;
+}
+
+
+sub _remove_columns_by_num {
+    # Remove numbered column ranges.  $ranges is an array ref of
+    # [start, end] pairs; it is re-ordered in place so that the ranges
+    # with the highest start come first.
+    my ($self, $ranges) = @_;
+    my $target = $self->new;
+
+    @{$ranges} = sort { $b->[0] <=> $a->[0] } @{$ranges};
+
+    my $trimmed = $self->_remove_col($target, $ranges);
+    return $trimmed;
+}
+
+
+=head1 Change sequences within the MSA
+
+These methods affect characters in all sequences without changing the
+alignment.
+
+=head2 splice_by_seq_pos
+
+ Title   : splice_by_seq_pos
+ Usage   : $status = splice_by_seq_pos(1);
+ Function: splices all aligned sequences where the specified sequence
+           has gaps.
+ Example :
+ Returns : 1 on success
+ Args    : position of sequence to splice by
+
+
+=cut
+
+sub splice_by_seq_pos{
+  # Delete, from every sequence in place, each column at which the guide
+  # sequence (the one at position $pos) has a '-' or '.' character.
+  # Returns 1.
+  my ($self,$pos) = @_;
+
+  my $guide_seq = $self->get_seq_by_pos($pos)->seq;
+  $guide_seq =~ tr/./-/;
+
+  # Collect gap offsets, highest first, so that removing one column does
+  # not shift the offsets still to be processed.
+  my @gap_offsets;
+  my $from = 0;
+  while ((my $hit = index($guide_seq, '-', $from)) > -1) {
+    unshift @gap_offsets, $hit;
+    $from = $hit + 1;
+  }
+
+  for my $member ($self->each_seq) {
+    my @residues = split '', $member->seq;
+    for my $offset (@gap_offsets) {
+      splice(@residues, $offset, 1);
+    }
+    $member->seq(join('', @residues));
+  }
+
+  return 1;
+}
+
+=head2 map_chars
+
+ Title     : map_chars
+ Usage     : $ali->map_chars('\.','-')
+ Function  : Does a s/$arg1/$arg2/ on the sequences. Useful for gap
+             characters
+
+             Notice that the from (arg1) is interpreted as a regex,
+             so be careful about quoting meta characters (e.g.
+             $ali->map_chars('.','-') won't do what you want)
+ Returns   :
+ Argument  : 'from' regexp
+             'to' string
+
+=cut
+
+sub map_chars {
+    # Apply s/$from/$to/g to every sequence string of the alignment.
+    # $from is used as a regular expression, $to as the replacement text.
+    # Throws unless both arguments are defined; returns 1.
+    my ($self, $from, $to) = @_;
+
+    unless (defined $from and defined $to) {
+        $self->throw("Need exactly two arguments");
+    }
+
+    for my $member ( $self->each_seq() ) {
+        (my $residues = $member->seq()) =~ s/$from/$to/g;
+        $member->seq($residues);
+    }
+    return 1;
+}
+
+
+=head2 uppercase
+
+ Title     : uppercase()
+ Usage     : $ali->uppercase()
+ Function  : Sets all the sequences to uppercase
+ Returns   :
+ Argument  :
+
+=cut
+
+sub uppercase {
+    # Upper-case the ASCII letters a-z of every sequence string in place.
+    # Returns 1.
+    my ($self) = @_;
+
+    for my $member ( $self->each_seq() ) {
+        my $residues = $member->seq();
+        # tr/// takes plain ranges; square brackets would only be literal
+        # characters mapped onto themselves.
+        $residues =~ tr/a-z/A-Z/;
+        $member->seq($residues);
+    }
+    return 1;
+}
+
+=head2 cigar_line
+
+ Title    : cigar_line()
+ Usage    : %cigars = $align->cigar_line()
+ Function : Generates a "cigar" (Compact Idiosyncratic Gapped Alignment
+            Report) line for each sequence in the alignment. Examples are
+            "1,60" or "5,10:12,58", where the numbers refer to conserved
+            positions within the alignment. The keys of the hash are the
+            NSEs (name/start/end) assigned to each sequence.
+ Args     : none
+ Returns  : Hash of strings (cigar lines)
+
+=cut
+
+sub cigar_line {
+	# Build, per sequence, a string of "start,end" ranges joined by ':'
+	# describing the residue positions that agree with the 100% consensus.
+	# Returns a hash keyed by each sequence's get_nse() value.
+	# Sequences with no agreeing position never get a key, because keys
+	# are only created by the push below.
+	my $self = shift;
+	my %cigars;
+
+	my @consensus = split "",($self->consensus_string(100));
+	my $len = $self->length;
+	my $gapchar = $self->gap_char;
+
+	# create a precursor, something like (1,4,5,6,7,33,45),
+	# where each number corresponds to a conserved position
+	foreach my $seq ( $self->each_seq ) {
+		my @seq = split "", uc ($seq->seq);
+		# $pos counts residues of this sequence (1-based); gap
+		# characters that differ from the consensus do not advance it
+		my $pos = 1;
+		for (my $x = 0 ; $x < $len ; $x++ ) {
+			if ($seq[$x] eq $consensus[$x]) {
+				push @{$cigars{$seq->get_nse}},$pos;
+				$pos++;
+			} elsif ($seq[$x] ne $gapchar) {
+				$pos++;
+			}
+		}
+	}
+	# duplicate numbers - (1,4,5,6,7,33,45) becomes (1,1,4,5,6,7,33,33,45,45)
+	# NOTE(review): with a single-element list the [1] / [$# - 1] lookups
+	# below read undef or wrap around to the same element - confirm.
+	for my $name (keys %cigars) {
+		splice @{$cigars{$name}}, 1, 0, ${$cigars{$name}}[0] if
+		  ( ${$cigars{$name}}[0] + 1 < ${$cigars{$name}}[1] );
+      push @{$cigars{$name}}, ${$cigars{$name}}[$#{$cigars{$name}}] if
+           ( ${$cigars{$name}}[($#{$cigars{$name}} - 1)] + 1 <
+		          ${$cigars{$name}}[$#{$cigars{$name}}] );
+		# the array grows while it is being walked; the loop bound is
+		# re-evaluated on every iteration
+		for ( my $x = 1 ; $x < $#{$cigars{$name}} - 1 ; $x++) {
+			if (${$cigars{$name}}[$x - 1] + 1 < ${$cigars{$name}}[$x]  &&
+		       ${$cigars{$name}}[$x + 1]  > ${$cigars{$name}}[$x] + 1) {
+	         splice @{$cigars{$name}}, $x, 0, ${$cigars{$name}}[$x];
+			}
+      }
+	}
+  # collapse series - (1,1,4,5,6,7,33,33,45,45) becomes (1,1,4,7,33,33,45,45)
+  for my $name (keys %cigars) {
+	  my @remove;
+	  # NOTE(review): at $x == 0 the index ($x - 1) is -1, i.e. the last
+	  # element of the array, not a predecessor.
+	  for ( my $x = 0 ; $x < $#{$cigars{$name}} ; $x++) {
+		   if ( ${$cigars{$name}}[$x] == ${$cigars{$name}}[($x - 1)] + 1 &&
+			     ${$cigars{$name}}[$x] == ${$cigars{$name}}[($x + 1)] - 1 ) {
+		      # unshift keeps @remove in descending index order so the
+		      # splices below do not shift indices still to be removed
+		      unshift @remove,$x;
+	      }
+	   }
+      for my $pos (@remove) {
+		  	splice @{$cigars{$name}}, $pos, 1;
+	   }
+   }
+   # join and punctuate
+   for my $name (keys %cigars) {
+ 	  # NOTE(review): the list assignment only initialises $start;
+ 	  # $end and $str start out undefined.
+ 	  my ($start,$end,$str) = "";
+ 	  # consume the list two numbers at a time; this empties the array
+ 	  while ( ($start,$end) = splice @{$cigars{$name}}, 0, 2 ) {
+ 		  $str .= ($start . "," . $end . ":");
+ 	  }
+ 	  $str =~ s/:$//;
+      $cigars{$name} = $str;
+   }
+   %cigars;
+}
+
+
+=head2 match_line
+
+ Title    : match_line()
+ Usage    : $line = $align->match_line()
+ Function : Generates a match line - much like consensus string
+            except that a line indicating the '*' for a match.
+ Args     : (optional) Match line characters ('*' by default)
+            (optional) Strong match char (':' by default)
+            (optional) Weak match char ('.' by default)
+ Returns  : String
+
+=cut
+
+sub match_line {
+	# Build one symbol per alignment column: the match character when all
+	# sequences agree, the strong/weak character for protein columns that
+	# fall inside one conservation group, and ' ' otherwise (including
+	# any column that contains '-', '.' or ' ').
+	# Args: optional match, strong and weak characters (defaults '*', ':', '.').
+	# Returns the match line as a string.
+	my ($self,$matchlinechar, $strong, $weak) = @_;
+	my %matchchars = ('match'    => $matchlinechar || '*',
+							  'weak'     => $weak          || '.',
+							  'strong'   => $strong        || ':',
+							  'mismatch' => ' ',
+						  );
+
+	my @seqchars;
+	my $alphabet;
+	foreach my $seq ( $self->each_seq ) {
+		push @seqchars, [ split(//, uc ($seq->seq)) ];
+		# the alphabet of the first sequence decides for the whole alignment
+		$alphabet = $seq->alphabet unless defined $alphabet;
+	}
+	# the first sequence is the reference; @seqchars keeps the others
+	my $refseq = shift @seqchars;
+	# let's just march down the columns
+	my $matchline;
+ POS:
+	# the range is inclusive, so the last iteration is one past the final
+	# column; it is ended by the "last" below when $refchar is undefined
+	foreach my $pos ( 0..$self->length ) {
+		my $refchar = $refseq->[$pos];
+		my $char = $matchchars{'mismatch'};
+		unless( defined $refchar ) {
+			last if $pos == $self->length; # short circuit on last residue
+			# this in place to handle jason's soon-to-be-committed
+			# intron mapping code
+			# reference shorter than the alignment: emit a mismatch symbol
+			goto bottom;
+		}
+		# %col collects the distinct characters seen in this column
+		my %col = ($refchar => 1);
+		my $dash = ($refchar eq '-' || $refchar eq '.' || $refchar eq ' ');
+		foreach my $seq ( @seqchars ) {
+			next if $pos >= scalar @$seq;
+			$dash = 1 if( $seq->[$pos] eq '-' || $seq->[$pos] eq '.' ||
+							  $seq->[$pos] eq ' ' );
+			$col{$seq->[$pos]}++ if defined $seq->[$pos];
+		}
+		my @colresidues = sort keys %col;
+
+		# if all the values are the same
+		if( $dash ) { $char =  $matchchars{'mismatch'} }
+		elsif( @colresidues == 1 ) { $char = $matchchars{'match'} }
+		elsif( $alphabet eq 'protein' ) { # only try to do weak/strong
+			# matches for protein seqs
+	    TYPE:
+			foreach my $type ( qw(strong weak) ) {
+				# iterate through categories
+				my %groups;
+				# iterate through each of the aa in the col
+				# look to see which groups it is in
+				# %CONSERVATION_GROUPS is not defined in this block;
+				# presumably maps 'strong'/'weak' to lists of residue-group
+				# strings - verify at its definition
+				foreach my $c ( @colresidues ) {
+					foreach my $f ( grep { index($_,$c) >= 0 } @{$CONSERVATION_GROUPS{$type}} ) {
+						push @{$groups{$f}},$c;
+					}
+				}
+			 GRP:
+				foreach my $cols ( values %groups ) {
+					@$cols = sort @$cols;
+					# now we are just testing to see if two arrays
+					# are identical w/o changing either one
+					# have to be same len
+					next if( scalar @$cols != scalar @colresidues );
+					# walk down the length and check each slot
+					# note: the global $_ is used as the loop index here
+					for($_=0;$_ < (scalar @$cols);$_++ ) {
+						next GRP if( $cols->[$_] ne $colresidues[$_] );
+					}
+					$char = $matchchars{$type};
+					last TYPE;
+				}
+			}
+		}
+	 bottom:
+		$matchline .= $char;
+	}
+	return $matchline;
+}
+
+
+=head2 gap_line
+
+ Title    : gap_line()
+ Usage    : $line = $align->gap_line()
+ Function : Generates a gap line - much like consensus string
+            except that a line where '-' represents gap
+ Args     : (optional) gap line characters ('-' by default)
+ Returns  : string
+
+=cut
+
+sub gap_line {
+    # Return a string with one character per column: the gap character
+    # where at least one sequence has a gap, '.' everywhere else.
+    # $gapchar is optional and falls back to $self->gap_char.
+    my ($self,$gapchar) = @_;
+    $gapchar ||= $self->gap_char;
+
+    # columns (0-based) in which some sequence carries the gap character
+    my %has_gap;
+    for my $member ( $self->each_seq ) {
+        my @residues = split(//, uc ($member->seq));
+        for my $col ( 0 .. $#residues ) {
+            $has_gap{$col} = undef if $residues[$col] eq $gapchar;
+        }
+    }
+
+    my $gap_line;
+    for my $col ( 0 .. $self->length - 1 ) {
+        if (exists $has_gap{$col}) {
+            $gap_line .= $gapchar;
+        }
+        else {
+            $gap_line .= '.';
+        }
+    }
+    return $gap_line;
+}
+
+=head2 all_gap_line
+
+ Title    : all_gap_line()
+ Usage    : $line = $align->all_gap_line()
+ Function : Generates a gap line - much like consensus string
+            except that a line where '-' represents all-gap column
+ Args     : (optional) gap line characters ('-' by default)
+ Returns  : string
+
+=cut
+
+sub all_gap_line {
+    # Return a string with one character per column: the gap character
+    # where every sequence has a gap, '.' everywhere else.
+    # $gapchar is optional and falls back to $self->gap_char.
+    my ($self,$gapchar) = @_;
+    $gapchar ||= $self->gap_char;
+
+    my @members = $self->each_seq;
+
+    # number of sequences with a gap, per 0-based column
+    my %gap_count;
+    for my $member ( @members ) {
+        my @residues = split(//, uc ($member->seq));
+        for my $col ( 0 .. $#residues ) {
+            $gap_count{$col}++ if $residues[$col] eq $gapchar;
+        }
+    }
+
+    my $gap_line;
+    for my $col ( 0 .. $self->length - 1 ) {
+        my $all_gaps = exists $gap_count{$col}
+                       && $gap_count{$col} == scalar @members;
+        $gap_line .= $all_gaps ? $gapchar : '.';
+    }
+    return $gap_line;
+}
+
+=head2 gap_col_matrix
+
+ Title    : gap_col_matrix()
+ Usage    : my $cols = $align->gap_col_matrix()
+ Function : Generates an array of hashes where
+            each entry in the array is a hash reference
+            with keys of all the sequence names
+            and value of 1 or 0 if the sequence has a gap at that column
+ Args     : (optional) gap line characters ($aln->gap_char or '-' by default)
+
+=cut
+
+sub gap_col_matrix {
+    # Return an array ref with one hash ref per column; each hash maps a
+    # sequence's display_id to a true/false value telling whether that
+    # sequence has the gap character in the column.
+    # $gapchar is optional and falls back to $self->gap_char.
+    my ($self,$gapchar) = @_;
+    $gapchar ||= $self->gap_char;
+
+    my @cols;
+    for my $member ( $self->each_seq ) {
+        my $residues = $member->seq;
+        my $width    = $member->length;
+        my $name     = $member->display_id;
+        for (my $col = 0; $col < $width; $col++) {
+            $cols[$col]->{$name} = (substr($residues, $col, 1) eq $gapchar);
+        }
+    }
+    return \@cols;
+}
+
+=head2 match
+
+ Title     : match()
+ Usage     : $ali->match()
+ Function  : Goes through all columns and changes residues that are
+             identical to residue in first sequence to match '.'
+             character. Sets match_char.
+
+             USE WITH CARE: Most MSA formats do not support match
+             characters in sequences, so this is mostly for output
+             only. NEXUS format (Bio::AlignIO::nexus) can handle
+             it.
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+=cut
+
+sub match {
+    # Replace, in every sequence but the first, each residue identical to
+    # the residue of the first sequence in the same column with the match
+    # character, and record that character via match_char().
+    #
+    # Args   : $match - match character, defaults to '.'
+    # Returns: 1
+    my ($self, $match) = @_;
+
+    $match ||= '.';
+
+    # Existing match characters are first turned into '-'.  map_chars()
+    # treats its first argument as a regex, so quote every metacharacter
+    # (the earlier hand-written list missed e.g. '*', '+', '?' and '\').
+    $self->map_chars(quotemeta($match), '-');
+
+    my @seqs = $self->each_seq();
+    return 1 unless scalar @seqs > 1;
+
+    my $refseq = shift @seqs ;
+    my @refseq = split //, $refseq->seq;
+    my $gapchar = $self->gap_char;
+
+    foreach my $seq ( @seqs ) {
+        my @varseq = split //, $seq->seq();
+        for ( my $i=0; $i < scalar @varseq; $i++) {
+            next unless defined $refseq[$i];
+            # Compare the gap character literally; interpolating it as a
+            # regex made a '.' gap character match any residue.
+            next unless $refseq[$i] =~ /[A-Za-z\*]/
+                     || $refseq[$i] eq $gapchar;
+            $varseq[$i] = $match if $refseq[$i] eq $varseq[$i];
+        }
+        $seq->seq(join '', @varseq);
+    }
+    $self->match_char($match);
+    return 1;
+}
+
+
+=head2 unmatch
+
+ Title     : unmatch()
+ Usage     : $ali->unmatch()
+ Function  : Undoes the effect of method match. Unsets match_char.
+ Returns   : 1
+ Argument  : a match character, optional, defaults to '.'
+
+See L<match> and L<match_char>
+
+=cut
+
+sub unmatch {
+    # Undo match(): replace every match character in the non-reference
+    # sequences with the residue of the first sequence in that column and
+    # clear match_char.
+    #
+    # Args   : $match - match character, defaults to '.'
+    # Returns: 1
+    my ($self, $match) = @_;
+
+    $match ||= '.';
+
+    my @seqs = $self->each_seq();
+    return 1 unless scalar @seqs > 1;
+
+    my $refseq = shift @seqs ;
+    my @refseq = split //, $refseq->seq;
+    my $gapchar = $self->gap_char;
+    foreach my $seq ( @seqs ) {
+        my @varseq = split //, $seq->seq();
+        for ( my $i=0; $i < scalar @varseq; $i++) {
+            next unless defined $refseq[$i];
+            # Compare the gap character literally; interpolating it as a
+            # regex made a '.' gap character match any residue.
+            next unless $refseq[$i] =~ /[A-Za-z\*]/
+                     || $refseq[$i] eq $gapchar;
+            $varseq[$i] = $refseq[$i] if $varseq[$i] eq $match;
+        }
+        $seq->seq(join '', @varseq);
+    }
+    $self->match_char('');
+    return 1;
+}
+
+=head1 MSA attributes
+
+Methods for setting and reading the MSA attributes.
+
+Note that the methods defining character semantics depend on the user
+to set them sensibly.  They are needed only by certain input/output
+methods. Unset them by setting to an empty string ('').
+
+=head2 id
+
+ Title     : id
+ Usage     : $myalign->id("Ig")
+ Function  : Gets/sets the id field of the alignment
+ Returns   : An id string
+ Argument  : An id string (optional)
+
+=cut
+
+sub id {
+    # Get/set the alignment id.  A defined argument is stored; the
+    # currently stored id is returned either way.
+    my ($self, $name) = @_;
+    $self->{'_id'} = $name if defined $name;
+    return $self->{'_id'};
+}
+
+=head2 accession
+
+ Title     : accession
+ Usage     : $myalign->accession("PF00244")
+ Function  : Gets/sets the accession field of the alignment
+ Returns   : An acc string
+ Argument  : An acc string (optional)
+
+=cut
+
+sub accession {
+    # Get/set the alignment accession.  A defined argument is stored; the
+    # currently stored accession is returned either way.
+    my ($self, $acc) = @_;
+    $self->{'_accession'} = $acc if defined $acc;
+    return $self->{'_accession'};
+}
+
+=head2 description
+
+ Title     : description
+ Usage     : $myalign->description("14-3-3 proteins")
+ Function  : Gets/sets the description field of the alignment
+ Returns   : An description string
+ Argument  : An description string (optional)
+
+=cut
+
+sub description {
+    # Get/set the alignment description.  A defined argument is stored;
+    # the currently stored description is returned either way.
+    my ($self, $name) = @_;
+    $self->{'_description'} = $name if defined $name;
+    return $self->{'_description'};
+}
+
+=head2 missing_char
+
+ Title     : missing_char
+ Usage     : $myalign->missing_char("?")
+ Function  : Gets/sets the missing_char attribute of the alignment
+             It is generally recommended to set it to 'n' or 'N'
+             for nucleotides and to 'X' for protein.
+ Returns   : An missing_char string,
+ Argument  : An missing_char string (optional)
+
+=cut
+
+sub missing_char {
+    # Get/set the character that stands for missing data.
+    # Throws when given a string longer than one character.
+    my ($self, $char) = @_;
+
+    return $self->{'_missing_char'} unless defined $char;
+
+    if (CORE::length($char) > 1) {
+        $self->throw("Single missing character, not [$char]!");
+    }
+    $self->{'_missing_char'} = $char;
+    return $self->{'_missing_char'};
+}
+
+=head2 match_char
+
+ Title     : match_char
+ Usage     : $myalign->match_char('.')
+ Function  : Gets/sets the match_char attribute of the alignment
+ Returns   : An match_char string,
+ Argument  : An match_char string (optional)
+
+=cut
+
+sub match_char {
+    # Get/set the character that stands for "same as reference".
+    # Throws when given a string longer than one character.
+    my ($self, $char) = @_;
+
+    return $self->{'_match_char'} unless defined $char;
+
+    if (CORE::length($char) > 1) {
+        $self->throw("Single match character, not [$char]!");
+    }
+    $self->{'_match_char'} = $char;
+    return $self->{'_match_char'};
+}
+
+=head2 gap_char
+
+ Title     : gap_char
+ Usage     : $myalign->gap_char('-')
+ Function  : Gets/sets the gap_char attribute of the alignment
+ Returns   : An gap_char string, defaults to '-'
+ Argument  : An gap_char string (optional)
+
+=cut
+
+sub gap_char {
+    # Get/set the gap character.  When nothing has been stored yet and no
+    # argument is given, '-' is stored and returned.
+    # Throws when the character is longer than one character.
+    my ($self, $char) = @_;
+
+    unless (defined $char) {
+        # plain getter once a value has been stored
+        return $self->{'_gap_char'} if defined $self->{'_gap_char'};
+        $char = '-';
+    }
+
+    if (CORE::length($char) > 1) {
+        $self->throw("Single gap character, not [$char]!");
+    }
+    $self->{'_gap_char'} = $char;
+    return $self->{'_gap_char'};
+}
+
+=head2 symbol_chars
+
+ Title   : symbol_chars
+ Usage   : my @symbolchars = $aln->symbol_chars;
+ Function: Returns all the seen symbols (other than gaps)
+ Returns : array of characters that are the seen symbols
+ Args    : boolean to include the gap/missing/match characters
+
+=cut
+
+sub symbol_chars{
+   # Return the distinct characters seen in the sequences.
+   #
+   # Args   : $includeextra - if true, the gap, match and missing
+   #                          characters are kept in the result
+   # Returns: list of characters (empty for an alignment without sequences)
+   #
+   # The set of seen characters is computed once and kept in
+   # $self->{'_symbols'}.
+   my ($self,$includeextra) = @_;
+
+   unless ($self->{'_symbols'}) {
+       foreach my $seq ($self->each_seq) {
+           foreach my $char (split(//,$seq->seq)) {
+               $self->{'_symbols'}->{$char} = 1;
+           }
+       }
+   }
+   # With no sequences '_symbols' is still undefined; dereferencing it
+   # directly would die under strict refs, so fall back to an empty hash.
+   my %copy = %{ $self->{'_symbols'} || {} };
+   if( ! $includeextra ) {
+       foreach my $char ( $self->gap_char, $self->match_char,
+                          $self->missing_char) {
+           delete $copy{$char} if( defined $char );
+       }
+   }
+   return keys %copy;
+}
+
+=head1 Alignment descriptors
+
+These read only methods describe the MSA in various ways.
+
+
+=head2 score
+
+ Title     : score
+ Usage     : $str = $ali->score()
+ Function  : get/set a score of the alignment
+ Returns   : a score for the alignment
+ Argument  : an optional score to set
+
+=cut
+
+sub score {
+  # Get/set the alignment score.  Any argument, even undef, is stored.
+  my ($self, @args) = @_;
+  if (@args) {
+    $self->{score} = $args[0];
+  }
+  return $self->{score};
+}
+
+=head2 consensus_string
+
+ Title     : consensus_string
+ Usage     : $str = $ali->consensus_string($threshold_percent)
+ Function  : Makes a strict consensus
+ Returns   : Consensus string
+ Argument  : Optional threshold ranging from 0 to 100.
+             The consensus residue has to appear at least threshold %
+             of the sequences at a given location, otherwise a '?'
+             character will be placed at that location.
+             (Default value = 0%)
+
+=cut
+
+sub consensus_string {
+    # Concatenate the consensus character of every column, as computed by
+    # _consensus_aa() with the given (optional) percentage threshold.
+    my ($self, $threshold) = @_;
+
+    my $last_col = $self->length - 1;
+
+    return join '',
+           map { scalar $self->_consensus_aa($_, $threshold) }
+           0 .. $last_col;
+}
+
+sub _consensus_aa {
+    # Return the most frequent non-gap character of column $point
+    # (0-based), or '?' when no character reaches the threshold.
+    # A false threshold percentage is replaced by -1.  Ties go to the
+    # character that sorts first.
+    my ($self, $point, $threshold_percent) = @_;
+    $threshold_percent ||= -1;
+
+    my $gapchar = $self->gap_char;
+
+    # tally the characters of this column, skipping gaps and '.'
+    my %tally;
+    for my $member ( $self->each_seq() ) {
+        my $residue = substr($member->seq, $point, 1);
+        $self->throw("--$point-----------") if $residue eq '';
+        next if $residue eq $gapchar || $residue =~ /\./;
+        $tally{$residue}++;
+    }
+
+    my $threshold = $self->no_sequences() * $threshold_percent / 100. ;
+
+    my $best       = '?';
+    my $best_count = -1;
+    for my $candidate ( sort keys %tally ) {
+        next unless $tally{$candidate} > $best_count;
+        next unless $tally{$candidate} >= $threshold;
+        $best       = $candidate;
+        $best_count = $tally{$candidate};
+    }
+    return $best;
+}
+
+
+=head2 consensus_iupac
+
+ Title     : consensus_iupac
+ Usage     : $str = $ali->consensus_iupac()
+ Function  : Makes a consensus using IUPAC ambiguity codes from DNA
+             and RNA. The output is in upper case except when gaps in
+             a column force output to be in lower case.
+
+             Note that if your alignment sequences contain a lot of
+             IUPAC ambiguity codes you often have to manually set
+             alphabet.  Bio::PrimarySeq::_guess_type thinks they
+             indicate a protein sequence.
+ Returns   : consensus string
+ Argument  : none
+ Throws    : on protein sequences
+
+=cut
+
+sub consensus_iupac {
+    # Concatenate the IUPAC consensus character of every column.
+    # Throws if any sequence reports the 'protein' alphabet.
+    my ($self) = @_;
+    my $last_col = $self->length-1;
+
+    # only DNA and RNA sequences are valid
+    for my $member ( $self->each_seq() ) {
+        next unless $member->alphabet eq 'protein';
+        $self->throw("Seq [". $member->get_nse. "] is a protein");
+    }
+
+    # one consensus character per alignment column
+    my $consensus = "";
+    for my $col ( 0 .. $last_col ) {
+        $consensus .= $self->_consensus_iupac($col);
+    }
+    return $consensus;
+}
+
+sub _consensus_iupac {
+    # Return the IUPAC ambiguity code covering the bases found in one
+    # column (0-based).  The code is lower-cased when the column also
+    # contains a non-word character, and 'T' becomes 'U' when the column
+    # contains a 'U'.
+    my ($self, $column) = @_;
+    my ($string, $char, $rna);
+
+    # gather the column, one character per sequence
+    for my $member ( $self->each_seq() ) {
+        $string .= substr($member->seq, $column, 1);
+    }
+    $string = uc $string;
+
+    # an N anywhere decides the column at once
+    if ($string =~ /N/) {
+        return $string =~ /\W/ ? 'n' : 'N';
+    }
+    # nothing but non-word characters
+    return '-' if $string =~ /^\W+$/;
+
+    # treat RNA as DNA: one U turned into T is enough, only presence counts
+    $rna = 1 if $string =~ s/U/T/;
+
+    # Expand the first occurrence of each ambiguity code into the bases it
+    # stands for; only the set of bases present matters afterwards.
+    my @expansions = (
+        [ V => 'AGC' ], [ D => 'AGT' ], [ H => 'ACT' ], [ B => 'CTG' ],
+        [ S => 'GC'  ], [ K => 'GT'  ], [ Y => 'CT'  ], [ R => 'AG'  ],
+        [ W => 'AT'  ], [ M => 'AC'  ],
+    );
+    for my $pair (@expansions) {
+        my ($code, $bases) = @{$pair};
+        $string =~ s/$code/$bases/;
+    }
+
+    # the set of bases present, in fixed A, C, G, T order, picks the code
+    my %code_for = (
+        A    => 'A',  C    => 'C',  G    => 'G',  T    => 'T',
+        AG   => 'R',  AC   => 'M',  AT   => 'W',
+        CG   => 'S',  CT   => 'Y',  GT   => 'K',
+        ACG  => 'V',  AGT  => 'D',  ACT  => 'H',  CGT  => 'B',
+        ACGT => 'N',
+    );
+    my $present = join '', grep { index($string, $_) >= 0 } qw(A C G T);
+    $char = $code_for{$present};
+
+    $char = 'U' if $rna and $char eq 'T';
+    $char = lc $char if $string =~ /\W/;
+
+    return $char;
+}
+
+
+=head2 consensus_meta
+
+ Title     : consensus_meta
+ Usage     : $seqmeta = $ali->consensus_meta()
+ Function  : Returns a Bio::Seq::Meta object containing the consensus
+             strings derived from meta data analysis.
+ Returns   : Bio::Seq::Meta 
+ Argument  : Bio::Seq::Meta 
+ Throws    : non-MetaI object
+
+=cut
+
+sub consensus_meta {
+    # Get/set the alignment-level meta object.
+    # Throws when given a true value that is not a Bio::Seq::MetaI.
+    my ($self, $meta) = @_;
+
+    if ($meta) {
+        $self->throw('Not a Bio::Seq::MetaI object')
+            unless $meta->isa('Bio::Seq::MetaI');
+        $self->{'_aln_meta'} = $meta;
+    }
+    return $self->{'_aln_meta'};
+}
+
+=head2 is_flush
+
+ Title     : is_flush
+ Usage     : if ( $ali->is_flush() )
+ Function  : Tells you whether the alignment
+           : is flush, i.e. all of the same length
+ Returns   : 1 or 0
+ Argument  :
+
+=cut
+
+sub is_flush {
+    # Return 1 when all sequence strings have the same length, 0 at the
+    # first one that differs from the first sequence.  With a true
+    # $report the mismatch is also issued as a warning.
+    my ($self,$report) = @_;
+    my $expected = -1;
+
+    for my $member ( $self->each_seq() ) {
+        my $width = CORE::length($member->seq());
+
+        # the first sequence sets the reference length
+        if ($expected == -1) {
+            $expected = $width;
+            next;
+        }
+        next if $width == $expected;
+
+        my $complaint = "expecting $expected not $width from ".
+                        $member->display_id;
+        $self->warn($complaint) if( $report );
+        $self->debug($complaint);
+        $self->debug($member->seq(). "\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+
+=head2 length
+
+ Title     : length()
+ Usage     : $len = $ali->length()
+ Function  : Returns the maximum length of the alignment.
+             To be sure the alignment is a block, use is_flush
+ Returns   : Integer
+ Argument  :
+
+=cut
+
+sub length_aln {
+    # Deprecated alias: reports the deprecation, then delegates to length().
+    my ($self, @args) = @_;
+    $self->deprecated("length_aln - deprecated method. Use length() instead.");
+    return $self->length(@args);
+}
+
+sub length {
+    # Return the length of the longest sequence in the alignment, or -1
+    # when the alignment holds no sequences.
+    #
+    # The former Bio::Seq::LargeSeqI test ran the same statement in both
+    # branches, so it has been dropped together with the unused $len.
+    my $self = shift;
+    my $longest = -1;
+
+    foreach my $seq ( $self->each_seq() ) {
+        my $width = $seq->length;
+        $longest = $width if $width > $longest;
+    }
+
+    return $longest;
+}
+
+
+=head2 maxdisplayname_length
+
+ Title     : maxdisplayname_length
+ Usage     : $ali->maxdisplayname_length()
+ Function  : Gets the maximum length of the displayname in the
+             alignment. Used in writing out various MSA formats.
+ Returns   : integer
+ Argument  :
+
+=cut
+
+sub maxname_length {
+    # Deprecated alias: reports the deprecation, then delegates to
+    # maxdisplayname_length().
+    my ($self) = @_;
+    my $notice = "maxname_length - deprecated method.".
+                 " Use maxdisplayname_length() instead.";
+    $self->deprecated($notice);
+    return $self->maxdisplayname_length();
+}
+
+# Deprecated alias for maxdisplayname_length(); any arguments are ignored.
+# The deprecation notice used to recommend maxnse_length() itself; it now
+# names the real replacement.
+sub maxnse_length {
+    my $self = shift;
+    $self->deprecated("maxnse_length - deprecated method.".
+                      " Use maxdisplayname_length() instead.");
+    $self->maxdisplayname_length();
+}
+
+# Returns the length of the longest display name among the sequences
+# of the alignment, or -1 when there are no sequences.
+sub maxdisplayname_length {
+    my ($self) = @_;
+    my $longest = -1;
+
+    for my $member ( $self->each_seq() ) {
+        # display names are looked up by the sequence's name/start-end key
+        my $shown = $self->displayname($member->get_nse());
+        my $width = CORE::length $shown;
+        $longest = $width if $width > $longest;
+    }
+
+    return $longest;
+}
+
+=head2 max_metaname_length
+
+ Title     : max_metaname_length
+ Usage     : $ali->max_metaname_length()
+ Function  : Gets the maximum length of the meta name tags in the
+             alignment for the sequences and for the alignment.
+             Used in writing out various MSA formats.
+ Returns   : integer
+ Argument  : None
+
+=cut
+
+# Returns the length of the longest meta name found either on the
+# individual sequences or on the alignment-level (consensus) meta
+# object; -1 when no meta names exist at all.
+sub max_metaname_length {
+    my $self = shift;
+    my $maxname = (-1);
+
+    # check seq meta first
+    for my $seq ( $self->each_seq() ) {
+        # Only meta-capable sequences can be asked for meta_names. The old
+        # guard had a misplaced parenthesis that put "|| !$seq->meta_names"
+        # inside the isa() argument, where it never ran; a sequence with
+        # no meta names simply makes the inner loop a no-op.
+        next unless $seq->isa('Bio::Seq::MetaI');
+        for my $mtag ($seq->meta_names) {
+            my $len = CORE::length $mtag;
+            $maxname = $len if $len > $maxname;
+        }
+    }
+
+    # alignment meta
+    for my $meta ($self->consensus_meta) {
+        next unless $meta;
+        for my $name ($meta->meta_names) {
+            my $len = CORE::length $name;
+            $maxname = $len if $len > $maxname;
+        }
+    }
+
+    return $maxname;
+}
+
+=head2 no_residues
+
+ Title     : no_residues
+ Usage     : $no = $ali->no_residues
+ Function  : number of residues in total in the alignment
+ Returns   : integer
+ Argument  :
+
+=cut
+
+# Counts every ASCII letter in every sequence string of the alignment;
+# any other character (gap symbols etc.) is not counted.
+sub no_residues {
+    my ($self) = @_;
+    my $total = 0;
+
+    for my $member ($self->each_seq) {
+        my $residues = $member->seq();
+
+        # tr/// with an empty replacement list only counts matches
+        $total += ($residues =~ tr/A-Za-z//);
+    }
+
+    return $total;
+}
+
+=head2 no_sequences
+
+ Title     : no_sequences
+ Usage     : $depth = $ali->no_sequences
+ Function  : number of sequence in the sequence alignment
+ Returns   : integer
+ Argument  :
+
+=cut
+
+# Returns whatever each_seq() yields in scalar context (presumably the
+# number of sequences, assuming each_seq returns an array -- confirm).
+sub no_sequences {
+    my ($self) = @_;
+
+    my $depth = scalar($self->each_seq);
+    return $depth;
+}
+
+
+=head2 average_percentage_identity
+
+ Title   : average_percentage_identity
+ Usage   : $id = $align->average_percentage_identity
+ Function: The function uses a fast method to calculate the average
+           percentage identity of the alignment
+ Returns : The average percentage identity of the alignment
+ Args    : None
+ Notes   : This method implemented by Kevin Howe calculates a figure that is
+           designed to be similar to the average pairwise identity of the
+           alignment (identical in the absence of gaps), without having to
+           explicitly calculate pairwise identities proposed by Richard Durbin.
+           Validated by Ewan Birney and Alex Bateman.
+
+=cut
+
+# Computes an approximation of the average pairwise identity without
+# comparing sequences pairwise: for each column, the number of identical
+# residue pairs (n*(n-1) per letter) is summed and divided by the number
+# of all residue pairs in that column. Only the letters A-Z are counted
+# (case-insensitively); gap and other symbols are ignored.
+#
+# Throws if the alignment is not flush. Returns 0 when no column
+# contains at least two residues.
+sub average_percentage_identity{
+   # "@args" had been mangled into " at args" by the list archive,
+   # which is a syntax error; restored here.
+   my ($self,@args) = @_;
+
+   my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
+                   'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
+
+   my ($len, $total, $subtotal, $divisor, $subdivisor, @seqs, @countHashes);
+
+   if (! $self->is_flush()) {
+       $self->throw("All sequences in the alignment must be the same length");
+   }
+
+   @seqs = $self->each_seq();
+   $len = $self->length();
+
+   # load the each hash with correct keys for existence checks
+
+   for( my $index=0; $index < $len; $index++) {
+       foreach my $letter (@alphabet) {
+           $countHashes[$index]->{$letter} = 0;
+       }
+   }
+   foreach my $seq (@seqs)  {
+       my @seqChars = split //, $seq->seq();
+       for( my $column=0; $column < @seqChars; $column++ ) {
+           my $char = uc($seqChars[$column]);
+           # characters outside A-Z have no key and are skipped
+           if (exists $countHashes[$column]->{$char}) {
+               $countHashes[$column]->{$char}++;
+           }
+       }
+   }
+
+   $total = 0;
+   $divisor = 0;
+   for(my $column =0; $column < $len; $column++) {
+       my %hash = %{$countHashes[$column]};
+       $subdivisor = 0;
+       foreach my $res (keys %hash) {
+           # ordered pairs of identical residues in this column
+           $total += $hash{$res}*($hash{$res} - 1);
+           $subdivisor += $hash{$res};
+       }
+       # ordered pairs of any two residues in this column
+       $divisor += $subdivisor * ($subdivisor - 1);
+   }
+   return $divisor > 0 ? ($total / $divisor )*100.0 : 0;
+}
+
+=head2 percentage_identity
+
+ Title   : percentage_identity
+ Usage   : $id = $align->percentage_identity
+ Function: The function calculates the average percentage identity
+           (aliased to average_percentage_identity)
+ Returns : The average percentage identity
+ Args    : None
+
+=cut
+
+# Plain alias: returns average_percentage_identity(); arguments are ignored.
+sub percentage_identity {
+    my ($self) = @_;
+    my $identity = $self->average_percentage_identity();
+    return $identity;
+}
+
+=head2 overall_percentage_identity
+
+ Title   : overall_percentage_identity
+ Usage   : $id = $align->overall_percentage_identity
+ Function: The function calculates the percentage identity of
+           the conserved columns
+ Returns : The percentage identity of the conserved columns
+ Args    : None
+
+=cut
+
+# Returns the percentage of alignment columns in which every sequence
+# carries the same letter (A-Z, case-insensitive). Columns containing a
+# gap or any non-letter in at least one sequence are never counted as
+# conserved.
+#
+# Throws if the alignment is not flush. Returns 0 for an alignment with
+# no columns instead of dividing by zero.
+sub overall_percentage_identity{
+   # "@args" had been mangled into " at args" by the list archive,
+   # which is a syntax error; restored here.
+   my ($self,@args) = @_;
+
+   my @alphabet = ('A','B','C','D','E','F','G','H','I','J','K','L','M',
+                   'N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
+
+   my ($len, $total, @seqs, @countHashes);
+
+   if (! $self->is_flush()) {
+       $self->throw("All sequences in the alignment must be the same length");
+   }
+
+   @seqs = $self->each_seq();
+   $len = $self->length();
+
+   # length() is -1 without sequences and 0 when all sequences are empty;
+   # there is nothing to score then, and $len is the divisor below.
+   return 0 if $len <= 0;
+
+   # load the each hash with correct keys for existence checks
+   for( my $index=0; $index < $len; $index++) {
+       foreach my $letter (@alphabet) {
+           $countHashes[$index]->{$letter} = 0;
+       }
+   }
+   foreach my $seq (@seqs)  {
+       my @seqChars = split //, $seq->seq();
+       for( my $column=0; $column < @seqChars; $column++ ) {
+           my $char = uc($seqChars[$column]);
+           if (exists $countHashes[$column]->{$char}) {
+               $countHashes[$column]->{$char}++;
+           }
+       }
+   }
+
+   $total = 0;
+   for(my $column =0; $column < $len; $column++) {
+       my %hash = %{$countHashes[$column]};
+       foreach ( values %hash ) {
+           next if( $_ == 0 );
+           # Only the first non-zero count is inspected: if it does not
+           # equal the number of sequences the column is not conserved.
+           $total++ if( $_ == scalar @seqs );
+           last;
+       }
+   }
+   return ($total / $len ) * 100.0;
+}
+
+=head1 Alignment positions
+
+Methods to map a sequence position into an alignment column and back.
+column_from_residue_number() does the former. The latter is really a
+property of the sequence object and can be done using
+L<Bio::LocatableSeq::location_from_column>:
+
+    # select somehow a sequence from the alignment, e.g.
+    my $seq = $aln->get_seq_by_pos(1);
+    #$loc is undef or Bio::LocationI object
+    my $loc = $seq->location_from_column(5);
+
+
+=head2 column_from_residue_number
+
+ Title   : column_from_residue_number
+ Usage   : $col = $ali->column_from_residue_number( $seqname, $resnumber)
+ Function: This function gives the position in the alignment
+           (i.e. column number) of the given residue number in the
+           sequence with the given name. For example, for the
+           alignment
+
+    	     Seq1/91-97 AC..DEF.GH.
+   	     Seq2/24-30 ACGG.RTY...
+  	        Seq3/43-51 AC.DDEF.GHI
+
+           column_from_residue_number( "Seq1", 94 ) returns 6.
+           column_from_residue_number( "Seq2", 25 ) returns 2.
+           column_from_residue_number( "Seq3", 50 ) returns 10.
+
+           An exception is thrown if the residue number would lie
+           outside the length of the alignment
+           (e.g. column_from_residue_number( "Seq2", 22 )
+
+      	  Note: If the parent sequence is represented by more than
+	        one alignment sequence and the residue number is present in
+	        them, this method finds only the first one.
+
+ Returns : A column number for the position in the alignment of the
+           given residue in the given sequence (1 = first column)
+ Args    : A sequence id/name (not a name/start-end)
+           A residue number in the whole sequence (not just that
+           segment of it in the alignment)
+
+=cut
+
+# Maps a residue number of the named sequence to an alignment column.
+# Each alignment sequence stored under $name is asked in turn; the first
+# one that answers without throwing wins. Throws when the name is
+# unknown, the residue number is missing (or false), or no segment
+# contains the residue.
+sub column_from_residue_number {
+    my ($self, $name, $resnumber) = @_;
+
+    unless ($self->{'_start_end_lists'}->{$name}) {
+        $self->throw("No sequence with name [$name]");
+    }
+    unless ($resnumber) {
+        $self->throw("Second argument residue number missing");
+    }
+
+    for my $segment ($self->each_seq_with_id($name)) {
+        # a segment that does not cover the residue throws; try the next
+        my $column = eval { $segment->column_from_residue_number($resnumber) };
+        next if $@;
+        return $column;
+    }
+
+    $self->throw("Could not find a sequence segment in $name ".
+                 "containing residue number $resnumber");
+
+}
+
+=head1 Sequence names
+
+Methods to manipulate the display name. The default name based on the
+sequence id and subsequence positions can be overridden in various
+ways.
+
+=head2 displayname
+
+ Title     : displayname
+ Usage     : $myalign->displayname("Ig", "IgA")
+ Function  : Gets/sets the display name of a sequence in the alignment
+ Returns   : A display name string
+ Argument  : name of the sequence
+             displayname of the sequence (optional)
+
+=cut
+
+# Deprecated alias: warns, then forwards all arguments to displayname().
+sub get_displayname {
+    my ($self, @rest) = @_;
+    $self->deprecated("get_displayname - deprecated method. Use displayname() instead.");
+    return $self->displayname(@rest);
+}
+
+# Deprecated alias: warns, then forwards all arguments to displayname().
+sub set_displayname {
+    my ($self, @rest) = @_;
+    $self->deprecated("set_displayname - deprecated method. Use displayname() instead.");
+    return $self->displayname(@rest);
+}
+
+# Combined getter/setter for the display name stored under $name.
+# With a true $disname the name is stored and returned; otherwise the
+# stored display name is returned, falling back to $name itself.
+# Throws when no sequence is registered under $name.
+sub displayname {
+    my ($self, $name, $disname) = @_;
+
+    if( !defined $self->{'_seq'}->{$name} ) {
+        $self->throw("No sequence with name [$name]");
+    }
+
+    # setter: both the key and the new display name must be true values
+    if( $disname and $name ) {
+        $self->{'_dis_name'}->{$name} = $disname;
+        return $disname;
+    }
+
+    # getter
+    my $stored = $self->{'_dis_name'}->{$name};
+    return defined $stored ? $stored : $name;
+}
+
+=head2 set_displayname_count
+
+ Title     : set_displayname_count
+ Usage     : $ali->set_displayname_count
+ Function  : Sets the names to be name_# where # is the number of
+             times this name has been used.
+ Returns   : 1, on success
+ Argument  :
+
+=cut
+
+# Renames every sequence to "<id>_<n>", where n counts consecutive
+# occurrences of the same id in the order delivered by
+# each_alphabetically() (presumably that order groups equal ids
+# together -- confirm). Always returns 1.
+sub set_displayname_count {
+    my $self= shift;
+    my ($current_id, $seen);
+
+    foreach my $member ( $self->each_alphabetically() ) {
+        my $nse = $member->get_nse();
+
+        # the first sequence, or a change of id, restarts numbering at 1
+        unless( defined $current_id and $current_id eq ($member->id()) ) {
+            $seen       = 1;
+            $current_id = $member->id();
+        }
+
+        $self->displayname($nse, sprintf("%s_%s",$current_id,$seen));
+        $seen++;
+    }
+    return 1;
+}
+
+=head2 set_displayname_flat
+
+ Title     : set_displayname_flat
+ Usage     : $ali->set_displayname_flat()
+ Function  : Makes all the sequences be displayed as just their name,
+             not name/start-end
+ Returns   : 1
+ Argument  :
+
+=cut
+
+# Sets the display name of every sequence to its plain id. Returns 1.
+sub set_displayname_flat {
+    my ($self) = @_;
+
+    for my $member ( $self->each_seq() ) {
+        my $key = $member->get_nse();
+        $self->displayname($key, $member->id());
+    }
+    return 1;
+}
+
+=head2 set_displayname_normal
+
+ Title     : set_displayname_normal
+ Usage     : $ali->set_displayname_normal()
+ Function  : Makes all the sequences be displayed as name/start-end
+ Returns   : 1, on success
+ Argument  :
+
+=cut
+
+# Sets the display name of every sequence to the string returned by
+# its get_nse() (name/start-end per the POD). Returns 1.
+sub set_displayname_normal {
+    my ($self) = @_;
+
+    for my $member ( $self->each_seq() ) {
+        my $key = $member->get_nse();
+        $self->displayname($key, $key);
+    }
+    return 1;
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $obj->source($newval)
+ Function: sets the Alignment source program
+ Example :
+ Returns : value of source
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Getter/setter for the '_source' slot. A defined argument is stored;
+# the current value is returned either way.
+sub source{
+   my $self  = shift;
+   my $value = shift;
+
+   $self->{'_source'} = $value if defined $value;
+   return $self->{'_source'};
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $ann = $aln->annotation or 
+           $aln->annotation($ann)
+ Function: Gets or sets the annotation
+ Returns : Bio::AnnotationCollectionI object
+ Args    : None or Bio::AnnotationCollectionI object
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information
+
+=cut
+
+# Getter/setter for the annotation collection. A defined argument must
+# be a Bio::AnnotationCollectionI (otherwise throws) and replaces the
+# stored collection. Without an argument an empty
+# Bio::Annotation::Collection is created on first access.
+sub annotation {
+    my ($obj,$value) = @_;
+
+    if( defined $value ) {
+        $value->isa("Bio::AnnotationCollectionI")
+            or $obj->throw("object of class ".ref($value)." does not implement ".
+                "Bio::AnnotationCollectionI. Too bad.");
+        $obj->{'_annotation'} = $value;
+        return $obj->{'_annotation'};
+    }
+
+    # lazily create the default collection
+    unless( defined $obj->{'_annotation'} ) {
+        $obj->{'_annotation'} = Bio::Annotation::Collection->new();
+    }
+    return $obj->{'_annotation'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAnalysisI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAnalysisI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/SimpleAnalysisI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,271 @@
+# $Id: SimpleAnalysisI.pm,v 1.9.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::SimpleAnalysisI
+#
+# Cared for by Martin Senger <martin.senger at gmail.com>
+# For copyright and disclaimer see below.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SimpleAnalysisI - A simple interface to any (local or remote) analysis tool
+
+=head1 SYNOPSIS
+
+This is an interface module - you do not instantiate it.
+Use other modules instead (those that implement this interface).
+
+=head1 DESCRIPTION
+
+This interface contains public methods for accessing and controlling
+local and remote analysis tools. It is meant to be used on the client
+side. The interface consists only of a necessary set of methods for
+synchronous invocation of analysis tools. For more complex set,
+including an asynchronous access, see interface C<Bio::AnalysisI>
+(which inherits from this one, by the way).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Martin Senger (martin.senger at gmail.com)
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003, Martin Senger and EMBL-EBI.
+All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 SEE ALSO
+
+=over
+
+=item *
+
+http://www.ebi.ac.uk/soaplab/Perl_Client.html
+
+=back
+
+=head1 APPENDIX
+
+This is actually the main documentation...
+
+If you try to call any of these methods directly on this
+C<Bio::SimpleAnalysisI> object you will get a I<not implemented> error
+message.
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::SimpleAnalysisI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+# -----------------------------------------------------------------------------
+
+=head2 analysis_name
+
+ Usage   : $tool->analysis_name;
+ Returns : a name of this analysis
+ Args    : none
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub analysis_name {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 analysis_spec
+
+ Usage   : $tool->analysis_spec;
+ Returns : a hash reference describing this analysis
+ Args    : none
+
+The returned hash reference uses the following keys (not all of them always
+present, perhaps others present as well): C<name>, C<type>, C<version>,
+C<supplier>, C<installation>, C<description>.
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub analysis_spec {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 input_spec
+
+ Usage   : $tool->input_spec;
+ Returns : an array reference with hashes as elements
+ Args    : none
+
+The analysis input data are named, and can be also associated with a
+default value, with allowed values and with few other attributes. The
+names are important for feeding the analysis with the input data (the
+inputs are given to methods C<run> and C<wait_for> as name/value
+pairs).
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub input_spec {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 result_spec
+
+ Usage   : $tool->result_spec;
+ Returns : a hash reference with result names as keys
+           and result types as values
+ Args    : none
+
+An analysis can produce several results, or the same result in several
+different formats. All such results are named and can be retrieved
+using their names by method C<result>.
+
+Here is an example of the result specification:
+
+  $result_spec = {
+          'outseq' => 'String',
+          'report' => 'String',
+          'detailed_status' => 'String'
+        };
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub result_spec {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 run
+
+ Usage   : $tool->run ( ['sequence=@my.seq', 'osformat=embl'] )
+ Returns : $self
+ Args    : data and parameters for this execution
+           (in various formats)
+
+Create a job, start it, and wait for its completion. The method is
+identical to the method C<wait_for>. Why are there two methods doing
+the same thing? Because it is expected that the sub-classes may implement
+them differently (an example is an interface C<Bio::AnalysisI> which
+uses method C<run> for an asynchronous execution and method
+C<wait_for> for a synchronous one).
+
+Usually, after this call, you ask for results of the finished job:
+
+    $analysis->run (...)->result;
+
+The input data and parameters for this execution can be specified in
+various ways:
+
+=over
+
+=item array reference
+
+The array has scalar elements of the form
+
+   name = [[@]value]
+
+where C<name> is the name of an input data or input parameter (see
+method C<input_spec> for finding what names are recognized by this
+analysis) and C<value> is a value for this data/parameter. If C<value>
+is missing a 1 is assumed (which is convenient for the boolean
+options). If C<value> starts with C<@> it is treated as a local
+filename, and its contents is used as the data/parameter value.
+
+=item hash reference
+
+The same as with the array reference but now there is no need to use
+an equal sign. The hash keys are input names and hash values their
+data. The values can again start with a C<@> sign indicating a local
+filename.
+
+=back
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub run {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 wait_for
+
+ Usage   : $tool->wait_for ( { 'sequence' => '@my,file' } )
+ Returns : $self
+ Args    : the same as for method 'run'
+
+Create a job, start it and wait for its completion. The method is
+identical to the method C<run>. See details in the C<run> method.
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub wait_for {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 status
+
+ Usage   : $tool->status
+ Returns : string describing a status of the execution
+ Args    : none
+
+It returns one of the following strings (and perhaps more if a server
+implementation extended possible job states):
+
+   CREATED              (not run yet)
+   COMPLETED            (run and finished normally)
+   TERMINATED_BY_ERROR  (run and finished with an error or a signal)
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub status {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+=head2 result
+
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : none (but an implementation may choose
+           to add arguments for instructions how to process
+           the raw result)
+
+The method returns a scalar representing a result of an executed
+job. If the job was terminated by an error the result may contain an
+error message instead of the real data (or both, depending on the
+implementation).
+
+=cut
+
+# Interface stub: always reports "not implemented" via
+# throw_not_implemented().
+sub result {
+    my ($self) = @_;
+    $self->throw_not_implemented();
+}
+
+# -----------------------------------------------------------------------------
+
+1;
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Species.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Species.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Species.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,528 @@
+# $Id: Species.pm,v 1.35.4.6 2006/12/05 20:54:39 sendu Exp $
+#
+# BioPerl module for Bio::Species
+#
+# Cared for by James Gilbert <jgrg at sanger.ac.uk>
+# Reimplemented by Sendu Bala <bix at sendu.me.uk>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Species - Generic species object
+
+=head1 SYNOPSIS
+
+    $species = Bio::Species->new(-classification => [@classification]);
+                                    # Can also pass classification
+                                    # array to new as below
+
+    $species->classification(qw( sapiens Homo Hominidae
+                                 Catarrhini Primates Eutheria
+                                 Mammalia Vertebrata Chordata
+                                 Metazoa Eukaryota ));
+
+    $genus = $species->genus();
+
+    $bi = $species->binomial();     # $bi is now "Homo sapiens"
+
+    # For storing common name
+    $species->common_name("human");
+
+    # For storing subspecies
+    $species->sub_species("accountant");
+
+=head1 DESCRIPTION
+
+Provides a very simple object for storing phylogenetic
+information.  The classification is stored in an array,
+which is a list of nodes in a phylogenetic tree.  Access to
+getting and setting species and genus is provided, but not
+to any of the other node types (eg: "phylum", "class",
+"order", "family").  There's plenty of scope for making the
+model more sophisticated, if this is ever needed.
+
+Methods are also provided for storing common
+names and subspecies.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+James Gilbert email B<jgrg at sanger.ac.uk>
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#' Let the code begin...
+
+package Bio::Species;
+use strict;
+
+use Bio::DB::Taxonomy;
+use Bio::Tree::Tree;
+use Scalar::Util qw(weaken isweak);
+use base qw(Bio::Taxon);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Species->new(-classification => \@class)
+ Function: Build a new Species object
+ Returns : Bio::Species object
+ Args    : -ncbi_taxid     => NCBI taxonomic ID (optional)
+           -classification => arrayref of classification
+
+=cut
+
+# Constructor. Delegates to the parent constructor, then handles the
+# -organelle, -sub_species, -variant and -classification arguments.
+# A non-empty array reference passed as -classification is handed to
+# classification(); otherwise an empty Bio::Tree::Tree is stored so
+# the tree-based accessors always have a tree to query.
+sub new {
+    my($class, @args) = @_;
+    
+    my $self = $class->SUPER::new(@args);
+    
+    my ($org, $sp, $var, $classification) =
+        $self->_rearrange([qw(ORGANELLE
+                              SUB_SPECIES
+                              VARIANT
+                              CLASSIFICATION)], @args);
+    
+    if (defined $classification && ref($classification) eq "ARRAY" && @{$classification}) {
+        $self->classification(@$classification);
+    }
+    else {
+        # store a tree on ourselves so we can use Tree methods
+        # (direct method call instead of the ambiguous indirect object
+        # syntax "new Bio::Tree::Tree()")
+        $self->{tree} = Bio::Tree::Tree->new();
+        
+        # some things want to freeze/thaw Bio::Species objects, but
+        # _root_cleanup_methods contains a CODE ref, delete it.
+        # delete $self->{tree}->{_root_cleanup_methods};
+    }
+    
+    defined $org && $self->organelle($org);
+    defined $sp  && $self->sub_species($sp); 
+    defined $var && $self->variant($var);
+    
+    return $self;
+}
+
+=head2 classification
+
+ Title   : classification
+ Usage   : $self->classification(@class_array);
+           @classification = $self->classification();
+ Function: Get/set the lineage of this species. The array provided must be in
+           the order ... ---> SPECIES, GENUS ---> KINGDOM ---> etc.
+ Example : $obj->classification(qw( 'Homo sapiens' Homo Hominidae
+           Catarrhini Primates Eutheria Mammalia Vertebrata
+           Chordata Metazoa Eukaryota));
+ Returns : Classification array
+ Args    : Classification array 
+                 OR
+           A reference to the classification array. In the latter case
+           if there is a second argument and it evaluates to true,
+           names will not be validated. NB: in any case, names are never
+           validated anyway.
+
+=cut
+
+# Get/set the lineage. When values are supplied (as a list or as a
+# single array reference, ordered species, genus, ... upwards) a
+# list-backed taxonomy database and a fresh tree rooted on this node
+# are installed. In both cases the scientific names of the lineage
+# nodes plus this node are returned, this node's name first.
+#
+# Throws when this node already has a name that matches neither of the
+# first two supplied elements nor "<second> <first>" (optionally
+# followed by a subspecies part).
+sub classification {
+    my ($self, @vals) = @_;
+
+    if (@vals) {
+        if (ref($vals[0]) eq 'ARRAY') {
+            @vals = @{$vals[0]};
+        }
+        
+        # make sure the lineage contains us as first or second element
+        # (lineage may have subspecies, species, genus ...)
+        my $name = $self->node_name;
+        if ($name && ($name ne $vals[0] && $name ne $vals[1]) && $name ne "$vals[1] $vals[0]") {
+            my $binomial = "$vals[1] $vals[0]";
+            # \Q...\E: the lineage names are data, not patterns; names
+            # such as "sp." or ones containing brackets must match
+            # literally instead of being parsed as regex syntax.
+            if ($name =~ /^\Q$binomial\E\s*(.+)/) {
+                # just assume the problem is someone tried to make a Bio::Species starting at subspecies
+                #*** no idea if this is appropriate! just a possible fix related to bug 2092
+                $self->sub_species($1);
+                $name = $self->node_name($binomial);
+            }
+            else {
+                $self->throw("The supplied lineage does not start near '$name' (I was supplied '".join(" | ", @vals)."')");
+            }
+        }
+        
+        # create a lineage for ourselves
+        my $db = Bio::DB::Taxonomy->new(-source => 'list', -names => [reverse @vals]);
+        unless ($self->scientific_name) {
+            # assume we're supposed to be the leaf of the supplied lineage
+            $self->scientific_name($vals[0]);
+        }
+        unless ($self->rank) {
+            # and that we are rank species
+            $self->rank('species');
+        }
+        
+        $self->db_handle($db);
+
+        $self->{tree} = Bio::Tree::Tree->new(-node => $self);
+        # some things want to freeze/thaw Bio::Species objects, but tree's
+        # _root_cleanup_methods contains a CODE ref, delete it.
+        #*** even if we don't delete the cleanup methods, we still get memory
+        #    leak-like symptoms, and the actual cleanup causes a mass of
+        #    warnings... needs investigation!
+        delete $self->{tree}->{_root_cleanup_methods};
+    }
+    
+    @vals = ();
+    foreach my $node ($self->{tree}->get_lineage_nodes($self), $self) {
+        # nodes without a scientific name are skipped via "next"
+        unshift(@vals, $node->scientific_name || next);
+    }
+    # keep only a weak reference to the root node in the tree
+    weaken($self->{tree}->{'_rootnode'}) unless isweak($self->{tree}->{'_rootnode'});
+    return @vals;
+}
+
+=head2 ncbi_taxid
+
+ Title   : ncbi_taxid
+ Usage   : $obj->ncbi_taxid($newval)
+ Function: Get/set the NCBI Taxon ID
+ Returns : the NCBI Taxon ID as a string
+ Args    : newvalue to set or undef to unset (optional)
+
+=cut
+
+=head2 common_name
+
+ Title   : common_name
+ Usage   : $self->common_name( $common_name );
+           $common_name = $self->common_name();
+ Function: Get or set the common name of the species
+ Example : $self->common_name('human')
+ Returns : The common name in a string
+ Args    : String, which is the common name (optional)
+
+=cut
+
+=head2 division
+
+ Title   : division
+ Usage   : $obj->division($newval)
+ Function: Genbank Division for a species
+ Returns : value of division (a scalar)
+ Args    : value of division (a scalar)
+
+=cut
+
+=head2 species
+
+ Title   : species
+ Usage   : $self->species( $species );
+           $species = $self->species();
+ Function: Get or set the scientific species name.
+ Example : $self->species('Homo sapiens');
+ Returns : Scientific species name as string
+ Args    : Scientific species name as string
+
+=cut
+
+# Get/set the species name. A true argument is stored and returned.
+# Otherwise, on first use, the name is derived from the scientific
+# name of the species-ranked node in the tree (or of this node when no
+# such node exists) and cached in $self->{_species}.
+#
+# For multi-word names whose root node is not 'Viruses' the name is
+# split into genus / species / subspecies parts; genus() and
+# sub_species() are set as a side effect when those parts are found.
+sub species {
+    my ($self, $species) = @_;
+    
+    if ($species) {
+        $self->{_species} = $species;
+    }
+    
+    unless (defined $self->{_species}) {
+        # work it out from our nodes
+        my $species_taxon = $self->{tree}->find_node(-rank => 'species');
+        unless ($species_taxon) {
+            # just assume we are rank species
+            $species_taxon = $self;
+        }
+        
+        $species = $species_taxon->scientific_name;
+        
+        #
+        # munge it like the Bio::SeqIO modules used to do
+        # (more or less copy/pasted from old Bio::SeqIO::genbank, hence comments
+        #  referring to 'ORGANISM' etc.)
+        #
+        
+        my $root = $self->{tree}->get_root_node;
+        unless ($root) {
+            # direct method call instead of indirect object syntax
+            $self->{tree} = Bio::Tree::Tree->new(-node => $species_taxon);
+            delete $self->{tree}->{_root_cleanup_methods};
+            $root = $self->{tree}->get_root_node;
+        }
+        
+        my @spflds = split(' ', $species);
+        if (@spflds > 1 && $root->node_name ne 'Viruses') {
+            $species = undef;
+            
+            # does the next term start with uppercase?
+            # yes: valid genus; no then unconventional
+            # e.g. leaf litter basidiomycete sp. Collb2-39
+            my $genus;
+            if ($spflds[0] =~ m/^[A-Z]/) {
+                $genus = shift(@spflds);
+            }
+            else {
+                undef $genus;
+            }
+            
+            my $sub_species;
+            if (@spflds) {
+                while (my $fld = shift @spflds) {
+                    $species .= "$fld ";
+                    # does it have subspecies or varieties?
+                    last if ($fld =~ m/(sp\.|var\.)/);
+                }
+                chop $species;	# last space
+                # "@spflds" had been mangled into " at spflds" by the
+                # list archive (a syntax error); restored here.
+                $sub_species = join ' ',@spflds if(@spflds);
+            }
+            else {
+                $species = 'sp.';
+            }
+            
+            # does ORGANISM start with any words which make its genus undefined?
+            # these are in @unkn_genus	
+            # this in case species starts with uppercase so isn't caught above. 
+            # alter common name if required
+            my $unconv = 0; # is it unconventional species name?
+            my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
+            foreach (@unkn_genus) {
+                if ($genus && $genus =~ m/$_/i)	{
+                    $species = $genus . " " . $species;
+                    undef $genus;
+                    $unconv = 1;
+                    last;
+                }
+                elsif ($species =~ m/$_/i)	{
+                    $unconv = 1;
+                    last;
+                }
+            }
+            if (!$unconv && !$sub_species && $species =~ s/^(\w+)\s(\w+)$/$1/)	{
+                # need to extract subspecies from conventional ORGANISM format.  
+                # Will the 'word' in a two element species name
+                # e.g. $species = 'thummi thummi' => $species='thummi' & 
+                # $sub_species='thummi'
+                $sub_species = $2;
+            }
+            
+            $self->genus($genus) if $genus;
+            $self->sub_species($sub_species) if $sub_species;
+        }
+        
+        $self->{_species} = $species;
+    }
+    
+    return $self->{_species};
+}
+
+=head2 genus
+
+ Title   : genus
+ Usage   : $self->genus( $genus );
+           $genus = $self->genus();
+ Function: Get or set the scientific genus name.
+ Example : $self->genus('Homo');
+ Returns : Scientific genus name as string
+ Args    : Scientific genus name as string
+
+=cut
+
+sub genus {
+    my ($self, $genus) = @_;
+
+    # A true argument replaces whatever genus is cached on the object.
+    $self->{_genus} = $genus if $genus;
+
+    # Nothing cached yet: use the tree node ranked 'genus', or failing that
+    # our direct ancestor (which is simply assumed to be of rank genus).
+    if (!defined $self->{_genus}) {
+        my $node = $self->{tree}->find_node(-rank => 'genus')
+            || $self->ancestor;
+        $self->{_genus} = $node->scientific_name if $node;
+    }
+
+    return $self->{_genus};
+}
+
+=head2 sub_species
+
+ Title   : sub_species
+ Usage   : $obj->sub_species($newval)
+ Function: Get or set the scientific subspecies name.
+ Returns : value of sub_species
+ Args    : newvalue (optional)
+
+=cut
+
+sub sub_species {
+    my ($self, $sub) = @_;
+
+    # While nothing is stored on the object itself, a 'subspecies' node in
+    # the tree (when there is one) is used: it is renamed if a true value
+    # was passed, and its scientific name is the answer.
+    if (!defined $self->{'_sub_species'}) {
+        if (my $node = $self->{tree}->find_node(-rank => 'subspecies')) {
+            $node->scientific_name($sub) if $sub;
+            return $node->scientific_name;
+        }
+    }
+
+    # Otherwise the name is kept directly on this object.
+    if ($sub) {
+        $self->{'_sub_species'} = $sub;
+    }
+    return $self->{'_sub_species'};
+}
+
+=head2 variant
+
+ Title   : variant
+ Usage   : $obj->variant($newval)
+ Function: Get/set variant information for this species object (strain,
+           isolate, etc).
+ Example : 
+ Returns : value of variant (a scalar)
+ Args    : new value (a scalar or undef, optional)
+
+=cut
+
+sub variant{
+    my ($self, $var) = @_;
+
+    # Same two-tier storage as sub_species(): prefer a tree node of rank
+    # 'variant' as long as no value has been stored on the object itself.
+    if (!defined $self->{'_variant'}) {
+        if (my $node = $self->{tree}->find_node(-rank => 'variant')) {
+            $node->scientific_name($var) if $var;
+            return $node->scientific_name;
+        }
+    }
+
+    # No such node (or a value is already stored locally): use the object.
+    if ($var) {
+        $self->{'_variant'} = $var;
+    }
+    return $self->{'_variant'};
+}
+
+=head2 binomial
+
+ Title   : binomial
+ Usage   : $binomial = $self->binomial();
+           $binomial = $self->binomial('FULL');
+ Function: Returns a string "Genus species", or "Genus species subspecies",
+           if the first argument is 'FULL' (and the species has a subspecies).
+ Args    : Optionally the string 'FULL' to get the full name including
+           the subspecies.
+
+=cut
+
+sub binomial {
+    my ($self, $full) = @_;
+    # NOTE(review): $rank is fetched but never used below; the call is kept
+    # only because rank() is not visible from here.
+    my $rank = $self->rank || 'no rank';
+
+    my ($species, $genus) = ($self->species, $self->genus);
+    unless (defined $species) {
+        $species = 'sp.';
+        $self->warn("requested binomial but classification was not set");
+    }
+    $genus = '' unless defined $genus;
+
+    # Drop the genus from the species string when the species was stored as
+    # "Genus species".  Names are matched literally (\Q...\E): they routinely
+    # contain regex metacharacters such as '.', '(' or '+'.  An empty genus
+    # is skipped, otherwise the pattern would degenerate to /\s+/ and eat
+    # the first blank inside the species name.
+    $species =~ s/\Q$genus\E\s+// if length $genus;
+
+    my $bi = "$genus $species";
+    if (defined($full) && $full =~ /full/i) {
+        my $ssp = $self->sub_species;
+        if ($ssp) {
+            # The subspecies may repeat the binomial or the species name;
+            # strip either prefix (again literally) before appending it.
+            $ssp =~ s/\Q$bi\E\s+//;
+            $ssp =~ s/\Q$species\E\s+// if length $species;
+            $bi .= " $ssp";
+        }
+    }
+    return $bi;
+}
+
+=head2 validate_species_name
+
+ Title   : validate_species_name
+ Usage   : $result = $self->validate_species_name($string);
+ Function: Validate the species portion of the binomial
+ Args    : string
+ Notes   : The string following the "genus name" in the NCBI binomial
+           is so variable that it's not clear that this is a useful
+           function. Consider the binomials 
+           "Simian 11 rotavirus (serotype 3 / strain SA11-Patton)",
+           or "St. Thomas 3 rotavirus", straight from GenBank.
+           This is particularly problematic in microbes and viruses.
+           As such, this isn't actually used automatically by any Bio::Species
+           method.
+
+=cut
+
+sub validate_species_name {
+    my ($self, $string) = @_;
+
+    # Three ways to pass: the literal placeholder "sp.", anything that
+    # mentions "strain", or a name that starts with a letter followed by
+    # word characters, whitespace or hyphens.
+    my $acceptable = $string eq "sp."
+        || $string =~ /strain/
+        || $string =~ /^[a-z][\w\s-]+$/i;
+    return 1 if $acceptable;
+
+    $self->throw("Invalid species name '$string'");
+}
+
+sub validate_name {
+    # No validation is performed: this always reports success, whatever
+    # arguments it is called with.
+    return 1;
+}
+
+=head2 organelle
+
+ Title   : organelle
+ Usage   : $self->organelle( $organelle );
+           $organelle = $self->organelle();
+ Function: Get or set the organelle name
+ Example : $self->organelle('Chloroplast')
+ Returns : The organelle name in a string
+ Args    : String, which is the organelle name
+
+=cut
+
+sub organelle {
+    my ($self, @new) = @_;
+
+    # Any argument at all, even an explicit undef, overwrites the stored name.
+    if (@new) {
+        $self->{'_organelle'} = $new[0];
+    }
+    return $self->{'_organelle'};
+}
+
+sub dont_DESTROY {
+    my ($self) = @_;
+
+    # Let the tree (if we still hold one) clean itself up, then let go of it.
+    if ($self->{tree}) {
+        $self->{tree}->cleanup_tree;
+    }
+    delete $self->{tree};
+
+    # Finally run the node-level cleanup; its result is what we return.
+    return $self->node_cleanup;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Atom.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Atom.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Atom.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,609 @@
+# $Id: Atom.pm,v 1.12.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Structure::Atom
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::Atom - Bioperl structure Object, describes an Atom
+
+=head1 SYNOPSIS
+
+  #add synopsis here
+
+=head1 DESCRIPTION
+
+This object stores a Bio::Structure::Atom
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Structure::Atom;
+use strict;
+
+use Bio::Structure::Residue;
+use base qw(Bio::Root::Root);
+
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $struc = Bio::Structure::Atom->new( 
+                                           -id  => 'human_id',
+                                           );
+
+ Function: Returns a new Bio::Structure::Atom object from basic 
+	constructors. Probably most called from Bio::Structure::IO.
+ Returns : a new Bio::Structure::Atom object
+
+=cut
+
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Named arguments recognised: -id, -x, -y, -z
+    my ($id, $x, $y, $z) = $self->_rearrange([qw(ID X Y Z)], @args);
+
+    # Test for definedness, not truth: a coordinate of 0 (or an ID of '0')
+    # is a perfectly good value and must not be dropped.  The accessors
+    # themselves already use the same "defined" rule.
+    $self->id($id) if defined $id;
+    $self->x($x)   if defined $x;
+    $self->y($y)   if defined $y;
+    $self->z($z)   if defined $z;
+
+    return $self;
+}
+
+
+
+=head2 x()
+
+ Title   : x
+ Usage   : $x = $atom->x($x);
+ Function: Set/gets the X coordinate for an Atom
+ Returns : The value for the X coordinate of the Atom (This is just a number,
+ 	it is expected to be in Angstrom, but no guarantees)
+ Args    : The X coordinate as a number
+
+=cut
+
+sub x {
+    my ($self, $value) = @_;
+
+    # Only a defined argument updates the coordinate.  The value is stored
+    # exactly as given; nothing checks that it is actually a number.
+    $self->{'x'} = $value if defined $value;
+    return $self->{'x'};
+}
+
+
+=head2 y()
+
+ Title   : y
+ Usage   : $y = $atom->y($y);
+ Function: Set/gets the Y coordinate for an Atom
+ Returns : The value for the Y coordinate of the Atom (This is just a number,
+ 	it is expected to be in Angstrom, but no guarantees)
+ Args    : The Y coordinate as a number
+
+=cut
+
+sub y {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Setter part: skipped entirely when no defined value was supplied.
+    # No numeric validation is done on the value.
+    if (defined $value) {
+        $self->{'y'} = $value;
+    }
+    return $self->{'y'};
+}
+
+
+=head2 z()
+
+ Title   : z
+ Usage   : $z = $atom->z($z);
+ Function: Set/gets the Z coordinate for an Atom
+ Returns : The value for the Z coordinate of the Atom (This is just a number,
+ 	it is expected to be in Angstrom, but no guarantees)
+ Args    : The Z coordinate as a number
+
+=cut
+
+sub z {
+    my ($self, $value) = @_;
+
+    # Combined getter/setter for the Z coordinate; an undefined argument
+    # means "just read".  The value is not checked for being numeric.
+    defined $value and $self->{'z'} = $value;
+    return $self->{'z'};
+}
+
+
+=head2 xyz()
+
+ Title   : xyz
+ Usage   : ($x,$y,$z) = $atom->xyz;
+ Function: Gets the XYZ coordinates for an Atom
+ Returns : A list with the value for the XYZ coordinate of the Atom 
+ Args    : 
+
+=cut
+
+sub xyz {
+    my $self = shift;
+
+    # The coordinates are read through the individual accessors, in
+    # X, Y, Z order, and handed back as a three-element list.
+    return ($self->x(), $self->y(), $self->z());
+}
+
+
+=head2 residue()
+
+ Title   : residue
+ Usage   : 
+ Function:  No code here, all parent/child stuff via Entry
+ Returns : 
+ Args    : 
+
+=cut
+
+sub residue {
+    my $self = shift;
+
+    # An Atom keeps no link to its Residue; this method exists only to
+    # tell the caller that such relations are handled via the Entry.
+    $self->throw("all parent/child stuff via Entry\n");
+}
+
+
+=head2 icode()
+
+ Title   : icode
+ Usage   : $icode = $atom->icode($icode)
+ Function: Sets/gets the icode
+ Returns : Returns the icode for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub icode {
+    my ($self, $value) = @_;
+
+    # A defined argument is stored; an absent or undef one leaves the
+    # current icode untouched.
+    $self->{'icode'} = $value if defined $value;
+    return $self->{'icode'};
+}
+
+
+=head2 serial()
+
+ Title   : serial
+ Usage   : $serial = $atom->serial($serial)
+ Function: Sets/gets the serial number
+ Returns : Returns the serial number for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub serial {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Store the serial number when one is given, then report the current one.
+    if (defined $value) {
+        $self->{'serial'} = $value;
+    }
+    return $self->{'serial'};
+}
+
+
+=head2 occupancy()
+
+ Title   : occupancy
+ Usage   : $occupancy = $atom->occupancy($occupancy)
+ Function: Sets/gets the occupancy
+ Returns : Returns the occupancy for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub occupancy {
+    my ($self, $value) = @_;
+
+    # Get/set in one: a defined argument replaces the stored occupancy.
+    defined $value and $self->{'occupancy'} = $value;
+    return $self->{'occupancy'};
+}
+
+
+=head2 tempfactor()
+
+ Title   : tempfactor
+ Usage   : $tempfactor = $atom->tempfactor($tempfactor)
+ Function: Sets/gets the tempfactor
+ Returns : Returns the tempfactor for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub tempfactor {
+    my ($self, $value) = @_;
+
+    # The temperature factor is only overwritten by a defined argument.
+    $self->{'tempfactor'} = $value if defined $value;
+    return $self->{'tempfactor'};
+}
+
+
+=head2 segID()
+
+ Title   : segID
+ Usage   : $segID = $atom->segID($segID)
+ Function: Sets/gets the segID
+ Returns : Returns the segID for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub segID {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Update the segment ID if a defined value came in; always return it.
+    if (defined $value) {
+        $self->{'segID'} = $value;
+    }
+    return $self->{'segID'};
+}
+
+
+=head2 pdb_atomname()
+
+ Title   : pdb_atomname
+ Usage   : $pdb_atomname = $atom->pdb_atomname($pdb_atomname)
+ Function: Sets/gets the pdb_atomname (atomname used in the PDB file)
+ Returns : Returns the pdb_atomname for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub pdb_atomname {
+    my ($self, $value) = @_;
+
+    # Atom name as used in the PDB file: stored verbatim when defined.
+    defined $value and $self->{'pdb_atomname'} = $value;
+    return $self->{'pdb_atomname'};
+}
+
+
+=head2 element()
+
+ Title   : element
+ Usage   : $element = $atom->element($element)
+ Function: Sets/gets the element
+ Returns : Returns the element for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub element {
+    my ($self, $value) = @_;
+
+    # Plain accessor: remember a defined argument, return what is stored.
+    $self->{'element'} = $value if defined $value;
+    return $self->{'element'};
+}
+
+
+=head2 charge()
+
+ Title   : charge
+ Usage   : $charge = $atom->charge($charge)
+ Function: Sets/gets the charge
+ Returns : Returns the charge for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub charge {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Only a defined argument changes the stored charge.
+    if (defined $value) {
+        $self->{'charge'} = $value;
+    }
+    return $self->{'charge'};
+}
+
+
+=head2 sigx()
+
+ Title   : sigx
+ Usage   : $sigx = $atom->sigx($sigx)
+ Function: Sets/gets the sigx
+ Returns : Returns the sigx for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub sigx {
+    my ($self, $value) = @_;
+
+    # Accessor for the 'sigx' slot; a defined argument overwrites it.
+    $self->{'sigx'} = $value if defined $value;
+    return $self->{'sigx'};
+}
+
+
+=head2 sigy()
+
+ Title   : sigy
+ Usage   : $sigy = $atom->sigy($sigy)
+ Function: Sets/gets the sigy
+ Returns : Returns the sigy for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub sigy {
+    my ($self, $value) = @_;
+
+    # Accessor for the 'sigy' slot; a defined argument overwrites it.
+    defined $value and $self->{'sigy'} = $value;
+    return $self->{'sigy'};
+}
+
+
+=head2 sigz()
+
+ Title   : sigz
+ Usage   : $sigz = $atom->sigz($sigz)
+ Function: Sets/gets the sigz
+ Returns : Returns the sigz for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub sigz {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Accessor for the 'sigz' slot; a defined argument overwrites it.
+    if (defined $value) {
+        $self->{'sigz'} = $value;
+    }
+    return $self->{'sigz'};
+}
+
+
+=head2 sigocc()
+
+ Title   : sigocc
+ Usage   : $sigocc = $atom->sigocc($sigocc)
+ Function: Sets/gets the sigocc
+ Returns : Returns the sigocc for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub sigocc {
+    my ($self, $value) = @_;
+
+    # Accessor for the 'sigocc' slot; a defined argument overwrites it.
+    $self->{'sigocc'} = $value if defined $value;
+    return $self->{'sigocc'};
+}
+
+
+=head2 sigtemp()
+
+ Title   : sigtemp
+ Usage   : $sigtemp = $atom->sigtemp($sigtemp)
+ Function: Sets/gets the sigtemp
+ Returns : Returns the sigtemp for this atom
+ Args    : reference to an Atom
+
+=cut
+
+sub sigtemp {
+    my ($self, $value) = @_;
+
+    # Accessor for the 'sigtemp' slot; a defined argument overwrites it.
+    defined $value and $self->{'sigtemp'} = $value;
+    return $self->{'sigtemp'};
+}
+
+
+=head2 aniso()
+
+ Title   : aniso
+ Usage   : $u12 = $atom->aniso("u12", $u12)
+ Function: Sets/gets the anisotropic temperature factors
+ Returns : Returns the requested factor for this atom
+ Args    : reference to an Atom, name of the factor, value for the factor
+
+=cut
+
+sub aniso {
+    my ($self, $name, $value) = @_;
+
+    # The factor name is mandatory, for reading as well as for writing.
+    $self->throw("You need to supply a name of the anisotropic temp factor you want to get")
+        unless defined $name;
+
+    # Each factor lives directly in the object hash under its own name.
+    $self->{$name} = $value if defined $value;
+    return $self->{$name};
+}
+
+# placeholders 
+# Every method in this group forwards its two arguments unchanged to
+# aniso(), i.e. they are all called as $atom->uNN($factor_name, $value).
+# NOTE(review): the factor name is taken from the caller's first argument,
+# not from the method name, so $atom->u11(0.5) reads the factor named "0.5".
+# Confirm whether that is the intended calling convention.
+sub u11 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub u22 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub u33 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub u12 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub u13 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub u23 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub sigu11 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub sigu22 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub sigu33 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub sigu12 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub sigu13 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+sub sigu23 {
+    my ($self, $factor, $new_value) = @_;
+    return $self->aniso($factor, $new_value);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+=head2 id()
+
+ Title   : id
+ Usage   : $atom->id("CZ2")
+ Function: Gets/sets the ID for this atom
+ Returns : the ID
+ Args    : the ID
+
+=cut
+
+sub id {
+    my ($self, $value) = @_;
+
+    # A defined argument becomes the new ID; without one this is a getter.
+    $self->{'id'} = $value if defined $value;
+    return $self->{'id'};
+}
+
+sub DESTROY {
+	my $self =  shift;
+	
+	# Deliberately empty destructor: no cleanup of any kind is performed
+	# for an Atom here.
+}
+
+#
+# from here on only private methods
+#
+
+=head2 _remove_residue()
+
+ Title   : _remove_residue
+ Usage   : 
+ Function: Removes the Residue this Atom is attached to.
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _remove_residue {
+    my $self = shift;
+
+    # Not implemented: calling this always raises an exception.
+    $self->throw("no code here at the moment\n");
+}
+
+
+=head2 _grandparent()
+
+ Title   : _grandparent
+ Usage   : 
+ Function: get/set a symbolic reference to our grandparent
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _grandparent {
+    my ($self, $symref) = @_;
+
+    # The grandparent is recorded by name only: any real reference is refused.
+    $self->throw("Thou shall only pass strings in here, no references $symref\n")
+        if ref($symref);
+
+    # A defined string replaces the stored value; otherwise just read it.
+    $self->{'grandparent'} = $symref if defined $symref;
+    return $self->{'grandparent'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Chain.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Chain.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Chain.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,235 @@
+# $Id: Chain.pm,v 1.11.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Structure::Chain
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::Chain - Bioperl structure Object, describes a chain
+
+=head1 SYNOPSIS
+
+  #add synopsis here
+
+=head1 DESCRIPTION
+
+This object stores a Bio::Structure::Chain
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Structure::Chain;
+use strict;
+
+use Bio::Structure::Entry;
+use Bio::Structure::Model;
+use base qw(Bio::Root::Root);
+
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $struc = Bio::Structure::Chain->new( 
+                           -id  => 'human_id',
+                           -accession_number => 'AL000012',
+                                           );
+
+ Function: Returns a new Bio::Structure::Chain object from basic 
+	        constructors. Usually called from Bio::Structure::IO.
+ Returns : a new Bio::Structure::Chain object
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Named arguments recognised: -id, -residue
+    my ($id, $residue) = $self->_rearrange([qw(ID RESIDUE)], @args);
+
+    # Test for definedness, not truth, so that an ID of '0' is not silently
+    # discarded; id() itself applies the same "defined" rule.
+    $self->id($id) if defined $id;
+
+    $self->{'residue'} = [];
+    # the 'smallest' item that can be added to a chain is a residue, but
+    # attaching one through the constructor is not supported.
+    $residue && $self->throw("use a method based on an Entry object for now");
+    return $self;
+}
+
+
+
+=head2 residue()
+
+ Title   : residue 
+ Usage   : 
+ Function:  nothing useful until I get symbolic references to do what I want
+ Returns : 
+ Args    : 
+
+=cut
+
+sub residue {
+    my $self = shift;
+
+    # Residues cannot be reached through a Chain; this always throws and
+    # points the caller at the Entry object instead.
+    $self->throw("use a method on an Entry object to do what you want");
+}
+
+
+=head2 add_residue()
+
+ Title   : add_residue
+ Usage   : 
+ Function: nothing useful until I get symbolic references to do what I want
+ Returns : 
+ Args    : 
+
+=cut
+
+sub add_residue {
+    my $self = shift;
+
+    # Always throws: the message names the Entry-level call to use instead.
+    $self->throw("you want entry->add_residue(chain, residue)\n");
+}
+
+=head2 model()
+
+ Title   : model
+ Usage   : 
+ Function: nothing useful until I get symbolic references to do what I want
+ Returns : 
+ Args    : 
+
+=cut
+
+sub model {
+    my $self = shift;
+
+    # A Chain does not expose its Model; this always throws.
+    $self->throw("go via a Entry object please\n");
+}
+
+
+=head2 id()
+
+ Title   : id
+ Usage   : $chain->id("chain B")
+ Function: Gets/sets the ID for this chain
+ Returns : the ID
+ Args    : the ID
+
+=cut
+
+sub id {
+    my $self = shift;
+    my ($value) = @_;
+
+    # Set the chain ID when a defined value is passed; always return it.
+    if (defined $value) {
+        $self->{'id'} = $value;
+    }
+    return $self->{'id'};
+}
+
+
+sub DESTROY {
+	my $self = shift;
+	
+	# Deliberately empty destructor: no specific destruction is done for
+	# a Chain for now.
+}
+
+
+#
+# from here on only private methods
+#
+
+=head2 _remove_residues()
+
+ Title   : _remove_residues
+ Usage   : 
+ Function: 
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _remove_residues {
+    my $self = shift;
+
+    # Not implemented on Chain: always throws and refers to Entry.
+    $self->throw("nothing usefull in here, go see Entry\n");
+}
+
+
+=head2 _remove_model()
+
+ Title   : _remove_model
+ Usage   : 
+ Function: Removes the Model this Chain is attached to.
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _remove_model {
+    my $self = shift;
+
+    # Not implemented on Chain: always throws and refers to Entry.
+    $self->throw("go see an Entry object, nothing here\n");
+}
+
+
+=head2 _grandparent()
+
+ Title   : _grandparent
+ Usage   : 
+ Function: get/set a symbolic reference to our grandparent
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _grandparent {
+    my ($self, $symref) = @_;
+
+    # Strings only: a genuine reference is rejected with an exception.
+    if (ref $symref) {
+        $self->throw("Thou shall only pass strings in here, no references $symref\n");
+    }
+
+    # Remember a defined string, then report whatever is stored.
+    defined $symref and $self->{'grandparent'} = $symref;
+    return $self->{'grandparent'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Entry.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Entry.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Entry.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1006 @@
+# $Id: Entry.pm,v 1.27.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Structure::Entry
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::Entry - Bioperl structure Object, describes the whole entry
+
+=head1 SYNOPSIS
+
+  #add synopsis here
+
+=head1 DESCRIPTION
+
+This object stores a whole Bio::Structure entry. It can consist of one
+or more models (L<Bio::Structure::Model>), which in turn consist of one 
+or more chains (L<Bio::Structure::Chain>). A chain is composed of residues 
+(L<Bio::Structure::Residue>) and a residue consists of atoms 
+(L<Bio::Structure::Atom>). If no specific model or chain is chosen, the 
+first one is chosen.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal 
+methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Structure::Entry;
+use strict;
+
+use Bio::Structure::Model;
+use Bio::Structure::Chain;
+use Bio::Annotation::Collection;
+use Tie::RefHash;
+
+use base qw(Bio::Root::Root Bio::Structure::StructureI);
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $struc = Bio::Structure::Entry->new( 
+                                           -id  => 'structure_id',
+                                           );
+
+ Function: Returns a new Bio::Structure::Entry object from basic 
+	        constructors. Probably most called from Bio::Structure::IO.
+ Returns : a new Bio::Structure::Entry object
+
+=cut
+
+sub new {
+    # Constructor.  Recognised named arguments: -id, -model, -chain, -residue.
+    # Sets up the two relation hashes, the (initially empty) model list and
+    # a fresh annotation collection, then applies the arguments in that order.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my($id, $model, $chain, $residue ) =
+        $self->_rearrange([qw(
+			      ID
+			      MODEL
+			      CHAIN
+			      RESIDUE )], @args);
+
+    # where to store parent->child relations (1 -> 1..n)
+    #  value to this hash will be an array ref
+    #  by using Tie::RefHash we can store references in this hash
+    # (assigning the empty list leaves the slot undef; the %{ ... } in the
+    #  tie call below is what actually brings the hash into existence)
+    $self->{'p_c'} = ();
+    tie %{ $self->{'p_c'} } , "Tie::RefHash";
+    
+    # where to store child->parent relations (1 -> 1)
+    $self->{'c_p'} = ();
+    tie %{ $self->{'c_p'} } , "Tie::RefHash";
+
+    # NOTE(review): truth test, so an ID of '0' would be ignored here.
+    $id      && $self->id($id);
+
+    $self->{'model'} = [];
+    $model   && $self->model($model);
+
+    if($chain) {
+		 # NOTE(review): defined() calls model() in scalar context, and
+		 # model() ends in add_model()'s "@{$array_ref}", which in scalar
+		 # context is an element count (0 for the empty list set above),
+		 # i.e. always defined.  This branch therefore looks unreachable;
+		 # chain() and residue() test "! $self->model" instead.  Also, the
+		 # helper's return value is dropped here, whereas residue() hands
+		 # it to add_model().  Confirm against _create_default_model.
+		 if ( ! defined($self->model) ) { # no model yet, create default one
+			 $self->_create_default_model;
+		 }
+		 for my $m ($self->model) { # add this chain on all models
+			 $m->chain($chain);
+		 }
+    }
+
+    $residue  && $self->residue($residue);
+
+    # taken from Bio::Seq (or should we just inherit Bio::Seq and override some methods)
+    my $ann = Bio::Annotation::Collection->new;
+    $self->annotation($ann);
+
+    return $self;
+}
+
+
+=head2 model()
+
+ Title   : model
+ Function: Connects a (or a list of) Model objects to a Bio::Structure::Entry.
+ 	        To add a Model (and keep the existing ones) use add_model()
+ 	        It returns a list of Model objects.
+ Returns : List of Bio::Structure::Model objects
+ Args    : One Model or a reference to an array of Model objects
+
+=cut
+
+sub model {
+    my ($self, $model) = @_;
+
+    if (defined $model) {
+        # Accept either an array reference or a single Model object.
+        my $usable = ref($model) eq "ARRAY"
+            || $model->isa('Bio::Structure::Model');
+
+        if (!$usable) {
+            $self->throw("Supplied a $model to model, we want a Bio::Structure::Model or a list of these\n");
+        }
+        else {
+            # Replace semantics: orphan every model currently attached
+            # (the list is emptied as part of each step) ...
+            for my $old ($self->model) {
+                $self->_remove_from_graph($old);
+                $self->{'model'} = [];
+            }
+            # ... and then attach the new one(s).
+            $self->add_model($self, $model);
+        }
+    }
+
+    # The current list always comes from the general get method.
+    return $self->get_models($self);
+}
+
+
+
+=head2 add_model()
+
+ Title   : add_model
+ Usage   : $structure->add_model($model);
+ Function: Adds a (or a list of) Model objects to a Bio::Structure::Entry.
+ Returns : 
+ Args    : One Model or a reference to an array of Model objects
+
+=cut
+
+sub add_model {
+    my ($self, $entry, $model) = @_;
+
+    # One-argument form, $entry->add_model($model) or
+    # $entry->add_model(\@models): shift everything one place.  The array
+    # reference case is part of the documented interface but used to be
+    # rejected below as "not an Entry".
+    if ( !defined $model
+         && (ref($entry) eq "ARRAY" || ref($entry) =~ /^Bio::Structure::Model/) ) {
+        $model = $entry;
+        $entry = $self;
+    }
+    # $self and $entry are the same here; the explicit Entry argument only
+    # exists for uniformity with the other add_* methods.
+    if ( !defined($entry) || ref($entry) !~ /^Bio::Structure::Entry/) {
+        $self->throw("first argument to add_model needs to be a Bio::Structure::Entry object\n");
+    }
+
+    if (defined $model) {
+        if (ref($model) eq "ARRAY") {
+            # A list of models: each one is checked and appended in turn, so
+            # models preceding an offending element stay attached.
+            for my $m ( @{$model} ) {
+                if ( ! $m->isa('Bio::Structure::Model') ) {
+                    $self->throw("$m is not a Model\n");
+                }
+                if ( $self->_parent($m) ) {
+                    $self->throw("$m already assigned to a parent\n");
+                }
+                push @{$self->{'model'}}, $m;
+            }
+        }
+        elsif ( $model->isa('Bio::Structure::Model') ) {
+            if ( $self->_parent($model) ) { # already assigned to a parent
+                $self->throw("$model already assigned\n");
+            }
+            push @{$self->{'model'}}, $model;
+        }
+        else {
+            $self->throw("Supplied a $model to add_model, we want a Model or list of Models\n");
+        }
+    }
+
+    # Called without a model this is simply a getter (see get_models()).
+    my $array_ref = $self->{'model'};
+    return $array_ref ? @{$array_ref} : ();
+}
+
+
+=head2 get_models()
+
+ Title   : get_models
+ Usage   : $structure->get_models($structure);
+ Function: general get method for models attached to an Entry
+ Returns : a list of models attached to this entry
+ Args    : an Entry
+
+=cut
+
+sub get_models {
+    my ($self, $entry) = @_;
+
+    # Without an explicit Entry we report on ourselves.
+    $entry = $self unless defined $entry;
+
+    # add_model() with no model to add just hands back the current list.
+    return $self->add_model($entry);
+}
+
+
+=head2 id()
+
+ Title   : id
+ Usage   : $entry->id("identity");
+ Function: Gets/sets the ID 
+ Returns : The ID
+ Args    : 
+
+=cut
+
+sub id {
+    my ($self, $value) = @_;
+
+    # Combined getter/setter; only a defined argument changes the ID.
+    defined $value and $self->{'id'} = $value;
+    return $self->{'id'};
+}
+
+
+=head2 chain()
+
+ Title   : chain
+ Usage   : @chains  = $structure->chain($chain);
+ Function: Connects a Chain or a list of Chain objects to a Bio::Structure::Entry.
+ Returns : List of Bio::Structure::Chain objects
+ Args    : A Chain or a reference to an array of Chain objects
+
+=cut
+
+sub chain {
+    my ($self, $chain) = @_;
+
+    # Chains hang off the first model, so make sure there is one.  The
+    # default model is registered through add_model(), exactly as residue()
+    # does; previously the helper's return value was simply dropped here.
+    # NOTE(review): assumes _create_default_model only builds and returns
+    # the model, as its use in residue() suggests - confirm.
+    if ( ! $self->model ) {
+        my $m = $self->_create_default_model;
+        $self->add_model($self, $m);
+    }
+    my @models = $self->model;
+    my $first_model = $models[0];
+
+    if ( defined $chain) {
+        if ( (ref($chain) eq "ARRAY") || ($chain->isa('Bio::Structure::Chain')) ) {
+            # Replace semantics: orphan the chains currently attached to
+            # the first model ...
+            for my $c ($self->get_chains($first_model)) {
+                $self->_remove_from_graph($c);
+            }
+            # ... then attach the new one(s).
+            $self->add_chain($first_model, $chain);
+        }
+        else {
+            $self->throw("Supplied a $chain to chain, we want a Bio::Structure::Chain or a list of these\n");
+        }
+    }
+    return $self->get_chains($first_model);
+}
+
+
+=head2 add_chain()
+
+ Title   : add_chain
+ Usage   : @chains  = $structure->add_chain($model,$chain);
+ Function: Adds one or more Chain objects to a Bio::Structure::Entry.
+ Returns : List of Chain objects associated with the Model
+ Args    : A Model object and a Chain object or a reference to an array
+           of Chain objects
+
+=cut
+
+sub add_chain {
+    my ($self, $model, $chain) = @_;
+
+    # The parent must be a Model.
+    if (ref($model) !~ /^Bio::Structure::Model/) {
+        $self->throw("add_chain: first argument needs to be a Model object ($model)\n");
+    }
+
+    if (defined $chain) {
+        if (ref($chain) eq "ARRAY") {
+            # A list of chains: each one is verified and linked in turn, so
+            # chains preceding an offending element stay linked.
+            for my $member (@{$chain}) {
+                $member->isa('Bio::Structure::Chain')
+                    or $self->throw("$member is not a Chain\n");
+                $self->_parent($member)
+                    and $self->throw("$member already assigned to a parent\n");
+                # record the relation in both directions
+                $self->_parent($member, $model);
+                $self->_child($model, $member);
+            }
+        }
+        elsif ($chain->isa('Bio::Structure::Chain')) {
+            # A single chain: refuse it if it already has a parent.
+            $self->_parent($chain)
+                and $self->throw("$chain already assigned to a parent\n");
+            $self->_parent($chain, $model);
+            $self->_child($model, $chain);
+        }
+        else {
+            $self->throw("Supplied a $chain to add_chain, we want a Chain or list of Chains\n");
+        }
+    }
+
+    # Called without a chain this is simply a getter (see get_chains()).
+    my $children = $self->_child($model);
+    return $children ? @{$children} : ();
+}
+
+
+=head2 get_chains()
+
+ Title   : get_chains
+ Usage   : $entry->get_chains($model);
+ Function: General get method for Chains attached to a Model
+ Returns : A list of Chains attached to this model
+ Args    : A Model
+
+=cut
+
+sub get_chains {
+    my ($self, $model) = @_;
+
+    # No model given: fall back to the first model of this entry.
+    ($model) = $self->get_models unless defined $model;
+
+    # add_chain() with no chain to add just hands back the current list.
+    return $self->add_chain($model);
+}
+
+
+=head2 residue()
+
+ Title   : residue
+ Usage   : @residues  = $structure->residue($residue);
+ Function: Connects a (or a list of) Residue objects to a Bio::Structure::Entry.
+ Returns : List of Bio::Structure::Residue objects
+ Args    : One Residue or a reference to an array of Residue objects
+
+=cut
+
+sub residue {
+	my ($self, $residue) = @_;
+
+	# Make sure the Entry owns at least one Model ...
+	unless ( $self->model ) {
+		my $default_model = $self->_create_default_model;
+		$self->add_model($self, $default_model);
+	}
+	my ($first_model) = $self->model;
+
+	# ... and that the first Model owns at least one Chain.
+	unless ( $self->get_chains($first_model) ) {
+		my $default_chain = $self->_create_default_chain;
+		$self->add_chain($first_model, $default_chain);
+	}
+	my ($first_chain) = $self->get_chains($first_model);
+
+	if ( defined $residue ) {
+		my $acceptable = (ref($residue) eq "ARRAY")
+			|| $residue->isa('Bio::Structure::Residue');
+		unless ($acceptable) {
+			$self->throw("Supplied a $residue to residue, we want a Bio::Structure::Residue or a list of these\n");
+		}
+		# Setter semantics: orphan the Residues currently on the first
+		# Chain before attaching the new one(s).
+		my @stale = $self->get_residues($first_chain);
+		foreach my $old (@stale) {
+			$self->_remove_from_graph($old);
+		}
+		$self->add_residue($first_chain, $residue);
+	}
+
+	# Always answer with the Residues of the first Chain.
+	return $self->get_residues($first_chain);
+}
+
+
+=head2 add_residue()
+
+ Title   : add_residue
+ Usage   : @residues  = $structure->add_residue($chain,$residue);
+ Function: Adds one or more Residue objects to a Bio::Structure::Entry.
+ Returns : List of Bio::Structure::Residue objects
+ Args    : A Chain object and a Residue object or a reference to an array of 
+           Residue objects
+
+=cut
+
+sub add_residue {
+	my ($self, $chain, $residue) = @_;
+
+	# The container has to be a Chain (checked on the class-name prefix).
+	$self->throw("add_residue: first argument needs to be a Chain object\n")
+		unless ref($chain) =~ /^Bio::Structure::Chain/;
+
+	if (defined $residue) {
+		if (ref($residue) eq "ARRAY") {
+			# An array reference was handed in: register every element in turn.
+			foreach my $candidate ( @{$residue} ) {
+				$self->throw("$candidate is not a Residue\n")
+					unless $candidate->isa('Bio::Structure::Residue');
+				$self->throw("$candidate already belongs to a parent\n")
+					if $self->_parent($candidate);
+				$self->_parent($candidate, $chain);
+				$self->_child($chain, $candidate);
+				# the Residue is told about this Entry by its stringified
+				# reference, not by a real reference
+				$candidate->_grandparent("$self");
+			}
+		}
+		elsif ( $residue->isa('Bio::Structure::Residue') ) {
+			# A single Residue object.
+			$self->throw("$residue already belongs to a parent\n")
+				if $self->_parent($residue);
+			$self->_parent($residue, $chain);
+			$self->_child($chain, $residue);
+			$residue->_grandparent("$self");
+		}
+		else {
+			$self->throw("Supplied a $residue to add_residue, we want a Residue or list of Residues\n");
+		}
+	}
+
+	# Hand back the Residues now linked to this Chain (empty list if none).
+	my $kids = $self->_child($chain);
+	return $kids ? @{$kids} : ();
+}
+
+
+=head2 get_residues()
+
+ Title   : get_residues
+ Usage   : $structure->get_residues($chain);
+ Function: General get method for Residues attached to a Chain
+ Returns : A list of residues attached to this Chain
+ Args    : A Chain
+
+=cut
+
+sub get_residues {
+	my ($self, $chain) = @_;
+
+	$self->throw("get_residues needs a Chain as argument")
+		unless defined $chain;
+
+	# add_residue() without a Residue argument behaves as a plain getter.
+	return $self->add_residue($chain);
+}
+
+
+=head2 add_atom()
+
+ Title   : add_atom
+ Usage   : @atoms  = $structure->add_atom($residue,$atom);
+ Function: Adds a (or a list of) Atom objects to a Bio::Structure::Residue.
+ Returns : List of Bio::Structure::Atom objects
+ Args    : A Residue and an Atom or a reference to an array of Atom objects
+
+=cut
+
+sub add_atom {
+	my ($self, $residue, $atom) = @_;
+
+	# The container has to be a Residue (checked on the class-name prefix).
+	$self->throw("add_atom: first argument needs to be a Residue object\n")
+		unless ref($residue) =~ /^Bio::Structure::Residue/;
+
+	if (defined $atom) {
+		if (ref($atom) eq "ARRAY") {
+			# An array reference was handed in: register every element in turn.
+			foreach my $candidate ( @{$atom} ) {
+				$self->throw("$candidate is not an Atom\n")
+					unless $candidate->isa('Bio::Structure::Atom');
+				$self->throw("$candidate already belongs to a parent\n")
+					if $self->_parent($candidate);
+				$self->_parent($candidate, $residue);
+				$self->_child($residue, $candidate);
+			}
+		}
+		elsif ( ref($atom) =~ /^Bio::Structure::Atom/ ) {
+			# A single Atom, recognised by class-name prefix rather than isa().
+			$self->throw("$atom already belongs to a parent\n")
+				if $self->_parent($atom);
+			$self->_parent($atom, $residue);
+			$self->_child($residue, $atom);
+		}
+		# NOTE: any other defined value is silently ignored here, unlike
+		# add_chain() and add_residue() which throw.
+	}
+
+	# Hand back the Atoms now linked to this Residue (empty list if none).
+	my $kids = $self->_child($residue);
+	return $kids ? @{$kids} : ();
+}
+
+
+=head2 get_atoms()
+
+ Title   : get_atoms
+ Usage   : $structure->get_atoms($residue);
+ Function: General get method for Atoms attached to a Residue
+ Returns : A list of Atoms attached to this Residue
+ Args    : A Residue
+
+=cut
+
+sub get_atoms {
+	my ($self, $residue) = @_;
+
+	$self->throw("get_atoms needs a Residue as argument")
+		unless defined $residue;
+
+	# add_atom() without an Atom argument behaves as a plain getter.
+	return $self->add_atom($residue);
+}
+
+
+=head2 parent()
+
+ Title   : parent
+ Usage   : $structure->parent($residue);
+ Function: Returns the parent of the argument
+ Returns : The parent of the argument
+ Args    : A Bio::Structure object
+
+=cut
+
+=head2 connect
+
+ Title   : connect
+ Usage   : 
+ Function: Alias to conect()
+ Returns : 
+ Args    : 
+
+=cut
+
+sub connect {
+	# Conventionally spelled alias: every argument is forwarded to conect().
+	my ($self, @args) = @_;
+	return $self->conect(@args);
+}
+
+=head2 conect()
+
+ Title   : conect
+ Usage   : $structure->conect($source);
+ Function: Get/set method for conect
+ Returns : A list of serial numbers for Atoms connected to source
+ 	        (together with $entry->get_atom_by_serial($model, $serial),
+           this should be OK for now)
+ Args    : The source, the serial number for the source Atom, and the type
+
+=cut
+
+sub conect {
+	# Get/set the conect records of one source atom.
+	#   $source : key under which the connections are stored (required)
+	#   $serial : serial number of a connected Atom (optional)
+	#   $type   : connection type (optional)
+	# A record is only added when both $serial and $type are defined; it is
+	# stored as the string "<serial>_<type>".
+	# Returns the list of stored "<serial>_<type>" strings for $source, or an
+	# empty list when nothing is stored. Throws when $source is undefined.
+	my ($self, $source, $serial, $type) = @_;
+	
+	if ( !defined $source ) {
+		$self->throw("You need to supply at least a source to connect");
+	}
+	if ( defined $serial && defined $type ) {
+		# ref() has to wrap the stored value only. The closing parenthesis
+		# used to sit after the regex, so ref() saw the boolean result of
+		# the match and a non-array value was never replaced.
+		if ( !exists(${$self->{'conect'}}{$source}) || 
+			  ref(${$self->{'conect'}}{$source}) !~ /^ARRAY/ ) {
+			${$self->{'conect'}}{$source} = [];
+		}
+		# we also need to store type, a conect object might be better 
+		my $c = $serial . "_" . $type;
+		push @{ ${$self->{'conect'}}{$source} }, $c;
+	}
+	# Bug 1894
+	return () if ( !exists $self->{'conect'}{$source} || 
+					  !defined $self->{'conect'}{$source} );
+	return @{ ${$self->{'conect'}}{$source} };
+}
+
+=head2 get_all_connect_source
+
+ Title   : get_all_connect_source
+ Usage   : 
+ Function: Alias to get_all_conect_source()
+ Returns : 
+ Args    : 
+
+=cut
+
+sub get_all_connect_source {
+	# Conventionally spelled alias for get_all_conect_source().
+	my $self = shift;
+	# Must be a method call: the previous plain function call dropped the
+	# invocant, so the callee saw no object and always returned nothing.
+	return $self->get_all_conect_source(@_);
+}
+
+=head2 get_all_conect_source()
+
+ Title   : get_all_conect_source
+ Usage   : @sources = $structure->get_all_conect_source;
+ Function: Get all the sources for the conect records
+ Returns : A numerically sorted list of the sources that have conect records
+ 	        (together with $entry->get_atom_by_serial($model, $serial), 
+           this should be OK for now)
+ Args    : 
+ Notes   : This is a bit of a kludge, but it is the best for now. Conect info might need
+ 	        to go in a separate object
+
+=cut
+
+sub get_all_conect_source {
+	my ($self) = @_;
+
+	# The keys of the conect hash are the sources; order them numerically.
+	my @sources = sort { $a <=> $b } keys %{ $self->{'conect'} };
+	return @sources;
+}
+
+
+=head2 master()
+
+ Title   : master
+ Usage   : $structure->master($source);
+ Function: Get/set method for master
+ Returns : The master line
+ Args    : The master line for this entry
+
+=cut
+
+sub master {
+	my ($self, $value) = @_;
+
+	# Only a defined argument overwrites the stored master line.
+	$self->{'master'} = $value if defined $value;
+	return $self->{'master'};
+}
+
+
+=head2 seqres()
+
+ Title   : seqres
+ Usage   : $seqobj = $structure->seqres("A");
+ Function: Gets a sequence object containing the sequence from the SEQRES record.
+ 	        If a chain-ID is given, the sequence for this chain is returned; if none
+	        is provided, the first chain is chosen
+ Returns : A Bio::PrimarySeq
+ Args    : The chain-ID of the chain you want the sequence from
+
+=cut
+
+sub seqres {
+	# Build a protein Bio::PrimarySeq from the stored "seqres" annotation.
+	#   $chainid : chain to report; defaults to the id of the first Chain of
+	#              the first Model.
+	# Returns the sequence object (id "<entry id>_<chainid>"), or nothing,
+	# after a warning, when no chain, no SEQRES annotation or no SEQRES data
+	# for the requested chain is available.
+	my ($self, $chainid) = @_;
+	my (%seq_ch);
+	if ( !defined $chainid) {
+		my $m = ($self->get_models($self))[0];
+		my $c = ($self->get_chains($m))[0];
+		# guard: without a chain there is no default chainid to derive
+		if ( !defined $c ) {
+			$self->warn("There is no chain to take a default chainid from");
+			return;
+		}
+		$chainid = $c->id;
+	}
+	my $seqres = ($self->annotation->get_Annotations("seqres"))[0];
+	# guard: the annotation may be absent, so warn instead of dying on undef
+	if ( !defined $seqres ) {
+		$self->warn("There is no SEQRES annotation stored for this entry");
+		return;
+	}
+	my $seqres_string = $seqres->as_text;
+	$self->debug("seqres : $seqres_string\n");
+	$seqres_string =~ s/^Value: //;
+	# split into lines of 62 long
+	my @l = unpack("A62" x (length($seqres_string)/62), $seqres_string);
+	for my $line (@l) {
+		# get out chain_id and sequence
+		# we use a1, as A1 strips all spaces :(
+		my ($chid, $seq) = unpack("x3 a1 x7 A51", $line);
+		if ($chid eq " ") {
+			$chid = "default";
+		}
+		$seq =~ s/(\w+)/\u\L$1/g;	# ALA -> Ala  (for SeqUtils)
+		$seq =~ s/\s//g; 		# strip all spaces
+		$seq_ch{$chid} .= $seq;
+		$self->debug("seqres : $chid $seq_ch{$chid}\n");
+	}
+	# do we have a seqres for this chainid
+	if(! exists $seq_ch{$chainid} ) {
+		$self->warn("There is no SEQRES known for chainid \"$chainid\"");
+		return;
+	}
+
+	# this will break for non-protein structures (about 10% for now) XXX KB
+	my $pseq = Bio::PrimarySeq->new(-alphabet => 'protein');
+	$pseq = Bio::SeqUtils->seq3in($pseq,$seq_ch{$chainid});
+	my $id = $self->id . "_" . $chainid;
+	$pseq->id($id);
+	return $pseq;
+}
+
+
+=head2 get_atom_by_serial()
+
+ Title   : get_atom_by_serial
+ Usage   : $structure->get_atom_by_serial($model,$serial);
+ Function: Get the Atom by serial
+ Returns : The Atom object with this serial number in the model
+ Args    : Model on which to work, serial number for atom
+ 	        (if only a number is supplied, the first model is chosen)
+
+=cut
+
+sub get_atom_by_serial {
+	# Find the Atom with a given serial number inside one Model.
+	#   $model  : Model to search; when only a number is passed it is taken
+	#             as the serial and the first Model of the Entry is searched
+	#   $serial : serial number of the wanted Atom (digits only)
+	# Returns the first matching Atom, or nothing when no Atom matches.
+	# Throws when no Model can be determined or the serial is not numeric.
+	my ($self, $model, $serial) = @_;
+
+	# defined() first, so an undefined $model is not fed to the regex
+	if (defined $model && !defined $serial && $model =~ /^\d+$/) { # only serial given
+		$serial = $model;
+		my @m = $self->get_models($self);
+		$model = $m[0];
+	}
+	if ( !defined $model || ref($model) !~ /^Bio::Structure::Model/ ) {
+		$self->throw("Could not find (first) model\n");
+	}
+	if ( !defined $serial || ($serial !~ /^\d+$/) ) {
+		$self->throw("The serial number you provided looks fishy ($serial)\n");
+	}
+	for my $chain ($self->get_chains($model) ) {
+		for my $residue ($self->get_residues($chain) ) {
+			for my $atom ($self->get_atoms($residue) ) {
+				# this could get expensive, do we cache ???
+				next unless ($atom->serial == $serial);
+				return $atom;
+			}
+		}
+	}
+	# nothing matched: return explicitly instead of whatever the loop left
+	return;
+}
+
+sub parent {
+	my ($self, $obj) = @_;
+
+	$self->throw("parent: you need to supply an argument to get the parent from\n")
+		unless defined $obj;
+
+	# For now this simply delegates to _parent(), until the symbolic
+	# reference approach is working.
+	return $self->_parent($obj);
+}
+
+sub DESTROY {
+	my ($self) = @_;
+
+	# Empty both link tables in place: parent->children ('p_c') first,
+	# then child->parent ('c_p').
+	foreach my $table (qw(p_c c_p)) {
+		%{ $self->{$table} } = ();
+	}
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($seq_obj)
+ Function:
+ Example :
+ Returns : value of annotation
+ Args    : newvalue (optional)
+
+=cut
+
+sub annotation {
+	my ($self, $new_value) = @_;
+
+	# Only a defined argument overwrites the stored annotation.
+	$self->{'annotation'} = $new_value if defined $new_value;
+	return $self->{'annotation'};
+}
+
+
+#
+# from here on only private methods
+#
+
+=head2 _remove_models()
+
+ Title   : _remove_models
+ Usage   : 
+ Function: Removes the models attached to an Entry. Tells the models they
+ 	        do not belong to this Entry any more
+ Returns : 
+ Args    : 
+
+=cut
+
+#
+
+sub _remove_models {
+	# Placeholder: nothing is removed yet. The body only unpacks the
+	# invocant and then consists of an empty statement.
+	my ($self) = shift;
+
+	;
+}
+
+
+=head2 _create_default_model()
+
+ Title   : _create_default_model
+ Usage   : 
+ Function: Creates a default Model for this Entry. Typical situation
+ 	        in an X-ray structure where there is only one model
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _create_default_model {
+	my ($self) = @_;
+
+	# A fresh Model carrying the id "default"; it is not attached here.
+	my $default = Bio::Structure::Model->new(-id => "default");
+	return $default;
+}
+
+
+=head2 _create_default_chain()
+
+ Title   : _create_default_chain
+ Usage   : 
+ Function: Creates a default Chain for this Model. Typical situation
+ 	        in an X-ray structure where there is only one chain
+ Returns : 
+ Args    : 
+
+=cut
+
+sub _create_default_chain {
+	my ($self) = @_;
+
+	# A fresh Chain carrying the id "default"; it is not attached here.
+	my $default = Bio::Structure::Chain->new(-id => "default");
+	return $default;
+}
+
+
+
+=head2 _parent()
+
+ Title   : _parent
+ Usage   : This is an internal function only. It is used to have one 
+ 	        place that keeps track of which object has which other object 
+	        as parent. Thus allowing the underlying modules (Atom, Residue,...)
+	        to have no knowledge about all this (and thus removing the possibility
+	        of reference cycles).
+	        This method hides the details of manipulating references to an anonymous
+	        hash.
+ Function: To get/set an objects parent 
+ Returns : A reference to the parent if it exist, undef otherwise. In the 
+ 	        current implementation each node should have a parent (except Entry).
+ Args    : 
+
+=cut
+
+# manipulating the c_p hash
+
+sub _parent {
+	no strict "refs";
+	my ($self, $key, $value) = @_;
+
+	# Both the child (key) and, when given, the parent (value) must be
+	# references into the Bio:: namespace.
+	unless ( defined $key && ref($key) =~ /^Bio::/ ) {
+		$self->throw("First argument to _parent needs to be a reference to a Bio:: object ($key)\n");
+	}
+	if ( defined $value && ref($value) !~ /^Bio::/ ) {
+		$self->throw("Second argument to _parent needs to be a reference to a Bio:: object\n");
+	}
+	# no checking here for consistency of key and value, needs to happen in caller
+
+	if ( defined $value ) {
+		# Setter: a child may be given a parent only once.
+		if ( defined $self->{'c_p'}->{$key} ) {
+			$self->throw("_parent: $key already has a parent ${$self->{'c_p'}}{$key}\n");
+		}
+		$self->{'c_p'}->{$key} = $value;
+	}
+
+	# Getter (also the setter's result): the recorded parent or undef.
+	# The hash key is the stringified child reference.
+	return $self->{'c_p'}->{$key};
+}
+
+
+=head2 _child()
+
+ Title   : _child
+ Usage   : This is an internal function only. It is used to have one 
+ 	        place that keeps track of which object has which other object 
+	        as child. Thus allowing the underlying modules (Atom, Residue,...)
+	        to have no knowledge about all this (and thus removing the possibility
+	        of reference cycles).
+	        This method hides the details of manipulating references to an anonymous
+ 	        hash.
+ Function: To get/set the children of an object 
+ Returns : A reference to an array of child(ren) if they exist, undef otherwise. 
+ Args    : 
+
+=cut
+
+# manipulating the p_c hash
+sub _child {
+	my ($self, $key, $value) = @_;
+
+	# Both the parent (key) and, when given, the child (value) must be
+	# references into the Bio:: namespace.
+	unless ( defined $key && ref($key) =~ /^Bio::/ ) {
+		$self->throw("First argument to _child needs to be a reference to a Bio:: object\n");
+	}
+	if ( defined $value && ref($value) !~ /^Bio::/ ) {
+		$self->throw("Second argument to _child needs to be a reference to a Bio:: object\n");
+	}
+	# no checking here for consistency of key and value, needs to happen in caller
+
+	if ( defined $value ) {
+		# Setter: start a fresh child list when the slot is missing or does
+		# not hold an array reference, then append the new child.
+		$self->{'p_c'}{$key} = []
+			if ref( $self->{'p_c'}{$key} ) !~ /^ARRAY/;
+		push @{ $self->{'p_c'}{$key} }, $value;
+	}
+
+	# Getter (also the setter's result): the array reference of children,
+	# or undef when none were recorded.
+	return $self->{'p_c'}{$key};
+}
+
+=head2 _remove_from_graph()
+
+ Title   : _remove_from_graph
+ Usage   : This is an internal function only. It is used to remove from
+ 	        the parent/child graph. We only remove the links from object to
+	        his parent. Not the ones from object to its children.
+ Function: To remove an object from the parent/child graph
+ Returns : 
+ Args    : The object to be orphaned
+
+=cut
+
+sub _remove_from_graph {
+	# Orphan $object: take it out of its parent's child list and forget its
+	# parent. Links from $object to its own children are left untouched.
+	#   $object : the Bio:: object to detach
+	# Returns nothing. Throws when $object is not a Bio:: object.
+	my ($self, $object) = @_;
+	
+	# either condition alone disqualifies the argument (used to be &&,
+	# which let defined non-Bio values through this check)
+	if ( !defined($object) || ref($object) !~ /^Bio::/) {
+		$self->throw("_remove_from_graph needs a Bio object as argument");
+	}
+	my $dad = $self->_parent($object);
+	if ( $dad ) {
+		# if we have a parent, remove me as being a child
+		# Filter the list in place rather than splicing inside an index
+		# loop, which shortened the array while its old indices were
+		# still being visited.
+		my $siblings = $self->_child($dad);
+		if ( ref($siblings) eq 'ARRAY' ) {
+			@{$siblings} = grep { $_ ne $object } @{$siblings};
+		}
+		delete( $self->{'c_p'}{$object});
+	}
+	return;
+}
+
+			
+sub _print_stats_pc {
+	# Debug helper: report how many keys each link table holds, together
+	# with the current Time::HiRes timestamp.
+	my ($self) = @_;
+
+	my $parent_keys = keys %{ $self->{'p_c'} };
+	my $child_keys  = keys %{ $self->{'c_p'} };
+	my $stamp       = Time::HiRes::time();
+	$self->debug("pc stats: P_C $parent_keys C_P $child_keys $stamp\n");
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO/pdb.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO/pdb.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO/pdb.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1424 @@
+# $Id: pdb.pm,v 1.17.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Structure::IO::pdb
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright 2001, 2002 Kris Boulez
+#
+# Framework is a copy of Bio::SeqIO::embl.pm
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::IO::pdb - PDB input/output stream
+
+=head1 SYNOPSIS
+
+It is probably best not to use this object directly, but
+rather go through the Bio::Structure::IO handler system. Go:
+
+    $stream = Bio::Structure::IO->new(-file => $filename,
+                                      -format => 'PDB');
+
+    while (my $structure = $stream->next_structure) {
+	    # do something with $structure
+    }
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Structure objects to and from PDB flat
+file databases. The working is similar to that of the Bio::SeqIO handlers.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Structure::IO::pdb;
+use strict;
+use Bio::Structure::Entry;
+#use Bio::Structure::Model;
+#use Bio::Structure::Chain;
+#use Bio::Structure::Residue;
+use Bio::Structure::Atom;
+use Bio::SeqFeature::Generic;
+use Bio::Annotation::Reference;
+
+use base qw(Bio::Structure::IO);
+
+sub _initialize {
+  # Constructor hook: runs the parent initialisation, then picks up the
+  # optional -noheader and -noatom arguments and stores each true value
+  # through the matching _noheader() / _noatom() method.
+  # (The archive had mangled "@args" into "at args", a syntax error.)
+  my($self, @args) = @_;
+
+  $self->SUPER::_initialize(@args);
+
+  my ($noheader, $noatom) =
+  	$self->_rearrange([qw(
+			NOHEADER
+			NOATOM
+		)],
+		@args);
+  $noheader && $self->_noheader($noheader);
+  $noatom   && $self->_noatom($noatom);
+}
+
+
+=head2 next_structure
+
+ Title   : next_structure
+ Usage   : $struc = $stream->next_structure()
+ Function: returns the next structure in the stream
+ Returns : Bio::Structure object
+ Args    :
+
+=cut
+
+sub next_structure {
+   # Read one PDB entry from the stream and return it as a
+   # Bio::Structure::Entry, or nothing at end of input.
+   # Three phases:
+   #   1. header records up to the first MODEL/ATOM/HETATM line, stored as
+   #      annotations on the entry (skipped when _noheader is set, apart
+   #      from the mandatory HEADER line);
+   #   2. the coordinate section, one Model at a time;
+   #   3. trailing CONECT and MASTER records.
+   # Throws when the stream does not start with HEADER or when no
+   # coordinate section is found.
+   # (The archive had mangled "@args" into "at args", a syntax error.)
+   my ($self, @args) = @_;
+   my ($line);
+   my ($obslte, $title, $caveat, $compnd, $source, $keywds,
+	$expdta, $author, %revdat, $revdat, $sprsde, $jrnl, %remark, $dbref,
+	$turn, $ssbond, $link, $hydbnd, $sltbrg, $cispep,
+	$site, $cryst1, $tvect,);
+   my $struc = Bio::Structure::Entry->new(-id => 'created from pdb.pm');
+   my $all_headers = ( !$self->_noheader );  # we'll parse all headers and store as annotation
+   my %header;  # stores all header RECORDs and is stored as annotations when ATOM is reached
+
+
+   $line = $self->_readline;   # This needs to be before the first eof() test
+
+   if( !defined $line ) {
+       return; # no throws - end of file
+   }
+
+   # skip leading blank lines
+   if( $line =~ /^\s+$/ ) {
+       while( defined ($line = $self->_readline) ) {
+	   $line =~/\S/ && last;
+       }
+   }
+   if( !defined $line ) {
+       return; # end of file
+   }
+   $line =~ /^HEADER\s+\S+/ || $self->throw("PDB stream with no HEADER. Not pdb in my book");
+   my($header_line) = unpack "x10 a56", $line;
+   $header{'header'} = $header_line;
+   my($class, $depdate, $idcode) = unpack "x10 a40 a9 x3 a4", $line;
+   $idcode =~ s/^\s*(\S+)\s*$/$1/;
+   $struc->id($idcode);
+	$self->debug("PBD c $class d $depdate id $idcode\n"); # XXX KB
+
+   my $buffer = $line;
+
+   BEFORE_COORDINATES :
+   until( !defined $buffer ) {
+       $_ = $buffer;
+
+       # Exit at start of coordinate section
+       last if /^(MODEL|ATOM|HETATM)/;
+
+       # OBSLTE line(s)
+       if (/^OBSLTE / && $all_headers) {
+		$obslte = $self->_read_PDB_singlecontline("OBSLTE","12-70",\$buffer);
+		$header{'obslte'} = $obslte;
+       }
+
+       # TITLE line(s)
+       if (/^TITLE / && $all_headers) {
+		$title = $self->_read_PDB_singlecontline("TITLE","11-70",\$buffer);
+		$header{'title'} = $title;
+       }
+
+       # CAVEAT line(s)
+       if (/^CAVEAT / && $all_headers) {
+		$caveat = $self->_read_PDB_singlecontline("CAVEAT","12-70",\$buffer);
+		$header{'caveat'} = $caveat;
+       }
+
+       # COMPND line(s)
+       if (/^COMPND / && $all_headers) {
+		$compnd = $self->_read_PDB_singlecontline("COMPND","11-70",\$buffer);
+		$header{'compnd'} = $compnd;
+$self->debug("get COMPND $compnd\n");
+       }
+
+	# SOURCE line(s)
+	if (/^SOURCE / && $all_headers) {
+		$source = $self->_read_PDB_singlecontline("SOURCE","11-70",\$buffer);
+		$header{'source'} = $source;
+	}
+
+	# KEYWDS line(s)
+	if (/^KEYWDS / && $all_headers) {
+		$keywds = $self->_read_PDB_singlecontline("KEYWDS","11-70",\$buffer);
+		$header{'keywds'} = $keywds;
+	}
+
+	# EXPDTA line(s)
+	if (/^EXPDTA / && $all_headers) {
+		$expdta = $self->_read_PDB_singlecontline("EXPDTA","11-70",\$buffer);
+		$header{'expdta'} = $expdta;
+	}
+
+	# AUTHOR line(s)
+	if (/^AUTHOR / && $all_headers) {
+		$author = $self->_read_PDB_singlecontline("AUTHOR","11-70",\$buffer);
+		$header{'author'} = $author;
+	}
+
+	# REVDAT line(s)
+	#  a bit more elaborate as we also store the modification number
+	if (/^REVDAT / && $all_headers) {
+		##my($modnum,$rol) = unpack "x7 A3 x3 A53", $_;
+		##$modnum =~ s/\s+//; # remove  spaces
+		##$revdat{$modnum} .= $rol;
+		my ($rol) = unpack "x7 a59", $_;
+		$revdat .= $rol;
+		$header{'revdat'} = $revdat;
+	}
+
+	# SPRSDE line(s)
+	if (/^SPRSDE / && $all_headers) {
+		$sprsde = $self->_read_PDB_singlecontline("SPRSDE","12-70",\$buffer);
+		$header{'sprsde'} = $sprsde;
+	}
+
+	# JRNL line(s)
+	if (/^JRNL / && $all_headers) {
+		$jrnl = $self->_read_PDB_jrnl(\$buffer);
+		$struc->annotation->add_Annotation('reference',$jrnl);
+		$header{'jrnl'} = 1; # when writing out, we need a way to check there was a JRNL record (not mandatory)
+	}
+
+	# REMARK line(s)
+	#  we only parse the "REMARK   1" lines (additional references)
+	#  the rest is stored in %remark (indexed on remarkNum) (pack does space-padding)
+	if (/^REMARK\s+(\d+)\s*/ && $all_headers) {
+		my $remark_num = $1;
+		if ($remark_num == 1) {
+			my @refs = $self->_read_PDB_remark_1(\$buffer);
+			# How can we find the primary reference when writing (JRNL record) XXX KB
+			foreach my $ref (@refs) {
+				$struc->annotation->add_Annotation('reference', $ref);
+			}
+			# $_ still holds the REMARK_1 line, $buffer now contains the first non
+			#  REMARK_1 line. We need to parse it in this pass (so no else block)
+			$_ = $buffer;
+		}
+		# for the moment I don't see a better solution (other then using goto)
+		if (/^REMARK\s+(\d+)\s*/) {
+			my $r_num = $1;
+			if ($r_num != 1) { # other remarks, we store literally at the moment
+				my ($rol) = unpack "x11 a59", $_;
+				$remark{$r_num} .= $rol;
+			}
+		}
+	} # REMARK
+
+	# DBREF line(s)
+	#  references to sequences in other databases
+	#  we store as 'dblink' annotations and whole line as simple annotation (round-trip)
+	if (/^DBREF / && $all_headers) {
+		my ($rol) = unpack "x7 a61", $_;
+		$dbref .= $rol;
+		$header{'dbref'} = $dbref;
+		my ($db, $acc) = unpack "x26 a6 x1 a8", $_;
+		$db =~ s/\s*$//;
+		$acc =~ s/\s*$//;
+		my $link = Bio::Annotation::DBLink->new;
+		$link->database($db);
+		$link->primary_id($acc);
+		$struc->annotation->add_Annotation('dblink', $link);
+	} # DBREF
+
+	# SEQADV line(s)
+	if (/^SEQADV / && $all_headers) {
+		my ($rol) = unpack "x7 a63", $_;
+		$header{'seqadv'} .= $rol;
+	} # SEQADV
+
+	# SEQRES line(s)
+	#  this is (I think) the sequence of macromolecule that was analysed
+	#  this will be returned when doing $struc->seq
+	if (/^SEQRES / && $all_headers) {
+		my ($rol) = unpack "x8 a62", $_;
+		$header{'seqres'} .= $rol;
+	} # SEQRES
+
+	# MODRES line(s)
+	if (/^MODRES / && $all_headers) {
+		my ($rol) = unpack "x7 a63", $_;
+		$header{'modres'} .= $rol;
+	} # MODRES
+
+	# HET line(s)
+	if (/^HET / && $all_headers) {
+		my ($rol) = unpack "x7 a63", $_;
+		$header{'het'} .= $rol;
+	} # HET
+
+	# HETNAM line(s)
+	if (/^HETNAM / && $all_headers) {
+		my ($rol) = unpack "x8 a62", $_;
+		$header{'hetnam'} .= $rol;
+	} # HETNAM
+
+	# HETSYN line(s)
+	if (/^HETSYN / && $all_headers) {
+		my ($rol) = unpack "x8 a62", $_;
+		$header{'hetsyn'} .= $rol;
+	} # HETSYN
+
+	# FORMUL line(s)
+	if (/^FORMUL / && $all_headers) {
+		my ($rol) = unpack "x8 a62", $_;
+		$header{'formul'} .= $rol;
+	} # FORMUL
+
+	# HELIX line(s)
+	#  store as specific object ??
+	if (/^HELIX / && $all_headers) {
+		my ($rol) = unpack "x7 a69", $_;
+		$header{'helix'} .= $rol;
+	} # HELIX
+
+	# SHEET line(s)
+	#  store as specific object ??
+	if (/^SHEET / && $all_headers) {
+		my ($rol) = unpack "x7 a63", $_;
+		$header{'sheet'} .= $rol;
+	} # SHEET
+
+	# TURN line(s)
+	#  store as specific object ??
+	if (/^TURN / && $all_headers) {
+		my ($rol) = unpack "x7 a63", $_;
+		$turn .= $rol;
+		$header{'turn'} = $turn;
+	} # TURN
+
+	# SSBOND line(s)
+	#  store in connection-like object (see parsing of CONECT record)
+	if (/^SSBOND / && $all_headers) {
+		my ($rol) = unpack "x7 a65", $_;
+		$ssbond .= $rol;
+		$header{'ssbond'} = $ssbond;
+	} # SSBOND
+
+	# LINK
+	#  store like SSBOND ?
+	if (/^LINK / && $all_headers) {
+		my ($rol) = unpack "x12 a60", $_;
+		$link .= $rol;
+		$header{'link'} = $link;
+	} # LINK
+
+	# HYDBND
+	#  store like SSBOND
+	if (/^HYDBND / && $all_headers) {
+		my ($rol) = unpack "x12 a60", $_;
+		$hydbnd .= $rol;
+		$header{'hydbnd'} = $hydbnd;
+	} # HYDBND
+
+	# SLTBRG
+	#  store like SSBOND ?
+	if (/^SLTBRG / && $all_headers) {
+		my ($rol) = unpack "x12 a60",$_;
+		$sltbrg .= $rol;
+		$header{'sltbrg'} = $sltbrg;
+	} # SLTBRG
+
+	# CISPEP
+	#   store like SSBOND ?
+	if (/^CISPEP / && $all_headers) {
+		my ($rol) = unpack "x7 a52", $_;
+		$cispep .= $rol;
+		$header{'cispep'} = $cispep;
+	}
+
+	# SITE line(s)
+	if (/^SITE / && $all_headers) {
+		my ($rol) = unpack "x7 a54", $_;
+		$site .= $rol;
+		$header{'site'} = $site;
+	} # SITE
+
+	# CRYST1 line
+	#  store in some crystallographic subobject ?
+	if (/^CRYST1/ && $all_headers) {
+		my ($rol) = unpack "x6 a64", $_;
+		$cryst1 .= $rol;
+		$header{'cryst1'} = $cryst1;
+	} # CRYST1
+
+	# ORIGXn line(s) (n=1,2,3)
+	if (/^(ORIGX\d) / && $all_headers) {
+		my $origxn = lc($1);
+		my ($rol) = unpack "x10 a45", $_;
+		$header{$origxn} .= $rol;
+	} # ORIGXn
+
+	# SCALEn line(s) (n=1,2,3)
+	if (/^(SCALE\d) / && $all_headers) {
+		my $scalen = lc($1);
+		my ($rol) = unpack "x10 a45", $_;
+		$header{$scalen} .= $rol;
+	} # SCALEn
+
+	# MTRIXn line(s) (n=1,2,3)
+	if (/^(MTRIX\d) / && $all_headers) {
+		my $mtrixn = lc($1);
+		my ($rol) = unpack "x7 a53", $_;
+		$header{$mtrixn} .= $rol;
+	} # MTRIXn
+
+	# TVECT line(s)
+	if (/^TVECT / && $all_headers) {
+		my ($rol) = unpack "x7 a63", $_;
+		$tvect .= $rol;
+		$header{'tvect'} = $tvect;
+	}
+
+	# Get next line.
+	$buffer = $self->_readline;
+   }
+
+   # store %header entries as annotations
+   if (%header) {
+	for my $record (keys %header) {
+		my $sim = Bio::Annotation::SimpleValue->new();
+		$sim->value($header{$record});
+		$struc->annotation->add_Annotation($record, $sim);
+	}
+   }
+   # store %remark entries as annotations
+   if (%remark) {
+	for my $remark_num (keys %remark) {
+		my $sim = Bio::Annotation::SimpleValue->new();
+		$sim->value($remark{$remark_num});
+		$struc->annotation->add_Annotation("remark_$remark_num", $sim);
+	}
+   }
+
+   # Coordinate section, the real meat
+   #
+   #  $_ contains a line beginning with (ATOM|MODEL)
+
+   $buffer = $_;
+
+
+   if (defined($buffer) && $buffer =~ /^(ATOM |MODEL |HETATM)/ ) {  # can you have an entry without ATOM ?
+	until( !defined ($buffer) ) {				 #  (yes : 1a7z )
+		   # read in one model at a time
+		   my $model = $self->_read_PDB_coordinate_section(\$buffer, $struc);
+		   # add this to $struc
+		   $struc->add_model($struc, $model);
+
+		   # Keep looping only while another MODEL record follows. The
+		   # defined() test avoids matching against an undefined buffer
+		   # when the coordinate section ran to the end of the input.
+		   if (!defined $buffer || $buffer !~ /^MODEL /) {
+			   last;
+		   }
+	}
+   }
+   else {
+	   $self->throw("Could not find a coordinate section in this record\n");
+   }
+
+
+   until( !defined $buffer ) {
+	$_ = $buffer;
+
+   	# CONECT records
+	if (/^CONECT/) {
+		# do not differentiate between different type of connect (column dependant)
+		my $conect_unpack = "x6 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5";
+		my (@conect) = unpack $conect_unpack, $_;
+		for my $k (0 .. $#conect) {
+			$conect[$k] =~ s/\s//g;
+		}
+		# first field is the source atom, the remaining ten its partners
+		my $source = shift @conect;
+		my $type;
+		for my $k (0 .. 9) {
+			next unless ($conect[$k] =~ /^\d+$/);
+			# 0..3 		bond
+			if( $k <= 3 ) {
+				$type = "bond";
+			}
+			# 4..5,7..8 	hydrogen bonded
+			elsif( ($k >= 4 && $k <= 5) || ($k >= 7 && $k <= 8) ) {
+				$type = "hydrogen";
+			}
+			# 6, 9		salt bridged
+			elsif( $k == 6 || $k == 9 ) {
+				$type = "saltbridged";
+			} else {
+				$self->throw("k has impossible value ($k), check brain");
+			}
+			$struc->conect($source, $conect[$k], $type);
+		}
+	}
+
+	# MASTER record
+	if (/^MASTER /) {
+		# the numbers in here are checksums, we should use them :)
+		my ($rol) = unpack "x10 a60", $_;
+		$struc->master($rol);
+	}
+
+	if (/^END/) {
+		# this is the end ...
+	}
+
+   	$buffer = $self->_readline;
+   }
+
+
+   return $struc;
+}
+
+=head2 write_structure
+
+ Title   : write_structure
+ Usage   : $stream->write_structure($struc)
+ Function: writes the $struc object (must be a Bio::Structure) to the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Structure object
+
+=cut
+
+sub write_structure {
+	# Writes $struc to the stream as PDB records, in this order: HEADER,
+	# title section, JRNL / REMARK 1 references, other REMARKs, primary
+	# structure .. transformation records, coordinates (ATOM/HETATM/TER,
+	# wrapped in MODEL/ENDMDL when there is more than one model), CONECT,
+	# MASTER and END.
+	#
+	# Args    : a Bio::Structure::StructureI implementing object
+	# Returns : whatever the final _print call (the END record) returns
+	# Throws  : when $struc is missing or not a StructureI, or when a
+	#           conect entry has an unknown type
+	my ($self, $struc) = @_;
+	if( !defined $struc ) {
+		$self->throw("Attempting to write with no structure!");
+	}
+
+	if( ! ref $struc || ! $struc->isa('Bio::Structure::StructureI') ) {
+		$self->throw(" $struc is not a StructureI compliant module.");
+	}
+
+	# HEADER
+	my $output_string;
+	my ($ann) = $struc->annotation->get_Annotations("header");
+	if ($ann) {
+		my $string = $ann->as_text;
+		$string =~ s/^Value: //;
+		$output_string = pack ("A10 A56", "HEADER", $string);
+	} else {	# not read in via read_structure, create HEADER line
+		my $id = $struc->id;
+		if (!$id) {
+			$id = "UNK1";
+		}
+		if (length($id) > 4) {
+			$id = substr($id,0,4);
+		}
+		my $classification = "DEFAULT CLASSIFICATION";
+		my $dep_date       = "24-JAN-70";
+		$output_string = pack ("A10 A40 A12 A4", "HEADER", $classification, $dep_date, $id);
+	}
+	$output_string .= " " x (80 - length($output_string) );
+	$self->_print("$output_string\n");
+
+	# annotation keys that are present on this structure
+	my %header = map { $_ => 1 } $struc->annotation->get_all_annotation_keys;
+
+	# Write the record stored under annotation key $key, if there is one.
+	# $cont (column range of the continuation field) is only passed on
+	# for records that have such a field.
+	my $write_record = sub {
+		my ($key, $name, $rol, $cont) = @_;
+		return unless exists $header{$key};
+		my @record_args = (-name => $name);
+		push @record_args, (-cont => $cont) if defined $cont;
+		push @record_args, (-annotation => $struc->annotation->get_Annotations($key),
+					-rol => $rol);
+		$self->_write_PDB_simple_record(@record_args);
+	};
+
+	# Write the sub-records of one literature reference. $prefix is the
+	# first 12 columns ("JRNL        " or "REMARK   1  ").
+	# The second sub-record after REF is PUBL (publisher); EDIT used to be
+	# written twice here and the publisher was never written at all.
+	my $write_reference = sub {
+		my ($prefix, $ref) = @_;
+		for my $part (	[ "AUTH", 'authors'     ],
+				[ "TITL", 'title'       ],
+				[ "EDIT", 'editors'     ],
+				[ "REF ", 'location'    ],
+				[ "PUBL", 'publisher'   ],
+				[ "REFN", 'encoded_ref' ] ) {
+			my ($subrecord, $getter) = @$part;
+			my $text = $ref->$getter;
+			next unless $text;
+			$self->_write_PDB_simple_record(-name => $prefix . $subrecord,
+					-cont => "17-18", -rol => "20-70", -string => $text );
+		}
+	};
+
+	# title section: [ annotation key, record name, text columns, continuation columns ]
+	for my $record (
+			[ 'obslte', "OBSLTE  ", "11-70", "9-10" ],
+			[ 'title',  "TITLE   ", "11-70", "9-10" ],
+			[ 'caveat', "CAVEAT  ", "12-70", "9-10" ],
+			[ 'compnd', "COMPND  ", "11-70", "9-10" ],
+			[ 'source', "SOURCE  ", "11-70", "9-10" ],
+			[ 'keywds', "KEYWDS  ", "11-70", "9-10" ],
+			[ 'expdta', "EXPDTA  ", "11-70", "9-10" ],
+			[ 'author', "AUTHOR  ", "11-70", "9-10" ],
+			[ 'revdat', "REVDAT ",  "8-66"          ],
+			[ 'sprsde', "SPRSDE  ", "12-70", "9-10" ],
+		) {
+		$write_record->(@$record);
+	}
+
+	# JRNL and REMARK 1: the first reference becomes the JRNL record when a
+	# 'jrnl' annotation key exists, all other references go to REMARK 1
+	my ($jrnl_done, $remark_1_counter);
+	if ( !exists $header{'jrnl'} ) {
+		$jrnl_done = 1;
+	}
+	foreach my $ref ($struc->annotation->get_Annotations('reference') ) {
+		if( !$jrnl_done ) { # JRNL record
+			$write_reference->("JRNL        ", $ref);
+			$jrnl_done = 1;
+		} else { # REMARK 1
+			if (!$remark_1_counter) { # header line
+				my $remark_1_header_line = "REMARK   1" . " " x 70;
+				$self->_print("$remark_1_header_line\n");
+				$remark_1_counter = 1;
+			}
+			# per reference header
+			my $rem_line = "REMARK   1 REFERENCE " . $remark_1_counter;
+			$rem_line .= " " x (80 - length($rem_line) );
+			$self->_print($rem_line,"\n");
+			$write_reference->("REMARK   1  ", $ref);
+			$remark_1_counter++;
+		}
+	}
+	if (! defined $remark_1_counter ) { 	# no remark 1 record written yet
+		my $remark_1_header_line = "REMARK   1" . " " x 70;
+		$self->_print("$remark_1_header_line\n");  # write dummy  (we need this line)
+	}
+
+	# REMARK's  (not 1 at the moment, references)
+	my %remarks;
+	for my $key (keys %header) {
+		next unless ($key =~ /^remark_(\d+)$/);
+		next if ($1 == 1);
+		$remarks{$1} = 1;
+	}
+	for my $remark_num (sort {$a <=> $b} keys %remarks) {
+		$self->_write_PDB_remark_record($struc, $remark_num);
+	}
+
+	# remaining annotation records, none of them has a continuation field
+	for my $record (
+			[ 'dbref',  "DBREF  ",      "8-68"  ],
+			[ 'seqadv', "SEQADV ",      "8-70"  ],
+			[ 'seqres', "SEQRES  ",     "9-70"  ],
+			[ 'modres', "MODRES ",      "8-70"  ],
+			[ 'het',    "HET    ",      "8-70"  ],
+			[ 'hetnam', "HETNAM  ",     "9-70"  ],
+			[ 'hetsyn', "HETSYN  ",     "9-70"  ],
+			[ 'formul', "FORMUL  ",     "9-70"  ],
+			[ 'helix',  "HELIX  ",      "8-76"  ],
+			[ 'sheet',  "SHEET  ",      "8-70"  ],
+			[ 'turn',   "TURN   ",      "8-70"  ],
+			[ 'ssbond', "SSBOND ",      "8-72"  ],
+			[ 'link',   "LINK        ", "13-72" ],
+			[ 'hydbnd', "HYDBND      ", "13-72" ],
+			[ 'sltbrg', "SLTBRG      ", "13-72" ],
+			[ 'cispep', "CISPEP ",      "8-59"  ],
+			[ 'site',   "SITE   ",      "8-61"  ],
+			[ 'cryst1', "CRYST1",       "7-70"  ],
+			( map { [ "origx$_", "ORIGX$_    ", "11-55" ] } 1 .. 3 ),
+			( map { [ "scale$_", "SCALE$_    ", "11-55" ] } 1 .. 3 ),
+			( map { [ "mtrix$_", "MTRIX$_ ",    "8-60"  ] } 1 .. 3 ),
+			[ 'tvect',  "TVECT  ",      "8-70"  ],
+		) {
+		$write_record->(@$record);
+	}
+
+	# write out coordinate section
+	#
+	my %het_res;  # hetero residues
+	$het_res{'HOH'} = 1;  # water is default
+	if (exists $header{'het'}) {
+		my ($het_line) = ($struc->annotation->get_Annotations("het"))[0]->as_text;
+		$het_line =~ s/^Value: //;
+		# the HET text is scanned in 63 character slices; stop before the
+		# end of the string so no empty residue name gets registered
+		for ( my $k = 0; $k < length($het_line); $k += 63) {
+			my $het_name = substr $het_line, $k, 63;
+			$het_name =~ s/^\s*(\S+)\s+.*$/$1/;
+			$het_res{$het_name} = 1;
+		}
+	}
+
+	# TER record: serial is one higher than the last atom written
+	my $write_ter = sub {
+		my ($last_serial, $ter_resname, $ter_chain_id, $ter_resnum, $ter_icode) = @_;
+		my $ter_line = "TER   ";
+		$ter_line .= sprintf("%5d", $last_serial + 1);
+		$ter_line .= "      ";
+		$ter_line .= sprintf("%3s ", $ter_resname);
+		$ter_line .= $ter_chain_id;
+		$ter_line .= sprintf("%4d", $ter_resnum);
+		$ter_line .= $ter_icode ? $ter_icode : " "; # 27
+		$ter_line .= " " x (80 - length $ter_line);  # extend to 80 chars
+		$self->_print($ter_line,"\n");
+	};
+
+	my $multiple_models = $struc->get_models > 1;
+	for my $model ($struc->get_models) {
+		# more then one model ?
+		if ($multiple_models) {
+			my $model_line = sprintf("MODEL     %4d", $model->id);
+			$model_line .= " " x (80 - length($model_line) );
+			$self->_print($model_line, "\n");
+		}
+		for my $chain ($struc->get_chains($model)) {
+			my ($resname, $resnum, $atom_line, $atom_serial, $atom_icode);
+			my ($prev_resname, $prev_resnum, $prev_atomicode); # need these for TER record
+			my $last_record = ""; # Used to spot an ATOM -> HETATM change within a chain
+			my $chain_id = $chain->id;
+			if ( $chain_id eq "default" ) {
+				$chain_id = " ";
+			}
+			# method calls do not interpolate inside a string
+			$self->debug("model_id: " . $model->id . " chain_id: $chain_id\n");
+			for my $residue ($struc->get_residues($chain)) {
+				($resname, $resnum) = split /-/, $residue->id;
+				for my $atom ($struc->get_atoms($residue)) {
+					if ($het_res{$resname}) {  # HETATM
+						if ( $resname ne "HOH" && $last_record eq "ATOM  " ) {
+							# going from ATOM -> HETATM, we have to write TER
+							$write_ter->($atom_serial, $prev_resname, $chain_id,
+									$prev_resnum, $prev_atomicode);
+						}
+						$atom_line = "HETATM";
+					} else {
+						$atom_line = "ATOM  ";
+					}
+					$last_record = $atom_line;
+					$atom_line .= sprintf("%5d ", $atom->serial);
+					$atom_serial = $atom->serial; # we need it for TER record
+					$atom_icode = $atom->icode;
+					# remember some stuff if next iteration needs writing TER
+					$prev_resname = $resname;
+					$prev_resnum  = $resnum;
+					$prev_atomicode = $atom_icode;
+					# getting the name of the atom correct is subtrivial
+					my $atom_id = $atom->id;
+					# is pdb_atomname set, then use this (most probably set when
+					# reading in the PDB record)
+					my $pdb_atomname = $atom->pdb_atomname;
+					if( defined $pdb_atomname ) {
+						$atom_line .= sprintf("%-4s", $pdb_atomname);
+					} else {
+						# start (educated) guessing
+						my $element = $atom->element;
+						if( defined $element && $element ne "H") {
+							# element should be at first two positions (right justified)
+							# ie. Calcium should be "CA  "
+							#     C alpha should be " CA "
+							if( length($element) == 2 ) {
+								$atom_line .= sprintf("%-4s", $atom->id);
+							} else {
+								$atom_line .= sprintf(" %-3s", $atom->id);
+							}
+						} else { # old behaviour do a best guess
+							if ($atom->id =~ /^\dH/) { # H: four positions, left justified
+								$atom_line .= sprintf("%-4s", $atom->id);
+							} elsif (length($atom_id) == 4) {
+								if ($atom_id =~ /^(H\d\d)(\d)$/) {  # turn H123 into 3H12
+									$atom_line .= $2.$1;
+								} else {	# no more guesses, no more alternatives
+									$atom_line .= $atom_id;
+								}
+							} else { # if we get here and it is not correct let me know
+								$atom_line .= sprintf(" %-3s", $atom->id);
+							}
+						}
+					}
+					# we don't do alternate location at this moment
+					$atom_line .= " "; 				# 17
+					$atom_line .= sprintf("%3s",$resname);		# 18-20
+					$atom_line .= " ".$chain_id; 			# 21, 22
+					$atom_line .= sprintf("%4d", $resnum); 		# 23-26
+					$atom_line .= $atom->icode ? $atom->icode : " "; # 27
+					$atom_line .= "   ";				# 28-30
+					$atom_line .= sprintf("%8.3f", $atom->x);	# 31-38
+					$atom_line .= sprintf("%8.3f", $atom->y);	# 39-46
+					$atom_line .= sprintf("%8.3f", $atom->z);	# 47-54
+					$atom_line .= sprintf("%6.2f", $atom->occupancy); # 55-60
+					$atom_line .= sprintf("%6.2f", $atom->tempfactor); # 61-66
+					$atom_line .= "      ";				# 67-72
+					$atom_line .= $atom->segID ? 			# segID 73-76
+							sprintf("%-4s",  $atom->segID) :
+							"    ";
+					$atom_line .= $atom->element ?
+							sprintf("%2s", $atom->element) :
+							"  ";
+					$atom_line .= $atom->charge ?
+							sprintf("%2s", $atom->charge) :
+							"  ";
+
+					$self->_print($atom_line,"\n");
+				}
+			}
+			# write out TER record, but only when this chain produced at
+			# least one atom line (otherwise there is nothing to terminate)
+			if ( defined $atom_serial && $resname ne "HOH" ) {
+				$write_ter->($atom_serial, $resname, $chain_id, $resnum, $atom_icode);
+			}
+		}
+		if ($multiple_models) { # we need ENDMDL
+			my $endmdl_line = "ENDMDL" . " " x 74;
+			$self->_print($endmdl_line, "\n");
+		}
+	} # for my $model
+
+	# CONECT
+	# one 5 column field: the serial number, or blanks when there is none
+	my $conect_field = sub {
+		my ($serial) = @_;
+		return $serial ? sprintf("%5d", $serial) : " " x 5;
+	};
+	for my $source ($struc->get_all_conect_source) {
+		my (@bond, @hydbond, @saltbridge);
+		# classify all conect's of this source atom
+		for my $con ($struc->conect($source)) {
+			my ($to, $type) = split /_/, $con;
+			if($type eq "bond") {
+				push @bond, $to;
+			} elsif($type eq "hydrogen" || $type eq "hydrogenbonded") {
+				# read_structure stores this type as "hydrogen"
+				push @hydbond, $to;
+			} elsif($type eq "saltbridged") {
+				push @saltbridge, $to;
+			} else {
+				$self->throw("type $type is unknown for conect");
+			}
+		}
+		# and write out CONECT lines as long as there is something
+		# in one of the arrays
+		while ( @bond || @hydbond || @saltbridge ) {
+			my $conect_line = "CONECT". sprintf("%5d", $source);
+			$conect_line .= $conect_field->(shift @bond)    for (0..3);
+			$conect_line .= $conect_field->(shift @hydbond) for (4..5);
+			$conect_line .= $conect_field->(shift @saltbridge);	# 6
+			$conect_line .= $conect_field->(shift @hydbond) for (7..8);
+			$conect_line .= $conect_field->(shift @saltbridge);	# 9
+
+			$conect_line .= " " x (80 - length($conect_line) );
+			$self->_print($conect_line, "\n");
+		}
+	}
+
+	# MASTER line contains checksums, we should calculate them of course :)
+	my $master_line = "MASTER    " . $struc->master;
+	$master_line .= " " x (80 - length($master_line) );
+	$self->_print($master_line, "\n");
+
+	my $end_line = "END" . " " x 77;
+	return $self->_print($end_line,"\n");
+
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function:
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+=cut
+
+sub _filehandle {
+	# Combined getter/setter for the '_filehandle' slot of the object hash.
+	my $self = shift;
+	# only a defined argument replaces the stored value
+	$self->{'_filehandle'} = $_[0] if defined $_[0];
+	return $self->{'_filehandle'};
+}
+
+=head2 _noatom
+
+ Title   : _noatom
+ Usage   : $obj->_noatom($newval)
+ Function:
+ Example :
+ Returns : value of _noatom
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _noatom {
+	# Combined getter/setter for the '_noatom' slot of the object hash.
+	my $self = shift;
+	# only a defined argument replaces the stored value
+	$self->{'_noatom'} = $_[0] if defined $_[0];
+	return $self->{'_noatom'};
+}
+
+=head2 _noheader
+
+ Title   : _noheader
+ Usage   : $obj->_noheader($newval)
+ Function:
+ Example :
+ Returns : value of _noheader
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _noheader {
+	# Combined getter/setter for the '_noheader' slot of the object hash.
+	my $self = shift;
+	# only a defined argument replaces the stored value
+	$self->{'_noheader'} = $_[0] if defined $_[0];
+	return $self->{'_noheader'};
+}
+
+=head2 _read_PDB_singlecontline
+
+ Title   : _read_PDB_singlecontline
+ Usage   : $obj->_read_PDB_singlecontline($record, $fromto, $buffer))
+ Function: read single continued record from PDB
+ Returns : concatenated record entry (between $fromto columns)
+ Args    : record, colunm delimiters, buffer
+
+=cut
+
+sub _read_PDB_singlecontline {	# joins the text of consecutive $record lines into one string
+	my ($self, $record, $fromto, $buffer) = @_;	# $buffer is a reference to the caller's current line
+	my $concat_line;
+
+	my ($begin, $end) = (split (/-/, $fromto));	# "begin-end" column range of the text
+	my $unpack_string = "x8 a2 ";	# skip 8 columns, then the 2 column continuation field
+	if($begin == 12) { # one additional space
+		$unpack_string .= "x1 a59";
+	} else {
+		$unpack_string .= "a60";
+	}
+	$_ = $$buffer;	# start with the line the caller already has
+	while (defined( $_ ||= $self->_readline ) ) {	# a new line is read whenever $_ was cleared below
+		if ( /^$record/ ) {
+			my($cont, $rol) = unpack $unpack_string, $_;
+			if($cont =~ /\d$/ && $begin == 11) { # continuation line
+			     			# and text normally at pos 11
+		       		$rol =~ s/^\s//; # strip leading space
+			}
+			## no space (store literally) $concat_line .= $rol . " ";
+			$concat_line .= $rol;
+		} else {
+			last;	# line of another record: keep it in $_ for the caller
+		}
+
+		$_ = undef;	# line consumed, trigger reading of the next one
+	}
+	$concat_line =~ s/\s$//;  # remove one trailing whitespace character
+	$$buffer = $_;	# hand the unconsumed line (undef if the input ran out) back
+
+	return $concat_line;
+}
+
+
+=head2 _read_PDB_jrnl
+
+ Title   : _read_PDB_jrnl
+ Usage   : $obj->_read_PDB_jrnl($\buffer))
+ Function: read jrnl record from PDB
+ Returns : Bio::Annotation::Reference object
+ Args    :
+
+=cut
+
+sub _read_PDB_jrnl {
+	# Consumes the run of JRNL lines starting at the caller's current line
+	# and returns them as one Bio::Annotation::Reference. The first line
+	# that is not a JRNL line is handed back through $$buffer.
+	my ($self, $buffer) = @_;
+
+	# text collected so far, keyed by sub-record name
+	my %text_of;
+	my %is_subrecord = map { $_ => 1 } qw(AUTH TITL EDIT REF PUBL REFN);
+
+	$_ = $$buffer;
+	while (defined( $_ ||= $self->_readline )) {
+		last unless /^JRNL /;
+
+		# this code belongs in a separate method (shared with
+		# remark 1 parsing)
+		my (undef, $subr, undef, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
+		if ($is_subrecord{$subr}) {
+			$text_of{$subr} = $self->_concatenate_lines($text_of{$subr}, $rol);
+		}
+
+		$_ = undef; # trigger reading of next line
+	}
+	$$buffer = $_;
+
+	my $jrnl_ref = Bio::Annotation::Reference->new;
+	$jrnl_ref->authors($text_of{'AUTH'});
+	$jrnl_ref->title($text_of{'TITL'});
+	$jrnl_ref->location($text_of{'REF'});
+	$jrnl_ref->publisher($text_of{'PUBL'});
+	$jrnl_ref->editors($text_of{'EDIT'});
+	$jrnl_ref->encoded_ref($text_of{'REFN'});
+
+	return $jrnl_ref;
+} # sub _read_PDB_jrnl
+
+
+=head2 _read_PDB_remark_1
+
+ Title   : _read_PDB_remark_1
+ Usage   : $obj->_read_PDB_remark_1($\buffer))
+ Function: read "remark 1"  record from PDB
+ Returns : array of Bio::Annotation::Reference objects
+ Args    :
+
+=cut
+
+sub _read_PDB_remark_1 {
+	# Consumes the run of "REMARK   1" lines starting at the caller's
+	# current line and returns one Bio::Annotation::Reference per
+	# REFERENCE found (empty list when there is only the bare header
+	# line). The first line that does not belong to REMARK 1 is handed
+	# back through $$buffer.
+	my ($self, $buffer) = @_;
+
+	my ($auth, $titl,$edit,$ref,$publ,$refn,$refnum);
+	my @refs;
+
+	# turn the fields collected so far into a reference and start afresh
+	my $store_reference = sub {
+		my $rref = Bio::Annotation::Reference->new;
+		$rref->authors($auth);
+		$rref->title($titl);
+		$rref->location($ref);
+		$rref->publisher($publ);
+		$rref->editors($edit);
+		$rref->encoded_ref($refn);
+		$auth = $titl = $edit = $ref = $publ = $refn = undef;
+		push @refs, $rref;
+	};
+
+	$_ = $$buffer;
+	while (defined( $_ ||= $self->_readline )) {
+		last unless /^REMARK   1 /;
+
+		if (/^REMARK   1\s+REFERENCE\s+(\d+)\s*/) {
+			$refnum = $1;
+			# a new reference starts here, so the previous one is
+			# complete (nothing precedes reference number 1)
+			$store_reference->() if ($refnum != 1);
+		} else {
+			# this code belongs in a separate method (shared with
+			# jrnl parsing)
+			my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
+			$auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
+			$titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
+			$edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
+			$ref  = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
+			$publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
+			$refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
+		}
+
+		$_ = undef; # trigger reading of next line
+	} # while
+
+	# Store the last reference. This is done after the loop so it also
+	# happens when the input ends inside the REMARK 1 block; with no
+	# REFERENCE line seen at all there is nothing to store.
+	$store_reference->() if defined $refnum;
+
+	$$buffer = $_;
+
+	return @refs;
+} # sub _read_PDB_remark_1
+
+
+=head2 _read_PDB_coordinate_section
+
+ Title   : _read_PDB_coordinate_section
+ Usage   : $obj->_read_PDB_coordinate_section($\buffer))
+ Function: read one model from a PDB
+ Returns : Bio::Structure::Model object
+ Args    :
+
+=cut
+
+sub _read_PDB_coordinate_section {	# reads one model worth of coordinate records into $struc
+	my ($self, $buffer, $struc) = @_;	# $buffer is a reference to the caller's current line
+	my ($model_num, $chain_name, $residue_name, $atom_name);  # to keep track of state
+	$model_num = "";
+	$chain_name = "";
+	$residue_name = "";
+	$atom_name = "";
+
+	my $atom_unpack =   "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x3 a8 a8 a8 a6 a6 x6 a4 a2 a2";	# fixed width fields after the 6 column record name
+	my $anisou_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x1 a7 a7 a7 a7 a7 a7 a4 a2 a2";
+
+	my $model = Bio::Structure::Model->new;
+	$model->id('default');	# replaced below when a MODEL record is seen
+	my $noatom = $self->_noatom;
+	my ($chain, $residue, $atom, $old);
+	my (%_ch_in_model);  # which chains are already in this model
+
+	$_ = $$buffer;	# start with the line the caller already has
+	while (defined( $_ ||= $self->_readline )) {	# a new line is read whenever $_ was cleared
+		# start of a new model
+		if (/^MODEL\s+(\d+)/) {
+			$model_num = $1;
+$self->debug("_read_PDB_coor: parsing model $model_num\n");
+			$model->id($model_num);
+			if (/^MODEL\s+\d+\s+\S+/) { # old format (pre 2.1)
+				$old = 1;
+			}
+		}
+		# XXX: $old should be set here as well
+		# ATOM lines, if first set chain
+		if (/^(ATOM |HETATM|SIGATM)/) {
+			my @line_elements = unpack $atom_unpack, $_;
+			my $pdb_atomname = $line_elements[1]; # need to get this before removing spaces
+			for my $k (0 .. $#line_elements) {
+				$line_elements[$k] =~ s/^\s+//; # remove leading space
+				$line_elements[$k] =~ s/\s+$//; # remove trailing space
+				$line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);	# blank fields become undef
+			}
+			my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode, $x, $y, $z,
+				$occupancy, $tempfactor, $segID, $element, $charge) = @line_elements;
+			$chainID = 'default' if ( !defined $chainID );
+			if ($chainID ne $chain_name) { # possibly a new chain
+				# fix for bug #1187
+				#  we can have ATOM/HETATM of an already defined chain (A B A B)
+				#  e.g. 1abm
+
+				if (exists $_ch_in_model{$chainID} ) { # we have already seen this chain in this model
+					$chain = $_ch_in_model{$chainID};
+				} else {  # we create a new chain
+					$chain = Bio::Structure::Chain->new;
+					$struc->add_chain($model,$chain);
+					$chain->id($chainID);
+					$_ch_in_model{$chainID} = $chain;
+				}
+				$chain_name = $chain->id;
+			}
+			#my $res_name_num = $resname."-".$resseq;
+			my $res_name_num = $resname."-".$resseq;	# residue id is "name-number", plus ".icode" when there is one
+			$res_name_num .= '.'.$icode if $icode;
+			if ($res_name_num ne $residue_name) { # new residue
+				$residue = Bio::Structure::Residue->new;
+				$struc->add_residue($chain,$residue);
+				$residue->id($res_name_num);
+				$residue_name = $res_name_num;
+				$atom_name = ""; # only needed inside a residue
+			}
+			# get out of here if we don't want the atom objects
+			if ($noatom) {
+				$_ = undef;
+				next;
+			}
+			# alternative location: only take first one
+			if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
+				$_ = undef; # trigger reading next line
+				next;
+			}
+			if (/^(ATOM |HETATM)/) { # ATOM  / HETATM
+				$atom_name = $atomname;
+				$atom = Bio::Structure::Atom->new;
+				$struc->add_atom($residue,$atom);
+				$atom->id($atomname);
+				$atom->pdb_atomname($pdb_atomname); # store away PDB atomname for writing out
+				$atom->serial($serial);
+				$atom->icode($icode);
+				$atom->x($x);
+				$atom->y($y);
+				$atom->z($z);
+				$atom->occupancy($occupancy);
+				$atom->tempfactor($tempfactor);
+				$atom->segID($segID); # deprecated but used by people
+				if (! $old ) {	# element and charge are not taken from old format files
+					$atom->element($element);
+					$atom->charge($charge);
+				}
+			}
+			else {  # SIGATM
+				my $sigx = $x;
+				my $sigy = $y;
+				my $sigz = $z;
+				my $sigocc = $occupancy;
+				my $sigtemp = $tempfactor;
+				if ($atom_name ne $atomname) {  # something wrong with PDB file
+					$self->throw("A SIGATM record should have the same $atomname as the previous record $atom_name\n");
+				}
+				$atom->sigx($sigx);	# $atom is still the atom of the preceding ATOM/HETATM line
+				$atom->sigy($sigy);
+				$atom->sigz($sigz);
+				$atom->sigocc($sigocc);
+				$atom->sigtemp($sigtemp);
+
+			}
+		} # ATOM|HETATM|SIGATM
+
+		# ANISOU | SIGUIJ  lines
+		if (/^(ANISOU|SIGUIJ)/) {
+			if ($noatom) {
+				$_ = undef;
+				next;
+			}
+			my @line_elements = unpack $anisou_unpack, $_;
+			for my $k (0 .. $#line_elements) {
+				$line_elements[$k] =~ s/^\s+//; # remove leading space
+				$line_elements[$k] =~ s/\s+$//; # remove trailing space
+				$line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);	# blank fields become undef
+			}
+			my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode,
+				$u11,$u22, $u33, $u12, $u13, $u23, $segID, $element, $charge) = @line_elements;
+$self->debug("read_PDB_coor: parsing ANISOU record: $serial $atomname\n");
+			if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
+				$_ = undef;
+				next;
+			}
+			if (/^ANISOU/) {
+				if ($atom_name ne $atomname) {  # something wrong with PDB file
+					$self->throw("A ANISOU record should have the same $atomname as the previous record $atom_name\n");
+				}
+				$atom->aniso("u11",$u11);
+				$atom->aniso("u22",$u22);
+				$atom->aniso("u33",$u33);
+				$atom->aniso("u12",$u12);
+				$atom->aniso("u13",$u13);
+				$atom->aniso("u23",$u23);
+			}
+			else { # SIGUIJ
+				if ($atom_name ne $atomname) {  # something wrong with PDB file
+					$self->throw("A SIGUIJ record should have the same $atomname as the previous record $atom_name\n");
+				}
+				# could use different variable names, but hey ...
+				$atom->aniso("sigu11",$u11);
+				$atom->aniso("sigu22",$u22);
+				$atom->aniso("sigu33",$u33);
+				$atom->aniso("sigu12",$u12);
+				$atom->aniso("sigu13",$u13);
+				$atom->aniso("sigu23",$u23);
+			}
+		} # ANISOU | SIGUIJ
+
+		if (/^TER /) {	# TER lines carry nothing that is stored
+			$_ = undef;
+			next;
+		}
+
+		if (/^ENDMDL/) {
+			$_ = $self->_readline;	# ENDMDL is consumed, the line after it goes back to the caller
+			last;
+		}
+
+		if (/^(CONECT|MASTER)/) { # get out of here
+			# current line is OK
+			last;
+		}
+		$_ = undef;	# line handled, trigger reading of the next one
+
+	} # while
+
+	$$buffer = $_;	# hand the unconsumed line (undef if the input ran out) back
+
+	return $model;
+} # _read_PDB_coordinate_section
+
+
+sub _write_PDB_simple_record {
+	# Prints one PDB record, spread over as many 80 column lines as the
+	# text needs.
+	#
+	# Args (named):
+	#   -name       : record name, written at the start of every line
+	#   -rol        : "begin-end" columns available for the text
+	#   -cont       : "begin-end" columns of the continuation number
+	#                 (leave out for records without continuation field)
+	#   -annotation : annotation object, its as_text (minus a leading
+	#                 "Value: ") is the text to write
+	#   -string     : plain text to write instead of an annotation; it is
+	#                 wrapped at spaces to the width given by -rol
+	# Throws  : when both -annotation and -string are given, when neither
+	#           yields any text, or when -string cannot be wrapped
+	my ($self, @args) = @_;
+	my ($name, $cont , $annotation, $rol, $string) =
+		$self->_rearrange([qw(
+				NAME
+				CONT
+				ANNOTATION
+				ROL
+				STRING
+			)],
+			@args);
+	if (defined $string && defined $annotation) {
+		$self->throw("you can only supply one of -annotation or -string");
+	}
+	my ($output_string, $ann_string, $t_string);
+	my ($rol_begin, $rol_end) = $rol =~ /^(\d+)-(\d+)$/;
+	my $rol_length = $rol_end - $rol_begin +1;
+	if ($string) {
+		# we might need to split $string in multiple lines: every full
+		# line is cut at the last space that still fits and padded with
+		# blanks to $rol_length, so the slicing below falls on the cuts
+		while (length $string > $rol_length) {
+			my $t = rindex($string, " ", $rol_length);
+			if ($t <= 0) { $self->throw("Found no space for $string\n"); }
+$self->debug("t: $t rol_length: $rol_length\n");
+			$ann_string .= substr($string, 0, $t);
+			$ann_string .= " " x ($rol_length - $t );
+			$string = substr($string, $t+1);
+			$string =~ s/^\s+//;
+$self->debug("ann_string: $ann_string~~\nstring: $string~~\n");
+		}
+		$ann_string .= $string;
+	} else {
+		if (!defined $annotation) {
+			$self->throw("no text to write for record '$name': supply -annotation or a non-empty -string");
+		}
+		$ann_string = $annotation->as_text;
+		$ann_string =~ s/^Value: //;
+	}
+	# ann_string contains the thing to write out, writing out happens below
+	my $ann_length = length $ann_string;
+
+$self->debug("ann_string: $ann_string\n");
+	if ($cont) {
+		my ($c_begin, $c_end) = $cont =~ /^(\d+)-(\d+)$/;
+		if ( $ann_length > $rol_length ) { # we need continuation lines
+			my $first_line = 1;
+			my $cont_number = 2;
+			my $out_line;
+			my $num_pos = $rol_length;
+			my $i = 0;
+			while( $i < $ann_length ) {
+				$t_string = substr($ann_string, $i, $num_pos);
+$self->debug("t_string: $t_string~~$i $num_pos\n");
+				$i += $num_pos;	# advance with the width used for this slice
+				if ($first_line) {
+					$out_line = $name . " " x ($rol_begin - $c_begin) . $t_string;
+					$out_line .= " " x (80 - length($out_line) ) . "\n";
+					$first_line = 0;
+					$output_string = $out_line;
+					if ($rol_begin - $c_end == 1) { # next line one character less
+						$num_pos--;
+					}
+				} else {
+					$out_line = $name . sprintf("%2d",$cont_number);
+					# a space after continuation number
+					if ($rol_begin - $c_end == 1) {  # one space after cont number
+						$out_line .= " ";
+						$out_line .=  $t_string;
+					} else {
+						$out_line .= " " x ($rol_begin - $c_end - 1) . $t_string;
+					}
+					$out_line .= " " x (80 -length($out_line) ) . "\n";
+					$cont_number++;
+					$output_string .= $out_line;
+				}
+			}
+		} else { # no continuation
+			my $spaces = $rol_begin - $c_begin; # number of spaces need to insert
+			$output_string = $name . " " x $spaces . $ann_string;
+			$output_string .= " " x (80 - length($output_string) );
+		}
+	} else { # no continuation lines
+		if ($ann_length < $rol_length) {
+			$output_string = $name . $ann_string;
+			$output_string .= " " x (80 - length($output_string) );
+		} else {
+			# every line simply repeats the record name
+			for (my $i = 0; $i < $ann_length; $i += $rol_length) {
+				my $out_line;
+				$t_string = substr($ann_string, $i, $rol_length);
+				$out_line = $name . $t_string;
+				$out_line .= " " x (80 -length($out_line) ) . "\n";
+				$output_string .= $out_line;
+			}
+		}
+	}
+	$output_string =~ s/\n$//;  # remove trailing newline
+	$self->_print("$output_string\n");
+
+}
+
+sub _write_PDB_remark_record {
+	# Writes the REMARK record with number $remark_num; the text comes
+	# from the first annotation stored under the key "remark_<number>".
+	my $self = shift;
+	my ($struc, $remark_num) = @_;
+	my $remark_ann = ($struc->annotation->get_Annotations("remark_$remark_num"))[0];
+	return $self->_write_PDB_simple_record(
+		-name       => sprintf("REMARK %3d ", $remark_num),
+		-annotation => $remark_ann,
+		-rol        => "12-70",
+	);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,528 @@
+# $Id: IO.pm,v 1.12.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Structure::IO
+#
+# Copyright 2001, 2002 Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+#
+# _history
+# October 18, 1999  Largely rewritten by Lincoln Stein
+# November 16, 2001 Copied Bio::SeqIO to Bio::Structure::IO and modified
+# 			where needed. Factoring out common methods
+# 			(to Bio::Root::IO) might be a good idea.
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::IO - Handler for Structure Formats
+
+=head1 SYNOPSIS
+
+    use Bio::Structure::IO;
+
+    $in  = Bio::Structure::IO->new(-file => "inputfilename",
+                                   -format => 'pdb');
+
+    while ( my $struc = $in->next_structure() ) {
+       print "Structure ", $struc->id, " number of models: ",
+             scalar $struc->model,"\n";
+    }
+
+=head1 DESCRIPTION
+
+Bio::Structure::IO is a handler module for the formats in the
+Structure::IO set (e.g. L<Bio::Structure::IO::pdb>). It is the officially
+sanctioned way of getting at the format objects, which most people
+should use.
+
+The Bio::Structure::IO system can be thought of like biological file
+handles.  They are attached to filehandles with smart formatting rules
+(e.g. PDB format) and can either read or write structure objects
+(Bio::Structure objects, or more correctly, Bio::Structure::StructureI
+implementing objects, of which Bio::Structure is one such object). If
+you want to know what to do with a Bio::Structure object, read
+L<Bio::Structure>.
+
+The idea is that you request a stream object for a particular format.
+All the stream objects have a notion of an internal file that is read
+from or written to. A particular Structure::IO object instance is
+configured for either input or output. A specific example of a stream
+object is the Bio::Structure::IO::pdb object.
+
+Each stream object has functions
+
+   $stream->next_structure();
+
+and
+
+   $stream->write_structure($struc);
+
+also
+
+   $stream->type() # returns 'INPUT' or 'OUTPUT'
+
+As an added bonus, you can recover a filehandle that is tied to the
+Structure::IO object, allowing you to use the standard E<lt>E<gt>
+and print operations to read and write structure objects:
+
+    use Bio::Structure::IO;
+
+    $stream = Bio::Structure::IO->newFh(-format => 'pdb'); # read from standard input
+
+    while ( $structure = <$stream> ) {
+   	# do something with $structure
+    }
+
+and
+
+    print $stream $structure; # when stream is in output mode
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::Structure::IO-E<gt>new()
+
+   $stream = Bio::Structure::IO->new(-file => 'filename',   -format=>$format);
+   $stream = Bio::Structure::IO->new(-fh   => \*FILEHANDLE, -format=>$format);
+   $stream = Bio::Structure::IO->new(-format => $format);
+
+The new() class method constructs a new Bio::Structure::IO object. The
+returned object can be used to retrieve or print Bio::Structure
+objects.  new() accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading or writing.  The usual Perl
+conventions apply:
+
+   'file'       # open file for reading
+   '>file'      # open file for writing
+   '>>file'     # open file for appending
+   '+<file'     # open file read/write
+   'command |'  # open a pipe from the command
+   '| command'  # open a pipe to the command
+
+=item -fh
+
+You may provide new() with a previously-opened filehandle.  For
+example, to read from STDIN:
+
+   $strucIO = Bio::Structure::IO->new(-fh => \*STDIN);
+
+Note that you must pass filehandles as references to globs.
+
+If neither a filehandle nor a filename is specified, then the module
+will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
+semantics.
+
+=item -format
+
+Specify the format of the file.  Supported formats include:
+
+   pdb         Protein Data Bank format
+
+If no format is specified and a filename is given, then the module
+will attempt to deduce it from the filename.  If this is unsuccessful,
+PDB format is assumed.
+
+The format name is case insensitive.  'PDB', 'Pdb' and 'pdb' are
+all supported.
+
+=back
+
+=head2 Bio::Structure::IO-E<gt>newFh()
+
+   $fh = Bio::Structure::IO->newFh(-fh   => \*FILEHANDLE, -format=>$format);
+   $fh = Bio::Structure::IO->newFh(-format => $format);
+   # etc.
+
+This constructor behaves like new(), but returns a tied filehandle
+rather than a Bio::Structure::IO object.  You can read structures from this
+object using the familiar E<lt>E<gt> operator, and write to it using
+print().  The usual array and $_ semantics work.  For example, you can
+read all structure objects into an array like this:
+
+  @structures = <$fh>;
+
+Other operations, such as read(), sysread(), write(), close(), and printf()
+are not supported.
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $structure = $structIO-E<gt>next_structure()
+
+Fetch the next structure from the stream.
+
+=head2 $structIO-E<gt>write_structure($struc [,$another_struc,...])
+
+Write the specified structure(s) to the stream.
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These provide the tie interface.  See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.
+Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS - Ewan Birney, Lincoln Stein, Kris Boulez
+
+Email birney at ebi.ac.uk, lstein at cshl.org, kris.boulez at algonomics.com
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Structure::IO;
+
+use strict;
+
+use Bio::PrimarySeq;
+use Symbol();
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::Structure::IO->new(-file => $filename, -format => 'Format')
+ Function: Returns a new structIOstream
+ Returns : A Bio::Structure::IO handler initialised with the appropriate format
+ Args    : -file => $filename
+           -format => format
+           -fh => filehandle to attach to
+
+=cut
+
+# NOTE(review): file-scoped lexical that is not referenced anywhere else in
+# the visible part of this module - confirm whether it is still needed.
+my $entry = 0;
+
+# Constructor / factory.
+#
+# Called on a format class (Bio::Structure::IO::<format>) it builds and
+# initialises an object of that class.  Called on Bio::Structure::IO itself
+# it works out the format (the -format argument, else a guess from -file or
+# $ARGV[0], else 'pdb'), loads the matching format module and returns the
+# result of that module's new().  Returns nothing if the format module
+# cannot be loaded.
+sub new {
+    # the mail archive had mangled "@args" into " at args" here, which is a
+    # syntax error; the array is restored
+    my ($caller, @args) = @_;
+    my $class = ref($caller) || $caller;
+
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::Structure::IO::(\S+)/ ) {
+	my ($self) = $class->SUPER::new(@args);
+	$self->_initialize(@args);
+	return $self;
+    } else {
+
+	my %param = @args;
+	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+	my $format = $param{'-format'} ||
+	    $class->_guess_format( $param{-file} || $ARGV[0] ) ||
+		'pdb';
+	$format = "\L$format";	# normalize capitalization to lower case
+
+	# plain function call (not a method call): the loader only takes
+	# the format name
+	return unless _load_format_module($format);
+	return "Bio::Structure::IO::$format"->new(@args);
+    }
+}
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
+           $structure = <$fh>;   # read a structure object
+           print $fh $structure; # write a structure object
+ Returns : filehandle tied to the Bio::Structure::IO::Fh class
+ Args    :
+
+=cut
+
+# Convenience constructor: builds the stream object with new() and hands
+# back its tied filehandle.  Returns nothing when new() fails.
+sub newFh {
+  my ($class, @args) = @_;
+  my $stream = $class->new(@args) or return;
+  return $stream->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $structure = <$fh>;   # read a structure object
+           print $fh $structure; # write a structure object
+ Returns : filehandle tied to the Bio::Structure::IO::Fh class
+ Args    :
+
+=cut
+
+
+# Creates a fresh anonymous glob and ties it to this object's class, passing
+# the object itself to TIEHANDLE; the glob reference is returned.
+sub fh {
+  my ($self) = @_;
+  my $tie_class = ref($self) || $self;
+  my $sym = Symbol::gensym();
+  tie $$sym, $tie_class, $self;
+  return $sym;
+}
+
+
+# _initialize is chained for all Structure::IO classes
+
+# Chained initialiser: first the parent class' _initialize(), then the
+# I/O setup via _initialize_io(), both with the constructor arguments.
+sub _initialize {
+    my $self = shift;
+
+    # not really necessary unless we put more in RootI
+    $self->SUPER::_initialize(@_);
+
+    # initialize the IO part
+    $self->_initialize_io(@_);
+}
+
+=head2 next_structure
+
+ Title   : next_structure
+ Usage   : $structure = $stream->next_structure
+ Function: Reads the next structure object from the stream and returns a
+           Bio::Structure::Entry object.
+
+           Certain driver modules may encounter entries in the stream that
+           are either misformatted or that use syntax not yet understood
+           by the driver. If such an incident is recoverable, e.g., by
+           dismissing a feature of a feature table or some other non-mandatory
+           part of an entry, the driver will issue a warning. In the case
+           of a non-recoverable situation an exception will be thrown.
+           Do not assume that you can resume parsing the same stream after
+           catching the exception. Note that you can always turn recoverable
+           errors into exceptions by calling $stream->verbose(2) (see
+           Bio::RootI POD page).
+ Returns : a Bio::Structure::Entry object
+ Args    : none
+
+=cut
+
+# Generic placeholder: the base class cannot read anything, so this always
+# throws.
+sub next_structure {
+   my $self = shift;
+   $self->throw("Sorry, you cannot read from a generic Bio::Structure::IO object.");
+}
+
+# Do we want people to read out the sequence directly from a $structIO stream
+#
+##=head2 next_primary_seq
+##
+## Title   : next_primary_seq
+## Usage   : $seq = $stream->next_primary_seq
+## Function: Provides a primaryseq type of sequence object
+## Returns : A Bio::PrimarySeqI object
+## Args    : none
+##
+##
+##=cut
+##
+##sub next_primary_seq {
+##   my ($self) = @_;
+##
+##   # in this case, we default to next_seq. This is because
+##   # Bio::Seq's are Bio::PrimarySeqI objects. However we
+##   # expect certain sub classes to override this method to provide
+##   # less parsing heavy methods to retrieving the objects
+##
+##   return $self->next_seq();
+##}
+
+=head2 write_structure
+
+ Title   : write_structure
+ Usage   : $stream->write_structure($structure)
+ Function: writes the $structure object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Structure object
+
+=cut
+
+# Generic placeholder for the writer: the base class cannot write anything,
+# so this always throws.  The method was previously only defined under the
+# name write_seq(), although the POD and the documented stream API call it
+# write_structure().
+sub write_structure {
+    my ($self, $struc) = @_;
+    $self->throw("Sorry, you cannot write to a generic Bio::Structure::IO object.");
+}
+
+# Backward-compatible alias for write_structure(); kept so existing callers
+# of the old name keep working.  On a generic object it still throws.
+sub write_seq {
+    my ($self, @strucs) = @_;
+    return $self->write_structure(@strucs);
+}
+
+
+# Do we need this here?
+#
+##=head2 alphabet
+##
+## Title   : alphabet
+## Usage   : $self->alphabet($newval)
+## Function: Set/get the molecule type for the Seq objects to be created.
+## Example : $seqio->alphabet('protein')
+## Returns : value of alphabet: 'dna', 'rna', or 'protein'
+## Args    : newvalue (optional)
+## Throws  : Exception if the argument is not one of 'dna', 'rna', or 'protein'
+##
+##=cut
+##
+##sub alphabet {
+##   my ($self, $value) = @_;
+##
+##   if ( defined $value) {
+##       # instead of hard-coding the allowed values once more, we check by
+##       # creating a dummy sequence object
+##       eval {
+##	   my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
+##       };
+##       if($@) {
+##	   $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
+##       }
+##       $self->{'alphabet'} = "\L$value";
+##   }
+##   return $self->{'alphabet'};
+##}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL Structure::IO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+# Plain function (not a method): loads Bio/Structure/IO/<format>.pm at run
+# time.  Returns 1 when the module is available, nothing when the require
+# fails (after printing a diagnostic to STDERR).
+sub _load_format_module {
+  my ($format) = @_;
+
+  my $load   = "Bio/Structure/IO/$format.pm";
+  my $module = "_<$load";
+
+  # short-circuit when %main:: already holds an entry for this file
+  return 1 if $main::{$module};
+
+  eval { require $load };
+  return 1 unless $@;
+
+  print STDERR <<END;
+$load: $format cannot be found
+Exception $@
+For more information about the Structure::IO system please see the
+Bio::Structure::IO docs.  This includes ways of checking for formats at
+compile time, not run time
+END
+  return;
+}
+
+=head2 _concatenate_lines
+
+ Title   : _concatenate_lines
+ Usage   : $s = _concatenate_lines($line, $continuation_line)
+ Function: Private. Concatenates two strings assuming that the second stems
+           from a continuation line of the first. Adds a space between both
+           unless the first ends with a dash.
+
+           Takes care of either arg being empty.
+ Example :
+ Returns : A string.
+ Args    :
+
+=cut
+
+# Joins a line and its continuation line.  A single space is inserted
+# between the two unless the first part ends in a dash; a false part
+# (undef, "" or "0") is treated as empty and never gets a separator.
+sub _concatenate_lines {
+    my ($self, $head, $tail) = @_;
+
+    $head = "" unless $head;
+    $tail = "" unless $tail;
+
+    my $glue = ($head && $tail && $head !~ /-$/) ? " " : "";
+    return $head . $glue . $tail;
+}
+
+=head2 _filehandle
+
+ Title   : _filehandle
+ Usage   : $obj->_filehandle($newval)
+ Function: This method is deprecated. Call _fh() instead.
+ Example :
+ Returns : value of _filehandle
+ Args    : newvalue (optional)
+
+
+=cut
+
+# Deprecated accessor kept for backward compatibility; simply forwards all
+# arguments to _fh() and returns whatever that returns.
+sub _filehandle {
+    # the mail archive had mangled "@args" into " at args" here, which is a
+    # syntax error; the array is restored
+    my ($self, @args) = @_;
+    return $self->_fh(@args);
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+# Guesses a format name from the extension of $filename (case-insensitive).
+# Returns the lower-case format name, or nothing when no filename was given
+# or no extension rule matches.
+#
+# The filename is held in a lexical: the previous implementation assigned it
+# to the global $_ without localising it and so clobbered the caller's $_.
+#
+# NOTE(review): most of these rules name sequence formats; the POD of this
+# module only lists 'pdb' as a supported format - confirm they are wanted.
+sub _guess_format {
+   my ($class, $filename) = @_;
+   return unless $filename;
+
+   # checked in order; the first matching rule wins
+   my @rules = (
+       [ 'fasta'   => qr/\.(fasta|fast|seq|fa|fsa|nt|aa)$/i ],
+       [ 'genbank' => qr/\.(gb|gbank|genbank)$/i ],
+       [ 'scf'     => qr/\.scf$/i ],
+       [ 'pir'     => qr/\.pir$/i ],
+       [ 'embl'    => qr/\.(embl|ebl|emb|dat)$/i ],
+       [ 'raw'     => qr/\.(txt)$/i ],
+       [ 'gcg'     => qr/\.gcg$/i ],
+       [ 'ace'     => qr/\.ace$/i ],
+       [ 'bsml'    => qr/\.(bsm|bsml)$/i ],
+       [ 'pdb'     => qr/\.(ent|pdb)$/i ],
+   );
+   for my $rule (@rules) {
+       my ($format, $pattern) = @$rule;
+       return $format if $filename =~ $pattern;
+   }
+   return;
+}
+
+# Destructor: closes the stream when the object goes away.
+sub DESTROY {
+    my ($self) = @_;
+    $self->close();
+}
+
+# tie() hook: wraps the stream object in a hash under the key 'structio'
+# and blesses that hash into $class.
+sub TIEHANDLE {
+    my $class  = shift;
+    my $stream = shift;
+    my %state  = ('structio' => $stream);
+    return bless \%state, $class;
+}
+
+# tie() hook behind <$fh>.  In scalar context it returns the next structure
+# from the wrapped stream; in list context it returns all remaining
+# structures, stopping at the first false value.
+#
+# The reader method of this class is next_structure(); the previous call to
+# next_seq() was a leftover that this module neither defines nor documents.
+sub READLINE {
+  my $self = shift;
+  my $stream = $self->{'structio'};
+  return $stream->next_structure() unless wantarray;
+  my (@list, $obj);
+  push @list, $obj while $obj = $stream->next_structure();
+  return @list;
+}
+
+# tie() hook behind "print $fh ...": hands every printed item to the wrapped
+# stream's write_structure(), the writer documented for this class.  The
+# previous call went to write_seq(), which this module only defines as the
+# generic stub that always throws.
+sub PRINT {
+  my $self = shift;
+  $self->{'structio'}->write_structure(@_);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Model.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Model.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Model.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,290 @@
+# $Id: Model.pm,v 1.10.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Structure::Model
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::Model - Bioperl structure Object, describes a Model
+
+=head1 SYNOPSIS
+
+  #add synopsis here
+
+=head1 DESCRIPTION
+
+This object stores a Bio::Structure::Chain
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Structure::Model;
+use strict;
+
+use Bio::Structure::Entry;
+use Bio::Structure::Chain;
+use base qw(Bio::Root::Root);
+
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $struc = Bio::Structure::Model->new( 
+                                           -id  => 'human_id',
+                                           );
+
+ Function: Returns a new Bio::Structure::Model object from basic 
+	constructors. Probably most called from Bio::Structure::IO.
+ Returns : a new Bio::Structure::Model object
+
+=cut
+
+
+
+# Constructor.  Only -id is honoured; passing -chain or -residue throws,
+# because those have to be added through an Entry object.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($id, $chain, $residue) =
+        $self->_rearrange([qw(ID CHAIN RESIDUE)], @args);
+
+    # a false ID (undef, "" or 0) is not stored
+    $self->id($id) if $id;
+
+    if ($chain) {
+        $self->throw("you have to add chain via an Entry object\n");
+    }
+    if ($residue) {
+        $self->throw("you have to add residues via an Entry object\n");
+    }
+
+    return $self;
+}
+
+
+
+=head2 chain()
+
+ Title   : chain
+ Usage   : 
+ Function: will eventually allow parent/child navigation not via an Entry object
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws, whatever is passed in.
+sub chain {
+	my $self = shift;
+	$self->throw("go via an Entry object\n");
+}
+
+
+=head2 add_chain()
+
+ Title   : add_chain
+ Usage   : 
+ Function:  will eventually allow parent/child navigation not via an Entry object
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws, whatever is passed in.
+sub add_chain {
+	my $self = shift;
+	$self->throw("go via an Entry object for now\n");
+}
+
+=head2 entry()
+
+ Title   : entry
+ Usage   : 
+ Function:  will eventually allow parent/child navigation not via an Entry object
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws.
+sub entry {
+	my $self = shift;
+	$self->throw("Model::entry go via an Entry object please\n");
+}
+
+
+=head2 id()
+
+ Title   : id
+ Usage   : $model->id("model 5")
+ Function: Gets/sets the ID for this model
+ Returns : the ID
+ Args    : the ID
+
+=cut
+
+# Combined getter/setter for the model ID.  A defined argument replaces the
+# stored ID; the current ID is returned either way.
+sub id {
+	my $self = shift;
+	$self->{'id'} = $_[0] if defined $_[0];
+	return $self->{'id'};
+}
+
+=head2 residue()
+
+ Title   : residue
+ Usage   : 
+ Function:  will eventually allow parent/child navigation not via an Entry object
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws, whatever is passed in.
+sub residue {
+	my $self = shift;
+	$self->throw("need to go via Entry object or learn symbolic refs\n");
+}
+
+
+=head2 add_residue()
+
+ Title   : add_residue
+ Usage   : 
+ Function:  will eventually allow parent/child navigation not via an Entry object
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws, whatever is passed in.
+sub add_residue {
+	my $self = shift;
+	$self->throw("go via entry->add_residue(chain, residue)\n");
+}
+
+
+
+# Destructor stub: a Model has nothing of its own to clean up for now.
+sub DESTROY {
+	my $self = $_[0];
+
+	# no specific DESTROY for now
+}
+
+#
+# from here on only private methods
+#
+
+=head2 _remove_chains()
+
+ Title   : _remove_chains
+ Usage   : 
+ Function: Removes the chains attached to a Model. Tells the chains they
+ 	don't belong to this Model any more
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws.
+sub _remove_chains {
+	my $self = shift;
+	$self->throw("use Entry methods pleae\n");
+}
+
+
+=head2 _remove_entry()
+
+ Title   : _remove_entry
+ Usage   : 
+ Function: Removes the Entry this Model is attached to.
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Model: always throws.
+sub _remove_entry {
+	my $self = shift;
+	$self->throw("use a method based on an Entry object\n");
+}
+
+
+=head2 _create_default_chain()
+
+ Title   : _create_default_chain
+ Usage   : 
+ Function: Creates a default Chain for this Model. Typical situation
+ 	in an X-ray structure where there is only one chain
+ Returns : 
+ Args    : 
+
+=cut
+
+# Builds a Bio::Structure::Chain with the ID "default" and returns it.
+# The new chain is not stored on or linked to the Model here.
+sub _create_default_chain {
+	my $self = shift;
+
+	my $default_chain = Bio::Structure::Chain->new(-id => "default");
+	return $default_chain;
+}
+
+
+=head2 _grandparent()
+
+ Title   : _grandparent
+ Usage   : 
+ Function: get/set a symbolic reference to our grandparent
+ Returns : 
+ Args    : 
+
+=cut
+
+# Combined getter/setter for the 'grandparent' slot.  Only plain strings are
+# accepted: passing any reference throws.  A defined argument is stored; the
+# current value is returned either way.
+sub _grandparent {
+	my ($self, $symref) = @_;
+
+	$self->throw("Thou shall only pass strings in here, no references $symref\n")
+		if ref($symref);
+
+	$self->{'grandparent'} = $symref if defined $symref;
+	return $self->{'grandparent'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Residue.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Residue.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/Residue.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,251 @@
+# $Id: Residue.pm,v 1.11.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Structure::Residue
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::Residue - Bioperl structure Object, describes a Residue
+
+=head1 SYNOPSIS
+
+  #add synopsis here
+
+=head1 DESCRIPTION
+
+This object stores a Bio::Structure::Residue
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Structure::Residue;
+use strict;
+
+use Bio::Structure::Chain;
+use Bio::Structure::Atom;
+use base qw(Bio::Root::Root);
+
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $residue = Bio::Structure::Residue->new( 
+                                           -id  => 'human_id',
+                                           );
+
+ Function: Returns a new Bio::Structure::Residue object from basic 
+	constructors. Probably most called from Bio::Structure::IO.
+ Returns : a new Bio::Structure::Residue object
+
+=cut
+
+
+# Constructor.  Only -id is honoured; passing -atom throws, because atoms
+# have to be added through an Entry object.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($id, $atom) = $self->_rearrange([qw(ID ATOM)], @args);
+
+    # a false ID (undef, "" or 0) is not stored
+    $self->id($id) if $id;
+
+    # every residue starts out with an empty 'atom' list
+    $self->{'atom'} = [];
+
+    # the 'smallest' (and only) item that can be added to a residue is an atom
+    if ($atom) {
+        $self->throw("add atoms via an Entry object entry->add_atom(residue,atom)\n");
+    }
+
+    return $self;
+}
+
+
+
+=head2 atom()
+
+ Title   : atom 
+ Usage   : 
+ Function:  nothing useful until I get symbolic references to do what I want
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Residue: always throws, whatever is passed in.
+sub atom {
+	my $self = shift;
+	$self->throw("no code down here, go see an Entry object nearby\n");
+}
+
+
+=head2 add_atom()
+
+ Title   : add_atom
+ Usage   : 
+ Function:  nothing useful until I get symbolic references to do what I want
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Residue: always throws, whatever is passed in.
+sub add_atom {
+	my $self = shift;
+	$self->throw("nothing here, use a method on an Entry object\n");
+}
+
+
+=head2 chain()
+
+ Title   : chain
+ Usage   : $chain = $residue->chain($chain)
+ Function: Sets the Chain this Residue belongs to
+ Returns : Returns the Chain this Residue belongs to
+ Args    : reference to a Chain
+
+=cut
+
+# Not implemented on a Residue: always throws, whatever is passed in.
+sub chain {
+	my $self = shift;
+	$self->throw("use an Entry based method please\n");
+}
+
+
+=head2 id()
+
+ Title   : id
+ Usage   : $residue->id("TRP-35")
+ Function: Gets/sets the ID for this residue
+ Returns : the ID
+ Args    : the ID
+
+=cut
+
+# Combined getter/setter for the residue ID.  A defined argument replaces
+# the stored ID; the current ID is returned either way.
+sub id {
+        my $self = shift;
+        $self->{'id'} = $_[0] if defined $_[0];
+        return $self->{'id'};
+}
+
+
+=head2 DESTROY()
+
+ Title   : DESTROY
+ Usage   : 
+ Function: destructor ( get rid of circular references )
+ Returns : 
+ Args    : 
+
+=cut
+
+# Destructor stub: a Residue has nothing of its own to clean up for now.
+sub DESTROY {
+	my $self = $_[0];
+
+	# no specific destruction for now
+}
+
+
+#
+# from here on only private methods
+#
+
+=head2 _remove_atoms()
+
+ Title   : _remove_atoms
+ Usage   : 
+ Function: Removes the atoms attached to a Residue. Tells the atoms they
+ 	don't belong to this Residue any more
+ Returns : 
+ Args    : 
+
+=cut
+
+# Not implemented on a Residue: always throws.
+sub _remove_atoms {
+	my $self = shift;
+	$self->throw("no code here\n");
+}
+
+
+=head2 _remove_chain()
+
+ Title   : _remove_chain
+ Usage   : 
+ Function: Removes the Chain this Residue is attached to.
+ Returns : 
+ Args    : 
+
+=cut
+
+# Forgets the Chain this Residue pointed to by resetting the 'chain' slot
+# to undef (the key itself stays in the hash).
+sub _remove_chain {
+	my $self = shift;
+
+	return $self->{'chain'} = undef;
+}
+
+
+=head2 _grandparent()
+
+ Title   : _grandparent
+ Usage   : 
+ Function: get/set a symbolic reference to our grandparent
+ Returns : 
+ Args    : 
+
+=cut
+
+# Combined getter/setter for the 'grandparent' slot.  Only plain strings are
+# accepted: passing any reference throws.  A defined argument is stored; the
+# current value is returned either way.
+sub _grandparent {
+	my ($self, $symref) = @_;
+
+	$self->throw("Thou shall only pass strings in here, no references $symref\n")
+		if ref($symref);
+
+	$self->{'grandparent'} = $symref if defined $symref;
+	return $self->{'grandparent'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP/Res.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP/Res.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP/Res.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1392 @@
+# $Header: /home/repository/bioperl/bioperl-live/Bio/Structure/SecStr/DSSP/Res.pm,v 1.10.4.3 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Structure::SecStr::DSSP::Res.pm
+#
+# Cared for by Ed Green <ed at compbio.berkeley.edu>
+#
+# Copyright Univ. of California
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::SecStr::DSSP::Res - Module for parsing/accessing dssp output
+
+=head1 SYNOPSIS
+
+  my $dssp_obj = new Bio::Structure::SecStr::DSSP::Res('-file'=>'filename.dssp');
+
+  # or
+
+  my $dssp_obj = new Bio::Structure::SecStr::DSSP::Res('-fh'=>\*STDIN);
+
+  # get DSSP defined Secondary Structure for residue 20
+  $sec_str = $dssp_obj->resSecStr( 20 );
+
+  # get dssp defined sec. structure summary for PDB residue  # 10 of chain A
+
+  $sec_str = $dssp_obj->resSecStrSum( '10:A' );
+
+=head1 DESCRIPTION
+
+DSSP::Res is a module for objectifying DSSP output.  Methods are then
+available for extracting all the information within the output file
+and convenient subsets of it.
+The principal purpose of DSSP is to determine secondary structural
+elements of a given structure.
+
+    ( Dictionary of protein secondary structure: pattern recognition
+      of hydrogen-bonded and geometrical features.
+      Biopolymers. 1983 Dec;22(12):2577-637. )
+
+The DSSP program is available from:
+  http://www.cmbi.kun.nl/swift/dssp
+
+This information is available on a per residue basis ( see resSecStr
+and resSecStrSum methods ) or on a per chain basis ( see secBounds
+method ).
+
+resSecStr() & secBounds() return one of the following:
+    'H' = alpha helix
+    'B' = residue in isolated beta-bridge
+    'E' = extended strand, participates in beta ladder
+    'G' = 3-helix (3/10 helix)
+    'I' = 5 helix (pi helix)
+    'T' = hydrogen bonded turn
+    'S' = bend
+    ' ' = no assignment
+
+A more general classification is returned using the resSecStrSum()
+method.  The purpose of this is to have a method for DSSP and STRIDE
+derived output whose range is the same.
+Its output is one of the following:
+
+    'H' = helix         ( => 'H', 'G', or 'I' from above )
+    'B' = beta          ( => 'B' or 'E' from above )
+    'T' = turn          ( => 'T' or 'S' from above )
+    ' ' = no assignment ( => ' ' from above )
+
+The methods are roughly divided into 3 sections:
+1.  Global features of this structure (PDB ID, total surface area,
+    etc.).  These methods do not require an argument.
+2.  Residue specific features ( amino acid, secondary structure,
+    solvent exposed surface area, etc. ).  These methods do require an
+    argument.  The argument is supposed to uniquely identify a
+    residue described within the structure.  It can be of any of the
+    following forms:
+    ('#A:B') or ( #, 'A', 'B' )
+      || |
+      || --- Chain ID (blank for single chain)
+      |----- Insertion code for this residue.  Blank for most residues.
+      ------ Numeric portion of residue ID.
+
+    (#)
+     |
+     --- Numeric portion of residue ID.  If there is only one chain and
+         it has no ID AND there is no residue with an insertion code at this
+         number, then this can uniquely specify a residue.
+
+    ('#:C') or ( #, 'C' )
+      | |
+      | -Chain ID
+      ---Numeric portion of residue ID.
+
+  If a residue is incompletely specified then the first residue that
+  fits the arguments is returned.  For example, if 19 is the argument
+  and there are three chains, A, B, and C with a residue whose number
+  is 19, then 19:A will be returned (assuming its listed first).
+
+  Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
+  are not supported by these modules.
+
+3.  Value-added methods.  Return values are not verbatim strings
+    parsed from DSSP or STRIDE output.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ed Green
+
+Email ed at compbio.berkeley.edu
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each method.
+Internal methods are preceded with a _
+
+=cut
+
+package Bio::Structure::SecStr::DSSP::Res;
+use strict;
+use Bio::Root::IO;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root);
+
+# Would be a class variable if Perl had them.  Maps each per-residue attribute
+# to [ begin col, # columns ].  NOTE(review): begin col is presumably a 0-based offset into the residue line - confirm in _parse.
+               #attribute        begin col        # columns
+our %lookUp = ( 'pdb_resnum'     => [  5,           5 ],
+		'insertionco'    => [  10,          1 ],
+		'pdb_chain'      => [  11,          1 ],
+		
+		'amino_acid'     => [  13,          1 ],
+		'term_sig'       => [  14,          1 ],
+		
+		'ss_summary'     => [  16,          1 ],
+		'3tph'           => [  18,          1 ],
+		'4tph'           => [  19,          1 ],
+		'5tph'           => [  20,          1 ],
+		'geo_bend'       => [  21,          1 ],
+		'chirality'      => [  22,          1 ],
+		'beta_br1la'     => [  23,          1 ],
+		'beta_br2la'     => [  24,          1 ],
+
+		'bb_part1nu'     => [  25,          4 ],
+		'bb_part2nu'     => [  29,          4 ],
+		'betash_lab'     => [  33,          1 ],
+		
+		'solv_acces'     => [  34,          4 ],
+		
+		'hb1_nh_o_p'     => [  39,          6 ],
+		'hb1_nh_o_e'     => [  46,          4 ],
+		
+		'hb1_o_hn_p'     => [  50,          6 ],
+		'hb1_o_hn_e'     => [  57,          4 ],
+		
+		'hb2_nh_o_p'     => [  61,          6 ],
+		'hb2_nh_o_e'     => [  68,          4 ],
+		
+		'hb2_o_hn_p'     => [  72,          6 ],
+		'hb2_o_hn_e'     => [  79,          4 ],
+		
+		'tco'            => [  85,          6 ],
+		
+		'kappa'          => [  91,          6 ],
+		
+		'alpha'          => [  97,          6 ],
+	
+		'phi'            => [ 103,          6 ],
+
+		'psi'            => [ 109,          6 ],
+		
+		'x_ca'           => [ 115,          7 ],
+		
+		'y_ca'           => [ 122,          7 ],
+		
+		'z_ca'           => [ 129,          7 ] );
+
+
+=head1 CONSTRUCTOR
+
+
+=cut
+
+
+=head2 new
+
+ Title         : new
+ Usage         : makes new object of this class
+ Function      : Constructor
+ Example       : $dssp_obj = Bio::Structure::SecStr::DSSP::Res->new( -file => 'filename.dssp' )
+ Returns       : object (ref)
+ Args          : filename ( must be proper DSSP output file )
+
+=cut
+
+sub new {
+    my ( $class, @ctor_args ) = @_;
+    my $self   = $class->SUPER::new( @ctor_args );
+    my $reader = Bio::Root::IO->new( @ctor_args );    # same args select the input
+    $self->_parse( $reader->_fh() );
+    $reader->close();
+    return $self;
+}
+
+=head1 ACCESSORS
+
+
+=cut
+
+# GLOBAL FEATURES / INFO / STATS
+
+=head2 totSurfArea
+
+ Title         : totSurfArea
+ Usage         : returns total accessible surface area in square Ang.
+ Function      :
+ Example       : $surArea = $dssp_obj->totSurfArea();
+ Returns       : scalar
+ Args          : none
+
+=cut
+
+sub totSurfArea {
+    my ( $self ) = @_;
+    return $self->{ 'Head' }{ 'ProAccSurf' };    # returned exactly as stored
+}
+
+=head2 numResidues
+
+ Title         : numResidues
+ Usage         : returns the total number of residues in all chains or
+                 just the specified chain if a chain is specified
+ Function      :
+ Example       : $num_res = $dssp_obj->numResidues();
+ Returns       : scalar int
+ Args          : optional chain id ( 'A', for example ); none => all chains
+
+
+=cut
+
+sub numResidues {
+    my ( $self, $chain ) = @_;
+
+    # No (or false) chain id: report the total stored in the header.
+    return $self->{'Head'}->{'TotNumRes'} if !$chain;
+
+    # Sum the lengths of every continuous segment belonging to $chain.
+    # _contSegs() already reports segment end points as DSSP keys
+    # (indices into $self->{'Res'}), and the lines between the two end
+    # points of a segment are consecutive, so a segment's length is
+    # simply end - start + 1.  The keys must not be passed through
+    # _toDsspKey(): that routine expects a PDB residue number and
+    # returns the first residue carrying that number, which is a
+    # different residue whenever PDB numbering does not coincide
+    # with the DSSP line numbering.
+    my $num_res = 0;    # 0, not undef, when no segment matches the chain
+    foreach my $cont_seg ( @{ $self->_contSegs() } ) {
+	# skip segments that belong to other chains
+	next if $chain ne $cont_seg->[ 2 ];
+	$num_res += $cont_seg->[ 1 ] - $cont_seg->[ 0 ] + 1;
+    }
+
+    return $num_res;
+}
+
+#  STRAIGHT FROM PDB ENTRY
+
+=head2 pdbID
+
+ Title         : pdbID
+ Usage         : returns pdb identifier ( 1FJM, e.g.)
+ Function      :
+ Example       : $pdb_id = $dssp_obj->pdbID();
+ Returns       : scalar string
+ Args          : none
+
+
+=cut
+
+sub pdbID {
+    my ( $self ) = @_;
+    return $self->{'Head'}{'PDB'};    # PDB identifier, e.g. 1FJM
+}
+
+=head2 pdbAuthor
+
+ Title         : pdbAuthor
+ Usage         : returns author field
+ Function      :
+ Example       : $auth = $dssp_obj->pdbAuthor()
+ Returns       : scalar string
+ Args          : none
+
+
+=cut
+
+sub pdbAuthor {
+    my ( $self ) = @_;
+    return $self->{'Head'}{'AUTHOR'};    # AUTHOR entry of the stored header
+}
+
+=head2 pdbCompound
+
+ Title         : pdbCompound
+ Usage         : returns pdbCompound given in PDB file
+ Function      :
+ Example       : $cmpd = $dssp_obj->pdbCompound();
+ Returns       : scalar string
+ Args          : none
+
+
+=cut
+
+sub pdbCompound {
+    my ( $self ) = @_;
+    return $self->{'Head'}{'COMPND'};    # COMPND entry of the stored header
+}
+
+=head2 pdbDate
+
+ Title         : pdbDate
+ Usage         : returns date given in PDB file
+ Function      :
+ Example       : $pdb_date = $dssp_obj->pdbDate();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub pdbDate {
+    my ( $self ) = @_;
+    return $self->{'Head'}{'DATE'};    # DATE entry of the stored header
+}
+
+=head2 pdbHeader
+
+ Title         : pdbHeader
+ Usage         : returns header info from PDB file
+ Function      :
+ Example       : $header = $dssp_obj->pdbHeader();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub pdbHeader {
+    my ( $self ) = @_;
+    return $self->{'Head'}{'HEADER'};    # HEADER entry of the stored header
+}
+
+=head2 pdbSource
+
+ Title         : pdbSource
+ Usage         : returns pdbSource information from PDBSOURCE line
+ Function      :
+ Example       : $pdbSource = $dssp_obj->pdbSource();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub pdbSource {
+    my ( $self ) = @_;
+    return $self->{'Head'}{'SOURCE'};    # SOURCE entry of the stored header
+}
+
+
+# RESIDUE SPECIFIC ACCESSORS
+
+=head2 resAA
+
+ Title         : resAA
+ Usage         : fetches the 1 char amino acid code, given an id
+ Function      :
+ Example       : $aa = $dssp_obj->resAA( '20:A' ); # pdb id as arg
+ Returns       : 1 character scalar string
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resAA {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'amino_acid' };
+}
+
+=head2 resPhi
+
+ Title         : resPhi
+ Usage         : returns phi angle of a single residue
+ Function      : accessor
+ Example       : $phi = $dssp_obj->resPhi( RESIDUE_ID )
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resPhi {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'phi' };
+}
+
+=head2 resPsi
+
+ Title         : resPsi
+ Usage         : returns psi angle of a single residue
+ Function      : accessor
+ Example       : $psi = $dssp_obj->resPsi( RESIDUE_ID )
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resPsi {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'psi' };
+}
+
+=head2 resSolvAcc
+
+ Title         : resSolvAcc
+ Usage         : returns solvent exposed area of this residue in
+                 square Angstroms
+ Function      :
+ Example       : $solv_acc = $dssp_obj->resSolvAcc( RESIDUE_ID );
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resSolvAcc {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'solv_acces' };
+}
+
+=head2 resSurfArea
+
+ Title         : resSurfArea
+ Usage         : returns solvent exposed area of this residue in
+                 square Angstroms
+ Function      :
+ Example       : $solv_acc = $dssp_obj->resSurfArea( RESIDUE_ID );
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resSurfArea {
+    my ( $self, @res_id ) = @_;
+    # reads the same 'solv_acces' column as resSolvAcc
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'solv_acces' };
+}
+
+=head2 resSecStr
+
+ Title         : resSecStr
+ Usage         : $ss = $dssp_obj->resSecStr( RESIDUE_ID );
+ Function      : returns the DSSP secondary structural designation of this residue
+ Example       :
+ Returns       : a character ( 'B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ' )
+ Args          : RESIDUE_ID
+ NOTE          : The range of this method differs from that of the
+    resSecStr method in the STRIDE SecStr parser.  That is because of the
+    slightly different format for STRIDE and DSSP output.  The resSecStrSum
+    method exists to map these different ranges onto an identical range.
+
+=cut
+
+sub resSecStr {
+    my ( $self, @res_id ) = @_;
+    my $idx  = $self->_toDsspKey( @res_id );
+    my $code = $self->{ 'Res' }[ $idx ]{ 'ss_summary' };
+    # a false (empty or missing) column is reported as a single blank
+    return ' ' unless $code;
+    return $code;
+}
+
+
+=head2 resSecStrSum
+
+ Title         : resSecStrSum
+ Usage         : $ss = $dssp_obj->resSecStrSum( $id );
+ Function      : returns what secondary structure group this residue belongs
+                 to.  One of:  'H': helix ( H, G, or I )
+                               'B': beta  ( B or E )
+                               'T': turn  ( T or S )
+                               ' ': none  ( ' ' )
+                 This method is similar to resSecStr, but the information
+                 it returns is less specific.
+ Example       :
+ Returns       : a character ( 'H', 'B', 'T', or ' ' )
+ Args          : RESIDUE_ID ( pdb residue identifier )
+
+
+=cut
+
+sub resSecStrSum {
+    my ( $self, @res_id ) = @_;
+    my $idx  = $self->_toDsspKey( @res_id );
+    my $code = $self->{ 'Res' }[ $idx ]{ 'ss_summary' };
+
+    # helix family: alpha (H), 3-helix (G) and 5 helix (I)
+    foreach my $helix ( 'H', 'G', 'I' ) {
+	return 'H' if $code eq $helix;
+    }
+
+    # a blank or false column means no assignment
+    return ' ' if $code eq ' ' || !$code;
+
+    # isolated bridge (B) and extended strand (E) both count as beta
+    return 'B' if $code eq 'B' || $code eq 'E';
+
+    return 'T';    # every remaining code is reported as a turn
+}
+
+# DSSP SPECIFIC
+
+=head2 hBonds
+
+ Title         : hBonds
+ Usage         : returns number of 14 different types of H Bonds
+ Function      :
+ Example       : $hb = $dssp_obj->hBonds
+ Returns       : pointer to 14 element array of ints
+ Args          : none
+ NOTE          : The different type of H-Bonds reported are, in order:
+    TYPE O(I)-->H-N(J)
+    IN PARALLEL BRIDGES
+    IN ANTIPARALLEL BRIDGES
+    TYPE O(I)-->H-N(I-5)
+    TYPE O(I)-->H-N(I-4)
+    TYPE O(I)-->H-N(I-3)
+    TYPE O(I)-->H-N(I-2)
+    TYPE O(I)-->H-N(I-1)
+    TYPE O(I)-->H-N(I+0)
+    TYPE O(I)-->H-N(I+1)
+    TYPE O(I)-->H-N(I+2)
+    TYPE O(I)-->H-N(I+3)
+    TYPE O(I)-->H-N(I+4)
+    TYPE O(I)-->H-N(I+5)
+
+=cut
+
+sub hBonds {
+    my ( $self ) = @_;
+    return $self->{'HBond'};    # whatever is stored under HBond, unchanged
+}
+
+=head2 numSSBr
+
+ Title         : numSSBr
+ Usage         : returns info about number of SS-bridges
+ Function      :
+ Example       : @SS_br = $dssp_obj->numSSBr();
+ Returns       : 3 element list of scalar ints
+ Args          : none
+
+
+=cut
+
+sub numSSBr {
+    my ( $self ) = @_;    # the three header counters come back in fixed order
+    return ( $self->{'Head'}{'TotSSBr'},
+	     $self->{'Head'}{'TotIaSSBr'},
+	     $self->{'Head'}{'TotIeSSBr'} );
+}
+
+=head2 resHB_O_HN
+
+ Title         : resHB_O_HN
+ Usage         : returns a 4 element list
+                 consisting of: relative position of binding
+                 partner #1, energy of that bond (kcal/mol),
+                 relative position of binding partner #2,
+                 energy of that bond (kcal/mol).  If the bond
+                 is not bifurcated, the second bond is reported
+                 as 0, 0.0
+ Function      : accessor
+ Example       : @oBonds = $dssp_obj->resHB_O_HN( RESIDUE_ID )
+ Returns       : 4 element list
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resHB_O_HN {
+    my ( $self, @res_id ) = @_;
+    my $idx = $self->_toDsspKey( @res_id );
+    # partner position and energy of bond 1, then the same pair for bond 2
+    return ( $self->{ 'Res' }[ $idx ]{ 'hb1_o_hn_p' },
+	     $self->{ 'Res' }[ $idx ]{ 'hb1_o_hn_e' },
+	     $self->{ 'Res' }[ $idx ]{ 'hb2_o_hn_p' },
+	     $self->{ 'Res' }[ $idx ]{ 'hb2_o_hn_e' } );
+}
+
+
+=head2 resHB_NH_O
+
+ Title         : resHB_NH_O
+ Usage         : returns a 4 element list
+                 consisting of: relative position of binding
+                 partner #1, energy of that bond (kcal/mol),
+                 relative position of binding partner #2,
+                 energy of that bond (kcal/mol).  If the bond
+                 is not bifurcated, the second bond is reported
+                 as 0, 0.0
+ Function      : accessor
+ Example       : @nhBonds = $dssp_obj->resHB_NH_O( RESIDUE_ID )
+ Returns       : 4 element list
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resHB_NH_O {
+    my ( $self, @res_id ) = @_;
+    my $idx = $self->_toDsspKey( @res_id );
+    # partner position and energy of bond 1, then the same pair for bond 2
+    return ( $self->{ 'Res' }[ $idx ]{ 'hb1_nh_o_p' },
+	     $self->{ 'Res' }[ $idx ]{ 'hb1_nh_o_e' },
+	     $self->{ 'Res' }[ $idx ]{ 'hb2_nh_o_p' },
+	     $self->{ 'Res' }[ $idx ]{ 'hb2_nh_o_e' } );
+}
+
+
+=head2 resTco
+
+ Title         : resTco
+ Usage         : returns tco angle around this residue
+ Function      : accessor
+ Example       : $tco = $dssp_obj->resTco( RESIDUE_ID )
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resTco {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'tco' };
+}
+
+
+=head2 resKappa
+
+ Title         : resKappa
+ Usage         : returns kappa angle around this residue
+ Function      : accessor
+ Example       : $kappa = $dssp_obj->resKappa( RESIDUE_ID )
+ Returns       : scalar
+ Args          : RESIDUE_ID ( dssp or PDB )
+
+
+=cut
+
+sub resKappa {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'kappa' };
+}
+
+
+=head2 resAlpha
+
+ Title         : resAlpha
+ Usage         : returns alpha angle around this residue
+ Function      : accessor
+ Example       : $alpha = $dssp_obj->resAlpha( RESIDUE_ID )
+ Returns       : scalar
+ Args          : RESIDUE_ID ( dssp or PDB )
+
+
+=cut
+
+sub resAlpha {
+    my ( $self, @res_id ) = @_;
+    # whatever form the residue id takes, it is first mapped to its DSSP key
+    my $idx = $self->_toDsspKey( @res_id );
+    return $self->{ 'Res' }[ $idx ]{ 'alpha' };
+}
+
+# VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
+
+=head2 secBounds
+
+ Title         : secBounds
+ Usage         : gets residue ids of boundary residues in each
+                 contiguous secondary structural element of specified
+                 chain
+ Function      : returns pointer to array of 3 element arrays.  First
+                 two elements are the PDB IDs of the start and end points,
+                 respectively and inclusively.  The last element is the
+                 DSSP secondary structural assignment code,
+                 i.e. one of : ('B', 'E', 'G', 'H', 'I', 'S', 'T', or ' ')
+ Example       : $ss_elements_pts = $dssp_obj->secBounds( 'A' );
+ Returns       : pointer to array of arrays
+ Args          : chain id ( 'A', for example ).  No arg => no chain id
+
+
+=cut
+
+sub secBounds {
+    my $self = shift;
+    my $chain = shift;
+    my %sec_bounds;
+    # a missing, false or blank chain id is looked up under the key '-'
+    $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' );
+
+    # the table covers every chain; it is built once and then memoized
+    if ( $self->{ 'SecBounds' } ) {
+	# check to make sure chain is valid
+	if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) {
+	    $self->throw( "No such chain: $chain\n" );
+	}
+	return $self->{ 'SecBounds' }->{ $chain };
+    }
+
+    my ( $cur_element, $i, $beg );
+    # the first element starts at DSSP key 1
+    $cur_element = $self->{ 'Res' }->[ 1 ]->{ 'ss_summary' };
+    $beg = 1;
+
+    for ( $i = 2; $i <= $self->_numResLines() - 1; $i++ ) {
+	if ( $self->{ 'Res' }->[ $i ]->{ 'amino_acid' } eq '!' ) {
+	    # element is terminated by a chain discontinuity
+	    push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
+		  [ $self->_toPdbId( $beg ),
+		    $self->_toPdbId( $i - 1 ),
+		    $cur_element ] );
+	    $i++;    # step over the '!' line; the next line opens a new element
+	    $beg = $i;
+	    $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' };
+	}
+	elsif ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ne $cur_element ) {
+	    # element is terminated by beginning of a new element
+	    push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
+		  [ $self->_toPdbId( $beg ),
+		    $self->_toPdbId( $i - 1 ),
+		    $cur_element ] );
+	    $beg = $i;
+	    $cur_element = $self->{ 'Res' }->[ $i ]->{ 'ss_summary' };
+	}
+    }
+    # A '!' on the next-to-last line (or a file with a single residue
+    # line) leaves $i one past the final line; clamp it so the code
+    # below closes the last element on a line that really exists.
+    $i = $self->_numResLines() if $i > $self->_numResLines();
+    # last residue: either it extends the element that is still open ...
+    if ( $self->{ 'Res' }->[ $i ]->{ 'ss_summary' } eq $cur_element ) {
+	push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
+	      [ $self->_toPdbId( $beg ),
+		$self->_toPdbId( $i ),
+		$cur_element ] );
+    }
+    else {    # ... or it closes that element and forms one of its own
+	push( @{ $sec_bounds{ $self->_pdbChain( $beg ) } },
+	      [ $self->_toPdbId( $beg ),
+		$self->_toPdbId( $i - 1 ),
+		$cur_element ] );
+	push( @{ $sec_bounds{ $self->_pdbChain( $i ) } },
+	      [ $self->_toPdbId( $i ),
+		$self->_toPdbId( $i ),
+		$self->{ 'Res' }->[ $i ]->{ 'ss_summary' } ] );
+    }
+
+    $self->{ 'SecBounds' } = \%sec_bounds;
+
+    # check to make sure chain is valid
+    if ( !( $self->{ 'SecBounds' }->{ $chain } ) ) {
+	$self->throw( "No such chain: $chain\n" );
+    }
+    return $self->{ 'SecBounds' }->{ $chain };
+}
+
+
+
+=head2 chains
+
+ Title         : chains
+ Usage         : returns pointer to array of chain I.D.s (characters)
+ Function      :
+ Example       : $chains_pnt = $dssp_obj->chains();
+ Returns       : pointer to array of characters, one of which may be ' '
+ Args          : none
+
+
+=cut
+
+sub chains {
+    my ( $self ) = @_;
+
+    # collect each chain id once, using a hash as a set
+    my %seen;
+    foreach my $segment ( @{ $self->_contSegs() } ) {
+	$seen{ $segment->[ 2 ] } = 1;
+    }
+
+    return [ keys %seen ];
+}
+
+
+=head2 residues
+
+    Title : residues
+    Usage : returns array of residue identifiers for all residues in
+    the output file, or in a specific chain
+    Function :
+    Example : @residues_ids = $dssp_obj->residues()
+    Returns : array of residue identifiers
+    Args : if none => returns residue ids of all residues of all
+    chains (in order); if chain id is given, returns just the residue
+    ids of residues in that chain
+
+
+=cut
+
+# Can't use the standard interface for getting the amino acid,
+# pdb_resnum, etc. in this method because we don't *know* the residue
+# identifiers - we are building a list of them.
+sub residues {
+    my ( $self, $chain ) = @_;
+    my @ids;
+
+    # DSSP keys run from 1 up to the number of residue lines
+    KEY:
+    foreach my $key ( 1 .. $self->_numResLines() ) {
+	# an amino acid column of '!' marks a chain discontinuity, not
+	# a real residue, so such lines never yield an id
+	next KEY if $self->{ 'Res' }->[ $key ]->{ 'amino_acid' } eq '!';
+
+	# with a chain argument, keep only the lines of that chain
+	next KEY if $chain
+	    && $chain ne $self->{ 'Res' }->[ $key ]->{ 'pdb_chain' };
+
+	# id = residue number . insertion code . ':' . chain id
+	push( @ids, join( '',
+			  $self->{ 'Res' }->[ $key ]->{ 'pdb_resnum' },
+			  $self->{ 'Res' }->[ $key ]->{ 'insertionco' },
+			  ':',
+			  $self->{ 'Res' }->[ $key ]->{ 'pdb_chain' } ) );
+    }
+    return @ids;
+}
+
+
+=head2 getSeq
+
+ Title         : getSeq
+ Usage         : returns a Bio::PrimarySeq object which represents a good
+                 guess at the sequence of the given chain
+ Function      : For most chains of most entries, the sequence returned by
+                 this method will be very good.  However, it is inherently
+                 unsafe to rely on DSSP to extract sequence information about
+                 a PDB entry.  More reliable information can be obtained from
+                 the PDB entry itself.
+ Example       : $pso = $dssp_obj->getSeq( 'A' );
+ Returns       : (pointer to) a PrimarySeq object
+ Args          : Chain identifier.  If none given, ' ' is assumed.  If no ' '
+                 chain, the first chain is used.
+
+
+=cut
+
+sub getSeq {
+    my $self  = shift;
+    my $chain = shift;
+    # Builds, caches and returns a Bio::PrimarySeq for one chain.
+    my ( $pot_chain,
+	 $seq,
+	 $frag_num,
+	 $frag,
+	 $curPdbNum,
+	 $lastPdbNum,
+	 $gap_len,
+	 $i,
+	 $id,
+	 );
+    my @frags;
+
+    if ( !( $chain ) ) {    # no (or false) chain id => use the blank chain
+	$chain = ' ';
+    }
+
+    if ( $self->{ 'Seq' }->{ $chain } ) {    # built before: reuse the cached object
+	return $self->{ 'Seq' }->{ $chain };
+    }
+
+    my $contSegs_pnt = $self->_contSegs();
+
+    # load up specified chain
+    foreach $pot_chain ( @{ $contSegs_pnt } ) {
+	if ( $pot_chain->[ 2 ] eq $chain ) {
+	    push( @frags, $pot_chain );
+	}
+    }
+    
+    # if that didn't work, just get the first one
+    if ( !( @frags ) ) {
+	$chain = $contSegs_pnt->[ 0 ]->[ 2 ];
+	foreach $pot_chain ( @{ $contSegs_pnt } ) {
+	    if ( $pot_chain->[ 2 ] eq $chain ) {
+		push( @frags, $pot_chain );
+	    }
+	}
+    }
+
+    # now build the sequence string
+    $seq = "";
+    $frag_num = 0;
+    foreach $frag ( @frags ) {
+	$frag_num++;
+	if ( $frag_num > 1 ) {  # we need to put in some gap seq
+	    $curPdbNum = $self->_pdbNum( $frag->[ 0 ] );
+	    $gap_len = $curPdbNum - $lastPdbNum - 1;    # residue numbers missing between the two fragments
+	    if ( $gap_len > 0 ) {
+		$seq .= 'u' x $gap_len;
+	    }
+	    else {
+		$seq .= 'u';    # always mark the break with at least one 'u'
+	    }
+	}
+	for ( $i = $frag->[ 0 ]; $i <= $frag->[ 1 ]; $i++ ) {
+	    $seq .= $self->_resAA( $i );
+	}
+	$lastPdbNum = $self->_pdbNum( $i - 1 );    # $i - 1 is the fragment's last DSSP key
+    }
+
+    # the sequence id is "<pdb id>:<chain>"; the object is cached per chain
+
+    $id = $self->pdbID();
+    $id .= ":$chain";
+
+    $self->{ 'Seq' }->{ $chain } =  Bio::PrimarySeq->new ( -seq => $seq,
+							   -id  => $id,
+							   -moltype => 'protein'    # NOTE(review): -moltype may no longer be recognised by Bio::PrimarySeq (-alphabet?) - confirm
+							   );
+    return $self->{ 'Seq' }->{ $chain };
+}
+
+=head1 INTERNAL METHODS
+
+
+=cut
+
+=head2 _pdbChain
+
+ Title         : _pdbChain
+ Usage         : returns the pdb chain id of given residue
+ Function      :
+ Example       : $chain_id = $dssp_obj->_pdbChain( DSSP_KEY );
+ Returns       : scalar
+ Args          : DSSP_KEY
+
+
+=cut
+
+sub _pdbChain {
+    # $key is a DSSP key, used directly as an index into the Res array
+    my ( $self, $key ) = @_;
+    return $self->{ 'Res' }[ $key ]{ 'pdb_chain' };
+}
+
+=head2 _resAA
+
+ Title         : _resAA
+ Usage         : fetches the 1 char amino acid code, given a dssp id
+ Function      :
+ Example       : $aa = $dssp_obj->_resAA( dssp_id );
+ Returns       : 1 character scalar string
+ Args          : dssp_id
+
+
+=cut
+
+sub _resAA {
+    # $key is a DSSP key, used directly as an index into the Res array
+    my ( $self, $key ) = @_;
+    return $self->{ 'Res' }[ $key ]{ 'amino_acid' };
+}
+
+
+=head2 _pdbNum
+
+ Title        : _pdbNum
+ Usage        : fetches the numeric portion of the identifier for a given
+                residue as reported by the pdb entry.  Note, this DOES NOT
+                uniquely specify a residue.  There may be an insertion code
+                and/or chain identifier differences.
+ Function     :
+ Example      : $pdbNum = $self->_pdbNum( DSSP_ID );
+ Returns      : a scalar
+ Args         : DSSP_ID
+
+
+=cut
+
+sub _pdbNum {
+    # $key is a DSSP key, used directly as an index into the Res array
+    my ( $self, $key ) = @_;
+    return $self->{ 'Res' }[ $key ]{ 'pdb_resnum' };
+}
+
+=head2 _pdbInsCo
+
+ Title        : _pdbInsCo
+ Usage        : fetches the Insertion Code for this residue, if it has one.
+ Function     :
+ Example      : $pdbNum = $self->_pdbInsCo( DSSP_ID );
+ Returns      : a scalar
+ Args         : DSSP_ID
+
+
+=cut
+
+sub _pdbInsCo {
+    # $key is a DSSP key, used directly as an index into the Res array
+    my ( $self, $key ) = @_;
+    return $self->{ 'Res' }[ $key ]{ 'insertionco' };
+}
+
+=head2 _toPdbId
+
+ Title        : _toPdbId
+ Usage        : Takes a dssp key and builds the corresponding
+                PDB identifier string
+ Function     :
+ Example      : $pdbId = $self->_toPdbId( DSSP_ID );
+ Returns      : scalar
+ Args         : DSSP_ID
+
+=cut
+
+sub _toPdbId {
+    my ( $self, $key ) = @_;
+
+    # residue number immediately followed by the insertion code
+    my $label = $self->_pdbNum( $key ) . $self->_pdbInsCo( $key );
+    my $chain = $self->_pdbChain( $key );
+    # the ':chain' suffix is only appended for a true chain id
+    return $chain ? "$label:$chain" : $label;
+}
+
+=head2 _contSegs
+
+ Title         : _contSegs
+ Usage         : find the endpoints of continuous regions of this structure
+ Function      : returns pointer to array of 3 element array.
+                 Elements are the dssp keys of the start and end points of each
+                 continuous element and its PDB chain id (may be blank).
+                 Note that it is common to have several
+                 continuous elements with the same chain id.  This occurs
+                 when an internal region is disordered and no structural
+                 information is available.
+ Example       : $cont_seg_ptr = $dssp_obj->_contSegs();
+ Returns       : pointer to array of arrays
+ Args          : none
+
+
+=cut
+
+sub _contSegs {
+    my ( $self ) = @_;
+
+    # the segment list is computed on first use and then reused
+    return $self->{ 'contSegs' } if $self->{ 'contSegs' };
+
+    my @segments;
+    my $idx;
+
+    # the first segment opens at DSSP key 1
+    my $seg_chain = $self->_pdbChain( 1 );
+    my $seg_start = 1;
+
+    # scan the internal lines; an amino acid column of '!' separates
+    # two segments, and the next segment opens on the following line
+    for ( $idx = 2; $idx <= $self->_numResLines() - 1; $idx++ ) {
+	next if $self->{ 'Res' }->[ $idx ]->{ 'amino_acid' } ne '!';
+	push( @segments, [ $seg_start, $idx - 1, $seg_chain ] );
+	$seg_start = $idx + 1;
+	$seg_chain = $self->_pdbChain( $idx + 1 );
+    }
+
+    # the segment that is still open ends where the scan stopped
+    push( @segments, [ $seg_start, $idx, $seg_chain ] );
+
+    return $self->{ 'contSegs' } = \@segments;
+}
+
+=head2 _numResLines
+
+ Title         : _numResLines
+ Usage         : returns the total number of residue lines in this
+                 dssp file.
+                 This number is DIFFERENT than the number of residues in
+                 the pdb file because dssp has chain termination and chain
+                 discontinuity 'residues'.
+ Function      :
+ Example       : $num_res = $dssp_obj->_numResLines();
+ Returns       : scalar int
+ Args          : none
+
+
+=cut
+
+sub _numResLines {
+    my ( $self ) = @_;
+    return $#{ $self->{ 'Res' } };    # highest index of the Res array
+}
+
+=head2 _toDsspKey
+
+ Title         : _toDsspKey
+ Usage         : returns the unique dssp integer key given a pdb residue id.
+                 All accessor methods require (internally)
+                 the dssp key.   This method is very useful in converting
+                 pdb keys to dssp keys so the accessors can accept pdb keys
+                 as argument.  PDB Residue IDs are inherently
+                 problematic since they have multiple parts of
+                 overlapping function and ill-defined or observed
+                 convention in form.  Input can be in any of the formats
+                 described in the DESCRIPTION section above.
+ Function      :
+ Example       : $dssp_id = $dssp_obj->_toDsspKey( '10B:A' )
+ Returns       : scalar int
+ Args          : pdb residue identifier: num[insertion code]:[chain]
+
+
+=cut
+
+sub _toDsspKey {
+    # Consider adding lookup table for 'common' name (like 20:A) for
+    # fast access.  Could be built during parse of input.
+
+    my $self = shift;
+    my $arg_str;
+    my ( $key_num, $chain_id, $ins_code );
+
+    # Branch on the argument COUNT (scalar @_).  $#_ is the last index,
+    # one less than the count, so it must not be used for this test.
+    if ( @_ > 1 ) { # multiple args: ( num, chain ) or ( num, ins, chain )
+	$key_num = shift;
+	if ( @_ > 1 ) { # still multiple args => ins. code, too
+	    $ins_code = shift;
+	    $chain_id = shift;
+	}
+	else { # just one more arg. => chain_id
+	    $chain_id = shift;
+	}
+    }
+    else { # only single arg.  Might be number or string
+	$arg_str = shift;
+	if ( $arg_str =~ /:/ ) {
+	    # a chain is specified
+	    ( $chain_id ) = ( $arg_str =~ /:(.)/);
+	    $arg_str =~ s/:.//;
+	}
+	if ( $arg_str =~ /[A-Z]|[a-z]/ ) {
+	    # an insertion code is specified
+	    ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ );
+	    $arg_str =~ s/[A-Z]|[a-z]//g;
+	}
+	#now, get the number bit-> everything still around
+	$key_num = $arg_str;
+    }
+
+    # Now find the residue which fits this description.  Linear search is
+    # probably not the best way to do this, but oh well...
+    for ( my $i = 1; $i <= $self->_numResLines(); $i++ ) {
+	unless ( ($self->{'Res'}->[$i]->{'term_sig'} eq '*') ||
+		 ($self->{'Res'}->[$i]->{'amino_acid'} eq '!') ) {
+	    # (lines flagged '*' or '!' are skipped: they never match)
+	    if ( $key_num == $self->{'Res'}->[$i]->{'pdb_resnum'} ) {
+		if ( $chain_id ) { # if a chain was specified
+		    if ( $chain_id eq $self->{'Res'}->[$i]->{'pdb_chain'} ) {
+			# and it's the right one
+			if ( $ins_code ) { # if insertion code was specified
+			    if ( $ins_code eq $self->{'Res'}->[$i]->{'insertionco'} ) {
+				# and it's the right one
+				return $i;
+			    }
+			}
+			else { # no insertion code specified, this is it
+			    return $i;
+			}
+		    }
+		}
+		else { # no chain was specified
+		    return $i;
+		}
+	    }
+	}
+    }
+    $self->throw( "PDB key not found." );
+}
+
+=head2 _parse
+
+ Title         : _parse
+ Usage         : parses dssp output
+ Function      :
+ Example       : used by the constructor
+ Returns       :
+ Args          : input source ( handled by Bio::Root::IO )
+
+
+=cut
+
+sub _parse {
+    # Reads DSSP output line by line from $file (read with <$file>) and
+    # fills $self->{'Head'}, $self->{'HBond'} and $self->{'Res'}.
+    # The header lines are consumed in a fixed order; optional lines are
+    # only recognised at the position where they are tested below.
+    my $self = shift;
+    my $file = shift;
+    my $cur;
+    # NOTE(review): $current_chain and %his are declared but never used here.
+    my $current_chain;
+    my ( @elements, @hbond );
+    my ( %head, %his, );
+    my $element;
+    my $res_num;
+
+    # The first line must be the DSSP banner, otherwise give up.
+    $cur = <$file>;
+    unless ( $cur =~ /^==== Secondary Structure Definition/ ) {
+	$self->throw( "Not dssp output" );
+	return;
+    }
+
+    # REFERENCE line (always there)
+    $cur = <$file>;
+    ( $element ) = ( $cur =~ /^REFERENCE\s+(.+?)\s+\./ );
+    $head{ 'REFERENCE' } = $element;
+
+    $cur = <$file>;
+    # Check for HEADER line (not always there)
+    if ( $cur =~ /^HEADER\s/ ) {
+	@elements = split( /\s+/, $cur );
+	pop( @elements ); # take off that annoying period
+	# the last two remaining fields are taken as PDB id and date
+	$head{ 'PDB' } = pop( @elements );
+	$head{ 'DATE' } = pop( @elements );
+	# now, everything else is "header" except for the word
+	# HEADER
+	shift( @elements );
+	$element = shift( @elements );
+	# re-join the remaining words with single spaces
+	while ( @elements ) {
+	    $element = $element." ".shift( @elements );
+	}
+	$head{ 'HEADER' } = $element;
+	
+	$cur = <$file>;
+    }
+
+    # Check for COMPND line (not always there)
+    if ( $cur =~ /^COMPND\s/ ) {
+	($element) = ( $cur =~ /^COMPND\s+(.+?)\s+\./ );
+	$head{ 'COMPND' } = $element;
+	
+	$cur = <$file>;
+    }
+
+    # Check for SOURCE or PDBSOURCE line (not always there)
+    # Either spelling is stored under the same 'SOURCE' key.
+    if ( $cur =~ /^PDBSOURCE\s/ ) {
+	($element) = ( $cur =~ /^PDBSOURCE\s+(.+?)\s+\./ );
+	$head{ 'SOURCE' } = $element;
+	
+	$cur = <$file>;
+    }
+
+    elsif ( $cur =~ /^SOURCE\s/ ) {
+	($element) = ( $cur =~ /^SOURCE\s+(.+?)\s+\./ );
+	$head{ 'SOURCE' } = $element;
+	
+	$cur = <$file>;
+    }
+
+    # Check for AUTHOR line (not always there)
+    # Only the first whitespace-delimited word after AUTHOR is captured.
+    if ( $cur =~ /^AUTHOR/ ) {
+	($element) = ( $cur =~ /^AUTHOR\s+(.+?)\s+/ );
+	$head{ 'AUTHOR' } = $element;
+
+	$cur = <$file>;
+    }
+
+    # A B C D E TOTAL NUMBER OF RESIDUES, NUMBER ... line
+    # The first split field is discarded; the next five are stored in order.
+    @elements = split( /\s+/, $cur );
+    shift( @elements );
+    $head{ 'TotNumRes' } = shift( @elements );
+    $head{ 'NumChain' }  = shift( @elements );
+    $head{ 'TotSSBr' }   = shift( @elements );
+    $head{ 'TotIaSSBr' } = shift( @elements );
+    $head{ 'TotIeSSBr' } = shift( @elements );
+
+    # Accessible surface line: keep the leading decimal number.
+    $cur = <$file>;
+    ( $element ) = ( $cur =~ /\s*(\d+\.\d*)\s+ACCESSIBLE SURFACE OF PROTEIN/ );
+    $head{ 'ProAccSurf' } = $element;
+    $self->{ 'Head' } = \%head;
+
+    # The next 14 lines are each matched against the 'TOTAL NUMBER OF
+    # HYDROGEN' pattern; the leading integer of each is collected in file
+    # order (undef is stored for a line that does not match).
+    for ( my $i = 1; $i <= 14; $i++ ) {
+	$cur = <$file>;
+	( $element ) =
+	    $cur =~ /\s*(\d+)\s+\d+\.\d+\s+TOTAL NUMBER OF HYDROGEN/;
+	push( @hbond, $element );
+#	$hbond{ $hBondType } = $element;
+    }
+    $self->{ 'HBond' } = \@hbond;
+
+    # Skip everything up to and including the column-header line that
+    # contains 'RESIDUE AA STRUCTURE'.  The loop also stops when chomp()
+    # removes nothing (end of input or a line without a trailing newline).
+    my $histogram_finished = 0;
+    while ( !($histogram_finished) && chomp( $cur = <$file> ) ) {
+	if ( $cur =~ /RESIDUE AA STRUCTURE/ ) {
+	    $histogram_finished = 1;
+	}
+    }
+
+    # Residue lines: the first five characters (whitespace removed) are used
+    # as the index into the 'Res' array; blank lines are skipped.
+    while ( $cur = <$file> ) {
+	if ( $cur =~ m/^\s*$/ ) {
+	    next;
+	}
+	$res_num = substr( $cur, 0, 5 );
+	$res_num =~ s/\s//g;
+	# called with '&' because _parseResLine is declared with an empty
+	# prototype; the '&' form bypasses prototype checking
+	$self->{ 'Res' }->[ $res_num ] = &_parseResLine( $cur );
+    }
+}
+
+
+=head2 _parseResLine
+
+ Title         : _parseResLine
+ Usage         : parses a single residue line
+ Function      :
+ Example       : used internally
+ Returns       :
+ Args          : residue line ( string )
+
+
+=cut
+
+sub _parseResLine {
+    # Plain function (not a method): takes one residue line and returns a
+    # hash ref of feature name => whitespace-stripped column text.
+    # The former empty prototype '()' claimed the sub takes no arguments,
+    # which is why callers had to use the '&' call form; it has been dropped
+    # (the '&' form keeps working).
+    my $cur = shift;
+    my %elements;
+
+    # Each %lookUp entry is used as [ offset, length ] for substr().
+    # NOTE(review): %lookUp is defined elsewhere in this file.
+    foreach my $feat ( keys %lookUp ) {
+        my $value = substr( $cur, $lookUp{ $feat }->[0],
+                            $lookUp{ $feat }->[1] );
+        $value =~ s/\s//g;
+        $elements{ $feat } = $value;
+    }
+
+    # if no chain id, make it '-' (like STRIDE...very convenient).
+    # Test for "missing or empty" rather than truthiness so that a chain
+    # id of '0' is kept instead of being rewritten to '-'.
+    my $chain = $elements{ 'pdb_chain' };
+    if ( !defined $chain || $chain eq '' ) {
+        $elements{ 'pdb_chain' } = '-';
+    }
+    return \%elements;
+}
+
+1;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/DSSP/Res.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE/Res.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE/Res.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE/Res.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1110 @@
+# $id $
+#
+# bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm
+#
+# Cared for by Ed Green <ed at compbio.berkeley.edu>
+#
+# Copyright Univ. of California
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output
+
+=head1 SYNOPSIS
+
+ my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' );
+
+ # or
+
+ my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT );
+
+ # Get secondary structure assignment for PDB residue 20 of chain A
+ $sec_str = $stride_obj->resSecStr( '20:A' );
+
+ # same
+ $sec_str = $stride_obj->resSecStr( 20, 'A' )
+
+=head1 DESCRIPTION
+
+STRIDE::Res is a module for objectifying STRIDE output.  STRIDE is a
+program (similar to DSSP) for assigning secondary structure to
+individual residues of a pdb structure file.
+
+    ( Knowledge-Based Protein Secondary Structure Assignment,
+    PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) )
+
+STRIDE is available here:
+http://www.embl-heidelberg.de/argos/stride/down_stride.html
+
+Methods are then available for extracting all of the information
+present within the output or convenient subsets of it.
+
+Although they are very similar in function, DSSP and STRIDE differ
+somewhat in output format.  These differences are reflected in the
+return value of some methods of these modules.  For example, both
+the STRIDE and DSSP parsers have resSecStr() methods for returning
+the secondary structure of a given residue.  However, the range of
+return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the
+range of values for STRIDE is ( H, G, I, E, B, b, T, and C ).  See
+individual methods for details.
+
+The methods are roughly divided into 3 sections:
+
+  1.  Global features of this structure (PDB ID, total surface area,
+      etc.).  These methods do not require an argument. 
+  2.  Residue specific features ( amino acid, secondary structure,
+      solvent exposed surface area, etc. ).  These methods do require an
+      argument.  The argument is supposed to uniquely identify a
+      residue described within the structure.  It can be of any of the
+      following forms:
+      ('#A:B') or ( #, 'A', 'B' )
+  	|| |
+  	|| - Chain ID (blank for single chain)
+  	|--- Insertion code for this residue.  Blank for most residues.
+  	|--- Numeric portion of residue ID.
+
+      (#)
+       |
+       --- Numeric portion of residue ID.  If there is only one chain and
+  	   it has no ID AND there is no residue with an insertion code at this
+  	   number, then this can uniquely specify a residue.
+
+      ('#:C') or ( #, 'C' )
+  	| |
+  	| -Chain ID
+  	---Numeric portion of residue ID.
+
+     If a residue is incompletely specified then the first residue that
+     fits the arguments is returned.  For example, if 19 is the argument
+     and there are three chains, A, B, and C with a residue whose number
+     is 19, then 19:A will be returned (assuming its listed first).
+
+     Since neither DSSP nor STRIDE correctly handle alt-loc codes, they
+     are not supported by these modules.
+
+ 3.  Value-added methods.  Return values are not verbatim strings
+     parsed from DSSP or STRIDE output.  
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ed Green
+
+Email ed at compbio.berkeley.edu
+
+
+=head1 APPENDIX
+
+The Rest of the documentation details each method.
+Internal methods are preceded with a _.
+
+
+=cut
+
+package Bio::Structure::SecStr::STRIDE::Res;
+use strict;
+use Bio::Root::IO;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root);
+
+# Column index of each field inside one parsed ASG record.  The order
+# matches the order in which _parseASG pushes the fields it cuts out of
+# an ASG line (@ASG_lookup).
+our %ASGTable = ( 'aa'         =>  0,
+		  'resNum'     =>  1,
+		  'ssAbbr'     =>  2,
+		  'ssName'     =>  3,
+		  'phi'        =>  4,
+		  'psi'        =>  5,
+		  'surfArea'   =>  6 );
+
+# Three-letter residue name => one-letter code for the 20 standard amino
+# acids.  Any other residue name has no entry, so a lookup yields undef.
+our %AATable = ( 'ALA' => 'A', 'ARG' => 'R', 'ASN' => 'N',
+		 'ASP' => 'D', 'CYS' => 'C', 'GLN' => 'Q',
+		 'GLU' => 'E', 'GLY' => 'G', 'HIS' => 'H',
+		 'ILE' => 'I', 'LEU' => 'L', 'LYS' => 'K',
+		 'MET' => 'M', 'PHE' => 'F', 'PRO' => 'P',
+		 'SER' => 'S', 'THR' => 'T', 'TRP' => 'W',
+		 'TYR' => 'Y', 'VAL' => 'V' );
+
+=head2 new
+
+ Title         : new
+ Usage         : makes new object of this class
+ Function      : Constructor
+ Example       : $stride_obj = Bio::Structure::SecStr::STRIDE::Res->new( '-file' =>  filename 
+						     # or 
+						     '-fh'   => FILEHANDLE )
+ Returns       : object (ref)
+ Args          : filename or filehandle( must be proper STRIDE output )
+
+=cut
+
+sub new {
+    my $class     = shift;
+    my @ctor_args = @_;
+
+    # The same argument list builds the base object and the reader that
+    # wraps the input.
+    my $self   = $class->SUPER::new( @ctor_args );
+    my $reader = Bio::Root::IO->new( @ctor_args );
+
+    # _parse is handed the IO object itself, not a raw filehandle
+    $self->_parse( $reader );
+    $reader->close();
+
+    return $self;
+}
+
+# GLOBAL FEATURES / INFO / STATS
+
+=head2 totSurfArea
+
+ Title         : totSurfArea
+ Usage         : returns sum of surface areas of all residues of all
+                 chains considered.  Result is memoized.
+ Function      :
+ Example       : $tot_SA = $stride_obj->totSurfArea();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub totSurfArea {
+    my ( $self ) = @_;
+
+    # reuse the memoized total when there is a (true) one
+    return $self->{ 'SurfArea' } if $self->{ 'SurfArea' };
+
+    # add up the surface-area column of every residue of every chain;
+    # residues live at indices 1 .. last
+    my $area_col = $ASGTable{ 'surfArea' };
+    my $sum      = 0;
+    for my $chain_id ( keys %{ $self->{ 'ASG' } } ) {
+        my $last = $#{ $self->{ 'ASG' }->{ $chain_id } };
+        for my $ord ( 1 .. $last ) {
+            $sum += $self->{ 'ASG' }->{ $chain_id }->[ $ord ]->[ $area_col ];
+        }
+    }
+
+    $self->{ 'SurfArea' } = $sum;
+    return $self->{ 'SurfArea' };
+}
+
+=head2 numResidues
+
+ Title         : numResidues
+ Usage         : returns total number of residues in all chains or
+                 just the specified chain
+ Function      : 
+ Example       : $tot_res = $stride_obj->numResidues();
+ Returns       : scalar int
+ Args          : none or chain id
+
+
+=cut
+
+sub numResidues {
+    my ( $self, $chain ) = @_;
+    my $count = 0;
+
+    # Each chain array is indexed from 1, so its last index is its residue
+    # count.  With a (true) chain argument only that chain is counted.
+    for my $id ( keys %{ $self->{ 'ASG' } } ) {
+        next if $chain && $id ne $chain;
+        $count += $#{ $self->{ 'ASG' }{ $id } };
+    }
+    return $count;
+}
+
+# STRAIGHT FROM THE PDB ENTRY
+
+=head2 pdbID
+
+ Title         : pdbID
+ Usage         : returns pdb identifier ( 1FJM, e.g. )
+ Function      : 
+ Example       : $pdb_id = $stride_obj->pdbID();
+ Returns       : scalar string
+ Args          : none
+
+
+=cut
+
+sub pdbID {
+    # value stored under 'PDB' (set by _parseHead from the HDR line)
+    my ( $self ) = @_;
+    my $pdb_code = $self->{ 'PDB' };
+    return $pdb_code;
+}
+
+=head2 pdbAuthor
+
+ Title         : pdbAuthor
+ Usage         : returns author of this PDB entry
+ Function      : 
+ Example       : $auth = $stride_obj->pdbAuthor()
+ Returns       : scalar string
+ Args          : none
+
+
+=cut
+
+sub pdbAuthor {
+    # all stored AUT entries, joined with single spaces
+    my ( $self ) = @_;
+    my $author_lines = $self->{ 'HEAD' }->{ 'AUT' };
+    return join( ' ', @{ $author_lines } );
+}
+
+=head2 pdbCompound
+
+ Title         : pdbCompound
+ Usage         : returns string of what was found on the  
+                 CMP lines
+ Function      : 
+ Example       : $cmp = $stride_obj->pdbCompound();
+ Returns       : string
+ Args          : none
+
+
+=cut
+
+sub pdbCompound {
+    # all stored CMP entries, joined with single spaces
+    my ( $self ) = @_;
+    my $compound_lines = $self->{ 'HEAD' }->{ 'CMP' };
+    return join( ' ', @{ $compound_lines } );
+}
+
+=head2 pdbDate
+
+ Title         : pdbDate
+ Usage         : returns date given in PDB file
+ Function      :
+ Example       : $pdb_date = $stride_obj->pdbDate();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub pdbDate {
+    # value stored under 'DATE' (set by _parseHead from the HDR line)
+    my ( $self ) = @_;
+    my $date = $self->{ 'DATE' };
+    return $date;
+}
+
+=head2 pdbHeader
+
+ Title         : pdbHeader
+ Usage         : returns string of characters found on the PDB header line
+ Function      :
+ Example       : $head = $stride_obj->pdbHeader();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub pdbHeader {
+    # header text kept under HEAD => HEADER
+    my ( $self ) = @_;
+    my $header_text = $self->{ 'HEAD' }->{ 'HEADER' };
+    return $header_text;
+}
+
+=head2 pdbSource
+
+ Title         : pdbSource
+ Usage         : returns string of what was found on SRC lines
+ Function      : 
+ Example       : $src = $stride_obj->pdbSource();
+ Returns       : scalar
+ Args          : none
+
+
+=cut
+
+sub pdbSource {
+    # all stored SRC entries, joined with single spaces
+    my ( $self ) = @_;
+    my $source_lines = $self->{ 'HEAD' }->{ 'SRC' };
+    return join( ' ', @{ $source_lines } );
+}
+
+# RESIDUE SPECIFIC ACCESSORS
+
+=head2 resAA
+
+ Title         : resAA
+ Usage         : returns 1 letter abbr. of the amino acid specified by
+                 the arguments
+ Function      : 
+ Examples      : $aa = $stride_obj->resAA( RESIDUE_ID );
+ Returns       : scalar character
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resAA {
+    my ( $self, @res_id ) = @_;
+
+    # resolve whatever identifier form was given to ( ordinal, chain )
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # stored name is three-letter; translate it through %AATable
+    my $three_letter =
+        $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'aa' } ];
+    return $AATable{ $three_letter };
+}
+
+=head2 resPhi
+
+ Title         : resPhi
+ Usage         : returns phi angle of specified residue
+ Function      :
+ Example       : $phi = $stride_obj->resPhi( RESIDUE_ID );
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resPhi {
+    my ( $self, @res_id ) = @_;
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # phi column of the residue's ASG record
+    my $col = $ASGTable{ 'phi' };
+    return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $col ];
+}
+
+=head2 resPsi
+
+ Title         : resPsi
+ Usage         : returns psi angle of specified residue
+ Function      :
+ Example       : $psi = $stride_obj->resPsi( RESIDUE_ID );
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resPsi {
+    my ( $self, @res_id ) = @_;
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # psi column of the residue's ASG record
+    my $col = $ASGTable{ 'psi' };
+    return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $col ];
+}
+
+=head2 resSolvAcc
+
+ Title         : resSolvAcc
+ Usage         : returns stride calculated surface area of specified residue
+ Function      : 
+ Example       : $sa = $stride_obj->resSolvAcc( RESIDUE_ID );
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resSolvAcc {
+    my ( $self, @res_id ) = @_;
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # reads the same surface-area column as resSurfArea
+    my $col = $ASGTable{ 'surfArea' };
+    return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $col ];
+}
+
+=head2 resSurfArea
+
+ Title         : resSurfArea
+ Usage         : returns stride calculated surface area of specified residue
+ Function      : 
+ Example       : $sa = $stride_obj->resSurfArea( RESIDUE_ID );
+ Returns       : scalar
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resSurfArea {
+    my ( $self, @res_id ) = @_;
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # surface-area column of the residue's ASG record
+    my $col = $ASGTable{ 'surfArea' };
+    return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $col ];
+}
+
+=head2 resSecStr
+
+ Title         : resSecStr 
+ Usage         : gives one letter abbr. of stride determined secondary
+                 structure of specified residue
+ Function      : 
+ Example       : $ss = $stride_obj->resSecStr( RESIDUE_ID );
+ Returns       : one of: 'H' => Alpha Helix
+                         'G' => 3-10 helix
+                         'I' => PI-helix
+                         'E' => Extended conformation
+                         'B' or 'b' => Isolated bridge
+                         'T' => Turn
+                         'C' => Coil
+                         ' ' => None
+                # NOTE:  This range is slightly DIFFERENT from the
+                #        DSSP method of the same name
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resSecStr {
+    my ( $self, @res_id ) = @_;
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # one-letter secondary structure column of the residue's ASG record
+    my $col = $ASGTable{ 'ssAbbr' };
+    return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $col ];
+}
+
+=head2 resSecStrSum
+
+ Title         : resSecStrSum
+ Usage         : gives one letter summary of secondary structure of
+                 specified residue.  More general than resSecStr()
+ Function      :
+ Example       : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID );
+ Returns       : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil)
+ Args          : residue identifier(s) ( SEE INTRO NOTE )
+
+
+=cut
+
+sub resSecStrSum {
+    my ( $self, @res_id ) = @_;
+    my $code = $self->resSecStr( @res_id );
+
+    # collapse the detailed code into helix / beta / turn; anything that
+    # is not listed here is reported as coil
+    my %summary_for = (
+        'H' => 'H', 'G' => 'H', 'I' => 'H',
+        'E' => 'B', 'B' => 'B', 'b' => 'B',
+        'T' => 'T',
+    );
+    return exists $summary_for{ $code } ? $summary_for{ $code } : 'C';
+}
+
+# STRIDE SPECIFIC
+
+=head2 resSecStrName
+
+ Title         : resSecStrName
+ Usage         : gives full name of the secondary structural element
+                 classification of the specified residue
+ Function      : 
+ Example       : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID );
+ Returns       : scalar string
+ Args          : RESIDUE_ID
+
+
+=cut
+
+sub resSecStrName {
+    my ( $self, @res_id ) = @_;
+    my ( $ord, $chain ) = $self->_toOrdChain( @res_id );
+
+    # full secondary structure name column of the residue's ASG record
+    my $col = $ASGTable{ 'ssName' };
+    return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $col ];
+}
+
+=head2 strideLocs
+
+ Title         : strideLocs
+ Usage         : returns stride determined contiguous secondary
+    structural elements as specified on the LOC lines
+ Function      : 
+ Example       : $loc_pnt = $stride_obj->strideLocs();
+ Returns       : pointer to array of 5 element arrays.
+    0 => stride name of structural element
+    1 => first residue pdb key (including insertion code, if app.)
+    2 => first residue chain id
+    3 => last residue pdb key (including insertion code, if app.)
+    4 => last residue chain id
+    NOTE the differences between this range and the range of SecBounds()
+ Args          : none
+
+
+=cut
+
+sub strideLocs {
+    # array ref built by _parseSummary from the LOC lines
+    my ( $self ) = @_;
+    my $locs = $self->{ 'LOC' };
+    return $locs;
+}
+
+# VALUE ADDED METHODS (NOT JUST PARSE/REPORT)
+
+=head2 secBounds
+
+ Title         : secBounds
+ Usage         : gets residue ids of boundary residues in each
+                 contiguous secondary structural element of specified
+                 chain 
+ Function      : 
+ Example       : $ss_bound_pnt = $stride_obj->secBounds( 'A' );
+ Returns       : pointer to array of 3 element arrays.  First two elements
+                 are the ordinal positions (counted from 1 within the
+                 chain) of the start and end residues, respectively
+                 and inclusively.  The last element is the STRIDE secondary
+                 structural element code (same range as resSecStr).
+ Args          : chain identifier ( one character ).  If none, '-' is assumed
+
+
+=cut
+
+sub secBounds {
+    # Splits one chain into runs of identical secondary structure code.
+    # Returns (and memoizes per chain) a ref to an array of
+    # [ first, last, code ] triples, where first/last are 1-based positions
+    # in the chain's ASG array.  Throws when the chain does not exist.
+    #
+    # Requires a chain name.  If left blank, we assume ' ' which equals '-'
+    my ( $self, $chain ) = @_;
+
+    $chain = '-' if ( !( $chain ) || $chain eq ' ' );
+
+    # if we've memoized this one, use that
+    if ( $self->{ 'SecBounds' }->{ $chain } ) {
+        return $self->{ 'SecBounds' }->{ $chain };
+    }
+
+    #check to make sure chain is valid
+    if ( !( $self->{ 'ASG' }->{ $chain } ) ) {
+        $self->throw( "No such chain: $chain\n" );
+    }
+
+    my $residues = $self->{ 'ASG' }->{ $chain };
+    my $ss_col   = $ASGTable{ 'ssAbbr' };
+    my $last     = $#{ $residues };
+    my @SecBounds;
+
+    if ( $last >= 1 ) {
+        my $beg         = 1;
+        my $cur_element = $residues->[ 1 ]->[ $ss_col ];
+
+        for my $i ( 2 .. $last ) {
+            my $ss = $residues->[ $i ]->[ $ss_col ];
+            next if $ss eq $cur_element;
+
+            # the code changed: close the run that ended at $i - 1
+            push( @SecBounds, [ $beg, $i - 1, $cur_element ] );
+            $beg         = $i;
+            $cur_element = $ss;
+        }
+
+        # Close the final run at the last real residue.  The earlier code
+        # looked at index $last + 1 here, which created an empty extra
+        # slot in the chain array and added a bogus trailing segment.
+        push( @SecBounds, [ $beg, $last, $cur_element ] );
+    }
+
+    $self->{ 'SecBounds' }->{ $chain } = \@SecBounds;
+    return $self->{ 'SecBounds' }->{ $chain };
+}
+
+=head2 chains
+
+ Title         : chains
+ Usage         : gives the chain I.D.s (characters) of this structure
+ Function      :
+ Example       : @chains = @{ $stride_obj->chains() };
+ Returns       : reference to an array of characters
+ Args          : none
+
+
+=cut
+
+sub chains {
+    # the chain ids are the keys of the ASG hash; a reference to a fresh
+    # array of them is returned (hash order, i.e. unsorted)
+    my ( $self ) = @_;
+    return [ keys %{ $self->{ 'ASG' } } ];
+}
+
+=head2 getSeq
+
+ Title         : getSeq
+ Usage         : returns a Bio::PrimarySeq object which represents an
+                 approximation at the sequence of the specified chain.
+ Function      : For most chain of most entries, the sequence returned by
+                 this method will be very good.  However, it is inherently 
+                 unsafe to rely on STRIDE to extract sequence information about
+                 a PDB entry.  More reliable information can be obtained from
+                 the PDB entry itself.  If a second option is given
+                 (and evaluates to true), the sequence generated will
+                 have 'X' in spaces where the pdb residue numbers are
+                 discontinuous.  In some cases this results in a
+                 better sequence object (when the  discontinuity is
+		 due to regions which were present, but could not be
+		 resolved).  In other cases, it will result in a WORSE
+                 sequence object (when the discontinuity is due to
+		 historical sequence numbering and all sequence is
+		 actually resolved).
+ Example       : $pso = $stride_obj->getSeq( 'A' );
+ Returns       : (pointer to) a PrimarySeq object
+ Args          : Chain identifier.  If none given, '-' is assumed.  
+
+
+=cut
+
+sub getSeq {
+    # Builds (and memoizes) a Bio::PrimarySeq for one chain from the
+    # one-letter codes of its residues.  With a true $fill_in, gaps in the
+    # pdb residue numbering are padded with 'X'.
+    my ( $self, $chain, $fill_in ) = @_;
+
+    if ( !( $chain ) ) {
+        $chain = '-';
+    }
+
+    # Filled and unfilled sequences differ, so they are cached separately;
+    # a single per-chain cache would hand back whichever was built first.
+    my $cache = $fill_in ? 'SeqFilled' : 'Seq';
+    if ( $self->{ $cache }->{ $chain } ) {
+        return $self->{ $cache }->{ $chain };
+    }
+
+    my $seq          = "";
+    my $num_res      = $self->numResidues( $chain );
+    my $last_res_num = $self->_pdbNum( 1, $chain );
+
+    for my $i ( 1 .. $num_res ) {
+        if ( $fill_in ) {
+            my $cur_res_num = $self->_pdbNum( $i, $chain );
+            my $step        = $cur_res_num - $last_res_num;
+
+            # a jump of more than one in the numbering: one 'X' per
+            # skipped number
+            if ( $step > 1 ) {
+                $seq .= 'X' x ( $step - 1 );
+            }
+            $last_res_num = $cur_res_num;
+        }
+        $seq .= $self->_resAA( $i, $chain );
+    }
+
+    # display id is the pdb id immediately followed by the chain id
+    my $id = $self->pdbID();
+    $id .= "$chain";
+
+    $self->{ $cache }->{ $chain } = Bio::PrimarySeq->new( -seq => $seq,
+                                                          -id  => $id,
+                                                          -moltype => 'protein'
+                                                          );
+
+    return $self->{ $cache }->{ $chain };
+}
+
+=head1 INTERNAL METHODS
+
+=head2 _pdbNum
+
+ Title        : _pdbNum
+ Usage        : fetches the numeric portion of the identifier for a given
+                residue as reported by the pdb entry.  Note, this DOES NOT
+                uniquely specify a residue.  There may be an insertion code
+                and/or chain identifier differences.
+ Function     : 
+ Example      : $pdbNum = $self->_pdbNum( 3, 'A' );
+ Returns      : a scalar
+ Args         : valid ordinal num / chain combination
+
+
+=cut
+
+sub _pdbNum {
+    my ( $self, $ord, $chain ) = @_;
+
+    unless ( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) {
+        $self->throw( "No such ordinal $ord in chain $chain.\n" );
+    }
+
+    # the stored id may carry an insertion code after the digits; keep
+    # only the (possibly negative) number
+    my $res_id = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ];
+    my ( $number ) = ( $res_id =~ /(-*\d+).*/ );
+    return $number;
+}
+
+=head2 _resAA
+
+ Title         : _resAA
+ Usage         : returns 1 letter abbr. of the amino acid specified by
+                 the arguments
+ Function      : 
+ Examples      : $aa = $stride_obj->_resAA( 3, '-' );
+ Returns       : scalar character
+ Args          : ( ord. num, chain )
+
+
+=cut
+
+sub _resAA {
+    my ( $self, $ord, $chain ) = @_;
+
+    unless ( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) {
+        $self->throw( "No such ordinal $ord in chain $chain.\n" );
+    }
+
+    # stored name is three-letter; translate it through %AATable
+    my $res_name = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'aa' } ];
+    return $AATable{ $res_name };
+}
+
+=head2 _pdbInsCo
+
+ Title        : _pdbInsCo
+ Usage        : fetches the Insertion code for this residue.
+ Function     : 
+ Example      : $pdb_ins_co = $self->_pdbInsCo( 15, 'B' );
+ Returns      : a scalar
+ Args         : ordinal number and chain
+
+
+=cut
+
+sub _pdbInsCo {
+    my ( $self, $ord, $chain ) = @_;
+
+    unless ( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) {
+        $self->throw( "No such ordinal $ord in chain $chain.\n" );
+    }
+
+    # the insertion code is whatever non-digit character(s) follow the
+    # digits of the stored residue id; undef when there are none
+    my $res_id = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ];
+    my ( $ins_code ) = ( $res_id =~ /\d+(\D+)/ );
+    return $ins_code;
+}
+
+=head2 _toOrdChain
+
+ Title         : _toOrdChain
+ Usage         : takes any set of residue identifying parameters and
+    wrestles them into a two element array:  the chain and the ordinal
+    number of this residue.  This two element array can then be
+    efficiently used as keys in many of the above accessor methods
+ ('#A:B') or ( #, 'A', 'B' )
+   || |
+   || - Chain ID (blank for single chain)
+   |--- Insertion code for this residue.  Blank for most residues.
+   |--- Numeric portion of residue ID.
+
+  (#)
+   |
+   --- Numeric portion of residue ID.  If there is only one chain and
+   it has no ID AND there is no residue with an insertion code at this
+   number, then this can uniquely specify a residue.
+
+  ('#:C') or ( #, 'C' )
+       | |
+       | -Chain ID
+       ---Numeric portion of residue ID.
+
+  If a residue is incompletely specified then the first residue that 
+  fits the arguments is returned.  For example, if 19 is the argument 
+  and there are three chains, A, B, and C with a residue whose number 
+  is 19, then 19:A will be returned (assuming its listed first).
+
+ Function      :
+ Example       : my ( $ord, $chain ) = $self->_toOrdChain( @args );
+ Returns       : two element array
+ Args          : valid set of residue identifier(s) ( SEE NOTE ABOVE )
+
+
+=cut
+
+sub _toOrdChain {
+    # Turns any supported residue identifier into ( ordinal, chain ).
+    # Accepts ( num, ins_code, chain ), ( num, chain ) or one
+    # 'num[ins_code][:chain]' string.  Throws when the chain or the
+    # residue key does not exist.
+    my $self = shift;
+    my ( $key_num, $chain_id, $ins_code );
+
+    # check to see how many args are given
+    if ( @_ >= 3 ) {    # number, insertion code, chain
+        ( $key_num, $ins_code, $chain_id ) = @_;
+    }
+    elsif ( @_ == 2 ) {    # number, chain
+        ( $key_num, $chain_id ) = @_;
+    }
+    else {    # only single arg.  Might be number or string
+        my $arg_str = shift;
+        $arg_str = '' unless defined $arg_str;
+        if ( $arg_str =~ /:/ ) {
+            # a chain is specified; '.?' also accepts a colon with nothing
+            # after it ('20:'), which means "blank chain"
+            ( $chain_id ) = ( $arg_str =~ /:(.?)/ );
+            $arg_str =~ s/:.?//;
+        }
+        if ( $arg_str =~ /[A-Z]|[a-z]/ ) {
+            # an insertion code is specified
+            ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ );
+            $arg_str =~ s/[A-Z]|[a-z]//g;
+        }
+        #now, get the number bit-> everything still around
+        $key_num = $arg_str;
+    }
+
+    # Stored residue ids have all whitespace removed (see _parseASG), so a
+    # blank/undefined insertion code or a padded number must not leave
+    # whitespace in the lookup key.
+    $key_num  = '' unless defined $key_num;
+    $ins_code = '' unless defined $ins_code;
+    my $key = "$key_num$ins_code";
+    $key =~ s/\s//g;
+
+    if ( !( $chain_id ) || $chain_id eq ' ' ) {
+        $chain_id = '-';
+    }
+
+    if ( !( $self->{ 'ASG' }->{ $chain_id } ) ) {
+        $self->throw( "No such chain: $chain_id" );
+    }
+
+    # linear scan of the chain for the residue id
+    my $residues = $self->{ 'ASG' }->{ $chain_id };
+    for my $i ( 1 .. $#{ $residues } ) {
+        if ( $residues->[ $i ]->[ $ASGTable{ 'resNum' } ] eq $key ) {
+            return ( $i, $chain_id );
+        }
+    }
+
+    $self->throw( "No such key: $key" );
+
+}
+
+=head2 _parse
+
+ Title         : _parse
+ Usage         : as name suggests, parses stride output, creating object
+ Function      :
+ Example       : $self->_parse( $io );
+ Returns       : 
+ Args          : valid Bio::Root::IO object
+
+
+=cut
+
+sub _parse {
+    # Drives the four parsing stages in file order.  Every stage fetches
+    # the filehandle from $io itself, so none is needed here.
+    my ( $self, $io ) = @_;
+
+    # Parse top lines; _parseTop returns true when the first line does
+    # not look like STRIDE output
+    if ( $self->_parseTop( $io ) ) {
+        $self->throw( "Not stride output" );
+    }
+
+    # Parse the HDR, CMP, SRC, and AUT lines
+    $self->_parseHead( $io );
+
+    # Skip the CHN, SEQ and STR lines and store the LOC lines
+    $self->_parseSummary( $io );
+
+    # Parse the ASG lines
+    $self->_parseASG( $io );
+}
+
+=head2 _parseTop
+
+ Title         : _parseTop
+ Usage         : makes sure this looks like stride output
+ Function      :
+ Example       : 
+ Returns       :
+ Args          :
+
+
+=cut
+
+sub _parseTop {
+    my ( $self, $io ) = @_;
+    my $fh = $io->_fh();
+
+    # consume the first line; 0 means "looks like STRIDE", 1 means it
+    # does not
+    my $first_line = <$fh>;
+    return ( $first_line =~ /^REM  ---/ ) ? 0 : 1;
+}
+
+=head2 _parseHead
+
+ Title         : _parseHead
+ Usage         : parses
+ Function      : HDR, CMP, SRC, and AUT lines
+ Example       :
+ Returns       :
+ Args          :
+
+
+=cut
+
+sub _parseHead {
+    # Reads the HDR line and the CMP / SRC / AUT runs that follow it.
+    # Sets $self->{'PDB'}, $self->{'DATE'} and $self->{'HEAD'} (keys
+    # HEADER, CMP, SRC, AUT; the last three are array refs).
+    my ( $self, $io ) = @_;
+    my $file = $io->_fh();
+    my ( @cmp, @src, @aut );
+
+    # was 'my %head = {}', which stored a stringified hash reference as a
+    # junk key instead of starting with an empty hash
+    my %head;
+
+    # skip leading REM lines
+    my $cur = <$file>;
+    while ( defined $cur && $cur =~ /^REM / ) {
+        $cur = <$file>;
+    }
+
+    if ( defined $cur && $cur =~ /^HDR / ) {
+        my @elements = split( /\s+/, $cur );
+        shift( @elements );    # the word HDR
+        pop( @elements );      # last field is discarded (presumably the
+                               # PDB code repeated at line end - confirm)
+        $self->{ 'PDB' }  = pop( @elements );
+        $self->{ 'DATE' } = pop( @elements );
+        # now, everything else is "header"
+        $head{ 'HEADER' } = join( ' ', @elements );
+
+        # Advance only when the HDR line was really consumed; otherwise
+        # the line in hand may be the first CMP line and must not be lost.
+        $cur = <$file>;
+    }
+
+    # Each pattern requires the line to end in four word characters,
+    # which are left out of the captured text.  Lines that do not fit are
+    # skipped instead of being stored as undef.
+    while ( defined $cur && $cur =~ /^CMP / ) {
+        my ( $text ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ );
+        push( @cmp, $text ) if defined $text;
+        $cur = <$file>;
+    }
+
+    while ( defined $cur && $cur =~ /^SRC / ) {
+        my ( $text ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ );
+        push( @src, $text ) if defined $text;
+        $cur = <$file>;
+    }
+
+    while ( defined $cur && $cur =~ /^AUT / ) {
+        my ( $text ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ );
+        push( @aut, $text ) if defined $text;
+        $cur = <$file>;
+    }
+
+    $head{ 'CMP' } = \@cmp;
+    $head{ 'SRC' } = \@src;
+    $head{ 'AUT' } = \@aut;
+    $self->{ 'HEAD' } = \%head;
+}
+
+=head2 _parseSummary
+
+ Title         : _parseSummary
+ Usage         : parses LOC lines
+ Function      :
+ Example       :
+ Returns       :
+ Args          :
+
+
+=cut
+
+sub _parseSummary {
+    my ( $self, $io ) = @_;
+    my $fh   = $io->_fh();
+    my $line = <$fh>;
+    my @locs;
+
+    # [ offset, length ] of every LOC column that is kept; the residue
+    # name columns ( [ 18, 3 ] and [ 35, 3 ] ) are redundant and left out
+    my @LOC_lookup = ( [ 5,  12 ],   # Element name
+                       [ 22, 5 ],    # First residue PDB number
+                       [ 28, 1 ],    # First residue Chain ID
+                       [ 40, 5 ],    # Last residue PDB number
+                       [ 46, 1 ]  ); # Last residue Chain ID
+
+    #ignore these lines
+    while ( $line =~ /^REM |^STR |^SEQ |^CHN / ) {
+        $line = <$fh>;
+    }
+
+    # one array of whitespace-stripped column texts per LOC line
+    while ( $line =~ /^LOC / ) {
+        my @fields;
+        for my $col ( @LOC_lookup ) {
+            my $text = substr( $line, $col->[ 0 ], $col->[ 1 ] );
+            $text =~ s/\s//g;
+            push( @fields, $text );
+        }
+        push( @locs, \@fields );
+        $line = <$fh>;
+    }
+    $self->{ 'LOC' } = \@locs;
+
+}
+
+=head2 _parseASG
+
+ Title         : _parseASG
+ Usage         : parses ASG lines
+ Function      :
+ Example       :
+ Returns       :
+ Args          :
+
+
+=cut
+
+sub _parseASG {
+    # Reads the ASG lines and stores, per chain id, an array indexed by
+    # the ordinal number found on each line; every entry is an array of
+    # column texts laid out as described by %ASGTable.
+    my ( $self, $io ) = @_;
+    my $file = $io->_fh();
+    my $cur  = <$file>;
+    my ( $chain, $last_chain );
+    my %ASG;
+    my @elements;
+
+    # [ offset, length ] of the columns that are kept.  The chain id
+    # ( [ 9, 1 ] ) and the ordinal stride number ( [ 16, 4 ] ) are read
+    # separately below because they are used as keys.
+    my @ASG_lookup = ( [ 5,  3 ],  # Residue name
+                       [ 10, 5 ],  # PDB residue number (w/ins.code)
+                       [ 24, 1 ],  # one letter sec. stru. abbr.
+                       [ 26, 13],  # full sec. stru. name
+                       [ 42, 7 ],  # phi angle
+                       [ 52, 7 ],  # psi angle
+                       [ 64, 5 ] );# residue solv. acc.
+
+    while ( defined $cur && $cur =~ /^REM / ) {
+        $cur = <$file>;
+    }
+
+    while ( defined $cur && $cur =~ /^ASG / ) {
+        # get ordinal number for array key
+        my $ord_num = substr( $cur, 16, 4 );
+        $ord_num =~ s/\s//g;
+
+        # get the chain id
+        $chain = substr( $cur, 9, 1 );
+
+        # A new chain starts: file what was collected for the previous
+        # one.  'defined' rather than truthiness, so that a chain id of
+        # '0' is not merged into the chain that follows it.
+        if ( defined $last_chain && ( $chain ne $last_chain ) ) {
+            $ASG{ $last_chain } = [ @elements ];
+            @elements = ();
+        }
+
+        # now get the rest of the info on this line
+        my @fields;
+        foreach my $bound_set ( @ASG_lookup ) {
+            my $element = substr( $cur, $bound_set->[ 0 ],
+                                  $bound_set->[ 1 ] );
+            $element =~ s/\s//g;
+            push( @fields, $element );
+        }
+        $elements[ $ord_num ] = \@fields;
+        $cur = <$file>;
+        $last_chain = $chain;
+    }
+
+    # file the last chain; nothing is stored when no ASG line was seen
+    # (previously that created an entry under an undefined key)
+    $ASG{ $chain } = [ @elements ] if defined $chain;
+
+    $self->{ 'ASG' } = \%ASG;
+}
+
+1;
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/SecStr/STRIDE/Res.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Structure/StructureI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Structure/StructureI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Structure/StructureI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,64 @@
+# $Id: StructureI.pm,v 1.7.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Structure::StructureI
+#
+# Cared for by Kris Boulez <kris.boulez at algonomics.com>
+#
+# Copyright Kris Boulez
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Structure::StructureI - Abstract Interface for Structure objects
+
+=head1 SYNOPSIS
+
+Give standard usage here
+
+=head1 DESCRIPTION
+
+Describe the interface here
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Kris Boulez
+
+Email kris.boulez at algonomics.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Structure::StructureI;
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Alphabet.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Alphabet.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Alphabet.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,182 @@
+# $Id: Alphabet.pm,v 1.10.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Symbol::Alphabet
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Symbol::Alphabet - BSANE/BioCORBA compliant symbol list alphabet
+
+=head1 SYNOPSIS
+
+  {
+      my $alphabet = new Bio::Symbol::Alphabet(-symbols   => [ @s ],
+                                               -subalphas => [ @alphas ] );
+
+      my @symbols = $alphabet->symbols;
+      my @subalphas = $alphabet->alphabets;
+      if( $alphabet->contains($symbol) ) {
+  	  # do something
+      }
+  }
+
+=head1 DESCRIPTION
+
+Alphabet contains set of symbols, which can be concatenated to
+form symbol lists. Sequence string, for example, is stringified
+representation of the symbol list (tokens of symbols).
+
+This module was implemented for the purposes of meeting the
+BSANE/BioCORBA spec 0.3 only.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Symbol::Alphabet;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::Symbol::AlphabetI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Symbol::Alphabet();
+ Function: Builds a new Bio::Symbol::Alphabet object 
+ Returns : Bio::Symbol::Alphabet
+ Args    : -symbols  => Array ref of Bio::Symbol::SymbolI objects
+           -subalphas=> Array ref of Bio::Symbol::AlphabetI objects 
+                        representing sub alphabets
+
+=cut
+
+sub new {
+    # Constructor: delegates to the parent constructor, then fills the
+    # symbol and sub-alphabet lists from the -symbols / -subalphas arguments.
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    $self->{'_symbols'} = [];
+    $self->{'_alphabets'} = [];
+    my ($symbols, $subalphas) = $self->_rearrange([qw(SYMBOLS SUBALPHAS)],
+                                                  @args);
+
+    # Only array references are accepted; anything else is silently ignored.
+    if( defined $symbols && ref($symbols) =~ /array/i ) {
+        $self->symbols(@$symbols);
+    }
+    if( defined $subalphas && ref($subalphas) =~ /array/i ) {
+        $self->alphabets(@$subalphas);
+    }
+    return $self;
+}
+
+=head2 AlphabetI Interface methods
+
+=cut
+
+=head2 symbols
+
+ Title   : symbols
+ Usage   : my @symbols = $alphabet->symbols();
+ Function: Get/Set Symbol list for an alphabet
+           List of symbols, which make up this alphabet.
+ Returns : Array of Bio::Symbol::SymbolI objects
+ Args    : (optional) Array of Bio::Symbol::SymbolI objects
+
+=cut
+
+sub symbols {
+    # Get/set the symbol list.  When arguments are given the stored list is
+    # replaced by those arguments that are Bio::Symbol::SymbolI objects;
+    # every other argument triggers a warning and is dropped.
+    my ($self,@args) = @_;
+    if( @args ) {
+        $self->{'_symbols'} = [];
+        foreach my $symbol ( @args ) {
+            if( ! defined $symbol || ! ref($symbol) ||
+                ! $symbol->isa('Bio::Symbol::SymbolI') ) {
+                # avoid interpolating undef into the message
+                my $got = defined $symbol ? $symbol : 'undef';
+                $self->warn("Did not provide a proper Bio::Symbol::SymbolI to method 'symbols' (got $got)");
+            } else {
+                push @{$self->{'_symbols'}}, $symbol;
+            }
+        }
+    }
+    return @{$self->{'_symbols'}};
+}
+
+=head2 alphabets
+
+ Title   : alphabets
+ Usage   : my @alphabets = $alphabet->alphabets();
+ Function: Get/Set Sub Alphabet list for an alphabet 
+           Sub-alphabets. E.g. codons made from DNAxDNAxDNA alphabets
+ Returns : Array of Bio::Symbol::AlphabetI objects
+ Args    : (optional) Array of Bio::Symbol::AlphabetI objects
+
+=cut
+
+sub alphabets {
+    # Get/set the sub-alphabet list.  When arguments are given the stored
+    # list is replaced by those arguments that are Bio::Symbol::AlphabetI
+    # objects; every other argument triggers a warning and is dropped.
+    my ($self,@args) = @_;
+    if( @args ) {
+        $self->{'_alphabets'} = [];
+        foreach my $alpha ( @args ) {
+            # Check defined/ref first so an undef argument warns instead of
+            # dying on the ->isa call.
+            if( ! defined $alpha || ! ref($alpha) ||
+                ! $alpha->isa('Bio::Symbol::AlphabetI') ) {
+                my $got = defined $alpha ? $alpha : 'undef';
+                $self->warn("Did not provide a proper Bio::Symbol::AlphabetI to method 'alphabets' (got $got)");
+            } else {
+                push @{$self->{'_alphabets'}}, $alpha;
+            }
+        }
+    }
+    return @{$self->{'_alphabets'}};
+}
+
+=head2 contains
+
+ Title   : contains
+ Usage   : if($alphabet->contains($symbol)) { }
+ Function: Tests if Symbol is contained in this alphabet
+ Returns : Boolean
+ Args    : Bio::Symbol::SymbolI
+
+=cut
+
+sub contains{
+   # Walk this alphabet's symbols and report 1 on the first one whose
+   # equals() accepts the candidate, 0 when none does.
+   my ($self,$testsymbol) = @_;
+   for my $member ( $self->symbols ) {
+       if( $member->equals($testsymbol) ) {
+           return 1;
+       }
+   }
+   return 0;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/AlphabetI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/AlphabetI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/AlphabetI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,153 @@
+# $Id: AlphabetI.pm,v 1.8.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Symbol::AlphabetI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Symbol::AlphabetI - A Symbol Alphabet
+
+=head1 SYNOPSIS
+
+    # get a Bio::Symbol::AlphabetI object somehow
+    my @symbols = $alphabet->symbols;
+    my @subalphas = $alphabet->alphabets;
+    if( $alphabet->contains($symbol) ) {
+	# do something
+    }
+
+=head1 DESCRIPTION
+
+Alphabet contains set of symbols, which can be concatenated to form
+symbol lists. Sequence string, for example, is stringified
+representation of the symbol list (tokens of symbols).
+
+This module was implemented for the purposes of meeting the
+BSANE/BioCORBA spec 0.3 only.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Symbol::AlphabetI;
+use strict;
+use Bio::Root::RootI;
+
+=head2 AlphabetI Interface methods
+
+=cut
+
+=head2 symbols
+
+ Title   : symbols
+ Usage   : my @symbols = $alphabet->symbols();
+ Function: Get/Set Symbol list for an alphabet
+           List of symbols, which make up this alphabet.
+ Returns : Array of L<Bio::Symbol::SymbolI> objects
+ Args    : (optional) Array of L<Bio::Symbol::SymbolI> objects
+
+=cut
+
+sub symbols{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 alphabets
+
+ Title   : alphabets
+ Usage   : my @alphabets = $alphabet->alphabets();
+ Function: Get/Set Sub Alphabet list for an alphabet 
+           Sub-alphabets. E.g. codons made from DNAxDNAxDNA alphabets
+ Returns : Array of L<Bio::Symbol::AlphabetI> objects
+ Args    : (optional) Array of L<Bio::Symbol::AlphabetI> objects
+
+=cut
+
+sub alphabets{
+    # Interface stub: implementing classes must override this method.
+    my ($self,@args) = @_;
+    $self->throw_not_implemented();
+}
+
+=head2 contains
+
+ Title   : contains
+ Usage   : if($alphabet->contains($symbol)) { }
+ Function: Tests if Symbol is contained in this alphabet
+ Returns : Boolean
+ Args    : L<Bio::Symbol::SymbolI>
+
+=cut
+
+sub contains{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+# Other methods from BSANE - not sure if we will implement here or only in
+# BioCORBA implementation
+
+# Resolve symbols from the token string.
+#    SymbolList to_symbol(in string tokens) raises ( IllegalSymbolException) ;
+
+# Convenience method, which returns a gap symbol that does not
+# match any other symbol in the alphabet.
+#   Symbol get_gap_symbol() raises ( DoesNotExist) ;
+
+
+# Returns a ambiguity symbol, which represent list of
+# symbols. All symbols in a list must be members of
+# this alphabet otherwise IllegalSymbolException is
+# thrown.
+# Symbol get_ambiguity( in SymbolList symbols) raises( IllegalSymbolException);
+
+
+#  Returns a Symbol, which represents ordered list of symbols
+#  given as a parameter. Each symbol in the list must be member of
+#  different sub-alphabet in the order defined by the alphabets
+#  attribute. For example, codons can be represented by a compound
+#  Alphabet of three DNA Alphabets, in which case the get_symbol(
+#  SymbolList[ a,g,t]) method of the Alphabet returns Symbol for
+#  the codon agt.<p>
+
+#  IllegalSymbolException is raised if members of symbols
+#  are not Symbols over the alphabet defined by
+#  get_alphabets()-method
+#  Symbol get_symbol(in SymbolList symbols) raises(IllegalSymbolException) ;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/DNAAlphabet.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/DNAAlphabet.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/DNAAlphabet.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,112 @@
+# $Id: DNAAlphabet.pm,v 1.6.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Symbol::DNAAlphabet
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Symbol::DNAAlphabet - A ready made DNA alphabet
+
+=head1 SYNOPSIS
+
+    use Bio::Symbol::DNAAlphabet;
+    my $alpha = new Bio::Symbol::DNAAlphabet();
+    foreach my $symbol ( $alpha->symbols ) {
+	print "symbol is $symbol\n";
+    }
+
+=head1 DESCRIPTION
+
+This object builds an Alphabet with DNA symbols.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Symbol::DNAAlphabet;
+use strict;
+
+use Bio::Symbol::Symbol;
+use Bio::Tools::IUPAC;
+
+use base qw(Bio::Symbol::Alphabet);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Symbol::DNAAlphabet();
+ Function: Builds a new Bio::Symbol::DNAAlphabet object 
+ Returns : Bio::Symbol::DNAAlphabet
+ Args    :
+
+
+=cut
+
+sub new {
+  # Constructor: builds one Bio::Symbol::Symbol per code returned by
+  # Bio::Tools::IUPAC::iupac_iub() and installs them as this alphabet's
+  # symbols.
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my %alphabet = Bio::Tools::IUPAC::iupac_iub();
+  my %symbols;
+
+  # First pass: codes whose expansion list has exactly one entry (and 'U')
+  # become plain symbols.
+  foreach my $let ( keys %alphabet ) {
+      next unless @{$alphabet{$let}} == 1 || $let eq 'U';
+      $symbols{$let} = Bio::Symbol::Symbol->new(-name  => $let,
+                                                -token => $let);
+  }
+
+  # Second pass: every remaining code is composed from the symbols created
+  # above, both as its component list and as its "matches" alphabet.
+  foreach my $let ( keys %alphabet ) {
+      next if( $symbols{$let} || $let eq 'U');
+      my @subsymbols;
+
+      foreach my $sublet ( @{$alphabet{$let}} ) {
+          push @subsymbols, $symbols{$sublet};
+      }
+      my $alpha = Bio::Symbol::Alphabet->new(-symbols => \@subsymbols);
+      $symbols{$let} = Bio::Symbol::Symbol->new(-name    => $let,
+                                                -token   => $let,
+                                                -matches => $alpha,
+                                                -symbols => \@subsymbols);
+  }
+
+  $self->symbols(values %symbols);
+  return $self;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/ProteinAlphabet.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/ProteinAlphabet.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/ProteinAlphabet.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,113 @@
+# $Id: ProteinAlphabet.pm,v 1.7.2.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Symbol::ProteinAlphabet
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Symbol::ProteinAlphabet - A ready made Protein alphabet
+
+=head1 SYNOPSIS
+
+    use Bio::Symbol::ProteinAlphabet;
+    my $alpha = new Bio::Symbol::ProteinAlphabet();
+    foreach my $symbol ( $alpha->symbols ) {
+	print "symbol is $symbol\n";
+    }
+
+=head1 DESCRIPTION
+
+This object builds an Alphabet with Protein symbols.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Symbol::ProteinAlphabet;
+use strict;
+
+use Bio::Symbol::Symbol;
+use Bio::Tools::IUPAC;
+use Bio::SeqUtils;
+
+use base qw(Bio::Symbol::Alphabet);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Symbol::ProteinAlphabet();
+ Function: Builds a new Bio::Symbol::ProteinAlphabet object 
+ Returns : Bio::Symbol::ProteinAlphabet
+ Args    :
+
+
+=cut
+
+sub new {
+  # Constructor: builds one Bio::Symbol::Symbol per code returned by
+  # Bio::Tools::IUPAC->iupac_iup(), named via Bio::SeqUtils->valid_aa(2),
+  # and installs them as this alphabet's symbols.
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+  my %aa = Bio::SeqUtils->valid_aa(2);
+  my %codes = Bio::Tools::IUPAC->iupac_iup();
+  my %symbols;
+  my @left;
+
+  # First pass: codes whose expansion list has exactly one entry become
+  # plain symbols; the rest are deferred to the second pass.
+  foreach my $let ( keys %codes  ) {
+      if( scalar @{$codes{$let}} != 1) { push @left, $let; next; }
+      $symbols{$let} = Bio::Symbol::Symbol->new(-name  => $aa{$let},
+                                                -token => $let);
+  }
+
+  # Second pass: deferred codes are composed from the symbols built above.
+  foreach my $l ( @left ) {
+      my @subsym;
+      foreach my $sym ( @{$codes{$l}} ) {
+          push @subsym, $symbols{$sym};
+      }
+      my $alpha = Bio::Symbol::Alphabet->new(-symbols => \@subsym);
+      $symbols{$l} = Bio::Symbol::Symbol->new(-name    => $aa{$l},
+                                              -token   => $l,
+                                              -matches => $alpha,
+                                              -symbols => \@subsym);
+  }
+
+  $self->symbols(values %symbols);
+  return $self;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/README.Symbol
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/README.Symbol	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/README.Symbol	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+This is unused code written simply for the benefit of implementing
+the BioCORBA / OMG BSANE spec. We've pretty much given up on this in
+2002 as anything useful. So unless someone finds a need for this code
+we'll probably remove it in future releases.
+
+-Jason Stajich August 2003

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Symbol.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Symbol.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/Symbol.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,222 @@
+# $Id: Symbol.pm,v 1.9.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Symbol::Symbol
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Symbol::Symbol - A biological symbol
+
+=head1 SYNOPSIS
+
+    use Bio::Symbol::Symbol;
+    my $thymine = new Bio::Symbol::Symbol(-name => 'Thy',
+					  -token=> 'T');
+    my $a = new Bio::Symbol::Symbol(-token => 'A' );
+    my $u = new Bio::Symbol::Symbol(-token => 'U' );
+    my $g = new Bio::Symbol::Symbol(-token => 'G' );
+
+    my $M = new Bio::Symbol::Symbol(-name  => 'Met',
+				    -token => 'M',
+				    -symbols => [ $a, $u, $g ]);
+
+    my ($name,$token) = ($a->name, $a->token);
+    my @symbols       = $a->symbols;
+    my $matches       = $a->matches;
+
+=head1 DESCRIPTION
+
+Symbol represents a single token in the sequence. Symbol can have
+multiple synonyms or matches within the same Alphabet, which
+makes possible to represent ambiguity codes and gaps.
+
+Symbols can be also composed from ordered list other symbols. For
+example, codons can be represented by single Symbol using a
+compound Alphabet made from three DNA Alphabets.
+
+This module was implemented for the purposes of meeting the
+BSANE/BioCORBA spec 0.3 only.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Symbol::Symbol;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Symbol::Alphabet;
+
+use base qw(Bio::Root::Root Bio::Symbol::SymbolI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Symbol::Symbol();
+ Function: Builds a new Bio::Symbol::Symbol object 
+ Returns : Bio::Symbol::Symbol
+ Args    : -name    => descriptive name (string) [e.g. Met]
+           -token   => Shorthand token (string)  [e.g. M]
+           -symbols => Symbols that make up this symbol (array) [e.g. AUG]
+           -matches => Alphabet in the event symbol is an ambiguity
+                       code.
+
+=cut
+
+sub new {
+  # Constructor: delegates to the parent constructor, then applies the
+  # -name, -token, -symbols and -matches arguments through the accessors.
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+  $self->{'_symbols'} = [];
+
+  my ($name, $token, $symbols, $matches) =
+      $self->_rearrange([qw(NAME TOKEN SYMBOLS MATCHES)], @args);
+
+  # Each argument is applied only when it has a true value; -symbols must
+  # additionally be an array reference.
+  $token && $self->token($token);
+  $name && $self->name($name);
+  $symbols && ref($symbols) =~ /array/i && $self->symbols(@$symbols);
+  $matches && $self->matches($matches);
+  return $self;
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $symbol->name();
+ Function: Get/Set Descriptive name for Symbol
+ Returns : string
+ Args    : (optional) string
+
+=cut
+
+sub name {
+   # Combined getter/setter: a true $value replaces the stored name; the
+   # stored name (or '' when it is unset/false) is always returned.
+   my ($self,$value) = @_;
+   $self->{'_name'} = $value if $value;
+   my $stored = $self->{'_name'};
+   return $stored ? $stored : '';
+}
+
+=head2 token
+
+ Title   : token
+ Usage   : my $token = $self->token();
+ Function: Get/Set token for this symbol
+ Example : Letter A,C,G,or T for a DNA alphabet Symbol
+ Returns : string
+ Args    : (optional) string
+
+=cut
+
+sub token{
+   # Combined getter/setter: a true $value replaces the stored token; the
+   # stored token (or '' when it is unset/false) is always returned.
+   my ($self,$value) = @_;
+   $self->{'_token'} = $value if $value;
+   my $stored = $self->{'_token'};
+   return $stored ? $stored : '';
+}
+
+=head2 symbols
+
+ Title   : symbols
+ Usage   : my @symbols = $self->symbols();
+ Function: Get/Set Symbols this Symbol is composed from
+ Example : Ambiguity symbols are made up > 1 base symbol
+ Returns : Array of Bio::Symbol::SymbolI objects
+ Args    : (optional) Array of Bio::Symbol::SymbolI objects
+
+
+=cut
+
+sub symbols{
+   # Get/set the component symbols.  A non-empty argument list replaces the
+   # stored list (copied into a fresh array); the stored list is returned.
+   my ($self,@args) = @_;
+   if( @args ) {
+       $self->{'_symbols'} = [@args];
+   }
+   return @{$self->{'_symbols'}};
+}
+
+=head2 matches
+
+ Title   : matches
+ Usage   : my $matchalphabet = $symbol->matches();
+ Function: Get/Set (Sub) alphabet of symbols matched by this symbol
+           including the symbol itself (i.e. if symbol is DNA
+           ambiguity code W then the matches contains symbols for W
+           and T)
+ Returns : Bio::Symbol::AlphabetI
+ Args    : (optional) Bio::Symbol::AlphabetI
+
+=cut
+
+sub matches{
+   # Get/set the alphabet of symbols this symbol matches.  A true argument
+   # that is a Bio::Symbol::AlphabetI replaces the stored value; any other
+   # true argument only produces a warning and the old value is kept.
+   my ($self,$matches) = @_;
+
+   return $self->{'_matches'} unless $matches;
+
+   if( $matches->isa('Bio::Symbol::AlphabetI') ) {
+       $self->{'_matches'} = $matches;
+   } else {
+       $self->warn("Must pass in a Bio::Symbol::AlphabetI object to matches function");
+   }
+   return $self->{'_matches'};
+}
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if( $symbol->equals($symbol2) ) { }
+ Function: Tests if a symbol is equal to another 
+ Returns : Boolean
+ Args    : Bio::Symbol::SymbolI
+
+=cut
+
+sub equals{
+   # Two symbols are considered equal when their tokens are the same
+   # string; nothing else (e.g. DNA vs. protein) is taken into account.
+   my ($self,$symbol2) = @_;
+   my $own_token   = $self->token;
+   my $other_token = $symbol2->token;
+   return $own_token eq $other_token;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/SymbolI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/SymbolI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Symbol/SymbolI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,161 @@
+# $Id: SymbolI.pm,v 1.9.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Symbol::SymbolI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Symbol::SymbolI - Interface for a Symbol
+
+=head1 SYNOPSIS
+
+    # get a Bio::Symbol::SymbolI object somehow
+
+    my ($name,$token) = ($symbol->name, $symbol->token);
+    my @symbols       = $symbol->symbols;
+    my $matches       = $symbol->matches;
+
+=head1 DESCRIPTION
+
+Symbol represents a single token in the sequence. Symbol can have
+multiple synonyms or matches within the same Alphabet, which
+makes possible to represent ambiguity codes and gaps.
+
+Symbols can be also composed from ordered list other symbols. For
+example, codons can be represented by single Symbol using a
+compound Alphabet made from three DNA Alphabets.
+
+This module was implemented for the purposes of meeting the
+BSANE/BioCORBA spec 0.3 only.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Symbol::SymbolI;
+use strict;
+use base qw(Bio::Root::RootI);
+
+=head2 Bio::Symbol::SymbolI interface methods
+
+=cut
+
+=head2 name
+
+ Title   : name
+ Usage   : my $name = $symbol->name();
+ Function: Get/Set Descriptive name for Symbol
+ Returns : string
+ Args    : (optional) string
+
+=cut
+
+sub name{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 token
+
+ Title   : token
+ Usage   : my $token = $self->token();
+ Function: Get/Set token for this symbol
+ Example : Letter A,C,G,or T for a DNA alphabet Symbol
+ Returns : string
+ Args    : (optional) string
+
+=cut
+
+sub token{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 symbols
+
+ Title   : symbols
+ Usage   : my @symbols = $self->symbols();
+ Function: Get/Set Symbols this Symbol is composed from
+ Example : A codon is composed of 3 DNA symbols
+ Returns : Array of Bio::Symbol::SymbolI objects
+ Args    : (optional) Array of Bio::Symbol::SymbolI objects
+
+
+=cut
+
+sub symbols{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 matches
+
+ Title   : matches
+ Usage   : my $matchalphabet = $symbol->matches();
+ Function: Get/Set (Sub) alphabet of symbols matched by this symbol
+           including the symbol itself (i.e. if symbol is DNA
+           ambiguity code W then the matches contains symbols for W
+           and T)
+ Returns : Bio::Symbol::AlphabetI
+ Args    : (optional) Bio::Symbol::AlphabetI
+
+=cut
+
+sub matches{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 equals
+
+ Title   : equals
+ Usage   : if( $symbol->equals($symbol2) ) { }
+ Function: Tests if a symbol is equal to another 
+ Returns : Boolean
+ Args    : Bio::Symbol::SymbolI
+
+=cut
+
+sub equals{
+   # Interface stub: implementing classes must override this method.
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Taxon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Taxon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Taxon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,735 @@
+# $Id: Taxon.pm,v 1.1.4.3 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::Taxon
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala, based heavily on a module by Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxon - A node in a represented taxonomy
+
+=head1 SYNOPSIS
+
+  use Bio::Taxon;
+
+  # Typically you will get a Taxon from a Bio::DB::Taxonomy object
+  # but here is how you initialize one
+  my $taxon = new Bio::Taxon(-name      => $name,
+                             -id        => $id,
+                             -rank      => $rank,
+                             -division  => $div);
+
+  # Get one from a database
+  my $dbh = new Bio::DB::Taxonomy(-source   => 'flatfile',
+                                  -directory=> '/tmp',
+                                  -nodesfile=> '/path/to/nodes.dmp',
+                                  -namesfile=> '/path/to/names.dmp');
+  my $human = $dbh->get_taxon(-name => 'Homo sapiens');
+  $human = $dbh->get_taxon(-taxonid => '9606');
+
+  print "id is ", $human->id, "\n"; # 9606
+  print "rank is ", $human->rank, "\n"; # species
+  print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
+  print "division is ", $human->division, "\n"; # Primates
+
+  my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
+
+  # You can quickly make your own lineages with the list database
+  my @ranks = qw(superkingdom class genus species);
+  my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
+  my $list_dbh = new Bio::DB::Taxonomy(-source => 'list', -names => \@h_lineage,
+                                                          -ranks => \@ranks);
+  $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
+  my @names = $human->common_names; # @names is empty
+  $human->common_names('woman');
+  @names = $human->common_names; # @names contains woman
+
+  # You can switch to another database when you need more information
+  my $entrez_dbh = new Bio::DB::Taxonomy(-source => 'entrez');
+  $human->db_handle($entrez_dbh);
+  @names = $human->common_names; # @names contains woman, human, man
+
+  # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
+  # methods (and can manually create our own taxa and taxonomy without the use
+  # of any database)
+  my $homo = $human->ancestor;
+
+  # Though be careful with each_Descendent - unless you add_Descendent()
+  # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
+  # does not ask the database for the answer. You can ask the database yourself
+  # using the same method:
+  ($human) = $homo->db_handle->each_Descendent($homo);
+
+  # We can also take advantage of Bio::Tree::Tree* methods:
+  # a) some methods are available with just an empty tree object
+  use Bio::Tree::Tree;
+  my $tree_functions = new Bio::Tree::Tree();
+  my @lineage = $tree_functions->get_lineage_nodes($human);
+  my $lca = $tree_functions->get_lca($human, $mouse);
+
+  # b) for other methods, create a tree using your Taxon object
+  my $tree = new Bio::Tree::Tree(-node => $human);
+  my @taxa = $tree->get_nodes;
+  $homo = $tree->find_node(-rank => 'genus');
+
+  # Normally you can't get the lca of a list-database derived Taxon and an
+  # entrez or flatfile-derived one because the two different databases might
+  # have different roots and different numbers of ranks between the root and the
+  # taxa of interest. To solve this, make a tree of the Taxon with the more
+  # detailed lineage and splice out all the taxa that won't be in the lineage of
+  # your other Taxon:
+  my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
+  my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
+  my $mouse_tree = new Bio::Tree::Tree(-node => $entrez_mouse);
+  $mouse_tree->splice(-keep_rank => \@ranks);
+  $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
+
+=head1 DESCRIPTION
+
+This is the next generation (for Bioperl) of representing Taxonomy
+information. Previously all information was managed by a single
+object called Bio::Species. This new implementation allows
+representation of the intermediate nodes not just the species nodes
+and can relate their connections.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich,    jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
+Juguang Xiao,     juguang at tll.org.sg
+Gabriel Valiente, valiente at lsi.upc.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Taxon;
+use strict;
+
+use Bio::DB::Taxonomy;
+
+use base qw(Bio::Tree::Node Bio::IdentifiableI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Taxon();
+ Function: Builds a new Bio::Taxon object
+ Returns : an instance of Bio::Taxon
+ Args    : -dbh               => a reference to a Bio::DB::Taxonomy object
+                                 [no default]
+           -name              => a string representing the taxon name
+                                 (scientific name)
+           -id                => human readable id - typically NCBI taxid
+           -ncbi_taxid        => same as -id, but explicitly say that it is an
+                                 NCBI taxid
+           -rank              => node rank (one of 'species', 'genus', etc)
+           -common_names      => array ref of all common names
+           -division          => 'Primates', 'Rodents', etc
+           -genetic_code      => genetic code table number
+           -mito_genetic_code => mitochondrial genetic code table number
+           -create_date       => date created in database
+           -update_date       => date last updated in database
+           -pub_date          => date published in database
+
+=cut
+
+sub new {
+    # Constructor: lets the parent class build the object, then applies the
+    # named arguments documented in the POD above through the matching
+    # accessor methods.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
+        $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
+        $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
+                                            NCBI_TAXID COMMON_NAME COMMON_NAMES
+                                            GENETIC_CODE MITO_GENETIC_CODE
+                                            CREATE_DATE UPDATE_DATE PUB_DATE
+                                            PARENT_ID)], @args);
+
+    # -id takes precedence; warn if -object_id or -ncbi_taxid disagree with
+    # it, otherwise fall back to whichever of the two was supplied.
+    if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
+        $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
+    }
+    elsif(!defined $id) {
+        $id = $objid || $ncbitaxid;
+    }
+    defined $id && $self->id($id);
+    # remembered so that ncbi_taxid() knows the id really is an NCBI taxid
+    $self->{_ncbi_tax_id_provided} = 1 if $ncbitaxid;
+
+    defined $rank && $self->rank($rank);
+    defined $name && $self->node_name($name);
+
+    # Gather common names from -common_names (array ref) and -common_name
+    # (single value). A lone -common_name is honoured too; when both are
+    # given the single name goes first unless it is already in the list.
+    my @common_names;
+    if ($commonnames) {
+        $self->throw("-common_names takes only an array reference") unless ref($commonnames) eq 'ARRAY';
+        @common_names = @{$commonnames};
+    }
+    if ($commonname) {
+        my %c_names = map { $_ => 1 } @common_names;
+        unless (exists $c_names{$commonname}) {
+            unshift(@common_names, $commonname);
+        }
+    }
+    @common_names > 0 && $self->common_names(@common_names);
+
+    defined $gcode      && $self->genetic_code($gcode);
+    defined $mitocode   && $self->mitochondrial_genetic_code($mitocode);
+    defined $createdate && $self->create_date($createdate);
+    defined $updatedate && $self->update_date($updatedate);
+    defined $pubdate    && $self->pub_date($pubdate);
+    defined $div        && $self->division($div);
+    defined $dbh        && $self->db_handle($dbh);
+
+    # deprecated and will issue a warning when method called,
+    # eventually to be removed completely as option
+    defined $parent_id  && $self->parent_id($parent_id);
+
+    # some things want to freeze/thaw Bio::Species objects, but
+    # _root_cleanup_methods contains a CODE ref, delete it.
+    delete $self->{_root_cleanup_methods};
+
+    return $self;
+}
+
+=head1 Bio::IdentifiableI interface 
+
+Also see L<Bio::IdentifiableI>
+
+=head2 version
+
+ Title   : version
+ Usage   : $taxon->version($newval)
+ Returns : value of version (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub version {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'version'} = $value[0] if @value;
+    return $self->{'version'};
+}
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $taxon->authority($newval)
+ Returns : value of authority (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub authority {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'authority'} = $value[0] if @value;
+    return $self->{'authority'};
+}
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $taxon->namespace($newval)
+ Returns : value of namespace (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub namespace {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'namespace'} = $value[0] if @value;
+    return $self->{'namespace'};
+}
+
+=head1 Bio::Taxonomy::Node implementation
+
+=head2 db_handle
+
+ Title   : db_handle
+ Usage   : $taxon->db_handle($newval)
+ Function: Get/Set Bio::DB::Taxonomy Handle
+ Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
+ Args    : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
+
+Also see L<Bio::DB::Taxonomy>
+
+=cut
+
+sub db_handle {
+    my ($self, @args) = @_;
+
+    # Getter: hand back whatever handle is currently stored.
+    return $self->{'db_handle'} unless @args;
+
+    my $new_db = $args[0];
+    unless (ref($new_db) && $new_db->isa('Bio::DB::Taxonomy')) {
+        $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
+    }
+
+    # When no handle was set yet, or a different one is being installed,
+    # look this taxon up in the new database and merge in what it knows.
+    my $current_db = $self->{'db_handle'};
+    if (!$current_db || $current_db ne $new_db) {
+        my $db_twin = $self->_get_similar_taxon_from_db($self, $new_db);
+        $self->_merge_taxa($db_twin) if $db_twin;
+    }
+
+    # NB: The Bio::DB::Taxonomy modules access this data member directly
+    # to avoid calling this method and going infinite
+    $self->{'db_handle'} = $new_db;
+    return $self->{'db_handle'};
+}
+
+=head2 rank
+
+ Title   : rank
+ Usage   : $taxon->rank($newval)
+ Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
+ Returns : value of rank (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub rank {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'rank'} = $value[0] if @value;
+    return $self->{'rank'};
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $taxon->id($newval)
+ Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
+           ncbi_taxid() are synonyms of this method.
+ Returns : id (a scalar)
+ Args    : none to get, OR scalar to set
+
+=cut
+
+sub id {
+    # Thin wrapper: arguments are forwarded unchanged to the inherited id().
+    my ($self, @args) = @_;
+    return $self->SUPER::id(@args);
+}
+
+# object_id() is the same subroutine under another name
+*object_id = \&id;
+
+=head2 ncbi_taxid
+
+ Title   : ncbi_taxid
+ Usage   : $taxon->ncbi_taxid($newval)
+ Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
+           returns an id when the NCBI taxid has been explicitly set, either
+           with this method or via -ncbi_taxid in new().
+ Returns : id (a scalar)
+ Args    : none to get, OR scalar to set
+
+=cut
+
+sub ncbi_taxid {
+    my ($self, $id) = @_;
+
+    # No (true) id given: act as a getter, but only answer when the id is
+    # known to have been supplied as an NCBI taxid.
+    if (!$id) {
+        return unless $self->{_ncbi_tax_id_provided};
+        return $self->SUPER::id;
+    }
+
+    # Setter: flag the id as an NCBI taxid and store it via the inherited id()
+    $self->{_ncbi_tax_id_provided} = 1;
+    return $self->SUPER::id($id);
+}
+
+=head2 parent_id
+
+ Title   : parent_id
+ Usage   : $taxon->parent_id()
+ Function: Get parent ID, (NCBI Taxonomy ID in most cases);
+           parent_taxon_id() is a synonym of this method.
+ Returns : value of parent_id (a scalar)
+ Args    : none
+ Status  : deprecated
+
+=cut
+
+sub parent_id {
+    my ($self, @args) = @_;
+
+    # Setting is no longer supported; any argument only triggers a warning.
+    $self->warn("You can no longer set the parent_id - use ancestor() instead") if @args;
+
+    # The parent id is simply the id of the ancestor, when there is one.
+    my $parent = $self->ancestor();
+    return unless $parent;
+    return $parent->id;
+}
+
+# parent_taxon_id() is the same subroutine under another name
+*parent_taxon_id = \&parent_id;
+
+=head2 genetic_code
+
+ Title   : genetic_code
+ Usage   : $taxon->genetic_code($newval)
+ Function: Get/set genetic code table
+ Returns : value of genetic_code (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub genetic_code {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'genetic_code'} = $value[0] if @value;
+    return $self->{'genetic_code'};
+}
+
+=head2 mitochondrial_genetic_code
+
+ Title   : mitochondrial_genetic_code
+ Usage   : $taxon->mitochondrial_genetic_code($newval)
+ Function: Get/set mitochondrial genetic code table
+ Returns : value of mitochondrial_genetic_code (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub mitochondrial_genetic_code {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'mitochondrial_genetic_code'} = $value[0] if @value;
+    return $self->{'mitochondrial_genetic_code'};
+}
+
+=head2 create_date
+
+ Title   : create_date
+ Usage   : $taxon->create_date($newval)
+ Function: Get/Set Date this node was created (in the database)
+ Returns : value of create_date (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub create_date {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'create_date'} = $value[0] if @value;
+    return $self->{'create_date'};
+}
+
+=head2 update_date
+
+ Title   : update_date
+ Usage   : $taxon->update_date($newval)
+ Function: Get/Set Date this node was updated (in the database)
+ Returns : value of update_date (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub update_date {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'update_date'} = $value[0] if @value;
+    return $self->{'update_date'};
+}
+
+=head2 pub_date
+
+ Title   : pub_date
+ Usage   : $taxon->pub_date($newval)
+ Function: Get/Set Date this node was published (in the database)
+ Returns : value of pub_date (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub pub_date {
+    # Combined getter/setter; any argument (even undef) replaces the value.
+    my ($self, @value) = @_;
+    $self->{'pub_date'} = $value[0] if @value;
+    return $self->{'pub_date'};
+}
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : my $ancestor_taxon = $taxon->ancestor()
+ Function: Retrieve the ancestor taxon. Normally the database is asked what the
+           ancestor is.
+
+           If you manually set the ancestor (or you make a Bio::Tree::Tree with
+           this object as an argument to new()), the database (if any) will not
+           be used for the purposes of this method.
+
+           To restore normal database behaviour, call ancestor(undef) (which
+           would remove this object from the tree), or request this taxon again
+           as a new Taxon object from the database.
+
+ Returns : Bio::Taxon
+ Args    : none
+
+=cut
+
+sub ancestor {
+    my ($self, @args) = @_;
+    my $known  = $self->SUPER::ancestor(@args);
+    my $handle = $self->db_handle;
+
+    # An ancestor held in the tree always wins; without a database handle
+    # there is nothing further to consult either.
+    return $known if $known || !$handle;
+
+    #*** could avoid the db lookup if we knew our current id was definitely
+    #    information from the db...
+    my $db_self = $self->_get_similar_taxon_from_db($self);
+    return $handle->ancestor($db_self);
+}
+
+=head2 get_Parent_Node
+
+ Title   : get_Parent_Node
+ Function: Synonym of ancestor()
+ Status  : deprecated
+
+=cut
+
+sub get_Parent_Node {
+    # Deprecated alias: warn, then delegate to ancestor() with the same args.
+    my ($self, @args) = @_;
+    $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
+    return $self->ancestor(@args);
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent
+ Usage   : my @taxa = $taxon->each_Descendent();
+ Function: Get all the descendents for this Taxon (but not their descendents,
+           ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
+           this method.
+
+           Note that this method never asks the database for the descendents;
+           it will only return objects you have manually set with
+           add_Descendent(), or where this was done for you by making a
+           Bio::Tree::Tree with this object as an argument to new().
+
+           To get the database descendents use
+           $taxon->db_handle->each_Descendent($taxon).
+
+ Returns : Array of Bio::Taxon objects
+ Args    : optionally, when you have set your own descendents, the string
+           "height", "creation", "alpha", "revalpha", or coderef to be used to
+           sort the order of children nodes.
+
+=cut
+
+# implemented by Bio::Tree::Node
+
+=head2 get_Children_Nodes
+
+ Title   : get_Children_Nodes
+ Function: Synonym of each_Descendent()
+ Status  : deprecated
+
+=cut
+
+sub get_Children_Nodes {
+    # Deprecated alias: warn, then delegate to each_Descendent() with the
+    # same args.
+    my ($self, @args) = @_;
+    $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
+    return $self->each_Descendent(@args);
+}
+
+=head2 name
+
+  Title:    name
+  Usage:    $taxon->name('scientific', 'Homo sapiens');
+            $taxon->name('common', 'human', 'man');
+            my @names = @{$taxon->name('common')};
+  Function: Get/set the names. node_name(), scientific_name() and common_names()
+            are shorthands to name('scientific'), name('scientific') and
+            name('common') respectively.
+  Returns:  names (an array reference)
+  Args:     Arg1 => the name_class. You can assign any text, but the words
+                'scientific' and 'common' have the special meaning, as
+                scientific name and common name, respectively. 'scientific' and
+                'division' are treated specially, allowing only the first value
+                in the Arg2 list to be set.
+            Arg2 .. => list of names
+
+=cut
+
+sub name {
+    my ($self, $name_class, @names) = @_;
+    defined $name_class or $self->throw('No name class specified');
+
+    if (@names) {
+        # Classes matching scientific/division hold a single value: drop
+        # what was stored before and keep only the first supplied name.
+        my $single_valued = $name_class =~ /scientific|division/i;
+        if ($single_valued) {
+            delete $self->{'_names_hash'}->{$name_class};
+            @names = ($names[0]);
+        }
+        push @{$self->{'_names_hash'}->{$name_class}}, @names;
+    }
+
+    # Array ref of names for this class, or nothing when none are stored
+    my $stored = $self->{'_names_hash'}->{$name_class};
+    return $stored if $stored;
+    return;
+}
+
+=head2 node_name
+
+ Title   : node_name
+ Usage   : $taxon->node_name($newval)
+ Function: Get/set the name of this taxon (node), typically the scientific name
+           of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
+           of this method.
+ Returns : value of node_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub node_name {
+    # Get/set through name('scientific'); the last stored entry is returned
+    # (undef when there is none).
+    my ($self, @values) = @_;
+    my $stored = $self->name('scientific', @values) || [];
+    return $stored->[-1];
+}
+
+# scientific_name() is the same subroutine under another name
+*scientific_name = \&node_name;
+
+=head2 common_names
+
+ Title   : common_names
+ Usage   : $taxon->common_names($newval)
+ Function: Get/add the other names of this taxon, typically the genbank common
+           name and others, eg. 'Human' and 'man'. common_name() is a synonym
+           of this method.
+ Returns : array of names in list context, one of those names in scalar context
+ Args    : on add, new list of names (scalars, optional)
+
+=cut
+
+sub common_names {
+    # Get/add through name('common'). List context yields every stored name,
+    # any other context just the last one.
+    my ($self, @new_names) = @_;
+    my $stored = $self->name('common', @new_names) || [];
+    if (wantarray) {
+        return @{$stored};
+    }
+    return $stored->[-1];
+}
+
+# common_name() is the same subroutine under another name
+*common_name = \&common_names;
+
+=head2 division
+
+ Title   : division
+ Usage   : $taxon->division($newval)
+ Function: Get/set the division this taxon belongs to, eg. 'Primates' or
+           'Bacteria'.
+ Returns : value of division (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub division {
+    # Get/set through name('division'); remaining arguments are passed on
+    # as the new value. The last stored entry is returned (undef if none).
+    my $self = shift;
+    my @v = @{$self->name('division', @_) || []};
+    return pop @v;
+}
+
+# get a node from the database that is like the supplied node
+sub _get_similar_taxon_from_db {
+    #*** not really happy with this having to be called so much; there must be
+    #    a better way...
+    #
+    # Args    : $taxon - the Bio::Taxon to look for
+    #           $db    - optional database to search; defaults to this
+    #                    object's own db_handle
+    # Returns : the matching taxon object from the database, found by id
+    #           first and by node name (with compatible rank) second;
+    #           nothing useful when there is no id/name or no database.
+    my ($self, $taxon, $db) = @_;
+    $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
+    ($self->id || $self->node_name) || return;
+    $db ||= $self->db_handle || return;
+
+    # Declarations are kept separate from the conditional assignments:
+    # "my $x = ... if COND" has undefined behaviour according to perlsyn.
+    my $db_taxon;
+    $db_taxon = $db->get_taxon(-taxonid => $taxon->id) if $taxon->id;
+    unless ($db_taxon) {
+        my @try_ids;
+        @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
+
+        # A candidate is accepted when the ranks agree or when either side
+        # has no rank information at all.
+        my $own_rank = $taxon->rank || 'no rank';
+        foreach my $try_id (@try_ids) {
+            # skip ids the database cannot resolve rather than dying on undef
+            my $try = $db->get_taxon(-taxonid => $try_id) || next;
+            my $try_rank = $try->rank || 'no rank';
+            if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
+                $db_taxon = $try;
+                last;
+            }
+        }
+    }
+
+    return $db_taxon;
+}
+
+# merge data from supplied Taxon into self
+sub _merge_taxa {
+    my ($self, $taxon) = @_;
+    unless (ref($taxon) && $taxon->isa('Bio::Taxon')) {
+        $self->throw("Must supply a Bio::Taxon object");
+    }
+    return if $taxon eq $self;
+
+    # Single-valued attributes: copy a value over only where we have none
+    # (a false value counts as none) and the other taxon has a true one.
+    my @accessors = qw(scientific_name version authority namespace
+                       genetic_code mitochondrial_genetic_code
+                       create_date update_date pub_date division id);
+    for my $accessor (@accessors) {
+        my $mine   = $self->$accessor();
+        my $theirs = $taxon->$accessor();
+        next if $mine || !$theirs;
+        $self->$accessor($theirs);
+    }
+
+    # Rank: 'no rank' is treated the same as having no rank set.
+    my $my_rank    = $self->rank  || 'no rank';
+    my $their_rank = $taxon->rank || 'no rank';
+    $self->rank($their_rank) if $my_rank eq 'no rank' && $their_rank ne 'no rank';
+
+    # Common names: add every name of the other taxon that we lack.
+    my %have  = map { $_ => 1 } $self->common_names;
+    my %offer = map { $_ => 1 } $taxon->common_names;
+    for my $cname (keys %offer) {
+        $self->common_names($cname) unless exists $have{$cname};
+    }
+
+    #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
+}
+
+=head2 remove_Descendent
+
+ Title   : remove_Descendent
+ Usage   : $node->remove_Descendent($node_foo);
+ Function: Removes a specific node from being a Descendent of this node
+ Returns : the number of nodes that were removed
+ Args    : An array of Bio::Node::NodeI objects which have been previously
+           passed to the add_Descendent call of this object.
+
+=cut
+
+sub remove_Descendent {
+    # need to override this method from Bio::Tree::Node since it casually
+    # throws away nodes if they don't branch
+    #
+    # Each supplied node that is registered under its internal_id in our
+    # '_desc' hash gets its ancestor cleared and is dropped from the hash.
+    # Returns the number of nodes removed this way.
+    my ($self, @nodes) = @_;
+    my $c= 0;
+    foreach my $n ( @nodes ) {
+        if ($self->{'_desc'}->{$n->internal_id}) {
+            # flag is raised only for the duration of the ancestor(undef) call
+            $self->{_removing_descendent} = 1;
+            $n->ancestor(undef);
+            $self->{_removing_descendent} = 0;
+            $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
+            delete $self->{'_desc'}->{$n->internal_id};
+            $c++;
+        }
+    }
+    return $c;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Taxon.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/FactoryI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/FactoryI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/FactoryI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,110 @@
+#
+#
+# BioPerl interface of Bio::Taxonomy::FactoryI
+#
+# Cared for by Juguang Xiao
+#
+# You may distribute this module under the same terms as Perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxonomy::FactoryI - interface to define how to access NCBI Taxonomy
+
+=head1 DESCRIPTION
+
+NB: This module has been deprecated.
+
+$factory-E<gt>fetch is a general method to fetch Taxonomy by either NCBI
+taxid or any types of names.
+
+$factory-E<gt>fetch_parent($taxonomy), returns a Taxonomy that is
+one-step higher rank of the taxonomy specified as argument.
+
+$factory-E<gt>fetch_children($taxonomy), reports an array of Taxonomy
+those are one-step lower rank of the taxonomy specified as the
+argument.
+
+=head1 AUTHOR - Juguang Xiao
+
+juguang at tll.org.sg
+
+=head1 CONTRIBUTORS
+
+Additional contributors' names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Taxonomy::FactoryI;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+=head2 fetch
+
+  Title:    fetch
+  Usage:    my $taxonomy = $factory->fetch(-taxon_id => 9605);
+            my $taxonomy = $factory->fetch(-common_name => 'mammals');
+  Function: Fetch taxonomy by taxon_id, common name or scientific name.
+  Returns:  an instance of Bio::Taxonomy
+  Args:     -taxon_id => NCBI taxonomy ID
+            -common_name => common name, such as 'human', 'mammals'
+            -scientific_name => scientific name, such as 'sapiens', 'Mammalia'
+
+=cut
+
+sub fetch {
+    # Interface stub: delegates to throw_not_implemented() on the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 fuzzy_fetch
+
+  Title:    fuzzy_fetch
+  Usage:    my @taxonomy = $factory->fuzzy_fetch(-name => 'mouse');
+  Function: Fuzzy fetch by name, or any text information found in DB
+  Returns:  an array reference of Bio::Taxonomy objects
+  Args:     -name => any name, such as common name, variant, scientific name
+            -description, or -desc => any text information
+
+=cut
+
+sub fuzzy_fetch {
+    # Interface stub: delegates to throw_not_implemented() on the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 fetch_parent
+
+  Title:    fetch_parent
+  Usage:    my $parent_taxonomy = $factory->fetch_parent($taxonomy);
+  Function: Fetch the parent that is one-rank higher than the argument.
+  Returns:  an instance of Bio::Taxonomy, or undef if the arg is the top one.
+  Args:     a Bio::Taxonomy object.
+
+=cut
+
+sub fetch_parent {
+    # Interface stub: delegates to throw_not_implemented() on the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+=head2 fetch_children
+
+  Title:    fetch_children
+  Usage:    my @children_taxonomy = $factory->fetch_children($taxonomy);
+  Function: Fetch all children that are one rank lower than the argument.
+  Returns:  an array reference of Bio::Taxonomy objects
+  Args:     a Bio::Taxonomy object.
+
+=cut
+
+sub fetch_children {
+    # Interface stub: delegates to throw_not_implemented() on the invocant.
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Node.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Node.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Node.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,75 @@
+# $Id: Node.pm,v 1.17.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Taxonomy::Node
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxonomy::Node - A node in a represented taxonomy
+
+=head1 SYNOPSIS
+
+  use Bio::Taxon;
+  # This module has been renamed Bio::Taxon - use that instead
+
+=head1 DESCRIPTION
+
+This module has been renamed Bio::Taxon - use that instead.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Juguang Xiao,     juguang at tll.org.sg
+Gabriel Valiente, valiente at lsi.upc.edu
+Sendu Bala,       bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Taxonomy::Node;
+use strict;
+
+
+use base qw(Bio::Taxon);
+
+sub new {
+    # Builds the object through the parent constructor (Bio::Taxon, per the
+    # 'use base' line) and warns on every construction that this module has
+    # been renamed.
+    my $class = shift;
+    my $taxon = $class->SUPER::new(@_);
+    $taxon->warn("This module has been renamed Bio::Taxon - use that instead");
+    return $taxon;
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Taxon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Taxon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Taxon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,643 @@
+# $Id: Taxon.pm,v 1.5.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Taxonomy::Taxon
+#
+# Cared for by Dan Kortschak but pilfered extensively from 
+# the Bio::Tree::Node code of Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxonomy::Taxon - Generic Taxonomic Entity object
+
+=head1 SYNOPSIS
+
+    NB: This module is deprecated. Use Bio::Taxon instead.
+
+    use Bio::Taxonomy::Taxon;
+    my $taxonA = new Bio::Taxonomy::Taxon();
+    my $taxonL = new Bio::Taxonomy::Taxon();
+    my $taxonR = new Bio::Taxonomy::Taxon();
+
+    my $taxon = new Bio::Taxonomy::Taxon();
+    $taxon->add_Descendent($taxonL);
+    $taxon->add_Descendent($taxonR);
+
+    my $species = $taxon->species;
+
+=head1 DESCRIPTION
+
+Makes a taxonomic unit suitable for use in a taxonomic tree
+
+=head1 AUTHOR
+
+Dan Kortschak email B<kortschak at rsbs.anu.edu.au>
+
+=head1 CONTRIBUTORS
+
+Sendu Bala: bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# code begins...
+
+package Bio::Taxonomy::Taxon;
+use vars qw($CREATIONORDER);
+use strict;
+
+use Bio::Species;
+
+use base qw(Bio::Root::Root Bio::Tree::NodeI);
+
+# Package-wide counter; new() passes its current value to _creation_id
+# and then increments it.
+BEGIN { 
+    $CREATIONORDER = 0;
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Taxonomy::Taxon();
+ Function: Builds a new Bio::Taxonomy::Taxon object
+ Returns : Bio::Taxonomy::Taxon
+ Args    : -descendents   => array pointer to descendents (optional)
+     	   -branch_length => branch length [integer] (optional)
+     	   -taxon     => taxon
+           -id     => unique taxon id for node (from NCBI's list preferably)
+           -rank  => the taxonomic level of the node (also from NCBI)
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->warn("Bio::Taxonomy::Taxon is deprecated. Use Bio::Taxon instead.");
+
+  # Pick the named constructor arguments out of @args.
+  my ($children,$branchlen,$id,$taxon,$rank,$desc) =
+      $self->_rearrange([qw(DESCENDENTS
+                            BRANCH_LENGTH
+                            ID
+                            TAXON
+                            RANK
+                            DESC)], @args);
+
+  # Descendents live in a hash keyed by each node's internal_id.
+  $self->{'_desc'} = {};
+  defined $desc && $self->description($desc);
+  defined $taxon && $self->taxon($taxon);
+  defined $id && $self->id($id);
+  defined $branchlen && $self->branch_length($branchlen);
+  defined $rank && $self->rank($rank);
+
+  if( defined $children ) {
+      if( ref($children) =~ /ARRAY/i ) {
+          foreach my $c ( @$children ) {
+              $self->add_Descendent($c);
+          }
+      } else {
+          # Warn and skip: a value that is not an array reference cannot
+          # be walked with @{...}.
+          $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents");
+      }
+  }
+
+  # Stamp the object with the next creation-order number.
+  $self->_creation_id($CREATIONORDER++);
+  return $self;
+}
+
+=head2 add_Descendent
+
+ Title   : add_Descendent
+ Usage   : $taxon->add_Descendent($taxon);
+ Function: Adds a descendent to a taxon
+ Returns : number of current descendents for this taxon
+ Args    : Bio::Taxonomy::Taxon
+           boolean flag, true if you want to ignore the fact that you are
+           adding a second node with the same unique id (typically memory 
+           location reference in this implementation).  default is false and 
+           will throw an error if you try and overwrite an existing node.
+
+=cut
+
+sub add_Descendent{
+   my ($self,$node,$ignoreoverwrite) = @_;
+
+   # Nothing to add.
+   unless( defined $node ) {
+       return -1;
+   }
+   unless( $node->isa('Bio::Taxonomy::Taxon') ) {
+       $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon");
+       return -1;
+   }
+
+   # The back-pointer is set before the duplicate check below.
+   $node->{'_ancestor'} = $self;
+
+   # Descendents are keyed by the node's internal id.
+   my $key = $node->internal_id;
+   if( ! $ignoreoverwrite && $self->{'_desc'}->{$key} ) {
+       $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
+   }
+   $self->{'_desc'}->{$key} = $node;
+
+   # The cached height no longer reflects the set of descendents.
+   $self->invalidate_height();
+
+   my $count = keys %{$self->{'_desc'}};
+   return $count;
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent($sortby)
+ Usage   : my @taxa = $taxon->each_Descendent;
+ Function: all the descendents for this taxon (but not their descendents
+					      i.e. not a recursive fetchall)
+ Returns : Array of Bio::Taxonomy::Taxon objects
+ Args    : $sortby [optional] "height", "creation" or coderef to be used
+           to sort the order of children taxa.
+
+=cut
+
+sub each_Descendent{
+   my ($self, $sortby) = @_;
+
+   # Default ordering is by subtree height.
+   $sortby ||= 'height';
+
+   my @children = values %{$self->{'_desc'}};
+
+   # A caller-supplied comparator is handed straight to sort.
+   if (ref $sortby eq 'CODE') {
+       my @sorted = sort $sortby @children;
+       return @sorted;
+   }
+
+   if ($sortby eq 'height') {
+       # Order by height, breaking ties by creation order.
+       return map { $_->[0] }
+              sort { $a->[1] <=> $b->[1] ||
+                     $a->[2] <=> $b->[2] }
+              map { [$_, $_->height, $_->internal_id ] }
+              @children;
+   }
+
+   # Any other value (documented as "creation") orders by creation id.
+   # This branch used to sort by height again, so "creation" never
+   # produced creation order.
+   return map { $_->[0] }
+          sort { $a->[1] <=> $b->[1] }
+          map { [$_, $_->internal_id ] }
+          @children;
+}
+
+=head2 remove_Descendent
+
+ Title   : remove_Descendent
+ Usage   : $taxon->remove_Descendent($taxon_foo);
+ Function: Removes a specific taxon from being a Descendent of this taxon
+ Returns : nothing
+ Args    : An array of Bio::Taxonomy::Taxon objects which have been previously
+           passed to the add_Descendent call of this object.
+
+=cut
+
+sub remove_Descendent{
+   my ($self, @nodes) = @_;
+   foreach my $n ( @nodes ) {
+       if( $self->{'_desc'}->{$n->internal_id} ) {
+           # Break the link in both directions before dropping the entry.
+           $n->{'_ancestor'} = undef;
+           $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef;
+           delete $self->{'_desc'}->{$n->internal_id};
+           # The cached height may have depended on the removed subtree.
+           $self->invalidate_height();
+       } else {
+           # Not one of ours: report it through debug and carry on.
+           $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self));
+           $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
+       }
+   }
+   return 1;
+}
+
+=head2 remove_all_Descendents
+
+ Title   : remove_all_Descendents
+ Usage   : $taxon->remove_all_Descendents()
+ Function: Cleanup the taxon's reference to descendents and reset
+           their ancestor pointers to undef, if you don't have a reference
+           to these objects after this call they will be cleanedup - so
+           a get_nodes from the Tree object would be a safe thing to do first
+ Returns : nothing
+ Args    : none
+
+=cut
+
+sub remove_all_Descendents{
+   my ($self) = @_;
+
+   # Only the links are cut here: each former descendent has its ancestor
+   # pointer cleared, then the descendent table is replaced by an empty one.
+   foreach my $child ( values %{ $self->{'_desc'} } ) {
+       $child->{'_ancestor'} = undef;
+   }
+   $self->{'_desc'} = {};
+
+   # The cached height was computed with the descendents just dropped;
+   # without this, height() would keep returning the old value.
+   $self->invalidate_height();
+   return 1;
+}
+
+=head2 get_Descendents
+
+ Title   : get_Descendents
+ Usage   : my @taxa = $taxon->get_Descendents;
+ Function: Recursively fetch all the taxa and their descendents
+           *NOTE* This is different from each_Descendent
+ Returns : Array of Bio::Taxonomy::Taxon objects
+ Args    : none
+
+=cut
+
+# implemented in the interface 
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : $taxon->ancestor($newval)
+ Function: Set the Ancestor
+ Returns : value of ancestor
+ Args    : newvalue (optional)
+
+=cut
+
+sub ancestor {
+   my ($self, $value) = @_;
+   # An undefined argument leaves the stored ancestor untouched.
+   $self->{'_ancestor'} = $value if defined $value;
+   return $self->{'_ancestor'};
+}
+
+=head2 branch_length
+
+ Title   : branch_length
+ Usage   : $obj->branch_length($newval)
+ Function:
+ Example :
+ Returns : value of branch_length
+ Args    : newvalue (optional)
+
+=cut
+
+sub branch_length {
+    my ($self,$value) = @_;
+    # Only a defined argument replaces the stored length.
+    defined $value and $self->{'branch_length'} = $value;
+    return $self->{'branch_length'};
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function:
+ Returns : value of description
+ Args    : newvalue (optional)
+
+=cut
+
+sub description {
+   my ($self,$value) = @_;
+   # Pure getter when no defined value is supplied.
+   unless( defined $value ) {
+       return $self->{'_description'};
+   }
+   $self->{'_description'} = $value;
+   return $self->{'_description'};
+}
+
+=head2 rank
+
+ Title   : rank
+ Usage   : $obj->rank($newval)
+ Function: Set the taxonomic rank
+ Returns : taxonomic rank of taxon
+ Args    : newvalue (optional)
+
+=cut
+
+sub rank {
+   my $self  = shift;
+   my $value = shift;
+   # Store the new rank only when one was actually given.
+   $self->{'_rank'} = $value if defined $value;
+   return $self->{'_rank'};
+}
+
+=head2 taxon
+
+ Title   : taxon
+ Usage   : $obj->taxon($newtaxon)
+ Function: Set the name of the taxon
+ Example :
+ Returns : name of taxon
+ Args    : newtaxon (optional)
+
+=cut
+
+# because internal taxa have names too...
+sub taxon {
+   my ($self,$value) = @_;
+   # With a defined argument, store it; either way hand back the stored name.
+   if( defined $value ) {
+       return $self->{'_taxon'} = $value;
+   }
+   return $self->{'_taxon'};
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id($newval)
+ Function:
+ Example :
+ Returns : value of id
+ Args    : newvalue (optional)
+
+=cut
+
+sub id {
+   my ($self,$value) = @_;
+   # Undefined argument means "just read".
+   my $have_new_value = defined $value;
+   if( $have_new_value ) {
+       $self->{'_id'} = $value;
+   }
+   return $self->{'_id'};
+}
+
+# Destructor: runs the parent class DESTROY, then (conditionally) detaches
+# and destroys the descendents held in '_desc'.
+sub DESTROY {
+    my ($self) = @_;
+    # try to ensure that everything is cleaned up
+    $self->SUPER::DESTROY();
+    # NOTE(review): new() initialises '_desc' as a hash reference, so this
+    # ARRAY test looks like it can never match and the cleanup loop below
+    # would be skipped - confirm whether /HASH/ was intended.
+    if( defined $self->{'_desc'} &&
+	ref($self->{'_desc'}) =~ /ARRAY/i ) {
+	while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
+	    $node->{'_ancestor'} = undef; # ensure no circular references
+	    $node->DESTROY();
+	    $node = undef;
+	}
+	$self->{'_desc'} = {};
+    }
+}
+
+=head2 internal_id
+
+ Title   : internal_id
+ Usage   : my $internalid = $taxon->internal_id
+ Function: Returns the internal unique id for this taxon
+           (a monotonically increasing number for this in-memory implementation
+            but could be a database determined unique id in other 
+	    implementations)
+ Returns : unique id
+ Args    : none
+
+=cut
+
+sub internal_id {
+   my ($self) = @_;
+   # The internal id is simply the creation-order number.
+   return $self->_creation_id;
+}
+
+=head2 _creation_id
+
+ Title   : _creation_id
+ Usage   : $obj->_creation_id($newval)
+ Function: a private method signifying the internal creation order
+ Returns : value of _creation_id
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub _creation_id {
+    my ($self,$value) = @_;
+    $self->{'_creation_id'} = $value if defined $value;
+
+    # Fall back to 0 when nothing (or a false value) is stored.
+    my $stored = $self->{'_creation_id'};
+    return $stored ? $stored : 0;
+}
+
+# The following methods are implemented by NodeI decorated interface
+
+=head2 is_Leaf
+
+ Title   : is_Leaf
+ Usage   : if( $node->is_Leaf )
+ Function: Get Leaf status
+ Returns : boolean
+ Args    : none
+
+=cut
+
+sub is_Leaf {
+    my ($self) = @_;
+
+    # No descendent table at all counts as a leaf.
+    my $desc = $self->{'_desc'};
+    return 1 unless defined $desc;
+
+    # Otherwise a leaf is a node whose descendent table is empty.
+    my $count = keys %{$desc};
+    return $count == 0 ? 1 : 0;
+}
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : my $str = $taxon->to_string()
+ Function: For debugging, provide a taxon as a string
+ Returns : string
+ Args    : none
+
+=cut
+
+=head2 height
+
+ Title   : height
+ Usage   : my $len = $taxon->height
+ Function: Returns the height of the tree starting at this
+           taxon.  Height is the maximum branchlength.
+ Returns : The longest length (weighting branches with branch_length) to a leaf
+ Args    : none
+
+=cut
+
+sub height {
+    my ($self) = @_;
+
+    # Serve the cached value when there is one.
+    my $cached = $self->{'_height'};
+    return $cached if defined $cached;
+
+    # Leaf: the height is just the branch length (0, with a debug note,
+    # when none is set). Leaf heights are not cached.
+    if( $self->is_Leaf ) {
+        my $length = $self->branch_length;
+        return $length if defined $length;
+        $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' ));
+        return 0;
+    }
+
+    # Internal taxon: tallest descendent plus own branch length, where a
+    # missing or zero branch length counts as 1.
+    my $tallest = 0;
+    for my $child ( $self->each_Descendent ) {
+        my $child_height = $child->height;
+        $tallest = $child_height if $child_height > $tallest;
+    }
+    $self->{'_height'} = $tallest + ($self->branch_length || 1);
+    return $self->{'_height'};
+}
+
+=head2 invalidate_height
+
+ Title   : invalidate_height
+ Usage   : private helper method
+ Function: Invalidate our cached value of the taxon's height in the tree
+ Returns : nothing
+ Args    : none
+
+=cut
+
+sub invalidate_height {
+    my ($self) = @_;
+
+    # Drop our own cached height, then ask the ancestor (if any) to do
+    # the same so the invalidation travels up the chain.
+    $self->{'_height'} = undef;
+    my $parent = $self->ancestor;
+    $parent->invalidate_height if $parent;
+}
+
+=head2 classify
+
+ Title   : classify
+ Usage   : $obj->classify()
+ Function: a method to return the classification of a species
+ Returns : name of taxon and ancestor's taxon recursively
+ Args    : boolean to specify whether we want all taxa not just ranked 
+           levels
+
+=cut
+
+sub classify {
+   my ($self,$allnodes) = @_;
+
+   # Treat a missing flag as false so the numeric comparison never sees
+   # undef (species() calls this method without arguments).
+   my $include_ancestors = (defined $allnodes && $allnodes == 1);
+
+   # The list always starts with this taxon's own name.
+   my @classification = ($self->taxon);
+
+   # NOTE(review): without the flag only this taxon's own name is returned;
+   # the POD hints that ranked ancestors were meant to be included in that
+   # case - confirm the intended behaviour before relying on it.
+   if ($include_ancestors) {
+      my $node = $self;
+      while (defined $node->ancestor) {
+         $node = $node->ancestor;
+         push @classification, $node->taxon;
+      }
+   }
+
+   return (@classification);
+}
+
+=head2 has_rank
+
+ Title   : has_rank
+ Usage   : $obj->has_rank($rank)
+ Function: a method to query ancestors' rank
+ Returns : boolean
+ Args    : $rank
+
+=cut
+
+sub has_rank {
+   my ($self,$rank) = @_;
+
+   my $node = $self;
+   return $node if $node->rank eq $rank;
+
+   # Walk towards the root. On a match the node handed back is the child
+   # of the ancestor that carries the rank, not that ancestor itself.
+   while (defined(my $parent = $node->ancestor)) {
+      return $node if $parent->rank eq $rank;
+      $node = $parent;
+   }
+
+   return;
+}
+
+=head2 has_taxon
+
+ Title   : has_taxon
+ Usage   : $obj->has_taxon($taxon)
+ Function: a method to query ancestors' taxa
+ Returns : boolean
+ Args    : Bio::Taxonomy::Taxon object
+
+=cut
+
+# Look for $taxon on the path from this node towards the root and return
+# the matching node from this node's own lineage, or nothing.
+# A node matches when its id is defined and numerically equal to $taxon's
+# id, or when both its taxon name and its rank equal $taxon's.
+sub has_taxon {
+   my ($self,$taxon) = @_;
+
+   # First test this node itself.
+   return $self if 
+      ((defined $self->id && $self->id == $taxon->id) ||
+      ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank));
+
+   # Then climb. The loop body tests the current node and stops as soon as
+   # the current node has no ancestor, so the top-most node is only ever
+   # tested by the check above (i.e. when it is the starting node).
+   while (defined $self->ancestor) {
+      # && binds tighter than ||: the 'no rank' exclusion applies only to
+      # the name/rank comparison, not to the id comparison.
+      # NOTE(review): the exclusion tests taxon, whereas add_branch in
+      # Bio::Taxonomy::Tree tests rank against 'no rank' - confirm which
+      # was intended.
+      return $self if 
+         ((defined $self->id && $self->id == $taxon->id) ||
+         ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) &&
+         ($self->taxon ne 'no rank'));
+      $self=$self->ancestor;
+   }
+
+   return;
+}
+
+=head2 distance_to_root
+
+ Title   : distance_to_root
+ Usage   : $obj->distance_to_root
+ Function: a method to count the ancestor links between this taxon and the root
+ Returns : number of links to root
+ Args    :
+
+=cut
+
+sub distance_to_root {
+   my ($self,$taxon) = @_;   # the second argument is accepted but not used
+
+   # Count one link for every step that still has an ancestor above it.
+   my $links = 0;
+   for (my $node = $self; defined $node->ancestor; $node = $node->ancestor) {
+      $links++;
+   }
+
+   return $links;
+}
+
+=head2 recent_common_ancestor
+
+ Title   : recent_common_ancestor
+ Usage   : $obj->recent_common_ancestor($taxon)
+ Function: a method to find the most recent common ancestor
+ Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank
+ Args    : Bio::Taxonomy::Taxon
+
+=cut
+
+sub recent_common_ancestor {
+   my ($self,$node) = @_;
+
+   # Starting at $node and climbing, ask has_taxon whether each candidate
+   # occurs in our own lineage; the first hit wins. The climb stops at the
+   # first candidate that has no ancestor, which is itself not tested.
+   my $candidate = $node;
+   while (defined $candidate->ancestor) {
+      my $match = $self->has_taxon($candidate);
+      if (defined $match) {
+         return $match;
+      }
+      $candidate = $candidate->ancestor;
+   }
+
+   return;
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $obj=$taxon->species;
+ Function: Returns a Bio::Species object reflecting the taxon's tree position
+ Returns : a Bio::Species object
+ Args    : none
+
+=cut
+
+sub species {
+   my ($self) = @_;
+   my $result;
+
+   # Subspecies case: this taxon sits directly under a 'species' taxon.
+   my $under_species = $self->has_rank('subspecies')
+                       && $self->ancestor->rank eq 'species';
+   if ($under_species) {
+      my $parent = $self->ancestor;
+      $result = Bio::Species->new(-classification => $parent->classify);
+      $result->genus($parent->ancestor->taxon);
+      $result->species($parent->taxon);
+      $result->sub_species($self->taxon);
+      return $result;
+   }
+
+   # Species case: genus comes from the ancestor, species from this taxon.
+   if ($self->has_rank('species')) {
+      $result = Bio::Species->new(-classification => $self->classify);
+      $result->genus($self->ancestor->taxon);
+      $result->species($self->taxon);
+      return $result;
+   }
+
+   # Neither rank was found: report the error.
+   $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n");
+   return $result;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Tree.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Tree.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy/Tree.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,450 @@
+# $Id: Tree.pm,v 1.3.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Taxonomy::Tree
+#
+# Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface.
+
+=head1 SYNOPSIS
+
+    NB: This module is deprecated. Use Bio::Taxon in combination with
+    Bio::Tree::Tree instead
+
+    # like from a TreeIO
+    my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
+    my $tree = $treeio->next_tree;
+    my @nodes = $tree->get_nodes;
+    my $root = $tree->get_root_node;
+    my @leaves = $tree->get_leaves;
+
+
+=head1 DESCRIPTION
+
+This object holds handles to Taxonomic Nodes which make up a tree.
+
+=head1 EXAMPLES
+
+  use Bio::Species;
+  use Bio::Taxonomy::Tree;
+
+  my $human=new Bio::Species;
+  my $chimp=new Bio::Species;
+  my $bonobo=new Bio::Species;
+
+  $human->classification(qw( sapiens Homo Hominidae
+                             Catarrhini Primates Eutheria
+                             Mammalia Euteleostomi Vertebrata 
+                             Craniata Chordata
+                             Metazoa Eukaryota ));
+  $chimp->classification(qw( troglodytes Pan Hominidae
+                             Catarrhini Primates Eutheria
+                             Mammalia Euteleostomi Vertebrata 
+                             Craniata Chordata
+                             Metazoa Eukaryota ));
+  $bonobo->classification(qw( paniscus Pan Hominidae
+                              Catarrhini Primates Eutheria
+                              Mammalia Euteleostomi Vertebrata 
+                              Craniata Chordata
+                              Metazoa Eukaryota ));
+
+  # ranks passed to $taxonomy match ranks of species
+  my @ranks = ('superkingdom','kingdom','phylum','subphylum',
+               'no rank 1','no rank 2','class','no rank 3','order',
+               'suborder','family','genus','species');
+
+  my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks,
+                                 -method => 'trust',
+                                 -order => -1);
+
+
+  my $tree1=new Bio::Taxonomy::Tree;
+  my $tree2=new Bio::Taxonomy::Tree;
+
+  $tree1->make_species_branch($human,$taxonomy);
+  $tree2->make_species_branch($chimp,$taxonomy);
+
+  my ($homo_sapiens)=$tree1->get_leaves;
+
+  $tree1->splice($tree2);
+
+  $tree1->add_species($bonobo,$taxonomy);
+
+  my @taxa;
+  foreach my $leaf ($tree1->get_leaves) {
+     push @taxa,$leaf->taxon;
+  }
+  print join(", ",@taxa)."\n";
+
+  @taxa=();
+  $tree1->remove_branch($homo_sapiens);
+  foreach my $leaf ($tree1->get_leaves) {
+     push @taxa,$leaf->taxon;
+  }
+  print join(", ",@taxa)."\n";
+
+=head1 FEEDBACK
+
+See AUTHOR
+
+=head1 AUTHOR - Dan Kortschak
+
+Email kortschak at rsbs.anu.edu.au
+
+=head1 CONTRIBUTORS
+
+Mainly Jason Stajich
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Code begins...
+
+
+package Bio::Taxonomy::Tree;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Taxonomy::Taxon;
+
+# Import rank information from Bio::Taxonomy.pm
+use vars qw(@RANK %RANK);
+
+use base qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Taxonomy::Tree();
+ Function: Builds a new Bio::Taxonomy::Tree object 
+ Returns : Bio::Taxonomy::Tree
+ Args    : 
+
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->warn("Bio::Taxonomy::Tree is deprecated. Use Bio::Taxon in combination with Bio::Tree::Tree instead.");
+
+  # Start out as an empty tree.
+  $self->{'_rootnode'} = undef;
+  $self->{'_maxbranchlen'} = 0;
+
+  # An optional -root argument supplies the root taxon.
+  my ($root) = $self->_rearrange([qw(ROOT)], @args);
+  if( $root ) { $self->set_root_node($root); }
+  return $self;
+}
+
+
+=head2 get_nodes
+
+ Title   : get_nodes
+ Usage   : my @nodes = $tree->get_nodes()
+ Function: Return list of Bio::Taxonomy::Taxon objects
+ Returns : array of Bio::Taxonomy::Taxon objects
+ Args    : (named values) hash with one value 
+           order => 'b|breadth' first order or 'd|depth' first order
+
+=cut
+
+sub get_nodes{
+   my ($self, @args) = @_;
+
+   # Named arguments: -order (default 'depth') and -sortby (default 'height').
+   my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)], @args);
+   $order ||= 'depth';
+   $sortby ||= 'height';
+
+   if ($order =~ m/^b|(breadth)$/oi) {
+      # Breadth-first: work through an explicit queue instead of pushing
+      # onto an array while a foreach is iterating over it.
+      my @queue = ($self->get_root_node);
+      my @visited;
+      while (@queue) {
+         my $node = shift @queue;
+         push @visited, $node;
+         push @queue, $node->each_Descendent($sortby);
+      }
+      return @visited;
+   }
+
+   if ($order =~ m/^d|(depth)$/oi) {
+      # Depth-first: the root followed by everything get_Descendents reports.
+      my $node = $self->get_root_node;
+      my @children = ($node,$node->get_Descendents($sortby));
+      return @children;
+   }
+
+   # Unrecognised order: nothing to return.
+   return;
+}
+
+=head2 get_root_node
+
+ Title   : get_root_node
+ Usage   : my $node = $tree->get_root_node();
+ Function: Get the Top Node in the tree, in this implementation
+           Trees only have one top node.
+ Returns : Bio::Taxonomy::Taxon object
+ Args    : none
+
+=cut
+
+
+sub get_root_node{
+   my $self = shift;
+   # The root taxon is kept under '_rootnode'.
+   return $self->{'_rootnode'};
+}
+
+=head2 set_root_node
+
+ Title   : set_root_node
+ Usage   : $tree->set_root_node($node)
+ Function: Set the Root Node for the Tree
+ Returns : Bio::Taxonomy::Taxon
+ Args    : Bio::Taxonomy::Taxon
+
+=cut
+
+
+sub set_root_node{
+   my ($self,$value) = @_;
+
+   # Called without a defined value this is a plain getter.
+   return $self->get_root_node unless defined $value;
+
+   # Only a Bio::Taxonomy::Taxon is accepted; anything else is warned
+   # about and the current root is left in place.
+   if( $value->isa('Bio::Taxonomy::Taxon') ) {
+      $self->{'_rootnode'} = $value;
+   } else {
+      $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon");
+   }
+   return $self->get_root_node;
+}
+
+
+=head2 get_leaves
+
+ Title   : get_leaves
+ Usage   : my @nodes = $tree->get_leaves()
+ Function: Return list of Bio::Taxonomy::Taxon objects
+ Returns : array of Bio::Taxonomy::Taxon objects
+ Args    : 
+
+=cut
+
+
+sub get_leaves{
+   my ($self) = @_;
+
+   # A tree without a root has no leaves; start with an empty queue
+   # rather than calling a method on undef.
+   my $root  = $self->get_root_node;
+   my @queue = defined $root ? ($root) : ();
+
+   # Breadth-first walk using an explicit queue (no pushing onto an array
+   # while foreach is iterating over it), keeping the nodes that report
+   # themselves as leaves.
+   my @leaves;
+   while (@queue) {
+      my $node = shift @queue;
+      push @leaves, $node if $node->is_Leaf;
+      push @queue, $node->each_Descendent();
+   }
+   return @leaves;
+}
+
+=head2 make_species_branch
+
+ Title   : make_species_branch
+ Usage   : @nodes = $tree->make_species_branch($species,$taxonomy)
+ Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object
+ Returns : array of Bio::Taxonomy::Taxon objects
+ Args    : Bio::Species and Bio::Taxonomy objects
+
+=cut
+
+# I'm not happy that make_species_branch and make_branch are separate routines
+# should be able to just make_branch and have it sort things out
+
+sub make_species_branch{
+   my ($self,$species,$taxonomy) = @_;
+
+   # Both arguments must be of the expected classes.
+   $species->isa('Bio::Species')
+      or $self->throw("Trying to classify $species which is not a Bio::Species object");
+   $taxonomy->isa('Bio::Taxonomy')
+      or $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object");
+
+   # Detach whatever hangs off the current root so a path is not duplicated.
+   my $old_root = $self->get_root_node;
+   $old_root->remove_all_Descendents if defined $old_root;
+
+   # Each classification entry holds the taxon name at index 0 and the
+   # rank at index 1; build one Taxon per entry.
+   my @nodes;
+   foreach my $entry ($taxonomy->classify($species)) {
+      push @nodes, Bio::Taxonomy::Taxon->new(-taxon => $entry->[0],
+                                             -rank  => $entry->[1]);
+   }
+
+   # Chain the taxa together: each one becomes the descendent of the
+   # one before it.
+   foreach my $i (1 .. $#nodes) {
+      $nodes[$i-1]->add_Descendent($nodes[$i]);
+   }
+
+   $self->set_root_node($nodes[0]);
+
+   return @nodes;
+}
+
+
+=head2 make_branch
+
+ Title   : make_branch
+ Usage   : $tree->make_branch($node)
+ Function: Make a linear Bio::Taxonomy::Tree object from a leafish node
+ Returns :
+ Args    : Bio::Taxonomy::Taxon object
+
+=cut
+
+
+sub make_branch{
+   my ($self,$node) = @_;
+
+   # this is done to make sure we aren't duplicating a path
+   # note that if you are using a linked set of nodes which include a node
+   # already in the tree, this will break
+   # A freshly created tree has no root yet, so only clear when there is
+   # one (make_species_branch guards the same call).
+   my $root = $self->get_root_node;
+   $root->remove_all_Descendents if defined $root;
+
+   # Climb the ancestor chain, re-pointing the root at each node visited;
+   # the climb stops at the first node that has no ancestor.
+   while (defined $node->ancestor) {
+      $self->set_root_node($node);
+      $node=$node->ancestor;
+   }
+}
+
+
+=head2 splice
+
+ Title   : splice
+ Usage   : @nodes = $tree->splice($tree)
+ Function: Fuse another Bio::Taxonomy::Tree object into this one
+ Returns : array of Bio::Taxonomy::Taxon added to tree
+ Args    : Bio::Taxonomy::Tree object
+
+=cut
+
+
+sub splice{
+   my ($self,$tree) = @_;
+
+   # Graft every leaf of the other tree onto this one and collect
+   # whatever add_branch reports for each.
+   my @added;
+   for my $leaf ($tree->get_leaves) {
+      my @grafted = $self->add_branch($leaf);
+      push @added, @grafted;
+   }
+
+   return @added;
+}
+
+=head2 add_species
+
+ Title   : add_species
+ Usage   : @nodes = $tree->add_species($species,$taxonomy)
+ Function: Add a new species to this Bio::Taxonomy::Tree object
+ Returns : array of Bio::Taxonomy::Taxon added to tree
+ Args    : Bio::Species object
+
+=cut
+
+
+sub add_species{
+   my ($self,$species,$taxonomy) = @_;
+
+   # Build the species' lineage as a separate tree first ...
+   my $lineage = Bio::Taxonomy::Tree->new;
+   my @lineage_nodes = $lineage->make_species_branch($species,$taxonomy);
+
+   # ... then merge it into this tree via the first leaf it reports.
+   my @tips = $lineage->get_leaves;
+
+   return $self->add_branch($tips[0]);
+}
+
+=head2 add_branch
+
+ Title   : add_branch
+ Usage   : $tree->add_branch($node,boolean)
+ Function: Add a new branch to this Bio::Taxonomy::Tree object
+ Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree
+ Args    : Bio::Taxonomy::Taxon object
+           boolean flag to force overwrite of descendent
+             (see Bio::Node->add_Descendent)
+
+=cut
+
+
+sub add_branch {
+   my ($self,$node,$force) = @_;
+
+   my $best_node_level=0;
+   my $best_node;
+
+   # Among the matches found against each of our leaves, keep the one on
+   # the new branch that lies furthest from its root. A match at distance
+   # 0 is never accepted.
+   foreach my $leaf ($self->get_leaves) {
+      my $common=$node->recent_common_ancestor($leaf); # the root of the part to add
+      if (defined $common && ($common->distance_to_root > $best_node_level)) {
+         $best_node_level = $common->distance_to_root;
+         $best_node = $common;
+      }
+   }
+
+   return unless defined $best_node;
+
+   # Hang the descendents of the best match under every node of this tree
+   # that corresponds to it. The best match is used here and in the return
+   # value; the per-leaf result of the loop above only reflects the last
+   # leaf examined and may be undefined.
+   my @tree_nodes = ($self->get_root_node,$self->get_root_node->get_Descendents);
+   foreach my $tree_node (@tree_nodes) {
+      # && binds tighter than ||: the 'no rank' exclusion applies only to
+      # the rank/name comparison, not to the id comparison.
+      if ((defined $best_node->id && $best_node->id == $tree_node->id) ||
+         ($best_node->rank eq $tree_node->rank && $best_node->taxon eq $tree_node->taxon) &&
+         ($best_node->rank ne 'no rank')) {
+         foreach my $descendent ($best_node->each_Descendent) {
+            $tree_node->add_Descendent($descendent,$force);
+         }
+      }
+
+      $self->set_root_node($tree_node) if $tree_node->distance_to_root==0;
+   }
+
+   return ($best_node->get_Descendents);
+}
+
+=head2 remove_branch
+
+ Title   : remove_branch
+ Usage   : $tree->remove_branch($node)
+ Function: remove a branch up to the next multifurcation
+ Returns :
+ Args    : Bio::Taxonomy::Taxon object
+
+=cut
+
+
+sub remove_branch{
+   my ($self,$node) = @_;
+
+   # we can define a branch at any point along it
+
+   # Climb until the parent has more than one descendent (the next
+   # multifurcation) or there is no parent left.
+   while (defined $node->ancestor) {
+      last if $node->ancestor->each_Descendent > 1;
+      $node=$node->ancestor;
+   }
+
+   # Cut everything below the top of the branch first; remove_Descendent
+   # only unlinks the node it is given.
+   $node->remove_all_Descendents;
+
+   # Then unlink the branch from its parent. When the climb ended at a
+   # node without an ancestor there is nothing to unlink from.
+   my $parent = $node->ancestor;
+   $parent->remove_Descendent($node) if defined $parent;
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Taxonomy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,434 @@
+# $Id: Taxonomy.pm,v 1.8.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::Taxonomy
+#
+# Cared for by Juguang Xiao
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Taxonomy - representing Taxonomy.
+
+=head1 SYNOPSIS
+
+  NB: This module is deprecated. Use Bio::Taxon in combination with
+  Bio::Tree::Tree methods instead.
+
+  use Bio::Taxonomy;
+
+  # CREATION: You can either create an instance by assigning it,
+  # or fetch it through factory.
+
+  # Create the nodes first. See Bio::Taxonomy::Node for details.
+  my $node_species_sapiens = Bio::Taxonomy::Node->new(
+      -object_id => 9606, # or -ncbi_taxid. Required tag
+      -names => {
+          'scientific' => ['sapiens'],
+          'common_name' => ['human']
+      },
+      -rank => 'species'  # Required tag
+  );
+  my $node_genus_Homo = Bio::Taxonomy::Node->new(
+      -object_id => 9605,
+      -names => { 'scientific' => ['Homo'] },
+      -rank => 'genus'
+  );
+  my $node_class_Mammalia = Bio::Taxonomy::Node->new(
+      -object_id => 40674,
+      -names => {
+          'scientific' => ['Mammalia'],
+          'common' => ['mammals']
+      },
+      -rank => 'class'
+  );
+  my $taxonomy = Bio::Taxonomy->new;
+  $taxonomy->add_node($node_class_Mammalia);
+  $taxonomy->add_node($node_species_sapiens);
+  $taxonomy->add_node($node_genus_Homo);
+
+  # OR you can fetch it through a factory implementing
+  # Bio::Taxonomy::FactoryI
+  my $factory;
+
+  my $taxonomy = $factory->fetch_by_ncbi_taxid(40674);
+
+  # USAGE
+
+  # In this case, binomial returns a defined value.
+  my $binomial = $taxonomy->binomial;
+
+  # 'common_names' refers to the lowest-rank node's common names, in
+  # array.
+  my @common_names = $taxonomy->common_names;
+
+  # 'get_node', will return undef if the rank is not defined in
+  # taxonomy object.  It will throw error if the rank string is not
+  # defined, say 'species lah'.
+  my $node = $taxonomy->get_node('class');
+  my @nodes = $taxonomy->get_all_nodes;
+
+  # Also, you can search for parent and children nodes, if taxonomy
+  # comes with factory.
+
+  my $parent_taxonomy = $taxonomy->get_parent
+
+=head1 DESCRIPTION
+
+Bio::Taxonomy object represents any rank-level in taxonomy system,
+rather than Bio::Species which is able to represent only
+species-level.
+
+There are two ways to create Taxonomy object, e.g.
+1) instantiate an object and assign all nodes on your own code; and
+2) fetch an object by factory.
+
+=head2 Creation by instantiation
+
+The abstraction of Taxonomy is actually a hash in data structure
+term. The keys of the hash are the rank names, such as 'genus' and
+'species', and the values are the instances of Bio::Taxonomy::Node.
+
+=head2 Creation by Factory fetching
+
+NCBI Taxonomy system is well accepted as the standard. The Taxonomy
+Factories in bioperl access this system, through HTTP to NCBI Entrez,
+dump file, and advanced biosql database.
+
+Bio::Taxonomy::FactoryI defines all methods that all implementations
+must obey.
+
+$factory-E<gt>fetch is a general method to fetch Taxonomy by either
+NCBI taxid or any types of names.
+
+$factory-E<gt>fetch_parent($taxonomy), returns a Taxonomy that is
+one-step higher rank of the taxonomy specified as argument.
+
+$factory-E<gt>fetch_children($taxonomy), reports an array of Taxonomy
+those are one-step lower rank of the taxonomy specified as the
+argument.
+
+=head2 Usage of Taxonomy object
+
+##
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 CONTACT
+
+Juguang Xiao, juguang at tll.org.sg
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# code begins...
+
+
+package Bio::Taxonomy;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Taxonomy();
+ Function: Builds a new Bio::Taxonomy object
+ Returns : Bio::Taxonomy
+ Args    : -method  -> method used to decide classification
+                       (none|trust|lookup)
+           -ranks   -> what ranks are there
+
+=cut
+
+
+sub new {
+   my ($class,@args) = @_;
+   # Construct via the parent class, then warn that this class is deprecated.
+   my $self = $class->SUPER::new(@args);
+   $self->warn("Bio::Taxonomy is deprecated. Use Bio::Taxon in combination with Bio::Tree::Tree instead.");
+
+   $self->{'_method'}='none';
+   $self->{'_ranks'}=[];
+   $self->{'_rank_hash'}={};
+   $self->{_hierarchy} = {}; # used to store the nodes, with ranks as keys.
+   my ($method,$ranks,$order) = $self->_rearrange([qw(METHOD RANKS ORDER)], @args);
+
+   if ($method) {
+      $self->method($method);
+   }
+
+   if (defined $ranks &&
+      (ref($ranks) eq "ARRAY") ) {
+      $self->ranks(@$ranks);
+   } else {
+      # default ranks
+      # I think these are in the right order, but not sure:
+      # some parvorder|suborder and varietas|subspecies seem
+      # to be at the same level - any taxonomists?
+      # I don't expect that these will actually be used except as a way
+      # to find what ranks there are in taxonomic use
+      $self->ranks(('root',
+        'superkingdom', 'kingdom',
+        'superphylum', 'phylum', 'subphylum',
+        'superclass', 'class', 'subclass', 'infraclass',
+        'superorder', 'order', 'suborder', 'parvorder', 'infraorder',
+        'superfamily', 'family', 'subfamily',
+        'tribe', 'subtribe',
+        'genus', 'subgenus',
+        'species group', 'species subgroup', 'species', 'subspecies',
+        'varietas', 'forma', 'no rank'));
+   }
+
+   return $self;
+}
+
+
+=head2 method
+
+ Title   : method
+ Usage   : $obj = taxonomy->method($method);
+ Function: set or return the method used to decide classification
+ Returns : $obj
+ Args    : $obj
+
+=cut
+
+
+sub method {
+   my ($self,$value) = @_;
+   # anchored so that only the exact names none, trust or lookup are stored
+   $self->{'_method'} = $value
+       if defined $value && $value =~ /\A(?:none|trust|lookup)\z/;
+   return $self->{'_method'};
+}
+
+
+=head2 classify
+
+ Title   : classify
+ Usage   : @obj[][0-1] = taxonomy->classify($species);
+ Function: return a ranked classification
+ Returns : @obj of taxa and ranks as word pairs separated by "@"
+ Args    : Bio::Species object
+
+=cut
+
+
+sub classify {
+   my ($self,$value) = @_;
+
+   $self->throw("Trying to classify $value which is not a Bio::Species object")
+      unless $value->isa('Bio::Species');
+
+   # after reversing, the last two entries are treated as genus and species
+   my @lineage = reverse($value->classification);
+   my $how = $self->method;
+   my @ranks;
+
+   if ($how eq 'none') {
+      # every entry before the last two is reported as 'no rank'
+      foreach my $i (0 .. $#lineage - 2) {
+         push @ranks, [$lineage[$i], 'no rank'];
+      }
+      push @ranks, [$lineage[-2], 'genus'];
+      push @ranks, [$value->binomial, 'species'];
+   }
+   elsif ($how eq 'trust') {
+      # entries are paired positionally with the configured ranks
+      if (scalar(@lineage) != scalar($self->ranks)) {
+         $self->throw("Species object and taxonomy object cannot be reconciled");
+      }
+      else {
+         foreach my $i (0 .. $#lineage) {
+            my $rank = $self->rank_of_number($i);
+            my $name = ($rank eq 'species') ? $value->binomial : $lineage[$i];
+            push @ranks, [$name, $rank];
+         }
+      }
+   }
+   elsif ($how eq 'lookup') {
+      # a database lookup of each taxon's rank is not implemented yet
+      $self->throw_not_implemented();
+   }
+
+   return @ranks;
+}
+
+
+=head2 level_of
+
+ Title   : level_of
+ Usage   : $obj = taxonomy->level_of($obj);
+ Function: returns the level of a rank name
+ Returns : $obj
+ Args    : $obj
+
+=cut
+
+
+sub level_of {
+   my $self = shift;
+   my $rank_name = shift; # looked up in the table filled in by ranks()
+   return $self->{'_rank_hash'}->{$rank_name};
+}
+
+
+=head2 rank_of_number
+
+ Title   : rank_of_number
+ Usage   : $obj = taxonomy->rank_of_number($obj);
+ Function: returns the rank name of a rank level
+ Returns : $obj
+ Args    : $obj
+
+=cut
+
+
+sub rank_of_number {
+   my $self = shift;
+   my $level = shift; # zero-based position in the list of ranks
+   return $self->{'_ranks'}->[$level];
+}
+
+
+=head2 ranks
+
+ Title   : ranks
+ Usage   : @obj = taxonomy->ranks(@obj);
+ Function: set or return all ranks
+ Returns : @obj
+ Args    : @obj
+
+=cut
+
+
+sub ranks {
+   my ($self,@value) = @_;
+
+   # No uniqueness check is made on the rank names (this should be done).
+   # 'no rank' may occur several times, so it is never given a level.
+   $self->{'_ranks'} = \@value if @value;
+
+   # Rebuild the name => level index from scratch so that names from a
+   # previously set list do not linger after the ranks are replaced.
+   my $ranks = $self->{'_ranks'};
+   $self->{'_rank_hash'} = {};
+   foreach my $i (0 .. $#{$ranks}) {
+      next if $ranks->[$i] eq 'no rank';
+      $self->{'_rank_hash'}{$ranks->[$i]} = $i;
+   }
+
+   return @{$ranks};
+}
+
+=head2 add_node
+
+  Title:    add_node
+  Usage:    $obj->add_node($node[, $node2, ...]);
+  Function: add one or more Bio::Taxonomy::Node objects
+  Returns:  None
+  Args:     any number of Bio::Taxonomy::Node(s)
+
+=cut
+
+sub add_node {
+    my ($self, @nodes) = @_;
+    for my $node (@nodes) {
+        unless ($node->isa('Bio::Taxonomy::Node')) {
+            $self->throw("A Bio::Taxonomy::Node object needed");
+        }
+
+        # One node is kept per rank: a node added later silently replaces
+        # an earlier node of the same rank (the original duplicate check
+        # was disabled by its author).
+        my $rank = $node->rank;
+        $self->{_hierarchy}->{$rank} = $node;
+    }
+}
+
+=head2 binomial
+
+  Title   : binomial
+  Usage   : my $val = $obj->binomial;
+  Function: returns the binomial name if this taxonomy reaches species level
+  Returns : the binomial name
+            OR undef if taxonomy does not reach species level
+  Args    : [No arguments]
+
+=cut
+
+sub binomial {
+    my $self = shift;
+    # get_node() throws for a rank without a node, so look in the store
+    # directly in order to return nothing when there is no species node.
+    my $species = $self->{_hierarchy}->{'species'} or return;
+    return $species->scientific_name;
+}
+
+=head2 get_node
+
+  Title   : get_node
+  Usage   : $node = $taxonomy->get_node('species');
+  Function: get a Bio::Taxonomy::Node object according to rank name
+  Returns : a Bio::Taxonomy::Node object or undef if null
+  Args    : a valid rank name
+
+=cut
+
+sub get_node {
+    my ($self, $rank) = @_;
+    # Validate against the configured rank names with an exact comparison,
+    # not a regex match against the ranks that happen to hold a node.
+    $self->throw("'$rank' is not in the rank list")
+        unless grep { $_ eq $rank } @{$self->{'_ranks'}};
+    return $self->{_hierarchy}->{$rank}; # undef when no node has this rank
+}
+
+=head2 classification
+
+  Title   : classification
+  Usage   : @names = $taxonomy->classification;
+  Function: get the classification names of one taxonomy
+  Returns : array of names
+  Args    : [No arguments]
+
+=cut
+
+sub classification {
+    my $self = shift;
+    my $level_of = $self->{_rank_hash};
+    my $node_for = $self->{_hierarchy};
+    # order the stored ranks by their level, then report each node's name
+    my @ranks_by_level =
+        sort { $level_of->{$a} <=> $level_of->{$b} } keys %{$node_for};
+    return map { $node_for->{$_}->scientific_name } @ranks_by_level;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AlignFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AlignFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AlignFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,143 @@
+# $Id: AlignFactory.pm,v 1.8.14.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Tools::AlignFactory
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::AlignFactory - Base object for alignment factories
+
+=head1 SYNOPSIS
+
+You won't be using this as an object, but using a derived class
+like Bio::Tools::pSW
+
+=head1 DESCRIPTION
+
+Holds common Alignment Factory attributes in place
+
+=head1 CONTACT
+
+http://bio.perl.org/ or birney at sanger.ac.uk 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tools::AlignFactory;
+use strict;
+
+use base qw(Bio::Root::Root);
+
+BEGIN {
+    # Loading is attempted inside eval so that a missing extension ends
+    # the program with an explanatory message instead of a raw failure.
+    eval { require Bio::Ext::Align; };
+    if ( $@ ) {
+        print STDERR ("\nThe C-compiled engine for Smith Waterman alignments (Bio::Ext::Align) has not been installed.\n Please install the bioperl-ext package\n\n");
+        exit(1);
+    }
+}
+
+sub new {
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize(@args);
+
+  # set up defaults: the kbyte limit and reporting switched off
+  $self->{'kbyte'} = 20000;
+  $self->{'report'} = 0;
+  return $self;
+}
+
+
+=head2 kbyte
+
+ Title     : kbyte()
+ Usage     : set/gets the amount of memory able to be used
+ Function  : 
+           : $factory->kbyte(200);
+           :
+ Returns   : 
+ Argument  : memory in kilobytes
+
+=cut
+
+sub kbyte {
+    my $self = shift;
+    my $new_limit = shift;
+
+    # a defined argument replaces the stored limit; undef leaves it alone
+    $self->{'kbyte'} = $new_limit if defined $new_limit;
+    return $self->{'kbyte'};
+}
+
+
+=head2 report
+
+ Title     : report()
+ Usage     : set/gets the report boolean to issue reports or not
+ Function  : 
+           : $factory->report(1); # reporting goes on
+           :
+ Returns   : n/a
+ Argument  : 1 or 0
+
+=cut
+
+sub report {
+    my $self = shift;
+    my $flag = shift;
+
+    if( defined $flag ) {
+        # only the numeric values 1 and 0 are accepted as booleans
+        unless( $flag == 1 || $flag == 0 ) {
+            $self->throw("Attempting to modify AlignFactory Report with no boolean value!");
+        }
+        $self->{'report'} = $flag;
+    }
+    return $self->{'report'};
+}
+
+=head2 set_memory_and_report
+
+ Title   : set_memory_and_report
+ Usage   : Only used by subclasses.
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub set_memory_and_report{
+   my ($self) = @_;
+
+   # the engine is not given a matrix limit below 5 kilobytes
+   if( $self->{'kbyte'} < 5 ) {
+       $self->throw("You can't suggest aligning things with less than 5kb");
+   }
+   &Bio::Ext::Align::change_max_BaseMatrix_kbytes($self->{'kbyte'});
+
+   if( $self->{'report'} == 0 ) {
+       &Bio::Ext::Align::error_off(16);
+   } else {
+       &Bio::Ext::Align::error_on(16);
+   }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Consed.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Consed.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Consed.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1837 @@
+# $Id: Consed.pm,v 1.41.4.1 2006/10/02 23:10:32 sendu Exp $
+# Bio::Tools::Alignment::Consed
+#
+# Cared for by Chad Matsalla
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Alignment::Consed - A module to work with objects from consed .ace files
+
+=head1 SYNOPSIS
+
+  # a report for sequencing stuff
+  my $o_consed = new Bio::Tools::Alignment::Consed( 
+      -acefile => "/path/to/an/acefile.ace.1",
+      -verbose => 1);
+  my $foo = $o_consed->set_reverse_designator("r");
+  my $bar = $o_consed->set_forward_designator("f");
+
+  # get the contig numbers
+  my @keys = $o_consed->get_contigs();
+
+  # construct the doublets
+  my $setter_doublets = $o_consed->choose_doublets();
+
+  # get the doublets
+  my @doublets = $o_consed->get_doublets();
+
+=head1 DESCRIPTION
+
+L<Bio::Tools::Alignment::Consed> provides methods and objects to deal
+with the output from the Consed software suite. Specifically,
+takes an C<.ace> file and provides objects for the results.
+
+A word about doublets: This module was written to accommodate a large
+EST sequencing operation. In this case, EST's were sequenced from the
+3' and from the 5' end of the EST. The objective was to find a
+consensus sequence for these two reads.  Thus, a contig of two is what
+we wanted, and this contig should consist of the forward and reverse
+reads of a given clone. For example, for a forward designator of "F"
+and a reverse designator of "R", if the two reads chad1F and chad1R
+were in a single contig (for example Contig 5) it will be determined
+that the consensus sequence for Contig 5 will be the sequence for
+clone chad1.
+
+Doublets are good!
+
+This module parses C<.ace> and related files. A detailed list of methods
+can be found at the end of this document.
+
+I wrote a detailed rationale for design that may explain the reasons
+why some things were done the way they were done. That document is
+beyond the scope of this pod and can probably be found in the
+directory from which this module was 'made' or at
+L<http://www.dieselwurks.com/bioinformatics/consedpm_documentation.pdf>.
+
+Note that the POD in that document might be old but the original
+rationale still stands.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email chad-at-dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#' 
+
+package Bio::Tools::Alignment::Consed;
+
+use strict;
+
+use FileHandle;
+use Dumpvalue qw(dumpValue);
+use Bio::Tools::Alignment::Trim;
+use File::Spec;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+our %DEFAULTS = ( 'f_designator' => 'f',
+		  'r_designator' => 'r');
+
+=head2 new()
+
+ Title   : new(-acefile => $path_to_some_acefile, -verbose => "1")
+ Usage   : $o_consed = Bio::Tools::Alignment::Consed->
+              new(-acefile => $path_to_some_acefile, -verbose => "1");
+ Function: Construct the Bio::Tools::Alignment::Consed object. Sets
+	   verbosity for the following procedures, if necessary:
+	   1. Construct a new Bio::Tools::Alignment::Trim object, to
+	   handle quality trimming 2. Read in the acefile and parse it
+
+ Returns : A reference to a Bio::Tools::Alignment::Consed object.
+ Args    : A hash. (-acefile) is the filename of an acefile. If a full path
+	   is not specified "./" is prepended to the filename and used from
+	   instantiation until destruction. If you want 
+           Bio::Tools::Alignment::Consed to be noisy during parsing of
+           the acefile, specify some value for (-verbose).
+
+=cut
+
+sub new {
+    my ($class,%args) = @_;
+    my $self = $class->SUPER::new(%args);
+
+    my $acefile = $args{'-acefile'};
+    $self->{'filename'} = $acefile;
+
+    # Keep only the directory part of the acefile name. File::Spec is
+    # used rather than matching on "/" so that this is not tied to UNIX
+    # path conventions.
+    my ($volume, $directory, $basename) = File::Spec->splitpath($acefile);
+    $self->{'path'} = $directory;
+
+    $self->_initialize_io('-file' => $acefile);
+    $self->{'o_trim'} =
+        Bio::Tools::Alignment::Trim->new(-verbose => $self->verbose());
+
+    # default read-name designators, set through the public setters
+    $self->set_forward_designator($DEFAULTS{'f_designator'});
+    $self->set_reverse_designator($DEFAULTS{'r_designator'});
+    # parse the acefile straight away
+    $self->_read_file();
+    return $self;
+}
+
+=head2 verbose()
+
+ Title   : verbose()
+ Usage   : $o_consed->verbose(1);
+ Function: Set the verbosity level for debugging messages. On instantiation
+	   of the Bio::Tools::Alignment::Consed object the verbosity level
+           is set to 0 (quiet).
+ Returns : 1 or 0.
+ Args    : The verbosity levels are:
+	      0 - quiet
+	      1 - noisy
+	      2 - noisier
+	      3 - annoyingly noisy
+
+This method for setting verbosity has largely been superseded by a
+sub-by-sub way, where for every sub you can provide a (-verbose)
+switch. I am converting this bit-by-bit so do not be surprised
+if some subs do not honour this.
+
+=cut
+
+# from RootI
+
+# backwards compat
+sub set_verbose { my $self = shift; return $self->verbose(@_); }
+
+=head2 get_filename()
+
+ Title   : get_filename()
+ Usage   : $o_consed->get_filename();
+ Function: Returns the name of the acefile being used by the
+	   Bio::Tools::Alignment::Consed object.
+ Returns : A scalar containing the name of a file.
+ Args    : None.
+
+=cut
+
+
+sub get_filename {
+    my ($self) = @_;
+    return $self->{'filename'}; # the -acefile value stored by new()
+}
+
+=head2 count_sequences_with_grep()
+
+ Title   : count_sequences_with_grep()
+ Usage   : $o_consed->count_sequences_with_grep();
+ Function: Use /bin/grep to scan through the files in the ace project dir
+	   and count sequences in those files. I used this method in the
+	   development of this module to verify that I was getting all of the
+	   sequences. It works, but it is (I think) unix-like platform
+	   dependent.
+ Returns : A scalar containing the number of sequences in the ace project
+	   directory.
+ Args    : None.
+
+If you are on a non-UNIX platform, you really do not have to use
+this. It is more of a debugging routine designed to address very
+specific problems.
+
+This method was reimplemented to be platform independent with a pure
+perl implementation.  The above note can be ignored.
+
+=cut
+
+sub count_sequences_with_grep {
+    my $self = shift;
+    my $counter = 0;
+
+    # count the lines of the acefile that start with "AF"
+    open(my $ACE, '<', $self->{'filename'}) or do { $self->warn("cannot open file ".$self->{'filename'}. " for grepping"); return};
+    while (my $line = <$ACE>) { $counter++ if $line =~ /^AF/; }
+    close $ACE;
+
+    # add the lines starting with ">" in every *.singlets file beside it
+    opendir(my $SINGLETS,$self->{'path'}) or return $counter; # directory unreadable: acefile count only
+    foreach my $f ( readdir($SINGLETS) ) {
+        next unless ($f =~ /\.singlets$/);
+        my $singlets_file = $self->catfile($self->{'path'},$f);
+        open(my $FILE, '<', $singlets_file) or do{ $self->warn("cannot open file ".$singlets_file); next };
+        while (my $line = <$FILE>) { $counter++ if $line =~ /^>/; }
+        close $FILE;
+    }
+    closedir $SINGLETS;
+    return $counter;
+}
+
+=head2 get_path()
+
+ Title   : get_path()
+ Usage   : $o_consed->get_path();
+ Function: Returns the path to the acefile this object is working with.
+ Returns : Scalar. The path to the working acefile.
+ Args    : None.
+
+=cut
+
+sub get_path {
+    my ($self) = @_;
+    return $self->{'path'}; # directory part of the acefile name, set by new()
+}
+
+=head2 get_contigs()
+
+ Title   : get_contigs()
+ Usage   : $o_consed->get_contigs();
+ Function: Return the keys to the Bio::Tools::Alignment::Consed object.
+ Returns : An array containing the keynames in the
+           Bio::Tools::Alignment::Consed object.
+ Args    : None.
+
+This would normally be used to get the keynames for some sort of
+iterator. These keys are worthless in general day-to-day use because
+in the Consed acefile they are simply Contig1, Contig2, ...
+
+=cut
+
+sub get_contigs {
+    my ($self,$contig) = @_; # $contig is accepted but not used
+    # the keys come back in string sort order
+    my @sorted_keys = sort(keys(%{$self->{'contigs'}}));
+    return @sorted_keys;
+}
+
+=head2 get_class($contig_keyname)
+
+ Title   : get_class($contig_keyname)
+ Usage   : $o_consed->get_class($contig_keyname);
+ Function: Return the class name for this contig
+ Returns : A scalar representing the class of this contig.
+ Args    : None.
+ Notes   : 
+
+=cut
+
+sub get_class {
+    my ($self,$contig_key) = @_;
+    return $self->{'contigs'}{$contig_key}{'class'}; # undef if none was stored
+}
+
+=head2 get_quality_array($contig_keyname)
+
+ Title   : get_quality_array($contig_keyname)
+ Usage   : $o_consed->get_quality_array($contig_keyname);
+ Function: Returns the quality for the consensus sequence for the given
+	   contig as an array. See get_quality_scalar to get this as a scalar.
+ Returns : An array containing the quality for the consensus sequence with
+	   the given keyname.
+ Args    : The keyname of a contig. Note: This is a keyname. The key would
+	   normally come from get_contigs.
+
+Returns an array, not a reference. Is this a bug? I<thinking> No.
+Well, maybe.  Why was this developed like this? I was using FreezeThaw
+for object persistence, and when it froze out these arrays it took a
+long time to thaw it. Much better as a scalar.
+
+See L<get_quality_scalar()|get_quality_scalar>
+
+=cut
+
+sub get_quality_array {
+    my ($self,$contig_key) = @_;
+    return split(' ', $self->{'contigs'}{$contig_key}{'quality'}); # one element per value
+}
+
+=head2 get_quality_scalar($contig_keyname)
+
+ Title   : get_quality_scalar($contig_keyname)
+ Usage   : $o_consed->get_quality_scalar($contig_keyname);
+ Function: Returns the quality for the consensus sequence for the given
+	   contig as a scalar. See get_quality_array to get this as an array.
+ Returns : An scalar containing the quality for the consensus sequence with
+           the given keyname.
+ Args    : The keyname of a contig. Note this is a _keyname_. The key would
+	   normally come from get_contigs.
+
+Why was this developed like this? I was using FreezeThaw for object
+persistence, and when it froze out these arrays it took a coon's age
+to thaw it. Much better as a scalar.
+
+See L<get_quality_array()|get_quality_array>
+
+=cut
+
+#'
+sub get_quality_scalar {
+    my ($self,$contig_key) = @_;
+    return $self->{contigs}{$contig_key}{quality}; # the stored string, unsplit
+}
+
+=head2 freeze_hash()
+
+ Title   : freeze_hash()
+ Usage   : $o_consed->freeze_hash();
+
+ Function: Use Ilya's FreezeThaw module to create a persistent data
+	   object for this Bio::Tools::Alignment::Consed data
+	   structure. In the case of AAFC, we use
+	   Bio::Tools::Alignment::Consed to pre-process bunches of
+	   sequences, freeze the structures, and send in a harvesting
+	   robot later to do database stuff.
+ Returns : 0 or 1;
+ Args    : None.
+
+This procedure was removed so Consed.pm won't require FreezeThaw.
+
+=cut
+
+#'
+sub freeze_hash { # retired: only warns; the old implementation below is disabled
+    my $self = shift;
+    $self->warn("This method (freeze_hash) was removed ".
+                "from the bioperl consed.pm. Sorry.\n");
+    if (1==2) { # never true, so nothing inside this block is executed
+        $self->debug("Bio::Tools::Alignment::Consed::freeze_hash:".
+                     " \$self->{path} is $self->{path}\n");
+        my $filename = $self->{'path'}."frozen"; # "frozen" inside the acefile directory
+        my %contigs = %{$self->{'contigs'}};
+        my $frozen = freeze(%contigs); # NOTE(review): no freeze() is defined or imported in view - presumably FreezeThaw's
+        umask 0001;
+        open (my $FREEZE,">$filename") or do {
+            $self->warn( "Bio::Tools::Alignment::Consed could not ".
+                         "freeze the contig hash because the file ".
+                         "($filename) could not be opened: $!\n");
+            return 1; # 1 was the failure code
+        };
+        print $FREEZE $frozen;
+        return 0; # 0 was the success code
+    }
+}
+
+=head2 get_members($contig_keyname)
+
+ Title   : get_members($contig_keyname)
+ Usage   : $o_consed->get_members($contig_keyname);
+ Function: Return the _names_ of the reads in this contig.
+ Returns : An array containing the names of the reads in this contig.
+ Args    : The keyname of a contig. Note this is a keyname. The keyname
+	   would normally come from get_contigs.
+
+See L<get_contigs()|get_contigs>
+
+=cut
+
+sub get_members {
+    my ($self,$contig) = @_;
+    if (!$contig) { # a false key (undef, '', 0) cannot name a contig
+        $self->warn("You need to provide the name of a contig to ".
+                    "use Bio::Tools::Alignment::Consed::get_members!\n");
+        return;
+    }
+    return @{ ($self->{'contigs'}->{$contig} || {})->{'member_array'} || [] }; # unknown key: empty list, nothing autovivified
+}
+
+=head2 get_members_by_name($some_arbitrary_name)
+
+ Title   : get_members_by_name($some_arbitrary_name)
+ Usage   : $o_consed->get_members_by_name($some_arbitrary_name);
+ Function: Return the names of the reads in a contig. This is the name given
+	   to $contig{key} based on what is in the contig. This is different
+	   from the keys retrieved through get_contigs().
+ Returns : An array containing the names of the reads in the contig with this
+	   name.
+ Args    : The name of a contig. Not a key, but a name.
+
+Highly inefficient. use some other method if possible.
+See L<get_contigs()|get_contigs>
+
+=cut
+
+sub get_members_by_name {
+    my ($self,$name) = @_;
+    # collect every key whose contig carries this name, to detect ambiguity
+    my @contigs_with_that_name;
+    foreach my $currkey ( sort keys %{$self->{'contigs'}} ) {
+        my $currname = $self->{'contigs'}->{$currkey}->{'name'};
+        next if (!$currname);
+        push @contigs_with_that_name,$currkey if ($currname eq "$name");
+    }
+    # Only an unambiguous match is answered. The explicit bare return stops
+    # the false value of the final test from leaking out as a bogus
+    # one-element list.
+    return unless @contigs_with_that_name == 1;
+    my $contig_num = $contigs_with_that_name[0];
+    return @{$self->{'contigs'}->{$contig_num}->{'member_array'} || []};
+}
+
+=head2 get_contig_number_by_name($some_arbitrary_name)
+
+ Title   : get_contig_number_by_name($some_arbitrary_name)
+ Usage   : $o_consed->get_contig_number_by_name($some_arbitrary_name);
+ Function: Return the key of the first contig (in sorted key order) whose
+           name matches. The name is the one given to $contig{key} based on
+           what is in the contig; the key is what get_contigs() returns.
+ Returns : A scalar containing the key of the contig with this
+           name.
+ Args    : The name of a contig. Not a key, but a name.
+
+See L<get_contigs()|get_contigs>
+
+=cut
+
+sub get_contig_number_by_name {
+    my ($self,$name) = @_;
+    # keys are visited in string sort order, so the first match wins
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $candidate = $self->{'contigs'}->{$key}->{'name'};
+        next unless $candidate;
+        return $key if $candidate eq "$name";
+    }
+}
+
+=head2 get_sequence($contig_keyname)
+
+ Title   : get_sequence($contig_keyname)
+ Usage   : $o_consed->get_sequence($contig_keyname); 
+ Function: Returns the consensus sequence for a given contig.
+ Returns : A scalar containing a sequence.
+ Args    : The keyname of a contig. Note this is a key. The key would
+	   normally come from get_contigs.
+
+See L<get_contigs()|get_contigs>
+
+=cut
+
+sub get_sequence {
+    my ($self,$contig_key) = @_;
+    return $self->{contigs}{$contig_key}{consensus}; # text gathered by _read_file
+}
+
+=head2 set_final_sequence($some_sequence)
+
+ Title   : set_final_sequence($name,$some_sequence)
+ Usage   : $o_consed->set_final_sequence($name,$some_sequence);
+ Function: Provides a manual way to set the sequence for a given key in the
+	   contig hash. Rarely used.
+ Returns : 0 or 1;
+ Args    : The name (not the keyname) of a contig and an arbitrary string.
+
+A method with a questionable and somewhat mysterious origin. May raise
+the dead or something like that.
+
+=cut
+
+sub set_final_sequence {
+    my ($self,$name,$sequence) = @_;
+    my $entry = $self->{'contigs'}->{$name};
+    # 1 signals failure (no such entry), 0 signals success
+    unless ($entry) {
+        $self->warn("You cannot set the final sequence for ".
+                    "$name because it doesn't exist!\n");
+        return 1;
+    }
+    $entry->{'final_sequence'} = $sequence;
+    return 0;
+}
+
+=head2  _read_file()
+
+ Title   : _read_file();
+ Usage   : _read_file();
+ Function: An internal subroutine used to read in an acefile and parse it
+	   into a Bio::Tools::Alignment::Consed object.
+ Returns : 0 or 1.
+ Args    : Nothing.
+
+This routine creates and saves the filehandle for reading the files in
+{fh}
+
+=cut
+
+# Parse the ace file line by line (lines come from $self->_readline())
+# and fill $self->{'contigs'}, keyed by the number captured from each
+# "CO Contig<number> ..." line.
+#
+# State carried between lines:
+#   $in_contig     - true while consensus lines of the current contig
+#                    are being collected (set by a CO line)
+#   $in_quality    - true while quality lines are being collected
+#                    (set by a BQ line)
+#   $contig_number - key of the contig the current lines belong to
+#   $top           - toggled on every AF line to alternate between the
+#                    "top" and "bottom" read slots
+#
+# Per contig the following keys are written: 'consensus', 'quality',
+# 'member_array', 'contig_direction' and the top_*/bottom_* triples
+# (name, complement, start).
+#
+# Returns : 0, always.
+# NOTE(review): none of the regex matches below are checked before their
+# capture variables are used - confirm the input always has the expected
+# shape.
+sub _read_file {
+    my ($self) = @_;
+    my ($line,$in_contig,$in_quality,$contig_number,$top);
+    # make it easier to type $fhl
+    while (defined($line=$self->_readline()) ) {
+	chomp $line;
+	# check if there is anything on this line
+	# if not, you can stop gathering consensus sequence
+	# (note that the test is for truth, so a line holding only "0"
+	# is treated like a blank line as well)
+	if (!$line) {
+	    # if the line is blank you are no longer to gather consensus 
+	    # sequence or quality values
+	    $in_contig = 0;
+	    $in_quality = 0;
+	}
+	# you are currently gathering consensus sequence
+	elsif ($in_contig) {
+	    if ($in_contig == 1) {
+		$self->debug("Adding $line to consensus of contig number $contig_number.\n");
+		$self->{'contigs'}->{$contig_number}->{'consensus'} .= $line;
+	    }
+	}
+	elsif ($in_quality) {
+	    # false lines were already handled by the first branch of this
+	    # chain, so this inner test cannot succeed here
+	    if (!$line) {
+		$in_quality = undef;
+	    }
+	    else {
+
+		# I wrote this in here because acefiles produced by
+		# cap3 do not have a leading space like the acefiles
+		# produced by phrap and there is the potential to have
+		# concatenated quality values like this: 2020 rather
+		# than 20 20 where lines collide. Thanks Andrew for
+		# noticing.
+
+		if ($self->{'contigs'}->{$contig_number}->{'quality'} &&
+                    !($self->{'contigs'}->{$contig_number}->{'quality'} =~ m/\ $/)) {
+		    $self->{'contigs'}->{$contig_number}->{'quality'} .= " ";
+		}
+		$self->{'contigs'}->{$contig_number}->{'quality'} .= $line;
+	    }
+	}
+	# a BQ line switches on the collection of quality values
+	elsif ($line =~ /^BQ/) {
+	    $in_quality = 1;
+	}
+
+	# the line /^CO/ like this:
+	# CO Contig1 796 1 1 U
+	# can be broken down as follows:
+	# CO - Contig!
+	# Contig1 - the name of this contig
+	# 796 - Number of bases in this contig
+	# 1 - Number of reads in this contig
+	# 1 - number of base segments in this contig
+	# U - Uncomplemented
+
+	elsif ($line =~ /^CO/) {
+	    # $1 is the contig number, $2 the orientation letter
+	    $line =~ m/^CO\ Contig(\d+)\ \d+\ \d+\ \d+\ (\w)/;
+	    $contig_number = $1;
+	    if ($2 eq "C") {
+		$self->debug("Contig $contig_number is complemented!\n");
+	    }
+	    # start this contig with an empty member list; AF lines fill it
+	    $self->{'contigs'}->{$contig_number}->{'member_array'} = [];
+	    $self->{'contigs'}->{$contig_number}->{'contig_direction'} = "$2";
+	    $in_contig = 1;
+	}
+
+	# 000713
+	# this BS is deprecated, I think.
+	# haha, I am really witty. <ew>
+
+	elsif ($line =~ /^BSDEPRECATED/) {
+	    $line =~ m/^BS\s+\d+\s+\d+\s+(.+)/;
+	    my $member = $1;
+	    $self->{'contigs'}->{$contig_number}->{$member}++;
+	}
+	# the members of the contigs are determined by the AF line in the ace file
+	elsif ($line =~ /^AF/) {
+	    $self->debug("I see an AF line here.\n");
+	    # $1 is the read name, $2 a single word character, $3 a
+	    # non-blank token; they are stored below as *_name,
+	    # *_complement and *_start respectively
+	    $line =~ /^AF\ (\S+)\ (\w)\ (\S+)/;
+
+            # push the name of the current read onto the member array for this contig
+	    push @{$self->{'contigs'}->{$contig_number}->{'member_array'}},$1;
+
+            # the first read in the contig will be named the "top" read
+	    if (!$top) {
+		$self->debug("\$top is not set.\n");
+		if ($self->{'contigs'}->{$contig_number}->{'contig_direction'} eq "C") {
+		    $self->debug("Reversing the order of the reads. The bottom will be $1\n");
+
+		    # if the contig sequence is marked as the
+		    # complement, the top becomes the bottom
+		    $self->{'contigs'}->{$contig_number}->{'bottom_name'} = $1;
+		    $self->{'contigs'}->{$contig_number}->{'bottom_complement'} = $2;
+		    $self->{'contigs'}->{$contig_number}->{'bottom_start'} = $3;
+		}
+		else {
+		    $self->debug("NOT reversing the order of the reads. ".
+                                 "The top_name will be $1\n");
+		    # the contig is not complemented, so this read
+		    # keeps its place as the top read
+		    $self->{'contigs'}->{$contig_number}->{'top_name'} = $1;
+		    $self->{'contigs'}->{$contig_number}->{'top_complement'} = $2;
+		    $self->{'contigs'}->{$contig_number}->{'top_start'} = $3;
+		}
+		# the next AF line fills the other slot
+		$top = 1;
+	    }
+	    else {
+
+		# if the contig sequence is marked as the complement,
+		# the top becomes the bottom and the bottom becomes
+		# the top
+		if ($self->{'contigs'}->{$contig_number}->{'contig_direction'} eq "C") {
+		    $self->debug("Reversing the order of the reads. The top will be $1\n");
+		    $self->{'contigs'}->{$contig_number}->{'top_name'} = $1;
+		    $self->{'contigs'}->{$contig_number}->{'top_complement'} = $2;
+		    $self->{'contigs'}->{$contig_number}->{'top_start'} = $3;
+		}
+		else {
+		    $self->debug("NOT reversing the order of the reads. The bottom will be $1\n");
+		    $self->{'contigs'}->{$contig_number}->{'bottom_name'} = $1;
+		    $self->{'contigs'}->{$contig_number}->{'bottom_complement'} = $2;
+		    $self->{'contigs'}->{$contig_number}->{'bottom_start'} = $3;
+		}
+		# reset the toggle so the following AF line counts as a
+		# "first" read again
+		$top = undef;
+	    }
+	}
+    }
+    return 0;
+}
+
+=head2 set_reverse_designator($some_string)
+
+ Title   : set_reverse_designator($some_string)
+ Usage   : $o_consed->set_reverse_designator($some_string);
+ Function: Set the designator for the reverse read of contigs in this
+	   Bio::Tools::Alignment::Consed object. Used to determine if
+           contigs containing two reads can be named.
+ Returns : The value of $o_consed->{reverse_designator} so you can check
+	   to see that it was set properly.
+ Args    : An arbitrary string.
+
+May be useful only to me. I<shrug>
+
+=cut
+
+# Remember the reverse-read designator on this object and pass the same
+# value on to the trim object held in {'o_trim'}.
+# Returns : the value now stored in {'reverse_designator'}.
+sub set_reverse_designator {
+    my $self = shift;
+    my ($reverse_designator) = @_;
+    $self->{'reverse_designator'} = $reverse_designator;
+    my $o_trim = $self->{'o_trim'};
+    $o_trim->set_reverse_designator($reverse_designator);
+    return $self->{'reverse_designator'};
+}				# end set_reverse_designator
+
+=head2 set_forward_designator($some_string)
+
+ Title   : set_forward_designator($some_string)
+ Usage   : $o_consed->set_forward_designator($some_string);
+ Function: Set the designator for the forward read of contigs in this
+	   Bio::Tools::Alignment::Consed object. Used to determine if
+           contigs containing two reads can be named.
+ Returns : The value of $o_consed->{forward_designator} so you can check
+	   to see that it was set properly.
+ Args    : An arbitrary string.
+
+May be useful only to me. I<shrug>
+
+=cut
+
+# Remember the forward-read designator on this object and pass the same
+# value on to the trim object held in {'o_trim'}.
+# Returns : the value now stored in {'forward_designator'}.
+sub set_forward_designator {
+    my $self = shift;
+    my ($forward_designator) = @_;
+    $self->{'forward_designator'} = $forward_designator;
+    my $o_trim = $self->{'o_trim'};
+    $o_trim->set_forward_designator($forward_designator);
+    return $self->{'forward_designator'};
+}				# end set_forward_designator
+
+=head2 set_designator_ignore_case("yes")
+
+ Title   : set_designator_ignore_case("yes")
+ Usage   : $o_consed->set_designator_ignore_case("yes");
+ Function: Deprecated.
+ Returns : Deprecated.
+ Args    : Deprecated.
+
+Deprecated. Really. Trust me.
+
+=cut
+
+# Deprecated switch. Only the literal string "yes" turns the
+# {'designator_ignore_case'} flag on; any other value leaves the current
+# setting untouched.
+# Returns : the current value of {'designator_ignore_case'}.
+sub set_designator_ignore_case {
+    my $self = shift;
+    my $ignore_case = shift;
+    $self->{'designator_ignore_case'} = 1 if $ignore_case eq "yes";
+    return $self->{'designator_ignore_case'};
+}				# end set_designator_ignore_case
+
+=head2 set_trim_points_singlets_and_singletons()
+
+ Title   : set_trim_points_singlets_and_singletons()
+ Usage   : $o_consed->set_trim_points_singlets_and_singletons();
+ Function: Set the trim points for singlets and singletons based on
+	   quality.  Uses the Bio::Tools::Alignment::Trim object. Use
+	   at your own risk because the Bio::Tools::Alignment::Trim
+	   object was designed specifically for me and is mysterious
+	   in its ways. Every time somebody other than me uses it a
+	   swarm of locusts descends on a small Central American
+	   village so do not say you weren't warned.
+ Returns : Nothing.
+ Args    : None.
+
+Working on exceptions and warnings here.
+
+See L<Bio::Tools::Alignment::Trim> for more information
+
+=cut
+
+#' to make my emacs happy
+
+# For every contig of class "singlet", ask the trim object for its
+# quality trim points and record 'start_point', 'end_point' and
+# 'sequence_trimmed' on the contig. set_doublets() is run first when it
+# has not been run yet.
+# Returns : nothing.
+sub set_trim_points_singlets_and_singletons {
+    my $self = shift;
+    $self->debug("Consed.pm : \$self is $self\n");
+    unless ($self->{'doublets_set'}) {
+        $self->debug("You need to set the doublets before you use ".
+                     "set_trim_points_singlets_and_doublets. Doing that now.");
+        $self->set_doublets();
+    }
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key};
+        next unless $contig->{'class'} eq "singlet";
+        $self->debug("Singlet $key\n");
+        # Bio::Tools::Alignment::Trim::trim_singlet wants
+        # ($sequence,$quality,$name,$class); a missing quality is passed
+        # as the literal string "unset"
+        my $sequence = $contig->{'consensus'};
+        my $quality  = $contig->{'quality'} ? $contig->{'quality'} : "unset";
+        my $name     = $contig->{'name'};
+        my $class    = $contig->{'class'};
+        my $r_points = $self->{'o_trim'}->trim_singlet($sequence,$quality,$name,$class);
+        my ($start,$end) = @{$r_points};
+        $contig->{'start_point'} = $start;
+        $contig->{'end_point'} = $end;
+        $contig->{'sequence_trimmed'} =
+            substr($contig->{'consensus'},$start,$end-$start);
+    }
+    $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_singlets".
+                 "_and_singletons: Done setting the quality trimpoints.\n");
+    return;
+}  # end set_trim_points_singlet
+
+=head2 set_trim_points_doublets()
+
+ Title   : set_trim_points_doublets()
+ Usage   : $o_consed->set_trim_points_doublets();
+ Function: Set the trim points for doublets based on quality. Uses the
+	   Bio::Tools::Alignment::Trim object. Use at your own risk because
+           the Bio::Tools::Alignment::Trim object was designed specifically
+           for me and is mysterious in its ways. Every time somebody other
+           than me uses it you risk a biblical plague being loosed on your
+           city.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : Working on exceptions here.
+
+See L<Bio::Tools::Alignment::Trim> for more information
+
+=cut
+
+# For every contig of class "doublet", ask the trim object for its
+# quality trim points and record 'start_point', 'end_point' and
+# 'sequence_trimmed' on the contig.
+# Returns : nothing.
+sub set_trim_points_doublets {
+    my $self = shift;
+    $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_doublets:".
+                 " Restoring zeros for doublets.\n");
+    $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_doublets:".
+                 " Setting doublet trim points.\n");
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key};
+        # contigs that have not been classified yet are skipped quietly
+        next unless defined $contig->{'class'} && $contig->{'class'} eq "doublet";
+        # trim_doublet is called in list context, as before; the first
+        # two values it returns are taken as start and end point
+        my @points = $self->{'o_trim'}->trim_doublet
+            ($contig->{'consensus'},
+             $contig->{'quality'},
+             $contig->{'name'},
+             $contig->{'class'});
+        $contig->{'start_point'} = $points[0];
+        $contig->{'end_point'} = $points[1];
+        $contig->{'sequence_trimmed'} =
+            substr($contig->{'consensus'},
+                   $points[0],$points[1]-$points[0]);
+    }
+    $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_doublets:".
+                 " Done setting doublet trim points.\n"); 
+    return;
+} # end set_trim_points_doublets
+
+=head2 get_trimmed_sequence_by_name($name)
+
+ Title   : get_trimmed_sequence_by_name($name)
+ Usage   : $o_consed->get_trimmed_sequence_by_name($name);
+ Function: Returns the trimmed_sequence of a contig with {name} eq $name.
+ Returns : A scalar- the trimmed sequence.
+ Args    : The {name} of a contig.
+ Notes   : 
+
+=cut
+
+# Return the {'sequence_trimmed'} of the contig whose {name} is $name.
+# Args    : the {name} (not the key) of a contig.
+# Returns : the trimmed sequence, or nothing when no contig carries that
+#           name.
+sub get_trimmed_sequence_by_name {
+    my ($self,$name) = @_;
+    my $contigname = $self->get_contig_number_by_name($name);
+    # Looking up an unknown key through ->{$key}->{...} would silently
+    # create an empty contig entry, which would then show up in every
+    # loop over the contig hash. Bail out before touching it.
+    return unless defined $contigname
+        && exists $self->{'contigs'}->{$contigname};
+    return $self->{'contigs'}->{$contigname}->{'sequence_trimmed'};
+}
+
+=head2 set_dash_present_in_sequence_name("yes")
+
+ Title   : set_dash_present_in_sequence_name("yes")
+ Usage   : $o_consed->set_dash_present_in_sequence_name("yes");
+ Function: Deprecated. Part of an uncompleted thought. ("Oooh! Shiny!")
+ Returns : Nothing.
+ Args    : "yes" to set {dash_present_in_sequence_name} to 1
+ Notes   : 
+
+=cut
+
+# Deprecated switch. Sets {'dash_present_in_sequence_name'} to 1 when the
+# argument is the literal string "yes" and to 0 for anything else.
+# Returns : the value just stored.
+sub set_dash_present_in_sequence_name {
+    my $self = shift;
+    my ($dash_present) = @_;
+    $self->{'dash_present_in_sequence_name'} =
+        ($dash_present eq "yes") ? 1 : 0;
+    return $self->{'dash_present_in_sequence_name'};
+} # end set_dash_present_in_sequence_name
+
+=head2 set_doublets()
+
+ Title   : set_doublets()
+ Usage   : $o_consed->set_doublets();
+ Function: Find pairs that have similar names and mark them as doublets
+	   and set the {name}.
+ Returns : 0 or 1.
+ Args    : None.
+
+A complicated subroutine that iterates over the
+Bio::Tools::Alignment::Consed looking for contigs of 2. If the forward
+and reverse designator are removed from each of the reads in
+{'member_array'} and the remaining reads are the same, {name} is set
+to that name and the contig's class is set as "doublet".  If any of
+those cases fail the contig is marked as a "pair".
+
+=cut
+
+#' make my emacs happy
+
+# Classify every contig that has a member array by its number of reads:
+#   2 reads  - "doublet" (and {name} is set) when _get_contig_name finds
+#              a common name, otherwise "pair"
+#   1 read   - {name} becomes the read name; the class becomes
+#              "singleton" unless a class (e.g. "singlet") is already set
+#   3+ reads - "multiplet"
+# {'num_members'} always ends up as the size of the member array.
+# Returns : 0.
+sub set_doublets {
+    my $self = shift;
+    # set the designators in the Bio::Tools::Alignment::Trim object
+    $self->{'o_trim'}->set_designators($self->{'reverse_designator'},
+                                       $self->{'forward_designator'});
+    foreach my $key_contig (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key_contig};
+        # contigs without a member array are left alone
+        next unless $contig->{'member_array'};
+        my $r_members = $contig->{'member_array'};
+        my $member_total = @{$r_members};
+        if ($member_total == 2) {
+            $contig->{'num_members'} = 2;
+            $self->debug("\tThere are 2 members! Looking for the contig name...\n");
+            my $name = _get_contig_name($self,$r_members);
+            $self->debug("The name is $name\n") if defined $name;
+            if ($name) {
+                $contig->{'name'} = $name;
+                $contig->{'class'} = "doublet";
+            }
+            else {
+                $self->debug("$key_contig is a pair.\n");
+                $contig->{'class'} = "pair";
+            }
+        }
+        elsif ($member_total == 1) {
+            # the lone read lends its name to the contig
+            $contig->{name} = $r_members->[0];
+            $contig->{num_members} = 1;
+            # a singlet already carries its class; anything else with a
+            # single read is a singleton
+            $contig->{'class'} = "singleton" unless $contig->{'class'};
+        }
+        elsif ($member_total >= 3) {
+            $contig->{'num_members'} = $member_total;
+            $contig->{'class'} = "multiplet";
+        }
+        $contig->{'num_members'} = $member_total;
+    }
+    $self->{'doublets_set'} = "done";
+    return 0;
+}				# end set_doublets
+
+=head2 set_singlets
+
+ Title   : set_singlets
+ Usage   : $o_consed->set_singlets();
+ Function: Read in a singlets file and place them into the
+	   Bio::Tools::Alignment::Consed object.
+ Returns : Nothing.
+ Args    : A scalar to turn on verbose parsing of the singlets file.
+ Notes   : 
+
+=cut
+
+# Read the "<prefix>singlets" fasta file that sits next to the ace file
+# and add one contig of class "singlet" per record to the contig hash.
+# The contig key and its {name} are both the read name taken from the
+# fasta header (the text before "  CHROMAT", or else the first
+# non-blank token followed by whitespace).
+# Returns : nothing when there is no singlets file, 0 otherwise.
+#           {'singlets_set'} is set to "done" in both cases.
+sub set_singlets {
+    my ($self) = @_;
+    $self->debug("Bio::Tools::Alignment::Consed Adding singlets to the contig hash...\n"); 
+    my $full_filename = $self->{'filename'};
+    $self->debug("Bio::Tools::Alignment::Consed::set_singlets: \$full_filename is $full_filename\n");
+    # Take the captures from the match itself: after a failed match
+    # $1 and $2 would still hold values from an earlier match.
+    my ($base_path,$filename) = $full_filename =~ m/(.*\/)(.*ace.*)$/;
+    if (!defined $filename) {
+        # no directory part in the name: the file lives in the
+        # current directory
+        ($base_path,$filename) = ('',$full_filename);
+    }
+    $self->debug("Bio::Tools::Alignment::Consed::set_singlets: singlets filename is $filename and \$base_path is $base_path\n");
+    my ($prefix) = $filename =~ m/(.*)ace.*$/;
+    $prefix = '' unless defined $prefix;
+    my $singletsfile = $base_path.$prefix."singlets";
+    $self->debug("\$singletsfile is $singletsfile\n");
+    if (!-f $singletsfile) {
+        # there is no singlets file.
+        $self->{'singlets_set'} = "done";
+        return;
+    }
+    $self->debug("$singletsfile is indeed a file. Trying to open it...\n");
+    my $singlets_fh = Bio::Root::IO->new(-file => $singletsfile);
+    my ($sequence,$name);
+    # a lexical line variable keeps the caller's $_ intact, and the
+    # defined() test keeps a final line of "0" from ending the loop early
+    while (defined(my $line = $singlets_fh->_readline())) {
+        chomp $line;
+        if ($line =~ /\>/) {
+            # a new header closes the record collected so far
+            if ($name && $sequence) {
+                $self->debug("Adding $name with sequence $sequence to hash...\n");
+                $self->_store_singlet($name,$sequence);
+            }
+            $sequence = $name = undef;
+            if ($line =~ m/^\>(.*)\s\sCHROMAT/ && $1) {
+                $name = $1;
+            }
+            elsif ($line =~ m/\>(\S+)\s/) {
+                $name = $1;
+            }
+        }
+        else {
+            $sequence .= $line;
+        }
+    }
+    $singlets_fh->close();
+    if ($name && $sequence) {
+        $self->debug("Pushing the last of the singlets ($name)\n");
+        $self->_store_singlet($name,$sequence);
+    }
+    $self->debug("Bio::Tools::Alignment::Consed::set_singlets: Done adding singlets to the singlets hash.\n");
+    $self->{'singlets_set'} = "done";
+    return 0;
+}				# end sub set_singlets
+
+# Record one singlet in the contig hash under its own name. The member
+# array is assigned rather than appended to, so a singlet always has
+# exactly one member even when its name is seen more than once.
+sub _store_singlet {
+    my ($self,$name,$sequence) = @_;
+    $self->{'contigs'}->{$name}->{'member_array'} = [$name];
+    $self->{'contigs'}->{$name}->{'consensus'} = $sequence;
+    $self->{'contigs'}->{$name}->{'name'} = $name;
+    $self->{'contigs'}->{$name}->{"singlet"} = 1;
+    $self->{'contigs'}->{$name}->{'class'} = "singlet";
+    return;
+}
+
+=head2 get_singlets()
+
+ Title   : get_singlets()
+ Usage   : $o_consed->get_singlets();
+ Function: Return the keynames of the singlets.
+ Returns : An array containing the keynames of all 
+           Bio::Tools::Alignment::Consed sequences in the class "singlet".
+ Args    : None.
+ Notes   : 
+
+=cut
+
+# Return the keys of all contigs whose class is "singlet", in sorted
+# order. set_singlets() is run first when it has not been run yet.
+# Contigs that have no class yet are skipped: a caller may ask for the
+# singlets before the other contigs have been classified.
+sub get_singlets {
+    my $self = shift;
+    if (!$self->{singlets_set}) {
+        $self->debug("You need to set the singlets before you get them. Doing that now.");
+        $self->set_singlets();
+    }
+    my @singlets;
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $class = $self->{'contigs'}->{$key}->{'class'};
+        next unless $class;
+        push @singlets,$key if $class eq "singlet";
+    }
+    return @singlets;
+}
+
+=head2 set_quality_by_name($name,$quality)
+
+ Title   : set_quality_by_name($name,$quality)
+ Usage   : $o_consed->set_quality_by_name($name,$quality);
+ Function: Deprecated. Make the contig with {name} have {'quality'} $quality.
+           Probably used for testing.
+ Returns : Nothing.
+ Args    : The name of a contig and a scalar for its quality.
+ Notes   : Deprecated.
+
+=cut
+
+# Deprecated. Give {'quality'} $quality to every contig whose key or
+# whose {name} equals $name.
+# Returns : the string "0" when at least one contig was updated, the
+#           string "1" otherwise.
+sub set_quality_by_name {
+    # all three arguments come from @_; a bare shift would fill $self only
+    my ($self,$name,$quality) = @_;
+    my $found = 0;
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key};
+        my $contig_name = $contig->{'name'};
+        if ($key eq "$name" ||
+            (defined $contig_name && $contig_name eq "$name")) {
+            $contig->{'quality'} = $quality;
+            $found = 1;
+        }
+    }
+    return $found ? "0" : "1";
+}				# end set quality by name
+
+=head2 set_singlet_quality()
+
+ Title   : set_singlet_quality()
+ Usage   : $o_consed->set_singlet_quality();
+ Function: For each singlet, go to the appropriate file in phd_dir and read
+           in the phred quality for that read and place it into {'quality'}
+ Returns : 0 or 1.
+ Args    : None.
+ Notes   : This is the next subroutine that will receive substantial revision
+           in the next little while. It really should eval the creation of
+           Bio::Tools::Alignment::Phred objects and go from there. 
+
+=cut
+
+# Read the "<prefix>qual" file that sits next to the ace file and append
+# each quality line (followed by one blank) to the {'quality'} of the
+# contig keyed by the read name from the preceding ">" header. Lines for
+# reads that are not in the contig hash are ignored.
+# Returns : nothing (after a warning) when the qual file is missing,
+#           0 otherwise.
+sub set_singlet_quality {
+    my $self = shift;
+    my $full_filename = $self->{'filename'};
+    # take the captures from the match itself so a failed match cannot
+    # hand back values left over from an earlier match
+    my ($base_path,$prefix) = $full_filename =~ m/(.*\/)(.*)ace.*$/;
+    $base_path = '' unless defined $base_path;
+    $prefix    = '' unless defined $prefix;
+    my $singletsfile = $base_path.$prefix."qual";
+    if (!-f $singletsfile) {
+        $self->warn("$singletsfile is not a file. Sorry.\n");
+        return;
+    }
+    my $singlets_fh = Bio::Root::IO->new(-file => $singletsfile);
+    my $identity;
+    while (defined(my $line = $singlets_fh->_readline())) {
+        chomp $line;
+        if ($line =~ /^\>/) {
+            # the read name is the first token after ">"
+            ($identity) = $line =~ m/\>(\S*)\s/;
+        }
+        elsif (defined $identity && $self->{'contigs'}->{$identity}) {
+            $self->{'contigs'}->{$identity}->{'quality'} .= "$line ";
+        }
+    }
+    $singlets_fh->close();
+    return 0;
+}
+
+=head2 set_contig_quality()
+
+ Title   : set_contig_quality()
+ Usage   : $o_consed->set_contig_quality();
+ Function: Deprecated.
+ Returns : Deprecated.
+ Args    : Deprecated.
+ Notes   : Deprecated. Really. Trust me.
+
+=cut
+
+# Deprecated: warns on every call. Reads the "<prefix>contigs.qual" file
+# that sits next to the ace file and appends each quality line (with a
+# leading blank) to the {'quality'} of the contig whose number appears
+# in the preceding ">...Contig<number> " header.
+# note: contigs _include_ singletons but _not_ singlets
+sub set_contig_quality {
+    my $self = shift;
+    # the unexpected results referred to here are a doubling of the
+    # quality values
+    $self->warn("set_contig_quality is deprecated and will likely produce unexpected results");
+    my $full_filename = $self->{'filename'};
+    # e.g. Run_SRC3700_2000-08-01_73+74.fasta.screen.contigs.qual
+    $full_filename =~ m/(.*\/)(.*)ace.*$/;
+    my $base_path = $1;
+    my $filename = "$2"."contigs.qual";
+    my $qualfile = $base_path.$filename;
+    unless (-f $qualfile) {
+        $self->warn("Bio::Tools::Alignment::Consed::set_contig_quality $qualfile is not a file. Sorry.\n");
+        return;
+    }
+    my $contig_quality_fh = Bio::Root::IO->new(-file => $qualfile);
+
+    my ($identity,$line);
+    while ($line = $contig_quality_fh->_readline()) {
+        chomp $line;
+        if ($line =~ /^\>/) {
+            $line =~ m/\>.*Contig(\d+)\s/;
+            $identity = $1;
+        }
+        elsif ($self->{'contigs'}->{$identity} ) {
+            $self->{'contigs'}->{$identity}->{'quality'} .= " $line";
+        }
+    }
+}				# end set_contig_quality
+
+=head2 get_multiplets()
+
+ Title   : get_multiplets()
+ Usage   : $o_consed->get_multiplets();
+ Function: Return the keynames of the multiplets.
+ Returns : Returns an array containing the keynames of all 
+           Bio::Tools::Alignment::Consed sequences in the class "multiplet".
+ Args    : None.
+ Notes   : 
+
+=cut
+
+# Return the keys of all contigs whose class is "multiplet", in sorted
+# order. Contigs without a class are skipped.
+sub get_multiplets {
+    my $self = shift;
+    my @multiplets;
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $class = $self->{'contigs'}->{$key}->{'class'};
+        next unless $class;
+        push @multiplets,$key if $class eq "multiplet";
+    }
+    return @multiplets;
+}
+
+=head2 get_all_members()
+
+  Title   : get_all_members()
+  Usage   : @all_members = $o_consed->get_all_members();
+  Function: Return a list of all of the read names in the 
+            Bio::Tools::Alignment::Consed object.
+  Returns : An array containing all of the elements in all of the
+            {'member_array'}s.
+  Args    : None.
+  Notes   : 
+
+=cut
+
+# Collect the read names of all contigs, walking the contigs in sorted
+# key order. For keys that start with "singlet" only the first element
+# of the member array is taken; for every other contig that has a
+# member array all of its elements are taken.
+sub get_all_members {
+    my ($self) = @_;
+    my @members;
+    for my $key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key};
+        if ($key =~ /^singlet/) {
+            push @members, $contig->{'member_array'}->[0];
+            next;
+        }
+        next unless $contig->{'member_array'};
+        push @members, @{$contig->{'member_array'}};
+    }
+    return @members;
+}
+
+=head2 sum_lets($total_only)
+
+ Title   : sum_lets($total_only)
+ Usage   : $statistics = $o_consed->sum_lets($total_only);
+ Function: Provide numbers for how many sequences were accounted for in the
+           Bio::Tools::Alignment::Consed object.
+ Returns : If a scalar is present, returns the total number of
+           sequences accounted for in all classes. If no scalar passed
+           then returns a string that looks like this:
+           Singt/singn/doub/pair/mult/total : 2,0,1(2),0(0),0(0),4
+           This example means the following: There were 2 singlets.
+           There were 0 singletons.  There were 1 doublets for a total
+           of 2 sequences in this class.  There were 0 pairs for a
+           total of 0 sequences in this class.  There were 0
+           multiplets for a total of 0 sequences in this class.  There
+           were a total of 4 sequences accounted for in the
+           Bio::Tools::Alignment::Consed object.   
+ Args : A scalar is optional to change the way the numbers are returned.  
+ Notes:
+
+=cut
+
+# Count the sequences accounted for in each class.
+# Args    : $total_only - when true, only the grand total is returned.
+# Returns : the total number of sequences, or a string of the form
+#           "Singt/singn/doub/pair/mult/total : S,N,D(2D),P(2P),M(m),T"
+#           where S, N, D, P and M are the numbers of singlets,
+#           singletons, doublets, pairs and multiplets, m is the number
+#           of reads in all multiplets and T is the grand total.
+sub sum_lets {
+    my ($self,$total_only) = @_;
+    # Each getter returns a list of contig keys; "() = ..." forces list
+    # context and the scalar assignment then yields the element count.
+    my $singlets   = () = $self->get_singlets();
+    my $doublets   = () = $self->get_doublets();
+    my $pairs      = () = $self->get_pairs();
+    my $singletons = () = $self->get_singletons();
+    my @multiplets = $self->get_multiplets();
+    my $count_multiplets = @multiplets;
+    # start from 0 so that no counter can be left undefined
+    my $multiplet_count = 0;
+    foreach my $key (@multiplets) {
+        $multiplet_count += $self->{'contigs'}->{$key}->{num_members} || 0;
+    }
+    # doublets and pairs hold two reads each
+    my $count = $singlets + $singletons + ($doublets * 2) + ($pairs * 2)
+        + $multiplet_count;
+    return $count if $total_only;
+    return "Singt/singn/doub/pair/mult/total : ".
+        "$singlets,$singletons,$doublets(".
+         ($doublets*2)."),$pairs(".($pairs*2).
+        "),$count_multiplets($multiplet_count),$count";
+}
+
+=head2 write_stats()
+
+ Title   : write_stats()
+ Usage   : $o_consed->write_stats();
+ Function: Write a file called "statistics" containing numbers similar to
+	   those provided in sum_lets().
+ Returns : Nothing. Write a file in $o_consed->{path} containing something
+	   like this:
+
+           0,0,50(100),0(0),0(0),100
+
+           Where the numbers provided are in the format described in the
+	   documentation for sum_lets().
+ Args    : None.
+ Notes   : This might break platform independence, I do not know.
+
+See L<sum_lets()|sum_lets>
+
+=cut
+
+# Write the numbers produced by sum_lets() (without the leading
+# "Singt/.../total : " label) to the file "statistics" in {'path'}.
+# The process umask is set to 0001 before the file is created and is
+# not restored afterwards.
+# Returns : whatever closing the file handle returns.
+sub write_stats {
+    # worry about platform dependence here?
+    my $self = shift;
+    my $stats_filename = $self->{'path'}."statistics";
+    my $statistics_raw = $self->sum_lets;
+    # s/// returns the number of substitutions, not the new string, so
+    # work on a copy and keep the copy
+    (my $statsfilecontents = $statistics_raw) =~ s/.*\ \:\ //g;
+    umask 0001;
+    # the leading ">" opens the file for writing; without it the file
+    # would be opened for reading and nothing could be printed to it
+    my $fh = Bio::Root::IO->new(-file=>">$stats_filename");
+    $fh->_print("$statsfilecontents");
+    return $fh->close();
+}
+
+=head2 get_singletons()
+
+ Title   : get_singletons()
+ Usage   : @singletons = $o_consed->get_singletons();
+ Function: Return the keynames of the singletons.
+ Returns : Returns an array containing the keynames of all
+	   Bio::Tools::Alignment::Consed sequences in the class "singleton".
+ Args    : None.
+ Notes   : 
+
+=cut
+
+# Return the keys of all singletons, in sorted order. A singleton is a
+# contig with exactly one read in its member array and the class
+# "singleton" (see the consed documentation).
+sub get_singletons {
+    my $self = shift;
+    my @singletons;
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key};
+        # look at this contig's own members only; a contig without a
+        # member array must not inherit the count of the previous one
+        my $r_members = $contig->{'member_array'};
+        next unless $r_members && @{$r_members} == 1;
+        next unless $contig->{'class'} && $contig->{'class'} eq "singleton";
+        push @singletons,$key;
+    }
+    return @singletons;
+}
+
+=head2 get_pairs()
+
+ Title   : get_pairs()
+ Usage   : @pairs = $o_consed->get_pairs();
+ Function: Return the keynames of the pairs.
+ Returns : Returns an array containing the keynames of all
+           Bio::Tools::Alignment::Consed sequences in the class "pair".
+ Args    : None.
+ Notes   : 
+
+=cut
+
+# Return the keys of all pairs, in sorted order. A pair is a contig of
+# two reads whose names do not match, i.e. a contig with two entries in
+# its member array and the class "pair".
+sub get_pairs {
+    my $self = shift;
+    my @pairs;
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$key};
+        my $r_members = $contig->{'member_array'};
+        next unless $r_members && @{$r_members} == 2;
+        # contigs that have not been classified yet cannot be pairs
+        next unless defined $contig->{'class'} && $contig->{'class'} eq "pair";
+        push @pairs,$key;
+    }
+    return @pairs;
+}
+
+=head2 get_name($contig_keyname)
+
+ Title   : get_name($contig_keyname)
+ Usage   : $name = $o_consed->get_name($contig_keyname);
+ Function: Return the {name} for $contig_keyname.
+ Returns : A string. ({name})
+ Args    : A contig keyname.
+ Notes   : 
+
+=cut
+
+# Return the {name} stored for the contig with the given key.
+sub get_name {
+    my $self = shift;
+    my $contig_key = shift;
+    my $name = $self->{'contigs'}{$contig_key}{'name'};
+    return $name;
+}
+
+=head2 _get_contig_name(\@array_containing_reads)
+
+ Title   : _get_contig_name(\@array_containing_reads)
+ Usage   : $o_consed->_get_contig_name(\@array_containing_reads);
+ Function: The logic for the set_doublets subroutine.
+ Returns : The name for this contig.
+ Args    : A reference to an array containing read names.
+ Notes   : Depends on reverse_designator. Be sure this is set the way you
+	   intend.
+
+=cut
+
+sub _get_contig_name {
+    # Work out the shared clone name of the reads in a contig.
+    # Args    : a reference to an array of read names.
+    # Returns : the common name when every usable read yields the same
+    #           name; nothing (false) when the names are mixed.
+    my ($self,$r_array) = @_;
+    # the designators mark the direction label at the end of a read name
+    my $forward_designator = $self->{'forward_designator'} || "f";
+    my $reverse_designator = $self->{'reverse_designator'} || "r";
+    my @name_nodir;
+    foreach my $read (@$r_array) {
+        # Split the read into a clone name and a trailing label: try the
+        # forward designator, then the reverse one, then any "_label".
+        # The captures are copied only when a pattern matched; after a
+        # failed match $1 and $2 still hold the result of an earlier match.
+        my ($name,$suffix);
+        if ($read =~ /(.+)($forward_designator.*)/
+            || $read =~ /(.+)($reverse_designator.*)/
+            || $read =~ /(.+)(_.+)/) {
+            ($name,$suffix) = ($1,$2);
+        }
+        # reads that yield no label do not take part in the comparison
+        next unless $suffix;
+                                # Jee, I hope we get a naming convention soon
+        if ($suffix =~ /^$forward_designator/ || $suffix =~ /^$reverse_designator/) {
+            # a direction label is dropped from the name
+            push @name_nodir,$name;
+        }
+        else {
+            # any other label is kept as part of the name
+            push @name_nodir,"$name$suffix";
+        }
+    }
+    foreach my $candidate (@name_nodir) {
+        # mixed names in this contig: there is no single name to report
+        return if $candidate ne $name_nodir[0];
+    }
+    # cohesive contig (undef when no read produced a name)
+    return $name_nodir[0];
+}                               # end _get_contig_name
+
+=head2 get_doublets()
+
+ Title   : get_doublets()
+ Usage   : @doublets = $o_consed->get_doublets();
+ Function: Return the keynames of the doublets.
+ Returns : Returns an array containing the keynames of all
+           Bio::Tools::Alignment::Consed sequences in the class "doublet".
+ Args    : None.
+ Notes   : 
+
+=cut
+
+sub get_doublets {
+    my $self = shift;
+    # the doublet classification has to exist before it can be reported
+    unless ($self->{doublets_set}) {
+        $self->warn("You need to set the doublets before you can get them. Doing that now.");
+        $self->set_doublets();
+    }
+    my @doublets;
+    foreach my $key (sort keys %{$self->{'contigs'}}) {
+        # only named contigs in the class "doublet" are reported
+        next unless $self->{'contigs'}->{$key}->{name};
+        next unless $self->{'contigs'}->{$key}->{'class'} eq "doublet";
+        push @doublets,$key;
+    }
+    return @doublets;
+}                               # end get_doublets
+
+=head2 dump_hash()
+
+ Title   : dump_hash()
+ Usage   : $o_consed->dump_hash();
+ Function: Use dumpvar.pl to dump out the Bio::Tools::Alignment::Consed
+           object to STDOUT.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : I used this a lot in debugging.
+
+=cut
+
+sub dump_hash {
+    # Dump the contig hash to STDOUT with Dumpvalue.
+    my $self = shift;
+    # Call the constructor as a class method. The indirect form
+    # "new Dumpvalue" is ambiguous to the parser, which matters in a
+    # package that has a new() of its own.
+    my $dumper = Dumpvalue->new();
+    $self->debug( "Bio::Tools::Alignment::Consed::dump_hash - ".
+                  "The following is the contents of the contig hash...\n");
+    $dumper->dumpValue($self->{'contigs'});
+}
+
+=head2 dump_hash_compact()
+
+ Title   : dump_hash_compact()
+ Usage   : $o_consed->dump_hash_compact();
+ Function: Dump out the Bio::Tools::Alignment::Consed object in a compact way.
+ Returns : Nothing.
+ Args    : Nothing.
+ Notes   : Cleaner than dumpValue(), dumpHash(). I used this a lot in
+           debugging.
+
+=cut
+
+sub dump_hash_compact {
+    # Print one tab-separated line per contig to STDOUT, grouped by class:
+    # singlets, singletons, pairs, doublets, multiplets.
+    no strict 'refs';           # renege for the block
+    my ($self,$sequence) = @_;
+    # Report whether a {quality} entry is stored under the given contig key.
+    my $print_quality_status = sub {
+        my $contig_key = shift;
+        if ($self->{'contigs'}->{$contig_key}->{'quality'}) {
+            print("qualities found here\n");
+        } else {
+            print("no qualities found here\n");
+        }
+    };
+    # get the classes
+    my @singlets = $self->get_singlets();
+    my @singletons = $self->get_singletons();
+    my @doublets = $self->get_doublets();
+    my @pairs = $self->get_pairs();
+    my @multiplets = $self->get_multiplets();
+    print("Name\tClass\tMembers\tQuality?\n");
+    foreach my $singlet (@singlets) {
+        my @members = $self->get_members($singlet);
+        print($self->get_name($singlet)."\tsinglets\t".join(',',@members)."\t");
+        $print_quality_status->($singlet);
+    }
+    foreach my $singleton (@singletons) {
+        my @members = $self->get_members($singleton);
+        print($self->get_name($singleton)."\tsingletons\t".join(',',@members)."\t");
+        $print_quality_status->($singleton);
+    }
+    foreach my $pair (@pairs) {
+        my @members = $self->get_members($pair);
+        # pairs may have no name; print a placeholder instead
+        my $name = $self->get_name($pair) || "BLANK";
+        print("$name\tpairs\t".join(',',@members)."\n");
+    }
+    foreach my $doublet (@doublets) {
+        # doublets are looked up by name here, not by contig key
+        my @members = $self->get_members_by_name($doublet);
+        print("$doublet\tdoublets\t".join(',',@members)."\t");
+        my $contig_number = $self->get_contig_number_by_name($doublet);
+        $print_quality_status->($contig_number);
+    }
+    foreach my $multiplet (@multiplets) {
+        my @members = $self->get_members($multiplet);
+        print("Contig $multiplet"."\tmultiplets\t".join(',',@members)."\n");
+    }
+}                               # end dump_hash_compact
+
+=head2 get_phreds()
+
+ Title   : get_phreds()
+ Usage   : @phreds = $o_consed->get_phreds();
+ Function: For each doublet in the Bio::Tools::Alignment::Consed hash, go
+           and get the phreds for the top and bottom reads. Place them into
+           {top_phreds} and {bottom_phreds}.
+ Returns : Nothing.
+ Args    : Nothing.
+
+Requires parse_phd() and reverse_and_complement(). I realize that it
+would be much more elegant to pull qualities as required but there
+were certain "features" in the acefile that required a bit more
+detailed work be done to get the qualities for certain parts of the
+consensus sequence. In order to make _sure_ that this was done
+properly I wrote things to do all steps and then I used dump_hash()
+and checked each one to ensure expected behavior. I have never changed
+this, so there you are.
+
+=cut
+
+sub get_phreds {
+    # this subroutine is the target of a rewrite to use the Bio::Tools::Alignment::Phred object.
+    my $self = shift;
+    foreach my $contig_key (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$contig_key};
+        # only doublets carry a top and a bottom read
+        next unless $contig->{'class'} eq "doublet";
+        $self->debug("$contig_key is a doublet. Going to parse_phd for top($contig->{'top_name'}) and bottom($contig->{'bottom_name'})\n");
+        my $r_phreds_top = &parse_phd($self,$contig->{'top_name'});
+        my $r_phreds_bottom = &parse_phd($self,$contig->{'bottom_name'});
+        # a read flagged "C" is reversed and complemented before it is stored
+        $r_phreds_top = &reverse_and_complement($r_phreds_top)
+            if $contig->{'top_complement'} eq "C";
+        $r_phreds_bottom = &reverse_and_complement($r_phreds_bottom)
+            if $contig->{'bottom_complement'} eq "C";
+        $contig->{'top_phreds'} = $r_phreds_top;
+        $contig->{'bottom_phreds'} = $r_phreds_bottom;
+    }
+}
+
+=head2 parse_phd($read_name)
+
+ Title   : parse_phd($read_name)
+ Usage   : $o_consed->parse_phd($read_name);
+ Function: Suck in the contents of a .phd file.
+ Returns : A reference to an array containing the quality values for the read.
+ Args    : The name of a read.
+ Notes   : This is a significantly weak subroutine because it was always
+	   intended that these functions, along with the functions provided by
+           get_phreds() be put into the Bio::SeqIO::phd module. This is done
+           now but the Bio::Tools::Alignment::Consed module has not been
+           rewritten to reflect this change.
+
+See L<Bio::SeqIO::phd> for more information.
+
+=cut
+
+sub parse_phd {
+    # Read the lines between BEGIN_DNA and END_DNA from the .phd.1 file of
+    # one read.
+    # Args    : the name of a read.
+    # Returns : a reference to an array holding those lines, chomped.
+    my ($self,$sequence_name) = @_;
+    $self->debug("Parsing phd for $sequence_name\n");
+    my $in_dna = 0;
+    my @bases;
+    my $fh = Bio::Root::IO->new
+        (-file=>"$self->{path}/../phd_dir/$sequence_name.phd.1");
+    # A method call in a while condition does not assign to $_ (only the
+    # <FH>/readline forms do), so the line is captured explicitly.
+    while (defined(my $line = $fh->_readline())) {
+        chomp $line;
+        if ($line =~ /^BEGIN_DNA/) { $in_dna = 1; next; }
+        last if $line =~ /^END_DNA/;
+        # skip the header lines that precede the DNA section
+        next unless $in_dna;
+        push(@bases,$line);
+    }
+    $fh->close();
+    return \@bases;
+}
+
+=head2 reverse_and_complement(\@source)
+
+ Title   : reverse_and_complement(\@source)
+ Usage   : $reference_to_array = $o_consed->reverse_and_complement(\@source);
+ Function: A stub for the recursive routine reverse_recurse().
+ Returns : A reference to a reversed and complemented array of phred data.
+ Args    : A reference to an array of phred data.
+ Notes   : 
+
+=cut
+
+sub reverse_and_complement {
+    my ($r_source) = @_;
+    # no destination array is supplied; the result comes back from
+    # reverse_recurse()
+    my $r_reversed = &reverse_recurse($r_source,undef);
+    return $r_reversed;
+}
+
+=head2 reverse_recurse($r_source,$r_destination)
+
+ Title   : reverse_recurse(\@source,\@destination)
+ Usage   : $o_consed->reverse_recurse(\@source,\@destination);
+ Function: A recursive routine to reverse and complement an array of
+           phred data.
+ Returns : A reference to an array containing reversed phred data.
+ Args    : A reference to a source array and a reference to a destination
+	   array.
+
+Recursion is kewl, but this sub should likely be _reverse_recurse.
+
+=cut
+
+
+sub reverse_recurse($$) {
+    # Move every entry of the source array onto the destination array in
+    # reverse order, complementing the base letter on the way.
+    # Args    : a reference to the source array (emptied by this call) and
+    #           a reference to the destination array (may be undef).
+    # Returns : the destination reference; undef when the source is empty
+    #           and no destination was given.
+    # The name is historical: the work is done in a loop, so the call depth
+    # no longer grows with the length of the read and $_ is left untouched.
+    my ($r_source,$r_destination) = @_;
+    while (@$r_source) {
+        my $entry = pop(@$r_source);
+        # only the first matching substitution is applied, so a base that
+        # has just been complemented is not flipped back
+        $entry =~ s/c/g/ || $entry =~ s/g/c/ || $entry =~ s/a/t/ || $entry =~ s/t/a/;
+        push(@$r_destination,$entry);
+    }
+    return $r_destination;
+}
+
+=head2 show_missing_sequence()
+
+ Title   : show_missing_sequence();
+ Usage   : $o_consed->show_missing_sequence();
+ Function: Used by set_trim_points_doublets() to fill in quality values where
+	   consed (phrap?) set them to 0 at the beginning and/or end of the
+	   consensus sequences.
+ Returns : Nothing.
+ Args    : None.
+
+Acts on doublets only. Really very somewhat quite ugly. A disgusting
+kludge. I<insert pride here> It was written stepwise with no real plan
+because it was not really evident why consed (phrap?)  was doing this.
+
+=cut
+
+sub show_missing_sequence {
+    # For every doublet, replace each zero in the consensus {quality}
+    # string with a quality value taken from the phred data of the top
+    # and/or bottom read, then discard the phred data again.
+    # Decides which sequence should not have been clipped at consensus
+    # position = 0.
+    my $self = shift;
+    # get_phreds() stores {top_phreds} and {bottom_phreds} for each doublet
+    &get_phreds($self);
+    foreach my $current_contig (sort keys %{$self->{'contigs'}}) {
+        my $contig = $self->{'contigs'}->{$current_contig};
+        next unless $contig->{'class'} eq "doublet";
+        # count the zeros that open the quality string
+        my $number_leading_xs = 0;
+        my $measurer = $contig->{'quality'};
+        while ($measurer =~ s/^\ 0\ /\ /) {
+            $number_leading_xs++;
+        }
+        my @qualities = split(' ',$contig->{'quality'});
+        for (my $count=0;$count<scalar(@qualities); $count++) {
+            next unless $qualities[$count] == 0;
+            # how do I know which strand to get these quality values from????
+            # boggle
+            my $top_position = 0-$contig->{'top_start'}+$count+1;
+            my $bottom_position = 1-$contig->{'bottom_start'}+$count;
+            my $top_quality_here = $contig->{'top_phreds'}->[$top_position];
+            my $bottom_quality_here = $contig->{'bottom_phreds'}->[$bottom_position];
+            if (!$bottom_quality_here || $bottom_position<0) {
+                $bottom_quality_here = "not found";
+            }
+            if (!$top_quality_here) {
+                $top_quality_here = "not found";
+            }
+            if ($count<$number_leading_xs) {
+                # Leading zeros take their quality from the top read. A
+                # phred entry looks like "t 9 9226". The capture is used
+                # only when the pattern matched; otherwise the zero stays.
+                if ($top_quality_here =~ /\S+\s(\d+)\s+/) {
+                    $qualities[$count] = $1;
+                }
+            } else {
+                # this part is tricky
+                # if the contig is like this
+                #      cccccccccccccccc
+                # ffffffffffffffffff
+                #          rrrrrrrrrrrrrrrrr
+                # then take the quality value for the trailing zeros in the
+                # cons. seq from the r
+                #
+                # but if the contig is like this
+                #      cccccccccccccccccc
+                #      ffffffffffffffffffffffffffffffff
+                # rrrrrrrrrrrrrrrrrrrrrrrxxxxxxxxr
+                #                      ^^^
+                # then for any zeros that fall in the positions (^) it must
+                # be decided whether the quality is the qual from the f or
+                # the r strand. The greater number is used.
+                # does a similar situation exist for the leading zeros? i dunno
+                my $quality;
+                if ($bottom_quality_here eq "not found") {
+                    ($quality) = $top_quality_here =~ /\w+\s(\d+)\s/;
+                } elsif ($top_quality_here eq "not found") {
+                    ($quality) = $bottom_quality_here =~ /\w+\s(\d+)\s/;
+                } else {
+                    # two qualities to choose from at this position
+                    my ($top_quality) = $top_quality_here =~ /\w\ (\d+)\s/;
+                    my ($bottom_quality) = $bottom_quality_here =~ /\w\ (\d+)\s/;
+                    if (defined $top_quality && defined $bottom_quality) {
+                        $quality = $bottom_quality > $top_quality
+                            ? $bottom_quality
+                            : $top_quality;
+                    } else {
+                        # only one of the two entries could be parsed
+                        $quality = defined $top_quality ? $top_quality : $bottom_quality;
+                    }
+                }
+                # the zero is kept when no usable quality value was found
+                $qualities[$count] = $quality if $quality;
+            }
+        }
+        if (@qualities) {
+            $contig->{'quality'} = join(" ",@qualities);
+        }
+        # the phred data is only needed while the zeros are being filled in
+        $contig->{'bottom_phreds'} = undef;
+        $contig->{'top_phreds'} = undef;
+    }                           # end foreach key
+}
+
+
+1;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Consed.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Trim.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Trim.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Trim.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,651 @@
+# $Id: Trim.pm,v 1.21.4.1 2006/10/02 23:10:32 sendu Exp $
+# Bio::Tools::Alignment::Trim.pm
+#
+# Cared for by Chad Matsalla
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code 
+
+=head1 NAME 
+
+Bio::Tools::Alignment::Trim - A kludge to do specialized trimming of
+	sequence based on quality.
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Alignment::Trim;
+  $o_trim = new Bio::Tools::Alignment::Trim;
+  $o_trim->set_reverse_designator("R");
+  $o_trim->set_forward_designator("F");
+
+
+=head1 DESCRIPTION
+
+This is a specialized module designed by Chad for Chad to trim sequences
+based on a highly specialized list of requirements. In other words, write
+something that will trim sequences 'just like the people in the lab would
+do manually'.
+
+I settled on a sliding-window-average style of search which is ugly and
+slow but does _exactly_ what I want it to do.
+
+Mental note: rewrite this.
+
+It is very important to keep in mind the context in which this module was
+written: strictly to support the projects for which Consed.pm was
+designed.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics-at-dieselwurks.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Alignment::Trim;
+
+use strict;
+use Dumpvalue;
+
+use vars qw(%DEFAULTS);
+
+use base qw(Bio::Root::Root);
+
+BEGIN {
+    # Package-wide fallback settings: the read direction labels, the
+    # sliding window size and the phred threshold.
+    %DEFAULTS = (
+        'f_designator' => 'f',
+        'r_designator' => 'r',
+        'windowsize'   => '10',
+        'phreds'       => '20',
+    );
+}
+
+=head2 new()
+
+ Title   : new()
+ Usage   : $o_trim = Bio::Tools::Alignment::Trim->new();
+ Function: Construct the Bio::Tools::Alignment::Trim object. No parameters
+	   are required to create this object. It is strictly a bundle of
+	   functions, as far as I am concerned.
+ Returns : A reference to a Bio::Tools::Alignment::Trim object.
+ Args    : (optional)
+           -windowsize (default 10)
+           -phreds (default 20)
+
+
+=cut 
+
+sub new {
+    # Construct the object, read the optional -windowsize and -phreds
+    # arguments, and set the default forward/reverse designators.
+    my ($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my($windowsize,$phreds) =
+        $self->_rearrange([qw(
+                    WINDOWSIZE
+                    PHREDS
+                              )],
+                          @args);
+    # a missing or false value (including 0) falls back to the default
+    $self->{windowsize} = $windowsize || $DEFAULTS{'windowsize'};
+    $self->{phreds} = $phreds || $DEFAULTS{'phreds'};
+    $self->set_designators($DEFAULTS{'f_designator'},
+                           $DEFAULTS{'r_designator'});
+    return $self;
+}
+
+=head2 set_designators($forward_designator,$reverse_designator)
+
+ Title   : set_designators(<forward>,<reverse>)
+ Usage   : $o_trim->set_designators("F","R")
+ Function: Set the string by which the system determines whether a given
+	sequence represents a forward or a reverse read.
+ Returns : Nothing.
+ Args    : two scalars: one representing the forward designator and one
+	representing the reverse designator
+
+=cut 
+
+sub set_designators {
+    my ($self,@designators) = @_;
+    # the first value is the forward label, the second the reverse label
+    ($self->{'f_designator'},$self->{'r_designator'}) = @designators;
+}
+
+=head2 set_forward_designator($designator)
+
+ Title   : set_forward_designator($designator)
+ Usage   : $o_trim->set_forward_designator("F")
+ Function: Set the string by which the system determines if a given
+	sequence is a forward read.
+ Returns : Nothing.
+ Args    : A string representing the forward designator of this project.
+
+=cut 
+
+sub set_forward_designator {
+    my $self = shift;
+    my $designator = shift;
+    # remember the label that marks a forward read
+    $self->{'f_designator'} = $designator;
+}
+
+=head2 set_reverse_designator($reverse_designator)
+
+ Title   : set_reverse_designator($reverse_designator)
+ Function: Set the string by which the system determines if a given
+	sequence is a reverse read.
+ Usage   : $o_trim->set_reverse_designator("R")
+ Returns : Nothing.
+ Args    : A string representing the reverse designator of this project.
+
+=cut 
+
+sub set_reverse_designator {
+    my $self = shift;
+    my $designator = shift;
+    # remember the label that marks a reverse read
+    $self->{'r_designator'} = $designator;
+}
+
+=head2 get_designators()
+
+ Title   : get_designators()
+ Usage   : $o_trim->get_designators()
+ Returns : A string describing the current designators.
+ Args    : None
+ Notes   : Really for informational purposes only. Duh.
+
+=cut 
+
+sub get_designators {
+    my ($self) = @_;
+    my $forward = $self->{'f_designator'};
+    my $reverse = $self->{'r_designator'};
+    # one human-readable line naming both labels
+    return join('', "forward: ", $forward, " reverse: ", $reverse);
+}
+
+=head2 trim_leading_polys()
+
+ Title   : trim_leading_polys()
+ Usage   : $o_trim->trim_leading_polys()
+ Function: Not implemented. Does nothing.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : This function is not implemented. Part of something I wanted to
+	do but never got around to doing.
+
+=cut 
+
+sub trim_leading_polys {
+	# Unimplemented placeholder: the arguments are unpacked and nothing
+	# else is done with them.
+	my ($self, $sequence) = @_;
+}
+
+=head2 dump_hash()
+
+ Title   : dump_hash()
+ Usage   : $o_trim->dump_hash()
+ Function: Unimplemented.
+ Returns : Nothing.
+ Args    : None.
+ Notes   : Does nothing.
+
+=cut 
+
+sub dump_hash {
+    # Unimplemented: copies the {qualities} hash and does nothing with it.
+    my $self = shift;
+    # Fall back to an empty hash so that an object without {qualities}
+    # does not die under "use strict" when the value is dereferenced.
+    my %hash = %{ $self->{'qualities'} || {} };
+} # end dump_hash
+
+=head2 trim_singlet($sequence,$quality,$name,$class)
+
+ Title   : trim_singlet($sequence,$quality,$name,$class)
+ Usage   : ($start_point,$end_point) =
+        @{$o_trim->trim_singlet($sequence,$quality,$name,$class)};
+ Function: Trim a singlet based on its quality.
+ Returns : a reference to an array containing the start and end
+        trim points.
+ Args    : $sequence : A sequence (SCALAR, please)
+	   $quality : A _scalar_ of space-delimited quality values.
+	   $name : the name of the sequence
+	   $class : The class of the sequence. One of qw(singlet
+		singleton doublet pair multiplet)
+ Notes   : At the time this was written the bioperl objects SeqWithQuality
+	and PrimaryQual did not exist. This is what is with the clumsy
+	passing of references and so on. I will rewrite this next time I
+	have to work with it. I also wasn't sure whether this function
+	should return just the trim points or the points and the sequence.
+	I decided that I always wanted both so that's how I implemented
+	it.
+     - Note that the size of the sliding windows is set during construction of
+       the Bio::Tools::Alignment::Trim object.
+
+=cut 
+
+sub trim_singlet {
+    my ($self,$sequence,$quality,$name,$class) = @_;
+    # The quality values arrive as one space-delimited scalar rather than
+    # as an array, because serialization of arrays is tough.
+    my @qual = split(' ',$quality);
+    # smooth out the qualities
+    my $r_windows = &_sliding_window(\@qual,$self->{windowsize});
+    # leading trim point
+    my $start_base = $self->_get_start($r_windows,$self->{windowsize},$self->{phreds});
+    # Sequences are not discarded for being short; that is not the
+    # decision of this module (removed, 020926).
+    # trailing trim point; _get_end takes: reference_to_windows,windowsize,$phredvalue,start_base
+    my $end_base = &_get_end($r_windows,$self->{windowsize},
+                             $self->{phreds},$start_base);
+    # CHAD : chop_sequence is deliberately not called here because it
+    # also removes X's and N's and things, which is not always wanted.
+    return [$start_base,$end_base];
+}
+
+=head2 trim_doublet($sequence,$quality,$name,$class)
+
+ Title   : trim_doublet($sequence,$quality,$name,$class) 
+ Usage   : ($start_point,$end_point) =
+	    $o_trim->trim_doublet($sequence,$quality,$name,$class);
+ Function: Trim a doublet based on its quality.
+ Returns : an array containing the start and end trim points, or an array
+           of two "FAILED" strings when the start point lies beyond
+           (sequence length - 100).
+ Args    : $sequence : A sequence
+	   $quality : A _scalar_ of space-delimited quality values.
+	   $name : the name of the sequence
+	   $class : The class of the sequence. One of qw(singlet
+		singleton doublet pair multiplet)
+ Notes   : At the time this was written the bioperl objects SeqWithQuality
+	and PrimaryQual did not exist. This is what is with the clumsy
+	passing of references and so on. I will rewrite this next time I
+	have to work with it. I also wasn't sure whether this function
+	should return just the trim points or the points and the sequence.
+	I decided that I always wanted both so that's how I implemented
+	it.
+
+=cut 
+
+#'
+sub trim_doublet {
+    my ($self,$sequence,$quality,$name,$class) = @_;
+    my @qual = split(' ',$quality);
+    my $sequence_length = length($sequence);
+    # smooth out the qualities
+    my $r_windows = &_sliding_window(\@qual,$self->{windowsize});
+    # the consensus sequence starts at the first non-zero quality value
+    my $offset = 0;
+    foreach my $position (0 .. $sequence_length-1) {
+        next if $qual[$position] == 0;
+        $offset = $position;
+        last;
+    }
+    # start_base required: r_quality,$windowsize,$phredvalue
+    my $start_base = $self->_get_start($r_windows,$self->{windowsize},$self->{phreds},$offset);
+    if ($start_base > ($sequence_length - 100)) {
+        # the start point is too close to the end of the sequence
+        my @failed = ("FAILED","FAILED");
+        return @failed;
+    }
+    # 010420 NOTE: We will no longer get the end base to avoid the
+    # Q/--\___/-- syndrome; the end point is simply the sequence length.
+    # The trailing-zero count is still requested although it is not used.
+    my $start_of_trailing_zeros = &count_doublet_trailing_zeros(\@qual);
+    # CHAD : chop_sequence is deliberately not called here because it
+    # also removes X's and N's and things, which is not always wanted.
+    my @points = ($start_base,$sequence_length);
+    return @points;
+}				# end trim_doublet
+
+=head2 chop_sequence($name,$class,$sequence,@points)
+
+ Title   : chop_sequence($name,$class,$sequence,@points)
+ Usage   : ($start_point,$end_point,$chopped_sequence) = 
+	$o_trim->chop_sequence($name,$class,$sequence,@points);
+ Function: Chop a sequence based on its name, class, and sequence.
+ Returns : an array containing three scalars:
+	1- the start trim point
+	2- the end trim point
+	3- the chopped sequence
+ Args    :
+	   $name : the name of the sequence
+	   $class : The class of the sequence. One of qw(singlet
+		singleton doublet pair multiplet)
+	   $sequence : A sequence
+	   @points : An array containing two elements- the first contains
+		the start trim point and the second contains the end trim
+		point.
+
+=cut
+
+sub chop_sequence {
+    # Cut a sequence down to its trim points, then strip everything up to
+    # the last run of five or more X's and any trailing N's.
+    # Args    : the name, the class and the sequence, followed by the start
+    #           and end trim points.
+    # Returns : (start point, end point, chopped sequence), or the single
+    #           string "junk" when both trim points are false.
+    my ($self,$name,$class,$sequence,@points) = @_;
+    print("Coming into chop_sequence, \@points are @points\n");
+    my $fdesig = $self->{'f_designator'};
+    my $rdesig = $self->{'r_designator'};
+    if (!$points[0] && !$points[1]) {
+        $sequence = "junk";
+        return $sequence;
+    }
+    # Doublets are always cut to the trim points. Singlets and singletons
+    # are cut only when the name ends in one of the designators. Every
+    # other class is left uncut.
+    my $is_single_read = ($class eq "singlet" || $class eq "singleton");
+    if ($class eq "doublet"
+        || ($is_single_read && ($name =~ /$fdesig$/ || $name =~ /$rdesig$/))) {
+        $sequence = substr($sequence,$points[0],$points[1]-$points[0]);
+    }
+    # this is a _terrible_ way to do this! i couldn't seem to find a better way
+    # i thought something like s/(^.*[Xx]{5,})//g; might work, but no go
+    # no time to find a fix!
+    my $length_before_trimming = length($sequence);
+    my $subs_Xs = $sequence =~ s/^.*[Xx]{5,}//g;
+    if ($subs_Xs) {
+        # move the start point past whatever was removed at the front
+        my $number_Xs_trimmed = $length_before_trimming - length($sequence);
+        $points[0] += $number_Xs_trimmed;
+    }
+    $length_before_trimming = length($sequence);
+    my $subs_Ns = $sequence =~ s/[Nn]{1,}$//g;
+    if ($subs_Ns) {
+        # pull the end point back by the number of N's removed, and by one more
+        my $number_Ns_trimmed = $length_before_trimming - length($sequence);
+        $points[1] -= $number_Ns_trimmed;
+        $points[1] -= 1;
+    }
+    push @points,$sequence;
+    print("chop_sequence \@points are @points\n");
+    return @points;
+}
+
+=head2 _get_start($r_quals,$windowsize,$phreds,$offset)
+
+ Title   : _get_start($r_quals,$windowsize,$phreds,$offset)
+ Usage   : $start_base = $self->_get_start($r_windows,5,20);
+ Function: Provide the start trim point for this sequence.
+ Returns : a scalar representing the start of the sequence
+ Args    : 
+	$r_quals : A reference to an array containing quality values. In
+		context, this array of values has been smoothed by the
+		sliding window look-ahead algorithm.
+	$windowsize : The size of the window used when the sliding window
+		look-ahead average was calculated.
+	$phreds : The quality threshold. The first window whose summed
+		values reach $windowsize * $phreds marks the start point.
+	$offset : (optional) The index at which the search begins.
+		Defaults to 0.
+
+=cut 
+
+sub _get_start {
+    # Locate the start trim point: the index of the first window of
+    # $windowsize values whose summed quality reaches
+    # $windowsize * $phreds. The search begins at $offset when given,
+    # otherwise at index 0. When no window qualifies, the index at which
+    # the search stopped is returned.
+    my ($self,$r_quals,$windowsize,$phreds,$offset) = @_;
+    if ($self->verbose > 0) {
+        print("Using $phreds phreds\n");
+    }
+    my @quals = @$r_quals;
+    my $start = $offset ? $offset : 0;
+    # slide the window along the sequence until it no longer fits
+    while ($start + $windowsize <= scalar(@quals)) {
+        # add up the quality values in this window; missing or zero
+        # values contribute nothing
+        my $window_sum = 0;
+        foreach my $pos ($start .. $start + $windowsize - 1) {
+            $window_sum += $quals[$pos] if $quals[$pos];
+        }
+        # the window is good enough: this is the start point
+        return $start if $window_sum && $window_sum >= $windowsize * $phreds;
+        $start++;
+    }
+    return $start;
+}	
+
+=head2 _get_end($r_qual,$windowsize,$phreds,$count)
+
+ Title   : _get_end($r_qual,$windowsize,$phreds,$count)
+ Usage   : my $end_base = &_get_end($r_windows,20,20,$start_base);
+ Function: Get the end trim point for this sequence.
+ Returns : A scalar representing the end trim point for this sequence.
+ Args    : 
+	$r_qual : A reference to an array containing quality values. In
+		context, this array of values has been smoothed by the
+		sliding-window look-ahead algorithm.
+	$windowsize : The size of the window used when the sliding window
+		look-ahead average was calculated.
+	$phreds : The minimum per-base quality. The end point is reported
+		at the first full window whose sum falls below
+		$windowsize * $phreds.
+	$count : Start looking for the end of the sequence here.
+
+=cut 
+
+sub _get_end {
+    # Find the end trim point of a sequence from its quality values.
+    # Called as a plain function (no invocant is unpacked from @_).
+    #
+    # Starting at $count (0 when not given), windows of $windowsize values
+    # are summed. The first full window whose sum is below
+    # $bases_counted * $phreds yields the return value
+    # $count + $bases_counted + $windowsize. If the last base is reached
+    # before such a window is found, the total number of bases is returned.
+    my ($r_qual,$windowsize,$phreds,$count) = @_;
+    my @quals = @$r_qual;
+    my $total_bases = scalar(@quals);
+    my ($count2,$qualsum,$end_of_quals,$bases_counted);
+    if (!$count) { $count=0; }
+    # BASE walks the window start position along the sequence.
+  BASE: for (; $count < $total_bases; $count++) {
+      $bases_counted = 0;
+      $qualsum = 0;
+      # POSITION accumulates the values of the window starting at $count.
+    POSITION: for($count2 = $count; $count2 < $total_bases; $count2++) {
+	$bases_counted++;
+
+	# The last base is tested before the window-size check, so a window
+	# that ends exactly on the last base is never compared against the
+	# threshold; both loops are left and $total_bases is returned below.
+	if ($count2 == $total_bases-1) {
+	    $qualsum += $quals[$count2];
+	    $bases_counted++;
+	    last BASE;
+	}
+	# A full window has been summed: report the end point if its sum is
+	# below the threshold, otherwise move on to the next start position.
+	elsif ($bases_counted == $windowsize) {
+	    $qualsum += $quals[$count2];
+	    if ($qualsum < $bases_counted*$phreds) {
+		return $count+$bases_counted+$windowsize;
+	    }
+	    next BASE;
+	}
+	else {
+	    $qualsum += $quals[$count2];
+	}
+    }
+      # NOTE(review): this check appears unreachable. Every pass of the
+      # POSITION loop leaves through "last BASE", "next BASE" or "return",
+      # so the loop never runs to its natural end. Confirm before relying
+      # on it.
+      if ($qualsum < $bases_counted*$phreds) {
+	  return $count+$bases_counted+$windowsize;
+      }
+      else { }
+      $qualsum = 0;
+  }				# end for
+    # NOTE(review): $end_of_quals is never assigned in this sub, so this
+    # branch is never taken.
+    if ($end_of_quals) {
+	my $bases_for_average = $total_bases-$count2;
+	return $count2;
+    }
+    else { }
+    if ($qualsum) { } # print ("$qualsum\n");
+    return $total_bases;
+} # end get_end
+
+=head2 count_doublet_trailing_zeros($r_qual)
+
+ Title   : count_doublet_trailing_zeros($r_qual)
+ Usage   : my $start_of_trailing_zeros = &count_doublet_trailing_zeros(\@qual);
+ Function: Find out when the trailing zero qualities start.
+ Returns : A scalar representing where the zeros start.
+ Args    : A reference to an array of quality values.
+ Notes   : Again, this should be rewritten to use PrimaryQual objects.
+	A more detailed explanation of why phrap puts these zeros here should
+	be written and placed here. Please email and hassle the author.
+
+
+=cut 
+
+sub count_doublet_trailing_zeros {
+	# Find where the run of trailing zero qualities starts.
+	#
+	# Args    : a reference to an array of quality values
+	# Returns : the index just after the last non-zero quality value,
+	#           i.e. the index of the first trailing zero. When no
+	#           non-zero value exists (including an empty array) the
+	#           number of quality values is returned.
+	my ($r_qual) = shift;
+	my @qualities = @$r_qual;
+	# Scan backwards from the last real element down to index 0, so a
+	# non-zero value in the very first position is also found.
+	for (my $current = $#qualities; $current >= 0; $current--) {
+		if ($qualities[$current] && $qualities[$current] != 0) {
+			return $current+1;
+		}
+	}
+	return scalar(@qualities);
+} # end count_doublet_trailing_zeros
+
+=head2 _sliding_window($r_quals,$windowsize)
+
+ Title   : _sliding_window($r_quals,$windowsize)
+ Usage   : my $r_windows = &_sliding_window(\@qual,$windowsize);
+ Function: Create a sliding window, look-forward-average on an array
+	of quality values. Used to smooth out differences in qualities.
+ Returns : A reference to an array containing the smoothed values.
+ Args    : $r_quals: A reference to an array containing quality values.
+	   $windowsize : The size of the sliding window.
+ Notes   : This was written before PrimaryQual objects existed. They
+	   should use that object but I haven't rewritten this yet.
+
+=cut 
+
+#'
+sub _sliding_window {
+    # Smooth an array of quality values with a look-forward average.
+    #
+    # Args    : $r_quals    - reference to an array of quality values
+    #           $windowsize - number of values averaged per window
+    # Returns : reference to an array of window averages, one per start
+    #           position. Windows near the end hold fewer than
+    #           $windowsize values and are averaged over the values
+    #           actually counted. A window whose sum is zero (or a false
+    #           $windowsize) contributes no entry, so the result can be
+    #           shorter than the input.
+    my ($r_quals,$windowsize) = @_;
+    my @quals = @$r_quals;
+    my $size_of_quality = scalar(@quals);
+    my @averages;
+          # do this loop for all of the qualities
+    for (my $count = 0; $count < $size_of_quality; $count++) {
+        my $qualsum = 0;
+        my $bases_counted = 0;
+        BASE: for (my $count2 = $count; $count2 < $size_of_quality; $count2++) {
+            $bases_counted++;
+                 # the window is full: add the closing value and stop
+            if ($bases_counted == $windowsize) {
+                $qualsum += $quals[$count2];
+                last BASE;
+            }
+                 # otherwise add the quality value, ignoring missing/zero ones
+            if ($quals[$count2]) {
+                $qualsum += $quals[$count2];
+            }
+        }
+             # an empty sum also guards the division below against a zero count
+        next unless $qualsum && $windowsize;
+        my $average = $qualsum / $bases_counted;
+        if (!$average) { $average = "0"; }
+        push @averages,$average;
+    }
+    return \@averages;
+
+}
+
+=head2 _print_formatted_qualities
+
+ Title   : _print_formatted_qualities(\@quals)
+ Usage   : &_print_formatted_qualities(\@quals);
+ Returns : Nothing. Prints.
+ Args    : A reference to an array containing quality values.
+ Notes   : An internal procedure used in debugging. Prints out an array nicely.
+
+=cut 
+
+sub _print_formatted_qualities {
+    # Debugging aid: print the quality values tab-separated, ten per
+    # row, each row starting with the index of its first value.
+    # Missing or zero values are shown as 0.
+    my ($rquals) = @_;
+    my @qual = @$rquals;
+    foreach my $count (0 .. $#qual) {
+	# open a new, index-labelled row every ten values
+	print("\n$count\t") unless $count % 10;
+	print($qual[$count] ? "$qual[$count]\t" : "0\t");
+    }
+    print("\n");
+}
+
+=head2 _get_end_old($r_qual,$windowsize,$phreds,$count)
+
+ Title   : _get_end_old($r_qual,$windowsize,$phreds,$count)
+ Usage   : Deprecated. Don't use this!
+ Returns : Deprecated. Don't use this!
+ Args    : Deprecated. Don't use this!
+
+=cut 
+
+#'
+sub _get_end_old {
+    # Deprecated predecessor of _get_end; it warns on every call.
+    # Starting at $count (0 when not given), windows of $windowsize
+    # values are summed and $count + $windowsize is returned for the
+    # first window whose sum is below $windowsize * $phreds.
+    my ($r_qual,$windowsize,$phreds,$count) = @_;
+    warn("Do Not Use this function (_get_end_old)");
+    # NOTE(review): $target is computed but not used afterwards; the
+    # comparison below recomputes the product.
+    my $target = $windowsize*$phreds;
+    my @quals = @$r_qual;
+    my $total_bases = scalar(@quals);
+    my ($count2,$qualsum,$end_of_quals);
+    if (!$count) { $count=0; }
+  BASE: for (; $count < $total_bases; $count++) {
+      for($count2 = $count; $count2 < $count+$windowsize; $count2++) {
+	  # reaching the last base ends the whole search
+	  if ($count2 == scalar(@quals)-1) {
+	      $qualsum += $quals[$count2];
+	      $end_of_quals = 1;
+	      last BASE;
+
+	  }
+	  $qualsum += $quals[$count2];
+      }
+      if ($qualsum < $windowsize*$phreds) {
+	  return $count+$windowsize;
+      }
+      $qualsum = 0;
+  }				# end for
+    # NOTE(review): there is no explicit return after the loop, so when
+    # the search runs off the end the caller gets no defined end point.
+}				# end get_end_old
+
+
+# Autoload methods go after =cut, and are processed by the autosplit program.
+
+1;
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Alignment/Trim.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/DNA/ESEfinder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/DNA/ESEfinder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/DNA/ESEfinder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,336 @@
+# $Id: ESEfinder.pm,v 1.12.4.3 2006/11/08 17:25:55 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Analysis::DNA::ESEfinder
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Analysis::DNA::ESEfinder - a wrapper around ESEfinder
+server
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Analysis::DNA::ESEfinder;
+  use strict;
+
+  my $seq; # a Bio::PrimarySeqI or Bio::SeqI object
+
+  $seq = Bio::Seq->new(
+       -primary_id => 'test',
+       -seq=>'atgcatgctaggtgtgtgttttgtgggttgtactagctagtgat',
+       -alphabet=>'dna');
+
+  my $ese_finder = Bio::Tools::Analysis::DNA::ESEfinder->
+      new(-seq => $seq);
+
+  # run ESEfinder prediction on a DNA sequence
+  $ese_finder->run();
+
+  die "Could not get a result"
+      unless $ese_finder->status =~ /^COMPLETED/;
+
+  print $ese_finder->result;      # print raw prediction to STDOUT
+
+  foreach my $feat ( $ese_finder->result('Bio::SeqFeatureI') ) {
+
+      # do something to SeqFeature
+      # e.g. print as GFF
+      print $feat->gff_string, "\n";
+      # or store within the sequence - if it is a Bio::SeqI
+      $seq->add_SeqFeature($feat)
+
+  }
+
+=head1 DESCRIPTION
+
+This class is a wrapper around the ESEfinder web server which uses
+experimentally defined scoring matrices to identify possible exonic
+splicing enhancers in human transcripts.
+
+The results can be retrieved in 4 ways.
+
+=over 4
+
+=item 1.
+
+C<$ese_finder-E<gt>result('')> retrieves the raw text output of the
+program
+
+=item 2.
+
+C<$ese_finder-E<gt>result('all')> returns a Bio::Seq::Meta::Array object
+with prediction scores for all residues in the sequence
+
+
+=item 3.
+
+C<$ese_finder-E<gt>result('Bio::SeqFeatureI')> returns an array of
+Bio::SeqFeature objects for sequences with significant scores. Feature
+tags are score, motif, SR_protein and method
+
+=item 4.
+
+C<$ese_finder-E<gt>result('raw')> returns an array of significant matches
+with each element being a reference to [SR_protein, position, motif, 
+score]
+
+=back
+
+See L<http://exon.cshl.org/ESE/index.html>
+
+This is the second implementation of Bio::SimpleAnalysisI which hopefully
+will make it easier to write wrappers on various services. This class
+uses a web resource and therefore inherits from L<Bio::WebAgent>.
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+#should have own 
+
+package Bio::Tools::Analysis::DNA::ESEfinder;
+
+use Data::Dumper;
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw (POST);
+use HTML::HeadParser;
+use Bio::SeqFeature::Generic;
+use Bio::Seq::Meta::Array;
+use Bio::WebAgent;
+use strict;
+
+#inherits directly from SimpleAnalysisBase
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+
+#global vars are now file-scoped lexicals
+
+# Address the first request of _run is POSTed to (installed via url() in _init).
+my $URL = 'http://rulai.cshl.org/cgi-bin/tools/ESE/esefinder.cgi';
+	
+# Name stored on each object as _ANALYSIS_NAME.
+my $ANALYSIS_NAME = 'ESEfinder';
+
+
+# Descriptive metadata stored on each object as _ANALYSIS_SPEC.
+my $ANALYSIS_SPEC =
+    {
+     'name' => 'ESEfinder',
+     'type' => 'DNA', #compulsory entry as is used for seq checking
+     'version' => '2.0',
+     'supplier' => 'Krainer lab, Cold Spring Harbor Laboratory, POBOX100, Bungtown Rd, COld Spring Harbor, NY, USA',
+     'description' => 'to identify exonic splicing elements in human transcripts',
+    };
+
+# Description of the single input, stored on each object as _INPUT_SPEC.
+my $INPUT_SPEC =
+    [{
+      'mandatory' => 'true',
+      'type' => 'Bio::PrimarySeqI',
+      'name' => 'sequence',
+     }];
+
+# Maps each argument accepted by result() to a description of what is
+# returned for it; stored on each object as _RESULT_SPEC.
+my $RESULT_SPEC =
+    {
+     '' => 'bulk',  # same as undef
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+     'raw' => 'Array of [ SR_protein, position, motif, score]',
+     'all' => 'Bio::Seq::Meta::Array object'
+    };
+
+
+### unique to this module ##
+sub _init {
+    # Install the fixed, class-wide data on the object: the service URL
+    # plus the analysis, input and result specifications and the
+    # analysis name. Returns the object.
+    my ($self) = @_;
+    $self->url($URL);
+    my %fixed_data_for = (
+        '_ANALYSIS_SPEC' => $ANALYSIS_SPEC,
+        '_INPUT_SPEC'    => $INPUT_SPEC,
+        '_RESULT_SPEC'   => $RESULT_SPEC,
+        '_ANALYSIS_NAME' => $ANALYSIS_NAME,
+    );
+    foreach my $slot (keys %fixed_data_for) {
+        $self->{$slot} = $fixed_data_for{$slot};
+    }
+    return $self;
+}
+
+
+sub _run {
+    # Submit the sequence to the ESEfinder server and store the raw
+    # result text in $self->{'_result'}.
+    #
+    # Two requests are made: the first POSTs the sequence and returns a
+    # page naming a temporary result file; the second POSTs that file
+    # name to fetch the result text. The status is set to
+    # TERMINATED_BY_ERROR up front and only changed to COMPLETED when a
+    # non-empty result has been stored. Throws when either request
+    # reports an error or when no result file name can be found.
+    my $self  = shift;
+
+    # write the sequence as FASTA into an in-memory string
+    my $seq_fasta;
+    my $stringfh = IO::String->new($seq_fasta);
+    my $seqout = Bio::SeqIO->new(-fh => $stringfh,
+                                 -format => 'fasta');
+    $seqout->write_seq($self->seq);
+    $self->debug($seq_fasta);
+
+    # pause between repeated calls; the delay is explicitly set to 1 here
+    $self->delay(1);
+    $self->sleep;
+    $self->status('TERMINATED_BY_ERROR');
+
+    my $request = POST $self->url,
+        #Content_Type => 'x-www-form-urlencoded',
+            Content  => [
+                         protein1 => 1,
+                         protein2 => 1,
+                         protein3 => 1,
+                         protein4 => 1,
+                         radio_sf2 => 0,
+                         radio_sc35 => 0,
+                         radio_srp40 => 0,
+                         radio_srp55 => 0,
+                         sequence =>$seq_fasta,
+                        ];
+    my $content = $self->request($request);
+    if( $content->is_error  ) {
+	$self->throw(ref($self)." Request Error:\n".$content->as_string);
+    }
+
+    my $text = $content->content; #1st response
+    my ($tmpfile) = $text =~ /value="(tmp\/.+txt)"/;
+    # without the file name the second request cannot return the result,
+    # so fail here instead of posting an undefined value
+    if( !defined $tmpfile ) {
+	$self->throw(ref($self)." could not find the result file name in the server response");
+    }
+
+    # now get data for all residues #
+    my $rq2 = POST 'http://rulai.cshl.org/cgi-bin/tools/ESE/resultfile.txt',
+        #Content_Type => 'x-www-form-urlencoded',
+            Content => [
+                        fname => $tmpfile,
+                       ];
+    my $ua2 = Bio::WebAgent->new();
+    my $content2 = $ua2->request($rq2);
+    if( $content2->is_error  ) {
+	$self->throw(ref($self)." Request Error:\n".$content2->as_string);
+    }
+
+    my $text2 = $content2->content;
+    $self->{'_result'} = $text2;		
+    $self->status('COMPLETED') if $text2 ne '';
+
+    #print Dumper $response;
+}
+
+
+sub result {
+
+    # Return the result of the analysis in the form selected by $value:
+    #   false/undef        - the raw result text
+    #   'Bio::SeqFeatureI' - a list of Bio::SeqFeature::Generic objects
+    #   'all'              - the sequence object, reblessed into
+    #                        Bio::Seq::Meta::Array, with one named meta
+    #                        array per protein
+    #   any other value    - a reference to an array of
+    #                        [SR_protein, fields of the result line...]
+
+    #make seq features of above threshold scores #
+
+    my ($self,$value) = @_;
+
+    my @sig_pdctns;
+    my @fts;
+
+    if ($value ) {
+	my $result = IO::String->new($self->{'_result'});
+        my $current_SR;
+        my $all_st_flag = 0;
+        my %all;
+        while (my $line = <$result>) {
+            #make array of all scores or threshold depending on $value
+            # stop at the "All scores" section unless it was asked for, and
+            # in any case at a line containing "2001,"
+            # NOTE(review): "2001," presumably marks a footer line - confirm
+            last if $line =~ /^All scores/ && $value ne 'all' or $line =~ /2001,/;
+            $all_st_flag++ if $line =~ /All scores/;
+            # for 'all', ignore everything before the "All scores" section
+            next if $value eq 'all' && $all_st_flag == 0;
+
+            #parse line
+            # a "Protein" line names the protein the following rows belong to
+            if ($line =~ /^Protein/) {
+                ($current_SR) = $line =~/:\s+(\S+)/;
+                $current_SR =~ s{/}{_}; # remove unallowed characters from hash
+            }
+            # rows starting with a digit carry the data
+            if ( $line =~/^\d+/ && $value ne 'all') {
+                push @sig_pdctns, [$current_SR, split /\s+/, $line] ;
+            } elsif ($line =~ /^\d+/) {
+
+                push @{$all{$current_SR}}, [split /\s+/, $line];
+            }
+        }
+
+        if ($value eq 'Bio::SeqFeatureI') {
+            foreach (@sig_pdctns) {
+                #make new ese object for each row of results
+                # row layout: [SR_protein, position, motif, score]
+                push @fts, Bio::SeqFeature::Generic->new
+                    (
+                     -start => $_->[1],
+                     -end => $_->[1] + length($_->[2]) -1,
+                     -source => 'ESEfinder',
+                     -primary => 'ESE',
+                     -tag =>{
+                             score =>$_->[3],
+                             motif=> $_->[2],
+                             SR_protein=> $_->[0],
+                             method=> 'ESEfinder',
+                            },
+                    );
+            }
+            return @fts;
+        }
+        ## convert parsed data into a meta array format
+        elsif ($value eq 'all') {
+            # the sequence object is reblessed on every call
+            bless ($self->seq, "Bio::Seq::Meta::Array");
+            $self->seq->isa("Bio::Seq::MetaI")
+                || $self->throw("$self is not a Bio::Seq::MetaI");
+
+            for my $prot (keys %all) {
+                my @meta;
+                my $len =  scalar @{$all{$prot}} ;
+                # the third field of each row is stored
+                # NOTE(review): presumably the score column - confirm
+                for (my $i = 0; $i < $len; $i++ ) {
+                    $meta[$i] = $all{$prot}[$i][2];
+                }
+
+                # assign default name here so that the
+                # Bio::Seq::Meta::Array can work for all classes
+                # implementing it and we can avoid having to make
+                # a subclass for each implementation
+
+                $Bio::Seq::Meta::Array::DEFAULT_NAME = "ESEfinder_SRp55";
+                my $meta_name = $self->analysis_spec->{'name'} . "_" . "$prot";
+                $self->seq->named_meta($meta_name,\@meta );
+            }
+            # return  seq array object implementing meta sequence #
+            return $self->seq;
+
+        }
+		#return ref to array of arrays
+        return \@sig_pdctns;
+    }
+    # no argument: hand back the raw text
+    return $self->{'_result'};
+}
+
+
+1;
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Domcut.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Domcut.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Domcut.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,358 @@
+# $Id: Domcut.pm,v 1.0 2003/07/ 11
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::Domcut
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Analysis::Protein::Domcut -  a wrapper around Domcut server
+
+=head1  SYNOPSIS
+
+  use   Bio::Tools::Analysis::Protein::Domcut;
+  #get a  Bio::PrimarySeq
+  use Bio::PrimarySeq;
+  my $seq = new Bio::PrimarySeq
+     (-seq=>'IKLCVNLAILAKAHLIELALAL',
+     -primary_id=>'test'); # a Bio::PrimarySeqI object
+
+  my $domcut = Bio::Tools::Analysis::Protein::Domcut->new (-seq=>$seq);
+  $domcut->run;
+  print $domcut->result;# #raw text to standard out
+
+=head1  DESCRIPTION
+
+A module to remotely retrieve predictions of protein domain
+boundaries.  Each residue in the protein receives a score, those
+better than the significance threshold and at a local minimum receive
+a rank - i.e., the best minimum is rank 1, the second best minimum is
+rank2 etc. These correspond to domain boundaries.  e.g.,
+
+  my $analysis_object = Bio::Tools::Analysis::Protein::Domcut->new
+     (-seq => $seq);
+
+creates a new object. The sequence supplied must be a Bio::PrimarySeq and not
+a Bio::Seq object. 
+
+  $analysis_object->run;
+
+submits the query to the server and obtains raw text output
+
+Given an amino acid sequence the results can be obtained in 4 formats,
+determined by the argument to the result method
+
+=over 4
+
+=item 1
+
+The raw text of the program output
+
+  my $rawdata = $analysis_object->result;
+
+=item 2
+
+A reference to an array of hashes holding the score and rank of each
+residue. Each element in the array is a residue (indexed from 0); the
+rank is an empty string for residues that were not ranked.
+
+  my $data_ref = $analysis_object->result('parsed');
+  print "score at residue 2 is $data_ref->[1]{'score'}\n";
+  print "rank at residue 2 is $data_ref->[1]{'rank'}\n";
+
+=item 3
+
+An array of Bio::SeqFeature::Generic objects where each feature is a
+predicted linker region (primary tag 'Linker'). A feature is made for
+each stretch of residues scoring below -0.09 that contains a ranked
+minimum; its tags are score, rank and residue.
+So, in order to add features to an existing Bio::Seq object;
+
+  # get a Bio::Seq object
+  my $seqobj;
+  my $tool = Bio::Tools::Analysis::Protein::Domcut->new
+      ( -seq => $seqobj->primary_seq);
+  $tool->run;
+
+  my @fts = $tool->result('Bio::SeqFeatureI');
+
+  $seqobj->add_SeqFeature(@fts);
+
+  # if you want  meta sequences as well :
+  my $meta = $tool->result('meta');
+  $seqobj->primary_seq($meta);
+
+  # can access meta data in a Bio::Seq object via a 
+  # call to primary_seq:
+
+  print $seqobj->primary_seq->named_submeta_text('Domcut', 1,2), "\n";
+
+=item 4
+
+A Bio::Seq::Meta::Array implementing sequence.
+
+This is a Bio::Seq object that can also hold data about each residue
+in the sequence. In this case, the sequence can be associated with a
+single array of Domcut prediction scores.  e.g.,
+
+  my $meta_sequence = $analysis_object->result('meta');
+  print "scores from residues 10 -20 are ",
+      $meta_sequence->submeta_text(10,20), "\n";
+
+Many methods common to all analyses are inherited from
+Bio::Tools::Analysis::SimpleAnalysisBase.
+
+=back
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::Tools::Analysis::SimpleAnalysisBase>, 
+L<Bio::Seq::Meta::Array>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+
+=cut
+
+
+use strict;
+package Bio::Tools::Analysis::Protein::Domcut;
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw(GET);
+use Bio::SeqFeature::Generic;
+use Bio::Seq::Meta::Array;
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+# Base address of the service; it already ends in '?', and _run appends
+# the query parameters to it.
+my $URL = 'http://www.Bork.EMBL-Heidelberg.DE/Docu/mikita/domplot.cgi?';
+# Name stored on each object as _ANALYSIS_NAME.
+my $ANALYSIS_NAME = 'Domcut';
+# Descriptive metadata stored on each object as _ANALYSIS_SPEC.
+my $ANALYSIS_SPEC =
+    {
+     'name'        => 'Domcut',
+     'type'        => 'protein', #compulsory entry as is used for seq checking
+     'version'     => 'n/a',
+     'supplier'    => 'Ohara lab, Laboratory of DNA technology, 
+                       Kazusa DNA Research Institute, 1532-3 Yana,
+                       Kisarazu, Japan',
+     'description' => 'to predict domain boundaries in proteins',
+     'reference'   => 'Bioinformatics 19, 673-674 (2003)',
+    };
+
+
+# Description of the single input, stored on each object as _INPUT_SPEC.
+my $INPUT_SPEC =
+    [
+     {
+      'mandatory' => 'true',
+      'type'      => 'Bio::PrimarySeqI',
+      'name'      => 'seq',
+     },
+    ];
+
+# Maps each argument accepted by result() to a description of what is
+# returned for it; stored on each object as _RESULT_SPEC.
+my  $RESULT_SPEC =
+    {
+     ''                 => 'bulk',              # same as undef
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+     'parsed'           => "Array of {'score' =>, 'rank'=> ]",
+     'meta'             => 'Bio::Seq::Meta::Array object'
+    };
+
+=head2 result
+
+ Name    : result
+ Purpose : To retrieve results of analysis in one of several formats.
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : various - see keys in $RESULT_SPEC. 
+
+The method returns a result of an executed job. If the job was
+terminated by an error the result may contain an error message instead
+of the real data.
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defines the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>. Tagnames are 'score' 
+and 'rank'.
+
+=item 'parsed'
+
+Reference to an array of hash references of the form
+{score =E<gt> ..., rank =E<gt> ...}, one per parsed result line.
+
+=item 'meta'
+
+A Bio::Seq::Meta::Array object. Scores can be accessed using methods
+from this class. Meta sequence name is Domcut.
+
+=back
+
+=cut
+
+
+sub result {
+    # Return the result of the analysis in the form selected by $value:
+    #   false/undef        - the raw result text
+    #   'Bio::SeqFeatureI' - a list of Bio::SeqFeature::Generic 'Linker'
+    #                        features, one per ranked trough
+    #   'meta'             - the sequence object as a
+    #                        Bio::Seq::Meta::Array carrying the scores
+    #                        under the meta name 'Domcut'
+    #   'parsed'           - reference to an array of
+    #                        {score => ..., rank => ...} hashes
+    # Any other true value falls through to the raw text.
+    my ($self,$value) = @_;
+
+    if ($value ) {
+        # parse raw text if not already done so
+        if (!exists($self->{'_parsed'})) {
+            my @scores;
+            my $result = IO::String->new($self->{'_result'});
+            while (my $line = <$result>) {
+                next if $line =~/#/;
+                # Skip lines that carry no score. Using the capture
+                # variables after a failed match would silently repeat
+                # the values of the previous line.
+                my ($score, $rank) = $line =~/(\-?\d\.\d+)\s+(\d+)?/
+                    or next;
+                push @scores, {score => $score,
+                               rank  => ($rank)?$rank:'' ,
+                              };
+            }
+            #hold parsed results in object, saves having to reparse each time
+            $self->{'_parsed'} = \@scores;
+        }
+        #make array of Bio::SeqFeature::Generic objects
+        if ($value eq 'Bio::SeqFeatureI') {
+            my @fts;
+            my $i = 0;          #array index (= aa num -1)
+            my $in_trough = 0;
+            my ($st, $rank, $min_score, $min_locus) = (0,0,0,0);
+            my $seqlen = $self->seq->length();
+            for my $score (@{$self->{'_parsed'}}) {
+
+                ##start a potential trough
+                if ($in_trough == 0 && $score->{'score'} < -0.09) {
+                    $in_trough = 1;
+                    $st        = $i+1;
+                }
+
+                ## in a trough, is it ranked?
+                elsif ( $in_trough == 1 && $score->{'score'} < -0.09 && $i +1 < $seqlen){
+                    if ($score->{'rank'} ) {
+                        $rank      = $score->{'rank'};
+                        $min_score = $score->{'score'};
+                        $min_locus = $i + 1;
+                    }
+                }
+
+                ## end of trough or end of sequence, make into feature
+                ## if possible
+                elsif ($in_trough == 1 && ($score->{'score'} > -0.09 ||
+                         $i +1  == $seqlen) ){
+                    # only troughs that contained a ranked residue count
+                    if ($rank != 0) {
+                        push @fts, Bio::SeqFeature::Generic->new (
+                            -start   => $st,
+                            -end     => $i +1, #current position
+                            -primary => 'Linker',
+                            -source  => 'Domcut',
+                            -tag => {
+                                  score   => $min_score,
+                                  rank    => $rank,
+                                  residue => $min_locus,
+                                 },
+                        );
+                    }
+                    ##and reset parameters ##
+                    ($st, $in_trough, $min_locus, $min_score, $rank) = (0,0,0,0,0);
+                }
+                $i++;
+            }
+            return @fts;
+        }
+        ## convert parsed data into a meta array format
+        elsif ($value eq 'meta') {
+
+            ## only need to bless  once
+            if (! $self->seq->isa("Bio::Seq::MetaI")){
+                bless ($self->seq, "Bio::Seq::Meta::Array");
+            }
+            $self->seq->isa("Bio::Seq::MetaI")
+                || $self->throw("$self is not a Bio::Seq::MetaI");
+            my $meta_name = "Domcut";
+
+            #test that sequence does not have already a meta seq with same name
+            if (grep{$_ eq $meta_name}$self->seq->meta_names ) {
+                $self->warn ("$meta_name already exists , not overwriting!");
+                # There is no enclosing loop here, so "next" cannot be
+                # used; hand back the sequence with its existing meta data.
+                return $self->seq;
+            }
+
+            ### or should be an instance variable?? ##
+            $Bio::Seq::Meta::Array::DEFAULT_NAME = 'Domcut';
+            my @meta = map{$_->{'score'}} @{$self->{'_parsed'}};
+            $self->seq->named_meta($meta_name,\@meta );
+
+            # return  seq array object implementing meta sequence #
+            return $self->seq;
+
+        }
+        #       return ref to array of predictions;
+        elsif ($value eq 'parsed') {
+            return $self->{'_parsed'};
+        }
+    }
+    #else if no arguments return raw text
+    return $self->{'_result'};
+}
+
+sub _init {
+    # Install the fixed, class-wide data on the object: the service URL
+    # plus the analysis, input and result specifications and the
+    # analysis name. Returns the object.
+    my ($self) = @_;
+    $self->url($URL);
+    @{$self}{ '_ANALYSIS_SPEC', '_INPUT_SPEC', '_RESULT_SPEC', '_ANALYSIS_NAME' }
+        = ( $ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC, $ANALYSIS_NAME );
+    return $self;
+}
+
+sub _run {
+    # Send the sequence to the Domcut server with a GET request and keep
+    # the response body in $self->{'_result'}. The status is set to
+    # TERMINATED_BY_ERROR before the request and becomes COMPLETED only
+    # when the stored text is not empty.
+    my ($self) = @_;
+    my $seq_fasta = $self->seq->seq;
+    # pause between repeated calls; the delay is explicitly set to 1 here
+    $self->delay(1);
+    $self->sleep;
+    $self->status('TERMINATED_BY_ERROR');
+
+    # the base URL already ends in '?'; append the query parameters
+    my $base_url  = $self->url;
+    my $query_url = $base_url . "&seqnam=" . "&sequence=" . $seq_fasta . "&outform=dat";
+    my $response  = $self->request(GET($query_url));
+
+    my $text = $response->content; #1st response
+    $self->{'_result'} = $text;
+    if ($text ne '') {
+        $self->status('COMPLETED');
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/ELM.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/ELM.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/ELM.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,372 @@
+# $Id: ELM.pm,v 1.12.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::ELM
+#
+# Cared for by Richard Adams <richard.adams at ed.ac.uk>
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1     NAME
+
+Bio::Tools::Analysis::Protein::ELM - a wrapper around the ELM server which predicts short functional motifs on amino acid sequences
+
+=head1     SYNOPSIS
+
+  # get a Bio::Seq object to start with, or a Bio::PrimarySeqI object.
+
+  my $tool = Bio::Tools::Analysis::Protein::ELM->
+      new(seq => $seqobj->primary_seq() );	
+  $tool->compartment(['ER', 'Golgi']);
+  $tool->species(9606);
+  $tool->run;
+  my @fts = $tool->result('Bio::SeqFeatureI');
+  $seqobj->add_SeqFeature(@fts);
+
+=head1    DESCRIPTION
+
+This module is a wrapper around the ELM server L<http://elm.eu.org/>
+which predicts short functional motifs on amino acid sequences. 
+
+False positives can be limited by providing values for the species
+and cellular compartment of the protein. To set the species attribute,
+use either a L<Bio::Species> object or an NCBI taxon ID number.  To set
+the cell compartment attribute (any number of compartments can be
+chosen) use an array reference to a list of compartment names.
+
+Results can be obtained either as raw text output, parsed into a
+data structure, or as Bio::SeqFeature::Generic objects.
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+use strict;
+package Bio::Tools::Analysis::Protein::ELM;
+use vars qw(%cc);
+use HTML::HeadParser;
+use Bio::SeqFeature::Generic;
+use HTTP::Request::Common qw(POST);
+use IO::String;
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+## valid cell compartments ##
+# Keys are lower-case compartment names; compartment() checks its
+# arguments against these keys after lower-casing them.  Every value is a
+# 'GO:...' string except for 'all', which maps to 1.
+%cc = (
+      all            => 1,
+      nucleus        => 'GO:0005634',
+      extracellular  => 'GO:0005576',
+      cytoplasm      => 'GO:0005737',
+      peroxisome     => 'GO:0005777',
+      glycosome      => 'GO:0020015',
+      glyoxisome     => 'GO:0009514',
+      golgi          => 'GO:0005794',
+      er             => 'GO:0005783',
+      lysosome       => 'GO:0005764',
+      endosome       => 'GO:0005768',
+      plasma_membrane=> 'GO:0005886',
+		);
+
+# Server address; _init passes it to url() and _run also uses it directly
+# to build the address of the results page.
+my $URL           = 'http://elm.eu.org/basicELM/cgimodel.py';
+my $ANALYSIS_NAME = 'ELM';
+
+# Description of the accepted inputs.  The order of the entries matters:
+# species() reads the 'default' of element [1] and compartment() reads the
+# 'default' of element [2] through input_spec() (presumably an accessor
+# for the copy stored by _init -- confirm in the base class).
+my $INPUT_SPEC    =
+    [
+     {
+      'mandatory' => 'true',
+      'type'      => 'Bio::PrimarySeqI',
+      'name'      => 'seq',
+     },
+     {
+      'mandatory' => 'false',
+      'type'      => 'taxon_id or Bio::Species object',
+      'name'      => 'species',
+      'default'   => '9606',
+     },
+     {
+      'mandatory' => 'false',
+      'type'      => 'string',
+      'name'      => 'compartment',
+      'default'   => [1],
+     },
+    ];
+
+# Description of the formats result() can return, keyed by the argument
+# given to result().  The 'parsed' entry sketches the structure built by
+# _parse_raw().
+my  $RESULT_SPEC =
+    {
+     ''                 => 'bulk',              # same as undef
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+     'parsed'           => '{motif1_name=>{locus=>[],
+					   peptide=>[],
+					   regexp=>[]
+					  },
+			    }',
+    };
+
+# General description of the analysis; stored on the object by _init.
+# Note that the 'description' string spans two source lines and therefore
+# contains an embedded newline.
+my $ANALYSIS_SPEC= {name        => 'ELM',
+				    type        => 'Protein',
+                    version     => 'n/a',
+                    supplier    =>'BioComputing Unit, EMBL',
+					description =>'Prediction of linear functional motifs
+                                  in proteins',
+					reference   => 'NAR, 31:3625-3630'};
+
+
+# Copy the file-level server URL and the analysis, input and result
+# specifications onto the object.
+# Returns : the object itself
+sub _init {
+    my ($self) = @_;
+    $self->url($URL);
+    my @settings = (
+        [ '_ANALYSIS_SPEC', $ANALYSIS_SPEC ],
+        [ '_INPUT_SPEC',    $INPUT_SPEC    ],
+        [ '_RESULT_SPEC',   $RESULT_SPEC   ],
+        [ '_ANALYSIS_NAME', $ANALYSIS_NAME ],
+    );
+    for my $setting (@settings) {
+        my ($slot, $value) = @$setting;
+        $self->{$slot} = $value;
+    }
+    return $self;
+}
+
+=head2        compartment
+
+ name        : compartment
+ usage       : $elm->compartment(['golgi', 'er']);
+ purpose     : get/setter for cell compartment specifications
+ arguments   : None, single compartment string or ref to array of
+               compartment names.  Names are matched case-insensitively
+               against the keys of %cc; an unknown name produces a
+               warning and is skipped.
+ returns     : Reference to an array holding the %cc value of every
+               compartment accepted so far (each call appends to the
+               list), or the input spec default if none has been set.
+
+=cut
+
+sub compartment {
+
+    my ($self, $arg) = @_;
+    if ($arg) {
+
+        # convert to array ref if not one already
+        if (ref ($arg) ne 'ARRAY') {
+            $arg = [$arg];
+        }
+
+        ## now add params if valid
+        for my $param (@$arg) {
+            # look the name up with the same lower-cased key that is used
+            # for validation, so that 'ER' or 'Golgi' store the mapped
+            # value instead of undef
+            my $key = lc($param);
+            if (exists($cc{$key})) {
+                push @{$self->{'_compartment'}} , $cc{$key};
+            } else {
+                $self->warn("invalid argument ! Must be one of " .
+                            join "\n", keys %cc );
+            }
+        }                       #end of for loop
+
+    }                           #endif $arg
+    return defined($self->{'_compartment'})? $self->{'_compartment'}
+        : $self->input_spec()->[2]{'default'};
+
+}
+
+=head2      species
+
+ name      : species
+ usage     : $tool->species('9606');
+ purpose   : get/setter for the species selection sent to the ELM server
+ arguments : none, an all-digit NCBI taxon id or a Bio::Species object;
+             anything else produces a warning and leaves the stored
+             value unchanged
+ returns   : the stored taxon id, or the input spec default if none has
+             been set
+
+=cut
+
+sub species {
+    my ($self, $taxon) = @_;
+
+    if ($taxon) {
+        my $is_species_object = ref($taxon) && $taxon->isa('Bio::Species');
+        if ($is_species_object) {
+            # take the id from the species object
+            $self->{'_species'} = $taxon->ncbi_taxid();
+        }
+        elsif ($taxon =~ /^\d+$/) {
+            # plain numeric id, stored as given
+            $self->{'_species'} = $taxon;
+        }
+        else {
+            $self->warn("Argument must be a Bio::Species object or ".
+                        " an integer NCBI taxon id. ");
+        }
+    }
+
+    return $self->{'_species'} if defined($self->{'_species'});
+    return $self->input_spec()->[1]{'default'};
+}
+
+# Submit the sequence to the ELM server and poll for the finished result.
+#
+# The status is set to 'TERMINATED_BY_ERROR' up front and is changed to
+# 'COMPLETED' only once a results page has been retrieved; that page, with
+# every HTML tag replaced by a space, is cached in $self->{'_result'}.
+# Warns and returns early on an HTTP error, or when the first response
+# does not contain the link to the results page.
+# Returns : nothing
+sub  _run {
+    my $self  = shift;
+    $self->delay(1);
+    # delay repeated calls by default by 3 sec, set delay() to change
+    #$self->sleep;
+    $self->status('TERMINATED_BY_ERROR');
+
+    # The form fields are passed as a flat key/value list rather than a
+    # hash so that 'userCC' can be repeated, once per selected compartment
+    # (multiple checkbox selections).
+    my @form_fields = map { ('userCC', $_) } @{$self->compartment()};
+    push @form_fields, (
+        swissprotId      => "",
+        sequence         => $self->seq->seq,
+        userSpecies      => $self->species,
+        typedUserSpecies => '',
+        fun              => "Submit",
+    );
+
+    my $request = POST $self->url(),
+        Content_Type => 'form-data',
+        Content      => \@form_fields;
+    $self->debug( $request->as_string);
+    my $r1 = $self->request($request);
+    if ( $r1->is_error  ) {
+        $self->warn(ref($self)." Request Error:\n".$r1->as_string);
+        return;
+    }
+
+    # The first response is expected to carry a 'URL=...' link whose query
+    # string (fun=...r=N) identifies the results page.  Without it there
+    # is nothing to poll, so give up instead of requesting a bogus URL.
+    my $text = $r1->content;
+    my ($query) = $text =~ /URL=\S+(fun=\S+r=\d)/s;
+    unless (defined $query) {
+        $self->warn(ref($self).
+                    " could not find the results URL in the server response");
+        return;
+    }
+    my $url = $URL . "?" . $query;
+
+    # Poll until the returned page no longer contains the word "patient".
+    # NOTE(review): the number of polls is not bounded.
+    while (1) {
+        my $req2 = HTTP::Request->new(GET=>$url);
+        my $r2 = $self->request ($req2);
+        if ( $r2->is_error ) {
+            $self->warn(ref($self)." Request Error:\n".$r2->as_string);
+            return;
+        }
+        my $resp2 = $r2->content();
+
+        if ($resp2 !~ /patient/s) {
+            $self->status('COMPLETED');
+            $resp2=~ s/<[^>]+>/ /sg;    # replace each HTML tag by a space
+            $self->{'_result'} = $resp2;
+            return;
+        } else {
+            print "." if $self->verbose > 0;
+            $self->sleep(1);
+        }
+    }
+}
+
+=head2      result
+
+ name      : result
+ usage     : $tool->result('Bio::SeqFeatureI');
+ purpose   : parse results into sequence features or basic data format
+ arguments : 1. none    (retrieves raw text without html)
+             2. 'Bio::SeqFeatureI' (returns array of sequence features)
+                tag names are : {method => 'ELM', motif => motifname,
+                                 peptide => sequence of match,
+                                 concensus => regexp of match}.
+             3. any other true value (retrieves the parsed data
+                structure, a hash reference keyed by motif name)
+ returns   : see arguments.
+
+=cut
+
+sub result {
+    my ($self, $format) = @_;
+
+    # no format requested: hand back the cached raw text
+    return $self->{'_result'} unless $format;
+
+    # the raw text is parsed once and the outcome is cached
+    $self->_parse_raw() unless exists($self->{'_parsed'});
+
+    # every format other than 'Bio::SeqFeatureI' gets the parsed structure
+    return $self->{'_parsed'} if $format ne 'Bio::SeqFeatureI';
+
+    # one feature per locus of every motif
+    my @features;
+    for my $motif (keys %{$self->{'_parsed'}}) {
+        my $hits   = $self->{'_parsed'}{$motif};
+        my $n_loci = scalar @{$hits->{'locus'}};
+        for my $i (0 .. $n_loci - 1) {
+            # a locus is stored as "start-end"
+            my ($st, $end) = split /\-/, $hits->{'locus'}[$i];
+            my %tags = (
+                method    => 'ELM',
+                motif     => $motif,
+                peptide   => $hits->{'peptide'}[$i],
+                concensus => $hits->{'regexp'}[0],
+            );
+            push @features, Bio::SeqFeature::Generic->new
+                (
+                 -start       => $st,
+                 -end         => $end,
+                 -primary_tag => 'Domain',
+                 -source      => 'ELM',
+                 -tag         => \%tags,
+                );
+        }
+    }
+    return @features;
+}
+
+## internal sub to parse raw data into internal data structure which is cached.
+## Reads the text in $self->{'_result'} line by line and stores a reference
+## to a hash in $self->{'_parsed'}:
+##   { motif_name => { peptide => [...], locus => [...], regexp => [...] } }
+## Only the lines between the "Elm Name Instances" header and the
+## "List of excluded" line are examined.
+sub _parse_raw {
+    my $self = shift;
+    my $result = IO::String->new($self->{'_result'});
+    my $in_results = 0;         # lines seen since (and including) the header
+    my $name;                   # motif name that the current data belong to
+    my %results;
+    my $last;                   # not used anywhere in this sub
+    while (my $l = <$result>) {
+        # skip everything before the header line of the results table
+        next unless  $in_results > 0 ||$l =~ /^\s+Elm\s+Name\s+Instances/;
+        $in_results++;          #becomes 1 on the header line, >1 afterwards
+        last if $l =~ /List of excluded/;
+        next unless $in_results >1;     # skip the header line itself
+
+        # split on whitespace and drop the first field (an empty string
+        # when the line starts with whitespace)
+        my @line_parts = split /\s+/, $l;
+        shift @line_parts;
+        ## if result has motif name on 1 line
+        if (scalar @line_parts == 1 && $line_parts[0]=~ /^\s*(\w+_\w+)/) {
+            $name = $1;
+            next;
+        }
+        ## else if is line with loci /seq matches
+        elsif (@line_parts > 1) {
+            my $index = 0;      ## array index
+            my $read_loci = 0;  ## flag to know that loci are being read
+            # Walk the fields from the left.  Until the first locus is
+            # seen, a field containing '_' is taken as the motif name and a
+            # field of word characters only as a matched peptide; fields
+            # that look like "digits-digits" are loci.  The first field
+            # that fits none of these ends the walk.
+            while ($index <= $#line_parts) {
+                my $word = $line_parts[$index++];
+                if ($read_loci ==0 && $word =~/_/) {
+                    $name = $word;
+                } elsif ($read_loci == 0 && $word =~ /^\w+$/ ) {
+	            push @{$results{$name}{'peptide'}}, $word;
+                } elsif ($word =~ /\d+\-\d+/) {
+                    $read_loci = 1;
+                    push @{$results{$name}{'locus'}}, $word;
+                } else {        ## only get here if there are elements
+                    last;
+                }
+            }                   #end of while
+            # the last field of the line is stored as the motif's regexp
+            push @{$results{$name}{'regexp'}}, $line_parts[$#line_parts];
+        }                       #end of elsif
+
+    }                           #end of while
+
+    $self->{'_parsed'} = 	\%results;
+}
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/ELM.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/GOR4.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/GOR4.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/GOR4.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,365 @@
+# $Id: GOR4.pm,v 1.0 2003/07/ 11
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::GOR4
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1  NAME
+
+Bio::Tools::Analysis::Protein::GOR4 - a wrapper around GOR4 protein
+secondary structure prediction server
+
+=head1  SYNOPSIS
+
+  use Bio::Tools::Analysis::Protein::GOR4;
+  #get a Bio::Seq or Bio::PrimarySeq
+  use Bio::PrimarySeq;
+  $seq = new Bio::PrimarySeq
+    (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
+     -primary_id=>'test'); # a Bio::PrimarySeqI object
+
+  my $gor4 = Bio::Tools::Analysis::Protein::GOR4->new (-seq=>$seq);
+  $gor4->run;
+  print $gor4->result; # raw text to standard output
+
+=head1  DESCRIPTION
+
+A module to remotely retrieve predictions of protein secondary
+structure.  Each residue in the protein receives a score representing
+the likelihood of existing in each of three different states (helix,
+coil or sheet), e.g.,
+
+  my $analysis_object = Bio::Tools::Analysis::Protein::GOR4->
+      new(-seq => $seq);
+
+creates a new object
+
+  $analysis_object->run;
+
+submits the query to the server and obtains raw text output
+
+Given an amino acid sequence the results can be obtained in 4 formats,
+determined by the argument to the result method
+
+=over 4
+
+=item 1
+
+The raw text of the program output
+
+  my $rawdata = $analysis_object->result;
+
+=item 2
+
+A reference to an array of hashes of scores for each state and the
+assigned state.
+
+  my $data_ref = $analysis_object->result('parsed');
+  print "score for helix at residue 2 is $data_ref->[1]{'helix'}\n";
+  print "predicted struc  at residue 2 is $data_ref->[1]{'struc'}\n";
+
+=item 3
+
+An array of Bio::SeqFeature::Generic objects where each feature is a
+predicted unit of secondary structure. Only runs of the same predicted
+state that are longer than 3 residues are reported as features (turns
+are reported whatever their length). See
+Bio::Tools::Analysis::Protein::Domcut for examples of how to add
+sequence features.
+
+  my @fts = $analysis_object->result('Bio::SeqFeatureI');
+  for my $ft (@fts) {
+      print " From ",  $ft->start, " to  ",$ft->end, " struc: " ,
+             ($ft->each_tag_value('type'))[0]  ,"\n";
+  }
+
+=item 4
+
+A Bio::Seq::Meta::Array implementing sequence.
+
+This is a Bio::Seq object that can also hold data about each residue
+in the sequence. In this case, the sequence can be associated with
+arrays of GOR4 prediction scores, e.g.,
+
+  my $meta_sequence = $analysis_object->result('meta');
+  print "helix scores from residues 10-20 are ",
+      $meta_sequence->named_submeta_text("GOR4_helix",10,20), "\n";
+
+Meta sequence names are : GOR4_helix, GOR4_sheet, GOR4_coil,
+GOR4_struc, representing the scores for each residue.
+
+Many methods common to all analyses are inherited from
+Bio::Tools::Analysis::SimpleAnalysisBase.
+
+=back
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::Tools::Analysis::SimpleAnalysisBase>, 
+L<Bio::Seq::Meta::Array>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk,
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+use strict;
+
+package Bio::Tools::Analysis::Protein::GOR4;
+
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw(POST);
+use Bio::SeqFeature::Generic;
+use Bio::Seq::Meta::Array;
+
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+# Length threshold used by _get_2ary_coords: a run of one predicted state
+# must be longer than this to be recorded (turns, 'T', are exempt).
+use constant MIN_STRUC_LEN => 3;
+
+# Address the sequence is submitted to; _init passes it to url().
+my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_gor4.pl';
+
+# Name and descriptive specifications; _init stores them on the object.
+my $ANALYSIS_NAME = 'GOR4';
+my $ANALYSIS_SPEC = {name => 'Gor4', type => 'Protein'};
+my $INPUT_SPEC    = [
+                     {mandatory =>'true',
+                      type      => 'Bio::PrimarySeqI',
+                      'name'    => 'seq',
+                  },
+                 ];
+
+# Description of the formats result() can return.
+# NOTE(review): the per-residue array of hashes is listed here under the
+# key 'raw' while the POD of result() calls it 'parsed'; result() returns
+# that structure for any true argument other than 'Bio::SeqFeatureI' and
+# 'meta', so both spellings work.
+my  $RESULT_SPEC =
+    {
+     ''                 => 'bulk',              # same as undef
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+     raw                => '[ {struc =>, helix=> ,sheet=>, coil=>}]',
+     meta                => 'Bio::Seq::Meta::Array object',
+    };
+
+=head2 result
+
+ Name    : result
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : see keys of $RESULT_SPEC
+
+The method returns a result of an executed job. If the job was
+terminated by an error the result may contain an error message instead
+of the real data.
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defines the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>.  Feature primary
+tag is "Region".  Feature tags are "type" (the one-letter code of the
+predicted state, e.g. H, E, C or T) and "method" (GOR4).
+
+=item 'parsed'
+
+Array of hash references of { helix =E<gt>, sheet =E<gt> , coil =E<gt> , struc=E<gt>}.
+
+=item 'meta'
+
+A Bio::Seq::Meta::Array object. Scores can be accessed using methods
+from this class. Meta sequence names are GOR4_helix, GOR4_sheet,
+GOR4_coil, GOR4_struc.
+
+
+=back
+
+
+=cut
+
+
+# Return the result of the analysis in the format selected by $value.
+#
+#   false / undef       the raw text stored by _run
+#   'Bio::SeqFeatureI'  list of Bio::SeqFeature::Generic objects, one for
+#                       each run recorded by _get_2ary_coords
+#   'meta'              the sequence object, blessed into
+#                       Bio::Seq::Meta::Array if it is not one already,
+#                       with the named meta sequences GOR4_struc,
+#                       GOR4_helix, GOR4_sheet and GOR4_coil added
+#   any other true value (e.g. 'parsed')
+#                       reference to an array with one hash per residue:
+#                       { struc =>, helix =>, sheet =>, coil => }
+#
+# The raw text is parsed on the first call with a true argument and the
+# parsed array is cached in $self->{'_parsed'}.
+sub result {
+    my ($self,$value) = @_;
+
+    my @scores;
+    my @fts;
+
+    if ($value ) {
+        #parse into basic raw form, store this as well as '_result'
+        if (!exists($self->{'_parsed'}) ) {
+            my $result = IO::String->new($self->{'_result'});
+            while (my $line = <$result>) {
+                # A score line starts with the one-character state followed
+                # by three integer scores.  Lines that do not match are
+                # skipped, so that no record with undefined fields is
+                # stored for them.
+                next unless $line =~ /^(\w)\s+(\d+)\s+(\d+)\s+(\d+)/;
+                push @scores, { struc => $1,
+                                helix => $2,
+                                sheet => $3,
+                                coil  => $4,
+                              };
+            }
+            $self->{'_parsed'} = \@scores;
+        }
+        if ($value eq 'Bio::SeqFeatureI') {
+            $self->_get_2ary_coords();
+            for my $type (keys %{$self->{'_parsed_coords'}} ) {
+                next if $type =~  /\w{2,}/; #if not H,C,E or T
+                for my $loc (@{$self->{'_parsed_coords'}{$type}} ) {
+                    push @fts, Bio::SeqFeature::Generic->new
+                        (-start   => $loc->{'start'},
+                         -end     => $loc->{'end'},
+                         -source  => 'GOR4',
+                         -primary => 'Region',
+                         -tag => {
+                                  type => $type,
+                                  method => $self->analysis_name,
+                                 });
+                }               #end of array of strucs of type
+            }                   # end of all 2nd struc elements
+            delete $self->{'_parsed_coords'}; #remove temp data
+            return @fts;
+        }                       #endif BioSeqFeature
+
+        elsif ($value eq 'meta') {
+            #1st of all make 3 or 4 arrays of scores for each type from column data
+            my %type_scores;
+            for my $aa (@{$self->{'_parsed'}}) {
+                push @{$type_scores{'struc'}}, $aa->{'struc'};
+                push @{$type_scores{'helix'}}, $aa->{'helix'};
+                push @{$type_scores{'sheet'}}, $aa->{'sheet'};
+                push @{$type_scores{'coil'}}, $aa->{'coil'};
+            }
+
+            ## bless if necessary ##
+            if (!$self->seq->isa("Bio::Seq::Meta::Array")){
+                bless ($self->seq, "Bio::Seq::Meta::Array");
+            }
+            $self->seq->isa("Bio::Seq::MetaI")
+                || $self->throw("$self is not a Bio::Seq::MetaI");
+            $Bio::Seq::Meta::Array::DEFAULT_NAME = 'GOR4_struc';
+
+            ## now make meta_Sequence
+            for my $struc_type (keys %type_scores) {
+                my $meta_name = "GOR4". "_" . "$struc_type";
+                my @meta = map{$_->{$struc_type}} @{$self->{'_parsed'}};
+                # an existing meta sequence of that name is left untouched
+                if (grep{$_ eq $meta_name}$self->seq->meta_names ) {
+                    $self->warn ("$meta_name already exists , not overwriting!");
+                    next;
+                }
+                $self->seq->named_meta($meta_name,\@meta );
+            }
+            # return  seq array object implementing meta sequence #
+            return $self->seq;
+
+        }
+        else  {
+            return $self->{'_parsed'};
+        }
+    }  #endif ($value)
+
+    #return raw result if no return format stated
+    return $self->{'_result'};
+}
+
+sub _get_2ary_coords {
+    #helper sub for result;
+    ##extracts runs of one predicted state that are longer than
+    ##MIN_STRUC_LEN residues (runs of 'T' are kept whatever their length),
+    #i.e., helical prediction for 1 residue isn't very meaningful...
+    ## and populates a hash of arrays of hashes with 1-based, inclusive
+    ## start/end values, stored in $self->{'_parsed_coords'}.
+    ##keys of %Result are 'H' 'T' 'C' 'E'.
+    #could be put into a secondary base class if need be
+    my ($self) = @_;
+    my @prot = @{$self->{'_parsed'}};
+    my %Result;
+    my $index = 0;
+    while ($index <= $#prot) {
+        my $type = $prot[$index]{'struc'};
+
+        # a residue without a recognised state never starts a run
+        unless (defined $type && $type =~ /[HTCE]/) {
+            $index++;
+            next;
+        }
+
+        # Extend the run while the following residue has the same state.
+        # The scan simply stops at the last residue, so a run that reaches
+        # the end of the sequence is closed like any other.
+        my $last = $index;
+        while ($last < $#prot) {
+            my $next_type = $prot[$last + 1]{'struc'};
+            last unless defined $next_type && $next_type eq $type;
+            $last++;
+        }
+
+        my $length = $last - $index + 1;
+        if ($length > MIN_STRUC_LEN || $type eq 'T') {
+            push @{$Result{$type}}, {start => $index + 1, end => $last + 1};
+        }
+
+        # continue with the first residue after this run
+        $index = $last + 1;
+    }
+    $self->{'_parsed_coords'} = \%Result; #temp assignment
+}
+
+# Copy the file-level server URL and the analysis, input and result
+# specifications onto the object.
+# Returns : the object itself
+sub _init {
+    my ($self) = @_;
+    $self->url($URL);
+    my %spec_for = (
+        '_ANALYSIS_SPEC' => $ANALYSIS_SPEC,
+        '_INPUT_SPEC'    => $INPUT_SPEC,
+        '_RESULT_SPEC'   => $RESULT_SPEC,
+        '_ANALYSIS_NAME' => $ANALYSIS_NAME,
+    );
+    for my $slot (keys %spec_for) {
+        $self->{$slot} = $spec_for{$slot};
+    }
+    return $self;
+}
+
+
+# Submit the sequence to the server and fetch the prediction it links to.
+#
+# The status is set to 'TERMINATED_BY_ERROR' up front.  The body of the
+# second response is always cached in $self->{'_result'}, but the status
+# is changed to 'COMPLETED' only when that response is not an HTTP error
+# and its body is not empty.
+# Returns : the cached result text; nothing when the first response is
+#           empty or carries no link to the prediction
+sub  _run {
+    my $self  = shift;
+    $self->delay(1);
+    # delay repeated calls by default by 3 sec, set delay() to change
+    $self->sleep;
+    $self->status('TERMINATED_BY_ERROR');
+    my $request = POST $self->url,
+        Content_Type => 'form-data',
+            Content  => [title => "",
+                         notice => $self->seq->seq,
+                         ali_width => 70,
+                        ];
+
+    my $content = $self->request($request);
+    my $text = $content->content;
+    return unless $text;
+
+    # the first page links to the file holding the prediction
+    my ($next) = $text =~ /Prediction.*?=(.*?)>/;
+    return unless $next;
+    my $out = 'http://npsa-pbil.ibcp.fr/'.$next;
+    my $req2 = HTTP::Request->new(GET=>$out);
+    my $resp2 = $self->request($req2);
+
+    # Test the body and the HTTP status: the response object itself is
+    # never equal to the empty string.
+    my $result = $resp2->content;
+    $self->{'_result'} = $result;
+    if (!$resp2->is_error && defined $result && $result ne '') {
+        $self->status('COMPLETED');
+    }
+    return $result;
+}
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/HNN.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/HNN.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/HNN.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,369 @@
+# $Id: HNN.pm,v 1.0 2003/07/ 11
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::HNN
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+
+=head1 NAME
+
+Bio::Tools::Analysis::Protein::HNN - a wrapper around the HNN protein
+secondary structure prediction server
+
+=head1  SYNOPSIS
+
+  use	Bio::Tools::Analysis::Protein::HNN;
+  #get a Bio::Seq or Bio::PrimarySeq
+  use Bio::PrimarySeq;
+  my $seq = new Bio::PrimarySeq
+      (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
+       -primary_id=>'test'); # a Bio::PrimarySeqI object
+
+  my $hnn = Bio::Tools::Analysis::Protein::HNN->new (-seq=>$seq);
+  $hnn->run;
+  print $hnn->result; # raw text to standard output
+
+=head1  DESCRIPTION	
+
+A module to remotely retrieve predictions of protein secondary
+structure.  Each residue in the protein receives a score representing
+the likelihood of existing in each of three different states (helix,
+coil or sheet), e.g.:
+
+  my $analysis_object = Bio::Tools::Analysis::Protein::HNN->new
+     (-seq => $seq);
+
+creates a new object
+
+  $analysis_object->run;
+
+submits the query to the server and obtains raw text output.
+
+Given an amino acid sequence the results can be obtained in 4 formats,
+determined by the argument to the result method:
+
+=over 4
+
+=item 1
+
+The raw text of the program output.
+
+  my $rawdata = $analysis_object->result;				
+
+=item 2
+
+A reference to an array of hashes of scores for each state and the
+assigned state.
+
+  my $data_ref = $analysis_object->result('parsed');					
+  print "score for helix at residue 2 is $data_ref->[1]{'helix'}\n";	
+  print "predicted struc  at residue 2 is $data_ref->[1]{'struc'}\n";
+
+=item 3
+
+An array of Bio::SeqFeature::Generic objects where each feature is a
+predicted unit of secondary structure. Only stretches of helix/sheet
+predictions for longer than 4 residues are defined as helices.
+
+  my @fts = $analysis_object->result('Bio::SeqFeatureI');
+  for my $ft (@fts) {		
+      print " From ",  $ft->start, " to  ",$ft->end, " struc: " ,
+             ($ft->each_tag_value('type'))[0]  ,"\n";
+  }
+
+=item 4
+
+A Bio::Seq::Meta::Array implementing sequence.
+
+This is a Bio::Seq object that can also hold data about each residue
+in the sequence. In this case, the sequence can be associated with
+arrays of HNN prediction scores, e.g.,
+
+  my $meta_sequence = $analysis_object->result('meta');
+
+  print "helix scores from residues 10-20 are ",
+      $meta_sequence->named_submeta_text("HNN_helix",10,20), "\n";
+
+Meta sequence default names are : HNN_helix, HNN_sheet, HNN_coil,
+HNN_struc, representing the scores for each residue.
+
+Many methods common to all analyses are inherited from
+L<Bio::Tools::Analysis::SimpleAnalysisBase>.
+
+=back
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::Tools::Analysis::SimpleAnalysisBase>, 
+L<Bio::Seq::Meta::Array>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk,
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+use strict;
+
+package Bio::Tools::Analysis::Protein::HNN;
+
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw (POST);
+use Bio::SeqFeature::Generic;
+use Bio::Seq::Meta::Array;
+
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+#extends array for 2struc.
+
+# Address of the HNN prediction service; _init passes it to url().
+my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_hnn.pl';
+
+# Name and descriptive specifications; _init stores them on the object.
+my $ANALYSIS_NAME= 'HNN';
+my $ANALYSIS_SPEC= {name => 'HNN', type => 'Protein'};
+my $INPUT_SPEC = [
+                  { mandatory => 'true',
+                    type      => 'Bio::PrimarySeqI',
+                    'name'    => 'seq',
+                  },
+                 ];
+
+# Description of the formats result() can return.
+# NOTE(review): the per-residue array of hashes is listed here under the
+# key 'raw' while the POD of result() calls it 'parsed' -- confirm which
+# spelling callers are meant to use.
+my  $RESULT_SPEC =
+    {
+     ''                 => 'bulk',  # same as undef
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+     raw                => '[ {helix=>, sheet=>, struc=>, coil=>}]',
+     meta               => 'Bio::Seq::Meta::Array object',
+    };
+
+# presumably the run-length threshold applied by _get_2ary_coords, as in
+# the GOR4 module -- TODO confirm against that sub
+use constant MIN_STRUC_LEN => 3; 
+
+# Copy the file-level server URL and the analysis, input and result
+# specifications onto the object.
+# Returns : the object itself
+sub _init {
+    my ($self) = @_;
+    $self->url($URL);
+    my @slots = qw(_ANALYSIS_SPEC _INPUT_SPEC _RESULT_SPEC _ANALYSIS_NAME);
+    my @specs = ($ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC, $ANALYSIS_NAME);
+    @{$self}{@slots} = @specs;
+    return $self;
+}
+
+
+# Submit the sequence to the server and fetch the prediction it links to.
+#
+# The request goes to $self->url (set to the HNN address by _init) rather
+# than to a second hard-coded copy of that address.  The status is set to
+# 'TERMINATED_BY_ERROR' up front.  The body of the second response is
+# always cached in $self->{'_result'}, but the status is changed to
+# 'COMPLETED' only when that response is not an HTTP error and its body
+# is not empty.
+# Returns : the object itself; nothing when the first response is empty
+#           or carries no link to the prediction
+sub  _run {
+    my $self  = shift;
+    $self->delay(1);
+    # delay repeated calls by default by 3 sec, set delay() to change
+    $self->sleep;
+    $self->status('TERMINATED_BY_ERROR');
+    my $request = POST $self->url,
+        Content_Type => 'form-data',
+            Content  => [title => "",
+                         notice => $self->seq->seq,
+                         ali_width => 70,
+                        ];
+
+    my $text = $self->request($request)->content;
+    return unless $text;
+
+    # the first page links to the file holding the prediction
+    my ($next) = $text =~ /Prediction.*?=(.*?)>/;
+    return unless $next;
+    my $out = "http://npsa-pbil.ibcp.fr/".$next;
+    my $req2 = HTTP::Request->new(GET=>$out);
+    my $resp2 = $self->request ($req2);
+
+    # Test the body and the HTTP status: the response object itself is
+    # never equal to the empty string.
+    my $result = $resp2->content;
+    $self->{'_result'} = $result;
+    if (!$resp2->is_error && defined $result && $result ne '') {
+        $self->status('COMPLETED');
+    }
+    return $self;
+}
+
+
+=head2 result
+
+ Name    : result
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : see keys of $RESULT_SPEC
+
+The method returns a result of an executed job. If the job was
+terminated by an error the result may contain an error message instead
+of the real data.
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags.
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defines the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>.  Feature primary
+tag is "Domain".  Feature tags are "type" (the one-letter code of the
+predicted state) and "method" (HNN).
+
+=item 'parsed'
+
+Array of hash references of scores/structure assignations { helix =E<gt>,
+sheet =E<gt> , coil =E<gt> , struc=E<gt>}.
+
+=item 'meta'
+
+A Bio::Seq::Meta::Array object. Scores can be accessed using methods
+from this class. Meta sequence names are HNN_helix, HNN_sheet,
+HNN_coil, HNN_struc.
+
+=back
+
+
+=cut
+
+
+sub result {
+    # Return the prediction in the form selected by $value:
+    #   false / undef      - the raw text held in $self->{'_result'}
+    #   'Bio::SeqFeatureI' - list of Bio::SeqFeature::Generic objects, one
+    #                        per run reported by _get_2ary_coords()
+    #   'meta'             - the sequence object with HNN_* meta data added
+    #   any other true     - ref to the array of per-residue score hashes
+    my ($self,$value) = @_;
+
+    #return raw result if no return format stated
+    return $self->{'_result'} unless $value;
+
+    #parse into basic raw form, store this as well as '_result'
+    if (!exists($self->{'_parsed'}) ) {
+        my @scores;
+        my $result = IO::String->new($self->{'_result'});
+        while (my $line = <$result>) {
+            # a residue line is a state letter followed by three integer
+            # scores; lines that do not fit are skipped rather than being
+            # stored as a row of undefined values
+            next unless $line =~ /^([HEC])\s+(\d+)\s+(\d+)\s+(\d+)/;
+            push @scores, { struc => $1,
+                            helix => $2,
+                            sheet => $3,
+                            coil  => $4,
+                          };
+        }
+        $self->{'_parsed'} = \@scores;
+    }
+
+    if ($value eq 'Bio::SeqFeatureI') {
+        my @fts;
+        $self->_get_2ary_coords();
+        for my $type (keys %{$self->{'_parsed_coords'}} ) {
+            next if $type =~  /\w{2,}/; #if not H,C,E or T
+            for my $loc (@{$self->{'_parsed_coords'}{$type}} ) {
+                push @fts, Bio::SeqFeature::Generic->new
+                    (-start => $loc->{'start'},
+                     -end => $loc->{'end'},
+                     -source => 'HNN',
+                     -primary => 'Domain',
+                     -tag => {
+                              type => $type,
+                              method => $self->analysis_name,
+                             });
+            }               #end of array of strucs of type
+        }                   # end of all 2nd struc elements
+        delete $self->{'_parsed_coords'}; #remove temp data
+        return @fts;
+    }
+
+    if ($value eq 'meta') {
+        ## bless as metasequence if necessary
+        if (!$self->seq->isa("Bio::Seq::MetaI")) {
+            bless ($self->seq, "Bio::Seq::Meta::Array");
+        }
+        $self->seq->isa("Bio::Seq::MetaI")
+            || $self->throw("$self is not a Bio::Seq::MetaI");
+
+        ## now make meta sequence: one named track per score column;
+        ## nothing is added when no residue lines were parsed
+        $Bio::Seq::Meta::Array::DEFAULT_NAME = 'HNN_struc';
+        my @columns = @{$self->{'_parsed'}} ? qw(struc helix sheet coil) : ();
+        for my $struc_type (@columns) {
+            my $meta_name = "HNN". "_" . "$struc_type";
+            my @meta = map{$_->{$struc_type}} @{$self->{'_parsed'}};
+            if (grep{$_ eq $meta_name}$self->seq->meta_names ) {
+                $self->warn ("$meta_name already exists , not overwriting!");
+                next;
+            }
+            $self->seq->named_meta($meta_name,\@meta );
+        }
+        # return  seq array object implementing meta sequence #
+        return $self->seq;
+    }
+
+    ## for any other true value hand back the parsed data structure ##
+    return $self->{'_parsed'};
+}
+
+
+sub _get_2ary_coords {
+    #helper sub for result;
+    ##walks $self->{'_parsed'} and collects runs of identical structure
+    ##state (H, T, C or E). A run is kept when it is longer than
+    ##MIN_STRUC_LEN residues, or whatever its length when the state is T:
+    #i.e., helical prediction for 1 residue isn't very meaningful...
+    ##The runs are stored, keyed by state letter, as hashes of 1-based
+    ##start/end values in $self->{'_parsed_coords'}.
+    ##A run that reaches the last residue is recorded like any other.
+    #could be put into a secondary base class if need be
+    my ($self) = @_;
+    my @prot = @{$self->{'_parsed'}};
+    my %Result;
+    my $index = 0;
+    while ($index <= $#prot) {
+        my $type = $prot[$index]{'struc'};
+        if ($type !~ /[HTCE]/) {
+            $index++;
+            next;
+        }
+        # extend the run while the following residue has the same state
+        my $last = $index;
+        $last++ while $last < $#prot && $prot[$last + 1]{'struc'} eq $type;
+        my $length = $last - $index + 1;
+        if ($length > MIN_STRUC_LEN || $type eq 'T') {
+            push @{$Result{$type}}, {start => $index + 1 ,  end => $last + 1};
+        }
+        # continue after the run, so every residue is visited once
+        $index = $last + 1;
+    }
+    $self->{'_parsed_coords'} = \%Result; #temp assignment
+}
+
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Mitoprot.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Mitoprot.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Mitoprot.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,317 @@
+# $Id: Mitoprot.pm,
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::Mitoprot
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Analysis::Protein::Mitoprot - a wrapper around Mitoprot
+server
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Analysis::Protein::Mitoprot;
+
+  use Bio::PrimarySeq;
+  my $seq = new Bio::PrimarySeq
+    (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
+     -primary_id=>'test'); # a Bio::PrimarySeqI object
+
+  my $mitoprot = Bio::Tools::Analysis::Protein::Mitoprot->new
+     ( -seq => $seq
+     ); # sequence must be at least 15aa long and start with an M.
+
+  # run Mitoprot prediction on a protein sequence
+  $mitoprot->run();
+
+
+  die "Could not get a result" unless $mitoprot->status =~ /^COMPLETED/;
+
+  print $mitoprot->result;     # print raw prediction to STDOUT
+
+  foreach my $feat ( $mitoprot->result('Bio::SeqFeatureI') ) {
+
+      # do something to SeqFeature
+      # e.g. print as GFF
+      print $feat->gff_string, "\n";
+      # or store within the sequence - if it is a Bio::RichSeqI
+      $seq->add_SeqFeature($feat);
+
+ }
+
+=head1 DESCRIPTION
+
+This class is a wrapper around the Mitoprot web server which
+calculates the probability of a sequence containing a mitochondrial
+targetting peptide. See http://mips.gsf.de/cgi-bin/proj/medgen/mitofilter
+for more details.
+
+The results can be obtained in 3 formats:
+
+=over 3
+
+=item 1
+
+The raw text of the program output
+
+  my $rawdata = $analysis_object->result;
+
+=item 2
+
+A reference to a hash of scores:
+
+  my $data_ref = $analysis_object->result('parsed'); print "predicted
+  export prob is $data_ref->{'export_prob'}\n"; #
+
+key values of returned hash are input_length, basic_aas, acidic_aas,
+export_prob, charge, cleavage_site.
+
+=item 3
+
+A Bio::SeqFeature::Generic object
+
+  my ($ft) = $analysis_object->result('Bio::SeqFeatureI');
+  print "export prob is ", ($ft->each_tag_value('export_prob'))[0]  ,"\n";
+
+
+This is the second implementation of Bio::SimpleAnalysisI which hopefully
+will make it easier to write wrappers on various services. This class
+uses a web resource and therefore inherits from Bio::WebAgent.
+
+=back
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>,
+L<Bio::Tools::Analysis::SimpleAnalysisBase>,
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk,
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Analysis::Protein::Mitoprot;
+use vars qw($FLOAT);
+use strict;
+
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw(GET);
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+$FLOAT = '[+-]?\d*\.\d*';       # pattern source for a decimal number; not referenced in the code shown for this module
+
+my $URL = 'http://ihg.gsf.de/cgi-bin/paolo/mitofilter?'; # CGI endpoint; _init installs it, _run appends the query string
+
+
+my %STATUS =  map { $_ => 1 } qw(CREATED COMPLETED TERMINATED_BY_ERROR); # set of status names; not referenced in the code shown
+
+my $MIN_LEN = 60;               #min len for protein analysis; _run only reports COMPLETED for sequences longer than this
+my $ANALYSIS_NAME = "Mitoprot"; # not referenced in the code shown; _init takes the name from $ANALYSIS_SPEC
+
+my $ANALYSIS_SPEC =
+    {
+     'name'        => 'Mitoprot',
+     'type'        => 'Protein',
+     'version'     => '1.0a4',
+     'supplier'    => 'Munich Information Center for ProteinSequences',
+     'description' => 'mitochondrial sig seq prediction',
+    };
+
+my $INPUT_SPEC =
+    [
+     {
+      'mandatory' => 'true',
+      'type'      => 'Bio::PrimarySeqI',
+      'name'      => 'seq',          #value must be name of method used to set value
+     },
+    ];
+
+my $RESULT_SPEC =
+    {
+     '' => 'raw text results',  # same as undef
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+     'all' => 'hash of results',
+    };
+
+
+
+### unique to this module ##
+
+=head2 result
+
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : various
+
+The method returns a result of an executed job. If the job was
+terminated by an error the result may contain an error message instead
+of the real data.
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defines the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>.  Feature primary
+tag is "SigSeq".  Feature tags are input_length , basic_aas,
+acidic_aas, export_prob, charge, cleavage_site, method.
+
+=item 'parsed'
+
+hash references of parsed results { input_length =E<gt>, basic_aas=E<gt>,
+acidic_aas=E<gt>, export_prob=E<gt>, charge=E<gt>, cleavage_site=E<gt>}.
+
+=back
+
+=cut
+
+
+sub result {
+    # Return the Mitoprot output in the form selected by $value:
+    #   false / undef      - the raw text held in $self->{'_result'}
+    #   'Bio::SeqFeatureI' - a one-element list holding a
+    #                        Bio::SeqFeature::Generic that spans residue 1
+    #                        to the cleavage site (or to the end of the
+    #                        sequence when no numeric site was parsed)
+    #   any other true     - ref to a hash with the keys charge,
+    #                        input_length, basic_aas, acidic_aas,
+    #                        cleavage_site and export_prob (those found)
+    my ($self,$value) = @_;
+
+    return $self->{'_result'} unless $value;
+
+    my $result = IO::String->new($self->{'_result'});
+    my %results;
+    while (my $line = <$result>) {
+        # only lines with a digit, or the Cleavage line, carry data
+        next unless $line =~ /\d/ || $line =~ /^Cle/;
+        # Net, then anything except +,- or digit, then the charge with
+        # its optional sign
+        if ($line =~ /^Net[^+\-\d]+([+-]?\d+)/) {
+            $results{'charge'} = $1;
+        } elsif ($line =~ /^Input[^\d]+(\d+)/ ) {
+            $results{'input_length'} = $1;
+        } elsif ($line =~ /basic[^\d]+(\d+)$/ ) {
+            $results{'basic_aas'} = $1;
+        } elsif ($line =~ /acidic[^\d]+(\d+)$/) {
+            $results{'acidic_aas'} = $1;
+        } elsif ($line =~ /^Cleavage[^\d]+(\d+)$/) {
+            $results{'cleavage_site'} = $1;
+        } elsif ($line =~ /^Cleavage/) {
+            # a Cleavage line without a position
+            $results{'cleavage_site'} = 'not predictable';
+        } elsif ($line =~ /^of export[^\d]+((0|1)\.\d+)$/) {
+            $results{'export_prob'} = $1;
+        }
+    }
+
+    if ($value eq 'Bio::SeqFeatureI') {
+        # the site may be missing altogether; test definedness first so
+        # that case falls back to the sequence length without a warning
+        my $site = $results{'cleavage_site'};
+        my $end  = (defined $site && $site =~ /^\d+$/) ? $site
+                                                       : $self->seq->length;
+        my @fts = Bio::SeqFeature::Generic->new
+            (
+             -start => 1,
+             -end => $end,
+             -source => 'Mitoprot',
+             -primary => 'Region',
+             -tag =>{
+                     export_prob   => $results{'export_prob'},
+                     charge        => $results{'charge'},
+                     basic_aas     => $results{'basic_aas'},
+                     acid_aas      => $results{'acidic_aas'},
+                     region_name   => 'Transit_peptide',
+                     method        => 'MitoProt',
+                     cleavage_site => $results{'cleavage_site'},
+                    },
+            );
+        return @fts;        #return Bioseqfeature array
+    }
+
+    return \%results;   # hash based results ref
+}
+
+sub _init {
+    # Install the Mitoprot service URL and this module's specification
+    # tables on the object; returns the object.
+    my ($self) = @_;
+    $self->url($URL);
+    @{$self}{qw(_ANALYSIS_SPEC _INPUT_SPEC _RESULT_SPEC)}
+        = ($ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC);
+    $self->{'_ANALYSIS_NAME'} = $ANALYSIS_SPEC->{'name'};
+    return $self;
+}
+
+sub _process_arguments {
+    # Run the base-class argument handling (mandatory fields), then apply
+    # the Mitoprot-specific checks: first residue M, length at least 15.
+    my ($self, $args) = @_;
+    $self->SUPER::_process_arguments($args) ;
+
+    if ($self->seq->subseq(1,1) !~ /M/i) {
+        $self->throw ("1st_aa must be M");
+    }
+    if ($self->seq->length< 15) {
+        $self->throw ("sequence must be at least 15aa long");
+    }
+    return;
+}
+
+
+
+sub _run {
+    # Query the Mitoprot CGI for the stored sequence and keep the reply,
+    # stripped of HTML, in $self->{'_result'}.
+    #request submitted by get not by post
+    my $self  = shift;
+    $self->delay(1);
+    $self->sleep;
+
+    # assume failure; only overwritten once a usable reply has arrived
+    $self->status('TERMINATED_BY_ERROR');
+    my $url = $self->url . "seq=".lc($self->seq->seq). "&seqnam="; # lower-cased sequence goes into the query string
+    my $request = GET $url;
+    my $content = $self->request($request);
+    my $text = $content->content; #1st response
+
+    #remove html stuff: keep what is inside <PRE>...</PRE>, then drop remaining tags
+    $text =~ s/.*<PRE>(.*)<\/PRE>.*/$1/s;
+    $text =~ s/<[^>]+>//sg;
+
+    # COMPLETED needs non-empty text and a sequence longer than $MIN_LEN
+    $self->status('COMPLETED') if $text ne '' && $self->seq->length > $MIN_LEN;
+    $self->{'_result'} = $text;
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/NetPhos.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/NetPhos.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/NetPhos.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,299 @@
+# $Id: NetPhos.pm,v 1.13.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::NetPhos
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Analysis::Protein::NetPhos - a wrapper around NetPhos server
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Analysis::Protein::NetPhos;
+
+  my $seq; # a Bio::PrimarySeqI object
+  my $threshold  = "0.90";
+
+  my $netphos = Bio::Tools::Analysis::Protein::NetPhos->new
+     ( -seq => $seq,
+       -threshold => $threshold );
+
+  # run NetPhos prediction on a sequence
+  $netphos->run();
+
+  # alternatively you can say
+  $netphos->seq($seq)->threshold($threshold)->run;
+
+  die "Could not get a result" unless $netphos->status =~ /^COMPLETED/;
+
+  print $netphos->result;     # print raw prediction to STDOUT
+
+  foreach my $feat ( $netphos->result('Bio::SeqFeatureI') ) {
+
+      # do something to SeqFeature
+      # e.g. print as GFF
+      print $feat->gff_string, "\n";
+      # or store within the sequence - if it is a Bio::RichSeqI
+      $seq->add_SeqFeature($feat)
+
+ }
+
+=head1 DESCRIPTION
+
+This class is wrapper around the NetPhos 2.0 server which produces
+neural network predictions for serine, threonine and tyrosine
+phosphorylation sites in eukaryotic proteins.
+
+See L<http://www.cbs.dtu.dk/services/NetPhos/>.
+
+This is the first implementation of Bio::SimpleAnalysisI which hopefully
+will make it easier to write wrappers on various services. This class
+uses a web resource and therefore inherits from Bio::WebAgent.
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Analysis::Protein::NetPhos;
+use vars qw($FLOAT);
+use strict;
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw (POST);
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+    $FLOAT = '[+-]?\d*\.\d*';
+    my $URL = 'http://www.cbs.dtu.dk/cgi-bin/nph-webface';
+
+
+    my $ANALYSIS_SPEC =
+        {
+         'name'        => 'NetPhos',
+         'type'        => 'Protein',
+         'version'     => '2.0',
+         'supplier'    => 'Center for Biological Sequence Analysis,
+                           Technical University of Denmark',
+         'description' => 'Prediction of serine, threonine and tyrosine
+                             phosphorylation sites in eukaryotic proteins',
+        };
+
+    my $INPUT_SPEC =
+        [
+         {
+          'mandatory' => 'true',
+          'type'      => 'Bio::PrimarySeqI',
+          'name'      => 'seq',
+         },
+         {
+          'mandatory' => 'false',
+          'type'      => 'float',
+          'name'      => 'threshold',
+          'default'   => 0.8,
+         }
+        ];
+
+    # Result formats offered by result(), keyed by the argument to pass.
+    my $RESULT_SPEC =
+        {
+         ''                 => 'bulk',  # same as undef
+         'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+         'raw'              => 'Array of [ position, score, residue ]'
+        };
+
+
+=head2 result
+
+ Name    : result
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : none (but an implementation may choose
+           to add arguments for instructions how to process
+           the raw result)
+
+The method returns a scalar representing a result of an executed
+job. If the job was terminated by an error the result may contain an
+error message instead of the real data (or both, depending on the
+implementation).
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defined the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>.
+
+=item anything else
+
+Array of array references of [ position, score, residue].
+
+
+=back
+
+
+=cut
+
+sub result {
+    # Return the NetPhos output in the form selected by $value:
+    #   false / undef      - the raw text held in $self->{'_result'}
+    #   'Bio::SeqFeatureI' - list of Bio::SeqFeature::Generic objects, one
+    #                        per prediction scoring above the threshold
+    #   any other true     - ref to an array of [ position, score, residue ]
+    my ($self,$value) = @_;
+
+    return $self->{'_result'} unless $value;
+
+    my @predictions;
+    my $threshold = $self->threshold;
+    my $result = IO::String->new($self->{'_result'});
+    while (my $line = <$result>) {
+        next if $line =~ /^____/;
+        # use the captures only when this very line matched
+        next unless $line =~ /^\S+ +(\d+) +\w+ +(0\.\d+) +.([STY])/;
+        next unless $2 > $threshold;
+        push @predictions, [$1, $2, $3];
+    }
+
+    if ($value eq 'Bio::SeqFeatureI') {
+        my @fts;
+        foreach my $site (@predictions) {
+            push @fts, Bio::SeqFeature::Generic->new
+                (-start   => $site->[0],
+                 -end     => $site->[0] ,
+                 -source  => 'NetPhos',
+                 -primary => 'Site',
+                 -tag     => {
+                           score   => $site->[1],
+                           residue => $site->[2] });
+        }
+        return @fts;
+    }
+    return \@predictions;
+}
+
+=head2  threshold
+
+ Usage   : $job->threshold(...)
+ Returns  : The significance threshold of a prediction
+ Args     : None (retrieves value) or a value between 0 and 1.
+ Purpose  : Get/setter of the threshold to be submitted for analysis.
+
+=cut
+
+sub threshold {
+   # Get/set the score threshold applied by result().
+   # With a defined argument: validate it as a decimal number in [0, 1],
+   # store it and return the object (so calls can be chained); throws
+   # otherwise.  Without one: return the stored value, or the default
+   # from the input spec when none has been set.
+   my ($self,$value) = @_;
+   if( defined $value) {
+       # the whole string must be a plain decimal number; integers such
+       # as 0 and 1 are valid, strings that merely contain a dot are not
+       if ( $value !~ /\A\s*[+-]?(?:\d+\.?\d*|\.\d+)\s*\z/
+            or $value < 0 or $value > 1 ) {
+           $self->throw("I need a value between 0 and 1 , not  [". $value. "]")
+       }
+       $self->{'_threshold'} = $value;
+       return $self;
+   }
+   # definedness, not truth: a stored threshold of 0 must be honoured
+   return defined $self->{'_threshold'} ? $self->{'_threshold'}
+                                        : $self->input_spec->[1]{'default'} ;
+}
+
+sub _init {
+    # Install the NetPhos service URL and this module's specification
+    # tables on the object; returns the object.
+    my ($self) = @_;
+    $self->url($URL);
+    @{$self}{qw(_ANALYSIS_SPEC _INPUT_SPEC _RESULT_SPEC)}
+        = ($ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC);
+    $self->{'_ANALYSIS_NAME'} = $ANALYSIS_SPEC->{name};
+    return $self;
+}
+
+sub _run {
+    # Submit the sequence to the NetPhos web form, follow the result link
+    # found in the reply and keep the <pre> section of the final page,
+    # stripped of tags, in $self->{'_result'}.
+    # Returns 0 when the first reply contains no result link.
+    my $self = shift;
+
+    # format the sequence into fasta
+    my $seq_fasta;
+    my $stringfh = new IO::String($seq_fasta);
+    my $seqout = new Bio::SeqIO(-fh => $stringfh,
+                                -format => 'fasta');
+    $seqout->write_seq($self->seq);
+    $self->debug($seq_fasta);
+
+    # delay repeated calls by default by 3 sec, set delay() to change
+    $self->sleep;
+
+    # assume failure; only overwritten at the end when a response was obtained
+    $self->status('TERMINATED_BY_ERROR');
+
+    my $request = POST $self->url,
+            Content_Type => 'form-data',
+            Content      => [configfile => '/usr/opt/www/pub/CBS/services/NetPhos-2.0/NetPhos.cf',
+                             SEQPASTE   => $seq_fasta];
+    my $content = $self->request($request);
+    my $text    = $content->content;
+
+    # the first reply only carries a link to follow
+    my ($result_url) = $text =~ /follow <a href="(.*?)"/;
+    return 0 unless $result_url;
+    $self->debug("url is $result_url\n\n");
+
+    # first request to the full link; its response is not used further
+    my $ua2      = $self->clone;
+    my $content2 = $ua2->request(POST $result_url);
+
+    # second request to the link cut at the first '&'; this response is parsed
+    my $ua3      = $self->clone;
+    $result_url  =~ s/&.*//;
+    $self->debug("final result url is $result_url\n");
+    my $content3 = $ua3->request(POST $result_url);
+    #print Dumper $content3;
+    my $response = $content3->content;
+
+
+    # keep what is inside <pre>...</pre>, then drop remaining tags
+    $response =~ s/.*<pre>(.*)<\/pre>.*/$1/s;
+    $response =~ s/<.*?>//gs;
+
+    $self->{'_result'} = $response;
+
+    $self->status('COMPLETED') if $response ne '';
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Scansite.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Scansite.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Scansite.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,439 @@
+# $Id: Scansite.pm,v 1.16.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::Scansite
+#
+# Cared for by Richard Adams <richard.adams at ed.ac.uk>
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Analysis::Protein::Scansite - a wrapper around the Scansite server
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Analysis::Protein::Scansite;
+
+  my $seq; # a Bio::PrimarySeqI object
+
+  my $tool = Bio::Tools::Analysis::Protein::Scansite->new
+     ( -seq => $seq->primary_seq ); 
+
+  # run Scansite prediction on a sequence
+  $tool->run();
+
+  # alternatively you can say
+  $tool->seq($seq->primary_seq)->run;
+
+  die "Could not get a result" unless $tool->status =~ /^COMPLETED/;
+
+  print $tool->result;     # print raw prediction to STDOUT
+
+  foreach my $feat ( $tool->result('Bio::SeqFeatureI') ) {
+
+      # do something to SeqFeature
+      # e.g. print as GFF
+      print $feat->gff_string, "\n";
+      # or store within the sequence - if it is a Bio::RichSeqI
+      $seq->add_SeqFeature($feat);
+
+ }
+
+=head1 DESCRIPTION
+
+This class is a wrapper around the Scansite 2.0 server which produces
+predictions for serine, threonine and tyrosine phosphorylation sites
+in eukaryotic proteins. At present this is a basic wrapper for the
+"Scan protein by input sequence" functionality, which takes a sequence
+and searches for motifs, with the option to select the search
+stringency. At present, searches for specific phosphorylation
+sites are not supported; all predicted sites are returned.
+
+=head2 Return formats
+
+The Scansite results can be obtained in several formats:
+
+=over 3
+
+=item 1.
+
+By calling
+
+  my $res = $tool->result('');
+
+$res holds a string of the predicted sites in tabular format.
+
+=item 2.
+
+By calling 
+
+  my $data_ref = $tool->result('value')
+
+$data_ref is a reference to an array of hashes. Each element in the
+array represents a predicted phosphorylation site. The hash keys are
+the names of the data fields,i.e.,
+
+    'motif'      => 'Casn_Kin1'       # name of kinase
+    'percentile' => 0.155             # see Scansite docs
+    'position'   => 9                 # position in protein
+    'protein'    => 'A1'              # protein id
+    'score'      => 0.3696            # see Scansite docs
+    'sequence'   => 'ASYFDTASYFSADAT' # sequence surrounding site
+    'site'       => 'S9'              # phosphorylated residue
+    'zscore'     => '-3.110'          # see Scansite docs
+
+=item 3.
+
+By calling
+
+  my @fts = $tool->result('Bio::SeqFeatureI');
+
+which returns an array of L<Bio::SeqFeatureI> compliant objects with
+primary tag value 'Site' and tag names of 'motif', 'score',
+'sequence', 'zscore' as above.
+
+=back
+
+See L<http://scansite.mit.edu/>.
+
+This inherits Bio::SimpleAnalysisI which hopefully makes it easier to
+write wrappers on various services. This class uses a web resource and
+therefore inherits from L<Bio::WebAgent>.
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Analysis::Protein::Scansite;
+use vars qw($FLOAT @STRINGENCY);
+use strict;
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw(POST);
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+$FLOAT = '[+-]?\d*\.\d*';
+# stringency levels accepted by stringency()
+@STRINGENCY = qw(High Medium Low);
+my $URL = 'http://scansite.mit.edu/cgi-bin/motifscan_seq';
+
+
+    my $ANALYSIS_SPEC =
+        {
+         'name'        => 'Scansite',
+         'type'        => 'Protein',
+         'version'     => '2.0',
+         'supplier'    => 'Massachusetts Institute of Technology',
+         'description' => 'Prediction of serine, threonine and tyrosine
+                             phosphorylation sites in eukaryotic proteins',
+        };
+
+    my $INPUT_SPEC =
+        [
+         {
+          'mandatory' => 'true',
+          'type'      => 'Bio::PrimarySeqI',
+          'name'      => 'seq',
+         },
+         {
+          'mandatory' => 'false',
+          'type'      => 'text',
+          'name'      => 'protein_id',
+          'default'   => 'unnamed',
+         },
+         {
+          'mandatory' => 'false',
+          'type'      => 'text',
+          'name'      => 'stringency',
+          'default'   => 'High',
+         },
+        ];
+
+    my $RESULT_SPEC =
+        {
+         ''                 => 'bulk',  # same as undef
+         'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+         'raw'              => 'Array of {motif=>, percentile=>, position=>,
+					  protein=>, score=>, site=>, zscore=>
+                                          sequence=>
+	     				 }',
+        };
+
+
+=head2 result
+
+ Name    : result
+ Usage   : $job->result (...)
+ Returns : a result created by running an analysis
+ Args    : none (but an implementation may choose
+           to add arguments for instructions how to process
+           the raw result)
+
+The method returns a scalar representing a result of an executed
+job. If the job was terminated by an error, the result may contain 
+an error message instead of the real data.
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defined the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>.
+
+=item 'parsed'
+
+Returns a reference to an array of hashes containing the data of one
+phosphorylation site prediction. Key values are:
+
+motif, percentile, position, protein, score, site, zscore,  sequence.
+
+
+=back
+
+
+=cut
+
+sub result {
+    # Return the Scansite output in the form selected by $value:
+    #   false / undef      - the tabular text held in $self->{'_result'}
+    #   'Bio::SeqFeatureI' - list of Bio::SeqFeature::Generic objects, one
+    #                        per entry of $self->{'_parsed'}
+    #   'meta'             - not available; throws
+    #   any other true     - $self->{'_parsed'} (ref to an array of hashes)
+    # Throws when no result is stored or the status is not COMPLETED.
+    my ($self, $value) = @_;
+
+    my $have_result = exists($self->{'_result'});
+    unless ($have_result && $self->status eq 'COMPLETED') {
+        $self->throw("Cannot get results, analysis not run!");
+    }
+
+    return $self->{'_result'} unless $value;
+
+    if ($value eq 'meta') {
+        $self->throw("No meta sequences available in this analysis!");
+    }
+    ## every other true value except the feature request gets the parsed data
+    return $self->{'_parsed'} if $value ne 'Bio::SeqFeatureI';
+
+    my @fts;
+    for my $hit (@{$self->{'_parsed'}}) {
+        my %tags = (
+            score     => $hit->{'score'},
+            zscore    => $hit->{'zscore'},
+            motif     => $hit->{'motif'},
+            site      => $hit->{'site'},
+            sequence  => $hit->{'sequence'},
+        );
+        push @fts, Bio::SeqFeature::Generic->new(
+            -start       => $hit->{'position'},
+            -end         => $hit->{'position'},
+            -primary_tag => 'Site',
+            -source      => 'Scansite',
+            -tag         => \%tags,
+        );
+    }
+    return @fts;
+}
+
+=head2  stringency
+
+ Usage    : $job->stringency(...)
+ Returns  : The significance stringency of a prediction
+ Args     : None (retrieves value) or 'High', 'Medium' or 'Low'.
+ Purpose  : Get/setter of the stringency to be submitted for analysis.
+
+=cut
+
+sub stringency {
+   # Get/set the search stringency.
+   # With a true argument: check it against @STRINGENCY (ignoring case),
+   # store it and return the object; throws on any other value.
+   # Without one: return the stored value, or the default from the
+   # input spec when none has been set.
+   my ($self,$value) = @_;
+   if( $value) {
+       # compare as whole strings; the caller's value is never used as a
+       # pattern, so partial names and regex metacharacters are rejected
+       if (! grep { lc($_) eq lc($value) } @STRINGENCY ) {
+           # parenthesised join: otherwise "." binds first and the array
+           # is interpolated as its element count
+           $self->throw("I need a stringency of [".
+                        join(" ", @STRINGENCY) .
+                        "], not [$value]");
+       }
+       $self->{'_stringency'} = $value;
+       return $self;
+   }
+   return $self->{'_stringency'} || $self->input_spec->[2]{'default'} ;
+}
+
+=head2  protein_id
+
+ Usage    : $job->protein_id(...)
+ Returns  : The sequence id of the protein or 'unnamed' if not set. 
+ Args     : None  
+ Purpose  : Getter of the seq_id. Returns the display_id of the sequence
+            object. 
+
+=cut
+
+sub protein_id {
+    # display_id of the sequence object when one is set, otherwise the
+    # default taken from the input spec.
+    my ($self) = @_;
+    if (defined ($self->seq())) {
+        return $self->seq->display_id();
+    }
+    return $self->input_spec->[1]{'default'};
+}
+
+sub _init {
+    # Install the Scansite service URL and this module's specification
+    # tables on the object; returns the object.
+    my ($self) = @_;
+    $self->url($URL);
+    @{$self}{qw(_ANALYSIS_SPEC _INPUT_SPEC _RESULT_SPEC)}
+        = ($ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC);
+    $self->{'_ANALYSIS_NAME'} = $ANALYSIS_SPEC->{'name'};
+    return $self;
+}
+
+sub _run {
+    # Post the sequence to the Scansite form, pull the per-site parameters
+    # out of the returned HTML and store them as $self->{'_parsed'}
+    # (array of hashes) plus a text table in $self->{'_result'}.
+    my $self = shift;
+
+    # the raw sequence string is posted as is; wait 1 sec between calls
+	$self->delay(1);
+    # sleep for the delay set just above
+    $self->sleep;
+
+    # assume failure; only overwritten below once the reply looks valid
+    $self->status('TERMINATED_BY_ERROR');
+
+    my $request = POST $self->url,
+            Content      => [sequence     => $self->seq->seq(),
+							 protein_id   => $self->protein_id(),
+							 motif_option => 'all',
+							 motifs       => '',
+							 motif_groups => '',
+							 stringency   => $self->stringency(),
+						 	 domain_flag  => '',
+							 submit       => "Submit Request",
+							];
+	## raw html report, 
+    my $content = $self->request($request);
+    my $text    = $content->content;
+
+	##access result data from tag in html
+	my @parsed_Results = ();
+	my @unwantedParams = qw(db source class);
+	# each hit is the query string of a sitestats.phtml link
+	my @results        = split /sitestats\.phtml\?/, $text;
+	shift @results; # drop the text before the first link
+
+	##this module generates 'parsed' output directly from html,
+	## avoids having to parse twice.
+
+	for my $hit (@results) {
+		## get results string: everything up to the closing quote of the link
+		my ($res) = $hit =~ /^(.+?)"/;
+
+		#get key value pairs
+		my %params = $res =~/(\w+)=([^&]+)/g;
+
+		##remove unwanted data from hash
+		map{delete $params{$_}} @unwantedParams;
+		push @parsed_Results, \%params;
+	}  
+	
+	## now generate text output in table format
+	my $out_Str = '';
+	$out_Str   .=  $self->_make_header(\@parsed_Results);
+	$out_Str   .=  $self->_add_data(\@parsed_Results);
+		
+
+    $self->{'_result'} = $out_Str;
+	$self->{'_parsed'} = \@parsed_Results;
+	
+	## is successful if there are results or if there are no results and
+	## this because there are no matches, not because of parsing errors etc.
+    $self->status('COMPLETED') if $text ne ''       &&
+	(scalar @results > 0 ||	
+	(scalar @results == 0 && $text =~/No sites found/));
+    # NOTE(review): this check runs after the status may already be COMPLETED
+    if ($text =~ /server\s+error/i) {
+    	$self->warn("There was an internal server error !- text below") ;
+		$self->warn($text);
+        return; 
+    }
+}
+
+sub _process_arguments {
+
+    # Run the base-class argument handling (mandatory fields), then apply
+    # the Scansite-specific checks: length at least 15, protein alphabet.
+    my ($self, $args) = @_;
+    #use base checking for existence of mandatory fields
+    $self->SUPER::_process_arguments($args); 
+   
+   # specific requirements; the message states the limit that is enforced
+   $self->throw("Sequence must be at least 15 amino acids long!") 
+           if $self->seq->length < 15;
+   $self->throw("Sequence must be protein")
+          unless $self->seq->alphabet() eq 'protein';
+}
+
+sub _make_header {
+	# Build the header line of the text table: the sorted keys of the
+	# first hit, each padded to 12 characters, with 'sequence' moved to
+	# the end.  $res is a ref to the array of hit hashes.
+	my ($self, $res) = @_;
+	# look at the first hit only when there is one: dereferencing a
+	# missing element inside keys() would create an empty hash in the
+	# caller's array and so add a bogus, empty hit
+	my @columns = @$res ? (sort keys %{$res->[0]}) : ();
+	my $header = '';
+	for my $k (@columns) {
+		next if $k eq 'sequence';
+		$header .= $k;
+		$header .= ' 'x(12 -length($k));
+	}
+	$header .= "sequence\n\n";
+	return $header;
+}
+
+sub _add_data {
+	# Build the data rows of the text table: for every hit its values in
+	# key order, each padded to 12 characters, followed by the sequence
+	# and a newline when the hit has a sequence.
+	my ($self, $res) = @_;
+	my @rows;
+	foreach my $hit (@$res) {
+		my @fields = grep { $_ ne 'sequence' } sort keys %$hit;
+		my $row = join '',
+			map { $hit->{$_} . ' 'x(12 - length($hit->{$_})) } @fields;
+		if ($hit->{'sequence'}) {
+			$row .= $hit->{'sequence'}. "\n";
+		}
+		push @rows, $row;
+	}
+	return join '', @rows;
+}
+	
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Sopma.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Sopma.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/Protein/Sopma.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,482 @@
+# $Id: Sopma.pm,v 1.0 2003/07/ 11
+#
+# BioPerl module for Bio::Tools::Analysis::Protein::Sopma
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+
+=head1 NAME
+
+Bio::Tools::Analysis::Protein::Sopma - a wrapper around the
+Sopma protein secondary structure prediction server
+
+=head1  SYNOPSIS
+
+  use Bio::Tools::Analysis::Protein::Sopma;
+  # get a Bio::PrimarySeq object (seq() rejects Bio::SeqI objects)
+  my $seq;
+
+  my $sopma = Bio::Tools::Analysis::Protein::Sopma->new
+      (-seq=>$seq, states=>4);
+  $sopma->run;
+  print $sopma->result; # raw text to standard output
+
+=head1  DESCRIPTION
+
+A module to remotely retrieve predictions of protein secondary
+structure.  Each residue in the protein receives a score representing
+the likelihood of existing in each of four different states (helix,
+coil, turn or sheet), e.g.,
+
+  my $analysis_object = Bio::Tools::Analysis::Protein::Sopma->new
+      ( -seq          => $seq,
+        -states       => 4,
+        -window_width => 15,
+      );
+
+creates a new object.  Compulsory argument -seq.  Optional arguments
+-states, -window_width,-similarity_threshold. These arguments can also be
+set by direct methods , e.g.,
+
+  $analysis_object->states(4);
+  $analysis_object->run;
+
+submits the query to the server and obtains raw text output. Given an
+amino acid sequence the results can be obtained in 4 formats,
+determined by the argument to the result method:
+
+=over 4
+
+=item 1
+
+The raw text of the program output.
+
+  my $rawdata = $analysis_object->result;
+
+=item 2
+
+A reference to an array of hashes of scores for each state and the
+assigned state.
+
+  my $data_ref = $analysis_object->result('parsed');
+  print "score for helix at residue 2 is $data_ref->[1]{'helix'}\n";
+  print "predicted struc  at residue 2 is $data_ref->[1]{'struc'}\n";
+
+Hash keys are 'helix', 'struc', 'sheet', 'coil', 'turn'.
+
+=item 3
+
+An array of Bio::SeqFeature::Generic objects where each feature is a
+predicted unit of secondary structure. Only runs of more than 3
+residues are reported as features, except turns, which are reported
+at any length.
+
+  my @fts = $analysis_object->result('Bio::SeqFeatureI');
+  for my $ft (@fts) {
+      print " From ",  $ft->start, " to  ",$ft->end, " struc: " ,
+             ($ft->each_tag_value('type'))[0]  ,"\n";
+  }
+
+=item 4
+
+A Bio::Seq::Meta::Array implementing sequence.
+
+This is a Bio::Seq object that can also hold data about each residue
+in the sequence.  In this case, the sequence can be associated with
+arrays of Sopma prediction scores, e.g.,
+
+  my $meta_sequence = $analysis_object->result('meta');
+  print "scores from residues 10 -20 are ",
+      $meta_sequence->named_submeta_text("Sopma_helix",10,20), "\n";
+
+Meta sequence names are : Sopma_helix, Sopma_sheet, Sopma_turn,
+Sopma_coil, Sopma_struc, representing the scores for each residue.
+
+Many methods common to all analyses are inherited from
+Bio::Tools::Analysis::SimpleAnalysisBase.
+
+=back
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::Tools::Analysis::SimpleAnalysisBase>
+L<Bio::Seq::Meta::Array>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+
+=head1 APPENDIX
+
+=cut
+
+use strict;
+
+package Bio::Tools::Analysis::Protein::Sopma;
+
+use IO::String;
+use Bio::SeqIO;
+use HTTP::Request::Common qw (POST);
+use Bio::SeqFeature::Generic;
+use Bio::Seq::Meta::Array;
+
+
+use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
+
+#extends array for 2struc.
+# File-level configuration; _init() copies these into each new object.
+# $URL is the address of the Sopma CGI script.
+my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_sopma.pl';
+my $ANALYSIS_NAME= 'Sopma';
+# 'type' is matched (case-insensitively) against the sequence alphabet
+# by the base class seq() method.
+my $ANALYSIS_SPEC= {name => 'Sopma', type => 'Protein'};
+# Input parameters.  The order matters: the accessors below read their
+# defaults by position ([1] similarity_threshold, [2] window_width,
+# [3] states).
+my $INPUT_SPEC = [
+                  {mandatory=>'true',
+                   type     => 'Bio::PrimarySeqI',
+                   'name'   => 'seq',
+                  },
+                  {mandatory =>'false',
+                   type      => 'integer',
+                   name      => 'similarity_threshold',
+                   default   => 8,
+                  },
+                  {mandatory  =>'false',
+                   type       => 'integer',
+                   name       => 'window_width',
+                   default    => 17,
+                  },
+                  {mandatory  =>'false',
+                   type       => 'integer',
+                   name       => 'states',
+                   default    => 4,
+                  },
+                 ];
+# Descriptions of the result formats.  Note that result() returns the
+# parsed array for any true argument other than 'Bio::SeqFeatureI' and
+# 'meta', so 'raw' and 'parsed' both yield the parsed data.
+my  $RESULT_SPEC =
+    {
+     ''   => 'bulk',              # same as undef
+     raw  => '[{struc=>, helix=>, turn=>, coil=>, sheet=>}]',
+     meta => 'Bio::Seq::Meta::Array object',
+     'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
+    };
+# _get_2ary_coords() only reports runs longer than this many residues
+# (runs of type 'T' are exempt from the limit).
+use constant MIN_STRUC_LEN => 3; 
+
+
+=head2  similarity_threshold
+
+  Usage   : $job->similarity_threshold(...)
+  Returns : The  similarity threshold used in the analysis
+  Args    : None (retrieves value) or  an integer (default = 8) 
+            that sets the similarity threshold .
+
+This method gets/sets the  similarity threshold for the prediction.
+
+=cut
+
+sub similarity_threshold {
+    # Get/set accessor for the similarity threshold.
+    # A false argument (none, undef, 0, '') only reads the value; a true
+    # argument must consist of digits, otherwise an exception is thrown.
+    # When no true value is stored, the default from the input
+    # specification is stored and returned.
+    my $self      = shift;
+    my $threshold = shift;
+
+    if ($threshold) {
+        if ($threshold !~ /^\d+$/) {
+            $self->throw ("similarity_threshold must be integer");
+        }
+        $self->{'_similarity_threshold'} = $threshold;
+    }
+
+    unless ($self->{'_similarity_threshold'}) {
+        $self->{'_similarity_threshold'} = $self->input_spec->[1]{'default'};
+    }
+    return $self->{'_similarity_threshold'};
+}
+
+=head2  window_width
+
+  Usage    : $job->window_width(...)
+  Returns  : The window width used in the analysis
+  Args     : None (retrieves value) or  an integer (default = 17)
+             that sets the window width.
+
+This method gets/sets the window width for the prediction.  A warning
+is issued if the window width is not shorter than the sequence.
+
+=cut
+
+sub window_width {
+    # Get/set accessor for the window width.
+    # Args    : none (read), or a true value made of digits (set);
+    #           anything else that is true throws an exception
+    # Returns : the stored window width; when no true value is stored,
+    #           the default from the input specification is stored first
+    # Warns when the width is not shorter than the sequence.
+    my ($self, $value) = @_;
+    if ($value) {
+        $self->throw ("window_width must be integer")
+            unless $value =~ /^\d+$/;
+        $self->{'_window_width'} = $value;
+    }
+    $self->{'_window_width'} ||= $self->input_spec->[2]{'default'};
+
+    # The sequence may not have been set yet: new() and
+    # _process_arguments() call the accessors in whatever order the
+    # arguments arrive, so only compare when a sequence is available
+    # instead of dying on an undefined value.
+    my $seq = $self->seq;
+    $self->warn ("window width longer than sequence!")
+        if defined $seq && $self->{'_window_width'} >= $seq->length;
+    return $self->{'_window_width'};
+}
+
+=head2  states
+
+  Usage    : $job->states(...)
+  Returns  : The number of secondary structure prediction states
+  Args     : None (retrieves value) or either '3' or '4' to set
+             prior to running analysis.
+
+This method gets/sets the number of states for the prediction, either
+3 or 4 (includes turns).
+
+=cut
+
+sub states {
+    # Get/set accessor for the number of prediction states.
+    # A true argument must be numerically 3 or 4, otherwise an exception
+    # is thrown; a false argument only reads.  When no true value is
+    # stored, the default from the input specification is stored and
+    # returned.
+    my $self      = shift;
+    my $requested = shift;
+
+    if ($requested) {
+        unless ($requested == 3 or $requested == 4) {
+            $self->throw ("number of states must be 3 or 4");
+        }
+        $self->{'_states'} = $requested;
+    }
+
+    unless ($self->{'_states'}) {
+        $self->{'_states'} = $self->input_spec->[3]{'default'};
+    }
+    return $self->{'_states'};
+}
+
+=head2 result
+
+  Usage   : $job->result (...)
+  Returns : a result created by running an analysis
+  Args    : various
+
+The method returns a result of an executed job. If the job was
+terminated by an error the result may contain an error message instead
+of the real data.
+
+This implementation returns differently processed data depending on
+argument:
+
+=over 3
+
+=item undef
+
+Returns the raw ASCII data stream but without HTML tags
+
+=item 'Bio::SeqFeatureI'
+
+The argument string defines the type of bioperl objects returned in an
+array.  The objects are L<Bio::SeqFeature::Generic>.  Feature primary
+tag is "Domain" and the source tag is "Sopma".  Feature tags are "type"
+(the one-letter structure code: H, E, C, or T if 4 state prediction was
+requested) and "method" (Sopma), plus any input parameter whose value
+differs from its default.
+
+=item 'parsed'
+
+Array of hash references of scores/structure assignations 
+{ helix =E<gt> , sheet =E<gt> , coil =E<gt> , struc=E<gt>}.
+
+=item 'meta'
+
+A Bio::Seq::Meta::Array object. Scores can be accessed using methods
+from this class. Meta sequence names are Sopma_helix, Sopma_sheet,
+Sopma_coil, Sopma_turn (if defined), and Sopma_struc.
+
+
+=back
+
+
+=cut
+
+sub result {
+    # Returns the result of the analysis in the requested format.
+    # Args    : $value  - format selector:
+    #                       false/absent        raw text from the server
+    #                       'Bio::SeqFeatureI'  list of Bio::SeqFeature::Generic
+    #                       'meta'              the sequence, reblessed as a
+    #                                           Bio::Seq::Meta::Array and
+    #                                           carrying the per-residue scores
+    #                       any other true value  ref to the parsed array of
+    #                                           per-residue hashes
+    #           $run_id - optional; appended as "|$run_id" to the meta names
+    # The raw text is parsed on first use and cached in $self->{'_parsed'}.
+    my ($self, $value, $run_id) = @_;
+
+    # no format requested: hand back the raw text
+    return $self->{'_result'} unless $value;
+
+    if (!exists($self->{'_parsed'})) {
+        my @score;
+        my $result = IO::String->new($self->{'_result'});
+        while (my $line = <$result>) {
+            next unless $line =~ /^[HCET]\s/; # or for sopma/hnn  /^[A-Z]\s/
+
+            # Skip lines that do not carry the five expected columns
+            # rather than storing a row of undefined values.
+            my ($struc, $helix, $sheet, $turn, $coil) =
+                $line =~ /^([A-Z])\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/
+                    or next;
+
+            my %residue = (struc => $struc,
+                           helix => $helix,
+                           sheet => $sheet,
+                           coil  => $coil,
+                          );
+            #include turn if 4states are requested
+            $residue{'turn'} = $turn if $self->states == 4;
+            push @score, \%residue;
+        }
+        $self->{'_parsed'} = \@score;
+    }
+
+    if ($value eq 'Bio::SeqFeatureI') {
+        my @fts;
+        $self->_get_2ary_coords();
+        for my $type (keys %{$self->{'_parsed_coords'}} ) {
+            next if $type =~  /\w{2,}/; #if not H,C,E or T
+
+            ## type and method (plus non-default parameters) distinguish
+            ## features on the same sequence run with different params
+            my $tag_hash = {
+                            type   => $type,
+                            method => $self->analysis_name,
+                           };
+            $self->_add_params_to_result($tag_hash);
+
+            ## now make the feature objects
+            for my $loc (@{$self->{'_parsed_coords'}{$type}} ) {
+                push  @fts,   Bio::SeqFeature::Generic->new
+                    (-start   => $loc->{'start'},
+                     -end     => $loc->{'end'},
+                     -source  => 'Sopma',
+                     -primary => 'Domain',
+                     -tag     => $tag_hash,
+                    );
+            }
+        }
+        delete $self->{'_parsed_coords'}; #remove temp data
+        return @fts;
+    }
+
+    if ($value eq 'meta') {
+        # find out which score types are present in the parsed data
+        my %type_scores;
+        for my $aa (@{$self->{'_parsed'}}) {
+            for my $type (qw(struc helix sheet  coil)) {
+                push @{$type_scores{$type}}, $aa->{$type};
+            }
+            push @{$type_scores{'turn'}}, $aa->{'turn'} if  exists $aa->{'turn'};
+        }
+
+        ## convert to meta sequence array ##
+        if (!$self->seq->isa("Bio::Seq::Meta::Array")) {
+            bless ($self->seq, "Bio::Seq::Meta::Array");
+        }
+        $self->seq->isa("Bio::Seq::MetaI")
+            || $self->throw("$self is not a Bio::Seq::MetaI");
+
+        $Bio::Seq::Meta::Array::DEFAULT_NAME = 'Sopma_struc';
+        for my $struc_type (keys %type_scores) {
+            my $meta_name = "Sopma". "_" . "$struc_type";
+            if ($run_id) {
+                $meta_name .= "|$run_id";
+            }
+            my @meta = map{$_->{$struc_type}} @{$self->{'_parsed'}};
+
+            # The list given to grep must be the meta names themselves.
+            # The former "meta_names >0" made grep test a single boolean,
+            # so existing names were never detected and were overwritten.
+            if (grep { $_ eq $meta_name } $self->seq->meta_names) {
+                $self->warn ("$meta_name already exists , not overwriting!");
+                next;
+            }
+            $self->seq->named_meta($meta_name,\@meta );
+        }
+        # return  seq array object implementing meta sequence #
+        return $self->seq;
+    }
+
+    ## any other true $value: return the parsed data
+    return $self->{'_parsed'};
+}
+
+sub _init {
+    # Called by the base class constructor before any attribute is set:
+    # records the server URL and the file-level specifications on the
+    # object.  Returns the object.
+    my ($self) = @_;
+    $self->url($URL);
+    @{$self}{qw(_ANALYSIS_SPEC _INPUT_SPEC _RESULT_SPEC _ANALYSIS_NAME)} =
+        ($ANALYSIS_SPEC, $INPUT_SPEC, $RESULT_SPEC, $ANALYSIS_NAME);
+    return $self;
+}
+
+sub _get_2ary_coords {
+    # Helper for result(): collapses the per-residue 'struc' assignments
+    # in $self->{'_parsed'} into runs of identical structure type and
+    # stores them in $self->{'_parsed_coords'} as
+    #     { 'H'|'T'|'C'|'E' => [ { start => .., end => .. }, ... ] }
+    # with 1-based, inclusive residue positions.
+    # A run is reported only if it is longer than MIN_STRUC_LEN residues;
+    # runs of type 'T' are reported whatever their length, since a
+    # helical prediction for one residue isn't very meaningful but a
+    # short turn is.
+    # Runs that extend to the last residue are reported too (they used
+    # to be lost because a run was only stored when a residue of another
+    # type followed it).
+    my ($self) = @_;
+    my @prot = @{$self->{'_parsed'}};
+    my %Result;
+
+    my $index = 0;
+    while ($index <= $#prot) {
+        my $type = $prot[$index]{'struc'};
+        if (!($type && $type =~ /[HTCE]/)) {
+            $index++;
+            next;
+        }
+
+        # extend the run while the following residue has the same type
+        my $last = $index;
+        while ($last < $#prot) {
+            my $following = $prot[$last + 1]{'struc'};
+            last unless defined $following && $following eq $type;
+            $last++;
+        }
+
+        my $length = $last - $index + 1;
+        if ($length > MIN_STRUC_LEN || $type eq 'T') {
+            push @{$Result{$type}}, {start => $index + 1, end => $last + 1};
+        }
+        $index = $last + 1;     # carry on after the run
+    }
+    $self->{'_parsed_coords'} = \%Result; #temp assignment
+}
+
+sub  _run {
+    # Submits the sequence to the Sopma server, follows the link to the
+    # text-only version of the prediction and stores that text in
+    # $self->{'_result'}.
+    # Returns : $self.  The status is 'COMPLETED' only if the text
+    #           version was fetched successfully and is not empty;
+    #           otherwise it stays 'TERMINATED_BY_ERROR'.
+    my $self  = shift;
+
+    # NOTE(review): this sets the delay to 1 on every run, which
+    # presumably overrides a value the caller set through delay() -
+    # confirm that this is intended.
+    $self->delay(1);
+    $self->sleep;
+
+    # assume failure until the result has been retrieved
+    $self->status('TERMINATED_BY_ERROR');
+    my $request = POST 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_sopma.pl',
+        Content_Type => 'form-data',
+            Content  => [title     => "",
+                         notice    => $self->seq->seq,
+                         ali_width => 70,
+                         states    => $self->states,
+                         threshold => $self->similarity_threshold ,
+                         width     => $self->window_width,
+                        ];
+
+    my $text = $self->request($request)->content;
+    return $self unless $text;
+
+    #### get text only version of results ##
+    my ($next) = $text =~ /Prediction.*?=(.*?)>/;
+    unless (defined $next) {
+        # without the link there is nothing to fetch; do not request the
+        # bare server address and report it as a result
+        $self->warn("Could not find the link to the text version of the Sopma results");
+        return $self;
+    }
+    my $out    = "http://npsa-pbil.ibcp.fr/". "$next";
+    my $req2   = HTTP::Request->new(GET=>$out);
+    my $resp2  = $self->request ($req2);
+    $self->{'_result'} = $resp2->content;
+
+    # Test the response itself: comparing the response object with ''
+    # is always true and marked every run as completed.
+    $self->status('COMPLETED')
+        if $resp2->is_success
+            && defined $self->{'_result'}
+            && $self->{'_result'} ne '';
+    return $self;
+}
+
+sub _add_params_to_result{
+    ## Called when making SeqFeature objects.
+    ## Adds input parameter values to the given tag hash so that
+    ## features from runs with different parameters can be told apart.
+    ## A value is only added if it is not a reference (this leaves out
+    ## the sequence object) and differs from the parameter's default.
+    ## Args    : reference to the tag hash, which is modified in place
+    ## Returns : nothing
+    my ($self, $tag_hash) = @_;
+
+    for my $spec (@{$self->input_spec()}) {
+        my $p = $spec->{'name'};
+
+        # call the accessor once only: it may have side effects (e.g.
+        # window_width can emit a warning on every call)
+        my $current = $self->$p;
+        next if ref($current);
+
+        if ($current ne $spec->{'default'}) {
+            $tag_hash->{$p} = $current;
+        }
+    }
+    return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/SimpleAnalysisBase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/SimpleAnalysisBase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Analysis/SimpleAnalysisBase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,269 @@
+# $Id: SimpleAnalysisBase.pm,v 1.12.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Analysis::SimpleAnalysisBase
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Richard Adams
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Analysis::SimpleAnalysisBase - abstract superclass for
+SimpleAnalysis implementations
+
+=head1 SYNOPSIS
+
+# not to be run directly
+
+=head1 DESCRIPTION
+
+This class is a generic implementation of SimpleAnalysisI and should
+be used as a base class for specific implementations.
+
+Modules implementing SimpleAnalysisBase only need to provide specific 
+_init(), _run() and result() methods, plus any get/set methods for 
+parameters to the analysis program.
+
+=head1 SEE ALSO
+
+L<Bio::SimpleAnalysisI>, 
+L<Bio::WebAgent>
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+Richard Adams, Richard.Adams at ed.ac.uk, 
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Analysis::SimpleAnalysisBase;
+
+use strict;
+use Data::Dumper;
+
+# Pattern text for a decimal number; not referenced by any code in this
+# module.
+my $FLOAT = '[+-]?\d*\.\d*';
+
+# Lookup table of the values accepted by status().
+my %STATUS =  map { $_ => 1 } qw(CREATED COMPLETED TERMINATED_BY_ERROR);
+
+use base qw(Bio::WebAgent Bio::SimpleAnalysisI);
+
+=head2 new
+
+ Usage   : $job->new(...)
+ Returns : a new analysis object, 
+ Args    : none (but an implementation may choose
+           to add arguments representing parameters for the analysis
+           program). Each argument key must have a method of the same
+           name implemented in a subclass. A seq() method is provided
+           here, as it will probably be needed by all sequence analysis
+           programs.
+
+=cut
+
+sub new {
+    # Constructor.  Arguments are key/value pairs; each key is
+    # lower-cased, stripped of a leading '-', and called as a method of
+    # that name with the value as its argument.
+    my ($class, @params) = @_;
+
+    my $self = $class->SUPER::new(); #WebAgent new
+
+    # _init has to run before the attributes are filled in
+    $self->_init;
+
+    while (@params) {
+        (my $method = lc shift @params) =~ s/^-//;
+        my $arg = shift @params;
+        $self->$method($arg);
+    }
+    return $self;
+}
+
+=head2 seq
+
+ Usage   : $job->seq()
+ Returns : a Bio::PrimarySeqI implementing sequence object, or void
+ Args    : None, or a Bio::PrimarySeqI implementing object 
+
+=cut
+
+sub seq {
+    # Get/set accessor for the sequence to analyse.
+    # Without a defined argument the stored sequence is returned.
+    # With one, the argument must be a Bio::PrimarySeqI that is not a
+    # Bio::SeqI and whose alphabet matches the 'type' of the analysis
+    # specification (case-insensitively); otherwise an exception is
+    # thrown.  The setter returns the analysis object itself.
+    my $self  = shift;
+    my $value = shift;
+
+    return $self->{'_seq'} unless defined $value;
+
+    if (!$value->isa('Bio::PrimarySeqI')) {
+        $self->throw("I need a Bio::PrimarySeqI, not  [". $value. "]");
+    }
+    if ($value->isa('Bio::SeqI')) {
+        $self->throw(" I need a PrimarySeq object, not a BioSeq object ");
+    }
+
+    my $mol_type = $self->analysis_spec->{'type'};
+    if ($value->alphabet !~ /$mol_type/i) {
+        $self->throw("I need a [" . $mol_type . "]  seq, not a  [". $value->alphabet. "]");
+    }
+
+    $self->{'_seq'} = $value;
+    return $self;
+}
+
+=head2  analysis_name
+
+    Usage     : $analysis->analysis_name();
+    Returns   : The analysis name
+    Arguments : none
+
+=cut
+
+sub analysis_name {
+    # Read-only accessor: returns the value stored under '_ANALYSIS_NAME'.
+    my ($self) = @_;
+    return $self->{'_ANALYSIS_NAME'};
+}
+
+=head2  analysis_spec
+
+    Usage    :  $analysis->analysis_spec();
+    Returns  :  a hash reference to  a hash of analysis parameters. See
+                Bio::SimpleAnalysisI for a list of recommended key values.
+    Arguments:  none
+
+=cut
+
+sub analysis_spec {
+    # Read-only accessor: returns the value stored under '_ANALYSIS_SPEC'.
+    my ($self) = @_;
+    return $self->{'_ANALYSIS_SPEC'};
+}
+
+=head2 clear
+
+    Usage     : $analysis->clear();
+    Returns   : true value on success
+    Arguments : none
+    Purpose   : to remove raw results from a previous analysis so that
+                an analysis can be repeated with different parameters.
+
+=cut
+
+sub clear {
+    # Removes the raw ('_result') and parsed ('_parsed') results of a
+    # previous run, if they are defined.  Always returns 1.
+    my ($self) = @_;
+    for my $slot (qw(_result _parsed)) {
+        delete $self->{$slot} if defined $self->{$slot};
+    }
+    return 1;
+}
+		 
+
+
+=head2  input_spec
+
+    Usage     : $analysis->input_spec();
+    Returns   : a  reference to  an array of  hashes of analysis parameters. See
+                Bio::SimpleAnalysisI for a list of recommended key values.
+    Arguments : none
+
+=cut
+
+sub input_spec {
+    # Read-only accessor: returns the value stored under '_INPUT_SPEC'.
+    my ($self) = @_;
+    return $self->{'_INPUT_SPEC'};
+}
+
+=head2  result_spec
+
+    Usage     : $analysis->result_spec();
+    Returns   : a  reference to  a   hashes of resultformats. See
+                Bio::SimpleAnalysisI for a list of recommended key values. 
+                The key values can be used as parameters to the result() 
+                method, the values provide descriptions.
+    Arguments : none
+
+=cut
+
+sub result_spec {
+    # Read-only accessor: returns the value stored under '_RESULT_SPEC'.
+    my ($self) = @_;
+    return $self->{'_RESULT_SPEC'};
+}
+
+sub run {
+    # Runs the analysis.
+    # Args    : optional hash reference of arguments, which is passed to
+    #           _process_arguments() when true
+    # Returns : the analysis object
+    # Throws if no sequence has been set.
+    my $self = shift;
+    my $args = shift;
+
+    if ($args) {
+        $self->_process_arguments ($args);
+    }
+
+    # check input
+    $self->seq or $self->throw("Need a sequence object as an input");
+    my $dump = Data::Dumper->Dump([$self],[$self]);
+    $self->debug($dump);
+
+    # the subclass does the actual work
+    $self->_run;
+    return $self;
+}
+
+sub wait_for {
+    # Delegates to run() with the same arguments and returns its result.
+    my $self = shift;
+    my $args = shift;
+    return $self->run($args);
+}
+
+sub status {
+    # Get/set accessor for the job status.
+    # A defined argument must be one of the keys of %STATUS, otherwise
+    # an exception is thrown.  Returns the stored status, or 'CREATED'
+    # when none (or a false one) is stored.
+    my $self  = shift;
+    my $value = shift;
+
+    if (defined $value) {
+        if (!defined $STATUS{$value}) {
+            $self->throw("Not a valid status value [$value]\n".
+                         "Valid values are ". join(", ", keys %STATUS ));
+        }
+        $self->{'_status'} = $value;
+    }
+    return $self->{'_status'} || 'CREATED' ;
+}
+
+sub _process_arguments {
+    # Applies a hash reference of run-time arguments to the object.
+    # Every key must be named in the input specification and is set by
+    # calling the accessor of the same name; afterwards every parameter
+    # marked mandatory must have a defined value.  Throws otherwise.
+    my ($self, $args) = @_;
+
+    # index the input specification by parameter name
+    my %spec = map { $_->{'name'} => $_ } @{$self->input_spec};
+
+    $self->debug(Data::Dumper->Dump([\%spec, $args],[\%spec, $args]));
+
+    for my $name (keys %$args) {
+        my $supplied = $args->{$name};
+        $spec{$name} or $self->throw("Unknown argument [$name]");
+        $self->$name($supplied);
+    }
+
+    for my $name (keys %spec) {
+        next unless $spec{$name}{'mandatory'} eq 'true';
+        $self->throw("Mandatory argument [$name] is not set")
+            unless defined $self->$name;
+    }
+}
+
+
+sub _run {
+    # Placeholder: subclasses have to provide their own _run().
+    my $self = shift;
+    $self->throw_not_implemented();
+}
+	
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AnalysisResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AnalysisResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/AnalysisResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,318 @@
+# $Id: AnalysisResult.pm,v 1.17.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Tools::AnalysisResult
+#
+# Cared for by Hilmar Lapp <hlapp-at-gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::AnalysisResult - Base class for analysis result objects and parsers
+
+=head1 SYNOPSIS
+
+    # obtain a AnalysisResult derived object somehow
+
+    print "Method ", $result->analysis_method(),
+          ", version ", $result->analysis_method_version(),
+          ", performed on ", $result->analysis_date(), "\n";
+
+    # annotate a sequence utilizing SeqAnalysisParserI methods
+    while($feat = $result->next_feature()) {
+	$seq->add_SeqFeature($feat);
+    }
+    $result->close();
+
+    # query object, e.g. a Bio::SeqI implementing object
+    $queryseq = $result->analysis_query();
+
+    # Subject of the analysis -- may be undefined. Refer to derived module
+    # to find out what is returned.
+    $subject = $result->analysis_subject();
+
+=head1 DESCRIPTION
+
+The AnalysisResult module is supposed to be the base class for modules
+encapsulating parsers and interpreters for the result of an analysis
+that was carried out with a query sequence.
+
+The notion of an analysis represented by this base class is that of a
+unary or binary operator, taking either one query or a query and a
+subject and producing a result. The query is e.g. a sequence, and a
+subject is either a sequence, too, or a database of sequences.
+
+This module also implements the Bio::SeqAnalysisParserI interface, and
+thus can be used wherever such an object fits.  See
+L<Bio::SeqAnalysisParserI>.  Developers will
+find a ready-to-use B<parse()> method, but need to implement
+B<next_feature()> in an inheriting class. Support for initialization
+with input file names and reading from streams is also ready to use.
+
+Note that this module does not provide support for B<running> an
+analysis.  Rather, it is positioned in the subsequent parsing step
+(concerned with turning raw results into BioPerl objects).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp-at-gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::AnalysisResult;
+use strict;
+
+use base qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::AnalysisResultI Bio::Root::IO);
+
+sub new {
+    # Constructor: builds the object through the superclass constructor
+    # and then runs _initialize() with the same arguments.
+    my $class = shift;
+    my @args  = @_;
+
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize(@args);
+    return $self;
+}
+
+sub _initialize {
+  # Runs the superclass initialisation, then resets this object's state
+  # through _initialize_state(); both receive the constructor arguments.
+  # Returns the value returned by the superclass _initialize().
+  # (The parameter list read "at args", an e-mail obfuscation of
+  # "@args" that does not compile.)
+  my($self, @args) = @_;
+
+  my $make = $self->SUPER::_initialize(@args);
+
+  $self->_initialize_state(@args);
+  return $make; # success - we hope!
+}
+
+=head2 _initialize_state
+
+ Title   : _initialize_state
+ Usage   : n/a; usually called by _initialize()
+ Function: This method is for BioPerl B<developers> only, as indicated by the
+           leading underscore in its name.
+
+           Performs initialization or reset of the state of this object. The
+           difference to _initialize() is that it may be called at any time,
+           and repeatedly within the lifetime of this object. B<Note>, however,
+           that this is potentially dangerous in a multi-threading
+           environment. In general, calling this method twice is discouraged
+           for this reason.
+
+           This method is supposed to reset the state such that any 'history'
+           is lost. State information that does not change during object
+           lifetime is not considered as history, e.g. parent, name, etc shall
+           not be reset. An inheriting object should only be concerned with
+           state information it introduces itself, and for everything else
+           call SUPER::_initialize_state(@args).
+
+           An example is parsing an input file: a state reset implies
+           discarding any unread input, and the actual input itself, followed
+           by setting the new input.
+
+           The argument syntax is the same as for L<new()|new> and L<_initialize()|_initialize>,
+           i.e., named parameters following the -name=>$value convention.
+           The following parameters are dealt with by the implementation
+           provided here:
+              -INPUT, -FH, -FILE
+           (tags are case-insensitive).
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _initialize_state {
+    # Resets the state of the object: closes the current input,
+    # re-initialises the I/O with the given named arguments and clears
+    # the stored subject, query, program name, program version and date.
+    # Always returns 1.
+    # (The parameter list read "at args", an e-mail obfuscation of
+    # "@args" that does not compile.)
+    my ($self, @args) = @_;
+
+    $self->close();
+    $self->_initialize_io(@args);
+
+    $self->{'_analysis_sbjct'} = undef;
+    $self->{'_analysis_query'} = undef;
+    $self->{'_analysis_prog'} = undef;
+    $self->{'_analysis_progVersion'} = undef;
+    $self->{'_analysis_date'} = undef;
+
+    return 1;
+}
+
+#  =head2 parse
+#
+#   Title   : parse
+#   Usage   : $obj->parse(-input=>$inputobj, [ -params=>[@params] ],
+#  		       [ -method => $method ] )
+#   Function: Sets up parsing for feature retrieval from an analysis file,
+#             or object.
+#
+#             This method was originally required by SeqAnalysisParserI, but
+#             is now discouraged due to potential problems in a multi-
+#             threading environment (CORBA!). If called only once, it doesn't
+#             add any functionality to calling new() with the same
+#             parameters.
+#
+#             The implementation provided here calls automatically
+#             _initialize_state() and passes on -input=>$inputobj and
+#             @params as final arguments.
+#   Example :
+#   Returns : void
+#   Args    : B<input>  - object/file where analysis are coming from
+#  	   B<params> - parameter to use when parsing/running analysis
+#  	   B<method> - method of analysis
+#
+#  =cut
+
+sub parse {
+    # Re-initialises the object for a new input.
+    # Named args: -input  (passed on to _initialize_state),
+    #             -params (array ref of further arguments for
+    #                      _initialize_state),
+    #             -method (if true, stored via analysis_method)
+    my ($self, @args) = @_;
+
+    my ($input, $params, $method) =
+        $self->_rearrange([qw(INPUT PARAMS METHOD)], @args);
+
+    # initialize with new input, appending the extra parameters if any
+    my @extra = $params ? @$params : ();
+    $self->_initialize_state('-input' => $input, @extra);
+
+    $self->analysis_method($method) if $method;
+}
+
+=head2 analysis_query
+
+ Usage     : $query_obj = $result->analysis_query();
+ Purpose   : Set/Get the name of the query used to generate the result, that
+             is, the entity on which the analysis was performed. Will mostly
+             be a sequence object (Bio::PrimarySeq compatible).
+ Argument  :
+ Returns   : The object set before. Mostly a Bio::PrimarySeq compatible object.
+
+=cut
+
+#--------
+sub analysis_query {
+    # Get/set accessor; a true argument replaces the stored query.
+    my $self = shift;
+    my $obj  = shift;
+    $self->{'_analysis_query'} = $obj if $obj;
+    return $self->{'_analysis_query'};
+}
+#--------
+
+=head2 analysis_subject
+
+ Usage     : $result->analysis_subject();
+ Purpose   : Set/Get the subject of the analysis against which it was
+             performed. For similarity searches it will probably be a database,
+             and for sequence feature predictions (exons, promoters, etc) it
+             may be a collection of models or homologous sequences that were
+             used, or undefined.
+ Returns   : The object that was set before, or undef.
+ Argument  :
+
+=cut
+
+#---------------
+sub analysis_subject {
+#---------------
+    # Get/set accessor; a true argument replaces the stored subject.
+    my $self      = shift;
+    my $sbjct_obj = shift;
+    $self->{'_analysis_sbjct'} = $sbjct_obj if $sbjct_obj;
+    return $self->{'_analysis_sbjct'};
+}
+
+
+=head2 analysis_date
+
+ Usage     : $result->analysis_date();
+ Purpose   : Set/Get the date on which the analysis was performed.
+ Returns   : String
+ Argument  :
+ Comments  :
+
+=cut
+
+#----------
+sub analysis_date {
+    # Get/set accessor; a true argument replaces the stored date.
+    my $self = shift;
+    my $date = shift;
+    $self->{'_analysis_date'} = $date if $date;
+    return $self->{'_analysis_date'};
+}
+#----------
+
+=head2 analysis_method
+
+ Usage     : $result->analysis_method();
+ Purpose   : Set/Get the name of the sequence analysis method that was used
+             to produce this result (BLASTP, FASTA, etc.). May also be the
+             actual name of a program.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    # Get/set accessor; a true argument replaces the stored method
+    # (program) name.
+    my $self   = shift;
+    my $method = shift;
+    $self->{'_analysis_prog'} = $method if $method;
+    return $self->{'_analysis_prog'};
+}
+
+=head2 analysis_method_version
+
+ Usage     : $result->analysis_method_version();
+ Purpose   : Set/Get the version string of the analysis program.
+           : (e.g., 1.4.9MP, 2.0a19MP-WashU).
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#---------------------
+sub analysis_method_version {
+#---------------------
+    # Get/set accessor; a true argument replaces the stored program
+    # version string.
+    my $self    = shift;
+    my $version = shift;
+    $self->{'_analysis_progVersion'} = $version if $version;
+    return $self->{'_analysis_progVersion'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPbl2seq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPbl2seq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPbl2seq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,458 @@
+# $Id: BPbl2seq.pm,v 1.29.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# Bioperl module Bio::Tools::BPbl2seq
+#	based closely on the Bio::Tools::BPlite modules
+#	Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
+#	Lorenz Pollak (lorenz at ist.org, bioperl port)
+#
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# October 20, 2000
+# May 29, 2001
+#	Fixed bug which prevented reading of more than one HSP / hit.
+#	This fix required changing calling syntax as described below. (PS)
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence
+alignment using the BLAST algorithm.
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::BPbl2seq;
+  my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out');
+  $report->sbjctName;
+  $report->sbjctLength;
+  while(my $hsp = $report->next_feature) {
+         $hsp->score;
+         $hsp->bits;
+         $hsp->percent;
+         $hsp->P;
+         $hsp->match;
+         $hsp->positive;
+         $hsp->length;
+	 $hsp->querySeq;
+	 $hsp->sbjctSeq;
+	 $hsp->homologySeq;
+	 $hsp->query->start;
+	 $hsp->query->end;
+	 $hsp->sbjct->start;
+	 $hsp->sbjct->end;
+	 $hsp->sbjct->seq_id;
+	 $hsp->sbjct->overlaps($exon);
+ }
+
+=head1 DESCRIPTION
+
+B<NOTE:> This module's functionality has been implemented in
+L<Bio::SearchIO::blast> and therefore is not actively maintained.
+
+BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq
+is a program for comparing and aligning two sequences using BLAST.
+Although the report format is similar to that of a conventional BLAST,
+there are a few differences so that BPlite is unable to read bl2seq
+reports directly.
+
+From the user's perspective, one difference between bl2seq and other
+blast reports is that the bl2seq report does not print out the name of
+the first of the two aligned sequences.  (The second sequence name is
+given in the report as the name of the "hit").  Consequently, BPbl2seq
+has no way of identifying the name of the initial sequence unless it
+is passed to the constructor via the -queryname argument, as in:
+
+	my $report = Bio::Tools::BPbl2seq->new(-fh        => \*FH,
+	                                       -queryname => "ALEU_HORVU");
+
+If the name of the first sequence (the "query") is not passed to
+BPbl2seq.pm in this manner, the name of the first sequence will be
+left as "unknown".  (Note that to preserve a common interface with the
+other BLAST programs the two sequences being compared are referred to
+in bl2seq as "query" and "subject" although this is perhaps a bit
+misleading when simply comparing 2 sequences as opposed to querying a
+database.)
+
+In addition, since there will only be (at most) one "subject" (hit) in
+a bl2seq report, one should use the method $report-E<gt>next_feature,
+rather than $report-E<gt>nextSbjct-E<gt>nextHSP to obtain the next
+high scoring pair.
+
+One should note that the previous (0.7) version of BPbl2seq used
+slightly different syntax. That version had a bug and consequently the
+old syntax has been eliminated.  Attempts to use the old syntax will
+return error messages explaining the (minor) recoding required to use
+the current syntax.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 ACKNOWLEDGEMENTS
+
+Based on work of:
+Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
+Lorenz Pollak (lorenz at ist.org, bioperl port)
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=cut
+
+#'
+package Bio::Tools::BPbl2seq;
+
+use strict;
+use Bio::Tools::BPlite;
+use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct
+use Symbol;
+
+use base qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
+
+#@ISA = qw(Bio::Tools::BPlite);
+
+=head2 new
+
+ Title   : new
+ Function: Create a new Bio::Tools::BPbl2seq object
+ Returns : Bio::Tools::BPbl2seq
+ Args    : -file     input file (alternative to -fh)
+           -fh       input stream (alternative to -file)
+           -queryname    name of query sequence
+           -report_type What type of BLAST was run (blastn,blastp,tblastn...)
+
+=cut
+
+sub new {
+    # Constructor. Recognised named arguments (besides the IO arguments
+    # consumed by _initialize_io): -queryname and -report_type.
+    # Emits a deprecation warning, reads the single subject header from
+    # the input via getSbjct() and caches it as the current subject.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # The two sentences were previously concatenated without a separator,
+    # producing "...is deprecatedUse Bio::SearchIO...".
+    $self->warn("Use of Bio::Tools::BPbl2seq is deprecated. ".
+                "Use Bio::SearchIO classes instead");
+    # initialize IO
+    $self->_initialize_io(@args);
+
+    my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME
+                                                REPORT_TYPE)], @args);
+    $queryname = 'unknown' if( ! defined $queryname );
+    if( $rt && $rt =~ /BLAST/i ) {
+        # Stored upper-cased, e.g. 'blastn' becomes 'BLASTN'.
+        $self->{'BLAST_TYPE'} = uc($rt);
+    } else {
+        $self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects");
+    }
+    my $sbjct = $self->getSbjct();
+    $self->{'_current_sbjct'} = $sbjct;
+
+    $self->{'_query'}->{'NAME'} = $queryname;
+    return $self;
+}
+
+
+=head2 getSbjct
+
+ Title    : getSbjct
+ Usage    : $sbjct = $obj->getSbjct();
+ Function : Method of obtaining single "subject" of a bl2seq report
+ Example  : my $sbjct = $obj->getSbjct();
+ Returns  : Sbjct object or undef if finished
+ Args     :
+
+=cut
+
+sub getSbjct {
+  # Reads the report up to (not including) the first "Score" line and
+  # builds the single subject (hit) object of a bl2seq report.
+  #
+  # Returns : a Bio::Tools::BPlite::Sbjct object, or nothing when no
+  #           ">name" definition line was seen before the input ended.
+  #
+  # Lines are read into a lexical variable rather than the global $_, so
+  # the caller's $_ (possibly aliased to its own data) is left alone.
+  my ($self) = @_;
+
+  ######################################
+  # get bl2seq "sbjct" name and length #
+  ######################################
+  my ($def, $length);
+ READLOOP: while (defined (my $line = $self->_readline)) {
+      if ($line =~ /^>(.+)$/) {
+          $def = $1;
+      }
+      elsif ($line =~ /^\s*Length\s.+\D(\d+)/i) {
+          $length = $1;
+      }
+      elsif ($line =~ /^\s{0,2}Score/) {
+          # Leave the first HSP header in the stream for the HSP parser.
+          $self->_pushback($line);
+          last READLOOP;
+      }
+  }
+  return if ! defined $def;
+  $def =~ s/\s+/ /g;
+  $def =~ s/\s+$//g;
+
+  ####################
+  # the Sbjct object #
+  ####################
+  # Direct method call instead of indirect-object "new Class(...)".
+  return Bio::Tools::BPlite::Sbjct->new('-name'   => $def,
+                                        '-length' => $length,
+                                        '-parent' => $self);
+}
+
+
+
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while( my $feat = $res->next_feature ) { # do something }
+ Function: calls next_feature function from BPlite.
+ Example :
+ Returns : A Bio::SeqFeatureI compliant object, in this case a
+           Bio::Tools::BPlite::HSP object, and FALSE if there are no more
+           HSPs.
+ Args    : None
+
+=cut
+
+sub next_feature{
+   # Returns the next HSP of the cached subject, or undef once the
+   # subject has no more HSPs. Returns nothing (after a debug message)
+   # when no subject was found in the report.
+   my $self  = shift;
+   my $sbjct = $self->{'_current_sbjct'};
+   if( ! defined $sbjct ) {
+       $self->debug(" No hit object found for bl2seq report \n ");
+       return;
+   }
+   # Any false value from nextHSP is normalised to undef.
+   my $hsp = $sbjct->nextHSP;
+   return $hsp ? $hsp : undef;
+}
+
+=head2  queryName
+
+ Title    :
+ Usage    : $name = $report->queryName();
+ Function : get /set the name of the query
+ Example  :
+ Returns  : name of the query
+ Args     :
+
+=cut
+
+sub  queryName {
+    # Get/set accessor for the query name, kept under {_query}{NAME}.
+    # Only a true argument replaces the stored name.
+    my $self      = shift;
+    my $queryname = shift;
+    $self->{'_query'}->{'NAME'} = $queryname if $queryname;
+    return $self->{'_query'}->{'NAME'};
+}
+
+=head2  sbjctName
+
+ Title    :
+ Usage    : $name = $report->sbjctName();
+ Function : returns the name of the Sbjct
+ Example  :
+ Returns  : name of the Sbjct
+ Args     :
+
+=cut
+
+sub  sbjctName {
+    # Returns the name of the cached subject (hit), or '' when there is
+    # no subject or it has no name.
+    #
+    # The subject is fetched into a local first: dereferencing
+    # $self->{'_current_sbjct'}->{'NAME'} directly would autovivify an
+    # unblessed hash in the '_current_sbjct' slot when no subject was
+    # found, which next_feature() would then try to call nextHSP on.
+    my $self  = shift;
+    my $sbjct = $self->{'_current_sbjct'};
+    return '' unless defined $sbjct;
+    return $sbjct->{'NAME'} || '';
+}
+
+=head2 sbjctLength
+
+ Title    :  sbjctLength
+ Usage    : $length = $report->sbjctLength();
+ Function : returns the length of the Sbjct
+ Example  :
+ Returns  : length of the Sbjct
+ Args     :
+
+=cut
+
+sub sbjctLength {
+    # Returns the length of the cached subject (hit), or undef when no
+    # subject is available.
+    #
+    # The subject is checked before it is dereferenced so that a missing
+    # subject is not autovivified into an unblessed hash (which would
+    # make next_feature() believe a subject exists).
+    my $self  = shift;
+    my $sbjct = $self->{'_current_sbjct'};
+    return defined $sbjct ? $sbjct->{'LENGTH'} : undef;
+}
+
+=head2 P
+
+ Title    : P
+ Usage    :
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub P     {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    return $self->throw("Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ");
+}
+
+=head2 percent
+
+ Title    : percent
+ Usage    : $hsp->percent();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub percent  {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    my $message = "Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ";
+    return $self->throw($message);
+}
+
+=head2 match
+
+ Title    : match
+ Usage    : $hsp->match();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub match  {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    return $self->throw("Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ");
+}
+
+=head2 positive
+
+ Title    : positive
+ Usage    : $hsp->positive();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub positive  {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    my $message = "Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ";
+    return $self->throw($message);
+}
+
+=head2 querySeq
+
+ Title    : querySeq
+ Usage    : $hsp->querySeq();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub querySeq  {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    return $self->throw("Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ");
+}
+
+=head2 sbjctSeq
+
+ Title    : sbjctSeq
+ Usage    : $hsp->sbjctSeq();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub sbjctSeq  {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    my $message = "Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ";
+    return $self->throw($message);
+}
+
+=head2 homologySeq
+
+ Title    : homologySeq
+ Usage    : $hsp->homologySeq();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub homologySeq  {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    return $self->throw("Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ");
+}
+
+=head2 qs
+
+ Title    : qs
+ Usage    : $hsp->qs();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub qs        {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    my $message = "Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ";
+    return $self->throw($message);
+}
+
+=head2 ss
+
+ Title    : ss
+ Usage    : $hsp->ss();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub ss     {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    return $self->throw("Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ");
+}
+
+=head2 hs
+
+ Title    : hs
+ Usage    : $hsp->hs();
+ Function : Syntax no longer supported, error message only
+
+=cut
+
+sub hs   {
+    # Stub for the old (0.7) report-level syntax: always throws.
+    my ($self) = @_;
+    my $message = "Syntax used is no longer supported.\n  See BPbl2seq.pm documentation for current syntax.\n ";
+    return $self->throw($message);
+}
+
+sub _fastForward {
+    # Skips input lines until one that starts a hit (">") or the report
+    # trailer (Parameters / Database: / Posted date: / Lambda). That line
+    # is pushed back so the next reader sees it.
+    #
+    # Returns : 0 if the report is flagged REPORT_DONE, 1 when such a
+    #           line was found; otherwise whatever warn() returns after
+    #           the input is exhausted.
+    #
+    # Uses a lexical line variable so the caller's global $_ is not
+    # overwritten as a side effect.
+    my ($self) = @_;
+    return 0 if $self->{'REPORT_DONE'}; # empty report
+    while (defined(my $line = $self->_readline())) {
+        if ($line =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) {
+            $self->_pushback($line);
+            return 1;
+        }
+    }
+    return $self->warn("Possible error (1) while parsing BLAST report!");
+}
+
+sub DESTROY {
+    # Destructor: detaches the cached subject, then releases the IO
+    # resources. Clearing the subject's PARENT slot presumably breaks the
+    # report <-> subject reference cycle (the subject is constructed with
+    # -parent => $self) - confirm against Bio::Tools::BPlite::Sbjct.
+    my ($self) = @_;
+    my $sbjct = $self->{'_current_sbjct'};
+    if( defined $sbjct ) {
+        $sbjct->{'PARENT'} = undef;
+        $self->{'_current_sbjct'} = undef;
+    }
+    return $self->_io_cleanup();
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/HSP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/HSP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/HSP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,502 @@
+###############################################################################
+# Bio::Tools::BPlite::HSP
+###############################################################################
+# HSP = High Scoring Pair (to all non-experts as I am)
+#
+# The original BPlite.pm module has been written by Ian Korf !
+# see http://sapiens.wustl.edu/~ikorf
+#
+# You may distribute this module under the same terms as perl itself
+
+
+#
+# BioPerl module for Bio::Tools::BPlite::HSP
+#
+# Cared for by Peter Schattner <schattner at alum.mit.edu>
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::BPlite::HSP - Blast report High Scoring Pair (HSP)
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::BPlite;
+ my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
+ {
+    while(my $sbjct = $report->nextSbjct) {
+	while (my $hsp = $sbjct->nextHSP) {
+	    $hsp->score;
+	    $hsp->bits;
+	    $hsp->percent;
+	    $hsp->P;
+	    $hsp->match;
+	    $hsp->positive;
+	    $hsp->length;
+	    $hsp->querySeq;
+	    $hsp->sbjctSeq;
+	    $hsp->homologySeq;
+	    $hsp->query->start;
+	    $hsp->query->end;
+	    $hsp->hit->start;
+	    $hsp->hit->end;
+	    $hsp->hit->seq_id;
+	    $hsp->hit->overlaps($exon);
+	}
+    }
+
+    # the following line takes you to the next report in the stream/file
+    # it will return 0 if that report is empty,
+    # but that is valid for an empty blast report.
+    # Returns -1 for EOF.
+
+    last if $report->_parseHeader == -1;
+
+ redo
+ }
+
+=head1 DESCRIPTION
+
+This object handles the High Scoring Pair data for a Blast report.
+This is where the percent identity, query and hit sequence length,
+P value, etc are stored and where most of the necessary information is located when building logic around parsing a Blast report.
+
+See L<Bio::Tools::BPlite> for more detailed information on the entire
+BPlite Blast parsing system.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::BPlite::HSP;
+
+use strict;
+
+# to disable overloading comment this out:
+#use overload '""' => '_overload';
+
+# Object preamble - inherits from Bio::SeqFeature::SimilarityPair
+
+use Bio::SeqFeature::Similarity;
+
+use base qw(Bio::SeqFeature::SimilarityPair);
+
+# Constructor for a single High Scoring Pair.
+#
+# Named arguments (see the _rearrange() key list below): -score, -bits,
+# -match, -hsplength, -positive, -gaps, -p, -exp, -querybegin, -queryend,
+# -sbjctbegin, -sbjctend, -queryseq, -sbjctseq, -homologyseq, -queryname,
+# -sbjctname, -querylength, -sbjctlength, -queryframe, -sbjctframe and
+# -blasttype.
+#
+# Builds one Bio::SeqFeature::Similarity feature each for the query and
+# the hit, normalising coordinates so that start <= end and deriving the
+# strand from the coordinate order and the BLAST flavour.
+sub new {
+    my ($class, @args) = @_;
+
+    # workaround to make sure frame is not set before strand is
+    # interpreted from query/hit info
+    # this workaround removes the key from the hash
+    # so the superclass does not try and work with it
+    # we'll take care of setting it in this module later on
+    my %newargs = @args;
+    foreach my $key ( keys %newargs ) {
+        delete $newargs{$key} if $key =~ /frame$/i;
+    }
+    # done with workaround
+
+    my $self = $class->SUPER::new(%newargs);
+
+    # The argument list must be passed as @args; the archived text had
+    # been mangled to "at args", which does not compile.
+    my ($score,$bits,$match,$hsplength,$positive,$gaps,$p,$exp,$qb,$qe,$sb,
+        $se,$qs,$ss,$hs,$qname,$sname,$qlength,$slength,$qframe,$sframe,
+        $blasttype) =
+            $self->_rearrange([qw(SCORE
+                                  BITS
+                                  MATCH
+                                  HSPLENGTH
+                                  POSITIVE
+                                  GAPS
+                                  P
+                                  EXP
+                                  QUERYBEGIN
+                                  QUERYEND
+                                  SBJCTBEGIN
+                                  SBJCTEND
+                                  QUERYSEQ
+                                  SBJCTSEQ
+                                  HOMOLOGYSEQ
+                                  QUERYNAME
+                                  SBJCTNAME
+                                  QUERYLENGTH
+                                  SBJCTLENGTH
+                                  QUERYFRAME
+                                  SBJCTFRAME
+                                  BLASTTYPE
+                                  )], @args);
+
+    $blasttype = 'UNKNOWN' unless $blasttype;
+    $self->report_type($blasttype);
+    # Determine strand meanings: a factor of 1 means the sequence is
+    # treated as stranded, 0 means its strand is left undefined.
+    my ($queryfactor, $sbjctfactor) = (1,0); # default
+    if ($blasttype eq 'BLASTP' || $blasttype eq 'TBLASTN' ) {
+        $queryfactor = 0;
+    }
+    if ($blasttype eq 'TBLASTN' || $blasttype eq 'TBLASTX' ||
+        $blasttype eq 'BLASTN' )  {
+        $sbjctfactor = 1;
+    }
+
+    # Set BLAST type
+    $self->{'BLAST_TYPE'} = $blasttype;
+
+    # Store the aligned query as sequence feature
+    my $strand;
+    if ($qe > $qb) {            # normal query: start < end
+        if ($queryfactor) { $strand = 1; } else { $strand = undef; }
+        $self->query( Bio::SeqFeature::Similarity->new
+                      (-start=>$qb, -end=>$qe, -strand=>$strand,
+                       -source=>"BLAST" ) ) }
+    else {                      # reverse query (i dont know if this is possible, but feel free to correct)
+        if ($queryfactor) { $strand = -1; } else { $strand = undef; }
+        $self->query( Bio::SeqFeature::Similarity->new
+                      (-start=>$qe, -end=>$qb, -strand=>$strand,
+                       -source=>"BLAST" ) ) }
+
+    # store the aligned hit as sequence feature
+    if ($se > $sb) {            # normal hit
+        if ($sbjctfactor) { $strand = 1; } else { $strand = undef; }
+        $self->hit( Bio::SeqFeature::Similarity->new
+                    (-start=>$sb, -end=>$se, -strand=>$strand,
+                     -source=>"BLAST" ) ) }
+    else { # reverse hit: start bigger than end
+        if ($sbjctfactor) { $strand = -1; } else { $strand = undef; }
+        $self->hit( Bio::SeqFeature::Similarity->new
+                    (-start=>$se, -end=>$sb, -strand=>$strand,
+                     -source=>"BLAST" ) ) }
+
+    # name the sequences
+    $self->query->seq_id($qname); # query name
+    $self->hit->seq_id($sname);   # hit name
+
+    # set lengths
+    $self->query->seqlength($qlength); # query length
+    $self->hit->seqlength($slength);   # hit length
+
+    # set object vars
+    $self->score($score);
+    $self->bits($bits);
+
+    $self->significance($p);
+    $self->{'EXP'} = $exp;
+
+    $self->query->frac_identical($match);
+    $self->hit->frac_identical($match);
+    $self->{'HSPLENGTH'} = $hsplength;
+    # Percent identity truncated (not rounded) to one decimal place.
+    $self->{'PERCENT'} = int((1000 * $match)/$hsplength)/10;
+    $self->{'POSITIVE'} = $positive;
+    $self->{'GAPS'} = $gaps;
+    $self->{'QS'} = $qs;
+    $self->{'SS'} = $ss;
+    $self->{'HS'} = $hs;
+
+    # Frames are applied last, after the strands are known (see the
+    # workaround at the top of this constructor).
+    $self->frame($qframe, $sframe);
+    return $self;               # success - we hope!
+}
+
+# to disable overloading comment this out:
+sub _overload {
+    # String form of the HSP: "<start>..<end> <bits>".
+    my ($self) = @_;
+    my $start = $self->start;
+    my $end   = $self->end;
+    my $bits  = $self->bits;
+    return $start . ".." . $end . " " . $bits;
+}
+
+=head2 report_type
+
+ Title    : report_type
+ Usage    : $type = $sbjct->report_type()
+ Function : Returns the type of report from which this hit was obtained.
+            This usually pertains only to BLAST and friends reports, for which
+            the report type denotes what type of sequence was aligned against
+            what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, 
+            TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
+ Example  : 
+ Returns  : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
+ Args     : a string on set (you should know what you are doing)
+
+=cut
+
+sub report_type {
+    # Get/set accessor for the report type string (BLASTN, BLASTP, ...).
+    # Only a true argument replaces the stored value.
+    my $self = shift;
+    my $rpt  = shift;
+    $self->{'_report_type'} = $rpt if $rpt;
+    return $self->{'_report_type'};
+}
+
+=head2 EXP
+
+ Title   : EXP
+ Usage   : my $exp = $hsp->EXP;
+ Function: returns the EXP value for the HSP
+ Returns : string value
+ Args    : none
+ Note    : Patch provided by Sami Ashour for BTK parsing
+
+
+=cut
+
+sub EXP{
+    # Read-only accessor for the value stored under 'EXP'.
+    my ($self) = @_;
+    return $self->{'EXP'};
+}
+
+
+=head2 P
+
+ Title    : P
+ Usage    : $hsp->P();
+ Function : returns the P (significance) value for a HSP 
+ Returns  : (double) significance value
+ Args     :
+
+=cut
+
+sub P {
+    # Returns the HSP significance as a parseable number.
+    # Arguments are forwarded to significance(). A value that is not a
+    # C-style float but becomes one with a leading "1" (e.g. "e-100")
+    # is returned with that "1" prepended; anything else yields a
+    # warning and 0.
+    my ($self, @args) = @_;
+    my $float = $self->significance(@args);
+    my $match = '([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # Perl Cookbook 2.1
+
+    # Is a C float
+    return $float if $float =~ /^$match$/;
+
+    # Almost C float, Jitterbug 974
+    my $patched = "1$float";
+    return $patched if $patched =~ /^$match$/;
+
+    $self->warn("[HSP::P()] '$float' is not a known number format. Returning zero (0) instead.");
+    return 0;
+}
+
+=head2 percent
+
+ Title    : percent
+ Usage    : $hsp->percent();
+ Function : returns the percent matching 
+ Returns  : (double) percent matching
+ Args     : none
+
+=cut
+
+sub percent {
+    # Read-only accessor for the value stored under 'PERCENT'.
+    my ($self) = @_;
+    return $self->{'PERCENT'};
+}
+
+
+=head2 match
+
+ Title    : match
+ Usage    : $hsp->match();
+ Function : returns the match
+ Example  : 
+ Returns  : (double) frac_identical 
+ Args     :
+
+=cut
+
+sub match {
+    # Delegates to frac_identical() on the query feature, forwarding any
+    # extra arguments unchanged.
+    my ($self, @args) = @_;
+    my $query = $self->query;
+    return $query->frac_identical(@args);
+}
+
+=head2 hsplength
+
+ Title    : hsplength
+ Usage    : $hsp->hsplength();
+ Function : returns the HSP length (including gaps)
+ Returns  : (integer) HSP length
+ Args     : none
+
+=cut
+
+sub hsplength {
+    # Read-only accessor for the value stored under 'HSPLENGTH'.
+    my ($self) = @_;
+    return $self->{'HSPLENGTH'};
+}
+
+=head2 positive
+
+ Title    : positive
+ Usage    : $hsp->positive();
+ Function : returns the number of positive matches (symbols in the alignment
+            with a positive score)
+ Returns  : (int) number of positive matches in the alignment
+ Args     : none
+
+=cut
+
+sub positive {
+    # Read-only accessor for the value stored under 'POSITIVE'.
+    my ($self) = @_;
+    return $self->{'POSITIVE'};
+}
+
+=head2 gaps
+
+ Title    : gaps
+ Usage    : $hsp->gaps();
+ Function : returns the number of gaps or 0 if none
+ Returns  : (int) number of gaps or 0 if none
+ Args     : none
+
+=cut
+
+sub gaps {
+    # Read-only accessor for the value stored under 'GAPS'.
+    my ($self) = @_;
+    return $self->{'GAPS'};
+}
+
+=head2 querySeq
+
+ Title    : querySeq
+ Usage    : $hsp->querySeq();
+ Function : returns the query sequence
+ Returns  : (string) the Query Sequence 
+ Args     : none
+
+=cut
+
+sub querySeq {
+    # Read-only accessor for the query sequence string stored under 'QS'.
+    my ($self) = @_;
+    return $self->{'QS'};
+}
+
+=head2 sbjctSeq
+
+ Title    : sbjctSeq
+ Usage    : $hsp->sbjctSeq();
+ Function : returns the Sbjct sequence 
+ Returns  : (string) the Sbjct Sequence 
+ Args     : none
+
+=cut
+
+sub sbjctSeq {
+    # Read-only accessor for the subject sequence string stored under 'SS'.
+    my ($self) = @_;
+    return $self->{'SS'};
+}
+
+=head2 homologySeq
+
+ Title    : homologySeq
+ Usage    : $hsp->homologySeq();
+ Function : returns the homologous sequence 
+ Returns  : (string) homologous sequence 
+ Args     : none
+
+=cut
+
+sub homologySeq {
+    # Read-only accessor for the homology string stored under 'HS'.
+    my ($self) = @_;
+    return $self->{'HS'};
+}
+
+=head2 qs
+
+ Title    : qs
+ Usage    : $hsp->qs();
+ Function : returns the Query Sequence (same as querySeq)
+ Returns  : (string) query Sequence 
+ Args     : none
+
+=cut
+
+sub qs {
+    # Short alias of querySeq: returns the value stored under 'QS'.
+    my ($self) = @_;
+    return $self->{'QS'};
+}
+
+=head2 ss
+
+ Title    : ss
+ Usage    : $hsp->ss();
+ Function : returns the subject sequence ( same as sbjctSeq) 
+ Returns  : (string) Sbjct Sequence
+ Args     : none
+
+=cut
+
+sub ss {
+    # Short alias of sbjctSeq: returns the value stored under 'SS'.
+    my ($self) = @_;
+    return $self->{'SS'};
+}
+
+=head2 hs
+
+ Title    : hs
+ Usage    : $hsp->hs();
+ Function : returns the Homologous Sequence (same as homologySeq ) 
+ Returns  : (string) Homologous Sequence
+ Args     : none
+
+=cut
+
+sub hs {
+    # Short alias of homologySeq: returns the value stored under 'HS'.
+    my ($self) = @_;
+    return $self->{'HS'};
+}
+
+# Get/set the reading frames of the query and the hit.
+#
+# Args    : optional query frame and hit frame. A value numerically equal
+#           to 0 clears the frame (stored as undef); otherwise the value
+#           must start with an optional sign followed by 1-3 and is stored
+#           in GFF convention (0-2). Invalid values produce a warning and
+#           are stored as undef.
+# Returns : list context with BLAST_TYPE 'TBLASTX': (QFRAME, SFRAME);
+#           other list context: (QFRAME, undef) if QFRAME is defined,
+#           else (undef, SFRAME) if SFRAME is defined;
+#           scalar context: QFRAME if defined, else SFRAME if defined.
+sub frame {
+    my ($self, $qframe, $sframe) = @_;
+    if( defined $qframe ) {
+	# Numeric comparison: any value that numifies to 0 is treated as
+	# "no frame".
+	if( $qframe == 0 ) {
+	    $qframe = undef;
+	} elsif( $qframe !~ /^([+-])?([1-3])/ ) {	    
+	    $self->warn("Specifying an invalid query frame ($qframe)");
+	    $qframe = undef;
+	} else { 
+	    # $1 (sign) and $2 (digit) come from the successful match that
+	    # the elsif above just failed to negate.
+	    if( ($1 eq '-' && $self->query->strand >= 0) || 
+		($1 eq '+' && $self->query->strand <= 0) ) {
+		$self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
+	    }
+	    # Set frame to GFF [0-2]
+	    $qframe = $2 - 1;
+	}
+	$self->{'QFRAME'} = $qframe;
+    }
+    # Same handling for the hit frame.
+    if( defined $sframe ) {
+	  if( $sframe == 0 ) {
+	    $sframe = undef;
+	  } elsif( $sframe !~ /^([+-])?([1-3])/ ) {	    
+	    $self->warn("Specifying an invalid hit frame ($sframe)");
+	    $sframe = undef;
+	  } else { 
+	      if( ($1 eq '-' && $self->hit->strand >= 0) || 
+		  ($1 eq '+' && $self->hit->strand <= 0) ) 
+	      {
+		  $self->warn("Hit frame ($sframe) did not match strand of hit (". $self->hit->strand() . ")");
+	      }
+	      
+	      # Set frame to GFF [0-2]
+	      $sframe = $2 - 1;
+	  }
+	  $self->{'SFRAME'} = $sframe;
+      }
+
+    # Forward one frame to the superclass and remember it under 'FRAME':
+    # the query frame is tried first, the hit frame only if the query
+    # alternative evaluates false.
+    # NOTE(review): if SUPER::frame returns the value it was given, a GFF
+    # frame of 0 is false here and evaluation falls through to the hit
+    # frame - confirm whether that is intended.
+    (defined $qframe && $self->SUPER::frame($qframe) && 
+     ($self->{'FRAME'} = $qframe)) || 
+    (defined $sframe && $self->SUPER::frame($sframe) && 
+     ($self->{'FRAME'} = $sframe));
+
+    # The return shape depends on calling context and BLAST type; the
+    # "return" statements are embedded in the &&/|| chains below.
+    if (wantarray() && 
+	$self->{'BLAST_TYPE'} eq 'TBLASTX') 
+    { 
+	return ($self->{'QFRAME'}, $self->{'SFRAME'}); 
+    } elsif (wantarray())  { 
+	(defined $self->{'QFRAME'} && 
+	 return ($self->{'QFRAME'}, undef)) || 
+	     (defined $self->{'SFRAME'} && 
+	      return (undef, $self->{'SFRAME'})); 
+    } else { 
+	(defined $self->{'QFRAME'} && 
+	 return $self->{'QFRAME'}) || 
+	(defined $self->{'SFRAME'} && 
+	 return $self->{'SFRAME'}); 
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Iteration.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Iteration.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Iteration.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,393 @@
+# $Id: Iteration.pm,v 1.18.4.1 2006/10/02 23:10:33 sendu Exp $
+# Bioperl module Bio::Tools::BPlite::Iteration
+#	based closely on the Bio::Tools::BPlite modules
+#	Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+#	Lorenz Pollak (lorenz at ist.org, bioperl port)
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# October 20, 2000
+# POD documentation - main docs before the code
+#
+# Added to get a simple_align object for a psiblast run with the -m 6 flag /AE
+# 
+
+=head1 NAME
+
+Bio::Tools::BPlite::Iteration - object for parsing single iteration
+of a PSIBLAST report
+
+=head1 SYNOPSIS
+
+   use Bio::Tools::BPpsilite;
+
+   open my $FH, "t/psiblastreport.out";
+   $report = Bio::Tools::BPpsilite->new(-fh=>$FH);
+
+   # determine number of iterations executed by psiblast
+   $total_iterations = $report->number_of_iterations;
+   $last_iteration = $report->round($total_iterations);
+
+   # Process only hits found in last iteration ...
+   $oldhitarray_ref = $last_iteration->oldhits;
+   HIT: while($sbjct = $last_iteration->nextSbjct) {
+       $id = $sbjct->name;
+       $is_old =  grep  /\Q$id\E/, @$oldhitarray_ref;
+       if ($is_old ){next HIT;}
+   #  do something with new hit...
+   }
+
+=head2 ALIGNMENTS
+
+  # This assumed that you have $db pointing to a database, $out to an output file
+  # $slxdir to a directory and $psiout    
+  # note the alignments can only be obtained if the flag "-m 6" is run.
+  # It might also be necessary to use the flag -v to get all alignments
+  # 
+    my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6,
+		     'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout ); 
+    my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams);
+    my $report = $factory->blastpgp($seq);
+    my $total_iterations = $report->number_of_iterations();
+    my $last_iteration = $report->round($total_iterations);
+    my $align=$last_iteration->Align;
+    my $slxfile=$slxdir.$id.".slx";
+    my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile );
+    $slx->write_aln($align);
+
+=head1 DESCRIPTION
+
+See the documentation for BPpsilite.pm for a description of the
+Iteration.pm module.
+
+=head1 AUTHORS - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at cgt.mc.duke.edu
+
+=head1 ACKNOWLEDGEMENTS
+
+Based on work of:
+Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+Lorenz Pollak (lorenz at ist.org, bioperl port)
+
+=head1 COPYRIGHT
+
+BPlite.pm is copyright (C) 1999 by Ian Korf. 
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+package Bio::Tools::BPlite::Iteration;
+
+use strict;
+use Bio::Tools::BPlite; #
+use Bio::Tools::BPlite::Sbjct;
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor for one PSI-BLAST iteration.
+    # Named arguments: -parent (the report object, from which QUERY and
+    # LENGTH are copied) and -round.
+    # REPORT_DONE is set to 0 when _parseHeader() returns true (there are
+    # alignments) and to 1 otherwise (empty report).
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # The argument list must be passed as @args; the archived text had
+    # been mangled to "at args", which does not compile.
+    ($self->{'PARENT'},$self->{'ROUND'}) =
+        $self->_rearrange([qw(PARENT
+                              ROUND
+                              )], @args);
+
+    $self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'};
+    $self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'};
+
+    if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
+    else                    {$self->{'REPORT_DONE'} = 1} # empty report
+
+    return $self; # success - we hope!
+}
+
+=head2 query
+
+ Title    : query
+ Usage    : $query = $obj->query();
+ Function : returns the query object
+ Example  :
+ Returns  : query object
+ Args     :
+
+=cut
+
+sub query {
+    # Read-only accessor for the value stored under 'QUERY'.
+    my ($self) = @_;
+    return $self->{'QUERY'};
+}
+
+=head2 qlength
+
+ Title    : qlength
+ Usage    : $len = $obj->qlength();
+ Returns  : length of query
+ Args     : none
+
+=cut
+
+sub qlength {
+    # Read-only accessor for the query length stored under 'LENGTH'.
+    my ($self) = @_;
+    return $self->{'LENGTH'};
+}
+
+=head2 newhits
+
+ Title    :  newhits
+ Usage    : $newhits = $obj->newhits();
+ Returns  : reference to an array listing all the hits 
+            from the current iteration which were not identified 
+            in the previous iteration
+ Args     : none
+
+=cut
+
+sub  newhits {
+    # Read-only accessor for the value stored under 'NEWHITS'.
+    my ($self) = @_;
+    return $self->{'NEWHITS'};
+}
+
+=head2 oldhits
+
+ Title    :  oldhits
+ Usage    : $oldhits = $obj->oldhits();
+ Returns  : reference to an array listing all the hits from 
+            the current iteration which were identified and 
+            above threshold in the previous iteration
+ Args     : none
+
+=cut
+
+sub  oldhits {
+    # Read-only accessor for the value stored under 'OLDHITS'.
+    my ($self) = @_;
+    return $self->{'OLDHITS'};
+}
+
+
+=head2 nextSbjct
+
+ Title    : nextSbjct
+ Usage    : $sbjct = $obj->nextSbjct();
+ Function : Method of iterating through all the Sbjct retrieved
+            from parsing the report 
+ Example  : while ( my $sbjct = $obj->nextSbjct ) {}
+ Returns  : next Sbjct object or undef if finished
+ Args     :
+
+=cut
+
+sub nextSbjct {
+    # Parses the next subject (hit) header of this iteration.
+    #
+    # Returns : a Bio::Tools::BPlite::Sbjct object; nothing when
+    #           _fastForward() reports no further hit; 0 when the
+    #           collected header does not start with ">".
+    #
+    # Lines are read into a lexical variable so the caller's global $_
+    # is left alone, and the length is taken from $1 only when the
+    # "Length = N" substitution really matched (otherwise $1 could still
+    # hold a capture from an unrelated earlier match).
+    my ($self) = @_;
+    $self->_fastForward or return;
+
+    #######################
+    # get all sbjct lines #
+    #######################
+    my $def = $self->_readline();
+
+    while (defined(my $line = $self->_readline)) {
+        if    ($line !~ /\w/)            { next }
+        elsif ($line =~ /Strand HSP/)    { next } # WU-BLAST non-data
+        elsif ($line =~ /^\s{0,2}Score/) { $self->_pushback($line); last }
+        elsif ($line =~ /^(\d+) .* \d+$/) { # This is not correct at all
+            # -m 6 output. Known limitations (from the original notes):
+            # 1: HSP does not work for -m 6 flag
+            # 2: length/name are incorrect
+            # 3: Names are repeated many times.
+            my $name = $1;
+            $self->_pushback($line);
+            return Bio::Tools::BPlite::Sbjct->new('-name'   => $name,
+                                                  '-length' => undef,
+                                                  '-parent' => $self);
+        }                       # m-6
+        elsif ($line =~ /^Parameters|^\s+Database:|^\s+Posted date:/) {
+            $self->_pushback($line);
+            last;
+        }
+        else { $def .= $line }
+    }
+    $def = '' unless defined $def;
+    $def =~ s/\s+/ /g;
+    $def =~ s/\s+$//g;
+    my $length;
+    if ($def =~ s/Length = ([\d,]+)$//) {
+        $length = $1;
+    }
+    return 0 unless $def =~ /^>/;
+    $def =~ s/^>//;
+
+    ####################
+    # the Sbjct object #
+    ####################
+    return Bio::Tools::BPlite::Sbjct->new('-name'   => $def,
+                                          '-length' => $length,
+                                          '-parent' => $self);
+}
+
+
+# This is added by /AE
+
+=head2 Align
+
+ Title    : Align
+ Usage    : $SimpleAlign = $obj->Align();
+ Function : Method to obtain a simpleAlign object from psiblast
+ Example  : $SimpleAlign = $obj->Align();
+ Returns  : SimpleAlign object or undef if not found.
+ BUG      : Only works if psiblast has been run with m 6 flag
+ Args     :
+
+=cut
+
+sub Align {
+    use Bio::SimpleAlign;
+    my ($self) = @_;
+    $self->_fastForward or return;
+    my $lastline = $self->_readline();
+    # If psiblast was not run correctly there is no QUERY row to start from.
+    return unless defined $lastline && $lastline =~ /^QUERY/;
+
+    # Per-row accumulators, keyed by row name:
+    #   %sequence - concatenated aligned residues
+    #   %first    - start coordinate from the first chunk seen for the row
+    #   %last     - stop coordinate from the most recent chunk with one
+    my (%sequence, %first, %last);
+
+    # Index of the current hit row within the current alignment block; it is
+    # reset by every QUERY row and appended to the hit id to build row names.
+    my $num = 0;
+
+    # Append one chunk of aligned sequence to the named row.
+    my $add_row = sub {
+        my ($name, $start, $seq, $stop) = @_;
+        $seq =~ s/-/\./g;    # gaps are stored as '.' instead of '-'
+        $sequence{$name} .= $seq;
+        if (!defined $first{$name} || $first{$name} eq '') {
+            $first{$name} = $start;
+        }
+        if ($stop ne '') {
+            $last{$name} = $stop;
+        }
+    };
+
+    # The first QUERY row is matched leniently (coordinates may be absent).
+    if ($lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/) {
+        $add_row->('QUERY', $1, $2, $3);
+    }
+
+    while (defined(my $line = $self->_readline())) {
+        chomp $line;
+        # The trailing field is the stop coordinate, so it must be digits;
+        # a literal '+' here would never match a continuation QUERY row.
+        if ($line =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/) {
+            $add_row->('QUERY', $1, $2, $3);
+            $num = 0;
+        }
+        elsif ($line =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/) {
+            $add_row->($1 . "." . $num, $2, $3, $4);
+            $num++;
+        }
+    }
+
+    # Build the alignment, adding rows in sorted name order.
+    my $align = Bio::SimpleAlign->new();
+    foreach my $name (sort keys %sequence) {
+        my $nse = $name . "/" . $first{$name} . "-" . $last{$name};
+        my $seqobj = Bio::LocatableSeq->new(-seq   => $sequence{$name},
+                                            -id    => $name,
+                                            -name  => $nse,
+                                            -start => $first{$name},
+                                            -end   => $last{$name});
+        $align->add_seq($seqobj);
+    }
+    return $align;
+}
+
+# Start of internal subroutines.
+
+sub _parseHeader {
+  # Reads lines up to the first alignment of this iteration, collecting the
+  # ids of the hit-summary lines on the way.
+  # Returns 1 after pushing back the first '>' or QUERY line and storing the
+  # collected ids in $self->{'OLDHITS'} / $self->{'NEWHITS'}.
+  # Returns 0 when a footer line or end of input is reached first; in that
+  # case OLDHITS/NEWHITS are not stored.
+  my ($self) = @_;
+  my (@old_hits, @new_hits);
+
+  # For rounds below 2 every listed hit counts as new; otherwise hits count
+  # as old until the "Sequences not found previously" marker is seen.
+  my $newhits_true = ($self->{'ROUND'} < 2) ? 1  : 0 ;
+  while(defined($_ = $self->_readline()) ) {
+    # Summary line: text, two or more whitespace characters, an integer
+    # score and a trailing e-value-like number.
+    if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/)    {
+	my $id = $1;
+	my $score= $2;	#not used currently
+	my $evalue= $3; 	#not used currently
+    	if ($newhits_true) { push ( @new_hits, $id);}
+    	else { push (@old_hits, $id);}
+    }
+    elsif ($_ =~ /^Sequences not found previously/)  {$newhits_true = 1 ;}
+# This is changed for "-m 6" option /AE
+    elsif ($_ =~ /^>/ || $_ =~ /^QUERY/)
+    {
+	$self->_pushback($_);
+	$self->{'OLDHITS'} = \@old_hits;
+	$self->{'NEWHITS'} = \@new_hits;
+	return 1;
+    }
+    # NOTE(review): '(d+)' matches one or more literal "d" characters, not
+    # digits, so the "Results from round" alternative cannot match a numbered
+    # round header. '\d+' may have been intended, but enabling that match
+    # would change when this method returns 0 - confirm against the callers
+    # before altering the pattern.
+    elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) {
+      	$self->_pushback($_);
+      	return 0; #  no sequences found in this iteration
+    }
+  }
+  return 0; # no sequences found in this iteration
+}
+
+sub _fastForward {
+  my ($self) = @_;
+  return 0 if $self->{'REPORT_DONE'}; # empty report
+
+  while(defined($_ = $self->_readline()) ) {
+      if( $_ =~ /^>/ ||
+	  $_ =~ /^QUERY|^\d+ .* \d+$/ ) { # Changed to also handle "-m 6" /AE
+	  $self->_pushback($_);
+	  return 1;
+      }
+#    print "FASTFORWARD",$_,"\n";
+      if ($_ =~ /^>|^Parameters|^\s+Database:/) {
+	  $self->_pushback($_);
+	  return 1;
+      }
+  }
+  $self->warn("Possible error (2) while parsing BLAST report!");
+}
+
+
+=head2 _readline
+
+ Title   : _readline
+ Usage   : $obj->_readline
+ Function: Reads a line of input.
+
+           Note that this method implicitly uses the value of $/ that is
+           in effect when called.
+
+           Note also that the current implementation does not handle pushed
+           back input correctly unless the pushed back input ends with the
+           value of $/.
+ Example :
+ Returns : 
+
+=cut
+
+sub _readline{
+   my ($self) = @_;
+   return $self->{'PARENT'}->_readline();
+}
+
+=head2 _pushback
+
+ Title   : _pushback
+ Usage   : $obj->_pushback($newvalue)
+ Function: puts a line previously read with _readline back into a buffer
+ Example :
+ Returns :
+ Args    : newvalue
+
+=cut
+
+sub _pushback {
+   my ($self, $arg) = @_;   
+   return $self->{'PARENT'}->_pushback($arg);    
+}
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Sbjct.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Sbjct.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite/Sbjct.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,367 @@
+# $Id: Sbjct.pm,v 1.33.4.1 2006/10/02 23:10:33 sendu Exp $
+###############################################################################
+# Bio::Tools::BPlite::Sbjct
+###############################################################################
+#
+# The original BPlite.pm module has been written by Ian Korf !
+# see http://sapiens.wustl.edu/~ikorf
+#
+# You may distribute this module under the same terms as perl itself
+
+
+#
+# BioPerl module for Bio::Tools::BPlite::Sbjct
+#
+# Cared for by Peter Schattner <schattner at alum.mit.edu>
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::BPlite::Sbjct - A Blast Subject (database search Hit)
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::BPlite;
+  my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
+  while(my $sbjct = $report->nextSbjct) {
+      $sbjct->name;    # access to the hit name
+      "$sbjct";        # overloaded to return name
+      $sbjct->nextHSP; # gets the next HSP from the sbjct
+      while (my $hsp = $sbjct->nextHSP) {
+ 	 # canonical form is again a while loop
+      }
+  }
+
+=head1 DESCRIPTION
+
+See L<Bio::Tools::BPlite> for more detailed information about the
+BPlite BLAST parsing objects.
+
+The original BPlite.pm module has been written by Ian Korf!
+See http://sapiens.wustl.edu/~ikorf
+
+The Sbjct object encapsulates a Hit in a Blast database
+search.  The Subjects are the "Hits" for a particular query.  A
+Subject may be made up of multiple High Scoring Pairs (HSP) which are
+accessed through the nextHSP method.
+
+If you are searching for the P-value or percent identity, note that
+these are specific to each HSP, so you will need to use the nextHSP
+method to get access to that data.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::BPlite::Sbjct;
+
+use strict;
+
+use Bio::Tools::BPlite::HSP; # we want to use HSP
+#use overload '""' => 'name';
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # Unpack the named constructor arguments (-name, -length, -parent).
+    ($self->{'NAME'}, $self->{'LENGTH'}, $self->{'PARENT'}) =
+        $self->_rearrange([qw(NAME
+                              LENGTH
+                              PARENT
+                              )], @args);
+
+    # Take the report type from the parent's BLAST_TYPE when it has one.
+    $self->report_type($self->{'PARENT'}->{'BLAST_TYPE'} || 'UNKNOWN');
+
+    # nextHSP sets this to 1 once it reaches the end of this hit's HSPs.
+    $self->{'HSP_ALL_PARSED'} = 0;
+
+    return $self;
+}
+
+=head2 name
+
+ Title    : name
+ Usage    : $name = $obj->name();
+ Function : returns the name of the Sbjct 
+ Example  : 
+ Returns  : name of the Sbjct 
+ Args     :
+
+=cut
+
+sub name {shift->{'NAME'}}
+
+=head2 report_type
+
+ Title    : report_type
+ Usage    : $type = $sbjct->report_type()
+ Function : Returns the type of report from which this hit was obtained.
+            This usually pertains only to BLAST and friends reports, for which
+            the report type denotes what type of sequence was aligned against
+            what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt, 
+            TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
+ Example  : 
+ Returns  : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
+ Args     : a string on set (you should know what you are doing)
+
+=cut
+
+sub report_type {
+    my ($self, $rpt) = @_;
+    if($rpt) {
+	$self->{'_report_type'} = $rpt;
+    }
+    return $self->{'_report_type'};
+}
+
+=head2 nextFeaturePair
+
+ Title    : nextFeaturePair
+ Usage    : $name = $obj->nextFeaturePair();
+ Function : same as the nextHSP function 
+ Example  : 
+ Returns  : next FeaturePair 
+ Args     :
+
+=cut
+
+sub nextFeaturePair {shift->nextHSP}; # just another name
+
+=head2 nextHSP
+
+ Title    : nextHSP
+ Usage    : $hsp = $obj->nextHSP();
+ Function : returns the next available High Scoring Pair
+ Example  : 
+ Returns  : Bio::Tools::BPlite::HSP or undef if finished
+ Args     :
+
+=cut
+
+sub nextHSP {
+  my ($self) = @_;  
+  return  if $self->{'HSP_ALL_PARSED'};
+  
+  ############################
+  # get and parse scorelines #
+  ############################
+  my ($qframe, $sframe);
+  my $scoreline = $self->_readline();
+  my $nextline = $self->_readline();
+  return if not defined $nextline;
+  $scoreline .= $nextline;
+  my ($score, $bits);
+  if ($scoreline =~ /\d bits\)/) {
+      ($score, $bits) = ( $scoreline =~
+			  /Score = (\d+) \((\S+) bits\)/); # WU-BLAST
+  } else {
+      ($bits, $score) = ( $scoreline =~
+			  /Score =\s+(\S+) bits \((\d+)/); # NCBI-BLAST
+  }
+  unless( defined $bits && defined $score ) { 
+      $self->warn("Weird scoreline ($scoreline) bailing\n");
+      return;
+  }
+  my ($match, $hsplength) = ($scoreline =~ /Identities = (\d+)\/(\d+)/);
+  my ($positive) = ($scoreline =~ /Positives = (\d+)/);
+  my ($gaps) = ($scoreline =~ /Gaps = (\d+)/);
+  if($self->report_type() eq 'TBLASTX') {
+      ($qframe, $sframe) = $scoreline =~ /Frame =\s+([+-]\d)\s+\/\s+([+-]\d)/;
+  } elsif ($self->report_type() eq 'TBLASTN')  {
+      ($sframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
+  } else {
+      ($qframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
+  }
+  $positive = $match if not defined $positive;
+  $gaps = '0' if not defined $gaps;
+  my ($p)        = ($scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/);
+  unless (defined $p) {(undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+(\S+)/}
+  my ($exp) = ($scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/);
+  $exp = -1 unless( defined $exp );
+
+  $self->throw("Unable to parse '$scoreline'") unless defined $score;
+  
+  #######################
+  # get alignment lines #
+  #######################
+  my (@hspline);
+  local $_;
+  while( defined($_ = $self->_readline()) ) {
+      if (/^WARNING:|^NOTE:/) {
+	  while(defined($_ = $self->_readline())) {last if $_ !~ /\S/}
+      }
+      elsif ( ! /\S/o)         {next}
+      elsif (/Strand HSP/o)    {next} # WU-BLAST non-data
+      elsif (/^\s*Strand/o)    {next} # NCBI-BLAST non-data
+      elsif (/^\s*Score/o)     {$self->_pushback($_); last}
+
+      elsif (/^>|^Histogram|^Searching|^Parameters|^\s+Database:|^CPU\stime|^\s*Lambda|^\s+Subset/o)   
+      {    
+	  #ps 5/28/01	
+	  # elsif ($_ =~ /^>|^Parameters|^\s+Database:|^CPU\stime/)   {
+	  $self->_pushback($_);
+
+	  $self->{'HSP_ALL_PARSED'} = 1;
+	  last;
+      } elsif( /^BLAST/ ) {
+	  $self->_pushback($_);
+	  $self->{'HSP_ALL_PARSED'} = 1;
+	  last;
+      } elsif( $_ =~ /^\s*Frame/ ) {
+	  if ($self->report_type() eq 'TBLASTX') {
+	      ($qframe, $sframe) = $_ =~ /Frame = ([\+-]\d)\s+\/\s+([\+-]\d)/;
+	  } elsif ($self->report_type() eq 'TBLASTN') {
+	      ($sframe) = $_ =~ /Frame = ([\+-]\d)/;
+	  } else {
+	      ($qframe) = $_ =~ /Frame = ([\+-]\d)/;
+	  }
+      }
+      else {
+	  push @hspline, $_;	#      store the query line
+	  $nextline = $self->_readline();
+	  # Skip "pattern" line when parsing PHIBLAST reports, otherwise store the alignment line
+	  my $l1 = ($nextline =~ /^\s*pattern/) ? $self->_readline() : $nextline;
+	  push @hspline, $l1;	# store the alignment line
+	  my $l2 = $self->_readline(); push @hspline, $l2; # grab/store the sbjct line
+      }
+  }
+  
+  #########################
+  # parse alignment lines #
+  #########################
+  my ($ql, $sl, $as) = ("", "", "");
+  my ($qb, $qe, $sb, $se) = (0,0,0,0);
+  my (@QL, @SL, @AS); # for better memory management
+  
+  for(my $i=0;$i<@hspline;$i+=3) {
+    # warn $hspline[$i], $hspline[$i+2];
+    $hspline[$i]   =~ /^(?:Query|Trans):\s+(\d+)\s*([\D\S]+)\s+(\d+)/o;
+    $ql = $2; $qb = $1 unless $qb; $qe = $3;
+    
+    my $offset = index($hspline[$i], $ql);
+    $as = substr($hspline[$i+1], $offset, CORE::length($ql));
+    
+    $hspline[$i+2] =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/o;
+    $sl = $2; $sb = $1 unless $sb; $se = $3;
+
+    push @QL, $ql; push @SL, $sl; push @AS, $as;
+  }
+
+  ##################
+  # the HSP object #
+  ##################
+  $ql = join("", @QL);
+  $sl = join("", @SL);
+  $as = join("", @AS);
+# Query name and length are not in the report for a bl2seq report so {'PARENT'}->query and
+# {'PARENT'}->qlength will not be available.
+  my ($qname, $qlength) = ('unknown','unknown');
+  if ($self->{'PARENT'}->can('query')) {
+	$qname   = $self->{'PARENT'}->query;
+	$qlength = $self->{'PARENT'}->qlength;
+  }	
+  
+  my $hsp = new Bio::Tools::BPlite::HSP
+      ('-score'      => $score, 
+       '-bits'       => $bits, 
+       '-match'      => $match,
+       '-positive'   => $positive, 
+       '-gaps'       => $gaps,
+       '-hsplength'  => $hsplength,
+       '-p'          => $p,
+       '-exp'        => $exp,
+       '-queryBegin' => $qb, 
+       '-queryEnd'   => $qe, 
+       '-sbjctBegin' => $sb,
+       '-sbjctEnd'   => $se, 
+       '-querySeq'   => $ql, 
+       '-sbjctSeq'   => $sl,
+       '-homologySeq'=> $as, 
+       '-queryName'  => $qname,
+#			'-queryName'=>$self->{'PARENT'}->query,
+       '-sbjctName'  => $self->{'NAME'},
+       '-queryLength'=> $qlength,
+#		       	'-queryLength'=>$self->{'PARENT'}->qlength,
+       '-sbjctLength'=> $self->{'LENGTH'},
+       '-queryFrame' => $qframe,
+       '-sbjctFrame' => $sframe,
+       '-blastType'  => $self->report_type());
+  return $hsp;
+}
+
+=head2 _readline
+
+ Title   : _readline
+ Usage   : $obj->_readline
+ Function: Reads a line of input.
+
+           Note that this method implicitly uses the value of $/ that is
+           in effect when called.
+
+           Note also that the current implementation does not handle pushed
+           back input correctly unless the pushed back input ends with the
+           value of $/.
+ Example :
+ Returns : 
+
+=cut
+
+sub _readline{
+   my ($self) = @_;
+   return $self->{'PARENT'}->_readline();
+}
+
+=head2 _pushback
+
+ Title   : _pushback
+ Usage   : $obj->_pushback($newvalue)
+ Function: puts a line previously read with _readline back into a buffer
+ Example :
+ Returns :
+ Args    : newvalue
+
+=cut
+
+sub _pushback {
+   my ($self, $arg) = @_;   
+   return $self->{'PARENT'}->_pushback($arg);    
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPlite.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,464 @@
+# $Id: BPlite.pm,v 1.42.4.1 2006/10/02 23:10:31 sendu Exp $
+##############################################################################
+# Bioperl module Bio::Tools::BPlite
+##############################################################################
+#
+# The original BPlite.pm module has been written by Ian Korf !
+# see http://sapiens.wustl.edu/~ikorf
+#
+# You may distribute this module under the same terms as perl itself
+
+=head1 NAME
+
+Bio::Tools::BPlite - Lightweight BLAST parser
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::BPlite;
+ my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
+
+  {
+    $report->query;
+    $report->database;
+    while(my $sbjct = $report->nextSbjct) {
+	$sbjct->name;
+	while (my $hsp = $sbjct->nextHSP) {
+	    $hsp->score;
+	    $hsp->bits;
+	    $hsp->percent;
+	    $hsp->P;
+            $hsp->EXP;
+	    $hsp->match;
+	    $hsp->positive;
+	    $hsp->length;
+	    $hsp->querySeq;
+	    $hsp->sbjctSeq;
+	    $hsp->homologySeq;
+	    $hsp->query->start;
+	    $hsp->query->end;
+	    $hsp->hit->start;
+	    $hsp->hit->end;
+	    $hsp->hit->seq_id;
+	    $hsp->hit->overlaps($exon);
+	}
+    }
+
+    # the following line takes you to the next report in the stream/file
+    # it will return 0 if that report is empty,
+    # but that is valid for an empty blast report.
+    # Returns -1 for EOF.
+
+    last if ($report->_parseHeader == -1);
+    redo;
+  }
+
+
+=head1 DESCRIPTION
+
+BPlite is a package for parsing BLAST reports. The BLAST programs are a family
+of widely used algorithms for sequence database searches. The reports are
+non-trivial to parse, and there are differences in the formats of the various
+flavors of BLAST. BPlite parses BLASTN, BLASTP, BLASTX, TBLASTN, and TBLASTX
+reports from both the high performance WU-BLAST, and the more generic
+NCBI-BLAST.
+
+Many people have developed BLAST parsers (I myself have made at least three).
+BPlite is for those people who would rather not have a giant object
+specification, but rather a simple handle to a BLAST report that works well
+in pipes.
+
+=head2 Object
+
+BPlite has three kinds of objects, the report, the subject, and the HSP. To
+create a new report, you pass a filehandle reference to the BPlite constructor.
+
+ my $report = new Bio::Tools::BPlite(-fh=>\*STDIN); # or any other filehandle
+
+The report has two attributes (query and database), and one method (nextSbjct).
+
+ $report->query;     # access to the query name
+ $report->database;  # access to the database name
+ $report->nextSbjct; # gets the next subject
+ while(my $sbjct = $report->nextSbjct) {
+     # canonical form of use is in a while loop
+ }
+
+A subject is a BLAST hit, which should not be confused with an HSP (below). A
+BLAST hit may have several alignments associated with it. A useful way of
+thinking about it is that a subject is a gene and HSPs are the exons. Subjects
+have one attribute (name) and one method (nextHSP).
+
+ $sbjct->name;    # access to the subject name
+ $sbjct->nextHSP; # gets the next HSP from the sbjct
+ while(my $hsp = $sbjct->nextHSP) {
+     # canonical form is again a while loop
+ }
+
+An HSP is a high scoring pair, or simply an alignment.  HSP objects
+inherit all the useful methods from RangeI/SeqFeatureI/FeaturePair,
+but provide an additional set of attributes (score, bits, percent, P,
+match, EXP, positive, length, querySeq, sbjctSeq, homologySeq) that
+should be familiar to anyone who has seen a blast report.
+
+For lazy/efficient coders, two-letter abbreviations are available for the 
+attributes with long names (qs, ss, hs). Ranges of the aligned sequences in
+query/subject and other information (like seqname) are stored
+in SeqFeature objects (i.e.: $hsp-E<gt>query, $hsp-E<gt>subject which is equal to
+$hsp-E<gt>feature1, $hsp-E<gt>feature2). querySeq, sbjctSeq and homologySeq do only
+contain the alignment sequences from the blast report.
+
+ $hsp->score;
+ $hsp->bits;
+ $hsp->percent;
+ $hsp->P;
+ $hsp->match;
+ $hsp->positive;
+ $hsp->length;
+ $hsp->querySeq;      $hsp->qs;
+ $hsp->sbjctSeq;      $hsp->ss;
+ $hsp->homologySeq;   $hsp->hs;
+ $hsp->query->start;
+ $hsp->query->end;
+ $hsp->query->seq_id;
+ $hsp->hit->primary_tag; # "similarity"
+ $hsp->hit->source_tag;  # "BLAST"
+ $hsp->hit->start;
+ $hsp->hit->end;
+ ...
+
+So a very simple look into a BLAST report might look like this.
+
+ my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
+ while(my $sbjct = $report->nextSbjct) {
+     print ">",$sbjct->name,"\n";
+     while(my $hsp = $sbjct->nextHSP) {
+	 	print "\t",$hsp->start,"..",$hsp->end," ",$hsp->bits,"\n";
+     }
+ }
+
+The output of such code might look like this:
+
+ >foo
+     100..155 29.5
+     268..300 20.1
+ >bar
+     100..153 28.5
+     265..290 22.1
+
+
+=head1 AUTHORS
+
+Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+Lorenz Pollak (lorenz at ist.org, bioperl port)
+
+=head1 ACKNOWLEDGEMENTS
+
+This software was developed at the Genome Sequencing Center at Washington
+University, St. Louis, MO.
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at cgt.mc.duke.edu
+
+=head1 COPYRIGHT
+
+Copyright (C) 1999 Ian Korf. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+package Bio::Tools::BPlite;
+
+use strict;
+
+use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct
+use Symbol;
+
+use base qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
+
+# new comes from a RootI now
+
+=head2 new
+
+ Title   : new
+ Function: Create a new Bio::Tools::BPlite object
+ Returns : Bio::Tools::BPlite
+ Args    : -file     input file (alternative to -fh)
+           -fh       input stream (alternative to -file)
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  # The two halves of the message need a separator, otherwise the warning
+  # reads "...is deprecatedUse Bio::SearchIO...".
+  $self->warn("Use of Bio::Tools::BPlite is deprecated. ".
+              "Use Bio::SearchIO classes instead");
+  # initialize IO
+  $self->_initialize_io(@args);
+
+  $self->{'QPATLOCATION'} = [];  # Anonymous array of query pattern locations for PHIBLAST
+
+  # Parse the report header straight away so query/database are available.
+  # NOTE(review): _parseHeader can also return -1 (EOF), which is true here
+  # and is therefore handled like "there are alignments" - confirm intended.
+  if ($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
+  else                     {$self->{'REPORT_DONE'} = 1} # empty report
+
+  return $self; # success - we hope!
+}
+
+# for SeqAnalysisParserI compliance
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while( my $feat = $res->next_feature ) { # do something }
+ Function: SeqAnalysisParserI implementing function. This implementation
+           iterates over all HSPs. If the HSPs of the current subject match
+           are exhausted, it will automatically call nextSbjct().
+ Example :
+ Returns : A Bio::SeqFeatureI compliant object, in this case a
+           Bio::Tools::BPlite::HSP object, and FALSE if there are no more
+           HSPs.
+ Args    : None
+
+=cut
+
+sub next_feature{
+   my ($self) = @_;
+   my ($sbjct, $hsp);
+   $sbjct = $self->{'_current_sbjct'};
+   unless( defined $sbjct ) {
+       $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
+       return  unless defined $sbjct;
+   }   
+   $hsp = $sbjct->nextHSP;
+   unless( defined $hsp ) {
+       $self->{'_current_sbjct'} = undef;
+       return $self->next_feature;
+   }
+   return $hsp || undef;
+}
+
+=head2 query
+
+ Title    : query
+ Usage    : $query = $obj->query();
+ Function : returns the query object
+ Example  :
+ Returns  : query object
+ Args     :
+
+=cut
+
+sub query    {shift->{'QUERY'}}
+
+=head2 qlength
+
+ Title    : qlength
+ Usage    : $len = $obj->qlength();
+ Function : returns the length of the query 
+ Example  :
+ Returns  : length of query
+ Args     :
+
+=cut
+
+sub qlength  {shift->{'LENGTH'}}
+
+=head2 pattern
+
+ Title    : pattern
+ Usage    : $pattern = $obj->pattern();
+ Function : returns the pattern used in a PHIBLAST search
+
+=cut
+
+sub pattern {shift->{'PATTERN'}}
+
+=head2 query_pattern_location
+
+ Title    : query_pattern_location
+ Usage    : $qpl = $obj->query_pattern_location();
+ Function : returns reference to array of locations in the query sequence
+            of pattern used in a PHIBLAST search
+
+=cut
+
+sub query_pattern_location {shift->{'QPATLOCATION'}}
+
+=head2 database
+
+ Title    : database
+ Usage    : $db = $obj->database();
+ Function : returns the database used in this search
+ Example  :
+ Returns  : database used for search
+ Args     :
+
+=cut
+
+sub database {shift->{'DATABASE'}}
+
+=head2 nextSbjct
+
+ Title    : nextSbjct
+ Usage    : $sbjct = $obj->nextSbjct();
+ Function : Method of iterating through all the Sbjct retrieved 
+            from parsing the report 
+ Example  : while ( my $sbjct = $obj->nextSbjct ) {}
+ Returns  : next Sbjct object or null if finished
+ Args     :
+
+=cut
+
+sub nextSbjct {
+  my ($self) = @_;
+  
+  $self->_fastForward or return;
+  local $_;
+  #######################
+  # get all sbjct lines #
+  #######################
+  my $def = $self->_readline();  
+  while(defined ($_ = $self->_readline() ) ) {
+    if    (! /\w/)           {next}
+    elsif (/Strand HSP/o)    {next} # WU-BLAST non-data
+    elsif (/^\s{0,2}Score/o) {$self->_pushback($_); last}
+    elsif (/^Histogram|^Searching|^Parameters|
+            ^\s+Database:|
+            ^\s+Posted date:/ox) {
+	$self->_pushback($_); 
+	last;
+    } else {
+	$def .= $_;
+    }
+  }
+  if( ! $def ) { 
+      return;
+  }
+  $def =~ s/\s+/ /g;
+  $def =~ s/\s+$//g;
+  
+  my $length;
+  if( $def =~ s/Length = ([\d,]+)$//g ) {
+      $length = $1;
+  }
+  return unless $def =~ /^>/;
+  $def =~ s/^>//;
+
+  ####################
+  # the Sbjct object #
+  ####################
+  my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
+					    '-length'=>$length,
+                                            '-parent'=>$self);
+  return $sbjct;
+}
+
+# begin private routines
+
+sub _parseHeader {
+  my ($self) = @_;
+
+  # normally, _parseHeader will break out of the parse as soon as it
+  # reaches a new Subject (i.e. the first one after the header) if you
+  # call _parseHeader twice in a row, with nothing in between, all you
+  # accomplish is a ->nextSubject call..  so we need a flag to
+  # indicate that we have *entered* a header, before we are allowed to
+  # leave it!
+
+  my $header_flag = 0; # here is the flag/ It is "false" at first, and
+                       # is set to "true" when any valid header element
+                       # is encountered
+  local $_;
+  $self->{'REPORT_DONE'} = 0;  # reset this bit for a new report
+  while(defined($_ = $self->_readline() ) ) {
+      s/\(\s*\)//;      
+      if (/^Query=(?:\s+(.+))?/s) {
+	  $header_flag = 1;	# valid header element found
+	  my $query = $1;
+	  while( defined($_ = $self->_readline() ) ) {
+	      # Continue reading query name until encountering either
+	      # a line that starts with "Database" or a blank line.
+	      # The latter condition is needed in order to be able to
+	      # parse megablast output correctly, since Database comes
+	      # before (not after) the query.
+	      if( ($_ =~ /^Database/) || ($_ =~ /^$/) ) {
+		  $self->_pushback($_); last;
+	      }	      
+	      $query .= $_;
+	  }
+	  $query =~ s/\s+/ /g;
+	  $query =~ s/\s+$//;
+	  $query =~ s/^>//;
+
+	  my $length = 0;
+	  if( $query =~ /\(([\d,]+)\s+\S+\)\s*$/ ) {      
+	      $length = $1;
+	      $length =~ s/,//g;
+	  } else { 
+	      $self->debug("length is 0 for '$query'\n");
+	  }
+	  $self->{'QUERY'} = $query;
+	  $self->{'LENGTH'} = $length;
+      }
+      elsif (/^(<b>)?(T?BLAST[NPX])\s+([\w\.-]+)\s+(\[[\w-]*\])/o) { 
+	  $self->{'BLAST_TYPE'} = $2; 
+	  $self->{'BLAST_VERSION'} = $3;
+      }				# BLAST report type - not a valid header element # JB949
+      
+      # Support Paracel BTK output
+      elsif ( $_ =~ /(^[A-Z0-9_]+)\s+BTK\s+/ ) { 
+	  $self->{'BLAST_TYPE'} = $1;
+	  $self->{'BTK'} = 1;
+      } 
+      elsif ($_ =~ /^Database:\s+(.+)/) {$header_flag = 1;$self->{'DATABASE'} = $1} # valid header element found
+      elsif ($_ =~ /^\s*pattern\s+(\S+).*position\s+(\d+)\D/) {   
+	  # For PHIBLAST reports
+	  $header_flag = 1;	# valid header element found
+	  $self->{'PATTERN'} = $1;
+	  push (@{$self->{'QPATLOCATION'}}, $2);
+      } 
+      elsif (($_ =~ /^>/) && ($header_flag==1)) {$self->_pushback($_); return 1} # only leave if we have actually parsed a valid header!
+      elsif (($_ =~ /^Parameters|^\s+Database:/) && ($header_flag==1)) { 
+      # if we entered a header, and saw nothing before the stats at the end, 
+      # then it was empty
+	  $self->_pushback($_);
+	  return 0;		# there's nothing in the report
+      } elsif( /Reference:\s+Aaron E\. Darling/ ) {
+	  $self->{'BTK'} = 1;
+      }  
+      # bug fix suggested by MI Sadowski via Martin Lomas
+      # see bug report #1118
+      if( ref($self->_fh()) !~ /GLOB/ && 
+	  $self->_fh()->can('EOF') && eof($self->_fh()) ) {
+	  $self->warn("unexpected EOF in file\n");
+	  return -1;
+      }
+  }
+  return -1; # EOF
+}
+
+sub _fastForward {
+    my ($self) = @_;
+    return 0 if $self->{'REPORT_DONE'}; # empty report
+    local $_;
+    while(defined( $_ = $self->_readline() ) ) {
+	if (/^Histogram|^Searching|^Parameters|^\s+Database:|
+             ^\s+Posted date:/xo) {
+	    return 0;
+	} elsif( $self->{'BTK'} && /^BLAST/o ) {
+	    return 0;
+	} elsif( /^>/ ) {
+	    $self->_pushback($_);	
+	    return 1;
+	}
+    }
+    unless( $self->{'BTK'} ) { # Paracel BTK reports have no footer
+	$self->warn("Possible error (1) while parsing BLAST report!");
+    }
+}
+
+1;
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPpsilite.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPpsilite.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/BPpsilite.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,360 @@
+# $Id: BPpsilite.pm,v 1.26.4.1 2006/10/02 23:10:31 sendu Exp $
+# Bioperl module Bio::Tools::BPpsilite
+############################################################
+#	based closely on the Bio::Tools::BPlite modules
+#	Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+#	Lorenz Pollak (lorenz at ist.org, bioperl port)
+#
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+# _history
+# October 20, 2000
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::BPpsilite - Lightweight BLAST parser for (iterated) psiblast reports
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::BPpsilite;
+  open my $FH, "t/psiblastreport.out";
+  $report = Bio::Tools::BPpsilite->new(-fh=>$FH);
+
+  # determine number of iterations executed by psiblast
+  $total_iterations = $report->number_of_iterations;
+  $last_iteration = $report->round($total_iterations);
+
+  # Process only hits found in last iteration ...
+   $oldhitarray_ref = $last_iteration->oldhits;
+   HIT: while($sbjct = $last_iteration->nextSbjct) {
+  	  $id = $sbjct->name;
+  	  $is_old =  grep  /\Q$id\E/, @$oldhitarray_ref;
+  	  if ($is_old ){next HIT;}
+  #  do something with new hit...
+  }
+
+
+=head1 DESCRIPTION
+
+B<NOTE:> This module's functionality has been implemented in
+L<Bio::SearchIO::blast> and therefore is not actively maintained.
+
+BPpsilite is a package for parsing multiple iteration PSIBLAST
+reports.  It is based closely on Ian Korf's L<Bio::Tools::BPlite>
+module for parsing single iteration BLAST reports (as modified by
+Lorenz Pollak).
+
+Two of the four basic objects of L<Bio::Tools::BPpsilite> are
+identical to the corresponding objects in BPlite - the "HSP.pm" and
+"Sbjct.pm" objects.  This DESCRIPTION documents only the one new
+object, the "iteration", as well as the additional methods that are
+implemented in BPpsilite that are not in BPlite.  See the BPlite
+documentation for information on the BPlite, SBJCT and HSP objects.
+
+The essential difference between PSIBLAST and the other BLAST programs
+(in terms of report parsing) is that PSIBLAST performs multiple
+iterations of the BLASTing of the database and the results of all of
+these iterations are stored in a single PSIBLAST report.  (For general
+information on PSIBLAST see the README.bla file in the standalone
+BLAST distribution and references therein). PSIBLAST's use of multiple
+iterations imposes additional demands on the report parser: * There
+are several iterations of hits.  Many of those hits will be repeated
+in more than one iteration.  Often only the last iteration will be of
+interest.  * Each iteration will list two different kinds of hits -
+repeated hits that were used in the model and newly identified hits -
+which may need to be processed in different manners * The total number
+of iterations performed is not displayed in the report until (almost)
+the very end of the report.  (The user can specify a maximum number of
+iterations for the PSIBLAST search, but the program may perform fewer
+iterations if convergence is reached)
+
+BPpsilite addresses these issues by offering the following methods:
+
+* The total number of iteration used is given by the method
+   number_of_iterations as in:
+
+	$total_iterations = $report->number_of_iterations;
+
+* Results from an arbitrary iteration round can be accessed by using
+  the 'round' method:
+
+	$iteration3_report = $report->round(3);
+
+* The ids of the sequences which passed the significance threshold for
+  the first time in the "nth" iteration can be identified by using the
+  newhits method.  Previously identified hits are identified by using
+  the oldhits method, as in:
+
+ 	$oldhitarray_ref = $iteration3_report->oldhits;
+ 	$newhitarray_ref = $iteration3_report->newhits;
+
+BPpsilite.pm should work equally well on reports generated by the
+StandAloneBlast.pm local BLAST module as with reports generated by
+remote psiblast searches. For examples of usage of BPpsilite.pm, the
+user is referred to the BPpsilite.t script in the "t" directory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email: schattner at alum.mit.edu
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 ACKNOWLEDGEMENTS
+
+Based on work of:
+Ian Korf (ikorf at sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf), 
+Lorenz Pollak (lorenz at ist.org, bioperl port)
+
+=head1 COPYRIGHT
+
+BPlite.pm is copyright (C) 1999 by Ian Korf. 
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+package Bio::Tools::BPpsilite;
+
+use strict;
+use Bio::Tools::BPlite::Iteration; #
+use Bio::Tools::BPlite::Sbjct; #   Debug code
+use Bio::Tools::BPlite; 
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+# Build a BPpsilite parser.  @args is handed unchanged to the parent
+# constructor and to _initialize_io() (the SYNOPSIS uses -fh).  The
+# constructor warns that the module is deprecated, asks for a temporary
+# directory via tempdir(), resets the iteration bookkeeping and parses the
+# report header.
+sub new {
+  my ($class, @args) = @_; 
+  my $self = $class->SUPER::new(@args);
+  # The two sentences need a separator, otherwise the message reads
+  # "... is deprecatedUse Bio::SearchIO ...".
+  $self->warn("Use of Bio::Tools::BPpsilite is deprecated. ".
+              "Use Bio::SearchIO classes instead");
+  # initialize IO
+  $self->_initialize_io(@args);
+  $self->{'_tempdir'} = $self->tempdir('CLEANUP' => 1);
+  $self->{'QPATLOCATION'} = [];  # Anonymous array of query pattern locations for PHIBLAST
+  $self->{'NEXT_ITERATION_NUMBER'} = 1;
+  $self->{'TOTAL_ITERATION_NUMBER'} = -1;  # -1 indicates preprocessing not yet done
+
+  # _parseHeader is true when alignments follow the header
+  if ($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
+  else                     {$self->{'REPORT_DONE'} = 1} # empty report
+  
+  return $self; # success - we hope!
+}
+
+=head2 query
+
+ Title    : query
+ Usage    : $query = $obj->query();
+ Function : returns the query object
+ Returns  : query object
+ Args     :
+
+=cut
+
+# Accessor: the value stored under 'QUERY' by the header parser.
+sub query {
+    my ($self) = @_;
+    return $self->{'QUERY'};
+}
+
+=head2 qlength
+
+ Title    : qlength
+ Usage    : $len = $obj->qlength();
+ Function : returns the length of the query 
+ Returns  : length of query
+ Args     :
+
+=cut
+
+# Accessor: the value stored under 'LENGTH' by the header parser.
+sub qlength {
+    my ($self) = @_;
+    return $self->{'LENGTH'};
+}
+
+=head2 database
+
+ Title    : database
+ Usage    : $db = $obj->database();
+ Function : returns the database used in this search
+ Returns  : database used for search
+ Args     :
+
+=cut
+
+# Accessor: the value stored under 'DATABASE' by the header parser.
+sub database {
+    my ($self) = @_;
+    return $self->{'DATABASE'};
+}
+
+=head2 number_of_iterations
+
+ Title    : number_of_iterations
+ Usage    : $total_iterations = $obj-> number_of_iterations();
+ Function : returns the total number of iterations used in this search
+ Returns  : total number of iterations used for search
+ Args     : none
+
+=cut
+
+
+=head2 pattern
+
+ Title    : pattern
+ Usage    : $pattern = $obj->pattern();
+ Function : returns the pattern used in a PHIBLAST search
+
+=cut
+
+# Accessor: the PHIBLAST pattern stored under 'PATTERN' by the header parser.
+sub pattern {
+    my ($self) = @_;
+    return $self->{'PATTERN'};
+}
+
+=head2 query_pattern_location
+
+ Title    : query_pattern_location
+ Usage    : $qpl = $obj->query_pattern_location();
+ Function : returns reference to array of locations in the query sequence
+            of pattern used in a PHIBLAST search
+
+=cut
+
+# Accessor: the array reference stored under 'QPATLOCATION' (the pattern
+# positions collected by the header parser).
+sub query_pattern_location {
+    my ($self) = @_;
+    return $self->{'QPATLOCATION'};
+}
+
+
+
+
+# Return the iteration count kept in TOTAL_ITERATION_NUMBER.  While that
+# slot still holds the -1 sentinel the report is preprocessed first, which
+# is what fills the slot in.
+sub number_of_iterations {
+	my ($self) = @_;
+	_preprocess($self) if $self->{'TOTAL_ITERATION_NUMBER'} == -1;
+	return $self->{'TOTAL_ITERATION_NUMBER'};
+}
+
+=head2 round
+
+ Title    : round
+ Usage    : $Iteration3 = $report->round(3);
+ Function : Method of retrieving data from a specific iteration 
+ Example  :  
+ Returns  : reference to requested Iteration object or null if argument
+		is greater than total number of iterations
+ Args     : number of the requested iteration
+
+=cut
+
+# Re-open the temporary file holding iteration $iter_num and wrap it in a
+# Bio::Tools::BPlite::Iteration object.
+#
+# Args    : number of the requested iteration
+# Returns : the new Iteration object
+# Throws  : if no filehandle is available after re-initialising the IO layer
+sub round {
+  my $self = shift;
+  my $iter_num = shift;
+  # The "iterationN.tmp" files are written by _preprocess(), which
+  # number_of_iterations() runs on first use.  Call it here so that round()
+  # also works when it is the first method called on the report.
+  $self->number_of_iterations;
+  $self->_initialize_io(-file => Bio::Root::IO->catfile
+			($self->{'_tempdir'},"iteration".$iter_num.".tmp"));
+  if( ! $self->_fh ) {
+      $self->throw("unable to re-open iteration file for round ".$iter_num);
+  }
+  return Bio::Tools::BPlite::Iteration->new(-round=>$iter_num,
+					    -parent=>$self);
+}
+
+# begin private routines
+
+# Read the report header and record query, query length, database and (for
+# PHIBLAST reports) pattern information on the object.
+#
+# Returns 1 when the header is followed by results (a '>' line or the
+# "Results from round 1" marker, which is pushed back for the next reader),
+# and 0 when the statistics footer is reached first or the input ends.
+sub _parseHeader {
+  my ($self) = @_;
+
+  # Keep the line-reading below from clobbering the caller's $_.
+  local $_;
+
+  while(defined ($_ = $self->_readline) ) {
+    if ($_ =~ /^Query=\s+([^\(]+)/)    {
+      my $query = $1;
+      # the query description may continue up to the next blank line
+      while(defined ($_ = $self->_readline)) {
+        last if $_ !~ /\S/;
+	$query .= $_;
+      }
+      $query =~ s/\s+/ /g;
+      $query =~ s/^>//;
+      # Capture in list context: when the "(NNN letters)" suffix is absent
+      # the length stays undef instead of picking up a stale $1.
+      my ($length) = $query =~ /\((\d+)\s+\S+\)\s*$/;
+      $self->{'QUERY'} = $query;
+      $self->{'LENGTH'} = $length;
+    }
+    elsif ($_ =~ /^Database:\s+(.+)/) {$self->{'DATABASE'} = $1}
+    elsif ($_ =~ /^\s*pattern\s+(\S+).*position\s+(\d+)\D/) 
+    {   # For PHIBLAST reports
+	$self->{'PATTERN'} = $1;
+	push (@{$self->{'QPATLOCATION'}}, $2);
+    } elsif ($_ =~ /^>|^Results from round 1/)    {
+	$self->_pushback($_); 
+	return 1;
+    } elsif ($_ =~ /^Parameters|^\s+Database:/) {
+	$self->_pushback($_); 
+	return 0; # there's nothing in the report
+    }
+  }
+  return 0; # input ended before any results were seen
+}
+
+=head2 _preprocess
+
+ Title    : _preprocess
+ Usage    : internal routine, not called directly
+ Function : determines number of iterations in report and prepares
+	    data so individual iterations can be parsed in non-sequential 
+            order 
+ Example  :  
+ Returns  : nothing. Sets TOTAL_ITERATION_NUMBER in object's hash
+ Args     : reference to calling object
+
+=cut
+
+#'
+# Split the remaining report into one temporary file per iteration
+# ("iterationN.tmp" inside the object's temp directory) and record the
+# number of iterations in TOTAL_ITERATION_NUMBER.
+#
+# Args   : the calling object
+# Throws : if an iteration file cannot be opened or closed
+sub _preprocess {
+	my $self = shift;
+	#	$self->throw(" PSIBLAST report preprocessing not implemented yet!");
+
+	my ($currentline, $currentfile, $round);
+
+	# open output file for data from iteration round #1
+	$round = 1;
+	$currentfile = Bio::Root::IO->catfile($self->{'_tempdir'}, 
+					  "iteration$round.tmp");
+	# three-argument open: the path is never interpreted as a mode
+	open (my $FILEHANDLE, '>', $currentfile) || 
+	  $self->throw("cannot open filehandle to write to file $currentfile: $!");
+
+	while(defined ($currentline = $self->_readline()) ) {
+		if ($currentline =~ /^Results from round\s+(\d+)/) {
+			# a new round starts: switch output to that round's file
+			$round = $1;
+			$currentfile = Bio::Root::IO->catfile($self->{'_tempdir'}, 
+															  "iteration$round.tmp");
+
+			close $FILEHANDLE;
+			open ($FILEHANDLE, '>', $currentfile) || 
+			  $self->throw("cannot open filehandle to write to file $currentfile: $!");
+		} elsif ($currentline =~ /CONVERGED/){ 
+			# This is a fix for psiblast parsing with -m 6 /AE
+			$round--;
+		}
+		print $FILEHANDLE $currentline ;
+	}
+	$self->{'TOTAL_ITERATION_NUMBER'}= $round;
+	# It is necessary to close filehandle otherwise the whole
+	# file will not be read later !!  Buffered write errors only
+	# surface here, so the result of close is checked.
+	close($FILEHANDLE) ||
+	  $self->throw("cannot close file $currentfile: $!");
+	return;
+}
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Blat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Blat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Blat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,241 @@
+# $Id: Blat.pm,v 1.7.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Blat
+#
+# Written by Balamurugan Kumarasamy
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+=head1 NAME
+
+Bio::Tools::Blat - parser for Blat program
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Blat;
+  my $blat_parser = new Bio::Tools::Blat(-fh =>$filehandle );
+  while( my $blat_feat = $blat_parser->next_result ) {
+        push @blat_feat, $blat_feat;
+  }
+
+=head1 DESCRIPTION
+
+ Parser for Blat program
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+ User feedback is an integral part of the evolution of this and other
+ Bioperl modules. Send your comments and suggestions preferably to
+ the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+ Report bugs to the Bioperl bug tracking system to help us keep track
+ of the bugs and their resolution. Bug reports can be submitted via the
+ web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Balamurugan Kumarasamy
+
+ Email: bala at tll.org.sg
+
+=head1 APPENDIX
+
+ The rest of the documentation details each of the object methods.
+ Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Blat;
+use strict;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Blat(-fh=>$filehandle);
+ Function: Builds a new Bio::Tools::Blat object
+ Returns : Bio::Tools::Blat
+ Args    : -filename
+           -fh (filehandle)
+
+=cut
+
+# Construct the parser: @args goes unchanged to the parent constructor and
+# to _initialize_io() (see the POD above for -fh / -filename).
+sub new {
+      my($class, @args) = @_;
+
+      my $self = $class->SUPER::new(@args);
+      $self->_initialize_io(@args);
+
+      return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $blat_parser->next_result
+ Function: Get the next result set from parser data
+ Returns : L<Bio::SeqFeature::Generic>
+ Args    : none
+
+=cut
+
+# Read lines until one parses as a 21-column alignment record (the code
+# comments call the format "psl output") and turn it into a feature.
+#
+# Returns a Bio::SeqFeature::Generic for the whole alignment, carrying one
+# FeaturePair sub-feature per alignment block, or nothing when the input is
+# exhausted.  Lines that do not start with an all-digit column are skipped
+# silently; incomplete records and records with a zero query length are
+# skipped with a warning.
+sub next_result {
+	my ($self) = @_;
+
+	# Keep the line-reading below from clobbering the caller's $_.
+	local $_;
+
+	# Test definedness, not truth, so a last line of just "0" cannot end
+	# the loop early.
+	while ( defined( $_ = $self->_readline() ) ) {
+		my $line = $_;
+		chomp $line;
+
+		# first split on whitespace:
+		my ($matches, $mismatches, $rep_matches, $n_count, $q_num_insert,
+			 $q_base_insert, $t_num_insert, $t_base_insert, $strand, $q_name,
+			 $q_length, $q_start, $q_end, $t_name, $t_length, 
+			 $t_start, $t_end, $block_count, $block_sizes, $q_starts,
+			 $t_starts
+			) = split ' ', $line;
+
+		# ignore any preceding text (blank lines leave $matches undefined)
+		next unless ( defined $matches && $matches =~/^\d+$/ );
+
+		# every column is needed below, so refuse truncated records
+		unless ( defined $t_starts ){
+			$self->warn("incomplete line, expected 21 columns: $line");
+			next;
+		}
+
+		$strand = $1 if ($strand =~/([+-])[+-]/);
+
+		# guard the division: a record without aligned bases gets 0.00
+		my $aligned    = $matches + $mismatches + $rep_matches;
+		my $percent_id = sprintf "%.2f",
+		( $aligned ? 100 * ($matches + $rep_matches) / $aligned : 0 );
+
+		unless ( $q_length ){
+			$self->warn("length of query is zero, something is wrong!");
+			next;
+		}
+
+		my $score   = sprintf "%.2f", (100 * $aligned / $q_length);
+
+		# size of each block of alignment (inclusive)
+		my @block_sizes     = split ",",$block_sizes;
+
+		# start position of each block (you must add 1 as psl output 
+		# is off by one in the start coordinate)
+		my @q_start_positions = split ",",$q_starts;
+		my @t_start_positions = split ",",$t_starts;
+
+		my $superfeature = Bio::SeqFeature::Generic->new();
+		$superfeature->seq_id($q_name);
+		$superfeature->score( $score );
+		$superfeature->add_tag_value('percent_id',$percent_id);
+
+		# each line of output represents one possible entire alignment 
+		# of the target (feat1) and the query (feat2); create as many
+		# feature pairs as there are blocks in the line
+		my (%feat1, %feat2);
+		$feat1{name}   = $t_name;
+		$feat2{name}   = $q_name;
+		$feat2{strand} = 1;
+		$feat1{strand} = $strand;
+
+		for my $i ( 0 .. $block_count - 1 ){
+
+			my ($query_start,$query_end);
+
+			if ( $strand eq '+' ){
+				$query_start = $q_start_positions[$i] + 1;
+				$query_end   = $query_start + $block_sizes[$i] - 1;
+			}else{
+				# minus strand: block starts are counted from the query end
+				$query_end   = $q_length  - $q_start_positions[$i];
+				$query_start = $query_end - $block_sizes[$i] + 1;
+			}
+
+			$feat2 {start} = $query_start;
+			$feat2 {end}   = $query_end;
+			if ( $query_end <  $query_start ){
+				$self->warn("dodgy feature coordinates: end = $query_end, start = $query_start. Reversing...");
+				$feat2 {end}   = $query_start;
+				$feat2 {start} = $query_end;
+			}
+
+			$feat1 {start} = $t_start_positions[$i] + 1;
+			$feat1 {end}   = $feat1{start} + $block_sizes[$i] - 1;
+
+			# we put all the features with the same score and percent_id
+			$feat2 {score}   = $score;
+			$feat1 {score}   = $feat2 {score};
+			$feat2 {percent} = $percent_id;
+			$feat1 {percent} = $feat2 {percent};
+
+			# other stuff:
+			$feat1 {db}         = undef;
+			$feat1 {db_version} = undef;
+			$feat1 {program}    = 'blat';
+			$feat1 {p_version}  = '1';
+			$feat1 {source}     = 'blat';
+			$feat1 {primary}    = 'similarity';
+			$feat2 {source}     = 'blat';
+			$feat2 {primary}    = 'similarity';
+
+			my $feature_pair = $self->create_feature(\%feat1, \%feat2);
+			$superfeature->add_sub_SeqFeature( $feature_pair,'EXPAND');
+		}
+		return $superfeature;
+	}
+	return; # no more records
+}
+
+=head2 create_feature
+
+ Title   : create_feature
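+ Usage   : my $pair = $blat_parser->create_feature(\%feat1, \%feat2)
+ Function: creates a feature pair from two hashes of feature attributes
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    : two hash references with the keys name, start, end, strand,
+           score, source and primary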
+
+
+=cut
+
+# Turn two attribute hashes into Bio::SeqFeature::Generic objects and join
+# them in a Bio::SeqFeature::FeaturePair.  The pair is tagged with values
+# taken from the second hash (keys p, percent and primary) and returned.
+sub create_feature {
+    my ($self, $feat1,$feat2) = @_;
+
+    # both sides are built from the same set of hash keys
+    my $as_generic = sub {
+        my ($attr) = @_;
+        return Bio::SeqFeature::Generic->new(
+                       -seq_id     => $attr->{name},
+                       -start      => $attr->{start},
+                       -end        => $attr->{end},
+                       -strand     => $attr->{strand},
+                       -score      => $attr->{score},
+                       -source     => $attr->{source},
+                       -primary    => $attr->{primary} );
+    };
+    my $first  = $as_generic->($feat1);
+    my $second = $as_generic->($feat2);
+
+    my $pair = Bio::SeqFeature::FeaturePair->new;
+    $pair->feature1($first);
+    $pair->feature2($second);
+
+    $pair->add_tag_value('evalue',     $feat2->{p});
+    $pair->add_tag_value('percent_id', $feat2->{percent});
+    $pair->add_tag_value("hid",        $feat2->{primary});
+
+    return $pair;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Blat.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/CodonTable.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/CodonTable.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/CodonTable.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,824 @@
+# $Id: CodonTable.pm,v 1.37.2.1 2006/10/02 23:10:31 sendu Exp $
+#
+# bioperl module for Bio::Tools::CodonTable
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::CodonTable - Bioperl codon table object
+
+=head1 SYNOPSIS
+
+  # This is a read-only class for all known codon tables.  The IDs are
+  # the ones used by nucleotide sequence databases.  All common IUPAC
+  # ambiguity codes for DNA, RNA and amino acids are recognized.
+
+  # to use
+  use Bio::Tools::CodonTable;
+
+  # defaults to ID 1 "Standard"
+  $myCodonTable   = Bio::Tools::CodonTable->new();
+  $myCodonTable2  = Bio::Tools::CodonTable->new( -id => 3 );
+
+  # change codon table
+  $myCodonTable->id(5);
+
+  # examine codon table
+  print  join (' ', "The name of the codon table no.", $myCodonTable->id(4),
+	       "is:", $myCodonTable->name(), "\n");
+
+  # print possible codon tables
+  $tables = Bio::Tools::CodonTable->tables;
+  while ( ($id,$name) = each %{$tables} ) {
+    print "$id = $name\n";
+  }
+
+  # translate a codon
+  $aa = $myCodonTable->translate('ACU');
+  $aa = $myCodonTable->translate('act');
+  $aa = $myCodonTable->translate('ytr');
+
+  # reverse translate an amino acid
+  @codons = $myCodonTable->revtranslate('A');
+  @codons = $myCodonTable->revtranslate('Ser');
+  @codons = $myCodonTable->revtranslate('Glx');
+  @codons = $myCodonTable->revtranslate('cYS', 'rna');
+
+  # reverse translate an entire amino acid sequence into a IUPAC
+  # nucleotide string
+
+  my $seqobj    = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
+  my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
+
+  #boolean tests
+  print "Is a start\n"       if $myCodonTable->is_start_codon('ATG');
+  print "Is a terminator\n"  if $myCodonTable->is_ter_codon('tar');
+  print "Is a unknown\n"     if $myCodonTable->is_unknown_codon('JTG');
+
+=head1 DESCRIPTION
+
+Codon tables are also called translation tables or genetic codes
+since that is what they represent. A bit more complete picture
+of the full complexity of codon usage in various taxonomic groups
+is presented at the NCBI Genetic Codes Home page.
+
+CodonTable is a BioPerl class that knows all current translation
+tables that are used by primary nucleotide sequence databases
+(GenBank, EMBL and DDBJ). It provides methods to output information
+about tables and relationships between codons and amino acids.
+
+This class and its methods recognize all common IUPAC ambiguity codes
+for DNA, RNA and amino acids. The translation method follows the
+conventions in EMBL and TREMBL databases.
+
+It is a nuisance to separate RNA and cDNA representations of nucleic
+acid transcripts. The CodonTable object accepts codons of both type as
+input and allows the user to set the mode for output when reverse
+translating. Its default for output is DNA.
+
+Note: 
+
+This class deals primarily with individual codons and amino
+acids. However in the interest of speed you can L<translate>
+longer sequence, too. The full complexity of protein translation
+is tackled by L<Bio::PrimarySeqI::translate>.
+
+
+The amino acid codes are IUPAC recommendations for common amino acids:
+
+          A           Ala            Alanine
+          R           Arg            Arginine
+          N           Asn            Asparagine
+          D           Asp            Aspartic acid
+          C           Cys            Cysteine
+          Q           Gln            Glutamine
+          E           Glu            Glutamic acid
+          G           Gly            Glycine
+          H           His            Histidine
+          I           Ile            Isoleucine
+          L           Leu            Leucine
+          K           Lys            Lysine
+          M           Met            Methionine
+          F           Phe            Phenylalanine
+          P           Pro            Proline
+		  O           Pyl            Pyrrolysine (22nd amino acid)
+		  U           Sec            Selenocysteine (21st amino acid)
+          S           Ser            Serine
+          T           Thr            Threonine
+          W           Trp            Tryptophan
+          Y           Tyr            Tyrosine
+          V           Val            Valine
+          B           Asx            Aspartic acid or Asparagine
+          Z           Glx            Glutamine or Glutamic acid
+		  J           Xle            Leucine or Isoleucine (mass spec ambiguity)
+          X           Xaa            Any or unknown amino acid
+
+
+It is worth noting that the "Bacterial" codon table no. 11 produces a
+polypeptide that is, confusingly, identical to the standard one. The
+only differences are in available initiator codons.
+
+
+NCBI Genetic Codes home page:
+     http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
+
+EBI Translation Table Viewer:
+     http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
+
+Amended ASN.1 version with ids 16 and 21 is at:
+     ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
+
+Thanks to Matteo diTomasso for the original Perl implementation
+of these tables.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tools::CodonTable;
+use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA 	    $CODONGAP $GAP
+	    %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+use Bio::Tools::IUPAC;
+use Bio::SeqUtils;
+
+use base qw(Bio::Root::Root);
+
+
+# first set internal values for all translation tables
+
+# Compile-time initialisation of the package-wide lookup data: gap symbols,
+# the table names, the translation strings, the start-codon strings, the
+# codon <-> column index maps and the IUPAC symbol tables.
+#
+# @NAMES, @TABLES and @STARTS are parallel lists; the id() and name()
+# methods of this package read table N from index N-1.
+BEGIN { 
+    use constant CODONSIZE => 3;
+    $GAP = '-';
+    $CODONGAP = $GAP x CODONSIZE;   # a codon made up entirely of gap symbols
+
+    @NAMES =			#id
+	(
+	 'Standard',		#1
+	 'Vertebrate Mitochondrial',#2
+	 'Yeast Mitochondrial',# 3
+	 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
+	 'Invertebrate Mitochondrial',#5
+	 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
+	 '', '',
+	 'Echinoderm Mitochondrial',#9
+	 'Euplotid Nuclear',#10
+	 '"Bacterial"',# 11
+	 'Alternative Yeast Nuclear',# 12
+	 'Ascidian Mitochondrial',# 13
+	 'Flatworm Mitochondrial',# 14
+	 'Blepharisma Nuclear',# 15
+	 'Chlorophycean Mitochondrial',# 16
+	 '', '',  '', '',
+	 'Trematode Mitochondrial',# 21
+	 'Scenedesmus obliquus Mitochondrial', #22
+	 'Thraustochytrium Mitochondrial' #23
+	 );
+
+    # One string per table; translate() picks the amino acid for a codon with
+    # substr() at the column stored for that codon in $CODONS.
+    # NOTE: qw() does not interpret quotes, so each '' below is stored as the
+    # literal two-character string "''", not as an empty string.
+    @TABLES =
+	qw(
+	   FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
+	   FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   '' ''
+	   FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
+	   FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
+	   FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   '' '' '' ''
+	   FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG   
+	   FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
+	   );
+
+
+    # Same layout as @TABLES.  Presumably an 'M' marks a codon usable as an
+    # initiator in that table - TODO confirm against the methods reading it.
+    # The '' placeholders are literal two-character strings here as well.
+    @STARTS =
+	qw(
+	   ---M---------------M---------------M----------------------------
+	   --------------------------------MMMM---------------M------------
+	   ----------------------------------MM----------------------------
+	   --MM---------------M------------MMMM---------------M------------
+	   ---M----------------------------MMMM---------------M------------
+	   -----------------------------------M----------------------------
+	   '' ''
+	   -----------------------------------M----------------------------
+	   -----------------------------------M----------------------------
+	   ---M---------------M------------MMMM---------------M------------
+	   -------------------M---------------M----------------------------
+	   -----------------------------------M----------------------------
+	   -----------------------------------M----------------------------
+	   -----------------------------------M----------------------------
+	   -----------------------------------M----------------------------
+	   '' ''  '' ''
+	   -----------------------------------M---------------M------------  
+	   -----------------------------------M----------------------------
+	   --------------------------------M--M---------------M------------
+	   );
+
+    # Number the 64 lower-case DNA codons 0..63 with the first base varying
+    # slowest, in the order t, c, a, g:
+    #   $CODONS : codon => column,  $TRCOL : column => codon
+    my @nucs = qw(t c a g);
+    my $x = 0;
+    ($CODONS, $TRCOL) = ({}, {});
+    for my $i (@nucs) {
+	for my $j (@nucs) {
+	    for my $k (@nucs) {
+		my $codon = "$i$j$k";
+		$CODONS->{$codon} = $x;
+		$TRCOL->{$x} = $codon;
+		$x++;
+	    }
+	}
+    }
+    # symbol tables supplied by Bio::Tools::IUPAC and Bio::SeqUtils
+    %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();    
+    %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
+    %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
+    # character class built from whatever valid_aa(0) returns
+    $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
+    $TERMINATOR = '*';
+}
+
+# Construct a codon table object.
+#
+# Args    : -id => number of the translation table (optional; a missing
+#           or false value selects table 1)
+# Returns : the new object
+sub new {
+    my($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my($id) =
+	$self->_rearrange([qw(ID
+			     )],
+			 @args);
+
+    # fall back to table 1 when no id was given
+    $self->id($id || 1);
+    return $self; # success - we hope!
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id(3); $id_integer = $obj->id();
+ Function:
+
+           Sets or returns the id of the translation table.  IDs are
+           integers from 1 to 23, excluding 7 and 8 which have been
+           removed as redundant, and 17 to 20 for which this module
+           defines no table. If an invalid ID is given the method
+           returns 0, false.
+
+
+ Example :
+ Returns : value of id, a scalar, 0 if not a valid
+ Args    : newvalue (optional)
+
+=cut
+
+# Get or set the translation table id.
+#
+# Args    : new id (optional)
+# Returns : the current id; an invalid new id triggers a warning and is
+#           stored (and returned) as 0
+sub id{
+   my ($self,$value) = @_;
+   if( defined $value) {
+       # Only positive integers can name a table.  Checking this first also
+       # keeps 0 and negative numbers from indexing @TABLES from its end.
+       my $table = ( $value =~ /^\d+$/ && $value >= 1 )
+	   ? $TABLES[$value-1]
+	   : undef;
+       # Unused slots of @TABLES come from a qw() list, so they hold the
+       # literal string "''" rather than an empty string; reject both.
+       if ( !defined $table or $table =~ /^(?:'')?$/ ) {
+	   $self->warn("Not a valid codon table ID [$value] ");
+	   $value = 0;
+       }
+       $self->{'id'} = $value;
+   }
+   return $self->{'id'};
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name()
+ Function: returns the descriptive name of the translation table
+ Example :
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+# Look up the descriptive name stored in @NAMES for the current table id
+# (ids are 1-based, the list is 0-based).
+sub name{
+   my ($self) = @_;
+   return $NAMES[ $self->{'id'} - 1 ];
+}
+
+=head2 tables
+
+ Title   : tables
+ Usage   : $obj->tables()  or  Bio::Tools::CodonTable->tables()
+ Function: returns a hash reference where each key is a valid codon
+           table id() number, and each value is the corresponding
+           codon table name() string
+ Example :
+ Returns : A hashref
+ Args    : None
+
+
+=cut
+
+# Build a hash reference mapping each table id to its name, leaving out the
+# ids whose entry in @NAMES is empty.
+sub tables{
+  my %name_of;
+  my $id = 0;
+  for my $name (@NAMES) {
+    $id++;                       # ids are 1-based
+    next unless $name;
+    $name_of{$id} = $name;
+  }
+  return \%name_of;
+}
+                
+=head2 translate
+
+ Title   : translate
+ Usage   : $obj->translate('YTR')
+ Function: Returns a string of one letter amino acid codes from 
+           nucleotide sequence input. The input can be of any length.
+
+           Returns 'X' for unknown codons and codons that code for
+           more than one amino acid. Returns an empty string if input
+           is not three characters long. Exceptions for these are:
+
+             - IUPAC amino acid code B for Aspartic Acid and
+               Asparagine, is used.
+             - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
+               used.
+             - if the codon is two nucleotides long and if by adding
+               an a third character 'N', it codes for a single amino
+               acid (with exceptions above), return that, otherwise
+               return empty string.
+
+           Returns empty string for other input strings that are not
+           three characters long.
+
+ Example :
+ Returns : a string of one letter ambiguous IUPAC amino acid codes
+ Args    : ambiguous IUPAC nucleotide string
+
+
+=cut
+
+sub translate {
+    my ($self, $seq) = @_;
+    $self->throw("Calling translate without a seq argument!") unless defined $seq;
+    return '' unless $seq;
+
+    my $id = $self->id;
+    # Two trailing nucleotides are padded with 'n' and translated after
+    # the main loop; a single trailing nucleotide is ignored.
+    my $partial = (length($seq) % CODONSIZE == 2) ? 2 : 0;
+
+    # Work on lower-case DNA so that triplets can be looked up in $CODONS.
+    $seq = lc $seq;
+    $seq =~ tr/u/t/;
+    my $protein = "";
+    if ($seq =~ /[^actg]/ ) { #ambiguous chars
+        for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
+            my $triplet = substr($seq, $i, CODONSIZE);
+            if( $triplet eq $CODONGAP ) {
+                $protein .= $GAP;
+            } elsif (exists $CODONS->{$triplet}) {
+                $protein .= substr($TABLES[$id-1],
+                                   $CODONS->{$triplet},1);
+            } else {
+                $protein .= $self->_translate_ambiguous_codon($triplet);
+            }
+        }
+    } else { # simple, strict translation
+        for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
+            my $triplet = substr($seq, $i, CODONSIZE);
+            if( $triplet eq $CODONGAP ) {
+                $protein .= $GAP;
+            } elsif (exists $CODONS->{$triplet}) {
+                # 'elsif', as in the branch above: with a bare 'if' a gap
+                # codon would add $GAP and then fall through to 'X'.
+                $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
+            } else {
+                $protein .= 'X';
+            }
+        }
+    }
+    if ($partial == 2) { # 2 overhanging nucleotides
+        # last two characters of the sequence plus a wildcard third base
+        my $triplet = substr($seq, ($partial -4)). "n";
+        if( $triplet eq $CODONGAP ) {
+            $protein .= $GAP;
+        } elsif (exists $CODONS->{$triplet}) {
+            my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
+            $protein .= $aa;
+        } else {
+            # passing $partial makes an unresolvable codon yield '' rather than 'X'
+            $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
+        }
+    }
+    return $protein;
+}
+
+# Translates one codon containing ambiguity codes.  The codon is
+# expanded into every unambiguous codon it can stand for.  If they all
+# give the same residue, that residue is returned; D together with N
+# becomes 'B' and E together with Q becomes 'Z'.  Every other outcome
+# is 'X', or '' when $partial is true.
+sub _translate_ambiguous_codon {
+    my ($self, $triplet, $partial) = @_;
+    $partial ||= 0;
+    my $id = $self->id;
+    my $fallback = $partial ? '' : 'X';
+
+    # collect the distinct residues encoded by the expanded codons
+    my %seen;
+    for my $codon ( _unambiquous_codons($triplet) ) {
+        my $residue = substr($TABLES[$id-1], $CODONS->{$codon}, 1);
+        $seen{$residue} = 1;
+    }
+
+    my @residues = keys %seen;
+    return $residues[0] if @residues == 1;
+    return $fallback    if @residues != 2;
+    return 'B' if $seen{'D'} and $seen{'N'};
+    return 'Z' if $seen{'E'} and $seen{'Q'};
+    return $fallback;
+}
+
+=head2 translate_strict
+
+ Title   : translate_strict
+ Usage   : $obj->translate_strict('ACT')
+ Function: returns one letter amino acid code for a codon input
+
+           Fast and simple translation. User is responsible to resolve
+           ambiguous nucleotide codes before calling this
+           method. Returns 'X' for unknown codons and an empty string
+           for input strings that are not three characters long.
+
+           It is not recommended to use this method in a production
+           environment. Use method translate, instead.
+
+ Example :
+ Returns : A string
+ Args    : a codon = a three nucleotide character string
+
+
+=cut
+
+sub translate_strict{
+   my ($self, $value) = @_;
+   my $id = $self->{'id'};
+
+   # normalise to lower-case DNA before the lookup
+   ($value = lc $value) =~ tr/u/t/;
+
+   # wrong length gives '', a codon missing from $CODONS gives 'X'
+   return ''  unless length($value) == 3;
+   return 'X' unless defined $CODONS->{$value};
+
+   return substr($TABLES[$id-1], $CODONS->{$value}, 1);
+}
+
+=head2 revtranslate
+
+ Title   : revtranslate
+ Usage   : $obj->revtranslate('G')
+ Function: returns codons for an amino acid
+
+           Returns an empty string for unknown amino acid
+           codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
+           Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
+           letter amino acid codes are accepted. '*' and 'Ter' are
+           used for terminator.
+
+           By default, the output codons are shown in DNA.  If the
+           output is needed in RNA (tr/t/u/), add a second argument
+           'RNA'.
+
+ Example : $obj->revtranslate('Gly', 'RNA')
+ Returns : An array of three lower case letter strings i.e. codons
+ Args    : amino acid, 'RNA'
+
+=cut
+
+sub revtranslate {
+    my ($self, $value, $coding) = @_;
+    my $id = $self->{'id'};
+    my @codons;
+
+    # a three letter code is first mapped to its one letter symbol
+    if (length($value) == 3 ) {
+        $value = $THREELETTERSYMBOLS{ ucfirst lc $value };
+    }
+
+    if ( defined $value and $value =~ /$VALID_PROTEIN/
+         and length($value) == 1 ) {
+        # work on a copy so the shared %IUPAC_AA entry is never modified
+        my @residues = @{ $IUPAC_AA{ uc $value } };
+        foreach my $residue (@residues) {
+            # the terminator symbol is a regex metacharacter
+            my $pattern = $residue eq '*' ? '\*' : $residue;
+            while ($TABLES[$id-1] =~ m/$pattern/g) {
+                # pos() points just past the match; step back onto it
+                my $offset = pos($TABLES[$id-1]) - 1;
+                push @codons, $TRCOL->{$offset};
+            }
+        }
+    }
+
+    # optional conversion of the result to RNA
+    if ($coding and uc ($coding) eq 'RNA') {
+        tr/t/u/ for @codons;
+    }
+
+    return @codons;
+}
+=head2 reverse_translate_all
+
+ Title   : reverse_translate_all
+ Usage   : my $iup_str = $cttable->reverse_translate_all($seq_object)
+           my $iup_str = $cttable->reverse_translate_all($seq_object,
+                                                         $cutable,
+                                                         15);
+ Function: reverse translates a protein sequence into IUPAC nucleotide
+           sequence. An 'X' in the protein sequence is converted to 'NNN'
+           in the nucleotide sequence.
+ Returns : a string
+ Args    : a Bio::PrimarySeqI compatible object (mandatory)
+           a Bio::CodonUsage::Table object and a threshold if only
+             codons with a relative frequency above the threshold are
+             to be considered. 
+
+
+=cut
+
+sub reverse_translate_all {
+
+    my ($self, $obj, $cut, $threshold) = @_;
+
+    ## check args are OK
+
+    if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
+        $self->throw(" I need a Bio::PrimarySeqI object, not a [".
+                     ref($obj) . "]");
+    }
+    if($obj->alphabet ne 'protein') {
+        $self->throw("Cannot reverse translate, need an amino acid sequence .".
+                     "This sequence is of type [" . $obj->alphabet ."]");
+    }
+
+    # one array reference of candidate codons per residue
+    my @data;
+    my @seq = split '', $obj->seq;
+
+    ## if we're not supplying a codon usage table...
+    if( !$cut && !$threshold) {
+        ## get lists of possible codons for each aa.
+        for my $aa (@seq) {
+            if ($aa =~ /x/i) {
+                push @data, (['NNN']);
+            }else {
+                my @cods = $self->revtranslate($aa);
+                push @data, \@cods;
+            }
+        }
+    }else{
+        #else we are supplying a codon usage table, we just want common codons
+        #check args first.
+        # A threshold given without a table also ends up here, so $cut
+        # has to be tested for truth before isa() is called on it.
+        if(!$cut || !$cut->isa('Bio::CodonUsage::Table')) {
+            $self->throw("I need a Bio::CodonUsage::Table object, not a [".
+                     ref($cut). "].");
+        }
+        my $cod_ref = $cut->probable_codons($threshold);
+        for my $aa (@seq) {
+            if ($aa =~ /x/i) {
+                push @data, (['NNN']);
+                next;
+            }
+            push @data, $cod_ref->{$aa};
+        }
+    }
+
+    return $self->_make_iupac_string(\@data);
+
+}
+
+=head2 is_start_codon
+
+ Title   : is_start_codon
+ Usage   : $obj->is_start_codon('ATG')
+ Function: returns true (1) for all codons that can be used as a
+           translation start, false (0) for others.
+ Example : $myCodonTable->is_start_codon('ATG')
+ Returns : boolean
+ Args    : codon
+
+
+=cut
+
+sub is_start_codon{
+   my ($self, $value) = @_;
+   my ($id) = $self->{'id'};
+
+   $value  = lc $value;
+   $value  =~ tr/u/t/;
+
+   return 0 if length $value != 3;
+
+   my @codons = _unambiquous_codons($value);
+
+   # A codon with a character that has no entry in %IUPAC_DNA expands
+   # to nothing; it used to be reported as a start codon because the
+   # check loop below never ran.
+   return 0 unless @codons;
+
+   # an ambiguous codon qualifies only if every codon it stands for
+   # is marked 'M' in the start table
+   foreach my $codon (@codons) {
+       return 0 if substr($STARTS[$id-1], $CODONS->{$codon}, 1) ne 'M';
+   }
+   return 1;
+}
+
+
+
+=head2 is_ter_codon
+
+ Title   : is_ter_codon
+ Usage   : $obj->is_ter_codon('GAA')
+ Function: returns true (1) for all codons that can be used as a
+           translation terminator, false (0) for others.
+ Example : $myCodonTable->is_ter_codon('ATG')
+ Returns : boolean
+ Args    : codon
+
+
+=cut
+
+sub is_ter_codon{
+   my ($self, $value) = @_;
+   my ($id) = $self->{'id'};
+
+   $value  = lc $value;
+   $value  =~ tr/u/t/;
+
+   return 0 if length $value != 3;
+
+   my @codons = _unambiquous_codons($value);
+
+   # A codon with a character that has no entry in %IUPAC_DNA expands
+   # to nothing; it used to be reported as a terminator because the
+   # check loop below never ran.
+   return 0 unless @codons;
+
+   # an ambiguous codon qualifies only if every codon it stands for
+   # translates to the terminator symbol
+   foreach my $codon (@codons) {
+       return 0 if substr($TABLES[$id-1], $CODONS->{$codon}, 1) ne $TERMINATOR;
+   }
+   return 1;
+}
+
+=head2 is_unknown_codon
+
+ Title   : is_unknown_codon
+ Usage   : $obj->is_unknown_codon('GAJ')
+ Function: returns false (0) for all codons that are valid,
+	    true (1) for others.
+ Example : $myCodonTable->is_unknown_codon('NTG')
+ Returns : boolean
+ Args    : codon
+
+
+=cut
+
+sub is_unknown_codon{
+   my ($self, $value) = @_;
+   my $id = $self->{'id'};
+
+   # normalise to lower-case DNA
+   ($value = lc $value) =~ tr/u/t/;
+
+   # anything that is not exactly three characters long is unknown
+   return 1 unless length($value) == 3;
+
+   # a codon is unknown when it expands to no unambiguous codon at all
+   my @residues = map { substr($TABLES[$id-1],$CODONS->{$_},1) }
+                  _unambiquous_codons($value);
+   return @residues ? 0 : 1;
+}
+
+=head2 _unambiquous_codons
+
+ Title   : _unambiquous_codons
+ Usage   : @codons = _unambiquous_codons('ACN')
+ Function:
+ Example :
+ Returns : array of strings (lower case unambiguous three letter codons)
+ Args    : a codon = a three IUPAC nucleotide character string
+
+=cut
+
+sub _unambiquous_codons{
+    my ($value) = @_;
+
+    # one %IUPAC_DNA entry (an array reference) per character of the codon
+    my @choices = map { $IUPAC_DNA{uc $_} } split(//, $value);
+
+    # every combination of one element per position, in lower case
+    my @expanded;
+    foreach my $first (@{$choices[0]}) {
+        foreach my $second (@{$choices[1]}) {
+            foreach my $third (@{$choices[2]}) {
+                push @expanded, lc($first . $second . $third);
+            }
+        }
+    }
+    return @expanded;
+}
+
+=head2 add_table
+
+ Title   : add_table
+ Usage   : $newid = $ct->add_table($name, $table, $starts)
+ Function: Add a custom Codon Table into the object.
+           Know what you are doing, only the length of
+           the argument strings is checked!
+ Returns : the id of the new codon table
+ Args    : name, a string, optional (can be empty)
+           table, a string of 64 characters
+           startcodons, a string of 64 characters, defaults to standard
+
+=cut
+
+sub add_table {
+    my ($self, $name, $table, $starts) = @_;
+
+    # The parentheses matter: '.' and '+' have the same precedence and
+    # associate to the left, so the unparenthesised form evaluated
+    # ('Custom' . N) + 1 and yielded the number 1 as the default name.
+    $name ||= 'Custom' . (scalar(@NAMES) + 1);
+
+    # fall back to the start codons of the first table
+    $starts ||= $STARTS[0];
+
+    # only the lengths are validated; a missing table is rejected as well
+    $self->throw('Suspect input!')
+        unless defined $table
+           and length($table) == 64 and length($starts) == 64;
+
+    push @NAMES, $name;
+    push @TABLES, $table;
+    push @STARTS, $starts;
+
+    # the three arrays grow in step, so the new id is the new array size
+    return scalar @NAMES;
+
+}
+
+# Collapses a list of codon lists into one string: for each codon list
+# and each of the three codon positions, the distinct characters found
+# at that position are sorted, joined, upper-cased and looked up in the
+# hash returned by Bio::Tools::IUPAC->iupac_rev_iub().
+sub _make_iupac_string {
+
+    my ($self, $cod_ref) = @_;
+
+    $self->throw(" I need a reference to a list of references to codons, ".
+                 " not a [". ref($cod_ref) . "].")
+        if ref($cod_ref) ne 'ARRAY';
+
+    my %iupac_hash   = Bio::Tools::IUPAC->iupac_rev_iub();
+    my $iupac_string = ''; ## the string to be returned
+    foreach my $codon_set (@$cod_ref) {
+        foreach my $position (0 .. 2) {
+            # distinct characters seen at this codon position
+            my %bases;
+            @bases{ map { substr($_, $position, 1) } @$codon_set } = ();
+
+            my $lookup_key = uc join('', sort keys %bases);
+            $iupac_string .= $iupac_hash{$lookup_key};
+        }
+    }
+    return $iupac_string;
+
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Coil.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Coil.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Coil.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,286 @@
+# $Id: Coil.pm,v 1.6.4.1 2006/10/02 23:10:31 sendu Exp $
+# Parser module for Coil Bio::Tools::Coil
+#
+# Based on the EnsEMBL module Bio::EnsEMBL::Pipeline::Runnable::Protein::Coil
+# originally written by Marc Sohrmann (ms2 at sanger.ac.uk)
+# Written in BioPipe by Balamurugan Kumarasamy <savikalpa at fugu-sg.org>
+# Cared for by the Fugu Informatics team (fuguteam at fugu-sg.org)
+
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Coil - parser for Coil output
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::Coil;
+ my $parser = new Bio::Tools::Coil();
+ while( my $sp_feat = $parser->next_result($file) ) {
+       #do something
+       #eg
+       push @sp_feat, $sp_feat;
+ }
+
+=head1 DESCRIPTION
+
+ Parser for Coil output
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+ User feedback is an integral part of the evolution of this and other
+ Bioperl modules. Send your comments and suggestions preferably to
+ the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Based on the EnsEMBL module Bio::EnsEMBL::Pipeline::Runnable::Protein::Coil
+ originally written by Marc Sohrmann (ms2 at sanger.ac.uk)
+ Written in BioPipe by Balamurugan Kumarasamy <savikalpa at fugu-sg.org>
+ Cared for by the Fugu Informatics team (fuguteam at fugu-sg.org)
+
+=head1 APPENDIX
+
+ The rest of the documentation details each of the object methods.
+ Internal methods are usually preceded with a _
+
+
+=cut
+
+package Bio::Tools::Coil;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+
+# Constructor: builds the object through the parent class and hands
+# the same arguments to _initialize_io().
+sub new {
+    # '@args' had been turned into ' at args' by e-mail address
+    # obfuscation, which is not valid Perl.
+    my($class,@args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize_io(@args);
+
+    return $self;
+}
+
+=head2 parse_results
+
+ Title   : parse_results
+ Usage   : obj->parse_results
+ Function: Parses the coil output. Automatically called by
+           next_result() if not yet done.
+ Example :
+ Returns :
+
+=cut
+
+# Reads the records found by _read_fasta() and stores one feature for
+# every uninterrupted run of 'x' characters in each sequence, then
+# marks the predictions as parsed.
+sub parse_results {
+    my ($self,$resfile) = @_;
+    my %result_hash = _read_fasta($resfile);#bala no file handle
+
+    foreach my $id (keys %result_hash){
+        # stores one feature covering positions $start..$end of $id
+        my $add_coil = sub {
+            my ($start, $end) = @_;
+            my %feature = (
+                name       => $id,
+                start      => $start,
+                end        => $end,
+                source     => "Coils",
+                primary    => 'ncoils',
+                program    => 'ncoils',
+                logic_name => 'Coils',
+            );
+            $self->_add_prediction( $self->create_feature(\%feature) );
+        };
+
+        # The sequence is reversed so that chop() hands back its
+        # characters in their original order.
+        my $pep = reverse ($result_hash{$id});
+        my ($count, $in_coil, $start) = (0, 0);
+
+        # Loop on the remaining length, not on the chopped character,
+        # so that a '0' in the data cannot end the scan early.
+        while (length $pep) {
+            my $aa = chop $pep;
+            $count++;
+            if (!$in_coil && $aa eq "x") {
+                $start   = $count;
+                $in_coil = 1;
+            }
+            elsif ($in_coil && $aa ne "x") {
+                $add_coil->($start, $count - 1);
+                $in_coil = 0;
+            }
+        }
+
+        # A run that reaches the end of the sequence is not followed by
+        # a closing character; without this it would be lost.
+        $add_coil->($start, $count) if $in_coil;
+    }
+
+    $self->_predictions_parsed(1);
+
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : while($feat = $coil->next_result($file)) {
+                  # do something
+           }
+ Function: Returns the next protein feature of the coil output file
+ Returns : 
+ Args    :
+
+=cut
+
+sub next_result {
+
+    my ($self,$resfile) = @_;
+
+    # parse on the first call only; later calls just hand out features
+    $self->_predictions_parsed() or $self->parse_results($resfile);
+
+    # scalar(): exactly one value is returned, undef once nothing is left
+    return scalar $self->_result();
+
+}
+
+=head2 _result
+
+ Title   : _result
+ Usage   : $feat = $obj->_result()
+ Function: internal
+ Example :
+ Returns :
+
+=cut
+
+# Removes and returns the oldest stored feature; returns nothing when
+# no feature list exists or the list is empty.
+sub _result {
+    my ($self) = @_;
+
+    if ( exists $self->{'_feats'} ) {
+        my $pending = $self->{'_feats'};
+        return shift @$pending if @$pending;
+    }
+    return;
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($feat)
+ Function: internal
+ Example :
+ Returns :
+
+=cut
+
+# Appends one feature to the list kept under '_feats', creating the
+# list on first use.
+sub _add_prediction {
+    my ($self, $gene) = @_;
+
+    $self->{'_feats'} = [] unless exists $self->{'_feats'};
+    push @{ $self->{'_feats'} }, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+# Get/set for the "already parsed" flag.  Only a true value is stored,
+# so the flag cannot be reset through this method; it reads as 0 until
+# it has been set.
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+
+    if ($val) {
+        $self->{'_preds_parsed'} = $val;
+    }
+    elsif (! exists $self->{'_preds_parsed'}) {
+        $self->{'_preds_parsed'} = 0;
+    }
+    return $self->{'_preds_parsed'};
+}
+
+
+=head2 create_feature
+
+ Title   : create_feature
+ Usage   : obj->create_feature(\%feature)
+ Function: Internal(not to be used directly)
+ Returns :
+ Args    :
+
+
+=cut
+
+sub create_feature {
+    my ($self, $feat) = @_;
+
+    # constructor arguments taken from the feature description hash
+    my @feature_args = (
+        -seq_id      => $feat->{name},
+        -start       => $feat->{start},
+        -end         => $feat->{end},
+        -score       => $feat->{score},
+        -source      => $feat->{source},
+        -primary     => $feat->{primary},
+        -logic_name  => $feat->{logic_name},
+    );
+    my $feature = Bio::SeqFeature::Generic->new(@feature_args);
+
+    # fixed tags, plus the primary tag repeated as 'hid'
+    $feature->add_tag_value('evalue',0);
+    $feature->add_tag_value('percent_id','NULL');
+    $feature->add_tag_value("hid",$feat->{primary});
+
+    return $feature;
+
+}
+
+=head2 _read_fasta
+
+ Title   : _read_fasta
+ Usage   : obj->_read_fasta($file)
+ Function: Internal(not to be used directly)
+ Returns :
+ Args    :
+
+
+=cut
+
+# Reads FASTA-style records from the handle given as first argument and
+# returns a hash (as a flat list) that maps each record id - the first
+# run of non-blank characters after '>' - to its sequence lines joined
+# into one string.
+# NOTE(review): a header that is the very last line of the input is
+# never stored, and an id that is false (e.g. "0") is skipped by the
+# 'if ($id)' tests - confirm whether that is intended.
+sub _read_fasta {
+    # bind the bareword handle FILE to the first argument for this call
+    local (*FILE) = @_;
+    my( $id , $seq , %name2seq);#bala
+        while (<FILE>) {
+	    chomp;		#bala
+	    if (/^>(\S+)/) {
+
+		# a new header closes the record read so far
+		my $new_id = $1;
+		if ($id) {
+		    $name2seq{$id} = $seq;
+		}
+		$id = $new_id ; $seq = "" ;
+	    } elsif (eof) {
+		# last line of the input: append it and store the final
+		# record, as no further header will do so
+		if ($id) {
+		    $seq .= $_ ;#bala line instead of $_
+		    $name2seq{$id} = $seq;
+		}
+	    }
+	    else {
+		# ordinary sequence line
+		$seq .= $_;
+	    }
+        }
+    return %name2seq;
+}
+
+1;
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Coil.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ECnumber.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ECnumber.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ECnumber.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,570 @@
+# $Id: ECnumber.pm,v 1.11.4.1 2006/10/02 23:10:31 sendu Exp $
+#
+# BioPerl module for Bio::Tools::ECnumber
+#
+# Cared for by Christian M. Zmasek <czmasek at gnf.org> or <cmzmasek at yahoo.com>
+#
+# (c) Christian M. Zmasek, czmasek at gnf.org, 2002.
+# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
+#
+# You may distribute this module under the same terms as perl itself.
+# Refer to the Perl Artistic License (see the license accompanying this
+# software package, or see http://www.perl.com/language/misc/Artistic.html)
+# for the terms under which you may use, modify, and redistribute this module.
+#
+# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# POD documentation - main docs before the code
+
+
+=head1 NAME
+
+Bio::Tools::ECnumber - representation of EC numbers (Enzyme Classification)
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::ECnumber;
+
+  # Creation of ECnumber objects
+  my $EC1 = Bio::Tools::ECnumber->new( -ec_string => "4.3.2.1" );
+  my $EC2 = Bio::Tools::ECnumber->new( -ec_string => "EC 1.1.1.1" );
+  my $EC3 = Bio::Tools::ECnumber->new();
+
+  # Copying
+  my $EC4 = $EC1->copy();
+
+  # Modification/canonicalization of ECnumber objects
+  print $EC3->EC_string( "1.01.01.001" ); # Prints "1.1.1.1".
+
+  # Stringify
+  print $EC3->EC_string();
+  # or
+  print $EC3->to_string();
+
+  # Test for equality
+  # -- Against ECnumber object:
+  if ( $EC3->is_equal( $EC2 ) ) { # Prints "equal".
+      print "equal";
+  }
+  # -- Against string representation of EC number:
+  if ( ! $EC3->is_equal( "1.1.1.-" ) ) { # Prints "not equal".
+      print "not equal";
+  }
+
+  # Test for membership
+  my $EC5 = Bio::Tools::ECnumber->new( -ec_string => "4.3.2.-" ); 
+  # -- Against ECnumber object.
+  if ( $EC1->is_member( $EC5 ) ) { # Prints "member".
+      print "member"; 
+  }
+  # -- Against string representation of EC number.
+  if ( ! $EC1->is_member( "4.3.1.-" ) ) { # Prints "not member".
+      print "not member";
+  }
+
+=head1 DESCRIPTION
+
+L<Bio::Tools::ECnumber> is a representation of EC numbers, 
+the numerical hierarchy for Enzyme Classification.
+
+See L<http://www.chem.qmul.ac.uk/iubmb/enzyme/> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Christian M. Zmasek
+
+Email: czmasek at gnf.org  or  cmzmasek at yahoo.com
+
+WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/
+
+Address: 
+
+  Genomics Institute of the Novartis Research Foundation
+  10675 John Jay Hopkins Drive
+  San Diego, CA 92121
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tools::ECnumber;
+use strict;
+
+use constant DEFAULT => "-";
+use constant TRUE    => 1;
+use constant FALSE   => 0;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : $EC1 = Bio::Tools::ECnumber->new( -ec_string => "4.3.2.1" );
+           or
+           $EC2 = Bio::Tools::ECnumber->new( -ec_string => "4.3.2.2",
+                                             -comment   => "Is EC 4.3.2.2" );
+           or                      
+           $EC3 = Bio::Tools::ECnumber->new(); # EC3 is now "-.-.-.-"                      
+ Function: Creates a new ECnumber object.
+           Parses a EC number from "x.x.x.x", "EC x.x.x.x",
+           "ECx.x.x.x", or "EC:x.x.x.x";
+           x being either a positive integer or a "-".
+ Returns : A new ECnumber object.
+ Args    : A string representing a EC number, e.g. "4.3.2.1"
+           or "EC 4.3.2.1" or "1.-.-.-".
+
+=cut
+
+sub new {
+    my( $class, @args ) = @_;
+
+    my $self = $class->SUPER::new( @args );
+
+    my ( $EC_string, $comment )
+    = $self->_rearrange( [ qw( EC_STRING COMMENT ) ], @args );
+
+    # start from the defaults, then apply whatever was passed in
+    $self->init();
+    $self->EC_string( $EC_string ) if $EC_string;
+    $self->comment( $comment )     if $comment;
+
+    return $self;
+
+} # new
+
+
+
+=head2 init
+
+ Title   : init()
+ Usage   : $EC1->init(); # EC1 is now "-.-.-.-"    
+ Function: Initializes this ECnumber to default values.
+ Returns : 
+ Args    :
+
+=cut
+
+sub init {
+    my( $self ) = @_;
+
+    # every level of the EC number goes back to the DEFAULT placeholder
+    foreach my $level ( qw( enzyme_class sub_class sub_sub_class serial_number ) ) {
+        $self->$level( DEFAULT );
+    }
+    $self->comment( "" );
+
+} # init
+
+
+
+=head2 copy
+
+ Title   : copy()
+ Usage   : $EC2 = $EC1->copy();
+ Function: Creates a new ECnumber object which is an exact copy
+           of this ECnumber.
+ Returns : A copy of this ECnumber.
+ Args    :
+
+=cut
+
+sub copy {
+    my( $self ) = @_;
+
+    my $new_ec = $self->new();
+
+    # transfer each field through its own set/get method
+    foreach my $field ( qw( enzyme_class sub_class sub_sub_class
+                            serial_number comment ) ) {
+        $new_ec->$field( $self->$field() );
+    }
+    return $new_ec;
+
+} # copy
+
+
+
+=head2 EC_string
+
+ Title   : EC_string
+ Usage   : $EC3->EC_string( "1.1.1.-" );
+           or
+           print $EC3->EC_string();
+ Function: Set/get for string representations of EC numbers.
+           Parses a EC number from "x.x.x.x", "EC x.x.x.x",
+           "ECx.x.x.x", or "EC:x.x.x.x";
+           x being either a positive integer or a "-".
+ Returns : A string representations of a EC number.
+ Args    : A string representations of a EC number.
+
+=cut
+
+sub EC_string {
+    my ( $self, $value ) = @_;
+
+    # plain getter when called without a value
+    return $self->to_string() unless defined $value;
+
+    $value =~ s/\s+//g; # Removes white space.
+    $value =~ s/^EC//i; # Removes "EC".
+    $value =~ s/^://;   # Removes ":".
+
+    # four dot separated fields of digits and/or "-"
+    my @fields = $value =~ /^([\d-]*)\.([\d-]*)\.([\d-]*)\.([\d-]*)$/
+        or $self->throw( "Illegal format error [$value]" );
+
+    $self->enzyme_class(  $fields[0] );
+    $self->sub_class(     $fields[1] );
+    $self->sub_sub_class( $fields[2] );
+    $self->serial_number( $fields[3] );
+
+    return $self->to_string();
+
+} # EC_string
+
+
+
+=head2 to_string
+
+ Title   : to_string()
+ Usage   : print $EC3->to_string();
+ Function: To string method for EC numbers
+           (equals the "get" functionality of "EC_string").
+ Returns : A string representations of a EC number.
+ Args    :
+
+=cut
+
+sub to_string {
+    my ( $self ) = @_;
+
+    # the four levels joined by dots, most general level first
+    return join( ".",
+                 $self->enzyme_class(),
+                 $self->sub_class(),
+                 $self->sub_sub_class(),
+                 $self->serial_number() );
+
+} # to_string
+
+
+
+=head2 is_equal
+
+ Title   : is_equal
+ Usage   : if ( $EC3->is_equal( $EC2 ) )
+           or
+           if ( $EC3->is_equal( "1.1.1.-" ) )
+ Function: Checks whether this ECnumber is equal to the argument
+           EC number (please note: "1.1.1.1" != "1.1.1.-").
+ Returns : True (1) or false (0).
+ Args    : A ECnumber object or a string representation of a EC number.
+
+=cut
+
+sub is_equal {
+    my ( $self, $value ) = @_;
+
+    # a plain string is parsed into an object, a reference is type checked
+    if ( ! $self->_is_not_reference( $value ) ) {
+        $self->_is_ECnumber_object( $value );
+    }
+    else {
+        $value = $self->new( -ec_string => $value );
+    }
+
+    # all four levels have to agree literally
+    foreach my $level ( qw( enzyme_class sub_class sub_sub_class serial_number ) ) {
+        return FALSE if $self->$level() ne $value->$level();
+    }
+    return TRUE;
+
+} # is_equal
+
+
+
+=head2 is_member
+
+ Title   : is_member
+ Usage   : if ( $EC1->is_member( $EC5 ) )
+           or
+           if ( $EC1->is_member( "4.3.-.-" ) )
+ Function: Checks whether this ECnumber is a member of the (incomplete)
+           argument EC number (e.g. "1.1.1.1" is a member of "1.1.1.-"
+           but not of "1.1.1.2").
+ Returns : True (1) or false (0).
+ Args    : A ECnumber object or a string representation of a EC number.
+
+=cut
+
+sub is_member {
+    my ( $self, $value ) = @_;
+
+    # a plain string is parsed into an object, a reference is type checked
+    if ( ! $self->_is_not_reference( $value ) ) {
+        $self->_is_ECnumber_object( $value );
+    }
+    else {
+        $value = $self->new( -ec_string => $value );
+    }
+    $self->_check_for_illegal_defaults();
+    $value->_check_for_illegal_defaults();
+
+    # a level left at DEFAULT in the argument matches anything; every
+    # other level has to agree literally
+    foreach my $level ( qw( enzyme_class sub_class sub_sub_class serial_number ) ) {
+        my $wanted = $value->$level();
+        next if $wanted eq DEFAULT;
+        return FALSE if $self->$level() ne $wanted;
+    }
+    return TRUE;
+
+} # is_member 
+
+
+
+=head2 enzyme_class
+
+ Title   : enzyme_class
+ Usage   : $EC1->enzyme_class( 1 );
+           or 
+           print $EC1->enzyme_class(); 
+ Function: Set/get for the enzyme class number of ECnumbers.
+ Returns : The enzyme class number of this ECnumber.
+ Args    : A positive integer or "-".
+
+=cut
+
+sub enzyme_class {
+    my ( $self, $value ) = @_;
+
+    # a new value is stored only after _check_number() has normalised it
+    $self->{ "_enzyme_class" } = $self->_check_number( $value )
+        if defined $value;
+
+    return $self->{ "_enzyme_class" };
+
+} # enzyme_class
+
+
+
+=head2 sub_class
+
+ Title   : sub_class
+ Usage   : $EC1->sub_class( 4 );
+           or 
+           print $EC1->sub_class(); 
+ Function: Set/get for the enzyme sub class number of ECnumbers.
+ Returns : The enzyme sub class number of this ECnumber.
+ Args    : A positive integer or "-".
+
+=cut
+
+sub sub_class {
+    my ( $self, $value ) = @_;
+
+    # a new value is stored only after _check_number() has normalised it
+    $self->{ "_sub_class" } = $self->_check_number( $value )
+        if defined $value;
+
+    return $self->{ "_sub_class" };
+
+} # sub_class
+
+
+
+=head2 sub_sub_class 
+
+ Title   : sub_sub_class
+ Usage   : $EC1->sub_sub_class( 12 );
+           or 
+           print $EC1->sub_sub_class(); 
+ Function: Set/get for the enzyme sub sub class number of ECnumbers.
+ Returns : The enzyme sub sub class number of this ECnumber.
+ Args    : A positive integer or "-".
+
+=cut
+
+sub sub_sub_class {
+    my ( $self, $value ) = @_;
+
+    # a new value is stored only after _check_number() has normalised it
+    $self->{ "_sub_sub_class" } = $self->_check_number( $value )
+        if defined $value;
+
+    return $self->{ "_sub_sub_class" };
+
+} # sub_sub_class
+
+
+
+=head2 serial_number
+
+ Title   : serial_number
+ Usage   : $EC1->serial_number( 482 );
+           or 
+           print $EC1->serial_number(); 
+ Function: Set/get for the serial number of ECnumbers.
+ Returns : The serial number of this ECnumber.
+ Args    : A positive integer or "-".
+
+=cut
+
+sub serial_number {
+    my ( $self, $value ) = @_;
+
+    # a new value is stored only after _check_number() has normalised it
+    $self->{ "_serial_number" } = $self->_check_number( $value )
+        if defined $value;
+
+    return $self->{ "_serial_number" };
+
+} # serial_number
+
+
+
+=head2 comment
+
+ Title   : comment
+ Usage   : $EC1->comment( "deprecated" );
+           or 
+           print $EC1->comment();
+ Function: Set/get for a arbitrary comment.
+ Returns : A comment [scalar].
+ Args    : A comment [scalar].
+
+=cut
+
+sub comment {
+    my ( $self, $value ) = @_;
+
+    # any defined value, the empty string included, replaces the comment
+    $self->{ "_comment" } = $value if defined $value;
+
+    return $self->{ "_comment" };
+
+} # comment
+
+
+
+# Title   : _check_number
+# Function: Normalises one field of a EC number: white space and
+#           leading zeros are removed, an empty field becomes the
+#           DEFAULT placeholder and a field of only zeros becomes "0".
+#           Throws if anything but digits is left in a field that is
+#           not the placeholder.
+# Returns : The normalised field.
+# Args    : A string representing one field of a EC number.
+sub _check_number {
+    my ( $self, $value ) = @_;
+
+    my $number = $value;
+    $number =~ s/\s+//g;                  # Removes white space.
+    $number = DEFAULT if $number eq "";
+    $number =~ s/^0+//;                   # Removes leading zeros.
+
+    # Only zeros were given, so nothing is left.
+    return "0" if $number eq "";
+
+    if ( $number ne DEFAULT && $number =~ /\D/ ) {
+        $self->throw( "Illegal format error [$value]" );
+    }
+    return $number;
+
+} # _check_number
+
+
+
+# Title   : _check_for_illegal_defaults()
+# Function: Throws when a level is left at DEFAULT while the level
+#           directly below it is set (e.g. "1.-.1.1"); such numbers
+#           are illegal in membership tests.
+# Returns :
+# Args    :
+sub _check_for_illegal_defaults {
+    my ( $self ) = @_;
+
+    # the four levels, most general first
+    my @levels = map { $self->$_() }
+                 qw( enzyme_class sub_class sub_sub_class serial_number );
+
+    # number of neighbouring pairs that go from DEFAULT to a set value
+    my $illegal = grep { $levels[$_]     eq DEFAULT
+                      && $levels[$_ + 1] ne DEFAULT } 0 .. 2;
+
+    if ( $illegal ) {
+        $self->throw( "Illegal format error for comparison ["
+        . $self->to_string() . "]" );
+    }
+
+} # _check_for_illegal_defaults
+
+
+
+# Title   : _is_not_reference
+# Function: Tells whether the argument is a plain scalar, i.e. not a
+#           reference of any kind.
+# Returns : True or false.
+# Args    : A scalar.
+sub _is_not_reference {
+    my ( undef, $candidate ) = @_;
+
+    # ref() yields the empty string for anything that is not a reference
+    my $reference_type = ref $candidate;
+    return ! $reference_type;
+
+} # _is_not_reference
+
+
+
+# Title   : _is_ECnumber_object
+# Function: Checks whether the argument is a ECnumber and throws
+#           if it is not.
+# Returns :
+# Args    : A reference.
+sub _is_ECnumber_object {
+    my ( $self, $value ) = @_;
+
+    # The eval lets an unblessed reference (on which isa() cannot be
+    # called) fail with the same exception as an object of another class.
+    unless( eval { $value->isa( "Bio::Tools::ECnumber" ) } ) {
+        $self->throw( "Found [". ref( $value ) 
+        ."] where [Bio::Tools::ECnumber] expected" );
+    }   
+    
+} # _is_ECnumber_object
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EMBOSS/Palindrome.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EMBOSS/Palindrome.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EMBOSS/Palindrome.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,194 @@
+# $Id: Palindrome.pm,v 1.4.4.1 2006/10/02 23:10:33 sendu Exp $
+#
+# BioPerl module for Bio::Tools::EMBOSS::Palindrome
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::EMBOSS::Palindrome - parse EMBOSS palindrome output
+
+=head1 SYNOPSIS
+
+  # a simple script to turn palindrome output into GFF3
+  use Bio::Tools::EMBOSS::Palindrome;
+  use Bio::Tools::GFF;
+
+  my $parser = new Bio::Tools::EMBOSS::Palindrome(-file => $filename);
+  my $out    = new Bio::Tools::GFF(-gff_version => 3,
+                                   -file => ">$filename.gff");
+  while( my $seq = $parser->next_seq ) {
+     for my $feat ( $seq->get_SeqFeatures ) {
+        $out->write_feature($feat);
+     }
+  }
+
+=head1 DESCRIPTION
+
+This is a parser for the EMBOSS tool 'palindrome'.  It will produce a
+L<Bio::Seq> object for each sequence analyzed.  The sequence will be
+empty (but will be of the correct length) and will have attached to it
+L<Bio::SeqFeature::FeaturePair> objects which will each hold the two matching regions of one palindrome as feature1 and feature2.
+
+
+=head2 FUTURE WORK
+
+It may be consolidated into another framework at a later time, but for
+the time being it will stay a separate module.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::EMBOSS::Palindrome;
+use vars qw($DEFAULT_SOURCETAG);
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Root::IO);
+$DEFAULT_SOURCETAG = 'palindrome';
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::EMBOSS::Palindrome();
+ Function: Builds a new Bio::Tools::EMBOSS::Palindrome object 
+ Returns : an instance of Bio::Tools::EMBOSS::Palindrome
+ Args    : -file/-fh  => a filename or filehandle for
+                         initializing the parser
+
+=cut
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : my $seq = $parser->next_seq;
+ Function: Get the next feature set from the palindrome report
+ Returns : L<Bio::SeqI> object
+ Args    : none
+
+
+=cut
+
+sub next_seq {
+    my ($self) = @_;
+    my (%searching, $seq,$state);
+    my $source = $self->source_tag;
+    $state = 0;    # 0: header/parameter lines, 1: inside the "Palindromes:" listing
+    while(defined($_ = $self->_readline)) {
+	if( /^\s+$/ ) {
+	    next;
+	} elsif( /^Palindromes\s+of\s*:\s+(\S+)/o ) {
+	    $state = 0;
+	    if( $seq )  {
+		$self->_pushback($_);    # header of the next record; presumably re-queued for the next call
+		return $seq;
+	    } 
+	    $seq = new Bio::Seq(-display_id => $1);
+	    # %searching collects constructor arguments shared by every feature of this record
+	    $searching{'-seq_id'} = $1;
+	} elsif( /^Sequence\s+length\s+is\s*:\s+(\d+)/o ) {
+	    $seq->length($1);
+	    $searching{'-tag'}->{'seqlength'} = $1;
+	} elsif( /^(Start|End)\s+at\s+position\s*:\s+(\d+)/ ) {
+	    $searching{'-tag'}->{lc($1)} = $2;
+	} elsif( m/^(Maximum|Minimum)\s+length\s+of\s+Palindromes\s+
+		 is\s*:\s+(\d+)/ox) {
+	    $searching{'-tag'}->{lc($1).'_length'} = $2;
+	} elsif( /^(Maximum\s+gap)\s+between\s+elements\s+is\s*:\s+(\d+)/o ) {
+	    $searching{'-tag'}->{lc($1)} = $2;
+	} elsif( m/^Number\s+of\s+mismatches\s+allowed\s+
+		 in\s+Palindrome\s*:\s+(\d+)/ox ) {
+	    $searching{'-tag'}->{'allowed_mismatches'} = $1;
+	} elsif( /^Palindromes:/o ) {
+	    $state = 1;
+	} elsif( $state == 1 ) {
+	    my $feature = new Bio::SeqFeature::FeaturePair
+		(-primary_tag  => 'similarity',
+		 -source_tag   => $source);
+	    for(my $i = 0; $i < 3; $i++ ) {    # one palindrome is read as three consecutive lines
+		if ($i != 1) {    # lines 0 and 2 carry coordinates; line 1 is skipped unparsed
+		    if( /^(\d+)\s+(\S+)\s+(\d+)/o ) {
+			my ($start,$match,$end) = ($1,$2,$3);
+			my $type = $i == 0 ? 'feature1' : 'feature2';
+			($start,$end) = sort { $a <=> $b } ($start,$end);    # smaller coordinate becomes start
+			$feature->$type(
+					Bio::SeqFeature::Generic->new
+					(%searching,
+					 -start       => $start,
+					 -end         => $end,
+					 -strand      => $i == 0 ? 1 : -1,
+					 -primary_tag => 'similarity',
+					 -source_tag  => $source)
+					);
+		    } else { 
+			chomp;
+			warn("Out of sync, line did not match:'$_'\n");
+		    }
+		    # NOTE(review): $_ is undef here if input ended inside a triplet - not checked.
+		}
+		$_ = $self->_readline;    # runs three times, so one line past the triplet is consumed too
+	    }
+	    $seq->add_SeqFeature($feature);    # NOTE(review): $seq is undef if no header line preceded
+	}
+    }
+    return $seq;
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $obj->source_tag($newval)
+ Function: Get/Set Source Tag ('palindrome') by default
+ Returns : value of source_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source_tag {
+    my ($obj, @new) = @_;
+    # A set returns the stored value as-is; only a get applies the default.
+    return $obj->{'source_tag'} = $new[0] if @new;
+    return $obj->{'source_tag'} || $DEFAULT_SOURCETAG;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EPCR.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EPCR.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/EPCR.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,232 @@
+# $Id: EPCR.pm,v 1.18.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::EPCR
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::EPCR - Parse ePCR output and make features
+
+=head1 SYNOPSIS
+
+    # A simple annotation pipeline wrapper for ePCR data
+    # assuming ePCR data is already generated in file seq1.epcr
+    # and sequence data is in fasta format in file called seq1.fa
+
+    use Bio::Tools::EPCR;
+    use Bio::SeqIO;
+    my $parser = new Bio::Tools::EPCR(-file => 'seq1.epcr');
+    my $seqio = new Bio::SeqIO(-format => 'fasta', -file => 'seq1.fa');
+    my $seq = $seqio->next_seq || die("cannot get a seq object from SeqIO");
+
+    while( my $feat = $parser->next_feature ) {
+	# add EPCR annotation to a sequence
+	$seq->add_SeqFeature($feat);
+    }
+    my $seqout = new Bio::SeqIO(-format => 'embl');
+    $seqout->write_seq($seq);
+
+
+=head1 DESCRIPTION
+
+This object serves as a parser for ePCR data, creating a
+Bio::SeqFeatureI for each ePCR hit.  These can be processed or added
+as annotation to an existing Bio::SeqI object for the purposes of
+automated annotation.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::EPCR;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $epcr = new Bio::Tools::EPCR(-file => $file,
+					   -primary => $fprimary, 
+					   -source => $fsource, 
+					   -groupclass => $fgroupclass);
+ Function: Initializes a new EPCR parser
+ Returns : Bio::Tools::EPCR
+ Args    : -fh   => filehandle
+           OR
+           -file => filename
+
+           -primary => a string to be used as the common value for
+                       each features '-primary' tag.  Defaults to
+                       'sts'.  (This in turn maps to the GFF 'type'
+                       tag (aka 'method')).
+
+            -source => a string to be used as the common value for
+                       each features '-source' tag.  Defaults to
+                       'e-PCR'. (This in turn maps to the GFF 'source'
+                       tag)
+
+             -groupclass => a string to be used as the name of the tag
+                           which will hold the sts marker namefirst
+                           attribute.  Defaults to 'name'.
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($primary, $source, $groupclass) =
+      $self->_rearrange([qw(PRIMARY SOURCE GROUPCLASS)], @args);
+  # Use the documented defaults only when an option is absent (undef),
+  # so that an explicit false value such as '0' is still honoured.
+  $self->primary(defined $primary ? $primary : 'sts');
+  $self->source(defined $source ? $source : 'e-PCR');
+  $self->groupclass(defined $groupclass ? $groupclass : 'name');
+
+  $self->_initialize_io(@args);
+  return $self;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none    
+
+=cut
+
+sub next_feature {
+    my ($self) = @_;
+    my $line = $self->_readline;
+    return unless defined($line);
+    chomp($line);
+    my($seqname,$location,$mkrname, $rest) = split(/\s+/,$line,4);
+
+    # Short lines leave trailing fields undefined; guard before matching.
+    my ($start,$end) = defined($location) ? ($location =~ /(\S+)\.\.(\S+)/) : ();
+
+    # `e-PCR -direct` results code match strand in $rest as (+) and (-).
+    # Decode it if present; a strand of 0 means it was not reported.
+    my $strand = 0;
+    if (defined($rest) && $rest =~ m/^\(([+-])\)(.*)$/) {
+      $strand = $1 eq "+" ? 1 : -1;
+      $rest   = $2;
+    }
+
+    my $markerfeature = Bio::SeqFeature::Generic->new
+        ( '-start'   => $start,
+          '-end'     => $end,
+          '-strand'  => $strand,
+          '-source'  => $self->source,
+          '-primary' => $self->primary,
+          '-seq_id'  => $seqname,
+          '-tag'     => {
+              $self->groupclass => $mkrname,
+              ($rest ? ('Note'            => $rest ) : ()),
+          });
+    # The 'Note' tag is attached only when the remainder of the line is true.
+    return $markerfeature;
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $obj->source($newval)
+ Function: 
+ Example : 
+ Returns : value of source (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source {
+    my ($obj, @new) = @_;
+    $obj->{'_source'} = $new[0] if @new;    # any argument, even undef, is a set
+    return $obj->{'_source'};
+}
+
+=head2 primary
+
+ Title   : primary
+ Usage   : $obj->primary($newval)
+ Function: 
+ Example : 
+ Returns : value of primary (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub primary {
+    my ($obj, @new) = @_;
+    $obj->{'_primary'} = $new[0] if @new;    # any argument, even undef, is a set
+    return $obj->{'_primary'};
+}
+
+=head2 groupclass
+
+ Title   : groupclass
+ Usage   : $obj->groupclass($newval)
+ Function: 
+ Example : 
+ Returns : value of groupclass (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub groupclass {
+    my ($obj, @new) = @_;
+    # Combined getter/setter: any argument, even undef, replaces the value.
+    $obj->{'_groupclass'} = $new[0] if @new;
+    return $obj->{'_groupclass'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ERPIN.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ERPIN.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ERPIN.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,337 @@
+# $Id: ERPIN.pm,v 1.5.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::ERPIN
+#
+# Cared for by Chris Fields <cjfields-at-uiuc-dot-edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::ERPIN -  a parser for ERPIN output
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::ERPIN;
+  my $parser = new Bio::Tools::ERPIN( -file => $rna_output,
+                                      -motiftag => 'protein_bind',
+                                      -desctag => 'TRAP_binding');
+  #parse the results
+  while( my $motif = $parser->next_prediction) {
+    # do something here
+  }
+
+=head1 DESCRIPTION
+
+Parses raw ERPIN output.
+
+This module is not currently complete.  As is, it will parse raw
+ERPIN long format output and pack information into
+Bio::SeqFeature::Generic objects.  
+
+Several values have also been added in the 'tag' hash.  These can be
+accessed using the following syntax:
+
+  my ($entry) = $feature->get_Annotations('SecStructure');
+
+Added tags are : 
+   tset         - training set used for the sequence
+   tsetdesc     - training set description line
+   cutoff       - cutoff value used
+   database     - name of database
+   dbdesc       - description of database
+   dbratios     - nucleotide ratios of database (used to calculate evalue)
+   descline     - entire description line (in case the regex used for
+                  sequence ID doesn't adequately catch the name)
+   accession    - accession number of sequence (if present)
+   logodds      - logodds score value
+   sequence     - sequence from hit, separated based on training set
+
+See t/ERPIN.t for example usage.
+
+At some point a more complicated feature object may be used to support
+this data rather than forcing most of the information into tag/value
+pairs in a SeqFeature::Generic.  This will hopefully allow for more
+flexible analysis of data (specifically RNA secondary structural
+data).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Fields
+
+Email cjfields-at-uiuc-dot-edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::ERPIN;
+use strict;
+
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+our($MotifTag,$SrcTag,$DescTag) = qw(misc_binding ERPIN erpin);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::ERPIN();
+ Function: Builds a new Bio::Tools::ERPIN object 
+ Returns : an instance of Bio::Tools::ERPIN
+ Args    : -fh/-file for input filename
+           -motiftag => primary tag used in gene features (default 'misc_binding')
+           -desctag => tag used for display_name name (default 'erpin')
+           -srctag  => source tag used in all features (default 'ERPIN')
+
+=cut
+
+sub _initialize {
+    my ($self, @args) = @_;
+    $self->SUPER::_initialize(@args);
+    my ($motiftag, $desctag, $srctag) =
+        $self->SUPER::_rearrange([qw(MOTIFTAG DESCTAG SRCTAG)], @args);
+    # An option left undefined falls back to the package-level default.
+    $self->motif_tag(defined $motiftag ? $motiftag : $MotifTag);
+    $self->source_tag(defined $srctag ? $srctag : $SrcTag);
+    $self->desc_tag(defined $desctag ? $desctag : $DescTag);
+    # Pre-seed parser state under the keys next_prediction() actually uses,
+    # so header fields missing from a report are '' rather than undef.
+    foreach my $key (qw(_tset _tset_descr _cutoff _db _db_desc
+                        _db_ratios _eval_cutoff _seqid _seq_acc _seq_desc)) {
+        $self->{$key} = '';
+    }
+}
+
+=head2 motif_tag
+
+ Title   : motif_tag
+ Usage   : $obj->motif_tag($newval)
+ Function: Get/Set the value used for 'motif_tag', which is used for setting the
+           primary_tag.
+           Default is 'misc_binding' as set by the global $MotifTag.
+           'misc_binding' is used here because a conserved RNA motif is capable
+           of binding proteins (regulatory proteins), antisense RNA (siRNA),
+           small molecules (riboswitches), or nothing at all (tRNA,
+           terminators, etc.).  It is recommended that this be changed to other
+           tags ('misc_RNA', 'protein_binding', 'tRNA', etc.) where appropriate.
+           For more information, see:
+           http://www.ncbi.nlm.nih.gov/collab/FT/index.html
+ Returns : value of motif_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub motif_tag {
+    my ($obj, @new) = @_;
+    # Combined getter/setter: any argument, even undef, replaces the value.
+    $obj->{'motif_tag'} = $new[0] if @new;
+    return $obj->{'motif_tag'};
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $obj->source_tag($newval)
+ Function: Get/Set the value used for the 'source_tag'.
+           Default is 'ERPIN' as set by the global $SrcTag
+ Returns : value of source_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source_tag {
+    my ($obj, @new) = @_;
+    # Combined getter/setter: any argument, even undef, replaces the value.
+    $obj->{'source_tag'} = $new[0] if @new;
+    return $obj->{'source_tag'};
+}
+
+=head2 desc_tag
+
+ Title   : desc_tag
+ Usage   : $obj->desc_tag($newval)
+ Function: Get/Set the value used for the query motif.  This will be placed in
+           the tag '-display_name'.  Default is 'erpin' as set by the global
+           $DescTag.  Use this to manually set the descriptor (motif searched for).
+           Since there is no way for this module to tell what the motif is from the
+           name of the descriptor file or the ERPIN output, this should
+           be set every time an ERPIN object is instantiated for clarity
+ Returns : value of desc_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub desc_tag {
+    my ($obj, @new) = @_;
+    # Combined getter/setter: any argument, even undef, replaces the value.
+    $obj->{'desc_tag'} = $new[0] if @new;
+    return $obj->{'desc_tag'};
+}
+
+=head2 analysis_method
+
+ Usage     : $obj->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /ERPIN/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+    # A true method name is accepted only if it mentions "ERPIN" (any case).
+    $self->throw("method $method not supported in " . ref($self))
+        if $method && $method !~ /ERPIN/i;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $obj->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the ERPIN result
+           file. Call this method repeatedly until FALSE is returned.
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    : None (at present)
+
+=cut
+
+sub next_feature {
+    my ($self, @args) = @_;
+    # Plain alias for next_prediction(). That method takes no arguments
+    # at present; whatever is passed here is forwarded anyway, so this
+    # wrapper need not change should it ever accept some.
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $obj->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the ERPIN result
+           file. Call this method repeatedly until FALSE is returned.
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    : None (at present)
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+    my ($motiftag, $srctag, $desctag) =
+        ($self->motif_tag, $self->source_tag, $self->desc_tag);
+    # Localise $_ and test definedness so a bare "0" line cannot end parsing.
+    local $_;
+    while(defined($_ = $self->_readline)) {
+        #skip blank lines
+        next if /^\s+$/;
+        # Header lines: each value is remembered on the object and attached
+        # to every hit returned afterwards.
+        if(/^Training set:\s+\"(.*)\":$/) {
+            $self->{'_tset'}=$1;
+        }
+        elsif(/\s+(\d+ sequences of length \d+)/){
+            $self->{'_tset_descr'}=$1;
+        }
+        elsif(/^Cutoff:\s+(\S+)\s+$/) {
+            $self->{'_cutoff'}=$1;
+        }
+        elsif(/^Database:\s+\"(.*)\"$/) {
+            $self->{'_db'}=$1;
+        }
+        elsif(/^\s+(\d+ nucleotides to be processed in \d+ sequence)$/) {
+            $self->{'_db_desc'}=$1;
+        }
+        elsif(/^\s+ATGC ratios:\s(\d.\d+)\s+(\d.\d+)\s+(\d.\d+)\s+(\d.\d+)$/) {
+            my $atgc=sprintf("A=%0.3f T=%0.3f G=%0.3f C=%0.3f", $1, $2, $3, $4);
+            $self->{'_db_ratios'}=$atgc;
+        }
+        elsif(/^E-value at cutoff \S+ for \S+(?:G|M|k)?b double strand data: (\S+)/) {
+            $self->{'_eval_cutoff'}=$1;
+        }
+        # Description line of the sequence the following hits belong to.
+        elsif (/^>(.*)/) {
+            $self->{_seq_desc} = $1;
+            # Plain alternation of database tags: a stray "P<db>" used to be
+            # matched literally, so "gb|..." ids could never match.
+            if($self->{_seq_desc} =~
+               /(?:gb|gi|emb|dbj|sp|pdb|bbs|ref|lcl)\|(\d+)((?:\:|\|)\w+\|(\S*.\d+)\|)?/) {
+                $self->{_seqid} = $1; # pulls out gid
+                $self->{_seq_acc} = defined $3 ? $3 : ''; # accession part is optional
+            } else {
+                $self->{_seqid} = $self->{_seq_desc};
+                $self->{_seq_acc} = '';
+            }
+        }
+        # A hit line; the matched sequence follows on the next line.
+        elsif (/^(FW|RC)\s+\d+\s+(\d+)..(\d+)\s+(\d+.\d+)\s+(.*)/) {
+            my ($strand, $start, $end, $logodds, $score) = ($1, $2, $3, $4, $5);
+            $score =~ s/^e/1e/i; # a bare exponent such as "e-05" becomes "1e-05"
+            my $sequence = $self->_readline;
+            $sequence = '' unless defined $sequence; # input ended after the hit line
+            chomp $sequence;
+            my $gene = Bio::SeqFeature::Generic->new(-seq_id => $self->{_seqid},
+                -start        => $start,
+                -end          => $end,
+                -strand       => $strand eq 'FW' ? 1 : -1,
+                -score        => $score,
+                -primary_tag  => $motiftag,
+                -source_tag   => $srctag,
+                -display_name => $desctag,
+                -tag          => {
+                    'tset'      => $self->{_tset},
+                    'tsetdesc'  => $self->{_tset_descr},
+                    'cutoff'    => $self->{_cutoff},
+                    'database'  => $self->{_db},
+                    'dbdesc'    => $self->{_db_desc},
+                    'dbratios'  => $self->{_db_ratios},
+                    'descline'  => $self->{_seq_desc},
+                    'accession' => $self->{_seq_acc},
+                    'logodds'   => $logodds,
+                    'sequence'  => $sequence},
+            );
+            return $gene;
+        }
+    }
+    return; # input exhausted: no further hit
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ESTScan.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ESTScan.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ESTScan.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,364 @@
+# $Id: ESTScan.pm,v 1.14.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::ESTScan
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::ESTScan - Results of one ESTScan run
+
+=head1 SYNOPSIS
+
+   $estscan = Bio::Tools::ESTScan->new(-file => 'result.estscan');
+   # filehandle:
+   $estscan = Bio::Tools::ESTScan->new( -fh  => \*INPUT );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $estscan->next_feature() is the same
+   while($gene = $estscan->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene
+       foreach my $orf ($gene->exons()) {
+	   # $orf is an instance of Bio::Tools::Prediction::Exon
+	   $cds_str = $orf->predicted_cds();
+       }
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $estscan->close();
+
+=head1 DESCRIPTION
+
+The ESTScan module provides a parser for ESTScan coding region prediction
+output.
+
+This module inherits off L<Bio::Tools::AnalysisResult> and therefore
+implements the L<Bio::SeqAnalysisParserI> interface. 
+See L<Bio::SeqAnalysisParserI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net (or hilmar.lapp at pharma.novartis.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::ESTScan;
+use strict;
+use Symbol;
+
+use Bio::Root::Root;
+use Bio::Tools::Prediction::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    my ($self, @args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+    # fall back to the default method name if none has been set so far
+    if(! $self->analysis_method()) {
+	$self->analysis_method('ESTScan');
+    }
+}
+
+=head2 analysis_method
+
+ Usage     : $estscan->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /estscan/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+    # A true method name is accepted only if it mentions "estscan" (any case).
+    $self->throw("method $method not supported in " . ref($self))
+        if $method && $method !~ /estscan/i;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($orf = $estscan->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the ESTScan result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self, @args) = @_;
+    # Plain alias for next_prediction(). That method takes no arguments
+    # at present; whatever is passed here is forwarded anyway, so this
+    # wrapper need not change should it ever accept some.
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $estscan->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the ESTScan result
+           file. Call this method repeatedly until FALSE is returned.
+
+           So far, this method DOES NOT work for reverse strand predictions,
+           even though the code looks as if it does.
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+    my ($gene, $seq, $cds, $predobj);
+    my $numins = 0;
+
+    # predictions are in the format of FASTA sequences and can be parsed one
+    # at a time
+    $seq = $self->_fasta_stream()->next_seq();
+    return unless $seq;
+    # there is a new prediction
+    $gene = Bio::Tools::Prediction::Gene->new('-primary' => "ORFprediction",
+                                              '-source' => "ESTScan");
+    # score starts the description
+    $seq->desc() =~ /^([\d.]+)\s*(.*)/ or
+	$self->throw("unexpected format of description: no score in " .
+		     $seq->desc());
+    $gene->score($1);
+    $seq->desc($2);
+    # strand may end the description
+    if($seq->desc() =~ /(.*)minus strand$/) {
+	my $desc = $1;
+	$desc =~ s/;\s+$//;
+	$seq->desc($desc);
+	$gene->strand(-1);
+    } else {
+	$gene->strand(1);
+    }
+    # check for the format: default or 'all-in-one' (option -a)
+    if($seq->desc() =~ /^(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s*(.*)/) {
+	# default format
+	$seq->desc($5);
+	$predobj = Bio::Tools::Prediction::Exon->new('-source' => "ESTScan",
+						     '-start' => $3,
+						     '-end' => $4);
+	$predobj->strand($gene->strand());
+	$predobj->score($gene->score()); # FIXME or $1, or $2 ?
+	$predobj->primary_tag("InternalExon");
+	$predobj->seq_id($seq->display_id());
+	# add to gene structure object
+	$gene->add_exon($predobj);
+	# add predicted CDS
+	$cds = $seq->seq();
+	$cds =~ s/[a-z]//g; # remove the deletions, but keep the insertions
+	$cds = Bio::PrimarySeq->new('-seq' => $cds,
+				    '-display_id' => $seq->display_id(),
+				    '-desc' => $seq->desc(),
+				    '-alphabet' => "dna");
+	$gene->predicted_cds($cds);
+	$predobj->predicted_cds($cds);
+	if($gene->strand() == -1) {
+	    $self->warn("reverse strand ORF, but unable to reverse coordinates!");
+	}
+    } else {
+	#
+	# All-in-one format (hopefully). This encodes the following information
+	# into the sequence:
+	# 1) untranslated regions: stretches of lower-case letters
+	# 2) translated regions: stretches of upper-case letters
+	# 3) insertions in the translated regions: capital X
+	# 4) deletions in the translated regions: a single lower-case letter
+	#
+	# if reverse strand ORF, save a lot of hassle by reversing the sequence
+	if($gene->strand() == -1) {
+	    $seq = $seq->revcom();
+	}
+	my $seqstr = $seq->seq();
+	while($seqstr =~ /^([a-z]*)([A-Z].*)$/) {
+	    # leading 5'UTR
+	    my $utr5 = $1;
+	    # exon + 3'UTR
+	    my $exonseq = $2;
+	    # strip 3'UTR and following exons
+	    if($exonseq =~ s/([a-z]{2,}.*)$//) {
+		$seqstr = $1;
+	    } else {
+		$seqstr = "";
+	    }
+	    # start: take care of yielding the absolute coordinate
+	    my $start = CORE::length($utr5) + 1;
+	    if($predobj) {
+		$start += $predobj->end() + $numins;
+	    }
+	    # for the end coordinate, we need to subtract the insertions
+	    $cds = $exonseq;
+	    $cds =~ s/[X]//g;
+	    my $end = $start + CORE::length($cds) - 1;
+	    # construct next exon object
+	    $predobj = Bio::Tools::Prediction::Exon->new('-start' => $start,
+							 '-end' => $end);
+	    $predobj->source_tag("ESTScan");
+	    $predobj->primary_tag("InternalExon");
+	    $predobj->seq_id($seq->display_id());
+	    $predobj->strand($gene->strand());
+	    $predobj->score($gene->score());
+	    # add the exon to the gene structure object
+	    $gene->add_exon($predobj);
+	    # add the predicted CDS
+	    $cds = $exonseq;
+	    $cds =~ s/[a-z]//g; # remove the deletions, but keep the insertions
+	    $cds = Bio::PrimarySeq->new('-seq' => $cds,
+					'-display_id' => $seq->display_id(),
+					'-desc' => $seq->desc(),
+					'-alphabet' => "dna");
+	    # only store the first one in the overall prediction
+	    $gene->predicted_cds($cds) unless $gene->predicted_cds();
+	    $predobj->predicted_cds($cds);
+	    # add the predicted insertions and deletions as subfeatures
+	    # of the exon
+	    my $fea = undef;
+	    while($exonseq =~ /([a-zX])/g) {
+		my $indel = $1;
+		# start and end: start looking at the position after the
+		# previous feature
+		if($fea) {
+		    $start = $fea->start()+$numins;
+		    $start -= 1 if($fea->primary_tag() eq 'insertion');
+		} else {
+		    $start = $predobj->start()+$numins-1;
+		}
+		#print "# numins = $numins, indel = $indel, start = $start\n";
+		$start = index($seq->seq(), $indel, $start) + 1 - $numins;
+		$fea = Bio::SeqFeature::Generic->new('-start' => $start,
+						     '-end' => $start);
+		$fea->source_tag("ESTScan");
+		$fea->seq_id($seq->display_id());
+		$fea->strand($predobj->strand());
+		if($indel eq 'X') {
+		    # an insertion (depends on viewpoint: to get the 'real'
+		    # CDS, a base has to be inserted, i.e., the HMMER model
+		    # inserted a base; however, the sequencing process deleted
+		    # a base that was there).
+		    $fea->primary_tag("insertion");
+		    # we need to count insertions because these are left out
+		    # of any coordinates saved in the objects (which is correct
+		    # because insertions change the original sequence, so
+		    # coordinates wouldn't match)
+		    $numins++;
+		} else {
+		    # a deletion (depends on viewpoint: to get the 'real'
+		    # CDS, a base has to be deleted, i.e., the HMMER model
+		    # deleted a base; however, the sequencing process inserted
+		    # a base that wasn't there).
+		    $fea->primary_tag("deletion");
+		    $fea->add_tag_value('base', $indel);
+		}
+		$predobj->add_sub_SeqFeature($fea);
+	    }
+	}
+    }
+    
+    return $gene;
+}
+
+=head2 close
+
+ Title   : close
+ Usage   : $result->close()
+ Function: Closes the file handle associated with this result file.
+           Inherited method, overridden.
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub close {
+    my $self = shift;
+    my @args = @_;
+
+    # Forget the cached FASTA reader (see _fasta_stream) before handing
+    # over to the inherited close().
+    delete $self->{'_fastastream'};
+    $self->SUPER::close(@args);
+}
+
+=head2 _fasta_stream
+
+ Title   : _fasta_stream
+ Usage   : $result->_fasta_stream()
+ Function: Gets/Sets the FASTA sequence IO stream for reading the contents of
+           the file associated with this ESTScan result object.
+
+           If called for the first time, creates the stream from the filehandle
+           if necessary.
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _fasta_stream {
+    my ($self, $stream) = @_;
+
+    if($stream) {
+        # setter call: remember the stream supplied by the caller
+        $self->{'_fastastream'} = $stream;
+    } elsif(! exists($self->{'_fastastream'})) {
+        # first getter call: wrap our own filehandle in a FASTA reader
+        $self->{'_fastastream'} = Bio::SeqIO->new('-fh' => $self->_fh(),
+                                                  '-format' => "fasta");
+    }
+    return $self->{'_fastastream'};
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Eponine.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Eponine.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Eponine.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,295 @@
+# $Id: Eponine.pm,v 1.13.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Eponine
+#
+# Cared for by Tania Oh <gisoht at nus.edu.sg>
+#
+# Copyright Tania Oh 
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Eponine - Results of one Eponine run
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::Eponine;
+ use strict;
+    my $seq = "/data/seq.fa";
+    my $threshold  = "0.999";
+    my @params = ( -seq => $seq,
+                   -threshold => $threshold);
+
+   my $factory = Bio::Tools::Run::Eponine->new(@params);
+     # run eponine against fasta 
+        my $r = $factory->run_eponine($seq);
+        my $parser = Bio::Tools::Eponine->new($r);
+
+       while (my $feat = $parser->next_prediction){
+                #$feat contains array of SeqFeature
+               foreach my $orf($feat) {
+                   print $orf->seq_id. "\n";
+               }
+       }
+
+=head1 DESCRIPTION
+
+Parser for Eponine, a probabilistic transcription start site detector
+optimized for mammalian genomic sequence. This module inherits off
+Bio::Tools::AnalysisResult and therefore implements 
+Bio::SeqAnalysisParserI (see L<Bio::Tools::AnalysisResult> and
+L<Bio::SeqAnalysisParserI>).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Tania Oh 
+
+E<lt>gisoht-at-nus.edu.sgE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Eponine;
+use strict;
+
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    my ($self, @args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+
+    # handle our own parameters
+
+    # our private state variables
+    # flag: set once the result stream has been parsed
+    $self->{'_preds_parsed'} = 0;
+    # queue of feature objects filled by create_feature() and drained by
+    # _prediction()
+    $self->{'_flist'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $eponine->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /epo/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+
+    # A supplied method name is only accepted when it contains "epo"
+    # (case-insensitive); no name at all means a plain getter call.
+    my $unsupported = $method && ($method !~ /epo/i);
+    $self->throw("method $method not supported in " . ref($self))
+        if $unsupported;
+
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($tss = $eponine->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next transcription start site (TSS) prediction of
+           the Eponine result file. Call this method repeatedly until
+           FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+           Note that one object is returned for each prediction line of
+           the Eponine output; predictions are not grouped into genes.
+ Example :
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self, @args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($tss = $eponine->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next transcription start site (TSS) prediction of
+           the Eponine result file. Call this method repeatedly until
+           FALSE is returned.
+
+           Note that one object is returned for each prediction line of
+           the Eponine output; predictions are not grouped into genes.
+ Example :
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my $self = shift;
+
+    # The result stream is parsed lazily, on the first request only.
+    unless($self->_predictions_parsed()) {
+        $self->_parse_predictions();
+    }
+
+    # hand out the next queued feature, if any is left
+    return $self->_prediction();
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    my ($self) = @_;
+
+    # Read into a lexical so that the caller's $_ is left untouched.
+    while(defined(my $line = $self->_readline())) {
+        next if $line =~ /^\#/;      # ignore introductory lines
+        next unless $line =~ /\S/;   # blank lines carry no prediction
+
+        # whitespace-separated columns; only columns 0 and 3..6 are used
+        my @element = split(' ', $line);
+        my %feature = (
+            name            => $element[0],
+            score           => $element[5],
+            start           => $element[3],
+            end             => $element[4],
+            strand          => $element[6],
+            source          => 'Eponine',
+            primary         => 'TSS',
+            program         => 'eponine-scan',
+            program_version => '2',
+        );
+
+        # builds the feature object and queues it for next_prediction()
+        $self->create_feature(\%feature);
+    }
+    $self->_predictions_parsed(1);
+}
+
+=head2 create_feature
+
+    Title   :   create_feature
+    Usage   :   obj->create_feature($feature)
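+    Function:   Creates a Bio::SeqFeature::Generic object from the parsed
+                fields and appends it to the internal list of predictions
+    Returns :   Nothing of interest; the features are retrieved through
+                next_prediction()
+    Args    :   Hash reference with the keys name, start, end, strand,
+                score, source and primary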
+
+=cut
+
+sub create_feature {
+    my ($self, $feat) = @_;
+
+    # Map the parsed fields onto the constructor arguments of a generic
+    # sequence feature describing one TSS prediction.
+    my @ctor_args = (
+        -seq_id      => $feat->{'name'},
+        -start       => $feat->{'start'},
+        -end         => $feat->{'end'},
+        -strand      => $feat->{'strand'},
+        -score       => $feat->{'score'},
+        -source_tag  => $feat->{'source'},
+        -primary_tag => $feat->{'primary'},
+    );
+    my $tss = Bio::SeqFeature::Generic->new(@ctor_args);
+
+    #print $tss->gff_string;
+
+    # queue the feature; _prediction() hands the queue out one by one
+    if ($tss) {
+        push(@{$self->{'_flist'}}, $tss);
+    }
+}
+			    
+
+
+
+
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my $self = shift;
+
+    # Dequeue the oldest pending feature; a bare return signals that the
+    # queue is missing or exhausted.
+    if(exists($self->{'_flist'})) {
+        my $queue = $self->{'_flist'};
+        return shift(@$queue) if @$queue;
+    }
+    return;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+
+    if($val) {
+        # setter: only a true value is ever stored
+        $self->{'_preds_parsed'} = $val;
+    } elsif(! exists($self->{'_preds_parsed'})) {
+        # getter on a fresh object: default to "not parsed yet"
+        $self->{'_preds_parsed'} = 0;
+    }
+    return $self->{'_preds_parsed'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Est2Genome.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Est2Genome.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Est2Genome.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,343 @@
+# $Id: Est2Genome.pm,v 1.17.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Est2Genome
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Est2Genome - Parse est2genome output, makes simple Bio::SeqFeature::Generic objects
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Est2Genome;
+
+  my $featureiter = new Bio::Tools::Est2Genome(-file => 'output.est2genome');
+
+  # This is going to be fixed to use the SeqAnalysisI next_feature
+  # Method eventually when we have the objects to put the data in
+  # properly
+  while( my $f = $featureiter->parse_next_gene ) {
+   # process Bio::SeqFeature::Generic objects here
+  }
+
+=head1 DESCRIPTION
+
+This module is a parser for C<est2genome> [EMBOSS] alignments of est/cdna
+sequence to genomic DNA.  This is generally accepted as the best
+program for predicting splice sites based on est/cdnas (as far as I know).
+
+This module currently does not try to pull out the ungapped alignments
+(Segment) but may in the future.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Est2Genome;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Root::Root;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::Intron;
+use Bio::SeqFeature::Gene::GeneStructure;
+use Bio::SeqFeature::SimilarityPair;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Est2Genome();
+ Function: Builds a new Bio::Tools::Est2Genome object
+ Returns : an instance of Bio::Tools::Est2Genome
+ Args    : -file => 'output.est2genome' or
+           -fh   => \*EST2GENOMEOUTPUT
+           -genomefirst => 1  # genome was the first input (not standard)
+
+=cut
+
+sub _initialize_state {
+    my ($self, @args) = @_;
+
+    # call the inherited method first
+    $self->SUPER::_initialize_state(@args);
+
+    # our own parameter: -genomefirst
+    my ($genome_is_first) = $self->_rearrange([qw(GENOMEFIRST)], @args);
+
+    # start from a clean slate, then record the flag only if it was given
+    delete($self->{'_genome_is_first'});
+    $self->{'_genome_is_first'} = $genome_is_first if(defined($genome_is_first));
+    $self->analysis_method("est2genome");
+}
+
+=head2 analysis_method
+
+ Usage     : $est2genome->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /est2genome/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+
+    # Only names containing "est2genome" (case-insensitive) may be set;
+    # without a name this is a plain getter call.
+    if($method) {
+        $self->throw("method $method not supported in " . ref($self))
+            unless $method =~ /est2genome/i;
+    }
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 parse_next_gene
+
+ Title   : parse_next_gene
+ Usage   : @exons = $est2genome_result->parse_next_gene;
+           foreach $exon (@exons) {
+               # do something
+           }
+
+ Function: Parses the next alignments of the est2genome result file and
+           returns the found exons as an array of
+           Bio::SeqFeature::SimilarityPair objects. Call
+           this method repeatedly until an empty array is returned to get the
+           results for all alignments.
+
+           The $exon->seq_id() attribute will be set to the identifier of the
+           respective sequence for both sequences.
+           The length is accessible via the seqlength()
+           attribute of $exon->query() and
+           $exon->est_hit().
+ Returns : An array (or array reference) of Bio::SeqFeature::SimilarityPair and Bio::SeqFeature::Generic objects
+           or Bio::SeqFeature::Gene::GeneStructure
+ Args    : flag(1/0) indicating to return Bio::SeqFeature::Gene::GeneStructure or Bio::SeqFeature::SimilarityPair
+           defaults to 0
+
+=cut
+
+sub parse_next_gene {
+   my ($self,$return_gene) = @_;
+
+   # with a true flag the caller wants a GeneStructure object instead
+   return $self->_parse_gene_struct if $return_gene;
+
+   my $seensegment = 0;
+   my @features;
+   my ($qstrand,$hstrand) = (1,1);
+   my $lasthseqname;
+
+   # Read into a lexical so that the caller's $_ is left untouched.
+   while( defined(my $line = $self->_readline) ) {
+       if( $line =~ /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) {
+           my ($est_dir, $gene_dir) = ($1, $4);
+           if( $seensegment ) {
+               # this header opens the next alignment: leave it in the
+               # stream for the next call and return what we have
+               $self->_pushback($line);
+               return wantarray ? @features : \@features;
+           }
+           $hstrand = -1 if $est_dir eq 'reversed';
+           $qstrand = -1 if $gene_dir eq 'REVERSED GENE';
+       }
+       elsif( $line =~ /^Exon/ ) {
+           my ($name,$len,$score,$qstart,$qend,$qseqname,
+               $hstart,$hend, $hseqname) = split(' ', $line);
+           # remembered so that following Intron lines can be tagged
+           $lasthseqname = $hseqname;
+           my $query = Bio::SeqFeature::Similarity->new(
+               -primary => $name,
+               -source  => $self->analysis_method,
+               -seq_id  => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE
+               -start   => $qstart,
+               -end     => $qend,
+               -strand  => $qstrand,
+               -score   => $score,
+               -tag     => {
+#                  'Location' => "$hstart..$hend",
+                   'Sequence' => "$hseqname",
+               }
+           );
+           my $hit = Bio::SeqFeature::Similarity->new(
+               -primary => 'exon_hit',
+               -source  => $self->analysis_method,
+               -seq_id  => $hseqname,
+               -start   => $hstart,
+               -end     => $hend,
+               -strand  => $hstrand,
+               -score   => $score,
+               -tag     => {
+#                  'Location' => "$qstart..$qend",
+                   'Sequence' => "$qseqname",
+               }
+           );
+           push @features, Bio::SeqFeature::SimilarityPair->new(
+               -query  => $query,
+               -hit    => $hit,
+               -source => $self->analysis_method);
+       } elsif( $line =~ /^[\-\+\?]Intron/ ) {
+           my ($name,$len,$score,$qstart,$qend,$qseqname) = split(' ', $line);
+           # the primary tag is the bare word, without the leading
+           # direction sign of the line
+           push @features, Bio::SeqFeature::Generic->new(
+               -primary => 'Intron',
+               -source  => $self->analysis_method,
+               -start   => $qstart,
+               -end     => $qend,
+               -strand  => $qstrand,
+               -score   => $score,
+               -seq_id  => $qseqname,
+               -tag     => {
+                   'Sequence' => $lasthseqname});
+       } elsif( $line =~ /^Span/ ) {
+           # summary line, not turned into a feature
+       } elsif( $line =~ /^Segment/ ) {
+           # segments are not parsed here; they only mark that the
+           # exon/intron part of the current alignment is over
+           $seensegment = 1;
+       } elsif( $line =~ /^\s+$/ ) { # do nothing
+       } else {
+           $self->warn( "unknown line $line\n");
+       }
+   }
+   return unless( @features );
+   return wantarray ? @features : \@features;
+}
+
+# Parses the remaining est2genome output into one
+# Bio::SeqFeature::Gene::GeneStructure: every Exon line becomes an exon of a
+# single transcript, and every Segment line becomes a SimilarityPair that is
+# attached as 'supporting_feature' tag value to each exon it overlaps.
+# Returns the gene, or nothing if no Exon line was found.
+sub _parse_gene_struct {
+   my ($self) = @_;
+   # NOTE(review): $seensegment is never set to true in this method, so the
+   # early return on a second "Note Best alignment" header below cannot
+   # trigger and all alignments in the stream end up in one gene -- confirm
+   # whether that is intended.
+   my $seensegment = 0;
+   my ($qstrand,$hstrand) = (1,1);
+   my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-source => $self->analysis_method);
+   my $transcript = Bio::SeqFeature::Gene::Transcript->new(-source => $self->analysis_method);
+   my @suppf;
+   my @exon;
+
+   # Read into a lexical so that the caller's $_ is left untouched.
+   while( defined(my $line = $self->_readline) ) {
+       if( $line =~ /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) {
+           my ($est_dir, $gene_dir) = ($1, $4);
+           if( $seensegment ) {
+               $self->_pushback($line);
+               return $gene;
+           }
+           $hstrand = -1 if $est_dir eq 'reversed';
+           $qstrand = -1 if $gene_dir eq 'REVERSED GENE';
+       }
+       elsif( $line =~ /^Exon/ ) {
+           my ($name,$len,$score,$qstart,$qend,$qseqname,
+               $hstart,$hend, $hseqname) = split(' ', $line);
+           my $exon = Bio::SeqFeature::Gene::Exon->new(
+               -primary => $name,
+               -source  => $self->analysis_method,
+               -seq_id  => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE
+               -start   => $qstart,
+               -end     => $qend,
+               -strand  => $qstrand,
+               -score   => $score,
+               -tag     => {
+                   #'Location' => "$hstart..$hend",
+                   'Sequence' => "$hseqname",
+               }
+           );
+           # the transcript takes its seq_id from the first exon seen
+           $transcript->seq_id($qseqname) unless $transcript->seq_id;
+           $exon->add_tag_value('phase',0);
+           push @exon, $exon;
+       } elsif( $line =~ /^[\-\+\?]Intron/ ) {
+           # intron automatically built from exons..hope thats ok..
+       } elsif( $line =~ /^Span/ ) {
+           # summary line, not turned into a feature
+       } elsif( $line =~ /^Segment/ ) {
+           my ($name,$len,$score,$qstart,$qend,$qseqname,
+               $hstart,$hend, $hseqname) = split(' ', $line);
+           my $query = Bio::SeqFeature::Similarity->new(
+               -primary => $name,
+               -source  => $self->analysis_method,
+               -seq_id  => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE
+               -start   => $qstart,
+               -end     => $qend,
+               -strand  => $qstrand,
+               -score   => $score,
+               -tag     => {
+#                  'Location' => "$hstart..$hend",
+                   'Sequence' => "$hseqname",
+               }
+           );
+           my $hit = Bio::SeqFeature::Similarity->new(
+               -primary => 'exon_hit',
+               -source  => $self->analysis_method,
+               -seq_id  => $hseqname,
+               -start   => $hstart,
+               -end     => $hend,
+               -strand  => $hstrand,
+               -score   => $score,
+               -tag     => {
+#                  'Location' => "$qstart..$qend",
+                   'Sequence' => "$qseqname",
+               }
+           );
+           my $support = Bio::SeqFeature::SimilarityPair->new(
+               -query  => $query,
+               -hit    => $hit,
+               -source => $self->analysis_method);
+           push @suppf, $support;
+       } elsif( $line =~ /^\s+$/ ) { # do nothing
+       } else {
+           $self->warn( "unknown line $line\n");
+       }
+   }
+   return unless @exon;
+
+   foreach my $e (@exon) {
+       # attach every segment that overlaps this exon as supporting evidence
+       my @add = grep { $_->overlaps($e) } @suppf;
+       $e->add_tag_value('supporting_feature', @add);
+       $transcript->add_exon($e);
+   }
+
+   $gene->add_transcript($transcript);
+   $gene->seq_id($transcript->seq_id);
+   return $gene;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none
+
+=cut
+
+sub next_feature {
+    my $self = shift;
+
+    # Not implemented for this parser: always raises, pointing callers at
+    # parse_next_gene() instead.
+    $self->throw("We haven't really done this right, yet, use parse_next_gene");
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Fgenesh.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Fgenesh.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Fgenesh.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,531 @@
+# $Id: Fgenesh.pm,v 1.10.4.2 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Fgenesh
+#
+# Cared for by Christopher Dwan (chris at dwan.org)
+#
+# Copied, lock stock & barrel from Genscan.pm
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Fgenesh - parse results of one Fgenesh run
+
+=head1 SYNOPSIS
+
+   use Bio::Tools::Fgenesh;
+
+   $fgenesh = Bio::Tools::Fgenesh->new(-file => 'result.fgenesh');
+   # filehandle:
+   $fgenesh = Bio::Tools::Fgenesh->new( -fh  => \*INPUT );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $fgenesh->next_feature() is the same
+   while($gene = $fgenesh->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene, which inherits
+       # off Bio::SeqFeature::Gene::Transcript.
+       #
+       # $gene->exons() returns an array of 
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # initial exons only
+       @init_exons = $gene->exons('Initial');
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # terminal exons only
+       @term_exons = $gene->exons('Terminal');
+       # singleton exons: 
+       ($single_exon) = $gene->exons();
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $fgenesh->close();
+
+=head1 DESCRIPTION
+
+The Fgenesh module provides a parser for Fgenesh (version 2) gene structure 
+prediction output. It parses one gene prediction into a 
+Bio::SeqFeature::Gene::Transcript- derived object.
+
+This module also implements the L<Bio::SeqAnalysisParserI> interface, and thus
+can be used wherever such an object fits. 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Dwan
+
+Email chris-at-dwan.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Fgenesh;
+use strict;
+use Symbol;
+
+use Bio::Root::Root;
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+# Feature-type codes of exon lines. _parse_predictions() matches these keys
+# against each prediction line to tell exons from other signals.
+my %ExonTags = (
+    CDSf => 'Initial',
+    CDSi => 'Internal',
+    CDSl => 'Terminal',
+    CDSo => 'Singleton',
+);
+    
+sub _initialize_state {
+    my ($self, @args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+
+    # our private state variables
+    $self->{'_preds_parsed'} = 0;
+    $self->{'_has_cds'} = 0;
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+    # seq stack: sequences read ahead that belong to a later prediction
+    $self->{'_seqstack'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $fgenesh->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /fgenesh/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my $self   = shift;
+    my $method = shift;
+
+    # Refuse any supplied name that does not contain "fgenesh"
+    # (case-insensitive); an empty name means a plain getter call.
+    if($method && $method !~ /fgenesh/i) {
+        my $class = ref($self);
+        $self->throw("method $method not supported in " . $class);
+    }
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $fgenesh->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Fgenesh result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self, @args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $fgenesh->next_prediction()) { ... }
+ Function: Returns the next gene structure prediction of the Fgenesh result
+           file. Call this method repeatedly until FALSE is returned.
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+
+    # if the prediction section hasn't been parsed yet, we do this now
+    $self->_parse_predictions() unless $self->_predictions_parsed();
+
+    # get next gene structure
+    my $gene = $self->_prediction();
+    return $gene unless $gene;
+
+    # fill in predicted protein, and if available the predicted CDS
+    #
+    # use the seq stack if there's a seq on it, otherwise read the next
+    # FASTA entry from the input
+    my $seqobj = pop(@{$self->{'_seqstack'}});
+    unless ($seqobj) {
+        my ($id, $seq) = $self->_read_fasta_seq();
+        # there may be no sequence at all, or none any more
+        if (defined($id) && defined($seq)) {
+            my $alphabet = (($id =~ /mrna/) || ($id =~ /cds/)) ? 'dna'
+                                                               : 'protein';
+            $seqobj = Bio::PrimarySeq->new('-seq'        => $seq,
+                                           '-display_id' => $id,
+                                           '-alphabet'   => $alphabet);
+        }
+    }
+    return $gene unless $seqobj;
+
+    # check that prediction number matches the prediction number
+    # indicated in the sequence id (there may be incomplete gene
+    # predictions that contain only signals with no associated protein
+    # prediction).
+    # The id is taken from the sequence object itself, so that a sequence
+    # coming off the seq stack is checked as well.
+    my ($prednr) = $gene->primary_tag() =~ /[^0-9]([0-9]+)$/;
+    my $seqid = $seqobj->display_id();
+    my $kind;
+    if (defined($prednr) && defined($seqid)
+        && $seqid =~ /_predicted_(\w+)_$prednr/) {
+        $kind = $1;
+    }
+
+    if (! defined($kind)) {
+        # this is not our sequence, so push back for next prediction
+        push(@{$self->{'_seqstack'}}, $seqobj);
+    } elsif ($kind eq "protein") {
+        $gene->predicted_protein($seqobj);
+    } elsif (($kind eq "mrna") || ($kind eq "cds")) {
+        $self->_has_cds(1);
+        $gene->predicted_cds($seqobj);
+
+        # Have to go back in and get the protein...
+        my ($pid, $pseq) = $self->_read_fasta_seq();
+        if (defined($pid) && $pid =~ /_cds_/) {
+            # skip one further CDS entry preceding the protein
+            ($pid, $pseq) = $self->_read_fasta_seq();
+        }
+        if (defined($pid) && defined($pseq)) {
+            my $protein = Bio::PrimarySeq->new('-seq' => $pseq,
+                                               '-display_id' => $pid,
+                                               '-alphabet' => "protein");
+            $gene->predicted_protein($protein);
+        }
+    }
+
+    return $gene;
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    # Reads the prediction table line by line (via _readline) until the
+    # "Predicted protein" line or the end of input, assembling one
+    # Bio::Tools::Prediction::Gene per run of exon/signal lines. A finished
+    # gene is queued with _add_prediction(); finally the parsed flag is set.
+    my ($self) = @_;
+    my $gene;      # gene currently being assembled; undef between genes
+    my $seqname;   # value captured from the most recent "Seq name:" line
+
+    while(defined($_ = $self->_readline())) {
+
+	# a line starting with a number followed by a strand sign is an
+	# exon or signal line
+	if(/^\s*(\d+)\s+([+\-])/) {
+            my $line = $_;
+
+	    # exon or signal
+	    my $prednr = $1;
+            my $strand = ($2 eq '+') ? 1 : -1;
+
+	    # the first feature line of a block opens a new gene
+	    if(! defined($gene)) {
+		$gene = Bio::Tools::Prediction::Gene->new(
+                                       '-primary' => "GenePrediction$prednr",
+				       '-source' => 'Fgenesh');
+	    }
+	    # split into fields
+	    # NOTE: chomp() acts on $_, while the fields come from the copy in
+	    # $line; splitting on whitespace drops the trailing newline
+	    # anyway. If the line starts with whitespace, $flds[0] is an
+	    # empty string and the data fields start at index 1.
+	    chomp();
+	    my @flds = split(/\s+/, $line);
+
+	    # create the feature object depending on the type of signal
+	    my $predobj;
+	    # counts how many keys of %ExonTags match the line as a pattern
+	    my $is_exon = grep {$line =~ $_} keys(%ExonTags);
+            my ($start, $end);
+	    if($is_exon) {
+		$predobj = Bio::Tools::Prediction::Exon->new();
+                $predobj->score($flds[8]);
+                $start   = $flds[5];
+                $end     = $flds[7];
+	    } else {
+		# PolyA site, or TSS: a single position used as start and end
+		$predobj = Bio::SeqFeature::Generic->new();
+                $predobj->score($flds[5]);
+                $start   = $flds[4];
+                $end     = $flds[4];
+	    }
+	    # set common fields
+	    $predobj->source_tag('Fgenesh');
+	    $predobj->strand($strand);
+
+# Following tactical commenting-out made by
+# malcolm.cook at stowers-institute.org since coordinate reversal is
+# apparently vestigial copy/paste detritus from Genscan.pm origins of
+# this module and this is NOT needed for fgenesh (at least in v
+# 2.1.4).
+
+#	    if($predobj->strand() == 1) {
+		$predobj->start($start);
+		$predobj->end($end);
+#	    } else {
+#		$predobj->end($start);
+#		$predobj->start($end);
+#	    }
+
+            # print STDERR "start $start end $end\n";
+	    # add to gene structure (should be done only when start and end
+	    # are set, in order to allow for proper expansion of the range)
+	    if($is_exon) {
+		# first, set fields unique to exons
+		$predobj->primary_tag($ExonTags{$flds[3]} . 'Exon');
+		$predobj->is_coding(1);
+		my $cod_offset;
+		if($predobj->strand() == 1) {
+		    $cod_offset = ($flds[9] - $predobj->start()) % 3;
+		    # Possible values are -2, -1, 0, 1, 2. -1 and -2 correspond
+		    # to offsets 2 and 1, resp. Offset 3 is the same as 0.
+		    # NOTE(review): Perl's % with a positive right operand
+		    # never yields a negative result, so only 0, 1, 2 should
+		    # occur here; 0 is then mapped to 3 below.
+		    $cod_offset += 3 if($cod_offset < 1);		    
+		} else {
+		    # On the reverse strand the Genscan frame also refers to
+		    # the first base of the first complete codon, but viewed
+		    # from forward, which is the third base viewed from
+		    # reverse.
+		    $cod_offset = ($flds[11] - $predobj->end()) % 3;
+		    # Possible values are -2, -1, 0, 1, 2. Due to the reverse
+		    # situation, {2,-1} and {1,-2} correspond to offsets
+		    # 1 and 2, resp. Offset 3 is the same as 0.
+		    # NOTE(review): as above, the modulo result is presumably
+		    # always >= 0, in which case 3 is always subtracted before
+		    # the sign flip.
+		    $cod_offset -= 3 if($cod_offset >= 0);
+		    $cod_offset = -$cod_offset;
+		}
+		# Offsets 2 and 1 correspond to frame 1 and 2 (frame of exon
+		# is the frame of the first base relative to the exon, or the
+		# number of bases the first codon is missing).
+		$predobj->frame(3 - $cod_offset);
+                # print STDERR "  frame is " . $predobj->frame() . "\n";
+		# then add to gene structure object
+		# NOTE(review): the primary tag above is looked up with
+		# $flds[3] but the exon type here uses $flds[1] - confirm
+		# which field is intended.
+		$gene->add_exon($predobj, $ExonTags{$flds[1]});		
+	    } elsif($flds[3] eq 'PolA') {
+		$predobj->primary_tag("PolyAsite");
+		$gene->poly_A_site($predobj);
+	    } elsif($flds[3] eq 'TSS') {
+	        $predobj->primary_tag("Promoter"); # (hey! a TSS is NOT a promoter... what's going on here?...
+		$gene->add_promoter($predobj);
+                #I'd like to do this (for now):
+		#$predobj->primary_tag("TSS"); #this is not the right model, but, it IS a feature at least.
+                #but the followg errs out
+		#$gene->add_SeqFeature($predobj); #err: MSG: start is undefined when bounds at Bio::SeqFeature::Generic::add_SeqFeature 671 check since gene has no start yet
+	    }
+	    else {
+	      $self->throw("unrecognized prediction line: " . $line);
+	    }
+	    next;
+	}
+
+	# a blank line terminates the gene being assembled
+	if(/^\s*$/ && defined($gene)) {
+	    # current gene is completed
+	    $gene->seq_id($seqname);
+	    $self->_add_prediction($gene);
+	    $gene = undef;
+	    next;
+	}
+
+	# program banner: method name, version and (if present) the organism
+	# token preceding "genomic DNA"
+	if(/^(FGENESH)\s+([\d\.]+)/) {
+	    $self->analysis_method($1);
+	    $self->analysis_method_version($2);
+            if (/\s(\S+)\sgenomic DNA/) {
+              $self->analysis_subject($1);
+            }
+	    next;
+	}
+
+	if(/^\s*Seq name:\s+(\S+)/) {
+	    $seqname = $1;
+	    next;
+	}
+        
+	/^Predicted protein/ && do {
+	    # section of predicted sequences: hand the line back to the input
+	    # buffer and stop parsing predictions
+	    $self->_pushback($_);
+	    last;
+	};
+    }
+    # NOTE(review): a gene still being assembled when the loop ends (no blank
+    # line before "Predicted protein" or end of input) is not queued here.
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my ($self) = @_;
+
+    # Nothing queued (or queue never created): return empty list / undef.
+    if ( !exists($self->{'_preds'}) || !@{$self->{'_preds'}} ) {
+        return;
+    }
+
+    # Hand out the oldest queued gene prediction.
+    return shift @{$self->{'_preds'}};
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my ($self, $gene) = @_;
+
+    # Create the queue on first use, then append the gene to its end.
+    $self->{'_preds'} = [] unless exists($self->{'_preds'});
+    push @{$self->{'_preds'}}, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+
+    # Only a true value is stored; a false argument leaves the flag as is.
+    if ($val) {
+        $self->{'_preds_parsed'} = $val;
+    }
+
+    # Default the flag to 0 the first time it is queried.
+    $self->{'_preds_parsed'} = 0 unless exists($self->{'_preds_parsed'});
+
+    return $self->{'_preds_parsed'};
+}
+
+=head2 _has_cds
+
+ Title   : _has_cds()
+ Usage   : $obj->_has_cds()
+ Function: Whether or not the result contains the predicted CDSs, too.
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _has_cds {
+    my ($self, $val) = @_;
+
+    # Only a true value is stored; a false argument leaves the flag as is.
+    if ($val) {
+        $self->{'_has_cds'} = $val;
+    }
+
+    # Default the flag to 0 the first time it is queried.
+    $self->{'_has_cds'} = 0 unless exists($self->{'_has_cds'});
+
+    return $self->{'_has_cds'};
+}
+
+=head2 _read_fasta_seq
+
+ Title   : _read_fasta_seq()
+ Usage   : ($id,$seqstr) = $obj->_read_fasta_seq();
+ Function: Simple but specialised FASTA format sequence reader. Uses
+           $self->_readline() to retrieve input, and is able to strip off
+           the trailing description lines.
+ Example :
+ Returns : An array of two elements: fasta_id & sequence
+
+=cut
+
+sub _read_fasta_seq {
+    # Reads one sequence record from the input (via _readline) and returns
+    # ($id, $seq); returns an empty list when the first line read is false
+    # (e.g. end of input). $id is a synthetic "_predicted_<type>_<n>" string
+    # derived from the ">FGENESH:" header line.
+    my ($self) = @_;
+    my ($id, $seq);
+    #local $/ = ">";
+    
+    my $entry = $self->_readline();
+    # print " ^^ $entry\n";
+    return unless ($entry);
+    # skip the "Predicted protein" section header if it is the current line
+    $entry = $self->_readline() if ($entry =~ /^Predicted protein/);
+    # print " ^^ $entry\n";
+
+    # Pick up the header / id.
+    # NOTE: if the line is not a recognised ">FGENESH:" header, $id and $seq
+    # stay undefined and the pattern tests on $id below operate on undef.
+    if ($entry =~ /^>FGENESH:/) {
+      if ($entry =~ /^>FGENESH:\s+(\d+)/) {
+         # print STDERR "  this is a predicted gene\n";
+         $id  = "_predicted_protein_" . $1;
+      } elsif ($entry =~ /^>FGENESH:\[mRNA\]\s+(\d+)/) {
+	# print STDERR "  this is an mRNA\n";
+         $id  = "_predicted_mrna_" . $1;
+      } elsif ($entry =~ /^>FGENESH:\[exon\]\s+Gene:\s+(\d+)/) {
+         $id  = "_predicted_cds_"  . $1;
+      }
+      $seq = "";
+      $entry = $self->_readline();
+    }
+
+    # Accumulate sequence lines until the next record header or end of input.
+    # For a cds record, further ">FGENESH:[exon]" headers belong to the same
+    # record and do not terminate it.
+    my $done = 0;
+    while (!$done) {
+       # print "*** $entry\n";
+       if (($entry =~ /^>FGENESH:\[exon\]/) && ($id =~ /^_predicted_cds_/)) {
+         # print STDERR "  -- informed about an exon header...\n";
+         # NOTE(review): the line read here is overwritten by the
+         # _readline() a few statements below without being appended to
+         # $seq - confirm that dropping it is intended.
+         $entry = $self->_readline();
+       } else {
+         $seq .= $entry;
+         # print STDERR "  Added $entry\n";
+       }
+
+       last unless $entry  = $self->_readline();
+       # any other header starts the next record: push it back for the next
+       # call and stop
+       if (($entry =~ /^>/) && 
+           (!(($entry =~ /^>FGENESH:\[exon\]/) && ($id =~ /^_predicted_cds_/)))) {
+          $self->_pushback($entry); last;    
+       }
+    }
+
+    # id and sequence
+    $seq =~ s/\s//g; # Remove whitespace
+    return ($id, $seq);
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Fgenesh.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/FootPrinter.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/FootPrinter.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/FootPrinter.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,268 @@
+# $Id: FootPrinter.pm,v 1.8.4.2 2006/11/08 17:25:55 sendu Exp $
+# BioPerl module for Bio::Tools::FootPrinter
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::FootPrinter - parse FootPrinter output into sequence features
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::FootPrinter;
+
+    my $tool = Bio::Tools::FootPrinter->new(-file=>"footprinter.out");
+
+    while (my $result = $tool->next_feature){
+      foreach my $feat($result->sub_SeqFeature){
+        print $result->seq_id."\t".$feat->start."\t".$feat->end."\t".$feat->seq->seq."\n";
+      }
+    }
+
+=head1 DESCRIPTION
+
+This module parses FootPrinter output into sequence features.
+See L<http://bio.cs.washington.edu/software.html> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon 
+
+Email shawnh at fugu-sg.org 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::FootPrinter;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::FootPrinter();
+ Function: Builds a new Bio::Tools::FootPrinter object 
+ Returns : Bio::Tools::FootPrinter
+ Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
+
+=cut
+
+sub new {
+  # Build the object through the parent constructor, then let Bio::Root::IO
+  # set up the input handle from the same named arguments (-fh / -file).
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);
+
+  return $self;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : my $r = $footprint->next_feature
+ Function: Get the next feature from parser data
+ Returns : L<Bio::SeqFeature::Generic> 
+ Args    : none
+
+=cut
+
+sub next_feature {
+    my ($self) = @_;
+
+    # Parse lazily: the input is only processed if not flagged as parsed.
+    if ( !$self->_predictions_parsed() ) {
+        $self->_parse_predictions();
+    }
+
+    # Hand out the oldest queued feature (undef once the queue is empty).
+    return shift @{ $self->{'_feature'} };
+}
+
+=head2 _add_feature
+
+ Title   : _add_feature
+ Usage   : $footprint->_add_feature($feat)
+ Function: Add feature to array
+ Returns : none
+ Args    : the feature to add (false values are ignored)
+
+=cut
+
+sub _add_feature {
+    my ($self, $feat) = @_;
+
+    # Queue the feature; false values (e.g. undef) are silently ignored.
+    push @{ $self->{'_feature'} }, $feat if $feat;
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions
+ Usage   : my $r = $footprint->_parse_predictions
+ Function: do the parsing 
+ Returns : none 
+ Args    : none
+
+=cut
+
+sub _parse_predictions {
+  # Reads the whole input in paragraph mode (one blank-line-delimited block
+  # per _readline call). A block of four lines (name, sequence, score line,
+  # motif line) starts a new record; any other block is treated as a
+  # continuation of the current record. Each completed record is converted
+  # by _parse() and queued with _add_feature().
+  my ($self) = @_;
+
+  # Localised so the caller's input record separator is restored on exit
+  # instead of leaving the whole program in paragraph mode.
+  local $/ = "";
+
+  my ($seq,$second,$third,$name);
+  # A lexical is used instead of $_ so the caller's $_ is left untouched.
+  while (defined(my $block = $self->_readline)) {
+    chomp $block;
+    my @array = split("\n",$block);
+    if ($#array == 5) {
+      # get rid of header
+      shift(@array); shift(@array);
+    }
+    if($#array == 3){
+        # a new record begins: flush the previous one first
+        if($name){
+            $name=~s/>//;
+            my $feat = $self->_parse($name,$seq,$second,$third);
+            $self->_add_feature($feat);
+        }
+        $name    = shift @array;
+        $seq     = $array[0];
+        $second  = $array[1];
+        $third   = $array[2];
+        next;
+    }
+    # continuation block: extend the sequence and the motif line
+    # NOTE(review): the score line ($array[1]) of a continuation block is
+    # not appended to $second - confirm against the FootPrinter format.
+    $seq        .= $array[0];
+    $third      .= $array[2];
+  }
+
+  # Flush the final record; there is none when the input held no records.
+  if (defined $name) {
+    $name=~s/>//;
+    my $feat = $self->_parse($name,$seq,$second,$third);
+    $self->_add_feature($feat);
+  }
+
+  $self->_predictions_parsed(1);
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $footprint->_predictions_parsed(1)
+ Function: Get/Set for whether predictions parsed
+ Returns : 1/0
+ Args    : none
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+
+    # Only a true value is stored; a false argument leaves the flag as is.
+    $self->{'_predictions_parsed'} = $val if $val;
+
+    return $self->{'_predictions_parsed'};
+}
+
+
+=head2 _parse
+
+ Title   : _parse
+ Usage   : $footprint->_parse($name,$seq,$score,$pattern)
+ Function: do the actual parsing
+ Returns : L<Bio::SeqFeature::Generic>
+ Args    : none
+
+=cut
+
+sub _parse {
+    # Turns one FootPrinter record into a feature with one sub-feature per
+    # motif. A motif is a run of identical non-whitespace characters in
+    # $pattern; its score is the mean of the digits found at the same
+    # positions in $score. $seq is attached to the returned feature as a
+    # Bio::PrimarySeq.
+    my ($self,$name,$seq,$score,$pattern) = @_;
+    my @char  = split('',$pattern);
+    my @score = split('',$score);
+
+    my ($prev,$word,@words,@word_scores,$word_score);
+
+    my $i = 0;
+    for my $c ( @char ) {
+        if( $word && $c ne $prev ) {
+            # the run ended: keep it unless it consisted of whitespace only
+            $word=~s/\s+//g;
+            if ($word ne ''){
+		push @words, $word;
+		push @word_scores, (($word_score || 0)/length($word));
+            }
+            $word       = $c;
+	    $word_score = 0;
+        } else {
+            # first character, or the current run continues
+            $word .= $c;
+        }
+	$prev = $c;
+	# only digit positions of the score line contribute to the score
+	if( defined $score[$i] && $score[$i] =~ /\d/ ) {
+	    $word_score += $score[$i];
+	}
+	$i++;
+    }
+
+    # flush the final run, recording its score as well so that @words and
+    # @word_scores stay parallel
+    if( defined $word ) {
+	$word =~s/\s+//g;
+	if( length($word) ){
+	    push @words, $word;
+	    push @word_scores, (($word_score || 0)/length($word));
+	}
+    }
+
+    my $feat = Bio::SeqFeature::Generic->new(-seq_id=>$name);
+    my $offset = 0;
+    my $count = 1;
+    for my $n ( 0 .. $#words ){
+	my $w = $words[$n];
+	# search from the end of the previous motif so that repeated motifs
+	# map to successive positions
+	my $index = index($pattern,$w,$offset);
+	$offset = $index + length($w);
+	my $subfeat = Bio::SeqFeature::Generic->new
+	    ( -seq_id  =>"$name-motif".$count++,
+	      -start   => $index+1, 
+	      -end     => $index+length($w),
+	      -source  =>"FootPrinter",
+	      -score   => $word_scores[$n]
+	      );
+	# ugh - not sure the sub_SeqFeature situation will
+	# be around forever- things should probably be
+	# grouped by a 'group' tag instead ala GFF3 
+	# perhaps when Lincoln's API changes are 
+	# made to SeqFeatures this will get changed
+	$feat->add_sub_SeqFeature($subfeat,'EXPAND');
+    }
+    my $priseq = Bio::PrimarySeq->new(-id=>$name,-seq=>$seq);
+    $feat->attach_seq($priseq);
+    return $feat;
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GFF.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GFF.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GFF.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1266 @@
+# $Id: GFF.pm,v 1.58.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::GFF
+#
+# Cared for by the Bioperl core team
+#
+# Copyright Matthew Pocock
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::GFF - A Bio::SeqAnalysisParserI compliant GFF format parser
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::GFF;
+
+    # specify input via -fh or -file
+    my $gffio = Bio::Tools::GFF->new(-fh => \*STDIN, -gff_version => 2);
+    my $feature;
+    # loop over the input stream
+    while($feature = $gffio->next_feature()) {
+        # do something with feature
+    }
+    $gffio->close();
+
+    # you can also obtain a GFF parser as a SeqAnalysisParserI in
+    # HT analysis pipelines (see Bio::SeqAnalysisParserI and
+    # Bio::Factory::SeqAnalysisParserFactory)
+    my $factory = Bio::Factory::SeqAnalysisParserFactory->new();
+    my $parser = $factory->get_parser(-input => \*STDIN, -method => "gff");
+    while($feature = $parser->next_feature()) {
+        # do something with feature
+    }
+
+=head1 DESCRIPTION
+
+This class provides a simple GFF parser and writer. In the sense of a
+SeqAnalysisParser, it parses an input file or stream into SeqFeatureI
+objects, but is not in any way specific to a particular analysis
+program and the output that program produces.
+
+That is, if you can get your analysis program spit out GFF, here is
+your result parser.
+
+=head1 GFF3 AND SEQUENCE DATA
+
+[added by cjm 2004/07/09]
+
+GFF3 supports sequence data; see
+http://song.sourceforge.net/gff3-jan04.shtml
+
+There are a number of ways to deal with this -
+
+If you call
+
+  $gffio->ignore_sequence(1)
+
+prior to parsing the sequence data is ignored; this is useful if you
+just want the features. It avoids the memory overhead in building and
+caching sequences
+
+Alternatively, you can call either
+
+  $gffio->get_seqs()
+
+Or
+
+  $gffio->seq_id_by_h()
+
+At the B<end> of parsing to get either a list or hashref of Bio::Seq
+objects (see the documentation for each of these methods)
+
+Note that these objects will not have the features attached - you have
+to do this yourself, OR call
+
+  $gffio->features_attached_to_seqs(1)
+
+PRIOR to parsing; this will ensure that the Seqs have the features
+attached; ie you will then be able to call
+
+  $seq->get_SeqFeatures();
+
+And use Bio::SeqIO methods
+
+Note that auto-attaching the features to seqs will incur a higher
+memory overhead as the features must be cached until the sequence data
+is found
+
+=head1 TODO
+
+Make a Bio::SeqIO class specifically for GFF3 with sequence data
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Matthew Pocock
+
+Email mrp-at-sanger.ac.uk
+
+=head1 CONTRIBUTORS 
+
+Jason Stajich, jason-at-biperl-dot-org
+Chris Mungall, cjm-at-fruitfly-dot-org
+Steffen Grossmann [SG], grossman at molgen.mpg.de
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::GFF;
+
+use vars qw($HAS_HTML_ENTITIES);
+use strict;
+
+use Bio::Seq::SeqFactory;
+use Bio::LocatableSeq;
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $parser = new Bio::Tools::GFF(-gff_version => 2,
+					    -file        => "filename.gff");
+           or
+           my $writer = new Bio::Tools::GFF(-gff_version => 3,
+					    -file        => ">filename.gff3");
+ Function: Creates a new instance. Recognized named parameters are -file, -fh,
+           and -gff_version.
+ Returns : a new object
+ Args    : named parameters
+           -gff_version => [1,2,3]
+
+=cut
+
+
+{   # Counter shared by every Bio::Tools::GFF instance in this session, so
+    # that generated GFF3 feature IDs stay unique within a GFF file no
+    # matter how many GFF.pm objects are created.
+
+    my $gff3_featureID = 0;
+
+    # Advances the shared counter and returns its new value (1, 2, 3, ...).
+    sub _incrementGFF3ID {
+        $gff3_featureID += 1;
+        return $gff3_featureID;
+    }
+}
+
+
+sub new {
+  # Creates the parser/writer. Recognised named arguments are the IO ones
+  # (-file / -fh, handled by _initialize_io), -gff_version (defaults to 2)
+  # and -noparse (skip reading the header at construction time).
+  my ($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  
+  my ($gff_version, $noparse) = $self->_rearrange([qw(GFF_VERSION NOPARSE)],@args);
+
+  # initialize IO
+  $self->_initialize_io(@args);
+  $self->_parse_header() unless $noparse;
+
+  $gff_version ||= 2;
+  # gff_version() returning false means the version was not accepted
+  if( ! $self->gff_version($gff_version) )  {
+      $self->throw("Can't build a GFF object with the unknown version ".
+		   $gff_version);
+  }
+  $self->{'_first'} = 1;
+  return $self;
+}
+
+=head2 _parse_header
+
+ Title   : _parse_header
+ Usage   : $gffio->_parse_header()
+ Function: used to parse GFF header lines.  currently
+           produces Bio::LocatableSeq objects from ##sequence-region
+           lines
+ Returns : 1 on success
+ Args    : none
+
+
+=cut
+
+sub _parse_header{
+   # Consumes the leading header lines of the input. ##sequence-region lines
+   # become Bio::LocatableSeq objects in $self->{'segments'}; FASTA records
+   # found here are parsed and stored via _seq_by_id_h. Every line that was
+   # read but not handled is pushed back so that later reads see it again.
+   # Reading stops at the first line that does not start with '#'.
+   my ($self) = @_;
+
+   my @unhandled;
+   local $^W = 0; # hide warnings when we try and parse from a file opened
+                  # for writing - there isn't really a better way to do
+                  # AFAIK - cannot detect if a FH is read or write.
+   while(my $line = $self->_readline()){
+ 	 my $handled = 0;
+	 # skip whitespace-only lines; the test must look at $line, not at
+	 # whatever the caller happens to have in $_
+	 next if $line =~ /^\s+$/;
+	 if($line =~ /^\#\#sequence-region\s+(\S+)\s+(\S+)\s+(\S+)\s*/){
+	   my($seqid,$start,$end) = ($1,$2,$3);
+	   push @{ $self->{'segments'} }, Bio::LocatableSeq->new
+	       (
+		-id    => unescape($seqid),
+		-start => $start,
+		-end   => $end,
+                -length => ($end - $start + 1),  ## make the length explicit
+		);
+	   $handled = 1;
+	 } elsif($line =~ /^(\#\#feature-ontology)/) {
+	   #to be implemented
+	   $self->warn("$1 header tag parsing unimplemented");
+	 } elsif($line =~ /^(\#\#attribute-ontology)/) {
+	   #to be implemented
+	   $self->warn("$1 header tag parsing unimplemented");
+	 } elsif($line =~ /^(\#\#source-ontology)/) {
+	   #to be implemented
+	   $self->warn("$1 header tag parsing unimplemented");
+	 } elsif($line =~ /^(\#\#\#)/) {
+	   #to be implemented
+	   $self->warn("$1 header tag parsing unimplemented");
+	 } elsif($line =~ /^(\#\#FASTA)/) {
+            # initial ##FASTA is optional - artemis does not use it
+            $line = $self->_readline();
+            # also covers ##FASTA being the last line of the input
+            unless (defined $line && $line =~ /^\>(\S+)/) {
+                $self->throw("##FASTA directive must be followed by fasta header, not: "
+                             . (defined $line ? $line : ''));
+            }
+	 }
+         
+         if ($line =~ /^\>(.*)/) {
+             # seq data can be at header or footer
+             my $seq = $self->_parse_sequence($line);
+             if ($seq) {
+                 $self->_seq_by_id_h->{$seq->primary_id} = $seq;
+             }
+         }
+             
+
+ 	 if(!$handled){
+	   push @unhandled, $line
+ 	 }
+
+	 #looks like the header is over!
+	 last unless $line =~ /^\#/;
+   }
+
+   # give back everything that was read but not consumed, in input order
+   foreach my $line (@unhandled){
+	 $self->_pushback($line);
+   }
+
+   return 1;
+}
+
+# Parses one FASTA record whose header line is passed in $line, reading the
+# residue lines from the input. Reading stops at a line starting with '#'
+# (which is consumed) or at the next '>' header (which is pushed back).
+# Returns a Bio::Seq built by Bio::Seq::SeqFactory, or nothing when
+# ignore_sequence is set. When features_attached_to_seqs is set, the features
+# indexed for this sequence id are attached and the index entry is emptied.
+# Throws if $line is not a FASTA header.
+sub _parse_sequence {
+    my ($self, $line) = @_;
+
+    if ($line !~ /^\>(.*)/) {
+        $self->throw("expected fasta header, not: $line");
+    }
+
+    # split the header into id and optional free-text description
+    my $seqid = $1;
+    $seqid =~ s/\s+$//;
+    my $desc = '';
+    if ($seqid =~ /(\S+)\s+(.*)/) {
+        ($seqid, $desc) = ($1,$2);
+    }
+
+    my $res = '';
+    while (my $seqline = $self->_readline) {
+        if ($seqline =~ /^\#/) {
+            last;
+        }
+        if ($seqline =~ /^\>/) {
+            $self->_pushback($seqline);
+            last;
+        }
+        $seqline =~ s/\s//g;
+        $res .= $seqline;
+    }
+    # the record is still consumed from the input when sequences are ignored
+    return if $self->ignore_sequence;
+
+    my $seqfactory = Bio::Seq::SeqFactory->new('Bio::Seq');
+    my $seq = $seqfactory->create(-seq=>$res, 
+                                  -id=>$seqid,
+                                  -desc=>$desc);
+    $seq->accession_number($seqid);
+    if ($self->features_attached_to_seqs) {
+        my $idx = $self->_feature_idx_by_seq_id;
+        # a sequence may have no indexed features at all; fall back to an
+        # empty list rather than dereferencing undef
+        my @feats = @{ $idx->{$seqid} || [] };
+        $seq->add_SeqFeature($_) foreach @feats;
+        @{ $idx->{$seqid} } = ();
+    }
+    return $seq;
+}
+
+
+=head2 next_segment
+
+ Title   : next_segment
+ Usage   : my $seq = $gffio->next_segment;
+ Function: Returns a Bio::LocatableSeq object corresponding to a 
+           GFF "##sequence-region" header line.
+ Example :
+ Returns : A Bio::LocatableSeq object, or undef if
+           there are no more sequences.
+ Args    : none
+
+
+=cut
+
+sub next_segment{
+   # Returns the next Bio::LocatableSeq collected from the
+   # "##sequence-region" header lines, or nothing when none were collected
+   # or all have been handed out. Extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+   return shift @{ $self->{'segments'} } if defined $self->{'segments'};
+   return;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $gffio->next_feature();
+ Function: Returns the next feature available in the input file or stream, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none    
+
+=cut
+
+sub next_feature {
+    # Returns the next feature as a Bio::SeqFeature::Generic, or nothing once
+    # the input is consumed. Comment lines, blank lines and "//" lines are
+    # skipped. Embedded FASTA records are parsed along the way and stored
+    # via _seq_by_id_h.
+    my ($self) = @_;
+    
+    my $gff_string;
+    
+    # be graceful about empty lines or comments, and make sure we return undef
+    # if the input's consumed
+    LINE:
+    while(defined($gff_string = $self->_readline())) {	
+        # a '###' line means all forward refs have been seen; TODO
+        next LINE if($gff_string =~ /^\#/ || $gff_string =~ /^\s*$/ ||
+                     $gff_string =~ m{^//});
+
+        if ($gff_string =~ /^\>(.+)/) {
+            # fasta can be in header or footer
+            my $seq = $self->_parse_sequence($gff_string);
+            if ($seq) {
+                $self->_seq_by_id_h->{$seq->primary_id} = $seq;
+            }
+            # Always move on to the next line, even when no sequence object
+            # came back (e.g. sequences are being ignored). Re-testing the
+            # same header line would never terminate. The line that follows
+            # the record goes through the comment/blank filter again.
+            next LINE;
+        }
+        last LINE; 
+    }
+    return unless $gff_string;
+
+    my $feat = Bio::SeqFeature::Generic->new();
+    $self->from_gff_string($feat, $gff_string);
+
+    # remember the feature so it can be attached once its sequence is parsed
+    if ($self->features_attached_to_seqs) {
+        push(@{$self->_feature_idx_by_seq_id->{$feat->seq_id}},
+             $feat);
+    }
+
+    return $feat;
+}
+
+# Get/set the hashref that indexes parsed features by sequence id; an empty
+# hash is created on demand when the stored value is false.
+sub _feature_idx_by_seq_id {
+    my ($self, @new) = @_;
+
+    if (@new) {
+        $self->{__feature_idx_by_seq_id} = $new[0];
+    }
+    $self->{__feature_idx_by_seq_id} ||= {};
+
+    return $self->{__feature_idx_by_seq_id};
+}
+
+
+=head2 from_gff_string
+
+ Title   : from_gff_string
+ Usage   : $gff->from_gff_string($feature, $gff_string);
+ Function: Sets properties of a SeqFeatureI object from a GFF-formatted
+           string. Interpretation of the string depends on the version
+           that has been specified at initialization.
+
+           This method is used by next_feature(). It actually dispatches to
+           one of the version-specific (private) methods.
+ Example :
+ Returns : void
+ Args    : A Bio::SeqFeatureI implementing object to be initialized
+           The GFF-formatted string to initialize it from
+
+=cut
+
+sub from_gff_string {
+    my ($self, $feat, $gff_string) = @_;
+
+    # Dispatch on the configured GFF version; anything other than 1 or 3
+    # is handled by the GFF2 parser.
+    return $self->_from_gff1_string($feat, $gff_string)
+	if $self->gff_version() == 1;
+
+    return $self->_from_gff3_string($feat, $gff_string)
+	if $self->gff_version() == 3;
+
+    return $self->_from_gff2_string($feat, $gff_string);
+}
+
+=head2 _from_gff1_string
+
+ Title   : _from_gff1_string
+ Usage   :
+ Function:
+ Example :
+ Returns : void
+ Args    : A Bio::SeqFeatureI implementing object to be initialized
+           The GFF-formatted string to initialize it from
+
+=cut
+
+sub _from_gff1_string {
+   # Fills $feat from one tab-separated GFF1 line: eight fixed columns
+   # followed by any number of group columns. A group column of the form
+   # key=value becomes a tag/value pair; any other group column is stored
+   # under the 'group' tag. Throws if fewer than eight columns are present.
+   my ($gff, $feat, $string) = @_;
+   chomp $string;
+   my ($seqname, $source, $primary, $start, $end, $score, 
+       $strand, $frame, @group) = split(/\t/, $string);
+
+   if ( !defined $frame ) {
+       $feat->throw("[$string] does not look like GFF to me");
+   }
+   # a non-numeric frame (e.g. '.') is stored as 0
+   $frame = 0 unless( $frame =~ /^\d+$/);
+   $feat->seq_id($seqname);
+   $feat->source_tag($source);
+   $feat->primary_tag($primary);
+   $feat->start($start);
+   $feat->end($end);
+   $feat->frame($frame);
+   # a score of '.' means "no score": leave it unset
+   $feat->score($score) unless $score eq '.';
+
+   # any other strand symbol leaves the strand untouched
+   my %strand_of = ( '-' => -1, '+' => 1, '.' => 0 );
+   $feat->strand($strand_of{$strand}) if exists $strand_of{$strand};
+
+   foreach my $g ( @group ) {
+       if ( $g =~ /(\S+)=(\S+)/ ) {
+	   # copy the captures before the call so the callee receives plain
+	   # values rather than aliases of $1/$2
+	   my ($tag, $value) = ($1, $2);
+	   $feat->add_tag_value($tag, $value);
+       } else {
+	   $feat->add_tag_value('group', $g);
+       }
+   }
+}
+
+=head2 _from_gff2_string
+
+ Title   : _from_gff2_string
+ Usage   :
+ Function:
+ Example :
+ Returns : void
+ Args    : A Bio::SeqFeatureI implementing object to be initialized
+           The GFF2-formatted string to initialize it from
+
+
+=cut
+
+sub _from_gff2_string {
+   # Fills $feat from one GFF2 line: eight fixed columns plus a free-text
+   # attributes column holding semicolon-separated "key value..." entries.
+   # Quoted values may contain semicolons; an unquoted '#' starts a comment.
+   # Throws if fewer than eight columns are present.
+   my ($gff, $feat, $string) = @_;
+   chomp($string);
+
+   # according to the Sanger website, GFF2 should be single-tab
+   # separated elements, and the free-text at the end should contain
+   # text-translated tab symbols but no "real" tabs, so splitting on
+   # \t is safe, and $attribs gets the entire attributes field to be
+   # parsed later
+   # NOTE: the pattern is \t+, so a run of consecutive tabs counts as a
+   # single separator and empty columns are not preserved.
+
+   my ($seqname, $source, $primary, $start, 
+       $end, $score, $strand, $frame, @attribs) = split(/\t+/, $string);
+   my $attribs = join '', @attribs;  # just in case the rule 
+                                     # against tab characters has been broken
+   if ( !defined $frame ) {
+       $feat->throw("[$string] does not look like GFF2 to me");
+   }
+   $feat->seq_id($seqname);
+   $feat->source_tag($source);
+   $feat->primary_tag($primary);
+   $feat->start($start);
+   $feat->end($end);
+   $feat->frame($frame);
+   # a score of '.' means "no score": leave it unset
+   if ( $score eq '.' ) {
+       # $feat->score(undef);
+   } else {
+       $feat->score($score);
+   }
+   # any other strand symbol leaves the strand untouched
+   if ( $strand eq '-' ) { $feat->strand(-1); }
+   if ( $strand eq '+' ) { $feat->strand(1); }
+   if ( $strand eq '.' ) { $feat->strand(0); }
+
+
+   #  <Begin Inefficient Code from Mark Wilkinson> 
+   # this routine is necessary to allow the presence of semicolons in
+   # quoted text. Semicolons are the delimiting character for new
+   # tag/value attributes.  it is more or less a "state" machine, with
+   # the "quoted" flag going up and down as we pass through quotes to
+   # distinguish free-text semicolon and hash symbols from GFF control
+   # characters
+   
+   
+   my $flag = 0; # this could be changed to a bit and just be twiddled
+   my @parsed;
+
+   # run through each character one at a time and check it
+   # NOTE: changed to foreach loop which is more efficient in perl
+   # --jasons
+   # NOTE: the loop variable is a lexical named $a, which hides the
+   # package variable $a used by sort within this loop.
+
+   for my $a ( split //, $attribs ) { 
+       # flag up on entering quoted text, down on leaving it
+       if( $a eq '"') { $flag = ( $flag == 0 ) ? 1:0 }
+       # protect semicolons inside quotes with a placeholder
+       elsif( $a eq ';' && $flag ) { $a = "INSERT_SEMICOLON_HERE"}
+       # an unquoted '#' starts a comment: ignore the rest of the field
+       elsif( $a eq '#' && ! $flag ) { last } 
+       push @parsed, $a;
+   }
+   $attribs = join "", @parsed; # rejoin into a single string
+
+   # <End Inefficient Code>   
+   # Please feel free to fix this and make it more "perlish"
+
+   my @key_vals = split /;/, $attribs;   # attributes are semicolon-delimited
+
+   foreach my $pair ( @key_vals ) {
+       # replace semicolons that were removed from free-text above.
+       $pair =~ s/INSERT_SEMICOLON_HERE/;/g;        
+
+       # separate the key from the value
+       # (the capturing group makes split return the text before the key,
+       # the key itself, and the remainder holding the values)
+       my ($blank, $key, $values) = split  /^\s*([\w\d]+)\s/, $pair; 
+
+
+       # entries without a value part are skipped entirely
+       if( defined $values ) {
+	   my @values;
+	   # free text is quoted, so match each free-text block
+	   # and remove it from the $values string
+	   while ($values =~ s/"(.*?)"//){
+	       # and push it on to the list of values (tags may have
+	       # more than one value... and the value may be undef)	       
+	       push @values, $1;
+	   }
+
+	   # and what is left over should be space-separated
+	   # non-free-text values
+
+	   my @othervals = split /\s+/, $values;  
+	   foreach my $othervalue(@othervals){
+	       # get rid of any empty strings which might 
+	       # result from the split
+	       if (CORE::length($othervalue) > 0) {push @values, $othervalue}  
+	   }
+
+	   # quoted values are added first, then the unquoted ones
+	   foreach my $value(@values){
+	       $feat->add_tag_value($key, $value);
+	   }
+       }
+   }
+}
+
+
+# Populate $feat from one line of GFF3 text.
+#
+# Args   : $gff    - the GFF reader (unused here, kept for the calling
+#                    convention shared with the other _from_gffN_string subs)
+#          $feat   - feature object to fill in
+#          $string - one GFF3 record (trailing newline optional)
+# Throws : via $feat->throw if fewer than eight tab-separated columns exist
+sub _from_gff3_string {
+    my ($gff, $feat, $string) = @_;
+    chomp($string);
+
+    # According to the GFF3 spec columns are tab separated, which allows
+    # unescaped spaces to occur in column 9.
+    my ($seqname, $source, $primary, $start, $end,
+        $score, $strand, $frame, $groups) = split(/\t/, $string);
+
+    if ( ! defined $frame ) {
+        $feat->throw("[$string] does not look like GFF3 to me");
+    }
+    $feat->seq_id($seqname);
+    $feat->source_tag($source);
+    $feat->primary_tag($primary);
+    $feat->start($start);
+    $feat->end($end);
+    $feat->frame($frame);
+
+    # '.' means "no score"; leave the feature's score untouched then.
+    $feat->score($score) unless $score eq '.';
+
+    # Any other strand symbol leaves the feature's strand untouched.
+    my %strand_value_of = ( '-' => -1, '+' => 1, '.' => 0 );
+    $feat->strand( $strand_value_of{$strand} )
+        if exists $strand_value_of{$strand};
+
+    # The attribute column may be absent altogether.
+    return unless defined $groups;
+
+    for my $group ( split(/\s*;\s*/, $groups) ) {
+        # Limit of 2: only the first '=' separates tag from value, so a
+        # value that itself contains '=' is not truncated.
+        my ($tag, $value) = split /=/, $group, 2;
+        next unless defined $tag && length $tag;
+        next unless defined $value;
+
+        $tag = unescape($tag);
+        for my $v ( map { unescape($_) } split /,/, $value ) {
+            $feat->add_tag_value($tag, $v);
+        }
+    }
+    return;
+}
+
+# taken from Bio::DB::GFF
+sub unescape {
+  # Decode URL-style escaping: '+' becomes a space and each %XX pair
+  # (two hex digits) becomes the character with that code.
+  my ($text) = @_;
+  $text =~ tr/+/ /;
+  $text =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
+  return $text;
+}
+
+=head2 write_feature
+
+ Title   : write_feature
+ Usage   : $gffio->write_feature($feature);
+ Function: Writes the specified SeqFeatureI object in GFF format to the stream
+           associated with this instance.
+ Returns : none
+ Args    : An array of Bio::SeqFeatureI implementing objects to be serialized
+
+=cut
+
+sub write_feature {
+    my $self     = shift;
+    my @to_write = @_;
+    return if !@to_write;
+
+    # The version pragma is printed once, ahead of the first record, and
+    # only when writing GFF3.
+    my $wants_header = $self->{'_first'} && $self->gff_version() == 3;
+    $self->_print("##gff-version 3\n") if $wants_header;
+    $self->{'_first'} = 0;
+
+    # One line per feature, formatted for the configured GFF version.
+    for my $feat ( @to_write ) {
+        my $line = $self->gff_string($feat);
+        $self->_print($line."\n");
+    }
+}
+
+=head2 gff_string
+
+ Title   : gff_string
+ Usage   : $gffstr = $gffio->gff_string($feature);
+ Function: Obtain the GFF-formatted representation of a SeqFeatureI object.
+           The formatting depends on the version specified at initialization.
+
+           This method is used by write_feature(). It actually dispatches to
+           one of the version-specific (private) methods.
+ Example :
+ Returns : A GFF-formatted string representation of the SeqFeature
+ Args    : A Bio::SeqFeatureI implementing object to be GFF-stringified
+
+=cut
+
+sub gff_string{
+    my $self    = shift;
+    my $feature = shift;
+
+    # Dispatch on the configured GFF version.
+    my $version = $self->gff_version();
+    return $self->_gff1_string($feature)  if $version == 1;
+    return $self->_gff3_string($feature)  if $version == 3;
+    return $self->_gff25_string($feature) if $version == 2.5;
+
+    # Anything else (including plain version 2) is written as GFF2.
+    return $self->_gff2_string($feature);
+}
+
+=head2 _gff1_string
+
+ Title   : _gff1_string
+ Usage   : $gffstr = $gffio->_gff1_string
+ Function: 
+ Example :
+ Returns : A GFF1-formatted string representation of the SeqFeature
+ Args    : A Bio::SeqFeatureI implementing object to be GFF-stringified
+
+=cut
+
+sub _gff1_string{
+   my ($gff, $feat) = @_;
+
+   # Score and frame are optional accessors; GFF writes '.' for "no value".
+   my ($score, $frame, $name);
+   $score = $feat->score() if $feat->can('score');
+   $score = '.' unless defined $score;
+   $frame = $feat->frame() if $feat->can('frame');
+   $frame = '.' unless defined $frame;
+
+   # Map numeric strand to the GFF symbol; unknown/zero becomes '.'.
+   my $strand = $feat->strand();
+   if    ( ! $strand )      { $strand = '.'; }
+   elsif ( $strand == 1 )   { $strand = '+'; }
+   elsif ( $strand == -1 )  { $strand = '-'; }
+
+   # Test for the accessor that is actually called. The original tested
+   # can('seqname') but then invoked seq_id().
+   $name = $feat->seq_id() if $feat->can('seq_id');
+   $name ||= 'SEQ';
+
+   my $str = join("\t",
+                  $name,
+                  $feat->source_tag(),
+                  $feat->primary_tag(),
+                  $feat->start(),
+                  $feat->end(),
+                  $score,
+                  $strand,
+                  $frame);
+
+   # GFF1 appends every tag value as " tag=value".
+   foreach my $tag ( $feat->all_tags ) {
+       foreach my $value ( $feat->each_tag_value($tag) ) {
+           $str .= " $tag=$value";
+       }
+   }
+
+   return $str;
+}
+
+=head2 _gff2_string
+
+ Title   : _gff2_string
+ Usage   : $gffstr = $gffio->_gff2_string
+ Function: 
+ Example :
+ Returns : A GFF2-formatted string representation of the SeqFeature
+ Args    : A Bio::SeqFeatureI implementing object to be GFF2-stringified
+
+=cut
+
+sub _gff2_string{
+   my ($gff, $origfeat) = @_;
+
+   # For a FeaturePair the record describes feature2; feature1 is only
+   # used for the synthesized Target attribute below.
+   my $is_pair = $origfeat->isa('Bio::SeqFeature::FeaturePair');
+   my $feat    = $is_pair ? $origfeat->feature2 : $origfeat;
+
+   # Score and frame are optional accessors; GFF writes '.' for "no value".
+   my ($score, $frame, $name);
+   $score = $feat->score() if $feat->can('score');
+   $score = '.' unless defined $score;
+   $frame = $feat->frame() if $feat->can('frame');
+   $frame = '.' unless defined $frame;
+
+   my $strand = $feat->strand();
+   if    ( ! $strand )      { $strand = '.'; }
+   elsif ( $strand == 1 )   { $strand = '+'; }
+   elsif ( $strand == -1 )  { $strand = '-'; }
+
+   # Test for the accessor that is actually called. The original tested
+   # can('seqname') but then invoked seq_id().
+   $name = $feat->seq_id() if $feat->can('seq_id');
+   $name ||= 'SEQ';
+
+   my $str1 = join("\t",
+                   $name,
+                   $feat->source_tag(),
+                   $feat->primary_tag(),
+                   $feat->start(),
+                   $feat->end(),
+                   $score,
+                   $strand,
+                   $frame);
+
+   # GFF2 attribute column (per the Sanger GFF2 description quoted by MW,
+   # November 17th, 2000): tag/value structure flattened onto one line
+   # with semicolon separators; free-text values must be double-quoted.
+   my @group;
+   foreach my $tag ( $feat->all_tags ) {
+       my @v;
+       foreach my $value ( $feat->each_tag_value($tag) ) {
+           if ( ! ( defined $value && length($value) ) ) {
+               $value = '""';
+           }
+           elsif ( $value =~ /[^A-Za-z0-9_]/ ) {
+               # Anything beyond identifier characters is free text:
+               # make tabs/newlines visible escapes, then quote it.
+               $value =~ s/\t/\\t/g;
+               $value =~ s/\n/\\n/g;
+               $value = '"' . $value . '" ';
+           }
+           push @v, $value;
+       }
+       push @group, "$tag ".join(" ", @v);
+   }
+   my $str2 = join(' ; ', @group);
+
+   # Add Target information for Feature Pairs (flagged upstream as
+   # "a bad hack"). The Target goes before the other tag/value pairs.
+   if ( $is_pair
+        && ! $feat->has_tag('Target')
+        && ! $feat->has_tag('Group') ) {
+       my $target = $origfeat->feature1;
+       # On the minus strand the coordinates are written end-first.
+       my ($tstart, $tend) = $target->strand < 0
+           ? ( $target->end,   $target->start )
+           : ( $target->start, $target->end );
+       my $target_str = sprintf("Target %s %d %d",
+                                $target->seq_id, $tstart, $tend);
+       $str2 = $str2 ? $target_str." ; ".$str2 : $target_str;
+   }
+   return $str1."\t".$str2;
+}
+
+
+
+=head2 _gff25_string
+
+ Title   : _gff25_string
+ Usage   : $gffstr = $gffio->_gff25_string
+ Function: To get a format of GFF that is peculiar to Gbrowse/Bio::DB::GFF
+ Example :
+ Returns : A GFF2.5-formatted string representation of the SeqFeature
+ Args    : A Bio::SeqFeatureI implementing object to be GFF2.5-stringified
+
+=cut
+
+sub _gff25_string {
+    my ($gff, $origfeat) = @_;
+
+    # For a FeaturePair the record describes feature2; feature1 is only
+    # used for the synthesized Target attribute below.
+    my $is_pair = $origfeat->isa('Bio::SeqFeature::FeaturePair');
+    my $feat    = $is_pair ? $origfeat->feature2 : $origfeat;
+
+    # Score and frame are optional accessors; GFF writes '.' for "no value".
+    my ($score, $frame, $name);
+    $score = $feat->score() if $feat->can('score');
+    $score = '.' unless defined $score;
+    $frame = $feat->frame() if $feat->can('frame');
+    $frame = '.' unless defined $frame;
+
+    my $strand = $feat->strand();
+    if    ( ! $strand )      { $strand = '.'; }
+    elsif ( $strand == 1 )   { $strand = '+'; }
+    elsif ( $strand == -1 )  { $strand = '-'; }
+
+    # Test for the accessor that is actually called. The original tested
+    # can('seqname') but then invoked seq_id().
+    $name = $feat->seq_id() if $feat->can('seq_id');
+    $name ||= 'SEQ';
+
+    my $str1 = join("\t",
+                    $name,
+                    $feat->source_tag(),
+                    $feat->primary_tag(),
+                    $feat->start(),
+                    $feat->end(),
+                    $score,
+                    $strand,
+                    $frame);
+
+    # Group/Target attributes are collected separately so that they can
+    # be written ahead of all other tag/value pairs.
+    my (@firstgroup, @group);
+    foreach my $tag ( $feat->all_tags ) {
+        my @v;
+        foreach my $value ( $feat->each_tag_value($tag) ) {
+            if ( ! ( defined $value && length($value) ) ) {
+                $value = '""';
+            }
+            elsif ( $value =~ /[^A-Za-z0-9_]/ ) {
+                # Anything beyond identifier characters is free text:
+                # make tabs/newlines visible escapes, then quote it.
+                $value =~ s/\t/\\t/g;
+                $value =~ s/\n/\\n/g;
+                $value = '"' . $value . '" ';
+            }
+            push @v, $value;
+        }
+        my $attribute = "$tag ".join(" ", @v);
+        if ( $tag eq 'Group' || $tag eq 'Target' ) { # hopefully not both...
+            push @firstgroup, $attribute;
+        }
+        else {
+            push @group, $attribute;
+        }
+    }
+    my $str2 = join(' ; ', (@firstgroup, @group));
+
+    # Add Target information for Feature Pairs (flagged upstream as
+    # "a bad hack"). The Target goes before the other tag/value pairs.
+    if ( $is_pair
+         && ! $feat->has_tag('Target')
+         && ! $feat->has_tag('Group') ) {
+        my $target = $origfeat->feature1;
+        # On the minus strand the coordinates are written end-first.
+        my ($tstart, $tend) = $target->strand < 0
+            ? ( $target->end,   $target->start )
+            : ( $target->start, $target->end );
+        my $target_str = sprintf("Target %s ; tstart %d ; tend %d",
+                                 $target->seq_id, $tstart, $tend);
+        $str2 = $str2 ? $target_str." ; ".$str2 : $target_str;
+    }
+    return $str1 . "\t".  $str2;
+}
+
+
+=head2 _gff3_string
+
+  Title   : _gff3_string
+  Usage   : $gffstr = $gffio->_gff3_string
+  Function: 
+  Example :
+  Returns : A GFF3-formatted string representation of the SeqFeature
+  Args    : A Bio::SeqFeatureI implementing object to be GFF3-stringified
+
+=cut
+
+sub _gff3_string {
+    my ($gff, $origfeat) = @_;
+
+    # For a FeaturePair the record describes feature2; feature1 is only
+    # used for the synthesized Target attribute below.
+    my $is_pair = $origfeat->isa('Bio::SeqFeature::FeaturePair');
+    my $feat    = $is_pair ? $origfeat->feature2 : $origfeat;
+
+    # The return value is not used (the "autogenerated ID" attribute is
+    # disabled); the call is kept because the original made it for every
+    # record -- presumably it advances the writer's ID counter.
+    $gff->_incrementGFF3ID();
+
+    # Score and frame are optional accessors; GFF writes '.' for "no value".
+    my ($score, $frame, $name);
+    $score = $feat->score() if $feat->can('score');
+    $score = '.' unless defined $score;
+    $frame = $feat->frame() if $feat->can('frame');
+    $frame = '.' unless defined $frame;
+
+    my $strand = $feat->strand();
+    if    ( ! $strand )      { $strand = '.'; }
+    elsif ( $strand == 1 )   { $strand = '+'; }
+    elsif ( $strand == -1 )  { $strand = '-'; }
+
+    # Test for the accessor that is actually called. The original tested
+    # can('seqname') but then invoked seq_id().
+    $name = $feat->seq_id() if $feat->can('seq_id');
+    $name ||= 'SEQ';
+
+    # Force leading ID and Parent tags. Exact comparison is required: a
+    # substring match would silently drop tags such as "GeneID".
+    my @all_tags = grep { $_ ne 'ID' && $_ ne 'Parent' } $feat->all_tags;
+    unshift @all_tags, 'Parent' if $feat->has_tag('Parent');
+    unshift @all_tags, 'ID'     if $feat->has_tag('ID');
+
+    # Tags with this exact spelling keep their capitalisation; every
+    # other tag gets a lower-case first letter.
+    my $reserved_tag = qr/^(?:ID|Name|Alias|Parent|Gap|Target|Derives_from|Note|Dbxref|Ontology_term)$/;
+
+    my @groups;
+    for my $tag ( @all_tags ) {
+        next if $tag eq 'Target';   # handled separately below
+        my @v;
+        for my $value ( $feat->each_tag_value($tag) ) {
+            my $escaped = $value;
+            if ( defined $escaped && length($escaped) ) {
+                # Tabs and newlines become visible backslash escapes...
+                $escaped =~ s/\t/\\t/g;
+                $escaped =~ s/\n/\\n/g;
+                # ...and the remaining separator characters are
+                # percent-encoded. %02X keeps two hex digits so that
+                # "\r" becomes %0D, which unescape() can decode.
+                $escaped =~ s/([\r%&=;,])/sprintf("%%%02X",ord($1))/ge;
+            } else {
+                # completely empty values are written as empty quotes
+                $escaped = '""';
+            }
+            push @v, $escaped;
+        }
+        my $out_tag = ( $tag =~ $reserved_tag ) ? $tag : lcfirst($tag);
+        push @groups, "$out_tag=".join(",", @v);
+    }
+
+    # Add Target information for Feature Pairs.
+    # NOTE(review): unlike the GFF2 writers this requires the Target tag
+    # to be present; condition kept exactly as found -- confirm intent.
+    if ( $feat->has_tag('Target')
+         && ! $feat->has_tag('Group')
+         && $is_pair ) {
+        my $target    = $origfeat->feature1;
+        my $target_id = $target->seq_id;
+        $target_id =~ s/([\t\n\r%&=;,])/sprintf("%%%02X",ord($1))/ge;
+
+        # On the minus strand the coordinates are written end-first.
+        my ($tstart, $tend) = $target->strand < 0
+            ? ( $target->end,   $target->start )
+            : ( $target->start, $target->end );
+        push @groups, sprintf("Target=%s %d %d", $target_id, $tstart, $tend);
+    }
+
+    my @head = ( $name, $feat->source_tag() || '.', $feat->primary_tag() );
+    my @tail = ( $score, $strand, $frame, join(';', @groups) );
+
+    # A split location yields one line per sub-location, all sharing the
+    # same attributes, joined by newlines (no trailing newline).
+    if ( $feat->location->isa("Bio::Location::SplitLocationI") ) {
+        return join("\n",
+                    map { join("\t", @head, $_->start(), $_->end(), @tail) }
+                    $feat->location->each_Location);
+    }
+    return join("\t", @head, $feat->start(), $feat->end(), @tail);
+}
+
+=head2 gff_version
+
+  Title   : gff_version
+  Usage   : $gffversion = $gffio->gff_version
+  Function: 
+  Example :
+  Returns : The GFF version this parser will accept and emit.
+  Args    : none
+
+=cut
+
+sub gff_version {
+    my $self  = shift;
+    my $value = shift;
+
+    if ( defined $value ) {
+        # Only the listed versions are accepted; any other value is
+        # silently ignored and the stored setting is kept.
+        my $is_supported = 0;
+        foreach my $known ( 1, 2, 2.5, 3 ) {
+            $is_supported = 1 if $value == $known;
+        }
+        $self->{'GFF_VERSION'} = $value if $is_supported;
+    }
+    return $self->{'GFF_VERSION'};
+}
+
+# Make filehandles
+
+=head2 newFh
+
+ Title   : newFh
+ Usage   : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-format=>'Format')
+ Function: does a new() followed by an fh()
+ Example : $fh = Bio::Tools::GFF->newFh(-file=>$filename,-format=>'Format')
+           $feature = <$fh>;            # read a feature object
+           print $fh $feature;          # write a feature object
+ Returns : filehandle tied to the Bio::Tools::GFF class
+ Args    :
+
+
+=cut
+
+sub newFh {
+    my ($class, @ctor_args) = @_;
+
+    # Construct the parser/writer first; give up (empty return) if
+    # construction yields nothing, otherwise hand back its tied handle.
+    my $gffio = $class->new(@ctor_args);
+    return unless $gffio;
+    return $gffio->fh;
+}
+
+=head2 fh
+
+ Title   : fh
+ Usage   : $obj->fh
+ Function:
+ Example : $fh = $obj->fh;      # make a tied filehandle
+           $feature = <$fh>;    # read a feature object
+           print $fh $feature;  # write a feature object
+ Returns : filehandle tied to Bio::Tools::GFF class
+ Args    : none
+
+
+=cut
+
+
+sub fh {
+    my ($self) = @_;
+
+    # Works for both instance and class invocation.
+    my $tie_class = ref($self) || $self;
+
+    # Fresh anonymous glob, tied to this class with $self as payload
+    # (TIEHANDLE receives $self).
+    my $glob = Symbol::gensym;
+    tie $$glob, $tie_class, $self;
+    return $glob;
+}
+
+# This accessor is used for accessing the Bio::Seq objects from a GFF3
+# file; if the file you are using has no sequence data you can ignore
+# this accessor
+
+# This accessor returns a hash reference containing Bio::Seq objects,
+# indexed by Bio::Seq->primary_id
+
+sub _seq_by_id_h {
+    my ($self, @new_value) = @_;
+
+    # Setter: store and return whatever was passed in.
+    if ( @new_value ) {
+        $self->{'_seq_by_id_h'} = $new_value[0];
+        return $self->{'_seq_by_id_h'};
+    }
+
+    # Getter: lazily create the hash on first access.
+    $self->{'_seq_by_id_h'} ||= {};
+    return $self->{'_seq_by_id_h'};
+}
+
+=head2 get_seqs
+
+ Title   : get_seqs
+ Usage   :
+ Function: Returns all Bio::Seq objects populated by GFF3 file
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub get_seqs {
+    # Extra arguments are accepted but ignored. The original text read
+    # "at args", an e-mail archive mangling of "@args".
+    my ($self, @args) = @_;
+
+    # All objects stored in the by-id hash, in hash order.
+    return values %{$self->_seq_by_id_h};
+}
+
+=head2 features_attached_to_seqs
+
+ Title   : features_attached_to_seqs
+ Usage   : $obj->features_attached_to_seqs(1);
+ Function: For use with GFF3 containing sequence only
+
+    Setting this B<before> parsing ensures that all Bio::Seq object
+    created will have the appropriate features added to them
+
+    defaults to false (off)
+
+    Note that this mode will incur higher memory usage because features
+    will have to be cached until the relevant feature comes along
+
+ Example : 
+ Returns : value of features_attached_to_seqs (a boolean)
+ Args    : on set, new value (a boolean, optional)
+
+
+=cut
+
+sub features_attached_to_seqs{
+    my ($self, @new_value) = @_;
+
+    # Combined getter/setter: any argument (even a false one) is stored.
+    if ( @new_value ) {
+        $self->{'_features_attached_to_seqs'} = $new_value[0];
+    }
+    return $self->{'_features_attached_to_seqs'};
+}
+
+=head2 ignore_sequence
+
+ Title   : ignore_sequence
+ Usage   : $obj->ignore_sequence(1);
+ Function: For use with GFF3 containing sequence only
+
+    Setting this B<before> parsing means that all sequence data will be
+    ignored
+
+ Example : 
+ Returns : value of ignore_sequence (a boolean)
+ Args    : on set, new value (a boolean, optional)
+
+=cut
+
+sub ignore_sequence{
+    my ($self, @new_value) = @_;
+
+    # Combined getter/setter: any argument (even a false one) is stored.
+    if ( @new_value ) {
+        $self->{'_ignore_sequence'} = $new_value[0];
+    }
+    return $self->{'_ignore_sequence'};
+}
+
+
+sub DESTROY {
+    # Destructor: delegate to the object's own close() method.
+    my ($self) = @_;
+    $self->close();
+}
+
+sub TIEHANDLE {
+    # tie() hook: wrap the given GFF object in a small hash blessed into
+    # $class so that READLINE/PRINT can reach it under the 'gffio' key.
+    my $class = shift;
+    my $gffio = shift;
+    my %wrapper = ( 'gffio' => $gffio );
+    return bless \%wrapper, $class;
+}
+
+sub READLINE {
+    my ($self) = @_;
+    my $gffio = $self->{'gffio'};
+
+    # Scalar (or void) context: hand back just the next feature.
+    if ( !wantarray ) {
+        return $gffio->next_feature();
+    }
+
+    # List context: drain the stream until next_feature() returns false.
+    my @features;
+    while ( my $feature = $gffio->next_feature() ) {
+        push @features, $feature;
+    }
+    return @features;
+}
+
+sub PRINT {
+    # print() on the tied handle: forward all printed items to the
+    # wrapped GFF object's write_feature().
+    my ($self, @features) = @_;
+    my $gffio = $self->{'gffio'};
+    $gffio->write_feature(@features);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Gel.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Gel.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Gel.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,238 @@
+# $Id: Gel.pm,v 1.11.4.1 2006/10/02 23:10:32 sendu Exp $
+# 
+# BioPerl module for Bio::Tools::Gel
+# Copyright Allen Day <allenday at ucla.edu>
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Gel - Calculates relative electrophoretic migration distances
+
+=head1 SYNOPSIS
+
+    use Bio::PrimarySeq;
+    use Bio::Restriction::Analysis;
+    use Bio::Tools::Gel;
+
+    # get a sequence
+    my $d = 'AAAAAAAAAGAATTCTTTTTTTTTTTTTTGAATTCGGGGGGGGGGGGGGGGGGGG';
+    my $seq1 = Bio::Seq->new(-id=>'groundhog day',-seq=>$d);
+
+    # cut it with an enzyme
+    my $ra=Bio::Restriction::Analysis->new(-seq=>$seq1);
+    my @cuts = $ra->fragments('EcoRI');
+
+    # analyse the fragments in a gel
+    my $gel = Bio::Tools::Gel->new(-seq=>\@cuts,-dilate=>10);
+    my %bands = $gel->bands;
+    foreach my $band (sort {$b <=> $a} keys %bands){
+      print $band,"\t", sprintf("%.1f", $bands{$band}),"\n";
+    }
+
+    #prints:
+    #20   27.0
+    #25   26.0
+    #10   30.0
+
+
+=head1 DESCRIPTION
+
+This takes a set of sequences or Bio::Seq objects, and calculates their
+respective migration distances using:
+    distance = dilation * (4 - log10(length(dna)));
+
+Source: Molecular Cloning, a Laboratory Manual. Sambrook, Fritsch, Maniatis. 
+CSHL Press, 1989.
+
+Bio::Tools::Gel currently calculates migration distances based solely on
+the length of the nucleotide sequence.  Secondary or tertiary structure, 
+curvature, and other biophysical attributes of a sequence are currently 
+not considered.  Polypeptide migration is currently not supported.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Allen Day
+
+Email allenday at ucla.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Gel;
+use strict;
+
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $gel = new Bio::Tools::Gel(-seq => $sequence,-dilate => 3);
+ Function: Initializes a new Gel
+ Returns : Bio::Tools::Gel
+ Args    : -seq      => Bio::Seq(s), scalar(s) or list of either/both 
+                        (default: none)
+           -dilate   => Expand band migration distances (default: 1)
+
+=cut
+
+sub new {
+  # The original text read "at args", an e-mail archive mangling of "@args".
+  my($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($seqs,$dilate) = $self->_rearrange([qw(SEQ DILATE)],
+					  @args);
+
+  # -seq is optional (documented default: none), so only add bands when
+  # something was supplied. add_band() iterates over an array reference,
+  # so a single scalar or a single sequence object is wrapped first.
+  if( defined $seqs ) {
+      if( ! ref($seqs) ) {
+          $self->add_band([$seqs]);
+      } elsif( ref($seqs) =~ /array/i ) {
+          $self->add_band($seqs);
+      } elsif( $seqs->isa('Bio::PrimarySeqI') ) {
+          $self->add_band([$seqs]);
+      }
+  }
+  $self->dilate($dilate || 1);
+
+  return $self;
+}
+
+
+=head2 add_band
+
+ Title   : add_band
+ Usage   : $gel->add_band($seq);
+ Function: Calls _add_band with a (possibly created) Bio::Seq object.
+ Returns : 
+ Args    : Bio::Seq, scalar sequence, or list of either/both.
+
+=cut
+
+sub add_band {
+  my($self,$args) = @_;
+  return unless defined $args;
+
+  # The POD promises "Bio::Seq, scalar sequence, or list of either/both":
+  # accept a lone item as well as the array reference used internally.
+  my @items = ( ref($args) eq 'ARRAY' ) ? @$args : ($args);
+
+  foreach my $arg (@items){
+      next unless defined $arg;
+      my $seq;
+      if( ! ref($arg) ) {
+	  if( $arg =~ /^\d+$/ ) {
+	      # A bare number is a fragment length: stand in that many Ns.
+	      # Anchored at both ends so "12abc" is not read as a length.
+	      $seq= Bio::PrimarySeq->new(-seq=>"N"x$arg, -id => $arg);
+	  } else {
+	      # Any other scalar is the sequence itself; its length is the id.
+	      $seq= Bio::PrimarySeq->new(-seq=>$arg,-id=>length($arg));
+	  }
+      } elsif( $arg->isa('Bio::PrimarySeqI') ) {
+	  $seq = $arg;
+      } else {
+	  # Previously this fell through and failed on an undefined $seq.
+	  $self->throw("cannot make a band from a ".ref($arg)." reference");
+      }
+
+    $seq->validate_seq or $seq->throw("invalid symbol in sequence".$seq->seq()."\n");
+    $self->_add_band($seq);
+  }
+}
+
+=head2 _add_band
+
+ Title   : _add_band
+ Usage   : $gel->_add_band($seq);
+ Function: Adds a new band to the gel.
+ Returns : 
+ Args    : Bio::Seq object
+
+=cut
+
+sub _add_band {
+  my $self = shift;
+  my $band = shift;
+
+  # Undefined bands are silently ignored.
+  return unless defined $band;
+  push @{ $self->{'bands'} }, $band;
+}
+
+=head2 dilate
+
+ Title   : dilate
+ Usage   : $gel->dilate(1);
+ Function: Sets/retrieves the dilation factor.
+ Returns : dilation factor 
+ Args    : Float or none
+
+=cut
+
+sub dilate {
+  my $self   = shift;
+  my $factor = shift;
+
+  # No argument (or a false one such as 0) means "just read the value".
+  if ( !$factor ) {
+      return $self->{dilate};
+  }
+
+  # Only digits, '.' and 'e' are accepted in the new factor.
+  if ( $factor =~ /[^e\d\.]/ ) {
+      $self->throw("-dilate should be numeric");
+  }
+  return $self->{dilate} = $factor;
+}
+
+# Relative migration distance for a fragment length: 4 - log10(length).
+# bands() calls this as a plain function, so the length arrives in the
+# first argument; when called with two arguments the second one is used
+# if it is true. A false length gives 0.
+sub migrate {
+  my ($first, $second) = @_;
+  my $length = $second ? $second : $first;
+
+  return 0 unless $length;
+  return 4 - log10($length);
+}
+
+=head2 bands
+
+ Title   : bands
+ Usage   : $gel->bands;
+ Function: Calculates migration distances of sequences.
+ Returns : hash of (seq_id => distance)
+ Args    : 
+
+=cut
+
+sub bands {
+  my ($self, @extra) = @_;
+  $self->throw("bands() is read-only") if @extra;
+
+  # Map each stored sequence's id to dilation factor times migration
+  # distance; a later sequence with the same id overwrites an earlier one.
+  my %distance_for;
+  for my $seq ( @{ $self->{bands} } ) {
+      my $travelled = $self->dilate() * migrate( $seq->length );
+      $distance_for{ $seq->id } = $travelled;
+  }
+
+  return %distance_for;
+}
+
+=head2 log10
+
+ Title   : log10
+ Usage   : log10($n);
+ Function: returns base 10 log of $n.
+ Returns : float
+ Args    : float
+
+=cut
+
+#from programming perl
+sub log10 {
+    # Change of base: log10(x) = ln(x) / ln(10).
+    my ($value) = @_;
+    return log($value) / log(10);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Geneid.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Geneid.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Geneid.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,324 @@
+# $Id: Geneid.pm,v 1.5.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# Cared for by Keith James
+#
+# Copyright Genome Research Ltd.
+#
+# You may distribute this module under the same terms as Perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Geneid - Results of one geneid run
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Geneid;
+  my $gid = Bio::Tools::Geneid->new(-file => "geneid.out");
+
+  while (my $gene = $gid->next_prediction)
+  {
+    my @transcripts = $gene->transcripts;
+      foreach my $t (@transcripts)
+      {
+        my @exons = $t->exons;
+        foreach my $e (@exons)
+        {
+          printf("Exon %d..%d\n", $e->start, $e->end);
+        }
+      }
+  }
+
+=head1 DESCRIPTION
+
+This is the parser for the output of geneid by Enrique Blanco and
+Roderic Guigó (IMIM-UPF). See http://www1.imim.es/software/geneid. It
+relies on native geneid output format internally and will work with
+geneid versions 1.0 and 1.1. Currently this module supports only the
+default mode of operation which is to predict exons and assemble an
+optimal gene prediction.
+
+It takes either a file handle or a file name and returns a
+Bio::SeqFeature::Gene::GeneStructure object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Keith James
+
+ Email: kdj at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Geneid;
+
+use vars qw($SOURCE_TAG);
+use strict;
+
+use Bio::Tools::AnalysisResult;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::GeneStructure;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+$SOURCE_TAG = 'geneid';
+
+=head2 new
+
+ Title   : new
+ Usage   : $obj->new(-file => "<geneid.out");
+           $obj->new(-fh => \*GI);
+ Function: Constructor for geneid wrapper. Takes either a file
+         : or filehandle
+ Returns : L<Bio::Tools::Geneid>
+
+=cut
+
+sub new
+{
+    my $class     = shift;
+    my @ctor_args = @_;
+
+    # Build the base object first, then set up I/O from the same
+    # argument list.
+    my $parser = $class->SUPER::new(@ctor_args);
+    $parser->_initialize_io(@ctor_args);
+
+    return $parser;
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $geneid->next_prediction)
+           {
+               # do something
+           }
+ Function: Returns the gene structure prediction of the geneid result
+           file. Call this method repeatedly until FALSE is returned.
+ Returns : A Bio::SeqFeature::Gene::GeneStructure object
+ Args    : None
+
+=cut
+
+# Read exon lines from the stream until the gene id (last field of the
+# line) changes, collecting them into one gene with a single transcript.
+# Returns the gene, or undef when no further exon lines were found.
+sub next_prediction
+{
+    my ($self) = @_;
+
+    # $current_gene_id stays undefined until the first exon line of this
+    # call is seen; $transcript_score accumulates the exon scores.
+    my ($gene, $transcript, $current_gene_id);
+    my $transcript_score = 0;
+
+    my ($gene_id, $exon_type, $exon_start, $exon_end, $exon_score,
+        $exon_strand, $start_phase, $end_phase, $start_sig_score,
+        $end_sig_score, $coding_pot_score, $homol_score);
+
+    while (defined($_ = $self->_readline))
+    {
+        $self->debug($_) if ($self->verbose > 0);
+
+        # Trim leading and trailing whitespace before matching/splitting.
+        s/^\s+//;
+        s/\s+$//;
+
+        # We have a choice of geneid, gff or XML formats. The native
+        # geneid format has more information than gff. However, we
+        # then need to perform the hack of extracting the sequence ID
+        # from the header of the embedded Fasta file which comes after
+        # the exon data, as it is not stored elsewhere. Ack.
+        if (/^>(\S+)\|GeneId/)
+        {
+            # Only the first such header seen sets the target id.
+            my $target_id = $1;
+            $self->_target_id($target_id) unless defined $self->_target_id;
+            next;
+        }
+
+        # Only lines mentioning an exon type are exon records.
+        next unless (/(Single|First|Internal|Terminal)/);
+
+        my @fields = split(/\s+/, $_);
+
+        # Grab gene_id from eol first as there are issues with
+        # inconsistent whitespace in the AA coords field
+        $gene_id = pop @fields;
+
+        ($exon_type, $exon_start, $exon_end, $exon_score,
+         $exon_strand, $start_phase, $end_phase, $start_sig_score,
+         $end_sig_score, $coding_pot_score, $homol_score) = @fields[0..10];
+
+        if (! defined $current_gene_id)
+        {
+            # Starting the requested prediction
+            $current_gene_id = $gene_id;
+            $transcript_score = $exon_score;
+
+            $gene = Bio::SeqFeature::Gene::GeneStructure->new(-source =>
+                                                              $SOURCE_TAG);
+            $transcript = Bio::SeqFeature::Gene::Transcript->new(-source =>
+                                                                 $SOURCE_TAG);
+
+            $self->_add_exon($gene, $transcript, $exon_type, $exon_start, $exon_end, $exon_score,
+                             $exon_strand, $start_phase, $end_phase, $start_sig_score,
+                             $end_sig_score, $coding_pot_score, $homol_score);
+        }
+        elsif ($gene_id eq $current_gene_id)
+        {
+            # Still in requested prediction
+            $transcript_score += $exon_score;
+
+            $self->_add_exon($gene, $transcript, $exon_type, $exon_start, $exon_end, $exon_score,
+                             $exon_strand, $start_phase, $end_phase, $start_sig_score,
+                             $end_sig_score, $coding_pot_score, $homol_score);
+        }
+        else
+        {
+            # Found following prediction: push the (already trimmed) line
+            # back so that the next call starts with it.
+            $self->_pushback($_);
+            last;
+        }
+    }
+
+    # Finalize only if at least one exon line was consumed in this call.
+    if (defined $gene)
+    {
+        $transcript->seq_id($self->_target_id);
+        $transcript->score($transcript_score);
+        $gene->add_transcript($transcript);
+        $gene->seq_id($self->_target_id);
+
+        foreach my $exon ($gene->exons)
+        {
+            $exon->seq_id($self->_target_id);
+        }
+
+        $self->_set_strand($gene);
+    }
+
+    return $gene;
+}
+
+=head2 _add_exon
+
+ Title   : _add_exon
+ Usage   : $obj->_add_exon($gene, $transcript, ... exon data ...)
+ Function: Adds a new exon to both gene and transcript from the data
+         : supplied as args
+ Example :
+ Returns : Nothing
+
+=cut
+
+sub _add_exon
+{
+    my ($self, $gene, $transcript, $exon_type, $exon_start, $exon_end,
+        $exon_score, $exon_strand, $start_phase, $end_phase, $start_sig_score,
+        $end_sig_score, $coding_pot_score, $homol_score) = @_;
+
+    # geneid says "First" where the exon type used here is "Initial".
+    $exon_type =~ s/First/Initial/;
+
+    # Anything other than '+' is treated as the reverse strand.
+    my $strand = -1;
+    $strand = 1 if $exon_strand eq '+';
+
+    my $exon = Bio::SeqFeature::Gene::Exon->new(-source => $SOURCE_TAG,
+                                                -start  => $exon_start,
+                                                -end    => $exon_end,
+                                                -strand => $strand,
+                                                -score  => $exon_score);
+    $exon->is_coding(1);
+
+    # Remaining geneid columns are kept as tag/value pairs, in this order.
+    my @tag_pairs = (
+        [ "Type",                   $exon_type        ],
+        [ 'phase',                  $start_phase      ],
+        [ 'end_phase',              $end_phase        ],
+        [ 'start_signal_score',     $start_sig_score  ],
+        [ 'end_signal_score',       $end_sig_score    ],
+        [ 'coding_potential_score', $coding_pot_score ],
+        [ 'homology_score',         $homol_score      ],
+    );
+    foreach my $pair (@tag_pairs)
+    {
+        $exon->add_tag_value($pair->[0], $pair->[1]);
+    }
+
+    # The transcript takes its strand from the first exon added to it.
+    if ($transcript->strand == 0)
+    {
+        $transcript->strand($strand);
+    }
+    $transcript->add_exon($exon, $exon_type);
+}
+
+=head2 _set_strand
+
+ Title   : _set_strand
+ Usage   : $obj->_set_strand($gene)
+ Function: Sets the overall gene strand to the same strand as all
+         : the exons if they are all on the same strand, or to strand 0
+         : if the exons are on different strands.
+ Example :
+ Returns : Nothing
+
+=cut
+
+sub _set_strand
+{
+    my ($self, $gene) = @_;
+
+    my $fwd = 0;
+    my $rev = 0;
+
+    # Count the exons on each strand.
+    my @exons = $gene->exons;
+    foreach my $exon (@exons)
+    {
+        my $strand = $exon->strand;
+
+        if ($strand == 1)
+        {
+            $fwd++;
+        }
+        elsif ($strand == -1)
+        {
+            $rev++;
+        }
+    }
+
+    # Compare against the number of exons. The original compared against
+    # $#exons (the last index, one less than the count), so a single
+    # forward exon was reported as reverse strand and uniform multi-exon
+    # genes as strand 0.
+    my $total = scalar @exons;
+
+    if ($total && $fwd == $total)
+    {
+        $gene->strand(1);
+    }
+    elsif ($total && $rev == $total)
+    {
+        $gene->strand(-1);
+    }
+    else
+    {
+        # Mixed strands, or no exons at all.
+        $gene->strand(0);
+    }
+
+    return $gene;
+}
+
+=head2 _target_id
+
+ Title   : _target_id
+ Usage   : $obj->_target_id
+ Function: get/set for genomic sequence id
+ Example :
+ Returns : A target ID
+
+=cut
+
+sub _target_id
+{
+    my $self   = shift;
+    my $new_id = shift;
+
+    # Only a true value replaces the stored id; a false or missing
+    # argument makes this a plain getter.
+    $self->{'_target_id'} = $new_id if $new_id;
+
+    return $self->{'_target_id'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genemark.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genemark.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genemark.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,512 @@
+# $Id: Genemark.pm,v 1.17.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Genemark
+#
+# Cared for by Mark Fiers <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp, Mark Fiers
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Genemark - Results of one Genemark run
+
+=head1 SYNOPSIS
+
+   $Genemark = Bio::Tools::Genemark->new(-file => 'result.Genemark');
+   # filehandle:
+   $Genemark = Bio::Tools::Genemark->new( -fh  => \*INPUT );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $Genemark->next_feature() is the same
+   while($gene = $Genemark->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene, which inherits
+       # off Bio::SeqFeature::Gene::Transcript.
+       #
+       # $gene->exons() returns an array of
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # initial exons only
+       @init_exons = $gene->exons('Initial');
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # terminal exons only
+       @term_exons = $gene->exons('Terminal');
+       # singleton exons:
+       ($single_exon) = $gene->exons();
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $Genemark->close();
+
+=head1 DESCRIPTION
+
+The Genemark module provides a parser for Genemark gene structure
+prediction output. It parses one gene prediction into a
+Bio::SeqFeature::Gene::Transcript-derived object.
+
+This module has been developed around genemark.hmm for eukaryotes v2.2a
+and will probably not work with other versions.
+
+
+This module also implements the Bio::SeqAnalysisParserI interface, and
+thus can be used wherever such an object fits. See
+L<Bio::SeqAnalysisParserI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp, Mark Fiers
+
+Email hlapp at gmx.net
+      m.w.e.j.fiers at plant.wag-ur.nl
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Genemark;
+use strict;
+use Symbol;
+
+use Bio::Root::Root;
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+use Bio::Seq;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    # Set up the parser's private state after letting the parent class
+    # initialise its own state with the same arguments.
+    # (The argument list must be unpacked as @args; the archived copy had
+    # it garbled to " at args", which does not compile.)
+    my ($self, @args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+
+    # our private state variables
+    $self->{'_preds_parsed'} = 0;
+    $self->{'_has_cds'} = 0;
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+    # seq stack
+    $self->{'_seqstack'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $Genemark->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /GeneMark.hmm/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+    # Get/set the analysis method name via the inherited accessor.
+    # A true $method that does not match /Genemark\.hmm/i is rejected
+    # with throw() before the inherited method is called.
+    my $self   = shift;
+    my $method = shift;
+
+    if ($method) {
+        $self->throw("method $method not supported in " . ref($self))
+            unless $method =~ /Genemark\.hmm/i;
+    }
+
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $Genemark->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Genemark result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for
+           next_prediction() at present.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    # Alias for next_prediction(), provided for the SeqAnalysisParserI
+    # interface. (The argument list must be unpacked as @args; the archived
+    # copy had it garbled to " at args", which does not compile.)
+    my ($self, @args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $Genemark->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Genemark result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    # Return the next queued gene prediction, parsing the prediction
+    # section first if that has not happened yet.
+    my $self = shift;
+
+    # lazily parse the prediction section on first use
+    $self->_predictions_parsed() or $self->_parse_predictions();
+
+    # hand out the next gene structure (always a single scalar)
+    return scalar $self->_prediction();
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns :
+
+=cut
+
+sub _parse_predictions {
+    # Read the whole Genemark report through $self->_readline(), build one
+    # Bio::Tools::Prediction::Gene per predicted gene (queued with
+    # _add_prediction) holding one Bio::Tools::Prediction::Exon per exon
+    # line, record the analysis method/version/subject/date found in the
+    # header, attach predicted proteins from a trailing FASTA section and,
+    # if an analysis query sequence is set, the concatenated exon sequence.
+    # Marks the predictions as parsed when done.
+    my ($self) = @_;
+
+    # Maps the exon type found in the report to the prefix of the exon's
+    # primary tag ("InitialExon", ..., or plain "Exon").
+    my %exontags = ('Initial'  => 'Initial',
+                    'Internal' => 'Internal',
+                    'Terminal' => 'Terminal',
+                    'Single'   => '',
+                    '_na_'     => '');
+    my $exontag;
+    my $gene;
+    my $seqname;
+    my $current_gene_no = -1;
+
+    while(defined($_ = $self->_readline())) {
+
+        if( (/^\s*(\d+)\s+(\d+)/) || (/^\s*(\d+)\s+[\+\-]/)) {
+
+            # this is an exon, Genemark doesn't predict anything else
+            # $prednr corresponds to geneno.
+            my $prednr = $1;
+
+            # exon no (used in tag: exon_no). $2 is only set when the first
+            # of the two patterns matched. Assign to this variable directly:
+            # a nested "my" would shadow it and leave the number at 0.
+            my $signalnr = $2 ? $2 : 0;
+
+            # split into fields
+            chomp();
+            my @flds = split(' ', $_);
+
+            # create the feature (an exon) object
+            my $predobj = Bio::Tools::Prediction::Exon->new();
+
+            # define info depending on it being eu- or prokaryot
+            my ($start, $end, $orientation, $prediction_source);
+
+            if ($self->analysis_method() =~ /PROKARYOTIC/i) {
+                $prediction_source = "Genemark.hmm.pro";
+                $orientation = ($flds[1] eq '+') ? 1 : -1;
+                ($start, $end) = @flds[(2,3)];
+                $exontag = "_na_";
+
+            } else {
+                $prediction_source = "Genemark.hmm.eu";
+                $orientation = ($flds[2] eq '+') ? 1 : -1;
+                ($start, $end) = @flds[(4,5)];
+                $exontag = $flds[3];
+            }
+
+            # store the data in the exon object
+            $predobj->source_tag($prediction_source);
+            $predobj->start($start);
+            $predobj->end($end);
+            $predobj->strand($orientation);
+
+            $predobj->primary_tag($exontags{$exontag} . "Exon");
+
+            $predobj->add_tag_value('exon_no',"$signalnr") if ($signalnr);
+
+            $predobj->is_coding(1);
+
+            # frame calculation as in the genscan module
+            # is to be implemented...
+
+            # If the $prednr is not equal to the current gene, we
+            # need to make a new gene and close the old one
+            if($prednr != $current_gene_no) {
+                # a new gene, finish the old one if it exists
+                if (defined ($gene)) {
+                    $gene->seq_id($seqname);
+                    $gene = undef ;
+                }
+                # and make a new one
+                $gene = Bio::Tools::Prediction::Gene->new
+                    (
+                     '-primary' => "GenePrediction$prednr",
+                     '-source' => $prediction_source);
+                $self->_add_prediction($gene);
+                $current_gene_no = $prednr;
+            }
+
+            # Add the exon to the gene
+            $gene->add_exon($predobj, ($exontag eq "_na_" ?
+                                       undef : $exontags{$exontag}));
+
+        }
+
+        # NOTE(review): [PROKARYOTIC]* is a character class (any run of
+        # those letters), not the literal word; left unchanged here.
+        if(/^(Genemark\.hmm\s*[PROKARYOTIC]*)\s+\(Version (.*)\)$/i) {
+            $self->analysis_method($1);
+
+            my $gm_version = $2;
+
+            $self->analysis_method_version($gm_version);
+            next;
+        }
+
+        # Matrix file for eukaryot version
+        if (/^Matrices file:\s+(\S+)?/i)  {
+            $self->analysis_subject($1);
+            # since the line after the matrix file is always the date
+            # (in the output file's I have seen!) extract and store this
+            # here
+            if (defined(my $_date = $self->_readline())) {
+                chomp ($_date);
+                $self->analysis_date($_date);
+            }
+        }
+
+        # Matrix file for prokaryot version
+        if (/^Model file name:\s+(\S+)/) {
+            $self->analysis_subject($1);
+            # since the line after the matrix file is always the date
+            # (in the output file's I have seen!) extract and store this
+            # here
+            # NOTE(review): two lines are read and the first is discarded;
+            # presumably the date is on the second line - confirm against
+            # real prokaryotic output.
+            my $_date = $self->_readline() ;
+            if (defined($_date = $self->_readline())) {
+                chomp ($_date);
+                $self->analysis_date($_date);
+            }
+        }
+
+        # Accept both "Sequence name:" and "Sequence file name:". The
+        # optional " file" has to be a group; as a character class it could
+        # never match the longer form.
+        if(/^Sequence(?: file)? name:\s+(.+)\s*$/i) {
+            $seqname = $1;
+            #    $self->analysis_subject($seqname);
+            next;
+        }
+
+        /^>/ && do {
+            $self->_pushback($_);
+
+            # section of predicted aa sequences on recognition
+            # of a fasta start, read all sequences and find the
+            # appropriate gene
+            while (1) {
+                my ($aa_id, $seq) = $self->_read_fasta_seq();
+                last unless ($aa_id);
+
+                # now parse through the predictions to add the pred. protein
+                FINDPRED: foreach my $pred (@{$self->{'_preds'}}) {
+                    $pred->primary_tag() =~ /[^0-9]([0-9]+)$/;
+                    my $geneno = $1;
+                    if ($aa_id =~ /\|gene.$geneno\|/) {
+                        my $seqobj = Bio::Seq->new('-seq' => $seq,
+                                                   '-display_id' => $aa_id,
+                                                   '-alphabet' => "protein");
+                        $pred->predicted_protein($seqobj);
+                        last FINDPRED;
+                    }
+                }
+            }
+
+            last;
+        };
+    }
+
+    # Inside the loop a gene only receives its seq_id when the next gene
+    # starts, so the last gene of the report has to be handled here.
+    $gene->seq_id($seqname) if defined($gene) && defined($seqname);
+
+    # if the analysis query object contains a ref to a Seq of PrimarySeq
+    # object, then extract the predicted sequences and add it to the gene
+    # object.
+    if (defined $self->analysis_query()) {
+        my $orig_seq = $self->analysis_query();
+        FINDPREDSEQ: foreach my $pred (@{$self->{'_preds'}}) {
+            my $predseq = "";
+            foreach my $exon ($pred->exons()) {
+                $predseq .= $orig_seq->subseq($exon->start(), $exon->end());
+            }
+
+            my $seqobj = Bio::PrimarySeq->new('-seq' => $predseq,
+                                              '-display_id' => "transl");
+            $pred->predicted_cds($seqobj);
+        }
+    }
+
+
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns :
+
+=cut
+
+sub _prediction {
+    # Remove and return the oldest queued prediction from
+    # $self->{'_preds'}; returns nothing when the queue is missing or empty.
+    my $self = shift;
+
+    if (exists $self->{'_preds'} and @{ $self->{'_preds'} }) {
+        return shift @{ $self->{'_preds'} };
+    }
+    return;
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns :
+
+=cut
+
+sub _add_prediction {
+    # Append $gene to the queue in $self->{'_preds'}, creating the queue
+    # first if the key does not exist yet.
+    my $self = shift;
+    my $gene = shift;
+
+    $self->{'_preds'} = [] unless exists $self->{'_preds'};
+    push @{ $self->{'_preds'} }, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    # Get/set the "prediction section already parsed" flag. A true $val is
+    # stored; a missing flag is initialised to 0. Returns the stored flag.
+    my $self = shift;
+    my $val  = shift;
+
+    if ($val) {
+        $self->{'_preds_parsed'} = $val;
+    }
+    elsif (not exists $self->{'_preds_parsed'}) {
+        $self->{'_preds_parsed'} = 0;
+    }
+
+    return $self->{'_preds_parsed'};
+}
+
+=head2 _has_cds
+
+ Title   : _has_cds()
+ Usage   : $obj->_has_cds()
+ Function: Whether or not the result contains the predicted CDSs, too.
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _has_cds {
+    # Get/set the flag telling whether the result contains predicted CDSs.
+    # A true $val is stored; a missing flag is initialised to 0.
+    # Returns the stored flag.
+    my $self = shift;
+    my $val  = shift;
+
+    if ($val) {
+        $self->{'_has_cds'} = $val;
+    }
+    elsif (not exists $self->{'_has_cds'}) {
+        $self->{'_has_cds'} = 0;
+    }
+
+    return $self->{'_has_cds'};
+}
+
+=head2 _read_fasta_seq
+
+ Title   : _read_fasta_seq()
+ Usage   : ($id,$seqstr) = $obj->_read_fasta_seq();
+ Function: Simple but specialised FASTA format sequence reader. Uses
+           $self->_readline() to retrieve input, and is able to strip off
+           the trailing description lines.
+ Example :
+ Returns : An array of two elements.
+
+=cut
+
+sub _read_fasta_seq {
+    # Read one FASTA entry through $self->_readline() with ">" as the
+    # input record separator.
+    # Returns ($id, $seq): the header line with blanks replaced by "_" and
+    # the sequence with all whitespace removed. Returns 0 when _readline()
+    # yields nothing, and throws if the entry has no header line.
+    my ($self) = @_;
+    my ($id, $seq);
+    local $/ = ">";
+
+    return 0 unless (my $entry = $self->_readline());
+
+    $entry =~ s/^>//;
+    # complete the entry if the first line came from a pushback buffer;
+    # a lexical is used so that the caller's $_ is left untouched
+    while ($entry !~ />$/) {
+        my $chunk = $self->_readline();
+        last unless $chunk;
+        $entry .= $chunk;
+    }
+
+    # delete everything onwards from a new fasta start (>)
+    $entry =~ s/\n>.*$//s;
+
+    # id and sequence
+    if($entry =~ s/^(.+)\n//) {
+        $id = $1;
+        $id =~ s/ /_/g;
+        $seq = $entry;
+        $seq =~ s/\s//g; # Remove whitespace
+    } else {
+        $self->throw("Can't parse Genemark predicted sequence entry");
+    }
+    return ($id, $seq);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genewise.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genewise.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genewise.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,335 @@
+# $Id: Genewise.pm,v 1.25.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Genewise
+#
+# Copyright Fugu Team 
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Genewise - Results of one Genewise run
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Genewise;
+  my $gw = Bio::Tools::Genewise->new(-file=>"genewise.out");
+
+  while (my $gene = $gw->next_prediction){
+    my @transcripts = $gene->transcripts;
+      foreach my $t(@transcripts){
+        my @exons =  $t->exons;
+        foreach my $e(@exons){
+            print $e->start." ".$e->end."\n";
+        }
+      }
+  }
+
+=head1 DESCRIPTION
+
+This is the parser for the output of Genewise. It takes either a file
+handle or a file name and returns a 
+Bio::SeqFeature::Gene::GeneStructure object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Fugu Team, Jason Stajich 
+
+ Email: fugui at worf.fugu-sg.org
+ Email: jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Genewise;
+use vars qw($Srctag);
+use strict;
+use Symbol;
+
+use Bio::Tools::AnalysisResult;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::GeneStructure;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+$Srctag = 'genewise';
+
+=head2 new
+
+ Title   : new
+ Usage   : $obj->new(-file=>"genewise.out");
+           $obj->new(-fh=>\*GW);
+ Function: Constructor for genewise wrapper. Takes either a file or filehandle
+ Example :
+ Returns : Bio::Tools::Genewise object
+
+See L<Bio::Tools::Genewise>
+
+=cut
+
+sub new {
+  # Construct the parser through the parent constructor and set up its
+  # input from the same arguments (e.g. -file or -fh) via _initialize_io().
+  # (The argument list must be unpacked as @args; the archived copy had it
+  # garbled to " at args", which does not compile.)
+  my($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);
+  return $self;
+}
+
+=head2 _get_strand
+
+ Title   : _get_strand
+ Usage   : $obj->_get_strand
+ Function: Takes start and end values, swaps them if start > end, and
+           derives the strand (-1 if they were swapped, otherwise 1)
+ Example :
+ Returns :$start,$end,$strand
+
+=cut
+
+sub _get_strand {
+  # Normalise a coordinate pair: returns ($start, $end, $strand) with
+  # $start <= $end. The strand is -1 when the input had start > end
+  # (coordinates are swapped), otherwise 1. Throws if either is undefined.
+  my ($self,$start,$end) = @_;
+
+  $self->throw("Need a start")  unless defined($start);
+  $self->throw("Need an end")   unless defined($end);
+
+  return $start > $end ? ($end, $start, -1)
+                       : ($start, $end, 1);
+}
+
+=head2 _score
+
+ Title   : _score
+ Usage   : $obj->_score
+ Function: get/set for score info
+ Returns : a score value
+
+=cut
+
+sub _score {
+    # Get/set accessor for $self->{'_score'}; any passed argument
+    # (including undef or 0) replaces the stored value.
+    my ($self, @new) = @_;
+    $self->{'_score'} = $new[0] if @new;
+    return $self->{'_score'};
+}
+
+=head2 _prot_id
+
+ Title   : _prot_id
+ Usage   : $obj->_prot_id
+ Function: get/set for protein id 
+ Returns :a protein id
+
+=cut
+
+sub _prot_id {
+    # Get/set accessor for $self->{'_prot_id'}; any passed argument
+    # replaces the stored value.
+    my ($self, @new) = @_;
+    $self->{'_prot_id'} = $new[0] if @new;
+    return $self->{'_prot_id'};
+}
+
+=head2 _target_id
+
+ Title   : _target_id
+ Usage   : $obj->_target_id
+ Function: get/set for genomic sequence id
+ Example :
+ Returns :a target id
+
+=cut
+
+sub _target_id {
+    # Get/set accessor for $self->{'_target_id'}; any passed argument
+    # replaces the stored value.
+    my ($self, @new) = @_;
+    $self->{'_target_id'} = $new[0] if @new;
+    return $self->{'_target_id'};
+}
+
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $genewise->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the gene structure prediction of the Genewise result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : a Bio::SeqFeature::Gene::GeneStructure object
+ Args    :
+
+See L<Bio::SeqFeature::Gene::GeneStructure>
+
+=cut
+
+
+sub next_prediction {
+    # Return the next gene from $self->{'_genes'}. On the first call the
+    # input is parsed with _parse_genes() and the parser is marked parsed.
+    my $self = shift;
+
+    if ( !$self->parsed ) {
+        $self->_parse_genes;
+        $self->parsed(1);
+    }
+
+    return shift @{ $self->{'_genes'} };
+}
+
+sub parsed {
+    # Get/set the "input already parsed" flag. A true argument sets the
+    # flag to 1; the flag cannot be cleared through this method.
+    my ($self, $flag) = @_;
+    $self->{'_parsed'} = 1 if $flag;
+    return $self->{'_parsed'};
+}
+  
+sub _parse_genes {
+    # Read the input in chunks terminated by "//". For every chunk, remember
+    # score, query id and target id if present, and turn each "Gene N"
+    # section into a GeneStructure holding one Transcript with its exons
+    # (plus supporting feature pairs where listed). The genes are stored as
+    # an array reference in $self->{'_genes'}.
+    my ($self) = @_;
+    my @genes;
+    local ($/) = "//";
+
+    while ( defined($_ = $self->_readline) ) {
+        $self->debug( $_ ) if( $self->verbose > 0);
+
+        # header values, kept on the parser for the features built below
+        $self->_score($1)     if /Score\s+(\-?\d+(\.\d+)?)/;
+        $self->_prot_id($1)   if /Query\s+(?:protein|model)\:\s+(\S+)/;
+        $self->_target_id($1) if /Target Sequence\s+(\S+)/;
+
+        next unless /Gene\s+\d+\n/;
+
+        my @genes_txt = split(/Gene\s+\d+\n/,$_);
+        shift @genes_txt; # drop the text in front of the first gene header
+
+        foreach my $gene_txt (@genes_txt) {
+            # overall gene start and end, plus an optional [type] label
+            my ($g_start, $g_end, $type) =
+                $gene_txt =~ m/Gene\s+
+                               (\d+)[\s-]+    # start (1-based)
+                               (\d+)\s+       # end
+                               (?:\[(\w+)\])? #
+                               /x;
+            my $source_tag = $type ? "$Srctag". "_$type" : $Srctag;
+
+            my $gene_struct = Bio::SeqFeature::Gene::GeneStructure->new
+                (-source => $source_tag);
+            my $transcript = Bio::SeqFeature::Gene::Transcript->new
+                (-source => $source_tag,
+                 -score  => $self->_score);
+
+            # start > end in the report means the gene is on the reverse strand
+            my $g_strand;
+            ($g_start, $g_end, $g_strand) = $self->_get_strand($g_start,$g_end);
+            $gene_struct->strand($g_strand);
+
+            # grab exon + supporting feature info; fall back to bare exon
+            # lines when no supporting lines are present
+            my @exon_txts = $gene_txt =~ m/(Exon .+\s+Supporting .+)/g;
+            @exon_txts = $gene_txt =~ m/(Exon .+\s+)/g unless @exon_txts;
+
+            my $nbr = 1;
+            # loop through each exon-supporting feature pair
+            foreach my $exon_txt (@exon_txts) {
+                my ($e_start,$e_end,$phase) =
+                    $exon_txt =~ m/Exon\s+
+                                   (\d+)[\s-]+     # start (1 based)
+                                   (\d+)\s+        # end
+                                   phase\s+(\d+)   # phase
+                                   /x;
+                my $e_strand;
+                ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,
+                                                                 $e_end);
+                # the first exon decides the transcript strand
+                $transcript->strand($e_strand) if $transcript->strand == 0;
+
+                my $exon = Bio::SeqFeature::Gene::Exon->new
+                    (-seq_id => $self->_target_id,
+                     -source => $source_tag,
+                     -start  => $e_start,
+                     -end    => $e_end,
+                     -score  => $self->_score,
+                     #-frame => $phase,
+                     -strand => $e_strand);
+
+                $exon->add_tag_value('phase',$phase);
+                $exon->is_coding(1);
+                if( $self->_prot_id ) {
+                    $exon->add_tag_value('Target',"Protein:".$self->_prot_id);
+                }
+                $exon->add_tag_value("Exon",$nbr++);
+
+                if( $exon_txt =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
+                    my ($geno_start,$geno_end,
+                        $prot_start, $prot_end) = ($1,$2,$3,$4);
+
+                    # protein side of the supporting evidence
+                    my $prot_strand;
+                    ($prot_start,$prot_end,
+                     $prot_strand) = $self->_get_strand($prot_start,$prot_end);
+                    my $prot_feat = Bio::SeqFeature::Generic->new
+                        ( -start   => $prot_start,
+                          -end     => $prot_end,
+                          -seq_id  => $self->_prot_id,
+                          -score   => $self->_score,
+                          -strand  => $prot_strand,
+                          -source  => $source_tag,
+                          -primary_tag => 'supporting_protein_feature',);
+
+                    # genomic side of the supporting evidence
+                    my $geno_strand;
+                    ($geno_start,$geno_end,
+                     $geno_strand) = $self->_get_strand($geno_start,$geno_end);
+                    my $geno_feat = Bio::SeqFeature::Generic->new
+                        ( -start   => $geno_start,
+                          -end     => $geno_end,
+                          -seq_id  => $self->_target_id,
+                          -score   => $self->_score,
+                          -strand  => $geno_strand,
+                          -source  => $source_tag,
+                          -primary_tag => 'supporting_genomic_feature',);
+
+                    my $pair = Bio::SeqFeature::FeaturePair->new
+                        (-feature1 => $geno_feat,
+                         -feature2 => $prot_feat);
+                    $exon->add_tag_value( 'supporting_feature',$pair );
+                    if( $self->_prot_id ) {
+                        $exon->add_tag_value('Target',$prot_start);
+                        $exon->add_tag_value('Target',$prot_end);
+                    }
+                }
+                $transcript->add_exon($exon);
+            }
+
+            $transcript->seq_id($self->_target_id);
+            $transcript->add_tag_value('Id', $self->_prot_id);
+            $gene_struct->add_transcript($transcript);
+            $gene_struct->seq_id($self->_target_id);
+            push @genes, $gene_struct;
+        }
+    }
+    $self->{'_genes'} = \@genes;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genomewise.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genomewise.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genomewise.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,219 @@
+# $Id: Genomewise.pm,v 1.6.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Genomewise
+#
+# Copyright Jason Stajich <jason-at-bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Genomewise - Results of one Genomewise run
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Genomewise;
+  my $gw = Bio::Tools::Genomewise->new(-file=>"genomewise.out");
+
+  while (my $gene = $gw->next_prediction){
+      my @transcripts = $gene->transcripts;
+      foreach my $t(@transcripts){
+        my @exons =  $t->exons;
+        foreach my $e(@exons){
+            print $e->start." ".$e->end."\n";
+        }
+      }
+  }
+
+=head1 DESCRIPTION
+
+This is the parser for the output of Genomewise. It takes either a file
+handle or a file name and returns a
+Bio::SeqFeature::Gene::GeneStructure object.  You will need to specify
+the proper target sequence id on the object with the
+$feature-E<gt>seq_id($seqid).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Fugu Team, Jason Stajich 
+
+ Email: fugui-at-worf.fugu-sg.org
+        jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Genomewise;
+use vars qw($Srctag);
+use strict;
+
+use Bio::Tools::AnalysisResult;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::GeneStructure;
+
+use base qw(Bio::Tools::Genewise);
+
+$Srctag = 'genomewise';
+
+=head2 new
+
+ Title   : new
+ Usage   : $obj->new(-file=>"genewise.out");
+           $obj->new(-fh=>\*GW);
+ Function: Constructor for genomewise wrapper. Takes either a file or filehandle
+ Example :
+ Returns : L<Bio::Tools::Genomewise>
+
+=cut
+
+sub new {
+  # Construct the parser by delegating to the parent constructor with the
+  # same arguments. (The argument list must be unpacked as @args; the
+  # archived copy had it garbled to " at args", which does not compile.)
+  my($class, @args) = @_;
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+=head2 _get_strand
+
+ Title   : _get_strand
+ Usage   : $obj->_get_strand
+ Function: Takes start and end values, swaps them if start > end, and derives the strand
+ Example :
+ Returns :$start,$end,$strand
+
+=cut
+
+=head2 score
+
+ Title   : score
+ Usage   : $obj->score
+ Function: get/set for score info
+ Example :
+ Returns : a score value
+
+=cut
+
+=head2 _prot_id
+
+ Title   : _prot_id
+ Usage   : $obj->_prot_id
+ Function: get/set for protein id 
+ Example :
+ Returns :a protein id
+
+=cut
+
+=head2 _target_id
+
+ Title   : _target_id
+ Usage   : $obj->_target_id
+ Function: get/set for genomic sequence id
+ Example :
+ Returns :a target id
+
+=cut
+
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $genewise->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the gene structure prediction of the Genomewise result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : a Bio::SeqFeature::Gene::GeneStructure object
+ Args    :
+
+=cut
+
+
+sub next_prediction {
+    # Read lines until the next "Gene N" header and build a GeneStructure
+    # holding one Transcript with the exons listed under it. Reading stops
+    # at a line starting with "//" or when the input runs out. Returns the
+    # GeneStructure, or undef if no gene header was found. If the line after
+    # the header is not a "Gene <start> <end>" line, a warning is issued and
+    # the gene is returned without a transcript.
+    my ($self) = @_;
+
+    my $gene_struct;
+    while ($_ = $self->_readline) {
+        $self->debug( $_ ) if( $self->verbose > 0);
+        last if m{^//};
+
+        # skip everything that is not a gene header
+        next unless /^Gene\s+\d+\s*$/;
+
+        $gene_struct = Bio::SeqFeature::Gene::GeneStructure->new
+            (-source => $Srctag,
+             -seq_id => $self->_target_id, # if this had been specified
+             );
+
+        # the line after the header carries the gene's start and end
+        $_ = $self->_readline;
+        $self->debug( $_ ) if( $self->verbose > 0);
+
+        unless ( /^Gene\s+(\d+)\s+(\d+)\s*$/ ) {
+            $self->warn("Unparseable genomewise output");
+            last;
+        }
+        my $transcript = Bio::SeqFeature::Gene::Transcript->new
+            (-source => $Srctag,
+             -seq_id => $self->_target_id, # if this had been specified
+             -start  => $1,
+             -end    => $2,
+             );
+
+        my $nbr = 1;
+        while( $_ = $self->_readline ) {
+            $self->debug( $_ ) if( $self->verbose > 0);
+
+            # the first non-exon line ends this gene; keep it for the next call
+            unless( m/^\s+Exon\s+(\d+)\s+(\d+)\s+phase\s+(\d+)/ ){
+                $self->_pushback($_);
+                last;
+            }
+            my ($raw_start, $raw_end, $phase) = ($1, $2, $3);
+
+            my ($e_start, $e_end, $e_strand) =
+                $self->_get_strand($raw_start, $raw_end);
+            # the first exon decides the transcript strand
+            $transcript->strand($e_strand) if $transcript->strand == 0;
+
+            my $exon = Bio::SeqFeature::Gene::Exon->new
+                (-seq_id => $self->_target_id,
+                 -source => $Srctag,
+                 -start  => $e_start,
+                 -end    => $e_end,
+                 -frame  => $phase,
+                 -strand => $e_strand);
+            $exon->add_tag_value("Exon",$nbr++);
+            $exon->add_tag_value('phase',$phase);
+            $transcript->add_exon($exon);
+        }
+        $gene_struct->add_transcript($transcript);
+        last; # only process a single gene at a time
+    }
+    return $gene_struct;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genscan.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genscan.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Genscan.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,489 @@
+# $Id: Genscan.pm,v 1.29.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Genscan
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Genscan - Results of one Genscan run
+
+=head1 SYNOPSIS
+
+   use Bio::Tools::Genscan;
+
+   $genscan = Bio::Tools::Genscan->new(-file => 'result.genscan');
+   # filehandle:
+   $genscan = Bio::Tools::Genscan->new( -fh  => \*INPUT );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $genscan->next_feature() is the same
+   while($gene = $genscan->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene, which inherits
+       # off Bio::SeqFeature::Gene::Transcript.
+       #
+       # $gene->exons() returns an array of 
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # initial exons only
+       @init_exons = $gene->exons('Initial');
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # terminal exons only
+       @term_exons = $gene->exons('Terminal');
+       # singleton exons: 
+       ($single_exon) = $gene->exons();
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $genscan->close();
+
+=head1 DESCRIPTION
+
+The Genscan module provides a parser for Genscan gene structure prediction
+output. It parses one gene prediction into a Bio::SeqFeature::Gene::Transcript-
+derived object.
+
+This module also implements the Bio::SeqAnalysisParserI interface, and thus
+can be used wherever such an object fits. See L<Bio::SeqAnalysisParserI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp at gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Genscan;
+use strict;
+use Symbol;
+
+use Bio::Root::Root;
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+my %ExonTags = ('Init' => 'Initial',   # keys are compared with field 2 of a
+		'Intr' => 'Internal',  # prediction line; values prefix the exon's
+		'Term' => 'Terminal',  # primary_tag and are passed to add_exon()
+		'Sngl' => '');         # empty prefix: primary_tag is just 'Exon'
+    
+sub _initialize_state {
+    my ($self,@args) = @_;
+    # Sets up the per-object parser state after delegating to the parent class.
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+
+    # our private state variables
+    $self->{'_preds_parsed'} = 0;    # flag read/set via _predictions_parsed()
+    $self->{'_has_cds'} = 0;         # flag read/set via _has_cds()
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+    # seq stack
+    $self->{'_seqstack'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $genscan->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /genscan/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+    # Accepts only method names matching /genscan/i, then defers to the parent.
+    my $self   = shift;
+    my $method = shift;
+    $self->throw("method $method not supported in " . ref($self))
+	if $method && $method !~ /genscan/i;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $genscan->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Genscan result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self,@args) = @_;
+    # Alias for next_prediction(). Neither method expects arguments at
+    # present, but any that are given are passed on in case that ever
+    # changes.
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $genscan->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Genscan result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+    my $gene;
+
+    # if the prediction section hasn't been parsed yet, we do this now
+    $self->_parse_predictions() unless $self->_predictions_parsed();
+
+    # get next gene structure
+    $gene = $self->_prediction();
+
+    if($gene) {
+	# fill in predicted protein, and if available the predicted CDS
+	#
+	my ($id, $seq);
+	# use the seq stack if there's a seq on it
+	my $seqobj = pop(@{$self->{'_seqstack'}});
+	if(! $seqobj) {
+	    # otherwise read from input stream
+	    ($id, $seq) = $self->_read_fasta_seq();
+	    # there may be no sequence at all, or none any more
+	    if($id && $seq) {
+		$seqobj = Bio::PrimarySeq->new('-seq' => $seq,
+					       '-display_id' => $id,
+					       '-alphabet' => "protein");
+	    }
+	}
+	if($seqobj) {
+	    # check that prediction number matches the prediction number
+	    # indicated in the sequence id (there may be incomplete gene
+	    # predictions that contain only signals with no associated protein
+	    # and CDS, like promoters, poly-A sites etc)
+	    my ($prednr) = $gene->primary_tag() =~ /[^0-9]([0-9]+)$/;
+	    if(! defined($prednr) ||
+	       $seqobj->display_id() !~ /_predicted_\w+_$prednr\|/) {
+		# this is not our sequence, so push back for next prediction
+		push(@{$self->{'_seqstack'}}, $seqobj);
+	    } else {
+		$gene->predicted_protein($seqobj);
+		# CDS prediction, too?
+		if($self->_has_cds()) {
+		    ($id, $seq) = $self->_read_fasta_seq();
+		    # the CDS entry may be missing, e.g. in a truncated report
+		    $gene->predicted_cds(Bio::PrimarySeq->new(
+			'-seq' => $seq, '-display_id' => $id,
+			'-alphabet' => "dna")) if($id && $seq);
+		}
+	    }
+	}
+    }
+
+    return $gene;
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    my ($self) = @_;
+    my ($gene, $seqname);
+    # Reads the prediction table up to the first '>' line or end of input.
+    while(defined($_ = $self->_readline())) {
+	if(/^\s*(\d+)\.(\d+)/) {
+	    # exon or signal
+	    my $prednr = $1;
+	    if(! defined($gene)) {
+		$gene = Bio::Tools::Prediction::Gene->new(
+                                       '-primary' => "GenePrediction$prednr",
+				       '-source' => 'Genscan');
+	    }
+	    # split into fields
+	    chomp();
+	    my @flds = split(' ', $_);
+	    # create the feature object depending on the type of signal
+	    my $predobj;
+	    my $is_exon = exists($ExonTags{$flds[1]});
+	    if($is_exon) {
+		$predobj = Bio::Tools::Prediction::Exon->new();
+	    } else {
+		# PolyA site, or Promoter
+		$predobj = Bio::SeqFeature::Generic->new();
+	    }
+	    # set common fields
+	    $predobj->source_tag('Genscan');
+	    $predobj->score($flds[$#flds]);
+	    $predobj->strand((($flds[2] eq '+') ? 1 : -1));
+	    my ($start, $end) = @flds[(3,4)];
+	    if($predobj->strand() == 1) {
+		$predobj->start($start);
+		$predobj->end($end);
+	    } else {
+		$predobj->end($start);
+		$predobj->start($end);
+	    }
+	    # add to gene structure (should be done only when start and end
+	    # are set, in order to allow for proper expansion of the range)
+	    if($is_exon) {
+		# first, set fields unique to exons
+		$predobj->start_signal_score($flds[8]);
+		$predobj->end_signal_score($flds[9]);
+		$predobj->coding_signal_score($flds[10]);
+		$predobj->significance($flds[11]);
+		$predobj->primary_tag($ExonTags{$flds[1]} . 'Exon');
+		$predobj->is_coding(1);
+		# Figure out the frame of this exon. This is NOT the frame
+		# given by Genscan, which is the absolute frame of the base
+		# starting the first predicted complete codon. By comparing
+		# to the absolute frame of the first base we can compute the
+		# offset of the first complete codon to the first base of the
+		# exon, which determines the frame of the exon.
+		my $cod_offset;
+		if($predobj->strand() == 1) {
+		    $cod_offset = $flds[6] - (($predobj->start()-1) % 3);
+		    # Possible values are -2, -1, 0, 1, 2. -1 and -2 correspond
+		    # to offsets 2 and 1, resp. Offset 3 is the same as 0.
+		    $cod_offset += 3 if($cod_offset < 1);		    
+		} else {
+		    # On the reverse strand the Genscan frame also refers to
+		    # the first base of the first complete codon, but viewed
+		    # from forward, which is the third base viewed from
+		    # reverse.
+		    $cod_offset = $flds[6] - (($predobj->end()-3) % 3);
+		    # Possible values are -2, -1, 0, 1, 2. Due to the reverse
+		    # situation, {2,-1} and {1,-2} correspond to offsets
+		    # 1 and 2, resp. Offset 3 is the same as 0.
+		    $cod_offset -= 3 if($cod_offset >= 0);
+		    $cod_offset = -$cod_offset;
+		}
+		# Offsets 2 and 1 correspond to frame 1 and 2 (frame of exon
+		# is the frame of the first base relative to the exon, or the
+		# number of bases the first codon is missing).
+		$predobj->frame(3 - $cod_offset);
+		# then add to gene structure object
+		$gene->add_exon($predobj, $ExonTags{$flds[1]});		
+	    } elsif($flds[1] eq 'PlyA') {
+		$predobj->primary_tag("PolyAsite");
+		$gene->poly_A_site($predobj);
+	    } elsif($flds[1] eq 'Prom') {
+		$predobj->primary_tag("Promoter");
+		$gene->add_promoter($predobj);
+	    }
+	    next;
+	}
+	if(/^\s*$/ && defined($gene)) {
+	    # current gene is completed
+	    $gene->seq_id($seqname);
+	    $self->_add_prediction($gene);
+	    $gene = undef;
+	    next;
+	}
+	if(/^(GENSCAN)\s+(\S+)/) {
+	    $self->analysis_method($1);
+	    $self->analysis_method_version($2);
+	    next;
+	}
+	if(/^Sequence\s+(\S+)\s*:/) {
+	    $seqname = $1;
+	    next;
+	}
+	if(/^Parameter matrix:\s+(\S+)/i) {
+	    $self->analysis_subject($1);
+	    next;
+	}
+	if(/^Predicted coding/) {
+	    $self->_has_cds(1);
+	    next;
+	}
+	/^>/ && do {
+	    # section of predicted sequences
+	    $self->_pushback($_);
+	    last;
+	};
+    }
+    if(defined($gene)) { # table ended without the usual blank line
+	$gene->seq_id($seqname);
+	$self->_add_prediction($gene);
+    }
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my $self = shift;
+    # Hand out queued predictions first-in, first-out; nothing when empty.
+    my $queue = exists $self->{'_preds'} ? $self->{'_preds'} : [];
+    return @$queue ? shift @$queue : ();
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my $self = shift;
+    my $gene = shift;
+
+    # Create the queue on first use, then append the new prediction.
+    $self->{'_preds'} = [] unless exists $self->{'_preds'};
+    push @{ $self->{'_preds'} }, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+    my $slot = '_preds_parsed';
+
+    # Only a true value is stored; a false argument leaves the flag alone.
+    $self->{$slot} = $val if $val;
+    $self->{$slot} = 0 unless exists $self->{$slot};
+    return $self->{$slot};
+}
+
+=head2 _has_cds
+
+ Title   : _has_cds()
+ Usage   : $obj->_has_cds()
+ Function: Whether or not the result contains the predicted CDSs, too.
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _has_cds {
+    my ($self, $val) = @_;
+    my $slot = '_has_cds';
+
+    # Only a true value is stored; a false argument leaves the flag alone.
+    $self->{$slot} = $val if $val;
+    $self->{$slot} = 0 unless exists $self->{$slot};
+    return $self->{$slot};
+}
+
+=head2 _read_fasta_seq
+
+ Title   : _read_fasta_seq()
+ Usage   : ($id,$seqstr) = $obj->_read_fasta_seq();
+ Function: Simple but specialised FASTA format sequence reader. Uses
+           $self->_readline() to retrieve input, and is able to strip off
+           the trailing description lines.
+ Example :
+ Returns : An array of two elements.
+
+=cut
+
+sub _read_fasta_seq {
+    my ($self) = @_;
+    my ($id, $seq);
+    local $/ = ">";
+    # Returns (id, sequence) of the next entry, or two undefs at end of input.
+    my $entry = $self->_readline();
+    if($entry) {
+	$entry =~ s/^>//;
+	# complete the entry if the first line came from a pushback buffer
+	while($entry !~ />$/) {
+	    my $chunk = $self->_readline() or last; # lexical: leave $_ alone
+	    $entry .= $chunk;
+	}
+	# delete everything onwards from an intervening empty line (at the
+	# end there might be statistics stuff)
+	$entry =~ s/\n\n.*$//s;
+	# id and sequence
+	if($entry =~ /^(\S+)\n([^>]+)/) {
+	    $id = $1;
+	    $seq = $2;
+	} else {
+	    $self->throw("Can't parse Genscan predicted sequence entry");
+	}
+	$seq =~ s/\s//g; # Remove whitespace
+    }
+    return ($id, $seq);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Glimmer.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Glimmer.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Glimmer.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,343 @@
+# $Id: Glimmer.pm,v 1.4.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Glimmer
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Glimmer - parser for GlimmerM/GlimmerHMM eukaryotic gene predictions
+
+=head1 SYNOPSIS
+
+   use Bio::Tools::Glimmer;
+
+   my $parser = new Bio::Tools::Glimmer(-file => $file);
+   # filehandle:
+   $parser = Bio::Tools::Glimmer->new( -fh  => \*INPUT );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $glimmer->next_feature() is the same
+
+   while(my $gene = $parser->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene, which inherits
+       # off Bio::SeqFeature::Gene::Transcript.
+       #
+       # $gene->exons() returns an array of 
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # initial exons only
+       @init_exons = $gene->exons('Initial');
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # terminal exons only
+       @term_exons = $gene->exons('Terminal');
+   }
+
+=head1 DESCRIPTION
+
+This is a module for parsing GlimmerM and GlimmerHMM predictions 
+It will create gene objects from the prediction report which can 
+be attached to a sequence using Bioperl objects, or output as GFF 
+suitable for loading into Bio::DB::GFF for use with Gbrowse.
+
+GlimmerM is open source and available at 
+L<http://www.tigr.org/software/glimmerm/>.
+
+GlimmerHMM is open source and available at
+L<http://www.cbcb.umd.edu/software/GlimmerHMM/>.
+
+=head1 BUGS
+
+This module does B<not> parse Glimmer2 or Glimmer3 bacterial gene
+prediction files. Details on their output formats can be found at
+L<http://www.cbcb.umd.edu/software/glimmer/>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Torsten Seemann
+
+Mark Johnson
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Glimmer;
+use strict;
+
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    my($self,@args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+    # then reset our own parser state
+    $self->{'_preds_parsed'} = 0;
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Glimmer();
+ Function: Builds a new Bio::Tools::Glimmer object 
+ Returns : an instance of Bio::Tools::Glimmer
+ Args    :
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # Plain pass-through: all construction is done by the parent class.
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+=head2 analysis_method
+
+ Usage     : $glimmer->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /glimmer/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+    # Accepts only method names matching /glimmer/i, then defers to the parent.
+    my $self   = shift;
+    my $method = shift;
+    $self->throw("method $method not supported in " . ref($self))
+	if $method && $method !~ /glimmer/i;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $glimmer->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Glimmer result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self,@args) = @_;
+    # Alias for next_prediction(). Neither method expects arguments at
+    # present, but any that are given are passed on in case that ever
+    # changes.
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $glimmer->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Glimmer result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my $self = shift;
+
+    # Parse the report the first time a prediction is asked for.
+    unless ($self->_predictions_parsed()) {
+	$self->_parse_predictions();
+    }
+    # Hand back the next queued gene structure (undef once exhausted).
+    my $gene = $self->_prediction();
+    return $gene;
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    my ($self) = @_;
+    # Reads the whole report; exon lines sharing a gene number form one gene.
+    my ($gene,$seqname,$seqlen,$source,$lastgenenum);
+    
+    while(defined($_ = $self->_readline())) {
+	if( /^(Glimmer\S*)\s+\(Version\s*(\S+)\)/ ) {
+	    $source = "$1_$2";
+	    next;
+	} elsif( /^(Glimmer\S*)$/ ) { # GlimmerHMM has no version
+	    $source = $1;
+	    next;
+	} elsif(/^Sequence name:\s+(.+)$/ ) {
+	    $seqname = $1;
+	    next;
+	} elsif( /^Sequence length:\s+(\S+)/ ) {
+	    $seqlen = $1;
+	    next;
+	} elsif( m/^(Predicted genes)|(Gene)|\s+\#/ || /^\s+$/ ) { 
+	    next;
+	    # NOTE(review): '^' anchors only the first alternative -- confirm
+	} elsif( # GlimmerM/HMM gene-exon prediction line
+		 /^\s+(\d+)\s+ # gene num
+		 (\d+)\s+      # exon num
+		 ([\+\-])\s+   # strand
+		 (\S+)\s+      # exon type
+		 (\d+)\s+(\d+) # exon start, end
+		 \s+(\d+)      # exon length		 
+		 /x ) {
+	    my ($genenum,$exonnum,$strand,$type,$start,$end,$len) = 
+		( $1,$2,$3,$4,$5,$6,$7);
+	    if( ! $lastgenenum || $lastgenenum != $genenum) {		
+		$self->_add_prediction($gene) if ( $gene );
+		$gene = Bio::Tools::Prediction::Gene->new
+		    (
+		     '-seq_id'      => $seqname,
+		     '-primary_tag' => "gene",
+		     '-source_tag'  => $source,
+		     '-tag'         => { 'Group' => "GenePrediction$genenum"},
+		     );
+	    }
+	    my $exon = Bio::Tools::Prediction::Exon->new
+		('-seq_id'     => $seqname,
+		 '-start'      => $start,
+		 '-end'        => $end,
+		 '-strand'     => $strand eq '-' ? '-1' : '1',
+		 '-source_tag' => $source,
+		 '-primary_tag'=> 'exon',
+		 '-tag'         => { 'Group' => "GenePrediction$genenum"},
+		 );
+	    $gene->add_exon($exon,lc($type));
+	    $lastgenenum = $genenum;
+	}
+    }
+    $self->_add_prediction($gene) if( $gene );
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my $self = shift;
+    # Hand out queued predictions first-in, first-out; nothing when empty.
+    my $queue = exists $self->{'_preds'} ? $self->{'_preds'} : [];
+    return @$queue ? shift @$queue : ();
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my $self = shift;
+    my $gene = shift;
+
+    # Create the queue on first use, then append the new prediction.
+    $self->{'_preds'} = [] unless exists $self->{'_preds'};
+    push @{ $self->{'_preds'} }, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+    my $slot = '_preds_parsed';
+
+    # Only a true value is stored; a false argument leaves the flag alone.
+    $self->{$slot} = $val if $val;
+    $self->{$slot} = 0 unless exists $self->{$slot};
+    return $self->{$slot};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Grail.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Grail.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Grail.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,256 @@
+# $Id: Grail.pm,v 1.11.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Grail
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Grail - Results of one Grail run
+
+=head1 SYNOPSIS
+
+   $grail = Bio::Tools::Grail->new(-file => 'result.grail');
+   # filehandle:
+   $grail = Bio::Tools::Grail->new( -fh  => \*INPUT );
+
+   # parse the results
+   while($gene = $grail->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene
+
+       # $gene->exons() returns an array of 
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # initial exons only
+       @init_exons = $gene->exons('Initial');
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # terminal exons only
+       @term_exons = $gene->exons('Terminal');
+       # singleton exons only -- should be same as $gene->exons() because
+       # there are no other exons supposed to exist in this structure
+       @single_exons = $gene->exons('Single');
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $grail->close();
+
+=head1 DESCRIPTION
+
+The Grail module provides a parser for Grail gene structure prediction
+output.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::Grail;
+use strict;
+
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+use Symbol;
+
+use base qw(Bio::Root::IO Bio::Root::Root);
+
+sub new {
+  my($class,@args) = @_;
+  # Construct via the parent class, then set up I/O from the same arguments.
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);
+
+  return $self;
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $grail->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the Grail result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+
+    # parse on demand, as the POD of _parse_predictions() promises
+    $self->_parse_predictions() unless $self->_predictions_parsed();
+    my $gene = $self->_prediction();
+    if($gene) {
+	# fill in predicted protein, and if available the predicted CDS
+	my ($id, $seq);
+	# use the seq stack if there's a seq on it
+	my $seqobj = pop(@{$self->{'_seqstack'}});
+	if(! $seqobj) {
+	    # otherwise read from input stream; there may be none (any more)
+	    # NOTE(review): no _read_fasta_seq() in this package -- confirm source
+	    ($id, $seq) = $self->_read_fasta_seq();
+	    $seqobj = Bio::PrimarySeq->new('-seq' => $seq,
+					   '-display_id' => $id,
+					   '-alphabet' => "protein") if($id && $seq);
+	}
+	if($seqobj) {
+	    # the prediction number in the sequence id must match this gene's
+	    # (signal-only predictions have no protein/CDS entry of their own)
+	    my ($prednr) = $gene->primary_tag() =~ /[^0-9]([0-9]+)$/;
+	    if(! defined($prednr) ||
+	       $seqobj->display_id() !~ /_predicted_\w+_$prednr\|/) {
+		# this is not our sequence, so push back for the next prediction
+		push(@{$self->{'_seqstack'}}, $seqobj);
+	    } else {
+		$gene->predicted_protein($seqobj);
+		# CDS prediction, too?
+		if($self->_has_cds()) {
+		    ($id, $seq) = $self->_read_fasta_seq();
+		    $gene->predicted_cds(Bio::PrimarySeq->new(
+			'-seq' => $seq, '-display_id' => $id,
+			'-alphabet' => "dna")) if($id && $seq);
+		}
+	    }
+	}
+    }
+
+    return $gene;
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    my $self = shift;
+
+    # TODO: no Grail report parsing is implemented yet; the call below
+    # only records that the (empty) parsing step has taken place.
+    return $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my $self = shift;
+    # Hand out queued predictions first-in, first-out; nothing when empty.
+    my $queue = exists $self->{'_preds'} ? $self->{'_preds'} : [];
+    return @$queue ? shift @$queue : ();
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my $self = shift;
+    my $gene = shift;
+
+    # Create the queue on first use, then append the new prediction.
+    $self->{'_preds'} = [] unless exists $self->{'_preds'};
+    push @{ $self->{'_preds'} }, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+    my $slot = '_preds_parsed';
+
+    # Only a true value is stored; a false argument leaves the flag alone.
+    $self->{$slot} = $val if $val;
+    $self->{$slot} = 0 unless exists $self->{$slot};
+    return $self->{$slot};
+}
+
+=head2 _has_cds
+
+ Title   : _has_cds()
+ Usage   : $obj->_has_cds()
+ Function: Whether or not the result contains the predicted CDSs, too.
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _has_cds {
+    my ($self, $val) = @_;
+    my $slot = '_has_cds';
+
+    # Only a true value is stored; a false argument leaves the flag alone.
+    $self->{$slot} = $val if $val;
+    $self->{$slot} = 0 unless exists $self->{$slot};
+    return $self->{$slot};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GuessSeqFormat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GuessSeqFormat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/GuessSeqFormat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,935 @@
+# $Id: GuessSeqFormat.pm,v 1.15.4.1 2006/10/02 23:10:32 sendu Exp $
+#------------------------------------------------------------------
+#
+# BioPerl module Bio::Tools::GuessSeqFormat
+#
+# Cared for by Andreas Kähäri, andreas.kahari at ebi.ac.uk
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+=head1 NAME
+
+Bio::Tools::GuessSeqFormat - Module for determining the sequence
+format of the contents of a file, a string, or through a
+filehandle.
+
+=head1 SYNOPSIS
+
+    # To guess the format of a flat file, given a filename:
+    my $guesser = new Bio::Tools::GuessSeqFormat( -file => $filename );
+    my $format  = $guesser->guess;
+
+    # To guess the format from an already open filehandle:
+    my $guesser = new Bio::Tools::GuessSeqFormat( -fh => $filehandle );
+    my $format  = $guesser->guess;
+    # If the filehandle is seekable (STDIN isn't), it will be
+    # returned to its original position.
+
+    # To guess the format of one or several lines of text (with
+    # embedded newlines):
+    my $guesser = new Bio::Tools::GuessSeqFormat( -text => $linesoftext );
+    my $format = $guesser->guess;
+
+    # To create a Bio::Tools::GuessSeqFormat object and set the
+    # filename, filehandle, or line to parse afterwards:
+    my $guesser = new Bio::Tools::GuessSeqFormat;
+    $guesser->file($filename);
+    $guesser->fh($filehandle);
+    $guesser->text($linesoftext);
+
+    # To guess in one go, given e.g. a filename:
+    my $format = new Bio::Tools::GuessSeqFormat( -file => $filename )->guess;
+
+=head1 DESCRIPTION
+
+Bio::Tools::GuessSeqFormat tries to guess the format ("swiss",
+"pir", "fasta" etc.) of the sequence or MSA in a file, in a
+scalar, or through a filehandle.
+
+The guess() method of a Bio::Tools::GuessSeqFormat object will
+examine the data, line by line, until it finds a line to which
+only one format can be assigned.  If no conclusive guess can be
+made, undef is returned.
+
+If the Bio::Tools::GuessSeqFormat object is given a filehandle
+which is seekable, it will be restored to its original position
+on return from the guess() method.
+
+=head2 Formats
+
+Tests are currently implemented for the following formats:
+
+=over
+
+=item *
+
+ACeDB ("ace")
+
+=item *
+
+Blast ("blast")
+
+=item *
+
+ClustalW ("clustalw")
+
+=item *
+
+Codata ("codata")
+
+=item *
+
+EMBL ("embl")
+
+=item *
+
+FastA sequence ("fasta")
+
+=item *
+
+FastXY/FastA alignment ("fastxy")
+
+=item *
+
+Game XML ("game")
+
+=item *
+
+GCG ("gcg")
+
+=item *
+
+GCG Blast ("gcgblast")
+
+=item *
+
+GCG FastA ("gcgfasta")
+
+=item *
+
+GDE ("gde")
+
+=item *
+
+Genbank ("genbank")
+
+=item *
+
+Genscan ("genscan")
+
+=item *
+
+GFF ("gff")
+
+=item *
+
+HMMER ("hmmer")
+
+=item *
+
+PAUP/NEXUS ("nexus")
+
+=item *
+
+Phrap assembly file ("phrap")
+
+=item *
+
+NBRF/PIR ("pir")
+
+=item *
+
+Mase ("mase")
+
+=item *
+
+Mega ("mega")
+
+=item *
+
+GCG/MSF ("msf")
+
+=item *
+
+Pfam ("pfam")
+
+=item *
+
+Phylip ("phylip")
+
+=item *
+
+Prodom ("prodom")
+
+=item *
+
+Raw ("raw")
+
+=item *
+
+RSF ("rsf")
+
+=item *
+
+Selex ("selex")
+
+=item *
+
+Stockholm ("stockholm")
+
+=item *
+
+Swissprot ("swiss")
+
+=item *
+
+Tab ("tab")
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and
+other Bioperl modules.  Send your comments and suggestions
+preferably to one of the Bioperl mailing lists.  Your
+participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us
+keep track of the bugs and their resolution.  Bug reports can be
+submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Andreas Kähäri, andreas.kahari at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Heikki Lehväslaiho, heikki-at-bioperl-dot-org
+
+=cut
+
+
+package Bio::Tools::GuessSeqFormat;
+
+use strict;
+use warnings;
+
+
+use base qw(Bio::Root::Root);
+
+=head1 METHODS
+
+Methods available to Bio::Tools::GuessSeqFormat objects
+are described below.  Methods with names beginning with an
+underscore are considered to be internal.
+
+=cut
+
+=head2 new
+
+ Title      : new
+ Usage      : $guesser = new Bio::Tools::GuessSeqFormat( ... );
+ Function   : Creates a new object.
+ Example    : See SYNOPSIS.
+ Returns    : A new object.
+ Arguments  : -file The filename of the file whose format is to
+                    be guessed, or
+              -fh   An already opened filehandle from which a text
+                    stream may be read, or
+              -text A scalar containing one or several lines of
+                    text with embedded newlines.
+
+    If more than one of the above arguments are given, they
+    are tested in the order -text, -file, -fh, and the first
+    available argument will be used.
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # Store every argument pair on the object under its lower-cased name,
+    # so that e.g. "-FILE" and "-file" end up in the same slot.  An odd
+    # trailing name is stored with an undefined value.
+    while (@args) {
+        my $name = lc shift @args;
+        $self->{$name} = shift @args;
+    }
+
+    return $self;
+}
+
+=head2 file
+
+ Title      : file
+ Usage      : $guesser->file($filename);
+              $filename = $guesser->file;
+ Function   : Gets or sets the current filename associated with
+              an object.
+ Returns    : The new filename.
+ Arguments  : The filename of the file whose format is to be
+              guessed.
+
+    A call to this method will clear the current filehandle and
+    the current lines of text associated with the object.
+
+=cut
+
+sub file {
+    my ($self, $file) = @_;
+
+    # Getter: with no (or an undefined) argument just report the filename.
+    return $self->{-file} unless defined $file;
+
+    # Setter: the three input sources are mutually exclusive, so choosing
+    # a filename blanks any filehandle or text set earlier.
+    $self->{-text} = undef;
+    $self->{-fh}   = undef;
+    $self->{-file} = $file;
+
+    return $self->{-file};
+}
+
+=head2 fh
+
+ Title      : fh
+ Usage      : $guesser->fh($filehandle);
+              $filehandle = $guesser->fh;
+ Function   : Gets or sets the current filehandle associated with
+              an object.
+ Returns    : The new filehandle.
+ Arguments  : An already opened filehandle from which a text
+              stream may be read.
+
+    A call to this method will clear the current filename and
+    the current lines of text associated with the object.
+
+=cut
+
+sub fh {
+    my ($self, $fh) = @_;
+
+    # Getter: with no (or an undefined) argument just report the handle.
+    return $self->{-fh} unless defined $fh;
+
+    # Setter: the three input sources are mutually exclusive, so choosing
+    # a filehandle blanks any filename or text set earlier.
+    $self->{-text} = undef;
+    $self->{-file} = undef;
+    $self->{-fh}   = $fh;
+
+    return $self->{-fh};
+}
+
+
+=head2 text
+
+ Title      : text
+ Usage      : $guesser->text($linesoftext);
+              $linesoftext = $guesser->text;
+ Function   : Gets or sets the current text associated with an
+              object.
+ Returns    : The new lines of texts.
+ Arguments  : A scalar containing one or several lines of text,
+              including embedded newlines.
+
+    A call to this method will clear the current filename and
+    the current filehandle associated with the object.
+
+=cut
+
+sub text {
+    my ($self, $text) = @_;
+
+    # Getter: with no (or an undefined) argument just report the text.
+    return $self->{-text} unless defined $text;
+
+    # Setter: the three input sources are mutually exclusive, so choosing
+    # text blanks any filename or filehandle set earlier.
+    $self->{-file} = undef;
+    $self->{-fh}   = undef;
+    $self->{-text} = $text;
+
+    return $self->{-text};
+}
+
+=head2 guess
+
+ Title      : guess
+ Usage      : $format = $guesser->guess;
+              @format = $guesser->guess; # if given a line of text
+ Function   : Guesses the format of the data associated with the
+              object.
+ Returns    : A format string such as "swiss" or "pir".  If a
+              format can not be found, undef is returned.
+ Arguments  : None.
+
+    If the object is associated with a filehandle and if that
+    filehandle is seekable, the position of the filehandle
+    will be returned to its original position before the method
+    returns.
+
+=cut
+
+# Dispatch table: maps each format name that guess() can return to a hash
+# holding the line-level test routine for that format.  guess() also stores
+# the format name itself under the "fmt_string" key of each entry.
+our %formats = (
+    ace         => { test => \&_possibly_ace        },
+    blast       => { test => \&_possibly_blast      },
+    clustalw    => { test => \&_possibly_clustalw   },
+    codata      => { test => \&_possibly_codata     },
+    embl        => { test => \&_possibly_embl       },
+    fasta       => { test => \&_possibly_fasta      },
+    fastxy      => { test => \&_possibly_fastxy     },
+    game        => { test => \&_possibly_game       },
+    gcg         => { test => \&_possibly_gcg        },
+    gcgblast    => { test => \&_possibly_gcgblast   },
+    gcgfasta    => { test => \&_possibly_gcgfasta   },
+    gde         => { test => \&_possibly_gde        },
+    genbank     => { test => \&_possibly_genbank    },
+    genscan     => { test => \&_possibly_genscan    },
+    gff         => { test => \&_possibly_gff        },
+    hmmer       => { test => \&_possibly_hmmer      },
+    nexus       => { test => \&_possibly_nexus      },
+    mase        => { test => \&_possibly_mase       },
+    mega        => { test => \&_possibly_mega       },
+    msf         => { test => \&_possibly_msf        },
+    phrap       => { test => \&_possibly_phrap      },
+    pir         => { test => \&_possibly_pir        },
+    pfam        => { test => \&_possibly_pfam       },
+    phylip      => { test => \&_possibly_phylip     },
+    prodom      => { test => \&_possibly_prodom     },
+    raw         => { test => \&_possibly_raw        },
+    rsf         => { test => \&_possibly_rsf        },
+    selex       => { test => \&_possibly_selex      },
+    stockholm   => { test => \&_possibly_stockholm  },
+    swiss       => { test => \&_possibly_swiss      },
+    tab         => { test => \&_possibly_tab        }
+);
+
+sub guess
+{
+    # Examines the input one non-blank line at a time and runs every format
+    # test on it.  Stops at the first line that exactly one format accepts
+    # and returns that format's name; returns undef when the input runs out
+    # without such a line.  Input source precedence: -text, -file, -fh.
+    my $self = shift;
+
+    # Kept for backward compatibility: other code may read the format name
+    # back out of the package-level %formats table.
+    foreach my $fmt_key (keys %formats) {
+        $formats{$fmt_key}{fmt_string} = $fmt_key;
+    }
+
+    my $fh;
+    my $start_pos;
+    my $opened_here = 0;    # True only if this call opened $fh itself.
+    my @lines;
+    if (defined $self->{-text}) {
+        # Break the text into separate lines.
+        @lines = split /\n/, $self->{-text};
+    } elsif (defined $self->{-file}) {
+        # Three-argument open: the filename is only ever a path to read,
+        # never a mode or a command ("> file", "cmd |").
+        open($fh, '<', $self->{-file}) or
+            $self->throw("Can not open '$self->{-file}' for reading: $!");
+        $opened_here = 1;
+    } elsif (defined $self->{-fh}) {
+        # If given a filehandle, figure out if it's a plain GLOB
+        # or a IO::Handle which is seekable.  In the case of a
+        # GLOB, we'll assume it's seekable.  Get the current
+        # position in the stream.
+        $fh = $self->{-fh};
+        if (ref $fh eq 'GLOB') {
+            $start_pos = tell($fh);
+        } elsif (UNIVERSAL::isa($fh, 'IO::Seekable')) {
+            $start_pos = $fh->getpos();
+        }
+    } else {
+        # Nothing to examine.  undef (rather than an empty list) matches
+        # the "no conclusive guess" return value at the end of this sub.
+        return undef;
+    }
+
+    my $done   = 0;
+    my $lineno = 0;
+    my $fmt_string;
+    while (!$done) {
+        my $line;       # The next line of the input.
+
+        if (defined $self->{-text}) {
+            last if (scalar @lines == 0);
+            $line = shift @lines;
+        } else {
+            last if (!defined($line = <$fh>));
+        }
+        next if ($line =~ /^\s*$/); # Skip white and empty lines.
+
+        chomp($line);
+        $line =~ s/\r$//;   # Fix for DOS files on Unix.
+        ++$lineno;          # Blank lines are not counted.
+
+        # Names of all formats whose test accepts this line.
+        my @matching =
+            grep { $formats{$_}{test}->($line, $lineno) } keys %formats;
+
+        # We're done if there was only one match.
+        $done = (@matching == 1);
+        $fmt_string = $matching[0] if $done;
+    }
+
+    if ($opened_here) {
+        # Close the file we opened.
+        close($fh);
+    } elsif (defined $fh && defined $start_pos) {
+        # Put a caller-supplied handle back where we found it, but only
+        # when a usable start position was actually recorded.
+        if (ref $fh eq 'GLOB') {
+            # tell() reports -1 when the position could not be determined.
+            seek($fh, $start_pos, 0) if $start_pos >= 0;
+        } elsif ($fh->can('setpos')) {
+            $fh->setpos($start_pos);
+        }
+    }
+    return ($done ? $fmt_string : undef);
+}
+
+=head1 HELPER SUBROUTINES
+
+All helper subroutines will, given a line of text and the line
+number of the same line, return 1 if the line possibly is from a
+file of the type that they perform a test of.
+
+A zero return value does not mean that the line is not part
+of a certain type of file, just that the test did not find any
+characteristics of that type of file in the line.
+
+=head2 _possibly_ace
+
+From bioperl test data, and from
+"http://www.isrec.isb-sib.ch/DEA/module8/B_Stevenson/Practicals/transcriptome_recon/transcriptome_recon.html".
+
+=cut
+
+sub _possibly_ace {
+    # Any line: Sequence, Peptide, DNA or Protein, a space, then '"' or ':'.
+    my ($text) = @_;
+    return ($text =~ /^(?:Sequence|Peptide|DNA|Protein) [":]/);
+}
+
+=head2 _possibly_blast
+
+ From various blast results.
+
+=cut
+
+sub _possibly_blast {
+    # Line 1 only: a word containing BLAST first, a bracketed part last.
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 &&
+        $text =~ /^[[:upper:]]*BLAST[[:upper:]]*.*\[.*\]$/);
+}
+
+=head2 _possibly_clustalw
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_clustalw {
+    # Line 1 only: the word CLUSTAL anywhere in the line.
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /CLUSTAL/);
+}
+
+=head2 _possibly_codata
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_codata {
+    # ENTRY on line 1, SEQUENCE on line 2, or ENTRY/SEQUENCE/"///" anywhere.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^ENTRY/) ||
+            ($line_number == 2 && $text =~ /^SEQUENCE/) ||
+            $text =~ m{^(?:ENTRY|SEQUENCE|///)});
+}
+
+=head2 _possibly_embl
+
+From
+"http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3.3".
+
+=cut
+
+sub _possibly_embl {
+    # Line 1 only: starts with "ID" plus three spaces and ends with "BP.".
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /^ID   / && $text =~ /BP\.$/);
+}
+
+=head2 _possibly_fasta
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_fasta {
+    # A ">name" header on any line, or letters only (no J/O) after line 1.
+    my ($text, $line_number) = @_;
+    return (($line_number != 1 && $text =~ /^[A-IK-NP-Z]+$/i) ||
+            $text =~ /^>\s*\w/);
+}
+
+=head2 _possibly_fastxy
+
+From bioperl test data.
+
+=cut
+
+sub _possibly_fastxy {
+    # " FASTXY"/" FASTA" opening line 1, or " version <digit>" on line 2.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^ FAST(?:XY|A)/) ||
+            ($line_number == 2 && $text =~ /^ version \d/));
+}
+
+=head2 _possibly_game
+
+From bioperl testdata.
+
+=cut
+
+sub _possibly_game {
+    # Any line that opens with the "<!DOCTYPE game" declaration.
+    my ($text) = @_;
+    return ($text =~ /^<!DOCTYPE game/);
+}
+
+=head2 _possibly_gcg
+
+From bioperl, Bio::SeqIO::gcg.
+
+=cut
+
+sub _possibly_gcg {
+    # Any line with Length:, Type: and Check: in that order, ending in "..".
+    my ($text) = @_;
+    return ($text =~ /Length: .*Type: .*Check: .*\.\.$/);
+}
+
+=head2 _possibly_gcgblast
+
+From bioperl testdata.
+
+=cut
+
+sub _possibly_gcgblast {
+    # "!!SEQUENCE_LIST" on line 1, or a BLAST banner line on line 2.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^!!SEQUENCE_LIST/) ||
+            ($line_number == 2 &&
+             $text =~ /^[[:upper:]]*BLAST[[:upper:]]*.*\[.*\]$/));
+}
+
+=head2 _possibly_gcgfasta
+
+From bioperl testdata.
+
+=cut
+
+sub _possibly_gcgfasta {
+    # "!!SEQUENCE_LIST" on line 1, or the word FASTA anywhere on line 2.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^!!SEQUENCE_LIST/) ||
+            ($line_number == 2 && $text =~ /FASTA/));
+}
+
+=head2 _possibly_gde
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_gde {
+    # Any line: a lone brace, or a line opening with a listed field name.
+    my ($text) = @_;
+    return ($text =~ /^[{}]$/ ||
+            $text =~ /^(?:name|longname|sequence-ID|
+                          creation-date|direction|strandedness|
+                          type|offset|group-ID|creator|descrip|
+                          comment|sequence)/x);
+}
+
+=head2 _possibly_genbank
+
+From "http://www.ebi.ac.uk/help/formats.html".
+Format of [apparently optional] file header from
+"http://www.umdnj.edu/rcompweb/PA/Notes/GenbankFF.htm".
+
+=cut
+
+sub _possibly_genbank {
+    # Header banner or LOCUS on line 1, DEFINITION on 2, ACCESSION on 3.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /GENETIC SEQUENCE DATA BANK/) ||
+            ($line_number == 1 && $text =~ /^LOCUS /) ||
+            ($line_number == 2 && $text =~ /^DEFINITION /) ||
+            ($line_number == 3 && $text =~ /^ACCESSION /));
+}
+
+=head2 _possibly_genscan
+
+From bioperl test data.
+
+=cut
+
+sub _possibly_genscan {
+    # GENSCAN banner on line 1, or one of three section headings anywhere.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^GENSCAN.*Date.*Time/) ||
+            ($text =~ /^(?:Sequence\s+\w+|Parameter matrix|Predicted genes)/));
+}
+
+=head2 _possibly_gff
+
+From bioperl test data.
+
+=cut
+
+sub _possibly_gff {
+    # "##gff-version" opening line 1, or "##date" opening line 2.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^##gff-version/) ||
+            ($line_number == 2 && $text =~ /^##date/));
+}
+
+=head2 _possibly_hmmer
+
+From bioperl test data.
+
+=cut
+
+sub _possibly_hmmer {
+    # "HMMER" opening line 2, or the university credit anywhere on line 3.
+    my ($text, $line_number) = @_;
+    return (($line_number == 2 && $text =~ /^HMMER/) ||
+            ($line_number == 3 &&
+             $text =~ /Washington University School of Medicine/));
+}
+
+=head2 _possibly_nexus
+
+From "http://paup.csit.fsu.edu/nfiles.html".
+
+=cut
+
+sub _possibly_nexus {
+    # Line 1 only: must open with "#NEXUS".
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /^#NEXUS/);
+}
+
+=head2 _possibly_mase
+
+From bioperl test data.
+More detail from "http://www.umdnj.edu/rcompweb/PA/Notes/GenbankFF.htm".
+
+=cut
+
+sub _possibly_mase {
+    # ";;" on line 1, or ";" on later lines.  NOTE(review): "[^;]?" is
+    # optional, so later lines opening with ";;" match too; confirm intent.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^;;/) ||
+            ($line_number > 1 && $text =~ /^;[^;]?/));
+}
+
+=head2 _possibly_mega
+
+From the ensembl browser (AlignView data export).
+
+=cut
+
+sub _possibly_mega {
+    # Line 1 only: the whole line must be exactly "#mega".
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /^#mega$/);
+}
+
+
+=head2 _possibly_msf
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_msf {
+    # Any line: opens with "//", or has MSF:/Type:/Check: or Name:/Len:.
+    my ($text) = @_;
+    return ($text =~ m{^//} ||
+            $text =~ /MSF:.*Type:.*Check:|Name:.*Len:/);
+}
+
+=head2 _possibly_phrap
+
+From "http://biodata.ccgb.umn.edu/docs/contigimage.html".
+From "http://genetics.gene.cwru.edu/gene508/Lec6.htm".
+From bioperl test data ("*.ace.1" files).
+
+=cut
+
+sub _possibly_phrap {
+    # Any line that opens with one of the listed record tags.
+    my ($text) = @_;
+    return ($text =~ /^(?:AS\ |CO\ Contig|BQ|AF\ |BS\ |RD\ |
+                          QA\ |DS\ |RT\{)/x);
+}
+
+=head2 _possibly_pir
+
+From "http://www.ebi.ac.uk/help/formats.html".
+The ".,()" spotted in bioperl test data.
+
+=cut
+
+sub _possibly_pir { # "NBRF/PIR" (?)
+    # A ">XX;" type header on any line, or a sequence-like line after line 1.
+    my ($text, $line_number) = @_;
+    return (($line_number != 1 && $text =~ /^[\sA-IK-NP-Z.,()]+\*?$/i) ||
+            $text =~ /^>(?:P1|F1|DL|DC|RL|RC|N3|N1);/);
+}
+
+=head2 _possibly_pfam
+
+From bioperl test data.
+
+=cut
+
+sub _possibly_pfam {
+    # Any line: "name/from-to", whitespace, then letters (no J/O) or dots.
+    my ($text) = @_;
+    return ($text =~ m{^\w+/\d+-\d+\s+[A-IK-NP-Z.]+}i);
+}
+
+=head2 _possibly_phylip
+
+From "http://www.ebi.ac.uk/help/formats.html".  Initial space
+allowed on first line (spotted in ensembl AlignView exported
+data).
+
+=cut
+
+sub _possibly_phylip {
+    # Two numbers on line 1; a name plus residues on lines 2 and 3.
+    # NOTE(review): "^\w" takes a single name character; "\w+" intended?
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^\s*\d+\s\d+/) ||
+            ($line_number == 2 && $text =~ /^\w\s+[A-IK-NP-Z\s]+/) ||
+            ($line_number == 3 && $text =~ /(?:^\w\s+[A-IK-NP-Z\s]+|\s+[A-IK-NP-Z\s]+)/)
+           );
+}
+
+=head2 _possibly_prodom
+
+From "http://prodes.toulouse.inra.fr/prodom/2002.1/documentation/data.php".
+
+=cut
+
+sub _possibly_prodom {
+    # Line 1 only: opens with "ID" plus three spaces, ends "<digits> seq.".
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /^ID   / && $text =~ /\d+ seq\.$/);
+}
+
+=head2 _possibly_raw
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_raw {
+    # NOTE(review): the leading "s" in each class may be a typo for \s.
+    my ($text) = @_;
+    return ($text =~ /^(?:[sA-IK-NP-Z]+|[sa-ik-np-z]+)$/);
+}
+
+=head2 _possibly_rsf
+
+From "http://www.ebi.ac.uk/help/formats.html".
+
+=cut
+
+sub _possibly_rsf {
+    # "!!RICH_SEQUENCE" on line 1, a lone brace, or a listed field name.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^!!RICH_SEQUENCE/) ||
+            $text =~ /^[{}]$/ ||
+            $text =~ /^(?:name|type|longname|
+                          checksum|creation-date|strand|sequence)/x);
+}
+
+=head2 _possibly_selex
+
+From "http://www.ebc.ee/WWW/hmmer2-html/node27.html".
+
+Assuming presence of Selex file header.  Data exported by
+Bioperl on Pfam and Selex formats are identical, but Pfam file
+only holds one alignment.
+
+=cut
+
+sub _possibly_selex {
+    # "#=ID " on line 1, "#=AC " on line 2, or "#=SQ " on any line.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^#=ID /) ||
+            ($line_number == 2 && $text =~ /^#=AC /) ||
+            ($text =~ /^#=SQ /));
+}
+
+=head2 _possibly_stockholm
+
+From bioperl test data.
+
+=cut
+
+sub _possibly_stockholm {
+    # "# STOCKHOLM" opening line 1, or "#=GF "/"#=GS " opening any line.
+    my ($text, $line_number) = @_;
+    return (($line_number == 1 && $text =~ /^# STOCKHOLM/) ||
+            $text =~ /^#=(?:GF|GS) /);
+}
+
+
+
+=head2 _possibly_swiss
+
+From "http://ca.expasy.org/sprot/userman.html#entrystruc".
+
+=cut
+
+sub _possibly_swiss {
+    # Line 1 only: starts with "ID" plus three spaces and ends with "AA.".
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /^ID   / && $text =~ /AA\.$/);
+}
+
+=head2 _possibly_tab
+
+Contributed by Heikki.
+
+=cut
+
+sub _possibly_tab {
+    # Line 1 only: at least two non-empty fields separated by a tab.
+    my ($text, $line_number) = @_;
+    return ($line_number == 1 && $text =~ /^[^\t]+\t[^\t]+/) ;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMM.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMM.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMM.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,575 @@
+## $Id: HMM.pm,v 1.8.4.2 2006/11/17 09:32:42 sendu Exp $
+
+# BioPerl module for Bio::Tools::HMM
+#
+# Cared for by Yee Man Chan <ymc at yahoo.com>
+#
+# Copyright Yee Man Chan
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::HMM - Perl extension to perform Hidden Markov Model calculations
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::HMM;
+  use Bio::SeqIO;
+  use Bio::Matrix::Scoring;
+
+  # create a HMM object
+  # ACGT are the bases NC mean non-coding and coding
+  $hmm = new Bio::Tools::HMM('-symbols' => "ACGT", '-states' => "NC");
+
+  # initialize some training observation sequences
+  $seq1 = new Bio::SeqIO(-file => $ARGV[0], -format => 'fasta');
+  $seq2 = new Bio::SeqIO(-file => $ARGV[1], -format => 'fasta');
+  @seqs = ($seq1, $seq2);
+
+  # train the HMM with the observation sequences
+  $hmm->baum_welch_training(\@seqs);
+
+  # get parameters
+  $init = $hmm->init_prob; # returns an array reference
+  $matrix1 = $hmm->transition_prob; # returns Bio::Matrix::Scoring
+  $matrix2 = $hmm->emission_prob; # returns Bio::Matrix::Scoring
+
+  # initialize training hidden state sequences
+  $hs1 = "NCNCNNNNNCCCCNNCCCNNNNC";
+  $hs2 = "NCNNCNNNNNNCNCNCNNNCNCN";
+  @hss = ($hs1, $hs2);
+
+  # train the HMM with both observation sequences and hidden state
+  # sequences
+  $hmm->statistical_training(\@seqs, \@hss);
+
+  # with the newly calibrated HMM, we can use viterbi algorithm
+  # to obtain the hidden state sequence underlying an observation 
+  # sequence
+  $hss = $hmm->viterbi($seq); # returns a string of hidden states
+
+=head1 DESCRIPTION
+
+Hidden Markov Model (HMM) was first introduced by Baum and his colleagues
+in a series of classic papers in the late 1960s and 1970s. It was first
+applied to the field of speech recognition with great success in the 1970s.
+
+Explosion in the amount of sequencing data in the 1990s opened the field
+of Biological Sequence Analysis. Seeing HMM's effectiveness in detecting
+signals in biological sequences, Krogh, Mian and Haussler used HMM to find
+genes in E. coli DNA in a classical paper in 1994. Since then, there have
+been extensive application of HMM to other area of Biology, for example,
+multiple sequence alignment, CpG island detection and so on.
+
+=head1 DEPENDENCIES
+
+This package comes with the main bioperl distribution. You also need
+to install the latest bioperl-ext package which contains the XS code
+that implements the algorithms. This package won't work if you haven't
+compiled the bioperl-ext package.
+
+=head1 TO-DO
+
+
+=over 3
+
+=item 1.
+
+Allow people to set and get the tolerance level in the EM algorithm.
+
+=item 2.
+
+Allow people to set and get the maximum number of iterations 
+to run in the EM algorithm.
+
+=item 3.
+
+A function to calculate the probability of an observation sequence
+
+=item 4.
+
+A function to do posterior decoding, ie to find the probability of
+seeing a certain observation symbol at position i.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+        This implementation was written by Yee Man Chan (ymc at yahoo.com).
+        Copyright (c) 2005 Yee Man Chan. All rights reserved. This program
+        is free software; you can redistribute it and/or modify it under
+        the same terms as Perl itself. All the code are written by Yee
+        Man Chan without borrowing any code from anywhere.
+
+=cut
+
+package Bio::Tools::HMM;
+
+use base qw(Bio::Root::Root);
+
+BEGIN {
+    # The algorithms live in the compiled Bio::Ext::HMM extension; without
+    # it this module cannot be loaded at all.
+    eval {
+        require Bio::Ext::HMM;
+    };
+    if ( $@ ) {
+        # die() does not return, so no exit() is needed after it.
+        die("\nThe C-compiled engine for Hidden Markov Model (HMM) has not been installed.\n Please install the bioperl-ext package\n\n");
+    }
+}
+
+sub new {
+   # Builds an HMM object from -symbols and -states, each a plain string
+   # in which every character is one observation symbol / hidden state.
+   # Throws if either is missing, empty, or passed as a reference.
+   my ($class, @args) = @_;
+
+   my $self = $class->SUPER::new(@args);
+
+   # NOTE(review): -init, -amat and -emat are accepted here, but nothing
+   # in this constructor uses their values.
+   my ($symbols, $states, $init, $a_mat, $e_mat) =
+       $self->_rearrange([qw(SYMBOLS STATES INIT AMAT EMAT)], @args);
+
+   $self->throw("Observation Symbols are not defined!")
+       unless defined $symbols && length $symbols;
+   $self->throw("Hidden States are not defined!")
+       unless defined $states && length $states;
+
+   # A reference (e.g. an array of symbols) is the "list" case that is not
+   # supported.  Testing ref() rather than truthiness also lets the
+   # one-character alphabet "0" through.
+   if (ref $symbols) {
+      $self->throw("We don't support list of symbols in this version.\n");
+   }
+   if (ref $states) {
+      $self->throw("We don't support list of states in this version.\n");
+   }
+
+   # symbols() throws by itself when a character is repeated, so its
+   # return value (the alphabet string) needs no further check here.
+   $self->symbols($symbols);
+   # NOTE(review): states() is assumed to reject duplicates the same way.
+   $self->states($states);
+
+   $self->{'hmm'} = Bio::Ext::HMM::HMM->new($symbols, $states);
+   return $self;
+}
+
+=head2 likelihood
+
+ Title   : likelihood
+ Usage   : $prob = $hmm->likelihood($seq)
+ Function: Calculate the probability of an observation sequence given an HMM
+ Returns : A floating point number that is the logarithm of the probability
+           of an observation sequence given an HMM
+ Args    : The only argument is a string that is the observation sequence
+           you are interested in. Note that the sequence must not contain
+           any character that is not in the alphabet of observation symbols.
+
+=cut
+
+sub likelihood {
+   # Returns whatever Bio::Ext::HMM->HMM_likelihood yields for $seq, after
+   # checking that $seq uses only characters of the observation alphabet.
+   # Warns and returns nothing when $seq is undefined; throws when $seq
+   # contains a character outside the alphabet.
+   my ($self, $seq) = @_;
+
+   if( ! defined $seq) {
+      $self->warn("Cannot calculate without supply an observation sequence!");
+      return;
+   }
+   # Every alphabet character is taken literally.  A lookup table avoids
+   # building a tr/// at run time (where "-" would mean a range and "/"
+   # would break the expression) and leaves the caller's $_ alone.
+   my $alphabet = defined $self->{'symbols'} ? $self->{'symbols'} : '';
+   my %is_symbol = map { $_ => 1 } split //, $alphabet;
+   if (grep { !$is_symbol{$_} } split //, $seq) {
+      $self->throw("Observation Sequence contains characters that is not in the
+                    alphabet of observation symbols!\n");
+   }
+   return Bio::Ext::HMM->HMM_likelihood($self->{'hmm'}, $seq);
+}
+
+=head2 statistical_training
+
+ Title   : statistical_training
+ Usage   : $hmm->statistical_training(\@seqs, \@hss)
+ Function: Estimate the parameters of an HMM given an array of observation 
+           sequence and an array of the corresponding hidden state 
+           sequences
+ Returns : Returns nothing. The parameters of the HMM will be set to the 
+           estimated values
+ Args    : The first argument is a reference to an array of observation 
+           sequences. The second argument is a reference to an array of
+           hidden state sequences. Note that the lengths of an observation
+           sequence and a hidden state sequence must be the same.
+
+=cut
+
+sub statistical_training {
+   # Validates the observation sequences against the symbol alphabet and
+   # the hidden state sequences against the state alphabet, then hands
+   # both array references to Bio::Ext::HMM->HMM_statistical_training.
+   # Warns and returns nothing if either argument is undefined; throws on
+   # mismatched counts, mismatched lengths or out-of-alphabet characters.
+   my ($self, $seqs, $hss) = @_;
+
+   if( ! defined $seqs or ! defined $hss) {
+      $self->warn("Cannot calculate without supply an observation and a hidden state sequence!");
+      return;
+   }
+   # Count each array separately so that the comparison can really fail.
+   my $seq_cnt = @{$seqs};
+   my $hs_cnt  = @{$hss};
+   if ($seq_cnt != $hs_cnt) {
+      $self->throw("There must be the same number of observation sequences and 
+                    hidden state sequences!\n");
+   }
+   for my $i (0 .. $seq_cnt - 1) {
+      if (length($seqs->[$i]) != length($hss->[$i])) {
+         $self->throw("The corresponding observation sequences and hidden state sequences must be of the same length!\n");
+      }
+   }
+   # Alphabet characters are taken literally; lookup tables avoid building
+   # a tr/// at run time and leave the caller's $_ alone.
+   my $symbol_set = defined $self->{'symbols'} ? $self->{'symbols'} : '';
+   my %is_symbol = map { $_ => 1 } split //, $symbol_set;
+   foreach my $seq (@{$seqs}) {
+      if (grep { !$is_symbol{$_} } split //, $seq) {
+         $self->throw("Observation Sequence contains characters that is not in the
+                alphabet of observation symbols!\n");
+      }
+   }
+   my $state_set = defined $self->{'states'} ? $self->{'states'} : '';
+   my %is_state = map { $_ => 1 } split //, $state_set;
+   foreach my $hs (@{$hss}) {
+      if (grep { !$is_state{$_} } split //, $hs) {
+         $self->throw("Hidden State Sequence contains characters that is not in the
+                alphabet of hidden states!\n");
+      }
+   }
+   Bio::Ext::HMM->HMM_statistical_training($self->{'hmm'}, $seqs, $hss);
+}
+
+=head2 baum_welch_training
+
+ Title   : baum_welch_training
+ Usage   : $hmm->baum_welch_training(\@seqs)
+ Function: Estimate the parameters of an HMM given an array of observation 
+           sequence
+ Returns : Returns nothing. The parameters of the HMM will be set to the 
+           estimated values
+ Args    : The only argument is a reference to an array of observation 
+           sequences. 
+
+=cut
+
+sub baum_welch_training {
+   # Checks every observation sequence against the symbol alphabet, then
+   # passes the array reference to Bio::Ext::HMM->HMM_baum_welch_training.
+   # Warns and returns nothing if $seqs is undefined; throws when a
+   # sequence contains a character outside the alphabet.
+   my ($self, $seqs) = @_;
+
+   if( ! defined $seqs) {
+      $self->warn("Cannot calculate without supply an observation sequence!");
+      return;
+   }
+   # Alphabet characters are taken literally; a lookup table avoids
+   # building a tr/// at run time and leaves the caller's $_ alone.
+   my $alphabet = defined $self->{'symbols'} ? $self->{'symbols'} : '';
+   my %is_symbol = map { $_ => 1 } split //, $alphabet;
+   foreach my $seq (@{$seqs}) {
+      if (grep { !$is_symbol{$_} } split //, $seq) {
+         $self->throw("Observation Sequence contains characters that is not in the
+                alphabet of observation symbols!\n");
+      }
+   }
+   Bio::Ext::HMM->HMM_baum_welch_training($self->{'hmm'}, $seqs);
+}
+
+=head2 viterbi
+
+ Title   : viterbi
+ Usage   : $hss = $hmm->viterbi($seq)
+ Function: Find out the hidden state sequence that can maximize the 
+           probability of seeing observation sequence $seq.
+ Returns : Returns a string that is the hidden state sequence that maximizes
+           the probability of seeing $seq.
+ Args    : The only argument is an observation sequence.
+
+=cut
+
+sub viterbi {
+   my ($self, $seq) = @_;
+
+   if( ! defined $seq) {
+      $self->warn("Cannot calculate without supply an observation sequence!");
+      return;
+   }
+
+   # Validate against the alphabet taken literally (see \Q..\E): no string
+   # eval is built from the alphabet and the caller's $_ is not overwritten.
+   # With an empty alphabet every character is invalid.
+   my $alphabet = defined $self->{'symbols'} ? $self->{'symbols'} : '';
+   my $invalid  = length($alphabet) ? qr/[^\Q$alphabet\E]/ : qr/./s;
+
+   if ($seq =~ $invalid) {
+      $self->throw("Observation Sequence contains characters that is not in the
+             alphabet of observation symbols!\n");
+   }
+   # the extension returns the most probable hidden state string
+   return Bio::Ext::HMM->HMM_viterbi($self->{'hmm'}, $seq);
+}
+
+=head2 symbols
+
+ Title     : symbols 
+ Usage     : $symbols = $hmm->symbols() #get
+           : $hmm->symbols($value) #set
+ Function  : the set get for the observation symbols
+ Example   :
+ Returns   : symbols string
+ Arguments : new value
+
+=cut
+
+sub symbols {
+   my ($self,$val) = @_;
+
+   if ( defined $val ) {
+      # Each character of $val is one observation symbol; a symbol may be
+      # listed only once.  All loop state is lexical (the previous counter
+      # was an undeclared package global).
+      my %seen;
+      foreach my $c (split(//, $val)) {
+         if ($seen{$c}++) {
+            $self->throw("Can't have duplicate symbols!");
+         }
+      }
+      $self->{'symbols'} = $val;
+   }
+   return $self->{'symbols'};
+}
+
+
+=head2 states
+
+ Title     : states
+ Usage     : $states = $hmm->states() #get
+           : $hmm->states($value) #set
+ Function  : the set get for the hidden states
+ Example   :
+ Returns   : states string
+ Arguments : new value
+
+=cut
+
+sub states {
+   my ($self,$val) = @_;
+
+   if ( defined $val ) {
+      # Each character of $val is one hidden state; a state may be listed
+      # only once.  All loop state is lexical (the previous counter was an
+      # undeclared package global).
+      my %seen;
+      foreach my $c (split(//, $val)) {
+         if ($seen{$c}++) {
+            $self->throw("Can't have duplicate states!");
+         }
+      }
+      $self->{'states'} = $val;
+   }
+   return $self->{'states'};
+}
+
+=head2 init_prob
+
+ Title     : init_prob
+ Usage     : $init = $hmm->init_prob() #get
+           : $hmm->init_prob(\@init) #set
+ Function  : the set get for the initial probability array
+ Example   :
+ Returns   : reference to double array
+ Arguments : new value
+
+=cut
+
+sub init_prob {
+   my ($self, $init) = @_;
+   my $states = $self->{'states'};
+
+   if (defined $init) {
+      # setter: $init is a reference to one probability per hidden state,
+      # in the order the states appear in the states string
+      if (ref($init)) {
+         my $size = @{$init};
+         my $sum = 0.0;
+         $sum += $_ foreach @{$init};
+
+         # Floating point sums are rarely exactly 1.0 (0.1 + 0.7 + 0.2 is
+         # not), so compare within a small tolerance.
+         my $tolerance = 1e-6;
+         if (abs($sum - 1.0) > $tolerance) {
+            $self->throw("The sum of initial probability array must be 1.0!\n");
+         }
+         if ($size != length($states)) {
+            $self->throw("The size of init array $size is different from the number of HMM's hidden states!\n");
+         }
+         for my $i (0 .. $size - 1) {
+            Bio::Ext::HMM::HMM->set_init_entry($self->{'hmm'}, substr($states, $i, 1), $init->[$i]);
+         }
+      }
+      else {
+         $self->throw("Initial Probability array must be a reference!\n");
+      }
+   }
+   else {
+      # getter: read one entry per state back from the extension object
+      my @A;
+      for my $i (0 .. length($states) - 1) {
+         $A[$i] = Bio::Ext::HMM::HMM->get_init_entry($self->{'hmm'}, substr($states, $i, 1));
+      }
+      return \@A;
+   } 
+}
+
+=head2 transition_prob
+
+ Title     : transition_prob
+ Usage     : $transition_matrix = $hmm->transition_prob() #get
+           : $hmm->transition_prob($matrix) #set
+ Function  : the set get for the transition probability matrix
+ Example   :
+ Returns   : Bio::Matrix::Scoring 
+ Arguments : new value
+
+=cut
+
+sub transition_prob {
+   my ($self, $matrix) = @_;
+   # one character per hidden state; used as row and as column labels
+   my @states = split(//, $self->{'states'});
+
+   if (defined $matrix) {
+      # setter: validate the labels and the row sums, then copy every entry
+      # into the extension object
+      if ($matrix->isa('Bio::Matrix::Scoring')) {
+         my $row = join("", $matrix->row_names);
+         my $col = join("", $matrix->column_names);
+         if ($row ne $self->{'states'}) {
+            $self->throw("Names of the rows ($row) is different from the states of HMM " . $self->{'states'});
+         } 
+         if ($col ne $self->{'states'}) {
+            $self->throw("Names of the columns ($col) is different from the states of HMM " . $self->{'states'});
+         }
+
+         # Floating point sums are rarely exactly 1.0, so each row is
+         # compared within a small tolerance.
+         my $tolerance = 1e-6;
+         foreach my $from (@states) {
+            my $sum = 0.0;
+            $sum += $matrix->entry($from, $_) foreach @states;
+            if (abs($sum - 1.0) > $tolerance) {
+               $self->throw("Sum of probabilities for each from-state must be 1.0!\n");
+            }
+         }
+         # nothing is written until every row has passed validation
+         foreach my $from (@states) {
+            foreach my $to (@states) {
+               Bio::Ext::HMM::HMM->set_a_entry($self->{'hmm'}, $from, $to, $matrix->entry($from, $to));
+            }
+         }
+      }
+      else {
+         $self->throw("Transition Probability matrix must be of type Bio::Matrix::Scoring.\n");
+      }
+   }
+   else {
+      # getter: rebuild a states x states matrix from the extension object
+      my @A;
+      for my $i (0 .. $#states) {
+         for my $j (0 .. $#states) {
+            $A[$i][$j] = Bio::Ext::HMM::HMM->get_a_entry($self->{'hmm'}, $states[$i], $states[$j]);
+         }
+      }
+      return Bio::Matrix::Scoring->new(-values => \@A, -rownames => \@states, -colnames => \@states);
+   } 
+}
+
+=head2 emission_prob
+
+ Title     : emission_prob
+ Usage     : $emission_matrix = $hmm->emission_prob() #get
+           : $hmm->emission_prob($matrix) #set
+ Function  : the set get for the emission probability matrix
+ Example   :
+ Returns   : Bio::Matrix::Scoring 
+ Arguments : new value
+
+=cut
+
+sub emission_prob {
+   my ($self, $matrix) = @_;
+   # rows are hidden states, columns are observation symbols (one character each)
+   my @states  = split(//, $self->{'states'});
+   my @symbols = split(//, $self->{'symbols'});
+
+   if (defined $matrix) {
+      # setter: validate the labels and the row sums, then copy every entry
+      # into the extension object
+      if ($matrix->isa('Bio::Matrix::Scoring')) {
+         my $row = join("", $matrix->row_names);
+         my $col = join("", $matrix->column_names);
+         if ($row ne $self->{'states'}) {
+            $self->throw("Names of the rows ($row) is different from the states of HMM " . $self->{'states'});
+         } 
+         if ($col ne $self->{'symbols'}) {
+            $self->throw("Names of the columns ($col) is different from the symbols of HMM " . $self->{'symbols'});
+         }
+
+         # Floating point sums are rarely exactly 1.0, so each row is
+         # compared within a small tolerance.
+         my $tolerance = 1e-6;
+         foreach my $state (@states) {
+            my $sum = 0.0;
+            $sum += $matrix->entry($state, $_) foreach @symbols;
+            if (abs($sum - 1.0) > $tolerance) {
+               $self->throw("Sum of probabilities for each state must be 1.0!\n");
+            }
+         }
+         # nothing is written until every row has passed validation
+         foreach my $state (@states) {
+            foreach my $symbol (@symbols) {
+               Bio::Ext::HMM::HMM->set_e_entry($self->{'hmm'}, $state, $symbol, $matrix->entry($state, $symbol));
+            }
+         }
+      }
+      else {
+         $self->throw("Emission Probability matrix must be of type Bio::Matrix::Scoring.\n");
+      }
+   }
+   else {
+      # getter: rebuild a states x symbols matrix from the extension object
+      my @A;
+      for my $i (0 .. $#states) {
+         for my $j (0 .. $#symbols) {
+            $A[$i][$j] = Bio::Ext::HMM::HMM->get_e_entry($self->{'hmm'}, $states[$i], $symbols[$j]);
+         }
+      }
+      return Bio::Matrix::Scoring->new(-values => \@A, -rownames => \@states, -colnames => \@symbols);
+   } 
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Domain.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Domain.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Domain.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,341 @@
+# $Id: Domain.pm,v 1.12.8.1 2006/10/02 23:10:33 sendu Exp $
+#
+# BioPerl module for Bio::Tools::HMMER::Domain
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::HMMER::Domain - One particular domain hit from HMMER 
+
+=head1 SYNOPSIS
+
+Read the Bio::Tools::HMMER::Results docs
+
+=head1 DESCRIPTION
+
+A particular domain score. We reuse the Homol SeqFeature system
+here, so this inherits off Homol SeqFeature. As this code
+originally came from a separate project, there are some backward
+compatibility stuff provided to keep this working with old code.
+
+Don't forget this inherits off Bio::SeqFeature, so all your usual
+nice start/end/score stuff is ready for use.
+
+=head1 CONTACT
+
+Ewan Birney, birney at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+package Bio::Tools::HMMER::Domain;
+
+use Bio::SeqFeature::Generic;
+use strict;
+
+
+use base qw(Bio::SeqFeature::FeaturePair);
+
+sub new { 
+  # The argument list is passed unchanged to the parent constructor and to
+  # both Bio::SeqFeature::Generic objects created below.
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+
+  # raw alignment text, filled by add_alignment_line
+  $self->{'alignlines'} = [];
+
+  my $hmmf1 = Bio::SeqFeature::Generic->new(@args);
+  my $hmmf2 = Bio::SeqFeature::Generic->new(@args);
+
+  # install the two halves of the feature pair
+  $self->feature1($hmmf1);
+  $self->feature2($hmmf2);
+
+  return $self;
+}
+
+=head2 add_alignment_line
+
+ Title   : add_alignment_line
+ Usage   : $domain->add_alignment_line($line_from_hmmer_output);
+ Function: add an alignment line to this Domain object
+ Returns : Nothing
+ Args    : scalar
+
+ Adds an alignment line, mainly for storing the HMMER alignments
+as flat text which can be regurgitated. You're right. This is *not
+nice* and not the right way to do it.  C'est la vie.
+
+=cut
+
+sub add_alignment_line {
+    # append one raw line of alignment text to this domain's stored lines
+    my ($self, $line) = @_;
+    push @{ $self->{'alignlines'} }, $line;
+}
+
+=head2 each_alignment_line
+
+ Title   : each_alignment_line
+ Usage   : foreach $line ( $domain->each_alignment_line )
+ Function: regurgitates the alignment lines as they were fed in.
+           only useful realistically for printing.
+ Example :
+ Returns : 
+ Args    : None
+
+
+=cut
+
+sub each_alignment_line {
+    # return the stored alignment lines, in the order they were added
+    my ($self) = @_;
+    my $lines = $self->{'alignlines'};
+    return @{$lines};
+}
+
+=head2 get_nse
+
+ Title   : get_nse
+ Usage   : $domain->get_nse()
+ Function: Provides a seqname/start-end format, useful
+           for unique keys. nse stands for name-start-end
+           It is used a lot in Pfam
+ Example :
+ Returns : A string
+ Args    : Optional separator 1 and separator 2 (default / and -)
+
+
+=cut
+
+
+
+sub get_nse {
+    my ($self, $sep1, $sep2) = @_;
+
+    # default separators give "name/start-end"
+    $sep1 = "/" unless defined $sep1;
+    $sep2 = "-" unless defined $sep2;
+
+    my @parts = ($self->seq_id, $sep1, $self->start, $sep2, $self->end);
+    return sprintf("%s%s%d%s%d", @parts);
+}
+
+
+#  =head2 start_seq
+
+#   Title   : start_seq
+#   Usage   : Backward compatibility with old HMMER modules.
+#             should use $domain->start
+#   Function:
+#   Example :
+#   Returns : 
+#   Args    :
+
+#  =cut
+
+sub start_seq {
+    # deprecated alias: warns, then delegates to start()
+    my ($self, $start) = @_;
+    $self->warn("Using old domain->start_seq. Should use domain->start");
+    return $self->start($start);
+}
+
+#  =head2 end_seq
+
+#   Title   : end_seq
+#   Usage   : Backward compatibility with old HMMER modules.
+#             should use $domain->end
+#   Function:
+#   Example :
+#   Returns : 
+#   Args    :
+
+#  =cut
+
+sub end_seq {
+    # deprecated alias: warns, then delegates to end()
+    my ($self, $end) = @_;
+    $self->warn("Using old domain->end_seq. Should use domain->end");
+    return $self->end($end);
+}
+
+#  =head2 start_hmm
+
+#   Title   : start_hmm
+#   Usage   : Backward compatibility with old HMMER modules, and
+#             for convenience. Equivalent to $self->homol_SeqFeature->start
+#   Function:
+#   Example :
+#   Returns : 
+#   Args    :
+
+#  =cut
+
+sub start_hmm { 
+    # deprecated alias: warns, then delegates to hstart()
+    my ($self, $start) = @_;
+    $self->warn("Using old domain->start_hmm. Should use domain->hstart");
+    return $self->hstart($start);
+}
+
+#  =head2 end_hmm
+
+#   Title   : end_hmm
+#   Usage   : Backward compatibility with old HMMER modules, and
+#             for convenience. Equivalent to $self->homol_SeqFeature->end
+#   Function:
+#   Example :
+#   Returns : 
+#   Args    :
+
+#  =cut
+
+sub end_hmm {
+    # deprecated alias: warns, then delegates to hend()
+    my ($self, $end) = @_;
+    $self->warn("Using old domain->end_hmm. Should use domain->hend");
+    return $self->hend($end);
+}
+
+=head2 hmmacc
+
+ Title   : hmmacc
+ Usage   : $domain->hmmacc($newacc)
+ Function: set get for HMM accession number. This is placed in the homol
+           feature of the HMM
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub hmmacc{
+   my ($self,$acc) = @_;
+   my $hmm_feature = $self->feature2;
+
+   if( defined $acc ) {
+       # Replace rather than append: the getter below returns the first
+       # stored value, so appending would make a second set a silent no-op.
+       $hmm_feature->remove_tag('accession') if $hmm_feature->has_tag('accession');
+       $hmm_feature->add_tag_value('accession',$acc);
+   }
+
+   # Only ask for the values when the tag exists; Bio::SeqFeature::Generic
+   # is documented to throw when asked for a tag that was never set.
+   my @vals = $hmm_feature->has_tag('accession')
+            ? $hmm_feature->each_tag_value('accession')
+            : ();
+   # always a single scalar (undef when unset) so list-context callers such
+   # as sprintf argument lists keep their positions
+   return shift @vals;
+}
+
+=head2 hmmname
+
+ Title   : hmmname
+ Usage   : $domain->hmmname($newname)
+ Function: set get for the HMM name. This is placed in the homol
+           feature of the HMM
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub hmmname {
+    # get/set the HMM name by delegating to hseq_id()
+    my ($self, @args) = @_;
+    return $self->hseq_id(@args);
+}
+
+=head2 bits
+
+ Title   : bits
+ Usage   :
+ Function: backward compatibility. Same as score
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub bits{
+   # backward-compatible name for score()
+   my ($self, @args) = @_;
+   return $self->score(@args);
+}
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   :
+ Function: $domain->evalue($value);
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub evalue{
+    # get/set the 'evalue' tag; any arguments are passed on to _tag_value
+    return shift->_tag_value('evalue',@_);
+}
+
+=head2 seqbits
+
+ Title   : seqbits
+ Usage   :
+ Function: $domain->seqbits($value);
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub seqbits {
+    # get/set the 'seqbits' tag; any arguments are passed on to _tag_value
+    return shift->_tag_value('seqbits',@_);
+}
+
+=head2 seq_range
+
+ Title   : seq_range
+ Usage   : 
+ Function: Throws an exception to catch scripts which need to upgrade
+ Example :
+ Returns : 
+ Args    :
+
+=cut
+
+sub seq_range{
+   # retired method: always throws so that old scripts fail loudly
+   my ($self,@args) = @_;
+
+   $self->throw("You have accessed an old method. Please recode your script to the new bioperl HMMER module");
+}
+
+=head2 hmm_range
+
+ Title   : hmm_range
+ Usage   :
+ Function: Throws an exception to catch scripts which need to upgrade
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub hmm_range{
+   # retired method: always throws so that old scripts fail loudly
+   my ($self,@args) = @_;
+
+   $self->throw("You have accessed an old method. Please recode your script to the new bioperl HMMER module");
+}
+
+1;  # says use was ok
+__END__
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Results.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Results.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Results.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,964 @@
+# $Id: Results.pm,v 1.28.4.1 2006/10/02 23:10:33 sendu Exp $
+#
+# Perl Module for HMMResults
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+#Copyright Genome Research Limited (1997).
+
+=head1 NAME
+
+Bio::Tools::HMMER::Results - Object representing HMMER output results
+
+=head1 SYNOPSIS
+
+   # parse a hmmsearch file (can also parse a hmmpfam file)
+   $res = new Bio::Tools::HMMER::Results( -file => 'output.hmm' ,
+                                          -type => 'hmmsearch');
+
+   # print out the results for each sequence
+   foreach $seq ( $res->each_Set ) {
+       print "Sequence bit score is",$seq->bits,"\n";
+       foreach $domain ( $seq->each_Domain ) {
+           print " Domain start ",$domain->start," end ",$domain->end,
+	   " score ",$domain->bits,"\n";
+       }
+   }
+
+   # new result object on a sequence/domain cutoff of
+   # 25 bits sequence, 15 bits domain
+   $newresult = $res->filter_on_cutoff(25,15);
+
+   # alternative way of getting out all domains directly
+   foreach $domain ( $res->each_Domain ) {
+       print "Domain on ",$domain->seq_id," with score ",
+       $domain->bits," evalue ",$domain->evalue,"\n";
+   }
+
+=head1 DESCRIPTION
+
+This object represents HMMER output, either from hmmsearch or
+hmmpfam. For hmmsearch, a series of HMMER::Set objects are made, one
+for each sequence, which have the bits score for the object. For
+hmmpfam searches, only one Set object is made.
+
+
+These objects come from the original HMMResults modules used
+internally in Pfam, written by Ewan Birney. Ewan then converted them to
+BioPerl objects in 1999. That conversion is meant to be backwardly
+compatible, but may not be (caveat emptor).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::HMMER::Results;
+
+use strict;
+
+use Bio::Tools::HMMER::Domain;
+use Bio::Tools::HMMER::Set;
+use Symbol;
+
+use base qw(Bio::Root::Root Bio::Root::IO Bio::SeqAnalysisParserI);
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  $self->{'domain'} = []; # array of HMMUnits
+  $self->{'seq'} = {};    # Set objects keyed by sequence name
+
+  # -type is mandatory and selects the parser; the remaining arguments are
+  # handed to _initialize_io, which may or may not open a filehandle
+  my ($parsetype) = $self->_rearrange([qw(TYPE)],@args);
+  $self->_initialize_io(@args);
+  if( !defined $parsetype ) {
+      $self->throw("No parse type provided. should be hmmsearch or hmmpfam");
+  }
+  $self->parsetype($parsetype);
+
+  # without a filehandle the object is left empty (filter_on_cutoff builds
+  # its result object that way)
+  if( defined $self->_fh() ) {
+      if( $parsetype eq 'hmmsearch' ) {
+          $self->_parse_hmmsearch($self->_fh());
+      } elsif ( $parsetype eq 'hmmpfam' ) {
+          $self->_parse_hmmpfam($self->_fh());
+      } else {
+          $self->throw("Did not recoginise type $parsetype");
+      }
+  }
+
+  return $self; # success - we hope!
+}
+
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while( my $feat = $res->next_feature ) { # do something }
+ Function: SeqAnalysisParserI implementing function
+ Example :
+ Returns : A Bio::SeqFeatureI compliant object, in this case,
+           each DomainUnit object, ie, flattening the Sequence
+           aspect of this.
+ Args    : None
+
+
+=cut
+
+sub next_feature{
+   my ($self) = @_;
+
+   # On the first call, flatten every domain of every Set into a queue;
+   # each call (including the first) then hands out the next queued domain
+   # and returns undef once the queue is empty.
+   if( ! $self->{'_started_next_feature'} ) {
+       $self->{'_started_next_feature'} = 1;
+       my @queue;
+       foreach my $seq ( $self->each_Set() ) {
+           push(@queue, $seq->each_Domain());
+       }
+       $self->{'_next_feature_array'} = \@queue;
+   }
+
+   return shift @{$self->{'_next_feature_array'}};
+}
+
+
+=head2 number
+
+ Title   : number
+ Usage   : print "There are ",$res->number," domains hit\n";
+ Function: provides the number of domains in the HMMER report
+
+=cut
+
+sub number {
+    my $self = shift;
+
+    # count the stored domains directly; no copy of the list is needed
+    return scalar @{$self->{'domain'}};
+}
+
+=head2 seqfile
+
+ Title   : seqfile
+ Usage   : $obj->seqfile($newval)
+ Function:
+ Example :
+ Returns : value of seqfile
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub seqfile{
+   # get/set the stored sequence file name; undef leaves the value alone
+   my $self  = shift;
+   my $value = shift;
+
+   $self->{'seqfile'} = $value if defined $value;
+   return $self->{'seqfile'};
+}
+
+=head2 hmmfile
+
+ Title   : hmmfile
+ Usage   : $obj->hmmfile($newval)
+ Function:
+ Example :
+ Returns : value of hmmfile
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub hmmfile{
+   # get/set the stored HMM file name; undef leaves the value alone
+   my $self  = shift;
+   my $value = shift;
+
+   $self->{'hmmfile'} = $value if defined $value;
+   return $self->{'hmmfile'};
+}
+
+=head2 add_Domain
+
+ Title   : add_Domain
+ Usage   : $res->add_Domain($unit)
+ Function: adds a domain to the results array. Mainly used internally.
+ Args    : A Bio::Tools::HMMER::Domain
+
+
+=cut
+
+sub add_Domain {
+    my ($self, $unit) = @_;
+    my $name = $unit->seq_id();
+
+    # attach the domain to the Set of its sequence when one is registered;
+    # otherwise warn - the domain is still kept in the flat list below
+    if( exists $self->{'seq'}->{$name} ) {
+        $self->{'seq'}->{$name}->add_Domain($unit);
+    }
+    else {
+        $self->warn("Adding a domain of $name but with no HMMSequence. Will be kept in domain array but not added to a HMMSequence");
+    }
+
+    push @{ $self->{'domain'} }, $unit;
+}
+
+
+=head2 each_Domain
+
+ Title   : each_Domain
+ Usage   : foreach $domain ( $res->each_Domain() )
+ Function: array of Domain units which are held in this report
+ Returns : array
+ Args    : none
+
+
+=cut
+
+sub each_Domain {
+    my ($self) = @_;
+
+    # hand back a copy of the domain list, in stored order
+    my @units;
+    push @units, $_ for @{ $self->{'domain'} };
+    return @units;
+}
+
+
+=head2 domain_bits_cutoff_from_evalue
+
+ Title   : domain_bits_cutoff_from_evalue
+ Usage   : $cutoff = domain_bits_cutoff_from_evalue(0.01);
+ Function: return a bits cutoff from an evalue using the
+           scores here. Somewhat interesting logic:
+            Find the two bit scores which straddle the evalue
+            if( 25 is between these two points) return 25
+            else return the midpoint.
+
+           This logic tries to ensure that with large signal to
+           noise separation one still has sensible 25 bit cutoff
+ Returns :
+ Args    :
+
+=cut
+
+sub domain_bits_cutoff_from_evalue {
+    my $self = shift;
+    my $eval = shift;
+
+    # domains ordered from the highest bit score to the lowest
+    my @doms = map  { $_->[0] }
+               sort { $b->[1] <=> $a->[1] }
+               map  { [ $_, $_->bits] } $self->each_Domain;
+
+    # Walk down the list: $prev ends up as the last domain whose evalue is
+    # within the cutoff, $dom as the first one whose evalue exceeds it.
+    my ($prev, $dom);
+    foreach my $candidate ( @doms ) {
+        if( $candidate->evalue > $eval ) {
+            $dom = $candidate;
+            last;
+        }
+        $prev = $candidate;
+    }
+
+    # both sides of the cutoff must be present to straddle it
+    if( ! defined $prev || ! defined $dom ) {
+        $self->throw("Evalue is either above or below the list...");
+        return;
+    }
+
+    my $sep = $prev->bits - $dom->bits ;
+
+    # scores (almost) touch: use the upper one
+    if( $sep < 1 ) {
+        return $prev->bits();
+    }
+    # 25 bits lies inside the gap: keep the conventional 25 bit cutoff
+    if( $dom->bits < 25 && $prev->bits > 25 ) {
+        return 25;
+    }
+
+    # otherwise the midpoint of the gap
+    return int( $dom->bits + $sep/2 ) ;
+
+}
+
+
+sub dictate_hmm_acc {
+    # stamp the same HMM accession on every unit returned by eachHMMUnit()
+    my ($self, $acc) = @_;
+
+    my @units = $self->eachHMMUnit();
+    for my $unit ( @units ) {
+        $unit->hmmacc($acc);
+    }
+}
+
+=head2 write_FT_output
+
+ Title   : write_FT_output
+ Usage   : $res->write_FT_output(\*STDOUT,'DOMAIN')
+ Function: writes feature table output ala swissprot
+ Returns :
+ Args    :
+
+
+=cut
+
+sub write_FT_output {
+    my ($self, $file, $idt) = @_;
+
+    # feature key written on every FT line
+    $idt = "DOMAIN" unless defined $idt;
+
+    # one "ID" record per Set, one "FT" line per domain, closed by "//"
+    for my $set ( $self->each_Set() ) {
+        my $id_line = sprintf("ID   %s\n",$set->name());
+        print $file $id_line;
+        for my $dom ( $set->each_Domain() ) {
+            my $ft_line = sprintf("FT   %s   %d %d %s\n",$idt,
+                                  $dom->start,$dom->end,$dom->hmmname);
+            print $file $ft_line;
+        }
+        print $file "//\n";
+    }
+}
+
+=head2 filter_on_cutoff
+
+ Title   : filter_on_cutoff
+ Usage   : $newresults = $results->filter_on_cutoff(25,15);
+ Function: Produces a new HMMER::Results module which has
+           been trimmed at the cutoff.
+ Returns : a Bio::Tools::HMMER::Results module
+ Args    : sequence cutoff and domain cutoff. in bits score
+           if you want one cutoff, simply use same number both places
+
+=cut
+
+sub filter_on_cutoff {
+    my $self = shift;
+    my $seqthr = shift;
+    my $domthr = shift;
+
+    # both thresholds are required; pass the same number twice for one cutoff
+    if( !defined $domthr ) {
+       $self->throw("hmmresults filter on cutoff needs two arguments");
+    }
+
+    # empty result object of the same parse type (no file is read)
+    my $new = Bio::Tools::HMMER::Results->new(-type => $self->parsetype);
+
+    foreach my $seq ( $self->each_Set()) {
+        # drop whole sequences scoring below the sequence threshold
+        next if( $seq->bits() < $seqthr );
+        # NOTE(review): the Set object is shared with $self, not copied, and
+        # add_Domain also calls add_Domain on the registered Set - confirm
+        # whether a fresh Set per sequence was intended here.
+        $new->add_Set($seq);
+        foreach my $unit ( $seq->each_Domain() ) {
+            # keep only the domains at or above the domain threshold
+            next if( $unit->bits() < $domthr );
+            $new->add_Domain($unit);
+        }
+    }
+    return $new;
+}
+
+=head2 write_ascii_out
+
+ Title   : write_ascii_out
+ Usage   : $res->write_ascii_out(\*STDOUT)
+ Function: writes as
+           seq seq_start seq_end model-acc model_start model_end model_name
+ Returns :
+ Args    :
+
+  FIXME: Now that we have no modelacc, this is probably a bad thing.
+
+=cut
+
+# writes as seq sstart send modelacc hstart hend modelname
+
+sub write_ascii_out {
+    my ($self, $fh) = @_;
+
+    # default to standard output
+    $fh = \*STDOUT unless defined $fh;
+
+    # one line per domain:
+    # seq sstart send modelacc hstart hend bits evalue modelname
+    for my $set ( $self->each_Set() ) {
+        for my $dom ( $set->each_Domain() ) {
+            my $row = sprintf("%s %4d %4d %s %4d %4d %4.2f %4.2g %s\n",
+                              $dom->seq_id(),$dom->start(),$dom->end(),
+                              $dom->hmmacc,$dom->hstart,$dom->hend,
+                              $dom->bits,$dom->evalue,$dom->hmmname);
+            print $fh $row;
+        }
+    }
+
+}
+
+=head2 write_GDF_bits
+
+ Title   : write_GDF_bits
+ Usage   : $res->write_GDF_bits(25,15,\*STDOUT)
+ Function: writes GDF format with a sequence,domain threshold
+ Returns :
+ Args    :
+
+=cut
+
+sub write_GDF_bits {
+    my $self = shift;
+    my $seqt = shift;
+    my $domt = shift;
+    my $file = shift;
+    my (@array,@narray);
+
+    if( !defined $file ) {
+        $self->throw("Attempting to use write_GDF_bits without passing in correct arguments!");
+        return;
+    }
+
+    # collect the domains that pass both the sequence and the domain cutoff
+    foreach my $seq ( $self->each_Set()) {
+        next if( $seq->bits() < $seqt );
+        foreach my $unit ( $seq->each_Domain() ) {
+            next if( $unit->bits() < $domt );
+            push(@array,$unit);
+        }
+    }
+
+    # order by sequence name, then by start position within a sequence
+    @narray = sort { $a->seq_id() cmp $b->seq_id()
+                         or
+                     $a->start() <=> $b->start() } @array;
+
+    # one tab separated GDF line per domain
+    foreach my $unit ( @narray ) {
+        print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue);
+    }
+
+}
+
+sub write_scores_bits {
+    my $self = shift;
+    my $seqt = shift;
+    my $domt = shift;
+    my $file = shift;
+    my (@array,@narray);
+
+    # unlike write_GDF_bits this only warns and returns when no filehandle
+    # is supplied
+    if( !defined $file ) {
+        $self->warn("Attempting to use write_scores_bits without passing in correct arguments!");
+        return;
+    }
+
+    # collect the domains that pass both the sequence and the domain cutoff
+    foreach my $seq ( $self->eachHMMSequence()) {
+        next if( $seq->bits() < $seqt );
+        foreach my $unit ( $seq->eachHMMUnit() ) {
+            next if( $unit->bits() < $domt );
+            push(@array,$unit);
+        }
+    }
+
+    # lowest bit score first
+    @narray = sort { $a->bits() <=> $b->bits() } @array;
+
+    foreach my $unit ( @narray ) {
+        print $file sprintf("%4.2f %s\n",$unit->bits(),$unit->get_nse());
+    }
+
+}
+
+sub write_GDF {
+    my ($self, $file) = @_;
+
+    # default to standard output
+    $file = \*STDOUT unless defined $file;
+
+    # one tab separated GDF line per unit, unfiltered and in stored order
+    for my $dom ( $self->eachHMMUnit() ) {
+        my $row = sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$dom->get_nse(),$dom->start(),$dom->end(),$dom->seq_id(),$dom->bits(),$dom->evalue);
+        print $file $row;
+    }
+
+}
+
+sub highest_noise {
+    my $self = shift;
+    my $seqt = shift;
+    my $domt = shift;
+    my ($noiseseq,$noisedom);
+
+    # best scores seen so far among the rejected sequences / domains
+    my $hseq = -100000;
+    my $hdom = -100000;
+
+    foreach my $seq ( $self->eachHMMSequence()) {
+        my $seq_is_noise = $seq->bits() < $seqt;
+
+        if( $seq_is_noise && $seq->bits() > $hseq  ) {
+            $hseq = $seq->bits();
+            $noiseseq = $seq;
+        }
+
+        foreach my $unit ( $seq->eachHMMUnit() ) {
+            # A domain is noise when its sequence is rejected, or when the
+            # sequence passes (bits >= $seqt, the same boundary used by
+            # filter_on_cutoff and lowest_true) but the domain itself scores
+            # below $domt.
+            next unless $seq_is_noise || $unit->bits() < $domt;
+            if( $unit->bits() > $hdom ) {
+                $hdom  = $unit->bits();
+                $noisedom = $unit;
+            }
+        }
+    }
+
+    # either element is undef when nothing was rejected at that level
+    return ($noiseseq,$noisedom);
+
+}
+
+
+sub lowest_true {
+    my ($self, $seqt, $domt) = @_;
+
+    unless( defined $domt ) {
+        $self->warn("lowest true needs at least a domain threshold cut-off");
+        return (0,0);
+    }
+
+    my ($trueseq, $truedom);
+    # lowest accepted scores seen so far
+    my ($lowseq, $lowdom) = (100000, 100000);
+
+    SEQUENCE:
+    for my $set ( $self->eachHMMSequence() ) {
+        # sequences under the sequence threshold contribute nothing
+        next SEQUENCE if $set->bits() < $seqt;
+
+        if( $set->bits() < $lowseq ) {
+            $lowseq  = $set->bits();
+            $trueseq = $set;
+        }
+
+        for my $dom ( $set->eachHMMUnit() ) {
+            next unless $dom->bits() >= $domt && $dom->bits() < $lowdom;
+            $lowdom  = $dom->bits();
+            $truedom = $dom;
+        }
+    }
+
+    return ($trueseq,$truedom);
+
+}
+
+
+
+=head2 add_Set
+
+ Title   : add_Set
+ Usage   : Mainly internal function
+ Function:
+ Returns :
+ Args    :
+
+
+=cut
+
+sub add_Set {
+    # register a Set under its sequence name; a name may be used only once
+    my ($self, $seq) = @_;
+    my $name = $seq->name();
+
+    $self->throw("You alredy have $name in HMMResults!")
+        if exists $self->{'seq'}->{$name};
+
+    $self->{'seq'}->{$name} = $seq;
+}
+
+
+=head2 each_Set
+
+ Title   : each_Set
+ Usage   :
+ Function:
+ Returns :
+ Args    :
+
+
+=cut
+
+sub each_Set {
+    # return every registered Set, in the hash's own (unordered) key order
+    my ($self) = @_;
+
+    my @sets = values %{ $self->{'seq'} };
+    return @sets;
+}
+
+
+=head2 get_Set
+
+ Title   : get_Set
+ Usage   : $set = $res->get_Set('sequence-name');
+ Function: returns the Set for a particular sequence
+ Returns : a HMMER::Set object
+ Args    : name of the sequence
+
+
+=cut
+
+sub get_Set {
+    # look up the Set stored under a sequence name (undef when absent)
+    my ($self, $name) = @_;
+    return $self->{'seq'}->{$name};
+}
+
+
+=head2 _parse_hmmpfam
+
+ Title   : _parse_hmmpfam
+ Usage   : $res->_parse_hmmpfam($filehandle)
+ Function:
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _parse_hmmpfam {
+    # Reads hmmpfam output from the filehandle $file.  For every "Query"
+    # record it creates a Bio::Tools::HMMER::Set (registered with add_Set),
+    # then one Bio::Tools::HMMER::Domain per row of the per-domain table
+    # (registered with add_Domain), and finally attaches the alignment text
+    # to the matching Domain.  The parser is driven by the global $_ that
+    # the nested while(<$file>) loops share, so the order of the reads below
+    # matters.
+    my $self = shift;
+    my $file = shift;
+
+    my ($id,$sqfrom,$sqto,$hmmf,$hmmt,$sc,$ev,
+	$unit,$nd,$seq,$name,$seqname,$from,
+	$to,%hash,%acc,$acc);
+    # NOTE(review): $count is initialised but never used in this sub.
+    my $count = 0;
+
+    while(<$file>) {
+        if( /^HMM file:\s+(\S+)/ ) { $self->hmmfile($1); next; }
+	elsif( /^Sequence file:\s+(\S+)/ ) { $self->seqfile($1); next }
+	elsif( /^Query(\s+sequence)?:\s+(\S+)/ ) {
+
+	    # $2 is the sequence name; $1 is only the optional " sequence" word.
+	    $seqname = $2;
+
+	    $seq     = Bio::Tools::HMMER::Set->new();
+
+	    $seq ->name($seqname);
+	    $self->add_Set($seq);
+	    # Per-query table of model id => sequence score.
+	    # NOTE(review): %acc is not cleared here, so accessions recorded
+	    # for an earlier query remain visible to later ones.
+	    %hash = ();
+
+	    # Section 1: per-sequence scores, up to the "Parsed for domains" line.
+	    while(<$file>){
+
+		if( /Accession:\s+(\S+)/ ) { $seq->accession($1); next }
+		elsif( s/^Description:\s+// ) { chomp; $seq->desc($_); next }
+		/^Parsed for domains/ && last;
+
+		# This is to parse out the accession numbers in old Pfam format,
+		# which is no longer supported due to changes in HMMER.
+		# NOTE(review): this first pattern accepts any row with at
+		# least five whitespace-separated fields ending in an integer,
+		# so for such rows the second field is stored as the accession.
+
+		if( (($id,$acc, $sc, $ev, $nd) = /^\s*(\S+)\s+(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) {
+		    $hash{$id} = $sc; # we need this for the sequence
+		                      # score of the domains below!
+		    $acc {$id} = $acc;
+
+		    # this is the more common parsing routine
+		} elsif ( (($id,$sc, $ev, $nd) =
+			   /^\s*(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/) ) {
+
+		    $hash{$id} = $sc; # we need this for the
+		                      # sequence score of the domains below!
+
+		}
+	    }
+
+	    # Section 2: per-domain rows, until the alignment header ("Align...")
+	    # or the "//" record terminator.
+	    while(<$file>) {
+		/^Align/ && last;
+		m{^//} && last;
+		# this is meant to match
+
+		#Sequence Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+		#-------- ------- ----- -----    ----- -----      -----  -------
+		#PF00621    1/1     198   372 ..     1   207 []   281.6    1e-80
+
+		if( (($id, $sqfrom, $sqto, $hmmf,$hmmt,$sc, $ev) =
+		     /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) {
+		    $unit = Bio::Tools::HMMER::Domain->new();
+		    $unit->seq_id  ($seqname);
+		    $unit->hmmname  ($id);
+		    $unit->start    ($sqfrom);
+		    $unit->end      ($sqto);
+		    $unit->hstart($hmmf);
+		    $unit->hend  ($hmmt);
+		    $unit->bits     ($sc);
+		    $unit->evalue   ($ev);
+
+		    # Every domain row must have had a row in section 1.
+		    if( !exists($hash{$id}) ) {
+			$self->throw("HMMResults parsing error in hmmpfam for $id - can't find sequecne score");
+		    }
+
+		    $unit->seqbits($hash{$id});
+
+		    if( defined $acc{$id} ) {
+			$unit->hmmacc($acc{$id});
+		    }
+
+		    # this should find its own sequence!
+		    $self->add_Domain($unit);
+		}
+	    }
+	    # $_ still holds the line that stopped section 2 (undef at end of
+	    # file).  A "//" means this record has no alignment section.
+	    if( m{^//} ) { next; }
+
+	    $_ = <$file>;
+	    # parses alignment lines. Icky as we have to break on the same line
+	    # that we need to read to place the alignment lines with the unit.
+
+	    # Section 3: alignments.  The loop is hand-rolled because the inner
+	    # reader stops ON the next "name: ... from X to Y" header, which
+	    # must then be examined here without reading another line first.
+	    while(1) {
+		(!defined $_ || m{^//}) && last;
+
+		# matches:
+		# PF00621: domain 1 of 1, from 198 to 372
+		if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) {
+
+		    $name = $1;
+		    $from = $2;
+		    $to   = $3;
+
+		    # find the HMMUnit which this alignment is from
+
+		    $unit = $self->get_unit_nse($seqname,$name,$from,$to);
+		    if( !defined $unit ) {
+			$self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!");
+			# Move past the header before retrying, otherwise
+			# the same line would match again.
+			$_ = <$file>;
+			next;
+		    }
+		    # Hand every following line to the Domain until the record
+		    # ends or the next alignment header is reached.
+		    while(<$file>) {
+			m{^//} && last;
+			/^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last;
+			$unit->add_alignment_line($_);
+		    }
+		} else {
+		    $_ = <$file>;
+		}
+	    }
+
+	    # back to main 'Query:' loop
+	}
+    }
+}
+
+# mainly internal function
+
+sub get_unit_nse {
+    my ($self, $seqname, $domname, $start, $end) = @_;
+
+    # The domain is looked up inside the Set stored for this sequence;
+    # an unknown sequence name is a fatal error.
+    my $set = $self->get_Set($seqname);
+    $self->throw("Could not get sequence name $seqname - so can't get its unit")
+        unless defined $set;
+
+    # First domain whose model name, start and end all agree wins.
+    for my $candidate ( $set->each_Domain() ) {
+        next unless $candidate->hmmname() eq $domname;
+        next unless $candidate->start() == $start;
+        next unless $candidate->end() == $end;
+        return $candidate;
+    }
+
+    # No domain matched.
+    return;
+}
+
+
+=head2 _parse_hmmsearch
+
+ Title   : _parse_hmmsearch
+ Usage   : $res->_parse_hmmsearch($filehandle)
+ Function:
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _parse_hmmsearch {
+    # Reads hmmsearch output from the filehandle $file.  Creates one
+    # Bio::Tools::HMMER::Set per row of the per-sequence score table and one
+    # Bio::Tools::HMMER::Domain per row of the per-domain table, then attaches
+    # the alignment text to the matching Domain.
+    #
+    # Returns the number of Domain rows that were parsed.
+    my $self = shift;
+    my $file = shift;
+    my ($id,$sqfrom,$sqto,$sc,$ev,$unit,$nd,$seq,$hmmf,$hmmt,
+        $hmmfname,$hmmacc, $hmmid, %seqh);
+    my $count = 0;
+
+    # Header: remember the HMM file name and accession, stop at the start of
+    # the per-sequence score table.
+    while(<$file>) {
+        /^HMM file:\s+(\S+)/ and do { $self->hmmfile($1); $hmmfname = $1 };
+        /^Accession:\s+(\S+)/ and do { $hmmacc = $1 };
+        /^Query HMM:\s+(\S+)/ and do { $hmmid = $1 };
+        /^Sequence database:\s+(\S+)/ and do { $self->seqfile($1) };
+        /^Scores for complete sequences/ && last;
+    }
+
+    # Domains are labelled with the HMM file name; fall back to a placeholder
+    # when the header did not supply one.
+    $hmmfname = "given" if not $hmmfname;
+
+    # Per-sequence scores: one Set per matching row.
+    while(<$file>) {
+        /^Parsed for domains/ && last;
+        if( (($id, $sc, $ev, $nd) = /(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) {
+            $seq = Bio::Tools::HMMER::Set->new();
+            $seq->name($id);
+            $seq->bits($sc);
+            $seqh{$id} = $sc;   # sequence score, reused for the domains below
+            $seq->evalue($ev);
+            $self->add_Set($seq);
+            $seq->accession($hmmacc);
+        }
+    }
+
+    # Per-domain rows: one Domain per matching row.
+    while(<$file>) {
+        /^Alignments of top-scoring domains/ && last;
+        if( (($id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev) = /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) {
+            $unit = Bio::Tools::HMMER::Domain->new();
+
+            $unit->seq_id($id);
+            $unit->hmmname($hmmfname);
+            $unit->start($sqfrom);
+            $unit->end($sqto);
+            $unit->bits($sc);
+            $unit->hstart($hmmf);
+            $unit->hend($hmmt);
+            $unit->evalue($ev);
+            $unit->seqbits($seqh{$id});
+            $self->add_Domain($unit);
+            $count++;
+        }
+    }
+
+    $_ = <$file>;
+
+    ## Recognize and store domain alignments
+
+    # Hand-rolled loop: the inner reader stops ON the next alignment header,
+    # which must be examined here without reading another line first.
+    while(1) {
+        if( !defined $_ ) {
+            last;
+        }
+        /^Histogram of all scores/ && last;
+
+        # matches:
+        # PF00621: domain 1 of 1, from 198 to 372
+        if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) {
+            my $name = $1;
+            my $from = $2;
+            my $to   = $3;
+
+            # find the HMMUnit which this alignment is from
+            $unit = $self->get_unit_nse($name,$hmmfname,$from,$to);
+
+            if( !defined $unit ) {
+                $self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!");
+                # Read past the header before retrying.  Without this the
+                # same line would match again and the loop would never end
+                # (_parse_hmmpfam advances in the same way).
+                $_ = <$file>;
+                next;
+            }
+            while(<$file>) {
+                /^Histogram of all scores/ && last;
+                /^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last;
+                $unit->add_alignment_line($_);
+            }
+        }
+        else {
+            $_ = <$file>;
+        }
+    }
+
+    return $count;
+}
+
+=head2 parsetype
+
+ Title   : parsetype
+ Usage   : $obj->parsetype($newval)
+ Function:
+ Returns : value of parsetype
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub parsetype{
+    my $self = shift;
+
+    # Only a defined argument replaces the stored value.
+    $self->{'_parsetype'} = $_[0] if defined $_[0];
+
+    return $self->{'_parsetype'};
+}
+
+1;  # says use was ok
+__END__
+
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Set.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Set.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/HMMER/Set.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,249 @@
+# $Id: Set.pm,v 1.15.4.1 2006/10/02 23:10:33 sendu Exp $
+#
+# BioPerl module for Bio::Tools::HMMER::Set
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::HMMER::Set - Set of identical domains from HMMER matches
+
+=head1 SYNOPSIS
+
+    # get a Set object probably from the results object
+    print "Bits score over set ",$set->bits," evalue ",$set->evalue,"\n";
+
+    foreach $domain ( $set->each_Domain ) {
+	print "Domain start ",$domain->start," end ",$domain->end,"\n";
+    }
+
+=head1 DESCRIPTION
+
+Represents a set of HMMER domains hitting one sequence. HMMER reports two
+different scores, a per sequence total score (and evalue) and a per
+domain score and evalue. This object represents a collection of the same
+domain with the sequence bits score and evalue. (these attributes are also
+on the per domain scores, which you can get there).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web: http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney-at-ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::HMMER::Set;
+use strict;
+
+use Bio::Tools::HMMER::Domain;
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor.  Recognised named arguments: -name, -accession, -desc.
+    # Each one is passed to the accessor of the same name when it is true.
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($name,$acc,$desc) = $self->_rearrange([qw(NAME ACCESSION DESC)],
+                                              @args);
+    $name && $self->name($name);
+    $acc  && $self->accession($acc);
+    $desc && $self->desc($desc);
+
+    # Domains in insertion order, plus an index used by add_Domain to
+    # suppress duplicates.
+    $self->{'domains'} = [];
+    $self->{'domainnames'} = {};
+    return $self;
+}
+
+=head2 add_Domain
+
+ Title   : add_Domain
+ Usage   : $set->add_Domain($domain)
+ Function: adds the domain to the list
+ Returns : nothing
+ Args    : A Bio::Tools::HMMER::Domain object
+
+=cut
+
+sub add_Domain{
+    my ($self,$domain) = @_;
+
+    # Only Bio::Tools::HMMER::Domain objects may be stored.
+    my $acceptable = defined $domain && $domain->isa("Bio::Tools::HMMER::Domain");
+    unless( $acceptable ) {
+        $self->throw("[$domain] is not a Bio::Tools::HMMER::Domain. aborting");
+    }
+
+    # get_nse identifies the domain; one already counted is silently dropped.
+    my $key = $domain->get_nse;
+    return if $self->{'domainnames'}->{$key}++;
+
+    push @{ $self->{'domains'} }, $domain;
+}
+
+=head2 each_Domain
+
+ Title   : each_Domain
+ Usage   : foreach $domain ( $set->each_Domain() )
+ Function: returns an array of domain objects in this set
+ Returns : array
+ Args    : none
+
+
+=cut
+
+sub each_Domain{
+   # Returns the stored Domain objects as a list, in the order they were
+   # added.  Extra arguments are accepted and ignored.
+   my ($self,@args) = @_;
+
+   return @{$self->{'domains'}};
+}
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name($newval)
+ Function:
+ Example :
+ Returns : value of name
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub name{
+    my $obj = shift;
+
+    # A defined argument becomes the new name; the current one is returned.
+    $obj->{'name'} = $_[0] if defined $_[0];
+
+    return $obj->{'name'};
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $obj->desc($newval)
+ Function:
+ Example :
+ Returns : value of desc
+ Args    : newvalue (optional)
+
+=cut
+
+sub desc{
+    my ($self, $value) = @_;
+
+    # Combined getter/setter: store only when a defined value is supplied.
+    defined $value and $self->{'desc'} = $value;
+
+    return $self->{'desc'};
+}
+
+=head2 accession
+
+ Title   : accession
+ Usage   : $obj->accession($newval)
+ Function:
+ Example :
+ Returns : value of accession
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub accession{
+    my ($self, $value) = @_;
+
+    # Pure read when no defined value is given.
+    return $self->{'accession'} unless defined $value;
+
+    $self->{'accession'} = $value;
+    return $self->{'accession'};
+}
+
+
+=head2 bits
+
+ Title   : bits
+ Usage   : $obj->bits($newval)
+ Function:
+ Example :
+ Returns : value of bits
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub bits{
+    my $obj = shift;
+    my $value = shift;
+
+    # Store the score when one is supplied, then report the stored score.
+    if( defined $value ) { $obj->{'bits'} = $value }
+
+    return $obj->{'bits'};
+}
+
+=head2 evalue
+
+ Title   : evalue
+ Usage   : $obj->evalue($newval)
+ Function:
+ Example :
+ Returns : value of evalue
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub evalue{
+    my ($obj, $value) = @_;
+
+    # Getter when called without a defined value, setter otherwise.
+    unless( defined $value ) {
+        return $obj->{'evalue'};
+    }
+
+    $obj->{'evalue'} = $value;
+    return $obj->{'evalue'};
+}
+
+
+sub addHMMUnit {
+    my ($self, $unit) = @_;
+
+    # Deprecated spelling: warn, then delegate to add_Domain.
+    $self->warn("Using old addHMMUnit call on Bio::Tools::HMMER::Set. Should replace with add_Domain");
+
+    return $self->add_Domain($unit);
+}
+
+sub eachHMMUnit {
+    my ($self) = @_;
+
+    # Deprecated spelling: warn, then delegate to each_Domain.
+    $self->warn("Using old eachHMMUnit call on Bio::Tools::HMMER::Set. Should replace with each_Domain");
+
+    return $self->each_Domain();
+}
+
+1;  # says use was ok
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Hmmpfam.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Hmmpfam.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Hmmpfam.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,215 @@
+# $Id: Hmmpfam.pm,v 1.8.4.1 2006/10/02 23:10:32 sendu Exp $
+# BioPerl module for Bio::Tools::Hmmpfam
+#
+# Cared for by  Balamurugan Kumarasamy
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::Tools::Hmmpfam - Parser for Hmmpfam program
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Hmmpfam;
+  my $hmmpfam_parser = new Bio::Tools::Hmmpfam(-fh =>$filehandle );
+  while( my $hmmpfam_feat = $hmmpfam_parser->next_result ) {
+        push @hmmpfam_feat, $hmmpfam_feat;
+  }
+
+=head1 DESCRIPTION
+
+Parser for Hmmpfam  program.  See also L<Bio::SearchIO::hmmer>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+ User feedback is an integral part of the evolution of this and other
+ Bioperl modules. Send your comments and suggestions preferably to
+ the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Balamurugan Kumarasamy
+
+ Email: fugui at worf.fugu-sg.org
+
+=head1 APPENDIX
+
+ The rest of the documentation details each of the object methods.
+ Internal methods are usually preceded with a _
+
+
+=cut
+
+package Bio::Tools::Hmmpfam;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Hmmpfam(-fh=>$filehandle);
+ Function: Builds a new Bio::Tools::Hmmpfam object
+ Returns : Bio::Tools::Hmmpfam
+ Args    : -filename
+           -fh (filehandle)
+
+=cut
+
+sub new {
+      # Constructor: builds the object through the parent class, then hands
+      # the same arguments to _initialize_io to set up the input source.
+      my($class,@args) = @_;
+
+      my $self = $class->SUPER::new(@args);
+      $self->_initialize_io(@args);
+
+      return $self;
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $hmmpfam_parser->next_result
+ Function: Get the next result set from parser data
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    : none
+
+=cut
+
+sub next_result {
+    # Reads lines through _readline until the next domain row is found and
+    # returns the object built for it by create_feature.  Returns nothing
+    # once the input is exhausted.
+    my ($self) = @_;
+
+    # A lexical line variable is used so the caller's $_ is left untouched,
+    # and the loop ends only on undef rather than on any false line.
+    while ( defined( my $line = $self->_readline() ) ) {
+        chomp $line;
+
+        # Skip the whole alignment section, up to the "//" record terminator.
+        if ( $line =~ m/^Alignments of top-scoring domains/ ) {
+            while ( defined( my $rest = $self->_readline() ) ) {
+                last if $rest =~ m!^//!;
+            }
+        }
+
+        # Table headers, rulers and blank lines carry no data.
+        next if $line =~ m/^Model/ || $line =~ m/^\-/ || $line =~ m/^$/;
+
+        # Remember the current query name for the rows that follow.
+        if ( $line =~ m/^Query sequence:\s+(\S+)/ ) {
+            my $id = $1;
+            $self->seqname($id);
+        }
+
+        # Domain row, e.g.
+        # PF00621    1/1     198   372 ..     1   207 []   281.6    1e-80
+        if ( my ($hid, $start, $end, $hstart, $hend, $score, $evalue) = $line =~ m/^(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+\S+\s+(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)/ ) {
+            my %feature;
+
+            ($feature{name}) = $self->seqname;
+            $feature{raw_score} = $score;
+            $feature{p_value} = sprintf ("%.3e", $evalue);
+            $feature{score} = $feature{p_value};
+            $feature{start} = $start;
+            $feature{end} = $end;
+            $feature{hname} = $hid;
+            $feature{hstart} = $hstart;
+            $feature{hend} = $hend;
+            ($feature{source}) = 'pfam';
+            $feature{primary} = $hid;
+            ($feature{program}) = 'pfam';
+            ($feature{db}) = 'db1';
+            ($feature{logic_name}) = 'hmmpfam';
+            my $new_feat = $self->create_feature (\%feature);
+            return $new_feat;
+        }
+    }
+    return;
+}
+
+=head2 create_feature
+
+ Title   : create_feature
+ Usage   : my $feat=$hmmpfam_parser->create_feature($feature)
+ Function: creates a FeaturePair from two SeqFeature Generic objects
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    : hash reference holding the feature values
+
+
+=cut
+
+sub create_feature {
+    my ($self, $feat) = @_;
+
+    # Feature on the query sequence, built from the hash's query-side keys.
+    my @query_args = (
+        -seq_id  => $feat->{name},
+        -start   => $feat->{start},
+        -end     => $feat->{end},
+        -score   => $feat->{score},
+        -source  => $feat->{source},
+        -primary => $feat->{primary},
+    );
+    my $query_feature = Bio::SeqFeature::Generic->new(@query_args);
+
+    # Feature for the hit: coordinates only.
+    my @hit_args = (
+        -start => $feat->{hstart},
+        -end   => $feat->{hend},
+    );
+    my $hit_feature = Bio::SeqFeature::Generic->new(@hit_args);
+
+    # Join the two halves and tag the pair.
+    my $pair = Bio::SeqFeature::FeaturePair->new;
+    $pair->feature1($query_feature);
+    $pair->feature2($hit_feature);
+
+    $pair->add_tag_value('evalue', $feat->{p_value});
+    $pair->add_tag_value('percent_id', 'NULL');
+    $pair->add_tag_value("hid", $feat->{primary});
+
+    return $pair;
+}
+
+=head2 seqname
+
+ Title   :   seqname
+ Usage   :   obj->seqname($seqname)
+ Function:   Internal(not to be used directly)
+ Returns :
+ Args    :   seqname
+
+=cut
+
+sub seqname{
+    my $self = shift;
+
+    # A defined argument replaces the remembered query name.
+    $self->{'seqname'} = $_[0] if defined $_[0];
+
+    return $self->{'seqname'};
+}
+
+1;
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Hmmpfam.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/IUPAC.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/IUPAC.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/IUPAC.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,364 @@
+# $Id: IUPAC.pm,v 1.29.2.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for IUPAC
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::IUPAC - Generates unique Seq objects from an ambiguous Seq object
+
+=head1 SYNOPSIS
+
+ use Bio::Seq;
+ use Bio::Tools::IUPAC;
+
+ my $ambiseq = new Bio::Seq (-seq => 'ARTCGUTGR', -alphabet => 'dna');
+ my $stream  = new Bio::Tools::IUPAC(-seq => $ambiseq);
+
+ while ($uniqueseq = $stream->next_seq()) {
+     # process the unique Seq object.
+ }
+
+=head1 DESCRIPTION
+
+IUPAC is a tool that produces a stream of unique, "strict"-satisfying Seq
+objects from an ambiguous Seq object (containing non-standard characters given
+the meaning shown below)
+
+        Extended DNA / RNA alphabet :
+        (includes symbols for nucleotide ambiguity)
+        ------------------------------------------
+        Symbol       Meaning      Nucleic Acid
+        ------------------------------------------
+         A            A           Adenine
+         C            C           Cytosine
+         G            G           Guanine
+         T            T           Thymine
+         U            U           Uracil
+         M          A or C
+         R          A or G
+         W          A or T
+         S          C or G
+         Y          C or T
+         K          G or T
+         V        A or C or G
+         H        A or C or T
+         D        A or G or T
+         B        C or G or T
+         X      G or A or T or C
+         N      G or A or T or C
+
+        IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
+          Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
+
+-----------------------------------
+
+       Amino Acid alphabet:
+        ------------------------------------------
+        Symbol           Meaning
+        ------------------------------------------
+        A        Alanine
+        B        Aspartic Acid, Asparagine
+        C        Cysteine
+        D        Aspartic Acid
+        E        Glutamic Acid
+        F        Phenylalanine
+        G        Glycine
+        H        Histidine
+        I        Isoleucine
+        J        Isoleucine/Leucine
+        K        Lysine
+        L        Leucine
+        M        Methionine
+        N        Asparagine
+        O        Pyrrolysine
+        P        Proline
+        Q        Glutamine
+        R        Arginine
+        S        Serine
+        T        Threonine
+        U        Selenocysteine
+        V        Valine
+        W        Tryptophan
+        X        Unknown
+        Y        Tyrosine
+        Z        Glutamic Acid, Glutamine
+        *        Terminator
+
+
+        IUPAC-IUP AMINO ACID SYMBOLS:
+          Biochem J. 1984 Apr 15; 219(2): 345-373
+          Eur J Biochem. 1993 Apr 1; 213(1): 2
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey-at-virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tools::IUPAC;
+
+use strict;
+use vars qw(%IUP %IUB %REV_IUB $AUTOLOAD);
+
+BEGIN {
+    # Nucleotide code => array ref of the bases it can stand for.
+    # Unambiguous codes simply map to themselves.
+    %IUB = (
+        ( map { ( $_ => [$_] ) } qw(A C G T U) ),
+        M => [qw(A C)],
+        R => [qw(A G)],
+        W => [qw(A T)],
+        S => [qw(C G)],
+        Y => [qw(C T)],
+        K => [qw(G T)],
+        V => [qw(A C G)],
+        H => [qw(A C T)],
+        D => [qw(A G T)],
+        B => [qw(C G T)],
+        X => [qw(G A T C)],
+        N => [qw(G A T C)],
+    );
+
+    # Reverse table: concatenated bases => nucleotide code.
+    %REV_IUB = (
+        ( map { ( $_ => $_ ) } qw(A T C G) ),
+        AC   => 'M',
+        AG   => 'R',
+        AT   => 'W',
+        CG   => 'S',
+        CT   => 'Y',
+        'GT' => 'K',
+        ACG  => 'V',
+        ACT  => 'H',
+        AGT  => 'D',
+        CGT  => 'B',
+        ACGT => 'N',
+        N    => 'N',
+    );
+
+    # Amino-acid code => array ref of the residues it can stand for.
+    # Only B, J and Z expand to more than one residue.
+    %IUP = (
+        ( map { ( $_ => [$_] ) }
+              qw(A C D E F G H I K L M N O P Q R S T U V W X Y) ),
+        B   => [qw(D N)],
+        J   => [qw(I L)],
+        Z   => [qw(E Q)],
+        '*' => ['*'],
+    );
+
+}
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : new Bio::Tools::IUPAC $seq;
+ Function: returns a new seq stream (akin to SeqIO)
+ Returns : a Bio::Tools::IUPAC stream object that will produce unique
+           Seq objects on demand.
+ Args    : an ambiguously coded Seq.pm object that has a specified 'alphabet'
+
+
+=cut
+
+
+sub new {
+    # Constructor.  The sequence is taken from the named argument -seq or,
+    # failing that, from the first positional argument when it is a
+    # reference.  It must be a Bio::Seq whose alphabet is dna, rna or
+    # protein; anything else is a fatal error, as is an empty sequence.
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($seq) = $self->_rearrange([qw(SEQ)],@args);
+    if((! defined($seq)) && @args && ref($args[0])) {
+        # parameter not passed as named parameter?
+        $seq = $args[0];
+    }
+    # Test for a reference first so a missing sequence yields the message
+    # below instead of a method call on an undefined value.
+    (defined $seq && ref($seq) && $seq->isa('Bio::Seq')) or
+        $self->throw("Must supply a Seq.pm object to IUPAC!");
+    $self->{'_SeqObj'} = $seq;
+
+    # _alpha holds, per sequence position, the list of symbols that the
+    # (upper-cased) character at that position can stand for.
+    if ($self->{'_SeqObj'}->alphabet() =~ m/^[dr]na$/i ) {
+        # nucleotide seq object
+        $self->{'_alpha'} = [ map { $IUB{uc($_)} }
+                              split('', $self->{'_SeqObj'}->seq()) ];
+    } elsif ($self->{'_SeqObj'}->alphabet() =~ m/^protein$/i ) {
+        # amino acid seq object
+        $self->{'_alpha'} = [ map { $IUP{uc($_)} }
+                              split('', $self->{'_SeqObj'}->seq()) ];
+    } else { # unknown type: we could make a guess, but let's not.
+        $self->throw("You must specify the 'type' of sequence provided to IUPAC");
+    }
+
+    # _string holds one index per position into the matching _alpha list.
+    # The first index starts at -1 so that the first next_seq call steps
+    # it to 0.
+    $self->{'_string'} = [(0) x length($self->{'_SeqObj'}->seq())];
+    scalar @{$self->{'_string'}} or $self->throw("Sequence has zero-length!");
+    $self->{'_string'}->[0] = -1;
+    return $self;
+}
+
+=head2 next_seq
+
+ Title   : next_seq
+ Usage   : $iupac->next_seq()
+ Function: returns the next unique Seq object
+ Returns : a Seq.pm object
+ Args    : none.
+
+
+=cut
+
+sub next_seq{
+    # Steps the per-position indices in _string like an odometer (position 0
+    # turns fastest), writes the resulting sequence into the wrapped Seq
+    # object, tags its description with a running number and returns that
+    # object.  Returns nothing when there is no further combination.
+    my ($self) = @_;
+
+    for my $i ( 0 .. $#{$self->{'_string'}} ) {
+        # Positions with a single choice never advance (the initial -1 at
+        # position 0 is true, so the very first call still gets through).
+        next unless $self->{'_string'}->[$i] || @{$self->{'_alpha'}->[$i]} > 1;
+        if ( $self->{'_string'}->[$i] == $#{$self->{'_alpha'}->[$i]} ) { # rollover
+            if ( $i == $#{$self->{'_string'}} ) { # end of possibilities
+                return;
+            } else {
+                $self->{'_string'}->[$i] = 0;
+                next;
+            }
+        } else {
+            $self->{'_string'}->[$i]++;
+            my $j = -1;
+            $self->{'_SeqObj'}->seq(join('', map { $j++; $self->{'_alpha'}->[$j]->[$_]; } @{$self->{'_string'}}));
+            my $desc = $self->{'_SeqObj'}->desc();
+            if ( !defined $desc ) { $desc = ""; }
+
+            # _num is given thousands separators for the description only
+            # and restored to a plain number afterwards.
+            $self->{'_num'}++;
+            1 while $self->{'_num'} =~ s/(\d)(\d\d\d)(?!\d)/$1,$2/;
+            $desc =~ s/( \[Bio::Tools::IUPAC-generated\sunique sequence # [^\]]*\])|$/ \[Bio::Tools::IUPAC-generated unique sequence # $self->{'_num'}\]/;
+            $self->{'_SeqObj'}->desc($desc);
+            $self->{'_num'} =~ s/,//g;
+            return $self->{'_SeqObj'};
+        }
+    }
+
+    # Every position was skipped or rolled over: nothing is left.  Return
+    # explicitly, because the value of a sub that ends in a loop is
+    # unspecified.
+    return;
+}
+
+=head2 iupac_iup
+
+ Title   : iupac_iup
+ Usage   : my %aasymbols = $iupac->iupac_iup
+ Function: Returns a hash of PROTEIN symbols -> symbol components
+ Returns : Hash
+ Args    : none
+
+=cut
+
+sub iupac_iup{
+   # Returns the package-level %IUP table (amino-acid symbol => array ref of
+   # the residues it stands for).  No arguments are read.  The array refs
+   # handed out are the table's own, not copies.
+   return %IUP;
+
+}
+
+=head2 iupac_iub
+
+ Title   : iupac_iub
+ Usage   : my %dnasymbols = $iupac->iupac_iub
+ Function: Returns a hash of DNA symbols -> symbol components
+ Returns : Hash
+ Args    : none
+
+=cut
+
+sub iupac_iub{
+   # Returns the package-level %IUB table (nucleotide symbol => array ref of
+   # the bases it stands for).  No arguments are read.  The array refs
+   # handed out are the table's own, not copies.
+   return %IUB;
+}
+
+=head2 iupac_rev_iub
+
+ Title   : iupac_rev_iub
+ Usage   : my %dnasymbols = $iupac->iupac_rev_iub
+ Function: Returns a hash of nucleotide combinations -> IUPAC code
+           (a reverse of the iupac_iub hash).
+ Returns : Hash
+ Args    : none
+
+=cut
+
+sub iupac_rev_iub{
+   # Returns the package-level %REV_IUB table (concatenated bases => IUPAC
+   # nucleotide code).  No arguments are read.
+   return %REV_IUB;
+}
+
+=head2 count
+
+ Title   : count
+ Usage   : my $total = $iupac->count();
+ Function: Calculates the number of unique, unambiguous sequences that
+           this ambiguous sequence could generate
+ Return  : int
+ Args    : none
+
+=cut
+
+sub count {
+    my $self = shift;
+
+    # The total is the product of the number of choices at every position.
+    my $total = 1;
+    foreach my $choices ( @{ $self->{'_alpha'} } ) {
+        $total *= scalar @{$choices};
+    }
+
+    return $total;
+}
+
+
+sub AUTOLOAD {
+    my $self = shift;
+
+    # Strip the package qualifier from the name of the method being called.
+    (my $method = $AUTOLOAD) =~ s/.*:://;
+
+    # Forward everything except object destruction to the wrapped Seq.
+    return $self->{'_SeqObj'}->$method(@_)
+        unless $method eq 'DESTROY';
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Lucy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Lucy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Lucy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,722 @@
+# $Id: Lucy.pm,v 1.15.4.1 2006/10/02 23:10:32 sendu Exp $ 
+#
+# BioPerl module for Bio::Tools::Lucy
+#
+# Copyright Her Majesty the Queen of England
+# written by Andrew Walsh (paeruginosa at hotmail.com) during employment with 
+# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Lucy - Object for analyzing the output from Lucy,
+  a vector and quality trimming program from TIGR
+
+=head1 SYNOPSIS
+
+  # Create the Lucy object from an existing Lucy output file
+  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);
+  $lucyObj = Bio::Tools::Lucy->new(@params);
+
+  # Get names of all sequences
+  $names = $lucyObj->get_sequence_names();
+
+  #  Print seq and qual values for sequences >400 bp in order to run CAP3
+  foreach $name (@$names) {
+      next unless $lucyObj->length_clear($name) > 400;
+      print SEQ ">$name\n", $lucyObj->sequence($name), "\n";
+      print QUAL ">$name\n", $lucyObj->quality($name), "\n";
+  }
+
+  # Get an array of Bio::PrimarySeq objects
+  @seqObjs = @{ $lucyObj->get_Seq_Objs() };
+
+
+=head1 DESCRIPTION
+
+Bio::Tools::Lucy.pm provides methods for analyzing the sequence and
+quality values generated by Lucy program from TIGR.
+
+Lucy will identify vector, poly-A/T tails, and poor quality regions in
+a sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)
+
+The input to Lucy can be the Phred sequence and quality files
+generated from running Phred on a set of chromatograms.
+
+Lucy can be obtained (free of charge to academic users) from
+www.tigr.org/softlab
+
+There are a few methods that will only be available if you make some
+minor changes to the source for Lucy and then recompile.  The changes
+are in the 'lucy.c' file and there is a diff between the original and
+the modified file in the Appendix
+
+Please contact the author of this module if you have any problems
+making these modifications.
+
+You do not have to make these modifications to use this module.
+
+=head2 Creating a Lucy object
+
+  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, 
+	     'fwd_desig' => '_F', 'rev_desig' => '_R');
+  $lucyObj = Bio::Tools::Lucy->new(@params);
+
+=head2 Using a Lucy object
+
+  You should get an array with the sequence names in order to use
+  accessor methods.  Note: The Lucy binary program will fail unless
+  the sequence names provided as input are unique.
+
+  $names = $lucyObj->get_sequence_names();
+
+  This code snippet will produce a Fasta format file with sequence
+  lengths and %GC in the description line.
+
+  foreach $name (@$names) {
+      print FILE ">$name\t",
+		 $lucyObj->length_clear($name), "\t",
+		 $lucyObj->per_GC($name), "\n",
+		 $lucyObj->sequence($name), "\n";
+  }
+
+
+  Print seq and qual values for sequences >400 bp in order to assemble
+  them with CAP3 (or other assembler).
+
+  foreach $name (@$names) {
+      next unless $lucyObj->length_clear($name) > 400;
+      print SEQ ">$name\n", $lucyObj->sequence($name), "\n";
+      print QUAL ">$name\n", $lucyObj->quality($name), "\n";
+  }
+
+  Get all the sequences as Bio::PrimarySeq objects (eg., for use with
+  Bio::Tools::Run::StandaloneBlast to perform BLAST).
+
+  @seqObjs = @{ $lucyObj->get_Seq_Objs() };
+
+  Or use only those sequences that are full length and have a Poly-A
+  tail.
+
+  foreach $name (@$names) {
+      next unless ($lucyObj->full_length($name) and $lucyObj->polyA($name));
+      push @seqObjs, $lucyObj->get_Seq_Obj($name);
+  }
+
+
+  Get the names of those sequences that were rejected by Lucy.
+
+  $rejects_ref = $lucyObj->get_rejects();
+
+  Print the names of the rejects and 1 letter code for reason they
+  were rejected.
+
+  foreach $key (sort keys %$rejects_ref) {
+      print "$key:  ", $rejects_ref->{$key};
+  }
+
+  There is a lot of other information available about the sequences
+  analyzed by Lucy (see APPENDIX).  This module can be used with the
+  DBI module to store this sequence information in a database.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Andrew G. Walsh		paeruginosa at hotmail.com
+
+=head1 APPENDIX
+
+Methods available to Lucy objects are described below.  Please note
+that any method beginning with an underscore is considered internal
+and should not be called directly.
+
+=cut
+
+
+package Bio::Tools::Lucy;
+
+use vars qw($AUTOLOAD @ATTR %OK_FIELD);
+use strict;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+# Attribute names for which the AUTOLOAD method below acts as accessor.
+# (The mail archive had mangled "@ATTR" into " at ATTR".)
+@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr);
+# Lookup table consulted by AUTOLOAD to reject calls to any other name.
+foreach my $attr (@ATTR) {
+    $OK_FIELD{$attr}++;
+}
+
+sub AUTOLOAD {
+    my $self = shift;
+
+    # Bare, lower-cased method name taken from the fully qualified $AUTOLOAD.
+    (my $field = $AUTOLOAD) =~ s/.*:://;
+    $field = lc $field;
+
+    # Only names registered in %OK_FIELD are served; anything else is an error.
+    unless ($OK_FIELD{$field}) {
+        $self->throw("Unallowed parameter: $field !");
+    }
+
+    # Combined getter/setter: store the argument when one is given.
+    if (@_) {
+        $self->{$field} = shift;
+    }
+    return $self->{$field};
+}
+
+=head2 new
+
+ Title	 :  new
+ Usage	 :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', 
+	    fwd_desig => '_F')
+ Function:  creates a Lucy object from Lucy analysis files
+ Returns :  reference to Bio::Tools::Lucy object
+ Args	 :  seqfile	Fasta sequence file generated by Lucy
+	       qualfile	Quality values file generated by Lucy
+	       infofile	Info file created when Lucy is run with -debug 
+                     'infofile' option
+	       stderrfile	Standard error captured from Lucy when Lucy is run 
+			 with -info option and STDERR is directed to stderrfile 
+			 (ie. lucy ... 2> stderrfile).
+			 Info in this file will include sequences dropped for low 
+			 quality. If you've modified Lucy source (see adv_stderr below), 
+			 it will also include info on which sequences were dropped because 
+			 they were vector, too short, had no insert, and whether a poly-A 
+			 tail was found (if Lucy was run with -cdna option).
+	       lucy_verbose verbosity level (0-1).  
+	       fwd_desig	The string used to determine whether sequence is a 
+          forward read.  
+			 The parser will assume that this match will occur at the 
+			 end of the sequence name string.
+	       rev_desig	As above, for reverse reads. 
+ 	       adv_stderr	Can be set to a true value (1).  Will only work if 
+          you have modified 
+			 the Lucy source code as outlined in DESCRIPTION and capture 
+			 the standard error from Lucy.
+
+If you don't provide filenames for qualfile, infofile or stderrfile,
+the module will assume that .qual, .info, and .stderr are the file
+extensions and search in the same directory as the .seq file for these
+files.
+
+For example, if you create a Lucy object with $lucyObj =
+Bio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will
+find lucy.qual, lucy.info and lucy.stderr.
+
+You can omit any or all of the quality, info or stderr files, but you
+will not be able to use all of the object methods (see method
+documentation below).
+
+=cut
+
+sub new {
+	# The mail archive had mangled "@args" into " at args" on this line.
+	my ($class, @args) = @_;
+	my $self = $class->SUPER::new(@args);
+
+	# Arguments are name => value pairs; each name is lower-cased and the
+	# value is stored directly on the object under that key.
+	while (@args) {
+		my $attr  = lc shift @args;
+		my $value = shift @args;
+		$self->{$attr} = $value;
+	}
+
+	# Read the Lucy output files named by the attributes just stored.
+	_parse($self);
+	return $self;
+}
+
+=head2 _parse
+
+ Title	 :  _parse
+ Usage	 :  n/a (internal function)
+ Function:  called by new() to parse Lucy output files
+ Returns :  nothing
+ Args	 :  none
+
+=cut
+
+sub _parse {
+	my $self = shift;
+
+	# Base name (sequence file name minus its last extension) used to look
+	# for companion .qual/.info/.stderr files.  It is captured straight from
+	# the match so that a failed match cannot leave a stale $1 behind; a
+	# name without an extension is used unchanged.
+	my ($file) = $self->{seqfile} =~ /^(\S+)\.\S+$/;
+	$file = $self->{seqfile} unless defined $file;
+
+	print "Opening $self->{seqfile} for parsing...\n" if $self->{lucy_verbose};
+	# Three-argument open: the name is never interpreted as a mode or pipe.
+	open my $SEQ, '<', $self->{seqfile}
+	    or $self->throw("Could not open sequence file: $self->{seqfile}");
+	my @lines = <$SEQ>;
+	close $SEQ;
+
+	# The file is consumed from the end: sequence lines are prepended to
+	# $seq until the header line that owns them is reached.
+	my $seq = "";
+	while (defined(my $line = pop @lines)) {
+		chomp $line;
+		if (my ($name, $min_len, $max_len, $med_len, $beg_clear, $end_clear) =
+		    $line =~ /^>(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
+			my $rec = $self->{sequences}{$name} ||= {};
+			if ($self->{fwd_desig}) {
+				$rec->{direction} = "F" if $name =~ /^(\S+)($self->{fwd_desig})$/;
+			}
+			if ($self->{rev_desig}) {
+				$rec->{direction} = "R" if $name =~ /^(\S+)($self->{rev_desig})$/;
+			}
+			$rec->{min_clone_len} = $min_len; # used for TIGR Assembler, as are max and med
+			$rec->{max_clone_len} = $max_len;
+			$rec->{med_clone_len} = $med_len;
+			$rec->{beg_clear} = $beg_clear;
+			$rec->{end_clear} = $end_clear;
+			# tr/// takes a character list, not a regex class, so no brackets.
+			# These are the bases the original author saw Phred call.
+			$rec->{length_raw} = ($seq =~ tr/AGCTN//);
+			my $beg = $beg_clear - 1; # substr offsets start at 0
+			$seq = $rec->{sequence} = substr($seq, $beg, $end_clear - $beg);
+			my $count = $rec->{length_clear} = ($seq =~ tr/AGCTN//);
+			my $countGC = ($seq =~ tr/GC//);
+			# An empty clear range has no bases: report 0 instead of dividing by zero.
+			$rec->{per_GC} = $count ? $countGC / $count * 100 : 0;
+			$seq = "";
+		}
+		else {
+			$seq = $line . $seq;
+		}
+	}
+
+	# now parse quality values (if a quality file is available)
+	@lines = $self->_read_companion_lines('qualfile', "$file.qual", 'quality', 'a');
+	my $qual = "";
+	while (defined(my $line = pop @lines)) {
+		chomp $line;
+		if ($line =~ /^>(\S+)/) {
+			my $name = $1;
+			# split ' ' ignores leading and repeated whitespace, so each
+			# value keeps its base position.
+			my @vals = split ' ', $qual;
+			$qual = "";
+			# Quality records without a matching sequence are skipped rather
+			# than creating an empty entry in {sequences}.
+			my $rec = $self->{sequences}{$name} or next;
+			my @slice = @vals[$rec->{beg_clear} - 1 .. $rec->{end_clear} - 1];
+			$rec->{quality} = join "\t", @slice;
+			my $tot = 0;
+			$tot += $_ for @slice;
+			# An empty clear range averages to 0 instead of dividing by zero.
+			$rec->{avg_quality} = @slice ? $tot / scalar(@slice) : 0;
+		}
+		else {
+			# Keep a separator so the last value of one line cannot fuse
+			# with the first value of the next.
+			$qual = "$line $qual";
+		}
+	}
+
+	# determine whether reads are full length
+	@lines = $self->_read_companion_lines('infofile', "$file.info", 'info', 'an');
+	foreach my $info (@lines) {
+		# Lines that do not match are ignored instead of re-using the
+		# captures of an earlier match.
+		next unless $info =~ /^(\S+).+CLV\s+(\d+)\s+(\d+)$/;
+		my ($name, $left, $right) = ($1, $2, $3);
+		if ($left > 0 && $right > 0) {
+			# cleavage info is listed for rejected sequences too, so only
+			# flag names that were read from the sequence file
+			$self->{sequences}{$name}{full_length} = 1 if $self->{sequences}{$name};
+		}
+	}
+
+	# parse rejects (and presence of poly-A if Lucy has been modified)
+	@lines = $self->_read_companion_lines('stderrfile', "$file.stderr", 'standard error', 'a');
+	if ($self->{adv_stderr}) {
+		foreach my $msg (@lines) {
+			$self->{reject}{$1} = "Q" if $msg =~ /dropping\s+(\S+)/;
+			$self->{reject}{$1} = "V" if $msg =~ /Vector: (\S+)/;
+			$self->{reject}{$1} = "E" if $msg =~ /Empty: (\S+)/;
+			$self->{reject}{$1} = "S" if $msg =~ m{Short/ no insert: (\S+)};
+			$self->{sequences}{$1}{polyA} = 1 if $msg =~ /(\S+) has PolyA/;
+			if ($msg =~ /Dropped PolyA: (\S+)/) {
+				$self->{reject}{$1} = "P";
+				delete $self->{sequences}{$1};
+			}
+		}
+	}
+	else {
+		foreach my $msg (@lines) {
+			$self->{reject}{$1} = "R" if $msg =~ /dropping\s+(\S+)/;
+		}
+	}
+	return;
+}
+
+# Internal helper for _parse: returns the lines of one optional companion file.
+#   $attr    - attribute holding an explicitly configured file name
+#   $default - path tried when the attribute is unset; if it exists, it is
+#              also recorded through the $attr accessor
+#   $label   - file description used in messages ('quality', 'info', ...)
+#   $article - indefinite article matching $label ('a' or 'an')
+# Returns an empty list when neither file is available; throws when a file
+# that should be readable cannot be opened.
+sub _read_companion_lines {
+	my ($self, $attr, $default, $label, $article) = @_;
+
+	my $path = $self->{$attr};
+	if (!$path) {
+		if (!-e $default) {
+			print "I did not find $article $label file.  You will not be able to use all of the accessor methods.\n" if $self->{lucy_verbose};
+			return;
+		}
+		print "You did not set $attr, but I'm opening $default\n" if $self->{lucy_verbose};
+		$self->$attr($default);
+		$path = $default;
+	}
+
+	open my $fh, '<', $path
+	    or $self->throw("Could not open $label file: $path");
+	my @lines = <$fh>;
+	close $fh;
+	return @lines;
+}
+
+=head2 get_Seq_Objs
+
+ Title   :  get_Seq_Objs
+ Usage   :  $lucyObj->get_Seq_Objs()
+ Function:  returns a reference to an array of Bio::PrimarySeq objects
+	    where -id = 'sequence name' and -seq = 'sequence'
+
+ Returns :  reference to an array of Bio::PrimarySeq objects
+ Args	 :  none
+
+=cut
+
+sub get_Seq_Objs {
+    my ($self) = @_;
+
+    # One Bio::PrimarySeq per parsed sequence, in sorted name order; the
+    # list is handed back as an array reference.
+    my @objects = map {
+	my $name = $_;
+	Bio::PrimarySeq->new( -seq => "$self->{sequences}{$name}{sequence}",
+			      -id => "$name");
+    } sort keys %{$self->{sequences}};
+    return \@objects;
+} 
+
+=head2 get_Seq_Obj
+
+ Title   :  get_Seq_Obj
+ Usage   :  $lucyObj->get_Seq_Obj($seqname)
+ Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'
+	    and -seq = 'sequence'
+ Returns :  reference to Bio::PrimarySeq object
+ Args	 :  name of a sequence 
+
+=cut
+
+sub get_Seq_Obj {
+    my ($self, $key) = @_;
+    # Fetch the record on its own: dereferencing {sequences}{$key}{sequence}
+    # in one expression would create an empty entry for an unknown name,
+    # which get_sequence_names() would then report as a parsed sequence.
+    my $rec = $self->{sequences}{$key} || {};
+    my $seqobj = Bio::PrimarySeq->new( -seq => "$rec->{sequence}",
+                                    -id => "$key");
+    return $seqobj;
+}
+
+=head2 get_sequence_names
+
+ Title   :  get_sequence_names
+ Usage   :  $lucyObj->get_sequence_names
+ Function:  returns reference to an array of names of the sequences analyzed by Lucy.
+	    These names are required for most of the accessor methods.  
+	    Note: The Lucy binary will fail unless sequence names are unique.
+ Returns :  array reference
+ Args	 :  none 
+
+=cut
+
+sub get_sequence_names {
+    my ($self) = @_;
+    # Names are the keys of the {sequences} hash, returned sorted as a
+    # reference to a fresh array.
+    return [ sort keys %{ $self->{sequences} } ];
+}
+
+=head2 sequence
+
+ Title   :  sequence
+ Usage   :  $lucyObj->sequence($seqname)
+ Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.
+ Returns :  string
+ Args	 :  name of a sequence                   
+
+=cut
+
+sub sequence {
+    my ($self, $key) = @_;
+    # Fetch the record first: reading {sequences}{$key}{sequence} in one
+    # expression would autovivify an empty entry for an unknown name, and
+    # get_sequence_names() would then report it.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{sequence} : undef;
+}
+
+=head2 quality
+
+ Title   :  quality
+ Usage   :  $lucyObj->quality($seqname)
+ Function:  returns the quality values of one of the sequences analyzed by Lucy.
+	    This method depends on the user having provided a quality file.
+ Returns :  string
+ Args    :  name of a sequence
+
+=cut
+
+sub quality {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{quality} : undef;
+}
+
+=head2 avg_quality
+
+ Title   :  avg_quality
+ Usage   :  $lucyObj->avg_quality($seqname)
+ Function:  returns the average quality value for one of the sequences analyzed by Lucy.
+ Returns :  float
+ Args    :  name of a sequence
+
+=cut
+
+sub avg_quality {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{avg_quality} : undef;
+}
+
+=head2 direction
+
+ Title   :  direction
+ Usage   :  $lucyObj->direction($seqname)
+ Function:  returns the direction for one of the sequences analyzed by Lucy
+	    providing that 'fwd_desig' or 'rev_desig' were set when the
+ 	    Lucy object was created.
+	    Strings returned are: 'F' for forward, 'R' for reverse.  
+ Returns :  string 
+ Args    :  name of a sequence
+
+=cut
+
+sub direction {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    # An unset direction (or an unknown name) is reported as the empty string.
+    return ($rec && $rec->{direction}) ? $rec->{direction} : "";
+}
+
+=head2 length_raw
+
+ Title   :  length_raw
+ Usage   :  $lucyObj->length_raw($seqname)
+ Function:  returns the length of a DNA sequence prior to quality/ vector 
+	    trimming by Lucy.
+ Returns :  integer
+ Args    :  name of a sequence
+
+=cut
+
+sub length_raw {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{length_raw} : undef;
+}
+
+=head2 length_clear
+
+ Title   :  length_clear
+ Usage   :  $lucyObj->length_clear($seqname)
+ Function:  returns the length of a DNA sequence following quality/ vector   
+            trimming by Lucy.
+ Returns :  integer
+ Args    :  name of a sequence
+
+=cut
+
+sub length_clear {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{length_clear} : undef;
+}
+
+=head2 start_clear
+
+ Title   :  start_clear
+ Usage   :  $lucyObj->start_clear($seqname)
+ Function:  returns the beginning position of good quality, vector free DNA sequence 
+	    determined by Lucy.
+ Returns :  integer
+ Args    :  name of a sequence
+
+=cut
+
+sub start_clear {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    # The start of the clear range is stored under the key beg_clear.
+    return $rec ? $rec->{beg_clear} : undef;
+}
+
+
+=head2 end_clear
+
+ Title   :  end_clear
+ Usage   :  $lucyObj->end_clear($seqname)
+ Function:  returns the ending position of good quality, vector free DNA sequence
+            determined by Lucy.
+ Returns :  integer
+ Args    :  name of a sequence
+
+=cut
+
+sub end_clear {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{end_clear} : undef;
+}
+
+=head2 per_GC
+
+ Title   :  per_GC
+ Usage   :  $lucyObj->per_GC($seqname)
+ Function:  returns the percent GC content of the good quality, vector free
+            DNA sequence determined by Lucy.
+ Returns :  float
+ Args    :  name of a sequence
+
+=cut
+
+sub per_GC {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    return $rec ? $rec->{per_GC} : undef;
+}
+
+=head2 full_length
+
+ Title   :  full_length
+ Usage   :  $lucyObj->full_length($seqname)
+ Function:  returns the truth value for whether or not the sequence read was
+            full length (ie. vector present on both ends of read).  This method
+            depends on the user having provided the 'info' file (Lucy must be
+            run with the -debug 'info_filename' option to get this file).
+ Returns :  boolean 
+ Args    :  name of a sequence
+
+=cut
+
+sub full_length {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    # Normalised to 1/0; an unknown name counts as not full length.
+    return ($rec && $rec->{full_length}) ? 1 : 0;
+}
+
+=head2 polyA
+
+ Title   :  polyA
+ Usage   :  $lucyObj->polyA($seqname)
+ Function:  returns the truth value for whether or not a poly-A tail was detected
+            and clipped by Lucy.  This method depends on the user having modified
+            the source for Lucy as outlined in DESCRIPTION and invoking Lucy with
+            the -cdna option and saving the standard error.
+            Note, the final sequence will not show the poly-A/T region.
+ Returns :  boolean
+ Args    :  name of a sequence
+
+=cut
+
+sub polyA {
+    my($self, $key) = @_;
+    # Fetch the record first so that asking about an unknown name does not
+    # autovivify an empty entry in {sequences}.
+    my $rec = $self->{sequences}{$key};
+    # Normalised to 1/0; an unknown name counts as having no poly-A tail.
+    return ($rec && $rec->{polyA}) ? 1 : 0;
+}
+
+=head2 get_rejects
+
+ Title   :  get_rejects
+ Usage   :  $lucyObj->get_rejects()
+ Function:  returns a reference to a hash mapping the names of rejects to a
+ 	    1 letter code for the reason Lucy rejected the sequence.
+	    Q- rejected because of low quality values
+	    S- sequence was short
+	    V- sequence was vector 
+	    E- sequence was empty
+	    P- poly-A/T trimming caused sequence to be too short
+	    R- rejected (the only code assigned when adv_stderr is not set)
+	    In order to get the rejects, you must provide a file with the standard
+	    error from Lucy.  You will only get the quality category rejects unless
+	    you have modified the source and recompiled Lucy as outlined in DESCRIPTION.
+ Returns :  hash reference
+ Args    :  none
+
+=cut
+
+sub get_rejects {
+    my ($self) = @_;
+    # The reject table is filled while the standard error file is parsed;
+    # the stored reference is returned as-is (undef when nothing was stored).
+    my $rejects = $self->{reject};
+    return $rejects;
+}
+
+=head2 Diff for Lucy source code 
+
+  352a353,354
+  >       /* AGW added next line */
+  >       fprintf(stderr, "Empty: %s\n", seqs[i].name);
+  639a642,643
+  > 	    /* AGW added next line */
+  > 	    fprintf(stderr, "Short/ no insert: %s\n", seqs[i].name);
+  678c682,686
+  < 	if (left) seqs[i].left+=left;
+  ---
+  > 	if (left) {
+  > 	  seqs[i].left+=left;
+  > 	  /*  AGW added next line */
+  > 	  fprintf(stderr, "%s has PolyA (left).\n", seqs[i].name);
+  > 	}
+  681c689,693
+  < 	if (right) seqs[i].right-=right;
+  ---
+  > 	if (right) {
+  > 	  seqs[i].right-=right;
+  > 	  /* AGW added next line */
+  > 	  fprintf(stderr, "%s has PolyA (right).\n", seqs[i].name);
+  > 	}
+  682a695,696
+  > 	  /* AGW added next line */
+  > 	  fprintf(stderr, "Dropped PolyA: %s\n", seqs[i].name);	
+  734a749,750
+  > 	  /* AGW added next line */
+  > 	  fprintf(stderr, "Vector: %s\n", seqs[i].name);
+
+=cut
+
+=head2 This patch is to be applied to lucy.c from the lucy-1.19p release
+
+ 277a278,279
+ >       /* AGW added next line */
+ >       fprintf(stderr, "Short/ no insert: %s\n", seqs[i].name);
+ 588c590,592
+ <     if ((seqs[i].len=bases)<=0)
+ ---
+ >     if ((seqs[i].len=bases)<=0) {
+ >       /* AGW added next line */
+ >       fprintf(stderr, "Empty: %s\n", seqs[i].name);
+ 589a594
+ >     }
+ 893c898,902
+ <       if (left) seqs[i].left+=left;
+ ---
+ >       if (left) {
+ >         seqs[i].left+=left;
+ >         /*  AGW added next line */
+ >         fprintf(stderr, "%s has PolyA (left).\n", seqs[i].name);
+ >       }
+ 896c905,909
+ <       if (right) seqs[i].right-=right;
+ ---
+ >       if (right) {
+ >         seqs[i].right-=right;
+ >         /*  AGW added next line */
+ >         fprintf(stderr, "%s has PolyA (right).\n", seqs[i].name);
+ >         }
+ 898a912,913
+ >         /* AGW added next line */
+ >         fprintf(stderr, "Dropped PolyA: %s\n", seqs[i].name);
+ 949a965,966
+ >         /* AGW added next line */
+ >           fprintf(stderr, "Vector: %s\n", seqs[i].name);
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/MZEF.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/MZEF.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/MZEF.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,340 @@
+# $Id: MZEF.pm,v 1.14.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::MZEF
+#
+# Cared for by Hilmar Lapp <hlapp-at-gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::MZEF - Results of one MZEF run
+
+=head1 SYNOPSIS
+
+   $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
+   # filehandle:
+   $mzef = Bio::Tools::MZEF->new( -fh  => \*INPUT );
+   # to indicate that the sequence was reversed prior to feeding it to MZEF
+   # and that you want to have this reflected in the strand() attribute of 
+   # the exons, as well have the coordinates translated to the non-reversed
+   # sequence
+   $mzef = Bio::Tools::MZEF->new( -file   => 'result.mzef',
+                                  -strand => -1 );
+
+   # parse the results
+   # note: this class is-a Bio::Tools::AnalysisResult which implements
+   # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
+   while($gene = $mzef->next_prediction()) {
+       # $gene is an instance of Bio::Tools::Prediction::Gene
+
+       # $gene->exons() returns an array of 
+       # Bio::Tools::Prediction::Exon objects
+       # all exons:
+       @exon_arr = $gene->exons();
+
+       # internal exons only
+       @intrl_exons = $gene->exons('Internal');
+       # note that presently MZEF predicts only internal exons!
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # will stay open)
+   $mzef->close();
+
+=head1 DESCRIPTION
+
+The MZEF module provides a parser for MZEF gene structure prediction
+output.
+
+This module inherits off L<Bio::Tools::AnalysisResult> and therefore
+implements L<Bio::SeqAnalysisParserI>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::MZEF;
+use strict;
+
+use Bio::Tools::Prediction::Gene;
+use Bio::Tools::Prediction::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    # The mail archive had mangled "@args" into " at args" on this line.
+    my ($self, @args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+
+    # handle our own parameters: only -strand is read here
+    my ($strand) = $self->_rearrange([qw(STRAND)], @args);
+
+    # our private state variables
+    # strand defaults to 1 (forward) when the caller gave none
+    $strand = 1 unless defined($strand);
+    $self->{'_strand'} = $strand;
+    $self->{'_preds_parsed'} = 0;
+    $self->{'_has_cds'} = 0;
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $mzef->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /mzef/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+    my $self   = shift;
+    my $method = shift;
+
+    # Only names containing "mzef" (any case) are accepted; a false or
+    # missing argument skips the check and is passed through unchanged.
+    if ($method and $method !~ /mzef/i) {
+	$self->throw("method $method not supported in " . ref($self));
+    }
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $mzef->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the MZEF result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+           Note that with the present version of MZEF there will only be one
+           object returned, because MZEF does not predict individual genes
+           but just potential internal exons.
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    # The mail archive had mangled "@args" into " at args" on this line.
+    my ($self, @args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $mzef->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the MZEF result
+           file. Call this method repeatedly until FALSE is returned.
+
+           Note that with the present version of MZEF there will only be one
+           object returned, because MZEF does not predict individual genes
+           but just potential internal exons.
+ Example :
+ Returns : A Bio::Tools::Prediction::Gene object.
+ Args    :
+
+=cut
+
+sub next_prediction {
+    my $self = shift;
+
+    # Parse the prediction section on first use only.
+    unless ($self->_predictions_parsed()) {
+	$self->_parse_predictions();
+    }
+
+    # Hand out the next stored gene structure (transcript).
+    return $self->_prediction();
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    my ($self) = @_;
+    my $exon_tag = "InternalExon";
+    my $gene;
+    # my $seqname; # name given in output is poorly formatted
+    my $seqlen;
+    my $prednr = 1;
+
+    # Read into a lexical instead of the global $_, so that a caller's $_
+    # (which may alias one of its own variables) is never overwritten.
+    while(defined(my $line = $self->_readline())) {
+	if($line =~ /^\s*(\d+)\s*-\s*(\d+)\s+/) {
+	    # exon or signal
+	    # we handle start-end first because may not be space delimited
+	    # for large numbers
+	    my ($start,$end) = ($1,$2);
+	    if(! defined($gene)) {
+		$gene = Bio::Tools::Prediction::Gene->new(
+                                       '-primary' => "GenePrediction$prednr",
+				       '-source' => 'MZEF');
+	    }
+	    $line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//;
+	    # split the rest into fields
+	    chomp($line);
+	    # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
+	    # index:              0   1   2   3   4   5   6   7
+	    my @flds = split(' ', $line);
+	    # create the feature object depending on the type of signal --
+	    # which is always an (internal) exon for MZEF
+	    my $predobj = Bio::Tools::Prediction::Exon->new();
+	    # set common fields
+	    $predobj->source_tag('MZEF');
+	    $predobj->significance($flds[0]);
+	    $predobj->score($flds[0]); # what shall we set as overall score?
+	    $predobj->strand($self->{'_strand'}); # MZEF searches only one
+	    if($predobj->strand() == 1) {
+		$predobj->start($start);
+		$predobj->end($end);
+	    } else {
+		# translate to coordinates on the non-reversed sequence
+		# NOTE(review): $seqlen is only set once the File_Name line
+		# has been read; confirm it always precedes the exon lines.
+		$predobj->start($seqlen-$end+1);
+		$predobj->end($seqlen-$start+1);
+	    }
+	    # set scores
+	    $predobj->start_signal_score($flds[5]);
+	    $predobj->end_signal_score($flds[7]);
+	    $predobj->coding_signal_score($flds[6]);
+	    # frame -- position of the first "1" in the Orf field, or undef
+	    # when the field contains none
+	    my $frm = index($flds[4], "1");
+	    $predobj->frame(($frm < 0) ? undef : $frm);
+	    $predobj->primary_tag($exon_tag);
+	    $predobj->is_coding(1);
+	    # add to gene structure (should be done only when start and end
+	    # are set, in order to allow for proper expansion of the range)
+	    $gene->add_exon($predobj);		
+	    next;
+	}
+	if($line =~ /^\s*Internal .*(MZEF)/) {
+	    $self->analysis_method($1);
+	    next;
+	}
+	if($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
+	    # $seqname = $1; # this is too poor currently (file name truncated
+                             # to 10 chars) in order to be sensible enough
+	    $seqlen = $2;
+	    next;
+	}
+    }
+    # $gene->seq_id($seqname);
+    $self->_add_prediction($gene) if defined($gene);
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my $self = shift;
+
+    # Take the oldest queued prediction; nothing is returned once the queue
+    # is missing or empty.
+    if (exists $self->{'_preds'} and @{$self->{'_preds'}}) {
+	return shift @{$self->{'_preds'}};
+    }
+    return;
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my $self = shift;
+    my $gene = shift;
+
+    # Create the queue on first use, then append the prediction to it.
+    $self->{'_preds'} = [] unless exists $self->{'_preds'};
+    push @{$self->{'_preds'}}, $gene;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my $self = shift;
+    my $val  = shift;
+
+    # Only a true argument updates the flag; a false or missing one leaves
+    # the stored value alone.
+    if ($val) {
+	$self->{'_preds_parsed'} = $val;
+    }
+    # Make sure the flag exists before it is read.
+    $self->{'_preds_parsed'} = 0 unless exists $self->{'_preds_parsed'};
+    return $self->{'_preds_parsed'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/OddCodes.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/OddCodes.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/OddCodes.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,452 @@
+#$Id: OddCodes.pm,v 1.19.4.1 2006/10/02 23:10:32 sendu Exp $
+#-----------------------------------------------------------------------------
+# PACKAGE    : OddCodes.pm
+# PURPOSE    : To write amino acid sequences in alternative alphabets
+# AUTHOR     : Derek Gatherer (D.Gatherer at organon.nhe.akzonobel.nl)
+# SOURCE     :
+# CREATED    : 8th July 2000
+# MODIFIED   :
+# DISCLAIMER : I am employed in the pharmaceutical industry but my
+#            : employers do not endorse or sponsor this module
+#            : in any way whatsoever.  The above email address is
+#            : given purely for the purpose of easy communication
+#            : with the author, and does not imply any connection
+#	     : between my employers and anything written below.
+# LICENCE    : You may distribute this module under the same terms
+#	     : as the rest of BioPerl.
+#----------------------------------------------------------------------------
+
+=head1 NAME
+
+Bio::Tools::OddCodes - Object holding alternative alphabet coding for
+one protein sequence
+
+=head1 SYNOPSIS
+
+  # Take a sequence object from eg, an inputstream, and creates an
+  # object for the purposes of rewriting that sequence in another
+  # alphabet.  These are abbreviated amino acid sequence alphabets,
+  # designed to simplify the statistical aspects of analysing protein
+  # sequences, by reducing the combinatorial explosion of the
+  # 20-letter alphabet.  These abbreviated alphabets range in size
+  # from 2 to 8.
+
+  # Creating the OddCodes object, eg:
+
+	my $inputstream = Bio::SeqIO->new( '-file' => "seqfile",
+                                           '-format' => 'Fasta');
+	my $seqobj = $inputstream->next_seq();
+	my $oddcode_obj = Bio::Tools::OddCodes->new(-seq => $seqobj);
+
+  # or:
+
+	my $seqobj = Bio::PrimarySeq->new
+              (-seq=>'[cut and paste a sequence here]',
+               -alphabet => 'protein',
+               -id => 'test');
+	my $oddcode_obj  =  Bio::Tools::OddCodes->new(-seq => $seqobj);
+
+  # do the alternative coding, returning the answer as a reference to
+  # a string
+
+	my $output = $oddcode_obj->structural();
+	my $output = $oddcode_obj->chemical();
+	my $output = $oddcode_obj->functional();
+	my $output = $oddcode_obj->charge();
+	my $output = $oddcode_obj->hydrophobic();
+	my $output = $oddcode_obj->Dayhoff();
+	my $output = $oddcode_obj->Sneath();
+	my $output = $oddcode_obj->Stanfel();
+
+
+  # display sequence in new form, eg:
+
+	my $new_coding = $$output;
+	print "\n$new_coding";
+
+=head1 DESCRIPTION
+
+Bio::Tools::OddCodes is a welterweight object for rewriting a protein
+sequence in an alternative alphabet.  Eight of these are provided, ranging
+from the 2-letter hydrophobic alphabet, to the 8-letter chemical
+alphabet.  These are useful for the statistical analysis of protein
+sequences since they can partially avoid the combinatorial explosion
+produced by the full 20-letter alphabet (eg. 400 dimers, 8000 trimers
+etc.)
+
+The objects will print out a warning if the input sequence is not a
+protein. If you know what you are doing, you can silence the warning
+by setting verbose() to a negative value.
+
+See SYNOPSIS above for object creation code.
+
+=head1 REFERENCES
+
+Stanfel LE (1996) A new approach to clustering the amino acids.  J. theor.
+Biol. 183, 195-205.
+
+Karlin S, Ost F and Blaisdell BE (1989)  Patterns in DNA and amino acid
+sequences and their statistical significance.  Chapter 6 of: Mathematical
+Methods for DNA Sequences.  Waterman MS (ed.)  CRC Press, Boca Raton , FL.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Derek Gatherer
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::OddCodes;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+sub new
+{
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # The sequence may arrive as the named -seq parameter or, failing
+    # that, as a bare object in the first argument position.
+    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
+    if((! defined($seqobj)) && @args && ref($args[0])) {
+        # parameter not passed as named parameter?
+        $seqobj = $args[0];
+    }
+
+    # ref() plus eval keep a missing or unblessed argument from dying
+    # with "Can't call method isa" instead of the intended exception.
+    unless (ref($seqobj) && eval { $seqobj->isa("Bio::PrimarySeqI") })
+    {
+        $self->throw("Bio::Tools::OddCodes only works on PrimarySeqI objects");
+    }
+
+    $self->{'_seqref'} = $seqobj;
+
+    return $self;
+}
+
+=head2 structural
+
+ Title   : structural
+ Usage   : $output = $oddcode_obj->structural();
+ Function: turns amino acid sequence into 3-letter structural alphabet
+	 : A (ambivalent), E (external), I (internal)
+ Example : a sequence ACDEFGH will become AAEEIAE
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub structural
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);    # see _pullseq() below
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters A, E and I are also
+    # input residues.
+    $seqstring =~ tr/ACGPSTWY/1/;    # ambivalent
+    $seqstring =~ tr/RNDQEHK/2/;     # external
+    $seqstring =~ tr/ILMFV/3/;       # internal
+    $seqstring =~ tr/123/AEI/;
+
+    return \$seqstring;
+}
+
+=head2 functional
+
+ Title   : functional
+ Usage   : $output = $oddcode_obj->functional();
+ Function: turns amino acid sequence into 4-letter functional alphabet
+	 : A (acidic), C (basic), H (hydrophobic), P (polar)
+ Example : a sequence ACDEFGH will become HPAAHPC
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub functional
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters are also input residues.
+    $seqstring =~ tr/DE/1/;          # acidic
+    $seqstring =~ tr/HKR/2/;         # basic
+    $seqstring =~ tr/AFILMPVW/3/;    # hydrophobic
+    $seqstring =~ tr/CGNQSTY/4/;     # polar
+    $seqstring =~ tr/1234/ACHP/;
+
+    return \$seqstring;
+}
+
+=head2 hydrophobic
+
+ Title   : hydrophobic
+ Usage   : $output = $oddcode_obj->hydrophobic();
+ Function: turns amino acid sequence into 2-letter hydrophobicity alphabet
+	 : I (hydrophobic), O (hydrophilic)
+ Example : a sequence ACDEFGH will become IOOOIOO
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub hydrophobic
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letter I is also an input residue.
+    $seqstring =~ tr/AFILMPVW/1/;        # becomes I
+    $seqstring =~ tr/CDEGHKNQRSTY/2/;    # becomes O
+    $seqstring =~ tr/12/IO/;
+
+    return \$seqstring;
+}
+
+=head2 Dayhoff
+
+ Title   : Dayhoff
+ Usage   : $output = $oddcode_obj->Dayhoff();
+ Function: turns amino acid sequence into 6-letter Dayhoff alphabet
+ Example : a sequence ACDEFGH will become CADDGCE
+         : A (=C),   C (=AGPST), D (=DENQ),
+         : E (=HKR), F (=ILMV),  G (=FWY)
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub Dayhoff
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters are also input residues.
+    $seqstring =~ tr/C/1/;        # becomes A
+    $seqstring =~ tr/AGPST/2/;    # becomes C
+    $seqstring =~ tr/DENQ/3/;     # becomes D
+    $seqstring =~ tr/HKR/4/;      # becomes E
+    $seqstring =~ tr/ILMV/5/;     # becomes F
+    $seqstring =~ tr/FWY/6/;      # becomes G
+    $seqstring =~ tr/123456/ACDEFG/;
+
+    return \$seqstring;
+}
+
+=head2 Sneath
+
+ Title   : Sneath
+ Usage   : $output = $oddcode_obj->Sneath();
+ Function: turns amino acid sequence into 7-letter Sneath alphabet
+ Example : a sequence ACDEFGH will become CEFFHCH
+         : A (=ILV), C (=AGP), D (=MNQ), E (=CST),
+         : F (=DE),  G (=KR),  H (=FHWY)
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub Sneath
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters are also input residues.
+    $seqstring =~ tr/ILV/1/;     # becomes A
+    $seqstring =~ tr/AGP/2/;     # becomes C
+    $seqstring =~ tr/MNQ/3/;     # becomes D
+    $seqstring =~ tr/CST/4/;     # becomes E
+    $seqstring =~ tr/DE/5/;      # becomes F
+    $seqstring =~ tr/KR/6/;      # becomes G
+    $seqstring =~ tr/FHWY/7/;    # becomes H
+    $seqstring =~ tr/1234567/ACDEFGH/;
+
+    return \$seqstring;
+}
+
+=head2 Stanfel
+
+ Title   : Stanfel
+ Usage   : $output = $oddcode_obj->Stanfel();
+ Function: turns amino acid sequence into 4-letter Stanfel alphabet
+ Example : a sequence ACDEFGH will become AACCDAE
+         : A (=ACGILMPSTV), C (=DENQ), D (=FWY), E (=HKR)
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub Stanfel
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters are also input residues.
+    $seqstring =~ tr/ACGILMPSTV/1/;    # becomes A
+    $seqstring =~ tr/DENQ/2/;          # becomes C
+    $seqstring =~ tr/FWY/3/;           # becomes D
+    $seqstring =~ tr/HKR/4/;           # becomes E
+    $seqstring =~ tr/1234/ACDE/;
+
+    return \$seqstring;
+}
+
+=head2 chemical
+
+ Title   : chemical
+ Usage   : $output = $oddcode_obj->chemical();
+ Function: turns amino acid sequence into 8-letter chemical alphabet
+	 : A (acidic), L (aliphatic), M (amide), R (aromatic)
+	 : C (basic),  H (hydroxyl),  I (imino), S (sulphur)
+ Example : a sequence ACDEFGH will become LSAARLC
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub chemical
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters are also input residues.
+    $seqstring =~ tr/DE/1/;       # acidic    -> A
+    $seqstring =~ tr/AGILV/2/;    # aliphatic -> L
+    $seqstring =~ tr/NQ/3/;       # amide     -> M
+    $seqstring =~ tr/FWY/4/;      # aromatic  -> R
+    $seqstring =~ tr/RHK/5/;      # basic     -> C
+    $seqstring =~ tr/ST/6/;       # hydroxyl  -> H
+    $seqstring =~ tr/P/7/;        # imino     -> I
+    $seqstring =~ tr/CM/8/;       # sulphur   -> S
+    $seqstring =~ tr/12345678/ALMRCHIS/;
+
+    return \$seqstring;
+}
+
+=head2 charge
+
+ Title   : charge
+ Usage   : $output = $oddcode_obj->charge();
+ Function: turns amino acid sequence into 3-letter charge alphabet
+ Example : a sequence ACDEFGH will become NNAANNC
+         : A (negative; NOT anode), C (positive; NOT cathode), N (neutral)
+ Returns : Reference to the new sequence string
+ Args    : none
+
+=cut
+
+sub charge
+{
+    my $self = $_[0];
+    my $seqstring = _pullseq($self);
+
+    # tr/// takes a plain character list, not a regex class: brackets
+    # written there are translated like any other character.  Digits act
+    # as placeholders because the output letters are also input residues.
+    $seqstring =~ tr/DE/1/;                 # negative -> A
+    $seqstring =~ tr/HKR/2/;                # positive -> C
+    $seqstring =~ tr/ACFGILMNPQSTVWY/3/;    # neutral  -> N
+    $seqstring =~ tr/123/ACN/;
+
+    return \$seqstring;
+}
+
+# _pullseq is called within each of the subroutines
+# it just checks a few things and returns the sequence
+
+sub _pullseq
+{
+    my ($self) = @_;
+    my $seqobj = $self->{'_seqref'};
+
+    # The stored object's type is re-checked on every call.
+    if (!$seqobj->isa("Bio::PrimarySeqI")) {
+        $self->throw("die, OddCodes works only on PrimarySeqI objects\n");
+    }
+
+    # A non-protein alphabet only draws a warning, and even that is
+    # suppressed when verbose() is negative.
+    if ($seqobj->alphabet ne 'protein' and $self->verbose >= 0) {
+        $self->warn("\tAll OddCode alphabets need a protein sequence,\n".
+                    "\tbut BioPerl thinks this is not: [". $seqobj->id. "]");
+    }
+
+    # Callers always receive the residues in upper case.
+    my $seqstring = uc $seqobj->seq();
+    $self->throw("$seqstring: die, sequence has zero length\n")
+        if length($seqstring) < 1;
+
+    return $seqstring;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy/Result.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy/Result.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy/Result.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,343 @@
+# $Id: Result.pm,v 1.9.4.1 2006/10/02 23:10:34 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Phylo::Molphy::Result
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::Molphy::Result - container for data parsed from a ProtML run
+
+=head1 SYNOPSIS
+
+  # do not use this object directly, you will get it back as part of a 
+  # Molphy parser
+  use Bio::Tools::Phylo::Molphy;
+  my $parser = new Bio::Tools::Phylo::Molphy(-file => 'output.protml');
+  while( my $r = $parser->next_result ) {
+    # r is a Bio::Tools::Phylo::Molphy::Result object
+
+    # print the model name
+    print $r->model, "\n";
+
+    # get the substitution matrix
+    # this is a hash of 3letter aa codes -> 3letter aa codes representing
+    # substitution rate
+    my $smat = $r->substitution_matrix;
+    printf "Arg -> Gln substitution rate is %d\n",
+          $smat->{'Arg'}->{'Gln'};
+
+    # get the transition probability matrix
+    # this is a hash of 3letter aa codes -> 3letter aa codes representing
+    # transition probability
+    my $tmat = $r->transition_probability_matrix;
+    printf "Arg -> Gln transition probability is %.2f\n",
+          $tmat->{'Arg'}->{'Gln'};
+
+    # get the frequency for each of the residues
+    # (residue_frequencies returns a hash, not a reference)
+    my %rfreqs = $r->residue_frequencies;
+
+    foreach my $residue ( keys %rfreqs ) {
+       printf "residue %s  expected freq: %.2f observed freq: %.2f\n",
+              $residue,$rfreqs{$residue}->[0], $rfreqs{$residue}->[1];
+    }
+
+    my @trees;
+    while( my $t = $r->next_tree ) {
+        push @trees, $t;
+    }
+
+    print "search space is ", $r->search_space, "\n",
+          "1st tree score is ", $trees[0]->score, "\n";
+
+    # writing to STDOUT, use -file => '>filename' to specify a file
+    my $out = new Bio::TreeIO(-format => "newick");
+    $out->write_tree($trees[0]); # writing only the 1st tree
+  }
+
+
+=head1 DESCRIPTION
+
+A container for data parsed from a ProtML run.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Phylo::Molphy::Result;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Phylo::Molphy::Result();
+ Function: Builds a new Bio::Tools::Phylo::Molphy::Result object 
+ Returns : Bio::Tools::Phylo::Molphy::Result
+ Args    : 
+
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($trees,$smat,$freq,
+      $model, $sspace,
+      ) = $self->_rearrange([qw(TREES SUBSTITUTION_MATRIX
+                                FREQUENCIES
+                                MODEL SEARCH_SPACE)], @args);
+
+  # Start with an empty tree list so the tree accessors never have to
+  # dereference an unset slot.
+  $self->{'_trees'} = [];
+  if( $trees ) {
+      if(ref($trees) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize trees");
+      } else {
+          foreach my $t ( @$trees ) {
+              $self->add_tree($t);
+          }
+      }
+  }
+  # initialize things through object methods to be a good
+  # little OO programmer
+  if( ref($smat) =~ /HASH/i ) {
+      $self->substitution_matrix($smat);
+  }
+  if( ref($freq) =~ /HASH/i ) {
+      $self->residue_frequencies($freq);
+  }
+
+  # model and search space are only stored when given a true value
+  $model && $self->model($model);
+  $sspace && $self->search_space($sspace);
+  $self->{'_treeiterator'} = 0;    # next_tree starts at the first tree
+
+  return $self;
+}
+
+=head2 model
+
+ Title   : model
+ Usage   : $obj->model($newval)
+ Function: 
+ Returns : value of model
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub model{
+   my $self  = shift;
+   my $value = shift;
+
+   # Combined getter/setter: a defined argument replaces the stored value.
+   $self->{'model'} = $value if defined $value;
+   return $self->{'model'};
+}
+
+=head2 substitution_matrix
+
+ Title   : substitution_matrix
+ Usage   : my $smat = $result->substitution_matrix;
+ Function: Get the relative substitution matrix calculated in the ML procedure
+ Returns : reference to hash of hashes where key is the aa/nt name and value
+           is another hash ref which contains keys for all the aa/nt 
+           possibilities
+ Args    : none
+
+
+=cut
+
+sub substitution_matrix{
+   my ($self,$val) = @_;
+
+   # Getter: nothing to validate or store.
+   return $self->{'_substitution_matrix'} unless defined $val;
+
+   # Setter: accept only a hash whose every value is itself a hash;
+   # anything else is rejected with a warning and an empty return.
+   my $valid = 0;
+   if( ref($val) =~ /HASH/ ) {
+       my @bad = grep { ref($_) !~ /HASH/i } values %{$val};
+       $valid = 1 unless @bad;
+   }
+   unless( $valid ) {
+       $self->warn("Must be a valid hashref of hashrefs for substition_matrix");
+       return;
+   }
+
+   $self->{'_substitution_matrix'} = $val;
+   return $self->{'_substitution_matrix'};
+}
+
+=head2 transition_probability_matrix
+
+ Title   : transition_probability_matrix
+ Usage   : my $matrixref = $molphy->transition_probability_matrix();
+ Function: Gets the observed transition probability matrix
+ Returns : hash of hashes of aa/nt transition to each other aa/nt 
+ Args    : Transition matrix type, typically
+           '1PAM-1.0e5' or '1PAM-1.0e7' (the default)
+
+
+=cut
+
+sub transition_probability_matrix {
+   my ($self,$type,$val) = @_;
+   $type = '1PAM-1.0e7' unless defined $type;
+
+   # Setter path: the matrix must be a hash of hashes, otherwise warn and
+   # return empty without touching what is stored.
+   if( defined $val ) {
+       unless( ref($val) =~ /HASH/ ) {
+           $self->warn("Must be a valid hashref of hashrefs for transition_probablity_matrix");
+           return;
+       }
+       if( grep { ref($_) !~ /HASH/i } values %{$val} ) {
+           $self->warn("Must be a valid hashref of hashrefs for transition_probability_matrix");
+           return;
+       }
+       $self->{'_TPM'}->{$type} = $val;
+   }
+
+   # fix this for nucml where there are 2 values (one is just a
+   # transformation of the other, but how to represent?)
+   return $self->{'_TPM'}->{$type};
+}
+
+=head2 residue_frequencies
+
+ Title   : residue_frequencies
+ Usage   : my %data = $molphy->residue_frequencies()
+ Function: Get the modeled and expected frequencies for
+           each of the residues in the sequence
+ Returns : hash of either aa (protml) or nt (nucml) frequencies
+           each key will point to an array reference where
+           1st slot is model's expected frequency
+           2nd slot is observed frequency in the data
+           e.g. $hash{'A'}->[0] is the expected frequency of residue A
+ Args    : none
+
+
+=cut
+
+#'
+
+sub residue_frequencies {
+   my ($self,$val) = @_;
+   # Setter path: only a hash reference is stored; anything else draws a
+   # warning and leaves the current value in place.
+   if(defined $val ) {
+       if( ref($val) =~ /HASH/ ) {
+           $self->{'_residue_frequencies'} = $val;
+       } else {
+           $self->warn("Must be a valid hashref of hashrefs for residue_frequencies");
+       }
+   }
+   # Dereferencing an unset slot is fatal under strict refs, so fall back
+   # to an empty hash when no frequencies were ever stored.  The result is
+   # a flattened hash (list), not a reference.
+   return %{ $self->{'_residue_frequencies'} || {} };
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree;
+ Function: Get the next tree from the factory
+ Returns : L<Bio::Tree::TreeI>
+ Args    : none
+
+=cut
+
+sub next_tree{
+   my ($self) = @_;
+   # Hand back the tree at the cursor and advance it; past the end of the
+   # list (or with no trees stored) the result is undef.
+   return $self->{'_trees'}->[$self->{'_treeiterator'}++] || undef;
+}
+
+=head2 rewind_tree_iterator
+
+ Title   : rewind_tree_iterator
+ Usage   : $result->rewind_tree_iterator()
+ Function: Rewinds the tree iterator so that next_tree can be 
+           called again from the beginning
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind_tree_iterator {
+    my ($self) = @_;
+    # Move the cursor read by next_tree back to the first stored tree.
+    return $self->{'_treeiterator'} = 0;
+}
+
+=head2 add_tree
+
+ Title   : add_tree
+ Usage   : $result->add_tree($tree);
+ Function: Adds a tree 
+ Returns : integer which is the number of trees stored
+ Args    : L<Bio::Tree::TreeI>
+
+=cut
+
+sub add_tree{
+   my ($self,$tree) = @_;
+   # Make sure the list exists: counting through an unset slot is fatal
+   # under strict refs when the very first argument offered is rejected.
+   $self->{'_trees'} ||= [];
+   # Anything that is not a Bio::Tree::TreeI object is silently skipped;
+   # the eval keeps an unblessed reference from dying inside ->isa.
+   if( $tree && ref($tree) && eval { $tree->isa('Bio::Tree::TreeI') } ) {
+       push @{$self->{'_trees'}},$tree;
+   }
+   return scalar @{$self->{'_trees'}};
+}
+
+=head2 search_space
+
+ Title   : search_space
+ Usage   : $obj->search_space($newval)
+ Function: 
+ Returns : value of search_space
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub search_space{
+   my $self  = shift;
+   my $value = shift;
+
+   # Combined getter/setter: a defined argument replaces the stored value.
+   $self->{'search_space'} = $value if defined $value;
+   return $self->{'search_space'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Molphy.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,296 @@
+# $Id: Molphy.pm,v 1.9.4.1 2006/10/02 23:10:33 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Phylo::Molphy
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::Molphy - parser for Molphy output
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Phylo::Molphy;
+  my $parser = new Bio::Tools::Phylo::Molphy(-file => 'output.protml');
+  while( my $r = $parser->next_result ) {
+    # r is a Bio::Tools::Phylo::Molphy::Result object
+
+    # print the model name
+    print $r->model, "\n";
+
+    # get the substitution matrix
+    # this is a hash of 3letter aa codes -> 3letter aa codes representing
+    # substitution rate
+    my $smat = $r->substitution_matrix;
+    printf "Arg -> Gln substitution rate is %d\n",
+          $smat->{'Arg'}->{'Gln'};
+
+    # get the transition probability matrix
+    # this is a hash of 3letter aa codes -> 3letter aa codes representing
+    # transition probability
+    my $tmat = $r->transition_probability_matrix;
+    printf "Arg -> Gln transition probability is %.2f\n",
+          $tmat->{'Arg'}->{'Gln'};
+
+    # get the frequency for each of the residues
+    # (residue_frequencies returns a hash, not a reference)
+    my %rfreqs = $r->residue_frequencies;
+
+    foreach my $residue ( keys %rfreqs ) {
+       printf "residue %s  expected freq: %.2f observed freq: %.2f\n",
+              $residue,$rfreqs{$residue}->[0], $rfreqs{$residue}->[1];
+    }
+
+    my @trees;
+    while( my $t = $r->next_tree ) {
+        push @trees, $t;
+    }
+
+    print "search space is ", $r->search_space, "\n",
+          "1st tree score is ", $trees[0]->score, "\n";
+
+    # writing to STDOUT, use -file => '>filename' to specify a file
+    my $out = new Bio::TreeIO(-format => "newick");
+    $out->write_tree($trees[0]); # writing only the 1st tree
+  }
+
+=head1 DESCRIPTION
+
+A parser for Molphy output (protml,dnaml)
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Phylo::Molphy;
+use strict;
+
+use Bio::Tools::Phylo::Molphy::Result;
+use Bio::TreeIO;
+use IO::String;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Phylo::Molphy();
+ Function: Builds a new Bio::Tools::Phylo::Molphy object 
+ Returns : Bio::Tools::Phylo::Molphy
+ Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
+
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  # The same arguments go to the IO layer, which handles -file / -fh.
+  $self->_initialize_io(@args);
+
+  return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $r = $molphy->next_result
+ Function: Get the next result set from parser data
+ Returns : Bio::Tools::Phylo::Molphy::Result object
+ Args    : none
+
+
+=cut
+
+sub next_result{
+   my ($self) = @_;
+
+   # A little statemachine for the parser here.  $state is 1 while the
+   # rows of a transition probability matrix are being read, 2 while tree
+   # lines are being collected, and 0 otherwise.
+   my ($state,$transition_ct,
+       @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
+   my ( %subst_matrix, @treelines, @treedata, %frequencies);
+   my ( $treenum,$possible_trees, $model);
+   my $trans_type;
+   my $parsed = 0;
+
+   # File the rows collected so far under the current matrix type, keyed
+   # by the labels gathered while reading the substitution rate matrix.
+   # Storing the same rows twice is harmless.
+   my $store_matrix = sub {
+       return unless defined $trans_type;
+       my $i = 0;
+       foreach my $row ( @transition_matrix ) {
+           my $j = 0;
+           foreach my $col ( @$row ) {
+               $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
+               $j++;
+           }
+           $i++;
+       }
+   };
+
+   while( defined ( $_ = $self->_readline()) ) {
+       $parsed = 1;
+       if( /^Relative Substitution Rate Matrix/ ) {
+           if( %subst_matrix ) {
+               # a second matrix belongs to the next result: put the
+               # line back and stop here
+               $self->_pushback($_);
+               last;
+           }
+           $state = 0;
+           my ( @tempdata);
+           @resloc = ();
+           while( defined ($_ = $self->_readline) ) {
+               last if (/^\s+$/);
+               # remove leading/trailing spaces
+               s/^\s+//;
+               s/\s+$//;
+               my @data = split;
+               # any token containing a non-digit is kept as a label
+               for my $l ( @data ) {
+                   if( $l =~ /\D+/ ) {
+                       push @resloc, $l;
+                   }
+               }
+               push @tempdata, \@data;
+           }
+           my $i = 0;
+           for my $row ( @tempdata ) {
+               my $j = 0;
+               for my $col ( @$row ) {
+                   if( $i == $j ) {
+                       # empty string for diagonals
+                       $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
+                   } else {
+                       $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
+                   }
+                   $j++;
+               }
+               $i++;
+           }
+       } elsif( /^Transition Probability Matrix/ ) {
+           if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
+               $state = 1;
+               my $newtrans_type = "$3-$1";
+               # finish the matrix collected so far before starting anew
+               $store_matrix->();
+               $trans_type = $newtrans_type;
+               $transition_ct = 0;
+               @transition_matrix = ();
+           }
+       } elsif ( /Acid Frequencies/ ) {
+           $state = 0;
+           $self->_readline(); # skip the next line
+           while( defined( $_ = $self->_readline) ) {
+               unless( /^\s+/) {
+                   $self->_pushback($_);
+                   last;
+               }
+               s/^\s+//;
+               s/\s+$//;
+               # third and fourth columns are stored, in that order, as
+               # the two-element frequency record for the residue
+               my ($index,$res,$expected,$observed) = split;
+               $frequencies{$res} = [ $expected,$observed];
+           }
+       } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
+           my @save = ($1,$2,$3);
+           # finish processing the transition_matrix
+           $store_matrix->();
+           if( defined $treenum ) {
+               # a second model line belongs to the next result
+               $self->_pushback($_);
+               last;
+           }
+
+           $state = 2;
+           ($treenum,$possible_trees, $model) = @save;
+           $model =~ s/\s+/ /g;
+       } elsif( $state == 1 ) {
+           next if( /^\s+$/ || /^\s+Ala/);
+           s/^\s+//;
+           s/\s+$//;
+           if( $trans_type eq '1PAM-1.0e7' ) {
+               # because the matrix is split up into 2-10 column sets
+               push @{$transition_matrix[$transition_ct++]}, split ;
+               $transition_ct = 0 if $transition_ct % 20 == 0;
+           } elsif( $trans_type eq '1PAM-1.0e5' ) {
+               # because the matrix is split up into 2-10 column sets
+               my ($res,@row) = split;
+               next if $transition_ct >= 20; # skip last
+               push @{$transition_matrix[$transition_ct++]}, @row;
+           }
+       } elsif( $state == 2 ) {
+           # a leading "<number> <score>" pair is stripped off and kept
+           # as the id and score of the corresponding tree
+           if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
+               push @treedata, [ $1,$2];
+           }
+           # save this for the end so that we can
+           # be efficient and only open one tree parser
+           push @treelines, $_;
+       }
+   }
+   # a matrix still open when the input ran out would otherwise be lost
+   $store_matrix->();
+
+   # waiting till the end to do this, is it better
+   my @trees;
+   if( @treelines ) {
+       my $strdat = IO::String->new(join('',@treelines));
+       my $treeio = Bio::TreeIO->new(-fh => $strdat,
+                                     -format => 'newick');
+       while( my $tree = $treeio->next_tree ) {
+           if( @treedata ) {
+               my $dat = shift @treedata;
+               # set the associated information
+               $tree->id($dat->[0]);
+               $tree->score($dat->[1]);
+           }
+           push @trees, $tree;
+       }
+   }
+   # nothing was read at all: signal end of input with an empty return
+   return unless( $parsed );
+   my $result = Bio::Tools::Phylo::Molphy::Result->new
+       (-trees => \@trees,
+        -substitution_matrix => \%subst_matrix,
+        -frequencies         => \%frequencies,
+        -model               => $model,
+        -search_space        => $possible_trees,
+        );
+   while( my ($type,$mat) = each %transition_mat ) {
+       $result->transition_probability_matrix( $type,$mat);
+   }
+   $result;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/ModelResult.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/ModelResult.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/ModelResult.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,552 @@
+# $Id: ModelResult.pm,v 1.7.4.1 2006/10/02 23:10:34 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Phylo::PAML::ModelResult
+#
+# Cared for by Jason Stajich <jason at open-bio.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::PAML::ModelResult - A container for NSSite Model Result from PAML 
+
+=head1 SYNOPSIS
+
+  # get a ModelResult from a PAML::Result object
+  use Bio::Tools::Phylo::PAML;
+  my $paml = new Bio::Tools::Phylo::PAML(-file => 'mlc');
+  my $result = $paml->next_result;
+  foreach my $model ( $result->get_model_results ) {
+    print $model->model_num, " ", $model->model_description, "\n";
+    print $model->kappa, "\n";
+    print $model->time_used, "\n";
+# if you are using PAML < 3.15 then only one place for POS sites
+   for my $sites ( $model->get_pos_selected_sites ) {
+    print join("\t",@$sites),"\n";
+   }
+# otherwise query NEB and BEB slots
+   for my $sites ( $model->get_NEB_pos_selected_sites ) {
+     print join("\t",@$sites),"\n";
+   }
+
+   for my $sites ( $model->get_BEB_pos_selected_sites ) {
+    print join("\t",@$sites),"\n";
+   }
+
+  }
+
+=head1 DESCRIPTION
+
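+This object holds the results reported for a single NSsites model: the
+model number and description, kappa, the time used, the log likelihood,
+the site classes and shape parameters, the sites found to be under
+positive selection (general, NEB and BEB lists) and the associated trees.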
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at open-bio.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Phylo::PAML::ModelResult;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Phylo::PAML::ModelResult();
+ Function: Builds  a new Bio::Tools::Phylo::PAML::ModelResult object 
+ Returns : an instance of Bio::Tools::Phylo::PAML::ModelResult
+ Args    : -model_num           => model number
+           -model_description   => model description
+           -kappa               => value of kappa
+           -time_used           => amount of time
+           -pos_sites           => arrayref of sites under positive selection
+           -neb_sites           => arrayref of sites under positive selection (by NEB analysis)
+           -beb_sites           => arrayref of sites under positive selection (by BEB analysis)
+           -trees               => arrayref of tree(s) data for this model
+           -shape_params        => hashref of parameters 
+                                   ('shape' => 'alpha',
+				    'gamma' => $g, 
+				    'r' => $r, 
+				    'f' => $f 
+				    )
+                                    OR
+				    ( 'shape' => 'beta',
+				      'p' => $p, 
+				      'q' => $q
+				     )
+           -likelihood          => likelihood
+           -num_site_classes    => number of site classes
+           -dnds_site_classes   => hashref with two keys, 'p' and 'w'
+                                   which each point to an array, each
+                                   slot is for a different site class.
+                                   'w' is for dN/dS and 'p' is probability
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # Extract the named constructor arguments; the variables on the left
+  # line up one-to-one with the keys listed on the right.
+  my ($modelnum,$modeldesc,$kappa,
+      $timeused,$trees,
+      $pos_sites,$neb_sites,$beb_sites,
+      $num_site_classes, $shape_params,
+      $dnds_classes,
+      $likelihood) = $self->_rearrange([qw(MODEL_NUM
+                                           MODEL_DESCRIPTION
+                                           KAPPA
+                                           TIME_USED
+                                           TREES
+                                           POS_SITES
+                                           NEB_SITES BEB_SITES
+                                           NUM_SITE_CLASSES
+                                           SHAPE_PARAMS
+                                           DNDS_SITE_CLASSES
+                                           LIKELIHOOD)],
+                                       @args);
+
+  # -trees must be an array reference; each element goes through add_tree,
+  # which only keeps Bio::Tree::TreeI objects.
+  if( $trees ) {
+      if( ref($trees) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize trees");
+      } else {
+          foreach my $t ( @$trees ) {
+              $self->add_tree($t);
+          }
+      }
+  }
+  $self->{'_treeiterator'} = 0;
+
+  # The three site lists are validated the same way: the argument must be
+  # an array reference whose entries are themselves array references, and
+  # every valid entry is flattened into the matching add_* method.
+  # Order (pos, BEB, NEB) is kept so warnings appear in a stable order.
+  foreach my $spec ( [ 'pos_sites', $pos_sites, 'add_pos_selected_site'     ],
+                     [ 'beb_sites', $beb_sites, 'add_BEB_pos_selected_site' ],
+                     [ 'neb_sites', $neb_sites, 'add_NEB_pos_selected_site' ] ) {
+      my ($label, $sites, $adder) = @$spec;
+      next unless $sites;
+      if( ref($sites) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize $label");
+          next;
+      }
+      foreach my $s ( @$sites ) {
+          if( ref($s) !~ /ARRAY/i ) {
+              $self->warn("need an array ref for each entry in the $label object");
+              next;
+          }
+          $self->$adder(@$s);
+      }
+  }
+
+  # Simple scalar attributes are only set when they were supplied.
+  defined $modelnum   && $self->model_num($modelnum);
+  defined $modeldesc  && $self->model_description($modeldesc);
+  defined $kappa      && $self->kappa($kappa);
+  defined $timeused   && $self->time_used($timeused);
+  defined $likelihood && $self->likelihood($likelihood);
+
+  # The number of site classes defaults to 0 when not supplied.
+  $self->num_site_classes($num_site_classes || 0);
+
+  # -dnds_site_classes must be a hashref carrying both 'p' and 'w'.
+  if( defined $dnds_classes ) {
+      if( ref($dnds_classes) !~ /HASH/i ||
+          ! defined $dnds_classes->{'p'} ||
+          ! defined $dnds_classes->{'w'} ) {
+          $self->warn("-dnds_site_classes expects a hashref with keys p and w");
+      } else {
+          $self->dnds_site_classes($dnds_classes);
+      }
+  }
+
+  # -shape_params must be a hashref; its keys are not inspected here.
+  if( defined $shape_params ) {
+      if( ref($shape_params) !~ /HASH/i ) {
+          $self->warn("-shape_params expects a hashref not $shape_params\n");
+      } else {
+          $self->shape_params($shape_params);
+      }
+  }
+  return $self;
+}
+
+
+=head2 model_num
+
+ Title   : model_num
+ Usage   : $obj->model_num($newval)
+ Function: Get/Set the Model number (0,1,2,3...)
+ Returns : value of model_num (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub model_num {
+    my ($self, @value) = @_;
+    # Any argument, even an explicit undef, replaces the stored number.
+    if( @value ) {
+        $self->{'_num'} = $value[0];
+    }
+    return $self->{'_num'};
+}
+
+=head2 model_description
+
+ Title   : model_description
+ Usage   : $obj->model_description($newval)
+ Function: Get/Set the model description
+           This is something like 'one-ratio', 'neutral', 'selection'
+ Returns : value of description (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub model_description{
+    my ($self, @value) = @_;
+    # Setter form: the first argument becomes the new description.
+    $self->{'_model_description'} = $value[0] if @value;
+    return $self->{'_model_description'};
+}
+
+=head2 time_used
+
+ Title   : time_used
+ Usage   : $obj->time_used($newval)
+ Function: Get/Set the time it took to run this analysis
+ Returns : value of time_used (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub time_used{
+    my ($self, @value) = @_;
+    # With an argument, store it first; either way return the stored value.
+    if( @value ) {
+        $self->{'_time_used'} = $value[0];
+    }
+    return $self->{'_time_used'};
+}
+
+=head2 kappa
+
+ Title   : kappa
+ Usage   : $obj->kappa($newval)
+ Function: Get/Set kappa (ts/tv)
+ Returns : value of kappa (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub kappa{
+    my ($self, @value) = @_;
+    # Called with a value: update the stored kappa before returning it.
+    $self->{'_kappa'} = $value[0] if @value;
+    return $self->{'_kappa'};
+}
+
+=head2 num_site_classes
+
+ Title   : num_site_classes
+ Usage   : $obj->num_site_classes($newval)
+ Function: Get/Set the number of site classes for this model
+ Returns : value of num_site_classes (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub num_site_classes{
+    my ($self, @value) = @_;
+    # An argument (including 0 or undef) overwrites the stored count.
+    if( @value ) {
+        $self->{'_num_site_classes'} = $value[0];
+    }
+    return $self->{'_num_site_classes'};
+}
+
+=head2 dnds_site_classes
+
+ Title   : dnds_site_classes
+ Usage   : $obj->dnds_site_classes($newval)
+ Function: Get/Set dN/dS site classes, a hashref
+           with 2 keys, 'p' and 'w' which point to arrays
+           one slot for each site class.
+ Returns : value of dnds_site_classes (a hashref)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub dnds_site_classes{
+    my ($self, @value) = @_;
+    # The value is stored as given; no validation of the hashref happens here.
+    $self->{'_dnds_site_classes'} = $value[0] if @value;
+    return $self->{'_dnds_site_classes'};
+}
+
+=head2 get_pos_selected_sites
+
+ Title   : get_pos_selected_sites
+ Usage   : my @sites = $modelresult->get_pos_selected_sites();
+ Function: Get the sites which PAML has identified as under positive
+           selection (w > 1).  This returns an array with each slot
+           being a site, 4 values, 
+           site location (in the original alignment)
+           Amino acid    (I *think* in the first sequence)
+           P             (P value)
+           Significance  (** indicated > 99%, * indicates >=95%)
+ Returns : Array
+ Args    : none
+
+
+=cut
+
+sub get_pos_selected_sites{
+   my ($self) = @_;
+   # Fall back to an empty list when no site has been added yet.
+   my $sites = $self->{'_posselsites'} || [];
+   return @{$sites};
+}
+
+=head2 add_pos_selected_site
+
+ Title   : add_pos_selected_site
+ Usage   : $result->add_pos_selected_site($site,$aa,$pvalue,$signif);
+ Function: Add a site to the list of positively selected sites
+ Returns : count of the number of sites stored
+ Args    : $site   - site number (in the alignment)
+           $aa     - amino acid under selection 
+           $pvalue - float from 0->1 represent probability site is under selection according to this model
+           $signif - significance (coded as either empty, '*', or '**'
+
+=cut
+
+sub add_pos_selected_site{
+   my $self = shift;
+   # Exactly four fields are recorded (site, aa, pvalue, signif); missing
+   # ones are stored as undef and extra arguments are ignored.
+   my @record = @_[0 .. 3];
+   push @{$self->{'_posselsites'}}, \@record;
+   my $stored = $self->{'_posselsites'};
+   return scalar @{$stored};
+}
+
+=head2 get_NEB_pos_selected_sites
+
+ Title   : get_NEB_pos_selected_sites
+ Usage   : my @sites = $modelresult->get_NEB_pos_selected_sites();
+ Function: Get the sites which PAML has identified as under positive
+           selection (w > 1) using Naive Empirical Bayes.  
+           This returns an array with each slot being a site, 4 values, 
+           site location (in the original alignment)
+           Amino acid    (I *think* in the first sequence)
+           P             (P value)
+           Significance  (** indicated > 99%, * indicates > 95%)
+           post mean for w
+ Returns : Array
+ Args    : none
+
+
+=cut
+
+sub get_NEB_pos_selected_sites{
+   my ($self) = @_;
+   # An unset list is reported as empty rather than as an error.
+   my $sites = $self->{'_NEBposselsites'} || [];
+   return @{$sites};
+}
+
+=head2 add_NEB_pos_selected_site
+
+ Title   : add_NEB_pos_selected_site
+ Usage   : $result->add_NEB_pos_selected_site($site,$aa,$pvalue,$signif);
+ Function: Add a site to the list of positively selected sites
+ Returns : count of the number of sites stored
+ Args    : $site   - site number (in the alignment)
+           $aa     - amino acid under selection 
+           $pvalue - float from 0->1 represent probability site is under selection according to this model
+           $signif - significance (coded as either empty, '*', or '**'
+           $postmean - post mean for w
+
+=cut
+
+sub add_NEB_pos_selected_site{
+   my ($self,@args) = @_;
+   # Every argument after the object is stored, in order, as one site
+   # record (a fresh array, so later changes by the caller do not leak in).
+   push @{$self->{'_NEBposselsites'}}, [ @args ];
+   # Report how many NEB sites are stored after this addition.
+   return scalar @{$self->{'_NEBposselsites'}};
+}
+
+
+
+=head2 get_BEB_pos_selected_sites
+
+ Title   : get_BEB_pos_selected_sites
+ Usage   : my @sites = $modelresult->get_BEB_pos_selected_sites();
+ Function: Get the sites which PAML has identified as under positive
+           selection (w > 1) using Bayes Empirical Bayes.  
+           This returns an array with each slot being a site, 6 values, 
+           site location (in the original alignment)
+           Amino acid    (I *think* in the first sequence)
+           P             (P value)
+           Significance  (** indicated > 99%, * indicates > 95%)
+           post mean for w (mean)
+           Standard Error for w (SE)
+ Returns : Array
+ Args    : none
+
+=cut
+
+sub get_BEB_pos_selected_sites{
+   my ($self) = @_;
+   # No BEB sites recorded yet means an empty list is returned.
+   my $sites = $self->{'_BEBposselsites'} || [];
+   return @{$sites};
+}
+
+=head2 add_BEB_pos_selected_site
+
+ Title   : add_BEB_pos_selected_site
+ Usage   : $result->add_BEB_pos_selected_site($site,$aa,$pvalue,$signif);
+ Function: Add a site to the list of positively selected sites
+ Returns : count of the number of sites stored
+ Args    : $site   - site number (in the alignment)
+           $aa     - amino acid under selection 
+           $pvalue - float from 0->1 represent probability site is under selection according to this model
+           $signif - significance (coded as either empty, '*', or '**'
+           $postmean - post mean for w
+           $SE       - Standard Error for w
+
+=cut
+
+sub add_BEB_pos_selected_site{
+   my ($self,@args) = @_;
+   # All arguments after the object form one site record; they are copied
+   # into a new array before being appended to the BEB list.
+   push @{$self->{'_BEBposselsites'}}, [ @args ];
+   # Report how many BEB sites are stored after this addition.
+   return scalar @{$self->{'_BEBposselsites'}};
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree;
+ Function: Get the next tree from the factory
+ Returns : L<Bio::Tree::TreeI>
+ Args    : none
+
+=cut
+
+sub next_tree{
+   my ($self) = @_;
+   # Return the tree at the current iterator position and advance the
+   # iterator; once past the last stored tree this yields undef.
+   return $self->{'_trees'}->[$self->{'_treeiterator'}++] || undef;
+}
+
+=head2 get_trees
+
+ Title   : get_trees
+ Usage   : my @trees = $result->get_trees;
+ Function: Get all the parsed trees as an array
+ Returns : Array of trees
+ Args    : none
+
+
+=cut
+
+sub get_trees{
+   my $self = shift;
+   # When no tree has been stored, hand back an empty list.
+   my $stored = $self->{'_trees'} || [];
+   return @{$stored};
+}
+
+=head2 rewind_tree_iterator
+
+ Title   : rewind_tree_iterator
+ Usage   : $result->rewind_tree_iterator()
+ Function: Rewinds the tree iterator so that next_tree can be 
+           called again from the beginning
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind_tree_iterator {
+    my ($self) = @_;
+    # Position 0 makes the next call to next_tree start from the first tree.
+    $self->{'_treeiterator'} = 0;
+    return $self->{'_treeiterator'};
+}
+
+=head2 add_tree
+
+ Title   : add_tree
+ Usage   : $result->add_tree($tree);
+ Function: Adds a tree 
+ Returns : integer which is the number of trees stored
+ Args    : L<Bio::Tree::TreeI>
+
+=cut
+
+sub add_tree{
+   my ($self,$tree) = @_;
+   # Only objects implementing Bio::Tree::TreeI are stored; anything else
+   # is silently ignored.
+   if( $tree && ref($tree) && $tree->isa('Bio::Tree::TreeI') ) {
+       push @{$self->{'_trees'}},$tree;
+   }
+   # Guard against the list not existing yet: dereferencing an undefined
+   # '_trees' slot would die under strict refs when the very first
+   # argument is rejected.
+   return scalar @{$self->{'_trees'} || []};
+}
+
+=head2 shape_params
+
+ Title   : shape_params
+ Usage   : $obj->shape_params($newval)
+ Function: Get/Set shape params for the distribution ('alpha' or 'beta'),
+           stored as a hashref, e.g. with the keys 'shape', 'p' and 'q'
+           for a beta distribution
+ Returns : value of shape_params (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub shape_params{
+    my ($self, @value) = @_;
+    # The hashref is stored as given; its keys are not checked here.
+    if( @value ) {
+        $self->{'_shape_params'} = $value[0];
+    }
+    return $self->{'_shape_params'};
+}
+
+=head2 likelihood
+
+ Title   : likelihood
+ Usage   : $obj->likelihood($newval)
+ Function: log likelihood
+ Returns : value of likelihood (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub likelihood{
+    my ($self, @value) = @_;
+    # Note the storage key has no leading underscore, unlike the other
+    # accessors in this class.
+    $self->{'likelihood'} = $value[0] if @value;
+    return $self->{'likelihood'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/Result.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/Result.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML/Result.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1226 @@
+# $Id: Result.pm,v 1.22.4.1 2006/10/02 23:10:34 sendu Exp $ 
+#
+# BioPerl module for Bio::Tools::Phylo::PAML::Result
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich, Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::PAML::Result - A PAML result set object
+
+=head1 SYNOPSIS
+
+  # see Bio::Tools::Phylo::PAML for example usage
+  use Bio::Tools::Phylo::PAML;
+  my $parser = new Bio::Tools::Phylo::PAML
+    (-file => "./results/mlc", -dir => "./results/");
+
+  # get the first/next result; a Bio::Tools::Phylo::PAML::Result object,
+  # which isa Bio::AnalysisResultI object.
+  my $result = $parser->next_result();
+
+  my @seqs         = $result->get_seqs;
+  my %input_params = $result->get_input_parameters;
+  my @basfreq      = $result->get_codon_pos_basefreq;
+  my $MLmatrix     = $result->get_MLmatrix; # get MaxLikelihood Matrix
+  my $NGmatrix     = $result->get_NGmatrix; # get Nei-Gojobori Matrix
+
+
+  # for AAML runs
+  my $AAmatrix   = $result->get_AADistMatrix;
+  my $AAMLmatrix   = $result->get_AAMLDistMatrix;
+
+  # if -dir contains an rst file get list of
+  # Bio::PrimarySeq ancestral state reconstructions of the sequences
+  my @rsts          = $result->get_rst_seqs; 
+
+
+  # if you want to print the changes on the tree
+  # this will print out the 
+  # anc_aa       => ANCESTRAL AMINO ACID
+  # anc_prob     => ANCESTRAL AA PROBABILITY 
+  # derived_aa   => DERIVED AA
+  # derived_prob => DERIVED AA PROBABILITY (where appropriate - NA for extant/tip taxa)
+  # site         => which codon site this in the alignment
+    @trees = $result->get_rst_trees;
+    for my $t ( @trees ) {
+	for my $node ( $t->get_nodes ) {	
+	    next unless $node->ancestor; # skip root node
+	    my @changes = $node->get_tag_values('changes');
+	    my $chgstr = '';
+	    for my $c ( @changes ) { 
+		for my $k ( sort keys %$c ) {
+		    $chgstr .= "$k => $c->{$k} ";
+		}
+		$chgstr .= "\n\t";
+	    }
+
+	    printf "node:%s n=%s s=%s\n\t%s\n",
+	    $node->id, 
+	    $node->get_tag_values('n'),
+	    $node->get_tag_values('s'),
+	    $chgstr;
+	}
+    }
+
+  # Persite probabilities
+  my $persite = $result->get_rst_persite;
+  # let's look at site 2
+  $site = $persite->[2]; 
+  # so site 2, node 2 (extant node, node 2)
+  print $site->[2]->{'codon'}, ' ',$site->[2]->{'aa'},"\n";
+  # site 2, node 3
+  print $site->[3]->{'codon'}, ' ',$site->[3]->{'aa'}, "\n";
+
+  # ancestral node 9, codon, aa, marginal probabilities; Yang95 is listed as 
+  #  (eqn. 4 in Yang et al. 1995 Genetics 141:1641-1650) in PAML rst file.
+  print $site->[9]->{'codon'}, ' ',$site->[9]->{'aa'}, ' ', $site->[9]->{'prob'}, ' ',
+        $site->[9]->{'Yang95_aa'},' ', $site->[9]->{'Yang95_aa_prob'},"\n";
+
+
+=head1 DESCRIPTION
+
+This is a container object for PAML Results.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich, Aaron Mackey
+
+ Email jason-at-bioperl-dot-org
+ Email amackey-at-virginia-dot-edu
+
+=head1 CONTRIBUTORS
+
+Albert Vilella avilella-AT-gmail-DOT-com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Phylo::PAML::Result;
+use strict;
+
+
+use base qw(Bio::Root::Root Bio::AnalysisResultI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Tools::Phylo::PAML::Result->new(%data);
+ Function: Builds a new Bio::Tools::Phylo::PAML::Result object
+ Returns : Bio::Tools::Phylo::PAML::Result
+ Args    : -trees     => array reference of Bio::Tree::TreeI objects
+           -MLmatrix  => ML matrix
+           -seqs      => array reference of Bio::PrimarySeqI objects
+           -codonpos  => array reference of codon positions 
+           -codonfreq => array reference of codon frequencies
+           -version   => version string
+           -model     => model string
+           -patterns  => hashref with the fields '-patterns', '-ns', '-ls'
+           -stats     => hash ref of misc stats    (optional)
+           -aafreq    => Hashref of AA frequencies (only for AAML)
+           -aadistmat => Bio::Matrix::PhylipDist   (only for AAML)
+           -aamldistmat => Bio::Matrix::PhylipDist   (only for pairwise AAML)
+           -ntfreq    => hash ref of NT frequencies (only for BASEML)
+           -seqfile    => seqfile used
+           -kappa_distmat => Bio::Matrix::PhylipDist of kappa values (only for BASEML)
+           -alpha_distmat => Bio::Matrix::PhylipDist of alpha values (only for BASEML)
+           -NSSitesresults => arrayref of PAML::ModelResult 
+           -input_params  => input params from .ctl file 
+           -rst       => array reference of Bio::PrimarySeqI objects
+                         of ancestral state reconstruction
+           -rst_persite=> arrayref of persite data, this is a complicated set of AoH
+           -rst_trees  => rst trees with changes coded on the tree
+
+See Also: L<Bio::Tree::TreeI>, L<Bio::PrimarySeqI>, L<Bio::Matrix::PhylipDist>, L<Bio::Tools::Phylo::PAML>
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # Extract the named constructor arguments; the variables on the left
+  # line up one-to-one with the keys listed on the right.
+  my ($trees,$mlmat,$seqs,$ngmatrix,
+      $codonpos,$codonfreq,$version,
+      $model,$patterns, $stats,
+      $aafreq, $aadistmat,
+      $aamldistmat,
+      $ntfreqs, $seqfile, $kappa_mat, $alpha_mat,
+      $NSSitesresults,$input_params,$rst,$rst_persite,$rst_trees ) =
+          $self->_rearrange([qw
+                             (TREES MLMATRIX
+                              SEQS NGMATRIX
+                              CODONPOS CODONFREQ
+                              VERSION MODEL PATTERNS
+                              STATS AAFREQ AADISTMAT
+                              AAMLDISTMAT
+                              NTFREQ SEQFILE
+                              KAPPA_DISTMAT
+                              ALPHA_DISTMAT
+                              NSSITESRESULTS
+                              INPUT_PARAMS
+                              RST RST_PERSITE RST_TREES)],
+                            @args);
+
+  # Start with an empty sequence list so get_seqs is always safe to call.
+  $self->reset_seqs;
+
+  # -trees: array reference, each element handed to add_tree.
+  if( $trees ) {
+      if(ref($trees) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize trees");
+      } else {
+          foreach my $t ( @$trees ) {
+              $self->add_tree($t);
+          }
+      }
+  }
+  $self->{'_treeiterator'} = 0;
+
+  # -MLmatrix: array reference (2D matrix).
+  if( $mlmat ) {
+      if( ref($mlmat) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize MLmatrix");
+      } else {
+          $self->set_MLmatrix($mlmat);
+      }
+  }
+
+  # -seqs: array reference, each element handed to add_seq.
+  if( $seqs ) {
+      if( ref($seqs) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize seqs");
+      } else {
+          foreach my $s ( @$seqs ) {
+              $self->add_seq($s);
+          }
+      }
+  }
+
+  # -NGmatrix: array reference (2D matrix).
+  if( $ngmatrix ) {
+      if( ref($ngmatrix) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize NGmatrix");
+      } else {
+          $self->set_NGmatrix($ngmatrix);
+      }
+  }
+
+  # -codonfreq: array reference.
+  if( $codonfreq ) {
+      if( ref($codonfreq) =~ /ARRAY/i ) {
+          $self->set_CodonFreqs($codonfreq);
+      } else {
+          $self->warn("Must have provided a valid array reference to initialize codonfreq");
+      }
+  }
+
+  # -codonpos: array reference, flattened into set_codon_pos_basefreq.
+  if( $codonpos ) {
+      if( ref($codonpos) !~ /ARRAY/i ) {
+          $self->warn("Must have provided a valid array reference to initialize codonpos");
+      } else {
+          $self->set_codon_pos_basefreq(@$codonpos);
+      }
+  }
+
+  $self->version($version)   if defined $version;
+  $self->seqfile($seqfile)   if defined $seqfile;
+  $self->model($model)       if defined $model;
+
+  # -patterns: hash reference. The warning now names the type that is
+  # actually checked for.
+  if( defined $patterns ) {
+      if( ref($patterns) =~ /HASH/i ) {
+          $self->patterns($patterns);
+      } else {
+          $self->warn("Must have provided a valid hash reference to initialize patterns");
+      }
+  }
+
+  # -aafreq: hash reference; the slot always exists, even when empty.
+  $self->{'_aafreqs'} = {};
+  if( $aafreq ) {
+      if( ref($aafreq) =~ /HASH/i ) {
+          $self->set_AAFreqs($aafreq);
+      } else {
+          $self->warn("Must have provided a valid hash reference to initialize aafreq");
+      }
+  }
+
+  # -stats: hash reference of name => value pairs, added one by one.
+  if( $stats ) {
+      if( ref($stats) =~ /HASH/i ) {
+          while( my ($stat,$val) = each %$stats) {
+              $self->add_stat($stat,$val);
+          }
+      } else {
+          $self->warn("Must have provided a valid hash reference initialize stats");
+      }
+  }
+
+  # Distance matrices are stored as given.
+  $self->set_AADistMatrix($aadistmat) if defined $aadistmat;
+  $self->set_AAMLDistMatrix($aamldistmat) if defined $aamldistmat;
+
+  # -NSSitesresults: array reference of per-model results.
+  if( defined $NSSitesresults ) {
+      if( ref($NSSitesresults) !~ /ARRAY/i ) {
+          $self->warn("expected an arrayref for -NSSitesresults");
+      } else {
+          foreach my $m ( @$NSSitesresults ) {
+              $self->add_NSSite_result($m);
+          }
+      }
+  }
+
+  # -ntfreq: hash reference; the slot always exists, even when empty.
+  $self->{'_ntfreqs'} = {};
+  if( $ntfreqs ) {
+      if( ref($ntfreqs) =~ /HASH/i ) {
+          $self->set_NTFreqs($ntfreqs);
+      } else {
+          $self->warn("Must have provided a valid hash reference to initialize ntfreq");
+      }
+  }
+
+  # -kappa_distmat / -alpha_distmat are passed through without type checks.
+  if( $kappa_mat ) {
+      $self->set_KappaMatrix($kappa_mat);
+  }
+  if( $alpha_mat ) {
+      $self->set_AlphaMatrix($alpha_mat);
+  }
+
+  # -input_params: hash reference of parameter => value pairs.
+  if( $input_params ) {
+      if(  ref($input_params) !~ /HASH/i ) {
+          $self->warn("need a valid HASH object for input_params\n");
+      } else {
+          while( my ($p,$v) = each %$input_params ) {
+              $self->set_input_parameter($p,$v);
+          }
+      }
+
+  }
+
+  # -rst: array reference, each element handed to add_rst_seq.
+  $self->reset_rst_seqs;
+  if( $rst ) {
+      if( ref($rst) =~ /ARRAY/i ) {
+          for ( @$rst ) {
+              $self->add_rst_seq($_);
+          }
+      } else {
+          $self->warn("Need a valid array ref for -rst option\n");
+      }
+  }
+  if( defined $rst_persite ) {
+      $self->set_rst_persite($rst_persite);
+  }
+
+  # -rst_trees: array reference, each element handed to add_rst_tree.
+  $self->reset_rst_trees;
+  if( $rst_trees ) {
+      if( ref($rst_trees) =~ /ARRAY/i ) {
+          for ( @$rst_trees ) {
+              $self->add_rst_tree($_);
+          }
+      } else {
+          $self->warn("Need a valid array ref for -rst_trees option\n");
+      }
+  }
+
+  return $self;
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree;
+ Function: Get the next tree from the factory
+ Returns : L<Bio::Tree::TreeI>
+ Args    : none
+
+=cut
+
+sub next_tree{
+   my ($self) = @_;
+   # Return the tree at the current iterator position and advance the
+   # iterator; once past the last stored tree this yields undef.
+   return $self->{'_trees'}->[$self->{'_treeiterator'}++] || undef;
+}
+
+=head2 get_trees
+
+ Title   : get_trees
+ Usage   : my @trees = $result->get_trees;
+ Function: Get all the parsed trees as an array
+ Returns : Array of trees
+ Args    : none
+
+
+=cut
+
+sub get_trees{
+   my $self = shift;
+   # When no tree has been stored, hand back an empty list.
+   my $stored = $self->{'_trees'} || [];
+   return @{$stored};
+}
+
+=head2 rewind_tree_iterator
+
+ Title   : rewind_tree_iterator
+ Usage   : $result->rewind_tree_iterator()
+ Function: Rewinds the tree iterator so that next_tree can be 
+           called again from the beginning
+ Returns : none
+ Args    : none
+
+=cut
+
+sub rewind_tree_iterator {
+    my ($self) = @_;
+    # Position 0 makes the next call to next_tree start from the first tree.
+    $self->{'_treeiterator'} = 0;
+    return $self->{'_treeiterator'};
+}
+
+=head2 add_tree
+
+ Title   : add_tree
+ Usage   : $result->add_tree($tree);
+ Function: Adds a tree 
+ Returns : integer which is the number of trees stored
+ Args    : L<Bio::Tree::TreeI>
+
+=cut
+
+sub add_tree{
+   my ($self,$tree) = @_;
+   # Only objects implementing Bio::Tree::TreeI are stored; anything else
+   # is silently ignored.
+   if( $tree && ref($tree) && $tree->isa('Bio::Tree::TreeI') ) {
+       push @{$self->{'_trees'}},$tree;
+   }
+   # Guard against the list not existing yet: dereferencing an undefined
+   # '_trees' slot would die under strict refs when the very first
+   # argument is rejected.
+   return scalar @{$self->{'_trees'} || []};
+}
+
+
+=head2 set_MLmatrix
+
+ Title   : set_MLmatrix
+ Usage   : $result->set_MLmatrix($mat)
+ Function: Set the ML Matrix
+ Returns : none
+ Args    : Arrayref to MLmatrix (must be arrayref to 2D matrix which is 
+	   lower triangle pairwise)
+
+
+=cut
+
+sub set_MLmatrix{
+   my $self   = shift;
+   my $matrix = shift;
+   # Nothing to do when no matrix was passed.
+   defined $matrix or return;
+   # Only array references are accepted; the inner structure is not checked.
+   unless( ref($matrix) =~ /ARRAY/i ) {
+       $self->warn("Did not provide a valid 2D Array reference for set_MLmatrix");
+       return;
+   }
+   return $self->{'_mlmatrix'} = $matrix;
+}
+
+=head2 get_MLmatrix
+
+ Title   : get_MLmatrix
+ Usage   : my $mat = $result->get_MLmatrix()
+ Function: Get the ML matrix
+ Returns : 2D Array reference
+ Args    : none
+
+
+=cut
+
+sub get_MLmatrix{
+   my ($self) = @_;
+   # Returns whatever set_MLmatrix stored, or undef if nothing was set.
+   return $self->{'_mlmatrix'};
+}
+
+=head2 set_NGmatrix
+
+ Title   : set_NGmatrix
+ Usage   : $result->set_NGmatrix($mat)
+ Function: Set the Nei & Gojobori Matrix
+ Returns : none
+ Args    : Arrayref to NGmatrix (must be arrayref to 2D matrix which is 
+	   lower triangle pairwise)
+
+
+=cut
+
+sub set_NGmatrix{
+   my $self   = shift;
+   my $matrix = shift;
+   # Nothing to do when no matrix was passed.
+   defined $matrix or return;
+   # Only array references are accepted; the inner structure is not checked.
+   unless( ref($matrix) =~ /ARRAY/i ) {
+       $self->warn("Did not provide a valid 2D Array reference for set_NGmatrix");
+       return;
+   }
+   return $self->{'_ngmatrix'} = $matrix;
+}
+
+=head2 get_NGmatrix
+
+ Title   : get_NGmatrix
+ Usage   : my $mat = $result->get_NGmatrix()
+ Function: Get the Nei & Gojobori matrix
+ Returns : 2D Array reference
+ Args    : none
+
+
+=cut
+
+sub get_NGmatrix{
+   my ($self) = @_;
+   # Returns whatever set_NGmatrix stored, or undef if nothing was set.
+   return $self->{'_ngmatrix'};
+}
+
+
+=head2 add_seq
+
+ Title   : add_seq
+ Usage   : $obj->add_seq($seq)
+ Function: Add a Bio::PrimarySeq to the Result
+ Returns : none
+ Args    : Bio::PrimarySeqI
+See also : L<Bio::PrimarySeqI>
+
+=cut
+
+sub add_seq{
+   my ($self, $candidate) = @_;
+   # False values are ignored without a warning.
+   if( $candidate ) {
+       # Anything that is not a Bio::PrimarySeqI is refused with a warning.
+       if( ! $candidate->isa("Bio::PrimarySeqI") ) {
+           $self->warn("Must provide a valid Bio::PrimarySeqI to add_seq");
+           return;
+       }
+       push @{$self->{'_seqs'}}, $candidate;
+   }
+
+}
+
+=head2 reset_seqs
+
+ Title   : reset_seqs
+ Usage   : $result->reset_seqs
+ Function: Reset the OTU seqs stored
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_seqs{
+   my $self = shift;
+   # Replace the stored list with a brand-new empty array.
+   return $self->{'_seqs'} = [];
+}
+
+=head2 get_seqs
+
+ Title   : get_seqs
+ Usage   : my @otus = $result->get_seqs
+ Function: Get the seqs Bio::PrimarySeq (OTU = Operational Taxonomic Unit)
+ Returns : Array of Bio::PrimarySeq
+ Args    : None
+See also : L<Bio::PrimarySeq>
+
+=cut
+
+sub get_seqs{
+   my $self = shift;
+   # The list is expected to exist already (reset_seqs creates it).
+   my $stored = $self->{'_seqs'};
+   return @{$stored};
+}
+
+=head2 set_codon_pos_basefreq
+
+ Title   : set_codon_pos_basefreq
+ Usage   : $result->set_codon_pos_basefreq(@freqs)
+ Function: Set the codon position base frequencies
+ Returns : none
+ Args    : Array of length 3 where each slot has a hashref 
+           keyed on DNA base
+
+
+=cut
+
+sub set_codon_pos_basefreq {
+    my ($self,@codonpos) = @_;
+    # Exactly three entries are required, one per codon position.
+    if( scalar @codonpos != 3 ) {
+	$self->warn("invalid array to set_codon_pos_basefreq, must be an array of length 3");
+	return;
+    }
+    # Each entry should be a hashref keyed on DNA base; only the presence
+    # of the 'A' key is actually tested.
+    # NOTE(review): a bad entry only triggers a warning, the array is
+    # still stored below - confirm that this is intended.
+    foreach my $pos ( @codonpos ) {
+	if( ref($pos) !~ /HASH/i ||
+	    ! exists $pos->{'A'} ) {
+	    $self->warn("invalid array to set_codon_pos_basefreq, must be an array with hashreferences keyed on DNA bases, C,A,G,T");
+	}
+    }
+    # Store a copy of the list so later changes to the caller's array
+    # do not affect the object.
+    $self->{'_codonposbasefreq'} = [@codonpos];
+}
+
+=head2 get_codon_pos_basefreq
+
+ Title   : get_codon_pos_basefreq
+ Usage   : my @basepos = $result->get_codon_pos_basefreq;
+ Function: Get the codon position base frequencies
+ Returns : Array of length 3 (each codon position), each 
+           slot is a hashref keyed on DNA bases, the values are
+           the frequency of the base at that position for all sequences
+ Args    : none
+ Note    : The array starts at 0 so position '1' is in position '0' 
+           of the array
+
+=cut
+
+sub get_codon_pos_basefreq{
+   my ($self) = @_;
+   # The slot is only filled by set_codon_pos_basefreq; fall back to an
+   # empty list so that results without codon frequencies do not die on
+   # an undefined array reference under strict refs.
+   return @{$self->{'_codonposbasefreq'} || []};
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $obj->version($newval)
+ Function: Get/Set version
+ Returns : value of version
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub version {
+    # Combined getter/setter: store the argument when one is supplied,
+    # then hand back whatever is currently stored.
+    my ($self, @args) = @_;
+    if (@args) {
+        $self->{'_version'} = $args[0];
+    }
+    return $self->{'_version'};
+}
+
+=head2 seqfile
+
+ Title   : seqfile
+ Usage   : $obj->seqfile($newval)
+ Function: Get/Set seqfile
+ Returns : value of seqfile
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub seqfile {
+    # Combined getter/setter for the sequence file name.
+    my ($self, @args) = @_;
+    if (@args) {
+        $self->{'_seqfile'} = $args[0];
+    }
+    return $self->{'_seqfile'};
+}
+
+=head2 model
+
+ Title   : model
+ Usage   : $obj->model($newval)
+ Function: Get/Set model
+ Returns : value of model 
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub model {
+    # Combined getter/setter for the model description; an explicit
+    # undef argument clears the stored value.
+    my ($self, @args) = @_;
+    $self->{'_model'} = $args[0] if @args;
+    return $self->{'_model'};
+}
+
+
+=head2 patterns
+
+ Title   : patterns
+ Usage   : $obj->patterns($newval)
+ Function: Get/Set Patterns hash
+ Returns : Hashref of pattern data
+ Args    : [optional] Hashref of patterns
+         : The hashref is typically
+         : { -patterns => \@arrayref
+         :   -ns       => $ns
+         :   -ls       => $ls
+         : }
+
+=cut
+
+sub patterns {
+    # Combined getter/setter for the pattern data hashref.
+    my ($self, @args) = @_;
+    $self->{'_patterns'} = $args[0] if @args;
+    return $self->{'_patterns'};
+}
+
+=head2 set_AAFreqs
+
+ Title   : set_AAFreqs
+ Usage   : $result->set_AAFreqs(\%aafreqs);
+ Function: Get/Set AA freqs
+ Returns : none
+ Args    : Hashref, keys are the sequence names, each points to a hashref
+           which in turn has keys which are the amino acids
+
+
+=cut
+
+sub set_AAFreqs {
+    # Merge per-sequence amino acid frequency tables into the stored set.
+    # Anything that is not a hash reference is silently ignored.
+    my ($self, $aafreqs) = @_;
+
+    if ( $aafreqs && ref($aafreqs) =~ /HASH/i ) {
+        $self->{'_aafreqs'}->{$_} = $aafreqs->{$_} for keys %{$aafreqs};
+    }
+}
+
+=head2 get_AAFreqs
+
+ Title   : get_AAFreqs
+ Usage   : my $all_aa_freqs = $result->get_AAFreqs() 
+            OR
+           my $seq_aa_freqs = $result->get_AAFreqs($seqname) 
+ Function: Get the AA freqs, either for every sequence or just 
+           for a specific sequence
+           The average aa freqs for the entire set are also available
+           for the sequence named 'Average'
+ Returns : Hashref
+ Args    : (optional) sequence name to retrieve aa freqs for
+
+
+=cut
+
+sub get_AAFreqs {
+    # With a sequence name: that sequence's frequency hashref (an empty
+    # hashref when the name is unknown).  Without one: the whole table.
+    my ($self, $seqname) = @_;
+    return $self->{'_aafreqs'} unless $seqname;
+    return $self->{'_aafreqs'}->{$seqname} || {};
+}
+
+=head2 set_NTFreqs
+
+ Title   : set_NTFreqs
+ Usage   : $result->set_NTFreqs(\%ntfreqs);
+ Function: Set NT freqs
+ Returns : none
+ Args    : Hashref, keys are the sequence names, each points to a hashref
+           which in turn has keys which are the nucleotides
+
+
+=cut
+
+sub set_NTFreqs {
+    # Merge per-sequence nucleotide frequency tables into the stored set.
+    # Anything that is not a hash reference is silently ignored.
+    my ($self, $freqs) = @_;
+
+    if ( $freqs && ref($freqs) =~ /HASH/i ) {
+        $self->{'_ntfreqs'}->{$_} = $freqs->{$_} for keys %{$freqs};
+    }
+}
+
+=head2 get_NTFreqs
+
+ Title   : get_NTFreqs
+ Usage   : my $all_nt_freqs = $result->get_NTFreqs() 
+            OR
+           my $seq_nt_freqs = $result->get_NTFreqs($seqname) 
+ Function: Get the NT freqs, either for every sequence or just 
+           for a specific sequence
+           The average nt freqs for the entire set are also available
+           for the sequence named 'Average'
+ Returns : Hashref
+ Args    : (optional) sequence name to retrieve nt freqs for
+
+
+=cut
+
+sub get_NTFreqs {
+    # With a sequence name: that sequence's frequency hashref (an empty
+    # hashref when the name is unknown).  Without one: the whole table.
+    my ($self, $seqname) = @_;
+    return $self->{'_ntfreqs'} unless $seqname;
+    return $self->{'_ntfreqs'}->{$seqname} || {};
+}
+
+=head2 add_stat
+
+ Title   : add_stat
+ Usage   : $result->add_stat($stat,$value);
+ Function: Add some misc stat values (key/value pairs)
+ Returns : none
+ Args    : $stat  stat name
+           $value stat value
+
+
+=cut
+
+sub add_stat {
+    # Record one named statistic; a call with an undefined name or an
+    # undefined value stores nothing.
+    my ($self, $stat, $value) = @_;
+    if ( defined $stat && defined $value ) {
+        $self->{'_stats'}->{$stat} = $value;
+    }
+    return;
+}
+
+=head2 get_stat
+
+ Title   : get_stat
+ Usage   : my $value = $result->get_stat($name);
+ Function: Get the value for a stat of a given name
+ Returns : scalar value
+ Args    : name of the stat
+
+
+=cut
+
+sub get_stat {
+    # Look up one statistic by name; undef when it was never recorded.
+    my $self     = shift;
+    my $statname = shift;
+    return $self->{'_stats'}->{$statname};
+}
+
+=head2 get_stat_names
+
+ Title   : get_stat_names
+ Usage   : my @names = $result->get_stat_names;
+ Function: Get the stat names stored for the result
+ Returns : array of names
+ Args    : none
+
+
+=cut
+
+sub get_stat_names {
+    # List the names of all recorded statistics (empty list when none).
+    my $self  = shift;
+    my $stats = $self->{'_stats'} || {};
+    return keys %{$stats};
+}
+
+=head2 get_AADistMatrix
+
+ Title   : get_AADistMatrix
+ Usage   : my $mat = $obj->get_AADistMatrix()
+ Function: Get AADistance Matrix
+ Returns : value of AADistMatrix (Bio::Matrix::PhylipDist)
+ Args    : none
+
+
+=cut
+
+sub get_AADistMatrix {
+    # Return the stored AA distance matrix (undef when none was set).
+    my ($self) = @_;
+    return $self->{'_AADistMatix'};
+}
+
+=head2 set_AADistMatrix
+
+ Title   : set_AADistMatrix
+ Usage   : $obj->set_AADistMatrix($mat);
+ Function: Set the AADistance Matrix (Bio::Matrix::PhylipDist)
+ Returns : none
+ Args    : AADistance Matrix (Bio::Matrix::PhylipDist)
+
+
+=cut
+
+sub set_AADistMatrix{
+   # Store the AA distance matrix.
+   #   $d : expected to be a Bio::Matrix::PhylipDist object.
+   # An invalid value produces a warning but is still stored, as before.
+   my ($self,$d) = @_;
+   # eval guards the ->isa call so that a plain (unblessed) reference
+   # produces the warning below instead of a fatal "unblessed reference"
+   # error.
+   my $valid = ref($d) && eval { $d->isa('Bio::Matrix::PhylipDist') };
+   if( ! $valid ) {
+       $self->warn("Must provide a valid Bio::Matrix::PhylipDist for set_AADistMatrix");
+   }
+   $self->{'_AADistMatix'} = $d;
+   return;
+}
+
+=head2 get_AAMLDistMatrix
+
+ Title   : get_AAMLDistMatrix
+ Usage   : my $mat = $obj->get_AAMLDistMatrix()
+ Function: Get AAMLDistance Matrix
+ Returns : value of AAMLDistMatrix (Bio::Matrix::PhylipDist)
+ Args    : none
+
+
+=cut
+
+sub get_AAMLDistMatrix {
+    # Return the stored AA ML distance matrix (undef when none was set).
+    my ($self) = @_;
+    return $self->{'_AAMLDistMatix'};
+}
+
+=head2 set_AAMLDistMatrix
+
+ Title   : set_AAMLDistMatrix
+ Usage   : $obj->set_AAMLDistMatrix($mat);
+ Function: Set the AA ML Distance Matrix (Bio::Matrix::PhylipDist)
+ Returns : none 
+ Args    : AAMLDistance Matrix (Bio::Matrix::PhylipDist)
+
+
+=cut
+
+sub set_AAMLDistMatrix{
+   # Store the AA ML distance matrix.
+   #   $d : expected to be a Bio::Matrix::PhylipDist object.
+   # An invalid value produces a warning but is still stored, as before.
+   my ($self,$d) = @_;
+   # eval guards the ->isa call so that a plain (unblessed) reference
+   # produces the warning below instead of a fatal "unblessed reference"
+   # error.
+   my $valid = ref($d) && eval { $d->isa('Bio::Matrix::PhylipDist') };
+   if( ! $valid ) {
+       $self->warn("Must provide a valid Bio::Matrix::PhylipDist for set_AAMLDistMatrix");
+   }
+   $self->{'_AAMLDistMatix'} = $d;
+   return;
+}
+
+=head2 add_NSSite_result
+
+ Title   : add_NSSite_result
+ Usage   : $result->add_NSSite_result($model)
+ Function: Add a NSsite result (PAML::ModelResult)
+ Returns : none
+ Args    : Bio::Tools::Phylo::PAML::ModelResult
+
+
+=cut
+
+sub add_NSSite_result{
+   # Append one NSsites model result to the stored list.
+   #   $model : the result object to add; an undefined value adds nothing.
+   # Returns the number of results stored after the call.
+   my ($self,$model) = @_;
+   if( defined $model ) {
+       push @{$self->{'_nssiteresult'}}, $model;
+   }
+   # "|| []" keeps the count at 0 (instead of dying under "use strict")
+   # when called without a model before anything has been stored.
+   return scalar @{$self->{'_nssiteresult'} || []};
+}
+
+=head2 get_NSSite_results
+
+ Title   : get_NSSite_results
+ Usage   : my @results = $result->get_NSSite_results;
+ Function: Get the list of stored NSSite results
+ Returns : Array of PAML::ModelResult results
+ Args    : none
+
+
+=cut
+
+sub get_NSSite_results {
+    # Return the stored NSsites model results as a list (empty when none).
+    my $self    = shift;
+    my $results = $self->{'_nssiteresult'} || [];
+    return @{$results};
+}
+
+=head2 set_CodonFreqs
+
+ Title   : set_CodonFreqs
+ Usage   : $obj->set_CodonFreqs($newval)
+ Function: Get/Set the Codon Frequency table
+ Returns : value of set_CodonFreqs (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub set_CodonFreqs {
+    # Store the codon frequency table when an argument is given; in
+    # either case return the currently stored table.
+    my ($self, @args) = @_;
+    $self->{'_codonfreqs'} = $args[0] if @args;
+    return $self->{'_codonfreqs'};
+}
+
+=head2 get_CodonFreqs
+
+ Title   : get_CodonFreqs
+ Usage   : my @codon_freqs = $result->get_CodonFreqs() 
+ Function: Get the Codon freqs
+ Returns : Array
+ Args    : none
+
+
+=cut
+
+sub get_CodonFreqs {
+    # Return the rows of the codon frequency table as a list (empty when
+    # no table was stored).
+    my $self  = shift;
+    my $table = $self->{'_codonfreqs'} || [];
+    return @{$table};
+}
+
+
+=head2 BASEML Relevant values
+
+=cut
+
+=head2 get_KappaMatrix
+
+ Title   : get_KappaMatrix
+ Usage   : my $mat = $obj->get_KappaMatrix()
+ Function: Get KappaDistance Matrix
+ Returns : value of KappaMatrix (Bio::Matrix::PhylipDist)
+ Args    : none
+
+
+=cut
+
+sub get_KappaMatrix {
+    # Return the stored kappa distance matrix (undef when none was set).
+    my ($self) = @_;
+    return $self->{'_KappaMatix'};
+}
+
+=head2 set_KappaMatrix
+
+ Title   : set_KappaMatrix
+ Usage   : $obj->set_KappaMatrix($mat);
+ Function: Set the KappaDistance Matrix (Bio::Matrix::PhylipDist)
+ Returns : none
+ Args    : KappaDistance Matrix (Bio::Matrix::PhylipDist)
+
+
+=cut
+
+sub set_KappaMatrix{
+   # Store the kappa distance matrix.
+   #   $d : expected to be a Bio::Matrix::PhylipDist object.
+   # An invalid value produces a warning but is still stored, as before.
+   my ($self,$d) = @_;
+   # eval guards the ->isa call so that a plain (unblessed) reference
+   # produces the warning below instead of a fatal "unblessed reference"
+   # error.
+   my $valid = ref($d) && eval { $d->isa('Bio::Matrix::PhylipDist') };
+   if( ! $valid ) {
+       # the warning now names this method (it used to say set_NTDistMatrix)
+       $self->warn("Must provide a valid Bio::Matrix::PhylipDist for set_KappaMatrix");
+   }
+   $self->{'_KappaMatix'} = $d;
+   return;
+}
+
+
+=head2 get_AlphaMatrix
+
+ Title   : get_AlphaMatrix
+ Usage   : my $mat = $obj->get_AlphaMatrix()
+ Function: Get AlphaDistance Matrix
+ Returns : value of AlphaMatrix (Bio::Matrix::PhylipDist)
+ Args    : none
+
+
+=cut
+
+sub get_AlphaMatrix {
+    # Return the stored alpha distance matrix (undef when none was set).
+    my ($self) = @_;
+    return $self->{'_AlphaMatix'};
+}
+
+=head2 set_AlphaMatrix
+
+ Title   : set_AlphaMatrix
+ Usage   : $obj->set_AlphaMatrix($mat);
+ Function: Set the AlphaDistance Matrix (Bio::Matrix::PhylipDist)
+ Returns : none
+ Args    : AlphaDistance Matrix (Bio::Matrix::PhylipDist)
+
+
+=cut
+
+sub set_AlphaMatrix{
+   # Store the alpha distance matrix.
+   #   $d : expected to be a Bio::Matrix::PhylipDist object.
+   # An invalid value produces a warning but is still stored, as before.
+   my ($self,$d) = @_;
+   # eval guards the ->isa call so that a plain (unblessed) reference
+   # produces the warning below instead of a fatal "unblessed reference"
+   # error.
+   my $valid = ref($d) && eval { $d->isa('Bio::Matrix::PhylipDist') };
+   if( ! $valid ) {
+       # the warning now names this method (it used to say set_NTDistMatrix)
+       $self->warn("Must provide a valid Bio::Matrix::PhylipDist for set_AlphaMatrix");
+   }
+   $self->{'_AlphaMatix'} = $d;
+   return;
+}
+
+=head2 set_input_parameter
+
+ Title   : set_input_parameter
+ Usage   : $obj->set_input_parameter($p,$vl);
+ Function: Set an Input Parameter 
+ Returns : none
+ Args    : $parameter and $value
+
+
+=cut
+
+sub set_input_parameter {
+    # Record one input parameter; a call without a defined parameter
+    # name is ignored.
+    my $self = shift;
+    my ($p, $v) = @_;
+    defined $p or return;
+    $self->{'_input_parameters'}->{$p} = $v;
+}
+
+=head2 get_input_parameters
+
+ Title   : get_input_parameters
+ Usage   : $obj->get_input_parameters;
+ Function: Get Input Parameters 
+ Returns : Hash of key/value pairs
+ Args    : none
+
+
+=cut
+
+sub get_input_parameters {
+    # Return the input parameters as a flat key/value list (empty when
+    # none were recorded).
+    my $self   = shift;
+    my $params = $self->{'_input_parameters'} || {};
+    return %{$params};
+}
+
+=head2 reset_input_parameters
+
+ Title   : reset_input_parameters
+ Usage   : $obj->reset_input_parameters;
+ Function: Reset the Input Parameters hash 
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_input_parameters {
+    # Drop all recorded input parameters by installing a fresh empty hash.
+    my $self = shift;
+    $self->{'_input_parameters'} = {};
+}
+
+=head1 Reconstructed Ancestral State relevant options 
+
+=head2 add_rst_seq
+
+ Title   : add_rst_seq
+ Usage   : $obj->add_rst_seq($seq)
+ Function: Add a Bio::PrimarySeq to the RST Result
+ Returns : none
+ Args    : Bio::PrimarySeqI
+See also : L<Bio::PrimarySeqI>
+
+=cut
+
+sub add_rst_seq {
+    # Append a sequence to the RST sequence list.  A false argument is
+    # ignored; an object that is not a Bio::PrimarySeqI is rejected with
+    # a warning.
+    my ($self, $seq) = @_;
+    if ($seq) {
+        if ( !$seq->isa("Bio::PrimarySeqI") ) {
+            $self->warn("Must provide a valid Bio::PrimarySeqI to add_rst_seq");
+            return;
+        }
+        push @{ $self->{'_rstseqs'} }, $seq;
+    }
+
+}
+
+=head2 reset_rst_seqs
+
+ Title   : reset_rst_seqs
+ Usage   : $result->reset_rst_seqs
+ Function: Reset the RST seqs stored
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_rst_seqs {
+    # Forget every stored RST sequence.
+    my $self = shift;
+    $self->{'_rstseqs'} = [];
+}
+
+=head2 get_rst_seqs
+
+ Title   : get_rst_seqs
+ Usage   : my @otus = $result->get_rst_seqs
+ Function: Get the seqs Bio::PrimarySeq
+ Returns : Array of Bio::PrimarySeqI objects
+ Args    : None
+See also : L<Bio::PrimarySeq>
+
+=cut
+
+sub get_rst_seqs {
+    # Return the stored RST sequences as a list (empty when none).
+    my $self = shift;
+    my $seqs = $self->{'_rstseqs'} || [];
+    return @{$seqs};
+}
+
+
+=head2 add_rst_tree
+
+ Title   : add_rst_tree
+ Usage   : $obj->add_rst_tree($tree)
+ Function: Add a Bio::Tree::TreeI to the RST Result
+ Returns : none
+ Args    : Bio::Tree::TreeI
+See also : L<Bio::Tree::TreeI>
+
+=cut
+
+sub add_rst_tree {
+    # Append a tree to the RST tree list.  A false argument is ignored;
+    # an object that is not a Bio::Tree::TreeI is rejected with a warning.
+    my ($self, $tree) = @_;
+    if ($tree) {
+        if ( !$tree->isa("Bio::Tree::TreeI") ) {
+            $self->warn("Must provide a valid Bio::Tree::TreeI to add_rst_tree not $tree");
+            return;
+        }
+        push @{ $self->{'_rsttrees'} }, $tree;
+    }
+}
+
+=head2 reset_rst_trees
+
+ Title   : reset_rst_trees
+ Usage   : $result->reset_rst_trees
+ Function: Reset the RST trees stored
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_rst_trees {
+    # Forget every stored RST tree.
+    my $self = shift;
+    $self->{'_rsttrees'} = [];
+}
+
+=head2 get_rst_trees
+
+ Title   : get_rst_trees
+ Usage   : my @otus = $result->get_rst_trees
+ Function: Get the trees Bio::Tree::TreeI
+ Returns : Array of Bio::Tree::TreeI objects
+ Args    : None
+See also : L<Bio::Tree::TreeI>
+
+=cut
+
+sub get_rst_trees {
+    # Return the stored RST trees as a list (empty when none).
+    my $self  = shift;
+    my $trees = $self->{'_rsttrees'} || [];
+    return @{$trees};
+}
+
+=head2 set_rst_persite
+
+ Title   : set_rst_persite
+ Usage   : $obj->set_rst_persite($newval)
+ Function: Get/Set the per-site RST values
+ Returns : value of set_rst_persite (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub set_rst_persite {
+    # Store the per-site RST values when an argument is given; in either
+    # case return the currently stored value.
+    my ($self, @args) = @_;
+    $self->{'_rstpersite'} = $args[0] if @args;
+    return $self->{'_rstpersite'};
+}
+
+=head2 get_rst_persite
+
+ Title   : get_rst_persite
+ Usage   : my @rst_persite = @{$result->get_rst_persite()} 
+ Function: Get the per-site RST values
+ Returns : Array
+ Args    : none
+
+
+=cut
+
+sub get_rst_persite {
+    # Return the stored per-site RST values, or a reference to an empty
+    # array when nothing was stored.
+    my $self    = shift;
+    my $persite = $self->{'_rstpersite'};
+    return $persite ? $persite : [];
+}
+
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/PAML.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1484 @@
+# $Id: PAML.pm,v 1.49.2.2 2006/11/08 17:25:55 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Phylo::PAML
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich, Aaron J Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::PAML - Parses output from the PAML programs codeml,
+baseml, basemlg, codemlsites and yn00
+
+=head1 SYNOPSIS
+
+  #!/usr/bin/perl -Tw
+  use strict;
+
+  use Bio::Tools::Phylo::PAML;
+
+  # need to specify the output file name (or a fh) (defaults to
+  # -file => "codeml.mlc"); also, optionally, the directory in which
+  # the other result files (rst, 2ML.dS, etc) may be found (defaults
+  # to "./")
+  my $parser = new Bio::Tools::Phylo::PAML
+    (-file => "./results/mlc", -dir => "./results/");
+
+  # get the first/next result; a Bio::Tools::Phylo::PAML::Result object,
+  # which isa Bio::SeqAnalysisResultI object.
+  my $result = $parser->next_result();
+
+  # get the sequences used in the analysis; returns Bio::PrimarySeq
+  # objects (OTU = Operational Taxonomic Unit).
+  my @otus = $result->get_seqs();
+
+  # codon summary: codon usage of each sequence [ arrayref of {
+  # hashref of counts for each codon } for each sequence and the
+  # overall sum ], and positional nucleotide distribution [ arrayref
+  # of { hashref of frequencies for each nucleotide } for each
+  # sequence and overall frequencies ]:
+  my ($codonusage, $ntdist) = $result->get_codon_summary();
+
+  # example manipulations of $codonusage and $ntdist:
+  printf "There were %d %s codons in the first seq (%s)\n",
+    $codonusage->[0]->{AAA}, 'AAA', $otus[0]->id();
+  printf "There were %d %s codons used in all the sequences\n",
+    $codonusage->[$#{$codonusage}]->{AAA}, 'AAA';
+  printf "Nucleotide %c was present %g of the time in seq %s\n",
+    'A', $ntdist->[1]->{A}, $otus[1]->id();
+
+  # get Nei & Gojobori dN/dS matrix:
+  my $NGmatrix = $result->get_NGmatrix();
+
+  # get ML-estimated dN/dS matrix, if calculated; this corresponds to
+  # the runmode = -2, pairwise comparison usage of codeml
+  my $MLmatrix = $result->get_MLmatrix();
+
+  # These matrices are length(@otu) x length(@otu) "strict lower
+  # triangle" 2D-matrices, which means that the diagonal and
+  # everything above it is undefined.  Each of the defined cells is a
+  # hashref of estimates for "dN", "dS", "omega" (dN/dS ratio), "t",
+  # "S" and "N".  If a ML matrix, "lnL" and "kappa" will also be defined.
+  printf "The omega ratio for sequences %s vs %s was: %g\n",
+    $otus[0]->id, $otus[1]->id, $MLmatrix->[0]->[1]->{omega};
+
+  # with a little work, these matrices could also be passed to
+  # Bio::Tools::Run::Phylip::Neighbor, or other similar tree-building
+  # method that accepts a matrix of "distances" (using the LOWTRI
+  # option):
+  my $distmat = [ map { [ map { $$_{omega} } @$_ ] } @$MLmatrix ];
+
+  # for runmode's other than -2, get tree topology with estimated
+  # branch lengths; returns a Bio::Tree::TreeI-based tree object with
+  # added PAML parameters at each node
+  my ($tree) = $result->get_trees();
+  for my $node ($tree->get_nodes()) {
+     # inspect the tree: the "t" (time) parameter is available via
+     # $node->branch_length(); all other branch-specific parameters
+     # ("omega", "dN", etc.) are available via 
+     # ($omega) = $node->get_tag_values('omega');
+  }
+
+  # if you are using model based Codeml then trees are stored in each 
+  # modelresult object
+  for my $modelresult ( $result->get_NSSite_results ) {
+    # model M0, M1, etc
+    print "model is ", $modelresult->model_num, "\n";
+    my ($tree) = $modelresult->get_trees();
+    for my $node ($tree->get_nodes()) {
+     # inspect the tree: the "t" (time) parameter is available via
+     # $node->branch_length(); all other branch-specific parameters
+     # ("omega", "dN", etc.) are available via 
+     # ($omega) = $node->get_tag_values('omega');
+   }
+  }
+
+  # get any general model parameters: kappa (the
+  # transition/transversion ratio), NSsites model parameters ("p0",
+  # "p1", "w0", "w1", etc.), etc.
+  my $params = $result->get_model_params();
+  printf "M1 params: p0 = %g\tp1 = %g\n", $params->{p0}, $params->{p1};
+
+  # find, say, positively selected sites!
+  if ($params->{w2} > 1) {
+    for (my $i = 0; $i < @probs ; $i++) {
+      if ($probs[$i]->[2] > 0.5) {
+         # assumes model M1: three w's, w0, w1 and w2 (positive selection)
+         printf "position %d: (%g prob, %g omega, %g mean w)\n",
+           $i, $probs[$i]->[2], $params->{w2}, $probs[$i]->[3];
+      }
+    }
+  } else { print "No positive selection found!\n"; }
+
+  # parse AAML result files
+  my $aamat = $result->get_AADistMatrix();
+  my $aaMLmat = $result->get_AAMLDistMatrix();
+
+=head1 DESCRIPTION
+
+This module is used to parse the output from the PAML programs codeml,
+baseml, basemlg, codemlsites and yn00.  You can use the
+Bio::Tools::Run::Phylo::PAML::* modules to actually run some of the
+PAML programs, but this module is only useful to parse the output.
+
+=head1 TO DO
+
+Implement get_posteriors(). For NSsites models, obtain arrayrefs of 
+posterior probabilities for membership in each class for every 
+position; probabilities correspond to classes w0, w1, ... etc.
+
+  my @probs = $result->get_posteriors();
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich, Aaron Mackey
+
+Email jason-at-bioperl.org
+Email amackey-at-virginia.edu
+
+=head1 CONTRIBUTORS
+
+Albert Vilella avilella-AT-gmail-DOT-com
+Sendu Bala     bix at sendu.me.uk
+
+=head1 TODO
+
+RST parsing -- done, Avilella contributions bugzilla#1506, added by jason 1.29
+            -- still need to parse in joint probability and non-syn changes 
+               at site table
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Phylo::PAML;
+use vars qw($RSTFILENAME);
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::Root::IO Bio::AnalysisParserI);
+
+BEGIN {
+  $RSTFILENAME = 'rst'; # where to get the RST data from
+}
+
+# other objects used:
+use IO::String;
+use File::Spec;
+use Bio::TreeIO;
+use Bio::Tools::Phylo::PAML::Result;
+use Bio::PrimarySeq;
+use Bio::Matrix::PhylipDist;
+use Bio::Tools::Phylo::PAML::ModelResult;
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Phylo::PAML(%args);
+ Function: Builds a new Bio::Tools::Phylo::PAML object
+ Returns : Bio::Tools::Phylo::PAML
+ Args    : Hash of options: -file, -fh, -dir
+           -file (or -fh) should contain the contents of the PAML
+                 outfile; 
+           -dir is the (optional) name of the directory in
+                which the PAML program was run (and includes other
+                PAML-generated files from which we can try to gather data)
+
+=cut
+
+sub new {
+    # Build the parser: construct the base object, set up the I/O layer
+    # from the same arguments, and remember the optional -dir argument.
+    my $class = shift;
+    my @args  = @_;
+
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize_io(@args);
+
+    my ($dir) = $self->_rearrange( [qw(DIR)], @args );
+    if ( defined $dir ) {
+        $self->{_dir} = $dir;
+    }
+
+    return $self;
+}
+
+=head2 Implement Bio::AnalysisParserI interface
+
+=cut
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : $result = $obj->next_result();
+ Function: Returns the next result available from the input, or
+           undef if there are no more results.
+ Example :
+ Returns : a Bio::Tools::Phylo::PAML::Result object
+ Args    : none
+
+=cut
+
+sub next_result {
+    # Parse the next result from the input stream and wrap everything
+    # collected into a Bio::Tools::Phylo::PAML::Result object.
+    # Returns nothing when no result section was found.
+
+    my ($self) = @_;
+    my %data;
+    # parse the RST file, if it doesn't exist or if dir is not set
+    # this will just skip the parsing
+    $self->_parse_rst();
+    my $idlookup; # a hashreference to SEQID (number) ==> 'SEQUENCENAME'
+    # get the various codon and other sequence summary data, if necessary:
+    $self->_parse_summary
+        unless ($self->{'_summary'} && !$self->{'_summary'}->{'multidata'});
+
+    # OK, depending on seqtype and runmode now, one of a few things can happen:
+    my $seqtype = $self->{'_summary'}->{'seqtype'};
+    if ($seqtype eq 'CODONML' || $seqtype eq 'AAML') {
+        my $has_model_line = 0;
+        while (defined ($_ = $self->_readline)) {
+            if ($seqtype eq 'CODONML' &&
+                m/^pairwise comparison, codon frequencies:/) {
+                # runmode = -2, CODONML
+                $self->_pushback($_);
+                %data = $self->_parse_PairwiseCodon;
+                last;
+            } elsif ($seqtype eq 'AAML' && m/^ML distances of aa seqs\.$/) {
+                $self->_pushback($_);
+                # get AA distances
+                %data = ( '-AAMLdistmat' => $self->_parse_aa_dists());
+                # $self->_pushback($_);
+                # %data = $self->_parse_PairwiseAA;
+                # last;
+            } elsif (m/^Model\s+(\d+)/ ||
+                     ((! $has_model_line && m/^TREE/) &&
+                      $seqtype eq 'CODONML')) {
+                $self->_pushback($_);
+                my $model = $self->_parse_NSsitesBatch;
+                push @{$data{'-NSsitesresults'}}, $model;
+                $has_model_line = 1;
+            } elsif ( m/for each branch/ ) {
+                my %branch_dnds = $self->_parse_branch_dnds;
+                if( ! defined $data{'-trees'} ) {
+                    warn("No trees have been loaded, can't do anything\n");
+                    next;
+                }
+                my ($tree) = @{$data{'-trees'}};
+                if( ! $tree || ! ref($tree) ||
+                    ! $tree->isa('Bio::Tree::Tree') ) {
+                    warn("no tree object already stored!\n");
+                    next;
+                }
+                # These need to be added to the Node/branches
+              BRANCH:
+                while( my ($k,$v) = each %branch_dnds) {
+                    # we can probably do better by caching at some point
+                    my @nodes;
+                    for my $id ( split(/\.\./,$k ) ) {
+                        # "|| []" tolerates an id missing from the lookup
+                        my @nodes_L = map { $tree->find_node(-id => $_) }
+                                      @{$idlookup->{$id} || []};
+                        my $n = @nodes_L < 2 ? shift(@nodes_L) : $tree->get_lca(@nodes_L);
+                        if( ! $n ) {
+                            # Report the id that could not be resolved and
+                            # skip this branch; calling methods on the
+                            # undefined node would be fatal.
+                            warn("no node for $id\n");
+                            next BRANCH;
+                        }
+                        unless( $n->is_Leaf && $n->id) {
+                            $n->id($id);
+                        }
+                        push @nodes, $n;
+                    }
+                    # branch keys are "parent..child"; the values are
+                    # attached to the child node
+                    my ($parent,$child) = @nodes;
+                    next BRANCH unless $child;
+                    while ( my ($kk,$vv) = each %$v ) {
+                        $child->add_tag_value($kk,$vv);
+                    }
+                }
+            } elsif (m/^TREE/) {
+                # runmode = 0
+                $self->_pushback($_);
+                ($data{'-trees'},$idlookup) = $self->_parse_Forestry;
+                #last;
+            } elsif (m/Heuristic tree search by stepwise addition$/ ) {
+
+                # runmode = 3
+                $self->throw( -class => 'Bio::Root::NotImplemented',
+                              -text  => "StepwiseAddition not yet implemented!"
+                              );
+
+                # $self->_pushback($_);
+                # %data = $self->_parse_StepwiseAddition;
+                # last;
+
+            } elsif (m/Heuristic tree search by NNI perturbation$/) {
+
+                # runmode = 4
+                $self->throw( -class => 'Bio::Root::NotImplemented',
+                              -text  => "NNI Perturbation not yet implemented!"
+                              );
+
+                # $self->_pushback($_);
+                # %data = $self->_parse_Perturbation;
+                # last;
+
+            } elsif (m/^stage 0:/) {
+
+                # runmode = (1 or 2)
+                $self->throw( -class => 'Bio::Root::NotImplemented',
+                              -text  => "StarDecomposition not yet implemented!"
+                              );
+
+                # not reached while the throw above is in place; kept
+                # commented out like the two branches above
+                # $self->_pushback($_);
+                # %data = $self->_parse_StarDecomposition;
+                # last;
+
+            }
+        }
+    } elsif ($seqtype eq 'BASEML') {
+        while( defined($_ = $self->_readline) ) {
+            if( /^Distances:/ ) {
+                $self->_pushback($_);
+                my ($kappa,$alpha) = $self->_parse_nt_dists();
+                %data = ( '-kappa_distmat' => $kappa,
+                          '-alpha_distmat' => $alpha
+                          );
+            } elsif( /^TREE/ ) {
+                $self->_pushback($_);
+                ($data{'-trees'},$idlookup) = $self->_parse_Forestry;
+            }
+        }
+    } elsif ($seqtype eq 'YN00') {
+        # defined() so that a line that evaluates to false (e.g. "0")
+        # does not end the loop early, matching the other read loops
+        while (defined ($_ = $self->_readline)) {
+            if( m/^Estimation by the method|\(B\) Yang & Nielsen \(2000\) method/ ) {
+                $self->_pushback($_);
+                %data = $self->_parse_YN_Pairwise;
+                last;
+            }
+        }
+    }
+    if (%data) {
+        # add what the summary and RST parsers collected earlier
+        $data{'-version'}   = $self->{'_summary'}->{'version'};
+        $data{'-seqs'}      = $self->{'_summary'}->{'seqs'};
+        $data{'-patterns'}  = $self->{'_summary'}->{'patterns'};
+        $data{'-ngmatrix'}  = $self->{'_summary'}->{'ngmatrix'};
+        $data{'-codonpos'}  = $self->{'_summary'}->{'codonposition'};
+        $data{'-codonfreq'} = $self->{'_summary'}->{'codonfreqs'};
+        $data{'-model'}     = $self->{'_summary'}->{'model'};
+        $data{'-seqfile'}   = $self->{'_summary'}->{'seqfile'};
+        $data{'-aadistmat'} = $self->{'_summary'}->{'aadistmat'};
+        $data{'-stats'}     = $self->{'_summary'}->{'stats'};
+        $data{'-aafreq'}    = $self->{'_summary'}->{'aafreqs'};
+        $data{'-ntfreq'}    = $self->{'_summary'}->{'ntfreqs'};
+        $data{'-input_params'} = $self->{'_summary'}->{'inputparams'};
+        $data{'-rst'}          = $self->{'_rst'}->{'rctrted_seqs'};
+        $data{'-rst_persite'}  = $self->{'_rst'}->{'persite'};
+        $data{'-rst_trees'}    = $self->{'_rst'}->{'trees'};
+        return Bio::Tools::Phylo::PAML::Result->new(%data);
+    } else {
+        return;
+    }
+}
+
+
+sub _parse_summary {
+    # Locate the program header line, record seqtype / version / seqfile /
+    # model in $self->{'_summary'}, then run the summary sub-parsers that
+    # apply to that seqtype.  Throws Bio::Root::NotImplemented when no
+    # header line is found or the seqtype is not supported.
+    my ($self) = @_;
+
+    # Depending on whether verbose > 0 or not, and whether the result
+    # set comes from a multi-data run, the first few lines could be
+    # various things; we're going to throw away any sequence data
+    # here, since we'll get it later anyways
+
+    # multidata ? : \n\nData set 1\n
+    # verbose ? : cleandata ? : \nBefore deleting alignment gaps. \d sites\n
+    #                           [ sequence printout ]
+    #                           \nAfter deleting gaps. \d sites\n"
+    #           : [ sequence printout ]
+    # CODONML (in paml 3.12 February 2002)  <<-- what we want to see!
+
+    my $SEQTYPES = qr( (?: (?: CODON | AA | BASE | CODON2AA ) ML ) | YN00 )x;
+    # defined() so that a line that evaluates to false does not end the
+    # scan early
+    while (defined ($_ = $self->_readline)) {
+        if ( m/^($SEQTYPES) \s+                      # seqtype: CODONML, AAML, BASEML, CODON2AAML, YN00, etc
+               (?: \(in \s+ ([^\)]+?) \s* \) \s* )?  # version: "paml 3.12 February 2002"; not present < 3.1 or YN00
+               (\S+) \s*                             # tree filename
+               (?: (.+?) )?                          # model description (not there in YN00)
+               \s* $                                 # trim any trailing space
+               /ox
+           ) {
+            @{$self->{'_summary'}}{qw(seqtype version seqfile model)} = ($1,
+                                                                         $2,
+                                                                         $3,
+                                                                         $4);
+            defined $self->{'_summary'}->{'model'} &&
+                $self->{'_summary'}->{'model'} =~ s/Model:\s+//;
+            last;
+
+        } elsif (m/^Data set \d+$/) {
+            # \d+ so that data sets numbered 10 and above are recognised
+            # too; each new data set starts from a clean summary
+            $self->{'_summary'} = {};
+            $self->{'_summary'}->{'multidata'}++;
+        }
+    }
+
+    unless (defined $self->{'_summary'}->{'seqtype'}) {
+        $self->throw( -class => 'Bio::Root::NotImplemented',
+                      -text => 'Unknown format of PAML output did not see seqtype');
+    }
+    my $seqtype = $self->{'_summary'}->{'seqtype'};
+    $self->debug( "seqtype is $seqtype\n");
+    if ($seqtype eq "CODONML") {
+        $self->_parse_inputparams(); # settings from the .ctl file
+                                     # that get printed
+        $self->_parse_patterns();    # codon patterns - not very interesting
+        $self->_parse_seqs();        # the sequences data used for analysis
+        $self->_parse_codoncts();    # counts and distributions of codon/nt
+                                     # usage
+        $self->_parse_codon_freqs(); # codon frequencies
+        $self->_parse_distmat();     # NG distance matrices
+    } elsif ($seqtype eq "AAML") {
+        $self->_parse_inputparams;
+
+        $self->_parse_patterns();
+        $self->_parse_seqs();     # the sequences data used for analysis
+        $self->_parse_aa_freqs(); # amino acid frequencies
+
+        # get AA distances
+        $self->{'_summary'}->{'aadistmat'} = $self->_parse_aa_dists();
+
+    } elsif ($seqtype eq "CODON2AAML") {
+        $self->throw( -class => 'Bio::Root::NotImplemented',
+                      -text => 'CODON2AAML parsing not yet implemented!');
+    } elsif ($seqtype eq "BASEML") {
+        $self->_parse_patterns();
+        $self->_parse_seqs();
+        $self->_parse_nt_freqs();
+
+    } elsif ($seqtype eq "YN00") {
+        $self->_parse_codon_freqs();
+        $self->_parse_codoncts();
+        $self->_parse_distmat(); # NG distance matrices
+
+    } else {
+        $self->throw( -class => 'Bio::Root::NotImplemented',
+                      -text => 'Unknown seqtype, not yet implemented!',
+                      -value => $seqtype
+                    );
+    }
+
+}
+
+
+sub _parse_inputparams {
+    # Collect the "Codon frequencies" and "Site-class models" settings
+    # into $self->{'_summary'}->{'inputparams'}.  Reading stops at the
+    # "ns = ..." line, which is pushed back for the next parser.
+    my $self = shift;
+
+    while ( defined( $_ = $self->_readline ) ) {
+        if ( /^ns\s+=\s+/ ) {
+            $self->_pushback($_);
+            last;
+        }
+        next if /^\s+$/;
+        if ( /^((?:Codon frequencies)|(?:Site-class models))\s*:\s+(.+)/ ) {
+            $self->{'_summary'}->{'inputparams'}->{$1} = $2;
+        }
+    }
+}
+
+# Read the codon-position base frequency table and, when present, the
+# "Codon frequencies under model" table from the input stream.
+# Results are stored in $self->{'_summary'}->{'codonposition'} (one hashref
+# per codon position, keyed on the base label) and
+# $self->{'_summary'}->{'codonfreqs'} (one arrayref per table row).
+sub _parse_codon_freqs {
+    my ($self) = @_;
+    # $okay: the position-table header has been seen
+    # $done: the section being read is complete
+    my ($okay,$done) = (0,0);
+    
+    # Pass 1: the "position N:" lines of the 3x4 table
+    while( defined($_ = $self->_readline ) ) {
+	# the Nei-Gojobori section follows; hand its header line back
+	if( /^Nei|\(A\) Nei/ ) { $self->_pushback($_); last }
+	# note: the line read after position 3 is consumed here, not pushed back
+	last if( $done);
+	# skip anything starting with whitespace (this includes blank lines)
+	next if ( /^\s+/);
+	# ignore everything until the table header has been seen
+	next unless($okay || /^Codon position x base \(3x4\) table\, overall/ );
+	$okay = 1;
+	# strip the "position N:" prefix, leaving "base:freq" tokens in $_
+	if( s/^position\s+(\d+):\s+// ) {
+	    my $pos = $1;
+	    s/\s+$//;
+	    my @bases = split;
+	    foreach my $str ( @bases ) {
+		my ( $base,$freq) = split(/:/,$str,2);
+		# positions are 1-based in the output, 0-based in the array
+		$self->{'_summary'}->{'codonposition'}->[$pos-1]->{$base} = $freq;
+	    }
+	    $done = 1 if $pos == 3;
+        } 
+    }
+    $done = 0;
+    # Pass 2: the codon frequency table printed for use in evolver
+    while( defined( $_ = $self->_readline) ) {
+        if( /^Nei\s\&\sGojobori|\(A\)\sNei-Gojobori/ ) { $self->_pushback($_); last }
+        last if ( $done );
+        if( /^Codon frequencies under model, for use in evolver:/ ){
+            # rows run until the first whitespace-only line
+            while( defined( $_ = $self->_readline) ) {
+                last if( /^\s+$/ );
+		s/^\s+//;
+		s/\s+$//;
+		# each row is stored as an arrayref of its whitespace-separated fields
+		push @{$self->{'_summary'}->{'codonfreqs'}},[split];
+	    }
+	    $done = 1;
+        }
+    }
+}
+
+# Parse the per-sequence amino-acid frequency table.
+#
+# Everything before the "Frequencies." heading is skipped.  The first
+# non-blank line after it is the column header; each later row is stored as
+#   $self->{'_summary'}->{'aafreqs'}->{ROWNAME}->{COLUMN} = value
+# (the 'Average' row is stored the same way).  The "# constant sites" and
+# "ln Lmax (unconstrained)" lines fill $self->{'_summary'}->{'stats'}.
+# A "TREE" or "AA distances" line is pushed back and ends the parse.
+#
+# Fix: the row split read "my ($seqname, at freqs)" (archive-mangled sigil);
+# it is restored to @freqs.  The unused $numseqs local is gone.
+sub _parse_aa_freqs {
+    my ($self) = @_;
+    my ($okay,$done,$header) = (0,0,0);
+    my @bases;
+    while( defined($_ = $self->_readline ) ) {
+        if( /^TREE/ || /^AA distances/ ) { $self->_pushback($_); last }
+        last if( $done);
+        next if ( /^\s+$/ || /^\(Ambiguity/ );
+        if( /^Frequencies\./ ) {
+            $okay = 1;
+        } elsif( ! $okay ) { # skip till we see 'Frequencies.'
+            next;
+        } elsif ( ! $header ) {
+            s/^\s+//;        # remove leading whitespace
+            @bases = split;  # column names of the table
+            $header = 1;
+            $self->{'_summary'}->{'aafreqs'} = {}; # reset/clear values
+            next;
+        } elsif( /^\#\s+constant\s+sites\:\s+
+                 (\d+)\s+ # constant sites
+                 \(\s*([\d\.]+)\s*\%\s*\)/x){
+            $self->{'_summary'}->{'stats'}->{'constant_sites'} = $1;
+            $self->{'_summary'}->{'stats'}->{'constant_sites_percentage'} = $2;
+        } elsif( /^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/x ) {
+            $self->{'_summary'}->{'stats'}->{'loglikelihood'} = $1;
+            $done = 1; # done for sure
+        } else {
+            my ($seqname,@freqs) = split;
+            my $basect = 0;
+            foreach my $f ( @freqs ) {
+                # this will also store 'Average'
+                $self->{'_summary'}->{'aafreqs'}->{$seqname}->{$bases[$basect++]} = $f;
+            }
+        }
+    }
+}
+
+
+# This is for parsing the automatic tree output
+
+# Placeholder: reads nothing and returns an empty list of key/value pairs.
+sub _parse_StarDecomposition {
+    my $self = shift;
+    my %parsed = ();
+    return %parsed;
+}
+
+# Parse a lower-triangular "AA distances" / "ML distances" table and return
+# it as a Bio::Matrix::PhylipDist.
+#
+# Each row is "name v1 v2 ..."; the values pair the row's sequence with the
+# names seen so far, in order, and are mirrored into both halves of the
+# matrix.  Parsing ends at a blank line after the first row, at a "TREE"
+# line (pushed back), or once as many names as parsed sequences were read.
+#
+# Fixes:
+#  * "%matrix, at names, at values" and "$seqname, at vl" were archive-mangled
+#    sigils; restored to @names/@values/@vl.
+#  * "TREE" is now tested before $done (as _parse_nt_dists does), so a TREE
+#    heading directly after the last row is pushed back, not swallowed.
+#  * the ML workaround no longer calls display_id on a missing first
+#    sequence, and the name it injects gets its zero diagonal entry.
+sub _parse_aa_dists {
+    my ($self) = @_;
+    my ($okay,$seen,$done) = (0,0,0);
+    my (%matrix,@names,@values);
+    my $numseqs = scalar @{$self->{'_summary'}->{'seqs'} || []};
+    my $type = '';
+    while( defined ($_ = $self->_readline ) ) {
+        if( /^TREE/ ) { $self->_pushback($_); last; }
+        last if $done;
+        if( /^\s+$/ ) {
+            last if( $seen );
+            next;
+        }
+        if( /^(AA|ML) distances/ ) {
+            $okay = 1;
+            $type = $1;
+            next;
+        }
+
+        s/\s+$//g; # remove trailing space
+        if( $okay ) {
+            my ($seqname,@vl) = split;
+            $seen = 1;
+            # hacky workaround to problem with 3.14 aaml: the ML table has
+            # no row for the first sequence, so take its name from the
+            # sequences parsed earlier
+            if( $type eq 'ML' &&
+                ! @names && # first entry
+                @vl) {      # not empty
+                my $first = ($self->{'_summary'}->{'seqs'} || [])->[0];
+                if( $first ) {
+                    my $first_id = $first->display_id;
+                    push @names, $first_id;
+                    $matrix{$first_id}->{$first_id} = 0;
+                }
+            }
+            for my $s ( @names ) {
+                last unless @vl;
+                $matrix{$seqname}->{$s} =
+                    $matrix{$s}->{$seqname} = shift @vl;
+            }
+            push @names, $seqname;
+
+            $matrix{$seqname}->{$seqname} = 0;
+        }
+        $done = 1 if( scalar @names == $numseqs);
+    }
+
+    # %dist maps each pair of names to its [row,column] index in @values
+    my %dist;
+    my $i = 0;
+    @values = ();
+    foreach my $lname ( @names ) {
+        my @row;
+        my $j = 0;
+        foreach my $rname ( @names ) {
+            my $v = $matrix{$lname}->{$rname};
+            $v = $matrix{$rname}->{$lname} unless defined $v;
+            push @row, $v;
+            $dist{$lname}{$rname} = [$i,$j++];
+        }
+        $i++;
+        push @values, \@row;
+    }
+    return Bio::Matrix::PhylipDist->new
+        (-program=> $self->{'_summary'}->{'seqtype'},
+         -matrix => \%dist,
+         -names  => \@names,
+         -values => \@values );
+}
+
+# Parse the site-pattern summary.
+#
+# Records ns and ls from the "ns = N  ls = M" line and, once the
+# "# site patterns = K" line has been seen, collects every following
+# whitespace-separated token up to the next whitespace-only line.  A
+# "Codon position" or "Codon usage" line is pushed back and ends the parse.
+# The result is stored in $self->{'_summary'}->{'patterns'} as
+#   { -patterns => [...], -ns => N, -ls => M }
+#
+# Fix: the declaration read "$patternct, at patterns" (archive-mangled
+# sigil); restored to @patterns.
+sub _parse_patterns {
+    my ($self) = @_;
+    my ($patternct,@patterns,$ns,$ls);
+    while( defined($_ = $self->_readline) ) {
+        if( /^Codon position/ ) {
+            $self->_pushback($_);
+            last;
+        } elsif( /^Codon usage/ ) {
+            $self->_pushback($_);
+            last;
+        } elsif( $patternct ) {
+            last if( /^\s+$/ );
+            s/^\s+//;
+            push @patterns, split;
+        } elsif( /^ns\s+\=\s*(\d+)\s+ls\s+\=\s*(\d+)/ ) {
+            ($ns,$ls) = ($1,$2);
+        } elsif( /^\# site patterns \=\s*(\d+)/ ) {
+            $patternct = $1;
+        }
+        # any other line is ignored
+    }
+    $self->{'_summary'}->{'patterns'} = { -patterns => \@patterns,
+                                          -ns       => $ns,
+                                          -ls       => $ls};
+}
+
+# Parse the printed alignment into Bio::PrimarySeq objects, stored as an
+# array ref in $self->{'_summary'}->{'seqs'}.  Returns 1.
+#
+# The first sequence is kept verbatim.  In later sequences a '.' means
+# "same as the first sequence at this column" and is expanded.  Parsing ends
+# at a "TREE"/"Codon" line (pushed back) or at the first blank line after
+# at least one sequence; lines holding only a number are skipped.
+#
+# Fixes:
+#  * "my (@firstseq, at seqs)" was an archive-mangled sigil; restored.
+#  * the '.' expansion restarted its search at the position just replaced,
+#    so a '.' in the first sequence at that column looped forever; the
+#    search now resumes one past it.  A '.' beyond the end of the first
+#    sequence is left as '.' instead of being replaced by undef.
+#
+# this should in fact be packed into a Bio::SimpleAlign object instead of
+# an array but we'll stay with this for now
+sub _parse_seqs {
+    my ($self) = @_;
+    my (@firstseq,@seqs);
+    while( defined ($_ = $self->_readline) ) {
+        if( /^(TREE|Codon)/ ) { $self->_pushback($_);  last }
+        last if( /^\s+$/ && @seqs > 0 );
+        next if ( /^\s+$/ );
+        next if( /^\d+\s+$/ );
+
+        my ($name,$seqstr) = split(/\s+/,$_,2);
+        $seqstr = '' unless defined $seqstr;
+        $seqstr =~ s/\s+//g; # remove whitespace
+        if( @firstseq ) {
+            my $i = 0;
+            while( (my $v = index($seqstr,'.',$i)) >= 0 ) {
+                # replace the '.' with the residue of the first sequence
+                substr($seqstr,$v,1,$firstseq[$v]) if defined $firstseq[$v];
+                $i = $v + 1;
+            }
+            $self->debug( "adding seq $seqstr\n");
+        } else {
+            @firstseq = split(//,$seqstr);
+        }
+        push @seqs, Bio::PrimarySeq->new(-display_id  => $name,
+                                         -seq         => $seqstr);
+    }
+    $self->{'_summary'}->{'seqs'} = \@seqs;
+    1;
+}
+
+# Placeholder: codon counts are not parsed; nothing is read or returned.
+sub _parse_codoncts {
+    return;
+}
+
+# Parse the Nei & Gojobori pairwise table into
+# $self->{'_summary'}->{'ngmatrix'}.
+#
+# Each "omega (dN dS)" triple found on row R, column C is stored as
+#   ngmatrix->[C]->[R] = { omega => ..., dN => ..., dS => ... }
+# For seqtype 'YN00' a Bio::PrimarySeq carrying only the row name is made
+# per row and the list replaces $self->{'_summary'}->{'seqs'}.
+# Returns nothing if the first non-blank line (after the optional
+# "(A) Nei-Gojobori (1986) method" preamble) is not the
+# "Nei & Gojobori" heading; returns 1 otherwise.
+sub _parse_distmat {
+    my $self = shift;
+    my $version = 3.14;
+
+    HEADING:
+    while ( defined( $_ = $self->_readline ) ) {
+        next HEADING if /^\s+$/;
+        if ( /^\(A\)\sNei-Gojobori\s\(1986\)\smethod/ ) {
+            # newer layout: step over the reference information (4 lines);
+            # the last one read is then tested below
+            $version = 3.15;
+            for my $skip ( 1 .. 4 ) {
+                $_ = $self->_readline;
+            }
+        }
+        last HEADING;
+    }
+
+    return unless (/^Nei\s*\&\s*Gojobori/);
+
+    # the newer layout has one extra line after the heading
+    $self->_readline if $version > 3.14;
+
+    my $seqtype = $self->{'_summary'}->{'seqtype'};
+    if ( $seqtype eq 'CODONML' ) {
+        # skip the next 3 lines
+        for my $skip ( 1 .. 3 ) {
+            $self->_readline;
+        }
+    }
+
+    my $row = 0;
+    my @row_seqs;
+    ROW:
+    while ( defined( $_ = $self->_readline ) ) {
+        if ( /^\s+$/ ) {
+            # a blank line ends the table once something has been stored
+            last ROW if exists $self->{'_summary'}->{'ngmatrix'};
+            next ROW;
+        }
+        next ROW if /^NOTE:/i;
+        chomp;
+        my ( $seq, $rest ) = split( /\s+/, $_, 2 );
+        $rest = '' unless defined $rest; # get rid of empty messages
+        if ( $seqtype eq 'YN00' ) {
+            push @row_seqs, Bio::PrimarySeq->new( -display_id => $seq );
+        }
+        my $col = 0;
+        while ( $rest && $rest =~
+                /(\-?\d+(\.\d+)?)\s*\(\-?(\d+(\.\d+)?)\s+(\-?\d+(\.\d+)?)\)/g ) {
+            $self->{'_summary'}->{'ngmatrix'}->[ $col++ ]->[$row] = {
+                'omega' => $1,
+                'dN'    => $3,
+                'dS'    => $5,
+            };
+        }
+        $row++;
+    }
+    if ( $seqtype eq 'YN00' && @row_seqs ) {
+        $self->{'_summary'}->{'seqs'} = \@row_seqs;
+    }
+
+    1;
+}
+
+
+# Parse the pairwise codon-model comparisons up to end of input.
+#
+# For each "I (name) ... J (name)" pair, the following "lnL =" line, the
+# parameter line after it (t, kappa, omega) and the "t= S= N= dN/dS= dN= dS="
+# summary line are combined into a hash stored at $matrix[J-1]->[I-1].
+# Returns the list ( -mlmatrix => \@matrix ).
+sub _parse_PairwiseCodon {
+    my $self = shift;
+    my @matrix;
+    my ( $first, $second, $lnl, $codon_model, $t, $kappa, $omega );
+
+    LINE:
+    while ( defined( $_ = $self->_readline ) ) {
+        if ( /^pairwise comparison, codon frequencies\:\s*(\S+)\./ ) {
+            $codon_model = $1;
+            next LINE;
+        }
+        if ( /^(\d+)\s+\((\S+)\)\s+\.\.\.\s+(\d+)\s+\((\S+)\)/ ) {
+            ( $first, $second ) = ( $1, $3 );
+            next LINE;
+        }
+        if ( /^lnL\s+\=\s*(\-?\d+(\.\d+)?)/ ) {
+            $lnl = $1;
+            # the line right after lnL carries t, kappa and omega
+            $_ = $self->_readline;
+            if ( defined $_ ) {
+                s/^\s+//;
+                ( $t, $kappa, $omega ) = split ' ', $_;
+            }
+            next LINE;
+        }
+        if ( m/^t\=\s*(\d+(\.\d+)?)\s+
+		 S\=\s*(\d+(\.\d+)?)\s+
+		 N\=\s*(\d+(\.\d+)?)\s+
+		 dN\/dS\=\s*(\d+(\.\d+)?)\s+
+		 dN\=\s*(\d+(\.\d+)?)\s+
+		 dS\=\s*(\d+(\.\d+)?)/ox ) {
+            # values from the parameter line win over the summary line
+            my $t_value     = defined $t     && length($t)     ? $t     : $1;
+            my $omega_value = defined $omega && length($omega) ? $omega : $7;
+            $matrix[ $second - 1 ]->[ $first - 1 ] = {
+                'lnL'   => $lnl,
+                't'     => $t_value,
+                'S'     => $3,
+                'N'     => $5,
+                'kappa' => $kappa,
+                'omega' => $omega_value,
+                'dN'    => $9,
+                'dS'    => $11,
+            };
+            next LINE;
+        }
+        next LINE if /^\s+$/;
+        # a bare row of three decimals is recognised but not used
+        next LINE if /^\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/;
+        $self->debug( "unknown line: $_");
+    }
+    return ( -mlmatrix => \@matrix );
+}
+
+# Parse the pairwise table that follows the "seq. seq." column heading.
+#
+# Every data row is stored at $result[SEQ2-1]->[SEQ1-1] as a hash with the
+# keys S, N, t, kappa, omega, dN, dN_SE, dS and dS_SE.  Parsing stops at the
+# "(C) LWL85, LPB93 & LWLm methods" heading (pushed back) or at end of
+# input.  Returns the list ( -mlmatrix => \@result ).
+sub _parse_YN_Pairwise {
+    my ($self) = @_;
+    my @result;
+    # skip everything up to and including the column heading
+    while( defined( $_ = $self->_readline) ) {
+	last if( /^seq\.\s+seq\./);
+    }
+    while( defined( $_ = $self->_readline) ) {
+	# Each number is captured twice (whole value, then its fraction), so
+	# only the odd-numbered groups from $3 on are used below.  The optional
+	# minus signs on dN/dS sit outside the groups and are not stored.
+	if( m/^\s+(\d+)\s+  # seq #
+	    (\d+)\s+        # seq #
+	    (\d+(\.\d+))\s+ # S
+	    (\d+(\.\d+))\s+ # N
+	    (\d+(\.\d+))\s+ # t
+	    (\d+(\.\d+))\s+ # kappa
+	    (\d+(\.\d+))\s+ # omega
+	    \-??(\d+(\.\d+))\s+ # dN
+	    \+\-\s+
+	    \-??(\d+(\.\d+))\s+ # dN SE
+	    \-??(\d+(\.\d+))\s+ # dS
+	    \+\-\s+
+	    \-??(\d+(\.\d+))\s+ # dS SE
+	    /ox 
+	    ) {
+	    
+	    $result[$2-1]->[$1-1] = { 
+		'S' => $3,
+		'N' => $5,
+		't' => $7,
+		'kappa' => $9,
+		'omega' => $11,
+		'dN' => $13,
+		'dN_SE' => $15,
+		'dS' => $17,
+		'dS_SE' => $19,
+	    };
+	} elsif( /^\s+$/ ) { 
+	    next; 
+	} elsif( /^\(C\) LWL85, LPB93 & LWLm methods/) {
+	    # start of the next section: leave it for the caller
+	    $self->_pushback($_);
+	    last;
+	}
+	
+    }
+    return ( -mlmatrix => \@result);
+}
+
+# Parse one "TREE # n:" section.
+#
+# Returns ( \@trees, \%match ):
+#  * @trees holds the Bio::Tree objects built from every Newick line after
+#    the first one of the section (the first one, with number labels, is
+#    not saved); each tree's score is the lnL value and its id is
+#    "num_param:N", both taken from the "lnL(... np: N): value" line.
+#  * %match maps a node number to an array ref of leaf labels: leaf numbers
+#    come from the TREE line and are paired, in order, with the labels of
+#    the first saved Newick line; internal nodes are filled in from the
+#    "parent..child" branch list.
+# Parsing stops (line pushed back) at the start of the next output section.
+#
+# Fixes:
+#  * "%match, at branches, at trees" was an archive-mangled declaration.
+#  * "($score) = (s/...//)" stored the substitution count, because s/// does
+#    not return its captures; the MP score is now taken from $1.
+#  * branches whose child never gets resolved were re-queued forever; the
+#    resolution now stops after a pass that makes no progress.
+sub _parse_Forestry {
+    my ($self) = @_;
+    my ($instancecount,$num_param,
+        $loglikelihood,$score,$done,$treelength) = (0,0,0,0,0,0);
+    my $okay = 0;
+    my (@ids,%match,@branches,@trees);
+    while( defined ($_ = $self->_readline) ) {
+        last if $done;
+        if( s/^TREE\s+\#\s*\d+:\s+// ) {
+            $score = $1 if s/MP\s+score\:\s+(\S+)\s+$//;
+            @ids = /(\d+)[\,\)]/g;
+        } elsif( /^Node\s+\&/ || /^\s+N37/ || /^(CODONML|AAML|YN00|BASEML)/ ||
+                 /^\*\*/ || /^Detailed output identifying parameters/) {
+            $self->_pushback($_);
+            $done = 1;
+            last;
+        } elsif( /^tree\s+length\s+\=\s+(\S+)/ ) {
+            $treelength = $1;   # not going to store this for now
+            # as it is directly calculated from
+            # $tree->total_branch_length;
+        } elsif( /^\s*lnL\(.+np\:\s*(\d+)\)\:\s+(\S+)/ ) {
+            ($num_param,$loglikelihood) = ($1,$2);
+        } elsif( /^\(/) {
+            s/([\,:])\s+/$1/g;   # drop the blanks after ',' and ':'
+            my $treestr = IO::String->new($_);
+            my $treeio = Bio::TreeIO->new(-fh     => $treestr,
+                                          -format => 'newick');
+            my $tree = $treeio->next_tree;
+            if( $tree ) {
+                $tree->score($loglikelihood);
+                $tree->id("num_param:$num_param");
+                if( $okay > 0 ) {
+                    # we don't save the trees with the number labels
+                    if( ! %match && @ids) {
+                        for my $m ( /([^():,]+):/g ) {
+                            $match{shift @ids} = [$m];
+                        }
+                        # Walk the branch list in passes, in the original
+                        # queue order; a branch is resolved as soon as its
+                        # child has an entry in %match.
+                        my @pending = @branches;
+                        @branches = ();
+                        while( @pending ) {
+                            my @unresolved;
+                            for my $br ( @pending ) {
+                                my ($parent,$child) = @$br;
+                                if( $match{$child} ) {
+                                    push @{$match{$parent}}, @{$match{$child}};
+                                } else {
+                                    push @unresolved, $br;
+                                }
+                            }
+                            last if @unresolved == @pending; # no progress
+                            @pending = @unresolved;
+                        }
+                        if( $self->verbose > 1 ) {
+                            for my $k ( sort { $a <=> $b } keys %match ) {
+                                $self->debug( "$k -> ",
+                                              join(",",@{$match{$k}}), "\n");
+                            }
+                        }
+                    }
+                    push @trees, $tree;
+                }
+            }
+            $okay++;
+        } elsif( /^\s*\d+\.\.\d+/ ) {
+            push @branches, map { [split(/\.\./,$_)] } split;
+        }
+    }
+    return \@trees,\%match;
+}
+
+# Parse the output of one model (one "Model N: ..." section and/or one TREE
+# section) and return it as a Bio::Tools::Phylo::PAML::ModelResult.
+#
+# Collected constructor arguments: -model_num, -model_description,
+# -time_used, -kappa, -trees, -likelihood (score of the first tree),
+# -num_site_classes, -dnds_site_classes, -pos_sites, -neb_sites, -beb_sites
+# and -shape_params.  Per-branch dN/dS columns are attached as tag values to
+# the child node of each branch in the first tree.
+# The section ends at "Time used:" or at the next "Model" line.
+#
+# Fixes:
+#  * "my (@r, at f)" was an archive-mangled declaration; restored to @f.
+#  * the next "Model" line was pushed back and then immediately re-read and
+#    dropped by "last if $done"; the loop now exits right after the pushback
+#    so the line stays available.
+#  * a branch end with no matching tree node produced a warning and then a
+#    method call on undef; such branches are now skipped.
+sub _parse_NSsitesBatch {
+    my $self = shift;
+    my (%data,$idlookup);
+    my ($okay,$done) =(0,0);
+    while( defined($_ = $self->_readline) ) {
+        last if $done;
+        next if /^\s+$/;
+        next unless( $okay || /^Model\s+\d+/ || /^TREE/);
+
+        if( /^Model\s+(\d+)/ ) {
+            if( $okay ) {
+                # this only happens if $okay was already 1 and
+                # we hit a Model line: it belongs to the next section
+                $self->_pushback($_);
+                last;
+            } else {
+                chomp;
+                $data{'-model_num'}        = $1;
+                ($data{'-model_description'}) = ( /\:\s+(.+)/ );
+                $okay = 1;
+            }
+        } elsif( /^Time used\:\s+(\S+)/ ) {
+            $data{'-time_used'} = $1;
+            $done = 1;
+        } elsif( /^kappa\s+\(ts\/tv\)\s+\=\s+(\S+)/ ) {
+            $data{'-kappa'} = $1;
+        } elsif( /^TREE/ ) {
+            $self->_pushback($_);
+            ($data{'-trees'},$idlookup) = $self->_parse_Forestry;
+            if( defined $data{'-trees'} &&
+                scalar @{$data{'-trees'}} ) {
+                $data{'-likelihood'}= $data{'-trees'}->[0]->score;
+            }
+            $okay = 1;
+        } elsif ( /^omega\s+\(dn\/ds\)\s+\=\s+(\S+)/i ) {
+            # for M0 (single ratio for the entire tree)
+            # explicitly put '1.00000' rather than '1', because \d+\.\d{5}
+            # is reported in all other cases.
+            my @p = (q/1.00000/); # since there is only one class,
+            my @w = $1;
+            $data{'-dnds_site_classes'} = { 'p' => \@p,
+                                            'w' => \@w};
+            # since no K=X is provided, put 1 here
+            $data{q/-num_site_classes/} = 1;
+        } elsif( /^(Naive Empirical Bayes)|(Bayes Empirical Bayes)|(Positively\sselected\ssites)/i ) {
+            $self->_pushback($_);
+            my ($sites,$neb,$beb) = $self->_parse_Pos_selected_sites;
+            $data{'-pos_sites'} = $sites;
+            $data{'-neb_sites'} = $neb;
+            $data{'-beb_sites'} = $beb;
+        } elsif( /^dN/i ) {
+            if( /K\=(\d+)/ ) {
+                $data{'-num_site_classes'} = $1;
+                # find the next non-blank line; it is pushed back but also
+                # stays in $_ for the test below
+                while ( defined( $_ = $self->_readline ) ) {
+                    unless ($_ =~ /^\s+$/) {
+                        $self->_pushback($_);
+                        last;
+                    }
+                }
+                if ( defined $_ && /^site class/ ) {
+                    # table with separate background/foreground w rows
+                    $self->_readline;
+                    my @p = $self->_readline =~ /(\d+\.\d{5})/g;
+                    my @b_w = $self->_readline =~ /(\d+\.\d{5})/g;
+                    my @f_w = $self->_readline =~ /(\d+\.\d{5})/g;
+                    my @w;
+                    foreach my $i (0..$#b_w) {
+                        push @w, { q/background/ => $b_w[$i],
+                                   q/foreground/ => $f_w[$i] };
+                    }
+                    $data{'-dnds_site_classes'} = { q/p/ => \@p,
+                                                    q/w/ => \@w };
+                } else {
+                    my @p = $self->_readline =~ /(\d+\.\d{5})/g;
+                    my @w = $self->_readline =~ /(\d+\.\d{5})/g;
+                    $data{'-dnds_site_classes'} = { 'p' => \@p,
+                                                    'w' => \@w};
+                }
+            } elsif( /for each branch/ ) {
+                my %branch_dnds = $self->_parse_branch_dnds;
+                if( ! defined $data{'-trees'} ) {
+                    warn("No trees have been loaded, can't do anything\n");
+                    next;
+                }
+                my ($tree) = @{$data{'-trees'}};
+                if( ! $tree || ! ref($tree) ||
+                    ! $tree->isa('Bio::Tree::Tree') ) {
+                    warn("no tree object already stored!\n");
+                    next;
+                }
+                # These need to be added to the Node/branches
+                BRANCH:
+                while( my ($k,$v) = each %branch_dnds) {
+                    # we can probably do better by caching at some point
+                    my @nodes;
+                    for my $id ( split(/\.\./,$k ) ) {
+                        # a node number stands for a set of leaf labels;
+                        # more than one label means their common ancestor
+                        my @nodes_L = map { $tree->find_node(-id => $_) }
+                                      @{ $idlookup->{$id} || [] };
+                        my $n = @nodes_L < 2 ? shift(@nodes_L)
+                                             : $tree->get_lca(@nodes_L);
+                        if( ! $n ) {
+                            $self->warn("no node could be found for $id (no lca?)");
+                            next BRANCH;
+                        }
+                        unless( $n->is_Leaf && $n->id) {
+                            $n->id($id);
+                        }
+                        push @nodes, $n;
+                    }
+                    my ($parent,$child) = @nodes;
+                    next BRANCH unless $child;
+                    while ( my ($kk,$vv) = each %$v ) {
+                        $child->add_tag_value($kk,$vv);
+                    }
+                }
+            }
+        } elsif( /^Parameters in beta:/ ) {
+            $_ = $self->_readline; # need the next line
+            if ( /p\=\s+(\S+)\s+q\=\s+(\S+)/ ) {
+                $data{'-shape_params'} = {
+                    'shape' => 'beta',
+                    'p'     => $1,
+                    'q'     => $2 };
+            } else {
+                $self->warn("unparseable beta parameters: $_");
+            }
+        } elsif( /^Parameters in beta\&w\>1:/ ) {
+            # Parameters in beta&w>1:
+            #   p0=  1.00000  p=  0.07642 q=  0.85550
+            #  (p1=  0.00000) w=  1.00000
+            $_ = $self->_readline; # need the next line
+            my ($p0,$p,$q,$p1,$w);
+            if ( /p0\=\s+(\S+)\s+p\=\s+(\S+)\s+q\=\s+(\S+)/ ) {
+                $p0 = $1; $p = $2; $q = $3;
+            } else {
+                $self->warn("unparseable beta parameters: $_");
+            }
+            $_ = $self->_readline; # need the next line
+            if ( /\(p1\=\s+(\S+)\)\s+w\=\s*(\S+)/ ) {
+                $p1 = $1; $w = $2;
+                $data{'-shape_params'} = {
+                    'shape' => 'beta',
+                    'p0'    => $p0,
+                    'p'     => $p,
+                    'q'     => $q,
+                    'p1'    => $p1,
+                    'w'     => $w };
+            } else {
+                $self->warn("unparseable beta parameters: $_");
+            }
+        } elsif( /^alpha\s+\(gamma\)\s+\=\s+(\S+)/ ) {
+            my $gamma = $1;
+            $_ = $self->_readline;
+            my (@r,@f);
+            if( s/^r\s+\(\s*\d+\)\:\s+// ) {
+                @r = split;
+            }
+            $_ = $self->_readline;
+            if( s/^f\s*\:\s+// ) {
+                @f = split;
+            }
+            $data{'-shape_params'} = {
+                'shape' => 'alpha',
+                'gamma' => $gamma,
+                'r'     => \@r,
+                'f'     => \@f };
+        }
+    }
+    return Bio::Tools::Phylo::PAML::ModelResult->new(%data);
+}
+
+# Parse the lists of positively selected sites.
+#
+# Returns three array refs: sites listed before any Bayes heading, sites
+# under "Naive Empirical Bayes", and sites under "Bayes Empirical Bayes".
+# Each site is an array ref whose first four fields are the site number, a
+# residue token, a numeric value and a run of '*' markers (possibly empty);
+# depending on the row layout one further value, or a value and the number
+# after "+-", follow.  Rows are only collected after a
+# "Positively selected sites" line.  A "Time used" or "TREE" line is pushed
+# back and ends the parse.
+sub _parse_Pos_selected_sites {
+    my $self = shift;
+    my $collecting = 0;
+    my %found = ( 'default' => [],
+                  'neb'     => [],
+                  'beb'     => [] );
+    my $section = 'default';
+
+    LINE:
+    while ( defined( $_ = $self->_readline ) ) {
+        next LINE if ( /^\s+$/ || /^\s+Pr\(w\>1\)/ );
+        if ( /^Time used/ || /^TREE/ ) {
+            $self->_pushback($_);
+            last LINE;
+        }
+        if ( /^Naive Empirical Bayes/i ) { $section = 'neb'; next LINE; }
+        if ( /^Bayes Empirical Bayes/i ) { $section = 'beb'; next LINE; }
+        if ( /^Positively selected sites/ ) { $collecting = 1; next LINE; }
+        next LINE unless $collecting;
+
+        # try the widest row layout first, then the narrower ones
+        my @site;
+        if ( /^\s+(\d+)\s+(\S+)\s+(\-?\d+(?:\.\d+)?)(\**)\s+(\-?\d+(?:\.\d+)?)\s+\+\-\s+(\-?\d+(?:\.\d+)?)/ ) {
+            @site = ( $1, $2, $3, $4, $5, $6 );
+        }
+        elsif ( /^\s+(\d+)\s+(\S+)\s+(\-?\d*(?:.\d+))(\**)\s+(\-?\d+(?:\.\d+)?)/ ) {
+            @site = ( $1, $2, $3, $4, $5 );
+        }
+        elsif ( /^\s+(\d+)\s+(\S)\s+([\d\.\-\+]+)(\**)/ ) {
+            @site = ( $1, $2, $3, $4 );
+        }
+        else {
+            next LINE;
+        }
+        $site[3] = '' unless defined $site[3];
+        push @{ $found{$section} }, [@site];
+    }
+    return ( $found{'default'}, $found{'neb'}, $found{'beb'} );
+}
+
+# Parse the per-branch table that starts with a " branch  t ..." header.
+#
+# Returns a hash keyed by branch ("parent..child"); each value is a hash
+# ref mapping the header's column names to that row's fields, so a row
+# yields e.g. 't' => 0.067, 'dN' => 0.001.  Blank lines are skipped; the
+# first other line after the header that is not a branch row is pushed
+# back and ends the parse.
+sub _parse_branch_dnds {
+    my $self = shift;
+    my $in_table = 0;
+    my ( %dnds_of, @columns );
+
+    LINE:
+    while ( defined( $_ = $self->_readline ) ) {
+        next LINE if /^\s+$/;
+        next LINE unless ( $in_table || /^\s+branch\s+t/ );
+
+        if ( /^\s+branch\s+(.+)/ ) {
+            s/^\s+//;
+            @columns = split( /\s+/, $_ );
+            $in_table = 1;
+            next LINE;
+        }
+        unless ( /^\s*(\d+\.\.\d+)/ ) {
+            $self->_pushback($_);
+            last LINE;
+        }
+
+        my $branch = $1;
+        s/^\s+//;
+        my @fields = split ' ', $_;
+        # pair the fields with the header names position by position
+        my %row;
+        @row{ @columns[ 0 .. $#fields ] } = @fields;
+        $dnds_of{$branch} = \%row;
+    }
+    return %dnds_of;
+}
+
+
+#baseml stuff
+# Parse the per-sequence base frequency table (baseml).
+#
+# Everything before the "Frequencies." heading is skipped.  The first
+# non-blank line after it is the column header; each later row is stored as
+#   $self->{'_summary'}->{'ntfreqs'}->{ROWNAME}->{COLUMN} = value
+# (the 'Average' row is stored the same way).  The "# constant sites" and
+# "ln Lmax (unconstrained)" lines fill $self->{'_summary'}->{'stats'}.
+# A "TREE" or "Distances" line is pushed back and ends the parse.
+#
+# Fix: the row split read "my ($seqname, at freqs)" (archive-mangled sigil);
+# it is restored to @freqs.  The unused $numseqs local is gone.
+sub _parse_nt_freqs {
+    my ($self) = @_;
+    my ($okay,$done,$header) = (0,0,0);
+    my @bases;
+    while( defined($_ = $self->_readline ) ) {
+        if( /^TREE/ || /^Distances/ ) { $self->_pushback($_); last }
+        last if( $done);
+        next if ( /^\s+$/ || /^\(Ambiguity/ );
+        if( /^Frequencies\./ ) {
+            $okay = 1;
+        } elsif( ! $okay ) {    # skip till we see 'Frequencies.'
+            next;
+        } elsif ( ! $header ) {
+            s/^\s+//;           # remove leading whitespace
+            @bases = split;     # column names of the table
+            $header = 1;
+            $self->{'_summary'}->{'ntfreqs'} = {}; # reset/clear values
+            next;
+        } elsif( /^\#\s+constant\s+sites\:\s+
+                 (\d+)\s+       # constant sites
+                 \(\s*([\d\.]+)\s*\%\s*\)/ox){
+            $self->{'_summary'}->{'stats'}->{'constant_sites'} = $1;
+            $self->{'_summary'}->{'stats'}->{'constant_sites_percentage'} = $2;
+        } elsif( /^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/ox ) {
+            $self->{'_summary'}->{'stats'}->{'loglikelihood'} = $1;
+            $done = 1;          # done for sure
+        } else {
+            my ($seqname,@freqs) = split;
+            my $basect = 0;
+            foreach my $f ( @freqs ) {
+                # this will also store 'Average'
+                $self->{'_summary'}->{'ntfreqs'}->{$seqname}->{$bases[$basect++]} = $f;
+            }
+        }
+    }
+}
+
+# Parse the lower-triangular "Distances:..." table (baseml), whose cells
+# have the form "value(value)".
+#
+# Returns two Bio::Matrix::PhylipDist objects sharing the same names and
+# index map: the first holds the number before the parentheses of each cell
+# (called kappa in this code), the second the number inside them (called
+# alpha).  Parsing ends at a "TREE" line (pushed back), at a blank line
+# after the first row, or once as many names as parsed sequences were read.
+#
+# Fix: "%matrix, at names", "@kvalues, at avalues" and "@arow, at krow" were
+# archive-mangled declarations; the sigils are restored.
+sub _parse_nt_dists {
+    my ($self) = @_;
+    my ($okay,$seen,$done) = (0,0,0);
+    my (%matrix,@names);
+    my $numseqs = scalar @{$self->{'_summary'}->{'seqs'} || []};
+    my $type = '';
+    while( defined ($_ = $self->_readline ) ) {
+        if( /^TREE/ ) { $self->_pushback($_); last; }
+        last if $done;
+        next if(/^This matrix is not used in later/);
+        if( /^\s+$/ ) {
+            last if( $seen );
+            next;
+        }
+        if( /^Distances:(\S+)\s+\(([^\)]+)\)\s+\(alpha set at (\-?\d+\.\d+)\)/ ) {
+            $okay = 1;
+            $type = $1;
+            next;
+        }
+        s/\s+$//g; # remove trailing space
+        if( $okay ) {
+            my ($seqname,$vl) = split(/\s+/,$_,2);
+            $seen = 1;
+            my $i = 0;
+            if( defined $vl ) {
+                # the i-th cell of a row pairs it with the i-th earlier name
+                while( $vl =~ /(\-?\d+\.\d+)\s*\(\s*(\-?\d+\.\d+)\s*\)\s*/g ) {
+                    my ($kappa,$alpha) = ($1,$2);
+                    $matrix{$seqname}{$names[$i]} =
+                        $matrix{$names[$i]}{$seqname} = [$kappa,$alpha];
+
+                    $i++;
+                }
+                unless($i) {
+                    $self->warn("no matches for $vl\n");
+                }
+            }
+
+            push @names, $seqname;
+            $matrix{$seqname}->{$seqname} = [0,0];
+        }
+        $done = 1 if( scalar @names == $numseqs);
+    }
+
+    # %dist maps each pair of names to its [row,column] index
+    my %dist;
+    my $i = 0;
+    my (@kvalues,@avalues);
+    foreach my $lname ( @names ) {
+        my (@arow,@krow);
+        my $j = 0;
+        foreach my $rname ( @names ) {
+            my $v = $matrix{$lname}{$rname};
+
+            push @krow, $v->[0]; # kappa values
+            push @arow, $v->[1]; # alpha
+            $dist{$lname}{$rname} = [$i,$j++];
+        }
+        $i++;
+        push @kvalues, \@krow;
+        push @avalues, \@arow;
+    }
+    return
+        (Bio::Matrix::PhylipDist->new
+         (-program=> $self->{'_summary'}->{'seqtype'},
+          -matrix => \%dist,
+          -names  => \@names,
+          -values => \@kvalues ),
+         Bio::Matrix::PhylipDist->new
+         (-program=> $self->{'_summary'}->{'seqtype'},
+          -matrix => \%dist,
+          -names  => \@names,
+          -values => \@avalues )
+         );
+}
+
+# BASEML
+# Parse the substitution-rate section (baseml) up to end of input.
+#
+# Returns a hash (as a list) with the keys that were found:
+#   rate_parameters  => [ values of the "Rate parameters:" line ]
+#   base_frequencies => [ values of the "Base frequencies:" line ]
+#   average_TsTv     => number after "Average Ts/Tv ... ="
+#   rate_matrix_Q    => [ one array ref per row of the Q matrix ]
+#   K, alpha         => from the "alpha (gamma, K= n) = x" line
+#   r, f             => [ values of the "r:" / "f:" lines ]
+#
+# Fixes:
+#  * average_TsTv was taken from $1, the optional parenthesised note, not
+#    from $2, the number.
+#  * the collected hash was never returned, so the parse had no effect.
+#  * the label words of the first two lines were stored with the values;
+#    they are now stripped, as was already done for the r:/f: lines.
+sub _parse_rate_parametes {
+    my $self = shift;
+    my %rate_parameters;
+    while( defined($_ = $self->_readline) ) {
+        if( s/^Rate\s+parameters:\s+// ) {
+            s/\s+$//;
+            $rate_parameters{'rate_parameters'} = [split(/\s+/,$_)];
+        } elsif( s/^Base\s+frequencies:\s+// ) {
+            s/\s+$//;
+            $rate_parameters{'base_frequencies'} = [split(/\s+/,$_)];
+        } elsif( m/^Rate\s+matrix\s+Q,\s+Average\s+Ts\/Tv\s+(\([^\)+]+\))?\s*\=\s+
+                 (\-?\d+\.\d+)/x) {
+            $rate_parameters{'average_TsTv'} = $2;
+            # the matrix rows follow, up to a blank line or the alpha line
+            while( defined ($_ = $self->_readline) ) {
+                last if(/^\s+$/);
+                if( /^alpha/ ) {
+                    $self->_pushback($_);
+                    last;
+                }
+                s/^\s+//;
+                s/\s+$//;
+                push @{$rate_parameters{'rate_matrix_Q'}}, [split];
+            }
+        } elsif(/^alpha\s+\(gamma,\s+K=\s*(\d+)\s*\)\s*\=\s*(\-?\d+\.\d+)/ ) {
+            $rate_parameters{'K'} = $1;
+            $rate_parameters{'alpha'} = $2;
+        } elsif(s/^(r|f):\s+// ) {
+            my ($p) = $1;
+            s/\s+$//;
+            $rate_parameters{$p} = [split];
+        }
+    }
+    return %rate_parameters;
+}
+
+
+# RST parsing
+# Parse the 'rst' file found in $self->{'_dir'}, if any.
+#
+# Does nothing unless the directory is set, exists and is readable, and the
+# file named by $RSTFILENAME exists in it and is not empty.  Fills:
+#   $self->{'_rst'}->{'trees'}        trees read after "tree with node
+#                                     labels for"; branch sections tag the
+#                                     nodes with 'n', 's' and 'changes'
+#   $self->{'_rst'}->{'persite'}      per site number: extant then ancestral
+#                                     entries (codon/aa/probabilities)
+#   $self->{'_rst'}->{'rctrted_seqs'} Bio::PrimarySeq objects for the
+#                                     "node #N" rows; their description
+#                                     carries the overall accuracy figures
+# Returns nothing.
+#
+# Fixes:
+#  * "@firstseq, at seqs, at trees, at per_site_prob", "@anc_site, at extant_site"
+#    and "@branches, at branch2node" were archive-mangled declarations; the
+#    sigils are restored (the two locals that were never used are dropped).
+#  * a tree section that yields no tree no longer dies on undef->get_nodes.
+#  * a truncated "Overall accuracy" section no longer operates on undef.
+sub _parse_rst {
+    my ($self) = @_;
+    return unless $self->{'_dir'} && -d $self->{'_dir'} && -r $self->{'_dir'};
+
+    my $rstfile = File::Spec->catfile($self->{'_dir'},$RSTFILENAME);
+    return unless -e $rstfile && ! -z $rstfile;
+
+    my $rstio = Bio::Root::IO->new(-file => $rstfile);
+
+    # key points are to reuse existing bioperl objs (like Bio::Seq)
+    # where appropriate
+    my (@firstseq,@seqs,@trees);
+    while ( defined( $_ = $rstio->_readline ) ) {
+        if( /^TREE\s+\#\s+(\d+)/ ) {
+            while( defined ($_ = $rstio->_readline) ) {
+                if( /tree\s+with\s+node\s+labels\s+for/) {
+                    # the tree is read straight from the same handle
+                    my $tree = Bio::TreeIO->new(-noclose => 1,
+                                                -fh      => $rstio->_fh,
+                                                -format  => 'newick')->next_tree;
+                    if( $tree ) {
+                        # cleanup leading/trailing whitespace
+                        for my $n ( $tree->get_nodes ) {
+                            my $id = $n->id;
+                            if( defined $id ) {
+                                $id =~ s/^\s+//; $id =~ s/\s+$//;
+                                $n->id($id);
+                            }
+                            if( defined( my $blen = $n->branch_length) ) {
+                                $blen =~ s/^\s+//; $blen =~ s/\s+$//;
+                                $n->branch_length($blen);
+                            }
+                        }
+                        push @trees, $tree;
+                    } else {
+                        $self->warn("could not read the node-labelled tree from $rstfile");
+                    }
+                    last;
+                }
+            }
+        } elsif( /^Prob\sof\sbest\scharacter\sat\seach\snode,\slisted\sby\ssite/ ) {
+            $self->{'_rst'}->{'persite'} = [];
+            while( defined($_ = $rstio->_readline ) ) {
+                next if( /^Site/ || /^\s+$/ );
+                if( s/^\s+(\d+)\s+(\d+)\s+([^:]+)\s+:\s+(.+)// ) {
+                    my ($sitenum,$freq,$extant,$ancestral) = ($1,$2,$3,$4);
+                    # NOTE(review): both lists start with one empty hash,
+                    # as in the original; presumably an index placeholder.
+                    # Confirm against the readers of 'persite' before
+                    # changing this.
+                    my @anc_site    = ( {} );
+                    my @extant_site = ( {} );
+                    while( $extant =~ s/^([A-Z]{3})\s+\(([A-Z])\)\s+//g ) {
+                        push @extant_site, {'codon'=>$1,'aa' => $2 };
+                    }
+                    while( $ancestral =~ s/^([A-Z]{3})\s+([A-Z])\s+  # codon AA
+                                          (\S+)\s+                   # Prob
+                                          \(([A-Z])\s+(\S+)\)\s+//xg # AA Prob
+                           ) {
+                        push @anc_site, {'codon'            => $1,
+                                         'aa'               => $2,
+                                         'prob'             => $3,
+                                         'Yang95_aa'        => $4,
+                                         'Yang95_aa_prob'   => $5};
+                    }
+                    # saving persite
+                    $self->{'_rst'}->{'persite'}->[$sitenum] = [@extant_site,
+                                                                @anc_site];
+                } elsif( /^Summary\sof\schanges\salong\sbranches\./ ) {
+                    last;
+                }
+            }
+        } elsif( /^Check\sroot\sfor\sdirections\sof\schange\./ ||
+                 /^Summary\sof\schanges\salong\sbranches\./ ) {
+            my (@branches,@branch2node,$branch,$node);
+            my $tree = $trees[-1];
+            if( ! $tree ) {
+                $self->warn("No tree built before parsing Branch changes\n");
+                last;
+            }
+            # nodes ordered by the number their id starts with
+            my @nodes = ( map { $_->[0] }
+                          sort { $a->[1] <=> $b->[1] }
+                          map { [$_, $_->id =~ /^(\d+)\_?/] } $tree->get_nodes);
+            unshift @nodes, undef; # fake first node so that index will match nodeid
+            while( defined($_ = $rstio->_readline ) ) {
+                next if /^\s+$/;
+                if( m/^List\sof\sextant\sand\sreconstructed\ssequences/ ) {
+                    $rstio->_pushback($_);
+                    last;
+                } elsif( /^Branch\s+(\d+):\s+(\d+)\.\.(\d+)\s+/ ) {
+                    my ($left,$right);
+                    ($branch,$left,$right) = ($1,$2,$3);
+                    ($node) = $nodes[$right];
+                    if( ! $node ) {
+                        warn("cannot find $right in $tree ($branch $left..$right)\n");
+                        last;
+                    }
+                    my ($n,$s) = (/\(n=\s*(\S+)\s+s=\s*(\S+)\)/);
+                    $node->add_tag_value('n', $n);
+                    $node->add_tag_value('s', $s);
+                    $branch2node[$branch] = $right;
+                } elsif( /^\s+(\d+)\s+([A-Z])\s+(\S+)\s+\-\>\s+([A-Z])\s+(\S+)?/ ) {
+                    my ($site,$anc,$aprob, $derived,$dprob)= ($1,$2,$3,$4,$5);
+                    if( ! $node ) {
+                        $self->warn("no branch line was previously parsed!");
+                        next;
+                    }
+                    my %c = ( 'site'        => $site,
+                              'anc_aa'      => $anc,
+                              'anc_prob'    => $aprob,
+                              'derived_aa'  => $derived,
+                              );
+                    $c{'derived_prob'} = $dprob if defined $dprob;
+                    $node->add_tag_value('changes',\%c);
+                }
+            }
+        } elsif( /^Overall\s+accuracy\s+of\s+the\s+(\d+)\s+ancestral\s+sequences:/ ) {
+            my $line = $rstio->_readline;
+            $line = '' unless defined $line;
+            $line =~ s/^\s+//; $line =~ s/\s+$//;
+            my @overall_site = split(/\s+/,$line);
+            # the per-sequence figures are on the third line after that
+            for my $skip ( 1..3 ) {
+                $line = $rstio->_readline;
+            }
+            $line = '' unless defined $line;
+            $line =~ s/^\s+//; $line =~ s/\s+$//;
+            my @overall_seq = split(/\s+/,$line);
+            if( @overall_seq != @overall_site ||
+                @overall_seq != @seqs ) {
+                $self->warn("out of sync somehow seqs, site scores don't match\n");
+                warn("@seqs @overall_seq @overall_site\n");
+            }
+            for my $seq ( @seqs ) {
+                $seq->description(sprintf("overall_accuracy_site=%s overall_accuracy_seq=%s",
+                                          shift @overall_site,
+                                          shift @overall_seq));
+            }
+        } elsif( m/^List of extant and reconstructed sequences/o ) {
+            while ( defined( $_ = $rstio->_readline ) ) {
+                last if( /^Overall accuracy of the/ );
+                last if( /^\s+$/ && @seqs > 0 );
+                next if ( /^\s+$/ );
+                next if( /^\d+\s+$/ );
+                # runmode = (0)
+                # this should in fact be packed into a Bio::SimpleAlign object
+                # instead of an array but we'll stay with this for now
+                if( $_ =~ /^node / ) {
+                    my ($name,$num,$seqstr) = split(/\s+/,$_,3);
+                    $name .= $num;
+                    $seqstr = '' unless defined $seqstr;
+                    $seqstr =~ s/\s+//g; # remove whitespace
+                    if( @firstseq ) {
+                        my $i = 0;
+                        my $v;
+                        while( ($v = index($seqstr,'.',$i)) >= $i ) {
+                            # replace the '.' with the residue of the
+                            # first sequence
+                            substr($seqstr,$v,1,$firstseq[$v]);
+                            $i = $v;
+                        }
+                        $self->debug( "adding seq $seqstr\n");
+                    } else {
+                        @firstseq = split(//,$seqstr);
+                    }
+                    push @seqs, Bio::PrimarySeq->new(-display_id  => $name,
+                                                     -seq         => $seqstr);
+                }
+            }
+            $self->{'_rst'}->{'rctrted_seqs'} = \@seqs;
+        }
+    }
+    $self->{'_rst'}->{'trees'} = \@trees;
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Phylip/ProtDist.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Phylip/ProtDist.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Phylo/Phylip/ProtDist.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,147 @@
+# $Id: ProtDist.pm,v 1.9.4.1 2006/10/02 23:10:34 sendu Exp $
+# BioPerl module for Bio::Tools::Phylo::Phylip::ProtDist
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::Phylip::ProtDist - parser for ProtDist output
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::Phylo::Phylip::ProtDist;
+    my $parser = new Bio::Tools::Phylo::Phylip::ProtDist(-file => 'outfile');
+    while( my $result = $parser->next_matrix) {
+      # do something with it
+    }
+
+=head1 DESCRIPTION
+
+A parser for ProtDist output into a L<Bio::Matrix::PhylipDist> object.
+See also L<Bio::Matrix::IO::phylip>; this module may go away.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon 
+
+Email shawnh at fugu-sg.org 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Phylo::Phylip::ProtDist;
+use strict;
+
+use Bio::Matrix::PhylipDist;
+
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Phylo::Phylip::ProtDist();
+ Function: Builds a new Bio::Tools::Phylo::Phylip::ProtDist object 
+ Returns : Bio::Tools::Phylo::Phylip::ProtDist
+ Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
+           -program  => 'programname' # name of the program
+
+=cut
+
+sub new {
+  my($class,@args) = @_;    # was "at args": archive-mangled sigil restored
+  # Build the object, attach the input stream, then record -program.
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);
+  my ($prog) = $self->_rearrange([qw(PROGRAM)], @args);
+  $self->{'_program'} = $prog;   # handed to each matrix by next_matrix
+  return $self;
+}
+
+=head2 next_matrix
+
+ Title   : next_matrix
+ Usage   : my $matrix = $parser->next_matrix
+ Function: Get the next result set from parser data
+ Returns : L<Bio::Matrix::PhylipDist>
+ Args    : none
+
+
+=cut
+
+sub next_matrix{
+   my ($self) = @_;
+   my (@names, @values);   # taxon labels and their rows of distances
+   my $size = 0;           # taxon count announced by the matrix header
+   while (defined(my $entry = $self->_readline)) {
+       next if $entry =~ /^\s*$/;   # blank lines carry no data
+       if( @names && $entry =~ /^\s+\d+\n$/ ){
+           # header of the following matrix: hand back that very line
+           $self->_pushback($entry);   # was $_, which is not the line read
+           last;
+       } elsif( $entry =~ /^\s+(\d+)\n$/ ){
+           $size = $1;
+           next;
+       } elsif( $entry =~ s/^\s+(\-?\d+\.\d+)/$1/ ) {
+           # indented numbers continue the previous taxon's wrapped row
+           if( @values ) {
+               push @{$values[-1]}, split( /\s+/,$entry);
+           } else {
+               $self->warn("Ignoring distances that precede any taxon name");
+           }
+           next;
+       }
+       my ($n,@line) = split( /\s+/,$entry);   # sigil restored ("at line")
+       push @names, $n;
+       push @values, [@line];
+   }
+   if( scalar @names != $size ) {
+       $self->warn("The number of entries ".(scalar @names).
+                   " is not the same $size");
+   }
+   return unless @names;   # end of input: no further matrix
+   # -matrix maps each pair of names to its [row,column] index in -values
+   my %dist;
+   for my $i (0..$#names){
+       for my $j (0..$#names) {
+           $dist{$names[$i]}{$names[$j]} = [$i,$j];
+       }
+   }
+   return Bio::Matrix::PhylipDist->new(-program => $self->{'_program'},
+                                       -matrix  => \%dist,
+                                       -names   => \@names,
+                                       -values  => \@values);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Exon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Exon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Exon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,209 @@
+# $Id: Exon.pm,v 1.16.4.1 2006/10/02 23:10:35 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Prediction::Exon
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Prediction::Exon - A predicted exon feature
+
+=head1 SYNOPSIS
+
+  # See documentation of methods.
+
+=head1 DESCRIPTION
+
+A feature representing a predicted exon. This class actually inherits
+off Bio::SeqFeature::Gene::Exon and therefore has all that
+functionality (also implements Bio::SeqFeatureI), plus a few methods
+supporting predicted features, like various scores and a
+significance. Even though these were inspired by GenScan results, at
+least a subset should be generally useable for exon prediction
+results.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp-at-gmx.net
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Prediction::Exon;
+use strict;
+
+
+use base qw(Bio::SeqFeature::Gene::Exon);
+
+sub new {
+    # The argument list had been garbled to ", at args" by the mail archive.
+    my ($class, @args) = @_;
+
+    # Nothing class-specific to set up: construction is delegated entirely
+    # to the parent class.
+    my $self = $class->SUPER::new(@args);
+
+    return $self;
+}
+
+
+=head2 predicted_cds
+
+ Title   : predicted_cds
+ Usage   : $predicted_cds_dna = $exon->predicted_cds();
+           $exon->predicted_cds($predicted_cds_dna);
+ Function: Get/Set the CDS (coding sequence) as predicted by a program.
+
+           This method is independent of an attached_seq. There is no
+           guarantee whatsoever that the returned CDS has anything to do
+           (e.g., matches) with the sequence covered by the exons as annotated
+           through this object.
+
+ Example :
+ Returns : A Bio::PrimarySeqI implementing object holding the DNA sequence
+           defined as coding by a prediction of a program.
+ Args    : On set, a Bio::PrimarySeqI implementing object holding the DNA 
+           sequence defined as coding by a prediction of a program.
+
+=cut
+
+sub predicted_cds {
+    my $self    = shift;
+    my $new_cds = shift;
+
+    # Setter path: only a defined argument replaces the stored value.
+    $self->{'_predicted_cds'} = $new_cds if defined $new_cds;
+
+    # Getter path (also the result of a set): the stored value.
+    return $self->{'_predicted_cds'};
+}
+
+=head2 predicted_protein
+
+ Title   : predicted_protein
+ Usage   : $predicted_protein_seq = $exon->predicted_protein();
+           $exon->predicted_protein($predicted_protein_seq);
+ Function: Get/Set the protein translation as predicted by a program.
+
+           This method is independent of an attached_seq. There is no
+           guarantee whatsoever that the returned translation has anything to
+           do with the sequence covered by the exons as annotated
+           through this object, or the sequence returned by predicted_cds(),
+           although it should usually be just the standard translation.
+
+ Example :
+ Returns : A Bio::PrimarySeqI implementing object holding the protein 
+           translation as predicted by a program.
+ Args    : On set, a Bio::PrimarySeqI implementing object holding the protein 
+           translation as predicted by a program.
+
+=cut
+
+sub predicted_protein {
+    my $self       = shift;
+    my $new_protein = shift;
+
+    # Setter path: only a defined argument replaces the stored value.
+    $self->{'_predicted_aa'} = $new_protein if defined $new_protein;
+
+    # Getter path (also the result of a set): the stored value.
+    return $self->{'_predicted_aa'};
+}
+
+=head2 significance
+
+ Title   : significance
+ Usage   : $evalue = $obj->significance();
+           $obj->significance($evalue);
+ Function: 
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub significance {
+    my ($self, @value) = @_;
+
+    # Get/set is delegated to _tag_value() under the 'signif' tag.
+    return $self->_tag_value('signif', @value);
+}
+
+=head2 start_signal_score
+
+ Title   : start_signal_score
+ Usage   : $sc = $obj->start_signal_score();
+           $obj->start_signal_score($evalue);
+ Function: Get/Set a score for the exon start signal (acceptor splice site
+           or initiation signal).
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub start_signal_score {
+    my ($self, @value) = @_;
+
+    # Get/set is delegated to _tag_value() under the 'AccScore' tag.
+    return $self->_tag_value('AccScore', @value);
+}
+
+=head2 end_signal_score
+
+ Title   : end_signal_score
+ Usage   : $sc = $obj->end_signal_score();
+           $obj->end_signal_score($evalue);
+ Function: Get/Set a score for the exon end signal (donor splice site
+           or termination signal).
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub end_signal_score {
+    my ($self, @value) = @_;
+
+    # Get/set is delegated to _tag_value() under the 'DonScore' tag.
+    return $self->_tag_value('DonScore', @value);
+}
+
+=head2 coding_signal_score
+
+ Title   : coding_signal_score
+ Usage   : $sc = $obj->coding_signal_score();
+           $obj->coding_signal_score($evalue);
+ Function: Get/Set a score for the exon coding signal (e.g., coding potential).
+ Returns : 
+ Args    : 
+
+
+=cut
+
+sub coding_signal_score {
+    my ($self, @value) = @_;
+
+    # Get/set is delegated to _tag_value() under the 'CodScore' tag.
+    return $self->_tag_value('CodScore', @value);
+}
+
+#
+# Everything else is just inherited from Bio::SeqFeature::Gene::Exon.
+#
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Gene.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Gene.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prediction/Gene.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+# $Id: Gene.pm,v 1.15.4.1 2006/10/02 23:10:35 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Prediction::Gene
+#
+# Cared for by Hilmar Lapp <hlapp at gmx.net>
+#
+# Copyright Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Prediction::Gene - a predicted gene structure feature
+
+=head1 SYNOPSIS
+
+  #See documentation of methods.
+
+=head1 DESCRIPTION
+
+A feature representing a predicted gene structure. This class actually
+inherits off Bio::SeqFeature::Gene::Transcript and therefore has all that
+functionality, plus a few methods supporting predicted sequence features,
+like a predicted CDS and a predicted translation.
+
+Exons held by an instance of this class will usually be instances of
+Bio::Tools::Prediction::Exon, although they do not have to be. Refer to the
+documentation of the class that produced the instance.
+
+Normally, you will not want to create an instance of this class yourself.
+Instead, classes representing the results of gene structure prediction
+programs will do that.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Hilmar Lapp
+
+Email hlapp-at-gmx.net or hilmar.lapp-at-pharma.novartis.com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Prediction::Gene;
+use strict;
+
+
+
+use base qw(Bio::SeqFeature::Gene::Transcript);
+
+sub new {
+    # Both argument lists below had been garbled to ", at args" by the mail
+    # archive; they are plain @args.
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # Apply the default primary tag only when the caller supplied neither
+    # -primary nor -primary_tag.
+    my ($primary,$ptag) = $self->_rearrange([qw(PRIMARY PRIMARY_TAG)], @args);
+    $self->primary_tag('predicted_gene') unless $primary || $ptag;
+
+    return $self;
+}
+
+
+=head2 predicted_cds
+
+ Title   : predicted_cds
+ Usage   : $predicted_cds_dna = $gene->predicted_cds();
+           $gene->predicted_cds($predicted_cds_dna);
+ Function: Get/Set the CDS (coding sequence) as predicted by a program.
+
+           This method is independent of an attached_seq. There is no
+           guarantee whatsoever that the returned CDS has anything to do
+           (e.g., matches) with the sequence covered by the exons as annotated
+           through this object.
+
+ Example :
+ Returns : A Bio::PrimarySeqI implementing object holding the DNA sequence
+           defined as coding by a prediction of a program.
+ Args    : On set, a Bio::PrimarySeqI implementing object holding the DNA 
+           sequence defined as coding by a prediction of a program.
+
+=cut
+
+sub predicted_cds {
+    my $self    = shift;
+    my $new_cds = shift;
+
+    # Setter path: only a defined argument replaces the stored value.
+    $self->{'_predicted_cds'} = $new_cds if defined $new_cds;
+
+    # Getter path (also the result of a set): the stored value.
+    return $self->{'_predicted_cds'};
+}
+
+=head2 predicted_protein
+
+ Title   : predicted_protein
+ Usage   : $predicted_protein_seq = $gene->predicted_protein();
+           $gene->predicted_protein($predicted_protein_seq);
+ Function: Get/Set the protein translation as predicted by a program.
+
+           This method is independent of an attached_seq. There is no
+           guarantee whatsoever that the returned translation has anything to
+           do with the sequence covered by the exons as annotated
+           through this object, or the sequence returned by predicted_cds(),
+           although it should usually be just the standard translation.
+
+ Example :
+ Returns : A Bio::PrimarySeqI implementing object holding the protein 
+           translation as predicted by a program.
+ Args    : On set, a Bio::PrimarySeqI implementing object holding the protein 
+           translation as predicted by a program.
+
+=cut
+
+sub predicted_protein {
+    my $self        = shift;
+    my $new_protein = shift;
+
+    # Setter path: only a defined argument replaces the stored value.
+    $self->{'_predicted_aa'} = $new_protein if defined $new_protein;
+
+    # Getter path (also the result of a set): the stored value.
+    return $self->{'_predicted_aa'};
+}
+
+#
+# Everything else is just inherited from Bio::SeqFeature::Gene::Transcript.
+#
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Assessor/Base.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Assessor/Base.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Assessor/Base.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,94 @@
+# $Id: Base.pm,v 1.4.4.1 2006/10/02 23:10:36 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Primer::Assessor::Base
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Primer::Assessor::Base - base class for common assessor things
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::Primer::Assessor::Base;
+
+    $base->weight(10);
+
+=head1 DESCRIPTION
+
+Base class for assessors, probably only defining the weight function
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney-at-ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+
+
+package Bio::Tools::Primer::Assessor::Base;
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    my ( $caller, @args) = @_;
+    my ($self) = $caller->SUPER::new(@args);
+
+    # The mail archive had garbled this argument list to ", at args".
+    my ($weight) = $self->_rearrange([qw(WEIGHT)], @args);
+
+    # Fall back to a weight of 10 when -weight was not supplied.
+    if( !defined $weight ) {
+        $weight = 10;
+    }
+
+    $self->weight($weight);
+
+    # done - we hope
+    return $self;
+}
+
+sub weight {
+    my ($self, $new_weight) = @_;
+
+    # Setter path: only a defined argument replaces the stored weight.
+    $self->{'weight'} = $new_weight if defined $new_weight;
+
+    # Getter path (also the result of a set): the stored weight.
+    return $self->{'weight'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/AssessorI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/AssessorI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/AssessorI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,75 @@
+# $Id: AssessorI.pm,v 1.5.4.1 2006/10/02 23:10:35 sendu Exp $
+# BioPerl module for Bio::Tools::Primer::AssessorI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Primer::AssessorI - interface for assessing primer pairs
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::Primer::AssessorI;
+
+    if( $obj->isa('Bio::Tools::Primer::AssessorI') ) {
+	my $score = $obj->assess($primer_pair);
+    }
+
+
+=head1 DESCRIPTION
+
+The Primer Assessor interface provides an interface for scoring
+functions of primer pairs to comply to. It is mainly used by
+Bio::Tools::Primer::Design module
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney-at-ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tools::Primer::AssessorI;
+
+
+
+use base qw(Bio::Root::RootI);
+
+sub assess {
+    my ($self) = @_;
+
+    # Interface method: implementing classes are expected to override it.
+    $self->throw_not_implemented();
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Feature.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Feature.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Feature.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+# $Id: Feature.pm,v 1.5.4.1 2006/10/02 23:10:35 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Primer::Feature
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Primer::Feature - position of a single primer
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::Primer::Feature;
+
+    my $pf = Bio::Tools::Primer::Feature->new( -start => $start, -end => $end, -strand => $strand);
+    $pf->attach_seq($seq);
+
+    # is a SeqFeatureI
+
+    print "primer starts at ",$pf->start," with sequence ",$pf->seq->seq(),"\n";
+
+    # helper functions
+
+    print "GC percentage ",$pf->gc_percent(),"\n";
+    print "has inversion of size 4 at ",$pf->inversion(4),"\n";
+
+
+
+=head1 DESCRIPTION
+
+A Primer Feature represents one primer in a primer pair. This object is
+mainly for designing primers, and is probably principally used in the
+primer design system.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney-at-ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+
+package Bio::Tools::Primer::Feature;
+
+use base qw(Bio::SeqFeature::Generic);
+
+
+
+sub new {
+    my $caller = shift;
+
+    # No primer-specific initialisation: the parent constructor does all of
+    # the work and receives the remaining arguments unchanged.
+    my ($feature) = $caller->SUPER::new(@_);
+
+    return $feature;
+}
+
+sub gc_percent {
+    my ($self) = @_;
+
+    # The calculation needs the attached sequence object.
+    my $seq = $self->seq();
+    $self->throw("Primer feature has no attached sequence, can't calculate GC")
+        unless defined $seq;
+
+    # tr/// with an empty replacement list leaves the string untouched and
+    # returns the number of G/C characters (either case) it saw.
+    my $bases    = $seq->seq();
+    my $gc_count = ($bases =~ tr/GCgc//);
+
+    # Percentage of G/C relative to the length reported by the sequence object.
+    return $gc_count*100.0 / $seq->length;
+}
+
+# Look for an inverted repeat of the given size inside the primer.
+#
+# Args    : $size - length of the window to test (required)
+# Returns : the first $size-long substring of the primer whose reverse
+#           complement also occurs somewhere in the primer sequence;
+#           nothing (empty list / undef) when there is no such substring
+# Throws  : when $size is missing or no sequence is attached
+sub inversion {
+    my $self = shift;
+    my $size = shift;
+
+    if( !defined $size ) {
+        $self->throw("Must have size paramter in inversion");
+    }
+
+    my $seq = $self->seq();
+
+    if( !defined $seq ) {
+        $self->throw("Primer feature has no attached sequence, can't calculate inversion");
+    }
+
+    # Last start offset at which a full window still fits.
+    my $len = $seq->length - $size;
+
+    my $str = $seq->seq();
+
+    foreach my $i ( 0 .. $len ) {
+        my $revstr = substr($str,$i,$size);
+        my $orig = $revstr;
+        $revstr = reverse $revstr;
+
+        # Replace anything that is not a plain base by N. This must be bound
+        # to $revstr with =~ ; the original "$revstr = s/.../" ran the
+        # substitution on $_ and overwrote $revstr with its return value.
+        $revstr =~ s/[^ATGCNatgcn]/N/g;
+
+        # Complement the reversed window.
+        $revstr =~ tr/ATGCNatgcn/TACGNtacgn/;
+
+        if( $str =~ /$revstr/ ) {
+            return $orig;
+        }
+    }
+
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Pair.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Pair.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer/Pair.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+# $Id: Pair.pm,v 1.4.4.1 2006/10/02 23:10:35 sendu Exp $
+# BioPerl module for Bio::Tools::Primer::Pair
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Primer::Pair - two primers on left and right side
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::Primer::Pair;
+
+    my $pair = Bio::Tools::Primer::Pair->new( -left => $leftp , -right => $rightp);
+
+    # helper functions
+
+    print "GC percentage difference ",$pair->gc_difference(),"\n";
+    print "product length is ",$pair->product_length,"\n";
+
+
+
+=head1 DESCRIPTION
+
+A Primer Pair holds the left and right primers of a primer pair. This object is
+mainly for designing primers, and is probably principally used in the primer design system.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney-at-ebi.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+
+package Bio::Tools::Primer::Pair;
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    my ( $caller, @args) = @_;
+    my ($self) = $caller->SUPER::new(@args);
+
+    # The mail archive had garbled this argument list to ", at args".
+    my ($left,$right) = $self->_rearrange([qw(LEFT RIGHT)], @args);
+
+    # Both primers are mandatory.
+    if( !defined $left || !defined $right ) {
+        $self->throw("Pair must be initialised with left and right primers");
+    }
+
+    # The accessors check that each primer is a Bio::Tools::Primer::Feature.
+    $self->left($left);
+    $self->right($right);
+
+    # done - we hope
+    return $self;
+}
+
+sub left {
+    my ($self, $left) = @_;
+
+    # Getter: without a defined argument just hand back the stored primer.
+    return $self->{'left'} unless defined $left;
+
+    # Setter: only Bio::Tools::Primer::Feature objects are accepted.
+    unless( ref $left && $left->isa("Bio::Tools::Primer::Feature") ) {
+        $self->throw("left primer must be a Bio::Tools::Primer::Feature, not $left");
+    }
+    $self->{'left'} = $left;
+
+    return $self->{'left'};
+}
+
+
+sub right {
+    my ($self, $right) = @_;
+
+    # Getter: without a defined argument just hand back the stored primer.
+    return $self->{'right'} unless defined $right;
+
+    # Setter: only Bio::Tools::Primer::Feature objects are accepted.
+    unless( ref $right && $right->isa("Bio::Tools::Primer::Feature") ) {
+        $self->throw("right primer must be a Bio::Tools::Primer::Feature, not $right");
+    }
+    $self->{'right'} = $right;
+
+    return $self->{'right'};
+}
+
+sub gc_difference {
+    my ($self) = @_;
+
+    # Absolute difference between the GC percentages of the two primers.
+    my $left_gc  = $self->left->gc_percent;
+    my $right_gc = $self->right->gc_percent;
+
+    return abs( $left_gc - $right_gc );
+}
+
+sub product_length {
+    my ($self) = @_;
+
+    # Inclusive span from the start of the left primer to the end of the
+    # right primer.
+    my $product_end   = $self->right->end;
+    my $product_start = $self->left->start;
+
+    return $product_end - $product_start + 1;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer3.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer3.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer3.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,421 @@
+# $Id: Primer3.pm,v 1.20.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Primer3
+#
+# Copyright (c) 2003 bioperl, Rob Edwards. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or
+#           modify it under the same terms as Perl itself.
+#
+# Copyright Rob Edwards
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Primer3 - Create input for and work with the output from
+the program primer3
+
+=head1 SYNOPSIS
+
+ # parse primer3 output to get some data
+ # this is also called from Bio::Tools::Run::Primer3
+ use Bio::Tools::Primer3;
+
+ # read a primer3 output file
+ my $p3 = Bio::Tools::Primer3->new(-file=>"data/primer3_output.txt");
+
+ # how many results were there?
+ my $num = $p3->number_of_results;
+ print "There were $num results\n";
+
+ # get all the results
+ my $all_results = $p3->all_results;
+ print "ALL the results\n";
+ foreach my $key (keys %{$all_results}) {
+    print "$key\t${$all_results}{$key}\n";
+ }
+
+ # get specific results
+ my $result1 = $p3->primer_results(1);
+ print "The first primer is\n";
+ foreach my $key (keys %{$result1}) {
+    print "$key\t${$result1}{$key}\n";
+ }
+
+ # get the results as a Bio::Seq::PrimedSeq stream
+ my $primer = $p3->next_primer;
+ print "The left primer in the stream is ",
+   $primer->get_primer('-left_primer')->seq->seq, "\n";
+
+=head1 DESCRIPTION
+
+Bio::Tools::Primer3 creates the input files needed to design primers using
+primer3 and provides mechanisms to access data in the primer3 output files.
+
+This module provides a bioperl interface to the program primer3. See
+http://www-genome.wi.mit.edu/genome_software/other/primer3.html
+for details and to download the software.
+
+This module is based on one written by Chad Matsalla
+(bioinformatics1 at dieselwurks.com)
+
+I have ripped some of his code, and added a lot of my own. I hope he
+is not mad at me!
+
+This is probably best run in one of the two following ways:
+
+  i. To parse the output from Bio::Tools::Run::Primer3.
+     You will most likely just use next_primer to get the results from
+     Bio::Tools::Run::Primer3.
+  ii. To parse the output of primer3 handed to it as a file name.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR -
+
+  Rob Edwards
+
+  redwards at utmem.edu
+
+  Based heavily on work of
+
+  Chad Matsalla
+
+  bioinformatics1 at dieselwurks.com
+
+=head1 CONTRIBUTORS
+
+  Brian Osborne bosborne at alum.mit.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::Primer3;
+
+use strict;
+use Bio::Seq;
+use Bio::Seq::PrimedSeq;
+use Bio::SeqFeature::Primer;
+use Clone qw(clone);
+
+use vars qw($AUTOLOAD @PRIMER3_PARAMS %OK_FIELD $ID);
+
+BEGIN {
+ # Attribute names that the AUTOLOAD accessor is allowed to serve.
+ @PRIMER3_PARAMS = ('results', 'seqobject');
+
+ # Turn the list into a lookup table keyed by attribute name.
+ $OK_FIELD{$_}++ for @PRIMER3_PARAMS;
+}
+
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+sub AUTOLOAD {
+ my ($self, @new_value) = @_;
+
+ # Strip the package qualifier to get the bare name that was called.
+ (my $attr = $AUTOLOAD) =~ s/.*:://;
+
+ # Only names registered in %OK_FIELD act as accessors.
+ $OK_FIELD{$attr} or $self->throw("Unallowed parameter: $attr !");
+
+ # Combined getter/setter: store the first argument when one was passed.
+ $self->{$attr} = $new_value[0] if @new_value;
+ return $self->{$attr};
+}
+
+# Package name kept in a package variable (declared via "use vars" above).
+# NOTE(review): no use of $ID is visible in this part of the file.
+$ID = 'Bio::Tools::Primer3';
+
+=head2 new
+
+  Title   : new()
+  Usage   : my $primer3 = Bio::Tools::Primer3->new(-file=>$file)
+            to read a primer3 output file.
+  Function: Parse primer3 output
+  Returns : A Bio::Tools::Primer3 object. If called with a filename, the
+            file is parsed so that the results can be retrieved
+  Args    : -file    (optional) file of primer3 results to parse
+            -verbose (optional) set verbose output
+  Notes   :
+
+=cut
+
+
+sub new {
+ my $class = shift;
+ my %args  = @_;
+
+ my $self = $class->SUPER::new(%args);
+
+ # Parse the primer3 output straight away when a file name was given.
+ $self->_readfile($args{'-file'}) if $args{'-file'};
+
+ # Record the verbosity request on the object.
+ $self->{'verbose'} = 1 if $args{'-verbose'};
+
+ return $self;
+}
+
+
+=head2 number_of_results
+
+  Title   : number_of_results()
+  Usage   : $primer3->number_of_results()
+  Function: Retrieve the number of primers returned from Primer3.
+  Returns : A scalar
+  Args    : None
+  Notes   : This returns the count of the primers returned by Primer3
+             (aka how many of them there are).
+             This is one more than the maximum offset into the zero
+             based list of primers that is accessed by primer_results().
+
+=cut
+
+sub number_of_results {
+ my ($self) = @_;
+
+ # The stored value is the highest zero-based primer index, so the count
+ # is one more than that.
+ my $highest_index = $self->{'maximum_primers_returned'};
+ return $highest_index + 1;
+}
+
+
+=head2 all_results
+
+  Title   : all_results()
+  Usage   : $primer3->all_results() to print all results or
+            $primer3->all_results('primer3 result name',
+            'other results') to return a specific result
+  Function: Retrieve the results returned from Primer3.
+  Returns : A reference to a hash
+  Args    : Optional array of results to retrieve
+
+=cut
+
+sub all_results {
+	my ($self, @results) = @_;
+	my %hash;
+	if (@results) {
+		# we only want a few things
+		foreach my $result (@results) {
+			# Plain hash lookup. The original wrote ->$result, which is a
+			# method call on the (unblessed) results hash and dies.
+			$hash{$result}=$self->{'results'}->{$result};
+		}
+	} else {
+		# No names given: copy every parsed result.
+		foreach my $result (keys %{$self->{'results'}}) {
+			$hash{$result}=$self->{'results'}->{$result};
+		}
+	}
+
+	# Always a reference to a fresh hash, never the internal one.
+	return \%hash;
+}
+
+
+=head2 primer_results
+
+  Title   : primer_results()
+  Usage   : $primer3->primer_results(2) to print results for the third
+            choice primer (indexed on 0)
+  Function: Retrieve the results returned from Primer3 for specific primer pairs.
+  Returns : A reference to a hash
+  Args    : A number between 0 and the maximum number of primers to retrieve
+
+=cut
+
+sub primer_results {
+	my $self  = shift;
+	my $toget = shift;
+
+	# Guard: indices beyond the highest parsed primer number yield a
+	# warning and a false value.
+	if ($toget > $self->{'maximum_primers_returned'}) {
+		$self->warn("Didn't get any results for $toget");
+		return 0;
+	}
+
+	# Reference to the result hash stored for this primer number.
+	return \%{$self->{'results_by_number'}->{$toget}};
+}
+
+=head2 _readfile
+
+  Title   : _readfile()
+  Usage   : $self->_readfile($file);
+  Function: An internal function that reads a file and sets up the results
+  Returns : Nothing.
+  Args    : Name of the primer3 output file to read
+  Notes   :
+
+=cut
+
+sub _readfile {
+	my ($self, $file) = @_;
+	$self->_initialize_io(-file=>$file);
+	my $line;
+	my $id='primer 3 parsed results'; # hopefully we'll get this, but we can set a temp id in case not.
+	while (defined($line=$self->_readline()) ) {
+		chomp $line;
+		next unless ($line);
+		# Each line is TAG=VALUE.
+		# NOTE(review): without a split limit, a value that itself contains
+		# '=' is cut at its first '=' - confirm whether that can occur.
+		my ($return, $value) = split /=/, $line;
+		if (uc($return) eq "SEQUENCE") {
+			# Name the argument -id. The original wrote $id=>$id, which
+			# passed the id text as the argument name.
+			$self->{seqobject}=Bio::Seq->new(-seq=>$value, -id=>$id);
+			next;
+		}
+		if (uc($return) eq "PRIMER_SEQUENCE_ID") {
+			# Apply the id right away if the sequence object already exists,
+			# otherwise remember it for when SEQUENCE is read.
+			if ($self->{seqobject}) {$self->{seqobject}->id($value)} else {$id=$value}
+		}
+
+		# Every tag except SEQUENCE is kept in the raw results hash.
+		$self->{'results'}->{$return} = $value;
+	}
+
+	# convert the results to individual results
+	$self->_separate();
+}
+
+=head2 next_primer
+
+  Title   : next_primer()
+  Usage   : while (my $primed_seq  = $primer3->next_primer()) {
+  Function: Retrieve the primed sequence and a primer pair, one at a time
+  Returns : Returns a Bio::Seq::PrimedSeq feature, one at a time
+  Args    : None
+  Notes   : Use $primed_seq->annotated_seq to get an annotated sequence
+            object you can write out.
+
+=cut
+
+sub next_primer {
+	my $self = shift;
+	# here we are going to convert the primers to Bio::SeqFeature::Primer objects
+	# and the primer/sequence to Bio::Seq::PrimedSeq objects
+	# the problem at the moment is that PrimedSeq can only take one sequence/primer pair, and
+	# yet for each sequence we can have lots of primer pairs. We need a way to overcome this.
+	# at the moment we can do this as a stream, I guess.
+
+	$self->warn("No primers were found for: ".$self->{'seqobject'}->{'primary_id'})
+	  if (! $self->number_of_results);
+
+	$self->{'next_to_return'} = 0 unless ($self->{'next_to_return'});
+	# NOTE(review): maximum_primers_returned is the highest zero-based index
+	# (see _separate), so ">=" appears to stop one pair early - confirm
+	# before changing, callers may rely on the current count.
+	return if ($self->{'next_to_return'} >= $self->{'maximum_primers_returned'});
+	my $results = $self->primer_results($self->{'next_to_return'});
+
+	$self->throw("No left primer sequence") unless (${$results}{'PRIMER_LEFT_SEQUENCE'});
+	$self->throw("No right primer sequence") unless (${$results}{'PRIMER_RIGHT_SEQUENCE'});
+	$self->throw("No target sequence") unless ($self->{'seqobject'});
+
+	my $left_seq  = Bio::SeqFeature::Primer->new(
+		-primer_sequence_id => "left_primer",
+		-sequence => ${$results}{'PRIMER_LEFT_SEQUENCE'},
+		-display_id => ($self->{'next_to_return'} + 1) );
+	my $right_seq = Bio::SeqFeature::Primer->new(
+		-primer_sequence_id => "right_primer",
+		-sequence => ${$results}{'PRIMER_RIGHT_SEQUENCE'},
+		-display_id => ($self->{'next_to_return'} + 1) );
+
+	# add data to the Primer objects
+	# Iterate over the keys only. The original looped over the flattened
+	# hash (%$results), so every value was also treated as a tag name.
+	for my $key (keys %$results) {
+		# skip the primer sequence data, already added above
+		next if ($key =~ /PRIMER_(LEFT|RIGHT)_SEQUENCE/i );
+		if ($key =~ /PRIMER_LEFT/i) {
+			$left_seq->add_tag_value($key, $$results{$key});
+		} elsif ($key =~ /PRIMER_RIGHT/i) {
+			$right_seq->add_tag_value($key, $$results{$key});
+		}
+	}
+
+	# Each PrimedSeq gets its own copy of the target sequence object.
+	my $primed_seq = Bio::Seq::PrimedSeq->new(-target_sequence => clone($self->{'seqobject'}),
+		-left_primer => $left_seq,
+		-right_primer => $right_seq);
+
+	# add data to the the PrimedSeq object that's not specific to the Primers
+	for my $key (keys %$results) {
+		next if ($key =~ /PRIMER_(LEFT|RIGHT)/i );
+		$primed_seq->add_tag_value($key, $$results{$key});
+	}
+
+	$self->{'next_to_return'}++;
+	return $primed_seq;
+}
+
+
+=head2 primer_stream
+
+  Title   : primer_stream()
+  Usage   : while (my $primed_seq  = $primer3->primer_stream()) {
+  Function: Retrieve the primer/sequences one at a time
+  Returns : Returns a Bio::Seq::PrimedSeq feature, one at a time
+  Args    : None
+  Notes   : Deprecated, just a link to next_primer
+
+=cut
+
+sub primer_stream {
+	my ($self) = @_;
+
+	# Deprecated alias for next_primer(). The call is forced into scalar
+	# context so that an exhausted stream still yields a single undef.
+	return scalar $self->next_primer;
+}
+
+=head2 _separate
+
+  Title   : _separate()
+  Usage   : $self->_separate();
+  Function: An internal function that groups the results by number
+            (e.g. primer pair 1, etc)
+  Returns : Nothing.
+  Args    : None
+  Notes   :
+
+=cut
+
+sub _separate {
+	my ($self) = @_;
+	my %by_number;       # primer number => { tag without number => value }
+	my $highest = -1;    # highest primer number seen so far
+
+	foreach my $key (keys %{$self->{'results'}}) {
+		# keys flagged in input_options are not processed
+		next if (${$self->{'input_options'}}{$key});
+
+		# Tags look like PRIMER_RIGHT_SEQUENCE, PRIMER_RIGHT_2_SEQUENCE,
+		# PRIMER_PRODUCT_SIZE or PRIMER_PRODUCT_SIZE_3: the first "_<digits>"
+		# is cut out of the name and used as the primer number.
+		my $stripped = $key;
+		my $location = 0;    # tags without a number belong to primer 0
+		if ($stripped =~ s/_(\d+)//) {
+			$location = $1;
+			$highest  = $location if $location > $highest;
+		} elsif ( $stripped =~ /PRIMER_(RIGHT|LEFT)/ ) {
+			# an unnumbered left/right primer tag proves primer 0 exists
+			$highest = $location if $location > $highest;
+		}
+
+		# hash the results by number, and then by name
+		${$by_number{$location}}{$stripped} = ${$self->{'results'}}{$key};
+	}
+
+	$self->{'results_by_number'}        = \%by_number;
+	$self->{'maximum_primers_returned'} = $highest;
+}
+
+=head2 _set_variable
+
+  Title   : _set_variable()
+  Usage   : $self->_set_variable('variable name', 'value');
+  Function: An internal function that sets a variable
+  Returns : Nothing.
+  Args    : The variable name and the value to store under it
+  Notes   : Used to set $self->{results} and $self->seqobject
+
+=cut
+
+sub _set_variable {
+        my ($self, $name, $value)=@_;
+        # Nothing to do without a name. The original used "next" here, which
+        # is only valid inside a loop; "return" is what leaves a subroutine.
+        return unless ($name);
+        $self->{$name} = $value;
+}
+
+1;
+
+__END__
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Primer3.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prints.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prints.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prints.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,232 @@
+# $Id: Prints.pm,v 1.13.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Prints
+#
+# Cared for by  Balamurugan Kumarasamy
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+#
+
+=head1 NAME
+
+Bio::Tools::Prints - Parser for FingerPRINTScanII program 
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Prints;
+  my $prints_parser = new Bio::Tools::Prints(-fh =>$filehandle );
+  while( my $prints_feat = $prints_parser->next_result ) {
+        push @prints_feat, $prints_feat;
+  }
+
+=head1 DESCRIPTION
+
+ PRINTScan II is a PRINTS fingerprint identification algorithm.
+ Copyright (C) 1998,1999  Phil Scordis
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+ User feedback is an integral part of the evolution of this and other
+ Bioperl modules. Send your comments and suggestions preferably to
+ the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+ Report bugs to the Bioperl bug tracking system to help us keep track
+ of the bugs and their resolution. Bug reports can be submitted via
+ the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Balamurugan Kumarasamy
+
+ bala at tll.org.sg
+ juguang at tll.org.sg
+
+=head1 APPENDIX
+
+ The rest of the documentation details each of the object methods.
+ Internal methods are usually preceded with a _
+
+
+=cut
+
+package Bio::Tools::Prints;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Prints(-fh=>$filehandle);
+ Function: Builds a new Bio::Tools::Prints object
+ Returns : Bio::Tools::Prints
+ Args    : -filename
+           -fh (filehandle)
+
+=cut
+
+sub new {
+      # The archived copy read ", at args" here: the '@' sigil had been
+      # rewritten by e-mail address obfuscation, which is a syntax error.
+      my($class,@args) = @_;
+
+      # Build the base object, then let Bio::Root::IO consume -fh / -file.
+      my $self = $class->SUPER::new(@args);
+      $self->_initialize_io(@args);
+
+      return $self;
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $prints_parser->next_result
+ Function: Get the next result set from parser data
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+    # Fingerprint name => PRINTS accession, collected from 1TBH lines.
+    my %printsac;
+
+    # A lexical line variable keeps the caller's $_ intact, and the explicit
+    # defined() test stops a final unterminated line of "0" from being
+    # mistaken for end of input.
+    while (defined(my $line = $self->_readline())) {
+           chomp $line;
+
+           if ($line =~ s/^Sn;//) { # Sn; line: remember the sequence name
+              my ($sequenceId) = $line =~ /^\s*(\w+)/;
+              $self->seqname($sequenceId);
+              next;
+           }
+
+           if ($line =~ s/^1TBH//) { # 1TBH line: fingerprint name ... accession
+               my ($id) = $line =~ /^\s*(\w+)/;
+               my ($ac) = $line =~ /(PR\w+)\s*$/;
+               $printsac{$id} = $ac;
+               $self->print_sac(\%printsac);
+               next;
+           }
+
+           # Everything else of interest starts with 3TB.
+           next unless $line =~ s/^3TB//;
+
+           if ($line =~ s/^[HN]//) { # 3TBH / 3TBN line: one motif match
+               $line =~ s/^\s+//;
+               my @elements = split /\s+/, $line;
+
+               # Only these columns are used: 0 fingerprint name,
+               # 4 percentage identity, 5 profile score, 6 p-value,
+               # 8 motif length, 10 match position.
+               my ($fingerprintName, $percentageIdentity, $profileScore,
+                   $pvalue, $motifLength, $matchPosition)
+                   = @elements[0, 4, 5, 6, 8, 10];
+
+               my $start = $matchPosition;
+               my $end   = $matchPosition + $motifLength - 1;
+
+               # Fall back to an empty map when no 1TBH line has been seen,
+               # instead of dying on an undefined hash reference.
+               my $print_sac = $self->print_sac || {};
+               my $print     = $print_sac->{$fingerprintName};
+               my $seqname   = $self->seqname;
+
+               my $feat = "$print,$start,$end,$percentageIdentity,$profileScore,$pvalue";
+               return $self->create_feature($feat,$seqname);
+           }
+
+           # A 3TBF line ends the current result set.
+           return if $line =~ s/^F//;
+    }
+
+    # Input exhausted.
+    return;
+}
+
+=head2 create_feature
+
+ Title   : create_feature
+ Usage   : my $feat=$prints_parser->create_feature($feature,$seqname)
+ Function: creates a FeaturePair from two SeqFeature::Generic objects
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    :
+
+
+=cut
+
+sub create_feature {
+    my ($self, $feat, $sequenceId) = @_;
+
+    # $feat is a comma-joined record; next_result builds it as
+    # accession,start,end,percent identity,profile score,p-value.
+    my @fields = split(/,/, $feat);
+    my ($accession, $start, $end, $percent_id, $score, $evalue) = @fields;
+
+    # Feature on the query sequence, carrying the match statistics.
+    my $query_feature = Bio::SeqFeature::Generic->new(
+        -seq_id     => $sequenceId,
+        -start      => $start,
+        -end        => $end,
+        -score      => $score,
+        -source     => "PRINTS",
+        -primary    => $accession,
+        -logic_name => "PRINTS",
+    );
+    $query_feature->add_tag_value('evalue', $evalue);
+    $query_feature->add_tag_value('percent_id', $percent_id);
+
+    # Placeholder feature for the fingerprint itself; no coordinates known.
+    my $hit_feature = Bio::SeqFeature::Generic->new(
+        -seq_id => $accession,
+        -start  => 0,
+        -end    => 0,
+    );
+
+    return Bio::SeqFeature::FeaturePair->new(
+        -feature1 => $query_feature,
+        -feature2 => $hit_feature
+    );
+}
+
+=head2 print_sac
+
+ Title   : print_sac
+ Usage   : $prints_parser->print_sac($print_sac)
+ Function: get/set for print_sac
+ Returns :
+ Args    :
+
+
+=cut
+
+sub print_sac {
+    my ($self, @new_value) = @_;
+    # Any argument at all (even undef) acts as a setter.
+    if (@new_value) {
+        $self->{'print_sac'} = $new_value[0];
+    }
+    return $self->{'print_sac'};
+}
+
+=head2 seqname 
+
+ Title   : seqname
+ Usage   : $prints_parser->seqname($seqname)
+ Function: get/set for seqname
+ Returns :
+ Args    :
+
+
+=cut
+
+sub seqname {
+    my ($self, $seqname) = @_;
+    # Only a defined argument updates the stored name.
+    if (defined $seqname) {
+        $self->{'seqname'} = $seqname;
+    }
+    return $self->{'seqname'};
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Prints.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Profile.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Profile.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Profile.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+# $Id: Profile.pm,v 1.8.4.1 2006/10/02 23:10:32 sendu Exp $
+# BioPerl module for Bio::Tools::Profile
+#
+# Cared for by Balamurugan Kumarasamy
+#
+# You may distribute this module under the same terms as perl itself
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Profile - parse Profile output
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::Profile;
+ my $profile_parser = new Bio::Tools::Profile(-fh =>$filehandle );
+ while( my $profile_feat = $profile_parser->next_result ) {
+       push @profile_feat, $profile_feat;
+}
+
+=head1 DESCRIPTION
+
+ Parser for Profile output
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+ User feedback is an integral part of the evolution of this and other
+ Bioperl modules. Send your comments and suggestions preferably to
+ the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Balamurugan Kumarasamy
+
+ Email: fugui at worf.fugu-sg.org
+
+=head1 APPENDIX
+
+ The rest of the documentation details each of the object methods.
+ Internal methods are usually preceded with a _
+
+
+=cut
+
+
+package Bio::Tools::Profile;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Profile();
+ Function: Builds a new Bio::Tools::Profile object
+ Returns : Bio::Tools::Profile
+ Args    : -filename
+           -fh ($filehandle)
+
+=cut
+
+sub new {
+    # The archived copy read ", at args" here: the '@' sigil had been
+    # rewritten by e-mail address obfuscation, which is a syntax error.
+    my($class,@args) = @_;
+
+    # Build the base object, then let Bio::Root::IO consume -fh / -file.
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize_io(@args);
+
+    return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $profile_parser->next_result
+ Function: Get the next result set from parser data
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    : none
+
+
+=cut
+
+sub next_result {
+        my ($self) = @_;
+
+        # A lexical line variable keeps the caller's $_ intact; defined()
+        # keeps a final unterminated "0" line from ending the loop early.
+        while (defined(my $line = $self->_readline())) {
+                chomp $line;
+                my ($nscore,$rawscore,$from,$to,$hfrom,$hto,$ac) = $line =~ /(\S+)\s+(\d+)\s*pos.\s+(\d*)\s*-\s+(\d*)\s*\[\s+(\d*),\s+(\S*)\]\s*(\w+)/;
+                #for example in this output line
+                #38.435   2559 pos.  19958 - 20212 [    1,    -1] PS50011|PROTEIN_KINASE_DOM Protein kinase domain profile.
+                #$nscore = 38.435
+                #$rawscore = 2559
+                #$from = 19958
+                #$to = 20212
+                #$hfrom = 1
+                #$hto =-1
+                #$ac = PS50011
+
+                # Lines that do not look like a hit used to be turned into a
+                # feature built entirely from undefined values; skip them.
+                next unless defined $ac;
+
+                my $feat = "$ac,$from,$to,$hfrom,$hto,$nscore";
+                return $self->create_feature($feat);
+        }
+
+        # Input exhausted.
+        return;
+}
+
+
+=head2 create_feature
+
+ Title   : create_feature
+ Usage   : my $feat= $profile_parser->create_feature($feature)
+ Function: creates a Bio::SeqFeature::FeaturePair object
+ Returns : L<Bio::SeqFeature::FeaturePair>
+ Args    :
+
+
+=cut
+
+sub create_feature {
+        my ($self, $feat) = @_;
+
+        # $feat is a comma-joined record; next_result builds it as
+        # accession,from,to,hfrom,hto,nscore.
+        my @fields = split(/,/, $feat);
+
+        # An hto containing "-1" is replaced by the length of the match on
+        # the query (to - from + 1).
+        # NOTE(review): the pattern is unanchored, so values such as "-10"
+        # also take this branch - confirm that is intended.
+        my $hto = ($fields[4] =~ /-1/)
+                ? $fields[2] - $fields[1] + 1
+                : $fields[4];
+
+        # Match on the query sequence.
+        my $query_feat = Bio::SeqFeature::Generic->new(
+                -start   => $fields[1],
+                -end     => $fields[2],
+                -score   => $fields[5],
+                -source  => 'pfscan',
+                -primary => $fields[0],
+        );
+
+        # Corresponding range on the profile.
+        my $profile_feat = Bio::SeqFeature::Generic->new(
+                -start => $fields[3],
+                -end   => $hto,
+        );
+
+        return Bio::SeqFeature::FeaturePair->new(
+                -feature1 => $query_feat,
+                -feature2 => $profile_feat,
+        );
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Promoterwise.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Promoterwise.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Promoterwise.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,232 @@
+# $Id: Promoterwise.pm,v 1.8.4.1 2006/10/02 23:10:32 sendu Exp $
+# BioPerl module for Bio::Tools::Promoterwise
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Promoterwise - parser for Promoterwise tab format output
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Promoterwise;
+
+  my $pw = Bio::Tools::Promoterwise->new(-file=>"out",
+                                         -query1_seq=>$seq1,
+                                         -query2_seq=>$seq2);
+  while (my $fp = $pw->next_result){
+    print "Hit Length: ".$fp->feature1->length."\n";
+    print "Hit Start: ".$fp->feature1->start."\n";
+    print "Hit End: ".$fp->feature1->end."\n";
+    print "Hsps: \n";
+    my @first_hsp = $fp->feature1->sub_SeqFeature;
+    my @second_hsp = $fp->feature2->sub_SeqFeature;
+    foreach my $i (0..$#first_hsp){
+      print $first_hsp[$i]->start. " ".$first_hsp[$i]->end." ".
+            $second_hsp[$i]->start. " ".$second_hsp[$i]->end."\n";
+    }
+  }
+
+=head1 DESCRIPTION
+
+Promoterwise is an alignment algorithm that relaxes the constraint
+that local alignments have to be co-linear. Otherwise it provides a
+similar model to DBA, which is designed for promoter sequence
+alignments.  Promoterwise is written by Ewan Birney.  It is part of
+the wise2 package available at
+L<ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/>
+
+This module is the parser for the Promoterwise output in tab format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Promoterwise;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Promoterwise();
+ Function: Builds a new Bio::Tools::Promoterwise object
+ Returns : L<Bio::Tools::Promoterwise>
+ Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
+
+
+=cut
+
+sub new {
+  # The archived copy read ", at args" here and in the _rearrange call
+  # below: the '@' sigil had been rewritten by e-mail address obfuscation,
+  # which is a syntax error.
+  my($class,@args) = @_;
+
+  # Build the base object, then let Bio::Root::IO consume -fh / -file.
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);
+
+  # Optional sequence objects that _parse attaches to the features.
+  my ($query1,$query2) = $self->_rearrange([qw(QUERY1_SEQ QUERY2_SEQ)],@args);
+  $self->query1_seq($query1) if ($query1);
+  $self->query2_seq($query2) if ($query2);
+
+  return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $fp = $pw->next_result
+ Function: Get the next result set from parser data
+ Returns : an  L<Bio::SeqFeature::FeaturePair>
+ Args    : none
+
+
+=cut
+
+sub next_result {
+  my ($self) = @_;
+  # The whole input is parsed on the first call; later calls only drain
+  # the queue of feature pairs.
+  if (!$self->_parsed) {
+    $self->_parse;
+  }
+  return $self->_next_result;
+}
+
+sub _parse{
+   my ($self) = @_;
+   # The archived copy read "%hash, at fp": the '@' sigil had been rewritten
+   # by e-mail address obfuscation.
+   my (%hash,@fp);
+
+   # Read every row and bucket it by its last column.  A lexical line
+   # variable keeps the caller's $_ intact.
+   while (defined(my $line = $self->_readline()) ) {
+       chomp $line;
+       my @array = split ' ', $line;
+       # A blank line has no columns; it used to be filed under an undefined
+       # key and then abort the parse with a "0 columns" error.
+       next unless @array;
+       push @{$hash{$array[-1]}}, \@array;
+   }
+
+   # One FeaturePair per bucket; each row becomes a sub-feature on each side.
+   foreach my $key(keys %hash){
+    my $sf1 = Bio::SeqFeature::Generic->new(-primary=>"conserved_element",
+                                            -source_tag=>"promoterwise");
+    $sf1->attach_seq($self->query1_seq) if $self->query1_seq;
+    my $sf2 = Bio::SeqFeature::Generic->new(-primary=>"conserved_element",
+                                            -source_tag=>"promoterwise");
+    $sf2->attach_seq($self->query2_seq) if $self->query2_seq;
+
+    foreach my $info(@{$hash{$key}}){
+
+      my ($score,$id1,$start_1,$end_1, $strand_1,$s1_len,
+          $id2,$start_2,$end_2,$strand_2,$s2_len, $group);
+      if( @{$info} == 12 ) {
+          ($score,$id1,$start_1,$end_1, $strand_1,$s1_len,
+           $id2,$start_2,$end_2,$strand_2,$s2_len, $group) = @{$info};
+      } elsif( @{$info} == 10 ) {
+          # NOTE(review): the ninth column lands in $s2_len and $strand_2
+          # stays undefined here, although the first side keeps its strand.
+          # This looks like it may be a typo for $strand_2 - confirm against
+          # real 10-column output before changing it.
+          ($score,$id1,$start_1,$end_1, $strand_1,
+           $id2,$start_2,$end_2,$s2_len, $group) = @{$info};
+      } else {
+          # throw() takes a single message; extra positional arguments are
+          # not appended to it, so build the whole text first.
+          $self->throw("unknown promoterwise output, " . scalar(@{$info}) .
+                       " columns, expected 10 or 12\n");
+      }
+
+      # Top-level strand, ids and score come from the first row seen.
+      if(!$sf1->strand && !$sf2->strand){
+        $sf1->strand($strand_1);
+        $sf2->strand($strand_2);
+        $sf1->seq_id($id1);
+        $sf2->seq_id($id2);
+        $sf1->score($score);
+        $sf2->score($score);
+      }
+
+      my $sub1 = Bio::SeqFeature::Generic->new(-start=>$start_1,
+                                              -seq_id=>$id1,
+                                              -end  =>$end_1,
+                                              -strand=>$strand_1,
+                                              -primary=>"conserved_element",
+                                              -source_tag=>"promoterwise",
+                                              -score=>$score);
+      $sub1->attach_seq($self->query1_seq) if $self->query1_seq;
+
+      my $sub2 = Bio::SeqFeature::Generic->new(-start=>$start_2,
+                                              -seq_id=>$id2,
+                                              -end  =>$end_2,
+                                              -strand=>$strand_2,
+                                              -primary=>"conserved_element",
+                                              -source_tag=>"promoterwise",
+                                              -score=>$score);
+      $sub2->attach_seq($self->query2_seq) if $self->query2_seq;
+
+      # 'EXPAND' grows the parent feature to cover each added sub-feature.
+      $sf1->add_SeqFeature($sub1,'EXPAND');
+      $sf2->add_SeqFeature($sub2,'EXPAND');
+    }
+
+    my $fp = Bio::SeqFeature::FeaturePair->new(-feature1=>$sf1,
+                                               -feature2=>$sf2);
+    push @fp, $fp;
+  }
+    $self->_feature_pairs(\@fp);
+    $self->_parsed(1);
+    return;
+}
+
+sub _feature_pairs {
+  my ($self, $fp) = @_;
+  # Get/set the queue of parsed feature pairs; only a true argument
+  # replaces the stored value.
+  $self->{'_feature_pairs'} = $fp if $fp;
+  return $self->{'_feature_pairs'};
+}
+
+sub _next_result {
+  my ($self) = @_;
+  # Nothing to hand out when the queue is missing or already drained.
+  my $have_pairs = exists($self->{'_feature_pairs'})
+                && @{$self->{'_feature_pairs'}};
+  return unless $have_pairs;
+  return shift @{$self->{'_feature_pairs'}};
+}
+sub _parsed {
+  my ($self, $flag) = @_;
+  # A true argument marks the input as parsed; the mark is never cleared.
+  $self->{'_flag'} = 1 if $flag;
+  return $self->{'_flag'};
+}
+
+sub query1_seq {
+  my ($self, $val) = @_;
+  # Get/set the first query sequence; only a true argument is stored.
+  $self->{'query1_seq'} = $val if $val;
+  return $self->{'query1_seq'};
+}
+sub query2_seq {
+  my ($self, $val) = @_;
+  # Get/set the second query sequence; only a true argument is stored.
+  $self->{'query2_seq'} = $val if $val;
+  return $self->{'query2_seq'};
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/PrositeScan.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/PrositeScan.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/PrositeScan.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,170 @@
+
+=head1 NAME
+
+Bio::Tools::PrositeScan - Parser for ps_scan result
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::PrositeScan;
+
+  my $factory = Bio::Tools::PrositeScan->new(
+      -file   => 'out.PrositeScan',
+      -format => 'fasta'
+  );
+
+  while(my $match = $factory->next_prediction){
+      #  $match is of Bio::SeqFeature::FeaturePair
+      my $q_id = $match->feature1->seq_id;
+      my $h_id = $match->feature2->seq_id;
+  }
+
+=head1 DESCRIPTION
+
+This is the parser of the output of ps_scan program. It takes either a file
+handler or a file name, and returns a Bio::SeqFeature::FeaturePair object.
+
+=head1 AUTHOR
+
+Juguang Xiao, juguang at tll.org.sg
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::PrositeScan;
+use vars qw(@FORMATS);
+use strict;
+use Bio::Seq;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::FeaturePair;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+# Format names accepted by new().  (The archived copy read " at FORMATS":
+# the '@' sigil had been rewritten by e-mail address obfuscation.)
+@FORMATS = qw(SCAN FASTA PSA MSA PFF MATCHLIST);
+
+=head2 new
+
+  Title   : new
+  Usage   : Bio::Tools::PrositeScan->new(-file => 'out.PrositeScan');
+            Bio::Tools::PrositeScan->new(-fh => \*FH);
+  Returns : L<Bio::Tools::PrositeScan>
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    # Build the base object, then let Bio::Root::IO consume -fh / -file.
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize_io(@args);
+
+    # -format is mandatory and must be one of @FORMATS (any letter case).
+    my ($format) = $self->_rearrange([qw(FORMAT)], @args);
+    $format || $self->throw("format needed");
+
+    # \Q...\E makes the user-supplied name match literally; without it a
+    # value such as 'fast.' would be accepted and one containing '(' would
+    # die inside the regex compiler.
+    if(grep { /^\Q$format\E$/i } @FORMATS){
+        $self->format($format);
+    }else{
+        $self->throw("Invalid format, [$format]");
+    }
+    return $self;
+}
+
+sub format {
+    my ($self, @new_value) = @_;
+    # Any argument at all (even undef) acts as a setter.
+    if (@new_value) {
+        $self->{_format} = $new_value[0];
+    }
+    return $self->{_format};
+}
+
+=head2 next_prediction
+
+  Title   : next_prediction
+  Usage   : 
+      while($result = $factory->next_prediction){
+          ;
+      }
+
+  Returns : a Bio::SeqFeature::FeaturePair object
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+    # Parse the whole input on first use, then hand out matches one by one.
+    if (!$self->_parsed) {
+        $self->_parse;
+        $self->_parsed(1);
+    }
+    return shift @{ $self->{_matches} };
+}
+
+sub next_result {
+    # Alias for next_prediction; any extra arguments are dropped.
+    my ($self) = @_;
+    return $self->next_prediction;
+}
+
+sub _parsed {
+    my ($self, @flag) = @_;
+    # A true argument marks the input as parsed; the mark is never cleared.
+    if (@flag && $flag[0]) {
+        $self->{_parsed} = 1;
+    }
+    return $self->{_parsed};
+}
+
+sub _parse {
+    my $self = shift;
+    my $format = $self->format;
+    # new() validates the format name case-insensitively and stores it as
+    # given, so this comparison has to ignore case too; otherwise
+    # -format => 'FASTA' is accepted by new() and then rejected here.
+    if($format =~ /^fasta$/i){
+        $self->_parse_fasta;
+    }else{
+        # Only the fasta output format has a parser so far.
+        $self->throw("the [$format] parser has not been written");
+    }
+}
+
+sub _parse_fasta {
+    my ($self) = @_;
+    my @matches;
+    my $fp;          # feature pair for the record currently being read
+    my $seq = '';    # sequence text collected for that record
+
+    # A lexical line variable keeps the caller's $_ intact.
+    while(defined(my $line = $self->_readline)){
+        # chomp, not chop: chop removed the last character unconditionally
+        # and so ate a residue (or part of the hit id) when the final line
+        # of the input had no trailing newline.
+        chomp $line;
+        if($line =~ /^\>([^>]+)/){
+            my $fasta_head = $1;
+            # Header layout: query_id/start-end : hit_id
+            if($fasta_head =~ /([^\/]+)\/(\d+)\-(\d+)(\s+)\:(\s+)(\S+)/){
+                my $q_id = $1;
+                my $q_start = $2;
+                my $q_end = $3;
+                my $h_id = $6;
+                # A new header closes the previous record.
+                if(defined $fp){
+                    $self->_attach_seq($seq, $fp);
+                    push @matches, $fp;
+                }
+                $fp = Bio::SeqFeature::FeaturePair->new(
+                    -feature1 => Bio::SeqFeature::Generic->new(
+                        -seq_id => $q_id,
+                        -start => $q_start,
+                        -end => $q_end
+                    ),
+                    -feature2 => Bio::SeqFeature::Generic->new(
+                        -seq_id => $h_id,
+                        -start => 0,
+                        -end => 0
+                    )
+                );
+                $seq = '';
+            }else{
+                $self->throw("ERR:\t\[$line\]");
+            }
+        }else{ # sequence line: collect it for the current record
+            $seq .= $line;
+        }
+    }
+    # Close the last record of the input.
+    if(defined $fp){
+        $self->_attach_seq($seq, $fp);
+        push @matches, $fp;
+    }
+    push @{$self->{_matches}}, @matches;
+
+}
+
+sub _attach_seq {
+    my ($self, $seq, $fp) = @_;
+    if(defined $fp){
+        # Left-pad the matched text with 'X' so that it sits at the
+        # feature's own start coordinate within the attached sequence.
+        my $padding = 'X' x ($fp->start-1);
+        my $seq_obj = Bio::Seq->new(-seq => $padding . $seq);
+        $fp->feature1->attach_seq($seq_obj);
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Pseudowise.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Pseudowise.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Pseudowise.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,280 @@
+# BioPerl module for Bio::Tools::Pseudowise
+#
+# $Id: Pseudowise.pm,v 1.10.4.1 2006/10/02 23:10:32 sendu Exp $
+# 
+# Copyright Jason Stajich, Fugu Team 
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Pseudowise - Results of one Pseudowise run
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Pseudowise;
+
+  my $parser = Bio::Tools::Pseudowise->new(-file=>"pw.out");
+  while(my $feat = $parser->next_result){
+      push @feat, $feat;
+  }
+
+=head1 DESCRIPTION
+
+Pseudowise is a pseudogene prediction program written by Ewan Birney
+as part of the Wise Package. This module is the parser for the output
+of the program.
+
+http://www.sanger.ac.uk/software/wise2
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Previously committed by the Fugu Team 
+
+Re-written by Jason Stajich jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Pseudowise;
+use strict;
+use Symbol;
+
+use Bio::Root::Root;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::GeneStructure;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+sub _initialize_state {
+    # The archived copy read ", at args" here: the '@' sigil had been
+    # rewritten by e-mail address obfuscation, which is a syntax error.
+    my ($self,@args) = @_;
+
+    # first call the inherited method!
+    $self->SUPER::_initialize_state(@args);
+
+    # our private state variables
+    $self->{'_preds_parsed'} = 0;
+    $self->{'_has_cds'} = 0;
+    # array of pre-parsed predictions
+    $self->{'_preds'} = [];
+    # seq stack
+    $self->{'_seqstack'} = [];
+}
+
+=head2 analysis_method
+
+ Usage     : $pseudowise->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /pseudowise/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+    my ($self, $method) = @_;
+    # An explicitly supplied method name has to mention pseudowise
+    # (case-insensitively); a false value passes straight through.
+    $self->throw("method $method not supported in " . ref($self))
+        if $method && $method !~ /pseudowise/i;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none
+
+See Also  L<Bio::SeqFeatureI>
+
+=cut
+
+sub next_feature {
+    # Alias for next_prediction; all arguments are forwarded unchanged.
+    my ($self, @args) = @_;
+    return $self->next_prediction(@args);
+}
+
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $pseudowise->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the gene of the Pseudowise result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : a Bio::SeqFeature::Generic 
+ Args    : none
+
+See Also L<Bio::SeqFeature::Generic>
+
+=cut
+
+sub next_prediction {
+    my ($self) = @_;
+
+    # Parse the prediction section lazily, on the first request only.
+    if (!$self->_predictions_parsed) {
+        $self->_parse_predictions;
+    }
+
+    # Hand out the next queued gene.
+    return $self->_prediction();
+}
+
+=head2 _parse_predictions
+
+ Title   : _parse_predictions()
+ Usage   : $obj->_parse_predictions()
+ Function: Parses the prediction section. Automatically called by
+           next_prediction() if not yet done.
+ Example :
+ Returns : 
+
+=cut
+
+sub _parse_predictions {
+    # Reads the whole input, builds one 'pseudogene' feature per gene with
+    # its exons as sub-features, queues them and marks parsing as done.
+    my ($self) = @_;
+    my $gene;
+    my @genes;
+
+    # Force line-by-line reading and protect the caller's $_.
+    local $/= "\n";
+    local($_);
+    # "name : value" pairs seen since the last record separator.
+    my %tags;
+    # The branches below are tried in order; the order matters because the
+    # patterns overlap.
+    while (defined( $_ = $self->_readline)){ 
+	if( /^(Total codons|\S+)\s+:\s+(\S+)/ ) {
+	    # "name : value" line - remember it as a tag for the next gene.
+	    $tags{$1} = $2;
+	} elsif(m!^//! ) {
+	    # Record separator: forget the current gene and its tags.
+	    if( $gene ) {
+		$gene = undef;
+		%tags = ();
+	    }
+	} elsif (/Gene\s+(\d+)\s*$/i) {
+	    # "Gene <n>" header: start a new gene carrying the tags so far.
+	    $gene = Bio::SeqFeature::Generic->new 
+		( -primary => 'pseudogene',
+		  -source  => 'pseudowise',
+		  -tag     => \%tags);
+	    push @genes, $gene;
+	} elsif( /Gene\s+(\d+)\s+(\d+)/i ) {
+	    # "Gene <a> <b>" range line: a < b means forward strand, otherwise
+	    # the coordinates are swapped and the strand is set to -1.
+	    # NOTE(review): assumes a "Gene <n>" header was seen first;
+	    # $gene is undefined otherwise.
+	    if( $1 < $2 ) {
+		$gene->start($1);
+		$gene->end($2);
+		$gene->strand(1);
+	    } else {
+		$gene->start($2);
+		$gene->end($1);
+		$gene->strand(-1);
+	    }
+	} elsif (/Exon\s+(\d+)\s+(\d+)\s+phase\s+(\S+)/i) {
+	    # Exon line: normalise to start <= end, recording the strand.
+	    my ($s,$e,$st) = ($1,$2,1);
+	    if( $s > $e) {
+		($s,$e,$st)=($e,$s,-1);
+	    }
+	    # The phase value is stored under the 'frame' tag.
+	    my $exon = Bio::SeqFeature::Generic->new 
+		( -start   => $s,
+		  -end     => $e,
+		  -strand  => $st,
+		  -primary => 'exon',
+		  -source  => 'pseudowise',
+		  -tag     => {'frame'  => $3});
+	    $gene->add_sub_SeqFeature($exon);
+	} 
+    }
+    # Queue everything that was found and remember that parsing is done.
+    $self->_add_prediction(\@genes);
+    $self->_predictions_parsed(1);
+}
+
+=head2 _prediction
+
+ Title   : _prediction()
+ Usage   : $gene = $obj->_prediction()
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _prediction {
+    my ($self) = @_;
+    # Take the next gene off the queue; a missing queue counts as empty.
+    my $queue = $self->{'_preds'} || [];
+    return shift @{$queue};
+}
+
+=head2 _add_prediction
+
+ Title   : _add_prediction()
+ Usage   : $obj->_add_prediction($gene)
+ Function: internal
+ Example :
+ Returns : 
+
+=cut
+
+sub _add_prediction {
+    my ($self, $gene) = @_;
+
+    # Accept either one gene or a reference to an array of genes.
+    my @incoming = (ref($gene) =~ /ARRAY/) ? @{$gene} : ($gene);
+
+    # Create the queue on first use, then append.
+    push @{ $self->{'_preds'} ||= [] }, @incoming;
+}
+
+=head2 _predictions_parsed
+
+ Title   : _predictions_parsed
+ Usage   : $obj->_predictions_parsed
+ Function: internal
+ Example :
+ Returns : TRUE or FALSE
+
+=cut
+
+sub _predictions_parsed {
+    my ($self, $val) = @_;
+
+    # Only a true argument updates the flag; it cannot be reset to false.
+    if ($val) {
+        $self->{'_preds_parsed'} = $val;
+    }
+    # Default to 0 when the flag has never been set.
+    $self->{'_preds_parsed'} = 0 unless exists $self->{'_preds_parsed'};
+
+    return $self->{'_preds_parsed'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/QRNA.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/QRNA.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/QRNA.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,411 @@
+# $Id: QRNA.pm,v 1.5.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::QRNA
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::QRNA - A Parser for qrna output
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::QRNA;
+  my $parser = new Bio::Tools::QRNA(-file => $qrnaoutput);
+  while( my $feature = $parser->next_feature ) {
+    # do something here
+  }
+
+=head1 DESCRIPTION
+
+Parses QRNA output (E.Rivas;
+http://www.genetics.wustl.edu/eddy/software/#qrna).
+
+This module is not complete, but currently it packs information from
+each QRNA alignment into a single Bio::SeqFeature::Generic object.
+
+Not all options for QRNA output have been tested or tried.  It has
+been tested on sliding window output (-w -x) and shuffled output (-b
+or -B).
+
+See t/QRNA.t for example usage.
+
+At some point we may have more complicated feature object which will
+support this data rather than forcing most of the information into
+tag/value pairs in a SeqFeature::Generic.
+
+Running with -verbose =E<gt> 1 will store extra data in the feature.  The
+entire unparsed entry for a particular feature will be stored as a
+string in the tag 'entry' it is accessible via:
+
+  my ($entry) = $f->each_tag_value('entry');
+
+The winning model for any given alignment test will be the name stored
+in the primary_tag field of the feature.  The bit score will be stored in
+the score field.  The logoddspost is available via a tag/value pair.
+This example code shows how to print out the score and log odds
+post for each model.
+
+  # assuming you got a feature already
+  print "model score logoddspost\n";
+  foreach my $model ( qw(OTH COD RNA) ) {
+    my ($score)       = $f->get_tag_values("$model\_score");
+    my ($logoddspost) = $f->get_tag_values("$model\_logoddspost");
+    print "$model $score $logoddspost\n";
+  }
+
+The start and end of the alignment for both the query and hit sequence
+are available through the L<Bio::SeqFeature::FeaturePair> interface,
+specifically L<Bio::SeqFeature::FeaturePair::feature1> and
+L<Bio::SeqFeature::FeaturePair::feature2>.  Additionally if you have
+run QRNA with an input file which has the location of the alignment
+stored in the FASTA filename as in (ID/START-END) which is the default
+output format from L<Bio::AlignIO::fasta> produced alignment output,
+this module will re-number start/end for the two sequences so they are
+in the actual coordinates of the sequence rather than the relative
+coordinates of the alignment.  You may find the bioperl utility
+script search2alnblocks useful in creating your input files for QRNA.
+
+Some other words of warning: QRNA uses a 0-based numbering system for
+sequence locations, while Bioperl uses a 1-based system.  You'll notice
+that locations are +1 relative to those reported in the raw QRNA output.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::QRNA;
+use vars qw(@Models);
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::FeaturePair;
+
+use base qw(Bio::Root::IO Bio::SeqAnalysisParserI);
+@Models = qw(OTH COD RNA);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::QRNA();
+ Function: Builds a new Bio::Tools::QRNA object 
+ Returns : an instance of Bio::Tools::QRNA
+ Args    : -fh/-file filehandle/filename standard input for 
+                     Bio::Root::IO objects
+
+=cut
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : my $feature = $parser->next_feature
+ Function: Get the next QRNA feature
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Returns the next queued feature.  When the queue is empty, one more
+# sequence pair is parsed and the first resulting feature is returned;
+# undef signals that no further features could be parsed.
+sub next_feature {
+    my $self = shift;
+    my $feat = shift @{ $self->{'_parsed_features'} || [] };
+    return $feat if defined $feat;
+
+    # queue exhausted - try to refill it from the input
+    return $feat unless $self->_parse_pair;
+    return shift @{ $self->{'_parsed_features'} || [] };
+}
+
+# Internal: reads QRNA output for one sequence pair and queues the
+# resulting features on $self->{'_parsed_features'}.
+#
+# Header comment lines update the program/model/seq-file accessors.  A
+# "# N [+/- strand]" line opens a record; the second such line seen is
+# pushed back onto the stream and ends this call.  Every
+# "length alignment:" line starts a new window, and the window collected
+# before it (if any) is turned into a feature via _make_feature().
+#
+# Returns the number of features queued by this call (0 if none).
+#
+# NOTE(review): a window still held in %data when the input runs out is
+# not converted into a feature here - only the two branches below call
+# _make_feature().  Confirm whether QRNA output always ends in a way
+# that makes this harmless.
+sub _parse_pair {
+   my ($self,@args) = @_;
+   my (@features,%data);
+   my $seenstart = 0;
+   while( defined( $_ = $self->_readline) ) {
+       next if( /^\#\-\-/o );   # separator rule between header sections
+       if( /^\#\s+(qrna)\s+(\S+)\s+\(([^\)]+)\)/o ) {
+	   $self->program_name($1);
+	   $self->program_version($2);
+	   $self->program_date($3);
+       } elsif( /^\#\s+(PAM model)\s+\=\s+(.+)\s+$/o ) {
+	   $self->PAM_model($2);
+       } elsif( /^\#\s+(RNA model)\s+\=\s+(\S+)/o ) {
+	   $self->RNA_model($2);
+       } elsif( /^\#\s+(seq file)\s+\=\s+(.+)\s+$/o ) {
+	   $self->seq_file($2);	   
+       } elsif( /^\#\s+(\d+)\s+\[([\-+])\s+strand\]/o ) {
+	   # start-of-record marker; the second one belongs to the next
+	   # record, so flush what we have and leave the line for later
+	   if( $seenstart ) { 
+	       if( $data{'alignment_len'} ) {
+		   push @features, $self->_make_feature(\%data);
+	       }
+	       $self->_pushback($_);
+	       last;
+	   }
+	   $seenstart = 1;
+       } elsif( /^\#/ ) {
+	   # any other comment line is ignored (and not kept in 'entry')
+	   next;
+       } elsif( />(\S+)\s+\((\d+)\)/ ) {
+	   # sequence name and the number given in parentheses after it
+	   if( @{$data{'seqs'} || []} == 2 ) { 
+	       $self->warn( "already seen seqs ".join(' ', map { $_->[0] } 
+						      @{$data{'seqs'}}). "\n");
+	   } else { 
+	       push @{$data{'seqs'}}, [$1,$2];
+	   }
+       } elsif( /^length alignment:\s+(\d+)\s+\(id\=(\d+(\.\d+)?)\)/o ) {
+	   
+	   if( $data{'alignment_len'} ) {
+	       push @features, $self->_make_feature(\%data);	
+	       # reset all the data but the 'seqs' field
+	       %data  = ( 'seqs' => $data{'seqs'} );
+	   }
+	   
+	   if( /\(((sre_)?shuffled)\)/ ) { 
+	       $data{'shuffled'} = $1;
+	   }
+	   $data{'alignment_len'} = $1;
+	   $data{'alignment_pid'} = $2;
+       } elsif ( /^pos([XY]):\s+(\d+)\-(\d+)\s+\[(\d+)\-(\d+)\]\((\d+)\)\s+
+		 \-\-\s+\((\S+\s+\S+\s+\S+\s+\S+)\)/ox ) {
+	   $data{"seq\_$1"}->{'aln'} = [ $2,$3, $4,$5, $6];
+	   @{$data{"seq\_$1"}->{'base_comp'}} = split(/\s+/,$7);
+       } elsif( /^winner\s+\=\s+(\S{3})/ ) {
+	   $data{'winning_model'} = $1;
+       } elsif( /^(\S{3})\s+ends\s+\=\s+(\-?\d+)\s+(\-?\d+)/ ) {
+	   # QRNA is 0-based
+	   # Bioperl is 1 based
+	   $data{'model_location'}->{$1} = [ $2,$3 ];
+       }  elsif( /^\s+(logoddspost)?OTH\s+\=\s+/ox ) {
+	   # one line carries "NAME = value" for every model; names with a
+	   # logoddspost prefix go to the logoddspost table, others to bits
+	   while( /(\S+)\s+\=\s+(\-?\d+(\.\d+))/g ) {
+	       my ($model,$score)= ($1,$2);
+	       if( $model =~ s/^logoddspost// ) {
+		   $data{'model_scores'}->{'logoddspost'}->{$model} = $score;
+	       } else {
+		   $data{'model_scores'}->{'bits'}->{$model} = $score;
+	       }
+	   }
+       }
+       # keep the raw text of the window (exposed as the 'entry' tag)
+       $data{'entry'} .= $_;
+   }
+   if( @features ) {
+       push @{$self->{'_parsed_features'}}, @features;
+       return scalar @features;
+   }
+   return 0;
+}
+
+=head2 PAM_model
+
+ Title   : PAM_model
+ Usage   : $obj->PAM_model($newval)
+ Function: 
+ Example : 
+ Returns : value of PAM_model (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub PAM_model{
+    my ($self, @value) = @_;
+    if( @value ) {
+	$self->{'PAM_model'} = $value[0];
+    }
+    return $self->{'PAM_model'};
+}
+
+=head2 RNA_model
+
+ Title   : RNA_model
+ Usage   : $obj->RNA_model($newval)
+ Function: 
+ Example : 
+ Returns : value of RNA_model (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub RNA_model{
+    my ($self, @new) = @_;
+
+    $self->{'RNA_model'} = $new[0] if @new;
+    return $self->{'RNA_model'};
+}
+
+=head2 seq_file
+
+ Title   : seq_file
+ Usage   : $obj->seq_file($newval)
+ Function: 
+ Example : 
+ Returns : value of seq_file (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub seq_file{
+    my ($self, @new) = @_;
+
+    if( scalar @new ) {
+	$self->{'seq_file'} = $new[0];
+    }
+    return $self->{'seq_file'};
+}
+
+
+=head2 program_name
+
+ Title   : program_name
+ Usage   : $obj->program_name($newval)
+ Function: 
+ Example : 
+ Returns : value of program_name (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor.  A set returns exactly the value that was stored;
+# a plain get falls back to 'qrna' when the stored value is false.
+sub program_name{
+    my ($self, @new) = @_;
+
+    if( @new ) {
+	return $self->{'program_name'} = $new[0];
+    }
+    return $self->{'program_name'} || 'qrna';
+}
+
+=head2 program_version
+
+ Title   : program_version
+ Usage   : $obj->program_version($newval)
+ Function: 
+ Example : 
+ Returns : value of program_version (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub program_version{
+    my ($self, @new) = @_;
+
+    $self->{'program_version'} = $new[0] if @new;
+    return $self->{'program_version'};
+}
+
+=head2 program_date
+
+ Title   : program_date
+ Usage   : $obj->program_date($newval)
+ Function: 
+ Example : 
+ Returns : value of program_date (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub program_date{
+    my ($self, @new) = @_;
+    if( @new ) {
+	$self->{'program_date'} = $new[0];
+    }
+    return $self->{'program_date'};
+}
+
+# Internal: builds a Bio::SeqFeature::FeaturePair from the data hash
+# collected by _parse_pair() for one alignment window.
+#
+# feature1/feature2 are Generic features for the first and second
+# sequence, located at the winning model's "ends" shifted by a
+# per-sequence offset.  The offset is 1 (QRNA is 0-based, Bioperl is
+# 1-based) unless the sequence name has the form ID/START-END, in which
+# case START is used and the name is cut back to ID.
+# Scores, logoddspost values and positions of every model in @Models are
+# attached as tags, together with the alignment length/percent id and
+# the 'seqs' entries.
+#
+# Throws if no location was parsed for one of the models.
+sub _make_feature { 
+    my ($self,$data) = @_; 
+    my ($qoffset,$hoffset) = (1,1);
+    # when you run qrna and have produced ID/START-END
+    # formatted input strings we can remap the location
+    # to the original
+
+    # name is stored as the first entry in the seq array ref
+    my ($qid,$hid) = ( $data->{'seqs'}->[0]->[0],
+		       $data->{'seqs'}->[1]->[0]);
+    if( $qid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
+	($qid,$qoffset) = ($1,$2);
+    }
+    if( $hid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
+	($hid,$hoffset) = ($1,$2);
+    }
+
+    my $f = Bio::SeqFeature::FeaturePair->new();
+
+    my ($s,$e) = @{$data->{'model_location'}->{$data->{'winning_model'}}};
+    my $qf = Bio::SeqFeature::Generic->new
+	( -primary_tag => $data->{'winning_model'},
+	  -source_tag  => $self->program_name,
+	  -score       => $data->{'model_scores'}->{'bits'}->{$data->{'winning_model'}},
+	  -start       => $s+$qoffset,
+	  -end         => $e+$qoffset,
+	  -seq_id      => $qid,
+	  -strand      => ($s < $e ) ? 1 : -1,
+	  );
+
+    my $hf = Bio::SeqFeature::Generic->new
+	( -primary_tag => $qf->primary_tag,
+	  -source_tag  => $qf->source_tag,
+	  -score       => $qf->score,
+	  -seq_id      => $hid,
+	  -start       => $s + $hoffset,
+	  -end         => $e + $hoffset,
+	  -strand      => $qf->strand,
+	  );
+    $f->feature1($qf);
+    $f->feature2($hf);
+    $f->add_tag_value('alignment_len', $data->{'alignment_len'});
+    $f->add_tag_value('alignment_pid', $data->{'alignment_pid'});
+    # store the other model scores and data
+    foreach my $model ( @Models ) {
+	$f->add_tag_value("$model\_score", $data->{'model_scores'}->{'bits'}->{$model});
+	$f->add_tag_value("$model\_logoddspost", $data->{'model_scores'}->{'logoddspost'}->{$model});
+	if( ! $data->{'model_location'}->{$model} ) {
+	    if( $self->verbose > 0 ) {
+		$self->debug( $data->{'entry'} );
+	    }
+	    # build ONE message string: passing the pieces as a list would
+	    # hand everything after the first piece to throw() as extra
+	    # arguments instead of message text
+	    $self->throw("no location parsed for $model in " .
+			 join(' ', (map { @$_ } @{$data->{'seqs'}}),
+			      $f->start, $f->end));
+	} else { 
+	    $f->add_tag_value("$model\_positions", 
+			      join("..",@{$data->{'model_location'}->{$model} }));
+	}
+    }
+    # probably a better way to store this - as 
+    # a seq object perhaps
+    $f->add_tag_value('seq1', @{$data->{'seqs'}->[0]});
+    $f->add_tag_value('seq2', @{$data->{'seqs'}->[1]});
+    $f->add_tag_value('entry', $data->{'entry'}) if $self->verbose > 0;
+    if( $data->{'shuffled'} ) {
+	$f->add_tag_value('shuffled', $data->{'shuffled'});
+    }
+    return $f;					       
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RNAMotif.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RNAMotif.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RNAMotif.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,414 @@
+# $Id: RNAMotif.pm,v 1.10.4.4 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::RNAMotif
+#
+# Cared for by Chris Fields <cjfields-at-uiuc-dot-edu>
+#
+# Copyright Chris Fields
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::RNAMotif - A parser for RNAMotif output
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::RNAMotif;
+  my $parser = new Bio::Tools::RNAMotif(-file => $rna_output,
+                                        -motiftag => 'protein_bind',
+                                        -desctag => 'TRAP_binding');
+  #parse the results
+  while( my $motif = $parser->next_prediction) {
+    # do something here
+  }
+
+=head1 DESCRIPTION
+
+Parses raw RNAMotif output.  RNAMotif uses a RNA profile, consisting
+of sequence and structural elements stored in a descriptor file, to
+search for potential motifs in a DNA sequence file.  For more
+information, see:
+
+Macke TJ, Ecker DJ, Gutell RR, Gautheret D, Case DA, Sampath R. 
+RNAMotif, an RNA secondary structure definition and search algorithm.
+Nucleic Acids Res. 2001 Nov 15;29(22):4724-35. 
+http://www.scripps.edu/mb/case/casegr-sh-3.5.html.
+
+This module is not currently complete.  As is, it will parse raw
+RNAMotif output (i.e. information not passed through the secondary
+programs rmfmt or rm2ct) and pack information into
+Bio::SeqFeature::Generic objects.  Currently, parsing extra output
+utilized by the sprintf() function in an RNAMotif descriptor is not
+implemented; this information is instead packed into the score tag,
+which can be accessed by using the following:
+
+  my ($score) = $feature->score; 
+
+If the score contains anything besides a digit, it will throw a
+warning that sprintf() may have been used.
+Several values have also been added in the 'tag' hash.  These can be
+accessed using the following syntax:
+
+  my ($entry) = $feature->get_Annotations('Secstructure');
+
+Added tags are : 
+
+   descline     - entire description line (in case the regex used for
+                  sequence ID doesn't adequately catch the name)
+   descfile     - name of the descriptor file (may include path to file)
+   secstructure - contains structural information from the descriptor
+                  used as a query
+   sequence     - sequence of motif, separated by spaces according to
+                  matches to the structure in the descriptor (in
+                  SecStructure).
+
+See t/RNAMotif.t for example usage.
+
+The clean_features method can also be used to return a list of seqfeatures (in a
+Bio::SeqFeature::Collection object) that are within a particular region.   RNAMotif
+is prone with some descriptors to returning redundant hits; an attempt to rectify
+this problem is attempted with RNAMotif's companion program rmprune, which returns
+the structure with the longest helices (and theoretically the best scoring structure).
+However, this doesn't take into account alternative foldings which may score better.
+This method adds a bit more flexibility, giving the user the ability to screen folds
+based on where the feature is found and the score.  Passing a positive integer x
+screens SeqFeatures based on the highest score within x bp, while a negative integer
+screens based on the lowest score. So, to return the highest scoring values within
+20 bp (likely using an arbitrary scoring system in the SCORE section of a descriptor
+file), one could use:
+
+  $list = $obj->clean_features(20); 
+
+... and returning the lowest scoring structures within the same region (when the
+score is based on calculated free energies from efn2) can be accomplished
+by the following:
+
+  $list = $obj->clean_features(-20);
+
+If you wanted the best feature in a sequence, you could set this to a large number,
+preferably one that exceeds the number of bases in a sequence:
+
+  $list = $obj->clean_features(10000000);
+
+Each seqfeature in the collection can then be acted upon:
+
+  @sf = $list->get_all_features;
+  for my $f (@sf) {
+    # do crazy things here
+  }
+
+At some point a more complicated feature object may be used to support
+this data rather than forcing most of the information into tag/value
+pairs in a SeqFeature::Generic.  This will hopefully allow for more
+flexible analysis of data (specifically RNA secondary structural
+data).  It works for now...
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chris Fields
+
+Email cjfields-at-uiuc-dot-edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::RNAMotif;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::Collection;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+our($MotifTag,$SrcTag,$DescTag) = qw(misc_binding RNAMotif rnamotif);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::RNAMotif();
+ Function: Builds a new Bio::Tools::RNAMotif object 
+ Returns : an instance of Bio::Tools::RNAMotif
+ Args    : -fh/-file for input filename
+           -motiftag => primary tag used in gene features (default 'misc_binding')
+           -desctag => tag used for display_name name (default 'rnamotif')
+           -srctag  => source tag used in all features (default 'RNAMotif')
+
+=cut
+
+# Initialises the parser: lets the superclass process its own arguments,
+# then stores the motif/source/description tags (falling back to the
+# package defaults $MotifTag, $SrcTag, $DescTag when an argument is not
+# defined) and clears the two header fields filled in by
+# next_prediction().
+sub _initialize {
+  my($self,@args) = @_;
+  $self->SUPER::_initialize(@args);
+  my ($motiftag,$desctag,$srctag) =  $self->SUPER::_rearrange([qw(MOTIFTAG
+                                                                  DESCTAG
+                                                                  SRCTAG
+                                 )],
+                                  @args);
+  $self->motif_tag(defined $motiftag ? $motiftag : $MotifTag);
+  $self->source_tag(defined $srctag ? $srctag : $SrcTag);
+  $self->desc_tag(defined $desctag ? $desctag : $DescTag);
+  # two real assignments: a list inside the subscript would merely look
+  # up one composite key and initialise nothing
+  $self->{'_sec_structure'} = '';
+  $self->{'_dfile'}         = '';
+  return;
+}
+
+=head2 motif_tag
+
+ Title   : motif_tag
+ Usage   : $obj->motif_tag($newval)
+ Function: Get/Set the value used for 'motif_tag', which is used for setting the
+           primary_tag.
+           Default is 'misc_binding' as set by the global $MotifTag.
+           'misc_binding' is used here because a conserved RNA motif is capable
+           of binding proteins (regulatory proteins), antisense RNA (siRNA),
+           small molecules (riboswitches), or nothing at all (tRNA,
+           terminators, etc.).  It is recommended that this be changed to other
+           tags ('misc_RNA', 'protein_binding', 'tRNA', etc.) where appropriate.
+           For more information, see:
+           http://www.ncbi.nlm.nih.gov/collab/FT/index.html
+ Returns : value of motif_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub motif_tag{
+    my ($self, @new) = @_;
+
+    if( @new ) {
+        $self->{'motif_tag'} = $new[0];
+    }
+    return $self->{'motif_tag'};
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $obj->source_tag($newval)
+ Function: Get/Set the value used for the 'source_tag'.
+           Default is 'RNAMotif' as set by the global $SrcTag
+ Returns : value of source_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub source_tag{
+    my ($self, @new) = @_;
+
+    $self->{'source_tag'} = $new[0] if @new;
+    return $self->{'source_tag'};
+}
+
+
+=head2 desc_tag
+
+ Title   : desc_tag
+ Usage   : $obj->desc_tag($newval)
+ Function: Get/Set the value used for the query motif.  This will be placed in
+           the tag '-display_name'.  Default is 'rnamotif' as set by the global
+           $DescTag.  Use this to manually set the descriptor (motif searched for).
+           Since there is no way for this module to tell what the motif is from the
+           name of the descriptor file or the RNAMotif output, this should
+           be set every time an RNAMotif object is instantiated for clarity
+ Returns : value of desc_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+# Get/set accessor: an argument (even undef) replaces the stored value.
+sub desc_tag{
+    my ($self, @new) = @_;
+
+    if( scalar @new ) {
+        $self->{'desc_tag'} = $new[0];
+    }
+    return $self->{'desc_tag'};
+}
+
+=head2 analysis_method
+
+ Usage     : $obj->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /RNAMotif/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+# Rejects any true method name that does not match /RNAMotif/i, then
+# defers to the inherited accessor.
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+    my $unsupported = $method && ($method !~ /RNAMotif/i);
+    $self->throw("method $method not supported in " . ref($self))
+        if $unsupported;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $obj->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the RNAMotif result
+           file. Call this method repeatedly until FALSE is returned.
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+ Returns : A Bio::SeqFeature::Generic object (see next_prediction).
+ Args    : None (at present)
+
+=cut
+
+# Alias for next_prediction(), provided under the name next_feature.
+sub next_feature {
+    my ($self,@args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $obj->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next gene structure prediction of the RNAMotif result
+           file. Call this method repeatedly until FALSE is returned.
+ Returns : A Bio::SeqFeature::Generic object
+ Args    : None (at present)
+
+=cut
+
+# Reads lines until one hit line has been parsed and returns it as a
+# Bio::SeqFeature::Generic; returns nothing once the input is exhausted.
+#
+# "#RM descr" / "#RM dfile" header lines are remembered on the object and
+# attached to every later feature.  A ">" line supplies the sequence id
+# (the part between the bars of a gb|..|, emb|..| etc. identifier when
+# present) and the full description line for the hit that follows.
+sub next_prediction {
+    my ($self) = @_;
+    my ($motiftag,$srctag,$desctag) = ( $self->motif_tag,
+                       $self->source_tag,
+                       $self->desc_tag);
+    my ($seqid, $description);
+    # test definedness, not truth, so that a false-valued line cannot end
+    # the parse early
+    while(defined(my $line = $self->_readline)) {
+        if($line =~ /^#RM/) { # header line
+            if($line =~ /^#RM\sdescr\s(.*)$/) { # contains sec structure
+                $self->{'_sec_structure'}=$1;
+            }
+            if($line =~ /^#RM\sdfile\s(.*)$/) { # contains dfile
+                $self->{'_dfile'}=$1;
+            }
+            next;
+        }
+        if($line =~ m/^>((\S*)\s.*)$/) {
+            $seqid = $2;
+            $description = $1; # contains entire description line if needed
+            if($seqid =~  /(gb|emb|dbj|sp|pdb|bbs|ref|lcl)\|(.*)\|/) {
+                $seqid = $2; # pulls out gid
+            }
+        }
+        # start pulling out hit information...
+        # regex is m/^\S+\s+(.+)\s+(\d+)\s+(\d+)\s+(\d+)\s(.*)$/
+        # m/^\S+\s+     # seqID, not needed
+        # (.+)\s+       # score, or extra info using sprintf()
+        # (\d+)\s+      # strand
+        # (\d+)\s+      # start
+        # (\d+)\s       # length
+        # (.*)$/        # sequence, divided according to descriptor
+        if($line =~ m/^\S+\s+(.+)\s+(\d+)\s+(\d+)\s+(\d+)\s(.*)$/) {
+            my ($score, $strand, $start, $length, $sequence) =
+                ($1, $2, $3, $4, $5);
+            my $end;
+            if( $strand==0 ) {
+                # strand column 0: the hit runs forward from start
+                $end = $start + $length -1;
+                $strand = 1;
+            } else {
+                # otherwise the hit runs backwards from start, so swap
+                # the coordinates to keep start <= end
+                $end = $start - $length + 1;
+                ($start, $end, $strand) = ($end, $start, -1);
+            }
+            my $gene = Bio::SeqFeature::Generic->new(-seq_id => $seqid,
+                                                      -start  => $start,
+                                                      -end    => $end,
+                                                      -strand => $strand,
+                                                      -score  => $score,
+                                                      -primary_tag => $motiftag,
+                                                      -source_tag  => $srctag,
+                                                      -display_name => $desctag,
+                                                      -tag     => {
+                                                        'descline'       => $description,
+                                                        'descfile'      => $self->{'_dfile'},
+                                                        'secstructure'  => $self->{'_sec_structure'},
+                                                        'sequence'       => $sequence});
+            return $gene;
+        }
+    }
+    return; # input exhausted
+}
+
+=head2 clean_features
+
+ Title   : clean_features
+ Usage   : @list = $obj->clean_features(-10);
+ Function: Cleans (reduces redundant hits) based on score, position
+ Returns : a Bio::SeqFeature::Collection object
+ Args    : Pos./Neg. integer (for highest/lowest scoring seqfeature within x bp).
+ Throws  : Error unless digit is entered.  
+
+=cut
+
+# Collapses runs of probably-redundant hits and returns the survivors in
+# a Bio::SeqFeature::Collection.
+#
+# All remaining predictions are read first.  Walking through them in
+# input order, a hit is treated as redundant with the currently kept one
+# when both have the same seq_id and their starts differ by at most
+# abs($bp); of two redundant hits the higher score is kept for $bp > 0
+# and the lower score for $bp < 0.  A hit that is not redundant closes
+# the current run and starts a new one.
+#
+# Throws unless $bp is a non-zero integer.
+sub clean_features {
+    my $self = shift;
+    my $bp = shift;
+    $self->throw("No arg, need pos. or neg. integer") if !$bp;
+    $self->throw("Need pos. or neg. integer") if ($bp !~ /^[-+]?\d+$/);
+
+    my @features = ();
+    while (my $pred = $self->next_prediction) {
+        push @features, $pred;
+    }
+
+    my @list = ();
+    my $best = shift @features;   # currently kept hit of the open run
+    foreach my $cand (@features) {
+        # seq_ids are strings, so compare with eq; a numeric comparison
+        # would treat every non-numeric id as equal
+        my $redundant = ($best->seq_id eq $cand->seq_id)
+            && (abs(($best->start)-($cand->start)) <= abs($bp));
+        if (!$redundant) {
+            # different sequence or too far apart: the run is finished
+            push @list, $best;
+            $best = $cand;
+            next;
+        }
+        # which score is better?
+        if( (($bp < 0) && ($best->score > $cand->score))  ||  # lowest score
+            (($bp > 0) && ($best->score < $cand->score)) ){   # highest score
+            $best = $cand;
+        }
+    }
+    push @list, $best if $best;
+    my $col = Bio::SeqFeature::Collection->new;
+    $col->add_features(\@list);
+    return $col;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RandomDistFunctions.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RandomDistFunctions.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RandomDistFunctions.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,201 @@
+# $Id: RandomDistFunctions.pm,v 1.7.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::RandomDistFunctions
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::RandomDistFunctions - A set of routines useful for
+generating random data in different distributions
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::RandomDistFunctions;
+  my $dist = Bio::Tools::RandomDistFunctions->new();
+  for my $v ( 1..1000 ) { 
+    my $birth_dist = $dist->rand_birth_distribution($lambda);
+    # ... do something with the variable
+  }
+
+=head1 DESCRIPTION
+
+Most of the code is based on the C implementation of these routines in
+Mike Sanderson's r8s's package.  See http://ginger.ucdavis.edu/ for
+information on his software.
+
+This code tries to be fast and use available faster BigInt and GMP
+library methods when those modules are available.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Thanks to Mike Sanderson for assistance in getting this
+implementation together.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::RandomDistFunctions;
+require Exporter;
+use vars qw(%LOADED @EXPORT_OK); use strict;
+
+#use Math::BigFloat lib => 'GMP,Bit::Vector';
+#use Math::BigInt lib  => 'GMP,Bit::Vector';
+use POSIX;
+
+use base qw(Bio::Root::Root);
+
+=head2 rand_birth_distribution
+
+ Title   : rand_birth_distribution
+ Usage   : my $randvar = $dist->
+            rand_birth_distribution($lambda);
+ Function: Returns a random number from a birth process waiting 
+           time with a fixed interval
+           1.0.  Times are measured from 0=present,1=root;
+ Returns : floating point number
+ Args    : $lambda ( > 0 )
+ References : This is based on code by Mike Sanderson in r8s.
+              Ross, Stochastic Processes, p. 145 for the density
+
+=cut
+
+# Draws one value as 1 - log(U*(exp(lambda)-1) + 1)/lambda, with U taken
+# from rand(1).  May also be called as a plain function with lambda as
+# the only argument.
+sub rand_birth_distribution{
+   my ($self,$lambda) = @_;
+   # plain-function call: the first argument is lambda, not an invocant
+   my $plain_call = ! ref($self) && $self !~ /RandomDistFunctions/;
+   $lambda = $self if $plain_call;
+   if( ! $lambda ) { 
+       $self->throw("Cannot call birth_distribution without a valid lambda value (>0)");
+   }
+   my $u      = rand(1);
+   my $growth = exp($lambda) - 1;
+   return 1 - ( log($u * $growth + 1) / $lambda );
+}
+
+
+=head2 rand_geometric_distribution
+
+ Title   : rand_geometric_distribution
+ Usage   : my $randvar = $dist->rand_geometric_distribution($param);
+ Function: Returns a random geometric variate distributed with 
+           parameter $param, according to
+           c.d.f. 1 - ( 1- param) ^ n 
+ Returns : integer
+ Args    : $param ( 0 < $param < 1 )
+
+
+=cut
+
+# Draws one geometric variate as floor( log(1-U) / log(1-param) ) + 1,
+# with U taken from rand(1).  May also be called as a plain function
+# with param as the only argument.
+sub rand_geometric_distribution{
+   my ($self,$param) = @_;
+   # plain-function call: the first argument is param, not an invocant
+   $param = $self if ! ref($self) && $self !~ /RandomDistFunctions/;
+   if( ! $param ) { 
+       $self->throw("Cannot call rand_geometric_distribution without a valid param value (>0)");
+   }
+
+   # denominator log(1 - param); below 1e-8 the two-term expression
+   # -param - param^2/2 is used in its place
+   my $den = ( $param < 1e-8 )
+       ? (-1 * $param) - ( $param * $param ) / 2
+       : log(1 - $param);
+
+   # MSanderson comments from r8s code
+   # Is this the right truncation of the real-valued expression above?
+   # YES
+   # Checked by reference to the expected mean of the distribution in
+   # 100,000 replicates
+   # EX = 1/param Var = (1-param)/param^2 See Olkin, Gleser, and
+   # Derman, p. 193ff. Probability Models and Applications, 1980.
+   my $z = log(1 - rand(1)) / $den;
+   return POSIX::floor($z) + 1;
+}
+
+=head2 rand_exponentional_distribution
+
+ Title   : rand_exponentional_distribution
+ Usage   : my $var = $dist->rand_exponentional_distribution($param);
+ Function: Returns a random exponential variate distributed with parameter
+           $param, according to c.d.f 1 - e^(-param * x)
+ Returns : floating point number 
+ Args    : $param ( > 0 )
+
+
+=cut
+
+# Draws one exponential variate by inverting the documented c.d.f.
+# F(x) = 1 - e^(-param * x):  x = -log(1 - U) / param, with U taken from
+# rand(1).  The leading minus sign is required: log(1 - U) is never
+# positive, so without it every result would fall outside the
+# distribution's support (x >= 0).
+# May also be called as a plain function with param as the only argument.
+sub rand_exponentional_distribution {
+   my ($self,$param) = @_;
+   # plain-function call: the first argument is param, not an invocant
+   if( ! ref($self) && 
+       $self !~ /RandomDistFunctions/ ) { 
+       $param = $self;
+   }
+   unless( $param ) { 
+       $self->throw("Cannot call rand_exponentional_distribution without a valid param value (>0)");
+   }
+   return -log( 1 - rand(1) ) / $param;
+}
+
+=head2 rand_normal_distribution
+
+ Title   : rand_normal_distribution
+ Usage   : my $var = $dist->rand_normal_distribution()
+ Function: Returns a random normal (gaussian) variate distributed 
+ Returns : floating point number
+ Args    : none
+
+
+=cut
+
+# Draws one value by rejection sampling: picks (v1, v2) uniformly in the
+# square [-1,1) x [-1,1) until the point lies strictly inside the unit
+# circle and is not the origin, then returns
+# v2 * sqrt(-2 * log(rsq) / rsq) where rsq = v1^2 + v2^2.
+# Takes no arguments and does not use its invocant.
+sub rand_normal_distribution{
+    my ($rsq,$v1,$v2) = ( 0,0,0);
+    do { 
+	$v1 = 2 * rand(1) - 1;
+	$v2 = 2 * rand(1) - 1;
+	$rsq= $v1**2 + $v2 ** 2;
+    } while( $rsq >= 1 || $rsq == 0);
+    my $fac = sqrt(-2.0 * log($rsq) / $rsq );
+    # a companion value $v1 * $fac could be formed from the same draw; it
+    # was computed and never used, so it is no longer computed
+    return $v2 * $fac;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RepeatMasker.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RepeatMasker.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RepeatMasker.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,155 @@
+# $Id: RepeatMasker.pm,v 1.10.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::RepeatMasker
+#
+# Cared for by Shawn Hoon <shawnh at fugu-sg.org>
+#
+# Copyright Shawn Hoon
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::RepeatMasker - a parser for RepeatMasker output
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::RepeatMasker;
+    my $parser = new Bio::Tools::RepeatMasker(-file => 'seq.fa.out');
+    while( my $result = $parser->next_result ) {
+      # get some value
+    }
+
+=head1 DESCRIPTION
+
+A parser for RepeatMasker output
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Shawn Hoon
+
+Email shawnh at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::RepeatMasker;
+use strict;
+
+use Bio::SeqFeature::FeaturePair;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::RepeatMasker();
+ Function: Builds a new Bio::Tools::RepeatMasker object
+ Returns : Bio::Tools::RepeatMasker
+ Args    : -fh/-file => $val, for initing input, see Bio::Root::IO
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # Build the base object, then pass the same args to the I/O initializer.
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);
+
+  return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $r = $rpt_masker->next_result
+ Function: Get the next result set from parser data
+ Returns : Bio::SeqFeature::FeaturePair
+           Feature1 is the Query coordinates and Feature2 is the Hit
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+    while (defined(my $line = $self->_readline()) ) {  # lexical: caller's $_ is untouched
+	if ($line =~ /no repetitive sequences detected/) {
+	    $self->warn( "RepeatMasker didn't find any repetitive sequences\n");
+	    return ;
+	}
+	#ignore introductory lines
+	if ($line =~ /\d+/) {
+	    my @field = split ' ', $line;   # whitespace-separated columns, split once
+	    # ignore features with negatives
+	    # NOTE(review): this index was written as 11-13, which Perl evaluates
+	    # to -2 (second-to-last column); a slice of 11..13 may have been meant.
+	    next if ($field[-2] =~ /-/);
+	    my ($score, $query_name, $query_start, $query_end, $strand,
+		$repeat_name, $repeat_class ) = @field[0, 4, 5, 6, 8, 9, 10];
+
+	    my ($hit_start,$hit_end);
+
+	    if ($strand eq '+') {
+		($hit_start, $hit_end) = @field[11, 12];
+		$strand = 1;
+	    } elsif ($strand eq 'C') {
+		($hit_start, $hit_end) = @field[12, 13];
+		$strand = -1;
+	    }
+	    my $rf = Bio::SeqFeature::Generic->new
+		(-seq_id      => $query_name,
+		 -score       => $score,
+		 -start       => $query_start,
+		 -end         => $query_end,
+		 -strand      => $strand,
+		 -source_tag  => 'RepeatMasker',
+		 -primary_tag => $repeat_class,
+		 -tag => { 'Target'=> [$repeat_name,$hit_start,$hit_end]},
+		);
+
+	    my $rf2 = Bio::SeqFeature::Generic->new
+		(-seq_id         => $repeat_name,
+		 -score          => $score,
+		 -start          => $hit_start,
+		 -end            => $hit_end,
+		 -strand         => $strand,
+		 -source_tag     => "RepeatMasker",
+		 -primary_tag    => $repeat_class,
+		 -tag => { 'Target'=> [$query_name,$query_start,$query_end] },
+		);
+	    my $fp = Bio::SeqFeature::FeaturePair->new(-feature1 => $rf,
+						       -feature2 => $rf2);
+	    return $fp;
+	}
+    }
+    return;  # input exhausted without another feature line
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RestrictionEnzyme.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RestrictionEnzyme.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/RestrictionEnzyme.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1226 @@
+#------------------------------------------------------------------
+# $Id: RestrictionEnzyme.pm,v 1.35.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module Bio::Tools::RestrictionEnzyme
+#
+# Cared for by Steve Chervitz <sac at bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Tools::RestrictionEnzyme - Bioperl object for a restriction endonuclease
+(cuts DNA at specific locations)
+
+DEPRECATED. Please use the Bio::Restriction modules instead.
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::RestrictionEnzyme;
+
+  ## Create a new object by name.
+
+  $re1 = Bio::Tools::RestrictionEnzyme->new(-NAME =>'EcoRI');
+
+  ## Create a new object using special syntax
+  ## which specifies the enzyme name, recognition site, and cut position.
+  ## Used for enzymes not known to this module.
+
+  $re2 = Bio::Tools::RestrictionEnzyme->new(-NAME =>'EcoRV--GAT^ATC', 
+                                            -MAKE =>'custom');
+
+  ## Get a list of the resulting fragments when a sequence is cut with
+  ## the given enzyme. The method expects a Bio::Seq object.
+
+  @fragments = $re2->cut_seq($seqobj);
+
+  ## Get a list of names of all available restriction enzymes 
+  ## known to this module.
+
+  @all = $re->available_list();
+
+  ## Get the names of restriction enzymes that have 6 bp 
+  ## recognition sequences.
+
+  @sixcutters = $re->available_list(6);
+
+=head1 DESCRIPTION
+
+The Bio::Tools::RestrictionEnzyme.pm module encapsulates generic data and 
+methods for using restriction endonucleases for in silico restriction
+analysis of DNA sequences.
+
+=head2 Considerations
+
+This module is a precursor for a more full featured version that may do such
+things as download data from online databases such as REBase L<http://rebase.neb.com/>.
+Thus, there is currently no functionality for obtaining data regarding commercial
+availability of a restriction enzyme.
+
+At some point in the future, it may make sense to derive RestrictionEnzymes
+from a class such as Bio::Enzyme or Bio::Prot::Protein (neither of which now
+exist) so that more data about the enzyme and related information can be 
+easily obtained.
+
+This module is currently in use at L<http://genome-www.stanford.edu/Sacch3D/analysis/>.
+
+=head2 Digesting on Runs of N
+
+To digest a sequence on runs of N's, here's what you can do:
+
+    $re_n  = Bio::Tools::RestrictionEnzyme->new(-name=>'N--NNNNN',
+                                                -make=>'custom');
+
+Specify the number of N's you want to match in the -name parameter. 
+So the above example will recognize and cut at runs of 5 Ns.
+If you wanted to cut at runs of 10 N's, you would use 
+
+     -name => 'N--NNNNNNNNNN'
+
+Note that you must use a specific number of N's, you cannot use a regexp to
+digest at N+ for example, because the actual number of N's at each site are
+not recorded when the sequence is analyzed. So cut_locations( ) wouldn't be 
+correct. 
+
+=head1 EXAMPLES
+
+See the script C<examples/restriction.pl> in the Bioperl distribution.
+
+=head1 DEPENDENCIES 
+
+Bio::Tools::RestrictionEnzyme is a concrete class that inherits from 
+L<Bio::Root::Root> and uses by delegation L<Bio::PrimarySeq>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other Bioperl
+modules. Send your comments and suggestions preferably to one of the Bioperl
+mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Steve Chervitz, E<lt>sac-at-bioperl.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2002 Steve A. Chervitz. All Rights Reserved.
+This module is free software; you can redistribute it and/or 
+modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<Bio::Root::Root>, 
+L<Bio::PrimarySeq>.
+
+=cut
+
+#
+##
+###
+#### END of main POD documentation.
+###
+##
+#'
+
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private
+and are intended for internal use by this module. They are
+B<not> considered part of the public interface and are described here
+for documentation purposes only.
+
+=cut
+
+
+package Bio::Tools::RestrictionEnzyme;
+use strict;
+
+use vars qw (@EXPORT_OK %EXPORT_TAGS @RE_available);
+
+use base qw(Bio::Root::Root Exporter);
+@EXPORT_OK   = qw(@RE_available);
+%EXPORT_TAGS = ( std => [qw(@RE_available)] );
+
+# Generated from REBASE version 208 (strider format), dated Aug 1 2002
+# using scripts/contributed/rebase2list.pl
+# Syntax: RE-name => 'SITE CUTS-AT' where SITE and CUTS-AT are separated 
+# by a space.
+
+my %RE = (
+ 'AasI'	=> 'GACNNNNNNGTC 7',
+ 'AatI'	=> 'AGGCCT 3',
+ 'AatII'	=> 'GACGTC 5',
+ 'AauI'	=> 'TGTACA 1',
+ 'AccI'	=> 'GTMKAC 2',
+ 'AccII'	=> 'CGCG 2',
+ 'AccIII'	=> 'TCCGGA 1',
+ 'Acc16I'	=> 'TGCGCA 3',
+ 'Acc65I'	=> 'GGTACC 1',
+ 'Acc113I'	=> 'AGTACT 3',
+ 'AccB1I'	=> 'GGYRCC 1',
+ 'AccB7I'	=> 'CCANNNNNTGG 7',
+ 'AclI'	=> 'AACGTT 2',
+ 'AcsI'	=> 'RAATTY 1',
+ 'AcvI'	=> 'CACGTG 3',
+ 'AcyI'	=> 'GRCGYC 2',
+ 'AdeI'	=> 'CACNNNGTG 6',
+ 'AfaI'	=> 'GTAC 2',
+ 'AfeI'	=> 'AGCGCT 3',
+ 'AflI'	=> 'GGWCC 1',
+ 'AflII'	=> 'CTTAAG 1',
+ 'AflIII'	=> 'ACRYGT 1',
+ 'AgeI'	=> 'ACCGGT 1',
+ 'AhaIII'	=> 'TTTAAA 3',
+ 'AhdI'	=> 'GACNNNNNGTC 6',
+ 'AhlI'	=> 'ACTAGT 1',
+ 'AleI'	=> 'CACNNNNGTG 5',
+ 'AluI'	=> 'AGCT 2',
+ 'Alw21I'	=> 'GWGCWC 5',
+ 'Alw44I'	=> 'GTGCAC 1',
+ 'AlwNI'	=> 'CAGNNNCTG 6',
+ 'Ama87I'	=> 'CYCGRG 1',
+ 'AocI'	=> 'CCTNAGG 2',
+ 'Aor51HI'	=> 'AGCGCT 3',
+ 'ApaI'	=> 'GGGCCC 5',
+ 'ApaBI'	=> 'GCANNNNNTGC 8',
+ 'ApaLI'	=> 'GTGCAC 1',
+ 'ApoI'	=> 'RAATTY 1',
+ 'AscI'	=> 'GGCGCGCC 2',
+ 'AseI'	=> 'ATTAAT 2',
+ 'AsiAI'	=> 'ACCGGT 1',
+ 'AsiSI'	=> 'GCGATCGC 5',
+ 'AsnI'	=> 'ATTAAT 2',
+ 'AspI'	=> 'GACNNNGTC 4',
+ 'Asp700I'	=> 'GAANNNNTTC 5',
+ 'Asp718I'	=> 'GGTACC 1',
+ 'AspEI'	=> 'GACNNNNNGTC 6',
+ 'AspHI'	=> 'GWGCWC 5',
+ 'AspLEI'	=> 'GCGC 3',
+ 'AspS9I'	=> 'GGNCC 1',
+ 'AsuI'	=> 'GGNCC 1',
+ 'AsuII'	=> 'TTCGAA 2',
+ 'AsuC2I'	=> 'CCSGG 2',
+ 'AsuNHI'	=> 'GCTAGC 1',
+ 'AvaI'	=> 'CYCGRG 1',
+ 'AvaII'	=> 'GGWCC 1',
+ 'AviII'	=> 'TGCGCA 3',
+ 'AvrII'	=> 'CCTAGG 1',
+ 'AxyI'	=> 'CCTNAGG 2',
+ 'BalI'	=> 'TGGCCA 3',
+ 'BamHI'	=> 'GGATCC 1',
+ 'BanI'	=> 'GGYRCC 1',
+ 'BanII'	=> 'GRGCYC 5',
+ 'BanIII'	=> 'ATCGAT 2',
+ 'BbeI'	=> 'GGCGCC 5',
+ 'BbrPI'	=> 'CACGTG 3',
+ 'BbuI'	=> 'GCATGC 5',
+ 'Bbv12I'	=> 'GWGCWC 5',
+ 'BclI'	=> 'TGATCA 1',
+ 'BcnI'	=> 'CCSGG 2',
+ 'BcoI'	=> 'CYCGRG 1',
+ 'BcuI'	=> 'ACTAGT 1',
+ 'BetI'	=> 'WCCGGW 1',
+ 'BfaI'	=> 'CTAG 1',
+ 'BfmI'	=> 'CTRYAG 1',
+ 'BfrI'	=> 'CTTAAG 1',
+ 'BfrBI'	=> 'ATGCAT 3',
+ 'BfuCI'	=> 'GATC 0',
+ 'BglI'	=> 'GCCNNNNNGGC 7',
+ 'BglII'	=> 'AGATCT 1',
+ 'BlnI'	=> 'CCTAGG 1',
+ 'BloHII'	=> 'CTGCAG 5',
+ 'BlpI'	=> 'GCTNAGC 2',
+ 'Bme18I'	=> 'GGWCC 1',
+ 'Bme1390I'	=> 'CCNGG 2',
+ 'Bme1580I'	=> 'GKGCMC 5',
+ 'BmtI'	=> 'GCTAGC 5',
+ 'BmyI'	=> 'GDGCHC 5',
+ 'BoxI'	=> 'GACNNNNGTC 5',
+ 'Bpu14I'	=> 'TTCGAA 2',
+ 'Bpu1102I'	=> 'GCTNAGC 2',
+ 'Bsa29I'	=> 'ATCGAT 2',
+ 'BsaAI'	=> 'YACGTR 3',
+ 'BsaBI'	=> 'GATNNNNATC 5',
+ 'BsaHI'	=> 'GRCGYC 2',
+ 'BsaJI'	=> 'CCNNGG 1',
+ 'BsaOI'	=> 'CGRYCG 4',
+ 'BsaWI'	=> 'WCCGGW 1',
+ 'BscI'	=> 'ATCGAT 2',
+ 'Bsc4I'	=> 'CCNNNNNNNGG 7',
+ 'BscBI'	=> 'GGNNCC 3',
+ 'BscFI'	=> 'GATC 0',
+ 'Bse8I'	=> 'GATNNNNATC 5',
+ 'Bse21I'	=> 'CCTNAGG 2',
+ 'Bse118I'	=> 'RCCGGY 1',
+ 'BseAI'	=> 'TCCGGA 1',
+ 'BseBI'	=> 'CCWGG 2',
+ 'BseCI'	=> 'ATCGAT 2',
+ 'BseDI'	=> 'CCNNGG 1',
+ 'BseJI'	=> 'GATNNNNATC 5',
+ 'BseLI'	=> 'CCNNNNNNNGG 7',
+ 'BsePI'	=> 'GCGCGC 1',
+ 'BseSI'	=> 'GKGCMC 5',
+ 'BseX3I'	=> 'CGGCCG 1',
+ 'BshI'	=> 'GGCC 2',
+ 'Bsh1236I'	=> 'CGCG 2',
+ 'Bsh1285I'	=> 'CGRYCG 4',
+ 'BshFI'	=> 'GGCC 2',
+ 'BshNI'	=> 'GGYRCC 1',
+ 'BshTI'	=> 'ACCGGT 1',
+ 'BsiBI'	=> 'GATNNNNATC 5',
+ 'BsiCI'	=> 'TTCGAA 2',
+ 'BsiEI'	=> 'CGRYCG 4',
+ 'BsiHKAI'	=> 'GWGCWC 5',
+ 'BsiHKCI'	=> 'CYCGRG 1',
+ 'BsiLI'	=> 'CCWGG 2',
+ 'BsiMI'	=> 'TCCGGA 1',
+ 'BsiQI'	=> 'TGATCA 1',
+ 'BsiSI'	=> 'CCGG 1',
+ 'BsiWI'	=> 'CGTACG 1',
+ 'BsiXI'	=> 'ATCGAT 2',
+ 'BsiYI'	=> 'CCNNNNNNNGG 7',
+ 'BsiZI'	=> 'GGNCC 1',
+ 'BslI'	=> 'CCNNNNNNNGG 7',
+ 'BsoBI'	=> 'CYCGRG 1',
+ 'Bsp13I'	=> 'TCCGGA 1',
+ 'Bsp19I'	=> 'CCATGG 1',
+ 'Bsp68I'	=> 'TCGCGA 3',
+ 'Bsp106I'	=> 'ATCGAT 2',
+ 'Bsp119I'	=> 'TTCGAA 2',
+ 'Bsp120I'	=> 'GGGCCC 1',
+ 'Bsp143I'	=> 'GATC 0',
+ 'Bsp143II'	=> 'RGCGCY 5',
+ 'Bsp1286I'	=> 'GDGCHC 5',
+ 'Bsp1407I'	=> 'TGTACA 1',
+ 'Bsp1720I'	=> 'GCTNAGC 2',
+ 'BspA2I'	=> 'CCTAGG 1',
+ 'BspCI'	=> 'CGATCG 4',
+ 'BspDI'	=> 'ATCGAT 2',
+ 'BspEI'	=> 'TCCGGA 1',
+ 'BspHI'	=> 'TCATGA 1',
+ 'BspLI'	=> 'GGNNCC 3',
+ 'BspLU11I'	=> 'ACATGT 1',
+ 'BspMII'	=> 'TCCGGA 1',
+ 'BspTI'	=> 'CTTAAG 1',
+ 'BspT104I'	=> 'TTCGAA 2',
+ 'BspT107I'	=> 'GGYRCC 1',
+ 'BspXI'	=> 'ATCGAT 2',
+ 'BsrBRI'	=> 'GATNNNNATC 5',
+ 'BsrFI'	=> 'RCCGGY 1',
+ 'BsrGI'	=> 'TGTACA 1',
+ 'BssAI'	=> 'RCCGGY 1',
+ 'BssECI'	=> 'CCNNGG 1',
+ 'BssHI'	=> 'CTCGAG 1',
+ 'BssHII'	=> 'GCGCGC 1',
+ 'BssKI'	=> 'CCNGG 0',
+ 'BssNAI'	=> 'GTATAC 3',
+ 'BssT1I'	=> 'CCWWGG 1',
+ 'Bst98I'	=> 'CTTAAG 1',
+ 'Bst1107I'	=> 'GTATAC 3',
+ 'BstACI'	=> 'GRCGYC 2',
+ 'BstAPI'	=> 'GCANNNNNTGC 7',
+ 'BstBI'	=> 'TTCGAA 2',
+ 'BstBAI'	=> 'YACGTR 3',
+ 'Bst4CI'	=> 'ACNGT 3',
+ 'BstC8I'	=> 'GCNNGC 3',
+ 'BstDEI'	=> 'CTNAG 1',
+ 'BstDSI'	=> 'CCRYGG 1',
+ 'BstEII'	=> 'GGTNACC 1',
+ 'BstENI'	=> 'CCTNNNNNAGG 5',
+ 'BstENII'	=> 'GATC 0',
+ 'BstFNI'	=> 'CGCG 2',
+ 'BstH2I'	=> 'RGCGCY 5',
+ 'BstHHI'	=> 'GCGC 3',
+ 'BstHPI'	=> 'GTTAAC 3',
+ 'BstKTI'	=> 'GATC 3',
+ 'BstMAI'	=> 'CTGCAG 5',
+ 'BstMCI'	=> 'CGRYCG 4',
+ 'BstMWI'	=> 'GCNNNNNNNGC 7',
+ 'BstNI'	=> 'CCWGG 2',
+ 'BstNSI'	=> 'RCATGY 5',
+ 'BstOI'	=> 'CCWGG 2',
+ 'BstPI'	=> 'GGTNACC 1',
+ 'BstPAI'	=> 'GACNNNNGTC 5',
+ 'BstSCI'	=> 'CCNGG 0',
+ 'BstSFI'	=> 'CTRYAG 1',
+ 'BstSNI'	=> 'TACGTA 3',
+ 'BstUI'	=> 'CGCG 2',
+ 'Bst2UI'	=> 'CCWGG 2',
+ 'BstXI'	=> 'CCANNNNNNTGG 8',
+ 'BstX2I'	=> 'RGATCY 1',
+ 'BstYI'	=> 'RGATCY 1',
+ 'BstZI'	=> 'CGGCCG 1',
+ 'BstZ17I'	=> 'GTATAC 3',
+ 'Bsu15I'	=> 'ATCGAT 2',
+ 'Bsu36I'	=> 'CCTNAGG 2',
+ 'BsuRI'	=> 'GGCC 2',
+ 'BsuTUI'	=> 'ATCGAT 2',
+ 'BtgI'	=> 'CCRYGG 1',
+ 'BthCI'	=> 'GCNGC 4',
+ 'Cac8I'	=> 'GCNNGC 3',
+ 'CaiI'	=> 'CAGNNNCTG 6',
+ 'CauII'	=> 'CCSGG 2',
+ 'CciNI'	=> 'GCGGCCGC 2',
+ 'CelII'	=> 'GCTNAGC 2',
+ 'CfoI'	=> 'GCGC 3',
+ 'CfrI'	=> 'YGGCCR 1',
+ 'Cfr9I'	=> 'CCCGGG 1',
+ 'Cfr10I'	=> 'RCCGGY 1',
+ 'Cfr13I'	=> 'GGNCC 1',
+ 'Cfr42I'	=> 'CCGCGG 4',
+ 'ChaI'	=> 'GATC 4',
+ 'ClaI'	=> 'ATCGAT 2',
+ 'CpoI'	=> 'CGGWCCG 2',
+ 'CspI'	=> 'CGGWCCG 2',
+ 'Csp6I'	=> 'GTAC 1',
+ 'Csp45I'	=> 'TTCGAA 2',
+ 'CspAI'	=> 'ACCGGT 1',
+ 'CviAII'	=> 'CATG 1',
+ 'CviJI'	=> 'RGCY 2',
+ 'CviRI'	=> 'TGCA 2',
+ 'CviTI'	=> 'RGCY 2',
+ 'CvnI'	=> 'CCTNAGG 2',
+ 'DdeI'	=> 'CTNAG 1',
+ 'DpnI'	=> 'GATC 2',
+ 'DpnII'	=> 'GATC 0',
+ 'DraI'	=> 'TTTAAA 3',
+ 'DraII'	=> 'RGGNCCY 2',
+ 'DraIII'	=> 'CACNNNGTG 6',
+ 'DrdI'	=> 'GACNNNNNNGTC 7',
+ 'DsaI'	=> 'CCRYGG 1',
+ 'DseDI'	=> 'GACNNNNNNGTC 7',
+ 'EaeI'	=> 'YGGCCR 1',
+ 'EagI'	=> 'CGGCCG 1',
+ 'Eam1105I'	=> 'GACNNNNNGTC 6',
+ 'Ecl136II'	=> 'GAGCTC 3',
+ 'EclHKI'	=> 'GACNNNNNGTC 6',
+ 'EclXI'	=> 'CGGCCG 1',
+ 'Eco24I'	=> 'GRGCYC 5',
+ 'Eco32I'	=> 'GATATC 3',
+ 'Eco47I'	=> 'GGWCC 1',
+ 'Eco47III'	=> 'AGCGCT 3',
+ 'Eco52I'	=> 'CGGCCG 1',
+ 'Eco72I'	=> 'CACGTG 3',
+ 'Eco81I'	=> 'CCTNAGG 2',
+ 'Eco88I'	=> 'CYCGRG 1',
+ 'Eco91I'	=> 'GGTNACC 1',
+ 'Eco105I'	=> 'TACGTA 3',
+ 'Eco130I'	=> 'CCWWGG 1',
+ 'Eco147I'	=> 'AGGCCT 3',
+ 'EcoHI'	=> 'CCSGG 0',
+ 'EcoICRI'	=> 'GAGCTC 3',
+ 'EcoNI'	=> 'CCTNNNNNAGG 5',
+ 'EcoO65I'	=> 'GGTNACC 1',
+ 'EcoO109I'	=> 'RGGNCCY 2',
+ 'EcoRI'	=> 'GAATTC 1',
+ 'EcoRII'	=> 'CCWGG 0',
+ 'EcoRV'	=> 'GATATC 3',
+ 'EcoT14I'	=> 'CCWWGG 1',
+ 'EcoT22I'	=> 'ATGCAT 5',
+ 'EcoT38I'	=> 'GRGCYC 5',
+ 'EgeI'	=> 'GGCGCC 3',
+ 'EheI'	=> 'GGCGCC 3',
+ 'ErhI'	=> 'CCWWGG 1',
+ 'EsaBC3I'	=> 'TCGA 2',
+ 'EspI'	=> 'GCTNAGC 2',
+ 'FatI'	=> 'CATG 0',
+ 'FauNDI'	=> 'CATATG 2',
+ 'FbaI'	=> 'TGATCA 1',
+ 'FblI'	=> 'GTMKAC 2',
+ 'FmuI'	=> 'GGNCC 4',
+ 'FnuDII'	=> 'CGCG 2',
+ 'Fnu4HI'	=> 'GCNGC 2',
+ 'FriOI'	=> 'GRGCYC 5',
+ 'FseI'	=> 'GGCCGGCC 6',
+ 'FspI'	=> 'TGCGCA 3',
+ 'FspAI'	=> 'RTGCGCAY 4',
+ 'Fsp4HI'	=> 'GCNGC 2',
+ 'FunI'	=> 'AGCGCT 3',
+ 'FunII'	=> 'GAATTC 1',
+ 'HaeI'	=> 'WGGCCW 3',
+ 'HaeII'	=> 'RGCGCY 5',
+ 'HaeIII'	=> 'GGCC 2',
+ 'HapII'	=> 'CCGG 1',
+ 'HgiAI'	=> 'GWGCWC 5',
+ 'HgiCI'	=> 'GGYRCC 1',
+ 'HgiJII'	=> 'GRGCYC 5',
+ 'HhaI'	=> 'GCGC 3',
+ 'Hin1I'	=> 'GRCGYC 2',
+ 'Hin6I'	=> 'GCGC 1',
+ 'HinP1I'	=> 'GCGC 1',
+ 'HincII'	=> 'GTYRAC 3',
+ 'HindII'	=> 'GTYRAC 3',
+ 'HindIII'	=> 'AAGCTT 1',
+ 'HinfI'	=> 'GANTC 1',
+ 'HpaI'	=> 'GTTAAC 3',
+ 'HpaII'	=> 'CCGG 1',
+ 'Hpy8I'	=> 'GTNNAC 3',
+ 'Hpy99I'	=> 'CGWCG 5',
+ 'Hpy178III'	=> 'TCNNGA 2',
+ 'Hpy188I'	=> 'TCNGA 3',
+ 'Hpy188III'	=> 'TCNNGA 2',
+ 'HpyCH4I'	=> 'CATG 3',
+ 'HpyCH4III'	=> 'ACNGT 3',
+ 'HpyCH4IV'	=> 'ACGT 1',
+ 'HpyCH4V'	=> 'TGCA 2',
+ 'HpyF10VI'	=> 'GCNNNNNNNGC 8',
+ 'Hsp92I'	=> 'GRCGYC 2',
+ 'Hsp92II'	=> 'CATG 4',
+ 'HspAI'	=> 'GCGC 1',
+ 'ItaI'	=> 'GCNGC 2',
+ 'KasI'	=> 'GGCGCC 1',
+ 'KpnI'	=> 'GGTACC 5',
+ 'Kpn2I'	=> 'TCCGGA 1',
+ 'KspI'	=> 'CCGCGG 4',
+ 'Ksp22I'	=> 'TGATCA 1',
+ 'KspAI'	=> 'GTTAAC 3',
+ 'Kzo9I'	=> 'GATC 0',
+ 'LpnI'	=> 'RGCGCY 3',
+ 'LspI'	=> 'TTCGAA 2',
+ 'MabI'	=> 'ACCWGGT 1',
+ 'MaeI'	=> 'CTAG 1',
+ 'MaeII'	=> 'ACGT 1',
+ 'MaeIII'	=> 'GTNAC 0',
+ 'MamI'	=> 'GATNNNNATC 5',
+ 'MboI'	=> 'GATC 0',
+ 'McrI'	=> 'CGRYCG 4',
+ 'MfeI'	=> 'CAATTG 1',
+ 'MflI'	=> 'RGATCY 1',
+ 'MhlI'	=> 'GDGCHC 5',
+ 'MlsI'	=> 'TGGCCA 3',
+ 'MluI'	=> 'ACGCGT 1',
+ 'MluNI'	=> 'TGGCCA 3',
+ 'Mly113I'	=> 'GGCGCC 2',
+ 'Mph1103I'	=> 'ATGCAT 5',
+ 'MroI'	=> 'TCCGGA 1',
+ 'MroNI'	=> 'GCCGGC 1',
+ 'MroXI'	=> 'GAANNNNTTC 5',
+ 'MscI'	=> 'TGGCCA 3',
+ 'MseI'	=> 'TTAA 1',
+ 'MslI'	=> 'CAYNNNNRTG 5',
+ 'MspI'	=> 'CCGG 1',
+ 'Msp20I'	=> 'TGGCCA 3',
+ 'MspA1I'	=> 'CMGCKG 3',
+ 'MspCI'	=> 'CTTAAG 1',
+ 'MspR9I'	=> 'CCNGG 2',
+ 'MssI'	=> 'GTTTAAAC 4',
+ 'MstI'	=> 'TGCGCA 3',
+ 'MunI'	=> 'CAATTG 1',
+ 'MvaI'	=> 'CCWGG 2',
+ 'MvnI'	=> 'CGCG 2',
+ 'MwoI'	=> 'GCNNNNNNNGC 7',
+ 'NaeI'	=> 'GCCGGC 3',
+ 'NarI'	=> 'GGCGCC 2',
+ 'NciI'	=> 'CCSGG 2',
+ 'NcoI'	=> 'CCATGG 1',
+ 'NdeI'	=> 'CATATG 2',
+ 'NdeII'	=> 'GATC 0',
+ 'NgoAIV'	=> 'GCCGGC 1',
+ 'NgoMIV'	=> 'GCCGGC 1',
+ 'NheI'	=> 'GCTAGC 1',
+ 'NlaIII'	=> 'CATG 4',
+ 'NlaIV'	=> 'GGNNCC 3',
+ 'Nli3877I'	=> 'CYCGRG 5',
+ 'NmuCI'	=> 'GTSAC 0',
+ 'NotI'	=> 'GCGGCCGC 2',
+ 'NruI'	=> 'TCGCGA 3',
+ 'NruGI'	=> 'GACNNNNNGTC 6',
+ 'NsbI'	=> 'TGCGCA 3',
+ 'NsiI'	=> 'ATGCAT 5',
+ 'NspI'	=> 'RCATGY 5',
+ 'NspIII'	=> 'CYCGRG 1',
+ 'NspV'	=> 'TTCGAA 2',
+ 'NspBII'	=> 'CMGCKG 3',
+ 'OliI'	=> 'CACNNNNGTG 5',
+ 'PacI'	=> 'TTAATTAA 5',
+ 'PaeI'	=> 'GCATGC 5',
+ 'PaeR7I'	=> 'CTCGAG 1',
+ 'PagI'	=> 'TCATGA 1',
+ 'PalI'	=> 'GGCC 2',
+ 'PauI'	=> 'GCGCGC 1',
+ 'PceI'	=> 'AGGCCT 3',
+ 'PciI'	=> 'ACATGT 1',
+ 'PdiI'	=> 'GCCGGC 3',
+ 'PdmI'	=> 'GAANNNNTTC 5',
+ 'Pfl23II'	=> 'CGTACG 1',
+ 'PflBI'	=> 'CCANNNNNTGG 7',
+ 'PflFI'	=> 'GACNNNGTC 4',
+ 'PflMI'	=> 'CCANNNNNTGG 7',
+ 'PfoI'	=> 'TCCNGGA 1',
+ 'PinAI'	=> 'ACCGGT 1',
+ 'Ple19I'	=> 'CGATCG 4',
+ 'PmaCI'	=> 'CACGTG 3',
+ 'PmeI'	=> 'GTTTAAAC 4',
+ 'PmlI'	=> 'CACGTG 3',
+ 'Ppu10I'	=> 'ATGCAT 1',
+ 'PpuMI'	=> 'RGGWCCY 2',
+ 'PpuXI'	=> 'RGGWCCY 2',
+ 'PshAI'	=> 'GACNNNNGTC 5',
+ 'PshBI'	=> 'ATTAAT 2',
+ 'PsiI'	=> 'TTATAA 3',
+ 'Psp03I'	=> 'GGWCC 4',
+ 'Psp5II'	=> 'RGGWCCY 2',
+ 'Psp6I'	=> 'CCWGG 0',
+ 'Psp1406I'	=> 'AACGTT 2',
+ 'PspAI'	=> 'CCCGGG 1',
+ 'Psp124BI'	=> 'GAGCTC 5',
+ 'PspEI'	=> 'GGTNACC 1',
+ 'PspGI'	=> 'CCWGG 0',
+ 'PspLI'	=> 'CGTACG 1',
+ 'PspN4I'	=> 'GGNNCC 3',
+ 'PspOMI'	=> 'GGGCCC 1',
+ 'PspPI'	=> 'GGNCC 1',
+ 'PspPPI'	=> 'RGGWCCY 2',
+ 'PssI'	=> 'RGGNCCY 5',
+ 'PstI'	=> 'CTGCAG 5',
+ 'PsuI'	=> 'RGATCY 1',
+ 'PsyI'	=> 'GACNNNGTC 4',
+ 'PvuI'	=> 'CGATCG 4',
+ 'PvuII'	=> 'CAGCTG 3',
+ 'RcaI'	=> 'TCATGA 1',
+ 'RsaI'	=> 'GTAC 2',
+ 'RsrII'	=> 'CGGWCCG 2',
+ 'Rsr2I'	=> 'CGGWCCG 2',
+ 'SacI'	=> 'GAGCTC 5',
+ 'SacII'	=> 'CCGCGG 4',
+ 'SalI'	=> 'GTCGAC 1',
+ 'SanDI'	=> 'GGGWCCC 2',
+ 'SatI'	=> 'GCNGC 2',
+ 'SauI'	=> 'CCTNAGG 2',
+ 'Sau96I'	=> 'GGNCC 1',
+ 'Sau3AI'	=> 'GATC 0',
+ 'SbfI'	=> 'CCTGCAGG 6',
+ 'ScaI'	=> 'AGTACT 3',
+ 'SciI'	=> 'CTCGAG 3',
+ 'ScrFI'	=> 'CCNGG 2',
+ 'SdaI'	=> 'CCTGCAGG 6',
+ 'SduI'	=> 'GDGCHC 5',
+ 'SecI'	=> 'CCNNGG 1',
+ 'SelI'	=> 'CGCG 0',
+ 'SexAI'	=> 'ACCWGGT 1',
+ 'SfcI'	=> 'CTRYAG 1',
+ 'SfeI'	=> 'CTRYAG 1',
+ 'SfiI'	=> 'GGCCNNNNNGGCC 8',
+ 'SfoI'	=> 'GGCGCC 3',
+ 'Sfr274I'	=> 'CTCGAG 1',
+ 'Sfr303I'	=> 'CCGCGG 4',
+ 'SfuI'	=> 'TTCGAA 2',
+ 'SgfI'	=> 'GCGATCGC 5',
+ 'SgrAI'	=> 'CRCCGGYG 2',
+ 'SgrBI'	=> 'CCGCGG 4',
+ 'SinI'	=> 'GGWCC 1',
+ 'SlaI'	=> 'CTCGAG 1',
+ 'SmaI'	=> 'CCCGGG 3',
+ 'SmiI'	=> 'ATTTAAAT 4',
+ 'SmiMI'	=> 'CAYNNNNRTG 5',
+ 'SmlI'	=> 'CTYRAG 1',
+ 'SnaBI'	=> 'TACGTA 3',
+ 'SpaHI'	=> 'GCATGC 5',
+ 'SpeI'	=> 'ACTAGT 1',
+ 'SphI'	=> 'GCATGC 5',
+ 'SplI'	=> 'CGTACG 1',
+ 'SrfI'	=> 'GCCCGGGC 4',
+ 'Sse9I'	=> 'AATT 0',
+ 'Sse232I'	=> 'CGCCGGCG 2',
+ 'Sse8387I'	=> 'CCTGCAGG 6',
+ 'Sse8647I'	=> 'AGGWCCT 2',
+ 'SseBI'	=> 'AGGCCT 3',
+ 'SspI'	=> 'AATATT 3',
+ 'SspBI'	=> 'TGTACA 1',
+ 'SstI'	=> 'GAGCTC 5',
+ 'SstII'	=> 'CCGCGG 4',
+ 'StuI'	=> 'AGGCCT 3',
+ 'StyI'	=> 'CCWWGG 1',
+ 'SunI'	=> 'CGTACG 1',
+ 'SwaI'	=> 'ATTTAAAT 4',
+ 'TaaI'	=> 'ACNGT 3',
+ 'TaiI'	=> 'ACGT 4',
+ 'TaqI'	=> 'TCGA 1',
+ 'TasI'	=> 'AATT 0',
+ 'TatI'	=> 'WGTACW 1',
+ 'TauI'	=> 'GCSGC 4',
+ 'TelI'	=> 'GACNNNGTC 4',
+ 'TfiI'	=> 'GAWTC 1',
+ 'ThaI'	=> 'CGCG 2',
+ 'TliI'	=> 'CTCGAG 1',
+ 'Tru1I'	=> 'TTAA 1',
+ 'Tru9I'	=> 'TTAA 1',
+ 'TscI'	=> 'ACGT 4',
+ 'TseI'	=> 'GCWGC 1',
+ 'Tsp45I'	=> 'GTSAC 0',
+ 'Tsp509I'	=> 'AATT 0',
+ 'Tsp4CI'	=> 'ACNGT 3',
+ 'TspEI'	=> 'AATT 0',
+ 'Tth111I'	=> 'GACNNNGTC 4',
+ 'TthHB8I'	=> 'TCGA 1',
+ 'UnbI'	=> 'GGNCC 0',
+ 'Van91I'	=> 'CCANNNNNTGG 7',
+ 'Vha464I'	=> 'CTTAAG 1',
+ 'VneI'	=> 'GTGCAC 1',
+ 'VpaK11AI'	=> 'GGWCC 0',
+ 'VpaK11BI'	=> 'GGWCC 1',
+ 'VspI'	=> 'ATTAAT 2',
+ 'XagI'	=> 'CCTNNNNNAGG 5',
+ 'XapI'	=> 'RAATTY 1',
+ 'XbaI'	=> 'TCTAGA 1',
+ 'XceI'	=> 'RCATGY 5',
+ 'XcmI'	=> 'CCANNNNNNNNNTGG 8',
+ 'XhoI'	=> 'CTCGAG 1',
+ 'XhoII'	=> 'RGATCY 1',
+ 'XmaI'	=> 'CCCGGG 1',
+ 'XmaIII'	=> 'CGGCCG 1',
+ 'XmaCI'	=> 'CCCGGG 1',
+ 'XmaJI'	=> 'CCTAGG 1',
+ 'XmiI'	=> 'GTMKAC 2',
+ 'XmnI'	=> 'GAANNNNTTC 5',
+ 'XspI'	=> 'CTAG 1',
+ 'ZhoI'	=> 'ATCGAT 2',
+ 'ZraI'	=> 'GACGTC 3',
+ 'Zsp2I'	=> 'ATGCAT 5',
+);
+
+@RE_available = sort keys %RE;  # sorted names of every enzyme in the %RE table
+
+
+=head1 new
+
+ Title     : new
+ Purpose   : Calls the superclass constructor (Bio::Root::Root) first and
+           : then initializes the RestrictionEnzyme object.
+ Returns   : The new Bio::Tools::RestrictionEnzyme object
+ Argument  : Parameters passed to new()
+ Comments  : A RestrictionEnzyme object manages its recognition sequence
+           : as a Bio::PrimarySeq object.
+
+See Also   : L<_make_custom>(), L<_make_standard>(), L<Bio::PrimarySeq.pm::_initialize()>
+
+=cut
+
+#---------------
+sub new {
+    # Builds the enzyme from the built-in table or from a custom 'NAME--SITE' string.
+    my($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    $self->warn("Use of Bio::Tools::RestrictionEnzyme is deprecated. ".
+                   "Use Bio::Restriction classes instead");
+    my ($name,$make) = $self->_rearrange([qw(NAME MAKE)],@args);
+
+    $name && $self->name($name);
+    my %data;
+    if(defined $make && $make eq 'custom') {
+	%data = $self->_make_custom($name); 
+    } else {
+	%data = $self->_make_standard($name);
+    }
+    $self->{'_seq'} = Bio::PrimarySeq->new(%data,   # recognition site kept as a sequence object
+				   -VERBOSE =>$self->verbose,
+				   -alphabet => 'dna',
+				   );
+    return $self;
+}
+
+
+#=head1 _make_standard
+#
+# Title     : _make_standard
+# Usage     : n/a; automatically called by new()
+# Purpose   : Permits standard RE object construction from an enzyme
+#	    : name such as 'EcoRI'.
+# Returns   : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
+# Argument  : String containing the enzyme name.
+# Throws    : Exception if the requested enzyme name is unavailable.
+#	    : NOTE: Case sensitive.
+#
+#See Also   : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>, L<_make_custom()|_make_custom>
+#
+#=cut
+
+#------------------
+sub _make_standard {
+    # Look up $name in %RE and return the Bio::PrimarySeq constructor args.
+    my($self, $name) = @_;
+
+    $name =~ s/^\s+|\s+$//g;
+
+    $self->is_available($name) || 
+	$self->throw("Unavailable or undefined enzyme: $name (Note: CASE SENSITIVE)\n" .
+		     "Currently available enzymes: \n@RE_available\n");
+
+    my @data = split( ' ', $RE{$name});  # table value is 'SITE CUTS-AT'
+    my (%dat);
+    $dat{-SEQ} = $data[0];
+    $dat{-NAME} = $dat{-ID}= $name;
+    $self->{'_cuts_after'} = $data[1];
+
+    return %dat;
+}
+
+
+#=head1 _make_custom
+#
+# Title     : _make_custom
+# Usage     : n/a; automatically called by new()
+# Purpose   : Permits custom RE object construction from strings 
+#	    : such as 'EcoRI--G^AATTC' as the name of the enzyme.
+# Returns   : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
+# Argument  : String containing string with special syntax.
+# Throws    : Exception if the string has bad syntax.
+#	    : Warning if the string did not specify cut position.
+#	    :         Places cut site after 5'-most position.
+#
+#See Also   : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>
+#
+#=cut
+
+#'
+#-----------------
+sub _make_custom {
+    # Parse a 'NAME--SITE' string (caret marks the cut) into PrimarySeq args.
+    my($self, $name) = @_;
+
+    $name =~ s/\s+//g;
+    my @parts  = split '--', $name;
+    my (%dat);
+    $dat{-NAME} = $dat{-ID} = $parts[0];
+    $self->name($parts[0]);  ## Reset name
+    # throw() gets one message string, so the syntax hint is concatenated to it.
+    $parts[1] || return $self->throw("Undefined recognition site for $parts[0]. ".
+				      "Use this syntax: EcoRV--GAT^ATC");
+    ## Determine the cuts_after point.
+    my $cut_index = index $parts[1], '^';
+    if( $cut_index <0) { $cut_index = 0;
+			 $self->warn("Unknown cut position for $parts[0]. Assuming position 0\n" . 
+				     "Use carat to specify cut position (e.g., G^AATTC)"); }
+    $self->{'_cuts_after'} =  $cut_index;
+
+    ## Save the recognition sequence after removing the '^'
+    $parts[1] =~ s/\^//g;
+    $dat{-SEQ} = $parts[1];
+    return %dat;
+}
+    
+
+=head1 cuts_after
+
+ Title     : cuts_after
+ Usage     : $num = $re->cuts_after();
+ Purpose   : Sets/Gets an integer indicating the position of cleavage 
+           : relative to the 5' end of the recognition sequence.
+ Returns   : Integer
+ Argument  : Integer (optional)
+ Throws    : Exception if argument is non-numeric.
+ Access    : Public
+ Comments  : This method is only needed to change the cuts at
+           : position. This data is automatically set during
+           : construction.
+
+See Also   : L<_make_standard()|_make_standard>, L<_make_custom()|_make_custom>
+
+=cut
+
+#'
+#---------------
+sub cuts_after { 
+    # Combined getter/setter for the cut position stored in '_cuts_after'.
+    my $self = shift; 
+    if(@_) { my $num = shift;
+	     unless(defined $num and $num =~ /\A-?\d+\z/) {  # integers only; rejects e.g. '3abc'
+		 $self->throw("The cuts_after position must be an integer (".(defined $num ? $num : 'undef').")");
+	     }
+	     $self->{'_cuts_after'} = $num;
+	 }
+    $self->{'_cuts_after'}; 
+}
+
+
+
+=head1 site
+
+ Title     : site
+ Usage     : $re->site();
+ Purpose   : Gets the recognition sequence for the enzyme. 
+ Example   : $seq_string = $re->site();
+ Returns   : String containing recognition sequence indicating 
+           : cleavage site as in  'G^AATTC'.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : If you want a simple string representing the site without 
+             any '^', use the string() method.
+
+See Also   : L<string()|string>
+
+=cut
+
+#---------
+sub site {
+    # Render the recognition sequence with '^' inserted at the cut point.
+    my ($self) = @_;
+    my $site_seq = $self->seq;
+    my $cut = $self->cuts_after;
+
+    # A cut position of zero or less yields the bare sequence, no caret.
+    return $site_seq->seq unless $cut > 0;
+    my $len = $site_seq->length;
+    # Cut at or beyond the last base: the caret trails the whole sequence.
+    return $site_seq->seq.'^' if $cut >= $len;
+
+    my $head = $site_seq->subseq(1, $cut);
+    return $head.'^'.$site_seq->subseq($cut+1, $len);
+}
+
+
+=head1 seq
+
+ Title     : seq
+ Usage     : $re->seq();
+ Purpose   : Get the Bio::PrimarySeq.pm-derived object representing 
+           : the recognition sequence
+ Returns   : The Bio::PrimarySeq object stored at construction
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<string()|string>, L<revcom()|revcom>
+
+=cut
+
+#---------
+sub seq    { my ($self) = @_; return $self->{'_seq'}; }  # the stored recognition-sequence object
+#---------
+
+
+
+=head1 string
+
+ Title     : string
+ Usage     : $re->string();
+ Purpose   : Get a string representing the recognition sequence.
+ Returns   : String. Does NOT contain a  '^' representing the cut location
+             as returned by the site() method
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Delegates to the Bio::PrimarySeq-derived object.
+
+See Also   : L<seq()|seq>, L<site()|site>, L<revcom()|revcom>
+
+=cut
+
+#-----------
+sub string { my ($self) = @_; return $self->{'_seq'}->seq; }  # delegate to the stored object's seq()
+#-----------
+
+
+
+=head1 revcom
+
+ Title     : revcom
+ Usage     : $re->revcom();
+ Purpose   : Get a string representing the reverse complement of
+           : the recognition sequence.
+ Returns   : String
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Delegates to the Bio::PrimarySeq.pm-derived object, but needs to
+             get out the string from it, as now Bio::PrimarySeq->revcom makes a
+             Bio::PrimarySeq object
+
+See Also   : L<seq()|seq>, L<string()|string>
+
+=cut
+
+#-----------
+sub revcom { my ($self) = @_; my $rc = $self->{'_seq'}->revcom; return $rc->seq(); }  # string of the revcom object
+#-----------
+
+
+
+=head1 cut_seq
+
+ Title     : cut_seq
+ Usage     : $re->cut_seq(<sequence object>);
+ Purpose   : Conceptually cut or "digest" a DNA sequence with the given enzyme.
+ Example   : @fragments = $re->cut_seq(<sequence object>); 
+ Returns   : List of strings containing the resulting fragments.
+ Argument  : Reference to a Bio::PrimarySeq.pm-derived object.
+ Throws    : Exception if argument is not an object.
+           : (Does not yet verify that it is derived from Bio::PrimarySeq.pm.)
+ Comments  : Strategy relies on Perl's built-in split() function.
+           : Since split removes the recognition pattern, the resulting
+           : fragments are repaired after split()-ing.
+           : A side-effect of this is that for sites with ambiguous
+           : recognition sequence (i.e., containing N), the fragments
+           : will contain ambiguity characters instead of AGCT.
+           :
+           : There is currently no support for partial digestions.
+           : There is currently no support for circular sequences.
+           : (This should just involve merging the first and last frag
+           : if $seqObj->is_circular returns true).
+
+=cut
+
+#'
+#-------------
+sub cut_seq {
+    # Digest $seqObj at every recognition-site match; returns the fragment strings.
+    my( $self, $seqObj) = @_;
+    if( !ref($seqObj) || 
+	! $seqObj->isa('Bio::PrimarySeqI') ) {
+	$self->throw( "Can't cut sequence. Missing or invalid object ".
+		      "seqObj: ".(defined $seqObj ? $seqObj : 'undef'));
+    }
+
+    my $cuts_after = $self->{'_cuts_after'};
+    my ($site_3prime_seq, $site_5prime_seq);
+    my $reSeq = $self->seq;
+    if($cuts_after == 0) {
+	$site_3prime_seq = '';
+	$site_5prime_seq = $reSeq->seq();
+    } elsif($cuts_after == $reSeq->length) {
+	$site_3prime_seq = $reSeq->seq();
+	$site_5prime_seq = '';
+    } else {
+	$site_3prime_seq = $reSeq->subseq(1, $self->{'_cuts_after'});
+	$site_5prime_seq = $reSeq->subseq($self->{'_cuts_after'}+1, $reSeq->length);
+    }
+
+    $self->debug("3' site: $site_3prime_seq\n5' site: $site_5prime_seq\n");
+
+    my(@re_frags);
+    my $seq = uc $self->_expanded_string;
+
+    if(!$self->palindromic and $self->name ne 'N') {
+	my $revseq = $self->_expanded_string( $reSeq->revcom->seq() );
+	$seq .= '|'.uc($revseq);
+    }
+    $self->debug(__PACKAGE__, ": site seq: $seq\n");
+    $self->debug(__PACKAGE__, ": splitting ", $reSeq->seq, "\n");
+    # LIMIT -1 keeps a trailing empty field, so a site ending the sequence is not lost.
+    @re_frags = split(/$seq/i, $seqObj->seq, -1);
+    $self->debug(__PACKAGE__, ": cut_seq, ", scalar(@re_frags), " fragments.\n");
+
+    ## Re-attach the split recognition site back to the frags
+    ## since perl zapped them in the split() call.
+    my($i);
+    my $numFrags = scalar @re_frags;
+    for($i=0; $i<$numFrags; $i++) {
+        $i < $#re_frags  and $re_frags[$i] = $re_frags[$i].$site_3prime_seq;
+        $i > 0           and $re_frags[$i] = $site_5prime_seq.$re_frags[$i];
+    }
+    @re_frags;
+}
+
+=head1 cut_locations
+
+ Title     : cut_locations
+ Usage     : my $locations = $re->cut_locations(<sequence_object>);
+ Purpose   : Report the location of the recognition site(s) within
+           : an input sequence. 
+ Example   : my $locations = $re->cut_locations($seqObj);
+ Returns   : Arrayref of 0-based offsets at which the recognition site
+           : starts within the sequence
+ Argument  : Reference to a Bio::PrimarySeqI-derived sequence object.
+ Throws    : n/a
+ Comments  : 
+
+=cut
+
+#-----------------
+sub cut_locations {
+#-----------------
+    # Returns an arrayref of 0-based offsets at which the expanded
+    # recognition site matches, case-insensitively, in $seqobj->seq.
+    my($self, $seqobj) = @_;
+
+    my $site = $self->_expanded_string;
+    my $seq = $seqobj->seq;
+    my @locations;
+    while( $seq =~ /(?:$site)/ig ) {
+        # $-[0] is the offset where the match starts, i.e. the length of
+        # the text before it; it gives the same number as length($`)
+        # without referencing $`.
+        push @locations, $-[0];
+    }
+    return \@locations;
+}
+
+# Purpose : Translate nucleotide ambiguity codes into the equivalent
+#           regular-expression fragments (R becomes [AG], N and X become
+#           a dot, and so on). Only upper-case codes are translated.
+# Argument: (optional) the string to translate. When it is false, the
+#           string returned by $self->string() is used.
+# Returns : String; returned untranslated when $self->name is 'N'.
+sub _expanded_string {
+    my ($self, $str) = @_;
+
+    $str ||= $self->string;
+    return $str if $self->name eq 'N';
+
+    # The replacement texts contain only A, C, G, T, brackets and dots,
+    # none of which is itself a code, so one pass is sufficient.
+    my %class_for = (
+        N => '.',      X => '.',
+        R => '[AG]',   Y => '[CT]',
+        S => '[GC]',   W => '[AT]',
+        M => '[AC]',   K => '[TG]',
+        B => '[CGT]',  D => '[AGT]',
+        H => '[ACT]',  V => '[ACG]',
+    );
+    $str =~ s/([NXRYSWMKBDHV])/$class_for{$1}/g;
+    return $str;
+}
+
+
+=head1 annotate_seq
+
+ Title     : annotate_seq
+ Usage     : $re->annotate_seq(<sequence_object>);
+ Purpose   : Identify the location of the recognition site(s) within
+           : an input sequence. Uses HTML.
+ Example   : $annot_seq = $re->annotate_seq($seqObj);
+ Returns   : String containing the annotated sequence.
+ Argument  : Reference to a Bio::PrimarySeq.pm-derived sequence object.
+ Throws    : n/a
+ Comments  : The annotated sequence must be viewed with a web
+           : browser to see the location(s) of the recognition site(s).
+
+=cut
+
+#-----------------
+sub annotate_seq {
+#-----------------
+    # Returns the sequence string of $seqObj with every occurrence of the
+    # recognition site wrapped in <b>...</b>.
+    my($self, $seqObj) = @_;
+
+    my $site = $self->_expanded_string;
+    my $seq = $seqObj->seq;
+
+    # Re-insert the text that actually matched ($1). Interpolating $site
+    # here would put the regex source (e.g. "[AG]") into the output for
+    # sites that contain ambiguity codes. Matching is case-insensitive,
+    # as in cut_seq() and cut_locations().
+    $seq =~ s{($site)}{<b>$1</b>}ig;
+    return $seq;
+}
+
+
+=head1 palindromic
+
+ Title     : palindromic
+ Usage     : $re->palindromic();
+ Purpose   : Determines if the recognition sequence is palindromic
+           : for the current restriction enzyme.
+ Returns   : Boolean
+ Argument  : n/a
+ Throws    : n/a
+ Access    : Public 
+ Comments  : A palindromic site (EcoRI): 5-GAATTC-3
+           :                             3-CTTAAG-5
+
+=cut
+
+#----------------
+sub palindromic {
+#----------------
+    # True when the recognition string equals the value of revcom().
+    my ($self) = @_;
+    my $forward = $self->string;
+    my $reverse = $self->revcom;
+    return $forward eq $reverse;
+}
+
+
+
+=head1 is_available
+
+ Title     : is_available
+ Usage     : $re->is_available(<string containing name of enzyme>);
+ Purpose   : Determine if an enzyme is available (to this module).
+           : (see the package lexical %RE).
+ Example   : $re->is_available('EcoRI');
+           : &Bio::Tools::RestrictionEnzyme::is_available($object,'EcoRI');
+ Returns   : Boolean
+ Argument  : String
+ Throws    : n/a
+ Comments  : This method does NOT give information about
+           : commercial availability (yet). 
+           : Enzyme names are CASE SENSITIVE.
+
+See Also   : L<available_list()|available_list>
+
+=cut
+
+#----------------
+sub is_available {
+#----------------
+    # Looks the given name up among the keys of %RE (exact match).
+    my $self = shift;
+    my $name = shift;
+    return exists $RE{$name};
+}
+
+#--------------
+sub available {
+#--------------
+    # Deprecated alias: emits a warning, then delegates to is_available().
+    my $self = shift;
+    my $name = shift;
+    $self->warn('available() is deprecated; use is_available() instead');
+    return $self->is_available($name);
+}
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name($newval)
+ Function: 
+ Example : 
+ Returns : value of name
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub name{
+   # Combined getter/setter for the 'name' slot; a defined argument is
+   # stored before the current value is returned.
+   my $obj   = shift;
+   my $value = shift;
+   $obj->{'name'} = $value if defined $value;
+   return $obj->{'name'};
+}
+
+=head1 available_list
+
+ Title     : available_list
+ Usage     : $re->available_list([<integer>]);
+ Purpose   : Retrieve a list of currently available enzymes.
+ Example   : @all = $re->available_list();  ## All enzymes
+           : @six_cutters = $re->available_list(6);  ## All 6-cutters
+ Returns   : List of strings
+ Argument  : Integer (optional)
+ Throws    : n/a
+ Comments  : This method may be more appropriate for a REData.pm class.
+
+See Also   : L<is_available()|is_available>
+
+=cut
+
+#-------------------
+sub available_list {
+#-------------------
+    my($self,$size) = @_;
+
+    # No (or false) size, or the literal 'all': every known enzyme name.
+    return @RE_available if !$size || $size eq 'all';
+
+    # Otherwise keep the names whose %RE entry starts with a field
+    # (text before the first whitespace character) of exactly $size
+    # characters.
+    my @names;
+    for my $enzyme (@RE_available) {
+        my @fields = split /\s/, $RE{$enzyme};
+        push @names, $enzyme if length($fields[0]) == $size;
+    }
+    return @names;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/GenericParameters.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/GenericParameters.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/GenericParameters.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+# $Id: GenericParameters.pm,v 1.6.2.1 2006/10/02 23:10:36 sendu Exp $
+#
+# BioPerl module for wrapping runtime parameters
+#
+# Cared for by Chad Matsalla (bioinformatics1 at dieselwurks dot com)
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::GenericParameters - An object for the parameters used to run programs
+
+=head1 SYNOPSIS
+
+  my $void   = $obj->set_parameter("parameter_name","parameter_value"); 
+  my $value  = $obj->get_parameter("parameter_name");
+
+=head1 DESCRIPTION
+
+This is a basic container to hold the parameters used to run a
+program.  This module may get incorporated into the more generic
+Bio::Tools::Run framework in bioperl-run distribution.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks dot com
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::Run::GenericParameters;
+use strict;
+
+use base qw(Bio::Root::Root Bio::Tools::Run::ParametersI);
+
+sub new {
+    # Constructor: hands every argument on to the inherited new().
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+    return $self;
+}
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : $parameter_object->get_parameter($param_name);
+ Function: Get the value of a parameter named $param_name
+ Returns : A scalar that should be a string
+ Args    : A scalar that should be a string
+
+=cut
+
+sub get_parameter {
+    # Looks $arg up in the object's 'params' hash; undef when unset.
+    my $self = shift;
+    my $arg  = shift;
+    return $self->{params}{$arg};
+}
+
+=head2 set_parameter
+
+ Title   : set_parameter
+ Usage   : $parameter_object->set_parameter($param_name => $param_value);
+ Function: Set the value of a parameter named $param_name to $param_value
+ Returns : Void
+ Args    : The parameter name and the value to store for it
+
+=cut
+
+sub set_parameter {
+    # Stores $value under $name in the object's 'params' hash.
+    my $self = shift;
+    my ($name, $value) = @_;
+    return $self->{params}{$name} = $value;
+}
+
+=head2 available_parameters
+
+ Title   : available_parameters
+ Usage   : my @paramnames = $parameter_object->available_parameters
+ Function: Returns the names of the available parameters
+ Returns : list of available parameter names
+ Args    : none
+
+=cut
+
+sub available_parameters {
+    # Names of all parameters currently held in the 'params' hash.
+    my ($self) = @_;
+    my @names = keys %{ $self->{params} };
+    return @names;
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/GenericParameters.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/ParametersI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/ParametersI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/ParametersI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+# $Id: ParametersI.pm,v 1.5.4.1 2006/10/02 23:10:36 sendu Exp $
+#
+# BioPerl module for wrapping runtime parameters
+#
+# Cared for by Chad Matsalla (bioinformatics1 at dieselwurks dot com)
+#
+# Copyright Chad Matsalla
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::ParametersI - A Base object for the parameters used to run programs
+
+=head1 SYNOPSIS
+
+  # do not use this object directly, it provides the following methods
+  # for its subclasses
+
+  my $void   = $obj->set_parameter("parameter_name","parameter_value"); 
+  my $value  = $obj->get_parameter("parameter_name");
+
+=head1 DESCRIPTION
+
+This is a basic container to hold the parameters used to run a program.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Chad Matsalla
+
+Email bioinformatics1 at dieselwurks dot com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Run::ParametersI;
+use strict;
+
+# Object preamble - inherits from Bio::Root::RootI
+
+
+use base qw(Bio::Root::RootI);
+
+=head2 get_parameter
+
+ Title   : get_parameter
+ Usage   : $parameter_object->get_parameter($param_name);
+ Function: Get the value of a parameter named $param_name
+ Returns : A scalar that should be a string
+ Args    : A scalar that should be a string
+
+=cut
+
+sub get_parameter {
+   # Interface stub: this base version only calls throw_not_implemented().
+   my $self = shift;
+   return $self->throw_not_implemented;
+}
+
+
+=head2 set_parameter
+
+ Title   : set_parameter
+ Usage   : $parameter_object->set_parameter($param_name => $param_value);
+ Function: Set the value of a parameter named $param_name to $param_value
+ Returns : Void
+ Args    : The parameter name and the value to store for it
+
+=cut
+
+sub set_parameter {
+   # Interface stub: this base version only calls throw_not_implemented().
+   my $self = shift;
+   return $self->throw_not_implemented;
+}
+
+
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/ParametersI.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+
+ This directory is now mainly in bioperl-run package
+which you should install alongside Bioperl to get over
+50 different runnable options. We might decide to merge
+the two packages again, but the feeling is that bioperl
+"core" is getting a little too big.
+
+ Bioperl-run follows the same release tagging schedule
+as Bioperl. Pick it up from www.bioperl.org and/or CPAN

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/RemoteBlast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/RemoteBlast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/RemoteBlast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,754 @@
+# $Id: RemoteBlast.pm,v 1.38.4.2 2006/10/16 17:08:15 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Run::RemoteBlast
+#
+# FORMERLY Cared for by Jason Stajich, Mat Wiepert
+#
+# Somewhat cared for by Roger Hall, Chris Fields (when they have time)
+#
+# Copyright Jason Stajich, Bioperl
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::RemoteBlast - Object for remote execution of the NCBI Blast
+via HTTP
+
+=head1 SYNOPSIS
+
+  #Remote-blast "factory object" creation and blast-parameter initialization
+
+  use Bio::Tools::Run::RemoteBlast;
+  use strict;
+  my $prog = 'blastp';
+  my $db   = 'swissprot';
+  my $e_val= '1e-10';
+
+  my @params = ( '-prog' => $prog,
+         '-data' => $db,
+         '-expect' => $e_val,
+         '-readmethod' => 'SearchIO' );
+
+  my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
+
+  #change a query parameter
+  $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
+
+  #change a retrieval parameter
+  $Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'DESCRIPTIONS'} = 1000;
+
+  #remove a parameter
+  delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
+
+  #$v is just to turn on and off the messages
+  my $v = 1;
+
+  my $str = Bio::SeqIO->new(-file=>'amino.fa' , -format => 'fasta' );
+
+  while (my $input = $str->next_seq()){
+    #Blast a sequence against a database:
+
+    #Alternatively, you could  pass in a file with many
+    #sequences rather than loop through sequence one at a time
+    #Remove the loop starting 'while (my $input = $str->next_seq())'
+    #and swap the two lines below for an example of that.
+    my $r = $factory->submit_blast($input);
+    #my $r = $factory->submit_blast('amino.fa');
+
+    print STDERR "waiting..." if( $v > 0 );
+    while ( my @rids = $factory->each_rid ) {
+      foreach my $rid ( @rids ) {
+        my $rc = $factory->retrieve_blast($rid);
+        if( !ref($rc) ) {
+          if( $rc < 0 ) {
+            $factory->remove_rid($rid);
+          }
+          print STDERR "." if ( $v > 0 );
+          sleep 5;
+        } else {
+          my $result = $rc->next_result();
+          #save the output
+          my $filename = $result->query_name()."\.out";
+          $factory->save_output($filename);
+          $factory->remove_rid($rid);
+          print "\nQuery Name: ", $result->query_name(), "\n";
+          while ( my $hit = $result->next_hit ) {
+            next unless ( $v > 0);
+            print "\thit name is ", $hit->name, "\n";
+            while( my $hsp = $hit->next_hsp ) {
+              print "\t\tscore is ", $hsp->score, "\n";
+            }
+          }
+        }
+      }
+    }
+  }
+
+  # This example shows how to change a CGI parameter:
+  $Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'BLOSUM25';
+
+  # And this is how to delete a CGI parameter:
+  delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
+
+
+=head1 DESCRIPTION
+
+Class for remote execution of the NCBI Blast via HTTP.
+
+For a description of the many CGI parameters see:
+http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html
+
+Various additional options and input formats are available.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.bioperl.org
+
+=head1 AUTHOR 
+
+Please do NOT contact Jason directly about this module.  Please post to
+the bioperl mailing list (L<FEEDBACK>). If you would like to be the
+official maintainer of this module, please volunteer on the list and
+we will make it official in this POD.
+
+First written by Jason Stajich, many others have helped keep it running.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Run::RemoteBlast;
+
+use vars qw($AUTOLOAD $URLBASE %HEADER %RETRIEVALHEADER
+	    $RIDLINE $MODVERSION %PUTPARAMS %GETPARAMS);
+use strict;
+
+use Bio::SeqIO;
+use IO::String;
+use Bio::Tools::BPlite;
+use Bio::SearchIO;
+use LWP;
+use HTTP::Request::Common;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+BEGIN {
+    $MODVERSION = $Bio::Root::Version::VERSION;
+    $URLBASE = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
+
+    # In GET/PUTPARAMS the values are regexes which validate the input.
+    # submit_parameter() and retrieve_parameter() wrap each one in ^...$
+    # and match it case-insensitively.
+    %PUTPARAMS = (
+        'AUTO_FORMAT'                  => '(Off|(Semi|Full)auto)',  # Off, Semiauto, Fullauto
+        'COMPOSITION_BASED_STATISTICS' => '(yes|no)',               # yes, no
+        'DATABASE'                     => '.*',
+        'DB_GENETIC_CODE'              => '([1-9]|1[0-6]|2(1|2))',  # 1..16,21,22
+        'ENDPOINTS'                    => '(yes|no)',               # yes, no
+        'ENTREZ_QUERY'                 => '.*',
+        'EXPECT'                       => '\d+(\.\d+)?([eE]-\d+)?', # Positive double
+        'FILTER'                       => '[LRm]',                  # L or R or m
+        # Two space separated numbers; the fractional parts are optional
+        # so that plain integers such as "11 1" are accepted.
+        'GAPCOSTS'                     => '-?\d+(\.\d+)?\s+-?\d+(\.\d+)?',
+        'GENETIC_CODE'                 => '([1-9]|1[0-6]|2(1|2))',  # 1..16,21,22
+        'HITLIST_SIZE'                 => '\d+',                    # Positive integer
+        # float; fractional part and exponent are both optional
+        'I_THRESH'                     => '-?\d+(\.\d+)?([eE]-\d+)?',
+        'LAYOUT'                       => '(One|Two)Windows?',      # onewindow, twowindows
+        'LCASE_MASK'                   => '(yes|no)',               # yes, no
+        'MATRIX_NAME'                  => '.*',
+        'NUCL_PENALTY'                 => '-\d+',                   # Negative integer
+        'NUCL_REWARD'                  => '-?\d+',                  # Integer
+        'OTHER_ADVANCED'               => '.*',
+        # Integer; one or more digits, so single-digit values pass too
+        'PERC_IDENT'                   => '\d+',
+        'PHI_PATTERN'                  => '.*',
+        # tblastp, tblastn, tblastx, blastp, blastn, blastx
+        'PROGRAM'                      => 't?blast[pnx]',
+        'QUERY'                        => '.*',
+        'QUERY_FILE'                   => '.*',
+        'QUERY_BELIEVE_DEFLINE'        => '(yes|no)',               # yes, no
+        'QUERY_FROM'                   => '\d+',                    # Positive integer
+        'QUERY_TO'                     => '\d+',                    # Positive integer
+        'SEARCHSP_EFF'                 => '\d+',                    # Positive integer
+        # plain,psi,phi,rpsblast,megablast
+        'SERVICE'                      => '(plain|p[sh]i|(rps|mega)blast)',
+        'THRESHOLD'                    => '-?\d+',                  # Integer
+        'UNGAPPED_ALIGNMENT'           => '(yes|no)',               # yes, no
+        'WORD_SIZE'                    => '\d+'                     # Positive integer
+    );
+    %GETPARAMS = (
+        'ALIGNMENTS'              => '\d+',                         # Positive integer
+        # Pairwise, QueryAnchored, QueryAnchoredNoIdentities,
+        # FlatQueryAnchored, FlatQueryAnchoredNoIdentities, Tabular
+        'ALIGNMENT_VIEW'          =>
+            '(Pairwise|(Flat)?QueryAnchored(NoIdentities)?|Tabular)',
+        'DESCRIPTIONS'            => '\d+',                         # Positive integer
+        'ENTREZ_LINKS_NEW_WINDOW' => '(yes|no)',                    # yes, no
+        'EXPECT_LOW'              => '\d+(\.\d+)?([eE]-\d+)?',      # Positive double
+        'EXPECT_HIGH'             => '\d+(\.\d+)?([eE]-\d+)?',      # Positive double
+        # Free text, like ENTREZ_QUERY; an empty pattern would reject
+        # every non-empty query.
+        'FORMAT_ENTREZ_QUERY'     => '.*',
+        # Alignment, Neighbors, PSSM,  SearchInfo
+        # TaxBlast, TaxblastParent, TaxBlastMultiFrame
+        'FORMAT_OBJECT'           =>
+            '(Alignment|Neighbors|PSSM|SearchInfo|TaxBlast(Parent|MultiFrame)?)',
+        # HTML, Text, ASN.1, XML
+        'FORMAT_TYPE'             => '((HT|X)ML|ASN\.1|Text)',
+        'NCBI_GI'                 => '(yes|no)',                    # yes, no
+        'RID'                     => '.*',
+        'RESULTS_FILE'            => '(yes|no)',                    # yes, no
+        # plain,psi,phi,rpsblast,megablast
+        'SERVICE'                 => '(plain|p[sh]i|(rps|mega)blast)',
+        'SHOW_OVERVIEW'           => '(yes|no)'                     # yes, no
+    );
+
+    # Default values go in here for PUT
+    # NOTE(review): the 'off' default below does not match the '(yes|no)'
+    # validator for COMPOSITION_BASED_STATISTICS; defaults are stored
+    # directly and are never validated. Confirm the intended value.
+    %HEADER = (
+               'CMD'                          => 'Put',
+               'FORMAT_OBJECT'                => 'Alignment',
+               'COMPOSITION_BASED_STATISTICS' => 'off',
+               'DATABASE'                     => 'nr',
+               'EXPECT'                       => '1e-3',
+               'FILTER'                       => 'L',
+               'PROGRAM'                      => 'blastp',
+               'SERVICE'                      => 'plain'
+               );
+
+    # Default values go in here for GET
+    %RETRIEVALHEADER = (
+                        'CMD'            => 'Get',
+                        'ALIGNMENTS'     => '50',
+                        'ALIGNMENT_VIEW' => 'Pairwise',
+                        'DESCRIPTIONS'   => '100',
+                        'FORMAT_TYPE'    => 'Text',
+                        );
+
+    # Pattern that captures the request ID from a submission response.
+    $RIDLINE = 'RID\s+=\s+(\S+)';
+}
+
+sub new {
+    # Constructor. Recognised named arguments: -prog, -data, -readmethod,
+    # -url_base, plus any key of %PUTPARAMS or %GETPARAMS.
+    my ($caller, @args) = @_;
+    # chained new
+    my $self = $caller->SUPER::new(@args);
+    # so that tempfiles are cleaned up
+    $self->_initialize_io();
+    my ($prog, $data, $readmethod, $url_base) =
+        $self->_rearrange([qw(PROG DATA READMETHOD URL_BASE)], @args);
+    # Use these two parameters for backward-compatibility.
+    # Overridden by PROGRAM and DATABASE if supplied.
+    $self->submit_parameter('PROGRAM',$prog) if $prog;
+    $self->submit_parameter('DATABASE',$data) if $data;
+
+    $readmethod = 'SearchIO' unless defined $readmethod;
+    $self->readmethod($readmethod);
+
+    # Now read the rest of the parameters and set them all
+
+    # PUT parameters first. The key list is captured once so that the
+    # values returned by _rearrange() line up with their names.
+    my @putKeys   = keys %PUTPARAMS;
+    my @putValues = $self->_rearrange([@putKeys], @args);
+    my %putNames;
+    @putNames{@putKeys} = @putValues;
+    foreach my $putName (keys %putNames) {
+        # an undefined value makes this a plain read, leaving the default
+        $self->submit_parameter($putName,$putNames{$putName});
+    }
+    # GET parameters second
+    my @getKeys   = keys %GETPARAMS;
+    my @getValues = $self->_rearrange([@getKeys], @args);
+    my %getNames;
+    @getNames{@getKeys} = @getValues;
+    foreach my $getName (keys %getNames) {
+        $self->retrieve_parameter($getName,$getNames{$getName});
+    }
+    # private variable to keep track of total rids
+    $self->{'_total_rids'} = 0;
+    $url_base ||= $URLBASE;  # default to regular NCBI BLAST URL
+    $self->set_url_base($url_base);
+    return $self;
+}
+
+=head2 retrieve_parameter
+
+ Title   : retrieve_parameter
+ Usage   : my $db = $self->retrieve_parameter
+ Function: Get/Set the named parameter for the retrieve_blast operation.
+ Returns : string
+ Args    : $name : name of GET parameter
+	 $val : optional value to set the parameter to
+
+=cut
+
+sub retrieve_parameter {
+    # Reads, or validates and stores, one entry of the package-wide
+    # %RETRIEVALHEADER. The name is upper-cased before use.
+    my $self = shift;
+    my ($name, $val) = @_;
+    $name = uc($name);
+    exists $GETPARAMS{$name}
+        or $self->throw($name." is not a valid GET parameter.");
+
+    if (defined $val) {
+        # the whole value must match the validator, ignoring case
+        my $regex = $GETPARAMS{$name};
+        unless ($val =~ m/^$regex$/i) {
+            $self->throw("Value ".$val." for GET parameter ".$name." does not match expression ".$regex.". Rejecting.");
+        }
+        $RETRIEVALHEADER{$name} = $val;
+    }
+    return $RETRIEVALHEADER{$name};
+}
+
+=head2 submit_parameter
+
+ Title   : submit_parameter
+ Usage   : my $db = $self->submit_parameter
+ Function: Get/Set the named parameter for the submit_blast operation.
+ Returns : string
+ Args    : $name : name of PUT parameter
+    $val : optional value to set the parameter to
+
+=cut
+
+sub submit_parameter {
+    # Reads, or validates and stores, one entry of the package-wide
+    # %HEADER. The name is upper-cased before use.
+    my $self = shift;
+    my ($name, $val) = @_;
+    $name = uc($name);
+    exists $PUTPARAMS{$name}
+        or $self->throw($name." is not a valid PUT parameter.");
+
+    if (defined $val) {
+        # the whole value must match the validator, ignoring case
+        my $regex = $PUTPARAMS{$name};
+        unless ($val =~ m/^$regex$/i) {
+            $self->throw("Value ".$val." for PUT parameter ".$name." does not match expression ".$regex.". Rejecting.");
+        }
+        $HEADER{$name} = $val;
+    }
+    return $HEADER{$name};
+}
+
+=head2 header
+
+ Title   : header
+ Usage   : my $header = $self->header
+ Function: Get HTTP header for blast query
+ Returns : string
+ Args    : none
+
+=cut
+
+sub header {
+    # Returns the package-wide %HEADER (the PUT parameters) as a
+    # key/value list.
+    my $self = shift;
+    return %HEADER;
+}
+
+=head2 readmethod
+
+ Title   : readmethod
+ Usage   : my $readmethod = $self->readmethod
+ Function: Get/Set the method to read the blast report
+ Returns : string
+ Args    : string [ Blast, BPlite, blasttable, xml ]
+
+=cut
+
+sub readmethod {
+    # Getter/setter for '_readmethod'; a defined argument is stored first.
+    my $self = shift;
+    my $val  = shift;
+    $self->{'_readmethod'} = $val if defined $val;
+    return $self->{'_readmethod'};
+}
+
+
+=head2 program
+
+ Title   : program
+ Usage   : my $prog = $self->program
+ Function: Get/Set the program to run. Retained for backwards-compatibility.
+ Returns : string
+ Args    : string [ blastp, blastn, blastx, tblastn, tblastx ]
+
+=cut
+
+sub program {
+    # Shorthand for submit_parameter() on the PROGRAM key.
+    my $self = shift;
+    my $val  = shift;
+    return $self->submit_parameter(PROGRAM => $val);
+}
+
+
+=head2 database
+
+ Title   : database
+ Usage   : my $db = $self->database
+ Function: Get/Set the database to search. Retained for backwards-compatibility.
+ Returns : string
+ Args    : string [ swissprot, nr, nt, etc... ]
+
+=cut
+
+sub database {
+    # Shorthand for submit_parameter() on the DATABASE key.
+    my $self = shift;
+    my $val  = shift;
+    return $self->submit_parameter(DATABASE => $val);
+}
+
+
+=head2 expect
+
+ Title   : expect
+ Usage   : my $expect = $self->expect
+ Function: Get/Set the E value cutoff. Retained for backwards-compatibility.
+ Returns : string
+ Args    : string [ '1e-4' ]
+
+=cut
+
+sub expect {
+    # Shorthand for submit_parameter() on the EXPECT key.
+    my $self = shift;
+    my $val  = shift;
+    return $self->submit_parameter(EXPECT => $val);
+}
+
+=head2 ua
+
+ Title   : ua
+ Usage   : my $ua = $self->ua or
+           $self->ua($ua)
+ Function: Get/Set a LWP::UserAgent for use
+ Returns : reference to LWP::UserAgent Object
+ Args    : none
+ Comments: Will create a UserAgent if none has been requested before.
+
+=cut
+
+sub ua {
+    # Get/Set the user agent used for all requests.
+    # Args    : (optional) a user-agent object to use from now on.
+    # Returns : the stored user agent; one is created on first use when
+    #           none has been supplied.
+    my ($self, $value) = @_;
+    if( defined $value ) {
+        # Honour the documented setter form; the argument used to be
+        # silently ignored.
+        $self->{'_ua'} = $value;
+    } elsif( ! defined $self->{'_ua'} ) {
+        $self->{'_ua'} = LWP::UserAgent->new(env_proxy => 1, parse_head => 0);
+        # Agent string is built from the class name, with '::' made '_'.
+        my $nm = ref($self);
+        $nm =~ s/::/_/g;
+        $self->{'_ua'}->agent("bioperl-$nm/$MODVERSION");
+    }
+    return $self->{'_ua'};
+}
+
+=head2 proxy
+
+ Title   : proxy
+ Usage   : $httpproxy = $db->proxy('http')  or
+           $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy
+ Returns : a string indicating the proxy
+ Args    : $protocol : an array ref of the protocol(s) to set/get
+           $proxyurl : url of the proxy to use for the specified protocol
+
+=cut
+
+sub proxy {
+    # Get or set the proxy of the underlying user agent.
+    # Args    : $protocol - scheme name, or array ref of scheme names
+    #           $proxy    - (optional) proxy URL to set for the scheme(s)
+    # Returns : whatever the user agent's proxy() returns; nothing when
+    #           $protocol is missing.
+    my ($self,$protocol,$proxy) = @_;
+    return if !defined $protocol;
+    my $ua = $self->ua;
+    return if !defined $ua;
+
+    if( !defined $proxy ) {
+        # Documented getter form, $db->proxy('http'). It is only passed
+        # on for a single scheme name, because an array ref without a
+        # URL could be taken by the user agent as a list of settings.
+        return if ref $protocol;
+        return $ua->proxy($protocol);
+    }
+    return $ua->proxy($protocol,$proxy);
+}
+
+sub add_rid {
+    # Registers each given RID, tagging it with the running counter
+    # '_total_rids', and returns how many RIDs are now stored.
+    my $self = shift;
+    for my $rid ( @_ ) {
+        $self->{'_rids'}->{$rid} = $self->{'_total_rids'};
+        $self->{'_total_rids'}++;
+    }
+    return scalar keys %{$self->{'_rids'}};
+}
+
+sub remove_rid {
+    # Forgets each given RID and returns how many RIDs remain stored.
+    my $self = shift;
+    for my $rid ( @_ ) {
+        delete $self->{'_rids'}->{$rid};
+    }
+    return scalar keys %{$self->{'_rids'}};
+}
+
+sub each_rid {
+    # Returns the stored RIDs ordered by the counter value recorded for
+    # each of them in add_rid().
+    my $self = shift;
+    my @ordered = sort {
+        $self->{'_rids'}->{$a} <=> $self->{'_rids'}->{$b}
+    } keys %{$self->{'_rids'}};
+    return @ordered;
+}
+
+=head2 submit_blast
+
+ Title   : submit_blast
+ Usage   : $self->submit_blast([$seq1,$seq2]);
+ Function: Submit blast jobs to ncbi blast queue on sequence(s)
+ Returns : Number of request IDs (RIDs) obtained; 0 if no sequences
+           could be loaded. A failed HTTP request resets the count to -1.
+ Args    : input can be:
+           * sequence object
+           * array ref of sequence objects
+           * filename of file containing fasta formatted sequences
+
+=cut
+
+sub submit_blast {
+    # Posts one request per input sequence to the configured URL and
+    # records every request ID (RID) found in the responses.
+    my ($self, $input) = @_;
+    my @seqs = $self->_load_input($input);
+    my $url_base = $self->get_url_base;
+    return 0 unless ( @seqs );
+    # running total of RIDs obtained; this is the return value
+    my $tcount = 0;
+    # private copy of the PUT parameters, so QUERY can be set per sequence
+    my %header = $self->header;
+    foreach my $seq ( @seqs ) {
+	#If query has a fasta header, the output has the query line.
+	$header{'QUERY'} = ">".(defined $seq->display_id() ? $seq->display_id() : "").
+		" ".(defined $seq->desc() ? $seq->desc() : "")."\n".$seq->seq();
+	my $request = POST $url_base, [%header];
+	$self->warn($request->as_string) if ( $self->verbose > 0);
+	my $response = $self->ua->request( $request);
+
+	if( $response->is_success ) {
+	    my @subdata = split(/\n/, $response->content );
+	    my $count = 0;
+	    # only the first line matching $RIDLINE is used
+	    foreach ( @subdata ) {
+			if( /$RIDLINE/ ) {
+		    	$count++;
+		    	print STDERR $_ if( $self->verbose > 0);
+		    	$self->add_rid($1);		
+		    	last;
+			}	
+	    }
+	    # no RID in the response: report the request and the reply
+	    if( $count == 0 ) {
+		$self->warn("req was ". $request->as_string() . "\n");
+		$self->warn(join('', @subdata));
+	    }    	
+	    $tcount += $count;
+	} else {
+	    # should try and be a little more verbose here
+	    $self->warn("req was ". $request->as_string() . "\n" .
+			$response->error_as_HTML);
+	    # NOTE(review): this overwrites the running total, and later
+	    # successful submissions are added on top of -1.
+	    $tcount = -1;
+		}
+    }
+    return $tcount;
+}
+
+=head2 retrieve_blast
+
+ Title   : retrieve_blast
+ Usage   : my $blastreport = $blastfactory->retrieve_blast($rid);
+ Function: Attempts to retrieve a blast report from remote blast queue
+ Returns : -1 on error,
+           0 on 'job not finished',
+           Bio::SearchIO object
+ Args    : Remote Blast ID (RID)
+
+=cut
+
+sub retrieve_blast {
+    # Downloads the reply for $rid into a temporary file, scans it for
+    # the job status, and returns -1 (error), 0 (still waiting) or a
+    # report parser chosen by $self->readmethod.
+    my($self, $rid) = @_;
+    my ($fh,$tempfile) = $self->tempfile();
+    close $fh;			#explicit close
+    my $url_base = $self->get_url_base;
+    # copy of the GET defaults with this RID added
+    my %hdr = %RETRIEVALHEADER;
+    $hdr{'RID'} = $rid;
+    my $req = POST $url_base, [%hdr];
+    $self->debug("retrieve request is " . $req->as_string());
+    # passing the file name as second argument saves the reply body there
+    my $response = $self->ua->request($req, $tempfile);
+    if( $response->is_success ) {
+    	if( $self->verbose > 0 ) {
+	    #print content of reply if verbose > 0
+            open(my $TMP, $tempfile) or $self->throw("cannot open $tempfile");
+            while(<$TMP>) { print $_; }
+    	}   
+        ## if proper reply 
+        open(my $TMP, $tempfile) || $self->throw("Error opening $tempfile");
+        # $waiting : stays true until the reply shows the job is done
+        # $s       : true once the QBlastInfoBegin marker has been seen
+        # $got_content : true once any non-empty line has been read
+        my $waiting = 1;
+        my $s = 0;
+        my $got_content = 0;
+        while(<$TMP>) {
+            if (/./) {
+                $got_content = 1;
+            }
+            if( /<\?xml version=/ ) { # xml time
+                $waiting = 0;
+                last;
+            }
+            if( /QBlastInfoBegin/i ) {
+                $s = 1;
+            } elsif( $s ) {
+                if( /Status=(WAITING|ERROR|READY)/i ) {
+                    if( $1 eq 'WAITING' ) {
+                        $waiting = 1;
+                    } elsif( $1 eq 'ERROR' ) {
+                        # report the whole reply, then signal failure
+                        close($TMP);
+                        open(my $ERR, "<$tempfile") or $self->throw("cannot open file $tempfile");
+                        $self->warn(join("", <$ERR>));
+                        return -1;
+                    } elsif( $1 eq 'READY' ) {
+                        $waiting = 0;
+                        last;
+                    } else {
+                        # reached when the status matched in another case
+                        $self->warn("Unknown status $1:\n");
+                        last;
+                    }
+                }
+            }
+        }
+        close($TMP);
+        if( ! $waiting ) {
+            # job finished: build the parser selected by readmethod
+            my $blastobj;
+            my $mthd = $self->readmethod;
+            if( $mthd =~ /BPlite/i ) {
+                $blastobj = new Bio::Tools::BPlite(-file => $tempfile);
+            } elsif( $mthd =~ /blasttable/i ) {
+            # pre-process
+            # copy only the lines between <PRE> and </PRE> to a second
+            # temporary file and parse that one
+            my ($fh2,$tempfile2) = $self->tempfile();
+            open(my $TMP,$tempfile) || $self->throw($!);
+            my $s = 0;
+            while(<$TMP>) {
+                if(/\<PRE\>/i ) {
+                $s = 1;
+                } elsif( /\<\/PRE\>/i ) {
+                $s = 0;
+                last;
+                } elsif( $s ) {
+                print $fh2 $_;
+                }
+            } 
+            close($fh2);
+            $blastobj = new Bio::SearchIO( -file => $tempfile2,
+                               -format => 'blasttable');
+            } elsif( $mthd =~ /xml/ ) {
+            $blastobj = new Bio::SearchIO( -file => $tempfile,
+                               -format => 'blastxml');
+            } else {
+            $blastobj = new Bio::SearchIO( -file => $tempfile,
+                               -format => 'blast');
+            } 
+            
+            ## store filename in object ##
+            $self->file($tempfile);
+            return $blastobj;
+        } elsif (!$got_content) {
+            # server returned no content, can't be good
+            $self->warn("Server failed to return any data");
+            return -1
+        } else {		# still working
+            return 0;
+        }
+	
+    } else {
+	# the HTTP request itself failed
+	$self->warn($response->error_as_HTML);
+	return -1;
+    }
+}
+
+=head2 save_output
+
+ Title   : save_output
+ Usage   : my $saveoutput = $self->save_output($filename)
+ Function: Method to save the blast report
+ Returns : 1 (throws error otherwise)
+ Args    : string [filename]
+
+=cut
+
+sub save_output {
+    # Copy the most recently retrieved report (the file recorded via
+    # $self->file) to $filename, dropping everything that precedes the
+    # first recognised report header as well as any line containing <pre>.
+    #
+    # Returns : 1
+    # Throws  : if no filename is given, or if either file cannot be
+    #           opened, or if the output file cannot be closed cleanly.
+    my ($self, $filename) = @_;
+    if( ! defined $filename ) {
+        $self->throw("Can't save blast output.  You must specify a filename to save to.");
+    }
+    my $blastfile = $self->file;
+    #open temp file and output file, have to filter out some HTML
+    open(my $TMP, '<', $blastfile) or $self->throw("cannot open $blastfile");
+    open(my $SAVEOUT, '>', $filename) or $self->throw("cannot open $filename");
+
+    my $seentop = 0;
+    while(<$TMP>) {
+        next if (/<pre>/);
+        # the report proper starts at a BLAST banner or an XML declaration
+        if( !$seentop &&
+            ( /^(?:[T]?BLAST[NPX])\s*.+$/i ||
+              /^RPS-BLAST\s*.+$/i ||
+              /<\?xml\sversion=/ ||
+              /^#\s+(?:[T]?BLAST[NPX])\s*.+$/ ) ) {
+            $seentop = 1;
+        }
+        next if !$seentop;
+        print $SAVEOUT $_;
+    }
+    close($TMP);
+    # buffered write errors only surface when the handle is closed
+    close($SAVEOUT) or $self->throw("cannot close $filename: $!");
+    return 1;
+}
+
+sub _load_input {
+    # Normalise the query input into a list of sequence objects.
+    #
+    # Args    : a FASTA file name, a single Bio::PrimarySeqI object, or a
+    #           reference to an array of such objects
+    # Returns : list of sequence objects (empty when a single reference of
+    #           an unsupported type is given; a warning is issued)
+    # Throws  : if the input is undefined, if a file name does not exist,
+    #           or if an array holds no usable sequence object
+    my ($self, $input) = @_;
+
+    if( ! defined $input ) {
+        $self->throw("Calling remote blast with no input");
+    }
+
+    # True only for objects implementing Bio::PrimarySeqI.  The eval keeps
+    # unblessed references (hash refs, code refs, ...) from aborting the
+    # program with "Can't call method isa on unblessed reference".
+    my $is_seq = sub {
+        my $candidate = shift;
+        return 0 unless ref $candidate;
+        local $@;
+        return eval { $candidate->isa('Bio::PrimarySeqI') } ? 1 : 0;
+    };
+
+    my @seqs;
+    if( ! ref $input ) {
+        if( ! -e $input ) {
+            $self->throw("Input $input was not a valid filename");
+        }
+        my $seqio = Bio::SeqIO->new(-format => 'fasta',
+                                    -file   => $input);
+        while( my $seq = $seqio->next_seq ) {
+            push @seqs, $seq;
+        }
+    } elsif( ref($input) =~ /ARRAY/i ) {
+        foreach my $item ( @$input ) {
+            if( $is_seq->($item) ) {
+                push @seqs, $item;
+            } else {
+                $self->warn("Trying to add a " . ref($item) .
+                            " but expected a Bio::PrimarySeqI");
+            }
+        }
+        if( ! @seqs) {
+            $self->throw("Did not pass in valid input -- no sequence objects found");
+        }
+    } elsif( $is_seq->($input) ) {
+        push @seqs, $input;
+    } else {
+        $self->warn("Trying to add a " . ref($input) .
+                    " but expected a Bio::PrimarySeqI");
+    }
+    return @seqs;
+}
+
+1;
+
+=head2 set_url_base
+
+ Title   : set_url_base
+ Usage   : $self->set_url_base($url)
+ Function: Method to override the default NCBI BLAST database
+ Returns : None
+ Args    : string (base URL of the BLAST server to send requests to)
+ NOTE    : This is highly experimental; we cannot maintain support on
+           databases other than the default NCBI database at this time
+
+=cut
+
+sub set_url_base {
+    # Record a new base URL for remote requests; calling without an
+    # argument leaves the stored URL untouched.
+    my ($self, @new_url) = @_;
+    $self->{'_urlbase'} = $new_url[0] if @new_url;
+}
+
+=head2 get_url_base
+
+ Title   : get_url_base
+ Usage   : my $url = $self->get_url_base
+ Function: Get the current URL for BLAST database searching
+ Returns : string (URL used for remote blast searches)
+ Args    : None
+
+=cut
+
+sub get_url_base {
+    # Plain reader for the base URL stored by set_url_base.
+    my ($self) = @_;
+    return $self->{'_urlbase'};
+}
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/StandAloneBlast.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/StandAloneBlast.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/StandAloneBlast.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1132 @@
+# $Id: StandAloneBlast.pm,v 1.63.4.1 2006/10/02 23:10:36 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Run::StandAloneBlast
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::StandAloneBlast - Object for the local execution 
+of the NCBI BLAST program suite (blastall, blastpgp, bl2seq). 
+There is experimental support for WU-Blast and NCBI rpsblast.
+
+=head1 SYNOPSIS
+
+ # Local-blast "factory object" creation and blast-parameter
+ # initialization:
+
+ @params = (-database => 'swissprot',-outfile => 'blast1.out');
+ $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
+
+ # Blast a sequence against a database:
+
+ $str = Bio::SeqIO->new(-file=>'t/amino.fa', -format => 'Fasta');
+ $input = $str->next_seq();
+ $input2 = $str->next_seq();
+ $blast_report = $factory->blastall($input);
+
+ # Run an iterated Blast (psiblast) of a sequence against a database:
+
+ $factory->j(3);    # 'j' is blast parameter for # of iterations
+ $factory->outfile('psiblast1.out');
+ $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
+ $blast_report = $factory->blastpgp($input);
+
+ # Use blast to align 2 sequences against each other:
+
+ $factory = Bio::Tools::Run::StandAloneBlast->new(-outfile => 'bl2seq.out');
+ $factory->bl2seq($input, $input2);
+
+ # Experimental support for WU-Blast 2.0
+
+ my $factory = Bio::Tools::Run::StandAloneBlast->new(-program =>"wublastp",
+                                                     -database =>"swissprot",
+                                                     -e => 1e-20); 
+ my $blast_report = $factory->wublast($seq);
+
+ # Experimental support for NCBI rpsblast
+
+ my $factory = Bio::Tools::Run::StandAloneBlast->new(-db => 'CDD/Cog', 
+                                                     -expect => 0.001);
+ $factory->F('T'); # turn on SEG filtering of query sequence
+ my $blast_report = $factory->rpsblast($seq);
+
+ # Various additional options and input formats are available,
+ # see the DESCRIPTION section for details.
+
+=head1 DESCRIPTION
+
+This DESCRIPTION only documents Bio::Tools::Run::StandAloneBlast: - a
+Bioperl object for running the NCBI standAlone BLAST package.  Blast,
+itself, is a large & complex program - for more information regarding
+BLAST, please see the BLAST documentation which accompanies the BLAST
+distribution. BLAST is available from ftp://ncbi.nlm.nih.gov/blast/.
+
+A source of confusion in documenting a BLAST interface is that the
+term "program" is used in - at least - three different ways in the
+BLAST documentation.  In this DESCRIPTION, "program" will refer to the
+BLAST routine set by the BLAST C<-p> parameter that can be set to blastn,
+blastp, tblastx etc.  We will use the term Blast "executable" to refer
+to the various different executable files that may be called - ie
+blastall, blastpgp or bl2seq.  In addition, there are several BLAST
+capabilities, which are also referred to as "programs", and are
+implemented by using specific combinations of BLAST executables,
+programs and parameters.  They will be referred by their specific
+names - eg PSIBLAST and PHIBLAST.
+
+Before running StandAloneBlast it is necessary: to install BLAST
+on your system, to set the environment variable $BLASTDIR
+or your $PATH variable to point to the BLAST directory, and to
+ensure that users have execute privileges for the BLAST program.
+
+If the databases which will be searched by BLAST are located in the 
+data subdirectory of the blast program directory (the default 
+installation location), StandAloneBlast will find them; however, 
+if the database files are located in any other location, environmental 
+variable $BLASTDATADIR will need to be set to point to that directory.
+
+The use of the StandAloneBlast module is as follows: Initially, a
+local blast "factory object" is created. The constructor may be passed
+an optional array of (non-default) parameters to be used by the
+factory, eg:
+
+ @params = (-program => 'blastn', -database => 'ecoli.nt');
+ $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
+
+Any parameters not explicitly set will remain as the defaults of the
+BLAST executable.  Note each BLAST executable has somewhat different
+parameters and options.  See the BLAST Documentation for a description
+or run the BLAST executable from the command line followed solely with
+a "-" to see a list of options and default values for that executable;
+eg E<gt>blastall -.
+
+BLAST parameters can be changed and/or examined at any time after the
+factory has been created.  The program checks that any
+parameter/switch being set/read is valid.  Except where specifically
+noted, StandAloneBlast uses the same single-letter, case-sensitive
+parameter names as the actual blast program.  Currently no checks are
+included to verify that parameters are of the proper type (e.g. string
+or numeric) or that their values are within the proper range.
+
+As an example, to change the value of the Blast parameter 'e' ('e' is
+the parameter for expectation-value cutoff) 
+
+  $expectvalue = 0.01;
+  $factory->e($expectvalue);
+
+Note that for improved script readability one can modify the name of
+the BLAST parameters as desired as long as the initial letter (and
+case) of the parameter are preserved, e.g.:
+
+  $factory->expectvalue($expectvalue);
+
+Unfortunately, some of the BLAST parameters are not the single 
+letter one might expect (eg "iteration round" in blastpgp is 'j'). 
+Again one can check by using, for example:
+
+  > blastpgp - .
+
+Once the factory has been created and the appropriate parameters set,
+one can call one of the supported blast executables.  The input
+sequence(s) to these executables may be fasta file(s) as described in
+the BLAST documentation.
+
+  $inputfilename = 't/testquery.fa';
+  $blast_report = $factory->blastall($inputfilename);
+
+In addition, sequence input may be in the form of either a Bio::Seq
+object or an array of Bio::Seq objects, e.g.:
+
+  $input = Bio::Seq->new(-id => "test query",
+                         -seq => "ACTACCCTTTAAATCAGTGGGGG");
+  $blast_report = $factory->blastall($input);
+
+For blastall and non-psiblast blastpgp runs, report object is either a
+L<Bio::Tools::BPlite> or L<Bio::SearchIO> object, selected by the user 
+with the parameter _READMETHOD.  The leading underscore is needed to
+distinguish this option from options which are passed to the BLAST
+executable. The default parser is Bio::SearchIO::blast.  If BPlite
+method is selected, L<Bio::Tools::BPlite> objects will be returned for
+standard blast and L<Bio::Tools::BPpsilite> for a multiple-iteration
+blasts, and a L<Bio::Tools::BPbl2seq> for bl2seq.  In any case, the "raw"
+blast report is also available. The filename is set by the
+'outfile' parameter and has the default value of "blastreport.out".
+The BPlite method is only provided to support legacy code since
+the BPlite modules are no longer maintained - do not use BPlite
+since these modules will be removed eventually.
+
+For psiblast execution in the BLAST "jumpstart" mode, the program must
+be passed (in addition to the query sequence itself) an alignment
+containing the query sequence (in the form of a SimpleAlign object) as
+well as a "mask" specifying at what residues position-specific scoring
+matrices (PSSMs) are to be used and at what residues default scoring
+matrices (eg BLOSUM) are to be used. See psiblast documentation for
+more details.  The mask itself is a string of 0's and 1's which is the
+same length as each sequence in the alignment and has a "1" at
+locations where (PSSMs) are to be used and a "0" at all other
+locations. So for example:
+
+  $str = Bio::AlignIO->new(-file => "cysprot.msf", 
+                           -format => 'msf');
+  $aln = $str->next_aln();
+  $len = $aln->length_aln();
+  $mask = '1' x $len;
+  # simple case where PSSM's to be used at all residues
+  $report = $factory->blastpgp("cysprot1.fa", $aln, $mask);
+
+For bl2seq execution, StandAloneBlast.pm can be combined with
+AlignIO.pm to directly produce a SimpleAlign object from the alignment
+of the two sequences produced by bl2seq as in:
+
+  # Get 2 sequences
+  $str = Bio::SeqIO->new(-file=>'t/amino.fa' , -format => 'Fasta');
+  my $seq3 = $str->next_seq();
+  my $seq4 = $str->next_seq();
+
+  # Run bl2seq on them
+  $factory = Bio::Tools::Run::StandAloneBlast->new(-program => 'blastp',
+                                                   -outfile => 'bl2seq.out');
+  my $bl2seq_report = $factory->bl2seq($seq3, $seq4);
+
+  # Use AlignIO.pm to create a SimpleAlign object from the bl2seq report
+  $str = Bio::AlignIO->new(-file=> 'bl2seq.out',-format => 'bl2seq');
+  $aln = $str->next_aln();
+
+For more examples of syntax and use of Blast.pm, the user is
+encouraged to run the scripts standaloneblast.pl in the bioperl
+examples/tools directory and StandAloneBlast.t in the bioperl t/ 
+directory.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR -  Peter Schattner
+
+Email schattner at alum.mit.edu
+
+=head1 MAINTAINER - Torsten Seemann
+
+Email torsten at infotech.monash.edu.au
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Run::StandAloneBlast;
+
+use vars qw($AUTOLOAD $PROGRAMDIR  $DATADIR $BLASTTYPE
+	    @BLASTALL_PARAMS @BLASTPGP_PARAMS @WUBLAST_PARAMS 
+		 @WUBLAST_SWITCH @RPSBLAST_PARAMS @BL2SEQ_PARAMS 
+       @OTHER_PARAMS %OK_FIELD $DEFAULTREADMETHOD
+	    );
+		 
+use strict;
+
+use Bio::Root::IO;
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Tools::BPbl2seq;
+use Bio::Tools::BPpsilite;
+use Bio::SearchIO;
+use File::Spec;
+
+use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase Bio::Factory::ApplicationFactoryI);
+
+BEGIN {
+        # Option letters recognised for each NCBI executable.
+        @BLASTALL_PARAMS = qw(A B C D E F G I J K L M O P Q R S T U V W X Y Z a b d e f g i l m n o p q r s t v w y z);
+        @BLASTPGP_PARAMS = qw(A B C E F G H I J K L M N O P Q R S T U W X Y Z a b c d e f h i j k l m o p q s t u v y z);
+        @RPSBLAST_PARAMS = qw(F I J L N O P T U V X Y Z a b d e i l m o p v y z);
+        @BL2SEQ_PARAMS = qw(A D E F G I J M S T U V W X Y a d e g i j m o p q r t);
+	$DEFAULTREADMETHOD = 'BLAST';
+	$BLASTTYPE = 'ncbi';
+	# WU-BLAST options that take a value ...
+	@WUBLAST_PARAMS = 
+	  qw( E S E2 S2 W T X M Y Z L K H V  B
+     matrix Q R filter wordmask filter maskextra 
+     hitdist wink ctxfactor gapE gapS gapE2 gapS2 gapW gapX olf golf 
+     olmax golmax gapdecayrate topcomboN topcomboE sumstatsmethod 
+     hspsepqmax hspsepsmax gapsepqmax gapsepsmax altscore hspmax gspmax 
+     qoffset nwstart nwlen qrecmin qrecmax 
+     dbrecmin dbrecmax vdbdescmax dbchunks sort_by_pvalue 
+     cpus putenv getenv progress o database input);
+	# ... and WU-BLAST options that are plain switches.
+	@WUBLAST_SWITCH = 
+     qw(kap sump poissonp lcfilter lcmask echofilter stats nogap 
+     gapall pingpong 
+     nosegs postsw span2 span1 span prune consistency 
+     links ucdb gi noseqs qtype qres sort_by_pvalue sort_by_count 
+     sort_by_highscore sort_by_totalscore sort_by_subjectlength
+     mmio nonnegok novalidctxok shortqueryok notes warnings 
+     errors endputenv 
+     getenv endgetenv abortonerror abortonfatal); 
+
+	# Non BLAST parameters start with underscore to differentiate them
+	# from BLAST parameters
+	@OTHER_PARAMS = qw(_READMETHOD);
+
+	# my @other_switches = qw(QUIET);
+
+	# Authorize attribute fields: AUTOLOAD only accepts names that are
+	# keys of %OK_FIELD.  (The archived copy read ", at WUBLAST_PARAMS",
+	# a mangled form of ", @WUBLAST_PARAMS" that does not compile.)
+	foreach my $attr (@BLASTALL_PARAMS, @BLASTPGP_PARAMS, @RPSBLAST_PARAMS,
+							@BL2SEQ_PARAMS, @OTHER_PARAMS, @WUBLAST_PARAMS,
+                     @WUBLAST_SWITCH )
+     { $OK_FIELD{$attr}++; }
+
+        # You will need to enable Blast to find the Blast program.  
+        # This can be done in at least two different ways:
+        #  1. define an environmental variable blastDIR:
+        #	export BLASTDIR=/home/peter/blast   or
+        #  2. include a definition of an environmental variable 
+        # BLASTDIR in every script that will use StandAloneBlast.pm.
+
+	# $BLASTTYPE was set to 'ncbi' just above, so at load time this
+	# always picks BLASTDIR.
+	$PROGRAMDIR = $BLASTTYPE eq 'ncbi' ? $ENV{'BLASTDIR'}: $ENV{'WUBLASTDIR'};
+
+	# If local BLAST databases are not stored in the standard
+	# /data directory, the variable BLASTDATADIR will need to be 
+	# set explicitly 
+	$DATADIR =  $ENV{'BLASTDATADIR'} || $ENV{'BLASTDB'} || '';
+}
+
+
+=head1 BLAST parameters
+
+Essentially all BLAST parameters can be set via StandAloneBlast.pm.
+Some of the most commonly used parameters are listed below.  All
+parameters have defaults and are optional (I think.)  For a complete
+listing of settable parameters, run the relevant executable BLAST
+program with the option "-" as in blastall -
+
+=head2 Blastall
+
+  -p  Program Name [String]
+        Input should be one of "blastp", "blastn", "blastx", 
+        "tblastn", or "tblastx".
+  -d  Database [String] default = nr
+        The database specified must first be formatted with formatdb.
+        Multiple database names (bracketed by quotations) will be accepted.
+        An example would be -d "nr est"
+   -i  Query File [File In]   Set by StandAloneBlast.pm from script.
+    default = stdin. The query should be in FASTA format.  If multiple FASTA entries are in the input
+        file, all queries will be searched.
+  -e  Expectation value (E) [Real] default = 10.0
+  -o  BLAST report Output File [File Out]  Optional,
+	default = ./blastreport.out ; set by StandAloneBlast.pm		
+  -S  Query strands to search against database (for blast[nx], and tblastx).  3 is both, 1 is top, 2 is bottom [Integer]
+	default = 3
+
+=head2 Blastpgp (including Psiblast)
+
+  -j   is the maximum number of rounds (default 1; i.e., regular BLAST)
+  -h   is the e-value threshold for including sequences in the
+	score matrix model (default 0.001)
+  -c   is the "constant" used in the pseudocount formula specified in the paper (default 10)
+  -B  Multiple alignment file for PSI-BLAST "jump start mode"  Optional
+  -Q  Output File for PSI-BLAST Matrix in ASCII [File Out]  Optional
+
+=head2 E<lt>rpsblastE<gt>
+
+  -d  Database [String] default = (none - you must specify a database)
+        The database specified must first be formatted with formatdb.
+        Multiple database names (bracketed by quotations) will be accepted.
+        An example would be -d "Cog Smart"
+   -i  Query File [File In]   Set by StandAloneBlast.pm from script.
+    default = stdin. The query should be in FASTA format.  If multiple FASTA entries are in the input
+        file, all queries will be searched.
+  -e  Expectation value (E) [Real] default = 10.0
+  -o  BLAST report Output File [File Out]  Optional,
+	default = ./blastreport.out ; set by StandAloneBlast.pm		
+
+=head2 Bl2seq
+
+  -i  First sequence [File In]
+  -j  Second sequence [File In]
+  -p  Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String]
+    default = blastp
+  -o  alignment output file [File Out] default = stdout
+  -e  Expectation value (E) [Real]  default = 10.0
+  -S  Query strands to search against database (blastn only).  3 is both, 1 is top, 2 is bottom [Integer]
+    default = 3
+
+=head2 WU-Blast
+
+  -p Program Name [String] 
+        Input should be one of "wublastp", "wublastn", "wublastx", 
+        "wutblastn", or "wutblastx".
+  -d  Database [String] default = nr
+        The database specified must first be formatted with xdformat.
+  -i  Query File [File In]   Set by StandAloneBlast.pm from script.
+    default = stdin. The query should be in FASTA format.  If multiple FASTA entries are in the input
+        file, all queries will be searched.
+  -E  Expectation value (E) [Real] default = 10.0
+  -o  BLAST report Output File [File Out]  Optional,
+	default = ./blastreport.out ; set by StandAloneBlast.pm		
+
+=cut
+
+sub new {
+    # Constructor.  Accepts a flat list of attribute/value pairs; each
+    # attribute may be given with or without a leading dash.  The aliases
+    # 'program' and 'outfile' map to the 'p' and 'o' parameters.
+    my ($caller, @args) = @_;
+    # chained new
+    my $self = $caller->SUPER::new(@args);
+
+    # to facilitate tempfile cleanup
+    my ($tfh,$tempfile) = $self->io->tempfile();
+    close($tfh); # we don't want the filehandle, just a temporary name
+    $self->o($tempfile) unless $self->o;
+    $self->_READMETHOD($DEFAULTREADMETHOD);
+    while (@args)  {
+        my $attr  = shift @args;
+        my $value = shift @args;
+        next if( $attr eq '-verbose');
+        # we allow both 'attr' and '-attr' options on the new() call
+        $attr =~ s/^-//;
+        # the workaround to deal with initializing
+        if($attr =~/^\s*program\s*$|^p$/){
+            # A program name starting with "wu" selects WU-BLAST.  Note
+            # that this switches the package-wide $BLASTTYPE, not just
+            # this object.
+            if(defined $value && $value =~/^wu/){
+                $BLASTTYPE="wublast";
+            }
+            $attr = 'p';
+        }
+        if($attr =~/outfile/){
+            $attr = 'o';
+        }
+
+        # dispatched through AUTOLOAD, which validates the name
+        $self->$attr($value);
+    }
+    return $self;
+}
+
+=head2 quiet
+
+ Title   : quiet
+ Usage   : $obj->quiet($newval)
+ Function: 
+ Example : 
+ Returns : value of quiet (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub quiet{
+    # Combined getter/setter for the '_quiet' flag; passing any argument
+    # (even undef) stores it before the current value is returned.
+    my ($self, @newval) = @_;
+    $self->{'_quiet'} = $newval[0] if @newval;
+    return $self->{'_quiet'};
+}
+
+sub AUTOLOAD {
+    # Generic accessor for BLAST parameters.
+    my ($self, @values) = @_;
+
+    # strip the package qualifier from the called method name
+    (my $name = $AUTOLOAD) =~ s/.*:://;
+
+    # For NCBI BLAST the storage key is just the first letter of the
+    # method name; otherwise the complete name is used.
+    my $key = ($BLASTTYPE eq 'ncbi') ? substr($name, 0, 1) : $name;
+
+    # Names whose key is a lone underscore (as in _READMETHOD) are
+    # validated under their full name, everything else under the key.
+    my $field = ($key eq '_') ? $name : $key;
+    $self->throw("Unallowed parameter: $field !") unless $OK_FIELD{$field};
+
+    $self->{$key} = $values[0] if @values;
+    return $self->{$key};
+}
+
+=head1 Methods
+
+=head2 executable
+
+ Title   : executable
+ Usage   : my $exe = $blastfactory->executable('blastall');
+ Function: Finds the full path to the named BLAST executable
+ Returns : string representing the full path to the exe
+ Args    : [optional] name of executable to set path to 
+           [optional] boolean flag whether or not warn when exe is not found
+
+
+=cut
+
+sub executable {
+    # Resolve, remember and return the path of an executable.
+    my ($self, $exename, $exe, $warn) = @_;
+
+    # only the NCBI flavour has a default executable name
+    if( $BLASTTYPE eq 'ncbi' && ! defined $exename ) {
+        $exename = 'blastall';
+    }
+
+    # a path supplied by the caller is recorded when it is executable
+    $self->{'_pathtoexe'}->{$exename} = $exe if( defined $exe && -x $exe );
+
+    if( ! defined $self->{'_pathtoexe'}->{$exename} ) {
+        # first choice: the location built by program_path
+        my $candidate = $self->program_path($exename);
+        if( -e $candidate && -x $candidate ) {
+            $exe = $self->{'_pathtoexe'}->{$exename} = $candidate;
+        }
+
+        # Fallback for when that location is unusable.  It is skipped
+        # whenever $exe holds a true value, which includes a
+        # caller-supplied path that was rejected above.
+        if( ! $exe ) {
+            $exe = $self->io->exists_exe($exename);
+            if( $exe && -x $exe ) {
+                $self->{'_pathtoexe'}->{$exename} = $exe;
+            } else {
+                $self->warn("Cannot find executable for $exename") if $warn;
+                $self->{'_pathtoexe'}->{$exename} = undef;
+            }
+        }
+    }
+    return $self->{'_pathtoexe'}->{$exename};
+}
+
+
+=head2 program_path
+
+ Title   : program_path
+ Usage   : my $path = $factory->program_path();
+ Function: Builds path for executable 
+ Returns : string representing the full path to the exe
+ Args    : name of the executable (e.g. 'blastall')
+
+=cut
+
+sub program_path {
+    # Join the program directory (when one is set) and the program name,
+    # adding '.exe' when $^O matches /mswin/i.
+    my ($self, $program_name) = @_;
+    my $suffix = ($^O =~ /mswin/i) ? '.exe' : '';
+    my @parts  = $self->program_dir ? ($self->program_dir) : ();
+    push @parts, $program_name . $suffix;
+
+    return Bio::Root::IO->catfile(@parts);
+}
+
+=head2 program_dir
+
+ Title   : program_dir
+ Usage   : my $dir = $factory->program_dir();
+ Function: Abstract get method for dir of program. 
+ Returns : string representing program directory 
+ Args    : none 
+
+=cut
+
+sub program_dir {
+    # the directory is a package-wide setting, not per-object state
+    return $PROGRAMDIR;
+}
+
+sub program {
+    # In list context: (path of the default executable, value of the 'p'
+    # parameter).  Otherwise: whatever executable() returns for the
+    # arguments given.
+    my ($self, @args) = @_;
+    return $self->executable(@args) unless wantarray;
+    return ($self->executable, $self->p());
+}
+
+=head2  blastall
+
+ Title   : blastall
+ Usage   :  $blast_report = $factory->blastall('t/testquery.fa');
+	or
+	       $input = Bio::Seq->new(-id=>"test query",
+				      -seq=>"ACTACCCTTTAAATCAGTGGGGG");
+	       $blast_report = $factory->blastall($input);
+	or 
+	      $seq_array_ref = \@seq_array;  
+         # where @seq_array is an array of Bio::Seq objects
+	      $blast_report = $factory->blastall(\@seq_array);
+ Returns : Reference to a Blast object or BPlite object 
+           containing the blast report.
+ Args    : Name of a file or Bio::Seq object or an array of 
+           Bio::Seq object containing the query sequence(s). 
+           Throws an exception if argument is not either a string 
+           (eg a filename) or a reference to a Bio::Seq object 
+           (or to an array of Seq objects).  If argument is string, 
+           throws exception if file corresponding to string name can 
+           not be found.
+
+=cut
+
+sub blastall {
+    # Run 'blastall' on the given query input and return the report.
+    my ($self, $input1) = @_;
+    $self->io->_io_cleanup();
+
+    my $executable = 'blastall';
+    my $input2;     # never assigned; passed on as undef
+
+    # turn the query into an input file name
+    my $infilename1 = $self->_setinput($executable, $input1);
+    unless ($infilename1) {
+        $self->throw(" $input1 ($infilename1) not Bio::Seq object or array of Bio::Seq objects or file name!");
+    }
+
+    # the query file becomes the -i parameter of blastall
+    $self->i($infilename1);
+
+    my $blast_report = _generic_local_blast($self, $executable,
+                                            $input1, $input2);
+    return $blast_report;
+}
+
+=head2  wublast
+
+ Title   : wublast
+ Usage   :  $blast_report = $factory->wublast('t/testquery.fa');
+	or
+	       $input = Bio::Seq->new(-id=>"test query",
+				      -seq=>"ACTACCCTTTAAATCAGTGGGGG");
+	       $blast_report = $factory->wublast($input);
+	or 
+	      $seq_array_ref = \@seq_array;  # where @seq_array is an array of Bio::Seq objects
+	      $blast_report = $factory->wublast(\@seq_array);
+ Returns :  Reference to a Blast object 
+ Args    : Name of a file or Bio::Seq object or an array of 
+           Bio::Seq object containing the query sequence(s). 
+           Throws an exception if argument is not either a string 
+           (eg a filename) or a reference to a Bio::Seq object 
+           (or to an array of Seq objects).  If argument is string, 
+           throws exception if file corresponding to string name can 
+           not be found.
+
+=cut
+
+sub wublast {
+    # Run WU-BLAST on the given query input and return the report.
+    my ($self, $input1) = @_;
+    $self->io->_io_cleanup();
+
+    my $executable  = 'wublast';
+    my $infilename1 = $self->_setinput($executable, $input1);
+    unless ($infilename1) {
+        $self->throw(" $input1 ($infilename1) not Bio::Seq object or array of Bio::Seq objects or file name!");
+    }
+
+    # the query file is stored as the 'input' parameter
+    $self->input($infilename1);
+
+    my $blast_report = _generic_local_wublast($self, $executable, $input1);
+    return $blast_report;
+}
+
+=head2  blastpgp
+
+ Title   : blastpgp
+ Usage   :  $blast_report = $factory-> blastpgp('t/testquery.fa');
+	or
+	       $input = Bio::Seq->new(-id=>"test query",
+				      -seq=>"ACTADDEEQQPPTCADEEQQQVVGG");
+	       $blast_report = $factory->blastpgp ($input);
+	or
+	      $seq_array_ref = \@seq_array;  
+         # where @seq_array is an array of Bio::Seq objects
+	      $blast_report = $factory-> blastpgp(\@seq_array);
+ Returns : Reference to a Bio::SearchIO object or BPlite object 
+           containing the blast report (BPlite only if you specify 
+           _READMETHOD=> 'BPlite')
+ Args    : Name of a file or Bio::Seq object. In psiblast jumpstart 
+           mode two additional arguments are required: a SimpleAlign 
+           object one of whose elements is the query and a "mask" to 
+           determine how BLAST should select scoring matrices see 
+           DESCRIPTION above for more details.
+
+           Throws an exception if argument is not either a string 
+           (eg a filename) or a reference to a Bio::Seq object 
+           (or to an array of Seq objects).  If argument is string, 
+           throws exception if file corresponding to string name can 
+           not be found.
+ Returns : Reference to Bio::SearchIO object 
+           or Bio::Tools::BPpsilite if you specify 
+           _READMETHOD => 'BPlite' object containing the blast report.
+
+=cut
+
+sub blastpgp {
+    # Run 'blastpgp'.  $input2 (an alignment) and $mask are only given
+    # in psiblast jumpstart mode; $mask is used by blastpgp's -B option
+    # to specify which residues are position aligned.
+    my ($self, $input1, $input2, $mask) = @_;
+    my $executable = 'blastpgp';
+
+    my ($infilename1, $infilename2) =
+        $self->_setinput($executable, $input1, $input2, $mask);
+    unless ($infilename1) {
+        $self->throw(" $input1  not Bio::Seq object or array of Bio::Seq objects or file name!");
+    }
+
+    # query file: -i parameter of blastpgp
+    $self->i($infilename1);
+
+    if ($input2) {
+        if (!$infilename2) {
+            $self->throw("$input2 not SimpleAlign Object in pre-aligned psiblast\n");
+        }
+        # partial alignment file: -B parameter of blastpgp
+        $self->B($infilename2);
+    }
+
+    my $blast_report = _generic_local_blast($self, $executable, $input1, $input2);
+    return $blast_report;
+}
+
+=head2  rpsblast
+
+ Title   : rpsblast
+ Usage   :  $blast_report = $factory->rpsblast('t/testquery.fa');
+	or
+	       $input = Bio::Seq->new(-id=>"test query",
+				      -seq=>"MVVLCRADDEEQQPPTCADEEQQQVVGG");
+	       $blast_report = $factory->rpsblast($input);
+	or
+	      $seq_array_ref = \@seq_array;  
+         # where @seq_array is an array of Bio::Seq objects
+	      $blast_report = $factory->rpsblast(\@seq_array);
+ Args    : Name of a file or Bio::Seq object or an array of 
+           Bio::Seq object containing the query sequence(s). 
+           Throws an exception if argument is not either a string 
+           (eg a filename) or a reference to a Bio::Seq object 
+           (or to an array of Seq objects).  If argument is string, 
+           throws exception if file corresponding to string name can 
+           not be found.
+ Returns : Reference to a Bio::SearchIO object or BPlite object 
+           containing the blast report (BPlite only if you specify 
+           _READMETHOD=> 'BPlite')
+
+=cut
+
+sub rpsblast {
+    # Run 'rpsblast' on the given query input and return the report.
+    my ($self, $input1) = @_;
+    my $executable = 'rpsblast';
+
+    # turn the query into an input file name
+    my $infilename1 = $self->_setinput($executable, $input1);
+    unless ($infilename1) {
+        $self->throw(" $input1 ($infilename1) not Bio::Seq object or array of Bio::Seq objects or file name!");
+    }
+
+    # the query file becomes the -i parameter
+    $self->i($infilename1);
+
+    # Run like a standard NCBI blast from this point
+    my $blast_report = _generic_local_blast($self, $executable);
+    return $blast_report;
+}
+
+=head2   bl2seq
+
+ Title   : bl2seq
+ Usage   : $factory-> bl2seq('t/seq1.fa', 't/seq2.fa');
+	or
+	  $input1 = Bio::Seq->new(-id=>"test query1",
+				  -seq=>"ACTADDEEQQPPTCADEEQQQVVGG");
+	  $input2 = Bio::Seq->new(-id=>"test query2",
+				  -seq=>"ACTADDEMMMMMMMDEEQQQVVGG");
+	  $blast_report = $factory->bl2seq ($input1,  $input2);
+ Returns : Reference to a BPbl2seq object containing the blast report.
+ Args    : Names of 2 files  or 2 Bio::Seq objects containing the 
+           sequences to be aligned by bl2seq.
+
+           Throws an exception if argument is not either a pair of 
+           strings (eg filenames) or references to Bio::Seq objects.  
+           If arguments are strings, throws exception if files 
+           corresponding to string names can not be found.
+
+=cut
+
+sub bl2seq {
+    my ($self, $input1, $input2) = @_;
+    my $executable = 'bl2seq';
+
+    # Turn both inputs (file names or Bio::Seq objects) into file names
+    my ($infilename1, $infilename2) =
+        $self->_setinput($executable, $input1, $input2);
+    $self->throw(" $input1  not Seq Object or file name!") unless $infilename1;
+    $self->throw("$input2  not Seq Object or file name!")  unless $infilename2;
+
+    $self->i($infilename1);    # first sequence to be aligned  (-i param of bl2seq)
+    $self->j($infilename2);    # second sequence to be aligned (-j param of bl2seq)
+
+    my $blast_report = _generic_local_blast($self, $executable);
+    return $blast_report;
+}
+#################################################
+
+=head2  _generic_local_blast
+
+ Title   : _generic_local_blast
+ Usage   : internal function not called directly
+ Returns : Bio::SearchIO or Bio::Tools::BPlite object
+ Args    : Reference to calling object and name of BLAST executable 
+
+=cut
+
+sub _generic_local_blast {
+    my ($self, $executable) = @_;
+
+    # Build the parameter string for this executable, then hand over to
+    # _runblast, which makes the system call and returns the report object.
+    my $param_string = $self->_setparams($executable);
+    my $blast_report = _runblast($self, $executable, $param_string);
+    return $blast_report;
+}
+
+
+=head2  _generic_local_wublast
+
+ Title   : _generic_local_wublast
+ Usage   :  internal function not called directly
+ Returns :  Blast object
+ Args    :   Reference to calling object and name of BLAST executable 
+
+=cut
+
+sub _generic_local_wublast {
+    my ($self, $executable) = @_;
+
+    # The generated switches are preceded on the command line by the
+    # database and the input, each separated by a single space.
+    my $switches     = $self->_setparams($executable);
+    my $database     = $self->database;
+    my $query        = $self->input;
+    my $param_string = " $database $query $switches";
+
+    # Run the program and return the parsed report
+    my $blast_report = _runwublast($self, $executable, $param_string);
+    return $blast_report;
+}
+
+=head2  _runblast
+
+ Title   :  _runblast
+ Usage   :  Internal function, not to be called directly	
+ Function:   makes actual system call to Blast program
+ Example :
+ Returns : Report object in the appropriate format (Bio::SearchIO)
+           or if BPlite is requested: Bio::Tools::BPlite, 
+           Bio::Tools::BPpsilite,or Bio::Tools::BPbl2seq)
+ Args    : Reference to calling object, name of BLAST executable, 
+           and parameter string for executable 
+
+=cut
+
+sub _runblast {
+    my ($self, $executable, $param_string) = @_;
+
+    # Nothing can be run without a path to the program
+    my $exe = $self->executable($executable);
+    unless ($exe) {
+        $self->warn("cannot find path to $executable");
+        return;
+    }
+
+    my $commandstring = $exe . $param_string;
+    $self->debug("$commandstring \n");    # shown only when debugging
+
+    # A non-zero status from system() is treated as a fatal error
+    my $status = system($commandstring);
+    unless ($status == 0) {
+        $self->throw("$executable call crashed: $? $commandstring\n");
+    }
+
+    my $outfile = $self->o();             # name of the output file
+
+    # Significance cutoff: the expectation value if set, else a default.
+    # NOTE(review): the value is not used anywhere else in this sub.
+    my $signif = $self->e() || 1e-5;
+
+    # bl2seq and psiblast (blastpgp with more than one iteration) need
+    # their dedicated BPlite-family parsers (BPbl2seq, BPpsilite).  For
+    # everything else either the SearchIO parser or the plain BPlite
+    # parser can be selected through _READMETHOD.
+    my $report;
+    if ($self->_READMETHOD =~ /^(Blast|SearchIO)/i) {
+        $report = Bio::SearchIO->new(-file   => $outfile,
+                                     -format => 'blast');
+    }
+    elsif ($self->_READMETHOD =~ /BPlite/i) {
+        if ($executable =~ /bl2seq/i) {
+            # the program name lets BPbl2seq compute strand information
+            $report = Bio::Tools::BPbl2seq->new(-file        => $outfile,
+                                                -REPORT_TYPE => $self->p);
+        }
+        elsif ($executable =~ /blastpgp/i && defined $self->j() && $self->j() > 1) {
+            $self->debug("using psilite parser\n");
+            $report = Bio::Tools::BPpsilite->new(-file => $outfile);
+        }
+        elsif ($executable =~ /blastall|rpsblast/i) {
+            $report = Bio::Tools::BPlite->new(-file => $outfile);
+        }
+        else {
+            $self->warn("Unrecognized executable $executable");
+        }
+    }
+    else {
+        $self->warn("Unrecognized readmethod ".$self->_READMETHOD);
+    }
+
+    return $report;
+}
+
+=head2  _runwublast
+
+ Title   :  _runwublast
+ Usage   :  Internal function, not to be called directly	
+ Function:   makes actual system call to WU-Blast program
+ Example :
+ Returns : Report Blast object
+ Args    : Reference to calling object, name of BLAST executable, 
+           and parameter string for executable 
+
+=cut
+
+sub _runwublast {
+    my ($self, $executable, $param_string) = @_;
+
+    # The path is looked up under the program name held in the 'p'
+    # parameter; $executable is only used in messages.
+    my $exe = $self->executable($self->p);
+    unless ($exe) {
+        $self->warn("cannot find path to $executable");
+        return;
+    }
+
+    my $commandstring = $exe . " " . $param_string;
+    $self->debug("$commandstring \n");    # shown only when debugging
+
+    # A non-zero status from system() is treated as a fatal error
+    my $status = system($commandstring);
+    unless ($status == 0) {
+        $self->throw("$executable call crashed: $? $commandstring\n");
+    }
+
+    # Parse the output file with the SearchIO blast parser
+    my $outfile = $self->o();
+    my $report  = Bio::SearchIO->new(-file   => $outfile,
+                                     -format => 'blast');
+    return $report;
+}
+
+=head2  _setinput
+
+ Title   :  _setinput
+ Usage   :  Internal function, not to be called directly	
+ Function:   Create input file(s) for Blast executable
+ Example :
+ Returns : name of file containing Blast data input
+ Args    : Seq object reference or input file name
+
+=cut
+
+sub _setinput {
+    # Args: name of the executable, first input, optional second input and
+    #       optional mask string (only used for blastpgp's pre-aligned
+    #       "jumpstart" input).
+    # Returns the name of the first input file when no second input is
+    # given, otherwise the list (file1, file2).  A false value in either
+    # position signals an input that could not be handled.
+    #
+    # The mask is read as an explicit fifth argument.  It used to be
+    # fetched with a bare "shift", which returned $self because @_ had
+    # never been consumed, so the mask was the stringified object.
+    my ($self, $executable, $input1, $input2, $mask) = @_;
+    my ($infilename1, $infilename2);
+
+    # Remove temporary files registered with the IO object earlier
+    $self->io->_io_cleanup();
+
+    # ---- first input -------------------------------------------------
+    if (!ref $input1) {
+        # Not a reference: it had better be the name of an existing file
+        $infilename1 = (-e $input1) ? $input1 : 0;
+    }
+    elsif (ref($input1) =~ /ARRAY/i) {
+        # An array of sequence objects: write them all to one fasta file
+        my $fh;
+        ($fh, $infilename1) = $self->io->tempfile();
+        my $out = Bio::SeqIO->new(-fh => $fh, -format => 'fasta');
+        foreach my $seq (@$input1) {
+            unless ($seq->isa("Bio::PrimarySeqI")) { return 0; }
+            $seq->display_id($seq->display_id);
+            $out->write_seq($seq);
+        }
+        close $fh;
+    }
+    elsif ($input1->isa("Bio::PrimarySeqI")) {
+        # A single sequence object
+        my $fh;
+        ($fh, $infilename1) = $self->io->tempfile();
+
+        # In case the sequence was taken from an alignment and contains
+        # gap symbols, strip every non-word character - otherwise the
+        # BLAST programs will be unhappy.  Note that this modifies the
+        # caller's sequence object.
+        my $seq_string = $input1->seq();
+        $seq_string =~ s/\W+//g;
+        $input1->seq($seq_string);
+
+        my $out = Bio::SeqIO->new(-fh => $fh, '-format' => 'fasta');
+        $out->write_seq($input1);
+        close $fh;
+    }
+    else {
+        $infilename1 = 0;    # error flag: unsupported kind of input
+    }
+
+    unless ($input2) { return $infilename1; }
+
+    # ---- second input ------------------------------------------------
+    if (!ref $input2) {
+        $infilename2 = (-e $input2) ? $input2 : 0;
+    }
+    elsif ($input2->isa("Bio::PrimarySeqI") && $executable eq 'bl2seq') {
+        my $fh;
+        ($fh, $infilename2) = $self->io->tempfile();
+        my $out = Bio::SeqIO->new(-fh => $fh, '-format' => 'Fasta');
+        $out->write_seq($input2);
+        close $fh;
+    }
+    elsif ($input2->isa("Bio::SimpleAlign") && $executable eq 'blastpgp') {
+        # Option for using psiblast's pre-alignment "jumpstart" feature
+        my $fh;
+        ($fh, $infilename2) = $self->io->tempfile();
+
+        # The mask determines which residues should be scored according
+        # to their position (true) and which with the non-position-
+        # specific matrices (false).  Without a mask nothing is re-cased.
+        my @mask = defined $mask ? split(//, $mask) : ();
+
+        # Upper-case the residues at positions meant for position
+        # specific scoring and lower-case the others.
+        foreach my $seq ($input2->each_seq()) {
+            my @residues = split(//, $seq->seq());
+            for my $i (0 .. $#mask) {
+                last if $i > $#residues;    # mask longer than sequence
+                next unless $residues[$i] =~ /[a-zA-Z]/;
+                $residues[$i] = $mask[$i] ? uc $residues[$i]
+                                          : lc $residues[$i];
+            }
+            $seq->seq(join("", @residues));
+        }
+
+        # Write the alignment out in the "psi" format psiblast expects
+        $input2->map_chars('\.','-');
+        my $out = Bio::AlignIO->new(-fh => $fh, '-format' => 'psi');
+        $out->write_aln($input2);
+        close $fh;
+    }
+    else {
+        $infilename2 = 0;    # error flag: unsupported kind of input
+    }
+
+    return ($infilename1, $infilename2);
+}
+
+=head2  _setparams
+
+ Title   : _setparams
+ Usage   : Internal function, not to be called directly	
+ Function: Create parameter inputs for Blast program
+ Example :
+ Returns : parameter string to be passed to Blast 
+ Args    : Reference to calling object and name of BLAST executable
+
+=cut
+
+sub _setparams {
+    my ($self, $executable) = @_;
+
+    # Parameter names known for each executable; anything else gets none
+    my %params_for = (
+        blastall => \@BLASTALL_PARAMS,
+        blastpgp => \@BLASTPGP_PARAMS,
+        rpsblast => \@RPSBLAST_PARAMS,
+        bl2seq   => \@BL2SEQ_PARAMS,
+        wublast  => \@WUBLAST_PARAMS,
+    );
+    my @execparams = @{ $params_for{$executable} || [] };
+
+    my $param_string = "";
+    PARAM:
+    foreach my $name (@execparams) {
+        my $value = $self->$name();
+        next PARAM unless defined $value;
+
+        my $switch = $name;
+        if ($executable eq 'wublast') {
+            # database and input are not emitted as switches here, and
+            # the output file goes under -o
+            next PARAM if $switch =~ /database|^d$/;
+            next PARAM if $switch =~ /input|^i$/;
+            $switch = 'o' if $switch =~ /outfile/;
+        }
+
+        if ($switch eq 'd' && $executable ne 'bl2seq') {
+            # Several databases may be given, separated by blanks.  Each
+            # one without an index file (.nin/.pin/.nal/.pal) at the given
+            # location gets the data directory prepended.
+            my @dbs = split(/ /, $value);
+            foreach my $db (@dbs) {
+                my $has_index = grep { -e "$db.$_" } qw(nin pin nal pal);
+                $db = File::Spec->catdir($DATADIR, $db) unless $has_index;
+            }
+            $value = '"' . join(" ", @dbs) . '"' if @dbs;
+        }
+
+        # put the parameter in the format expected by Blast
+        $param_string .= " -$switch  $value ";
+    }
+
+    # In quiet mode standard error is sent to the null device
+    $param_string .= '  2> ' . File::Spec->devnull() if $self->quiet();
+
+    if ($executable eq 'wublast') {
+        # value-less switches: emitted (lower-cased) whenever they are set
+        foreach my $flag (@WUBLAST_SWITCH) {
+            next unless defined $self->$flag();
+            $param_string .= ' -' . lc $flag;
+        }
+    }
+    return $param_string;
+}
+
+
+=head1 Bio::Tools::Run::Wrapper methods
+
+=cut
+
+=head2 no_param_checks
+
+ Title   : no_param_checks
+ Usage   : $obj->no_param_checks($newval)
+ Function: Boolean flag as to whether or not we should
+           trust the sanity checks for parameter values  
+ Returns : value of no_param_checks
+ Args    : newvalue (optional)
+
+
+=cut
+
+=head2 save_tempfiles
+
+ Title   : save_tempfiles
+ Usage   : $obj->save_tempfiles($newval)
+ Function: 
+ Returns : value of save_tempfiles
+ Args    : newvalue (optional)
+
+
+=cut
+
+=head2 outfile_name
+
+ Title   : outfile_name
+ Usage   : my $outfile = $tcoffee->outfile_name();
+ Function: Get/Set the name of the output file for this run
+           (if you wanted to do something special)
+ Returns : string
+ Args    : [optional] string to set value to
+
+
+=cut
+
+
+=head2 tempdir
+
+ Title   : tempdir
+ Usage   : my $tmpdir = $self->tempdir();
+ Function: Retrieve a temporary directory name (which is created)
+ Returns : string which is the name of the temporary directory
+ Args    : none
+
+
+=cut
+
+=head2 cleanup
+
+ Title   : cleanup
+ Usage   : $tcoffee->cleanup();
+ Function: Will cleanup the tempdir directory after a run
+ Returns : none
+ Args    : none
+
+
+=cut
+
+=head2 io
+
+ Title   : io
+ Usage   : $obj->io($newval)
+ Function:  Gets a Bio::Root::IO object
+ Returns : Bio::Root::IO
+ Args    : none
+
+
+=cut
+
+1;
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/WrapperBase.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/WrapperBase.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Run/WrapperBase.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,374 @@
+# $Id: WrapperBase.pm,v 1.21.4.2 2006/10/17 09:12:57 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Run::WrapperBase
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::WrapperBase - A Base object for wrappers around executables
+
+=head1 SYNOPSIS
+
+  # do not use this object directly, it provides the following methods
+  # for its subclasses
+
+  my $errstr = $obj->error_string();
+  my $exe    = $obj->executable();
+  $obj->save_tempfiles($booleanflag)
+  my $outfile= $obj->outfile_name();
+  my $tempdir= $obj->tempdir(); # get a temporary dir for executing
+  my $io     = $obj->io;  # Bio::Root::IO object
+  my $cleanup= $obj->cleanup(); # remove tempfiles
+
+  $obj->run({-arg1 => $value});
+
+=head1 DESCRIPTION
+
+This is a basic module from which to build executable wrapper modules.
+It has some basic methods to help when implementing new modules.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Run::WrapperBase;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use base qw(Bio::Root::Root);
+
+use File::Spec;
+use File::Path qw(); # don't import anything
+
+=head2 run
+
+ Title   : run
+ Usage   : $wrapper->run({ARGS HERE});
+ Function: Support generic running with args passed in
+           as a hashref
+ Returns : Depends on the implementation, status OR data
+ Args    : hashref of named arguments
+
+
+=cut
+
+sub run {
+    # Abstract method: always throws "not implemented", so concrete
+    # wrappers have to provide their own run().
+    my ($self, @args) = @_;
+    $self->throw_not_implemented();
+}
+
+
+=head2 error_string
+
+ Title   : error_string
+ Usage   : $obj->error_string($newval)
+ Function: Where the output from the last analysis run is stored.
+ Returns : value of error_string
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub error_string {
+    my ($self, $new_value) = @_;
+
+    # Only a defined argument replaces the stored string
+    $self->{'_error_string'} = $new_value if defined $new_value;
+
+    # Never hand back a false value other than the empty string
+    my $current = $self->{'_error_string'};
+    return $current ? $current : '';
+}
+
+=head2 arguments
+
+ Title   : arguments
+ Usage   : $obj->arguments($newval)
+ Function: Commandline parameters 
+ Returns : value of arguments
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub arguments {
+    my ($self, $new_value) = @_;
+
+    # Only a defined argument replaces the stored parameters
+    $self->{'_arguments'} = $new_value if defined $new_value;
+
+    # Never hand back a false value other than the empty string
+    my $current = $self->{'_arguments'};
+    return $current ? $current : '';
+}
+
+
+=head2 no_param_checks
+
+ Title   : no_param_checks
+ Usage   : $obj->no_param_checks($newval)
+ Function: Boolean flag as to whether or not we should
+           trust the sanity checks for parameter values
+ Returns : value of no_param_checks
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub no_param_checks {
+    my ($self, $flag) = @_;
+
+    if (defined $flag) {
+        $self->{'no_param_checks'} = $flag;
+    }
+    elsif (!defined $self->{'no_param_checks'}) {
+        # first read without a prior set: the flag defaults to 0
+        $self->{'no_param_checks'} = 0;
+    }
+    return $self->{'no_param_checks'};
+}
+
+=head2 save_tempfiles
+
+ Title   : save_tempfiles
+ Usage   : $obj->save_tempfiles($newval)
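+ Function: Get/Set the flag that tells whether temporary files are
+           kept (when true, no cleanup is done on object destruction)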
+ Returns : value of save_tempfiles
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub save_tempfiles {
+    my ($self, $flag) = @_;
+
+    # Only a defined argument changes the stored flag; there is no default
+    $self->{'save_tempfiles'} = $flag if defined $flag;
+    return $self->{'save_tempfiles'};
+}
+
+=head2 outfile_name
+
+ Title   : outfile_name
+ Usage   : my $outfile = $wrapper->outfile_name();
+ Function: Get/Set the name of the output file for this run
+           (if you wanted to do something special)
+ Returns : string
+ Args    : [optional] string to set value to
+
+
+=cut
+
+sub outfile_name {
+    my ($self, $name) = @_;
+
+    if (defined $name) {
+        $self->{'_outfilename'} = $name;
+    }
+    elsif (!defined $self->{'_outfilename'}) {
+        # first read without a prior set: fall back to the name 'mlc'
+        $self->{'_outfilename'} = 'mlc';
+    }
+    return $self->{'_outfilename'};
+}
+
+
+=head2 tempdir
+
+ Title   : tempdir
+ Usage   : my $tmpdir = $self->tempdir();
+ Function: Retrieve a temporary directory name (which is created)
+ Returns : string which is the name of the temporary directory
+ Args    : none
+
+
+=cut
+
+sub tempdir {
+    my ($self, @rest) = @_;
+
+    # An explicitly passed directory name replaces the stored one
+    $self->{'_tmpdir'} = $rest[0] if @rest;
+
+    # Otherwise ask the IO object for a fresh temporary directory, which
+    # is flagged for cleanup unless temporary files are to be kept
+    unless ($self->{'_tmpdir'}) {
+        $self->{'_tmpdir'} = $self->io->tempdir(CLEANUP => ! $self->save_tempfiles );
+    }
+
+    # Make sure the directory really exists before handing out its name
+    mkdir($self->{'_tmpdir'}, 0777) unless -d $self->{'_tmpdir'};
+
+    return $self->{'_tmpdir'};
+}
+
+=head2 cleanup
+
+ Title   : cleanup
+ Usage   : $wrapper->cleanup();
+ Function: Will cleanup the tempdir directory
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub cleanup {
+    my ($self) = @_;
+
+    # Let the IO object do its own cleanup first
+    $self->io->_io_cleanup();
+
+    # rmtree has to be called as a plain function.  Invoked as a class
+    # method it receives 'File::Path' as the path to delete, and the
+    # temporary directory itself is left behind.
+    my $tmpdir = $self->{'_tmpdir'};
+    if (defined $tmpdir && -d $tmpdir) {
+        File::Path::rmtree($tmpdir);
+    }
+}
+
+=head2 io
+
+ Title   : io
+ Usage   : $obj->io($newval)
+ Function: Gets a Bio::Root::IO object
+ Returns : Bio::Root::IO object
+ Args    : none
+
+
+=cut
+
+sub io {
+    my ($self) = @_;
+
+    # Create the IO helper on first use, with this object's verbosity,
+    # and reuse it afterwards
+    if (!defined $self->{'io'}) {
+        $self->{'io'} = Bio::Root::IO->new(-verbose => $self->verbose());
+    }
+    return $self->{'io'};
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version = $wrapper->version()
+ Function: Returns the program version (if available)
+ Returns : string representing version of the program 
+ Args    : [Optional] value to (re)set version string
+
+
+=cut
+
+sub version {
+    # Base implementation: no version information is available, so
+    # nothing is returned.  Arguments are accepted but ignored.
+    my ($self, @args) = @_;
+    return;
+}
+
+=head2 executable
+
+ Title   : executable
+ Usage   : my $exe = $factory->executable();
+ Function: Finds the full path to the executable
+ Returns : string representing the full path to the exe
+ Args    : [optional] name of executable to set path to
+           [optional] boolean flag whether or not warn when exe is not found
+
+=cut
+
+sub executable {
+    my ($self, $exe, $warn) = @_;
+
+    # An explicitly supplied path is stored as is; a path that is already
+    # known is simply handed back
+    $self->{'_pathtoexe'} = $exe if defined $exe;
+    return $self->{'_pathtoexe'} if defined $self->{'_pathtoexe'};
+
+    # First candidate: the path built by program_path
+    my $prog_path = $self->program_path;
+    if ($prog_path && -e $prog_path && -x $prog_path) {
+        $self->{'_pathtoexe'} = $prog_path;
+        return $self->{'_pathtoexe'};
+    }
+
+    # Second candidate: whatever the IO object's exists_exe reports for
+    # the program name, provided it is executable
+    my $found = $self->io->exists_exe($self->program_name);
+    if ($found && -x $found) {
+        $self->{'_pathtoexe'} = $found;
+    }
+    else {
+        $self->warn("Cannot find executable for ".$self->program_name) if $warn;
+        $self->{'_pathtoexe'} = undef;
+    }
+    return $self->{'_pathtoexe'};
+}
+
+=head2 program_path
+
+ Title   : program_path
+ Usage   : my $path = $factory->program_path();
+ Function: Builds path for executable 
+ Returns : string representing the full path to the exe
+ Args    : none
+
+=cut
+
+sub program_path {
+    my ($self) = @_;
+
+    # The directory part is optional
+    my @parts;
+    if ($self->program_dir) {
+        push @parts, $self->program_dir;
+    }
+
+    # On Windows the program name carries an .exe suffix
+    my $suffix = ($^O =~ /mswin/i) ? '.exe' : '';
+    push @parts, $self->program_name . $suffix;
+
+    return File::Spec->catfile(@parts);
+}
+
+=head2 program_dir
+
+ Title   : program_dir
+ Usage   : my $dir = $factory->program_dir();
+ Function: Abstract get method for dir of program. To be implemented
+           by wrapper.
+ Returns : string representing program directory 
+ Args    : none 
+
+=cut
+
+sub program_dir {
+    # Abstract getter: the base class only throws "not implemented"
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 program_name
+
+ Title   : program_name
+ Usage   : my $name = $factory->program_name();
+ Function: Abstract get method for name of program. To be implemented
+           by wrapper.
+ Returns : string representing program name
+ Args    : none
+
+=cut
+
+sub program_name {
+    # Abstract getter: the base class only throws "not implemented"
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+sub DESTROY {
+    my ($self) = @_;
+
+    # Temporary files are removed unless they were asked to be kept
+    $self->cleanup() unless $self->save_tempfiles;
+
+    # Give the parent class its turn
+    $self->SUPER::DESTROY();
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Seg.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Seg.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Seg.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,134 @@
+# $Id: Seg.pm,v 1.10.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Seg
+#
+# Copyright Balamurugan Kumarasamy
+# Totally re-written, added docs and tests -- Torsten Seemann, Sep 2006
+#
+# Copyright 
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Seg - parse C<seg> output 
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Seg;
+  my $parser = Bio::Tools::Seg->new(-file => 'seg.fasta');
+  while ( my $f = $parser->next_result ) {
+    if ($f->score < 1.5) {
+      print $f->location->to_FTstring, " is low complexity\n";
+    }
+  }
+
+=head1 DESCRIPTION
+
+C<seg> identifies low-complexity regions on a protein sequence.
+It is usually part of the C<WU-BLAST> and C<InterProScan> packages.
+
+The L<Bio::Tools::Seg> module will only parse the "fasta" output 
+modes of C<seg>, i.e. C<seg -l> (low complexity regions only), 
+C<seg -h> (high complexity regions only), or C<seg -a> (both low 
+and high). 
+
+It creates a L<Bio::SeqFeature::Generic> for each FASTA-like entry 
+found in the input file. It is up to the user to appropriately filter 
+these using the feature's score.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Torsten Seemann
+
+Email - torsten.seemann AT infotech.monash.edu.au
+
+=head1 CONTRIBUTOR - Bala
+
+Email - savikalpa at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Seg;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Tools::Seg->new();
+ Function: Builds a new Bio::Tools::Seg object
+ Returns : Bio::Tools::Seg
+ Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
+
+=cut
+
+
+sub new {
+    # Build the object through the parent constructor, then set up the
+    # input from the same arguments (-fh / -file).
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    $self->_initialize_io(@args);
+    return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $seg->next_result
+ Function: Get the next result set from parser data
+ Returns : Bio::SeqFeature::Generic
+ Args    : none
+
+=cut
+
+sub next_result {
+    my ($self) = @_;
+
+    # A header line such as
+    #   >test_prot(214-226) complexity=2.26 (12/2.20/2.50)
+    # yields seq_id 'test_prot', start 214, end 226 and score 2.26.
+    # Lines that do not look like this are skipped.
+    while (my $line = $self->_readline) {
+        # Copy the captures into lexicals right away rather than passing
+        # the global $1..$4 on to another sub
+        my ($seq_id, $start, $end, $score) =
+            $line =~ /^\>\s*?(\S+)?\s*?\((\d+)\-(\d+)\)\s*complexity=(\S+)/
+            or next;
+
+        return Bio::SeqFeature::Generic->new(
+            -seq_id     => $seq_id,
+            -start      => $start,
+            -end        => $end,
+            -score      => $score,
+            -source_tag => 'Seg',
+            -primary    => 'low_complexity'
+        );
+    }
+
+    # End of input: say so explicitly instead of falling off the end of
+    # the loop, whose return value is unspecified
+    return;
+}
+
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Seg.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqPattern.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqPattern.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqPattern.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,912 @@
+# $Id: SeqPattern.pm,v 1.21.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# bioperl module for Bio::Tools::SeqPattern
+#
+# Cared for by  Steve Chervitz  (sac-at-bioperl.org)
+#
+# Copyright  Steve Chervitz
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::SeqPattern - represent a sequence pattern or motif
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::SeqPattern;
+
+ my $pat1     = 'T[GA]AA...TAAT';
+ my $pattern1 = Bio::Tools::SeqPattern->new(-SEQ =>$pat1, -TYPE =>'Dna');
+
+ my $pat2     = '[VILM]R(GXX){3,2}...[^PG]';
+ my $pattern2 = Bio::Tools::SeqPattern->new(-SEQ =>$pat2, -TYPE =>'Amino');
+
+=head1 DESCRIPTION
+
+L<Bio::Tools::SeqPattern> module encapsulates generic data and
+methods for manipulating regular expressions describing nucleic or
+amino acid sequence patterns (a.k.a, "motifs").
+
+L<Bio::Tools::SeqPattern> is a concrete class that inherits from L<Bio::Root::Root>.
+
+This class grew out of a need to have a standard module for doing routine
+tasks with sequence patterns such as:
+
+  -- Forming a reverse-complement version of a nucleotide sequence pattern
+  -- Expanding patterns containing ambiguity codes
+  -- Checking for invalid regexp characters
+  -- Untainting yet preserving special characters in the pattern
+
+Other features to look for in the future:
+
+  -- Full pattern syntax checking
+  -- Conversion between expanded and condensed forms of the pattern
+
+=head1 MOTIVATIONS
+
+A key motivation for L<Bio::Tools::SeqPattern> is to have a way to
+generate a reverse complement of a nucleotide sequence pattern.
+This makes possible simultaneous pattern matching on both sense and
+anti-sense strands of a query sequence.
+
+In principle, one could do such a search more inefficiently by testing
+against both sense and anti-sense versions of a sequence.
+It is entirely equivalent to test a regexp containing both sense and
+anti-sense versions of the *pattern* against one copy of the sequence.
+The latter approach is much more efficient since:
+
+   1) You need only one copy of the sequence.
+   2) Only one regexp is executed.
+   3) Regexp patterns are typically much smaller than sequences.
+
+Patterns can be quite complex and it is often difficult to
+generate the reverse complement pattern. The Bioperl SeqPattern.pm
+addresses this problem, providing a convenient set of tools
+for working with biological sequence regular expressions.
+
+Not all patterns have been tested. If you discover a pattern that
+is not handled properly by Bio::Tools::SeqPattern.pm, please
+send me some email (sac at bioperl.org). Thanks.
+
+=head1 OTHER FEATURES
+
+=head2 Extended Alphabet Support
+
+This module supports the same set of ambiguity codes for nucleotide
+sequences as supported by L<Bio::Seq>. These ambiguity codes
+define the behavior of the L<expand()> method.
+
+ ------------------------------------------
+ Symbol       Meaning      Nucleic Acid
+ ------------------------------------------
+  A            A           (A)denine
+  C            C           (C)ytosine
+  G            G           (G)uanine
+  T            T           (T)hymine
+  U            U           (U)racil
+  M          A or C        a(M)ino group
+  R          A or G        pu(R)ine
+  W          A or T        (W)eak bond
+  S          C or G        (S)trong bond
+  Y          C or T        p(Y)rimidine
+  K          G or T        (K)eto group
+  V        A or C or G
+  H        A or C or T
+  D        A or G or T
+  B        C or G or T
+  X      G or A or T or C
+  N      G or A or T or C
+  .      G or A or T or C
+
+
+
+ ------------------------------------------
+ Symbol           Meaning
+ ------------------------------------------
+ A        Alanine
+ C        Cysteine
+ D        Aspartic Acid
+ E        Glutamic Acid
+ F        Phenylalanine
+ G        Glycine
+ H        Histidine
+ I        Isoleucine
+ K        Lysine
+ L        Leucine
+ M        Methionine
+ N        Asparagine
+ P        Proline
+ Q        Glutamine
+ R        Arginine
+ S        Serine
+ T        Threonine
+ V        Valine
+ W        Tryptophan
+ Y        Tyrosine
+
+ B        Aspartic Acid, Asparagine
+ Z        Glutamic Acid, Glutamine
+ X        Any amino acid
+ .        Any amino acid
+
+
+=head2 Multiple Format Support
+
+Ultimately, this module should be able to build SeqPattern.pm objects
+using a variety of pattern formats such as ProSite, Blocks, Prints, GCG, etc.
+Currently, this module only supports patterns using a grep-like syntax.
+
+=head1 USAGE
+
+A simple demo script called seq_pattern.pl is included in the examples/
+directory of the central Bioperl distribution.
+
+=head1 SEE ALSO
+
+L<Bio::Seq> - Lightweight sequence object.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Steve Chervitz, sac-at-bioperl.org
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-8 Steve Chervitz. All Rights Reserved.
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
+#
+##
+###
+#### END of main POD documentation.
+###
+##
+#'
+# CREATED : 28 Aug 1997
+
+
+package Bio::Tools::SeqPattern;
+
+use base qw(Bio::Root::Root);
+use strict;
+use vars qw ($ID);
+$ID  = 'Bio::Tools::SeqPattern';
+
+## These constants may be more appropriate in a Bio::Dictionary.pm
+## type of class.
+## They hold the residues that an ambiguity code stands for and are
+## spliced into character classes by _expand_nuc() and _expand_pep().
+my $PURINES      = 'AG';   # nucleotide code R
+my $PYRIMIDINES  = 'CT';   # nucleotide code Y
+my $BEE      = 'DN';       # D (Aspartic Acid) and N (Asparagine)
+my $ZED      = 'EQ';       # E (Glutamic Acid) and Q (Glutamine)
+# Character-class body listing every character alphabet_ok() accepts.
+my $Regexp_chars = '\w,.\*()\[\]<>\{\}^\$';  # quoted for use in regexps
+
+## Package variables used in reverse complementing.
+## Both hashes are emptied at the start of every revcom() call.
+## %Processed_asterics is filled by _fixpat_1() and read by _fixpat_4()
+## and _fixpat_5(); %Processed_braces is filled by _fixpat_2() and read
+## by _fixpat_4().
+my (%Processed_braces, %Processed_asterics);
+
+#####################################################################################
+##                                 CONSTRUCTOR                                     ##
+#####################################################################################
+
+
+=head1 new
+
+ Title     : new
+ Usage     : my $seqpat = new Bio::Tools::SeqPattern();
+ Purpose   : Verifies that the type is correct for superclass (Bio::Seq.pm)
+           : and calls superclass constructor last.
+ Returns   : n/a
+ Argument  : Parameters passed to new()
+ Throws    : Exception if the pattern string (seq) is empty.
+ Comments  : The process of creating a new SeqPattern.pm object
+           : ensures that the pattern string is untainted.
+
+See Also   : L<Bio::Root::Root::new()>,
+             L<Bio::Seq::_initialize()>
+
+=cut
+
+#----------------
+sub new {
+#----------------
+    # Constructor.
+    #   -seq  : the pattern string (required; upper-cased before storing)
+    #   -type : free-form type name; anything matching nuc/dna/rna becomes
+    #           'Dna', anything matching amino/pep/prot becomes 'Amino',
+    #           everything else (including a missing -type) leaves the
+    #           type undefined.
+    # Throws if the pattern string is empty (or otherwise false).
+    my($class, %param) = @_;
+
+    my $self = $class->SUPER::new(%param);
+    my ($seq,$type) = $self->_rearrange([qw(SEQ TYPE)], %param);
+
+    $seq || $self->throw("Empty pattern.");
+
+    # Normalise the type. -type is optional, so only pattern-match when a
+    # value was actually supplied; matching on undef would raise
+    # "uninitialized value" warnings.
+    my $t;
+    if (defined $type) {
+        if ($type =~ /nuc|[dr]na/i) {
+            $t = 'Dna';
+        } elsif ($type =~ /amino|pep|prot/i) {
+            $t = 'Amino';
+        }
+    }
+
+    $seq =~ tr/a-z/A-Z/;  #ps 8/8/00 Canonicalize to upper case
+    $self->str($seq);
+    $self->type($t);
+
+    return $self;
+}
+
+
+=head1 alphabet_ok
+
+ Title     : alphabet_ok
+ Usage     : $mypat->alphabet_ok;
+ Purpose   : Checks for invalid regexp characters.
+           : Overrides Bio::Seq::alphabet_ok() to allow
+           : additional regexp characters ,.*()[]<>{}^$
+           : in addition to the standard genetic alphabet.
+           : Also untaints the pattern and sets the sequence
+           : object's sequence to the untainted string.
+ Returns   : Boolean (1 | 0)
+ Argument  : n/a
+ Throws    : Exception if the pattern contains invalid characters.
+ Comments  : Does not call the superclass method.
+           : Actually permits any alphanumeric, not just the
+           : standard genetic alphabet.
+
+=cut
+
+#----------------'
+sub alphabet_ok {
+#----------------
+    # Checks that the pattern contains only characters listed in
+    # $Regexp_chars, throws if it does not, and stores the upper-cased,
+    # untainted pattern back on the object. Returns 1.
+    # NOTE(review): this sub calls seq() and setseq(), neither of which is
+    # defined in this package (the pattern accessor defined here is str()).
+    # Confirm they are inherited before relying on this method.
+    my( $self) = @_;
+
+    # Only check once per object. The flag is set before the check runs,
+    # so an object whose pattern throws below is still marked as checked.
+    return 1 if $self->{'_alphabet_checked'};
+
+    $self->{'_alphabet_checked'} = 1;
+
+    my $pat = $self->seq();
+
+    # Any character outside the permitted set is an error.
+    if($pat =~ /[^$Regexp_chars]/io) {
+	$self->throw("Pattern contains invalid characters: $pat",
+		     'Legal characters: a-z,A-Z,0-9,,.*()[]<>{}^$ ');
+    }
+
+    # Untaint pattern (makes code taint-safe).
+    # The capture from this match is what gets stored, upper-cased.
+    $pat  =~ /([$Regexp_chars]+)/io;
+    $self->setseq(uc($1));
+#    print STDERR "\npattern ok: $pat\n";
+    1;
+}
+
+=head1 expand
+
+ Title     : expand
+ Usage     : $seqpat_object->expand();
+ Purpose   : Expands the sequence pattern using special ambiguity codes.
+ Example   : $pat = $seq_pat->expand();
+ Returns   : String containing fully expanded sequence pattern
+ Argument  : n/a
+ Throws    : Exception if sequence type is not recognized
+           : (i.e., is not one of [DR]NA, Amino)
+
+See Also   : L<Extended Alphabet Support>, L<_expand_pep>(), L<_expand_nuc>()
+
+=cut
+
+#----------
+sub expand {
+#----------
+    # Dispatch to the expander matching this pattern's type and hand back
+    # whatever it returns; unknown types are an error.
+    my ($self) = @_;
+
+    return $self->_expand_nuc() if $self->type =~ /[DR]na/i;
+    return $self->_expand_pep() if $self->type =~ /Amino/i;
+
+    $self->throw("Don't know how to expand ${\$self->type} patterns.\n");
+}
+
+
+=head1 _expand_pep
+
+ Title     : _expand_pep
+ Usage     : n/a; automatically called by expand()
+ Purpose   : Expands peptide patterns
+ Returns   : String (the expanded pattern)
+ Argument  : String (the unexpanded pattern)
+ Throws    : n/a
+
+See Also   : L<expand>(), L<_expand_nuc>()
+
+=cut
+
+#----------------
+sub _expand_pep {
+#----------------
+    # Expands a peptide pattern into a plain regular expression.
+    #   Argument : pattern string (optional; defaults to $self->str)
+    #   Returns  : the expanded pattern string
+    #
+    # X becomes '.', a leading '<' becomes '^', a trailing '>' becomes '$',
+    # and the ambiguity codes are expanded as
+    #   B (Aspartic Acid, Asparagine) -> $BEE ('DN')
+    #   Z (Glutamic Acid, Glutamine)  -> $ZED ('EQ')
+    # Earlier code had the two expansions swapped (B -> 'EQ', Z -> 'DN'),
+    # which contradicts the symbol table in this module's documentation.
+    my ($self,$pat) = @_;
+    $pat ||= $self->str;
+    $pat =~ s/X/./g;
+    $pat =~ s/^</\^/;
+    $pat =~ s/>$/\$/;
+
+    ## Avoid nested situations: [bmnq] --/--> [[$BEE]mnq]
+    ## Yet correctly deal with: fze[bmnq] ---> f[$ZED]e[${BEE}mnq]
+    ## Codes already inside a character class are expanded in place first;
+    ## whatever is left afterwards gets a character class of its own.
+    $pat =~ s/\[(\w*)B(\w*)\]/\[$1$BEE$2\]/g;
+    $pat =~ s/\[(\w*)Z(\w*)\]/\[$1$ZED$2\]/g;
+    $pat =~ s/B/\[$BEE\]/g;
+    $pat =~ s/Z/\[$ZED\]/g;
+
+    $pat =~ s/\((.)\)/$1/g;  ## Doing these last since:
+    $pat =~ s/\[(.)\]/$1/g;  ## Pattern could contain [B] (for example)
+
+    return $pat;
+}
+
+
+
+=head1 _expand_nuc
+
+ Title     : _expand_nuc
+ Purpose   : Expands nucleotide patterns
+ Returns   : String (the expanded pattern)
+ Argument  : String (the unexpanded pattern)
+ Throws    : n/a
+
+See Also   : L<expand>(), L<_expand_pep>()
+
+=cut
+
+#---------------
+sub _expand_nuc {
+#---------------
+    # Expands a nucleotide pattern into a plain regular expression.
+    #   Argument : pattern string (optional; defaults to $self->str)
+    #   Returns  : the expanded pattern string
+    #
+    # N and X become '.', 'pu'/'py' become R/Y, U becomes T, a leading '<'
+    # becomes '^', a trailing '>' becomes '$', and every ambiguity code is
+    # replaced by the bases it stands for.
+    my ($self,$pat) = @_;
+
+    $pat ||= $self->str;
+    $pat =~ s/N|X/./g;
+    $pat =~ s/pu/R/ig;
+    $pat =~ s/py/Y/ig;
+    $pat =~ s/U/T/g;
+    $pat =~ s/^</\^/;
+    $pat =~ s/>$/\$/;
+
+    # Ambiguity code => bases, in the order the substitutions are applied.
+    my @expansions = (
+        [ 'R', $PURINES     ],
+        [ 'Y', $PYRIMIDINES ],
+        [ 'S', 'GC'  ],
+        [ 'W', 'AT'  ],
+        [ 'M', 'AC'  ],
+        [ 'K', 'GT'  ],
+        [ 'V', 'ACG' ],
+        [ 'H', 'ACT' ],
+        [ 'D', 'AGT' ],
+        [ 'B', 'CGT' ],
+    );
+
+    ## Avoid nested situations: [ya] --/--> [[ct]a]
+    ## Yet correctly deal with: sg[ya] ---> [gc]g[cta]
+    ## Pass 1: codes that already sit inside a character class are expanded
+    ## in place. This pass always runs; it used to be skipped unless a class
+    ## contained one of R,Y,S,W,M,K, so a class holding only V, H, D or B
+    ## (e.g. [AV]) ended up as a nested class.
+    foreach my $pair (@expansions) {
+        my ($code, $bases) = @{$pair};
+        $pat =~ s/\[(\w*)${code}(\w*)\]/\[$1${bases}$2\]/g;
+    }
+
+    ## Pass 2: every remaining code gets a character class of its own.
+    foreach my $pair (@expansions) {
+        my ($code, $bases) = @{$pair};
+        $pat =~ s/${code}/\[${bases}\]/g;
+    }
+
+    $pat =~ s/\((.)\)/$1/g;  ## Doing these last since:
+    $pat =~ s/\[(.)\]/$1/g;  ## Pattern could contain [y] (for example)
+
+    return $pat;
+}
+
+
+
+=head1 revcom
+
+ Title     : revcom
+ Usage     : revcom([1]);
+ Purpose   : Forms a pattern capable of recognizing the reverse complement
+           : version of a nucleotide sequence pattern.
+ Example   : $pattern_object->revcom();
+           : $pattern_object->revcom(1); ## returns expanded rev complement pattern.
+ Returns   : Object reference for a new Bio::Tools::SeqPattern containing
+           : the revcom of the current pattern as its sequence.
+ Argument  : (1) boolean (optional) (default= false)
+           :     true : expand the pattern before rev-complementing.
+           :     false: don't expand pattern before or after rev-complementing.
+ Throws    : Exception if called for amino acid sequence pattern.
+ Comments  : This method permits the simultaneous searching of both
+           : sense and anti-sense versions of a nucleotide pattern
+           : by means of a grep-type of functionality in which any
+           : number of patterns may be or-ed into the recognition
+           : pattern.
+           : Overrides Bio::Seq::revcom() and calls it first thing.
+           : The order of _fixpat() calls is critical.
+
+See Also   : L<Bio::Seq::revcom()>, L<_fixpat_1>(), L<_fixpat_2>(), L<_fixpat_3>(), L<_fixpat_4>(), L<_fixpat_5>()
+
+=cut
+
+#-----------'
+sub revcom {
+#-----------
+    # Builds a new Bio::Tools::SeqPattern whose pattern recognises the
+    # reverse complement of this (nucleotide) pattern.
+    #   Argument : boolean (optional); true expands the reversed pattern
+    #              with _expand_nuc() before the fix-up passes.
+    #   Returns  : a new Bio::Tools::SeqPattern object
+    #   Throws   : if this pattern's type does not match Dna or Rna.
+    my($self,$expand) = @_;
+
+    if ($self->type !~ /Dna|Rna/i) {
+        $self->throw("Can't get revcom for ${\$self->type} sequence types.\n");
+    }
+
+    $expand ||= 0;
+
+    # Complement every base and ambiguity code, reverse the whole string,
+    # then flip the bracket characters so they open and close correctly
+    # again after the reversal.
+    my $str = $self->str;
+    $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+    my $rev = CORE::reverse $str;
+    $rev    =~ tr/[](){}<>/][)(}{></;
+
+    if($expand) {
+        $rev = $self->_expand_nuc($rev);
+    }
+
+    # The _fixpat_* helpers record what they have already moved in these
+    # package-level hashes; start every call with a clean slate.
+    %Processed_braces = ();
+    %Processed_asterics = ();
+
+    # Reversal leaves quantifiers in front of the element they apply to.
+    # Each pass moves one kind of quantifier back behind its element.
+    # The order of these calls is critical.
+    my $fixrev = _fixpat_1($rev);
+    $fixrev = _fixpat_2($fixrev);
+    $fixrev = _fixpat_3($fixrev);
+    $fixrev = _fixpat_4($fixrev);
+    $fixrev = _fixpat_5($fixrev);
+
+    ##### Added by ps 8/7/00 to allow non-greedy matching
+    $fixrev = _fixpat_6($fixrev);
+
+    # Direct method call instead of the indirect-object form
+    # "new Bio::Tools::SeqPattern(...)", which is ambiguous to the parser.
+    return Bio::Tools::SeqPattern->new(-seq =>$fixrev, -type =>$self->type);
+}
+
+
+
+=head1 _fixpat_1
+
+ Title     : _fixpat_1
+ Usage     : n/a; called automatically by revcom()
+ Purpose   : Utility method for revcom()
+           : Converts all {7,5} --> {5,7}     (Part I)
+           :           and [T^] --> [^T]      (Part II)
+           :           and *N   --> N*        (Part III)
+ Returns   : String (the new, partially reversed pattern)
+ Argument  : String (the expanded pattern)
+ Throws    : n/a
+
+See Also   : L<revcom>()
+
+=cut
+
+#--------------
+sub _fixpat_1 {
+#--------------
+    # First fix-up pass for revcom().
+    #   Argument : the reversed pattern string
+    #   Returns  : the partially corrected pattern string
+    #
+    # All three parts use the same technique: the greedy leading (.*) makes
+    # the regex find the right-most occurrence, the rewritten occurrence and
+    # everything after it are split off at a '#' marker and pushed onto
+    # @done, and the loop continues with the text left of the marker. The
+    # pieces are collected right-to-left, hence the final reverse.
+    # Assumes '#' never occurs in the pattern itself.
+    my $pat = shift;
+
+    ## Part I: put the contents of {...} back in reading order,
+    ## e.g. {7,5} --> {5,7}
+    my (@done, @parts);
+    while(1) {
+        $pat =~ /(.*)\{(\S+?)\}(.*)/ or do{ push @done, $pat; last; };
+        $pat = $1.'#{'.reverse($2).'}'.$3;
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    $pat = join('', reverse @done);
+
+    ## Part II: put the contents of [...] back in reading order,
+    ## e.g. [T^] --> [^T]
+    @done = ();
+    while(1) {
+        $pat =~ /(.*)\[(\S+?)\](.*)/ or do{ push @done, $pat; last; };
+        $pat = $1.'#['.reverse($2).']'.$3;
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    $pat = join('', reverse @done);
+
+    ## Part III: move '*' behind the single character it applies to,
+    ## e.g. *N --> N*, and remember that character in %Processed_asterics.
+    @done = ();
+    while(1) {
+        $pat =~ /(.*)\*([\w.])(.*)/ or do{ push @done, $pat; last; };
+        $pat = $1.'#'.$2.'*'.$3;
+        $Processed_asterics{$2}++;
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    return join('', reverse @done);
+
+}
+
+
+=head1 _fixpat_2
+
+ Title     : _fixpat_2
+ Usage     : n/a; called automatically by revcom()
+ Purpose   : Utility method for revcom()
+           : Converts all {5,7}Y ---> Y{5,7}
+           :          and {10,}. ---> .{10,}
+ Returns   : String (the new, partially reversed pattern)
+ Argument  : String (the expanded, partially reversed pattern)
+ Throws    : n/a
+
+See Also   : L<revcom>()
+
+=cut
+
+#--------------
+sub _fixpat_2 {
+#--------------
+    # Second fix-up pass for revcom(): moves a {..} quantifier behind the
+    # single character (word character or '.') that follows it,
+    # e.g. {5,7}Y --> Y{5,7}.
+    #   Argument : the partially corrected pattern string
+    #   Returns  : the pattern string with those quantifiers moved
+    #
+    # Works right-to-left with a '#' marker; see _fixpat_1 for the
+    # technique. Assumes '#' never occurs in the pattern itself.
+    my $pat = shift;
+
+    # Warnings are switched off for the duration of this sub.
+    local($^W) = 0;
+    my (@done, @parts,$braces);
+    while(1) {
+        $pat =~ s/(.*)(\{\S+?\})([\w.])(.*)/$1#$3$2$4/ or do{ push @done, $pat; last; };
+        $braces = $2;
+        $braces =~ s/[{}]//g;
+        # NOTE(review): the successful s/[{}]//g just above resets the
+        # capture variables, so $3 is most likely undef here and the key is
+        # only the brace contents. Left unchanged to preserve behaviour.
+        $Processed_braces{"$3$braces"}++;
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    return join('', reverse @done);
+}
+
+
+=head1 _fixpat_3
+
+ Title     : _fixpat_3
+ Usage     : n/a; called automatically by revcom()
+ Purpose   : Utility method for revcom()
+           : Converts all {5,7}(XXX) ---> (XXX){5,7}
+ Returns   : String (the new, partially reversed pattern)
+ Argument  : String (the expanded, partially reversed pattern)
+ Throws    : n/a
+
+See Also   : L<revcom>()
+
+=cut
+
+#-------------
+sub _fixpat_3 {
+#-------------
+    # Third fix-up pass for revcom(): moves a {..} quantifier behind the
+    # parenthesised group that follows it, e.g. {5,7}(XXX) --> (XXX){5,7}.
+    #   Argument : the partially corrected pattern string
+    #   Returns  : the pattern string with those quantifiers moved
+    #
+    # Works right-to-left with a '#' marker; see _fixpat_1 for the
+    # technique. Assumes '#' never occurs in the pattern itself.
+    # An earlier "already processed" check against %Processed_braces /
+    # %Processed_asterics was disabled (##ps); every match is swapped.
+    my $pat = shift;
+
+    my (@done, @parts);
+    while(1) {
+        if( $pat =~ /(.*)(.)(\{\S+\})(\(\w+\))(.*)/) {
+            # $2 is the character in front of the quantifier; it stays put.
+            $pat = "$1#$2$4$3$5";
+        } elsif( $pat =~ /^(\{\S+\})(\(\w+\))(.*)/) {
+            # Quantifier at the very start of the pattern.
+            $pat = "#$2$1$3";
+        } else {
+            push @done, $pat; last;
+        }
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    return join('', reverse @done);
+}
+
+
+=head1 _fixpat_4
+
+ Title     : _fixpat_4
+ Usage     : n/a; called automatically by revcom()
+ Purpose   : Utility method for revcom()
+           : Converts all {5,7}[XXX] ---> [XXX]{5,7}
+ Returns   : String (the new, partially reversed pattern)
+ Argument  : String (the expanded, partially reversed  pattern)
+ Throws    : n/a
+
+See Also   : L<revcom>()
+
+=cut
+
+#---------------
+sub _fixpat_4 {
+#---------------
+    # Fourth fix-up pass for revcom(): moves a {..} quantifier behind the
+    # character class that follows it, e.g. {5,7}[XXX] --> [XXX]{5,7}.
+    #   Argument : the partially corrected pattern string
+    #   Returns  : the pattern string with those quantifiers moved
+    #
+    # Works right-to-left with a '#' marker; see _fixpat_1 for the
+    # technique. Assumes '#' never occurs in the pattern itself.
+    my $pat = shift;
+
+    my (@done, @parts,$braces,$newpat,$oldpat);
+    while(1) {
+        if( $pat =~ /(.*)(.)(\{\S+\})(\[\w+\])(.*)/) {
+            $newpat = "$1#$2$4$3$5";   # quantifier moved behind the class
+            $oldpat = "$1#$2$3$4$5";   # original order, marker inserted
+            $braces = $3;
+            $braces =~ s/[{}]//g;
+            # NOTE(review): the successful s/[{}]//g just above resets the
+            # capture variables, so "defined $2" is most likely always false
+            # here and the "already processed" branch is never taken.
+            # Left unchanged to preserve behaviour.
+            if( (defined $braces and defined $2) and
+                exists $Processed_braces{"$2$braces"} || exists $Processed_asterics{$2}) {
+                $pat = $oldpat;  # Don't change it. Already processed.
+            } else {
+                $pat = $newpat;  # Change it.
+            }
+        } elsif( $pat =~ /^(\{\S+\})(\[\w+\])(.*)/) {
+            # Quantifier at the very start of the pattern.
+            $pat = "#$2$1$3";
+        } else {
+            push @done, $pat; last;
+        }
+
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    return join('', reverse @done);
+}
+
+
+=head1 _fixpat_5
+
+ Title     : _fixpat_5
+ Usage     : n/a; called automatically by revcom()
+ Purpose   : Utility method for revcom()
+           : Converts all *[XXX]  ---> [XXX]*
+           :          and *(XXX)  ---> (XXX)*
+ Returns   : String (the new, partially reversed pattern)
+ Argument  : String (the expanded, partially reversed pattern)
+ Throws    : n/a
+
+See Also   : L<revcom>()
+
+=cut
+
+#--------------
+sub _fixpat_5 {
+#--------------
+    # Fifth fix-up pass for revcom(): moves '*' behind the character class
+    # or parenthesised group that follows it,
+    # e.g. *[XXX] --> [XXX]* and *(XXX) --> (XXX)*.
+    #   Argument : the partially corrected pattern string
+    #   Returns  : the pattern string with those asterisks moved
+    #
+    # Works right-to-left with a '#' marker; see _fixpat_1 for the
+    # technique. Assumes '#' never occurs in the pattern itself.
+    my $pat = shift;
+
+    my (@done, @parts,$newpat,$oldpat);
+    while(1) {
+        if( $pat =~ /(.*)(.)\*(\[\w+\]|\(\w+\))(.*)/) {
+            $newpat = "$1#$2$3*$4";   # asterisk moved behind the group
+            $oldpat = "$1#$2*$3$4";   # original order, marker inserted
+            # If _fixpat_1 already attached a '*' to this character, the
+            # asterisk belongs to it and must stay where it is.
+            if( exists $Processed_asterics{$2}) {
+                $pat = $oldpat;  # Don't change it. Already processed.
+            } else {
+                $pat = $newpat;  # Change it.
+            }
+        } elsif( $pat =~ /^\*(\[\w+\]|\(\w+\))(.*)/) {
+            # Asterisk at the very start of the pattern. This regex has only
+            # two capture groups, so the remainder is $2; using $3 here
+            # silently dropped the rest of the pattern.
+            $pat = "#$1*$2";
+        } else {
+            push @done, $pat; last;
+        }
+
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    return join('', reverse @done);
+}
+
+
+
+
+
+############################
+#
+#  PS: Added 8/7/00 to allow non-greedy matching patterns
+#
+######################################
+
+=head1 _fixpat_6
+
+ Title     : _fixpat_6
+ Usage     : n/a; called automatically by revcom()
+ Purpose   : Utility method for revcom()
+           : Converts all ?Y{5,7}  ---> Y{5,7}?
+           :          and ?(XXX){5,7}  ---> (XXX){5,7}?
+           :          and ?[XYZ]{5,7}  ---> [XYZ]{5,7}?
+ Returns   : String (the new, partially reversed pattern)
+ Argument  : String (the expanded, partially reversed pattern)
+ Throws    : n/a
+
+See Also   : L<revcom>()
+
+=cut
+
+#--------------
+sub _fixpat_6 {
+#--------------
+    # Sixth fix-up pass for revcom(): moves the non-greedy marker '?' behind
+    # the element it follows in the reversed string, together with that
+    # element's optional {..} quantifier,
+    # e.g. ?Y{5,7} --> Y{5,7}? and ?[XYZ]{5,7} --> [XYZ]{5,7}?.
+    #   Argument : the partially corrected pattern string
+    #   Returns  : the pattern string with those markers moved
+    #
+    # Works right-to-left with a '#' marker; see _fixpat_1 for the
+    # technique. Assumes '#' never occurs in the pattern itself.
+    my $pat = shift;
+    my (@done, @parts);
+
+    while(1) {
+        $pat =~ /(.*)\?(\[\w+\]|\(\w+\)|\w)(\{\S+?\})?(.*)/ or do{ push @done, $pat; last; };
+        my $quantifier = $3 ? $3 : ""; # Shut up warning if no explicit quantifier
+        $pat = $1.'#'.$2.$quantifier.'?'.$4;
+
+        @parts = split '#', $pat;
+        push @done, $parts[1];
+        $pat = $parts[0];
+        last if not $pat;
+    }
+    return join('', reverse @done);
+
+}
+
+=head2 str
+
+ Title   : str
+ Usage   : $obj->str($newval)
+ Function:
+ Returns : value of str
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub str {
+    # Combined getter/setter for the pattern string: stores the first
+    # argument when one is given, and always returns the stored value.
+    my ($self, @args) = @_;
+    $self->{'str'} = $args[0] if @args;
+    return $self->{'str'};
+}
+
+=head2 type
+
+ Title   : type
+ Usage   : $obj->type($newval)
+ Function:
+ Returns : value of type
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub type {
+    # Combined getter/setter for the pattern type: stores the first
+    # argument when one is given, and always returns the stored value.
+    my ($self, @args) = @_;
+    $self->{'type'} = $args[0] if @args;
+    return $self->{'type'};
+}
+
+1;
+
+__END__
+
+#########################################################################
+#  End of class
+#########################################################################
+
+=head1 FOR DEVELOPERS ONLY
+
+=head2 Data Members
+
+Information about the various data members of this module is provided
+for those wishing to modify or understand the code. Two things to bear
+in mind:
+
+=over 2
+
+=item 1 Do NOT rely on these in any code outside of this module.
+
+All data members are prefixed with an underscore to signify that they
+are private.  Always use accessor methods. If the accessor doesn't
+exist or is inadequate, create or modify an accessor (and let me know,
+too!).
+
+=item 2 This documentation may be incomplete and out of date.
+
+It is easy for this documentation to become obsolete as this module is
+still evolving.  Always double check this info and search for members
+not described here.
+
+=back
+
+An instance of Bio::Tools::SeqPattern.pm is a blessed reference
+to a hash containing all or some of the following fields:
+
+ FIELD          VALUE
+ ------------------------------------------------------------------------
+ _rev     : The corrected reverse complement of the fully expanded pattern.
+
+ INHERITED DATA MEMBERS:
+
+ _seq     : (From Bio::Seq.pm) The original, unexpanded input sequence after untainting.
+ _type    : (From Bio::Seq.pm) 'Dna' or 'Amino'
+
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqStats.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqStats.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqStats.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,875 @@
+# $Id: SeqStats.pm,v 1.30.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::SeqStats
+#
+# Cared for by
+#
+# Copyright Peter Schattner
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::SeqStats - Object holding statistics for one 
+particular sequence
+
+=head1 SYNOPSIS
+
+  # build a primary nucleic acid or protein sequence object somehow
+  # then build a statistics object from the sequence object
+
+  $seqobj = Bio::PrimarySeq->new(-seq      => 'ACTGTGGCGTCAACTG',
+                                 -alphabet => 'dna',
+                                 -id       => 'test');
+  $seq_stats  =  Bio::Tools::SeqStats->new(-seq => $seqobj);
+
+  # obtain a hash of counts of each type of monomer
+  # (i.e. amino or nucleic acid)
+  print "\nMonomer counts using statistics object\n";
+  $seq_stats  =  Bio::Tools::SeqStats->new(-seq=>$seqobj);
+  $hash_ref = $seq_stats->count_monomers();  # e.g. for DNA sequence
+  foreach $base (sort keys %$hash_ref) {
+      print "Number of bases of type ", $base, "= ", 
+         $hash_ref->{$base},"\n";
+  }
+
+  # obtain the count directly without creating a new statistics object
+  print "\nMonomer counts without statistics object\n";
+  $hash_ref = Bio::Tools::SeqStats->count_monomers($seqobj);
+  foreach $base (sort keys %$hash_ref) {
+      print "Number of bases of type ", $base, "= ", 
+         $hash_ref->{$base},"\n";
+  }
+
+
+  # obtain hash of counts of each type of codon in a nucleic acid sequence
+  print "\nCodon counts using statistics object\n";
+  $hash_ref = $seq_stats-> count_codons();  # for nucleic acid sequence
+  foreach $base (sort keys %$hash_ref) {
+      print "Number of codons of type ", $base, "= ", 
+         $hash_ref->{$base},"\n";
+  }
+
+  #  or
+  print "\nCodon counts without statistics object\n";
+  $hash_ref = Bio::Tools::SeqStats->count_codons($seqobj);
+  foreach $base (sort keys %$hash_ref) {
+      print "Number of codons of type ", $base, "= ", 
+         $hash_ref->{$base},"\n";
+  }
+
+  # Obtain the molecular weight of a sequence. Since the sequence 
+  # may contain ambiguous monomers, the molecular weight is returned 
+  # as a (reference to) a two element array containing greatest lower 
+  # bound (GLB) and least upper bound (LUB) of the molecular weight
+  $weight = $seq_stats->get_mol_wt();
+  print "\nMolecular weight (using statistics object) of sequence ", 
+          $seqobj->id(), " is between ", $$weight[0], " and " ,
+          $$weight[1], "\n";
+
+  #  or
+  $weight = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+  print "\nMolecular weight (without statistics object) of sequence ", 
+        $seqobj->id(), " is between ", $$weight[0], " and " ,
+        $$weight[1], "\n";
+
+  # Calculate mean Kyte-Doolittle hydropathicity (aka "gravy" score)
+  my $prot = Bio::PrimarySeq->new(-seq=>'MSFVLVAPDMLATAAADVVQIGSAVSAGS',
+                                  -alphabet=>'protein');
+  my $gravy = Bio::Tools::SeqStats->hydropathicity($prot);
+  print "might be hydropathic" if $gravy > 1;  
+
+=head1 DESCRIPTION
+
+Bio::Tools::SeqStats is a lightweight object for the calculation of
+simple statistical and numerical properties of a sequence. By
+"lightweight" I mean that only "primary" sequences are handled by the
+object.  The calling script needs to create the appropriate primary
+sequence to be passed to SeqStats if statistics on a sequence feature
+are required.  Similarly if a codon count is desired for a
+frame-shifted sequence and/or a negative strand sequence, the calling
+script needs to create that sequence and pass it to the SeqStats
+object.
+
+Note that nucleotide sequences in bioperl do not strictly separate RNA
+and DNA sequences. By convention, sequences from RNA molecules are
+shown as if they were DNA. Objects are supposed to make the
+distinction when needed. This class is one of the few where this
+distinction needs to be made. Internally, it changes all Ts into Us
+before weight and monomer count.
+
+SeqStats can be called in two distinct manners.  If only a single
+computation is required on a given sequence object, the method can be
+called easily using the SeqStats object directly:
+
+  $weight = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+
+Alternately, if several computations will be required on a given
+sequence object, an "instance" statistics object can be constructed
+and used for the method calls:
+
+  $seq_stats = Bio::Tools::SeqStats->new($seqobj);
+  $monomers = $seq_stats->count_monomers();
+  $codons = $seq_stats->count_codons();
+  $weight = $seq_stats->get_mol_wt();
+  $gravy = $seq_stats->hydropathicity();
+
+As currently implemented the object can return the following values
+from a sequence:
+
+=over
+
+=item *
+
+The molecular weight of the sequence: get_mol_wt()
+
+=item *
+
+The number of each type of monomer present: count_monomers()
+
+=item *
+
+The number of each codon present in a nucleic acid sequence:
+count_codons()
+
+=item *
+
+The mean hydropathicity ("gravy" score) of a protein:
+hydropathicity()
+
+=back
+
+For DNA and RNA sequences single-stranded weights are returned. The
+molecular weights are calculated for neutral, or not ionized,
+nucleic acids. The returned weight is the sum of the
+base-sugar-phosphate residues of the chain plus one weight of water
+to account for the additional OH on the phosphate of the 5' residue
+and the additional H on the sugar ring of the 3' residue.  Note that
+this leads to a difference of 18 in calculated molecular weights
+compared to some other available programs (e.g. Informax VectorNTI).
+
+Note that since sequences may contain ambiguous monomers (e.g. "M",
+meaning "A" or "C" in a nucleic acid sequence), the method get_mol_wt
+returns a two-element array containing the greatest lower bound and
+least upper bound of the molecule. For a sequence with no ambiguous
+monomers, the two elements of the returned array will be equal. The
+method count_codons() handles ambiguous bases by simply counting all
+ambiguous codons together and issuing a warning to that effect.
+
+
+=head1 DEVELOPERS NOTES
+
+Ewan moved it from Bio::SeqStats to Bio::Tools::SeqStats
+
+Heikki made tiny adjustments (+/- 0.01 daltons) to amino acid
+molecular weights to have the output match values in SWISS-PROT.
+
+Torsten added hydropathicity calculation.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Peter Schattner
+
+Email schattner AT alum.mit.edu
+
+=head1 CONTRIBUTOR - Torsten Seemann 
+
+Email torsten.seemann AT infotech.monash.edu.au
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+package Bio::Tools::SeqStats;
+use strict;
+use vars qw(%Alphabets %Alphabets_strict $amino_weights
+	    $rna_weights $dna_weights %Weights $amino_hydropathicity);
+use Bio::Seq;
+use base qw(Bio::Root::Root);
+
+BEGIN {
+	%Alphabets =   (
+			 'dna'     => [ qw(A C G T R Y M K S W H B V D X N) ],
+		    'rna'     => [ qw(A C G U R Y M K S W H B V D X N) ],
+		    'protein' => [ qw(A R N D C Q E G H I L K M F U
+									 P S T W X Y V B Z *) ], # sac: added B, Z
+						);
+
+# SAC: new strict alphabet: doesn't allow any ambiguity characters.
+    %Alphabets_strict = (
+			 'dna'     => [ qw( A C G T ) ],
+			 'rna'     => [ qw( A C G U ) ],
+			 'protein'    => [ qw(A R N D C Q E G H I L K M F U
+					      P S T W Y V) ],
+			 );
+
+
+#  IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
+#   Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
+
+#  Amino Acid alphabet
+
+# ------------------------------------------
+# Symbol           Meaning
+# ------------------------------------------
+
+    my $amino_A_wt = 89.09;
+    my $amino_C_wt = 121.15;
+    my $amino_D_wt = 133.1;
+    my $amino_E_wt = 147.13;
+    my $amino_F_wt = 165.19;
+    my $amino_G_wt = 75.07;
+    my $amino_H_wt = 155.16;
+    my $amino_I_wt = 131.17;
+    my $amino_K_wt = 146.19;
+    my $amino_L_wt = 131.17;
+    my $amino_M_wt = 149.21;
+    my $amino_N_wt = 132.12;
+    my $amino_P_wt = 115.13;
+    my $amino_Q_wt = 146.15;
+    my $amino_R_wt = 174.20;
+    my $amino_S_wt = 105.09;
+    my $amino_T_wt = 119.12;
+    my $amino_U_wt = 168.06;
+    my $amino_V_wt = 117.15;
+    my $amino_W_wt = 204.23;
+    my $amino_Y_wt = 181.19;
+
+
+    $amino_weights = {
+	'A'     => [$amino_A_wt, $amino_A_wt], # Alanine
+	'B'     => [$amino_N_wt, $amino_D_wt], # Aspartic Acid, Asparagine
+	'C'     => [$amino_C_wt, $amino_C_wt], # Cysteine
+	'D'     => [$amino_D_wt, $amino_D_wt], # Aspartic Acid
+	'E'     => [$amino_E_wt, $amino_E_wt], # Glutamic Acid
+	'F'     => [$amino_F_wt, $amino_F_wt], # Phenylalanine
+	'G'     => [$amino_G_wt, $amino_G_wt], # Glycine
+	'H'     => [$amino_H_wt, $amino_H_wt], # Histidine
+	'I'     => [$amino_I_wt, $amino_I_wt], # Isoleucine
+	'K'     => [$amino_K_wt, $amino_K_wt], # Lysine
+	'L'     => [$amino_L_wt, $amino_L_wt], # Leucine
+	'M'     => [$amino_M_wt, $amino_M_wt], # Methionine
+	'N'     => [$amino_N_wt, $amino_N_wt], # Asparagine
+	'P'     => [$amino_P_wt, $amino_P_wt], # Proline
+	'Q'     => [$amino_Q_wt, $amino_Q_wt], # Glutamine
+	'R'     => [$amino_R_wt, $amino_R_wt], # Arginine
+	'S'     => [$amino_S_wt, $amino_S_wt], # Serine
+	'T'     => [$amino_T_wt, $amino_T_wt], # Threonine
+	'U'     => [$amino_U_wt, $amino_U_wt], # SelenoCysteine
+	'V'     => [$amino_V_wt, $amino_V_wt], # Valine
+	'W'     => [$amino_W_wt, $amino_W_wt], # Tryptophan
+	'X'     => [$amino_G_wt, $amino_W_wt], # Unknown
+	'Y'     => [$amino_Y_wt, $amino_Y_wt], # Tyrosine
+	'Z'     => [$amino_Q_wt, $amino_E_wt], # Glutamic Acid, Glutamine
+    };
+
+    # Extended Dna / Rna alphabet
+    use vars ( qw($C $O $N $H $P $water) );
+    use vars ( qw($adenine   $guanine   $cytosine   $thymine   $uracil));
+    use vars ( qw($ribose_phosphate   $deoxyribose_phosphate   $ppi));
+    use vars ( qw($dna_A_wt   $dna_C_wt   $dna_G_wt  $dna_T_wt
+		  $rna_A_wt   $rna_C_wt   $rna_G_wt   $rna_U_wt));
+    use vars ( qw($dna_weights   $rna_weights   %Weights));
+
+    $C = 12.01;
+    $O = 16.00;
+    $N = 14.01;
+    $H = 1.01;
+    $P = 30.97;
+    $water = 18.015;
+
+    $adenine = 5 * $C + 5 * $N + 5 * $H;
+    $guanine = 5 * $C + 5 * $N + 1 * $O + 5 * $H;
+    $cytosine = 4 * $C + 3 * $N + 1 * $O + 5 * $H;
+    $thymine = 5 * $C + 2 * $N + 2 * $O + 6 * $H;
+    $uracil = 4 * $C + 2 * $N + 2 * $O + 4 * $H;
+
+    $ribose_phosphate = 5 * $C + 7 * $O + 9 * $H + 1 * $P;
+    # neutral (unionized) form
+    $deoxyribose_phosphate = 5 * $C + 6 * $O + 9 * $H + 1 * $P;
+
+    # the following are single strand molecular weights / base
+    $dna_A_wt = $adenine + $deoxyribose_phosphate - $water;
+    $dna_C_wt = $cytosine + $deoxyribose_phosphate - $water;
+    $dna_G_wt = $guanine + $deoxyribose_phosphate - $water;
+    $dna_T_wt = $thymine + $deoxyribose_phosphate - $water;
+
+    $rna_A_wt = $adenine + $ribose_phosphate - $water;
+    $rna_C_wt = $cytosine + $ribose_phosphate - $water;
+    $rna_G_wt = $guanine + $ribose_phosphate - $water;
+    $rna_U_wt = $uracil + $ribose_phosphate - $water;
+
+    $dna_weights = {
+	'A'             => [$dna_A_wt,$dna_A_wt],            # Adenine
+	'C'             => [$dna_C_wt,$dna_C_wt],            # Cytosine
+	'G'             => [$dna_G_wt,$dna_G_wt],            # Guanine
+	'T'             => [$dna_T_wt,$dna_T_wt],            # Thymine
+	'M'             => [$dna_C_wt,$dna_A_wt],            # A or C
+	'R'             => [$dna_A_wt,$dna_G_wt],            # A or G
+	'W'             => [$dna_T_wt,$dna_A_wt],            # A or T
+	'S'             => [$dna_C_wt,$dna_G_wt],            # C or G
+	'Y'             => [$dna_C_wt,$dna_T_wt],            # C or T
+	'K'             => [$dna_T_wt,$dna_G_wt],            # G or T
+	'V'             => [$dna_C_wt,$dna_G_wt],            # A or C or G
+	'H'             => [$dna_C_wt,$dna_A_wt],            # A or C or T
+	'D'             => [$dna_T_wt,$dna_G_wt],            # A or G or T
+	'B'             => [$dna_C_wt,$dna_G_wt],            # C or G or T
+	'X'             => [$dna_C_wt,$dna_G_wt],            # G or A or T or C
+	'N'             => [$dna_C_wt,$dna_G_wt],            # G or A or T or C
+    };
+
+    $rna_weights =  {
+	'A'             => [$rna_A_wt,$rna_A_wt],            # Adenine
+	'C'             => [$rna_C_wt,$rna_C_wt],            # Cytosine
+	'G'             => [$rna_G_wt,$rna_G_wt],            # Guanine
+	'U'             => [$rna_U_wt,$rna_U_wt],            # Uracil
+	'M'             => [$rna_C_wt,$rna_A_wt],            # A or C
+	'R'             => [$rna_A_wt,$rna_G_wt],            # A or G
+	'W'             => [$rna_U_wt,$rna_A_wt],            # A or U
+	'S'             => [$rna_C_wt,$rna_G_wt],            # C or G
+	'Y'             => [$rna_C_wt,$rna_U_wt],            # C or U
+	'K'             => [$rna_U_wt,$rna_G_wt],            # G or U
+	'V'             => [$rna_C_wt,$rna_G_wt],            # A or C or G
+	'H'             => [$rna_C_wt,$rna_A_wt],            # A or C or U
+	'D'             => [$rna_U_wt,$rna_G_wt],            # A or G or U
+	'B'             => [$rna_C_wt,$rna_G_wt],            # C or G or U
+	'X'             => [$rna_C_wt,$rna_G_wt],            # G or A or U or C
+	'N'             => [$rna_C_wt,$rna_G_wt],            # G or A or U or C
+    };
+
+    %Weights =   (
+		  'dna'     =>  $dna_weights,
+		  'rna'     =>  $rna_weights,
+		  'protein' =>  $amino_weights,
+		  );
+
+    $amino_weights = {
+	'A'     => [$amino_A_wt, $amino_A_wt], # Alanine
+	'B'     => [$amino_N_wt, $amino_D_wt], # Aspartic Acid, Asparagine
+	'C'     => [$amino_C_wt, $amino_C_wt], # Cysteine
+	'D'     => [$amino_D_wt, $amino_D_wt], # Aspartic Acid
+	'E'     => [$amino_E_wt, $amino_E_wt], # Glutamic Acid
+	'F'     => [$amino_F_wt, $amino_F_wt], # Phenylalanine
+	'G'     => [$amino_G_wt, $amino_G_wt], # Glycine
+	'H'     => [$amino_H_wt, $amino_H_wt], # Histidine
+	'I'     => [$amino_I_wt, $amino_I_wt], # Isoleucine
+	'K'     => [$amino_K_wt, $amino_K_wt], # Lysine
+	'L'     => [$amino_L_wt, $amino_L_wt], # Leucine
+	'M'     => [$amino_M_wt, $amino_M_wt], # Methionine
+	'N'     => [$amino_N_wt, $amino_N_wt], # Asparagine
+	'P'     => [$amino_P_wt, $amino_P_wt], # Proline
+	'Q'     => [$amino_Q_wt, $amino_Q_wt], # Glutamine
+	'R'     => [$amino_R_wt, $amino_R_wt], # Arginine
+	'S'     => [$amino_S_wt, $amino_S_wt], # Serine
+	'T'     => [$amino_T_wt, $amino_T_wt], # Threonine
+	'U'     => [$amino_U_wt, $amino_U_wt], # SelenoCysteine
+	'V'     => [$amino_V_wt, $amino_V_wt], # Valine
+	'W'     => [$amino_W_wt, $amino_W_wt], # Tryptophan
+	'X'     => [$amino_G_wt, $amino_W_wt], # Unknown
+	'Y'     => [$amino_Y_wt, $amino_Y_wt], # Tyrosine
+	'Z'     => [$amino_Q_wt, $amino_E_wt], # Glutamic Acid, Glutamine
+	};
+	
+	# Amino acid scale: Hydropathicity.
+	# Ref: Kyte J., Doolittle R.F. J. Mol. Biol. 157:105-132(1982).
+	# http://au.expasy.org/tools/pscale/Hphob.Doolittle.html
+	
+	$amino_hydropathicity = {
+    A =>  1.800,  
+    R => -4.500,  
+    N => -3.500,  
+    D => -3.500,  
+    C =>  2.500,  
+    Q => -3.500,  
+    E => -3.500,  
+    G => -0.400,  
+    H => -3.200,  
+    I =>  4.500,  
+    L =>  3.800,  
+    K => -3.900,  
+    M =>  1.900,  
+    F =>  2.800,  
+    P => -1.600,  
+    S => -0.800,  
+    T => -0.700,  
+    W => -0.900,  
+    Y => -1.300,  
+    V =>  4.200,  
+	};
+
+}
+
+sub new {
+    # Constructor: builds the object through the parent class, then
+    # validates and stores the sequence given as the SEQ named parameter.
+    #   Args   : named parameters; SEQ must be a Bio::PrimarySeqI object
+    #            whose alphabet is a key of %Alphabets
+    #   Returns: the new object with '_seqref' and '_is_strict' set
+    #   Throws : if SEQ is missing or not a PrimarySeqI, if its alphabet
+    #            is unknown, or if _is_alphabet_strict() rejects it
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
+
+    # A missing or non-object SEQ must raise the documented exception
+    # rather than Perl's "Can't call method "isa"" error.
+    unless (defined $seqobj && eval { $seqobj->isa("Bio::PrimarySeqI") }) {
+        $self->throw("SeqStats works only on PrimarySeqI objects");
+    }
+
+    my $alphabet = $seqobj->alphabet;
+    if (!defined $alphabet || !defined $Alphabets{$alphabet}) {
+        $self->throw("Must have a valid alphabet defined for seq (".
+                     join(",", keys %Alphabets) . ")");
+    }
+
+    $self->{'_seqref'} = $seqobj;
+    # check the letters in the sequence
+    $self->{'_is_strict'} = _is_alphabet_strict($seqobj);
+    return $self;
+}
+
+=head2 count_monomers
+
+ Title   : count_monomers
+ Usage   : $rcount = $seq_stats->count_monomers();
+           or $rcount = Bio::Tools::SeqStats->count_monomers($seqobj);
+ Function: Counts the number of each type of monomer (amino acid or
+	        base) in the sequence.
+           Ts are counted as Us in RNA sequences.
+ Example :
+ Returns : Reference to a hash in which keys are letters of the
+           genetic alphabet used and values are number of occurrences
+           of the letter in the sequence.
+ Args    : None or reference to sequence object
+ Throws  : Throws an exception if type of sequence is unknown (ie amino
+           or nucleic) or if unknown letter in alphabet. Ambiguous
+           elements are allowed.
+
+=cut
+
+sub count_monomers{
+    # Counts the occurrences of every letter of the sequence's alphabet.
+    #   Args   : none (use the sequence stored in the object) or a
+    #            sequence object to analyse instead
+    #   Returns: reference to a hash mapping letter => number of
+    #            occurrences (0 for letters that do not occur)
+    #   Throws : if the argument is not a Bio::PrimarySeqI, or if
+    #            _is_alphabet_strict() rejects the sequence
+    my $self            = shift @_;
+    my $object_argument = shift @_;
+
+    # An explicit sequence argument means we must not read or write the
+    # values cached in $self.
+    my $_is_instance = defined $object_argument ? 0 : 1;
+    my ($seqobj, $_is_strict);
+
+    if ($_is_instance) {
+        # return count if previously calculated
+        return $self->{'_monomer_count'} if $self->{'_monomer_count'};
+        $_is_strict = $self->{'_is_strict'};   # retrieve "strictness"
+        $seqobj     = $self->{'_seqref'};
+    }
+    else {
+        $seqobj = $object_argument;
+
+        #  Following two lines lead to error in "throw" routine
+        $seqobj->isa("Bio::PrimarySeqI") ||
+          $self->throw("SeqStats works only on PrimarySeqI objects");
+        # is alphabet OK? Is it strict?
+        $_is_strict = _is_alphabet_strict($seqobj);
+    }
+
+    # get array of allowed letters
+    my $alphabet = $_is_strict ? $Alphabets_strict{$seqobj->alphabet}
+                               : $Alphabets{$seqobj->alphabet};
+
+    # convert everything to upper case to be safe
+    my $seqstring = uc $seqobj->seq();
+
+    # Since T is used in RichSeq RNA sequences, do conversion locally
+    $seqstring =~ s/T/U/g if $seqobj->alphabet eq 'rna';
+
+    my %count;
+  LETTER:
+    foreach my $element (@$alphabet) {
+        # the terminator symbol is not counted
+        next LETTER if $element eq '*';
+        # A list assignment evaluated in scalar context yields the number
+        # of matches, and a plain 0 (not an empty string) when there are
+        # none.  \Q...\E keeps the letter from being read as a pattern.
+        $count{$element} = () = $seqstring =~ /\Q$element\E/g;
+    }
+
+    if ($_is_instance) {
+        $self->{'_monomer_count'} = \%count;  # Save in case called again later
+    }
+
+    return \%count;
+}
+
+=head2  get_mol_wt
+
+ Title   : get_mol_wt
+ Usage   : $wt = $seqstats->get_mol_wt() or
+           $wt = Bio::Tools::SeqStats ->get_mol_wt($seqobj);
+ Function: Calculate molecular weight of sequence
+           Ts are counted as Us in RNA sequences.
+ Example :
+
+ Returns : Reference to two element array containing lower and upper
+           bounds of molecule molecular weight. For DNA and RNA
+           sequences single-stranded weights are returned. If
+           sequence contains no ambiguous elements, both entries in
+           array are equal to molecular weight of molecule.
+ Args    : None or reference to sequence object
+ Throws  : Exception if type of sequence is unknown (ie not amino or
+           nucleic) or if unknown letter in alphabet. Ambiguous
+           elements are allowed.
+
+=cut
+
+sub get_mol_wt {
+    # Computes lower and upper bounds of the molecular weight.
+    #   Args   : none (use the sequence stored in the object) or a
+    #            sequence object to analyse instead
+    #   Returns: reference to a two element array [lower, upper], each
+    #            formatted with one decimal place
+    #   Throws : if the argument is not a Bio::PrimarySeqI, or if
+    #            _is_alphabet_strict() rejects the sequence
+    my ($self, $object_argument) = @_;
+    my ($seqobj, $rcount);
+    my $_is_instance = defined $object_argument ? 0 : 1;
+
+    if ($_is_instance) {
+        # return mol. weight if previously calculated
+        my $cached = $self->{'_mol_wt'};
+        return $cached if $cached;
+        $seqobj = $self->{'_seqref'};
+        $rcount = $self->count_monomers();
+    }
+    else {
+        $seqobj = $object_argument;
+        $self->throw("Error: SeqStats works only on PrimarySeqI objects")
+            unless $seqobj->isa("Bio::PrimarySeqI");
+        # called only for its check: it throws on illegal letters
+        _is_alphabet_strict($seqobj);
+        $rcount = $self->count_monomers($seqobj);
+    }
+
+    # The weight table is selected by the type of monomer.
+    my $moltype      = $seqobj->alphabet();
+    my $weight_table = $Weights{$moltype};
+
+    # Every table entry holds [lightest, heaviest] weight of the symbol;
+    # summing count * weight over all symbols bounds the total weight.
+    # For unambiguous sequences both bounds are equal.
+    my @bounds = (0, 0);
+    foreach my $residue (keys %$rcount) {
+        my $occurrences = $$rcount{$residue};
+        $bounds[0] += $occurrences * $$weight_table{$residue}->[0];
+        $bounds[1] += $occurrences * $$weight_table{$residue}->[1];
+    }
+
+    if ($moltype =~ /protein/) {
+        # remove H2O during peptide bond formation.
+        my $lost_water = $water * ($seqobj->length - 1);
+        $_ -= $lost_water for @bounds;
+    }
+    else {
+        # Correction because phosphate of 5' residue has additional OH and
+        # sugar ring of 3' residue has additional H
+        $_ += $water for @bounds;
+    }
+
+    my $weight_array = [ map { sprintf("%.1f", $_) } @bounds ];
+
+    # Save in case called again later
+    $self->{'_mol_wt'} = $weight_array if $_is_instance;
+
+    return $weight_array;
+}
+
+
+=head2  count_codons
+
+ Title   : count_codons
+ Usage   : $rcount = $seqstats->count_codons(); or
+           $rcount = Bio::Tools::SeqStats->count_codons($seqobj);
+
+ Function: Counts the number of each type of codons in a given frame
+           for a dna or rna sequence.
+ Example :
+ Returns : Reference to a hash in which keys are codons of the genetic
+           alphabet used and values are number of occurrences of the
+           codons in the sequence. All codons with "ambiguous" bases
+           are counted together.
+ Args    : None or reference to sequence object
+
+ Throws  : an exception if type of sequence is unknown or protein.
+
+=cut
+
+sub count_codons {
+    # Counts the codons of a dna or rna sequence, read in frame 1.
+    #   Args   : none (use the sequence stored in the object) or a
+    #            sequence object to analyse instead
+    #   Returns: reference to a hash mapping upper-case codon => count;
+    #            codons containing any letter other than A, C, G, T, U
+    #            are summed under the key 'ambiguous'.  The hash is empty
+    #            when the sequence is shorter than one codon.
+    #   Throws : if the argument is not a Bio::PrimarySeqI or the
+    #            alphabet is neither dna nor rna
+    my ($self, $object_argument) = @_;
+    my ($seqobj, $_is_strict);
+    my $_is_instance = defined $object_argument ? 0 : 1;
+
+    if ($_is_instance) {
+        # return count if previously calculated
+        return $self->{'_codon_count'} if $self->{'_codon_count'};
+        $_is_strict = $self->{'_is_strict'};   # retrieve "strictness"
+        $seqobj     = $self->{'_seqref'};
+    }
+    else {
+        $seqobj = $object_argument;
+        $seqobj->isa("Bio::PrimarySeqI") ||
+          $self->throw("Error: SeqStats works only on PrimarySeqI objects");
+        $_is_strict = _is_alphabet_strict($seqobj);
+    }
+
+    # Codon counts only make sense for nucleic acid sequences
+    my $alphabet = $seqobj->alphabet();
+
+    unless ($alphabet =~ /[dr]na/i) {
+        $seqobj->throw("Codon counts only meaningful for dna or rna, ".
+                       "not for $alphabet sequences.");
+    }
+
+    # If sequence contains ambiguous bases, warn that codons
+    # containing them will all be lumped together in the count.
+    if (!$_is_strict) {
+        $seqobj->warn("Sequence $seqobj contains ambiguous bases.".
+        " All codons with ambiguous bases will be added together in count.")
+            if $self->verbose >= 0;
+    }
+
+    my $seq    = uc $seqobj->seq();
+    my $seqlen = length $seq;
+
+    # Always start from a fresh hash so that a sequence without a single
+    # complete codon still yields (and caches) an empty hash reference.
+    my %count;
+
+    # Step through the string by threes; a trailing partial codon is
+    # ignored.  Indexing avoids re-copying the remaining sequence on
+    # every iteration.
+    for (my $pos = 0; $pos + 3 <= $seqlen; $pos += 3) {
+        my $codon = substr($seq, $pos, 3);
+        if ($codon =~ /[^ACTGU]/) {
+            $count{'ambiguous'}++;   # lump together ambiguous codons
+        }
+        else {
+            $count{$codon}++;
+        }
+    }
+
+    if ($_is_instance) {
+        $self->{'_codon_count'} = \%count;  # Save in case called again later
+    }
+
+    return \%count;
+}
+
+
+=head2  hydropathicity
+
+ Title   : hydropathicity
+ Usage   : $gravy = $seqstats->hydropathicity(); or
+           $gravy = Bio::Tools::SeqStats->hydropathicity($seqobj);
+
+ Function: Calculates the mean Kyte-Doolittle hydropathicity for a
+           protein sequence. Also known as the "gravy" score. Refer to 
+           Kyte J., Doolittle R.F., J. Mol. Biol. 157:105-132(1982). 
+ Example :
+ Returns : float 
+ Args    : None or reference to sequence object
+
+ Throws  : an exception if type of sequence is not protein.
+
+=cut
+
+sub hydropathicity {
+    # Mean Kyte-Doolittle hydropathicity ("gravy" score) of a protein.
+    #   Args   : none (use the sequence stored in the object) or a
+    #            sequence object to analyse instead
+    #   Returns: sum of the per-residue values of $amino_hydropathicity
+    #            divided by the sequence length; residues without a
+    #            table entry contribute 0
+    #   Throws : if the argument is not a Bio::PrimarySeqI, or the
+    #            sequence is empty, not protein, or ambiguous
+    my ($self, $object_argument) = @_;
+    my ($seqobj, $_is_strict);
+    my $_is_instance = defined $object_argument ? 0 : 1;
+
+    if ($_is_instance) {
+        # Test with defined(): a cached score of exactly 0 is a valid
+        # result and must not force a recalculation.
+        return $self->{'_hydropathicity'}
+            if defined $self->{'_hydropathicity'};
+        $_is_strict = $self->{'_is_strict'};   # retrieve "strictness"
+        $seqobj     = $self->{'_seqref'};
+    }
+    else {
+        $seqobj = $object_argument;
+        $seqobj->isa("Bio::PrimarySeqI") ||
+          $self->throw("Error: SeqStats works only on PrimarySeqI objects");
+        $_is_strict = _is_alphabet_strict($seqobj);
+    }
+
+    # hydropathicity not meaningful for empty sequences
+    unless ($seqobj->length() > 0) {
+        $seqobj->throw("hydropathicity not defined for zero-length sequences");
+    }
+
+    # hydropathicity only makes sense for protein sequences
+    my $alphabet = $seqobj->alphabet();
+
+    unless ($alphabet =~ /protein/i) {
+        $seqobj->throw("hydropathicity only meaningful for protein, ".
+                       "not for $alphabet sequences.");
+    }
+
+    # Ambiguous amino acids have no single hydropathicity value, so the
+    # score cannot be computed for such sequences.
+    unless ($_is_strict) {
+        $seqobj->throw("Sequence $seqobj contains ambiguous amino acids. ".
+        "Hydropathicity can not be caculated.")
+    }
+
+    my $seq = $seqobj->seq();
+
+    # Add up the table value of every residue (exactly length($seq)
+    # look-ups), then average over the sequence length.
+    my $gravy = 0;
+    foreach my $residue (split //, uc $seq) {
+        $gravy += $amino_hydropathicity->{$residue} || 0;   # table look-up
+    }
+    $gravy /= length($seq);
+
+    if ($_is_instance) {
+        $self->{'_hydropathicity'} = $gravy;  # Save in case called again later
+    }
+
+    return $gravy;
+}
+
+
+=head2  _is_alphabet_strict
+
+ Title   :  _is_alphabet_strict
+ Usage   :
+ Function: internal function to determine whether there are
+           any ambiguous elements in the current sequence
+ Example :
+ Returns : 1 if strict alphabet is being used,
+           0 if ambiguous elements are present
+ Args    :
+
+ Throws  : an exception if type of sequence is unknown (ie amino or
+           nucleic) or if unknown letter in alphabet. Ambiguous
+           monomers are allowed.
+
+=cut
+
+sub _is_alphabet_strict {
+    # Classifies the letters of a sequence against its alphabet.
+    #   Args   : a sequence object providing alphabet(), seq() and throw()
+    #   Returns: 1 if only letters of the strict alphabet occur,
+    #            0 if letters of the extended (ambiguous) alphabet occur
+    #   Throws : (through the sequence object) if the alphabet is not
+    #            listed in %Alphabets_strict / %Alphabets, or if the
+    #            sequence holds a letter outside the extended alphabet
+    my ($seqobj) = @_;
+    my $moltype = $seqobj->alphabet();
+
+    # Raise the documented exception for an unknown sequence type rather
+    # than dying later on an undefined array reference.
+    unless (defined $moltype
+            && defined $Alphabets_strict{$moltype}
+            && defined $Alphabets{$moltype}) {
+        $seqobj->throw("Alphabet not OK for $seqobj");
+    }
+
+    # convert everything to upper case to be safe
+    my $seqstring = uc $seqobj->seq();
+
+    # Since T is used in RichSeq RNA sequences, do conversion locally
+    $seqstring =~ s/T/U/g if $moltype eq 'rna';
+
+    # First check whether only the 'strict' letters are present.  The
+    # letters are quoted with \Q...\E so that none of them can act as a
+    # metacharacter (range, negation, escape) inside the character class.
+    my $strict_letters = join('', @{ $Alphabets_strict{$moltype} });
+    return 1 unless $seqstring =~ /[^\Q$strict_letters\E]/;
+
+    # Next try to match with the alphabet's ambiguous letters
+    my $all_letters = join('', @{ $Alphabets{$moltype} });
+    return 0 unless $seqstring =~ /[^\Q$all_letters\E]/;
+
+    # If we got here there is an illegal letter in the sequence
+    $seqobj->throw("Alphabet not OK for $seqobj");
+}
+
+=head2   _print_data
+
+ Title   : _print_data
+ Usage   : $seqobj->_print_data() or Bio::Tools::SeqStats->_print_data();
+ Function: Displays dna / rna parameters (used for debugging)
+ Returns : 1
+ Args    : None
+
+Used for debugging.
+
+=cut
+
+sub _print_data {
+    # Debugging aid: prints the base weights and the per-residue dna and
+    # rna weights held in the package variables to the currently
+    # selected output handle.  Always returns 1.
+    my @rows = (
+        [ 'adenine',  $adenine  ],
+        [ 'guanine',  $guanine  ],
+        [ 'cytosine', $cytosine ],
+        [ 'thymine',  $thymine  ],
+        [ 'uracil',   $uracil   ],
+
+        [ 'dna_A_wt', $dna_A_wt ],
+        [ 'dna_C_wt', $dna_C_wt ],
+        [ 'dna_G_wt', $dna_G_wt ],
+        [ 'dna_T_wt', $dna_T_wt ],
+
+        [ 'rna_A_wt', $rna_A_wt ],
+        [ 'rna_C_wt', $rna_C_wt ],
+        [ 'rna_G_wt', $rna_G_wt ],
+        [ 'rna_U_wt', $rna_U_wt ],
+    );
+
+    foreach my $row (@rows) {
+        my ($label, $value) = @$row;
+        # labels are left-justified to the width of the longest one (8)
+        print sprintf("\n %-8s = :  %s \n", $label, $value);
+    }
+
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqWords.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqWords.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SeqWords.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,287 @@
+# $Id: SeqWords.pm,v 1.17.4.1 2006/10/02 23:10:32 sendu Exp $
+#---------------------------------------------------------------------------
+# PACKAGE    : Bio::Tools::SeqWords
+# PURPOSE    : To count n-mers in any sequence of characters
+# AUTHOR     : Derek Gatherer (d.gatherer at vir.gla.ac.uk)
+# SOURCE     : 
+# CREATED    : 21st March 2000
+# MODIFIED   : 11th November 2003 (DG - new method, count_overlap_words)
+# LICENCE    : You may distribute this module under the same terms 
+#	          : as the rest of BioPerl.
+#---------------------------------------------------------------------------
+
+=head1 NAME
+
+Bio::Tools::SeqWords - Object holding n-mer statistics for a sequence
+
+=head1 SYNOPSIS
+
+Take a sequence object and create an object for the purposes of 
+holding n-mer word statistics about that sequence. The sequence can 
+be nucleic acid or protein.
+
+In count_words() the words are counted in a non-overlapping manner,
+ie. in the style of a codon table, but with any word length.
+
+In count_overlap_words() the words are counted in an overlapping
+manner.
+
+For counts on opposite strand (DNA/RNA), a reverse complement
+method should be performed, and then the count repeated.
+
+Create the SeqWords object, e.g.:
+
+  my $inputstream = Bio::SeqIO->new(-file => "seqfile", 
+	                                 -format => 'Fasta');
+  my $seqobj = $inputstream->next_seq();
+  my $seq_word = Bio::Tools::SeqWords->new(-seq => $seqobj);
+
+Or:
+
+  my $seqobj = Bio::PrimarySeq->new(-seq => "agggtttccc",
+                                    -alphabet => 'dna',
+                                    -id => 'test');
+  my $seq_word  =  Bio::Tools::SeqWords->new(-seq => $seqobj);
+
+  # obtain a hash of word counts, eg:
+
+  my $hash_ref = $seq_word->count_words($word_length);
+
+  # display hash table, eg:
+
+  my %hash = %$hash_ref;
+  foreach my $key(sort keys %hash)
+  {
+    print "\n$key\t$hash{$key}";
+  }
+
+Or:
+
+  my $hash_ref = 
+     Bio::Tools::SeqWords->count_words($seqobj,$word_length);
+
+=head1 DESCRIPTION
+
+L<Bio::Tools::SeqWords> is a featherweight object for the calculation of
+n-mer word occurrences in a single sequence.  It is envisaged that the
+object will be useful for construction of scripts which use n-mer word
+tables as the raw material for statistical calculations; for instance,
+hexamer frequency for the calculation of coding potential, or the
+calculation of periodicity in repetitive DNA.  Triplet frequency is
+already handled by L<Bio::Tools::SeqStats> (author: Peter Schattner).
+
+There are a few possible applications for protein, e.g. hypothesised
+amino acid 7-mers in heat shock proteins, or proteins with multiple
+simple motifs.  Sometimes these protein periodicities are best seen
+when the amino acid alphabet is truncated, e.g. Shulman alphabet.  Since
+there are quite a few of these shortened alphabets, this module does
+not specify any particular alphabet.
+
+See Synopsis above for object creation code.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Derek Gatherer, in the loosest sense of the word 'author'.  The
+general shape of the module is lifted directly from the SeqStat
+module of Peter Schattner. The central subroutine to count the words is
+adapted from original code provided by Dave Shivak, in response to a
+query on the bioperl mailing list.  At least 2 other people provided
+alternative means (equally good but not used in the end) of performing
+the same calculation.  Thanks to all for your assistance.
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::SeqWords;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+sub new {
+    # Constructor: stores the sequence whose words will be counted.
+    #   Args   : the sequence as the SEQ named parameter, or as a plain
+    #            first argument (any reference)
+    #   Returns: the new object with '_seqref' set
+    #   Throws : if no Bio::PrimarySeqI object was supplied
+    my ($class, @args) = @_;
+    # our new standard way of instantiation
+    my $self = $class->SUPER::new(@args);
+
+    my ($seqobj) = $self->_rearrange([qw(SEQ)], @args);
+    if (!defined($seqobj) && @args && ref($args[0])) {
+        # parameter not passed as named parameter?
+        $seqobj = $args[0];
+    }
+
+    # A missing or non-object sequence must raise the module's own
+    # exception rather than Perl's "Can't call method "isa"" error.
+    unless (defined $seqobj && eval { $seqobj->isa("Bio::PrimarySeqI") }) {
+        $self->throw(ref($self) . " works only on PrimarySeqI objects\n");
+    }
+
+    $self->{'_seqref'} = $seqobj;
+    return $self;
+}
+
+
+=head2 count_words
+
+ Title   : count_words
+ Usage   : $word_count = $seq_stats->count_words($word_length)
+                or 
+           $word_count = Bio::Tools::SeqWords->count_words($seqobj,$word_length);
+ Function: Counts non-overlapping words within a string, any alphabet is 
+           used
+ Example : a sequence ACCGTCCGT, counted at word length 4, will give the hash
+           {ACCG => 1, TCCG => 1}
+ Returns : Reference to a hash in which keys are words (any length) of the
+           alphabet used and values are number of occurrences of the word 
+           in the sequence.
+ Args    : Word length as scalar and, reference to sequence object if
+           required
+
+           Throws an exception if word length is not a positive integer
+           or if word length is longer than the sequence.
+
+=cut
+
+sub count_words
+{
+    # Counts non-overlapping words of the given length.
+    # Two calling conventions are accepted:
+    #   SeqWords->count_words($seq, $wordlen)  - explicit sequence object
+    #   $obj->count_words($wordlen)            - sequence stored in $obj
+    # Returns the hash reference produced by _count(); throws when the
+    # word length is blank, contains letters, is not a positive integer,
+    # or exceeds the sequence length.
+    my ($self, $seqobj, $word_length) = @_;
+
+    if (ref $seqobj) {
+        $self->throw("SeqWords works only on PrimarySeqI objects\n")
+            unless $seqobj->isa("Bio::PrimarySeqI");
+    }
+    else {
+        # the first argument was really the word length
+        ($word_length, $seqobj) = ($seqobj, undef);
+    }
+
+    $seqobj = $self->{'_seqref'} unless defined $seqobj;
+
+    if ($word_length eq "" || $word_length =~ /[a-z]/i) {
+        $self->throw("SeqWords cannot accept non-numeric characters".
+                     " or a null value in the \$word_length variable\n");
+    }
+    elsif ($word_length < 1 || $word_length > int($word_length)) {
+        $self->throw("SeqWords requires the word length to be a ".
+                     "positive integer\n");
+    }
+
+    my $sequence_length = length(uc $seqobj->seq());
+    if ($word_length > $sequence_length) {
+        $self->throw("die in _count, \$word_length is bigger ".
+                     "than sequence length\n");
+    }
+
+    my $word_counts = _count($seqobj, $word_length, "non-overlap");
+    return $word_counts;   # ref. to a hash
+}
+
+=head2 count_overlap_words
+
+ Title   : count_overlap_words
+ Usage   : $word_count = $word_obj->count_overlap_words($word_length);
+ Function: Counts overlapping words within a string, any alphabet is used
+ Example : A sequence ACCAACCA, counted at word length 4, will give the hash
+	        {ACCA=>2, CCAA=>1, CAAC=>1, AACC=>1}
+ Returns : Reference to a hash in which keys are words (any length) of the 
+           alphabet used and values are number of occurrences of the word in 
+           the sequence.
+ Args    : Word length as scalar
+
+           Throws an exception if word length is not a positive integer
+           or if word length is longer than the sequence.
+
+=cut
+
+sub count_overlap_words
+{
+    # Counts overlapping words of the given length.
+    # Two calling conventions are accepted:
+    #   SeqWords->count_overlap_words($seq, $wordlen)
+    #   $obj->count_overlap_words($wordlen)
+    # Returns the hash reference produced by _count(); throws when the
+    # word length is missing, contains letters, is not a positive
+    # integer, or exceeds the sequence length.
+    my ($self,$seqobj,$word_length) = @_;
+
+    # check how we were called, and if necessary rearrange arguments
+    if (ref($seqobj)) {
+        # call as SeqWords->count_overlap_words($seq, $wordlen)
+        if (! $seqobj->isa("Bio::PrimarySeqI")) {
+            $self->throw("SeqWords works only on PrimarySeqI objects\n");
+        }
+    }
+    else {
+        # call as $obj->count_overlap_words($wordlen)
+        $word_length = $seqobj;
+        $seqobj = undef;
+    }
+
+    if (! defined($seqobj)) {
+        $seqobj = $self->{'_seqref'};
+    }
+
+    # Apply the same word-length checks as count_words().  Without them
+    # a zero, negative, fractional or non-numeric length would be
+    # interpolated into the counting pattern unchecked.
+    if (!defined($word_length) || $word_length eq ""
+        || $word_length =~ /[a-z]/i) {
+        $self->throw("SeqWords cannot accept non-numeric characters".
+                     " or a null value in the \$word_length variable\n");
+    }
+    elsif ($word_length < 1 || ($word_length - int($word_length)) > 0) {
+        $self->throw("SeqWords requires the word length to be a ".
+                     "positive integer\n");
+    }
+
+    my $seqstring = uc $seqobj->seq();
+
+    if ($word_length > length($seqstring)) {
+        $self->throw("die in _count, \$word_length is bigger ".
+                     "than sequence length\n");
+    }
+
+    my $type = "overlap";
+    my $words = _count($seqobj, $word_length, $type);
+    return $words;   # ref. to a hash
+}
+
+# the actual counting routine
+# used by both count_words and count_overlap_words
+#   Args   : sequence object, word length, and the counting mode
+#            ("non-overlap" or "overlap")
+#   Returns: reference to a hash mapping upper-case word => count
+#   Throws : if the mode is neither "overlap" nor "non-overlap"
+# Only runs of word characters (\w) form words; any other character
+# (such as a gap symbol) separates words.
+sub _count {
+    my ($seqobj, $word_length, $type) = @_;
+    my %codon = ();
+
+    # now the real business
+    # JS - remove DNA assumption
+
+    my $seqstring = uc $seqobj->seq();
+    if ($type eq "non-overlap") {
+        # consecutive matches: every match resumes after the previous one
+        while ($seqstring =~ /(\w{$word_length})/g) {
+            $codon{$1}++;
+        }
+    }
+    elsif ($type eq "overlap") {
+        # A zero-width lookahead lets the match position advance by one
+        # character at a time, so every start position is examined
+        # exactly once in a single pass.  Words on either side of a
+        # non-word character are therefore neither missed nor counted
+        # twice, and the sequence is not re-copied for every frame.
+        while ($seqstring =~ /(?=(\w{$word_length}))/g) {
+            $codon{$1}++;   # keep adding to hash
+        }
+    }
+    else {
+        Bio::Root::Root->throw("\nSomething badly wrong here. \$type: $type can only be overlap or non-overlap");
+    }
+    return \%codon;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/saigo.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/saigo.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/saigo.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,219 @@
+# BioPerl module for Bio::Tools::SiRNA::Ruleset::saigo
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Bristol-Myers Squibb
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::SiRNA::Ruleset::saigo - Perl object implementing the Saigo
+group's rules for designing small inhibitory RNAs
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Instead, use Bio::Tools::SiRNA and
+specify the saigo ruleset:
+
+  use Bio::Tools::SiRNA;
+
+  my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq,
+                                               -rules  => 'saigo'
+    );
+  my @pairs = $sirna_designer->design;
+
+  foreach $pair (@pairs) {
+      my $sense_oligo_sequence = $pair->sense->seq;
+      my $antisense_oligo_sequence = $pair->antisense->seq;
+
+      # print out results
+      print join ("\t", $pair->start, $pair->end, $pair->rank,
+                  $sense_oligo_sequence, $antisense_oligo_sequence), "\n";
+  }
+
+=head1 DESCRIPTION
+
+This package implements the rules for designing siRNA reagents
+published by Ui-Tei et al (2004).  The rules are:
+
+=over 5
+
+=item 1.
+
+The first base in the sense strand of the duplex must be a G or C
+
+=item 2.
+
+The first base in the antisense strand of the duplex must be an A or U
+
+=item 3.
+
+The first 7 nucleotides in the antisense strand of the duplex must be
+A or U
+
+=item 4.
+
+There cannot be more than 9 consecutive G or C nucleotides
+
+=item 5.
+
+The first 12 nucleotides in the sense strand of the duplex should have
+33-66% GC
+
+=back
+
+The module inherits from Bio::Tools::SiRNA.  See the documentation for
+that module for information on how to specify the target and recover
+the SiRNA duplex information.
+
+=head2 EXPORT
+
+None.
+
+=head1 SEE ALSO
+
+L<Bio::Tools::SiRNA>, 
+L<Bio::SeqFeature::SiRNA::Pair>,
+L<Bio::SeqFeature::SiRNA::Oligo>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Donald Jackson (donald.jackson at bms.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::SiRNA::Ruleset::saigo;
+
+use strict;
+
+use base qw(Bio::Tools::SiRNA);
+
+=head2 new
+
+  Title	: new
+  Usage  : Do not call directly - use Bio::Tools::SiRNA->new instead.
+  Returns : Bio::Tools::SiRNA::Ruleset::saigo object
+  Args	: none
+
+=cut
+
+sub new {
+    # Constructor: forces the RULES argument to 'saigo' and delegates
+    # object construction to the parent class.
+    #   Args   : named arguments, passed on to the parent constructor
+    #   Returns: whatever the parent constructor returns for this class
+    my ($proto, %args) = @_;
+    my $class = ref($proto) || $proto;
+
+    $args{'RULES'} = 'saigo';
+
+    # Dispatch through SUPER on the class itself; the former
+    # "$self::SUPER" named an undefined package variable, so the call
+    # died with "Can't call method "new" on an undefined value".
+    return $class->SUPER::new(%args);
+ }
+
+sub _get_oligos {
+    # Slides a 23 nt window over the target region and registers, via
+    # add_oligos(), every window accepted by _oligo_ok().  The position
+    # handed to add_oligos() is the region start plus the 1-based window
+    # offset.
+    my ($self) = @_;
+
+    my ($targseq, $targstart) = $self->_get_targetregion;
+    my $last_offset = length($targseq) - 23;
+
+    foreach my $offset (0 .. $last_offset) {
+        my $window = substr($targseq, $offset, 23);
+        next unless $self->_oligo_ok($window);
+        $self->add_oligos($window, $targstart + $offset + 1);
+    }
+}
+
+
+sub _get_sense {
+    # Derives the sense strand from a target string: drops the first two
+    # characters (the overhang) and writes every T or t as U.
+    my ($self, $target) = @_;
+
+    (my $sense = $target) =~ s/^..//;   # trim off 1st 2 nt to get overhang
+    $sense =~ tr/Tt/UU/;                # convert T's to U's (transcribe)
+
+    return $sense;
+}
+
+sub _get_anti {
+    # Derives the antisense strand from a target string: complements
+    # every character with _comp() in reverse order, drops the first two
+    # characters of the result (the overhang) and writes every T or t
+    # as U.
+    my ($self, $target) = @_;
+
+    # reverse + map visits every character; the former pop-until-false
+    # loop would have stopped at a character that is false (e.g. "0")
+    my $anti = join('', map { $self->_comp($_) } reverse split(//, $target));
+
+    #trim off 1st 2 nt to get overhang
+    $anti =~ s/^..//;
+    #convert T's to U's
+    $anti =~ s/T/U/gi;
+
+    return $anti;
+}
+sub _oligo_ok {
+    # Tests one candidate target (23 nt, as supplied by _get_oligos)
+    # against the ruleset.  Positions 2..20 of the candidate are the
+    # duplex region inspected here.
+    #   Args   : candidate sequence string
+    #   Returns: 1 if every rule is satisfied, 0 otherwise; the reason
+    #            for a rejection is reported through debug()
+    my ($self, $testseq) = @_;
+
+    $self->debug("Testing $testseq...\n");
+
+    my @testseq = split(//, $testseq);
+
+    # The checks below index up to position 20; a shorter candidate can
+    # never pass and would only trigger undefined-value warnings.
+    if (@testseq < 21) {
+        $self->debug("Candidate is too short to be tested\n");
+        return 0;
+    }
+
+    # is 5p end of sense strand a G/C?
+    unless ($testseq[2] =~ /[GC]/i) {
+        $self->debug("No G/C at sense 5' end\n");
+        return 0;
+    }
+    # is 5p end of antisense strand an A/T?
+    unless ($testseq[20] =~ /[AT]/i) {
+        $self->debug("No A/T at antisense 5' end\n");
+        return 0;
+    }
+
+    # are 4 of the last 7 bases in the duplex A/T?
+    my $atcount_3p = grep { /[AT]/i } @testseq[14 .. 20];
+    unless ($atcount_3p >= 4) {
+        $self->debug("Found $atcount_3p A/T in last 7 bases of duplex\n");
+        return 0;
+    }
+
+    # what is gc fraction in rest of duplex? Target: 33 to 66 pct gc (4-8 of 12)
+    my $gccount_5p = grep { /[GC]/i } @testseq[2 .. 13];
+    if ($gccount_5p < 4) {
+        $self->debug("Found only $gccount_5p GCs in 5p end of duplex\n");
+        return 0;
+    }
+    if ($gccount_5p > 8) {
+        $self->debug("Found too many ($gccount_5p) GCs in 5p end of duplex\n");
+        return 0;
+    }
+
+    # no more than 9 consecutive GC: a run of exactly 9 is still allowed,
+    # so only a run of 10 or more rejects the candidate
+    if ($testseq =~ /[GC]{10,}/i) {
+        $self->debug("Found more than 9 consecutive GCs\n");
+        return 0;
+    }
+
+    $self->debug("Oligo passed \n");
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/tuschl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/tuschl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA/Ruleset/tuschl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,219 @@
+#
+#
+# BioPerl module for Bio::Tools::SiRNA::Ruleset::tuschl
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Bristol-Myers Squibb
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::SiRNA::Ruleset::tuschl - Perl object implementing the
+tuschl group's rules for designing small inhibitory RNAs
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Instead, use Bio::Tools::SiRNA and
+specify the tuschl ruleset:
+
+  use Bio::Tools::SiRNA;
+
+  my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq,
+                                               -rules  => 'tuschl'
+    );
+  my @pairs = $sirna_designer->design;
+
+  foreach $pair (@pairs) {
+      my $sense_oligo_sequence = $pair->sense->seq;
+      my $antisense_oligo_sequence = $pair->antisense->seq;
+
+      # print out results
+      print join ("\t", $pair->start, $pair->end, $pair->rank,
+                  $sense_oligo_sequence, $antisense_oligo_sequence), "\n";
+  }
+
+=head1 DESCRIPTION
+
+This package implements the rules for designing siRNA reagents
+developed by Tuschl and colleagues (see
+http://www.rockefeller.edu/labheads/tuschl/sirna.html). It looks for
+oligos that match the following patterns in the target sequence:
+
+  1. AA(N19)TT (rank 1)
+  2. AA(N21) (rank 2)
+  3. NA(N21) (rank 3)
+
+The package also supports selection of siRNA sequences that can be
+transcribed by pol3:
+
+    A[A,G]N17[C,T]
+
+=head1 SEE ALSO
+
+L<Bio::Tools::SiRNA>, L<Bio::SeqFeature::SiRNA::Pair>,
+L<Bio::SeqFeature::SiRNA::Oligo>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Donald Jackson (donald.jackson at bms.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+
+=cut
+
+package Bio::Tools::SiRNA::Ruleset::tuschl;
+
+use 5.006;
+use strict;
+
+use base qw(Bio::Tools::SiRNA);
+
+# Search pattern for each rank, keyed by rank.  Every pattern carries its
+# own capturing group around the candidate oligo.
+our %PATTERNS = (
+    1    => '(AA.{19}TT)',
+    2    => '(AA.{19}[ACG][ACG])',
+    3    => '([CGT]A.{21})',
+    Pol3 => '(.A[AG].{17}[CT]..)',
+);
+
+# Cutoff used by cutoff() when none has been set on the object.
+our $DEFAULT_CUTOFF = 2;
+
+=head2 new
+
+  Title	: new
+  Usage	: Do not call directly - use Bio::Tools::SiRNA->new instead.
+  Returns : Bio::Tools::SiRNA::Ruleset::tuschl object
+  Args	: none
+
+=cut
+
+# Constructor.  Forces the RULES argument to 'tuschl' and hands all
+# arguments on to the parent class constructor.
+#
+#   Args    : the key/value arguments to pass to the parent constructor
+#   Returns : whatever the parent constructor returns
+sub new {
+    my ($proto, %args) = @_;
+    my $class = ref($proto) || $proto;
+
+    $args{'RULES'} = 'tuschl';
+
+    # Dispatch to the parent class through SUPER::.  ($self::SUPER would be
+    # the undefined package variable $SUPER in package "self".)
+    return $class->SUPER::new(%args);
+}
+
+# Return the pattern string stored in %PATTERNS for the given rank
+# (undef when the rank has no entry).
+sub _regex {
+    my $self = shift;
+    my $rank = shift;
+
+    return $PATTERNS{$rank};
+}
+
+# Get/set the cutoff.  A true argument is stored; if nothing true is stored
+# afterwards, $DEFAULT_CUTOFF is stored instead.  Returns the stored value.
+sub cutoff {
+    my ($self, $cutoff) = @_;
+
+    $self->{'cutoff'} = $cutoff         if ($cutoff);
+    $self->{'cutoff'} = $DEFAULT_CUTOFF unless ($self->{'cutoff'});
+
+    return $self->{'cutoff'};
+}
+
+
+# Scan the target region with the pattern of every requested rank and pass
+# each acceptable hit to add_oligos.
+#
+# A hit is dropped when it contains a run of G or of C longer than
+# $self->gstring, contains an N, or has a GC fraction outside
+# $self->min_gc .. $self->max_gc.
+#
+#   Args    : none
+#   Returns : nothing useful; hits are recorded through add_oligos
+sub _get_oligos {
+    my ($self) = @_;
+
+    # A cutoff of 'pol3' selects the single pattern stored in %PATTERNS
+    # under the key 'Pol3'; any other cutoff selects ranks 1 .. cutoff.
+    my $cutoff = $self->cutoff;
+    my @ranks  = (lc($cutoff) eq 'pol3') ? ('Pol3') : (1 .. $cutoff);
+
+    # The target region is the same for every rank, so fetch it once.
+    my ($seq, $targstart) = $self->_get_targetregion();
+
+    # check for too many Gs (or Cs on the other strand).  gstring is read
+    # here as the longest run still allowed, so one more is too many.
+    # NOTE(review): confirm that reading against the intended rule.
+    my $too_long = $self->gstring + 1;
+    my $long_run = qr/G{${too_long},}|C{${too_long},}/i;
+
+    foreach my $rank (@ranks) {
+        my $regex = $self->_regex($rank);
+        next unless (defined $regex);    # no pattern stored for this rank
+
+        while ( $seq =~ /(.*?)$regex/gi ) {
+            my $target = $2;
+
+            # $-[2] is the offset of the hit from the start of $seq.  $1
+            # only holds the text since the previous hit, so its length is
+            # not an absolute position after the first hit.
+            my $start = $-[2] + $targstart;
+
+            next if ( $target =~ $long_run );
+            # skip Ns (for filtering)
+            next if ( $target =~ /N/i );
+
+            my @gc   = ( $target =~ /G|C/gi );
+            my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target)));
+            next if ($fxGC < $self->min_gc);
+            next if ($fxGC > $self->max_gc);
+
+            $self->add_oligos($target, $start, $rank);
+        }
+    }
+
+    return;
+}
+
+	 
+# Build the sense oligo string from a target string: drop the first two
+# characters, turn every T/t into U, then overwrite the last two characters
+# with TT.
+sub _get_sense {
+    my ($self, $target) = @_;
+
+    for ($target) {
+        s/^..//;      # trim off 1st 2 nt to get overhang
+        s/T/U/gi;     # convert T's to U's (transcribe)
+        s/..$/TT/;    # force last 2 nt to be T's
+    }
+
+    return $target;
+}
+
+# Build the antisense oligo string from a target string: reverse it,
+# replace each character with $self->_comp of it, drop the first two
+# characters, turn every T/t into U, then overwrite the last two characters
+# with TT.
+sub _get_anti {
+    my ($self, $target) = @_;
+    my @target = split(//, $target);
+    my ($nt, @antitarget);
+
+    # walk the target from its last character to its first
+    while ($nt = pop @target) {
+        push(@antitarget, $self->_comp($nt));
+    }
+    my $anti = join('', @antitarget);
+    # trim off 1st 2 nt to get overhang
+    $anti =~ s/^..//;
+    # convert T's to U's
+    $anti =~ s/T/U/gi;
+    # convert last 2 NT's to T
+    $anti =~ s/..$/TT/;
+
+    return $anti;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/SiRNA.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,575 @@
+# $Id: SiRNA.pm,v 1.13.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::SiRNA
+#
+# Cared for by Donald Jackson, donald.jackson at bms.com
+#
+# Copyright Bristol-Myers Squibb
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+SiRNA - Perl object for designing small inhibitory RNAs.
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::SiRNA;
+
+  my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq,
+                                               -rules  => 'saigo'
+    );
+  my @pairs = $sirna_designer->design;
+
+  foreach $pair (@pairs) {
+      my $sense_oligo_sequence = $pair->sense->seq;
+      my $antisense_oligo_sequence = $pair->antisense->seq;
+
+      # print out results
+      print join ("\t", $pair->start, $pair->end, $pair->rank,
+                  $sense_oligo_sequence, $antisense_oligo_sequence), "\n";
+  }
+
+=head1 DESCRIPTION
+
+Package for designing siRNA reagents.
+
+Input is a L<Bio::SeqI>-compliant object (the target).
+
+Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are
+added to the feature table of the target sequence.  Each
+Bio::SeqFeature::SiRNA::Pair contains two subfeatures
+(Bio::SeqFeature::Oligo objects) which correspond to the individual
+oligos.  These objects provide accessors for the information on the
+individual reagent pairs.
+
+This version of Bio::Tools::SiRNA represents a major change in architecture.
+Specific 'rulesets' for siRNA selection as developed by various groups are
+implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from
+Bio::Tools::SiRNA.  This will make it easier to add new rule sets or modify
+existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are 
+implemented. For consistency, the Tuschl rules are implemented by default.
+
+In addition, this module provides three 'extra' rules which can be added
+above and beyond any ruleset.
+
+=over 3
+
+=item 1.
+
+SiRNAs that overlap known SNPs (identified as SeqFeatures with 
+primary tag = variation) can be avoided.
+
+=item 2.
+
+Other regions (with primary tag = 'Excluded') can also be skipped.  I
+use this with Bio::Tools::Run::Mdust to avoid low-complexity regions
+(must be run separately), but other programs could also be used.
+
+=item 3.
+
+SiRNAs may also be selected in the 3 prime UTR of a gene by setting
+$sirna_designer-E<gt>include_3pr() to true.
+
+=back
+
+=head2 EXPORT
+
+None.
+
+=head1 SEE ALSO
+
+L<Bio::Tools::Run::Mdust>, L<Bio::SeqFeature::SiRNA::Pair>,
+L<Bio::SeqFeature::SiRNA::Oligo>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Donald Jackson (donald.jackson at bms.com)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::SiRNA;
+
+require 5.005_62;
+use strict;
+use warnings;
+
+use vars qw($AUTOLOAD);
+
+use Bio::Seq::RichSeq;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::SiRNA::Oligo;
+use Bio::SeqFeature::SiRNA::Pair;
+
+
+use base qw(Bio::Root::Root);
+
+
+# Complement of each nucleotide code handled by _comp.
+our %COMP = ( A => 'T',
+              T => 'A',
+              C => 'G',
+              G => 'C',
+              N => 'N',
+              );
+
+# Names of the arguments new() extracts from its argument list.  new()
+# reads MAX_GC and INCLUDE_3PR as well, so both must be listed here or the
+# values a caller supplies for them are never picked up.  They are appended
+# at the end so the position of the existing names is unchanged.
+our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS
+                   GSTRING TMPDIR TARGET DEBUG MAX_GC INCLUDE_3PR);
+
+
+=head2 new
+
+ Title		: new
+ Usage		: my $sirna_designer = Bio::Tools::SiRNA->new();
+ Function	: Constructor for designer object
+ Returns	: Bio::Tools::SiRNA object
+ Args		: target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq
+                  start_pad - distance from the CDS start to skip (default 75)
+                  end_pad - distance from the CDS end to skip (default 50)
+                  include_3pr - set to true to include SiRNAs in the 3prime UTR (default false)
+                  rules - rules for selecting siRNAs, currently supporting saigo and tuschl
+                  min_gc - minimum GC fraction (NOT percent) (default 0.4)
+                  max_gc - maximum GC fraction (NOT percent) (default 0.6)
+                  cutoff - worst 'rank' accepted(default 3)
+                  avoid_snps - boolean - reject oligos that overlap a variation
+                     SeqFeature in the target (default true)
+                  gstring - maximum allowed consecutive Gs.
+                     Too many can cause problems in synthesis (default 4)
+  Note		: All arguments can also be changed/accessed using autoloaded 
+                 methods such as:
+
+    my $start_pad = $sirna_designer->start_pad().
+
+=cut
+
+sub new {
+    my ($proto, @args) = @_;
+
+    my $self = bless {}, (ref($proto) || $proto);
+
+    # pull the recognised arguments out of the argument list
+    my %given;
+    @given{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args);
+
+    if ($given{'RULES'}) {
+        $self->rules($given{'RULES'});
+    }
+
+    # attribute => value used when the argument is missing or false
+    my @defaults = (
+        [ start_pad   => 75   ],    # nt from start to mask
+        [ end_pad     => 50   ],    # nt from end to mask
+        [ include_3pr => 0    ],    # look for oligos in 3prime UTR
+        [ min_gc      => 0.40 ],
+        [ max_gc      => 0.60 ],
+        [ cutoff      => 3    ],    # highest (worst) rank wanted
+        [ gstring     => 4    ],    # maximum allowed consecutive Gs - too
+                                    # many can cause problems in oligo synthesis
+        [ debug       => 0    ],
+    );
+    foreach my $entry (@defaults) {
+        my ($attr, $fallback) = @$entry;
+        $self->{$attr} = $given{ uc($attr) } || $fallback;
+    }
+
+    $self->{'oligos'} = [];
+
+    # t/f to avoid or include reagents that cover SNPs; on unless the
+    # caller says otherwise.  Written as one assignment of a conditional
+    # value: "cond ? $x = A : $x = 1" parses as "(cond ? ($x = A) : $x) = 1"
+    # and would always store 1.
+    $self->{'avoid_snps'} =
+        defined($given{'AVOID_SNPS'}) ? $given{'AVOID_SNPS'} : 1;
+
+    $self->{'tmpdir'} = $given{'TMPDIR'} || $ENV{'TMPDIR'} || $ENV{'TMP'} || '';
+
+    $self->target($given{'TARGET'}) if ($given{'TARGET'});
+
+    return $self;
+}
+
+
+=head2 target 
+
+  Title		: target
+  Usage		: my $target_seq = $sirna_designer->target(); # get the current target
+                  OR 
+                  $sirna_designer->target($new_target_seq); # set a new target 
+  Function	: Set/get the target as a Bio::SeqI-compliant object
+  Returns	: a Bio::SeqI-compliant object
+  Args		: a Bio::SeqI-compliant object (optional)
+
+=cut
+
+sub target {
+    my ($self, $target) = @_;
+
+    # called without a (true) argument: act as getter
+    unless ($target) {
+        return $self->{'target'} if ($self->{'target'});
+        return $self->throw("Target sequence not defined");
+    }
+
+    # called with an argument: check it before storing it
+    $target->isa('Bio::SeqI') or
+        $self->throw(  -class => 'Bio::Root::BadParameter',
+                       -text  => "Target must be passed as a Bio::Seq object" );
+
+    if (!$target->can('molecule')) {
+        # no molecule method, so fall back on the alphabet
+        ($target->alphabet eq 'dna') or
+            $self->throw(  -class => 'Bio::Root::BadParameter',
+                           -text  =>  "Sequences of alphabet ". $target->alphabet. " are not supported"
+                           );
+    }
+    else {
+        my %supported = map { $_ => 1 } qw(DNA MRNA CDNA);
+        $supported{ uc($target->molecule) } or
+            $self->throw(  -class => 'Bio::Root::BadParameter',
+                           -text  =>  "Sequences of type ". $target->molecule. " are not supported"
+                           );
+    }
+
+    $self->{'target'} = $target;
+    return 1;
+}
+
+=head2 rules
+
+    Title	: rules
+    Usage	: $sirna->rules('ruleset')
+    Purpose	: set/get ruleset to use for selecting SiRNA oligo pairs.
+    Returns	: name of the ruleset module currently in use
+    Args	: a ruleset name (currently supported: Tuschl, Saigo)
+                  or a Bio::Tools::SiRNA::RulesetI compliant object
+
+=cut
+
+sub rules {
+    my ($self, $rules) = @_;
+
+    $self->_load_ruleset($rules) if ($rules);
+
+    # default: use tuschl rules
+    $self->_load_ruleset('tuschl') if (!$self->{_rules});
+
+    return $self->{_rules};
+}
+
+# Load the module implementing a ruleset and re-bless $self into it.
+#
+#   Args    : $ruleset - ruleset name; lower-cased to form the module name
+#   Returns : 1 on success
+#   Throws  : if the name is not a plain word or the module cannot be loaded
+sub _load_ruleset {
+    my ($self, $ruleset) = @_;
+
+    # The name ends up inside a string eval, so accept plain words only.
+    unless (defined $ruleset and $ruleset =~ /^\w+$/) {
+        $self->throw("Invalid ruleset name: " .
+                     (defined $ruleset ? $ruleset : 'undef'));
+    }
+
+    # Once a ruleset is loaded $self is blessed into <base>::Ruleset::<name>.
+    # Strip that suffix so a later call builds <base>::Ruleset::<new name>
+    # rather than nesting a second ::Ruleset:: level under the first.
+    (my $base = ref($self)) =~ s/::Ruleset::\w+$//;
+    my $rule_module = join('::', $base, 'Ruleset', lc($ruleset));
+
+    eval "require $rule_module";
+    if ($@) {
+        $self->throw("Unable to load $rule_module: $@");
+    }
+
+    $self->{_rules} = $rule_module;
+    bless($self, $rule_module); # recast as subclass
+
+    return 1;
+}
+
+=head2 design
+
+  Title		: design
+  Usage		: my @pairs = $sirna_designer->design();
+  Purpose	: Design SiRNA oligo pairs.  
+  Returns	: A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects
+  Args		: none
+
+=cut
+
+sub design {
+    my ($self) = @_;
+
+    $self->throw('Unable to design siRNAs: no rule set specified')
+        unless ($self->rules);
+
+    # the oligos are recorded on the target; the list returned here is not used
+    my @found = $self->_get_oligos();
+
+    # hand back every pair now present among the target's top-level features
+    my @pairs;
+    foreach my $feat ($self->target->top_SeqFeatures) {
+        push(@pairs, $feat) if ($feat->isa('Bio::SeqFeature::SiRNA::Pair'));
+    }
+    return @pairs;
+}
+    
+# Work out the region of the target in which oligos may be placed, add it
+# to the target as a 'Target' feature, and store the list of features that
+# oligos are checked against ('Excluded' features, plus 'variation'
+# features when avoid_snps is true).  Returns the new 'Target' feature.
+sub _define_target {
+    my ($self) = @_;
+
+    my $target = $self->target or
+        $self->throw("Unable to design oligos - no target provided");
+
+    # first CDS feature, when the target can list features at all
+    my $cds;
+    if ($target->can('top_SeqFeatures')) {
+        ($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures;
+    }
+
+    my ($left, $right);
+    if (!$cds) {
+        # no CDS: pad inwards from both ends of the whole sequence
+        $left  = 0 + $self->start_pad;
+        $right = $target->length - $self->end_pad;
+    }
+    else {
+        # pad inwards from the CDS start; the right edge is the CDS end,
+        # or the end of the sequence when include_3pr is set
+        $left  = $cds->start + $self->start_pad;
+        $right = ($self->include_3pr ? $target->length : $cds->end)
+                 - $self->end_pad;
+    }
+
+    # is there anything left?
+    $self->throw("There isn't enough sequence to design oligos.  Please reduce start_pad and end_pad or supply more sequence")
+        if (($right - $left) < 20);
+
+    # define target region
+    my $targregion = Bio::SeqFeature::Generic->new( -start   => $left,
+                                                    -end     => $right,
+                                                    -primary => 'Target' );
+    $self->target->add_SeqFeature($targregion);
+
+    # locate excluded regions
+    my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures;
+    push(@excluded, grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures)
+        if ($self->avoid_snps);
+
+    $self->excluded(\@excluded);
+
+    return $targregion;
+}
+
+# Return (sequence string, start position) of the target region: the first
+# top-level feature tagged 'Target', created through _define_target when
+# the target has none yet.
+sub _get_targetregion {
+    my ($self) = @_;
+
+    my $region = (grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures)[0];
+    $region = $self->_define_target unless ($region);
+
+    $region or $self->throw("Target region for SiRNA design not defined");
+
+    # the sequence string alone carries no position, so return the start too
+    return ($region->seq->seq, $region->start);
+}
+
+# MOVE to SiRNA::Ruleset::tuschl
+# sub _regex {
+#     my ($self, $rank) = @_;
+#     return $PATTERNS{$rank};
+# }
+
+# sub _get_oligos {
+#     # use regular expressions to pull out oligos
+
+#     my ($self, $rank) = @_;
+#     my $regex = $self->_regex($rank);
+#     my @exclude;
+
+
+#     my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
+#     my $seq = $targregion->seq->seq;
+#     # but this way I loose start info
+#     my $targstart = $targregion->start;
+    
+#     # exclude masked region
+#     push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures);
+
+#     # add SNP checking
+#     if ($self->avoid_snps) {
+# 	my @snps =  grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
+# 	push(@exclude, @snps);
+#     }
+
+#     while ( $seq =~ /$regex/gi ) {
+# 	my $target = $1;
+
+# 	# check for too many Gs (or Cs on the other strand)
+# 	next if ( $target =~ /G{ $self->gstring,}/io );
+# 	next if ( $target =~ /C{ $self->gstring,}/io );
+# 	# skip Ns (for filtering)
+# 	next if ( $target =~ /N/i);
+
+# 	my $start = length($`) + $targstart;
+# 	my $stop = $start + length($target) -1;
+
+# 	my @gc = ( $target =~ /G|C/gi);
+# 	my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target)));
+# 	next if ($fxGC < $self->min_gc);
+# 	next if ($fxGC > $self->max_gc);
+
+# 	my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start 		=> $start,
+# 							-end 		=> $stop,
+# 							-strand 	=> 1,
+# 							-seq 		=> _get_sense($target),
+# 							-source_tag	=> ref($self),
+# 						       );	
+
+# 	my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start 	=> $start,
+# 							 -end		=> $stop,
+# 							 -strand	=> -1,
+# 							 -seq 		=> _get_anti($target), 
+# 							 -source_tag	=> ref($self),
+# 							 );
+
+#   	my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank 		=> $rank,
+# 						       -fxGC		=> $fxGC,
+# 						       -sense 		=> $sense,
+# 						       -antisense 	=> $asense,     
+# 						       -source_tag	=> ref($self),
+# 						       );
+
+# 	unless ($self->_has_overlap($sirna, \@exclude)) {
+# 	    $self->target->add_SeqFeature($sirna);
+# 	}
+#     }
+# }    
+
+=head2 add_oligos
+
+  Title		: add_oligos
+  Usage	 	: $sirna_designer->add_oligos($sequence, $start, $rank);
+  Purpose	: Add SiRNA oligos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects
+  Args		: Oligo sequence and start position (required), rank/score (optional)
+
+=cut
+
+sub add_oligos {
+    my ($self, $seq, $start, $rank) = @_;
+
+    # throw is invoked as a method on $self, as everywhere else in this
+    # file; a bare throw(...) would be a plain function call.
+    ($seq) or $self->throw('No sequence supplied for add_oligos');
+    (defined $start) or $self->throw('No start position specified for  add_oligos');
+
+    my ($end) = $start + length($seq);
+
+    # sense oligo, placed from $start on the forward strand
+    my ($sseq) = $self->_get_sense($seq);
+    my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start      => $start,
+                                                    -end        => ($start + length($sseq)),
+                                                    -strand     => 1,
+                                                    -seq        => $sseq,
+                                                    -source_tag => ref($self),
+                                                    );
+
+    # antisense oligo, placed from $end backwards on the reverse strand
+    # NOTE(review): -start is larger than -end here; confirm that
+    # Bio::SeqFeature::SiRNA::Oligo expects that for strand -1.
+    my $aseq = $self->_get_anti($seq);
+    my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start      => $end,
+                                                     -end        => ($end - length($aseq)),
+                                                     -strand     => -1,
+                                                     -seq        => $aseq,
+                                                     -source_tag => ref($self),
+                                                     );
+
+    my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank       => $rank,
+                                                  # -fxGC      => $fxGC,
+                                                   -sense      => $sense,
+                                                   -antisense  => $asense,
+                                                   -source_tag => ref($self),
+                                                   );
+
+    # keep the pair only when it overlaps none of the excluded features
+    unless ($self->_has_overlap($sirna, $self->excluded)) {
+        $self->target->add_SeqFeature($sirna);
+    }
+}
+
+sub _has_overlap {
+    # flag any pairs that overlap an UNDESIRED feature (eg SNP)
+    # return true if there is overlap, false if not
+
+    my ($self, $test, $flist) = @_;
+
+    if ($self->debug) {
+        print STDERR "Checking oligo at ", $test->start, " to ",$test->end, "\n";
+    }
+
+    foreach my $feat (@$flist) {
+        # no overlap when the pair starts after the feature ends ...
+        next if ($test->start > $feat->end);
+        # ... or ends before the feature starts
+        next if ($test->end < $feat->start);
+
+        if ($self->debug) {
+            print STDERR "Overlaps ", $feat->primary_tag, " at ",
+                $feat->start, " to ", $feat->end, "\n";
+        }
+        return 1;
+    }
+
+    return 0; # default - no overlap
+}
+    
+# MOVE to SiRNA::Ruleset::tuschl
+	 
+# sub _get_sense {
+#     my ($target) = @_;
+#     # trim off 1st 2 nt to get overhang
+#     $target =~ s/^..//;
+#     # convert T's to U's (transcribe)
+#     $target =~ s/T/U/gi;
+#     # force last 2 nt to be T's
+#     $target =~ s/..$/TT/;
+
+#     return $target;
+# }
+
+# sub _get_anti {
+#     my ($target) = @_;
+#     my @target = split(//, $target);
+#     my ($nt, at antitarget);
+
+#     while ($nt = pop @target) {
+# 	push(@antitarget, $COMP{$nt});
+#     }
+#     my $anti = join('', @antitarget);
+#     # trim off 1st 2 nt to get overhang
+#     $anti =~ s/^..//;
+#     # convert T's to U's
+#     $anti =~ s/T/U/gi;
+#     # convert last 2 NT's to T
+#     $anti =~ s/..$/TT/;
+
+#     return $anti;
+# }
+
+
+# Generic get/set accessor: the method name, stripped of its package
+# prefix, is used as a key into the object hash.  A defined argument is
+# stored under that key first; asking for a key that does not exist throws.
+sub AUTOLOAD {
+    my ($self, $value) = @_;
+
+    (my $name = $AUTOLOAD) =~ s/.+:://;
+
+    # never treat object destruction as an attribute access
+    return if ($name eq 'DESTROY');
+
+    $self->{$name} = $value if (defined $value);
+
+    exists $self->{$name}
+        or $self->throw("Attribute $name not defined for ". ref($self));
+
+    return $self->{$name};
+}
+
+# Return the %COMP entry for the upper-cased character; returns nothing
+# when the character is false.
+sub _comp {
+    my ($self, $char) = @_;
+
+    return ($char ? $COMP{ uc($char) } : ());
+}
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sigcleave.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sigcleave.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sigcleave.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,589 @@
+#-----------------------------------------------------------------------------
+# PACKAGE : Bio::Tools::Sigcleave
+# AUTHOR  : Chris Dagdigian, dag at sonsorol.org
+# CREATED : Jan 28 1999
+# REVISION: $Id: Sigcleave.pm,v 1.22.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved.
+#           This module is free software; you can redistribute it and/or 
+#           modify it under the same terms as Perl itself.
+#
+# _History_
+#
+# Object framework ripped from Steve Chervits's SeqPattern.pm
+# 
+# Core EGCG Sigcleave emulation from perl code developed by
+# Danh Nguyen & Kamalakar Gulukota which itself was based 
+# loosely on Colgrove's signal.c program.
+#
+# The overall idea is to replicate the output of the sigcleave
+# program which was distributed with the EGCG extension to the GCG sequence
+# analysis package. There is also an accessor method for just getting at
+# the raw results.
+#
+#-----------------------------------------------------------------------------
+
+=head1 NAME
+
+Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis
+
+=head1 SYNOPSIS
+
+=head2 Object Creation
+
+  use Bio::Tools::Sigcleave ();
+
+  # to keep the module backward compatible, you can pass it a sequence string, but
+  # the recommended way is to pass it a Seq object
+
+  # this works
+  $seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
+  $sig = new Bio::Tools::Sigcleave(-seq  => $seq,
+                                                -type => 'protein',
+                                                -threshold=>'3.5',
+                                                );
+  # but you do:
+  $seqobj = Bio::PrimarySeq->new(-seq => $seq);
+
+  $sig = new Bio::Tools::Sigcleave(-seq  => $seqobj,
+                                                -threshold=>'3.5',
+                                                );
+
+  # now you can detect procaryotic signal sequences as well as eucaryotic
+  $sig->matrix('eucaryotic'); # or 'procaryotic'
+
+
+=head2 Object Methods & Accessors
+
+  # you can use this method to fine tune the threshold before printing out the results
+  $sig->result_count;
+
+  %raw_results      = $sig->signals;
+  $formatted_output = $sig->pretty_print;
+
+=head1 DESCRIPTION
+
+"Sigcleave" was a program distributed as part of the free EGCG add-on
+to earlier versions of the GCG Sequence Analysis package. A new
+implementation of the algorithm is now part of EMBOSS package.
+
+From the EGCG documentation:
+
+  SigCleave uses the von Heijne method to locate signal sequences, and
+  to identify the cleavage site. The method is 95% accurate in
+  resolving signal sequences from non-signal sequences with a cutoff
+  score of 3.5, and 75-80% accurate in identifying the cleavage
+  site. The program reports all hits above a minimum value.
+
+The EGCG Sigcleave program was written by Peter Rice (E-mail:
+pmr at sanger.ac.uk Post: Informatics Division, The Sanger Centre,
+Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK).
+
+Since EGCG is no longer distributed for the latest versions of GCG,
+this code was developed to emulate the output of the original program
+as much as possible for those who lost access to sigcleave when
+upgrading to newer versions of GCG.
+
+There are 2 accessor methods for this object. "signals" will return a
+perl associative array containing the sigcleave scores keyed by amino
+acid position.  "pretty_print" returns a formatted string similar to
+the output of the original sigcleave utility.
+
+In both cases, the "threshold" setting controls the score reporting
+level. If no value for threshold is passed in by the user, the code
+defaults to a reporting value of 3.5.
+
+In this implementation the accessor will never return any
+score/position pair which does not meet the threshold limit. This is
+slightly different from the behaviour of the 8.1 EGCG sigcleave
+program which will report the highest of the under-threshold results
+if nothing else is found.
+
+
+Example of pretty_print output:
+
+	SIGCLEAVE of sigtest from: 1 to 146
+
+	Report scores over 3.5
+	Maximum score 4.9 at residue 131
+
+	 Sequence:  FVILAAMSIQGSA-NLQTQWKSTASLALET
+        	    | (signal)    | (mature peptide)
+          	118            131
+
+	 Other entries above 3.5
+
+	Maximum score 3.7 at residue 112
+
+	 Sequence:  CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET
+         	   | (signal)    | (mature peptide)
+           	99            112
+
+
+=head1 FEEDBACK
+
+When updating and maintaining a module, it helps to know that people
+are actually using it. Let us know if you find a bug, think this code
+is useful or have any improvements/features to suggest.
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Chris Dagdigian, dag-at-sonsorol.org  & others
+
+=head1 CONTRIBUTORS
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 VERSION
+
+Bio::Tools::Sigcleave, $Id: Sigcleave.pm,v 1.22.4.1 2006/10/02 23:10:32 sendu Exp $
+
+=head1 COPYRIGHT
+
+Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved.
+This module is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 REFERENCES / SEE ALSO
+
+von Heijne G. (1986) "A new method for predicting signal sequences
+cleavage sites."  Nucleic Acids Res. 14, 4683-4690.
+
+von Heijne G. (1987) in "Sequence Analysis in Molecular Biology:
+Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117).
+
+
+=head1 APPENDIX
+
+The following documentation describes the various functions
+contained in this module. Some functions are for internal 
+use and are not meant to be called by the user; they are 
+preceded by an underscore ("_").
+
+=cut
+
+#
+##
+###
+#### END of main POD documentation.
+###
+##
+#
+
+package Bio::Tools::Sigcleave;
+
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root);
+use strict;
+use vars qw ($ID %WeightTable_euc  %WeightTable_pro );
+$ID  = 'Bio::Tools::Sigcleave';
+
+# Weight tables, one row per amino acid letter.  As the column header
+# inside each table shows, a row holds 15 values for positions -13 .. +2
+# (array indices 0 .. 14) followed by an "Expect" value at index 15.
+# The values entered here are the raw figures; code further down in this
+# file overwrites every entry with log(entry / Expect).
+
+# Table labelled "euc" (161 aligned sequences).
+  %WeightTable_euc = (
+#Sample: 161 aligned sequences
+# R     -13 -12 -11 -10  -9  -8  -7  -6  -5  -4  -3  -2  -1  +1  +2 Expect
+ 'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47,  6, 80, 18,  6, 14.5],
+ 'C' => [ 3,  6,  9,  7,  9, 14,  6,  8,  5,  6, 19,  3,  9,  8,  3,  4.5],
+ 'D' => [ 0,  0,  0,  0,  0,  0,  0,  0,  5,  3,  0,  5,  0, 10, 11,  8.9],
+ 'E' => [ 0,  0,  0,  1,  0,  0,  0,  0,  3,  7,  0,  7,  0, 13, 14, 10.0],
+ 'F' => [13,  9, 11, 11,  6,  7, 18, 13,  4,  5,  0, 13,  0,  6,  4,  5.6],
+ 'G' => [ 4,  4,  3,  6,  3, 13,  3,  2, 19, 34,  5,  7, 39, 10,  7, 12.1],
+ 'H' => [ 0,  0,  0,  0,  0,  1,  1,  0,  5,  0,  0,  6,  0,  4,  2,  3.4],
+ 'I' => [15, 15,  8,  6, 11,  5,  4,  8,  5,  1, 10,  5,  0,  8,  7,  7.4],
+ 'K' => [ 0,  0,  0,  1,  0,  0,  1,  0,  0,  4,  0,  2,  0, 11,  9, 11.3],
+ 'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23,  8, 20,  1,  8,  4, 12.1],
+ 'M' => [ 0,  3,  7,  4,  1,  6,  2,  2,  0,  0,  0,  1,  0,  1,  2,  2.7],
+ 'N' => [ 0,  1,  0,  1,  1,  0,  0,  0,  3,  3,  0, 10,  0,  4,  7,  7.1],
+ 'P' => [ 2,  0,  2,  0,  0,  4,  1,  8, 20, 14,  0,  1,  3,  0, 22,  7.4],
+ 'Q' => [ 0,  0,  0,  1,  0,  6,  1,  0, 10,  8,  0, 18,  3, 19, 10,  6.3],
+ 'R' => [ 2,  0,  0,  0,  0,  1,  0,  0,  7,  4,  0, 15,  0, 12,  9,  7.6],
+ 'S' => [ 9,  3,  8,  6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4],
+ 'T' => [ 2, 10,  5,  4,  5, 13,  7,  7, 12,  6, 17,  8,  6,  3, 10,  9.7],
+ 'V' => [20, 25, 15, 18, 13, 15, 11, 27,  0, 12, 32,  3,  0,  8, 17, 11.1],
+ 'W' => [ 4,  3,  3,  1,  1,  2,  6,  3,  1,  3,  0,  9,  0,  2,  0,  1.8],
+ 'Y' => [ 0,  1,  4,  0,  0,  1,  3,  1,  1,  2,  0,  5,  0,  1,  7,  5.6]
+);
+
+# Table labelled "pro" (36 aligned sequences); same layout as above.
+  %WeightTable_pro = (
+#Sample: 36 aligned sequences
+#  R    -13 -12 -11 -10  -9  -8  -7  -6  -5  -4  -3  -2  -1  +1  +2 Expect
+  'A' => [0,  8,  8,  9,  6,  7,  5,  6,  7,  7, 24,  2, 31, 18,  4,  3.2],
+  'C' => [1,  0,  0,  1,  1,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  1.0],
+  'D' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  8,  2.0],
+  'E' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  4,  8,  2.2],
+  'F' => [2,  4,  3,  4,  1,  1,  8,  0,  4,  1,  0,  7,  0,  1,  0,  1.3],
+  'G' => [4,  2,  2,  2,  3,  5,  2,  4,  2,  2,  0,  2,  2,  1,  0,  2.7],
+  'H' => [0,  0,  1,  0,  0,  0,  0,  1,  1,  0,  0,  7,  0,  1,  0,  0.8],
+  'I' => [3,  1,  5,  1,  5,  0,  1,  3,  0,  0,  0,  0,  0,  0,  2,  1.7],
+  'K' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  2,  0,  3,  0,  2.5],
+  'L' => [8, 11,  9,  8,  9, 13,  1,  0,  2,  2,  1,  2,  0,  0,  1,  2.7],
+  'M' => [0,  2,  1,  1,  3,  2,  3,  0,  1,  2,  0,  4,  0,  0,  1,  0.6],
+  'N' => [0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  0,  3,  0,  1,  4,  1.6],
+  'P' => [0,  1,  1,  1,  1,  1,  2,  3,  5,  2,  0,  0,  0,  0,  5,  1.7],
+  'Q' => [0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  0,  3,  0,  0,  1,  1.4],
+  'R' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  1.7],
+  'S' => [1,  0,  1,  4,  4,  1,  5, 15,  5,  8,  5,  2,  2,  0,  0,  2.6],
+  'T' => [2,  0,  4,  2,  2,  2,  2,  2,  5,  1,  3,  0,  1,  1,  2,  2.2],
+  'V' => [5,  7,  1,  3,  1,  4,  7,  0,  0,  4,  3,  0,  0,  2,  0,  2.5],
+  'W' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0.4],
+  'Y' => [0,  0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  1,  0,  0,  0,  1.3]
+);
+
+
+##
+## Now we calculate the _real_ values for the weight tables
+##
+##
+## yeah yeah yeah there is lots of math here that gets repeated
+## every single time a sigcleave object gets created. This is
+## a quick hack to make sure that we get the scores as accurate as
+## possible. Need all those significant digits....
+##
+## suggestions for speedup approaches welcome
+##
+
+
+# Convert the raw counts of both weight tables, in place, into log scores.
+#
+# Every row belongs to one residue. Its last column (index 15) holds the
+# expected count; each column of the row is divided by that value and the
+# natural log of the ratio is stored instead of the count.
+#
+# A count of zero cannot be handed to log(), so it is replaced first:
+#   * by 1.e-10 in columns 10 and 12 (headed -3 and -1 in the table
+#     header comments), which yields a strongly negative score;
+#   * by 1 in every other column.
+#
+# Rows whose expected count is not positive are left untouched. Column 15
+# goes through the same formula as the counts and so ends up as log(1).
+# Both tables are treated identically, hence the single shared loop.
+
+foreach my $table (\%WeightTable_euc, \%WeightTable_pro) {
+	foreach my $row (values %$table) {
+
+		# read before the loop below overwrites column 15
+		my $expected = $row->[15];
+		next unless $expected > 0;
+
+		foreach my $col (0 .. 15) {
+			if ($row->[$col] == 0) {
+				$row->[$col] = ($col == 10 || $col == 12) ? 1.e-10 : 1;
+			}
+			$row->[$col] = log($row->[$col] / $expected);
+		}
+	}
+}
+
+#####################################################################################
+##                                 CONSTRUCTOR                                     ##
+#####################################################################################
+
+
+sub new {
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+
+    # pick the named arguments out of the argument list
+    my ($seq, $threshold, $matrix) = $self->_rearrange([qw(SEQ THRESHOLD MATRIX)], @args);
+
+    # a threshold of 0 is defined, so it is still handed to the setter
+    defined $threshold && $self->threshold($threshold);
+    $matrix && $self->matrix($matrix);
+    $seq && $self->seq($seq);
+    return $self;
+}
+
+
+
+=head1 threshold
+
+ Title     : threshold
+ Usage     : $value = $self->threshold
+ Purpose   : Read/write method sigcleave score reporting threshold.
+ Returns   : float.
+ Argument  : new value, float
+ Throws    : on non-number argument
+ Comments  : defaults to 3.5
+ See Also   : n/a
+
+=cut
+
+#----------------
+sub threshold {
+	# A stored threshold of 0 is a real setting; 3.5 applies only when unset.
+	my ($self, $value) = @_;
+	if( defined $value) {
+		$self->throw("I need a number, not [$value]")
+		  if  $value !~ /^[+-]?(?:\d+\.?\d*|\.\d+)$/;
+		$self->{'_threshold'} = $value;
+	}
+	return defined $self->{'_threshold'} ? $self->{'_threshold'} : 3.5;
+}
+
+=head1 matrix
+
+ Title     : matrix
+ Usage     : $value = $self->matrix('procaryotic')
+ Purpose   : Read/write method sigcleave matrix.
+ Returns   : string, the name of the matrix in use.
+ Argument  : new value: 'eucaryotic' or 'procaryotic'
+ Throws    : on any value other than 'eucaryotic' or 'procaryotic'
+ Comments  : defaults to 'eucaryotic'
+ See Also   : n/a
+
+=cut
+
+#----------------
+sub matrix {
+	my ($self, $name) = @_;
+	# only the two known matrix names may be stored
+	if (defined $name) {
+		my $known = ($name eq 'eucaryotic' || $name eq 'procaryotic');
+		$self->throw("I need 'eucaryotic' or 'procaryotic', not [$name]") if !$known;
+		$self->{'_matrix'} = $name;
+	}
+	return $self->{'_matrix'} || 'eucaryotic' ;
+}
+
+=head1 seq
+
+ Title     : seq
+ Usage     : $value = $self->seq($seq_object)
+ Purpose   : set the Seq object to be used
+ Returns   : Seq object
+ Argument  : protein sequence or Seq object
+ See Also   : n/a
+
+=cut
+
+#----------------
+sub seq {
+	my ($self, $value) = @_;
+	# No defined argument: act as a plain getter.
+	return $self->{'_seq'} unless defined $value;
+
+	# An object implementing Bio::PrimarySeqI is stored as it is; any
+	# other value is taken to be a protein sequence string and is wrapped
+	# in a new Bio::PrimarySeq.
+	$self->{'_seq'} = $value->isa('Bio::PrimarySeqI')
+		? $value
+		: Bio::PrimarySeq->new(-seq => $value, -alphabet => 'protein');
+	return $self->{'_seq'};
+}
+
+=head1 _Analyze
+
+ Title     : _Analyze
+ Usage     : N/A This is an internal method. Not meant to be called from outside
+           : the package
+           :
+ Purpose   : calculates sigcleave score and amino acid position for the
+           : given protein sequence. The score reporting threshold can
+           : be adjusted by passing in the "threshold" parameter during
+           : object construction. If no threshold is passed in, the code
+           : defaults to reporting any scores equal to or above 3.5
+           :
+ Returns   : nothing. results are added to the object
+ Argument  : none.
+ Throws    : nothing.
+ Comments  : nothing.
+See Also   : n/a
+
+=cut
+
+#----------------
+sub _Analyze {
+    my ($self) = @_;
+
+    # Score every position of the sequence against the selected weight
+    # table and store the positions that reach the threshold, together
+    # with their scores, in $self->{"_signal_scores"}.
+
+    # The scoring window opens 13 residues before the position being
+    # scored and closes one residue after it.
+    my $upstream   = -13;
+    my $downstream = 2;
+
+    my $length   = $self->seq->length;
+    my $residues = $self->seq->seq;
+    my $cutoff   = $self->threshold;
+
+    # The matrix name decides, once per run, which table is consulted.
+    my $table = ($self->matrix eq 'eucaryotic') ? \%WeightTable_euc
+                                                : \%WeightTable_pro;
+
+    ## The weight table is keyed by UPPERCASE letters so we uppercase
+    ## our copy of the sequence string; the sequence object itself is
+    ## left as it is.
+    $residues =~ tr/a-z/A-Z/;
+
+    my %signals;
+    foreach my $pos (0 .. $length - 1) {
+
+		 # Clip the window to the sequence. The upper bound may be
+		 # $length, one past the last residue, where substr() yields
+		 # an empty string.
+		 my $first = $pos + $upstream;
+		 $first = 0 if $first < 0;
+		 my $last = $pos + $downstream - 1;
+		 $last = $length unless $last < $length;
+
+		 # Table columns are counted from the clipped start of the window.
+		 my $score = 0.00;
+		 foreach my $col (0 .. $last - $first) {
+			 my $aa = substr($residues, $first + $col, 1);
+
+			 ## CD: The if(defined) stuff was put in here because Sigcleave.pm
+			 ## CD: kept getting warnings about undefined vals during 'make test' ...
+			 $score += $table->{$aa}[$col] if defined $table->{$aa}[$col];
+		 }
+
+		 # keys are 1-based positions, values scores with one decimal
+		 $signals{$pos + 1} = sprintf ("%.1f", $score)	if $score >= $cutoff;
+    }
+
+    $self->{"_signal_scores"} = { %signals };
+}
+
+
+=head1 signals
+
+ Title     : signals
+ Usage     : %sigcleave_results = $sig->signals;
+           :
+ Purpose   : Accessor method for sigcleave results
+           : 
+ Returns   : Associative array. The key value represents the amino acid position
+           : and the value represents the score. Only scores that
+           : are greater than or equal to the THRESHOLD value are reported.
+           : 
+ Argument  : none.
+ Throws    : none.
+ Comments  : none.
+See Also   : THRESHOLD
+
+=cut
+
+#----------------
+sub signals {
+	my $self = shift;
+
+	# Scores are recomputed on every call, so the current threshold,
+	# matrix and sequence are always the ones reflected in the result.
+	$self->_Analyze;
+
+	# Hand back a copy (position => score) rather than the hash kept
+	# inside the object.
+	my $scores  = $self->{'_signal_scores'};
+	my %results = map { $_ => $scores->{$_} } sort keys %$scores;
+
+	return %results;
+}
+
+
+=head1 result_count
+
+ Title     : result_count
+ Usage     : $count = $sig->result_count;
+           :
+ Purpose   : Accessor method for sigcleave results
+           : 
+ Returns   : Integer, number of results above the threshold
+           : 
+ Argument  : none.
+ Throws    : none.
+ Comments  : none.
+
+See Also   : THRESHOLD
+
+=cut
+
+#----------------
+sub result_count {
+	my $self = shift;
+	$self->_Analyze;
+	# force a count: bare keys() would return the key list in list context
+	return scalar keys %{ $self->{'_signal_scores'} };
+}
+
+
+=head1 pretty_print
+
+ Title     : pretty_print
+ Usage     : $output = $sig->pretty_print;
+           : print $sig->pretty_print;
+           :
+ Purpose   : Emulates the output of the EGCG Sigcleave
+           : utility.
+           : 
+ Returns   : A formatted string.
+ Argument  : none.
+ Throws    : none.
+ Comments  : none.
+See Also   : n/a
+
+=cut
+
+#----------------
+sub pretty_print {
+    # Build the EGCG-style text report of all reported hits, best score first.
+    my $self = shift;
+    my $output;
+    my $cnt = 1;
+    my %results  = $self->signals;
+    my $hitcount = scalar keys %results;
+    my $thresh   = $self->threshold;
+    my $seqlen   = $self->seq->length || 0;
+    my $name     = $self->seq->id || 'NONAME';
+    my $pep      = $self->seq->seq;
+    $pep      =~ tr/a-z/A-Z/;
+
+    $output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n";
+
+    if ($hitcount > 0) {
+		 $output .= "Report scores over $thresh\n";
+		 # numeric sort: a string compare would rank 9.5 above 10.2
+		 foreach my $pos (sort { $results{$b} <=> $results{$a} } keys %results) {
+			 # the signal part shown is at most the 15 residues before $pos
+			 my $start = $pos - 15;
+			 $start = 1 if $start < 1;
+			 my $sig = substr($pep,$start -1,$pos-$start );
+
+			 $output .= sprintf ("Maximum score %1.1f at residue %3d\n",$results{$pos},$pos);
+			 $output .= "\n";
+			 $output .= " Sequence:  ";
+			 $output .= $sig;
+			 $output .= "-" x (15- length($sig));
+			 $output .= "-";
+			 $output .= substr($pep,$pos-1,50);
+			 $output .= "\n";
+			 $output .= " " x 12;
+			 $output .= "| \(signal\)      | \(mature peptide\)\n";
+			 $output .= sprintf("          %3d             %3d\n\n",$start,$pos);
+
+			 if (($hitcount > 1) && ($cnt == 1)) {
+				 $output .= " Other entries above $thresh\n\n";
+			 }
+			 $cnt++;
+		 }
+    }
+    return $output;
+}
+
+
+1;
+__END__
+
+
+#########################################################################
+#  End of class 
+#########################################################################

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Signalp.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Signalp.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Signalp.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,262 @@
+# $Id: Signalp.pm,v 1.8.4.2 2006/10/19 18:44:40 jason Exp $
+# Parser module for Signalp Bio::Tools::Signalp
+#
+# 
+# Based on the EnsEMBL module
+# Bio::EnsEMBL::Pipeline::Runnable::Protein::Signalp originally
+# written by Marc Sohrmann (ms2 at sanger.ac.uk) Written in BioPipe by
+# Balamurugan Kumarasamy <savikalpa at fugu-sg.org> Cared for by the Fugu
+# Informatics team (fuguteam at fugu-sg.org)
+
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Signalp - parser for Signalp output
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::Signalp;
+
+ my $parser = Bio::Tools::Signalp->new(-fh =>$filehandle );
+
+ while( my $sp_feat = $parser->next_result ) {
+   if ($sp_feat->score > 0.9) {
+      push @likely_sigpep, $sp_feat;
+   }
+ }
+
+=head1 DESCRIPTION
+
+C<SignalP> predicts the presence and location of signal peptide
+cleavage sites in amino acid sequences.
+
+L<Bio::Tools::Signalp> parses the output of C<SignalP> to provide a 
+L<Bio::SeqFeature::Generic> object describing the signal peptide
+found, if any. It returns a variety of tags extracted from the NN and HMM
+analysis. Most importantly, the C<score()> attribute contains the
+NN probability of this being a true signal peptide.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+ User feedback is an integral part of the evolution of this and other
+ Bioperl modules. Send your comments and suggestions preferably to
+ the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Based on the EnsEMBL module
+Bio::EnsEMBL::Pipeline::Runnable::Protein::Signalp originally written
+by Marc Sohrmann (ms2_AT_sanger.ac.uk). Written in BioPipe by
+Balamurugan Kumarasamy savikalpa_AT_fugu-sg.org. Cared for by the Fugu
+Informatics team (fuguteam_AT_fugu-sg.org)
+
+=head1 CONTRIBUTORS
+
+Torsten Seemann - torsten.seemann AT infotech.monash.edu.au
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Signalp;
+use strict;
+
+use Bio::SeqFeature::Generic;
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::Signalp();
+ Function: Builds a new Bio::Tools::Signalp object
+ Returns : Bio::Tools::Signalp
+ Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO
+
+=cut
+
+sub new {
+      my($class, @args) = @_;
+
+      my $self = $class->SUPER::new(@args);
+      $self->_initialize_io(@args);   # sets up the -fh/-file input
+
+      return $self;
+}
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $signalp->next_result
+ Function: Get the next result set from parser data
+ Returns : Bio::SeqFeature::Generic
+ Args    : none
+
+=cut
+
+sub next_result {
+        my ($self) = @_;
+
+        # defined() so that a line holding just "0" does not end the parse
+        while (defined(my $line = $self->_readline())) {
+           chomp $line;
+
+           if ($line=~/^\>(\S+)/) {
+              # header line: remember the sequence name for the feature
+              $self->_seqname($1);
+           }
+           elsif ($line=~/max\.\s+Y\s+(\S+)\s+\S+\s+\S+\s+(\S+)/) {
+              # keep the trailing column of this row; it is compared below
+              $self->_fact1($2);
+           }
+           elsif ($line=~/mean\s+S\s+(\S+)\s+\S+\s+\S+\s+(\S+)/) {
+              my $fact2 = $2;
+              my $fact1 = $self->_fact1;
+              # a feature is built only when both columns read YES
+              next unless $fact2 eq 'YES' and defined $fact1 and $fact1 eq 'YES';
+              # the cleavage site is expected on the very next line
+              my $site_line = $self->_readline();
+              if (defined $site_line
+                  and $site_line =~ /Most likely cleavage site between pos\.\s+(\d+)/) {
+                  my $end = $1;
+                  my %feature = (
+                      seq_id     => $self->_seqname,
+                      start      => 1,
+                      end        => $end,
+                      source_tag => 'Signalp',
+                      primary    => 'signal_peptide',
+                  );
+                  $self->_parse_hmm_result(\%feature);
+                  return $self->_create_feature(\%feature);
+              }
+              $self->throw ("parsing problem in signalp");
+           }
+        }
+        # input exhausted without a further signal peptide
+        return;
+}
+
+=head2 _parse_hmm_result
+
+ Title   : _parse_hmm_result
+ Usage   : $self->_parse_hmm_result(\%feature)
+ Function: Internal (not to be used directly)
+ Returns : nothing useful; the values found are stored in the hash passed in
+ Args    : reference to the hash of feature values
+
+=cut
+
+sub _parse_hmm_result {
+    my ($self, $feature_hash) = @_;
+    # Copy the values recognised on the following lines into the caller's
+    # hash; reading stops once the anchor probability has been stored or
+    # the input runs out.
+    while (my $text = $self->_readline) {
+        chomp $text;
+        if    ($text =~ /Prediction: (.+)$/) { $feature_hash->{hmmProdiction} = $1 }
+        elsif ($text =~ /Signal peptide probability: ([0-9\.]+)/) { $feature_hash->{peptideProb} = $1 }
+        elsif ($text =~ /Signal anchor probability: ([0-9\.]+)/) {
+            $feature_hash->{anchorProb} = $1;
+            last;
+        }
+    }
+}
+
+=head2 _create_feature
+
+ Title   : _create_feature
+ Usage   : $self->_create_feature(\%feature)
+ Function: Internal (not to be used directly)
+ Returns : Bio::SeqFeature::Generic object
+ Args    : reference to a hash of feature values
+
+=cut
+
+sub _create_feature {
+    my ($self, $feat) = @_;
+    # next_result() files the sequence name and the source under 'seq_id'
+    # and 'source_tag'; 'name' and 'source' are still honoured as fallbacks.
+    my $seq_id = defined $feat->{seq_id}     ? $feat->{seq_id}     : $feat->{name};
+    my $source = defined $feat->{source_tag} ? $feat->{source_tag} : $feat->{source};
+    my $feature = Bio::SeqFeature::Generic->new(
+         -seq_id      => $seq_id,
+         -start       => $feat->{start},
+         -end         => $feat->{end},
+         -score       => $feat->{score},
+         -source      => $source,
+         -primary     => $feat->{primary},
+         -logic_name  => $feat->{logic_name}, 
+    );
+    $feature->score($feat->{peptideProb});
+    $feature->add_tag_value('peptideProb', $feat->{peptideProb});
+    $feature->add_tag_value('anchorProb', $feat->{anchorProb});
+    $feature->add_tag_value('evalue',$feat->{anchorProb});
+    $feature->add_tag_value('percent_id','NULL');
+    $feature->add_tag_value("hid",$feat->{primary});
+    $feature->add_tag_value('SignalpPrediction', $feat->{hmmProdiction});
+    return $feature; 
+}
+
+=head2 _seqname
+
+ Title   : _seqname
+ Usage   : $self->_seqname($name)
+ Function: Internal (not to be used directly)
+ Returns :
+ Args    :
+
+=cut
+
+sub _seqname{
+    my $self = shift;
+
+    # Combined accessor: a defined argument replaces the stored name,
+    # and the stored name is returned either way.
+    $self->{'seqname'} = $_[0] if defined $_[0];
+    return $self->{'seqname'};
+}
+
+=head2 _fact1
+
+ Title   : _fact1
+ Usage   : $self->_fact1($fact1)
+ Function: Internal (not to be used directly)
+ Returns : 
+ Args    :
+
+=cut
+
+sub _fact1{
+    my $self = shift;
+
+    # Combined accessor: a defined argument replaces the stored value,
+    # and the stored value is returned either way.
+    $self->{'fact1'} = $_[0] if defined $_[0];
+    return $self->{'fact1'};
+}
+
+
+
+1;
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Signalp.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Exon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Exon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Exon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,183 @@
+# $Id: Exon.pm,v 1.16.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Sim4::Exon
+#
+# Cared for by Ewan Birney <birney-at-sanger.ac.uk>
+# and Hilmar Lapp <hlapp-at-gmx.net>
+#
+# Copyright Ewan Birney, Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Sim4::Exon - A single exon determined by an alignment
+
+=head1 SYNOPSIS
+
+  # See Bio::Tools::Sim4::Results for a description of the context.
+
+  # an instance of this class is-a Bio::SeqFeature::SimilarityPair
+
+  # coordinates of the exon (recommended way):
+  print "exon from ", $exon->start(),
+  	" to ", $exon->end(), "\n";
+
+  # the same (feature1() inherited from Bio::SeqFeature::FeaturePair)
+  print "exon from ", $exon->feature1()->start(),
+  	" to ", $exon->feature1()->end(), "\n";
+  # also the same (query() inherited from Bio::SeqFeature::SimilarityPair):
+  print "exon from ", $exon->query()->start(),
+  	" to ", $exon->query()->end(), "\n";
+
+  # coordinates on the matching EST (recommended way):
+  print "matches on EST from ", $exon->est_hit()->start(),
+  	" to ", $exon->est_hit()->end(), "\n";
+
+  # the same (feature2() inherited from Bio::SeqFeature::FeaturePair)
+  print "matches on EST from ", $exon->feature2()->start(),
+  	" to ", $exon->feature2()->end(), "\n";
+  # also the same (subject() inherited from Bio::SeqFeature::SimilarityPair):
+  print "exon from ", $exon->subject()->start(),
+  	" to ", $exon->subject()->end(), "\n";
+
+=head1 DESCRIPTION
+
+This class inherits from Bio::SeqFeature::SimilarityPair and represents an
+exon on a genomic sequence determined by similarity, that is, by aligning an
+EST sequence (using Sim4 in this case). Consequently, the notion of query and
+subject is always from the perspective of the genomic sequence: query refers
+to the genomic seq, subject to the aligned EST hit. Because of this,
+$exon-E<gt>start(), $exon-E<gt>end() etc will always return what you expect. 
+
+To get the coordinates on the matching EST, refer to the properties of the
+feature returned by L<est_hit>().
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, Hilmar Lapp
+
+Ewan Birney E<lt>birney-at-sanger.ac.ukE<gt>
+Hilmar Lapp E<lt>hlapp-at-gmx.netE<gt> or E<lt>hilmar.lapp-at-pharma.novartis.comE<gt>.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Sim4::Exon;
+use strict;
+
+
+use base qw(Bio::SeqFeature::SimilarityPair);
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($prim, $prim_tag, $source, $source_tag) = 
+	$self->_rearrange([qw(PRIMARY
+			      PRIMARY_TAG 
+			      SOURCE
+			      SOURCE_TAG)], 
+			  @args);
+
+    # fall back to the Sim4 defaults only where the caller gave no value
+    $self->primary_tag('exon') unless $prim || $prim_tag;
+    $self->source_tag('Sim4') unless $source || $source_tag;
+    $self->strand(0) unless defined($self->strand());
+    $self->query();   # NOTE(review): result unused; presumably called for a side effect -- confirm
+    return $self; 
+}
+
+=head2 percentage_id
+
+ Title   : percentage_id
+ Usage   : $obj->percentage_id($newval)
+ Function: This is a synonym for 100 * $obj->est_hit()->frac_identical().
+ Returns : value of percentage_id
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub percentage_id {
+    my ($self, @args) = @_;
+
+    # Callers work in percent while frac_identical() works in fractions,
+    # so a defined incoming value is divided by 100 before it is passed
+    # on, and values read back are multiplied by 100.
+    my $frac;
+    $frac = $args[0] if @args;
+    $frac /= 100.0 if defined($frac);
+
+    my $val;
+    my $delegated = 0;
+
+    # Try the query feature first and the EST hit second. Each one that
+    # offers frac_identical() receives the new value (if there is one)
+    # and supplies the return value. Because the EST hit comes last, its
+    # value intentionally overwrites the one taken from the query.
+    foreach my $side (qw(query est_hit)) {
+
+        next unless $self->$side()->can('frac_identical');
+
+        $self->$side()->frac_identical($frac) if defined($frac);
+        $val = 100.0 * $self->$side()->frac_identical();
+        $delegated = 1;
+    }
+
+    return $val if $delegated;
+
+    # Neither feature can hold the value: keep it, unscaled, on this
+    # object itself. A call without arguments only reads what is stored.
+    $self->{'percentage_id'} = shift(@args) if @args;
+
+    return $self->{'percentage_id'};
+
+}
+
+=head2 est_hit
+
+ Title   : est_hit
+ Usage   : $est_feature = $obj->est_hit();
+ Function: Returns the EST hit pointing to (i.e., aligned to by Sim4) this
+           exon (i.e., genomic region). At present, merely a synonym for
+           $obj->feature2().
+ Returns : An Bio::SeqFeatureI implementing object.
+ Args    : 
+
+
+=cut
+
+sub est_hit {
+    my ($self, @rest) = @_;   # everything after $self goes to feature2()
+    return $self->feature2(@rest);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Results.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Results.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Sim4/Results.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,431 @@
+# $Id: Results.pm,v 1.25.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Sim4::Results
+#
+# Cared for by Ewan Birney <birney-at-sanger.ac.uk>
+#          and Hilmar Lapp <hlapp-at-gmx.net>
+#
+# Copyright Ewan Birney and Hilmar Lapp
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Sim4::Results - Results of one Sim4 run
+
+=head1 SYNOPSIS
+
+   # to preset the order of EST and genomic file as given on the sim4 
+   # command line:
+   my $sim4 = Bio::Tools::Sim4::Results->new(-file => 'result.sim4',
+                                             -estfirst => 1);
+   # to let the order be determined automatically (by length comparison):
+   $sim4 = Bio::Tools::Sim4::Results->new( -file => 'sim4.results' );
+   # filehandle:
+   $sim4 = Bio::Tools::Sim4::Results->new( -fh   => \*INPUT );
+
+   # parse the results
+   while(my $exonset = $sim4->next_exonset()) {
+       # $exonset is-a Bio::SeqFeature::Generic with Bio::Tools::Sim4::Exons
+       # as sub features
+       print "Delimited on sequence ", $exonset->seq_id(), 
+             "from ", $exonset->start(), " to ", $exonset->end(), "\n";
+       foreach my $exon ( $exonset->sub_SeqFeature() ) {
+	  # $exon is-a Bio::SeqFeature::FeaturePair
+	  print "Exon from ", $exon->start, " to ", $exon->end, 
+                " on strand ", $exon->strand(), "\n";
+          # you can get out what it matched using the est_hit attribute
+          my $homol = $exon->est_hit();
+          print "Matched to sequence ", $homol->seq_id, 
+                " at ", $homol->start," to ", $homol->end, "\n";
+      }
+   }
+
+   # essential if you gave a filename at initialization (otherwise the file
+   # stays open)
+   $sim4->close();
+
+=head1 DESCRIPTION
+
+The sim4 module provides a parser and results object for sim4 output. The
+sim4 results are specialised types of SeqFeatures, meaning you can add them
+to AnnSeq objects fine, and manipulate them in the "normal" seqfeature manner.
+
+The sim4 Exon objects are Bio::SeqFeature::FeaturePair inherited objects. The 
+$esthit = $exon-E<gt>est_hit() is the alignment as a feature on the matching 
+object (normally, an EST), in which the start/end points are where the hit
+lies.
+
+To make this module work sensibly you need to run
+
+     sim4 genomic.fasta est.database.fasta
+or
+     sim4 est.fasta genomic.database.fasta
+
+To get the sequence identifiers recorded for the first sequence, too, use
+A=4 as output option for sim4.
+
+One fiddle here is that there are only two real possibilities to the matching
+criteria: either one sequence needs reversing or not. Because of this, it
+is impossible to tell whether the match is in the forward or reverse strand
+of the genomic DNA. We solve this here by assuming that the genomic DNA is
+always forward. As a consequence, the strand attribute of the matching EST is
+unknown, and the strand attribute of the genomic DNA (i.e., the Exon object)
+will reflect the direction of the hit.
+
+See the documentation of parse_next_alignment() for abilities of the parser
+to deal with the different output format options of sim4.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, Hilmar Lapp
+
+Ewan Birney E<lt>birney-at-sanger.ac.ukE<gt>
+Hilmar Lapp E<lt>hlapp-at-gmx.netE<gt> or E<lt>hilmar.lapp-at-pharma.novartis.comE<gt>.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Sim4::Results;
+use strict;
+
+
+use File::Basename;
+use Bio::Root::Root;
+use Bio::Tools::Sim4::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+
+sub _initialize_state {
+    my($self, @args) = @_;
+
+    # call the inherited method first
+    my $make = $self->SUPER::_initialize_state(@args);
+    my ($est_is_first) = $self->_rearrange([qw(ESTFIRST)], @args);
+    # the key exists only when -estfirst was given; its absence is what
+    # lets parse_next_alignment() detect the order from the sequence lengths
+    delete($self->{'_est_is_first'});
+    $self->{'_est_is_first'} = $est_is_first if(defined($est_is_first));
+    $self->analysis_method("Sim4");
+}
+
+=head2 analysis_method
+
+ Usage     : $sim4->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /sim4/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method { 
+    my ($self, $method) = @_;  
+    # A false $method (e.g. a plain getter call) skips the check; any
+    # other value has to contain "sim4", in any letter case.
+    my $unsupported = $method && ($method !~ /sim4/i);
+    $self->throw("method $method not supported in " . ref($self)) if $unsupported;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 parse_next_alignment
+
+ Title   : parse_next_alignment
+ Usage   : @exons = $sim4_result->parse_next_alignment;
+           foreach $exon (@exons) {
+               # do something
+           }
+ Function: Parses the next alignment of the Sim4 result file and returns the
+           found exons as an array of Bio::Tools::Sim4::Exon objects. Call
+           this method repeatedly until an empty array is returned to get the
+           results for all alignments.
+
+           The $exon->seq_id() attribute will be set to the identifier of the
+           respective sequence for both sequences if A=4 was used in the sim4
+           run, and otherwise for the second sequence only. If the output does
+           not contain the identifier, the filename stripped of path and 
+           extension is used instead. In addition, the full filename 
+           will be recorded for both features ($exon inherits off 
+           Bio::SeqFeature::SimilarityPair) as tag 'filename'. The length
+           is accessible via the seqlength() attribute of $exon->query() and
+           $exon->est_hit().
+
+           Note that this method is capable of dealing with outputs generated
+           with format 0,1,3, and 4 (via the A=n option to sim4). It
+           automatically determines which of the two sequences has been 
+           reversed, and adjusts the coordinates for that sequence. It will
+           also detect whether the EST sequence(s) were given as first or as
+           second file to sim4, unless this has been specified at creation
+           time of the object.
+
+ Example :
+ Returns : An array of Bio::Tools::Sim4::Exon objects
+ Args    :
+
+
+=cut
+
+sub parse_next_alignment {
+   my ($self) = @_;
+   my @exons = ();
+   my %seq1props = ();
+   my %seq2props = ();
+   # we refer to the properties of each seq by reference
+   my ($estseq, $genomseq, $to_reverse);
+   my $started = 0;
+   my $hit_direction = 1;
+   my $output_fmt = 3; # same as 0 and 1 (we cannot deal with A=2 produced
+                       # output yet)
+   
+   while(defined($_ = $self->_readline())) {
+       #chomp();
+       #
+       # bascially, each sim4 'hit' starts with seq1...
+       #
+       /^seq1/ && do {
+	   if($started) {
+	       $self->_pushback($_);
+	       last;
+	   }
+	   $started = 1;
+
+	   # filename and length of seq 1
+	   /^seq1\s+=\s+(\S+)\,\s+(\d+)/ ||
+	       $self->throw("Sim4 parsing error on seq1 [$_] line. Sorry!");
+	   $seq1props{'filename'} = $1;
+	   $seq1props{'length'} = $2;
+	   next;
+       };
+       /^seq2/ && do {
+	   # the second hit has also the database name in the >name syntax 
+	   # (in brackets).
+	   /^seq2\s+=\s+(\S+)\s+\(>?(\S+)\s*\)\,\s+(\d+)/||
+	       $self->throw("Sim4 parsing error on seq2 [$_] line. Sorry!");
+	   $seq2props{'filename'} = $1;
+	   $seq2props{'seqname'} = $2;
+	   $seq2props{'length'} = $3;
+	   next;
+       };
+       if(/^>(\S+)\s*(.*)$/) {
+	   # output option was A=4, which not only gives the complete
+	   # description lines, but also causes the longer sequence to be
+	   # reversed if the second file contained one (genomic) sequence
+	   $seq1props{'seqname'} = $1;
+	   $seq1props{'description'} = $2 if $2;
+	   $output_fmt = 4;
+	   # we handle seq1 and seq2 both here
+	   if(defined($_ = $self->_readline()) && (/^>(\S+)\s*(.*)$/)) {
+	       $seq2props{'seqname'} = $1; # redundant, since already set above
+	       $seq2props{'description'} = $2 if $2;
+	   }
+	   next;
+       }
+       /^\(complement\)/ && do {
+	   $hit_direction = -1;
+	   next;
+       };
+       # this matches
+       # start-end (start-end) pctid%
+       if(/(\d+)-(\d+)\s+\((\d+)-(\d+)\)\s+(\d+)%/) {
+ 	   $seq1props{'start'} = $1;
+ 	   $seq1props{'end'} = $2;
+ 	   $seq2props{'start'} = $3;
+ 	   $seq2props{'end'} = $4;
+	   my $pctid   = $5;
+	   
+	   if(! defined($estseq)) {
+	       # for the first time here: need to set the references referring
+	       # to seq1 and seq2 
+	       if(! exists($self->{'_est_is_first'})) {
+		   # detect which one is the EST by looking at the lengths,
+		   # and assume that this holds throughout the entire result
+		   # file (i.e., when this method is called for the next
+		   # alignment, this will not be checked again)
+		   if($seq1props{'length'} > $seq2props{'length'}) {
+		       $self->{'_est_is_first'} = 0;
+		   } else {
+		       $self->{'_est_is_first'} = 1;
+		   }
+	       }
+	       if($self->{'_est_is_first'}) {
+		   $estseq = \%seq1props;
+		   $genomseq = \%seq2props;
+		   # if the EST is given first, A=4 selects the genomic
+		   # seq for being reversed (reversing the EST is default)
+		   $to_reverse = ($output_fmt == 4) ? $genomseq : $estseq;
+	       } else {
+		   $estseq = \%seq2props;
+		   $genomseq = \%seq1props;
+		   # if the EST is the second, A=4 does not change the
+		   # seq being reversed (always the EST is reversed)
+		   $to_reverse = $estseq;
+	       }
+	   }
+	   if($hit_direction == -1) {
+	       # we have to reverse the coordinates of one of both seqs
+	       my $tmp = $to_reverse->{'start'};
+	       $to_reverse->{'start'} =
+		   $to_reverse->{'length'} - $to_reverse->{'end'} + 1;
+	       $to_reverse->{'end'} = $to_reverse->{'length'} - $tmp + 1;
+	   }
+	   # create and initialize the exon object
+	   my $exon = Bio::Tools::Sim4::Exon->new(
+					    '-start' => $genomseq->{'start'},
+					    '-end'   => $genomseq->{'end'},
+					    '-strand' => $hit_direction);
+	   if(exists($genomseq->{'seqname'})) {
+	       $exon->seq_id($genomseq->{'seqname'});
+	   } else {
+	       # take filename stripped of path as fall back
+	       my ($basename) = &File::Basename::fileparse($genomseq->{'filename'}, '\..*');
+	       $exon->seq_id($basename);
+	   }
+	   $exon->feature1()->add_tag_value('filename',
+					    $genomseq->{'filename'});
+	   # feature1 is supposed to be initialized to a Similarity object,
+           # but we provide a safety net
+	   if($exon->feature1()->can('seqlength')) {
+	       $exon->feature1()->seqlength($genomseq->{'length'});
+	   } else {
+	       $exon->feature1()->add_tag_value('SeqLength',
+						$genomseq->{'length'});
+	   }
+	   # create and initialize the feature wrapping the 'hit' (the EST)
+	   my $fea2 = Bio::SeqFeature::Similarity->new(
+                                            '-start' => $estseq->{'start'},
+					    '-end'   => $estseq->{'end'},
+					    '-strand' => 0,
+					    '-primary' => "aligning_EST");
+	   if(exists($estseq->{'seqname'})) {
+	       $fea2->seq_id($estseq->{'seqname'});
+	   } else {
+	       # take filename stripped of path as fall back
+	       my ($basename) =
+		   &File::Basename::fileparse($estseq->{'filename'}, '\..*');
+	       $fea2->seq_id($basename);
+	   }
+	   $fea2->add_tag_value('filename', $estseq->{'filename'});
+	   $fea2->seqlength($estseq->{'length'});
+	   # store
+	   $exon->est_hit($fea2);	   
+	   # general properties
+	   $exon->source_tag($self->analysis_method());
+	   $exon->percentage_id($pctid);
+	   $exon->score($exon->percentage_id());
+	   # push onto array
+	   push(@exons, $exon);
+	   next; # back to while loop
+       }
+   }
+   return @exons;
+}
+
+=head2 next_exonset
+
+ Title   : next_exonset
+ Usage   : $exonset = $sim4_result->next_exonset;
+           print "Exons start at ", $exonset->start(), 
+                 "and end at ", $exonset->end(), "\n";
+           foreach $exon ($exonset->sub_SeqFeature()) {
+               # do something
+           }
+ Function: Parses the next alignment of the Sim4 result file and returns the
+           set of exons as a container of features. The container is itself
+           a Bio::SeqFeature::Generic object, with the Bio::Tools::Sim4::Exon
+           objects as sub features. Start, end, and strand of the container
+           will represent the total region covered by the exons of this set.
+
+           See the documentation of parse_next_alignment() for further
+           reference about parsing and how the information is stored.
+
+ Example : 
+ Returns : A Bio::SeqFeature::Generic object holding Bio::Tools::Sim4::Exon
+           objects as sub features.
+ Args    :
+
+=cut
+
+sub next_exonset {
+    my ($self) = @_;
+
+    # ask the parser for the exons of the next alignment
+    my @exon_list = $self->parse_next_alignment();
+    if (! @exon_list) {
+	# nothing came back: give up at end of input, otherwise try again
+	return if eof($self->_fh);
+	return $self->next_exonset;
+    }
+    # the first exon seeds position, strand, source and seq id of the set
+    my $first = $exon_list[0];
+    my $container = Bio::SeqFeature::Generic->new(
+	'-start'   => $first->start(),
+	'-end'     => $first->end(),
+	'-strand'  => $first->strand(),
+	'-primary' => "ExonSet");
+    $container->source_tag($first->source_tag());
+    $container->seq_id($first->seq_id());
+    # attach every exon as a sub feature, requesting EXPANsion of the region
+    for my $member (@exon_list) {
+	$container->add_sub_SeqFeature($member, 'EXPAND');
+    }
+    return $container;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($exonset = $sim4->next_feature()) {
+                  # do something
+           }
+ Function: Does the same as L<next_exonset()>. See there for documentation of
+           the functionality. Call this method repeatedly until FALSE is
+           returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_exonset() at present.
+
+ Example :
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self,@args) = @_;
+    # next_exonset() expects no arguments at present (and neither does this
+    # method); whatever was passed is forwarded anyway so that callers keep
+    # working should that ever change
+    return $self->next_exonset(@args);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Exon.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Exon.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Exon.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,259 @@
+# $Id: Exon.pm,v 1.6.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Spidey::Exon
+#
+# Cared for by Ryan Golhar <golharam at umdnj.edu>
+#
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Spidey::Exon - A single exon determined by an alignment
+
+=head1 SYNOPSIS
+
+  # See Bio::Tools::Spidey::Results for a description of the context.
+
+  # an instance of this class is-a Bio::SeqFeature::SimilarityPair
+
+  # coordinates of the exon (recommended way):
+  print "exon from ", $exon->start(),
+  	" to ", $exon->end(), "\n";
+
+  # the same (feature1() inherited from Bio::SeqFeature::FeaturePair)
+  print "exon from ", $exon->feature1()->start(),
+  	" to ", $exon->feature1()->end(), "\n";
+  # also the same (query() inherited from Bio::SeqFeature::SimilarityPair):
+  print "exon from ", $exon->query()->start(),
+  	" to ", $exon->query()->end(), "\n";
+
+  # coordinates on the matching EST (recommended way):
+  print "matches on EST from ", $exon->est_hit()->start(),
+  	" to ", $exon->est_hit()->end(), "\n";
+
+  # the same (feature2() inherited from Bio::SeqFeature::FeaturePair)
+  print "matches on EST from ", $exon->feature2()->start(),
+  	" to ", $exon->feature2()->end(), "\n";
+  # also the same (subject() inherited from Bio::SeqFeature::SimilarityPair):
+  print "exon from ", $exon->subject()->start(),
+  	" to ", $exon->subject()->end(), "\n";
+
+=head1 DESCRIPTION
+
+This class inherits from Bio::SeqFeature::SimilarityPair and represents an
+exon on a genomic sequence determined by similarity, that is, by aligning an
+EST sequence (using Spidey in this case). Consequently, the notion of query and
+subject is always from the perspective of the genomic sequence: query refers
+to the genomic seq, subject to the aligned EST hit. Because of this,
+$exon-E<gt>start(), $exon-E<gt>end() etc will always return what you expect. 
+
+To get the coordinates on the matching EST, refer to the properties of the
+feature returned by L<est_hit>().
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions preferably
+ to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ryan Golhar
+
+Email golharam at umdnj.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Spidey::Exon;
+use strict;
+
+
+use base qw(Bio::SeqFeature::SimilarityPair);
+
+sub new {
+    my ($class,@args) = @_;
+    # build the object through the parent constructor; defaults follow below
+    my $self = $class->SUPER::new(@args);
+
+    my ($prim, $prim_tag, $source, $source_tag) = 
+	$self->_rearrange([qw(PRIMARY
+			      PRIMARY_TAG 
+			      SOURCE
+			      SOURCE_TAG)], 
+			  @args);
+
+    $self->primary_tag('exon') unless $prim || $prim_tag;
+    $self->source_tag('Spidey') unless $source || $source_tag;
+    $self->strand(0) unless defined($self->strand());
+    $self->query(); # NOTE(review): result unused; presumably called for a side effect
+    return $self; 
+}
+
+=head2 percentage_id
+
+ Title   : percentage_id
+ Usage   : $obj->percentage_id
+ Function: This is the percent id as reported by Spidey
+ Returns : value of percentage_id
+ Args    : 
+
+
+=cut
+
+sub percentage_id {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+	    my $new_value = shift;
+	    $self->{'percentage_id'} = $new_value;
+	    return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'percentage_id'};
+}
+
+=head2 est_hit
+
+ Title   : est_hit
+ Usage   : $est_feature = $obj->est_hit();
+ Function: Returns the EST hit pointing to (i.e., aligned to by Spidey) this
+           exon (i.e., genomic region). At present, merely a synonym for
+           $obj->feature2().
+ Returns : A Bio::SeqFeatureI implementing object.
+ Args    : 
+
+
+=cut
+
+sub est_hit {
+    my ($self, @hit) = @_;
+    return $self->feature2(@hit);   # est_hit() simply forwards to feature2()
+}
+
+=head2 mismatches
+
+ Title   : mismatches
+ Usage   : $obj->mismatches;
+ Function: Returns the mismatches of the cDNA to (i.e., aligned to by Spidey) this
+           exon (i.e., genomic region). 
+ Returns : value of mismatches.
+ Args    : 
+
+
+=cut
+
+sub mismatches {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+	    my $new_value = shift;
+	    $self->{'mismatches'} = $new_value;
+	    return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'mismatches'};
+}
+
+=head2 gaps
+
+ Title   : gaps
+ Usage   : $obj->gaps;
+ Function: Returns the gaps of the cDNA to (i.e., aligned to by Spidey) this
+           exon (i.e., genomic region). 
+ Returns : value of gaps.
+ Args    : 
+
+
+=cut
+
+sub gaps {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+	    my $new_value = shift;
+	    $self->{'gaps'} = $new_value;
+	    return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'gaps'};
+}
+
+=head2 donor
+
+ Title   : donor
+ Usage   : $obj->donor;
+ Function: Returns 0 if a splice donor site does not exist, or 
+           1 if a splice donor site exists
+ Returns : value of existence of donor splice site (0 or 1)
+ Args    :
+
+
+=cut
+
+sub donor {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+		my $new_value = shift;
+		$self->{'donor'} = $new_value;
+		return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'donor'};
+}
+
+=head2 acceptor
+
+ Title   : acceptor
+ Usage   : $obj->acceptor;
+ Function: Returns 0 if a splice acceptor site does not exist, or 
+           1 if a splice acceptor site exists
+ Returns : value of existence of acceptor splice site (0 or 1)
+ Args    :
+
+
+=cut
+
+sub acceptor {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+		my $new_value = shift;
+		$self->{'acceptor'} = $new_value;
+		return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'acceptor'};
+}
+
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Exon.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Results.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Results.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Results.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,527 @@
+# $Id: Results.pm,v 1.10.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tools::Spidey::Results
+#
+# Cared for by Ryan Golhar <golharam at umdnj.edu>
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Spidey::Results - Results of a Spidey run
+
+=head1 SYNOPSIS
+
+   use Bio::Tools::Spidey::Results;
+	my $spidey = Bio::Tools::Spidey::Results->new(-file => 'result.spidey' );
+
+	# or
+
+	my $spidey = Bio::Tools::Spidey::Results->new( -fh   => \*INPUT );
+
+	# get the exons before doing anything else
+	my $exonset = $spidey->next_exonset();
+
+	# parse the results
+	my @exons = $exonset->sub_SeqFeature();
+	print "Total no of Exons: ", scalar(@exons), "\n";
+
+	print "Genomic sequence length: ", $spidey->genomic_dna_length(), "\n";
+
+	# $exonset is-a Bio::SeqFeature::Generic with Bio::Tools::Spidey::Exons
+	# as sub features
+	print "Delimited on sequence ", $exonset->seq_id(), " from ", 
+		$exonset->start(), " to ", $exonset->end(), "\n";
+
+	foreach my $exon ( $exonset->sub_SeqFeature() ) {
+		# $exon is-a Bio::SeqFeature::FeaturePair
+		print "Exon from ", $exon->start, " to ", $exon->end, 
+			" on strand ", $exon->strand(), "\n";
+		# you can get out what it matched using the est_hit attribute
+		my $homol = $exon->est_hit();
+		print "Matched to sequence ", $homol->seq_id, 
+			" at ", $homol->start," to ", $homol->end, "\n";
+	}
+
+	# essential if you gave a filename at initialization (otherwise 
+  	# the file stays open)
+	$spidey->close();
+
+=head1 DESCRIPTION
+
+The spidey module provides a parser and results object for spidey 
+output. The spidey results are specialised types of SeqFeatures, 
+meaning you can add them to AnnSeq objects fine, and manipulate them 
+in the "normal" seqfeature manner.
+
+The spidey Exon objects are Bio::SeqFeature::FeaturePair inherited 
+objects. The $esthit = $exon-E<gt>est_hit() is the alignment as a 
+feature on the matching object (normally, a cDNA), in which the 
+start/end points are where the hit lies.
+
+To make this module work sensibly you need to run
+
+     spidey -i genomic.fasta -m cDNA.fasta
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ryan Golhar
+
+Email golharam at umdnj.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. 
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::Spidey::Results;
+use strict;
+
+
+use File::Basename;
+use Bio::Root::Root;
+use Bio::Tools::Spidey::Exon;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+
+sub _initialize_state {
+    my($self,@args) = @_;
+
+    # call the inherited method first (its return value is not used here)
+    my $make = $self->SUPER::_initialize_state(@args);
+
+#    my ($est_is_first) = $self->_rearrange([qw(ESTFIRST)], @args);
+
+#    delete($self->{'_est_is_first'});
+#    $self->{'_est_is_first'} = $est_is_first if(defined($est_is_first));
+    return $self->analysis_method("Spidey"); # same value the sub returned implicitly before
+}
+
+=head2 analysis_method
+
+ Usage     : $spidey->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /Spidey/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method { 
+    # a true $method that does not match /Spidey/i is rejected via throw()
+    my $self   = shift;
+    my $method = shift;
+    $self->throw("method $method not supported in " . ref($self))
+	if $method && $method !~ /Spidey/i;
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 parse_next_alignment
+
+ Title   : parse_next_alignment
+ Usage   : @exons = $spidey_result->parse_next_alignment;
+           foreach $exon (@exons) {
+               # do something
+           }
+ Function: Parses the next alignment of the Spidey result file and returns the
+           found exons as an array of Bio::Tools::Spidey::Exon objects. Call
+           this method repeatedly until an empty array is returned to get the
+           results for all alignments.
+ Example :
+ Returns : An array of Bio::Tools::Spidey::Exon objects
+ Args    :
+
+
+=cut
+
+sub parse_next_alignment {
+	my ($self) = @_;
+	my $started = 0;
+	my ($version) = 0;
+	my %seq1props = ();
+	my %seq2props = ();
+	my ($strand) = 0; # 1 = plus, -1 = minus
+	my ($exoncount) = -1;
+	my @exons = ();
+	# seq1props is filled from the 'Genomic:' line, seq2props from 'mRNA:'
+	# we refer to the properties of each seq by reference
+#	my ($estseq, $genomseq, $to_reverse);
+#	my $hit_direction = 1;
+	# read until a second '--SPIDEY version' header or the end of the input
+	while(defined($_ = $self->_readline())) {
+		chomp();
+
+		#
+		# basically, parse a Spidey result...
+		#
+		# matches: --SPIDEY version 1.40--
+		/^--SPIDEY version (\d+\.\d+)--/ && do {
+			if($started) {
+				$self->_pushback($_);
+				last;
+			}
+			$version = $1;
+			if ($version != 1.40) {
+				$self->throw("Spidey parser only designed to work with Spidey v1.40\n");
+			}
+			$started = 1;
+			next;
+		};
+		# matches: Genomic: lcl|some_name other information, 1234 bp
+		/^Genomic:\s([\w|\.]+)\s[\w\s]+,\s(\d+)\sbp/ && do {
+			# $seq1props{'filename'} = $1;
+			$seq1props{'seqname'} = $1;
+			$seq1props{'length'} = $2;	   
+			$self->genomic_dna_length($seq1props{'length'});
+			next;
+		};
+		# matches: mRNA:
+		/^mRNA:\s([\w|\.]+)\s[\w\s]+,\s(\d+)\sbp/ && do {
+			# $seq2props{'filename'} = $1;
+			$seq2props{'seqname'} = $1;
+			$seq2props{'length'} = $2;
+			next;
+		};
+		/^Strand:/ && do {
+			if (/plus/) {
+				$strand = 1;
+			} else {
+				$strand = -1;
+			}
+			next;
+		};
+		/^Number of exons: (\d+)/ && do {
+			$exoncount = $1;
+			
+			my ($genomic_start, $genomic_stop, $cdna_start, $cdna_stop, $id, $mismatches, $gaps, $splice_donor, $splice_acceptor, $uncertain);
+
+			# the next $exoncount lines contains information about the matches of each exon.  
+			# we should parse this information here
+			for (my $ec = 1; $ec <= $exoncount; $ec++) {
+				if (defined($_ = $self->_readline())) {
+					chomp();
+					
+					if (/^Exon\s${ec}[\(\)-]*:\s(\d+)-(\d+)\s\(gen\)\s+(\d+)-(\d+)\s\(mRNA\)\s+id\s([\d\.inf-]+)%\s+mismatches\s(\d+)\s+gaps\s(\d+)\s+splice\ssite\s\(d\s+a\):\s(\d+)\s+(\d+)\s*(\w*)/) {
+						$genomic_start = $1;
+						$genomic_stop = $2;
+						$cdna_start = $3;
+						$cdna_stop = $4;
+						$id = $5;
+						$mismatches = $6;
+						$gaps = $7;
+						$splice_donor = $8;
+						$splice_acceptor = $9;
+						$uncertain = $10;
+					} else {
+						$self->throw( "Failed to match anything:\n$_\n");
+					}
+
+					my $exon = Bio::Tools::Spidey::Exon->new('-start'  => $genomic_start,
+										'-end'    => $genomic_stop,
+										'-strand' => $strand);
+					$exon->seq_id($seq1props{'seqname'});
+
+					# feature1 is supposed to be initialized to a Similarity object, but we provide a safety net
+					if ($exon->feature1()->can('seqlength')) {
+						$exon->feature1()->seqlength($seq1props{'length'});
+					} else {
+						$exon->feature1()->add_tag_value('seqlength', $seq1props{'length'});
+					}
+
+					# create and initialize the feature wrapping the 'hit' (the cDNA)
+					my $fea2 = Bio::SeqFeature::Similarity->new('-start'   => $cdna_start,
+										    '-end'     => $cdna_stop,
+										    '-strand'  => $strand,
+										    '-primary' => "aligning_cDNA");
+					$fea2->seq_id($seq2props{'seqname'});
+					$fea2->seqlength($seq2props{'length'});
+					# store
+					$exon->est_hit($fea2);	   
+
+					# general properties (taken from the copies saved right after the match)
+					$exon->source_tag($self->analysis_method());
+					$exon->percentage_id($id);
+					$exon->mismatches($mismatches);
+					$exon->gaps($gaps);
+					$exon->donor($splice_donor);
+					$exon->acceptor($splice_acceptor);
+
+					# push onto array
+					push(@exons, $exon);
+				} else {
+					$self->throw("Unexpected end of file reached\n");
+				}
+			}
+			next;
+		};
+		/^Number of splice sites: (\d+)/ && do {
+			$self->splicesites($1);	
+			next;
+		};
+		/^mRNA coverage: (\d+)%/ && do {
+			$self->est_coverage($1);
+			next;
+		};
+		/^overall percent identity: ([\d\.]+)%/ && do {
+			$self->overall_percentage_id($1);
+		};
+		/^Missing mRNA ends: (\w+)/ && do {
+			$self->missing_mrna_ends($1);
+		};
+		# Typical format:
+		# 	Exon 1: 36375798-36375691 (gen)  1-108 (mRNA)
+		#
+		#
+		#	CCTCTTTTTCTTTGCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
+		#        	   |  ||||||||||||||||||||||||||||||||||||||||||||||
+		#	          ATGTCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
+		#	           M  S  G  Y  I  P  S  Y  L  D  K  D  E  L  C  V  V 
+		#
+		#
+		#	ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAGGT
+		#	||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+		#	ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAG
+		#	  C  G  D  K  A  T  G  Y  H  Y  R  C  I  T  C  E  G  C  K 
+		#
+		#
+		#	AAATGGCA
+		#
+		/^Exon (\d+): (\d+)-(\d+) \(gen\)\s+(\d+)-(\d+) \(mRNA\)/ && do {
+			my ($exon_num, $gen_start, $gen_stop, $cdna_start, $cdna_stop); # captured, but not used further yet
+			$exon_num = $1;
+			$gen_start = $2;
+			$gen_stop = $3;
+			$cdna_start = $4;
+			$cdna_stop = $5;			
+		}
+	}
+	return @exons;
+}
+
+=head2 next_exonset
+
+ Title   : next_exonset
+ Usage   : $exonset = $spidey_result->next_exonset;
+           print "Exons start at ", $exonset->start(), 
+                 "and end at ", $exonset->end(), "\n";
+           foreach $exon ($exonset->sub_SeqFeature()) {
+               # do something
+           }
+ Function: Parses the next alignment of the Spidey result file and returns the
+           set of exons as a container of features. The container is itself
+           a Bio::SeqFeature::Generic object, with the Bio::Tools::Spidey::Exon
+           objects as sub features. Start, end, and strand of the container
+           will represent the total region covered by the exons of this set.
+
+           See the documentation of parse_next_alignment() for further
+           reference about parsing and how the information is stored.
+
+ Example : 
+ Returns : A Bio::SeqFeature::Generic object holding Bio::Tools::Spidey::Exon
+           objects as sub features.
+ Args    :
+
+=cut
+
+sub next_exonset {
+    my ($self) = @_;
+
+    # parse the next alignment; return nothing when no exons came back
+    my @exon_list = $self->parse_next_alignment();
+    return unless @exon_list;
+    # the first exon seeds position, strand, source and sequence id of the
+    # container feature
+    my $first = $exon_list[0];
+    my $container = Bio::SeqFeature::Generic->new(
+	'-start'   => $first->start(),
+	'-end'     => $first->end(),
+	'-strand'  => $first->strand(),
+	'-primary' => "ExonSet");
+    $container->source_tag($first->source_tag());
+    $container->seq_id($first->seq_id());
+    # attach every exon as a sub feature, requesting EXPANsion of the region
+    for my $member (@exon_list) {
+	$container->add_sub_SeqFeature($member, 'EXPAND');
+    }
+    return $container;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($exonset = $spidey->next_feature()) {
+                  # do something
+           }
+ Function: Does the same as L<next_exonset()>. See there for documentation of
+           the functionality. Call this method repeatedly until FALSE is
+           returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_exonset() at present.
+
+ Example :
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    :
+
+=cut
+
+sub next_feature {
+    my ($self,@args) = @_;
+    # next_exonset() expects no arguments at present (and neither does this
+    # method); whatever was passed is forwarded anyway so that callers keep
+    # working should that ever change
+    return $self->next_exonset(@args);
+}
+
+=head2 genomic_dna_length
+
+ Title   : genomic_dna_length
+ Usage   : $spidey->genomic_dna_length();
+ Function: Returns the length of the genomic DNA used in this Spidey result
+ Example :
+ Returns : An integer value.
+ Args    :
+
+=cut
+
+sub genomic_dna_length {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+		my $new_value = shift;
+		$self->{'genomic_dna_length'} = $new_value;
+		return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'genomic_dna_length'};
+}
+
+=head2 splicesites
+
+ Title   : splicesites
+ Usage   : $spidey->splicesites();
+ Function: Returns the number of splice sites found in this Spidey result
+ Example :
+ Returns : An integer value.
+ Args    :
+
+=cut
+
+sub splicesites {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+		my $new_value = shift;
+		$self->{'splicesites'} = $new_value;
+		return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'splicesites'};
+}
+
+=head2 est_coverage
+
+ Title   : est_coverage
+ Usage   : $spidey->est_coverage();
+ Function: Returns the percent of est coverage in this Spidey result
+ Example :
+ Returns : An integer value.
+ Args    :
+
+=cut
+
+sub est_coverage {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+		my $new_value = shift;
+		$self->{'est_coverage'} = $new_value;
+		return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'est_coverage'};
+}
+
+=head2 overall_percentage_id
+
+ Title   : overall_percentage_id
+ Usage   : $spidey->overall_percentage_id();
+ Function: Returns the overall percent id in this Spidey result
+ Example :
+ Returns : A float value.
+ Args    :
+
+=cut
+
+sub overall_percentage_id {
+	my $self = shift;
+
+	# with an argument: store it and return the value just stored
+	if (@_) {
+		my $new_value = shift;
+		$self->{'overall_percentage_id'} = $new_value;
+		return $new_value;
+	}
+	# without an argument: return whatever is currently stored
+	return $self->{'overall_percentage_id'};
+}
+
+=head2 missing_mrna_ends
+
+ Title   : missing_mrna_ends
+ Usage   : $spidey->missing_mrna_ends();
+ Function: Returns left/right/neither from Spidey
+ Example :
+ Returns : A string value.
+ Args    :
+
+=cut
+
+sub missing_mrna_ends
+{
+        my $self = shift;
+
+        # with an argument: store it and return the value just stored
+        if (@_) {
+                my $new_value = shift;
+                $self->{'missing_mrna_ends'} = $new_value;
+                return $new_value;
+        }
+
+        # without an argument: return whatever is currently stored
+        return $self->{'missing_mrna_ends'};
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Spidey/Results.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Tmhmm.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Tmhmm.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Tmhmm.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,136 @@
+# $Id: Tmhmm.pm,v 1.10.4.2 2006/10/19 18:44:40 jason Exp $
+#
+# BioPerl module for Bio::Tools::Tmhmm
+#
+# Original copyright Balamurugan Kumarasamy
+# Re-written cleanly by Torsten Seemann, Sep 2006
+#
+# Copyright:
+# You may distribute this module under the same terms as Perl itself
+
+=head1 NAME
+
+Bio::Tools::Tmhmm - parse TMHMM output (TransMembrane HMM)
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Tmhmm;
+  my $parser = Bio::Tools::Tmhmm->new(-fh => $filehandle );
+  while ( my $tmhmm_feat = $parser->next_result ) {
+     # do something, e.g.
+     push @tmhmm_feat, $tmhmm_feat;
+  }
+
+=head1 DESCRIPTION
+
+TMHMM is software for the prediction of transmembrane helices in proteins.
+See  L<http://www.cbs.dtu.dk/services/TMHMM/> for more details.
+
+This module parses the "long output" format of TMHMM 2.0 and
+creates a Bio::SeqFeature::Generic object for each C<TMHelix> feature found
+from lines like this:
+
+  my_sequence_id  TMHMM2.0  TMhelix     54    76
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Torsten Seemann
+
+Email torsten.seemann AT infotech.monash.edu.au
+
+=head1 CONTRIBUTOR - Bala
+
+Email savikalpa at fugu-sg.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Tmhmm;
+
+use strict;
+
+use Bio::Tools::AnalysisResult;
+use Bio::Root::Root;
+use Bio::Root::IO;
+
+use base qw(Bio::Root::Root Bio::Root::IO Bio::Tools::AnalysisResult);
+
+use Bio::SeqFeature::Generic;
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = Bio::Tools::Tmhmm->new();
+ Function: Builds a new Bio::Tools::Tmhmm object
+ Returns : Bio::Tools::Tmhmm
+ Args    : Either of the following as per L<Bio::Root::IO> interface
+             -fh   => $filehandle 
+             -file => $filename
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+  $self->_initialize_io(@args);  # hands the -fh / -file arguments to the IO layer
+  return $self;
+}
+
+
+=head2 next_result
+
+ Title   : next_result
+ Usage   : my $feat = $Tmhmm->next_result
+ Function: Get the next result set from parser data
+ Returns : Bio::SeqFeature::Generic
+ Args    : none
+
+=cut
+
+sub next_result {
+  my $self = shift;
+  # # my_sequence_id Length: 178
+  # my_sequence_id  TMHMM2.0  outside      1    53
+  # my_sequence_id  TMHMM2.0  TMhelix     54    76
+  # my_sequence_id  TMHMM2.0  inside      77   115
+  # only TMhelix lines yield a feature; every other line is skipped
+  while (defined(my $line = $self->_readline)) { 
+    if ( $line =~ m/^(\S+)\s+(\S+)\s+(TMhelix)\s+(\d+)\s+(\d+)$/i ) {
+       return Bio::SeqFeature::Generic->new(
+	 -primary => 'transmembrane',
+         -seq_id  => $1,
+	 -source  => $2,
+	 -start   => $4,
+	 -end     => $5,
+       );
+    }
+  }
+  return;  # input exhausted without another TMhelix line
+}
+
+1;
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/Tmhmm.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/dpAlign.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/dpAlign.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/dpAlign.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,671 @@
+## $Id: dpAlign.pm,v 1.18.4.2 2006/11/17 09:32:42 sendu Exp $
+
+# BioPerl module for Bio::Tools::dpAlign
+#
+# Cared for by Yee Man Chan <ymc at yahoo.com>
+#
+# Copyright Yee Man Chan
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::dpAlign - Perl extension to do pairwise dynamic programming sequence alignment
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::dpAlign;
+  use Bio::SeqIO;
+  use Bio::SimpleAlign;
+  use Bio::AlignIO;
+  use Bio::Matrix::IO;
+
+  $seq1 = new Bio::SeqIO(-file => $ARGV[0], -format => 'fasta');
+  $seq2 = new Bio::SeqIO(-file => $ARGV[1], -format => 'fasta');
+
+  # create a dpAlign object
+  # to do global alignment, specify DPALIGN_GLOBAL_MILLER_MYERS
+  # to do ends-free alignment, specify DPALIGN_ENDSFREE_MILLER_MYERS
+  $factory = new Bio::Tools::dpAlign(-match => 3,
+                     -mismatch => -1,
+                     -gap => 3,
+                     -ext => 1,
+                     -alg => Bio::Tools::dpAlign::DPALIGN_LOCAL_MILLER_MYERS);
+
+  # actually do the alignment
+  $out = $factory->pairwise_alignment($seq1->next_seq, $seq2->next_seq);
+  $alnout = new Bio::AlignIO(-format => 'pfam', -fh => \*STDOUT);
+  $alnout->write_aln($out);
+
+  # To do protein alignment, set the sequence type to protein
+  # By default all protein alignments are using BLOSUM62 matrix
+  # the gap opening cost is 7 and gap extension is 1. These
+  # values are from ssearch. To use your own custom substitution 
+  # matrix, you can create a Bio::Matrix::MatrixI object.
+
+  $parser = new Bio::Matrix::IO(-format => 'scoring', -file => 'blosum50.mat');
+  $matrix = $parser->next_matrix;
+  $factory = new Bio::Tools::dpAlign(-matrix => $matrix, -alg => Bio::Tools::dpAlign::DPALIGN_LOCAL_MILLER_MYERS);
+  $seq1->alphabet('protein');
+  $seq2->alphabet('protein');
+  $out = $factory->pairwise_alignment($seq1->next_seq, $seq2->next_seq);
+  $alnout->write_aln($out);
+
+  # use the factory to make some output
+
+  $factory->align_and_show($seq1, $seq2, STDOUT);
+
+  # use Phil Green's algorithm to calculate the optimal local
+  # alignment score between two sequences quickly. It is very
+  # useful when you are searching a query sequence in a database
+  # of sequences. Since finding a alignment is more costly 
+  # than just calculating scores, you can save time if you only 
+  # align sequences that have a high alignment score.
+
+  # To use this feature, first you call the sequence_profile function
+  # to obtain the profile of the query sequence.
+  $profile = $factory->sequence_profile($query);
+
+  %scores = ();
+  # Then use a loop to run a database of sequences against the
+  # profile to obtain a table of alignment scores
+  $dbseq = new Bio::SeqIO(-file => 'dbseq.fa', -format => 'fasta');
+  while (defined($seq = $dbseq->next_seq)) {
+      $scores{$seq->id} = $factory->pairwise_alignment_score($profile, $seq);
+  }
+
+=head1 DESCRIPTION
+
+Dynamic Programming approach is considered to be the most sensitive
+way to align two biological sequences. There are currently three major
+types of dynamic programming algorithms: Global Alignment, Local
+Alignment and Ends-free Alignment.
+
+Global Alignment compares two sequences in their entirety.  By
+inserting gaps in the two sequences, it aligns two sequences to
+minimize the edit distance as defined by the gap cost function and the
+substitution matrix. Global Alignment is generally applied to two
+sequences that are very similar in length and content.
+
+Local Alignment instead attempts to find out the subsequences that have
+the minimal edit distance among all possible subsequences.  It is good
+for sequences that have a stretch of subsequences that are similar to
+each other.
+
+Ends-free Alignment is a special case of Global Alignment. There are
+no gap penalty imposed for the gaps that extended from the end points
+of two sequences. Therefore it will be a good application when you
+think one sequence is contained by the other or when you think two
+sequences overlap each other.
+
+Dynamic Programming was first introduced by Needleman-Wunsch (1970) to
+globally align two sequences. The idea of local alignment was later
+introduced by Smith-Waterman (1981). Gotoh (1982) improved both
+algorithms by introducing auxiliary arrays that reduced the time
+complexity of the algorithms to O(m*n).  Miller-Myers (1988) exploits
+the divide-and-conquer idea introduced by Hirschberg (1975) to solve
+the affine gap cost dynamic programming using only linear space. At
+the time of this writing, it is accepted that Miller-Myers is the
+fastest single CPU implementation and using the least memory that is
+truly equivalent to original algorithm introduced by
+Needleman-Wunsch. According to Aaron Mackey, Phil Green's SWAT
+implementation introduced a heuristic that does not consider paths
+through the matrix where the score would be less than the gap opening
+penalty, skipping the calculation of some cells and yielding a
+1.5-2X speedup on most comparisons. However, his approach is only good for
+calculating the minimum edit distance and find out the corresponding
+subsequences (aka search phase). Bill Pearson's popular dynamic
+programming alignment program SSEARCH uses Phil Green's algorithm to
+find the subsequences and then Miller-Myers's algorithm to find the
+actual alignment. (aka alignment phase)
+
+The current implementation supports local alignment of either DNA
+sequences or protein sequences. It allows you to specify either the
+Miller-Myers Global Alignment (DPALIGN_GLOBAL_MILLER_MYERS) or
+Miller-Myers Local Alignment (DPALIGN_LOCAL_MILLER_MYERS). For DNA
+alignment, you can specify the scores for match, mismatch, gap opening
+cost and gap extension cost. For protein alignment, it is using
+BLOSUM62 by default. Currently the substitution matrix is not
+configurable.
+
+=head1 DEPENDENCIES
+
+This package comes with the main bioperl distribution. You also need
+to install the latest bioperl-ext package which contains the XS code
+that implements the algorithms. This package won't work if you haven't
+compiled the bioperl-ext package.
+
+=head1 TO-DO
+
+
+=over 3
+
+=item 1.
+
+Support IUPAC code for DNA sequence
+
+=item 2.
+
+Allow custom substitution matrix for DNA. Note that for proteins, you
+can now use your own substitution matrix.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+        This implementation was written by Yee Man Chan (ymc at yahoo.com).
+        Copyright (c) 2003 Yee Man Chan. All rights reserved. This program
+        is free software; you can redistribute it and/or modify it under
+        the same terms as Perl itself. Special thanks to Aaron Mackey
+        and William Pearson for the helpful discussions. [The portion
+        of code inside pgreen subdirectory was borrowed from ssearch. It
+        should be distributed in the same terms as ssearch.]
+
+=cut
+
+package Bio::Tools::dpAlign;
+
+use Bio::SimpleAlign;
+
+use base qw(Bio::Tools::AlignFactory);
+
+# Gotoh algorithm as defined in J. Mol. Biol. (1982) 162, 705-708
+# use constant DSW_GOTOH => 1;
+# Hirschberg's algorithm as defined in Myers & Miller in 
+# CABIOS, Vol 4, No. 1, 1988, p 11-17
+# This algorithm is used in both the search phase and the
+# alignment phase.
+use constant DPALIGN_LOCAL_MILLER_MYERS => 1;
+use constant DPALIGN_GLOBAL_MILLER_MYERS => 2;
+use constant DPALIGN_ENDSFREE_MILLER_MYERS => 3;
+# my toy algorithm that tries to do SW as fast as possible
+# use constant DSW_FSW => 3; 
+# Phil Green's approximation to Smith-Waterman. It avoid calculations
+# that might result in a score less than the opening gap penalty.
+# This is the algorithm used by ssearch. Phil Green's algorithm is
+# used in the search phase while Miller-Myers algorithm is used in
+# the alignment phase
+#use constant DPALIGN_LOCAL_GREEN => 2; 
+
+BEGIN {
+    # The alignment engine is the XS module Bio::Ext::Align. Abort at
+    # compile time with an installation hint when it cannot be loaded.
+    eval {
+        require Bio::Ext::Align;
+    };
+    if ( $@ ) {
+        # die() never returns, so no explicit exit is needed afterwards.
+        die("\nThe C-compiled engine for Smith Waterman alignments (Align) has not been installed.\n Please install the bioperl-ext package\n\n");
+    }
+}
+
+# Constructor.
+#
+# Named arguments (all optional):
+#   -match    => match score, non-negative integer (default 3)
+#   -mismatch => mismatch penalty, integer <= 0 (default -1)
+#   -gap      => gap opening penalty, non-negative integer (default 3)
+#   -ext      => gap extension penalty, non-negative integer (default 1)
+#   -alg      => one of the DPALIGN_*_MILLER_MYERS constants
+#                (default DPALIGN_LOCAL_MILLER_MYERS)
+#   -matrix   => a Bio::Matrix::MatrixI object; it is copied into a
+#                Bio::Ext::Align::ScoringMatrix. Without it the 'matrix'
+#                slot is set to 0.
+#
+# Throws if a score/penalty is not an integer of the expected sign or the
+# algorithm is not one of the three known constants.
+sub new {
+   my ($class, @args) = @_;
+
+   my $self = $class->SUPER::new(@args);
+
+   my ($match, $mismatch, $gap, $ext, $alg, $matrix) =
+       $self->_rearrange([qw(MATCH MISMATCH GAP EXT ALG MATRIX)], @args);
+
+   # Apply the defaults for every parameter the caller left out.
+   $self->match(3) unless defined $match;
+   $self->mismatch(-1) unless defined $mismatch;
+   $self->gap(3) unless defined $gap;
+   $self->ext(1) unless defined $ext;
+   $self->alg(DPALIGN_LOCAL_MILLER_MYERS) unless defined $alg;
+
+   if (defined $match) {
+       if ($match =~ /^\d+$/) {
+           $self->match($match);
+       }
+       else {
+           $self->throw("Match score must be a number, not [$match]");
+       }
+   }
+
+   if (defined $mismatch) {
+       # Validate $mismatch itself (the check used to look at $match) and
+       # accept a leading minus sign, since the penalty is expected to be
+       # zero or negative; the mismatch() setter enforces the sign.
+       if ($mismatch =~ /^-?\d+$/) {
+           $self->mismatch($mismatch);
+       }
+       else {
+           $self->throw("Mismatch penalty must be a number, not [$mismatch]");
+       }
+   }
+
+   if (defined $gap) {
+       if ($gap =~ /^\d+$/) {
+           $self->gap($gap);
+       }
+       else {
+           $self->throw("Gap penalty must be a number, not [$gap]");
+       }
+   }
+
+   if (defined $ext) {
+       if ($ext =~ /^\d+$/) {
+           $self->ext($ext);
+       }
+       else {
+           $self->throw("Extension penalty must be a number, not [$ext]");
+       }
+   }
+
+   if (defined $alg) {
+       if ($alg == DPALIGN_LOCAL_MILLER_MYERS or $alg == DPALIGN_GLOBAL_MILLER_MYERS or $alg == DPALIGN_ENDSFREE_MILLER_MYERS) {
+           $self->alg($alg);
+       }
+       else {
+           $self->throw("Algorithm must be either 1, 2 or 3");
+       }
+   }
+
+   if (defined $matrix and $matrix->isa('Bio::Matrix::MatrixI')) {
+       # Copy the user-supplied substitution matrix, cell by cell, into the
+       # engine's own scoring matrix (built with the gap/ext set above).
+       $self->{'matrix'} = Bio::Ext::Align::ScoringMatrix->new(join("", $matrix->row_names), $self->gap, $self->ext);
+       foreach my $rowname ($matrix->row_names) {
+           foreach my $colname ($matrix->column_names) {
+               Bio::Ext::Align::ScoringMatrix->set_entry($self->{'matrix'}, $rowname, $colname, $matrix->entry($rowname, $colname));
+           }
+       }
+   }
+   else {
+       $self->{'matrix'} = 0;
+   }
+
+   return $self;
+}
+
+=head2 sequence_profile
+
+ Title   : sequence_profile
+ Usage   : $prof = $factory->sequence_profile($seq1)
+ Function: Makes a dpAlign_SequenceProfile object from one sequence
+ Returns : A dpAlign_SequenceProfile object
+ Args    : The lone argument is a Bio::PrimarySeqI that we want to 
+	   build a profile for. Usually, this would be the Query sequence
+
+=cut
+
+sub sequence_profile {
+    my ($self, $seq1) = @_;
+
+    # Warn and return nothing when the argument is not a sequence object.
+    if( ! defined $seq1 || ! $seq1->isa('Bio::PrimarySeqI')) {
+        $self->warn("Cannot call sequence_profile without specifying one sequence (Bio::PrimarySeqI object)");
+        return;
+    }
+
+    # fix Jitterbug #1044
+    if( $seq1->length() < 2) {
+        $self->warn("cannot create sequence profile with length less than 2");
+        return;
+    }
+    # create engine objects
+    $seq1->display_id('seq1') unless ( defined $seq1->id() );
+
+    my $alphabet = $seq1->alphabet;
+    if ($alphabet eq 'dna') {
+        return Bio::Ext::Align::SequenceProfile->dna_new($seq1->seq, $self->{'match'}, $self->{'mismatch'}, $self->{'gap'}, $self->{'ext'});
+    }
+    if ($alphabet eq 'protein') {
+        return Bio::Ext::Align::SequenceProfile->protein_new($seq1->seq, $self->{'matrix'});
+    }
+
+    # Carp is not imported into this package, so raise the error through
+    # the object's own throw() like the rest of the module does.
+    $self->throw("There is currently no support for the types of sequences you want to align!\n");
+}
+
+=head2 pairwise_alignment_score
+
+ Title   : pairwise_alignment_score
+ Usage   : $score = $factory->pairwise_alignment_score($prof,$seq2)
+ Function: Calculates the alignment score between a sequence profile and a sequence
+ Returns : An integer that is the score of the optimal alignment.
+ Args    : The first argument is the sequence profile obtained from a
+	   call to the sequence_profile function. The second argument 
+	   is a Bio::PrimarySeqI object to be aligned. The second argument
+	   is usually a sequence in the database sequence. Note
+	   that this function only uses Phil Green's algorithm and 
+	   therefore theoretically may not always give you the optimal
+	   score.
+
+=cut
+
+sub pairwise_alignment_score {
+    my ($self, $prof, $seq2) = @_;
+
+    # Both a profile (from sequence_profile) and a sequence object are
+    # required; otherwise warn and return nothing.
+    if( ! defined $prof || ! $prof->isa('Bio::Ext::Align::SequenceProfile') || 
+        ! defined $seq2 || ! $seq2->isa('Bio::PrimarySeqI') ) {
+        $self->warn("Cannot call pairwise_alignment_score without specifing 2 sequences (Bio::PrimarySeqI objects)");
+        return;
+    }
+    # fix Jitterbug #1044
+    if( $seq2->length() < 2) {
+        $self->warn("cannot align sequences with length less than 2");
+        return;
+    }
+    $self->set_memory_and_report();
+    # create engine objects
+    $seq2->display_id('seq2') unless ( defined $seq2->id() );
+
+    # Profile and sequence must share the same alphabet.
+    if ($prof->alphabet eq 'dna' and $seq2->alphabet eq 'dna') {
+        return Bio::Ext::Align::Score_DNA_Sequences($prof, $seq2->seq);
+    }
+    if ($prof->alphabet eq 'protein' and $seq2->alphabet eq 'protein') {
+        return Bio::Ext::Align::Score_Protein_Sequences($prof, $seq2->seq);
+    }
+
+    # Carp is not imported into this package, so raise the error through
+    # the object's own throw() like the rest of the module does.
+    $self->throw("There is currently no support for the types of sequences you want to align!\n");
+}
+
+=head2 pairwise_alignment
+
+ Title   : pairwise_alignment
+ Usage   : $aln = $factory->pairwise_alignment($seq1,$seq2)
+ Function: Makes a SimpleAlign object from two sequences
+ Returns : A SimpleAlign object if there is an alignment with positive
+	   score. Otherwise, return undef.
+ Args    : The first and second arguments are both Bio::PrimarySeqI
+	   objects that are to be aligned.
+
+=cut
+
+sub pairwise_alignment {
+    my ($self, $seq1, $seq2) = @_;
+
+    # Two sequence objects are required; otherwise warn and return nothing.
+    if( ! defined $seq1 || ! $seq1->isa('Bio::PrimarySeqI') ||
+        ! defined $seq2 || ! $seq2->isa('Bio::PrimarySeqI') ) {
+        $self->warn("Cannot call pairwise_alignment without specifing 2 sequences (Bio::PrimarySeqI objects)");
+        return;
+    }
+    # fix Jitterbug #1044
+    if( $seq1->length() < 2 ||
+        $seq2->length() < 2 ) {
+        $self->warn("cannot align sequences with length less than 2");
+        return;
+    }
+    $self->set_memory_and_report();
+    # create engine objects
+    $seq1->display_id('seq1') unless ( defined $seq1->id() );
+    $seq2->display_id('seq2') unless ( defined $seq2->id() );
+
+    # Dispatch to the XS engine; both sequences must share one alphabet.
+    my $aln;
+    if ($seq1->alphabet eq 'dna' and $seq2->alphabet eq 'dna') {
+        $aln = Bio::Ext::Align::Align_DNA_Sequences($seq1->seq, $seq2->seq, $self->{'match'}, $self->{'mismatch'}, $self->{'gap'}, $self->{'ext'}, $self->{'alg'});
+    }
+    elsif ($seq1->alphabet eq 'protein' and $seq2->alphabet eq 'protein') {
+        $aln = Bio::Ext::Align::Align_Protein_Sequences($seq1->seq, $seq2->seq, $self->{'matrix'}, $self->{'alg'});
+    }
+    else {
+        # Carp is not imported into this package, so raise the error
+        # through the object's own throw() like the rest of the module.
+        $self->throw("There is currently no support for the types of sequences you want to align!\n");
+    }
+
+    # The engine gave back no alignment: return nothing.
+    if (not defined $aln or $aln == 0) {
+        return;
+    }
+
+    # Wrap the two aligned strings in a SimpleAlign carrying the score.
+    my $out = Bio::SimpleAlign->new();
+
+    $out->add_seq(Bio::LocatableSeq->new(-seq => $aln->aln1,
+                                         -start => $aln->start1,
+                                         -end => $aln->end1,
+                                         -id => $seq1->id));
+
+    $out->add_seq(Bio::LocatableSeq->new(-seq => $aln->aln2,
+                                         -start => $aln->start2,
+                                         -end => $aln->end2,
+                                         -id => $seq2->id));
+    $out->score($aln->score);
+    return $out;
+}
+
+=head2 align_and_show
+
+ Title   : align_and_show
+ Usage   : $factory->align_and_show($seq1,$seq2,STDOUT)
+
+=cut
+
+sub align_and_show {
+    # Aligns $seq1 and $seq2 and writes a pretty-printed alignment to $fh
+    # (STDOUT when no handle is given). Warns and returns nothing on bad
+    # arguments; returns nothing when the engine yields no alignment;
+    # throws when the two alphabets are unsupported or differ.
+    my ($self, $seq1, $seq2, $fh) = @_;
+    my ($aln, $out);
+
+    if (! defined $fh) {
+        $fh = \*STDOUT;
+    }
+    if( ! defined $seq1 || ! $seq1->isa('Bio::PrimarySeqI') ||
+        ! defined $seq2 || ! $seq2->isa('Bio::PrimarySeqI') ) {
+        $self->warn("Cannot call align_and_show without specifying 2 sequences (Bio::PrimarySeqI objects)");
+        return;
+    }
+    # fix Jitterbug #1044
+    if( $seq1->length() < 2 ||
+        $seq2->length() < 2 ) {
+        $self->warn("cannot align sequences with length less than 2");
+        return;
+    }
+    $self->set_memory_and_report();
+    # create engine objects
+    $seq1->display_id('seq1') unless ( defined $seq1->id() );
+    $seq2->display_id('seq2') unless ( defined $seq2->id() );
+
+    if ($seq1->alphabet eq 'dna' and $seq2->alphabet eq 'dna') {
+        $aln = Bio::Ext::Align::Align_DNA_Sequences($seq1->seq, $seq2->seq, $self->{'match'}, $self->{'mismatch'}, $self->{'gap'}, $self->{'ext'}, $self->{'alg'});
+    }
+    elsif ($seq1->alphabet eq 'protein' and $seq2->alphabet eq 'protein') {
+        $aln = Bio::Ext::Align::Align_Protein_Sequences($seq1->seq, $seq2->seq, $self->{'matrix'}, $self->{'alg'});
+    }
+    else {
+        # Carp is not imported into this package, so raise the error
+        # through the object's own throw() like the rest of the module.
+        $self->throw("There is currently no support for the types of sequences you want to align!\n");
+    }
+
+    # Same guard as pairwise_alignment: nothing to show without an
+    # alignment, and calling methods on an empty result would be fatal.
+    if (not defined $aln or $aln == 0) {
+        return;
+    }
+
+    $out = Bio::Ext::Align::AlnBlock->new();
+    my $a1 = $aln->aln1;
+    my $a2 = $aln->aln2;
+    my $first_col = undef;
+    my $last_col = undef;
+    my $col;
+    my $alu1;
+    my $alu2;
+    my $g1 = 0;    # gaps seen so far in the first aligned string
+    my $g2 = 0;    # gaps seen so far in the second aligned string
+
+# construct AlnBlock: one column, holding two units, per aligned position
+    for (my $i = 0; $i < length($a1); ++$i) {
+        $col = Bio::Ext::Align::AlnColumn->new();
+        $alu1 = Bio::Ext::Align::AlnUnit->new();
+        $alu2 = Bio::Ext::Align::AlnUnit->new();
+        $first_col = $col unless defined $first_col;
+        Bio::Ext::Align::AlnColumn::set_next($last_col, $col) if defined $last_col;
+
+        # A '-' in one string marks that unit as INSERT; the gap counter
+        # is bumped before the coordinates below are computed.
+        my ($label1, $label2) = ("SEQUENCE", "SEQUENCE");
+        if (substr($a1, $i, 1) eq "-") {
+            $label1 = "INSERT";
+            ++$g1;
+        }
+        elsif (substr($a2, $i, 1) eq "-") {
+            $label2 = "INSERT";
+            ++$g2;
+        }
+        Bio::Ext::Align::AlnUnit::set_text_label($alu1, $label1);
+        Bio::Ext::Align::AlnUnit::set_text_label($alu2, $label2);
+
+        Bio::Ext::Align::AlnUnit::set_start($alu1, $aln->start1+$i-$g1-2);
+        Bio::Ext::Align::AlnUnit::set_end($alu1, $aln->start1+$i-$g1-2);
+        Bio::Ext::Align::AlnUnit::set_start($alu2, $aln->start2+$i-$g2-2);
+        Bio::Ext::Align::AlnUnit::set_end($alu2, $aln->start2+$i-$g2-2);
+        Bio::Ext::Align::AlnColumn::add_alu($col, $alu1);
+        Bio::Ext::Align::AlnColumn::add_alu($col, $alu2);
+        $last_col = $col;
+    }
+    Bio::Ext::Align::AlnBlock::set_start($out, $first_col);
+
+    # Terminating column: both units labelled END at the alignment ends.
+    $col = Bio::Ext::Align::AlnColumn->new();
+    $alu1 = Bio::Ext::Align::AlnUnit->new();
+    $alu2 = Bio::Ext::Align::AlnUnit->new();
+    Bio::Ext::Align::AlnUnit::set_start($alu1, $aln->end1);
+    Bio::Ext::Align::AlnUnit::set_end($alu1, $aln->end1);
+    Bio::Ext::Align::AlnUnit::set_text_label($alu1, "END");
+    Bio::Ext::Align::AlnUnit::set_start($alu2, $aln->end2);
+    Bio::Ext::Align::AlnUnit::set_end($alu2, $aln->end2);
+    Bio::Ext::Align::AlnUnit::set_text_label($alu2, "END");
+    Bio::Ext::Align::AlnColumn::add_alu($col, $alu1);
+    Bio::Ext::Align::AlnColumn::add_alu($col, $alu2);
+    Bio::Ext::Align::AlnColumn::set_next($last_col, $col);
+
+    Bio::Ext::Align::write_pretty_str_align($out,$seq1->id,$seq1->seq,$seq2->id,$seq2->seq,12,50,$fh);
+}
+
+=head2 match
+
+ Title     : match 
+ Usage     : $match = $factory->match() #get
+           : $factory->match($value) #set
+ Function  : the set get for the match score
+ Example   :
+ Returns   : match value
+ Arguments : new value
+
+=cut
+
+sub match {
+    my $self  = shift;
+    my $score = shift;
+
+    # Getter: no (defined) value supplied, report the stored score.
+    return $self->{'match'} unless defined $score;
+
+    # Setter: zero is allowed, negative scores are rejected.
+    $self->throw("Can't have a match score less than 0") if $score < 0;
+    $self->{'match'} = $score;
+    return $self->{'match'};
+}
+
+=head2 mismatch
+
+ Title     : mismatch 
+ Usage     : $mismatch = $factory->mismatch() #get
+           : $factory->mismatch($value) #set
+ Function  : the set get for the mismatch penalty
+ Example   :
+ Returns   : mismatch value
+ Arguments : new value
+
+=cut
+
+sub mismatch {
+    my $self    = shift;
+    my $penalty = shift;
+
+    # Getter: no (defined) value supplied, report the stored penalty.
+    return $self->{'mismatch'} unless defined $penalty;
+
+    # Setter: zero is allowed, positive penalties are rejected.
+    $self->throw("Can't have a mismatch penalty greater than 0") if $penalty > 0;
+    $self->{'mismatch'} = $penalty;
+    return $self->{'mismatch'};
+}
+
+
+=head2 gap
+
+ Title     : gap
+ Usage     : $gap = $factory->gap() #get
+           : $factory->gap($value) #set
+ Function  : the set get for the gap penalty
+ Example   :
+ Returns   : gap value
+ Arguments : new value
+
+=cut
+
+sub gap {
+    my $self    = shift;
+    my $penalty = shift;
+
+    # Getter: no (defined) value supplied, report the stored penalty.
+    return $self->{'gap'} unless defined $penalty;
+
+    # Setter: zero is allowed, negative penalties are rejected.
+    $self->throw("Can't have a gap penalty less than 0") if $penalty < 0;
+    $self->{'gap'} = $penalty;
+    return $self->{'gap'};
+}
+
+=head2 ext
+
+ Title     : ext
+ Usage     : $ext = $factory->ext() #get
+           : $factory->ext($value) #set
+ Function  : the set get for the ext penalty
+ Example   :
+ Returns   : ext value
+ Arguments : new value
+
+=cut
+
+sub ext {
+    my $self    = shift;
+    my $penalty = shift;
+
+    # Getter: no (defined) value supplied, report the stored penalty.
+    return $self->{'ext'} unless defined $penalty;
+
+    # Setter: zero is allowed, negative penalties are rejected.
+    $self->throw("Can't have a extension penalty less than 0") if $penalty < 0;
+    $self->{'ext'} = $penalty;
+    return $self->{'ext'};
+}
+
+=head2 alg
+
+ Title     : alg
+ Usage     : $alg = $factory->alg() #get
+           : $factory->alg($value) #set
+ Function  : the set get for the algorithm
+ Example   :
+ Returns   : alg value
+ Arguments : new value
+
+=cut
+
+sub alg {
+    my $self   = shift;
+    my $choice = shift;
+
+    # Getter: no (defined) value supplied, report the stored algorithm.
+    return $self->{'alg'} unless defined $choice;
+
+    # Setter: only the three Miller-Myers constants are accepted.
+    unless (   $choice == DPALIGN_LOCAL_MILLER_MYERS
+            or $choice == DPALIGN_GLOBAL_MILLER_MYERS
+            or $choice == DPALIGN_ENDSFREE_MILLER_MYERS) {
+        $self->throw("Can't have an algorithm that is not 1, 2 or 3");
+    }
+    $self->{'alg'} = $choice;
+    return $self->{'alg'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ipcress.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ipcress.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/ipcress.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,278 @@
+# $Id: ipcress.pm,v 1.8.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::ipcress
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# Copyright Sheldon McKay
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::ipcress - Parse ipcress output and make features
+
+=head1 SYNOPSIS
+
+    # A simple annotation pipeline wrapper for ipcress data
+    # assuming ipcress data is already generated in file seq1.ipcress
+    # and sequence data is in fasta format in file called seq1.fa
+
+    use Bio::Tools::ipcress;
+    use Bio::SeqIO;
+    my $parser = new Bio::Tools::ipcress(-file => 'seq1.ipcress');
+    my $seqio = new Bio::SeqIO(-format => 'fasta', -file => 'seq1.fa');
+    my $seq = $seqio->next_seq || die("cannot get a seq object from SeqIO");
+
+    while( my $feat = $parser->next_feature ) {
+	# add ipcress annotation to a sequence
+	$seq->add_SeqFeature($feat);
+    }
+    my $seqout = new Bio::SeqIO(-format => 'embl');
+    $seqout->write_seq($seq);
+
+
+=head1 DESCRIPTION
+
+This object serves as a parser for ipcress data, creating a
+Bio::SeqFeatureI for each ipcress hit.  These can be processed or added
+as annotation to an existing Bio::SeqI object for the purposes of
+automated annotation.
+
+This module is adapted from the Bio::Tools::EPCR module
+written by Jason Stajich (jason-at-bioperl.org).
+
+Ipcress is available through Guy Slater's Exonerate package 
+http://www.ebi.ac.uk/~guy/exonerate/
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::ipcress;
+use strict;
+
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $ipcress = new Bio::Tools::ipcress(-file => $file,
+					   -primary => $fprimary, 
+					   -source => $fsource, 
+					   -groupclass => $fgroupclass);
+ Function: Initializes a new ipcress parser
+ Returns : Bio::Tools::ipcress
+ Args    : -fh   => filehandle
+           OR
+           -file => filename
+
+           -primary => a string to be used as the common value for
+                       each features '-primary' tag.  Defaults to
+                       the sequence ontology term 'PCR_product'.
+                       (This in turn maps to the GFF 'type'
+                       tag (aka 'method')).
+
+            -source => a string to be used as the common value for
+                       each features '-source' tag.  Defaults to
+                       'ipcress'. (This in turn maps to the GFF 'source'
+                       tag)
+
+             -groupclass => a string to be used as the name of the tag
+                           which will hold the sts marker namefirst
+                           attribute.  Defaults to 'name'.
+
+=cut
+
+sub new {
+  # Constructor: stores the feature labelling options and reads the whole
+  # ipcress report (from -file or -fh) into memory, one record per hit.
+  # Throws when neither input is given or the file cannot be opened.
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($primary, $source, 
+      $groupclass, $file, $fh) = $self->_rearrange([qw(PRIMARY
+						       SOURCE 
+						       GROUPCLASS
+						       FILE FH)], @args);
+  $self->primary(defined $primary ? $primary : 'PCR_product');
+  $self->source(defined $source ? $source : 'ipcress');
+  $self->groupclass(defined $groupclass ? $groupclass : 'name');
+
+  # Read the input in records terminated by the 'Ipcress result' marker.
+  local $/ = 'Ipcress result';
+  my @result;
+
+  if ($file) {
+      # Lexical handle, explicit read mode, and a checked open so that a
+      # missing or unreadable file is reported instead of silently
+      # producing an empty result set.
+      open my $in, '<', $file
+          or $self->throw("Bio::Tools::ipcress: cannot open '$file': $!");
+      @result = (<$in>);
+      close $in;
+  }
+  elsif ($fh) {
+      @result = (<$fh>);
+  }
+  else {
+      $self->throw("Bio::Tools::ipcress: no input file");
+  }
+
+
+  # The first record is whatever precedes the first marker, not a hit.
+  shift @result;
+
+  $self->{result} = \@result;
+
+  return $self;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none    
+
+=cut
+
+sub next_feature {
+    my ($self) = @_;
+
+    # Take the next unread record; nothing left means no more features.
+    my $record = shift @{$self->{result}};
+    return unless defined($record);
+
+    chomp $record;
+    my @rows = split "\n", $record;
+
+    # The 'ipcress: ' summary row carries the whitespace-separated fields;
+    # only some of the positions are used.
+    my ($summary) = grep /ipcress: /, @rows;
+    my @field = split /\s+/, $summary;
+    my ($seqname, $mkrname, $length, $start, $mismatchL, $mismatchR, $desc)
+        = @field[1, 2, 3, 5, 6, 9, 10];
+
+    # End is computed from the reported start; start is then shifted by one.
+    my $end = $start + $length;
+    $start += 1;
+
+    # Anything other than 'forward' or 'revcomp' yields strand 0.
+    my $strand = 0;
+    if ($desc eq 'forward') {
+        $strand = '+';
+    }
+    elsif ($desc eq 'revcomp') {
+        $strand = '-';
+    }
+
+    # Primer sequences: keep only the upper-case letters of the annotated
+    # rows; the right primer is additionally reversed.
+    my ($left) = grep /\# forward/, @rows;
+    $left =~ s/[^A-Z]+//g;
+    my ($right) = grep /\# revcomp/, @rows;
+    $right =~ s/[^A-Z]+//g;
+    $right = scalar reverse $right;
+
+    # if there are multiple hits, increment the name for
+    # the groupclass
+    my $times_seen = ++$self->{seen}->{$mkrname};
+    if ($times_seen > 1) {
+        $mkrname .= "\.$self->{seen}->{$mkrname}";
+    }
+
+    my $markerfeature = Bio::SeqFeature::Generic->new(
+        '-start'   => $start,
+        '-end'     => $end,
+        '-strand'  => $strand,
+        '-source'  => $self->source,
+        '-primary' => $self->primary,
+        '-seq_id'  => $seqname,
+        '-tag'     => {
+            $self->groupclass => $mkrname,
+        },
+    );
+
+    unless ($strand) {
+        $markerfeature->add_tag_value('Note' => "bad product: single primer amplification");
+    }
+
+    $markerfeature->add_tag_value('left_primer' => $left);
+    $markerfeature->add_tag_value('right_primer' => $right);
+    if ($mismatchL) {
+        $markerfeature->add_tag_value('left_mismatches' => $mismatchL);
+    }
+    if ($mismatchR) {
+        $markerfeature->add_tag_value('right_mismatches' => $mismatchR);
+    }
+
+    return $markerfeature;
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $obj->source($newval)
+ Function: 
+ Example : 
+ Returns : value of source (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source{
+    my ($self, @new_value) = @_;
+
+    # Any argument at all (even undef) acts as a set.
+    if (@new_value) {
+        $self->{'_source'} = $new_value[0];
+    }
+    return $self->{'_source'};
+}
+
+=head2 primary
+
+ Title   : primary
+ Usage   : $obj->primary($newval)
+ Function: 
+ Example : 
+ Returns : value of primary (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub primary{
+    my ($self, @new_value) = @_;
+
+    # Any argument at all (even undef) acts as a set.
+    if (@new_value) {
+        $self->{'_primary'} = $new_value[0];
+    }
+    return $self->{'_primary'};
+}
+
+=head2 groupclass
+
+ Title   : groupclass
+ Usage   : $obj->groupclass($newval)
+ Function: 
+ Example : 
+ Returns : value of groupclass (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub groupclass{
+    my ($self, @new_value) = @_;
+
+    # Any argument at all (even undef) acts as a set.
+    if (@new_value) {
+        $self->{'_groupclass'} = $new_value[0];
+    }
+    return $self->{'_groupclass'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/isPcr.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/isPcr.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/isPcr.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,246 @@
+# $Id: isPcr.pm,v 1.9.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::isPcr
+#
+# Cared for by Sheldon McKay <mckays at cshl.edu>
+#
+# Copyright Sheldon McKay
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::isPcr - Parse isPcr output and make features
+
+=head1 SYNOPSIS
+
+    # A simple annotation pipeline wrapper for isPcr data
+    # assuming isPcr data is already generated in file seq1.isPcr
+    # and sequence data is in fasta format in file called seq1.fa
+
+    # Note: this parser is meant for the default fasta output from
+    # isPcr.  bed and psl output formats are not supported.
+
+    use Bio::Tools::isPcr;
+    use Bio::SeqIO;
+    my $parser = new Bio::Tools::isPcr(-file => 'seq1.isPcr');
+    my $seqio = new Bio::SeqIO(-format => 'fasta', -file => 'seq1.fa');
+    my $seq = $seqio->next_seq || die("cannot get a seq object from SeqIO");
+
+    while( my $feat = $parser->next_feature ) {
+	# add isPcr annotation to a sequence
+	$seq->add_SeqFeature($feat);
+    }
+    my $seqout = new Bio::SeqIO(-format => 'embl');
+    $seqout->write_seq($seq);
+
+
+=head1 DESCRIPTION
+
+This object serves as a parser for isPcr data (in the default fasta
+format), creating a Bio::SeqFeatureI for each isPcr hit.  
+These can be processed or added as annotation to an existing
+Bio::SeqI object for the purposes of automated annotation.
+
+This module is adapted from the Bio::Tools::EPCR module
+written by Jason Stajich (jason-at-bioperl.org).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sheldon McKay
+
+Email mckays at cshl.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::isPcr;
+use strict;
+
+use Bio::SeqIO;
+use Bio::SeqFeature::Generic;
+
+
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $ispcr = new Bio::Tools::isPcr( -file => $file,
+					      -primary => $fprimary, 
+					      -source => $fsource,
+					      -groupclass => $fgroupclass);
+
+ Function: Initializes a new isPcr parser
+ Returns : Bio::Tools::isPcr
+ Args    : -fh   => filehandle
+           OR
+           -file => filename
+
+           -primary => a string to be used as the common value for
+                       each features '-primary' tag.  Defaults to
+                       the sequence ontology term 'PCR_product'.  
+                       (This in turn maps to the GFF 'type'
+                       tag (aka 'method')).
+
+            -source => a string to be used as the common value for
+                       each features '-source' tag.  Defaults to
+                       'isPcr'. (This in turn maps to the GFF 'source'
+                       tag)
+
+            -groupclass => a string to be used as the name of the tag
+                           which will hold the sts marker namefirst
+                           attribute.  Defaults to 'name'.
+
+=cut
+
+
+sub new {
+  # Constructor: accepts -file/-fh (passed on to Bio::SeqIO) plus the
+  # optional -primary, -source and -groupclass feature settings.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($primary,$source,$groupclass) = 
+      $self->_rearrange([qw/PRIMARY SOURCE GROUPCLASS/],@args);
+
+  # Fall back to the documented defaults when an option was not supplied.
+  $self->primary(defined $primary ? $primary : 'PCR_product');
+  $self->source(defined $source ? $source : 'isPcr');
+  $self->groupclass(defined $groupclass ? $groupclass : 'name');
+
+  # default output for isPcr is fasta format, so the hits are read
+  # through a fasta SeqIO stream built from the same arguments
+  $self->{io} = Bio::SeqIO->new(-format => 'fasta', @args);
+
+  return $self;
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : $seqfeature = $obj->next_feature();
+ Function: Returns the next feature available in the analysis result, or
+           undef if there are no more features.
+ Example :
+ Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
+           more features.
+ Args    : none    
+
+=cut
+
+sub next_feature {
+    my $self = shift;
+
+    # One fasta record per hit; an exhausted stream ends the iteration.
+    my $record = $self->{io}->next_seq;
+    return unless defined $record;
+
+    # The record id is split on ':' into sequence name and location, the
+    # description on whitespace into product name and the two primers,
+    # and the location is matched as <digits><+ or -><digits>.
+    my ($seqname,$location)    = split ':', $record->primary_id;
+    my ($pcrname,$left,$right) = split /\s+/, $record->desc;
+    my ($start,$strand,$end)   = $location =~ /^(\d+)([-+])(\d+)$/;
+    my $amplicon = $record->seq;
+
+    # Repeated hits for the same product get a ".N" suffix (N = hit
+    # count so far) so the groupclass value stays unique.
+    my $hits = ++$self->{seen}->{$pcrname};
+    $pcrname .= ".$hits" if $hits > 1;
+
+    my %tag_for = (
+	$self->groupclass => $pcrname,
+	amplicon          => $amplicon,
+	left_primer       => $left,
+	right_primer      => $right,
+    );
+
+    return Bio::SeqFeature::Generic->new(
+	'-start'   => $start,
+	'-end'     => $end,
+	'-strand'  => $strand,
+	'-source'  => $self->source,
+	'-primary' => $self->primary,
+	'-seq_id'  => $seqname,
+	'-tag'     => \%tag_for,
+    );
+}
+
+=head2 source
+
+ Title   : source
+ Usage   : $obj->source($newval)
+ Function: 
+ Example : 
+ Returns : value of source (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source{
+    my ($self, @newval) = @_;
+
+    # Setter mode whenever an argument is present, even an explicit undef.
+    if (@newval) {
+	$self->{'_source'} = $newval[0];
+    }
+    return $self->{'_source'};
+}
+
+=head2 primary
+
+ Title   : primary
+ Usage   : $obj->primary($newval)
+ Function: 
+ Example : 
+ Returns : value of primary (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub primary{
+    my ($self, @newval) = @_;
+
+    # Setter mode whenever an argument is present, even an explicit undef.
+    if (@newval) {
+	$self->{'_primary'} = $newval[0];
+    }
+    return $self->{'_primary'};
+}
+
+=head2 groupclass
+
+ Title   : groupclass
+ Usage   : $obj->groupclass($newval)
+ Function: 
+ Example : 
+ Returns : value of groupclass (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub groupclass{
+    my ($self, @newval) = @_;
+
+    # Setter mode whenever an argument is present, even an explicit undef.
+    if (@newval) {
+	$self->{'_groupclass'} = $newval[0];
+    }
+    return $self->{'_groupclass'};
+}
+
+1;


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/isPcr.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pICalculator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pICalculator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pICalculator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,348 @@
+# $Id: pICalculator.pm,v 1.9.4.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::pICalculator
+#
+# Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved.
+#
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::pICalculator - calculate the isoelectric point of a protein
+
+=head1 DESCRIPTION
+
+Calculates the isoelectric point of a protein, the pH at which there
+is no overall charge on the protein. Calculates the charge on a protein
+at a given pH. Can use built-in sets of pK values or custom pK sets.
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::pICalculator;
+  use Bio::SeqIO;
+
+  my $in = Bio::SeqIO->new( -fh => \*STDIN ,
+                            -format => 'Fasta' );
+
+  my $calc = Bio::Tools::pICalculator->new(-places => 2,
+                                           -pKset => 'EMBOSS');
+
+  while ( my $seq = $in->next_seq ) {
+     $calc->seq($seq);
+     my $iep = $calc->iep;
+     print sprintf( "%s\t%s\t%.2f\n",
+                    $seq->id,
+                    $iep,
+                    $calc->charge_at_pH($iep) );
+
+     for( my $i = 0; $i <= 14; $i += 0.5 ){
+        print sprintf( "pH = %.2f\tCharge = %.2f\n",
+                       $i,
+                       $calc->charge_at_pH($i) );
+     }
+  }
+
+=head1 SEE ALSO
+
+http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
+http://emboss.sourceforge.net/apps/cvs/iep.html
+http://us.expasy.org/tools/pi_tool.html
+
+=head1 LIMITATIONS
+
+There are various sources for the pK values of the amino acids. 
+The set of pK values chosen will affect the pI reported.
+
+The charge state of each residue is assumed to be independent of 
+the others. Protein modifications (such as a phosphate group) that 
+have a charge are ignored.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this
+and other Bioperl modules. Send your comments and suggestions 
+preferably to one of the Bioperl mailing lists.
+Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the 
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Mark Southern (mark_southern at merck.com). From an algorithm by David 
+Tabb found at http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf.
+Modification for Bioperl, additional documentation by Brian Osborne.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved. This module is
+free software. It may be used, redistributed and/or modified under the terms
+of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Private methods are usually preceded by a _.
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::pICalculator;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+# pK values from the DTASelect program from Scripps
+# http://fields.scripps.edu/DTASelect
+my $DTASelect_pK = {  N_term   =>  8.0, # amino terminus
+                      K        => 10.0, # Lys
+                      R        => 12.0, # Arg
+                      H        =>  6.5, # His
+                      D        =>  4.4, # Asp
+                      E        =>  4.4, # Glu
+                      C        =>  8.5, # Cys
+                      Y        => 10.0, # Tyr
+                      C_term   =>  3.1  # carboxy terminus
+                    };
+
+# pK values from the iep program from EMBOSS
+# http://emboss.sourceforge.net/apps/cvs/iep.html
+my $Emboss_pK  = { N_term   =>  8.6, # amino terminus
+                   K        => 10.8, # Lys
+                   R        => 12.5, # Arg
+                   H        =>  6.5, # His
+                   D        =>  3.9, # Asp
+                   E        =>  4.1, # Glu
+                   C        =>  8.5, # Cys
+                   Y        => 10.1, # Tyr
+                   C_term   =>  3.6  # carboxy terminus
+                 };
+
+=head2 new
+
+ Title   : new
+ Usage   : Bio::Tools::pICalculator->new
+ Function: Instantiates the Bio::Tools::pICalculator object
+ Example : $calc = Bio::Tools::pICalculator->new( -pKset => \%pKvalues,
+                                                  # a Bio::Seq object
+                                                  -seq => $seq,
+                                                  -places => 2 );
+           or:
+
+           $calc = Bio::Tools::pICalculator->new( -pKset => 'string',
+                                                  # a Bio::Seq object
+                                                  -seq => $seq,
+                                                  -places => 1 );
+
+           Constructs a new pICalculator. Arguments are a flattened hash.
+           Valid, optional keys are:
+
+           pKset - A reference to a hash with key value pairs for the 
+                   pK values of the charged amino acids. Required keys
+                   are:
+
+                   N_term   C_term   K   R   H   D   E   C   Y
+
+           pKset - A string ( 'DTASelect' or 'EMBOSS' ) that will 
+                   specify an internal set of pK values to be used. The 
+                   default is 'EMBOSS'
+
+           seq - A Bio::Seq sequence object to analyze
+
+           places - The number of decimal places to use in the
+                    isoelectric point calculation. The default is 2.
+
+ Returns : A new Bio::Tools::pICalculator object
+ Args    : Optional -pKset, -seq and -places (see above)
+
+=cut
+
+sub new {
+   # Constructor; recognised options are -seq, -pKset and -places.
+   my( $class, %opts ) = @_;
+
+   # NOTE(review): the object returned by SUPER::new is only used to
+   # obtain a class name; a fresh empty hash is blessed in its place, so
+   # anything the parent constructor stored is dropped. Confirm intended.
+   my $parent = $class->SUPER::new(%opts);
+   my $self   = bless {}, ref $parent || $parent;
+
+   $self->seq( $opts{-seq} ) if exists $opts{-seq};
+   $self->pKset( $opts{-pKset} || 'EMBOSS' );
+
+   # Two decimal places unless the caller asked for something else.
+   $self->places( exists $opts{-places} ? $opts{-places} : 2 );
+
+   return $self;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $calc->seq($seqobj)
+ Function: Sets or returns the Bio::Seq used in the calculation
+ Example : $seqobj = Bio::Seq->new(-seq=>"gghhhmmm",-id=>"GHM");
+           $calc = Bio::Tools::pICalculator->new;
+           $calc->seq($seqobj);
+ Returns : Bio::Seq object
+ Args    : Bio::Seq object or none
+
+=cut
+
+sub seq {
+   # Get or set the Bio::Seq object used in the calculation.
+   #   no argument : returns the current sequence object (may be undef)
+   #   one argument: must be a Bio::Seq; it is stored, its charged
+   #                 residues are counted and cached, and it is returned
+   # Throws if an argument is supplied that is not a Bio::Seq.
+   my $this = shift;
+
+   # Getter: the POD documents "Bio::Seq object or none" as arguments,
+   # so a bare call must not throw.
+   return $this->{-seq} unless @_;
+
+   my $seq = shift;
+   unless( defined $seq && UNIVERSAL::isa($seq,'Bio::Seq') ){
+      $this->throw("$seq is not a valid Bio::Seq object");
+   }
+   $this->{-seq} = $seq;
+   # Cache the residue counts so iep/charge_at_pH need not recount.
+   $this->{-count} = count_charged_residues( $seq );
+   return $this->{-seq};
+}
+
+=head2 pKset
+
+ Title   : pKset
+ Usage   : $pKset = $calc->pKset(\%pKset)
+ Function: Sets or returns the hash of pK values used in the calculation
+ Example : $calc->pKset('emboss')
+ Returns : reference to pKset hash
+ Args    : The reference to a pKset hash, a string, or none. Examples:
+
+           pKset - A reference to a hash with key value pairs for the
+                   pK values of the charged amino acids. Required keys
+                   are:
+
+                   N_term   C_term   K   R   H   D   E   C   Y
+
+           pKset - A valid string ( 'DTASelect' or 'EMBOSS' ) that will 
+                   specify an internal set of pK values to be used. The 
+                   default is 'EMBOSS'
+
+=cut
+
+sub pKset {
+   # Get or set the table of pK values used in the calculation.
+   #   no argument : returns the current table unchanged
+   #   hash ref    : used as a user-defined table
+   #   string      : 'EMBOSS' or 'DTASelect' (case-insensitive) selects a
+   #                 built-in table; anything else falls back to EMBOSS
+   # Returns the hash reference now in effect.
+   my $this = shift;
+
+   # Getter: a bare call must not silently reset the table to EMBOSS.
+   return $this->{-pKset} unless @_;
+
+   my $pKset = shift;
+   if( ref $pKset eq 'HASH' ){         # user defined pK values
+      $this->{-pKset} = $pKset;
+   }elsif( defined $pKset && $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
+      $this->{-pKset} = $DTASelect_pK;
+   }else{                              # 'emboss', undef or unknown: EMBOSS's iep values
+      $this->{-pKset} = $Emboss_pK;
+   }
+   return $this->{-pKset};
+}
+
+sub places {
+   # Get or set the number of decimal places used by iep().
+   my( $this, @value ) = @_;
+   if( @value ){
+      $this->{-places} = $value[0];
+   }
+   return $this->{-places};
+}
+
+=head2 iep
+
+ Title   : iep
+ Usage   : $calc->iep
+ Function: Returns the isoelectric point
+ Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
+           $calc->seq($seqobj);
+           $iep = $calc->iep;
+ Returns : The isoelectric point of the sequence in the Bio::Seq object
+ Args    : None
+
+=cut
+
+sub iep {
+   my( $this ) = @_;
+
+   # Collect the stored settings, then delegate to the search routine.
+   my $pK     = $this->{-pKset};
+   my $places = $this->{-places};
+   my $seq    = $this->{-seq};
+   my $count  = $this->{-count};
+
+   return _calculate_iep( $pK, $places, $seq, $count );
+}
+
+=head2 charge_at_pH
+
+ Title   : charge_at_pH
+ Usage   : $charge = $calc->charge_at_pH($pH)
+ Function: Returns the predicted charge of the sequence at the given pH
+ Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
+           $calc->seq($seqobj);
+           $charge = $calc->charge_at_pH("7");
+ Returns : The predicted charge at the given pH
+ Args    : pH
+
+=cut
+
+sub charge_at_pH {
+   my( $this, $pH ) = @_;
+
+   # Uses the stored pK table and the residue counts cached by seq().
+   my $pK    = $this->{-pKset};
+   my $count = $this->{-count};
+   return _calculate_charge_at_pH( $pH, $pK, $count );
+}
+
+sub count_charged_residues {
+   # Tally the residues that carry a charge, case-insensitively.
+   # Returns a hash ref keyed by residue letter; residues that do not
+   # occur get no key, and the result is undef if none occur at all.
+   my( $seq ) = @_;
+   my $residues = $seq->seq;
+   my $count;
+   for my $aa ( qw( K R H D E C Y ) ){ # charged AA's
+      my $hits = () = $residues =~ /$aa/ig;
+      $count->{$aa} = $hits if $hits;
+   }
+   return $count;
+}
+
+sub _calculate_iep {
+    # Search for the pH at which the charge stops changing at the
+    # requested precision, starting at pH 7 and halving the step each
+    # round. Returns the pH formatted to $places decimal places.
+    my( $pK, $places, $seq, $count ) = @_;
+    my $format = "%.${places}f";
+    my( $pH, $step, $previous ) = ( 7.0, 3.5, 0.0 );
+
+    # Count the residues here if the caller had no cached counts.
+    $count = count_charged_residues($seq) unless defined $count;
+
+    for(;;){
+       my $charge = _calculate_charge_at_pH( $pH, $pK, $count );
+
+       # Converged: this round's charge rounds to the same value as the last.
+       last if sprintf($format,$charge) == 
+	 sprintf($format,$previous);
+
+       # Positive charge: move up the pH scale; otherwise move down.
+       if( $charge > 0 ){
+	  $pH += $step;
+       }else{
+	  $pH -= $step;
+       }
+       $step /= 2.0;
+       $previous = $charge;
+    }
+    return sprintf( $format, $pH );
+}
+
+# it's the sum of all the partial charges for the
+# termini and all of the charged aa's!
+sub _calculate_charge_at_pH {
+   no warnings; # don't complain if a given key doesn't exist
+   my( $pH, $pK, $count ) = @_;
+
+   # Positive contributions: the N terminus, then K, R and H weighted by
+   # how often each occurs.
+   my $charge = _partial_charge( $pK->{N_term}, $pH );
+   for my $basic ( qw( K R H ) ){
+      $charge += $count->{$basic} * _partial_charge( $pK->{$basic}, $pH );
+   }
+
+   # Negative contributions: D, E, C and Y, then the C terminus. Note the
+   # swapped argument order to _partial_charge for these groups.
+   for my $acidic ( qw( D E C Y ) ){
+      $charge -= $count->{$acidic} * _partial_charge( $pH, $pK->{$acidic} );
+   }
+   $charge -= _partial_charge( $pH, $pK->{C_term} );
+
+   return $charge;
+}
+
+# Concentration Ratio is 10**(pK - pH) for positive groups
+# and 10**(pH - pK) for negative groups
+sub _partial_charge {
+   # Fraction charged, ratio / (ratio + 1), where ratio = 10 ** (first - second).
+   my( $first, $second ) = @_;
+   my $ratio = 10 ** ( $first - $second );
+   return $ratio / ( $ratio + 1 );
+}
+
+1;
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pICalculator.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pSW.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pSW.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/pSW.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,417 @@
+## $Id: pSW.pm,v 1.27.4.1 2006/10/02 23:10:32 sendu Exp $
+
+#
+# BioPerl module for Bio::Tools::pSW
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::pSW - pairwise Smith Waterman object
+
+=head1 SYNOPSIS
+
+    use Bio::Tools::pSW;
+    use Bio::AlignIO;
+    my $factory = new Bio::Tools::pSW( '-matrix' => 'blosum62.bla',
+				       '-gap' => 12,
+				       '-ext' => 2,
+				       );
+
+    #use the factory to make some output
+
+    $factory->align_and_show($seq1,$seq2,STDOUT);
+
+    # make a Bio::SimpleAlign and do something with it
+
+    my $aln = $factory->pairwise_alignment($seq1,$seq2);
+    my $alnout = new Bio::AlignIO(-format => 'msf',
+				  -fh     => \*STDOUT);
+
+    $alnout->write_aln($aln);
+
+=head1 INSTALLATION
+
+This module is included with the central Bioperl distribution:
+
+   http://bio.perl.org/Core/Latest
+   ftp://bio.perl.org/pub/DIST
+
+Follow the installation instructions included in the INSTALL file.
+
+=head1 DESCRIPTION
+
+pSW is an Alignment Factory for protein sequences. It builds pairwise
+alignments using the Smith-Waterman algorithm. The alignment algorithm is
+implemented in C and added in using an XS extension. The XS extension basically
+comes from the Wise2 package, but has been slimmed down to only be the
+alignment part of that (this is a good thing!). The XS extension comes
+from the bioperl-ext package which is distributed along with bioperl.
+I<Warning:> This package will not work if you have not compiled the
+bioperl-ext package.
+
+The mixture of C and Perl is ideal for this sort of 
+problem. Here are some plus points for this strategy: 
+
+=over 2
+
+=item Speed and Memory 
+
+The algorithm is actually implemented in C, which means it is faster than
+a pure perl implementation (I have never done one, so I have no idea
+how much faster) and will use considerably less memory, as it efficiently
+assigns memory for the calculation.
+
+=item Algorithm efficiency
+
+The algorithm was written using Dynamite, and so contains an automatic
+switch to the linear space divide-and-conquer method. This means you
+could effectively align very large sequences without killing your machine
+(it could take a while though!).
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules.  Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/           
+
+=head1 AUTHOR
+
+Ewan Birney, birney-at-sanger.ac.uk or birney-at-ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich, jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with an underscore "_".
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::pSW;
+use strict;
+no strict ( 'refs');
+
+BEGIN {
+    # The alignment engine is a compiled extension; fail at compile time
+    # with an installation hint if it cannot be loaded.
+    eval {
+	require Bio::Ext::Align;
+    };
+    if ( $@ ) {
+	# die() never returns, so no exit() is needed after it.
+	die("\nThe C-compiled engine for Smith Waterman alignments (Bio::Ext::Align) has not been installed.\n Please install the bioperl-ext package\n\n");
+    }
+}
+
+use Bio::SimpleAlign;
+
+
+use base qw(Bio::Tools::AlignFactory);
+
+
+
+sub new {
+  # Constructor. Options:
+  #   -matrix : comparison matrix file name (default 'blosum62.bla')
+  #   -gap    : gap opening penalty, non-negative integer (default 12)
+  #   -ext    : gap extension penalty, non-negative integer (default 2)
+  # Throws if the matrix cannot be loaded or a penalty is not an integer.
+  my($class,@args) = @_;
+  
+  my $self = $class->SUPER::new(@args);
+
+  my($matrix,$gap,$ext) = $self->_rearrange([qw(MATRIX
+						GAP
+						EXT
+						)],@args);
+  
+  #default values - we have to load matrix into memory, so 
+  # we need to check it out now
+  if( ! defined $matrix || !($matrix =~ /\w/) ) {
+      $matrix = 'blosum62.bla';
+  }
+
+  $self->matrix($matrix); # will throw exception if it can't load it
+
+  # Test with defined() rather than truth so that a penalty of 0 is
+  # accepted instead of being replaced by the default.
+  if( defined $gap ) {
+      if( $gap =~ /^\d+$/ ) {
+	  $self->gap($gap);
+      } else {
+	  $self->throw("Gap penalty must be a number, not [$gap]");
+      }
+  } else {
+      $self->gap(12);
+  }
+
+  if( defined $ext ) {
+      if( $ext =~ /^\d+$/ )  {
+	  $self->ext($ext);
+      } else {
+	  $self->throw("Extension penalty must be a number, not [$ext]");
+      }
+  } else {
+      $self->ext(2);
+  }
+ 
+  return $self; 
+}
+
+
+=head2 pairwise_alignment
+
+ Title   : pairwise_alignment
+ Usage   : $aln = $factory->pairwise_alignment($seq1,$seq2)
+ Function: Makes a SimpleAlign object from two sequences
+ Returns : A SimpleAlign object
+ Args    :
+
+
+=cut
+
+sub pairwise_alignment{
+    # Align two sequences with the C engine and return the result as a
+    # Bio::SimpleAlign holding two Bio::LocatableSeq rows.
+    # Warns and returns nothing if either argument is not a
+    # Bio::PrimarySeqI or is shorter than 2 residues; throws if the
+    # engine cannot build an alignment.
+    my ($self,$seq1,$seq2) = @_;
+    my($t1,$t2,$aln,$out,@str1,@str2,@ostr1,@ostr2,$alc,$tstr,$tid,$start1,$end1,$start2,$end2,$alctemp);
+    
+    if( ! defined $seq1 || ! $seq1->isa('Bio::PrimarySeqI') ||
+	! defined $seq2 || ! $seq2->isa('Bio::PrimarySeqI') ) {
+	$self->warn("Cannot call pairwise_alignment without specifing 2 sequences (Bio::PrimarySeqI objects)");
+	return;
+    }
+    # fix Jitterbug #1044
+    if( $seq1->length() < 2 || 
+	$seq2->length() < 2 ) {
+	$self->warn("cannot align sequences with length less than 2");
+	return;
+    }
+    $self->set_memory_and_report();
+    # create engine objects 
+    $seq1->display_id('seq1') unless ( defined $seq1->id() );
+    $seq2->display_id('seq2') unless ( defined $seq2->id() );
+
+    $t1  = &Bio::Ext::Align::new_Sequence_from_strings($seq1->id(),
+						       $seq1->seq());
+    $t2  = &Bio::Ext::Align::new_Sequence_from_strings($seq2->id(),
+						       $seq2->seq());
+    # the stored penalties are non-negative; they are passed negated
+    $aln = &Bio::Ext::Align::Align_Sequences_ProteinSmithWaterman($t1,$t2,$self->{'matrix'},-$self->gap,-$self->ext);
+    if( ! defined $aln || $aln == 0 ) {
+	$self->throw("Unable to build an alignment");
+    }
+
+    # free sequence engine objects
+
+    $t1 = $t2 = 0;
+
+    # now we have to get into the AlnBlock structure and
+    # figure out what is aligned to what...
+
+    # we are going to need the sequences as arrays for convenience
+
+    @str1 = split(//, $seq1->seq());
+    @str2 = split(//, $seq2->seq());
+
+    # get out start points
+
+    # The alignment is in alignment coordinates - ie the first
+    # residues starts at -1 and ends at 0. (weird I know).
+    # bio-coordinates are +2 from this...
+
+    $start1 = $aln->start()->alu(0)->start +2;
+    $start2 = $aln->start()->alu(1)->start +2;
+
+    # step along the linked list of alc units...
+
+    for($alc = $aln->start();$alc->at_end() != 1;$alc = $alc->next()) {
+	if( $alc->alu(0)->text_label eq 'SEQUENCE' ) {
+	    push(@ostr1,$str1[$alc->alu(0)->start+1]);
+	} else {
+	    # assume it is an insert!
+	    push(@ostr1,'-');
+	}
+
+	if( $alc->alu(1)->text_label eq 'SEQUENCE' ) {
+	    push(@ostr2,$str2[$alc->alu(1)->start+1]);
+	} else {
+	    # assume it is an insert!
+	    push(@ostr2,'-');
+	}
+	# remember the last unit visited; its end gives the end points
+	$alctemp = $alc;
+    }
+
+    #
+    # get out end points
+    #
+
+    # end points = real residue end in 'C' coordinates = residue
+    # end in biocoordinates. Oh... the wonder of coordinate systems!
+
+    $end1 = $alctemp->alu(0)->end+1;
+    $end2 = $alctemp->alu(1)->end+1;
+
+    # get rid of the alnblock 
+    $alc = 0;
+    $aln = 0;
+
+    # new SimpleAlignment
+    $out = Bio::SimpleAlign->new(); # new SimpleAlignment
+
+    $tstr = join('',@ostr1);
+    $tid = $seq1->id();
+    $out->add_seq(Bio::LocatableSeq->new( -seq=> $tstr,
+					 -start => $start1,
+					 -end   => $end1,
+					 -id=>$tid ));
+
+    $tstr = join('',@ostr2);
+    $tid = $seq2->id();
+    $out->add_seq(Bio::LocatableSeq->new( -seq=> $tstr,
+					 -start => $start2,
+					 -end => $end2,
+					 -id=> $tid ));
+
+    # give'm back the alignment
+
+    return $out;
+}
+
+=head2 align_and_show
+
+ Title   : align_and_show
+ Usage   : $factory->align_and_show($seq1,$seq2,STDOUT)
+
+=cut
+
+sub align_and_show {
+    # Align two sequences and let the engine write a formatted alignment
+    # to the given filehandle (STDOUT when none is supplied).
+    my ($self, $seq1, $seq2, $fh) = @_;
+
+    # Both arguments must be sequence objects.
+    unless( defined $seq1 && $seq1->isa('Bio::PrimarySeqI') &&
+	    defined $seq2 && $seq2->isa('Bio::PrimarySeqI') ) {
+	$self->warn("Cannot call align_and_show without specifing 2 sequences (Bio::PrimarySeqI objects)");
+	return;
+    }
+
+    # fix Jitterbug #1044
+    if( $seq1->length() < 2 || $seq2->length() < 2 ) {
+	$self->warn("cannot align sequences with length less than 2");
+	return;
+    }
+
+    $fh = \*STDOUT unless defined $fh;
+
+    $self->set_memory_and_report();
+
+    # Give unnamed sequences a placeholder id before building engine objects.
+    $seq1->display_id('seq1') unless ( defined $seq1->id() );
+    $seq2->display_id('seq2') unless ( defined $seq2->id() );
+
+    my $engine_seq1 = &Bio::Ext::Align::new_Sequence_from_strings($seq1->id(),$seq1->seq());
+    my $engine_seq2 = &Bio::Ext::Align::new_Sequence_from_strings($seq2->id(),$seq2->seq());
+
+    # The stored penalties are passed to the engine negated.
+    my $alignment = &Bio::Ext::Align::Align_Sequences_ProteinSmithWaterman($engine_seq1,$engine_seq2,$self->{'matrix'},-$self->gap,-$self->ext);
+    if( ! defined $alignment || $alignment == 0 ) {
+	$self->throw("Unable to build an alignment");
+    }
+
+    &Bio::Ext::Align::write_pretty_seq_align($alignment,$engine_seq1,$engine_seq2,12,50,$fh);
+
+}
+
+=head2 matrix
+
+ Title     : matrix()
+ Usage     : $factory->matrix('blosum62.bla');
+ Function  : Reads in comparison matrix based on name
+           :
+ Returns   : 
+ Argument  : comparison matrix
+
+=cut
+
+sub matrix {
+    # Load the named comparison matrix through the engine and store it.
+    # Throws if no name is given or the file cannot be read.
+    my ($self, $comp) = @_;
+
+    $self->throw("You must have a comparison matrix to set!")
+	unless defined $comp;
+
+    # talking to the engine here...
+    my $loaded = &Bio::Ext::Align::CompMat::read_Blast_file_CompMat($comp);
+
+    $self->throw("$comp cannot be read as a BLAST comparison matrix file")
+	if !(defined $loaded) || $loaded == 0;
+
+    return $self->{'matrix'} = $loaded;
+}
+
+
+
+=head2 gap
+
+ Title     : gap
+ Usage     : $gap = $factory->gap() #get
+           : $factory->gap($value) #set
+ Function  : the set get for the gap penalty
+ Example   :
+ Returns   : gap value 
+ Arguments : new value
+
+=cut
+
+sub gap {
+    my $self    = shift;
+    my $penalty = shift;
+
+    # Plain getter when no (or an undefined) value is supplied.
+    return $self->{'gap'} unless defined $penalty;
+
+    # Zero is a legal penalty; only negative values are rejected.
+    $self->throw("Can't have a gap penalty less than 0") if $penalty < 0;
+
+    return $self->{'gap'} = $penalty;
+}
+
+
+=head2 ext
+
+ Title     : ext
+ Usage     : $ext = $factory->ext() #get
+           : $factory->ext($value) #set
+ Function  : the set get for the ext penalty
+ Example   :
+ Returns   : ext value 
+ Arguments : new value
+
+=cut
+
+sub ext {
+    # Get or set the gap extension penalty.
+    # With a defined argument: stores it (throws if negative).
+    # Always returns the current extension penalty.
+    my ($self,$val) = @_;
+    
+    if( defined $val ) {
+	if( $val < 0 ) {    # ext==0 is allowed
+	    $self->throw("Can't have an extension penalty less than 0");
+	}
+	$self->{'ext'} = $val;
+    }
+    return $self->{'ext'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tools/tRNAscanSE.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tools/tRNAscanSE.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tools/tRNAscanSE.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,310 @@
+# $Id: tRNAscanSE.pm,v 1.5.2.1 2006/10/02 23:10:32 sendu Exp $
+#
+# BioPerl module for Bio::Tools::tRNAscanSE
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::tRNAscanSE - A parser for tRNAscan-SE output
+
+=head1 SYNOPSIS
+
+   use Bio::Tools::tRNAscanSE;
+
+   my $parser = Bio::Tools::tRNAscanSE->new(-file => 'result.tRNAscanSE');
+
+   # parse the results
+   while( my $gene = $parser->next_prediction ) {
+
+       @exon_arr = $gene->get_seqFeatures();
+
+   }
+
+=head1 DESCRIPTION
+
+This script will parse tRNAscan-SE output.  Just the tabular output of
+the tRNA locations in the genome for now.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tools::tRNAscanSE;
+use strict;
+
+use Bio::SeqFeature::Generic;
+
+use base qw(Bio::Tools::AnalysisResult);
+
+use vars qw($GeneTag $SrcTag $ExonTag);
+($GeneTag,$SrcTag,$ExonTag) = qw(tRNA_gene tRNAscan-SE tRNA_exon);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tools::tRNAscanSE();
+ Function: Builds a new Bio::Tools::tRNAscanSE object 
+ Returns : an instance of Bio::Tools::tRNAscanSE
+ Args    : -fh/-file for input filename
+           -genetag => primary tag used in gene features (default 'tRNA_gene')
+           -exontag => primary tag used in exon features (default 'tRNA_exon')
+           -srctag  => source tag used in all features (default 'tRNAscan-SE')
+
+
+=cut
+
+sub _initialize {
+  # Called by the constructor chain with the named arguments given to
+  # new(); sets up the three feature tags and the per-parser bookkeeping.
+  my ($self, @args) = @_;
+  $self->SUPER::_initialize(@args);
+
+  # The receiving variables are listed in the same order as the keys
+  # requested from _rearrange, so -srctag and -exontag each land in
+  # their own variable instead of being swapped.
+  my ($genetag, $srctag, $exontag) = $self->SUPER::_rearrange([qw(GENETAG
+                                                                 SRCTAG
+                                                                 EXONTAG)],
+                                                              @args);
+
+  # Fall back to the package-level defaults for anything not supplied.
+  $self->gene_tag(defined $genetag ? $genetag : $GeneTag);
+  $self->source_tag(defined $srctag ? $srctag : $SrcTag);
+  $self->exon_tag(defined $exontag ? $exontag : $ExonTag);
+
+  # Counts how often each "seqid.type" combination has been seen, used
+  # by next_prediction to make repeated tRNA IDs distinct.
+  $self->{'_seen'} = {};
+}
+
+=head2 gene_tag
+
+ Title   : gene_tag
+ Usage   : $obj->gene_tag($newval)
+ Function: Get/Set the value used for the 'gene_tag' of genes
+           Default is 'tRNA_gene' as set by the global $GeneTag
+ Returns : value of gene_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub gene_tag{
+    my ($self, @new) = @_;
+
+    # Any argument at all, even undef, counts as a new value to store.
+    $self->{'gene_tag'} = $new[0] if @new;
+
+    return $self->{'gene_tag'};
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $obj->source_tag($newval)
+ Function: Get/Set the value used for the 'source_tag' of exons and genes
+           Default is 'tRNAscan-SE' as set by the global $SrcTag
+ Returns : value of source_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub source_tag{
+    my ($self, @new) = @_;
+
+    # Any argument at all, even undef, counts as a new value to store.
+    $self->{'_source_tag'} = $new[0] if @new;
+
+    return $self->{'_source_tag'};
+}
+
+=head2 exon_tag
+
+ Title   : exon_tag
+ Usage   : $obj->exon_tag($newval)
+ Function: Get/Set the value used for the 'primary_tag' of exons
+           Default is 'tRNA_exon' as set by the global $ExonTag
+ Returns : value of exon_tag (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub exon_tag{
+    my ($self, @new) = @_;
+
+    # Any argument at all, even undef, counts as a new value to store.
+    $self->{'_exon_tag'} = $new[0] if @new;
+
+    return $self->{'_exon_tag'};
+}
+
+
+=head2 analysis_method
+
+ Usage     : $genscan->analysis_method();
+ Purpose   : Inherited method. Overridden to ensure that the name matches
+             /tRNAscan-SE/i.
+ Returns   : String
+ Argument  : n/a
+
+=cut
+
+#-------------
+sub analysis_method {
+#-------------
+    my ($self, $method) = @_;
+
+    # Only a true-valued method name is vetted; it has to contain
+    # "tRNAscan-SE" (matched case-insensitively) somewhere in the string.
+    if ($method) {
+        $self->throw("method $method not supported in " . ref($self))
+            unless $method =~ /tRNAscan-SE/i;
+    }
+
+    # Storing and retrieving the name is left to the parent class.
+    return $self->SUPER::analysis_method($method);
+}
+
+=head2 next_feature
+
+ Title   : next_feature
+ Usage   : while($gene = $genscan->next_feature()) {
+                  # do something
+           }
+ Function: Returns the next tRNA gene prediction of the tRNAscan-SE result
+           file. Call this method repeatedly until FALSE is returned.
+
+           The returned object is actually a SeqFeatureI implementing object.
+           This method is required for classes implementing the
+           SeqAnalysisParserI interface, and is merely an alias for 
+           next_prediction() at present.
+
+ Example :
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    :
+See also : L<Bio::SeqFeature::Generic>
+
+=cut
+
+sub next_feature {
+    # Alias for next_prediction(), provided under the name that generic
+    # feature-parsing callers use.
+    my ($self, @args) = @_;
+    # even though next_prediction doesn't expect any args (and this method
+    # does neither), we pass on args in order to be prepared if this changes
+    # ever
+    return $self->next_prediction(@args);
+}
+
+=head2 next_prediction
+
+ Title   : next_prediction
+ Usage   : while($gene = $genscan->next_prediction()) {
+                  # do something
+           }
+ Function: Returns the next tRNA gene prediction of the tRNAscan-SE result
+           file. Call this method repeatedly until FALSE is returned.
+
+ Example :
+ Returns : A Bio::SeqFeature::Generic object.
+ Args    :
+See also : L<Bio::SeqFeature::Generic>
+
+=cut
+
+sub next_prediction {
+    # Reads forward through the input until one tabular result row is
+    # found, and returns it as a gene feature carrying its exon
+    # sub-features.  Returns nothing (false) once the input is exhausted.
+    my ($self) = @_;
+    my ($genetag,$srctag,$exontag) = ( $self->gene_tag,
+                                       $self->source_tag,
+                                       $self->exon_tag);
+
+    # Read into a lexical instead of the global $_, so a caller that is
+    # itself looping with for/map does not get its $_ overwritten.
+    while( defined(my $line = $self->_readline) ) {
+        # Anything that is not a result row (headers, separators, ...) is
+        # silently skipped.
+        next unless $line =~ m/^(\S+)\s+       # sequence name
+            (\d+)\s+          # tRNA #
+            (\d+)\s+(\d+)\s+  # tRNA start,end
+            (\w{3})\s+        # tRNA type
+            ([CAGT]{3})\s+    # Codon
+            (\d+)\s+(\d+)\s+  # Intron Begin End
+            (\d+\.\d+)/x;     # Cove Score
+
+        my ($seqid,$tRNAnum,$start,$end,$type,
+            $codon,$intron_start,$intron_end,
+            $score) = ($1,$2,$3,$4,$5,$6,$7,$8,$9);
+
+        # Coordinates given high-to-low denote the minus strand; features
+        # are always stored with start <= end.
+        my $strand = 1;
+        if( $start > $end ) {
+            ($start,$end,$strand) = ($end,$start,-1);
+        }
+
+        # Second and later occurrences of the same type on the same
+        # sequence get a numeric suffix so that IDs stay distinct.
+        if( $self->{'_seen'}->{"$seqid.$type"}++ ) {
+            $type .= "-".$self->{'_seen'}->{"$seqid.$type"};
+        }
+
+        my $gene = Bio::SeqFeature::Generic->new
+            ( -seq_id => $seqid,
+              -start  => $start,
+              -end    => $end,
+              -strand => $strand,
+              -score  => $score,
+              -primary_tag => $genetag,
+              -source_tag  => $srctag,
+              -tag     => {
+                  'ID'        => "tRNA:$type",
+                  'AminoAcid' => $type,
+                  'Codon'     => $codon,
+              });
+
+        # A false (zero) intron start means no intron: the gene is a
+        # single exon.  Otherwise the intron splits it into two exons.
+        my @exon_ranges;
+        if( $intron_start ) {
+            if( $intron_start > $intron_end ) {
+                ($intron_start,$intron_end) = ($intron_end,$intron_start);
+            }
+            @exon_ranges = ( [ $start,          $intron_start-1 ],
+                             [ $intron_end+1,   $end            ] );
+        } else {
+            @exon_ranges = ( [ $start, $end ] );
+        }
+
+        for my $range ( @exon_ranges ) {
+            $gene->add_SeqFeature(Bio::SeqFeature::Generic->new
+                                  ( -seq_id=> $seqid,
+                                    -start => $range->[0],
+                                    -end   => $range->[1],
+                                    -strand=> $strand,
+                                    -primary_tag => $exontag,
+                                    -source_tag  => $srctag,
+                                    -tag => {
+                                        'Parent' => "tRNA:$type"
+                                        }));
+        }
+        return $gene;
+    }
+
+    # End of input: return an explicit false value rather than relying
+    # on whatever the finished loop happens to leave behind.
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/AlleleNode.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/AlleleNode.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/AlleleNode.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,500 @@
+# $Id: AlleleNode.pm,v 1.13.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::AlleleNode
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::AlleleNode - A Node with Alleles attached
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::AlleleNode;
+
+=head1 DESCRIPTION
+
+AlleleNodes are basic L<Bio::Tree::Node>s with the added ability to
+add Genotypes alleles as defined by the L<Bio::PopGen::IndividualI>
+interface.  Genotypes are defined by the L<Bio::PopGen::GenotypeI>
+interface, you will probably want to use the L<Bio::PopGen::Genotype>
+implementation.
+
+This is implemented via containment to avoid multiple inheritance
+problems.  There is a L<Bio::PopGen::Individual> object which handles
+the L<Bio::PopGen::IndividualI> interface, and is accessible via the
+L<Bio::Tree::AlleleNode::individual> method.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=head1 HISTORY
+
+This module was re-written to be a combination of
+L<Bio::PopGen::Individual> and L<Bio::Tree::Node> primarily for use in
+L<Bio::PopGen::Simulation::Coalescent> simulations.
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Tree::AlleleNode;
+use vars qw($UIDCOUNTER);
+use strict;
+BEGIN { $UIDCOUNTER = 1 }
+
+use Bio::PopGen::Individual;
+use Bio::PopGen::Genotype;
+
+use base qw(Bio::Tree::Node Bio::PopGen::IndividualI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::AlleleNode();
+ Function: Builds a new Bio::Tree::AlleleNode() object 
+ Returns : an instance of Bio::Tree::AlleleNode
+ Args    : -unique_id     => $id,
+           -genotypes     => \@genotypes
+           -left          => pointer to Left descendent (optional)
+           -right         => pointer to Right descendent (optional)
+	   -branch_length => branch length [integer] (optional)
+           -bootstrap     => value   bootstrap value (string)
+           -description   => description of node
+           -id            => human readable (unique) id for node
+                             Should NOT contain the characters 
+                             '();:'
+=cut
+
+sub new {
+    # Builds the tree node through the parent constructor, then attaches
+    # a Bio::PopGen::Individual built from the same argument list.
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    $self->individual( Bio::PopGen::Individual->new(@args));
+    return $self;
+}
+
+=head2 individual
+
+ Title   : individual
+ Usage   : $obj->individual($newval)
+ Function: Get/Set Access to the underlying individual object
+ Returns : L<Bio::PopGen::Individual> object
+ Args    : on set, new value (L<Bio::PopGen::Individual>)
+
+
+=cut
+
+sub individual {
+    my ($self, $newval) = @_;
+
+    if( defined $newval ) {
+        # Explicit set: replace whatever was stored before.
+        $self->{'individual'} = $newval;
+    }
+    elsif( ! defined $self->{'individual'} ) {
+        # Nothing stored yet: create an empty individual on first access.
+        $self->{'individual'} = Bio::PopGen::Individual->new();
+    }
+
+    return $self->{'individual'};
+}
+
+=head2 Bio::PopGen::Individual methods
+
+Methods required by L<Bio::PopGen::IndividualI>.
+
+
+=head2 unique_id
+
+ Title   : unique_id
+ Usage   : my $id = $individual->unique_id
+ Function: Unique Identifier
+ Returns : string representing unique identifier
+ Args    : string
+
+
+=cut
+
+sub unique_id{
+    my ($self, @args) = @_;
+
+    # Forwarded unchanged to the contained individual object.
+    return $self->individual->unique_id(@args);
+}
+
+=head2 num_of_results
+
+ Title   : num_of_results
+ Usage   : my $count = $person->num_of_results;
+ Function: returns the count of the number of Results for a person
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub num_of_results {
+    my ($self, @args) = @_;
+
+    # Forwarded unchanged to the contained individual object.
+    return $self->individual->num_of_results(@args);
+}
+
+=head2 add_Genotype
+
+ Title   : add_Genotype
+ Usage   : $individual->add_Genotype
+ Function: add a genotype value, only a single genotype
+           may be associated 
+ Returns : count of the number of genotypes associated with this individual
+ Args    : @genotypes - Bio::PopGen::GenotypeI object(s) containing 
+                        alleles plus a marker name
+
+=cut
+
+sub add_Genotype {
+    my ($self, @genotypes) = @_;
+
+    # Forwarded unchanged to the contained individual object.
+    return $self->individual->add_Genotype(@genotypes);
+}
+
+=head2 reset_Genotypes
+
+ Title   : reset_Genotypes
+ Usage   : $individual->reset_Genotypes;
+ Function: Reset the genotypes stored for this individual
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_Genotypes{
+    my ($self, @args) = @_;
+
+    # Forwarded unchanged to the contained individual object.
+    return $self->individual->reset_Genotypes(@args);
+}
+
+=head2 remove_Genotype
+
+ Title   : remove_Genotype
+ Usage   : $individual->remove_Genotype(@names)
+ Function: Removes the genotypes for the requested markers
+ Returns : none
+ Args    : Names of markers 
+
+
+=cut
+
+sub remove_Genotype{
+    my ($self, @names) = @_;
+
+    # Forwarded unchanged to the contained individual object.
+    return $self->individual->remove_Genotype(@names);
+}
+
+=head2 get_Genotypes
+
+ Title   : get_Genotypes
+ Usage   : my @genotypes = $ind->get_Genotypes(-marker => $markername);
+ Function: Get the genotypes for an individual, based on a criteria
+ Returns : Array of genotypes
+ Args    : either none (return all genotypes) or 
+           -marker => name of marker to return (exact match, case matters)
+
+
+=cut
+
+sub get_Genotypes{
+    my ($self, @args) = @_;
+
+    # Forwarded unchanged to the contained individual object; the
+    # caller's list or scalar context is passed through as well.
+    return $self->individual->get_Genotypes(@args);
+}
+
+=head2 has_Marker
+
+ Title   : has_Marker
+ Usage   : if( $ind->has_Marker($name) ) {}
+ Function: Boolean test to see if an Individual has a genotype 
+           for a specific marker
+ Returns : Boolean (true or false)
+ Args    : String representing a marker name
+
+
+=cut
+
+sub has_Marker{
+    my ($self, @args) = @_;
+
+    # Forwarded unchanged to the contained individual object.
+    return $self->individual->has_Marker(@args);
+}
+
+=head2 get_marker_names
+
+ Title   : get_marker_names
+ Usage   : my @names = $individual->get_marker_names;
+ Function: Returns the list of known marker names
+ Returns : List of strings
+ Args    : none
+
+
+=cut
+
+sub get_marker_names{
+   my ($self, @args) = @_;
+
+   # Forwarded unchanged to the contained individual object.
+   return $self->individual->get_marker_names(@args);
+}
+
+=head2 Bio::Tree::Node methods
+
+Methods inherited from L<Bio::Tree::Node>.
+
+
+=head2 add_Descendent
+
+ Title   : add_Descendent
+ Usage   : $node->add_Descendent($node);
+ Function: Adds a descendent to a node
+ Returns : number of current descendents for this node
+ Args    : Bio::Node::NodeI
+           boolean flag, true if you want to ignore the fact that you are
+           adding a second node with the same unique id (typically memory 
+           location reference in this implementation).  default is false and 
+           will throw an error if you try and overwrite an existing node.
+
+
+=head2 each_Descendent
+
+ Title   : each_Descendent($sortby)
+ Usage   : my @nodes = $node->each_Descendent;
+ Function: all the descendents for this Node (but not their descendents
+					      i.e. not a recursive fetchall)
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : $sortby [optional] "height", "creation" or coderef to be used
+           to sort the order of children nodes.
+
+
+=head2 remove_Descendent
+
+ Title   : remove_Descendent
+ Usage   : $node->remove_Descendent($node_foo);
+ Function: Removes a specific node from being a Descendent of this node
+ Returns : nothing
+ Args    : An array of Bio::Node::NodeI objects which have been previously
+           passed to the add_Descendent call of this object.
+
+
+=head2 remove_all_Descendents
+
+ Title   : remove_all_Descendents
+ Usage   : $node->remove_all_Descendents()
+ Function: Cleanup the node's reference to descendents and reset
+           their ancestor pointers to undef, if you don't have a reference
+           to these objects after this call they will be cleaned up - so
+           a get_nodes from the Tree object would be a safe thing to do first
+ Returns : nothing
+ Args    : none
+
+
+
+=head2 get_all_Descendents
+
+ Title   : get_all_Descendents
+ Usage   : my @nodes = $node->get_all_Descendents;
+ Function: Recursively fetch all the nodes and their descendents
+           *NOTE* This is different from each_Descendent
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : none
+
+=cut
+
+# implemented in the interface 
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : $obj->ancestor($newval)
+ Function: Set the Ancestor
+ Returns : value of ancestor
+ Args    : newvalue (optional)
+
+
+=head2 branch_length
+
+ Title   : branch_length
+ Usage   : $obj->branch_length()
+ Function: Get/Set the branch length
+ Returns : value of branch_length
+ Args    : newvalue (optional)
+
+
+=head2 bootstrap
+
+ Title   : bootstrap
+ Usage   : $obj->bootstrap($newval)
+ Function: Get/Set the bootstrap value
+ Returns : value of bootstrap
+ Args    : newvalue (optional)
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function: Get/Set the description string
+ Returns : value of description
+ Args    : newvalue (optional)
+
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id($newval)
+ Function: The human readable identifier for the node 
+ Returns : value of human readable id
+ Args    : newvalue (optional)
+ Note    : id cannot contain the characters '();:'
+
+"A name can be any string of printable characters except blanks,
+colons, semicolons, parentheses, and square brackets. Because you may
+want to include a blank in a name, it is assumed that an underscore
+character ("_") stands for a blank; any of these in a name will be
+converted to a blank when it is read in."
+
+from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
+
+=cut
+
+=head2 internal_id
+
+ Title   : internal_id
+ Usage   : my $internalid = $node->internal_id
+ Function: Returns the internal unique id for this Node
+           (a monotonically increasing number for this in-memory implementation
+            but could be a database determined unique id in other 
+	    implementations)
+ Returns : unique id
+ Args    : none
+
+
+=head2 Bio::Node::NodeI decorated interface implemented
+
+The following methods are implemented by L<Bio::Node::NodeI> decorated
+interface.
+
+=head2 is_Leaf
+
+ Title   : is_Leaf
+ Usage   : if( $node->is_Leaf )
+ Function: Get Leaf status
+ Returns : boolean
+ Args    : none
+
+=cut
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : my $str = $node->to_string()
+ Function: For debugging, provide a node as a string
+ Returns : string
+ Args    : none
+
+=head2 height
+
+ Title   : height
+ Usage   : my $len = $node->height
+ Function: Returns the height of the tree starting at this
+           node.  Height is the maximum branchlength.
+ Returns : The longest length (weighting branches with branch_length) to a leaf
+ Args    : none
+
+=head2 invalidate_height
+
+ Title   : invalidate_height
+ Usage   : private helper method
+ Function: Invalidate our cached value of the node's height in the tree
+ Returns : nothing
+ Args    : none
+
+=cut
+
+#'
+
+=head2 add_tag_value
+
+ Title   : add_tag_value
+ Usage   : $node->add_tag_value($tag,$value)
+ Function: Adds a tag value to a node 
+ Returns : number of values stored for this tag
+ Args    : $tag   - tag name
+           $value - value to store for the tag
+
+
+=head2 remove_tag
+
+ Title   : remove_tag
+ Usage   : $node->remove_tag($tag)
+ Function: Remove the tag and all values for this tag
+ Returns : boolean representing success (0 if tag does not exist)
+ Args    : $tag - tagname to remove
+
+
+
+=head2 remove_all_tags
+
+ Title   : remove_all_tags
+ Usage   : $node->remove_all_tags()
+ Function: Removes all tags 
+ Returns : None
+ Args    : None
+
+
+
+=head2 get_all_tags
+
+ Title   : get_all_tags
+ Usage   : my @tags = $node->get_all_tags()
+ Function: Gets all the tag names for this Node
+ Returns : Array of tagnames
+ Args    : None
+
+
+=head2 get_tag_values
+
+ Title   : get_tag_values
+ Usage   : my @values = $node->get_tag_values($tag)
+ Function: Gets the values for given tag ($tag)
+ Returns : Array of values or empty list if tag does not exist
+ Args    : $tag - tag name
+
+
+=head2 has_tag
+
+ Title   : has_tag
+ Usage   : $node->has_tag($tag)
+ Function: Boolean test if tag exists in the Node
+ Returns : Boolean
+ Args    : $tag - tagname
+
+
+=cut
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Compatible.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Compatible.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Compatible.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,408 @@
+# $Id: Compatible.pm,v 1.5.4.3 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::Compatible
+#
+# Cared for by Gabriel Valiente <valiente at lsi.upc.edu>
+#
+# Copyright Gabriel Valiente
+#
+# You may distribute this module under the same terms as Perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::Compatible - Testing compatibility of phylogenetic trees
+with nested taxa.
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::Compatible;
+  use Bio::TreeIO;
+  my $input = new Bio::TreeIO('-format' => 'newick',
+                              '-file'   => 'input.tre');
+  my $t1 = $input->next_tree;
+  my $t2 = $input->next_tree;
+
+  my ($incompat, $ilabels, $inodes) = $t1->is_compatible($t2);
+  if ($incompat) {
+    my %cluster1 = %{ $t1->cluster_representation };
+    my %cluster2 = %{ $t2->cluster_representation };
+    print "incompatible trees\n";
+    if (scalar(@$ilabels)) {
+      foreach my $label (@$ilabels) {
+        my $node1 = $t1->find_node(-id => $label);
+        my $node2 = $t2->find_node(-id => $label);
+        my @c1 = sort @{ $cluster1{$node1} };
+        my @c2 = sort @{ $cluster2{$node2} };
+        print "label $label";
+        print " cluster"; map { print " ",$_ } @c1;
+        print " cluster"; map { print " ",$_ } @c2; print "\n";
+      }
+    }
+    if (scalar(@$inodes)) {
+      while (@$inodes) {
+        my $node1 = shift @$inodes;
+        my $node2 = shift @$inodes;
+        my @c1 = sort @{ $cluster1{$node1} };
+        my @c2 = sort @{ $cluster2{$node2} };
+        print "cluster"; map { print " ",$_ } @c1;
+        print " properly intersects cluster";
+        map { print " ",$_ } @c2; print "\n";
+      }
+    }
+  } else {
+    print "compatible trees\n";
+  }
+
+=head1 DESCRIPTION
+
+Bio::Tree::Compatible is a Perl tool for testing compatibility of
+phylogenetic trees with nested taxa represented as Bio::Tree::Tree
+objects. It is based on a recent characterization of ancestral
+compatibility of semi-labeled trees in terms of their cluster
+representations.
+
+A semi-labeled tree is a phylogenetic tree with some of its internal
+nodes labeled, and it can represent a classification tree as well as a
+phylogenetic tree with nested taxa, with labeled internal nodes
+corresponding to taxa at a higher level of aggregation or nesting than
+that of their descendents.
+
+Two semi-labeled trees are compatible if their topological
+restrictions to the common labels are such that for each node label,
+the smallest clusters containing it in each of the trees coincide and,
+furthermore, no cluster in one of the trees properly intersects a
+cluster of the other tree.
+
+Future extensions of Bio::Tree::Compatible include a
+Bio::Tree::Supertree module for combining compatible phylogenetic
+trees with nested taxa into a common supertree.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 SEE ALSO
+
+=over
+
+=item * Philip Daniel and Charles Semple. Supertree Algorithms for
+Nested Taxa. In: Olaf R. P. Bininda-Emonds (ed.) Phylogenetic
+Supertrees: Combining Information to Reveal the Tree of Life,
+I<Computational Biology>, vol. 4, chap. 7, pp. 151-171. Kluwer (2004).
+
+=item * Charles Semple, Philip Daniel, Wim Hordijk, Roderic
+D. M. Page, and Mike Steel: Supertree Algorithms for Ancestral
+Divergence Dates and Nested Taxa. Bioinformatics B<20>(15), 2355-2360
+(2004).
+
+=item * Merce Llabres, Jairo Rocha, Francesc Rossello, and Gabriel
+Valiente: On the Ancestral Compatibility of Two Phylogenetic Trees
+with Nested Taxa. J. Math. Biol. B<53>(3), 340-364 (2006).
+
+=back
+
+=head1 AUTHOR - Gabriel Valiente
+
+Email valiente at lsi.upc.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+
+=cut
+
+package Bio::Tree::Compatible;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Set::Scalar;
+
+use base qw(Bio::Root::Root);
+
+=head2 postorder_traversal
+
+ Title   : postorder_traversal
+ Usage   : my @nodes = @{ $tree->postorder_traversal }
+ Function: Return list of nodes in postorder
+ Returns : reference to array of Bio::Tree::Node
+ Args    : none
+
+For example, the postorder traversal of the tree
+C<(((A,B)C,D),(E,F,G));> is a reference to an array of nodes with
+internal_id 0 through 9, because the Newick standard representation
+for phylogenetic trees is based on a postorder traversal.
+
+          +---A                    +---0
+          |                        |
+  +---+---C                +---4---2
+  |   |   |                |   |   |
+  |   |   +---B            |   |   +---1
+  |   |                    |   |
+  +   +-------D            9   +-------3
+  |                        |
+  |     +-----E            |     +-----5
+  |     |                  |     |
+  +-----+-----F            +-----8-----6
+        |                        |
+        +-----G                  +-----7
+
+=cut
+
+sub postorder_traversal {
+  my ($self) = @_;
+
+  # Iterative walk: nodes are taken off the end of @pending, so each
+  # parent is recorded before its children.  Reversing that record at
+  # the end puts every child ahead of its parent.
+  my @pending = ( $self->get_root_node );
+  my @visited;
+  while (@pending) {
+    my $current = pop @pending;
+    push @visited, $current;
+    # NOTE(review): each_Descendent is documented as taking a single
+    # positional $sortby; confirm that this named-argument form selects
+    # the intended ordering.
+    push @pending, $current->each_Descendent(-sortby => 'internal_id');
+  }
+
+  return [ reverse @visited ];
+}
+
+=head2 cluster_representation
+
+ Title   : cluster_representation
+ Usage   : my %cluster = %{ $tree->cluster_representation }
+ Function: Compute the cluster representation of a tree
+ Returns : reference to hash of array of string indexed by
+           Bio::Tree::Node
+ Args    : none
+
+For example, the cluster representation of the tree
+C<(((A,B)C,D),(E,F,G));> is a reference to a hash associating an array
+of string (descendent labels) to each node, as follows:
+
+  0 --> [A]
+  1 --> [B]
+  2 --> [A,B,C]
+  3 --> [D]
+  4 --> [A,B,C,D]
+  5 --> [E]
+  6 --> [F]
+  7 --> [G]
+  8 --> [E,F,G]
+  9 --> [A,B,C,D,E,F,G]
+
+=cut
+
+sub cluster_representation {
+  my ($tree) = @_;
+
+  # Keys are the node objects in their string form; each value is a
+  # reference to the list of non-empty ids found among that node's
+  # descendents, followed by the node's own id when it has one.
+  my %labels_of;
+  for my $node ( @{ postorder_traversal($tree) } ) {
+    my @ids = map { $_->id } grep { $_->id } $node->get_Descendents;
+    if ($node->id) {
+      push @ids, $node->id;
+    }
+    $labels_of{$node} = \@ids;
+  }
+
+  return \%labels_of;
+}
+
+=head2 common_labels
+
+ Title   : common_labels
+ Usage   : my $labels = $tree1->common_labels($tree2);
+ Function: Return set of common node labels
+ Returns : Set::Scalar
+ Args    : Bio::Tree::Tree
+
+For example, the common labels of the tree C<(((A,B)C,D),(E,F,G));>
+and the tree C<((A,B)H,E,(J,(K)G)I);> are: C<[A,B,E,G]>.
+
+          +---A                 +---A
+          |                     |
+  +---+---C             +-------H
+  |   |   |             |       |
+  |   |   +---B         |       +---B
+  |   |                 |
+  +   +-------D         +-----------E
+  |                     |
+  |     +-----E         |   +-------J
+  |     |               |   |
+  +-----+-----F         +---I
+        |                   |
+        +-----G             +---G---K
+
+=cut
+
+sub common_labels {
+  my ($self, $arg) = @_;
+
+  # Collects the non-empty node ids of one tree into a set.
+  my $label_set = sub {
+    my ($tree) = @_;
+    my @ids = map { $_->id } grep { $_->id } $tree->get_nodes;
+    return Set::Scalar->new( @ids );
+  };
+
+  my $own_labels   = $label_set->($self);
+  my $other_labels = $label_set->($arg);
+
+  return $own_labels->intersection($other_labels);
+}
+
+=head2 topological_restriction
+
+ Title   : topological_restriction
+ Usage   : $tree->topological_restriction($labels)
+ Function: Compute the topological restriction of a tree to a subset
+           of node labels
+ Returns : Bio::Tree::Tree
+ Args    : Set::Scalar
+
+For example, the topological restrictions of each of the trees
+C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to the labels
+C<[A,B,E,G]> are as follows:
+
+          +---A             +---A
+          |                 |
+  +---+---+             +---+
+  |       |             |   |
+  |       +---B         |   +---B
+  +                     |
+  |       +---E         +-------E
+  |       |             |
+  +-------+             +---+---G
+          |
+          +---G
+
+=cut
+
+sub topological_restriction {
+  # Restricts $tree, in place, to the node labels contained in $labels
+  # (a Set::Scalar): nodes whose cluster shares no label with $labels
+  # are removed from the tree, and surviving nodes whose own label is
+  # not in $labels have that label cleared.  Returns the modified tree.
+  my ($tree, $labels) = @_;
+
+  for my $node ( @{ postorder_traversal($tree) } ) {
+    # Skip anything that is not (or is no longer) a node object.  The
+    # previous guard was inverted and skipped every real node, so the
+    # tree was never actually restricted.
+    next unless ref($node);
+
+    # The node's cluster: labels of all its descendents plus its own.
+    my @cluster = map { $_->id } grep { $_->id } $node->get_Descendents;
+    push @cluster, $node->id if $node->id;
+    my $cluster = Set::Scalar->new(@cluster);
+
+    if ($cluster->is_disjoint($labels)) {
+      # Nothing below (or at) this node is of interest.
+      $tree->remove_Node($node);
+    }
+    elsif ($node->id and not $labels->has($node->id)) {
+      # Keep the node for its descendents but drop its label.
+      $node->{'_id'} = undef;
+    }
+  }
+
+  return $tree;
+}
+
+=head2 is_compatible
+
+ Title   : is_compatible
+ Usage   : $tree1->is_compatible($tree2)
+ Function: Test compatibility of two trees
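+ Returns : list of three values: a boolean that is true when the trees
+           are incompatible, a reference to an array of the labels whose
+           smallest clusters differ, and a reference to an array of node
+           pairs (node of first tree, node of second tree) whose
+           clusters properly intersect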
+ Args    : Bio::Tree::Tree
+
+For example, the topological restrictions of the trees
+C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to their common
+labels, C<[A,B,E,G]>, are compatible. The respective cluster
+representations are as follows:
+
+  [A]                  [A]
+  [B]                  [B]
+  [E]                  [E]
+  [G]                  [G]
+  [A,B]                [A,B]
+  [E,G]                [A,B,E,G]
+  [A,B,E,G]
+
+As a second example, the trees C<(A,B);> and C<((B)A);> are
+incompatible. Their respective cluster representations are as follows:
+
+  [A]                  [B]
+  [B]                  [A,B]
+  [A,B]
+
+The reason is, the smallest cluster containing label C<A> is C<[A]> in
+the first tree but C<[A,B]> in the second tree.
+
+ +---A         A---B
+ |
+ +
+ |
+ +---B
+
+As a third example, the trees C<(((B,A),C),D);> and C<((A,(D,B)),C);>
+are also incompatible. Their respective cluster representations are as
+follows:
+
+  [A]                  [A]
+  [B]                  [B]
+  [C]                  [C]
+  [D]                  [D]
+  [A,B]                [B,D]
+  [A,B,C]              [A,B,D]
+  [A,B,C,D]            [A,B,C,D]
+
+The reason is, cluster C<[A,B]> properly intersects cluster
+C<[B,D]>. There are further incompatibilities between these trees:
+C<[A,B,C]> properly intersects both C<[B,D]> and C<[A,B,D]>.
+
+          +---B             +-------A
+          |                 |
+      +---+             +---+   +---D
+      |   |             |   |   |
+  +---+   +---A         |   +---+
+  |   |                 +       |
+  +   +-------C         |       +---B
+  |                     |
+  +-----------D         +-----------C
+
+=cut
+
+sub is_compatible {
+  my ($tree1, $tree2) = @_;
+
+  # Both trees are handed to topological_restriction with the set of
+  # labels they have in common before any clusters are computed.
+  my $common = $tree1->Bio::Tree::Compatible::common_labels($tree2);
+  $tree1->Bio::Tree::Compatible::topological_restriction($common);
+  $tree2->Bio::Tree::Compatible::topological_restriction($common);
+
+  my $order1    = postorder_traversal($tree1);
+  my $order2    = postorder_traversal($tree2);
+  my $clusters1 = cluster_representation($tree1);
+  my $clusters2 = cluster_representation($tree2);
+
+  my $incompat = 0; # false until a conflict is found
+
+  # First test: for every common label, the cluster of the node that
+  # carries it must be the same in both trees.
+  my @bad_labels;
+  for my $label ( $common->elements ) {
+    my $node1   = $tree1->find_node(-id => $label);
+    my @labels1 = @{ $clusters1->{$node1} };
+    my $set1    = Set::Scalar->new(@labels1);
+    my $node2   = $tree2->find_node(-id => $label);
+    my @labels2 = @{ $clusters2->{$node2} };
+    my $set2    = Set::Scalar->new(@labels2);
+    next if $set1->is_equal($set2);
+    $incompat = 1; # true
+    push @bad_labels, $label;
+  }
+
+  # Second test: no cluster of one tree may properly intersect a
+  # cluster of the other.  Offending nodes are recorded pairwise.
+  my @bad_nodes;
+  for my $node1 ( @$order1 ) {
+    my @labels1 = @{ $clusters1->{$node1} };
+    my $set1    = Set::Scalar->new(@labels1);
+    for my $node2 ( @$order2 ) {
+      my @labels2 = @{ $clusters2->{$node2} };
+      my $set2    = Set::Scalar->new(@labels2);
+      next unless $set1->is_properly_intersecting($set2);
+      $incompat = 1; # true
+      push @bad_nodes, $node1, $node2;
+    }
+  }
+
+  return ($incompat, \@bad_labels, \@bad_nodes);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/DistanceFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/DistanceFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/DistanceFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,566 @@
+# $Id: DistanceFactory.pm,v 1.8.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::DistanceFactory
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::DistanceFactory - Construct a tree using distance based methods
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::DistanceFactory;
+  use Bio::AlignIO;
+  use Bio::Align::DNAStatistics;
+  my $tfactory = Bio::Tree::DistanceFactory->new(-method => "NJ");
+  my $stats    = Bio::Align::DNAStatistics->new();
+
+  my $alnin    = Bio::AlignIO->new(-format => 'clustalw',
+                                   -file   => 'file.aln');
+  my $aln = $alnin->next_aln;
+  # Of course matrix can come from a different place
+  # like PHYLIP if you prefer, Bio::Matrix::IO should be able
+  # to parse many things
+  my $jcmatrix = $stats->distance(-align => $aln, 
+                                  -method => 'Jukes-Cantor');
+  my $tree = $tfactory->make_tree($jcmatrix);
+
+
+=head1 DESCRIPTION
+
+This is a factory which will construct a phylogenetic tree based on
+the pairwise sequence distances for a set of sequences.  Currently
+UPGMA (Sokal and Michener 1958) and NJ (Saitou and Nei 1987) tree
+construction methods are implemented.
+
+=head1 REFERENCES
+
+Eddy SR, Durbin R, Krogh A, Mitchison G, (1998) "Biological Sequence Analysis",
+Cambridge Univ Press, Cambridge, UK.
+
+Howe K, Bateman A, Durbin R, (2002) "QuickTree: building huge
+Neighbour-Joining trees of protein sequences." Bioinformatics
+18(11):1546-1547.
+
+Saitou N and Nei M, (1987) "The neighbor-joining method: a new method
+for reconstructing phylogenetic trees." Mol Biol Evol 4(4):406-25.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tree::DistanceFactory;
+use vars qw($DefaultMethod $Precision);
+use strict;
+
+# some defaults
+$DefaultMethod = 'UPGMA';
+$Precision = 5;
+
+use Bio::Tree::Node;
+use Bio::Tree::Tree;
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::DistanceFactory();
+ Function: Builds a new Bio::Tree::DistanceFactory object 
+ Returns : an instance of Bio::Tree::DistanceFactory
+ Args    : -method => 'NJ' or 'UPGMA'
+
+
+=cut
+
+sub new {
+  # The archived copy had the '@' of @args replaced by ' at ' (mail-archive
+  # address obfuscation), which is not valid Perl; the sigil is restored here.
+  my($class,@args) = @_;
+  my $self = $class->SUPER::new(@args);
+
+  # -method selects the tree-building algorithm; fall back to the
+  # package-level $DefaultMethod when it is omitted (or false).
+  my ($method) = $self->_rearrange([qw(METHOD)],
+				   @args);
+  $self->method($method || $DefaultMethod);
+  return $self;
+}
+
+=head2 make_tree
+
+ Title   : make_tree
+ Usage   : my $tree = $disttreefact->make_tree($matrix);
+ Function: Build a Tree based on a distance matrix
+ Returns : L<Bio::Tree::TreeI>
+ Args    : L<Bio::Matrix::MatrixI> object
+
+
+=cut
+
+sub make_tree{
+   my ($self,$matrix) = @_;
+
+   # Only a blessed object can answer ->isa; calling it on a plain
+   # (unblessed) reference would die instead of producing the warning
+   # below, so test for blessedness first.  Scalar::Util is a core module.
+   require Scalar::Util;
+   unless( defined $matrix && Scalar::Util::blessed($matrix) &&
+	   $matrix->isa('Bio::Matrix::MatrixI') ) {
+       $self->warn("Need to provide a valid Bio::Matrix::MatrixI object to make_tree");
+       return;
+   }
+
+   # Dispatch on the configured method name, case-insensitively
+   # (the name is upper-cased once, then matched).
+   my $method = uc ($self->method);
+   if( $method =~ /NJ/ ) {
+       return $self->_nj($matrix);
+   } elsif( $method =~ /UPGMA/ ) {
+       return $self->_upgma($matrix);
+   }
+
+   $self->warn("Unknown tree construction method '$method'.  Cannot run.");
+   return;
+}
+
+
+=head2 _nj
+
+ Title   : _nj
+ Usage   : my $tree = $disttreefact->_nj($matrix);
+ Function: Construct a tree based on distance matrix using the 
+           Neighbor Joining algorithm (Saitou and Nei, 1987)
+           Implementation based on Kevin Howe's Quicktree implementation
+           and uses his tricks (some based on Bill Bruno's work) to eliminate
+           negative branch lengths
+ Returns : L<Bio::Tree::TreeI>
+ Args    : L<Bio::Matrix::MatrixI> object
+
+=cut
+
+sub _nj {
+   my ($self,$distmat) = @_;
+
+   # we assume type checking of $distmat has already been done
+   # client shouldn't be calling this directly anyways, using the
+   # make_tree method is preferred
+
+   # so that we can trim the number of digits shown as the branch length
+   my $precisionstr = "%.$Precision"."f";
+
+   my @names =  $distmat->column_names;
+   my $N = scalar @names;
+   # @nodes : current node for each matrix index (undef once merged away)
+   # $mat   : working copy of the distance matrix (arrayref of arrayrefs)
+   # @r     : per-index row sums divided by ($L - 2)
+   # (the '@' sigils in this declaration had been replaced by ' at ' in the
+   #  archived copy; they are restored here)
+   my ($i,$j,$m,@nodes,$mat,@r);
+   my $L = $N;   # number of nodes still to be joined
+
+   if( $N < 2 ) {
+       $self->warn("Can only perform NJ treebuilding on sets of 2 or more species\n");
+       return;
+   } elsif( $N == 2 ) {
+       # Two taxa: hang both off a new root, each with half the distance.
+       my $d = sprintf($precisionstr,
+		       $distmat->get_entry($names[0],$names[1]) / 2);
+       my $root = Bio::Tree::Node->new();
+       for my $nm ( @names ) {
+	   # add_Descendent (singular) is the spelling used by _upgma below
+	   $root->add_Descendent( Bio::Tree::Node->new(-id => $nm,
+						       -branch_length => $d));
+       }
+       # was "Bio::Tree::Tree(-root => $root)", i.e. a call to a
+       # non-existent function rather than the constructor
+       return Bio::Tree::Tree->new(-root => $root);
+   }
+
+   # copy the matrix and compute the initial r values
+   for ( $i = 0; $i < $N; $i++ ) {
+       push @nodes, Bio::Tree::Node->new(-id => $names[$i]);
+       my $ri = 0;
+       for( $j = 0; $j < $N; $j++ ) {
+	   $mat->[$i][$j] = $distmat->get_entry($names[$i],$names[$j]);
+	   $ri += $mat->[$i][$j];
+       }
+       $r[$i] = $ri / ($L -2);
+   }
+
+   # join pairs until only three nodes remain
+   for( my $nodecount = 0; $nodecount < $N-3; $nodecount++) {
+       # find the pair (mini,minj) minimising d(i,j) - (r_i + r_j);
+       # '<=' means the last pair found wins a tie
+       my ($mini,$minj,$min);
+       for($i = 0; $i < $N; $i++ ) {
+	   next unless defined $nodes[$i];
+	   for( $j = 0; $j < $i; $j++ ) {
+	       next unless defined $nodes[$j];
+	       my $dist = $mat->[$i][$j] - ($r[$i] + $r[$j]);
+	       if( ! defined $min ||
+		   $dist <= $min) {
+		   ($mini,$minj,$min) = ($i,$j,$dist);
+	       }
+	   }
+       }
+       my $dij    = $mat->[$mini][$minj];
+       my $dist_i = ($dij + $r[$mini] - $r[$minj]) / 2;
+       my $dist_j = $dij - $dist_i;
+
+       # deal with negative branch lengths
+       # per code in K.Howe's quicktree
+       if( $dist_i < 0 ) {
+	   $dist_i = 0;
+	   $dist_j = $dij;
+	   $dist_j = 0 if( $dist_j < 0 );
+       } elsif( $dist_j < 0 ) {
+	   $dist_j = 0;
+	   $dist_i = $dij;
+	   $dist_i = 0 if( $dist_i < 0 );
+       }
+
+       $nodes[$mini]->branch_length(sprintf($precisionstr,$dist_i));
+       $nodes[$minj]->branch_length(sprintf($precisionstr,$dist_j));
+
+       my $newnode = Bio::Tree::Node->new(-descendents => [ $nodes[$mini],
+							    $nodes[$minj] ]);
+
+       # the joined node takes over slot $mini; slot $minj is retired
+       # (every later use of @nodes only tests the slot with defined())
+       $nodes[$mini] = $newnode;
+       $nodes[$minj] = undef;
+
+       # update the distance matrix
+       $r[$mini] = 0;
+       my ($dmi,$dmj);
+       for( $m = 0; $m < $N; $m++ ) {
+	   next unless defined $nodes[$m];
+	   if( $m != $mini ) {
+	       $dmj = $mat->[$m][$minj];
+	       $dmi = $mat->[$m][$mini];
+
+	       # from K.Howe's notes in quicktree
+	       # we can actually adjust r[m] here, by using the form:
+	       # rm = ((rm * numseqs) - dmi - dmj + dmk) / (numseqs-1)
+
+	       # Note: in Bill Bruno's method for negative branch
+	       # elimination, then if either dist_i is positive and
+	       # dist_j is 0, or dist_i is zero and dist_j is positive
+	       # (after adjustment) then the matrix entry is formed
+	       # from the distance to the node in question (m) to the
+	       # node with the zero branch length (whichever it was).
+	       # I think my code already has the same effect; this is
+	       # certainly true if dij is equal to dist_i + dist_j,
+	       # which it should have been fixed to
+
+	       my $dmk = $mat->[$m][$mini] = $mat->[$mini][$m] =
+		   ($dmi + $dmj - $dij) / 2;
+
+	       # If we don't want to try and correct negative brlens
+	       # this is essentially what is in Eddy et al, BSA book.
+	       # $r[$m] = (($r[$m] * $L) - $dmi - $dmj + $dmk) / ($L-1);
+	       #
+	       $r[$m] = (($r[$m] * ($L - 2)) - $dmi - $dmj +
+			 $mat->[$m][$mini]) / ( $L - 3);
+	       $r[$mini] += $dmk;
+	   }
+       }
+       $L--;
+       $r[$mini] /= $L - 2;
+   }
+
+   # should be 3 nodes left
+   my (@leftovernodes,@leftovers);
+   for( my $k = 0; $k < $N; $k++ ) {
+       if( defined $nodes[$k] ) {
+	   push @leftovers, $k;
+	   push @leftovernodes, $nodes[$k];
+       }
+   }
+   my ($l_0,$l_1,$l_2) = @leftovers;
+
+   my $dist_i = ( $mat->[$l_1][$l_0] + $mat->[$l_2][$l_0] -
+		  $mat->[$l_2][$l_1] ) / 2;
+
+   my $dist_j = ( $mat->[$l_1][$l_0] - $dist_i);
+   my $dist_k = ( $mat->[$l_2][$l_0] - $dist_i);
+
+   # This is Kev's code to get rid of negative branch lengths
+   if( $dist_i < 0 ) {
+       $dist_i = 0;
+       $dist_j = $mat->[$l_1][$l_0];
+       $dist_k = $mat->[$l_2][$l_0];
+       if( $dist_j < 0 ) {
+	   $dist_j = 0;
+	   $dist_k = ( $mat->[$l_2][$l_0] + $mat->[$l_2][$l_1] ) / 2;
+	   $dist_k = 0 if( $dist_k < 0 );
+       } elsif( $dist_k < 0 ) {
+	   $dist_k = 0;
+	   $dist_j = ($mat->[$l_1][$l_0] + $mat->[$l_2][$l_1]) / 2;
+	   $dist_j = 0 if( $dist_j < 0 );
+       }
+   } elsif( $dist_j < 0 ) {
+       $dist_j = 0;
+       $dist_i = $mat->[$l_1][$l_0];
+       $dist_k = $mat->[$l_2][$l_1];
+       if( $dist_i < 0 ) {
+	   $dist_i = 0;
+	   $dist_k = ( $mat->[$l_2][$l_0] + $mat->[$l_2][$l_1]) / 2;
+	   $dist_k = 0 if( $dist_k  < 0 );
+       } elsif( $dist_k < 0 ) {
+	   $dist_k = 0;
+	   $dist_i = ( $mat->[$l_1][$l_0] + $mat->[$l_2][$l_0]) / 2;
+	   $dist_i = 0 if( $dist_i < 0 );
+       }
+   } elsif( $dist_k < 0 ) {
+       $dist_k = 0;
+       $dist_i = $mat->[$l_2][$l_0];
+       $dist_j = $mat->[$l_2][$l_1];
+       if( $dist_i < 0 ) {
+	   $dist_i = 0;
+	   $dist_j = ( $mat->[$l_1][$l_0] + $mat->[$l_2][$l_1] ) / 2;
+	   $dist_j = 0 if $dist_j < 0;
+       } elsif( $dist_j < 0  ) {
+	   $dist_j = 0;
+	   $dist_i = ($mat->[$l_1][$l_0] + $mat->[$l_2][$l_0]) / 2;
+	   $dist_i = 0 if $dist_i < 0;
+       }
+   }
+   $leftovernodes[0]->branch_length(sprintf($precisionstr,$dist_i));
+   $leftovernodes[1]->branch_length(sprintf($precisionstr,$dist_j));
+   $leftovernodes[2]->branch_length(sprintf($precisionstr,$dist_k));
+
+   # the three remaining nodes become the children of the root
+   return Bio::Tree::Tree->new(-root => Bio::Tree::Node->new
+			       (-descendents => \@leftovernodes));
+}
+
+=head2 _upgma
+
+ Title   : _upgma
+ Usage   : my $tree = $disttreefact->_upgma($matrix);
+ Function: Construct a tree based on alignment using UPGMA
+ Returns : L<Bio::Tree::TreeI>
+ Args    : L<Bio::Matrix::MatrixI> object
+
+
+=cut
+
+sub _upgma{
+   my ($self,$distmat) = @_;
+   # we assume type checking of $matrix has already been done
+   # client shouldn't be calling this directly anyways, using the
+   # make_tree method is preferred
+
+   # algorithm, from Eddy, Durbin, Krogh, Mitchison, 1998
+   # originally by Sokal and Michener 1958
+
+   my $precisionstr = "%.$Precision"."f";
+
+   # @dmat  : current cluster-to-cluster distances (shrinks as clusters merge)
+   # @orig  : the original pairwise distances, indexed by sequence number
+   # @nodes : tree node for each cluster id, created on demand
+   # (the '@' sigils in this declaration had been replaced by ' at ' in the
+   #  archived copy; they are restored here)
+   my ($i,$j,$x,$y,@dmat,@orig,@nodes);
+
+   my @names = $distmat->column_names;
+   my $c = 0;
+   # one singleton cluster per sequence; 'contains' lists sequence indices
+   my @clusters = map {
+       my $r = { 'id'        => $c,
+		 'height'    => 0,
+		 'contains'  => [$c],
+	     };
+       $c++;
+       $r;
+   } @names;
+
+   my $K = scalar @clusters;
+   my (@mins,$min);
+   for ( $i = 0; $i < $K; $i++ ) {
+       for( $j = $i+1; $j < $K; $j++ ) {
+	   my $d =  $distmat->get_entry($names[$i],$names[$j]);
+	   # get Min here on first time around, save 1 cycle
+	   $dmat[$j][$i] = $dmat[$i][$j] = $d;
+	   $orig[$i][$j] = $orig[$j][$i] = $d;
+	   if ( ! defined $min || $d <= $min ) {
+	       if( defined $min && $min == $d ) {
+		   push @mins, [$i,$j];
+	       } else {
+		   @mins = [$i,$j];
+		   $min  = $d;
+	       }
+	   }
+       }
+   }
+   # distance between each cluster is avg distance
+   # between pairs of sequences from each cluster
+   while( $K > 1 ) {
+       # fencepost - we already have found the $min
+       # so very first time loop is executed we can skip checking
+       unless( defined $min ) {
+	   for($i = 0; $i < $K; $i++ ) {
+	       for( $j = $i+1; $j < $K; $j++ ) {
+		   my $dij = $dmat[$i][$j];
+		   if( ! defined $min ||
+		       $dij <= $min) {
+		       if( defined $min &&
+			   $min == $dij ) {
+			   push @mins, [$i,$j];
+		       } else {
+			   @mins = [ $i,$j ];
+			   $min = $dij;
+		       }
+		   }
+	       }
+	   }
+       }
+       # randomly break ties
+       ($x,$y) = @{ $mins[int(rand(scalar @mins))] };
+
+       # now we are going to join clusters x and y, make a new cluster
+
+       my $node = Bio::Tree::Node->new();
+       my @subids;
+       for my $cid ( $x,$y ) {
+	   my $nid = $clusters[$cid]->{'id'};
+	   if( ! defined $nodes[$nid] ) {
+	       $nodes[$nid] = Bio::Tree::Node->new(-id => $names[$nid]);
+	   }
+	   # branch length = height of the new cluster minus the child's height
+	   $nodes[$nid]->branch_length
+	       (sprintf($precisionstr,$min/2 - $clusters[$cid]->{'height'}));
+	   $node->add_Descendent($nodes[$nid]);
+	   push @subids, @{ $clusters[$cid]->{'contains'} };
+       }
+       my $cluster = { 'id'       => $c++,
+		       'height'   => $min / 2,
+		       'contains' => [@subids],
+		   };
+
+       $K--; # we are going to drop the last node so go ahead and decrement K
+       $nodes[$cluster->{'id'}] = $node;
+       # move the last cluster into slot $y so the live clusters stay
+       # contiguous in 0 .. $K-1
+       if ( $y != $K ) {
+	   $clusters[$y] = $clusters[$K];
+	   $dmat[$y] = $dmat[$K];
+	   for ( $i = 0; $i < $K; $i++ ) {
+	       $dmat[$i][$y] = $dmat[$y][$i];
+	   }
+       }
+       # drop the now-unused last slot
+       $#clusters = $K - 1;
+       $clusters[$x] = $cluster;
+       # now recalculate @dmat
+       for( $i = 0; $i < $K; $i++ ) {
+	   if( $i != $x) {
+	       $dmat[$i][$x] = $dmat[$x][$i] =
+		   _upgma_distance($clusters[$i],$clusters[$x],\@orig);
+	   } else {
+	       $dmat[$i][$i] = 0;
+	   }
+       }
+       # reset so next loop iteration
+       # we will find minimum distance
+       @mins = ();
+       $min = undef;
+   }
+   # the last node created is the root; note that with fewer than two
+   # names the loop above never runs and @nodes is still empty here
+   return Bio::Tree::Tree->new(-root => $nodes[-1]);
+}
+
+# calculate avg distance between clusters - be they
+# single sequences or the combination of multiple sequences
+# $cluster_i and $cluster_j are the clusters to operate on
+# and $distances is a matrix (arrayref of arrayrefs) of pairwise 
+# differences indexed on the sequence ids - 
+# so $distances->[0][1] is the distance between sequences 0 and 1
+
+sub _upgma_distance {
+    my ($cluster_i, $cluster_j, $distances) = @_;
+
+    # sequence ids held by each cluster
+    my @ids_i = @{ $cluster_i->{'contains'} };
+    my @ids_j = @{ $cluster_j->{'contains'} };
+
+    my ($total, $pairs);
+    foreach my $id_i ( @ids_i ) {
+	foreach my $id_j ( @ids_j ) {
+	    my $value = $distances->[$id_i][$id_j];
+	    if( defined $value ) {
+		$total += $value;
+	    } else {
+		warn("no value for $id_i $id_j\n");
+	    }
+	    # a missing entry still counts as a pair in the average
+	    $pairs++;
+	}
+    }
+    return $total / $pairs;
+}
+
+=head2 method
+
+ Title   : method
+ Usage   : $obj->method($newval)
+ Function: Get or set the name of the tree construction method ('NJ' or 'UPGMA')
+ Example : 
+ Returns : value of method (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub method{
+    my ($self, @newval) = @_;
+    # any argument at all (even undef) is treated as a request to set
+    if( @newval ) {
+	$self->{'_method'} = $newval[0];
+    }
+    return $self->{'_method'};
+}
+
+
+=head2 check_additivity
+
+ Title     : check_additivity
+ Usage     : if( $distance->check_additivity($matrix) ) {
+             }
+ Function  : See if matrix obeys additivity principle
+ Returns   : boolean
+ Args      : Bio::Matrix::MatrixI 
+ References: Based on a Java implementation by
+             Peter Sestoft, sestoft at dina.kvl.dk 1999-12-07 version 0.3
+             http://www.dina.kvl.dk/~sestoft/bsa.html
+             which in turn is based on algorithms described in 
+             R. Durbin, S. Eddy, A. Krogh, G. Mitchison. 
+             Biological Sequence Analysis CUP 1998, Chapter 7.
+
+=cut
+
+# Four-point test for additivity.
+#
+# For every set of four taxa (i,j,k,m) the three pairings give three sums
+#   D(i,j)+D(k,m),  D(i,k)+D(j,m),  D(i,m)+D(j,k)
+# and the matrix is additive only if the two largest sums are equal.
+#
+# Args   : $matrix    - object providing column_names and get_entry
+#          $tolerance - optional; largest difference between the two
+#                       largest sums that still counts as "equal".
+#                       Defaults to 0, i.e. exact numeric equality, which is
+#                       the original behaviour.  A small positive value is
+#                       useful because sums of floating-point distances are
+#                       rarely bit-for-bit equal.
+# Returns: 1 if every quartet passes, 0 on the first failure, and
+#          nothing (empty list / undef) when there are fewer than 4 taxa.
+sub check_additivity{
+   my ($self,$matrix,$tolerance) = @_;
+   $tolerance = 0 unless defined $tolerance;
+
+   my @names = $matrix->column_names;
+   my $len = scalar @names;
+   return unless $len >= 4;
+   # look at all sets of 4
+   for( my $i = 0; $i < $len; $i++ ) {
+       for( my $j = $i+1; $j< $len; $j++) {
+	   for( my $k = $j+1; $k < $len; $k ++ ) {
+	       for( my $m = $k +1; $m < $len; $m++ ) {
+		   my $DijDkm = $matrix->get_entry($names[$i],$names[$j]) +
+		       $matrix->get_entry($names[$k],$names[$m]);
+		   my $DikDjm = $matrix->get_entry($names[$i],$names[$k]) +
+		       $matrix->get_entry($names[$j],$names[$m]);
+		   my $DimDjk = $matrix->get_entry($names[$i],$names[$m]) +
+		       $matrix->get_entry($names[$j],$names[$k]);
+		   # "some pair of sums is equal and not smaller than the
+		   # third" is the same as "the two largest sums are equal"
+		   my @sums = sort { $a <=> $b } ($DijDkm,$DikDjm,$DimDjk);
+		   if( $sums[2] - $sums[1] > $tolerance ) {
+		       return 0;
+		   }
+	       }
+	   }
+       }
+   }
+   return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Draw/Cladogram.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Draw/Cladogram.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Draw/Cladogram.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,543 @@
+# Cladogram.pm,v 1.8 2005/09/04 07:35:05 valiente Exp
+#
+# BioPerl module for Cladogram
+#
+# Cared for by Gabriel Valiente <valiente at lsi.upc.edu>
+#
+# Copyright Gabriel Valiente
+#
+# You may distribute this module under the same terms as Perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::Draw::Cladogram - Drawing phylogenetic trees in
+Encapsulated PostScript (EPS) format.
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::Draw::Cladogram;
+  use Bio::TreeIO;
+  my $treeio = new Bio::TreeIO('-format' => 'newick',
+  			       '-file'   => 'input.nwk');
+  my $t1 = $treeio->next_tree;
+  my $t2 = $treeio->next_tree;
+
+  my $obj1 = new Bio::Tree::Draw::Cladogram(-tree => $t1);
+  $obj1->print(-file => 'cladogram.eps');
+
+  if ($t2) {
+    my $obj2 = new Bio::Tree::Draw::Cladogram(-tree => $t1, -second => $t2);
+    $obj2->print(-file => 'tanglegram.eps');
+  }
+
+=head1 DESCRIPTION
+
+Bio::Tree::Draw::Cladogram is a Perl tool for drawing Bio::Tree::Tree
+objects in Encapsulated PostScript (EPS) format. It can be utilized
+both for displaying a single phylogenetic tree (a cladogram) and for
+the comparative display of two phylogenetic trees (a tanglegram) such
+as a gene tree and a species tree, a host tree and a parasite tree,
+two alternative trees for the same set of taxa, or two alternative
+trees for overlapping sets of taxa.
+
+Phylogenetic trees are drawn as rectangular cladograms, with
+horizontal orientation and ancestral nodes centered over their
+descendents. The font used for taxa is Courier at 10 pt. A single
+Bio::Tree::Tree object is drawn with ancestors to the left and taxa
+flushed to the right. Two Bio::Tree::Tree objects are drawn with the
+first tree oriented left-to-right and the second tree oriented
+right-to-left, and with corresponding taxa connected by straight lines
+in a shade of gray. Each correspondence between a $taxon1 of the first
+tree and a $taxon2 of the second tree is established by setting
+$taxon1-E<gt>add_tag_value('connection',$taxon2). Thus, a taxon of the
+first tree can be connected to more than one taxon of the second tree,
+and vice versa.
+
+The branch from the parent to a child $node, as well as the child
+label, can be colored by setting $node-E<gt>add_tag_value('Rcolor',$r),
+$node-E<gt>add_tag_value('Gcolor',$g), and
+$node-E<gt>add_tag_value('Bcolor',$b), where $r, $g, and $b are the
+desired values for red, green, and blue (zero for lowest, one for
+highest intensity).
+
+This is a preliminary release of Bio::Tree::Draw::Cladogram. Future
+improvements include an option to output phylograms instead of
+cladograms. Beware that cladograms are automatically scaled according
+to branch lengths, but the current release has only been tested with
+trees having unit branch lengths.
+
+The print method could be extended to output graphic formats other
+than EPS, although there are many graphics conversion programs around
+that accept EPS input. For instance, most Linux distributions include
+epstopdf, a Perl script that together with Ghostscript, converts EPS
+to PDF.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Gabriel Valiente
+
+Email valiente at lsi.upc.edu
+
+Code for coloring branches contributed by Georgii A Bazykin
+(gbazykin at princeton.edu).
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tree::Draw::Cladogram;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use PostScript::TextBlock;
+
+use base qw(Bio::Root::Root);
+
+# The following private package variables are set by the new method
+# and used by the print method.
+
+my %xx;        # horizontal coordinate for each node
+my %yy;        # vertical coordinate for each node
+my $t1;        # first Bio::Tree::Tree object
+my $t2;        # second Bio::Tree::Tree object
+my $font;      # font name
+my $size;      # font size
+my $width;     # total drawing width
+my $height;    # total drawing height
+my $xstep;     # branch length in drawing
+my $tip;       # extra space between tip and label
+my $tipwidth1; # width of longest label among $t1 taxa
+my $tipwidth2; # width of longest label among $t2 taxa
+my $compact;   # whether or not to ignore branch lengths
+my $ratio;     # horizontal to vertical ratio
+my $colors;    # use colors to color edges
+my %Rcolor;    # red color for each node
+my %Gcolor;    # green color for each node
+my %Bcolor;    # blue color for each node
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::Draw::Cladogram();
+ Function: Builds a new Bio::Tree::Draw::Cladogram object 
+ Returns : Bio::Tree::Draw::Cladogram
+ Args    : -tree => Bio::Tree::Tree object
+           -second => Bio::Tree::Tree object (optional)
+           -font => font name [string] (optional)
+           -size => font size [integer] (optional)
+           -top => top margin [integer] (optional)
+           -bottom => bottom margin [integer] (optional)
+           -left => left margin [integer] (optional)
+           -right => right margin [integer] (optional)
+           -tip => extra tip space [integer] (optional)
+           -column => extra space between cladograms [integer] (optional)
+           -compact => ignore branch lengths [boolean] (optional)
+           -ratio => horizontal to vertical ratio [integer] (optional)
+           -colors => use colors to color edges [boolean] (optional)
+
+=cut
+
+sub new {
+  # The archived copy had the '@' of @args replaced by ' at ' (mail-archive
+  # address obfuscation), which is not valid Perl; the sigil is restored here.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+
+  # Most settings are stored in the file-scoped lexicals declared above
+  # (they are read again by the print method), so they are shared by every
+  # object created from this package.  Margins and column width are only
+  # needed during layout and stay local.  INHERIT is accepted in the
+  # argument list but its value is not captured.
+  ($t1, $t2, $font, $size, my $top, my $bottom, my $left, my $right,
+    $tip, my $column, $compact, $ratio, $colors) = $self->_rearrange([qw(TREE
+    SECOND FONT SIZE TOP BOTTOM LEFT RIGHT TIP COLUMN
+    COMPACT RATIO COLORS INHERIT)], @args);
+  # defaults; because of ||= an explicit 0 is replaced by the default too
+  $font ||= "Helvetica-Narrow";
+  $size ||= 12;
+  $top ||= 10;
+  $bottom ||= 10;
+  $left ||= 10;
+  $right ||= 10;
+  $tip ||= 5;
+  $column ||= 60;
+  $compact ||= 0;
+  $ratio ||= 1 / 1.6180339887;
+  $colors ||= 0;
+
+  # Roughly, a cladogram is set according to the following parameters.
+
+  #################################
+  #                           # T #   $top (T, top margin)
+  #        +---------+ XXX    #   #   $bottom (B, bottom margin)
+  #        |                  #   #   $left (L, left margin)
+  #        |                  #   #   $right (R, right margin)
+  #   +----+                  #   #   $tip (X, extra tip space)
+  #        |    +----+ XXXX   #   #   $width (total drawing width)
+  #        |    |             #   #   $height (total drawing height)
+  #        +----+             # Y #   $xstep (S, stem length)
+  #             |             #   #   $ystep (Y, space between taxa)
+  #             +----+ XX     #   #   $tiplen (string length of longest name)
+  #                           # B #   $tipwidth (N, size of longest name)
+  #################################
+  # L         S       X  N  R #
+  #############################
+
+  # A tanglegram is roughly set as follows. The only additional
+  # parameter is $column (C, length of connection lines between taxa
+  # of the two trees), but $tip occurs four times, and $tiplen and
+  # $tipwidth differ for the first and the second tree.
+
+  ###########################################################
+  #                                                         #
+  #        +---------+ XXX  ----- XXXXXX +----+             #
+  #        |                                  |             #
+  #        |                                  +----+        #
+  #   +----+                                  |    |        #
+  #        |    +----+ XXXX -----    XXX +----+    |        #
+  #        |    |                                  +----+   #
+  #        +----+                                  |        #
+  #             |                                  |        #
+  #             +----+ XX   -----   XXXX +---------+        #
+  #                                                         #
+  ###########################################################
+  # L                 X    X  C  X      X                 R #
+  ###########################################################
+
+  # An alternative would be to let the user set $width and $height in
+  # points and to scale down everything to fit the desired
+  # dimensions. However, the final EPS can later be scaled down to any
+  # desired size anyway.
+
+  my @taxa1 = $t1->get_leaf_nodes;
+  my $root1 = $t1->get_root_node;
+
+  # widest leaf label of the first tree
+  $tipwidth1 = 0;
+  foreach my $taxon (@taxa1) {
+    my $w = PostScript::Metrics::stringwidth($taxon->id,$font,$size);
+    if ($w > $tipwidth1) { $tipwidth1 = $w; }
+  }
+
+  my @taxa2;
+  my $root2;
+
+  my $ystep = 20;
+
+  # widest leaf label of the second tree, when one was given
+  if ($t2) {
+    @taxa2 = $t2->get_leaf_nodes;
+    $root2 = $t2->get_root_node;
+    $tipwidth2 = 0;
+    foreach my $taxon (@taxa2) {
+      my $w = PostScript::Metrics::stringwidth($taxon->id,$font,$size);
+      if ($w > $tipwidth2) { $tipwidth2 = $w; }
+    }
+  }
+
+  # overall drawing size: margins + stems + labels (+ connection column)
+  my $stems = $root1->height + 1;
+  if ($t2) { $stems += $root2->height + 1; }
+  my $labels = $tipwidth1;
+  if ($t2) { $labels += $tipwidth2; }
+  $xstep = 20;
+  $width = $left + $stems * $xstep + $tip + $labels + $right;
+  if ($t2) { $width += $tip + $column + $tip + $tip; }
+  $height = $bottom + $ystep * (@taxa1 - 1) + $top;
+  if ($t2) {
+    # the tree with more leaves determines the height
+    if ( scalar(@taxa2) > scalar(@taxa1) ) {
+      $height = $bottom + $ystep * (@taxa2 - 1) + $top;
+    }
+  }
+  # vertical spacing of the leaves of each tree
+  my $ystep1 = $height / scalar(@taxa1);
+  my $ystep2;
+  if ($t2) {
+    $ystep2 = $height / scalar(@taxa2);
+  }
+
+  my $x = $left + $xstep * ($root1->height + 1) + $tip;
+  my $y = $bottom;
+
+  # place the leaves of the first tree, bottom to top
+  for my $taxon (reverse @taxa1) {
+    $xx{$taxon} = $x - $tip;
+    $yy{$taxon} = $y;
+    $y += $ystep1;
+  }
+  $x -= $xstep;
+
+  my @stack;
+  my @queue; # postorder traversal
+  push @stack, $t1->get_root_node;
+  while (@stack) {
+    my $node = pop @stack;
+    push @queue, $node;
+    foreach my $child ($node->each_Descendent(-sortby => 'internal_id')) {
+      push @stack, $child;
+    }
+  }
+  # reversing the visit order puts every child before its parent
+  @queue = reverse @queue;
+
+  # internal nodes: one step left of the leftmost child and vertically
+  # midway between the lowest and highest child
+  for my $node (@queue) {
+    if (!$node->is_Leaf) {
+      my @children = $node->each_Descendent;
+      my $child = shift @children;
+      my $xmin = $xx{$child};
+      my $ymin = my $ymax = $yy{$child};
+      foreach $child (@children) {
+	$xmin = $xx{$child} if $xx{$child} < $xmin;
+	$ymax = $yy{$child} if $yy{$child} > $ymax;
+	$ymin = $yy{$child} if $yy{$child} < $ymin;
+      }
+      $xx{$node} = $xmin - $xstep;
+      $yy{$node} = ($ymin + $ymax)/2;
+    }
+  }
+
+  $xx{$t1->get_root_node} = $left + $xstep;
+
+  my @preorder = $t1->get_nodes(-order => 'depth');
+
+  # record the colour tags of every node of the first tree (0 when absent)
+  # NOTE(review): get_tag_values is called in scalar context here; confirm
+  # that it yields the tag value rather than a count in that context.
+  for my $node (@preorder) {
+    #print "\n$node";
+    if ($colors) {
+      if ($node->has_tag('Rcolor')) {
+        $Rcolor{$node} = $node->get_tag_values('Rcolor')
+      } else {
+        $Rcolor{$node} = 0
+      }
+      if ($node->has_tag('Gcolor')) {
+        $Gcolor{$node} = $node->get_tag_values('Gcolor')
+      } else {
+        $Gcolor{$node} = 0
+      }
+      if ($node->has_tag('Bcolor')) {
+        $Bcolor{$node} = $node->get_tag_values('Bcolor')
+      } else {
+        $Bcolor{$node} = 0
+      }
+      #print "\t$Rcolor{$node}\t$Gcolor{$node}\t$Bcolor{$node}";
+    }
+  }
+
+  if ($compact) { # ragged right, ignoring branch lengths
+
+    # every node sits one fixed step right of its ancestor
+    $width = 0;
+    shift @preorder; # skip root
+    for my $node (@preorder) {
+      $xx{$node} = $xx{$node->ancestor} + $xstep;
+      $width = $xx{$node} if $xx{$node} > $width;
+    }
+    $width += $tip + $tipwidth1 + $right;
+
+  } else { # set to aspect ratio and use branch lengths if available
+
+    # NOTE(review): this divides by the root height; a tree whose root
+    # height is 0 would make the division fail -- confirm callers never
+    # pass such a tree.
+    my $total_height = (scalar($t1->get_leaf_nodes) - 1) * $ystep;
+    my $scale_factor = $total_height * $ratio / $t1->get_root_node->height;
+
+    $width = $t1->get_root_node->height * $scale_factor;
+    $width += $left + $xstep;
+    $width += $tip + $tipwidth1 + $right;
+
+    shift @preorder; # skip root
+    for my $node (@preorder) {
+      # a missing or non-numeric branch length is treated as 1
+      my $bl = $node->branch_length;
+      $bl = 1 unless (defined $bl && $bl =~ /^\-?\d+(\.\d+)?$/);
+      $xx{$node} = $xx{$node->ancestor} + $bl * $scale_factor;
+    }
+
+  }
+
+  if ($t2) {
+
+    # the second tree is laid out mirrored, to the right of the first
+    # tree's labels and the connection column
+    $x = $left + $xstep * ($root1->height + 1) + $tip;
+    $x += $tipwidth1 + $tip + $column + $tip;
+    my $y = $bottom;
+
+    for my $taxon (reverse @taxa2) {
+      $xx{$taxon} = $x + $tipwidth2 + $tip;
+      $yy{$taxon} = $y;
+      $y += $ystep2;
+    }
+    $x += $xstep;
+
+    my @stack;
+    my @queue; # postorder traversal
+    push @stack, $t2->get_root_node;
+    while (@stack) {
+      my $node = pop @stack;
+      push @queue, $node;
+      foreach my $child ($node->each_Descendent(-sortby => 'internal_id')) {
+	push @stack, $child;
+      }
+    }
+    @queue = reverse @queue;
+
+    # internal nodes: one step right of the rightmost child
+    for my $node (@queue) {
+      if (!$node->is_Leaf) {
+	my @children = $node->each_Descendent;
+	my $child = shift @children;
+	my $xmax = $xx{$child};
+	my $ymin = my $ymax = $yy{$child};
+	foreach $child (@children) {
+	  $xmax = $xx{$child} if $xx{$child} > $xmax;
+	  $ymax = $yy{$child} if $yy{$child} > $ymax;
+	  $ymin = $yy{$child} if $yy{$child} < $ymin;
+	}
+	$xx{$node} = $xmax + $xstep;
+	$yy{$node} = ($ymin + $ymax)/2;
+      }
+    }
+
+  }
+
+  return $self;
+}
+
+=head2 print
+
+ Title   : print
+ Usage   : $obj->print();
+ Function: Outputs $obj in Encapsulated PostScript (EPS) format 
+ Returns : 
+ Args    : -file => filename (optional)
+
+=cut
+
+sub print {
+  # Write the drawing computed by the constructor as Encapsulated
+  # PostScript.
+  #   Args   : -file => output filename (optional, default "output.eps")
+  #   Returns: 1 on success
+  #   Throws : if the output file cannot be opened or closed
+  # Relies on the file-level layout state ($t1, $t2, %xx, %yy, $width,
+  # $height, $font, $size, ...) referenced throughout the body.
+  my ($self, @args) = @_;
+
+  my ($file) = $self->_rearrange([qw(FILE)], @args);
+  $file ||= "output.eps"; # default output file when -file is not given
+
+  open(my $INFO, ">", $file)
+    or $self->throw("Could not open file '$file' for writing: $!");
+  print $INFO "%!PS-Adobe-\n";
+  print $INFO "%%BoundingBox: 0 0 ", $width, " ", $height, "\n";
+  print $INFO "1 setlinewidth\n";
+  print $INFO "/$font findfont\n";
+  print $INFO "$size scalefont\n";
+  print $INFO "setfont\n";
+
+  # taxa labels are centered to 1/3 the font size
+
+  for my $taxon (reverse $t1->get_leaf_nodes) {
+    if ($colors) {
+      print $INFO $Rcolor{$taxon}, " ", $Gcolor{$taxon}, " ", $Bcolor{$taxon}, " setrgbcolor\n";
+    }
+    print $INFO $xx{$taxon} + $tip, " ", $yy{$taxon} - $size / 3, " moveto\n";
+    print $INFO "(", $taxon->id, ") show\n";
+  }
+
+  # branches of the first tree: a horizontal segment from each node to its
+  # ancestor's x position, then a vertical segment to the ancestor's y
+  my $root1 = $t1->get_root_node;
+  for my $node ($t1->get_nodes) {
+    if ($node->ancestor) {
+      if ($colors) {
+        print $INFO "stroke\n";
+        print $INFO $Rcolor{$node->ancestor}, " ", $Gcolor{$node->ancestor}, " ", $Bcolor{$node->ancestor}, " setrgbcolor\n";
+      }
+      print $INFO $xx{$node}, " ", $yy{$node}, " moveto\n";
+      print $INFO $xx{$node->ancestor}, " ", $yy{$node}, " lineto\n";
+      print $INFO $xx{$node->ancestor}, " ", $yy{$node->ancestor}, " lineto\n";
+    }
+  }
+
+  # short stub to the left of the root, at the mid-height of its children
+  my $ymin = $yy{$root1};
+  my $ymax = $yy{$root1};
+  foreach my $child ($root1->each_Descendent) {
+    $ymax = $yy{$child} if $yy{$child} > $ymax;
+    $ymin = $yy{$child} if $yy{$child} < $ymin;
+  }
+  my $zz = ($ymin + $ymax)/2;
+  if ($colors) {
+    print $INFO "stroke\n";
+    print $INFO $Rcolor{$root1}, " ", $Gcolor{$root1}, " ", $Bcolor{$root1}, " setrgbcolor\n";
+  }
+  print $INFO $xx{$root1}, " ", $zz, " moveto\n";
+  print $INFO $xx{$root1} - $xstep, " ", $zz, " lineto\n";
+
+  if ($t2) {
+
+    # labels of the second tree are right-aligned against their tips
+    for my $taxon (reverse $t2->get_leaf_nodes) {
+      my $tiplen2 = PostScript::Metrics::stringwidth($taxon->id,$font,$size);
+      print $INFO $xx{$taxon} - $tiplen2 - $tip, " ",
+        $yy{$taxon} - $size / 3, " moveto\n";
+      printf $INFO "(%s) show\n", $taxon->id;
+    }
+
+    for my $node ($t2->get_nodes) {
+      if ($node->ancestor) {
+        print $INFO $xx{$node}, " ", $yy{$node}, " moveto\n";
+        print $INFO $xx{$node->ancestor}, " ", $yy{$node}, " lineto\n";
+        print $INFO $xx{$node->ancestor}, " ",
+          $yy{$node->ancestor}, " lineto\n";
+      }
+    }
+
+    # root stub of the second tree points to the right
+    my $root2 = $t2->get_root_node;
+    my $ymin = $yy{$root2};
+    my $ymax = $yy{$root2};
+    foreach my $child2 ($root2->each_Descendent) {
+      $ymax = $yy{$child2} if $yy{$child2} > $ymax;
+      $ymin = $yy{$child2} if $yy{$child2} < $ymin;
+    }
+    my $zz = ($ymin + $ymax)/2;
+    print $INFO $xx{$root2}, " ", $zz, " moveto\n";
+    print $INFO $xx{$root2} + $xstep, " ", $zz, " lineto\n";
+
+    my @taxa1 = $t1->get_leaf_nodes;
+    my @taxa2 = $t2->get_leaf_nodes;
+
+    # set default connection between $t1 and $t2 taxa, unless
+    # overridden by the user (the latter not implemented yet)
+
+    foreach my $taxon1 (@taxa1) {
+      foreach my $taxon2 (@taxa2) {
+        if ($taxon1->id eq $taxon2->id) {
+          $taxon1->add_tag_value('connection',$taxon2);
+          last;
+        }
+      }
+    }
+
+    # draw connection lines between $t1 and $t2 taxa
+
+    print $INFO "stroke\n";
+    print $INFO "0.5 setgray\n";
+
+    foreach my $taxon1 (@taxa1) {
+      my @match = $taxon1->get_tag_values('connection');
+      foreach my $taxon2 (@match) {
+        my $x0 = $xx{$taxon1} + $tip
+          + PostScript::Metrics::stringwidth($taxon1->id,$font,$size) + $tip;
+        my $x1 = $xx{$taxon1} + $tip + $tipwidth1 + $tip;
+        my $y1 = $yy{$taxon1};
+        my $x2 = $xx{$taxon2} - $tip - $tipwidth2 - $tip;
+        my $x3 = $xx{$taxon2} - $tip
+          - PostScript::Metrics::stringwidth($taxon2->id,$font,$size) - $tip;
+        my $y2 = $yy{$taxon2};
+        print $INFO $x0, " ", $y1, " moveto\n";
+        print $INFO $x1, " ", $y1, " lineto\n";
+        print $INFO $x2, " ", $y2, " lineto\n";
+        print $INFO $x3, " ", $y2, " lineto\n";
+      }
+    }
+
+  }
+
+  print $INFO "stroke\n";
+  print $INFO "showpage\n";
+
+  # buffered write errors only surface at close, so check it
+  close($INFO)
+    or $self->throw("Could not close file '$file': $!");
+  return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Node.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Node.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Node.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,726 @@
+# $Id: Node.pm,v 1.49.4.3 2006/11/27 15:46:48 sendu Exp $
+#
+# BioPerl module for Bio::Tree::Node
+#
+# Cared for by Jason Stajich <jason-at-bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::Node - A Simple Tree Node
+
+=head1 SYNOPSIS
+
+    use Bio::Tree::Node;
+    my $nodeA = new Bio::Tree::Node();
+    my $nodeL = new Bio::Tree::Node();
+    my $nodeR = new Bio::Tree::Node();
+
+    my $node = new Bio::Tree::Node();
+    $node->add_Descendent($nodeL);
+    $node->add_Descendent($nodeR);
+
+    print "node is not a leaf \n" if( ! $node->is_Leaf );
+
+=head1 DESCRIPTION
+
+Makes a Tree Node suitable for building a Tree.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Aaron Mackey, amackey-at-virginia-dot-edu
+Sendu Bala,   bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tree::Node;
+use vars qw($CREATIONORDER);
+use strict;
+
+use Scalar::Util qw(weaken isweak);
+
+use base qw(Bio::Root::Root Bio::Tree::NodeI);
+
+# Start the package-wide node counter at zero at compile time; new()
+# hands its current value to each node as the creation id and then
+# increments it.
+BEGIN { 
+    $CREATIONORDER = 0;
+}
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::Node();
+ Function: Builds a new Bio::Tree::Node object
+ Returns : Bio::Tree::Node
+ Args    : -descendents   => arrayref of descendents (they will be
+                             updated s.t. their ancestor point is this
+                             node)
+           -branch_length => branch length [integer] (optional)
+           -bootstrap     => value   bootstrap value (string)
+           -description   => description of node
+           -id            => human readable id for node
+
+=cut
+
+sub new {
+  # Build a node from named arguments (see the POD above for the list).
+  # Returns the new Bio::Tree::Node.
+  my ($class, @args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  # $desc receives -desc, $d receives -description
+  my ($children, $branchlen, $id,
+      $bootstrap, $desc, $d) = $self->_rearrange([qw(
+                                                    DESCENDENTS
+                                                    BRANCH_LENGTH
+                                                    ID
+                                                    BOOTSTRAP
+                                                    DESC
+                                                    DESCRIPTION
+                                                 )],
+                                             @args);
+  $self->_register_for_cleanup(\&node_cleanup);
+  $self->{'_desc'} = {}; # for descendents
+  if( defined $d && defined $desc ) {
+      $self->warn("can only accept -desc or -description, not both, accepting -description");
+      $desc = $d;
+  } elsif( defined $d && ! defined $desc ) {
+      $desc = $d;
+  }
+  defined $desc && $self->description($desc);
+  defined $bootstrap && $self->bootstrap($bootstrap);
+  defined $id && $self->id($id);
+  defined $branchlen && $self->branch_length($branchlen);
+  if( defined $children ) {
+      if( ref($children) !~ /ARRAY/i ) {
+          # warn and skip: dereferencing a non-array here would die
+          # right after the warning was issued
+          $self->warn("Must specify a valid ARRAY reference to initialize a Node's Descendents");
+      } else {
+          foreach my $c ( @$children ) {
+              $self->add_Descendent($c);
+          }
+      }
+  }
+  # the creation id doubles as internal_id()
+  $self->_creation_id($CREATIONORDER++);
+  return $self;
+}
+
+=head2 add_Descendent
+
+ Title   : add_Descendent
+ Usage   : $node->add_Descendent($node);
+ Function: Adds a descendent to a node
+ Returns : number of current descendents for this node
+ Args    : Bio::Node::NodeI
+           boolean flag, true if you want to ignore the fact that you are
+           adding a second node with the same unique id (typically memory 
+           location reference in this implementation).  default is false and 
+           will throw an error if you try and overwrite an existing node.
+
+=cut
+
+sub add_Descendent {
+    # Register $node as a direct child of this node and point its
+    # ancestor back here.  Returns the number of children afterwards,
+    # or -1 when no node was passed.  Throws for anything that is not a
+    # Bio::Tree::NodeI, and for a child whose internal id is already
+    # stored unless $ignoreoverwrite is true.
+    my ($self, $node, $ignoreoverwrite) = @_;
+    return -1 unless defined $node;
+
+    my $acceptable = ref($node)
+        && ref($node) !~ /HASH/
+        && $node->isa('Bio::Tree::NodeI');
+    unless ($acceptable) {
+        $self->throw("Trying to add a Descendent who is not a Bio::Tree::NodeI");
+        return -1;
+    }
+
+    # The flag stops ancestor() from calling straight back into this
+    # method; skip the call entirely when ancestor() is what called us.
+    $self->{_adding_descendent} = 1;
+    unless ( $node->{_setting_ancestor} ) {
+        $node->ancestor($self);
+    }
+    $self->{_adding_descendent} = 0;
+
+    my $key = $node->internal_id;
+    if ( !$ignoreoverwrite && $self->{'_desc'}->{$key} ) {
+        $self->throw("Going to overwrite a node which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
+    }
+    $self->{'_desc'}->{$key} = $node;
+
+    # our own cached height (and every ancestor's) is now stale
+    $self->invalidate_height();
+
+    my $count = keys %{ $self->{'_desc'} };
+    return $count;
+}
+
+=head2 each_Descendent
+
+ Title   : each_Descendent($sortby)
+ Usage   : my @nodes = $node->each_Descendent;
+ Function: all the descendents for this Node (but not their descendents
+					      i.e. not a recursive fetchall)
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : $sortby [optional] "height", "creation", "alpha", "revalpha",
+           or coderef to be used to sort the order of children nodes.
+
+=cut
+
+sub each_Descendent {
+    # Return the direct children of this node (not grandchildren).
+    # $sortby selects the order: a code reference is used as a two-argument
+    # comparator; 'height', 'alpha' and 'revalpha' sort on that key with the
+    # internal id as tie breaker; anything else sorts by internal id.
+    my ($self, $sortby) = @_;
+    $sortby ||= 'none';
+
+    # unordered snapshot of the children
+    my @kids = values %{ $self->{'_desc'} };
+
+    if ( ref $sortby eq 'CODE' ) {
+        my @ordered = sort { $sortby->($a, $b) } @kids;
+        return @ordered;
+    }
+
+    if ( $sortby eq 'height' ) {
+        my @keyed   = map  { [ $_, $_->height, $_->internal_id ] } @kids;
+        my @ordered = sort { $a->[1] <=> $b->[1] ||
+                             $a->[2] <=> $b->[2] } @keyed;
+        return map { $_->[0] } @ordered;
+    }
+
+    if ( $sortby eq 'alpha' ) {
+        my @keyed;
+        for my $kid (@kids) {
+            if ( $kid->is_Leaf ) {
+                push @keyed, [ $kid, $kid->id, $kid->internal_id ];
+            }
+            else {
+                # an internal node sorts under its smallest leaf id
+                my @leaf_ids = sort { $a cmp $b }
+                               map  { $_->id }
+                               grep { $_->is_Leaf }
+                               $kid->get_all_Descendents($sortby);
+                push @keyed, [ $kid, $leaf_ids[0], $kid->internal_id ];
+            }
+        }
+        my @ordered = sort { $a->[1] cmp $b->[1] ||
+                             $a->[2] <=> $b->[2] } @keyed;
+        return map { $_->[0] } @ordered;
+    }
+
+    if ( $sortby eq 'revalpha' ) {
+        my @keyed;
+        for my $kid (@kids) {
+            if ( defined $kid->id || $kid->is_Leaf ) {
+                push @keyed, [ $kid, $kid->id, $kid->internal_id ];
+            }
+            else {
+                # an unnamed internal node sorts under its largest leaf id
+                my ($top_id) = sort { $b cmp $a }
+                               map  { $_->id }
+                               grep { $_->is_Leaf }
+                               $kid->get_all_Descendents($sortby);
+                push @keyed, [ $kid, $top_id, $kid->internal_id ];
+            }
+        }
+        my @ordered = sort { $b->[1] cmp $a->[1] ||
+                             $b->[2] <=> $a->[2] } @keyed;
+        return map { $_->[0] } @ordered;
+    }
+
+    # default ("creation"): ascending internal id
+    my @keyed   = map  { [ $_, $_->internal_id ] } @kids;
+    my @ordered = sort { $a->[1] <=> $b->[1] } @keyed;
+    return map { $_->[0] } @ordered;
+}
+
+=head2 remove_Descendent
+
+ Title   : remove_Descendent
+ Usage   : $node->remove_Descendent($node_foo);
+ Function: Removes a specific node from being a Descendent of this node
+ Returns : number of the given nodes that were removed
+ Args    : An array of Bio::Node::NodeI objects which have been previously
+           passed to the add_Descendent call of this object.
+
+=cut
+
+sub remove_Descendent{
+   # Detach each of the given nodes from this node's child table and
+   # clear its ancestor pointer.  Returns how many of them were actually
+   # children of this node.
+   my ($self,@nodes) = @_;
+   my $c= 0;
+   foreach my $n ( @nodes ) {
+       if( $self->{'_desc'}->{$n->internal_id} ) {
+           # the flag tells ancestor() not to call back into this method
+           $self->{_removing_descendent} = 1;
+           $n->ancestor(undef);
+           $self->{_removing_descendent} = 0;
+           # should be redundant
+           $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
+           delete $self->{'_desc'}->{$n->internal_id};
+           $c++;
+       } else {
+           if( $self->verbose ) {
+               $self->debug(sprintf("no node %s (%s) listed as a descendent in this node %s (%s)\n",$n->id, $n,$self->id,$self));
+               $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
+           }
+       }
+   }
+
+   # remove unnecessary nodes if we have removed the part
+   # which branches: a non-root node left with exactly one child is
+   # spliced out, its branch length being added to that child's.
+   my $a1 = $self->ancestor;
+   if( $a1 ) {
+       my $bl = $self->branch_length || 0;
+       my @d = $self->each_Descendent;
+       if (scalar @d == 1) {
+           $d[0]->branch_length($bl + ($d[0]->branch_length || 0));
+           $a1->add_Descendent($d[0]);
+           $a1->remove_Descendent($self);
+       }
+   }
+   return $c;
+}
+
+=head2 remove_all_Descendents
+
+ Title   : remove_all_Descendents
+ Usage   : $node->remove_all_Descendents()
+ Function: Cleanup the node's reference to descendents and reset
+           their ancestor pointers to undef, if you don't have a reference
+           to these objects after this call they will be cleaned up - so
+           a get_nodes from the Tree object would be a safe thing to do first
+ Returns : nothing
+ Args    : none
+
+=cut
+
+sub remove_all_Descendents{
+   my ($self) = @_;
+   # Clear the ancestor pointer of every direct child, then replace the
+   # child table with a fresh empty hash.
+   # this won't cleanup the nodes themselves if you also have
+   # a copy/pointer of them (I think)...
+   # NOTE(review): ancestor(undef) on a child calls this node's
+   # remove_Descendent(), which deletes from and calls values() on the
+   # same hash that each() is walking here, and may also run
+   # remove_Descendent's single-child splice.  Confirm the traversal is
+   # still what is intended before changing anything in this loop.
+   while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
+       $val->ancestor(undef);
+   }
+   $self->{'_desc'} = {};
+   1;   # always returns true
+}
+
+=head2 get_all_Descendents
+
+ Title   : get_all_Descendents
+ Usage   : my @nodes = $node->get_all_Descendents;
+ Function: Recursively fetch all the nodes and their descendents
+           *NOTE* This is different from each_Descendent
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : none
+
+=cut
+
+# get_all_Descendents implemented in the interface 
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : $obj->ancestor($newval)
+ Function: Set the Ancestor
+ Returns : ancestral node
+ Args    : newvalue (optional)
+
+=cut
+
+sub ancestor {
+    # Get, or with an argument set, the parent of this node.  Setting
+    # keeps both sides consistent: the node is removed from its previous
+    # parent's children and added to the new parent's.  Passing undef
+    # detaches the node.  The parent is held as a weak reference.
+    my $self = shift;
+    if (@_) {
+        my $new_ancestor = shift;
+
+        # undef (or any false value) is allowed and means "no parent"
+        if ($new_ancestor) {
+            $new_ancestor->isa('Bio::Tree::NodeI')
+                or $self->throw("This is [$new_ancestor], not a Bio::Tree::NodeI");
+        }
+
+        my $previous = $self->{'_ancestor'} || '';
+        my $relink   = !$previous
+                    || !$new_ancestor
+                    || $new_ancestor ne $previous;
+        if ($relink) {
+            # skip the callbacks when the other side is the one calling
+            # us, otherwise the two methods would recurse forever
+            if ( $previous && !$previous->{_removing_descendent} ) {
+                $previous->remove_Descendent($self);
+            }
+            if ( $new_ancestor && !$new_ancestor->{_adding_descendent} ) {
+                $self->{_setting_ancestor} = 1;
+                $new_ancestor->add_Descendent($self, 1);
+                $self->{_setting_ancestor} = 0;
+            }
+        }
+        weaken($self->{'_ancestor'} = $new_ancestor);
+    }
+
+    return $self->{'_ancestor'};
+}
+
+=head2 branch_length
+
+ Title   : branch_length
+ Usage   : $obj->branch_length()
+ Function: Get/Set the branch length
+ Returns : value of branch_length
+ Args    : newvalue (optional)
+
+=cut
+
+sub branch_length {
+    # Get, or with an argument set, the length of the branch leading to
+    # this node.  A "[digits]" group inside a new value is stripped out
+    # and stored as the bootstrap value.  Setting invalidates cached
+    # heights.
+    my ($self, @new) = @_;
+    return $self->{'_branch_length'} unless @new;
+
+    my $bl = $new[0];
+    if ( defined $bl && $bl =~ s/\[(\d+)\]// ) {
+        $self->bootstrap($1);
+    }
+    $self->{'_branch_length'} = $bl;
+    $self->invalidate_height();
+    return $self->{'_branch_length'};
+}
+
+=head2 bootstrap
+
+ Title   : bootstrap
+ Usage   : $obj->bootstrap($newval)
+ Function: Get/Set the bootstrap value
+ Returns : value of bootstrap
+ Args    : newvalue (optional)
+
+=cut
+
+sub bootstrap {
+    # Get, or with an argument set, the bootstrap value.  It is kept as
+    # the single value of tag 'B'; setting replaces any previous value.
+    my ($self, @new) = @_;
+    if (@new) {
+        $self->remove_tag('B') if $self->has_tag('B');
+        $self->add_tag_value('B', $new[0]);
+    }
+    return ( $self->get_tag_values('B') )[0];
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function: Get/Set the description string
+ Returns : value of description
+ Args    : newvalue (optional)
+
+=cut
+
+sub description {
+    # Get, or with an argument set (undef included), the description.
+    my ($self, @new) = @_;
+    if (@new) {
+        $self->{'_description'} = $new[0];
+    }
+    return $self->{'_description'};
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id($newval)
+ Function: The human readable identifier for the node 
+ Returns : value of human readable id
+ Args    : newvalue (optional)
+
+"A name can be any string of printable characters except blanks,
+colons, semicolons, parentheses, and square brackets. Because you may
+want to include a blank in a name, it is assumed that an underscore
+character ("_") stands for a blank; any of these in a name will be
+converted to a blank when it is read in."  
+
+from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
+
+Also note that these objects now support spaces, ();: because we can
+automatically quote the strings if they contain these characters.  The
+L<id_output> method does this for you so use the id() method to get
+the raw string while L<id_output> to get the pre-escaped string.
+
+=cut
+
+sub id {
+    # Get, or with a defined argument set, the human readable id.  The
+    # value is stored verbatim: no characters are rewritten here.
+    my $self  = shift;
+    my $value = shift;
+    $self->{'_id'} = $value if defined $value;
+    return $self->{'_id'};
+}
+
+=head2 Helper Functions
+
+=cut
+
+=head2 id_output
+
+ Title   : id_output
+ Usage   : my $id = $node->id_output;
+ Function: Return an id suitable for output in format like newick
+           so that if it contains spaces or ():; characters it is properly 
+           quoted
+ Returns : $id string if $node->id has a value
+ Args    : none
+
+=cut
+
+# implemented in NodeI interface 
+
+=head2 internal_id
+
+ Title   : internal_id
+ Usage   : my $internalid = $node->internal_id
+ Function: Returns the internal unique id for this Node
+           (a monotonically increasing number for this in-memory implementation
+            but could be a database determined unique id in other 
+	    implementations)
+ Returns : unique id
+ Args    : none
+
+=cut
+
+sub internal_id {
+    # The unique id of a node is simply its creation id.
+    my ($self) = @_;
+    return $self->_creation_id;
+}
+
+=head2 _creation_id
+
+ Title   : _creation_id
+ Usage   : $obj->_creation_id($newval)
+ Function: a private method signifying the internal creation order
+ Returns : value of _creation_id
+ Args    : newvalue (optional)
+
+=cut
+
+sub _creation_id {
+    # Get, or with an argument set, the creation-order number of this
+    # node.  An unset (or false) value reads back as 0.
+    my ($self, @new) = @_;
+    $self->{'_creation_id'} = $new[0] if @new;
+    return $self->{'_creation_id'} || 0;
+}
+
+=head2 Bio::Node::NodeI decorated interface implemented
+
+The following methods are implemented by L<Bio::Node::NodeI> decorated
+interface.
+
+=head2 is_Leaf
+
+ Title   : is_Leaf
+ Usage   : if( $node->is_Leaf )
+ Function: Get Leaf status
+ Returns : boolean
+ Args    : none
+
+=cut
+
+sub is_Leaf {
+    # True when this node has no entries in its child table.
+    my $self = shift;
+    my $kids = $self->{'_desc'};
+    return ( !defined $kids || keys(%$kids) <= 0 );
+}
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : my $str = $node->to_string()
+ Function: For debugging, provide a node as a string
+ Returns : string
+ Args    : none
+
+=head2 height
+
+ Title   : height
+ Usage   : my $len = $node->height
+ Function: Returns the height of the tree starting at this
+           node.  Height is the maximum branchlength to get to the tip.
+ Returns : The longest length (weighting branches with branch_length) to a leaf
+ Args    : none
+
+=cut
+
+sub height {
+    # Longest branch-length-weighted distance from this node down to a
+    # leaf.  A child whose branch length is undefined or not a plain
+    # decimal number counts as length 1.  The result for an internal
+    # node is cached until invalidate_height() is called.
+    my $self = shift;
+
+    my $cached = $self->{'_height'};
+    return $cached if defined $cached;
+    return 0 if $self->is_Leaf;
+
+    my $tallest = 0;
+    for my $kid ( $self->each_Descendent ) {
+        my $len = $kid->branch_length;
+        unless ( defined $len && $len =~ /^\-?\d+(\.\d+)?$/ ) {
+            $len = 1;
+        }
+        my $reach = $kid->height + $len;
+        $tallest = $reach if $reach > $tallest;
+    }
+    $self->{'_height'} = $tallest;
+    return $tallest;
+}
+
+=head2 invalidate_height
+
+ Title   : invalidate_height
+ Usage   : private helper method
+ Function: Invalidate our cached value of the node height in the tree
+ Returns : nothing
+ Args    : none
+
+=cut
+
+sub invalidate_height {
+    # Drop the cached height of this node and of every ancestor above it.
+    my $self = shift;
+
+    $self->{'_height'} = undef;
+    my $parent = $self->ancestor;
+    $parent->invalidate_height if defined $parent;
+}
+
+=head2 add_tag_value
+
+ Title   : add_tag_value
+ Usage   : $node->add_tag_value($tag,$value)
+ Function: Adds a tag value to a node 
+ Returns : number of values stored for this tag
+ Args    : $tag   - tag name
+           $value - value to store for the tag
+
+=cut
+
+sub add_tag_value {
+    # Append $value to the list stored under $tag and return the new
+    # length of that list.  An undefined tag or value only triggers a
+    # warning; the value is still stored.
+    my ($self, $tag, $value) = @_;
+    unless ( defined $tag && defined $value ) {
+        $self->warn("cannot call add_tag_value with an undefined value");
+    }
+    push @{ $self->{'_tags'}->{$tag} }, $value;
+    my $count = @{ $self->{'_tags'}->{$tag} };
+    return $count;
+}
+
+=head2 remove_tag
+
+ Title   : remove_tag
+ Usage   : $node->remove_tag($tag)
+ Function: Remove the tag and all values for this tag
+ Returns : boolean representing success (0 if tag does not exist)
+ Args    : $tag - tagname to remove
+
+
+=cut
+
+sub remove_tag {
+    # Delete $tag together with all of its values.
+    # Returns 1 if the tag existed, 0 otherwise.
+    my $self = shift;
+    my $tag  = shift;
+    return 0 unless exists $self->{'_tags'}->{$tag};
+    delete $self->{'_tags'}->{$tag};
+    return 1;
+}
+
+=head2 remove_all_tags
+
+ Title   : remove_all_tags
+ Usage   : $node->remove_all_tags()
+ Function: Removes all tags 
+ Returns : None
+ Args    : None
+
+=cut
+
+sub remove_all_tags {
+    # Forget every tag by installing a fresh, empty tag table.
+    my $self = shift;
+    $self->{'_tags'} = {};
+    return;
+}
+
+=head2 get_all_tags
+
+ Title   : get_all_tags
+ Usage   : my @tags = $node->get_all_tags()
+ Function: Gets all the tag names for this Node
+ Returns : Array of tagnames
+ Args    : None
+
+=cut
+
+sub get_all_tags {
+    # Return the names of all tags on this node, sorted as strings.
+    my $self   = shift;
+    my $table  = $self->{'_tags'} || {};
+    my @names  = sort keys %$table;
+    return @names;
+}
+
+=head2 get_tag_values
+
+ Title   : get_tag_values
+ Usage   : my @values = $node->get_tag_values($tag)
+ Function: Gets the values for given tag ($tag)
+ Returns : Array of values or empty list if tag does not exist
+ Args    : $tag - tag name
+
+=cut
+
+sub get_tag_values {
+    # In list context return every value stored for $tag (empty list if
+    # the tag is unknown); otherwise return only the first value.
+    my ($self, $tag) = @_;
+    my $values = $self->{'_tags'}->{$tag} || [];
+    return @$values if wantarray;
+    return $values->[0];
+}
+
+=head2 has_tag
+
+ Title   : has_tag
+ Usage   : $node->has_tag($tag)
+ Function: Boolean test if tag exists in the Node
+ Returns : Boolean
+ Args    : $tag - tagname
+
+=cut
+
+sub has_tag {
+    # True when $tag has an entry in this node's tag table.
+    my $self  = shift;
+    my $tag   = shift;
+    my $found = exists $self->{'_tags'}->{$tag};
+    return $found;
+}
+
+# Cleanup callback registered by new() through _register_for_cleanup().
+# Resets the child table of the node to an empty hash.
+sub node_cleanup {
+    my $self = shift;
+    return unless defined $self;
+    
+    #*** below is wrong, cleanup doesn't actually occur. Will replace with:
+    # $self->remove_all_Descendents; once further fixes in place..
+    # new() initialises _desc as a hash reference, which the ARRAY test
+    # below does not match, so the loop that would detach the children
+    # is skipped for nodes built by new().
+    if( defined $self->{'_desc'} &&
+        ref($self->{'_desc'}) =~ /ARRAY/i ) {
+        while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
+            $node->ancestor(undef); # insure no circular references
+            $node = undef;
+        }
+    }
+    $self->{'_desc'} = {};
+}
+
+=head2 reverse_edge
+
+ Title   : reverse_edge
+ Usage   : $node->reverse_edge(child);
+ Function: makes child be a parent of node
+ Requires: child must be a direct descendent of node
+ Returns : 1 on success, 0 on failure
+ Args    : Bio::Tree::NodeI that is in the tree
+
+=cut
+
+sub reverse_edge {
+    # Turn the edge self -> $node around so that $node becomes the
+    # parent of self.  Returns 1 on success and 0 when delete_edge()
+    # reports failure, in which case nothing else is changed here.
+    my ($self, $node) = @_;
+    return 0 unless $self->delete_edge($node);
+    $node->add_Descendent($self);
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,518 @@
+# $Id: NodeI.pm,v 1.35.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::NodeI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::NodeI - Interface describing a Tree Node
+
+=head1 SYNOPSIS
+
+    # get a Tree::NodeI somehow
+    # like from a TreeIO
+    use Bio::TreeIO;
+    # read in a clustalw NJ in phylip/newick format
+    my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'file.dnd');
+
+    my $tree = $treeio->next_tree; # we'll assume it worked for demo purposes
+                                   # you might want to test that it was defined
+
+    my $rootnode = $tree->get_root_node;
+
+    # process just the next generation
+    foreach my $node ( $rootnode->each_Descendent() ) {
+	print "branch len is ", $node->branch_length, "\n";
+    }
+
+    # process all the children
+    my $example_leaf_node;
+    foreach my $node ( $rootnode->get_all_Descendents() ) {
+	if( $node->is_Leaf ) { 
+	    print "node is a leaf ... "; 
+            # for example use below
+            $example_leaf_node = $node unless defined $example_leaf_node; 
+	}
+	print "branch len is ", $node->branch_length, "\n";
+    }
+
+    # The ancestor() method points to the parent of a node
+    # A node can only have one parent
+
+    my $parent = $example_leaf_node->ancestor;
+
+    # parent won't likely have a description because it is an internal node
+    # but child will because it is a leaf
+
+    print "Parent id: ", $parent->id," child id: ", 
+          $example_leaf_node->id, "\n";
+
+
+=head1 DESCRIPTION
+
+A NodeI is capable of the basic structure of building a tree and
+storing the branch length between nodes.  The branch length is the
+length of the branch between the node and its ancestor, thus a root
+node in a Tree will not typically have a valid branch length.
+
+Various implementations of NodeI may extend the basic functions and
+allow storing of other information (like attaching a species object
+or full sequences used to build a tree or alternative sequences).  If
+you don't know how to extend a Bioperl object please ask, happy to
+help, we would also greatly appreciate contributions with improvements
+or extensions of the objects back to the Bioperl code base so that
+others don't have to reinvent your ideas.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Aaron Mackey amackey at virginia.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tree::NodeI;
+use strict;
+no warnings 'recursion';
+
+use base qw(Bio::Root::RootI);
+
+=head2 add_Descendent
+
+ Title   : add_Descendent
+ Usage   : $node->add_Descendent($node);
+ Function: Adds a descendent to a node
+ Returns : number of current descendents for this node
+ Args    : Bio::Node::NodeI
+
+
+=cut
+
+sub add_Descendent{
+   # Interface stub: implementing classes must override this method.
+   # Calling it here always ends in throw_not_implemented().
+   my ($self,@args) = @_;
+
+   $self->throw_not_implemented();
+}
+
+
+=head2 each_Descendent
+
+ Title   : each_Descendent
+ Usage   : my @nodes = $node->each_Descendent;
+ Function: all the descendents for this Node (but not their descendents 
+					      i.e. not a recursive fetchall)
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : none
+
+=cut
+
+sub each_Descendent {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 Decorated Interface methods
+
+=cut
+
+=head2 get_all_Descendents
+
+ Title   : get_all_Descendents($sortby)
+ Usage   : my @nodes = $node->get_all_Descendents;
+ Function: Recursively fetch all the nodes and their descendents
+           *NOTE* This is different from each_Descendent
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : $sortby [optional] "height", "creation", "alpha", "revalpha", 
+           or a coderef to be used to sort the order of children nodes.
+
+=cut
+
+sub get_all_Descendents {
+    # Depth-first list of every node below this one: each child is
+    # followed immediately by its own descendents.  $sortby is handed
+    # unchanged to each_Descendent() at every level.
+    my ($self, $sortby) = @_;
+    $sortby ||= 'none';
+
+    my @found;
+    for my $kid ( $self->each_Descendent($sortby) ) {
+        push @found, $kid;
+        push @found, $kid->get_all_Descendents($sortby);
+    }
+    return @found;
+}
+
+*get_Descendents = \&get_all_Descendents;
+
+=head2 is_Leaf
+
+ Title   : is_Leaf
+ Usage   : if( $node->is_Leaf ) 
+ Function: Get Leaf status
+ Returns : boolean
+ Args    : none
+
+=cut
+
+sub is_Leaf {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 descendent_count
+
+ Title   : descendent_count
+ Usage   : my $count = $node->descendent_count;
+ Function: Counts the number of descendents a node has 
+           (and all of their subnodes)
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub descendent_count {
+    # Count every node below this one: each direct child counts once,
+    # plus its own descendent_count() when it offers that method.
+    my $self  = shift;
+    my $total = 0;
+
+    for my $kid ( $self->each_Descendent ) {
+        $total++;
+        if ( $kid->can('descendent_count') ) {
+            $total += $kid->descendent_count;
+        }
+    }
+    return $total;
+}
+
+=head2 to_string
+
+ Title   : to_string
+ Usage   : my $str = $node->to_string()
+ Function: For debugging, provide a node as a string
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub to_string {
+    # Debugging aid: the output id (empty if undefined) followed by
+    # ":<branch length>", or by a single space when the node has no
+    # branch length.
+    my $self = shift;
+
+    my $label = $self->id_output;
+    $label = '' unless defined $label;
+
+    my $length = $self->branch_length;
+    my $suffix = defined $length ? ':' . $length : ' ';
+
+    return $label . $suffix;
+}
+
+=head2 height
+
+ Title   : height
+ Usage   : my $len = $node->height
+ Function: Returns the height of the tree starting at this
+           node.  Height is the maximum branchlength to get to the tip.
+ Returns : The longest length (weighting branches with branch_length) to a leaf
+ Args    : none
+
+=cut
+
+sub height {
+    # Longest distance from this node down to a leaf, measured as the
+    # sum of branch lengths along the way.  A leaf has height 0.
+    my $self = shift;
+    return 0 if $self->is_Leaf;
+
+    my $tallest = 0;
+    for my $kid ( $self->each_Descendent ) {
+        my $reach = $kid->height + $kid->branch_length;
+        $tallest = $reach if $reach > $tallest;
+    }
+    return $tallest;
+}
+
+=head2 depth
+
+ Title   : depth
+ Usage   : my $len = $node->depth
+ Function: Returns the depth of the tree starting at this
+           node.  Depth is the distance from this node to the root.
+ Returns : The branch length to the root.
+ Args    : none
+
+=cut
+
+sub depth {
+    # Distance from this node up to the root: the sum of the branch
+    # lengths of this node and of every ancestor that itself has an
+    # ancestor.
+    my $self  = shift;
+    my $total = 0;
+
+    for ( my $node = $self; defined $node->ancestor; $node = $node->ancestor ) {
+        $total += $node->branch_length;
+    }
+    return $total;
+}
+
+=head2 Get/Set methods
+
+=cut
+
+=head2 branch_length
+
+ Title   : branch_length
+ Usage   : $obj->branch_length()
+ Function: Get/Set the branch length
+ Returns : value of branch_length
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub branch_length {
+    # Interface stub: implementing classes must override this method.
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id($newval)
+ Function: The human readable identifier for the node 
+ Returns : value of human readable id
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub id{
+    # interface stub: just delegates to throw_not_implemented
+    shift->throw_not_implemented();
+}
+
+=head2 internal_id
+
+ Title   : internal_id
+ Usage   : my $internalid = $node->internal_id
+ Function: Returns the internal unique id for this Node
+ Returns : unique id
+ Args    : none
+
+=cut
+
+sub internal_id{
+   # interface stub: just delegates to throw_not_implemented
+   shift->throw_not_implemented();
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description($newval)
+ Function: Get/Set the description string
+ Returns : value of description
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub description{
+    # interface stub: just delegates to throw_not_implemented
+    shift->throw_not_implemented();
+}
+
+=head2 bootstrap
+
+ Title   : bootstrap
+ Usage   : $obj->bootstrap($newval)
+ Function: Get/Set the bootstrap value
+ Returns : value of bootstrap
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub bootstrap{
+    # interface stub: just delegates to throw_not_implemented
+    shift->throw_not_implemented();
+}
+
+=head2 ancestor
+
+ Title   : ancestor
+ Usage   : my $node = $node->ancestor;
+ Function: Get/Set the ancestor node pointer for a Node
+ Returns : Null if this is top level node
+ Args    : none
+
+=cut
+
+
+sub ancestor{
+    # the argument list was mangled to ", at args" by the mail archive's
+    # address obfuscation; restored to the valid ",@args"
+    my ($self,@args) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    $self->throw_not_implemented();
+}
+
+=head2 invalidate_height
+
+ Title   : invalidate_height
+ Usage   : private helper method
+ Function: Invalidate our cached value of the node height in the tree
+ Returns : nothing
+ Args    : none
+
+=cut
+
+sub invalidate_height {
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+=head2 Methods for associating Tag/Values with a Node
+
+These methods associate tag/value pairs with a Node
+
+=head2 add_tag_value
+
+ Title   : add_tag_value
+ Usage   : $node->add_tag_value($tag,$value)
+ Function: Adds a tag value to a node 
+ Returns : number of values stored for this tag
+ Args    : $tag   - tag name
+           $value - value to store for the tag
+
+
+=cut
+
+sub add_tag_value{
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+=head2 remove_tag
+
+ Title   : remove_tag
+ Usage   : $node->remove_tag($tag)
+ Function: Remove the tag and all values for this tag
+ Returns : boolean representing success (0 if tag does not exist)
+ Args    : $tag - tagname to remove
+
+
+=cut
+
+sub remove_tag {
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+=head2 remove_all_tags
+
+ Title   : remove_all_tags
+ Usage   : $node->remove_all_tags()
+ Function: Removes all tags 
+ Returns : None
+ Args    : None
+
+
+=cut
+
+sub remove_all_tags{
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+=head2 get_all_tags
+
+ Title   : get_all_tags
+ Usage   : my @tags = $node->get_all_tags()
+ Function: Gets all the tag names for this Node
+ Returns : Array of tagnames
+ Args    : None
+
+
+=cut
+
+sub get_all_tags {
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+=head2 get_tag_values
+
+ Title   : get_tag_values
+ Usage   : my @values = $node->get_tag_values($tag)
+ Function: Gets the values for given tag ($tag)
+ Returns : Array of values or empty list if tag does not exist
+ Args    : $tag - tag name
+
+
+=cut
+
+sub get_tag_values{
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+=head2 has_tag
+
+ Title   : has_tag
+ Usage   : $node->has_tag($tag)
+ Function: Boolean test if tag exists in the Node
+ Returns : Boolean
+ Args    : $tag - tagname
+
+
+=cut
+
+sub has_tag{
+    my ($self) = @_;
+    # interface stub: just delegates to throw_not_implemented
+    return $self->throw_not_implemented();
+}
+
+
+=head2 Helper Functions
+
+=cut
+
+=head2 id_output
+
+ Title   : id_output
+ Usage   : my $id = $node->id_output;
+ Function: Return an id suitable for output in format like newick
+           so that if it contains spaces or ():; characters it is properly 
+           quoted
+ Returns : $id string if $node->id has a value
+ Args    : none
+
+
+=cut
+
+sub id_output{
+    my ($self) = @_;
+    my $label = $self->id;
+    # nothing to emit for an undefined or empty id
+    if( ! defined $label || ! length($label) ) {
+        return;
+    }
+    # ids containing whitespace or one of ( ) ; : , are wrapped in double
+    # quotes; quote characters already inside the id are left untouched
+    return $label =~ /[\(\);:,\s]/ ? '"'.$label.'"' : $label;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeNHX.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeNHX.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/NodeNHX.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,173 @@
+# $Id: NodeNHX.pm,v 1.10.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::NodeNHX
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::NodeNHX - A Simple Tree Node with support for NHX tags
+
+=head1 SYNOPSIS
+
+    use Bio::Tree::NodeNHX;
+    my $nodeA = new Bio::Tree::NodeNHX();
+    my $nodeL = new Bio::Tree::NodeNHX();
+    my $nodeR = new Bio::Tree::NodeNHX();
+
+    my $node = new Bio::Tree::NodeNHX();
+    $node->add_Descendents($nodeL);
+    $node->add_Descendents($nodeR);
+
+    print "node is not a leaf \n" if( ! $node->is_Leaf );
+
+=head1 DESCRIPTION
+
+Makes a Tree Node with NHX tags, suitable for building a Tree.  See
+L<Bio::Tree::Node> for a full list of functionality.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey at virginia.edu
+
+=head1 CONTRIBUTORS
+
+The NHX (New Hampshire eXtended) format was created by Chris Zmasek,
+and is described at:
+
+  http://www.genetics.wustl.edu/eddy/forester/NHX.html
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tree::NodeNHX;
+use strict;
+
+
+use base qw(Bio::Tree::Node);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::NodeNHX();
+ Function: Builds a new Bio::Tree::NodeNHX object
+ Returns : Bio::Tree::NodeNHX
+ Args    : -left          => pointer to Left descendent (optional)
+           -right         => pointer to Right descendent (optional)
+	   -branch_length => branch length [integer] (optional)
+           -bootstrap     => bootstrap value (string)
+           -description   => description of node
+           -id            => unique id for node
+           -nhx           => hashref of NHX tags and values
+
+=cut
+
+sub new {
+  # the argument list was mangled to ", at args" by the mail archive's
+  # address obfuscation; restored to the valid ",@args"
+  my ($class,@args) = @_;
+
+  # the parent constructor receives every argument unchanged
+  my $self = $class->SUPER::new(@args);
+  # -nhx is the only argument handled here; it is passed to nhx_tag,
+  # which does nothing when the value is undefined
+  my ($nhx) = $self->_rearrange([qw(NHX)], @args);
+  $self->nhx_tag($nhx);
+  return $self;
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # try to insure that everything is cleaned up
+    $self->SUPER::DESTROY();
+    # The loop below treats '_desc' as a hash of id => node, so the guard
+    # must test for a HASH reference.  The previous ARRAY test meant the
+    # cleanup never ran for a hash and would have died ("Not a HASH
+    # reference") for an array.
+    my $desc = $self->{'_desc'};
+    if( defined $desc && ref($desc) =~ /HASH/i ) {
+        for my $node ( values %$desc ) {
+            # entries may already be gone during global destruction
+            next unless ref $node;
+            $node->{'_ancestor'} = undef; # insure no circular references
+            # DESTROY is deliberately not called by hand: perl runs it when
+            # the last reference goes away, and a child still held by the
+            # caller must not be torn down here.
+        }
+        $self->{'_desc'} = {};
+    }
+}
+
+sub to_string{
+   my ($self) = @_;
+   # collect "tag=v1,v2" fields behind the NHX marker
+   my @fields = ("&&NHX");
+   for my $tag ( $self->get_all_tags ) {
+       push @fields, "$tag=" . join(',', $self->get_tag_values($tag));
+   }
+   # the bracketed block is only emitted when at least one tag exists
+   my $nhx = @fields > 1 ? '[' . join(":", @fields) . ']' : '';
+
+   my $label = $self->id;
+   $label = '' unless defined $label;
+   my $length = $self->branch_length;
+   # a missing branch length is rendered as a single space
+   return $label . ( defined $length ? ':' . $length : ' ' ) . $nhx;
+}
+
+=head2 nhx_tag
+
+ Title   : nhx_tag
+ Usage   : $nodenhx->nhx_tag(\%tags);
+ Function: Set tag-value pairs for NHX nodes
+ Returns : none
+ Args    : hashref to update the tags/value pairs
+           OR 
+           with a scalar value update the bootstrap value by default
+
+
+=cut
+
+sub nhx_tag {
+    my ($self, $tags) = @_;
+    if (defined $tags && (ref($tags) =~ /HASH/i)) {
+        # store every value of every tag; a tag may carry one value or an
+        # array reference of values
+        for my $tag ( keys %$tags ) {
+            my $val = $tags->{$tag};
+            my @vals = ref($val) =~ /ARRAY/i ? @$val : ($val);
+            for my $v ( @vals ) {
+                $self->add_tag_value($tag,$v);
+            }
+        }
+        # the 'B' tag is additionally copied into the bootstrap slot
+        if (exists $tags->{'B'}) {
+            $self->bootstrap($tags->{'B'});
+        }
+    } elsif (defined $tags and ! ref ($tags)) {
+        # a plain scalar is taken as the bootstrap value by default
+        # (the stray "here with ..." debug print to STDERR was removed)
+        $self->bootstrap($tags);
+    }
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/RandomFactory.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/RandomFactory.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/RandomFactory.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,534 @@
+# $Id: RandomFactory.pm,v 1.18.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::RandomFactory
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::RandomFactory;
+  my @taxonnames;
+  my $factory = new Bio::Tree::RandomFactory( -taxa => \@taxonnames,
+  					      -maxcount => 10);
+
+  # or for anonymous samples
+
+  my $factory = new Bio::Tree::RandomFactory( -num_taxa => 6,
+					      -maxcount => 50);
+
+
+  my $tree = $factory->next_tree;
+
+=head1 DESCRIPTION
+
+Builds a random tree every time next_tree is called or up to -maxcount times.
+
+This module was originally written for Coalescent simulations see
+L<Bio::PopGen::Simulation::Coalescent>.  I've left the next_tree
+method intact although it is not generating random trees in the
+phylogenetic sense.  I would be happy for someone to provide
+alternative implementations which can be used here.  As written it
+will generate random topologies but the branch lengths are built from
+assumptions in the coalescent and are not appropriate for phylogenetic
+analyses.
+
+This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
+
+Hudson, R. R. 1990. Gene genealogies and the coalescent
+       process. Pp. 1-44 in D. Futuyma and J.  Antonovics, eds. Oxford
+       surveys in evolutionary biology. Vol. 7. Oxford University
+       Press, New York
+
+Sanderson, M ... 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-AT-bioperl.org
+
+=head1 CONTRIBUTORS
+
+Matthew Hahn, E<lt>matthew.hahn at duke.eduE<gt>
+Mike Sanderson 
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tree::RandomFactory;
+use vars qw($PRECISION_DIGITS $DefaultNodeType %Defaults);
+use strict;
+
+# NOTE(review): $PRECISION_DIGITS is not referenced in the visible part of
+# this module - confirm whether anything still uses it
+$PRECISION_DIGITS = 3; # Precision for the branchlength
+# Node class used by new() when no -nodetype argument is supplied
+$DefaultNodeType = 'Bio::Tree::Node';
+# Fallback parameters consulted by the tree-building methods
+%Defaults = ('YuleRate'          => 1.0, # as set by Sanderson in Rates
+	     'Speciation'        => 1.0, # used by rand_yule_c_tree when no rate is passed
+	     'DefaultTreeMethod' => 'yule', # returned by random_tree_method when none was set
+	     );
+
+use Bio::Tools::RandomDistFunctions;
+use Bio::Tree::Tree;
+
+use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $factory = new Bio::Tree::RandomFactory(-samples => \@samples,
+						      -maxcount=> $N);
+ Function: Initializes a Bio::Tree::RandomFactory object
+ Returns : Bio::Tree::RandomFactory
+ Args    : -nodetype => Type of Nodes to create [default Bio::Tree::Node]
+           -maxcount => [optional] Maximum num trees to create
+           -randtype => Type of random tree to build; names known so far
+               - yule/backwards_yule/BY [default]
+               - forward_yule/FY (not yet implemented)
+               - birthdeath_forward/BDF (not yet implemented)
+               - birthdeath_backwards/BDB
+
+
+          ONE of the following must be specified
+           -taxa     => $arrayref of taxa names
+           -num_taxa => integer indicating number of taxa in the tree
+
+=cut
+
+sub new{
+   # the argument list was mangled to ", at args" by the mail archive's
+   # address obfuscation; restored to the valid ",@args"
+   my ($class,@args) = @_;
+   my $self = $class->SUPER::new(@args);
+
+   $self->{'_treecounter'} = 0;
+   $self->{'_maxcount'} = 0;
+   my ($nodetype,$randtype,
+       $maxcount, $samps,$samplesize,
+       $taxa, $num_taxa) = $self->_rearrange([qw(NODETYPE
+                                                 RANDTYPE
+                                                 MAXCOUNT
+                                                 SAMPLES
+                                                 SAMPLE_SIZE
+                                                 TAXA
+                                                 NUM_TAXA)],
+                                             @args);
+   my @taxa;
+   $nodetype ||= $DefaultNodeType;
+   $self->nodetype($nodetype);
+   # -randtype used to be parsed and then dropped; remember it so that
+   # next_tree actually builds the requested kind of tree
+   $self->random_tree_method($randtype) if defined $randtype;
+   # -samples / -sample_size are the older spellings of -taxa / -num_taxa
+   $taxa = $samps if defined $samps && ! defined $taxa;
+   $num_taxa = $samplesize if $samplesize && ! $num_taxa;
+   if( ! defined $taxa ) {
+       if( ! defined $num_taxa || $num_taxa <= 0 ) {
+           $self->throw("Must specify a valid num_taxa if parameter -TAXA is not specified");
+       }
+       # anonymous samples are named Taxon1 .. TaxonN
+       foreach ( 1..$num_taxa ) { push @taxa, "Taxon$_"; }
+   } else {
+       if( ref($taxa) !~ /ARRAY/i ) {
+           $self->throw("Must specify a valid ARRAY reference to the parameter -TAXA, did you forget a leading '\\'? for $taxa");
+       }
+       @taxa = @$taxa;
+   }
+
+   $self->taxa(\@taxa);
+   defined $maxcount && $self->maxcount($maxcount);
+   $self->{'_count'} = 0;
+   return $self;
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $factory->next_tree
+ Function: Returns a random tree based on the initialized number of nodes
+           NOTE: if maxcount is not specified on initialization or
+                 set to a valid integer, subsequent calls to next_tree will 
+                 continue to return random trees and never return undef
+
+ Returns : Bio::Tree::TreeI object
+ Args    : none
+
+=cut
+
+
+sub next_tree{
+   my ($self,%options) = @_;
+   # once maxcount trees have been handed out, stop; the counter only
+   # advances while a maxcount is in force
+   return if $self->maxcount &&
+       $self->{'_count'}++ >= $self->maxcount;
+   my $rand_type = $options{'randtype'} || $self->random_tree_method;
+   my $nodetype = $self->nodetype;
+   my $treearray;
+
+   # The backward birth-death test must come first: the forward pattern
+   # contains the bare word "birth" and used to swallow
+   # "birthdeath_backwards", leaving only "BDB" able to reach this branch.
+   if( $rand_type =~ /(birthdeath_backward|BDB)/i ) {
+       $treearray = $self->rand_birthdeath_backwards_tree;
+   } elsif( $rand_type =~ /(birthdeath_forward|birth|BDF)/i ) {
+       # this branch used to be silently empty
+       $self->warn("random type $rand_type is not implemented yet");
+   } elsif( $rand_type =~ /(BY|backwards?_yule)/i ||
+            $rand_type =~ /^yule/i ) {
+       # accepts both the documented "backward_yule" and "backwards_yule"
+       my $speciation = $options{'speciation'}; # can be undef
+       $treearray = $self->rand_yule_c_tree($speciation);
+   } else {
+       $self->warn("unrecognized random type $rand_type");
+   }
+   # without a node table there is nothing to build; previously a tree
+   # object with an undefined root was returned in this case
+   return unless defined $treearray;
+
+   # The table lists every node after its descendents, so by the time a
+   # node is built the objects for desc1/desc2 already exist in @nodes.
+   my @nodes = ();
+   foreach my $n ( @$treearray ) {
+       for my $k ( qw(desc1 desc2) ) {
+           next unless defined $n->{$k};
+           push @{$n->{'descendents'}}, $nodes[$n->{$k}];
+       }
+       push @nodes,
+       $nodetype->new(-id            => $n->{'nodenum'},
+                      -branch_length => $n->{'time'},
+                      -descendents   => $n->{'descendents'},
+                      );
+   }
+   # the last table entry is the root
+   my $T = Bio::Tree::Tree->new(-root => pop @nodes );
+   return $T;
+}
+
+
+=head2 maxcount
+
+ Title   : maxcount
+ Usage   : $obj->maxcount($newval)
+ Function: Get/Set the maximum number of trees next_tree will return
+ Returns : Maxcount value
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub maxcount{
+   my ($self,$value) = @_;
+   if( defined $value ) {
+       # keep only the leading run of digits of whatever was passed
+       my ($digits) = $value =~ /^(\d+)/;
+       unless( defined $digits ) {
+           # no leading digits: warn and fall back to 0
+           $self->warn("Must specify a valid Positive integer to maxcount");
+           $digits = 0;
+       }
+       $self->{'_maxcount'} = $digits;
+   }
+   return $self->{'_maxcount'};
+}
+
+
+=head2 reset_tree_count
+
+ Title   : reset_tree_count
+ Usage   : $factory->reset_tree_count;
+ Function: Reset the tree counter
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub reset_count{
+    my ($self) = @_;
+    # rewind the counter that next_tree compares against maxcount
+    $self->{'_count'} = 0;
+}
+
+# The POD documents this method as reset_tree_count; make that name
+# callable too while keeping reset_count for existing callers.
+*reset_tree_count = \&reset_count;
+
+=head2 taxa
+
+ Title   : taxa
+ Usage   : $obj->taxa($newval)
+ Function: Set the leaf node names
+ Returns : value of taxa
+ Args    : Arrayref of Taxon names
+
+
+=cut
+
+sub taxa {
+    my ($self,$value) = @_;
+    # plain getter when called without a value
+    return $self->{'_taxa'} if ! defined $value;
+
+    # anything that is not an array reference is replaced by an empty list
+    if( ref($value) !~ /ARRAY/i ) {
+        $self->warn("Must specify a valid array ref to the method 'taxa'");
+        $value = [];
+    }
+    # the taxon count is cached next to the list itself
+    @{$self}{'_taxa','_num_taxa'} = ($value, scalar @$value);
+    return $self->{'_taxa'};
+}
+
+=head2 num_taxa
+
+ Title   : num_taxa
+ Usage   : $obj->num_taxa($newval)
+ Function: Get the number of Taxa
+ Returns : value of num_taxa
+ Args    : none
+
+
+=cut
+
+sub num_taxa {
+    # read-only: the count is stored by taxa() whenever the list is set
+    return shift->{'_num_taxa'};
+}
+
+# alias old methods
+# (num_samples and samples share the code of num_taxa and taxa through
+# these glob assignments)
+*num_samples = \&num_taxa;
+*samples = \&taxa;
+
+=head2 random
+
+ Title   : random
+ Usage   : my $rfloat = $factory->random($size)
+ Function: Generates a random number between 0 and $size
+           This is abstracted so that someone can override and provide their
+           own special RNG.  This is expected to be a uniform RNG.
+ Returns : Floating point random
+ Args    : $maximum size for random number (defaults to 1)
+
+
+=cut
+
+sub random{
+   # the invocant is not needed; only the upper bound is used
+   my (undef,$max) = @_;
+   # thin wrapper over perl's rand so a subclass can swap in another RNG
+   return rand($max);
+}
+
+
+=head2 random_tree_method
+
+ Title   : random_tree_method
+ Usage   : $obj->random_tree_method($newval)
+ Function: Get/Set the name of the random tree method used by next_tree
+ Example : 
+ Returns : value of random_tree_method (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub random_tree_method{
+    my ($self,@newval) = @_;
+
+    # setter: any supplied argument (even undef) is stored and returned
+    if( @newval ) {
+        $self->{'random_tree_method'} = $newval[0];
+        return $newval[0];
+    }
+    # getter: fall back to the package default when nothing true is stored
+    return $self->{'random_tree_method'} || $Defaults{'DefaultTreeMethod'};
+}
+
+=head2 nodetype
+
+ Title   : nodetype
+ Usage   : $obj->nodetype($newval)
+ Function: Get/Set the class used to create the nodes of generated trees
+ Example : 
+ Returns : value of nodetype (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub nodetype{
+   my ($self,$value) = @_;
+   if( defined $value) {
+       # The name is about to go through a string eval, so only accept
+       # something shaped like a package name (Foo or Foo::Bar::Baz).
+       unless( $value =~ /\A\w+(?:::\w+)*\z/ ) {
+           $self->throw("Unrecognized Node type for ".ref($self).
+                        "'$value'");
+       }
+       eval "require $value";
+       if( $@ ) { $self->throw("$@: Unrecognized Node type for ".ref($self).
+                               "'$value'");}
+
+       # Ask the class itself instead of blessing a throwaway hash into
+       # it: that object ran the class's DESTROY on an empty hash, and
+       # its variable shadowed sort's $a.
+       unless( $value->isa('Bio::Tree::NodeI')  ) {
+           # message used to name "SeqFactory", a copy/paste leftover
+           $self->throw("Must provide a valid Bio::Tree::NodeI or child class to RandomFactory, not $value");
+       }
+       $self->{'nodetype'} = $value;
+   }
+   return $self->{'nodetype'};
+}
+
+
+# The assignment of times are based on Mike Sanderson's r8s code
+# The topology assignment code is based on Richard Hudson's
+# make_trees
+
+
+# Builds the node table for a random Yule tree.
+# Args    : optional speciation rate (defaults to $Defaults{'Speciation'})
+# Returns : arrayref of node hashes with keys nodenum, time, desc1, desc2
+#           (desc1/desc2 are indexes into the same array); leaves come
+#           first and the root is the last entry
+sub rand_yule_c_tree {
+    my ($self,$speciation) = @_;
+    $speciation ||= $Defaults{'Speciation'};
+    my $n_taxa = $self->num_taxa;
+    my $taxa = $self->taxa || [];
+
+    my $randfuncs = Bio::Tools::RandomDistFunctions->new();
+    # (the declaration below was mangled to "at list, at times" by the
+    # mail archive; the unused $nodetype and $rate locals were dropped)
+    my (@tree,@list,@times);
+    # n leaves plus n-1 internal nodes
+    my $max = 2 * $n_taxa - 1;
+    for( my $in = 0; $in < $max; $in++ ) {
+        push @tree, { 'nodenum' => "Node$in" };
+    }
+    # setup leaf nodes
+    for( my $in = 0; $in < $n_taxa; $in++ ) {
+        $tree[$in]->{'time'} = 0;
+        $tree[$in]->{'desc1'} = undef;
+        $tree[$in]->{'desc2'} = undef;
+        # a true taxon name replaces the generated "Node$in" label
+        if( my $r = $taxa->[$in] ) {
+            $tree[$in]->{'nodenum'} = $r;
+        }
+        push @list, $in;
+    }
+
+    for( my $i = 0; $i < $n_taxa - 1; $i++ ) {
+        # draw random interval times
+        push @times, $randfuncs->rand_birth_distribution($speciation);
+    }
+    # sort smallest to largest
+    @times = sort {$a <=> $b} @times;
+    # topology generation: @list holds the nodes still waiting for a
+    # parent; each round joins two of them under the new node $swap
+    for( my $in = $n_taxa; $in > 1; $in-- ) {
+        my $time = shift @times;
+
+        my $pick = int $self->random($in);
+        my $nodeindex = $list[$pick];
+        $tree[$nodeindex]->{'time'} = $time;
+        my $swap = 2 * $n_taxa - $in;
+        $tree[$swap]->{'desc1'} = $nodeindex;
+        # fill the hole with the last live entry
+        $list[$pick] = $list[$in-1];
+
+        # second pick now goes through the overridable random() as well;
+        # it used to call the builtin rand directly
+        $pick = int $self->random($in - 1);
+        $nodeindex = $list[$pick];
+        $tree[$nodeindex]->{'time'} = $time;
+        $tree[$swap]->{'desc2'} = $nodeindex;
+        $list[$pick] = $swap;
+    }
+    # NOTE(review): all n-1 drawn times were consumed by the loop above,
+    # so this shift yields undef and the root gets no time - confirm
+    # whether n times should be drawn instead
+    $tree[-1]->{'time'} = shift @times;
+    return \@tree;
+}
+
+
+
+# Builds the node table for a backwards birth-death style random tree.
+# Args    : none
+# Returns : arrayref of node hashes with keys nodenum, time, desc1, desc2
+#           (desc1/desc2 are indexes into the same array); leaves come
+#           first and the root is the last entry; every time is divided
+#           by the root's time before returning
+sub rand_birthdeath_backwards_tree {
+    my ($self) = @_;
+    my $n_taxa = $self->num_taxa;
+    my $taxa = $self->taxa || [];
+
+    my $randfuncs = Bio::Tools::RandomDistFunctions->new();
+    my $rate = $Defaults{'YuleRate'};
+    # (the declaration below was mangled to "at list, at times" by the
+    # mail archive; the unused @times and $i were dropped)
+    my (@tree,@list);
+    # n leaves plus n-1 internal nodes
+    my $max = 2 * $n_taxa - 1;
+    for( my $in = 0; $in < $max; $in++ ) {
+        push @tree, { 'nodenum' => "Node$in" };
+    }
+    # setup leaf nodes
+    for( my $in = 0; $in < $n_taxa; $in++ ) {
+        $tree[$in]->{'time'} = 0;
+        $tree[$in]->{'desc1'} = undef;
+        $tree[$in]->{'desc2'} = undef;
+        if( my $r = $taxa->[$in] ) {
+            # deal with pre-labeled nodes
+            $tree[$in]->{'nodenum'} = $r;
+        }
+        push @list, $in;
+    }
+    my $time = 0;
+
+    # topology generation: @list holds the nodes still waiting for a
+    # parent; each round joins two of them under the new node $swap
+    for( my $in = $n_taxa; $in > 1; $in-- ) {
+        my $pick = int $self->random($in);
+        my $nodeindex = $list[$pick];
+        my $swap = 2 * $n_taxa - $in;
+        # time accumulates from join to join
+        $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);
+        $tree[$nodeindex]->{'time'} = $time;
+        $tree[$swap]->{'desc1'} = $nodeindex;
+        # fill the hole with the last live entry
+        $list[$pick] = $list[$in-1];
+
+        # second pick now goes through the overridable random() as well;
+        # it used to call the builtin rand directly
+        $pick = int $self->random($in - 1);
+        $nodeindex = $list[$pick];
+        $tree[$nodeindex]->{'time'} = $time;
+        $tree[$swap]->{'desc2'} = $nodeindex;
+        $list[$pick] = $swap;
+    }
+    my $root = $tree[-1];
+    $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);
+    $root->{'time'} = $time;
+
+    # Normalize times by the root node...
+    # NOTE(review): the root is itself an element of @tree and is visited
+    # last, so its own time ends up as 1; a root time of 0 would divide
+    # by zero - confirm the distribution can never produce that
+    for my $node ( @tree ) {
+        $node->{'time'} /= $root->{'time'};
+    }
+    return \@tree;
+}
+
+
+# The assignment of times are based on Mike Sanderson's r8s code
+# The topology assignment code is based on Richard Hudson's
+# make_trees
+
+sub rand_birth_death_tree {
+# Unfinished placeholder: every statement below is commented out, so
+# calling this method currently does nothing and returns nothing.
+# The draft is kept as a starting point for a forward birth-death
+# implementation.
+# Still need to finish
+#     my ($self,$spec_rate,$extinct_rate,$char_rate) = @_;
+#     my $n_taxa =  $self->num_taxa;
+#     my $dt = 0.1 / $n_taxa;
+#     my @tree;
+#     my $max = 3 * $n_taxa - 1;
+#     # setup leaf nodes
+    
+#     for($in=0;$in < $size;$in++)  {
+# 	push @tree, { 'nodenum' => $taxa->[$in] || "Node$in",
+# 		      'time'    => 0,
+# 		      'desc1'   => undef,
+# 		      'desc2'   => undef, 
+# 		  };
+#     }
+#     my $time = $dt;
+#     my $idx = 0;
+#     while( $n_taxa > 1 ) { 	
+# 	if ( event($dt * $spec_rate, $n_taxa) ) {
+# 	    my $pick = int $self->random($n_taxa);
+# 	    my $pick2 = int $self->random($n_taxa);
+# 	    while( $pick2 == $pick ) {
+# 		$pick2 = int $self->random($n_taxa);
+# 	    }
+	    # to finish....
+	    
+# 	    $tree[$swap]->{'desc1'} = $nodeindex;		    
+# 	}
+#     }
+
+	    
+
+# 	$list[$pick] = $list[$in-1];       
+	
+# 	$pick = int rand($in - 1);    
+# 	$nodeindex = $list[$pick];
+# 	$tree[$swap]->{'desc2'} = $nodeindex;	
+# 	$list[$pick] = $swap;	
+# 	$tree[$swap]->{'time'} = $times[$ix++];
+#     }
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Statistics.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Statistics.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Statistics.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,135 @@
+# $Id: Statistics.pm,v 1.12.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::Statistics
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::Statistics - Calculate certain statistics for a Tree
+
+=head1 SYNOPSIS
+
+  use Bio::Tree::Statistics;
+
+
+=head1 DESCRIPTION
+
+This should be where Tree statistics are calculated.  It previously
+held statistics from a Coalescent simulation.  Currently it offers only
+assess_bootstrap because we have not added other Tree specific statistic
+calculations to this module yet.  We welcome any contributions.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+none so far
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tree::Statistics;
+use strict;
+
+
+use base qw(Bio::Root::Root);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::Statistics();
+ Function: Builds a new Bio::Tree::Statistics object 
+ Returns : Bio::Tree::Statistics
+ Args    :
+
+
+=cut
+
+
+=head2 assess_bootstrap
+
+ Title   : assess_bootstrap
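+ Usage   : my $tree_with_bs = $stats->assess_bootstrap(\@bs_trees,$guide_tree);
+ Function: Calculates the bootstrap for internal nodes of the guide tree
+           based on how often each clade appears in the bootstrap trees
+ Returns : L<Bio::Tree::TreeI> (the guide tree, with bootstrap values set)
+ Args    : Arrayref of L<Bio::Tree::TreeI>s (the bootstrap replicates)
+           L<Bio::Tree::TreeI> guide tree to annotate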
+
+
+=cut
+
+sub assess_bootstrap{
+   my ($self,$bs_trees,$guide_tree) = @_;
+
+   # internal nodes are defined by their children: each one is keyed by
+   # the sorted, comma-joined ids of the leaves below it
+
+   my (%lookup,%internal);
+   my $i = 0;
+   for my $tree ( $guide_tree, @$bs_trees ) {
+       # Do this as a top down approach, can probably be
+       # improved by caching internal node states, but not going
+       # to worry about it right now.
+
+       my @allnodes = $tree->get_nodes;
+       my @internalnodes = grep { ! $_->is_Leaf } @allnodes;
+       for my $node ( @internalnodes ) {
+           my @tips = sort map { $_->id }
+                      grep { $_->is_Leaf() } $node->get_all_Descendents;
+           my $id = "(".join(",", @tips).")";
+           if( $i == 0 ) {
+               # first pass is the guide tree: remember which node owns
+               # each clade
+               $internal{$id} = $node->internal_id;
+           } else {
+               # later passes are replicates: count clade occurrences
+               $lookup{$id}++;
+           }
+       }
+       $i++;
+   }
+   # $i also counted the guide tree, which is not a replicate; dividing
+   # by $i capped a clade present in every replicate below 100
+   my $replicates = $i - 1;
+   for my $l ( keys %lookup ) {
+       # clades never seen in the guide tree have no node to annotate
+       if( defined $internal{$l} ) {#&& $lookup{$l} > $min_seen ) {
+           my $intnode = $guide_tree->find_node(-internal_id => $internal{$l});
+           $intnode->bootstrap(sprintf("%d",100 * $lookup{$l} / $replicates));
+       }
+   }
+   return $guide_tree;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Tree.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Tree.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/Tree.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,327 @@
+# $Id: Tree.pm,v 1.21.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::Tree
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::Tree - An Implementation of TreeI interface.
+
+=head1 SYNOPSIS
+
+    # like from a TreeIO
+    my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
+    my $tree = $treeio->next_tree;
+    my @nodes = $tree->get_nodes;
+    my $root = $tree->get_root_node;
+
+
+=head1 DESCRIPTION
+
+This object holds handles to Nodes which make up a tree.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Aaron Mackey amackey at virginia.edu
+Sendu Bala   bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tree::Tree;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+use base qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::Tree::Tree();
+ Function: Builds a new Bio::Tree::Tree object 
+ Returns : Bio::Tree::Tree
+ Args    : -root     => L<Bio::Tree::NodeI> object which is the root
+             OR
+           -node     => L<Bio::Tree::NodeI> object from which the root will be
+                        determined
+
+           -nodelete => boolean, whether or not to try and cleanup all
+                                 the nodes when this tree goes out
+                                 of scope.
+           -id       => optional tree ID
+           -score    => optional tree score value
+
+=cut
+
+sub new {
+  # Constructor: builds the tree object, registers cleanup_tree() as its
+  # cleanup handler and applies the -root/-node/-nodelete/-id/-score args.
+  my($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  $self->{'_rootnode'} = undef;
+  $self->{'_maxbranchlen'} = 0;
+  $self->_register_for_cleanup(\&cleanup_tree);
+  my ($root,$node,$nodel,$id,$score)= $self->_rearrange([qw(ROOT NODE NODELETE
+                                                      ID SCORE)], @args);
+
+  # -node is only consulted when no explicit -root was supplied
+  if ($node && ! $root) {
+    $self->throw("Must supply a Bio::Tree::NodeI") unless ref($node) && $node->isa('Bio::Tree::NodeI');
+    my @lineage = $self->get_lineage_nodes($node);
+    # the lineage is ordered root-first; a node without ancestors is
+    # its own root
+    $root = shift(@lineage) || $node;
+
+    # to stop us pulling in entire database of a Bio::Taxon when we later do
+    # get_nodes() or similar, specifically set ancestor() for each node
+    if ($node->isa('Bio::Taxon')) {
+      push(@lineage, $node) unless $node eq $root;
+      my $ancestor = $root;
+      foreach my $lineage_node (@lineage) {
+        $lineage_node->ancestor($ancestor);
+      } continue { $ancestor = $lineage_node; }
+    }
+  }
+  if ($root) {
+    $self->set_root_node($root);
+  }
+
+  $self->nodelete($nodel || 0);
+  $self->id($id)       if defined $id;
+  $self->score($score) if defined $score;
+  return $self;
+}
+
+
+=head2 nodelete
+
+ Title   : nodelete
+ Usage   : $obj->nodelete($newval)
+ Function: Get/Set Boolean whether or not to delete the underlying
+           nodes when it goes out of scope.  By default this is false
+           meaning trees are cleaned up.
+ Returns : boolean
+ Args    : on set, new boolean value
+
+
+=cut
+
+sub nodelete{
+    # Combined getter/setter for the 'nodelete' flag: any supplied
+    # argument (even a false or undefined one) is stored first.
+    my ($self, @newval) = @_;
+    if( @newval ) {
+        $self->{'nodelete'} = $newval[0];
+    }
+    return $self->{'nodelete'};
+}
+
+=head2 get_nodes
+
+ Title   : get_nodes
+ Usage   : my @nodes = $tree->get_nodes()
+ Function: Return list of Tree::NodeI objects
+ Returns : array of Tree::NodeI objects (nothing if the tree has no root
+           node or the requested order is not recognised)
+ Args    : (named values) hash with the optional keys
+           -order  => 'b|breadth' first order or 'd|depth' first order
+                      (default 'depth')
+           -sortby => passed through to each_Descendent() /
+                      get_all_Descendents() of the nodes (default 'none')
+
+=cut
+
+sub get_nodes{
+   my ($self, @args) = @_;
+
+   my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)], @args);
+   $order ||= 'depth';
+   $sortby ||= 'none';
+   my $node = $self->get_root_node || return;
+   if ($order =~ m/^b|(breadth)$/oi) {
+       # Breadth-first: @children doubles as the work queue.  An index
+       # loop is used because the array grows while it is being walked.
+       my @children = ($node);
+       for (my $i = 0; $i < @children; $i++) {
+           push @children, $children[$i]->each_Descendent($sortby);
+       }
+       return @children;
+   }
+
+   if ($order =~ m/^d|(depth)$/oi) {
+       # this is depth-first search I believe
+       my @children = ($node,$node->get_all_Descendents($sortby));
+       return @children;
+   }
+
+   # unrecognised -order value
+   return;
+}
+
+=head2 get_root_node
+
+ Title   : get_root_node
+ Usage   : my $node = $tree->get_root_node();
+ Function: Get the Top Node in the tree, in this implementation
+           Trees only have one top node.
+ Returns : Bio::Tree::NodeI object
+ Args    : none
+
+=cut
+
+
+sub get_root_node{
+   # Plain accessor for the stored root node (undef when none is set).
+   my $self = shift;
+   my $root = $self->{'_rootnode'};
+   return $root;
+}
+
+=head2 set_root_node
+
+ Title   : set_root_node
+ Usage   : $tree->set_root_node($node)
+ Function: Set the Root Node for the Tree
+ Returns : Bio::Tree::NodeI
+ Args    : Bio::Tree::NodeI
+
+=cut
+
+sub set_root_node{
+   # Stores a new root node when one is supplied and always returns the
+   # current root.  A defined value that is not a Bio::Tree::NodeI is
+   # rejected with a warning; undef is accepted and clears the root.
+   my ($self, @rest) = @_;
+   return $self->get_root_node unless @rest;
+
+   my $candidate = $rest[0];
+   if( ! defined $candidate || $candidate->isa('Bio::Tree::NodeI') ) {
+       $self->{'_rootnode'} = $candidate;
+   } else {
+       $self->warn("Trying to set the root node to $candidate which is not a Bio::Tree::NodeI");
+   }
+   return $self->get_root_node;
+}
+
+=head2 total_branch_length
+
+ Title   : total_branch_length
+ Usage   : my $size = $tree->total_branch_length
+ Function: Returns the sum of the length of all branches
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub total_branch_length {
+   # Adds up branch_length() over every descendent of the root node;
+   # missing or false lengths count as 0.  Returns 0 for a rootless tree.
+   my ($self) = @_;
+   my $root = $self->get_root_node;
+   return 0 unless defined $root;
+
+   my $total = 0;
+   foreach my $desc ( $root->get_all_Descendents('none') ) {
+       $total += $desc->branch_length || 0;
+   }
+   return $total;
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : my $id = $tree->id();
+ Function: An id value for the tree
+ Returns : scalar
+ Args    : [optional] new value to set
+
+
+=cut
+
+sub id{
+   # Getter/setter for the tree id; only a defined value overwrites it.
+   my ($self,$val) = @_;
+   $self->{'_treeid'} = $val if defined $val;
+   return $self->{'_treeid'};
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $obj->score($newval)
+ Function: Sets the associated score with this tree
+           This is a generic slot which is probably best used 
+           for log likelihood or other overall tree score
+ Returns : value of score
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub score{
+   # Getter/setter for the tree score; only a defined value overwrites it.
+   my ($self,$val) = @_;
+   $self->{'_score'} = $val if defined $val;
+   return $self->{'_score'};
+}
+
+
+# decorated interface TreeI Implements this
+
+=head2 height
+
+ Title   : height
+ Usage   : my $height = $tree->height
+ Function: Gets the height of tree - this is LOG_2($number_nodes)
+           WARNING: this is only true for strict binary trees.  The TreeIO
+           system is capable of building non-binary trees, for which this
+           method will currently return an incorrect value!!
+ Returns : integer
+ Args    : none
+
+=head2 number_nodes
+
+ Title   : number_nodes
+ Usage   : my $size = $tree->number_nodes
+ Function: Returns the number of nodes in the tree
+ Returns : integer
+ Args    : none
+
+
+=cut
+
+
+# -- private internal methods --
+
+sub cleanup_tree {
+    # Cleanup handler registered by new(): unless the nodelete flag is
+    # set, asks every node (breadth-first) to clean itself up, then
+    # drops the tree's reference to its root.
+    my ($tree) = @_;
+    if( ! $tree->nodelete ) {
+        foreach my $member ( $tree->get_nodes(-order  => 'b', -sortby => 'none') ) {
+            $member->node_cleanup;
+            undef $member;
+        }
+    }
+    $tree->{'_rootnode'} = undef;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeFunctionsI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeFunctionsI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeFunctionsI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,765 @@
+# $Id: TreeFunctionsI.pm,v 1.28.2.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::TreeFunctionsI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::TreeFunctionsI - Decorated Interface implementing basic Tree exploration methods
+
+=head1 SYNOPSIS
+
+  use Bio::TreeIO;
+  my $in = new Bio::TreeIO(-format => 'newick', -file => 'tree.tre');
+
+  my $tree = $in->next_tree;
+
+  my @nodes = $tree->find_node('id1');
+
+  if( $tree->is_monophyletic(-nodes => \@nodes, -outgroup => $outnode) ){
+   #...
+  }
+
+=head1 DESCRIPTION
+
+This interface provides a set of implemented Tree functions which
+only use the defined methods in the TreeI or NodeI interface.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich, Aaron Mackey, Justin Reese
+
+Email jason-at-bioperl-dot-org
+Email amackey-at-virginia.edu
+Email jtr4v-at-virginia.edu
+
+=head1 CONTRIBUTORS
+
+Sendu Bala, bix at sendu.me.uk
+
+Rerooting code was worked on by
+
+  Daniel Barker d.barker-at-reading.ac.uk
+  Ramiro Barrantes Ramiro.Barrantes-at-uvm.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tree::TreeFunctionsI;
+use strict;
+
+use base qw(Bio::Tree::TreeI);
+
+=head2 find_node
+
+ Title   : find_node
+ Usage   : my @nodes = $self->find_node(-id => 'node1');
+ Function: returns all nodes that match a specific field; by default the
+           field is id, but any other node method (for example
+           branch_length or internal_id) can be named instead
+ Returns : List of nodes which matched search (in scalar context, the
+           first matching node)
+ Args    : text string to search for
+           OR
+           -fieldname => $textstring
+
+=cut
+
+sub find_node {
+   my ($self,$type,$field) = @_;
+   if( ! defined $type ) {
+       $self->warn("Must request a either a string or field and string when searching");
+       # nothing to search for: report no match instead of comparing
+       # every node's id against undef
+       return;
+   }
+
+   # all this work for a '-' named field
+   # is so that we could potentially
+   # expand to other constraints in
+   # different implementations
+   # like 'find all nodes with boostrap < XX'
+
+   if( ! defined $field ) {
+       # only 1 argument, default to searching by id
+       $field= $type;
+       $type = 'id';
+   } else {
+       $type =~ s/^-//;
+   }
+
+   # could actually do this by testing $rootnode->can($type) but
+   # it is possible that a tree is implemeted with different node types
+   # - although it is unlikely that the root node would be richer than the
+   # leaf nodes.  Can't handle NHX tags right now
+
+   # the comparison is a string comparison (eq), also for numeric fields
+   my @nodes = grep { $_->can($type) && defined $_->$type() &&
+                      $_->$type() eq $field } $self->get_nodes();
+
+   if ( wantarray) {
+       return @nodes;
+   } else {
+       if( @nodes > 1 ) {
+           $self->warn("More than 1 node found but caller requested scalar, only returning first node");
+       }
+       return shift @nodes;
+   }
+}
+
+=head2 remove_Node
+
+ Title   : remove_Node
+ Usage   : $tree->remove_Node($node)
+ Function: Removes a node from the tree by detaching it from its ancestor
+ Returns : 0 if the node could not be identified or has no ancestor
+           (which includes the root node); otherwise the return value of
+           the ancestor's remove_Descendent() call
+ Args    : either Bio::Tree::NodeI or string of the node id
+
+=cut
+
+sub remove_Node {
+   my ($self,$input) = @_;
+   my $node;
+   if( ! ref($input) ) {
+       # a plain string is treated as a node id
+       $node = $self->find_node($input);
+       unless( defined $node ) {
+           $self->warn("Could not find a node with the supplied id, can't remove!");
+           return 0;
+       }
+   } elsif( ! $input->isa('Bio::Tree::NodeI') ) {
+       $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to remove_node");
+       return 0;
+   } else {
+       $node = $input;
+   }
+
+   # Removal works through the ancestor, so a node without one (the root
+   # included) cannot be removed this way.
+   my $ancestor = $node->ancestor;
+   unless( $ancestor ) {
+       $self->warn("Node (".$node->to_string . ") has no ancestor, can't remove!");
+       return 0;
+   }
+   return $ancestor->remove_Descendent($node);
+}
+
+=head2 get_lineage_nodes
+
+ Title   : get_lineage_nodes
+ Usage   : my @nodes = $tree->get_lineage_nodes($node);
+ Function: Get the full lineage of a node (all its ancestors, in the order
+           root->most recent ancestor)
+ Returns : list of nodes
+ Args    : either Bio::Tree::NodeI or string of the node id
+
+=cut
+
+sub get_lineage_nodes {
+    # Collects the ancestors of a node (given as an object or an id),
+    # ordered from the topmost ancestor down to the direct parent.
+    my ($self, $input) = @_;
+    my $current;
+    if (! ref $input) {
+        # plain string: look the node up by id
+        $current = $self->find_node($input);
+    }
+    elsif ($input->isa('Bio::Tree::NodeI')) {
+        $current = $input;
+    }
+    else {
+        $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to get_lineage_nodes");
+        return;
+    }
+
+    # when dealing with Bio::Taxon objects with databases, the root will always
+    # be the database's root, ignoring this Tree's set root node; prefer the
+    # Tree's idea of root.
+    my $root = $self->get_root_node || '';
+
+    my @lineage;
+    while ($current) {
+        my $parent = $current->ancestor;
+        last unless $parent;
+        unshift @lineage, $parent;
+        # stop climbing once this tree's own root has been recorded
+        last if $parent eq $root;
+        $current = $parent;
+    }
+    return @lineage;
+}
+
+=head2 splice
+
+ Title   : splice
+ Usage   : $tree->splice(-remove_id => \@ids);
+ Function: Remove all the nodes from a tree that correspond to the supplied
+           args, making all the descendents of a removed node the descendents
+           of the removed node's ancestor.
+           You can ask to explicitly remove certain nodes by using -remove_*,
+           remove them conditionally by using -remove_* in combination with
+           -keep_*, or remove everything except certain nodes by using only
+           -keep_*.
+ Returns : n/a
+ Args    : just a list of Bio::Tree::NodeI objects to remove, OR
+           -key => value pairs, where -key has the prefix 'remove' or 'keep',
+           followed by an underscore, followed by a fieldname (like for the
+           method find_node). Value should be a scalar or an array ref of
+           scalars (again, like you might supply to find_node).
+
+           So (-remove_id => [1, 2]) will remove all nodes from the tree that
+           have an id() of '1' or '2', while
+           (-remove_id => [1, 2], -keep_id => [2]) will remove all nodes with
+           an id() of '1'.
+           (-keep_id => [2]) will remove all nodes unless they have an id() of
+           '2' (note, no -remove_*).
+
+=cut
+
+# Removes nodes from the tree, re-attaching the descendents of each removed
+# node to that node's ancestor.  Accepts either a plain list of node objects
+# or -remove_<field>/-keep_<field> pairs that are resolved via find_node().
+# Throws when called without arguments, with an odd number of key/value
+# args, or when removing the root leaves zero or several root candidates.
+sub splice {
+    my ($self, @args) = @_;
+    $self->throw("Must supply some arguments") unless @args > 0;
+    
+    my @nodes_to_remove;
+    # A reference as first argument means "list of nodes"; only the first
+    # element is type-checked.
+    if (ref($args[0])) {
+        $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI');
+        @nodes_to_remove = @args;
+    }
+    else {
+        $self->throw("When supplying -key => value pairs, must be an even number of args") unless @args % 2 == 0;
+        my %args = @args;
+        my @keep_nodes;
+        my @remove_nodes;
+        # stays true only if no -remove_* key is seen, i.e. "remove
+        # everything except the -keep_* nodes"
+        my $remove_all = 1;
+        while (my ($key, $value) = each %args) {
+            my @values = ref($value) ? @{$value} : ($value);
+            
+            # $key is a copy, so stripping the prefix does not alter %args;
+            # what is left (e.g. '-id') is the field name handed to find_node.
+            # Keys containing neither prefix are silently ignored.
+            if ($key =~ s/remove_//) {
+                $remove_all = 0;
+                foreach my $value (@values) {
+                    push(@remove_nodes, $self->find_node($key => $value));
+                }
+            }
+            elsif ($key =~ s/keep_//) {
+                foreach my $value (@values) {
+                    push(@keep_nodes, $self->find_node($key => $value));
+                }
+            }
+        }
+        
+        if ($remove_all) {
+            if (@keep_nodes == 0) {
+                $self->warn("Requested to remove everything except certain nodes, but those nodes were not found; doing nothing instead");
+                return;
+            }
+            
+            @remove_nodes = $self->get_nodes;
+        }
+        
+        # -keep_* nodes are filtered out of the removal list by internal_id
+        if (@keep_nodes > 0) {
+            my %keep_iids = map { $_->internal_id => 1 } @keep_nodes;
+            foreach my $node (@remove_nodes) {
+                push(@nodes_to_remove, $node) unless exists $keep_iids{$node->internal_id};
+            }
+        }
+        else {
+            @nodes_to_remove = @remove_nodes;
+        }
+    }
+    
+    # do the splicing
+    #*** the algorithm here hasn't really been thought through and tested much,
+    #    will probably need revising
+    # %root_descs tracks, by internal_id, the nodes that could become the new
+    # root once the current root has been removed.
+    my %root_descs;
+    my $reroot = 0;
+    foreach my $node (@nodes_to_remove) {
+        my @descs = $node->each_Descendent;
+        
+        my $ancestor = $node->ancestor;
+        # only the first ancestor-less node encountered is handled as the
+        # tree root
+        if (! $ancestor && ! $reroot) {
+            # we're going to remove the tree root, so will have to re-root the
+            # tree later
+            $reroot = 1;
+            %root_descs = map { $_->internal_id => $_ } @descs;
+            $node->remove_all_Descendents;
+            next;
+        }
+        
+        if (exists $root_descs{$node->internal_id}) {
+            # well, this one can't be the future root anymore
+            delete $root_descs{$node->internal_id};
+            
+            # but maybe one of this one's descs will become the root
+            foreach my $desc (@descs) {
+                $root_descs{$desc->internal_id} = $desc;
+            }
+        }
+        
+        # make the ancestor of our descendents our own ancestor, and give us
+        # no ancestor of our own to remove us from the tree
+        foreach my $desc (@descs) {
+            $desc->ancestor($ancestor);
+        }
+        $node->ancestor(undef);
+    }
+    
+    # the old root was removed: exactly one surviving candidate must remain
+    if ($reroot) {
+        my @candidates = values %root_descs;
+        $self->throw("After splicing, there was no tree root!") unless @candidates > 0;
+        $self->throw("After splicing, the original root was removed but there are multiple candidates for the new root!") unless @candidates == 1;
+        $self->set_root_node($candidates[0]); # not sure its valid to use the reroot() method
+    }
+}
+
+=head2 get_lca
+
+ Title   : get_lca
+ Usage   : get_lca(-nodes => \@nodes ); OR
+           get_lca(@nodes);
+ Function: given two or more nodes, returns the lowest common ancestor (aka most
+           recent common ancestor)
+ Returns : node object or undef if there is no common ancestor
+ Args    : -nodes => arrayref of nodes to test, OR
+           just a list of nodes
+
+=cut
+
+sub get_lca {
+    my ($self, @args) = @_;
+    my ($nodes) = $self->_rearrange([qw(NODES)], @args);
+    my @nodes;
+    if (ref($nodes) eq 'ARRAY') {
+        @nodes = @{$nodes};
+    }
+    else {
+        # no -nodes arrayref: treat the arguments as a plain list of nodes
+        @nodes = @args;
+    }
+    @nodes >= 2 or $self->throw("At least 2 nodes are required");
+
+    # We must go root->leaf to get the correct answer to lca (in a world where
+    # internal_id might not be uniquely assigned), but leaf->root is more
+    # forgiving (eg. lineages may not all have the same root, or they may have
+    # different numbers of 'minor' taxa inbeteen 'major' ones).
+    #
+    # I use root->leaf so that we can easily do multiple nodes at once - no
+    # matter what taxa are below the lca, the lca and all its ancestors ought to
+    # be identical.
+
+    # one root-first path per node, ending with the node itself
+    my @paths;
+    foreach my $node (@nodes) {
+        my @path = ($self->get_lineage_nodes($node), $node);
+        push(@paths, \@path);
+    }
+    return unless @paths >= 2;
+
+    # Walk all paths in lock-step, one level per iteration, for as long as
+    # the first path still has nodes left; a shorter path ends the walk
+    # from inside the loop.
+    my $lca;
+    LEVEL: while (@{$paths[0]} > 0) {
+        my %node_ids;
+        my $node;
+        foreach my $path (@paths) {
+            $node = shift(@{$path}) || last LEVEL;
+            my $node_id = $node->internal_id;
+            unless (defined $node_id) {
+                $self->warn("One of the lineages had a node with no internal_id, can't calculate the common ancestor");
+                return;
+            }
+            $node_ids{$node_id}++;
+        }
+
+        if (keys %node_ids == 1) {
+            # every path has the same node at this level
+            $lca = $node;
+        }
+        else {
+            # at this point in the lineage the nodes are different; the previous
+            # loop had the lca
+            last LEVEL;
+        }
+    }
+
+    # If the tree that we are contains the lca (get_lca could have been called
+    # on an empty tree, since it works with plain Nodes), prefer to return the
+    # node object that belongs to us
+    if ($lca && $self->number_nodes > 0) {
+        my $own_lca = $self->find_node(-internal_id => $lca->internal_id);
+        $lca = $own_lca if $own_lca;
+    }
+
+    return $lca;
+}
+
+=head2 merge_lineage
+
+ Title   : merge_lineage
+ Usage   : merge_lineage($node)
+ Function: Merge a lineage of nodes with this tree.
+ Returns : n/a
+ Args    : Bio::Tree::TreeI with only one leaf, OR
+           Bio::Tree::NodeI which has an ancestor
+
+ For example, if we are the tree $tree:
+
+ +---B
+ |
+ A
+ |
+ +---C
+
+ and we want to merge the lineage $other_tree:
+
+ A---C---D
+
+ After calling $tree->merge_lineage($other_tree), $tree looks like:
+
+ +---B
+ |
+ A
+ |
+ +---C---D
+
+=cut
+
+# Grafts the part of a single-leaf lineage that this tree does not contain
+# yet onto the matching node of this tree.  Accepts a one-leaf tree, or a
+# node with an ancestor (from which a lineage tree is built via new()).
+# Throws on a non-reference argument, a tree with more than one leaf, or a
+# node without ancestor; warns and returns when no merge point is found.
+sub merge_lineage {
+    my ($self, $thing) = @_;
+    $self->throw("Must supply an object reference") unless ref($thing);
+    
+    my ($lineage_tree, $lineage_leaf);
+    if ($thing->isa('Bio::Tree::TreeI')) {
+        my @leaves = $thing->get_leaf_nodes;
+        $self->throw("The supplied Tree can only have one leaf") unless @leaves == 1;
+        $lineage_tree = $thing;
+        $lineage_leaf = shift(@leaves);
+    }
+    elsif ($thing->isa('Bio::Tree::NodeI')) {
+        $self->throw("The supplied Node must have an ancestor") unless $thing->ancestor;
+        $lineage_tree = $self->new(-node => $thing);
+        $lineage_leaf = $thing;
+    }
+    # NOTE(review): an object that is neither a TreeI nor a NodeI leaves both
+    # variables undefined and fails later on a method call - confirm whether
+    # callers can pass such objects.
+    
+    # get the lca of this node and every leaf of the main tree until we find
+    # the branch that isn't in the main tree yet
+    my ($main_tree_lca, $new_branch_base);
+    foreach my $leaf ($self->get_leaf_nodes) {
+        $main_tree_lca = $self->get_lca($lineage_leaf, $leaf) || ($self->warn("couldn't get the lca of nodes ".$lineage_leaf->id." and ".$leaf->id."!") && next);
+        
+        # locate the same node (by internal_id) inside the lineage tree and
+        # take its first descendent as the candidate branch to graft
+        # NOTE(review): find_node may return undef here if the lineage tree
+        # lacks that internal_id; the next line would then die - verify.
+        my $branch_lca = $lineage_tree->find_node(-internal_id => $main_tree_lca->internal_id);
+        ($new_branch_base) = $branch_lca->each_Descendent;
+        if ($new_branch_base) {
+            if ($self->find_node(-internal_id => $new_branch_base->internal_id)) {
+                # this branch is already in the main tree, try again
+                $new_branch_base = undef;
+                next;
+            }
+            else {
+                last;
+            }
+        }
+        else {
+            # the lca is the lineage leaf itself, nothing for us to merge
+            return;
+        }
+    }
+    $new_branch_base || ($self->warn("couldn't merge the lineage of ".$lineage_leaf->id." with the rest of the tree!\n") && return);
+    # attach the missing branch below the lca found in the main tree
+    $main_tree_lca->add_Descendent($new_branch_base);
+}
+
+=head2 contract_linear_paths
+
+ Title   : contract_linear_paths
+ Usage   : contract_linear_paths()
+ Function: Splices out all nodes in the tree that have an ancestor and only one
+           descendent.
+ Returns : n/a
+ Args    : none
+
+ For example, if we are the tree $tree:
+
+             +---E
+             |
+ A---B---C---D
+             |
+             +---F
+
+ After calling $tree->contract_linear_paths(), $tree looks like:
+
+     +---E
+     |
+ A---D
+     |
+     +---F
+
+=cut
+
+sub contract_linear_paths {
+    # Splices out every node that has an ancestor and exactly one
+    # descendent; splice() is only called when such nodes exist.
+    my ($self) = @_;
+    my @linear = grep { $_->ancestor && $_->each_Descendent == 1 } $self->get_nodes;
+    $self->splice(@linear) if @linear;
+}
+
+=head2 distance
+
+ Title   : distance
+ Usage   : distance(-nodes => \@nodes )
+ Function: returns the distance between two given nodes
+ Returns : numerical distance
+ Args    : -nodes => arrayref of nodes to test
+
+=cut
+
+sub distance {
+    # Sums the branch lengths on the path between two nodes, i.e. from each
+    # node up to (but not including) their lowest common ancestor.
+    # Warns and returns nothing without -nodes or when no lca is found;
+    # throws unless exactly two nodes are supplied.
+    my ($self,@args) = @_;
+    my ($nodes) = $self->_rearrange([qw(NODES)],@args);
+    if( ! defined $nodes ) {
+        $self->warn("Must supply -nodes parameter to distance() method");
+        return;
+    }
+    $self->throw("Must provide 2 nodes") unless @{$nodes} == 2;
+
+    my $lca = $self->get_lca(@{$nodes});
+    unless($lca) {
+        $self->warn("could not find the lca of supplied nodes; can't find distance either");
+        return;
+    }
+
+    my $cumul_dist = 0;
+    my $warned = 0;
+    foreach my $node (@{$nodes}) {
+        # Climb with a copy: the foreach variable aliases the caller's
+        # array element, so assigning to it would overwrite their nodes.
+        my $current_node = $node;
+        while (1) {
+            # Check before adding: when a supplied node is itself the lca
+            # it contributes nothing, and the climb must not run on past it.
+            last if $current_node eq $lca;
+
+            if ($current_node->branch_length) {
+                $cumul_dist += $current_node->branch_length;
+            }
+            elsif (! $warned) {
+                $self->warn("At least some nodes do not have a branch length, the distance returned could be wrong");
+                $warned = 1;
+            }
+
+            $current_node = $current_node->ancestor || last;
+        }
+    }
+
+    return $cumul_dist;
+}
+
+=head2 is_monophyletic
+
+ Title   : is_monophyletic
+ Usage   : if( $tree->is_monophyletic(-nodes => \@nodes, 
+				      -outgroup => $outgroup)
+ Function: Will do a test of monophyly for the nodes specified
+           in comparison to a chosen outgroup
+ Returns : boolean
+ Args    : -nodes    => arrayref of nodes to test
+           -outgroup => outgroup to serve as a reference
+
+
+=cut
+
+sub is_monophyletic{
+   # Returns 1 when the lca of the given nodes is not among the ancestors
+   # of the outgroup, 0 when it is.  Returns nothing (after a warning)
+   # when arguments are missing or invalid, or when no lca can be found.
+   my ($self,@args) = @_;
+   my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
+
+   if( ! defined $nodes || ! defined $outgroup ) {
+       $self->warn("Must supply -nodes and -outgroup parameters to the method
+is_monophyletic");
+       return;
+   }
+   if( ref($nodes) !~ /ARRAY/i ) {
+       $self->warn("Must provide a valid array reference for -nodes");
+       # cannot dereference anything else; same handling as is_paraphyletic
+       return;
+   }
+
+   my $clade_root = $self->get_lca(@{$nodes});
+   unless( defined $clade_root ) {
+       $self->warn("could not find clade root via lca");
+       return;
+   }
+
+   # climb from the outgroup to the top, looking for the clade root
+   my $og_ancestor = $outgroup->ancestor;
+   while( defined ($og_ancestor ) ) {
+       if( $og_ancestor->internal_id == $clade_root->internal_id ) {
+           # monophyly is violated
+           return 0;
+       }
+       $og_ancestor = $og_ancestor->ancestor;
+   }
+   return 1;
+}
+
+=head2 is_paraphyletic
+
+ Title   : is_paraphyletic
+ Usage   : if( $tree->is_paraphyletic(-nodes =>\@nodes,
+				      -outgroup => $node) ){ }
+ Function: Tests whether or not a given set of nodes are paraphyletic
+           (representing the full clade) given an outgroup
+ Returns : [-1,0,1] , -1 if the group is not monophyletic
+                       0 if the group is not paraphyletic
+                       1 if the group is paraphyletic
+ Args    : -nodes => Array of Bio::Tree::NodeI objects which are in the tree
+           -outgroup => a Bio::Tree::NodeI to compare the nodes to
+
+
+=cut
+
+sub is_paraphyletic{
+   # Returns -1 when the lca of the given nodes is an ancestor of the
+   # outgroup, 1 when some leaf below that lca is missing from the given
+   # nodes, and 0 otherwise.  Returns nothing (after a warning) when
+   # arguments are missing or invalid, or when no lca can be found.
+   my ($self,@args) = @_;
+   my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
+
+   if( ! defined $nodes || ! defined $outgroup ) {
+       $self->warn("Must suply -nodes and -outgroup parameters to the method is_paraphyletic");
+       return;
+   }
+   if( ref($nodes) !~ /ARRAY/i ) {
+       $self->warn("Must provide a valid array reference for -nodes");
+       return;
+   }
+
+   # Algorithm
+   # Find the lca
+   # Find all the nodes beneath the lca
+   # Test to see that none are missing from the nodes list
+   my %nodehash;
+   foreach my $n ( @$nodes ) {
+       $nodehash{$n->internal_id} = $n;
+   }
+
+   my $clade_root = $self->get_lca(-nodes => $nodes );
+   unless( defined $clade_root ) {
+       $self->warn("could not find clade root via lca");
+       return;
+   }
+
+   my $og_ancestor = $outgroup->ancestor;
+
+   # Is this necessary/correct for paraphyly test?
+   while( defined ($og_ancestor ) ) {
+       if( $og_ancestor->internal_id == $clade_root->internal_id ) {
+           # monophyly is violated, could be paraphyletic
+           return -1;
+       }
+       $og_ancestor = $og_ancestor->ancestor;
+   }
+
+   # Temporary tree rooted at the clade root; -nodelete => 1 so that the
+   # shared nodes are not cleaned up when it goes out of scope.
+   my $tree = Bio::Tree::Tree->new(-root     => $clade_root,
+                                   -nodelete => 1);
+
+   foreach my $n ( $tree->get_nodes() ) {
+       next unless $n->is_Leaf();
+       # if any leaf node is not in the list
+       # then it is part of the clade and so the list
+       # must be paraphyletic
+       return 1 unless (  $nodehash{$n->internal_id} );
+   }
+   return 0;
+}
+
+
+=head2 reroot
+
+ Title   : reroot
+ Usage   : $tree->reroot($node);
+ Function: Reroots a tree making a new node the root
+ Returns : 1 on success, 0 on failure
+ Args    : Bio::Tree::NodeI that is in the tree, but is not the current root
+
+=cut
+
+# Re-roots the tree on a new node inserted on the branch above $new_root.
+# Returns 1 on success; returns 0 when $new_root is not a Bio::Tree::NodeI
+# (with a warning) or has no ancestor (silently).
+sub reroot {
+    my ($self,$new_root) = @_;
+    unless (defined $new_root && $new_root->isa("Bio::Tree::NodeI")) {
+        $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
+        return 0;
+    }
+    
+    # Insert a fresh node between $new_root and its ancestor; that fresh
+    # node, not the node passed in, becomes the root from here on.
+    {
+        my $anc = $new_root->ancestor;	
+        unless( $anc ) {
+            return 0;
+        }
+        # a leaf keeps its full branch length, an internal node's branch
+        # length is halved; the inserted node gets the same value
+        my $blen;
+        if( $new_root->is_Leaf() ) {
+            $blen = $new_root->branch_length;
+        } else {
+            $blen = ($new_root->branch_length() || 0) / 2;
+        }
+        my $node = $anc->new(-branch_length => $blen);
+        $new_root->branch_length($blen);
+        $anc->add_Descendent($node);
+        $anc->remove_Descendent($new_root);
+        $node->add_Descendent($new_root);
+        $new_root = $node;
+    }
+
+    # NOTE(review): $new_root is the freshly created node at this point, so
+    # this comparison presumably never matches the old root - confirm.
+    my $old_root = $self->get_root_node;
+    if( $new_root == $old_root ) {
+        $self->warn("Node requested for reroot is already the root node!");
+        return 0;
+    }
+    
+    # reverse the ancestor & children pointers
+    # (each node on the path takes over the branch length of the node below
+    # it before becoming that node's descendent)
+    my @path_from_oldroot = ($self->get_lineage_nodes($new_root), $new_root);
+    for (my $i = 0; $i < @path_from_oldroot - 1; $i++) {
+        my $current = $path_from_oldroot[$i];
+        my $next = $path_from_oldroot[$i + 1];
+        $current->remove_Descendent($next);
+        $current->branch_length($next->branch_length);
+        $next->add_Descendent($current);
+    }
+    # root node can be an artifical node which needs to be removed here
+    # when we are re-rooting.  We can only get its ancestor
+    # after we've reversed the path
+    # NOTE(review): assumes the old root gained an ancestor during the
+    # reversal above; $anc is used without a definedness check - verify.
+    my $anc = $old_root->ancestor;
+    my @d = $old_root->each_Descendent;
+    if( @d == 1 ) {
+    	$anc->add_Descendent(shift @d);
+        $anc->remove_Descendent($old_root);
+    }
+    $new_root->branch_length(undef);
+    $old_root = undef;
+    $self->set_root_node($new_root);
+
+    return 1;
+}
+
+=head2 findnode_by_id
+
+ Title   : findnode_by_id
+ Usage   : my $node = $tree->findnode_by_id($id);
+ Function: Get the first node whose id() equals the given value (the
+           root node is checked first, then its descendents)
+ Returns : L<Bio::Tree::NodeI>, or nothing if no node matches or the
+           tree has no root node
+ Args    : node id
+
+
+=cut
+
+
+sub findnode_by_id {
+    my $tree = shift;
+    my $id = shift;
+    # a tree without a root has no nodes to search
+    my $rootnode = $tree->get_root_node;
+    return unless defined $rootnode;
+
+    # nodes with an empty or otherwise false id are never matched
+    if ( ($rootnode->id) and ($rootnode->id eq $id) ) {
+        return $rootnode;
+    }
+    # process all the children
+    # NOTE(review): get_Descendents() is not defined in this part of the
+    # file; confirm the node class still provides it.
+    foreach my $node ( $rootnode->get_Descendents ) {
+        if ( ($node->id) and ($node->id eq $id ) ) {
+            return $node;
+        }
+    }
+    return;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Tree/TreeI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,215 @@
+# $Id: TreeI.pm,v 1.17.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::Tree::TreeI
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::TreeI - A Tree object suitable for lots of things, designed
+  originally for Phylogenetic Trees.
+
+=head1 SYNOPSIS
+
+  # get a Bio::Tree::TreeI somehow
+  # like from a TreeIO
+  my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
+  my $tree   = $treeio->next_tree;
+  my @nodes  = $tree->get_nodes;
+  my @leaves = $tree->get_leaf_nodes;
+  my $root   = $tree->get_root_node;
+
+=head1 DESCRIPTION
+
+This object holds a pointer to the Root of a Tree which is a
+Bio::Tree::NodeI.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 CONTRIBUTORS
+
+Aaron Mackey, amackey at virginia.edu
+Elia Stupka,  elia at fugu-sg.org
+Sendu Bala,   bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Tree::TreeI;
+use strict;
+
+use base qw(Bio::Tree::NodeI);
+
+=head2 get_nodes
+
+ Title   : get_nodes
+ Usage   : my @nodes = $tree->get_nodes()
+ Function: Return list of Tree::NodeI objects
+ Returns : array of Tree::NodeI objects
+ Args    : (named values) hash with one value 
+           order => 'b|breadth' first order or 'd|depth' first order
+
+=cut
+
+sub get_nodes{
+   # Interface stub: an implementing class has to supply the node list.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 get_root_node
+
+ Title   : get_root_node
+ Usage   : my $node = $tree->get_root_node();
+ Function: Get the Top Node in the tree, in this implementation
+           Trees only have one top node.
+ Returns : Bio::Tree::NodeI object
+ Args    : none
+
+=cut
+
+sub get_root_node{
+   # Interface stub: an implementing class has to supply the root node.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 number_nodes
+
+ Title   : number_nodes
+ Usage   : my $size = $tree->number_nodes
+ Function: Find the number of nodes in the tree.
+ Returns : int
+ Args    : none
+
+=cut
+
+sub number_nodes{
+   my $self = shift;
+   my $root = $self->get_root_node;
+
+   # Without a usable root node the tree counts as empty.
+   return 0 unless defined $root && $root->isa('Bio::Tree::NodeI');
+
+   # All descendents of the root, plus the root itself.
+   return ($root->descendent_count + 1);
+}
+
+=head2 total_branch_length
+
+ Title   : total_branch_length
+ Usage   : my $size = $tree->total_branch_length
+ Function: Returns the sum of the length of all branches
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub total_branch_length {
+   # Interface stub: an implementing class has to supply the sum.
+   my $self = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 height
+
+ Title   : height
+ Usage   : my $height = $tree->height
+ Function: Gets the height of tree - this is LOG_2($number_nodes)
+           WARNING: this is only true for strict binary trees.  The TreeIO
+           system is capable of building non-binary trees, for which this
+           method will currently return an incorrect value!!
+ Returns : integer
+ Args    : none
+
+=cut
+
+sub height{
+   my $self  = shift;
+   my $count = $self->number_nodes;
+
+   # An empty tree has no height (and log(0) is undefined).
+   return 0 unless $count;
+
+   # log base 2 of the node count, via the change-of-base rule.
+   return log($count) / log(2);
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : my $id = $tree->id();
+ Function: An id value for the tree
+ Returns : scalar
+ Args    : 
+
+
+=cut
+
+sub id{
+   # Interface stub: an implementing class has to supply the tree id.
+   # (The list archive had rewritten "@args" as " at args"; restored.)
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $obj->score($newval)
+ Function: Sets the associated score with this tree
+           This is a generic slot which is probably best used 
+           for log likelihood or other overall tree score
+ Returns : value of score
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub score{
+   # Interface stub: an implementing class has to supply the accessor.
+   my $self  = shift;
+   my $value = shift;
+   $self->throw_not_implemented();
+}
+
+=head2 get_leaf_nodes
+
+ Title   : get_leaf_nodes
+ Usage   : my @leaves = $tree->get_leaf_nodes()
+ Function: Returns the leaves (tips) of the tree
+ Returns : Array of Bio::Tree::NodeI objects
+ Args    : none
+
+
+=cut
+
+sub get_leaf_nodes{
+   my $self = shift;
+
+   # Keep only those nodes that report themselves as leaves.
+   my @leaves;
+   foreach my $node ( $self->get_nodes(-sortby  => 'none') ) {
+       push @leaves, $node if $node->is_Leaf();
+   }
+   return @leaves;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/TreeEventBuilder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/TreeEventBuilder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/TreeEventBuilder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,357 @@
+# $Id: TreeEventBuilder.pm,v 1.23.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::TreeEventBuilder
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::TreeEventBuilder - Build Bio::Tree::Tree's and 
+  Bio::Tree::Node's from Events 
+
+=head1 SYNOPSIS
+
+# internal use only
+
+=head1 DESCRIPTION
+
+This object will take events and build a Bio::Tree::TreeI compliant
+object made up of Bio::Tree::NodeI objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::TreeEventBuilder;
+use strict;
+
+use Bio::Tree::Tree;
+use Bio::Tree::Node;
+
+use base qw(Bio::Root::Root Bio::Event::EventHandlerI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO::TreeEventBuilder();
+ Function: Builds a new Bio::TreeIO::TreeEventBuilder object 
+ Returns : Bio::TreeIO::TreeEventBuilder
+ Args    :
+
+
+=cut
+
+sub new {
+  # (The list archive had rewritten "@args" as " at args"; restored.)
+  my ($class,@args) = @_;
+
+  my $self = $class->SUPER::new(@args);
+  my ($treetype, $nodetype) = $self->_rearrange([qw(TREETYPE
+                                                    NODETYPE)], @args);
+  # Fall back to the stock tree and node classes.
+  $treetype ||= 'Bio::Tree::Tree';
+  $nodetype ||= 'Bio::Tree::Node';
+
+  # Both classes must be loadable before any events are handled.
+  eval {
+      $self->_load_module($treetype);
+      $self->_load_module($nodetype);
+  };
+
+  if( $@ ) {
+      $self->throw("Could not load module $treetype or $nodetype. \n$@\n");
+  }
+  $self->treetype($treetype);
+  $self->nodetype($nodetype);
+  # Nesting depth of 'tree' elements seen so far.
+  $self->{'_treelevel'} = 0;
+  return $self;
+}
+
+=head2 treetype
+
+ Title   : treetype
+ Usage   : $obj->treetype($newval)
+ Function: 
+ Returns : value of treetype
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub treetype{
+   my $self  = shift;
+   my $value = shift;
+   # Only a defined argument overwrites the stored class name.
+   $self->{'treetype'} = $value if defined $value;
+   return $self->{'treetype'};
+}
+
+=head2 nodetype
+
+ Title   : nodetype
+ Usage   : $obj->nodetype($newval)
+ Function: 
+ Returns : value of nodetype
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub nodetype{
+   my $self  = shift;
+   my $value = shift;
+   # Only a defined argument overwrites the stored class name.
+   $self->{'nodetype'} = $value if defined $value;
+   return $self->{'nodetype'};
+}
+
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_document
+
+ Title   : start_document
+ Usage   : $handler->start_document
+ Function: Begins a Tree event cycle
+ Returns : none 
+ Args    : none
+
+=cut
+
+sub start_document {
+   my $self = shift;
+   # Start from a clean slate: no open elements, no pending node
+   # attribute hashes, no finished nodes.
+   $self->{'_lastitem'} = {};
+   @{$self}{ '_currentitems', '_currentnodes' } = ( [], [] );
+   return;
+}
+
+=head2 end_document
+
+ Title   : end_document
+ Usage   : my $tree = $handler->end_document($label);
+ Function: Finishes a Phylogeny cycle
+ Returns : A Bio::Tree::TreeI object
+ Args    : $label - id given to the root node (optional)
+
+=cut
+
+sub end_document {
+    my ($self,$label) = @_;
+
+    # The label, if any, becomes the id of a newly created root node.
+    my $root = $self->nodetype->new( -id      => $label,
+                                     -verbose => $self->verbose );
+
+    # Every node still on the stack is attached directly to that root,
+    # oldest first.
+    my $stack = $self->{'_currentnodes'};
+    while ( @{$stack} ) {
+        $root->add_Descendent( shift @{$stack} );
+    }
+
+    $self->debug("Root node is " . $root->to_string()."\n");
+    if( $self->verbose > 0 ) {
+        $self->debug("node is ". $_->to_string(). "\n")
+            for $root->get_Descendents;
+    }
+
+    my $tree = $self->treetype->new( -verbose => $self->verbose,
+                                     -root    => $root );
+    return $tree;
+}
+
+=head2 start_element
+
+ Title   : start_element
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    : $data => hashref with key 'Name'
+
+=cut
+
+sub start_element{
+   my ($self,$data) = @_;
+   my $name = $data->{'Name'};
+
+   # Count one more open element of this name ...
+   $self->{'_lastitem'}->{$name}++;
+
+   $self->debug("starting element: $data->{Name}\n");
+   # ... and remember it as the innermost open element.
+   push @{$self->{'_lastitem'}->{'current'}}, $name;
+
+   if( $name eq 'node' ) {
+       # A new, empty attribute hash that characters() will fill in.
+       push @{$self->{'_currentitems'}}, {};
+   } elsif ( $name eq 'tree' ) {
+       $self->{'_treelevel'}++;
+   }
+}
+
+=head2 end_element
+
+ Title   : end_element
+ Usage   : 
+ Function:
+ Returns : none
+ Args    : $data => hashref with key 'Name'
+
+=cut
+
+sub end_element{
+   my ($self,$data) = @_;
+   my $name = $data->{'Name'};
+
+   $self->debug("end of element: $data->{Name}\n");
+
+   # Snapshot of the state before this element is processed: the tree
+   # nesting level, the size of the finished-node stack, and how many
+   # nodes were recorded one level further down.
+   my $level    = $self->{'_treelevel'};
+   my $curcount = scalar @{$self->{'_currentnodes'}};
+   my $levelct  = $self->{'_nodect'}->[$level + 1] || 0;
+
+   if( $name eq 'node' ) {
+       # Turn the collected attribute hash into a real node object.
+       my $node  = pop @{$self->{'_currentitems'}};
+       my $tnode = $self->nodetype->new( -verbose => $self->verbose,
+                                         %{$node} );
+       $self->debug( "new node will be ".$tnode->to_string."\n");
+
+       if ( $levelct > 0 && !$node->{'-leaf'} ) {
+           $self->debug(join(',', map { $_->to_string }
+                             @{$self->{'_currentnodes'}}). "\n");
+           if( $levelct > $curcount ) {
+               $self->throw("something wrong with event construction treelevel ".
+                            "$level is recorded as having $levelct nodes  ".
+                            "but current nodes at this level is $curcount\n");
+           }
+           # The last $levelct nodes on the stack become the children
+           # of this internal node.
+           foreach my $child ( splice( @{$self->{'_currentnodes'}}, - $levelct) ) {
+               $self->debug("adding desc: " . $child->to_string . "\n");
+               $tnode->add_Descendent($child);
+           }
+           $self->{'_nodect'}->[$level + 1] = 0;
+       }
+
+       push @{$self->{'_currentnodes'}}, $tnode;
+       $self->{'_nodect'}->[$level]++;
+
+       $self->debug ("added node: nodes in stack is $curcount, treelevel: $level, nodect: $levelct\n");
+
+   } elsif( $name eq 'tree' ) {
+       $self->debug("end of tree: nodes in stack is $curcount\n");
+       $self->{'_treelevel'}--;
+   }
+
+   # This element is no longer open.
+   $self->{'_lastitem'}->{ $name }--;
+
+   pop @{$self->{'_lastitem'}->{'current'}};
+}
+
+
+=head2 in_element
+
+ Title   : in_element
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub in_element{
+   my ($self,$e) = @_;
+   my $open = $self->{'_lastitem'};
+
+   # No element is open at all.
+   return 0 unless defined $open && defined $open->{'current'}->[-1];
+
+   # True only when $e is the innermost open element.
+   return ($e eq $open->{'current'}->[-1]);
+}
+
+=head2 within_element
+
+ Title   : within_element
+ Usage   :
+ Function:
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub within_element{
+   my $self = shift;
+   my $e    = shift;
+   # The stored open-count for this element name (undef if never seen).
+   return $self->{'_lastitem'}->{$e};
+}
+
+=head2 characters
+
+ Title   : characters
+ Usage   : $handler->characters($text);
+ Function: Processes characters 
+ Returns : none
+ Args    : text string
+
+
+=cut
+
+sub characters{
+   my ($self,$ch) = @_;
+   if( $self->within_element('node') ) {
+       # Take the attribute hash of the node being built off the stack;
+       # it is pushed back once the text has been filed.
+       my $item = pop @{$self->{'_currentitems'}};
+
+       # Elements whose text is stored unchanged, and the key it goes under.
+       my %key_for = ( 'id'          => '-id',
+                       'description' => '-desc',
+                       'tag_name'    => '-NHXtagname',
+                       'leaf'        => '-leaf' );
+
+       if( my ($numeric) = grep { $self->in_element($_) }
+                           qw(bootstrap branch_length) ) {
+           # leading/trailing Whitespace-B-Gone
+           for ($ch) { s/^\s+//; s/\s+$//; }
+           $item->{"-$numeric"} = $ch;
+       } elsif ( $self->in_element('tag_value') ) {
+           # Pair the value with the tag name seen just before it.
+           $item->{'-nhx'}->{$item->{'-NHXtagname'}} = $ch;
+           delete $item->{'-NHXtagname'};
+       } elsif ( my ($plain) = grep { $self->in_element($_) }
+                               sort keys %key_for ) {
+           $item->{ $key_for{$plain} } = $ch;
+       }
+       push @{$self->{'_currentitems'}}, $item;
+   }
+   $self->debug("chars: $ch\n");
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/cluster.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/cluster.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/cluster.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,188 @@
+# $Id: cluster.pm,v 1.4.4.2 2006/11/27 15:46:48 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::cluster
+#
+# Contributed by Guillaume Rousse <Guillaume-dot-Rousse-at-inria-dot-fr>
+#
+# Copyright INRIA
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::cluster - A TreeIO driver module for parsing Algorithm::Cluster::treecluster() output
+
+=head1 SYNOPSIS
+
+  # do not use this module directly
+  use Bio::TreeIO;
+  use Algorithm::Cluster;
+  my ($result, $linkdist) = Algorithm::Cluster::treecluster(
+    distances => $matrix
+  );
+  my $treeio = new Bio::TreeIO(
+    -format   => 'cluster',
+    -result   =>  $result,
+    -linkdist =>  $linkdist,
+    -labels   =>  $labels
+  );
+  my $tree = $treeio->next_tree;
+
+=head1 DESCRIPTION
+
+This is a driver module for parsing Algorithm::Cluster::treecluster() output.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Guillaume Rousse
+
+Email Guillaume-dot-Rousse-at-inria-dot-fr
+
+=head1 CONTRIBUTORS
+
+Jason Stajich - jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::cluster;
+use strict;
+
+use Bio::Event::EventGeneratorI;
+use IO::String;
+
+use base qw(Bio::TreeIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO::cluster();
+ Function: Builds a new Bio::TreeIO::cluster object for reading Algorithm::Cluster::treecluster output
+ Returns : Bio::TreeIO::cluster
+ Args    :-result   => Algorithm::Cluster result
+          -linkdist => distance between links
+          -labels   => node labels
+
+=cut
+
+sub _initialize {
+  my ($self, @params) = @_;
+
+  # Keep the three clustering inputs on the object, then let the parent
+  # class process the same argument list.
+  @{$self}{ '_result', '_linkdist', '_labels' } =
+      $self->_rearrange([qw(RESULT LINKDIST LABELS)], @params);
+  $self->SUPER::_initialize(@params);
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $treeio->next_tree
+ Function: Gets the next tree in the stream
+ Returns : Bio::Tree::TreeI
+ Args    : none
+
+
+=cut
+
+sub next_tree {
+    my $self = shift;
+
+    unless( $self->{_result} ) {
+	$self->warn("Must provide value 'result' and 'linkdist' and 'labels' when initializing a TreeIO::cluster object");
+	return;
+    }
+
+    $self->_eventHandler->start_document();
+
+    # build tree from the root: the last row of the result holds the
+    # two top-level branches
+    $self->_eventHandler->start_element({Name => 'tree'});
+    foreach my $column ( 0, 1 ) {
+	$self->_recurse(-1, $column);
+    }
+    $self->_eventHandler->end_element({Name => 'tree'});
+
+    return $self->_eventHandler->end_document;
+}
+
+sub _recurse {
+    my ($self, $line, $column) = @_;
+
+    # A non-negative entry is a leaf (an index into the labels); a
+    # negative entry refers to another row of the result.
+    my $id      = $self->{_result}->[$line]->[$column];
+    my $is_leaf = ( $id >= 0 );
+
+    # Send one text value wrapped in its start/end element events.
+    my $emit = sub {
+	my ($name, $text) = @_;
+	$self->_eventHandler->start_element({Name => $name});
+	$self->_eventHandler->characters($text);
+	$self->_eventHandler->end_element({Name => $name});
+    };
+
+    if ($is_leaf) {
+	$self->debug("leaf $id\n");
+    } else {
+	$self->debug("internal node $id\n");
+    }
+    $self->debug("distance $self->{_linkdist}->[$line]\n");
+    $self->debug("label $self->{_labels}->[$id]\n") if $is_leaf;
+
+    $self->_eventHandler->start_element({Name => 'node'});
+    $emit->('branch_length', $self->{_linkdist}->[$line]);
+    if ($is_leaf) {
+	$emit->('id', $self->{_labels}->[$id]);
+	$emit->('leaf', 1);
+    } else {
+	$emit->('leaf', 0);
+	# Descend into the row this entry points at: -1 => 0, -2 => 1, ...
+	$self->_eventHandler->start_element({Name => 'tree'});
+	my $child_id = - ($id + 1);
+	$self->_recurse($child_id, $_) for ( 0, 1 );
+	$self->_eventHandler->end_element({Name => 'tree'});
+    }
+    $self->_eventHandler->end_element({Name => 'node'});
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   :
+ Function: Sorry not possible with this format
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub write_tree{
+    # This driver is read-only; any attempt to write is an error.
+    my $self = shift;
+    $self->throw("Sorry the format 'cluster' can only be used as an input format");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/lintree.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/lintree.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/lintree.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,242 @@
+# $Id: lintree.pm,v 1.8.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::lintree
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::lintree - Parser for lintree output trees
+
+=head1 SYNOPSIS
+
+  # do not use directly, use through Bio::TreeIO
+  use Bio::TreeIO;
+  my $treeio = new Bio::TreeIO(-format => 'lintree',
+                               -file   => 't/data/crab.nj');
+  my $tree = $treeio->next_tree;
+
+=head1 DESCRIPTION
+
+Parser for the lintree output which looks like this
+
+  13 sequences     1000 bootstraping
+1 A-salina
+2 C-vittat
+3 C-sp.
+4 L-aequit
+5 P-camtsc
+6 E-tenuim
+7 L-splend
+8 P-bernha
+9 P-acadia
+10 P-p(NE)
+11 P-p(GU)
+12 P-l(NE)
+13 P-l(GU)
+ 14 and   2        0.098857      1000
+ 14 and   3        0.127932      1000
+ 15 and   1        0.197471      1000
+ 15 and  14        0.029273       874
+ 16 and  10        0.011732      1000
+ 16 and  11        0.004529      1000
+ 17 and  12        0.002258      1000
+ 17 and  13        0.000428      1000
+ 18 and  16        0.017512      1000
+ 18 and  17        0.010824       998
+ 19 and   4        0.006534      1000
+ 19 and   5        0.006992      1000
+ 20 and  15        0.070461      1000
+ 20 and  18        0.030579       998
+ 21 and   8        0.003339      1000
+ 21 and   9        0.002042      1000
+ 22 and   6        0.011142      1000
+ 22 and  21        0.010693       983
+ 23 and  20        0.020714       996
+ 23 and  19        0.020350      1000
+ 24 and  23        0.008665       826
+ 24 and  22        0.013457       972
+ 24 and   7        0.025598      1000
+
+See http://www.bio.psu.edu/People/Faculty/Nei/Lab/software.htm for access
+to the program and N Takezaki, A Rzhetsky, and M Nei, "Phylogenetic test
+of the molecular clock and linearized trees." Mol Biol Evol 12(5):823-33.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Ideas and discussion from:
+ Alan Christoffels
+ Avril Coghlan
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::lintree;
+use vars qw(%Defaults);
+use strict;
+
+
+use base qw(Bio::TreeIO);
+$Defaults{'NodeType'} = "Bio::Tree::Node";
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO::lintree();
+ Function: Builds a new Bio::TreeIO::lintree object 
+ Returns : an instance of Bio::TreeIO::lintree
+ Args    : -nodetype => Node type to create [default Bio::Tree::Node]
+
+
+=cut
+
+sub _initialize {
+    # (The list archive had rewritten "@args" as " at args"; restored.)
+    my ($self,@args) = @_;
+    $self->SUPER::_initialize(@args);
+
+    # Node class to instantiate; falls back to the package default.
+    my ($nodetype) = $self->_rearrange([qw(NODETYPE)],@args);
+    $nodetype ||= $Defaults{'NodeType'};
+    $self->nodetype($nodetype);
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $treeio->next_tree
+ Function: Gets the next tree in the stream
+ Returns : Bio::Tree::TreeI
+ Args    : none
+
+
+=cut
+
+sub next_tree {
+    my ($self) = @_;
+    my $seentop = 0;
+    # (The list archive had rewritten "@nodes" as " at nodes"; restored.)
+    my (%data, @nodes);
+    my $nodetype = $self->nodetype;
+
+    # Each line is read into a lexical so that the caller's $_ is never
+    # overwritten.
+    while( defined( my $line = $self->_readline ) ) {
+	if( $line =~ /^\s*(\d+)\s+sequences/ ) {
+	    # A second header line belongs to the next tree: put it back.
+	    if( $seentop ) {
+		$self->_pushback($line);
+		last;
+	    }
+	    $seentop = 1;
+	} elsif( $line =~ /^(\d+)\s+(\S+)\s*$/ ) {
+	    # deal with setting an outgroup: the first sequence listed is
+	    # used until an explicit "outgroup:" line is seen
+	    unless( defined $data{'outgroup'} ) {
+		$data{'outgroup'} = [$1,$2];
+	    }
+	    $nodes[$1 - 1] = { '-id' => $2 };
+	} elsif( $line =~ m/^\s*(\d+)\s+and\s+(\d+)\s+(\-?\d+\.\d+)(?:\s+(\d+))?/ ) {
+	    my ($node,$descend,$blength,$bootstrap) = ( $1, $2, $3, $4 );
+	    # need to -- descend and node because
+	    # array is 0 based
+	    $node--; $descend--;
+	    $nodes[$descend]->{'-branch_length'} = $blength;
+	    $nodes[$descend]->{'-bootstrap'}     = $bootstrap;
+	    $nodes[$node]->{'-id'} = $node+1;
+	    push @{$nodes[$node]->{'-d'}}, $descend;
+	} elsif( $line =~ /\s+(\S+)\-distance was used\./ ) {
+	    # No /x here: under /x the literal blanks in the pattern are
+	    # ignored and the line could never match.
+	    $data{'method'} = $1;
+	} elsif( $line =~ /\s*seed=(\d+)/ ) {
+	    $data{'seed'} = $1;
+	} elsif( $line =~ m/^outgroup:\s+(\d+)\s+(\S+)/ ) {
+	    $data{'outgroup'} = [$1,$2];
+	}
+    }
+
+    # if there are no more trees, return undef
+    return unless @nodes;
+
+    # One node object per slot; a slot that was never filled in gets a
+    # node without attributes instead of dying on an undef dereference.
+    my @treenodes = map { $nodetype->new( %{ $_ || {} } ) } @nodes;
+
+    # Wire each node to the descendents recorded for it.
+    foreach my $i ( 0 .. $#nodes ) {
+	my $desc = ( $nodes[$i] || {} )->{'-d'} || [];
+	foreach my $ptr ( @{$desc} ) {
+	    $treenodes[$i]->add_Descendent($treenodes[$ptr]);
+	}
+    }
+
+    # The highest-numbered node is taken as the root.
+    my $T = Bio::Tree::Tree->new(-root => (pop @treenodes) );
+    if( $data{'outgroup'} ) {
+	my ($number, $name) = @{ $data{'outgroup'} };
+	# The sequence number in the file is 1-based, @treenodes is
+	# 0-based, so the outgroup lives at index $number - 1.
+	my $outgroup = $number > 0 ? $treenodes[$number - 1] : undef;
+	if( ! defined $outgroup) {
+	    $self->warn("cannot find '". $name. "'\n");
+	} else {
+	    $T->reroot($outgroup->ancestor);
+	}
+    }
+    return $T;
+}
+
+=head2 nodetype
+
+ Title   : nodetype
+ Usage   : $obj->nodetype($newval)
+ Function: 
+ Example : 
+ Returns : value of nodetype (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub nodetype{
+   my ($self,$value) = @_;
+
+   # Plain getter when no new class name is supplied.
+   return $self->{'nodetype'} unless defined $value;
+
+   # The class must be loadable ...
+   eval "require $value";
+   if( $@ ) {
+       $self->throw("$@: Unrecognized Node type for ".ref($self).
+		    "'$value'");
+   }
+
+   # ... and an instance of it must be a Bio::Tree::NodeI.
+   my $probe = bless {},$value;
+   if( ! $probe->isa('Bio::Tree::NodeI') ) {
+       $self->throw("Must provide a valid Bio::Tree::NodeI or child class to SeqFactory Not $value");
+   }
+
+   $self->{'nodetype'} = $value;
+   return $self->{'nodetype'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/newick.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/newick.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/newick.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,424 @@
+# $Id: newick.pm,v 1.36.4.2 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::newick
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::newick - TreeIO implementation for parsing 
+  Newick/New Hampshire/PHYLIP format.
+
+=head1 SYNOPSIS
+
+  # do not use this module directly
+  use Bio::TreeIO;
+  my $treeio = new Bio::TreeIO(-format => 'newick', 
+                               -file => 't/data/LOAD_Ccd1.dnd');
+  my $tree = $treeio->next_tree;
+
+=head1 DESCRIPTION
+
+This module handles parsing and writing of Newick/PHYLIP/New Hampshire format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::newick;
+use vars qw($DefaultBootstrapStyle);
+use strict;
+
+use Bio::Event::EventGeneratorI;
+
+#initialize some package variables, could use 'our' but fails in perl < 5.6
+
+$DefaultBootstrapStyle = 'traditional';
+use base qw(Bio::TreeIO);
+
+
+=head2 new
+
+ Title   : new
+ Args    : -print_count     => boolean  default is false
+           -bootstrap_style => set the bootstrap style (one of nobranchlength,
+							molphy, traditional)
+           -order_by        => set the order by sort method 
+                               (see L<Bio::Tree::Node::each_Descendent()> )
+
+=cut
+
+sub _initialize {
+    my ($self, @params) = @_;
+    $self->SUPER::_initialize(@params);
+
+    my ($print_count, $style, $order_by) =
+	$self->_rearrange([qw(PRINT_COUNT BOOTSTRAP_STYLE ORDER_BY)], @params);
+
+    # Unset options fall back to "no tree count" and the package-wide
+    # default bootstrap style; the sort order is only set when given.
+    $self->print_tree_count($print_count || 0);
+    $self->bootstrap_style($style || $DefaultBootstrapStyle);
+    if( defined $order_by ) {
+	$self->order_by($order_by);
+    }
+    return;
+}
+
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $treeio->next_tree
+ Function: Gets the next tree in the stream
+ Returns : L<Bio::Tree::TreeI>
+ Args    : none
+
+
+=cut
+
+sub next_tree{
+   my ($self) = @_;
+   # One tree per record: records end with ";" followed by a newline.
+   local $/ = ";\n";
+
+   # The record is held in a lexical.  Assigning it to the global $_
+   # would overwrite the caller's $_ (and whatever list element it
+   # happens to alias).
+   my $entry = $self->_readline;
+   return unless $entry;
+   $entry =~ s/[\r\n]//gs;
+   my $score;
+   my $despace = sub {my $dirty = shift; $dirty =~ s/\s+//gs; return $dirty};
+   my $dequote = sub {my $dirty = shift; $dirty =~ s/^"?\s*(.+?)\s*"?$/$1/; return $dirty};
+   # Around each double-quoted label: strip whitespace outside the
+   # quotes and remove the quotes themselves.
+   $entry =~ s/([^"]*)(".+?")([^"]*)/$despace->($1) . $dequote->($2) . $despace->($3)/egsx;
+   # A leading [...] block is taken as the tree score.
+   if( $entry =~ s/^\s*\[([^\]]+)\]// ) {
+       my $match = $1;
+       $match =~ s/\s//g;
+       $match =~ s/lh\=//;
+       if( $match =~ /([-\d\.+]+)/ ) {
+	   $score = $1;
+       }
+   }
+
+   $self->debug("entry is $entry\n");
+#   my $empty = chr(20);
+ 
+   # replace empty labels with a tag
+#   s/\(,/\($empty,/ig;
+#   s/,,/,$empty,/ig;
+#   s/,,/,/ig;
+#   s/,\)/,$empty\)/ig;
+#   s/\"/\'/ig;
+
+   # Send one text value wrapped in its start/end element events.
+   my $emit = sub {
+       my ($name, $text) = @_;
+       $self->_eventHandler->start_element( { 'Name' => $name } );
+       $self->_eventHandler->characters($text);
+       $self->_eventHandler->end_element( { 'Name' => $name } );
+   };
+
+   # $chars collects the text since the last structural character;
+   # $lastevent / $prev_event are the last two structural characters.
+   my $chars = '';
+   $self->_eventHandler->start_document;
+   my ($prev_event,$lastevent) = ('','');
+   foreach my $ch ( split(//,$entry) ) {
+       if( $ch eq ';' ) {
+	   # End of the tree; any pending text labels the root.
+	   my $tree = $self->_eventHandler->end_document($chars);
+	   $tree->score($score) if defined $score;
+	   return $tree;
+       } elsif( $ch eq '(' ) {
+	   $chars = '';
+	   $self->_eventHandler->start_element( {'Name' => 'tree'} );
+       } elsif( $ch eq ')' || $ch eq ',' ) {
+	   # Both characters finish the current node; ')' additionally
+	   # closes the enclosing tree level.
+	   my $closing  = ( $ch eq ')' );
+	   my $had_text = length($chars);
+	   if( $had_text ) {
+	       if( $lastevent eq ':' ) {
+		   # Text after ':' is the branch length of the node
+		   # that the ':' already opened.
+		   $emit->('branch_length', $chars);
+		   $lastevent = $prev_event;
+	       } else {
+		   $self->debug( ($closing ? "internal node, id" : "leaf id") .
+				 " with no branchlength is $chars\n");
+		   $self->_eventHandler->start_element( { 'Name' => 'node' } );
+		   $emit->('id', $chars);
+	       }
+	   } else {
+	       # Unlabelled node.
+	       $self->_eventHandler->start_element( { 'Name' => 'node' } );
+	   }
+
+	   # A node is a leaf unless it directly follows a ')'.  Before a
+	   # ')' the leaf flag is only sent when there was pending text.
+	   if( $had_text || ! $closing ) {
+	       my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0;
+	       $emit->('leaf', $leafstatus);
+	   }
+
+	   $self->_eventHandler->end_element( {'Name' => 'node'} );
+	   if( $closing ) {
+	       $self->_eventHandler->end_element( {'Name' => 'tree'} );
+	   }
+	   $chars = '';
+       } elsif( $ch eq ':' ) {
+	   # Text before ':' is the node id; the branch length follows.
+	   $self->debug("id with a branchlength coming is $chars\n");
+	   $self->_eventHandler->start_element( { 'Name' => 'node' } );
+	   $emit->('id', $chars);
+	   $chars = '';
+       } else {
+	   # Ordinary character: keep collecting.
+	   $chars .= $ch;
+	   next;
+       }
+       $prev_event = $lastevent;
+       $lastevent = $ch;
+   }
+   # No terminating ';' was seen.
+   return;
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Write a tree out to data stream in newick/phylip format
+ Returns : none
+ Args    : L<Bio::Tree::TreeI> object
+
+=cut
+
+sub write_tree{
+   # NOTE: the parameter list must read "@trees"; the archived text had it
+   # mangled into " at trees", which is not valid Perl.
+   my ($self,@trees) = @_;
+   my $orderby = $self->order_by;
+   my $bootstrap_style = $self->bootstrap_style;
+   # Optional leading line holding the number of trees that follow.
+   if( $self->print_tree_count ){
+       $self->_print(sprintf(" %d\n",scalar @trees));
+   }
+   my $nl = $self->newline_each_node;
+   foreach my $tree( @trees ) {
+       # The helper returns one string per top-level piece; the pieces are
+       # joined with commas and terminated with ';' below.
+       my @data = _write_tree_Helper($tree->get_root_node,
+                                     $bootstrap_style,
+                                     $orderby,
+                                     $nl);
+       if( $nl ) {
+           chomp($data[-1]);# remove last newline
+           $self->_print(join(",\n", @data), ";\n");
+       } else {
+           $self->_print(join(',', @data), ";\n");
+       }
+   }
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return;
+}
+
+sub _write_tree_Helper {
+    # Recursively render $node and its descendants as a list of
+    # newick fragments (the caller joins them with commas).
+    #   $node    - node to render (undef yields an empty list)
+    #   $style   - bootstrap style string, matched against
+    #              /nobranchlength/ and /molphy/
+    #   $orderby - passed straight through to each_Descendent()
+    #   $nl      - true to put a newline after every parenthesis
+    my ($node,$style,$orderby,$nl) = @_;
+    return () if (!defined $node);
+    $style = '' unless defined $style;
+
+    # depth first: render every child before decorating this node
+    my @data;
+    for my $child ( $node->each_Descendent($orderby) ) {
+        push @data, _write_tree_Helper($child,$style,$orderby,$nl);
+    }
+
+    my $id = $node->id_output;
+    my $bs = $node->bootstrap; # whitespace is stripped from the bootstrap
+    $bs =~ s/\s+//g if defined $bs;
+    my $bl = $node->branch_length;
+
+    # true when no branch length is to be written for this node
+    my $skip_bl = ( ! defined $bl || ! length($bl) ||
+                    ($style && $style =~ /nobranchlength/i) ) ? 1 : 0;
+
+    unless( @data ) {
+        # nothing below this node: "id", "id:length", or nothing at all
+        return @data unless defined $id || defined $bl;
+        $id ||= '';
+        push @data, $skip_bl ? $id : "$id:$bl";
+        return @data;
+    }
+
+    # wrap the children in parentheses
+    my ($open,$close) = $nl ? ("(\n",")\n") : ("(",")");
+    $data[0]  = $open . $data[0];
+    $data[-1] .= $close;
+
+    my $suffix = '';
+    if( $node->is_Leaf ) {
+        $node->debug("node is a leaf!  This is unexpected...");
+        $id ||= '';
+        $suffix = $skip_bl ? $id : "$id:$bl";
+    } elsif( $skip_bl ) {
+        # label only; the bootstrap wins over the id
+        if( defined $id || defined $bs ) {
+            $suffix = defined $bs ? $bs : $id;
+        }
+    } elsif( $style =~ /molphy/i ) {
+        # molphy style: id, then length, then [bootstrap]
+        $suffix .= $id if defined $id;
+        $suffix .= ( $bl =~ /\#/ ) ? $bl : ":$bl";
+        $suffix .= "[$bs]" if defined $bs;
+    } elsif( defined $bs || defined $id ) {
+        # traditional style of
+        # ((A:1,B:2)81:3);   where 3 is internal node branch length
+        #                    and 81 is bootstrap/node label
+        $suffix = defined $bs ? "$bs:$bl" : "$id:$bl";
+    } else {
+        $suffix = ( $bl =~ /\#/ ) ? $bl : ":$bl";
+    }
+    $data[-1] .= $suffix;
+    return @data;
+}
+
+=head2 print_tree_count
+
+ Title   : print_tree_count
+ Usage   : $obj->print_tree_count($newval)
+ Function: Get/Set flag for printing out the tree count (paml,protml way)
+ Returns : value of print_tree_count (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub print_tree_count{
+    my ($self,@newval) = @_;
+    # setter: store and hand back exactly what was passed (undef included)
+    if( @newval ) {
+        return $self->{'_print_tree_count'} = $newval[0];
+    }
+    # getter: an unset or false flag is reported as 0
+    return $self->{'_print_tree_count'} || 0;
+}
+
+=head2 bootstrap_style
+
+ Title   : bootstrap_style
+ Usage   : $obj->bootstrap_style($newval)
+ Function: A description of how bootstraps and branch lengths are
+           written, as the ID part of the internal node or else in []
+           in the branch length (Molphy-like; I am sure there is a
+           better name for this but am not sure where to go for some
+           sort of format documentation)
+
+           If no branch lengths are requested then no bootstraps are usually
+           written (unless someone REALLY wants this functionality...)
+
+           Can take on strings which contain the possible values of
+           'nobranchlength'   --> don't draw any branch lengths - this
+                                  is helpful if you don't want to have to 
+                                  go through and delete branch len on all nodes
+           'molphy' --> draw bootstraps (100) like
+                                  (A:0.11,B:0.22):0.33[100];
+           'traditional' --> draw bootstraps (100) like
+                                  (A:0.11,B:0.22)100:0.33;
+ Returns : value of bootstrap_style (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub bootstrap_style{
+    my $self = shift;
+    my $val = shift;
+    if( defined $val ) {
+        # Accept any string that contains one of the known style names.
+        # (Previously the pattern was /^nobranchlength|molphy|traditional/,
+        # where the ^ applied to the first alternative only, so
+        # 'nobranchlength' had to lead the string while the other two
+        # could appear anywhere.)
+        if( $val !~ /nobranchlength|molphy|traditional/i ) {
+            $self->warn("requested an unknown bootstrap style $val, expect one of nobranchlength,molphy,traditional, not updating value.  Default is $DefaultBootstrapStyle\n");
+        } else {
+            $self->{'_bootstrap_style'} = $val;
+        }
+    }
+    # fall back to the package default when nothing (or a false value) is stored
+    return $self->{'_bootstrap_style'} || $DefaultBootstrapStyle;
+}
+
+=head2 order_by
+
+ Title   : order_by
+ Usage   : $obj->order_by($newval)
+ Function: Allow node order to be specified (typically "alpha")
+           See L<Bio::Tree::Node/each_Descendent>
+ Returns : value of order_by (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub order_by {
+    my ($self,@newval) = @_;
+    # with an argument (even undef) act as setter, otherwise as getter
+    if( @newval ) {
+        return $self->{'order_by'} = $newval[0];
+    }
+    return $self->{'order_by'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nexus.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nexus.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nexus.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,317 @@
+# $Id: nexus.pm,v 1.13.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::nexus
+#
+# Cared for by Jason Stajich <jason-at-open-bio-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::nexus - A TreeIO driver module for parsing Nexus tree output from PAUP
+
+=head1 SYNOPSIS
+
+  use Bio::TreeIO;
+  my $in = new Bio::TreeIO(-file => 't/data/cat_tre.tre');
+  while( my $tree = $in->next_tree ) {
+  }
+
+=head1 DESCRIPTION
+
+This is a driver module for parsing PAUP Nexus tree format which
+basically is just a remapping of trees.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-open-bio-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::nexus;
+use strict;
+
+use Bio::Event::EventGeneratorI;
+use IO::String;
+
+use base qw(Bio::TreeIO);
+
+=head2 new
+
+ Title   : new
+ Args    : -header    => boolean  default is true 
+                         print/do not print #NEXUS header
+           -translate => boolean default is true
+                         print/do not print Node Id translation to a number
+
+=cut
+
+sub _initialize {
+    my $self = shift;
+    $self->SUPER::_initialize(@_);
+    my ($hdr,$trans) = $self->_rearrange([qw(HEADER TRANSLATE)], @_);
+    # both switches default to on; only a defined argument overrides them
+    $hdr   = 1 unless defined $hdr;
+    $trans = 1 unless defined $trans;
+    $self->header($hdr);
+    $self->translate_node($trans);
+}
+
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $treeio->next_tree
+ Function: Gets the next tree in the stream
+ Returns : Bio::Tree::TreeI
+ Args    : none
+
+
+=cut
+
+sub next_tree {
+    my $self = shift;
+    # the stream is parsed once, the first time a tree is requested
+    $self->_parse unless $self->{'_parsed'};
+    # hand out the tree at the cursor and advance the cursor
+    my $idx = $self->{'_treeiter'}++;
+    return $self->{'_trees'}->[$idx];
+}
+
+sub rewind {
+    my ($self) = @_;
+    # move the next_tree cursor back to the first parsed tree
+    return $self->{'_treeiter'} = 0;
+}
+
+sub _parse {
+   # Read the whole stream, pull every "tree NAME = ...;" statement out of
+   # each "begin trees;" section, parse it with the newick driver and
+   # store the resulting trees in $self->{'_trees'} for next_tree().
+   my ($self) = @_;
+
+   $self->{'_parsed'} = 1;
+   $self->{'_treeiter'} = 0;
+
+   # Skip leading blank lines.  Lexicals are used for the lines read here
+   # so that the caller's global $_ is left untouched.
+   my $first;
+   while( defined ( $first = $self->_readline ) ) {
+       next if $first =~ /^\s+$/;
+       last;
+   }
+   return unless( defined $first );
+
+   unless( $first =~ /^\#NEXUS/i ) {
+       $self->warn("File does not start with #NEXUS"); #'
+       return;
+   }
+
+   # Slurp the remainder; start from '' so a header-only file does not
+   # leave us working on an undefined string.
+   my $line = '';
+   my %translate;
+   while( defined ( my $chunk = $self->_readline ) ) {
+     $line .= $chunk;
+   }
+   $line =~ s/\n/ /g;
+   my @sections = split(/#NEXUS/i, $line);
+   for my $s ( @sections ) {
+     # strip [...] comments (one at a time when verbose, so each is logged)
+     if( $self->verbose > 0 ) {
+       while( $s =~ s/(\[[^\]]+\])// ) {
+         $self->debug("removing comment $1\n");
+       }
+     } else {
+       $s =~ s/(\[[^\]]+\])//g;
+     }
+     if( $s =~ /begin trees;(.+)(end;)?/i ) {
+       my $trees = $1;
+       # optional "translate <num> <name>, ...;" table
+       if( $trees =~ s/\s+translate\s+([^;]+);//i )  {
+         my $trans = $1;
+         for my $n ( split(/\s*,\s*/,$trans) ) {
+           my ($id,$tag) = split(/\s+/,$n);
+           $translate{$id} = $tag;
+         }
+       } else {
+         $self->debug("no translate in: $trees\n");
+       }
+       while( $trees =~ /\s+tree\s+(\S+)\s*\=
+                         \s*(?:\[\S+\])?\s*([^\;]+;)\s*/igx) {
+         my ($tree_name,$tree_str) = ($1,$2);
+
+         # MrBayes does not print colons for node label
+         # $tree_str =~ s/\)(\d*\.\d+)\)/:$1/g;
+         my $buf = IO::String->new($tree_str);
+         my $treeio = Bio::TreeIO->new(-format => 'newick',
+                                       -fh     => $buf);
+         my $tree = $treeio->next_tree;
+         # the newick driver returns nothing when it cannot build a tree;
+         # report that instead of dying on a method call on undef
+         unless( defined $tree ) {
+           $self->warn("Could not parse tree '$tree_name', skipping it");
+           next;
+         }
+         # map translated leaf labels back to their real names
+         foreach my $node ( grep { $_->is_Leaf } $tree->get_nodes ) {
+           my $id = $node->id;
+           my $lookup = $translate{$id};
+           $node->id($lookup || $id);
+         }
+         $tree->id($tree_name) if defined $tree_name;
+         push @{$self->{'_trees'}},$tree;
+       }
+     } else {
+       $self->debug("begin_trees failed: $s\n");
+     }
+   }
+   if( ! @sections ) {
+     $self->debug("warn no sections: $line\n");
+   }
+}
+
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Writes a tree onto the stream
+ Returns : none
+ Args    : Bio::Tree::TreeI
+
+
+=cut
+
+sub write_tree{
+   # NOTE: the parameter list must read "@trees"; the archived text had it
+   # mangled into " at trees", which is not valid Perl.
+   my ($self,@trees) = @_;
+   if ( $self->header ) {
+       $self->_print("#NEXUS\n\n");
+   }
+   my $translate = $self->translate_node;
+   my $time = localtime();
+   $self->_print(sprintf("Begin trees; [Treefile created %s]\n",$time));
+
+   # $first counts the trees written so far, $nodecter hands out the
+   # numbers used in the Translate table, %node2num maps leaf id => number
+   my ($first,$nodecter,%node2num) = (0,1);
+   foreach my $tree ( @trees ) {
+
+       # the Translate table is built from the leaves of the first tree only
+       if( $first == 0 &&
+           $translate ) {
+           $self->_print("\tTranslate\n");
+           $self->_print(join(",\n",
+                              map { $node2num{$_->id} = $nodecter;
+                                  sprintf("\t\t%d %s",$nodecter++,$_->id) }
+                              grep { $_->is_Leaf } $tree->get_nodes),
+                         "\n;\n");
+       }
+       my @data = _write_tree_Helper($tree->get_root_node,\%node2num);
+       # make sure the whole tree is enclosed in one pair of parentheses
+       if($data[-1] !~ /\)$/ ) {
+           $data[0] = "(".$data[0];
+           $data[-1] .= ")";
+       }
+       # by default all trees in bioperl are currently rooted
+       # something we'll try and fix one day....
+       $self->_print(sprintf("\t tree %s = [&%s] %s;\n",
+                             ($tree->id ||
+                              sprintf("Bioperl_%d",$first+1)),
+                             ( $tree->get_root_node ) ? 'R' : 'U',
+                             join(',', @data)));
+       $first++;
+   }
+   $self->_print("End;\n");
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return;
+}
+
+sub _write_tree_Helper {
+    # Recursively render $node as a list of fragments; $node2num maps
+    # node ids to the numbers announced in the Translate table.
+    my ($node,$node2num) = @_;
+    return () if (!defined $node);
+
+    my @data;
+    for my $child ( $node->each_Descendent() ) {
+        push @data, _write_tree_Helper($child,$node2num);
+    }
+
+    if( @data <= 1 ) {
+        # at most one rendered piece below: write "id", "id:length" or
+        # nothing, using the translated number when one exists
+        my $id = $node->id;
+        my $bl = $node->branch_length;
+        return @data unless defined $id || defined $bl;
+        $id = '' unless defined $id;
+        if( length($id) && $node2num->{$id} ) {
+            $id = $node2num->{$id};
+        }
+        push @data, sprintf("%s%s",$id, defined $bl ? ":" . $bl : '');
+        return @data;
+    }
+
+    # internal node: parenthesise the children, then append the length
+    $data[0]  = "(" . $data[0];
+    $data[-1] .= ")";
+    my $bl = $node->branch_length;
+    if( defined $bl ) {
+        $data[-1] .= ( $bl =~ /\#/ ) ? $bl : ":$bl";
+    }
+
+    # let's explicitly write out the bootstrap if we've got it,
+    # otherwise fall back to the (possibly translated) node id
+    my $label = $node->bootstrap;
+    unless( defined $label ) {
+        $label = $node->id;
+        if( defined $label && $node2num->{$label} ) {
+            $label = $node2num->{$label}; # translate node2num
+        }
+    }
+    $data[-1] .= sprintf("[%s]",$label) if defined $label;
+    return @data;
+}
+
+=head2 header
+
+ Title   : header
+ Usage   : $obj->header($newval)
+ Function: Get/Set the flag controlling whether write_tree prints the #NEXUS header line
+ Example : 
+ Returns : value of header (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub header{
+    my ($self,@newval) = @_;
+    # with an argument (even undef) act as setter, otherwise as getter
+    if( @newval ) {
+        return $self->{'header'} = $newval[0];
+    }
+    return $self->{'header'};
+}
+
+=head2 translate_node
+
+ Title   : translate_node
+ Usage   : $obj->translate_node($newval)
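+ Function: Get/Set the flag controlling whether write_tree prints a Translate block (leaf ids mapped to numbers)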
+ Example : 
+ Returns : value of translate_node (a scalar)
+ Args    : on set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub translate_node{
+    my ($self,@newval) = @_;
+    # with an argument (even undef) act as setter, otherwise as getter
+    if( @newval ) {
+        return $self->{'translate_node'} = $newval[0];
+    }
+    return $self->{'translate_node'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nhx.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nhx.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/nhx.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,326 @@
+# $Id: nhx.pm,v 1.18.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::nhx
+#
+# Cared for by Aaron Mackey <amackey at virginia.edu>
+#
+# Copyright Aaron Mackey
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::nhx - TreeIO implementation for parsing 
+    Newick/New Hampshire eXtended (NHX) format.
+
+=head1 SYNOPSIS
+
+  # do not use this module directly
+  use Bio::TreeIO;
+  my $treeio = new Bio::TreeIO(-format => 'nhx', -file => 'tree.dnd');
+  my $tree = $treeio->next_tree;
+
+=head1 DESCRIPTION
+
+This module handles parsing and writing of Newick/New Hampshire eXtended (NHX) format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Aaron Mackey
+
+Email amackey-at-virginia.edu
+
+=head1 CONTRIBUTORS
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::nhx;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Tree::NodeNHX;
+use Bio::Event::EventGeneratorI;
+#use XML::Handler::Subs;
+
+
+use base qw(Bio::TreeIO);
+
+sub _initialize {
+  my ($self,%args) = @_;
+  # use NHX-aware nodes unless the caller picked a node class
+  $args{-nodetype} = 'Bio::Tree::NodeNHX' unless $args{-nodetype};
+  return $self->SUPER::_initialize(%args);
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $treeio->next_tree
+ Function: Gets the next tree in the stream
+ Returns : Bio::Tree::TreeI
+ Args    : none
+
+
+=cut
+
+sub next_tree{
+    my ($self) = @_;
+    # one record is everything up to and including ";\n"
+    local $/ = ";\n";
+    # Read the entry into a lexical rather than the global $_, so that a
+    # caller iterating with $_ does not get its variable overwritten.
+    my $entry = $self->_readline;
+    return unless $entry;
+    $entry =~ s/\s+//g;
+    $self->debug("entry is $entry\n");
+    my $chars = '';
+    $self->_eventHandler->start_document;
+    # $lastevent/$prev_event track the last two structural characters;
+    # $last_leaf_event only tracks ')' and ',' (see the note further down)
+    my ($prev_event,$lastevent,$last_leaf_event) = ('','','');
+    my @ch = split(//, $entry);
+    foreach my $ch  (@ch) {
+        if( $ch eq ';' ) {
+            # end of the tree: close a still-open node and build the result
+            $self->_eventHandler->in_element('node') &&
+                $self->_eventHandler->end_element( {'Name' => 'node'});
+            return $self->_eventHandler->end_document;
+        } elsif ($ch eq '[') {
+            # start of an NHX tag block; flush whatever text came before it
+            if ( length $chars ) {
+                if ( $lastevent eq ':' ) {
+                    $self->_eventHandler->start_element( { Name => 'branch_length' } );
+                    $self->_eventHandler->characters($chars);
+                    $self->_eventHandler->end_element( { Name => 'branch_length' });
+                    $lastevent = $prev_event;
+                } else {
+                    $self->debug("id with no branchlength is $chars\n");
+                    $self->_eventHandler->start_element( { 'Name' => 'node' } );
+                    $self->_eventHandler->start_element( { 'Name' => 'id' } );
+                    $self->_eventHandler->characters($chars);
+                    $self->_eventHandler->end_element( { 'Name' => 'id' } );
+                }
+            } else {
+                $self->_eventHandler->start_element( { Name => 'node' } );
+            }
+            my $leafstatus = ( $last_leaf_event ne ')' ) ? 1 : 0;
+            $self->_eventHandler->start_element({'Name' => 'leaf'});
+            $self->_eventHandler->characters($leafstatus);
+            $self->_eventHandler->end_element({'Name' => 'leaf'});
+            $chars = '';
+
+            $self->_eventHandler->start_element( { Name => 'nhx_tag' });
+        } elsif( $ch eq '(' ) {
+            $chars = '';
+            $self->_eventHandler->start_element( {'Name' => 'tree'} );
+        } elsif($ch eq ')' ) {
+            if( length $chars ) {
+                if( $lastevent eq ':') {
+                    unless ($self->_eventHandler->within_element('nhx_tag')) {
+                        $self->_eventHandler->start_element( { 'Name' => 'branch_length'});
+                        $self->_eventHandler->characters($chars);
+                        $self->_eventHandler->end_element( {'Name' => 'branch_length'});
+                    } else {
+                        $self->throw("malformed input; end of node ) before ] found");
+                    }
+                } else {
+                    $self->debug("id with no branchlength is '$chars'\n");
+                    $self->_eventHandler->start_element( { 'Name' => 'node' } );
+                    $self->_eventHandler->start_element( { 'Name' => 'id' } );
+                    $self->_eventHandler->characters($chars);
+                    $self->_eventHandler->end_element( { 'Name' => 'id' } );
+                }
+            } elsif ( $lastevent ne ']' ) {
+                $self->_eventHandler->start_element( {'Name' => 'node'} );
+            }
+            # problem here is that we need to detect if we coming up on
+            # the end of a leaf node or a labeled internal node
+            # each can have [] and each can have :, but only leaves are
+            # NOT preceded by a ')'
+            # the [] events throw us off
+            my $leafstatus = ( $last_leaf_event ne ')' ) ? 1 : 0;
+            $self->_eventHandler->start_element({'Name' => 'leaf'});
+            $self->_eventHandler->characters($leafstatus);
+            $self->_eventHandler->end_element({'Name' => 'leaf'});
+
+            $self->_eventHandler->end_element( {'Name' => 'node'} );
+            $self->_eventHandler->end_element( {'Name' => 'tree'} );
+            $chars = '';
+            $last_leaf_event = $ch;
+
+        } elsif ( $ch eq ',' ) {
+            if( length $chars ) {
+                if( $lastevent eq ':' ) {
+                    $self->_eventHandler->start_element( { 'Name' => 'branch_length'});
+                    $self->_eventHandler->characters($chars);
+                    $self->_eventHandler->end_element( {'Name' => 'branch_length'});
+                    $lastevent = $prev_event;
+                } else {
+                    $self->debug("id with no branchlength is $chars\n");
+                    $self->_eventHandler->start_element( { 'Name' => 'node' } );
+                    $self->_eventHandler->start_element( { 'Name' => 'id' } );
+                    $self->_eventHandler->characters($chars);
+                    $self->_eventHandler->end_element( { 'Name' => 'id' } );
+                }
+            } elsif ( $lastevent ne ']' ) {
+                $self->_eventHandler->start_element( { 'Name' => 'node' } );
+            }
+            $self->_eventHandler->end_element( {'Name' => 'node'} );
+            $chars = '';
+            $last_leaf_event = $ch;
+        } elsif( $ch eq ':' ) {
+            # ':' separates NHX fields inside [...], and id from branch
+            # length outside of it
+            if ($self->_eventHandler->within_element('nhx_tag')) {
+                if ($lastevent eq '=') {
+                    $self->_eventHandler->start_element( { Name => 'tag_value' } );
+                    $self->_eventHandler->characters($chars);
+                    $self->_eventHandler->end_element( { Name => 'tag_value' } );
+                    $chars = '';
+                } else {
+                    if ($chars eq '&&NHX') {
+                        $chars = ''; # get rid of &&NHX:
+                    } else {
+                        $self->throw("Unrecognized, non \&\&NHX string: >>$chars<<");
+                    }
+                }
+            } elsif ($lastevent ne ']') {
+                $self->debug("id with a branchlength coming is $chars\n");
+                $self->_eventHandler->start_element( { 'Name' => 'node' } );
+                $self->_eventHandler->start_element( { 'Name' => 'id' } );
+                $self->_eventHandler->characters($chars);
+                $self->_eventHandler->end_element( { 'Name' => 'id' } );
+                $chars = '';
+            }
+        } elsif ( $ch eq '=' ) {
+            # '=' ends a tag name inside [...]; elsewhere it is plain text
+            if ($self->_eventHandler->within_element('nhx_tag')) {
+                $self->_eventHandler->start_element( { Name => 'tag_name' } );
+                $self->_eventHandler->characters($chars);
+                $self->_eventHandler->end_element( { Name => 'tag_name' } );
+                $chars = '';
+            } else {
+                $chars .= $ch;
+            }
+        } elsif ( $ch eq ']' ) {
+            # ']' closes the tag block only when a tag value is pending;
+            # otherwise it is kept as ordinary text and is not an event
+            if ($self->_eventHandler->within_element('nhx_tag') && $lastevent eq '=') {
+                $self->_eventHandler->start_element( { Name => 'tag_value' } );
+                $self->_eventHandler->characters($chars);
+                $self->_eventHandler->end_element( { Name => 'tag_value' } );
+                $chars = '';
+                $self->_eventHandler->end_element( { Name => 'nhx_tag' } );
+            } else {
+                $chars .= $ch;
+                next;
+            }
+        } else {
+            # ordinary character: accumulate, do not record an event
+            $chars .= $ch;
+            next;
+        }
+        $prev_event = $lastevent;
+        $lastevent = $ch;
+    }
+    return;
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Write a tree out to data stream in nhx format
+ Returns : none
+ Args    : Bio::Tree::TreeI object
+
+=cut
+
+sub write_tree{
+    # NOTE: the parameter list must read "@trees"; the archived text had it
+    # mangled into " at trees", which is not valid Perl.
+    my ($self,@trees) = @_;
+    my $nl = $self->newline_each_node;
+   foreach my $tree ( @trees ) {
+       my @data = _write_tree_Helper($tree->get_root_node,$nl);
+       # per bug # 1471 do not include enclosing brackets.
+       # this is sort of cheating but it should work
+       # remove first and last paren if the set ends in a paren
+       if($data[-1] =~ s/\)$// ) {
+           $data[0] =~ s/^\(//;
+       }
+       if( $nl ) {
+           chomp($data[-1]);# remove last newline
+           $self->_print(join(",\n", @data), ";\n");
+       } else {
+           $self->_print(join(',', @data), ";\n");
+       }
+   }
+   $self->flush if $self->_flush_on_write && defined $self->_fh;
+   return;
+}
+
+sub _write_tree_Helper {
+    # Recursively render $node as a list of NHX fragments; $nl asks for a
+    # newline after every parenthesis.
+    my ($node,$nl) = @_;
+    return () unless defined $node;
+    # rebless
+    $node = bless $node,'Bio::Tree::NodeNHX';
+
+    my @data;
+    for my $child ( $node->each_Descendent() ) {
+        push @data, _write_tree_Helper($child,$nl);
+    }
+
+    # fewer than two rendered pieces below: written like a leaf
+    if( @data <= 1 ) {
+        push @data, $node->to_string;
+        return @data;
+    }
+
+    my ($open,$close) = $nl ? ("(\n",")\n") : ("(",")");
+    $data[0]  = $open . $data[0];
+    $data[-1] .= $close;
+
+    my $id = $node->id;
+    $data[-1] .= $id  if( defined $id );
+    my $blen  = $node->branch_length;
+    $data[-1] .= ":". $blen if $blen;
+
+    # this is to not print out an empty NHX for the root node which is
+    # a convenience for how we get a handle to the whole tree
+    my @tags = $node->get_all_tags;
+    if( $node->ancestor || @tags ) {
+        my @fields = ("&&NHX");
+        for my $tag ( @tags ) {
+            push @fields, "$tag=" .join(',',$node->get_tag_values($tag));
+        }
+        $data[-1] .= '[' . join(":", @fields) . ']';
+    } else {
+        # untagged root: wrap once more in parentheses
+        $data[0]  = $open . $data[0];
+        $data[-1] .= $close;
+    }
+    return @data;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/pag.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/pag.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/pag.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,229 @@
+# $Id: pag.pm,v 1.9.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::pag
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::pag - Bio::TreeIO driver for Pagel format
+
+=head1 SYNOPSIS
+
+  use Bio::TreeIO;
+  my $in = Bio::TreeIO->new(-format => 'nexus',
+                            -file   => 't/data/adh.mb_tree.nexus');
+
+  my $out = Bio::TreeIO->new(-format => 'pag');
+  while( my $tree = $in->next_tree ) {
+    $out->write_tree($tree);
+  }
+
+=head1 DESCRIPTION
+
+Convert a Bio::TreeIO to Pagel format.
+More information here http://sapc34.rdg.ac.uk/meade/Mark/
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::pag;
+use vars qw($TaxonNameLen);
+use strict;
+
+$TaxonNameLen = 10;
+
+use base qw(Bio::TreeIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO::pag();
+ Function: Builds a new Bio::TreeIO::pag object 
+ Returns : an instance of Bio::TreeIO::pag
+ Args    : -file/-fh for filename or filehandles
+
+=cut
+
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   :
+ Function: Write a tree out in Pagel format
+           Some options are only appropriate for bayesianmultistate and
+           the simpler output is only proper for discrete
+ Returns : none
+ Args    : -no_outgroups => (number)
+           -print_header => 0/1 (leave 0 for discrete, 1 for bayesianms)
+           -special_node => special node - not sure what they wanted to do here
+           -keep_outgroup => 0/1 (keep the outgroup node in the output)
+           -outgroup_ancestor => Bio::Tree::Node (if we want to exclude or include the outgroup this is what we operate on)
+           -tree_no       => a tree number label - only useful for BayesianMultistate
+
+
+=cut
+
+sub write_tree {
+    # NOTE: "@args" appears twice below; the archived text had both
+    # mangled into " at args", which is not valid Perl.
+    my ($self,$tree,@args) = @_;
+    # defaults used when no named arguments are passed at all
+    my ($keep_outgroup,
+        $print_header,
+        $no_outgroups,
+        $special_node,
+        $outgroup_ancestor,
+        $tree_no) = (0,0,1);
+    if( @args ) {
+        ($no_outgroups,
+         $print_header,
+         $special_node,
+         $outgroup_ancestor,
+         $tree_no,
+         $keep_outgroup) = $self->_rearrange([qw(NO_OUTGROUPS
+                                                 PRINT_HEADER
+                                                 SPECIAL_NODE
+                                                 OUTGROUP_ANCESTOR
+                                                 TREE_NO
+                                                 KEEP_OUTGROUP)],@args);
+    }
+    my $newname_base = 1;
+
+    my $root = $tree->get_root_node;
+    my $eps = 0.0001; # shortest branch length that is written out
+    # %chars: internal_id => [character states], %names: internal_id => label
+    my (%chars,%names);
+    my @nodes = $tree->get_nodes;
+    my $species_ct;
+    my $traitct;
+    # first pass: choose an output name for every node and collect the
+    # character states of the leaves
+    for my $node ( @nodes ) {
+        if ((defined $special_node) && ($node eq $special_node)) {
+            my $no_of_tree_nodes = scalar(@nodes);
+            my $node_name = sprintf("N%d",$no_of_tree_nodes+1);
+            $names{$node->internal_id} = $node_name;
+
+        } elsif ($node->is_Leaf) {
+            $species_ct++;
+
+            my $node_name = $node->id;
+            if( length($node_name)> $TaxonNameLen ) {
+                $self->warn( "Found a taxon name longer than $TaxonNameLen letters, \n",
+                             "name will be abbreviated.\n");
+                $node_name = substr($node_name, 0,$TaxonNameLen);
+            } else {
+                # $node_name = sprintf("%-".$TaxonNameLen."s",$node_name);
+            }
+            $names{$node->internal_id} = $node_name;
+            # one state per tag, tags in sorted order, first value only
+            my @tags = sort $node->get_all_tags;
+            my @charstates = map { ($node->get_tag_values($_))[0] } @tags;
+            $traitct = scalar @charstates unless defined $traitct;
+            $chars{$node->internal_id} = [@charstates];
+        } else {
+            # internal nodes are numbered N1, N2, ...
+            $names{$node->internal_id} = sprintf("N%d", $newname_base++);
+        }
+    }
+
+    # generate PAG representation
+    if( $print_header ) {
+        if ($keep_outgroup) {
+            $self->_print(sprintf("%d %d\n",$species_ct,$traitct));
+        } else {
+            $self->_print( sprintf("%d %d\n",$species_ct-$no_outgroups,$traitct));
+        }
+    }
+
+    # nodes listed here are left out of the output
+    my @ancestors = ();
+    if ($keep_outgroup) {
+        push @ancestors, $root;
+    } else {
+        push @ancestors, ( $root, $outgroup_ancestor);
+    }
+    # internal-node lines are held back and printed after all leaf lines
+    my @rest;
+    foreach my $node (@nodes) {
+        my $i = 0;
+        foreach my $anc (@ancestors) {
+            if ($node eq $anc) { $i = 1; last }
+        }
+        unless ($i > 0) {       # root not given in PAG
+            my $current_name = $names{$node->internal_id};
+            my $branch_length_to_output;
+            if ($node->branch_length < $eps) {
+                my $msg_nodename = $current_name;
+                $msg_nodename =~ s/\s+$//;
+                warn( "TREE $tree_no, node \"$msg_nodename\": branch too ",
+                      "short (", $node->branch_length, "): increasing length to ",
+                      "$eps\n");
+                $branch_length_to_output = $eps;
+            } else {
+                $branch_length_to_output = $node->branch_length;
+            }
+            my @line = ( $current_name,
+                         $names{$node->ancestor->internal_id},
+                         $branch_length_to_output);
+
+            if ($node->is_Leaf) {
+                push @line, @{$chars{$node->internal_id}};
+                $self->_print(join(',', @line),"\n");
+            } else {
+                push @rest, \@line;
+            }
+        }
+    }
+    for ( @rest ) {
+        $self->_print(join(',', @$_),"\n");
+    }
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   :
+ Function: Not implemented for this driver; calls throw_not_implemented()
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub next_tree{
+   # NOTE: the parameter list must read "@args"; the archived text had it
+   # mangled into " at args", which is not valid Perl.
+   my ($self,@args) = @_;
+   # reading is not supported by this driver
+   $self->throw_not_implemented();
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/svggraph.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/svggraph.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/svggraph.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,214 @@
+#
+# BioPerl module for Bio::TreeIO::svg-graph
+#
+# Cared for by Allen Day <allenday at ucla.edu>
+#
+# Copyright Brian O'Connor
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::svggraph - A simple output format that converts a Tree object to an SVG output
+
+=head1 SYNOPSIS
+
+  use Bio::TreeIO;
+  my $in = new Bio::TreeIO(-file => 'input', -format => 'newick');
+  my $out = new Bio::TreeIO(-file => '>output', -format => 'svggraph');
+
+  while( my $tree = $in->next_tree ) {
+      my $svg_xml = $out->write_tree($tree);
+  }
+
+=head1 DESCRIPTION
+
+This outputs a tree as an SVG graphic using the SVG::Graph API
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Brian OConnor
+
+Email brian.oconnor-at-excite.com
+
+=head1 CONTRIBUTORS
+
+Allen Day
+Guillaume Rousse, Guillaume-dot-Rousse-at-inria-dot-fr
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::svggraph;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use SVG::Graph;
+use SVG::Graph::Data;
+use SVG::Graph::Data::Tree;
+use SVG::Graph::Data::Node;
+use Bio::Tree::TreeI;
+use Bio::Tree::Node;
+use Tree::DAG_Node;
+
+
+use base qw(Bio::TreeIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO::svggraph();
+ Function: Builds a new Bio::TreeIO::svggraph object 
+ Returns : Bio::TreeIO::svggraph
+ Args    :-width    => image width (default 1600)
+          -height   => image height (default 1000)
+          -margin   => margin (default 30)
+          -stroke   => stroke color (default 'black')
+          -stroke_width=> stroke width (default 2)
+          -font_size=> font size (default '10px')
+          -normalize=> undef or 'log' (default is undef)
+
+=cut
+
+sub _initialize {
+    my ($self, @opts) = @_;
+
+    # Pull the recognised named arguments out of the constructor options.
+    my @wanted = qw(WIDTH HEIGHT MARGIN STROKE STROKE_WIDTH FONT_SIZE NORMALIZE);
+    my ($w, $h, $pad, $colour, $pen, $font, $norm) =
+	$self->_rearrange(\@wanted, @opts);
+
+    # Drawing defaults; a false value falls back to the default, except
+    # for the margin, where an explicit 0 is kept.
+    $self->{_width}        = $w      || 1600;
+    $self->{_height}       = $h      || 1000;
+    $self->{_margin}       = defined $pad ? $pad : 30;
+    $self->{_stroke}       = $colour || 'black';
+    $self->{_stroke_width} = $pen    || 2;
+    $self->{_font_size}    = $font   || '10px';
+    $self->{_normalize}    = $norm   || '';
+
+    # Hand the same argument list on to the parent class initializer;
+    # its result is this method's return value.
+    $self->SUPER::_initialize(@opts);
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Write a tree out to data stream as an SVG graphic
+ Returns : none
+ Args    : Bio::Tree::TreeI object
+
+=cut
+
+sub write_tree{
+   my ($self,$tree) = @_;
+   # Render the whole tree, starting from its root node, and print it.
+   $self->_print($self->_write_tree_Helper($tree->get_root_node) . "\n");
+   if ($self->_flush_on_write && defined $self->_fh) { $self->flush }
+   return;
+}
+
+sub _write_tree_Helper {
+   my ($self,$node) = @_;
+
+   # Canvas geometry comes from the options stored by _initialize.
+   my @canvas = ('width'  => $self->{'_width'},
+                 'height' => $self->{'_height'},
+                 'margin' => $self->{'_margin'});
+   my $svg   = SVG::Graph->new(@canvas);
+   my $frame = $svg->add_frame;
+   # Build the SVG::Graph node hierarchy under a root named after $node.
+   my $data = SVG::Graph::Data::Tree->new;
+   my $top  = SVG::Graph::Data::Node->new;
+   $top->name($node->id);
+   $self->_decorateRoot($top, $node->each_Descendent());
+   $data->root($top);
+   $frame->add_data($data);
+
+   my @style = ('stroke'       => $self->{'_stroke'},
+                'stroke-width' => $self->{'_stroke_width'},
+                'font-size'    => $self->{'_font_size'});
+   $frame->add_glyph('tree', @style);
+   return($svg->draw);
+}
+
+
+=head2 decorateRoot
+
+ Title   : _decorateRoot
+ Usage   : internal methods
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _decorateRoot {
+    my ($self,$previousNode,@children) = @_;
+    # Depth-first copy: each Bio::Tree child becomes an SVG::Graph node
+    # attached as a daughter of the node built for its parent.
+    for my $child (@children) {
+	my $currNode = SVG::Graph::Data::Node->new;
+	$currNode->branch_label($child->id);
+	my $length = $child->branch_length;
+	# Optional log scaling; with the +1 a zero-length branch maps to 0
+	# instead of taking log(0).
+	if ($self->{_normalize} eq 'log') {
+	    $length = log($length + 1);
+	}
+	$currNode->branch_length($length);
+	$previousNode->add_daughter($currNode);
+	$self->_decorateRoot($currNode, $child->each_Descendent());
+    }
+}
+
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : 
+ Function: Sorry not possible with this format
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub next_tree{ # output-only format: any attempt to read throws
+    shift->throw("Sorry the format 'svggraph' can only be used as an output format");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/tabtree.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/tabtree.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO/tabtree.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,149 @@
+# $Id: tabtree.pm,v 1.10.4.1 2006/10/02 23:10:37 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO::tabtree
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::tabtree - A simple output format which displays a tree as an ASCII drawing
+
+=head1 SYNOPSIS
+
+  use Bio::TreeIO;
+  my $in = new Bio::TreeIO(-file => 'input', -format => 'newick');
+  my $out = new Bio::TreeIO(-file => '>output', -format => 'tabtree');
+
+  while( my $tree = $in->next_tree ) {
+      $out->write_tree($tree);
+  }
+
+=head1 DESCRIPTION
+
+This is a made up format just for outputting trees as an ASCII drawing.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::tabtree;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+
+use base qw(Bio::TreeIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO::tabtree();
+ Function: Builds a new Bio::TreeIO::tabtree object 
+ Returns : Bio::TreeIO::tabtree
+ Args    :
+
+
+=cut
+
+sub new {
+  my($class,@args) = @_;
+  # No format-specific state: construction is delegated to the parent class.
+  my $self = $class->SUPER::new(@args);
+  return $self;
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Write a tree out to data stream as an ASCII drawing
+ Returns : none
+ Args    : Bio::Tree::TreeI object
+
+=cut
+
+sub write_tree{
+   my ($self,$tree) = @_;
+   # The drawing is built recursively from the root with an empty indent.
+   $self->_print(_write_tree_Helper($tree->get_root_node,"") . "\n");
+   if ($self->_flush_on_write && defined $self->_fh) { $self->flush }
+   return;
+}
+
+sub _write_tree_Helper {
+    my ($node,$indent) = @_;
+    return unless defined $node;
+    # Returns the text drawing of everything below $node, built depth first.
+    my @d = $node->each_Descendent();
+    my $line = "";
+    my ($i,$lastchild) = (0,scalar @d - 1); # NOTE(review): $i is never incremented below, so it stays 0
+    for my $n ( @d ) {
+	if( $n->is_Leaf ) {
+	    $line .= sprintf("%s| \n%s\\-%s\n",
+			     $indent,$indent,$n->id || '');
+	} else { 
+	    $line .= sprintf("$indent|  %s\n",( $n->id ? 
+					       sprintf("(%s)",$n->id) : ''));
+	}
+	my $new_indent = $indent . (($i == $lastchild) ? "| " : "  "); # "| " only when $lastchild is 0 (single child)
+	if( $n != $node ) {
+	    # avoid the unlikely case of cycles
+	    $line .= _write_tree_Helper($n,$new_indent);	
+	}
+    }
+    return $line;
+}
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : 
+ Function: Sorry not possible with this format
+ Returns : none
+ Args    : none
+
+
+=cut
+
+sub next_tree{ # output-only format: any attempt to read throws
+    shift->throw("Sorry the format 'tabtree' can only be used as an output format at this time");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/TreeIO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,301 @@
+# $Id: TreeIO.pm,v 1.20.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::TreeIO
+#
+# Cared for by Jason Stajich <jason at bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO - Parser for Tree files
+
+=head1 SYNOPSIS
+
+  {
+      use Bio::TreeIO;
+      my $treeio = new Bio::TreeIO('-format' => 'newick',
+  				   '-file'   => 'globin.dnd');
+      while( my $tree = $treeio->next_tree ) {
+  	  print "Tree is ", $tree->size, "\n";
+      }
+  }
+
+=head1 DESCRIPTION
+
+This is the driver module for Tree reading from data streams and
+flatfiles.  This is intended to be able to create Bio::Tree::TreeI
+objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO;
+use strict;
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::TreeIO::TreeEventBuilder;
+
+use base qw(Bio::Root::Root Bio::Root::IO Bio::Event::EventGeneratorI Bio::Factory::TreeFactoryI);
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::TreeIO();
+ Function: Builds a new Bio::TreeIO object 
+ Returns : Bio::TreeIO
+ Args    : a hash.  useful keys:
+   -format : Specify the format of the file.  Supported formats:
+
+     newick             Newick tree format
+     nexus              Nexus tree format
+     nhx                NHX tree format
+     svggraph           SVG graphical representation of tree
+     tabtree            ASCII text representation of tree
+     lintree            lintree output format
+
+=cut
+
+sub new {
+  my($caller,@args) = @_;
+  my $class = ref($caller) || $caller;
+
+    # or do we want to call SUPER on an object if $caller is an
+    # object?
+    if( $class =~ /Bio::TreeIO::(\S+)/ ) {
+	# A concrete format subclass: build the object and initialize it.
+	my ($self) = $class->SUPER::new(@args);
+	$self->_initialize(@args);
+	return $self;
+    } else {
+	# Called on Bio::TreeIO itself: pick a format and delegate to its subclass.
+	my %param = @args;
+	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+	my $format = $param{'-format'} ||
+	    $class->_guess_format( $param{'-file'} || $ARGV[0] ) ||
+	    'newick';
+	$format = "\L$format";	# normalize capitalization to lower case
+
+	return unless( $class->_load_format_module($format) );
+	return "Bio::TreeIO::$format"->new(@args);
+    }
+}
+
+
+=head2 next_tree
+
+ Title   : next_tree
+ Usage   : my $tree = $treeio->next_tree;
+ Function: Gets the next tree off the stream
+ Returns : Bio::Tree::TreeI or undef if no more trees
+ Args    : none
+
+=cut
+
+sub next_tree{
+   # Not usable on the base class: always throws.
+   $_[0]->throw("Cannot call method next_tree on Bio::TreeIO object must use a subclass");
+}
+
+=head2 write_tree
+
+ Title   : write_tree
+ Usage   : $treeio->write_tree($tree);
+ Function: Writes a tree onto the stream
+ Returns : none
+ Args    : Bio::Tree::TreeI
+
+
+=cut
+
+sub write_tree{
+   # Not usable on the base class: always throws ($tree is ignored).
+   $_[0]->throw("Cannot call method write_tree on Bio::TreeIO object must use a subclass");
+}
+
+
+=head2 attach_EventHandler
+
+ Title   : attach_EventHandler
+ Usage   : $parser->attach_EventHandler($handler)
+ Function: Adds an event handler to listen for events
+ Returns : none
+ Args    : Bio::Event::EventHandlerI
+
+=cut
+
+sub attach_EventHandler{
+    my ($self,$handler) = @_;
+    return if( ! $handler );
+    unless( $handler->isa('Bio::Event::EventHandlerI') ) {   # refuse it, as the warning says
+	$self->warn("Ignoring request to attach handler ".ref($handler). ' because it is not a Bio::Event::EventHandlerI'); return;
+    }
+    $self->{'_handler'} = $handler;
+    return;
+}
+
+=head2 _eventHandler
+
+ Title   : _eventHandler
+ Usage   : private
+ Function: Get the EventHandler
+ Returns : Bio::Event::EventHandlerI
+ Args    : none
+
+
+=cut
+
+sub _eventHandler{
+   my $self = shift;
+   return $self->{'_handler'};   # whatever attach_EventHandler last stored
+}
+
+sub _initialize {
+    my($self, @args) = @_;
+    $self->{'_handler'} = undef;
+    ($self->{'newline_each_node'}) = $self->_rearrange
+	([qw(NEWLINE_EACH_NODE)],@args);
+    # initialize the IO part, then attach the default tree-building
+    # event handler, which is given the same arguments
+    $self->_initialize_io(@args);
+    $self->attach_EventHandler(Bio::TreeIO::TreeEventBuilder->new
+			       (-verbose => $self->verbose(), @args));
+}
+
+=head2 _load_format_module
+
+ Title   : _load_format_module
+ Usage   : *INTERNAL TreeIO stuff*
+ Function: Loads up (like use) a module at run time on demand
+ Example :
+ Returns :
+ Args    :
+
+=cut
+
+sub _load_format_module {
+  my ($self,$format) = @_;
+  my $package = "Bio::TreeIO::" . $format;
+
+  # Trap a failed load so it is reported below rather than propagated.
+  my $loaded = eval { $self->_load_module($package) };
+  return $loaded unless $@;
+
+  # Loading died: explain on STDERR, then return the (undefined) result
+  # of the load.
+  print STDERR <<END;
+$self: $format cannot be found
+Exception $@
+For more information about the TreeIO system please see the TreeIO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+
+  return $loaded;
+}
+
+=head2 newline_each_node
+
+ Title   : newline_each_node
+ Usage   : $obj->newline_each_node($newval)
+ Function: Get/set newline each node flag which is only applicable
+           for writing tree formats for nhx and newick, will
+           print a newline after each node or paren
+ Returns : value of newline_each_node (boolean)
+ Args    : on set, new value (a boolean or undef, optional)
+
+
+=cut
+
+sub newline_each_node{
+    my ($self, @new) = @_;
+    $self->{'newline_each_node'} = $new[0] if @new;   # any argument, even undef, sets the flag
+    return $self->{'newline_each_node'};
+}
+
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+sub _guess_format {
+   my ($class, $file) = @_;   # lexical copy, so the caller's $_ is left untouched
+   return unless $file;
+   return 'newick'   if $file =~ /\.(dnd|newick|nh)$/i;
+   return 'nhx'      if $file =~ /\.(nhx)$/i;
+   return 'phyloxml' if $file =~ /\.(xml)$/i;
+   return 'svggraph' if $file =~ /\.svg$/i;
+   return 'lintree'  if $file =~ /\.(lin|lintree)$/i;   # otherwise a false value is returned
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # Destructor: close() is called on the object being destroyed.
+    $self->close();
+}
+
+sub TIEHANDLE {
+  my ($class, $treeio) = @_;
+  return bless {'treeio' => $treeio},$class;   # wrapper holding the object passed to tie
+}
+
+sub READLINE {
+  my ($self) = @_;
+  return $self->{'treeio'}->next_tree() unless wantarray;   # scalar context: one tree
+  my @trees;                                                  # list context: drain the stream
+  while( my $t = $self->{'treeio'}->next_tree() ) { push @trees, $t }
+  return @trees;
+}
+
+sub PRINT {
+  my ($self, @trees) = @_;
+  $self->{'treeio'}->write_tree(@trees);   # forwarded unchanged to the wrapped object
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/UpdateableSeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/UpdateableSeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/UpdateableSeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+# $Id: UpdateableSeqI.pm,v 1.7.4.1 2006/10/02 23:10:12 sendu Exp $
+#
+# BioPerl module for Bio::UpdateableSeqI
+#
+# Cared for by David Block <dblock at gene.pbi.nrc.ca>
+#
+# Copyright David Block
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::UpdateableSeqI - Descendant of Bio::SeqI that allows updates
+
+=head1 SYNOPSIS
+
+See Bio::SeqI for most of the documentation.
+See the documentation of the methods for further details.
+
+=head1 DESCRIPTION
+
+Bio::UpdateableSeqI is an interface for Sequence objects which are
+expected to allow users to perform basic editing functions (update/delete)
+on their component SeqFeatures.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - David Block
+
+Email dblock at gene.pbi.nrc.ca
+
+=head1 CONTRIBUTORS
+
+Ewan Birney forced me to this...
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::UpdateableSeqI;
+use strict;
+use Carp;
+
+# Object preamble - inherits from Bio::Root::Root
+
+
+
+use base qw(Bio::SeqI);
+
+
+=head2 delete_feature
+
+ Title   : delete_feature
+ Usage   : my $orphanlist=$self->delete_feature($feature,$transcript,$gene);
+ Function: deletes the specified $feature from the given transcript, if $transcript is sent and exists and $feature is a feature of $transcript,
+           or from $gene if the $feature is a feature of $gene, or from $self if $transcript and $gene are not sent.  Keeps track of the features
+           of the $gene object that may be left as orphans and returns them as a listref.
+ Example : I want to delete transcript 'abc' of gene 'def', with three exons, leaving only transcript 'ghi' with two exons.
+           This will leave exons 1 and 3 part of 'ghi', but exon 2 will become an orphan.
+           my $orphanlist=$seq->delete_feature($transcript{'abc'},undef,$gene{'def'});
+           $orphanlist is a reference to a list containing $exon{'2'};
+ Returns : a listref of orphaned features after the deletion of $feature (optional)
+ Args    : $feature - the feature to be deleted
+           $transcript - the transcript containing the $feature, so that a $feature can be removed from only one transcript when there are multiple
+                         transcripts in a gene.
+           $gene - the gene containing the $transcript and/or the $feature
+
+
+=cut
+
+sub delete_feature{
+   my $self = shift;
+   # Interface stub: the ($feature, $transcript, $gene) arguments are not used here.
+   $self->throw_not_implemented();
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAChange.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAChange.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAChange.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,469 @@
+# $Id: AAChange.pm,v 1.20.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::AAChange
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::AAChange - Sequence change class for polypeptides
+
+=head1 SYNOPSIS
+
+   $aamut = Bio::Variation::AAChange->new
+       ('-start'         => $start,
+ 	'-end'           => $end,
+ 	'-length'        => $len,
+ 	'-proof'         => $proof,
+ 	'-isMutation'    => 1,
+ 	'-mut_number'    => $mut_number
+ 	);
+
+   my $a1 = Bio::Variation::Allele->new;
+   $a1->seq($ori) if $ori;
+   $aamut->allele_ori($a1);
+   my $a2 = Bio::Variation::Allele->new;
+   $a2->seq($mut) if $mut;
+   $aamut->add_Allele($a2);
+   $aamut->allele_mut($a2);
+
+   print  "\n";
+
+   # add it to a SeqDiff container object
+   $seqdiff->add_Variant($aamut);
+
+   # and create links to and from RNA level variant objects
+   $aamut->RNAChange($rnachange);
+   $rnachange->AAChange($aamut);
+
+=head1 DESCRIPTION
+
+The instantiable class Bio::Variation::AAChange describes basic
+sequence changes at polypeptide level. It uses methods defined in
+superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
+for details.
+
+If the variation described by an AAChange object has a known
+Bio::Variation::RNAChange object, create the link with method
+RNAChange(). See L<Bio::Variation::RNAChange> for more information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Variation::AAChange;
+
+use vars qw($MATRIX);
+use strict;
+
+# Object preamble - inheritance
+
+use base qw(Bio::Variation::VariantI);
+
+BEGIN {
+
+my $matrix = << "__MATRIX__";
+#  Matrix made by matblas from blosum62.iij
+#  * column uses minimum score
+#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 62
+#  Entropy =   0.6979, Expected =  -0.5209
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
+A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4
+R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4
+N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4
+D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1 -4
+C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
+Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1 -4
+E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4
+G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1 -4
+H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1 -4
+I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1 -4
+L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1 -4
+K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1 -4
+M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1 -4
+F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1 -4
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2 -4
+S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0 -4
+T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0 -4
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2 -4
+Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1 -4
+V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1 -4
+B -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1 -4
+Z -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4
+X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1 -4
+* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1
+__MATRIX__
+
+    my %blosum = ();
+    $matrix =~ /^ +(.+)$/m;          # header row: the column residues
+    my @aas = split / +/, $1;
+    foreach my $aa (@aas) {
+	# Find this residue's row; \Q..\E makes '*' match literally.
+	my @scores = $matrix =~ /^\Q$aa\E +([-+]?\d.*)$/m
+	    ? split( / +/, $1 )
+	    : ();           # plain declaration: "my ... if ..." is undefined behaviour
+	my $count = 0;
+	foreach my $ak (@aas) {
+	    $blosum{$aa}->{$ak} = $scores[$count];
+	    $count++;
+	}
+    }
+    sub _matrix;
+    $MATRIX = \%blosum;
+}
+
+sub new {
+    my($class,@args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my ($start, $end, $length, $strand, $primary, $source,
+	$frame, $score, $gff_string,
+	$allele_ori,  $allele_mut,  $upstreamseq,  $dnstreamseq,
+	$label,  $status,  $proof,  $re_changes,  $region, $region_value,
+        $region_dist,
+	$numbering,  $mut_number,  $ismutation) =
+	    $self->_rearrange([qw(START
+				  END
+				  LENGTH
+				  STRAND
+				  PRIMARY
+				  SOURCE
+				  FRAME
+				  SCORE
+				  GFF_STRING
+				  ALLELE_ORI
+				  ALLELE_MUT
+				  UPSTREAMSEQ
+				  DNSTREAMSEQ
+				  LABEL
+				  STATUS
+				  PROOF
+				  RE_CHANGES
+				  REGION
+				  REGION_VALUE
+				  REGION_DIST
+				  NUMBERING
+				  MUT_NUMBER
+				  ISMUTATION
+				  )],@args);
+    # Default tag; a true -primary argument below overrides it.
+    $self->primary_tag("Variation");
+
+    $self->{ 'alleles' } = [];
+    # Each setter is called only when its argument has a true value.
+    $start && $self->start($start);
+    $end   && $self->end($end);
+    $length && $self->length($length);
+    $strand && $self->strand($strand);
+    $primary && $self->primary_tag($primary);
+    $source  && $self->source_tag($source);
+    $frame   && $self->frame($frame);
+    $score   && $self->score($score);
+    $gff_string && $self->_from_gff_string($gff_string);
+
+    $allele_ori && $self->allele_ori($allele_ori);
+    $allele_mut  && $self->allele_mut($allele_mut);
+    $upstreamseq  && $self->upstreamseq($upstreamseq);
+    $dnstreamseq  && $self->dnstreamseq($dnstreamseq);
+    # NOTE(review): -re_changes is parsed above but not applied below.
+    $label  && $self->label($label);
+    $status  && $self->status($status);
+    $proof && $self->proof($proof);
+    $region  && $self->region($region);
+    $region_value  && $self->region_value($region_value);
+    $region_dist  && $self->region_dist($region_dist);
+    $numbering && $self->numbering($numbering);
+    $mut_number && $self->mut_number($mut_number);
+    $ismutation && $self->isMutation($ismutation);
+
+    return $self; # success - we hope!
+}
+
+=head2 RNAChange
+
+ Title   : RNAChange
+ Usage   : $mutobj = $self->RNAChange;
+         : $mutobj = $self->RNAChange($objref);
+ Function: Returns or sets the link-reference to a mutation/change object.
+           If there is no link, it will return undef
+ Returns : an obj_ref or undef
+
+=cut
+
+sub RNAChange {
+  my ($self,$value) = @_;
+  if (defined $value) {
+      if( ! $value->isa('Bio::Variation::RNAChange') ) {   # report the rejected value, not $self
+	  $self->throw("Is not a Bio::Variation::RNAChange object but a [$value]");
+	  return;
+      }
+      else {
+	  $self->{'RNAChange'} = $value;
+      }
+  }
+  unless (exists $self->{'RNAChange'}) {   # no link stored: return nothing
+      return;
+  } else {
+      return $self->{'RNAChange'};
+  }
+}
+
+
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function:
+
+            Sets and returns mutation event label(s).  If value is not
+            set, or no argument is given returns false.  Each
+            instantiable subclass of L<Bio::Variation::VariantI> needs
+            to implement this method. Valid values are listed in
+            'Mutation event controlled vocabulary' in
+            http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub label {
+    my ($self) = @_;
+    my ($o, $m, $type);   # $o/$m: original and mutated sequences, left undef when missing or false
+    $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
+    $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+    # The first matching rule in the chain below decides the label.
+    if ($self->start == 1 ) {
+	if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {   # NOTE(review): $m may be undef here
+	    $type = 'no translation';
+	}
+	elsif ($o and $m and $o eq $m ) {
+	    $type = 'silent';
+	}
+	# more ...
+    }
+    elsif ($o and substr($o, 0, 1) eq '*' ) {
+	if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
+	    $type = 'post-elongation';
+	}
+	elsif ($m and $o eq $m ) {
+	    $type = 'silent, conservative';
+	}
+    }
+    elsif ($o and $m and $o eq $m) {
+	$type = 'silent, conservative';
+    }
+    elsif ($m and $m eq '*') {
+	$type = 'truncation';
+    }
+    elsif ($o and $m and $o eq $m) {   # same test as the branch two above, so never reached
+	$type = 'silent, conservative';
+    }
+    elsif (not $m or
+	   ($o and $m and  length($o) > length($m) and
+	    substr($m, -1, 1) ne '*')) {
+	$type = 'deletion';
+	if ($o and $m and $o !~ $m and $o !~ $m) {   # NOTE(review): same test twice; $m is used as a regex
+	    $type .= ', complex';
+	}
+    }
+    elsif (not $o or
+	   ($o and $m and length($o) < length($m) and
+	    substr($m, -1, 1) ne '*' ) ) {
+	$type = 'insertion';
+	if ($o and $m and $o !~ $m and $o !~ $m) {   # NOTE(review): same test twice; $m is used as a regex
+	    $type .= ', complex';
+	}
+    }
+    elsif  ($o and $m and $o ne $m and
+	    length $o == 1 and  length $m  == 1 ) {
+	$type = 'substitution';
+	my $value = $self->similarity_score;
+	if (defined $value) {
+	    my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
+	    $type .= ", ". $cons;
+	}
+    } else {
+	$type = 'out-of-frame translation, truncation';
+    }
+    $self->{'label'} = $type;   # recomputed on every call; no argument is read
+    return $self->{'label'};
+}
+
+
+=head2 similarity_score
+
+ Title   : similarity_score
+ Usage   : $self->similarity_score
+ Function: Measure for evolutionary conservativeness
+           of single amino substitutions. Uses BLOSUM62.
+           Negative numbers are nonconservative changes.
+ Returns : integer, undef if not single amino acid change
+
+=cut
+
+sub similarity_score {
+    my $self = shift;
+    my ($from, $to);   # allele sequences; left undef when missing or false
+    $from = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
+    $to   = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+    for my $aa ($from, $to) {   # both must be single residues known to the matrix
+	return unless $aa and length $aa == 1 and $aa =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
+    }
+    return $MATRIX->{uc $from}->{uc $to};
+}
+
+=head2 trivname
+
+ Title   : trivname
+ Usage   : $self->trivname
+ Function: 
+
+           Given a Bio::Variation::AAChange object with linked
+           Bio::Variation::RNAChange and Bio::Variation::DNAMutation
+           objects, this subroutine creates a string corresponding to
+           the 'trivial name' of the mutation. Trivial name is
+           specified in Antonorakis & MDI Nomenclature Working Group:
+           Human Mutation 11:1-3, 1998.
+           http://www3.interscience.wiley.com/cgi-bin/abstract/5001291/ABSTRACT
+
+ Returns : string
+
+=cut
+
+
+sub trivname {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'trivname'} = $value;
+    } else {
+	my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
+	    ('', '', '', '', '');
+	my $o = ($self->allele_ori && $self->allele_ori->seq) || undef;   # plain declaration, no "my ... if"
+	#my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+	# First residue of the original allele; a stop ('*') is written as X.
+	$aaori = substr ($o, 0, 1) if $o;
+	$aaori =~ tr/\*/X/;
+
+	my $sep;
+	if ($self->isMutation) {
+	    $sep = '>';
+	} else {
+	    $sep = '|';
+	}
+	my $trivname = $aaori. $self->start;
+	$trivname .= $sep if $sep eq '|';
+
+	my @alleles = $self->each_Allele;
+	foreach my $allele (@alleles) {
+	    my $m = $allele->seq || undef;   # fresh on every iteration
+	    # Each allele in turn is made the current allele_mut.
+	    $self->allele_mut($allele);
+	    #$trivname .=  $sep. uc $m if $m;
+
+	    $aamutterm = substr ($m, -1, 1) if $m;
+	    if ($self->RNAChange->label =~ /initiation codon/ and
+		( $o and $m and $o ne $m)) {
+		$aamut = 'X';
+	    }
+	    elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
+		$aamutsymbol = '';
+		$aamut = $aamutterm;
+	    }
+	    elsif ($self->RNAChange->label =~ /deletion/) {
+		$aamutsymbol = 'del';
+		if ($aamutterm eq '*') {
+		    $aatermnumber = $self->start + length($m) -1;
+		    $aamut = 'X'. $aatermnumber;
+		}
+		if ($self->RNAChange  && $self->RNAChange->label =~ /inframe/){
+		    $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
+		}
+	    }
+	    elsif ($self->RNAChange->label =~ /insertion/) {
+		$aamutsymbol = 'ins';
+		if (($aamutterm eq '*') && (length($m)-1 != 0)) {
+		    $aatermnumber = $self->start + length($m)-1;
+		    $aamut =  $aatermnumber. 'X';
+		}
+		if ($self->RNAChange->label =~ /inframe/){
+		    $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
+		}
+	    }
+	    elsif ($self->RNAChange->label =~ /complex/ ) {
+		my $diff = length($m) - length($o);
+		if ($diff >= 0 ) {
+		    $aamutsymbol = 'ins';
+		} else {
+		    $aamutsymbol = 'del' ;
+		}
+		if (($aamutterm eq '*') && (length($m)-1 != 0)) {
+		    $aatermnumber = $self->start + length($m)-1;
+		    $aamut =  $aatermnumber. 'X';
+		}
+		if ($self->RNAChange->label =~ /inframe/){
+
+		    if ($diff >= 0 ) {
+			$aamut = '+'. $diff ;
+		    } else {
+			$aamut =  $diff ;
+		    }
+		}
+	    }
+	    elsif ($self->label =~ /truncation/) {
+		$aamut = $m;
+	    } else {
+		$aamutsymbol = '';
+		$aamut = $aamutterm;
+	    }
+	    $aamut =~ tr/\*/X/;
+	    $trivname .= $aamutsymbol. $aamut. $sep;
+	}
+	chop $trivname;   # drop the separator appended last
+	$self->{'trivname'} = $trivname;
+    }
+    return $self->{'trivname'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAReverseMutate.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAReverseMutate.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/AAReverseMutate.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,294 @@
+# $Id: AAReverseMutate.pm,v 1.12.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::AAReverseMutate
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::AAReverseMutate - point mutation and codon
+  information from single amino acid changes
+
+=head1 SYNOPSIS
+
+    $aamut = new Bio::Variation::AAReverseMutate
+                 (-aa_ori => 'F', 
+		  -aa_mut => 'S',
+		  -codon_ori => 'ttc', # optional
+		  -codon_table => '3'  # defaults to 1
+		  );
+
+   @points = $aamut->each_Variant;
+
+   if (scalar @points > 0 ) {
+       foreach $rnachange ( @points ) {
+   	   # $rnachange is a Bio::Variation::RNAChange object
+   	   print " ", $rnachange->allele_ori->seq, ">", 
+	   $rnachange->allele_mut->seq, " in ",
+   	   $rnachange->codon_ori, ">", $rnachange->codon_mut,
+   	   " at position ", $rnachange->codon_pos, "\n";
+       }
+   } else  {
+       print "No point mutations possible\n";
+   }
+
+=head1 DESCRIPTION
+
+Bio::Variation::AAReverseMutate objects take in reference and mutated
+amino acid information and deduce potential point mutations at RNA
+level leading to this change. The choice can be further limited by
+letting the object know what the codon in the reference
+sequence is. The results are returned as L<Bio::Variation::RNAChange>
+objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Variation::AAReverseMutate;
+
+use strict;
+
+# Object preamble - inheritance
+use Bio::Tools::CodonTable;
+use Bio::Variation::RNAChange;
+use Bio::Variation::Allele;
+
+use base qw(Bio::Root::Root);
+
+# Constructor.  Recognised named arguments:
+#   -aa_ori       original amino acid (one letter code)
+#   -aa_mut       mutated amino acid (one letter code)
+#   -codon        codon of the original amino acid (historical key)
+#   -codon_ori    same as -codon; this is the spelling used in the SYNOPSIS
+#   -codon_table  codon table id
+# Each true value is passed on to the accessor of the same name.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # CODON_ORI is appended last so that the order of the first four
+    # values is unchanged for callers relying on it.
+    my ($aa_ori, $aa_mut, $codon_ori, $codon_table, $codon_ori_alias) =
+	$self->_rearrange([qw(AA_ORI
+			      AA_MUT
+			      CODON
+			      CODON_TABLE
+			      CODON_ORI
+			      )], @args);
+
+    # -codon keeps precedence; -codon_ori is only a fallback.
+    $codon_ori = $codon_ori_alias unless $codon_ori;
+
+  $aa_ori && $self->aa_ori($aa_ori);
+  $aa_mut  && $self->aa_mut($aa_mut);
+  $codon_ori  && $self->codon_ori($codon_ori);
+  $codon_table && $self->codon_table($codon_table);
+
+  return $self; # success - we hope!
+
+}
+
+
+=head2 aa_ori
+
+ Title   : aa_ori
+ Usage   : $obj->aa_ori();
+ Function: 
+
+            Sets and returns original aa sequence.  If value is not
+            set, returns false. 
+
+            Amino acid sequences are stored in upper case characters,
+            others in lower case.
+
+ Example : 
+ Returns : string
+ Args    : single character amino acid code
+
+=cut
+
+# Accessor for the original amino acid.  A defined argument is upper-cased
+# and stored when it is a single valid one letter symbol; anything else is
+# reported through throw().  Always returns the stored value.
+sub aa_ori {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	my $symbol = uc $value;
+	if ($symbol =~ /^[ARNDCQEGHILKMFPSTWYVBZX*]$/) {
+	    $self->{'aa_ori'} = $symbol;
+	}
+	else {
+	    $self->throw("'$value' is not a valid one letter amino acid symbol\n");
+	}
+    }
+    return $self->{'aa_ori'};
+}
+
+
+=head2 aa_mut
+
+ Title   : aa_mut
+ Usage   : $obj->aa_mut();
+ Function: 
+
+            Sets and returns the mutated allele sequence.  If value is not
+            set, returns false. 
+
+ Example : 
+ Returns : string
+ Args    : single character amino acid code
+
+=cut
+
+
+# Accessor for the mutated amino acid.  A defined argument is upper-cased
+# and stored when it is a single valid one letter symbol; anything else is
+# reported through throw().  Always returns the stored value.
+sub aa_mut {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	my $symbol = uc $value;
+	if ($symbol =~ /^[ARNDCQEGHILKMFPSTWYVBZX*]$/) {
+	    $self->{'aa_mut'} = $symbol;
+	}
+	else {
+	    $self->throw("'$value' is not a valid one letter amino acid symbol\n");
+	}
+    }
+    return $self->{'aa_mut'};
+}
+
+
+=head2 codon_ori
+
+ Title   : codon_ori
+ Usage   : $obj->codon_ori();
+ Function: 
+
+            Sets and returns codon_ori triplet.  If value is not set,
+            returns false.  The string should be three characters
+            long and consist of the letters a, t, g and c; other values
+            trigger a warning but are still stored (in lower case).
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+# Accessor for the codon of the original amino acid.  A defined argument
+# is stored in lower case.  A warning (not an error) is issued when it is
+# not exactly three characters long or contains anything but a, t, g, c.
+# Returns the stored codon.
+sub codon_ori {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	# Lower-case first and test the result: "lc $value =~ /.../" binds as
+	# lc($value =~ /.../), which made every upper-case codon warn.
+	my $codon = lc $value;
+	if (length($codon) != 3 or $codon =~ /[^atgc]/) {
+	    $self->warn("Codon string \"$value\" is not valid unique codon");
+	}
+	$self->{'codon_ori'} = $codon;
+    }
+    return $self->{'codon_ori'};
+}
+
+=head2 codon_table
+
+ Title   : codon_table
+ Usage   : $obj->codon_table();
+ Function: 
+
+            Sets and returns the codon table id of the RNA
+            If value is not set, returns 1, 'universal' code, as the default.
+
+ Example : 
+ Returns : integer
+ Args    : none if get, the new value if set
+
+=cut
+
+
+# Accessor for the codon table id.  A defined argument made of digits only
+# is stored; any other defined argument is reported through throw().
+# Returns the stored id, or 1 when none has been stored.
+sub codon_table {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	if ($value =~ /^\d+$/) {
+	    $self->{'codon_table'} = $value;
+	}
+	else {
+	    $self->throw("'$value' is not a valid codon table ID\n".
+			"Has to be a positive integer. Defaulting to 1\n");
+	}
+    }
+    return exists $self->{'codon_table'} ? $self->{'codon_table'} : 1;
+}
+
+
+=head2 each_Variant
+
+ Title   : each_Variant
+ Usage   : $obj->each_Variant();
+ Function: 
+
+            Returns a list of Variants.
+
+ Example : 
+ Returns : list of Variants
+ Args    : none
+
+=cut
+
+# Returns one Bio::Variation::RNAChange object for every pair of codons
+# (one coding for aa_ori, one for aa_mut in the selected codon table) that
+# differ in exactly one nucleotide.  When codon_ori is set, only that
+# original codon is considered.  Throws if aa_ori or aa_mut is undefined.
+sub each_Variant{
+   my ($self, @args) = @_;
+
+   $self->throw("aa_ori is not defined\n") if not defined $self->aa_ori;
+   $self->throw("aa_mut is not defined\n") if not defined $self->aa_mut;
+
+   my @points;
+   my $ct  = Bio::Tools::CodonTable->new( '-id' => $self->codon_table );
+   foreach my $codon_ori ($ct->revtranslate($self->aa_ori)) {
+       next if $self->codon_ori and $self->codon_ori ne $codon_ori;
+       foreach my $codon_mut ($ct->revtranslate($self->aa_mut)) {
+	   # 0-based positions at which the two codons differ
+	   my @diff = grep { substr($codon_ori, $_, 1) ne substr($codon_mut, $_, 1) }
+	       0 .. 2;
+	   # only single nucleotide changes are point mutations
+	   next unless @diff == 1;
+	   my $k = $diff[0];
+
+	   my $rna = Bio::Variation::RNAChange->new
+	       ('-length'        => '1',
+		'-codon_ori'     => $codon_ori,
+		'-codon_mut'     => $codon_mut,
+		'-codon_pos'     => $k + 1,     # codon positions are 1-based
+		'-isMutation'    => 1
+		);
+	   my $all_ori = Bio::Variation::Allele->new('-seq' => substr($codon_ori, $k, 1));
+	   $rna->allele_ori($all_ori);
+	   my $all_mut = Bio::Variation::Allele->new('-seq' => substr($codon_mut, $k, 1));
+	   $rna->allele_mut($all_mut);
+	   push @points, $rna;
+       }
+   }
+   return @points;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/Allele.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/Allele.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/Allele.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,283 @@
+# $Id: Allele.pm,v 1.15.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::Allele
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::Allele - Sequence object with allele-specific attributes
+
+=head1 SYNOPSIS
+
+  $allele1 = Bio::Variation::Allele->new ( -seq => 'A',
+                                           -id  => 'AC00001.1',
+                                           -alphabet => 'dna',
+                                           -is_reference => 1
+                                         );
+
+=head1 DESCRIPTION
+
+List of alleles describe known sequence alternatives in a variable region.
+Alleles are contained in Bio::Variation::VariantI complying objects.
+See L<Bio::Variation::VariantI> for details.
+
+Bio::Variation::Alleles are PrimarySeqI complying objects which can
+contain database cross references as specified in
+Bio::DBLinkContainerI interface, too.
+
+A lot of the complexity in dealing with Allele objects is caused by
+null alleles: Allele objects that have a zero length sequence string.
+
+In addition to describing the allele by its sequence, it is possible to
+describe the repeat structure within the sequence. This is done using
+methods repeat_unit (e.g. 'ca') and repeat_count (e.g. 7).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Variation::Allele;
+
+use strict;
+
+# Object preamble - inheritance
+
+
+use base qw(Bio::PrimarySeq Bio::DBLinkContainerI);
+
+# Constructor.  On top of what the parent constructor handles, the named
+# arguments -is_reference, -repeat_unit and -repeat_count are passed to
+# the accessors of the same name when their value is true.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    my @attributes = qw(is_reference repeat_unit repeat_count);
+    my %given;
+    @given{@attributes} =
+	$self->_rearrange([qw(IS_REFERENCE
+			      REPEAT_UNIT
+			      REPEAT_COUNT
+			      )],
+			  @args);
+
+    foreach my $attribute (@attributes) {
+	$self->$attribute($given{$attribute}) if $given{$attribute};
+    }
+
+    return $self; # success - we hope!
+}
+
+
+=head2 is_reference
+
+ Title   : is_reference
+ Usage   : $obj->is_reference()
+ Function: sets and returns boolean values. 
+           Unset values return false.
+ Example : $obj->is_reference()
+ Returns : boolean
+ Args    : optional true or false value
+
+
+=cut
+
+
+# Boolean accessor.  A defined argument is normalised to 1 or 0 and stored.
+# Returns the stored flag, or 0 when nothing has been stored.
+sub is_reference {
+    my ($self, $value) = @_;
+    $self->{'is_reference'} = ($value ? 1 : 0) if defined $value;
+    return exists $self->{'is_reference'} ? $self->{'is_reference'} : 0;
+}
+
+
+=head2 add_DBLink
+
+ Title   : add_DBLink
+ Usage   : $self->add_DBLink($ref)
+ Function: adds a link object
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+
+# Appends a Bio::Annotation::DBLink object to this allele's list of links.
+# Anything else (including undef or an unblessed reference) is reported
+# through throw() rather than crashing on the isa() method call.
+sub add_DBLink{
+   my ($self,$com) = @_;
+   if( ! ( defined $com && UNIVERSAL::isa($com, 'Bio::Annotation::DBLink') ) ) {
+       my $what = defined $com ? $com : 'undef';
+       $self->throw("Is not a link object but a  [$what]");
+   }
+   push(@{$self->{'link'}},$com);
+}
+
+=head2 each_DBLink
+
+ Title   : each_DBLink
+ Usage   : foreach $ref ( $self->each_DBlink() )
+ Function: gets an array of DBlink of objects
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Returns the list of link objects added with add_DBLink, or an empty
+# list when none has been added yet.
+sub each_DBLink{
+   my ($self) = @_;
+   # the 'link' slot only exists after the first add_DBLink call
+   return @{ $self->{'link'} || [] };
+}
+
+=head2 repeat_unit
+
+ Title   : repeat_unit
+ Usage   : $obj->repeat_unit('ca');
+ Function: 
+
+            Sets and returns the sequence of the repeat_unit the
+            allele is composed of.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+# Accessor for the repeat unit string.  A defined argument is stored.
+# When sequence, repeat unit and repeat count are all true, a warning is
+# issued if unit x count does not reproduce the sequence.  Returns the
+# stored repeat unit.
+sub repeat_unit {
+    my ($self, $value) = @_;
+    $self->{'repeat_unit'} = $value if defined $value;
+
+    my ($seq, $unit, $count) = @{$self}{qw(seq repeat_unit repeat_count)};
+    if ($seq && $unit && $count && ($unit x $count) ne $seq) {
+	$self->warn("Repeats do not add up!");
+    }
+    return $self->{'repeat_unit'};
+}
+
+=head2 repeat_count
+
+ Title   : repeat_count
+ Usage   : $obj->repeat_count();
+ Function: 
+
+            Sets and returns the number of repeat units in the allele.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+# Accessor for the number of repeat units.  A defined argument made of
+# digits only is stored; any other defined argument is reported through
+# throw().  When sequence, repeat unit and repeat count are all true, a
+# warning is issued if unit x count does not reproduce the sequence.
+# Returns the stored count.
+sub repeat_count {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	if ($value =~ /^\d+$/) {
+	    $self->{'repeat_count'} = $value;
+	}
+	else {
+	    $self->throw("[$value] for repeat_count has to be a positive integer\n");
+	}
+    }
+
+    my ($seq, $unit, $count) = @{$self}{qw(seq repeat_unit repeat_count)};
+    if ($seq && $unit && $count && ($unit x $count) ne $seq) {
+	$self->warn("Repeats do not add up!");
+    }
+    return $self->{'repeat_count'};
+}
+
+=head2 count
+
+ Title   : count
+ Usage   : $obj->count();
+ Function: 
+
+            Sets and returns the number of times this allele was observed.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+# Accessor for the number of times the allele was observed.  A defined
+# argument made of digits only is stored; any other defined argument is
+# reported through throw().  Returns the stored count.
+sub count {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	if ($value =~ /^\d+$/) {
+	    $self->{'count'} = $value;
+	}
+	else {
+	    $self->throw("[$value] for count has to be a positive integer\n");
+	}
+    }
+    return $self->{'count'};
+}
+
+
+=head2 frequency
+
+ Title   : frequency
+ Usage   : $obj->frequency();
+ Function: 
+
+            Sets and returns the frequency of the allele in the observed
+            population.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+# Accessor for the allele frequency.  A defined argument made of digits
+# only is stored; any other defined argument is reported through throw().
+# Returns the stored value.
+# NOTE(review): only whole numbers pass the check, so a fractional
+# frequency such as 0.25 is rejected - confirm that this is intended.
+sub frequency {
+    my ($self, $value) = @_;
+    if (defined $value) {
+	if ($value =~ /^\d+$/) {
+	    $self->{'frequency'} = $value;
+	}
+	else {
+	    $self->throw("[$value] for frequency has to be a positive integer\n");
+	}
+    }
+    return $self->{'frequency'};
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/DNAMutation.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/DNAMutation.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/DNAMutation.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,385 @@
+# $Id: DNAMutation.pm,v 1.18.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::DNAMutation
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::DNAMutation - DNA level mutation class
+
+=head1 SYNOPSIS
+
+    $dnamut = Bio::Variation::DNAMutation->new
+        ('-start'         => $start,
+         '-end'           => $end,
+         '-length'        => $len,
+         '-upStreamSeq'   => $upflank,
+         '-dnStreamSeq'   => $dnflank,
+         '-proof'         => $proof,
+	 '-isMutation'    => 1,
+         '-mut_number'    => $mut_number
+        );
+    $a1 = Bio::Variation::Allele->new;
+    $a1->seq('a');
+    $dnamut->allele_ori($a1);
+    my $a2 = Bio::Variation::Allele->new;
+    $a2->seq('t');
+    $dnamut->add_Allele($a2);
+
+    print "Restriction changes are ", $dnamut->restriction_changes, "\n";
+
+    # add it to a SeqDiff container object
+    $seqdiff->add_Variant($dnamut);
+
+
+=head1 DESCRIPTION
+
+The instantiable class Bio::Variation::DNAMutation describes basic
+sequence changes in genomic DNA level. It uses methods defined in
+superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI>
+for details.
+
+If the variation described by a DNAMutation object is transcribed, link
+the corresponding Bio::Variation::RNAChange object to it using
+method RNAChange(). See L<Bio::Variation::RNAChange> for more information.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Variation::DNAMutation;
+use strict;
+
+# Object preamble - inheritance
+
+use base qw(Bio::Variation::VariantI);
+
+# Constructor.  Sets the primary tag to "Variation", starts with an empty
+# allele list and then passes every named argument with a true value to
+# the accessor of the corresponding name (-start, -end, -length, ...,
+# -mut_number, -isMutation, -cpg).  -gff_string is handed to
+# _from_gff_string.
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    
+    my ($start, $end, $length, $strand, $primary, $source, 
+	$frame, $score, $gff_string,
+	$allele_ori,  $allele_mut,  $upstreamseq,  $dnstreamseq,  
+	$label,  $status,  $proof,  $region, $region_value, $region_dist, $numbering, 
+	$cpg, $mut_number, $ismutation) =
+	    $self->_rearrange([qw(START
+				  END
+				  LENGTH
+				  STRAND
+				  PRIMARY
+				  SOURCE
+				  FRAME
+				  SCORE
+				  GFF_STRING
+				  ALLELE_ORI
+				  ALLELE_MUT
+				  UPSTREAMSEQ
+				  DNSTREAMSEQ
+				  LABEL
+				  STATUS
+				  PROOF
+				  REGION
+				  REGION_VALUE
+				  REGION_DIST
+				  NUMBERING
+				  CPG
+				  MUT_NUMBER
+				  ISMUTATION
+				  )],
+			      @args);
+
+    $self->primary_tag("Variation");
+
+    $self->{ 'alleles' } = [];
+
+    $start && $self->start($start);
+    $end   && $self->end($end);
+    $length && $self->length($length);
+    $strand && $self->strand($strand);
+    $primary && $self->primary_tag($primary);
+    $source  && $self->source_tag($source);
+    $frame   && $self->frame($frame);
+    $score   && $self->score($score);
+    $gff_string && $self->_from_gff_string($gff_string);
+    
+    $allele_ori && $self->allele_ori($allele_ori);
+    $allele_mut  && $self->allele_mut($allele_mut);
+    $upstreamseq  && $self->upStreamSeq($upstreamseq);
+    $dnstreamseq  && $self->dnStreamSeq($dnstreamseq);
+    
+    $label  && $self->label($label);
+    $status  && $self->status($status);
+    $proof && $self->proof($proof);
+    $region  && $self->region($region);
+    $region_value  && $self->region_value($region_value);
+    $region_dist  && $self->region_dist($region_dist);
+    $numbering && $self->numbering($numbering);
+    $mut_number && $self->mut_number($mut_number);
+    $ismutation && $self->isMutation($ismutation);
+
+    # set last: the flag is explicit and must not be recomputed
+    $cpg && $self->CpG($cpg);
+    
+    return $self; # success - we hope!
+}
+
+
+=head2 CpG
+
+ Title   : CpG
+ Usage   : $obj->CpG()
+ Function: sets and returns boolean values for variation 
+           hitting a CpG site.  When no value has been stored the
+           result may be undef.
+ Example : $obj->CpG()
+ Returns : boolean
+ Args    : optional true or false value
+
+
+=cut
+
+
+# Accessor for the CpG flag.
+#   - a defined argument is normalised to 1 or 0 and stored;
+#   - without an argument, and while the object's 'label' slot is still
+#     undefined, the flag is computed with _CpG_value and stored;
+#   - otherwise the stored flag is returned as is.
+# NOTE(review): the lazy branch tests the 'label' slot, not 'cpg' - this
+# looks like it was meant to test 'cpg'; confirm before changing.
+sub CpG {
+   my ($obj, $value) = @_;
+   if (defined $value) {
+       $obj->{'cpg'} = $value ? 1 : 0;
+   }
+   elsif (not defined $obj->{'label'}) {
+       $obj->{'cpg'} = $obj->_CpG_value;
+   }
+   return $obj->{'cpg'};
+}
+
+
+
+# Decides whether a point mutation hits a CpG dinucleotide.
+# Returns 1 or 0 for point mutations (both alleles one nucleotide long).
+# For anything else a warning is issued and nothing is returned.
+#
+# The test is deliberately less strict than "CG -> TG | CG -> CA": it only
+# asks whether the original nucleotide is part of a CpG dinucleotide.
+sub _CpG_value {
+    my ($self) = @_;
+
+    # Compare the allele sequences, not the allele objects themselves.
+    my ($ori, $mut) = map {
+	( $_ && defined $_->seq ) ? lc( $_->seq ) : ''
+    } ( $self->allele_ori, $self->allele_mut );
+
+    unless ( length($ori) == 1 and length($mut) == 1 ) {
+	$self->warn('CpG makes sense only in the context of point mutation');
+	return;
+    }
+
+    my $up = $self->upStreamSeq;
+    my $dn = $self->dnStreamSeq;
+    $up = defined $up ? lc $up : '';
+    $dn = defined $dn ? lc $dn : '';
+
+    # a 'c' is in a CpG when the next nucleotide (start of the downstream
+    # flank) is 'g'; a 'g' when the previous one (end of the upstream
+    # flank) is 'c'
+    return 1 if $ori eq 'c' and $dn =~ /\Ag/;
+    return 1 if $ori eq 'g' and $up =~ /c\z/;
+    return 0;
+}
+
+
+=head2 RNAChange
+
+ Title   : RNAChange
+ Usage   : $mutobj = $obj->RNAChange;
+         : $mutobj = $obj->RNAChange($objref);
+ Function: Returns or sets the link-reference to a mutation/change object.
+           If there is no link, it will return undef
+ Returns : an obj_ref or undef
+
+=cut
+
+
+# Gets or sets the linked Bio::Variation::RNAChange object.
+# A defined argument that is not a Bio::Variation::RNAChange is reported
+# through throw().  Returns the linked object, or nothing when no link
+# has been set.
+sub RNAChange {
+  my ($self,$value) = @_;
+  if (defined $value) {
+      # UNIVERSAL::isa also copes with unblessed references
+      if( ! UNIVERSAL::isa($value, 'Bio::Variation::RNAChange') ) {
+	  # report the offending argument, not the object itself
+	  $self->throw("Is not a Bio::Variation::RNAChange object but a [$value]");
+	  return;
+      }
+      else {
+	  $self->{'RNAChange'} = $value;
+      }
+  }
+  unless (exists $self->{'RNAChange'}) {
+      return;
+  } else {
+      return $self->{'RNAChange'};
+  }
+}
+
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function: 
+
+            Sets and returns mutation event label(s).  If value is not
+            set, or no argument is given returns false.  Each
+            instantiable subclass of L<Bio::Variation::VariantI> needs
+            to implement this method. Valid values are listed in
+            'Mutation event controlled vocabulary' in
+            http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+# Derives the mutation event label from the two alleles, stores it in the
+# 'label' slot and returns it.  The $value argument is accepted but not
+# used.  Possible labels:
+#   'no change'             both allele sequences empty (also warns)
+#   'point, <type>'         both sequences one character long
+#   'insertion'             original sequence empty
+#   'deletion'              mutated sequence empty
+#   'complex'               everything else
+sub label {
+    my ($self, $value) = @_;
+
+    my ($ori_seq, $mut_seq);
+    if ($self->allele_ori and $self->allele_ori->seq) {
+	$ori_seq = $self->allele_ori->seq;
+    }
+    if ($self->allele_mut and $self->allele_mut->seq) {
+	$mut_seq = $self->allele_mut->seq;
+    }
+
+    my $type;
+    if (!$ori_seq && !$mut_seq) {
+	$self->warn("[DNAMutation, label] Both alleles should not be empty!\n");
+	$type = 'no change'; # is this enough?
+    }
+    elsif ($ori_seq && $mut_seq
+	   && length($ori_seq) == 1 && length($mut_seq) == 1) {
+	$type = 'point' . ", " . _point_type_label($ori_seq, $mut_seq);
+    }
+    elsif (!$ori_seq) {
+	$type = 'insertion';
+    }
+    elsif (!$mut_seq) {
+	$type = 'deletion';
+    }
+    else {
+	$type = 'complex';
+    }
+
+    $self->{'label'} = $type;
+    return $self->{'label'};
+}
+
+
+# Classifies a single nucleotide change.  Plain function, not a method.
+# Args    : original and mutated nucleotide (case is ignored)
+# Returns : 'no change', 'transition' (a<->g, c<->t) or 'transversion'
+sub _point_type_label {
+    my ($o, $m) = @_;
+    my %transition = ('a' => 'g',
+		   'g' => 'a',
+		   'c' => 't',
+		   't' => 'c');
+    $o = lc $o;
+    $m = lc $m;
+
+    return 'no change' if $o eq $m;
+    # exists() keeps symbols outside a/c/g/t from comparing against undef
+    return 'transition' if exists $transition{$o} and $transition{$o} eq $m;
+    return 'transversion';
+}
+
+
+=head2 sysname
+
+ Title   : sysname
+ Usage   : $self->sysname
+ Function: 
+
+           This subroutine creates a string corresponding to the
+           'systematic name' of the mutation. Systematic name is
+           specified in Antonarakis & MDI Nomenclature Working Group:
+           Human Mutation 11:1-3, 1998. 
+           http://www3.interscience.wiley.com/cgi-bin/abstract/5001291/ABSTRACT
+ Returns : string
+
+=cut
+
+
+# Gets or sets the systematic name of the mutation.
+# With a defined argument the value is stored verbatim.  Without one the
+# name is rebuilt from the object's state, stored and returned:
+#   [g.|c.] sign start [del|ins] ORIGINAL ( separator MUTATED )*
+# NOTE(review): rebuilding has a side effect - allele_mut is set to each
+# allele in turn and is left pointing at the last one.
+sub sysname {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'sysname'} = $value;
+    } else {
+	# only a warning: the code below still uses $self->start
+	$self->warn('Mutation start position is not defined') 
+	    if not defined $self->start;
+	my $sysname = '';
+	# show the alphabet only if $self->SeqDiff->alphabet is set;
+	my $mol = '';
+
+if ($self->SeqDiff ) {
+	# 'g.' prefix for dna, 'c.' prefix for rna
+	if ($self->SeqDiff && $self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'dna') {
+	    $mol = 'g.';
+	}
+	elsif ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'rna') {
+	    $mol = 'c.';
+	}
+    }
+	# '>' separates alleles of a mutation, '|' those of a polymorphism
+	my $sep;
+	if ($self->isMutation) {
+	    $sep = '>';
+	} else {
+	    $sep = '|';
+	}
+	# positions below 1 already print their own minus sign
+	my $sign = '+'; 
+	$sign = '' if $self->start < 1;
+	$sysname .=  $mol ;#if $mol;
+	$sysname .= $sign. $self->start;
+
+	# label() is derived from allele_ori and allele_mut, so allele_mut
+	# has to be set before the label is consulted
+	my @alleles = $self->each_Allele;
+	$self->allele_mut($alleles[0]);
+
+	$sysname .= 'del' if $self->label =~ /deletion/;
+	$sysname .= 'ins' if $self->label =~ /insertion/;
+	$sysname .=  uc $self->allele_ori->seq if $self->allele_ori->seq;
+
+
+
+	#push @alleles, $self->allele_mut if $self->allele_mut;
+	# one separator + sequence per allele; the label is re-evaluated for
+	# each allele because it depends on the current allele_mut
+	foreach my $allele (@alleles) {
+	    $self->allele_mut($allele);
+	    $sysname .= $sep if $self->label =~ /point/ or $self->label =~ /complex/;
+	    $sysname .=  uc $self->allele_mut->seq if $self->allele_mut->seq;
+	}
+	$self->{'sysname'} = $sysname;
+	#$self->{'sysname'} = $sign. $self->start. 
+	#    uc $self->allele_ori->seq. $sep. uc $self->allele_mut->seq;
+    }
+    return $self->{'sysname'};
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/flat.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/flat.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/flat.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,715 @@
+# $Id: flat.pm,v 1.19.4.1 2006/10/02 23:10:38 sendu Exp $
+# BioPerl module for Bio::Variation::IO::flat
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::IO::flat - flat file sequence variation input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Variation::IO class.
+
+=head1 DESCRIPTION
+
+This object can transform Bio::Variation::SeqDiff objects to and from
+flat file databases. The format used is EMBL like extension of what is
+used by the "EBI Mutation Checker" at
+http://www.ebi.ac.uk/cgi-bin/mutations/check.cgi and will eventually
+replace it.
+
+More information on the attributes and values used can be found at
+http://www.ebi.ac.uk/mutations/recommendations/.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Variation::IO::flat;
+
+use strict;
+
+use Text::Wrap;
+use Bio::Variation::SeqDiff;
+use Bio::Variation::DNAMutation;
+use Bio::Variation::RNAChange;
+use Bio::Variation::AAChange;
+use Bio::Variation::Allele;
+
+
+use base qw(Bio::Variation::IO);
+
+# Constructor: blesses an empty hash into the class, lets _initialize
+# process the arguments and returns the new object.
+sub new {
+    my $class = shift;
+    my $self  = bless {}, $class;
+    $self->_initialize(@_);
+    return $self;
+}
+
+# Hands all constructor arguments to the parent class initialiser.
+sub _initialize {
+  my ($self, @args) = @_;
+  return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 next
+
+
+ Title   : next
+ Usage   : $haplo = $stream->next()
+ Function: returns the next seqDiff in the stream
+ Returns : Bio::Variation::SeqDiff object
+ Args    : NONE
+
+=cut
+
+sub next {
+    my( $self ) = @_;
+    local $/ = '//';
+    return unless my $entry = $self->_readline;
+    
+    return if $entry =~ /^\s+$/;
+
+    $entry =~ /\s*ID\s+\S+/ || $self->throw("We do need an ID!");
+
+    my ($id, $offset, $alphabet) = $entry =~ /\s*ID +([^:]+)..(\d+)[^\)]*.\[?([cg])?/
+	or $self->throw("Can't parse ID line");
+#    $self->throw("$1|$2|$3");
+    my $h =Bio::Variation::SeqDiff->new(-id         => $id,
+					-offset     => $offset,
+					  );
+    if ($alphabet) { 
+	if ($alphabet eq 'g') {
+	    $alphabet = 'dna';
+	} 
+	elsif ($alphabet eq 'c') {
+	    $alphabet = 'rna';
+	}
+	$h->alphabet($alphabet);
+    }
+    #
+    # DNA 
+    #
+    my @dna = split ( / DNA;/, $entry );
+    shift @dna;
+    my $prevdnaobj;
+    foreach my $dna (@dna) {
+	$dna =~ s/Feature[ \t]+//g;
+	($dna) = split "RNA; ", $dna; 
+	#$self->warn("|$dna|") ;
+	#exit;
+	my ($mut_number, $proof, $location, $upflank, $change, $dnflank) = 
+	    $dna =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: ([ \n\w]+).+/change: ([^ /]+).+/dnflank: ([ \n\w]+)|s;
+	$change =~ s/[ \n]//g;	
+	my ($ori, $mut) =  split /[>\|]/, $change;
+	my ($variation_number, $change_number) = split /\./, $mut_number;
+	#$self->warn("|$mut_number|>|$variation_number|$change_number|");
+	my $dnamut;
+	if ($change_number and $change_number > 1 ) {
+	    my $a3 = Bio::Variation::Allele->new;
+	    $a3->seq($mut) if $mut;
+	    #$dnamut->add_Allele($a3);
+	    $prevdnaobj->add_Allele($a3);
+	} else {
+	    $upflank =~ s/[ \n]//g;
+	    $dnflank =~ s/[ \n]//g;
+	    my ($region, $junk, $region_value, $junk2, $region_dist) =  
+		$dna =~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s;
+	    #my $s = join ("|", $mut_number, $proof, $location, $upflank, 
+	    #	     $change, $dnflank, $region, $region_value, $region_dist, $1,$2,$3,$4,$5);
+	    #$self->warn($s);
+	    #exit;
+	    my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/;
+	    $end = $start if not defined $end ;
+	    my ($len) = $end - $start +1; 
+	    $len = 0, $start = $end if defined $sep and $sep eq '^';
+	    my $ismut = 0;
+	    $ismut = 1 if $change =~ m/>/; 
+	    
+	    $dnamut = Bio::Variation::DNAMutation->new 
+		('-start'         => $start,
+		 '-end'           => $end,
+		 '-length'        => $len,
+		 '-upStreamSeq'   => $upflank,
+		 '-dnStreamSeq'   => $dnflank,
+		 '-proof'         => $proof,
+		 '-mut_number'    => $mut_number
+		 );
+	    $prevdnaobj = $dnamut;
+	    my $a1 = Bio::Variation::Allele->new;
+	    $a1->seq($ori) if $ori;
+	    $dnamut->allele_ori($a1);
+	    my $a2 = Bio::Variation::Allele->new;
+	    $a2->seq($mut) if $mut;
+	    $dnamut->add_Allele($a2);
+	    if ($ismut) {
+		$dnamut->isMutation(1);
+		$dnamut->allele_mut($a2);
+	    }
+	    $dnamut->region($region) if defined $region;
+	    $dnamut->region_value($region_value) if defined $region_value;
+	    $dnamut->region_dist($region_dist) if defined $region_dist;
+
+	    $h->add_Variant($dnamut);
+	    $dnamut->SeqDiff($h);
+	}
+    }
+
+    #
+    # RNA 
+    #
+    my @rna = split ( / RNA;/, $entry );
+    shift @rna;
+    my $prevrnaobj;
+    foreach my $rna (@rna) {
+	$rna = substr ($rna, 0, index($rna, 'Feature      AA'));
+	$rna =~ s/Feature[ \t]+//g;
+	($rna) = split "DNA; ", $rna; 
+	#$self->warn("|$rna|") ;
+	my ($mut_number, $proof, $location, $upflank, $change, $dnflank) = 
+	    $rna =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: (\w+).+/change: ([^/]+).+/dnflank: (\w+)|s ;#'
+	my ($region, $junk, $region_value, $junk2, $region_dist) =  
+	    $rna =~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s;
+	#my $s = join ("|", $mut_number, $proof, $location, $upflank, 
+	#	      $change, $dnflank, $region, $region_value, $region_dist, $1,$2,$3,$4,$5);
+	#$self->warn($s);
+	#exit;
+	$change =~ s/[ \n]//g;	
+	my ($ori, $mut) =  split /[>\|]/, $change;
+	my $rnamut;
+	my ($variation_number, $change_number) = split /\./, $mut_number;
+	if ($change_number and $change_number > 1 ) {
+	    my $a3 = Bio::Variation::Allele->new;
+	    $a3->seq($mut) if $mut;
+	    #$rnamut->add_Allele($a3);
+	    $prevrnaobj->add_Allele($a3);
+	} else {
+	    my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/;
+	    $end = $start if not defined $end ;
+	    my ($len) = $end - $start + 1; 
+	    $len = 0, $start = $end if defined $sep and $sep eq '^'; 
+	    my $ismut;
+	    $ismut = 1 if $change =~ m/>/; 
+	    my ($codon_table) = $rna =~ m|.+/codon_table: (\d+)|s;
+	    my ($codon_pos) = $rna =~ m|.+/codon:[^;]+; ([123])|s;
+
+	    $rnamut = Bio::Variation::RNAChange->new 
+		('-start'         => $start,
+		 '-end'           => $end,
+		 '-length'        => $len,
+		 '-upStreamSeq'   => $upflank,
+		 '-dnStreamSeq'   => $dnflank,
+		 '-proof'         => $proof,
+		 '-mut_number'    => $mut_number
+		 
+		 );
+	    $prevrnaobj = $rnamut;
+	    my $a1 = Bio::Variation::Allele->new;
+	    $a1->seq($ori) if $ori;
+	    $rnamut->allele_ori($a1);
+	    my $a2 = Bio::Variation::Allele->new;
+	    $a2->seq($mut) if $mut;
+	    $rnamut->add_Allele($a2);
+	    if ($ismut) {
+		$rnamut->isMutation(1);
+		$rnamut->allele_mut($a2);
+	    }
+	    $rnamut->region($region) if defined $region;
+	    $rnamut->region_value($region_value) if defined $region_value;
+	    $rnamut->region_dist($region_dist) if defined $region_dist;
+
+	    $rnamut->codon_table($codon_table) if $codon_table;
+	    $rnamut->codon_pos($codon_pos) if $codon_pos;
+	    $h->add_Variant($rnamut);
+	    foreach my $mut ($h->each_Variant) {
+		if ($mut->isa('Bio::Variation::DNAMutation') ) {
+		    if ($mut->mut_number == $rnamut->mut_number) {
+			$rnamut->DNAMutation($mut);
+			$mut->RNAChange($rnamut);
+		    }
+		}
+	    }
+	}
+    }    
+    #
+    # AA 
+    #
+    my @aa = split ( / AA;/, $entry );
+    shift @aa;
+    my $prevaaobj;
+    foreach my $aa (@aa) {
+	$aa = substr ($aa, 0, index($aa, 'Feature      AA'));
+	$aa =~ s/Feature[ \t]+//g;
+	($aa) = split "DNA; ", $aa; 
+	#$self->warn("|$aa|") ;
+	my ($mut_number, $proof, $location, $change) = 
+	    $aa =~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+)./change: ([^/;]+)|s;
+	$change =~ s/[ \n]//g;	
+	#my $s = join ("|", $mut_number, $proof, $location, $change);
+	#$self->warn($s);
+	#exit;
+	$change =~ s/[ \n]//g;
+	$change =~ s/DNA$//;
+	my ($ori, $mut) =  split /[>\|]/, $change;
+	#print "------$location----$ori-$mut-------------\n";
+	my ($variation_number, $change_number) = split /\./, $mut_number;
+	my $aamut;
+	if ($change_number and $change_number > 1 ) {
+	    my $a3 = Bio::Variation::Allele->new;
+	    $a3->seq($mut) if $mut;
+	    $prevaaobj->add_Allele($a3);
+	} else {
+	    my ($start, $sep, $end) = $location =~ /(-?\d+)(.)?\D?(-?\d+)?/;
+	    $end = $start if not defined $end ;
+	    my ($len) = $end - $start + 1; 
+	    $len = 0, $start = $end if defined $sep and $sep eq '^'; 
+	    my $ismut;
+	    $ismut = 1 if $change =~ m/>/; 
+	    my ($region) =  $aa =~ m|.+/region: (\w+)|s ;	
+	    $aamut = Bio::Variation::AAChange->new 
+		('-start'         => $start,
+		 '-end'           => $end,
+		 '-length'        => $len,
+		 '-proof'         => $proof,
+		 '-mut_number'    => $mut_number	     
+		 );
+	    $prevaaobj = $aamut;
+	    my $a1 = Bio::Variation::Allele->new;
+	    $a1->seq($ori) if $ori;
+	    $aamut->allele_ori($a1);
+	    my $a2 = Bio::Variation::Allele->new;
+	    $a2->seq($mut) if $mut;
+	    $aamut->add_Allele($a2);
+	    if ($ismut) {
+		$aamut->isMutation(1);
+		$aamut->allele_mut($a2);
+	    }
+	    $region && $aamut->region($region);
+	    $h->add_Variant($aamut); 
+	    foreach my $mut ($h->each_Variant) {
+		if ($mut->isa('Bio::Variation::RNAChange') ) {
+		    if ($mut->mut_number == $aamut->mut_number) {
+			$aamut->RNAChange($mut);
+			$mut->AAChange($aamut);
+		    }
+		}
+	    }
+
+	}
+    }
+    return $h;
+}
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write(@seqDiffs)
+ Function: writes the $seqDiff object into the stream
+ Returns : 1 on success; throws an exception on error
+ Args    : Bio::Variation::SeqDiff object
+
+
+=cut
+
+sub write {
+    my ($self, @h) = @_;
+
+    # Serialises each SeqDiff in @h as one flat-format entry: an ID line,
+    # then one Feature section per allele of every DNA, RNA and AA variant,
+    # then a closing "//" line. Each finished entry is handed to
+    # $self->_print.
+    #
+    # Throws if called without objects or if a variant has no allele_ori;
+    # otherwise returns 1.
+    #
+    # Side effect: while iterating over the alleles of a variant, its
+    # allele_mut (and, for multi-allele variants, its mut_number) is
+    # overwritten on the variant object itself.
+
+    #$columns = 75; #default for Text::Wrap
+    # Line prefixes of the flat format; the padding inside the strings
+    # is significant.
+    my %tag =
+        (
+         'ID'               => 'ID           ',
+         'Description'      => 'Description  ',
+         'FeatureKey'       => 'Feature      ',
+         'FeatureQual'      => "Feature        ",
+         'FeatureWrap'      => "Feature         ",
+         'ErrorComment'     => 'Comment      '
+         #'Comment'          => 'Comment      -!-',
+         #'CommentLine'      => 'Comment         ',
+         );
+
+    if( !defined $h[0] ) {
+        $self->throw("Attempting to write with no information!");
+    }
+
+    foreach my $h (@h) {
+
+        my @entry = ();
+
+        my ($text, $tmp, $tmp2, $sep);
+
+        # ID line: id:(offset[+1])sysname[; trivname]
+        $text = $tag{ID};
+
+        $text .= $h->id;
+        $text .= ":(". $h->offset;
+        $text .= "+1" if $h->sysname =~ /-/;
+        $text .= ")".  $h->sysname;
+        $text .= "; ".  $h->trivname if $h->trivname;
+        push (@entry, $text);
+
+        # Variants need to be ordered according to the mutation_number
+        # attribute: put them into a hash of arrays holding the Variant
+        # objects. This is necessary for cases like several distinct
+        # mutations present in the same sequence.
+        # NOTE(review): the keys are sorted as strings below, so "10"
+        # comes before "2" -- confirm whether numeric order is wanted.
+        my %variants = ();
+        foreach my $mut ($h->each_Variant) {
+            push @{$variants{$mut->mut_number} }, $mut;
+        }
+        foreach my $var (sort keys %variants) {
+            #print $var, ": ", join (" ", @{$variants{$var}}), "\n";
+
+            foreach my $mut (@{$variants{$var}}) {
+                #
+                # DNA
+                #
+                if ( $mut->isa('Bio::Variation::DNAMutation') ) {
+                    $self->throw("allele_ori needs to be defined in [$mut]")
+                        if not $mut->allele_ori;
+                    # '>' separates the alleles of a mutation, '|' those
+                    # of any other variation
+                    if ($mut->isMutation) {
+                        $sep = '>';
+                    } else {
+                        $sep = '|';
+                    }
+                    #collect all non-reference alleles
+                    my @alleles = $mut->each_Allele;
+                    #push @alleles, $mut->allele_mut if $mut->allele_mut;
+                    my $count = 0; # running allele number
+                    foreach my $allele (@alleles) {
+                        $count++;
+                        my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+                        # Renumber the change part only; the variation
+                        # part of "variation.change" must stay the same.
+                        if ($change_number and $change_number != $count){
+                            $mut->mut_number("$variation_number.$count");
+                        }
+                        $mut->allele_mut($allele);
+                        push (@entry,
+                              $tag{FeatureKey}. 'DNA'. "; ". $mut->mut_number
+                              );
+                        #label
+                        $text = $tag{FeatureQual}. '/label: '. $mut->label;
+                        push (@entry, $text);
+
+                        #proof
+                        if ($mut->proof) {
+                            $text = $tag{FeatureQual}. '/proof: '. $mut->proof;
+                            push (@entry, $text) ;
+                        }
+                        #location
+                        $text = $tag{FeatureQual}. '/location: ';
+                            #$mut->id. '; '. $mut->start;
+                        if ($mut->length > 1 ) {
+                            my $last = $mut->start + $mut->length -1;
+                            $text .= $mut->start. '..'.  $last;
+                        }
+                        elsif ($mut->length == 0) {
+                            # zero length is written as "before^after";
+                            # the decrement skips position 0
+                            my $tmp_start = $mut->start - 1;
+                            $tmp_start-- if $tmp_start == 0;
+                            $text .= $tmp_start. '^'. $mut->end;
+                        } else {
+                            $text .= $mut->start;
+                        }
+
+                        # the same location shifted by the SeqDiff offset,
+                        # appended in parentheses
+                        if ($h->alphabet && $h->alphabet eq 'dna') {
+                            $tmp = $mut->start + $h->offset;
+                            $tmp-- if $tmp <= 0;
+                            $mut->start < 1 && $tmp++;
+                            #$text.= ' ('. $h->id. '::'. $tmp;
+                            $tmp2 = $mut->end + $h->offset;
+                            if ( $mut->length > 1 ) {
+                                $mut->end < 1 && $tmp2++;
+                                $text.= ' ('. $h->id. '::'. $tmp. "..". $tmp2;
+                            }
+                            elsif ($mut->length == 0) {
+                                $tmp--;
+                                $tmp-- if $tmp == 0;
+                                $text .= ' ('. $h->id. '::'. $tmp. '^'. $tmp2;
+                            } else {
+                                $text.= ' ('. $h->id. '::'. $tmp;
+                            }
+                            $text .= ')';
+                        }
+                        push (@entry, $text);
+                        #sequence
+                        push (@entry,
+                              $tag{FeatureQual}. '/upflank: '. $mut->upStreamSeq
+                              );
+                        $text = '';
+                        $text = $mut->allele_ori->seq if $mut->allele_ori->seq;
+                        $text .= $sep;
+                        $text .= $mut->allele_mut->seq if $mut->allele_mut->seq;
+                        push (@entry,
+                              wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap},
+                                   $text)
+                              );
+
+                        push (@entry,
+                              $tag{FeatureQual}. '/dnflank: '. $mut->dnStreamSeq
+                              );
+                        #restriction enzyme
+                        if ($mut->restriction_changes ne '') {
+                            $text = $mut->restriction_changes;
+                            $text = wrap($tag{FeatureQual}. '/re_site: ', $tag{FeatureWrap}, $text);
+                            push (@entry,
+                                  $text
+                                  );
+                        }
+                        #region
+                        if ($mut->region ) {
+                            $text = $tag{FeatureQual}. '/region: '. $mut->region;
+                            $text .= ';' if $mut->region_value or $mut->region_dist;
+                            $text .= ' '. $mut->region_value if $mut->region_value;
+                            if ($mut->region_dist ) {
+                                $tmp = '';
+                                $tmp = '+' if $mut->region_dist > 1;
+                                $text .= " (". $tmp. $mut->region_dist. ')';
+                            }
+                            push (@entry, $text);
+                        }
+                        #CpG
+                        if ($mut->CpG) {
+                            push (@entry,
+                                  $tag{FeatureQual}. "/CpG"
+                                  );
+                        }
+                    }
+                }
+                #
+                # RNA
+                #
+                elsif ($mut->isa('Bio::Variation::RNAChange') ) {
+                    $self->throw("allele_ori needs to be defined in [$mut]")
+                        if not $mut->allele_ori;
+                    #collect all non-reference alleles
+                    my @alleles = $mut->each_Allele;
+                    #push @alleles, $mut->allele_mut if $mut->allele_mut;
+                    if ($mut->isMutation) {
+                        $sep = '>';
+                    } else {
+                        $sep = '|';
+                    }
+
+                    my $count = 0; # running allele number
+                    foreach my $allele (@alleles) {
+                        $count++;
+                        my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+                        # keep the variation part, renumber the change part
+                        if ($change_number and $change_number != $count){
+                            $mut->mut_number("$variation_number.$count");
+                        }
+                        $mut->allele_mut($allele);
+                        push (@entry,
+                              $tag{FeatureKey}. 'RNA'. "; ". $mut->mut_number
+                              );
+                        #label
+                        $text = $tag{FeatureQual}. '/label: '. $mut->label;
+                        push (@entry, $text);
+                        #proof
+                        if ($mut->proof) {
+                            $text = $tag{FeatureQual}. '/proof: '. $mut->proof;
+                            push (@entry, $text) ;
+                        }
+                        #location
+                        $text = $tag{FeatureQual}. '/location: ' ;
+                        if ($mut->length > 1 ) {
+                            $text .= $mut->start. '..'. $mut->end;
+                        }
+                        elsif ($mut->length == 0) {
+                            my $tmp_start = $mut->start;
+                            $tmp_start--;
+                            $tmp_start-- if $tmp_start == 0;
+                            $text .= $tmp_start. '^'. $mut->end;
+                        } else {
+                            $text .= $mut->start;
+                        }
+
+                        # the same location shifted by the SeqDiff offset
+                        if ($h->alphabet && $h->alphabet eq 'rna') {
+                            $tmp = $mut->start + $h->offset;
+                            $tmp-- if $tmp <= 0;
+                            #$mut->start < 1 && $tmp++;
+                            #$text.= ' ('. $h->id. '::'. $tmp;
+                            $tmp2 = $mut->end + $h->offset;
+                            #$mut->end < 1 && $tmp2++;
+                            if ( $mut->length > 1 ) {
+                                $text.= ' ('. $h->id. '::'. $tmp. "..". $tmp2;
+                            }
+                            elsif ($mut->length == 0) {
+                                $tmp--;
+                                $text .= ' ('. $h->id. '::'. $tmp. '^'. $tmp2;
+                            } else {
+                                $text.= ' ('. $h->id. '::'. $tmp;
+                            }
+
+                            $text .= ')';
+                        }
+                        push (@entry, $text);
+
+                        #sequence
+                        push (@entry,
+                              $tag{FeatureQual}. '/upflank: '. $mut->upStreamSeq
+                              );
+                        $text = '';
+                        $text = $mut->allele_ori->seq if $mut->allele_ori->seq;
+                        $text .= $sep;
+                        $text .= $mut->allele_mut->seq if $mut->allele_mut->seq;
+                        push (@entry,
+                              wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap},
+                                   $text)
+                              );
+                        push (@entry,
+                              $tag{FeatureQual}. '/dnflank: '. $mut->dnStreamSeq
+                              );
+                        #restriction
+                        if ($mut->restriction_changes ne '') {
+                            $text = $mut->restriction_changes;
+                            $text = wrap($tag{FeatureQual}. '/re_site: ', $tag{FeatureWrap}, $text);
+                            push (@entry,
+                                  $text
+                                  );
+                        }
+                        #coding
+                        if ($mut->region eq 'coding') {
+                            #codon table
+                            $text =  $tag{FeatureQual}. '/codon_table: ';
+                            $text .= $mut->codon_table;
+                            push (@entry, $text);
+                            #codon
+
+                            $text = $tag{FeatureQual}. '/codon: '. $mut->codon_ori. $sep;
+                            # the mutated codon is written only when the
+                            # label of the linked DNA mutation matches
+                            # "point"; otherwise a '-' placeholder
+                            if ($mut->DNAMutation->label =~ /.*point/) {
+                                $text .= $mut->codon_mut;
+                            }
+                            else {
+                                $text .= '-';
+                            }
+                            $text .= "; ". $mut->codon_pos;
+                            push (@entry, $text);
+                        }
+                        #region
+                        if ($mut->region ) {
+                            $text = $tag{FeatureQual}. '/region: '. $mut->region;
+                            $text .= ';' if $mut->region_value or $mut->region_dist;
+                            $text .= ' '. $mut->region_value if $mut->region_value;
+                            if ($mut->region_dist ) {
+                                $tmp = '';
+                                $tmp = '+' if $mut->region_dist > 1;
+                                $text .= " (". $tmp. $mut->region_dist. ')';
+                            }
+                            push (@entry, $text);
+                        }
+                    }
+                }
+                #
+                # AA
+                #
+                elsif ($mut->isa('Bio::Variation::AAChange')) {
+                    $self->throw("allele_ori needs to be defined in [$mut]")
+                        if not $mut->allele_ori;
+                    if ($mut->isMutation) {
+                        $sep = '>';
+                    } else {
+                        $sep = '|';
+                    }
+                    #collect all non-reference alleles
+                    my @alleles = $mut->each_Allele;
+                    #push @alleles, $mut->allele_mut if $mut->allele_mut;
+                    my $count = 0; # running allele number
+                    foreach my $allele (@alleles) {
+                        $count++;
+                        my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+                        # keep the variation part, renumber the change part
+                        if ($change_number and $change_number != $count){
+                            $mut->mut_number("$variation_number.$count");
+                        }
+                        $mut->allele_mut($allele);
+                        push (@entry,
+                              $tag{FeatureKey}. 'AA'. "; ". $mut->mut_number
+                              );
+                        #label
+                        $text = $tag{FeatureQual}. '/label: '. $mut->label;
+                        push (@entry, $text) ;
+                        #proof
+                        if ($mut->proof) {
+                            $text = $tag{FeatureQual}. '/proof: '. $mut->proof;
+                            push (@entry, $text) ;
+                        }
+                        #location
+                        $text = $tag{FeatureQual}. '/location: '.
+                            #$mut->id. '; '. $mut->start;
+                            $mut->start;
+                        if ($mut->length > 1 ) {
+                            $tmp = $mut->start + $mut->length -1;
+                            $text .= '..'. $tmp;
+                        }
+                        push (@entry, $text);
+                        #sequence
+                        $text = '';
+                        $text = $mut->allele_ori->seq if $mut->allele_ori->seq;
+                        $text .= $sep;
+                        $text .= $mut->allele_mut->seq if $mut->allele_mut->seq;
+                        push (@entry,
+                              wrap($tag{FeatureQual}. '/change: ', $tag{FeatureWrap},
+                                   $text)
+                              );
+                        #region
+                        if ($mut->region ) {
+                            $text = $tag{FeatureQual}. '/region: '. $mut->region;
+                            $text .= ';' if $mut->region_value or $mut->region_dist;
+                            $text .= ' '. $mut->region_value if $mut->region_value;
+                            if ($mut->region_dist ) {
+                                $tmp = '';
+                                $tmp = '+' if $mut->region_dist > 1;
+                                $text .= " (". $tmp. $mut->region_dist. ')';
+                            }
+                            push (@entry, $text);
+                        }
+                    }
+                }
+            }
+        }
+        # entry terminator
+        push (@entry,
+              "//"
+              );
+        my $str = join ("\n", @entry). "\n";
+        # expand any tab characters in the finished entry to eight spaces
+        $str =~ s/\t/        /g;
+        $self->_print($str);
+    }
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/xml.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/xml.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO/xml.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,555 @@
+# $Id: xml.pm,v 1.19.4.1 2006/10/02 23:10:38 sendu Exp $
+# BioPerl module for Bio::Variation::IO::xml
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::IO::xml - XML sequence variation input/output stream
+
+=head1 SYNOPSIS
+
+Do not use this module directly.  Use it via the Bio::Variation::IO class.
+
+=head1 DESCRIPTION
+
+This object can transform L<Bio::Variation::SeqDiff> objects to and from XML
+file databases.
+
+The XML format, although consistent, is still evolving. The current
+DTD for it is at L<http://www.ebi.ac.uk/mutations/DTDE/seqDiff.dtd>.
+
+=head1 REQUIREMENTS
+
+To use this code you need the module L<XML::Twig> which creates an
+interface to L<XML::Parser> to read XML and modules L<XML::Writer> and
+L<IO::String> to write XML out.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Variation::IO::xml;
+
+use vars qw($seqdiff $var $prevdnaobj $prevrnaobj $prevaaobj);
+use strict;
+
+use XML::Twig;
+use XML::Writer 0.4;
+use IO::String;
+use Bio::Variation::SeqDiff;
+use Bio::Variation::DNAMutation;
+use Bio::Variation::RNAChange;
+use Bio::Variation::AAChange;
+use Bio::Variation::Allele;
+
+use base qw(Bio::Variation::IO);
+
+# _initialize is where the heavy stuff will happen when new is called
+
+# Constructor: blesses an empty hash into $class and passes all remaining
+# arguments on to _initialize. Returns the new object.
+sub new {
+    my ($class, @args) = @_;
+    my $self = bless {}, $class;
+    $self->_initialize(@args);
+    return $self;
+}
+
+# Delegates initialisation to the parent class, passing the constructor
+# arguments through unchanged. Returns nothing (empty list/undef) when
+# the parent initialiser reports failure.
+sub _initialize {
+  my ($self, @args) = @_;
+  return unless $self->SUPER::_initialize(@args);
+}
+
+=head2 next
+
+ Title   : next
+ Usage   : $haplo = $stream->next()
+ Function: returns the next seqDiff in the stream
+ Returns : Bio::Variation::SeqDiff object
+ Args    : NONE
+
+=cut
+
+
+# Twig handler registered for the 'seqDiff' element (see next()).
+# Copies the element's id, moltype and offset attributes onto the
+# package-level $seqdiff object, then hands every child element to
+# _variant().
+#   $twig    - the XML::Twig object, passed through to _variant()
+#   $element - the seqDiff element
+sub _seqDiff {
+    my ($twig, $element) = @_;
+
+    $seqdiff->id(       $element->att('id') );
+    $seqdiff->alphabet( $element->att('moltype') );
+    $seqdiff->offset(   $element->att('offset') );
+
+    foreach my $variant_element ($element->children) {
+        _variant($twig, $variant_element);
+    }
+}
+
+# Converts one child element of seqDiff into variant data.
+#
+# The element name selects the type: 'DNA', 'RNA', and anything else is
+# handled as AA. The 'number' attribute has the form "variation.change".
+# An element whose change part is greater than 1 only contributes one more
+# allele to the most recently created variant of the same type; any other
+# element creates a new variant, adds it to the package-level $seqdiff and
+# links it to the previously seen DNA (for RNA) or RNA (for AA) variant.
+#
+#   $twig    - the XML::Twig object (not used here)
+#   $element - the DNA/RNA/AA element
+sub _variant {
+    my ($twig, $element) = @_;
+    my $attr = $element->atts();
+    my $kind = $element->gi;
+    my ($variation_number, $change_number) = split /\./,  $attr->{number};
+
+    if ($variation_number and $change_number and $change_number > 1 ) {
+        # additional allele of the previous variant of this type
+        my $extra_allele = Bio::Variation::Allele->new;
+        my $extra_seq    = $element->first_child_text('allele_mut');
+        $extra_allele->seq($extra_seq) if $extra_seq;
+
+        my $owner = $kind eq 'DNA' ? $prevdnaobj
+                  : $kind eq 'RNA' ? $prevrnaobj
+                  :                  $prevaaobj;
+        $owner->add_Allele($extra_allele);
+    } else {
+        # first allele: build a new variant object
+        my $variant = $kind eq 'DNA' ? Bio::Variation::DNAMutation->new
+                    : $kind eq 'RNA' ? Bio::Variation::RNAChange->new
+                    :                  Bio::Variation::AAChange->new;
+
+        # values set unconditionally
+        $variant->start( $attr->{start} );
+        $variant->end( $attr->{end} );
+        $variant->length( $attr->{len} );
+        $variant->mut_number( $attr->{number} );
+        $variant->upStreamSeq( $element->first_child_text('upFlank') );
+        $variant->dnStreamSeq( $element->first_child_text('dnFlank') );
+        $variant->proof( $element->first_child_text('proof') );
+
+        # optional region element with optional value/dist attributes
+        if (my $region_element = $element->first_child('region')) {
+            $variant->region( $region_element->text );
+            my $region_attr = $region_element->atts;
+            $variant->region_value( $region_attr->{value} )
+                if $region_attr->{value};
+            $variant->region_dist( $region_attr->{dist} )
+                if $region_attr->{dist};
+        }
+
+        # original and mutated allele
+        my $ori_allele = Bio::Variation::Allele->new;
+        my $ori_seq    = $element->first_child_text('allele_ori');
+        $ori_allele->seq($ori_seq) if $ori_seq;
+        $variant->allele_ori($ori_allele);
+
+        my $mut_allele = Bio::Variation::Allele->new;
+        my $mut_seq    = $element->first_child_text('allele_mut');
+        $mut_allele->seq($mut_seq) if $mut_seq;
+        $variant->isMutation(1) if $element->att('isMutation');
+        $variant->allele_mut($mut_allele);
+        $variant->add_Allele($mut_allele);
+        # the 'length' attribute replaces the value set from 'len' above
+        $variant->length( $element->att('length') );
+        $seqdiff->add_Variant($variant);
+
+        # type specific part: remember the variant and cross-link it
+        if ($kind eq 'DNA') {
+            $prevdnaobj = $variant;
+        }
+        elsif ($kind eq 'RNA') {
+            if (my $codon = $element->first_child('codon')) {
+                my $codon_attr = $codon->atts;
+                $variant->codon_table( $codon_attr->{codon_table} )
+                    if $codon_attr->{codon_table}
+                        and $codon_attr->{codon_table} != 1;
+                $variant->codon_pos( $codon_attr->{codon_pos} )
+                    if $codon_attr->{codon_pos};
+            }
+            $prevdnaobj->RNAChange($variant);
+            $variant->DNAMutation($prevdnaobj);
+            $prevrnaobj = $variant;
+        } else {
+            $prevrnaobj->AAChange($variant);
+            $variant->RNAChange($prevrnaobj);
+            $prevaaobj = $variant;
+        }
+    }
+}
+
+# Reads one record from the stream and parses it into a SeqDiff.
+# A record is everything up to and including a "</seqDiff>\n" line end.
+# Returns the populated Bio::Variation::SeqDiff object, or nothing when
+# no record could be read or the record does not begin with a seqDiff
+# start tag (after optional non-word characters).
+sub next {
+    my ($self) = @_;
+
+    # temporarily make the closing tag the input record separator
+    local $/ = "</seqDiff>\n";
+    my $record = $self->_readline;
+    return unless $record;
+    return unless $record =~ /^\W*<seqDiff/;
+
+    # the twig handlers fill in this package-level object
+    $seqdiff = Bio::Variation::SeqDiff->new;
+
+    my $parser = XML::Twig->new(
+        TwigHandlers => { 'seqDiff' => \&_seqDiff },
+        KeepEncoding => 1,
+    );
+    $parser->parse($record);
+
+    return $seqdiff;
+}
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write(@haplos)
+ Function: writes the $seqDiff objects into the stream
+ Returns : 1 on success; throws an exception on error
+ Args    : Bio::Variation::SeqDiff object
+
+=cut
+
+sub write {
+    my ($self, @h) = @_;
+
+    # Serialises the SeqDiff objects in @h as XML: a seqDiff element that
+    # contains one DNA, RNA or AA element per allele of every variant.
+    # The XML is built in memory and handed to $self->_print in one piece.
+    #
+    # Throws if called without objects or if a SeqDiff has no alphabet;
+    # otherwise returns 1.
+    #
+    # Side effect: while iterating over the alleles of a variant, its
+    # allele_mut (and, for multi-allele variants, its mut_number) is
+    # overwritten on the variant object itself; an unset isMutation is
+    # set to 0.
+
+    if( !defined $h[0] ) {
+        $self->throw("Attempting to write with no information!");
+    }
+    my $str;
+    my $output = IO::String->new($str);
+    my $w = XML::Writer->new(OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 4 );
+
+    foreach my $h (@h) {
+        #
+        # seqDiff
+        #
+        $h->alphabet || $self->throw("Moltype of the reference sequence is not set!");
+        # the trivname attribute is written only when an AA variant exists
+        my $hasAA = 0;
+        foreach my $mut ($h->each_Variant) {
+            $hasAA = 1 if  $mut->isa('Bio::Variation::AAChange');
+        }
+        if ($hasAA) {
+            $w->startTag("seqDiff",
+                         "id" => $h->id,
+                         "moltype" => $h->alphabet,
+                         "offset" => $h->offset,
+                         "sysname" => $h->sysname,
+                         "trivname" => $h->trivname
+                         );
+        } else {
+            $w->startTag("seqDiff",
+                         "id" => $h->id,
+                         "moltype" => $h->alphabet,
+                         "offset" => $h->offset,
+                         "sysname" => $h->sysname
+                         );
+        }
+        # group the variants by mut_number so they are written in order
+        # NOTE(review): the keys are sorted as strings below, so "10"
+        # comes before "2" -- confirm whether numeric order is wanted.
+        my %variants = ();
+        foreach my $mut ($h->each_Variant) {
+            #print STDERR  $mut->mut_number, "\t", $mut, "\t",
+            #$mut->proof, "\t", scalar $mut->each_Allele,  "\n";
+            push @{$variants{$mut->mut_number} }, $mut;
+        }
+        foreach my $var (sort keys %variants) {
+            foreach my $mut (@{$variants{$var}}) {
+                #
+                # DNA
+                #
+                if( $mut->isa('Bio::Variation::DNAMutation') ) {
+                    $mut->isMutation(0) if not $mut->isMutation;
+                    my @alleles = $mut->each_Allele;
+                    my $count = 0; # running allele number
+                    foreach my $allele (@alleles) {
+                        $count++;
+                        my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+                        # Renumber the change part only; the variation
+                        # part of "variation.change" must stay the same.
+                        if ($change_number and $change_number != $count){
+                            $mut->mut_number("$variation_number.$count");
+                        }
+                        $mut->allele_mut($allele);
+                        $w->startTag("DNA",
+                                     "number" => $mut->mut_number,
+                                     "start"  => $mut->start,
+                                     "end"    => $mut->end,
+                                     "length" => $mut->length,
+                                     "isMutation" => $mut->isMutation
+                                     );
+                        # a comma separated label becomes one element each
+                        if ($mut->label) {
+                            foreach my $label (split ', ', $mut->label) {
+                                $w->startTag("label");
+                                $w->characters($label);
+                                $w->endTag;
+                            }
+                        }
+                        if ($mut->proof) {
+                            $w->startTag("proof");
+                            $w->characters($mut->proof );
+                            $w->endTag;
+                        }
+                        if ($mut->upStreamSeq) {
+                            $w->startTag("upFlank");
+                            $w->characters($mut->upStreamSeq );
+                            $w->endTag;
+                        }
+                        # for DNA both allele elements are always written,
+                        # empty when there is no sequence
+                        #if ( $mut->isMutation) {
+                        #if ($mut->allele_ori) {
+                        $w->startTag("allele_ori");
+                        $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
+                        $w->endTag;
+                        #}
+                        #if ($mut->allele_mut) {
+                        $w->startTag("allele_mut");
+                        $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
+                        $w->endTag;
+                        #}
+                        #}
+                        if ($mut->dnStreamSeq) {
+                            $w->startTag("dnFlank");
+                            $w->characters($mut->dnStreamSeq );
+                            $w->endTag;
+                        }
+                        if ($mut->restriction_changes) {
+                            $w->startTag("restriction_changes");
+                            $w->characters($mut->restriction_changes);
+                            $w->endTag;
+                        }
+                        # region text, with value/dist attributes when set
+                        if ($mut->region) {
+                            if($mut->region_value and $mut->region_dist) {
+                                $w->startTag("region",
+                                             "value" => $mut->region_value,
+                                             "dist" => $mut->region_dist
+                                             );
+                            }
+                            elsif($mut->region_value) {
+                                $w->startTag("region",
+                                             "value" => $mut->region_value
+                                             );
+                            }
+                            elsif($mut->region_dist) {
+                                $w->startTag("region",
+                                             "dist" => $mut->region_dist
+                                             );
+                            } else {
+                                $w->startTag("region");
+                            }
+                            $w->characters($mut->region );
+                            $w->endTag;
+                        }
+                        $w->endTag; #DNA
+                    }
+                }
+                #
+                # RNA
+                #
+                elsif(  $mut->isa('Bio::Variation::RNAChange') ) {
+                    $mut->isMutation(0) if not $mut->isMutation;
+                    my @alleles = $mut->each_Allele;
+                    my $count = 0; # running allele number
+                    foreach my $allele (@alleles) {
+                        $count++;
+                        my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+                        # keep the variation part, renumber the change part
+                        if ($change_number and $change_number != $count){
+                            $mut->mut_number("$variation_number.$count");
+                        }
+                        $mut->allele_mut($allele);
+                        $w->startTag("RNA",
+                                     "number" => $mut->mut_number,
+                                     "start"  => $mut->start,
+                                     "end"    => $mut->end,
+                                     "length" => $mut->length,
+                                     "isMutation" => $mut->isMutation
+                                     );
+
+                        if ($mut->label) {
+                            foreach my $label (split ', ', $mut->label) {
+                                $w->startTag("label");
+                                $w->characters($label );
+                                $w->endTag;
+                            }
+                        }
+                        if ($mut->proof) {
+                            $w->startTag("proof");
+                            $w->characters($mut->proof );
+                            $w->endTag;
+                        }
+                        if ($mut->upStreamSeq) {
+                            $w->startTag("upFlank");
+                            $w->characters($mut->upStreamSeq );
+                            $w->endTag;
+                        }
+                        #if ( $mut->isMutation) {
+                        if ($mut->allele_ori) {
+                            $w->startTag("allele_ori");
+                            $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq ;
+                            $w->endTag;
+                        }
+                        if ($mut->allele_mut) {
+                            $w->startTag("allele_mut");
+                            $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq ;
+                            $w->endTag;
+                        }
+                        #}
+                        if ($mut->dnStreamSeq) {
+                            $w->startTag("dnFlank");
+                            $w->characters($mut->dnStreamSeq );
+                            $w->endTag;
+                        }
+                        # codon element for coding regions; codon_mut is
+                        # included only when it is set
+                        if ($mut->region eq 'coding') {
+                            if (! $mut->codon_mut) {
+                                $w->startTag("codon",
+                                             "codon_ori" => $mut->codon_ori,
+                                             "codon_pos" => $mut->codon_pos
+                                             );
+                            } else {
+                                $w->startTag("codon",
+                                             "codon_ori" => $mut->codon_ori,
+                                             "codon_mut" => $mut->codon_mut,
+                                             "codon_pos" => $mut->codon_pos
+                                             );
+                            }
+                            $w->endTag;
+                        }
+                        # codon table 1 is never written out
+                        if ($mut->codon_table != 1) {
+                            $w->startTag("codon_table");
+                            $w->characters($mut->codon_table);
+                            $w->endTag;
+                        }
+
+                        if ($mut->restriction_changes) {
+                            $w->startTag("restriction_changes");
+                            $w->characters($mut->restriction_changes);
+                            $w->endTag;
+                        }
+                        if ($mut->region) {
+                            if($mut->region_value and $mut->region_dist) {
+                                $w->startTag("region",
+                                             "value" => $mut->region_value,
+                                             "dist" => $mut->region_dist
+                                             );
+                            }
+                            elsif($mut->region_value) {
+                                $w->startTag("region",
+                                             "value" => $mut->region_value
+                                             );
+                            }
+                            elsif($mut->region_dist) {
+                                $w->startTag("region",
+                                             "dist" => $mut->region_dist
+                                             );
+                            } else {
+                                $w->startTag("region");
+                            }
+                            $w->characters($mut->region );
+                            $w->endTag;
+                        }
+                        $w->endTag; #RNA
+                    }
+                }
+                #
+                # AA
+                #
+                elsif(  $mut->isa('Bio::Variation::AAChange') ) {
+                    $mut->isMutation(0) if not $mut->isMutation;
+                    my @alleles = $mut->each_Allele;
+                    my $count = 0; # running allele number
+                    foreach my $allele (@alleles) {
+                        $count++;
+                        my ($variation_number, $change_number) = split /\./, $mut->mut_number;
+                        # keep the variation part, renumber the change part
+                        if ($change_number and $change_number != $count){
+                            $mut->mut_number("$variation_number.$count");
+                        }
+                        $mut->allele_mut($allele);
+                        $w->startTag("AA",
+                                     "number" => $mut->mut_number,
+                                     "start"  => $mut->start,
+                                     "end"    => $mut->end,
+                                     "length" => $mut->length,
+                                     "isMutation" => $mut->isMutation
+                                     );
+
+                        if ($mut->label) {
+                            foreach my $label (split ', ', $mut->label) {
+                                $w->startTag("label");
+                                $w->characters($label );
+                                $w->endTag;
+                            }
+                        }
+                        if ($mut->proof) {
+                            $w->startTag("proof");
+                            $w->characters($mut->proof );
+                            $w->endTag;
+                        }
+                        #if ( $mut->isMutation) {
+                        if ($mut->allele_ori) {
+                            $w->startTag("allele_ori");
+                            $w->characters($mut->allele_ori->seq) if $mut->allele_ori->seq;
+                            $w->endTag;
+                        }
+                        if ($mut->allele_mut) {
+                            $w->startTag("allele_mut");
+                            $w->characters($mut->allele_mut->seq) if $mut->allele_mut->seq;
+                            $w->endTag;
+                        }
+                        #}
+                        if ($mut->region) {
+                            if($mut->region_value and $mut->region_dist) {
+                                $w->startTag("region",
+                                             "value" => $mut->region_value,
+                                             "dist" => $mut->region_dist
+                                             );
+                            }
+                            elsif($mut->region_value) {
+                                $w->startTag("region",
+                                             "value" => $mut->region_value
+                                             );
+                            }
+                            elsif($mut->region_dist) {
+                                $w->startTag("region",
+                                             "dist" => $mut->region_dist
+                                             );
+                            } else {
+                                $w->startTag("region");
+                            }
+                            $w->characters($mut->region );
+                            $w->endTag;
+                        }
+                        $w->endTag; #AA
+                    }
+                }
+            }
+        }
+    }
+    # NOTE(review): a single closing tag is written after the loop, so only
+    # one seqDiff start tag is closed here; passing several SeqDiff objects
+    # in one call looks unsupported -- confirm against XML::Writer.
+    $w->endTag;
+
+
+    $w->end;
+    $self->_print($str);
+    $output = undef;
+    return 1;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/IO.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,341 @@
+# $Id: IO.pm,v 1.22.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::IO
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::IO - Handler for sequence variation IO Formats
+
+=head1 SYNOPSIS
+
+    use Bio::Variation::IO;
+
+    $in  = Bio::Variation::IO->new(-file => "inputfilename" , 
+                                   -format => 'flat');
+    $out = Bio::Variation::IO->new(-file => ">outputfilename" ,
+                                   -format => 'xml');
+
+    while ( my $seq = $in->next() ) {
+	   $out->write($seq);
+    }
+
+  # or
+
+    use Bio::Variation::IO;
+
+    #input file format can be read from the file extension (dat|xml)
+    $in  = Bio::Variation::IO->newFh(-file => "inputfilename");
+    $out = Bio::Variation::IO->newFh(-format => 'xml');
+
+    # World's shortest flat<->xml format converter:
+    print $out $_ while <$in>;
+
+=head1 DESCRIPTION
+
+Bio::Variation::IO is a handler module for the formats in the 
+Variation IO set (eg, Bio::Variation::IO::flat). It is the officially 
+sanctioned way of getting at the format objects, which most people 
+should use.
+
+The structure, conventions and most of the code is inherited from
+L<Bio::SeqIO> module. The main difference is that instead of using
+methods next_seq and write_seq, you drop '_seq' from the method names.
+
+The idea is that you request a stream object for a particular format.
+All the stream objects have a notion of an internal file that is read
+from or written to. A particular SeqIO object instance is configured
+for either input or output. A specific example of a stream object is
+the Bio::Variation::IO::flat object.
+
+Each stream object has functions
+
+   $stream->next();
+
+and
+
+   $stream->write($seqDiff);
+
+also
+
+   $stream->type() # returns 'INPUT' or 'OUTPUT'
+
+As an added bonus, you can recover a filehandle that is tied to the
+SeqIO object, allowing you to use the standard E<lt>E<gt> and print 
+operations to read and write sequence objects:
+
+    use Bio::Variation::IO;
+
+    $stream = Bio::Variation::IO->newFh(-format => 'flat'); 
+    # read from standard input
+
+    while ( $seq = <$stream> ) {
+	   # do something with $seq
+    }
+
+and
+
+    print $stream $seq; # when stream is in output mode
+
+This makes the simplest ever reformatter
+
+    #!/usr/local/bin/perl
+
+    $format1 = shift;
+    $format2 = shift;
+
+    use Bio::Variation::IO;
+
+    $in  = Bio::Variation::IO->newFh(-format => $format1 );
+    $out = Bio::Variation::IO->newFh(-format => $format2 );
+
+    print $out $_ while <$in>;
+
+
+=head1 CONSTRUCTORS
+
+=head2 Bio::Variation::IO-E<gt>new()
+
+   $seqIO = Bio::Variation::IO->new(-file => 'filename',   -format=>$format);
+   $seqIO = Bio::Variation::IO->new(-fh   => \*FILEHANDLE, -format=>$format);
+   $seqIO = Bio::Variation::IO->new(-format => $format);
+
+The new() class method constructs a new Bio::Variation::IO object.  The
+returned object can be used to retrieve or print Bio::Variation::SeqDiff
+objects. new() accepts the following parameters:
+
+=over 4
+
+=item -file
+
+A file path to be opened for reading or writing.  The usual Perl
+conventions apply:
+
+   'file'       # open file for reading
+   '>file'      # open file for writing
+   '>>file'     # open file for appending
+   '+<file'     # open file read/write
+   'command |'  # open a pipe from the command
+   '| command'  # open a pipe to the command
+
+=item -fh
+
+You may provide new() with a previously-opened filehandle.  For
+example, to read from STDIN:
+
+   $seqIO = Bio::Variation::IO->new(-fh => \*STDIN);
+
+Note that you must pass filehandles as references to globs.
+
+If neither a filehandle nor a filename is specified, then the module
+will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
+semantics.
+
+=item -format
+
+Specify the format of the file.  Supported formats include:
+
+   flat        pseudo EMBL format
+   xml         seqvar xml format
+
+If no format is specified and a filename is given, then the module
+will attempt to deduce it from the filename.  If this is unsuccessful,
+the 'flat' format is assumed.
+
+The format name is case insensitive.  'FLAT', 'Flat' and 'flat' are
+all supported.
+
+=back
+
+=head2 Bio::Variation::IO-E<gt>newFh()
+
+   $fh = Bio::Variation::IO->newFh(-fh   => \*FILEHANDLE, -format=>$format);
+   $fh = Bio::Variation::IO->newFh(-format => $format);
+   # etc.
+
+   #e.g.
+   $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat');
+   print $out $seqDiff;
+
+This constructor behaves like new(), but returns a tied filehandle
+rather than a Bio::Variation::IO object.  You can read sequences from this
+object using the familiar E<lt>E<gt> operator, and write to it using print().
+The usual array and $_ semantics work.  For example, you can read all
+sequence objects into an array like this:
+
+  @mutations = <$fh>;
+
+Other operations, such as read(), sysread(), write(), close(), and printf() 
+are not supported.
+
+=head1 OBJECT METHODS
+
+See below for more detailed summaries.  The main methods are:
+
+=head2 $sequence = $seqIO-E<gt>next()
+
+Fetch the next sequence from the stream.
+
+=head2 $seqIO-E<gt>write($sequence [,$another_sequence,...])
+
+Write the specified sequence(s) to the stream.
+
+=head2 TIEHANDLE(), READLINE(), PRINT()
+
+These provide the tie interface.  See L<perltie> for more details.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Variation::IO;
+
+use strict;
+
+
+use base qw(Bio::SeqIO);
+
+=head2 new
+
+ Title   : new
+ Usage   : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format')
+ Function: Returns a new seqstream
+ Returns : A Bio::Variation::IO::Handler initialised with the appropriate format
+ Args    : -file => $filename
+           -format => format
+           -fh => filehandle to attach to
+
+=cut
+
+
+sub new {
+   my ($class, %param) = @_;
+
+   # Give every argument a lower-cased alias so that '-FORMAT' and '-format'
+   # are interchangeable; the caller's original keys are kept as well.
+   my @lc_keys = map { lc $_ } keys %param;
+   @param{@lc_keys} = values %param;
+
+   # Format precedence: explicit -format, then a guess from the file name
+   # (-file, or the first command-line argument), then 'flat'.
+   my $format = $param{'-format'};
+   $format ||= $class->_guess_format( $param{'-file'} || $ARGV[0] );
+   $format ||= 'flat';
+   $format = lc $format;
+
+   # Construction is delegated to the format-specific class once its module
+   # has been loaded; nothing is returned when loading fails.
+   $class->_load_format_module($format) or return;
+   my $handler = "Bio::Variation::IO::$format";
+   return $handler->new(%param);
+}
+
+
+# Load the module implementing $format (Bio::Variation::IO::<format>).
+# Returns whatever _load_module() returned; if loading raised an exception
+# a diagnostic is printed to STDERR and the (unset) result is returned.
+sub _load_format_module {
+  my ($class, $format) = @_;
+  my $module = "Bio::Variation::IO::$format";
+
+  my $loaded;
+  eval { $loaded = $class->_load_module($module) };
+  return $loaded unless $@;
+
+  print STDERR <<END;
+$class: $format cannot be found
+Exception $@
+For more information about the IO system please see the IO docs.
+This includes ways of checking for formats at compile time, not run time
+END
+  return $loaded;
+}
+
+=head2 next
+
+ Title   : next
+ Usage   : $seqDiff = $stream->next
+ Function: reads the next $seqDiff object from the stream
+ Returns : a Bio::Variation::SeqDiff object
+ Args    :
+
+=cut
+
+sub next {
+   my $self = shift;
+   # The generic handler has no format of its own, so reading is refused here.
+   $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object.");
+}
+
+# Compatibility shim for callers that use the Bio::SeqIO method name.
+# Mirrors write_seq(): emit a warning pointing at the proper method, then
+# delegate to next().  Previously this threw first, which left the
+# delegation below unreachable.
+# NOTE(review): any inherited code that calls next_seq() internally (for
+# example a tied-filehandle READLINE in the parent class) would also have
+# hit that throw -- confirm against Bio::SeqIO.
+sub next_seq {
+   my ($self, $seq) = @_;
+   $self->warn("These are not sequence objects. Use method 'next' instead of 'next_seq'.");
+   return $self->next($seq);
+}
+
+=head2 write
+
+ Title   : write
+ Usage   : $stream->write($seq)
+ Function: writes the $seq object into the stream
+ Returns : 1 for success and 0 for error
+ Args    : Bio::Variation::SeqDiff object
+
+=cut
+
+sub write {
+    my $self = shift;
+    # The generic handler has no format of its own, so writing is refused here.
+    $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object.");
+}
+
+# Compatibility shim for the Bio::SeqIO method name: warns, then forwards
+# the first argument to write() and returns its result.
+sub write_seq {
+   my $self = shift;
+   my $seq  = shift;
+   $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'.");
+   return $self->write($seq);
+}
+
+=head2 _guess_format
+
+ Title   : _guess_format
+ Usage   : $obj->_guess_format($filename)
+ Function:
+ Example :
+ Returns : guessed format of filename (lower case)
+ Args    :
+
+=cut
+
+# Map a file name to a format name by its extension (case-insensitive):
+#   *.dat -> 'flat',  *.xml -> 'xml'.
+# Returns nothing (a false value) for a missing name or an unknown extension.
+# The name is kept in a lexical so the caller's $_ is left untouched.
+sub _guess_format {
+   my ($class, $filename) = @_;
+   return unless $filename;
+   return 'flat' if $filename =~ /\.dat$/i;
+   return 'xml'  if $filename =~ /\.xml$/i;
+   return;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,30 @@
+
+                       README for Bio::Variation classes
+
+
+These classes are part of "Computational Mutation Expression Toolkit"
+project at European Bioinformatics Institute
+<http://www.ebi.ac.uk/mutations/toolkit/>, but they are written to be
+as general as possible.
+
+Bio::Variation name space contains modules to store sequence variation
+information as differences between the reference sequence and changed
+sequences. Also included are classes to write out and recreate objects
+from EMBL-like flat files and XML. Lastly, there are simple classes to
+calculate values for sequence change objects.
+
+See "Computational Mutation Expression Toolkit" web pages for more
+information:
+
+	http://www.ebi.ac.uk/mutations/toolkit/
+
+
+Send bug reports using the bioperl bug-tracking system at
+  http://bugzilla.open-bio.org/.
+ 
+Send general comments, questions, and feature requests to the bioperl
+mailing list:
+
+	bioperl-l at bioperl.org
+
+Heikki Lehväslaiho <heikki-at-bioperl-dot-org>

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/RNAChange.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/RNAChange.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/RNAChange.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,610 @@
+# $Id: RNAChange.pm,v 1.17.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::RNAChange
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::RNAChange - Sequence change class for RNA level
+
+=head1 SYNOPSIS
+
+   $rnachange = Bio::Variation::RNAChange->new
+       ('-start'         => $start,
+        '-end'           => $end,
+        '-length'        => $len,
+        '-codon_pos'     => $cp,
+        '-upStreamSeq'   => $upflank,
+        '-dnStreamSeq'   => $dnflank,
+        '-proof'         => $proof,
+   	'-isMutation'    => 1,
+        '-mut_number'    => $mut_number
+       );
+   $a1 = Bio::Variation::Allele->new;
+   $a1->seq('a');
+   $rnachange->allele_ori($a1);
+   my $a2 = Bio::Variation::Allele->new;
+   $a2->seq('t');
+   $rnachange->add_Allele($a2);
+   $rnachange->allele_mut($a2);
+
+   print "The codon change is ", $rnachange->codon_ori, 
+       ">", $rnachange->codon_mut, "\n"; 
+
+   # add it to a SeqDiff container object
+   $seqdiff->add_Variant($rnachange);
+
+   # and create links to and from DNA level mutation objects
+   $rnachange->DNAMutation($dnamut);
+   $dnamut->RNAChange($rnachange);
+
+=head1 DESCRIPTION
+
+The instantiable class Bio::Variation::RNAChange describes basic
+sequence changes at RNA molecule level. It uses methods defined in
+superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI>
+for details.
+
+You are normally expected to create a corresponding
+Bio::Variation::DNAMutation object even if mutation is defined at
+RNA level. The numbering follows then cDNA numbering.  Link the
+DNAMutation object to the RNAChange object using the method
+DNAMutation(). If the variation described by a RNAChange object is
+translated, link the corresponding Bio::Variation::AAChange object
+to it using method AAChange(). See L<Bio::Variation::DNAMutation> and
+L<Bio::Variation::AAChange> for more information.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Variation::RNAChange;
+use strict;
+
+# Object preamble - inheritance
+
+use Bio::Tools::CodonTable;
+
+use base qw(Bio::Variation::VariantI);
+
+# Constructor.  Builds the object through the parent class, then applies
+# every recognised named argument via the accessor of the same purpose.
+# Arguments with a false value (undef, 0, '') are not applied.
+# (The archived copy of this file had '@args' mangled into ' at args' in
+# two places, which is not valid Perl; both are restored here.)
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # One variable per key below, in the same order.
+    my ($start, $end, $length, $strand, $primary, $source,
+        $frame, $score, $gff_string,
+        $allele_ori,  $allele_mut,  $upstreamseq,  $dnstreamseq,
+        $label,  $status,  $proof,  $region,  $region_value, $region_dist, $numbering,
+        $mut_number,  $isMutation,
+        $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) =
+            $self->_rearrange([qw(START
+                                  END
+                                  LENGTH
+                                  STRAND
+                                  PRIMARY
+                                  SOURCE
+                                  FRAME
+                                  SCORE
+                                  GFF_STRING
+                                  ALLELE_ORI
+                                  ALLELE_MUT
+                                  UPSTREAMSEQ
+                                  DNSTREAMSEQ
+                                  LABEL
+                                  STATUS
+                                  PROOF
+                                  REGION
+                                  REGION_VALUE
+                                  REGION_DIST
+                                  NUMBERING
+                                  MUT_NUMBER
+                                  ISMUTATION
+                                  CODON_ORI
+                                  CODON_MUT
+                                  CODON_POS
+                                  TRANSLATION_TABLE
+                                  CDS_END
+                                  )], @args);
+
+    # Default primary tag; overridden below when -primary is given.
+    $self->primary_tag("Variation");
+
+    $self->{ 'alleles' } = [];
+
+    # Generic feature attributes.
+    $start && $self->start($start);
+    $end   && $self->end($end);
+    $length && $self->length($length);
+    $strand && $self->strand($strand);
+    $primary && $self->primary_tag($primary);
+    $source  && $self->source_tag($source);
+    $frame   && $self->frame($frame);
+    $score   && $self->score($score);
+    $gff_string && $self->_from_gff_string($gff_string);
+
+    # Alleles and flanking sequences.
+    $allele_ori && $self->allele_ori($allele_ori);
+    $allele_mut  && $self->allele_mut($allele_mut);
+    $upstreamseq  && $self->upStreamSeq($upstreamseq);
+    $dnstreamseq  && $self->dnStreamSeq($dnstreamseq);
+
+    # Descriptive attributes of the variation.
+    $label  && $self->label($label);
+    $status  && $self->status($status);
+    $proof && $self->proof($proof);
+    $region  && $self->region($region);
+    $region_value  && $self->region_value($region_value);
+    $region_dist  && $self->region_dist($region_dist);
+    $numbering && $self->numbering($numbering);
+    $mut_number && $self->mut_number($mut_number);
+    $isMutation && $self->isMutation($isMutation);
+
+    # RNA-level attributes (-translation_table feeds codon_table()).
+    $codon_ori  && $self->codon_ori($codon_ori);
+    $codon_mut  && $self->codon_mut($codon_mut);
+    $codon_pos  && $self->codon_pos($codon_pos);
+    $codon_table && $self->codon_table($codon_table);
+    $cds_end  && $self->cds_end($cds_end);
+    return $self; # success - we hope!
+}
+
+
+=head2 codon_ori
+
+ Title   : codon_ori
+ Usage   : $obj->codon_ori();
+ Function: 
+
+            Sets and returns codon_ori triplet.  If value is not set,
+            creates the codon triplet from the codon position and
+            flanking sequences.  The string has to be three characters
+            long. The character content is not checked.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub codon_ori {
+    my ($self, $value) = @_;
+
+    # Setter: store the given triplet, warning (not failing) on a bad length.
+    if (defined $value) {
+        $self->warn("Codon string \"$value\" is not three characters long")
+            unless CORE::length($value) == 3;
+        $self->{'codon_ori'} = $value;
+        return $self->{'codon_ori'};
+    }
+
+    # Getter with a cached, non-empty value.
+    return $self->{'codon_ori'} if $self->{'codon_ori'};
+
+    # Nothing usable cached: rebuild the codon for coding-region variants
+    # from the upstream flank, the original allele and the downstream flank.
+    my $triplet = '';
+    if ($self->region eq 'coding' && $self->start && $self->start >= 1) {
+        $self->warn('Codon position is not defined')
+            unless defined $self->codon_pos;
+        $self->warn('Upstream flanking sequence  is not defined')
+            unless defined $self->upStreamSeq;
+        $self->warn('Downstream flanking sequence  is not defined')
+            unless defined $self->dnStreamSeq;
+
+        my $pos = $self->codon_pos;
+        # The ($pos - 1) bases that precede the variant inside its codon.
+        $triplet = substr($self->upStreamSeq, 1 - $pos, $pos - 1);
+        # As much of the original allele as still fits into the codon.
+        if ($self->allele_ori and $self->allele_ori->seq) {
+            $triplet .= substr($self->allele_ori->seq, 0, 4 - $pos);
+        }
+        # Pad to three bases from the downstream flank.
+        $triplet .= substr($self->dnStreamSeq, 0, 3 - CORE::length($triplet));
+    }
+    $self->{'codon_ori'} = lc $triplet;
+    return $self->{'codon_ori'};
+}
+
+
+=head2 codon_mut
+
+ Title   : codon_mut
+ Usage   : $obj->codon_mut();
+ Function: 
+
+            Sets and returns codon_mut triplet.  If value is not
+            set, creates the codon triplet from the codon position and
+            flanking sequences. Return undef for other than point mutations.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub codon_mut {
+    my ($self, $value) = @_;
+
+    # Setter: store the given triplet, warning (not failing) on a bad length.
+    if (defined $value) {
+        if (CORE::length($value) != 3) {
+            $self->warn("Codon string \"$value\" is not three characters long");
+        }
+        $self->{'codon_mut'} = $value;
+    }
+    else {
+        # Getter: (re)build the mutated codon, but only for point mutations
+        # in the coding region.  Both alleles are checked for existence
+        # before ->seq is called, so asking for the codon of an object
+        # without alleles returns the stored value instead of dying.
+        my $ori = $self->allele_ori;
+        my $mut = $self->allele_mut;
+        if ($ori and $mut and $ori->seq and $mut->seq and
+            CORE::length($ori->seq) == 1 and
+            CORE::length($mut->seq) == 1 and
+            $self->region eq 'coding' and $self->start >= 1) {
+
+            $self->warn('Codon position is not defined')
+                if not defined $self->codon_pos;
+            $self->warn('Upstream flanking sequence  is not defined')
+                if not defined $self->upStreamSeq;
+            $self->warn('Downstream flanking sequence  is not defined')
+                if not defined $self->dnStreamSeq;
+
+            my $cpos = $self->codon_pos;
+            # Bases before the variant within the codon, then the mutated
+            # base, then padding from the downstream flank up to three bases.
+            my $codon_mut = substr($self->upStreamSeq, -$cpos + 1, $cpos - 1);
+            $codon_mut .= substr($mut->seq, 0, 4 - $cpos);
+            $codon_mut .= substr($self->dnStreamSeq, 0, 3 - CORE::length($codon_mut));
+
+            $self->{'codon_mut'} = lc $codon_mut;
+        }
+    }
+    return $self->{'codon_mut'};
+}
+
+
+=head2 codon_pos
+
+ Title   : codon_pos
+ Usage   : $obj->codon_pos();
+ Function: 
+
+            Sets and returns the position of the mutation start in the
+            codon. If value is not set, returns false.
+
+ Example : 
+ Returns : 1,2,3
+ Args    : none if get, the new value if set
+
+=cut
+
+
+sub codon_pos {
+    my ($self, $value) = @_;
+    if (defined $value) {
+        # The whole value must be exactly 1, 2 or 3.  The earlier unanchored
+        # pattern accepted any string merely containing one of those digits
+        # (for example '12' or 'a1').
+        if ($value !~ /\A[123]\z/) {
+            $self->throw("'$value' is not a valid codon position");
+        }
+        $self->{'codon_pos'} = $value;
+    }
+    return $self->{'codon_pos'};
+}
+
+
+=head2 codon_table
+
+ Title   : codon_table
+ Usage   : $obj->codon_table();
+ Function: 
+
+            Sets and returns the codon table id of the RNA
+            If value is not set, returns 1, 'universal' code, as the default.
+
+ Example : 
+ Returns : integer
+ Args    : none if get, the new value if set
+
+=cut
+
+
+sub codon_table {
+    my ($self, $value) = @_;
+    if (defined $value) {
+        # Any non-negative integer is accepted.  The earlier pattern allowed
+        # a single digit only and therefore rejected table ids of 10 and up.
+        if ($value !~ /\A\d+\z/) {
+            $self->throw("'$value' is not a valid codon table ID\n".
+                         "Has to be a positive integer. Defaulting to 1\n");
+        } else {
+            $self->{'codon_table'} = $value;
+        }
+    }
+    # Table 1 is reported until an id has been stored.
+    return 1 unless exists $self->{'codon_table'};
+    return $self->{'codon_table'};
+}
+
+
+=head2 DNAMutation
+
+ Title   : DNAMutation
+ Usage   : $mutobj = $obj->DNAMutation;
+         : $mutobj = $obj->DNAMutation($objref);
+ Function: Returns or sets the link-reference to a mutation/change object.
+           If there is no link, it will return undef
+ Returns : an obj_ref or undef
+
+=cut
+
+
+sub DNAMutation {
+    my ($self, $value) = @_;
+    if (defined $value) {
+        if (! $value->isa('Bio::Variation::DNAMutation')) {
+            # Report the rejected argument; the message used to show $self.
+            $self->throw("Is not a Bio::Variation::DNAMutation object but a [$value]");
+            return;    # only reached if throw() returns
+        }
+        $self->{'DNAMutation'} = $value;
+    }
+    # No link stored: empty list / undef, depending on context.
+    return unless exists $self->{'DNAMutation'};
+    return $self->{'DNAMutation'};
+}
+
+
+=head2 AAChange
+
+ Title   : AAChange
+ Usage   : $mutobj = $obj->AAChange;
+         : $mutobj = $obj->AAChange($objref);
+ Function: Returns or sets the link-reference to a mutation/change object.
+           If there is no link, it will return undef
+ Returns : an obj_ref or undef
+
+=cut
+
+sub AAChange {
+    my ($self, $value) = @_;
+    if (defined $value) {
+        if (! $value->isa('Bio::Variation::AAChange')) {
+            # Report the rejected argument; the message used to show $self.
+            $self->throw("Is not a Bio::Variation::AAChange object but a [$value]");
+            return;    # only reached if throw() returns
+        }
+        $self->{'AAChange'} = $value;
+    }
+    # No link stored: empty list / undef, depending on context.
+    return unless exists $self->{'AAChange'};
+    return $self->{'AAChange'};
+}
+
+
+=head2 exons_modified
+
+ Title   : exons_modified
+ Usage   : $modified = $obj->exons_modified;
+         : $modified = $obj->exons_modified(1);
+ Function: Returns or sets information (example: a simple boolean flag) about
+           the modification of exons as a result of a mutation.
+
+=cut
+
+sub exons_modified {
+  my $self = shift;
+  # A defined argument replaces the stored flag; undef leaves it alone.
+  $self->{'exons_modified'} = $_[0] if defined $_[0];
+  return $self->{'exons_modified'};
+}
+
+=head2 region
+
+ Title   : region
+ Usage   : $obj->region();
+ Function: 
+
+            Sets and returns the name of the sequence region type or
+            protein domain at this location.  If value is not set,
+            returns false.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+
+sub region {
+    my ($self, $value) = @_;
+
+    # Setter.
+    if (defined $value) {
+        $self->{'region'} = $value;
+        return $self->{'region'};
+    }
+
+    # Getter with a stored value.
+    return $self->{'region'} if defined $self->{'region'};
+
+    # Nothing stored: derive the region from the coordinates.  The three
+    # accessors are consulted in this order whether or not verbose is on;
+    # the warnings themselves are only issued in verbose mode.
+    $self->warn('Mutation start position is not defined')
+        if !defined($self->start) && $self->verbose;
+    $self->warn('Mutation end position is not defined')
+        if !defined($self->end) && $self->verbose;
+    $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!')
+        if !defined($self->cds_end) && $self->verbose;
+
+    # 'coding' unless the variant ends before position 0 or starts past
+    # the end of the CDS.
+    $self->region('coding');
+    if ($self->end && $self->end < 0) {
+        $self->region(q{5'UTR});
+    }
+    elsif ($self->start && $self->cds_end && $self->start > $self->cds_end) {
+        $self->region(q{3'UTR});
+    }
+    return $self->{'region'};
+}
+
+=head2 cds_end
+
+ Title   : cds_end
+ Usage   : $cds_end = $obj->cds_end();
+ Function: 
+
+           Sets or returns the cds_end from the beginning of the DNA sequence
+           to the coordinate start used to describe variants.
+           Should be the location of the last nucleotide of the
+           terminator codon of the gene.
+
+ Example : 
+ Returns : value of cds_end, a scalar
+ Args    : 
+
+=cut
+
+
+
+sub cds_end {
+    my ($self, $value) = @_;
+
+    # Getter: when this variant sits in a SeqDiff container, the container's
+    # cds_end replaces whatever is stored locally before it is returned.
+    unless (defined $value) {
+        if ($self->SeqDiff) {
+            $self->{'cds_end'} = $self->SeqDiff->cds_end;
+        }
+        return $self->{'cds_end'};
+    }
+
+    # Setter: a value that is not all digits triggers a warning but is
+    # stored regardless.
+    $self->warn("[$value] is not a good value for sequence position")
+        unless $value =~ /^\d+$/;
+    $self->{'cds_end'} = $value;
+    return $self->{'cds_end'};
+}
+
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function: 
+
+            Sets and returns mutation event label(s).  If value is not
+            set, or no argument is given returns false.  Each
+            instantiable subclass of L<Bio::Variation::VariantI> needs
+            to implement this method. Valid values are listed in
+            'Mutation event controlled vocabulary' in
+            http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub label {
+    my ($self) = @_;
+
+    # Allele sequences; left undefined when the allele or its sequence is
+    # missing or empty.
+    my ($o, $m);
+    $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
+    $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
+
+    my $ct = Bio::Tools::CodonTable->new( -id => $self->codon_table );
+
+    my $type;
+    if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) {
+        # Single-base substitution.  Without a linked AAChange nothing can
+        # be said; otherwise the first matching rule below wins.
+        if (not defined $self->AAChange) {
+            $type = 'unknown';
+        }
+        elsif ($self->start > 0 and $self->start < 4) {
+            $type = 'initiation codon';
+        }
+        elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori)) {
+            $type = 'termination codon';
+        }
+        elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut)) {
+            $type = 'nonsense';
+        }
+        elsif ($o and $m and ($o eq $m or
+                              $self->AAChange->allele_ori->seq eq
+                              $self->AAChange->allele_mut->seq)) {
+            $type = 'silent';
+        }
+        else {
+            $type = 'missense';
+        }
+    }
+    else {
+        # Anything else is described by a comma-separated list of terms.
+        my $len = ($o ? CORE::length($o) : 0) - ($m ? CORE::length($m) : 0);
+
+        # Reading frame: kept when the length difference is a multiple of 3.
+        my @terms = ($len % 3 == 0 ? 'inframe' : 'frameshift');
+
+        # Kind of change, judged by which allele sequence is absent.
+        push @terms, (not $m) ? 'deletion'
+                   : (not $o) ? 'insertion'
+                   :            'complex';
+
+        push @terms, 'termination codon'
+            if $self->codon_ori && $ct->is_ter_codon($self->codon_ori);
+
+        $type = join ', ', @terms;
+    }
+
+    $self->{'label'} = $type;
+    return $self->{'label'};
+}
+
+
+=head2 _change_codon_pos
+
+ Title   : _change_codon_pos
+ Usage   : $newCodonPos = _change_codon_pos($myCodonPos, 5)
+ Function: 
+
+           Keeps track of the codon position in a changing sequence
+
+ Returns : codon_pos = integer 1, 2 or 3
+ Args    : valid codon position 
+           signed integer offset to a new location in sequence
+
+=cut
+
+
+sub _change_codon_pos ($$)  {
+    my ($cpos, $i) = @_;
+
+    # With a positive right-hand operand Perl's % never yields a negative
+    # number, so the shifted position lands in 0..2.  A result of 0 stands
+    # for the third base of the codon.
+    my $shifted = ($cpos + $i % 3) % 3;
+    return $shifted < 1 ? $shifted + 3 : $shifted;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SNP.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SNP.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SNP.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,221 @@
+# $Id: SNP.pm,v 1.16.4.1 2006/10/02 23:10:38 sendu Exp $
+# bioperl module for Bio::Variation::SNP
+#
+# Copyright Allen Day <allenday at ucla.edu>, Stan Nelson <snelson at ucla.edu>
+# Human Genetics, UCLA Medical School, University of California, Los Angeles
+
+=head1 NAME
+
+Bio::Variation::SNP - submitted SNP
+
+=head1 SYNOPSIS
+
+  $SNP = Bio::Variation::SNP->new ();
+
+=head1 DESCRIPTION
+
+Inherits from Bio::Variation::SeqDiff and Bio::Variation::Allele, with 
+additional methods that are (db)SNP specific (ie, refSNP/subSNP IDs, batch
+IDs, validation methods).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Allen Day E<lt>allenday at ucla.eduE<gt>
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Variation::SNP;
+
+use strict;
+use vars qw($AUTOLOAD);
+use Bio::Root::Root;
+
+use base qw(Bio::Variation::SeqDiff Bio::Variation::Allele);
+
+=head2 get/set-able methods
+
+ Usage   : $is = $snp->method()
+ Function: for getting/setting attributes
+ Returns : a value.  probably a scalar.
+ Args    : if you're trying to set an attribute, pass in the new value.
+
+ Methods:
+ --------
+ id
+ type
+ observed
+ seq_5
+ seq_3
+ ncbi_build
+ ncbi_chr_hits
+ ncbi_ctg_hits
+ ncbi_seq_loc
+ ucsc_build 
+ ucsc_chr_hits
+ ucsc_ctg_hits
+ heterozygous
+ heterozygous_SE
+ validated
+ genotype
+ handle
+ batch_id
+ method
+ locus_id
+ symbol
+ mrna
+ protein
+ functional_class
+
+=cut
+
+#'
+# Attributes served by AUTOLOAD.  A value of '' marks a plain scalar slot;
+# an array reference marks a slot that accumulates a list of values.
+my %OK_AUTOLOAD = (
+		id			=> '',
+		type			=> '',
+		observed		=> [],
+		seq_5			=> '',
+		seq_3			=> '',
+		ncbi_build		=> '',
+		ncbi_chr_hits		=> '',
+		ncbi_ctg_hits		=> '',
+		ncbi_seq_loc		=> '',
+		ucsc_build		=> '',
+		ucsc_chr_hits		=> '',
+		ucsc_ctg_hits		=> '',
+		heterozygous		=> '',
+		heterozygous_SE		=> '',
+		validated		=> '',
+		genotype		=> '',
+		handle			=> '',
+		batch_id		=> '',
+		method			=> '',
+		locus_id		=> '',
+		symbol			=> '',
+		mrna			=> '',
+		protein			=> '',
+		functional_class	=> '',
+		);
+
+# Generic get/set accessor for the attributes listed in %OK_AUTOLOAD.
+#   scalar slot: an argument replaces the value; the value is returned.
+#   list slot  : an argument is appended; the most recent value is returned
+#                (undef while the list is empty).
+# Any other method name is rejected through throw().
+sub AUTOLOAD {
+	my $self = shift;
+	my $param = $AUTOLOAD;
+	$param =~ s/.*:://;
+
+	# Object destruction must never be treated as an unknown attribute.
+	return if $param eq 'DESTROY';
+
+	$self->throw(__PACKAGE__." doesn't implement $param") unless defined $OK_AUTOLOAD{$param};
+
+	if( ref $OK_AUTOLOAD{$param} eq 'ARRAY' ) {
+		# Create the list explicitly instead of relying on autovivification.
+		$self->{$param} ||= [];
+		push @{$self->{$param}}, shift if @_;
+		return $self->{$param}->[-1];
+	}
+
+	$self->{$param} = shift if @_;
+	return $self->{$param};
+}
+
+
+#foreach my $slot (keys %RWSLOT){
+#	no strict "refs"; #add class methods to package
+#	*$slot = sub {
+#		shift;
+#		$RWSLOT{$slot} = shift if @_;
+#		return $RWSLOT{$slot};
+#	};
+#}
+
+
+=head2 is_subsnp
+
+ Title   : is_subsnp
+ Usage   : $is = $snp->is_subsnp()
+ Function: returns 1 if $snp is a subSNP
+ Returns : 1 or undef
+ Args    : NONE
+
+=cut
+
+sub is_subsnp {
+	my ($self) = @_;
+	# The flag is written by add_subsnp() on the objects it creates.
+	return $self->{is_subsnp};
+}
+
+=head2 subsnp
+
+ Title   : subsnp
+ Usage   : $subsnp = $snp->subsnp()
+ Function: returns the currently active subSNP of $snp
+ Returns : Bio::Variation::SNP
+ Args    : NONE
+
+=cut
+
+sub subsnp {
+	my ($self) = @_;
+	# each_subsnp() is evaluated in scalar context here, i.e. as a count;
+	# the subSNP added last sits at index count - 1.
+	return $self->{subsnps}->[ $self->each_subsnp() - 1 ];
+}
+
+=head2 add_subsnp
+
+ Title   : add_subsnp
+ Usage   : $subsnp = $snp->add_subsnp()
+ Function: pushes the previous value returned by subsnp() onto a stack,
+           accessible with each_subsnp().
+           Sets return value of subsnp() to a new Bio::Variation::SNP
+           object, and returns that object.
+ Returns : Bio::Variation::SNP
+ Args    : NONE
+
+=cut
+
+sub add_subsnp {
+	my ($self) = @_;
+
+	# Only a refSNP may own subSNPs.
+	if ($self->is_subsnp) {
+		$self->throw("add_subsnp(): cannot add subSNP to subSNP, only to refSNP");
+	}
+
+	# Append a fresh SNP; it thereby becomes the one reported by subsnp().
+	push @{ $self->{subsnps} }, Bio::Variation::SNP->new;
+
+	my $current = $self->subsnp;
+	$current->{is_subsnp} = 1;
+	return $current;
+}
+
+=head2 each_subsnp
+
+ Title   : each_subsnp
+ Usage   : @subsnps = $snp->each_subsnp()
+ Function: returns a list of the subSNPs of a refSNP
+ Returns : list
+ Args    : NONE
+
+=cut
+
+sub each_subsnp {
+	my $self = shift;
+	$self->throw("each_subsnp(): cannot be called on a subSNP")
+            if $self->is_subsnp;
+	# A refSNP that has no subSNPs yet has no 'subsnps' slot; fall back to
+	# an empty list rather than dereferencing undef.  In scalar context the
+	# number of subSNPs is returned, which subsnp() relies on.
+	return @{ $self->{subsnps} || [] };
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SeqDiff.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SeqDiff.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/SeqDiff.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1135 @@
+# $Id: SeqDiff.pm,v 1.23.4.1 2006/10/02 23:10:38 sendu Exp $
+# bioperl module for Bio::Variation::SeqDiff
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+# cds_end definition?
+
+=head1 NAME
+
+Bio::Variation::SeqDiff - Container class for mutation/variant descriptions
+
+=head1 SYNOPSIS
+
+  $seqDiff = Bio::Variation::SeqDiff->new (
+                                           -id => $M20132,
+                                           -alphabet => 'rna',
+                                           -gene_symbol => 'AR',
+                                           -chr => 'X',
+                                           -numbering => 'coding'
+                                           );
+  # get a DNAMutation object somehow
+  $seqDiff->add_Variant($dnamut);
+  print  $seqDiff->sys_name(), "\n"; 
+
+=head1 DESCRIPTION
+
+SeqDiff stores Bio::Variation::VariantI object references and
+descriptive information common to all changes in a sequence. Mutations
+are understood to be any kind of sequence markers and are expected to
+occur in the same chromosome. See L<Bio::Variation::VariantI> for details.
+
+The methods of SeqDiff are geared towards describing mutations in
+human genes using gene-based coordinate system where 'A' of the
+initiator codon has number 1 and the one before it -1. This is
+according to conventions of human genetics.
+
+There will be a class Bio::Variation::Genotype to describe markers in
+different chromosomes and diploid genotypes.
+
+Classes implementing Bio::Variation::VariantI interface are 
+Bio::Variation::DNAMutation, Bio::Variation::RNAChange, and
+Bio::Variation::AAChange. See L<Bio::Variation::VariantI>,
+L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, and
+L<Bio::Variation::AAChange> for more information.
+
+Variant objects can be added using two ways: an array passed to the
+constructor or as individual Variant objects with add_Variant
+method.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 CONTRIBUTORS
+
+Eckhard Lehmann, ecky at e-lehmann.de
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Variation::SeqDiff;
+
+use strict;
+use Bio::Tools::CodonTable;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root);
+
+
+=head2 new
+
+  Title   : new
+  Usage   : $seqDiff = Bio::Variation::SeqDiff->new;
+  Function: generates a new Bio::Variation::SeqDiff
+  Returns : reference to a new object of class SeqDiff
+  Args    : 
+
+=cut
+
+sub new {
+    # Constructor. Accepts the named arguments listed in the _rearrange()
+    # call below (e.g. -id, -chr, -gene_symbol, -dna_ori ...); every
+    # argument with a true value is forwarded to the accessor of the same
+    # purpose. Returns the new SeqDiff object.
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # NOTE: the variables on the left are filled positionally, so their
+    # order must mirror the key list exactly. RNA_MUT used to be missing
+    # from the keys, which shifted $rna_mut/$aa_ori/$aa_mut by one slot.
+    my ($id, $sysname, $trivname, $chr, $gene_symbol,
+        $desc, $alphabet, $numbering, $offset, $rna_offset, $rna_id, $cds_end,
+        $dna_ori, $dna_mut, $rna_ori, $rna_mut, $aa_ori, $aa_mut
+        #@variants, @genes
+       ) =
+        $self->_rearrange([qw(ID
+                              SYSNAME
+                              TRIVNAME
+                              CHR
+                              GENE_SYMBOL
+                              DESC
+                              ALPHABET
+                              NUMBERING
+                              OFFSET
+                              RNA_OFFSET
+                              RNA_ID
+                              CDS_END
+                              DNA_ORI
+                              DNA_MUT
+                              RNA_ORI
+                              RNA_MUT
+                              AA_ORI
+                              AA_MUT
+                              )],
+                          @args);
+
+    #my $make = $self->SUPER::_initialize(@args);
+
+    # Descriptive attributes; false values (undef, '', 0) are skipped.
+    $id          && $self->id($id);
+    $sysname     && $self->sysname($sysname);
+    $trivname    && $self->trivname($trivname);
+    $chr         && $self->chromosome($chr);
+    $gene_symbol && $self->gene_symbol($gene_symbol);
+    $desc        && $self->description($desc);
+    $alphabet    && $self->alphabet($alphabet);
+    $numbering   && $self->numbering($numbering);
+    $offset      && $self->offset($offset);
+    $rna_offset  && $self->rna_offset($rna_offset);
+    $rna_id      && $self->rna_id($rna_id);
+    $cds_end     && $self->cds_end($cds_end);
+
+    # Original and mutated sequence strings.
+    $dna_ori     && $self->dna_ori($dna_ori);
+    $dna_mut     && $self->dna_mut($dna_mut);
+    $rna_ori     && $self->rna_ori($rna_ori);
+    $rna_mut     && $self->rna_mut($rna_mut);
+    $aa_ori      && $self->aa_ori($aa_ori);
+    $aa_mut      && $self->aa_mut($aa_mut);
+
+    # Containers filled later through add_Variant() and add_Gene().
+    $self->{ 'variants' } = [];
+    #@variants && push(@{$self->{'variants'}},@variants);
+
+    $self->{ 'genes' } = [];
+    #@genes && push(@{$self->{'genes'}},@genes);
+
+    return $self; # success - we hope!
+}
+
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id(H0001); $id = $obj->id();
+ Function: 
+
+           Sets or returns the id of the seqDiff.
+           Should be used to give the collection of variants a UID
+           without semantic associations.
+
+ Example : 
+ Returns : value of id, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub id {
+    # Combined getter/setter: a defined argument replaces the stored id;
+    # the (possibly updated) id is returned either way.
+    my ($self, $value) = @_;
+    $self->{'id'} = $value if defined $value;
+    return $self->{'id'};
+}
+
+
+=head2 sysname
+
+ Title   : sysname
+ Usage   : $obj->sysname('5C>G'); $sysname = $obj->sysname();
+ Function: 
+
+           Sets or returns the systematic name of the seqDiff.  The
+           name should follow the HUGO Mutation Database Initiative
+           approved nomenclature. If called without first setting the
+           value, will generate it from L<Bio::Variation::DNAMutation>
+           objects attached.
+
+ Example : 
+ Returns : value of sysname, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub sysname {
+    my ($self, $value) = @_;
+
+    # Explicit value: store it and hand it back.
+    return $self->{'sysname'} = $value if defined $value;
+
+    unless (defined $self->{'sysname'}) {
+        # Nothing stored yet: build the name from the attached DNA
+        # mutations, ';'-separated and bracketed when there are several.
+        my @dna_muts =
+            grep { $_->isa('Bio::Variation::DNAMutation') } $self->each_Variant;
+
+        my $name = '';
+        for my $idx (0 .. $#dna_muts) {
+            if ($idx == 0) {
+                $name = $dna_muts[$idx]->sysname;
+            }
+            else {
+                $name .= ";". $dna_muts[$idx]->sysname;
+            }
+        }
+        $name = "[". $name. "]" if @dna_muts > 1;
+        $self->{'sysname'} = $name;
+    }
+    return $self->{'sysname'};
+}
+
+
+=head2 trivname
+
+ Title   : trivname
+ Usage   : $obj->trivname('[A2G;T56G]'); $trivname = $obj->trivname();
+ Function: 
+
+           Sets or returns the trivial name of the seqDiff.
+           The name should follow the HUGO Mutation Database Initiative
+           approved nomenclature. If called without first setting the
+           value, will generate it from L<Bio::Variation::AAChange>
+           objects attached.
+
+ Example : 
+ Returns : value of trivname, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub trivname {
+    my ($self, $value) = @_;
+
+    # Explicit value: store it and hand it back.
+    return $self->{'trivname'} = $value if defined $value;
+
+    # Already known: nothing to compute.
+    return $self->{'trivname'} if defined $self->{'trivname'};
+
+    # Otherwise build the name from the attached amino acid changes,
+    # ';'-separated and bracketed when there are several.
+    my @aa_changes =
+        grep { $_->isa('Bio::Variation::AAChange') } $self->each_Variant;
+
+    my $name = '';
+    for my $idx (0 .. $#aa_changes) {
+        if ($idx == 0) {
+            $name = $aa_changes[$idx]->trivname;
+        }
+        else {
+            $name .= ";". $aa_changes[$idx]->trivname;
+        }
+    }
+    $name = "[". $name. "]" if @aa_changes > 1;
+
+    return $self->{'trivname'} = $name;
+}
+
+
+=head2 chromosome
+
+ Title   : chromosome
+ Usage   : $obj->chromosome('X'); $chromosome = $obj->chromosome();
+ Function: 
+
+           Sets or returns the chromosome ("linkage group") of the seqDiff.
+
+ Example : 
+ Returns : value of chromosome, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub chromosome {
+    # Combined getter/setter for the chromosome ("linkage group") label.
+    my ($self, $value) = @_;
+    $self->{'chromosome'} = $value if defined $value;
+    return $self->{'chromosome'};
+}
+
+
+=head2 gene_symbol
+
+ Title   : gene_symbol
+ Usage   : $obj->gene_symbol('FOS'); $gene_symbol = $obj->gene_symbol;
+ Function: 
+
+           Sets or returns the gene symbol for the studied CDS.
+
+ Example : 
+ Returns : value of gene_symbol, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub gene_symbol {
+    # Combined getter/setter for the gene symbol of the studied CDS.
+    my ($self, $value) = @_;
+    $self->{'gene_symbol'} = $value if defined $value;
+    return $self->{'gene_symbol'};
+}
+
+
+
+=head2 description
+
+ Title   : description
+ Usage   : $obj->description('short description'); $descr = $obj->description();
+ Function: 
+
+           Sets or returns the short description of the seqDiff.
+
+ Example : 
+ Returns : value of description, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub description {
+    # Combined getter/setter for the short free-text description.
+    my ($self, $value) = @_;
+    $self->{'description'} = $value if defined $value;
+    return $self->{'description'};
+}
+
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of primary reference sequence being one of 
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+ Returns : a string either 'dna','rna','protein'. 
+ Args    : none
+
+
+=cut
+
+sub alphabet {
+    my ($self, $value) = @_;
+
+    if (defined $value) {
+        # Only these three spellings are accepted; the match is
+        # case sensitive.
+        my $is_valid = grep { $value eq $_ } qw(dna rna protein);
+        if ($is_valid) {
+            $self->{'alphabet'} = $value;
+        }
+        else {
+            $self->throw("$value is not valid alphabet value!");
+        }
+    }
+    return $self->{'alphabet'};
+}
+
+
+=head2 numbering
+
+ Title   : numbering
+ Usage   : $obj->numbering('coding'); $numbering = $obj->numbering();
+ Function: 
+
+           Sets or returns the string giving the numbering schema used
+           to describe the variants.
+
+ Example : 
+ Returns : value of numbering, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+
+sub numbering {
+    # Combined getter/setter for the name of the numbering schema.
+    my ($self, $value) = @_;
+    $self->{'numbering'} = $value if defined $value;
+    return $self->{'numbering'};
+}
+
+
+=head2 offset
+
+ Title   : offset
+ Usage   : $obj->offset(124); $offset = $obj->offset();
+ Function: 
+
+           Sets or returns the offset from the beginning of the DNA sequence 
+           to the coordinate start used to describe variants. Typically
+           the beginning of the coding region of the gene. 
+           The cds_start should be 1 + offset.
+
+ Example : 
+ Returns : value of offset, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+
+sub offset {
+    my ($self, $value) = @_;
+
+    if (defined $value) {
+        $self->{'offset'} = $value;
+    }
+    else {
+        # First read without a stored value: default to 0 and remember it.
+        $self->{'offset'} = 0 unless defined $self->{'offset'};
+    }
+    return $self->{'offset'};
+}
+
+
+=head2 cds_start
+
+ Title   : cds_start
+ Usage   : $obj->cds_start(123); $cds_start = $obj->cds_start();
+ Function: 
+
+           Sets or returns the cds_start from the beginning of the DNA
+           sequence to the coordinate start used to describe
+           variants. Typically the beginning of the coding region of
+           the gene. Needs to be and is implemented as 1 + offset.
+
+ Example : 
+ Returns : value of cds_start, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+
+sub cds_start {
+    # Getter/setter for the CDS start. The value is not stored on its own:
+    # it is kept as 'offset' (cds_start - 1), the same slot offset() uses.
+    #
+    # Args    : new cds_start (optional)
+    # Returns : cds_start, i.e. offset + 1, in both getter and setter mode
+    #           (the setter used to leak the stored offset, value - 1).
+    my ($self, $value) = @_;
+
+    $self->{'offset'} = $value - 1 if defined $value;
+
+    # An offset that was never set counts as 0, matching offset()'s default,
+    # so no arithmetic is done on undef.
+    my $offset = defined $self->{'offset'} ? $self->{'offset'} : 0;
+    return $offset + 1;
+}
+
+
+=head2 cds_end
+
+ Title   : cds_end
+ Usage   : $obj->cds_end(321); $cds_end = $obj->cds_end();
+ Function: 
+
+           Sets or returns the position of the last nucleotide of the
+           termination codon. The coordinate system starts from cds_start.
+
+ Example : 
+ Returns : value of cds_end, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+
+sub cds_end {
+    # Combined getter/setter for the position of the CDS end. The value
+    # is only ever what was stored; nothing is derived from the sequences.
+    my ($self, $value) = @_;
+    $self->{'cds_end'} = $value if defined $value;
+    return $self->{'cds_end'};
+}
+
+
+=head2 rna_offset
+
+ Title   : rna_offset
+ Usage   : $obj->rna_offset(124); $rna_offset = $obj->rna_offset();
+ Function: 
+
+           Sets or returns the rna_offset from the beginning of the RNA sequence 
+           to the coordinate start used to describe variants. Typically
+           the beginning of the coding region of the gene. 
+
+ Example : 
+ Returns : value of rna_offset, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+
+sub rna_offset {
+    my ($self, $value) = @_;
+
+    if (defined $value) {
+        $self->{'rna_offset'} = $value;
+    }
+    else {
+        # First read without a stored value: default to 0 and remember it.
+        $self->{'rna_offset'} = 0 unless defined $self->{'rna_offset'};
+    }
+    return $self->{'rna_offset'};
+}
+
+
+=head2 rna_id
+
+ Title   : rna_id
+ Usage   : $obj->rna_id('transcript#3'); $rna_id = $obj->rna_id();
+ Function: 
+
+	    Sets or returns the ID for original RNA sequence of the seqDiff.
+
+ Example : 
+ Returns : value of rna_id, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub rna_id {
+    # Combined getter/setter for the id of the original RNA sequence.
+    my ($self, $value) = @_;
+    $self->{'rna_id'} = $value if defined $value;
+    return $self->{'rna_id'};
+}
+
+
+
+=head2 add_Variant
+
+ Title   : add_Variant
+ Usage   : $obj->add_Variant($variant)
+ Function: 
+
+           Pushes one Bio::Variation::Variant into the list of variants.
+           At the same time, creates a link from the Variant to SeqDiff
+           using its SeqDiff method.
+
+ Example : 
+ Returns : 1 when succeeds, 0 for failure.
+ Args    : Variant object
+
+=cut
+
+sub add_Variant {
+    # Appends one variant to this SeqDiff and links the variant back to it.
+    #
+    # Args    : object implementing Bio::Variation::VariantI
+    # Returns : 1 on success, 0 when called without a (defined) argument
+    # Throws  : when the argument is not a VariantI object
+    my ($self, $value) = @_;
+    return 0 unless defined $value;
+
+    # Plain scalars are rejected before isa() is attempted, and the error
+    # names the offending argument (it used to print the SeqDiff itself).
+    unless ( ref $value && $value->isa('Bio::Variation::VariantI') ) {
+        $self->throw("Is not a VariantI complying  object but a [$value]");
+    }
+
+    push @{ $self->{'variants'} }, $value;
+    $value->SeqDiff($self);    # back-reference from the variant to us
+    return 1;
+}
+
+
+=head2 each_Variant
+
+ Title   : each_Variant
+ Usage   : $obj->each_Variant();
+ Function: 
+
+            Returns a list of Variants.
+
+ Example : 
+ Returns : list of Variants
+ Args    : none
+
+=cut
+
+sub each_Variant {
+    # Returns the list of attached variants (their count in scalar
+    # context). Takes no arguments; the previous, unused argument list
+    # had been garbled into invalid Perl by e-mail address obfuscation.
+    my ($self) = @_;
+
+    return @{ $self->{'variants'} };
+}
+
+
+
+=head2 add_Gene
+
+ Title   : add_Gene
+ Usage   : $obj->add_Gene($gene)
+ Function: 
+
+           Pushes one L<Bio::LiveSeq::Gene> into the list of genes.
+
+ Example : 
+ Returns : 1 when succeeds, 0 for failure.
+ Args    : Bio::LiveSeq::Gene object
+
+See L<Bio::LiveSeq::Gene> for more information.
+
+=cut
+
+
+sub add_Gene {
+    # Appends one gene to the list returned by each_Gene().
+    #
+    # Args    : Bio::LiveSeq::Gene object
+    # Returns : 1 on success, 0 when called without a (defined) argument
+    # Throws  : when the argument is not a Bio::LiveSeq::Gene
+    my ($self, $value) = @_;
+    return 0 unless defined $value;
+
+    # The exception is raised on $self: the rejected argument is by
+    # definition not a trusted object and may not have a throw() method.
+    unless ( ref $value && $value->isa('Bio::LiveSeq::Gene') ) {
+        $self->throw("Is not a Bio::LiveSeq::Gene object but a  [$value]");
+    }
+
+    push @{ $self->{'genes'} }, $value;
+    return 1;
+}
+
+
+=head2 each_Gene
+
+ Title   : each_Gene
+ Usage   : $obj->each_Gene();
+ Function: 
+
+            Returns a list of L<Bio::LiveSeq::Gene>s.
+
+ Example : 
+ Returns : list of Genes
+ Args    : none
+
+=cut
+
+sub each_Gene {
+    # Returns the list of attached genes (their count in scalar context).
+    # Takes no arguments; the previous, unused argument list had been
+    # garbled into invalid Perl by e-mail address obfuscation.
+    my ($self) = @_;
+
+    return @{ $self->{'genes'} };
+}
+
+
+=head2 dna_ori
+
+ Title   : dna_ori
+ Usage   : $obj->dna_ori('atgctgctgctgct'); $dna_ori = $obj->dna_ori();
+ Function: 
+
+	    Sets or returns the original DNA sequence string of the seqDiff.
+
+ Example : 
+ Returns : value of dna_ori, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub dna_ori {
+    # Combined getter/setter for the original DNA sequence string.
+    my ($self, $value) = @_;
+    $self->{'dna_ori'} = $value if defined $value;
+    return $self->{'dna_ori'};
+}
+
+
+=head2 dna_mut
+
+ Title   : dna_mut
+ Usage   : $obj->dna_mut('atgctggtgctgct'); $dna_mut = $obj->dna_mut();
+ Function: 
+
+	    Sets or returns the mutated DNA sequence of the seqDiff.
+            If sequence has not been set generates it from the
+            original sequence and DNA mutations.
+
+ Example : 
+ Returns : value of dna_mut, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub dna_mut {
+    my ($self, $value) = @_;
+
+    # Explicit value: store it and hand it back.
+    return $self->{'dna_mut'} = $value if defined $value;
+
+    # Nothing (or an empty string) stored: try to derive the mutated
+    # sequence from dna_ori and the attached DNA mutations.
+    $self->_set_dnamut() unless $self->{'dna_mut'};
+    return $self->{'dna_mut'};
+}
+
+# Internal helper used by dna_mut(): fills $self->{'dna_mut'} by applying
+# every attached DNAMutation that is flagged isMutation to a copy of
+# $self->{'dna_ori'}. Does nothing when dna_ori is false or no variants
+# are attached.
+#
+# Side effects on each processed variant: dnStreamSeq and upStreamSeq are
+# overwritten with the flanks computed here, and end is set to start when
+# the variant's allele_ori sequence is empty.
+sub _set_dnamut {
+    my $self = shift;
+
+    return unless $self->{'dna_ori'}  && $self->each_Variant;
+
+    $self->{'dna_mut'} = $self->{'dna_ori'};
+    foreach ($self->each_Variant) {
+	next unless $_->isa('Bio::Variation::DNAMutation');
+	next unless $_->isMutation;
+
+	# $s/$la: substr offset and length of the flank taken before the
+	# mutation; $le: length of the flank taken after it.
+	my ($s, $la, $le);
+	#lies the mutation less than 25 bases after the start of sequence?
+	# NOTE(review): for start < 25 the leading flank is start-1 characters
+	# from index 0, otherwise 25 characters from index start-25; confirm
+	# that both branches stop at the same base relative to the mutation.
+	if ($_->start < 25) {
+	    $s = 0; $la = $_->start - 1;
+	} else {
+	    $s = $_->start - 25; $la = 25;
+	}
+
+	#is the mutation an insertion?
+	$_->end($_->start) unless $_->allele_ori->seq;
+
+	#does the mutation end greater than 25 bases before the end of
+	#sequence?
+	if (($_->end + 25) > length($self->{'dna_mut'})) {
+	    $le = length($self->{'dna_mut'}) - $_->end;
+	} else {
+	    $le = 25;
+	}
+
+	# The leading flank is stored through dnStreamSeq and the trailing
+	# flank through upStreamSeq.
+	$_->dnStreamSeq(substr($self->{'dna_mut'}, $s, $la));
+	$_->upStreamSeq(substr($self->{'dna_mut'}, $_->end, $le));
+
+	# Same flanks around the original and around the mutated allele.
+	my $s_ori = $_->dnStreamSeq . $_->allele_ori->seq . $_->upStreamSeq;
+	my $s_mut = $_->dnStreamSeq . $_->allele_mut->seq . $_->upStreamSeq;
+
+	# Replace the first occurrence of the original context in the working
+	# copy; later variants therefore see the already-mutated sequence.
+	# NOTE(review): $s_ori is interpolated as a regex without quotemeta;
+	# presumably sequences never contain regex metacharacters - confirm.
+	(my $str = $self->{'dna_mut'}) =~ s/$s_ori/$s_mut/;
+	$self->{'dna_mut'} = $str;
+    }
+}
+
+
+=head2 rna_ori
+
+ Title   : rna_ori
+ Usage   : $obj->rna_ori('atgctgctgctgct'); $rna_ori = $obj->rna_ori();
+ Function: 
+
+	    Sets or returns the original RNA sequence of the seqDiff.
+
+ Example : 
+ Returns : value of rna_ori, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub rna_ori {
+    # Combined getter/setter for the original RNA sequence string.
+    my ($self, $value) = @_;
+    $self->{'rna_ori'} = $value if defined $value;
+    return $self->{'rna_ori'};
+}
+
+
+=head2 rna_mut
+
+ Title   : rna_mut
+ Usage   : $obj->rna_mut('atgctggtgctgct'); $rna_mut = $obj->rna_mut();
+ Function: 
+
+	    Sets or returns the mutated RNA sequence of the seqDiff.
+
+ Example : 
+ Returns : value of rna_mut, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub rna_mut {
+    # Combined getter/setter for the mutated RNA sequence string.
+    my ($self, $value) = @_;
+    $self->{'rna_mut'} = $value if defined $value;
+    return $self->{'rna_mut'};
+}
+
+
+=head2 aa_ori
+
+ Title   : aa_ori
+ Usage   : $obj->aa_ori('MAGVLL*'); $aa_ori = $obj->aa_ori();
+ Function: 
+
+	    Sets or returns the original protein sequence of the seqDiff.
+
+ Example : 
+ Returns : value of aa_ori, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub aa_ori {
+    # Combined getter/setter for the original protein sequence string.
+    my ($self, $value) = @_;
+    $self->{'aa_ori'} = $value if defined $value;
+    return $self->{'aa_ori'};
+}
+
+
+=head2 aa_mut
+
+ Title   : aa_mut
+ Usage   : $obj->aa_mut('MA*'); $aa_mut = $obj->aa_mut();
+ Function: 
+
+	    Sets or returns the mutated protein sequence of the seqDiff.
+
+ Example : 
+ Returns : value of aa_mut, a scalar
+ Args    : newvalue (optional)
+
+=cut
+
+
+sub aa_mut {
+    # Combined getter/setter for the mutated protein sequence string.
+    my ($self, $value) = @_;
+    $self->{'aa_mut'} = $value if defined $value;
+    return $self->{'aa_mut'};
+}
+
+
+=head2 seqobj
+
+ Title   : seqobj
+ Usage   : $dnaobj = $obj->seqobj('dna_mut');
+ Function: 
+
+	    Returns any of the original or mutated sequences as a
+	    Bio::PrimarySeq object.
+
+ Example : 
+ Returns : Bio::PrimarySeq object for the requested sequence
+ Args    : string, method name for the sequence requested
+
+See L<Bio::PrimarySeq> for more information.
+
+=cut
+
+sub seqobj {
+    my ($self, $value) = @_;
+
+    # The argument must name one of the six sequence slots.
+    my %valid_obj =
+        map { $_ => 1 } qw(dna_ori rna_ori aa_ori dna_mut rna_mut aa_mut);
+    unless ($valid_obj{$value}) {
+        $self->throw("Sequence type '$value' is not a valid type (".
+                     join(',', map "'$_'", sort keys %valid_obj) .") lowercase");
+    }
+
+    # The part before the underscore names the alphabet ('aa' => 'protein').
+    my ($alphabet) = $value =~ /([^_]+)/;
+
+    # The RNA id, when present, takes precedence as display id.
+    my $id = $self->id;
+    if ($self->rna_id) {
+        $id = $self->rna_id;
+    }
+    $alphabet = 'protein' if $alphabet eq 'aa';
+
+    # The slot is read straight from the object hash; an empty or unset
+    # slot yields undef instead of a sequence object.
+    my $out;
+    if ($self->{$value}) {
+        $out = Bio::PrimarySeq->new(
+            '-seq'              => $self->{$value},
+            '-display_id'       => $id,
+            '-accession_number' => $self->id,
+            '-alphabet'         => $alphabet
+        );
+    }
+    return $out;
+}
+
+=head2 alignment
+
+ Title   : alignment
+ Usage   : $obj->alignment
+ Function: 
+
+           Returns a pretty RNA/AA sequence alignment from linked
+           objects.  Under construction: Only simple coding region
+           point mutations work.
+
+ Example : 
+ Returns : 
+ Args    : none
+
+=cut
+
+
+sub alignment {
+    # Renders, for every attached RNAChange, a small text alignment of the
+    # reference and variant codons with their translations. Returns the
+    # concatenated text, or undef when nothing was rendered.
+    #
+    # Still under construction: only simple coding-region point mutations
+    # are rendered reliably. The deletion branch now walks the deleted
+    # sequence string (it used to call substr() on the Allele object) and
+    # pads the last partial codon by its missing length (it used to
+    # subtract a nucleotide string from 3); frame-shifted variant codons
+    # are still not modelled.
+    my $self = shift;
+    my (@entry, $text);
+
+    # Maximum number of flanking nucleotides walked on each side.
+    my $maxflanklen = 12;
+
+    foreach my $mut ($self->each_Variant) {
+        if ( $mut->isa('Bio::Variation::RNAChange') ) {
+
+            my $upflank = $mut->upStreamSeq;
+            my $dnflank = $mut->dnStreamSeq;
+            my $cposd = $mut->codon_pos;
+            my $rori = $mut->allele_ori->seq;
+            my $rmut = $mut->allele_mut->seq;
+            my $rseqoriu = '';
+            my $rseqmutu = '';
+            my $rseqorid = '';
+            my $rseqmutd = '';
+            my (@rseqori, @rseqmut);
+
+            #  point: complete the affected codon from the flanks; the
+            #  changed nucleotide is shown in upper case in the variant
+            if ($mut->DNAMutation->label =~ /point/) {
+                if ($cposd == 1 ) {
+                    my $nt2d = substr($dnflank, 0, 2);
+                    push @rseqori, $rori. $nt2d;
+                    push @rseqmut, uc ($rmut). $nt2d;
+                    $dnflank = substr($dnflank, 2);
+                }
+                elsif ($cposd == 2) {
+                    my $ntu = chop $upflank;
+                    my $ntd = substr($dnflank, 0, 1);
+                    push @rseqori, $ntu. $rori. $ntd;
+                    push @rseqmut,  $ntu. uc ($rmut). $ntd;
+                    $dnflank =  substr($dnflank, 1);
+                }
+                elsif ($cposd == 3) {
+                    my $ntu1 = chop $upflank;
+                    my $ntu2 = chop $upflank;
+                    push (@rseqori, $ntu2. $ntu1. $rori);
+                    push (@rseqmut, $ntu2. $ntu1. uc $rmut);
+                }
+            }
+            #deletion
+            elsif ($mut->DNAMutation->label =~ /deletion/) {
+                if ($cposd == 2 ) {
+                    $rseqorid = chop $upflank;
+                    $rseqmutd = $rseqorid;
+                }
+                # cut the deleted sequence into codons; the variant line
+                # shows a blank codon for each of them
+                for (my $i=1; $i<=$mut->length; $i++) {
+                    my $ntd = substr($rori, $i-1, 1);
+                    $rseqorid .= $ntd;
+                    if  (length($rseqorid) == 3 ) {
+                        push (@rseqori, $rseqorid);
+                        push (@rseqmut, "   ");
+                        $rseqorid = '';
+                    }
+                }
+
+                if ($rseqorid) {
+                    # complete the trailing partial codon from the
+                    # downstream flank; compute the pad before appending
+                    my $pad = 3 - length($rseqorid);
+                    $rseqorid .= substr($dnflank, 0, $pad);
+                    push (@rseqori, $rseqorid);
+                    push (@rseqmut, "   ");
+                    $dnflank = substr($dnflank, $pad);
+                    # start the flank walk below with an empty codon, as
+                    # the codon loop above does after each push
+                    $rseqorid = '';
+                }
+            }
+            $upflank = reverse $upflank;
+            # loop through the flanks, collecting codons outwards from
+            # the mutation on both sides at once
+            for (my $i=1; $i<=length($dnflank); $i++) {
+
+                last if  $i > $maxflanklen;
+
+                my $ntd = substr($dnflank, $i-1, 1);
+                my $ntu = substr($upflank, $i-1, 1);
+
+                $rseqmutd .= $ntd;
+                $rseqorid .= $ntd;
+                $rseqmutu = $ntu. $rseqmutu;
+                $rseqoriu = $ntu. $rseqoriu;
+
+                if  (length($rseqorid) == 3) {
+                    push (@rseqori, $rseqorid);
+                    push (@rseqmut, $rseqmutd);
+                    $rseqorid =  $rseqmutd ='';
+                }
+                if  (length($rseqoriu) == 3) {
+                    unshift (@rseqori, $rseqoriu);
+                    unshift (@rseqmut, $rseqmutu);
+                    $rseqoriu =  $rseqmutu ='';
+                }
+
+                #print "|i=$i,  $cposd, $rseqmutd, $rseqorid\n";
+                #print "|i=$i,  $cposu, $rseqmutu, $rseqoriu\n\n";
+
+            }
+
+            # leftover partial codons at either end
+            push (@rseqori, $rseqorid);
+            unshift (@rseqori, $rseqoriu);
+            push (@rseqmut, $rseqmutd);
+            unshift (@rseqmut, $rseqmutu);
+
+            # NOTE(review): this leaves the whole method, dropping entries
+            # already collected for earlier variants; 'next' may have been
+            # intended - confirm before changing.
+            return unless $mut->AAChange;
+            #translate
+            my $tr = Bio::Tools::CodonTable->new('-id' => $mut->codon_table);
+            my $apos =  $mut->AAChange->start;
+            my $aposmax = CORE::length($self->aa_ori); #terminator codon no
+            my $rna_ori_line = '';
+            my $rna_mut_line = '';
+            my $aaseqori = '';
+            my $aaseqmut = '';
+            for (my $i = 0; $i <= $#rseqori; $i++) {
+                # amino acid number this codon column is taken to be
+                my $aa_index = $apos - ( $maxflanklen/2 -1) + $i;
+
+                my $res_ori = '';
+                $res_ori = $tr->translate($rseqori[$i])
+                    if length($rseqori[$i]) == 3;
+
+                if (length($res_ori) != 1 or
+                    $aa_index < 1 or
+                    $aa_index > $aposmax ) {
+                    $aaseqori .= "    ";
+                } else {
+                    $aaseqori .= " ". $res_ori. "  ";
+                }
+
+                my $res_mut = '';
+                if (length($rseqmut[$i]) == 3) {
+                    if ($rseqmut[$i] eq '   ') {
+                        $res_mut = "_";
+                    } else {
+                        $res_mut = $tr->translate($rseqmut[$i]);
+                    }
+                }
+                # the variant residue is printed only where it differs, or
+                # at/after the last residue of a termination change
+                if (( $res_mut ne $res_ori and
+                      length($res_mut) == 1 and
+                      $aa_index >= 1 ) or
+                    ( $aa_index >= $aposmax and
+                      $mut->label =~ 'termination')
+                    ) {
+                    $aaseqmut .= " ". $res_mut. "  ";
+                } else {
+                    $aaseqmut .= "    ";
+                }
+
+                # left-pad a short first codon so the columns line up
+                if ($i == 0 and length($rseqori[$i]) != 3) {
+                    my $l = 3 - length($rseqori[$i]);
+                    $rseqori[$i] = (" " x $l). $rseqori[$i];
+                    $rseqmut[$i] = (" " x $l). $rseqmut[$i];
+                }
+                $rna_ori_line .= $rseqori[$i]. " " if $rseqori[$i] ne '';
+                $rna_mut_line .= $rseqmut[$i]. " " if $rseqmut[$i] ne '';
+            }
+
+            # collect the results
+            push (@entry,
+                  "\n"
+                  );
+            $text = "           ". $aaseqmut;
+            push (@entry,
+                  $text
+                  );
+            $text = "Variant  : ". $rna_mut_line;
+            push (@entry,
+                  $text
+                  );
+            $text = "Reference: ". $rna_ori_line;
+            push (@entry,
+                  $text
+                  );
+            $text = "           ". $aaseqori;
+            push (@entry,
+                  $text
+                  );
+            push (@entry,
+                  "\n"
+                  );
+        }
+
+    }
+
+    my $res;
+    foreach my $line (@entry) {
+       $res .=  "$line\n";
+    }
+    return $res;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/Variation/VariantI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/Variation/VariantI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/Variation/VariantI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1040 @@
+# $Id: VariantI.pm,v 1.20.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Variation::VariantI
+#
+# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::VariantI - Sequence Change SeqFeature abstract class
+
+=head1 SYNOPSIS
+
+  #get Bio::Variant::VariantI somehow
+  print $var->restriction_changes, "\n";
+  foreach $allele ($var->each_Allele) {
+      #work on Bio::Variation::Allele objects
+  }
+
+=head1 DESCRIPTION
+
+This superclass defines common methods to basic sequence changes.  The
+instantiable classes Bio::Variation::DNAMutation,
+Bio::Variation::RNAChange and Bio::Variation::AAChange use them.
+See L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>,
+and L<Bio::Variation::AAChange> for more information.
+
+These classes store information; heavy computation to determine allele
+sequences is done elsewhere.
+
+The database cross-references are implemented as
+Bio::Annotation::DBLink objects. The methods to access them are
+defined in Bio::DBLinkContainerI. See L<Bio::Annotation::DBLink>
+and L<Bio::DBLinkContainerI> for details.
+
+Bio::Variation::VariantI redefines and extends
+Bio::SeqFeature::Generic for sequence variations. This class
+describes specific sequence change events. These events are always
+from a specific reference sequence to something different. See
+L<Bio::SeqFeature::Generic> for more information.
+
+IMPORTANT: The notion of reference sequence permeates all
+Bio::Variation classes. This is especially important to remember when
+dealing with Alleles. In a polymorphic site, there can be a large
+number of alleles. One of them has to be selected to be the reference
+allele (allele_ori). ALL the rest have to be passed to the Variant
+using the method add_Allele, including the mutated allele in a
+canonical mutation. The IO modules and generated attributes depend on
+it. They ignore the allele linked to using allele_mut and circulate
+each Allele returned by each_Allele into allele_mut and calculate
+the changes between that and allele_ori.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Variation::VariantI;
+use strict;
+# Object preamble - inheritance
+
+use base qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI);
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id
+ Function:
+
+           Read only method. Returns the id of the variation object.
+           The id is the id of the first DBLink object attached to this object.
+
+ Example :
+ Returns : scalar
+ Args    : none
+
+=cut
+
+sub id {
+    # Read-only id of the variation: "<database>::<primary_id>" of the
+    # first DBLink attached to this object.
+    #
+    # Returns : the id string, or nothing (undef in scalar context) when
+    #           no DBLink is attached
+    my ($self) = @_;
+
+    # Declared unconditionally: the former 'my $id = ... if ...' form is a
+    # conditional lexical declaration, whose behaviour Perl leaves undefined.
+    my ($first_link) = $self->each_DBLink;
+    return unless $first_link;
+
+    return $first_link->database. "::". $first_link->primary_id;
+}
+
+
+=head2 add_Allele
+
+ Title   : add_Allele
+ Usage   : $self->add_Allele($allele)
+ Function: 
+
+	    Adds one Bio::Variation::Allele into the list of alleles.
+            Note that the method forces the convention that nucleotide
+            sequence is in lower case and amino acids are in upper
+            case.
+
+ Example : 
+ Returns : 1 when succeeds, 0 for failure.
+ Args    : Allele object
+
+=cut
+
+
+sub add_Allele {
+    my ($self, $value) = @_;
+
+    # Nothing to add.
+    return 0 unless defined $value;
+
+    unless ( $value->isa('Bio::Variation::Allele') ) {
+        my $com = ref $value;
+        $self->throw("Is not a Allele object but a  [$com]");
+        return 0;
+    }
+
+    # Case convention: amino acid alleles (on AAChange objects) are upper
+    # case, everything else lower case. Empty sequences are left alone.
+    my $wants_upper = $self->isa('Bio::Variation::AAChange');
+    if ( $value->seq ) {
+        if ($wants_upper) {
+            $value->seq( uc( $value->seq ) );
+        }
+        else {
+            $value->seq( lc( $value->seq ) );
+        }
+    }
+
+    # The newest allele also becomes the current mutated allele.
+    push @{ $self->{'alleles'} }, $value;
+    $self->allele_mut($value); #????
+    return 1;
+}
+
+
+=head2 each_Allele
+
+ Title   : each_Allele
+ Usage   : $obj->each_Allele();
+ Function: 
+
+	     Returns a list of Bio::Variation::Allele objects
+
+ Example : 
+ Returns : list of Alleles
+ Args    : none
+
+=cut
+
+sub each_Allele {
+    # Returns the list of alleles added with add_Allele() (their count in
+    # scalar context). Takes no arguments; the previous, unused argument
+    # list had been garbled into invalid Perl by e-mail obfuscation.
+    my ($self) = @_;
+
+    # The list only exists after the first add_Allele() call, so fall
+    # back to an empty list instead of dereferencing undef.
+    return @{ $self->{'alleles'} || [] };
+}
+
+
+=head2 isMutation
+
+ Title   : isMutation
+ Usage   : print join('/', $obj->each_Allele) if not $obj->isMutation;
+ Function:
+
+           Returns or sets the boolean value indicating that the
+           variant described is a canonical mutation with two alleles
+           assigned to be the original (wild type) allele and mutated
+           allele, respectively. If this value is not set, it is
+           assumed that the Variant describes polymorphisms.
+
+ Returns : a boolean
+
+=cut
+
+sub isMutation {
+    my ($self, $value) = @_;
+
+    # Any defined argument is normalised to a strict 1/0 flag.
+    $self->{'isMutation'} = ($value ? 1 : 0) if defined $value;
+    return $self->{'isMutation'};
+}
+
+
+=head2 allele_ori
+
+ Title   : allele_ori
+ Usage   : $obj->allele_ori();
+ Function: 
+
+            Links to and returns the Bio::Variation::Allele object.
+            If value is not set, returns false. All other Alleles are
+            compared to this.
+
+            Amino acid sequences are stored in upper case characters,
+            others in lower case.
+
+ Example : 
+ Returns : Bio::Variation::Allele object, or false if not set
+ Args    : Bio::Variation::Allele object (optional, for setting)
+
+See L<Bio::Variation::Allele> for more.
+
+=cut
+
+# Get/set the original allele.  A defined argument must be a
+# Bio::Variation::Allele object (otherwise throw() is called); its
+# sequence is upper-cased when $self is a Bio::Variation::AAChange and
+# lower-cased otherwise, then the object is stored and returned.
+sub allele_ori {
+    my ($self, $value) = @_;
+    return $self->{'allele_ori'} unless defined $value;
+
+    unless ( ref $value && $value->isa('Bio::Variation::Allele') ) {
+        $self->throw("Value is not Bio::Variation::Allele but [$value]");
+        return $self->{'allele_ori'};
+    }
+
+    my $is_aa = $self->isa('Bio::Variation::AAChange');
+    if ( $value->seq ) {
+        $value->seq( $is_aa ? uc( $value->seq ) : lc( $value->seq ) );
+    }
+    return $self->{'allele_ori'} = $value;
+}
+
+
+=head2 allele_mut
+
+ Title   : allele_mut
+ Usage   : $obj->allele_mut();
+ Function: 
+
+             Links to and returns the Bio::Variation::Allele
+             object.  Sets and returns the mutated allele sequence.
+             If value is not set, returns false.
+
+             Amino acid sequences are stored in upper case characters,
+             others in lower case.
+
+ Example : 
+ Returns : Bio::Variation::Allele object, or false if not set
+ Args    : Bio::Variation::Allele object (optional, for setting)
+
+See L<Bio::Variation::Allele> for more.
+
+=cut
+
+
+# Get/set the mutated allele.  A defined argument must be a
+# Bio::Variation::Allele object (otherwise throw() is called); its
+# sequence is upper-cased when $self is a Bio::Variation::AAChange and
+# lower-cased otherwise, then the object is stored and returned.
+sub allele_mut {
+    my ($self, $value) = @_;
+    return $self->{'allele_mut'} unless defined $value;
+
+    unless ( ref $value && $value->isa('Bio::Variation::Allele') ) {
+        $self->throw("Value is not Bio::Variation::Allele but [$value]");
+        return $self->{'allele_mut'};
+    }
+
+    my $is_aa = $self->isa('Bio::Variation::AAChange');
+    if ( $value->seq ) {
+        $value->seq( $is_aa ? uc( $value->seq ) : lc( $value->seq ) );
+    }
+    return $self->{'allele_mut'} = $value;
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $obj->length();
+ Function: 
+
+            Sets and returns the length of the affected original
+            allele sequence.  If value is not set, returns false == 0.
+
+            Value 0 means that the variant position is before the
+            start=end sequence position. (Value 1 would denote a point
+            mutation). This follows the convention to report an
+            insertion (2insT) in equivalent way to a corresponding
+            deletion (2delT) (Think about indel polymorphism ATC <=> AC
+            where the original state is not known ).
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+# Get/set the length of the affected original allele sequence.
+# Returns 0 when no length has ever been stored.
+sub length {
+    my ($self, $value) = @_;
+    $self->{'length'} = $value if defined $value;
+    return exists $self->{'length'} ? $self->{'length'} : 0;
+}
+
+=head2 upStreamSeq
+
+ Title   : upStreamSeq
+ Usage   : $obj->upStreamSeq();
+ Function: 
+
+            Sets and returns upstream flanking sequence string.  If
+            value is not set, returns false. The sequence should be
+            >=25 characters long, if possible.
+
+ Example : 
+ Returns : string or false
+ Args    : string
+
+=cut
+
+
+# Get/set the upstream flanking sequence string (undef until set).
+sub upStreamSeq {
+    my ($self, $value) = @_;
+    $self->{'upstreamseq'} = $value if defined $value;
+    return $self->{'upstreamseq'};
+}
+
+
+=head2 dnStreamSeq
+
+ Title   : dnStreamSeq
+ Usage   : $obj->dnStreamSeq();
+ Function: 
+
+            Sets and returns dnstream flanking sequence string.  If
+            value is not set, returns false. The sequence should be
+            >=25 characters long, if possible.
+
+ Example : 
+ Returns : string or false
+ Args    : string
+
+=cut
+
+
+# Get/set the downstream flanking sequence string (undef until set).
+sub dnStreamSeq {
+    my ($self, $value) = @_;
+    $self->{'dnstreamseq'} = $value if defined $value;
+    return $self->{'dnstreamseq'};
+}
+
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function: 
+
+            Sets and returns mutation event label(s).  If value is not
+            set, or no argument is given returns false.  Each
+            instantiable class needs to implement this method. Valid
+            values are listed in 'Mutation event controlled vocabulary' in
+            http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+# Placeholder for the mutation event label accessor: this version only
+# delegates to throw_not_implemented(), so each concrete class has to
+# supply its own label().
+sub label {
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+
+
+=head2 status
+
+ Title   : status
+ Usage   : $obj->status()
+ Function: 
+
+           Returns the status of the sequence change object.
+           Valid values are: 'suspected' and 'proven'
+
+ Example : $obj->status('proven');
+ Returns : scalar
+ Args    : valid string (optional, for setting)
+
+
+=cut
+
+
+# Get/set the status of the sequence change.  The argument is lower-cased
+# and must then be 'suspected' or 'proven'; anything else goes to throw().
+#
+# NOTE(review): when no status has been stored this returns the
+# stringified object ("$self"), not undef.  proof() returns the stored
+# slot directly - confirm whether this is intended.
+sub status {
+    my ($self, $value) = @_;
+
+    if ( defined $value ) {
+        $value = lc $value;
+        my $is_valid = grep { $_ eq $value } qw(suspected proven);
+        if ($is_valid) {
+            $self->{'status'} = $value;
+        }
+        else {
+            $self->throw("$value is not valid status value!");
+        }
+    }
+
+    return exists $self->{'status'} ? $self->{'status'} : "$self";
+}
+
+
+=head2 proof
+
+ Title   : proof
+ Usage   : $obj->proof()
+ Function: 
+
+           Returns the proof of the sequence change object.
+           Valid values are: 'computed' and 'experimental'.
+
+ Example : $obj->proof('computed');
+ Returns : scalar
+ Args    : valid string (optional, for setting)
+
+
+=cut
+
+
+# Get/set the proof of the sequence change.  The argument is lower-cased
+# and must then be 'computed' or 'experimental'; anything else goes to
+# throw().  Returns the stored value (undef until set).
+sub proof {
+    my ($self, $value) = @_;
+
+    if ( defined $value ) {
+        $value = lc $value;
+        my $is_valid = grep { $_ eq $value } qw(computed experimental);
+        if ($is_valid) {
+            $self->{'proof'} = $value;
+        }
+        else {
+            $self->throw("$value is not valid proof value!");
+        }
+    }
+
+    return $self->{'proof'};
+}
+
+
+=head2 region
+
+ Title   : region
+ Usage   : $obj->region();
+ Function: 
+
+            Sets and returns the name of the sequence region type or
+            protein domain at this location.  If value is not set,
+            returns false.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+# Get/set the name of the sequence region type or protein domain at this
+# location (undef until set).
+sub region {
+    my ($self, $value) = @_;
+    $self->{'region'} = $value if defined $value;
+    return $self->{'region'};
+}
+
+
+=head2 region_value
+
+ Title   : region_value
+ Usage   : $obj->region_value();
+ Function: 
+
+            Sets and returns the name of the sequence region_value or
+            protein domain at this location.  If value is not set,
+            returns false.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+# Get/set the value stored under 'region_value' (undef until set).
+sub region_value {
+    my ($self, $value) = @_;
+    $self->{'region_value'} = $value if defined $value;
+    return $self->{'region_value'};
+}
+
+=head2 region_dist
+
+ Title   : region_dist
+ Usage   : $obj->region_dist();
+ Function: 
+
+            Sets and returns the distance to the closest region
+            (i.e. intron/exon or domain) boundary. If distance is not
+            set, returns false.
+
+ Example : 
+ Returns : integer
+ Args    : integer
+
+=cut
+
+
+# Get/set the distance to the closest region boundary.  A defined
+# argument must look like an optionally signed integer; otherwise
+# throw() is called and nothing is stored.
+sub region_dist {
+    my ($self, $value) = @_;
+
+    if ( defined $value ) {
+        if ( $value =~ /^[+-]?\d+$/ ) {
+            $self->{'region_dist'} = $value;
+        }
+        else {
+            $self->throw("[$value] for region_dist has to be an integer\n");
+        }
+    }
+
+    return $self->{'region_dist'};
+}
+
+
+=head2 numbering
+
+ Title   : numbering
+ Usage   : $obj->numbering()
+ Function: 
+
+           Returns the numbering schema used for locating sequence features.
+           Valid values are: 'entry' and 'coding'
+
+ Example : $obj->numbering('coding');
+ Returns : scalar
+ Args    : valid string (optional, for setting)
+
+
+=cut
+
+
+# Get/set the numbering schema.  The argument is lower-cased and must
+# then be 'entry' or 'coding'; anything else goes to throw().
+#
+# NOTE(review): when nothing has been stored this returns the stringified
+# object ("$self"), not undef - confirm whether this is intended.
+sub numbering {
+    my ($self, $value) = @_;
+
+    if ( defined $value ) {
+        $value = lc $value;
+        my $is_valid = grep { $_ eq $value } qw(entry coding);
+        if ($is_valid) {
+            $self->{'numbering'} = $value;
+        }
+        else {
+            $self->throw("'$value' is not a valid for numbering!");
+        }
+    }
+
+    return exists $self->{'numbering'} ? $self->{'numbering'} : "$self";
+}
+
+=head2 mut_number
+
+ Title   : mut_number
+ Usage   : $num = $obj->mut_number;
+         : $num = $obj->mut_number($number);
+ Function: 
+
+           Returns or sets the number identifying the order in which the
+           mutation has been issued. Numbers should start from 1.
+           If the number has never been set, the method will return ''
+
+           If you want the output from IO modules look nice and, for
+           multivariant/allele variations, make sense you better set
+           this attribute.
+
+ Returns : an integer
+
+=cut
+
+
+# Get/set the number identifying the order in which the mutation was
+# issued.  Returns the empty string when no number has ever been stored.
+sub mut_number {
+    my ($self, $value) = @_;
+    $self->{'mut_number'} = $value if defined $value;
+    return exists $self->{'mut_number'} ? $self->{'mut_number'} : '';
+}
+
+
+=head2 SeqDiff
+
+ Title   : SeqDiff
+ Usage   : $mutobj = $obj->SeqDiff;
+         : $mutobj = $obj->SeqDiff($objref);
+ Function: 
+
+           Returns or sets the link-reference to the umbrella
+           Bio::Variation::SeqDiff object.  If there is no link,
+           it will return undef
+
+           Note: Adding a variant into a SeqDiff object will
+           automatically set this value.
+
+ Returns : an obj_ref or undef
+
+See L<Bio::Variation::SeqDiff> for more information.
+
+=cut
+
+# Get/set the link to the umbrella Bio::Variation::SeqDiff object.
+# A defined argument that is not a Bio::Variation::SeqDiff is passed to
+# throw().  Returns the stored object, or nothing (undef / empty list)
+# when no link has been stored.
+sub SeqDiff {
+    my ($self, $value) = @_;
+
+    if ( defined $value ) {
+        unless ( $value->isa('Bio::Variation::SeqDiff') ) {
+            $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]");
+            return;
+        }
+        $self->{'seqDiff'} = $value;
+    }
+
+    return unless exists $self->{'seqDiff'};
+    return $self->{'seqDiff'};
+}
+
+=head2 add_DBLink
+
+ Title   : add_DBLink
+ Usage   : $self->add_DBLink($ref)
+ Function: adds a link object
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+
+# Append a Bio::Annotation::DBLink object to the list kept under 'link'.
+# A false argument is returned unchanged and nothing is stored; a true
+# argument of any other class is passed to throw().
+# Returns the result of push (the new number of stored links).
+sub add_DBLink {
+    my ($self, $com) = @_;
+    return $com unless $com;
+
+    $self->throw("Is not a link object but a  [$com]")
+        unless $com->isa('Bio::Annotation::DBLink');
+
+    return push @{ $self->{'link'} }, $com;
+}
+
+=head2 each_DBLink
+
+ Title   : each_DBLink
+ Usage   : foreach $ref ( $self->each_DBLink() )
+ Function: gets an array of DBLink objects
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+# Return the list of link objects stored by add_DBLink() under 'link'.
+# Returns an empty list when no link has been added yet.
+sub each_DBLink {
+    my ($self) = @_;
+
+    # "|| []" keeps the dereference valid while the slot is still unset;
+    # under "use strict" an undefined value cannot be dereferenced here.
+    return @{ $self->{'link'} || [] };
+}
+
+=head2 restriction_changes
+
+ Title   : restriction_changes
+ Usage   : $obj->restriction_changes();
+ Function: 
+
+            Returns a string containing a list of restriction
+            enzyme changes of form +EcoRI, separated by
+            commas. Strings need to be valid restriction enzyme names
+            as stored in REBASE. allele_ori and allele_mut need to be assigned.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+# Compute, cache in $self->{'re_changes'} and return a comma-separated
+# string of restriction-site changes between the original and the mutated
+# allele, e.g. "+EcoRI, -BamHI".  For each enzyme, "+Name" is added when the
+# mutated sequence has more matches of the site than the original on either
+# strand, and "-Name" when it has fewer on either strand.
+# Throws when called on a Bio::Variation::AAChange.
+sub restriction_changes { 
+    my ($self) = @_;
+
+    # Recompute whenever the cached value is false; that includes the empty
+    # string stored when no site changed.
+    if (not $self->{'re_changes'}) { 
+	# enzyme name => recognition-site pattern
+	my %re = &_enzymes;
+	
+	# complain if used on AA data
+	if ($self->isa('Bio::Variation::AAChange')) {
+	    $self->throw('Restriction enzymes do not bite polypeptides!');
+	}
+	
+	#sanity checks
+	$self->warn('Upstream sequence is empty!')
+	    if $self->upStreamSeq eq '';
+	$self->warn('Downstream sequence is empty!')
+	    if $self->dnStreamSeq eq '';
+#	 $self->warn('Original allele sequence is empty!')
+#	     if $self->allele_ori eq '';
+#	 $self->warn('Mutated allele sequence is empty!')
+#	     if $self->allele_mut eq '';
+	
+	#reuse the non empty DNA level list at RNA level if the flanks are identical
+	#Hint: Check DNAMutation object first
+	if ($self->isa('Bio::Variation::RNAChange') and  $self->DNAMutation and
+	    $self->upStreamSeq eq $self->DNAMutation->upStreamSeq  and 
+	    $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and
+	    $self->DNAMutation->restriction_changes ne '' ) {
+	    $self->{'re_changes'} = $self->DNAMutation->restriction_changes;
+	} else {
+	    
+	    #maximum length of a type II restriction site in the current REBASE
+	    my ($le_dn) = 15; 
+	    my ($le_up) = $le_dn;
+
+	    #reduce the flank lengths if the desired length is not available
+	    # (CORE::length because this package defines its own length method)
+	    $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq);
+	    $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq);
+
+	    #Build sequence strings to compare
+	    # both start with the last $le_up characters of the upstream flank,
+	    # then the allele sequence, then the first $le_dn downstream characters
+	    my ($oriseq, $mutseq);    
+	    $oriseq  = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up);
+	    $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq;
+	    $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq;
+	    $oriseq .= substr($self->dnStreamSeq, 0, $le_dn);
+	    $mutseq .= substr($self->dnStreamSeq, 0, $le_dn);
+	    
+	    # ... and their reverse complements
+	    my $oriseq_rev = _revcompl ($oriseq);
+	    my $mutseq_rev = _revcompl ($mutseq);
+
+	    # collect results into a string
+	    my $rec = '';
+	    foreach my $enz (sort keys (%re)) {
+		my $site = $re{$enz};
+		# list-context //g returns every match, so the array sizes
+		# are the per-strand match counts
+		my @ori = ($oriseq=~ /$site/g);
+		my @mut = ($mutseq=~ /$site/g);
+		my @ori_r = ($oriseq_rev =~ /$site/g);
+		my @mut_r = ($mutseq_rev =~ /$site/g);
+		
+		$rec .= '+'. $enz. ", " 
+		    if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r);
+		$rec .= '-'. $enz. ", " 		    
+		    if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r);
+		
+	    }
+	    # drop the trailing ", " separator
+	    $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne '';
+	    $self->{'re_changes'} =  $rec;
+	}
+    }
+    return $self->{'re_changes'}
+}
+
+
+# Return the reverse complement of a nucleotide string, IUPAC ambiguity
+# codes included.  Side effect on the result: it is always lower case.
+sub _revcompl {
+    my ($seq) = @_;
+
+    $seq = lc $seq;
+    $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/;
+
+    # Force scalar context: in list context reverse() would reverse the
+    # one-element argument list and hand the string back unreversed.
+    return scalar CORE::reverse($seq);
+}
+
+
+# Return the built-in table of restriction enzymes as a hash (flattened to
+# a list): enzyme name => recognition site written as a lower-case regular
+# expression string, with ambiguous positions spelled out as character
+# classes such as [acgt].  The table is rebuilt on every call.
+sub _enzymes {
+ #REBASE version 005   type2.005
+ my %enzymes =  (
+         'AarI' => 'cacctgc',
+         'AatII' => 'gacgtc',
+         'AccI' => 'gt[ac][gt]ac',
+         'AceIII' => 'cagctc',
+         'AciI' => 'ccgc',
+         'AclI' => 'aacgtt',
+         'AcyI' => 'g[ag]cg[ct]c',
+         'AflII' => 'cttaag',
+         'AflIII' => 'ac[ag][ct]gt',
+         'AgeI' => 'accggt',
+         'AhaIII' => 'tttaaa',
+         'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc',
+         'AluI' => 'agct',
+         'AlwNI' => 'cag[acgt][acgt][acgt]ctg',
+         'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc',
+         'ApaI' => 'gggccc',
+         'ApaLI' => 'gtgcac',
+         'ApoI' => '[ag]aatt[ct]',
+         'AscI' => 'ggcgcgcc',
+         'AsuI' => 'gg[acgt]cc',
+         'AsuII' => 'ttcgaa',
+         'AvaI' => 'c[ct]cg[ag]g',
+         'AvaII' => 'gg[at]cc',
+         'AvaIII' => 'atgcat',
+         'AvrII' => 'cctagg',
+         'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c',
+         'BalI' => 'tggcca',
+         'BamHI' => 'ggatcc',
+         'BbvCI' => 'cctcagc',
+         'BbvI' => 'gcagc',
+         'BbvII' => 'gaagac',
+         'BccI' => 'ccatc',
+         'Bce83I' => 'cttgag',
+         'BcefI' => 'acggc',
+         'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc',
+         'BciVI' => 'gtatcc',
+         'BclI' => 'tgatca',
+         'BetI' => '[at]ccgg[at]',
+         'BfiI' => 'actggg',
+         'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc',
+         'BglII' => 'agatct',
+         'BinI' => 'ggatc',
+         'BmgI' => 'g[gt]gccc',
+         'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc',
+         'Bpu10I' => 'cct[acgt]agc',
+         'BsaAI' => '[ct]acgt[ag]',
+         'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc',
+         'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc',
+         'BsbI' => 'caacac',
+         'BscGI' => 'cccgt',
+         'BseMII' => 'ctcag',
+         'BsePI' => 'gcgcgc',
+         'BseRI' => 'gaggag',
+         'BseSI' => 'g[gt]gc[ac]c',
+         'BsgI' => 'gtgcag',
+         'BsiI' => 'cacgag',
+         'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg',
+         'BsmAI' => 'gtctc',
+         'BsmI' => 'gaatgc',
+         'Bsp1407I' => 'tgtaca',
+         'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
+         'BspGI' => 'ctggac',
+         'BspHI' => 'tcatga',
+         'BspLU11I' => 'acatgt',
+         'BspMI' => 'acctgc',
+         'BspMII' => 'tccgga',
+         'BsrBI' => 'ccgctc',
+         'BsrDI' => 'gcaatg',
+         'BsrI' => 'actgg',
+         'BstEII' => 'ggt[acgt]acc',
+         'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
+         'BtrI' => 'cacgtc',
+         'BtsI' => 'gcagtg',
+         'Cac8I' => 'gc[acgt][acgt]gc',
+         'CauII' => 'cc[cg]gg',
+         'Cfr10I' => '[ag]ccgg[ct]',
+         'CfrI' => '[ct]ggcc[ag]',
+         'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt',
+         'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc',
+         'ClaI' => 'atcgat',
+         'CviJI' => '[ag]gc[ct]',
+         'CviRI' => 'tgca',
+         'DdeI' => 'ct[acgt]ag',
+         'DpnI' => 'gatc',
+         'DraII' => '[ag]gg[acgt]cc[ct]',
+         'DraIII' => 'cac[acgt][acgt][acgt]gtg',
+         'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc',
+         'DrdII' => 'gaacca',
+         'DsaI' => 'cc[ag][ct]gg',
+         'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc',
+         'EciI' => 'ggcgga',
+         'Eco31I' => 'ggtctc',
+         'Eco47III' => 'agcgct',
+         'Eco57I' => 'ctgaag',
+         'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg',
+         'EcoRI' => 'gaattc',
+         'EcoRII' => 'cc[at]gg',
+         'EcoRV' => 'gatatc',
+         'Esp3I' => 'cgtctc',
+         'EspI' => 'gct[acgt]agc',
+         'FauI' => 'cccgc',
+         'FinI' => 'gggac',
+         'Fnu4HI' => 'gc[acgt]gc',
+         'FnuDII' => 'cgcg',
+         'FokI' => 'ggatg',
+         'FseI' => 'ggccggcc',
+         'GdiII' => 'cggcc[ag]',
+         'GsuI' => 'ctggag',
+         'HaeI' => '[at]ggcc[at]',
+         'HaeII' => '[ag]gcgc[ct]',
+         'HaeIII' => 'ggcc',
+         'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc',
+         'HgaI' => 'gacgc',
+         'HgiAI' => 'g[at]gc[at]c',
+         'HgiCI' => 'gg[ct][ag]cc',
+         'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt',
+         'HgiJII' => 'g[ag]gc[ct]c',
+         'HhaI' => 'gcgc',
+         'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc',
+         'HindII' => 'gt[ct][ag]ac',
+         'HindIII' => 'aagctt',
+         'HinfI' => 'ga[acgt]tc',
+         'HpaI' => 'gttaac',
+         'HpaII' => 'ccgg',
+         'HphI' => 'ggtga',
+         'Hpy178III' => 'tc[acgt][acgt]ga',
+         'Hpy188I' => 'tc[acgt]ga',
+         'Hpy99I' => 'cg[at]cg',
+         'KpnI' => 'ggtacc',
+         'Ksp632I' => 'ctcttc',
+         'MaeI' => 'ctag',
+         'MaeII' => 'acgt',
+         'MaeIII' => 'gt[acgt]ac',
+         'MboI' => 'gatc',
+         'MboII' => 'gaaga',
+         'McrI' => 'cg[ag][ct]cg',
+         'MfeI' => 'caattg',
+         'MjaIV' => 'gt[acgt][acgt]ac',
+         'MluI' => 'acgcgt',
+         'MmeI' => 'tcc[ag]ac',
+         'MnlI' => 'cctc',
+         'MseI' => 'ttaa',
+         'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg',
+         'MstI' => 'tgcgca',
+         'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc',
+         'NaeI' => 'gccggc',
+         'NarI' => 'ggcgcc',
+         'NcoI' => 'ccatgg',
+         'NdeI' => 'catatg',
+         'NheI' => 'gctagc',
+         'NlaIII' => 'catg',
+         'NlaIV' => 'gg[acgt][acgt]cc',
+         'NotI' => 'gcggccgc',
+         'NruI' => 'tcgcga',
+         'NspBII' => 'c[ac]gc[gt]g',
+         'NspI' => '[ag]catg[ct]',
+         'PacI' => 'ttaattaa',
+         'Pfl1108I' => 'tcgtag',
+         'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg',
+         'PleI' => 'gagtc',
+         'PmaCI' => 'cacgtg',
+         'PmeI' => 'gtttaaac',
+         'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc',
+         'PpuMI' => '[ag]gg[at]cc[ct]',
+         'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc',
+         'PsiI' => 'ttataa',
+         'PstI' => 'ctgcag',
+         'PvuI' => 'cgatcg',
+         'PvuII' => 'cagctg',
+         'RleAI' => 'cccaca',
+         'RsaI' => 'gtac',
+         'RsrII' => 'cgg[at]ccg',
+         'SacI' => 'gagctc',
+         'SacII' => 'ccgcgg',
+         'SalI' => 'gtcgac',
+         'SanDI' => 'ggg[at]ccc',
+         'SapI' => 'gctcttc',
+         'SauI' => 'cct[acgt]agg',
+         'ScaI' => 'agtact',
+         'ScrFI' => 'cc[acgt]gg',
+         'SduI' => 'g[agt]gc[act]c',
+         'SecI' => 'cc[acgt][acgt]gg',
+         'SexAI' => 'acc[at]ggt',
+         'SfaNI' => 'gcatc',
+         'SfeI' => 'ct[ag][ct]ag',
+         'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc',
+         'SgfI' => 'gcgatcgc',
+         'SgrAI' => 'c[ag]ccgg[ct]g',
+         'SimI' => 'gggtc',
+         'SmaI' => 'cccggg',
+         'SmlI' => 'ct[ct][ag]ag',
+         'SnaBI' => 'tacgta',
+         'SnaI' => 'gtatac',
+         'SpeI' => 'actagt',
+         'SphI' => 'gcatgc',
+         'SplI' => 'cgtacg',
+         'SrfI' => 'gcccgggc',
+         'Sse232I' => 'cgccggcg',
+         'Sse8387I' => 'cctgcagg',
+         'Sse8647I' => 'agg[at]cct',
+         'SspI' => 'aatatt',
+         'Sth132I' => 'cccg',
+         'StuI' => 'aggcct',
+         'StyI' => 'cc[at][at]gg',
+         'SwaI' => 'atttaaat',
+         'TaqI' => 'tcga',
+         'TaqII' => 'gaccga',
+         'TatI' => '[at]gtac[at]',
+         'TauI' => 'gc[cg]gc',
+         'TfiI' => 'ga[at]tc',
+         'TseI' => 'gc[at]gc',
+         'Tsp45I' => 'gt[cg]ac',
+         'Tsp4CI' => 'ac[acgt]gt',
+         'TspEI' => 'aatt',
+         'TspRI' => 'ca[cg]tg[acgt][acgt]',
+         'Tth111I' => 'gac[acgt][acgt][acgt]gtc',
+         'Tth111II' => 'caa[ag]ca',
+         'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg',
+         'UbaPI' => 'cgaacg',
+         'VspI' => 'attaat',
+         'XbaI' => 'tctaga',
+         'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg',
+         'XhoI' => 'ctcgag',
+         'XhoII' => '[ag]gatc[ct]',
+         'XmaIII' => 'cggccg',
+         'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc'
+        );
+
+    return %enzymes;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/Bio/WebAgent.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Bio/WebAgent.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Bio/WebAgent.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,206 @@
+# $Id: WebAgent.pm,v 1.12.4.3 2006/11/08 17:25:54 sendu Exp $
+#
+# BioPerl module for Bio::WebAgent
+#
+# Cared for by Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+# For copyright and disclaimer see below.
+#
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::WebAgent - A base class for Web (any protocol) access
+
+=head1 SYNOPSIS
+
+  # This is an abstract superclass for bioperl modules accessing web
+  # resources - normally you do not instantiate it but one of its
+  # subclasses.
+
+=head1 DESCRIPTION
+
+This abstract superclass is a subclass of L<LWP::UserAgent> which
+allows protocol independent access of remote locations over
+the Net.
+
+It takes care of error handling, proxies and various net protocols.
+BioPerl classes accessing the net should inherit from it.  For details,
+see L<LWP::UserAgent>.
+
+The interface is still evolving. For now, two public methods have been
+copied from Bio::DB::WebDBSeqI: delay() and delay_policy. These are
+used to prevent overwhelming the server by rapidly repeated requests. Ideally
+there should be a common abstract superclass with these. See L<delay>.
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>, 
+L<Bio::DB::WebDBSeqI>, 
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003, Heikki Lehvaslaiho and EMBL-EBI.
+All Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::WebAgent;
+use vars qw($LAST_INVOCATION_TIME);
+use strict;
+
+use base qw(LWP::UserAgent Bio::Root::Root);
+
+
+# Constructor.  The parent constructor is called with env_proxy => 1 as
+# the default.  The remaining arguments are read as key/value pairs; a
+# leading '-' is stripped from each key and, when the new object has a
+# method of that name, the method is called with the value.  Pairs whose
+# key is not a known method are silently skipped.
+sub new {
+    my ($class, @args) = @_;
+
+    my $self = $class->SUPER::new(env_proxy => 1);
+
+    while (@args) {
+        my ($key, $value) = splice @args, 0, 2;
+        $key =~ s/^-//;
+        next unless $self->can($key);
+        $self->$key($value);
+    }
+
+    return $self;
+}
+
+
+# -----------------------------------------------------------------------------
+
+=head2 url
+
+ Usage   : $agent->url
+ Returns : URL to reach out to Net
+ Args    : string
+
+=cut
+
+# Get/set the URL stored under '_url' (undef until set).
+sub url {
+    my ($self, $value) = @_;
+    $self->{'_url'} = $value if defined $value;
+    return $self->{'_url'};
+}
+
+
+=head2 delay
+
+ Title   : delay
+ Usage   : $secs = $self->delay([$secs])
+ Function: get/set number of seconds to delay between fetches
+ Returns : number of seconds to delay
+ Args    : new value
+
+NOTE: the default is to use the value specified by delay_policy().
+This can be overridden by calling this method, or by passing the
+-delay argument to new().
+
+=cut
+
+# Get/set the number of seconds to wait between fetches.  A true argument
+# must be >= 0 (otherwise throw() is called) and is truncated to an
+# integer before being stored.  Returns the stored delay, or the value of
+# delay_policy() when the stored delay is unset or false.
+#
+# NOTE(review): because both tests use truthiness, a delay of 0 can
+# neither be stored nor take effect - confirm whether that is intended.
+sub delay {
+    my ($self, $value) = @_;
+
+    if ($value) {
+        $value >= 0
+            or $self->throw("Need a positive integer, not [$value]");
+        $self->{'_delay'} = int $value;
+    }
+
+    return $self->{'_delay'} || $self->delay_policy;
+}
+
+=head2 delay_policy
+
+ Title   : delay_policy
+ Usage   : $secs = $self->delay_policy
+ Function: return number of seconds to delay between calls to remote db
+ Returns : number of seconds to delay
+ Args    : none
+
+NOTE: The default delay policy is 3s.  Override in subclasses to
+implement other delays.  The timer has only second resolution, so the delay
+will actually be +/- 1s.
+
+=cut
+
+# Default number of seconds to wait between calls: always 3 here.
+sub delay_policy {
+    my ($self) = @_;
+    return 3;
+}
+
+
+=head2 sleep
+
+ Title   : sleep
+ Usage   : $self->sleep
+ Function: sleep for a number of seconds indicated by the delay policy
+ Returns : none
+ Args    : none
+
+NOTE: This method keeps track of the last time it was called and only
+imposes a sleep if it was called more recently than the delay_policy()
+allows.
+
+=cut
+
+# Pause when the previous call was more recent than delay() seconds ago,
+# then record the current time in the package variable
+# $LAST_INVOCATION_TIME.
+sub sleep {
+    my ($self) = @_;
+
+    $LAST_INVOCATION_TIME ||= 0;
+
+    if ( time - $LAST_INVOCATION_TIME < $self->delay ) {
+        my $delay = $self->delay - ( time - $LAST_INVOCATION_TIME );
+        $self->debug("sleeping for $delay seconds\n");
+        CORE::sleep($delay);    # the builtin, not this method
+    }
+
+    $LAST_INVOCATION_TIME = time;
+}
+
+1;
+
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/Build.PL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Build.PL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Build.PL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,257 @@
+#!/usr/bin/perl -w
+
+# This is a Module::Build script for Bioperl installation.
+# See http://search.cpan.org/~kwilliams/Module-Build/lib/Module/Build.pm
+
+# Uses a custom subclass of Module::Build called ModuleBuildBioperl that
+# doesn't get installed
+
+# In the future developers may need to alter the requires and recommends and
+# possibly Network sections of ModuleBuildBioperl->new() below, but otherwise
+# nothing else here is likely to need changing.
+
+use strict;
+use ModuleBuildBioperl;
+
+our @drivers;
+
+# Set up the ModuleBuildBioperl object.
+# This single constructor call declares the distribution metadata, the hard
+# and optional prerequisites, the command-line options and the auto-detected
+# features; the rest of the script only acts on the resulting $build object.
+my $build = ModuleBuildBioperl->new(
+    module_name         => 'Bio',
+    dist_name           => 'bioperl',
+    dist_version_from   => 'Bio/Root/Version.pm',
+    dist_author         => 'Bioperl Team <bioperl-l at bioperl.org>',
+    dist_abstract       => 'Bioinformatics Toolkit',
+    license             => 'artistic',
+    requires            => {
+                            'perl'                      => '5.6.1',
+                            'IO::String'                => 0,
+                            'DB_File'                   => 0
+                           },
+    build_requires      => {
+                            'Test::More'                => 0,
+                            'Module::Build'             => 0.2805,
+                            'Test::Harness'             => 2.62,
+                            'CPAN'                      => 1.81
+                           },
+    # NOTE(review): each recommends value below appears to be a
+    # '/'-separated triple of minimum version, purpose, and the Bioperl
+    # modules that use the dependency -- confirm against ModuleBuildBioperl.
+    recommends          => { # does what you would expect of recommends, except more informative output and generates optional_features in META.yml
+                            'Ace'                       => '0/access of ACeDB database/Bio::DB::Ace,Bio::DB::GFF::Adaptor::ace',
+                            # this won't actually install due to circular dep, but we have no way of doing a post-install
+                            'Bio::ASN1::EntrezGene'     => '0/parsing entrezgene/Bio::SeqIO::entrezgene',
+                            # we actually need 1.01 of Class::AutoClass, but unfortunately it is versioned as 1.0
+                            'Class::AutoClass'          => '1/creating objects/Bio::Graph::SimpleGraph,Bio::Graph::SimpleGraph::Traversal,Bio::Graph::ProteinGraph',
+                            'Clone'                     => '0/cloning objects/Bio::Graph::ProteinGraph,Bio::Tools::Primer3',
+                            'Convert::Binary::C'        => '0/strider functionality/Bio::SeqIO::strider',
+                            'Data::Stag::XMLWriter'     => '0/writing choas xml files/Bio::SeqIO::chaosxml',
+                            'GD'                        => '1.3/rendering Sequences and Features/Bio::Graphics::Glyph*',
+                            'GD::SVG'                   => '0/creating SVG images/Bio::Graphics::Panel',
+                            # we specifically want Graph::Directed, but that has no VERSION
+                            'Graph'                     => '0/ontology engine implementation for the GO parser/Bio::Ontology::SimpleGOEngine::GraphAdaptor',
+                            'HTML::Entities'            => '0/remote analysis POST submissions/Bio::SearchIO::blastxml',
+                            'HTML::Parser'              => '3/screen scraping www.gdb.org/Bio::DB::GDB',
+                            'HTTP::Request::Common'     => '0/GenBank+GenPept sequence retrieval, remote http Blast jobs/Bio::DB::*,Bio::Tools::Run::RemoteBlast,Bio::Tools::Analysis::Protein*,Bio::Tools::Analysis::DNA*',
+                            'LWP::UserAgent'            => '0/remote access/Bio::DB::*,Bio::Tools::Run::RemoteBlast,Bio::Tools::WebBlat,Bio::WebAgent,Bio::Graphics::Glyph::image',
+                            'PostScript::TextBlock'     => '0/EPS output/Bio::Tree::Draw::Cladogram',
+                            'Set::Scalar'               => '0/proper operation/Bio::Tree::Compatible',
+                            'SOAP::Lite'                => '0/XEMBL Services and Bibliographic queries/Bio::DB::XEMBLService,Bio::DB::Biblio::soap',
+                            'Spreadsheet::ParseExcel'   => '0/parsing Excel files/Bio::SeqIO::excel',
+                            'Storable'                  => '0/storing sequence objects in local file cache/Bio::DB::FileCache,Bio::SeqFeature::Collection,Bio::PopGen::HtSNP,Bio::PopGen::TagHaplotype,Bio::DB::GFF::Adaptor::berkeleydb',
+                            'SVG'                       => '2.26/SVG output/Bio::Graphics::Pictogram',
+                            'SVG::Graph'                => '0.01/creating SVG images/Bio::TreeIO::svggraph',
+                            'Text::Shellwords'          => '0/test scripts/Bio::Graphics',
+                            'URI::Escape'               => '0/dealing with web resources/Bio::Tools::WebBlat,Bio::FeatureIO::gff,Bio::FeatureIO::interpro,Bio::DB::Biblio::eutils,Bio::DB::EUtilities::Cookie,Bio::DB::Query::GenBank,Bio::DB::NCBIHelper,Bio::SeqFeature::Annotated',
+                            'XML::DOM::XPath'           => '0.13/parsing interpro features/Bio::FeatureIO::interpro',
+                            'XML::Parser'               => '0/parsing xml/Bio::Biblio::IO::medlinexml',
+                            'XML::Parser::PerlSAX'      => '0/parsing xml/Bio::SeqIO::tinyseq,Bio::SeqIO::game::gameSubs,Bio::OntologyIO::InterProParser,Bio::ClusterIO::dbsnp',
+                            'XML::SAX'                  => '0.15/parsing xml/Bio::SearchIO::blastxml,Bio::SeqIO::tigrxml,Bio::SeqIO::bsml_sax',
+                            'XML::SAX::Writer'          => '0/writing xml/Bio::SeqIO::tigrxml',
+                            'XML::Twig'                 => '0/parsing xml/Bio::Variation::IO::xml,Bio::DB::Taxonomy::entrez,Bio::DB::Biblio::eutils,Bio::Graph::IO::psi_xml',
+                            'XML::Writer'               => '0.4/parsing and writing xml/Bio::SeqIO::agave,Bio::SeqIO::game::gameWriter,Bio::SeqIO::chadoxml,Bio::SeqIO::tinyseq,Bio::Variation::IO::xml,Bio::SearchIO::Writer::BSMLResultWriter'
+                           },
+    get_options         => {
+                            network => { } # not actually used by anything yet, but in the future say perl Build.PL --network
+                           },
+    # Features that are switched on automatically when their prerequisites
+    # (and, where given, the extra 'test' code ref) are satisfied; they are
+    # queried further down via $build->feature(NAME).
+    auto_features       => {
+                            BioDBSeqFeature_BDB   => {
+                                                        description      => "BDB tests for Bio::DB::SeqFeature::Store",
+                                                        feature_requires => { 'DB_File' => 0 } # feature_requires is like requires, except that it doesn't trigger installation
+                                                     },
+                            BioDBGFF              => {
+                                                        description      => "BioDBGFF database tests (will need to answer questions before really enabling)",
+                                                        feature_requires => { 'DBI' => 0 },
+                                                        excludes_os      => ['mswin'],
+                                                        test             => \&test_biodbgff # ModuleBuildBioperl unique requirement that after everything else succeeds, supplied code ref must also return undef
+                                                     },
+                            BioDBSeqFeature_mysql => {
+                                                        description      => "MySQL tests for Bio::DB::SeqFeature::Store",
+                                                        feature_requires => { 'DBI' => 0, 'DBD::mysql' => 0 },
+                                                        test             => \&test_db
+                                                     },
+# The following code works, but since no tests in the test suite actually make use of this functionality, don't use it yet
+#                            Network               => {
+#                                                        description => "Enable tests that need an internet connection",
+#                                                        requires    => { 'LWP::UserAgent' => 0 },
+#                                                        options     => ['network'], # ModuleBuildBioperl unique requirement that --network was supplied
+#                                                        test        => \&ModuleBuildBioperl::test_internet
+#                                                     }
+                           },
+    dynamic_config      => 1
+    
+    #pm_files           => {} # modules in Bio are treated as if they were in lib and auto-installed
+    #script_files       => [] # scripts in scripts directory are installed on-demand
+);
+
+# Handle auto features: every enabled database feature gets its wrapper test
+# script generated.
+make_bdb_test() if $build->feature('BioDBSeqFeature_BDB');
+make_dbi_test() if $build->feature('BioDBSeqFeature_mysql');
+
+# Store the state of the 'Network' feature in the build notes, so that a test
+# script can later retrieve it like this:
+#   use Module::Build;
+#   my $build = Module::Build->current;
+#   my $do_network_tests = $build->notes('network');
+$build->notes(network => $build->feature('Network'));
+
+# Ask questions
+$build->choose_scripts;
+if ($build->feature('BioDBGFF')) {
+    prompt_for_biodbgff();
+}
+
+# Request that some scripts run post-installation
+# (the path is given in unix form regardless of the local OS)
+$build->add_post_install_script('maintenance/symlink_script.pl');
+
+# Add extra things to MANIFEST.SKIP
+$build->add_to_manifest_skip('bioperl.lisp', 'Bio/Tools/WebBlat.pm');
+
+# Create the build script and exit
+$build->create_build_script;
+
+exit;
+
+
+# Generate t/BioDBSeqFeature_BDB.t, a one-line wrapper that runs
+# t/BioDBSeqFeature.t against the berkeleydb adaptor, then register the
+# generated file for cleanup and for exclusion from the manifest.
+# Dies if the wrapper cannot be created or completely written.
+sub make_bdb_test {
+    my $path0 = File::Spec->catfile('t', 'BioDBSeqFeature.t');
+    my $path = File::Spec->catfile('t', 'BioDBSeqFeature_BDB.t');
+    # Three-argument open keeps the mode apart from the file name, and the
+    # check stops us from printing to an unopened handle and carrying on as
+    # if the test script existed.
+    open(my $F, '>', $path) or die "Could not create $path: $!\n";
+    print $F <<END;
+system 'perl $path0 -adaptor berkeleydb -create 1 -temp 1';
+END
+    # Buffered write errors only surface when the handle is closed.
+    close($F) or die "Could not finish writing $path: $!\n";
+    $build->add_to_cleanup($path);
+    $build->add_to_manifest_skip($path);
+}
+
+# Feature test for BioDBSeqFeature_mysql: try to connect to the MySQL
+# database named 'test' with default credentials.
+# Returns an error message string when the connection fails, and an empty
+# return (undef in scalar context) when it succeeds.
+sub test_db {
+    eval {require DBI;};  # if not installed, this sub won't actually be called
+    # Errors are silenced (RaiseError/PrintError off) and the call is wrapped
+    # in eval, so any failure simply leaves $dbh false.
+    my $dbh = eval {DBI->connect('dbi:mysql:test',undef,undef,{RaiseError=>0,PrintError=>0})};
+    return "Could not connect to test database" unless $dbh;
+    # The connection was only a probe: close it explicitly instead of leaving
+    # it to be torn down whenever the handle happens to be destroyed.
+    eval { $dbh->disconnect };
+    return;
+}
+
+# Generate t/BioDBSeqFeature_mysql.t, a one-line wrapper that runs
+# t/BioDBSeqFeature.t against the DBI::mysql adaptor (dsn 'test'), then
+# register the generated file for cleanup and for exclusion from the manifest.
+# Dies if the wrapper cannot be created or completely written.
+sub make_dbi_test {
+    my $path0 = File::Spec->catfile('t', 'BioDBSeqFeature.t');
+    my $path = File::Spec->catfile('t', 'BioDBSeqFeature_mysql.t');
+    # Three-argument open keeps the mode apart from the file name, and the
+    # check stops us from printing to an unopened handle and carrying on as
+    # if the test script existed.
+    open(my $F, '>', $path) or die "Could not create $path: $!\n";
+    print $F <<END;
+system 'perl $path0 -adaptor DBI::mysql -create 1 -temp 1 -dsn test';
+END
+    # Buffered write errors only surface when the handle is closed.
+    close($F) or die "Could not finish writing $path: $!\n";
+    $build->add_to_cleanup($path);
+    $build->add_to_manifest_skip($path);
+}
+
+# Feature test for BioDBGFF: records the DBI drivers available on this
+# machine in the file-level @drivers (read again by prompt_for_biodbgff) and
+# returns an error message unless at least one driver name matches mysql, Pg
+# or Oracle; returns nothing on success.
+sub test_biodbgff {
+    eval { require DBI; };  # if not installed, this sub won't actually be called
+    @drivers = DBI->available_drivers;
+    my $usable = grep { /mysql|Pg|Oracle/i } @drivers;
+    return "MySQL, Pg nor Oracle DBI drivers are installed" if !$usable;
+    return;
+}
+
+# Interactively collect the settings for the BioDBGFF live database tests.
+# When the user opts in, the chosen driver, database, host, user, password
+# and derived DSN are written as tab-separated key/value lines to
+# t/do_biodbgff.tests, which is registered for cleanup and excluded from the
+# manifest. Relies on the file-level @drivers filled in by test_biodbgff().
+sub prompt_for_biodbgff {
+    my $proceed = $build->y_n("Do you want to run the BioDBGFF live database tests? y/n", 'n');
+
+    if ($proceed) {
+        # Offer only the supported drivers that are installed; the first
+        # letter is bracketed to show the shortcut, e.g. "[m]ysql".
+        my @driver_choices;
+        foreach my $poss ('mysql', 'Pg', 'Oracle') {
+            if (grep {/$poss/i} @drivers) {
+                my $choice = $poss;
+                $choice =~ s/^(.)/[$1]/;
+                push(@driver_choices, $choice);
+            }
+        }
+
+        my $cfg = {};
+
+        # Ask only when there is a real choice; otherwise take the single
+        # available driver. Either way we start from the bracketed letter.
+        my $driver;
+        if (@driver_choices > 1) {
+            my ($default) = $driver_choices[0] =~ /\[(.)/;
+            $driver = $build->prompt("Which database driver should be used? ".join(" ", @driver_choices), $default);
+        }
+        else {
+            ($driver) = $driver_choices[0] =~ /\[(.)/;
+        }
+        # Expand the answer (matched on its first letter) to the driver name.
+        if ($driver =~ /^[mM]/) {
+            $driver = 'mysql';
+        }
+        elsif ($driver =~ /^[pP]/) {
+            $driver = 'Pg';
+        }
+        elsif ($driver =~ /^[oO]/) {
+            $driver = 'Oracle';
+        }
+        $cfg->{dbd_driver} = $driver;
+
+        # The literal answer 'undef' stands for "no user" / "no password".
+        $cfg->{test_db} = $build->prompt("Which database should I use for testing the $driver driver?", 'test');
+        $cfg->{test_host} = $build->prompt("On which host is database $cfg->{test_db} running (hostname, ip address or host:port)", 'localhost');
+        my $test_user = $build->prompt("User name for connecting to database $cfg->{test_db}?", 'undef');
+        $cfg->{test_user} = $test_user eq 'undef' ? '' : $test_user;
+        my $test_pass = $build->prompt("Password for connecting to database $cfg->{test_db}?", 'undef');
+        $cfg->{test_pass} = $test_pass eq 'undef' ? '' : $test_pass;
+
+        # A host part is only added to the DSN for non-local hosts.
+        my $use_host = 1;
+        if ($cfg->{test_host} eq 'undef' || $cfg->{test_host} eq 'localhost') {
+            $use_host = 0;
+        }
+
+        # Pg names the database with 'dbname=', the other drivers with
+        # 'database='.
+        my $test_dsn;
+        if ($driver eq 'Pg') {
+            $test_dsn = "dbi:$driver:dbname=$cfg->{test_db}";
+        }
+        else {
+            $test_dsn = "dbi:$driver:database=$cfg->{test_db}";
+        }
+        if ($use_host) {
+            $test_dsn .= ";host=$cfg->{test_host}";
+        }
+        $cfg->{test_dsn} = $test_dsn;
+
+        # Persist the settings. A lexical handle and three-argument open
+        # replace the bareword two-argument form, and a failure is now
+        # reported instead of being skipped silently.
+        my $path = File::Spec->catfile('t', 'do_biodbgff.tests');
+        if (open(my $T, '>', $path)) {
+            while (my ($key, $value) = each %$cfg) {
+                print $T $key, "\t", $value,"\n";
+            }
+            close($T) or warn "Could not finish writing $path: $!\n";
+        }
+        else {
+            warn "Could not write $path: $!\n";
+        }
+
+        $build->add_to_cleanup($path);
+        $build->add_to_manifest_skip($path);
+
+        $build->log_info("  - will run the BioDBGFF tests with database driver '$driver' and these settings:\n",
+                        "    Database $cfg->{test_db}\n",
+                        "    Host     $cfg->{test_host}\n",
+                        "    DSN      $test_dsn\n",
+                        "    User     $test_user\n",
+                        "    Password $test_pass\n");
+    }
+    else {
+        $build->log_info("  - will not run the BioDBGFF live database tests\n");
+    }
+
+    $build->log_info("\n");
+}

Added: trunk/packages/bioperl/branches/upstream/current/Changes
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Changes	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Changes	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1173 @@
+$Id: Changes,v 1.49.4.1 2006/10/02 23:10:11 sendu Exp $
+  
+Revision history for Bioperl core modules
+
+Main trunk
+
+    n/a
+
+1.5 series - these releases represent the pre-1.6 releases, the 1.6 series
+    will be branches from the last 1.5.x release. 
+
+1.5.2 Developer release
+    Full details of changes since 1.5.1 are available online at:
+    http://www.bioperl.org/wiki/Change_log
+    The following represents a brief overview of the most important changes.
+
+    o Bio::Map
+      - Overhaul. Brand new system fully allows markers to have multiple
+        positions on multiple maps, and to have relative positions. Should be
+        backward compatible.
+    
+    o Bio::Taxonomy
+      - This module and all the modules in the Taxonomy directory now
+        deprecated in favour of Bio::Taxon and Bio::Tree::Tree
+    
+    o Bio::DB::Taxonomy
+      
+      - Taxonomy.pm
+        * get_Taxonomy_Node() eventually to be deprecated, renamed get_taxon().
+        
+        * New methods ancestor(), each_Descendent() and _handle_internal_id().
+        
+        * Allows for different database modules to create Bio::Taxon objects
+          with the same internal id when the same taxon is requested from each.
+    
+      - flatfile.pm
+        * get_Children_Taxids() is deprecated, superseded by each_Descendent().
+        
+        * No longer includes the fake root node 'root'; there are multiple roots
+          now (10239, 12884, 12908, 29384 and 131567). Consistent with entrez.pm
+    
+      - entrez.pm
+        * get_node() has new option -full
+        
+        * Caches data retrieved from website
+    
+    o Bio::Species
+      - Now a Bio::Taxon. Carries out the species name -> specific name munging
+        that Bio::DB::Taxonomy modules and SeqIO modules used to do, for
+        backward compatibility in species() method.
+    
+    o Bio::Search and Bio::SearchIO
+      - Overhaul. The existing system has been sped up via some minor changes
+        (mostly gain-of-function to the API). Bio::PullParserI is introduced
+        as a potential eventual replacement for the existing system, though as
+        yet only a Hmmpfam parser exists written using it.
+
+
+1.5.1 Developer release
+
+    o Major problem with how Annotations were written out with
+      Bio::Seq is fixed by reverting to old behavior for
+      Bio::Annotation objects.
+
+    o Bio::SeqIO
+
+     - genbank.pm
+       * bug #1871; REFLOOP' parsing loop, I changed the pattern to
+         expect at least 9 spaces at the beginning of a line to
+         indicate line wrapping.
+
+       * Treat multi-line SOURCE sections correctly, this defect broke
+         both common_name() and classification()
+
+       * parse swissprot fields in genpept file 
+
+       * parse WGS genbank records
+
+     - embl.pm
+        * Changed regexp for ID line. The capturing parentheses are
+          the same, the difference is an optional repeated-not-semi-
+          colon expression following the captured \S+. This means the
+          regexp works when the division looks like /PRO;/ or when the
+          division looks like /ANG ;/ - the latter is from EMBL
+          repbase
+
+        * fix ID line parsing: the molecule string can have spaces in
+          it. Like: "genomic DNA"
+
+     - swiss.pm: bugs  #1727, #1734
+     
+     - entrezgene.pm
+        * Added parser for entrezgene ASN1 (text format) files.
+          Uses Bio::ASN1::EntrezGene as a low level parser (get it from CPAN)
+
+    o Bio::AlignIO
+
+     -  maf.pm coordinate problem fixed
+      
+    o Bio::Taxonomy and Bio::DB::Taxonomy
+
+     - Parse NCBI XML now so that nearly all the taxonomy up-and-down 
+       can be done via Web without downloading all the sequence.
+ 
+    o Bio::Tools::Run::RemoteBlast supports more options and complies
+      to changes to the NCBI interface. It is recommended that you
+      retrieve the data in XML instead of plain-text BLAST report to
+      ensure proper parsing and retrieval of all information as NCBI
+      fully expects to change things in the future.
+ 
+    o Bio::Tree and Bio::TreeIO
+
+      - Fixes so that re-rooting a tree works properly
+
+      - Writing out nhx format from a newick/nexus file will properly output
+        bootstrap information.  The user must move the internal node labels over
+        to bootstraps.
+         for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes ) {
+            $node->bootstrap($node->id);
+	    $node->id('');
+	 }
+      - Nexus parsing is much more flexible now, does not care about
+        LF.
+
+      - Cladogram drawing module in Bio::Tree::Draw
+      
+      - Node height and depth now properly calculated
+
+      - fix tree pruning algorithm so that node with 1 child gets merged
+
+    o Graphics tweaks.  Glyph::xyplot improved.  Many other small-medium sized
+      bugs and improvements were added, see Gbrowse mailing list for most of 
+      these.
+
+    o Bio::DB::GFF partially supports GFF3.  See information about 
+      gff3_munge flag in scripts/Bio-DB-GFF/bulk_load_gff.pl.
+         
+    o Better location parsing in Bio::Factory::FTLocationFactory -
+      this is part of the engine for parsing EMBL/GenBank feature table
+      locations.  Nested join/order-by/complement are allowed now
+
+    o Bio::PrimarySeqI->translate now takes named parameters
+
+    o Bio::Tools::Phylo::PAML - parsing RST (ancestral sequence
+      reconstruction) is now supported.  Parsing different models and 
+      branch specific parameters are now supported.
+
+    o Bio::Factory::FTLocationFactory - parse hierarchical locations 
+      (joins of joins)
+
+    o Bio::Matrix::DistanceMatrix returns arrayrefs instead of arrays
+      for getter/setter functions
+
+    o Bio::SearchIO
+      
+      - blast bug #1739; match scientific notation in score 
+        and possible e+ values 
+
+      - blast.pm reads more WU-BLAST parameters, and matches
+        a full database pathname
+   
+      - Handle NCBI WEB and newer BLAST formats specifically
+        (Query|Sbjct:) match in alignment blocks can now be (Query|Sbjct).
+
+      - psl off-by-one error fixed
+
+      - exonerate parsing much improved, CIGAR and VULGAR can be parsed
+        and HSPs can be constructed from them.
+
+      - HSPs query/hit now have a seqdesc field filled out (this was
+        always available via $hit->description and
+        $result->query_description)
+
+      - hmmer.pm can parse -A0 hmmpfam files
+
+      - Writer::GbrowseGFF more customizable.
+
+    o Bio::Tools::Hmmpfam 
+      make e-value default score displayed in gff, rather than raw score
+      allow parse of multiple records
+
+
+1.5 Developer release
+
+    o Bio::Align::DNAStatistics and Bio::Align::ProteinStatistics
+      provide Jukes-Cantor and Kimura pairwise distance methods,
+      respectively.
+
+    o Bio::AlignIO support for "po" format of POA, and "maf";
+      Bio::AlignIO::largemultifasta is a new alternative to
+      Bio::AlignIO::fasta for temporary file-based manipulation of
+      particularly large multiple sequence alignments.
+
+    o Bio::Assembly::Singlet allows orphan, unassembled sequences to
+      be treated similarly as an assembled contig.
+
+    o Bio::CodonUsage provides new rare_codon() and probable_codons()
+      methods for identifying particular codons that encode a given
+      amino acid.
+
+    o Bio::Coordinate::Utils provides new from_align() method to build
+      a Bio::Coordinate pair directly from a
+      Bio::Align::AlignI-conforming object.
+
+    o Bio::DB::Biblio::eutils is a class for querying NCBI's Eutils.
+      Send a Pubmed, Pubmed Central, Entrez, or other query to NCBI's
+      web service using standard Pubmed query syntax, and retrieve
+      results as XML.
+
+    o Bio::DB::GFF has various sundry bug fixes.
+
+    o Bio::FeatureIO is a new SeqIO-style subsystem for
+      writing/reading genomic features to/from files.  I/O classes
+      exist for BED, GTF (aka GFF v2.5), and GFF v3.  Bio::FeatureIO
+      classes only read/write Bio::SeqFeature::Annotated objects.
+      Notably, the GFF v3 class requires features to be typed into the
+      Sequence Ontology.
+
+    o Bio::Graph namespace contains new modules for manipulation and
+      analysis of protein interaction graphs.
+
+    o Bio::Graphics has many bug fixes and shiny new glyphs.
+
+    o Bio::Index::Hmmer and Bio::Index::Qual provide multiple-file
+      indexing for HMMER reports and FASTA qual files, respectively.
+
+    o Bio::Map::Clone, Bio::Map::Contig, and Bio::Map::FPCMarker are
+      new objects that can be placed within a Bio::Map::MapI-compliant
+      genetic/physical map; Bio::Map::Physical provides a new physical
+      map type; Bio::MapIO::fpc provides finger-printed clone mapping
+      import.
+
+    o Bio::Matrix::PSM provides new support for position-specific
+      (scoring) matrices (e.g. profiles, or "possums").
+
+    o Bio::Ontology::Ontology and Bio::Ontology::Term objects can now
+      be instantiated without explicitly using Bio::OntologyIO.  This
+      is possible through changes to Bio::Ontology::OntologyStore to
+      download ontology files from the web as necessary.  Locations of
+      ontology files are hard-coded into
+      Bio::Ontology::DocumentRegistry.
+
+    o Bio::PopGen includes many new methods and data types for
+      population genetics analyses.
+
+    o New constructor to Bio::Range, unions().  Given a list of
+      ranges, returns another list of "flattened" ranges --
+      overlapping ranges are merged into a single range with the
+      minimum and maximum coordinates of the entire overlapping group.
+
+    o Bio::Root::IO now supports -url, in addition to -file and -fh.
+      The new -url argument allows one to specify the network address
+      of a file for input.  -url currently only works for GET
+      requests, and thus is read-only.
+
+    o Bio::SearchIO::hmmer now returns individual Hit objects for each
+      domain alignment (thus containing only one HSP); previously
+      separate alignments would be merged into one hit if the domain
+      involved in the alignments was the same, but this only worked
+      when the repeated domain occurred without interruption by any
+      other domain, leading to a confusing mixture of Hit and HSP
+      objects.
+
+    o Bio::Search::Result::ResultI-compliant report objects now
+      implement the "get_statistics" method to access
+      Bio::Search::StatisticsI objects that encapsulate any
+      statistical parameters associated with the search (e.g. Karlin's
+      lambda for BLAST/FASTA).
+ 
+    o Bio::Seq::LargeLocatableSeq combines the functionality already
+      found in Bio::Seq::LargeSeq and Bio::LocatableSeq.
+
+    o Bio::SeqFeature::Annotated is a replacement for
+      Bio::SeqFeature::Generic.  It breaks compliance with the
+      Bio::SeqFeatureI interface because the author was sick of
+      dealing with untyped annotation tags.  All
+      Bio::SeqFeature::Annotated annotations are Bio::AnnotationI
+      compliant, and accessible through Bio::Annotation::Collection.
+
+    o Bio::SeqFeature::Primer implements a Tm() method for primer
+      melting point predictions.
+
+    o Bio::SeqIO now supports AGAVE, BSML (via SAX), CHAOS-XML,
+      InterProScan-XML, TIGR-XML, and NCBI TinySeq formats.
+
+    o Bio::Taxonomy::Node now implements the methods necessary for
+      Bio::Species interoperability.
+
+    o Bio::Tools::CodonTable has new reverse_translate_all() and
+      make_iupac_string() methods.
+
+    o Bio::Tools::dpAlign now provides sequence profile alignments.
+
+    o Bio::Tools::GFF now parses GFF version 2.5 (a.k.a. GTF).
+
+    o Bio::Tools::Fgenesh, Bio::Tools::tRNAscanSE are new report
+      parsers.
+
+    o Bio::Tools::SiRNA includes two new rulesets (Saigo and Tuschl)
+      for designing small inhibitory RNA.
+
+    o Bio::Tree::DistanceFactory provides NJ and UPGMA tree-building
+      methods based on a distance matrix.
+
+    o Bio::Tree::Statistics provides an assess_bootstrap() method to
+      calculate bootstrap support values on a guide tree topology,
+      based on provided bootstrap tree topologies.
+  
+    o Bio::TreeIO now supports the Pagel (PAG) tree format.
+  
+1.4 branch
+
+1.4.1 
+
+  o Improvements to Bio::AlignIO::nexus for parsing TreeBase nexus files
+  
+  o Bio::Graphics will work with gd1 or gd2
+   
+  o Bio::SearchIO
+   - hmmer.pm Better hmmpfam parsing, fix bug for small number of alignment outputs
+     (RF lines alone)
+   - blast.pm Parse multi-line query fields properly
+   - small speed improvements to blasttable.pm and others
+
+  o Bio::DB::Taxonomy has better support for hierarchy traversal so that
+    Bio::Taxonomy::Node can be as simple as Bio::Species object while still
+    supporting more complex queries
+
+
+1.4. Stable major release
+
+Since initial 1.2.0, 3000 separate changes have been made to make this release. 
+
+   o installable scripts
+
+   o global module version from Bio::Root::Version
+
+   o Bio::Graphics
+      - major improvements; SVG support
+
+   o Bio::Popgen
+     - population genetics 
+     - support several population genetics types of questions.
+     - Tests for statistical neutrality of mutations
+       (Fu and Li's D/F, Tajima's D) are in Bio::PopGen::Statistics.
+       Tests of population structure (Wright's F-statistic: Fst) is in
+       Bio::PopGen::PopStats. Calculating composite linkage
+       disequilibrium (LD) is available in Bio::PopGen::Statistics as
+       well.
+     - Bio::PopGen::IO for reading in prettybase (SeattleSNPs)
+       and csv (comma delimited formatted) data.
+
+     - a directory for implementing population simulations has
+       been added Bio::PopGen::Simulation and 2 simulations - a
+       Coalescent and a simple single-locus multi-allele genetic drift
+       simulation have been provided.  This replaces the code in
+       Bio::Tree::RandomTree which has been deprecated until proper
+       methods for generating random phylogenetic trees are
+       implemented.
+
+   o Bio::Restriction
+      - new restriction analysis modules
+
+   o Bio::Tools::Analysis
+      - web based DNA and Protein analysis framework and several
+        implementations
+
+   o Bio::Seq::Meta
+     - per residue annotable sequences
+
+   o Bio::Matrix
+      - Bio::Matrix::PSM - Position Scoring Matrix
+      - Bio::Matrix::IO has been added for generalized parsing of
+        matrix data.  Matrix::IO::scoring and Matrix::IO::phylip are
+        initial implementations for parsing BLOSUM/PAM and Phylip
+        Distance matrices respectively.  A generic matrix
+        implementation for general use was added in
+        Bio::Matrix::Generic.
+
+   o Bio::Ontology
+     - major changes
+
+   o Bio::Tree
+
+   o Bio::Tools::SiRNA, Bio::SeqFeature::SiRNA
+     - small inhibitory RNA
+
+   o Bio::SeqFeature::Tools
+     - seqFeature mapping tools
+     - Bio::SeqFeature::Tools::Unflattener.pm
+       -- deal with mapping GenBank feature collections into
+          Chado/GFF3 processable feature sets (with SO term mappings)
+
+   o Bio::Tools::dpAlign
+     - pure perl dynamic programming sequence alignment
+     - needs Bioperl-ext
+
+   o new Bio::SearchIO formats
+     - axt and psl:  UCSC formats.
+     - blasttable: NCBI -m 8 or -m 9 format from blastall
+
+   o new Bio::SeqIO formats
+     - chado, tab, kegg, tigr, game
+     - important fixes for old modules
+
+   o Bio::AlignIO: maf
+
+   o improved Bio::Tools::Genewise
+
+   o Bio::SeqIO now can recognize sequence formats automatically from
+     stream
+
+   o new parsers in Bio::Tools:
+      Blat, Geneid, Lagan, Mdust, Promoterwise, PrositeScan,  
+
+   o Bio::DB::Registry bugs fixed
+     - BerkeleyDB-indexed flat files can be used by the OBDA system
+     - Multiple seqdatabase.ini locations in OBDA_SEARCH_PATH are all
+       used by the OBDA system
+
+   o several new HOWTOs
+     - SimpleWebAnalysis, Trees, Feature Annotation, OBDA Access, Flat
+       Databases
+
+   o hundreds of new and improved files 
+
+
+   o 
+   o Bio::Tree::AlleleNode has been updated to be a container of
+     an Bio::PopGen::Individual object for use in the Coalescent simulations.
+
+
+1.2 Branch
+
+1.2.3 Stable release update
+    o Bug #1475 - Fix and add speedup to spliced_seq for remote location
+                  handling.
+    o Bug #1477 - Sel --> Sec abbreviation fixed
+    o Fix bug #1487 where parsing in-between locations when
+      end < start caused the FTLocationFactory logic to fail.
+    o Fix bug #1489 which was not dealing with keywords as an
+      arrayref properly (this is fixed on the main trunk because
+      keywords returns a string and the array is accessible via
+      get_keywords).
+    o Bio::Tree::Tree memory leak (bug #1480) fixed
+      Added a new initialization option -nodelete which
+      won't try and cleanup the containing nodes if this
+      is true.
+     o Bug with parsing labeled nodes with Bio::TreeIO::newick fixed
+       this was only present on the branch for the 1.2.1 and 1.2.2 series
+       - Also merged main trunk changes to the branch which make
+         newick -> nhx round tripping more effective (storing branch length
+         and bootstrap values in same locate for NodeNHX and Node 
+         implementations.)  Fixes to TreeIO parsing for labeled internal
+         also required small changes to TreeIO::nhx.  Improved
+         tests for this module as well.
+    o Bio::SearchIO
+      - Fixed bugs in BLAST parsing which couldn't parse NCBI
+        gapped blast properly (was losing hit significance values due to
+        the extra unexpected column).    
+      - Parsing of blastcl3 (netblast from NCBI) now can handle case of
+        integer overflow (# of letters in nt seq dbs is > MAX_INT)
+        although doesn't try to correct it - will get the negative
+        number for you.  Added a test for this as well.
+      - Fixed HMMER parsing bug which prevented parsing when a hmmpfam report
+        has no top-level family classification scores but does have scores and
+        alignments for individual domains.
+      - Parsing FASTA reports where ungapped percent ID is < 10 and the 
+        regular expression to match the line was missing the possibility of
+        an extra space.  This is rare, which is why we probably did not 
+        catch it before.
+      - BLAST parsing picks up more of the statistics/parameter fields
+        at the bottom of reports.  Still not fully complete.
+      - SearchIO::Writer::HTMLResultWriter and TextResultWriter
+        were fixed to include many improvements and added flexibility
+        in outputting the files.  Bug #1495 was also fixed in the process.
+     o Bio::DB::GFF
+      - Update for GFF3 compatibility.
+      - Added scripts for importing from UCSC and GenBank.
+      - Added a 1.2003 version number.
+     o Bio::Graphics
+      - Updated tutorial.
+      - Added a 1.2003 version number.
+     o SeqIO::swiss Bug #1504 fixed with swiss writing which was not
+       properly writing keywords out.
+     o Bio::SeqIO::genbank 
+      - Fixed bug/enhancement #1513 where dates of
+        the form D-MMM-YYYY were not parsed.  Even though this is
+        invalid format we can handle it - and also cleanup the date
+        string so it is properly formatted.
+      - Bug/enhancement #1517 fixed so that SEGMENT line can be parsed 
+        and written with Genbank format.  Similarly bug #1515 is fixed to
+        parse in the ORIGIN text.
+     o Bio::SeqIO::fasta, a new method called preferred_id_type allows you
+       to specify the ID type, one of (accession accession.version 
+       display primary).  See Bio::SeqIO::preferred_id_type method
+       documentation for more information.
+     o Unigene parsing updated to handle file format changes by NCBI
+
+1.2.2 Stable release update
+
+    o A series of bug fixes of the Bio::OntologyIO dagflat-related parsers:
+      - auto-discover ontology name
+      - bug in parsing relationships when certain characters are in the term
+      - fixed hard-coded prefix for term identifiers
+      - various smaller issues 
+
+    o Fixed bug in Bio::Annotation::OntologyTerm of not implementing all
+      of Bio::Ontology::TermI
+
+    o brought the OBDA Registry code up to latest specs 
+
+    o Bio::DB::GenBank
+      - eutils URL change
+      - accession number retrieval fixed
+
+    o Bio::SearchIO::blast - fix bug #1443 (missing last hits), parse megablast
+
+    o Bio::SearchIO::Writer::(HTML|Text)ResultWriter fix bugs #1458, 
+      #1459 which now properly report alignment start/end info
+      for translated BLAST/FASTA searches.
+    
+    o Bio::TreeIO::newick can parse labeled internal nodes
+
+    o Bio::Tools::BPbl2seq can properly report strand info for HSPs 
+      for BLASTX if you provide -report_type => 'BLASTX' when
+      initializing a BPbl2seq object.  Bioperl 1.3 will have better
+      support for bl2seq in the SearchIO system.
+
+    o Bio::Root::IO support a -noclose boolean flag which will not
+      close a filehandle upon object cleanup - useful when sharing
+      a filehandle among objects.  Additionally code added s.t.
+      STDOUT/STDIN/STDERR will never be closed by Root::IO cleanup.
+
+    o Bio::Tools::Genemark bug #1435 fixed which was missing last prediction
+
+    o Bio::SeqIO::genbank 
+      - bug #1456 fixed which generated extra sequence lines
+      - write moltype correctly for genpept
+
+1.2.1 Stable release update
+
+    o Inclusion of WrapperBase, a needed component for StandAloneBlast
+
+    o Addition from main trunk of Ontology objects, principally to allow
+      BioSQL releases against 1.2.1
+
+    o Fixes and cleanup of Bio::Coordinate modules
+
+    o A fix to Bio::Index::EMBL allowing  retrieval of entries using
+      the primary accession number
+ 
+    o Other bug fixes, including bpindex GenBank fix
+   
+    o Bio::SeqIO::genbank bug #1389 fixed
+
+1.2  Stable major release
+ 
+    o More functionality added to Bio::Perl, the newbie module
+
+    o Bug fixes in Bio::TreeIO::newick fixes bug introduced in 1.0.2
+      Support for New Hampshire Extended (NHX) format parsing.
+
+    o Bio::Tools added support for parsing Genomewise, Pseudowise, Est2Genome,
+      Tmhmm, SignalP, Seg, RepeatMasker, FootPrinter, and a lightweight
+      Hmmpfam parser.
+
+    o New ontology parsing Bio::Ontology
+
+    o Bug fixes in Bio::SearchIO for HMMer parsing, support for
+      multi-report (mlib) fasta reports, support for waba and exonerate.
+
+    o Bio::ClusterIO for parsing Unigene clusters
+
+    o Bio::Assembly added for representing phrap and ace assembly clusters.
+
+    o Rudimentary support for writing Chado XML (see 
+      GMOD project: www.gmod.org for more information) 
+
+    o Bio::Coordinate for mapping between different coordinate systems such 
+      as protein -> cDNA -> Exon -> DNA and back.  Useful for mapping
+      features into different coordinate systems.  
+
+    o Bio::DB::GenBank/Bio::DB::GenPept now support Entrez queries
+      with the get_Stream_by_query method and supports the latest
+      NCBI eutils interface.
+
+    o Bugs fixed in Bio::SeqFeature::Collection an in-memory fast
+      object for extracting subsets of features : currently only
+      supports extraction by location.
+
+1.1.1 Developer release
+
+    o Deprecated modules are now listed in the DEPRECATED file
+
+    o New HowTo documents located in doc/howto describing 
+      a domain of Bioperl.
+
+    o Note that bugs are now stored at bugzilla.bioperl.org
+      and all old bugs are searchable through the bugzilla interface.
+
+    o Several reported bugs in Bio::Tools::Sigcleave and Bio::SimpleAlign 
+      have been addressed.
+
+    o Support for Genewise parsing in Bio::Tools::Genewise
+
+    o Start of Ontology framework with Bio::Ontology
+
+    o Speedup to the Bio::Root::Root object method _rearrange.
+      A global _load_module method was implemented to simplify the
+      dynamic loading of modules ala Bio::SeqIO::genbank.  This
+      method is now used by all the XXIO (AlignIO,TreeIO,SearchIO,SeqIO,
+      etc).
+
+    o Several performance improvements to sequence parsing in Bio::SeqIO.
+      Attempt to speedup by reducing object creation overhead.
+
+    o Bio::DB::GenBank and Bio::DB::GenPept use the NCBI's approved
+      method for sequence retrieval with their E-utils CGI scripts.
+      More work to support Entrez queries to their fullest is planned
+      before 1.2 release.
+
+    o Numerous fixes to Bio::SearchIO and sequence parsing (swissprot)
+
+1.1 Developer release 
+
+    o Bio::Tools::Run has been broken off into a new pkg bioperl-run,
+      this separation removes some of the complexity in our test suite
+      and separates the core modules in bioperl from those that need
+      external programs to run.
+
+    o With latest ExtUtils::MakeMaker module installed SGI/IRIX should
+      not run into trouble running the makefile
+
+    o Bio::Location and Bio::SeqIO::FTHelper are fixed to properly 
+      read,create,and write locations for grouped/split locations
+      (like mRNA features on genomic sequence).  
+
+    o Bio::Tools::Phylo added for wrappers for parsing Molphy (protml)
+      and PAML (codeml,aaml, etc) parsing.
+
+    o Bio::Tree:: objects expanded to handle testing monophyly,
+      paraphyly, least common ancestor, etc.
+
+    o Bio::Coordinate for mapping locations from different coordinate spaces 
+
+    o Bio::SearchIO::waba added for parsing WABA, Bio::SearchIO::hmmer
+      added for parsing hmmpfam and hmmsearch output.
+
+    o Bio::SearchIO::Writer::TextResultWriter for outputting
+      a pseudo-blast textfile format
+
+
+1.0.2 Bug fix release
+
+    o Note: The modules Bio::DB::GenBank and Bio::DB::GenPept provided
+      in this release will not work after December 2002 when NCBI
+      shuts off the old Entrez cgi scripts.  We have already fixed
+      on our main development branch and the functionality will be
+      available in the next stable bioperl release (1.2) slated for
+      Fall 2002.
+ 
+    o Numerous parsing bugs in Bio::SearchIO::fasta found through 
+      testset by Robin Emig.  These were fixed as was the get_aln
+      method in Bio::Search::HSP::GenericHSP to handle the extra 
+      context sequence that is provided with a FastA alignment.
+    
+    o Migrating differences between Bio::Search::XX::BlastXX to 
+      Bio::Search::XX::GenericXX objects.  This included mechanism
+      to retrieve whole list of HSPs from Hits and whole list of Hits from 
+      Results in addition to the current next_XX iterator methods that
+      are available.  Added seq_inds() method to GenericHSP which identifies
+      indexes in the query or hit sequences where conserved,identical,gaps, 
+      or mismatch residues are located (adapted from Steve Chervitz's 
+      implementation in BlastHSP).
+
+    o Bio::DB::GFF bugs fixed and are necessary for latest GBrowse release.
+      Bio::DB::GFF::RelSegment is now Bio::SeqI compliant.
+      
+    o Bio::Graphics glyph set improved and extended for GBrowse release
+
+    o Bio::Tree::Tree get_nodes implementation improvement thanks
+      to Howard Ross, who noticed a performance problem when writing out 
+      unbalanced trees.
+
+    o Bio::Location::Fuzzy::new named parameter -loc_type became
+      -location_type, Bio::Location::Simple::new named parameter
+      -seqid became -seq_id.
+   
+    o Fixed major Bio::AlignIO::emboss parsing bug on needle output,
+      was mis-detecting that gaps should be placed at the beginning of
+      the alignment when the best alignment starts internally in the
+      sequence.
+
+1.0.1 Bug fix release
+	
+    o Minor bug fixes to Bio::DB::GFF.  Glyph sets improved.
+ 
+    o Parser fixes in SearchIO blast, fasta for more complete WU BLAST 
+      and mixed (3.3 - 3.4) versions of FASTA.
+
+    o Small API change to add methods for completeness across 
+      implementations of Bio::Search objects.  These new methods
+      in the interface are implemented by the GenericXX object as well 	
+      as the BlastXX objects.
+	* Bio::Search::Result::ResultI 
+	 - hits() method returns list of all Hits (next_hit is an 
+	   iterator method)
+	 	
+	* Bio::Search::Hit::HitI
+	 - hsps() method returns list of all HSPs (next_hsp is an 
+           iterator method)
+	
+    o The Bio::SearchIO::Writer classes have been fixed to handle results 
+       created from either psiblast (Search::BlastXX objects) or 
+       blast|fasta|blastxml objects (Search::GenericXX objects).  More work 
+       has to be done here to make it work properly and will need major 
+       API changes.
+
+    o Bugs in Bio::Tools::HMMER fixed, including
+       * #1178 - Root::IO destructor wasn't being called
+       * #1034 - filter_on_cutoff now behaves properly
+ 
+    o Bio::SeqFeature::Computation initialization args fixed and 
+      tests added.
+
+    o Tests are somewhat cleaner, flat.t now properly cleans up after itself.
+      
+    o Updated FAQ with more example based answers to typical questions
+
+    o Bug #1202 was fixed which would improperly join together qual values
+      parsed by Bio::SeqIO::qual when a trailing space was not present before
+      the newline.
+
+1.0.0 Major Stable Release
+
+  This represents a major release of bioperl with significant
+  improvements over the 0.7.x series of releases.   
+
+    o Bio::Tools::Blast is officially deprecated.  Please see
+      Bio::SearchIO for BLAST and FastA parsing.
+
+    o The methods trunc() and subseq() in Bio::PrimarySeqI now accepts
+      Bio::LocationI objects as well as start/end.
+
+    o Bio::Biblio contains modules for Bibliographic data. 
+      Bio::DB::Biblio contains the query modules.  Additionally one can
+      parse medlinexml from the ebi bibliographic query service (BQS)
+      system and Pubmed xml from NCBI.  See Martin Senger's
+      documentation in Bio::Biblio for more information.
+    
+    o Bio::DB::Registry is a sequence database registry part of 
+      Open Bioinformatics Database Access.  See
+      http://obda.open-bio.org for more information.
+    
+    o File-based and In-Memory Sequence caching is provided by
+      Bio::DB::InMemoryCache and Bio::DB::FileCache which acts like a
+      local database.
+
+    o Bio::Graphics for rendering sequences as PNG,JPG, or GIFs has
+      been added by Lincoln Stein.
+
+    o XEMBL SOAP service access is provided in Bio::DB::XEMBL.
+
+    o A FAQ has been started and is included in the release to provide
+      a starting point for frequent questions and issues.
+
+0.9.3 Developer's release
+    
+    o Event based parsing system improved (SearchIO).  With parsers for
+      XML Blast (blastxml), Text Blast (blast), and FASTA results (fasta).  
+      Additionally a lazy parsing system for text and html blast reports was
+      added and is called psiblast (name subject to change in future releases).
+
+    o Bio::Search objects improved and standardized with associated Interfaces
+      written.  The concept of a search "Hit" was standardized to be called
+      "hit" consistently and the use of "subject" was deprecated in all active
+      modules.
+
+    o Bio::Structure added (since 0.9.1) for Protein structure objects 
+      and PDB parser to retrieve and write these structures from data files. 
+
+    o Several important Bio::DB::GFF bug fixes for handling features that
+      are mapped to multiple reference points.  Updated mysql adaptor
+      so as to be able to store large (>100 megabase) chunks of DNA into
+      Bio::DB::GFF databases.
+
+0.9.2 Developer's release
+
+    o Bio::Search and Bio::SearchIO system introduced for event based
+      parsing of Blast,Fasta reports Bio::SearchIO supports ncbi BLAST
+      in text and XML and FASTA reports in standard output format.
+
+    o Bio::Tree and Bio::TreeIO for phylogenetic trees.  A Random tree
+      generator is included in Bio::TreeIO::RandomTrees and a
+      statistics module for evaluating.
+      
+    o Bio::DB::GFF, Lincoln Stein's GFF database suitable as a DB
+      server for DAS servers.
+
+    o Bio::Tools::BPlite provides more robust parsing of BLAST
+      files.  The entire BPlite system migrated to using Bio::Root::IO
+      for the data stream.
+	
+    o Bio::Tools::Alignment for Consed and sequence Trimming
+      functionality.
+ 
+    o Bio::Structure for Protein structure information and parsing
+
+    o Bio::DB::GenBank/Bio::DB::GenPept updated to new NCBI Entrez
+      cgi-bin entry point which should be more reliable.
+   
+    o Bio::Map and Bio::MapIO for biological map navigation and a
+      framework for parsing them in.  Only preliminary work here.
+ 
+    o Interface for executing EMBOSS programs locally in Bio::Factory::EMBOSS
+      Future work will integrate Pise and allow submission of analysis on
+      remote servers.
+
+    o Bio::AnnotationCollectionI and Bio::Annotation::Collection
+      introduced as new objects for handling Sequence Annotation
+      information (dblinks, references, etc) and is more robust than
+      previous system.
+ 
+    o Bio::Tools::FASTAParser introduced.
+
+    o Scripts from the bioperl script submission project and new
+      scripts from bioperl authors are included in "scripts" directory.
+
+    o Factory objects and interfaces are being introduced and are more
+      strictly enforced.
+	
+    o Bio::Root::Root introduced as the base object while
+      Bio::Root::RootI is now simply an interface.
+
+    o Bio::DB::RefSeq provides database access to copy of the NCBI
+      RefSeq database using the EBI dbfetch script.
+
+0.9.0 Developer's release
+
+    o perl version at least 5.005 is now required instead of perl 5.004
+
+    o Bio::Tools::Run::RemoteBlast is available for running remote 
+      blast jobs at NCBI.
+
+    o Bio::Tools::BPbl2seq was fixed to handle multiple HSPs.
+
+    o Bio::SeqFeature::GeneStructure migrated to Bio::SeqFeature::Gene.
+      Also added are related modules UTR3, UTR5, Exon, Intron, 
+      Promoter, PolyA and Transcript.
+
+    o Speedup of translate method in PrimarySeq     
+
+    o Bio::SimpleAlign has new methods: location_from_column(), slice(),
+      select(), dot(), get_seq_by_pos(), column_from_residue_number()
+
+    o Various fixes to Variation toolkit
+    
+    o Bio::DB::EMBL provides database access to EMBL sequence data.
+      Bio::DB::Universal provides a central way to point to indexes
+      and dbs in a single interface.
+
+    o Bio::DB::GFF - a database suitable for running DAS servers locally.
+
+    o Bio::Factory::EMBOSS is still in design phase as is  
+      Bio::Factory::ApplicationFactoryI
+
+    o Dia models for bioperl design are provided in the models/ directory
+
+0.7.2 Bug fix release
+
+    o documentation fixes in many modules - SYNOPSIS code verified 
+      to be runnable in many (but not all modules)
+
+    o corrected MANIFEST file from 0.7.1 release
+   
+    o Bug fix in Bio::SeqIO::FTHelper to properly handle
+      split locations
+
+    o Bio::SeqIO::genbank 
+	* Correct parsing and writing of genbank format with protein data
+	* moltype and molecule separation	                   
+
+    o Bio::SeqIO::largefasta fix to avoid infinite loops
+	
+    o Bio::SimpleAlign fixed to correctly handle consensus 
+      sequence calculation
+
+    o Bio::Tools::HMMER supports hmmer 2.2g
+
+    o Bio::Tools::BPlite to support report type specific parsing.  Most 
+      major changes are not on the 0.7 branch.
+   
+    o Bio::Tools::Run::StandAloneBlast exists_blast() fixed and works 
+      with File::Spec 
+
+    o Bio::Variation::AAChange/RNAChange corrected labels and mutated alleles 
+	in several types of mutations:
+        1.) AA level: deletion, complex
+        2.) AA level: complex, inframe
+        3.) RNA level: silent
+
+    o  BPbl2seq parsing of empty reports will not die, but will return
+       a valid, empty, Bio::SeqFeature::SimilarityFeature for
+       $report->query() and $report->subject() methods.  So an easy
+       way to test if report was empty is to see if
+       $report->query->seqname is undefined.
+
+0.7.1 Bug fix release 
+
+    o Better parsing of genbank/EMBL files especially fixing bugs
+      related to Feature table parsing and locations on remote
+      sequences.  Additionally, species name parsing was better.
+
+    o Bio::SeqIO::genbank can parse now NCBI produced genbank database
+      which include a number of header lines.
+
+    o More strict genbank and EMBL format writing (corrected number of
+      spaces where appropriate).
+
+    o Bio::Tools::BPlite can better parse BLASTX reports - see BUGS
+      for related BPlite BUGS that are unresolved in this release.
+  
+    o Bio::DB::GenBank, Bio::DB::GenPept have less problems
+      downloading sequences from NCBI via HTTP.  Bio::DB::SwissProt can
+      use expasy mirrors or EBI dbfetch cgi-script.
+
+    o A moderate number of documentation improvements were made as
+      well to provide a better code synopsis in each module.
+
+
+0.7  Large number of changes, including refactoring of the
+     Object system, new parsers, new functionality and
+     all round better system. Highlights are:
+
+
+     o Refactored root of inheritance: moved to a lightweight Bio::Root::RootI;
+       Bio::Root::IO for I/O and file/handle capabilities.
+
+     o Imported BPlite modules from Ian Korf for BLAST
+       parsing. This is considered the supported BLAST parser;
+       Bio::Tools::Blast.pm will eventually phase out due to lack of support.
+
+     o Improved Sequence Feature model. Added complete location
+       modelling (with fuzzy and compound locations).  See
+       Bio::LocationI and the modules under Bio/Location.  Added
+       support in Genbank/EMBL format parsing to completely parse
+       feature tables for complex locations.
+
+     o Moved special support for databanks etc to specialized modules under
+       Bio/Seq/. One of these supports very large sequences through 
+       a temporary file as a backend.
+
+     o Explicit Gene, Transcript and Exon SeqFeature objects, supporting
+       CDS retrieval and exon shuffling.
+
+     o More parsers: Sim4, Genscan, MZEF, ESTScan, BPbl2seq, GFF
+
+     o Refactored Bio/DB/GenBank+GenPept. There is now also DB/SwissProt and
+       DB/GDB (the latter has platform-specific limitations).
+
+     o New analysis parser framework for HT sequence annotation (see
+       Bio::SeqAnalysisParserI and Bio::Factory::SeqAnalysisParserFactory)
+
+     o New Alignment IO framework
+
+     o New Index modules (Swissprot)
+
+     o New modules for running Blast within perl
+       (Bio::Tools::Run::StandAloneBlast). Added modules for running
+       Multiple Sequence Alignment tools ClustalW and TCoffee
+       (Bio::Tools::Run::Alignment).
+
+     o New Cookbook-style tutorial (see bptutorial.pl). Improved
+       documentation across the package.
+
+     o Much improved cross platform support. Many known incompatibilities
+       have been fixed; however, NT and Mac do not work across the entire
+       setup (see PLATFORMS).
+
+     o Many bug fixes, code restructuring, etc. Overall stability and
+       maintainability benefit a lot.
+
+     o A total of 957 automatic tests
+    
+
+0.6.2  
+
+   There are very few functionality changes but a large
+   number of software improvements/bug fixes across the package.
+
+   o The EMBL/GenBank parsing are improved.
+ 
+   o The Swissprot reading is improved. Swissprot writing
+     is disabled as it doesn't work at all. This needs to
+     wait for 0.7 release
+ 
+   o BLAST reports with no hits are correctly parsed.
+ 
+   o Several other bugs of the BLAST parser (regular expressions, ...)
+     fixed.
+
+   o Old syntax calls have been replaced with more modern syntax
+ 
+   o Modules that did not work at all, in particular the Sim4
+     set have been removed
+
+   o Bio::SeqFeature::Generic and Bio::SeqFeature::FeaturePair
+     have improved compliance with interface specs and documentation
+
+   o Mailing list documentation updated throughout the distribution
+
+   o Most minor bug fixes have happened.
+
+   o The scripts in /examples now work and have the modern syntax
+     rather than the deprecated syntax
+
+
+0.6.1  Sun April 2 2000
+
+   o Sequences can have Sequence Features attached to them
+        - The sequence features can be read from or written to
+          EMBL and GenBank style flat files
+
+   o Objects for Annotation, including References (but not
+     full medline abstracts), Database links and Comments are
+     provided
+
+   o A Species object to represent nodes on a taxonomy tree
+     is provided
+
+   o The ability to parse HMMER and Sim4 output has been added
+
+   o The Blast parsing has been improved, with better PSI-BLAST
+     support and better overall behaviour.
+
+   o Flat file indexed databases provide both random access 
+     and sequential access to their component sequences.
+
+   o A CodonTable object has been written with all known 
+     CodonTables accessible.
+
+   o A number of new lightweight analysis tools have been
+     added, such as molecular weight determination.
+
+    The 0.6 release also has improved software engineering
+  
+   o The sequence objects have been rewritten, providing more
+     maintainable and easier to implement objects. These
+     objects are backwardly compatible with the 0.05.1 objects
+
+   o Many objects are defined in terms of interfaces and then  
+     a Perl implementation has been provided. The interfaces
+     are found in the 'I' files (module names ending in 'I').
+
+     This means that it is possible to wrap C/CORBA/SQL access
+     as true "bioperl" objects, compatible with the rest of
+     bioperl.
+
+   o The SeqIO system has been overhauled to provide better
+     processing and perl-like automatic interpretation of <>
+     over arguments.
+
+   o Many more tests have been added (a total of 172 automatic
+     tests are now run before release).
+
+
+
+0.05.1 Tue Jun 29 05:30:44 1999
+        - Central distribution now requires Perl 5.004. This was
+          done to get around 5.003-based problems in Bio/Index/* 
+          and SimpleAlign.
+        - Various bug fixes in the Bio::Tools::Blast modules
+          including better exception handling and PSI-Blast 
+          support. See Bio/Tools/Blast/CHANGES for more.
+        - Fixed the Parse mechanism in Seq.pm to use readseq.
+	  Follow the instructions in README for how to install
+	  it (basically, you have to edit Parse.pm).
+        - Improved documentation of Seq.pm, indicating where 
+          objects are returned and where strings are returned.
+        - Fixed uninitialized warnings in Bio::Root::Object.pm
+          and Bio::Tools::SeqPattern.pm.
+        - Bug fixes for PR#s: 30,31,33-35,41,42,44,45,47-50,52.
+
+0.05  Sun Apr 25 01:14:11 1999
+        - Bio::Tools::Blast modules have less memory problems
+          and faster parsing. Webblast uses LWP and supports
+          more functionality. See Bio/Tools/Blast/CHANGES for more.
+        - The Bio::SeqIO system has been started, moving the
+          sequence reformatting code out of the sequence object
+        - The Bio::Index:: system has been started, providing
+          generic index capabilities and specifically works for
+          Fasta formatted databases and EMBL .dat formatted 
+	  databases
+        - The Bio::DB:: system started, providing access to 
+          databases, both via flat file + index (see above) and
+          via http to NCBI
+        - The scripts/ directory, where industrial strength scripts
+          are put has been started.
+        - Many changes - a better distribution all round.
+
+0.04.4  Wed Feb 17 02:20:13 1999
+	- Bug fixes in the Bio::Tools::Blast modules and postclient.pl
+	  (see Bio::Tools::Blast::CHANGES).
+        - Fixed a bug in Bio::Tools::Fasta::num_seqs().
+        - Beefed up the t/Fasta.t test script.
+        - Small fix in Bio::Seq::type() (now always returns a string).
+        - Changed Bio::Root::Utilities::get_newline_char() to 
+          get_newline() since it could return more than one char.
+        - Added $NEWLINE and $TIMEOUT_SECS to Bio::Root::Global.
+        - Changed default timeout to 20 seconds (was 3).
+        - Moved lengthy modification notes to the bottom of some files.
+	- Fixed SimpleAlign write_fasta bug.
+	- Beefed up SimpleAlign.t test
+
+0.04.3  Thu Feb  4 07:48:53 1999
+        - Bio::Root::Object.pm and Global.pm now detect when
+          script is run as a CGI and suppress output that is only
+          appropriate when running interactively.
+        - Bio::Root::Err::_set_context() adds name of script ($0).
+        - Added comments in Bio::Tools::WWW.pm and Bio::Root::Utilities.pm
+          regarding the use of the static objects via the qw(:obj) tag.
+        - Fixed the ambiguous reverse calls in Seq.pm and UnivAln.pm to 
+          CORE::reverse, avoiding Perl warnings.
+        - Bug fixes in Bio::Tools::Blast modules (version 0.074) and 
+          example scripts (see Bio::Tools::Blast::CHANGES).
+        - examples/seq/seqtools.pl no longer always warns about using 
+          -prot or -nucl command-line arguments; only when using the 
+          -debug argument.
+        - Methods added to Bio::Root::Utilities: create_filehandle(), 
+          get_newline_char(), and taste_file() to generalize filehandle 
+          creation and autodetect newline characters in files/streams
+          (see bug report #19).
+        - Bio::Root::IOManager::read() now handles timeouts and uses
+          Utilities::create_filehandle().
+        - Bio::Tools::Fasta.pm uses Utilities::get_newline_char() instead
+          of hardwiring in "\n".
+        - Bug fixes in the Bio::SimpleAlign and Bio::Tools::pSW
+
+0.04.2  Wed Dec 30 02:27:36 1998
+        - Bug fixes in Bio::Tools::Blast modules, version 0.073
+          (see Bio::Tools::Blast::CHANGES).
+        - Changed reverse calls in Bio/Seq.pm and Bio/UnivAln.pm
+          to CORE::reverse (prevents ambiguous warnings with 5.005).
+        - Appending '.tmp.bioperl' to temporary files created by
+          Bio::Root::Utilities::compress() or uncompress() to
+          make it easy to identify & cleanup these files as needed.
+        - Developers: Created CVS branch release-0-04-bug from
+          release-0-04-1. Before making bug fixes to the 0.04.1 release,
+          be sure to cvs checkout this branch into a clean area.
+
+0.04.1  Wed Dec 16 05:39:15 1998
+        - Bug fixes in Bio::Tools::Blast modules, version 0.072
+          (see Bio::Tools::Blast::CHANGES).
+        - Compile/SW/Makefile.PL now removes *.o and *.a files 
+          with make clean.
+
+0.04  Tue Dec  8 07:49:19 1998
+        - Lots of new modules added including:
+           * Ewan Birney's Bio::SimpleAlign.pm, Bio::Tools::AlignFactory.pm,
+             and Bio/Compile directory containing XS-linked C code for
+             creating Smith-Waterman sequence alignments from within Perl.
+           * Steve Chervitz's Blast distribution has been incorporated.
+           * Georg Fuellen's Bio::UnivAln.pm for multiple alignment objects.
+        - Bio/examples directory for demo scripts for all included modules.
+        - Bio/t directory containing test suite for all included modules.
+        - For changes specific to the Blast-related modules prior to
+          incorporation in this central distribution, see the CHANGES
+          file in the Bio/Tools/Blast directory.
+     
+0.01  Tue Sep  8 14:23:22 1998
+        - original version from central CVS tree; created by h2xs 1.18
+

Added: trunk/packages/bioperl/branches/upstream/current/DEPENDENCIES
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/DEPENDENCIES	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/DEPENDENCIES	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,142 @@
+# $Id: DEPENDENCIES,v 1.1.2.4 2006/12/06 18:20:16 sendu Exp $
+
+BioPerl Dependencies
+
+The following packages are used by BioPerl. Not all are required for
+BioPerl to operate properly, however some functionality will be missing
+without them. You can easily choose to install all of these during the
+normal installation process. Note that the PPM version of the BioPerl
+packages always tries to install all dependencies.
+
+The DBD::mysql, DB_File and XML::Parser modules require other applications
+or databases: MySQL, Berkeley DB, and expat respectively.
+
+NB: This list of packages is not authoritative. See the 'requires',
+'build_requires' and 'recommends' sections of Build.PL instead.
+
++-----------------------------------------------------------------------------+
+|        Module        |    Where it is Used   |   Bio* Modules Affected      |
+|----------------------+-----------------------+------------------------------|
+|                      |GenPept                |                              |
+|HTTP::Request::Common |sequence retrieval,    |Bio::DB::*                    |
+|                      |remote http BLAST jobs |Bio::Tools::Run::RemoteBlast  |
+|----------------------+-----------------------+------------------------------|
+|                      |GenBank, GenPept       |                              |
+|LWP::UserAgent        |sequence retrieval,    |Bio::DB::*,                   |
+|                      |remote http BLAST jobs |Bio::Tools::Run::RemoteBlast  |
+|----------------------+-----------------------+------------------------------|
+|Ace [1]               |Access to AceDB        |Bio::DB::Ace                  |
+|                      |databases              |                              |
+|----------------------+-----------------------+------------------------------|
+|                      |                       |Bio::SeqIO, Bio::Variation::*,|
+|IO::String            |handle to read or      |Bio::DB::*, Bio::Index::Blast,|
+|                      |write to a string      |Bio::Tools::*, Bio::Biblio::IO|
+|                      |                       |Bio::Structure::IO            |
+|----------------------+-----------------------+------------------------------|
+|XML::Parser [2]       |Parsing of XML         |Bio::Biblio::IO::medlinexml   |
+|                      |documents              |                              |
+|----------------------+-----------------------+------------------------------|
+|XML::Writer           |Parsing + writing of   |Bio::SeqIO::game,             |
+|                      |XML documents          |Bio::Variation::*             |
+|----------------------+-----------------------+------------------------------|
+|XML::Parser::PerlSAX  |Parsing of XML         |Bio::SeqIO::game,             |
+|                      |documents              |Bio::Variation::*,            |
+|                      |                       |Bio::Biblio::IO::medlinexml   |
+|----------------------+-----------------------+------------------------------|
+|                      |Parsing of XML         |Bio::Variation::IO::xml,      |
+|XML::Twig             |documents              |Bio::DB::Biblio::eutils,      |
+|                      |                       |Bio::Graph::IO::psi_xml       |
+|----------------------+-----------------------+------------------------------|
+|File::Temp            |Temporary File         |Bio::DB::FileCache,           |
+|                      |creation               |Bio::DB::XEMBL                |
+|----------------------+-----------------------+------------------------------|
+|SOAP::Lite            |SOAP protocol,         |Bio::Biblio::*,               |
+|                      |XEMBL Services         |Bio::DB::XEMBLService         |
+|----------------------+-----------------------+------------------------------|
+|HTML::Parser          |HTML parsing of        |Bio::DB::GDB                  |
+|                      |GDB page               |                              |
+|----------------------+-----------------------+------------------------------|
+|                      |MySQL API for loading  |                              |
+|DBD::mysql [3]        |and querying of MySQL- |Bio::DB::GFF, bioperl-db      |
+|                      |based GFF feature      |bioperl-pipeline              |
+|                      |and BioSQL databases   |                              |
+|----------------------+-----------------------+------------------------------|
+|GD [4][5]             |GD graphical drawing   |Bio::Graphics                 |
+|                      |library                |                              |
+|----------------------+-----------------------+------------------------------|
+|Storable              |Persistent object      |Bio::DB::FileCache            |
+|                      |storage & retrieval    |                              |
+|----------------------+-----------------------+------------------------------|
+|Text::Shellwords      |Text parser            |Bio::Graphics::FeatureFile    |
+|----------------------+-----------------------+------------------------------|
+|XML::DOM              |XML parser             |Bio::SeqIO::bsml,             |
+|                      |                       |Bio::SeqIO::interpro          |
+|----------------------+-----------------------+------------------------------|
+|                      |Perl access to         |Bio::DB::Flat, Bio::DB::Fasta,|
+|DB_File [6]           |Berkeley DB            |Bio::SeqFeature::Collection,  |
+|                      |                       |Bio::Index::*                 |
+|----------------------+-----------------------+------------------------------|
+|Graph::Directed       |Generic graph data and |Bio::Ontology::               |
+|                      |algorithms             |     SimpleOntologyEngine     |
+|----------------------+-----------------------+------------------------------|
+|Data::Stag::          |Structured Tags,       |Bio::SeqIO::chadoitext [7]    |
+|    ITextWriter       |datastructures         |                              |
+|----------------------+-----------------------+------------------------------|
+|Data::Stag::          |Structured Tags,       |Bio::SeqIO::chadosxpr [7]     |
+|    SxprWriter        |datastructures         |                              |
+|----------------------+-----------------------+------------------------------|
+|Data::Stag::XMLWriter |Structured Tags,       |Bio::SeqIO::chadoxml          |
+|                      |datastructures         |                              |
+|----------------------+-----------------------+------------------------------|
+|Text::Wrap            |Very optional          |Bio::SearchIO::Writer::       |
+|                      |                       |      TextResultWriter        |
+|----------------------+-----------------------+------------------------------|
+|HTML::Entities        |Parse BLAST results in |Bio::SearchIO::blastxml       |
+|                      |XML                    |                              |
+|----------------------+-----------------------+------------------------------|
+|Class::AutoClass [8]  |Used to create objects |Bio::Graph::SimpleGraph*      |
+|----------------------+-----------------------+------------------------------|
+|Clone                 |Used to clone objects  |Bio::Graph::ProteinGraph      |
+|----------------------+-----------------------+------------------------------|
+|                      |                       |Bio::SeqIO::bsml_sax,         |
+|XML::SAX              |New style SAX parser   |Bio::SeqIO::tigrxml,          |
+|                      |                       |Bio::SearchIO::blastxml       |
+|----------------------+-----------------------+------------------------------|
+|XML::SAX::Base        |New style SAX parser   |Bio::SeqIO::tigrxml           |
+|----------------------+-----------------------+------------------------------|
+|XML::SAX::Writer      |                       |                              |
+|----------------------+-----------------------+------------------------------|
+|XML::SAX::ExpatXS     |New style SAX parser   |Bio::SearchIO::blastxml       |
+|[2][9]                |                       |                              |
+|----------------------+-----------------------+------------------------------|
+|XML::Simple [2]       |Simple XML parsing     |Bio::DB::EUtilities           |
+|----------------------+-----------------------+------------------------------|
+|Convert::Binary::C    |Parsing of DNA strider |Bio::SeqIO::strider           |
+|                      |documents              |                              |
+|----------------------+-----------------------+------------------------------|
+|Spreadsheet::         |Read Microsoft Excel   |Bio::SeqIO::excel             |
+|    ParseExcel        |files                  |                              |
+|----------------------+-----------------------+------------------------------|
+|Bio::ASN1::EntrezGene |Parses ASN1 format     |Bio::SeqIO::entrezgene,       |
+|                      |                       |Bio::DB::EntrezGene           |
++-----------------------------------------------------------------------------+
+
+Notes
+
+    1. Available at http://stein.cshl.org 
+    2. Requires expat, at http://sourceforge.net/projects/expat/
+    3. Requires MySQL, from http://www.mysql.org 
+    4. Requires GD library (libgd) from http://www.boutell.com/gd 
+    5. Installing the GD library - libgd - is somewhat non-trivial since
+       there are a number of dependencies to consider. Matias Giovannini has
+       posted an excellent walkthrough for Mac OS X 10.4. 
+    6. Requires Berkeley DB, from Linux RPM or from
+       http://www.sleepycat.com 
+    7. These modules may be present in older distributions but are considered
+       redundant; use Bio::SeqIO::chadoxml instead. 
+    8. Bio::Graph::SimpleGraph requires Class::AutoClass v. 1.01;
+       earlier versions give very different results. 
+    9. This module is optional but recommended for speeding up parsing over
+       the default XML::SAX::PurePerl. If installed, XML::SAX::Expat currently
+       does not work correctly due to DTD problems.
+       
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/DEPRECATED
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/DEPRECATED	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/DEPRECATED	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,35 @@
+# These are modules which are deprecated and later removed from the toolkit
+# See http://www.bioperl.org/wiki/Deprecated_modules for the latest details
+
+                          Version    Version 
+Deprecated Modules       Deprecated  Removed Comment
+--------------------------------------------------------------------------------
+Bio::Annotation               1.0      1.1    use Bio::Annotation::Collection
+Bio::Tools::Blast             1.0      1.1    use Bio::SearchIO
+Bio::Tools::Blast::HSP        1.0      1.1    use Bio::Search::HSP::GenericHSP
+Bio::Tools::Blast::Sbjct      1.0      1.1    use Bio::Search::Hit::GenericHit
+Bio::Tools::Blast::HTML       1.0      1.1    use Bio::SearchIO::Writer::HTMLWriter
+Bio::Tools::SeqAnal           1.0      1.1    used only by deprecated Bio::Tools::Blast
+Bio::Tools::WWW               1.1      1.1.1  Just a collection of links
+Bio::UnivAln                  1.0      1.1    use Bio::SimpleAlign
+Bio::Tools::RestrictionEnzyme 1.5      1.6    use Bio::Restriction
+Bio::Tools::BPlite            1.5      1.6    use Bio::SearchIO
+Bio::Tools::BPpsilite         1.5      1.6    use Bio::SearchIO
+Bio::Tools::BPbl2seq          1.5      1.6    use Bio::SearchIO
+Bio::Ontology::SimpleGOEngine 1.5.1    1.6    use Bio::Ontology::OBOEngine
+Bio::Taxonomy                 1.5.1    1.7    use Bio::Taxon & Bio::Tree::Tree
+Bio::Taxonomy::Node           1.5.1    1.7    renamed Bio::Taxon
+Bio::Taxonomy::Taxon          1.5.1    1.7    use Bio::Taxon
+Bio::Taxonomy::Tree           1.5.1    1.7    use Bio::Taxon & Bio::Tree::Tree
+Bio::Taxonomy::FactoryI       1.5.1    1.7    Redundant, no implementors
+Bio::Search::Processor        1.5.1    1.7    Superseded by Bio::SearchIO
+Bio::Factory::ResultFactoryI  1.5.2    1.6    Superseded by Bio::Factory::ObjectFactory
+Bio::Factory::HitFactoryI     1.5.2    1.6    Superseded by Bio::Factory::ObjectFactory
+Bio::Root::Err                1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Root::Global             1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Root::IOManager          1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Root::Object             1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Root::Utilities          1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Root::Vector             1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Root::Xref               1.5.2    1.5.2  Bio::Root* redundant classes
+Bio::Tools::WebBlat           1.5.2    1.5.3  Requested that this not be maintained

Added: trunk/packages/bioperl/branches/upstream/current/INSTALL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/INSTALL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/INSTALL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,432 @@
+# $Id: INSTALL,v 1.47.4.8 2006/12/06 18:20:15 sendu Exp $
+
+Installing Bioperl for Unix
+
+     * 1 BIOPERL INSTALLATION
+     * 2 SYSTEM REQUIREMENTS
+     * 3 OPTIONAL
+     * 4 ADDITIONAL INSTALLATION INFORMATION
+     * 5 THE BIOPERL BUNDLE
+     * 6 PRELIMINARY PREPARATION
+     * 7 INSTALLING BIOPERL THE EASY WAY USING CPAN
+     * 8 INSTALLING BIOPERL THE EASY WAY USING 'Build.PL'
+     * 9 WHERE ARE THE MAN PAGES?
+     * 10 EXTERNAL PROGRAMS
+
+          * 10.1 Environment Variables
+
+     * 11 INSTALLING BIOPERL SCRIPTS
+     * 12 INSTALLING BIOPERL IN A PERSONAL MODULE AREA
+     * 13 INSTALLING BIOPERL MODULES THE HARD WAY
+     * 14 USING MODULES NOT INSTALLED IN THE STANDARD LOCATION
+     * 15 THE TEST SYSTEM
+     * 16 BUILDING THE OPTIONAL bioperl-ext PACKAGE
+
+          * 16.1 CONFIGURING for BSD and Solaris boxes
+          * 16.2 INSTALLATION
+
+BIOPERL INSTALLATION
+
+Bioperl has been installed on many forms of Unix,
+Win9X/NT/2000/XP, and on Mac OS X (see the PLATFORMS file for more
+details). Following are instructions for installing Bioperl for
+Unix/Linux/Mac OS X; Windows installation instructions can be found
+in INSTALL.WIN. For installing Bioperl for Mac OS X using Fink, see:
+
+http://www.bioperl.org/wiki/Getting_BioPerl#Mac_OS_X_using_fink
+
+SYSTEM REQUIREMENTS
+
+    * Perl 5.6.1 or later; version 5.8 and greater are recommended.
+
+    * External modules: Bioperl uses functionality provided in other
+      Perl modules. Some of these are included in the standard perl package
+      but some need to be obtained from the CPAN site. The list of external
+      modules is included in the DEPENDENCIES file.
+
+OPTIONAL
+
+   * ANSI C or GNU C compiler (gcc) for XS extensions (the
+     bioperl-ext package; see BUILDING THE OPTIONAL bioperl-ext
+     PACKAGE, below).
+
+ADDITIONAL INSTALLATION INFORMATION
+
+   * Additional information on Bioperl and MAC OS:
+      * OS 9 - http://bioperl.org/Core/mac-bioperl.html
+      * OS X - http://www.tc.umn.edu/~cann0010/Bioperl_OSX_install.html
+            (outdated, but useful for libgd installation notes)
+      * OS X - Installing using Fink (in Getting BioPerl)
+
+THE BIOPERL BUNDLE
+
+Users of previous versions of Bioperl may remember Bundle::BioPerl.
+You no longer need to install Bundle::BioPerl. Instead, the normal
+installation process will ask you if you'd like to install the
+optional external module dependencies that Bioperl has.
+
+A full list of BioPerl dependencies can be found in the DEPENDENCIES
+file included with this distribution.
+
+PRELIMINARY PREPARATION
+
+   This is optional, but regardless of your subsequent choice of installation
+   method, it will help to carry out the following steps. They will increase
+   the likelihood of installation success (especially of optional
+   dependencies).
+
+     * Upgrade CPAN:
+
+ >perl -MCPAN -e shell
+ cpan>install Bundle::CPAN
+ cpan>q
+
+     * Install/upgrade Module::Build, and make it your preferred installer:
+
+ >cpan
+ cpan>install Module::Build
+ cpan>o conf prefer_installer MB
+ cpan>o conf commit
+ cpan>q
+
+     * Install the expat and libgd libraries by whatever method is
+       appropriate for your system. If you install libgd in a non-standard
+       location, that is fine: when installing the perl module that needs it
+       you will be asked where you installed it.
+
+     * If your expat library is installed in a non-standard location, tell
+       CPAN about it:
+
+ >cpan
+ cpan>o conf makepl_arg "EXPATLIBPATH=/non-standard/lib EXPATINCPATH=/non-standard/include"
+ cpan>o conf commit
+
+INSTALLING BIOPERL THE EASY WAY USING CPAN
+
+You can use the CPAN shell to install Bioperl. For example:
+
+ >perl -MCPAN -e shell
+ 
+Or you might have the cpan alias installed:
+
+ >cpan
+
+Then find the name of the Bioperl version you want:
+
+ cpan>d /bioperl/
+ CPAN: Storable loaded ok
+ Going to read /home/bosborne/.cpan/Metadata
+ Database was generated on Mon, 20 Nov 2006 05:24:36 GMT
+ Distribution B/BI/BIRNEY/bioperl-1.2.tar.gz
+ Distribution B/BI/BIRNEY/bioperl-1.4.tar.gz
+ Distribution S/SE/SENDU/bioperl-1.5.2_100.tar.gz
+
+Now install:
+
+ cpan>install S/SE/SENDU/bioperl-1.5.2_100.tar.gz
+
+If you've installed everything perfectly and all the network connections
+are working then you may pass all the tests run in the './Build test' phase.
+It's also possible that you may fail some tests. Possible explanations:
+problems with local Perl installation, network problems, previously
+undetected bug in Bioperl, flawed test script, problems with CGI
+script used for sequence retrieval at public database, and so on. Remember
+that there are over 800 modules in Bioperl and the test suite is running
+more than 12000 individual tests, so a few failed tests may not affect your
+usage of Bioperl.
+
+If you decide that the failed tests will not affect how you intend to use
+Bioperl and you'd like to install anyway do:
+
+ cpan>force install S/SE/SENDU/bioperl-1.5.2_100.tar.gz
+
+This is what most experienced Bioperl users would do. However, if you're
+concerned about a failed test and need assistance or advice then contact
+bioperl-l at bioperl.org.
+
+INSTALLING BIOPERL THE EASY WAY USING Build.PL
+
+The advantage of this approach is it's stepwise, so it's easy to stop and
+analyze in case of any problem.
+
+Download, then unpack the tar file. For example:
+
+ >gunzip bioperl-1.5.2_100.tar.gz
+ >tar xvf bioperl-1.5.2_100.tar
+ >cd bioperl-1.5.2_100
+
+Now issue the build commands:
+
+ >perl Build.PL
+ >./Build test
+
+If you've installed everything perfectly and all the network connections
+are working then you may pass all the tests run in the './Build test' phase.
+It's also possible that you may fail some tests. Possible explanations:
+problems with local Perl installation, network problems, previously
+undetected bug in Bioperl, flawed test script, problems with CGI script
+used for sequence retrieval at public database, and so on. Remember that
+there are over 800 modules in Bioperl and the test suite is running more
+than 12000 individual tests, so a few failed tests may not affect your usage
+of Bioperl.
+
+If you decide that the failed tests will not affect how you intend to use
+Bioperl and you'd like to install anyway, or if all tests were fine, do:
+
+ >./Build install
+
+This is what most experienced Bioperl users would do. However, if you're
+concerned about a failed test and need assistance or advice then contact
+bioperl-l at bioperl.org.
+
+To './Build install' you need write permission in the perl5/site_perl/source
+area (or similar, depending on your environment). Usually this will require
+you becoming root, so you will want to talk to your systems manager if you
+don't have the necessary privileges.
+
+It is also straightforward to install the package outside of this
+standard Perl5 location. See INSTALLING BIOPERL IN A PERSONAL MODULE
+AREA, below.
+
+WHERE ARE THE MAN PAGES?
+
+When using Makefile.PL (no longer covered in this documentation), we had
+to disable the automatic creation of man pages because this step was
+triggering a "line too long" error on some OSs due to shell constraints.
+If you want man pages installed use the Build.PL installation process
+discussed above.
+
+EXTERNAL PROGRAMS
+
+Bioperl can interface with some external programs for executing analyses.
+These include clustalw and t_coffee for Multiple Sequence Alignment
+(Bio::Tools::Run::Alignment::Clustalw and
+Bio::Tools::Run::Alignment::TCoffee) and blastall, blastpgp, and
+bl2seq for BLAST analyses (Bio::Tools::Run::StandAloneBlast), and
+to all the programs in the EMBOSS suite (Bio::Factory::EMBOSS).
+
+    Environment Variables
+
+Some modules which run external programs need certain environment
+variables set. If you do not have a local copy of the specific executable
+you do not need to set these variables. Additionally the modules will
+attempt to locate the specific applications in your runtime PATH variable.
+You may also need to set an environment variable to tell BioPerl about
+your network configuration if your site uses a firewall.
+
+Setting environment variables on unix means adding lines like the
+following to your shell *rc file.
+
+   For bash or sh:
+
+ export BLASTDIR=/data1/blast
+
+   For csh or tcsh:
+
+ setenv BLASTDIR /data1/blast
+
+Some environment variables include:
+
++------------------------------------------------------------------------+
+| Env. Variable |                      Description                       |
+|---------------+--------------------------------------------------------|
+|               |Specifies where the NCBI blastall, blastpgp, bl2seq,    |
+|BLASTDIR       |etc. are located. A 'data' directory could also be      |
+|               |present in this directory as well, you could put your   |
+|               |blastable databases here.                               |
+|---------------+--------------------------------------------------------|
+|               |If one does not want to locate the data dir within the  |
+|BLASTDATADIR or|same dir as where the BLASTDIR variable points, a       |
+|BLASTDB        |BLASTDATADIR or BLASTDB variable can be set to point to |
+|               |a dir where BLAST database indexes are located.         |
+|---------------+--------------------------------------------------------|
+|BLASTMAT       |The directory containing the substitution matrices such |
+|               |as BLOSUM62.                                            |
+|---------------+--------------------------------------------------------|
+|CLUSTALDIR     |The directory where the clustalw executable is located. |
+|---------------+--------------------------------------------------------|
+|TCOFFEEDIR     |The directory where the t_coffee executable is located. |
+|---------------+--------------------------------------------------------|
+|               |If you access the internet via a proxy server then you  |
+|               |can tell the Bioperl modules which require network      |
+|               |access about this by using the http_proxy environment   |
+|http_proxy     |variable. The value set includes the proxy address and  |
+|               |the port, with optional username/password for           |
+|               |authentication purposes                                 |
+|               |(e.g. http://USERNAME:PASSWORD@proxy.example.com:8080). |
++------------------------------------------------------------------------+
+
+INSTALLING BIOPERL SCRIPTS
+
+Bioperl comes with a set of production-quality scripts that are
+kept in the scripts/ directory. You can install these scripts if you'd
+like, simply answer the questions during 'perl Build.PL'.
+The installation directory can be specified by:
+
+ perl Build.PL
+ ./Build install --install_path script=/foo/scripts
+ 
+By default they install to /usr/bin or similar, depending on platform.
+
+INSTALLING BIOPERL IN A PERSONAL MODULE AREA
+
+If you lack permission to install perl modules into the standard
+site_perl/ system area you can configure Bioperl to install itself
+anywhere you choose. Ideally this would be a personal perl directory or
+standard place where you plan to put all your 'local' or personal perl
+modules.
+
+   Example:
+
+ >perl Build.PL --install_base /home/users/dag
+ >./Build test
+ >./Build install
+
+This tells perl to install all the various parts of bioperl in the desired
+place, e.g. creating:
+
+   /home/users/dag/lib/perl5/Bio/Perl.pm
+
+Then in your Bioperl script you would write:
+
+ use lib "/home/users/dag/lib/perl5/";
+ use Bio::Perl;
+
+For more information on these sorts of custom installs see the documentation
+for Module::Build.
+
+If you are used to using something like:
+
+ >perl Makefile.PL PREFIX=/home/users/dag
+
+You can get similar behaviour by using this instead:
+
+ >perl Build.PL --prefix /home/users/dag
+
+For more information, see Module::Build::Cookbook documentation for
+Installing_in_the_same_location_as_ExtUtils::MakeMaker
+
+You can also use CPAN to install modules in your local directory. First
+enter the CPAN shell, then set the arguments for the commands
+"perl Makefile.PL" and "./Build install", like this:
+
+ >perl -e shell -MCPAN
+ cpan>o conf makepl_arg LIB=/home/users/dag/My_Local_Perl_Modules
+ cpan>o conf mbuild_install_arg "--install_path lib=/home/users/dag/My_Local_Perl_Modules"
+ cpan>o conf commit
+
+INSTALLING BIOPERL MODULES THE HARD WAY
+
+As a last resort, you can simply copy all files in Bio/ to any directory
+in which you have write privileges. This is generally NOT recommended
+since some modules may require special configuration (currently none do,
+but don't rely on this).
+
+You will need to set "use lib '/path/to/my/bioperl/modules';" in your perl
+scripts so that you can access these modules if they are not installed in
+the standard site_perl/ location. See above for an example.
+
+To get manpage documentation to work correctly you will have to
+configure man so that it looks in the proper directory. On most systems
+this will just involve adding an additional directory to your $MANPATH
+environment variable.
+
+The installation of the Compile directory can be similarly redirected, but
+execute the make commands from the Compile/SW directory.
+
+If all else fails and you are unable to access the perl distribution
+directories, ask your system administrator to place the files there for
+you. You can always execute perl scripts in the same directory as the
+location of the modules (Bio/ in the distribution) since perl always
+checks the current working directory when looking for modules.
+
+USING MODULES NOT INSTALLED IN THE STANDARD LOCATION
+
+You can explicitly tell perl where to look for modules by using the
+lib pragma, which comes standard with perl.
+
+   Example:
+
+ #!/usr/bin/perl
+ use lib "/home/users/dag/lib/perl5/";
+ use Bio::Perl;
+ #<...insert whizzy perl code here...>
+
+Or, you can set the environmental variable PERL5LIB:
+
+   csh or tcsh:
+
+ setenv PERL5LIB /home/users/dag/lib/perl5/
+
+   bash or sh:
+
+ export PERL5LIB=/home/users/dag/lib/perl5/
+
+THE TEST SYSTEM
+
+The Bioperl test system is located in the t/ directory and is
+automatically run whenever you execute the './Build test' command
+(having previously run 'perl Build.PL'; if you have already installed
+Bioperl answer 'no' to script installation to get nicer test output
+later).
+Alternatively if you want to investigate the behavior of a specific test
+such as the Seq test you would type:
+
+ >./Build test --test_files t/Seq.t --verbose
+
+The ./ ensures you are using the Build script in the current directory to
+make sure you are testing the modules in this directory not ones
+installed elsewhere. The --test_files argument can be used multiple times
+to try a set of test scripts in one go. The --verbose argument outputs
+the detailed test results, instead of just the summary you see during
+'./Build test'.
+
+If you are trying to learn how to use a module, often the test suite is a
+good place to look. All good extreme programmers try and write a test
+BEFORE they write the module to ensure that their module behaves the way
+they expect. You'll notice some 'ok' and 'skip' commands in a test, this
+is part of the Perl test suite that signifies a passed test with an 'ok
+N', where N is the test number. Alternatively you can tell Perl to skip
+tests. This is useful when, for example, your test detects that the
+network is not present and thus should skip, not fail, any tests that
+require a network connection.
+
+BUILDING THE OPTIONAL bioperl-ext PACKAGE
+
+The bioperl-ext package contains C code and XS extensions for
+various alignment and trace file modules (Bio::Tools::pSW for DNA
+Smith-Waterman, Bio::Tools::dpAlign for protein Smith-Waterman,
+Bio::SearchDist for EVD fitting of extreme value,
+Bio::SeqIO::staden).
+
+This installation may work out of the box for most platforms except BSD
+and Solaris boxes. For other platforms skip this next paragraph.  Of note,
+the code for bioperl-ext has not been updated along with the rest of bioperl,
+so one may expect to see some issues.  If so, please report them to the
+BioPerl mailing list.
+
+    CONFIGURING for BSD and Solaris boxes
+
+You should add the line -fPIC to the CFLAGS line in
+Compile/SW/libs/makefile. This makes the compile generate position
+independent code, which is required for these architectures. In addition,
+on some Solaris boxes, the generated Makefile does not make the correct
+-fPIC/-fpic flags for the C compiler that is used. This requires manual
+editing of the generated Makefile to switch case. Try it out once, and if
+you get errors, try editing the -fpic line.
+
+    INSTALLATION
+
+Move to the directory bioperl-ext. This is available as a separate package
+released from ftp://bioperl.org/pub/bioperl/DIST. This is where the C
+code and XS extension for the bp_sw module are held. Execute these
+commands (possibly after making the change for BSD and Solaris, as
+detailed above):
+
+ perl Makefile.PL   # makes the system specific makefile
+ make          # builds all the libraries
+ make test     # runs a short test
+ make install  # installs the package correctly.
+
+This should install the compiled extension. The Bio::Tools::pSW
+module will work cleanly now.

Added: trunk/packages/bioperl/branches/upstream/current/INSTALL.WIN
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/INSTALL.WIN	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/INSTALL.WIN	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,629 @@
+# $Id: INSTALL.WIN,v 1.20.4.5 2006/12/06 18:20:16 sendu Exp $
+
+                         Installing Bioperl on Windows
+
+   Contents
+
+     * 1 Introduction
+     * 2 Requirements
+     * 3 Installation using the Perl Package Manager
+     
+          * 3.1 GUI Installation
+          * 3.2 Command-line Installation
+          
+     * 4 Installation using CPAN or manual installation
+     * 5 Bioperl
+     * 6 Perl on Windows
+     * 7 Bioperl on Windows
+     * 8 Beyond the Core
+
+          * 8.1 Setting environment variables
+          * 8.2 Installing bioperl-db
+
+     * 9 Bioperl in Cygwin
+     * 10 bioperl-db in Cygwin
+     * 11 Cygwin tips
+     * 12 MySQL and DBD::mysql
+     * 13 Expat
+     * 14 Directory for temporary files
+     * 15 BLAST
+     * 16 Compiling C code
+
+Introduction
+
+   This installation guide was written by Barry Moore, Nathan Haigh
+   and other Bioperl authors based on the original work of Paul Boutros. The
+   guide was updated for the BioPerl wiki by Chris Fields and Nathan
+   Haigh.
+
+   Please report problems and/or fixes to the BioPerl mailing list.
+   
+   An up-to-date version of this document can be found on the BioPerl wiki:
+   
+   http://www.bioperl.org/wiki/Installing_Bioperl_on_Windows
+
+Requirements
+
+   Only ActivePerl >= 5.8.8.819 is supported by the Bioperl team. Earlier
+   versions may work, but we do not support them.
+
+   One of the reasons for this requirement is that ActivePerl >= 5.8.8.819
+   now uses Perl Package Manager 4 (PPM4). PPM4 is now superior to earlier
+   versions and also includes a Graphical User Interface (GUI). In short,
+   it's easier for us to produce and maintain a package for installation via
+   PPM and also easier for you to do the install! Proceed with earlier
+   versions at your own risk.
+
+   To install ActivePerl:
+
+           1) Download the ActivePerl MSI from ActiveState
+
+           2) Run the ActivePerl Installer (accepting all defaults is fine).
+
+Installation using the Perl Package Manager
+
+  GUI Installation
+
+           1) Start the Perl Package Manager GUI from the Start menu.
+
+           2) Go to Edit >> Preferences and click the Repositories tab. Add a
+           new repository for each of the following:
+
+                              Repositories to add
+       +----------------------------------------------------------------+
+       |           Name           |              Location               |
+       |--------------------------+-------------------------------------|
+       |BioPerl-Release Candidates|http://bioperl.org/DIST/RC           |
+       |--------------------------+-------------------------------------|
+       |BioPerl-Regular Releases  |http://bioperl.org/DIST              |
+       |--------------------------+-------------------------------------|
+       |Kobes                     |http://theoryx5.uwinnipeg.ca/ppms    |
+       |--------------------------+-------------------------------------|
+       |Bribes                    |http://www.Bribes.org/perl/ppm       |
+       +----------------------------------------------------------------+
+
+
+           3) Select View >> All Packages.
+
+           4) In the search box type bioperl.
+
+           5) Right click the latest version of Bioperl available and choose
+           install.
+
+           5a) From bioperl 1.5.2 onward, all 'optional' pre-requisites will
+           be marked for installation. If you see that some of them complain
+           about needing a command-line installation (eg. XML::SAX::ExpatXS),
+           and you want those particular pre-requisites, stop now (skip step
+           6) and see the 'Command-line Installation' section.
+
+           6) Click the green arrow (Run marked actions) to complete the
+           installation.
+
+  Command-line Installation
+
+           1) Follow steps 1) and 2) from 'GUI Installation' above, if you
+           haven't done so already.
+
+           2) Open a cmd window by going to Start >> Run and typing 'cmd' and
+           pressing return.
+
+           3) Type the following into the cmd window:
+            
+             ppm-shell
+             search bioperl
+             install #
+
+            (where # is the number matching the bioperl version needed). You
+            can use '-force' to force install if needed.
+
+Installation using CPAN or manual installation
+
+   Installation using PPM is preferred since it is easier, but if you run
+   into problems, or a ppm isn't available for the version/package of bioperl
+   you want, or you want to choose which optional dependencies to install,
+   you can install manually by downloading the appropriate package or by
+   using CPAN. In fact both methods ultimately need nmake to be
+   installed, CPAN to be upgraded to >= v1.81, Module::Build to be installed
+   (>= v0.2805) and Test::Harness to be upgraded to >= v2.62:
+
+           1) Download nmake
+
+           2) Double-click to run it, which extracts 3 files. Move both
+           NMAKE.EXE and the NMAKE.ERR files to a place in your PATH; if set
+           up properly, you can move these to your Perl bin directory,
+           normally C:\Perl\bin.
+
+           1) Open a cmd window by going to Start >> Run and typing 'cmd'
+           into the box and pressing return.
+
+           2) Type 'cpan' to enter the CPAN shell.
+
+           3) At the cpan> prompt, type 'install CPAN' to upgrade to the
+           latest version.
+
+           4) Quit (by typing 'q') and reload cpan. You may be asked some
+           configuration questions; accepting defaults is fine.
+
+           5) At the cpan> prompt, type 'o conf prefer_installer MB' to tell
+           CPAN to prefer to use Build.PL scripts for installation. Type 'o
+           conf commit' to save that choice.
+
+           6) At the cpan> prompt, type 'install Module::Build'.
+
+           7) At the cpan> prompt, type 'install Test::Harness'.
+
+   You can now follow the unix instructions for installing using CPAN, or
+   install manually:
+
+           8) Download the .zip version of the package you want.
+
+           9) Extract the archive in the normal way.
+
+           10) In a cmd window 'cd' to the directory you extracted to. Eg. if
+           you extracted to directory 'Temp', 'cd Temp\bioperl-1.5.2_100'
+
+           11) Type 'perl Build.PL' and answer the questions appropriately.
+
+           12) Type 'perl Build test'. All the tests should pass, but if they
+           don't, let us know. Your usage of Bioperl may not be affected
+           by the failure, so you can choose to continue anyway.
+
+           13) Type 'perl Build install' to install Bioperl.
+
+Bioperl
+
+   Bioperl is a large collection of Perl modules (extensions to the
+   Perl language) that aid in the task of writing Perl code to deal
+   with sequence data in a myriad of ways. Bioperl provides objects for
+   various types of sequence data and their associated features and
+   annotations. It provides interfaces for analysis of these sequences with a
+   wide variety of external programs (BLAST, FASTA, clustalw and
+   EMBOSS to name just a few). It provides interfaces to various types of
+   databases both remote (GenBank, EMBL etc) and local (MySQL,
+   flat files, GFF etc.) for storage and retrieval of
+   sequences. And finally with its associated documentation and
+   mailing lists, Bioperl represents a community of bioinformatics
+   professionals working in Perl who are committed to supporting both
+   development of Bioperl and the new users who are drawn to the project.
+
+   While most bioinformatics and computational biology applications are
+   developed in UNIX/Linux environments, more and more programs are
+   being ported to other operating systems like Windows, and many users
+   (often biologists with little background in programming) are looking for
+   ways to automate bioinformatics analyses in the Windows environment.
+
+   Perl and Bioperl can be installed natively on Windows NT/2000/XP.
+   Most of the functionality of Bioperl is available with this type of
+   install. Much of the heavy lifting in bioinformatics is done by programs
+   originally developed in lower level languages like C and Pascal
+   (e.g. BLAST, clustalw, Staden etc). Bioperl simply acts as
+   a wrapper for running and parsing output from these external programs.
+
+   Some of those programs (BLAST for example) are ported to Windows.
+   These can be installed and work quite happily with Bioperl in the native
+   Windows environment. Some external programs such as Staden and the
+   EMBOSS suite of programs can only be installed on Windows by using
+   Cygwin and its gcc C compiler (see Bioperl in Cygwin, below).
+   Recent attempts to port EMBOSS to Windows, however, have been mostly
+   successful.
+
+   If you have a fairly simple project in mind, want to start using Bioperl
+   quickly, only have access to a computer running Windows, and/or don't mind
+   bumping up against some limitations then Bioperl on Windows may be a
+   good place for you to start. For example, downloading a bunch of sequences
+   from GenBank and sorting out the ones that have a particular
+   annotation or feature works great. Running a bunch of your sequences
+   against remote or local BLAST, parsing the output and storing it
+   in a MySQL database would be fine also.
+
+   Be aware that most Bioperl developers are working in some type of a
+   UNIX environment (Linux, OS X, Cygwin). If you have
+   problems with Bioperl that are specific to the Windows environment, you
+   may be blazing new ground and your pleas for help on the Bioperl mailing
+   list may get few responses (you can but try!) - simply because no one
+   knows the answer to your Windows specific problem. If this is or becomes a
+   problem for you then you are better off working in some type of UNIX-like
+   environment. One solution to this problem that will keep you working on a
+   Windows machine is to install Cygwin, a UNIX emulation environment for
+   Windows. A number of Bioperl users are using this approach successfully
+   and it is discussed in more detail below.
+
+Perl on Windows
+
+   There are a couple of ways of installing Perl on a Windows machine. The
+   most common and easiest is to get the most recent build from
+   ActiveState, a software company that provides free builds of Perl for
+   Windows users. The current (October 2006) build is ActivePerl 5.8.8.819.
+   Bioperl also works on Perl 5.6.x, but due to installation problems etc,
+   only ActivePerl 5.8.8.819 or later is supported for WinXP installation.
+   To install ActivePerl on Windows:
+
+           1) Download the ActivePerl MSI from
+           http://www.activestate.com/Products/ActivePerl/.
+
+           2) Run the ActivePerl Installer (accepting all defaults is fine).
+
+   You can also build Perl yourself (which requires a C compiler) or download
+   one of the other binary distributions. The Perl source for building it
+   yourself is available from CPAN, as are a few other binary
+   distributions that are alternatives to ActiveState. This approach is not
+   recommended unless you have specific reasons for doing so and know what
+   you're doing. If that's the case you probably don't need to be reading
+   this guide.
+
+   Cygwin is a UNIX emulation environment for Windows and comes with
+   its own copy of Perl.
+
+   Information on Cygwin and Bioperl is found below.
+
+Bioperl on Windows
+
+   Perl is a programming language that has been extended a lot by the
+   addition of external modules.
+
+   These modules work with the core language to extend the functionality of
+   Perl.
+
+   Bioperl is one such extension to Perl. These modular extensions to
+   Perl sometimes depend on the functionality of other Perl modules and this
+   creates a dependency. You can't install module X unless you have already
+   installed module Y. Some Perl modules are so fundamentally useful that the
+   Perl developers have included them in the core distribution of Perl - if
+   you've installed Perl then these modules are already installed. Other
+   modules are freely available from CPAN, but you'll have to install them
+   yourself if you want to use them. Bioperl has such dependencies.
+
+   Bioperl is actually a large collection of Perl modules (over 1000
+   currently) and these modules are split into seven packages. These seven
+   packages are:
+
+   +------------------------------------------------------------------------+
+   |    Bioperl Group     |                    Functions                    |
+   |----------------------+-------------------------------------------------|
+   |bioperl (the core)    |Most of the main functionality of Bioperl        |
+   |----------------------+-------------------------------------------------|
+   |bioperl-run           |Wrappers to a lot of external programs           |
+   |----------------------+-------------------------------------------------|
+   |bioperl-ext           |Interaction with some alignment functions and the|
+   |                      |Staden package                                   |
+   |----------------------+-------------------------------------------------|
+   |bioperl-db            |Using Bioperl with BioSQL and local relational   |
+   |                      |databases                                        |
+   |----------------------+-------------------------------------------------|
+   |bioperl-microarray    |Microarray specific functions                    |
+   |----------------------+-------------------------------------------------|
+   |bioperl-pedigree      |manipulating genotype, marker, and individual    |
+   |                      |data for linkage studies                         |
+   |----------------------+-------------------------------------------------|
+   |bioperl-gui           |Some preliminary work on a graphical user        |
+   |                      |interface to some Bioperl functions              |
+   +------------------------------------------------------------------------+
+
+   The Bioperl core is what most new users will want to start with. Bioperl
+   (the core) and the Perl modules that it depends on can be easily installed
+   with the Perl Package Manager (PPM). PPM is an ActivePerl utility for
+   installing Perl modules on systems using ActivePerl. PPM will look online
+   (you have to be connected to the internet of course) for files (these
+   files end with .ppd) that tell it how to install the modules you want and
+   what other modules your new modules depend on. It will then download and
+   install your modules and all dependent modules for you.
+
+   These .ppd files are stored online in PPM repositories. ActiveState
+   maintains the largest PPM repository and when you installed ActivePerl PPM
+   was installed with directions for using the ActiveState repositories.
+   Unfortunately the ActiveState repositories are far from complete and other
+   ActivePerl users maintain their own PPM repositories to fill in the gaps.
+   Installing Bioperl will require you to direct PPM to look in three new
+   repositories as detailed in the installation instructions above.
+
+   Once PPM knows where to look for Bioperl and its dependencies you simply
+   tell PPM to search for packages with a particular name, select those of
+   interest and then tell PPM to install the selected packages.
+
+Beyond the Core
+
+   You may find that you want some of the features of other Bioperl groups
+   like bioperl-run or bioperl-db. Currently, plans include setting up PPM
+   packages for installing these parts of Bioperl; check this by doing a
+   Bioperl search in PPM.  If these are not available, though, you can use
+   the following instructions for installing the other distributions.
+
+   For this you will need a Windows version of the program make
+   called nmake:
+
+   http://download.microsoft.com/download/vc15/Patch/1.52/W95/EN-US/Nmake15.exe
+
+   You will also want to have a willingness to experiment. You'll have to
+   read the installation documents for each component that you want to
+   install, and use nmake where the instructions call for make, like so:
+
+ perl Makefile.PL
+ nmake
+ nmake test
+ nmake install
+
+   'nmake test' will likely produce lots of warnings; many of these can be
+   safely ignored (these stem from the excessively paranoid '-w' flag in
+   ActivePerl). You will have to determine from the installation documents
+   what dependencies are required, and you will have to get them, read their
+   documentation and install them first. It is recommended that you look
+   through the PPM repositories for any modules before resorting to using
+   nmake as there isn't any guarantee modules built using nmake will work.
+   The details of this are beyond the scope of this guide. Read the
+   documentation. Search Google. Try your best, and if you get stuck consult
+   with others on the BioPerl mailing list.
+
+    Setting environment variables
+
+   Some modules and tools such as Bio::Tools::Run::StandAloneBlast and
+   clustal_w, require that environment variables are set; a few examples
+   are listed in the INSTALL document. Different versions of Windows utilize
+   different methods for setting these variables. NOTE: The instructions that
+   come with the BLAST executables for setting up BLAST on Windows are
+   out-of-date. Go to the following web address for instructions on setting
+   up standalone BLAST for Windows:
+   http://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/pc_setup.html
+
+     * For Windows XP, go here. This does not require a reboot, but shells
+       that are already open will not reflect changes made to the environment.
+     * For older versions (Windows 95 to ME), generally editing the
+       C:\autoexec.bat file to add a variable works. This requires a reboot.
+       Here's an example:
+
+ set BLASTDB=C:\blast\data
+
+   For either case, you can check the variable this way:
+
+ C:\Documents and Settings\Administrator>echo %BLASTDB%
+ C:\blast\data
+
+   Some versions of Windows may have problems differentiating forward and
+   back slashes used for directories. In general, always use backslashes (\).
+   If something isn't working properly try reversing the slashes to see if it
+   helps.
+
+   For the quirks of setting environment variables in Cygwin, see the
+   example below.
+
+    Installing bioperl-db
+
+   bioperl-db now works for Windows without installing Cygwin. This has
+   primarily been tested on WinXP using MySQL5, but it is expected that other
+   bioperl-db supported databases (PostgreSQL, Oracle) should work.
+
+   You will need Bioperl rel. 1.5.2, a relational database (I use MySQL5 here
+   as an example), and the Perl modules DBI and DBD::mysql, which
+   can be installed from PPM as described above (make sure the additional
+   repositories for Kobes and Bribes are added, they will have the latest
+   releases). Do NOT try using nmake with these modules as they will not
+   build correctly under Windows! The PPM builds, by Randy Kobes, have been
+   modified and tested specifically for Windows and ActivePerl.
+
+   NOTE: we plan on having a PPM for bioperl-db available along with the
+   regular bioperl 1.5.2 release PPM. We will post instructions at that
+   time on using PPM to install bioperl-db.
+
+   To begin, follow instructions detailed in the Installation Guide for
+   adding the three new repositories (Bioperl, Kobes and Bribes). Then
+   install the following packages:
+
+           1) DBI
+           2) DBD-mysql
+
+   The next step involves creating a database. The following steps are for
+   MySQL5:
+
+ >mysqladmin -u root -p create bioseqdb
+ Enter password: **********
+
+   The database needs to be loaded with the BioSQL schema, which can be
+   downloaded as a tarball here.
+
+ >mysql -u root -p bioseqdb < biosqldb-mysql.sql
+ Enter password: **********
+
+   Download bioperl-db from CVS. Use the following to install the
+   modules:
+
+ perl Makefile.PL
+ nmake
+
+   Now, for testing out bioperl-db, make a copy of the file
+   DBHarness.conf.example in the bioperl-db test subdirectory (bioperl-db\t).
+   Rename it to DBHarness.biosql.conf, and modify it for your database setup
+   (particularly the user, password, database name, and driver). Save the
+   file, change back to the main bioperl-db directory, and run 'nmake test'.
+   You may see lots of the following lines,
+
+ ....
+ Subroutine Bio::Annotation::Reference::(eq redefined at C:/Perl/lib/overload.pm line 25,
+     <GEN0> line 1.
+ Subroutine new redefined at C:\Perl\src\bioperl\bioperl-live/Bio\Annotation\Reference.pm line 80,
+     <GEN0> line 1.
+ ....
+
+   which can be safely ignored (again, these come from ActivePerl's paranoid
+   '-w' flag). All tests should pass. NOTE : tests should be run with
+   a clean database with the BioSQL schema loaded, but without taxonomy loaded
+   (see below).
+
+   To install, run:
+
+ nmake install
+
+   It is recommended that you load the taxonomy database using the script
+   load_ncbi_taxonomy.pl included in biosql-schema\scripts. You will need to
+   download the latest taxonomy files. This can be accomplished using the
+   -download flag in load_ncbi_taxonomy.pl, but it will not 'untar' the file
+   correctly unless you have GNU tar present in your PATH (which most Windows
+   users will not have), thus causing the following error:
+
+ >load_ncbi_taxonomy.pl -download -driver mysql -dbname bioseqdb -dbuser root -dbpass **********
+ The system cannot find the path specified.
+ Loading NCBI taxon database in taxdata:
+         ... retrieving all taxon nodes in the database
+         ... reading in taxon nodes from nodes.dmp
+ Couldn't open data file taxdata/nodes.dmp: No such file or directory rollback ineffective with
+ AutoCommit enabled at C:\Perl\src\bioperl\biosql-schema\scripts\load_ncbi_taxonomy.pl line 818.
+ Rollback ineffective while AutoCommit is on at
+ C:\Perl\src\bioperl\biosql-schema\scripts\load_ncbi_taxonomy.pl line 818.
+ rollback failed: Rollback ineffective while AutoCommit is on
+
+   Use a file decompression utility like 7-Zip to 'untar' the files in
+   the folder (if using 7-Zip, this can be accomplished by right-clicking on
+   the file and using the option 'Extract here'). Rerun the script without
+   the -download flag to load the taxonomic information. Be patient, as this
+   can take quite a while:
+
+ >load_ncbi_taxonomy.pl -driver mysql -dbname bioseqdb -dbuser root -dbpass **********
+
+ Loading NCBI taxon database in taxdata:
+         ... retrieving all taxon nodes in the database
+         ... reading in taxon nodes from nodes.dmp
+         ... insert / update / delete taxon nodes
+         ... (committing nodes)
+         ... rebuilding nested set left/right values
+         ... reading in taxon names from names.dmp
+         ... deleting old taxon names
+         ... inserting new taxon names
+         ... cleaning up
+ Done.
+
+   Now, load the database with your sequences using the script
+   load_seqdatabase.pl, in bioperl-db's scripts\biosql directory:
+
+ C:\Perl\src\bioperl\bioperl-db\scripts\biosql>load_seqdatabase.pl -driver mysql
+                               -dbname bioseqdb -dbuser root -dbpass **********
+ Loading NP_249092.gpt ...
+ Done.
+
+   You may see occasional errors depending on the sequence format, which is a
+   non-platform-related issue. Many of these are due to not having an updated
+   taxonomic database and may be rectified by updating the taxonomic
+   information as detailed in load_ncbi_taxonomy.pl's POD.
+
+   Thanks to Baohua Wang, who found the initial Windows-specific problem in
+   Bio::Root::Root that led to this fix, to Sendu Bala for fixing
+   Bug #1938, and to Hilmar Lapp for his input.
+
+Bioperl in Cygwin
+
+   Cygwin is a Unix emulator and shell environment available free at
+   http://www.cygwin.com. Bioperl v. 1.* supposedly runs well within Cygwin,
+   though the latest release has not been tested with Cygwin yet. Some
+   users claim that installation of Bioperl is easier within Cygwin than
+   within Windows, but these may be users with UNIX backgrounds. A note on
+   Cygwin: it doesn't write to your Registry, it doesn't alter your system or
+   your existing files in any way, it doesn't create partitions, it simply
+   creates a cygwin/ directory and writes all of its files to that directory.
+   To uninstall Cygwin just delete that directory.
+
+   One advantage of using Bioperl in Cygwin is that all the external modules
+   are available through CPAN - the same cannot be said of ActiveState's PPM
+   utility.
+
+   To get Bioperl running first install the basic Cygwin package as well as
+   the Cygwin perl, make, binutils, and gcc packages. Clicking the View
+   button in the upper right of the installer window enables you to see
+   details on the various packages. Then start up Cygwin and follow the
+   Bioperl installation instructions for UNIX in Bioperl's INSTALL file
+   (for example, THE BIOPERL BUNDLE and INSTALLING BIOPERL THE EASY WAY USING
+   CPAN).
+
+bioperl-db in Cygwin
+
+   This package is installed using the instructions contained in the package,
+   without modification. Since postgres is a package within Cygwin this is
+   probably the easiest of the 3 platforms supported in bioperl-db to
+   install (postgres, Mysql, Oracle).
+
+Cygwin tips
+
+   If you can, install Cygwin on a drive or partition that's
+   NTFS-formatted, not FAT32-formatted. When you install Cygwin on
+   a FAT32 partition you will not be able to set permissions and ownership
+   correctly. In most situations this probably won't make any difference but
+   there may be occasions where this is a problem.
+
+   If you're trying to use some application or resource outside of the Cygwin
+   directory and you're having a problem, remember that Cygwin's path syntax
+   may not be the correct one. Cygwin understands /home/jacky or
+   /cygdrive/e/cygwin/home/jacky (when referring to the E: drive) but the
+   external resource may want E:/cygwin/home/jacky. So your *rc files may end
+   up with paths written in these different syntaxes, depending.
+
+MySQL and DBD::mysql
+
+   You may want to install a relational database in order to use bioperl-db,
+   BioSQL or OBDA. The easiest way to install Mysql is to use
+   the Windows binaries available at http://www.mysql.com. Note that
+   Windows does not have sockets, so you need to force the Mysql connections
+   to use TCP/IP instead. Do this by using the -h, or host, option from the
+   command-line. Example:
+
+ >mysql -h 127.0.0.1 -u <user> -p<password> <database>
+
+   Alternatively you could install postgres instead of MySQL; postgres is
+   already a package in Cygwin.
+
+   One known issue is that DBD::mysql can be tricky to install in Cygwin
+   and this module is required for the bioperl-db, Biosql, and
+   bioperl-pipeline external packages. Fortunately there are some good
+   instructions online:
+
+     * Instructions included with DBD::mysql:
+     
+       http://search.cpan.org/src/JWIED/DBD-mysql-2.1025/INSTALL.html#windows/cygwin
+       
+     * Additional instructions if you run into any problems; this
+       information is more up-to-date, covers post-2.9 DBD::mysql quirks in
+       Cygwin.
+       
+       http://rage.against.org/installingdbdmysqlInCygwin
+
+Expat
+
+   Note that expat comes with Cygwin (it's used by the modules
+   XML::Parser and XML::SAX::ExpatXS, which are used by certain
+   Bioperl modules).
+
+Directory for temporary files
+
+   Set the environment variable TMPDIR; programs like BLAST and
+   clustalw need a place to create temporary files. e.g.:
+
+ setenv TMPDIR e:/cygwin/tmp     # csh, tcsh
+ export TMPDIR=e:/cygwin/tmp    # sh, bash
+
+   This is not the syntax that Cygwin understands, which would be something
+   like /cygdrive/e/cygwin/tmp or /tmp; it is the syntax that a Windows
+   application expects.
+
+   If this variable is not set correctly you'll see errors like this when you
+   run Bio::Tools::Run::StandAloneBlast:
+
+   ------------- EXCEPTION: Bio::Root::Exception -------------
+   MSG: Could not open /tmp/gXkwEbrL0a: No such file or directory
+   STACK: Error::throw
+   ..........
+
+
+
+BLAST
+
+   If you want to use BLAST we recommend that the Windows binary be obtained
+   from NCBI (ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ - the
+   file will be named something like blast-2.2.13-ia32-win32.exe). Then
+   follow the Windows instructions in README.bls. You will also need to set
+   the BLASTDIR environment variable to reflect the directory which holds the
+   blast executable and data folder. You may also want to set other variables
+   to reflect the location of your databases and substitution matrices if
+   they differ from the location of your blast executables; see
+   Installing Bioperl for Unix for more details.
+
+Compiling C code
+
+   Although we've recommended using the BLAST and MySQL binaries
+   you should be able to compile just about everything else from source code
+   using Cygwin's gcc. You'll notice when you're installing Cygwin that many
+   different libraries are also available (gd, jpeg, etc.).

Added: trunk/packages/bioperl/branches/upstream/current/LICENSE
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/LICENSE	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/LICENSE	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,131 @@
+
+
+
+
+                         The "Artistic License"
+
+                                Preamble
+
+The intent of this document is to state the conditions under which a
+Package may be copied, such that the Copyright Holder maintains some
+semblance of artistic control over the development of the package,
+while giving the users of the package the right to use and distribute
+the Package in a more-or-less customary fashion, plus the right to make
+reasonable modifications.
+
+Definitions:
+
+        "Package" refers to the collection of files distributed by the
+        Copyright Holder, and derivatives of that collection of files
+        created through textual modification.
+
+        "Standard Version" refers to such a Package if it has not been
+        modified, or has been modified in accordance with the wishes
+        of the Copyright Holder as specified below.
+
+        "Copyright Holder" is whoever is named in the copyright or
+        copyrights for the package.
+
+        "You" is you, if you're thinking about copying or distributing
+        this Package.
+
+        "Reasonable copying fee" is whatever you can justify on the
+        basis of media cost, duplication charges, time of people involved,
+        and so on.  (You will not be required to justify it to the
+        Copyright Holder, but only to the computing community at large
+        as a market that must bear the fee.)
+
+        "Freely Available" means that no fee is charged for the item
+        itself, though there may be fees involved in handling the item.
+        It also means that recipients of the item may redistribute it
+        under the same conditions they received it.
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications
+derived from the Public Domain or from the Copyright Holder.  A Package
+modified in such a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided
+that you insert a prominent notice in each changed file stating how and
+when you changed that file, and provided that you do at least ONE of the
+following:
+
+    a) place your modifications in the Public Domain or otherwise make them
+    Freely Available, such as by posting said modifications to Usenet or
+    an equivalent medium, or placing the modifications on a major archive
+    site such as uunet.uu.net, or by allowing the Copyright Holder to include
+    your modifications in the Standard Version of the Package.
+
+    b) use the modified Package only within your corporation or organization.
+
+    c) rename any non-standard executables so the names do not conflict
+    with standard executables, which must also be provided, and provide
+    a separate manual page for each non-standard executable that clearly
+    documents how it differs from the Standard Version.
+
+    d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or
+executable form, provided that you do at least ONE of the following:
+
+    a) distribute a Standard Version of the executables and library files,
+    together with instructions (in the manual page or equivalent) on where
+    to get the Standard Version.
+
+    b) accompany the distribution with the machine-readable source of
+    the Package with your modifications.
+
+    c) give non-standard executables non-standard names, and clearly
+    document the differences in manual pages (or equivalent), together
+    with instructions on where to get the Standard Version.
+
+    d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package.  You may charge any fee you choose for support of this
+Package.  You may not charge a fee for this Package itself.  However,
+you may distribute this Package in aggregate with other (possibly
+commercial) programs as part of a larger (possibly commercial) software
+distribution provided that you do not advertise this Package as a
+product of your own.  You may embed this Package's interpreter within
+an executable of yours (by linking); this shall be construed as a mere
+form of aggregation, provided that the complete Standard Version of the
+interpreter is so embedded.
+
+6. The scripts and library files supplied as input to or produced as
+output from the programs of this Package do not automatically fall
+under the copyright of this Package, but belong to whoever generated
+them, and may be sold commercially, and may be aggregated with this
+Package.  If such scripts or library files are aggregated with this
+Package via the so-called "undump" or "unexec" methods of producing a
+binary executable image, then distribution of such an image shall
+neither be construed as a distribution of this Package nor shall it
+fall under the restrictions of Paragraphs 3 and 4, provided that you do
+not represent such an executable image as a Standard Version of this
+Package.
+
+7. C subroutines (or comparably compiled subroutines in other
+languages) supplied by you and linked into this Package in order to
+emulate subroutines and variables of the language defined by this
+Package shall not be considered part of this Package, but are the
+equivalent of input as in Paragraph 6, provided these subroutines do
+not change the language in any way that would cause it to fail the
+regression tests for the language.
+
+8. Aggregation of this Package with a commercial distribution is always
+permitted provided that the use of this Package is embedded; that is,
+when no overt attempt is made to make this Package's interfaces visible
+to the end user of the commercial distribution.  Such use shall not be
+construed as a distribution of this Package.
+
+9. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+                                The End

Added: trunk/packages/bioperl/branches/upstream/current/MANIFEST
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/MANIFEST	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/MANIFEST	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1771 @@
+AUTHORS
+Bio/Align/AlignI.pm
+Bio/Align/DNAStatistics.pm
+Bio/Align/PairwiseStatistics.pm
+Bio/Align/ProteinStatistics.pm
+Bio/Align/StatisticsI.pm
+Bio/Align/Utilities.pm
+Bio/AlignIO.pm
+Bio/AlignIO/bl2seq.pm
+Bio/AlignIO/clustalw.pm
+Bio/AlignIO/emboss.pm
+Bio/AlignIO/fasta.pm
+Bio/AlignIO/largemultifasta.pm
+Bio/AlignIO/maf.pm
+Bio/AlignIO/mase.pm
+Bio/AlignIO/mega.pm
+Bio/AlignIO/meme.pm
+Bio/AlignIO/metafasta.pm
+Bio/AlignIO/msf.pm
+Bio/AlignIO/nexus.pm
+Bio/AlignIO/pfam.pm
+Bio/AlignIO/phylip.pm
+Bio/AlignIO/po.pm
+Bio/AlignIO/prodom.pm
+Bio/AlignIO/psi.pm
+Bio/AlignIO/selex.pm
+Bio/AlignIO/stockholm.pm
+Bio/AnalysisI.pm
+Bio/AnalysisParserI.pm
+Bio/AnalysisResultI.pm
+Bio/AnnotatableI.pm
+Bio/Annotation/AnnotationFactory.pm
+Bio/Annotation/Collection.pm
+Bio/Annotation/Comment.pm
+Bio/Annotation/DBLink.pm
+Bio/Annotation/OntologyTerm.pm
+Bio/Annotation/Reference.pm
+Bio/Annotation/SimpleValue.pm
+Bio/Annotation/StructuredValue.pm
+Bio/Annotation/Target.pm
+Bio/Annotation/TypeManager.pm
+Bio/AnnotationCollectionI.pm
+Bio/AnnotationI.pm
+Bio/Assembly/Contig.pm
+Bio/Assembly/ContigAnalysis.pm
+Bio/Assembly/IO.pm
+Bio/Assembly/IO/ace.pm
+Bio/Assembly/IO/phrap.pm
+Bio/Assembly/Scaffold.pm
+Bio/Assembly/ScaffoldI.pm
+Bio/Assembly/Singlet.pm
+Bio/Biblio.pm
+Bio/Biblio/Article.pm
+Bio/Biblio/BiblioBase.pm
+Bio/Biblio/Book.pm
+Bio/Biblio/BookArticle.pm
+Bio/Biblio/IO.pm
+Bio/Biblio/IO/medline2ref.pm
+Bio/Biblio/IO/medlinexml.pm
+Bio/Biblio/IO/pubmed2ref.pm
+Bio/Biblio/IO/pubmedxml.pm
+Bio/Biblio/Journal.pm
+Bio/Biblio/JournalArticle.pm
+Bio/Biblio/MedlineArticle.pm
+Bio/Biblio/MedlineBook.pm
+Bio/Biblio/MedlineBookArticle.pm
+Bio/Biblio/MedlineJournal.pm
+Bio/Biblio/MedlineJournalArticle.pm
+Bio/Biblio/Organisation.pm
+Bio/Biblio/Patent.pm
+Bio/Biblio/Person.pm
+Bio/Biblio/Proceeding.pm
+Bio/Biblio/Provider.pm
+Bio/Biblio/PubmedArticle.pm
+Bio/Biblio/PubmedBookArticle.pm
+Bio/Biblio/PubmedJournalArticle.pm
+Bio/Biblio/Ref.pm
+Bio/Biblio/Service.pm
+Bio/Biblio/TechReport.pm
+Bio/Biblio/Thesis.pm
+Bio/Biblio/WebResource.pm
+Bio/Cluster/ClusterFactory.pm
+Bio/Cluster/FamilyI.pm
+Bio/Cluster/SequenceFamily.pm
+Bio/Cluster/UniGene.pm
+Bio/Cluster/UniGeneI.pm
+Bio/ClusterI.pm
+Bio/ClusterIO.pm
+Bio/ClusterIO/dbsnp.pm
+Bio/ClusterIO/unigene.pm
+Bio/CodonUsage/IO.pm
+Bio/CodonUsage/Table.pm
+Bio/Coordinate/Chain.pm
+Bio/Coordinate/Collection.pm
+Bio/Coordinate/ExtrapolatingPair.pm
+Bio/Coordinate/GeneMapper.pm
+Bio/Coordinate/Graph.pm
+Bio/Coordinate/MapperI.pm
+Bio/Coordinate/Pair.pm
+Bio/Coordinate/Result.pm
+Bio/Coordinate/Result/Gap.pm
+Bio/Coordinate/Result/Match.pm
+Bio/Coordinate/ResultI.pm
+Bio/Coordinate/Utils.pm
+Bio/Das/FeatureTypeI.pm
+Bio/Das/SegmentI.pm
+Bio/DasI.pm
+Bio/DB/Ace.pm
+Bio/DB/Biblio/biofetch.pm
+Bio/DB/Biblio/eutils.pm
+Bio/DB/Biblio/pdf.pm
+Bio/DB/Biblio/soap.pm
+Bio/DB/BiblioI.pm
+Bio/DB/BioFetch.pm
+Bio/DB/CUTG.pm
+Bio/DB/DBFetch.pm
+Bio/DB/EMBL.pm
+Bio/DB/EntrezGene.pm
+Bio/DB/EUtilities.pm
+Bio/DB/EUtilities/Cookie.pm
+Bio/DB/EUtilities/efetch.pm
+Bio/DB/EUtilities/egquery.pm
+Bio/DB/EUtilities/einfo.pm
+Bio/DB/EUtilities/elink.pm
+Bio/DB/EUtilities/ElinkData.pm
+Bio/DB/EUtilities/epost.pm
+Bio/DB/EUtilities/esearch.pm
+Bio/DB/EUtilities/esummary.pm
+Bio/DB/Expression.pm
+Bio/DB/Expression/geo.pm
+Bio/DB/Failover.pm
+Bio/DB/Fasta.pm
+Bio/DB/FileCache.pm
+Bio/DB/Flat.pm
+Bio/DB/Flat/BDB.pm
+Bio/DB/Flat/BDB/embl.pm
+Bio/DB/Flat/BDB/fasta.pm
+Bio/DB/Flat/BDB/genbank.pm
+Bio/DB/Flat/BDB/swiss.pm
+Bio/DB/Flat/BDB/swissprot.pm
+Bio/DB/Flat/BinarySearch.pm
+Bio/DB/GDB.pm
+Bio/DB/GenBank.pm
+Bio/DB/GenericWebDBI.pm
+Bio/DB/GenPept.pm
+Bio/DB/GFF.pm
+Bio/DB/GFF/Adaptor/ace.pm
+Bio/DB/GFF/Adaptor/berkeleydb.pm
+Bio/DB/GFF/Adaptor/berkeleydb/iterator.pm
+Bio/DB/GFF/Adaptor/biofetch.pm
+Bio/DB/GFF/Adaptor/biofetch_oracle.pm
+Bio/DB/GFF/Adaptor/dbi.pm
+Bio/DB/GFF/Adaptor/dbi/caching_handle.pm
+Bio/DB/GFF/Adaptor/dbi/iterator.pm
+Bio/DB/GFF/Adaptor/dbi/mysql.pm
+Bio/DB/GFF/Adaptor/dbi/mysqlace.pm
+Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm
+Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm
+Bio/DB/GFF/Adaptor/dbi/oracle.pm
+Bio/DB/GFF/Adaptor/dbi/oracleace.pm
+Bio/DB/GFF/Adaptor/dbi/pg.pm
+Bio/DB/GFF/Adaptor/dbi/pg_fts.pm
+Bio/DB/GFF/Adaptor/memory.pm
+Bio/DB/GFF/Adaptor/memory/feature_serializer.pm
+Bio/DB/GFF/Adaptor/memory/iterator.pm
+Bio/DB/GFF/Aggregator.pm
+Bio/DB/GFF/Aggregator/alignment.pm
+Bio/DB/GFF/Aggregator/clone.pm
+Bio/DB/GFF/Aggregator/coding.pm
+Bio/DB/GFF/Aggregator/match.pm
+Bio/DB/GFF/Aggregator/none.pm
+Bio/DB/GFF/Aggregator/processed_transcript.pm
+Bio/DB/GFF/Aggregator/so_transcript.pm
+Bio/DB/GFF/Aggregator/transcript.pm
+Bio/DB/GFF/Aggregator/ucsc_acembly.pm
+Bio/DB/GFF/Aggregator/ucsc_ensgene.pm
+Bio/DB/GFF/Aggregator/ucsc_genscan.pm
+Bio/DB/GFF/Aggregator/ucsc_refgene.pm
+Bio/DB/GFF/Aggregator/ucsc_sanger22.pm
+Bio/DB/GFF/Aggregator/ucsc_sanger22pseudo.pm
+Bio/DB/GFF/Aggregator/ucsc_softberry.pm
+Bio/DB/GFF/Aggregator/ucsc_twinscan.pm
+Bio/DB/GFF/Aggregator/ucsc_unigene.pm
+Bio/DB/GFF/Featname.pm
+Bio/DB/GFF/Feature.pm
+Bio/DB/GFF/Homol.pm
+Bio/DB/GFF/RelSegment.pm
+Bio/DB/GFF/Segment.pm
+Bio/DB/GFF/Typename.pm
+Bio/DB/GFF/Util/Binning.pm
+Bio/DB/GFF/Util/Rearrange.pm
+Bio/DB/InMemoryCache.pm
+Bio/DB/LocationI.pm
+Bio/DB/MeSH.pm
+Bio/DB/NCBIHelper.pm
+Bio/DB/Query/GenBank.pm
+Bio/DB/Query/WebQuery.pm
+Bio/DB/QueryI.pm
+Bio/DB/RandomAccessI.pm
+Bio/DB/ReferenceI.pm
+Bio/DB/RefSeq.pm
+Bio/DB/Registry.pm
+Bio/DB/SeqFeature.pm
+Bio/DB/SeqFeature/NormalizedFeature.pm
+Bio/DB/SeqFeature/NormalizedFeatureI.pm
+Bio/DB/SeqFeature/NormalizedTableFeatureI.pm
+Bio/DB/SeqFeature/Segment.pm
+Bio/DB/SeqFeature/Store.pm
+Bio/DB/SeqFeature/Store/bdb.pm
+Bio/DB/SeqFeature/Store/berkeleydb.pm
+Bio/DB/SeqFeature/Store/DBI/Iterator.pm
+Bio/DB/SeqFeature/Store/DBI/mysql.pm
+Bio/DB/SeqFeature/Store/GFF3Loader.pm
+Bio/DB/SeqFeature/Store/memory.pm
+Bio/DB/SeqHound.pm
+Bio/DB/SeqI.pm
+Bio/DB/SeqVersion.pm
+Bio/DB/SeqVersion/gi.pm
+Bio/DB/SwissProt.pm
+Bio/DB/Taxonomy.pm
+Bio/DB/Taxonomy/entrez.pm
+Bio/DB/Taxonomy/flatfile.pm
+Bio/DB/Taxonomy/list.pm
+Bio/DB/Universal.pm
+Bio/DB/UpdateableSeqI.pm
+Bio/DB/WebDBSeqI.pm
+Bio/DB/XEMBL.pm
+Bio/DB/XEMBLService.pm
+Bio/DBLinkContainerI.pm
+Bio/DescribableI.pm
+Bio/Event/EventGeneratorI.pm
+Bio/Event/EventHandlerI.pm
+Bio/Expression/Contact.pm
+Bio/Expression/DataSet.pm
+Bio/Expression/FeatureGroup.pm
+Bio/Expression/FeatureGroup/FeatureGroupMas50.pm
+Bio/Expression/FeatureI.pm
+Bio/Expression/FeatureSet/FeatureSetMas50.pm
+Bio/Expression/Platform.pm
+Bio/Expression/ProbeI.pm
+Bio/Expression/Sample.pm
+Bio/Factory/AnalysisI.pm
+Bio/Factory/ApplicationFactoryI.pm
+Bio/Factory/DriverFactory.pm
+Bio/Factory/FTLocationFactory.pm
+Bio/Factory/HitFactoryI.pm
+Bio/Factory/LocationFactoryI.pm
+Bio/Factory/MapFactoryI.pm
+Bio/Factory/ObjectBuilderI.pm
+Bio/Factory/ObjectFactory.pm
+Bio/Factory/ObjectFactoryI.pm
+Bio/Factory/ResultFactoryI.pm
+Bio/Factory/SeqAnalysisParserFactory.pm
+Bio/Factory/SeqAnalysisParserFactoryI.pm
+Bio/Factory/SequenceFactoryI.pm
+Bio/Factory/SequenceProcessorI.pm
+Bio/Factory/SequenceStreamI.pm
+Bio/Factory/TreeFactoryI.pm
+Bio/FeatureHolderI.pm
+Bio/FeatureIO.pm
+Bio/FeatureIO/bed.pm
+Bio/FeatureIO/gff.pm
+Bio/FeatureIO/gtf.pm
+Bio/FeatureIO/interpro.pm
+Bio/FeatureIO/ptt.pm
+Bio/Graph/Edge.pm
+Bio/Graph/IO.pm
+Bio/Graph/IO/dip.pm
+Bio/Graph/IO/psi_xml.pm
+Bio/Graph/ProteinGraph.pm
+Bio/Graph/SimpleGraph.pm
+Bio/Graph/SimpleGraph/Traversal.pm
+Bio/Graphics.pm
+Bio/Graphics/ConfiguratorI.pm
+Bio/Graphics/Feature.pm
+Bio/Graphics/FeatureBase.pm
+Bio/Graphics/FeatureFile.pm
+Bio/Graphics/FeatureFile/Iterator.pm
+Bio/Graphics/Glyph.pm
+Bio/Graphics/Glyph/alignment.pm
+Bio/Graphics/Glyph/anchored_arrow.pm
+Bio/Graphics/Glyph/arrow.pm
+Bio/Graphics/Glyph/box.pm
+Bio/Graphics/Glyph/broken_line.pm
+Bio/Graphics/Glyph/cds.pm
+Bio/Graphics/Glyph/christmas_arrow.pm
+Bio/Graphics/Glyph/crossbox.pm
+Bio/Graphics/Glyph/dashed_line.pm
+Bio/Graphics/Glyph/diamond.pm
+Bio/Graphics/Glyph/dna.pm
+Bio/Graphics/Glyph/dot.pm
+Bio/Graphics/Glyph/dumbbell.pm
+Bio/Graphics/Glyph/ellipse.pm
+Bio/Graphics/Glyph/ex.pm
+Bio/Graphics/Glyph/extending_arrow.pm
+Bio/Graphics/Glyph/Factory.pm
+Bio/Graphics/Glyph/flag.pm
+Bio/Graphics/Glyph/gene.pm
+Bio/Graphics/Glyph/generic.pm
+Bio/Graphics/Glyph/graded_segments.pm
+Bio/Graphics/Glyph/group.pm
+Bio/Graphics/Glyph/heterogeneous_segments.pm
+Bio/Graphics/Glyph/image.pm
+Bio/Graphics/Glyph/lightning.pm
+Bio/Graphics/Glyph/line.pm
+Bio/Graphics/Glyph/merge_parts.pm
+Bio/Graphics/Glyph/merged_alignment.pm
+Bio/Graphics/Glyph/minmax.pm
+Bio/Graphics/Glyph/oval.pm
+Bio/Graphics/Glyph/pentagram.pm
+Bio/Graphics/Glyph/pinsertion.pm
+Bio/Graphics/Glyph/primers.pm
+Bio/Graphics/Glyph/processed_transcript.pm
+Bio/Graphics/Glyph/protein.pm
+Bio/Graphics/Glyph/ragged_ends.pm
+Bio/Graphics/Glyph/redgreen_box.pm
+Bio/Graphics/Glyph/redgreen_segment.pm
+Bio/Graphics/Glyph/repeating_shape.pm
+Bio/Graphics/Glyph/rndrect.pm
+Bio/Graphics/Glyph/ruler_arrow.pm
+Bio/Graphics/Glyph/saw_teeth.pm
+Bio/Graphics/Glyph/segmented_keyglyph.pm
+Bio/Graphics/Glyph/segments.pm
+Bio/Graphics/Glyph/so_transcript.pm
+Bio/Graphics/Glyph/span.pm
+Bio/Graphics/Glyph/splice_site.pm
+Bio/Graphics/Glyph/text_in_box.pm
+Bio/Graphics/Glyph/three_letters.pm
+Bio/Graphics/Glyph/tic_tac_toe.pm
+Bio/Graphics/Glyph/toomany.pm
+Bio/Graphics/Glyph/track.pm
+Bio/Graphics/Glyph/transcript.pm
+Bio/Graphics/Glyph/transcript2.pm
+Bio/Graphics/Glyph/translation.pm
+Bio/Graphics/Glyph/triangle.pm
+Bio/Graphics/Glyph/two_bolts.pm
+Bio/Graphics/Glyph/wave.pm
+Bio/Graphics/Glyph/weighted_arrow.pm
+Bio/Graphics/Glyph/whiskerplot.pm
+Bio/Graphics/Glyph/xyplot.pm
+Bio/Graphics/Panel.pm
+Bio/Graphics/Pictogram.pm
+Bio/Graphics/RendererI.pm
+Bio/Graphics/Util.pm
+Bio/IdCollectionI.pm
+Bio/IdentifiableI.pm
+Bio/Index/Abstract.pm
+Bio/Index/AbstractSeq.pm
+Bio/Index/Blast.pm
+Bio/Index/EMBL.pm
+Bio/Index/Fasta.pm
+Bio/Index/Fastq.pm
+Bio/Index/GenBank.pm
+Bio/Index/Hmmer.pm
+Bio/Index/Qual.pm
+Bio/Index/SwissPfam.pm
+Bio/Index/Swissprot.pm
+Bio/LiveSeq/AARange.pm
+Bio/LiveSeq/Chain.pm
+Bio/LiveSeq/ChainI.pm
+Bio/LiveSeq/DNA.pm
+Bio/LiveSeq/Exon.pm
+Bio/LiveSeq/Gene.pm
+Bio/LiveSeq/Intron.pm
+Bio/LiveSeq/IO/BioPerl.pm
+Bio/LiveSeq/IO/Loader.pm
+Bio/LiveSeq/IO/README
+Bio/LiveSeq/Mutation.pm
+Bio/LiveSeq/Mutator.pm
+Bio/LiveSeq/Prim_Transcript.pm
+Bio/LiveSeq/Range.pm
+Bio/LiveSeq/Repeat_Region.pm
+Bio/LiveSeq/Repeat_Unit.pm
+Bio/LiveSeq/SeqI.pm
+Bio/LiveSeq/Transcript.pm
+Bio/LiveSeq/Translation.pm
+Bio/LocatableSeq.pm
+Bio/Location/Atomic.pm
+Bio/Location/AvWithinCoordPolicy.pm
+Bio/Location/CoordinatePolicyI.pm
+Bio/Location/Fuzzy.pm
+Bio/Location/FuzzyLocationI.pm
+Bio/Location/NarrowestCoordPolicy.pm
+Bio/Location/Simple.pm
+Bio/Location/Split.pm
+Bio/Location/SplitLocationI.pm
+Bio/Location/WidestCoordPolicy.pm
+Bio/LocationI.pm
+Bio/Map/Clone.pm
+Bio/Map/Contig.pm
+Bio/Map/CytoMap.pm
+Bio/Map/CytoMarker.pm
+Bio/Map/CytoPosition.pm
+Bio/Map/EntityI.pm
+Bio/Map/FPCMarker.pm
+Bio/Map/LinkageMap.pm
+Bio/Map/LinkagePosition.pm
+Bio/Map/MapI.pm
+Bio/Map/Mappable.pm
+Bio/Map/MappableI.pm
+Bio/Map/Marker.pm
+Bio/Map/MarkerI.pm
+Bio/Map/Microsatellite.pm
+Bio/Map/OrderedPosition.pm
+Bio/Map/OrderedPositionWithDistance.pm
+Bio/Map/Physical.pm
+Bio/Map/Position.pm
+Bio/Map/PositionHandler.pm
+Bio/Map/PositionHandlerI.pm
+Bio/Map/PositionI.pm
+Bio/Map/Relative.pm
+Bio/Map/RelativeI.pm
+Bio/Map/SimpleMap.pm
+Bio/MapIO.pm
+Bio/MapIO/fpc.pm
+Bio/MapIO/mapmaker.pm
+Bio/Matrix/Generic.pm
+Bio/Matrix/IO.pm
+Bio/Matrix/IO/phylip.pm
+Bio/Matrix/IO/scoring.pm
+Bio/Matrix/MatrixI.pm
+Bio/Matrix/PhylipDist.pm
+Bio/Matrix/PSM/InstanceSite.pm
+Bio/Matrix/PSM/InstanceSiteI.pm
+Bio/Matrix/PSM/IO.pm
+Bio/Matrix/PSM/IO/mast.pm
+Bio/Matrix/PSM/IO/masta.pm
+Bio/Matrix/PSM/IO/meme.pm
+Bio/Matrix/PSM/IO/psiblast.pm
+Bio/Matrix/PSM/IO/transfac.pm
+Bio/Matrix/PSM/ProtMatrix.pm
+Bio/Matrix/PSM/ProtPsm.pm
+Bio/Matrix/PSM/Psm.pm
+Bio/Matrix/PSM/PsmHeader.pm
+Bio/Matrix/PSM/PsmHeaderI.pm
+Bio/Matrix/PSM/PsmI.pm
+Bio/Matrix/PSM/SiteMatrix.pm
+Bio/Matrix/PSM/SiteMatrixI.pm
+Bio/Matrix/Scoring.pm
+Bio/Ontology/DocumentRegistry.pm
+Bio/Ontology/GOterm.pm
+Bio/Ontology/InterProTerm.pm
+Bio/Ontology/OBOEngine.pm
+Bio/Ontology/OBOterm.pm
+Bio/Ontology/Ontology.pm
+Bio/Ontology/OntologyEngineI.pm
+Bio/Ontology/OntologyI.pm
+Bio/Ontology/OntologyStore.pm
+Bio/Ontology/Path.pm
+Bio/Ontology/PathI.pm
+Bio/Ontology/Relationship.pm
+Bio/Ontology/RelationshipFactory.pm
+Bio/Ontology/RelationshipI.pm
+Bio/Ontology/RelationshipType.pm
+Bio/Ontology/SimpleGOEngine.pm
+Bio/Ontology/SimpleGOEngine/GraphAdaptor.pm
+Bio/Ontology/SimpleGOEngine/GraphAdaptor02.pm
+Bio/Ontology/SimpleOntologyEngine.pm
+Bio/Ontology/Term.pm
+Bio/Ontology/TermFactory.pm
+Bio/Ontology/TermI.pm
+Bio/OntologyIO.pm
+Bio/OntologyIO/dagflat.pm
+Bio/OntologyIO/goflat.pm
+Bio/OntologyIO/Handlers/BaseSAXHandler.pm
+Bio/OntologyIO/Handlers/InterPro_BioSQL_Handler.pm
+Bio/OntologyIO/Handlers/InterProHandler.pm
+Bio/OntologyIO/InterProParser.pm
+Bio/OntologyIO/obo.pm
+Bio/OntologyIO/simplehierarchy.pm
+Bio/OntologyIO/soflat.pm
+Bio/Perl.pm
+Bio/Phenotype/Correlate.pm
+Bio/Phenotype/Measure.pm
+Bio/Phenotype/MeSH/Term.pm
+Bio/Phenotype/MeSH/Twig.pm
+Bio/Phenotype/OMIM/MiniMIMentry.pm
+Bio/Phenotype/OMIM/OMIMentry.pm
+Bio/Phenotype/OMIM/OMIMentryAllelicVariant.pm
+Bio/Phenotype/OMIM/OMIMparser.pm
+Bio/Phenotype/Phenotype.pm
+Bio/Phenotype/PhenotypeI.pm
+Bio/PopGen/Genotype.pm
+Bio/PopGen/GenotypeI.pm
+Bio/PopGen/HtSNP.pm
+Bio/PopGen/Individual.pm
+Bio/PopGen/IndividualI.pm
+Bio/PopGen/IO.pm
+Bio/PopGen/IO/csv.pm
+Bio/PopGen/IO/hapmap.pm
+Bio/PopGen/IO/phase.pm
+Bio/PopGen/IO/prettybase.pm
+Bio/PopGen/Marker.pm
+Bio/PopGen/MarkerI.pm
+Bio/PopGen/PopStats.pm
+Bio/PopGen/Population.pm
+Bio/PopGen/PopulationI.pm
+Bio/PopGen/Simulation/Coalescent.pm
+Bio/PopGen/Simulation/GeneticDrift.pm
+Bio/PopGen/Statistics.pm
+Bio/PopGen/TagHaplotype.pm
+Bio/PopGen/Utilities.pm
+Bio/PrimarySeq.pm
+Bio/PrimarySeqI.pm
+Bio/PullParserI.pm
+Bio/Range.pm
+Bio/RangeI.pm
+Bio/Restriction/Analysis.pm
+Bio/Restriction/Enzyme.pm
+Bio/Restriction/Enzyme/MultiCut.pm
+Bio/Restriction/Enzyme/MultiSite.pm
+Bio/Restriction/EnzymeCollection.pm
+Bio/Restriction/EnzymeI.pm
+Bio/Restriction/IO.pm
+Bio/Restriction/IO/bairoch.pm
+Bio/Restriction/IO/base.pm
+Bio/Restriction/IO/itype2.pm
+Bio/Restriction/IO/withrefm.pm
+Bio/Root/Exception.pm
+Bio/Root/HTTPget.pm
+Bio/Root/IO.pm
+Bio/Root/Root.pm
+Bio/Root/RootI.pm
+Bio/Root/Storable.pm
+Bio/Root/Version.pm
+Bio/Search/BlastStatistics.pm
+Bio/Search/BlastUtils.pm
+Bio/Search/DatabaseI.pm
+Bio/Search/GenericDatabase.pm
+Bio/Search/GenericStatistics.pm
+Bio/Search/Hit/BlastHit.pm
+Bio/Search/Hit/Fasta.pm
+Bio/Search/Hit/GenericHit.pm
+Bio/Search/Hit/HitFactory.pm
+Bio/Search/Hit/HitI.pm
+Bio/Search/Hit/HMMERHit.pm
+Bio/Search/Hit/HmmpfamHit.pm
+Bio/Search/Hit/PsiBlastHit.pm
+Bio/Search/Hit/PullHitI.pm
+Bio/Search/HSP/BlastHSP.pm
+Bio/Search/HSP/FastaHSP.pm
+Bio/Search/HSP/GenericHSP.pm
+Bio/Search/HSP/HMMERHSP.pm
+Bio/Search/HSP/HmmpfamHSP.pm
+Bio/Search/HSP/HSPFactory.pm
+Bio/Search/HSP/HSPI.pm
+Bio/Search/HSP/PsiBlastHSP.pm
+Bio/Search/HSP/PSLHSP.pm
+Bio/Search/HSP/PullHSPI.pm
+Bio/Search/HSP/WABAHSP.pm
+Bio/Search/Iteration/GenericIteration.pm
+Bio/Search/Iteration/IterationI.pm
+Bio/Search/Processor.pm
+Bio/Search/Result/BlastResult.pm
+Bio/Search/Result/GenericResult.pm
+Bio/Search/Result/HMMERResult.pm
+Bio/Search/Result/HmmpfamResult.pm
+Bio/Search/Result/PullResultI.pm
+Bio/Search/Result/ResultFactory.pm
+Bio/Search/Result/ResultI.pm
+Bio/Search/Result/WABAResult.pm
+Bio/Search/SearchUtils.pm
+Bio/Search/StatisticsI.pm
+Bio/SearchDist.pm
+Bio/SearchIO.pm
+Bio/SearchIO/axt.pm
+Bio/SearchIO/blast.pm
+Bio/SearchIO/blasttable.pm
+Bio/SearchIO/blastxml.pm
+Bio/SearchIO/EventHandlerI.pm
+Bio/SearchIO/exonerate.pm
+Bio/SearchIO/fasta.pm
+Bio/SearchIO/FastHitEventBuilder.pm
+Bio/SearchIO/hmmer.pm
+Bio/SearchIO/hmmer_pull.pm
+Bio/SearchIO/IteratedSearchResultEventBuilder.pm
+Bio/SearchIO/megablast.pm
+Bio/SearchIO/psl.pm
+Bio/SearchIO/SearchResultEventBuilder.pm
+Bio/SearchIO/SearchWriterI.pm
+Bio/SearchIO/sim4.pm
+Bio/SearchIO/waba.pm
+Bio/SearchIO/wise.pm
+Bio/SearchIO/Writer/BSMLResultWriter.pm
+Bio/SearchIO/Writer/GbrowseGFF.pm
+Bio/SearchIO/Writer/HitTableWriter.pm
+Bio/SearchIO/Writer/HSPTableWriter.pm
+Bio/SearchIO/Writer/HTMLResultWriter.pm
+Bio/SearchIO/Writer/ResultTableWriter.pm
+Bio/SearchIO/Writer/TextResultWriter.pm
+Bio/Seq.pm
+Bio/Seq/BaseSeqProcessor.pm
+Bio/Seq/EncodedSeq.pm
+Bio/Seq/LargeLocatableSeq.pm
+Bio/Seq/LargePrimarySeq.pm
+Bio/Seq/LargeSeq.pm
+Bio/Seq/LargeSeqI.pm
+Bio/Seq/Meta.pm
+Bio/Seq/Meta/Array.pm
+Bio/Seq/MetaI.pm
+Bio/Seq/PrimaryQual.pm
+Bio/Seq/PrimedSeq.pm
+Bio/Seq/QualI.pm
+Bio/Seq/Quality.pm
+Bio/Seq/RichSeq.pm
+Bio/Seq/RichSeqI.pm
+Bio/Seq/SeqBuilder.pm
+Bio/Seq/SeqFactory.pm
+Bio/Seq/SeqFastaSpeedFactory.pm
+Bio/Seq/SequenceTrace.pm
+Bio/Seq/SeqWithQuality.pm
+Bio/Seq/TraceI.pm
+Bio/SeqAnalysisParserI.pm
+Bio/SeqFeature/Annotated.pm
+Bio/SeqFeature/AnnotationAdaptor.pm
+Bio/SeqFeature/Collection.pm
+Bio/SeqFeature/CollectionI.pm
+Bio/SeqFeature/Computation.pm
+Bio/SeqFeature/FeaturePair.pm
+Bio/SeqFeature/Gene/Exon.pm
+Bio/SeqFeature/Gene/ExonI.pm
+Bio/SeqFeature/Gene/GeneStructure.pm
+Bio/SeqFeature/Gene/GeneStructureI.pm
+Bio/SeqFeature/Gene/Intron.pm
+Bio/SeqFeature/Gene/NC_Feature.pm
+Bio/SeqFeature/Gene/Poly_A_site.pm
+Bio/SeqFeature/Gene/Promoter.pm
+Bio/SeqFeature/Gene/Transcript.pm
+Bio/SeqFeature/Gene/TranscriptI.pm
+Bio/SeqFeature/Gene/UTR.pm
+Bio/SeqFeature/Generic.pm
+Bio/SeqFeature/PositionProxy.pm
+Bio/SeqFeature/Primer.pm
+Bio/SeqFeature/Similarity.pm
+Bio/SeqFeature/SimilarityPair.pm
+Bio/SeqFeature/SiRNA/Oligo.pm
+Bio/SeqFeature/SiRNA/Pair.pm
+Bio/SeqFeature/Tools/FeatureNamer.pm
+Bio/SeqFeature/Tools/IDHandler.pm
+Bio/SeqFeature/Tools/TypeMapper.pm
+Bio/SeqFeature/Tools/Unflattener.pm
+Bio/SeqFeature/TypedSeqFeatureI.pm
+Bio/SeqFeatureI.pm
+Bio/SeqI.pm
+Bio/SeqIO.pm
+Bio/SeqIO/abi.pm
+Bio/SeqIO/ace.pm
+Bio/SeqIO/agave.pm
+Bio/SeqIO/alf.pm
+Bio/SeqIO/asciitree.pm
+Bio/SeqIO/bsml.pm
+Bio/SeqIO/bsml_sax.pm
+Bio/SeqIO/chadoxml.pm
+Bio/SeqIO/chaos.pm
+Bio/SeqIO/chaosxml.pm
+Bio/SeqIO/ctf.pm
+Bio/SeqIO/embl.pm
+Bio/SeqIO/entrezgene.pm
+Bio/SeqIO/excel.pm
+Bio/SeqIO/exp.pm
+Bio/SeqIO/fasta.pm
+Bio/SeqIO/fastq.pm
+Bio/SeqIO/FTHelper.pm
+Bio/SeqIO/game.pm
+Bio/SeqIO/game/featHandler.pm
+Bio/SeqIO/game/gameHandler.pm
+Bio/SeqIO/game/gameSubs.pm
+Bio/SeqIO/game/gameWriter.pm
+Bio/SeqIO/game/seqHandler.pm
+Bio/SeqIO/gcg.pm
+Bio/SeqIO/genbank.pm
+Bio/SeqIO/interpro.pm
+Bio/SeqIO/kegg.pm
+Bio/SeqIO/largefasta.pm
+Bio/SeqIO/lasergene.pm
+Bio/SeqIO/locuslink.pm
+Bio/SeqIO/metafasta.pm
+Bio/SeqIO/MultiFile.pm
+Bio/SeqIO/phd.pm
+Bio/SeqIO/pir.pm
+Bio/SeqIO/pln.pm
+Bio/SeqIO/qual.pm
+Bio/SeqIO/raw.pm
+Bio/SeqIO/scf.pm
+Bio/SeqIO/strider.pm
+Bio/SeqIO/swiss.pm
+Bio/SeqIO/tab.pm
+Bio/SeqIO/table.pm
+Bio/SeqIO/tigr.pm
+Bio/SeqIO/tigrxml.pm
+Bio/SeqIO/tinyseq.pm
+Bio/SeqIO/tinyseq/tinyseqHandler.pm
+Bio/SeqIO/ztr.pm
+Bio/SeqUtils.pm
+Bio/SimpleAlign.pm
+Bio/SimpleAnalysisI.pm
+Bio/Species.pm
+Bio/Structure/Atom.pm
+Bio/Structure/Chain.pm
+Bio/Structure/Entry.pm
+Bio/Structure/IO.pm
+Bio/Structure/IO/pdb.pm
+Bio/Structure/Model.pm
+Bio/Structure/Residue.pm
+Bio/Structure/SecStr/DSSP/Res.pm
+Bio/Structure/SecStr/STRIDE/Res.pm
+Bio/Structure/StructureI.pm
+Bio/Symbol/Alphabet.pm
+Bio/Symbol/AlphabetI.pm
+Bio/Symbol/DNAAlphabet.pm
+Bio/Symbol/ProteinAlphabet.pm
+Bio/Symbol/README.Symbol
+Bio/Symbol/Symbol.pm
+Bio/Symbol/SymbolI.pm
+Bio/Taxon.pm
+Bio/Taxonomy.pm
+Bio/Taxonomy/FactoryI.pm
+Bio/Taxonomy/Node.pm
+Bio/Taxonomy/Taxon.pm
+Bio/Taxonomy/Tree.pm
+Bio/Tools/AlignFactory.pm
+Bio/Tools/Alignment/Consed.pm
+Bio/Tools/Alignment/Trim.pm
+Bio/Tools/Analysis/DNA/ESEfinder.pm
+Bio/Tools/Analysis/Protein/Domcut.pm
+Bio/Tools/Analysis/Protein/ELM.pm
+Bio/Tools/Analysis/Protein/GOR4.pm
+Bio/Tools/Analysis/Protein/HNN.pm
+Bio/Tools/Analysis/Protein/Mitoprot.pm
+Bio/Tools/Analysis/Protein/NetPhos.pm
+Bio/Tools/Analysis/Protein/Scansite.pm
+Bio/Tools/Analysis/Protein/Sopma.pm
+Bio/Tools/Analysis/SimpleAnalysisBase.pm
+Bio/Tools/AnalysisResult.pm
+Bio/Tools/Blat.pm
+Bio/Tools/BPbl2seq.pm
+Bio/Tools/BPlite.pm
+Bio/Tools/BPlite/HSP.pm
+Bio/Tools/BPlite/Iteration.pm
+Bio/Tools/BPlite/Sbjct.pm
+Bio/Tools/BPpsilite.pm
+Bio/Tools/CodonTable.pm
+Bio/Tools/Coil.pm
+Bio/Tools/dpAlign.pm
+Bio/Tools/ECnumber.pm
+Bio/Tools/EMBOSS/Palindrome.pm
+Bio/Tools/EPCR.pm
+Bio/Tools/Eponine.pm
+Bio/Tools/ERPIN.pm
+Bio/Tools/Est2Genome.pm
+Bio/Tools/ESTScan.pm
+Bio/Tools/Fgenesh.pm
+Bio/Tools/FootPrinter.pm
+Bio/Tools/Gel.pm
+Bio/Tools/Geneid.pm
+Bio/Tools/Genemark.pm
+Bio/Tools/Genewise.pm
+Bio/Tools/Genomewise.pm
+Bio/Tools/Genscan.pm
+Bio/Tools/GFF.pm
+Bio/Tools/Glimmer.pm
+Bio/Tools/Grail.pm
+Bio/Tools/GuessSeqFormat.pm
+Bio/Tools/HMM.pm
+Bio/Tools/HMMER/Domain.pm
+Bio/Tools/HMMER/Results.pm
+Bio/Tools/HMMER/Set.pm
+Bio/Tools/Hmmpfam.pm
+Bio/Tools/ipcress.pm
+Bio/Tools/isPcr.pm
+Bio/Tools/IUPAC.pm
+Bio/Tools/Lucy.pm
+Bio/Tools/MZEF.pm
+Bio/Tools/OddCodes.pm
+Bio/Tools/Phylo/Molphy.pm
+Bio/Tools/Phylo/Molphy/Result.pm
+Bio/Tools/Phylo/PAML.pm
+Bio/Tools/Phylo/PAML/ModelResult.pm
+Bio/Tools/Phylo/PAML/Result.pm
+Bio/Tools/Phylo/Phylip/ProtDist.pm
+Bio/Tools/pICalculator.pm
+Bio/Tools/Prediction/Exon.pm
+Bio/Tools/Prediction/Gene.pm
+Bio/Tools/Primer/Assessor/Base.pm
+Bio/Tools/Primer/AssessorI.pm
+Bio/Tools/Primer/Feature.pm
+Bio/Tools/Primer/Pair.pm
+Bio/Tools/Primer3.pm
+Bio/Tools/Prints.pm
+Bio/Tools/Profile.pm
+Bio/Tools/Promoterwise.pm
+Bio/Tools/PrositeScan.pm
+Bio/Tools/Pseudowise.pm
+Bio/Tools/pSW.pm
+Bio/Tools/QRNA.pm
+Bio/Tools/RandomDistFunctions.pm
+Bio/Tools/RepeatMasker.pm
+Bio/Tools/RestrictionEnzyme.pm
+Bio/Tools/RNAMotif.pm
+Bio/Tools/Run/GenericParameters.pm
+Bio/Tools/Run/ParametersI.pm
+Bio/Tools/Run/README
+Bio/Tools/Run/RemoteBlast.pm
+Bio/Tools/Run/StandAloneBlast.pm
+Bio/Tools/Run/WrapperBase.pm
+Bio/Tools/Seg.pm
+Bio/Tools/SeqPattern.pm
+Bio/Tools/SeqStats.pm
+Bio/Tools/SeqWords.pm
+Bio/Tools/Sigcleave.pm
+Bio/Tools/Signalp.pm
+Bio/Tools/Sim4/Exon.pm
+Bio/Tools/Sim4/Results.pm
+Bio/Tools/SiRNA.pm
+Bio/Tools/SiRNA/Ruleset/saigo.pm
+Bio/Tools/SiRNA/Ruleset/tuschl.pm
+Bio/Tools/Spidey/Exon.pm
+Bio/Tools/Spidey/Results.pm
+Bio/Tools/Tmhmm.pm
+Bio/Tools/tRNAscanSE.pm
+Bio/Tree/AlleleNode.pm
+Bio/Tree/Compatible.pm
+Bio/Tree/DistanceFactory.pm
+Bio/Tree/Draw/Cladogram.pm
+Bio/Tree/Node.pm
+Bio/Tree/NodeI.pm
+Bio/Tree/NodeNHX.pm
+Bio/Tree/RandomFactory.pm
+Bio/Tree/Statistics.pm
+Bio/Tree/Tree.pm
+Bio/Tree/TreeFunctionsI.pm
+Bio/Tree/TreeI.pm
+Bio/TreeIO.pm
+Bio/TreeIO/cluster.pm
+Bio/TreeIO/lintree.pm
+Bio/TreeIO/newick.pm
+Bio/TreeIO/nexus.pm
+Bio/TreeIO/nhx.pm
+Bio/TreeIO/pag.pm
+Bio/TreeIO/svggraph.pm
+Bio/TreeIO/tabtree.pm
+Bio/TreeIO/TreeEventBuilder.pm
+Bio/UpdateableSeqI.pm
+Bio/Variation/AAChange.pm
+Bio/Variation/AAReverseMutate.pm
+Bio/Variation/Allele.pm
+Bio/Variation/DNAMutation.pm
+Bio/Variation/IO.pm
+Bio/Variation/IO/flat.pm
+Bio/Variation/IO/xml.pm
+Bio/Variation/README
+Bio/Variation/RNAChange.pm
+Bio/Variation/SeqDiff.pm
+Bio/Variation/SNP.pm
+Bio/Variation/VariantI.pm
+Bio/WebAgent.pm
+BUGS
+Build.PL
+Changes
+DEPENDENCIES
+DEPRECATED
+doc/Deobfuscator/bin/deob_index.pl
+doc/Deobfuscator/Build.PL
+doc/Deobfuscator/cgi-bin/deob_detail.cgi
+doc/Deobfuscator/cgi-bin/deob_flowchart.png
+doc/Deobfuscator/cgi-bin/deob_help.html
+doc/Deobfuscator/cgi-bin/deob_interface.cgi
+doc/Deobfuscator/Changes
+doc/Deobfuscator/lib/Deobfuscator.pm
+doc/Deobfuscator/LICENSE
+doc/Deobfuscator/Makefile.PL
+doc/Deobfuscator/MANIFEST
+doc/Deobfuscator/META.yml
+doc/Deobfuscator/README
+doc/Deobfuscator/t/00.load.t
+doc/Deobfuscator/t/pod.t
+doc/makedoc.PL
+doc/README
+examples/align/align_on_codons.pl
+examples/align/aligntutorial.pl
+examples/align/clustalw.pl
+examples/align/simplealign.pl
+examples/biblio/biblio-eutils-example.pl
+examples/biblio/biblio-soap-example.pl
+examples/biblio/biblio_soap.pl
+examples/Bio-DB-GFF/load_ucsc.pl
+examples/biographics/all_glyphs.pl
+examples/biographics/dynamic_glyphs.pl
+examples/biographics/feature_data.gff
+examples/biographics/feature_data.txt
+examples/biographics/lots_of_glyphs.pl
+examples/biographics/render_sequence.pl
+examples/bioperl.pl
+examples/cluster/dbsnp.pl
+examples/contributed/nmrpdb_parse.pl
+examples/contributed/prosite2perl.pl
+examples/contributed/rebase2list.pl
+examples/db/dbfetch
+examples/db/est_tissue_query.pl
+examples/db/gb2features.pl
+examples/db/get_seqs.pl
+examples/db/getGenBank.pl
+examples/db/rfetch.pl
+examples/db/use_registry.pl
+examples/generate_random_seq.pl
+examples/liveseq/change_gene.pl
+examples/longorf.pl
+examples/make_primers.pl
+examples/popgen/parse_calc_stats.pl
+examples/rev_and_trans.pl
+examples/revcom_dir.pl
+examples/root/exceptions1.pl
+examples/root/exceptions2.pl
+examples/root/exceptions3.pl
+examples/root/exceptions4.pl
+examples/root/lib/Bio/PrimarySeq.pm
+examples/root/lib/Bio/PrimarySeqI.pm
+examples/root/lib/Bio/Seq.pm
+examples/root/lib/Bio/SeqI.pm
+examples/root/lib/TestInterface.pm
+examples/root/lib/TestObject.pm
+examples/root/README
+examples/searchio/blast_example.pl
+examples/searchio/custom_writer.pl
+examples/searchio/hitwriter.pl
+examples/searchio/hspwriter.pl
+examples/searchio/htmlwriter.pl
+examples/searchio/psiblast_features.pl
+examples/searchio/psiblast_iterations.pl
+examples/searchio/rawwriter.pl
+examples/searchio/resultwriter.pl
+examples/searchio/waba2gff.pl
+examples/searchio/waba2gff3.pl
+examples/sirna/rnai_finder.cgi
+examples/sirna/TAG
+examples/structure/structure-io.pl
+examples/subsequence.cgi
+examples/tk/gsequence.pl
+examples/tk/hitdisplay.pl
+examples/tools/extract_genes.pl
+examples/tools/gb_to_gff.pl
+examples/tools/gff2ps.pl
+examples/tools/parse_codeml.pl
+examples/tools/psw.pl
+examples/tools/run_genscan.pl
+examples/tools/run_primer3.pl
+examples/tools/seq_pattern.pl
+examples/tools/standaloneblast.pl
+examples/tree/paup2phylip.pl
+INSTALL
+INSTALL.WIN
+LICENSE
+maintenance/authors.pl
+maintenance/check_NAME.pl
+maintenance/check_URLs.pl
+maintenance/modules.pl
+maintenance/ncbi_blast_switches.pl
+maintenance/pod.pl
+maintenance/README
+maintenance/symlink_script.pl
+maintenance/symlink_scripts.PLS
+maintenance/version.pl
+Makefile.PL
+MANIFEST			This list of files
+META.yml
+models/biblio.dia
+models/bio_liveseq_variation.dia
+models/bio_map.dia
+models/bio_restriction.dia
+models/bioperl.dia
+models/coordinatemapper.dia
+models/map_proposal.txt
+models/maps_and_markers.dia
+models/popgen.dia
+models/population_proposal.txt
+models/README
+ModuleBuildBioperl.pm
+PLATFORMS
+README
+scripts/biblio/biblio.PLS
+scripts/biblio/TAG
+scripts/Bio-DB-GFF/bp_genbank2gff.PLS
+scripts/Bio-DB-GFF/bulk_load_gff.PLS
+scripts/Bio-DB-GFF/fast_load_gff.PLS
+scripts/Bio-DB-GFF/genbank2gff3.PLS
+scripts/Bio-DB-GFF/generate_histogram.PLS
+scripts/Bio-DB-GFF/load_gff.PLS
+scripts/Bio-DB-GFF/meta_gff.PLS
+scripts/Bio-DB-GFF/process_gadfly.PLS
+scripts/Bio-DB-GFF/process_sgd.PLS
+scripts/Bio-DB-GFF/process_wormbase.PLS
+scripts/Bio-DB-GFF/README
+scripts/Bio-DB-GFF/TAG
+scripts/Bio-SeqFeature-Store/bp_seqfeature_load.PLS
+scripts/biographics/bp_embl2picture.PLS
+scripts/biographics/bp_glyphs1-demo.PLS
+scripts/biographics/bp_glyphs2-demo.PLS
+scripts/das/README
+scripts/das/TAG
+scripts/DB/biofetch_genbank_proxy.PLS
+scripts/DB/bioflat_index.PLS
+scripts/DB/biogetseq.PLS
+scripts/DB/flanks.PLS
+scripts/DB/TAG
+scripts/graphics/contig_draw.PLS
+scripts/graphics/feature_draw.PLS
+scripts/graphics/frend.PLS
+scripts/graphics/README
+scripts/graphics/search_overview.PLS
+scripts/graphics/TAG
+scripts/index/bp_fetch.PLS
+scripts/index/bp_index.PLS
+scripts/index/bp_seqret.PLS
+scripts/index/TAG
+scripts/popgen/composite_LD.PLS
+scripts/popgen/heterogeneity_test.PLS
+scripts/README
+scripts/searchio/fastam9_to_table.PLS
+scripts/searchio/filter_search.PLS
+scripts/searchio/hmmer_to_table.PLS
+scripts/searchio/parse_hmmsearch.PLS
+scripts/searchio/README
+scripts/searchio/search2table.PLS
+scripts/searchio/TAG
+scripts/seq/extract_feature_seq.PLS
+scripts/seq/make_mrna_protein.PLS
+scripts/seq/seqconvert.PLS
+scripts/seq/split_seq.PLS
+scripts/seq/TAG
+scripts/seq/translate_seq.PLS
+scripts/seq/unflatten_seq.PLS
+scripts/seqstats/aacomp.PLS
+scripts/seqstats/chaos_plot.PLS
+scripts/seqstats/gccalc.PLS
+scripts/seqstats/oligo_count.PLS
+scripts/seqstats/TAG
+scripts/taxa/classify_hits_kingdom.PLS
+scripts/taxa/local_taxonomydb_query.PLS
+scripts/taxa/query_entrez_taxa.PLS
+scripts/taxa/TAG
+scripts/taxa/taxid4species.PLS
+scripts/taxa/taxonomy2tree.PLS
+scripts/tree/blast2tree.PLS
+scripts/tree/nexus2nh.PLS
+scripts/tree/TAG
+scripts/tree/tree2pag.PLS
+scripts/utilities/bp_mrtrans.PLS
+scripts/utilities/bp_nrdb.PLS
+scripts/utilities/bp_sreformat.PLS
+scripts/utilities/dbsplit.PLS
+scripts/utilities/mask_by_search.PLS
+scripts/utilities/mutate.PLS
+scripts/utilities/pairwise_kaks.PLS
+scripts/utilities/README
+scripts/utilities/remote_blast.PLS
+scripts/utilities/search2alnblocks.PLS
+scripts/utilities/search2BSML.PLS
+scripts/utilities/search2gff.PLS
+scripts/utilities/search2tribe.PLS
+scripts/utilities/seq_length.PLS
+scripts/utilities/TAG
+t/AAChange.t
+t/AAReverseMutate.t
+t/abi.t
+t/ace.t
+t/AlignIO.t
+t/AlignStats.t
+t/AlignUtil.t
+t/alignUtilities.t
+t/Allele.t
+t/Alphabet.t
+t/Annotation.t
+t/AnnotationAdaptor.t
+t/asciitree.t
+t/Assembly.t
+t/Biblio.t
+t/Biblio_biofetch.t
+t/Biblio_eutils.t
+t/BiblioReferences.t
+t/BioDBGFF.t
+t/BioDBSeqFeature.t
+t/BioFetch_DB.t
+t/BioGraphics.t
+t/BlastIndex.t
+t/bsml_sax.t
+t/Chain.t
+t/chaosxml.t
+t/cigarstring.t
+t/ClusterIO.t
+t/Coalescent.t
+t/CodonTable.t
+t/Compatible.t
+t/consed.t
+t/CoordinateGraph.t
+t/CoordinateMapper.t
+t/Correlate.t
+t/ctf.t
+t/CytoMap.t
+t/data/1A11.pdb
+t/data/1A3I.pdb
+t/data/1BPT.pdb
+t/data/503384.MEGABLAST.0
+t/data/503384.MEGABLAST.2
+t/data/5X_1895.FASTXY
+t/data/8HVP.pdb
+t/data/a_thaliana.blastn
+t/data/AAC12660.fa
+t/data/aaml.mlc
+t/data/aaml_pairwise.mlc
+t/data/AB077698.gb
+t/data/acefile.ace.1
+t/data/acefile.singlets
+t/data/adh.mb_tree.nexus
+t/data/AE003528_ecoli.bls
+t/data/AE003644_Adh-genomic.gb
+t/data/AF032047.gbk
+t/data/AF165282.gb
+t/data/AHCYL1.kegg
+t/data/alnfile.fasta
+t/data/amino.fa
+t/data/AnnIX-v003.gbk
+t/data/ar.embl
+t/data/ATF14F8.gbk
+t/data/atp1.matrix
+t/data/ay007676.gb
+t/data/AY095303S1.gbk
+t/data/ay116458.gb
+t/data/ay149291.gb
+t/data/AY763288.gb
+t/data/BAB68554.gb
+t/data/barns-combined.nex
+t/data/baseml.pairwise
+t/data/baseml.usertree
+t/data/basic-bush.nex
+t/data/basic-ladder.nex
+t/data/BC000007.gbk
+t/data/BEL16-LTR_AG.embl
+t/data/biodbgff/test.gff
+t/data/biodbgff/test.gff3
+t/data/biofpc.cor
+t/data/biofpc.fpc
+t/data/biographics/feature_data.txt
+t/data/biographics/t1/version1.gif
+t/data/biographics/t1/version1.png
+t/data/biographics/t1/version2.gif
+t/data/biographics/t1/version2.png
+t/data/biographics/t1/version3.png
+t/data/biographics/t1/version4.png
+t/data/biographics/t1/version5.png
+t/data/biographics/t1/version6.png
+t/data/biographics/t1/version7.png
+t/data/biographics/t1/version8.png
+t/data/biographics/t2/version1.gif
+t/data/biographics/t2/version1.png
+t/data/biographics/t2/version10.png
+t/data/biographics/t2/version11.png
+t/data/biographics/t2/version12.png
+t/data/biographics/t2/version13.png
+t/data/biographics/t2/version14.png
+t/data/biographics/t2/version2.gif
+t/data/biographics/t2/version2.png
+t/data/biographics/t2/version3.png
+t/data/biographics/t2/version4.png
+t/data/biographics/t2/version5.png
+t/data/biographics/t2/version6.png
+t/data/biographics/t2/version7.png
+t/data/biographics/t2/version8.png
+t/data/biographics/t2/version9.png
+t/data/biographics/t3/version1.gif
+t/data/biographics/t3/version1.png
+t/data/biographics/t3/version2.gif
+t/data/biographics/t3/version2.png
+t/data/biographics/t3/version3.png
+t/data/biographics/t3/version4.png
+t/data/biographics/t3/version5.png
+t/data/biographics/t3/version6.png
+t/data/biographics/t3/version7.png
+t/data/Bird_Ovomucoids.nex
+t/data/BK000016-tpa.gbk
+t/data/bl2seq.blastn
+t/data/bl2seq.blastn.rev
+t/data/bl2seq.blastx.out
+t/data/bl2seq.bug940.out
+t/data/bl2seq.out
+t/data/bl2seq.tblastx.out
+t/data/blast.report
+t/data/blat.psLayout3
+t/data/BLOSUM50
+t/data/blosum62.bla
+t/data/BN000066-tpa.embl
+t/data/branchSite.mlc
+t/data/brassica_ATH.WUBLASTN
+t/data/bug2120.phd
+t/data/c200-vs-yeast.BLASTN
+t/data/c200-vs-yeast.BLASTN.m8
+t/data/calm.swiss
+t/data/catalase-webblast.BLASTP
+t/data/cds-266.fas
+t/data/chad100.scf
+t/data/char-interleave.nex
+t/data/char-matrix-spaces.nex
+t/data/codeml.mlc
+t/data/codeml315.mlc
+t/data/codeml_lysozyme/2NG.dN
+t/data/codeml_lysozyme/2NG.dS
+t/data/codeml_lysozyme/2NG.t
+t/data/codeml_lysozyme/4fold.nuc
+t/data/codeml_lysozyme/lnf
+t/data/codeml_lysozyme/lysozymeSmall.ctl
+t/data/codeml_lysozyme/lysozymeSmall.trees
+t/data/codeml_lysozyme/lysozymeSmall.txt
+t/data/codeml_lysozyme/mlc
+t/data/codeml_lysozyme/rst
+t/data/codeml_lysozyme/rst1
+t/data/codeml_lysozyme/rub
+t/data/codeml_nssites.mlc
+t/data/compLD_missingtest.prettybase
+t/data/compLD_test.prettybase
+t/data/component.ontology.test
+t/data/component.ontology.test2
+t/data/consed_project/edit_dir/test_project.contigs
+t/data/consed_project/edit_dir/test_project.fasta
+t/data/consed_project/edit_dir/test_project.fasta.log
+t/data/consed_project/edit_dir/test_project.fasta.screen
+t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1
+t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2
+t/data/consed_project/edit_dir/test_project.fasta.screen.contigs
+t/data/consed_project/edit_dir/test_project.fasta.screen.contigs.qual
+t/data/consed_project/edit_dir/test_project.fasta.screen.log
+t/data/consed_project/edit_dir/test_project.fasta.screen.problems
+t/data/consed_project/edit_dir/test_project.fasta.screen.problems.qual
+t/data/consed_project/edit_dir/test_project.fasta.screen.qual
+t/data/consed_project/edit_dir/test_project.fasta.screen.singlets
+t/data/consed_project/edit_dir/test_project.fasta.screen.view
+t/data/consed_project/edit_dir/test_project.newtags
+t/data/consed_project/edit_dir/test_project.phrap.out
+t/data/consed_project/edit_dir/test_project.screen.out
+t/data/consed_project/edit_dir/test_project_to_alu.cross
+t/data/consed_project/edit_dir/test_projectNewChromats.fof
+t/data/consed_project/phd_dir/ML4922R.phd.1
+t/data/consed_project/phd_dir/ML4924F.phd.1
+t/data/consed_project/phd_dir/ML4924R.phd.1
+t/data/consed_project/phd_dir/ML4947F.phd.1
+t/data/crab.dat.cn
+t/data/crab.nj
+t/data/crab.njb
+t/data/crypto.sim4-0
+t/data/crypto.sim4-3
+t/data/crypto.sim4-4
+t/data/ctgdemo.fpc
+t/data/cys1_dicdi.water
+t/data/cysprot.fa
+t/data/cysprot.msf
+t/data/cysprot.needle
+t/data/cysprot.tblastn
+t/data/cysprot.water
+t/data/cysprot1.fa
+t/data/cysprot1.FASTA
+t/data/cysprot1a.fa
+t/data/cysprot1a.msf
+t/data/cysprot1b.fa
+t/data/cysprot1b.hmmsearch
+t/data/cysprot1b.msf
+t/data/cysprot1b.newick
+t/data/cysprot_vs_gadfly.FASTA
+t/data/D10483.gbk
+t/data/D12555.gbk
+t/data/dbfa/1.fa
+t/data/dbfa/2.fa
+t/data/dbfa/3.fa
+t/data/dbfa/4.fa
+t/data/dbfa/5.fa
+t/data/dbfa/6.fa
+t/data/dbfa/7.fa
+t/data/directives.gff3
+t/data/dmel_2Lchunk.gb
+t/data/dna1.fa
+t/data/dna2.fa
+t/data/dnaE-bsub-prot.fa
+t/data/dnaE-bsub.fa
+t/data/dnaEbsub_ecoli.wublastx
+t/data/dnaEbsub_ecoli.wutblastn
+t/data/dnaEbsub_ecoli.wutblastx
+t/data/DQ018368.gb
+t/data/dq519393.gb
+t/data/ECAPAH02.embl
+t/data/ecoli-trna-qrna.out
+t/data/ecoli_domains.rps.xml
+t/data/ecoli_domains.rpsblast
+t/data/ecolitst.bls
+t/data/ecolitst.fa
+t/data/ecolitst.noseqs.wublastp
+t/data/ecolitst.wublastp
+t/data/empty.bl2seq
+t/data/entrezgene.dat
+t/data/example.hap
+t/data/example.phase
+t/data/expected.blast.out
+t/data/factor7.embl
+t/data/footprinter.out
+t/data/frac_problems.blast
+t/data/geneid_1.0.out
+t/data/genemark.out
+t/data/genewise.out
+t/data/genewise_output.paracel_btk
+t/data/genomewise.out
+t/data/genomic-seq.epcr
+t/data/genomic-seq.fasta
+t/data/genomic-seq.genscan
+t/data/genomic-seq.mzef
+t/data/Genscan.FastA
+t/data/gf-s71.needle
+t/data/glimmer.out
+t/data/Glimmer2.out
+t/data/Glimmer3.detail
+t/data/Glimmer3.predict
+t/data/GlimmerHMM.out
+t/data/GlimmerM.out
+t/data/GO.defs.test
+t/data/GO.defs.test2
+t/data/hemoglobinA.meg
+t/data/hg16_chroms.gff
+t/data/hmmpfam.out
+t/data/hmmpfam_fake.out
+t/data/hmmsearch.out
+t/data/hs_est.est2genome
+t/data/hs_fugu.newick
+t/data/hs_owlmonkey.aln
+t/data/hs_owlmonkey.fas
+t/data/hs_owlmonkey.fasta
+t/data/hsinsulin.blastcl3.blastn
+t/data/HUMBETGLOA.fa
+t/data/HUMBETGLOA.FASTA
+t/data/HUMBETGLOA.gff
+t/data/HUMBETGLOA.grail
+t/data/HUMBETGLOA.grailexp
+t/data/HUMBETGLOA.mzef
+t/data/HUMBETGLOA.tblastx
+t/data/humor.maf
+t/data/humts1.pal
+t/data/hybrid1.gff3
+t/data/hybrid2.gff3
+t/data/insulin.water
+t/data/interpro_ebi.xml
+t/data/interpro_short.xml
+t/data/intrablock-comment.nex
+t/data/kinases.tsv
+t/data/kinases.xls
+t/data/Kingdoms_DNA.nex
+t/data/knownGene.gff3
+t/data/L77119.hmmer
+t/data/little.largemultifasta
+t/data/LittleChrY.dbsnp.xml
+t/data/LL-sample.seq
+t/data/LOAD_Ccd1.dnd
+t/data/long-names.nex
+t/data/lucy.info
+t/data/lucy.qual
+t/data/lucy.seq
+t/data/lucy.stderr
+t/data/lysozyme6.protml
+t/data/lysozyme6.simple.protml
+t/data/M0.mlc
+t/data/mapmaker.out
+t/data/mapmaker.txt
+t/data/mast.dat
+t/data/masta.dat
+t/data/Mcjanrna_rdbII.gbk
+t/data/megablast_output.paracel_btk
+t/data/meme.dat
+t/data/mini-AE001405.gb
+t/data/mini-align.aln
+t/data/mixedmast.dat
+t/data/MmCT
+t/data/mpath.ontology.test
+t/data/MSGEFTUA.gb
+t/data/multi_1.fa
+t/data/multi_2.fa
+t/data/multi_blast.bls
+t/data/multifa.seq
+t/data/multifa.seq.qual
+t/data/multiline-intrablock-comment.nex
+t/data/multiseq.bls
+t/data/mus.bls.xml
+t/data/mutations.dat
+t/data/mutations.old.dat
+t/data/mutations.old.xml
+t/data/mutations.xml
+t/data/myco_sites.gff
+t/data/NC_001284.gbk
+t/data/NC_006346.gb
+t/data/NC_006511-short.gbk
+t/data/nei_gojobori_test.aln
+t/data/neighbor.dist
+t/data/new_blastn.txt
+t/data/newformat.swiss
+t/data/NM_002253.tseq
+t/data/NM_002254.gb
+t/data/no-genes.genscan
+t/data/no_cds_example.gb
+t/data/no_FH.embl
+t/data/no_hsps.blastp
+t/data/noninterleaved.phy
+t/data/NT_021877.gbk
+t/data/O_sat.wgs
+t/data/omim_genemap_test
+t/data/omim_text_test
+t/data/P33897
+t/data/P35527.gb
+t/data/PAM250
+t/data/pep-266.aln
+t/data/pfam_tests.stk
+t/data/phi.out
+t/data/phipsi.out
+t/data/phredfile.phd
+t/data/phylipdist-36.out
+t/data/phylipdist.out
+t/data/pictogram.fa
+t/data/plague_yeast.bls.xml
+t/data/polymorphism.dat
+t/data/polymorphism.old.xml
+t/data/polymorphism.xml
+t/data/popgen_saureus.dat
+t/data/popgen_saureus.multidat
+t/data/popstats.prettybase
+t/data/pre_rel9.swiss
+t/data/Primate_mtDNA.nex
+t/data/primedseq.fa
+t/data/primer3_infile.txt
+t/data/primer3_outfile.txt
+t/data/primer3_output.txt
+t/data/prints.out
+t/data/promoterwise.out
+t/data/protpars.phy
+t/data/pseudowise.out
+t/data/psi_xml.dat
+t/data/psiblastreport.out
+t/data/puzzle.tre
+t/data/qrna-relloc.out
+t/data/qualfile.qual
+t/data/quoted-strings1.nex
+t/data/quoted-strings2.nex
+t/data/Rab1.chaos-xml
+t/data/radical-whitespace.nex
+t/data/radical-whitespace_02.nex
+t/data/readtest.abi
+t/data/readtest.ctf
+t/data/readtest.exp
+t/data/readtest.pln
+t/data/readtest.ztr
+t/data/rebase.itype2
+t/data/rebase.withrefm
+t/data/registry/bdb/seqdatabase.ini
+t/data/registry/flat/seqdatabase.ini
+t/data/rel9.swiss
+t/data/repeatmasker.fa.out
+t/data/revcomp_mrna.gb
+t/data/rfam_tests.stk
+t/data/roa1.dat
+t/data/roa1.genbank
+t/data/roa1.swiss
+t/data/roa1_v2.dat
+t/data/sbay_c545-yeast.BLASTZ.PSL
+t/data/seg.out
+t/data/seqdatabase.ini
+t/data/seqfeaturedb/test.gff3
+t/data/seqfile.pir
+t/data/seqs.fas
+t/data/sequencefamily.dat
+t/data/short.blx
+t/data/signalp.negative.out
+t/data/signalp.positive.out
+t/data/sim4.for.for
+t/data/sim4.for.rev
+t/data/sim4.rev
+t/data/singleNSsite.mlc
+t/data/so.obo
+t/data/sofa.ontology
+t/data/SPAN_Family4nl.nex
+t/data/SPAN_Family7n.nex
+t/data/SPAN_Family8a.nex
+t/data/sparsealn.needle
+t/data/spidey.noalignment
+t/data/spidey.test1
+t/data/sprintf.rnamotif
+t/data/ssp160.embl.1
+t/data/stress_test_medline.xml
+t/data/stress_test_pubmed.xml
+t/data/sv40_small.xml
+t/data/swiss.dat
+t/data/swisspfam.data
+t/data/SwissProt.dat
+t/data/T7.aln
+t/data/tab1part.mif
+t/data/tab2part.mif
+t/data/tab3part.mif
+t/data/taxdump/names.dmp
+t/data/taxdump/nodes.dmp
+t/data/tblastn.out
+t/data/test.ace
+t/data/test.embl
+t/data/test.embl2sq
+t/data/test.fasta
+t/data/test.game
+t/data/test.gcg
+t/data/test.gcgblast
+t/data/test.gcgfasta
+t/data/test.genbank
+t/data/test.genbank.noseq
+t/data/test.interpro
+t/data/test.lasergene
+t/data/test.mase
+t/data/test.meme
+t/data/test.meme2
+t/data/test.metafasta
+t/data/test.nh
+t/data/test.nhx
+t/data/test.pfam
+t/data/test.pir
+t/data/test.ptt
+t/data/test.raw
+t/data/test.swiss
+t/data/test.tab
+t/data/test.tigrxml
+t/data/test.txt
+t/data/test.waba
+t/data/test_badlf.gcg
+t/data/testaln.aln
+t/data/testaln.fasta
+t/data/testaln.mase
+t/data/testaln.metafasta
+t/data/testaln.msf
+t/data/testaln.nexus
+t/data/testaln.pfam
+t/data/testaln.phylip
+t/data/testaln.po
+t/data/testaln.prodom
+t/data/testaln.psi
+t/data/testaln.selex
+t/data/testaln.stockholm
+t/data/testaln2.fasta
+t/data/testdat.exonerate
+t/data/testdbaccnums.out
+t/data/testfile.erpin
+t/data/testfuzzy.genbank
+t/data/tmhmm.out
+t/data/transfac.dat
+t/data/tree_nonewline.nexus
+t/data/Treebase-chlamy-dna.nex
+t/data/tricky.wublast
+t/data/trna.strict.rnamotif
+t/data/U58726.gb
+t/data/U71225.gb
+t/data/U83300.bsml
+t/data/UnaSmithHIV-both.nex
+t/data/unigene.data
+t/data/urease.tre.nexus
+t/data/version2.scf
+t/data/version3.scf
+t/data/worm_fam_2785.cdna
+t/data/X98338_Adh-mRNA.gb
+t/data/yeast.tRNAscanSE
+t/data/yn00.mlc
+t/DB.t
+t/DBCUTG.t
+t/DBFasta.t
+t/DNAMutation.t
+t/Domcut.t
+t/ECnumber.t
+t/ELM.t
+t/embl.t
+t/EMBL_DB.t
+t/EMBOSS_Tools.t
+t/EncodedSeq.t
+t/entrezgene.t
+t/ePCR.t
+t/ESEfinder.t
+t/est2genome.t
+t/EUtilities.t
+t/Exception.t
+t/Exonerate.t
+t/exp.t
+t/fasta.t
+t/FeatureHolder.x
+t/FeatureIO.t
+t/flat.t
+t/FootPrinter.t
+t/game.t
+t/GbrowseGFF.t
+t/gcg.t
+t/GDB.t
+t/Gel.t
+t/genbank.t
+t/GeneCoordinateMapper.t
+t/Geneid.t
+t/Genewise.t
+t/Genomewise.t
+t/Genpred.t
+t/GFF.t
+t/GOR4.t
+t/GOterm.t
+t/GraphAdaptor.t
+t/GuessSeqFormat.t
+t/hmmer.t
+t/hmmer_pull.t
+t/HNN.t
+t/HtSNP.t
+t/Index.t
+t/InstanceSite.t
+t/interpro.t
+t/InterProParser.t
+t/IUPAC.t
+t/kegg.t
+t/largefasta.t
+t/LargeLocatableSeq.t
+t/largepseq.t
+t/lasergene.t
+t/lib/Error.pm
+t/lib/Test/Builder.pm
+t/lib/Test/Builder/Module.pm
+t/lib/Test/Builder/Tester.pm
+t/lib/Test/Builder/Tester/Color.pm
+t/lib/Test/More.pm
+t/lib/Test/Simple.pm
+t/lib/Test/Tutorial.pod
+t/LinkageMap.t
+t/LiveSeq.t
+t/LocatableSeq.t
+t/Location.t
+t/LocationFactory.t
+t/LocusLink.t
+t/lucy.t
+t/Map.t
+t/MapIO.t
+t/masta.t
+t/Matrix.t
+t/Measure.t
+t/MeSH.t
+t/metafasta.t
+t/MetaSeq.t
+t/MicrosatelliteMarker.t
+t/MiniMIMentry.t
+t/MitoProt.t
+t/Molphy.t
+t/MultiFile.t
+t/multiple_fasta.t
+t/Mutation.t
+t/Mutator.t
+t/NetPhos.t
+t/Node.t
+t/obo_parser.t
+t/OddCodes.t
+t/OMIMentry.t
+t/OMIMentryAllelicVariant.t
+t/OMIMparser.t
+t/Ontology.t
+t/OntologyEngine.t
+t/OntologyStore.t
+t/PAML.t
+t/Perl.t
+t/phd.t
+t/Phenotype.t
+t/PhylipDist.t
+t/PhysicalMap.t
+t/pICalculator.t
+t/Pictogram.t
+t/pir.t
+t/pln.t
+t/PopGen.t
+t/PopGenSims.t
+t/primaryqual.t
+t/PrimarySeq.t
+t/primedseq.t
+t/Primer.t
+t/primer3.t
+t/Promoterwise.t
+t/ProtDist.t
+t/protgraph.t
+t/ProtMatrix.t
+t/ProtPsm.t
+t/Pseudowise.t
+t/psm.t
+t/QRNA.t
+t/qual.t
+t/RandDistFunctions.t
+t/RandomTreeFactory.t
+t/Range.t
+t/RangeI.t
+t/raw.t
+t/RefSeq.t
+t/Registry.t
+t/Relationship.t
+t/RelationshipType.t
+t/RemoteBlast.t
+t/RepeatMasker.t
+t/RestrictionAnalysis.t
+t/RestrictionIO.t
+t/RNAChange.t
+t/rnamotif.t
+t/RootI.t
+t/RootIO.t
+t/RootStorable.t
+t/Scansite.t
+t/scf.t
+t/SearchDist.t
+t/SearchIO.t
+t/Seg.t
+t/Seq.t
+t/seq_quality.t
+t/SeqAnalysisParser.t
+t/SeqBuilder.t
+t/SeqDiff.t
+t/SeqFeatCollection.t
+t/SeqFeature.t
+t/seqfeaturePrimer.t
+t/SeqHound_DB.t
+t/SeqIO.t
+t/SeqPattern.t
+t/seqread_fail.t
+t/SeqStats.t
+t/SequenceFamily.t
+t/sequencetrace.t
+t/SeqUtils.t
+t/SeqVersion.t
+t/seqwithquality.t
+t/SeqWords.t
+t/Sigcleave.t
+t/Signalp.t
+t/Sim4.t
+t/SimilarityPair.t
+t/SimpleAlign.t
+t/simpleGOparser.t
+t/singlet.t
+t/sirna.t
+t/SiteMatrix.t
+t/SNP.t
+t/Sopma.t
+t/Species.t
+t/Spidey.t
+t/splicedseq.t
+t/StandAloneBlast.t
+t/StructIO.t
+t/Structure.t
+t/swiss.t
+t/Symbol.t
+t/tab.t
+t/table.t
+t/TagHaplotype.t
+t/Taxonomy.t
+t/TaxonTree.t
+t/Tempfile.t
+t/Term.t
+t/Test.pm
+t/testformats.pl
+t/tigrxml.t
+t/tinyseq.t
+t/Tmhmm.t
+t/Tools.t
+t/Tree.t
+t/TreeBuild.t
+t/TreeIO.t
+t/trim.t
+t/tRNAscanSE.t
+t/UCSCParsers.t
+t/Unflattener.t
+t/Unflattener2.t
+t/UniGene.t
+t/Variation_IO.t
+t/WABA.t
+t/XEMBL_DB.t
+t/ztr.t

Added: trunk/packages/bioperl/branches/upstream/current/META.yml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/META.yml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/META.yml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2718 @@
+---
+name: bioperl
+version: 1.005002102
+author:
+  - 'Bioperl Team <bioperl-l at bioperl.org>'
+abstract: Bioinformatics Toolkit
+license: artistic
+resources:
+  license: http://opensource.org/licenses/artistic-license.php
+requires:
+  DB_File: 0
+  IO::String: 0
+  perl: 5.6.1
+build_requires:
+  CPAN: 1.81
+  Module::Build: 0.2805
+  Test::Harness: 2.62
+  Test::More: 0
+optional_features:
+  Bio::Biblio::IO::medlinexml:
+    description: parsing xml
+    requires:
+      XML::Parser: 0
+  'Bio::DB::*,Bio::Tools::Run::RemoteBlast,Bio::Tools::Analysis::Protein*,Bio::Tools::Analysis::DNA*':
+    description: 'GenBank+GenPept sequence retrieval, remote http Blast jobs'
+    requires:
+      HTTP::Request::Common: 0
+  'Bio::DB::*,Bio::Tools::Run::RemoteBlast,Bio::Tools::WebBlat,Bio::WebAgent,Bio::Graphics::Glyph::image':
+    description: remote access
+    requires:
+      LWP::UserAgent: 0
+  'Bio::DB::Ace,Bio::DB::GFF::Adaptor::ace':
+    description: access of ACeDB database
+    requires:
+      Ace: 0
+  'Bio::DB::FileCache,Bio::SeqFeature::Collection,Bio::PopGen::HtSNP,Bio::PopGen::TagHaplotype,Bio::DB::GFF::Adaptor::berkeleydb':
+    description: storing sequence objects in local file cache
+    requires:
+      Storable: 0
+  Bio::DB::GDB:
+    description: screen scraping www.gdb.org
+    requires:
+      HTML::Parser: 3
+  'Bio::DB::XEMBLService,Bio::DB::Biblio::soap':
+    description: XEMBL Services and Bibliographic queries
+    requires:
+      SOAP::Lite: 0
+  Bio::FeatureIO::interpro:
+    description: parsing interpro features
+    requires:
+      XML::DOM::XPath: 0.13
+  'Bio::Graph::ProteinGraph,Bio::Tools::Primer3':
+    description: cloning objects
+    requires:
+      Clone: 0
+  'Bio::Graph::SimpleGraph,Bio::Graph::SimpleGraph::Traversal,Bio::Graph::ProteinGraph':
+    description: creating objects
+    requires:
+      Class::AutoClass: 1
+  Bio::Graphics:
+    description: test scripts
+    requires:
+      Text::Shellwords: 0
+  Bio::Graphics::Glyph*:
+    description: rendering Sequences and Features
+    requires:
+      GD: 1.3
+  Bio::Graphics::Panel:
+    description: creating SVG images
+    requires:
+      GD::SVG: 0
+  Bio::Graphics::Pictogram:
+    description: SVG output
+    requires:
+      SVG: 2.26
+  Bio::Ontology::SimpleGOEngine::GraphAdaptor:
+    description: ontology engine implementation for the GO parser
+    requires:
+      Graph: 0
+  Bio::SearchIO::blastxml:
+    description: remote analysis POST submissions
+    requires:
+      HTML::Entities: 0
+  'Bio::SearchIO::blastxml,Bio::SeqIO::tigrxml,Bio::SeqIO::bsml_sax':
+    description: parsing xml
+    requires:
+      XML::SAX: 0.15
+  'Bio::SeqIO::agave,Bio::SeqIO::game::gameWriter,Bio::SeqIO::chadoxml,Bio::SeqIO::tinyseq,Bio::Variation::IO::xml,Bio::SearchIO::Writer::BSMLResultWriter':
+    description: parsing and writing xml
+    requires:
+      XML::Writer: 0.4
+  Bio::SeqIO::chaosxml:
+    description: writing chaos xml files
+    requires:
+      Data::Stag::XMLWriter: 0
+  Bio::SeqIO::entrezgene:
+    description: parsing entrezgene
+    requires:
+      Bio::ASN1::EntrezGene: 0
+  Bio::SeqIO::excel:
+    description: parsing Excel files
+    requires:
+      Spreadsheet::ParseExcel: 0
+  Bio::SeqIO::strider:
+    description: strider functionality
+    requires:
+      Convert::Binary::C: 0
+  Bio::SeqIO::tigrxml:
+    description: writing xml
+    requires:
+      XML::SAX::Writer: 0
+  'Bio::SeqIO::tinyseq,Bio::SeqIO::game::gameSubs,Bio::OntologyIO::InterProParser,Bio::ClusterIO::dbsnp':
+    description: parsing xml
+    requires:
+      XML::Parser::PerlSAX: 0
+  'Bio::Tools::WebBlat,Bio::FeatureIO::gff,Bio::FeatureIO::interpro,Bio::DB::Biblio::eutils,Bio::DB::EUtilities::Cookie,Bio::DB::Query::GenBank,Bio::DB::NCBIHelper,Bio::SeqFeature::Annotated':
+    description: dealing with web resources
+    requires:
+      URI::Escape: 0
+  Bio::Tree::Compatible:
+    description: proper operation
+    requires:
+      Set::Scalar: 0
+  Bio::Tree::Draw::Cladogram:
+    description: EPS output
+    requires:
+      PostScript::TextBlock: 0
+  Bio::TreeIO::svggraph:
+    description: creating SVG images
+    requires:
+      SVG::Graph: 0.01
+  'Bio::Variation::IO::xml,Bio::DB::Taxonomy::entrez,Bio::DB::Biblio::eutils,Bio::Graph::IO::psi_xml':
+    description: parsing xml
+    requires:
+      XML::Twig: 0
+dynamic_config: 1
+provides:
+  Bio::Align::AlignI:
+    file: Bio/Align/AlignI.pm
+    version: 1.005002102
+  Bio::Align::DNAStatistics:
+    file: Bio/Align/DNAStatistics.pm
+    version: 1.005002102
+  Bio::Align::PairwiseStatistics:
+    file: Bio/Align/PairwiseStatistics.pm
+    version: 1.005002102
+  Bio::Align::ProteinStatistics:
+    file: Bio/Align/ProteinStatistics.pm
+    version: 1.005002102
+  Bio::Align::StatisticsI:
+    file: Bio/Align/StatisticsI.pm
+    version: 1.005002102
+  Bio::Align::Utilities:
+    file: Bio/Align/Utilities.pm
+    version: 1.005002102
+  Bio::AlignIO:
+    file: Bio/AlignIO.pm
+    version: 1.005002102
+  Bio::AlignIO::bl2seq:
+    file: Bio/AlignIO/bl2seq.pm
+    version: 1.005002102
+  Bio::AlignIO::clustalw:
+    file: Bio/AlignIO/clustalw.pm
+    version: 1.005002102
+  Bio::AlignIO::emboss:
+    file: Bio/AlignIO/emboss.pm
+    version: 1.005002102
+  Bio::AlignIO::fasta:
+    file: Bio/AlignIO/fasta.pm
+    version: 1.005002102
+  Bio::AlignIO::largemultifasta:
+    file: Bio/AlignIO/largemultifasta.pm
+    version: 1.005002102
+  Bio::AlignIO::maf:
+    file: Bio/AlignIO/maf.pm
+    version: 1.005002102
+  Bio::AlignIO::mase:
+    file: Bio/AlignIO/mase.pm
+    version: 1.005002102
+  Bio::AlignIO::mega:
+    file: Bio/AlignIO/mega.pm
+    version: 1.005002102
+  Bio::AlignIO::meme:
+    file: Bio/AlignIO/meme.pm
+    version: 1.005002102
+  Bio::AlignIO::metafasta:
+    file: Bio/AlignIO/metafasta.pm
+    version: 1.005002102
+  Bio::AlignIO::msf:
+    file: Bio/AlignIO/msf.pm
+    version: 1.005002102
+  Bio::AlignIO::nexus:
+    file: Bio/AlignIO/nexus.pm
+    version: 1.005002102
+  Bio::AlignIO::pfam:
+    file: Bio/AlignIO/pfam.pm
+    version: 1.005002102
+  Bio::AlignIO::phylip:
+    file: Bio/AlignIO/phylip.pm
+    version: 1.005002102
+  Bio::AlignIO::po:
+    file: Bio/AlignIO/po.pm
+    version: 1.005002102
+  Bio::AlignIO::prodom:
+    file: Bio/AlignIO/prodom.pm
+    version: 1.005002102
+  Bio::AlignIO::psi:
+    file: Bio/AlignIO/psi.pm
+    version: 1.005002102
+  Bio::AlignIO::selex:
+    file: Bio/AlignIO/selex.pm
+    version: 1.005002102
+  Bio::AlignIO::stockholm:
+    file: Bio/AlignIO/stockholm.pm
+    version: 1.005002102
+  Bio::AnalysisI:
+    file: Bio/AnalysisI.pm
+    version: 1.005002102
+  Bio::AnalysisI::JobI:
+    file: Bio/AnalysisI.pm
+    version: 1.005002102
+  Bio::AnalysisParserI:
+    file: Bio/AnalysisParserI.pm
+    version: 1.005002102
+  Bio::AnalysisResultI:
+    file: Bio/AnalysisResultI.pm
+    version: 1.005002102
+  Bio::AnnotatableI:
+    file: Bio/AnnotatableI.pm
+    version: 1.005002102
+  Bio::Annotation::AnnotationFactory:
+    file: Bio/Annotation/AnnotationFactory.pm
+    version: 1.005002102
+  Bio::Annotation::Collection:
+    file: Bio/Annotation/Collection.pm
+    version: 1.005002102
+  Bio::Annotation::Comment:
+    file: Bio/Annotation/Comment.pm
+    version: 1.005002102
+  Bio::Annotation::DBLink:
+    file: Bio/Annotation/DBLink.pm
+    version: 1.005002102
+  Bio::Annotation::OntologyTerm:
+    file: Bio/Annotation/OntologyTerm.pm
+    version: 1.005002102
+  Bio::Annotation::Reference:
+    file: Bio/Annotation/Reference.pm
+    version: 1.005002102
+  Bio::Annotation::SimpleValue:
+    file: Bio/Annotation/SimpleValue.pm
+    version: 1.005002102
+  Bio::Annotation::StructuredValue:
+    file: Bio/Annotation/StructuredValue.pm
+    version: 1.005002102
+  Bio::Annotation::Target:
+    file: Bio/Annotation/Target.pm
+    version: 1.005002102
+  Bio::Annotation::TypeManager:
+    file: Bio/Annotation/TypeManager.pm
+    version: 1.005002102
+  Bio::AnnotationCollectionI:
+    file: Bio/AnnotationCollectionI.pm
+    version: 1.005002102
+  Bio::AnnotationI:
+    file: Bio/AnnotationI.pm
+    version: 1.005002102
+  Bio::Assembly::Contig:
+    file: Bio/Assembly/Contig.pm
+    version: 1.005002102
+  Bio::Assembly::ContigAnalysis:
+    file: Bio/Assembly/ContigAnalysis.pm
+    version: 1.005002102
+  Bio::Assembly::IO:
+    file: Bio/Assembly/IO.pm
+    version: 1.005002102
+  Bio::Assembly::IO::ace:
+    file: Bio/Assembly/IO/ace.pm
+    version: 1.005002102
+  Bio::Assembly::IO::phrap:
+    file: Bio/Assembly/IO/phrap.pm
+    version: 1.005002102
+  Bio::Assembly::Scaffold:
+    file: Bio/Assembly/Scaffold.pm
+    version: 1.005002102
+  Bio::Assembly::ScaffoldI:
+    file: Bio/Assembly/ScaffoldI.pm
+    version: 1.005002102
+  Bio::Assembly::Singlet:
+    file: Bio/Assembly/Singlet.pm
+    version: 1.005002102
+  Bio::Biblio:
+    file: Bio/Biblio.pm
+    version: 1.005002102
+  Bio::Biblio::Article:
+    file: Bio/Biblio/Article.pm
+    version: 1.005002102
+  Bio::Biblio::BiblioBase:
+    file: Bio/Biblio/BiblioBase.pm
+    version: 1.005002102
+  Bio::Biblio::Book:
+    file: Bio/Biblio/Book.pm
+    version: 1.005002102
+  Bio::Biblio::BookArticle:
+    file: Bio/Biblio/BookArticle.pm
+    version: 1.005002102
+  Bio::Biblio::IO:
+    file: Bio/Biblio/IO.pm
+    version: 1.005002102
+  Bio::Biblio::IO::medline2ref:
+    file: Bio/Biblio/IO/medline2ref.pm
+    version: 1.005002102
+  Bio::Biblio::IO::medlinexml:
+    file: Bio/Biblio/IO/medlinexml.pm
+    version: 1.005002102
+  Bio::Biblio::IO::pubmed2ref:
+    file: Bio/Biblio/IO/pubmed2ref.pm
+    version: 1.005002102
+  Bio::Biblio::IO::pubmedxml:
+    file: Bio/Biblio/IO/pubmedxml.pm
+    version: 1.005002102
+  Bio::Biblio::Journal:
+    file: Bio/Biblio/Journal.pm
+    version: 1.005002102
+  Bio::Biblio::JournalArticle:
+    file: Bio/Biblio/JournalArticle.pm
+    version: 1.005002102
+  Bio::Biblio::MedlineArticle:
+    file: Bio/Biblio/MedlineArticle.pm
+    version: 1.005002102
+  Bio::Biblio::MedlineBook:
+    file: Bio/Biblio/MedlineBook.pm
+    version: 1.005002102
+  Bio::Biblio::MedlineBookArticle:
+    file: Bio/Biblio/MedlineBookArticle.pm
+    version: 1.005002102
+  Bio::Biblio::MedlineJournal:
+    file: Bio/Biblio/MedlineJournal.pm
+    version: 1.005002102
+  Bio::Biblio::MedlineJournalArticle:
+    file: Bio/Biblio/MedlineJournalArticle.pm
+    version: 1.005002102
+  Bio::Biblio::Organisation:
+    file: Bio/Biblio/Organisation.pm
+    version: 1.005002102
+  Bio::Biblio::Patent:
+    file: Bio/Biblio/Patent.pm
+    version: 1.005002102
+  Bio::Biblio::Person:
+    file: Bio/Biblio/Person.pm
+    version: 1.005002102
+  Bio::Biblio::Proceeding:
+    file: Bio/Biblio/Proceeding.pm
+    version: 1.005002102
+  Bio::Biblio::Provider:
+    file: Bio/Biblio/Provider.pm
+    version: 1.005002102
+  Bio::Biblio::PubmedArticle:
+    file: Bio/Biblio/PubmedArticle.pm
+    version: 1.005002102
+  Bio::Biblio::PubmedBookArticle:
+    file: Bio/Biblio/PubmedBookArticle.pm
+    version: 1.005002102
+  Bio::Biblio::PubmedJournalArticle:
+    file: Bio/Biblio/PubmedJournalArticle.pm
+    version: 1.005002102
+  Bio::Biblio::Ref:
+    file: Bio/Biblio/Ref.pm
+    version: 1.005002102
+  Bio::Biblio::Service:
+    file: Bio/Biblio/Service.pm
+    version: 1.005002102
+  Bio::Biblio::TechReport:
+    file: Bio/Biblio/TechReport.pm
+    version: 1.005002102
+  Bio::Biblio::Thesis:
+    file: Bio/Biblio/Thesis.pm
+    version: 1.005002102
+  Bio::Biblio::WebResource:
+    file: Bio/Biblio/WebResource.pm
+    version: 1.005002102
+  Bio::Cluster::ClusterFactory:
+    file: Bio/Cluster/ClusterFactory.pm
+    version: 1.005002102
+  Bio::Cluster::FamilyI:
+    file: Bio/Cluster/FamilyI.pm
+    version: 1.005002102
+  Bio::Cluster::SequenceFamily:
+    file: Bio/Cluster/SequenceFamily.pm
+    version: 1.005002102
+  Bio::Cluster::UniGene:
+    file: Bio/Cluster/UniGene.pm
+    version: 1.005002102
+  Bio::Cluster::UniGeneI:
+    file: Bio/Cluster/UniGeneI.pm
+    version: 1.005002102
+  Bio::ClusterI:
+    file: Bio/ClusterI.pm
+    version: 1.005002102
+  Bio::ClusterIO:
+    file: Bio/ClusterIO.pm
+    version: 1.005002102
+  Bio::ClusterIO::dbsnp:
+    file: Bio/ClusterIO/dbsnp.pm
+    version: 1.005002102
+  Bio::ClusterIO::unigene:
+    file: Bio/ClusterIO/unigene.pm
+    version: 1.005002102
+  Bio::CodonUsage::IO:
+    file: Bio/CodonUsage/IO.pm
+    version: 1.005002102
+  Bio::CodonUsage::Table:
+    file: Bio/CodonUsage/Table.pm
+    version: 1.005002102
+  Bio::Coordinate::Chain:
+    file: Bio/Coordinate/Chain.pm
+    version: 1.005002102
+  Bio::Coordinate::Collection:
+    file: Bio/Coordinate/Collection.pm
+    version: 1.005002102
+  Bio::Coordinate::ExtrapolatingPair:
+    file: Bio/Coordinate/ExtrapolatingPair.pm
+    version: 1.005002102
+  Bio::Coordinate::GeneMapper:
+    file: Bio/Coordinate/GeneMapper.pm
+    version: 1.005002102
+  Bio::Coordinate::Graph:
+    file: Bio/Coordinate/Graph.pm
+    version: 1.005002102
+  Bio::Coordinate::MapperI:
+    file: Bio/Coordinate/MapperI.pm
+    version: 1.005002102
+  Bio::Coordinate::Pair:
+    file: Bio/Coordinate/Pair.pm
+    version: 1.005002102
+  Bio::Coordinate::Result:
+    file: Bio/Coordinate/Result.pm
+    version: 1.005002102
+  Bio::Coordinate::Result::Gap:
+    file: Bio/Coordinate/Result/Gap.pm
+    version: 1.005002102
+  Bio::Coordinate::Result::Match:
+    file: Bio/Coordinate/Result/Match.pm
+    version: 1.005002102
+  Bio::Coordinate::ResultI:
+    file: Bio/Coordinate/ResultI.pm
+    version: 1.005002102
+  Bio::Coordinate::Utils:
+    file: Bio/Coordinate/Utils.pm
+    version: 1.005002102
+  Bio::DB::Ace:
+    file: Bio/DB/Ace.pm
+    version: 1.005002102
+  Bio::DB::Biblio::biofetch:
+    file: Bio/DB/Biblio/biofetch.pm
+    version: 1.005002102
+  Bio::DB::Biblio::eutils:
+    file: Bio/DB/Biblio/eutils.pm
+    version: 1.005002102
+  Bio::DB::Biblio::pdf:
+    file: Bio/DB/Biblio/pdf.pm
+    version: 1.005002102
+  Bio::DB::Biblio::soap:
+    file: Bio/DB/Biblio/soap.pm
+    version: 1.005002102
+  Bio::DB::BiblioI:
+    file: Bio/DB/BiblioI.pm
+    version: 1.005002102
+  Bio::DB::BioFetch:
+    file: Bio/DB/BioFetch.pm
+    version: 1.005002102
+  Bio::DB::CUTG:
+    file: Bio/DB/CUTG.pm
+    version: 1.005002102
+  Bio::DB::DBFetch:
+    file: Bio/DB/DBFetch.pm
+    version: 1.005002102
+  Bio::DB::EMBL:
+    file: Bio/DB/EMBL.pm
+    version: 1.005002102
+  Bio::DB::EUtilities:
+    file: Bio/DB/EUtilities.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::Cookie:
+    file: Bio/DB/EUtilities/Cookie.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::ElinkData:
+    file: Bio/DB/EUtilities/ElinkData.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::efetch:
+    file: Bio/DB/EUtilities/efetch.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::egquery:
+    file: Bio/DB/EUtilities/egquery.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::einfo:
+    file: Bio/DB/EUtilities/einfo.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::elink:
+    file: Bio/DB/EUtilities/elink.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::epost:
+    file: Bio/DB/EUtilities/epost.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::esearch:
+    file: Bio/DB/EUtilities/esearch.pm
+    version: 1.005002102
+  Bio::DB::EUtilities::esummary:
+    file: Bio/DB/EUtilities/esummary.pm
+    version: 1.005002102
+  Bio::DB::EntrezGene:
+    file: Bio/DB/EntrezGene.pm
+    version: 1.005002102
+  Bio::DB::Expression:
+    file: Bio/DB/Expression.pm
+    version: 1.005002102
+  Bio::DB::Expression::geo:
+    file: Bio/DB/Expression/geo.pm
+    version: 1.005002102
+  Bio::DB::Failover:
+    file: Bio/DB/Failover.pm
+    version: 1.005002102
+  Bio::DB::Fasta:
+    file: Bio/DB/Fasta.pm
+    version: 1.005002102
+  Bio::DB::Fasta::Stream:
+    file: Bio/DB/Fasta.pm
+    version: 1.005002102
+  Bio::DB::FileCache:
+    file: Bio/DB/FileCache.pm
+    version: 1.005002102
+  Bio::DB::Flat:
+    file: Bio/DB/Flat.pm
+    version: 1.005002102
+  Bio::DB::Flat::BDB:
+    file: Bio/DB/Flat/BDB.pm
+    version: 1.005002102
+  Bio::DB::Flat::BDB::embl:
+    file: Bio/DB/Flat/BDB/embl.pm
+    version: 1.005002102
+  Bio::DB::Flat::BDB::fasta:
+    file: Bio/DB/Flat/BDB/fasta.pm
+    version: 1.005002102
+  Bio::DB::Flat::BDB::genbank:
+    file: Bio/DB/Flat/BDB/genbank.pm
+    version: 1.005002102
+  Bio::DB::Flat::BDB::swiss:
+    file: Bio/DB/Flat/BDB/swiss.pm
+    version: 1.005002102
+  Bio::DB::Flat::BDB::swissprot:
+    file: Bio/DB/Flat/BDB/swissprot.pm
+    version: 1.005002102
+  Bio::DB::Flat::BinarySearch:
+    file: Bio/DB/Flat/BinarySearch.pm
+    version: 1.005002102
+  Bio::DB::GDB:
+    file: Bio/DB/GDB.pm
+    version: 1.005002102
+  Bio::DB::GFF:
+    file: Bio/DB/GFF.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::ace:
+    file: Bio/DB/GFF/Adaptor/ace.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::berkeleydb:
+    file: Bio/DB/GFF/Adaptor/berkeleydb.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::berkeleydb::iterator:
+    file: Bio/DB/GFF/Adaptor/berkeleydb/iterator.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::biofetch:
+    file: Bio/DB/GFF/Adaptor/biofetch.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::biofetch_oracle:
+    file: Bio/DB/GFF/Adaptor/biofetch_oracle.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi:
+    file: Bio/DB/GFF/Adaptor/dbi.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::caching_handle:
+    file: Bio/DB/GFF/Adaptor/dbi/caching_handle.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::faux_dbh:
+    file: Bio/DB/GFF/Adaptor/dbi/caching_handle.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::iterator:
+    file: Bio/DB/GFF/Adaptor/dbi/iterator.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::mysql:
+    file: Bio/DB/GFF/Adaptor/dbi/mysql.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::mysqlace:
+    file: Bio/DB/GFF/Adaptor/dbi/mysqlace.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::mysqlcmap:
+    file: Bio/DB/GFF/Adaptor/dbi/mysqlcmap.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::mysqlopt:
+    file: Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::oracle:
+    file: Bio/DB/GFF/Adaptor/dbi/oracle.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::oracleace:
+    file: Bio/DB/GFF/Adaptor/dbi/oracleace.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::pg:
+    file: Bio/DB/GFF/Adaptor/dbi/pg.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::dbi::pg_fts:
+    file: Bio/DB/GFF/Adaptor/dbi/pg_fts.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::memory:
+    file: Bio/DB/GFF/Adaptor/memory.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::memory::feature_serializer:
+    file: Bio/DB/GFF/Adaptor/memory/feature_serializer.pm
+    version: 1.005002102
+  Bio::DB::GFF::Adaptor::memory::iterator:
+    file: Bio/DB/GFF/Adaptor/memory/iterator.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator:
+    file: Bio/DB/GFF/Aggregator.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::alignment:
+    file: Bio/DB/GFF/Aggregator/alignment.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::clone:
+    file: Bio/DB/GFF/Aggregator/clone.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::coding:
+    file: Bio/DB/GFF/Aggregator/coding.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::match:
+    file: Bio/DB/GFF/Aggregator/match.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::none:
+    file: Bio/DB/GFF/Aggregator/none.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::processed_transcript:
+    file: Bio/DB/GFF/Aggregator/processed_transcript.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::so_transcript:
+    file: Bio/DB/GFF/Aggregator/so_transcript.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::transcript:
+    file: Bio/DB/GFF/Aggregator/transcript.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_acembly:
+    file: Bio/DB/GFF/Aggregator/ucsc_acembly.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_ensgene:
+    file: Bio/DB/GFF/Aggregator/ucsc_ensgene.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_genscan:
+    file: Bio/DB/GFF/Aggregator/ucsc_genscan.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_refgene:
+    file: Bio/DB/GFF/Aggregator/ucsc_refgene.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_sanger22:
+    file: Bio/DB/GFF/Aggregator/ucsc_sanger22.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_sanger22pseudo:
+    file: Bio/DB/GFF/Aggregator/ucsc_sanger22pseudo.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_softberry:
+    file: Bio/DB/GFF/Aggregator/ucsc_softberry.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_twinscan:
+    file: Bio/DB/GFF/Aggregator/ucsc_twinscan.pm
+    version: 1.005002102
+  Bio::DB::GFF::Aggregator::ucsc_unigene:
+    file: Bio/DB/GFF/Aggregator/ucsc_unigene.pm
+    version: 1.005002102
+  Bio::DB::GFF::Featname:
+    file: Bio/DB/GFF/Featname.pm
+    version: 1.005002102
+  Bio::DB::GFF::Feature:
+    file: Bio/DB/GFF/Feature.pm
+    version: 1.005002102
+  Bio::DB::GFF::Homol:
+    file: Bio/DB/GFF/Homol.pm
+    version: 1.005002102
+  Bio::DB::GFF::ID_Iterator:
+    file: Bio/DB/GFF.pm
+    version: 1.005002102
+  Bio::DB::GFF::RelSegment:
+    file: Bio/DB/GFF/RelSegment.pm
+    version: 1.005002102
+  Bio::DB::GFF::Segment:
+    file: Bio/DB/GFF/Segment.pm
+    version: 1.005002102
+  Bio::DB::GFF::Typename:
+    file: Bio/DB/GFF/Typename.pm
+    version: 1.005002102
+  Bio::DB::GFF::Util::Binning:
+    file: Bio/DB/GFF/Util/Binning.pm
+    version: 1.005002102
+  Bio::DB::GFF::Util::Rearrange:
+    file: Bio/DB/GFF/Util/Rearrange.pm
+    version: 1.005002102
+  Bio::DB::GenBank:
+    file: Bio/DB/GenBank.pm
+    version: 1.005002102
+  Bio::DB::GenPept:
+    file: Bio/DB/GenPept.pm
+    version: 1.005002102
+  Bio::DB::GenericWebDBI:
+    file: Bio/DB/GenericWebDBI.pm
+    version: 1.005002102
+  Bio::DB::InMemoryCache:
+    file: Bio/DB/InMemoryCache.pm
+    version: 1.005002102
+  Bio::DB::LocationI:
+    file: Bio/DB/LocationI.pm
+    version: 1.005002102
+  Bio::DB::MeSH:
+    file: Bio/DB/MeSH.pm
+    version: 1.005002102
+  Bio::DB::NCBIHelper:
+    file: Bio/DB/NCBIHelper.pm
+    version: 1.005002102
+  Bio::DB::Query::GenBank:
+    file: Bio/DB/Query/GenBank.pm
+    version: 1.005002102
+  Bio::DB::Query::WebQuery:
+    file: Bio/DB/Query/WebQuery.pm
+    version: 1.005002102
+  Bio::DB::QueryI:
+    file: Bio/DB/QueryI.pm
+    version: 1.005002102
+  Bio::DB::RandomAccessI:
+    file: Bio/DB/RandomAccessI.pm
+    version: 1.005002102
+  Bio::DB::RefSeq:
+    file: Bio/DB/RefSeq.pm
+    version: 1.005002102
+  Bio::DB::ReferenceI:
+    file: Bio/DB/ReferenceI.pm
+    version: 1.005002102
+  Bio::DB::Registry:
+    file: Bio/DB/Registry.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature:
+    file: Bio/DB/SeqFeature.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::NormalizedFeature:
+    file: Bio/DB/SeqFeature/NormalizedFeature.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::NormalizedFeatureI:
+    file: Bio/DB/SeqFeature/NormalizedFeatureI.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::NormalizedTableFeatureI:
+    file: Bio/DB/SeqFeature/NormalizedTableFeatureI.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Segment:
+    file: Bio/DB/SeqFeature/Segment.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store:
+    file: Bio/DB/SeqFeature/Store.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::DBI::Iterator:
+    file: Bio/DB/SeqFeature/Store/DBI/Iterator.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::DBI::mysql:
+    file: Bio/DB/SeqFeature/Store/DBI/mysql.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::GFF3Loader:
+    file: Bio/DB/SeqFeature/Store/GFF3Loader.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::bdb:
+    file: Bio/DB/SeqFeature/Store/bdb.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::berkeleydb:
+    file: Bio/DB/SeqFeature/Store/berkeleydb.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::berkeleydb::Iterator:
+    file: Bio/DB/SeqFeature/Store/berkeleydb.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::memory:
+    file: Bio/DB/SeqFeature/Store/memory.pm
+    version: 1.005002102
+  Bio::DB::SeqFeature::Store::memory::Iterator:
+    file: Bio/DB/SeqFeature/Store/memory.pm
+    version: 1.005002102
+  Bio::DB::SeqHound:
+    file: Bio/DB/SeqHound.pm
+    version: 1.005002102
+  Bio::DB::SeqI:
+    file: Bio/DB/SeqI.pm
+    version: 1.005002102
+  Bio::DB::SeqVersion:
+    file: Bio/DB/SeqVersion.pm
+    version: 1.005002102
+  Bio::DB::SeqVersion::gi:
+    file: Bio/DB/SeqVersion/gi.pm
+    version: 1.005002102
+  Bio::DB::SwissProt:
+    file: Bio/DB/SwissProt.pm
+    version: 1.005002102
+  Bio::DB::Taxonomy:
+    file: Bio/DB/Taxonomy.pm
+    version: 1.005002102
+  Bio::DB::Taxonomy::entrez:
+    file: Bio/DB/Taxonomy/entrez.pm
+    version: 1.005002102
+  Bio::DB::Taxonomy::flatfile:
+    file: Bio/DB/Taxonomy/flatfile.pm
+    version: 1.005002102
+  Bio::DB::Taxonomy::list:
+    file: Bio/DB/Taxonomy/list.pm
+    version: 1.005002102
+  Bio::DB::Universal:
+    file: Bio/DB/Universal.pm
+    version: 1.005002102
+  Bio::DB::UpdateableSeqI:
+    file: Bio/DB/UpdateableSeqI.pm
+    version: 1.005002102
+  Bio::DB::WebDBSeqI:
+    file: Bio/DB/WebDBSeqI.pm
+    version: 1.005002102
+  Bio::DB::XEMBL:
+    file: Bio/DB/XEMBL.pm
+    version: 1.005002102
+  Bio::DB::XEMBLService:
+    file: Bio/DB/XEMBLService.pm
+    version: 1.005002102
+  Bio::DBLinkContainerI:
+    file: Bio/DBLinkContainerI.pm
+    version: 1.005002102
+  Bio::Das::FeatureTypeI:
+    file: Bio/Das/FeatureTypeI.pm
+    version: 1.005002102
+  Bio::Das::SegmentI:
+    file: Bio/Das/SegmentI.pm
+    version: 1.005002102
+  Bio::DasI:
+    file: Bio/DasI.pm
+    version: 1.005002102
+  Bio::DescribableI:
+    file: Bio/DescribableI.pm
+    version: 1.005002102
+  Bio::Event::EventGeneratorI:
+    file: Bio/Event/EventGeneratorI.pm
+    version: 1.005002102
+  Bio::Event::EventHandlerI:
+    file: Bio/Event/EventHandlerI.pm
+    version: 1.005002102
+  Bio::Expression::Contact:
+    file: Bio/Expression/Contact.pm
+    version: 1.005002102
+  Bio::Expression::DataSet:
+    file: Bio/Expression/DataSet.pm
+    version: 1.005002102
+  Bio::Expression::FeatureGroup:
+    file: Bio/Expression/FeatureGroup.pm
+    version: 1.005002102
+  Bio::Expression::FeatureGroup::FeatureGroupMas50:
+    file: Bio/Expression/FeatureGroup/FeatureGroupMas50.pm
+    version: 1.005002102
+  Bio::Expression::FeatureI:
+    file: Bio/Expression/FeatureI.pm
+    version: 1.005002102
+  Bio::Expression::FeatureSet::FeatureSetMas50:
+    file: Bio/Expression/FeatureSet/FeatureSetMas50.pm
+    version: 1.005002102
+  Bio::Expression::Platform:
+    file: Bio/Expression/Platform.pm
+    version: 1.005002102
+  Bio::Expression::ProbeI:
+    file: Bio/Expression/ProbeI.pm
+    version: 1.005002102
+  Bio::Expression::Sample:
+    file: Bio/Expression/Sample.pm
+    version: 1.005002102
+  Bio::Factory::AnalysisI:
+    file: Bio/Factory/AnalysisI.pm
+    version: 1.005002102
+  Bio::Factory::ApplicationFactoryI:
+    file: Bio/Factory/ApplicationFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::DriverFactory:
+    file: Bio/Factory/DriverFactory.pm
+    version: 1.005002102
+  Bio::Factory::FTLocationFactory:
+    file: Bio/Factory/FTLocationFactory.pm
+    version: 1.005002102
+  Bio::Factory::HitFactoryI:
+    file: Bio/Factory/HitFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::LocationFactoryI:
+    file: Bio/Factory/LocationFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::MapFactoryI:
+    file: Bio/Factory/MapFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::ObjectBuilderI:
+    file: Bio/Factory/ObjectBuilderI.pm
+    version: 1.005002102
+  Bio::Factory::ObjectFactory:
+    file: Bio/Factory/ObjectFactory.pm
+    version: 1.005002102
+  Bio::Factory::ObjectFactoryI:
+    file: Bio/Factory/ObjectFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::ResultFactoryI:
+    file: Bio/Factory/ResultFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::SeqAnalysisParserFactory:
+    file: Bio/Factory/SeqAnalysisParserFactory.pm
+    version: 1.005002102
+  Bio::Factory::SeqAnalysisParserFactoryI:
+    file: Bio/Factory/SeqAnalysisParserFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::SequenceFactoryI:
+    file: Bio/Factory/SequenceFactoryI.pm
+    version: 1.005002102
+  Bio::Factory::SequenceProcessorI:
+    file: Bio/Factory/SequenceProcessorI.pm
+    version: 1.005002102
+  Bio::Factory::SequenceStreamI:
+    file: Bio/Factory/SequenceStreamI.pm
+    version: 1.005002102
+  Bio::Factory::TreeFactoryI:
+    file: Bio/Factory/TreeFactoryI.pm
+    version: 1.005002102
+  Bio::FeatureHolderI:
+    file: Bio/FeatureHolderI.pm
+    version: 1.005002102
+  Bio::FeatureIO:
+    file: Bio/FeatureIO.pm
+    version: 1.005002102
+  Bio::FeatureIO::bed:
+    file: Bio/FeatureIO/bed.pm
+    version: 1.005002102
+  Bio::FeatureIO::gff:
+    file: Bio/FeatureIO/gff.pm
+    version: 1.005002102
+  Bio::FeatureIO::gtf:
+    file: Bio/FeatureIO/gtf.pm
+    version: 1.005002102
+  Bio::FeatureIO::interpro:
+    file: Bio/FeatureIO/interpro.pm
+    version: 1.005002102
+  Bio::FeatureIO::ptt:
+    file: Bio/FeatureIO/ptt.pm
+    version: 1.005002102
+  Bio::Graph::Edge:
+    file: Bio/Graph/Edge.pm
+    version: 1.005002102
+  Bio::Graph::IO:
+    file: Bio/Graph/IO.pm
+    version: 1.005002102
+  Bio::Graph::IO::dip:
+    file: Bio/Graph/IO/dip.pm
+    version: 1.005002102
+  Bio::Graph::IO::psi_xml:
+    file: Bio/Graph/IO/psi_xml.pm
+    version: 1.005002102
+  Bio::Graph::ProteinGraph:
+    file: Bio/Graph/ProteinGraph.pm
+    version: 1.005002102
+  Bio::Graph::SimpleGraph:
+    file: Bio/Graph/SimpleGraph.pm
+    version: 1.005002102
+  Bio::Graph::SimpleGraph::Traversal:
+    file: Bio/Graph/SimpleGraph/Traversal.pm
+    version: 1.005002102
+  Bio::Graphics:
+    file: Bio/Graphics.pm
+    version: 1.005002102
+  Bio::Graphics::ConfiguratorI:
+    file: Bio/Graphics/ConfiguratorI.pm
+    version: 1.005002102
+  Bio::Graphics::Feature:
+    file: Bio/Graphics/Feature.pm
+    version: 1.005002102
+  Bio::Graphics::FeatureBase:
+    file: Bio/Graphics/FeatureBase.pm
+    version: 1.005002102
+  Bio::Graphics::FeatureFile:
+    file: Bio/Graphics/FeatureFile.pm
+    version: 1.005002102
+  Bio::Graphics::FeatureFile::Iterator:
+    file: Bio/Graphics/FeatureFile/Iterator.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph:
+    file: Bio/Graphics/Glyph.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::Factory:
+    file: Bio/Graphics/Glyph/Factory.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::alignment:
+    file: Bio/Graphics/Glyph/alignment.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::anchored_arrow:
+    file: Bio/Graphics/Glyph/anchored_arrow.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::arrow:
+    file: Bio/Graphics/Glyph/arrow.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::box:
+    file: Bio/Graphics/Glyph/box.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::broken_line:
+    file: Bio/Graphics/Glyph/broken_line.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::cds:
+    file: Bio/Graphics/Glyph/cds.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::christmas_arrow:
+    file: Bio/Graphics/Glyph/christmas_arrow.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::crossbox:
+    file: Bio/Graphics/Glyph/crossbox.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::dashed_line:
+    file: Bio/Graphics/Glyph/dashed_line.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::diamond:
+    file: Bio/Graphics/Glyph/diamond.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::dna:
+    file: Bio/Graphics/Glyph/dna.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::dot:
+    file: Bio/Graphics/Glyph/dot.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::dumbbell:
+    file: Bio/Graphics/Glyph/dumbbell.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::ellipse:
+    file: Bio/Graphics/Glyph/ellipse.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::ex:
+    file: Bio/Graphics/Glyph/ex.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::extending_arrow:
+    file: Bio/Graphics/Glyph/extending_arrow.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::flag:
+    file: Bio/Graphics/Glyph/flag.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::gene:
+    file: Bio/Graphics/Glyph/gene.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::generic:
+    file: Bio/Graphics/Glyph/generic.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::graded_segments:
+    file: Bio/Graphics/Glyph/graded_segments.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::group:
+    file: Bio/Graphics/Glyph/group.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::heterogeneous_segments:
+    file: Bio/Graphics/Glyph/heterogeneous_segments.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::image:
+    file: Bio/Graphics/Glyph/image.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::lightning:
+    file: Bio/Graphics/Glyph/lightning.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::line:
+    file: Bio/Graphics/Glyph/line.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::merge_parts:
+    file: Bio/Graphics/Glyph/merge_parts.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::merged_alignment:
+    file: Bio/Graphics/Glyph/merged_alignment.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::minmax:
+    file: Bio/Graphics/Glyph/minmax.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::oval:
+    file: Bio/Graphics/Glyph/oval.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::pentagram:
+    file: Bio/Graphics/Glyph/pentagram.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::pinsertion:
+    file: Bio/Graphics/Glyph/pinsertion.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::primers:
+    file: Bio/Graphics/Glyph/primers.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::processed_transcript:
+    file: Bio/Graphics/Glyph/processed_transcript.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::protein:
+    file: Bio/Graphics/Glyph/protein.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::ragged_ends:
+    file: Bio/Graphics/Glyph/ragged_ends.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::redgreen_box:
+    file: Bio/Graphics/Glyph/redgreen_box.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::redgreen_segment:
+    file: Bio/Graphics/Glyph/redgreen_segment.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::repeating_shape:
+    file: Bio/Graphics/Glyph/repeating_shape.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::rndrect:
+    file: Bio/Graphics/Glyph/rndrect.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::ruler_arrow:
+    file: Bio/Graphics/Glyph/ruler_arrow.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::saw_teeth:
+    file: Bio/Graphics/Glyph/saw_teeth.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::segmented_keyglyph:
+    file: Bio/Graphics/Glyph/segmented_keyglyph.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::segments:
+    file: Bio/Graphics/Glyph/segments.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::so_transcript:
+    file: Bio/Graphics/Glyph/so_transcript.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::span:
+    file: Bio/Graphics/Glyph/span.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::splice_site:
+    file: Bio/Graphics/Glyph/splice_site.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::text_in_box:
+    file: Bio/Graphics/Glyph/text_in_box.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::three_letters:
+    file: Bio/Graphics/Glyph/three_letters.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::tic_tac_toe:
+    file: Bio/Graphics/Glyph/tic_tac_toe.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::toomany:
+    file: Bio/Graphics/Glyph/toomany.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::track:
+    file: Bio/Graphics/Glyph/track.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::transcript:
+    file: Bio/Graphics/Glyph/transcript.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::transcript2:
+    file: Bio/Graphics/Glyph/transcript2.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::translation:
+    file: Bio/Graphics/Glyph/translation.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::triangle:
+    file: Bio/Graphics/Glyph/triangle.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::two_bolts:
+    file: Bio/Graphics/Glyph/two_bolts.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::wave:
+    file: Bio/Graphics/Glyph/wave.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::weighted_arrow:
+    file: Bio/Graphics/Glyph/weighted_arrow.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::whiskerplot:
+    file: Bio/Graphics/Glyph/whiskerplot.pm
+    version: 1.005002102
+  Bio::Graphics::Glyph::xyplot:
+    file: Bio/Graphics/Glyph/xyplot.pm
+    version: 1.005002102
+  Bio::Graphics::Panel:
+    file: Bio/Graphics/Panel.pm
+    version: 1.005002102
+  Bio::Graphics::Pictogram:
+    file: Bio/Graphics/Pictogram.pm
+    version: 1.005002102
+  Bio::Graphics::RendererI:
+    file: Bio/Graphics/RendererI.pm
+    version: 1.005002102
+  Bio::Graphics::Util:
+    file: Bio/Graphics/Util.pm
+    version: 1.005002102
+  Bio::IdCollectionI:
+    file: Bio/IdCollectionI.pm
+    version: 1.005002102
+  Bio::IdentifiableI:
+    file: Bio/IdentifiableI.pm
+    version: 1.005002102
+  Bio::Index::Abstract:
+    file: Bio/Index/Abstract.pm
+    version: 1.005002102
+  Bio::Index::AbstractSeq:
+    file: Bio/Index/AbstractSeq.pm
+    version: 1.005002102
+  Bio::Index::Blast:
+    file: Bio/Index/Blast.pm
+    version: 1.005002102
+  Bio::Index::EMBL:
+    file: Bio/Index/EMBL.pm
+    version: 1.005002102
+  Bio::Index::Fasta:
+    file: Bio/Index/Fasta.pm
+    version: 1.005002102
+  Bio::Index::Fastq:
+    file: Bio/Index/Fastq.pm
+    version: 1.005002102
+  Bio::Index::GenBank:
+    file: Bio/Index/GenBank.pm
+    version: 1.005002102
+  Bio::Index::Hmmer:
+    file: Bio/Index/Hmmer.pm
+    version: 1.005002102
+  Bio::Index::Qual:
+    file: Bio/Index/Qual.pm
+    version: 1.005002102
+  Bio::Index::SwissPfam:
+    file: Bio/Index/SwissPfam.pm
+    version: 1.005002102
+  Bio::Index::Swissprot:
+    file: Bio/Index/Swissprot.pm
+    version: 1.005002102
+  Bio::LiveSeq::AARange:
+    file: Bio/LiveSeq/AARange.pm
+    version: 1.005002102
+  Bio::LiveSeq::Chain:
+    file: Bio/LiveSeq/Chain.pm
+    version: 1.005002102
+  Bio::LiveSeq::ChainI:
+    file: Bio/LiveSeq/ChainI.pm
+    version: 1.005002102
+  Bio::LiveSeq::DNA:
+    file: Bio/LiveSeq/DNA.pm
+    version: 1.005002102
+  Bio::LiveSeq::Exon:
+    file: Bio/LiveSeq/Exon.pm
+    version: 1.005002102
+  Bio::LiveSeq::Gene:
+    file: Bio/LiveSeq/Gene.pm
+    version: 1.005002102
+  Bio::LiveSeq::IO::BioPerl:
+    file: Bio/LiveSeq/IO/BioPerl.pm
+    version: 1.005002102
+  Bio::LiveSeq::IO::Loader:
+    file: Bio/LiveSeq/IO/Loader.pm
+    version: 1.005002102
+  Bio::LiveSeq::Intron:
+    file: Bio/LiveSeq/Intron.pm
+    version: 1.005002102
+  Bio::LiveSeq::Mutation:
+    file: Bio/LiveSeq/Mutation.pm
+    version: 1.005002102
+  Bio::LiveSeq::Mutator:
+    file: Bio/LiveSeq/Mutator.pm
+    version: 1.005002102
+  Bio::LiveSeq::Prim_Transcript:
+    file: Bio/LiveSeq/Prim_Transcript.pm
+    version: 1.005002102
+  Bio::LiveSeq::Range:
+    file: Bio/LiveSeq/Range.pm
+    version: 1.005002102
+  Bio::LiveSeq::Repeat_Region:
+    file: Bio/LiveSeq/Repeat_Region.pm
+    version: 1.005002102
+  Bio::LiveSeq::Repeat_Unit:
+    file: Bio/LiveSeq/Repeat_Unit.pm
+    version: 1.005002102
+  Bio::LiveSeq::SeqI:
+    file: Bio/LiveSeq/SeqI.pm
+    version: 1.005002102
+  Bio::LiveSeq::Transcript:
+    file: Bio/LiveSeq/Transcript.pm
+    version: 1.005002102
+  Bio::LiveSeq::Translation:
+    file: Bio/LiveSeq/Translation.pm
+    version: 1.005002102
+  Bio::LocatableSeq:
+    file: Bio/LocatableSeq.pm
+    version: 1.005002102
+  Bio::Location::Atomic:
+    file: Bio/Location/Atomic.pm
+    version: 1.005002102
+  Bio::Location::AvWithinCoordPolicy:
+    file: Bio/Location/AvWithinCoordPolicy.pm
+    version: 1.005002102
+  Bio::Location::CoordinatePolicyI:
+    file: Bio/Location/CoordinatePolicyI.pm
+    version: 1.005002102
+  Bio::Location::Fuzzy:
+    file: Bio/Location/Fuzzy.pm
+    version: 1.005002102
+  Bio::Location::FuzzyLocationI:
+    file: Bio/Location/FuzzyLocationI.pm
+    version: 1.005002102
+  Bio::Location::NarrowestCoordPolicy:
+    file: Bio/Location/NarrowestCoordPolicy.pm
+    version: 1.005002102
+  Bio::Location::Simple:
+    file: Bio/Location/Simple.pm
+    version: 1.005002102
+  Bio::Location::Split:
+    file: Bio/Location/Split.pm
+    version: 1.005002102
+  Bio::Location::SplitLocationI:
+    file: Bio/Location/SplitLocationI.pm
+    version: 1.005002102
+  Bio::Location::WidestCoordPolicy:
+    file: Bio/Location/WidestCoordPolicy.pm
+    version: 1.005002102
+  Bio::LocationI:
+    file: Bio/LocationI.pm
+    version: 1.005002102
+  Bio::Map::Clone:
+    file: Bio/Map/Clone.pm
+    version: 1.005002102
+  Bio::Map::Contig:
+    file: Bio/Map/Contig.pm
+    version: 1.005002102
+  Bio::Map::CytoMap:
+    file: Bio/Map/CytoMap.pm
+    version: 1.005002102
+  Bio::Map::CytoMarker:
+    file: Bio/Map/CytoMarker.pm
+    version: 1.005002102
+  Bio::Map::CytoPosition:
+    file: Bio/Map/CytoPosition.pm
+    version: 1.005002102
+  Bio::Map::EntityI:
+    file: Bio/Map/EntityI.pm
+    version: 1.005002102
+  Bio::Map::FPCMarker:
+    file: Bio/Map/FPCMarker.pm
+    version: 1.005002102
+  Bio::Map::LinkageMap:
+    file: Bio/Map/LinkageMap.pm
+    version: 1.005002102
+  Bio::Map::LinkagePosition:
+    file: Bio/Map/LinkagePosition.pm
+    version: 1.005002102
+  Bio::Map::MapI:
+    file: Bio/Map/MapI.pm
+    version: 1.005002102
+  Bio::Map::Mappable:
+    file: Bio/Map/Mappable.pm
+    version: 1.005002102
+  Bio::Map::MappableI:
+    file: Bio/Map/MappableI.pm
+    version: 1.005002102
+  Bio::Map::Marker:
+    file: Bio/Map/Marker.pm
+    version: 1.005002102
+  Bio::Map::MarkerI:
+    file: Bio/Map/MarkerI.pm
+    version: 1.005002102
+  Bio::Map::Microsatellite:
+    file: Bio/Map/Microsatellite.pm
+    version: 1.005002102
+  Bio::Map::OrderedPosition:
+    file: Bio/Map/OrderedPosition.pm
+    version: 1.005002102
+  Bio::Map::OrderedPositionWithDistance:
+    file: Bio/Map/OrderedPositionWithDistance.pm
+    version: 1.005002102
+  Bio::Map::Physical:
+    file: Bio/Map/Physical.pm
+    version: 1.005002102
+  Bio::Map::Position:
+    file: Bio/Map/Position.pm
+    version: 1.005002102
+  Bio::Map::PositionHandler:
+    file: Bio/Map/PositionHandler.pm
+    version: 1.005002102
+  Bio::Map::PositionHandlerI:
+    file: Bio/Map/PositionHandlerI.pm
+    version: 1.005002102
+  Bio::Map::PositionI:
+    file: Bio/Map/PositionI.pm
+    version: 1.005002102
+  Bio::Map::Relative:
+    file: Bio/Map/Relative.pm
+    version: 1.005002102
+  Bio::Map::RelativeI:
+    file: Bio/Map/RelativeI.pm
+    version: 1.005002102
+  Bio::Map::SimpleMap:
+    file: Bio/Map/SimpleMap.pm
+    version: 1.005002102
+  Bio::MapIO:
+    file: Bio/MapIO.pm
+    version: 1.005002102
+  Bio::MapIO::fpc:
+    file: Bio/MapIO/fpc.pm
+    version: 1.005002102
+  Bio::MapIO::mapmaker:
+    file: Bio/MapIO/mapmaker.pm
+    version: 1.005002102
+  Bio::Matrix::Generic:
+    file: Bio/Matrix/Generic.pm
+    version: 1.005002102
+  Bio::Matrix::IO:
+    file: Bio/Matrix/IO.pm
+    version: 1.005002102
+  Bio::Matrix::IO::phylip:
+    file: Bio/Matrix/IO/phylip.pm
+    version: 1.005002102
+  Bio::Matrix::IO::scoring:
+    file: Bio/Matrix/IO/scoring.pm
+    version: 1.005002102
+  Bio::Matrix::MatrixI:
+    file: Bio/Matrix/MatrixI.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::IO:
+    file: Bio/Matrix/PSM/IO.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::IO::mast:
+    file: Bio/Matrix/PSM/IO/mast.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::IO::masta:
+    file: Bio/Matrix/PSM/IO/masta.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::IO::meme:
+    file: Bio/Matrix/PSM/IO/meme.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::IO::psiblast:
+    file: Bio/Matrix/PSM/IO/psiblast.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::IO::transfac:
+    file: Bio/Matrix/PSM/IO/transfac.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::InstanceSite:
+    file: Bio/Matrix/PSM/InstanceSite.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::InstanceSiteI:
+    file: Bio/Matrix/PSM/InstanceSiteI.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::ProtMatrix:
+    file: Bio/Matrix/PSM/ProtMatrix.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::ProtPsm:
+    file: Bio/Matrix/PSM/ProtPsm.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::Psm:
+    file: Bio/Matrix/PSM/Psm.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::PsmHeader:
+    file: Bio/Matrix/PSM/PsmHeader.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::PsmHeaderI:
+    file: Bio/Matrix/PSM/PsmHeaderI.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::PsmI:
+    file: Bio/Matrix/PSM/PsmI.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::SiteMatrix:
+    file: Bio/Matrix/PSM/SiteMatrix.pm
+    version: 1.005002102
+  Bio::Matrix::PSM::SiteMatrixI:
+    file: Bio/Matrix/PSM/SiteMatrixI.pm
+    version: 1.005002102
+  Bio::Matrix::PhylipDist:
+    file: Bio/Matrix/PhylipDist.pm
+    version: 1.005002102
+  Bio::Matrix::Scoring:
+    file: Bio/Matrix/Scoring.pm
+    version: 1.005002102
+  Bio::Ontology::DocumentRegistry:
+    file: Bio/Ontology/DocumentRegistry.pm
+    version: 1.005002102
+  Bio::Ontology::GOterm:
+    file: Bio/Ontology/GOterm.pm
+    version: 1.005002102
+  Bio::Ontology::InterProTerm:
+    file: Bio/Ontology/InterProTerm.pm
+    version: 1.005002102
+  Bio::Ontology::OBOEngine:
+    file: Bio/Ontology/OBOEngine.pm
+    version: 1.005002102
+  Bio::Ontology::OBOterm:
+    file: Bio/Ontology/OBOterm.pm
+    version: 1.005002102
+  Bio::Ontology::Ontology:
+    file: Bio/Ontology/Ontology.pm
+    version: 1.005002102
+  Bio::Ontology::OntologyEngineI:
+    file: Bio/Ontology/OntologyEngineI.pm
+    version: 1.005002102
+  Bio::Ontology::OntologyI:
+    file: Bio/Ontology/OntologyI.pm
+    version: 1.005002102
+  Bio::Ontology::OntologyStore:
+    file: Bio/Ontology/OntologyStore.pm
+    version: 1.005002102
+  Bio::Ontology::Path:
+    file: Bio/Ontology/Path.pm
+    version: 1.005002102
+  Bio::Ontology::PathI:
+    file: Bio/Ontology/PathI.pm
+    version: 1.005002102
+  Bio::Ontology::Relationship:
+    file: Bio/Ontology/Relationship.pm
+    version: 1.005002102
+  Bio::Ontology::RelationshipFactory:
+    file: Bio/Ontology/RelationshipFactory.pm
+    version: 1.005002102
+  Bio::Ontology::RelationshipI:
+    file: Bio/Ontology/RelationshipI.pm
+    version: 1.005002102
+  Bio::Ontology::RelationshipType:
+    file: Bio/Ontology/RelationshipType.pm
+    version: 1.005002102
+  Bio::Ontology::SimpleGOEngine:
+    file: Bio/Ontology/SimpleGOEngine.pm
+    version: 1.005002102
+  Bio::Ontology::SimpleGOEngine::GraphAdaptor:
+    file: Bio/Ontology/SimpleGOEngine/GraphAdaptor.pm
+    version: 1.005002102
+  Bio::Ontology::SimpleGOEngine::GraphAdaptor02:
+    file: Bio/Ontology/SimpleGOEngine/GraphAdaptor02.pm
+    version: 1.005002102
+  Bio::Ontology::SimpleOntologyEngine:
+    file: Bio/Ontology/SimpleOntologyEngine.pm
+    version: 1.005002102
+  Bio::Ontology::Term:
+    file: Bio/Ontology/Term.pm
+    version: 1.005002102
+  Bio::Ontology::TermFactory:
+    file: Bio/Ontology/TermFactory.pm
+    version: 1.005002102
+  Bio::Ontology::TermI:
+    file: Bio/Ontology/TermI.pm
+    version: 1.005002102
+  Bio::OntologyIO:
+    file: Bio/OntologyIO.pm
+    version: 1.005002102
+  Bio::OntologyIO::Handlers::BaseSAXHandler:
+    file: Bio/OntologyIO/Handlers/BaseSAXHandler.pm
+    version: 1.005002102
+  Bio::OntologyIO::Handlers::InterProHandler:
+    file: Bio/OntologyIO/Handlers/InterProHandler.pm
+    version: 1.005002102
+  Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler:
+    file: Bio/OntologyIO/Handlers/InterPro_BioSQL_Handler.pm
+    version: 1.005002102
+  Bio::OntologyIO::InterProParser:
+    file: Bio/OntologyIO/InterProParser.pm
+    version: 1.005002102
+  Bio::OntologyIO::dagflat:
+    file: Bio/OntologyIO/dagflat.pm
+    version: 1.005002102
+  Bio::OntologyIO::goflat:
+    file: Bio/OntologyIO/goflat.pm
+    version: 1.005002102
+  Bio::OntologyIO::obo:
+    file: Bio/OntologyIO/obo.pm
+    version: 1.005002102
+  Bio::OntologyIO::simplehierarchy:
+    file: Bio/OntologyIO/simplehierarchy.pm
+    version: 1.005002102
+  Bio::OntologyIO::soflat:
+    file: Bio/OntologyIO/soflat.pm
+    version: 1.005002102
+  Bio::Perl:
+    file: Bio/Perl.pm
+    version: 1.005002102
+  Bio::Phenotype::Correlate:
+    file: Bio/Phenotype/Correlate.pm
+    version: 1.005002102
+  Bio::Phenotype::MeSH::Term:
+    file: Bio/Phenotype/MeSH/Term.pm
+    version: 1.005002102
+  Bio::Phenotype::MeSH::Twig:
+    file: Bio/Phenotype/MeSH/Twig.pm
+    version: 1.005002102
+  Bio::Phenotype::Measure:
+    file: Bio/Phenotype/Measure.pm
+    version: 1.005002102
+  Bio::Phenotype::OMIM::MiniMIMentry:
+    file: Bio/Phenotype/OMIM/MiniMIMentry.pm
+    version: 1.005002102
+  Bio::Phenotype::OMIM::OMIMentry:
+    file: Bio/Phenotype/OMIM/OMIMentry.pm
+    version: 1.005002102
+  Bio::Phenotype::OMIM::OMIMentryAllelicVariant:
+    file: Bio/Phenotype/OMIM/OMIMentryAllelicVariant.pm
+    version: 1.005002102
+  Bio::Phenotype::OMIM::OMIMparser:
+    file: Bio/Phenotype/OMIM/OMIMparser.pm
+    version: 1.005002102
+  Bio::Phenotype::Phenotype:
+    file: Bio/Phenotype/Phenotype.pm
+    version: 1.005002102
+  Bio::Phenotype::PhenotypeI:
+    file: Bio/Phenotype/PhenotypeI.pm
+    version: 1.005002102
+  Bio::PopGen::Genotype:
+    file: Bio/PopGen/Genotype.pm
+    version: 1.005002102
+  Bio::PopGen::GenotypeI:
+    file: Bio/PopGen/GenotypeI.pm
+    version: 1.005002102
+  Bio::PopGen::HtSNP:
+    file: Bio/PopGen/HtSNP.pm
+    version: 1.005002102
+  Bio::PopGen::IO:
+    file: Bio/PopGen/IO.pm
+    version: 1.005002102
+  Bio::PopGen::IO::csv:
+    file: Bio/PopGen/IO/csv.pm
+    version: 1.005002102
+  Bio::PopGen::IO::hapmap:
+    file: Bio/PopGen/IO/hapmap.pm
+    version: 1.005002102
+  Bio::PopGen::IO::phase:
+    file: Bio/PopGen/IO/phase.pm
+    version: 1.005002102
+  Bio::PopGen::IO::prettybase:
+    file: Bio/PopGen/IO/prettybase.pm
+    version: 1.005002102
+  Bio::PopGen::Individual:
+    file: Bio/PopGen/Individual.pm
+    version: 1.005002102
+  Bio::PopGen::IndividualI:
+    file: Bio/PopGen/IndividualI.pm
+    version: 1.005002102
+  Bio::PopGen::Marker:
+    file: Bio/PopGen/Marker.pm
+    version: 1.005002102
+  Bio::PopGen::MarkerI:
+    file: Bio/PopGen/MarkerI.pm
+    version: 1.005002102
+  Bio::PopGen::PopStats:
+    file: Bio/PopGen/PopStats.pm
+    version: 1.005002102
+  Bio::PopGen::Population:
+    file: Bio/PopGen/Population.pm
+    version: 1.005002102
+  Bio::PopGen::PopulationI:
+    file: Bio/PopGen/PopulationI.pm
+    version: 1.005002102
+  Bio::PopGen::Simulation::Coalescent:
+    file: Bio/PopGen/Simulation/Coalescent.pm
+    version: 1.005002102
+  Bio::PopGen::Simulation::GeneticDrift:
+    file: Bio/PopGen/Simulation/GeneticDrift.pm
+    version: 1.005002102
+  Bio::PopGen::Statistics:
+    file: Bio/PopGen/Statistics.pm
+    version: 1.005002102
+  Bio::PopGen::TagHaplotype:
+    file: Bio/PopGen/TagHaplotype.pm
+    version: 1.005002102
+  Bio::PopGen::Utilities:
+    file: Bio/PopGen/Utilities.pm
+    version: 1.005002102
+  Bio::PrimarySeq:
+    file: Bio/PrimarySeq.pm
+    version: 1.005002102
+  Bio::PrimarySeq::Fasta:
+    file: Bio/DB/Fasta.pm
+    version: 1.005002102
+  Bio::PrimarySeqI:
+    file: Bio/PrimarySeqI.pm
+    version: 1.005002102
+  Bio::PullParserI:
+    file: Bio/PullParserI.pm
+    version: 1.005002102
+  Bio::Range:
+    file: Bio/Range.pm
+    version: 1.005002102
+  Bio::RangeI:
+    file: Bio/RangeI.pm
+    version: 1.005002102
+  Bio::Restriction::Analysis:
+    file: Bio/Restriction/Analysis.pm
+    version: 1.005002102
+  Bio::Restriction::Enzyme:
+    file: Bio/Restriction/Enzyme.pm
+    version: 1.005002102
+  Bio::Restriction::Enzyme::MultiCut:
+    file: Bio/Restriction/Enzyme/MultiCut.pm
+    version: 1.005002102
+  Bio::Restriction::Enzyme::MultiSite:
+    file: Bio/Restriction/Enzyme/MultiSite.pm
+    version: 1.005002102
+  Bio::Restriction::EnzymeCollection:
+    file: Bio/Restriction/EnzymeCollection.pm
+    version: 1.005002102
+  Bio::Restriction::EnzymeI:
+    file: Bio/Restriction/EnzymeI.pm
+    version: 1.005002102
+  Bio::Restriction::IO:
+    file: Bio/Restriction/IO.pm
+    version: 1.005002102
+  Bio::Restriction::IO::bairoch:
+    file: Bio/Restriction/IO/bairoch.pm
+    version: 1.005002102
+  Bio::Restriction::IO::base:
+    file: Bio/Restriction/IO/base.pm
+    version: 1.005002102
+  Bio::Restriction::IO::itype2:
+    file: Bio/Restriction/IO/itype2.pm
+    version: 1.005002102
+  Bio::Restriction::IO::withrefm:
+    file: Bio/Restriction/IO/withrefm.pm
+    version: 1.005002102
+  Bio::Root::Exception:
+    file: Bio/Root/Exception.pm
+    version: 1.005002102
+  Bio::Root::HTTPget:
+    file: Bio/Root/HTTPget.pm
+    version: 1.005002102
+  Bio::Root::IO:
+    file: Bio/Root/IO.pm
+    version: 1.005002102
+  Bio::Root::Root:
+    file: Bio/Root/Root.pm
+    version: 1.005002102
+  Bio::Root::RootI:
+    file: Bio/Root/RootI.pm
+    version: 1.005002102
+  Bio::Root::Storable:
+    file: Bio/Root/Storable.pm
+    version: 1.005002102
+  Bio::Root::Version:
+    file: Bio/Root/Version.pm
+    version: 1.005002102
+  Bio::Search::BlastStatistics:
+    file: Bio/Search/BlastStatistics.pm
+    version: 1.005002102
+  Bio::Search::BlastUtils:
+    file: Bio/Search/BlastUtils.pm
+    version: 1.005002102
+  Bio::Search::DatabaseI:
+    file: Bio/Search/DatabaseI.pm
+    version: 1.005002102
+  Bio::Search::GenericDatabase:
+    file: Bio/Search/GenericDatabase.pm
+    version: 1.005002102
+  Bio::Search::GenericStatistics:
+    file: Bio/Search/GenericStatistics.pm
+    version: 1.005002102
+  Bio::Search::HSP::BlastHSP:
+    file: Bio/Search/HSP/BlastHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::FastaHSP:
+    file: Bio/Search/HSP/FastaHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::GenericHSP:
+    file: Bio/Search/HSP/GenericHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::HMMERHSP:
+    file: Bio/Search/HSP/HMMERHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::HSPFactory:
+    file: Bio/Search/HSP/HSPFactory.pm
+    version: 1.005002102
+  Bio::Search::HSP::HSPI:
+    file: Bio/Search/HSP/HSPI.pm
+    version: 1.005002102
+  Bio::Search::HSP::HmmpfamHSP:
+    file: Bio/Search/HSP/HmmpfamHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::PSLHSP:
+    file: Bio/Search/HSP/PSLHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::PsiBlastHSP:
+    file: Bio/Search/HSP/PsiBlastHSP.pm
+    version: 1.005002102
+  Bio::Search::HSP::PullHSPI:
+    file: Bio/Search/HSP/PullHSPI.pm
+    version: 1.005002102
+  Bio::Search::HSP::WABAHSP:
+    file: Bio/Search/HSP/WABAHSP.pm
+    version: 1.005002102
+  Bio::Search::Hit::BlastHit:
+    file: Bio/Search/Hit/BlastHit.pm
+    version: 1.005002102
+  Bio::Search::Hit::Fasta:
+    file: Bio/Search/Hit/Fasta.pm
+    version: 1.005002102
+  Bio::Search::Hit::GenericHit:
+    file: Bio/Search/Hit/GenericHit.pm
+    version: 1.005002102
+  Bio::Search::Hit::HMMERHit:
+    file: Bio/Search/Hit/HMMERHit.pm
+    version: 1.005002102
+  Bio::Search::Hit::HitFactory:
+    file: Bio/Search/Hit/HitFactory.pm
+    version: 1.005002102
+  Bio::Search::Hit::HitI:
+    file: Bio/Search/Hit/HitI.pm
+    version: 1.005002102
+  Bio::Search::Hit::HmmpfamHit:
+    file: Bio/Search/Hit/HmmpfamHit.pm
+    version: 1.005002102
+  Bio::Search::Hit::PsiBlastHit:
+    file: Bio/Search/Hit/PsiBlastHit.pm
+    version: 1.005002102
+  Bio::Search::Hit::PullHitI:
+    file: Bio/Search/Hit/PullHitI.pm
+    version: 1.005002102
+  Bio::Search::Iteration::GenericIteration:
+    file: Bio/Search/Iteration/GenericIteration.pm
+    version: 1.005002102
+  Bio::Search::Iteration::IterationI:
+    file: Bio/Search/Iteration/IterationI.pm
+    version: 1.005002102
+  Bio::Search::Processor:
+    file: Bio/Search/Processor.pm
+    version: 1.005002102
+  Bio::Search::Result::BlastResult:
+    file: Bio/Search/Result/BlastResult.pm
+    version: 1.005002102
+  Bio::Search::Result::GenericResult:
+    file: Bio/Search/Result/GenericResult.pm
+    version: 1.005002102
+  Bio::Search::Result::HMMERResult:
+    file: Bio/Search/Result/HMMERResult.pm
+    version: 1.005002102
+  Bio::Search::Result::HmmpfamResult:
+    file: Bio/Search/Result/HmmpfamResult.pm
+    version: 1.005002102
+  Bio::Search::Result::PullResultI:
+    file: Bio/Search/Result/PullResultI.pm
+    version: 1.005002102
+  Bio::Search::Result::ResultFactory:
+    file: Bio/Search/Result/ResultFactory.pm
+    version: 1.005002102
+  Bio::Search::Result::ResultI:
+    file: Bio/Search/Result/ResultI.pm
+    version: 1.005002102
+  Bio::Search::Result::WABAResult:
+    file: Bio/Search/Result/WABAResult.pm
+    version: 1.005002102
+  Bio::Search::SearchUtils:
+    file: Bio/Search/SearchUtils.pm
+    version: 1.005002102
+  Bio::Search::StatisticsI:
+    file: Bio/Search/StatisticsI.pm
+    version: 1.005002102
+  Bio::SearchDist:
+    file: Bio/SearchDist.pm
+    version: 1.005002102
+  Bio::SearchIO:
+    file: Bio/SearchIO.pm
+    version: 1.005002102
+  Bio::SearchIO::EventHandlerI:
+    file: Bio/SearchIO/EventHandlerI.pm
+    version: 1.005002102
+  Bio::SearchIO::FastHitEventBuilder:
+    file: Bio/SearchIO/FastHitEventBuilder.pm
+    version: 1.005002102
+  Bio::SearchIO::IteratedSearchResultEventBuilder:
+    file: Bio/SearchIO/IteratedSearchResultEventBuilder.pm
+    version: 1.005002102
+  Bio::SearchIO::SearchResultEventBuilder:
+    file: Bio/SearchIO/SearchResultEventBuilder.pm
+    version: 1.005002102
+  Bio::SearchIO::SearchWriterI:
+    file: Bio/SearchIO/SearchWriterI.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::BSMLResultWriter:
+    file: Bio/SearchIO/Writer/BSMLResultWriter.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::GbrowseGFF:
+    file: Bio/SearchIO/Writer/GbrowseGFF.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::HSPTableWriter:
+    file: Bio/SearchIO/Writer/HSPTableWriter.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::HTMLResultWriter:
+    file: Bio/SearchIO/Writer/HTMLResultWriter.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::HitTableWriter:
+    file: Bio/SearchIO/Writer/HitTableWriter.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::ResultTableWriter:
+    file: Bio/SearchIO/Writer/ResultTableWriter.pm
+    version: 1.005002102
+  Bio::SearchIO::Writer::TextResultWriter:
+    file: Bio/SearchIO/Writer/TextResultWriter.pm
+    version: 1.005002102
+  Bio::SearchIO::axt:
+    file: Bio/SearchIO/axt.pm
+    version: 1.005002102
+  Bio::SearchIO::blast:
+    file: Bio/SearchIO/blast.pm
+    version: 1.005002102
+  Bio::SearchIO::blasttable:
+    file: Bio/SearchIO/blasttable.pm
+    version: 1.005002102
+  Bio::SearchIO::blastxml:
+    file: Bio/SearchIO/blastxml.pm
+    version: 1.005002102
+  Bio::SearchIO::exonerate:
+    file: Bio/SearchIO/exonerate.pm
+    version: 1.005002102
+  Bio::SearchIO::fasta:
+    file: Bio/SearchIO/fasta.pm
+    version: 1.005002102
+  Bio::SearchIO::hmmer:
+    file: Bio/SearchIO/hmmer.pm
+    version: 1.005002102
+  Bio::SearchIO::hmmer_pull:
+    file: Bio/SearchIO/hmmer_pull.pm
+    version: 1.005002102
+  Bio::SearchIO::megablast:
+    file: Bio/SearchIO/megablast.pm
+    version: 1.005002102
+  Bio::SearchIO::psl:
+    file: Bio/SearchIO/psl.pm
+    version: 1.005002102
+  Bio::SearchIO::sim4:
+    file: Bio/SearchIO/sim4.pm
+    version: 1.005002102
+  Bio::SearchIO::waba:
+    file: Bio/SearchIO/waba.pm
+    version: 1.005002102
+  Bio::SearchIO::wise:
+    file: Bio/SearchIO/wise.pm
+    version: 1.005002102
+  Bio::Seq:
+    file: Bio/Seq.pm
+    version: 1.005002102
+  Bio::Seq::BaseSeqProcessor:
+    file: Bio/Seq/BaseSeqProcessor.pm
+    version: 1.005002102
+  Bio::Seq::EncodedSeq:
+    file: Bio/Seq/EncodedSeq.pm
+    version: 1.005002102
+  Bio::Seq::LargeLocatableSeq:
+    file: Bio/Seq/LargeLocatableSeq.pm
+    version: 1.005002102
+  Bio::Seq::LargePrimarySeq:
+    file: Bio/Seq/LargePrimarySeq.pm
+    version: 1.005002102
+  Bio::Seq::LargeSeq:
+    file: Bio/Seq/LargeSeq.pm
+    version: 1.005002102
+  Bio::Seq::LargeSeqI:
+    file: Bio/Seq/LargeSeqI.pm
+    version: 1.005002102
+  Bio::Seq::Meta:
+    file: Bio/Seq/Meta.pm
+    version: 1.005002102
+  Bio::Seq::Meta::Array:
+    file: Bio/Seq/Meta/Array.pm
+    version: 1.005002102
+  Bio::Seq::MetaI:
+    file: Bio/Seq/MetaI.pm
+    version: 1.005002102
+  Bio::Seq::PrimaryQual:
+    file: Bio/Seq/PrimaryQual.pm
+    version: 1.005002102
+  Bio::Seq::PrimedSeq:
+    file: Bio/Seq/PrimedSeq.pm
+    version: 1.005002102
+  Bio::Seq::QualI:
+    file: Bio/Seq/QualI.pm
+    version: 1.005002102
+  Bio::Seq::Quality:
+    file: Bio/Seq/Quality.pm
+    version: 1.005002102
+  Bio::Seq::RichSeq:
+    file: Bio/Seq/RichSeq.pm
+    version: 1.005002102
+  Bio::Seq::RichSeqI:
+    file: Bio/Seq/RichSeqI.pm
+    version: 1.005002102
+  Bio::Seq::SeqBuilder:
+    file: Bio/Seq/SeqBuilder.pm
+    version: 1.005002102
+  Bio::Seq::SeqFactory:
+    file: Bio/Seq/SeqFactory.pm
+    version: 1.005002102
+  Bio::Seq::SeqFastaSpeedFactory:
+    file: Bio/Seq/SeqFastaSpeedFactory.pm
+    version: 1.005002102
+  Bio::Seq::SeqWithQuality:
+    file: Bio/Seq/SeqWithQuality.pm
+    version: 1.005002102
+  Bio::Seq::SequenceTrace:
+    file: Bio/Seq/SequenceTrace.pm
+    version: 1.005002102
+  Bio::Seq::TraceI:
+    file: Bio/Seq/TraceI.pm
+    version: 1.005002102
+  Bio::SeqAnalysisParserI:
+    file: Bio/SeqAnalysisParserI.pm
+    version: 1.005002102
+  Bio::SeqFeature::Annotated:
+    file: Bio/SeqFeature/Annotated.pm
+    version: 1.005002102
+  Bio::SeqFeature::AnnotationAdaptor:
+    file: Bio/SeqFeature/AnnotationAdaptor.pm
+    version: 1.005002102
+  Bio::SeqFeature::Collection:
+    file: Bio/SeqFeature/Collection.pm
+    version: 1.005002102
+  Bio::SeqFeature::CollectionI:
+    file: Bio/SeqFeature/CollectionI.pm
+    version: 1.005002102
+  Bio::SeqFeature::Computation:
+    file: Bio/SeqFeature/Computation.pm
+    version: 1.005002102
+  Bio::SeqFeature::FeaturePair:
+    file: Bio/SeqFeature/FeaturePair.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::Exon:
+    file: Bio/SeqFeature/Gene/Exon.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::ExonI:
+    file: Bio/SeqFeature/Gene/ExonI.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::GeneStructure:
+    file: Bio/SeqFeature/Gene/GeneStructure.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::GeneStructureI:
+    file: Bio/SeqFeature/Gene/GeneStructureI.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::Intron:
+    file: Bio/SeqFeature/Gene/Intron.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::NC_Feature:
+    file: Bio/SeqFeature/Gene/NC_Feature.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::Poly_A_site:
+    file: Bio/SeqFeature/Gene/Poly_A_site.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::Promoter:
+    file: Bio/SeqFeature/Gene/Promoter.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::Transcript:
+    file: Bio/SeqFeature/Gene/Transcript.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::TranscriptI:
+    file: Bio/SeqFeature/Gene/TranscriptI.pm
+    version: 1.005002102
+  Bio::SeqFeature::Gene::UTR:
+    file: Bio/SeqFeature/Gene/UTR.pm
+    version: 1.005002102
+  Bio::SeqFeature::Generic:
+    file: Bio/SeqFeature/Generic.pm
+    version: 1.005002102
+  Bio::SeqFeature::PositionProxy:
+    file: Bio/SeqFeature/PositionProxy.pm
+    version: 1.005002102
+  Bio::SeqFeature::Primer:
+    file: Bio/SeqFeature/Primer.pm
+    version: 1.005002102
+  Bio::SeqFeature::SiRNA::Oligo:
+    file: Bio/SeqFeature/SiRNA/Oligo.pm
+    version: 1.005002102
+  Bio::SeqFeature::SiRNA::Pair:
+    file: Bio/SeqFeature/SiRNA/Pair.pm
+    version: 1.005002102
+  Bio::SeqFeature::Similarity:
+    file: Bio/SeqFeature/Similarity.pm
+    version: 1.005002102
+  Bio::SeqFeature::SimilarityPair:
+    file: Bio/SeqFeature/SimilarityPair.pm
+    version: 1.005002102
+  Bio::SeqFeature::Tools::FeatureNamer:
+    file: Bio/SeqFeature/Tools/FeatureNamer.pm
+    version: 1.005002102
+  Bio::SeqFeature::Tools::IDHandler:
+    file: Bio/SeqFeature/Tools/IDHandler.pm
+    version: 1.005002102
+  Bio::SeqFeature::Tools::TypeMapper:
+    file: Bio/SeqFeature/Tools/TypeMapper.pm
+    version: 1.005002102
+  Bio::SeqFeature::Tools::Unflattener:
+    file: Bio/SeqFeature/Tools/Unflattener.pm
+    version: 1.005002102
+  Bio::SeqFeature::TypedSeqFeatureI:
+    file: Bio/SeqFeature/TypedSeqFeatureI.pm
+    version: 1.005002102
+  Bio::SeqFeatureI:
+    file: Bio/SeqFeatureI.pm
+    version: 1.005002102
+  Bio::SeqI:
+    file: Bio/SeqI.pm
+    version: 1.005002102
+  Bio::SeqIO:
+    file: Bio/SeqIO.pm
+    version: 1.005002102
+  Bio::SeqIO::FTHelper:
+    file: Bio/SeqIO/FTHelper.pm
+    version: 1.005002102
+  Bio::SeqIO::MultiFile:
+    file: Bio/SeqIO/MultiFile.pm
+    version: 1.005002102
+  Bio::SeqIO::abi:
+    file: Bio/SeqIO/abi.pm
+    version: 1.005002102
+  Bio::SeqIO::ace:
+    file: Bio/SeqIO/ace.pm
+    version: 1.005002102
+  Bio::SeqIO::agave:
+    file: Bio/SeqIO/agave.pm
+    version: 1.005002102
+  Bio::SeqIO::alf:
+    file: Bio/SeqIO/alf.pm
+    version: 1.005002102
+  Bio::SeqIO::asciitree:
+    file: Bio/SeqIO/asciitree.pm
+    version: 1.005002102
+  Bio::SeqIO::bsml:
+    file: Bio/SeqIO/bsml.pm
+    version: 1.005002102
+  Bio::SeqIO::bsml_sax:
+    file: Bio/SeqIO/bsml_sax.pm
+    version: 1.005002102
+  Bio::SeqIO::chadoxml:
+    file: Bio/SeqIO/chadoxml.pm
+    version: 1.005002102
+  Bio::SeqIO::chaos:
+    file: Bio/SeqIO/chaos.pm
+    version: 1.005002102
+  Bio::SeqIO::chaosxml:
+    file: Bio/SeqIO/chaosxml.pm
+    version: 1.005002102
+  Bio::SeqIO::ctf:
+    file: Bio/SeqIO/ctf.pm
+    version: 1.005002102
+  Bio::SeqIO::embl:
+    file: Bio/SeqIO/embl.pm
+    version: 1.005002102
+  Bio::SeqIO::entrezgene:
+    file: Bio/SeqIO/entrezgene.pm
+    version: 1.005002102
+  Bio::SeqIO::excel:
+    file: Bio/SeqIO/excel.pm
+    version: 1.005002102
+  Bio::SeqIO::exp:
+    file: Bio/SeqIO/exp.pm
+    version: 1.005002102
+  Bio::SeqIO::fasta:
+    file: Bio/SeqIO/fasta.pm
+    version: 1.005002102
+  Bio::SeqIO::fastq:
+    file: Bio/SeqIO/fastq.pm
+    version: 1.005002102
+  Bio::SeqIO::game:
+    file: Bio/SeqIO/game.pm
+    version: 1.005002102
+  Bio::SeqIO::game::featHandler:
+    file: Bio/SeqIO/game/featHandler.pm
+    version: 1.005002102
+  Bio::SeqIO::game::gameHandler:
+    file: Bio/SeqIO/game/gameHandler.pm
+    version: 1.005002102
+  Bio::SeqIO::game::gameSubs:
+    file: Bio/SeqIO/game/gameSubs.pm
+    version: 1.005002102
+  Bio::SeqIO::game::gameWriter:
+    file: Bio/SeqIO/game/gameWriter.pm
+    version: 1.005002102
+  Bio::SeqIO::game::seqHandler:
+    file: Bio/SeqIO/game/seqHandler.pm
+    version: 1.005002102
+  Bio::SeqIO::gcg:
+    file: Bio/SeqIO/gcg.pm
+    version: 1.005002102
+  Bio::SeqIO::genbank:
+    file: Bio/SeqIO/genbank.pm
+    version: 1.005002102
+  Bio::SeqIO::interpro:
+    file: Bio/SeqIO/interpro.pm
+    version: 1.005002102
+  Bio::SeqIO::kegg:
+    file: Bio/SeqIO/kegg.pm
+    version: 1.005002102
+  Bio::SeqIO::largefasta:
+    file: Bio/SeqIO/largefasta.pm
+    version: 1.005002102
+  Bio::SeqIO::lasergene:
+    file: Bio/SeqIO/lasergene.pm
+    version: 1.005002102
+  Bio::SeqIO::locuslink:
+    file: Bio/SeqIO/locuslink.pm
+    version: 1.005002102
+  Bio::SeqIO::metafasta:
+    file: Bio/SeqIO/metafasta.pm
+    version: 1.005002102
+  Bio::SeqIO::phd:
+    file: Bio/SeqIO/phd.pm
+    version: 1.005002102
+  Bio::SeqIO::pir:
+    file: Bio/SeqIO/pir.pm
+    version: 1.005002102
+  Bio::SeqIO::pln:
+    file: Bio/SeqIO/pln.pm
+    version: 1.005002102
+  Bio::SeqIO::qual:
+    file: Bio/SeqIO/qual.pm
+    version: 1.005002102
+  Bio::SeqIO::raw:
+    file: Bio/SeqIO/raw.pm
+    version: 1.005002102
+  Bio::SeqIO::scf:
+    file: Bio/SeqIO/scf.pm
+    version: 1.005002102
+  Bio::SeqIO::strider:
+    file: Bio/SeqIO/strider.pm
+    version: 1.005002102
+  Bio::SeqIO::swiss:
+    file: Bio/SeqIO/swiss.pm
+    version: 1.005002102
+  Bio::SeqIO::tab:
+    file: Bio/SeqIO/tab.pm
+    version: 1.005002102
+  Bio::SeqIO::table:
+    file: Bio/SeqIO/table.pm
+    version: 1.005002102
+  Bio::SeqIO::tigr:
+    file: Bio/SeqIO/tigr.pm
+    version: 1.005002102
+  Bio::SeqIO::tigrxml:
+    file: Bio/SeqIO/tigrxml.pm
+    version: 1.005002102
+  Bio::SeqIO::tinyseq:
+    file: Bio/SeqIO/tinyseq.pm
+    version: 1.005002102
+  Bio::SeqIO::tinyseq::tinyseqHandler:
+    file: Bio/SeqIO/tinyseq/tinyseqHandler.pm
+    version: 1.005002102
+  Bio::SeqIO::ztr:
+    file: Bio/SeqIO/ztr.pm
+    version: 1.005002102
+  Bio::SeqUtils:
+    file: Bio/SeqUtils.pm
+    version: 1.005002102
+  Bio::SimpleAlign:
+    file: Bio/SimpleAlign.pm
+    version: 1.005002102
+  Bio::SimpleAnalysisI:
+    file: Bio/SimpleAnalysisI.pm
+    version: 1.005002102
+  Bio::Species:
+    file: Bio/Species.pm
+    version: 1.005002102
+  Bio::Structure::Atom:
+    file: Bio/Structure/Atom.pm
+    version: 1.005002102
+  Bio::Structure::Chain:
+    file: Bio/Structure/Chain.pm
+    version: 1.005002102
+  Bio::Structure::Entry:
+    file: Bio/Structure/Entry.pm
+    version: 1.005002102
+  Bio::Structure::IO:
+    file: Bio/Structure/IO.pm
+    version: 1.005002102
+  Bio::Structure::IO::pdb:
+    file: Bio/Structure/IO/pdb.pm
+    version: 1.005002102
+  Bio::Structure::Model:
+    file: Bio/Structure/Model.pm
+    version: 1.005002102
+  Bio::Structure::Residue:
+    file: Bio/Structure/Residue.pm
+    version: 1.005002102
+  Bio::Structure::SecStr::DSSP::Res:
+    file: Bio/Structure/SecStr/DSSP/Res.pm
+    version: 1.005002102
+  Bio::Structure::SecStr::STRIDE::Res:
+    file: Bio/Structure/SecStr/STRIDE/Res.pm
+    version: 1.005002102
+  Bio::Structure::StructureI:
+    file: Bio/Structure/StructureI.pm
+    version: 1.005002102
+  Bio::Symbol::Alphabet:
+    file: Bio/Symbol/Alphabet.pm
+    version: 1.005002102
+  Bio::Symbol::AlphabetI:
+    file: Bio/Symbol/AlphabetI.pm
+    version: 1.005002102
+  Bio::Symbol::DNAAlphabet:
+    file: Bio/Symbol/DNAAlphabet.pm
+    version: 1.005002102
+  Bio::Symbol::ProteinAlphabet:
+    file: Bio/Symbol/ProteinAlphabet.pm
+    version: 1.005002102
+  Bio::Symbol::Symbol:
+    file: Bio/Symbol/Symbol.pm
+    version: 1.005002102
+  Bio::Symbol::SymbolI:
+    file: Bio/Symbol/SymbolI.pm
+    version: 1.005002102
+  Bio::Taxon:
+    file: Bio/Taxon.pm
+    version: 1.005002102
+  Bio::Taxonomy:
+    file: Bio/Taxonomy.pm
+    version: 1.005002102
+  Bio::Taxonomy::FactoryI:
+    file: Bio/Taxonomy/FactoryI.pm
+    version: 1.005002102
+  Bio::Taxonomy::Node:
+    file: Bio/Taxonomy/Node.pm
+    version: 1.005002102
+  Bio::Taxonomy::Taxon:
+    file: Bio/Taxonomy/Taxon.pm
+    version: 1.005002102
+  Bio::Taxonomy::Tree:
+    file: Bio/Taxonomy/Tree.pm
+    version: 1.005002102
+  Bio::Tools::AlignFactory:
+    file: Bio/Tools/AlignFactory.pm
+    version: 1.005002102
+  Bio::Tools::Alignment::Consed:
+    file: Bio/Tools/Alignment/Consed.pm
+    version: 1.005002102
+  Bio::Tools::Alignment::Trim:
+    file: Bio/Tools/Alignment/Trim.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::DNA::ESEfinder:
+    file: Bio/Tools/Analysis/DNA/ESEfinder.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::Domcut:
+    file: Bio/Tools/Analysis/Protein/Domcut.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::ELM:
+    file: Bio/Tools/Analysis/Protein/ELM.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::GOR4:
+    file: Bio/Tools/Analysis/Protein/GOR4.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::HNN:
+    file: Bio/Tools/Analysis/Protein/HNN.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::Mitoprot:
+    file: Bio/Tools/Analysis/Protein/Mitoprot.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::NetPhos:
+    file: Bio/Tools/Analysis/Protein/NetPhos.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::Scansite:
+    file: Bio/Tools/Analysis/Protein/Scansite.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::Protein::Sopma:
+    file: Bio/Tools/Analysis/Protein/Sopma.pm
+    version: 1.005002102
+  Bio::Tools::Analysis::SimpleAnalysisBase:
+    file: Bio/Tools/Analysis/SimpleAnalysisBase.pm
+    version: 1.005002102
+  Bio::Tools::AnalysisResult:
+    file: Bio/Tools/AnalysisResult.pm
+    version: 1.005002102
+  Bio::Tools::BPbl2seq:
+    file: Bio/Tools/BPbl2seq.pm
+    version: 1.005002102
+  Bio::Tools::BPlite:
+    file: Bio/Tools/BPlite.pm
+    version: 1.005002102
+  Bio::Tools::BPlite::HSP:
+    file: Bio/Tools/BPlite/HSP.pm
+    version: 1.005002102
+  Bio::Tools::BPlite::Iteration:
+    file: Bio/Tools/BPlite/Iteration.pm
+    version: 1.005002102
+  Bio::Tools::BPlite::Sbjct:
+    file: Bio/Tools/BPlite/Sbjct.pm
+    version: 1.005002102
+  Bio::Tools::BPpsilite:
+    file: Bio/Tools/BPpsilite.pm
+    version: 1.005002102
+  Bio::Tools::Blat:
+    file: Bio/Tools/Blat.pm
+    version: 1.005002102
+  Bio::Tools::CodonTable:
+    file: Bio/Tools/CodonTable.pm
+    version: 1.005002102
+  Bio::Tools::Coil:
+    file: Bio/Tools/Coil.pm
+    version: 1.005002102
+  Bio::Tools::ECnumber:
+    file: Bio/Tools/ECnumber.pm
+    version: 1.005002102
+  Bio::Tools::EMBOSS::Palindrome:
+    file: Bio/Tools/EMBOSS/Palindrome.pm
+    version: 1.005002102
+  Bio::Tools::EPCR:
+    file: Bio/Tools/EPCR.pm
+    version: 1.005002102
+  Bio::Tools::ERPIN:
+    file: Bio/Tools/ERPIN.pm
+    version: 1.005002102
+  Bio::Tools::ESTScan:
+    file: Bio/Tools/ESTScan.pm
+    version: 1.005002102
+  Bio::Tools::Eponine:
+    file: Bio/Tools/Eponine.pm
+    version: 1.005002102
+  Bio::Tools::Est2Genome:
+    file: Bio/Tools/Est2Genome.pm
+    version: 1.005002102
+  Bio::Tools::Fgenesh:
+    file: Bio/Tools/Fgenesh.pm
+    version: 1.005002102
+  Bio::Tools::FootPrinter:
+    file: Bio/Tools/FootPrinter.pm
+    version: 1.005002102
+  Bio::Tools::GFF:
+    file: Bio/Tools/GFF.pm
+    version: 1.005002102
+  Bio::Tools::Gel:
+    file: Bio/Tools/Gel.pm
+    version: 1.005002102
+  Bio::Tools::Geneid:
+    file: Bio/Tools/Geneid.pm
+    version: 1.005002102
+  Bio::Tools::Genemark:
+    file: Bio/Tools/Genemark.pm
+    version: 1.005002102
+  Bio::Tools::Genewise:
+    file: Bio/Tools/Genewise.pm
+    version: 1.005002102
+  Bio::Tools::Genomewise:
+    file: Bio/Tools/Genomewise.pm
+    version: 1.005002102
+  Bio::Tools::Genscan:
+    file: Bio/Tools/Genscan.pm
+    version: 1.005002102
+  Bio::Tools::Glimmer:
+    file: Bio/Tools/Glimmer.pm
+    version: 1.005002102
+  Bio::Tools::Grail:
+    file: Bio/Tools/Grail.pm
+    version: 1.005002102
+  Bio::Tools::GuessSeqFormat:
+    file: Bio/Tools/GuessSeqFormat.pm
+    version: 1.005002102
+  Bio::Tools::HMM:
+    file: Bio/Tools/HMM.pm
+    version: 1.005002102
+  Bio::Tools::HMMER::Domain:
+    file: Bio/Tools/HMMER/Domain.pm
+    version: 1.005002102
+  Bio::Tools::HMMER::Results:
+    file: Bio/Tools/HMMER/Results.pm
+    version: 1.005002102
+  Bio::Tools::HMMER::Set:
+    file: Bio/Tools/HMMER/Set.pm
+    version: 1.005002102
+  Bio::Tools::Hmmpfam:
+    file: Bio/Tools/Hmmpfam.pm
+    version: 1.005002102
+  Bio::Tools::IUPAC:
+    file: Bio/Tools/IUPAC.pm
+    version: 1.005002102
+  Bio::Tools::Lucy:
+    file: Bio/Tools/Lucy.pm
+    version: 1.005002102
+  Bio::Tools::MZEF:
+    file: Bio/Tools/MZEF.pm
+    version: 1.005002102
+  Bio::Tools::OddCodes:
+    file: Bio/Tools/OddCodes.pm
+    version: 1.005002102
+  Bio::Tools::Phylo::Molphy:
+    file: Bio/Tools/Phylo/Molphy.pm
+    version: 1.005002102
+  Bio::Tools::Phylo::Molphy::Result:
+    file: Bio/Tools/Phylo/Molphy/Result.pm
+    version: 1.005002102
+  Bio::Tools::Phylo::PAML:
+    file: Bio/Tools/Phylo/PAML.pm
+    version: 1.005002102
+  Bio::Tools::Phylo::PAML::ModelResult:
+    file: Bio/Tools/Phylo/PAML/ModelResult.pm
+    version: 1.005002102
+  Bio::Tools::Phylo::PAML::Result:
+    file: Bio/Tools/Phylo/PAML/Result.pm
+    version: 1.005002102
+  Bio::Tools::Phylo::Phylip::ProtDist:
+    file: Bio/Tools/Phylo/Phylip/ProtDist.pm
+    version: 1.005002102
+  Bio::Tools::Prediction::Exon:
+    file: Bio/Tools/Prediction/Exon.pm
+    version: 1.005002102
+  Bio::Tools::Prediction::Gene:
+    file: Bio/Tools/Prediction/Gene.pm
+    version: 1.005002102
+  Bio::Tools::Primer3:
+    file: Bio/Tools/Primer3.pm
+    version: 1.005002102
+  Bio::Tools::Primer::Assessor::Base:
+    file: Bio/Tools/Primer/Assessor/Base.pm
+    version: 1.005002102
+  Bio::Tools::Primer::AssessorI:
+    file: Bio/Tools/Primer/AssessorI.pm
+    version: 1.005002102
+  Bio::Tools::Primer::Feature:
+    file: Bio/Tools/Primer/Feature.pm
+    version: 1.005002102
+  Bio::Tools::Primer::Pair:
+    file: Bio/Tools/Primer/Pair.pm
+    version: 1.005002102
+  Bio::Tools::Prints:
+    file: Bio/Tools/Prints.pm
+    version: 1.005002102
+  Bio::Tools::Profile:
+    file: Bio/Tools/Profile.pm
+    version: 1.005002102
+  Bio::Tools::Promoterwise:
+    file: Bio/Tools/Promoterwise.pm
+    version: 1.005002102
+  Bio::Tools::PrositeScan:
+    file: Bio/Tools/PrositeScan.pm
+    version: 1.005002102
+  Bio::Tools::Pseudowise:
+    file: Bio/Tools/Pseudowise.pm
+    version: 1.005002102
+  Bio::Tools::QRNA:
+    file: Bio/Tools/QRNA.pm
+    version: 1.005002102
+  Bio::Tools::RNAMotif:
+    file: Bio/Tools/RNAMotif.pm
+    version: 1.005002102
+  Bio::Tools::RandomDistFunctions:
+    file: Bio/Tools/RandomDistFunctions.pm
+    version: 1.005002102
+  Bio::Tools::RepeatMasker:
+    file: Bio/Tools/RepeatMasker.pm
+    version: 1.005002102
+  Bio::Tools::RestrictionEnzyme:
+    file: Bio/Tools/RestrictionEnzyme.pm
+    version: 1.005002102
+  Bio::Tools::Run::GenericParameters:
+    file: Bio/Tools/Run/GenericParameters.pm
+    version: 1.005002102
+  Bio::Tools::Run::ParametersI:
+    file: Bio/Tools/Run/ParametersI.pm
+    version: 1.005002102
+  Bio::Tools::Run::RemoteBlast:
+    file: Bio/Tools/Run/RemoteBlast.pm
+    version: 1.005002102
+  Bio::Tools::Run::StandAloneBlast:
+    file: Bio/Tools/Run/StandAloneBlast.pm
+    version: 1.005002102
+  Bio::Tools::Run::WrapperBase:
+    file: Bio/Tools/Run/WrapperBase.pm
+    version: 1.005002102
+  Bio::Tools::Seg:
+    file: Bio/Tools/Seg.pm
+    version: 1.005002102
+  Bio::Tools::SeqPattern:
+    file: Bio/Tools/SeqPattern.pm
+    version: 1.005002102
+  Bio::Tools::SeqStats:
+    file: Bio/Tools/SeqStats.pm
+    version: 1.005002102
+  Bio::Tools::SeqWords:
+    file: Bio/Tools/SeqWords.pm
+    version: 1.005002102
+  Bio::Tools::SiRNA:
+    file: Bio/Tools/SiRNA.pm
+    version: 1.005002102
+  Bio::Tools::SiRNA::Ruleset::saigo:
+    file: Bio/Tools/SiRNA/Ruleset/saigo.pm
+    version: 1.005002102
+  Bio::Tools::SiRNA::Ruleset::tuschl:
+    file: Bio/Tools/SiRNA/Ruleset/tuschl.pm
+    version: 1.005002102
+  Bio::Tools::Sigcleave:
+    file: Bio/Tools/Sigcleave.pm
+    version: 1.005002102
+  Bio::Tools::Signalp:
+    file: Bio/Tools/Signalp.pm
+    version: 1.005002102
+  Bio::Tools::Sim4::Exon:
+    file: Bio/Tools/Sim4/Exon.pm
+    version: 1.005002102
+  Bio::Tools::Sim4::Results:
+    file: Bio/Tools/Sim4/Results.pm
+    version: 1.005002102
+  Bio::Tools::Spidey::Exon:
+    file: Bio/Tools/Spidey/Exon.pm
+    version: 1.005002102
+  Bio::Tools::Spidey::Results:
+    file: Bio/Tools/Spidey/Results.pm
+    version: 1.005002102
+  Bio::Tools::Tmhmm:
+    file: Bio/Tools/Tmhmm.pm
+    version: 1.005002102
+  Bio::Tools::dpAlign:
+    file: Bio/Tools/dpAlign.pm
+    version: 1.005002102
+  Bio::Tools::ipcress:
+    file: Bio/Tools/ipcress.pm
+    version: 1.005002102
+  Bio::Tools::isPcr:
+    file: Bio/Tools/isPcr.pm
+    version: 1.005002102
+  Bio::Tools::pICalculator:
+    file: Bio/Tools/pICalculator.pm
+    version: 1.005002102
+  Bio::Tools::pSW:
+    file: Bio/Tools/pSW.pm
+    version: 1.005002102
+  Bio::Tools::tRNAscanSE:
+    file: Bio/Tools/tRNAscanSE.pm
+    version: 1.005002102
+  Bio::Tree::AlleleNode:
+    file: Bio/Tree/AlleleNode.pm
+    version: 1.005002102
+  Bio::Tree::Compatible:
+    file: Bio/Tree/Compatible.pm
+    version: 1.005002102
+  Bio::Tree::DistanceFactory:
+    file: Bio/Tree/DistanceFactory.pm
+    version: 1.005002102
+  Bio::Tree::Draw::Cladogram:
+    file: Bio/Tree/Draw/Cladogram.pm
+    version: 1.005002102
+  Bio::Tree::Node:
+    file: Bio/Tree/Node.pm
+    version: 1.005002102
+  Bio::Tree::NodeI:
+    file: Bio/Tree/NodeI.pm
+    version: 1.005002102
+  Bio::Tree::NodeNHX:
+    file: Bio/Tree/NodeNHX.pm
+    version: 1.005002102
+  Bio::Tree::RandomFactory:
+    file: Bio/Tree/RandomFactory.pm
+    version: 1.005002102
+  Bio::Tree::Statistics:
+    file: Bio/Tree/Statistics.pm
+    version: 1.005002102
+  Bio::Tree::Tree:
+    file: Bio/Tree/Tree.pm
+    version: 1.005002102
+  Bio::Tree::TreeFunctionsI:
+    file: Bio/Tree/TreeFunctionsI.pm
+    version: 1.005002102
+  Bio::Tree::TreeI:
+    file: Bio/Tree/TreeI.pm
+    version: 1.005002102
+  Bio::TreeIO:
+    file: Bio/TreeIO.pm
+    version: 1.005002102
+  Bio::TreeIO::TreeEventBuilder:
+    file: Bio/TreeIO/TreeEventBuilder.pm
+    version: 1.005002102
+  Bio::TreeIO::cluster:
+    file: Bio/TreeIO/cluster.pm
+    version: 1.005002102
+  Bio::TreeIO::lintree:
+    file: Bio/TreeIO/lintree.pm
+    version: 1.005002102
+  Bio::TreeIO::newick:
+    file: Bio/TreeIO/newick.pm
+    version: 1.005002102
+  Bio::TreeIO::nexus:
+    file: Bio/TreeIO/nexus.pm
+    version: 1.005002102
+  Bio::TreeIO::nhx:
+    file: Bio/TreeIO/nhx.pm
+    version: 1.005002102
+  Bio::TreeIO::pag:
+    file: Bio/TreeIO/pag.pm
+    version: 1.005002102
+  Bio::TreeIO::svggraph:
+    file: Bio/TreeIO/svggraph.pm
+    version: 1.005002102
+  Bio::TreeIO::tabtree:
+    file: Bio/TreeIO/tabtree.pm
+    version: 1.005002102
+  Bio::UpdateableSeqI:
+    file: Bio/UpdateableSeqI.pm
+    version: 1.005002102
+  Bio::Variation::AAChange:
+    file: Bio/Variation/AAChange.pm
+    version: 1.005002102
+  Bio::Variation::AAReverseMutate:
+    file: Bio/Variation/AAReverseMutate.pm
+    version: 1.005002102
+  Bio::Variation::Allele:
+    file: Bio/Variation/Allele.pm
+    version: 1.005002102
+  Bio::Variation::DNAMutation:
+    file: Bio/Variation/DNAMutation.pm
+    version: 1.005002102
+  Bio::Variation::IO:
+    file: Bio/Variation/IO.pm
+    version: 1.005002102
+  Bio::Variation::IO::flat:
+    file: Bio/Variation/IO/flat.pm
+    version: 1.005002102
+  Bio::Variation::IO::xml:
+    file: Bio/Variation/IO/xml.pm
+    version: 1.005002102
+  Bio::Variation::RNAChange:
+    file: Bio/Variation/RNAChange.pm
+    version: 1.005002102
+  Bio::Variation::SNP:
+    file: Bio/Variation/SNP.pm
+    version: 1.005002102
+  Bio::Variation::SeqDiff:
+    file: Bio/Variation/SeqDiff.pm
+    version: 1.005002102
+  Bio::Variation::VariantI:
+    file: Bio/Variation/VariantI.pm
+    version: 1.005002102
+  Bio::WebAgent:
+    file: Bio/WebAgent.pm
+    version: 1.005002102
+  FeatureStore:
+    file: Bio/DB/GFF/Adaptor/berkeleydb.pm
+    version: 1.005002102
+generated_by: Module::Build version 0.2806
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.2.html
+  version: 1.2

Added: trunk/packages/bioperl/branches/upstream/current/Makefile.PL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/Makefile.PL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/Makefile.PL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+# Note: this file was auto-generated by Module::Build::Compat version 0.03
+    
+    # Bootstrap step: if Module::Build::Compat (>= 0.02) cannot be loaded,
+    # offer to install it from CPAN before going any further.
+    unless (eval "use Module::Build::Compat 0.02; 1" ) {
+      print "This module requires Module::Build to install itself.\n";
+      
+      require ExtUtils::MakeMaker;
+      my $yn = ExtUtils::MakeMaker::prompt
+	('  Install Module::Build now from CPAN?', 'y');
+      
+      # anything not starting with 'y' (case-insensitive) aborts the run
+      unless ($yn =~ /^y/i) {
+	die " *** Cannot install without Module::Build.  Exiting ...\n";
+      }
+      
+      require Cwd;
+      require File::Spec;
+      require CPAN;
+      
+      # Save this 'cause CPAN will chdir all over the place.
+      my $cwd = Cwd::cwd();
+      
+      # install, then ask CPAN whether the module is now up to date
+      CPAN::Shell->install('Module::Build::Compat');
+      CPAN::Shell->expand("Module", "Module::Build::Compat")->uptodate
+	or die "Couldn't install Module::Build, giving up.\n";
+      
+      chdir $cwd or die "Cannot chdir() back to $cwd: $!";
+    }
+    # Load Module::Build::Compat for real; die with the load error if it is
+    # still unavailable.
+    eval "use Module::Build::Compat 0.02; 1" or die $@;
+    # Put the current directory on @INC so that ModuleBuildBioperl can be
+    # required below (being a 'use', this takes effect at compile time).
+    use lib '.';
+    # Pass the command-line arguments on to run_build_pl
+    Module::Build::Compat->run_build_pl(args => \@ARGV);
+    require ModuleBuildBioperl;
+    # Write the Makefile, naming ModuleBuildBioperl as the build class
+    Module::Build::Compat->write_makefile(build_class => 'ModuleBuildBioperl');

Added: trunk/packages/bioperl/branches/upstream/current/ModuleBuildBioperl.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/ModuleBuildBioperl.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/ModuleBuildBioperl.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1061 @@
+#!/usr/bin/perl -w
+
+# This is a subclass of Module::Build so we can override certain methods and do
+# fancy stuff
+
+# It was first written against Module::Build::Base v0.2805. Many of the methods
+# here are copy/pasted from there in their entirety just to change one or two
+# minor things, since for the most part Module::Build::Base code is hard to
+# cleanly override.
+
+# This was written by Sendu Bala and is released under the same license as
+# Bioperl itself
+
+package ModuleBuildBioperl;
+
+# Compile-time bootstrap: make sure Module::Build (>= 0.2805) is available,
+# offering to install it from CPAN if it is not, then inherit from it and put
+# the current directory on @INC.
+BEGIN {
+    # we really need Module::Build to be installed
+    unless (eval "use Module::Build 0.2805; 1") {
+        print "This package requires Module::Build v0.2805 or greater to install itself.\n";
+        
+        require ExtUtils::MakeMaker;
+        my $yn = ExtUtils::MakeMaker::prompt('  Install Module::Build now from CPAN?', 'y');
+        
+        # anything not starting with 'y' (case-insensitive) aborts
+        unless ($yn =~ /^y/i) {
+            die " *** Cannot install without Module::Build.  Exiting ...\n";
+        }
+        
+        require Cwd;
+        require File::Spec;
+        require File::Copy;
+        require CPAN;
+        
+        # Save this because CPAN will chdir all over the place.
+        my $cwd = Cwd::cwd();
+        
+        my $build_pl = File::Spec->catfile($cwd, "Build.PL");
+        
+        # Build.PL is renamed to "Build.PLhidden" for the duration of the
+        # install and renamed back afterwards.
+        # NOTE(review): neither move is checked, so a failed rename goes
+        # unnoticed; and if the install step dies, Build.PL stays hidden.
+        File::Copy::move($build_pl, $build_pl."hidden"); # avoid bizarre bug with Module::Build tests using the wrong Build.PL if it happens to be in PERL5LIB
+        CPAN::Shell->install('Module::Build');
+        File::Copy::move($build_pl."hidden", $build_pl);
+        # ask CPAN whether the module is now up to date; give up if it isn't
+        CPAN::Shell->expand("Module", "Module::Build")->uptodate or die "Couldn't install Module::Build, giving up.\n";
+        
+        chdir $cwd or die "Cannot chdir() back to $cwd: $!\n\n***\nInstallation will probably work fine if you now quit CPAN and try again.\n***\n\n";
+    }
+    
+    # string eval so that a failure to load Module::Build surfaces here as a
+    # runtime die carrying the load error
+    eval "use base Module::Build; 1" or die $@;
+    
+    # ensure we'll be able to reload this module later by adding its path to inc
+    # (the current working directory is what actually gets added)
+    use Cwd;
+    use lib Cwd::cwd();
+}
+
+use strict;
+use warnings;
+
+our $VERSION = 1.005002101;
+# Prerequisite types checked by prereq_failures() in addition to those from
+# prereq_action_types. 'test' is only run when no earlier type has recorded a
+# failure, which is why it has to come last.
+our @extra_types = qw(options excludes_os feature_requires test); # test must always be last in the list!
+# Regex alternation of the failure types that disable an auto feature; it is
+# interpolated into the patterns used by features() and check_autofeatures()
+our $checking_types = "requires|conflicts|".join("|", @extra_types);
+
+
+# Bioperl keeps its modules under Bio/ rather than lib/: every .pm file found
+# below Bio/ is entered in the pm_files property, mapped to the same path
+# prefixed with lib/. Returns whatever _find_file_by_type('pm', 'lib') returns.
+sub find_pm_files {
+    my ($self) = @_;
+    
+    my $bio_modules = $self->rscan_dir('Bio', qr/\.pm$/);
+    for my $module_path (@{$bio_modules}) {
+        my $lib_path = File::Spec->catfile('lib', $module_path);
+        $self->{properties}{pm_files}->{$module_path} = $lib_path;
+    }
+    
+    return $self->_find_file_by_type('pm', 'lib');
+}
+
+# ask what scripts to install (this method is unique to bioperl)
+#
+# Prompts the user and records the answer in the 'chosen_scripts' note as
+# 'all', 'none', or a '|'-joined list of script paths (which script_files()
+# later splits again). Interactive, per-group selection is only offered when
+# the top level of scripts/ holds sub-directories and no script files itself.
+# Dies if scripts/ can't be opened or if the user answers [q]uit.
+sub choose_scripts {
+    my $self = shift;
+    
+    # we can offer interactive installation by groups only if we have subdirs
+    # in scripts and no .PLS files there
+    opendir(my $scripts_dir, 'scripts') or die "Can't open directory 'scripts': $!\n";
+    my $int_ok = 0;
+    my @group_dirs;
+    # explicit defined() so that an entry literally named "0" can't end the loop
+    while (defined(my $thing = readdir($scripts_dir))) {
+        next if $thing =~ /^\./;
+        next if $thing eq 'CVS';
+        # same extensions as the rscan_dir() filters used for scripts; the dot
+        # is required so that names merely ending in "pl" don't count
+        if ($thing =~ /\.PLS$|\.pl$/) {
+            $int_ok = 0;
+            last;
+        }
+        $thing = File::Spec->catfile('scripts', $thing);
+        if (-d $thing) {
+            $int_ok = 1;
+            push(@group_dirs, $thing);
+        }
+    }
+    closedir($scripts_dir);
+    my $question = $int_ok ? "Install [a]ll Bioperl scripts, [n]one, or choose groups [i]nteractively?" : "Install [a]ll Bioperl scripts or [n]one?";
+    
+    my $prompt = $self->prompt($question, 'a');
+    
+    if ($prompt =~ /^[aA]/) {
+        $self->log_info("  - will install all scripts\n");
+        $self->notes(chosen_scripts => 'all');
+    }
+    # only honour 'i' when it was actually offered; otherwise @group_dirs may
+    # be an incomplete list and the answer is treated like any other non-'a'
+    elsif ($int_ok && $prompt =~ /^[iI]/) {
+        $self->log_info("  - will install interactively:\n");
+        
+        my @chosen_scripts;
+        foreach my $group_dir (@group_dirs) {
+            my $group = File::Basename::basename($group_dir);
+            print "    * group '$group' has:\n";
+            
+            my @script_files = @{$self->rscan_dir($group_dir, qr/\.PLS$|\.pl$/)};
+            foreach my $script_file (@script_files) {
+                my $script = File::Basename::basename($script_file);
+                print "      $script\n";
+            }
+            
+            my $result = $self->prompt("    Install scripts for group '$group'? [y]es [n]o [q]uit", 'n');
+            die "Quitting at user request\n" if $result =~ /^[qQ]/;
+            if ($result =~ /^[yY]/) {
+                $self->log_info("      + will install group '$group'\n");
+                push(@chosen_scripts, @script_files);
+            }
+            else {
+                $self->log_info("      - will not install group '$group'\n");
+            }
+        }
+        
+        my $chosen_scripts = @chosen_scripts ? join("|", @chosen_scripts) : 'none';
+        
+        $self->notes(chosen_scripts => $chosen_scripts);
+    }
+    else {
+        $self->log_info("  - won't install any scripts\n");
+        $self->notes(chosen_scripts => 'none');
+    }
+    
+    print "\n";
+}
+
+# our version of script_files doesn't take args but just installs those scripts
+# requested by the user after choose_scripts() is called. If it wasn't called,
+# installs all scripts in scripts directory
+#
+# Returns a hash ref whose keys are script paths (values are all 1), or
+# nothing at all when the 'chosen_scripts' note is 'none'.
+sub script_files {
+    my $self = shift;
+    
+    my $chosen_scripts = $self->notes('chosen_scripts');
+    if ($chosen_scripts) {
+        return if $chosen_scripts eq 'none';
+        # anything other than 'all' is a '|'-joined list of script paths
+        return { map { $_ => 1 } split(/\|/, $chosen_scripts) } unless $chosen_scripts eq 'all';
+    }
+    
+    # 'all', or no choice recorded: every .PLS/.pl file below scripts/.
+    # (The hash ref is returned directly; it used to be assigned to the global
+    # $_ on the way out, which overwrote the caller's $_.)
+    return { map { $_ => 1 } @{$self->rscan_dir('scripts', qr/\.PLS$|\.pl$/)} };
+}
+
+# Scripts are copied into blib/script, given a fixed shebang line (except on
+# VMS) and made executable; each copy is then renamed so that a trailing .PLS
+# becomes .pl and names not already starting with "bp" gain a "bp_" prefix.
+# Dies if a copy can't be renamed.
+sub process_script_files {
+    my ($self) = @_;
+    
+    my $scripts = $self->find_script_files;
+    return unless keys %$scripts;
+    
+    my $blib_script_dir = File::Spec->catdir($self->blib, 'script');
+    File::Path::mkpath( $blib_script_dir );
+    
+    foreach my $source (keys %$scripts) {
+        # a false return from copy_if_modified means there is nothing to do
+        # for this script
+        my $copied = $self->copy_if_modified($source, $blib_script_dir, 'flatten');
+        next unless $copied;
+        
+        $self->fix_shebang_line($copied) unless $self->os_type eq 'VMS';
+        $self->make_executable($copied);
+        
+        # work out the installed name: foo.PLS -> bp_foo.pl
+        my $new_name = File::Basename::basename($copied);
+        $new_name =~ s/\.PLS$/\.pl/;
+        $new_name = "bp_$new_name" unless $new_name =~ /^bp/;
+        
+        my $target = File::Spec->catfile($blib_script_dir, $new_name);
+        $self->log_info("$copied -> $target\n");
+        File::Copy::move($copied, $target) or die "Can't rename '$copied' to '$target': $!";
+    }
+}
+
+# Feature lookup, extended to handle extra checking types: an auto feature is
+# reported as disabled when any of its prereq failures is of a type matched by
+# $checking_types.
+#
+# With a key: an explicitly recorded feature wins; otherwise an auto feature
+# of that name is evaluated; otherwise the call falls through to the features
+# store. Without arguments: all auto features (as 0/1) overlaid with the
+# explicit features, as a hash in list context or a hash ref otherwise.
+sub features {
+    my ($self, @args) = @_;
+    my $phash = $self->{phash};
+    
+    # true when the given auto feature info has at least one disabling failure
+    my $is_disabled = sub {
+        my ($info) = @_;
+        my $failures = $self->prereq_failures($info);
+        return scalar grep( /^(?:\w+_)?(?:$checking_types)$/, keys %$failures );
+    };
+    
+    if (@args) {
+        my ($key, @rest) = @args;
+        
+        return $phash->{features}->access($key, @rest) if $phash->{features}->exists($key);
+        
+        my $info = $phash->{auto_features}->access($key);
+        return !$is_disabled->($info) if $info;
+        
+        return $phash->{features}->access($key, @rest);
+    }
+    
+    # No args - get the auto_features & overlay the regular features
+    my %auto_features = $phash->{auto_features}->access();
+    my %features;
+    foreach my $name (keys %auto_features) {
+        $features{$name} = $is_disabled->($auto_features{$name}) ? 0 : 1;
+    }
+    %features = (%features, $phash->{features}->access());
+    
+    return wantarray ? %features : \%features;
+}
+*feature = \&features;
+
+# overridden to fix a stupid bug in Module::Build and extended to handle extra
+# checking types
+#
+# Logs one line per auto feature ("name....enabled" or "name....disabled"),
+# followed by the message of every prereq failure found for it: '-' marks
+# requires/conflicts failures, '*' marks all other types.
+sub check_autofeatures {
+    my $self = shift;
+    
+    my $features = $self->auto_features;
+    return unless %$features;
+    
+    $self->log_info("Checking features:\n");
+    
+    # length of the longest feature name, used to line up the status column
+    # (this wasn't set to 0 in Module::Build, causing a warning)
+    my $max_name_len = 0;
+    foreach my $feature_name (keys %$features) {
+        $max_name_len = length($feature_name) if length($feature_name) > $max_name_len;
+    }
+    
+    while (my ($name, $info) = each %$features) {
+        my $dots = '.' x ($max_name_len - length($name) + 4);
+        $self->log_info("  $name" . $dots);
+        
+        # NOTE(review): this branch just dumps the info hash of a feature
+        # called 'PL_files' - looks like leftover debugging output; confirm
+        if ($name eq 'PL_files') {
+            print "got $name => $info\n";
+            print "info has:\n";
+            foreach my $key (keys %$info) {
+                print "  $key => $info->{$key}\n";
+            }
+        }
+        
+        my $failures = $self->prereq_failures($info);
+        unless ($failures) {
+            $self->log_info("enabled\n");
+            next;
+        }
+        
+        # only failures of a checking type actually disable the feature
+        my $disabled = grep( /^(?:\w+_)?(?:$checking_types)$/, keys %$failures );
+        $self->log_info( $disabled ? "disabled\n" : "enabled\n" );
+        
+        my $log_text;
+        foreach my $type (keys %$failures) {
+            my $prefix = $type =~ /^(?:\w+_)?(?:requires|conflicts)$/ ? '-' : '*';
+            foreach my $status (values %{ $failures->{$type} }) {
+                $log_text .= "    $prefix $status->{message}\n";
+            }
+        }
+        $self->log_warn($log_text) if $log_text && ! $self->quiet;
+    }
+    
+    $self->log_info("\n");
+}
+
+# overridden just to hide pointless ugly warnings: STDERR is pointed at the
+# null device while the parent implementation runs and is put back afterwards,
+# even if the parent dies (the error is then re-thrown).
+# Takes the same arguments and returns the same value as the parent method.
+sub check_installed_status {
+    my $self = shift;
+    
+    # keep a duplicate of the real STDERR so it can be restored; if even that
+    # fails, don't redirect at all and just let any warnings show
+    open(my $olderr, ">&", \*STDERR) or return $self->SUPER::check_installed_status(@_);
+    
+    # open the platform's null device for writing (a 2-arg open of a bare
+    # "/dev/null" opens it read-only and only exists on Unix-like systems);
+    # on failure go straight back to the real STDERR
+    require File::Spec;
+    open(STDERR, ">", File::Spec->devnull) or open(STDERR, ">&", $olderr);
+    
+    my $return;
+    my $ok = eval { $return = $self->SUPER::check_installed_status(@_); 1 };
+    my $error = $@;
+    
+    # always restore STDERR before returning or re-throwing
+    open(STDERR, ">&", $olderr);
+    
+    die $error unless $ok;
+    return $return;
+}
+
+# extend to handle option checking (which takes an array ref) and code test
+# checking (which takes a code ref and must return a message only on failure)
+# and excludes_os (which takes an array ref of regexps).
+# also handles more informative output of recommends section
+#
+# Takes an optional hash ref of { type => prerequisites }; without one, the
+# data is gathered by calling each type's same-named method on $self.
+# Returns a hash ref of { type => { name => status } } describing what failed,
+# or nothing when there were no failures. May trigger module installation
+# (via install_required/install_optional) as a side effect.
+sub prereq_failures {
+    my ($self, $info) = @_;
+    
+    # the standard prerequisite types followed by our own extra ones
+    my @types = (@{ $self->prereq_action_types }, @extra_types);
+    $info ||= {map {$_, $self->$_()} @types};
+    
+    my $out = {};
+    foreach my $type (@types) {
+        my $prereqs = $info->{$type} || next;
+        
+        my $status = {};
+        if ($type eq 'test') {
+            # the code test only runs if nothing has failed so far
+            unless (keys %$out) {
+                # called with & and no argument list, so the code ref sees
+                # this method's own @_; a true return value is the failure
+                # message
+                $status->{message} = &{$prereqs};
+                $out->{$type}{'test'} = $status if $status->{message};
+            }
+        }
+        elsif ($type eq 'options') {
+            # every listed option must have a true value in $self->args
+            my @not_ok;
+            foreach my $wanted_option (@{$prereqs}) {
+                unless ($self->args($wanted_option)) {
+                    push(@not_ok, $wanted_option);
+                }
+            }
+            
+            if (@not_ok > 0) {
+                $status->{message} = "Command line option(s) '@not_ok' not supplied";
+                $out->{$type}{'options'} = $status;
+            }
+        }
+        elsif ($type eq 'excludes_os') {
+            # each entry is used as a case-insensitive pattern against $^O;
+            # the first match is recorded as the failure
+            # NOTE(review): the message shows the matching pattern ($os), not
+            # the actual OS name ($^O) - confirm that is intended
+            foreach my $os (@{$prereqs}) {
+                if ($^O =~ /$os/i) {
+                    $status->{message} = "This feature isn't supported under your OS ($os)";
+                    $out->{$type}{'excludes_os'} = $status;
+                    last;
+                }
+            }
+        }
+        else {
+            # module-based types: $prereqs maps module name => version spec
+            while ( my ($modname, $spec) = each %$prereqs ) {
+                $status = $self->check_installed_status($modname, $spec);
+                
+                if ($type =~ /^(?:\w+_)?conflicts$/) {
+                    # for conflicts, a satisfied spec is the problem
+                    next if !$status->{ok};
+                    $status->{conflicts} = delete $status->{need};
+                    $status->{message} = "$modname ($status->{have}) conflicts with this distribution";
+                }
+                elsif ($type =~ /^(?:\w+_)?recommends$/) {
+                    next if $status->{ok};
+                    
+                    # the spec is split on "/" into version, reason and a
+                    # comma-separated list of users, e.g. "a,b,c" becomes
+                    # "a, b and c"
+                    # NOTE(review): a spec without all three parts leaves $why
+                    # and/or $by_what undefined here - confirm callers always
+                    # supply the full form
+                    my ($preferred_version, $why, $by_what) = split("/", $spec);
+                    $by_what = join(", ", split(",", $by_what));
+                    $by_what =~ s/, (\S+)$/ and $1/;
+                    
+                    $status->{message} = (!ref($status->{have}) && $status->{have} eq '<none>'
+                                  ? "Optional prerequisite $modname is not installed"
+                                  : "$modname ($status->{have}) is installed, but we prefer to have $preferred_version");
+                    
+                    $status->{message} .= "\n   (wanted for $why, used by $by_what)";
+                    
+                    # 'ok' means no failure is recorded; 'skip' keeps the
+                    # message built above; anything else is used as the
+                    # failure message
+                    my $installed = $self->install_optional($modname, $preferred_version, $status->{message});
+                    next if $installed eq 'ok';
+                    $status->{message} = $installed unless $installed eq 'skip';
+                }
+                elsif ($type =~ /^feature_requires/) {
+                    # never installed on demand: just record the failure
+                    next if $status->{ok};
+                }
+                else {
+                    next if $status->{ok};
+                    
+                    # remaining types: try to install; anything but 'ok' is
+                    # used as the failure message
+                    my $installed = $self->install_required($modname, $spec, $status->{message});
+                    next if $installed eq 'ok';
+                    $status->{message} = $installed;
+                }
+                
+                $out->{$type}{$modname} = $status;
+            }
+        }
+    }
+    
+    # the failures hash ref if there is anything in it, otherwise a bare return
+    return keys %{$out} ? $out : return;
+}
+
+# install an external module using CPAN prior to testing and installation
+# should only be called by install_required or install_optional
+#
+# Takes the module name and the wanted version. Returns 'ok' on success (or
+# when the module has been handed over to CPAN as a requirement), otherwise a
+# message saying that installation failed. Dies only if the original working
+# directory can't be restored.
+sub install_prereq {
+    my ($self, $desired, $version) = @_;
+    
+    if ($self->under_cpan) {
+        # Just add to the required hash, which CPAN >= 1.81 will check prior
+        # to install
+        $self->{properties}{requires}->{$desired} = $version;
+        $self->log_info("   I'll get CPAN to prepend the installation of this\n");
+        return 'ok';
+    }
+    else {
+        # Here we use CPAN to actually install the desired module, the benefit
+        # being we continue even if installation fails, and that this works
+        # even when not using CPAN to install.
+        require Cwd;
+        require CPAN;
+        
+        # Save this because CPAN will chdir all over the place.
+        my $cwd = Cwd::cwd();
+        
+        CPAN::Shell->install($desired);
+        
+        # expand() gives back nothing when CPAN doesn't know the module, so
+        # check before calling a method on the result; that case counts as a
+        # failed install rather than killing the whole build
+        my $cpan_module = CPAN::Shell->expand("Module", $desired);
+        my $msg;
+        if ($cpan_module && $cpan_module->uptodate) {
+            $self->log_info("\n\n*** (back in Bioperl Build.PL) ***\n * You chose to install $desired and it installed fine\n");
+            $msg = 'ok';
+        }
+        else {
+            $self->log_info("\n\n*** (back in Bioperl Build.PL) ***\n");
+            $msg = "You chose to install $desired but it failed to install";
+        }
+        
+        chdir $cwd or die "Cannot chdir() back to $cwd: $!";
+        return $msg;
+    }
+}
+
+# Handles a missing module from the 'requires' or 'build_requires' arg to new:
+# the failure message is logged as an error and installation is attempted via
+# install_prereq(), whose result is returned. Should only be called by
+# prereq_failures
+sub install_required {
+    my $self = shift;
+    my ($desired, $version, $msg) = @_;
+    
+    my $error_line = " - ERROR: $msg\n";
+    $self->log_info($error_line);
+    
+    return $self->install_prereq($desired, $version);
+}
+
+# Handles a missing module from the 'recommends' arg to new. The user is asked
+# once (the answer is kept in $self->{ask_optional}) whether to install all,
+# none, or to decide module by module. Returns 'skip' when the answer was
+# none, 'ok' when this particular module was declined, and otherwise whatever
+# install_prereq() returns. Should only be called by prereq_failures
+sub install_optional {
+    my $self = shift;
+    my ($desired, $version, $msg) = @_;
+    
+    # only ask the all/none/interactive question the first time round
+    $self->{ask_optional} = $self->prompt("Install [a]ll optional external modules, [n]one, or choose [i]nteractively?", 'n')
+        unless defined $self->{ask_optional};
+    
+    my $mode = $self->{ask_optional};
+    return 'skip' if $mode =~ /^n/i;
+    
+    my $wanted;
+    if ($mode =~ /^a/i) {
+        # installing everything: just report what is about to be installed
+        $self->log_info(" * $msg\n");
+        $wanted = 1;
+    }
+    else {
+        $wanted = $self->y_n(" * $msg\n   Do you want to install it? y/n", 'n');
+    }
+    
+    unless ($wanted) {
+        $self->log_info(" * You chose not to install $desired\n");
+        return 'ok';
+    }
+    
+    return $self->install_prereq($desired, $version);
+}
+
+# there's no official way to discover if being run by CPAN, we take an approach
+# similar to that of Module::AutoInstall: if CPAN's lock file exists and the
+# current directory path contains cpan_home, assume CPAN is running us.
+# The answer is worked out once and cached in $self->{under_cpan}.
+sub under_cpan {
+    my ($self) = @_;
+    
+    return $self->{under_cpan} if defined $self->{under_cpan};
+    
+    ## modified from Module::AutoInstall
+    
+    # load cpan config: newer versions of CPAN have a HandleConfig module,
+    # older versions had the load method in Config directly
+    require CPAN;
+    my $config_loader = $CPAN::HandleConfig::VERSION ? 'CPAN::HandleConfig' : 'CPAN::Config';
+    $config_loader->load;
+    
+    # Module::AutoInstall goes on to open the lock file and compare its pid
+    # to ours, but we're not in a situation where we expect the pids to
+    # match, so we take the windows approach for all OSes: find out if
+    # we're in cpan_home
+    my $in_cpan_home;
+    if (-f File::Spec->catfile($CPAN::Config->{cpan_home}, '.lock')) {
+        my $here      = File::Spec->canonpath(Cwd::cwd());
+        my $cpan_home = File::Spec->canonpath($CPAN::Config->{cpan_home});
+        $in_cpan_home = index($here, $cpan_home) > -1;
+    }
+    
+    if ($in_cpan_home) {
+        $self->{under_cpan} = $in_cpan_home;
+        $self->log_info("(I think I'm being run by CPAN, so will rely on CPAN to handle prerequisite installation)\n");
+    }
+    else {
+        $self->log_info("(I think you ran Build.PL directly, so will use CPAN to install prerequisites on demand)\n");
+        $self->{under_cpan} = 0;
+    }
+    
+    return $self->{under_cpan};
+}
+
+# Asks a question and returns the answer, falling back to the default when
+# nothing is entered. Overridden simply so that the default answer is not
+# echoed back when it is chosen by hitting return.
+# Dies when called without a message, or when the build is unattended and no
+# default was supplied.
+sub prompt {
+    my ($self, $mess, @rest) = @_;
+    $mess or die "prompt() called without a prompt message";
+    
+    if ( $self->_is_unattended && !@rest ) {
+        die <<EOF;
+ERROR: This build seems to be unattended, but there is no default value
+for this question.  Aborting.
+EOF
+    }
+    
+    # with a defined default, show it in brackets after the question
+    my ($def, $dispdef) = ('', ' ');
+    if (@rest && defined $rest[0]) {
+        $def     = $rest[0];
+        $dispdef = "[$def] ";
+    }
+    
+    local $|=1;
+    print "$mess $dispdef";
+    
+    my $ans = $self->_readline();
+    
+    # Ctrl-D / unattended (undef) or a plain return (empty) selects the default
+    return $def unless defined($ans) && length($ans);
+    
+    return $ans;
+}
+
+# like the Module::Build version, except that we always get version from
+# dist_version
+sub find_dist_packages {
+    my $self = shift;
+    
+    # Only packages in .pm files are candidates for inclusion here.
+    # Only include things in the MANIFEST, not things in developer's
+    # private stock.
+    
+    my $manifest = $self->_read_manifest('MANIFEST') or die "Can't find dist packages without a MANIFEST file - run 'manifest' action first";
+    
+    # Localize
+    my %dist_files = map { $self->localize_file_path($_) => $_ } keys %$manifest;
+    
+    my @pm_files = grep {exists $dist_files{$_}} keys %{ $self->find_pm_files };
+    
+    my $actual_version = $self->dist_version;
+    
+    # First, we enumerate all packages & versions,
+    # seperating into primary & alternative candidates
+    my( %prime, %alt );
+    foreach my $file (@pm_files) {
+        next if $dist_files{$file} =~ m{^t/};  # Skip things in t/
+        
+        my @path = split( /\//, $dist_files{$file} );
+        (my $prime_package = join( '::', @path[1..$#path] )) =~ s/\.pm$//;
+        
+        my $pm_info = Module::Build::ModuleInfo->new_from_file( $file );
+        
+        foreach my $package ( $pm_info->packages_inside ) {
+            next if $package eq 'main';  # main can appear numerous times, ignore
+            next if grep /^_/, split( /::/, $package ); # private package, ignore
+            
+            my $version = $pm_info->version( $package );
+            if ($version && $version != $actual_version) {
+                $self->log_warn("Package $package had version $version!\n");
+            }
+            $version = $actual_version;
+            
+            if ( $package eq $prime_package ) {
+                if ( exists( $prime{$package} ) ) {
+                    # M::B::ModuleInfo will handle this conflict
+                    die "Unexpected conflict in '$package'; multiple versions found.\n";
+                }
+                else {
+                    $prime{$package}{file} = $dist_files{$file};
+                    $prime{$package}{version} = $version if defined( $version );
+                }
+            }
+            else {
+                push( @{$alt{$package}}, { file => $dist_files{$file}, version => $version } );
+            }
+        }
+    }
+    
+    # Then we iterate over all the packages found above, identifying conflicts
+    # and selecting the "best" candidate for recording the file & version
+    # for each package.
+    foreach my $package ( keys( %alt ) ) {
+        my $result = $self->_resolve_module_versions( $alt{$package} );
+        
+        if ( exists( $prime{$package} ) ) { # primary package selected
+            if ( $result->{err} ) {
+                # Use the selected primary package, but there are conflicting
+                 # errors amoung multiple alternative packages that need to be
+                 # reported
+                 $self->log_warn("Found conflicting versions for package '$package'\n" .
+                                 "  $prime{$package}{file} ($prime{$package}{version})\n" . $result->{err});
+            }
+            elsif ( defined( $result->{version} ) ) {
+                # There is a primary package selected, and exactly one
+                # alternative package
+                
+                if ( exists( $prime{$package}{version} ) && defined( $prime{$package}{version} ) ) {
+                    # Unless the version of the primary package agrees with the
+                    # version of the alternative package, report a conflict
+                    if ( $self->compare_versions( $prime{$package}{version}, '!=', $result->{version} ) ) {
+                        $self->log_warn("Found conflicting versions for package '$package'\n" .
+                                        "  $prime{$package}{file} ($prime{$package}{version})\n" .
+                                        "  $result->{file} ($result->{version})\n");
+                    }
+                }
+                else {
+                  # The prime package selected has no version so, we choose to
+                  # use any alternative package that does have a version
+                  $prime{$package}{file}    = $result->{file};
+                  $prime{$package}{version} = $result->{version};
+                }
+            }
+            else {
+                # no alt package found with a version, but we have a prime
+                # package so we use it whether it has a version or not
+            }
+        }
+        else { # No primary package was selected, use the best alternative
+            if ( $result->{err} ) {
+                $self->log_warn("Found conflicting versions for package '$package'\n" . $result->{err});
+            }
+            
+            # Despite possible conflicting versions, we choose to record
+            # something rather than nothing
+            $prime{$package}{file}    = $result->{file};
+            $prime{$package}{version} = $result->{version} if defined( $result->{version} );
+        }
+    }
+  
+    # Stringify versions
+    for (grep exists $_->{version}, values %prime) {
+        $_->{version} = $_->{version}->stringify if ref($_->{version});
+    }
+  
+    return \%prime;
+}
+
+# Turn a single prerequisite spec into a list of version conditions. Our
+# recommends syntax contains extra info after the version part, separated by
+# "/"; everything from the first "/" onwards is ignored at this stage.
+sub _parse_conditions {
+    my ($self, $spec) = @_;
+    
+    # only the text in front of the first "/" is the actual version spec
+    ($spec) = split("/", $spec);
+    
+    # a plain number, maybe with dots, letters, and underscores, means
+    # "at least this version"
+    return (">= $spec") if $spec =~ /^\s*([\w.]+)\s*$/;
+    
+    # anything else is taken to be a comma separated list of conditions
+    return split /\s*,\s*/, $spec;
+}
+
+# Fill $node with the data that goes into META.yml and return it. When $keys
+# is supplied, the name of every field added through the helper sub below is
+# also pushed onto it, in the order the fields were added. Dies when one of
+# the required fields (name, version, author, abstract, license) is missing.
+#
+# when generating META.yml, we output optional_features syntax (instead of
+# recommends syntax). Note that as of CPAN v1.8802 nothing useful is done
+# with this information, which is why we implement our own request to install
+# the optional modules in install_optional()
+#
+# Each recommends value has the form "version/description/used_by", and the
+# used_by part becomes the name of the optional feature. Recommendations that
+# share a used_by string are merged into one feature (all of the modules end
+# up in its 'requires'), and a value without a used_by part falls back to the
+# module name, so that no recommendation silently overwrites another one.
+sub prepare_metadata {
+    my ($self, $node, $keys) = @_;
+    my $p = $self->{properties};
+    
+    # A little helper sub
+    my $add_node = sub {
+        my ($name, $val) = @_;
+        $node->{$name} = $val;
+        push @$keys, $name if $keys;
+    };
+    
+    foreach (qw(dist_name dist_version dist_author dist_abstract license)) {
+        (my $name = $_) =~ s/^dist_//;
+        $add_node->($name, $self->$_());
+        die "ERROR: Missing required field '$_' for META.yml\n" unless defined($node->{$name}) && length($node->{$name});
+    }
+    $node->{version} = '' . $node->{version}; # Stringify version objects
+    
+    if (defined( $self->license ) && defined( my $url = $self->valid_licenses->{ $self->license } )) {
+        $node->{resources}{license} = $url;
+    }
+    
+    foreach ( @{$self->prereq_action_types} ) {
+        if (exists $p->{$_} and keys %{ $p->{$_} }) {
+            if ($_ eq 'recommends') {
+                my ($hash, %described);
+                # sorted, so that merged descriptions come out in a stable order
+                foreach my $req (sort keys %{ $p->{$_} }) {
+                    my ($ver, $why, $used_by) = split("/", $p->{$_}{$req});
+                    
+                    # without a used_by part the module itself names the feature
+                    my $feature = (defined($used_by) && length($used_by)) ? $used_by : $req;
+                    
+                    my $info = $hash->{$feature} ||= { description => undef, requires => {} };
+                    $info->{requires}{$req} = $ver;
+                    
+                    # mention every distinct description of a shared feature once
+                    next if !defined($why) || $described{$feature}{$why}++;
+                    $info->{description} = defined($info->{description}) ? "$info->{description}; $why" : $why;
+                }
+                $add_node->('optional_features', $hash);
+            }
+            else {
+                $add_node->($_, $p->{$_});
+            }
+        }
+    }
+    
+    if (exists $p->{dynamic_config}) {
+        $add_node->('dynamic_config', $p->{dynamic_config});
+    }
+    
+    # 'provides' is set directly, so it is never recorded in $keys
+    my $pkgs = eval { $self->find_dist_packages };
+    if ($@) {
+        $self->log_warn("$@\nWARNING: Possible missing or corrupt 'MANIFEST' file.\n" . "Nothing to enter for 'provides' field in META.yml\n");
+    }
+    else {
+        $node->{provides} = $pkgs if %$pkgs;
+    };
+    
+    if (exists $p->{no_index}) {
+        $add_node->('no_index', $p->{no_index});
+    }
+    
+    $add_node->('generated_by', "Module::Build version $Module::Build::VERSION");
+    
+    $add_node->('meta-spec', 
+            {version => '1.2',
+             url     => 'http://module-build.sourceforge.net/META-spec-v1.2.html',
+            });
+    
+    # meta_add entries replace what is there, meta_merge entries are merged in
+    while (my($k, $v) = each %{$self->meta_add}) {
+        $add_node->($k, $v);
+    }
+    
+    while (my($k, $v) = each %{$self->meta_merge}) {
+        $self->_hash_merge($node, $k, $v);
+    }
+    
+    return $node;
+}
+
+# Extended constructor that lets us store extra things persistently in
+# _build: after the parent class has built the object, two additional
+# Module::Build::Notes stores (manifest_skip and post_install_scripts) are
+# put into the phash, each backed by a file of the same name in the config
+# dir.
+sub _construct {
+    my $self = shift;
+    $self = $self->SUPER::_construct(@_);
+    
+    my $phash = $self->{phash};
+    for my $store (qw(manifest_skip post_install_scripts)) {
+        my $path  = File::Spec->catfile($self->config_dir, $store);
+        my $notes = Module::Build::Notes->new(file => $path);
+        $phash->{$store} = $notes;
+        
+        # pick up whatever an earlier run has saved
+        $notes->restore if -e $path;
+    }
+    
+    return $self;
+}
+# Extended to write our extra persistent stores next to the normal config,
+# and to keep a copy of this module under the config dir. Problems with that
+# copy are reported as warnings only.
+sub write_config {
+    my $self = shift;
+    $self->SUPER::write_config;
+    
+    # write extra things
+    $self->{phash}{$_}->write() foreach qw(manifest_skip post_install_scripts);
+    
+    # be even more certain we can reload ourselves during a resume by copying
+    # ourselves to _build/lib
+    my $filename = File::Spec->catfile($self->{properties}{config_dir}, 'lib', 'ModuleBuildBioperl.pm');
+    my $filedir  = File::Basename::dirname($filename);
+    
+    # mkpath croaks when it fails; the copy is only a convenience, so turn
+    # that into the warning that was intended instead of aborting
+    eval { File::Path::mkpath($filedir) };
+    warn "Can't create directory $filedir: " . ($@ || "$!\n") unless -d $filedir;
+    
+    # go by the return value of copy(): a stale copy left behind by an earlier
+    # run would make an existence check pass even though this copy failed
+    File::Copy::copy('ModuleBuildBioperl.pm', $filename)
+        or warn "Unable to copy 'ModuleBuildBioperl.pm' to '$filename': $!\n";
+}
+
+# Add one or more files to the default MANIFEST.SKIP: every argument is
+# passed through localize_file_path and then written to the persistent
+# manifest_skip store.
+sub add_to_manifest_skip {
+    my ($self, @entries) = @_;
+    
+    my %files;
+    for my $entry (@entries) {
+        my ($local) = $self->localize_file_path($entry);
+        $files{$local} = 1;
+    }
+    
+    $self->{phash}{manifest_skip}->write(\%files);
+}
+
+# The manifest action. We always generate a new MANIFEST and MANIFEST.SKIP
+# here: files that are already there are removed (with a warning) instead of
+# being allowed to remain.
+sub ACTION_manifest {
+    my $self = shift;
+    
+    my $skip_file = 'MANIFEST.SKIP';
+    my @present   = grep { -e $_ } ('MANIFEST', $skip_file);
+    if (@present) {
+        $self->log_warn("MANIFEST files already exist, will overwrite them\n");
+        unlink($_) for ('MANIFEST', $skip_file);
+    }
+    $self->_write_default_maniskip($skip_file);
+    
+    # ExtUtils::Manifest is not warnings clean, so keep it quiet for the
+    # duration of this call
+    require ExtUtils::Manifest;
+    local $^W = 0;
+    local $ExtUtils::Manifest::Quiet = 1;
+    ExtUtils::Manifest::mkmanifest();
+}
+
+# extended to add extra things to the default MANIFEST.SKIP: once the parent
+# class has written its defaults, the entries recorded in the manifest_skip
+# store are appended to the same file.
+#
+# Takes the name of the skip file (default 'MANIFEST.SKIP'), which is handed
+# on to the parent implementation as well. Dies when the file cannot be
+# opened or written.
+sub _write_default_maniskip {
+    my $self = shift;
+    my $file = shift || 'MANIFEST.SKIP';
+    $self->SUPER::_write_default_maniskip($file);
+    
+    # sorted, so that regenerating the file gives the same content every time
+    my @extra = sort keys %{$self->{phash}{manifest_skip}->read};
+    if (@extra) {
+        open(my $fh, '>>', $file) or die "Could not open $file file: $!\n";
+        print $fh "\n# Avoid additional run-time generated things\n";
+        foreach my $line (@extra) {
+            print $fh $line, "\n";
+        }
+        # buffered write errors only show up when the handle is closed
+        close($fh) or die "Could not write to $file: $!\n";
+    }
+}
+
+# The install action, extended to run scripts post-installation: bring the
+# build up to date, install what is in the install map, then run the
+# post-install scripts.
+sub ACTION_install {
+    my $self = shift;
+    
+    require ExtUtils::Install;
+    $self->depends_on('build');
+    
+    my @install_args = (
+        $self->install_map,
+        !$self->quiet,
+        0,
+        $self->{args}{uninst} || 0,
+    );
+    ExtUtils::Install::install(@install_args);
+    
+    $self->run_post_install_scripts;
+}
+# Record one or more scripts in the persistent post_install_scripts store;
+# every argument is passed through localize_file_path first.
+sub add_post_install_script {
+    my ($self, @scripts) = @_;
+    
+    my %files;
+    for my $script (@scripts) {
+        my ($local) = $self->localize_file_path($script);
+        $files{$local} = 1;
+    }
+    
+    $self->{phash}{post_install_scripts}->write(\%files);
+}
+# Run every script found in the post_install_scripts store as a perl script.
+sub run_post_install_scripts {
+    my ($self) = @_;
+    
+    my $recorded = $self->{phash}{post_install_scripts}->read;
+    for my $script (keys %$recorded) {
+        $self->run_perl_script($script);
+    }
+}
+
+# for use with auto_features, which should require LWP::UserAgent as one of
+# its reqs. Returns nothing when http://search.cpan.org/ could be fetched,
+# and a string describing the problem otherwise.
+sub test_internet {
+    # ideally this won't fail because auto_feature already specified
+    # LWP::UserAgent, so this sub wouldn't get called if LWP not installed
+    eval { require LWP::UserAgent; };
+    return "LWP::UserAgent not installed" if $@;
+    
+    my $agent = LWP::UserAgent->new;
+    $agent->timeout(10);
+    $agent->env_proxy;
+    
+    my $reply = $agent->get('http://search.cpan.org/');
+    return if $reply->is_success;
+    
+    return "Could not connect to the internet (http://search.cpan.org/)";
+}
+
+# nice directory names for dist-related actions: "<dist_name>-<version>",
+# with the numeric dist version turned into a dotted one first. For versions
+# of the form major.minor.point.rev the last part is then rewritten:
+#   rev below 100  -> "_NNN-RC"
+#   rev of 100     -> "_100" when minor is odd, dropped when minor is even
+#   rev above 100  -> "_rev" when minor is odd, ".(rev - 100)" when even
+sub dist_dir {
+    my $self = shift;
+    my $ver  = $self->dist_version;
+    
+    # 1.x.x.100 returned as 1.x.x.1
+    $ver .= '00' if $ver =~ /^\d\.\d{6}\d$/;
+    
+    # every "00N" group becomes "N."; a dot left over at the end is removed
+    $ver =~ s/00(\d)/$1./g;
+    $ver =~ s/\.$//;
+    
+    if ($ver =~ /^\d\.(\d)\.\d\.(\d+)$/) {
+        my ($minor, $rev) = ($1, $2);
+        my $is_dev = $minor % 2 != 0;
+        
+        my $suffix;
+        if ($rev < 100) {
+            $suffix = sprintf("_%03d-RC", $rev);
+        }
+        elsif ($rev == 100) {
+            $suffix = $is_dev ? "_$rev" : '';
+        }
+        else {
+            $rev -= 100 unless $is_dev;
+            $suffix = $is_dev ? "_$rev" : ".$rev";
+        }
+        $ver =~ s/\.\d+$/$suffix/;
+    }
+    
+    return "$self->{properties}{dist_name}-$ver";
+}
+# Name used for ppm related files: the dist dir name with '-ppm' appended.
+sub ppm_name {
+    my ($self) = @_;
+    my $dir = $self->dist_dir;
+    return "${dir}-ppm";
+}
+
+# The ppd action: generate complete ppd4 version file, and register it both
+# for cleanup and for MANIFEST.SKIP.
+sub ACTION_ppd {
+    my ($self) = @_;
+    
+    my $ppd_file = $self->make_ppd(%{$self->{args}});
+    
+    $self->add_to_cleanup($ppd_file);
+    $self->add_to_manifest_skip($ppd_file);
+}
+
+# Extended so that, after the parent class has done its work, the pod2htm
+# temp files (generated during ppmdist most likely) are added to
+# MANIFEST.SKIP.
+sub htmlify_pods {
+    my $self = shift;
+    
+    $self->SUPER::htmlify_pods(@_);
+    
+    return $self->add_to_manifest_skip('pod2htm*');
+}
+
+# The ppmdist action, run with the libdoc install type hidden: we don't copy
+# across man3 docs since they're of little use under Windows and have bad
+# filenames. libdoc is made visible again once the parent action is done.
+sub ACTION_ppmdist {
+    my $self = shift;
+    
+    $self->install_types(1);    # pretend libdoc doesn't exist
+    $self->SUPER::ACTION_ppmdist(@_);
+    $self->install_types(0);    # and now it exists again
+}
+
+# Returns the install types of the parent class. When supplied a true value,
+# pretends libdoc doesn't exist (preventing man3 installation for ppmdist);
+# when supplied a false but defined value, it exists again. Without an
+# argument the setting of the most recent call stays in effect.
+sub install_types {
+    my ($self, $no_libdoc) = @_;
+    
+    if (defined $no_libdoc) {
+        $self->{no_libdoc} = $no_libdoc;
+    }
+    
+    my @types = $self->SUPER::install_types;
+    return @types unless $self->{no_libdoc};
+    
+    return grep { $_ ne 'libdoc' } @types;
+}
+
+# overridden from Module::Build::PPMMaker for ppd4 compatibility
+#
+# Writes "<dist_name>.ppd" into the current directory. The file holds two
+# softpkgs: one for the distribution itself and one for a Bundle::BioPerl::*
+# package that carries all the prerequisites. A tarball of a freshly created
+# (so empty) bundle directory is made as well, and registered for cleanup and
+# for MANIFEST.SKIP.
+#
+# Arguments after $self are accepted as a hash, but are not used. Returns the
+# name of the ppd file. Dies when a piece of distribution info cannot be
+# determined, or when the ppd file or the bundle directory cannot be written.
+sub make_ppd {
+    my ($self, %args) = @_;
+    
+    require Module::Build::PPMMaker;
+    my $mbp = Module::Build::PPMMaker->new();
+    
+    my %dist;
+    foreach my $info (qw(name author abstract version)) {
+        my $method = "dist_$info";
+        $dist{$info} = $self->$method() or die "Can't determine distribution's $info\n";
+    }
+    $dist{codebase} = $self->ppm_name.'.tar.gz';
+    $mbp->_simple_xml_escape($_) foreach $dist{abstract}, $dist{codebase}, @{$dist{author}};
+    
+    # zero padded month and day, so 2007-06-04 rather than 2007-6-4
+    my (undef, undef, undef, $mday, $mon, $year) = localtime();
+    my $date = sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday);
+    
+    # the softpkg version is the dist dir name without the leading dist name;
+    # \Q..\E because the name is literal text, not a pattern
+    my $softpkg_version = $self->dist_dir;
+    $softpkg_version =~ s/^\Q$dist{name}\E-//;
+    
+    # to avoid a ppm bug, instead of including the requires in the softpackage
+    # for the distribution we're making, we'll make a separate Bundle::
+    # softpackage that contains all the requires, and require only the Bundle in
+    # the real softpackage
+    my ($bundle_name) = $dist{name} =~ /^.+-(.+)/;
+    $bundle_name ||= 'core';
+    $bundle_name =~ s/^(\w)/\U$1/;
+    my $bundle_dir = "Bundle-BioPerl-$bundle_name-$softpkg_version-ppm";
+    my $bundle_file = "$bundle_dir.tar.gz";
+    my $bundle_softpkg_name = "Bundle-BioPerl-$bundle_name";
+    $bundle_name = "Bundle::BioPerl::$bundle_name";
+    
+    # header
+    my $ppd = <<"PPD";
+    <SOFTPKG NAME=\"$dist{name}\" VERSION=\"$softpkg_version\" DATE=\"$date\">
+        <TITLE>$dist{name}</TITLE>
+        <ABSTRACT>$dist{abstract}</ABSTRACT>
+@{[ join "\n", map "        <AUTHOR>$_</AUTHOR>", @{$dist{author}} ]}
+        <PROVIDE NAME=\"$dist{name}::\" VERSION=\"$dist{version}\"/>
+PPD
+    
+    # provide section
+    foreach my $pm (@{$self->rscan_dir('Bio', qr/\.pm$/)}) {
+        # convert these filepaths to Module names; only the extension at the
+        # very end is removed
+        $pm =~ s/\//::/g;
+        $pm =~ s/\.pm$//;
+        
+        $ppd .= sprintf(<<'EOF', $pm, $dist{version});
+        <PROVIDE NAME="%s" VERSION="%s"/>
+EOF
+    }
+    
+    # rest of softpkg
+    $ppd .= <<"PPD";
+        <IMPLEMENTATION>
+            <ARCHITECTURE NAME=\"MSWin32-x86-multi-thread-5.8\"/>
+            <CODEBASE HREF=\"$dist{codebase}\"/>
+            <REQUIRE NAME=\"$bundle_name\" VERSION=\"$dist{version}\"/>
+        </IMPLEMENTATION>
+    </SOFTPKG>
+PPD
+    
+    # now a new softpkg for the bundle
+    $ppd .= <<"PPD";
+    
+    <SOFTPKG NAME=\"$bundle_softpkg_name\" VERSION=\"$softpkg_version\" DATE=\"$date\">
+        <TITLE>$bundle_name</TITLE>
+        <ABSTRACT>Bundle of pre-requisites for $dist{name}</ABSTRACT>
+@{[ join "\n", map "        <AUTHOR>$_</AUTHOR>", @{$dist{author}} ]}
+        <PROVIDE NAME=\"$bundle_name\" VERSION=\"$dist{version}\"/>
+        <IMPLEMENTATION>
+            <ARCHITECTURE NAME=\"MSWin32-x86-multi-thread-5.8\"/>
+            <CODEBASE HREF=\"$bundle_file\"/>
+PPD
+    
+    # required section
+    # we do both requires and recommends to make installation on Windows as
+    # easy (mindless) as possible
+    for my $type ('requires', 'recommends') {
+        my $prereq = $self->$type;
+        # sorted, so that the same prerequisites always give the same ppd
+        foreach my $prereq_name (sort keys %$prereq) {
+            next if $prereq_name eq 'perl';
+            my $modname = $prereq_name;
+            my $version = $prereq->{$prereq_name};
+            
+            # recommends values carry extra info after the version part
+            ($version) = split("/", $version) if defined($version) && $version =~ /\//;
+            
+            # Module names must have at least one ::
+            unless ($modname =~ /::/) {
+                $modname .= '::';
+            }
+            
+            # Bio::Root::Version number comes out as triplet number like 1.5.2;
+            # convert to our own version
+            if ($modname eq 'Bio::Root::Version') {
+                $version = $dist{version};
+            }
+            
+            $ppd .= sprintf(<<'EOF', $modname, $version || '');
+            <REQUIRE NAME="%s" VERSION="%s"/>
+EOF
+        }
+    }
+    
+    # footer
+    $ppd .= <<'EOF';
+        </IMPLEMENTATION>
+    </SOFTPKG>
+EOF
+    
+    # the file name is passed separately from the mode, and close is checked
+    # because buffered write errors only show up there
+    my $ppd_file = "$dist{name}.ppd";
+    my $fh = IO::File->new($ppd_file, 'w') or die "Cannot write to $ppd_file: $!";
+    print $fh $ppd;
+    close($fh) or die "Cannot write to $ppd_file: $!";
+    
+    # the bundle tarball is made from a directory that was just created
+    $self->delete_filetree($bundle_dir);
+    mkdir($bundle_dir) or die "Cannot create '$bundle_dir': $!";
+    $self->make_tarball($bundle_dir);
+    $self->delete_filetree($bundle_dir);
+    $self->add_to_cleanup($bundle_file);
+    $self->add_to_manifest_skip($bundle_file);
+    
+    return $ppd_file;
+}
+
+# The dist action. MANIFEST and MANIFEST.SKIP are always re-created
+# just-in-time by auto-running the manifest action, and we make all archive
+# formats we want, not just .tar.gz. The dist dir is removed at the end.
+sub ACTION_dist {
+    my $self = shift;
+    
+    for my $action (qw(manifest distdir)) {
+        $self->depends_on($action);
+    }
+    
+    my $dir = $self->dist_dir;
+    $self->make_zip($dir);
+    $self->make_tarball($dir);
+    $self->delete_filetree($dir);
+}
+
+# makes zip file for windows users and bzip2 files as well
+#
+# Given a directory, and optionally the base name for the archives (which
+# defaults to the directory name), creates "$file.zip" with the external zip
+# program, and "$file.tar.bz2" by writing "$file.tar" with Archive::Tar and
+# running the external bzip2 program on it. bzip2 is run with -k, so
+# "$file.tar" itself is left in place as well. When the tar file cannot be
+# created a warning is logged and bzip2 is not run.
+sub make_zip {
+    my ($self, $dir, $file) = @_;
+    $file ||= $dir;
+    
+    $self->log_info("Creating $file.zip\n");
+    my $zip_flags = $self->verbose ? '-r' : '-rq';
+    $self->do_system($self->split_like_shell("zip"), $zip_flags, "$file.zip", $dir);
+    
+    # report the name of the file that is actually produced
+    $self->log_info("Creating $file.tar.bz2\n");
+    require Archive::Tar;
+    # Archive::Tar versions >= 1.09 use the following to enable a compatibility
+    # hack so that the resulting archive is compatible with older clients.
+    $Archive::Tar::DO_NOT_USE_PREFIX = 0;
+    my $files = $self->rscan_dir($dir);
+    unless (Archive::Tar->create_archive("$file.tar", 0, @$files)) {
+        # without the tar file there is nothing for bzip2 to work on
+        my $why = $Archive::Tar::error || 'unknown error';
+        $self->log_warn("Could not create $file.tar: $why\n");
+        return;
+    }
+    $self->do_system($self->split_like_shell("bzip2"), "-k", "$file.tar");
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/PLATFORMS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/PLATFORMS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/PLATFORMS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,54 @@
+# $Id: PLATFORMS,v 1.25.4.2 2006/11/23 12:36:14 sendu Exp $
+
+Perl general comments:
+
+	o Perl must be 5.6.1 or higher. We tend to test on > 5.8.
+
+	o Index.t will fail if you have an out-of-date DBM file
+	  installation or a bad DB_File installation
+
+
+Tested systems & OS Specific Comments or Warnings
+==================================================
+
+Machine : Debian Linux 2.6.8-2-686-sm
+Perl    : 5.8.7
+Comments: none
+
+Machine : Gentoo Linux 2.6.16-r9 x86_64
+Perl    : 5.8.8
+Comments: none
+
+Machine : FreeBSD 6.2-PRERELEASE i386 and FreeBSD 5.5-STABLE i386
+Perl    : 5.8.8
+Comments: none
+
+Machine : Win32, WinNT i386, Windows XP
+Perl    : ActiveState Perl 5.8.8.819
+Comments: Only ActiveState Perl >= 5.8 is known to work well, unlike other
+          platforms that can use perl 5.6.1.
+          Be sure that the module DB_File is installed and up-to-date 
+          to allow Bio::Index modules to work properly. 
+          Installing ppm's IO-stringy and IO-String and File-Temp are 
+	      necessary as well.
+          
+          See INSTALL.WIN for more information
+
+Machine : MacOS
+Perl    : MacPerl
+Comments: We don't recommend using Bioperl on MacOS 9 systems
+
+Machine : MacOS X 10.4.7 (Intel) and 10.4.8
+Perl    : 5.8.6
+Comments: Steve Cannon has made available Bioperl OS X installation
+          directions and notes online at the following URL:
+	      http://www.tc.umn.edu/~cann0010/Bioperl_OSX_install.html
+          Also see the Unix installation instructions at:
+          http://www.bioperl.org/wiki/Installing_Bioperl_for_Unix
+          Or install using CPAN.
+
+Machine : CentOS
+Perl    : n/a
+Comments: Module::Build, required for installation using Build.PL, may
+          have difficulty installing. You can force install it with
+          CPAN.

Added: trunk/packages/bioperl/branches/upstream/current/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,254 @@
+# $Id: README,v 1.45.4.2 2006/11/17 09:32:43 sendu Exp $
+
+This is the README file for the Bioperl central distribution.
+
+o Version
+
+ This is Bioperl version 1.5.2
+
+o Getting Started
+ 
+ Thanks for downloading this distribution!
+
+ Please see the INSTALL or INSTALL.WIN documents for installation 
+ instructions.
+
+ For tutorials see the Bioperl Tutorial.pl (http://www.bioperl.org/wiki/Bptutorial.pl)
+ or the HOWTO documents and tutorials online at http://bioperl.org. 
+ To look at example code browse the scripts/ and examples/ directories
+
+ For people starting out with Perl and Bioperl, look at the Bio::Perl
+ module (go "perldoc Bio::Perl" from within this directory). This 
+ module is designed to flatten the learning curve for newcomers.
+
+ For a list of OS's and versions that are known to support Bioperl see the
+ PLATFORMS file.
+ 
+ For info on Bioperl read on!
+
+o About Bioperl
+
+ Bioperl is a package of public domain Perl tools for computational 
+ molecular biology.
+
+ Our website, http://bioperl.org, provides an online resource of
+ modules, scripts, and web links for developers of Perl-based software
+ for life science research.
+
+o Contact info
+
+ Bioperl developers: bioperl-l at bioperl.org
+
+ There's quite a variety of tools available in Bioperl, and more are
+ added all the time. If the tool you're looking for isn't described in
+ the documentation please write us, it could be undocumented or in process.
+
+ Project website     : http://bioperl.org
+ Project FTP server  : bioperl.org (anonymous FTP ok)
+
+ Bug reports         : http://bugzilla.open-bio.org/
+
+ Please send us bugs, in particular about documentation which you
+ think is unclear or problems in installation. We are also very
+ interested in functions which don't work the way you think they do!
+
+ Please see the AUTHORS file for the complete list of bioperl
+ developers and contributors.
+
+o About the directory structure
+
+ The bioperl directory structure is organized as follows:
+
+ Bio/            - Bioperl modules  
+ models/         - DIA drawing program generated OO UML for bioperl classes
+ t/              - Perl built-in tests
+ t/data/         - Data files used for the tests - provides good data
+                   examples for those new to bioinformatics data.	     
+ scripts/        - Useful production-quality scripts with POD documentation 
+ examples/       - Scripts demonstrating the many uses of Bioperl
+ maintenance/    - Bioperl housekeeping scripts
+
+o Documentation
+
+ The Bioperl Tutorial (http://www.bioperl.org/wiki/Bptutorial.pl) 
+ contains useful information for new and existing Bioperl users.
+ This file also contains a number of useful scripts that the 
+ student of Bioperl may want to examine.
+
+ Individual *.pm modules have their own embedded POD documentation
+ as well. A complete set of hyperlinked POD, or module, documentation 
+ is available at http://www.bioperl.org.
+
+ Remember that 'perldoc' is your friend. You can use it to read any
+ file containing POD formatted documentation without needing any type
+ of translator.
+ 
+ If you used the Build.PL installation, and depending on your platform,
+ you may have documentation installed as man pages, which can be
+ accessed in the usual way.
+
+ There is also an online course written at the Pasteur Institute. See
+ http://www.pasteur.fr/recherche/unites/sis/formation/bioperl.
+
+ Useful documentation in the form of example code can also be found
+ in the examples/ and scripts/ directories. The current collection 
+ includes scripts that run BLAST, index flat files, parse PDB 
+ structure files, make primers, retrieve ESTs based on tissue, align 
+ protein to nucleotide sequence, run GENSCAN on multiple sequences, 
+ and much more! See bioscripts.pod for a complete listing.
+
+o Releases
+  
+ Bioperl releases are always available from the website
+ http://www.bioperl.org or by FTP from ftp://bioperl.org (note that
+ we've had trouble with our new network setup which is not allowing
+ FTP to support passive mode properly, use http://www.bioperl.org/DIST
+ to get a listing of the distribution directory).  Each release is
+ tested with the test suite and cross-tested on a number of different
+ platforms.  See the PLATFORMS file for more information on a specific
+ platform.  All efforts are made to release a bug-free package,
+ however most major bugs in a release will be documented in the BUGS
+ file. See the Changes file for a listing of what features have been
+ added or what APIs have changed between releases.
+
+ Bioperl now has a consistent numbering scheme to indicate stable 
+ release series vs. development release series. A release number 
+ is a three digit number like 1.2.0 - the first digit
+ indicates the major release - the idea being that all the API calls in a
+ major release are reasonably consistent. The second number is the
+ release series. This is probably the most important number. Even
+ numbers here (1.0, 1.2 etc) indicate stable releases. Stable releases
+ are well tested and recommended for most uses. Odd numbers (1.1, 1.3
+ etc) are development releases which one should only use if you are
+ interested in the latest and greatest features. The final number (e.g.
+ 1.2.0, 1.2.1) is the bug fix release. The higher the number the
+ more bug fixes have been incorporated. In theory you can upgrade from one
+ bug fix release to the next with no changes to your own code (for production
+ cases, obviously check things out carefully before you switch over).
+
+o Caveats, warnings, etc
+
+ When you run the tests ("make test") some tests may issue
+ warning messages or even fail.  Sometimes this is because we didn't
+ have anyone to test the test system on the combination of your operating
+ system, version of perl, and associated libraries and other modules.
+ Because Bioperl depends on several outside libraries we may not be
+ able to test every single combination so if there are warnings you 
+ may find that the package is still perfectly useful.  See the 
+ PLATFORMS file for reports of specific issues.
+ 
+ If you install the bioperl-run system and run tests when you don't
+ have the program installed you'll get messages like 'program XXX not
+ found, skipping tests'.  That's okay, Bioperl is doing what it is
+ supposed to do.  If you wanted to run the program you'd need to
+ install it first.
+ 
+ Not all scripts in the examples/ directory are correct and up-to-date.
+ We need volunteers to help maintain these so if you find they do not
+ work, submit a bug report to http://bugzilla.open-bio.org and consider
+ helping out in their maintenance.
+
+ If you are confused about what modules are appropriate when you try
+ and solve a particular issue in bioinformatics we urge you to look at
+ the Bioperl Tutorial or the HOWTO documents first. 
+
+o A simple module summary
+
+ Here is a quick summary of many of the useful modules and how the
+ toolkit is laid out:
+
+ All modules are in the Bio/ namespace, 
+ - Perl is for newbies and gives a functional interface to the main
+   parts of the package
+ - Seq is for Sequences (protein and DNA).
+   o Bio::PrimarySeq is a plain sequence (sequence data + identifiers)
+   o Bio::Seq is a PrimarySeq plus it has a Bio::Annotation::Collection 
+     and Bio::SeqFeatureI objects attached.
+   o Bio::Seq::RichSeq is all of the above plus it has slots for
+     extra information specific to GenBank/EMBL/SwissProt files.
+   o Bio::Seq::LargeSeq is for sequences which are too big for
+     fitting into memory.
+ - SeqIO is for reading and writing Sequences, it is a front end
+   module for separate driver modules supporting the different
+   sequence formats
+ - SeqFeature - start/stop/strand annotations of sequences
+   o Bio::SeqFeature::Generic is basic catchall
+   o Bio::SeqFeature::Similarity a similarity sequence feature
+   o Bio::SeqFeature::FeaturePair a sequence feature which is pairwise
+     such as query/hit pairs
+ - SearchIO is for reading and writing pairwise alignment reports
+   like BLAST or FASTA
+ - Search is where the alignment objects are defined
+   o Bio::Search::Result::GenericResult is the result object (a blast query
+     is a Result object)
+   o Bio::Search::Hit::GenericHit is the Hit object (a query will have
+     0-> many hits in a database)
+   o Bio::Search::HSP::GenericHSP is the High-scoring Segment Pair
+     object defining the alignment(s) of the query and hit.  
+ - SimpleAlign is for multiple sequence alignments
+ - AlignIO is for reading and writing multiple sequence alignment
+   formats
+ - Assembly provides the start of an infrastructure for assemblies
+   and Assembly::IO IO converters for them
+ - DB is the namespace for all the database query objects
+   o Bio::DB::GenBank/GenPept are two modules which query NCBI entrez
+     for sequences
+   o Bio::DB::SwissProt/EMBL query various EMBL and SwissProt
+     repositories for sequences
+   o Bio::DB::GFF is Lincoln Stein's fast, lightweight feature and
+     sequence database which is the backend to his GBrowse system 
+     (see www.gmod.org)
+   o Bio::DB::Flat is a fast implementation of the OBDA flat-file
+     indexing system (cross-language and cross-platform supported by
+     O|B|F projects see http://obda.open-bio.org).
+   o Bio::DB::BioFetch/DBFetch for OBDA, Web (HTTP) access to remote
+     databases.
+   o Bio::DB::InMemoryCache/FileCache (fast local caching of sequences
+     from remote dbs to speed up your access). 
+   o Bio::DB::Registry interface to the OBDA specification for remote 
+     data sources
+   o Bio::DB::XEMBL SOAP access to sequence databases
+   o Bio::DB::Biblio for access to remote bibliographic databases.
+ - Annotation collection of annotation objects (comments,
+   DBlinks, References, and misc key/value pairs)
+ - Coordinate is a system for mapping between different coordinate
+   systems such as DNA to protein or between assemblies.
+ - Index is for locally indexed flatfiles with BerkeleyDB
+ - Tools contains many miscellaneous parsers and functions for different
+   bioinformatics needs
+   o Gene prediction parser (Genscan, MZEF, Grail, Genemark)
+   o Annotation format (GFF)
+   o simulate restriction enzyme cutting with RestrictionEnzyme
+   o Enumerate codon tables and valid sequences symbols (CodonTable, IUPAC)
+   o Phylogenetic program parsing (PAML, Molphy, Phylip)
+ - Map genetic and physical map representations
+ - Graphics render a sequence with its features or a sequence analysis
+   result.
+ - Structure - parse and represent protein structure data
+ - TreeIO is for reading and writing Tree formats
+ - Tree is the namespace for all the associated Tree objects
+   o Bio::Tree::Tree is the basic tree object
+   o Bio::Tree::Node are the nodes which make up the tree
+   o Bio::Tree::Statistics is for computing statistics for a tree
+   o Bio::Tree::TreeFunctionsI is where specific tree functions are
+     implemented (like is_monophyletic and lca)
+ - Bio::Biblio is where bibliographic data and database access objects
+   are kept
+ - Variation represent sequences with mutations and variations applied
+   so one can compare and represent wild-type and mutation versions of
+   a sequence.
+ - Root, basic objects for the internals of Bioperl
+
+o Upgrading from an older version
+ 
+ If you have a previously installed version of bioperl on your system
+ some of these notes may help you.  
+
+ Some modules have been removed because they have been superseded by
+ new development efforts.  They are documented in the DEPRECATED file
+ that is included in the release.  In addition some methods, or the
+ Application Programming Interface (API), have changed or been
+ removed.  You may find that scripts which worked with bioperl 1.4
+ may give you warnings or may not work at all (although we have tried
+ very hard to minimize this!).  Send an email to the list and we'll be
+ happy to give you pointers.

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Build.PL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Build.PL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Build.PL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,21 @@
+use strict;
+use warnings;
+use Module::Build;
+
+# Prerequisites; a version of 0 accepts whatever version is installed.
+my %prereq = map { $_ => 0 } qw(Test::More version Class::Inspector DB_File CGI);
+
+# Describe the Deobfuscator distribution to Module::Build.
+my %spec = (
+    module_name       => 'Deobfuscator',
+    license           => 'perl',
+    dist_author       => 'Dave Messina <dave-pause at davemessina.net>',
+    dist_version_from => 'lib/Deobfuscator.pm',
+    script_files      => ['bin/deob_index.pl'],
+    requires          => \%prereq,
+    add_to_cleanup    => ['Deobfuscator-*'],
+);
+
+# Generate the ./Build script used to build, test and install.
+my $builder = Module::Build->new(%spec);
+$builder->create_build_script();

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Changes
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Changes	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Changes	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,14 @@
+Revision history for Deobfuscator
+
+0.0.1  Fri Apr  7 14:34:50 2006
+       Initial release.
+
+0.0.2  Mon Apr 24 13:27:52 CDT 2006
+       NEW: Laura's cool flowchart added, which shows how the Deobfuscator
+            works. Updated README and deob_help.html to reflect that.
+       FIX: Cryptic "Can't close MODS file" error message rewritten.
+       NEW: Indulged my POD formatting obsession. Again. (Lots of minor
+            updates to the POD)
+       NEW: Added Feedback section to deob_help.html.
+       NEW: Added DOCUMENTATION section to README.
+       FIX: "About the mailing lists" URL changed throughout.
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/LICENSE
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/LICENSE	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/LICENSE	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,383 @@
+Terms of Perl itself
+
+a) the GNU General Public License as published by the Free
+   Software Foundation; either version 1, or (at your option) any
+   later version, or
+b) the "Artistic License"
+
+---------------------------------------------------------------------------
+
+The General Public License (GPL)
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc. 675 Mass Ave,
+Cambridge, MA 02139, USA. Everyone is permitted to copy and distribute
+verbatim copies of this license document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to share
+and change it. By contrast, the GNU General Public License is intended to
+guarantee your freedom to share and change free software--to make sure the
+software is free for all its users. This General Public License applies to most of
+the Free Software Foundation's software and to any other program whose
+authors commit to using it. (Some other Free Software Foundation software is
+covered by the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom
+to distribute copies of free software (and charge for this service if you wish), that
+you receive source code or can get it if you want it, that you can change the
+software or use pieces of it in new free programs; and that you know you can do
+these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to deny
+you these rights or to ask you to surrender the rights. These restrictions
+translate to certain responsibilities for you if you distribute copies of the
+software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or for a
+fee, you must give the recipients all the rights that you have. You must make
+sure that they, too, receive or can get the source code. And you must show
+them these terms so they know their rights.
+
+We protect your rights with two steps: (1) copyright the software, and (2) offer
+you this license which gives you legal permission to copy, distribute and/or
+modify the software.
+
+Also, for each author's protection and ours, we want to make certain that
+everyone understands that there is no warranty for this free software. If the
+software is modified by someone else and passed on, we want its recipients to
+know that what they have is not the original, so that any problems introduced by
+others will not reflect on the original authors' reputations.
+
+Finally, any free program is threatened constantly by software patents. We wish
+to avoid the danger that redistributors of a free program will individually obtain
+patent licenses, in effect making the program proprietary. To prevent this, we
+have made it clear that any patent must be licensed for everyone's free use or
+not licensed at all.
+
+The precise terms and conditions for copying, distribution and modification
+follow.
+
+GNU GENERAL PUBLIC LICENSE
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND
+MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+placed by the copyright holder saying it may be distributed under the terms of
+this General Public License. The "Program", below, refers to any such program
+or work, and a "work based on the Program" means either the Program or any
+derivative work under copyright law: that is to say, a work containing the
+Program or a portion of it, either verbatim or with modifications and/or translated
+into another language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not covered by
+this License; they are outside its scope. The act of running the Program is not
+restricted, and the output from the Program is covered only if its contents
+constitute a work based on the Program (independent of having been made by
+running the Program). Whether that is true depends on what the Program does.
+
+1. You may copy and distribute verbatim copies of the Program's source code as
+you receive it, in any medium, provided that you conspicuously and appropriately
+publish on each copy an appropriate copyright notice and disclaimer of warranty;
+keep intact all the notices that refer to this License and to the absence of any
+warranty; and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and you may at
+your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Program or any portion of it, thus
+forming a work based on the Program, and copy and distribute such
+modifications or work under the terms of Section 1 above, provided that you also
+meet all of these conditions:
+
+a) You must cause the modified files to carry prominent notices stating that you
+changed the files and the date of any change.
+
+b) You must cause any work that you distribute or publish, that in whole or in
+part contains or is derived from the Program or any part thereof, to be licensed
+as a whole at no charge to all third parties under the terms of this License.
+
+c) If the modified program normally reads commands interactively when run, you
+must cause it, when started running for such interactive use in the most ordinary
+way, to print or display an announcement including an appropriate copyright
+notice and a notice that there is no warranty (or else, saying that you provide a
+warranty) and that users may redistribute the program under these conditions,
+and telling the user how to view a copy of this License. (Exception: if the
+Program itself is interactive but does not normally print such an announcement,
+your work based on the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If identifiable
+sections of that work are not derived from the Program, and can be reasonably
+considered independent and separate works in themselves, then this License,
+and its terms, do not apply to those sections when you distribute them as
+separate works. But when you distribute the same sections as part of a whole
+which is a work based on the Program, the distribution of the whole must be on
+the terms of this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest your rights to
+work written entirely by you; rather, the intent is to exercise the right to control
+the distribution of derivative or collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program with the
+Program (or with a work based on the Program) on a volume of a storage or
+distribution medium does not bring the other work under the scope of this
+License.
+
+3. You may copy and distribute the Program (or a work based on it, under
+Section 2) in object code or executable form under the terms of Sections 1 and 2
+above provided that you also do one of the following:
+
+a) Accompany it with the complete corresponding machine-readable source
+code, which must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange; or,
+
+b) Accompany it with a written offer, valid for at least three years, to give any
+third party, for a charge no more than your cost of physically performing source
+distribution, a complete machine-readable copy of the corresponding source
+code, to be distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+c) Accompany it with the information you received as to the offer to distribute
+corresponding source code. (This alternative is allowed only for noncommercial
+distribution and only if you received the program in object code or executable
+form with such an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for making
+modifications to it. For an executable work, complete source code means all the
+source code for all modules it contains, plus any associated interface definition
+files, plus the scripts used to control compilation and installation of the
+executable. However, as a special exception, the source code distributed need
+not include anything that is normally distributed (in either source or binary form)
+with the major components (compiler, kernel, and so on) of the operating system
+on which the executable runs, unless that component itself accompanies the
+executable.
+
+If distribution of executable or object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the source
+code from the same place counts as distribution of the source code, even though
+third parties are not compelled to copy the source along with the object code.
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+expressly provided under this License. Any attempt otherwise to copy, modify,
+sublicense or distribute the Program is void, and will automatically terminate
+your rights under this License. However, parties who have received copies, or
+rights, from you under this License will not have their licenses terminated so long
+as such parties remain in full compliance.
+
+5. You are not required to accept this License, since you have not signed it.
+However, nothing else grants you permission to modify or distribute the Program
+or its derivative works. These actions are prohibited by law if you do not accept
+this License. Therefore, by modifying or distributing the Program (or any work
+based on the Program), you indicate your acceptance of this License to do so,
+and all its terms and conditions for copying, distributing or modifying the
+Program or works based on it.
+
+6. Each time you redistribute the Program (or any work based on the Program),
+the recipient automatically receives a license from the original licensor to copy,
+distribute or modify the Program subject to these terms and conditions. You
+may not impose any further restrictions on the recipients' exercise of the rights
+granted herein. You are not responsible for enforcing compliance by third parties
+to this License.
+
+7. If, as a consequence of a court judgment or allegation of patent infringement
+or for any other reason (not limited to patent issues), conditions are imposed on
+you (whether by court order, agreement or otherwise) that contradict the
+conditions of this License, they do not excuse you from the conditions of this
+License. If you cannot distribute so as to satisfy simultaneously your obligations
+under this License and any other pertinent obligations, then as a consequence
+you may not distribute the Program at all. For example, if a patent license would
+not permit royalty-free redistribution of the Program by all those who receive
+copies directly or indirectly through you, then the only way you could satisfy
+both it and this License would be to refrain entirely from distribution of the
+Program.
+
+If any portion of this section is held invalid or unenforceable under any particular
+circumstance, the balance of the section is intended to apply and the section as
+a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any patents or other
+property right claims or to contest validity of any such claims; this section has
+the sole purpose of protecting the integrity of the free software distribution
+system, which is implemented by public license practices. Many people have
+made generous contributions to the wide range of software distributed through
+that system in reliance on consistent application of that system; it is up to the
+author/donor to decide if he or she is willing to distribute software through any
+other system and a licensee cannot impose that choice.
+
+This section is intended to make thoroughly clear what is believed to be a
+consequence of the rest of this License.
+
+8. If the distribution and/or use of the Program is restricted in certain countries
+either by patents or by copyrighted interfaces, the original copyright holder who
+places the Program under this License may add an explicit geographical
+distribution limitation excluding those countries, so that distribution is permitted
+only in or among countries not thus excluded. In such case, this License
+incorporates the limitation as if written in the body of this License.
+
+9. The Free Software Foundation may publish revised and/or new versions of the
+General Public License from time to time. Such new versions will be similar in
+spirit to the present version, but may differ in detail to address new problems or
+concerns.
+
+Each version is given a distinguishing version number. If the Program specifies a
+version number of this License which applies to it and "any later version", you
+have the option of following the terms and conditions either of that version or of
+any later version published by the Free Software Foundation. If the Program does
+not specify a version number of this License, you may choose any version ever
+published by the Free Software Foundation.
+
+10. If you wish to incorporate parts of the Program into other free programs
+whose distribution conditions are different, write to the author to ask for
+permission. For software which is copyrighted by the Free Software Foundation,
+write to the Free Software Foundation; we sometimes make exceptions for this.
+Our decision will be guided by the two goals of preserving the free status of all
+derivatives of our free software and of promoting the sharing and reuse of
+software generally.
+
+NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS
+NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE
+COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM
+"AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR
+IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE,
+YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED
+TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY
+WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS
+PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
+ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
+(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
+OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+
+---------------------------------------------------------------------------
+
+The Artistic License
+
+Preamble
+
+The intent of this document is to state the conditions under which a Package
+may be copied, such that the Copyright Holder maintains some semblance of
+artistic control over the development of the package, while giving the users of the
+package the right to use and distribute the Package in a more-or-less customary
+fashion, plus the right to make reasonable modifications.
+
+Definitions:
+
+-    "Package" refers to the collection of files distributed by the Copyright
+     Holder, and derivatives of that collection of files created through textual
+     modification. 
+-    "Standard Version" refers to such a Package if it has not been modified,
+     or has been modified in accordance with the wishes of the Copyright
+     Holder. 
+-    "Copyright Holder" is whoever is named in the copyright or copyrights for
+     the package. 
+-    "You" is you, if you're thinking about copying or distributing this Package.
+-    "Reasonable copying fee" is whatever you can justify on the basis of
+     media cost, duplication charges, time of people involved, and so on. (You
+     will not be required to justify it to the Copyright Holder, but only to the
+     computing community at large as a market that must bear the fee.) 
+-    "Freely Available" means that no fee is charged for the item itself, though
+     there may be fees involved in handling the item. It also means that
+     recipients of the item may redistribute it under the same conditions they
+     received it. 
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you duplicate
+all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications derived from
+the Public Domain or from the Copyright Holder. A Package modified in such a
+way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided
+that you insert a prominent notice in each changed file stating how and when
+you changed that file, and provided that you do at least ONE of the following:
+
+     a) place your modifications in the Public Domain or otherwise
+     make them Freely Available, such as by posting said modifications
+     to Usenet or an equivalent medium, or placing the modifications on
+     a major archive site such as ftp.uu.net, or by allowing the
+     Copyright Holder to include your modifications in the Standard
+     Version of the Package.
+
+     b) use the modified Package only within your corporation or
+     organization.
+
+     c) rename any non-standard executables so the names do not
+     conflict with standard executables, which must also be provided,
+     and provide a separate manual page for each non-standard
+     executable that clearly documents how it differs from the Standard
+     Version.
+
+     d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or executable
+form, provided that you do at least ONE of the following:
+
+     a) distribute a Standard Version of the executables and library
+     files, together with instructions (in the manual page or equivalent)
+     on where to get the Standard Version.
+
+     b) accompany the distribution with the machine-readable source of
+     the Package with your modifications.
+
+     c) accompany any non-standard executables with their
+     corresponding Standard Version executables, giving the
+     non-standard executables non-standard names, and clearly
+     documenting the differences in manual pages (or equivalent),
+     together with instructions on where to get the Standard Version.
+
+     d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this Package.
+You may charge any fee you choose for support of this Package. You may not
+charge a fee for this Package itself. However, you may distribute this Package in
+aggregate with other (possibly commercial) programs as part of a larger
+(possibly commercial) software distribution provided that you do not advertise
+this Package as a product of your own.
+
+6. The scripts and library files supplied as input to or produced as output from
+the programs of this Package do not automatically fall under the copyright of this
+Package, but belong to whomever generated them, and may be sold
+commercially, and may be aggregated with this Package.
+
+7. C or perl subroutines supplied by you and linked into this Package shall not
+be considered part of this Package.
+
+8. Aggregation of this Package with a commercial distribution is always permitted
+provided that the use of this Package is embedded; that is, when no overt attempt
+is made to make this Package's interfaces visible to the end user of the
+commercial distribution. Such use shall not be construed as a distribution of
+this Package.
+
+9. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.
+
+The End
+
+

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/MANIFEST
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/MANIFEST	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/MANIFEST	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+Build.PL
+Changes
+LICENSE
+MANIFEST
+META.yml # Will be created by "make dist"
+Makefile.PL
+README
+bin/deob_index.pl
+cgi-bin/deob_detail.cgi
+cgi-bin/deob_flowchart.png
+cgi-bin/deob_interface.cgi
+cgi-bin/deob_help.html
+lib/Deobfuscator.pm
+t/00.load.t
+t/pod.t
+

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/META.yml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/META.yml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/META.yml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,15 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         Deobfuscator
+version:      v0.0.2
+version_from: lib/Deobfuscator.pm
+installdirs:  site
+requires:
+    CGI:                           0
+    Class::Inspector:              0
+    DB_File:                       0
+    Test::More:                    0
+    version:                       0
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.17

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Makefile.PL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Makefile.PL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/Makefile.PL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,21 @@
+use strict;
+use warnings;
+use ExtUtils::MakeMaker;
+
+# Both the version and the abstract are extracted from the main module.
+my $main_module = 'lib/Deobfuscator.pm';
+
+# Prerequisites; a version of 0 accepts whatever version is installed.
+my %prereq = map { $_ => 0 } qw(Test::More version Class::Inspector DB_File CGI);
+
+WriteMakefile(
+    NAME          => 'Deobfuscator',
+    AUTHOR        => 'Dave Messina <dave-pause at davemessina.net>',
+    VERSION_FROM  => $main_module,
+    ABSTRACT_FROM => $main_module,
+    EXE_FILES     => ['bin/deob_index.pl'],
+    PL_FILES      => {},    # empty map: no *.PL generator scripts are run
+    PREREQ_PM     => \%prereq,
+    dist          => { COMPRESS => 'gzip -9f', SUFFIX => 'gz' },
+    clean         => { FILES => 'Deobfuscator-*' },
+);

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+Deobfuscator version 0.0.2
+
+The Deobfuscator was written to make it easier to determine the methods that
+are available from a given BioPerl module.
+
+BioPerl is a highly object-oriented software package, with often multiple
+levels of inheritance. Although each individual module is usually well 
+documented for the methods specific to it, identifying the inherited methods
+is less straightforward.
+
+The Deobfuscator indexes all of the BioPerl POD documentation, taking account
+of the inheritance tree, and then presents all of the methods available to
+each module through a searchable web interface.
+
+
+DOCUMENTATION
+
+All of the code in this distribution has POD documentation, which can be read using the perldoc command. For example,
+
+    perldoc lib/Deobfuscator.pm
+
+will show the POD documentation for the Deobfuscator module.
+
+Also, there are two files in the cgi-bin directory which may be helpful:
+
+cgi-bin/deob_help.html
+cgi-bin/deob_flowchart.png
+
+
+INSTALLATION
+
+Installation of the Deobfuscator package requires a little bit more than a
+typical CPAN module because there are some cgi scripts, and these need to be
+placed in a directory accessible to a webserver.
+
+Follow these steps to install the Deobfuscator on your system:
+
+1) Follow the standard CPAN installation procedure to install the Deobfuscator.pm module and the deob_index.pl program.
+
+Run the following commands:
+
+    perl Makefile.PL
+    make
+    make test
+    make install
+
+
+Alternatively, to install with Module::Build, you can use the following commands:
+
+    perl Build.PL
+    ./Build
+    ./Build test
+    ./Build install
+
+2) Copy the contents of the cgi-bin directory to your cgi-bin directory, or
+any directory from which the webserver allows scripts to be executed over the
+web.
+
+3) Make sure deob_interface.cgi and deob_detail.cgi are world-executable. On a
+UNIX system, the command
+
+    chmod o+x deob_interface.cgi deob_detail.cgi
+
+should do it.
+
+4) Run deob_index.pl. For a default installation, run it from your webserver's cgi-bin directory. On UNIX systems, it should be something like:
+
+    cd /Library/WebServer/CGI-Executables
+    deob_index.pl /Library/Perl/5.8.6/Bio .
+
+When the command finishes, it should show you some stats on the indexing. On my system it looked like this for BioPerl 1.5.1:
+
+This indexing run found:
+  803 files
+  798 pkg_name
+  772 desc
+  788 synopsis
+ 5660 methods
+
+If the number of files is much lower than this (like 0), then deob_index.pl may have been pointed to the wrong directory.
+
+There should also be some new files in the directory you ran it from:
+packages.db
+methods.db
+package_list.txt
+deob_index.log
+
+You can move or delete deob_index.log and the Deobfuscator should still work,
+but the other three files need to be in the same directory as deob_interface.cgi and deob_detail.cgi unless you change the hardcoded variables in those scripts. See their documentation if you want to do that.
+
+5) Test your installation by pointing your browser to the deob_interface.cgi script. On my system, the URL is:
+
+http://localhost/cgi-bin/deob_interface.cgi
+
+If you get an error, check the permissions on the cgi-scripts and the files that deob_index.pl created in the last step. Your webserver error log may also be helpful.
+
+If you moved any of the files outside of your webserver's cgi-bin directory, make sure that the hardcoded variables in deob_interface.cgi point to their new location.
+
+6) That should be it! As always, check the POD documentation in the individual files for more information. And if you have comments, suggestions, or problems, send an email to the BioPerl mailing list <bioperl-l at bioperl.org>.
+
+
+DEPENDENCIES
+
+- version              Available from CPAN.
+- Class::Inspector     Available from CPAN.
+- BioPerl              Tested with v1.5.1, but other versions should work
+                       too. Get the latest from http://www.bioperl.org.
+
+
+COPYRIGHT AND LICENSE
+
+Copyright (C) 2006, Dave Messina and Laura Kavanaugh
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/bin/deob_index.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/bin/deob_index.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/bin/deob_index.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,776 @@
+#!/usr/bin/perl -w
+
+# deob_index.pl
+# part of the Deobfuscator package
+# by Laura Kavanaugh and Dave Messina
+#
+# cared for by Dave Messina <dave-pause at davemessina.net>
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+deob_index.pl - extracts BioPerl documentation and indexes it in a database for easy retrieval
+
+=head1 VERSION
+
+This document describes deob_index.pl version 0.0.2
+
+
+=head1 SYNOPSIS
+
+deob_index.pl <path to BioPerl lib> <output path>
+
+=over
+
+=item <path to BioPerl lib>
+
+a directory path pointing to the root of the BioPerl lib tree. e.g. /export/share/lib/perl5/site_perl/5.8.7/Bio/
+
+=item <output path>
+
+where you would like deob_index.pl to put its output files.
+
+=back
+
+
+=head1 DESCRIPTION
+
+deob_index.pl goes through the entire BioPerl library tree looking for
+.pm and .pl files. For each one it finds, it tries to extract module-level
+POD documentation (e.g. SYNOPSIS, DESCRIPTION) and store it in a BerkeleyDB.
+It also tries to extract documentation for each method in the module and
+store that in a separate BerkeleyDB.
+
+Specific parts of the documentation for a module or method may be retrieved
+individually using the functions available in Deobfuscator.pm. See that module
+for details.
+
+While going through and trying to parse each module, deob_index.pl also
+reports what pieces of the documentation it can't find. For example, if
+a method's documentation doesn't describe the data type it returns, this 
+script logs that information to a file. This type of automated documentation-
+checking could be used to standardize and improve the documentation in 
+BioPerl.
+
+deob_index.pl creates four files:
+
+=over
+
+=item C<< package_list.txt >>
+
+A plaintext file listing each package found in the BioPerl directory that was
+searched. Packages are listed by their module names, such as 'Bio::SeqIO'.
+This file is used by L<deob_interface.cgi>.
+
+=item C<< packages.db >>
+
+A Berkeley DB, which stores package-level documentation, such as
+the synopsis and the description. Each key is a package name,
+e.g. "Bio::SeqIO", and each value string is composed of the 
+individual pieces of the documentation kept separate by 
+unique string record separators. The individual pieces of 
+documentation are pulled out of the string using the 
+get_pkg_docs function in Deobfuscator.pm. See that package
+for details.
+
+=item C<< methods.db >>
+
+Like packages.db, methods.db is also a Berkeley DB, except it 
+stores various pieces of information about individual methods
+available to a class. Each method might have documentation
+about its usage, its arguments, its return values, an example,
+and a description of its function. 
+
+Each key is the fully-qualified method name, e.g.
+"Bio::SeqIO::next_seq". Each value is a string containing all
+of the pieces of documentation concatenated together and
+separated by unique strings serving as record separators. The
+extraction of the actual documentation in these strings is
+handled by the get_method_docs subroutine in the Deobfuscator.pm
+module. See that package for details.
+
+Not all methods will have all of these types of documentation,
+and some methods will not have the different pieces of
+information clearly labeled and separated. For the latter type,
+deob_index.pl will try to store whatever free-form
+documentation that does exist, and the get_method_docs function
+in Deobfuscator.pm, if called without arguments, will return
+that documentation.
+
+=item C<< deob_index.log >>
+
+This file contains detailed information about errors
+encountered while trying to extract documentation during
+the indexing process.
+
+Each line in deob_index.log is a key-value pair describing
+a single parsing error.
+
+=back
+
+
+=head1 DIAGNOSTICS
+
+These are the parsing error codes reported in 'deob_index.log'.
+
+=head2 Package errors
+
+=over
+
+=item C<< PKG_NAME >>
+
+couldn't find the name of the package
+
+=item C<< SYNOPSIS >>
+
+couldn't find the synopsis
+
+=item C<< DESC >>
+
+couldn't find the description
+
+=item C<< METHODS >>
+
+couldn't find any methods
+
+=item C<< PKG_DUP >>
+
+This package name occurs more than once
+
+=back
+
+=head2 Method errors
+
+=over
+
+=item C<< FUNCTION >>
+
+couldn't find the function description
+
+=item C<< EXAMPLE >>
+
+couldn't find the example
+
+=item C<< ARGS >>
+
+couldn't find the method's arguments
+
+=item C<< USAGE >>
+
+couldn't find the usage statement
+
+=item C<< RETURNS >>
+
+couldn't find the return values
+
+=item C<< FREEFORM >>
+
+This method's documentation doesn't conform to the BioPerl standard of having
+clearly-labeled fields for title, function, example, args, usage, and returns.
+
+=item C<< METH_DUP >>
+
+This method name occurs more than once
+
+=back
+
+
+=head1 CONFIGURATION AND ENVIRONMENT
+
+This software requires:
+
+=over
+
+=item A working installation of the Berkeley DB
+
+The Berkeley DB comes standard with most UNIX distributions, so you may 
+already have it installed. See L<http://www.sleepycat.com> for more information.
+
+=item BioPerl
+
+deob_index.pl recursively navigates a directory of BioPerl modules. Note
+that the BioPerl module directory need not be "installed"; any old location
+will do. See L<http://www.bioperl.org> for the latest version.
+
+=back
+
+
+=head1 DEPENDENCIES
+
+L<version>, L<File::Find>, L<DB_File>
+
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+
+=head1 BUGS AND LIMITATIONS
+
+No bugs have been reported.
+
+deob_index.pl currently expects the sections of POD in a BioPerl module to
+be in a particular order, namely: NAME, SYNOPSIS, DESCRIPTION, CONSTRUCTORS,
+... , APPENDIX. Those sections are expected to be marked with =head1 POD tags,
+and the documentation for each method is expected to be in =head2 sections
+in the APPENDIX.
+
+Most, but not all BioPerl modules conform to this standard. Those that do not
+will cause deob_index.pl to report them as errors. Although the consistency
+of this standard is desirable for end-users of the documentation, this code
+probably needs to be a little bit more flexible (patches welcome!).
+
+This software has only been tested in a UNIX environment.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists   - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+
+=head1 SEE ALSO
+
+L<Deobfuscator>, L<deob_interface.cgi>, L<deob_detail.cgi>
+
+
+=head1 AUTHOR
+
+Dave Messina C<< <dave-pause at davemessina.net> >>
+
+
+=head1 CONTRIBUTORS
+
+=over
+
+=item Laura Kavanaugh
+
+=item David Curiel
+
+=back
+
+
+=head1 ACKNOWLEDGMENTS
+
+This software was developed originally at the Cold Spring Harbor Laboratory's
+Advanced Bioinformatics Course between Oct 12-25, 2005. Many thanks to David
+Curiel, who provided much-needed guidance and assistance on this project.
+
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright (C) 2005-6 Laura Kavanaugh and Dave Messina. All Rights Reserved.
+
+This module is free software; you may redistribute it and/or modify it under the
+same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+use version; $VERSION = qv('0.0.2');
+use warnings;
+use strict;
+use File::Find;
+use DB_File;
+
+# Debug verbosity. 0 is quiet; the subroutines below print extra detail
+# when this is set to 1, 2, or 3.
+my $DEBUG = 0;
+
+my $usage = "
+deob_index.pl - extracts and parses BioPerl POD
+and stores the info in a database.
+
+USAGE: deob_index.pl <BioPerl lib dir> <output dir>
+
+where 
+
+<BioPerl lib dir> is the full path of the BioPerl distribution
+you'd like to index
+
+    e.g. /export/share/lib/perl5/site_perl/5.8.7/Bio/
+    
+and
+
+<output dir> is the path where the output files should be placed
+";
+
+# exactly two arguments are required: the source tree and the output dir
+unless ( @ARGV == 2 ) { die $usage; }
+
+my ( $source_dir, $dest_dir ) = @ARGV;
+
+# NOTE: we're allowing only one source directory, but File::Find supports
+# passing an array of dirs.
+
+
+# save a list of the BioPerl modules to a file
+# (three-arg open, so nothing in the path can be taken for an open mode)
+my $list; # filehandle
+my $list_file = $dest_dir . "/package_list.txt";
+if ( -e $list_file) { unlink($list_file); }
+open $list, '>', $list_file or die "couldn't open $list_file:$!\n";
+
+# record misbehaving BioPerl docs to a file
+my $log;    # filehandle
+my $logfile = $dest_dir . "/deob_index.log";
+open $log, '>', $logfile or die "couldn't open $logfile:$!\n";
+
+# create databases
+my $meth_file = $dest_dir . '/methods.db';
+if ( -e $meth_file ) { unlink($meth_file); }    # remove for production?
+my $meth_db = create_db($meth_file) or die "couldn't create $meth_file: $!\n";
+my $pkg_file = $dest_dir . '/packages.db';
+if ( -e $pkg_file ) { unlink($pkg_file); }      # remove for production?
+my $pkg_db = create_db($pkg_file) or die "couldn't create $pkg_file: $!\n";
+
+# used to make sure we're parsing in the right order
+my %FLAG;
+
+# keep stats on our indexing
+my %stats;
+
+# wanted points to the subroutine which is run on each found file
+# ( in this program, that subroutine is &extract_pod )
+# no_chdir prevents find from chdir'ing into each subsequent directory
+my %FIND_OPTIONS = ( wanted => \&extract_pod, no_chdir => 1 );
+
+# This is the important line - File::Find actually doing the
+# traversal of the directory tree.
+find( \%FIND_OPTIONS, $source_dir );
+
+# output stats
+print STDOUT "\nThis indexing run found:\n";
+print $log "\nThis indexing run found:\n";
+foreach my $stat ( 'files', 'pkg_name', 'desc', 'synopsis', 'methods' ) {
+
+    # a counter that was never incremented is undef; report it as 0
+    my $count = $stats{$stat} || 0;
+    printf STDOUT "%5d %s\n", $count, $stat;
+    printf $log "%5d %s\n", $count, $stat;
+}
+
+# close files and DBs
+# $meth_db and $pkg_db are references to the tied hashes, so the hash
+# itself (not the scalar holding the reference) is what must be untied.
+untie %$meth_db or die "couldn't close $meth_file: $!\n";
+untie %$pkg_db  or die "couldn't close $pkg_file: $!\n";
+close $list     or die "couldn't close $list_file: $!\n";
+close $log      or die "couldn't close $logfile: $!\n";
+
+# make the output files readable and writable by everyone
+my $mode = 0666;
+chmod($mode, $pkg_file, $meth_file, $list_file);
+
+### Parsing subroutines ###
+# extract_pod
+#
+# File::Find 'wanted' callback: the path of the current file arrives in $_.
+# For every .pm, .pl or .PLS file it parses the POD sections in order,
+# appends the package name to the package list, logs each missing section
+# to the log file, updates %stats, and loads the package and method
+# records into the two databases. Returns nothing useful.
+sub extract_pod {
+    my ($file) = $_;
+
+    # skip unless it's a perl file that exists
+    return unless ( $file =~ /\.PLS$/ ) or ( $file =~ /\.p[ml]$/ );
+    return unless -e $file;
+
+    $stats{'files'}++;
+
+    # three-arg open: the path comes from the filesystem and must never be
+    # interpreted as an open mode
+    open my $fh, '<', $file or die "couldn't open $file:$!\n";
+
+    # these have to be done in order
+    my ( $pkg_name, $short_desc ) = get_pkg_name($fh);
+    my $synopsis     = get_synopsis($fh);
+    my $desc         = get_desc($fh);
+    my $constructors = get_constructors($fh);
+    my $methods      = get_methods($fh);
+
+    # everything we need has been read; release the handle right away
+    # instead of holding it until the sub returns
+    close $fh;
+
+    # record package name to our package list file
+    if ($pkg_name) { print $list $pkg_name, "\n"; }
+
+    # store valid package data here
+    my @pkg_data;
+
+    # error reporting
+    if ($pkg_name) {
+        $stats{'pkg_name'}++;
+        print $pkg_name, "\n" if $DEBUG == 1;
+    }
+    else {
+        print $log " PKG_NAME: $file\n";
+    }
+    if ($short_desc) {
+        $stats{'short_desc'}++;
+        push @pkg_data, $short_desc;
+        print $short_desc, "\n" if $DEBUG == 1;
+    }
+    else {
+        push @pkg_data, 'no short description available'; # store something
+        print $log "SHORT_DESC: $file\n";
+    }
+    if ($synopsis) {
+        $stats{'synopsis'}++;
+        print $synopsis, "\n" if $DEBUG == 1;
+        push @pkg_data, $synopsis;
+    }
+    else {
+        push @pkg_data, 'no synopsis available'; # store something
+        print $log " SYNOPSIS: $file\n";
+    }
+    if ($desc) {
+        $stats{'desc'}++;
+        print $desc, "\n" if $DEBUG == 1;
+        push @pkg_data, $desc;
+    }
+    else {
+        push @pkg_data, 'no description available'; # store something
+        print $log "     DESC: $file\n";
+    }
+
+    # get_methods always hands back a hash reference, so the reference
+    # alone is always true; the hash must be non-empty for methods to
+    # count as found.
+    if ( $methods && %$methods ) {
+        my $method_count = scalar keys %$methods;
+        print "**** Found $method_count methods in $pkg_name\n"
+            if $DEBUG == 2;
+        foreach my $method ( keys %$methods ) {
+            $stats{'methods'}++;
+            print $method, "\n//\n" if $DEBUG == 2;
+        }
+    }
+    else {
+        print $log "  METHODS: $file\n";
+    }
+
+    # prepare data for databases
+    my $pkg_record   = pkg_prep(@pkg_data);
+    my $meth_records = meth_prep( $pkg_name, $methods );
+
+    # load data in databases
+    if ($pkg_name) {
+        pkg_load( $pkg_db, $pkg_name, $pkg_record );
+        meth_load( $meth_db, $meth_records );
+    }
+}
+
+# slurp_until_next
+#
+# Reads lines from the given filehandle and returns them joined into one
+# string, stopping at the first line that begins with '=' (a POD
+# directive) or at end of file. The directive line is pushed back by
+# seeking to its start, so the next reader sees it again.
+sub slurp_until_next {
+    my ($fh) = @_;
+
+    my $text = q{};
+
+    while (<$fh>) {
+
+        # ordinary content: keep it and carry on
+        unless (/^\=/) {
+            $text .= $_;
+            next;
+        }
+
+        # POD directive: rewind to the start of this line so that it is
+        # seen as part of the next POD section, then stop
+        seek $fh, -length($_), 1;
+        last;
+    }
+    return $text;
+}
+
+# get_pkg_name
+#
+# Scans forward from the current position of the filehandle for the
+# '=head1 NAME' section. On success returns the list
+# ( package name, one-line short description ) and sets $FLAG{'pkg_name'}.
+# Returns an empty list if another =head1 comes first, if the file ends,
+# or if no name could be read.
+sub get_pkg_name {
+    my ($fh) = @_;
+
+    my $pkg_name;
+    my $short_desc;
+
+    LINE: while (<$fh>) {
+        chomp;
+        print "**", $_, "\n" if $DEBUG == 2;
+
+        # grab package name
+        # - "short desc" is the one-line description of the package
+        if ( $_ =~ /^\=head1\s+NAME/ ) {
+
+            # skip the line right after the heading (expected to be blank)
+            <$fh>;
+            my $next_line = <$fh>;
+
+            # the file ended right after the heading: nothing to parse
+            last LINE unless defined $next_line;
+
+            # split ' ' ignores leading whitespace, so an indented NAME
+            # line still yields the package name as the first field
+            ( $pkg_name, $short_desc ) = split ' ', $next_line, 2;
+            $short_desc = q{} unless defined $short_desc;
+            $short_desc .= slurp_until_next($fh);
+
+            # strip off leading dash
+            $short_desc =~ s/^(\-)+\s+//;
+
+            # strip off trailing spaces
+            $short_desc =~ s/\s+$//;
+
+            # strip any newlines (all of them, not just the first)
+            $short_desc =~ s/\n/ /g;
+
+            print $pkg_name, "\n" if $DEBUG == 1 && defined $pkg_name;
+
+            last LINE;
+        }
+
+        # we've hit a =head1, but it's the wrong one
+        elsif ( $_ =~ /^\=head1\s+/ ) {
+            last LINE;
+        }
+    }
+    if ($pkg_name) {
+        $FLAG{'pkg_name'} = 1;
+        return $pkg_name, $short_desc;
+    }
+    return;
+}
+
+# get_synopsis
+#
+# Reads forward from the current position of the filehandle until the
+# next =head1. If that heading is SYNOPSIS, the text below it is returned
+# and $FLAG{'synopsis'} is set; any other =head1 ends the search with no
+# synopsis.
+sub get_synopsis {
+    my ($fh) = @_;
+
+    my $synopsis;
+
+    while (<$fh>) {
+        chomp;
+        print "**", $_, "\n" if $DEBUG == 2;
+
+        # ordinary lines are skipped; only a =head1 ends the scan
+        next unless /^\=head1\s+/;
+
+        # the SYNOPSIS heading is the one we want; any other =head1
+        # means we've hit the wrong section
+        $synopsis = slurp_until_next($fh) if /^\=head1\s+SYNOPSIS/;
+        last;
+    }
+    if ($synopsis) {
+        $FLAG{'synopsis'} = 1;
+        return $synopsis;
+    }
+}
+
+# get_desc
+#
+# Reads forward from the current position of the filehandle until the
+# next =head1. If that heading is DESCRIPTION, the text below it is
+# returned and $FLAG{'description'} is set; any other =head1 ends the
+# search with no description.
+sub get_desc {
+    my ($fh) = @_;
+
+    my $desc;
+
+    while (<$fh>) {
+        chomp;
+        print "**", $_, "\n" if $DEBUG == 2;
+
+        # ordinary lines are skipped; only a =head1 ends the scan
+        next unless /^\=head1\s+/;
+
+        # the DESCRIPTION heading is the one we want; any other =head1
+        # means we've hit the wrong section
+        $desc = slurp_until_next($fh) if /^\=head1\s+DESCRIPTION/;
+        last;
+    }
+    if ($desc) {
+        $FLAG{'description'} = 1;
+        return $desc;
+    }
+}
+
+# get_constructors
+#
+# Placeholder for parsing a CONSTRUCTORS section. The body is empty, so
+# the sub currently reads nothing from its argument and returns nothing.
+sub get_constructors {
+
+    # not implemented
+
+    # should return a hashref
+}
+
+# get_methods
+#
+# Reads the rest of the file and collects per-method documentation.
+# Methods are only recognised after the '=head1 APPENDIX' heading and the
+# first '=cut' that follows it; from then on every '=head2 <name>' starts
+# a method whose text runs up to the next POD directive.
+# Always returns a hash reference (method name => documentation text),
+# which is empty when no methods were found.
+sub get_methods {
+    my ($fh) = @_;
+    my %methods;
+
+    # 0: APPENDIX not seen yet
+    # 1: inside the APPENDIX, waiting for its first '=cut'
+    # 2: past that '=cut' - '=head2' tags are methods from here on
+    my $stage = 0;
+
+    while (<$fh>) {
+        if ( /^\=head1\s+APPENDIX/ ) {
+            $stage ||= 1;
+        }
+        elsif ( /^\=cut/ ) {
+
+            # this should be the first tag after the APPENDIX
+            $stage = 2 if $stage;
+        }
+        elsif ( $stage == 2 && /^\=head2\s+(\S+)/ ) {
+
+            # this should be a method
+            $methods{$1} = slurp_until_next($fh);
+        }
+    }
+
+    # returns a hashref
+    return \%methods;
+}
+
+### Database subroutines ###
+# create_db
+#
+# Ties a fresh hash to a DB_File database stored at the given filename
+# and returns a reference to that hash. Dies if the tie fails.
+sub create_db {
+    my ($filename) = @_;
+
+    my %hash;
+
+    tie( %hash, "DB_File", $filename )
+        or die "ERROR: couldn't open $filename:$!\n";
+
+    return \%hash;
+}
+
+# pkg_prep
+#
+# Builds the single string stored for a package: all arguments joined
+# with the unique record separator on which the sub-records are later
+# split.
+sub pkg_prep {
+    return join 'DaVe-ReC-sEp', @_;
+}
+
+# meth_prep
+#
+# Turns the parsed method documentation of one package into database
+# records.
+#
+#   $pkg_name - name of the package the methods belong to
+#   $methods  - hashref: method name => raw documentation text
+#
+# Returns a hashref keyed by "<package>::<method>". If the documentation
+# has at least one labelled field (Title, Usage, Function, Example,
+# Returns, Args), the value is the fields concatenated as
+# "<separator>-<field>|<text>" sub-records, and each missing field is
+# written to the log. Otherwise the whole documentation text is stored
+# as one lump and a FREEFORM entry is logged.
+sub meth_prep {
+    my ( $pkg_name, $methods ) = @_;
+    my %records;
+
+    # unique string on which to split our sub-records
+    my $rec_sep = 'DaVe-ReC-sEp';
+
+    # The labelled fields, in the order they are reported to the log,
+    # each with the pattern that introduces it. Only 'Function' accepts
+    # a label with no space before the colon.
+    my @field_patterns = (
+        [ 'title',    qr/^\s+Title\s+:(.*)/ ],
+        [ 'usage',    qr/^\s+Usage\s+:(.*)/ ],
+        [ 'function', qr/^\s+Function\s?:(.*)/ ],
+        [ 'example',  qr/^\s+Example\s+:(.*)/ ],
+        [ 'returns',  qr/^\s+Returns\s+:(.*)/ ],
+        [ 'args',     qr/^\s+Args\s+:(.*)/ ],
+    );
+
+    foreach my $entry ( keys %$methods ) {
+        my $key = $pkg_name . '::' . $entry;
+        my $record;    # what will be stored in the db
+
+        # if the method conforms to the BioPerl doc spec,
+        # we will split it into constituent pieces before storing
+        # it in the db. If not, we store the whole thing as one lump.
+
+        my $last;      # ref to the field filled last, for multi-line entries
+        my %fields = map { $_->[0] => '' } @field_patterns;
+
+        LINE: foreach my $line ( split "\n", $methods->{$entry} ) {
+            foreach my $pattern (@field_patterns) {
+                my ( $name, $regex ) = @$pattern;
+                next unless $line =~ $regex;
+
+                # a label followed only by whitespace carries no value
+                my $value = $1;
+                next LINE if $value =~ /^\s+$/;
+
+                $fields{$name} = $value;
+                $last = \$fields{$name};
+                next LINE;
+            }
+
+            # grab multi-line entries: a deeply indented line continues
+            # the most recent field (ignored if no field was seen yet)
+            if ( $last && $line =~ /^\s{8,}(\s.*)/ ) { $$last .= $1; }
+        }
+
+        # debugging
+        if ( $DEBUG == 2 ) {
+            print "** $entry **\n";
+            foreach my $field ( keys %fields ) {
+                print STDOUT $field, "\t", $fields{$field}, "\n";
+            }
+            print "\n";
+        }
+
+        # if any of our fields have a value, store subrecords
+        my $filled_fields = grep /\w+/, values %fields;
+        print STDERR $key, "\t", $filled_fields, "\n" if $DEBUG == 3;
+        if ( $filled_fields > 0 ) {
+
+            # report every missing field; the upper-cased field name is
+            # right-aligned in nine columns, e.g. '    TITLE: '
+            foreach my $pattern (@field_patterns) {
+                my $name = $pattern->[0];
+                next if $fields{$name};
+                printf $log "%9s: %s\n", uc($name), $key;
+            }
+
+            # create the records to be stored in the db
+            foreach my $field ( keys %fields ) {
+                my $subrecord
+                    = $rec_sep . '-' . $field . '|' . $fields{$field};
+                $record .= $subrecord;
+            }
+        }
+
+        # if no subfields, store whatever docs we do have for the method
+        else {
+            $record = $methods->{$entry};
+            print $log ' FREEFORM: ', $key, "\n";
+        }
+
+        # store the record - for free-form documentation too, which would
+        # otherwise never reach the database
+        $records{$key} = $record;
+    }
+    return \%records;
+}
+
+# pkg_load
+#
+# Stores one package record in the package database under the package
+# name. A name that is already present is left untouched and reported
+# to the log as PKG_DUP (with a detailed warning at debug level 2).
+sub pkg_load {
+    my ( $pkg_db, $pkg_name, $record ) = @_;
+
+    if ( !exists $pkg_db->{$pkg_name} ) {
+
+        # first time we see this package: store it
+        $pkg_db->{$pkg_name} = $record;
+    }
+    else {
+        print $log '  PKG_DUP: ', $pkg_name, "\n";
+        warn(
+            "$pkg_name already exists in package db!\n",
+            "existing record:\n$pkg_db->{$pkg_name}\n",
+            "attempted to add:\n$record\n",
+            )
+            if $DEBUG == 2;
+    }
+}
+
+# meth_load
+#
+# Stores every method record of the given hashref in the method
+# database. A method name that is already present is left untouched and
+# reported to the log as METH_DUP (with a detailed warning at debug
+# level 2).
+sub meth_load {
+    my ( $meth_db, $records ) = @_;
+
+    foreach my $method ( keys %$records ) {
+        if ( !exists( $meth_db->{$method} ) ) {
+
+            # first time we see this method: store it
+            $meth_db->{$method} = $records->{$method};
+        }
+        else {
+            print $log ' METH_DUP: ', $method, "\n";
+            warn(
+                "$method already exists in method db!\n",
+                "existing record:\n$meth_db->{$method}\n",
+                "attempted to add:\n$records->{$method}\n",
+                )
+                if $DEBUG == 2;
+        }
+    }
+}
+
+__END__
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/bin/deob_index.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_detail.cgi
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_detail.cgi	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_detail.cgi	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,209 @@
+#!/usr/bin/perl -w
+
+# Deob_detail.cgi
+# part of the Deobfuscator package
+# by Laura Kavanaugh and Dave Messina
+#
+# cared for by Dave Messina <dave-pause at davemessina.net>
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+deob_detail.cgi - displays a web page of detailed information about a BioPerl method
+
+=head1 VERSION
+
+This document describes deob_detail.cgi version 0.0.2
+
+
+=head1 SYNOPSIS
+
+This program is designed to be called by deob_interface.cgi. See
+L</"DESCRIPTION"> for details.
+
+To install deob_detail.cgi and the rest of the Deobfuscator package, see the
+README.
+
+
+=head1 DESCRIPTION
+
+Deob_detail.cgi is called by deob_interface.cgi when a user clicks on a
+method name. This program extracts the documentation about that method from
+the Deobfuscator Berkeley DBs and returns it in some simple HTML formatting.
+
+
+=head1 DIAGNOSTICS
+
+None.
+
+
+=head1 CONFIGURATION AND ENVIRONMENT
+
+This program expects to have the 'methods.db' and 'packages.db' files in the
+same directory as itself. These two files are automatically generated when
+L<deob_index.pl> is run. If your installation requires that they be in a
+different location, change the $BerkeleyDB_packages and $BerkeleyDB_methods
+variables below to be fully qualified paths to the db files.
+
+
+=head1 DEPENDENCIES
+
+L<version>, L<CGI>, L<Deobfuscator>
+
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+
+=head1 BUGS AND LIMITATIONS
+
+No bugs have been reported.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists   - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+
+=head1 SEE ALSO
+
+L<Deobfuscator>, L<deob_interface.cgi>, L<deob_index.pl>
+
+
+=head1 AUTHOR
+
+Laura Kavanaugh
+
+
+=head1 CONTRIBUTORS
+
+=over
+
+=item Dave Messina C<< <dave-pause at davemessina.net> >>
+
+=item David Curiel
+
+=back
+
+
+=head1 ACKNOWLEDGMENTS
+
+This software was developed originally at the Cold Spring Harbor Laboratory's
+Advanced Bioinformatics Course between Oct 12-25, 2005. Many thanks to David
+Curiel, who provided much-needed guidance and assistance on this project.
+
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright (C) 2005-6 Laura Kavanaugh and Dave Messina. All Rights Reserved.
+
+This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+# Let the code begin...
+
+## HARDCODED VALUES ##
+# Change these to fit your installation.
+use lib './lib';
+my $BerkeleyDB_packages = './packages.db';
+my $BerkeleyDB_methods  = './methods.db';
+
+## You shouldn't need to change anything below here ##
+
+use version; $VERSION = qv('0.0.2');
+use warnings;
+use strict;
+use CGI ':standard';
+use Deobfuscator;
+
+# Open BerkeleyDBs
+my $packages_ref = Deobfuscator::open_db($BerkeleyDB_packages);
+my $methods_ref  = Deobfuscator::open_db($BerkeleyDB_methods);
+
+# 'method' is the name of the method passed in from deob_interface.cgi.
+# It comes straight from the request, so it may be missing and must be
+# escaped before it is written into the page.
+my $class_method = param('method');
+$class_method = '' unless defined $class_method;
+
+# Get all of the documentation fields out of the db.
+# A value of "0" means that the field is not documented.
+# ('title' is looked up but is not shown in the table below.)
+my %doc_for;
+foreach my $field ( 'title', 'usage', 'function', 'returns', 'args' ) {
+    my $text
+        = Deobfuscator::get_method_docs( $methods_ref, $class_method,
+        $field );
+    if ( $text eq "0" ) { $text = "not documented"; }
+    $doc_for{$field} = $text;
+}
+
+### Make the output page
+
+# Start the page
+print header;
+print start_html($class_method);
+
+# Define some styles
+my $style1
+    = qq{style="border-collapse:collapse;border:solid black 1px;font-family:verdana;font-size:10px;background-color:lightgrey"};
+my $style2
+    = qq{style="border-collapse:collapse;border:solid black 1px;font-family:verdana;font-size:10px"};
+my $style3
+    = qq{style="border-collapse:collapse;border:solid black 1px;font-family:verdana;font-size:14px"};
+
+# open the table
+# (qq{} rather than single quotes, so that $style3 is interpolated
+# instead of being printed literally)
+print '<div style="border:solid black 1px; width:100%; height:200; overflow:auto">';
+print qq{<table width="100%" $style3>};
+print '<tr><td colspan=4><center>', CGI::escapeHTML($class_method),
+    '</center></td></tr>';
+
+# one table row per documentation section: label, then the db field
+my @sections = (
+    [ 'Usage',    'usage' ],
+    [ 'Function', 'function' ],
+    [ 'Returns',  'returns' ],
+    [ 'Args',     'args' ],
+);
+
+foreach my $section (@sections) {
+    my ( $label, $field ) = @$section;
+
+    my $section_html = Deobfuscator::htmlify( $doc_for{$field} );
+    print "<tr><td $style1>$label</td><td $style2>$section_html</td></tr>\n";
+}
+
+# close the table
+print "</table></div>";
+
+# finish the page
+print end_html;
+
+# close BerkeleyDB
+# NOTE(review): close_db is handed the file names here, not the
+# references returned by open_db - confirm against Deobfuscator.pm.
+Deobfuscator::close_db($BerkeleyDB_packages);
+Deobfuscator::close_db($BerkeleyDB_methods);
+
+__END__
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_detail.cgi
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_flowchart.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_flowchart.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_help.html
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_help.html	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_help.html	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+<html>
+<head><title>BioPerl Deobfuscator Help</title></head>
+<body style="font-family:verdana;font-size:14px">
+<h2>What is the BioPerl Deobfuscator?</h2>
+<p>The Deobfuscator was written to make it easier to determine the methods that
+are available from a given BioPerl module.</p>
+<p>BioPerl is a highly object-oriented software package, with often multiple
+levels of inheritance. Although each individual module is usually well 
+documented for the methods specific to it, identifying the inherited methods
+is less straightforward.</p>
+<p>The Deobfuscator indexes all of the BioPerl POD documentation, taking account
+of the inheritance tree, and then presents all of the methods available to
+each module through a searchable web interface.</p>
+<p>The following diagram lays out what each part of the Deobfuscator does, and how the pieces interact. For more details, see the POD documentation for Deobfuscator.pm, deob_interface.cgi, and deob_index.pl.</p>
+<img src="deob_flowchart.png" ALT="a diagram showing how the Deobfuscator works" width=640 height=480 border=0>
+<h2>Feedback</h2>
+<p>Find a bug? Have a suggestion for improving the Deobfuscator or other BioPerl modules? Or better yet, have a patch you want to submit?</p>
+
+<h3>Mailing Lists</h3>
+<p>User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.</p>
+
+<ul>
+<li><a href="mailto:bioperl-l at bioperl.org">General discussion</a>
+<li><a href="http://www.bioperl.org/wiki/Mailing_lists">About the mailing lists</a>
+</ul>
+<h3>Reporting Bugs</h3>
+<p>Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:</p>
+
+<ul>
+<li><a href="http://bugzilla.open-bio.org/">Bugzilla</a>
+</ul>
+
+<h2>Credits</h2>
+<p>This software was developed originally at the Cold Spring Harbor Laboratory's <a href="http://meetings.cshl.edu/courses/c-info05.shtml">Advanced Bioinformatics Course</a> between Oct 12-25, 2005. Many thanks to David Curiel, who provided much-needed guidance and assistance on this project.</p>
+<p>The BioPerl Deobfuscator was developed by Laura Kavanaugh and Dave Messina.</p>
+</body>
+</html>

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_interface.cgi
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_interface.cgi	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_interface.cgi	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,593 @@
+#!/usr/bin/perl -w
+
+# Deob_interface.cgi
+# part of the Deobfuscator package
+# by Laura Kavanaugh and Dave Messina
+#
+# cared for by Dave Messina <dave-pause at davemessina.net>
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+deob_interface.cgi - a WWW user interface to the BioPerl Deobfuscator
+
+=head1 VERSION
+
+This document describes deob_interface.cgi version 0.0.2
+
+
+=head1 SYNOPSIS
+
+    This program is designed to be used through a web browser. To install
+    deob_interface.cgi and the rest of the Deobfuscator package, see the
+    README.
+
+
+=head1 DESCRIPTION
+
+Deob_interface.cgi provides a web-based front-end to the BioPerl Deobfuscator.
+It uses the Deobfuscator package to open the Berkeley databases storing the
+BioPerl documentation and then display a list of the available modules. A
+search box is also provided if the user wants to pare down the list.
+
+When a user clicks on the name of a class, deob_interface.cgi looks up the
+stored documentation on the methods in that class, and all of the classes that
+class inherits from, and displays a list of those methods. The list shows the
+class, return values, and usage statement for each method. A user can see more
+extensive documentation for a method by clicking on its name or its class's
+name.
+
+
+=head1 DIAGNOSTICS
+
+=over
+
+=item C<< Can't open list of Perl module names >>
+
+deob_interface.cgi can't locate the textfile F<package_list.txt>
+containing the full list of BioPerl packages. By default this file should be
+in the same directory as F<deob_interface.cgi>. See L</"CONFIGURATION AND
+ENVIRONMENT"> for more information.
+
+=item C<< Can't close list of Perl module names >>
+
+deob_interface.cgi was unsuccessful in closing the F<package_list.txt>
+file after reading it. This is most likely a transient filesystem error.
+
+=item C<< Unknown sort option selected in deob_interface.cgi >>
+
+In the event a sort parameter other than I<sort by class> or I<sort by method>
+was sent to the sorting subroutine, deob_interface.cgi will exit with a fatal
+error.
+
+=back
+
+
+=head1 CONFIGURATION AND ENVIRONMENT
+
+See the F<README> for installation instructions.
+
+There are four hardcoded variables you may need to set. Look in
+deob_interface.cgi for a section labeled 'SET HARDCODED VALUES HERE'.
+
+=over
+
+=item C<< $deob_detail_path >>
+
+The URL of the F<deob_detail.cgi> program. Set to L<<
+http://localhost/cgi-bin/deob_detail.cgi >> by default. F<deob_detail.cgi>
+needs to be in your webserver's F<cgi-bin> directory or some location where
+you are allowed to serve executable code to the web.
+
+If you are setting up the Deobfuscator package on your own machine, the
+default URL will probably work. Otherwise, you will need to change the URL,
+replacing the C<< localhost >> portion with the hostname of your webserver,
+and replacing C<< cgi-bin >> with the path to F<deob_detail.cgi> (starting
+at your webserver's root directory).
+
+=item C<< $PERLMODULES >>
+
+The textfile containing a list of the BioPerl modules. Set to
+F<package_list.txt> by default. F<package_list.txt> is automatically generated
+by the L<< deob_index.pl >> script and its name is a hardcoded value.
+
+If your copy of F<package_list.txt> has a different name or is not in the
+same directory as F<deob_detail.cgi>, set $PERLMODULES to the full path of
+F<package_list.txt>'s location.
+
+=item C<< $BerkeleyDB_packages >>
+
+The Berkeley DB file storing documentation on BioPerl packages. Set to
+F<packages.db> by default. F<packages.db> is automatically generated by the
+L<< deob_index.pl >> script and its name is a hardcoded value.
+
+If your copy of F<packages.db> has a different name or is not in the same
+directory as F<deob_detail.cgi>, set C<< $BerkeleyDB_packages >> to the
+full path of F<packages.db>'s location.
+
+=item C<< $BerkeleyDB_methods >>
+
+The Berkeley DB file storing documentation on BioPerl methods. Set to F<methods.db> by default. F<methods.db> is automatically generated by the
+F<deob_index.pl> script and its name is a hardcoded value.
+
+If your copy of F<methods.db> has a different name or is not in the same
+directory as F<deob_detail.cgi>, set C<< $BerkeleyDB_methods >> to the
+full path of  F<methods.db>'s location.
+
+=back
+
+
+=head1 DEPENDENCIES
+
+L<version>, L<CGI>, L<Deobfuscator>
+
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+
+=head1 BUGS AND LIMITATIONS
+
+=over
+
+=item C<< Selecting a class name returns no methods >>
+
+Clicking on C<< Bio::Tools::dpAlign >> or C<< Bio::Tools::AlignFactory >> in
+the upper class selection pane produces an empty lower methods pane. There are
+undoubtedly other modules that will display this behavior. Reported by Laura
+Kavanaugh 2006-04-18.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists   - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+
+=head1 SEE ALSO
+
+L<Deobfuscator>, L<deob_detail.cgi>, L<deob_index.pl>
+
+
+=head1 AUTHOR
+
+Laura Kavanaugh
+
+
+=head1 CONTRIBUTORS
+
+=over
+
+=item Dave Messina C<< <dave-pause at davemessina.net> >>
+
+=item David Curiel
+
+=back
+
+
+=head1 ACKNOWLEDGMENTS
+
+This software was developed originally at the Cold Spring Harbor Laboratory's
+Advanced Bioinformatics Course between Oct 12-25, 2005. Many thanks to David
+Curiel, who provided much-needed guidance and assistance on this project. Also, special thanks to Todd Wylie for his help with CGI.
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright (C) 2005-6 Laura Kavanaugh and Dave Messina. All Rights Reserved.
+
+You may use, modify, or redistribute this software under the same terms as
+Perl itself.
+
+
+=head1 DISCLAIMER
+
+This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. See L<perlartistic>.
+
+
+=cut
+
+
+# Let the code begin...
+
+## SET HARDCODED VALUES HERE ##
+# Deobfuscator.pm is searched for in ./lib, relative to the working directory
+use lib './lib';
+# text file listing the BioPerl module names, one per line
+my $PERLMODULES         = 'package_list.txt';
+# Berkeley DB file holding the per-package documentation
+my $BerkeleyDB_packages = 'packages.db';
+# Berkeley DB file holding the per-method documentation
+my $BerkeleyDB_methods  = 'methods.db';
+# target of the "what is it?" link in the page header
+my $help_path           = 'deob_help.html';
+# URL of deob_detail.cgi; each method name in the results links to it
+my $deob_detail_path    = 'http://localhost/cgi-bin/deob_detail.cgi';
+
+## You shouldn't need to change anything below here ##
+
+# $VERSION is assigned before 'use strict' takes effect, so it is a plain
+# package variable and needs no declaration
+use version; $VERSION = qv('0.0.2');
+use warnings;
+use strict;
+use CGI ':standard';
+use Deobfuscator;
+
+# File-scoped state shared by the main program below:
+#   @available_modules        module names shown in the class list
+#   @all_modules              every non-blank line of $PERLMODULES
+#   $pattern_found            set to 1 once a module name matches the search
+#   $filter, $search,
+#   $sort_order               request parameters Filter, Search and sort_order
+#   $sort_method              sort order handed to sorting()
+#   $ref_Class_hash           result of get_methods()
+#   $ref_sorted_keys          result of sorting()
+#   $ref_BerkeleyDB_packages,
+#   $ref_BerkeleyDB_methods   hash references returned by open_db()
+my @available_modules;
+my $sort_method;
+my $ref_Class_hash;
+my $filter;
+my $search;
+my $sort_order;
+my $pattern_found = 0;
+my @all_modules;
+my $ref_BerkeleyDB_packages;
+my $ref_BerkeleyDB_methods;
+my $ref_sorted_keys;
+
+# if user previously set the sort order, we can send it with the first form
+$sort_order = param('sort_order') ? param('sort_order') : 'by method';
+
+# define some styles
+# (each value is a complete style="..." attribute that is interpolated into
+# the HTML printed below)
+# $style1: bordered 10px cell with a light grey background (class name cells)
+my $style1
+    = qq{style="border-collapse:collapse;border:solid black 1px;font-family:verdana;font-size:10px;background-color:lightgrey"};
+# $style2: bordered 10px cell (class list table and result cells)
+my $style2
+    = qq{style="border-collapse:collapse;border:solid black 1px;font-family:verdana;font-size:10px"};
+# $style3: bordered 14px cell (methods table and its column headings)
+my $style3
+    = qq{style="border-collapse:collapse;border:solid black 1px;font-family:verdana;font-size:14px"};
+# $style4: borderless bold 18px text (page title)
+my $style4
+    = qq{style="border-collapse:collapse;border:0px;font-family:verdana;font-size:18px;font-weight:bold"};
+# $style5: 14px text, applied to <body>
+my $style5 = qq{style="font-family:verdana;font-size:14px"};
+# $style6: italic 9px text; not referenced anywhere else in this script
+my $style6 = qq{style="font-family:verdana;font-size:9px;font-style:italic"};
+
+# Open file containing all Bioperl package names. A three-argument open with
+# a lexical handle is used so the open mode can never come from the file name.
+open( my $mods_fh, '<', $PERLMODULES )
+    or die "Can't open list of Perl module names $PERLMODULES: $!\n";
+
+# Open BerkeleyDB by getting hash references
+$ref_BerkeleyDB_packages = Deobfuscator::open_db($BerkeleyDB_packages);
+$ref_BerkeleyDB_methods  = Deobfuscator::open_db($BerkeleyDB_methods);
+
+# Grab input and remove whitespace
+my $pattern = param('search_string') ? param('search_string') : ' ';
+$pattern =~ s/\s//g;
+
+# Compile the search string once, case-insensitively. The user may enter a
+# Perl regex; if it does not compile, fall back to matching the text
+# literally instead of aborting the whole request.
+my $pattern_re;
+if ( $pattern ne '' ) {
+    $pattern_re = eval { qr/$pattern/i };
+    if ( !defined $pattern_re ) {
+        $pattern_re = qr/\Q$pattern\E/i;
+    }
+}
+
+# Filter file names with user search string if one has been entered
+while ( my $line = <$mods_fh> ) {
+
+    # blank lines never name a module
+    next if $line !~ /\S/;
+
+    # capture list of all module names in case there are no
+    # matches found to user input string
+    push @all_modules, $line;
+
+    if ( defined $pattern_re ) {
+        if ( $line =~ $pattern_re ) {
+            push @available_modules, $line;
+            $pattern_found = 1;
+        }
+    }
+    else {
+        push @available_modules, $line;
+    }
+}
+
+# nothing matched (or the list was empty): show every module instead
+if ( !@available_modules ) {
+    @available_modules = @all_modules;
+}
+close $mods_fh or die "Can't close list of Perl module names $PERLMODULES: $!";
+
+print header;
+
+# The sort order and the search string come straight from the request, so
+# they are HTML-escaped before being echoed back into attribute values.
+my $sort_order_html = CGI::escapeHTML($sort_order);
+my $pattern_html    = CGI::escapeHTML($pattern);
+
+# Page head, title bar and the search form. submitMe() is the JavaScript
+# hook used by the links in the class list: it stores the chosen class in
+# the hidden 'module' field and submits the form with Search set.
+print <<CSHL;
+<html>
+    <head>
+        <title>Deobfuscator</title>
+        <script language="JavaScript">
+
+        function submitMe(packageName) {
+            searchForm.module.value=packageName;
+            searchForm.Search.value='Search';
+            searchForm.submit();
+            return true;
+        }
+        </script>
+    </head>
+    <body $style5>
+    <div style="border:solid black 1px; width:100%; overflow:auto">
+    <table width=100%>
+    <tr>
+        <td><p $style4>Welcome to the BioPerl Deobfuscator</p></td>
+        <td><p align=right><a href="$help_path">what is it?</a></p></td>
+    </tr>
+    </table>
+    </div>
+    <br>
+    <br>
+    <div>
+        <form name="searchForm" action="">
+            <input type="hidden" name="Search">
+            <input type="hidden" name="module">
+            <input type="hidden" name="sort_order" value="$sort_order_html">
+
+            Enter a search string or Perl regex (examples: Bio::SeqIO, seq, fasta\$)
+            <br>
+            <input style="width:30em" type="text"   name="search_string" value="$pattern_html"></input>
+            <input type="submit" name="Filter"></input></form>
+            <br>
+            OR select a class from the list:
+    </div>
+CSHL
+
+# Scrollable table with one row per available class: the class name (a link
+# that calls submitMe) and the short description stored for the package.
+print <<LIST_START;
+
+            <div style="border:solid black 1px; width:100%; height:200; overflow:auto">
+            <table width="100%" $style2>
+LIST_START
+
+for my $package (@available_modules) {
+
+    # names were read with their line endings; strip them in place
+    chomp $package;
+
+    my $short_desc = Deobfuscator::get_pkg_docs( $ref_BerkeleyDB_packages,
+        $package, 'short_desc' );
+
+    my $name_cell
+        = qq{<td $style1><a href="javascript:submitMe('$package')">$package</a></td>};
+    my $desc_cell = qq{<td $style2 width='75%'>$short_desc</td>};
+
+    print "<tr>$name_cell$desc_cell</tr>\n";
+}
+
+
+print <<LIST_END;
+    </table>
+    </div>
+    <br>
+LIST_END
+
+
+# keep track of all our form values; absent parameters become empty strings
+# (or the default sort order) so nothing below ever sees undef
+my $input_module = param('module');
+$input_module    = '' if !defined $input_module;
+$filter          = param('Filter') ? param('Filter') : ' ';
+$search          = param('Search');
+$search          = '' if !defined $search;
+$sort_order      = param('sort_order') ? param('sort_order') : 'by method';
+
+# set position of sort button based on current sort order: 'by class'
+# selects the class option, anything else falls back to 'by method'
+my $is_class  = $sort_order eq 'by class' ? 'selected' : '';
+my $is_method = $is_class                 ? ''         : 'selected';
+
+# Process user input and return result
+if ( param() ) {    #1
+
+    # The Search flag, the class name and the Filter value come from the
+    # request; escape them before they are echoed back into the page.
+    my $search_html
+        = CGI::escapeHTML( defined $search ? $search : '' );
+    my $module_html
+        = CGI::escapeHTML( defined $input_module ? $input_module : '' );
+    my $filter_html
+        = CGI::escapeHTML( defined $filter ? $filter : '' );
+
+    # show button allowing user to set sort order
+    print <<SORT_CODE;
+
+<form name="SORT" action="">
+<input type="hidden" name="Search" value="$search_html">
+<input type="hidden" name="module" value="$module_html">
+<select name="sort_order" onChange="submit()">
+<option value="by method" $is_method>sort by method</option>
+<option value="by class" $is_class>sort by class</option>
+</select>
+</form>
+SORT_CODE
+
+    # grab sort order from form or sort by method as a default
+    $sort_method = param('sort_order') ? param('sort_order') : 'by method';
+
+    # $filter is never the empty string (it defaults to a single space), so
+    # this 'if' should never be true
+    if (   ( $filter eq "" )
+        && ( !defined $input_module || $input_module eq "" ) )
+    {
+
+        print "filter = $filter_html<br>search=$search_html<br>";
+        print "Please select a class from the menu or enter a search \n";
+        print "string and press \"Filter\" button\n";
+    }
+    elsif ($search) {
+
+        # Determine methods available to user's input class and the class
+        # where the methods reside.  Store results in a hash.
+        $ref_Class_hash = get_methods($input_module);
+
+        # Sort the method/class data according to user input and display
+        $ref_sorted_keys
+            = sorting( $input_module, $sort_method, $ref_Class_hash );
+
+        # Display results
+        display( $input_module, $ref_sorted_keys, $ref_Class_hash,
+            $ref_BerkeleyDB_methods, $deob_detail_path );
+
+    }
+
+    # $filter is always true here (a single space when the Filter button was
+    # not used), so every request that did not select a class ends up in
+    # this branch
+    elsif ($filter) {
+
+        # only complain when a search string was entered and matched nothing
+        if ( $pattern ne '' && !$pattern_found ) {
+            print "No match to string found, please try again";
+        }
+    }
+    else {
+        print "Not sure about that input. Please submit error report\n";
+    }
+
+}    #1
+
+# footer (the <body> opened in the page header is closed here as well)
+print "</body>\n</html>\n";
+
+
+# Close BerkeleyDB. close_db() takes the hash references returned by
+# open_db(), not the database file names.
+Deobfuscator::close_db($ref_BerkeleyDB_packages);
+Deobfuscator::close_db($ref_BerkeleyDB_methods);
+
+
+########################  SUBROUTINES  #################################
+
+sub get_methods {
+
+    # Collect all methods available to the user's input class.
+    # Deobfuscator::return_methods hands back a hash keyed by class name whose
+    # value is a reference to an array of [ method, class ] pairs. Those pairs
+    # are flattened into a hash whose keys are the concatenated
+    # "class::method" string and whose values are the bare method name, which
+    # is the form the sorting and display code works with.
+
+    my ($user_class) = shift;
+
+    my $methods_of = Deobfuscator::return_methods($user_class);
+
+    my %method_for;
+
+    # (a foreach is used on purpose: it also copes with a class that has no
+    # entry in the returned hash)
+    foreach my $pair ( @{ $methods_of->{$user_class} } ) {
+        my ( $method, $class ) = @{$pair};
+        $method_for{ $class . '::' . $method } = $method;
+    }
+
+    return \%method_for;
+
+}    # End sub get_methods
+
+
+sub sorting {
+
+    # Order the "class::method" keys of a method hash.
+    #
+    # Args    : $package  - class the methods belong to (not used here)
+    #           $sort     - a string containing 'by class' or 'by method'
+    #           $ref_hash - ref to hash: "class::method" => method name
+    # Returns : ref to an array of the hash's keys in the requested order
+    # Dies    : if $sort contains neither sort option
+
+    my ( $package, $sort, $ref_hash ) = @_;
+    my @sorted_keys;
+
+    # Comparisons are done on lower-cased strings so that capitalized names
+    # are not sorted separately from lower case names. Equal names fall back
+    # to comparing the full keys, which makes the result independent of the
+    # hash's internal key order.
+    if ( $sort =~ /by class/ ) {
+
+        # the key starts with the class name, so sorting keys sorts by class
+        @sorted_keys
+            = sort { lc($a) cmp lc($b) or $a cmp $b } keys %{$ref_hash};
+    }
+    elsif ( $sort =~ /by method/ ) {
+
+        # the hash values are the bare method names
+        @sorted_keys = sort {
+                   lc( $ref_hash->{$a} ) cmp lc( $ref_hash->{$b} )
+                or lc($a) cmp lc($b)
+                or $a cmp $b
+        } keys %{$ref_hash};
+    }
+    else {
+        die
+            "Unknown sort option >$sort< in deob_interface.cgi::sorting()\n";
+    }
+
+    return \@sorted_keys;
+
+}    # End sorting subroutine
+
+
+sub display {
+
+    # Print the methods table for one class.
+    #
+    # Args    : $package          - class whose methods are listed
+    #           $ref_sorted_array - ref to array of "class::method" keys, in
+    #                               display order
+    #           $ref_hash         - ref to hash: "class::method" => method
+    #           $db_hashref       - ref to the hash tied to the methods DB
+    #           $detail_path      - URL of deob_detail.cgi
+    # Returns : nothing; the HTML is printed to the current filehandle
+
+    my ( $package, $ref_sorted_array, $ref_hash, $db_hashref, $detail_path ) = @_;
+
+    # the class name originates from the request, so escape it before echoing
+    my $package_html = CGI::escapeHTML($package);
+
+    print <<CSHL;
+        <div style="border:solid black 1px; width:100%; overflow:auto">
+        <table width="100%" $style3>
+        <tr><td colspan=4><center>methods for <b>$package_html</b></center></td></tr>
+
+        </table></div>
+        <div style="border:solid black 1px; width:100%; height:200; overflow:auto">
+        <table width="100%" $style3>
+
+        <tr>
+            <td $style3 align=center>Method</td>
+            <td $style3 align=center>Class</td>
+            <td $style3 align=center>Returns</td>
+            <td $style3 align=center>Usage</td>
+        </tr>
+
+CSHL
+
+    foreach my $first ( @{$ref_sorted_array} ) {
+
+        # everything before the last '::' is the class; if the key has no
+        # '::' at all, use the whole key rather than a stale capture
+        my ($class_name) = $first =~ /^(.+)::/;
+        $class_name = $first if !defined $class_name;
+        my $package_name = urlify_pkg($class_name);
+
+        # Get the return values part of the documentation
+        # (get_method_docs returns 0 when there is nothing to show)
+        my $return_methods_raw
+            = Deobfuscator::get_method_docs( $db_hashref, $first, "returns" );
+        if ( $return_methods_raw eq "0" ) {
+            $return_methods_raw = "not documented";
+        }
+
+        # Get the usage part of the documentation
+        my $return_usage_raw
+            = Deobfuscator::get_method_docs( $db_hashref, $first, "usage" );
+        if ( $return_usage_raw eq "0" ) {
+            $return_usage_raw = "not documented";
+        }
+
+        # clean up formatting a little
+        my $return_methods = Deobfuscator::htmlify($return_methods_raw);
+        my $return_usage   = Deobfuscator::htmlify($return_usage_raw);
+
+        # the method name links to its full documentation in deob_detail.cgi
+        my $href = $detail_path . "?method=$first";
+        my $link
+            = qq{<a target="method" href="$href">$ref_hash->{$first}</a>};
+
+        my @columns
+            = ( $link, $package_name, $return_methods, $return_usage );
+
+        print "<tr><td $style2>", join( "</td><td $style2>", @columns ),
+            "</td></tr>\n";
+
+    }
+
+    print <<EOP;
+    </table>
+    </div>
+
+EOP
+
+    return;
+
+}    # End display subroutine
+
+sub urlify_pkg {
+
+    # Turn a package name into an HTML link to that package's page on
+    # doc.bioperl.org. The link text is the package name itself.
+    my ($pkg_name) = @_;
+
+    # convert Bio::DB::RefSeq to Bio/DB/RefSeq
+    ( my $doc_path = $pkg_name ) =~ s{::}{/}g;
+
+    return
+          q{<a href="}
+        . q{http://doc.bioperl.org/bioperl-live/}
+        . $doc_path
+        . q{.html">}
+        . $pkg_name
+        . q{</a>};
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/cgi-bin/deob_interface.cgi
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/lib/Deobfuscator.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/lib/Deobfuscator.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/lib/Deobfuscator.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,488 @@
+package Deobfuscator;
+
+# module for retrieving method-specific documentation from a
+# Berkeley database
+#
+# first version by Dave Messina (dmessina at watson.wustl.edu) at the
+# Cold Spring Harbor Laboratory Advanced Bioinformatics Course
+# Oct 12-25, 2005
+
+# part of the Deobfuscator package
+# by Laura Kavanaugh and Dave Messina
+#
+# cared for by Dave Messina <dave-pause at davemessina.net>
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Deobfuscator - get BioPerl method and package information from a Berkeley DB
+
+
+=head1 VERSION
+
+This document describes Deobfuscator version 0.0.2
+
+
+=head1 SYNOPSIS
+
+    use Deobfuscator;
+
+    # get all the methods available to objects belonging to a class
+    # (including those inherited from parent classes)
+    my $hashref = Deobfuscator::return_methods('Bio::SeqIO', 'Bio::AlignIO');
+
+    # retrieve the return values for a method
+    my $method_db_ref = Deobfuscator::open_db('methods.db');
+    my $ret_vals = Deobfuscator::get_method_docs( $method_db_ref,
+                                                  'Bio::SeqIO::next_seq',
+                                                  'returns' );
+    close_db($method_db_ref);
+
+    # retrieve the synopsis documentation for a class
+    my $pkg_db_ref = Deobfuscator::open_db('packages.db');
+    my $synopsis = Deobfuscator::get_pkg_docs( $pkg_db_ref,
+                                              'Bio::SeqIO',
+                                              'synopsis' );
+    close_db($pkg_db_ref);
+
+
+=head1 DESCRIPTION
+
+The Deobfuscator module contains functions which relate to retrieving
+specific types of documentation about BioPerl packages and methods.
+
+The deob_index.pl script reads through all of the BioPerl files, extracts
+the documentation, and stores it in two BerkeleyDB databases. This module
+is then used to query those databases for information about a given method
+or package. (see the deob_index.pl documentation for more info.)
+
+The types of information available for individual methods include: the
+usage statement, the return values, the arguments to give to the method, the
+description of the function, and an example of how to use the method.
+
+The Deobfuscator module can be used also to retrieve the synopsis and
+description documentation for a given class.
+
+
+=head1 DIAGNOSTICS
+
+=over
+
+=item C<< error: couldn't eval $module >>
+
+A package couldn't be loaded (eval'd), which would prevent us from determining
+what its methods are.
+
+=item C<< error: couldn't open $filename >>
+
+One of the Berkeley databases couldn't be opened. Possible causes are:
+deob_index.pl wasn't run and so the databases weren't created, or the database
+files aren't in the correct place.
+
+=item C<< error: couldn't close database >>
+
+One of the Berkeley databases couldn't be closed. This might just be a 
+transient filesystem error.
+
+=back
+
+
+=head1 CONFIGURATION AND ENVIRONMENT
+
+This software requires:
+
+=over
+
+=item A working installation of the Berkeley DB
+
+The Berkeley DB comes standard with most UNIX distributions, so you may 
+already have it installed. See L<http://www.sleepycat.com> for more information.
+
+=item BioPerl
+
+Deobfuscator.pm recursively navigates a directory of BioPerl modules. Note
+that the BioPerl module directory need not be "installed"; any old location
+will do. See L<http://www.bioperl.org> for the latest version.
+
+=back
+
+
+=head1 DEPENDENCIES
+
+L<version>, L<Class::Inspector>, L<DB_File>
+
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+
+=head1 BUGS AND LIMITATIONS
+
+In the current implementation, Deobfuscator does not show internal or private
+methods (i.e. those whose name begins with an underscore). This is simply an
+option in the Class::Inspector->methods call, and so could be presented as an
+option to the user (patches welcome).
+
+No bugs have been reported.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://www.bioperl.org/wiki/Mailing_lists   - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of
+the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+
+=head1 SEE ALSO
+
+L<deob_index.pl>
+
+
+=head1 AUTHOR
+
+Dave Messina C<< <dave-pause at davemessina.net> >>
+
+
+=head1 CONTRIBUTORS
+
+=over
+
+=item Laura Kavanaugh
+
+=item David Curiel
+
+=back
+
+
+=head1 ACKNOWLEDGMENTS
+
+This software was developed originally at the Cold Spring Harbor Laboratory's
+Advanced Bioinformatics Course between Oct 12-25, 2005. Many thanks to David
+Curiel, who provided much-needed guidance and assistance on this project.
+
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright (C) 2005-6 Laura Kavanaugh and Dave Messina. All Rights Reserved.
+
+This module is free software; you may redistribute it and/or modify it under the
+same terms as Perl itself. See L<perlartistic>.
+
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the functions.
+Internal methods are preceded with a "_".
+
+=cut
+
+
+use version; $VERSION = qv('0.0.2');
+use warnings;
+use strict;
+use Class::Inspector;
+use DB_File;
+
+use lib './lib';
+
+
+=head2 return_methods
+
+Title   : return_methods
+Usage   : $methods_hashref = Deobfuscator::return_methods('Bio::AlignIO',
+          'Bio::SeqIO');
+Function: traverses the inheritance tree for a given class to determine
+          the methods available to objects belonging to that class
+
+Returns : a reference to a hash. The hash keys are fully-qualified class
+          names, such as 'Bio::SeqIO'. The hash values are references to
+          an array of arrays, where each element is a reference to a
+          two-element array: the method name, followed by the class the
+          method is inherited from.
+
+Args    : a list of fully-qualified class names
+
+=cut
+
+sub return_methods {
+
+    my @input = @_;
+
+    # key: full class name
+    # value: a reference to an array of arrays
+    #    where each element is a reference to a two-element array:
+    #    [ method name, class the method is inherited from ]
+    my %methods_of;
+
+    foreach my $class (@input) {
+
+        # load the class; _load_module dies if that is not possible
+        _load_module($class);
+
+        # methods returned from Class::Inspector as:
+        # [
+        #   [ 'Class::method1',   'Class',   'method1', \&Class::method1   ],
+        #   [ 'Another::method2', 'Another', 'method2', \&Another::method2 ],
+        #   [ 'Foo::bar',         'Foo',     'bar',     \&Foo::bar         ],
+        # ]
+        my $methods_aryref3
+            = Class::Inspector->methods( $class, 'expanded', 'public' );
+
+        # every requested class gets an entry, even when no methods turn up
+        $methods_of{$class} ||= [];
+
+        # Class::Inspector may hand back undef instead of an array reference;
+        # treat that as "no methods" rather than dying on the dereference
+        next if !defined $methods_aryref3;
+
+        foreach my $meth ( @{$methods_aryref3} ) {
+            my $method_name    = $meth->[2];
+            my $inherited_from = $meth->[1];
+            push @{ $methods_of{$class} }, [ $method_name, $inherited_from ];
+        }
+
+    }
+    return \%methods_of;
+}
+
+=head2 print_methods
+
+Title   : print_methods
+Usage   : print_methods('Bio::AlignIO','Bio::SeqIO');
+Function: traverses the inheritance tree for a given class to determine
+           the methods available to objects belonging to that class, then
+           pretty-prints the resulting information.
+Returns : nothing. But it does print to the current filehandle (usually
+           STDOUT).
+Args    : a list of fully-qualified class names
+
+=cut
+
+sub print_methods {
+
+    my @input = @_;
+
+    foreach my $class (@input) {
+
+        # load the class; _load_module dies if that is not possible
+        _load_module($class);
+
+        # methods returned as
+        # [
+        #   [ 'Class::method1',   'Class',   'method1', \&Class::method1   ],
+        #   [ 'Another::method2', 'Another', 'method2', \&Another::method2 ],
+        #   [ 'Foo::bar',         'Foo',     'bar',     \&Foo::bar         ],
+        # ]
+        my $methods_aryref3
+            = Class::Inspector->methods( $class, 'expanded', 'public' );
+
+        print "methods for $class\n";
+        print "=========================================\n";
+
+        # Class::Inspector may hand back undef instead of an array reference;
+        # in that case there is nothing to list below the heading
+        next if !defined $methods_aryref3;
+
+        for my $i ( 0 .. $#{$methods_aryref3} ) {
+            my $meth = $methods_aryref3->[$i];
+
+            print "method $i\n";
+            print "\t           class: $meth->[1]\n";
+            print "\t          method: $meth->[2]\n";
+            print "--------------------------------------\n";
+        }
+
+    }
+
+    return;
+}
+
+=head2 _load_module
+
+Title   : _load_module
+Usage   : * INTERNAL USE ONLY *
+Function: attempts to load a module
+Returns : nothing. But it does die upon failure to load.
+Args    : a module name
+
+=cut
+
+sub _load_module {
+    my $module = shift;
+
+    # The name can originate from a web request, so only accept something
+    # shaped like a package name (Foo or Foo::Bar::Baz). Anything else is
+    # rejected before it gets anywhere near require.
+    if ( !defined $module || $module !~ /\A[A-Za-z_]\w*(?:::\w+)*\z/ ) {
+        my $shown = defined $module ? $module : '';
+        die "error! couldn't eval $shown: not a valid module name\n";
+    }
+
+    # require by file name inside a block eval: the module name is never
+    # compiled as Perl code
+    ( my $file = "$module.pm" ) =~ s{::}{/}g;
+    eval { require $file };
+    die "error! couldn't eval $module: $@" if $@;
+
+    return;
+}
+
+=head2 open_db
+
+Title   : open_db
+Usage   : open_db($filename)
+Function: opens a Berkeley DB
+Returns : a hashref tied to the DB
+Args    : a filename as a scalar
+
+=cut
+
+sub open_db {
+    my ($filename) = @_;
+
+    # NOTE(review): no open flags are passed to DB_File, so its defaults
+    # apply -- confirm whether a missing file should be created or reported.
+    my %db;
+    tie( %db, "DB_File", $filename )
+        or die "error: couldn't open $filename: $!\n";
+
+    # callers work with a reference to the tied hash
+    return \%db;
+}
+
+=head2 close_db
+
+Title   : close_db
+Usage   : close_db($hashref)
+Function: closes a database
+Returns : nothing.
+Args    : a hashref to a tied Berkeley DB
+
+=cut
+
+sub close_db {
+    my ($hashref) = @_;
+
+    # Only a hash reference can point at a tied DB hash. Anything else (for
+    # instance a file name) is ignored, as it always has been.
+    return if ref $hashref ne 'HASH';
+
+    # untie the hash itself; untying the scalar that holds the reference
+    # would leave the database attached
+    untie %{$hashref} or die "error: couldn't close database: $!\n";
+
+    return;
+}
+
+=head2 get_pkg_docs
+
+Title   : get_pkg_docs
+Usage   : get_pkg_docs($db_hashref, 'Class name', 'documentation type');
+Function: returns a specified part of the documentation for a class
+Returns : a string containing the desired documentation, ' ' if the
+          documentation type isn't one of those listed below, or 0 if the
+          class isn't in the DB
+Args    : - $db_hashref is the ref to the hash tied to the DB
+          - Class name is of the form 'Bio::SeqIO'
+          - documentation type is the subfield of the method's POD.
+          The possible values of documentation type are:
+          short_desc, synopsis, desc
+
+=cut
+
+sub get_pkg_docs {
+    my ($db_hashref, $pkg_name, $info_type) = @_;
+
+    # a package that isn't in our db yields 0
+    return 0 if !exists $db_hashref->{$pkg_name};
+
+    # the stored record is the three documentation parts, in this order,
+    # joined by the record separator
+    my $rec_sep = 'DaVe-ReC-sEp';
+    my ( $short_desc, $synopsis, $desc )
+        = split $rec_sep, $db_hashref->{$pkg_name};
+
+    my %record = (
+        short_desc => $short_desc,
+        synopsis   => $synopsis,
+        desc       => $desc,
+    );
+
+    # hand back just the requested part, or ' ' for an unknown type
+    return exists $record{$info_type} ? $record{$info_type} : ' ';
+}
+
+=head2 get_method_docs
+
+Title   : get_method_docs
+Usage   : get_method_docs($db_hashref, 'Class+method name', 'documentation type');
+Example : get_method_docs($db_hashref, 'Bio::SeqIO::next_aln', 'args');
+Function: returns a specified part of the documentation for a class's method
+Returns : a string containing the desired documentation, or 0 if the
+         desired documentation doesn't exist
+Args    : - $db_hashref is the ref to the hash tied to the DB
+          - Class+method name is of the form 'Bio::SeqIO::next_aln',
+            where Bio::SeqIO is the class and next_aln is the method.
+          - documentation type is the subfield of the method's POD.
+            The possible values of documentation type are:
+            title, usage, function, returns, args
+
+=cut
+
+sub get_method_docs {
+    my ($db_hashref, $meth_name, $info_type) = @_;
+
+    # a method that isn't in our db yields 0
+    return 0 if !exists $db_hashref->{$meth_name};
+
+    my $rec_sep = 'DaVe-ReC-sEp';
+    my %record;
+    my $whole_record;
+
+    # The stored value is a series of sub-records divided by the record
+    # separator. A sub-record of the form "-type|text" is filed under its
+    # type; every sub-record, tagged or not, is also appended to one big
+    # newline-terminated string.
+    foreach my $part ( split $rec_sep, $db_hashref->{$meth_name} ) {
+        if ( $part =~ /^-(\w+)\|(.*)/ ) {
+            $record{$1} = $2;
+        }
+        $whole_record .= "$part\n";
+    }
+
+    # no type requested, or a type this record doesn't have: return everything
+    if ( !$info_type || !exists $record{$info_type} ) {
+        return $whole_record;
+    }
+
+    # a part holding nothing but whitespace counts as missing
+    return 0 if $record{$info_type} !~ /\S/;
+
+    return $record{$info_type};
+}
+
+=head2 htmlify
+
+Title   : htmlify
+Usage   : htmlify($string);
+Example : htmlify('this is a : doc');
+Function: does some crude reformatting of POD method documentation by swapping
+          isolated colons (':') into HTML <br> tags
+Returns : a string
+Args    : a string
+
+=cut
+
+sub htmlify {
+    my ($string) = @_;
+
+    # work on a copy: every colon that has a whitespace character on both
+    # sides becomes a <br> tag padded with single spaces
+    ( my $html = $string ) =~ s{\s:\s}{ <br> }g;
+
+    return $html;
+}
+
+1;
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/lib/Deobfuscator.pm
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/00.load.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/00.load.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/00.load.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+# Smoke test: checks that the Deobfuscator module can be loaded.
+use Test::More tests => 1;
+
+BEGIN {
+# use_ok is called inside BEGIN so the module is loaded at compile time
+use_ok( 'Deobfuscator' );
+}
+
+# print the version of the module under test as a diagnostic
+diag( "Testing Deobfuscator $Deobfuscator::VERSION" );

Added: trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/pod.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/pod.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/Deobfuscator/t/pod.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+#!perl -T
+
+# POD check; the whole test is skipped when Test::Pod 1.14 cannot be loaded.
+use Test::More;
+# string eval: a failing "use" sets $@ instead of aborting the script
+eval "use Test::Pod 1.14";
+plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
+all_pod_files_ok();

Added: trunk/packages/bioperl/branches/upstream/current/doc/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+For HOWTOs, tutorials, and module documentation please see the 
+BioPerl Wiki at http://bioperl.org.

Added: trunk/packages/bioperl/branches/upstream/current/doc/makedoc.PL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/doc/makedoc.PL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/doc/makedoc.PL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+
+# makedoc.PL - stamp the BioPerl version into documentation files.
+#
+# Every file named on the command line is edited in place: each occurrence
+# of the placeholder @@VERSION@@ is replaced by $Bio::Root::Version::VERSION.
+
+use lib ".";
+use strict;
+
+require Bio::Root::Version;
+
+# The one-liner handed to the child perl.  Single quotes keep the \@ escapes
+# literal, so the child sees s/\@\@VERSION\@\@/<version>/g; and does not try
+# to interpolate an array.
+my $substitution = 's/\@\@VERSION\@\@/' . $Bio::Root::Version::VERSION . '/g;';
+
+foreach my $file ( @ARGV ) {
+    # List-form system() bypasses the shell, so file names containing
+    # quotes, spaces or other metacharacters are passed through verbatim.
+    system( 'perl', '-pi', '-e', $substitution, $file ) == 0
+        or warn "makedoc.PL: could not update '$file' (status $?)\n";
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/doc/makedoc.PL
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/Bio-DB-GFF/load_ucsc.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/Bio-DB-GFF/load_ucsc.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/Bio-DB-GFF/load_ucsc.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,252 @@
+#!/usr/bin/perl
+
+use strict;
+
+use enum qw(:u_ refmethod refsource refgroup refseq refstart refstop refscore refstrand refphase qrystart qrystop sizes starts);
+use enum qw(:v_ refmethod refsource refgroup refseq refstrand refscore refphase txstart txstop cdsstart cdsstop exonstarts exonstops);
+
+use enum qw(:all_bacends__ x matches misMatches repMatches nCount qNumInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:all_est__     bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:all_mrna__   bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:all_sts_primer__ matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:all_sts_seq__    matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:bacEndPairs__ bin chrom chromStart chromEnd name score strand pslTable lfCount lfStarts lfSizes lfNames);
+use enum qw(:blatFish__  bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:gap__ bin chrom chromStart chromEnd ix n size type bridge);
+use enum qw(:gl__ bin frag start end strand);
+use enum qw(:gold__ bin chrom chromStart chromEnd ix type frag fragStart fragEnd strand);
+use enum qw(:intronEst__ bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:mrna__      bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:rmsk__      bin swScore milliDiv milliDel milliIns genoName genoStart genoEnd genoLeft strand repName repClass repFamily repStart repEnd repLeft id);
+use enum qw(:clonePos__ name seqSize phase chrom chromStart chromEnd stage faFile);
+use enum qw(:ctgPos__ contig size chrom chromStart chromEnd);
+use enum qw(:cytoBand__ chrom chromStart chromEnd name gieStain);
+use enum qw(:fishClones__ chrom chromStart chromEnd name score placeCount bandStarts bandEnds labs placeType accCount accNames stsCount stsNames beCount beNames);
+use enum qw(:gcPercent__ chrom chromStart chromEnd name gcPpt);
+use enum qw(:genscan__ name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds);
+use enum qw(:genscanSubopt__ bin chrom chromStart chromEnd name score strand);
+use enum qw(:jaxOrtholog__ humanSymbol humanBand mgiId mouseSymbol mouseChr mouseCm mouseBand);
+use enum qw(:refGene__ name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds);
+use enum qw(:refLink__ name product mrnaAcc protAcc geneName prodName locusLinkID omimId);
+use enum qw(:refSeqAli__    bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert tBaseInsert strand qName qSize qStart qEnd tName tSize tStart tEnd blockCount blockSizes qStarts tStarts);
+use enum qw(:simpleRepeat__ bin chrom chromStart chromEnd name period copyNum consensusSize perMatch perIndel score A C G T entropy sequence);
+use enum qw(:stsAlias__ alias identNo trueName);
+use enum qw(:stsInfo__ identNo name gbCount genbank gdbCount gdb nameCount otherNames dbSTSid otherDbstsCount otherDbSTS leftPrimer rightPrimer distance organism sequence otherUCSCcount otherUCSC mergeUCSCcount mergeUCSC genethonName genethonChr genethonPos genethonLOD marshfieldName marshfieldChr marshfieldPos marshfieldLOD wiyacName wiyacChr wiyacPos wiyacLOD wirhName wirhChr wirhPos wirhLOD gm99gb4Name gm99gb4Chr gm99gb4Pos gm99gb4LOD gm99g3Name gm99g3Chr gm99g3Pos gm99g3LOD tngName tngChr tngPos tngLOD);
+use enum qw(:stsMap__ chrom chromStart chromEnd name score identNo ctgAcc otherAcc genethonChrom genethonPos marshfieldChrom marshfieldPos gm99Gb4Chrom gm99Gb4Pos shgcTngChrom shgcTngPos shgcG3Chrom shgcG3Pos wiYacChrom wiYacPos wiRhChrom wiRhPos fishChrom beginBand endBand lab);
+use enum qw(:uniGene_2__ bin chrom chromStart chromEnd name score strand txStart txEnd reserved exonCount exonStarts exonEnds);
+###############################################
+# end enum
+###############################################
+
+my %parentpos;
+my %nolandmark = map {$_=>1} qw(gap cpgIsland recombRate_decode recombRate_marshfield recombRate_genethon
+								humMusL zoom1_humMusL zoom50_humMusL zoom2500_humMusL
+								genscanSubopt simpleRepeat snpNih snpTsc
+							   );
+
+# Convert every UCSC table dump named on the command line into a GFF file.
+# The output name is the input name with "txt.gz" replaced by "gff"; the
+# column mapping used for each row is chosen from the input file name.
+foreach my $filename (@ARGV){
+  my $newfilename = $filename;
+  $newfilename =~ s/txt\.gz/gff/;
+
+  # If the substitution changed nothing the output would be opened on top of
+  # the input file and truncate it, so such files are skipped.
+  if($newfilename eq $filename){
+    warn "skipping $filename: name does not contain 'txt.gz', cannot derive an output name\n";
+    next;
+  }
+
+  # List-form pipe open: the file name reaches zcat as a single argument and
+  # is never interpreted by a shell.
+  my $fhi;
+  unless(open($fhi, '-|', 'zcat', $filename)){
+    warn "skipping $filename: cannot run zcat: $!\n";
+    next;
+  }
+  my $fho;
+  unless(open($fho, '>', $newfilename)){
+    warn "skipping $filename: cannot write $newfilename: $!\n";
+    close($fhi);
+    next;
+  }
+
+  while(my $line = <$fhi>){
+
+  #these three should work the same way as unigene, but the fields are different order
+  # $filename =~ /affyRatio/               ? toGFF($line,$fho,['affyRatio',               '', 3, 0, 1, 2, 4, 5,-1,-1,-1,10,11]) :
+  # $filename =~ /nci60/                   ? toGFF($line,$fho,['nci60',                   '', 3, 0, 1, 2, 4, 5,-1,-1,-1,10,11]) :
+  # $filename =~ /rnaCluster/              ? toGFF($line,$fho,['rnaCluster',              '', 4, 1, 2, 3, 5, 6,-1, 7, 8,11,12]) :
+
+  #these two are not yet handled
+  # $filename =~ /cpgIsland/               ? toGFF($line,$fho,['cpgIsland',               '', 3, 0, 1, 2,-1,-1,-1]) :
+  # $filename =~ /estOrientInfo/           ? toGFF($line,$fho,['estOrientInfo',           '',]) :
+
+  # The first pattern that matches the file name wins, so more specific
+  # names must stay above the shorter names they contain.
+    $filename =~ /uniGene_2/               ? toGFF($line,$fho,['uniGene_2',               '', 4, 1, 2, 3, 5, 6,-1,-1,-1,11,12]) :
+
+    $filename =~ /all_bacends/             ? toGFF($line,$fho,['bacends',                 '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
+    $filename =~ /all_est/                 ? toGFF($line,$fho,['est',                     '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /all_mrna/                ? toGFF($line,$fho,['mrna',                    '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /all_sts_primer/          ? toGFF($line,$fho,['sts_primer',              '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
+    $filename =~ /all_sts_seq/             ? toGFF($line,$fho,['sts_seq',                 '', 9,13,15,16,-1, 8,-1,11,12,18,20]) :
+    $filename =~ /blastzBestMouse/         ? toGFF($line,$fho,['blastzBestMouse',         '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /blastzMm2/               ? toGFF($line,$fho,['blastzMm2',               '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /blastzTightMouse/        ? toGFF($line,$fho,['blastzTightMouse',        '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /blatFish/                ? toGFF($line,$fho,['blatFish',                '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /chimpBac/                ? toGFF($line,$fho,['chimpBac',                '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /chimpBlat/               ? toGFF($line,$fho,['chimpBlat',               '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /clonePos/                ? toGFF($line,$fho,['clonePos',                '', 0, 3, 4, 5,-1,-1, 2]) :
+    $filename =~ /ctgPos/                  ? toGFF($line,$fho,['ctgPos',                  '', 0, 2, 3, 4,-1,-1,-1]) :
+    $filename =~ /cytoBand/                ? toGFF($line,$fho,['cytoBand',                '', 3, 0, 1, 2,-1,-1,-1]) :
+    $filename =~ /est/                     ? toGFF($line,$fho,['est',                     '',10,14,16,17,-1,9,-1,12,13,19,21]) :
+    $filename =~ /fishClones/              ? toGFF($line,$fho,['fishClones',              '', 3, 0, 1, 2, 4,-1,-1]) :
+    $filename =~ /gap/                     ? toGFF($line,$fho,['gap',                     '', 7, 1, 2, 3,-1,-1,-1]) :
+    $filename =~ /gcPercent/               ? toGFF($line,$fho,['gcPercent',               '', 3, 0, 1, 2, 4,-1,-1]) :
+    $filename =~ /genMapDb/                ? toGFF($line,$fho,['genMapDb',                '', 3, 0, 1, 2, 4, 5,-1]) :
+    $filename =~ /genscanSubopt/           ? toGFF($line,$fho,['genscanSubopt',           '', 4, 1, 2, 3, 5, 6,-1]) :
+    $filename =~ /gold/                    ? toGFF($line,$fho,['gold',                    '', 6, 1, 2, 3,-1, 9,-1, 7, 8]) :
+    $filename =~ /intronEst/               ? toGFF($line,$fho,['intron_est',              '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /recombRate/              ? eval { toGFF($line,$fho,['recombRate_decode',       '', 3, 0, 1, 2, 4,-1,-1]);
+                                                    toGFF($line,$fho,['recombRate_marshfield',   '', 3, 0, 1, 2, 7,-1,-1]);
+                                                    toGFF($line,$fho,['recombRate_genethon',     '', 3, 0, 1, 2,10,-1,-1]); } :
+
+    $filename =~ /refSeqAli/               ? toGFF($line,$fho,['refSeqAli',               '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /rmsk/                    ? toGFF($line,$fho,['rmsk',                    '',10, 5, 6, 7,-1, 9,-1,13,14]) :
+    $filename =~ /simpleRepeat/            ? toGFF($line,$fho,['simpleRepeat',            '', 4, 1, 2, 3,10,-1,-1]) :
+    $filename =~ /snpNih/                  ? toGFF($line,$fho,['snpNih',                  '', 4, 1, 2, 3]) :
+    $filename =~ /snpTsc/                  ? toGFF($line,$fho,['snpTsc',                  '', 4, 1, 2, 3]) :
+    $filename =~ /stsMap/                  ? toGFF($line,$fho,['stsMap',                  '', 3, 0, 1, 2, 4,-1,-1]) :
+    $filename =~ /xenoEst/                 ? toGFF($line,$fho,['xenoEst',                 '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /xenoMrna/                ? toGFF($line,$fho,['xenoMrna',                '',10,14,16,17,-1, 9,-1,12,13,19,21]) :
+    $filename =~ /zoom1_humMusL/           ? toGFF($line,$fho,['zoom1_humMusL',           '', 4, 1, 2, 3, 5, 6,-1]) :
+    $filename =~ /zoom2500_humMusL/        ? toGFF($line,$fho,['zoom2500_humMusL',        '', 4, 1, 2, 3, 5, 6,-1]) :
+    $filename =~ /zoom50_humMusL/          ? toGFF($line,$fho,['zoom50_humMusL',          '', 4, 1, 2, 3, 5, 6,-1]) :
+    $filename =~ /humMusL/                 ? toGFF($line,$fho,['humMusL',                 '', 4, 1, 2, 3, 5, 6,-1]) :
+
+     $filename =~ /(refGene|genscan|acembly|ensGene|refFlat|sanger22pseudo|sanger22|softberryGene|twinscan)/ ?
+     toGFF2($line,$fho,
+	    [$1, -1, 0, 2, 3, -1, -1, 4, 5, 6, 7, 9, 10]) :
+
+    0;
+  }
+
+  # close() on a pipe also reports a failing zcat; on the output handle it
+  # reports buffered write errors.
+  close($fhi) or warn "zcat $filename did not finish cleanly" . ($! ? ": $!" : " (status $?)") . "\n";
+  close($fho) or warn "error closing $newfilename: $!\n";
+}
+
+###############################################
+# begin filetype-specific subroutines
+###############################################
+
+# toGFF2 - print GFF lines for one record of a gene-model style table.
+#
+#   $line : one tab-separated input record (its trailing newline is removed)
+#   $fho  : filehandle the GFF lines are printed to
+#   $maps : array ref indexed by the v_* constants; every slot is passed to
+#           render() together with the split record
+#
+# Unless the rendered method is listed in %nolandmark, a "Sequence" line
+# (v_txstart..v_txstop) and a "CDS" line (v_cdsstart..v_cdsstop) are printed.
+# When both exon slots of $maps are defined, one further line is printed per
+# exon start/stop pair.
+sub toGFF2 {
+  my($line,$fho, $maps) = @_;
+
+  chomp $line; my @fields = split /\t/, $line;
+
+  if(!$nolandmark{render($maps->[v_refmethod],\@fields)}){
+	print $fho join "\t", map {render($maps->[$_],\@fields)}
+	  (v_refseq, v_refsource, v_refmethod, v_txstart, v_txstop,
+	   v_refscore, v_refstrand, v_refphase,);
+	print $fho "\t";
+	print $fho "Sequence " . render($maps->[v_refgroup],\@fields);
+	print $fho "\n";
+  }
+
+  if(!$nolandmark{render($maps->[v_refmethod],\@fields)}){
+	print $fho join "\t", map {render($maps->[$_],\@fields)}
+	  (v_refseq, v_refsource, v_refmethod, v_cdsstart, v_cdsstop,
+	   v_refscore, v_refstrand, v_refphase,);
+	print $fho "\t";
+	print $fho "CDS " . render($maps->[v_refgroup],\@fields);
+	print $fho "\n";
+  }
+
+  if(defined($maps->[v_exonstarts]) and defined($maps->[v_exonstops])){
+	my @starts = split /,/, render($maps->[v_exonstarts],\@fields);
+	my @stops  = split /,/, render($maps->[v_exonstops],\@fields);
+
+	# Test with defined(): a plain truth test would end the loop at an exon
+	# whose start coordinate is 0 and drop it and every exon after it.
+	while(defined(my $start = shift @starts)){
+	  my $stop = shift @stops;
+	  # "\n" is part of join's list, so each line ends in a tab before the
+	  # newline; kept as is to leave the output format unchanged.
+	  print $fho join "\t", (render($maps->[v_refseq],\@fields),
+						render($maps->[v_refsource],\@fields),
+						render($maps->[v_refmethod],\@fields),
+						$start,
+						$stop,
+						render($maps->[v_refscore],\@fields),
+						render($maps->[v_refstrand],\@fields),
+						render($maps->[v_refphase],\@fields),
+						render($maps->[v_refmethod],\@fields) . " " . render($maps->[v_refgroup],\@fields)
+					   ), "\n";
+	}
+  }
+
+}
+
+# toGFF - print GFF lines for one record of a feature/alignment table.
+#
+#   $line : one tab-separated input record (its trailing newline is removed)
+#   $fho  : filehandle the GFF lines are printed to
+#   $maps : array ref indexed by the u_* constants; every slot is passed to
+#           render() together with the split record
+sub toGFF {
+  my($line,$fho, $maps) = @_;
+
+  chomp $line; my @fields = split /\t/, $line;
+
+  # Landmark line: printed when the query-start slot is false (undefined or 0)
+  # and the rendered method is not listed in %nolandmark.
+  if(!$maps->[u_qrystart] and !$nolandmark{render($maps->[u_refmethod],\@fields)}){
+	print $fho join "\t", map {render($maps->[$_],\@fields)} (u_refseq,
+														 u_refsource,
+														 u_refmethod,
+														 u_refstart,
+														 u_refstop,
+														 u_refscore,
+														 u_refstrand,
+														 u_refphase);
+	print $fho "\t";
+	print $fho "Sequence " . render($maps->[u_refgroup],\@fields);
+	print $fho "\n";
+  }
+  # Feature line: the same eight columns, followed by the group column below.
+  print $fho join "\t", map {render($maps->[$_],\@fields)} (u_refseq,
+													   u_refsource,
+													   u_refmethod,
+													   u_refstart,
+													   u_refstop,
+													   u_refscore,
+													   u_refstrand,
+													   u_refphase);
+  print $fho "\t";
+  # NOTE(review): an undefined query-start slot compares as 0 here, so maps
+  # without query slots also take the "Target:" branch -- confirm intended.
+  if($maps->[u_qrystart] >= 0){
+    print $fho "Target:" . render($maps->[u_refmethod],\@fields) . " ";
+    print $fho render($maps->[u_refgroup],\@fields) . " " .
+	    render($maps->[u_qrystart],\@fields) . " " .
+	    render($maps->[u_qrystop], \@fields);
+  } else {
+    print $fho "Sequence " . render($maps->[u_refgroup],\@fields) . " ";
+  }
+  print $fho "\n";
+
+  # Per-block lines, only when both the starts and the sizes slot are present.
+  if(defined($maps->[u_starts]) and defined($maps->[u_sizes])){
+	my @starts = split /,/, render($maps->[u_starts],\@fields);
+	my @sizes = split /,/, render($maps->[u_sizes],\@fields);
+
+	my $start;
+	while(defined($start = shift @starts)){
+	  my $size = shift @sizes;
+
+	  # Both query slots below 1: the block start is added to the rendered
+	  # reference start.  Otherwise the block start is printed unchanged.
+	  # In both branches "\n" is part of join's list, so the line ends in a
+	  # tab before the newline.
+	  if($maps->[u_qrystart] < 1 and $maps->[u_qrystop] < 1){
+	    print $fho join "\t", (render($maps->[u_refseq],\@fields),
+						render($maps->[u_refsource],\@fields),
+						render($maps->[u_refmethod],\@fields),
+						render($maps->[u_refstart],\@fields) + $start,
+						render($maps->[u_refstart],\@fields) + $start + $size,
+						render($maps->[u_refscore],\@fields),
+						render($maps->[u_refstrand],\@fields),
+						render($maps->[u_refphase],\@fields),
+						render($maps->[u_refmethod],\@fields) . " " . render($maps->[u_refgroup],\@fields)
+					   ), "\n";
+	  } else {
+	    print $fho join "\t", (render($maps->[u_refseq],\@fields),
+						render($maps->[u_refsource],\@fields),
+						render($maps->[u_refmethod],\@fields),
+						$start,
+						$start + $size,
+						render($maps->[u_refscore],\@fields),
+						render($maps->[u_refstrand],\@fields),
+						render($maps->[u_refphase],\@fields),
+						render($maps->[u_refmethod],\@fields) . " " . render($maps->[u_refgroup],\@fields)
+					   ), "\n";
+	  }
+	}
+  }
+}
+
+# render - turn one slot of a column map into the text for a GFF column.
+#
+#   -1                    => the placeholder '.'
+#   a string of digits    => the record field at that index
+#   anything else         => the slot value itself, used as a literal
+sub render {
+  my ($slot, $record) = @_;
+
+  if ($slot == -1) {
+    return '.';
+  }
+  if ($slot =~ /^\d+$/) {
+    return $record->[$slot];
+  }
+  return $slot;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/Bio-DB-GFF/load_ucsc.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/align/align_on_codons.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/align/align_on_codons.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/align/align_on_codons.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+#!/usr/bin/perl -w
+
+use strict;
+use vars qw($USAGE %VALIDALIGN $CODONSIZE);
+use Bio::SeqIO;
+use Bio::AlignIO;
+use Bio::LocatableSeq;
+use Bio::SimpleAlign;
+use Getopt::Long;
+use Bio::Tools::CodonTable;
+use Carp;
+
+# Compile-time setup: codon size, default locations for the external
+# aligners, the usage text, and the table of supported alignment programs.
+BEGIN {
+    $CODONSIZE = 3; # parametrize everything like a good little programmer
+    # fall back to /usr/local/bin when the environment does not name a directory
+    if( ! defined $ENV{'CLUSTALDIR'} ) { 
+	$ENV{'CLUSTALDIR'} = '/usr/local/bin';
+    } 
+    if( ! defined $ENV{'TCOFFEEDIR'} ) { 
+	$ENV{'TCOFFEEDIR'} = '/usr/local/bin';
+    }
+    $USAGE = 
+qq{align_on_codons.pl < file.fa
+-h/--help                 See this information
+-f/--frame            Translation Frame (0,1,2) are valid (defaults to '0')
+-ct/--codontable      Codon table to use (defaults to '1')
+                        see perldoc Bio::PrimarySeq for more information
+-i/--input            Input Filename (defaults to STDIN)
+-o/--output           Output Filename (defaults to STDOUT)
+-sf/--seqformat       Input format (defaults to FASTA/Pearson)
+-af/--alignformat     Alignment output format (clustal,fasta,nexus,phylip,
+		      msf,pfam,mase,meme,prodom,selex,stockholm)
+-ap/--alignprog       ClustalW, TCoffee (currently only support 
+					 local execution) 
+-v/--verbose          Run in verbose mode
+};
+
+    # value of --alignprog => class that is loaded and instantiated for it
+    %VALIDALIGN = ('clustalw' => 'Bio::Tools::Run::Alignment::Clustalw',
+		   'tcoffee' => 'Bio::Tools::Run::Alignment::TCoffee');
+}
+
+# switches without a default value
+my ($help, $input, $output);
+
+# switches with a default value
+my $alignprog  = 'clustalw';
+my $sformat    = 'fasta';
+my $aformat    = 'clustalw';
+my $frame      = 0;
+my $codontable = 1;
+my $verbose    = 0;
+
+GetOptions(
+    'h|help'            => \$help,
+    'i|input:s'         => \$input,
+    'o|output:s'        => \$output,
+    'sf|seqformat:s'    => \$sformat,
+    'af|alignformat:s'  => \$aformat,
+    'ap|alignprog:s'    => \$alignprog,
+    # for translate
+    'f|frame:s'         => \$frame,
+    'ct|codontable:s'   => \$codontable,
+    'v|verbose'         => \$verbose,
+);
+
+# --help prints the usage text through die
+die($USAGE) if $help;
+
+# only the programs listed in %VALIDALIGN are accepted
+die("Cannot use $alignprog as 'alignprog' parameter")
+    unless $alignprog && defined $VALIDALIGN{$alignprog};
+
+# turn the class name of the chosen aligner into a file path and load it
+{
+    ( my $modname = $VALIDALIGN{$alignprog} . ".pm" ) =~ s/::/\//g;
+    require $modname;
+}
+
+# Alignment writer: to the file named by --output when one was given,
+# otherwise constructed without a -file argument.
+my $alignout;
+if( $output ) {
+    $alignout = Bio::AlignIO->new('-format' => $aformat,
+				  '-file'   => ">$output");
+} else { 
+    $alignout = Bio::AlignIO->new('-format' => $aformat);
+}
+
+# nucleotide sequences as read, and their translations in the same order
+my (@nucseqs, @protseqs);
+my $seqio;
+
+# Sequence reader: from the file named by --input, otherwise from STDIN.
+if( $input ) {
+    $seqio = Bio::SeqIO->new('-format' => $sformat,
+			     '-file'   => $input);
+} else { 
+    $seqio = Bio::SeqIO->new('-format' => $sformat,
+			     '-fh'     => \*STDIN);
+}
+
+# only referenced by the disabled stop-codon trimming below
+my $table = Bio::Tools::CodonTable->new();
+
+# read every input sequence, keeping the nucleotide sequence and its
+# translation at the same position of @nucseqs and @protseqs
+while( my $seq = $seqio->next_seq ) {
+
+	#    if( $frame == 0 && $alignprog eq 'tcoffee' ) {
+	#	print "last codon is ",$seq->subseq($seq->length() -2,
+	#					    $seq->length()), "\n";
+	#	if( $table->is_ter_codon($seq->subseq($seq->length() -2,
+	#					      $seq->length())) ) {
+	#	    $seq->seq($seq->subseq(1,$seq->length() - 3));
+	#	}
+	#    }
+
+	push @nucseqs, $seq;
+
+	# translate() is called in list context, as a direct argument of push would be
+	my @translated = $seq->translate(-frame         => $frame,
+	                                 -codontable_id => $codontable );
+	push @protseqs, @translated;
+}
+
+if( @nucseqs <= 1 ) {
+	die("Must specify > 1 sequence for alignment on codons");
+}
+
+# allow these to be tweaked by cmdline parameters at some point
+my @params = ('ktuple' => 2, 'matrix' => 'BLOSUM'); 
+
+my $alignengine = $VALIDALIGN{$alignprog}->new('-verbose' => $verbose,
+					       @params);
+
+# align the translations; the protein alignment is then mapped back to DNA
+my $aln = $alignengine->align(\@protseqs);
+
+# Nucleotides in front of the first translated codon.  The proteins were
+# produced with -frame => $frame, so protein residue i corresponds to
+# nucleotides (i-1)*CODONSIZE + 1 + offset .. i*CODONSIZE + offset.
+# Without this offset frames 1 and 2 pick codons from the wrong positions.
+my $frameoffset = $frame || 0;
+
+my $dnaalign = Bio::SimpleAlign->new;
+my $seqorder = 0;
+my $alnlen = $aln->length;
+# NOTE(review): $seqorder assumes each_seq returns the sequences in the
+# order they were handed to the aligner -- confirm.
+foreach my $seq ( $aln->each_seq ) {    
+	my $newseq;
+    
+	foreach my $pos ( 1..$alnlen ) {
+		my $loc = $seq->location_from_column($pos);
+		my $dna = ''; 
+		if( !defined $loc || $loc->location_type ne 'EXACT' ) {
+			# gap column in the protein alignment: one gap codon
+			$dna = '---';
+		} else {
+			# to readjust to codon boundaries
+			# end needs to be +1 so we can just multiply by CODONSIZE 
+			# to get this
+			my ($start,$end) = ((($loc->start - 1)*$CODONSIZE) + 1 + $frameoffset,
+									  (($loc->end)*$CODONSIZE) + $frameoffset);
+			if( $start <=0 || $end > $nucseqs[$seqorder]->length() ) {
+				print "start is ", $loc->start, " end is ", $loc->end, "\n";
+				warn("codons don't seem to be matching up for $start,$end");
+				$dna = '---';
+			} else {
+				$dna = $nucseqs[$seqorder]->subseq($start,$end);
+			}
+		}
+		$newseq .= $dna;
+	}
+	$seqorder++;
+	# funky looking math is to readjust to codon boundaries and deal
+	# with fact that sequence start with 1
+	my $newdna = Bio::LocatableSeq->new(-display_id  => $seq->id(),
+										-start => (($seq->start - 1) * 
+												   $CODONSIZE) + 1 + $frameoffset, 
+										-end   => ($seq->end * $CODONSIZE) + $frameoffset,
+										-strand => $seq->strand,
+										-seq   => $newseq);    
+	$dnaalign->add_seq($newdna);
+}
+
+$alignout->write_aln($dnaalign);
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/align/align_on_codons.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/align/aligntutorial.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/align/aligntutorial.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/align/aligntutorial.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,85 @@
+#!/usr/bin/perl -w
+
+# $Id: aligntutorial.pl,v 1.1 2003/07/07 18:20:58 bosborne Exp $
+# An example of how to use the different alignment tools in bioperl
+# to align some sequences
+
+# All these methods except Bio::Tools::pSW will work for DNA sequence
+# (need to use a different matrix however)
+
+use Bio::Factory::EMBOSS;
+use Bio::SeqIO;
+use Bio::AlignIO;
+use Bio::Tools::pSW;
+use Bio::PrimarySeq;
+use Bio::Tools::Run::Alignment::Clustalw;
+use Bio::Tools::Run::Alignment::TCoffee;
+use Bio::Tools::Run::StandAloneBlast;
+
+use strict;
+# build the sequences since EMBOSS expects seqs to be in files
+my $seq = new Bio::PrimarySeq(-seq => 
+				  'MAVNPELAPFTLSRGIPSFDDQALSTIIQLQDCIQQAIQQLNYSTAEFLAELLYAECSILDKSSVYWSDAVYLYALSLFLNKSYHTAFQISKEFKEYHLGIAYIFGRCALQLSQGVNEAILTLLSIINVFSSNSSNTRINMVLNSNLVHIPDLATLNCLLGNLYMKLDHSKEGAFYHSEALAINPYLWESYEAICKMRATVDLKRVFFDIAGKKSNSHNNNAASSFPSTSLSHFEPRSQPSLYSKTNKNGNNNINNNVNTLFQSSNSPPSTSASSFSSIQHFSRSQQQQANTSIRTCQNKNTQTPKNPAINSKTSSALPNNISMNLVSPSSKQPTISSLAKVYNRNKLLTTPPSKLLNNDRNHQNNNNNNNNNNNNNNNNNNNNNNNNIINKTTFKTPRNLYSSTGRLTTSKKNPRSLIISNSILTSDYQITLPEIMYNFALILRSSSQYNSFKAIRLFESQIPSHIKDTMPWCLVQLGKLHFEIINYDMSLKYFNRLKDLQPARVKDMEIFSTLLWHLHDKVKSSNLANGLMDTMPNKPETWCCIGNLLSLQKDHDAAIKAFEKATQLDPNFAYAYTLQGHEHSSNDSSDSAKTCYRKALACDPQHYNAYYGLGTSAMKLGQYEEALLYFEKARSINPVNVVLICCCGGSLEKLGYKEKALQYYELACHLQPTSSLSKYKMGQLLYSMTRYNVALQTFEELVKLVPDDATAHYLLGQTYRIVGRKKDAIKELTVAMNLDPKGNQVIIDELQKCHMQE',
+				  -id => 'seq1'
+				  ); 
+my $seq2 = new Bio::PrimarySeq( -seq =>
+				'CLIFXRLLLIQMIHPQARRAFTFLQQQEPYRIQSMEQLSTLLWHLADLPALSHLSQSLISISRSSPQAWIAVGNCFSLQKDHDEAMRCFRRATQVDEGCAYAWTLCGYEAVEMEEYERAMAFYRTAIRTDARHYNAWYVLFFFFFFFFVPGDIDSXPKKGMEWGXFISKRIDRGMRSIILKEPSKSIQLIPFFYVALVWXVGVSSYPLETMTNIDFPKKKKALEKSNDVVQALHFYERASKYAPTSAMVQFKRIRALVALQRYDEAISALVPLTHSAPDEANVFFLLGKCLLKKERRQEATMAFTNARELEPK',
+				    -id => 'seq2');
+
+    # write each sequence to its own FASTA file for the EMBOSS programs
+    my $out;
+    foreach my $job ( [ $seq, ">seq1.fa" ], [ $seq2, ">seq2.fa" ] ) {
+	my ($sequence, $target) = @$job;
+	$out = Bio::SeqIO->new(-format => 'fasta',
+			       -file   => $target);
+	$out->write_seq($sequence);
+	$out->close();
+    }
+
+
+my $embossfactory = Bio::Factory::EMBOSS->new();
+
+# EMBOSS programs looped over below, and the MSF writer used for all results
+my @alignprogs = qw(water needle stretcher matcher);
+my $alignout = Bio::AlignIO->new(-format => 'msf');
+
+
+
+# Run an EMBOSS alignment once per entry of @alignprogs, read the result
+# file back and write the alignment through $alignout.
+foreach my $prog ( @alignprogs ) {
+    # NOTE(review): the program is always 'water'; $prog is only used in the
+    # output file name.  $prog was presumably intended here -- confirm that
+    # the options below are valid for every program before changing it.
+    my $alignfactory = $embossfactory->program('water');
+
+
+    $alignfactory->run({ '-sequencea' => 'seq1.fa',
+			    '-seqall'    => 'seq2.fa',
+			    '-gapext'    => 2.0,
+			    '-datafile'  => 'EBLOSUM62',
+			    '-gapopen'   => 14.0,
+			    '-outfile'   => "seq1_vs_seq2.$prog"});
+
+    # parse the file written by the run above
+    my $alnin = new Bio::AlignIO(-format => 'emboss',
+				 -file   => "seq1_vs_seq2.$prog");
+    my $aln = $alnin->next_aln(); 
+    $alignout->write_aln($aln);
+}
+
+# this should produce the same alignment as 'water'
+my $factory = Bio::Tools::pSW->new(-matrix => 'blosum62.bla',
+				   -gap    => 14,
+				   -ext    => 2);
+my $aln = $factory->pairwise_alignment($seq,$seq2);
+$alignout->write_aln($aln);
+
+# Clustalw and TCoffee are constructed and called the same way
+foreach my $class ( qw(Bio::Tools::Run::Alignment::Clustalw
+		       Bio::Tools::Run::Alignment::TCoffee) ) {
+    $factory = $class->new('ktuple' => 2,
+			   'matrix' => 'BLOSUM');
+    $aln = $factory->align([$seq,$seq2]);
+    $alignout->write_aln($aln);
+}
+
+$factory = Bio::Tools::Run::StandAloneBlast->new();
+$aln = $factory->bl2seq($seq,$seq2);
+
+# this actually returns a Bio::Tools::BPbl2seq object
+# it can be transformed to a SimpleAlign object see
+# the code in Bio::AlignIO::bl2seq
+# A transformer object will be written at some point


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/align/aligntutorial.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/align/clustalw.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/align/clustalw.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/align/clustalw.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,253 @@
+#!/usr/bin/perl
+# PROGRAM  : clustalw.pl
+# PURPOSE  : Demonstrate possible uses of Bio::Tools::Run::Alignment::Clustalw.pm
+# AUTHOR   : Peter Schattner schattner at alum.mit.edu
+# CREATED  : Oct 06 2000
+# REVISION : $Id: clustalw.pl,v 1.2 2005/09/16 15:01:48 bosborne Exp $
+#
+# INSTALLATION
+#
+# You will need to have installed clustalw and to ensure that Clustalw.pm can find it.
+# This can be done in different ways (bash syntax):
+#	   export PATH=$PATH:/home/peter/clustalw1.8
+#  or
+#     define an environmental variable CLUSTALDIR:
+#	   export CLUSTALDIR=/home/peter/clustalw1.8   
+#  or
+#     include a definition of an environmental variable CLUSTALDIR in every
+#     script that will use Clustalw.pm.
+#	   BEGIN {$ENV{CLUSTALDIR} = '/home/peter/clustalw1.8/'; }
+#
+#  We are going to demonstrate 3 possible applications of Clustalw.pm:
+#	1. Test effect of varying clustalw alignment parameter(s) on resulting alignment
+#	2. Test effect of changing the order that sequences are added to the alignment
+#		on the resulting alignment
+#	3. Test effect of incorporating an "anchor point" in the alignment process
+#
+#  Before we can do any tests, we need to set up the environment, create the factory
+#  and read in the unaligned sequences.
+#
+
+#BEGIN {
+#	$ENV{CLUSTALDIR} = '/home/peter/clustalw1.8/';
+#}
+
+use Getopt::Long;
+use Bio::Tools::Run::Alignment::Clustalw;
+use Bio::SimpleAlign;
+use Bio::AlignIO;
+use Bio::SeqIO;
+use strict;
+
+# set some default values
+my $infile = 't/data/cysprot1a.fa';
+my @params = ('quiet' => 1 );
+my $do_only = '123';   # string listing examples to be executed. Default is to
+			              # execute all tests (ie 1,2 and 3)
+my $param = 'ktuple';  # parameter to be varied in example 1
+my $startvalue = 1;    # initial value for parameter $param
+my $stopvalue = 3;     # final value for parameter $param
+my $regex = 'W[AT]F';  # regular expression for 'anchoring' alignment in example 3
+my $extension = 30; 	  # distance regexp anchor should be extended in each direction
+			              # for local alignment in example 3
+my $helpflag = 0;      # Flag to show usage info.
+
+# get user options
+my @argv = @ARGV;  # copy ARGV before GetOptions() massacres it.
+
+GetOptions("h!" => \$helpflag, "help!" => \$helpflag,
+				"in=s" => \$infile,
+				"param=s" => \$param,
+				"do=s" =>  \$do_only,
+				"start=i" =>  \$startvalue,
+				"stop=i" =>  \$stopvalue,
+				"ext=i" =>  \$extension,
+				"regex=s" =>  \$regex,) ;
+
+if ($helpflag) { clustalw_usage(); exit 0;}
+
+# create factory & set user-specified global clustalw parameters
+# (every command-line word of the form "name=>value" becomes a factory
+# parameter; all other words are ignored here)
+foreach my $argv (@argv) {
+	next unless $argv =~ /^(.*)=>(.*)$/;
+	push (@params, $1 => $2);
+}
+my  $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
+	
+
+# put unaligned sequences in a Bio::Seq array
+my $str = Bio::SeqIO->new(-file=> $infile, '-format' => 'Fasta');
+# scratch variables shared by the example subs below
+my ($paramvalue, $aln, $subaln, @consensus, $seq_num, $string, $strout, $id);
+my @seq_array =();
+while ( my $seq = $str->next_seq() ) { push (@seq_array, $seq) ;}
+
+# Do each example that has digit present in variable $do_only.
+# $do_only is tested directly rather than through the global $_, because a
+# called example (or library code it runs) may overwrite $_ before the next
+# test is evaluated.
+vary_params()      if $do_only =~ /1/;
+vary_align_order() if $do_only =~ /2/;
+anchored_align()   if $do_only =~ /3/;
+
+## End of "main"
+
+#################################################
+#   vary_params(): Example 1 - effect of varying one clustalw parameter
+#
+# Steps the factory parameter named by $param through the values
+# $startvalue .. $stopvalue, aligns @seq_array at every step and stores one
+# consensus sequence per alignment in @consensus.  The consensus sequences
+# are then aligned with each other and that alignment is printed through a
+# Bio::AlignIO 'msf' handle.  Uses the file-level variables and returns 1.
+
+sub vary_params {
+
+	print "Beginning parameter-varying example... \n";
+
+	# One alignment per parameter value.  (With the defaults this varies
+	# "ktuple" from 1 to 3, giving 3 alignments.)
+	my $slot = 0;   # position in @consensus filled by the current pass
+	$paramvalue = $startvalue;
+	until ($paramvalue >= ($stopvalue + 1)) {
+		# $param holds a method name, so this invokes that setter on the factory
+		$factory->$param($paramvalue);
+		print "Performing alignment with $param = $paramvalue \n";
+		$aln = $factory->align(\@seq_array);
+
+		# consensus of this alignment, with every '?' (non-consensus
+		# position) rewritten as 'X'
+		($string = $aln->consensus_string()) =~ tr/?/X/;
+		$consensus[$slot++] = Bio::Seq->new(-id  => "$param=$paramvalue",
+						    -seq => $string);
+
+		$paramvalue++;
+	}
+
+	# Compare the consensus strings obtained with the different $param
+	# values by aligning them with each other
+	# $factory->ktuple(1);  # set ktuple parameter	
+	print "Performing alignment of $param consensus sequences \n";
+	$aln = $factory->align(\@consensus);
+	$strout = Bio::AlignIO->newFh('-format' => 'msf');
+	print {$strout} $aln;
+
+	return 1;
+}
+
+
+#################################################
+#   vary_align_order(): Example 2 - effect of the order in which sequences
+#   are added to the alignment
+#
+# Each sequence in @seq_array is left out in turn: the remaining sequences
+# are aligned, the left-out sequence is added with profile_align(), and the
+# consensus of the result is stored in @consensus.  Finally the consensus
+# sequences are aligned with each other and printed through a Bio::AlignIO
+# 'msf' handle.  Uses the file-level variables and returns 1.
+
+sub vary_align_order {
+
+	print "\nBeginning alignment-order-changing example... \n";
+
+	@consensus = ();  # start from an empty list of consensus sequences
+	$seq_num = 0;
+	while ($seq_num < scalar(@seq_array)) {
+		# take the first Seq object off the array; it is appended again
+		# at the end of this pass, so every sequence gets its turn
+		my $left_out = shift @seq_array;
+		$id = $left_out->display_id;
+
+		print "Performing alignment with sequence $id left out \n";
+		$subaln = $factory->align(\@seq_array);
+
+		# add the left-out sequence to the sub-alignment
+		$aln = $factory->profile_align($subaln,$left_out);
+
+		# consensus of the full alignment, '?' rewritten as 'X'
+		($string = $aln->consensus_string()) =~ tr/?/X/;
+		$consensus[$seq_num] = Bio::Seq->new(-id  => "$id left out",
+						     -seq => $string);
+
+		push @seq_array, $left_out;
+		$seq_num++;
+	}
+
+	# Compare consensus strings for alignments created in different orders
+	# $factory->ktuple(1);  # set ktuple parameter	
+	print "\nPerforming alignment of consensus sequences for different reorderings \n";
+	print "Each consensus is labeled by the sequence which was omitted in the initial alignment\n";
+	$aln = $factory->align(\@consensus);
+	$strout = Bio::AlignIO->newFh('-format' => 'msf');
+	print {$strout} $aln;
+
+	return 1;
+}
+
+#################################################
+#   anchored_align()
+#
+# For our last example, we'll test a way to perform a local alignment by
+# "anchoring" the alignment to a regular expression.  This is similar
+# to the approach taken in the recent dbclustal program.
+# In principle, we could write a script to search for a good regular expression
+# to use. Instead, here we'll simply choose one manually after looking at the
+# previous alignments.
+#
+# For every sequence in @seq_array that matches $regex, the matched region
+# extended by $extension residues on each side (clipped to the sequence) is
+# cut out; those segments are aligned and the consensus is printed.
+# Returns 1 after an alignment was made, 0 when fewer than two sequences
+# matched and nothing could be aligned.
+
+sub anchored_align {
+
+	my @local_array = ();
+	my @seqs_not_matched = ();
+
+	print "\n Beginning anchored-alignment example... \n";
+
+	foreach my $seqobj (@seq_array) {
+		my $seq =  $seqobj->seq();
+		my $id =  $seqobj->id();
+		# if $regex is not found in the sequence, save the sequence id and
+		# leave the sequence out of the local alignment
+		unless ($seq =~/$regex/) {
+			push (@seqs_not_matched, $id) ;
+			next;
+		}
+		# $-[0] / $+[0] are the 0-based offsets of the start and end of the
+		# match, i.e. the values length($`) and length($`) + length($&) would
+		# give, without touching $` and $& (which on older perls slow down
+		# every regex match in the program)
+		my $match_start_pos = $-[0];
+		my $match_stop_pos = $+[0];
+
+		# 1-based, inclusive window handed to subseq(): the first matched
+		# residue is at $match_start_pos + 1, the last at $match_stop_pos
+		my $start = $match_start_pos - $extension + 1;
+		$start = 1 if $start < 1;
+		my $stop = $match_stop_pos + $extension;
+		$stop = length($seq) if $stop > length($seq);
+		my $string = $seqobj->subseq($start, $stop);
+
+		push @local_array, Bio::Seq->new(-id=>$id, -seq=>$string);
+	}
+
+	# An alignment needs at least two sequences; stop with a message rather
+	# than handing the factory an empty or single-element list
+	if (scalar(@local_array) < 2) {
+		print " Sequences not matching $regex : @seqs_not_matched \n"
+		  if scalar(@seqs_not_matched);
+		print " Fewer than two sequences match $regex - no local alignment performed \n";
+		return 0;
+	}
+
+	# Perform alignment on the local segments of the sequences which match "anchor"
+	$aln = $factory->align(\@local_array);
+	my $consensus  = $aln->consensus_string(); # Get consensus of local alignment
+
+	if (scalar(@seqs_not_matched) ) {
+		print " Sequences not matching $regex : @seqs_not_matched \n"
+	} else {
+		print " All sequences match $regex : @seqs_not_matched \n"
+	}
+	print "Consensus sequence of local alignment: $consensus \n";
+
+	return 1;
+}
+
+#----------------
+sub clustalw_usage {
+#----------------
+
+#-----------------------
+# Prints usage information for general parameters to STDERR.
+# Takes no arguments.  The text interpolates the current values of the
+# file-level variables $infile and $regex.
+
+    print STDERR <<"QQ_PARAMS_QQ";
+
+ Command-line accessible script variables and commands:
+ -------------------------------
+ -h 		       :  Display this usage info and exit.
+ -in <str>	    :  File containing input sequences in fasta format (default = $infile) .
+ -do <str>	    :  String listing examples to be executed. Default is to execute
+		             all tests (ie default = '123')
+ -param <str>   :  Parameter to be varied in example 1. Any clustalw parameter
+		             which takes integer values can be varied (default = 'ktuple')
+ -start <int>   :  Initial value for varying parameter in example 1 (default = 1)
+ -stop <int>    :  Final value for varying parameter (default = 3)
+ -regex   <str> :  Regular expression for 'anchoring' alignment in example 3
+                   (default = $regex)
+ -ext <int>     :  Distance regexp anchor should be extended in each direction
+		             for local alignment in example 3   (default = 30)
+
+In addition, any valid Clustalw parameter can be set using the syntax 
+"parameter=>value" as in "ktuple=>3"
+
+So typical command lines might be:
+ > clustalw.pl -param=pairgap -start=2 -stop=3 -do=1 "ktuple=>3"
+or
+ > clustalw.pl -ext=10 -regex='W[AST]F' -do=23 -in='t/data/cysprot1a.fa'
+
+QQ_PARAMS_QQ
+
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/align/clustalw.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/align/simplealign.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/align/simplealign.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/align/simplealign.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,182 @@
+#!/usr/bin/perl
+
+# PROGRAM  : simplealign.pl
+# PURPOSE  : Simple driver for Bio::SimpleAlign
+# AUTHOR   : Ewan Birney birney at sanger.ac.uk 
+# CREATED  : Tue Oct 27 1998
+# REVISION : $Id: simplealign.pl,v 1.1 2003/07/07 18:20:58 bosborne Exp $
+#
+# INSTALLATION
+#    If you have installed bioperl using the standard
+#    makefile system everything should be fine and 
+#    dandy.
+#
+#    if not edit the use lib "...." line to point the directory
+#    containing your Bioperl modules.
+#
+
+#use lib "/nfs/disk21/birney/prog/bioperl/";
+
+# Modified 3/5/01 to use AlignIO by Peter Schattner schattner at alum.mit.edu
+
+#
+# This uses the internal DATA stream (past the end of this
+# file, on the __END__ tag) to load in the data. We then
+# do some reformats, sort in a different way and a quick
+# getting into the alignment. All pretty simple ;)
+#
+
+#
+# The simplealign module does not do the following things
+#    a) give you sensible ways of asking if residues are a
+#       column of gaps or conservation 
+#    b) provide ways of editing the alignment
+#    c) making alignments
+#
+
+# 
+# a) and b) are probably best done by UnivAlign from Georg Fuellen
+# c) is done for pairwise alignments in Bio::Tools::pSW; and
+# also you can read in stuff from programs like clustal and hmmer
+# into this.
+#
+
+use strict;
+use Bio::SimpleAlign;
+use Bio::AlignIO;
+
+# read from a stream
+my $str = Bio::AlignIO->newFh('-fh'=> \*DATA, '-format' => 'pfam' );
+my $al = <$str>;
+
+# nothing below makes sense without an alignment
+die "Could not read an alignment from the DATA section\n" unless $al;
+
+# write out a MSF file
+my $out = Bio::AlignIO->newFh('-fh'=> \*STDOUT,  '-format' => 'msf');
+print {$out} $al;
+
+# order by alphabetically then start end
+$al->sort_alphabetically();
+
+# write in Pfam format now...
+my $out2=Bio::AlignIO->newFh( '-fh'=> \*STDOUT, '-format' => 'pfam');
+print {$out2} $al;
+
+# now set the display name to be 
+# name_# like roa1_human_1, roa1_human_2 etc
+# This **doesn't** change the underlying names of the
+# sequences you'll be glad to hear.
+
+$al->set_displayname_count();
+
+# dump again... bored of this yet?
+
+print {$out2} $al;
+
+# get into the alignment and get things out
+# we just want to see how many unique names
+# there are in this alignment
+
+my %hash;
+
+# loop over the alignment
+# (each_seq() is the current name of the method formerly called eachSeq())
+foreach my $seq ( $al->each_seq() ) {
+    # increment a hash on the name by one each time
+    $hash{$seq->id()}++;
+}
+
+# disgorge the hash
+
+foreach my $id ( keys %hash ) {
+    print "$id has $hash{$id} subsequences in this alignment\n";
+}
+
+__END__
+GR10_BRANA/8-79     CFVGGL......AWATGDAELERTFS.....Q.FGEV..IDSKIIND.............RETGRSRGFGFVTFKDEKSMKDAIDEMNG.K...ELDGRTITV
+HUD_HUMAN/48-119    LIVNYL......PQNMTQEEFRSLFG.....S.IGEI..ESCKLVRD.............KITGQSLGYGFVNYIDPKDAEKAINTLNG.L...RLQTKTIKV
+IF32_SCHPO/41-124   VVIEGAP....VVEEAKQQDFFRFLSSKVLAK.IGKVKENGFYMPFE.........EKNGK..KMSLGLVFADFENVDGADLCVQELDGKQ...ILKNHTFVV
+IF32_YEAST/79-157   IVVNGAPVIPSAKVPVLKKALTSLFS.....K.AGKV..VNMEFPID.............EATGKTKGFLFVECGSMNDAKKIIKSFHGKR...LDLKHRLFL
+IF4B_HUMAN/98-168   AFLGNL......PYDVTEESIKEFFR.....G.LNIS...AVRLPR............EPSNPERLKGFGYAEFEDLDSLLSALS.LNE.E...SLGNRRIRV
+LA_DROME/151-225    AYAKGF......PLDSQISELLDFTA.....N.YDKV..VNLTMRNS.........YDKPTKSYKFKGSIFLTFETKDQAKAFLE.QEK.I...VYKERELLR
+LA_HUMAN/113-182    VYIKGF......PTDATLDDIKEWLE.....D.KGQV..LNIQMRR..............TLHKAFKGSIFVVFDSIESAKKFVE.TPG.Q...KYKETDLLI
+MEI2_SCHPO/197-265  LFVTNL......PRIVPYATLLELFS.....K.LGDV..KGIDTSSL.................STDGICIVAFFDIRQAIQAAKSLRSQR...FFNDRLLYF
+MODU_DROME/177-246  VFVTNL......PNEYLHKDLVALFA.....K.FGRL..SALQRFTN................LNGNKSVLIAFDTSTGAEAVLQAKPKAL...TLGDNVLSV
+MODU_DROME/260-326  VVVGLI......GPNITKDDLKTFFE.....K.VAPV..EAVTISSN.................RLMPRAFVRLASVDDIPKALK.LHS.T...ELFSRFITV
+MODU_DROME/342-410  LVVENVG....KHESYSSDALEKIFK.....K.FGDV..EEIDVVC..................SKAVLAFVTFKQSDAATKALAQLDG.K...TVNKFEWKL
+MODU_DROME/422-484  ILVTNL......TSDATEADLRKVFN.....D.SGEI..ESIIMLG.....................QKAVVKFKDDEGFCKSFL.ANE.S...IVNNAPIFI
+MSSP_HUMAN/31-102   LYIRGL......PPHTTDQDLVKLCQ.....P.YGKI..VSTKAILD.............KTTNKCKGYGFVDFDSPAAAQKAVSALKA.S...GVQAQKAKQ
+NAM8_YEAST/165-237  IFVGDL......APNVTESQLFELFI.....NRYAST..SHAKIVHD.............QVTGMSKGYGFVKFTNSDEQQLALSEMQG.V...FLNGRAIKV
+NONA_DROME/304-369  LYVGNL......TNDITDDELREMFK.....P.YGEI..SEIFSNLD...................KNFTFLKVDYHPNAEKAKRALDG.S...MRKGRQLRV
+NONA_DROME/378-448  LRVSNL......TPFVSNELLYKSFE.....I.FGPI..ERASITVD..............DRGKHMGEGIVEFAKKSSASACLRMCNE.K...CFFLTASLR
+NOP3_YEAST/127-190  LFVRPF......PLDVQESELNEIFG.....P.FGPM..KEVKILN.....................GFAFVEFEEAESAAKAIEEVHG.K...SFANQPLEV
+NOP3_YEAST/202-270  ITMKNL......PEGCSWQDLKDLAR.....E.NSLE..TTFSSVN................TRDFDGTGALEFPSEEILVEALERLNN.I...EFRGSVITV
+NOP4_YEAST/28-98    LFVRSI......PQDVTDEQLADFFS.....N.FAPI..KHAVVVKD..............TNKRSRGFGFVSFAVEDDTKEALAKARK.T...KFNGHILRV
+NOP4_YEAST/292-363  VFVRNV......PYDATEESLAPHFS.....K.FGSV..KYALPVID.............KSTGLAKGTAFVAFKDQYTYNECIKNAPA.A...GSTSLLIGD
+NSR1_YEAST/170-241  IFVGRL......SWSIDDEWLKKEFE.....H.IGGV..IGARVIYE.............RGTDRSRGYGYVDFENKSYAEKAIQEMQG.K...EIDGRPINC
+NSR1_YEAST/269-340  LFLGNL......SFNADRDAIFELFA.....K.HGEV..VSVRIPTH.............PETEQPKGFGYVQFSNMEDAKKALDALQG.E...YIDNRPVRL
+NUCL_CHICK/283-352  LFVKNL......TPTKDYEELRTAIK.....EFFGKK...NLQVSEV..............RIGSSKRFGYVDFLSAEDMDKALQ.LNG.K...KLMGLEIKL
+PABP_DROME/4-75     LYVGDL......PQDVNESGLFDKFS.....S.AGPV..LSIRVCRD.............VITRRSLGYAYVNFQQPADAERALDTMNF.D...LVRNKPIRI
+PABP_DROME/92-162   VFIKNL......DRAIDNKAIYDTFS.....A.FGNI..LSCKVATD..............EKGNSKGYGFVHFETEEAANTSIDKVNG.M...LLNGKKVYV
+PABP_DROME/183-254  VYVKNF......TEDFDDEKLKEFFE.....P.YGKI..TSYKVMS..............KEDGKSKGFGFVAFETTEAAEAAVQALNGKD...MGEGKSLYV
+PABP_SCHPO/249-319  VYIKNL......DTEITEQEFSDLFG.....Q.FGEI..TSLSLVKD..............QNDKPRGFGFVNYANHECAQKAVDELND.K...EYKGKKLYV
+PES4_YEAST/93-164   LFIGDL......HETVTEETLKGIFK.....K.YPSF..VSAKVCLD.............SVTKKSLGHGYLNFEDKEEAEKAMEELNY.T...KVNGKEIRI
+PES4_YEAST/305-374  IFIKNL......PTITTRDDILNFFS.....E.VGPI..KSIYLSN...............ATKVKYLWAFVTYKNSSDSEKAIKRYNN.F...YFRGKKLLV
+PR24_YEAST/43-111   VLVKNL......PKSYNQNKVYKYFK.....H.CGPI..IHVDVAD...............SLKKNFRFARIEFARYDGALAAIT.KTH.K...VVGQNEIIV
+PR24_YEAST/119-190  LWMTNF......PPSYTQRNIRDLLQ.....D.INVV.ALSIRLPSL..............RFNTSRRFAYIDVTSKEDARYCVEKLNG.L...KIEGYTLVT
+PR24_YEAST/212-284  IMIRNL.....STELLDENLLRESFE.....G.FGSI..EKINIPAG............QKEHSFNNCCAFMVFENKDSAERALQ.MNR.S...LLGNREISV
+PSF_HUMAN/373-443   LSVRNL......SPYVSNELLEEAFS.....Q.FGPI..ERAVVIVD..............DRGRSTGKGIVEFASKPAARKAFERCSE.G...VFLLTTTPR
+PTB_HUMAN/61-128    IHIRKL......PIDVTEGEVISLGL.....P.FGKV..TNLLMLKG...................KNQAFIEMNTEEAANTMVN.YYT.SVTPVLRGQPIYI
+PTB_HUMAN/186-253   IIVENL......FYPVTLDVLHQIFS.....K.FGTV....LKIIT...............FTKNNQFQALLQYADPVSAQHAKLSLDG.Q...NIYNACCTL
+PUB1_YEAST/76-146   LYVGNL......DKAITEDILKQYFQ.....V.GGPI..ANIKIMID..............KNNKNVNYAFVEYHQSHDANIALQTLNG.K...QIENNIVKI
+PUB1_YEAST/163-234  LFVGDL......NVNVDDETLRNAFK.....D.FPSY..LSGHVMWD.............MQTGSSRGYGFVSFTSQDDAQNAMDSMQG.Q...DLNGRPLRI
+PUB1_YEAST/342-407  AYIGNI......PHFATEADLIPLFQ.....N.FGFI..LDFKHYPE...................KGCCFIKYDTHEQAAVCIVALAN.F...PFQGRNLRT
+RB97_DROME/34-104   LFIGGL......APYTTEENLKLFYG.....Q.WGKV..VDVVVMRD.............AATKRSRGFGFITYTKSLMVDRAQE..NRPH...IIDGKTVEA
+RN12_YEAST/200-267  IVIKFQ......GPALTEEEIYSLFR.....R.YGTI....IDIFP...............PTAANNNVAKVRYRSFRGAISAKNCVSG.I...EIHNTVLHI
+RN15_YEAST/20-91    VYLGSI......PYDQTEEQILDLCS.....N.VGPV..INLKMMFD.............PQTGRSKGYAFIEFRDLESSASAVRNLNG.Y...QLGSRFLKC
+RNP1_YEAST/37-109   LYVGNL......PKNCRKQDLRDLFE.....PNYGKI..TINMLKKK.............PLKKPLKRFAFIEFQEGVNLKKVKEKMNG.K...IFMNEKIVI
+RO28_NICSY/99-170   LFVGNL......PYDIDSEGLAQLFQ.....Q.AGVV..EIAEVIYN.............RETDRSRGFGFVTMSTVEEADKAVELYSQ.Y...DLNGRLLTV
+RO33_NICSY/116-187  LYVGNL......PFSMTSSQLSEIFA.....E.AGTV..ANVEIVYD.............RVTDRSRGFAFVTMGSVEEAKEAIRLFDG.S...QVGGRTVKV
+RO33_NICSY/219-290  LYVANL......SWALTSQGLRDAFA.....D.QPGF..MSAKVIYD.............RSSGRSRGFGFITFSSAEAMNSALDTMNE.V...ELEGRPLRL
+ROA1_BOVIN/106-176  IFVGGI......KEDTEEHHLRDYFE.....Q.YGKI..EVIEIMTD.............RGSGKKRGFAFVTFDDHDSVDKIVI.QKY.H...TVNGHNCEV
+ROC_HUMAN/18-82     VFIGNL.....NTLVVKKSDVEAIFS.....K.YGKI..VGCSVHK.....................GFAFVQYVNERNARAAVAGEDG.R...MIAGQVLDI
+ROF_HUMAN/113-183   VRLRGL......PFGCTKEEIVQFFS.....G.LEIV.PNGITLPVD..............PEGKITGEAFVQFASQELAEKALG.KHK.E...RIGHRYIEV
+ROG_HUMAN/10-81     LFIGGL......NTETNEKALEAVFG.....K.YGRI..VEVLLMKD.............RETNKSRGFAFVTFESPADAKDAARDMNG.K...SLDGKAIKV
+RT19_ARATH/33-104   LYIGGL......SPGTDEHSLKDAFS.....S.FNGV..TEARVMTN.............KVTGRSRGYGFVNFISEDSANSAISAMNG.Q...ELNGFNISV
+RU17_DROME/104-175  LFIARI......NYDTSESKLRREFE.....F.YGPI..KKIVLIHD.............QESGKPKGYAFIEYEHERDMHAAYKHADG.K...KIDSKRVLV
+RU1A_HUMAN/12-84    IYINNLNE..KIKKDELKKSLYAIFS.....Q.FGQI..LDILVSR................SLKMRGQAFVIFKEVSSATNALRSMQG.F...PFYDKPMRI
+RU1A_HUMAN/210-276  LFLTNL......PEETNELMLSMLFN.....Q.FPGF..KEVRLVPG..................RHDIAFVEFDNEVQAGAARDALQG.F...KITQNNAMK
+RU1A_YEAST/229-293  LLIQNL......PSGTTEQLLSQILG.....N.EALV...EIRLVSV...................RNLAFVEYETVADATKIKNQLGS.T...YKLQNNDVT
+RU2B_HUMAN/9-81     IYINNMND..KIKKEELKRSLYALFS.....Q.FGHV..VDIVALK................TMKMRGQAFVIFKELGSSTNALRQLQG.F...PFYGKPMRI
+RU2B_HUMAN/153-220  LFLNNL......PEETNEMMLSMLFN.....Q.FPGF..KEVRLVPG..................RHDIAFVEFENDGQAGAARDALQGFK...ITPSHAMKI
+SC35_CHICK/16-87    LKVDNL......TYRTSPDTLRRVFE.....K.YGRV..GDVYIPRD.............RYTKESRGFAFVRFHDKRDAEDAMDAMDG.A...VLDGRELRV
+SP33_HUMAN/17-85    IYVGNL......PPDIRTKDIEDVFY.....K.YGAI..RDIDLKNR................RGGPPFAFVEFEDPRDAEDAVYGRDG.Y...DYDGYRLRV
+SP33_HUMAN/122-186  VVVSGL......PPSGSWQDLKDHMR.....E.AGDV..CYADVYRD....................GTGVVEFVRKEDMTYAVRKLDN.T...KFRSHEGET
+SQD_DROME/58-128    LFVGGL......SWETTEKELRDHFG.....K.YGEI..ESINVKTD.............PQTGRSRGFAFIVFTNTEAIDKVSA.ADE.H...IINSKKVDP
+SQD_DROME/138-208   IFVGGL......TTEISDEEIKTYFG.....Q.FGNI..VEVEMPLD.............KQKSQRKGFCFITFDSEQVVTDLLK.TPK.Q...KIAGKEVDV
+SR55_DROME/5-68     VYVGGL......PYGVRERDLERFFK.....G.YGRT..RDILIKN.....................GYGFVEFEDYRDADDAVYELNG.K...ELLGERVVV
+SSB1_YEAST/39-114   IFIGNV......AHECTEDDLKQLFV.....EEFGDE..VSVEIPIK..........EHTDGHIPASKHALVKFPTKIDFDNIKENYDT.K...VVKDREIHI
+SXLF_DROME/127-198  LIVNYL......PQDMTDRELYALFR.....A.IGPI..NTCRIMRD.............YKTGYSFGYAFVDFTSEMDSQRAIKVLNG.I...TVRNKRLKV
+SXLF_DROME/213-285  LYVTNL......PRTITDDQLDTIFG.....K.YGSI..VQKNILRD.............KLTGRPRGVAFVRYNKREEAQEAISALNNVI...PEGGSQPLS
+TIA1_HUMAN/9-78     LYVGNL......SRDVTEALILQLFS.....Q.IGPC..KNCKMIMD...............TAGNDPYCFVEFHEHRHAAAALAAMNG.R...KIMGKEVKV
+TIA1_HUMAN/97-168   VFVGDL......SPQITTEDIKAAFA.....P.FGRI..SDARVVKD.............MATGKSKGYGFVSFFNKWDAENAIQQMGG.Q...WLGGRQIRT
+TIA1_HUMAN/205-270  VYCGGV......TSGLTEQLMRQTFS.....P.FGQI..MEIRVFPD...................KGYSFVRFNSHESAAHAIVSVNG.T...TIEGHVVKC
+TRA2_DROME/99-170   IGVFGL......NTNTSQHKVRELFN.....K.YGPI..ERIQMVID.............AQTQRSRGFCFIYFEKLSDARAAKDSCSG.I...EVDGRRIRV
+U2AF_HUMAN/261-332  LFIGGL......PNYLNDDQVKELLT.....S.FGPL..KAFNLVKD.............SATGLSKGYAFCEYVDINVTDQAIAGLNG.M...QLGDKKLLV
+U2AF_SCHPO/312-383  IYISNL......PLNLGEDQVVELLK.....P.FGDL..LSFQLIKN.............IADGSSKGFCFCEFKNPSDAEVAISGLDG.K...DTYGNKLHA
+U2AG_HUMAN/67-142   CAVSDVEM..QEHYDEFFEEVFTEME.....EKYGEV..EEMNVCDN..............LGDHLVGNVYVKFRREEDAEKAVIDLNN.R...WFNGQPIHA
+WHI3_YEAST/540-614  LYVGNL......PSDATEQELRQLFS.....G.QEGF..RRLSFRNK..........NTTSNGHSHGPMCFVEFDDVSFATRALAELYG.R...QLPRSTVSS
+X16_HUMAN/12-78     VYVGNL......GNNGNKTELERAFG.....Y.YGPL..RSVWVARN..................PPGFAFVEFEDPRDAADAVRELDG.R...TLCGCRVRV
+YHC4_YEAST/348-415  IFVGQL......DKETTREELNRRFS.....T.HGKI..QDINLIFK.................PTNIFAFIKYETEEAAAAALESENH.A...IFLNKTMHV
+YHH5_YEAST/315-384  ILVKNL......PSDTTQEEVLDYFS.....T.IGPI..KSVFISEK...............QANTPHKAFVTYKNEEESKKAQKCLNK.T...IFKNHTIWV
+YIS1_YEAST/66-136   IFVGNI......TPDVTPEQIEDHFK.....D.CGQI..KRITLLYD.............RNTGTPKGYGYIEFESPAYREKALQ.LNG.G...ELKGKKIAV
+YIS5_YEAST/33-104   IYIGNL......NRELTEGDILTVFS.....E.YGVP..VDVILSRD.............ENTGESQGFAYLKYEDQRSTILAVDNLNG.F...KIGGRALKI
+ARP2_PLAFA/364-438  VEVTYLF....STYLVNGQTL..IYS.....N.ISVV....LVILY........HQKFKETVLGRNSGFGFVSYDNVISAQHAIQFMNG.Y...FVNNKYLKV
+CABA_MOUSE/77-147   MFVGGL......SWDTSKKDLKDYFT.....K.FGEV..VDCTIKMD.............PNTGRSRGFGFILFKDSSSVEKVLD.QKE.H...RLDGRVIDP
+CABA_MOUSE/161-231  IFVGGL......NPEATEEKIREYFG.....Q.FGEI..EAIELPID.............PKLNKRRGFVFITFKEEDPVKKVLE.KKF.H...TVSGSKCEI
+CPO_DROME/453-526   LFVSGL......PMDAKPRELYLLFR.....A.YEGY..EGSLLKV............TSKNGKTASPVGFVTFHTRAGAEAAKQDLQGVR...FDPDMPQTI
+CST2_HUMAN/18-89    VFVGNI......PYEATEEQLKDIFS.....E.VGPV..VSFRLVYD.............RETGKPKGYGFCEYQDQETALSAMRNLNG.R...EFSGRALRV
+D111_ARATH/281-360  LLLRNMVG.PGQVDDELEDEVGGECA.....K.YGTV..TRVLIFE..........ITEPNFPVHEAVRIFVQFSRPEETTKALVDLDG.R...YFGGRTVRA
+ELAV_DROME/250-322  LYVSGL......PKTMTQQELEAIFA.....P.FGAI..ITSRILQN............AGNDTQTKGVGFIRFDKREEATRAIIALNG.T...TPSSCTDPI
+ELAV_DROME/404-475  IFIYNL......APETEEAALWQLFG.....P.FGAV..QSVKIVKD.............PTTNQCKGYGFVSMTNYDEAAMAIRALNG.Y...TMGNRVLQV
+EWS_HUMAN/363-442   IYVQGL......NDSVTLDDLADFFK.....Q.CGVV..K.MNKRTG....QPMIHIYLDKETGKPKGDATVSYEDPPTAKAAVEWFDG.K...DFQGSKLKV
+GBP2_YEAST/124-193  IFVRNL......TFDCTPEDLKELFG.....T.VGEV..VEADIIT...............SKGHHRGMGTVEFTKNESVQDAISKFDG.A...LFMDRKLMV
+GBP2_YEAST/221-291  VFIINL......PYSMNWQSLKDMFK.....E.CGHV..LRADVELD..............FNGFSRGFGSVIYPTEDEMIRAIDTFNG.M...EVEGRVLEV
+GBP2_YEAST/351-421  IYCSNL......PFSTARSDLFDLFG.....P.IGKI..NNAELKP..............QENGQPTGVAVVEYENLVDADFCIQKLNN.Y...NYGGCSLQI


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/align/simplealign.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-eutils-example.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-eutils-example.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-eutils-example.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,227 @@
+#!/usr/bin/perl -w
+# $Id: biblio-eutils-example.pl,v 1.1 2005/09/24 01:05:28 bosborne Exp $
+=head1 NAME
+
+biblio-eutils-example.pl
+
+=head1 SYNOPSIS
+
+Script that uses Bio::Biblio, accessing 'eutils' at PubMed.
+
+As of Bioperl version 1.4 there are 3 bibliographic repositories,
+stipulated by the -access argument: soap, eutils, and biofetch.
+The default is 'soap'. Not all of these repositories support all
+the Biblio methods nor are the contents of these repositories
+necessarily the same. Choose wisely!
+
+=head2 PubMed Queries
+
+The syntax of the queries is the same as at PubMed, see
+http://www.ncbi.nlm.nih.gov/entrez/query/Pmc/pmchelp.html#SearchFieldDescriptionsandTags
+for more information on how to construct queries.
+
+=head2 Parsing Results
+
+Bio::Biblio will give you XML when querying eutils, so you have to
+choose a method to parse XML. A fairly simple approach uses
+XML::Twig, shown here. This example shows how to query by title and
+how to retrieve the titles of the abstracts found.
+
+=cut
+
+use strict;
+use Bio::Biblio;
+use XML::Twig;
+
+# one-liner to get the number of abstracts found
+my $num = Bio::Biblio->new(-access => "eutils")->find("Osborne","authors")->
+  get_count;
+
+my $biblio = Bio::Biblio->new(-access => "eutils");
+
+# title search: both words must occur in the article title
+my $result = $biblio->find("brain [TI] AND MDM2 [TI]");
+
+my $pmids = $result->get_all_ids;
+
+# print_title() is called for every ArticleTitle element the parser meets
+my $parser = XML::Twig->new(twig_roots => {"ArticleTitle" => \&print_title} );
+
+for my $pmid (@$pmids) {
+	my $xml = $biblio->get_by_id($pmid);
+	eval {
+		$parser->parse($xml);
+	};
+	# a parse failure is reported in $@ (the eval error); $! is the OS
+	# errno string and says nothing about why parsing died
+	if ($@) {
+		warn "Problem parsing PubMed $pmid XML: $@";
+	}
+}
+
+# Twig handler: prints the text of the element it is given, followed by a
+# newline, then purges the twig.
+#   $_[0] - the twig object doing the parsing
+#   $_[1] - the element that triggered the handler
+sub print_title {
+	my $twig    = shift;
+	my $element = shift;
+
+	print $element->text(), "\n";
+
+	$twig->purge();
+}
+
+=head1 PubMed XML Example
+
+<?xml version="1.0"?>
+<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st November 2004//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/pubmed_041101.dtd">
+<PubmedArticleSet>
+<PubmedArticle>
+    <MedlineCitation Owner="NLM" Status="MEDLINE">
+        <PMID>15815077</PMID>
+        <DateCreated>
+            <Year>2005</Year>
+            <Month>04</Month>
+            <Day>07</Day>
+        </DateCreated>
+        <DateCompleted>
+            <Year>2005</Year>
+            <Month>08</Month>
+            <Day>29</Day>
+        </DateCompleted>
+        <Article PubModel="Print">
+            <Journal>
+                <ISSN>0231-5882</ISSN>
+                <JournalIssue>
+                    <Volume>23</Volume>
+                    <Issue>4</Issue>
+                    <PubDate>
+                        <Year>2004</Year>
+                        <Month>Dec</Month>
+                    </PubDate>
+                </JournalIssue>
+            </Journal>
+            <ArticleTitle>Rabbit liver microsomal system: study of interaction with two model N-nitrosamines and their metabolism.</ArticleTitle>
+            <Pagination>
+                <MedlinePgn>423-33</MedlinePgn>
+            </Pagination>
+            <Abstract>
+                <AbstractText>Rabbit liver microsomes of control (non-treated) or animals induced either by ethanol (EtOH) or phenobarbital (PB) were incubated with N-nitrosodimethylamine (NDMA) or N-nitrosomethylaniline (NMA). Difference spectroscopy showed that NMA is bound to the substrate-binding site of cytochrome P-450 (CYP) isoforms as heme ligand in control and EtOH pre-treated microsomes. On the other hand, PB-induced microsomes exhibit with NMA substrate type of spectra. NDMA does not provide any type of binding spectra with used microsomal systems. Oxidative bio-activation of N-nitrosamines by the microsomal CYP isoforms was measured as formaldehyde formation. Analysis of reaction kinetics in control microsomes revealed, for both substrates, two values of Michaelis-Menten constant (K(m)) for, K(m) values of 0.03 and 0.13 mmol/l for NDMA, and 0.30 and 0.82 mmol/l for NMA. Induction of animals with EtOH resulted in a decrease in the K(m) value for both substrates. In contrast, PB treatment caused an elevation of K(m) value for NDMA. Based on these data, we conclude that EtOH-inducible microsomal CYP isoforms (mainly CYP2E1) are responsible for binding and N-demethylation metabolism of both studied N-nitrosamines in rabbit liver microsomal system. The role of the other CYP isoforms involved in the metabolism of mentioned N-nitrosamines is discussed.</AbstractText>
+            </Abstract>
+            <Affiliation>Department of Biochemistry, Faculty of Science, Charles University, Hlavova 2030, 128 43 Prague 2, Czech Republic. mis at natur.cuni.cz</Affiliation>
+            <AuthorList CompleteYN="Y">
+                <Author ValidYN="Y">
+                    <LastName>Sulc</LastName>
+                    <ForeName>B</ForeName>
+                    <Initials>B</Initials>
+                </Author>
+                <Author ValidYN="Y">
+                    <LastName>Kubícková</LastName>
+                    <ForeName>B</ForeName>
+                    <Initials>B</Initials>
+                </Author>
+                <Author ValidYN="Y">
+                    <LastName>Máslová</LastName>
+                    <ForeName>F</ForeName>
+                    <Initials>B</Initials>
+                </Author>
+                <Author ValidYN="Y">
+                    <LastName>Hodek</LastName>
+                    <ForeName>C</ForeName>
+                    <Initials>B</Initials>
+                </Author>
+            </AuthorList>
+            <Language>eng</Language>
+            <PublicationTypeList>
+                <PublicationType>Journal Article</PublicationType>
+            </PublicationTypeList>
+        </Article>
+        <MedlineJournalInfo>
+            <Country>Slovakia</Country>
+            <MedlineTA>Gen Physiol Biophys</MedlineTA>
+            <NlmUniqueID>8400604</NlmUniqueID>
+        </MedlineJournalInfo>
+        <ChemicalList>
+            <Chemical>
+                <RegistryNumber>0</RegistryNumber>
+                <NameOfSubstance>N-nitrosodimethylamine</NameOfSubstance>
+            </Chemical>
+            <Chemical>
+                <RegistryNumber>0</RegistryNumber>
+                <NameOfSubstance>Nitrosamines</NameOfSubstance>
+            </Chemical>
+            <Chemical>
+                <RegistryNumber>50-06-6</RegistryNumber>
+                <NameOfSubstance>Phenobarbital</NameOfSubstance>
+            </Chemical>
+            <Chemical>
+                <RegistryNumber>614-00-6</RegistryNumber>
+                <NameOfSubstance>N-methyl-N-nitrosoaniline</NameOfSubstance>
+            </Chemical>
+            <Chemical>
+                <RegistryNumber>64-17-5</RegistryNumber>
+                <NameOfSubstance>Ethanol</NameOfSubstance>
+            </Chemical>
+            <Chemical>
+                <RegistryNumber>9035-51-2</RegistryNumber>
+                <NameOfSubstance>Cytochrome P-450 Enzyme System</NameOfSubstance>
+            </Chemical>
+        </ChemicalList>
+        <CitationSubset>I</CitationSubset>
+        <MeshHeadingList>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Animals</DescriptorName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Cytochrome P-450 Enzyme System</DescriptorName>
+                <QualifierName MajorTopicYN="Y">metabolism</QualifierName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Ethanol</DescriptorName>
+                <QualifierName MajorTopicYN="Y">administration &amp; dosage</QualifierName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Liver</DescriptorName>
+                <QualifierName MajorTopicYN="N">drug effects</QualifierName>
+                <QualifierName MajorTopicYN="Y">metabolism</QualifierName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Male</DescriptorName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Microsomes, Liver</DescriptorName>
+                <QualifierName MajorTopicYN="N">drug effects</QualifierName>
+                <QualifierName MajorTopicYN="Y">metabolism</QualifierName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Nitrosamines</DescriptorName>
+                <QualifierName MajorTopicYN="Y">metabolism</QualifierName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Phenobarbital</DescriptorName>
+                <QualifierName MajorTopicYN="Y">administration &amp; dosage</QualifierName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Rabbits</DescriptorName>
+            </MeshHeading>
+            <MeshHeading>
+                <DescriptorName MajorTopicYN="N">Research Support, Non-U.S. Govt</DescriptorName>
+            </MeshHeading>
+        </MeshHeadingList>
+    </MedlineCitation>
+    <PubmedData>
+        <History>
+            <PubMedPubDate PubStatus="pubmed">
+                <Year>2005</Year>
+                <Month>4</Month>
+                <Day>9</Day>
+                <Hour>9</Hour>
+                <Minute>0</Minute>
+            </PubMedPubDate>
+            <PubMedPubDate PubStatus="medline">
+                <Year>2005</Year>
+                <Month>8</Month>
+                <Day>30</Day>
+                <Hour>9</Hour>
+                <Minute>0</Minute>
+            </PubMedPubDate>
+        </History>
+        <PublicationStatus>ppublish</PublicationStatus>
+        <ArticleIdList>
+            <ArticleId IdType="pubmed">15815077</ArticleId>
+        </ArticleIdList>
+    </PubmedData>
+  </PubmedArticle>
+ </PubmedArticleSet>
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-eutils-example.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-soap-example.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-soap-example.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-soap-example.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,235 @@
+#!/usr/bin/perl -w
+# $Id: biblio-soap-example.pl,v 1.1 2005/09/24 01:05:28 bosborne Exp $
+=head1 NAME
+
+biblio-soap-example.pl
+
+=head1 SYNOPSIS
+
+Script showing code that uses Bio::Biblio, with 'soap' at OpenBQS.
+
+As of Bioperl version 1.4 there are 3 bibliographic repositories,
+stipulated by the -access argument: soap, eutils, and biofetch.
+The default is 'soap'. Not all of these repositories support all
+the Biblio methods nor are the contents of these repositories
+necessarily the same. Choose wisely!
+
+=cut
+
+use strict;
+use Bio::Biblio;
+use Bio::Biblio::IO;
+use Data::Dumper;
+
+# number of articles in 'soap' with author Osborne. Constructors are written
+# as Bio::Biblio->new; perlobj discourages the indirect "new Bio::Biblio" form.
+my $num = Bio::Biblio->new->find("Osborne","authors")->get_count;
+
+# number of articles in OpenBQS with 'topoisomerase' in the title, year 2000,
+# ("J Biol Chem","journal") is another example query
+$num = Bio::Biblio->new->find("topoisomerase","title")->
+  find("2000","year")->get_count;
+
+# a reference as XML...
+my $xml = Bio::Biblio->new->get_by_id("3047008");
+
+# get a reference to an array of ids...
+my $arr_ref = Bio::Biblio->new->find("Osborne","authors")->
+  find("2000","year")->get_all_ids;
+
+# get the vocabulary of a specific repository, but not all repositories
+# support these methods as of Bioperl 1.4...
+my $biblio = Bio::Biblio->new(-access => "soap");
+my $biblio_ref = $biblio->get_vocabulary_names;
+my $val_ref = $biblio->get_all_values('MEDLINE2004/JournalArticle/properties');
+
+# retrieve the entry as text or retrieve specific text fields...
+my $medline_id = "88329717";
+my $ref = Bio::Biblio->new(-access => "soap")->get_by_id($medline_id);
+my $io = Bio::Biblio::IO->new( -result => "raw",
+                               -data   => $ref );
+my $nextref = $io->next_bibref;
+# dump the entire hash into a string (sample output is in the POD below)...
+my $dump = Data::Dumper->Dump([$nextref],["$medline_id"]);
+# or just the abstract...
+my $abstract = $nextref->{article}->{abstract}->{abstractText};
+# some hash references are stored in arrays
+foreach my $author ( @{$nextref->{article}->{authors}} ) {
+   foreach my $name (values %{$author}) {
+      print $name->{lastName}," ",$name->{initials},"\n";
+   }
+}
+# a few more values
+my $year = $nextref->{article}->{journal}->{journalIssue}->{pubDate}->{year};
+my $title = $nextref->{article}->{articleTitle};
+
+# put it all together...
+my $refs = Bio::Biblio->new->find("Osborne","authors")->find("2000","year");
+while ($refs->has_next){
+   my $ref = $refs->get_next;
+   my $io = Bio::Biblio::IO->new( -result => "raw", -data => $ref );
+   my $nextref = $io->next_bibref;
+   my $abstract = $nextref->{article}->{abstract}->{abstractText};
+   # you could also write:
+   # my $abstract = Bio::Biblio::IO->new( -result => "raw",
+   #  -data => $refs->get_next )->next_bibref->{article}->{abstract}->{abstractText};
+   print $abstract,"\n" if defined $abstract;   # skip undef: avoids a warning under -w
+}
+
+=head1 Output from Data::Dumper->Dump:
+
+88329717 = {
+	    'chemicals' => [
+			    {
+			     'nameOfSubstance' => 'DNA, Fungal',
+			     'registryNumber'  => '0'
+			    },
+			    {
+			     'nameOfSubstance' => 'DNA, Superhelical',
+			     'registryNumber'  => '0'
+			    },
+			    {
+			     'nameOfSubstance' => 'RNA Polymerase II',
+			     'registryNumber'  => 'EC 2.7.7.-'
+			    }
+			   ],
+	    'journalInfo' => {
+			      'medlineTA'   => 'Genes Dev',
+			      'country'     => 'UNITED STATES',
+			      'nlmUniqueID' => '8711660'
+			     },
+	    'PMID'      => '3047008',
+	    'medlineID' => '88329717',
+	    'status'    => 'Completed',
+	    'article'   => {
+			    'journal' => {
+					  'journalIssue' => {
+							     'volume'       => '2',
+							     'issue'        => '6',
+							     'pubDate'      => {
+										'month' => 'Jun',
+										'year'  => '1988'
+									       }
+							    },
+					  'iSSN' => '0890-9369'
+					 },
+			    'grants' => [
+					 {
+					  'agency'  => 'NIGMS',
+					  'grantID' => '5R01 GM30454-05',
+					  'acronym' => 'GM'
+					 }
+					],
+			    'pagination' => {
+					     'medlinePgn' => '766-72'
+					    },
+			    'abstract' => {
+					   'abstractText' => 'We show that induction of transcription of a CYC1-lacZ fusion gene, borne on a yeast plasmid, causes an increase in negative superhelicity of approximately five turns. This increase is abolished by deletion of either essential element of the CYC1 promoter, the upstream activation site (UAS), or the TATA boxes. Several experiments indicate that the size of the increase is proportional to the size of the transcribed region. First, an internal deletion removing half of the CYC1-lacZ transcribed region results in a plasmid whose negative superhelicity on induction is intermediate between promoter-deletion plasmids and the parental plasmid. Second, plasmids bearing insertions of a fragment containing the putative CYC1 terminator into the CYC1-lacZ fusion gene have relative negative superhelicities proportional to the length of the truncated fusion transcripts generated. A plausible model explaining these observations is that local unwinding of the double helix by transcribing RNA polymerase generates positively supercoiled DNA, which is subsequently relaxed by a topoisomerase.'
+					  },
+			    'languages' => [
+					    'eng'
+					   ],
+			    'publicationTypes' => [
+						   'Journal Article'
+						  ],
+			    'authors' => [
+					  {
+					   'personalName' => {
+							      'initials' => 'BI',
+							      'lastName' => 'Osborne',
+							      'foreName' => 'B I',
+							      'type'     => 'PersonalName'
+							     }
+					  },
+					  {
+					   'personalName' => {
+							      'initials' => 'L',
+							      'lastName' => 'Guarente',
+							      'foreName' => 'L',
+							      'type'     => 'PersonalName'
+							     }
+					  }
+					 ],
+			    'articleTitle' => 'Transcription by RNA polymerase II induces changes of DNA topology in yeast.',
+			    'affiliation' => 'Massachusetts Institute of Technology, Department of Biology, Cambridge 02139.'
+                           },
+	    'dateRevised' => {
+			      'day' => '18',
+			      'month' => '12',
+			      'year' => '2000'
+			     },
+	    'meshHeadings' => [
+			       {
+				'descriptorName' => 'Chromosome Deletion'
+			       },
+			       {
+				'descriptorName' => 'DNA, Fungal',
+				'subHeadings' => [
+						  {
+						   'subHeading' => 'genetics',
+						   'majorTopic' => 'Y'
+						  },
+						  {
+						   'subHeading' => 'ultrastructure'
+						  }
+						 ]
+			       },
+			       {
+				'descriptorName' => 'DNA, Superhelical',
+				'subHeadings' => [
+						  {
+						   'subHeading' => 'genetics'
+						  }
+						 ]
+			       },
+			       {
+				'descriptorName' => 'Genes, Fungal'
+			       },
+			       {
+				'descriptorName' => 'Promoter Regions (Genetics)'
+			       },
+			       {
+				'descriptorName' => 'RNA Polymerase II',
+				'subHeadings' => [
+						  {
+						   'subHeading' => 'metabolism',
+						   'majorTopic' => 'Y'
+						  }
+						 ]
+			       },
+			       {
+				'descriptorName' => 'Saccharomyces cerevisiae',
+				'subHeadings' => [
+						  {
+						   'subHeading' => 'genetics',
+						   'majorTopic' => 'Y'
+						  }
+						 ]
+			       },
+			       {
+				'descriptorName' => 'Support, Non-U.S. Gov\'t'
+			       },
+			       {
+				'descriptorName' => 'Support, U.S. Gov\'t, P.H.S.'
+			       },
+			       {
+				'descriptorName' => 'Transcription, Genetic'
+			       }
+			      ],
+	    'dateCreated' => {
+			      'day' => '24',
+			      'month' => '10',
+			      'year' => '1988'
+			     },
+	    'citationSubsets' => [
+				  'IM'
+				 ],
+	    'type' => 'MedlineCitation',
+	    'dateCompleted' => {
+				'day' => '24',
+				'month' => '10',
+				'year' => '1988'
+			       }
+	   };
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio-soap-example.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio_soap.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio_soap.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio_soap.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+#
+# This was actually a part of the test suite - but because it starts
+# an external process it was safer not to use it as a test (the process
+# could be left running if an error occurs).
+#
+# It is an example of a TCP-based SOAP exchange.
+#
+
+use strict;
+eval { require SOAP::Lite;
+};
+if( $@ ){
+    die("must have SOAP::Lite installed to run this script");
+}
+
+use vars qw($NUMTESTS);
+
+my $error;   # only ever assigned (in BEGIN below); never read in this script
+
+BEGIN { # runs as soon as it is compiled, before the code that follows is parsed
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';   # NOTE: "use" acts at compile time, so 't' is added to @INC whether or not the require failed
+    }
+    use Test;
+    plan tests => 10;   # matches the ten ok() calls made after the fork
+}
+
+my $testnum;
+my $verbose = 0;
+
+use Bio::Biblio;
+
+# --- launch a testing SOAP server
+my ($pid, $port, $max_port);
+$port = 4444;
+$max_port = $port + 100;
+if ($pid = fork) {
+    # parent here
+
+    sleep 1;
+    my $biblio = new Bio::Biblio (-location => "tcp://localhost:$port",
+				  -namespace => 'soap_server');
+    
+    ok ($biblio->get_count, '43');
+    ok ($biblio->get_by_id ('X'), 'X');
+    ok ($biblio->find ('a,b','c,d')->get_collection_id, 'a,b,c,d');
+    ok ($biblio->find (['x', 'y'], ['u', 'v'])->get_collection_id, 'x,y,u,v');
+
+    ok ( eval { join (',', @{ $biblio->find ('AAA')->get_all_ids }) }, 'AAA'); print STDERR $@ if $@;
+
+    ok ( eval { join (',', @{ $biblio->find ('XXX')->get_all }) }, 'XXX'); print STDERR $@ if $@;
+
+    ok ( eval { $biblio->find (46)->has_next }, 1); print STDERR $@ if $@;
+
+    ok ( eval { $biblio->find ('BBB')->get_next }, 'BBB'); print STDERR $@ if $@;
+
+    ok ( eval { join (',', @{ $biblio->find ('CCC')->get_more (3) }) }, 'CCC,CCC,CCC'); print STDERR $@ if $@;
+
+    ok ( eval { $biblio->find (46)->exists }, 0); print STDERR $@ if $@;
+
+
+    # clean-up the running server
+    kill 9, $pid if defined $pid;
+    print "    SOAP server $pid killed\n";
+
+} elsif (defined $pid) {
+    # child here - a testing SOAP server
+
+    package soap_server;
+    use strict;
+    use SOAP::Transport::TCP;
+    my $daemon;
+    # Probe successive ports until one can be bound; eval traps a failed attempt.
+    while (!$daemon && $port < $max_port) {
+        $daemon = eval { SOAP::Transport::TCP::Server
+            -> new (LocalAddr => 'localhost', LocalPort => $port, Listen => 5, Reuse => 1)
+            -> dispatch_to('soap_server') };
+        $port++ unless $daemon;
+    }
+    # Without this guard an exhausted port range failed on an undefined $daemon below.
+    die "    Could not start a SOAP server on any port below $max_port: $@\n" unless $daemon;
+    print "    Contact to SOAP server at ", join(':', $daemon->sockhost, $daemon->sockport), " (server PID: $$)\n";
+    $daemon->handle;
+
+    sub getBibRefCount { shift;  return 43; }
+    sub getById { shift; return shift; }
+    sub find {   # joins the keywords (and the attributes, when given) with commas
+        my (undef, $terms, $fields) = @_;
+        my @parts = (@{ $terms }, $fields ? @{ $fields } : ());
+        return join (',', @parts);
+    }
+    sub getAllIDs { shift; return [ shift ] }
+    sub getAllBibRefs { shift; return [ shift ] }
+    sub hasNext { return SOAP::Data->type (boolean => 'true'); }
+    sub getNext { shift; return [ '1', shift]; }
+    sub getMore {   # returns a reference to ['1', then $id repeated $how_many times]
+        my (undef, $id, $how_many) = @_;
+        # The list-repetition operator yields an empty list for a count below 1,
+        # just as the range 1..$how_many is empty in that case.
+        return [ '1', ($id) x $how_many ];
+    }
+    sub exists { return SOAP::Data->type (boolean => '0'); }
+    sub destroy {}
+
+    package main;
+
+} else {
+        # fork failed
+        print STDERR "Testing SOAP services FAILED: $!.\n";
+    }


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biblio/biblio_soap.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biographics/all_glyphs.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biographics/all_glyphs.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biographics/all_glyphs.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,496 @@
+#!/usr/bin/perl -w
+# Generate a simple display of all glyphs for comparison testing
+# T. Harris (harris at cshl.org)
+
+# Usage:
+# ./all_glyphs GD > all.png
+# ./all_glyphs GD 0 1000 > all.png       # output in png with a wide view
+# ./all_glyphs GD::SVG 100 150 > all.svg # output in SVG, zoomed
+
+use lib '.','../..','./blib/lib','../../blib/lib','../..';
+use strict;
+use Bio::Seq;
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+
+my ($CLASS, $start, $end) = (shift, shift, shift);   # IMAGE_CLASS [START END]
+defined($_) && chomp($_) for ($CLASS, $start, $end); # absent arguments stay undef
+$CLASS or die "\nUsage: all_glyphs IMAGE_CLASS [START END]
+\t- where IMAGE_CLASS is one of GD or GD::SVG
+\t- GD generate png output; GD::SVG generates SVG.\n";
+
+# Fall back to the defaults only when an argument is missing or empty; the
+# former "||=" test also discarded an explicit 0, as in "all_glyphs GD 0 1000".
+$start = -100 unless defined $start && length $start;
+$end   = 1000 unless defined $end   && length $end;
+
+my $ftr = 'Bio::Graphics::Feature';
+my $segment = $ftr->new(-start=>$start,-end=>$end,-name=>'ZK154',-type=>'clone');
+my $panel = Bio::Graphics::Panel->new(   # panel that every add_track call below draws into
+				      -grid => [50,100,150,200,250,300,310,320,330],   # NOTE(review): -grid is passed again two lines down; presumably the later value wins - confirm
+				      -gridcolor => 'lightcyan',
+				      -grid => 1,
+				      -segment => $segment,
+				    #  -offset => 300,
+				    #  -length  => 1000,
+				      -spacing => 15,
+				      -width   => 600,
+				      -pad_top  => 20,
+				      -pad_bottom  => 20,
+				      -pad_left => 20,
+				      -pad_right=> 20,
+				    #  -bgcolor => 'teal',
+				    #  -key_style => 'between',
+				      -key_style => 'bottom',
+				      -image_class => $CLASS,   # GD or GD::SVG, as given on the command line
+				     );
+
+my $zk154_1 = $ftr->new(-start=>-50,-end=>800,-name=>'ZK154.1',-type=>'gene',-source=>'predicted');
+my $zk154_2 = $ftr->new(-start=>380,-end=>500,-name=>'ZK154.2',-type=>'gene',-source=>'predicted');
+my $zk154_3 = $ftr->new(-start=>900,-end=>1200,-name=>'ZK154.3',-type=>'gene',-source=>'confirmed');
+my $xyz4 = $ftr->new(-segments=>[[40,80],[100,120],[200,280],[300,320]],
+		     -name   =>'xyz4',
+		     -source =>'mysterious',
+		     -subtype=>'predicted',
+		     -type   =>'alignment');
+
+# alignment
+add_scores($xyz4);
+$panel->add_track([$zk154_1,[$zk154_2,$xyz4]],
+		  -glyph => 'alignment',
+		  -label => 'alignment',
+		  -key   => 'alignment',
+		  -height => 10,
+		  -font => 'gdSmallFont',
+		  -bump => 1,
+		  -bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'green' : 'blue'},
+		  -connector => sub { my $primary_tag = shift->primary_tag;
+				      $primary_tag eq 'transcript' ? 'hat'
+				    : $primary_tag eq 'alignment'  ? 'solid'
+				    : undef},
+		  -connector_color => 'black',
+		 );
+
+# anchored_arrow
+my $short_segment = $ftr->new(-start=>200,-end=>1000);
+$panel->add_track($short_segment,
+		  -glyph  => 'anchored_arrow',
+		  -label  => 'anchored_arrow',
+		  -key    => 'anchored_arrow',
+		  -double => 1,
+		  -bump   => 0,
+		  -height => 10,
+		  -linewidth =>1,
+		  -arrowstyle=>'regular',
+		  -tick      =>1,
+		 );
+
+# arrow
+$panel->add_track($segment,
+		  -glyph  => 'arrow',
+		  -label  => 'arrow-minor ticks',
+		  -key    => 'arrow',
+		  -double => 1,
+		  -fgcolor=> 'red',
+		  -bump   => 0,
+		  -height => 10,
+		  -arrowstyle=>'regular',
+		  -tick      =>1,
+		  -linewidth =>1,
+		 );
+
+$panel->add_track($segment,
+		  -glyph  => 'arrow',
+		  -label  => 'arrow-major ticks',
+		  -double => 1,
+		  -bump   => 0,
+		  -height => 10,
+		  -linewidth =>1,
+		  -arrowstyle=>'filled',
+		  -tick=>2,
+		 );
+
+# box
+my $box = $ftr->new(-start=>100,-end=>600,-name=>'JC8',-type=>'clone');
+$panel->add_track($box,
+		  -glyph  => 'box',
+		  -label  => 'box',
+		  -key => 'box',
+		  -bump   => 0,
+		  -height => 10,
+		  -font   => 'gdLargeFont',
+		  -linewidth =>1,
+		  -bgcolor => 'turquoise',
+		  -fgcolor => 'black',
+		 );
+
+
+# cds
+my $cds = $ftr->new(-segments=>[[1,50],[100,150],[222,280],[380,400],[520,599],[801,900]],
+			     -name=>'cds',
+			     -type=>'gene',
+			     -strand=>'+1',
+			     -subtype=>'predicted',
+			    );
+
+my $cds2 = $ftr->new(-segments =>[[23,90],[157,201],[256,375],[439,502],[600,725]],
+		     -name     =>'cds',
+		     -strand   => '-1',
+		     -subtype  =>'predicted',
+		     -type     =>'gene');
+
+$panel->add_track([$cds],
+		  -glyph    => 'cds',
+		  -label    => 'cds',
+		  -key      => 'cds',
+		  -bump     => 0,
+		  -height   =>30,
+		  -linewidth=>1,
+		  -frame0f => 'blue',
+		  -frame1f => 'green',
+		  -frame2f => 'yellow',
+		  -frame0r => 'red',
+		  -frame1r => 'black',
+		  -frame2r => 'purple',
+		  #  -sixframe => 1,
+		  -require_subparts=>1,
+		 );
+
+
+# crossbox
+my $crossbox = $ftr->new(-start=>200,-end=>600);
+$panel->add_track($crossbox,
+		  -glyph  => 'crossbox',
+		  -label  => 'crossbox',
+		  -key    => 'crossbox',
+		  -bump   => 0,
+		  -height => 20,
+		  # -font   => 'gdMediumBold',
+		  -linewidth =>1,
+		  -bgcolor => 'red',
+		  -fgcolor => 'black',
+		 );
+
+# diamond
+my $diamonds = $ftr->new(-segments=>[[10,11],[100,101],[201,202],[214,215],[237,238],
+				     [300,301],[350,351],[400,550],[601,602],[775,776]],
+			 -name=>'SNPs');
+$panel->add_track([$diamonds],
+		  -glyph   => 'diamond',
+		  -label   => 'diamond',
+		  -key     => 'diamond',
+		  -height  => 10,
+		  -bgcolor => 'aqua',
+		 );
+
+
+
+# dna
+my $string =
+'tcgtcaaatgtctattgggtcgaaaagaaggtgaacgagtgctcggtgatgcgttcaaaactcaacacaaatcttcacatttcgctccactagtcgactttatcgattttgattatcatgctcaaatgaagatttccaaagaggcaattgtgcagttgaaaaagaaaatgagcccacatatgacaaagcatggatttttctattcaatgggaaaagaaatagtgaaacgacaaactggagtaattcgaacaaattgtctagattgtctggataggacgaatgccgtacaaacagccatcggacttcaaatgtcacatgatcaagttgcatttctgaatttaaacgcgggaaaagtgaatgtagagcaacgagttgaagagattcttcgtgatttgtggcagaaaaatggagatcagtgtagtacgatctacgcgggaactggagctcttgacggaaagagcaagttgaaagacgcgtcgagatcgcttgcaagaactattcagaataatttgatggatggtgcaaagcaggaatcatttgatttatttttgactggagccgcatatgatccgaggcttttcgatagagcatgtaatatattgccacctagtttgatacaagaatacgctgacgccgtatcgcagcttgtcgagcgaagtcccgaaatcgccgaacctcaatccattaaaatattcgttggaacttggaatgtgaatggaggaaagaatattcataatgtggcattccgtaatgaatcgagtctctcccactggatatttgccaattcaatgacacgtctcgtatctgtagaagatgagcaactagctgatattgtagcaattggagttgaagaacttgttgatttgaatgcaagtaatatggttaaagcaagtaccacaaatcaacgaatgtggtgtgaaagtattcgaaaaactctttctgaaaaagctccatttgtgctcattggctccgagcagctcgtcggtgtttgtctattcctcttcgcaagaccacgtgtctcaccatacctgaaagactttgcagtggcttctgtaaagactggaatgggtggagcaactggaaataagggatccgttgccttccgaatcgtcgtattctccacttctatttgttttatttgttctcactttgcagccgggcaaaacgagattcgagacagaaatgaggattttgcgacgacgttgaaaaagattcgattcccgttgggcagagaaattgactcgcatgacgtcatattttggttgggagatttcaactatcgaattaatttgtcgggggatgaagttaagaatgctgttagaaatggagactatgcgaaattagtcgaaaatgatcaattgacacagcagaaagctcttggacagacatttgttggcttcaacgaaggacagctcacgttcgcaccaacatacaaatacgacacattcagtgatgactatgatacgagtgaaaagtgtcgtgcacccgcatggactgatcgaattctttggaaagatcagagaaagaagggaaaaacgcaacttctcagctatgatagatcagaattaaaaacttctgatcatcgacctgttggagctgttttcaaagtggaaacttttaaagttggcggcagaaaatgtgtggagctcatcgaggatgttgtagaatctatgggtccaccggacggaacaatcattgtcagtattgccggaaaacctcgattcccgccgcaaatgtttccgccgattcatgagaagttgaaggaactcggtgctcaagttcagctgagcaaattcgacgatggcgatctatggattgtactgaatagtggagaaatggcattagccgcattaagtatggatgggctgaagatcggaggaacagatcagattaatgtgaagttgaagtcaccggattgggcttatgctttgaagccacatctttcagattttgatttggaatcgtttgaagtgacggcagaggaagaggcattacttg
gtggtactgatggtgccgtttttgaatttgcagacgaagacgaggacgcaatcagtgtgtctagtctgacgcttactggttcggctcccgatcgacctcgtccaccatcagcaagaagtgaagcgatcagtgtagccaaacttgaatggccaacagaacaaccaaacgtcctctccacatcaatgccaacacgagcttcatcagcttctcttgccaatagttcttggtatgagcatgtaccaccacttgctccacctcaatcaaacaataataaaagccctccacaagcttgtctattcaatccattcactcaatctgcaccatccccggctccaccaccatccacgattcctcttccaccgactcgtggagcatcagttggaccaggtcctccagcggttcccgtcaggaaggcacccccaccgccacctcggcctgtcattccacctagaccaaaaaatatgtag';
+
+my $fragment = Bio::Seq->new(-seq=>$string);
+my $dna = $ftr->new(-seq=>$fragment,
+		    -start=>$start,-end=>$end);
+$panel->add_track($dna,
+		  -glyph    => 'dna',
+		  -label    => 'dna',
+		  -key      => 'dna',
+		  -height   => 50,
+		  -linewidth=> 1,
+		  -axis_color=>'red',
+		  -gc_bins  => 10,
+		  -strand   => 'both',
+		  );
+
+
+# dot
+my $dots = $ftr->new(-segments=>[[10,11],[100,150],[201,232],[214,215],[237,270],
+				     [280,281],[300,321],[400,550],[601,602],[775,776]]);
+$panel->add_track([$dots],
+		  -glyph   => 'dot',
+		  -label   => 'dot',
+		  -key     => 'dot',
+		  -height  => 10,
+		  -bgcolor => 'red',
+		  -point   => 5,
+		 );
+
+# ellipse
+my $ellipses = $ftr->new(-segments=>[[100,150],[201,232],[237,270],[300,321],[400,550],[730,776]]);
+$panel->add_track([$ellipses],
+		  -glyph   => 'ellipse',
+		  -label   => 'ellipse',
+		  -key     => 'ellipse',
+		  -height  => 10,
+		  -bgcolor => 'orange',
+		 );
+
+# ex
+my $ex = $ftr->new(-start=>100,-end=>400);
+$panel->add_track($ex,
+		  -glyph  => 'ex',
+		  -label  => 'ex',
+		  -key    => 'ex',
+		  -bump   => 0,
+		  -height => 20,
+		  # -font   => 'gdMediumBold',
+		  -linewidth =>1,
+		  -bgcolor => 'red',
+		  -fgcolor => 'black',
+		 );
+
+# graded_segments				
+my $partial_gene = $ftr->new(-segments=>[[1,50],[100,150],[220,300],
+					 [380,400],[520,600],[800,900]],
+			     -name   =>'partial_gene',
+			     -strand => '+1',
+			     -type   =>'exon',
+			     -source =>'confirmed');
+
+add_scores($partial_gene);
+$panel->add_track($partial_gene,                  # first of four connector-style demos
+                  -glyph     => 'graded_segments',
+                  -key       => 'graded_segments',
+                  -label     => 'graded_segments - quill connector',   # stray doubled comma removed
+                  -bgcolor   => 'blue',
+                  -connector => 'quill',
+                 );
+
+$panel->add_track($partial_gene,
+		  -glyph     => 'graded_segments',
+		  -label     => 'graded_segments - hat connector',
+		  -key       => 'graded_segments',
+		  -bgcolor   => 'green',
+		  -connector => 'hat',
+		 );
+
+$panel->add_track($partial_gene,
+		  -glyph     => 'graded_segments',
+		  -label     => 'graded_segments - solid connector',
+		  -key       => 'graded_segments',
+		  -bgcolor   => 'yellow',
+		  -connector => 'solid',
+		 );
+
+$panel->add_track($partial_gene,
+		  -glyph     => 'graded_segments',
+		  -label     => 'graded_segments - dashed connector',
+		  -key       => 'graded_segments',
+		  -bgcolor   => 'red',
+		  -connector => 'dashed',
+		 );
+
+# heterogeneous_segments
+$panel->add_track([[$zk154_2,$zk154_3],[$zk154_2,$xyz4]],
+		  -glyph => 'heterogeneous_segments',
+		  -label => 'heterogeneous_segments',
+		  -key   => 'heterogeneous_segments',
+		  -height => 10,
+		  -bump => 1,
+		  -predicted_color=>'orange',
+		  -confirmed_color=>'purple',
+		  -mysterious_color=>'red',
+  		  -connector_color => 'black',
+		 );
+
+# line
+$panel->add_track($short_segment,
+		  -glyph  => 'line',
+		  -label  => 'line',
+		  -key    => 'line',
+		  -bump   => 0,
+		  -height => 20,
+		  # -font   => 'gdMediumBold',
+		  -linewidth =>1,
+		  -bgcolor => 'green',
+		  -fgcolor => 'black',
+		 );
+
+# pinsertion
+my $pinsertion = $ftr->new(-segments=>[[10,10],[100,100],[200,200],[300,300],[400,400],
+				       [550,600],[650,650]]);
+$panel->add_track([$pinsertion],
+		  -glyph   => 'pinsertion',
+		  -label   => 'pinsertion',
+		  -key     => 'pinsertion',
+		  -height  => 10,
+		  -bgcolor => 'yellow',
+		 );
+
+# primers
+my $p = $ftr->new(-start=>200,-end=>600);
+$panel->add_track($p,
+		  -glyph  => 'primers',
+		  -label  => 'primers',
+		  -key    => 'primers',
+		  -height => 10,
+		  -linewidth =>1,
+		 );
+
+# processed_transcript
+my $trans1 = $ftr->new(-start=>50,-end=>10,-name=>'ZK154.1',-type=>"3'-UTR");
+my $trans2 = $ftr->new(-start=>100,-end=>50,-name=>'ZK154.2',-type=>'CDS');
+my $trans3 = $ftr->new(-start=>350,-end=>225,-name=>'ZK154.3',-type=>'CDS');
+my $trans4 = $ftr->new(-start=>650,-end=>500,-name=>'ZK154.3',-type=>'CDS');
+my $trans5 = $ftr->new(-start=>700,-end=>650,-name=>'ZK154.3',-type=>"5'-UTR");
+my $trans  = $ftr->new(-segments=>[$trans1,$trans2,$trans3,$trans4,$trans5]);
+$panel->add_track($trans,
+		  -glyph     => 'processed_transcript',
+		  -key       => 'processed_transcript',
+		  -label     => 'processed_transcript',
+		  -bgcolor   => 'aqua',
+		  # -height    => 5,
+		 );
+
+$panel->add_track($trans,
+		  -glyph    => 'processed_transcript',
+		  -key      => 'processed_transcript',
+		  -label    => 'processed_transcript',
+		  -bgcolor  => 'green',
+		  # -height   => 10,
+		  -thin_utr => 1);
+
+
+# redgreen_box
+$panel->add_track($partial_gene,
+		  -glyph     => 'redgreen_box',
+		  -label     => 'redgreen_box',
+		  -key       => 'redgreen_box',
+		 );
+
+# redgreen_segment
+$panel->add_track($partial_gene,
+		  -glyph     => 'redgreen_segment',
+		  -label     => 'redgreen_segment',
+		  -key       => 'redgreen_segment',
+		 );
+
+# rndrect
+$panel->add_track($partial_gene,
+		  -glyph     => 'rndrect',
+		  -label     => 'rndrect',
+		  -key       => 'rndrect',
+		 );
+
+# ruler_arrow
+$panel->add_track($partial_gene,
+		  -glyph => 'ruler_arrow',
+		  -label => 1,
+		  -key   => 'ruler_arrow',
+		  -base  => 1,
+		 );
+
+$panel->add_track($partial_gene,
+		  -glyph => 'ruler_arrow',
+		  -label => 1,
+		  -key   => 'ruler_arrow',
+		  -base  => 1,
+		  -parallel => 0,
+		 );
+
+# segments
+$panel->add_track([$zk154_1,[$zk154_2,$xyz4]],
+		  -glyph => 'segments',
+		  -label => 'segments',
+		  -key   => 'segments',
+		  -height => 10,
+		  -bump => 1,
+		  -bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'green' : 'blue'},
+		  -connector => sub { my $primary_tag = shift->primary_tag;
+				      $primary_tag eq 'transcript' ? 'hat'
+				    : $primary_tag eq 'alignment'  ? 'solid'
+				    : undef},
+		  -connector_color => 'black',
+		 # -draw_dna => 1,
+		 );
+
+# span
+my $big_span = $ftr->new(-start=>-400,-end=>3000);
+my $small_span = $ftr->new(-start=>290,-end=>600);
+$panel->add_track([$big_span,$small_span],
+		  -glyph => 'span',
+		  -label => 'span',
+		  -key   => 'span',
+		 );
+
+# splice_site
+$panel->add_track($partial_gene,
+		  -glyph => 'splice_site',
+		  -label => 'splice_site',
+		  -key   => 'splice_site',
+		  -direction => 'right',
+		 );
+
+# transcript
+$panel->add_track($trans,
+		  -glyph   => 'transcript',
+		  -label   => 'transcript',
+		  -key     => 'transcript',
+		  -bgcolor =>'yellow',
+		  -arrow_length=>10,
+		 );
+
+# transcript2
+$panel->add_track($trans,
+		  -glyph   => 'transcript2',
+		  -label   => 'transcript2',
+		  -key     => 'transcript2',
+		  -bgcolor => 'purple',
+		  -arrow_length=>10,
+		 );
+
+# translation
+$panel->add_track($dna,
+		  -glyph   => 'translation',
+		  -label   => 'translation',
+		  -key     => 'translation',
+		  -translation => '3frame',
+		  -frame0 => 'red',
+		  -frame1 => 'blue',
+		  -frame2 => 'green',
+		  -arrow_length => 10,
+		  -start_codons => 1,
+		  -show_sequence=> 1,
+		 );
+
+
+# triangle
+$panel->add_track([$pinsertion],
+		  -glyph   => 'triangle',
+		  -label   => 'triangle',
+		  -key     => 'triangle',
+		  -bgcolor => 'yellow',
+		  -point   => 1,
+		  -orient  => 'E',
+		 );
+
+
+# xyplot
+$panel->add_track($partial_gene,
+		  -glyph      => 'xyplot',
+		  -key        => 'xyplot',
+		  -label      => 'xyplot',
+		  -graph_type => 'boxes');
+
+
+my $type = ($CLASS eq 'GD') ? 'png' : 'svg';   # name of the output method to call
+binmode STDOUT;   # PNG data is binary; prevent newline translation where the platform does it
+print $panel->gd->$type;
+
+
+
+
+
+
+# Give the feature's segments the scores 10, 20, 30, ... in the order in
+# which its segments() method returns them.
+sub add_scores {
+  my ($feature) = @_;
+  my @segments = $feature->segments;
+  for my $i (0 .. $#segments) {
+    $segments[$i]->score(10 * ($i + 1));
+  }
+}
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biographics/all_glyphs.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biographics/dynamic_glyphs.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biographics/dynamic_glyphs.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biographics/dynamic_glyphs.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,142 @@
+#!/usr/bin/perl
+
+use lib '.','../..','./blib/lib','../../blib/lib','../..';
+use strict;
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+
+chomp (my $PKG = shift || '');   # image class; '' when absent, which triggers the usage message
+$PKG or die "\nUsage: dynamic_glyphs IMAGE_CLASS
+\t- where IMAGE_CLASS is one of GD or GD::SVG
+\t- GD generate png output; GD::SVG generates SVG.\n";
+
+my $ftr = 'Bio::Graphics::Feature';
+
+my $segment = $ftr->new(-start=>-100,-end=>1400,-name=>'ZK154',-type=>'clone');
+my $zk154_1 = $ftr->new(-start=>-50,-end=>800,-name=>'ZK154.1',-type=>'gene');
+my $zk154_2 = $ftr->new(-segments=>[[200,300],[380,800]],-name=>'ZK154.2',-type=>'gene');
+my $zk154_3 = $ftr->new(-start=>900,-end=>1200,-name=>'ZK154.3',-type=>'gene');
+
+my $zed_27 = $ftr->new(-segments=>[[550,600],[800,950],[1200,1300]],
+		   -name=>'zed-27',
+		   -subtype=>'exon',-type=>'transcript');
+my $abc3 = $ftr->new(-segments=>[[100,200],[350,400],[500,550]],
+		    -name=>'abc53',
+		     -strand => -1,
+		    -subtype=>'exon',-type=>'transcript');
+my $xyz4 = $ftr->new(-segments=>[[40,80],[100,120],[200,280],[300,320]],
+		     -name=>'xyz4',
+		     -subtype=>'predicted',-type=>'alignment');
+
+my $m3 = $ftr->new(-segments=>[[20,40],[30,60],[90,270],[290,300]],
+		   -name=>'M3',
+		   -subtype=>'predicted',-type=>'alignment');
+
+my $bigone = $ftr->new(-segments=>[[-200,-120],[90,270],[290,300]],
+		   -name=>'big one',
+		   -subtype=>'predicted',-type=>'alignment');
+
+my $fred_12 = $ftr->new(-segments=>[$xyz4,$zed_27],
+			-type => 'group',
+			-name =>'fred-12');
+
+my $confirmed_exon1 = $ftr->new(-start=>1,-stop=>20,
+				-type=>'exon',
+				-source=>'confirmed',
+				-name => 'confirmed1',
+			       );
+my $predicted_exon1 = $ftr->new(-start=>30,-stop=>50,
+				-type=>'exon',
+				-name=>'predicted1',
+				-source=>'predicted');
+my $predicted_exon2 = $ftr->new(-start=>60,-stop=>100,
+				-name=>'predicted2',
+				-type=>'exon',-source=>'predicted');
+
+my $confirmed_exon3 = $ftr->new(-start=>150,-stop=>190,
+				-type=>'exon',-source=>'confirmed',
+			       -name=>'abc123');
+my $partial_gene = $ftr->new(-segments=>[$confirmed_exon1,$predicted_exon1,$predicted_exon2,$confirmed_exon3],
+			     -name => 'partial gene',
+			     -type => 'transcript',
+			     -source => '(from a big annotation pipeline)'
+			    );
+my @segments = $partial_gene->segments;
+my $score = 10;
+foreach (@segments) {
+  $_->score($score);
+  $score += 10;
+}
+
+my $panel = Bio::Graphics::Panel->new(
+				      -gridcolor => 'lightcyan',
+				      -grid => 1,
+				      -segment => $segment,
+				      -spacing => 15,
+				      -width   => 600,
+				      -pad_top  => 20,
+				      -pad_bottom  => 20,
+				      -pad_left => 20,
+				      -pad_right=> 20,
+				      -key_style => 'between',
+         		 	      -image_class=> $PKG,
+				     );
+my @colors = $panel->color_names();
+
+my $t = $panel->add_track(
+			  transcript2 => [$abc3,$zed_27],
+			  -label => 1,
+			  -bump => 1,
+			  -key => 'Prophecies',
+			  #		  -tkcolor => $colors[rand @colors],
+			 );
+$t->configure(-bump=>1);
+$panel->add_track($segment,
+		  -glyph => 'arrow',
+		  -label => sub {scalar localtime},
+#		  -labelfont => 'gdMediumBoldFont',
+		  -double => 1,
+		  -bump => 0,
+		  -height => 10,
+		  -arrowstyle=>'regular',
+		  -linewidth=>1,
+		  -tick => 2,
+		 );
+
+$panel->add_track(generic => [$segment,$abc3,$zk154_1,[$zk154_2,$xyz4]],
+		  -label     => sub { $_[-1]->level == 0 } ,
+		  -bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'green' : 'blue'},
+		  -connector => sub { my $primary_tag = shift->primary_tag;
+				      $primary_tag eq 'transcript' ? 'hat'
+				    : $primary_tag eq 'alignment'  ? 'solid'
+				    : 'solid'},
+		  -connector_color => 'black',
+		  -height => 10,
+		  -bump => 1,
+#		  -tkcolor => $colors[rand @colors],
+		  -key => 'Signals',
+		 );
+
+my $track = $panel->add_track('transcript2'=> [$bigone],
+			      -label   => 1,
+			      -connector => 'solid',
+			      -point  => 0,
+			      -orient => 'N',
+			      -height => 8,
+			      -base => 1,
+			      -relative_coords => 1,
+			      -tick  => 2,
+			      -bgcolor => 'red',
+			      -key     => 'Dynamically Added');
+#$track->add_feature($bigone,$zed_27,$abc3);
+#$track->add_group($predicted_exon1,$predicted_exon2,$confirmed_exon3);
+$track->add_group($bigone,$zed_27,$zk154_2,$bigone);
+
+my $gd    = $panel->gd;
+my @boxes = $panel->boxes;
+my $red   = $panel->translate_color('red');
+# Each entry of @boxes is an array reference: the feature, then its remaining values.
+for my $box (@boxes) { my ($feature,@points) = @$box; }   # unpacked only; nothing is drawn with them
+my $type = ($PKG eq 'GD') ? 'png' : 'svg';
+binmode STDOUT;   # image data must not undergo newline translation
+print $gd->$type;


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biographics/dynamic_glyphs.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.gff
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.gff	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.gff	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,27 @@
+# this is a regular GFF-format file
+
+[intron:curated]
+glyph = segments
+description = 1
+bgcolor = green
+height = 5
+key = Curated Introns
+
+[structural:GenePair_STS]
+glyph = primers
+fgcolor = black
+bgcolor = blue
+connect = 1
+
+III	curated	Sequence	2729913	2752540	.	+	.	Sequence "H19M22.2a"
+III	curated	Sequence	2729913	2752540	.	+	.	Sequence "H19M22.2b"
+III	Expr_profile	Expression	2731006	2732545	.	+	.	Expr_profile "H19M22.2"
+III	GenePair_STS	structural	2731006	2732545	.	+	.	PCR_product "sjj_H19M22.2"
+III	curated	Sequence	2748310	2752540	.	+	.	Sequence "H19M22.2c"
+III	curated	intron	2730004	2730635	.	+	.	Sequence "H19M22.2b" ; Confirmed_by_EST
+III	curated	intron	2730004	2730635	.	+	.	Sequence "H19M22.2a" ; Confirmed_by_EST
+III	curated	intron	2730705	2730846	.	+	.	Sequence "H19M22.2a" ; Confirmed_by_EST
+III	curated	intron	2730705	2730846	.	+	.	Sequence "H19M22.2b" ; Confirmed_by_EST
+III	curated	intron	2731102	2731151	.	+	.	Sequence "H19M22.2a" ; Confirmed_by_EST
+III	curated	intron	2731102	2731151	.	+	.	Sequence "H19M22.2b" ; Confirmed_by_EST
+III	curated	intron	2731541	2732220	.	+	.	Sequence "H19M22.2b" ; Confirmed_by_cDNA

Added: trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biographics/feature_data.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+[general]
+pixels = 750
+bases = -1000..21000
+height = 12
+reference = B0511
+
+[Cosmid]
+glyph = segments
+fgcolor = blue
+key = C. elegans conserved regions
+
+[EST]
+glyph = segments
+bgcolor= yellow
+connector = solid
+height = 5
+
+[FGENESH]
+glyph = transcript2
+bgcolor = green
+description = 1
+
+[SwissProt]
+glyph = arrow
+base  = 1
+linewidth = 2
+fgcolor = red
+description = 1
+
+[P-element]
+glyph = triangle
+orient = S
+bgcolor = red
+fgcolor = white
+label = 1
+point = 1
+
+Cosmid	B0511	516-619
+Cosmid	B0511	3185-3294
+Cosmid	B0511	10946-11208
+Cosmid	B0511	13126-13511
+Cosmid	B0511	11394-11539
+Cosmid	B0511	14383-14490
+Cosmid	B0511	15569-15755
+Cosmid	B0511	18879-19178
+Cosmid	B0511	15850-16110
+Cosmid	B0511	66-208
+Cosmid	B0511	6354-6499
+Cosmid	B0511	13955-14115
+Cosmid	B0511	7985-8042
+Cosmid	B0511	11916-12046
+P-element	""	500-500
+P-element	MrQ	700-700
+P-element	MrR	10000-10000
+EST	yk260e10.5	15569-15724
+EST	yk672a12.5	537-618,3187-3294
+EST	yk595e6.5	552-618
+EST	yk595e6.5	3187-3294
+EST	yk846e07.3	11015-11208
+EST	yk53c10
+	yk53c10.3	12876-13577,13882-14121,14169-14535
+	yk53c10.5	18892-19154,15853-16219
+SwissProt	"PECANEX Protein"	5513-16656	"From SwissProt"
+FGENESH	"Predicted gene 1"	-1200--500,518-616,661-735,3187-3365,3436-3846	Pfam domain
+FGENESH	"Predicted gene 2"	5513-6497,7968-8136,8278-8383,8651-8839,9462-9515,10032-10705,10949-11340,11387-11524,11765-12067,12876-13577,13882-14121,14169-14535,15006-15209,15259-15462,15513-15753,15853-16219	Mysterious
+FGENESH	"Predicted gene 3"	16626-17396,17451-17597
+FGENESH	"Predicted gene 4"	18459-18722,18882-19176,19221-19513,19572-30000	"Transmembrane protein"
+

Added: trunk/packages/bioperl/branches/upstream/current/examples/biographics/lots_of_glyphs.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biographics/lots_of_glyphs.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biographics/lots_of_glyphs.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,201 @@
+#!/usr/bin/perl
+
+use lib '.','../..','./blib/lib','../../blib/lib','../..';
+use strict;
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+
+chomp (my $CLASS = shift);
+$CLASS or die "\nUsage: lots_of_glyphs IMAGE_CLASS
+\t- where IMAGE_CLASS is one of GD or GD::SVG
+\t- GD generate png output; GD::SVG generates SVG.\n";
+
+my $ftr = 'Bio::Graphics::Feature';
+my $segment = $ftr->new(-start=>-100,-end=>1000,-name=>'ZK154',-type=>'clone');
+my $zk154_1 = $ftr->new(-start=>-50,-end=>800,-name=>'ZK154.1',-type=>'gene');
+my $zk154_2 = $ftr->new(-start=>380,-end=>500,-name=>'ZK154.2',-type=>'gene');
+my $zk154_3 = $ftr->new(-start=>900,-end=>1200,-name=>'ZK154.3',-type=>'gene');
+
+my $zed_27 = $ftr->new(-segments=>[[400,500],[550,600],[800,950]],
+		       -name=>'zed-27',
+		       -subtype=>'exon',-type=>'gene');
+my $abc3 = $ftr->new(-segments=>[[100,200],[350,400],[500,550]],
+		     -name=>'abc53',
+		     -strand => -1,
+		     -subtype=>'exon',-type=>'gene');
+my $xyz4 = $ftr->new(-segments=>[[40,80],[100,120],[200,280],[300,320]],
+		     -name=>'xyz4',
+		     -subtype=>'predicted',-type=>'alignment');
+
+my $m3 = $ftr->new(-segments=>[[20,40],[30,60],[90,270],[290,300]],
+		   -name=>'M3',
+		   -subtype=>'predicted',-type=>'alignment');
+
+my $bigone = $ftr->new(-segments=>[[-200,-120],[90,270],[290,300]],
+		       -name=>'big one',
+		       -subtype=>'predicted',-type=>'gene');
+
+my $fred_12 = $ftr->new(-segments=>[$xyz4,$zed_27],
+			-type => 'group',
+			-name =>'fred-12');
+
+my $confirmed_exon1 = $ftr->new(-start=>1,-stop=>20,
+				-type=>'exon',
+				-source=>'confirmed',
+				-name => 'confirmed1',
+			       );
+my $predicted_exon1 = $ftr->new(-start=>30,-stop=>50,
+				-type=>'exon',
+				-name=>'predicted1',
+				-source=>'predicted');
+my $predicted_exon2 = $ftr->new(-start=>60,-stop=>100,
+				-name=>'predicted2',
+				-type=>'exon',-source=>'predicted');
+
+my $confirmed_exon3 = $ftr->new(-start=>150,-stop=>190,
+				-type=>'exon',-source=>'confirmed',
+				-name=>'abc123');
+my $partial_gene = $ftr->new(-segments=>[$confirmed_exon1,$predicted_exon1,$predicted_exon2,$confirmed_exon3],
+			     -name => 'partial gene',
+			     -type => 'transcript',
+			     -source => '(from a big annotation pipeline)'
+			    );
+my @segments = $partial_gene->segments;
+my $score = 10;
+foreach (@segments) {
+  $_->score($score);
+  $score += 10;
+}
+
+my $panel = Bio::Graphics::Panel->new(
+#				      -grid => [50,100,150,200,250,300,310,320,330],
+				      -gridcolor => 'lightcyan',
+				      -grid => 1,
+				      -segment => $segment,
+#				      -offset => 300,
+#				      -length  => 1000,
+				      -spacing => 15,
+				      -width   => 600,
+				      -pad_top  => 20,
+				      -pad_bottom  => 20,
+				      -pad_left => 20,
+				      -pad_right=> 20,
+#				      -bgcolor => 'teal',
+#				      -key_style => 'between',
+				      -key_style => 'bottom',
+				      -image_class => $CLASS,
+				     );
+my @colors = $panel->color_names();
+
+my $t = $panel->add_track(
+			  #		  generic => [$abc3,$zed_27],
+			  transcript2 => [$abc3,$zed_27],
+			  -label => 1,
+			  -bump => 1,
+			  -key => 'Prophecies',
+			  #		  -tkcolor => $colors[rand @colors],
+			 );
+$t->configure(-bump=>1);
+$panel->add_track($segment,
+		  -glyph => 'arrow',
+		  -label => 'base pairs',
+		  -double => 1,
+		  -bump => 0,
+		  -height => 10,
+		  -arrowstyle=>'regular',
+		  -linewidth=>1,
+		  #		  -tkcolor => $colors[rand @colors],
+		  -tick => 2,
+		 );
+$panel->unshift_track(generic => [$segment,$zk154_1,$zk154_2,$zk154_3,[$xyz4,$zed_27]],
+		      -label     => sub { my $feature = shift; $feature->sub_SeqFeature>0},
+		      -bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'olive' : 'red'},
+		      -connector => sub { my $feature = shift;
+					  my $type = $feature->primary_tag;
+					  $type eq 'group'      ? 'dashed'
+					    : $type eq 'transcript' ? 'hat'
+					      : $type eq 'alignment'  ? 'solid'
+						: undef},
+		      -all_callbacks => 1,
+		      -connector_color => 'black',
+		      -height => 10,
+		      -bump => 1,
+		      -linewidth=>2,
+		      #		  -tkcolor => $colors[rand @colors],
+		      -key => 'Signs',
+		     );
+
+my $track = $panel->add_track(-glyph=> sub { shift->primary_tag eq 'gene' ? 'transcript2': 'generic'},
+			      -label   => sub { $_[-1]->level == 0 } ,
+			      -connector => sub { return shift->type eq 'group' ? 'dashed' : ''},
+			      -point  => 0,
+			      -orient => 'N',
+			      -height => 8,
+			      -base => 1,
+			      -relative_coords => 1,
+			      -tick  => 2,
+			      -all_callbacks => 1,
+			      -bgcolor => 'red',
+			      -key     => 'Dynamically Added');
+$track->add_feature($bigone,$zed_27,$abc3);
+$track->add_group($predicted_exon1,$predicted_exon2,$confirmed_exon3);
+
+$panel->add_track(
+		  [$abc3,$zed_27,$partial_gene],
+		  -bgcolor   => sub { shift->source_tag eq 'predicted' ? 'green' : 'blue'},
+		  -glyph   => 'transcript',
+#		  -glyph   => sub { my $feature = shift; 
+#				    return $feature->source_tag eq 'predicted'
+#				      ? 'ellipse' : 'transcript'},
+		  -label       => sub { shift->sub_SeqFeature > 0 },
+#		  -label       => 1,
+#		  -description => sub { shift->sub_SeqFeature > 0 },
+		  -description => sub {
+		    my $feature = shift;
+		    return 1   if $feature->primary_tag eq 'transcript';
+		    return '*' if $feature->source_tag eq 'predicted';
+		    return;
+		  },
+		  -font2color  => 'red',
+		  -bump => +1,
+#		  -tkcolor => $colors[rand @colors],
+		  -key => 'Portents',
+		 );
+$panel->add_track(segments => [$segment,$zk154_1,[$zk154_2,$xyz4]],
+		  -label     => 1,
+		  -bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'green' : 'blue'},
+		  -connector => sub { my $primary_tag = shift->primary_tag;
+				      $primary_tag eq 'transcript' ? 'hat'
+				    : $primary_tag eq 'alignment'  ? 'solid'
+				    : undef},
+		  -connector_color => 'black',
+		  -height => 10,
+		  -bump => 1,
+#		  -tkcolor => $colors[rand @colors],
+		  -key => 'Signals',
+		 );
+$panel->add_track(generic => [],
+		  -key => 'Foobar');
+
+$panel->add_track(graded_segments => $partial_gene,
+		  -bgcolor =>'blue',
+		  -label   => 1,
+		  -key     => 'Scored thing');
+
+$panel->add_track(diamond => [$segment,$zk154_1,$zk154_2,$zk154_3,$xyz4,$zed_27],
+		  -bgcolor =>'blue',
+		  -label   => 1,
+		  -key     => 'pointy thing');
+
+# Render the panel and fetch the list of boxes it reports.
+my $gd    = $panel->gd;
+my @boxes = $panel->boxes;
+my $red   = $panel->translate_color('red');
+for my $box (@boxes) {
+  # first element of each box goes to $feature, the rest to @points
+  my ($feature,@points) = @$box;
+#  $gd->rectangle(@points,$red);
+}
+#$gd->filledRectangle(0,0,20,200,1);
+#$gd->filledRectangle(600-20,0,600,200,1);
+# 'png' when $CLASS is 'GD', 'svg' otherwise; the name is used as a method on $gd
+my $type = ($CLASS eq 'GD') ? 'png' : 'svg';
+print $gd->$type;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biographics/lots_of_glyphs.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/biographics/render_sequence.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/biographics/render_sequence.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/biographics/render_sequence.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,125 @@
+#!/usr/bin/perl
+
+use strict;
+use lib '.','../blib/lib';
+use Bio::DB::BioFetch;
+use Bio::Graphics;
+
+my $accession = shift;
+if (!defined $accession || $accession =~ /^-/) { die <<END; }
+Usage: $0 <accession> [start] [stop]
+   Render a GenBank/EMBL accession into drawable form.
+   Return as a GIF or PNG image on standard output.
+
+   If start and stop are specified, then that segment
+   will be displayed.
+
+   To view, pipe to a viewer program as shown below.
+
+Example to try:
+   render_sequence.pl CEF58D5 | display -
+
+By default, will look for accession in the "embl" namespace.  To
+choose other namespaces, use these formats:
+
+  swall:CEF58D5
+  refseq:NC_001320
+
+END
+;
+
+my ($start,$stop) = @ARGV;
+
+my $db = 'embl';
+if ($accession =~ /^(embl|swall|refseq):(.+)/) {
+  $db        = $1;
+  $accession = $2;
+}
+
+my $bf = eval {require Bio::DB::FileCache}
+  ? Bio::DB::FileCache->new(-seqdb=>Bio::DB::BioFetch->new(-db=>$db),
+			    -file =>'/usr/tmp/my_seq_cache',
+			    -keep =>1)
+  : Bio::DB::BioFetch->new(-db=>$db);
+
+warn "fetching...\n";
+my $seq = $bf->get_Seq_by_id($accession);
+
+my @features = $seq->all_SeqFeatures;
+my @CDS      = grep {$_->primary_tag eq 'CDS'}  @features;
+my @gene     = grep {$_->primary_tag eq 'gene'} @features;
+my @tRNAs    = grep {$_->primary_tag eq 'tRNA'} @features;
+
+warn "rendering...\n";
+$start = 1 unless defined $start;
+$stop  = $seq->length   unless defined $stop;
+
+my $panel = Bio::Graphics::Panel->new(
+				      -offset  => $start,
+				      -length  => $stop - $start + 1,
+				      -width   => 1000,
+				      );
+$panel->add_track(arrow => 
+		  Bio::Graphics::Feature->new(-start => $start,
+					      -stop   => $stop,
+					      -name   => $seq->display_id),
+		  -bump => 0,
+		  -double=>1,
+		  -tick => 2);
+
+$panel->add_track(transcript2  => \@gene,
+		  -bgcolor    =>  'blue',
+		  -fgcolor    =>  'black',
+		  -key        => 'Genes',
+		  -bump       =>  +1,
+		  -height     =>  10,
+		  -label      => \&gene_label,
+		  -description=> \&gene_description,
+		 );
+
+$panel->add_track(transcript2  => \@CDS,
+		  -bgcolor    =>  'cyan',
+		  -fgcolor    =>  'black',
+		  -key        => 'CDS',
+		  -bump       =>  +1,
+		  -height     =>  10,
+		  -label      => \&gene_label,
+		  -description=> \&gene_description,
+		 );
+
+$panel->add_track(generic    => \@tRNAs,
+		  -bgcolor   =>  'red',
+		  -fgcolor   =>  'black',
+		  -key       => 'tRNAs',
+		  -bump      =>  +1,
+		  -height    =>  8,
+		  -label      => \&gene_label,
+		 );
+
+my $gd = $panel->gd;
+
+print $gd->can('png') ? $gd->png : $gd->gif;
+
+# Label callback: returns the first value of the feature's "product" tag,
+# or failing that of its "gene" tag.  Returns undef when the feature has
+# neither tag or does not implement has_tag at all.
+sub gene_label {
+  my ($feat) = @_;
+  my @values;
+  for my $tag ('product', 'gene') {
+    if ($feat->can('has_tag') && $feat->has_tag($tag)) {
+      @values = $feat->each_tag_value($tag);
+      last;
+    }
+  }
+  return $values[0];
+}
+# Description callback: returns the first value of the feature's "note"
+# tag.  Values longer than 30 characters are cut to their first 30
+# characters followed by "...".  Returns nothing when there is no note.
+sub gene_description {
+  my ($feat) = @_;
+  return unless $feat->can('has_tag') && $feat->has_tag('note');
+  my @values = $feat->each_tag_value('note');
+  return unless @values;
+  my $text = $values[0];
+  $text = substr($text, 0, 30) . '...' if length $text > 30;
+  return $text;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/biographics/render_sequence.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/bioperl.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/bioperl.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/bioperl.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,654 @@
+#!/usr/bin/perl
+
+# bioperl.pl
+# cjm at fruitfly.org
+
+use strict;
+use lib '.';
+no strict "vars";
+use Data::Dumper;
+use Bio::Perl;
+use Bio::SeqIO;
+use Getopt::Long;
+my $h = {};
+
+GetOptions($h,
+           "file|f=s",
+           );
+my @cmds = get_default_cmds();
+shell($h, \@cmds, @ARGV);
+
+# prepare for some seriously hacky code....
+sub shell {
+    my $h = shift;
+    my @cmds = @{shift || []};
+    my @args = @_;
+    my $prompt = $ENV{BIOPERL_PROMPT} || "BioPerl> ";
+    my $quit = 0;
+    my @lines = ();
+    my $r;
+    my $rv;
+    my $seq;
+    my @pseqs = ();
+    my $seqio;
+    my $wseqio;
+    my $fastadb;
+    my $options =
+      {echo=>0, chatty=>10};
+
+    my $loadfn = $h->{'file'};
+    if ($loadfn) {
+        @lines = ("load '$loadfn'");
+    }
+
+    sub hr {
+	print "\n===============================\n";
+    }
+
+    sub nl {
+	print "\n";
+    }
+
+    sub demo {
+        if (! -d 't/data') {
+            print "To run the demo, you must be in the bioperl directory\n";
+        }
+        @lines = 
+          split(/\n/,
+                q[
+                  %keep = %$options;
+                  +format ''
+                  +outformat ''
+                  +echo 1
+                  # BioPerl shell utility - Demo
+                  #
+                  # We're now going to take a tour
+                  # through some of the features of
+                  # this tool.
+                  #
+                  #
+                  # This demo will go through some of
+                  # the major commands, feeding you
+                  # the commands as you go. all you have
+                  # to do is hit <ENTER> every time
+                  # you see the prompt $prompt
+                  # you will then see the output of
+                  # the command on your terminal window.
+                  # type 'q' to end the tour
+                  # at any time.
+                  #
+                  waitenter
+                  # PARSING GENBANK RECORDS
+                  # -----------------------
+                  # to parse genbank files, use
+                  # the read_seq() method, or
+                  # simply use the '<' command.
+                  #
+                  # First of all we're going to
+                  # take a look at the file
+                  # 't/data/test.genbank'
+                  # Let's examine the file itself
+                  # using the unix command "cat"
+                  # (you can use any unix command
+                  #  using the ! at the beginning
+                  #  of a line)
+                  ^!cat t/data/test.genbank
+                  waitenter
+                  # Ok, you can see this is a
+                  # typical file of genbank records.
+                  # Let's get the first sequence
+                  # from the file
+                  ^<t/data/test.genbank
+                  waitenter
+                  # we have parsed the first
+                  # record of the file, and placed
+                  # the sequence object into
+                  # the variable $seq
+                  #
+                  # if you are familiar with perl
+                  # objects and the bioperl object
+                  # model, you can interact with
+                  # the object; for instance, to
+                  # display the residues we use the
+                  # seq() method like this:
+                  ^print $seq->seq()
+                  waitenter
+                  #
+                  # we can cycle through all the
+                  # sequences in the file using
+                  # the ',' command.
+                  ^,
+                  waitenter
+                  # this fetched the second sequence
+                  # and placed it in the $seq variable
+                  #
+                  # we can change the output format
+                  # by setting the 'outformat' parameter
+                  # like this:
+                  ^+outformat fasta
+                  ^,
+                  waitenter
+                  # now the sequences are output in
+                  # fasta format
+                  # to change to embl format:
+                  ^+outformat embl
+                  ^,
+                  waitenter
+                  # we can also fetch _all_ seqs from
+                  # a file; for this example we will
+                  # use t/data/swiss.dat, which is in
+                  # swiss format. usually bioperl can guess
+                  # the file format from the file extension
+                  # but this isn't possible here, so we
+                  # must help by setting the input format:
+                  ^+format swiss
+                  # now lets get all the sequences, like this:
+                  ^<*t/data/swiss.dat
+                  waitenter
+                  # typing <* is equivalent to
+                  # using the read_seqs() function,
+                  # like this:
+                  ^read_seqs('t/data/swiss.dat')
+                  waitenter
+                  # we now have all the sequences in
+                  # the array @seqs
+                  # we can write these all out as fasta
+                  ^+outformat fasta
+                  ^>*
+                  # we can also write these out to a file:
+                  ^>*myfile.tmp
+                  ^!cat myfile.tmp
+                  #
+                  # RANDOM ACCESS OF FASTA FILES
+                  # END
+                  +echo 0
+                  %$options = %keep
+                 ]);
+
+        @lines = 
+          map {
+              s/^ *//;
+              $_;
+          } @lines;
+    }
+
+    sub error {
+        if ($error) {
+            print "Error:\n$error";
+        }
+        else {
+            print "No errors have been reported\n";
+        }
+    }
+
+    sub fmt {
+        $options->{format} = shift if @_;
+        print "format=$options->{format}\n";
+    }
+
+    # should this move to Bio::Perl ?
+    # Opens $filename for reading with Bio::SeqIO, stores the reader in
+    # the shell's $seqio variable and returns it.  An optional second
+    # argument replaces the 'format' shell option; when that option is
+    # defined it is passed to Bio::SeqIO as -format.
+    # Without a filename nothing is opened: a usage warning is issued,
+    # $seqio keeps its current value and nothing is returned.
+    sub seqio {
+        my $filename = shift;
+        $options->{format} = shift if @_;
+
+        if( !defined $filename ) {
+            warn "seqio(filename [, format]) - usage incorrect: no filename given\n";
+            return;
+        }
+
+        my @args = ('-file' => $filename);
+        push(@args, '-format' => $options->{format}) if defined $options->{format};
+        $seqio = Bio::SeqIO->new(@args);
+
+        $seqio;
+    }
+
+    sub wseqio {
+        my $filename = shift;
+        $options->{format} = shift if @_;
+
+        my @args = ();
+        if ($filename && $filename !~ /^\>/) {
+            $filename = ">$filename";
+        }
+        push(@args, -file => "$filename") if $filename;
+        push(@args, -fh => \*STDOUT) unless $filename;
+        push(@args, -format => $options->{outformat}) if $options->{outformat};
+        $wseqio = Bio::SeqIO->new( @args );
+
+        $wseqio;
+    }
+
+    sub show_seq {
+        return unless $seq;
+        if ($wseqio) {
+            $wseqio->write_seq($seq);
+        }
+        else {
+            printf "seq display id: %s\n", $seq->display_id;
+        }
+    }
+
+    sub addseq {
+        push(@pseqs, @_);
+        while (scalar(@pseqs) > 50) {
+            # todo - history variable
+            shift @pseqs;
+        }
+    }
+
+    # Reads the next sequence from the current $seqio reader into $seq,
+    # records it in the history via addseq, displays it via show_seq and
+    # returns it.  If reading dies, the message is kept in $error (shown
+    # by the 'error' command) and nothing is recorded, shown or returned.
+    # Without a reader, a hint is printed and the current $seq is shown.
+    sub next_seq {
+        if ($seqio) {
+            eval {
+                $seq = $seqio->next_seq;
+            };
+            if ($@) {
+                $error = $@;
+                print "There was an error getting the seq. Type 'error'\n";
+                print "for full details\n";
+                print "(Maybe you have to explicitly set the format?)\n";
+                # $seq still holds the previous sequence here; do not
+                # record or display it a second time
+                return;
+            }
+            # next_seq may return undef (e.g. when the stream is
+            # exhausted); keep that out of the history
+            addseq($seq) if defined $seq;
+        }
+        else {
+            print STDERR "use read_seq first\n";
+        }
+        show_seq;
+        $seq;
+    }
+
+    sub next_seqs {
+        @seqs = ();
+        if ($seqio) {
+            while ($seq = $seqio->next_seq) {
+                printf "%s\n", $seq->display_id;
+                push(@seqs, $seq);
+            }
+        }
+        $seq = $seqs[$#seqs] if @seqs;
+        @seqs
+    }
+
+    sub read_seq {
+        seqio(@_);
+        next_seq();
+    }
+
+    sub read_seqs {
+        seqio(@_);
+        next_seqs();
+    }
+
+    sub write_seq {
+        wseqio(@_);
+        $wseqio->write_seq($seq) if $seq;
+    }
+
+    sub write_seqs {
+        wseqio(@_);
+        map {
+            $wseqio->write_seq($_)
+        } @seqs;
+    }
+
+    sub pod {
+        if (!-d "Bio") {
+            print "You need to be in the bioperl directory!\n";
+        }
+        else {
+            my $mod = shift;
+            unix("pod2text", "Bio/$mod.pm");
+        }
+    }
+    sub fastadb {
+        require "Bio/DB/Fasta.pm";
+        my $f = shift;
+        $fastadb = Bio::DB::Fasta->new($f);
+        print "Set \$fastadb object\n";
+        $fastadb;
+    }
+
+    sub subseq {
+        if (!$fastadb) {
+            fastadb(shift);
+        }
+        $seq = $fastadb->get_Seq_by_id(shift);
+        if (@_) {
+            printf "%s\n",
+              $seq->subseq(@_);
+        }
+        $seq;
+    }
+
+    # Reads the named file and queues its lines, newlines removed, in
+    # @lines, replacing whatever was queued.  When no name is given or
+    # the file cannot be opened, the queue is left empty and the reason
+    # is reported on STDERR.
+    sub load {
+        my $file = shift;
+        @lines = ();
+        if (!defined $file) {
+            print STDERR "load: no file name given\n";
+            return;
+        }
+        # three-argument open: the name is always taken as a plain file
+        # to read, never as a mode or pipe specification
+        my $fh;
+        if (!open($fh, '<', $file)) {
+            print STDERR "load: cannot open '$file': $!\n";
+            return;
+        }
+        @lines = <$fh>;
+        chomp @lines;
+        close($fh);
+    }
+
+    sub waitenter {
+        print "<hit ENTER to continue>";
+        <STDIN>;
+    }
+
+    sub showintro {
+        hr;
+        print "This is a text-based commandline interface to BioPerl;\n";
+        print "\n";
+    }
+
+    sub checkoptions {
+    }
+
+    # Prints shell options, one "name:value" line each.  With an
+    # argument only that option is shown; without one, every key of
+    # %$options is listed in sorted order.
+    sub showoptions {
+        my $k = shift;
+        my @k = defined $k ? ($k) : sort keys %$options;
+        foreach my $ok (@k) {
+            # an unset option is shown as an empty value
+            my $v = defined $options->{$ok} ? sprintf("%s", $options->{$ok}) : '';
+            if ($v =~ /HASH/) {
+                # hide perl internal details
+                # from user; if they are experienced
+                # perlhackers they can just
+                # type "x $options" to see the
+                # gory details
+                $v = "undisplayable";
+            }
+            printf("%20s:%s\n",
+                   $ok,
+                   $v);
+        }
+    }
+
+    sub set {
+        my ($k,$v) = @_;
+        if (defined($v)) {
+            $options->{$k} = $v;
+            checkoptions;
+        }
+        else {
+            showoptions($k);
+        }
+#        if ($k eq "format") {
+#            seqio;
+#        }
+        if ($k eq "outformat") {
+            wseqio;
+        }
+    }
+
+    sub echo {
+        my $e = shift;
+        if (defined($e)) {
+            set("echo", $e);
+        }
+        else {
+            set("echo", !$options->{echo});
+        }
+    }
+
+    sub options {
+        map {print "$_ = $options->{$_}\n"} keys%$options;
+    }
+
+    sub showcommands {
+        hr;
+        print "BioPerl Shell Commands:\n";
+        my $layout = "%5s : %-20s - %s\n";
+        printf $layout, "cmd", "function", "summary";
+        printf "%s\n", ("-" x 40);
+        foreach my $c (@cmds) {
+            my $sc = $c->{shortcut};
+            $sc =~ s/\\//g;
+            printf($layout,
+                   $sc,
+                   $c->{'func'} . "()",
+                   $c->{'summary'}
+                   );
+        }
+        
+    }
+
+    sub showexamples {
+        print "\nExamples:\n-------\n";
+    }
+
+    sub showvariables {
+        hr;
+        print "Shell variables:\n";
+        print q[
+                $seq     : Bio::SeqI object
+                $seqio   : Bio::SeqIO object
+                @pseqs   : array of previous Bio::SeqI objects
+               ];
+        nl;
+    }
+
+    sub welcome {
+	print "Welcome to the BioPerl shell interface!\n\n";
+        print "\n\nType 'help' for instructions\n";
+        print "\n\nType 'demo' for demonstration\n";
+        print "\n\nThis is ALPHA software - commands may change\n";
+        print "-lots more commands need to be added to take full\n";
+        print "advantage of the bioperl functionality\n\n";
+    }
+
+    sub help {
+        my $topic = shift;
+        my $c;
+        if ($topic) {
+            ($c) = grep {$_->{func} eq $topic} @cmds;
+        }
+        if ($c) {
+            print "Function:   $c->{func}\n";
+            print "Shortcut:   $c->{shortcut}\n" if $c->{shortcut};
+            print "Summary:    $c->{summary}\n" if $c->{summary};
+            print "=======\n";
+            print "$c->{docs}\n" if $c->{docs};
+        }
+        elsif ($topic eq "advanced") {
+            hr;
+            nl;
+        }
+        else {
+            hr;
+            print "\nBioPerl Shell Help\n\n";
+            showintro;
+            waitenter;
+            showcommands;
+            waitenter;
+            showvariables;
+            waitenter;
+            showexamples;
+            nl;
+            nl;
+            nl;
+            print "Type \"demo\" for an interactive demo of commands\n\n";
+            print "Type \"help advanced\" for advanced options\n\n";
+            hr;
+            nl;
+        }
+    }
+
+    sub p {
+	print shift;
+	print "\n";
+    }
+
+    sub x {
+	print Dumper shift;
+	print "\n";
+    }
+
+    # trick to allow barewords as keywords...
+    sub advanced {"advanced"}
+
+    sub unix {
+        my @cmds = @_;
+        my $c = join(" ", @cmds);
+        print `$c`;
+    }
+
+    welcome;
+    require Term::ReadLine;
+    require Shell;
+
+    checkoptions;
+    print "\n";
+    my $termline = shift || Term::ReadLine->new($prompt);
+
+    my $rcfile = "$ENV{HOME}/.goshellrc";
+    if (-f $rcfile) {
+	open(F, $rcfile);
+	@lines = <F>;
+	close(F);
+	
+    }
+    my $end_signal = "";
+    while (!$quit) {
+	if ($end_signal) {
+	    @lines = ($lines);
+	    while ($end_signal && ($line = $termline->readline("? "))) {
+		if($line !~ /$end_signal/) {
+		    $lines[0].= "\n$line";
+		}
+		else {
+		    $end_signal = "";
+		}
+	    }
+	    next;
+	}
+	my $line = 
+	  @lines ? shift @lines : $termline->readline($prompt);
+        if ($line =~ /^\^/) {
+            $line =~ s/^\^//;
+            print "$prompt$line";
+            my $e = <STDIN>;
+            if ($e =~ /^q/) {
+                $line = "";
+                @lines = ();
+            }
+        }
+        if ($options->{echo} && $line !~ /\+?wait/) {
+            if ($line =~ /^\#/) {
+                print "$line\n";
+            }
+            else {
+                print "$prompt$line\n";
+            }
+            if ($options->{sleep}) {
+                sleep $options->{sleep};
+            }
+            if ($options->{wait}) {
+                sleep $options->{wait};
+            }
+        }
+	my ($cmd, @w) = split(' ',$line);
+
+	$_ = $cmd;
+	if (/^\<\<(.*)/) {
+	    $end_signal = $1;
+	}
+
+        # check for shortcuts
+        my $selected;
+        foreach my $c (@cmds) {
+            my $shortcut = $c->{'shortcut'};
+            next unless $shortcut;
+            if ($line =~ /^$shortcut(.*)/) {
+                if (!defined($selected) ||
+                    length($shortcut) > length($selected->{shortcut} || "")) {
+                    # get the most specific match
+                    $selected = $c;
+                }
+            }
+        }
+        if ($selected) {
+            my $shortcut = $selected->{'shortcut'};
+            if ($line =~ /^$shortcut(.*)/) {
+                my @w = map {"'".$_."'" } split(' ', $1);
+                $line = $selected->{'func'}." ".join(", ", @w);
+            }
+        }
+
+	$rv = eval $line;
+#        print "\n";
+#        print "RV=$rv;;;\n";
+	if ($@) {
+	    print STDERR $@;
+	}
+        if ($options->{sleep}) {
+            sleep $options->{sleep};
+        }
+        if ($options->{wait}) {
+            sleep $options->{wait};
+            $options->{wait} = 0;
+        }
+
+    }
+}
+
+# Returns the built-in shell command table as a list of hash refs.
+# Each entry has:
+#   func     - name of the shell function to call
+#   shortcut - optional regex fragment; the main loop matches it against
+#              the start of an input line
+#   summary  - one-line description printed by showcommands/help
+sub get_default_cmds {
+    return
+      (
+       {
+        func         =>  'read_seq',
+        shortcut     =>  '\<',
+        summary      =>  'read a Seq from a file',
+       },
+
+       {
+        func         =>  'next_seq',
+        shortcut     =>  ',',
+        summary      =>  'get the next Seq',
+       },
+
+       {
+        func         =>  'read_seqs',
+        shortcut     =>  '\<\*',
+        summary      =>  'read all Seqs from a file',
+       },
+
+       {
+        func         =>  'write_seq',
+        shortcut     =>  '\>',
+        summary      =>  'write a Seq to screen/file',
+       },
+
+       {
+        func         =>  'write_seqs',
+        shortcut     =>  '\>\*',
+        summary      =>  'write all Seqs to screen/file',
+       },
+
+       {
+        func         =>  'fastadb',
+        shortcut     =>  'fa',
+        summary      =>  'fast fasta access',
+       },
+
+       {
+        func         =>  'subseq',
+        summary      =>  'get a subseq from a fastadb',
+       },
+
+       {
+        func         =>  'set',
+        shortcut     =>  '\+',
+        summary      =>  'set a shell parameter',
+       },
+
+       {
+        func         =>  'unix',
+        shortcut     =>  '\!',
+        summary      =>  'run a unix command',
+       },
+
+       {
+        func         =>  'x',
+        summary      =>  'display variable (and internals) using dumper',
+       },
+
+      );
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/bioperl.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/cluster/dbsnp.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/cluster/dbsnp.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/cluster/dbsnp.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+# allenday at ucla.edu
+# parses a dbsnp xml file, prints some info for each refsnp and subsnp
+
+use strict;
+use Bio::ClusterIO;
+use Bio::Root::IO;
+use IO::File;
+
+# Path to a gzip-compressed dbSNP XML file, taken from the command line.
+my $file = shift @ARGV;
+die "Usage: $0 <dbsnp.xml.gz>\n" unless defined $file and length $file;
+
+# Decompress through a pipe.  The list form of open() hands the file name
+# to zcat as a single argument, so spaces or shell metacharacters in it
+# are never interpreted by a shell.
+open(my $zcat, '-|', 'zcat', $file)
+	or die "Cannot run zcat on '$file': $!\n";
+
+# NOTE(review): assumes Bio::ClusterIO accepts a plain lexical filehandle
+# for -fh just as it accepts an IO::File object -- confirm.
+my $io = Bio::ClusterIO->new	(	-tempfile => 0,
+					-format   => 'dbsnp',
+					-fh       => $zcat,
+				);
+
+# One line per cluster, followed by one indented line per subsnp.
+while(my $cluster = $io->next_cluster){
+	print $cluster->id,"\t", $cluster->observed, "\n";
+
+	foreach my $subsnp ($cluster->each_subsnp){
+		print "\t\t\t", $subsnp->id, "\t", $subsnp->handle, "\t", $subsnp->method, "\n";
+
+	}
+}
+
+# A failing zcat (missing or corrupt file) only becomes visible when the
+# pipe is closed; report it without discarding what was already printed.
+close($zcat)
+	or warn "zcat reported a problem reading '$file' (exit status $?)\n";


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/cluster/dbsnp.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/contributed/nmrpdb_parse.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/contributed/nmrpdb_parse.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/contributed/nmrpdb_parse.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,184 @@
+#!/usr/bin/perl -w
+# $Id: nmrpdb_parse.pl,v 1.1 2003/07/07 18:20:58 bosborne Exp $
+use strict;
+
+# This program will read in an NMR derived PDB file containing
+# multiple conformers, and will allow the user to extract either
+# one or all of the models to separate files.
+#
+# Although the program will run interactively, the command line
+# usage is "nmrsplit [input.file] [number of model to extract]"
+#
+#
+#
+# Written 13/12/00 by Simon Andrews (simon.andrews at bbsrc.ac.uk)
+
+# Submitted to bioperl script project 2001/08/06
+
+# Description:  Script which reads  an NMR-derived  multiple structure
+# PDB file, and will either extract a single structure from it, or
+# will  extract all of the structures into single  files.  This  is
+# useful when you want to  work with a single representative structure
+# from an NMR  ensemble - especially in conjunction  with the OLDERADO
+# database (http://neon.chem.le.ac.uk/olderado/) which  finds  the
+# most representative structure from an ensemble.
+
+
+# Script-wide state.  The model(), writeout() and readout() subroutines
+# defined further down read and modify these variables directly.
+my $Input = $ARGV[0];  	# File to be read
+my $Pullout = $ARGV[1];	# Specific model to extract
+my $Header = ""; 	# String to hold the PDB header
+my $Model = ""; 	# String to hold individual models
+my $Output; 		# Prefix for output files
+my $Modno = 1; 		# Number of the model being processed
+
+# Keep asking until we have the name of a readable file.
+while () {
+    if ($Input)	{
+	last if -r $Input;
+	print "\"$Input\" does not exist, or could not be read\n";
+    }
+
+    print "\nEnter name of multiple PDB file: ";
+    $Input = <STDIN>;
+
+    # End of input (Ctrl-D, or STDIN is not a terminal): stop instead of
+    # prompting forever.
+    die "\nNo input file given\n" unless defined $Input;
+
+    chomp $Input;
+    $Input =~ s/^\s*//;
+    $Input =~ s/\s*$//;
+}
+
+# Keep asking until we have a model number, or an empty answer meaning
+# "extract every model".  A value of 0 also counts as "no model chosen".
+while () {
+    if ($Pullout) {
+	# A string made only of digits is necessarily a whole number, so
+	# this single test is all the validation that is needed.
+	last if $Pullout =~ /^\d+$/;
+	print "\"$Pullout\" should be a number\n";
+    }
+
+    print "\nEnter number of specific model to extract (Return for none): ";
+    $Pullout = <STDIN>;
+    $Pullout = "" unless defined $Pullout;	# end of input counts as "none"
+    chomp $Pullout;
+    $Pullout =~ s/^\s*//;
+    $Pullout =~ s/\s*$//;
+
+    last unless ($Pullout);
+}
+
+
+($Output = $Input) =~ s/\.\w*$//; # Take off everything after the last . to use as prefix
+
+
+# Three-argument open: the name is used literally, never as a mode or command.
+open (PDB, '<', $Input) || die "Can't open $Input because $!";
+
+
+
+########## Read the header information ####################
+
+
+# Everything before the first MODEL record is copied into every output file.
+while (<PDB>) {
+    if (/^MODEL\b/){last;}
+    $Header = $Header . $_;
+}
+
+
+######### Read the separate models #######################
+
+
+while () {
+
+    model();
+    last unless $Model;	       # Nothing read: we're past the last model
+
+    if ($Pullout) {		 # Check if we're writing one or all
+	last if ($Modno > $Pullout);# No point continuing if we've got the one we want
+	readout();
+    }else	{
+	writeout();
+    }
+    $Model = "";
+    ++$Modno;
+}
+--$Modno;     # Correct last increment which didn't find a model
+
+# This has to be a logical "and".  A bitwise "&" would combine the model
+# number with the 0/1 result of the comparison, which is 0 for every even
+# model number, and the warning would then never be shown for those.
+if ($Pullout && ($Modno < $Pullout)) {
+    print "\nCannot find model $Pullout : Only $Modno models in this file\n";
+}
+
+#################### subroutines start here ##########################
+
+
+sub model {
+
+    # Appends the records of the next model read from PDB to $Model.
+    # Reading stops after an ENDMDL record or at end of file, so $Model
+    # is left untouched once the input is exhausted.
+    while (<PDB>) {
+
+	# End of the current model
+	last if /^ENDMDL\b/;
+
+	# MODEL must not show up at the top of the output, and a trailing
+	# END or MASTER record must not produce a file of its own.
+	next if /^(?:MODEL|END|MASTER)\b/;
+
+	$Model .= $_;
+    }
+}
+
+sub writeout  {	 # Used when all files are being written out
+
+    # Writes $Header, the current $Model and a closing END record to
+    # "<prefix>_<model number>.pdb", asking first if that file exists.
+    my $Target = "${Output}_${Modno}.pdb";
+
+    if (-e $Target){  # Check whether we're overwriting anything
+
+	print "\n$Target already exists. Overwrite (y/n)? ";
+	my $Answer = <STDIN>;
+	if ($Answer !~ /^y/i) {
+	    print "\nSkipping $Target";
+	    return;
+	}
+    }
+
+    open (OUT,">$Target") || die "Can't open $Target because $!";
+    print "\nWriting $Target ...";
+    print OUT $Header, $Model, "END\n";	# Adds an END statement to the PDB file
+
+    close OUT || die "Couldn't close $Target because $!";
+}
+
+
+sub readout {
+
+    # Used when a single model was requested: reports progress for every
+    # other model and writes the requested one to
+    # "<prefix>_<model number>.pdb".
+
+    if ($Modno != $Pullout) {
+	print "\nReading Model $Modno ...";
+	return;
+    }
+
+    my $Target = "${Output}_${Modno}.pdb";
+
+    if (-e $Target) {	# Check whether we're overwriting anything
+
+	print "\n$Target already exists. Overwrite (y/n)? ";
+	my $Answer = <STDIN>;
+	if ($Answer !~ /^y/i)	{
+	    print "\nModel not extracted\n";
+	    $Model = "";
+	    return;
+	}
+    }
+
+    open (OUT,">$Target") || die "Can't open $Target because $!";
+    print "\nWriting $Target ...\n";
+    print OUT $Header, $Model, "END\n"; # Adds an END statement to the PDB file
+
+    close OUT || die "Couldn't close $Target because $!";
+
+    $Model = "";  # The model has been dealt with; drop the buffered text
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/contributed/nmrpdb_parse.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/contributed/prosite2perl.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/contributed/prosite2perl.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/contributed/prosite2perl.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,29 @@
+#!/usr/bin/perl -w
+# $Id: prosite2perl.pl,v 1.1 2003/07/07 18:20:58 bosborne Exp $
+# prosite2perl -- convert Prosite patterns to Perl regular expressions
+#
+# Jordan Dimov (jdimov at cis.clarion.edu)
+#
+# Submitted to bioperl scripts project 2001/08/03 
+#
+# Description: 
+# Prosite patterns to Perl regular expressions.
+# The prositeRegEx($) sub accepts a string
+# containing a Prosite pattern and returns a
+# string containing a valid Perl regex.  The code
+# is self-explanatory.
+
+# Forward declaration, so that the prototype is known to the loop below.
+sub prositeRegEx($);
+
+# Filter: every input line is one Prosite pattern, every output line the
+# corresponding Perl regular expression.
+while (<>) {
+  chomp;
+  print prositeRegEx($_), "\n";
+}
+
+# Converts one Prosite pattern into a Perl regular expression string.
+#   - "-" and "." are dropped
+#   - "{" opens a negated character class, "}" closes it
+#   - "x" becomes ".", "(n)" becomes "{n}", "<" and ">" become "^" and "$"
+sub prositeRegEx ($) {
+  my ($pattern) = @_;
+  for ($pattern) {
+    tr/.\-//d;
+    s/\{/[^/g;
+    tr/x()<>}/.{}^$]/;
+  }
+  return $pattern;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/contributed/prosite2perl.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/contributed/rebase2list.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/contributed/rebase2list.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/contributed/rebase2list.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+# Generate an enzyme list for RestrictionEnzyme.pm from rebase
+# From Ryan Brinkman
+
+# Reads "Name,site" records from the file given on the command line and
+# prints one hash-entry line per enzyme: the name, the site in upper case
+# with the "/" cut marker removed, and the offset of that marker.
+my $strider = $ARGV[0]; #commercial_version_rebase_strider_format
+die "Usage: $0 <rebase_strider_file>\n" unless defined $strider;
+
+open (FILEIN, '<', $strider) or die "can't open $strider: $!\n";
+
+while (<FILEIN>){
+   chomp;
+   next unless /^[A-Z]\S+,\S+/;
+
+   my ($enzyme,$cutsite)=split(',');
+
+   # Offset of the cut marker within the site.  It is worked out afresh
+   # for every record, so a site without a marker gets an empty position
+   # instead of silently inheriting the one from the previous enzyme.
+   my $cutpos = index($cutsite, '/');
+   my $match  = $cutpos >= 0 ? $cutpos : '';
+
+   (my $seqfixed=$cutsite) =~ s/\///g;
+   $seqfixed=uc $seqfixed;
+   print " \'$enzyme\'\t=> \'".$seqfixed." ".$match."\'\,\n";
+}
+
+close FILEIN;
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/contributed/rebase2list.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/dbfetch
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/dbfetch	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/dbfetch	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,618 @@
+#!/usr/local/bin/perl -- # -*-Perl-*-
+
+=head1 NAME
+
+dbfetch - generic CGI program to retrieve biological database entries
+in various formats and styles (using SRS)
+
+=head1 SYNOPSIS
+
+  # URL examples:
+
+  # prints the interactive page with the HTML form
+  http://www.ebi.ac.uk/cgi-bin/dbfetch
+
+  # for backward compatibility, implements <ISINDEX>
+  # single entry queries defaulting to EMBL sequence database
+  http://www.ebi.ac.uk/cgi-bin/dbfetch?J00231
+
+  # retrieves one or more entries in default format
+  # and default style (html)
+  # returns nothing for IDs which are not valid
+  http://www.ebi.ac.uk/cgi-bin/dbfetch?id=J00231.1,hsfos,bum
+
+  # retrieve entries in fasta format without html tags
+  http://www.ebi.ac.uk/cgi-bin/dbfetch?format=fasta&style=raw&id=J00231,hsfos,bum
+
+  # retrieve a raw Ensembl entry
+  http://www.ebi.ac.uk/cgi-bin/dbfetch?db=ensembl&style=raw&id=AL122059
+
+
+=head1 DESCRIPTION
+
+This program generates a page allowing a web user to retrieve database
+entries from a local SRS in two styles: html and raw. Other
+database engines can be used to implement the same interface.
+
+At this stage, only unique identifier queries are supported. Free text
+searches returning more than one entry per query term are not in these
+specs.
+
+In its default setup, type one or more EMBL accession numbers
+(e.g. J00231), entry name (e.g. BUM) or sequence version into the
+search dialog to retrieve hypertext linked entries.
+
+Note that for practical reasons only the first 50 identifiers
+submitted are processed.
+
+Additional input is needed to change the sequence format or suppress
+the HTML tags.  The styles are html and raw. In future there might be
+additional styles (e.g. xml). Currently XML is a 'raw' format used by
+Medline. Each style is implemented as a separate subroutine.
+
+=head1 MAINTENANCE
+
+A new database can be added simply by adding a new entry in the global hash
+%DBS. Additionally, if the database defines new formats add an entry for
+each of them into the hash %IDMATCH.  After modifying the hash, run this
+script from command line for some sanity checks with parameter debug set to
+true (e.g. dbfetch debug=1 ).
+
+Finally, the user interface needs to be updated in the L<print_prompt>
+subroutine.
+
+=head1 VERSIONS
+
+Version 3 uses EBI SRS server 6.1.3. That server is able to merge release
+and update libraries automatically which makes this script simpler. The
+other significant change is the way sequence versions are indexed. They
+used to be indexed together with the string accession
+(e.g. 'J00231.1'). Now they are indexed as integers (e.g. '1').
+
+Version 3.1 changes the command line interface. To get the debug
+information use attribute 'debug' set to true. Also, it uses File::Temp
+module to create temporary files securely.
+
+Version 3.2 fixes fasta format parsing to get the entry id.
+
+Version 3.3. Adds RefSeq to the database list.
+
+Version 3.4. Make this compliant to BioFetch specs.
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki-at-bioperl-dot-org
+
+=cut
+
+# Let the code begin...
+
+$VERSION = '3.4';
+$DATE = '28 Jan 2002';
+
+use CGI "standard";
+#use POSIX;
+use CGI::Carp qw/ fatalsToBrowser /;
+use File::Temp qw/ tempfile tempdir /;
+use strict;
+no strict "refs";
+
+use constant MAXIDS => 50;
+use constant TMPDIR => '/usr/tmp';
+
+use vars qw( $VERSION $DATE %DBS  %STYLES $RWGETZ $RGETZ  %IDMATCH %IDLIST $XEMBL $FH );
+
+# Site configuration: external commands, per-format ID patterns, the
+# database catalogue and the list of accepted output styles.
+BEGIN {
+
+    # paths to SRS binaries
+    $RWGETZ = '/ebi/srs/srs/bin/osf_5/wgetz -e';
+    $RGETZ = '/ebi/srs/srs/bin/osf_5/getz -e';
+    $XEMBL = "cd /ebi/www/pages/cgi-bin/xembl/; ./XEMBL.pl";
+    #$EMBOSSDIR = '/ebi/services/pkgs/emboss/bin';
+
+    # RE matching the unique ID in the db entry
+    # - key is the format name, i.e. one of the keys used under
+    #   'format' in %DBS below
+    # - put the id string in parenthesis
+    # - bsml and agave are served by xml(), which does not consult this
+    #   table; their 'DUMMY' entries only satisfy the check in debugging()
+    %IDMATCH = (        #  123
+		embl => 'ID   (\w+)',
+		fasta => '>\w+.(\w+)',
+		medlinefull => '[\n><]MedlineID. ?(\w+)',
+                swissprot => 'ID   (\w+)',
+		pdb => '.{62}(\w+)',
+		bsml => 'DUMMY',
+		agave => 'DUMMY',
+		refseq => 'LOCUS       ([\w_]+)'
+		);
+
+    # Database catalogue, keyed by lower-case database name.
+    # - fields : SRS fields searched for the identifier; raw() and html()
+    #            OR together one "[db-field:value]" clause per field
+    # - version: optional name of the SRS field holding the sequence version
+    # - format : accepted format names. 'default' names the format used
+    #            when none is requested. For any other format the value
+    #            is handed to SRS as "-view <value>" by raw() and html();
+    #            a plain 1 simply marks the format as valid.
+    %DBS = (
+	    embl => {
+		fields => ['id', 'acc'],
+		version => 'sv',          # name of the SRS field
+		format => {
+		    default => 'embl',
+		    embl => 1,
+		    fasta => 'FastaSeqs',
+		    bsml => 1,
+		    agave => 1
+		}
+	    },
+	    medline => {
+		fields => ['id'],
+		format => {
+		    default => 'medlinefull',
+#		    medlineref => 'MedlineRef',
+		    medlinefull => 'MedlineFull'
+		    }
+	    },
+	    ensembl => {
+		fields => ['id'],
+		format => {
+		    default => 'embl',
+		    embl => 1,
+		    fasta => 'FastaSeqs'
+		    }
+	    },
+            swall => {
+		fields => ['id', 'acc'],
+		format => {
+		    default => 'swissprot',
+		    swissprot => 1,
+		    fasta => 'FastaSeqs'
+		    }
+	    },
+            pdb => {
+		fields => ['id'],
+		format => {
+		    default => 'pdb',
+		    pdb => '1'
+		    }
+	    },
+            refseq => {
+                fields => ['id', 'acc'],
+                format => {
+		    default => 'refseq',
+		    refseq => 1,
+		    fasta => 'FastaSeqs'
+                }
+            }
+            #add more databases here...
+	    );
+
+    # Styles a client may request.  The main program calls the subroutine
+    # of the same name for each ID; the 'xml' style is only ever selected
+    # internally (for the bsml and agave formats) and is therefore absent.
+    %STYLES = (
+	       html => 1,
+	       raw => 1
+	       );
+
+    %IDLIST = (); #redundancy check list built during the execution
+}
+
+my $q = CGI->new;
+
+# sanity checks if the script is running from command line
+#  and debug parameter is set.
+# The declaration is kept apart from the conditional assignment because
+# "my $x = ... if COND" has undefined behaviour in Perl.
+my $debug;
+$debug = protect(scalar $q->param('debug')) if $q->param('debug');
+debugging() if not $q->user_agent and $debug;
+
+if ( $q->param('id') or $q->param('keywords')  ) {
+
+    # pacify input strings
+    # param() is forced into scalar context everywhere: in list context a
+    # missing parameter yields an empty list and a repeated parameter
+    # several values, so protect() would not get exactly one value.
+    my $value;
+    $value = protect(scalar $q->param('id')) if $q->param('id');
+    $value = protect(scalar $q->param('keywords')) if $q->param('keywords');
+    my $db = lc protect(scalar $q->param('db')); # let's keep the case lower
+    my $format = lc protect(scalar $q->param('format'));
+    my $style =  lc protect(scalar $q->param('style'));
+
+    # check input and set defaults
+    $style ||= 'html';  # default style
+    input_error($q, $style, "2 Unknown style [$style].") unless $STYLES{$style};
+
+    $db ||= 'embl'; # default db
+    input_error($q, $style, "1 Unknown database [$db].") unless $DBS{$db};
+
+    $format ||= $DBS{$db}{format}{default}; # default format
+    input_error($q, $style, "3 Format [$format] not known for database [$db]")
+        unless $DBS{$db}{format}{$format};
+    $format = $DBS{$db}{format}{default} if $format eq 'default';
+
+
+    # If people choose Bsml or AGAVE, DB can only be 'embl'
+    input_error($q, $style, "1 Unknown database [$db].")
+        if ($format eq 'bsml' or $format eq 'agave') and $db ne 'embl';
+
+    # If people choose Bsml or AGAVE, internal style has to be xml . Make it so.
+    $style = ($format =~ /(bsml|agave)/i) ? 'xml' : $style;
+
+    # Check the number of IDs.  This is done before anything is printed,
+    # because input_error() sends a complete response of its own,
+    # including the HTTP header.
+    my @ids = split (/ /, $value);
+    input_error($q, $style, "6 Too many IDs [". scalar @ids. "]. Max [". MAXIDS. "] allowed.")
+        if scalar @ids >  MAXIDS;
+
+    # The xml style prints no header here: xml() puts the Content-type
+    # line in front of the data it writes to $FH.
+    if ($style eq 'html') {
+        print $q->header(-type => 'text/html', -charset => 'UTF-8');
+    }
+    elsif ($style eq 'raw') {
+        print "Content-Type: text/plain; charset=UTF-8\n\n";
+    }
+
+    # All entries are collected in a temporary file first and copied to
+    # STDOUT at the very end.
+    $FH = tempfile('dbfetchXXXXXX', DIR => TMPDIR, UNLINK => 1 ); #automatic unlinking
+
+    # XEMBL cannot 'glue' single entries due to XML setup
+    #- we need to send things in one go.
+    if ($style eq 'xml') {
+	xml($format, @ids);
+    } else {
+        foreach my $id (@ids) {
+            # Calls html() or raw() by name (hence "no strict 'refs'");
+            # $style has been checked against %STYLES above.
+            &$style($db, $id, $format);
+        }
+        no_entries($q, $value) if $style eq 'html' and tell($FH) == 0;
+    }
+    seek $FH, 0, 0;
+    print '<pre>' if $style eq 'html';
+    print $_ while <$FH>;
+} else {
+    print_prompt($q);
+}
+
+
+=head2 print_prompt
+
+ Title   : print_prompt
+ Usage   : print_prompt($q);
+ Function: Prints the default page with the query form
+           to STDOUT (Web page): HTTP header, usage notes, the
+           form itself and example URLs.
+ Args    : The main program passes the CGI object, but the argument
+           is not read: the subroutine uses the file-level $q.
+ Returns : Nothing meaningful (the result of the print call)
+
+=cut
+
+sub print_prompt {
+    # The whole page is produced by the single print statement below.
+    print $q->header(),
+         $q->start_html(-title => 'DB Entry Retrieval',
+                        -bgcolor => 'white',
+			-author => 'heikki-at-bioperl-dot-org'
+			),
+	 '<IMG align=middle SRC="/icons/ebibanner.gif">',
+	  $q->h1('Generic DB Entry Retrieval'),
+	  $q->p("This page allows you to retrieve up to ". MAXIDS .
+		 " entries at the time from various up-to-date biological databases."),
+	  $q->p("For EMBL, enter an  accession number (e.g. J00231) or entry name (e.g.
+		 BUM) or a sequence version (e.g. J00231.1), or any combination of them
+		 separated by a non-word character into your browser's search dialog.
+		 SWALL examples are: fos_human, p53_human.
+		 For short Ensembl entries, try : AL122059, AL031002, AL031030 .
+		 'Random' Medline entry examples are: 20063307, 98276153.
+		 PDB entry examples are: 100D, 1FOS. Try NM_006732 for RefSeq.
+		 Only one copy of the latest version of the entry is returned."),
+	  $q->hr,
+	  $q->startform,
+	  $q->popup_menu(-name => 'db',
+			 -values => ['EMBL',
+				     'SWALL',
+				     'PDB',
+				     'Medline',
+				     'Ensembl',
+				     'RefSeq'
+				     ]),
+	  $q->textfield(-name => 'id',
+			 -size => 40,
+			 -maxlength => 1000),
+	  $q->popup_menu(-name => 'format',
+			 -values => ['default','Fasta','bsml','agave']),
+	  $q->popup_menu(-name => 'style',
+			 -values => ['html','raw']),
+	  $q->submit('Retrieve'),
+	  $q->endform,
+	  $q->hr,
+	  $q->h2('Direct access'),
+	  $q->p('For backward compatibility, the script defaults to EMBL:'),
+	  $q->code('<A href="http://www.ebi.ac.uk/cgi-bin/dbfetch?J00231">
+		     http://www.ebi.ac.uk/cgi-bin/dbfetch?J00231</a>'),
+	  $q->p('but the preferred way of calling it is:'),
+	  $q->code('<A href="http://www.ebi.ac.uk/cgi-bin/dbfetch?id=J00231.1,hsfos,bum">
+		     http://www.ebi.ac.uk/cgi-bin/dbfetch?id=J00231.1,hsfos,bum</a>'),
+	  $q->p('which can be extended to retrieve entries in alternative sequence formats
+		      and other databases:'),
+	  $q->code('<A href="http://www.ebi.ac.uk/cgi-bin/dbfetch?db=swall&format=fasta&id=fos_human">
+		     http://www.ebi.ac.uk/cgi-bin/dbfetch?db=swall&format=fasta&id=fos_human</a>'),
+	  $q->p('Set style to <code>raw</code> to retrieve plain text entries for computational purposes
+                 and saving to disk:'),
+	  $q->code('<A href="http://www.ebi.ac.uk/cgi-bin/dbfetch?db=medline&style=raw&id=21131735">
+                    http://www.ebi.ac.uk/cgi-bin/dbfetch?db=medline&style=raw&id=21131735</a>'),
+ 	  $q->p('There is now the possibility to retrieve EMBL sequences formatterd into two XML standards:
+                Bsml (Bioinformatic Sequence Markup Language - from 
+                Labbook, Inc.) or as AGAVE (Architecture for Genomic Annotation, 
+                Visualisation, and Exchange - from Labbook, Inc.). To do this, use the 
+                formats \'bsml\' or \'agave\', as follows:'),
+	  $q->code('<A href="http://www.ebi.ac.uk/cgi-bin/dbfetch?format=bsml&id=J00231">
+                   http://www.ebi.ac.uk/cgi-bin/dbfetch?format=bsml&id=J00231</a><br>'),
+	  $q->code('<A href="http://www.ebi.ac.uk/cgi-bin/dbfetch?format=agave&id=J00231">
+                   http://www.ebi.ac.uk/cgi-bin/dbfetch?format=agave&id=J00231</a>'),
+          $q->p("Version numbers are not supported with the XML retrieval."),
+	  $q->hr,
+          $q->address("Version $VERSION, $DATE, <a href=\"mailto:support\@ebi.ac.uk\">support\@ebi.ac.uk</a>"),
+	  $q->end_html, "\n" ;
+}
+
+=head2 protect
+
+ Title   : protect
+ Usage   : $value = protect($q->param('id'));
+ Function:
+
+           Sanitises a user-supplied string. Every run of characters
+           other than word characters and '.' is replaced by a single
+           space, then non-word characters at either end are removed.
+
+ Args    : scalar, string with one or more IDs or accession numbers
+ Returns : scalar, the cleaned string
+
+=cut
+
+sub protect {
+    my $clean = shift;
+    for ($clean) {
+        s/[^\w.]+/ /g;  # keep '.' for version numbers; '_' (RefSeq IDs) is a word character
+        s/^\W+//;
+        s/\W+$//;
+    }
+    return $clean;
+}
+
+=head2 input_error
+
+ Title   : input_error
+ Usage   : input_error($q, 'html', "Error message");
+ Function: Standard error message behaviour. Prints a complete
+           response (HTTP header included) and ends the program.
+ Args    : reference to the CGI object (only used for style 'html')
+           scalar, style; 'html' gives an HTML page, anything
+           else a plain text line starting with "ERROR"
+           scalar, string to display on input error.
+ Returns : never returns, the program exits with status 0
+
+=cut
+
+sub input_error {
+    my ($q, $style, $s) = @_;
+
+    if ($style eq 'html' ) {
+	print $q->header,
+	$q->start_html(-title => 'DB Entry Retrieval: Input error', 
+		       -bgcolor => 'white'
+		       ),
+	# The heading has to be closed with </h2>; a second opening tag
+	# would turn the message itself into a nested heading.
+	"<h2>ERROR in input:</h2>$s\n",
+	$q->end_html, "\n";
+    } else {
+	print "Content-type: text/plain\n\n", "ERROR $s\n";
+    }
+    exit 0;
+}
+
+=head2 no_entries
+
+ Title   : no_entries
+ Usage   : no_entries($q, $value);
+ Function: Standard behaviour when no entries found. Prints an
+           HTML page (without an HTTP header) and ends the program.
+ Args    : reference to the CGI object
+           scalar, the query string that retrieved nothing
+ Returns : never returns, the program exits with status 0
+
+=cut
+
+sub no_entries {
+    my ($q, $value) = @_;
+
+    # The list items are printed back to back, so the first sentence
+    # carries its own trailing space.
+    print $q->start_html(-title => 'DB Entry Retrieval: Input warning',
+			 -bgcolor => 'white'
+			 ),
+	  "<h2>Sorry, your query retrieved no entries.</h2>",
+	  "Entries with [$value] were not found. ",
+	  "Please go back or press <a href=\"dbfetch\"><b>here</b></a> to try again",
+	   $q->end_html, "\n";
+    exit 0;
+}
+
+
+=head2 raw
+
+ Title   : raw
+ Usage   : raw($db, $id, $format);
+ Function: Retrieves a single database entry in plain text and
+           appends it to the temporary file $FH, unless an entry
+           with the same ID has been written already. Ends the
+           program through input_error() if no ID can be found
+           in the retrieved text.
+ Args    : scalar, database name (a key of %DBS)
+           scalar, an ID, optionally followed by '.' and a version
+           scalar, format (a key of %IDMATCH)
+ Returns : nothing meaningful
+
+=cut
+
+sub raw {
+    my ($db, $value, $format) = @_;
+
+    # Formats other than the default one are requested as a named view.
+    my $seqformat = '';
+    $seqformat = '-view '. $DBS{$db}{format}{$format}
+       if $format ne $DBS{$db}{format}{default};
+
+    # Split "ID.VERSION".  The captures are only read after a successful
+    # match; after a failed one $1 and $2 keep whatever an earlier match
+    # left in them.
+    my $version = '';
+    if ($value =~ /(.+)\.(.+)/) {
+	($value, $version) = ($1, $2);
+    }
+
+    # main db: OR together one clause per searchable field
+    my $srsq = join '|',
+	map { my $field = $_; " [$db-$field:$value] " } @{$DBS{$db}{fields}};
+
+    # if database supports versions (EMBL, GenBank, RefSeq...)
+    # A version given for a database without a version field is ignored
+    # rather than turned into a clause with an empty field name.
+    my $vfname = $DBS{$db}{version};
+    if ($version and $vfname) {
+	$srsq = "[$db-$vfname:$version] & (". $srsq. ")";
+    }
+
+#    print "rsh srs $RGETZ $seqformat $srsq\n";
+    my $entry = `rsh srs "$RGETZ $seqformat '$srsq'"`;
+
+    # Drop the first line mentioning EMBL and tidy up surrounding whitespace.
+    $entry =~ s|EMBL[^\n]+\n||;
+    $entry =~ s|^\s+||g;
+    $entry =~ s|\s+$|\n|g;
+
+    my $idmatch = $IDMATCH{$format};
+    my ($id) = $entry =~ /$idmatch/;
+    # die if ID not found
+    input_error(' ', 'raw', "5 ID [$value] not found in database [$db].")
+	     unless $id;
+
+    # %IDLIST remembers every ID written so far, so that an entry reached
+    # through several identifiers is only output once.
+    print $FH $entry unless $IDLIST{$id};
+    $IDLIST{$id} = 1;
+}
+
+=head2 html
+
+ Title   : html
+ Usage   : html($db, $id, $format);
+ Function: Retrieves a single database entry with HTML
+           hypertext links in place and appends it to the
+           temporary file $FH. Limits retrieved entries to
+           ones with correct version if the string has '.' in it.
+           Returns without output if the reply contains
+           "SRS error".
+ Args    : scalar, database name (a key of %DBS)
+           scalar, a UID, optionally followed by '.' and a version
+           scalar, format (a key of %IDMATCH)
+ Returns : nothing meaningful
+
+=cut
+
+sub html {
+    my ($db, $value, $format) = @_;
+
+    # Formats other than the default one are requested as a named view.
+    my $seqformat = '';
+    $seqformat = '-view '. $DBS{$db}{format}{$format}
+       if $format ne $DBS{$db}{format}{default};
+
+    # Split "ID.VERSION".  The captures are only read after a successful
+    # match; after a failed one $1 and $2 keep whatever an earlier match
+    # left in them.
+    my $version = '';
+    if ($value =~ /(.+)\.(.+)/) {
+	($value, $version) = ($1, $2);
+    }
+
+    # SWALL plain format at EBI
+    $seqformat .= ' -vn 2 ' if $db eq 'swall' or $db eq 'refseq';
+
+    # OR together one clause per searchable field
+    my $srsq = join '|',
+	map { my $field = $_; " [$db-$field:$value] " } @{$DBS{$db}{fields}};
+
+    # if database supports versions (EMBL...)
+    # A version given for a database without a version field is ignored
+    # rather than turned into a clause with an empty field name.
+    my $vfname = $DBS{$db}{version};
+    if ($version and $vfname) {
+	$srsq = "[$db-$vfname:$version] & (". $srsq. ")";
+    }
+
+#    print "rsh srs $RWGETZ $seqformat $srsq\n";
+    # NOTE(review): an older comment here described a temporary
+    # '-id EBISRS' option; the command below does not pass one.
+    my $entry = `rsh srs "$RWGETZ $seqformat '$srsq'"`;
+  
+    return if $entry =~ /SRS error/;
+
+    # Strip the CGI header and layout markup, and make the relative
+    # wgetz links absolute.
+    $entry =~ s|^Content-type:[^\n]+\n||;
+    $entry =~ s|\n<A  HREF[^\n]+\n||;
+    $entry =~ s|<A +HREF=\"?wgetz|<A HREF=http://srs6.ebi.ac.uk/srs6bin/cgi-bin/wgetz|g; #"\
+    $entry =~ s/\+-e\"/\+-e/g; #"
+    $entry =~ s|<BR>||g;
+    $entry =~ s|</?pre>||g;
+    $entry =~ s|\n+|\n|g;
+    $entry =~ s|^\n+||g;
+
+    my $idmatch = $IDMATCH{$format};
+    my ($id) = $entry =~ /$idmatch/;
+
+    # %IDLIST remembers every ID written so far, so that an entry reached
+    # through several identifiers is only output once.  Replies in which
+    # no ID can be found all share one (empty) slot.
+    print $FH $entry unless $IDLIST{$id};
+    $IDLIST{$id} = 1;
+}
+
+=head2 xml
+
+ Title   : xml
+ Usage   : xml($format, @ids);
+ Function: Retrieves all requested entries formatted as XML in
+           one call and writes them, preceded by a Content-type
+           line, to the temporary file $FH. Ends the program
+           through input_error() if the reply reports one of
+           the IDs as missing.
+ Args    : scalar, format ('bsml' or 'agave')
+           array, UIDs
+ Returns : nothing meaningful
+
+=cut
+
+sub xml {
+    my ($format, @ids) = @_;
+
+    # HTTP_USER_AGENT is not set when the script runs outside a web
+    # server; that case is treated like any non-MSIE client.
+    my $agent = defined $ENV{'HTTP_USER_AGENT'} ? $ENV{'HTTP_USER_AGENT'} : '';
+    my $content = ($agent =~ /MSIE/) ? "Content-type: text/xml\n\n" :
+      "Content-type: text/plain\n\n";
+
+    my $args = "--format ".(($format eq "bsml") ? "Bsml" : "sciobj") .
+	     " " . join (" ", @ids);
+
+    my $entry = `rsh mercury "$XEMBL $args"`;
+
+    # The IDs are compared as literal text (index), so that a '.' in an
+    # ID cannot act as a regular expression wildcard.
+    # $q below is the file-level CGI object.
+    foreach my $idl (@ids) {
+	input_error($q, " ", "5 ID [$idl] not found in database [embl].")
+	    if ($format eq "bsml" && index($entry, "NOT EXIST: $idl") >= 0) ||
+	       ($format eq "agave" && index($entry, "NOT FOUND: $idl") >= 0);
+    }
+
+    print $FH ($content . $entry);
+}
+
+=head2 debugging
+
+ Title   : debugging
+ Usage   : 'perl dbfetch debug=1'
+ Function:
+
+           Performs sanity checks on global hash %DBS when this script
+           is run from command line. %DBS holds the description of
+           formats and other crucial info for each database accessible
+           through the program. Every format is also checked for an
+           entry in %IDMATCH.
+
+           Note that hash key 'version' is not tested as it should 
+           only be in sequence databases.
+
+ Args    : none
+ Returns : error messages to STDOUT, or one "OK" line per database;
+           the program exits afterwards
+
+=cut
+
+sub debugging {
+
+    foreach my $db (keys %DBS) {
+	my $status = 1;   # stays true as long as no check has failed
+
+	# field
+	print "ERROR: [$db]: no SRS fields defined.".
+	    " Give an array of field names?\n" and $status = 0
+	    unless $DBS{$db}{fields};
+	print "ERROR: [$db]: SRS fields are not defined as an array.\n" and $status = 0
+	    unless ref $DBS{$db}{fields} eq 'ARRAY';
+
+	# format
+	print "ERROR: [$db]: no formats defined.\n" and $status = 0
+	    unless $DBS{$db}{format};
+	print "ERROR: [$db]: no default format defined.\n" and $status = 0
+	    unless $DBS{$db}{format}{default};
+	my $format = $DBS{$db}{format}{default};
+	print "ERROR: [$db]: no format [$format] defined.".
+	    " You declared it as a default and only.\n" and $status = 0
+	    unless $DBS{$db}{format}{$format};
+	foreach my $dbformat (keys %{$DBS{$db}{format}}) {
+	    # The message names the format being checked ($dbformat), not
+	    # the default format of the database.
+	    print "ERROR: [$db]: format [$dbformat] not defined in %IDMATCH.\n"
+		and $status = 0
+		 unless $IDMATCH{$dbformat} or $dbformat eq 'default';
+	}
+	printf "%-12s%s", "[$db]", "OK\n" if $status;
+    }
+    exit;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/dbfetch
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/est_tissue_query.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/est_tissue_query.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/est_tissue_query.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,162 @@
+#!/usr/bin/perl -w
+
+# This script will report the names of the tissues which were seen
+# in a BLAST/FASTA report against an EST (or cDNA possibly) library.
+
+# this script assumes you have a directory which you have downloaded
+# gbestXX.seq.gz from ncbi genbank release.  This will run faster if 
+# they are uncompressed already, but it will uncompress the files 
+# on demand.  Be sure that there is sufficient space and the uid
+# has write permission on the files and in that directory if you
+# plan to run this script on compressed files.
+
+# Alternatively you can use this with the -r option and it will 
+# use the remote sequence databases either genbank or embl to 
+# retrieve the specific EST (so one can attempt to guess the tissue type)
+##
+# cmd line options are
+# -i/--index=indexname
+# -d/--dir=dir where gbest data files are located
+# -b/--blast=filename blast filename which compared against an EST db
+# -f/--format=(blast|blastxml|fasta) - type of search output either 
+#                                      from BLAST or FASTA suites 
+# -c/--cache=filename cache for accession number to tissue
+# -p/--pvalue=pvalue pvalue to limit search to
+# -r/--remote=[GenBank|EMBL] use remote db for searching
+
+use strict;
+use DB_File;
+use Bio::SeqIO;
+use Bio::SearchIO;
+use Bio::DB::EMBL;
+use Bio::DB::GenBank;
+use Bio::Index::GenBank;
+
+use Getopt::Long;
+my $GZIP = '/usr/bin/gzip';
+my $GUNZIP = '/usr/bin/gunzip';
+
+my $dir = '/home/data/libs/gbest'; # local dir for gbest files
+my $index = 'dbest_tissue.idx';    # local index filename
+my $cache;      # filename to create cache of accession->tissue
+my $VERBOSE = 0;# verbosity option
+my $blastfile;  # blastfile to parse
+my $pvalue;     # Max P-Value allowed when parsing blastfile
+my $remote;     # flag for remote database 
+my $db;         # generic database handle
+my %accessions; # cache results
+my $format = 'blast';
+
+&GetOptions( 'd|dir:s'   => \$dir,
+	     'i|index:s' => \$index,
+	     'v|verbose' => \$VERBOSE,
+	     'b|blast:s' => \$blastfile,
+	     'f|format:s' => \$format,
+	     'c|cache:s' => \$cache,
+	     'p|pvalue:s'       => \$pvalue,
+	     'r|remote:s'=> \$remote);
+
+# Tie the accession->tissue cache to a DB_File hash.  The file is created
+# on first use (O_CREAT), so a path that does not exist yet has to be
+# accepted as well; an existing file must be writable.  If the tie fails
+# the script carries on with a plain in-memory hash.
+if( $cache && ( ! -e $cache || -w $cache ) ) {
+    print "creating cache file\n";
+    tie( %accessions, "DB_File", $cache, O_RDWR|O_CREAT, 0660, $DB_HASH )
+	or warn("cannot tie cache file $cache: $!\n");
+}
+
+if( ! $remote ) {
+    # Build (or refresh) the local index from every gbestNN.seq[.gz] file
+    # found in $dir, then reopen the index for lookups.
+    opendir(my $gbest_dh, $dir) or die("cannot open $dir");
+
+    my $indexfile = Bio::Index::GenBank->new(-filename   => $index,
+                                             -write_flag => 'WRITE');
+    foreach my $file ( readdir($gbest_dh) ) {
+#	print "file is $file\n";
+        next unless ( $file =~ /(gbest\d+\.seq)(\.gz)?$/ );
+        my ($seqfile, $gzipped) = ($1, $2);
+        if( $gzipped ) {
+            # list form of system keeps the shell away from the file name
+            if( system($GUNZIP, "$dir/$file") != 0 ) {
+                warn("could not uncompress $dir/$file, skipping it\n");
+                next;
+            }
+        }
+        $indexfile->make_index("$dir/$seqfile");
+    }
+    closedir($gbest_dh);
+
+    # drop the writing handle before the index is opened again
+    $indexfile = undef;
+    $db = Bio::Index::GenBank->new(-filename => $index);
+
+} else {
+    if( $remote =~ /(ncbi)|(genbank)/i ) {
+        $db = Bio::DB::GenBank->new;
+    } elsif( $remote =~ /embl/i ) {
+        $db = Bio::DB::EMBL->new;
+    } else {
+        die("remote must be either 'NCBI' or 'EMBL'");
+    }
+    # would need to add code to set proxy info for those who need it
+}
+
+if(! $blastfile || ! -r $blastfile ) {
+    die("Must specify a valid blastfile");
+}
+
+my $parser = Bio::SearchIO->new(-format => $format,
+                                -file   => $blastfile);
+
+while( my $result = $parser->next_result )  {
+  # The report below is printed once per query, so the table starts out
+  # empty for every result instead of accumulating over the whole file.
+  my %tissues_seen = ();
+
+  HIT: while( my $hit = $result->next_hit ) {
+      if( defined $pvalue ) {
+	  while( my $hsp = $hit->next_hsp ) {
+	      if( $hsp->evalue > $pvalue ) {
+		  print "skipping ", $hit->name, " because of low evalue \n";
+		  # skip this Subject if it contains a pvalue of > $pvalue
+		  next HIT;
+	      }
+	  }
+      }
+      my  ($id) = split(/\s+/, $hit->name);
+      # the accession is the last |-separated field of the hit name
+      my @ids = split(/\|/, $id);
+      $id = pop @ids;
+      my ($tissuetype) = get_tissue($id);
+      if( defined $tissuetype ) {
+	  push @{$tissues_seen{$tissuetype}}, $hit->name;
+      } else {
+	  print STDERR "could not find tissue for $id\n" if( $VERBOSE);
+      }
+  }
+  print "tissues seen for: ", $result->query_name, "\n";
+
+  foreach my $tissue ( sort keys %tissues_seen ) {
+      print "* $tissue\n-----------\n\t",
+      join("\n\t",@{$tissues_seen{$tissue}}), "\n\n";
+  }
+}
+
+# cleanup -- avoid segfault here
+$db = undef;
+
+# subroutines
+
+# Look up the tissue (or, failing that, the clone library) recorded for a
+# sequence, consulting the %accessions cache first.
+#
+# Args   : $id - accession number to look up
+# Returns: the first value of the matching tag on a 'source' feature, or
+#          nothing when the sequence or a suitable tag cannot be found
+sub get_tissue {
+    my ($id) = @_;
+    my $tissue;
+    if( $tissue = $accessions{$id} ) {
+	return $tissue;
+    }
+
+    my $seq = $db->get_Seq_by_acc($id);
+    return  unless(  $seq );
+
+    foreach my $feature ( $seq->all_SeqFeatures ) {
+	next unless $feature->primary_tag eq 'source';
+
+	# descending order puts tag names starting with 'tissue' ahead of
+	# 'clone_lib', so a tissue tag wins when both are present
+	foreach my $tag ( sort { $b cmp $a } $feature->all_tags ) {
+	    next unless $tag =~ /tissue|clone_lib/i;
+	    ($tissue) = $feature->each_tag_value($tag);
+	    # store under the key the lookup above uses, and under the
+	    # record's display id as before
+	    $accessions{$id}              = $tissue;
+	    $accessions{$seq->display_id} = $tissue;
+	    return $tissue;
+	}
+    }
+    return;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/est_tissue_query.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/gb2features.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/gb2features.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/gb2features.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+#!/usr/bin/perl -w
+# Author: Damien Mattei C.N.R.S / U.N.S.A - UMR 6549
+# example: ./idfetch.pl AP001266
+
+use Bio::DB::GenBank;
+
+# The accession comes from the command line; stop with a usage hint
+# rather than querying GenBank with an undefined value.
+my $acc = $ARGV[0];
+die "usage: $0 ACCESSION\n" unless defined $acc && length $acc;
+
+my $gb = Bio::DB::GenBank->new();
+
+# this returns a Seq object :
+my $seq1 = $gb->get_Seq_by_acc($acc)
+  or die "could not retrieve a sequence for accession $acc\n";
+print $seq1->display_id() . "\n" ;
+
+
+foreach my $feat ($seq1->all_SeqFeatures()) {
+
+  #print $feat->primary_tag . " " . $feat->source_tag() . "\n" ;
+
+  print "Feature from ", $feat->start, " to ",
+   $feat->end, " Primary tag  ", $feat->primary_tag,
+   ", produced by ", $feat->source_tag(), "\n";
+
+  # an unset strand is treated like 0
+  if( ! $feat->strand ) {
+    print "Feature applicable to either strand\n";
+  } else {
+    print "Feature on strand ", $feat->strand,"\n"; # -1,1
+  }
+
+  foreach my $tag ( $feat->all_tags() ) {
+    print "Feature has tag ", $tag, " with values, ",
+    join(' ',$feat->each_tag_value($tag)), "\n";
+  }
+
+  print "new feature\n" if $feat->has_tag('new');
+
+}
+
+
+exit;
+
+__END__
+
+It will display something like that:
+
+[dmattei at pclgmch2 gmap]$ ./idfetch.pl AP001266
+AP001266
+Feature from 1 to 168978 Primary tag  source, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag chromosome with values, 11
+Feature has tag map with values, 11q13
+Feature has tag clone with values, RP11-770G2
+Feature has tag organism with values, Homo sapiens
+Feature has tag db_xref with values, taxon:9606
+Feature from 1 to 31550 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 31651 to 48510 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 48611 to 64044 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 64145 to 78208 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 78309 to 89008 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 89109 to 99704 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 99805 to 107965 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 108066 to 116032 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 116133 to 124010 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 124111 to 130494 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 130595 to 136072 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 136173 to 139649 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 139750 to 144590 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 144691 to 148482 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 148583 to 152279 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 152380 to 153632 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment clone_end:T7 
+vector_side:left
+Feature from 153733 to 155746 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 155847 to 156405 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment clone_end:SP6 
+vector_side:right
+Feature from 156506 to 158398 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 158499 to 161333 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 161434 to 163304 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 163405 to 164604 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 164705 to 166693 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+Feature from 166794 to 168978 Primary tag  misc_feature, produced by 
+EMBL/GenBank/SwissProt
+Feature on strand 1
+Feature has tag note with values, assembly_fragment
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/gb2features.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/getGenBank.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/getGenBank.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/getGenBank.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,29 @@
+#!/usr/local/bin/perl -w
+#
+# How to retrieve GenBank entries over the Web
+#
+# by Jason Stajich
+#
+use Bio::DB::GenBank;
+use Bio::SeqIO;
+my $gb = Bio::DB::GenBank->new;
+
+# Destination for the retrieved sequences: FASTA on STDOUT here, but a
+# file works as well -- see the Bio::SeqIO module for info
+my $seqout = Bio::SeqIO->new(-format => 'fasta', -fh => \*STDOUT);
+
+# fetch one entry by its id ...
+my $seq = $gb->get_Seq_by_id('MUSIGHBA1');
+$seqout->write_seq($seq);
+
+# ... or by its accession number
+$seq = $gb->get_Seq_by_acc('AF303112');
+$seqout->write_seq($seq);
+
+# for a bunch of sequences use the batch method, which hands back a
+# stream to read them from
+my @wanted = qw(J00522 AF303112 2981014);
+my $seqio  = $gb->get_Stream_by_batch(\@wanted);
+
+while( defined( $seq = $seqio->next_seq ) ) {
+    $seqout->write_seq($seq);
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/getGenBank.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/get_seqs.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/get_seqs.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/get_seqs.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,71 @@
+#!/usr/local/bin/perl -w
+use strict;
+use vars qw($USAGE);
+use Carp;
+use Getopt::Long;
+use Bio::SeqIO;
+
+$USAGE = "get_seqs.pl\t[--db=DBNAME] [--format=FORMAT] \n\t\t[--output=FILENAME] [--proxy=PROXY] accession1, accession2, ...\n Defaults: db=GenBank format=fasta output=STDOUT proxy=none\n See LWP::UserAgent for more information on proxy syntax";
+
+# database name => class implementing remote access to it
+my %dbs = (
+  'genbank'   => 'Bio::DB::GenBank',
+  'embl'      => 'Bio::DB::EMBL',
+  'swissprot' => 'Bio::DB::SwissProt', 
+);
+
+my ($db,$format,$file,$proxy,$help) = ( 'genbank', 'fasta' );
+
+GetOptions
+    (
+     'db:s'       => \$db,
+     'f|format:s' => \$format,
+     "file|out|output:s" => \$file,
+     'proxy:s'           => \$proxy,
+     "h|\?|help"  => \$help ,     
+     );
+
+if( $help ) { print $USAGE, "\n";exit; }
+
+# without at least one accession there is nothing to fetch
+die($USAGE, "\n") unless @ARGV;
+
+# map the (possibly abbreviated) --db value onto a key of %dbs
+if( $db =~ /gb|gen|genbank/i ) {
+    $db = 'genbank';
+} elsif( $db =~ /embl|em|e/i ) {
+    $db = 'embl';
+} elsif( $db =~ /swiss|sp/i ) {
+    $db = 'swissprot';
+} else { 
+    croak("Unknown db parameter '$db' valid parameters are (" . join(',', keys %dbs) . ")");
+}
+
+my %params = ( '-format' => $format );
+
+if( defined $file ) {
+    $params{'-file'} = ">$file";
+} else { 
+    $params{'-fh'} = \*STDOUT;
+}
+
+my $seqio = Bio::SeqIO->new(%params);
+
+my $remotedb;
+
+# load the class for the chosen database on demand
+eval {
+    my $filename = "$dbs{$db}.pm";
+    $filename =~ s!::!/!g;
+    require $filename;
+    $remotedb = "$dbs{$db}"->new();
+};
+
+die($@) if $@;
+
+if( defined $proxy ) { $remotedb->proxy($proxy); }
+
+my $stream;
+
+# both stream methods are handed the id list as an array reference;
+# a flat list would pass only the first accession as the id argument
+if( $remotedb->can('get_Stream_by_batch') ) {
+    $stream = $remotedb->get_Stream_by_batch(\@ARGV);
+} else {
+    $stream = $remotedb->get_Stream_by_acc(\@ARGV);
+}
+
+while( my $seq = $stream->next_seq ) {
+    $seqio->write_seq($seq);
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/get_seqs.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/rfetch.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/rfetch.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/rfetch.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+
+#
+# Fetch sequence data via OBDA registry system
+#
+# usage: rfetch -i <file_with_accession_list> -a -v -d embl -s start -e end
+#
+
+use Bio::DB::Registry;
+use Bio::SeqIO;
+use Getopt::Long;
+use strict;
+
+# option defaults
+my $database = 'embl_biosql';
+my $start    = undef;
+my $end      = undef;
+my $format   = 'fasta';
+my $file     = undef;
+my $acc      = undef;
+my $verbose  = undef;
+
+GetOptions(
+	    'd|database:s' => \$database,
+	    's|start:i' => \$start,
+	    'e|end:i'   => \$end,
+	    'f|format:s' => \$format,
+	    'i|input:s' => \$file,
+	    'a|acc'     => \$acc,
+	    'v|verbose' => \$verbose,
+	   );
+
+
+my $registry = Bio::DB::Registry->new();
+
+my $db = $registry->get_database($database);
+
+my $seqout = Bio::SeqIO->new( '-format' => $format, '-fh' => \*STDOUT);
+
+# ids come from the first column of the input file when one is given,
+# otherwise from the remaining command line arguments
+my @ids;
+
+if( defined $file ) {
+  open(my $fh, '<', $file) || die "cannot open $file $!";
+  while( my $line = <$fh> ) {
+    my ($id) = split ' ', $line;
+    next unless defined $id;    # blank line
+    push(@ids,$id);
+  }
+  close($fh);
+} else {
+  @ids = @ARGV;
+}
+
+foreach my $id ( @ids ) {
+  my $seq;
+  if( $verbose ){
+    print STDERR "fetching $id\n";
+  }
+
+  if( $acc ) {
+    $seq = $db->get_Seq_by_acc($id);
+  } else {
+    $seq = $db->get_Seq_by_id($id);
+  }
+
+  # nothing came back for this id: report it and go on with the next
+  unless( defined $seq ) {
+    print STDERR "could not retrieve $id\n";
+    next;
+  }
+
+  if( defined $start && defined $end ) {
+    $seq = $seq->trunc($start,$end);
+  }
+
+  $seqout->write_seq($seq);
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/rfetch.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/db/use_registry.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/db/use_registry.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/db/use_registry.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,14 @@
+#!/usr/bin/perl -w
+use strict;
+
+use Bio::DB::Registry;
+use Bio::SeqIO;
+use strict;
+
+my $registry = Bio::DB::Registry->new();
+
+# show what the registry offers
+my @services = $registry->services;
+print "services are ", join(',', @services), "\n";
+
+# fetch one entry from the "embl" database
+my $seq = $registry->get_database("embl")->get_Seq_by_id("J02231");
+
+# write it through a Bio::SeqIO created without arguments
+my $out = Bio::SeqIO->new;
+$out->write_seq($seq);


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/db/use_registry.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/generate_random_seq.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/generate_random_seq.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/generate_random_seq.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,81 @@
+#!/bin/perl -w
+use strict;
+use vars qw($USAGE);
+
+# random sequence generator #
+# -c=1 option will cause prot sequences to be built 
+# using vertebrate aa frequencies, 
+# with option -a putting a 1st methionine residues on. Frequencies are
+# calculated from the NCBI human RefSeq protein sequences 
+# -c and -a only affect protein sequences
+# -a only works in conjunction with -c
+# -n number of random sequences, default = 1
+
+use Bio::PrimarySeq;
+use Bio::SeqIO;
+use Getopt::Long;
+my ($length,$type,$filename,$comp,$met);
+
+$USAGE = 'usage: generate_random_seq.pl --length=1000 --type=dna --filename=/tmp/test.seq --number=50';
+
+my %alphabets = ( 'dna' => [qw(C A G T)],
+                  'rna' => [qw(C A G U)],
+                  'prot'=> [qw( A C D E F G H I K L M N P Q R S T V W Y)],
+              );
+# make random num from 1-10000. numbers in this array reflect the frequency,
+# e.g., a random number from 1-744 = A, 745-991 = C etc;
+my @aa_frequencies = qw(744 991 1398 2017 2378 3104 3349 3726 4239 5273 5443 
+                        5749 6410 6848 7455 8263 8760 9340 9488 9713 10000);
+my $number = 1;
+
+&GetOptions
+  (
+   'l|length:s'          => \$length,
+   't|type|m|alphabet:s' => \$type,
+   'f|file|filename:s'   => \$filename,
+   'c|composition:s'     => \$comp,
+   'a|methionine:s'      => \$met,
+   'n|number:s'          => \$number
+  );
+
+# Alphabet names are accepted in any case, so normalise once: the
+# composition branch below compares against the lowercase name.
+$type = lc $type if defined $type;
+
+assert ( $type && defined ($alphabets{$type}),
+         $USAGE);
+assert ( $length && $length =~ /^\d+$/, $USAGE );
+
+my $alphabet = $alphabets{$type};
+my $sspace   = scalar @$alphabet;
+
+foreach my $num (1..$number) {
+   my $sequence = "";
+   if (!$comp || $type ne 'prot') {
+      # uniform draw from the alphabet
+      foreach ( 1..$length ) {
+	 $sequence .= $alphabet->[ int rand($sspace) ];
+      }
+   } else {
+      # protein with --composition: follow the aa frequency table
+      $sequence = build_seq($length, \@aa_frequencies);
+   }
+   my $seq =  Bio::PrimarySeq->new(-seq        => $sequence, 
+				   -display_id => 'randomseq'.$num);
+   my %args = (-format => 'fasta');
+   # each record is appended, so repeated runs add to the same file
+   if( $filename ) { $args{-file} = ">>$filename" }
+   my $seqio = Bio::SeqIO->new(%args);
+   $seqio->write_seq($seq);
+}
+
+# Die with the given message unless the condition is true.
+sub assert {
+   my ($condition, $message) = @_;
+   die $message unless $condition;
+}
+
+# Build a random protein string from a cumulative frequency table.
+#
+# Args   : $len - number of residues to produce
+#          $pf  - ref to an array of cumulative frequency thresholds,
+#                 indexed like the 'prot' alphabet
+# Returns: the generated string; it starts with 'M' (counted towards
+#          $len) when the file-level $met flag is set
+sub build_seq {
+   my ($len, $pf)  = @_;
+   my $residues = $alphabets{'prot'};
+   my $last     = $#{$residues};   # highest valid alphabet index
+   my $str = ($met)?'M':'';
+   my $i = ($met)?1:0;
+   for ($i..$len-1) {
+      my $aa = int(rand (10000)) ;
+      my $j = 0;
+      # advance to the first threshold that is not below the draw,
+      # without stepping past the end of the alphabet
+      while ($pf->[$j] < $aa && $j < $last) {
+	 $j++;
+      }
+      $str .= $residues->[$j];
+   }
+   # diagnostic output goes to STDERR so it cannot end up between the
+   # FASTA records the script writes to STDOUT when no file is given
+   print STDERR "str is $str\n";
+   return $str;
+}

Added: trunk/packages/bioperl/branches/upstream/current/examples/liveseq/change_gene.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/liveseq/change_gene.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/liveseq/change_gene.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+# $Id: change_gene.pl,v 1.1 2003/02/28 05:43:13 heikki Exp $
+
+use strict;
+use Bio::LiveSeq::IO::BioPerl;
+use Bio::LiveSeq::Mutator;
+use Bio::LiveSeq::Mutation;
+use Bio::Variation::IO;
+
+# Two arguments are needed (EMBL file and gene name); with fewer, show
+# the usage text and stop.
+if (@ARGV < 2) {
+    print <<USAGE;
+
+LiveSeq::Mutator example by Joseph Insana
+
+Arguments: filename containing embl entry, gene_name
+           It will create that Bio::LiveSeq::Gene and print out some
+           basic informations about it.
+           It will then issue mutations and print results
+
+Usage:     change_gene.pl filename genename
+
+Example:   change_gene.pl ../../t/data/ar.embl AR
+USAGE
+    exit;
+}
+
+my ($filename, $gene_name) = @ARGV[0, 1];
+
+# load the entry and turn the named gene into a LiveSeq gene object
+my $loader = Bio::LiveSeq::IO::BioPerl->load(-file => "$filename");
+my $gene   = $loader->gene2liveseq(-gene_name        => $gene_name,
+                                   -getswissprotinfo => 0);
+
+# summary of the gene, written to STDERR
+print STDERR "Gene: ", $gene->name, "\n";
+print STDERR "    Moltype: ", $gene->get_DNA->alphabet, "\n";
+print STDERR "    Features:\n";
+print STDERR $gene->printfeaturesnum();
+print STDERR "    Gene has boundaries ", $gene->upbound, " - ", $gene->downbound, "\n";
+print STDERR "    Gene has maxtranscript with start ", $gene->maxtranscript->start,
+      " end ", $gene->maxtranscript->end, " strand ", $gene->maxtranscript->strand, "\n";
+print STDERR "    DNA  has boundaries ", $gene->get_DNA->start, " - ", $gene->get_DNA->end, "\n";
+print STDERR "\n";
+
+print STDERR "Now issuing mutations to the gene....\n";
+
+# one mutation: an 'A' at position 64, with "coding" numbering
+my $mutation = Bio::LiveSeq::Mutation->new(-seq => 'A',
+                                           -pos => 64);
+my $mutate   = Bio::LiveSeq::Mutator->new(-gene      => $gene,
+                                          -numbering => "coding");
+$mutate->add_Mutation($mutation);
+
+my $results = $mutate->change_gene();
+print "\n";
+
+# write the outcome in 'flat' format when there is one
+if ($results) {
+    my $out = Bio::Variation::IO->new( '-format' => 'flat');
+    $out->write($results);
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/liveseq/change_gene.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/longorf.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/longorf.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/longorf.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,150 @@
+#!/usr/bin/perl -w
+# longorf.pl v0208020920
+# (c) Dan Kortschak 2002
+
+use vars qw($USAGE);
+
+use strict;
+use Getopt::Long;
+use Bio::SeqIO;
+
+$USAGE = "longorf [--help] [--notstrict] [--verbose] [--graph] [--width printwidth] [--format seqformat] --input seqfile\n";
+
+# option defaults: FASTA input and a print width of 50
+my $sequencefile;
+my $sequenceformat = 'fasta';
+my $printwidth     = 50;
+my ($notstrict, $graph, $verb, $help);
+
+GetOptions(
+    'input|i=s'   => \$sequencefile,
+    'format|f=s'  => \$sequenceformat,
+    'notstrict|n' => \$notstrict,
+    'width|w=s'   => \$printwidth,
+    'graph|g'     => \$graph,
+    'verbose|v'   => \$verb,
+    'help|h'      => \$help,
+);
+
+# --help hands over to perldoc on this script
+if ($help) {
+    exec('perldoc', $0);
+    die;
+}
+
+die($USAGE . "\nPlease specify an input filename.\n")
+    unless defined $sequencefile;
+
+# Find the longest open reading frame on either strand of a sequence.
+#
+# Args   : $_[0] - nucleotide sequence string
+#          $_[1] - true to also accept the first base of each reading
+#                  frame as a start (relaxed mode)
+# Returns: list ($best, $bests, $beste, $beststrand, $bestorf): the
+#          end-minus-start span, the start and end positions within the
+#          scanned strand's string, the strand ('+' or '-') and the
+#          subsequence between start and end.  The initial values
+#          (0, -1, -1, 0, "") come back when no candidate is found.
+sub longestORF {
+   my $best=0;
+   my ($bests,$beste,$beststrand)=(-1,-1,0);
+   my $bestorf="";
+
+   my $relaxed=$_[1];
+   my $dna=Bio::Seq->new(-seq => $_[0]);
+   # forward sequence and its reverse complement, scanned independently
+   my %strand=('+'=>$dna->seq,
+               '-'=>$dna->revcom->seq);
+
+   foreach my $direction (keys %strand) {
+      my @starts=();
+      my @ends=();
+      if ($relaxed) {
+         # each frame whose first codon is not a stop contributes its
+         # first base as a candidate start
+         for (my $frame=0;$frame<3;$frame++) {
+            unless ($strand{$direction}=~m/^.{$frame}(taa|tga|tag)/i) {
+               push @starts,$frame+1;
+            }
+         }
+      }
+      # pos() is the offset just past the match, so pos-2 is the 1-based
+      # position of the codon's first base
+      while ($strand{$direction}=~m/(atg)/gi) {
+         push @starts,pos($strand{$direction})-2;
+      }
+
+      while ($strand{$direction}=~m/(taa|tga|tag)/gi) {
+         push @ends,pos($strand{$direction})-2;
+      }
+      # the last three positions serve as ends too, one per frame
+      push @ends,($dna->length-2,$dna->length-1,$dna->length);
+
+      for my $s (@starts) {
+         for my $e (@ends) {
+            # same frame as the start and downstream of it
+            if ($e%3==$s%3 and $e>$s) {
+               if ($e-$s>$best) {
+                  $best=$e-$s;
+                  ($bests,$beste,$beststrand)=($s,$e,$direction);
+                  $bestorf=Bio::Seq->new(-seq=>$strand{$direction})->subseq($s,$e);
+               }
+               # only the first in-frame end is considered for this start
+               last
+            } else {
+               next
+            }
+         }
+      }
+   }
+   return ($best,$bests,$beste,$beststrand,$bestorf);
+}
+
+
+my $seqio = Bio::SeqIO->new('-format' => $sequenceformat,
+                            '-file'   => $sequencefile );
+
+my ($length,$start,$end,$direction,$sequence);
+my $count=0;
+my @lengths;       # histogram: index is ORF length / 3
+my $totallength=0;
+
+while (my $dna = $seqio->next_seq) {
+   $count++;
+   ($length,$start,$end,$direction,$sequence)=longestORF($dna->seq,$notstrict);
+   if ($verb) {
+      print $dna->display_id," ",$dna->desc,": "; 
+      print "$length, $start, $end ($direction)\n$sequence\n\n",Bio::Seq->new(-seq=>$sequence)->translate->seq,"\n\n--\n\n";
+   }
+   $totallength+=$length;
+   $lengths[$length/3]++;
+}
+
+# With no sequences there is nothing to average; stop with a clear
+# message instead of dividing by zero below.
+die "No sequences found in $sequencefile\n" unless $count;
+
+print "Average ORF length: ", $totallength/$count,"\n\n";
+
+print "Length distribution is:\n";
+
+if ($graph) {
+   my $length;
+   my $maxlength=0;
+   # fill the gaps with 0 and find the tallest bar for scaling
+   for ($length=0;$length<@lengths;$length++) {
+      $lengths[$length]=0 unless $lengths[$length];
+      $maxlength=$lengths[$length] if ($lengths[$length]>$maxlength);
+   }
+   for ($length=0;$length<@lengths;$length++) {
+      print $length*3,"\t",$lengths[$length],"\t|";
+      print "#"x(($lengths[$length])*$printwidth/$maxlength);
+      print "\n";
+   }
+} else {
+   for ($length=0;$length<@lengths;$length++) {
+      print $length*3,"\t",($lengths[$length]or"0"),"\n";
+   }
+}
+
+__END__
+
+=head1 NAME
+
+longorf - perl script to find the longest ORF of a sequence
+
+=head1 SYNOPSIS
+
+% longorf [-h] [-n] [-v] [-g] [-w printwidth] [-f seqformat] -i seqfile
+
+=head1 DESCRIPTION
+
+This script will examine a set of nucleotide sequences and determine
+the longest ORF in each sequence. ORFs may start at the canonical ATG
+or at the beginning of the sequence if the notstrict option is chosen.
+The script will output a list of the longest ORF lengths, starts, ends
+and strands with the ORF and amino acid sequence if the verbose option
+is chosen. A histogram of the longest ORFs in the input set may be
+printed by choosing the graph option.
+
+=head1 FEEDBACK
+
+This script is not supported by anyone, but requests can be made to the
+author.
+
+=head1 AUTHOR - Dan Kortschak <kortschak at rsbs.anu.spanner.edu.au>
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/longorf.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/make_primers.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/make_primers.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/make_primers.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,169 @@
+#!/usr/bin/perl -w
+# $Id: make_primers.pl,v 1.1 2003/07/07 18:20:58 bosborne Exp $
+# Author: cckim at stanford.edu
+
+# Description: This program designs primers for constructing knockouts
+# of genes by transformation of PCR products (ref: Datsenko & Wanner,
+# PNAS 2000).  A tab-delimited file containing ORF START STOP is read,
+# and primers flanking the start & stop coordinates are designed based
+# on the user-designated sequence file.  In addition, primers flanking
+# the knockout regions are chosen for PCR screening purposes once the
+# knockout is generated.  The script uses Bioperl in order to
+# determine the primer sequences, which requires getting subsequences
+# and reverse complementing some of the objects.
+
+# make_primers.pl
+# Purpose: Design primers for the Wanner method of PCR product-based knockouts
+# Input: FASTA sequence file, tab-delimited coordinates file
+# Output: Primer output file
+# July 4, 2001
+# Charles C. Kim
+
+###########
+# MODULES #
+###########
+use Bio::Seq;
+use Getopt::Std;
+
+#############
+# VARIABLES #
+#############
+$upgap = 0; # the number of nt upstream of the 5' end to include in the deletion
+$downgap = 0; # the number of nucleotides downstream of the 3' end to include
+              # in the deletion
+$oligolength = 40; # the length of the homologous region on each primer
+$seqfile = '';   # don't specify these filenames unless you want to run
+$coordfile = ''; # the program on these filenames exclusively
+$outfile = '';   #
+%fiveprime_primers = (
+		      "P1" => "GTGTAGGCTGGAGCTGCTTC",
+		      );
+%threeprime_primers = (
+		       "P2" => "CATATGAATATCCTCCTTAG",
+		       "P4" => "ATTCCGGGGATCCGTCGACC",
+		       );
+
+#########
+# FILES #
+#########
+getopts('s:c:o:');  # sequence file, coordinates file, output file
+
+$seqfile = $opt_s if $opt_s;
+$coordfile = $opt_c if $opt_c;
+$outfile = $opt_o if $opt_o;
+
+&open_readfile(*SEQFILE, 'sequence', $seqfile);
+&open_readfile(*COORDFILE, 'coordinate', $coordfile);
+&open_writefile(*PRIMERFILE, 'output', $outfile);
+
+########
+# MAIN #
+########
+
+# Read the single FASTA record, upper-casing g/a/t/c on the way in.
+$seq = '';
+$count = 0;
+while (<SEQFILE>) {
+    if (/>/) {
+	$count++;
+	if ($count > 1) {
+	    die "More than one sequence present in the input file\n";
+	}
+	next;
+    }
+    chomp($_);
+    $_ =~ tr/gatc/GATC/;
+    $seq .= $_;
+}
+close SEQFILE;
+
+$seq = Bio::Seq-> new('-seq'=>$seq );
+
+# Amount added to |start - stop| to get the wild-type screening product:
+# both homology arms, both gaps, the two 20 nt screening primers and one
+# for the inclusive coordinates (121 with 40 nt arms and no gaps).
+my $flank_size = 2*$oligolength + $upgap + $downgap + 2*20 + 1;
+
+while (<COORDFILE>) {
+    chomp($_);
+    next if !$_;
+    (my $name, my $start, my $stop) = split(/\t/, $_);
+    my ($upprimer, $downprimer, $uppcr, $downpcr);
+    if ($start < $stop) {
+	# ORF on the forward strand: downstream oligos are reverse-complemented
+	$upprimer = $seq->subseq($start-$oligolength-$upgap, $start-1-$upgap);
+	$downprimer = $seq->subseq($stop+1+$downgap,$stop+$oligolength+$downgap);
+	$downprimer = Bio::Seq->new('-seq'=>$downprimer);
+	$downprimer = $downprimer->revcom();
+	$downprimer = $downprimer->seq();
+	$uppcr = $seq->subseq($start-$oligolength-$upgap-20,$start-1-$upgap-$oligolength);
+	$downpcr = $seq->subseq($stop+1+$downgap+$oligolength,$stop+$oligolength+$downgap+20);
+	$downpcr = Bio::Seq->new('-seq'=>$downpcr);
+	$downpcr = $downpcr->revcom();
+	$downpcr = $downpcr->seq();
+    }
+    elsif ($start > $stop) {
+	# ORF on the reverse strand: upstream oligos are reverse-complemented
+	$upprimer = $seq->subseq($start+$upgap+1,$start+$oligolength+$upgap);
+	$downprimer = $seq->subseq($stop-$oligolength-$downgap, $stop-1-$downgap);
+	$upprimer = Bio::Seq->new('-seq'=>$upprimer);
+	$upprimer = $upprimer->revcom();
+	$upprimer = $upprimer->seq();
+	$uppcr = $seq->subseq($start+$oligolength+$upgap+1,$start+$oligolength+$upgap+20);
+	$downpcr = $seq->subseq($stop-$oligolength-$downgap-20,$stop-1-$downgap-$oligolength);
+	$uppcr = Bio::Seq->new('-seq'=>$uppcr);
+	$uppcr = $uppcr->revcom();
+	$uppcr = $uppcr->seq();
+    }
+    else { die "Problem with start and stop coordinates\n"; }
+    print PRIMERFILE "$name\n";
+    print PRIMERFILE "5'pcr\t$uppcr\n";
+    print PRIMERFILE "3'pcr\t$downpcr\n";
+    print PRIMERFILE "\tExpected wildtype product size: ",abs($start-$stop)+$flank_size," bp\n";
+    foreach my $entry (sort keys %fiveprime_primers) {
+	print PRIMERFILE "5'+$entry\t$upprimer$fiveprime_primers{$entry}\n";
+    }
+    foreach my $entry (sort keys %threeprime_primers) {
+	print PRIMERFILE "3'+$entry\t$downprimer$threeprime_primers{$entry}\n";
+    }
+    print PRIMERFILE "\n";
+}
+
+
+###############
+# SUBROUTINES #
+###############
+
+# Open a file for reading on the given filehandle, asking on STDIN for
+# the name when none was supplied.
+#
+# Args   : filehandle (typeglob), description of the file used in the
+#          prompt, filename (may be empty)
+# Dies when no name is given, the file does not exist or cannot be opened.
+sub open_readfile {
+    my ($filehandle, $filetype, $filename) = @_;
+    unless ($filename) {
+	print "Enter $filetype filename: ";
+	$filename = <STDIN>;
+	die "No $filetype filename given\n" unless defined $filename;
+	chomp ($filename);
+    }
+    unless (-e $filename) { die "$filename not found\n"; }
+    # three-argument open: the name is never interpreted as a mode or pipe
+    open($filehandle, '<', $filename) or die "Couldn't open $filename\n";
+    return;
+}
+
+# Open a file for writing on the given filehandle, asking on STDIN for
+# the name when none was supplied and for confirmation before an
+# existing file is overwritten.
+#
+# Args   : filehandle (typeglob), description of the file used in the
+#          prompt, filename (may be empty)
+# Dies when no name is given, the user declines to overwrite, or the
+# file cannot be opened.
+sub open_writefile {
+    my ($filehandle, $filetype, $filename) = @_;
+    unless ($filename) {
+	print "Enter $filetype filename: ";
+	$filename = <STDIN>;
+	die "No $filetype filename given\n" unless defined $filename;
+	chomp ($filename);
+    }
+    if (-e $filename) {
+	print "$filename already exists!  Overwrite (Y/N)? ";
+	my $answer = <STDIN>;
+	# only a lone Y or N counts; an empty line asks again instead of
+	# silently overwriting
+	while (defined $answer && $answer !~ /\A\s*[yn]\s*\z/i) {
+	    print 'Y or N, please: ';
+	    $answer = <STDIN>;
+	}
+	# end of input without an answer is treated as "no"
+	if (!defined $answer || $answer =~ /n/i) {
+	    die "$filename not overwritten.\n";
+	}
+    }
+    open($filehandle, '>', $filename) or die "Couldn't open $filename\n";
+    return;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/make_primers.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/popgen/parse_calc_stats.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/popgen/parse_calc_stats.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/popgen/parse_calc_stats.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,102 @@
+#!/usr/bin/perl -w
+# Author: Jason Stajich, jason at bioperl.org
+# $Id: parse_calc_stats.pl,v 1.1 2003/07/29 02:50:08 jason Exp $
+# $Revision: 1.1 $
+
+use strict;
+
+use Bio::PopGen::IO;
+use Bio::PopGen::Statistics;
+use Bio::PopGen::Population;
+
+# Open the example data set. Class->new is used rather than the
+# indirect-object form "new Class", which Perl can mis-parse.
+my $io = Bio::PopGen::IO->new(-format => 'prettybase',
+                              # the Bio::Root::IO->catfile is only
+                              # to make file access platform independent
+                              -file   => Bio::Root::IO->catfile
+                              (qw( t data popstats.prettybase)));
+
+# This is an example of how to read in data from Bio::PopGen::IO
+# We're going to make 2 lists, @outgroup, @ingroup
+# @outgroup is a single individual which is named 'out'
+# @ingroup is the set of individuals we are testing
+my (@ingroup, @outgroup);
+while( my $ind = $io->next_individual ) {
+    # Any individual whose id contains 'out' goes to the outgroup.
+    if($ind->unique_id =~ /out/) {
+        push @outgroup, $ind;
+    } else {
+        push @ingroup, $ind;
+    }
+}
+
+# Everything below reads the marker list from the first ingroup individual,
+# so stop with a clear message if the input produced none.
+die "No ingroup individuals were read from the input file\n"
+    unless @ingroup;
+
+# We'll get the names of all the markers (or sites)
+# that this individual has genotypes for
+my @marker_names = $ingroup[0]->get_marker_names();
+
+# the number of sites is the same as the number of markers
+# we assume that all the individuals have the same number of sites
+# or that this data is 'aligned' if these were derived from a 
+# multiple sequence alignment
+my $sitecount = scalar @marker_names;
+
+foreach my $ind ( @ingroup ) {
+    # here let's print out the individual name and all their alleles
+    # for all the markers
+    # like this
+    # Name: INDIVIDUALNAME
+    #      A1,A2 B1,B2,...
+    print "Name: ", $ind->unique_id,"\n";
+    print "\t";
+    foreach my $marker ( @marker_names ) {
+        for my $genotype ( $ind->get_Genotypes($marker) ) {
+            my @alleles = $genotype->get_Alleles();
+            # In this example these are actually single alleles anyways...
+            print join(",", @alleles), " ";
+        }
+    }
+    print "\n";
+    
+    # There is a more compact way to write that
+    print "Name: ", $ind->unique_id,
+          "\n\t", join(" ", map { join(",",$_->get_Alleles) } 
+                          map { $ind->get_Genotypes($_) } @marker_names),"\n";
+    print "--\n";
+}
+
+# We can compute some statistics about these individuals
+# (underlying assumption is that they are unrelated...)
+
+print "Pi: ",Bio::PopGen::Statistics->pi(\@ingroup), "\n";
+print "Theta: ",Bio::PopGen::Statistics->theta(\@ingroup), "\n";
+
+# we can also treat them like a population
+# (constructors are called as Class->new; the indirect-object form
+# "new Class(...)" can be mis-parsed by Perl)
+my $ingroup_pop = Bio::PopGen::Population->new(-individuals => \@ingroup);
+
+print "Pi: ",Bio::PopGen::Statistics->pi($ingroup_pop), "\n";
+print "Theta: ",Bio::PopGen::Statistics->theta($ingroup_pop), "\n";
+
+
+
+
+
+# You can also simulate individuals from a coalescent 
+use Bio::PopGen::Simulation::Coalescent;
+
+my $ssize = 5;
+my $sim = Bio::PopGen::Simulation::Coalescent->new(-sample_size => $ssize);
+my $tree = $sim->next_tree;
+my $mutcount = 100;
+$sim->add_Mutations($tree, $mutcount);
+
+# The leaves are the simulated individuals
+my @leaves = $tree->get_leaf_nodes;
+
+# We can use the Stats module either like Bio::PopGen::Statistics->XXX
+# or like this:
+my $stats = Bio::PopGen::Statistics->new;
+# $stats->verbose(1);
+print "Coalescent pi: ", $stats->pi(\@leaves), "\n";
+print "Coalescent theta: ", $stats->theta(\@leaves), "\n";
+my $coalescent_pop = Bio::PopGen::Population->new(-individuals => \@leaves);
+
+print "Coalescent pi: ", $stats->pi($coalescent_pop), "\n";
+print "Coalescent theta: ", $stats->theta($coalescent_pop), "\n";

Added: trunk/packages/bioperl/branches/upstream/current/examples/rev_and_trans.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/rev_and_trans.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/rev_and_trans.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+
+# PROGRAM  : rev_and_trans.pl
+# PURPOSE  : Simple driver for Bio::Seq revcom and translate
+# AUTHOR   : Ewan Birney birney at sanger.ac.uk 
+# CREATED  : Tue Oct 27 1998
+# REVISION : $Id: rev_and_trans.pl,v 1.5 2003/02/25 09:14:51 bosborne Exp $
+#
+# INSTALLATION
+#    If you have installed bioperl using the standard
+#    makefile system everything should be fine and 
+#    dandy.
+#
+#    if not edit the use lib "...." line to point the directory
+#    containing your Bioperl modules.
+#
+
+
+use Bio::Seq;
+use Bio::SeqIO;
+
+# Build a sequence object straight from a string held in memory.
+# It is *very* important to get the type right so that it
+# is translated correctly.
+
+my $seq = Bio::Seq->new(
+    -id   => "myseq",
+    -seq  => "CGCCGAAGAAGCATCGTTAAAGTCTCTCTTCACCCTGCCGTCATGTCTAAGTCAGAGTCTCCT",
+    -type => 'Dna',
+);
+
+# Every FASTA record below is written to STDOUT through this writer.
+
+my $seqout = Bio::SeqIO->new( '-format' => 'fasta', -fh => \*STDOUT );
+
+# Reverse-complement the sequence, then pull its bases out as a plain string.
+
+my $rev          = $seq->revcom();
+my $actual_bases = $rev->seq();
+
+print "Reversed sequence as a string is [$actual_bases]\n";
+
+# The same reverse complement, this time as FASTA formatted output.
+
+$seqout->write_seq($rev);
+
+# Translate the original sequence and write the result out as well.
+
+my $trans = $seq->translate();
+
+print "Translated sequence!\n";
+
+$seqout->write_seq($trans);
+

Added: trunk/packages/bioperl/branches/upstream/current/examples/revcom_dir.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/revcom_dir.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/revcom_dir.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,79 @@
+#!/usr/bin/perl -w
+#
+################################################################################
+#11-17-2001
+#Jianwen Fang (jwfang1999 at yahoo.com)
+#
+#This program computes the reverse complement of the sequence files in the current directory
+#and saves each result in the same directory, using the same name with the extension ".rev"
+###############################################################################
+
+
+use strict;
+use Bio::Seq;
+use Bio::SeqIO;
+
+my @files = ();
+my $folder = '.';
+my $inputFormat;
+my $outputFormat;
+# Start at zero so the summary line is correct when no file matches.
+my $numSeq = 0;
+
+   #Fasta       FASTA format
+   #EMBL        EMBL format
+   #GenBank     GenBank format
+   #GCG         GCG format
+   #raw         Raw format (one sequence per line, no ID)
+
+my @format = ('Fasta', 'EMBL', 'GenBank', 'GCG', 'Raw');
+
+print("\nWhat is the format of the original sequence files?\n");
+print("type 0 for Fasta; 1 for EMBL; 2 for GenBank; 3 for GCG; 4 for Raw\n");
+$inputFormat = <STDIN>;
+$inputFormat = '' unless defined $inputFormat;    # EOF on STDIN
+chomp ($inputFormat);
+# The answer is used as an index into @format, so it must be a valid one.
+die "Input format must be a number from 0 to $#format\n"
+    unless $inputFormat =~ /^\d+$/ && $inputFormat <= $#format;
+
+print("\nWhat is the format of the reverse complement sequence files you want?\n");
+print("type 0 for Fasta; 1 for EMBL; 2 for GenBank; 3 for GCG; 4 for Raw\n");
+$outputFormat = <STDIN>;
+$outputFormat = '' unless defined $outputFormat;  # EOF on STDIN
+chomp ($outputFormat);
+die "Output format must be a number from 0 to $#format\n"
+    unless $outputFormat =~ /^\d+$/ && $outputFormat <= $#format;
+
+unless(opendir(FOLDER, $folder))
+{
+    print "cannot open folder $folder!\n";
+    exit;
+}
+
+# Everything in the folder except the "." and ".." entries.
+@files = grep(!/^\.\.?$/, readdir(FOLDER));
+closedir(FOLDER);
+
+foreach my $file (@files)
+{
+    # Any name containing "seq" (case-insensitive) is treated as a sequence file.
+    if($file =~ /seq/i)
+    {
+        getRevcom($file);
+        $numSeq++;
+    }
+}
+
+print "$numSeq reverse complement sequences have been saved in current directory\n";
+exit;
+
+############################################################################
+#subroutine getRevcom takes a sequence file name (normally with a .seq extension)
+#as its parameter, reads the first sequence in that file and writes its reverse
+#complement to a file of the same name with the extension replaced by "rev".
+#The input and output formats come from the file-level $inputFormat and
+#$outputFormat indexes into @format. Dies if the file holds no sequence.
+############################################################################
+sub getRevcom
+{
+    my $seqFile = $_[0];
+    my $in = Bio::SeqIO->new('-file'=>$seqFile, '-format'=>$format[$inputFormat]);
+    my $seq = $in->next_seq();
+    # An empty or unparsable file yields no sequence object; report that
+    # instead of failing on a method call against undef.
+    die "No sequence could be read from $seqFile\n" unless defined $seq;
+    my $revcomSeq = $seq->revcom();
+    # Swap the last extension for ".rev". A name without any extension just
+    # gets ".rev" appended (it used to collapse to a file called "rev").
+    (my $outSeqFile = $seqFile) =~ s/\.[^.]*$//;
+    $outSeqFile .= '.rev';
+    print "$outSeqFile\n";
+    my $out = Bio::SeqIO->new('-file'=>">$outSeqFile", '-format'=>$format[$outputFormat]);
+    $out->write_seq($revcomSeq);
+}
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/revcom_dir.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,192 @@
+README for Bioperl examples/root 
+
+This directory contains some sample scripts and modules that
+illustrate the use of the Bio::Root::* modules. Currently, these
+example scripts focus on exception handling. 
+
+Here are some short descriptions of the examples/root scripts:
+
+Script            Description
+--------------    ----------------------------------------
+exceptions1.pl    How to throw and catch Error.pm objects
+exceptions2.pl    How to throw Error.pm objects via Bio::Root::Root
+exceptions3.pl    Illustrates inheritance between Error.pm types
+exceptions4.pl    Shows what happens when Error.pm isn't installed
+
+These demo scripts should be executed within the
+examples/root directory of the Bioperl distribution.
+
+
+Using Error.pm for Exception Handling
+--------------------------------------
+
+The Bio::Root::Root module interfaces with Graham Barr's Error.pm.
+Error.pm provides a handy way to create, throw, and catch exceptions
+as objects. Error.pm is quite convenient and easy to use and adds a
+level of control for managing errors within your Perl code using
+familiar object-oriented, try-catch-finally semantics. You can define
+subclasses of Error.pm representing particular types of exceptions,
+and you can define catch blocks to handle these types of exceptions.
+
+This has distinct advantages over simply catching any and all errors
+with an eval{} block, as is currently done in Bioperl. Strongly typed
+exception objects make it easy to write appropriate handlers. It also
+makes your code easier to understand because it's clear what type of
+things can/did go wrong.
+
+Throwing exceptions that are Error.pm-compliant is a little more work
+than throwing them the usual Bioperl way. Here's an example:
+
+Using Error.pm-compliant syntax:
+
+  if( !$feat->isa("Bio::SeqFeatureI") ) {
+      $self->throw(-class => 'Bio::Root::BadParameter',
+                   -text  =>"$feat is not a SeqFeatureI and that's what we expect.",
+                   -value => $feat);
+  }
+
+Not using Error.pm-compliant syntax:
+
+  if( !$feat->isa("Bio::SeqFeatureI") ) {
+      $self->throw("$feat is not a SeqFeatureI and that's what we expect.");
+  }
+
+The advantage of using the Error.pm-compliant syntax is that, even if
+Error.pm isn't installed, the exception message that gets thrown will
+contain the name of the class of the exception. This provides a more
+informative description of what went wrong.
+
+In the Error.pm-compliant case above, the exception string starts with:
+
+    ------------- EXCEPTION: Bio::Root::BadParameter -------------
+
+Compare this to the non-Error.pm-compliant exception string:
+
+    -------------------- EXCEPTION --------------------
+
+There are a variety of exception classes that are declared in
+Bio::Root::Exception for common types of error conditions:
+
+    Bio::Root::Exception
+    Bio::Root::NotImplemented
+    Bio::Root::IOException
+    Bio::Root::FileOpenException
+    Bio::Root::SystemException
+    Bio::Root::BadParameter
+    Bio::Root::OutOfRange
+    Bio::Root::NoSuchThing
+
+Feel free to use these, or subclass from them to derive more specific
+classes of exceptions. For more information about these types of
+exceptions, see perldoc Bio::Root::Exception.
+
+Error.pm is available through CPAN and I encourage Bioperl users and
+developers to install it and experiment with it.
+
+
+Bio::Root::Exception.pm
+-----------------------
+
+The Bio::Root::Exception.pm module contains a number of Error.pm
+subclasses representing common types of errors. If you want to throw
+an exception within your Bioperl module that doesn't correspond to any
+of the ones defined in Bio::Root::Exception, feel free to define a new
+one, but be sure it inherits from Bio::Root::Exception or one of its
+subclasses. This will allow anyone to write a handler for any type of
+Bioperl exception.
+
+Defining a new type of exception can be done quite simply. All you
+need to do is to specify the @ISA array for your new type, as in:
+
+    @Bio::Root::Exception::MyBad::ISA = qw( Bio::Root::Exception );
+
+If you want to override any of the available methods or add new ones,
+you'll have to provide a package statement and the appropriate
+method definitions.
+
+Programming tip: Be careful not to use exceptions as your primary
+means of flow control within your code. Throwing and handling
+exceptions come with some execution overhead. Also, such excessive use
+of exceptions can make your logic hard to follow.
+
+
+Bio::Root::RootI.pm and Bio::Root::Root.pm 
+-------------------------------------------
+
+The modules in the lib directory also demonstrate the use of the Bioperl
+modules Bio::Root::RootI and Bio::Root::Root. RootI.pm should be used
+as the base class for any Bioperl module that specifies an
+interface. It simplifies the process of writing virtual
+methods. Root.pm implements RootI.pm and should be used as a base class
+for any Bioperl module that specifies a concrete object.
+
+The module TestInterface.pm demonstrates how to use
+Bio::Root::RootI.pm. The module TestObject.pm demonstrates how to use
+Bio::Root::Root.pm.
+
+Bio::Root::RootI defines a method called "throw_not_implemented()"
+that will throw a Bio::Root::NotImplemented exception. This is useful
+for ensuring that an implementing class has implemented all
+methods. Any method within a Bio::Root::RootI subclass can call
+throw_not_implemented() to indicate that a method has not been
+implemented. Implementations of the interface must implement the
+method or an exception will result when someone tries to use it.
+
+Note that Bio::Root::Root can make use of Error.pm if available, but
+Error.pm is not required. 
+
+
+Bio::Root::Root::throw() with Error.pm
+---------------------------------------
+
+Bio::Root::Root can determine if Error.pm is available and if so, can
+make use of it when Bio::Root::Root::throw() is called. For a demo,
+see exceptions2.pl.
+
+
+Real-Life Examples
+------------------
+
+For additional examples of how to make use of the Error.pm-related capabilities
+of Bio::Root::Root.pm, I created new versions of Bio::SeqI.pm,
+Bio::Seq.pm, Bio::PrimarySeqI.pm, and Bio::PrimarySeq.pm within the
+lib/Bio subdirectory. This conversion is pretty straightforward and could
+be done on the other Bioperl modules without too much effort.
+
+TODO: Update the lib/Bio modules based on the latest versions in bioperl-live.
+
+
+Using Error.pm's try{} and catch{} within Bioperl Modules
+----------------------------------------------------------
+
+For developers, using Error.pm's try{} and catch{} blocks within
+Bioperl modules themselves could come in handy. But doing so would add
+an external dependency for Error.pm, which is not part of the standard
+Perl distribution. So at this stage, it's best to stick with just
+using Error.pm's throw() method (via Bio::Root::Root) and leave the
+try{} and catch{} blocks for use only within your scripts.
+
+If you really want to use try{} and catch{} within your module and
+still want to be capable of running when Error.pm isn't available, you
+can check the $Bio::Root::Root::ERRORLOADED variable.
+
+If we really want to incorporate it within Bioperl, a reasonable
+solution would be to distribute Error.pm with Bioperl. 
+
+So why use Error.pm instead of some other utility? Well, Perl 6 will
+most likely include some form of structured exception handling akin to
+that provided by Error.pm (see these RFC's:
+http://dev.perl.org/rfc/63.pod and http://dev.perl.org/rfc/88.pod).
+So it will probably be easy to convert Error.pm-based exception handling
+to whatever is adopted for Perl 6.
+
+(Side note for any CORBA folks out there: Error.pm is used in some
+other CPAN modules, notably CORBA::MICO. Thus, using Error.pm within
+Bioperl allows consistent exception handling methodology when working
+with such modules and Bioperl together.)
+
+--
+Steve Chervitz <sac at bioperl.org>
+21 April 2001
+Updated 6 March 2003
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/root/README
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions1.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions1.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions1.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,166 @@
+#!/usr/bin/env perl
+
+# A simple tester script for demonstrating how to throw and catch
+# Error.pm objects. It also shows how to define new types of
+# Error.pm-based objects. 
+#
+# It relies on the tester modules TestObject.pm and TestInterface.pm
+# which you should also look at.
+#
+# Note that Bio::Root::NotImplemented is a subclass of Error.pm 
+# and is defined in Bio::Root::Exception.pm
+#
+# This code requires Graham Barr's Error.pm module available from CPAN.
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+#
+# $Id: exceptions1.pl,v 1.1 2003/03/07 19:25:19 sac Exp $
+
+use strict;
+use lib qw(lib/ ../../);
+use Error qw(:try);
+use TestObject;
+use Getopt::Long;
+
+# Command-line options:
+my $eg = 0;        # which example to run (a number 1-4)
+my $help = 0;      # print usage info
+
+# $Error::Debug is set to true by default in Bio::Root::Interface.
+$Error::Debug = 1; # enables verbose stack trace 
+
+GetOptions( "debug!" => \$Error::Debug,
+            "eg=s"   => \$eg,
+            "h"      => \$help
+          );
+
+my $options = << "OPTS";
+      -eg  1|2|3|4   Run a particular example
+      -nodebug       Deactivate verbose stacktrace
+      -h             Print this usage
+OPTS
+
+# Only 1-4 select an example. Anything else (including a missing -eg) gets
+# the usage text instead of silently running none of the tests.
+($help || $eg !~ /\A[1-4]\z/) and die "Usage: $0 -eg 1|2|3|4 [-nodebug] [-h]\nOptions:\n$options";
+
+print $Error::Debug ? "Try a -nodebug option to supress stack trace." : "Verbose stacktrace off.";
+print "\n\n";
+
+# Set up a tester object.
+my $test = TestObject->new();
+$test->data('Eeny meeny miney moe.');
+
+try {
+
+    test_notimplemented( $test ) if $eg == 1;
+
+    test_custom_error( $test ) if $eg == 2;
+
+    test_simple_error() if $eg == 3;
+
+    # This subroutine doesn't even exist. But because it occurs within a try block,
+    # the Error module will create a Error::Simple to capture it. Handy eh?
+    if(  $eg == 4 ) {
+        print "Test #4: Calling an undefined subroutine.\n";
+        test_foobar();
+    }
+
+    # We shouldn't see this stuff.
+    print "----\n";
+    print "----\n";
+    print "Some other code within the try block after the last throw...\n";
+    print "----\n";
+    print "----\n";
+}
+
+# Multiple catch blocks to handle different types of errors:
+
+catch Bio::Root::NotImplemented with {
+    my $error = shift;
+    print "\nCaught a Bio::Root::NotImplemented.\n",
+      "  file  : ", $error->file, "\n",
+      "  line  : ", $error->line, "\n",
+      "  text  : ", $error->text, "\n",
+      "  value : ", $error->value, "\n",
+      "  object: ", ref($error->object), "\n";
+
+    print "\nstacktrace:\n", $error->stacktrace, "\n";
+
+    print "\nstringify:\n$error\n";
+    # The above line is equivalent to this:
+    #print "\nstringify:\n", $error->stringify, "\n";
+}
+
+catch Bio::TestException with {
+    # Since we know what type of error we're getting,
+    # we can extract more information about the offending object
+    # which is retrievable from the error object.
+    my $error = shift;
+    print "\nCaught a Bio::TestException.\n",
+      "  file  : ", $error->file, "\n",
+      "  line  : ", $error->line, "\n",
+      "  text  : ", $error->text, "\n",
+      "  value : ", $error->value, "\n",
+      "  object: ", ref($error->object), "\n",
+      "  data  : ", $error->object->data, "\n";
+
+    print "\nstacktrace:\n", $error->stacktrace, "\n";
+    print "\nstringify:\n", $error->stringify, "\n";
+
+}
+
+otherwise {
+    # This is a catch-all handler for any type of error not handled above.
+    my $error = shift;
+    print "\nCaught an other type of error: ", ref($error), "\n",
+      "  file  : ", $error->file, "\n",
+      "  line  : ", $error->line, "\n",
+      "  text  : ", $error->text, "\n",
+      "  value : ", $error->value, "\n",
+      "  object: ", ref($error->object), "\n";
+
+#    print "\nstack_trace_dump:\n", $error->stack_trace_dump(), "\n";
+
+    print "\nstacktrace:\n", $error->stacktrace, "\n";
+
+    print "\nstringify:\n$error\n";
+
+};  # This semicolon is essential.
+
+print "\nDone $0\n";
+
+# Example 1: call foo() on the tester object passed in. Per the message
+# printed here, that call is expected to raise a Bio::Root::NotImplemented
+# exception, i.e. the case of an interface method that the implementing
+# object never implemented.
+sub test_notimplemented {
+    my ($test) = @_;
+
+    print "Test #1: Inducing a Bio::Root::NotImplemented exception from TestObject\n";
+
+    $test->foo();
+}
+
+
+# Example 2: call bar() on the tester object passed in. TestObject::bar()
+# deliberately throws a Bio::TestException, which is defined in
+# TestObject.pm.
+sub test_custom_error {
+    my ($test) = @_;
+
+    print "Test #2: Throwing a Bio::TestException exception from TestObject\n";
+
+    $test->bar;
+}
+
+
+# Example 3: throw an Error::Simple directly. Error::Simple comes with
+# Error.pm and can carry only a string and a value.
+sub test_simple_error {
+    print "Test #3: Throwing a Error::Simple object\n";
+
+    Error::Simple->throw( "A simple error", 42 );
+}
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions1.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions2.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions2.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions2.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,177 @@
+#!/usr/bin/env perl
+
+# This shows how Error.pm-based objects can be thrown 
+# by Bio::Root::Root::throw() when Error.pm is available.
+# When Error.pm isn't available, Bio::Root::Root::throw() 
+# works as usual.
+#
+# It also demonstrates what happens when you use an outer eval{}
+# instead of a try{} to trap thrown Error.pm-based exceptions. 
+# The behavior is the same as when Error.pm is not used.
+# This is important for backward compatibility.
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+#
+# $Id: exceptions2.pl,v 1.1 2003/03/07 19:25:19 sac Exp $
+
+use strict;
+
+use lib qw(lib/ ../../);
+
+# Uncomment this line to force Bio::Root::Root::throw() to 
+# not use Error.pm even if it's available.
+# Some of the tests in this script will be skipped .
+#BEGIN { $main::DONT_USE_ERROR = 1; }
+
+use Bio::Root::Root;
+#use Bio::Root::Exception;  # Not necessary since Bio::Root::Root uses it.
+use Error qw(:try);
+
+# Object whose throw() method is exercised by examples 1, 2 and 6.
+my $foo = Bio::Root::Root->new();
+
+if (!$main::DONT_USE_ERROR) {
+    try {
+        # This is the new, fancier way to handle exceptions. 
+        # You must have Error.pm to do this (tarball included in this dir).
+        
+        print "[1] Throwing Error within try block via call to Bio::Root::Root::throw()\n";
+        $foo->throw( -class => 'Bio::Root::Exception',
+                     -text  => "Oopsie!",
+                     -value => "123" 
+                   );
+    }
+    
+    catch Bio::Root::Exception with {
+        my $err = shift;
+        print "[1] Caught Bio::Root::Exception:\n$err";
+
+    }
+
+    otherwise {
+        my $err = shift;
+        print "[1] Caught other Error: ", ref($err), "\n$err";
+    };
+
+    
+    print "\n\n";
+}
+
+eval {
+
+    # This example demonstrates the traditional method for throwing
+    # an exception using Bio::Root::Root->throw('string').
+    # Notice how an exception of type Bio::Root::Exception is created.
+
+    print "[2] Calling Bio::Root::Root->throw('string') within an eval{}\n";
+    $foo->throw("Error message string.");
+
+};
+
+if($@) {
+    print "[2] Caught eval{}-based exception: ", ref($@), "\n$@";
+}
+else {
+    print "[2] Nothing to catch.\n";
+}
+
+
+
+print "\n\n";
+
+eval {
+
+    # This example shows that calling Error::throw directly within
+    # an eval{} doesn't lead to a true value in $@ if
+    # the error lacks a value. 
+
+    print "[3] Attempting to throw a valueless Error within an eval{} block\n    (this should fail to be caught by Error.pm v0.13 but is caught by v0.14 and greater).\n";
+
+    # $^O is set by perl itself, whereas OSTYPE is a shell variable that is
+    # often not exported into %ENV. Check $^O first and keep the old
+    # environment test as a fallback.
+    if( $^O eq 'cygwin'
+        or (defined $ENV{OSTYPE} and $ENV{OSTYPE} =~ /cygwin/) ) {
+        die "[3] This causes a segmentation fault with cygwin perl! Skipping.\n";
+    }
+
+    throw Error::Simple ("A simple error.");
+
+};
+
+if($@) {
+    print "[3] Caught eval{}-based exception: ", ref($@), "\n$@\n";
+}
+else {
+    print "[3] Nothing to catch.\n";
+}
+
+
+print "\n\n";
+
+eval {
+
+    # This example shows that calling Error::throw directly within
+    # an eval{} *does* lead to a true value in $@ if the error 
+    # contains a non-zero value. 
+
+    print "[4] Attempting to throw a valued Error within an eval{} block.\n";
+
+    throw Error::Simple ("A simple error.", 42);
+
+};
+
+if($@) {
+    print "[4] Caught eval{}-based exception: ", ref($@), "\n$@\n";
+}
+else {
+    print "[4] Nothing to catch.\n";
+}
+
+print "\n\n";
+
+if (!$main::DONT_USE_ERROR) {
+    eval {
+
+        # This example shows what happens if we try to create a
+        # Bio::Root::IOException (a subclass of Bio::Root::Exception)
+        # with a zero value. Bio::Root::Exception::new() catches this
+        # faux pas and substitutes a value that will register as true in if($@).
+
+        print "[5] Attempting to throw a zero-valued Bio::Root::IOException\n    within an eval{} block.\n";
+
+        throw Bio::Root::IOException ( -text =>"An error with zero value.",
+                                   -value => 0);
+
+    };
+
+    if($@) {
+        print "[5] Caught eval{}-based zero-valued exception: ", ref($@), "\n$@\n";
+    }
+    else {
+        print "[5] Nothing to catch.\n";
+    }
+    print "\n\n";
+}
+
+
+eval {
+
+    # If Error::throw is called *indirectly* within an eval{}
+    # (i.e., by calling a method which then calls Error::throw),
+    # $@ is defined and it consists of a reference to the Error.pm object.
+
+    print "[6] Attempting to throw Error indirectly within an eval{} block \n    via Bio::Root::Root::throw()\n";
+
+    $foo->throw( -class => 'Bio::Root::Exception',
+                 -text  => "Oopsie!",
+                 -value => "456"
+                );
+
+};
+
+if($@) {
+    print "[6] Caught eval{}-based exception: ", ref($@), "\n$@";
+}
+else {
+    print "[6] Nothing to catch.\n";
+}
+
+print "Done.\n";
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions2.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions3.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions3.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions3.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+
+# This shows that Error objects can be subclassed into more specialized types.
+# Bio::Root::FileOpenException is a subclass of  Bio::Root::IOException. 
+#
+# We can write a generic handler to trap any type of IOException
+# or we could handle FileOpenExceptions explicitly.
+#
+# To demo, run this script without any arguments, then try it with an argument
+# that doesn't correspond to any file on your system (e.g., foobar). 
+# Then try running with a valid file name.
+#
+# This requires Graham Barr's Error.pm module available from CPAN.
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+#
+# $Id: exceptions3.pl,v 1.1 2003/03/07 19:25:19 sac Exp $
+
+use strict;
+use lib qw(lib/ ../../);
+use Error qw(:try);
+use Bio::Root::Exception;
+
+# Lexical read handle, declared outside try{} so that finally{} can reach it.
+my $in;
+
+try {
+   print "Starting try block.\n";
+
+   # Test for a missing argument explicitly so a file named "0" still counts.
+   my $file = shift @ARGV;
+   Bio::Root::IOException->throw(-text=>"No file supplied.")
+       unless defined $file && length $file;
+
+   # Three-argument open: the command-line value is only ever a path to
+   # read, never an open mode or a pipe.
+   open ( $in, '<', $file) || Bio::Root::FileOpenException->throw(-text=>"Can't open file \"$file\"", -value=> $!); 
+
+   print "Opened file $file\n";
+
+}
+catch Bio::Root::IOException with {
+    # This handler deals with IOException or any of its subclasses.
+    # We could also write a handler with a `catch Bio::Root::FileOpenException'.
+    # Such a handler would appear before this one.
+    my $e = shift;
+    print "Caught IOException:\n\n$e";
+}
+finally {
+     # Only close a handle that was actually opened.
+     close $in if defined $in && defined fileno($in);
+};
+
+print "\nDone.\n";


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions3.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions4.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions4.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions4.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,129 @@
+#!/usr/bin/env perl
+
+# This shows how the examples work when Error.pm isn't installed.
+# It also shows how to supress using Error.pm if it is installed
+# and you don't want to use it for some reason.
+#
+# Here we use the eval{} style exception handling that's currently
+# in vogue trapping Bioperl exceptions.
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+#
+# $Id: exceptions4.pl,v 1.1 2003/03/07 19:25:19 sac Exp $
+
+
+# Setting this variable simulates not having Error.pm installed.
+BEGIN { $DONT_USE_ERROR = 1; }
+
+use strict;
+use lib qw(lib/ ../../);
+use TestObject;
+use Getopt::Long;
+
+# Command-line options:
+my $eg = 0;        # which example to run (a number 1-5)
+my $help = 0;      # print usage info
+$Error::Debug = 1; # enables verbose stack trace 
+
+GetOptions( "debug!" => \$Error::Debug,
+            "eg=s"   => \$eg,
+            "h"      => \$help
+          );
+
+# Example 5 exists below, so the option help lists it too.
+my $options = << "OPTS";
+      -eg  1|2|3|4|5 Run a particular example
+      -nodebug       Deactivate verbose stacktrace
+      -h             Print this usage
+OPTS
+
+# Only 1-5 select an example. Anything else (including a missing -eg) gets
+# the usage text instead of silently running none of the tests.
+($help || $eg !~ /\A[1-5]\z/) and die "Usage: $0 -eg 1|2|3|4|5 [-nodebug] [-h]\nOptions:\n$options";
+
+# Set up a tester object.
+my $test = TestObject->new();
+$test->data('Eeny meeny miney moe.');
+
+eval {
+
+    test_notimplemented( $test ) if $eg == 1;
+
+    test_custom_error( $test ) if $eg == 2;
+
+    test_simple_error() if $eg == 3;
+
+    # This subroutine doesn't even exist. Because the call occurs within
+    # this eval{} block, the resulting failure is trapped and reported
+    # through $@ below rather than ending the script.
+    if(  $eg == 4 ) {
+        print "Test #4: Calling an undefined subroutine.\n";
+        test_foobar();
+    }
+
+    # Throwing an exception the traditional bioperl way.
+    if(  $eg == 5 ) {
+        print "Test #5: Creating a Bio::Root::Root object and calling throw('string').\n";
+        my $obj = Bio::Root::Root->new();
+        $obj->throw("Throwing string from Bio::Root::Root object.");
+    }
+
+    # We shouldn't see this stuff.
+    print "----\n";
+    print "----\n";
+    print "Some other code within the try block after the last throw...\n";
+    print "----\n";
+    print "----\n";
+};
+
+# With eval{} the exception is in $@; there is no handler argument to
+# shift. (A stray "shift" here used to consume an element of @ARGV.)
+if($@) {
+    print "\nAn exception occurred:\n$@\n";
+}
+else {
+    print "\nNo exception occurred\n";
+}
+
+print "\nDone $0\n";
+
+# Example 1: call foo() on the tester object passed in. Per the message
+# printed here, that call is expected to raise a Bio::Root::NotImplemented
+# exception, i.e. the case of an interface method that the implementation
+# never implemented.
+sub test_notimplemented {
+    my ($test) = @_;
+
+    print "Test #1: Inducing a Bio::Root::NotImplemented exception from TestObject\n";
+
+    $test->foo();
+}
+
+
+# Example 2: call bar() on the tester object passed in. Per the message
+# printed here, TestObject::bar() deliberately throws a Bio::TestException.
+sub test_custom_error {
+    my ($test) = @_;
+
+    print "Test #2: Throwing a Bio::TestException exception from TestObject\n";
+
+    $test->bar;
+}
+
+
+# Example 3: try to throw an Error::Simple object, which comes with
+# Error.pm and can have only a string and a value.
+#
+# This example won't work without Error.pm installed. It shows that
+# setting $DONT_USE_ERROR = 1 really does simulate the absence of
+# Error.pm; the exception should report something like:
+#   Can't locate object method "throw" via package "Error::Simple"
+sub test_simple_error {
+
+    print "Test #3: Throwing a Error::Simple object\n\n",
+          "This should fail to find object method 'throw' via package 'Error::Simple'\n",
+          "because Error.pm is not available.\n\n";
+
+    throw Error::Simple( "A simple error", 42 );
+}
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/root/exceptions4.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,867 @@
+# $Id: PrimarySeq.pm,v 1.6.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# bioperl module for Bio::PrimarySeq
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PrimarySeq - Bioperl lightweight Sequence Object
+
+=head1 SYNOPSIS
+
+  # The Bio::SeqIO for file reading, Bio::DB::GenBank for
+  # database reading
+
+  use Bio::Seq;
+  use Bio::SeqIO;
+  use Bio::DB::GenBank;
+
+  #make from memory
+  $seqobj = Bio::PrimarySeq->new ( -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG',
+				   -id  => 'GeneFragment-12',
+				   -accession_number => 'X78121',
+				   -alphabet => 'dna',
+				   -is_circular => 1
+				   );
+  print "Sequence ", $seqobj->id(), " with accession ", 
+    $seqobj->accession_number, "\n";
+
+  # read from file
+  $inputstream = Bio::SeqIO->new(-file => "myseq.fa",-format => 'Fasta');
+  $seqobj = $inputstream->next_seq();
+  print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n";
+
+
+  # to get out parts of the sequence.
+
+  print "Sequence ", $seqobj->id(), " with accession ", 
+    $seqobj->accession_number, " and desc ", $seqobj->desc, "\n";
+
+  $string  = $seqobj->seq();
+  $string2 = $seqobj->subseq(1,40);
+
+
+=head1 DESCRIPTION
+
+PrimarySeq is a lightweight Sequence object, storing little more than
+the sequence, its name, a computer useful unique name. It does not
+contain sequence features or other information.  To have a sequence
+with sequence features you should use the Seq object which uses this
+object - go perldoc Bio::Seq
+
+Although new users will use Bio::PrimarySeq a lot, in general you will
+be using it from the Bio::Seq object. For more information on Bio::Seq
+go perldoc Bio::Seq. For interest you might like to know that
+Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls
+to do with sequence to it (the has-a relationship lets us get out of an
+otherwise nasty cyclical reference in Perl which would leak memory).
+
+Sequence objects are defined by the Bio::PrimarySeqI interface, and this
+object is a pure Perl implementation of the interface (if that's
+gibberish to you, don't worry. The take home message is that this
+object is the bioperl default sequence object, but other people can
+use their own objects as sequences if they so wish). If you are
+interested in wrapping your own objects as compliant Bioperl sequence
+objects, then you should read the Bio::PrimarySeqI documentation
+
+The documentation of this object is a merge of the Bio::PrimarySeq and
+Bio::PrimarySeqI documentation.  This allows all the methods which you can
+call on sequence objects to be documented here.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at sanger.ac.uk
+
+Describe contact details here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PrimarySeq;
+use strict;
+
+use base qw(Bio::Root::Root Bio::PrimarySeqI
+	  Bio::IdentifiableI Bio::DescribableI);
+
+#
+# setup the allowed values for alphabet()
+#
+
+my %valid_type = map {$_, 1} qw( dna rna protein );
+
+=head2 new
+
+ Title   : new
+ Usage   : $seq    = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
+                                           -id  => 'human_id',
+					   -accession_number => 'AL000012',
+					   );
+
+ Function: Returns a new primary seq object from
+           basic constructors, being a string for the sequence
+           and strings for id and accession_number.
+
+           Note that you can provide an empty sequence string. However, in
+           this case you MUST specify the type of sequence you wish to
+           initialize by the parameter -alphabet. See alphabet() for possible
+           values.
+ Returns : a new Bio::PrimarySeq object
+ Args    : -seq         => sequence string
+           -display_id  => display id of the sequence (locus name) 
+           -accession_number => accession number
+           -primary_id  => primary id (Genbank id)
+           -namespace   => the namespace for the accession
+           -authority   => the authority for the namespace
+           -desc        => description text
+           -alphabet    => sequence type (alphabet) (dna|rna|protein)
+           -id          => alias for display id
+           -is_circular => boolean field for whether or not sequence is circular
+ Throws  : Bio::Root::BadParameter if both -id and -display_id
+           parameters are supplied and they are not the same.
+           You only need to supply one of these parameters.
+           -display_id is preferred and is synonymous with -id.
+
+=cut
+
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+
+    # NB: in the list below $id receives -display_id and $given_id
+    # receives -id (the variables follow the order of the keys).
+    my($seq,$id,$acc,$pid,$ns,$auth,$v,$oid,
+       $desc,$alphabet,$given_id,$is_circular,$direct,$ref_to_seq,$len) =
+	$self->_rearrange([qw(SEQ
+			      DISPLAY_ID
+			      ACCESSION_NUMBER
+			      PRIMARY_ID
+			      NAMESPACE
+			      AUTHORITY
+			      VERSION
+			      OBJECT_ID
+			      DESC
+			      ALPHABET
+			      ID
+			      IS_CIRCULAR
+			      DIRECT
+			      REF_TO_SEQ
+			      LENGTH
+			      )],
+			  @args);
+    if( defined $id && defined $given_id ) {
+	if( $id ne $given_id ) {
+	    # label each value with the argument it was actually passed as
+	    $self->throw(-class => 'Bio::Root::BadParameter',
+                         -text => "Provided conflicting id and display_id constructor arguments. [id=$given_id] [display_id=$id]",
+                         -value => "id=$given_id, display_id=$id");	
+	}
+    }
+    # -id is only an alias: from here on $id is the display id
+    if( defined $given_id ) { $id = $given_id; }
+
+    # let's set the length before the seq -- if there is one, this length is
+    # going to be invalidated
+    defined $len && $self->length($len);
+
+    # if alphabet is provided we set it first, so that it won't be guessed
+    # when the sequence is set
+    $alphabet && $self->alphabet($alphabet);
+    
+    # if there is an alphabet, and direct is passed in, assume the alphabet
+    # and sequence is ok 
+
+    if( $direct && $ref_to_seq) {
+	# bypass seq(): no validation, no length invalidation
+	$self->{'seq'} = $$ref_to_seq;
+	if( ! $alphabet ) {
+	    $self->_guess_alphabet();
+	} # else it has been set already above
+    } else {
+	# note: the sequence string may be empty
+	$self->seq($seq) if defined($seq);
+    }
+
+    # "0" is a legal id, accession number or description, so these three
+    # are tested for content rather than for truth; an empty string is
+    # still ignored, as before
+    (defined $id   && $id   ne '') && $self->display_id($id);
+    (defined $acc  && $acc  ne '') && $self->accession_number($acc);
+    defined $pid && $self->primary_id($pid);
+    (defined $desc && $desc ne '') && $self->desc($desc);
+    $is_circular && $self->is_circular($is_circular);
+    $ns          && $self->namespace($ns);
+    $auth        && $self->authority($auth);
+    defined($v)  && $self->version($v);
+    defined($oid) && $self->object_id($oid);
+
+    return $self;
+}
+
+sub direct_seq_set {
+    my ($self, @new) = @_;
+
+    # Without an argument this is a no-op that yields nothing.
+    return unless @new;
+
+    # Store the string as is (no validation, no alphabet guessing) and
+    # hand the stored value back.
+    return $self->{'seq'} = $new[0];
+}
+
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string    = $obj->seq()
+ Function: Returns the sequence as a string of letters. The
+           case of the letters is left up to the implementer.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard), but you should not rely on this
+ Returns : A scalar
+ Args    : Optionally, on set, the new value (a string). An optional second
+           argument presets the alphabet (otherwise it will be guessed).
+           Both parameters may also be given in named parameter style
+           with -seq and -alphabet being the names.
+
+=cut
+
+sub seq {
+   my ($obj,@args) = @_;
+
+   # getter: called without any argument
+   if( scalar(@args) == 0 ) {
+       return $obj->{'seq'};
+   }
+
+   # setter: the new sequence string, optionally followed by its alphabet
+   my ($value,$alphabet) = @args;
+
+   if(@args) {
+       if(defined($value) && (! $obj->validate_seq($value))) {
+	   $obj->throw(-class => 'Bio::Root::BadParameter',
+                       -text => "Attempting to set the sequence to [$value] ".
+		                "which does not look healthy",
+                       -value => $value);
+       }
+       # if a sequence was already set we make sure that we re-adjust the
+       # alphabet, otherwise we skip guessing if alphabet is already set
+       # note: if the new seq is empty or undef, we don't consider that a
+       # change (we wouldn't have anything to guess on anyway)
+       my $is_changed_seq =
+	   exists($obj->{'seq'}) && (CORE::length($value || '') > 0);
+       $obj->{'seq'} = $value;
+       # new alphabet overridden by arguments?
+       if($alphabet) {
+	   # yes, set it no matter what
+	   $obj->alphabet($alphabet);
+       } elsif( # if we changed a previous sequence to a new one
+		$is_changed_seq ||
+		# or if there is no alphabet yet at all
+		(! defined($obj->alphabet()))) {
+	   # we need to guess the (possibly new) alphabet
+	   $obj->_guess_alphabet();
+       } # else (seq not changed and alphabet was defined) do nothing
+       # if the seq is changed, make sure we unset a possibly set length
+       $obj->length(undef) if $is_changed_seq;
+   }
+   return $obj->{'seq'};
+}
+
+=head2 validate_seq
+
+ Title   : validate_seq
+ Usage   : if(! $seq->validate_seq($seq_str) ) {
+                print "sequence $seq_str is not valid for an object of 
+                alphabet ",$seq->alphabet, "\n";
+	   }
+ Function: Validates a given sequence string. A validating sequence string
+           must be accepted by seq(). A string that does not validate will
+           lead to an exception if passed to seq().
+
+           The implementation provided here does not take alphabet() into
+           account. Allowed are all letters (A-Z) and '-','.', '*' and '?'.
+
+ Example :
+ Returns : 1 if the supplied sequence string is valid for the object, and
+           0 otherwise.
+ Args    : The sequence string to be validated.
+
+
+=cut
+
+sub validate_seq {
+    my ($self,$seqstr) = @_;
+    # without an argument the object's own sequence is checked
+    if( ! defined $seqstr ){ $seqstr = $self->seq(); }
+    return 0 unless( defined $seqstr); 
+    # an empty string is valid; otherwise only letters, '-', '.', '*', '?'
+    if((CORE::length($seqstr) > 0) && ($seqstr !~ /^([A-Za-z\-\.\*\?]+)$/)) {
+	# join() puts the match in list context so that the offending runs
+	# themselves are reported; concatenating the bare match would
+	# evaluate it in scalar context and print "1"
+	$self->warn("seq doesn't validate, mismatch is " .
+		   join(",", ($seqstr =~ /([^A-Za-z\-\.\*\?]+)/g)));
+	return 0;
+    }
+    return 1;
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+ Function: returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence
+ Returns : a string
+ Args    : integer for start position
+           integer for end position
+                 OR
+           Bio::LocationI location for subseq (strand honored)
+
+=cut
+
+sub subseq {
+   my ($self,$start,$end,$replace) = @_;
+
+   if( ref($start) && $start->isa('Bio::LocationI') ) {
+       # location form: the second argument, if any, is the replacement
+       my $loc = $start;
+       $replace = $end; # do we really use this anywhere? scary. HL
+       my $seq = "";
+       # concatenate the pieces in the order the location reports them
+       foreach my $subloc ($loc->each_Location()) {
+	   my $piece = $self->subseq($subloc->start(),
+				     $subloc->end(), $replace);
+	   if($subloc->strand() < 0) {
+	       # hand over our own alphabet so that a short or ambiguous
+	       # piece is not re-guessed; if we have none the temporary
+	       # object guesses, exactly as before
+	       $piece = Bio::PrimarySeq->new('-seq' => $piece,
+					     '-alphabet' => scalar($self->alphabet())
+					     )->revcom()->seq();
+	   }
+	   $seq .= $piece;
+       }
+       return $seq;
+   } elsif(  defined  $start && defined $end ) {
+       # coordinate form: 1-based, inclusive, start <= end <= length
+       if( $start > $end ){
+	   $self->throw(-class => 'Bio::Root::BadParameter',
+			-text  => "in subseq, start [$start] has to be ".
+			          "less than end [$end]",
+			-value => "start=$start, end=$end");
+       }
+       if( $start <= 0 ) {
+	   $self->throw(-class => 'Bio::Root::BadParameter',
+			-text=>"start must be positive [start=$start]",
+			-value => "start=$start");
+       }
+       if( $end > $self->length ) {
+	   $self->throw(-class => 'Bio::Root::BadParameter',
+			-text=>"length must be less than the total length ".
+			       "of sequence [start=$start, end=$end] ".
+			       "Total=".$self->length."",
+			-value =>$self->length );
+       }
+
+       # remove one from start, and then length is end-start
+       $start--;
+       if( defined $replace ) {
+	   # NOTE(review): substr() works on the value returned by seq(), so
+	   # the stored sequence itself does not appear to be modified here
+	   return substr( $self->seq(), $start, ($end-$start), $replace);
+       } else {
+	   return substr( $self->seq(), $start, ($end-$start));
+       }
+   } else {
+       $self->warn("Incorrect parameters to subseq - must be two integers ".
+		   "or a Bio::LocationI object not ($start,$end)");
+   }
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $seq->length();
+ Function: Get the length of the sequence in number of symbols (bases
+           or amino acids).
+
+           You can also set this attribute, even to a number that does
+           not match the length of the sequence string. This is useful
+           if you don't want to set the sequence too, or if you want
+           to free up memory by unsetting the sequence. In the latter
+           case you could do e.g.
+
+               $seq->length($seq->length);
+               $seq->seq(undef);
+
+           Note that if you set the sequence to a value other than
+           undef at any time, the length attribute will be
+           invalidated, and the length of the sequence string will be
+           reported again. Also, we won't let you lie about the length.
+
+ Example :
+ Returns : integer representing the length of the sequence.
+ Args    : Optionally, the value on set
+
+=cut
+
+sub length {
+    my ($self, @new) = @_;
+
+    # length of the stored sequence string; 0 when there is none
+    my $actual = CORE::length($self->seq() || '');
+
+    unless(@new) {
+	# getter: an explicitly stored length wins over the computed one
+	my $stored = $self->{'_seq_length'};
+	return defined($stored) ? $stored : $actual;
+    }
+
+    # setter: a defined value has to agree with a non-empty sequence
+    my $val = $new[0];
+    if(defined($val) && $actual && ($actual != $val)) {
+	$self->throw(-class=>'Bio::Root::BadParameter',
+		     -text=>"You're trying to lie about the length: ".
+		            "is $actual but you say ".$val,
+		     -value=>$val);
+    }
+    $self->{'_seq_length'} = $val;
+
+    # on set, the length of the sequence string itself is what comes back
+    return $actual;
+}
+
+=head2 display_id
+
+ Title   : display_id or display_name
+ Usage   : $id_string = $obj->display_id();
+ Function: returns the display id, aka the common name of the Sequence object.
+
+           The semantics of this is that it is the most likely string to
+           be used as an identifier of the sequence, and likely to have
+           "human" readability.  The id is equivalent to the ID field of
+           the GenBank/EMBL databanks and the id field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id to
+           embed other information. Bioperl does not use any embedded
+           information in the ID field, and people are encouraged to use
+           other mechanisms (accession field for example, or extending
+           the sequence object) to solve this.
+
+           With the new Bio::DescribableI interface, display_name aliases
+           to this method.
+
+ Returns : A string
+ Args    : None
+
+
+=cut
+
+sub display_id {
+    my $self   = shift;
+    my $new_id = shift;
+
+    # an undefined argument is treated like no argument at all
+    $self->{'display_id'} = $new_id if defined $new_id;
+    return $self->{'display_id'};
+}
+
+=head2 accession_number
+
+ Title   : accession_number or object_id
+ Usage   : $unique_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should
+           return "unknown".
+
+           [Note this method name is likely to change in 1.3]
+
+           With the new Bio::IdentifiableI interface, this is aliased 
+           to object_id
+
+ Returns : A string
+ Args    : A string (optional) for setting
+
+=cut
+
+sub accession_number {
+    my ($self, $new_acc) = @_;
+
+    # setter: store the value and hand it straight back
+    return $self->{'accession_number'} = $new_acc if defined $new_acc;
+
+    # getter: fall back to the literal 'unknown' when nothing is stored
+    my $stored = $self->{'accession_number'};
+    return defined($stored) ? $stored : 'unknown';
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their
+           own object ids in a way the implementation can control;
+           clients can expect one id to map to one object.
+
+           For sequences with no natural primary id, this method
+           should return a stringified memory location.
+
+ Returns : A string
+ Args    : A string (optional, for setting)
+
+=cut
+
+sub primary_id {
+    my $self   = shift;
+    my $new_id = shift;
+
+    $self->{'primary_id'} = $new_id if defined $new_id;
+
+    # no primary id was ever stored: use the stringified object instead
+    return exists($self->{'primary_id'}) ? $self->{'primary_id'} : "$self";
+}
+
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the alphabet of sequence, one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no alphabet specified it
+           has to guess.
+ Args    : (for setting) string of 'dna','rna', or 'protein'
+ Throws  : Bio::Root::BadParameter if the supplied value
+           is not a valid type. The offending value is placed within
+           the -value field of the Error object.
+
+=cut
+
+sub alphabet {
+    my $self      = shift;
+    my $requested = shift;
+
+    # getter (also taken when the argument is undef)
+    return $self->{'alphabet'} unless defined $requested;
+
+    # setter: the name is lower-cased before it is checked and stored
+    my $type = lc $requested;
+    if ( ! $valid_type{$type} ) {
+	$self->throw(-class => 'Bio::Root::BadParameter',
+		     -text => "Alphabet type '$type' is not a valid type (".
+		     join(',', map "'$_'", sort keys %valid_type) .") lowercase",
+		     -value => $type);
+    }
+    $self->{'alphabet'} = $type;
+    return $self->{'alphabet'};
+}
+
+=head2 desc
+
+ Title   : desc or description
+ Usage   : $obj->desc($newval)
+ Function: Get/set description of the sequence.
+
+           description is an alias for this for compliance with the
+           Bio::DescribableI interface.
+
+ Example :
+ Returns : value of desc (a string)
+ Args    : newvalue (a string or undef, optional)
+
+
+=cut
+
+sub desc{
+    my ($self, @new) = @_;
+
+    # any argument, even undef, replaces the stored description
+    $self->{'desc'} = $new[0] if @new;
+    return $self->{'desc'};
+}
+
+=head2 can_call_new
+
+ Title   : can_call_new
+ Usage   :
+ Function:
+ Example :
+ Returns : true
+ Args    :
+
+
+=cut
+
+sub can_call_new {
+    # the invocant is not consulted: the answer is unconditionally true
+    return 1;
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $seq->id()
+ Function: This is mapped on display_id
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub  id {
+   # plain alias: every argument is forwarded to display_id()
+   my ($self, @rest) = @_;
+   return $self->display_id(@rest);
+}
+
+=head1 Methods for Bio::IdentifiableI compliance
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+
+           This is aliased to accession_number().
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    # plain alias: every argument is forwarded to accession_number()
+    my ($self, @rest) = @_;
+    return $self->accession_number(@rest);
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described
+           by the same identifier should represent the same concept
+
+ Returns : A number
+
+=cut
+
+sub version{
+    my $self    = shift;
+    my $new_ver = shift;
+
+    # an undefined argument is treated like no argument at all
+    $self->{'_version'} = $new_ver if defined $new_ver;
+    return $self->{'_version'};
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for the
+           organisation (eg, wormbase.org)
+
+ Returns : A scalar
+
+=cut
+
+sub authority {
+    my $self     = shift;
+    my $new_auth = shift;
+
+    # an undefined argument is treated like no argument at all
+    $self->{'authority'} = $new_auth if defined $new_auth;
+    return $self->{'authority'};
+}
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection 
+
+ Returns : A scalar
+
+
+=cut
+
+sub namespace{
+    my $self   = shift;
+    my $new_ns = shift;
+
+    $self->{'namespace'} = $new_ns if defined $new_ns;
+
+    # a missing (or false) namespace is reported as the empty string
+    return $self->{'namespace'} || "";
+}
+
+=head1 Methods for Bio::DescribableI compliance
+
+This comprises display_name and description.
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user
+           the string should have no spaces (ideally, though a cautious
+           user of this interface would not assume this) and should be
+           less than thirty characters (though again, double checking 
+           this is a good idea)
+
+           This is aliased to display_id().
+ Returns : A scalar
+
+=cut
+
+sub display_name {
+    # plain alias: every argument is forwarded to display_id()
+    my ($self, @rest) = @_;
+    return $self->display_id(@rest);
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a 
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display
+
+           This is aliased to desc().
+ Returns : A scalar
+
+=cut
+
+sub description {
+    # plain alias: every argument is forwarded to desc()
+    my ($self, @rest) = @_;
+    return $self->desc(@rest);
+}
+
+=head1 Methods Inherited from Bio::PrimarySeqI
+
+These methods are available on Bio::PrimarySeq, although they are
+actually implemented on Bio::PrimarySeqI
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::SeqI implementing object which
+           is the reversed complement of the sequence. For protein
+           sequences this throws an exception of
+           "Sequence is a protein. Cannot revcom"
+
+           The id is the same id as the original sequence, and the
+           accession number is also identical. If someone wants to
+           track that this sequence has been reversed, it needs to
+           define its own extensions
+
+           To do an inplace edit of an object you can go:
+
+           $seqobj = $seqobj->revcom();
+
+           This of course, causes Perl to handle the garbage
+           collection of the old object, but it is roughly speaking as
+           efficient as an inplace edit.
+
+ Returns : A new (fresh) Bio::SeqI object
+ Args    : none
+
+=cut
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence,
+
+ Example :
+ Returns : a fresh Bio::SeqI implementing object
+ Args    :
+
+
+=cut
+
+=head1 Internal methods
+
+These are internal methods to PrimarySeq
+
+=cut
+
+=head2 _guess_alphabet
+
+ Title   : _guess_alphabet
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+ Throws  : Bio::Root::BadParameter if the string obtained from 
+           PrimarySeq::seq() is empty. 
+
+=cut
+
+sub _guess_alphabet {
+   my ($self) = @_;
+   my ($str,$total,$atgc,$u,$type);
+
+   $str = $self->seq();
+   # an unset sequence is handled like an empty one and rejected below
+   $str = '' unless defined $str;
+   # drop gap and unknown symbols before counting; the character class
+   # removes every '-', '.' and '?' wherever it occurs (the previous
+   # pattern only matched the three characters as one consecutive run)
+   $str =~ s/[-.?]//g;
+
+   $total = CORE::length($str);
+   if( $total == 0 ) {
+       $self->throw(-class => 'Bio::Root::BadParameter',
+                    -text => "Got a sequence with no letters in - ".
+		             "cannot guess type [$str]",
+                    -value => $str);
+   }
+   
+   # tr/// with an empty replacement list only counts the listed characters
+   $u = ($str =~ tr/Uu//);
+   $atgc = ($str =~ tr/ATGCNatgcn//);
+   
+   # more than 85% A/T/G/C/N is dna; reaching 85% only with U added is
+   # rna; everything else is taken to be protein
+   if( ($atgc / $total) > 0.85 ) {
+       $type = 'dna';
+   } elsif( (($atgc + $u) / $total) > 0.85 ) {
+       $type = 'rna';
+   } else {
+       $type = 'protein';
+   }
+
+   # store the guess on the object and report it
+   $self->alphabet($type);
+   return $type;
+}
+
+############################################################################
+# aliases due to name changes or to compensate for our lack of consistency #
+############################################################################
+
+sub accession {
+    my ($self, @rest) = @_;
+
+    # deprecated alias: complain first, then behave like accession_number()
+    my $class = ref($self);
+    $self->warn($class."::accession is deprecated, use accession_number() instead");
+    return $self->accession_number(@rest);
+}
+
+1;
+

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/PrimarySeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,719 @@
+# $Id: PrimarySeqI.pm,v 1.6.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::PrimarySeqI
+#
+# Cared for by Ewan Birney <birney at sanger.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::PrimarySeqI [Developers] - Interface definition for a Bio::PrimarySeq
+
+=head1 SYNOPSIS
+
+
+    # Bio::PrimarySeqI is the interface class for sequences.
+
+    # If you are a newcomer to bioperl, you should
+    # start with Bio::Seq documentation. This
+    # documentation is mainly for developers using
+    # Bioperl.
+
+    # to test this is a seq object
+
+    $obj->isa("Bio::PrimarySeqI") ||
+      $obj->throw("$obj does not implement the Bio::PrimarySeqI interface");
+
+    # accessors
+
+    $string    = $obj->seq();
+    $substring = $obj->subseq(12,50);
+    $display   = $obj->display_id(); # for human display
+    $id        = $obj->primary_id(); # unique id for this object,
+                                     # implementation defined
+    $unique_key= $obj->accession_number();
+                       # unique biological id
+
+    # object manipulation
+
+    eval {
+	$rev    = $obj->revcom();
+    };
+    if( $@ ) {
+	$obj->throw(-class => 'Bio::Root::Exception',
+                    -text => "Could not reverse complement. ".
+		             "Probably not DNA. Actual exception\n$@\n",
+                    -value => $@);
+    }
+
+    $trunc = $obj->trunc(12,50);
+
+    # $rev and $trunc are Bio::PrimarySeqI compliant objects
+
+
+=head1 DESCRIPTION
+
+This object defines an abstract interface to basic sequence
+information - for most users of the package the documentation (and
+methods) in this class are not useful - this is a developers only
+class which defines what methods have to be implemented by other Perl
+objects to comply with the Bio::PrimarySeqI interface. Go "perldoc
+Bio::Seq" or "man Bio::Seq" for more information on the main class for
+sequences.
+
+
+PrimarySeq is an object just for the sequence and its name(s), nothing
+more. Seq is the larger object complete with features. There is a pure
+perl implementation of this in Bio::PrimarySeq. If you just want to
+use Bio::PrimarySeq objects, then please read that module first. This
+module defines the interface, and is of more interest to people who
+want to wrap their own Perl Objects/RDBs/FileSystems etc in a way that
+they "are" bioperl sequence objects, even though it is not using Perl
+to store the sequence etc.
+
+
+This interface defines what bioperl considers necessary to "be" a
+sequence, without providing an implementation of this. (An
+implementation is provided in Bio::PrimarySeq). If you want to provide
+a Bio::PrimarySeq 'compliant' object which in fact wraps another
+object/database/out-of-perl experience, then this is the correct thing
+to wrap, generally by providing a wrapper class which would inherit
+from your object and this Bio::PrimarySeqI interface. The wrapper class
+then would have the methods listed in the "Implementation Specific
+Functions" which would provide these methods for your object.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at sanger.ac.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::PrimarySeqI;
+use strict;
+use Bio::Tools::CodonTable;
+
+use base qw(Bio::Root::RootI);
+
+=head1 Implementation Specific Functions
+
+These functions are the ones that a specific implementation must
+define.
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string    = $obj->seq()
+ Function: Returns the sequence as a string of letters. The
+           case of the letters is left up to the implementer.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard),
+           but implementations are suggested to keep an open mind about
+           case (some users... want mixed case!)
+ Returns : A scalar
+ Status  : Virtual
+
+=cut
+
+sub seq {
+   # virtual: the interface itself only reports the missing implementation
+   return shift->throw_not_implemented();
+}
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+ Function: returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence
+
+           Start cannot be larger than end but can be equal
+
+ Returns : a string
+ Args    :
+ Status  : Virtual
+
+=cut
+
+sub subseq{
+   # virtual: the interface itself only reports the missing implementation
+   return shift->throw_not_implemented();
+}
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : $id_string = $obj->display_id();
+ Function: returns the display id, aka the common name of the Sequence object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the ID
+           field of the GenBank/EMBL databanks and the id field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience reasons.
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub display_id {
+   # Abstract accessor for the human-readable identifier; the default
+   # only reports that an implementing class has not provided it.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub accession_number {
+   # Abstract accessor for the accession number. Arguments are accepted
+   # (and ignored) so that implementing classes may use the same method
+   # as a setter; this default only reports "not implemented".
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage their
+           own object ids in a way the implementation can control;
+           clients can expect one id to map to one object.
+
+           For sequences with no accession number, this method should
+           return a stringified memory location.
+
+           [Note this method name is likely to change in 1.3]
+
+ Returns : A string
+ Args    : None
+ Status  : Virtual
+
+
+=cut
+
+sub primary_id {
+   # Abstract accessor for the implementation-specific unique id.
+   # Arguments are accepted (and ignored) here; this default only
+   # reports "not implemented".
+   my ($self,@args) = @_;
+   $self->throw_not_implemented();
+}
+
+
+=head2 can_call_new
+
+ Title   : can_call_new
+ Usage   : if( $obj->can_call_new ) {
+             $newobj = $obj->new( %param );
+	 }
+ Function: can_call_new returns 1 or 0 depending
+           on whether an implementation allows the new
+           constructor to be called. If a new constructor
+           is allowed, then it should take the following
+           hash-style constructor arguments.
+
+           $myobject->new( -seq => $sequence_as_string,
+                           -display_id  => $id,
+                           -accession_number => $accession,
+                           -alphabet => 'dna',
+                           );
+ Example :
+ Returns : 1 or 0
+ Args    :
+
+
+=cut
+
+sub can_call_new{
+   # Default policy of the interface: do not assume that new() can be
+   # called on the implementing class. revcom(), trunc() and translate()
+   # consult this method and build a Bio::PrimarySeq when it returns
+   # false. Arguments are accepted but not used.
+   my ($self,@args) = @_;
+
+   # we default to 0 here
+
+   return 0;
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : a string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : none
+ Status  : Virtual
+
+
+=cut
+
+sub alphabet{
+    # Abstract accessor for the sequence type. The other methods of this
+    # interface compare its value against 'dna', 'rna' and 'protein';
+    # this default only reports "not implemented".
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+sub moltype{
+   # Deprecated pre-1.0 name for alphabet(): emits a warning, then
+   # forwards every argument to alphabet() and returns its result.
+   my ($self,@args) = @_;
+
+   $self->warn("moltype: pre v1.0 method. Calling alphabet() instead...");
+   $self->alphabet(@args);
+}
+
+
+=head1 Optional Implementation Functions
+
+The following functions rely on the above functions. An
+implementing class does not need to provide these functions, as they
+will be provided by this class, but is free to override these
+functions.
+
+All of revcom(), trunc(), and translate() create new sequence
+objects. They will call new() on the class of the sequence object
+instance passed as argument, unless can_call_new() returns FALSE. In
+the latter case a Bio::PrimarySeq object will be created. Implementors
+which really want to control how objects are created (eg, for object
+persistence over a database, or objects in a CORBA framework), they
+are encouraged to override these methods
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::PrimarySeqI implementing object which
+           is the reversed complement of the sequence. For protein
+           sequences this throws an exception of "Sequence is a
+           protein. Cannot revcom"
+
+           The id is the same id as the original sequence, and the
+           accession number is also identical. If someone wants to
+           track that this sequence has been reversed, it needs to
+           define its own extensions
+
+           To do an inplace edit of an object you can go:
+
+           $seq = $seq->revcom();
+
+           This of course, causes Perl to handle the garbage
+           collection of the old object, but it is roughly speaking as
+           efficient as an inplace edit.
+
+ Returns : A new (fresh) Bio::PrimarySeqI object
+ Args    : none
+
+
+=cut
+
+sub revcom{
+   my $self = shift;
+
+   # The alphabet drives everything: proteins are rejected outright,
+   # anything that is neither dna nor rna is processed with a warning.
+   my $t = $self->alphabet;
+
+   $self->throw(-class => 'Bio::Root::Exception',
+                -text  => "Sequence is a protein. Cannot revcom")
+       if $t eq 'protein';
+
+   unless( $t eq 'dna' || $t eq 'rna' ) {
+       if( $self->can('warn') ) {
+           $self->warn("Sequence is not dna or rna, but [$t]. ".
+                       "Attempting to revcom, but unsure if this is right");
+       } else {
+           warn("[$self] Sequence is not dna or rna, but [$t]. ".
+                "Attempting to revcom, but unsure if this is right");
+       }
+   }
+
+   # Work on a copy of the residues. RNA is rewritten as DNA (u -> t)
+   # for the duration of the complement step so that a single
+   # translation table serves both alphabets, then mapped back.
+   my $is_rna   = ( $t eq 'rna' );
+   my $residues = $self->seq();
+   $residues =~ tr/uU/tT/ if $is_rna;
+
+   # complement every symbol (IUPAC ambiguity codes included) ...
+   $residues =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
+   # ... then reverse the string (scalar context: string reversal)
+   my $revseq = CORE::reverse $residues;
+   $revseq =~ tr/tT/uU/ if $is_rna;
+
+   # Build the result with the object's own class when that is allowed,
+   # otherwise with Bio::PrimarySeq, loading it on demand.
+   my $seqclass = ref($self);
+   unless( $self->can_call_new() ) {
+       $seqclass = 'Bio::PrimarySeq';
+       $self->_attempt_to_load_Seq();
+   }
+
+   # ids, alphabet and description are copied over unchanged
+   my $reversed = $seqclass->new( '-seq' => $revseq,
+                                  '-display_id'  => $self->display_id,
+                                  '-accession_number' => $self->accession_number,
+                                  '-alphabet' => $self->alphabet,
+                                  '-desc' => $self->desc()
+                                  );
+   return $reversed;
+
+}
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence,
+
+ Example :
+ Returns : a fresh Bio::PrimarySeqI implementing object
+ Args    : Two integers denoting first and last base of the sub-sequence.
+
+
+=cut
+
+sub trunc{
+   my ($self,$start,$end) = @_;
+
+   # Two calling styles are supported: a single Bio::LocationI object
+   # in place of start, or an explicit start/end pair with start <= end.
+   my $given_location = defined $start && ref($start) &&
+       $start->isa('Bio::LocationI');
+
+   my $str;
+   if( $given_location ) {
+       $str = $self->subseq($start); # start is a location actually
+   } elsif( !$end ) {
+       $self->throw("trunc start,end -- there was no end for $start");
+   } elsif( $end < $start ) {
+       $self->throw("start [$start] is greater than end [$end]. \n".
+                    "If you want to truncated and reverse complement, \n".
+                    "you must call trunc followed by revcom. Sorry.");
+   } else {
+       $str = $self->subseq($start,$end);
+   }
+
+   # Use the object's own class for the result when that is allowed,
+   # otherwise Bio::PrimarySeq (loaded on demand).
+   my $seqclass = ref($self);
+   unless( $self->can_call_new() ) {
+       $seqclass = 'Bio::PrimarySeq';
+       $self->_attempt_to_load_Seq();
+   }
+
+   # ids, alphabet and description are carried over from the original
+   my $truncated = $seqclass->new( '-seq' => $str,
+                                   '-display_id'  => $self->display_id,
+                                   '-accession_number' => $self->accession_number,
+                                   '-alphabet' => $self->alphabet,
+                                   '-desc' => $self->desc()
+                                   );
+   return $truncated;
+}
+
+
+=head2 translate
+
+ Title   : translate
+ Usage   : $protein_seq_obj = $dna_seq_obj->translate
+
+ Function:
+
+           Provides the translation of the DNA sequence using full
+           IUPAC ambiguities in DNA/RNA and amino acid codes.
+
+           The full CDS translation is identical to EMBL/TREMBL
+           database translation. Note that when a complete coding
+           sequence is requested (see Args) the trailing terminator
+           character is removed before returning the translation
+           object.
+
+           Note: if you set $dna_seq_obj->verbose(1) you will get a
+           warning if the first codon is not a valid initiator.
+
+
+ Returns : A Bio::PrimarySeqI implementing object
+ Args    : character for terminator (optional) defaults to '*'
+           character for unknown amino acid (optional) defaults to 'X'
+           frame (optional) valid values 0, 1, 2, defaults to 0
+           codon table id (optional) defaults to 1
+           complete coding sequence expected, defaults to 0 (false)
+           boolean, throw exception if not complete CDS (true) or defaults to warning (false)
+
+=cut
+
+sub translate {
+    # Translates this nucleotide sequence into a new protein sequence
+    # object. All arguments are positional and optional:
+    #   $stop     symbol for terminator codons          (default '*')
+    #   $unknown  symbol for unknown amino acids        (default 'X')
+    #   $frame    reading frame, 0, 1 or 2              (default 0)
+    #   $tableid  codon table id                        (default 1)
+    #   $fullCDS  true if a complete CDS is expected    (default false)
+    #   $throw    true to throw instead of warn on CDS problems
+    # Throws if the sequence is a protein or the frame is invalid.
+    my($self) = shift;
+    my($stop, $unknown, $frame, $tableid, $fullCDS, $throw) = @_;
+    my $output = '';
+
+    ## User can pass in symbol for stop and unknown codons
+    unless(defined($stop) and $stop ne '')    { $stop = "*"; }
+    unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
+    unless(defined($frame) and $frame ne '') { $frame = 0; }
+
+    ## the codon table ID
+    unless(defined($tableid) and $tableid ne '')    { $tableid = 1; }
+
+    ##Error if monomer is "Amino"
+    if ($self->alphabet eq 'protein') {
+        $self->throw(-class => 'Bio::Root::Exception',
+                     -text => "Can't translate an amino acid sequence.");
+    }
+
+    ##Error if frame is not 0, 1 or 2
+    unless ($frame == 0 or $frame == 1 or $frame == 2) {
+        $self->throw(-class => 'Bio::Root::BadParameter',
+                     -text   => "Valid values for frame are 0, 1, 2, not [$frame].",
+                     -value => $frame);
+    }
+
+    #warns if ID is invalid
+    my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
+
+    my ($seq) = $self->seq();
+
+    # deal with frame offset.
+    if( $frame ) {
+        $seq = substr ($seq,$frame);
+    }
+
+    # Translate it
+    $output = $codonTable->translate($seq);
+    # Use user-input stop/unknown
+    $output =~ s/\*/$stop/g;
+    $output =~ s/X/$unknown/g;
+
+    # only if we are expecting to translate a complete coding region
+    if ($fullCDS) {
+        my $id = $self->display_id;
+        #remove the stop character
+        if( substr($output,-1,1) eq $stop ) {
+            chop $output;
+        } else {
+            $throw && $self->throw(-class => 'Bio::Root::Exception',
+                                   -text  => "Seq [$id]: Not using a valid terminator codon!: ". substr($output,-1,1),
+                                   -value => substr($output,-1,1));
+            $self->warn("Seq [$id]: Not using a valid terminator codon!: ". substr($output,-1,1));
+        }
+        # test if there are terminator characters inside the protein sequence!
+        # Every '*' has already been replaced by the caller's $stop symbol
+        # above, so that symbol (quoted literally) is what must be looked
+        # for; matching a literal '*' would miss custom terminators.
+        if ($output =~ /\Q$stop\E/) {
+            $throw && $self->throw(-class => 'Bio::Root::Exception',
+                                   -text  => "Seq [$id]: Terminator codon inside CDS!");
+            $self->warn("Seq [$id]: Terminator codon inside CDS!");
+        }
+        # if the initiator codon is not ATG, the amino acid needs to be changed into M
+        if ( substr($output,0,1) ne 'M' ) {
+            if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) {
+                $output = 'M'. substr($output,1);
+            }
+            elsif ($throw) {
+                $self->throw(-class => 'Bio::Root::Exception',
+                             -text => "Seq [$id]: Not using a valid initiator codon!: ". substr($seq, 0, 3),
+                             -value => substr($seq, 0, 3));
+            } else {
+                $self->warn("Seq [$id]: Not using a valid initiator codon!");
+            }
+        }
+    }
+
+    # Use the object's own class for the result when that is allowed,
+    # otherwise Bio::PrimarySeq (loaded on demand).
+    my $seqclass;
+    if($self->can_call_new()) {
+        $seqclass = ref($self);
+    } else {
+        $seqclass = 'Bio::PrimarySeq';
+        $self->_attempt_to_load_Seq();
+    }
+    my $out = $seqclass->new( '-seq' => $output,
+                              '-display_id'  => $self->display_id,
+                              '-accession_number' => $self->accession_number,
+                              # is there anything wrong with retaining the
+                              # description?
+                              '-desc' => $self->desc(),
+                              '-alphabet' => 'protein'
+                            );
+    return $out;
+
+}
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $seq->id()
+ Function: ID of the sequence. This should normally be (and actually is in
+           the implementation provided here) just a synonym for display_id().
+ Example :
+ Returns : A string.
+ Args    :
+
+
+=cut
+
+sub id {
+   # Convenience synonym: hands the call straight on to display_id()
+   # without passing any arguments.
+   my $self = shift;
+   return $self->display_id();
+}
+
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $seq->length()
+ Function:
+ Example :
+ Returns : integer representing the length of the sequence.
+ Args    :
+
+
+=cut
+
+sub length {
+   # Abstract: this default does not compute a length, it only reports
+   # that the method has not been implemented.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seq->desc($newval);
+           $description = $seq->desc();
+ Function: Get/set description text for a seq object
+ Example :
+ Returns : value of desc
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub desc {
+   # Abstract get/set accessor for the description text. A new value
+   # may be passed, but this default ignores it and only reports that
+   # the method has not been implemented.
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+
+=head2 is_circular
+
+ Title   : is_circular
+ Usage   : if( $obj->is_circular) { /Do Something/ }
+ Function: Returns true if the molecule is circular; if a defined
+           value is passed, it is stored as the new setting first
+ Returns : Boolean value
+ Args    : new value (optional)
+
+=cut
+
+sub is_circular{
+    # Combined getter/setter for the circularity flag, which lives in
+    # the object's '_is_circular' slot. A defined argument overwrites
+    # the slot; the (possibly updated) slot value is always returned.
+    my ($self,$value) = @_;
+    $self->{'_is_circular'} = $value if defined $value;
+    return $self->{'_is_circular'};
+}
+
+=head1 Private functions
+
+These are some private functions for the PrimarySeqI interface. You do not
+need to implement these functions
+
+=head2 _attempt_to_load_Seq
+
+ Title   : _attempt_to_load_Seq
+ Usage   :
+ Function:
+ Example :
+ Returns :
+ Args    :
+
+
+=cut
+
+sub _attempt_to_load_Seq{
+   # Makes sure Bio::PrimarySeq is loaded so that revcom(), trunc() and
+   # translate() can fall back to it. Returns 1 on success and throws a
+   # Bio::Root::Exception carrying the underlying error when the module
+   # cannot be loaded.
+   my ($self) = @_;
+
+   # Fast path: the module file is already loaded. %INC is the record
+   # perl itself keeps of loaded files; a lookup of 'Bio::PrimarySeq'
+   # as a single key of the main symbol table never matches a nested
+   # package.
+   return 1 if $INC{'Bio/PrimarySeq.pm'};
+
+   # The trailing "1" makes the eval's own value tell success from
+   # failure instead of relying on $@ alone.
+   return 1 if eval { require Bio::PrimarySeq; 1 };
+
+   # capture the error right away, before anything else can reset $@
+   my $error = $@;
+   my $text = "Bio::PrimarySeq could not be loaded for [$self]\n".
+       "This indicates that you are using Bio::PrimarySeqI ".
+       "without Bio::PrimarySeq loaded or without providing a ".
+       "complete implementation.\nThe most likely problem is that there ".
+       "has been a misconfiguration of the bioperl environment\n".
+       "Actual exception:\n\n";
+   $self->throw(-class => 'Bio::Root::Exception',
+                -text => "$text$error\n",
+                -value => $error);
+   # only reached if throw() ever returns
+   return 0;
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/Seq.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/Seq.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/Seq.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1244 @@
+# $Id: Seq.pm,v 1.6.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::Seq
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Seq - Sequence object, with features
+
+=head1 SYNOPSIS
+
+    # This is the main sequence object in Bioperl
+
+    # gets a sequence from a file
+    $seqio  = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat');
+    $seqobj = $seqio->next_seq();
+
+    # SeqIO can both read and write sequences; see Bio::SeqIO
+    # for more information and examples
+
+    # get from database
+    $db = Bio::DB::GenBank->new();
+    $seqobj = $db->get_Seq_by_acc('X78121');
+
+    # make from strings in script
+    $seqobj = Bio::Seq->new( -display_id => 'my_id',
+			     -seq => $sequence_as_string);
+
+    # gets sequence as a string from sequence object
+    $seqstr   = $seqobj->seq(); # actual sequence as a string
+    $seqstr   = $seqobj->subseq(10,50); # slice in biological coordinates
+
+    # retrieves information from the sequence
+    # features must implement Bio::SeqFeatureI interface
+
+    @features = $seqobj->get_SeqFeatures(); # just top level
+    foreach my $feat ( @features ) {
+	print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ",
+	$feat->end," strand ",$feat->strand,"\n";
+
+        # features retain link to underlying sequence object
+        print "Feature sequence is ",$feat->seq->seq(),"\n"
+    }
+
+    # sequences may have a species
+
+    if( defined( my $species = $seqobj->species ) ) {
+        print "Sequence is from ",$species->binomial_name," [",$species->common_name,"]\n";
+    }
+
+    # annotation objects are Bio::AnnotationCollectionI's
+    $ann      = $seqobj->annotation(); # annotation object
+
+    # references is one type of annotations to get. Also get
+    # comment and dblink. Look at Bio::AnnotationCollection for
+    # more information
+
+    foreach my $ref ( $ann->get_Annotations('reference') ) {
+	print "Reference ",$ref->title,"\n";
+    }
+
+    # you can get truncations, translations and reverse complements, these
+    # all give back Bio::Seq objects themselves, though currently with no
+    # features transferred
+
+    my $trunc = $seqobj->trunc(100,200);
+    my $rev   = $seqobj->revcom();
+
+    # there are many options to translate - check out the docs
+    my $trans = $seqobj->translate();
+
+    # these functions can be chained together
+
+    my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate();
+
+
+
+=head1 DESCRIPTION
+
+A Seq object is a sequence with sequence features placed on it. The
+Seq object contains a PrimarySeq object for the actual sequence and
+also implements its interface.
+
+In Bioperl we have 3 main players that people are going to use frequently
+
+  Bio::PrimarySeq  - just the sequence and its names, nothing else.
+  Bio::SeqFeatureI - a location on a sequence, potentially with a sequence
+                     and annotation.
+  Bio::Seq         - A sequence and a collection of sequence features
+                     (an aggregate) with its own annotation.
+
+Although Bioperl is not tied heavily to file formats these distinctions do
+map to file formats sensibly and for some bioinformaticians this might help
+
+  Bio::PrimarySeq  - Fasta file of a sequence
+  Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table
+  Bio::Seq         - A single EMBL/GenBank/DDBJ entry
+
+By having this split we avoid a lot of nasty circular references
+(sequence features can hold a reference to a sequence without the sequence
+holding a reference to the sequence feature). See L<Bio::PrimarySeq> and
+L<Bio::SeqFeatureI> for more information.
+
+Ian Korf really helped in the design of the Seq and SeqFeature system.
+
+=head1 EXAMPLES
+
+A simple and fundamental block of code
+
+  use Bio::SeqIO;
+
+  my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object
+  my $seqobj = $seqIOobj->next_seq;              # get a Seq object
+
+With the Seq object in hand one has access to a powerful set of Bioperl
+methods and Bioperl objects. This next script will take a file of sequences
+in EMBL format and create a file of the reverse-complemented sequences
+in Fasta format using Seq objects. It also prints out details about the
+exons it finds as sequence features in GFF (General Feature Format).
+
+  use Bio::Seq;
+  use Bio::SeqIO;
+
+  $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+  $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+
+  while((my $seqobj = $seqin->next_seq())) {
+      print "Seen sequence ",$seqobj->display_id,", start of seq ",
+            substr($seqobj->seq,0,10),"\n";
+      if( $seqobj->alphabet eq 'dna') {
+	    $rev = $seqobj->revcom;
+	    $id  = $seqobj->display_id();
+            $id  = "$id.rev";
+            $rev->display_id($id);
+            $seqout->write_seq($rev);
+      }
+
+      foreach $feat ( $seqobj->get_SeqFeatures() ) {
+           if( $feat->primary_tag eq 'exon' ) {
+              print STDOUT "Location ",$feat->start,":",
+                    $feat->end," GFF[",$feat->gff_string,"]\n";
+	   }
+      }
+  }
+
+Let's examine the script. The lines below import the Bioperl modules.
+Seq is the main Bioperl sequence object and SeqIO is the Bioperl support
+for reading sequences from files and to files
+
+  use Bio::Seq;
+  use Bio::SeqIO;
+
+These two lines create two SeqIO streams: one for reading in sequences
+and one for outputting sequences:
+
+  $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+  $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+
+Notice that in the "$seqout" case there is a greater-than sign,
+indicating the file is being opened for writing.
+
+Using the
+
+  '-argument' => value
+
+syntax is common in Bioperl. The file argument is like an argument
+to open() . You can also pass in filehandles or FileHandle objects by
+using the -fh argument (see L<Bio::SeqIO> documentation for details).
+Many formats in Bioperl are handled, including Fasta, EMBL, GenBank,
+Swissprot (swiss), PIR, and GCG.
+
+  $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
+  $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
+
+This is the main loop which will loop progressively through sequences
+in a file, and each call to $seqin-E<gt>next_seq() provides a new Seq
+object from the file:
+
+  while((my $seqobj = $seqin->next_seq())) {
+
+This print line below accesses fields in the Seq object directly. The
+$seqobj-E<gt>display_id is the way to access the display_id attribute
+of the Seq object. The $seqobj-E<gt>seq method gets the actual
+sequence out as string. Then you can do manipulation of this if
+you want to (there are however easy ways of doing truncation,
+reverse-complement and translation).
+
+  print "Seen sequence ",$seqobj->display_id,", start of seq ",
+               substr($seqobj->seq,0,10),"\n";
+
+Bioperl has to guess the alphabet of the sequence, being either 'dna',
+'rna', or 'protein'. The alphabet attribute is one of these three
+possibilities.
+
+  if( $seqobj->alphabet eq 'dna') {
+
+The $seqobj-E<gt>revcom method provides the reverse complement of the Seq
+object as another Seq object. Thus, the $rev variable is a reference to
+another Seq object. For example, one could repeat the above print line
+for this Seq object (putting $rev in place of $seqobj). In this
+case we are going to output the object into the file stream we built
+earlier on.
+
+  $rev = $seqobj->revcom;
+
+When we output it, we want the id of the outputted object
+to be changed to "$id.rev", ie, with .rev on the end of the name. The
+following lines retrieve the id of the sequence object, add .rev
+to this and then set the display_id of the rev sequence object to
+this. Notice that to set the display_id attribute you just need
+call the same method, display_id(), with the new value as an argument.
+Getting and setting values with the same method is common in Bioperl.
+
+  $id  = $seqobj->display_id();
+  $id  = "$id.rev";
+  $rev->display_id($id);
+
+The write_seq method on the SeqIO output object, $seqout, writes the
+$rev object to the filestream we built at the top of the script.
+The filestream knows that it is outputting in fasta format, and
+so it provides fasta output.
+
+  $seqout->write_seq($rev);
+
+This block of code loops over sequence features in the sequence
+object, trying to find ones which have been tagged as 'exon'.
+Features have start and end attributes and can be outputted
+in General Feature Format, GFF, a standardized format for sequence
+features.
+
+  foreach $feat ( $seqobj->get_SeqFeatures() ) {
+      if( $feat->primary_tag eq 'exon' ) {
+          print STDOUT "Location ",$feat->start,":",
+             $feat->end," GFF[",$feat->gff_string,"]\n";
+      }
+  }
+
+The code above shows how a few Bio::Seq methods suffice to read, parse,
+reformat and analyze sequences from a file. A full list of methods
+available to Bio::Seq objects is shown below. Bear in mind that some of
+these methods come from PrimarySeq objects, which are simpler
+than Seq objects, stripped of features (see L<Bio::PrimarySeq> for
+more information).
+
+  # these methods return strings, and accept strings in some cases:
+
+  $seqobj->seq();              # string of sequence
+  $seqobj->subseq(5,10);       # part of the sequence as a string
+  $seqobj->accession_number(); # when there, the accession number
+  $seqobj->alphabet();          # one of 'dna','rna',or 'protein'
+  $seqobj->seq_version()       # when there, the version
+  $seqobj->keywords();         # when there, the Keywords line
+  $seqobj->length()            # length
+  $seqobj->desc();             # description
+  $seqobj->primary_id();       # a unique id for this sequence regardless
+                               # of its display_id or accession number
+  $seqobj->display_id();       # the human readable id of the sequence
+
+Some of these values map to fields in common formats. For example, The
+display_id() method returns the LOCUS name of a Genbank entry,
+the (\S+) following the E<gt> character in a Fasta file, the ID from
+a SwissProt file, and so on. The desc() method will return the DEFINITION
+line of a Genbank file, the description following the display_id in a
+Fasta file, and the DE field in a SwissProt file.
+
+  # the following methods return new Seq objects, but
+  # do not transfer features across to the new object:
+
+  $seqobj->trunc(5,10)  # truncation from 5 to 10 as new object
+  $seqobj->revcom       # reverse complements sequence
+  $seqobj->translate    # translation of the sequence
+
+  # if new() can be called this method returns 1, else 0
+
+  $seqobj->can_call_new
+
+  # the following method determines if the given string will be accepted
+  # by the seq() method - if the string is acceptable then validate_seq()
+  # returns 1, or 0 if not
+
+  $seqobj->validate_seq($string)
+
+  # the following method returns or accepts a Species object:
+
+  $seqobj->species();
+
+Please see L<Bio::Species> for more information on this object.
+
+  # the following method returns or accepts an Annotation object
+  # which in turn allows access to Annotation::Reference
+  # and Annotation::Comment objects:
+
+  $seqobj->annotation();
+
+These annotations typically refer to entire sequences, unlike
+features.  See L<Bio::AnnotationCollectionI>,
+L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and
+L<Bio::Annotation::Comment> for details.
+
+It is also important to be able to describe defined portions of a
+sequence. The combination of some description and the corresponding
+sub-sequence is called a feature - an exon and its coordinates within
+a gene is an example of a feature, or a domain within a protein.
+
+  # the following methods return an array of SeqFeatureI objects:
+
+  $seqobj->get_SeqFeatures # The 'top level' sequence features
+  $seqobj->get_all_SeqFeatures # All sequence features, including sub-seq
+                               # features, such as features in an exon
+
+  # to find out the number of features use:
+
+  $seqobj->feature_count
+
+Here are just some of the methods available to SeqFeatureI objects:
+
+  # these methods return numbers:
+
+  $feat->start          # start position (1 is the first base)
+  $feat->end            # end position (2 is the second base)
+  $feat->strand         # 1 means forward, -1 reverse, 0 not relevant
+
+  # these methods return or accept strings:
+
+  $feat->primary_tag    # the name of the sequence feature, eg
+                        # 'exon', 'glycosylation site', 'TM domain'
+  $feat->source_tag     # where the feature comes from, eg, 'EMBL_GenBank',
+                        # or 'BLAST'
+
+  # this method returns the more austere PrimarySeq object, not a
+  # Seq object - the main difference is that PrimarySeq objects do not
+  # themselves contain sequence features
+
+  $feat->seq            # the sequence between start,end on the
+                        # correct strand of the sequence
+
+See L<Bio::PrimarySeq> for more details on PrimarySeq objects.
+
+  # useful methods for feature comparisons, for start/end points
+
+  $feat->overlaps($other)  # do $feat and $other overlap?
+  $feat->contains($other)  # is $other completely within $feat?
+  $feat->equals($other)    # do $feat and $other completely agree?
+
+  # one can also add features
+
+  $seqobj->add_SeqFeature($feat)     # returns 1 if successful
+  $seqobj->add_SeqFeature(@features) # returns 1 if successful
+
+  # sub features. For complex join() statements, the feature
+  # is one sequence feature with many sub SeqFeatures
+
+  $feat->sub_SeqFeature  # returns array of sub seq features
+
+Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>,
+for more information on sequence features.
+
+It is worth mentioning that one can also retrieve the start and end
+positions of a feature using a Bio::LocationI object:
+
+  $location = $feat->location # $location is a Bio::LocationI object
+  $location->start;           # start position
+  $location->end;             # end position
+
+This is useful because one needs a Bio::Location::SplitLocationI object
+in order to retrieve the coordinates inside the Genbank or EMBL join()
+statements (e.g. "CDS    join(51..142,273..495,1346..1474)"):
+
+  if ( $feat->location->isa('Bio::Location::SplitLocationI') &&
+	       $feat->primary_tag eq 'CDS' )  {
+    foreach $loc ( $feat->location->sub_Location ) {
+      print $loc->start . ".." . $loc->end . "\n";
+    }
+  }
+
+See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more
+information.
+
+=head1 Implemented Interfaces
+
+This class implements the following interfaces.
+
+=over 4
+
+=item Bio::SeqI
+
+Note that this includes implementing Bio::PrimarySeqI.
+
+=item Bio::IdentifiableI
+
+=item Bio::DescribableI
+
+=item Bio::AnnotatableI
+
+=item Bio::FeatureHolderI
+
+=back
+
+=head1 FEEDBACK
+
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects
+
+Email birney at ebi.ac.uk
+
+=head1 CONTRIBUTORS
+
+Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=head1 APPENDIX
+
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a "_".
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::Seq;
+use strict;
+
+
+# Object preamble - inherits from Bio::Root::Root
+
+use Bio::Annotation::Collection;
+use Bio::PrimarySeq;
+
+use base qw(Bio::Root::Root Bio::SeqI
+	  Bio::IdentifiableI Bio::DescribableI
+	  Bio::AnnotatableI Bio::FeatureHolderI);
+
+=head2 new
+
+ Title   : new
+ Usage   : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
+                                 -id  => 'human_id',
+				 -accession_number => 'AL000012',
+			       );
+
+ Function: Returns a new Seq object from
+           basic constructors, being a string for the sequence
+           and strings for id and accession_number
+ Returns : a new Bio::Seq object
+
+=cut
+
+sub new {
+    # Constructor. All arguments are passed on to both Bio::Root::Root
+    # and Bio::PrimarySeq; in addition -annotation, -primary_id,
+    # -features and -species are handled here. -features may be a
+    # single Bio::SeqFeatureI object or an array reference of them.
+    my($caller,@args) = @_;
+
+    # allow new() to be called on an existing object as well as a class
+    if( $caller ne 'Bio::Seq') {
+        $caller = ref($caller) if ref($caller);
+    }
+
+    # we know our inheritance hierarchy
+    my $self = Bio::Root::Root->new(@args);
+    bless $self,$caller;
+
+    # this is way too sneaky probably. We delegate the construction of
+    # the Seq object onto PrimarySeq and then pop primary_seq into
+    # our primary_seq slot
+
+    my $pseq = Bio::PrimarySeq->new(@args);
+
+    # as we have just made this, we know it is ok to set hash directly
+    # rather than going through the method
+
+    $self->{'primary_seq'} = $pseq;
+
+    # setting this array is now delayed until the final
+    # moment, again speed ups for non feature containing things
+    # $self->{'_as_feat'} = [];
+
+
+    # _rearrange is called as a plain function with $self passed
+    # explicitly, i.e. without method resolution
+    my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args);
+
+    # for a number of cases - reading fasta files - these are never set. This
+    # gives a quick optimisation around testing things later on
+
+    if( defined $ann || defined $pid || defined $feat || defined $species ) {
+        $pid && $self->primary_id($pid);
+        $species && $self->species($species);
+        $ann && $self->annotation($ann);
+
+        if( defined $feat ) {
+            if( ref($feat) !~ /ARRAY/i ) {
+                # not an array reference: accept a single feature object
+                if( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) {
+                    $self->add_SeqFeature($feat);
+                } else {
+                    $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self));
+                }
+            } else {
+                foreach my $feature ( @$feat ) {
+                    $self->add_SeqFeature($feature);
+                }
+            }
+        }
+    }
+
+    return $self;
+}
+
+=head1 PrimarySeq interface
+
+
+The PrimarySeq interface provides the basic sequence getting
+and setting methods for all sequences.
+
+These methods implement the Bio::PrimarySeq interface by delegating
+to the primary_seq inside the object. This means that you
+can use a Seq object wherever there is a PrimarySeq, and
+of course, you are free to use these functions anyway.
+
+=cut
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $string = $obj->seq()
+ Function: Returns the sequence as a string of letters. The
+           case of the letters is left up to the implementer.
+           Suggested cases are upper case for proteins and lower case for
+           DNA sequence (IUPAC standard),
+           but implementations are suggested to keep an open mind about
+           case (some users... want mixed case!)
+ Returns : A scalar
+ Args    : None
+
+=cut
+
+sub seq {
+    # Pure delegation: the call, with any arguments, is forwarded to the
+    # object held in the primary_seq slot.
+    my $self = shift;
+    return $self->primary_seq()->seq(@_);
+}
+
+=head2 validate_seq
+
+ Title   : validate_seq
+ Usage   : if(! $seq->validate_seq($seq_str) ) {
+                print "sequence $seq_str is not valid for an object of type ",
+		      ref($seq), "\n";
+	   }
+ Function: Validates a given sequence string. A validating sequence string
+           must be accepted by seq(). A string that does not validate will
+           lead to an exception if passed to seq().
+
+           The implementation provided here does not take alphabet() into
+           account. Allowed are all letters (A-Z) and '-','.', and '*'.
+
+ Example :
+ Returns : 1 if the supplied sequence string is valid for the object, and
+           0 otherwise.
+ Args    : The sequence string to be validated.
+
+
+=cut
+
+sub validate_seq {
+    # Validation is not done here; the wrapped primary_seq object decides.
+    my $self = shift;
+    return $self->primary_seq()->validate_seq(@_);
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $len = $seq->length()
+ Function:
+ Example :
+ Returns : Integer representing the length of the sequence.
+ Args    : None
+
+=cut
+
+sub length {
+    # Forwarded unchanged to the wrapped primary_seq object.
+    my $self = shift;
+    return $self->primary_seq()->length(@_);
+}
+
+=head1 Methods from the Bio::PrimarySeqI interface
+
+=cut
+
+=head2 subseq
+
+ Title   : subseq
+ Usage   : $substring = $obj->subseq(10,40);
+ Function: Returns the subseq from start to end, where the first base
+           is 1 and the number is inclusive, ie 1-2 are the first two
+           bases of the sequence
+
+           Start cannot be larger than end but can be equal
+
+ Returns : A string
+ Args    : 2 integers
+
+
+=cut
+
+sub subseq {
+    # The arguments are handed straight to the wrapped primary_seq object.
+    my $self = shift;
+    return $self->primary_seq()->subseq(@_);
+}
+
+=head2 display_id
+
+ Title   : display_id
+ Usage   : $id = $obj->display_id or $obj->display_id($newid);
+ Function: Gets or sets the display id, also known as the common name of
+           the Seq object.
+
+           The semantics of this is that it is the most likely string
+           to be used as an identifier of the sequence, and likely to
+           have "human" readability.  The id is equivalent to the LOCUS
+           field of the GenBank/EMBL databanks and the ID field of the
+           Swissprot/sptrembl database. In fasta format, the >(\S+) is
+           presumed to be the id, though some people overload the id
+           to embed other information. Bioperl does not use any
+           embedded information in the ID field, and people are
+           encouraged to use other mechanisms (accession field for
+           example, or extending the sequence object) to solve this.
+
+           Notice that $seq->id() maps to this function, mainly for
+           legacy/convenience issues.
+ Returns : A string
+ Args    : None or a new id
+
+
+=cut
+
+sub display_id {
+   # Getter/setter call forwarded to the wrapped primary_seq object.
+   my $self = shift;
+   return $self->primary_seq->display_id(@_);
+}
+
+
+
+=head2 accession_number
+
+ Title   : accession_number
+ Usage   : $unique_biological_key = $obj->accession_number;
+ Function: Returns the unique biological id for a sequence, commonly
+           called the accession_number. For sequences from established
+           databases, the implementors should try to use the correct
+           accession number. Notice that primary_id() provides the
+           unique id for the implementation, allowing multiple objects
+           to have the same accession number in a particular implementation.
+
+           For sequences with no accession number, this method should return
+           "unknown".
+
+           Can also be used to set the accession number.
+ Example : $key = $seq->accession_number or $seq->accession_number($key)
+ Returns : A string
+ Args    : None or an accession number
+
+
+=cut
+
+sub accession_number {
+   # Getter/setter call forwarded to the wrapped primary_seq object.
+   my $self = shift;
+   return $self->primary_seq->accession_number(@_);
+}
+
+=head2 desc
+
+ Title   : desc
+ Usage   : $seqobj->desc($string) or $seqobj->desc()
+ Function: Sets or gets the description of the sequence
+ Example :
+ Returns : The description
+ Args    : The description or none
+
+
+=cut
+
+sub desc {
+   # Getter/setter call forwarded to the wrapped primary_seq object.
+   my $self = shift;
+   return $self->primary_seq->desc(@_);
+}
+
+=head2 primary_id
+
+ Title   : primary_id
+ Usage   : $unique_implementation_key = $obj->primary_id;
+ Function: Returns the unique id for this object in this
+           implementation. This allows implementations to manage
+           their own object ids in a way the implementation can control;
+           clients can expect one id to map to one object.
+
+           For sequences with no natural id, this method should return
+           a stringified memory location.
+
+           Can also be used to set the primary_id.
+
+           Also notice that this method is not delegated to the
+           internal Bio::PrimarySeq object
+
+           [Note this method name is likely to change in 1.3]
+
+ Example : $id = $seq->primary_id or $seq->primary_id($id)
+ Returns : A string
+ Args    : None or an id
+
+
+=cut
+
+sub primary_id {
+   my ($self,$new_id) = @_;
+
+   # Setter: only a defined value is stored.
+   $self->{'primary_id'} = $new_id if defined $new_id;
+
+   # This id lives on the Seq object itself, not on the wrapped primary_seq.
+   # If none was ever stored, fall back to the stringified object.
+   return exists $self->{'primary_id'} ? $self->{'primary_id'} : "$self";
+}
+
+=head2 can_call_new
+
+ Title   : can_call_new
+ Usage   : if ( $obj->can_call_new ) {
+             $newobj = $obj->new( %param );
+	   }
+ Function: can_call_new returns 1 or 0 depending
+           on whether an implementation allows new
+           constructor to be called. If a new constructor
+           is allowed, then it should take the following hashed
+           constructor list.
+
+           $myobject->new( -seq => $sequence_as_string,
+			   -display_id  => $id,
+			   -accession_number => $accession,
+			   -alphabet => 'dna',
+			 );
+ Example :
+ Returns : 1 or 0
+ Args    : None
+
+
+=cut
+
+sub can_call_new {
+    # This implementation always allows new() to be called on an object,
+    # so the answer is a constant true value.
+    my $allowed = 1;
+    return $allowed;
+}
+
+=head2 alphabet
+
+ Title   : alphabet
+ Usage   : if ( $obj->alphabet eq 'dna' ) { /Do Something/ }
+ Function: Returns the type of sequence being one of
+           'dna', 'rna' or 'protein'. This is case sensitive.
+
+           This is not called <type> because this would cause
+           upgrade problems from the 0.5 and earlier Seq objects.
+
+ Returns : A string either 'dna','rna','protein'. NB - the object must
+           make a call of the type - if there is no type specified it
+           has to guess.
+ Args    : None
+
+
+=cut
+
+sub alphabet {
+   my ($self, @args) = @_;
+   my $pseq = $self->primary_seq;
+
+   # Arguments are passed on only when the first one is defined; a call
+   # such as alphabet(undef) is treated as a plain getter.
+   if( @args && defined $args[0] ) {
+       return $pseq->alphabet(@args);
+   }
+   return $pseq->alphabet();
+}
+
+=head1 Methods for Bio::IdentifiableI compliance
+
+=cut
+
+=head2 object_id
+
+ Title   : object_id
+ Usage   : $string    = $obj->object_id()
+ Function: a string which represents the stable primary identifier
+           in this namespace of this object. For DNA sequences this
+           is its accession_number, similarly for protein sequences
+
+           This is aliased to accession_number().
+ Returns : A scalar
+
+
+=cut
+
+sub object_id {
+    # Alias: answers with whatever accession_number() gives for the same
+    # arguments.
+    my $self = shift;
+    return $self->accession_number(@_);
+}
+
+=head2 version
+
+ Title   : version
+ Usage   : $version    = $obj->version()
+ Function: a number which differentiates between versions of
+           the same object. Higher numbers are considered to be
+           later and more relevant, but a single object described by
+           the same identifier should represent the same concept
+
+ Returns : A number
+
+=cut
+
+sub version{
+    # Getter/setter call forwarded to the wrapped primary_seq object.
+    my $self = shift;
+    return $self->primary_seq->version(@_);
+}
+
+
+=head2 authority
+
+ Title   : authority
+ Usage   : $authority    = $obj->authority()
+ Function: a string which represents the organisation which
+           granted the namespace, written as the DNS name for
+           organisation (eg, wormbase.org)
+
+ Returns : A scalar
+
+=cut
+
+sub authority {
+    # Getter/setter call forwarded to the wrapped primary_seq object.
+    my $self = shift;
+    return $self->primary_seq()->authority(@_);
+}
+
+=head2 namespace
+
+ Title   : namespace
+ Usage   : $string    = $obj->namespace()
+ Function: A string representing the name space this identifier
+           is valid in, often the database name or the name
+           describing the collection
+
+ Returns : A scalar
+
+
+=cut
+
+sub namespace{
+    # Getter/setter call forwarded to the wrapped primary_seq object.
+    my $self = shift;
+    return $self->primary_seq()->namespace(@_);
+}
+
+=head1 Methods for Bio::DescribableI compliance
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $string    = $obj->display_name()
+ Function: A string which is what should be displayed to the user
+           the string should have no spaces (ideally, though a cautious
+           user of this interface would not assume this) and should be
+           less than thirty characters (though again, double checking
+           this is a good idea)
+
+           This is aliased to display_id().
+ Returns : A scalar
+
+=cut
+
+sub display_name {
+    # Alias: answers with whatever display_id() gives for the same
+    # arguments.
+    my $self = shift;
+    return $self->display_id(@_);
+}
+
+=head2 description
+
+ Title   : description
+ Usage   : $string    = $obj->description()
+ Function: A text string suitable for displaying to the user a
+           description. This string is likely to have spaces, but
+           should not have any newlines or formatting - just plain
+           text. The string should not be greater than 255 characters
+           and clients can feel justified at truncating strings at 255
+           characters for the purposes of display
+
+           This is aliased to desc().
+ Returns : A scalar
+
+=cut
+
+sub description {
+    # Alias: answers with whatever desc() gives for the same arguments.
+    my $self = shift;
+    return $self->desc(@_);
+}
+
+=head1 Methods for implementing Bio::AnnotatableI
+
+=cut
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $ann = $seq->annotation or $seq->annotation($annotation)
+ Function: Gets or sets the annotation
+ Returns : L<Bio::AnnotationCollectionI> object
+ Args    : None or L<Bio::AnnotationCollectionI> object
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information
+
+=cut
+
+sub annotation {
+    my ($obj,$value) = @_;
+    if( defined $value ) {
+        # Setter. The ref() guard makes a plain (non-reference) value reach
+        # the throw below instead of being used as an invocant for isa().
+        $obj->throw("object of class ".ref($value)." does not implement ".
+                    "Bio::AnnotationCollectionI. Too bad.")
+            unless ref($value) && $value->isa("Bio::AnnotationCollectionI");
+        $obj->{'_annotation'} = $value;
+    } elsif( ! defined $obj->{'_annotation'}) {
+        # Getter on an object without annotation: create an empty
+        # collection lazily so that callers always get an object back.
+        $obj->{'_annotation'} = Bio::Annotation::Collection->new();
+    }
+    return $obj->{'_annotation'};
+}
+
+=head1 Methods to implement Bio::FeatureHolderI
+
+This includes methods for retrieving, adding, and removing features.
+
+=cut
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   :
+ Function: Get the feature objects held by this feature holder.
+
+           Features which are not top-level are subfeatures of one or
+           more of the returned feature objects, which means that you
+           must traverse the subfeature arrays of each top-level
+           feature object in order to traverse all features associated
+           with this sequence.
+
+           Use get_all_SeqFeatures() if you want the feature tree
+           flattened into one single array.
+
+ Example :
+ Returns : an array of Bio::SeqFeatureI implementing objects
+ Args    : none
+
+At some day we may want to expand this method to allow for a feature
+filter to be passed in.
+
+=cut
+
+sub get_SeqFeatures{
+   my $self = shift;
+
+   # The feature array is created on first use (new() does not set it up).
+   $self->{'_as_feat'} = [] unless defined $self->{'_as_feat'};
+
+   # Hand back the stored top-level features as a list.
+   return @{$self->{'_as_feat'}};
+}
+
+=head2 get_all_SeqFeatures
+
+ Title   : get_all_SeqFeatures
+ Usage   : @feat_ary = $seq->get_all_SeqFeatures();
+ Function: Returns the tree of feature objects attached to this
+           sequence object flattened into one single array. Top-level
+           features will still contain their subfeature-arrays, which
+           means that you will encounter subfeatures twice if you
+           traverse the subfeature tree of the returned objects.
+
+           Use get_SeqFeatures() if you want the array to contain only
+           the top-level features.
+
+ Returns : An array of Bio::SeqFeatureI implementing objects.
+ Args    : None
+
+
+=cut
+
+# this implementation is inherited from FeatureHolderI
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : $seq->feature_count()
+ Function: Return the number of SeqFeatures attached to a sequence
+ Returns : integer representing the number of SeqFeatures
+ Args    : None
+
+
+=cut
+
+sub feature_count {
+    my ($self) = @_;
+
+    # No feature array has been created yet, so there is nothing to count.
+    my $stored = $self->{'_as_feat'};
+    return 0 unless defined($stored);
+
+    # Number of entries: last index plus one.
+    return ($#{$stored} + 1);
+}
+
+=head2 add_SeqFeature
+
+ Title   : add_SeqFeature
+ Usage   : $seq->add_SeqFeature($feat);
+           $seq->add_SeqFeature(@feat);
+ Function: Adds the given feature object (or each of an array of feature
+           objects) to the feature array of this
+           sequence. The object passed is required to implement the
+           Bio::SeqFeatureI interface.
+ Returns : 1 on success
+ Args    : A Bio::SeqFeatureI implementing object, or an array of such objects.
+ Throws  : Bio::Root::BadParameter if any of the supplied arguments do
+           not derive from Bio::SeqFeatureI.
+
+
+=cut
+
+sub add_SeqFeature {
+   my ($self,@feat) = @_;
+
+   # The feature array is created on first use.
+   $self->{'_as_feat'} = [] unless $self->{'_as_feat'};
+
+   foreach my $feat ( @feat ) {
+       # Reject anything that is not a Bio::SeqFeatureI. Features that
+       # precede a bad argument have already been stored by this point.
+       if( !$feat->isa("Bio::SeqFeatureI") ) {
+           $self->throw(-class => 'Bio::Root::BadParameter',
+                        -text  =>"$feat is not a Bio::SeqFeatureI and that's what we expect...",
+                        -value => $feat);
+       }
+
+       # make sure we attach ourselves to the feature if the feature wants it
+       my $aseq = $self->primary_seq;
+       $feat->attach_seq($aseq) if $aseq;
+
+       push(@{$self->{'_as_feat'}},$feat);
+   }
+   return 1;
+}
+
+=head2 remove_SeqFeatures
+
+ Title   : remove_SeqFeatures
+ Usage   : $seq->remove_SeqFeatures();
+ Function: Flushes all attached SeqFeatureI objects.
+
+           To remove individual feature objects, delete those from the returned
+           array and re-add the rest.
+ Example :
+ Returns : The array of Bio::SeqFeatureI objects removed from this seq.
+ Args    : None
+
+
+=cut
+
+sub remove_SeqFeatures {
+    my $self = shift;
+
+    # Nothing was ever stored: report an empty list.
+    my $stored = $self->{'_as_feat'};
+    return () unless $stored;
+
+    # Swap in a fresh empty array and return what used to be stored.
+    $self->{'_as_feat'} = [];
+    return @{$stored};
+}
+
+=head1 Methods provided in the Bio::PrimarySeqI interface
+
+
+These methods are inherited from the PrimarySeq interface
+and work as one expects, building new Bio::Seq objects
+or other information as expected. See L<Bio::PrimarySeq>
+for more information.
+
+Sequence Features are B<not> transferred to the new objects.
+This is possibly a mistake. Anyone who feels the urge in
+dealing with this is welcome to give it a go.
+
+=head2 revcom
+
+ Title   : revcom
+ Usage   : $rev = $seq->revcom()
+ Function: Produces a new Bio::Seq object which
+           is the reversed complement of the sequence. For protein
+           sequences this throws an exception of "Sequence is a protein.
+           Cannot revcom"
+
+           The id is the same id as the original sequence, and the
+           accession number is also identical. If someone wants to track
+           that this sequence has been reversed, it needs to define its own
+           extensions
+
+           To do an in-place edit of an object you can go:
+
+           $seq = $seq->revcom();
+
+           This of course, causes Perl to handle the garbage collection of
+           the old object, but it is roughly speaking as efficient as an
+           in-place edit.
+
+ Returns : A new (fresh) Bio::Seq object
+ Args    : None
+
+
+=cut
+
+=head2 trunc
+
+ Title   : trunc
+ Usage   : $subseq = $myseq->trunc(10,100);
+ Function: Provides a truncation of a sequence
+
+ Example :
+ Returns : A fresh Seq object
+ Args    : Two integers, the start and end positions (see Usage)
+
+
+=cut
+
+=head2 id
+
+ Title   : id
+ Usage   : $id = $seq->id()
+ Function: This is mapped on display_id
+ Returns : value of display_id()
+ Args    : [optional] value to update display_id
+
+
+=cut
+
+sub  id {
+    # Alias: answers with whatever display_id() gives for the same
+    # arguments.
+    my $self = shift;
+    return $self->display_id(@_);
+}
+
+
+=head1 Seq only methods
+
+
+These methods are specific to the Bio::Seq object, and not
+found on the Bio::PrimarySeq object
+
+=head2 primary_seq
+
+ Title   : primary_seq
+ Usage   : $seq->primary_seq or $seq->primary_seq($newval)
+ Function: Get or set a PrimarySeq object
+ Example :
+ Returns : PrimarySeq object
+ Args    : None or PrimarySeq object
+ Throws  : Bio::Root::BadParameter if the supplied argument does
+           not derive from Bio::PrimarySeqI.
+
+
+=cut
+
+sub primary_seq {
+   my ($self,$pseq) = @_;
+
+   # Getter: without a defined argument just report the current object.
+   return $self->{'primary_seq'} unless defined $pseq;
+
+   # Setter: only accept objects implementing Bio::PrimarySeqI.
+   unless( ref($pseq) && $pseq->isa('Bio::PrimarySeqI') ) {
+       $self->throw(-class => 'Bio::Root::BadParameter',
+                    -text  => "$pseq is not a Bio::PrimarySeqI compliant object",
+                    -value => $pseq );
+   }
+
+   $self->{'primary_seq'} = $pseq;
+
+   # Every top-level feature is offered the new sequence so that it can
+   # attach itself to it.
+   foreach my $feature ( $self->get_SeqFeatures() ) {
+       $feature->attach_seq($pseq);
+   }
+
+   return $self->{'primary_seq'};
+}
+
+=head2 species
+
+ Title   : species
+ Usage   : $species = $seq->species() or $seq->species($species)
+ Function: Gets or sets the species
+ Returns : L<Bio::Species> object
+ Args    : None or L<Bio::Species> object
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    my ($self, $species) = @_;
+
+    # Setter: a false value (including undef) leaves the stored species
+    # untouched, so species() cannot be used to clear it.
+    $self->{'species'} = $species if $species;
+
+    # Both getter and setter calls explicitly return the stored value.
+    return $self->{'species'};
+}
+
+=head1 Internal methods
+
+=cut
+
+# Explicit no-op destructor, so that object destruction never falls
+# through to an AUTOLOAD handler.
+sub DESTROY {
+    return;
+}
+
+############################################################################
+# aliases due to name changes or to compensate for our lack of consistency #
+############################################################################
+
+# Older names for remove_SeqFeatures(), in both singular and plural
+# spelling; both names are aliases of the same subroutine.
+*flush_SeqFeature = \&remove_SeqFeatures;
+*flush_SeqFeatures = \&remove_SeqFeatures;
+
+# top_SeqFeatures is the former name of get_SeqFeatures() (FeatureHolderI)
+*top_SeqFeatures = \&get_SeqFeatures;
+
+# Former name of get_all_SeqFeatures() (see FeatureHolderI); kept as a thin
+# wrapper that passes its arguments along.
+sub all_SeqFeatures{
+    my $self = shift;
+    return $self->get_all_SeqFeatures(@_);
+}
+
+sub accession {
+    my ($self, @args) = @_;
+
+    # Deprecated spelling: warn on every call, then behave exactly like
+    # accession_number().
+    my $class = ref($self);
+    $self->warn($class."::accession is deprecated, ".
+                "use accession_number() instead");
+
+    return $self->accession_number(@args);
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/SeqI.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/SeqI.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/lib/Bio/SeqI.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,238 @@
+# $Id: SeqI.pm,v 1.5.4.1 2006/10/02 23:10:38 sendu Exp $
+#
+# BioPerl module for Bio::SeqI
+#
+# Cared for by Ewan Birney <birney at ebi.ac.uk>
+#
+# Copyright Ewan Birney
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqI [Developers] - Abstract Interface of Sequence (with features)
+
+=head1 SYNOPSIS
+
+    # Bio::SeqI is the interface class for sequences.
+
+    # If you are a newcomer to bioperl, you should
+    # start with Bio::Seq documentation. This
+    # documentation is mainly for developers using
+    # Bioperl.
+
+    # Bio::SeqI implements Bio::PrimarySeqI
+    $seq      = $seqobj->seq(); # actual sequence as a string
+    $seqstr   = $seqobj->subseq(10,50);
+
+    # Bio::SeqI has annotationcollections
+
+    $ann      = $seqobj->annotation(); # annotation object
+
+    # Bio::SeqI has sequence features
+    # features must implement Bio::SeqFeatureI
+
+    @features = $seqobj->get_SeqFeatures(); # just top level
+    @features = $seqobj->get_all_SeqFeatures(); # descend into sub features
+
+
+
+=head1 DESCRIPTION
+
+Bio::SeqI is the abstract interface of annotated Sequences. These
+methods are those which you can be guaranteed to get for any Bio::SeqI
+- for most users of the package the documentation (and methods) in
+this class are not all that useful - this is a developers only class which
+defines what methods have to be implemented by other Perl objects to
+comply with the Bio::SeqI interface. Go "perldoc Bio::Seq" or "man
+Bio::Seq" for more information.
+
+
+There aren't many here, because too many complicated functions here
+prevent implementations which are just wrappers around a database or
+similar delayed mechanisms.
+
+Most of the clever stuff happens inside the SeqFeatureI system.
+
+A good reference implementation is Bio::Seq which is a pure perl
+implementation of this class with a lot of extra pieces for extra
+manipulation.  However, if you want to be able to use any sequence
+object in your analysis, if you can do it just using these methods,
+then you know you will be future proof and compatible with other
+implementations of Seq.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to one
+of the Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Email birney at sanger.ac.uk
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+#'
+# Let the code begin...
+
+
+package Bio::SeqI;
+use strict;
+
+
+# Object preamble - inherits from Bio::PrimarySeqI
+
+use base qw(Bio::PrimarySeqI Bio::AnnotatableI Bio::FeatureHolderI);
+
+=head2 get_SeqFeatures
+
+ Title   : get_SeqFeatures
+ Usage   : my @feats = $seq->get_SeqFeatures();
+ Function: retrieve just the toplevel sequence features attached to this seq
+ Returns : array of Bio::SeqFeatureI objects
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> and L<Bio::SeqFeatureI> for more information.
+
+=cut
+
+=head2 get_all_SeqFeatures
+
+ Title   : get_all_SeqFeatures
+ Usage   : @features = $annseq->get_all_SeqFeatures()
+ Function: returns all SeqFeatures, including sub SeqFeatures
+ Returns : an array of Bio::SeqFeatureI objects
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> and L<Bio::SeqFeatureI> for more information.
+
+=cut
+
+=head2 feature_count
+
+ Title   : feature_count
+ Usage   : $seq->feature_count()
+ Function: Return the number of SeqFeatures attached to a sequence
+ Returns : integer representing the number of SeqFeatures
+ Args    : none
+
+This method comes through extension of Bio::FeatureHolderI. See
+L<Bio::FeatureHolderI> for more information.
+
+=cut
+
+=head2 seq
+
+ Title   : seq
+ Usage   : my $string = $seq->seq();
+ Function: Retrieves the sequence string for the sequence object
+ Returns : string
+ Args    : none
+
+
+=cut
+
+sub seq{
+   # Interface stub: this class provides no implementation of seq().
+   my $self = shift;
+   return $self->throw_not_implemented();
+}
+
+=head2 write_GFF
+
+ Title   : write_GFF
+ Usage   : $seq->write_GFF(\*FILEHANDLE);
+ Function: Convenience method to write out all the sequence features
+           in GFF format to the provided filehandle (STDOUT by default)
+ Returns : none
+ Args    : [optional] filehandle to write to (default is STDOUT)
+
+
+=cut
+
+sub write_GFF{
+   my ($self,$out) = @_;
+
+   # Without a (true) filehandle argument, output goes to STDOUT.
+   $out = \*STDOUT unless $out;
+
+   # One line per feature, sub-features included, using each feature's
+   # own gff_string().
+   foreach my $feature ( $self->get_all_SeqFeatures() ) {
+       print {$out} $feature->gff_string, "\n";
+   }
+
+}
+
+=head2 annotation
+
+ Title   : annotation
+ Usage   : $obj->annotation($seq_obj)
+ Function: retrieve the attached annotation object
+ Returns : Bio::AnnotationCollectionI or none;
+
+See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
+for more information. This method comes through extension from
+L<Bio::AnnotatableI>.
+
+=cut
+
+=head2 species
+
+ Title   : species
+ Usage   :
+ Function: Gets or sets the species
+ Example : $species = $self->species();
+ Returns : Bio::Species object
+ Args    : Bio::Species object or none;
+
+See L<Bio::Species> for more information
+
+=cut
+
+sub species {
+    # Interface stub: this class provides no implementation of species().
+    my $self = shift;
+    return $self->throw_not_implemented();
+}
+
+=head2 primary_seq
+
+ Title   : primary_seq
+ Usage   : $obj->primary_seq($newval)
+ Function: Retrieve the underlying Bio::PrimarySeqI object if available.
+           This is in the event one has a sequence with lots of features
+           but want to be able to narrow the object to just one with
+           the basics of a sequence (no features or annotations).
+ Returns : Bio::PrimarySeqI
+ Args    : Bio::PrimarySeqI or none;
+
+See L<Bio::PrimarySeqI> for more information
+
+=cut
+
+sub primary_seq {
+    # Interface stub: this class provides no implementation of
+    # primary_seq().
+    my $self = shift;
+    return $self->throw_not_implemented;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestInterface.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestInterface.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestInterface.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+=head1 NAME
+
+TestInterface - A simple subclass of Interface
+
+=head1 DESCRIPTION
+
+This module demonstrates how to use the generic Bio::Root::RootI
+superclass.
+
+=head1 AUTHOR
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+=cut
+
+package TestInterface;
+
+use strict;
+
+use base qw(Bio::Root::RootI);
+
+# The three methods below make up the interface. Each one only calls
+# throw_not_implemented(), so a class using this interface has to supply
+# its own version of any method it wants to be callable.
+
+sub data {
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+sub foo {
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+sub bar {
+    my $self = shift;
+    $self->throw_not_implemented;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestObject.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestObject.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/root/lib/TestObject.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,67 @@
+=head1 NAME
+
+TestObject - An implementation of TestInterface
+
+=head1 DESCRIPTION
+
+This module attempts to provide an implementation of TestInterface and
+is used for illustrating exception throwing using Graham Barr's
+Error.pm object.
+
+=head1 AUTHOR
+
+Steve Chervitz E<lt>sac at bioperl.orgE<gt>
+
+=cut
+
+#'
+
+package TestObject;
+
+use strict;
+
+
+# Define a special type of error "Bio::TestException" as a subclass of
+# Bio::Root::Exception.
+# Note four things:
+#   1. The ISA declaration effectively defines our new Exception object.
+#   2. This declaration doesn't have to be located in the Bio directory.
+#   3. We don't have to use Bio::Root::Exception in this module.
+#   4. If Error.pm isn't available this statement doesn't matter.
+@Bio::TestException::ISA = qw( Bio::Root::Exception );
+
+use base qw(Bio::Root::Root TestInterface);
+
+
+# Note that we're not implementing foo(), so calling it
+# will result in a Bio::Root::NotImplemented exception.
+
+# Get or set the test data. Only a true value is stored (and announced on
+# STDOUT); calling data() with a false value acts as a plain getter.
+sub data {
+    my ($self, $data) = @_;
+    print "Setting test data ($data)\n" if $data;
+    $self->{'data'} = $data if $data;
+
+    return $self->{'data'};
+}
+
+# Demonstrates throwing a typed exception: always throws a
+# Bio::TestException after printing two progress lines.
+sub bar {
+
+    my $self = shift;
+
+    print "\nExecuting method bar() in TestObject\n";
+    print "Throwing a Bio::TestException\n";
+
+    my $message = "A Test error";
+
+    # Bio::Root::Root::throw() will make use of Error.pm if present.
+    # The type of Error is specified with a -class parameter.
+    # If -class is not supplied, a Bio::Root::Exception is thrown.
+    # In this case, the argument can consist of a simple string.
+
+    $self->throw( -class => 'Bio::TestException',
+                  -text  => $message );
+
+    # Deliberately left after the throw to show that it is never reached.
+    print "Code within bar() below the throw that shouldn't be executed.\n";
+
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/blast_example.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/blast_example.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/blast_example.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,127 @@
+#!/usr/bin/perl -w
+# Example that shows values returned by Bio::SearchIO::Blast.
+# Note that some methods will return objects or arrays, not text.
+# For example, $hsp->get_aln will return a Bio::SimpleAlign object,
+# not the alignment in a printable form.
+# This script was used to create the table in the SearchIO HOWTO,
+# found at http://bioperl.open-bio.org/wiki/HOWTO:SearchIO
+# Brian Osborne
+
+use strict;
+use Bio::SearchIO;
+use Bio::SimpleAlign;
+use Bio::AlignIO;
+
+my $file = shift or die "Usage: $0 <BLAST-report-file>\n";
+my $in = Bio::SearchIO->new(-format => 'blast',
+                            # comment out the next line to read STDIN
+                            -file   => $file );
+
+# A single ClustalW writer serves every HSP, so it is built once up front.
+# NOTE(review): with no -file/-fh argument it presumably writes to STDOUT.
+my $alnIO = Bio::AlignIO->new(-format => "clustalw");
+
+# Accessors that take no arguments and are reported under their own name,
+# listed in output order.
+my @result_methods = qw(
+    algorithm algorithm_version query_name query_accession query_length
+    query_description database_name database_letters database_entries
+    num_hits hits
+);
+my @hit_methods = qw(
+    name accession description length algorithm raw_score significance bits
+    hsps num_hsps ambiguous_aln overlap n logical_length length_aln gaps
+    frac_identical frac_conserved frac_aligned_query frac_aligned_hit
+    num_unaligned_sbjct num_unaligned_hit num_unaligned_query strand frame
+    rank locus
+);
+# HSP accessors reported before and after the length(...) rows.
+my @hsp_methods_head = qw(
+    algorithm evalue frac_identical frac_conserved gaps query_string
+    hit_string homology_string
+);
+my @hsp_methods_tail = qw(
+    hsp_length frame num_conserved num_identical rank score bits
+    percent_identity
+);
+
+# Each output row is "<level>\t<call>\t<value>". Every accessor is called in
+# scalar context because its value is concatenated into the row.
+while ( my $result = $in->next_result ) {
+   my @stats  = $result->available_statistics;
+   my @params = $result->available_parameters;
+   print "Result\tavailable_statistics\t@stats\n";
+   print "Result\tavailable_parameters\t@params\n";
+   for my $method (@result_methods) {
+      print "Result\t$method\t" . $result->$method . "\n";
+   }
+
+   while ( my $hit = $result->next_hit ) {
+      my $id         = $hit->matches('id');
+      my $cons       = $hit->matches('cons');
+      my @accs       = $hit->each_accession_number;
+      my @qidentical = $hit->seq_inds('query','identical');
+      my @qconserved = $hit->seq_inds('query','conserved');
+      my @hidentical = $hit->seq_inds('hit','identical');
+      my @hconserved = $hit->seq_inds('hit','conserved');
+      print "Hit\tseq_inds('query','identical')\t@qidentical\n";
+      print "Hit\tseq_inds('query','conserved')\t@qconserved\n";
+      print "Hit\tseq_inds('hit','identical')\t@hidentical\n";
+      print "Hit\tseq_inds('hit','conserved')\t@hconserved\n";
+      print "Hit\teach_accession_number\t@accs\n";
+      print "Hit\tmatches('id')\t" . $id . "\n";
+      print "Hit\tmatches('cons')\t" . $cons . "\n";
+      for my $method (@hit_methods) {
+         print "Hit\t$method\t" . $hit->$method . "\n";
+      }
+
+      while ( my $hsp = $hit->next_hsp ) {
+         my ($hit_id, $hit_cons)     = $hsp->matches('hit');
+         my ($query_id, $query_cons) = $hsp->matches('query');
+         my @hsp_qidentical = $hsp->seq_inds('query','identical');
+         my @hsp_qconserved = $hsp->seq_inds('query','conserved');
+         my @hsp_hidentical = $hsp->seq_inds('hit','identical');
+         my @hsp_hconserved = $hsp->seq_inds('hit','conserved');
+         my @hrange = $hsp->range('hit');
+         my @qrange = $hsp->range('query');
+         my $aln    = $hsp->get_aln;
+         print "HSP\trange('hit')\t@hrange\n";
+         print "HSP\trange('query')\t@qrange\n";
+         print "HSP\tseq_inds('hit','identical')\t@hsp_hidentical\n";
+         print "HSP\tseq_inds('query','conserved')\t@hsp_qconserved\n";
+         print "HSP\tseq_inds('query','identical')\t@hsp_qidentical\n";
+         print "HSP\tseq_inds('hit','conserved')\t@hsp_hconserved\n";
+         print "HSP\tmatches('hit')\t" .   $hit_id . " " . $hit_cons . "\n";
+         print "HSP\tmatches('query')\t" . $query_id . " " . $query_cons . "\n";
+         for my $method (@hsp_methods_head) {
+            print "HSP\t$method\t" . $hsp->$method . "\n";
+         }
+         for my $type (qw(total hit query)) {
+            print "HSP\tlength('$type')\t" . $hsp->length($type) . "\n";
+         }
+         for my $method (@hsp_methods_tail) {
+            print "HSP\t$method\t" . $hsp->$method . "\n";
+         }
+         print "HSP\tstrand()\t" . $hsp->strand() . "\n";
+         for my $edge (qw(start end)) {
+            for my $type (qw(hit query)) {
+               print "HSP\t$edge('$type')\t" . $hsp->$edge($type) . "\n";
+            }
+         }
+         print "HSP\talignment\n";
+         # write_aln() emits the alignment itself; its return value is a
+         # status, so it is not printed into the report.
+         $alnIO->write_aln($aln);
+         print "\n\n";
+      }
+   }
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/blast_example.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/custom_writer.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/custom_writer.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/custom_writer.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,105 @@
+#!/usr/bin/env perl 
+
+# Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
+# for producing custom output of Blast hit data from a Blast report 
+# input stream.
+#
+# Here we define a custom SearchWriterI object that outputs just the data we want
+# from each BLAST report.
+# 
+# NOTE: If you just want to pick and choose which columns you want 
+# in the output table, you don't need to create your own custom
+# SearchWriterI implementation as we do here. HitTableWriter and HSPTableWriter
+# are configurable as to what columns and order you want. 
+# The hitwriter*.pl and hspwriter.pl examples in this directory
+# illustrate this.
+#
+# For a complete list of columns, see the docs for these modules:
+#   Bio::SearchIO::Writer::HitTableWriter
+#   Bio::SearchIO::Writer::HSPTableWriter
+#
+# This example serves as an illustration of how to use the 
+# SearchWriterI api and plug it in to a SearchIO parser,
+# which you may want to do if you want to generate data column(s)
+# not provided by the available writers.
+#
+# Usage:
+#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
+#   STDOUT: none, but generates an output file "custom_writer.out"
+#           containing tab-delimited data on a per-hit basis.
+#   STDERR: Progress info.
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: custom_writer.pl,v 1.3 2004/02/21 10:50:34 sac Exp $
+
+package MyBlastWriter;
+
+use strict;
+use lib '../../';
+use Bio::Root::Root;
+use Bio::SearchIO::SearchWriterI;
+
+# Inherit from Bio::Root::Root and the Bio::SearchIO::SearchWriterI interface.
+use vars qw( @ISA );
+@ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI );
+
+# Render one search result as tab-delimited text, one row per reported hit.
+#   Args    : $result - result object to report on; it is queried through
+#             hits(), psiblast(), query_name() and query_length().
+#   Returns : string with one newline-terminated row per reported hit,
+#             or the empty string when no hit is reported.
+#   Side effects: writes each reported hit and a final count to STDERR.
+sub to_string {
+    my ($self, $result) = @_;
+    my $str = '';
+
+    my $hits_reported = 0;
+
+    foreach my $hit ($result->hits) {
+
+        # If this is a PSI-BLAST report, only report novel hits
+        if ( $result->psiblast ) {
+            # Note that we could have supplied this as a -HIT_FILTER function
+            # when we defined our input SearchIO object. Then we wouldn't
+            # need to define a custom writer.
+            next unless $hit->iteration > 1 and not $hit->found_again;
+        }
+
+        $hits_reported++;
+
+        # Plain print: the stringified hit is data, not a printf format, so
+        # a '%' in it must not be read as a conversion.
+        print STDERR "$hit\n";
+
+        $str .= sprintf "%s\t%d\t%s\t%d\t%.2f\t%d\t%.1e\t%d\t%d\t%d\t%d\t%s\n",
+                        $result->query_name, $result->query_length, $hit->name,
+                        $hit->length, $hit->frac_identical('query'), $hit->length_aln,
+                        $hit->expect, $hit->score, $hit->bits,
+                        $hit->gaps('total'), $hit->num_hsps, $hit->iteration || '-';
+    }
+
+    printf STDERR "\n%d hits written\n", $hits_reported;
+
+    return $str;
+}
+
+package main;
+
+#===================================================
+# Start of script 
+#===================================================
+
+use strict;
+
+use lib '../../../';
+use Bio::SearchIO;
+
+# Make STDOUT the selected handle and turn on autoflush for it.
+select STDOUT;
+$| = 1;
+
+# Input is read through the ARGV handle: the files named on the command
+# line, or STDIN when none are given.
+my $in     = Bio::SearchIO->new( -format => 'blast',
+                                 -fh     => \*ARGV,
+                                 -signif => 0.1 );
+my $writer = MyBlastWriter->new();
+my $out    = Bio::SearchIO->new( -format => 'blast',
+                                 -writer => $writer,
+                                 -file   => ">custom_writer.out" );
+
+while ( my $result = $in->next_result() ) {
+    # Pass the result as a %s argument so that a '%' in its string form is
+    # not taken as part of the printf format.
+    printf STDERR "Report %d: %s\n", $in->result_count, $result;
+    $out->write_result($result);
+}
+
+printf STDERR "\n%d Results processed.\n", $in->result_count;
+printf STDERR "Output sent to file: %s\n",  $out->file if $out->file;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/custom_writer.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/hitwriter.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/hitwriter.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/hitwriter.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,118 @@
+#!/usr/bin/perl 
+
+# Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
+# for producing tab-delimited output of hit data from a Blast report 
+# input stream.
+#
+# Each row in the output represents data for a single hit.
+# For hits containing multiple HSPs, the output information represents a
+# summary across all HSPs.
+#
+# This parser represents a new and improved version of Bio::Tools::Blast.
+#
+# Usage:
+#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
+#   STDOUT: none, but generates an output file "hitwriter.out"
+#           containing tab-delimited data on a per-hit basis.
+#   STDERR: Progress info and any errors.
+# 
+# In this example, we create a SearchIO parser that screens out hits 
+# based on expect (or P) scores and a default HitTableWriter. This writer
+# provides the same functionality as the original Bio::Tools::Blast::table()
+# function (i.e., a tab-delimited summary of each hit per row).
+# HitTableWriter, however, is customizable so you can specify just the columns
+# you want to have in the output table.
+#
+# For more documentation about the writer, including
+# a complete list of columns, execute:
+#   perldoc Bio::SearchIO::Writer::HitTableWriter.
+#
+# For more documentation about working with Blast result objects,
+# see docs for these modules:
+#   Bio::Search::Result::BlastResult
+#   Bio::Search::Iteration::IterationI
+#   Bio::Search::Hit::BlastHit
+#   Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the Blast parser, see docs for
+#   Bio::SearchIO
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: hitwriter.pl,v 1.4 2004/05/06 02:11:46 sac Exp $
+
+use strict;
+use lib '../../';
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::HitTableWriter;
+
+# These are the columns that will be in the output table of BLAST results.
+my @columns = qw(
+    query_name
+    query_length
+    hit_name
+    hit_length
+    num_hsps
+    expect
+    frac_aligned_query
+    frac_identical_query
+    length_aln_query
+    gaps_total
+    strand_query
+    strand_hit
+);
+
+# The following columns require HSP alignment data:
+#   num_hsps, frac_identical_query, length_aln_query, gaps_total,
+#   strand_query, strand_hit
+
+print STDERR "\nUsing SearchIO->new()\n";
+
+# Note that all parameters for the $in, $out, and $writer objects are optional.
+# Default in = STDIN; Default out = STDOUT; Default writer = all columns
+# In this example, we're reading through the ARGV handle (files named on the
+# command line, else STDIN) and writing to a file called "hitwriter.out"
+# TODO: write hitless reports to STDERR and note if filtered.
+my $in     = Bio::SearchIO->new( -format => 'blast',
+                                 -fh     => \*ARGV,
+                                 -signif => 0.1,
+                                 # -verbose=> 2
+                               );
+my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns );
+my $out    = Bio::SearchIO->new( -format => 'blast',
+                                 -writer => $writer,
+                                 -file   => ">hitwriter.out" );
+
+# Need to keep a separate count of reports with hits
+# to know when to include labels. The first report may be hitless,
+# so we can't use $in->result_count
+my $hit_count = 0;
+while ( my $blast = $in->next_result() ) {
+    # Pass the report as a %s argument so that a '%' in its string form is
+    # not taken as part of the printf format.
+    printf STDERR "\nReport %d: %s\n", $in->result_count, $blast;
+
+    printf STDERR "query=%s, length=%d\n", $blast->query_name, $blast->query_length;
+
+    if ( $blast->hits ) {
+        print STDERR "# hits= ", $blast->num_hits, "\n";
+        $hit_count++;
+        my @hits = $blast->hits;
+        print STDERR "frac_aligned_query= ", $hits[0]->frac_aligned_query, "\n";
+
+        # The second argument is true only for the first report with hits.
+        $out->write_result($blast, $hit_count==1 );
+    }
+    else {
+        print STDERR "Hitless Blast Report ";
+        print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
+    }
+
+    ## For a simple progress monitor, uncomment this line:
+    #print STDERR "."; print STDERR "\n" if $in->result_count % 50 == 0;
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
+printf STDERR "Output sent to file: %s\n",  $out->file if $out->file;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/hitwriter.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/hspwriter.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/hspwriter.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/hspwriter.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,93 @@
+#!/usr/bin/env perl 
+
+# Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
+# for producing tab-delimited output of HSP data from a Blast report 
+# input stream.
+#
+# Each row in the output represents data for a single HSP.
+#
+# This parser represents a new and improved version of Bio::Tools::Blast.
+#
+# Usage:
+#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
+#   STDOUT: none, but generates an output file "hspwriter.out"
+#           containing tab-delimited data on a per-HSP basis.
+#   STDERR: Progress info and any errors.
+#
+# In this example, we create a SearchIO parser that screens out hits 
+# based on expect (or P) scores and a default HSPTableWriter. This writer
+# provides the same functionality as the original Bio::Tools::Blast::table2()
+# function (i.e., a tab-delimited summary of each hit per row).
+# HSPTableWriter, however, is customizable so you can specify just the columns
+# you want to have in the output table.
+#
+# For more documentation about the writer, including
+# a complete list of columns, execute:
+#   perldoc Bio::SearchIO::Writer::HSPTableWriter.
+#
+# For more documentation about working with Blast result objects,
+# see docs for these modules:
+#   Bio::Search::Result::BlastResult
+#   Bio::Search::Iteration::IterationI
+#   Bio::Search::Hit::BlastHit
+#   Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the Blast parser, see docs for
+#   Bio::SearchIO
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: hspwriter.pl,v 1.3 2004/02/21 10:50:34 sac Exp $
+
+use strict;
+use lib '../../';
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::HSPTableWriter;
+
+# These are the columns that will be in the output table of BLAST results.
+my @columns = qw(
+    query_name
+    query_length
+    hit_name
+    hit_length
+    rank
+    expect
+    frac_identical_query
+    length_aln_query
+    gaps_total
+    strand_query
+    strand_hit
+);
+
+
+print STDERR "\nUsing SearchIO->new()\n";
+
+# Note that all parameters for the $in, $out, and $writer objects are optional.
+# Default in = STDIN; Default out = STDOUT; Default writer = all columns
+# In this example, we're reading through the ARGV handle (files named on the
+# command line, else STDIN) and writing to a file called "hspwriter.out"
+my $in     = Bio::SearchIO->new( -format => 'blast',
+                                 -fh     => \*ARGV
+                               );
+my $writer = Bio::SearchIO::Writer::HSPTableWriter->new( -columns => \@columns );
+my $out    = Bio::SearchIO->new( -format => 'blast',
+                                 -writer => $writer,
+                                 -file   => ">hspwriter.out" );
+
+# Keep a separate count of reports with hits to know when to include
+# labels. The first report may be hitless, so $in->result_count can't be
+# used for this: labels would then never be written.
+my $hit_count = 0;
+while ( my $result = $in->next_result() ) {
+    # Pass the result as a %s argument so that a '%' in its string form is
+    # not taken as part of the printf format.
+    printf STDERR "\nReport %d: %s\n", $in->result_count, $result;
+
+    if ( $result->hits ) {
+        $hit_count++;
+        # The second argument is 1 only for the first report with hits.
+        $out->write_result($result, ($hit_count == 1 ? 1 : 0) );
+    }
+    else {
+        print STDERR "Hitless Blast Report: $result ";
+        print STDERR ($result->no_hits_found ? "\n" : "(filtered)\n");
+    }
+
+    ## For a simple progress monitor, uncomment this line:
+    #print STDERR "."; print STDERR "\n" if $in->result_count % 50 == 0;
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
+printf STDERR "Output sent to file: %s\n",  $out->file if $out->file;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/hspwriter.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/htmlwriter.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/htmlwriter.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/htmlwriter.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+#!/usr/bin/perl 
+
+# Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
+# for producing HTML Blast output from a Blast report input stream.
+#
+# Usage:
+#   STDIN:  none; supply filename of BLAST report on command-line
+#   STDOUT: none; generates an output file "searchio.html"
+#           containing HTML-formatted Blast Report
+#   STDERR: Any errors that occurred.
+#
+# For more documentation about the writer, including
+# a complete list of columns, see the docs for 
+#   Bio::SearchIO::Writer::HTMLResultWriter.
+#
+# For more documentation about working with Blast result objects,
+# see docs for these modules:
+#   Bio::Search::Result::BlastResult
+#   Bio::Search::Iteration::IterationI
+#   Bio::Search::Hit::BlastHit
+#   Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the Blast parser, see docs for
+#   Bio::SearchIO
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: htmlwriter.pl,v 1.5 2004/02/21 10:50:34 sac Exp $
+
+
+use strict;
+use lib '../../';
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::HTMLResultWriter;
+
+my $outfile = "searchio.html";
+my $file = shift or die "Usage: $0 <BLAST-report-file>\n       HTML output is saved to $outfile\n";
+
+my $in = Bio::SearchIO->new( -format => 'blast',
+                             -file => $file,  #comment this out to read STDIN
+                             #-fh => \*ARGV,  #uncomment this to read from STDIN
+                             -verbose => 0 );
+
+# Direct method calls (Class->new) avoid the parsing ambiguities of the
+# indirect-object form "new Class(...)".
+my $writer = Bio::SearchIO::Writer::HTMLResultWriter->new();
+my $out = Bio::SearchIO->new(-writer => $writer,
+                             -file   => ">$outfile");
+
+
+while ( my $result = $in->next_result() ) {
+    # The block ends with a true value, so a false result from eval means
+    # an exception was thrown even if $@ has been cleared in the meantime.
+    my $written = eval {
+        # printf STDERR "Report %d: %s\n", $in->result_count, $result;
+        $out->write_result($result, 1);
+        1;
+    };
+    unless ($written) {
+        warn "Warning: Blast parsing or writing exception caught for $result:\n$@\n";
+    }
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
+printf STDERR "Output sent to file: %s\n",  $out->file if $out->file;


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/htmlwriter.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_features.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_features.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_features.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,57 @@
+#!/usr/local/bin/perl 
+
+# Example usage of a SearchIO::psiblast parser of traditional format Blast 
+# and PSI-Blast reports.
+# Illustrates how to grab a set of SeqFeatures from a Blast report.
+# This parser represents a new and improved version of Bio/Tools/Blast.pm.
+#
+# Usage:
+#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
+#   STDOUT: feature start, end data
+#   STDERR: Processing info, such as the number of reports processed
+#           and the number of hitless reports.
+# 
+# For more documentation about working with Blast result objects,
+# see the documentation for these modules:
+#   Bio::Search::Result::BlastResult
+#   Bio::Search::Hit::BlastHit
+#   Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the PSI-Blast parser, see docs for
+#   Bio::SearchIO::psiblast
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: psiblast_features.pl,v 1.6 2004/02/21 10:50:34 sac Exp $
+
+use strict;
+use lib '../../';
+use Bio::SearchIO;
+
+my $in = Bio::SearchIO->new( -format  => 'psiblast',
+                             -fh      => \*ARGV,
+                             -signif  => 0.1,
+                             -verbose => 0 );
+
+# Reports without hits are set aside and listed once parsing is done.
+my @hitless_reports;
+
+REPORT:
+while ( my $report = $in->next_result() ) {
+
+    unless ( $report->hits ) {
+        push @hitless_reports, $report;
+        next REPORT;
+    }
+
+    # Print the start and end of every feature the report yields.
+    while ( my $feat = $report->next_feature() ) {
+        print "Feature from ", $feat->start, " to ", $feat->end, "\n";
+    }
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
+printf STDERR "\n%d reports had no hits:\n", scalar(@hitless_reports);
+
+for my $report (@hitless_reports) {
+    print STDERR "No hits for query ", $report->query_name;
+    if ( $report->no_hits_found ) {
+        print STDERR "\n";
+    }
+    else {
+        print STDERR "(filtered)\n";
+    }
+}
+  
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_features.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_iterations.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_iterations.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_iterations.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,75 @@
+#!/usr/bin/perl 
+
+# Demonstrates the use of a SearchIO parser for processing
+# the iterations within a PSI-BLAST report.
+#
+# Usage:
+#   STDIN:  none; supply filename of PSI-BLAST report on command-line
+#   STDOUT: information parsed from the input data.
+#   STDERR: errors.
+#
+# For more documentation about working with Iteration objects,
+# see docs for:
+#   Bio::Search::Iteration::IterationI
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: psiblast_iterations.pl,v 1.2 2004/02/21 10:50:34 sac Exp $
+
+use strict;
+use lib '../../';
+
+use Bio::SearchIO;
+
+my $file = shift or die "Usage: $0 <BLAST-report-file>\n";
+my $in = Bio::SearchIO->new(-format => 'blast',
+                            -file => $file, #comment this out to read STDIN
+                            #-fh => \*ARGV,  #uncomment this to read STDIN
+                           );
+
+# Iterate over all results in the input stream
+while (my $result = $in->next_result) {
+
+    printf "Result #%d: %s\n", $in->result_count, $result->to_string;
+    printf "Total Iterations: %d\n", $result->num_iterations();
+
+    # Iterate over all iterations and process old and new hits
+    # separately.
+
+    while( my $it = $result->next_iteration) {
+        printf "\nIteration %d\n", $it->number;
+        printf "Converged: %d\n", $it->converged;
+
+        # Print out the hits not found in previous iteration
+        printf "New hits: %d\n", $it->num_hits_new;
+        while( my $hit = $it->next_hit_new ) {
+            printf "  %s, Expect=%g\n", $hit->name, $hit->expect;
+        }
+
+        # Print out the hits found in previous iteration
+        printf "Old hits: %d\n", $it->num_hits_old;
+        while( my $hit = $it->next_hit_old ) {
+            printf "  %s, Expect=%g\n", $hit->name, $hit->expect;
+        }
+    }
+    printf "%s\n\n", '-' x 50;
+}
+
+# Exactly one conversion for the one argument; the format used to carry a
+# trailing "%s" that had no argument to consume.
+printf "Total Reports processed: %d\n", $in->result_count;
+
+__END__
+
+# NOTE: The following functionality is just proposed
+# (does not yet exist but might, given sufficient hue and cry):
+
+# Zero-in on the new hits found in last iteration.
+# By default, iteration() returns the last one.
+
+my $last_iteration = $result->iteration();
+while( my $hit = $last_iteration->next_hit) {
+    # Do something with new hit...
+}
+
+# Get the first iteration
+
+my $first_iteration = $result->iteration(1);
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/psiblast_iterations.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/rawwriter.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/rawwriter.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/rawwriter.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+#!/usr/bin/env perl 
+
+# Demonstrates the use of a SearchIO Blast parser for producing
+# output of raw HSP data from a Blast report input stream.
+#
+# Shows how to print out raw BLAST alignment data for each HSP.
+#
+# Usage:
+#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
+#   STDOUT: Raw alignment data for each HSP of each hit (BLAST format)
+#   STDERR: Progress info and any errors.
+#
+# For more documentation about working with Blast result objects,
+# see docs for these modules:
+#   Bio::Search::Result::BlastResult
+#   Bio::Search::Hit::BlastHit
+#   Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the PSI-Blast parser, see docs for
+#   Bio::SearchIO::psiblast
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: rawwriter.pl,v 1.2 2004/02/21 10:50:34 sac Exp $
+#
+# TODO: 
+#   * Implement a Bio::SearchIO::Writer::HSPTextWriter object
+#     that can do this. Then this example can fit into the standard
+#     model used by the other writer examples in which a writer
+#     object is created and hooked up with a SearchIO output object.
+
+use strict;
+
+use lib '../../';
+
+use Bio::SearchIO;
+
+# Only the raw alignments are wanted here, and screening needs nothing more
+# than the significance info (E- or P-value), so a full parse of the
+# alignments is unnecessary. The -shallow_parse flag says so, which should
+# result in faster processing.
+# TODO: Convert this to use -format='blast'. Shallow-parse option not supported there.
+my %parser_args = ( -format        => 'psiblast',
+                    -fh            => \*ARGV,
+                    -signif        => 0.1,
+                    -shallow_parse => 1,
+                    -hold_raw_data => 1 );
+my $in = Bio::SearchIO->new(%parser_args);
+
+while ( my $result = $in->next_result() ) {
+    print STDERR "\nBLAST Results for $result\n\n";
+
+    # Hits are numbered from 1 within each result.
+    my $hit_number = 0;
+    for my $hit ( $result->hits ) {
+        $hit_number++;
+        print "Alignment for hit #", $hit_number, "\n\n";
+        print $hit->raw_hit_data();
+    }
+    print "=" x 50 , "\n";
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/rawwriter.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/resultwriter.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/resultwriter.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/resultwriter.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+#!/usr/bin/perl 
+
+# Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
+# for producing tab-delimited output of result data from a Blast report 
+# input stream.
+#
+# This writer only outputs information at the level of the result object.
+# This shows that you can work with a writer that only knows about 
+# Bio::Search::Result objects and doesn't care about hit or HSP data. 
+# Therefore, the output from this example doesn't contain any information 
+# about hits or HSPs. 
+# See the hitwriter.pl and hspwriter.pl examples for that.
+#
+# This parser represents a new and improved version of Bio::Tools::Blast.
+#
+# Usage:
+#   STDIN:  stream containing one or more BLAST or PSI-BLAST reports.
+#   STDOUT: none, but generates an output file "resultwriter.out"
+#           containing tab-delimited data on a per-report basis.
+#   STDERR: Any errors that occurred.
+#
+# For more documentation about the writer, including
+# a complete list of columns, see the docs for 
+#   Bio::SearchIO::Writer::ResultTableWriter.
+#
+# For more documentation about working with Blast result objects,
+# see docs for these modules:
+#   Bio::Search::Result::BlastResult
+#   Bio::Search::Iteration::IterationI
+#   Bio::Search::Hit::BlastHit
+#   Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the Blast parser, see docs for
+#   Bio::SearchIO
+#
+# Author: Steve Chervitz <sac at bioperl.org>
+# Revision: $Id: resultwriter.pl,v 1.3 2004/02/21 10:50:34 sac Exp $
+
+
+use strict;
+use lib '../../';
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::ResultTableWriter;
+use Bio::SearchIO::Writer::HTMLResultWriter;
+
+print "\nUsing SearchIO->new()\n";
+
+
+# Note that all parameters for the $in, $out, and $writer objects are optional.
+# Default in = STDIN; Default out = STDOUT; Default writer = all columns
+# In this example, we're reading from the files named on the command line
+# (or STDIN, via ARGV), writing the tab-delimited table to "resultwriter.out"
+# and an HTML rendering to "searchio.html", and using the default columns
+# for the table writer.
+# We're also telling the script to timeout if input isn't received
+# within 10 sec. (Note the clock is still ticking when you background the job.)
+# Setting verbose to 1 is useful for debugging.
+my $in = Bio::SearchIO->new( -format => 'blast', 
+                             -fh => \*ARGV,
+                             -signif => 0.1, 
+                             -verbose => 0, 
+                             -timeout_sec => 10 );
+# not specifying any columns to get the default.
+my $writer = Bio::SearchIO::Writer::ResultTableWriter->new();
+my $out    = Bio::SearchIO->new( -format => 'blast', 
+                                 -writer => $writer,
+                                 -file => ">resultwriter.out");
+
+my $writerhtml = Bio::SearchIO::Writer::HTMLResultWriter->new();
+my $outhtml    = Bio::SearchIO->new( -writer => $writerhtml,
+                                     -file   => ">searchio.html" );
+
+
+while ( my $result = $in->next_result() ) {
+    # The block ends with a true value so a failure is detected from eval's
+    # own result instead of from $@, which later code may overwrite.
+    my $written = eval {
+        # printf STDERR "Report %d: $result\n", $in->result_count;
+
+        # The second argument is true for the first report only
+        # (presumably the "include header" switch -- TODO confirm).
+        my $is_first_report = ( $in->result_count == 1 ) ? 1 : 0;
+        $out->write_result($result, $is_first_report);
+
+        $outhtml->write_result($result, 1);
+
+        # To get at the statistical parameters:
+        # Calling raw_statistics() returns a list containing the
+        # unparsed lines of the parameters section of the report.
+        # Here we're only interested in parameters beginning with "effective".
+        #         print "Report Stats, effective data:\n";
+        #         foreach( $result->raw_statistics) {
+        #             print "$_" if /^effective/i;
+        #         }
+
+        ## For a simple progress monitor, uncomment this line:
+        #print STDERR "."; print STDERR "\n" if $in->result_count % 50 == 0;
+        1;
+    };
+    unless ($written) {
+        warn "Warning: Blast parsing or writing exception caught for $result:\n$@\n";
+    }
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
+# Report both output files, not only the tab-delimited one.
+for my $stream ($out, $outhtml) {
+    printf STDERR "Output sent to file: %s\n",  $stream->file if $stream->file;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/resultwriter.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+# Convert WABA search output to GFF: every HSP of every hit of every result
+# is handed to a Bio::Tools::GFF writer.
+#
+# Usage: waba2gff.pl [-i|--input FILE] [-o|--output FILE] [-v|--verbose] [FILE]
+#   Without an input file the report is read through ARGV (i.e. STDIN);
+#   without -o the GFF writer is created with no -file argument.
+use strict;
+use Bio::SearchIO;
+use Bio::Tools::GFF;    # the writer class constructed below; it was never loaded
+use Getopt::Long;
+use Benchmark;
+
+my ($infile,$outfile,$verbose);
+
+GetOptions( 
+	    'i|input:s'  => \$infile,
+	    'o|output:s' => \$outfile,
+	    'v|verbose'  => \$verbose,
+	    );
+$infile = shift unless $infile;
+
+# Pick the input source at run time instead of by editing this script:
+# a named file when one was given, otherwise the ARGV/STDIN stream.
+my @source = $infile ? ( -file => $infile ) : ( -fh => \*ARGV );
+my $in = Bio::SearchIO->new(-format  => 'waba',
+			    @source,
+			    -verbose => $verbose);
+
+my $out;
+if( defined $outfile) {
+    $out = Bio::Tools::GFF->new(-file => ">$outfile");
+} else {
+    $out = Bio::Tools::GFF->new(-verbose => $verbose);
+}
+
+while( my $r = $in->next_result ) {
+    while( my $hit = $r->next_hit ) {
+	while( my $hsp = $hit->next_hsp ) {
+	    $out->write_feature($hsp);
+	}
+    }
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff3.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff3.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/searchio/waba2gff3.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+waba2gff3.pl - convert waba output into GFF3 suitable for Gbrowse
+
+=head1 DESCRIPTION
+
+This script turns WABA output into GFF3 output for the query sequence.
+If you need to get this where the Hit sequence is the reference
+sequence you'll want to flip-flop the code to use hit instead of
+query.  I didn't try and make it that general yet.
+
+I don't (yet) know how the 'score' field is calculated by Wormbase
+folks for WABA data in their GFF dumps.  I'm checking on that but it
+shouldn't make a difference for Gbrowse.
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+Duke University, 
+
+=head1 LICENSE
+
+This script is available under the Perl Artistic License meaning you
+can do with it what you wish.  
+
+Please do tell me about bugs or improvements so I can roll those back
+in for other people to use.
+
+
+=cut
+
+ 
+
+use strict;
+use Bio::SearchIO;
+use Bio::SeqFeature::Generic;
+use Bio::Tools::GFF;
+use Getopt::Long;
+
+# WABA HMM state character => suffix of the GFF source tag ("waba_<suffix>")
+my %States = ('1' => 'coding',
+	      '2' => 'coding',
+	      '3' => 'coding',
+	      'L'  => 'weak',
+	      'H'  => 'strong',
+	     );
+my ($infile,$outfile,$verbose,$version);
+$version = 3;
+my $ptag = 'nucleotide_match';
+GetOptions( 
+    'i|input:s'  => \$infile,
+    'o|output:s' => \$outfile,
+    'v|verbose'  => \$verbose,
+    # takes an optional integer so that "--version 2" selects the GFF version
+    'version:i'  => \$version,
+    'p|primary|primary_tag:s' => \$ptag,
+);
+# A bare "--version" arrives as 0; keep the value 1 that the earlier
+# valueless flag produced.
+$version ||= 1;
+$infile = shift unless $infile;
+
+# Input: the named file if there is one, otherwise the ARGV/STDIN stream
+my $in;
+
+if( $infile ) {
+    $in = Bio::SearchIO->new(-verbose => $verbose,
+			     -format  => 'waba',
+			     -file    => $infile);
+} else {
+    $in = Bio::SearchIO->new(-verbose => $verbose,
+			     -format  => 'waba',
+			     -fh      => \*ARGV);
+}
+
+my $out;
+if( defined $outfile) {
+    $out = Bio::Tools::GFF->new(-gff_version => $version,
+				-file => ">$outfile",
+				-verbose => $verbose);
+} else {
+    $out = Bio::Tools::GFF->new(-gff_version => $version,
+				-verbose     => $verbose);
+}
+
+while( my $r = $in->next_result ) {
+    while( my $hit = $r->next_hit ) {
+	while( my $hsp = $hit->next_hsp ) {
+	    # now split this HSP up into pieces
+	    my ($qs,$qe,$hs,$he)= ($hsp->query->start,
+				   $hsp->query->end,
+				   $hsp->hit->start,
+				   $hsp->hit->end);
+	    my $i = 0;
+	    # grab the HMM states from Jim's WABA output
+	    my $stateseq = $hsp->hmmstate_string;
+	    my $state_len = length($stateseq);
+	    my ($piece,$gap,@pieces);
+	    $piece = {'length'   => 0,
+		      'str'      => '',
+		      'start'    => $i};
+	    $gap = 0;
+	    
+	    # parse the state string, finding the gaps (Q and T states)
+	    # runs of Non Q or T letters indicate a 'piece'
+	    while($i <  $state_len ) {
+		my $char = substr($stateseq,$i,1);
+		if($char =~ /[QT]/ ) {
+		    $gap++;
+		} elsif( $gap ) {
+		    # just finished a gap: store the piece that preceded it
+		    $piece->{'length'} = length($piece->{'str'});
+		    push @pieces, $piece;
+		    # the state that ends the gap is the first state of the
+		    # new piece, so it must not be thrown away
+		    $piece = {'length' => 0,
+			      'str'    => $char,
+			      'start'  => $i };
+		    $gap = 0;
+		} else {
+		    $piece->{'str'} .= $char;
+		}
+		$i++;
+	    }
+	    # The piece still open when the string ends has not been stored
+	    # yet; without this an alignment with no gap produces no output.
+	    if( length $piece->{'str'} ) {
+		$piece->{'length'} = length($piece->{'str'});
+		push @pieces, $piece;
+	    }
+	    # for each piece, this could be made up of things either 
+	    # as H,L, or 123 state. 
+	    # In retrospect this could all probably be contained in a 
+	    # single loop, but now I'm feeling lazy. I had just converted this
+	    # from using 'split' in the first place if you want to know
+	    # why it is structured this way....
+	    for my $piece ( @pieces ) {
+		
+		my $len = $piece->{length};
+		my $start = $piece->{start};
+		my $end = $start + $len;
+		my ($j) = 0;
+		my $state = substr($piece->{str},$j++,1);
+		warn("start is $start end is $end len is $len\n") if $verbose;
+		# split the piece into sets: runs of the same letter, with
+		# the digit states 1/2/3 all counted as one kind of run
+		my ($set,@sets) = ($state);
+		while( $j < $len ) {
+		    my $char = substr($piece->{str},$j++,1);
+		    next unless( $char);
+		    if( ($char =~ /\d/ && $state =~ /\d/) ||
+			($char =~ /\w/ && $char eq $state) ) {
+			$set .= $char;
+		    } else {
+			push @sets, $set;
+			$set = $state = $char;
+		    }		    
+		}
+		push @sets, $set;
+		for my $set (@sets ) {
+		    my $c = substr($set,0,1);
+		    if( ! $c ) {
+			warn("no char for '$set'\n") if $verbose;
+			next;
+		    }
+		    my $type ='waba_'.$States{$c};
+		    # NOTE(review): -end is start + length($set), so the
+		    # feature is one base longer than the set and $start then
+		    # advances by length($set)+2 -- confirm this is intended.
+		    my $f = Bio::SeqFeature::Generic->new(
+			-start => $qs + $start,
+			-end   => $qs + $start + length($set),
+			-strand=> $hsp->query->strand,
+			-seq_id=> $hsp->query->seq_id,
+			-score => $hsp->query->score,
+			-primary_tag => $ptag,
+			-source_tag  => $type,
+			-tag    => {
+			    'ID' => $hsp->hit->seq_id
+			    });
+		    $f->add_tag_value('ID',$hs+$start,$hs+$start+$f->length);
+		    $out->write_feature($f);
+		    $start += $f->length+1;
+		}
+	    }
+	}
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/examples/sirna/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/sirna/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/sirna/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+These are scripts for designing SiRNA reagents using Bio::Tools::SiRNA.pm
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/examples/sirna/rnai_finder.cgi
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/sirna/rnai_finder.cgi	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/sirna/rnai_finder.cgi	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,551 @@
+#!/usr/bin/perl -w
+
+=pod
+
+=head1 NAME
+
+rnai_finder.cgi
+
+=head1 DESCRIPTION
+
+A CGI script using the Bio::Tools::SiRNA package to design RNAi reagents.  
+Retrieves sequences from NCBI and generates output in graphic and tabular form.
+
+=head1 INSTALLATION
+
+To use this script, place it in an appropriate cgi-bin directory on a web server.  
+The script needs to write its graphic maps to a temporary directory.  Please update
+$TMPDIR and $TMPURL to suit your local configuration.
+
+=head1 AUTHOR
+
+Donald Jackson (donald.jackson at bms.com)
+
+=head1 SEE ALSO
+
+L<Bio::Tools::SiRNA>, L<Bio::Graphics::Panel>, L<Bio::DB::NCBIHelper>, L<CGI>
+
+=cut
+
+use Bio::Tools::SiRNA;
+
+use Bio::Graphics::Panel;
+use Bio::DB::NCBIHelper;
+use Bio::Seq::RichSeq; # for hand-entry
+use Bio::SeqFeature::Generic; 
+
+use GD::Text::Align;
+use Clone qw(clone);
+
+use CGI;
+use CGI::Carp qw (fatalsToBrowser carpout);
+
+my $q = CGI->new;
+
+
+
+# define a bunch of constants
+my %COLORRANKS = ( 1 => 'red',
+		   2 => 'orchid',
+		   3 => 'blue' );
+my $TMPDIR = '/var/www/htdocs/tmp/';
+my $TMPURL = '/tmp/';
+my $ATGPAD = 75; # how far from start do we wait?
+my $NOLIGOS = 3;
+
+# Append warnings and errors to a log file inside $TMPDIR (via carpout).
+my $log = $TMPDIR . 'RNAiFinder.log';
+open (LOG, '>>', $log) or die "Cannot append to $log: $!";
+carpout(LOG);
+
+print $q->header,
+    $q->start_html;
+    
+print $q->h1('RNAi Finder');
+
+
+# A submitted 'Design' button means "run the design"; anything else shows
+# the settings form.
+if ($q->param('Design')) {
+    # An accession without a pasted sequence is fetched remotely; otherwise
+    # the target is built from the form fields.
+    my $target = ($q->param('accession') and !$q->param('seq'))
+	? get_target()
+	: make_target();
+    # Both builders return nothing after reporting an error to the user,
+    # so only design when there is a target to work on.
+    get_rnai($target) if $target;
+}
+else {
+    get_settings();
+}
+
+# close the document opened by start_html above
+print $q->end_html;
+
+
+sub get_settings {
+    # Print the introductory help text followed by the design form.
+    # Takes no arguments; writes HTML to the currently selected handle.
+    # The form is a table in which each row of column headings is printed
+    # directly above the row holding the matching input fields.
+    print <<EOM1;
+<P>Oligos are designed as described on the <A HREF="http://www.mpibpc.gwdg.de/abteilungen/100/105/sirna.html" TARGET="Tuschl">Tuschl lab web page</A> and are ranked as follows: 
+<UL>
+<LI><B>New:</B> Selecting 'Pol3-compatible targets' looks for oligos with the 
+pattern NAR(N17)YNN which can be synthesized or expressed from a Pol3 promoter.
+<br>This selection <b>overrides</b> the 'Cutoff' rank.
+<LI>Oligos with Rank = 1 (best) match the AAN(19)TT rule.
+<LI>Oligos with Rank = 2 match the AAN(21) rule 
+<LI>Oligos with Rank = 3 match the NAN(21) rule.
+</UL>
+<P>If percent GC and specificity are similar, Rank 1 oligos are better. All 3 prime overhangs are converted to TT; the rest of the sequence is transcribed into RNA</P>
+
+<h3>Modifications to published rules:</h3>
+<ul>
+<li>
+Runs of 3 or more consecutive Gs on either strand are skipped - these can cause problems in synthesis. 
+<li>Users may choose to exclude oligos that overlap single nucleotide polymorphisms (ON by default).  SNP data comes from the NCBI dbSNP database.  
+<li>'Low-complexity' regions (such as runs of a single nucleotide) are also excluded.
+</ul>
+
+EOM1
+    
+    print $q->start_form;
+    print $q->h2('Enter your sequence and other parameters:'), "\n";
+    print $q->p('The values already here are DEFAULTS - you should change them to suit YOUR sequence');
+    print $q->start_table();
+    print    $q->TR( $q->th({-align=> 'left'}, 'Exclude oligos with SNPs?'),
+		     $q->td($q->radio_group(-name => 'avoid_snps', 
+					    -values => [1,0],
+					    -default => 1,
+					    -labels => {1 => 'Yes', 0 => 'No'}
+					    )),
+		     );
+
+    print $q->TR( $q->th({-align=> 'left'}, 'Sequence Name:'),
+		  $q->td({-align=> 'left'},$q->textfield('accession')),
+		  $q->td({-align=> 'left'}, 
+			 $q->em( q(Enter an accession and you won't have to enter the <br>sequence or start/stop. Use accessions beginning with NM_ if possible.))),
+		  );
+
+    print $q->TR( $q->th({-align=> 'left'}, ['Position of initiator ATG:', 
+					     'NT after start to exclude:',
+					     'Position of Stop codon:' ]));
+    print $q->TR( $q->td({-align=> 'left'}, 
+					   [$q->textfield(-name => 'cdstart', -default => 1),
+					    $q->textfield(-name => 'atgpad', -default => $ATGPAD),
+					    $q->textfield('cdend'), ]));
+    print $q->TR( $q->th({-align=> 'left'}, ['Minimum Fraction GC:', 
+					     'Maximum Fraction GC:', 
+					     'Rank cutoff',
+					     ]));
+    # The GC and rank inputs belong under the headings just printed; they
+    # used to be emitted as the first row of the table, with no labels.
+    print $q->TR( $q->td({-align=> 'left'}, 
+		       [
+			$q->textfield(-name 	=> 'mingc', -default => '0.40'),
+			$q->textfield(-name 	=> 'maxgc', -default => '0.60'),
+			]
+		       ),
+		      $q->td({-align=> 'left'},
+			     $q->popup_menu(-name	 => 'worstrank', 
+				      -values 	=> [1,2,3], 
+				      -default 	=> 2,
+				      ),
+			     $q->b('OR'),
+			     $q->checkbox(-name 		=> 'pol3',
+				    -label		=> 'Pol3 compatible',
+				    -default	=> 0,
+				    ),
+			     ),
+		      );	
+    print $q->TR($q->th({-align=> 'left', -colspan=>2},'cDNA Sequence in plain text or FASTA format'),
+		 $q->td( $q->a({-href =>'Fasta_format.html', -target => 'Fasta_desc'}, 'What is FASTA format?')),
+		 );
+    print $q->TR($q->td({-align => 'left', -colspan=>3},
+		  $q->textarea( -name =>'seq',
+				-rows => 4,
+				-columns => 80, 
+				-wrap => 'virtual',
+				)));
+    print $q->TR( $q->th({-align => 'left', -colspan=>3},
+			 'Output options: '));
+    print $q->TR( $q->td({-align=> 'left'},
+		   [ $q->checkbox(-name => 'Graphic', -checked => 'checked'), 
+		     $q->checkbox(-name =>  'Table',  -checked => 'checked'), 
+						 ]));			       		   		
+    print $q->TR($q->td({-align=> 'left', -colspan=>3}, $q->submit('Design')));
+    print $q->end_table();		
+    print $q->end_form;
+
+}
+
+sub get_rnai {
+    # design and output RNAi reagents
+    #
+    # Argument: the target sequence object.  Nothing is done without one.
+    # Builds a Bio::Tools::SiRNA factory from the submitted form values,
+    # runs the design and prints whichever of the Graphic / Table / Text
+    # outputs were requested.
+    my ($gene) = @_;
+    return unless $gene;
+
+    # The form posts the GC limits as 'mingc' / 'maxgc'; the names
+    # 'min_gc' / 'max_gc' read previously are still accepted.
+    my $min_gc = $q->param('mingc') || $q->param('min_gc') || 0.40;
+    my $max_gc = $q->param('maxgc') || $q->param('max_gc') || 0.60;
+
+    # "|| 1" would turn an explicit "No" (0) back into 1, so fall back to
+    # the default only when the field was not submitted at all.
+    my $avoid_snps = $q->param('avoid_snps');
+    $avoid_snps = 1 unless defined $avoid_snps and length $avoid_snps;
+
+    my $factory = Bio::Tools::SiRNA->new( -target 	=> $gene, 
+					  -tmpdir	=> $TMPDIR,
+					  -cutoff 	=> $q->param('worstrank') || 2,
+					  -avoid_snps	=> $avoid_snps, 
+					  -min_gc	=> $min_gc,
+					  -max_gc	=> $max_gc,
+					  -pol3		=> $q->param('pol3') || 0,
+					  );
+
+    print $q->p('Designing Pol3-compatible oligos') if ($q->param('pol3'));
+
+    my @pairs = $factory->design;
+
+    draw_gene($gene) if ($q->param('Graphic'));
+    print_table($gene->accession, \@pairs) if ($q->param('Table'));
+    print_text($gene->accession, \@pairs) if ($q->param('Text'));
+}
+
+sub get_target {
+    # Fetch the sequence for the submitted accession.
+    # Returns the sequence object, or nothing after printing an error.
+    my $acc = $q->param('accession');
+    my $gb = Bio::DB::NCBIHelper->new();
+
+    # Trap a thrown retrieval error so that it reaches the error branch
+    # below instead of aborting the page.
+    my $seq = eval { $gb->get_Seq_by_acc($acc) };
+    warn "Retrieval of $acc failed: $@" if $@;
+
+    return $seq if $seq;
+
+    print_error("Unable to retrieve sequence from GenBank using accession $acc");
+    return;
+}
+
+sub make_target {
+      # Build the target sequence object from the pasted sequence.
+      # Returns a Bio::Seq::RichSeq carrying a single CDS feature, or nothing
+      # after printing an error when no sequence name can be determined.
+      # A missing sequence is fatal (print_error is called with its flag set).
+
+      # sanity chex - do we have the necessary info?
+      $q->param('seq') or print_error("Please supply a sequence", 1);
+      my $seq = $q->param('seq');
+      my $name;
+
+      # is sequence in fasta format?
+      if ($seq =~ /^>/) {
+	  my ($head, $realseq) = split (/\n/, $seq, 2);
+	  # The name is the first word after '>'.  Capturing in list context
+	  # leaves $name undefined on failure instead of picking up a stale
+	  # $1, and a header with nothing after the name is accepted too.
+	  ($name) = $head =~ /^>\s*(\S+)/;
+	  $realseq = '' unless defined $realseq;
+	  $realseq =~ s/[\s|]//g;
+	  $seq = $realseq;
+      }
+      else {
+	  $seq =~ s/[\s|]//g;
+      }
+
+      # a typed-in accession names a plain sequence or an unnamed FASTA one
+      $name = $q->param('accession') unless (defined $name and length $name);
+      unless (defined $name and length $name) {
+  	print_error('Please supply a sequence name!');
+  	return;
+      }
+      length($seq) or print_error("Please supply a sequence", 1);
+
+      # The form posts these fields as 'cdstart' / 'cdend'; the names
+      # 'cds_start' / 'cds_end' read previously are still accepted.
+      my $cds_start = $q->param('cdstart') || $q->param('cds_start') || 1;
+      my $cds_end   = $q->param('cdend')   || $q->param('cds_end')   || length($seq);
+
+      # create a new Bio::Seq::RichSeq object from parameters 
+      my $seqobj = Bio::Seq::RichSeq->new( -seq 		=> $seq,
+					   -accession_number	=> $name,
+					   -molecule		=> 'DNA',
+					   
+  				     );
+      my $cds = Bio::SeqFeature::Generic->new( -start 	=> $cds_start,
+					       -end	=> $cds_end,
+  					     );
+      $cds->primary_tag('CDS');
+      $seqobj->add_SeqFeature($cds);
+      return $seqobj;
+     
+}
+sub draw_gene {
+# now draw a pretty picture
+#
+# Argument: the target sequence object.
+# Draws one track per feature class onto a Bio::Graphics::Panel, writes the
+# picture as a PNG file into $TMPDIR, then prints the <img> tag, a colour
+# legend and the image map for the SiRNA pairs.
+    my ($gene) = @_;
+
+    my $panel = Bio::Graphics::Panel->new( -segment 	=> $gene,
+					   -width 	=> 600,
+					   -pad_top	=> 100,
+					   -pad_bottom  => 20,
+					   -pad_left	=> 50,
+					   -pad_right	=> 50,
+					   -fontcolor	=> 'black',
+					   -fontcolor2  => 'black',
+					   -key_color	=> 'white',
+					   -grid	=> 1,
+					   -key_style	=> 'between',
+					   #-gridcolor	=> 'lightgray',
+					   );
+
+    my $genefeat = Bio::SeqFeature::Generic->new( -start	=> 1,
+						  -end 	       	=> $gene->length);
+
+    $panel->add_track( arrow	=> $genefeat,
+		       -bump	=> 0,
+		       -tick	=> 2,
+		       -label 	=> 1,
+		       );
+
+    # group the top-level features by primary tag, one list per track
+    my %feature_classes;
+
+    foreach my $feat ($gene->top_SeqFeatures) {
+	$feature_classes{ $feat->primary_tag } ||= [];
+
+	push(@{ $feature_classes{ $feat->primary_tag } }, $feat);
+    }
+
+# for some reason, Bio::Graphics insists on drawing subfeatures for SiRNA::Pair objects...
+    my $cleanpairs = cleanup_feature($feature_classes{'SiRNA::Pair'});
+
+# draw
+    $panel->add_track( transcript	=> $feature_classes{'gene'},
+		       -bgcolor	=> 'green',
+		       -fgcolor	=> 'black',
+		       -fontcolor2  => 'black',
+		       -key		=> 'Gene',
+		       -bump	=> +1,
+		       -height	=> 8,
+		       -label	=> \&feature_label,
+		       -description	=> 1,
+		       );
+
+    $panel->add_track( transcript2	=> $feature_classes{'CDS'},
+		       -bgcolor		=> 'blue',
+		       -fontcolor2  => 'black',
+		       -fgcolor		=> 'black',
+		       -key		=> 'CDS',
+		       -bump		=> +1,
+		       -height		=> 8,
+		       -label		=> \&feature_label,
+		       -description		=> \&feature_desc,
+		       );
+
+    $panel->add_track( $feature_classes{'variation'},
+		       -bgcolor	=> 'black',
+		       -fgcolor	=> 'black',
+		       -fontcolor2  => 'black',
+		       -key	=> 'SNPs',
+		       -bump	=> +1,
+		       -height	=> 8,
+		       -label	=> \&snp_label,
+		       #-glyph	=> 'triangle',
+		       -glyph	=> 'diamond',
+		       -description		=> \&feature_desc,
+		       );
+
+    $panel->add_track( generic	=> $feature_classes{'Excluded'},
+		       -bgcolor	=> 'silver',
+		       -fgcolor	=> 'black',
+		       -fontcolor  => 'black',
+		       -fontcolor2  => 'black',
+		       -key	=> 'Excluded Regions',
+		       -bump	=> +1,
+		       -height	=> 6,
+		       -label	=> \&feature_label,
+		       -description		=> \&feature_desc,
+		       );
+
+    $panel->add_track( 
+		       generic => $cleanpairs,
+		       -bgcolor	=> \&feature_color,
+		       -fgcolor	=> \&feature_color,
+		       -fontcolor  => 'black',
+		       -fontcolor2  => 'black',
+		       -key	=> 'SiRNA Reagents',
+		       -bump	=> +1,
+		       -height	=> 8,
+		       -label	=> \&feature_label,
+		       -glyph	=> 'generic',
+		       -description		=> \&feature_desc,
+		       );
+
+    my $gd = $panel->gd;
+    my $black = $gd->colorAllocate(0,0,0);
+    my $txt = GD::Text::Align->new($gd);    
+    $txt->set( valign => 'center', align => 'center', color => $black);
+    #$txt->set_font(['/usr/share/fonts/truetype/VERDANA.TTF',gdGiantFont ], 10);
+    $txt->set_font(gdGiantFont);
+    $txt->set_text("RNAi Reagents for ".$gene->accession );
+    $txt->draw(200, 50, 0);
+
+    # The accession may come straight from the form, so keep only characters
+    # that are safe in a file name before using it in a path: no directory
+    # separators and nothing the open mode could be confused by.
+    (my $basename = $gene->accession) =~ s/[^\w.\-]/_/g;
+    my $pngfile = $TMPDIR . $basename . '.png';
+    my $pngurl = $TMPURL . $basename . '.png';
+    open (IMG, '>', $pngfile) or die "Cannot write $pngfile: $!";
+    binmode IMG;
+    print IMG $gd->png;
+    close IMG or die "Cannot finish writing $pngfile: $!";
+
+    # also get the imagemap boxes
+    my @pairboxes = extract_pairs($panel->boxes);
+
+    print $q->img({-src => $pngurl, -usemap=>"#MAP"});
+    print $q->p('Oligos are color coded: rank 1 in ', 
+		$q->font({-color => $COLORRANKS{1}}, $COLORRANKS{1}),
+		', rank 2 in ',
+		$q->font({-color => $COLORRANKS{2}}, $COLORRANKS{2}),
+		' and rank 3 in ',
+		$q->font({-color => $COLORRANKS{3}}, $COLORRANKS{3}),
+		'. Click on an oligo to bring it up in the table below');
+
+    print_imagemap(@pairboxes);
+
+}
+
+sub feature_label {
+    # Label for a feature: the first value of the first tag it carries out
+    # of note, name, product and gene (tried in that order), or an empty
+    # string when it carries none of them.
+    my ($feature) = @_;
+
+    for my $tag (qw(note name product gene)) {
+	next unless $feature->has_tag($tag);
+	my @values = $feature->each_tag_value($tag);
+	return join(': ', $values[0]);
+    }
+    return '';
+}
+
+sub feature_color {
+    # Colour of a feature: the %COLORRANKS entry for the first value of its
+    # 'rank' tag.
+    my $feature = shift;
+    my @ranks = $feature->each_tag_value('rank');
+    #print STDERR "Feature rank: $ranks[0] COLOR $COLORRANKS{$ranks[0]}\n";
+    return $COLORRANKS{ $ranks[0] };
+}
+
+
+sub print_table {
+    # Print the designed reagents as an HTML table, ordered by start
+    # position and numbered from 1.  The first cell of each row carries the
+    # anchor "RNAi<start>".
+    #   $accession - used in the heading
+    #   $pairs     - array reference of pair objects
+    my ($accession, $pairs) = @_;
+
+    print $q->h2("RNAi Reagents for $accession");
+    print $q->start_table({-border => 1, -cellpadding => 2});
+    print $q->TR( $q->th(['Reagent #', 'Start', 'Stop', 'Rank', 'Fxn GC', 'Sense Oligo', 'Antisense Oligo', 'Target' ]) ), "\n";
+
+    my $reagent_number = 0;
+
+    for my $pair ( sort { $a->start <=> $b->start } @$pairs ) {
+	$reagent_number++;
+	my $color = feature_color($pair);
+
+	# A "BLAST this target" link used to be built here and is disabled:
+#  	my $blasturl = "http://nunu.hpw.pri.bms.com/biocgi/versablast.pl?p=blastn&sequence=";
+#  	$blasturl .= $pair->seq->seq;
+#  	$blasturl .= "&action=Nucleotide Databases";
+#  	... $q->a({-href=>$blasturl, -target=>"blastn"}, "BLAST this target")
+
+	my @cells = ( $q->a({-name => 'RNAi' . $pair->start}) . $reagent_number,
+		      $pair->start,
+		      $pair->end,
+		      $q->font({-color => $color},$pair->rank),
+		      $pair->fxGC,
+		      $q->tt($pair->sense->seq),
+		      $q->tt($pair->antisense->seq),
+		      $q->tt($pair->seq->seq),
+		      );
+	print $q->TR( $q->td( \@cells ) ), "\n";
+    }
+    print $q->end_table;
+}
+
+
+
+
+
+sub print_text {
+    # Print the reagents as tab-separated text: a title line, a heading
+    # line, then one line per pair in the order given.
+    #   $accession - used in the title line
+    #   $pairs     - array reference of pair objects
+    my ($accession, $pairs) = @_;
+
+    print "RNAi reagents for $accession \n";
+    print join("\t", qw(Start Stop Rank Sense Antisense)), "\n";
+
+    for my $pair (@$pairs) {
+	my @columns = ( $pair->start,
+			$pair->end,
+			$pair->rank,
+			$pair->sense->seq,
+			$pair->antisense->seq );
+	print join("\t", @columns), "\n";
+    }
+}
+
+sub cleanup_feature {
+    # Return a reference to a list of copies of the given features with the
+    # sub-features flushed from every copy; the features passed in are left
+    # untouched because flush_sub_SeqFeature is only called on the clones.
+    my ($features) = @_;
+    my @stripped;
+
+    foreach my $original (@$features) {
+	my $copy = clone($original);
+	$copy->flush_sub_SeqFeature;
+	push @stripped, $copy;
+    }
+
+    return \@stripped;
+}
+
+
+sub extract_pairs {
+    # Keep, in their original order, only the boxes whose first element is
+    # an object of class Bio::SeqFeature::SiRNA::Pair (used for the imagemap).
+    my @pair_boxes;
+    for my $box (@_) {
+	next unless ref($box->[0]) eq "Bio::SeqFeature::SiRNA::Pair";
+	push @pair_boxes, $box;
+    }
+    return @pair_boxes;
+}
+
+sub print_imagemap {
+    # Print a client-side image map named "MAP" with one rectangular area
+    # per box.  Each box is [feature, x1, y1, x2, y2]; an area links to the
+    # anchor "#RNAi<start>" and its title gives the running number, start
+    # and rank.  The title text is also sent through warn.
+    my @boxes = @_;
+
+    print q(<MAP NAME="MAP">), "\n";
+
+    my $number = 0;
+    for my $box (@boxes) {
+	$number++;
+	my ($feature, $x1, $y1, $x2, $y2) = @$box;
+	my $fstart = $feature->start; # should be unique
+	my $text = 'RNAi #' . $number . ' Start=' . $feature->start . ' Rank='.$feature->rank;
+	print qq(<AREA SHAPE="RECT" COORDS="$x1,$y1,$x2,$y2" TITLE="$text" HREF="#RNAi$fstart">), "\n";
+	warn "Mouseover text: $text\n";
+    }
+
+    print "</MAP>\n";
+}
+
+
+sub print_error {
+    # print error messages in big red type. Provide more graceful die/warn to end user
+    #   $msg   - plain-text message
+    #   $fatal - when true the document is closed and the script dies with
+    #            the message; otherwise the message is only passed to warn
+    my ($msg, $fatal) = @_;
+
+    # Messages can contain text typed into the form (the accession, for
+    # one), so escape them before they go into the page.
+    print $q->h3($q->font({-color=>'RED'}, $q->escapeHTML($msg)));
+    
+    if ($fatal) {
+	print $q->end_html;
+	die "$msg \n";
+    }
+    else {
+	warn $msg;
+    }
+}
+
+sub dump {
+    # Debugging aid: print every CGI parameter name with its value(s) as a
+    # nested HTML list.
+    print $q->start_ul;
+
+    foreach my $name ($q->param) {
+	# names and values come from the request, so escape them
+	my @values = map { $q->escapeHTML($_) } $q->param($name);
+	print $q->li($q->escapeHTML($name)),
+	$q->ul($q->li(\@values));
+    }
+
+    # close the list opened above
+    print $q->end_ul;
+}
+    
+sub snp_label {
+    # special format for SNPs
+    # The label is the first db_xref value followed by a blank (when the
+    # feature has that tag), then "<first allele>-><second allele>" (when it
+    # has an allele tag).  Undefined when the feature has neither tag.
+    my ($feature) = @_;
+    my $label;
+
+    if ( $feature->has_tag('db_xref') ) {
+	my ($xref) = $feature->each_tag_value('db_xref');
+	$label .= $xref;
+	$label .= ' ';
+    }
+
+    if ( $feature->has_tag('allele') ) {
+	my @alleles = $feature->each_tag_value('allele');
+	$label .= $alleles[0] . '->' . $alleles[1];
+    }
+
+    return $label;
+}
+
+sub feature_desc {
+    # Describe a feature by position: "start-end", or just "start" when
+    # both coordinates are numerically equal.
+    my ($feature) = @_;
+    my $first = $feature->start;
+    my $last  = $feature->end;
+
+    return $first if $first == $last;
+    return $first . '-' . $last;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/sirna/rnai_finder.cgi
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/structure/structure-io.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/structure/structure-io.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/structure/structure-io.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,21 @@
+#!/usr/bin/perl -w
+# Getting  Entry, Chain, Residue, and Atom objects given a PDB file
+#
+# Usage: structure-io.pl FILE
+# Prints one line per residue: chain id, residue id and atom count.
+
+use Bio::Structure::IO;
+use strict;
+
+my $file = shift or die "No PDB file\n";
+my $structio = Bio::Structure::IO->new(-file => $file);
+# stop with a message of our own when no structure comes back, instead of
+# failing on a method call on an undefined value below
+my $struc = $structio->next_structure
+   or die "No structure could be read from $file\n";
+
+for my $chain ($struc->get_chains) {
+   my $chainid = $chain->id;
+   # one-letter chaincode if present, 'default' otherwise
+   for my $res ($struc->get_residues($chain)) {
+      my $resid = $res->id;
+      # format is 3-lettercode - dash - residue number, e.g. PHE-20
+      my $atoms = $struc->get_atoms($res);
+      # actually a list of atom objects, used here to get a count
+      # (the newline is one of the joined items, so a tab precedes it;
+      # left as it is to keep the output format unchanged)
+      print join "\t", $chainid,$resid,$atoms,"\n";
+   }
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/structure/structure-io.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/subsequence.cgi
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/subsequence.cgi	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/subsequence.cgi	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+# see http://zfish.nichd.nih.gov/tools/subsequence.cgi
+
+# uncomment and modify the next two lines 
+#  if your perl is in a nonstandard directory
+#use lib '/disk3/local/lib/perl5/site_perl';
+#use lib '/disk3/local/lib/perl5/';
+
+use CGI qw/:standard :html3/;
+use Bio::DB::GenBank;
+use File::Temp;
+use FileHandle;
+
+# In a single-quoted string a backslash before "@" is kept literally, so
+# the author address is written without one.
+print header,
+  start_html(-title => 'find subsequence of large GenBank entries',-author => 'Jonathan_Epstein@nih.gov');
+# no parameters: first visit, show the form; otherwise show the results
+if (param()) {
+  print_results();
+}
+else {
+  print_form();
+}
+# close the document opened by start_html
+print end_html;
+
+sub print_results {
+  # Fetch the GenBank entry named by the 'accession' parameter, cut out the
+  # requested segment (reverse-complemented on request) and print it as
+  # FASTA text inside <tt>.  Every problem is reported to the browser as a
+  # "***ERROR: ...***" line, after which the sub returns.
+  my $accession = param('accession');
+  $accession = '' unless defined $accession;
+  # the accession is echoed back to the browser, so escape it for display
+  my $shown_accession = CGI::escapeHTML($accession);
+
+  my $gb  = Bio::DB::GenBank->new;
+  my $seq = eval { $gb->get_Seq_by_acc($accession) }; # Accession Number
+  if ($@ or !$seq) {
+    print "***ERROR: accession $shown_accession not found***\n";
+    return;
+  }
+
+  my $segment_start = param('start');
+  my $segment_end   = param('length_or_end_value');
+  for my $value ($segment_start, $segment_end) {
+    $value = '' unless defined $value;
+    $value =~ s/^\s+//;
+    $value =~ s/\s+$//;
+  }
+  # both fields must be plain whole numbers before any arithmetic is done
+  unless ($segment_start =~ /^\d+$/ and $segment_end =~ /^\d+$/) {
+    print "***ERROR: invalid segment start and end values:",
+      CGI::escapeHTML("$segment_start,$segment_end"), "***\n";
+    return;
+  }
+  $segment_end = $segment_start+$segment_end-1
+    if (param('length_or_end_choice') || '') eq 'Length';
+  # positions are counted from 1, so a start of 0 is rejected as well
+  if ($segment_end<$segment_start || $segment_start<1) {
+    print "***ERROR: invalid segment start and end values:$segment_start,$segment_end***\n";
+    return;
+  }
+  my $len = $seq->length();
+  if ($segment_end>$len) {
+    print "***ERROR: maximum length $len exceeded***\n";
+    return;
+  }
+  my $subseq = $seq->subseq ($segment_start,$segment_end);
+  
+  my $name = "subsequence of $accession";
+  my $strand = param('reverse') ? "-" : "+";
+  
+  # For some reason, there seems to be a problem if you use the file
+  # handle provided by File::Temp.  Similarly, there's a problem if you
+  # pass a filename to BioPerl below rather than a file handle.  However,
+  # constructing our own file handle and then passing it to BioPerl works
+  # fine.
+  my (undef, $filename) = File::Temp::tempfile();
+  my $fh = FileHandle->new($filename, 'w')
+    or die "unable to write temporary file $filename\n";
+  my $seqoutlong = Bio::SeqIO->new( '-format' => 'Fasta',-fh => $fh);
+  my $seqobj = Bio::PrimarySeq->new ( -seq => $subseq,
+				      -id  => $name . "[length:$len]:" . $segment_start . "-" . $segment_end . "(" . $strand . "strand)",
+				      # -alphabet is the current name of the old -moltype argument
+				      -alphabet => 'dna'
+				    );
+  $seqobj = $seqobj->revcom if ($strand ne "+");
+  $seqoutlong->write_seq($seqobj);
+  $fh->close;
+  undef $fh;
+  
+  # Now we parse the FASTA file which was just generated, and perform
+  # some simple conversions to HTML.   
+  open (my $fasta, '<', $filename) or die "unable to open temporary file $filename\n";
+  print "<tt>\n";
+  while (my $line = <$fasta>) {
+    # the header line starts with ">" and repeats the accession: escape it
+    print CGI::escapeHTML($line);
+    print "<br>\n";
+  }
+  close $fasta;
+  print "</tt>\n";
+  unlink $filename;
+}
+
+sub print_form {
+  # Print the explanatory text and the query form.
+  # The form posts the fields accession, start, length_or_end_choice
+  # ('Length' or 'End'), length_or_end_value and reverse.
+  print p("This web page permits you to extract a short subsequence of DNA from a large GenBank entry.  This is especially useful in an era of huge \"contigs\" of genomic DNA, where you only want to extract a few hundred base pairs for subsequent analysis.\n");
+  
+  print p,"This program also illustrates the power of ",a({-href => 'http://www.BioPerl.org/'}, "BioPerl"), ", a powerful set of tools for molecular biology analysis.  The ", a({-href => 'subsequence.pl.txt'}, "source code"), " for this program is less than 90 lines long.\n";
+  
+  print p,"You must specify the GenBank accession number along with a start position.  You may specify either the length of the subsequence you wish to extract or, equivalently, the endpoint.\n";
+  
+  print "The sequence may be reverse-complemented if you wish, e.g., the reverse complement of <font color=green>ATCGC</font> is <font color=yellow>GCGAT</font>.\n";
+  
+  print p,"To test this web page, try accession NT_004002, start 50000, length 400.\n";
+  
+  # Field names and radio values are written as quoted strings rather than
+  # barewords, so the form keeps working if "use strict" is switched on.
+  print start_form,table(
+			 Tr(td("Enter your GenBank accession"),td(textfield(-name => 'accession',-size => 20))),
+			 Tr(td("Start position"),td(textfield(-name => 'start',-size => 10))),
+			 Tr(td("Specify length or end position"), td(radio_group (-name => 'length_or_end_choice',-values => ['Length', 'End'], -default => 'Length'))),
+			 Tr(td("Length or end position"), td(textfield (-name => 'length_or_end_value',-size => 20))),
+			 Tr(td("Reverse complement?"), td(checkbox (-name => 'reverse')))),
+    submit ("Find my subsequence");
+  
+  print hr(),"Credits: Jonathan Epstein (Jonathan_Epstein\@nih.gov)";
+
+}

Added: trunk/packages/bioperl/branches/upstream/current/examples/tk/gsequence.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tk/gsequence.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tk/gsequence.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1625 @@
+#!/usr/bin/perl -w
+# gSequence - Protein Sequence Control Panel
+# by Lorenz Pollsk
+#
+# this is work in progress! use this only for testing
+
+use Gtk;
+use strict;
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Tools::SeqStats;
+use Bio::SeqFeature::Generic;
+use Bio::Index::Abstract;
+use Bio::DB::GenBank;
+use Bio::DB::GenPept;
+
+# Initialise the Gtk toolkit before any widget is created.
+Gtk->init;
+
+# Boolean constants passed to Gtk calls that expect a flag.
+my $false = 0;
+my $true  = 1;
+
+# Global widgets shared by the init_* / update_* / store_* subroutines.
+# NOTE: the array sigils below had been mangled to " at " by the
+# mailing-list archive's address obfuscation; they are restored here.
+my ($main_notebook, @main_label, @seq_edit);
+my $about_dialog;
+my ($import_dialog, $import_entry, @import_buttons, $import_from);
+my ($description_window, $description_edit);
+my ($comment_window, $comment_edit, $current_comment, $comment_frame);
+my ($seqstats_window, $seqstats_edit);
+my ($dblink_window, @dblink_entry, $current_dblink, $dblink_clist, $dblink_handler_id);
+my ($ref_window, @ref_entry, $current_ref, $ref_clist, $ref_handler_id);
+my ($feature_window, @feature_entry, $current_feature_item, @feature_spinner,
+    $feature_handler_id, $feature_tree);
+my ($pref_window, @pref_entry);
+
+# Global file data.  @seq holds one entry per notebook page (undef for an
+# empty page) and $current is the index of the page being displayed.
+my @seq;
+my @filename;
+my @modified;
+my @locked; # locked sequence for editing ?
+my $current;
+
+# menu
+# Item list handed to Gtk::ItemFactory->create_items() in get_menu().
+# Every entry has a 'path'; 'type', 'accelerator' and 'callback' are
+# optional.  Entries that carry only a path have no callback attached here.
+my @menu_items = (
+    # ---- File ----
+    { path => '/_File',                type => '<Branch>' },
+    { path => '/File/_New',            accelerator => '<control>N',
+      callback => \&new },
+    { path => '/File/_Open SwissProt', accelerator => '<control>O',
+      callback => \&open_dialog },
+    { path => '/File/_Save SwissProt', accelerator => '<control>S',
+      callback => \&save },
+    { path => '/File/Save _As...',     callback => \&saveas_dialog },
+    { path => '/File/Close',           callback => \&close },
+    { path => '/File/sep1',            type => '<Separator>' },
+    { path => '/File/_Import from...', type => '<Branch>' },
+    { path => '/File/Import from.../Remote DB',
+      type => '<Branch>' },
+    { path => '/File/Import from.../Remote DB/AceDB',
+      callback => sub { seq_import("ace") } },
+    { path => '/File/Import from.../Remote DB/GenPept',
+      callback => sub { seq_import("genpept") } },
+    { path => '/File/Import from.../Flat File Index',
+      type => '<Branch>' },
+    { path => '/File/Import from.../Flat File Index/Fasta',
+      callback => sub { seq_import("fasta") } },
+    { path => '/File/Import from.../Flat File Index/SwissProt',
+      callback => sub { seq_import("swissprot") } },
+    { path => '/File/Import from.../Flat File Index/SwissPfam',
+      callback => sub { seq_import("swisspfam") } },
+    { path => '/File/_Export to...' },
+    { path => '/File/sep2',            type => '<Separator>' },
+    { path => '/File/_Quit',           callback => sub { Gtk->exit( 0 ) } },
+
+    # ---- Edit: clipboard operations on the text widget of the current page ----
+    { path => '/_Edit',                type => '<Branch>' },
+    { path => '/Edit/C_ut',            accelerator => '<control>X',
+      callback => sub { $seq_edit[$current]->cut_clipboard() } },
+    { path => '/Edit/_Copy',           accelerator => '<control>C',
+      callback => sub { $seq_edit[$current]->copy_clipboard() } },
+    { path => '/Edit/_Paste',          accelerator => '<control>V',
+      callback => sub { $seq_edit[$current]->paste_clipboard() } },
+    { path => '/Edit/Select All',
+      callback => sub { $seq_edit[$current]->select_region(0,-1) } },
+
+    # ---- Specs: each callback refreshes one detail window and asks for it to be shown ----
+    { path => '/_Specs',                type => '<Branch>' },
+    { path => '/Specs/_Sequence Stats', callback => sub { update_seqstats_window(1) } },
+    { path => '/Specs/sep1',            type => '<Separator>' },
+    { path => '/Specs/_Description',    callback => sub { update_description_window(1) } },
+    { path => '/Specs/_Comments',       callback => sub { update_comment_window(1) } },
+    { path => '/Specs/_DB Links',       callback => sub { update_dblink_window(1) } },
+    { path => '/Specs/_References',     callback => sub { update_reference_window(1) } },
+    { path => '/Specs/sep2',            type => '<Separator>' },
+    { path => '/Specs/_Features',       callback => sub { update_feature_window(1) } },
+
+    # ---- Tools ----
+    { path => '/_Tools',                type => '<Branch>' },
+    { path => '/Tools/Code Table' },
+    { path => '/Tools/sep1',            type => '<Separator>' },
+    { path => '/Tools/local Blast' },
+    { path => '/Tools/local HMMER' },
+    { path => '/Tools/hmmpfam' },
+    { path => '/Tools/web Blast' },
+
+    # ---- Options ----
+    { path => '/_Options',              type => '<Branch>' },
+    { path => '/Options/_Preferences',  callback => sub { update_pref_window(1) } },
+
+    # ---- Help ----
+    { path => '/_Help',                 type => '<LastBranch>' },
+    { path => '/Help/Help' },
+    { path => '/Help/_About...',        callback => sub { $about_dialog->show_all() } },
+);
+
+### main
+# Start on page 0, build every window, then hand control to the Gtk
+# main loop.
+
+$current = 0;
+init_windows();
+Gtk->main;
+exit 0;
+
+
+### Subroutines
+
+# Build every window and dialog of the application, main window first.
+sub init_windows
+{
+    init_main_window();
+    init_about_dialog();
+    init_import_dialog();
+    init_seqstats_window();
+    init_description_window();
+    init_comment_window();
+    init_dblink_window();
+    init_reference_window();
+    init_feature_window();
+    init_pref_window();
+}
+
+# Create and show the main window: a menubar (inside a handle box) on top
+# of the notebook whose pages are added later by new_seq_page().
+sub init_main_window
+{
+    # toplevel window; destroying it ends the application
+    my $window = Gtk::Window->new( 'toplevel' );
+    $window->signal_connect( 'destroy', sub { Gtk->exit( 0 ); } );
+    $window->set_title( "gSequence" );
+    $window->set_usize( 600, 400 );
+
+    # vertical box holding the menubar and the notebook
+    my $main_vbox = Gtk::VBox->new( $false, 1 );
+    $main_vbox->border_width( 1 );
+    $window->add( $main_vbox );
+
+    # handlebox carrying the menubar
+    my $handlebox = Gtk::HandleBox->new();
+    $main_vbox->pack_start( $handlebox, $false, $true, 0 );
+
+    my $menubar = get_menu( $window );
+    $handlebox->add( $menubar );
+
+    # editable text widget for slot $current, with its vertical scrollbar
+    $seq_edit[$current] = Gtk::Text->new( undef, undef );
+    $seq_edit[$current]->set_editable( $true );
+    my $scrollbar = Gtk::VScrollbar->new( $seq_edit[$current]->vadj );
+
+    # horizontal box containing text widget and scrollbar
+    # NOTE(review): this box is not added to any container in this sub.
+    my $seq_edit_hbox = Gtk::HBox->new( $false, 1 );
+    $seq_edit_hbox->border_width( 1 );
+    $seq_edit_hbox->pack_start( $seq_edit[$current], $true, $true, 0);
+    $seq_edit_hbox->pack_end( $scrollbar, $false, $true, 0);
+
+    # notebook with tabs on top, filling the rest of the window
+    $main_notebook = Gtk::Notebook->new();
+    $main_notebook->set_tab_pos( 'top' );
+    $main_vbox->pack_end( $main_notebook, $true, $true, 0);
+
+    # show everything
+    $window->show_all();
+
+    # After a page switch, remember the new page index and refresh all
+    # views.  (Copying the edited text back into the sequence object is
+    # currently disabled.)
+    $main_notebook->signal_connect_after("switch-page",
+        sub {
+            $current = $main_notebook->get_current_page();
+            update_seq_data();
+        } );
+}
+
+# Build the menubar described by @menu_items.
+#   $window - window that receives the menu's accelerator group
+# Returns the widget the item factory created for the '<main>' path.
+sub get_menu
+{
+    my ( $window ) = @_;
+
+    my $accel_group = Gtk::AccelGroup->new();
+
+    # Item factory arguments:
+    #   1. the type of menu ('Gtk::MenuBar', 'Gtk::Menu' or 'Gtk::OptionMenu')
+    #   2. the path of the menu
+    #   3. the accelerator group, filled in while the menus are generated
+    my $item_factory = Gtk::ItemFactory->new( 'Gtk::MenuBar',
+                                              '<main>',
+                                              $accel_group );
+
+    # Generate the menu items from the global item list.
+    $item_factory->create_items( @menu_items );
+
+    # Attach the new accelerator group to the window.
+    $window->add_accel_group( $accel_group );
+
+    # Finally, hand back the menu bar created by the item factory.
+    return $item_factory->get_widget( '<main>' );
+}
+
+# Append a sequence to @seq and open a notebook page for it.
+#   $seq - sequence object, or undef for an empty page labelled "<New>"
+# The new page becomes the selected one.
+sub new_seq_page
+{
+    my ($seq) = @_;
+
+    push @seq, $seq;
+    my $curr = $#seq;
+
+    # tab label: the sequence id, or "<New>" when there is no sequence
+    $main_label[$curr] = Gtk::Label->new(
+        defined($seq[$curr]) ? $seq[$curr]->id() : "<New>" );
+
+    # editable text widget plus its vertical scrollbar
+    $seq_edit[$curr] = Gtk::Text->new( undef, undef );
+    $seq_edit[$curr]->set_editable( $true );
+    my $scrollbar = Gtk::VScrollbar->new( $seq_edit[$curr]->vadj );
+
+    # horizontal box containing text widget and scrollbar
+    my $seq_edit_hbox = Gtk::HBox->new( $false, 1 );
+    $seq_edit_hbox->border_width( 1 );
+    $seq_edit_hbox->pack_start( $seq_edit[$curr], $true, $true, 0);
+    $seq_edit_hbox->pack_end( $scrollbar, $false, $true, 0);
+
+    # add the page, make it visible and select it (-1 is the last page)
+    $main_notebook->append_page( $seq_edit_hbox, $main_label[$curr] );
+    $main_notebook->show_all();
+    $main_notebook->set_page(-1);
+}
+
+# Fetch the sequence named in the import dialog from the source selected
+# by seq_import() ($import_from) and open it in a new notebook page.
+# Failures (unknown source, missing module, lookup error, nothing found)
+# are reported with warn() and no page is opened, instead of letting the
+# exception escape from the dialog's "OK" callback.
+sub seq_fetch
+{
+    # Connection settings.  Nothing assigns them yet (they are meant to be
+    # read from the preferences), so the AceDB and flat-file sources cannot
+    # succeed until that is implemented.
+    my ($server,$port,$dir,$db);
+
+    return if (!defined($import_from) || !($import_from));
+
+    my $query = $import_entry->get_text();
+    # $import_buttons[0] is treated as the "Fetch by ID" radio button
+    my $by_id = $import_buttons[0]->get_active();
+
+    my $seq = eval {
+        my $dbobj;
+        if ($import_from eq "genpept") {
+            $dbobj = Bio::DB::GenPept->new();
+        }
+        elsif ($import_from eq "ace") {
+            # not loaded at the top of the file, so load it on demand
+            require Bio::DB::Ace;
+            $dbobj = Bio::DB::Ace->new(-host=>$server,-port=>$port);
+        }
+        elsif (($import_from eq "fasta") ||
+               ($import_from eq "swissprot") ||
+               ($import_from eq "swisspfam")) {
+            $dbobj = Bio::Index::Abstract->new("$dir/$db");
+        }
+        else {
+            die "unknown import source '$import_from'\n";
+        }
+
+        $by_id ? $dbobj->get_Seq_by_id($query)
+               : $dbobj->get_Seq_by_acc($query);
+    };
+    if ($@) {
+        warn "Import from $import_from failed: $@";
+        return;
+    }
+    if (!defined($seq)) {
+        warn "Import from $import_from: nothing found for '$query'\n";
+        return;
+    }
+
+    new_seq_page($seq);
+}
+
+# Menu callback helper: remember which source to import from and show the
+# import dialog with a matching title and an empty entry field.
+#   first argument - source key ("ace", "genpept", "fasta", "swissprot"
+#                    or "swisspfam"); stored in the global $import_from
+sub seq_import
+{
+    ($import_from) = @_;
+
+    # human-readable name for each source key
+    my %title_of = (
+        "ace"       => "AceDB",
+        "genpept"   => "GenPept DB",
+        "fasta"     => "Fasta Flat File",
+        "swissprot" => "SwissProt Flat File",
+        "swisspfam" => "SwissPfam Flat File",
+    );
+
+    $import_dialog->set_title("Import from $title_of{$import_from}");
+    $import_entry->set_text("");
+    $import_dialog->show_all();
+}
+
+# Build the import dialog (without showing it): two radio buttons choosing
+# between lookup by ID and by accession, a text entry, and OK / Cancel
+# buttons.  OK hides the dialog and calls seq_fetch().
+sub init_import_dialog
+{
+    $import_dialog = Gtk::Dialog->new();
+    $import_dialog->border_width(5);
+
+    # create the first radio button and add it to the dialog
+    my $id_button = Gtk::RadioButton->new( "Fetch by ID" );
+    $import_dialog->vbox->pack_start($id_button,$false,$false,2);
+
+    # create the second radio button in the same group
+    my $acc_button = Gtk::RadioButton->new( "Fetch by ACCESSION", $id_button );
+    $import_dialog->vbox->pack_start($acc_button,$false,$false,2);
+
+    # seq_fetch() tests $import_buttons[0] to decide on lookup by ID, so
+    # store the buttons explicitly in that order rather than depending on
+    # the order in which group() happens to list them.
+    @import_buttons = ($id_button, $acc_button);
+
+    $import_entry = Gtk::Entry->new();
+    my $frame = Gtk::Frame->new("Enter here:");
+    $frame->add($import_entry);
+    $import_dialog->vbox->pack_start( $frame, $true, $true, 5);
+
+    my $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    my $button = Gtk::Button->new( "OK" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+           # OK button handler
+           sub{ $import_dialog->hide();
+                seq_fetch();
+              });
+
+    $button = Gtk::Button->new( "Cancel" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+           # cancel button handler
+           sub{ $import_dialog->hide();
+              });
+
+    $import_dialog->action_area->pack_start( $bbox, $true, $true, 0 );
+
+    $import_dialog->signal_connect_after( "delete_event",
+           # window delete handler: hide the dialog instead of losing it
+           sub{ $import_dialog->hide();
+                return &Gtk::true;
+              });
+}
+
+# File/Open menu callback: show a file selection dialog.  Its OK button
+# runs ok_open_dialog() with the dialog as callback data; Cancel destroys
+# the dialog.
+sub open_dialog
+{
+    my $open_dialog = Gtk::FileSelection->new( "Open File..." );
+
+    # OK: pass the selection widget on to ok_open_dialog
+    $open_dialog->ok_button->signal_connect(
+        "clicked", \&ok_open_dialog, $open_dialog );
+
+    # Cancel: just get rid of the dialog
+    $open_dialog->cancel_button->signal_connect(
+        "clicked", sub { $open_dialog->destroy(); } );
+
+    $open_dialog->show();
+}
+
+# OK handler of the open-file dialog: read the first SwissProt entry of
+# the selected file and open it in a new notebook page.
+#   $widget         - the OK button that was clicked
+#   $file_selection - the Gtk::FileSelection passed as callback data
+sub ok_open_dialog
+{
+    my ( $widget, $file_selection ) = @_;
+    my $file = $file_selection->get_filename();
+
+    # destroy the dialog (three levels above its OK button)
+    $widget->parent->parent->parent->destroy();
+
+    my $in = Bio::SeqIO->new(-file => $file , '-format' => 'swiss');
+
+    new_seq_page($in->next_seq());
+
+    # new_seq_page() has just appended to @seq; record the filename in the
+    # same slot.  Pushing onto @filename instead drifts out of step with
+    # @seq as soon as a page is created without a file (File/New, imports),
+    # and save() would then pair a sequence with the wrong filename.
+    $filename[$#seq] = $file;
+}
+
+# Refresh everything that displays the current page's sequence: the tab
+# label, the sequence text widget and all detail windows.
+sub update_seq_data
+{
+    # tab label: sequence id, or "<New>" for an empty page
+    if (defined($seq[$current])) {
+        $main_label[$current]->set_text($seq[$current]->id);
+    }
+    else {
+        $main_label[$current]->set_text("<New>");
+    }
+
+    # replace the text widget's contents while it is frozen
+    $seq_edit[$current]->freeze();
+    $seq_edit[$current]->delete_text(0,-1);
+    if (defined($seq[$current])) {
+        $seq_edit[$current]->insert(undef,undef,undef,$seq[$current]->seq());
+    }
+    $seq_edit[$current]->thaw();
+
+    # refresh the detail windows without asking for them to be shown
+    update_comment_window();
+    update_description_window();
+    update_seqstats_window();
+    update_dblink_window();
+    update_reference_window();
+    update_feature_window();
+}
+
+# File/New menu callback: open a page that has no sequence attached.
+sub new
+{
+    new_seq_page(undef);
+}
+
+sub close    # File/Close menu callback -- not implemented: the body is empty
+{
+}
+
+# File/Save menu callback: write the current sequence in SwissProt format
+# to the filename recorded for the current page.  Without a recorded
+# filename the "Save As..." dialog is opened instead.
+sub save
+{
+    # An empty page has no sequence object.  Return before the output file
+    # is opened for writing, since write_seq() would be handed undef.
+    return if !defined($seq[$current]);
+
+    if (!defined($filename[$current]) || !$filename[$current])
+    {
+        saveas_dialog();
+        return;
+    }
+
+    my $out = Bio::SeqIO->new(-file => ">$filename[$current]" , '-format' => 'swiss');
+    $out->write_seq($seq[$current]);
+}
+
+# File/Save As menu callback: show a file selection dialog.  Its OK button
+# runs ok_saveas_dialog() with the dialog as callback data; Cancel
+# destroys the dialog.
+sub saveas_dialog
+{
+    my $saveas_dialog = Gtk::FileSelection->new( "Save As..." );
+
+    # OK: pass the selection widget on to ok_saveas_dialog
+    $saveas_dialog->ok_button->signal_connect(
+        "clicked", \&ok_saveas_dialog, $saveas_dialog );
+
+    # Cancel: just get rid of the dialog
+    $saveas_dialog->cancel_button->signal_connect(
+        "clicked", sub { $saveas_dialog->destroy(); } );
+
+    $saveas_dialog->show();
+}
+
+# OK handler of the "Save As..." dialog: record the chosen filename for
+# the current page and write the current sequence to it in SwissProt
+# format.
+#   $widget         - the OK button that was clicked
+#   $file_selection - the Gtk::FileSelection passed as callback data
+sub ok_saveas_dialog
+{
+    my ( $widget, $file_selection ) = @_;
+    my $filename = $file_selection->get_filename();
+
+    # destroy the dialog (three levels above its OK button)
+    $widget->parent->parent->parent->destroy();
+
+    # nothing usable was chosen
+    return if !defined($filename) || $filename eq "";
+    # an empty page has no sequence object to write; leave the file alone
+    return if !defined($seq[$current]);
+
+    $filename[$current] = $filename;
+    my $out = Bio::SeqIO->new(-file => ">$filename[$current]" , '-format' => 'swiss');
+    $out->write_seq($seq[$current]);
+}
+
+# Build the "Comments" dialog (without showing it): a framed, scrollable
+# text editor for one comment at a time, Add / Delete buttons, previous /
+# next arrows and a Close button.  $current_comment is the index of the
+# comment being edited; edits are written back by store_current_comment().
+#
+# Objects are created with Class->new rather than "new Class": a sub named
+# "new" is defined earlier in this file, which makes the indirect-object
+# form depend on the parser's guess.
+sub init_comment_window
+{
+    $current_comment = 0;
+
+    $comment_window = Gtk::Dialog->new();
+    $comment_window->set_default_size(650,300);
+    $comment_window->set_policy($false,$true,$false);
+    $comment_window->set_title("Comments");
+    $comment_window->border_width(5);
+
+    # frame whose label shows the index of the comment on display
+    $comment_frame = Gtk::Frame->new( "Comment[".$current_comment."]" );
+
+    # text widget
+    $comment_edit = Gtk::Text->new( undef, undef );
+    $comment_edit->set_editable( $true );
+    $comment_edit->set_word_wrap( $true );
+
+    # vertical scrollbar for text widget
+    my $scrollbar = Gtk::VScrollbar->new( $comment_edit->vadj );
+
+    # horizontal box containing text widget and scrollbar
+    my $hbox = Gtk::HBox->new( $false, 1 );
+    $hbox->border_width( 1 );
+    $hbox->pack_start( $comment_edit, $true, $true, 0);
+    $hbox->pack_end( $scrollbar, $false, $true, 0);
+    $comment_frame->add($hbox);
+    $comment_window->vbox->pack_start( $comment_frame, $true, $true, 5);
+
+    # row with Add / Delete on the left and the arrows on the right
+    my $bbox = Gtk::HBox->new( $false, 5 );
+    $bbox->border_width(10);
+
+    my $arrow = Gtk::Arrow->new('right','out');
+    my $button = Gtk::Button->new();
+    $button->add($arrow);
+    $bbox->pack_end( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # next comment button handler
+           sub { return if !defined($seq[$current]);
+                 store_current_comment();
+                 # number of comments (list-returning call in scalar context)
+                 my $count = $seq[$current]->annotation->each_Comment;
+                 $current_comment++
+                     if ($current_comment < ($count - 1));
+                 update_comment_window();
+               } );
+
+    $arrow = Gtk::Arrow->new('left','out');
+    $button = Gtk::Button->new();
+    $button->add($arrow);
+    $bbox->pack_end( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # prev comment button handler
+           sub { return if !defined($seq[$current]);
+                 store_current_comment();
+                 $current_comment--
+                     if ($current_comment > 0);
+                 update_comment_window();
+               } );
+
+    $button = Gtk::Button->new("Add");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # add comment button handler: append an empty comment and edit it
+           sub { return if !defined($seq[$current]);
+                 store_current_comment();
+                 # the class is not loaded at the top of this file
+                 require Bio::Annotation::Comment;
+                 my $comment = Bio::Annotation::Comment->new();
+                 $comment->text("");
+                 $seq[$current]->annotation->add_Comment( $comment );
+                 $current_comment = $seq[$current]->annotation->each_Comment - 1;
+                 update_comment_window();
+               } );
+
+    $button = Gtk::Button->new("Delete");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # delete comment button handler
+           sub { return if !defined($seq[$current]);
+                 $seq[$current]->annotation->remove_Comment( $current_comment );
+                 $current_comment = $current_comment - 1
+                     if ($current_comment > 0);
+                 update_comment_window();
+               } );
+
+    $comment_window->vbox->pack_end( $bbox, $false, $false, 0);
+
+    $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+           # close button handler: hide, then keep the edited text
+           sub{ $comment_window->hide();
+                store_current_comment();
+              });
+
+    $comment_window->action_area->pack_start( $bbox, $true, $true, 0 );
+    $comment_window->signal_connect_after( "delete_event",
+           # window delete handler: hide instead of losing the window
+           sub{ $comment_window->hide();
+                store_current_comment();
+                return &Gtk::true;
+              });
+}
+
+# Copy the text in the comment editor back into comment number
+# $current_comment of the current sequence.  Does nothing when the current
+# page has no sequence or when the index does not address an existing
+# comment (previously that case called ->text on an undefined value).
+sub store_current_comment
+{
+    return if !defined($seq[$current]);
+
+    my @comments = $seq[$current]->annotation->each_Comment;
+    return if ($current_comment < 0) || ($current_comment > $#comments);
+
+    $comments[$current_comment]->text( $comment_edit->get_chars(0,-1) );
+}
+
+# Redisplay comment number $current_comment of the current sequence in the
+# comment window.
+#   $show_me - when defined, the window is also shown
+sub update_comment_window
+{
+    my ($show_me) = @_;
+
+    my @comment = defined($seq[$current])
+        ? $seq[$current]->annotation->each_Comment
+        : ();
+
+    # $current_comment can be stale, e.g. after switching to a sequence
+    # with fewer comments; bring it back into range so that the lookup
+    # below never hits an undefined element.
+    $current_comment = $#comment if ($current_comment > $#comment);
+    $current_comment = 0         if ($current_comment < 0);
+
+    $comment_frame->set_label("Comment[".$current_comment."]");
+
+    # replace the editor contents with the comment text
+    $comment_edit->freeze();
+    $comment_edit->delete_text(0,-1);
+    $comment_edit->insert(undef,undef,undef, $comment[$current_comment]->text)
+        if (@comment);
+    $comment_edit->thaw();
+
+    $comment_window->show_all() if (defined($show_me));
+}
+
+# Build the "Description" dialog (without showing it): a framed,
+# scrollable text editor and a Close button.  Closing the dialog, by the
+# button or through the window manager, hides it and stores a non-empty
+# text as the description of the current sequence.
+sub init_description_window
+{
+    $description_window = Gtk::Dialog->new();
+    $description_window->set_default_size(620,250);
+    $description_window->border_width(5);
+    $description_window->set_title("Description");
+
+    # frame
+    my $description_frame = Gtk::Frame->new( "Description" );
+
+    # text widget
+    $description_edit = Gtk::Text->new( undef, undef );
+    $description_edit->set_editable( $true );
+    $description_edit->set_word_wrap( $true );
+
+    # vertical scrollbar for text widget
+    my $scrollbar = Gtk::VScrollbar->new( $description_edit->vadj );
+
+    # horizontal box containing text widget and scrollbar
+    my $hbox = Gtk::HBox->new( $false, 1 );
+    $hbox->border_width( 1 );
+    $hbox->pack_start( $description_edit, $true, $true, 0);
+    $hbox->pack_end( $scrollbar, $false, $true, 0);
+    $description_frame->add($hbox);
+    $description_window->vbox->pack_start( $description_frame, $true, $true, 5);
+
+    # Shared by both close paths.  An empty page has no sequence object,
+    # so there is nothing to store the text in (calling ->desc on it would
+    # die inside the callback).
+    my $hide_and_store = sub {
+        $description_window->hide();
+        return if !defined($seq[$current]);
+        my $text = $description_edit->get_chars(0,-1);
+        $seq[$current]->desc($text) if $text;
+    };
+
+    my $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    my $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+           # close button handler
+           sub{ $hide_and_store->();
+              });
+
+    $description_window->action_area->pack_start( $bbox, $true, $true, 0 );
+    $description_window->signal_connect_after( "delete_event",
+           # window delete handler: hide instead of losing the window
+           sub{ $hide_and_store->();
+                return &Gtk::true;
+              });
+}
+
+# Redisplay the description of the current sequence; the editor is left
+# empty when there is no sequence or no description.
+#   $show_me - when defined, the window is also shown
+sub update_description_window
+{
+    my ($show_me) = @_;
+
+    $description_edit->freeze();
+    $description_edit->delete_text(0,-1);
+    if (defined($seq[$current]) && defined($seq[$current]->desc)) {
+        $description_edit->insert(undef,undef,undef,$seq[$current]->desc);
+    }
+    $description_edit->thaw();
+
+    if (defined($show_me)) {
+        $description_window->show_all();
+    }
+}
+
+# Build the "Sequence Statistics" dialog (without showing it): a framed,
+# scrollable, read-only text area and a Close button.
+sub init_seqstats_window
+{
+    $seqstats_window = Gtk::Dialog->new();
+    $seqstats_window->border_width(5);
+    $seqstats_window->set_default_size(100,250);
+    $seqstats_window->set_title("Sequence Statistics");
+
+    my $seqstats_frame = Gtk::Frame->new( "Sequence Statistics" );
+
+    # read-only, word-wrapped text widget
+    $seqstats_edit = Gtk::Text->new( undef, undef );
+    $seqstats_edit->set_editable( $false );
+    $seqstats_edit->set_word_wrap( $true );
+
+    # its vertical scrollbar
+    my $scrollbar = Gtk::VScrollbar->new( $seqstats_edit->vadj );
+
+    # text widget and scrollbar side by side, inside the frame
+    my $hbox = Gtk::HBox->new( $false, 1 );
+    $hbox->border_width( 1 );
+    $hbox->pack_start( $seqstats_edit, $true, $true, 0);
+    $hbox->pack_end( $scrollbar, $false, $true, 0);
+    $seqstats_frame->add($hbox);
+    $seqstats_window->vbox->pack_start( $seqstats_frame, $true, $true, 5);
+
+    my $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    my $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    # Close only hides the window
+    $button->signal_connect("clicked",
+        sub { $seqstats_window->hide(); } );
+
+    $seqstats_window->action_area->pack_start( $bbox, $true, $true, 0 );
+
+    # window delete: hide instead of losing the window
+    $seqstats_window->signal_connect_after( "delete_event",
+        sub { $seqstats_window->hide();
+              return &Gtk::true;
+            } );
+}
+
+# Recompute and display statistics for the current sequence: its id, its
+# molecular weight (a single value or a lower/upper pair) and, for every
+# monomer counted, the count and its percentage of the sequence length.
+# The text area is left empty when the current page has no sequence.
+#   $show_me - when defined, the window is also shown
+sub update_seqstats_window
+{
+    my ($show_me) = @_;
+
+    $seqstats_edit->freeze();
+    $seqstats_edit->delete_text(0,-1);
+
+    if (defined($seq[$current]))
+    {
+        my $report = $seq[$current]->id."\n\n";
+
+        # get_mol_wt hands back a two-element array reference
+        my $weight = Bio::Tools::SeqStats->get_mol_wt($seq[$current]->primary_seq);
+        if ($weight->[0] == $weight->[1]) {
+            $report .= "Molecular weight of sequence equals to ".$weight->[0]."\n\n";
+        }
+        else {
+            $report .= "Molecular weight of sequence is greater than ";
+            $report .= $weight->[0]." and less than ".$weight->[1]."\n\n";
+        }
+
+        # count_monomers hands back a hash reference: monomer => count
+        my $count_hash = Bio::Tools::SeqStats->count_monomers($seq[$current]->primary_seq);
+        $report .= "Amino Acids:\n";
+        foreach my $monomer (sort keys %$count_hash)
+        {
+            my $percent = sprintf "%.1f",
+                (($count_hash->{$monomer} / $seq[$current]->length)*100);
+            $report .= "${monomer}: ".$count_hash->{$monomer}." (${percent}%) \n";
+        }
+
+        $seqstats_edit->insert(undef,undef,undef,$report);
+    }
+
+    $seqstats_edit->thaw();
+
+    if (defined($show_me)) {
+        $seqstats_window->show_all();
+    }
+}
+
+# Build the "Database Links" dialog (without showing it): a scrolled
+# two-column list of links, Add / Delete buttons, up / down arrows, four
+# entry fields for the selected link (primary id, optional id, database,
+# comment) and a Close button.  $current_dblink is the index of the link
+# being edited; edits are written back by store_current_dblink().
+#
+# Objects are created with Class->new rather than "new Class": a sub named
+# "new" is defined earlier in this file, which makes the indirect-object
+# form depend on the parser's guess.
+sub init_dblink_window
+{
+    $current_dblink = 0;
+
+    $dblink_window = Gtk::Dialog->new();
+    $dblink_window->set_default_size(500,400);
+    $dblink_window->set_policy($true,$true,$false);
+    $dblink_window->set_title("Database Links");
+    $dblink_window->border_width(5);
+
+    # Create a scrolled window to pack the CList widget into
+    my $scrolled_window = Gtk::ScrolledWindow->new( undef, undef );
+    $dblink_window->vbox->pack_start( $scrolled_window, $true, $true, 0 );
+    $scrolled_window->set_policy( 'automatic', 'always' );
+
+    # Create the CList with its two columns
+    $dblink_clist = Gtk::CList->new_with_titles( "Primary Id","Database" );
+
+    # When a row is selected: store the link being edited, then switch to
+    # the selected one.  The handler id is kept so update_dblink_window()
+    # can block this handler while it selects a row itself.
+    $dblink_handler_id = $dblink_clist->signal_connect( "select_row",
+       sub{ return if (!defined($seq[$current]));
+            my ( $clist, $row ) = @_;
+            store_current_dblink();
+            $current_dblink = $row;
+            update_dblink_window();
+          } );
+
+    # It isn't necessary to shadow the border, but it looks nice :)
+    $dblink_clist->set_shadow_type( 'out' );
+
+    # Set an explicit width for the first column (columns are numbered
+    # from 0).
+    $dblink_clist->set_column_width( 0, 150 );
+
+    # Add the CList widget to the scrolled window
+    $scrolled_window->add( $dblink_clist );
+
+    # row with Add / Delete on the left and the arrows on the right
+    my $bbox = Gtk::HBox->new( $false, 5 );
+    $bbox->border_width(10);
+
+    my $arrow = Gtk::Arrow->new('down','out');
+    my $button = Gtk::Button->new();
+    $button->add($arrow);
+    $bbox->pack_end( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # next dblink button handler
+           sub { return if (!defined($seq[$current]));
+                 store_current_dblink();
+                 # number of links (list-returning call in scalar context)
+                 my $count = $seq[$current]->annotation->each_DBLink;
+                 $current_dblink++
+                     if ($current_dblink < ($count - 1));
+                 update_dblink_window();
+               } );
+
+    $arrow = Gtk::Arrow->new('up','out');
+    $button = Gtk::Button->new();
+    $button->add($arrow);
+    $bbox->pack_end( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # prev dblink button handler
+           sub { return if (!defined($seq[$current]));
+                 store_current_dblink();
+                 $current_dblink--
+                     if ($current_dblink > 0);
+                 update_dblink_window();
+               } );
+
+    $button = Gtk::Button->new("Add");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # add dblink button handler: append a placeholder link and edit it
+           sub { return if (!defined($seq[$current]));
+                 store_current_dblink();
+                 # the class is not loaded at the top of this file
+                 require Bio::Annotation::DBLink;
+                 my $dblink = Bio::Annotation::DBLink->new();
+                 $dblink->primary_id("<New>");
+                 $seq[$current]->annotation->add_DBLink( $dblink );
+                 $current_dblink = $seq[$current]->annotation->each_DBLink - 1;
+                 $dblink_clist->append("","");
+                 update_dblink_window();
+               } );
+
+    $button = Gtk::Button->new("Delete");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+           # delete dblink button handler
+           sub { return if !defined($seq[$current]);
+                 $seq[$current]->annotation->remove_DBLink( $current_dblink );
+                 $dblink_clist->remove($current_dblink);
+                 $current_dblink-- if ($current_dblink > 0);
+                 update_dblink_window();
+               } );
+
+    $dblink_window->vbox->pack_start( $bbox, $false, $false, 0);
+
+    # horizontal box containing primary_id & optional_id entries
+    my $hbox = Gtk::HBox->new( $true, 10 );
+    $hbox->border_width( 1 );
+
+    # text entries, each inside a labelled frame
+    $dblink_entry[0] = Gtk::Entry->new();
+    my $frame = Gtk::Frame->new("primary id");
+    $frame->add($dblink_entry[0]);
+    $hbox->pack_start( $frame, $true, $true, 0);
+
+    $dblink_entry[1] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("optional id");
+    $frame->add($dblink_entry[1]);
+    $hbox->pack_end( $frame, $true, $true, 0);
+
+    $dblink_window->vbox->pack_start( $hbox, $false, $false, 5);
+
+    $dblink_entry[2] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Database");
+    $frame->add($dblink_entry[2]);
+    $dblink_window->vbox->pack_start( $frame, $false, $false, 5);
+
+    $dblink_entry[3] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Comment");
+    $frame->add($dblink_entry[3]);
+    $dblink_window->vbox->pack_end( $frame, $false, $false, 5);
+
+    $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+           # close button handler: hide, then keep the edited fields
+           sub{ $dblink_window->hide();
+                store_current_dblink();
+              });
+
+    $dblink_window->action_area->pack_start( $bbox, $true, $true, 0 );
+    $dblink_window->signal_connect_after( "delete_event",
+           # window delete handler: hide instead of losing the window
+           sub{ $dblink_window->hide();
+                store_current_dblink();
+                return &Gtk::true;
+              });
+}
+
+# Copy the four entry fields back into database link number
+# $current_dblink of the current sequence.  Does nothing when the current
+# page has no sequence or when the index does not address an existing
+# link (previously that case called a method on an undefined value).
+sub store_current_dblink
+{
+    return if !defined($seq[$current]);
+
+    # fetch the list once instead of once per field
+    my @dblinks = $seq[$current]->annotation->each_DBLink;
+    return if ($current_dblink < 0) || ($current_dblink > $#dblinks);
+
+    my $dblink = $dblinks[$current_dblink];
+    $dblink->primary_id(  $dblink_entry[0]->get_chars(0,-1) );
+    $dblink->optional_id( $dblink_entry[1]->get_chars(0,-1) );
+    $dblink->database(    $dblink_entry[2]->get_chars(0,-1) );
+    $dblink->comment(     $dblink_entry[3]->get_chars(0,-1) );
+}
+
+# Redisplay the database links of the current sequence: rebuild the list
+# rows when their number changed, refresh the row texts, fill the entry
+# fields from link number $current_dblink and select that row.
+#   $show_me - when defined, the window is also shown
+sub update_dblink_window
+{
+    my ($show_me) = @_;
+    $dblink_window->show_all() if (defined($show_me));
+
+    $dblink_clist->freeze();
+
+    # no sequence on this page: empty list, empty fields
+    if (!defined($seq[$current]))
+    {
+        $dblink_clist->clear();
+        $dblink_clist->thaw();
+        foreach my $entry (@dblink_entry) { $entry->set_text(""); }
+        return;
+    }
+
+    my @dblinks = $seq[$current]->annotation->each_DBLink;
+
+    # $current_dblink can be stale, e.g. after switching to a sequence
+    # with fewer links; bring it back into range so that the lookups below
+    # never hit an undefined element.
+    $current_dblink = $#dblinks if ($current_dblink > $#dblinks);
+    $current_dblink = 0         if ($current_dblink < 0);
+
+    # reset clist if rows are different to links
+    if ($dblink_clist->rows != @dblinks) {
+        $dblink_clist->clear();
+        foreach (@dblinks) { $dblink_clist->append("",""); }
+    }
+
+    # redraw the list rows
+    for my $i (0 .. $#dblinks)
+    {
+        $dblink_clist->set_text($i,0,$dblinks[$i]->primary_id);
+        $dblink_clist->set_text($i,1,$dblinks[$i]->database);
+    }
+
+    # redraw text widgets
+    foreach my $entry (@dblink_entry) { $entry->set_text(""); }
+    if (@dblinks)
+    {
+        my $dblink = $dblinks[$current_dblink];
+        $dblink_entry[0]->set_text($dblink->primary_id);
+        $dblink_entry[1]->set_text($dblink->optional_id);
+        $dblink_entry[2]->set_text($dblink->database);
+        $dblink_entry[3]->set_text($dblink->comment);
+    }
+
+    # scroll the current row into view when it is not fully visible
+    $dblink_clist->moveto($current_dblink,0,0.3,0.0)
+        if ($dblink_clist->row_is_visible($current_dblink) ne 'full');
+
+    # select the row with the select_row handler blocked, so that the
+    # handler (which calls this sub) is not triggered from here
+    $dblink_clist->signal_handler_block($dblink_handler_id);
+    $dblink_clist->select_row($current_dblink,0);
+    $dblink_clist->signal_handler_unblock($dblink_handler_id);
+    Gtk::CList::set_focus_row($dblink_clist,$current_dblink);
+    $dblink_clist->thaw();
+}
+
+# Build the "References" dialog: a three-column list of the current
+# sequence's references, up/down/Add/Delete buttons and one text entry per
+# editable reference field.  The dialog, the list, the select-row handler id
+# and the entries are stored in $ref_window, $ref_clist, $ref_handler_id and
+# @ref_entry; nothing is shown by this sub.
+#
+# The handlers call '&update_reference_window;' (ampersand, no parentheses),
+# which passes the handler's own @_ on to update_reference_window.
+sub init_reference_window
+{
+    $current_ref = 0;
+
+    $ref_window = Gtk::Dialog->new();
+    $ref_window->set_default_size(620,500);
+    $ref_window->set_policy($true,$true,$false);
+    $ref_window->set_title("References");
+    $ref_window->border_width(5);
+
+    # Scrolled window that hosts the reference list
+    my $scrolled_window = Gtk::ScrolledWindow->new( undef, undef );
+    $ref_window->vbox->pack_start( $scrolled_window, $true, $true, 0 );
+    $scrolled_window->set_policy( 'automatic', 'always' );
+
+    # The list has three columns: Medline id, title and authors
+    $ref_clist = Gtk::CList->new_with_titles( "Medline","Title","Authors" );
+
+    # Row selected: save the edits made to the reference shown so far, then
+    # display the clicked row.  The handler id is kept because
+    # update_reference_window blocks this handler while it selects a row.
+    $ref_handler_id = $ref_clist->signal_connect( "select_row",
+        sub { return if (!defined($seq[$current]));
+              my ( $clist, $row ) = @_;
+              store_current_reference();
+              $current_ref = $row;
+              &update_reference_window;
+        } );
+
+    # It isn't necessary to shadow the border, but it looks nice :)
+    $ref_clist->set_shadow_type( 'out' );
+
+    # Column widths have to be set explicitly; columns are numbered 0 to 2
+    $ref_clist->set_column_width( 0, 70 );
+    $ref_clist->set_column_width( 1, 350 );
+    $ref_clist->set_column_width( 2, 300 );
+
+    # Add the CList widget to the scrolled window
+    $scrolled_window->add( $ref_clist );
+
+    my $bbox = Gtk::HBox->new( $false, 5 );
+    $bbox->border_width(10);
+    my $arrow = Gtk::Arrow->new('down','out');
+    my $button = Gtk::Button->new();
+    $button->add($arrow);
+    $bbox->pack_end( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # next reference button handler
+        sub { return if (!defined($seq[$current]));
+              store_current_reference();
+              $current_ref++
+                  if ($current_ref <((scalar $seq[$current]->annotation->each_Reference)-1));
+              &update_reference_window;
+        } );
+
+    $arrow = Gtk::Arrow->new('up','out');
+    $button = Gtk::Button->new();
+    $button->add($arrow);
+    $bbox->pack_end( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # previous reference button handler
+        sub { return if (!defined($seq[$current]));
+              store_current_reference();
+              $current_ref--
+                  if ($current_ref > 0);
+              &update_reference_window;
+        } );
+
+    $button = Gtk::Button->new("Add");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # add reference button handler: append a placeholder reference and
+        # make it the current one
+        sub { return if (!defined($seq[$current]));
+              store_current_reference();
+              my $ref = Bio::Annotation::Reference->new;
+              $ref->medline("<New>");
+              $seq[$current]->annotation->add_Reference( $ref );
+              $ref_clist->append("","","");
+              $current_ref = ($seq[$current]->annotation->each_Reference)-1;
+              &update_reference_window;
+        } );
+
+    $button = Gtk::Button->new("Delete");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # delete reference button handler
+        sub { return if !defined($seq[$current]);
+              # nothing to delete when the sequence has no references
+              return if !($seq[$current]->annotation->each_Reference);
+              $seq[$current]->annotation->remove_Reference( $current_ref );
+              $ref_clist->remove($current_ref);
+              $current_ref-- if ($current_ref > 0);
+              &update_reference_window;
+        } );
+
+    $ref_window->vbox->pack_start( $bbox, $false, $false, 0);
+
+    # full-width text entries
+    $ref_entry[0] = Gtk::Entry->new();
+    my $frame = Gtk::Frame->new("Title");
+    $frame->add($ref_entry[0]);
+    $ref_window->vbox->pack_start( $frame, $false, $false, 5);
+
+    $ref_entry[1] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Authors");
+    $frame->add($ref_entry[1]);
+    $ref_window->vbox->pack_start( $frame, $false, $false, 5);
+
+    # horizontal box containing the comment and location entries
+    my $hbox = Gtk::HBox->new( $true, 10 );
+    $hbox->border_width( 1 );
+
+    $ref_entry[2] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Comment");
+    $frame->add($ref_entry[2]);
+    $hbox->pack_start( $frame, $true, $true, 0);
+
+    $ref_entry[3] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Location");
+    $frame->add($ref_entry[3]);
+    $hbox->pack_end( $frame, $true, $true, 0);
+
+    $ref_window->vbox->pack_start( $hbox, $false, $false, 5);
+
+    # horizontal box containing the medline and reference position entries
+    $hbox = Gtk::HBox->new( $false, 10 );
+    $hbox->border_width( 1 );
+
+    $ref_entry[4] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Medline");
+    $frame->add($ref_entry[4]);
+    $hbox->pack_start( $frame, $false, $false, 0);
+
+#    $ref_entry[5] = new Gtk::Entry();
+#    $frame = new Gtk::Frame("Start");
+#    $frame->add($ref_entry[5]);
+#    $hbox->pack_start( $frame, $false, $false, 0);
+
+    $ref_entry[5] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Reference Position");
+    $frame->add($ref_entry[5]);
+    $hbox->pack_end( $frame, $true, $true, 0);
+
+    $ref_window->vbox->pack_start( $hbox, $false, $false, 5);
+
+    $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+        # close button handler
+        sub { $ref_window->hide();
+              store_current_reference();
+        });
+
+    $ref_window->action_area->pack_start( $bbox, $true, $true, 0 );
+    $ref_window->signal_connect_after( "delete_event",
+        # window delete handler: hide the dialog and report the event as
+        # handled by returning true
+        sub { $ref_window->hide();
+              store_current_reference();
+              return &Gtk::true;
+        });
+}
+
+# Copy the text of the reference entry widgets back into the reference that
+# is currently selected in the References dialog.  Does nothing when there
+# is no current sequence, when it has no references, or when $current_ref
+# does not index an existing reference.
+sub store_current_reference
+{
+    return if (!defined($seq[$current]));
+
+    # fetch the reference list once instead of once per field
+    my @refs = $seq[$current]->annotation->each_Reference;
+    return if (!@refs);
+
+    # an index past the end of the list would otherwise die with a method
+    # call on an undefined value
+    my $ref = $refs[$current_ref];
+    return if (!defined($ref));
+
+    $ref->title(    $ref_entry[0]->get_chars(0,-1) );
+    $ref->authors(  $ref_entry[1]->get_chars(0,-1) );
+    $ref->comment(  $ref_entry[2]->get_chars(0,-1) );
+    $ref->location( $ref_entry[3]->get_chars(0,-1) );
+    $ref->medline(  $ref_entry[4]->get_chars(0,-1) );
+#    $ref->start(    $ref_entry[5]->get_chars(0,-1) );
+    $ref->rp(       $ref_entry[5]->get_chars(0,-1) );
+}
+
+# Redraw the References dialog from the references of the current sequence.
+#
+#   $show_me - when defined, the dialog is shown first.
+#
+# With no current sequence the list and the entries are simply emptied.
+# Otherwise the list rows and the entries are rewritten and the row
+# $current_ref is scrolled into view, selected and given the focus.
+sub update_reference_window
+{
+    my ($show_me) = @_;
+    $ref_window->show_all() if (defined($show_me));
+
+    $ref_clist->freeze();
+    if (!defined($seq[$current]))
+    {
+        $ref_clist->clear();
+        $ref_clist->thaw();
+        foreach (@ref_entry) { $_->set_text(""); }
+        return;
+    }
+    my @refs = $seq[$current]->annotation->each_Reference;
+
+    # keep the selection index inside the list so that the lookups below
+    # never hit a missing element
+    $current_ref = $#refs if ($current_ref > $#refs);
+    $current_ref = 0      if ($current_ref < 0);
+
+    # undefined fields are displayed as empty text
+    my $text = sub { return defined($_[0]) ? $_[0] : ""; };
+
+    # reset clist if rows are different to references
+    if ($ref_clist->rows != @refs) {
+        $ref_clist->clear();
+        foreach (@refs) { $ref_clist->append("","",""); }
+    }
+    # redraw references; every cell is rewritten, so a field that has been
+    # emptied does not leave its old text behind in a reused row
+    for my $i (0..$#refs)
+    {
+        my $medline = $refs[$i]->medline;
+        my $title   = $refs[$i]->title;
+        my $authors = $refs[$i]->authors;
+        $ref_clist->set_text($i,0,$text->($medline));
+        $ref_clist->set_text($i,1,$text->($title));
+        $ref_clist->set_text($i,2,$text->($authors));
+    }
+    # redraw text widgets
+    foreach (@ref_entry) { $_->set_text(""); }
+    if (@refs) {
+        my $ref = $refs[$current_ref];
+        my $title    = $ref->title;
+        my $authors  = $ref->authors;
+        my $comment  = $ref->comment;
+        my $location = $ref->location;
+        my $medline  = $ref->medline;
+        my $rp       = $ref->rp;
+        $ref_entry[0]->set_text($text->($title));
+        $ref_entry[1]->set_text($text->($authors));
+        $ref_entry[2]->set_text($text->($comment));
+        $ref_entry[3]->set_text($text->($location));
+        $ref_entry[4]->set_text($text->($medline));
+#        $ref_entry[5]->set_text($ref->start);
+        $ref_entry[5]->set_text($text->($rp));
+
+        $ref_clist->moveto($current_ref,0,0.3,0.0)
+            if ($ref_clist->row_is_visible($current_ref) ne 'full');
+        # block the select_row handler so that selecting the row here does
+        # not call back into store/update
+        $ref_clist->signal_handler_block($ref_handler_id);
+        $ref_clist->select_row($current_ref,0);
+        $ref_clist->signal_handler_unblock($ref_handler_id);
+        Gtk::CList::set_focus_row($ref_clist,$current_ref);
+    }
+    $ref_clist->thaw();
+}
+
+
+# Build the "About gSequence" window and store it in $about_dialog; the
+# window is not shown by this sub.  Always returns 1.
+sub init_about_dialog {
+    my ($tbox,$vbox,$hbox,$sep,$butbox,$button);
+    $about_dialog = Gtk::Window->new("dialog");
+    $about_dialog->set_title("About gSequence");
+    # Hide the window instead of letting it be destroyed when it is closed
+    # through the window manager, like the delete_event handlers of the
+    # other dialogs in this file.  A handler on "destroy" runs too late to
+    # keep the window.
+    $about_dialog->signal_connect_after("delete_event" =>
+                                        sub { $about_dialog->hide;
+                                              return &Gtk::true; });
+    $about_dialog->set_default_size('350','350');
+    $about_dialog->set_policy(1,1,0);
+    $vbox = Gtk::VBox->new(0,0);
+    $about_dialog->add($vbox);
+    $tbox = Gtk::Label->new("\ngSequence\nAuthor: Lorenz Pollak\n\n
+gSequence is cool! :-)\n(this text is to be written...)
+\n");
+    $vbox->pack_start($tbox,1,1,1);
+
+    # empty box below the text
+    $hbox = Gtk::HBox->new(0,0);
+    $vbox->pack_start($hbox,0,0,0);
+    $sep = Gtk::HSeparator->new;
+    $sep->set_usize(-1,5);
+    $vbox->pack_start($sep,0,1,0);
+
+    # button row with the default "OK" button, which hides the window
+    $butbox = Gtk::HButtonBox->new;
+    $butbox->set_usize(-1,32);
+    $vbox->pack_start($butbox, 0,1,0);
+    $button = Gtk::Button->new_with_label("OK");
+    $button->set_usize(50,-1);
+    $button->signal_connect('clicked', sub { $about_dialog->hide; });
+    $button->can_default(1);
+    $button->grab_default;
+    $butbox->add($button);
+
+    return 1;
+}
+
+# Build the "Sequence Features" dialog and store it in $feature_window; the
+# dialog is not shown by this sub.  The left pane holds the feature tree
+# ($feature_tree); the right pane holds the entries (@feature_entry) and
+# spin buttons (@feature_spinner) for the selected feature plus the
+# Add / Add Subfeature / Delete buttons.
+sub init_feature_window
+{
+    $current_feature_item = 0;
+
+    $feature_window = Gtk::Dialog->new();
+    $feature_window->set_default_size(500,400);
+    $feature_window->set_policy($true,$true,$false);
+    $feature_window->set_title("Sequence Features");
+    $feature_window->border_width(5);
+
+    my $pane = Gtk::HPaned->new();
+    $feature_window->vbox->pack_start( $pane, $true, $true, 0);
+    $pane->set_handle_size( 10 );
+    $pane->set_gutter_size( 8 );
+
+    # Left pane: a VBox for the tree's scrolled window
+    my $vbox = Gtk::VBox->new( $false, 0 );
+    $pane->add1( $vbox );
+
+    # Create a ScrolledWindow for the tree
+    my $tree_scrolled_win = Gtk::ScrolledWindow->new( undef, undef );
+    $tree_scrolled_win->set_usize( 150, 400 );
+    $vbox->pack_start( $tree_scrolled_win, $true, $true, 0 );
+    $tree_scrolled_win->set_policy( 'automatic', 'automatic' );
+
+    #my $list_scrolled_win = new Gtk::ScrolledWindow( undef, undef );
+    #$list_scrolled_win->set_policy( 'automatic', 'automatic' );
+
+    # Right pane: the editing widgets
+    $vbox = Gtk::VBox->new( $false, 0 );
+    $pane->add2( $vbox );
+
+    # first row: primary and source tag entries
+    my $hbox = Gtk::HBox->new( $true, 10 );
+
+    $feature_entry[0] = Gtk::Entry->new();
+    my $frame = Gtk::Frame->new("Primary Tag");
+    $frame->add($feature_entry[0]);
+    $hbox->pack_start( $frame, $true, $true, 0);
+
+    $feature_entry[1] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Source Tag");
+    $frame->add($feature_entry[1]);
+    $hbox->pack_end( $frame, $true, $true, 0);
+
+    $vbox->pack_start( $hbox, $false, $false, 5);
+
+    # second row: start/end spin buttons; changing either one calls
+    # select_feature_region
+    $hbox = Gtk::HBox->new( $true, 10 );
+
+    my $adj = Gtk::Adjustment->new( 0, 0, 0, 0, 0, 0 );
+    $feature_spinner[0] = Gtk::SpinButton->new( $adj, 0.0, 0 );
+    $feature_spinner[0]->signal_connect( "changed", \&select_feature_region);
+    $frame = Gtk::Frame->new("Start");
+    $frame->add($feature_spinner[0]);
+    $hbox->pack_start( $frame, $true, $true, 0);
+
+    $adj = Gtk::Adjustment->new( 0, 0, 0, 0, 0, 0 );
+    $feature_spinner[1] = Gtk::SpinButton->new( $adj, 0.0, 0 );
+    $feature_spinner[1]->signal_connect( "changed", \&select_feature_region);
+    $frame = Gtk::Frame->new("End");
+    $frame->add($feature_spinner[1]);
+    $hbox->pack_start( $frame, $true, $true, 0);
+
+    # the Strand and Score frames are packed without any widget inside
+    $frame = Gtk::Frame->new("Strand");
+    $hbox->pack_start( $frame, $true, $true, 0);
+    $frame = Gtk::Frame->new("Score");
+    $hbox->pack_start( $frame, $true, $true, 0);
+
+    $vbox->pack_start( $hbox, $false, $false, 5);
+
+    $feature_entry[2] = Gtk::Entry->new();
+    $frame = Gtk::Frame->new("Description");
+    $frame->add($feature_entry[2]);
+
+    $vbox->pack_start( $frame, $false, $false, 5);
+
+    my $bbox = Gtk::HBox->new( $false, 5 );
+    $bbox->border_width(10);
+    my $button = Gtk::Button->new("Add");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # add feature button handler: append a placeholder feature to the
+        # sequence and a tree item for it next to the selected item (or to
+        # the root tree when nothing is selected), then select the new item
+        sub { return if (!defined($seq[$current]));
+              store_current_feature() if ($current_feature_item);
+              my $feature = Bio::SeqFeature::Generic->new;
+              $feature->primary_tag("<New>");
+              $seq[$current]->add_SeqFeature( $feature );
+              my $item_new = Gtk::TreeItem->new_with_label( "<New>" );
+              $item_new->set_user_data( $feature );
+              $item_new->signal_connect( 'select', \&select_feature_item );
+              $current_feature_item->parent->append( $item_new )
+                  if ($current_feature_item);
+              $feature_tree->append( $item_new ) if (!$current_feature_item);
+              $item_new->show();
+              $current_feature_item->deselect()
+                  if ($current_feature_item);
+              $item_new->select();
+        } );
+    $button = Gtk::Button->new("Add Subfeature");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # add subfeature button handler: the new subfeature starts out with
+        # the coordinates of the selected feature
+        sub { return if (!defined($seq[$current])||!$current_feature_item);
+              store_current_feature();
+              my $feature = Bio::SeqFeature::Generic->new;
+              $feature->primary_tag("<New>");
+              $feature->start($current_feature_item->get_user_data->start);
+              $feature->end($current_feature_item->get_user_data->end);
+              $current_feature_item->get_user_data->add_sub_SeqFeature( $feature );
+              # NOTE(review): a fresh subtree is installed even when the item
+              # already has one -- confirm that existing child items are not
+              # lost from the display
+              my $new_subtree = Gtk::Tree->new();
+              $current_feature_item->set_subtree( $new_subtree );
+              my $item_new = Gtk::TreeItem->new_with_label( "<New>" );
+              $item_new->set_user_data( $feature );
+              $item_new->signal_connect( 'select', \&select_feature_item );
+              $new_subtree->append( $item_new );
+              $item_new->show();
+              $current_feature_item->deselect();
+              $current_feature_item->expand();
+              $item_new->select();
+        } );
+    $button = Gtk::Button->new("Delete");
+    $bbox->pack_start( $button, $false, $false, 0);
+    $button->signal_connect( "clicked",
+        # delete feature button handler
+        sub { return if (!defined($seq[$current]) || !$current_feature_item);
+              store_current_feature();
+              # presumably the sequence object's own list of top-level
+              # features -- TODO confirm against the Bio::Seq version in use
+              my $flist = $seq[$current]->{_as_feat} || [];
+              my $doomed = $current_feature_item->get_user_data();
+              my $pos;
+              for my $i (0..$#$flist) {
+                  $pos = $i if $$flist[$i]==$doomed;
+              }
+              # The selected feature is not in this list (e.g. it is a
+              # subfeature): leave everything untouched.  Splicing with an
+              # undefined position would remove the first feature instead.
+              return if (!defined($pos));
+              splice @$flist, $pos, 1;
+              $current_feature_item->parent->remove_item($current_feature_item);
+              $current_feature_item=0;
+        } );
+
+    $vbox->pack_end( $bbox, $false, $false, 0);
+
+    # Create root tree
+    $feature_tree = Gtk::Tree->new();
+    $tree_scrolled_win->add_with_viewport( $feature_tree );
+    $feature_tree->set_selection_mode( 'single' );
+    $feature_tree->set_view_mode( 'item' );
+
+    $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+        # close button handler
+        sub { $feature_window->hide();
+              store_current_feature();
+        });
+
+    $feature_window->action_area->pack_start( $bbox, $true, $true, 0 );
+    $feature_window->signal_connect_after( "delete_event",
+        # window delete handler: hide the dialog and report the event as
+        # handled by returning true
+        sub { $feature_window->hide();
+              store_current_feature();
+              return &Gtk::true;
+        });
+}
+
+# 'expand' callback of a feature tree item.
+#
+#   $item    - the tree item being expanded; its user data is the feature
+#   $subtree - the Gtk::Tree that receives one item per subfeature
+#
+# Every new item gets the 'select' handler; items whose feature has
+# subfeatures of its own also get an empty subtree and the expand/collapse
+# handlers.  Each new item is expanded right away.
+sub expand_feature_tree
+{
+    my ( $item, $subtree ) = @_;
+    my $parent_feature = $item->get_user_data();
+
+    for my $child ($parent_feature->sub_SeqFeature)
+    {
+        my $child_item = Gtk::TreeItem->new_with_label( $child->primary_tag );
+        $child_item->set_user_data( $child );
+        $child_item->signal_connect( 'select', \&select_feature_item );
+        $subtree->append( $child_item );
+        $child_item->show();
+
+        if ( $child->sub_SeqFeature )
+        {
+            my $child_tree = Gtk::Tree->new();
+            $child_item->set_subtree( $child_tree );
+            $child_item->signal_connect( 'expand',
+                                         \&expand_feature_tree,
+                                         $child_tree );
+            $child_item->signal_connect( 'collapse', \&collapse_feature_tree );
+        }
+        $child_item->expand();
+    }
+}
+
+
+# 'collapse' callback of a feature tree item: throw away the item's subtree
+# and give it a new, empty one that is wired to expand_feature_tree.
+# NOTE(review): an 'expand' handler is connected on every collapse and none
+# is disconnected here -- check whether handlers pile up on the item.
+sub collapse_feature_tree
+{
+    my ( $item ) = @_;
+
+    $item->remove_subtree();
+
+    my $fresh_subtree = Gtk::Tree->new();
+    $item->set_subtree( $fresh_subtree );
+    $item->signal_connect( 'expand', \&expand_feature_tree, $fresh_subtree );
+}
+
+
+# Write the values of the feature editing widgets back into the feature
+# attached to the selected tree item, then update that item's label.  Does
+# nothing unless there is a current sequence with top-level features and a
+# selected tree item.
+sub store_current_feature
+{
+    return if (!defined($seq[$current]));
+    return if (!($seq[$current]->top_SeqFeatures));
+    return if (!$current_feature_item);
+
+    my $current_feature = $current_feature_item->get_user_data();
+    $current_feature->primary_tag( $feature_entry[0]->get_chars(0,-1) );
+    $current_feature->source_tag( $feature_entry[1]->get_chars(0,-1) );
+
+    # Replace the description tag by the entry text.  A feature that has no
+    # description tag yet receives one as soon as some text was entered;
+    # previously the text was dropped for such features.
+    my $description     = $feature_entry[2]->get_chars(0,-1);
+    my $had_description = $current_feature->has_tag("description");
+    $current_feature->remove_tag("description") if ($had_description);
+    $current_feature->add_tag_value("description", $description)
+        if ($had_description || (defined($description) && $description ne ""));
+
+    $current_feature->start($feature_spinner[0]->get_value_as_int());
+    $current_feature->end($feature_spinner[1]->get_value_as_int());
+    # set tree item (presumably its first child is the label widget)
+    ($current_feature_item->children)[0]->set($current_feature->primary_tag);
+}
+
+# 'select' callback of a feature tree item: save the edits made to the
+# previously selected feature, deselect its item, make the clicked item the
+# current one and show its feature in the editing widgets.
+sub select_feature_item
+{
+    my $clicked_item = $_[0];
+
+    store_current_feature();
+    if ($current_feature_item) {
+        $current_feature_item->deselect();
+    }
+    $current_feature_item = $clicked_item;
+    update_feature_paned2();
+}
+
+# Fill the feature editing widgets (tag entries, description and the
+# start/end spin buttons) from the feature of the selected tree item.  The
+# entries are emptied first; nothing else happens when there is no current
+# sequence or no selected item.
+sub update_feature_paned2
+{
+    foreach my $entry (@feature_entry[0..2]) { $entry->set_text(""); }
+
+    return if (!defined($seq[$current])||(!$current_feature_item));
+    my $current_feature = $current_feature_item->get_user_data();
+    $feature_entry[0]->set_text($current_feature->primary_tag);
+    $feature_entry[1]->set_text($current_feature->source_tag)
+        if (defined($current_feature->source_tag));
+    $feature_entry[2]->set_text(($current_feature->each_tag_value("description"))[0])
+        if ($current_feature->has_tag("description"));
+
+    # BioPerl positions are 1-based and inclusive, so a feature may end on
+    # the last residue.  The spinners therefore go up to the sequence
+    # length; a limit of length-1 silently shortened such features.
+    my $seq_length = $seq[$current]->length;
+    my $start = $current_feature->start;
+    my $end   = $current_feature->end;
+    my @position = ($start, $end);
+
+    # spinner 0 edits the start, spinner 1 the end
+    for my $i (0, 1) {
+        my $adj = Gtk::Adjustment->new($position[$i],
+                                       0,
+                                       $seq_length,
+                                       1,
+                                       1,
+                                       0
+                                      );
+        $feature_spinner[$i]->set_adjustment($adj);
+        $feature_spinner[$i]->set_value($position[$i]);
+        $feature_spinner[$i]->show_all();
+    }
+}
+
+# 'changed' callback of the start/end spin buttons: select, in the editor of
+# the current sequence, the region from the start value to the end value
+# plus one.  Does nothing when the current sequence has no editor, e.g.
+# when a spin button is edited while no sequence is loaded.
+sub select_feature_region
+{
+    my $editor = $seq_edit[$current];
+    return if (!defined($editor));
+
+    $editor->freeze;
+    $editor->select_region($feature_spinner[0]->get_value_as_int(),
+                           $feature_spinner[1]->get_value_as_int()+1);
+    $editor->thaw;
+}
+
+# Rebuild the feature tree from the top-level features of the current
+# sequence and refresh the editing widgets.
+#
+#   $show_me - when defined, the dialog is shown first.
+#
+# All tree items are recreated, so the previously selected item cannot stay
+# current.  If its feature is again a top-level feature of the current
+# sequence the new item for it is selected; otherwise nothing is selected
+# and the editing widgets are emptied.
+sub update_feature_window
+{
+    my ($show_me) = @_;
+    $feature_window->show_all() if (defined($show_me));
+
+    # remember the selected feature before its tree item is discarded
+    my $selected_feature = $current_feature_item
+        ? $current_feature_item->get_user_data()
+        : undef;
+    $current_feature_item = 0;
+
+    $feature_tree->clear_items(0,-1);
+    if (!defined($seq[$current]))
+    {
+        update_feature_paned2();
+        return;
+    }
+
+    my $reselect;
+    foreach my $feature ($seq[$current]->top_SeqFeatures)
+    {
+        my $item_new = Gtk::TreeItem->new_with_label( $feature->primary_tag );
+        $item_new->set_user_data( $feature );
+        $item_new->signal_connect( 'select', \&select_feature_item );
+        $feature_tree->append( $item_new );
+        if ( $feature->sub_SeqFeature )
+        {
+            # children are added by expand_feature_tree when the item expands
+            my $new_subtree = Gtk::Tree->new();
+            $item_new->set_subtree( $new_subtree );
+            $item_new->signal_connect( 'expand',
+                                       \&expand_feature_tree,
+                                       $new_subtree );
+            $item_new->signal_connect( 'collapse', \&collapse_feature_tree );
+        }
+        $item_new->expand();
+        $reselect = $item_new
+            if (defined($selected_feature) && $feature == $selected_feature);
+    }
+    $feature_tree->show_all();
+
+    # selecting the item runs select_feature_item, which makes it current
+    $reselect->select() if ($reselect);
+
+    update_feature_paned2();
+}
+
+# Placeholder with an empty body: nothing is saved yet.  It is called by
+# the "Save" button of the preferences dialog.
+sub store_prefs
+{
+}
+
+# Show the preferences dialog together with all of its child widgets.
+sub update_pref_window
+{
+    my $dialog = $pref_window;
+    return $dialog->show_all();
+}
+
+# Build the preferences dialog and store it in $pref_window; the dialog is
+# not shown by this sub.  It has one notebook page, "Import Options", with
+# a "Flat File Indexes" group and a "Remote DBs" group.  The entries are
+# kept in @pref_entry, indexed in the order in which they are created.
+sub init_pref_window
+{
+    $pref_window = Gtk::Dialog->new();
+    $pref_window->set_default_size(500,400);
+    $pref_window->set_policy($true,$true,$false);
+    $pref_window->border_width( 5 );
+
+    # Creates $pref_entry[$index] inside a titled frame and returns the
+    # frame, which the caller packs.
+    my $framed_entry = sub {
+        my ($index, $title) = @_;
+        $pref_entry[$index] = Gtk::Entry->new();
+        my $entry_frame = Gtk::Frame->new($title);
+        $entry_frame->add($pref_entry[$index]);
+        return $entry_frame;
+    };
+
+    # Notebook with its tabs at the top
+    my $notebook = Gtk::Notebook->new();
+    $pref_window->vbox->pack_start( $notebook, $true, $true, 0);
+    $notebook->set_tab_pos( 'top' );
+
+    my $main_vbox = Gtk::VBox->new($false,10);
+
+    # --- group "Flat File Indexes" ---
+    my $label = Gtk::Label->new( "Import Options" );
+    my $group = Gtk::Frame->new("Flat File Indexes");
+    my $vbox = Gtk::VBox->new($false,10);
+    $group->add($vbox);
+    $main_vbox->pack_start($group,$false,$false,10);
+
+    $notebook->append_page( $main_vbox, $label );
+
+    my $hbox = Gtk::HBox->new($false,0);
+    $hbox->pack_start( $framed_entry->(0, "Indexes Directory"), $true, $false, 0);
+    $hbox->pack_start( $framed_entry->(1, "Index Type"), $false, $false, 0);
+    $vbox->pack_start( $hbox, $false, $false, 0);
+
+    $vbox->pack_start( $framed_entry->(2, "Fasta Index Name"), $false, $false, 0);
+    $vbox->pack_start( $framed_entry->(3, "SwissProt Index Name"), $false, $false, 0);
+    $vbox->pack_start( $framed_entry->(4, "SwissPfam Index Name"), $false, $false, 0);
+
+    # --- group "Remote DBs" ---
+    $group = Gtk::Frame->new("Remote DBs");
+    $hbox = Gtk::HBox->new($false,10);
+    $group->add($hbox);
+    $main_vbox->pack_start($group,$false,$false,10);
+
+    $hbox->pack_start( $framed_entry->(5, "AceDB host"), $true, $false, 0);
+    $hbox->pack_start( $framed_entry->(6, "AceDB port"), $false, $false, 0);
+
+    $notebook->set_page( 0 );
+
+    # --- button row ---
+    my $bbox = Gtk::HButtonBox->new();
+    $bbox->set_layout("end");
+
+    my $button = Gtk::Button->new( "Save" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+        # save button handler: hide the dialog, then store the preferences
+        sub { $pref_window->hide();
+              &store_prefs();
+        });
+
+    $button = Gtk::Button->new( "Close" );
+    $bbox->add( $button );
+    $button->signal_connect("clicked",
+        # close button handler
+        sub { $pref_window->hide();
+        });
+
+    $pref_window->action_area->pack_start( $bbox, $true, $true, 0 );
+    $pref_window->signal_connect_after( "delete_event",
+        # window delete handler: hide the dialog and return true
+        sub { $pref_window->hide();
+              return &Gtk::true;
+        });
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tk/gsequence.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tk/hitdisplay.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tk/hitdisplay.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tk/hitdisplay.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+#!/usr/local/bin/perl -w
+#
+# PROGRAM  : hitdisplay.pl
+# PURPOSE  : Demonstrate Bio::Tk::HitDisplay
+# AUTHOR   : Keith James kdj at sanger.ac.uk
+# CREATED  : Nov 1 2000
+#
+# Requires Bio::Tk::HitDisplay
+#
+# To use, just pipe Blast output into this script. Try clicking on
+# the blue Subject ids with the left button to activate a callback
+# or with the right button to show text describing the hit.
+# 
+
+use strict;
+use Text::Wrap qw(wrap $columns);
+use Bio::Tools::BPlite;
+# Try to load the GUI modules at compile time, before the 'use Tk' below is
+# reached, and leave with an explanation on STDERR when one of them is
+# missing.
+BEGIN {
+    eval {
+        require 'Tk.pm';
+        require 'Bio/Tk/HitDisplay.pm';
+    };
+    if( $@ ) {
+        print STDERR "Must have bioperl-gui and Tk installed to run this test, see bioperl website www.bioperl.org for instructions on how to install bioperl-gui modules\n";
+        exit;
+    }
+
+}
+use Tk;
+    # $columns is imported from Text::Wrap above
+    $columns = 80;
+
+# Blast report parser reading from standard input
+my $report = Bio::Tools::BPlite->new(-fh => \*STDIN);
+
+# Normally the code ref below is in a separate package and I do 
+# something like:
+#
+# my $adapter = Bio::PSU::IO::Blast::HitAdapter->new;
+#
+# while (my $hit = $result->next_hit)
+# {
+#     my $text     = " ... ";
+#     my $callback = sub { ... };
+#     push(@hits, $adapter->($sbjct, $text, $callback));
+# }
+#
+# It's easy to roll your own for Fasta, or whatever.
+
+# Converts one Blast subject into the hash reference stored in @hits.
+#
+#   $sbjct    - subject object; its HSPs are read with nextHSP until exhausted
+#   $text     - stored unchanged under 'text'
+#   $callback - stored unchanged under 'callback'
+#
+# 'data' holds one [query start, query end, subject start, subject end]
+# entry per HSP.  'expect', 'score' and 'overlap' are the P value, percent
+# and length of the HSP with the smallest P value (the first such HSP on
+# ties).  The ids and lengths come from the first HSP yielding a true value.
+my $adapter = sub
+{
+    my ($sbjct, $text, $callback) = @_;
+
+    my @segments;
+    my ($best_p, $best_percent, $best_length);
+    my ($query_id, $sbjct_id, $query_len, $sbjct_len);
+
+    while (my $hsp = $sbjct->nextHSP)
+    {
+        $query_id  ||= $hsp->query->seqname;
+        $sbjct_id  ||= $hsp->subject->seqname;
+
+        $query_len ||= $hsp->query->seqlength;
+        $sbjct_len ||= $hsp->subject->seqlength;
+
+        my $query_from = $hsp->query->start;
+        my $query_to   = $hsp->query->end;
+        my $sbjct_from = $hsp->subject->start;
+        my $sbjct_to   = $hsp->subject->end;
+        push @segments, [ $query_from, $query_to, $sbjct_from, $sbjct_to ];
+
+        # the first HSP always counts; later ones only when strictly better
+        my $p = $hsp->P;
+        if (!defined($best_p) || $p < $best_p)
+        {
+            $best_p       = $p;
+            $best_percent = $hsp->percent;
+            $best_length  = $hsp->length;
+        }
+    }
+
+    my %hit = (
+        q_id     => $query_id,
+        s_id     => $sbjct_id,
+        expect   => $best_p,
+        score    => $best_percent,
+        overlap  => $best_length,
+        q_len    => $query_len,
+        s_len    => $sbjct_len,
+        data     => \@segments,
+        text     => $text,
+        callback => $callback,
+    );
+    return \%hit;
+};
+
+# One record per Blast subject, in report order
+my @hits;
+
+while (my $sbjct = $report->nextSbjct)
+{
+    # Text shown for the hit and callback run for the hit.
+    # NOTE(review): the header of this script says the left button runs the
+    # callback and the right button shows the text, while the comments that
+    # used to be here said the opposite -- confirm with Bio::Tk::HitDisplay.
+    my @text_parts = ("Blast hit to: ", $sbjct->name, "\n");
+    my $text = wrap("", "", @text_parts);
+
+    my $callback = sub { print "Blast hit to ", $sbjct->name, "\n" };
+
+    # Convert subject, text and callback into a hash
+    my $record = $adapter->($sbjct, $text, $callback);
+    push @hits, $record;
+}
+
+# Create the main window and a scrolled HitDisplay showing @hits
+my $mw = MainWindow->new;
+
+# values passed to the widget's -hitcolours option
+my %hit_colours = (
+    10 => 'pink',
+    20 => 'purple',
+    40 => 'yellow',
+    60 => 'gold',
+    70 => 'orange',
+    90 => 'red',
+);
+
+my $hds = $mw->Scrolled(
+    'HitDisplay',
+    -borderwidth => 5,
+    -scrollbars  => 'ose',
+    -width       => 600,
+    -height      => 300,
+    -background  => 'white',
+    -hitcolours  => \%hit_colours,
+    -interval    => 15,
+    -hitdata     => \@hits,
+);
+
+$hds->pack(-side => 'top', -fill => 'both', -expand => 1);
+
+# once the widget is visible, set its height and make the scroll region
+# cover everything that was drawn
+$hds->waitVisibility;
+$hds->configure(-height => 900);
+my @everything = $hds->bbox("all");
+$hds->configure(-scrollregion => [@everything]);
+
+MainLoop;


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tk/hitdisplay.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/extract_genes.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/extract_genes.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/extract_genes.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+# $Id: extract_genes.pl,v 1.4 2006/02/28 02:52:11 bosborne Exp $
+=pod
+
+=head1 NAME
+
+extract_genes.pl - extract genomic sequences from NCBI files
+using BioPerl
+
+=head1 DESCRIPTION
+
+This script is a simple solution to the problem of
+extracting genomic regions corresponding to genes. There are other 
+solutions, this particular approach uses genomic sequence 
+files from NCBI and gene coordinates from Entrez Gene.
+
+The first time this script is run it will be slow as it will
+extract species-specific data from the gene2accession file and create
+a storable hash (retrieving the positional data from this hash is
+significantly faster than reading gene2accession each time the script
+runs). The subsequent runs should be fast.
+
+=head1 INSTALLATION
+
+=head2 Install BioPerl
+
+Install BioPerl, full instructions at http://bioperl.org.
+
+=head2 Download gene2accession.gz
+
+Download this file from ftp://ftp.ncbi.nlm.nih.gov/gene/DATA into 
+your working directory and gunzip it.
+
+=head2 Download sequence files
+
+Create one or more species directories in the working directory, the
+directory names do not have to match those at NCBI (e.g. "Sc", "Hs").
+
+Download the nucleotide fasta files for a given species from its CHR*
+directories at ftp://ftp.ncbi.nlm.nih.gov/genomes and put these files into a 
+species directory. The sequence files will have the suffix ".fna" or 
+"fa.gz", gunzip if necessary.
+
+=head2 Determine Taxon id
+
+Determine the taxon id for the given species. This id is the first column
+in the gene2accession file. Modify the %species hash in this script
+such that name of your species directory is a key and the taxon id is the 
+value.
+
+=head2 Command-line options
+
+  -i   Gene id
+  -s   Name of species directory
+  -h   Help
+
+Example:
+
+  extract_genes.pl -i 850302 -s Sc
+
+=cut
+
+use strict;
+use Bio::DB::Fasta;
+use Getopt::Long;
+use Storable;
+
+# Species directory name => taxon id.  The taxon id is the value found in
+# the first column of gene2accession (see "Determine Taxon id" above).
+my %species = ( "Sc" => 4932,  # Saccharomyces cerevisiae
+                "Ec" => 83333, # Escherichia coli K12
+                "Hs" => 9606   # H. sapiens
+              );
+
+my ($help,$id,$name);
+
+GetOptions( "s=s"  =>  \$name,
+            "i=i"  =>  \$id,
+            "h"    =>  \$help );
+
+usage() if ($help || !$id || !$name);
+
+# per-species cache written with Storable on the first run
+my $storedHash = $name . ".dump";
+
+# create index for a directory of fasta files
+my $db = Bio::DB::Fasta->new($name, -makeid => \&make_my_id);
+
+# extract species-specific data from gene2accession (first run only)
+unless (-e $storedHash) {
+    # the taxon id is only needed here, so an existing dump keeps working
+    # even for a species that is not listed in %species
+    die "Unknown species '$name': add it and its taxon id to \%species\n"
+        unless exists $species{$name};
+    my $taxon_id = $species{$name};
+
+    # start from an empty hash so that a species without any matching row
+    # can still be stored
+    my $ref = {};
+    open my $gene2acc, '<', "gene2accession" or die "No gene2accession file\n";
+    while (my $line = <$gene2acc>) {
+        next if $line =~ /^#/;       # comment/header line
+        my @arr = split "\t", $line;
+        next if @arr < 11;           # too short to carry both positions
+        # columns used: 0 taxon id, 1 gene id, 7 sequence id, 9 start,
+        # 10 end, 11 strand
+        if ($arr[0] == $taxon_id && $arr[9] =~ /\d+/ && $arr[10] =~ /\d+/) {
+            $ref->{$arr[1]} = { "start"  => $arr[9],
+                                "end"    => $arr[10],
+                                "strand" => $arr[11],
+                                "id"     => $arr[7] };
+        }
+    }
+    close $gene2acc;
+    # save species-specific information using Storable
+    store $ref, $storedHash;
+}
+
+# retrieve the species-specific data from a stored hash
+my $ref = retrieve($storedHash);
+
+# retrieve sequence and sub-sequence
+if (defined $ref->{$id}) {
+    my $gene = $ref->{$id};
+    my $chr = $db->get_Seq_by_id($gene->{"id"});
+    die "Cannot find sequence '$gene->{id}' in directory '$name'\n"
+        unless defined $chr;
+    my $seq = $chr->trunc($gene->{"start"},$gene->{"end"});
+    $seq = $seq->revcom if ($gene->{"strand"} eq "-");
+
+    # Insert SeqIO options here...
+    print $seq->seq,"\n";
+} else {
+    print "Cannot find id: $id\n";
+}
+
+# Callback handed to Bio::DB::Fasta via -makeid: returns the id to use for
+# the header line passed in.  This is the text following "ref|" up to the
+# next "|"; for a header without such a field it is the first word of the
+# line (without the leading ">").  Without that fallback a stale $1 from an
+# earlier match would be returned.
+sub make_my_id {
+    my $line = shift;
+    return $1 if $line =~ /ref\|([^|]+)/;
+    return $1 if $line =~ /^>?(\S+)/;
+    return;
+}
+
+# Show this script's documentation with perldoc, then exit.  The list form
+# of system() bypasses the shell, so a script path containing spaces or
+# shell metacharacters is passed to perldoc unchanged.
+sub usage {
+    system "perldoc", $0;
+    exit;
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/extract_genes.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/gb_to_gff.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/gb_to_gff.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/gb_to_gff.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,23 @@
+#!/usr/local/bin/perl -w
+use strict;
+
+use Bio::Tools::GFF;
+use Bio::SeqIO;
+
+# The only argument is a readable GenBank file
+my $seqfile = $ARGV[0];
+unless ( defined $seqfile && -r $seqfile ) {
+    die("must define a valid seqfile to read");
+}
+
+my $seqio = Bio::SeqIO->new(-format => 'genbank',
+                            -file   => $seqfile);
+
+# One GFF file per sequence, named after the display id, or "seq-N" (N is
+# the position of the sequence in the input) when the id is empty
+my $count = 0;
+while( my $seq = $seqio->next_seq ) {
+    ++$count;
+    my $basename = $seq->display_id || "seq-$count";
+    my $fname = sprintf("%s.gff", $basename);
+    my $gffout = Bio::Tools::GFF->new(-file        => ">$fname",
+                                      -gff_version => 1);
+
+    for my $top_feature ( $seq->top_SeqFeatures() ) {
+        $gffout->write_feature($top_feature);
+    }
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/gb_to_gff.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/gff2ps.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/gff2ps.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/gff2ps.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,255 @@
+#!/usr/local/bin/perl
+
+
+=head1 NAME
+
+gff2ps - you will want to change this script
+
+=head2 SYNOPSIS
+
+   perl gff2ps < file.gff > file.ps
+
+=head2 DESCRIPTION
+
+This script provides GFF to postscript handling. Due to the ... ummm
+... potential for flexible reinterpretation that is GFF, this script
+will almost certainly need modifying for anyone else's use (basically,
+you need to know what you want to get out of the GFF file and how to
+draw it). But it does include code to draw the most challenging thing
+out there - genes - and should give you a good example of where to
+start
+
+=head2 AUTHOR
+
+Ewan Birney
+
+=cut
+
+
+use Bio::Tools::GFF;
+
+# Drawing defaults; each can be overridden on the command line.
+my $font        = 8;    # size given to "scalefont" in the PostScript header
+my $scale       = 200;  # divisor applied to sequence coordinates when drawing
+my $rotate      = 1;    # true: emit a translate/rotate before drawing
+my $feature_off = 0;    # subtracted from every feature start and end
+
+use Getopt::Long;
+
+GetOptions(
+    "scale=i"  => \$scale,
+    "font=i"   => \$font,
+    "rotate=i" => \$rotate,
+    "start=i"  => \$feature_off,
+);
+
+
+my $gffio = Bio::Tools::GFF->new(-fh => \*STDIN, -gff_version => 1);
+
+use Data::Dumper;
+
+# One synthetic 'transcript' feature per group name, holding that group's
+# coding exons as sub-features.
+my %set;
+
+# loop over the input stream
+while ( my $f = $gffio->next_feature() ) {
+    $f->start($f->start - $feature_off);
+    $f->end  ($f->end   - $feature_off);
+
+    # Ignore features that fall outside the drawn window.
+    next if $f->start < 0;
+    next if $f->start > $scale*1000;
+
+    next unless $f->primary_tag =~ /coding_exon/;
+
+    # The group tag names the transcript; whitespace is stripped so the
+    # value can serve as hash key and as the printed label.
+    my ($group) = $f->each_tag_value('group');
+    $group =~ s/\s+//g;
+
+    if ( !defined $set{$group} ) {
+        $set{$group} = Bio::SeqFeature::Generic->new();
+        $set{$group}->seqname($f->seqname);
+        $set{$group}->primary_tag('transcript');
+        $set{$group}->source_tag($f->source_tag);
+        $set{$group}->add_tag_value('id',$group);
+    }
+    $set{$group}->add_sub_SeqFeature($f,'EXPAND');
+    $set{$group}->strand($f->strand);
+}
+$gffio->close();
+
+
+#foreach my $set ( values %set ) {
+#    print $set->gff_string,"\n";
+#    foreach $sub ( $set->sub_SeqFeature ) {
+#	print $sub->gff_string,"\n";
+#    }
+#}
+
+
+# Split the assembled transcripts by strand and record the right-most end
+# in the global $max, which draw_gene() reads to size its layout rows.
+
+my @forward;
+my @reverse;
+
+$max = 0;
+
+foreach my $set ( values %set ) {
+    $max = $set->end if $set->end > $max;
+
+    if ( $set->strand == -1 ) {
+        push(@reverse,$set);
+    } else {
+        push(@forward,$set);
+    }
+}
+
+# Left-to-right order on each strand.
+@forward = sort { $a->start <=> $b->start } @forward;
+@reverse = sort { $a->start <=> $b->start } @reverse;
+
+print_header(\*STDOUT);
+
+if ( $rotate ) {
+    print "0 700 translate\n";
+    print "-90 rotate\n";
+}
+
+# Ruler baseline.
+print "0 200 moveto 900 200 lineto stroke\n";
+
+my $bp_max = $scale*900;
+
+# One tick and one label every 5000 sequence units along the ruler.
+for ( my $bp = 0; $bp < $bp_max; $bp += 5000 ) {
+    my $x     = $bp/$scale;
+    my $label = int( $feature_off + ($bp/1000) );
+    print STDOUT $x," 200 moveto ",$x," 197 lineto\n";
+    print STDOUT $x," 195 moveto ($label) show\n";
+}
+
+
+# Forward strand is drawn above the ruler (y=220), reverse below (y=180).
+draw_gene(\@forward,1,$scale,220,\*STDOUT);
+draw_gene(\@reverse,-1,$scale,180,\*STDOUT);
+
+print "showpage\n";
+
+
+
+
+# Write the PostScript for one strand's genes to $fh.
+#
+# Arguments:
+#   $gene_array - array ref of transcript features; each is asked for its
+#                 'id' tag and its exon sub-features
+#   $strand     - 1 stacks rows upwards from $offset, any other value
+#                 stacks them downwards
+#   $scale      - divisor turning sequence coordinates into page units
+#   $offset     - y coordinate of the first row
+#   $fh         - filehandle receiving the PostScript
+#
+# Reads the file-level global $max (right-most feature end) to size the
+# occupancy rows.
+sub draw_gene {
+    my ($gene_array,$strand,$scale,$offset,$fh) = @_;
+
+    # Occupancy map: one string of '0'/'1' per row, one character per page
+    # unit.  A gene is placed on the first row whose span is still all '0';
+    # when every existing row is taken a new row is added.
+    my $bump_end     = int($max/$scale);
+    my @bump_array   = ( '0' x $bump_end );
+    my $bump_row_max = 1;
+
+    foreach my $f ( @$gene_array ) {
+
+        my $bump_start = int($f->start()/$scale) - 1;
+        # A gene in the first page unit would give -1, which substr()
+        # interprets as "last character of the row"; pin it to the left edge.
+        $bump_start = 0 if $bump_start < 0;
+        my $bump_len = int(($f->end - $f->start)/$scale) + 1;
+
+        # text might be longer than gene. Mystic number 5 looks good for
+        # 8 point helvetica; you will have to change this otherwise.
+        my ($gene_id) = $f->each_tag_value('id');
+        if ( (length $gene_id)*5 > $bump_len ) {
+            $bump_len = (length $gene_id)*5;
+        }
+
+        # figure out the first row with room for this gene
+        my $i;
+        for ( $i = 0; $i < $bump_row_max; $i++ ) {
+            last if substr($bump_array[$i],$bump_start,$bump_len) !~ /1/;
+        }
+
+        # $i == $bump_row_max means no existing row had room
+        if ( $i == $bump_row_max ) {
+            $bump_array[$bump_row_max] = '0' x $bump_end;
+            $bump_row_max++;
+        }
+
+        # mark the span as taken
+        substr($bump_array[$i],$bump_start,$bump_len) = '1' x $bump_len;
+
+        # Rows are 20 units apart.  Gene hats go the other way up on the
+        # reverse strand, but not the text.
+        my ($text,$bottom,$top,$mid);
+        if ( $strand == 1 ) {
+            $text   = $offset+($i*20)+1;
+            $bottom = $offset+($i*20)+10;
+            $top    = $offset+($i*20)+20;
+            $mid    = $offset+($i*20)+15;
+        } else {
+            $text   = $offset-($i*20)-19;
+            $bottom = $offset-($i*20);
+            $top    = $offset-($i*20)-10;
+            $mid    = $offset-($i*20)-5;
+        }
+
+        print {$fh} $f->start()/$scale," ",$text," moveto\n";
+        print {$fh} "($gene_id) show\n";
+
+        my $prev;
+
+        foreach my $exon ( $f->sub_SeqFeature ) {
+            my $left  = $exon->start()/$scale;
+            my $right = $exon->end()/$scale;
+
+            # exon box
+            print {$fh} $left," ",$bottom," moveto\n";
+            print {$fh} $right," ",$bottom," lineto\n";
+            print {$fh} $right," ",$top, " lineto\n";
+            print {$fh} $left," ",$top," lineto\n";
+            print {$fh} "closepath stroke\n";
+
+            # draw the intron hat from the previous exon to this one; all
+            # three path segments go to $fh so the hat stays in one stream
+            if ( defined $prev ) {
+                my $intron_len = $exon->start - $prev->end;
+                my $peak       = ($prev->end + ($intron_len/2))/$scale;
+
+                print {$fh} $prev->end()/$scale," ",$mid," moveto\n";
+                print {$fh} $peak," ",$top," lineto\n";
+                print {$fh} $left," ",$mid," lineto stroke\n";
+            }
+
+            $prev = $exon;
+        }
+
+    }
+}
+
+
+# Emit the PostScript prologue on the given handle: the %! magic line, a
+# creator comment, the line width, and Helvetica at the size held in the
+# file-level $font.
+sub print_header {
+    my ($out) = @_;
+
+    my @prologue = (
+        '%!PS-Adobe-2.0',
+        '% Created by Genome2ps. Ewan Birney <birney@ebi.ac.uk>',
+        '0.5 setlinewidth',
+        "/Helvetica findfont $font scalefont setfont",
+    );
+
+    print {$out} join( '', map { "$_\n" } @prologue );
+
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/gff2ps.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/parse_codeml.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/parse_codeml.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/parse_codeml.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Bio::Tools::Phylo::PAML;
+use Bio::Root::IO;
+
+# Usage: parse_codeml.pl <codeml output file> [verbosity]
+my ($file, $verbose) = @ARGV;
+
+my $parser = Bio::Tools::Phylo::PAML->new(-file    => $file,
+                                          -verbose => $verbose);
+
+my $result = $parser->next_result
+    or die "no PAML result could be parsed from the input\n";
+
+my @otus     = $result->get_seqs();
+my $MLmatrix = $result->get_MLmatrix();
+my $NGmatrix = $result->get_NGmatrix();
+
+# These matrices are length(@otu) x length(@otu) "strict lower
+# triangle" 2D-matrices, which means that the diagonal and
+# everything above it is undefined.  Each of the defined cells is a
+# hashref of estimates for "dN", "dS", "omega" (dN/dS ratio), "t",
+# "S" and "N".  If a ML matrix, "lnL" will also be defined.
+
+print_omega('ML', $MLmatrix, \@otus);
+print_omega('NG', $NGmatrix, \@otus);
+
+# Print the omega ratio of every cell [$i][$j] with $j > $i of one matrix.
+#
+#   $label  - matrix name used in the message ("ML" or "NG")
+#   $matrix - array ref of array refs of estimate hashes; nothing is
+#             printed when it is undefined
+#   $otus   - array ref of the sequence objects naming rows and columns
+#
+# Each matrix is walked by its own dimensions rather than borrowing the
+# bounds of another one.
+sub print_omega {
+    my ($label, $matrix, $otus) = @_;
+    return unless $matrix;
+
+    for my $i ( 0 .. $#{$matrix} ) {
+        my $row = $matrix->[$i] || [];
+        for my $j ( $i + 1 .. $#{$row} ) {
+            printf "The $label omega ratio for sequences %s vs %s was: %g\n",
+              $otus->[$i]->id, $otus->[$j]->id, $row->[$j]->{omega};
+        }
+    }
+    return;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/parse_codeml.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/psw.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/psw.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/psw.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+#!/usr/bin/perl
+
+# PROGRAM  : psw.pl
+# PURPOSE  : Simple driver for Bio::Tools::pSW
+# AUTHOR   : Ewan Birney birney at sanger.ac.uk 
+# CREATED  : Tue Oct 27 1998
+# REVISION : $Id: psw.pl,v 1.1 2003/07/07 18:20:59 bosborne Exp $
+#
+# INSTALLATION
+#  
+# you almost certainly have to have installed bioperl 
+# from the makefile system for this to work. This is
+# because this module use XS extensions (C source code
+# 'compiled into' perl)
+#
+# The lib system below is just so that I (ewan) can test it
+# on site... 
+#
+
+use lib "/nfs/disk100/pubseq/wise/PerlMod/";
+
+# 
+# This is a simple example script. We are going
+# to make 3 sequences directly from memory and
+# then align them once using blosum matrix and once
+# using a gonnet matrix. These matrices should be
+# in the examples directory.
+#
+
+use Bio::Tools::pSW;
+
+# redundant, as Bio::Tools::pSW uses them, but useful to say
+# precisely what we are using ;)
+
+use Bio::Seq; 
+use Bio::SimpleAlign;
+use Bio::AlignIO;
+
+# The three test proteins are typed in as wrapped text for readability;
+# every character that is not an upper-case letter (i.e. the line breaks)
+# is deleted before the Bio::Seq object is built.
+
+$tseq = 'SKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVVMRDPNTKRSRGFGFVT
+YATVEEVDAAMNARPHKVDGRVVEPKRAVSREDSQRPGAHLTVKKIFVGGIKEDTEEHHL
+RDYFEQYGKIEVIEIMTDRGSGKKRGFAFVTFDDHDSVDKIVIQKYHTVNGHNCEVRKAL
+SKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNFSGRGGFGGSRGGGGYGGSG
+DGYNGFGNDGGYGGGGPGYSGGSRGYGSGGQGYGNQGSGYGGSGSYDSYNNGGGRGFGGG
+SGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNFGGRSSGPYGGGGQYFAKPRNQGGYGGSS
+SSSSYGSGRRF';
+
+$tseq =~ tr/A-Z//cd;
+
+$seq1 = Bio::Seq->new(
+    -id  => 'roa1_human',
+    -seq => $tseq,
+);
+
+$tseq = 'MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHFEKWGNIVDV
+VVMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVVEPKRAVPRQDIDSPNAGAT
+VKKLFVGALKDDHDEQSIRDYFQHFGNIVDINIVIDKETGKKRGFAFVEFDDYDPVDKVV
+LQKQHQLNGKMVDVKKALPKQNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWN
+NGGNNWGNNRGGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNW
+NNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGGNNQGFNNGG
+NNRRY';
+
+$tseq =~ tr/A-Z//cd;
+
+$seq2 = Bio::Seq->new(
+    -id  => 'roa1_drome',
+    -seq => $tseq,
+);
+
+$tseq = 'MHKSEAPNEPEQLRKLFIGGLSFETTDESLREHFEQWGTLTDCVVMRDPNSKRSRGFGFV
+TYLSTDEVDAAMTARPHKVDGRVVEPKRAVSREDSSRPGAHLTVKKIFVGGIKEDTEEDH
+LREYFEQYGKIEVIEIMTDRGSGKKRGFAFVTFEDHDSVDKIVIQKYHTVNNHNSQVRKA
+LSKQEMASVSGSQRERGGSGNYGSRGGFGNDNFGGRGGNFGGNRGGGGGFGNRGYGGDGY
+NGDGQLWWQPSLLGWNRGYGAGQGGGYGAGQGGGYGGGGQGGGYGGNGGYDGYNGGGSGF
+SGSGGNFGSSGGYNDFGNYNSQSSSNFGPMKGGNYGGGRNSGPYGGGYGGGSASSSSGYG
+GGRRF';
+
+$tseq =~ tr/A-Z//cd;
+
+$seq3 = Bio::Seq->new(
+    -id  => 'roa1_xenla',
+    -seq => $tseq,
+);
+
+
+#
+# Now make an Alignment Factory with blosum62 as a matrix,
+# gap penalty 12 and extension penalty 2
+#
+
+my $fac = Bio::Tools::pSW->new(-matrix => 'blosum62.bla',-gap => 12, -ext => 2);
+
+
+#
+# run seq1 vs seq2 and seq1 vs seq3 and write the output direct
+# to stdout using the 'pretty' method.  The handle is passed as a glob
+# reference (the same form used for the MSF writer below) rather than as
+# a bareword, which is only a plain string and is rejected under strict.
+#
+
+$fac->align_and_show($seq1,$seq2,\*STDOUT);
+print "Next alignment\n";
+$fac->align_and_show($seq1,$seq3,\*STDOUT);
+
+
+#
+# a different factory, using gonnet, and now make a simple align and
+# provide MSF format
+#
+
+$fac = Bio::Tools::pSW->new(-matrix => 'gon250.bla',-gap => 12, -ext => 2);
+
+
+# switch on reporting this time and change the amount of memory it is allowed
+
+print STDOUT "Doing the next calculation in limited memory, with a progress report\n";
+
+$fac->report(1);
+$fac->kbyte(100);
+
+my $al = $fac->pairwise_alignment($seq1,$seq2);
+
+
+# write out a MSF file
+my $out = Bio::AlignIO->newFh('-fh'=> \*STDOUT,  '-format' => 'msf');
+print $out $al;
+#$al->write_MSF(\*STDOUT);
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/psw.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/run_genscan.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/run_genscan.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/run_genscan.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+# Brian Osborne
+# script to run genscan on all nucleotide sequences in a fasta file
+# and save results as the fasta files <file>.gs.pept and <file>.gs.cds,
+# and <file>.gs.exons
+
+use Bio::SeqIO;
+use Bio::Seq;
+use Getopt::Long;
+use Bio::Tools::Genscan;
+use strict;
+
+# GENSCAN matrix
+my $matrix = "/home/bosborne/src/genscan/HumanIso.smat";
+
+my $file;
+my $i = 0;    # running count of predicted genes over all input sequences
+
+GetOptions( "f|file=s" => \$file );
+usage() if ( !$file );
+
+my $pept_out = Bio::SeqIO->new(-file   => ">$file.gs.pept",
+                               -format => "fasta");
+my $cds_out = Bio::SeqIO->new(-file   => ">$file.gs.cds",
+                              -format => "fasta");
+my $exons_out = Bio::SeqIO->new(-file   => ">$file.gs.exons",
+                                -format => "fasta");
+
+my $in = Bio::SeqIO->new(-file => $file , -format => 'Fasta');
+
+while ( my $seq = $in->next_seq() ) {
+   die "Input sequence is protein\n" if ( $seq->alphabet eq 'protein' );
+
+   # create temp file, input to GENSCAN; close it so the whole record is
+   # on disk before the external program reads it
+   my $temp_out = Bio::SeqIO->new(-file   => ">temp.fa",
+                                  -format => "fasta");
+   $temp_out->write_seq($seq);
+   $temp_out->close();
+
+   # The id comes straight from the input file and ends up inside a shell
+   # command, so anything outside a conservative filename alphabet
+   # (including the "|" of NCBI-style ids) is replaced by "-".
+   my $file_id = $seq->display_id;
+   $file_id =~ s/[^A-Za-z0-9_.+-]/-/g;
+   my $raw_file = "$file_id.gs.raw";
+
+   my $status = system "genscan $matrix temp.fa -cds > $raw_file";
+   unlink "temp.fa";
+
+   # Skip this sequence, but keep going, when genscan could not be run or
+   # reported failure; its output cannot be trusted in that case.
+   if ( $status != 0 ) {
+      warn "genscan failed for ", $seq->display_id,
+           " (system returned $status), skipping\n";
+      unlink $raw_file;
+      next;
+   }
+
+   my $genscan = Bio::Tools::Genscan->new( -file => $raw_file );
+   while ( my $gene = $genscan->next_prediction() ) {
+      $i++;
+      my $pept = $gene->predicted_protein;
+      my $cds = $gene->predicted_cds;
+      my @exon_arr = $gene->exons;
+
+      if ( defined $cds  ) {
+         my $cds_seq = Bio::Seq->new(-seq => $cds->seq,
+                                     -display_id => $cds->display_id);
+         $cds_out->write_seq($cds_seq);
+      }
+
+      if ( defined $pept ) {
+         my $pept_seq = Bio::Seq->new(-seq => $pept->seq,
+                                      -display_id => $pept->display_id);
+         $pept_out->write_seq($pept_seq);
+      }
+
+      # One FASTA record per exon; the description carries strand,
+      # coordinates, exon type and the running gene number.
+      for my $exon (@exon_arr) {
+         my $desc = $exon->strand . " " . $exon->start . "-" . $exon->end .
+           " " . $exon->primary_tag . " " . "GENSCAN_predicted_$i";
+         my $exon_seq = Bio::Seq->new(-seq => $seq->subseq($exon->start,
+                                                           $exon->end),
+                                      -display_id => $seq->display_id,
+                                      -desc => $desc );
+         $exons_out->write_seq($exon_seq);
+      }
+   }
+   $genscan->close();
+   unlink $raw_file;
+}
+
+# Print the usage summary (framed by blank lines) on STDOUT and exit.
+sub usage {
+    my @lines = (
+        "",
+        "Usage    : $0 -f <file>",
+        "Function : run genscan on all nucleotide sequences in a multiple fasta file",
+        "Output   : <file>.gs.pept, <file>.gs.cds, <file>.gs.exons",
+        "",
+    );
+    print join( "\n", @lines ) . "\n";
+    exit;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/run_genscan.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/run_primer3.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/run_primer3.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/run_primer3.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+#!/usr/bin/perl -w
+# $Id: run_primer3.pl,v 1.3.4.1 2006/10/02 23:10:38 sendu Exp $
+
+=head1 NAME
+
+run_primer3.pl - run primer3 and parse its output
+
+=head1 SYNOPSIS
+
+  ./run_primer3.pl -i test.fa
+
+  #or
+
+  ./run_primer3.pl --input=test.fa
+
+=head1 DESCRIPTION
+
+Example of how to run primer3 and parse its output, essentially taken from an
+email written by Paul Wiersma to bioperl-l.
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl scripts. Send your comments and suggestions to the Bioperl 
+mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Brian Osborne, bosborne at alum.mit.edu
+
+=cut
+
+use strict;
+use Getopt::Long;
+use Bio::Tools::Run::Primer3;
+use Bio::SeqIO;
+
+my $in_file;
+
+GetOptions("i|input:s" => \$in_file );
+
+usage() unless $in_file;
+
+my $seqio = Bio::SeqIO->new(-file => $in_file);
+
+# Per-primer fields printed for PRIMER_LEFT and PRIMER_RIGHT, in column
+# order; each is looked up as "<primer>_<field>" in the result hash.
+my @out_keys_part = qw(START LENGTH TM GC_PERCENT SELF_ANY SELF_END SEQUENCE);
+
+while (my $seq = $seqio->next_seq) {
+	my $primer3 = Bio::Tools::Run::Primer3->new(-seq => $seq);
+	$primer3->program_name('primer3_core') unless $primer3->executable;
+
+	$primer3->add_targets('PRIMER_MIN_TM' => 56, 'PRIMER_MAX_TM' => 90);
+
+	my $results = $primer3->run;
+
+	unless ($results->number_of_results) {
+		print "No results for ",$seq->display_id;
+		next;
+	}
+
+	print "\n", $seq->display_id, "\n";
+
+	for my $i (0 .. $results->number_of_results - 1) {
+		my $result = $results->primer_results($i);
+
+		print "\n", $i + 1;
+		# The list must be parenthesised: a bare qw() is no longer accepted
+		# as the list of a foreach loop by current perls.
+		for my $key (qw(PRIMER_LEFT PRIMER_RIGHT)) {
+			# The primer entry itself is "start,length"; split it into the
+			# two derived fields so they print like the others.
+			my ($start, $length) = split /,/, $result->{$key};
+			$result->{$key . "_START"} = $start;
+			$result->{$key . "_LENGTH"} = $length;
+			foreach my $partkey (@out_keys_part) {
+				print "\t", $result->{$key . "_" . $partkey};
+			}
+			print "\n";
+		}
+		print "\tPRODUCT SIZE: ", $result->{'PRIMER_PRODUCT_SIZE'}, ", PAIR ANY COMPL: ",
+		  $result->{'PRIMER_PAIR_COMPL_ANY'};
+		print ", PAIR 3\' COMPL: ", $result->{'PRIMER_PAIR_COMPL_END'}, "\n";
+	}
+}
+
+# Replace this process with perldoc showing the script's own POD.
+# The exit is only reached when exec() itself fails.
+sub usage {
+   my @viewer = ('perldoc', $0);
+   exec @viewer;
+   exit 0;
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/run_primer3.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/seq_pattern.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/seq_pattern.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/seq_pattern.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,142 @@
+#!/usr/bin/perl -w
+
+#-----------------------------------------------------------------------------
+# PROGRAM : seq_pattern.pl 
+# PURPOSE : This is a simple driver used to test the Bio::Tools::SeqPattern.pm
+#           module for working with sequence patterns (regexps that recognize
+#           nucleotide or peptide sequences).
+# AUTHOR  : Steve Chervitz (sac at bioperl.org)
+# CREATED : 28 Aug 1997
+# REVISION: $Id: seq_pattern.pl,v 1.1 2003/07/07 18:20:59 bosborne Exp $
+# USAGE   : seq_pattern.pl -h
+# COMMENTS: 
+#    This is a driver script for the Bio::Tools::SeqPattern.pm Bioperl module
+#    that can be used for working with both nucleotide and peptide sequence and 
+#    offers features such as:
+#    
+#      -- generate reverse complement of sequence pattern
+#      -- ensuring pattern has no invalid characters
+#      -- untainting pattern
+#      -- expanding ambiguity codes. 
+#    
+#     Functionality is not yet complete but it may be of use as-is. 
+# 
+# INSTALLATION
+#    Edit the use lib "...." line to point the directory
+#    containing your Bioperl modules.
+#
+# DOCUMENTATION:
+#  http://genome-www.stanford.edu/perlOOP/bioperl/lib/Bio/Tools/SeqPattern.pm.html
+#
+#-----------------------------------------------------------------------------
+
+use lib "/home/steve/perl/bioperl";
+use Bio::Tools::SeqPattern ();
+use Bio::Root::Global qw(:std);
+use Getopt::Std;
+
+# Defaults for the option variables that getopts() fills in.
+($opt_h, $opt_n, $opt_p, $opt_r) = (0) x 4;
+$opt_v = -1;
+
+getopts('hnprv:');
+
+# First remaining argument is the pattern; test_nuc()/test_pep() fall back
+# to a built-in pattern when it is empty.
+$pat = $ARGV[0] || '';
+
+die <<"QQ_USAGE_QQ" if $opt_h;
+
+Usage: seq_pattern.pl [-n|p|r|h] 'REGEXP' 
+
+ regexp : full-regular expression for a nucleotide or peptide sequence.
+          Must be listed *after* one of the following options:
+ -n     : interpret regexp as a nucleotide pattern.
+ -p     : interpret regexp as a peptide pattern.
+ -r     : output only the reverse complement of the nucleotide pattern.
+ -v int : Set verbosity level (-1, 0, 1, default = -1); 
+ -h     : print usage.
+ 
+QQ_USAGE_QQ
+
+
+## Built-in test patterns, kept as ordered lists and then numbered from 1
+## to form the lookup hashes %nucpat and %peppat.
+## (Most nucleotide patterns are based on actual patterns submitted by users.)
+
+my @nuc_patterns = (
+    'YR...CG(CCG){5,7}CG[^G]TN{10,}[SA]{4}NN(ACA){2,}GCGTTT.{20,40}GT>',
+    'cggnnn[ta][ta][ta]n{3,5}[ta][ta][ta]nnnccg',
+    '<ATGX{6,10}RTTRTT',
+    'cggnnnwwwn{3,5}wwwnnnccg',
+    '(CCCCT)N{1,200}(agggg)N{1,200}(agggg)',
+    'cccct{2,}',
+    '(a){10,40}',
+    '(cag){36,}',
+    'rgaatgx{2,}ygtttca(cag){5,}',
+    'yattgtt(n){20,80}yattgtt',
+    'yattgtt(aca){20,80}yattgtt',
+    'TATAAAN{30,100}[AT][CAT][AT]YCAAR[CAT][AT][CAT]',
+    'TGACTC[N]{1,300}TGACTC',
+    'TGACTCN*GAGTCAN*GAGTCAN*TGACTC',
+    'TGACTC(TCA)*GAGTCA',
+    'TGACTCN*GAG(TCA)*GAGTCA',
+    '[at][at]ttcacatgy',
+);
+
+my @pep_patterns = (
+    '<X{10,}[WFY]XXXDN[BK][ST]Z{5,}>',
+    '<x{10,40}[gas]x[gasct]x*[gascdn]x[gas]x{0,10}[bst]{8,}x{0,8}>',
+);
+
+%nucpat = map { ( $_ + 1 => $nuc_patterns[$_] ) } 0 .. $#nuc_patterns;
+
+%peppat = map { ( $_ + 1 => $pep_patterns[$_] ) } 0 .. $#pep_patterns;
+
+#----------------------
+# Main
+#
+# -r prints only the reverse complement of the pattern.  Otherwise the
+# nucleotide report runs for -n alone, the peptide report for -p alone,
+# and both reports when neither flag is given; giving both flags runs
+# neither report.
+
+&verbosity($opt_v);
+
+if ($opt_r) {
+    my $dna = Bio::Tools::SeqPattern->new(-SEQ =>$pat, -TYPE =>'Dna');
+    print $dna->revcom->str,"\n";
+}
+elsif ($opt_n and !$opt_p) {
+    test_nuc($pat);
+}
+elsif ($opt_p and !$opt_n) {
+    test_pep($pat);
+}
+elsif (not ($opt_p or $opt_n)) {
+    test_nuc($pat);
+    test_pep($pat);
+}
+
+exit 0;
+
+#----------------------
+
+# Print a report for one nucleotide pattern: its type, the original
+# string, the expanded form, the reverse complement and the expanded
+# reverse complement.
+#
+#   $pat - pattern string; built-in pattern 9 of %nucpat is used when it
+#          is empty or false
+sub test_nuc {
+    my $pat = shift;
+    $pat ||= $nucpat{9};
+
+    # Lexical, so the object does not linger in a package global.
+    my $npat = Bio::Tools::SeqPattern->new(-seq =>$pat, -type =>'Dna');
+
+    print "\nNucleotide Pattern:\n";
+    print "-----------------------\n";
+    printf "%18s: %s\n", 'Type',    $npat->type;
+    printf "%18s: %s\n", 'Original',$npat->str;
+    printf "%18s: %s\n", 'Expanded', $npat->expand;
+    printf "%18s: %s\n", 'Reverse-Comp', $npat->revcom->str;
+    printf "%18s: %s\n", 'Rev-Comp+Expanded', $npat->revcom(1)->str; # Hate this syntax. May change.
+    print "\n";
+}
+
+
+# Print a report for one peptide pattern: its type, the original string
+# and the expanded form.
+#
+#   $pat - pattern string; built-in pattern 1 of %peppat is used when it
+#          is empty or false
+sub test_pep {
+    my $pat = shift;
+    $pat ||= $peppat{1};
+
+    # Lexical, so the object does not linger in a package global.
+    my $ppat = Bio::Tools::SeqPattern->new(-seq =>$pat, -type =>'Amino');
+
+    print "\nPeptide Pattern:\n";
+    print "-----------------------\n";
+    printf "%18s: %s\n", 'Type',    $ppat->type;
+    printf "%18s: %s\n", 'Original',$ppat->str;
+    printf "%18s: %s\n", 'Expanded', $ppat->expand;
+    print "\n";
+}
+
+
+
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/seq_pattern.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tools/standaloneblast.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tools/standaloneblast.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tools/standaloneblast.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,461 @@
+#!/usr/bin/perl
+
+# PROGRAM  : standaloneblast.pl
+# PURPOSE  : Demonstrate possible uses of Bio::Tools::StandAloneBlast.pm
+# AUTHOR   : Peter Schattner schattner at alum.mit.edu
+# CREATED  : Nov 01 2000
+# REVISION : $Id: standaloneblast.pl,v 1.5 2006/01/16 16:50:55 bosborne Exp $
+#
+# INSTALLATION
+#
+
+# You will need to enable Blast to find the Blast program. This can be done
+# in (at least) two ways:
+#  1. define an environmental variable BLASTDIR:
+#	export BLASTDIR=/home/peter/blast   or
+#  2. include a definition of an environmental variable BLASTDIR in every script that will
+#     use StandAloneBlast.pm.
+#	BEGIN {$ENV{BLASTDIR} = '/home/peter/blast/'; }
+#
+#  We also need to select the database to be used
+my $amino_database = 'swissprot';
+
+
+# 
+#  We are going to demonstrate 3 possible applications of StandAloneBlast.pm:
+#	1. Test effect of varying choice of substitution matrices	
+#	2. Test effect of varying choice of gap penalty 
+#	3. Comparison of results of psiblast depending on whether psiblast itself is used
+#	to identify an alignment to use for blasting or whether an external alignment is given to 
+#	psiblast
+#
+use strict;
+use Getopt::Long;
+use Bio::SimpleAlign;
+use Bio::Tools::Run::StandAloneBlast;
+use Bio::SearchIO;
+use Bio::AlignIO;
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+# Defaults; most can be replaced by the command-line options parsed below.
+my $queryseq   = Bio::Root::IO->catfile(qw(t data cysprot1.fa));
+my $executable = 'blastpgp';
+my $queryaln   = Bio::Root::IO->catfile(qw(t data cysprot.msf));
+my @params     = ('database' => $amino_database);
+
+# Number of the single example to run; left empty, every example runs.
+my $do_only = '';
+
+my $example1param  = 'MATRIX';                              # parameter varied in example 1
+my $example2param  = 'GAP';                                 # parameter varied in example 2
+my $example1values = [ 'BLOSUM62', 'BLOSUM80', 'PAM70' ];   # MATRIX values to try
+my $example2values = [ 7, 9, 25 ];                          # GAP values to try
+my $queryalnformat = 'msf';
+my $jiter          = 2;
+
+# only use pos. specific scoring matrix if > 50% of residues have
+# consensus letter (and compare with 25% or 75% cut off)
+my $maskvalues = [50, 25, 75];
+my $helpflag   = 0;   # Flag to show usage info.
+
+# Keep an untouched copy of the arguments: GetOptions() consumes @ARGV,
+# and the raw list is scanned later for name=>value pairs.
+my @argv = @ARGV;
+my $paramvalstring;
+my $maskvalstring;
+
+GetOptions(
+    "h!"          => \$helpflag,
+    "help!"       => \$helpflag,
+    "in=s"        => \$queryseq,
+    "inaln=s"     => \$queryaln,
+    "alnfmt=s"    => \$queryalnformat,
+    "param=s"     => \$example1param,
+    "exec=s"      => \$executable,
+    "paramvals=s" => \$paramvalstring,
+    "do=i"        => \$do_only,
+    "maskvals=s"  => \$maskvalstring,
+    "iter=i"      => \$jiter,
+);
+
+# Colon-separated value lists replace the built-in ones in place.
+@$example1values = split (":", $paramvalstring) if $paramvalstring;
+@$maskvalues     = split (":", $maskvalstring)  if $maskvalstring;
+
+if ($helpflag) { &example_usage(); exit 0;}
+
+# create factory & set user-specified global blast parameters; any raw
+# argument of the form name=>value is passed through to the factory
+foreach my $argv (@argv) {
+	next unless ($argv =~ /^(.*)=>(.*)$/);
+	push (@params, $1 => $2);
+}
+my  $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
+
+# If "do" variable not set, do all four examples
+if ( ! $do_only)  {
+    &vary_params($queryseq, $example1param, $example1values); # ex. 1
+
+    # To compare gap penalties of 7, 9 and 25 we need to set the
+    # scoring matrix to BLOSUM62 and extension penalty to 2 (these are
+    # limitations of BLAST)
+
+    $factory->MATRIX('BLOSUM62');
+
+    $factory->EXTENSION(2);
+    &vary_params($queryseq, $example2param, $example2values); # ex. 2
+
+    # For the psiblast tests we want to restore gap opening and
+    # extension values to their defaults
+
+    $factory->GAP(11);
+    $factory->EXTENSION(1);
+    # now do the mask comparison example and ..
+    &vary_masks($queryseq, $maskvalues); # ex. 3
+    # do the jumpstart-align vs multiple iteration examples with the
+    # mask value set to 50%
+    &aligned_blast($queryseq, $queryaln, $queryalnformat,
+		   $jiter, $maskvalues->[0]); # ex. 4
+} elsif ($do_only  == 1) {
+    &vary_params($queryseq,$example1param, $example1values);
+} elsif ($do_only  == 2) {
+    # Example 2 on its own needs the same matrix and extension penalty
+    # that the run-everything path sets up before varying the gap penalty.
+    $factory->MATRIX('BLOSUM62');
+    $factory->EXTENSION(2);
+    &vary_params($queryseq, $example2param, $example2values);
+} elsif ($do_only  == 3) {
+    &vary_masks($queryseq, $maskvalues);
+} elsif ($do_only  == 4 ) {
+    &aligned_blast($queryseq, $queryaln, $queryalnformat, $jiter, $maskvalues->[0]);
+}  else {
+    &example_usage();
+}
+
+exit 0;
+
+##########
+## End of "main"
+
+
+#################################################
+#   compare_runs(): Prints out display of which hits were found by different methods
+#	Various methods are labeled by "tags" found in array @runtags
+#
+#  args: 
+#	$typetag  -  label describing type of "tags"
+#	$runtags  -  reference to array @runtags
+#	$hashhits  - reference to hash of all the hits found by all runs (%hashhits) 
+#		value for each hit is string which is the concatenation of all the "tags" of
+#		runs that found that hit
+#  returns: nothing  
+
+# Print a summary of which hits were found by which runs.
+#
+#   $typetag  - label describing what the tags are (e.g. a parameter name)
+#   $runtags  - array ref of the tags, one per run, in run order
+#   $hashhits - hash ref: hit id => concatenation of the tags of every run
+#               that found the hit
+#
+# Tags are compared as plain strings, so a tag containing regex
+# metacharacters (".", "+", "(" ...) only ever matches itself.
+# Always returns 1.
+sub compare_runs {
+    my ($typetag, $runtags, $hashhits) = @_;
+
+    print "Comparing BLAST results... \n";
+
+# Get total number of hits found by any method
+    my $numhits = keys %$hashhits ; # scalar context to get total number of hits by all methods
+    print  "Total number of hits found: $numhits \n";
+
+# Get total number of hits found by every method: such a hit's value
+# contains the tags of all runs, back to back
+    my $alltags    = join ( "" ,  @$runtags );
+    my @alltaghits = grep { index($hashhits->{$_}, $alltags) >= 0 } keys %$hashhits;
+    print  " Number of hits found by every method / parameter-value: " ,
+    scalar(@alltaghits), "\n";
+
+# If one desires to see the hits found by all methods, uncomment next 2 lines
+#print  " Hits were found all methods / parameters: \n";
+#print   join ( "\n", @alltaghits ) ,  "\n";
+
+# For each method/parameter-value (labeled by type) display  hits found
+# exclusively by that method: the hit's value is exactly that one tag
+    foreach my $tag (@$runtags)  {
+        my @taghits = grep { $hashhits->{$_} eq $tag } keys %$hashhits;
+        print  " Hits found only when $typetag was $tag: \n";
+        print   join ( "\n", @taghits ) ,  "\n";
+    }
+    return 1;
+}
+
+
+#################################################
+#   vary_params(): Example demonstrating varying of parameter
+#
+#  args: 
+#	$queryseq  - query sequence (can be filename (fasta),  or Bio:Seq object) 
+#	$param  - name of parameter to be varied 
+#	$values  - reference to array of values to be used for the parameter 
+#  returns: nothing  
+
+
+
+# Run one BLAST per value of a single factory parameter and report which
+# hits each value found.
+#
+#   $queryseq - query passed unchanged to the BLAST executable method
+#   $param    - name of the factory method used to set the parameter
+#   $values   - array ref of the values to try, in order
+#
+# With the script defaults this is called once for MATRIX (BLOSUM62,
+# BLOSUM80, PAM70) and once for GAP (7, 9, 25).  Other parameters can be
+# chosen on the command line, within whatever range BLAST itself supports
+# (see the BLAST documentation).  Uses the file-level $factory and
+# $executable.  Always returns 1.
+sub vary_params {
+    my ($queryseq, $param, $values) = @_;
+
+    print "Beginning $param parameter-varying example... \n";
+
+    # hit id => concatenation of every parameter value that found the hit
+    my %found_with;
+
+    for my $value (@$values) {
+
+        $factory->$param($value);    # set parameter value
+
+        print "Performing BLAST with $param = $value \n";
+
+        my $report = $factory->$executable($queryseq);
+        my $result = $report->next_result;
+        while ( my $hit = $result->next_hit ) {
+            $found_with{ $hit->name } .= "$value";
+        }
+    }
+
+    compare_runs( $param, $values, \%found_with );
+
+    return 1;
+
+}
+
+#################################################
+
+#   vary_masks(): Example demonstrating varying of parameter
+#
+#  args:
+#	$queryseq  - query sequence (can be filename (fasta),  or Bio:Seq object)
+#	$maskvalues  - reference to array of values to be used for the mask threshold
+#  returns: 1
+
+# Now we'll perform several blasts, 1 for each value of the mask threshold.
+# In the default case, we use thresholds of 25%, 50% and 75%. (Recall the threshold is
+# % of residues which must match the consensus residue before deciding to use the
+# position specific scoring matrix rather than the default - BLOSUM or PAM - matrix)
+# We then automatically parse the resulting reports to identify which hits
+# are found with any of the mask threshold values and which with only one of them.
+#
+
+sub vary_masks {
+
+    # Runs one jump-started blastpgp search per mask threshold and then
+    # reports which hits were found under which threshold(s).
+    #
+    #   $queryseq - query sequence handed to the BLAST factory
+    #   $values   - array ref of mask thresholds (per-cent values)
+    #
+    # The alignment is read from the file-level $queryaln /
+    # $queryalnformat settings.  Always returns 1.
+
+    my ( $queryseq, $values ) = @_;
+
+    print "Beginning mask-varying example... \n";
+
+    # key is hit id, value is string of thresholds for which hit was found
+    my %found_with;
+
+    # Get the alignment file
+    my $align_in = Bio::AlignIO->new(-file=> "$queryaln", '-format' => "$queryalnformat", );
+    my $aln = $align_in->next_aln();
+
+    for my $threshold (@$values) {
+
+        print "Performing BLAST with mask threshold = $threshold % \n";
+
+        # Create the proper mask for 'jumpstarting'
+        my $mask   = create_mask( $aln, $threshold );
+        my $result = $factory->blastpgp( $queryseq, $aln, $mask )->next_result;
+
+        while ( my $hit = $result->next_hit ) {
+            $found_with{ $hit->name } .= "$threshold";
+        }
+    }
+
+    compare_runs( 'mask threshold', $values, \%found_with );
+
+    return 1;
+
+}
+
+#################################################
+#  aligned_blast ():
+#
+#
+#  args: 
+#	$queryseq  - query sequence (can be filename (fasta),  or Bio:Seq object) 
+#	$queryaln  - file containing alignment to be used to "jumpstart" psiblast in "-B mode"
+#			$queryaln *must contain $queryseq with the same name and length
+#				(psiblast is very picky)
+#	$queryalnformat  - format of alignment (can = "fasta", "msf", etc)
+#	$jiter  - number of iterations in psiblast run
+#	$maskvalue  - threshold indicating how similar residues must be at a sequence location
+#		before position-specific-scoring matrix is used
+#		: "0" => use position specific matrix at all residues,  or
+#			"100" => use default (eg BLOSUM) at all residues
+#  returns: 1
+
+
+# For this example, we'll compare the results of psiblast depending on whether psiblast itself is 
+
+#  used to identify an alignment to use for blasting or whether an external alignment is given to 
+#  psiblast
+
+sub aligned_blast {
+
+    # Compares a jump-started psiblast search (external alignment plus
+    # mask) with a plain multiple-iteration psiblast search of the same
+    # query, then hands both hit sets to compare_runs().
+    #
+    #   $queryseq       - query sequence handed to the BLAST factory
+    #   $queryaln       - file containing the jump-start alignment
+    #   $queryalnformat - format of that alignment file
+    #   $jiter          - number of psiblast iterations for the plain run
+    #   $maskvalue      - threshold passed to create_mask()
+    #
+    # Always returns 1.
+
+    my ( $queryseq, $queryaln, $queryalnformat, $jiter, $maskvalue ) = @_;
+
+    # key is hit name, value is the concatenated tags of the runs that found it
+    my $hashhits = { };
+
+    print "\nBeginning aligned blast example... \n";
+
+    # First we do a single-iteration psiblast search but with a specified
+    # alignment to "jump start" psiblast
+
+    print "\nBeginning jump-start psiblast ... \n";
+
+    my $tag1 = 'jumpstart';
+
+    # $factory->j('1');    # perform single iteration
+
+    # Get the alignment file
+    my $str = Bio::AlignIO->new(-file=> "$queryaln", '-format' => "$queryalnformat", );
+    my $aln = $str->next_aln();
+
+    # Create the proper mask for 'jumpstarting'
+    my $mask = create_mask( $aln, $maskvalue );
+
+    # Walk report -> result -> hit, the same way vary_params() and
+    # vary_masks() read their reports.  The previous code called ->name
+    # directly on the object returned by next_result.
+    my $report2 = $factory->blastpgp( $queryseq, $aln, $mask );
+    while ( my $result = $report2->next_result ) {
+        while ( my $hit = $result->next_hit ) {
+            $hashhits->{ $hit->name } .= "$tag1";
+        }
+    }
+
+    # Then we do a "plain" psiblast multiple-iteration search
+
+    print "\nBeginning multiple-iteration psiblast ... \n";
+
+    # Pass an undefined value to the factory's B parameter
+    # (presumably this clears the jump-start alignment setting -- TODO confirm).
+    my $undefineB;
+    $factory->B($undefineB);
+
+    my $tag2 = 'iterated';
+    $factory->j($jiter);    # 'j' is blast parameter for # of iterations
+    my $report1          = $factory->blastpgp($queryseq);
+    my $total_iterations = $report1->number_of_iterations;
+    my $last_iteration   = $report1->round($total_iterations);
+
+    # NOTE(review): this loop is unchanged from the original.  It calls
+    # next_result on the iteration object and ->name on what comes back,
+    # which does not match the report/result/hit pattern used above;
+    # confirm against the report class blastpgp returns for j > 1.
+    while ( my $sbjct = $last_iteration->next_result ) {
+        $hashhits->{ $sbjct->name } .= "$tag2";
+    }
+
+    # Now we compare the results of the searches
+
+    my $tagtype = 'iterated_or_jumpstart';
+    my $values  = [ $tag1, $tag2 ];
+
+    compare_runs( $tagtype, $values, $hashhits );
+
+    return 1;
+
+}
+
+
+#################################################
+
+
+# create_mask(): creates a mask for the psiblast jumpstart alignment
+#                that determines at what residues position-specific
+#                scoring matrices (PSSMs) are used and at what
+#                residues default scoring matrices (eg BLOSUM) are
+#                used. See psiblast documentation for more details,
+
+#  args: 
+#	$aln  -  SimpleAlign object with alignment
+#	$maskvalue  -  integer threshold (0 to 100) passed to the alignment's consensus_string()
+#  returns: actual mask, ie a string of 0's and 1's which is the 
+#           same length as each sequence in the alignment and has 
+#           a "1" at locations where (PSSMs) are to be used
+#           and a "0" at all other locations.
+
+
+sub create_mask {
+	# Builds the psiblast jump-start mask for an alignment.
+	#
+	#   $aln       - alignment object; must answer is_flush() and
+	#                consensus_string()
+	#   $maskvalue - integer threshold between 0 and 100
+	#
+	# Returns a string with "0" wherever the consensus string has "?"
+	# and "1" everywhere else.  Dies if the alignment is not flush or
+	# if $maskvalue is not an integer in 0..100.
+	my $aln = shift;
+	my $maskvalue = shift;
+
+	die "psiblast jumpstart requires all sequences to be same length \n"
+	  unless $aln->is_flush();
+
+	# The old check accepted any 1-3 digit number (up to 999) although
+	# the error message promises 0..100; enforce the documented range.
+	die "maskvalue must be an integer between 0 and 100 \n"
+	  unless defined $maskvalue
+	     and $maskvalue =~ /\A\d{1,3}\z/
+	     and $maskvalue <= 100;
+
+	my $mask = $aln->consensus_string($maskvalue);
+	$mask =~ s/[^\?]/1/g ;    # every non-"?" consensus position becomes 1
+	$mask =~ s/\?/0/g ;       # every "?" consensus position becomes 0
+	return $mask ;
+}
+
+#----------------
+sub example_usage {
+#----------------
+
+#-----------------------
+# Prints usage information for general parameters to STDERR.
+#
+# Takes no arguments.  The help text is an interpolating heredoc, so the
+# current values of the file-level $queryseq, $queryaln and
+# $queryalnformat variables are shown as the defaults.
+
+    print STDERR <<"QQ_PARAMS_QQ";
+
+ Command-line accessible script variables and commands:
+ -------------------------------
+ -h 		:  Display this usage info and exit.
+ -in <str>	:  File containing input sequences in fasta format (default = $queryseq) .
+ -inaln <str>	:  File containing input alignment for example 3 (default = $queryaln) .
+ -alnfmt <str>	:  Format of input alignment for example 3, eg "msf", "fasta", "pfam".
+		   (default = $queryalnformat) .
+ -do <int>	:  Number of test to be executed ("1" => vary parameters,
+		   "3" => compare iterated & jumpstart psiblast.) If omitted,
+		   three default tests performed.
+ -exec <str>  	:  Blast executable to be used in example 1.  Can be "blastall" or
+		   "blastpgp" (default is "blastpgp").
+ -param <str>  	:  Parameter to be varied in example 1. Any blast parameter
+		   can be varied (default = 'MATRIX')
+ -paramvals <str>:  String containing parameter values in example 1, separated
+		   by ":"'s. (default = 'BLOSUM62:BLOSUM80:PAM70')
+ -iter <int>    :  Maximum number of iterations in psiblast in example 3 (default = 2)
+ -maskvals <str>:  String containing mask threshold values (in per-cents) for example 3,
+		   separated by ":"'s. (default = '50:75:25')
+
+In addition, any valid Blast parameter can be set using the syntax "parameter=>value" as in "database=>swissprot"
+
+So some typical command lines might be:
+ >standaloneblast.pl -do 1 -param expectation -paramvals '1e-10:1e-5'
+or
+ >standaloneblast.pl -do 1 -exec blastall -param q -paramvals '-1:-7' -in='t/dna1.fa' "pr=>blastn" "d=>ecoli.nt"
+or
+ >standaloneblast.pl -do 4 -maskvals 0 -iter 3
+or
+ >standaloneblast.pl -do 3 -maskvals '10:50:90'  -in 't/data/cysprot1.fa' -alnfmt msf -inaln 't/cysprot.msf'
+
+
+
+QQ_PARAMS_QQ
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tools/standaloneblast.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/examples/tree/paup2phylip.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/examples/tree/paup2phylip.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/examples/tree/paup2phylip.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,24 @@
+#!/usr/bin/perl
+# Author: Jason Stajich <jason.stajich at duke.edu>
+# Convert a PAUP tree block to Phylip format
+
+use strict;
+
+# Taxon names (cut to 10 characters) indexed by their Translate number.
+my @data;
+
+# Skip everything up to the Translate table.
+while(<>) {
+    last if( /Translate/ );
+}
+
+# Read the "number name" entries of the Translate table.  Entries normally
+# end in a comma, but the final one does not and may share its line with
+# the terminating ';', so parse the line first and only then test for ';'.
+while(<>) {
+    if( my ($num, $taxon) = /(?:^|\s)(\d+)\s+([A-Za-z\.\_]+)\s*(?:[,;]|$)/ ) {
+        $data[$num] = substr($taxon,0,10);
+    }
+    last if (/;/);
+}
+
+# Rewrite every tree line, replacing taxon numbers with their names.
+while(<>) {
+    next unless (s/^\s*tree (\S+) = \[\S+\] //i);
+    my $tree = $_;
+    # Start at the last real index ($#data, not scalar @data) and skip
+    # numbers that never appeared in the Translate table.
+    for my $i ( reverse 1 .. $#data ) {
+        my $taxon = $data[$i];
+        next unless defined $taxon;
+        # Replace the number only where it stands as a node label, i.e.
+        # right after '(' or ',' and right before ':', ',' or ')'.  This
+        # keeps digits inside branch lengths untouched.
+        $tree =~ s/([(,]\s*)$i(?=\s*[:,)])/$1$taxon/;
+    }
+    print $tree;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/examples/tree/paup2phylip.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,19 @@
+
+The scripts in this directory are meant for bioperl maintainers.  They
+count and test modules that make up the bioperl core library.
+
+The scripts are only expected to run on UNIX-like operating systems.
+
+
+Run them from this directory.
+
+Dependencies (not required for bioperl):
+========================================
+
+Module                         Script
+------------------------------------------------
+Data::Dumper                   authors.pl
+                               modules.pl
+Regexp::Common                 check_URLs.pl
+Pod::Checker                   pod.pl
+

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/authors.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/authors.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/authors.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,230 @@
+#!/usr/bin/perl -w
+# $Id: authors.pl,v 1.6 2006/07/04 22:23:28 mauricio Exp $
+#
+=head1 NAME
+
+authors.pl - check modules and scripts for authors not in AUTHORS file
+
+=head1 SYNOPSIS
+
+B<authors.pl> [B<-d|--dir> path ] [B<-v|--verbose>] B<-a|--authorsfile>
+    [B<-?|-h|--help>]
+
+=head1 DESCRIPTION
+
+Checks Plain Old Documentation (POD) of all bioperl live modules for
+AUTHORS and CONTRIBUTORS tags and prints out any emails missing from
+the AUTHORS file
+
+=cut
+
+use Data::Dumper;
+use File::Find;
+use Getopt::Long;
+use strict;
+
+sub findauthors;
+
+#
+# command line options
+#
+
+# Defaults; GetOptions below overwrites whichever ones are given.
+my $verbose     = 0;
+my $dir;
+my $authorsfile = "../AUTHORS";
+my $help;
+
+GetOptions(
+    'v|verbose'     => \$verbose,
+    'dir:s'         => \$dir,
+    'authorsfile:s' => \$authorsfile,
+    # Help replaces this process with perldoc on the script itself.
+    'h|help|?'      => sub { exec( 'perldoc', $0 ); exit(0) },
+);
+
+#
+# global variables
+#
+
+# Known authors from the AUTHORS file are read into this hash, which is
+# initialized with known synonyms.  Keys are written user@host because
+# findauthors() looks up addresses captured with a literal '@'.  (The
+# " at " spelling split every address into three separate qw words, so
+# no synonym could ever match.)
+our %AUTHORS = map {$_=>1} qw{
+                              birney@sanger.ac.uk
+                              jinsana@gmx.net
+                              Insana@ebi.ac.uk
+                              fugui@worf.fugu-sg.org
+                              cjm@fruitfly.bdgp.berkeley.edu
+                              elia@tll.org.sg
+                              heikki-at-bioperl-dot-org
+                              bioinformatics@dieselwurks.com
+                              bioinformatics1@dieselwurks.com
+                              bioperl-l@bio.perl.org
+                              paul@systemsbiology.org
+                              gattiker@isb-sib.ch
+                              elia@fugu-sg.org
+                              jason@cgt.mc.duke.edu
+                              jason@chg.mc.duke.edu
+                              jason@open-bio.org
+                              hilmar.lapp@pharma.novartis.com
+                              richard.adams@ed.ac.uk
+                              dblock@gene.pbi.nrc.ca
+                              ak@ebi.ac.uk
+                              day@cshl.org
+                              bala@tll.org.sg
+                              mrp@sanger.ac.uk
+                              m.w.e.j.fiers@plant.wag-ur.nl
+                              cmzmasek@yahoo.com
+                              fuguteam@fugu-sg.org
+                              shawnh@gmx.net
+                          };
+our %NEWAUTHORS;     # new authors: address => list of files mentioning it
+our %FIND_OPTIONS = ( wanted => \&findauthors, no_chdir => 1 );
+
+
+# Directories to check
+my @dirs = qw( ../Bio/ ../scripts . );
+
+#print Dumper \%AUTHORS;
+
+#
+# Read the AUTHORS file
+#
+
+
+# Three-argument open with a lexical handle: the path comes from the
+# command line, and a bareword handle F would be shared with the subs
+# further down.
+open( my $authors_fh, '<', $authorsfile )
+    or die "can't open file $authorsfile: $!";
+
+while ( my $line = <$authors_fh> ) {
+    # The pattern expects addresses spelled "user at host"; they are
+    # stored as user@host so they compare equal to addresses found in POD.
+    my ($email) = $line =~ /([\.\w_-]+ at [\.\w_-]+)/;
+    next unless $email;
+    $email =~ s/ at /\@/;
+    $AUTHORS{$email} = 1;
+}
+close $authors_fh;
+
+
+#
+# run
+#
+
+# Scan either the single directory given with --dir or the default set.
+find( \%FIND_OPTIONS, $dir ? $dir : @dirs );
+
+#
+# results
+#
+print Dumper \%NEWAUTHORS;
+
+
+#
+##
+### end main
+##
+#
+
+#
+# this is where the action is
+#
+# File::Find callback.  For every .PLS/.pl/.pm file, collects the text
+# between "=head1 AUTHOR" and the next "=head" line that does not mention
+# CONTRIBUTORS, and records each e-mail address in it that is not already
+# in %AUTHORS under %NEWAUTHORS (address => list of file names).
+sub findauthors {
+    return unless /\.PLS$/ or /\.p[ml]$/ ;
+    return unless -e $_;
+    my $filename = $_;
+    print "$filename\n" if $verbose;
+
+    # 'or', not '||': with '||' the die was attached to the file name and
+    # could never fire.  A lexical handle and line variable also leave
+    # File::Find's $_ untouched.
+    open( my $fh, '<', $filename )
+        or die "Could not open file $filename: $!";
+
+    # Skip ahead to the AUTHOR heading.
+    while ( my $line = <$fh> ) {
+        last if $line =~ /=head1 +AUTHOR/;
+    }
+
+    my $authorblock;
+    while ( my $line = <$fh> ) {
+        last if $line =~ /=head/ and $line !~ /CONTRIBUTORS/;
+        $authorblock .= $line;
+    }
+    close $fh;
+
+    return unless $authorblock;
+    while ( $authorblock =~ /([\.\w_-]+\@[\.a-z_-]+)/g ) {
+        next if $AUTHORS{$1};
+        push @{ $NEWAUTHORS{$1} }, $filename;
+    }
+}
+
+
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-d | --dir> path
+
+Overrides the default directories to check by one directory 'path' and
+all its subdirectories.
+
+=item B<-a | --authorsfile>
+
+path from working directory the AUTHORS file.
+
+Redundant as this information could be had from --dir option but I am
+feeling too lazy to change the code.
+
+=cut
+
+# File::Find-style callback that reads a .PLS/.pl/.pm file paragraph by
+# paragraph and prints POD command lines with suspicious whitespace around
+# them: preceded by a line ending in blank/tab or a word character
+# (verbose only), or followed by a line starting with blank/tab.
+sub blankline {
+    return unless /\.PLS$/ or /\.p[ml]$/ ;
+    return unless -e $_;
+    my $file = $_;
+
+    # The old 'warn "..." && return' evaluated the return first, so the
+    # warning was never printed.
+    open( my $fh, '<', $file ) or do {
+        warn "can't open file $file: $!";
+        return;
+    };
+
+    local $/="";    # paragraph mode
+    while ( my $para = <$fh> ) {
+        print "$file: +|$1|\n" if $para =~ /[ \t]\n(=[a-z][^\n]+$)/m and $verbose;
+        print "$file: ++|$1|\n" if $para =~ /\w\n(=[a-z][^\n]+$)/m and $verbose;
+        print "$file:|$1|+\n" if $para =~ /(^=[a-z][^\n]+)\n[\t ]/m;
+        #print "$file:|$1|++\n" if $para =~ /(^=[^\n]+)\n\w/m;
+    }
+    close $fh;
+}
+
+__END__
+
+=item B<-v | --verbose>
+
+Show the progress through files during the POD checking.
+
+=item B<-? | -h  | --help>
+
+This help text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=cut
+
+
+# find . -name '*.pm' -print | xargs  perl -e '$/=""; while (<>) {$n = $1 if /^package\s+([\w:]+)/; print "$n:|$1|"  if  /(\s\s^=[^\n]+$)/m ; }'  ;
+
+# find . -name '*.pm' -print | xargs  perl -e '$/=""; while (<>) {$n = $1 if /^package\s+([\w:]+)/; print "$n:|$1|\n"  if /(^=[^\n]+\n[\t ])/m ; }'  ;


Property changes on: trunk/packages/bioperl/branches/upstream/current/maintenance/authors.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/check_NAME.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/check_NAME.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/check_NAME.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+check_NAMEs.pl - check NAME in module POD has fully qualified object name
+
+=head1 SYNOPSIS
+
+B<check_NAMEs.pl> [B<-d|--dir> path] [B<-v|--verbose>] [B<-?|-h|--help>]
+
+=head1 DESCRIPTION
+
+This script is designed to find all Bioperl modules which don't
+have the fully qualified object name with correct capitalization
+in the "NAME" section of the POD. 
+
+The full name is required for the PDOC POD to HTML script to
+correctly render the module documentation.
+
+=cut
+
+use strict;
+use File::Find;
+use Getopt::Long;
+
+#
+# command line options
+#
+
+# Defaults; GetOptions below overwrites whichever ones are given.
+my $verbose = 0;
+my $dir     = '../Bio/';
+my $help;
+
+GetOptions(
+    'v|verbose' => \$verbose,
+    'd|dir:s'   => \$dir,
+    # Help replaces this process with perldoc on the script itself.
+    'h|help|?'  => sub { exec( 'perldoc', $0 ); exit(0) },
+);
+
+#
+# globals
+#
+
+# Number of modules whose NAME section lacks the full module name;
+# incremented by find_modules().
+my $num_found = 0;
+
+#
+# find all modules
+#
+
+print STDERR "Searching for incorrect NAME POD section of all modules in: $dir\n";
+find( \&find_modules, $dir );
+print STDERR "$num_found found.\n";
+
+# this is where the action is
+
+# File::Find callback: for every .pm file, derives the module name from
+# the path and prints it (and counts it in $num_found) when the NAME POD
+# section is not followed by a line starting with that full module name.
+sub find_modules {
+    # only want files with .pm
+    return unless m/\.pm$/;
+    return unless -f $_;
+    
+    my $fname = $_;
+    my $pm = $File::Find::name;
+    $pm =~ s{.*?/(?=Bio/)}{};  # remove up to first slash before Bio/
+    $pm =~ s{\.pm$}{};         # remove .pm suffix
+    $pm =~ s{/}{::}g;          # convert / to ::
+    
+    print STDERR "# $File::Find::name\n" if $verbose;
+    
+    # slurp in the file
+    my $text = do { local( @ARGV, $/ ) = $fname ; <> } ;
+
+    # check if the NAME section has the _full_ module name in it.
+    # \Q...\E makes the name match literally: without it, any regex
+    # metacharacter, blank or '#' in the path-derived name would be read
+    # as pattern syntax (the pattern is compiled with /x).
+    if ($text !~ m/^=head1\s+NAME.*?^\Q$pm\E/xms) {
+      print "$pm\n";
+      $num_found++;
+    }
+}
+
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-d | --dir> path
+
+Overrides the default directory to recursively look for .pm file 
+(Default is '../Bio')
+
+=item B<-v | --verbose>
+
+Show the progress through files during the POD checking.
+
+=item B<-? | -h  | --help>
+
+This help text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Torsten Seemann
+
+Email: torsten-dot-seemann-at-infotech-dot-monash-dot-edu-dot-au
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/maintenance/check_NAME.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/check_URLs.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/check_URLs.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/check_URLs.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,145 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+check_URLs.pl - validate URLs located in module code and POD
+
+=head1 SYNOPSIS
+
+B<check_URLs.pl> [B<-d|--dir> path] [B<-v|--verbose>] [B<-?|-h|--help>]
+
+=head1 DESCRIPTION
+
+Checks code and POD of all bioperl-live modules for URLs, and validates them.
+
+Output is a series of lines containing two fields, tab separated.
+The first field is the file with the bad URL, the second is the URL itself.
+
+The whole URL is not retrieved, only a HTTP "HEAD" request is done
+to see if the URL exists on the server. The request is done using 
+B<LWP::Simple> so the B<http_proxy> environmental variable will be
+honoured.
+
+The URL parsing may not be perfect - although I use the B<Regexp::Common::URI>
+module, I have to manually clean up some URLs which are embedded in Perl
+strings to convert the matched URL to a more probable real world URL,
+e.g. most URLs don\'t end in "'," or ")" :-)
+
+=cut
+
+use strict;
+use Data::Dumper;
+use File::Find;
+use Getopt::Long;
+use Regexp::Common qw(URI);
+use LWP::Simple;
+
+#
+# command line options
+#
+
+# Defaults; GetOptions below overwrites whichever ones are given.
+my $verbose = 0;
+my $dir     = '../Bio/';
+my $help;
+
+GetOptions(
+    'v|verbose' => \$verbose,
+    'd|dir:s'   => \$dir,
+    # Help replaces this process with perldoc on the script itself.
+    'h|help|?'  => sub { exec( 'perldoc', $0 ); exit(0) },
+);
+
+#
+# globals
+#
+
+# URL => list of files it was found in; filled by find_modules().
+my %URL;
+
+#
+# find all modules
+#
+
+find( \&find_modules, $dir );
+
+#
+# validate unique URLs and print fail cases to stdout
+#
+
+# Probe each distinct URL once; progress goes to STDERR, and every
+# file/URL pair whose URL failed goes to STDOUT.
+for my $url (keys %URL) {
+    print STDERR "Checking $url ... ";
+
+    my $alive   = head($url);
+    my $verdict = $alive ? 'ok' : 'FAIL!';
+    print STDERR $verdict, "\n";
+    next if $alive;
+
+    for my $file (@{ $URL{$url} }) {
+        print "$file\t$url\n";
+    }
+}
+
+print STDERR Dumper(\%URL) if $verbose;
+
+#
+# this is where the action is
+#
+
+# File::Find callback: records every HTTP URL found in a .pm file in
+# %URL, keyed by URL, with the file's path appended to the URL's list.
+sub find_modules {
+    my $fname = $_;
+
+    # only plain files ending in .pm are of interest
+    return unless $fname =~ m/\.pm$/;
+    return unless -f $fname;
+
+    print STDERR "$fname\n" if $verbose;
+
+    # read the whole file in one go
+    my $text = do { local( @ARGV, $/ ) = $fname ; <> } ;
+
+    while ($text =~ m/$RE{URI}{HTTP}{-keep}/g) {
+        my $url = $1;
+        next unless $url;
+
+        # trim trailing punctuation picked up from surrounding Perl code
+        $url =~ s/\s*[.,;'")]*\s*$//;
+
+        print STDERR "$url\n" if $verbose;
+        push @{ $URL{$url} } , $File::Find::name;
+    }
+}
+
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-d | --dir> path
+
+Overrides the default directory to recursively look for .pm file 
+(Default is '../Bio')
+
+=item B<-v | --verbose>
+
+Show the progress through files during the POD checking.
+
+=item B<-? | -h  | --help>
+
+This help text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Torsten Seemann
+
+Email: torsten-dot-seemann-at-infotech-dot-monash-dot-edu-dot-au
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/maintenance/check_URLs.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/modules.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/modules.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/modules.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,569 @@
+#!/usr/bin/perl -w
+# $Id: modules.pl,v 1.22 2006/07/04 22:23:28 mauricio Exp $
+#
+=head1 NAME
+
+modules.pl - information about modules in BioPerl core
+
+=head1 SYNOPSIS
+
+B<modules.pl> [B<-V|--verbose>] [B<-c|--count>] | [B<-l|--list>] |
+  [B<-u|--untested>] | [B<-i|--info> class] | [B<-i|--inherit>] |
+  [B<-d|--dir> path ] | [B<-v|--version>] | [B<-?|-h|--help>]
+
+=head1 DESCRIPTION
+
+This script counts, lists and provides other information about bioperl
+modules. It is mainly meant to be run by bioperl maintainers.
+
+The default action is to count modules in the bioperl core
+distribution. Based on the class name it tries to classify them into
+categories. The following is a tentative glossary of terms used.
+
+
+=over 4
+
+=item Base
+
+Synonyms: Generic class, parameterized class, generic module.
+
+A class that you don't instantiate in your scripts, but that is a
+template for other classes.
+
+Examples: Bio::Tools::Run::WrapperBase - a base object for wrappers
+around executables. Bio::Tools::Analysis::SimpleAnalysisBase - an
+abstract superclass for SimpleAnalysis implementations
+
+These are counted with C</:Base/ | /Base$/>; They have "Base" in the
+beginning or end of the name.
+
+=item Interface
+
+Synonyms: protocol, feature set.
+
+Class that defines a set of features that are common to a group of
+classes.
+
+Example: Bio::Tree::NodeI - interface describing a Tree Node.
+
+These are counted with C</[^A-Z]I$/>; They have "I" at the end of the
+name.
+
+=item Component
+
+A class that implements a small subset of their superclass. They are in
+a directory with an identical name of the superclass. There are plenty
+of them. You need only a small number of methods to be overridden.
+
+Example: Bio::SeqIO::fasta.
+
+This is counted with C</.*:[a-z]/>; Classes are inside their base directory
+and all in lowercase.
+
+=item Instance
+
+The rest of them. It is sometimes helpful to divide them into two
+types:
+
+=over 2
+
+=item Algorithmic classes
+
+Example: Bio::AlignIO - Handler for AlignIO formats
+
+=item Storage classes
+
+Example: Bio::SimpleAlign - Multiple alignments held as a set of
+sequences
+
+=back
+
+=back
+
+=cut
+
+#
+# BioClass: small helper class that stores what the script has learned
+# about one module: its name, tested flag, type, path, and the sets of
+# superclasses and used classes.
+#
+package BioClass;
+
+# Constructor.  Takes the class name to describe; dies without one.
+sub new {
+    my ( $class, $name ) = @_;
+    die unless $name;
+
+    return bless {
+        'name'   => $name,
+        'tested' => 0,
+        'type'   => '',
+        'path'   => '',
+    }, $class;
+}
+
+
+# Read-only accessor for the class name.
+sub name {
+    my ($self) = @_;
+    return $self->{'name'};
+}
+
+# Get the tested flag; a true argument sets it.  It is never cleared.
+sub tested {
+    my ( $self, $value ) = @_;
+    if ($value) {
+        $self->{'tested'} = 1;
+    }
+    return $self->{'tested'} || 0;
+}
+
+# Get/set the type string; any defined argument is stored.
+sub type {
+    my ( $self, $value ) = @_;
+    if ( defined $value ) {
+        $self->{'type'} = $value;
+    }
+    return $self->{'type'};
+}
+
+# Get/set the file path; any defined argument is stored.
+sub path {
+    my ( $self, $value ) = @_;
+    if ( defined $value ) {
+        $self->{'path'} = $value;
+    }
+    return $self->{'path'};
+}
+
+# Remember a superclass name; false names are ignored.
+sub add_superclass {
+    my ( $self, $superclass ) = @_;
+    $superclass or return;
+    $self->{'superclasses'}->{$superclass} = 1 ;
+}
+
+# List the remembered superclass names (in no particular order).
+sub each_superclass {
+    my ($self) = @_;
+    return  keys %{$self->{'superclasses'}};
+}
+
+# Remember a used class name; false names are ignored.
+sub add_used_class {
+    my ( $self, $used_class ) = @_;
+    $used_class or return;
+    $self->{'used_classes'}->{$used_class} = 1 ;
+}
+
+# List the remembered used class names (in no particular order).
+sub each_used_class {
+    my ($self) = @_;
+    return  keys %{$self->{'used_classes'}};
+}
+
+package main;
+
+use File::Find;
+use Getopt::Long;
+use Data::Dumper;
+use strict;
+
+
+# Forward declarations of the subroutines used by the script.
+sub dir;
+sub modules;
+sub count;
+sub list_all;
+sub untested;
+sub info;
+sub inherit;
+sub synopsis;
+sub version;
+
+# Command line options.  --test_BioClass runs the BioClass self-test,
+# which exits; help replaces this process with perldoc.
+my ( $dir, $count, $list, $verbose, $info, $untested, $inherit, $synopsis,
+     $version );
+GetOptions(
+    'dir:s'         => \$dir,
+    'count'         => \$count,
+    'list'          => \$list,
+    'test_BioClass' => \&_test_BioClass,
+    'V|verbose'     => \$verbose,
+    'untested'      => \$untested,
+    'info:s'        => \$info,
+    'inherit'       => \$inherit,
+    'synopsis'      => \$synopsis,
+    'version'       => \$version,
+    'h|help|?'      => sub { exec( 'perldoc', $0 ); exit(0) },
+);
+
+
+our %MODULES; # storage structure: class name => BioClass object
+
+# Find modules.  The search root is relative to $ENV{PWD}: ../Bio by
+# default, or the --dir value when one is given.
+my %FIND_OPTIONS = ( wanted => \&modules );
+my $pwd          = $ENV{PWD};
+my $seachdir     = "$pwd/../Bio"; #default
+$seachdir        = "$pwd/$dir" if $dir;
+
+find( \%FIND_OPTIONS, $seachdir );
+
+
+# Run exactly one action; the first option that is set wins and
+# counting is the fallback.
+if    ($list)     { list_all() }
+elsif ($untested) { untested() }
+elsif ($info)     { info($info) }
+elsif ($inherit)  { inherit() }
+elsif ($synopsis) { synopsis() }
+elsif ($version)  { version() }
+else              { count() }
+
+
+################# end main ####################
+
+
+#
+# subroutines;
+#
+
+# Smoke test for the BioClass helper: builds one object, exercises its
+# accessors, dumps the result and exits the script.
+sub _test_BioClass {
+    my $probe = BioClass->new('Bio::Test');
+    print "Class name: ", $probe->name(), "\n";
+
+    $probe->add_superclass($_) for 'Bio::Super', 'Bio::Super2';
+    $probe->tested(1);
+    $probe->type('instance');
+
+    if ( $probe->tested ) {
+        print Dumper [ $probe->each_superclass ];
+    }
+    print Dumper $probe;
+    exit;
+}
+
+# File::Find callback: scans one .pm file and records in %MODULES every
+# package it declares, as a BioClass object carrying a type guessed from
+# the package name, the file path, and the Bio* classes the package uses
+# or inherits from.
+sub modules {
+    return unless /\.pm$/ ;
+    my $file = $_;
+
+    # The old 'warn "..." && return' evaluated the return first, so the
+    # warning was never printed.  A lexical handle and line variable also
+    # leave File::Find's $_ untouched.
+    open( my $fh, '<', $file ) or do {
+        warn "can't open file $file: $!";
+        return;
+    };
+
+    my $class;
+    while ( my $line = <$fh> ) {
+        if ( $line =~ /^package\s+([\w:]+)\s*;/ ) {
+            my $name = $1;
+            $class = BioClass->new($name);
+            $MODULES{$name} = $class;
+            if ( $name =~ /.*:[a-z]/ ) {
+                $class->type('component');
+            } elsif ( $name =~ /:Base/ || $name =~ /Base$/ ) {
+                $class->type('base');
+            } elsif ( $name =~ /[^A-Z]I$/ ) {
+                $class->type('interface');
+            } else {
+                $class->type('instance');
+            }
+            $class->path($File::Find::name);
+
+            # As before, nothing else is extracted from a package line:
+            # the old code ran the remaining tests against the bare
+            # package name, which cannot match any of them.
+            next;
+        }
+
+        # Lines before the first package statement are ignored.
+        next unless $class;
+
+        # NOTE(review): /^\w*use/ only matches 'use' at the very start
+        # of a line, so indented 'use' lines are skipped; /^\s*use/ may
+        # have been intended -- left unchanged.
+        if (   $line =~ /^\w*use/
+            && $line !~ /base/
+            && $line =~ /(Bio[\w:]+)\W*;/ )
+        {
+            $class->add_used_class($1);
+        }
+
+        # Superclasses from '@ISA' or 'use base' lines.  (The old second,
+        # @ISA-only copy of this block only re-added the same names.)
+        if ( ( $line =~ /\@ISA/ || $line =~ /use base/ ) && $line =~ /Bio/ ) {
+            while ( $line =~ /(Bio[\w:]+)/g ) {
+                $class->add_superclass($1);
+            }
+        }
+    }
+    close $fh;
+}
+
+=head1 OPTIONS
+
+Only one option is processed on each run of the script. The --verbose
+is an exception, it modifies the amount of output.
+
+=over 4
+
+=item B<-V | --verbose>
+
+B<INACTIVE>
+
+Set this option if you want to see more verbose output. Often that
+will mean seeing warnings normally going into STDERR.
+
+=cut
+
+=item B<-d | --dir> path
+
+Overrides the default directories to check by one directory 'path' and
+all its subdirectories.
+
+=item B<-c | --count>
+
+The default action if no other option is given. Gives the count of
+modules broken to B<instance> ("usable"), B<base> ( (abstract)?
+superclass) , B<interface> (the "I" files) and B<component> (used from
+instantiable parent) modules, in addition to total number of modules.
+
+Note that abstract superclass in bioperl is not an enforced concept and
+they are not clearly indicated in the class name.
+
+=cut
+
+# Prints how many recorded modules have a type matching each category,
+# followed by the total number of modules.
+sub count {
+    my @types = map { $MODULES{$_}->type } keys %MODULES;
+
+    my @report = (
+        [ "Instance : %3d\n", qr/instance/ ],
+        [ "Base     : %3d\n", qr/base/ ],
+        [ "Interface: %3d\n", qr/interface/ ],
+        [ "Component: %3d\n", qr/component/ ],
+    );
+
+    for my $row (@report) {
+        my ( $format, $pattern ) = @$row;
+        my $matching = grep { $_ =~ $pattern } @types;
+        printf $format, $matching;
+    }
+
+    print  "--------------\n";
+    printf "Total    : %3d\n", scalar (keys %MODULES);
+
+}
+
+=item B<-l | --list>
+
+Prints all the module names in alphabetical order. The output is a tab
+separated list of category (see above) and module name per line. The
+output can be processed with standard UNIX command line tools.
+
+=cut
+
+# Print one "type<TAB>name" line per known module, sorted by module name.
+sub list_all {
+    for my $module_name (sort keys %MODULES) {
+        my $category = $MODULES{$module_name}->type;
+        print "$category\t$module_name\n";
+    }
+}
+
+=item B<-u | --untested>
+
+Prints a list of instance modules which are I<not> explicitly used by
+test files in the directory. Superclasses or any classes used by others
+are not reported, either, since their methods are assumed to be tested
+by subclass tests.
+
+
+=cut
+
+# Recursively mark as tested every known module reachable from $name,
+# first through its superclasses and then through the classes it uses.
+# Modules that are not in %MODULES are ignored; a module already marked as
+# tested is not visited again, which also stops the recursion on cycles.
+# Always returns 1.
+sub _used_and_super {
+    my $name = shift;
+
+    for my $accessor (qw(each_superclass each_used_class)) {
+        for my $related ($MODULES{$name}->$accessor) {
+            my $module = $MODULES{$related};
+            next unless defined $module;
+            next if $module->tested;
+
+            $module->tested(1);
+            _used_and_super($related);
+        }
+    }
+
+    return 1;
+}
+
+# Report instance modules that no test script exercises.
+#
+# Pass 1: scan the test files under ../t for lines that mention Bio::
+# modules, mark those modules as tested and propagate the mark to their
+# superclasses and used classes via _used_and_super().
+# Pass 2: print every module of type 'instance' whose tested flag is
+# still 0, except those in the Bio::Search* name spaces.
+sub untested {
+    # The shell pipeline greps all *.t files for the character pattern
+    # "[ur][se][eq]" -- presumably meant to catch "use" and "req(uire)"
+    # lines; it also matches other three-letter combinations, which the
+    # filters below discard.
+    foreach (`find ../t -name "*.t" -print | xargs grep -hs "[ur][se][eq]"`) {
+        # drop everything up to and including the use/require keyword
+        s/.*use +//;
+        s/.*require +//;
+        next unless /^Bio/;
+
+        # strip trailing punctuation, then keep only the leading module name
+        s/[\W;]+$//;
+        s/([\w:]+).*/$1/;
+        my $name = $_;
+
+        # ignore names that are not modules collected in %MODULES
+        next unless $MODULES{$_};
+        $MODULES{$_}->tested(1) 
+            unless defined $MODULES{$_} and $MODULES{$_}->tested;
+
+        # These modules are marked as tested above, but their superclasses
+        # and used classes are not followed.
+        next if $MODULES{$name}->name eq "Bio::SeqIO::abi"; # exception: requires bioperl ext package
+        next if $MODULES{$name}->name eq "Bio::SeqIO::ctf"; # exception: requires bioperl ext package
+        next if $MODULES{$name}->name eq "Bio::SeqIO::exp"; # exception: requires bioperl ext package
+        next if $MODULES{$name}->name eq "Bio::SeqIO::pln"; # exception: requires bioperl ext package
+        next if $MODULES{$name}->name eq "Bio::SeqIO::ztr"; # exception: requires bioperl ext package
+#        print $MODULES{$name}->name, "\n";
+#        print Dumper $MODULES{$name};
+
+        _used_and_super($name);
+
+    }
+
+    foreach ( sort keys %MODULES) {
+
+        # skip some name spaces 
+        next  if /^Bio::Search/; # Bio::Search and Bio::SearchIO are extensively tested 
+                                 # but classes are used by attribute naming 
+
+        # the numeric comparison assumes tested() returns a number -- TODO confirm
+        print "$_\n" if
+            $MODULES{$_}->type eq 'instance' and ($MODULES{$_}->tested == 0) ;
+    }
+
+}
+
+=item B<-i | --info> class
+
+Dumps information about a class given as an argument.
+
+=cut
+
+# Print the name, type, superclasses and used classes of one module.
+#
+# Argument: the class name, which must be a key of %MODULES.
+# Dies with an explanatory message when no class name is given or when the
+# class is unknown (previously: an empty die message, and a crash on an
+# undefined value for unknown classes).
+sub info {
+    my $class = shift;
+    die "info: no class name given\n" unless $class;
+
+    my $c = $MODULES{$class};
+    die "info: unknown class '$class'\n" unless $c;
+
+    #print Dumper $MODULES{$class};
+    print $c->name, "\n";
+    printf "  Type:\n\t%s\n", $c->type;
+    print "  Superclasses:\n";
+    foreach (sort $c->each_superclass) {
+        print "\t$_\n";
+    }
+    print "  Used classes:\n";
+    foreach (sort $c->each_used_class) {
+        print "\t$_\n";
+    }
+}
+
+
+=item B<-i | --inherit>
+
+Finds interface modules which inherit from an instantiable class.
+
+Could be extended to check other bad inheritance patterns.
+
+=cut
+
+# Report every interface module that has a superclass whose name does not
+# end in "I", i.e. one that does not look like an interface itself.
+sub inherit {
+    for my $module_name (sort keys %MODULES) {
+        my $module = $MODULES{$module_name};
+        next if $module->type !~ /interface/;
+
+        my @suspects = grep { !/I$/ } $module->each_superclass;
+        for my $super (@suspects) {
+            print "Check this inheritance: ", $module->name, " <-- $super\n";
+        }
+    }
+}
+
+=item B<-s | --synopsis>
+
+Test SYNOPSIS section of bioperl modules for runnability
+
+=cut
+
+# Syntax-check the SYNOPSIS section of every instance module.
+#
+# For each module of type "instance" (two modules are skipped explicitly)
+# the text between "=head1 SYNOPSIS" and the next POD command is extracted,
+# lightly cleaned up and handed to "perl -c".  Modules whose synopsis does
+# not report "syntax OK" are printed together with the extracted code and
+# the compiler output.
+#
+# A module file that cannot be opened is reported with a warning and
+# skipped.  (The original "warn ... && return" returned from the whole sub
+# before the warning was ever printed, because && binds tighter than warn.)
+sub synopsis {
+    foreach my $module_name ( sort keys %MODULES) {
+        my $c=$MODULES{$module_name};
+
+        next unless $c->type eq "instance";
+        next if $c->name eq 'Bio::Root::Version';
+        next if $c->name eq 'Bio::Tools::HMM';
+
+        my $fh;
+        unless (open $fh, '<', $c->path) {
+            warn "can't open file ".$c->path.": $!";
+            next;
+        }
+
+        # collect the lines after the SYNOPSIS heading, up to the next
+        # line starting with '='
+        my $synopsis = '';
+        my $flag = 0;
+        while (my $line = <$fh>) {
+            last if $flag && $line =~ /^=/;
+            $synopsis .= $line if $flag;
+            $flag = 1 if $line =~ /^=head1 +SYNOPSIS/;
+        }
+        # close here so the handle is released on every path below
+        close $fh;
+
+        # remove comments
+        $synopsis =~ s/[^\$]#[^\n]*//g;
+        # allow linking to an other Bio module, e.g.: See L<Bio::DB::GFF>.
+        $synopsis =~ s/[^\n]*L<Bio[^\n]*//g;
+        # protect single quotes
+        $synopsis =~ s/'/"/g;
+
+        # NOTE(review): the synopsis text is passed through the shell inside
+        # single quotes; that is why single quotes were replaced above.
+        my $res = `perl -ce '$synopsis' 2>&1 `;
+        next if $res =~ /syntax OK/;
+        print $c->path, "\n";
+        print $synopsis;
+        print $res;
+        print "-" x 70, "\n"; 
+        # print "SYNOPSIS not runnable\n";
+    }
+}
+
+=item B<-v | --version>
+
+Test the VERSION of the module against the global one set in
+Bio::Root::Version. Print out the different ones.
+
+=cut
+
+# Compare each module's $VERSION with $Bio::Root::Version::VERSION and
+# print the module name and its version whenever the two differ.  Every
+# module is loaded in a separate perl process to read its version.
+sub version {
+
+    use Bio::Root::Version;
+    my $expected = $Bio::Root::Version::VERSION;
+
+    # these are defined together with an other module
+    # and can not be use independently
+    my %skip = map { $_ => 1 } (
+        'Bio::AnalysisI::JobI',
+        'Bio::PrimarySeq::Fasta',
+        'Bio::DB::Fasta::Stream',
+        'Bio::DB::GFF::ID_Iterator',
+        'Bio::DB::GFF::Adaptor::dbi::faux_dbh',
+        'Bio::LiveSeq::IO::SRS',    # tries to call an external module
+    );
+
+    for my $key (sort keys %MODULES) {
+        my $package = $MODULES{$key}->name;
+        next if $skip{$package};
+
+        # name of the package variable that the child process prints
+        my $version_var = "\$${package}::VERSION";
+        my $reported = `perl -we 'use $package; print $version_var;'`;
+
+        next if $expected eq $reported;
+        printf "%50s %-3s\n", $package, $reported;
+    }
+}
+
+__END__
+
+=item B<-? | -h  | --help>
+
+This help text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
+
+=head1 Contributors
+
+Albert Vilella, avilella-AT-gmail-DOT-com
+
+=cut
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/maintenance/modules.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/ncbi_blast_switches.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/ncbi_blast_switches.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/ncbi_blast_switches.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,23 @@
+#!/usr/bin/perl -w
+
+# This script determines all the valid command line switches
+# from the four main NCBI BLAST tools, and produces Perl code
+# to put into Bio/Tools/Run/StandAloneBlast.pm
+#
+# Torsten Seemann
+# 27 June 2006
+
+
+use strict;
+
+# The NCBI BLAST programs whose switches are collected.
+my @exe = qw(blastall blastpgp rpsblast bl2seq);
+
+for my $exe (@exe) {
+  # Run "<program> -" and read what it prints.  The list form of the pipe
+  # open uses a lexical handle and bypasses the shell; the error message
+  # names the program that could not be started.
+  open(my $help, '-|', $exe, '-') or die "Cannot run '$exe -': $!";
+  my @switch;
+  while (my $line = <$help>) {
+    # a switch line starts with optional whitespace, a dash and one letter
+    next unless $line =~ m/^\s*-(\w)\s/;
+    push @switch, $1;
+  }
+  close($help);
+  # emit one line of Perl code: @<PROGRAM>_PARAMS = qw(<sorted switches>);
+  print "\t\@",uc($exe),"_PARAMS = qw(", join(q{ }, sort @switch), ");\n";
+}
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/maintenance/ncbi_blast_switches.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/pod.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/pod.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/pod.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,178 @@
+#!/usr/bin/perl -w
+# $Id: pod.pl,v 1.9 2006/07/04 22:23:28 mauricio Exp $
+#
+=head1 NAME
+
+pod.pl - check the POD documentation syntax in modules and scripts
+
+=head1 SYNOPSIS
+
+B<pod.pl> [B<-d|--dir> path ] [B<-v|--verbose>] B<-b|--blankline>
+    [B<-?|-h|--help>]
+
+=head1 DESCRIPTION
+
+Checks Plain Old Documentation (POD) with highest possible stringency
+in every bioperl module and script in CVS modules 'core' and 'run'.
+
+Amounts to same as running
+
+  podchecker -warnings -warnings
+
+on every file.
+
+You are expected to have checked out CVS module 'bioperl_all'.
+Otherwise, bioperl-run module is not found.
+
+
+=head2 Results
+
+The results are written into file '/tmp/bioperl_pod_check' and
+displayed after the run. The output is filtered not to show
+confirmations of correct syntax. The result file is not removed.
+
+The aim is to have as few warnings, and no errors, as possible.  Links
+to web URLs give a warning but that seems to be spurious, so they are
+filtered out.  Currently there are a few cases of "multiple occurrence
+of link target" in several modules which are harmless.
+
+=head1 SEE ALSO
+
+L<podchecker>, L<Pod::Checker>
+
+=cut
+
+use File::Find;
+use Pod::Checker;
+use Getopt::Long;
+use strict;
+
+# Forward declarations: both subs are referenced below, before they are
+# defined.
+sub podcheck;
+sub blankline;
+
+#
+## Directories to check
+#
+my @dirs = qw( ../Bio/ ../../run/Bio  ../scripts ../../run/scripts . );
+
+# command line options
+# ($help is declared but never set: the help option is handled by the
+# callback below, which replaces this process with perldoc.)
+my ($verbose, $blankline, $dir, $help) = (0, undef, undef, undef);
+GetOptions(
+           'v|verbose' => \$verbose,
+           'dir:s' => \$dir,
+           'blankline' => \$blankline,
+	   'h|help|?' => sub{ exec('perldoc',$0); exit(0) }
+	   );
+
+# setup
+# $tmpfile receives the Pod::Checker report; it is not removed afterwards.
+my $tmpfile = '/tmp/bioperl_pod_check';
+# warning level 2, matching "podchecker -warnings -warnings" in the POD above
+our %POD_CHECKER_OPTIONS = ( '-warnings' => 2 );
+# no_chdir: File::Find does not change directory while traversing
+our %FIND_OPTIONS = ( wanted => \&podcheck, no_chdir => 1 );
+
+# run
+# F is a global handle; podcheck() writes its report to it.
+open (F, ">$tmpfile") || die "can't open file $tmpfile: $!";
+# --blankline swaps the per-file callback from podcheck to blankline
+$FIND_OPTIONS{wanted} = \&blankline if $blankline;
+
+if ($dir) {
+    find \%FIND_OPTIONS, $dir;
+} else {
+    find \%FIND_OPTIONS, @dirs;
+}
+close F;
+# Display the report without the lines containing "OK", and drop the
+# warnings that mention both "http" and "non-escaped".
+open (F, "grep -v OK $tmpfile|") || die "can't open file $tmpfile: $!";
+while (<F>) { print unless /http/ and /non-escaped/ }
+
+
+# this is where the action is
+# File::Find callback: run Pod::Checker on one file.
+#
+# Only existing files ending in .PLS, .pm or .pl are checked.  The checker
+# report goes to the global handle F; with --verbose the file name is
+# printed first, and files for which the checker reports a negative error
+# count are listed as having no POD.
+sub podcheck {
+    my $path = $_;
+    return if $path !~ /\.PLS$/ and $path !~ /\.p[ml]$/;
+    return if not -e $path;
+
+    print "$path\n" if $verbose;
+
+    my $checker = Pod::Checker->new(%POD_CHECKER_OPTIONS);
+    $checker->parse_from_file($path, \*F);
+
+    my $error_count = $checker->num_errors();
+    print "$path\tno POD\n" if $error_count < 0;
+}
+
+=head1 OPTIONS
+
+=over 3
+
+=item B<-d | --dir> path
+
+Overrides the default directories to check by one directory 'path' and
+all its subdirectories.
+
+=item B<-b | --blankline>
+
+Checks POD command paragraphs (lines starting with '=' character) for
+preceding nonblank lines. These lines are printed out with '++'.
+
+Also, if verbose is turned on, it will report on lines containing
+whitespace characters which prevent paragraphs from being recognised by
+older POD parsers (marked with '+'). Modern perlpod parsers (5.6.0 and
+later, I suppose) allow for whitespace lines surrounding command lines,
+but since bioperl still supports older versions, these lines should be
+cleaned to contain only '\n' and no space or tab characters.
+
+
+See: L<perlpodspec>
+
+
+=cut
+
+# File::Find callback: report POD command lines that are not properly
+# separated from the surrounding text.
+#
+# Only existing files ending in .PLS, .pm or .pl are read, one paragraph at
+# a time.  Output lines are prefixed with the file name:
+#   "+|cmd|"   command preceded by a line ending in space/tab (verbose only)
+#   "++|cmd|"  command preceded by a line ending in a word character
+#              (verbose only)
+#   "|cmd|+"   command followed by a line starting with space/tab
+#
+# A file that cannot be opened is reported with a warning and skipped.
+# (The original "warn ... && return" returned before the warning was
+# printed, because && binds tighter than warn.)  A lexical handle is used
+# so that the global handle F of the main program is left untouched.
+sub blankline {
+    return unless /\.PLS$/ or /\.p[ml]$/ ;
+    return unless -e $_;
+    my $file = $_;
+
+    my $fh;
+    unless (open $fh, '<', $file) {
+        warn "can't open file $file: $!";
+        return;
+    }
+
+    local $/="";    # paragraph mode: read blank-line separated chunks
+    while (my $para = <$fh>) {
+        print "$file: +|$1|\n" if $para =~ /[ \t]\n(=[a-z][^\n]+$)/m and $verbose;
+        print "$file: ++|$1|\n" if $para =~ /\w\n(=[a-z][^\n]+$)/m and $verbose;
+        print "$file:|$1|+\n" if $para =~ /(^=[a-z][^\n]+)\n[\t ]/m;
+        #print "$file:|$1|++\n" if /(^=[^\n]+)\n\w/m;
+    }
+    close $fh;
+}
+
+__END__
+
+=item B<-v | --verbose>
+
+Show the progress through files during the POD checking.
+
+=item B<-? | -h  | --help>
+
+This help text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email heikki-at-bioperl-dot-org
+
+=cut
+
+
+# find . -name '*.pm' -print | xargs  perl -e '$/=""; while (<>) {$n = $1 if /^package\s+([\w:]+)/; print "$n:|$1|"  if  /(\s\s^=[^\n]+$)/m ; }'  ;
+
+# find . -name '*.pm' -print | xargs  perl -e '$/=""; while (<>) {$n = $1 if /^package\s+([\w:]+)/; print "$n:|$1|\n"  if /(^=[^\n]+\n[\t ])/m ; }'  ;


Property changes on: trunk/packages/bioperl/branches/upstream/current/maintenance/pod.pl
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_script.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_script.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_script.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+#!/usr/bin/perl
+use Module::Build;
+use strict;
+use warnings;
+
+my $build = Module::Build->current;
+
+# link target (existing script) => name of the symlink to create
+my %symlink_scripts = ('bp_bulk_load_gff.pl' => 'bp_pg_bulk_load_gff.pl');
+
+#my $blib_dir = File::Spec->catdir($build->blib, 'script');
+# using blib prior to installation, post build, always 'works', but the
+# installation process installs the symlink as the actual file, so we may as
+# well have just done a copy
+
+my $install_dir = $build->install_destination('script');
+$build->log_info("Will try to install symlinks to $install_dir\n");
+my $orig_dir = $build->cwd;
+
+# Only create links once we really are in the installation directory;
+# otherwise the relative links would end up in the current directory.
+if (chdir($install_dir)) {
+    while (my ($source, $destination) = each %symlink_scripts) {
+        # nothing to do when the link is already in place (e.g. re-install)
+        next if -l $destination && readlink($destination) eq $source;
+
+        # symlink() dies only where symbolic links are not implemented; an
+        # ordinary failure just returns false, so turn that into an error
+        # as well so it gets reported.
+        my $linked = eval { symlink($source, $destination) or die "$!\n"; 1 };
+        next if $linked;
+
+        chomp(my $reason = $@);
+        $build->log_warn("Cannot create symbolic link named $destination on your system for $source in $install_dir: $reason\n");
+    }
+
+    chdir($orig_dir)
+        or $build->log_warn("Cannot change back to $orig_dir: $!\n");
+}
+else {
+    $build->log_warn("Cannot change to $install_dir to create symbolic links: $!\n");
+}
+
+exit;
+
+__END__
+
+=head1 NAME
+
+symlink_script.pl - install script to create symbolic links
+
+=head1 SYNOPSIS
+
+  perl Build.pl
+  ./Build install
+
+=head1 DESCRIPTION
+
+Used during "./Build install". Only works if the script installation directory
+used during "perl Build.pl" matches that used for the actual installation during
+"./Build install". So if you install to a special place, do
+
+perl Build.pl --install_base /home/me
+./Build install
+
+not
+
+perl Build.pl
+./Build install --install_base /home/me
+
+This script will create a symlink to a script in that same directory. It was
+written to create a symlink with the name 'bp_pg_bulk_load_gff.pl' that targeted
+'bp_bulk_load_gff.pl' but can be extended by adding files to the
+%symlink_scripts hash.
+
+Perl function 'symlink' is used to keep the script from crashing on systems
+that don't allow symbolic linking.
+
+=head1 SEE ALSO
+
+=cut
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_scripts.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_scripts.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/symlink_scripts.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,88 @@
+#!perl
+use Config;
+use File::Basename qw(&basename &dirname);
+use Cwd;
+
+# Extraction script: writes the real script (same base name, with '.pl' or,
+# on VMS, '.com') next to this file.  The generated script consists of the
+# local perl start line followed by the literal text of the here-document.
+
+# Remember where we were started and work in this file's own directory.
+$origdir = cwd;
+chdir dirname($0);
+# Output name: this script's name without its '.PL'/'.PLS' suffix ...
+$file = basename( $0, '.PL', '.PLS' );
+# ... plus the platform's script extension.
+$file .= $^O eq 'VMS' ? '.com' : '.pl';
+
+open OUT, ">$file" or die "Can't create $file: $!";
+
+print "Extracting $file (with variable substitutions)\n";
+
+# First line of the generated script, taken from this perl's configuration.
+print OUT "$Config{startperl}\n";
+
+# The quoted terminator means nothing below is interpolated: the text is
+# copied verbatim into the generated script, which takes the script
+# directory as its first argument.
+print OUT <<'!NO!SUBS!';
+use strict;
+
+my %symlink_scripts = ('bp_bulk_load_gff.pl' => 'bp_pg_bulk_load_gff.pl');
+
+my $dir = $ARGV[0];
+
+foreach my $target ( keys ( %symlink_scripts ) ) {
+    unlink "$dir/".$symlink_scripts{$target} if -e "$dir/".$symlink_scripts{$target};
+    # place symlink in eval to catch error on systems that don't allow symlinks
+    eval { symlink( "$dir/$target", "$dir/".$symlink_scripts{$target} ); 1}
+        or print STDERR "Cannot create symbolic link named $dir/"
+            . $symlink_scripts{$target}
+            . " on your system for $dir/$target\n";
+}
+
+!NO!SUBS!
+close OUT or die "Can't close $file: $!";
+# make the generated script executable
+chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
+# If exec succeeds this process is replaced, so the chdir below is only
+# reached when eunicefix is ':' or the exec fails.
+exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
+chdir $origdir;
+__END__
+
+=head1 NAME
+
+symlink_scripts.pl - install script to create symbolic links
+
+=head1 SYNOPSIS
+
+  perl symlink_scripts.pl
+
+=head1 DESCRIPTION
+
+Used during "make install".
+
+This script will create a symlink in the 'installscript' directory (as defined
+during install) to a script in that same directory.  It was written to create a
+symlink with the name 'bp_pg_bulk_load_gff.pl' that targeted
+'bp_bulk_load_gff.pl' but can be extended by adding files to the
+%symlink_scripts hash.
+
+Perl function 'symlink' is used to keep the script from crashing on systems
+that don't allow symbolic linking.
+
+=head1 SEE ALSO
+
+=cut
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ben Faga
+
+Email faga at cshl.edu
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/maintenance/version.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/maintenance/version.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/maintenance/version.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+#!/usr/bin/perl -w
+# $Id: version.pl,v 1.3 2004/11/14 15:03:14 bosborne Exp $
+#
+=head1 version
+
+This script is to add or modify version declaration for each bioperl pm.
+
+[Currently, it just add version. Later I will update it to modify version.]
+
+=head1 USAGE
+
+  perl version.pl <module directory> <version>
+
+=cut
+
+use strict;
+
+# Both arguments are required.
+die "USAGE: perl version.pl <module directory> <version>\n"
+    if @ARGV < 2;
+
+# The fallbacks only apply when an argument is given but false
+# (an empty string or "0").
+my $dir     = shift(@ARGV) || "$ENV{HOME}/src/bioperl-live/";
+my $version = shift(@ARGV) || '1.4';
+
+# Recursively collect the plain files below a directory.
+#
+# Argument: the directory to start from.
+# Returns:  the list of paths ("$dir/name") of all plain files found in
+#           the directory and its subdirectories.
+#
+# A directory that cannot be opened is reported with a warning and
+# contributes no files (previously the opendir result was ignored).  A
+# lexical directory handle replaces the global bareword DIR.
+sub traveral_dir {
+    my ($dir, )=@_;
+    my @out;
+
+    my $dh;
+    unless (opendir $dh, $dir) {
+        warn "can't open directory $dir: $!\n";
+        return @out;
+    }
+    my @allfiles= grep{$_ ne '.' and $_ ne '..'}readdir $dh;
+    closedir $dh;
+
+    my @full_path = map{"$dir/$_"} @allfiles;
+    @out = grep { -f } @full_path;
+    foreach my $subdir (grep { -d } @full_path){
+        push @out, traveral_dir($subdir);
+    }
+    return @out;
+}
+
+# All module files below $dir, in sorted order.
+my @pm=sort grep /\.pm$/, traveral_dir($dir);
+
+use ExtUtils::MakeMaker;
+
+# In-place edit applied to modules without a version: append an
+# 'our $VERSION="<version>";' line after a line consisting of the package
+# statement.
+my $ep ='s/^(package\s+[\w:]+;\r?)$/$1\nour \$VERSION="'. $version.'";/';
+
+foreach my $f (@pm) {
+    my $v = MM->parse_version($f);
+    # parse_version may return a real undef; print it as 'undef' instead of
+    # interpolating an undefined value
+    my $shown = defined $v ? $v : 'undef';
+    print "$shown\t$f\n";
+
+    if((not defined $v) or $v eq 'undef'){ # This is strange on parse_version.
+    # It return scalar 'undef', not the undef can be detected by defined.
+        # List form: the file name is passed as is, without going through
+        # the shell, so names with spaces or shell characters work too.
+        system('perl', '-p', '-i', '-e', $ep, $f) == 0
+            or warn "could not add a version to $f\n";
+    }
+}
+
+

Added: trunk/packages/bioperl/branches/upstream/current/models/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/models/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/models/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,26 @@
+# $Id: README,v 1.3 2004/02/13 17:34:17 bosborne Exp $
+
+This is the README file for the BioPerl models directory.
+
+o What the models/ directory is about.
+
+This directory is for files about schemas and plans in BioPerl.
+
+
+o Are any of these files important?
+
+It is definitely worth having a look at the file bioperl.dia - if you can
+open it. It gives you an overview of the most important BioPerl modules.
+
+
+o How do I open these files?
+
+Most files in this directory are XML files for a program dia (with
+extension .dia) which is a free GTK-library based diagram editor. It
+is part of the GNOME desktop and is included in most GNU/LINUX
+distributions (or see http://www.lysator.liu.se/~alla/dia).
+
+
+o Are these up-to-date?
+
+These are Bioperl version 1.0 models.
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/models/biblio.dia
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/models/biblio.dia
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/models/bio_liveseq_variation.dia
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/models/bio_liveseq_variation.dia
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/models/bio_map.dia
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/models/bio_map.dia
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/models/bio_restriction.dia
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/models/bio_restriction.dia	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/models/bio_restriction.dia	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/"><dia:diagramdata><dia:attribute name="background"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="paper"><dia:composite type="paper"><dia:attribute name="name"><dia:string>#A4#</dia:string></dia:attribute><dia:attribute name="tmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="bmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="lmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="rmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="is_portrait"><dia:boolean val="true"/></dia:attribute><dia:attribute name="scaling"><dia:real val="1"/></dia:attribute><dia:attribute name="fitto"><dia:boolean val="false"/></dia:attribute></dia:composite></dia:attribute><dia:attribute name="grid"><dia:composite type="grid"><dia:attribute name="width_x"><dia:real val="1"/></dia:attribute><dia:attribute name="width_y"><dia:real val="1"/></dia:attribute><dia:attribute name="visible_x"><dia:int val="1"/></dia:attribute><dia:attribute name="visible_y"><dia:int val="1"/></dia:attribute></dia:composite></dia:attribute><dia:attribute name="guides"><dia:composite type="guides"><dia:attribute name="hguides"/><dia:attribute name="vguides"/></dia:composite></dia:attribute></dia:diagramdata><dia:layer name="Background" visible="true"><dia:object type="Standard - Box" version="0" id="O0"><dia:attribute name="obj_pos"><dia:point val="29.63,24.28"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="29.58,24.23;31.1166,25.63"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="29.63,24.28"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="1.43658"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="1.3"/></dia:attribute><dia:attribute name="border_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="show_background"><dia:boolean val="true"/></dia:attribute></dia:object><dia:object 
type="UML - LargePackage" version="0" id="O1"><dia:attribute name="obj_pos"><dia:point val="3.68658,4.05"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="3.63658,2.2;30.7366,25.3051"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="3.68658,4.05"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="27"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="21.2051"/></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="name"><dia:string>#Bio::Restriction#</dia:string></dia:attribute></dia:object><dia:object type="UML - Class" version="0" id="O2"><dia:attribute name="obj_pos"><dia:point val="15.45,4.65"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="15.4,4.6;19.1,6.9"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="15.45,4.65"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.6"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Analysis#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" 
name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O3"><dia:attribute name="obj_pos"><dia:point val="8.875,11.55"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="8.825,11.5;12.325,13.8"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="8.875,11.55"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.4"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Enzyme#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute 
name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O4"><dia:attribute name="obj_pos"><dia:point 
val="20.14,7.47"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="20.09,7.42;27.29,9.72"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="20.14,7.47"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="7.1"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#EnzymeCollection#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real 
val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O5"><dia:attribute name="obj_pos"><dia:point val="22.99,11.55"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="22.94,11.5;24.44,13.8"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="22.99,11.55"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="1.4"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#IO#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font 
family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - LargePackage" version="0" id="O6"><dia:attribute name="obj_pos"><dia:point val="4.45,17.15"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="4.4,15.3;16.3866,24.5"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="4.45,17.15"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="11.8866"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="7.3"/></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="name"><dia:string>#Bio::Restriction::Enzyme#</dia:string></dia:attribute></dia:object><dia:object type="UML - Class" version="0" id="O7"><dia:attribute name="obj_pos"><dia:point val="10.68,20.17"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle 
val="10.63,20.12;14.28,22.42"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="10.68,20.17"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.55"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#MultiSite#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real 
val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O8"><dia:attribute name="obj_pos"><dia:point val="6.68,20.17"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="6.63,20.12;10.23,22.42"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="6.68,20.17"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.5"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#MultiCut#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute 
name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - LargePackage" version="0" id="O9"><dia:attribute name="obj_pos"><dia:point val="17.7866,17.12"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="17.7366,15.27;30.1,24.55"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="17.7866,17.12"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="12.2634"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="7.38"/></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="name"><dia:string>#Bio::Restriction::IO#</dia:string></dia:attribute></dia:object><dia:object type="UML - Class" version="0" id="O10"><dia:attribute name="obj_pos"><dia:point val="22.565,17.67"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="22.515,17.62;24.865,19.92"/></dia:attribute><dia:attribute name="elem_corner"><dia:point 
val="22.565,17.67"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="2.25"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#base#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real 
val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O11"><dia:attribute name="obj_pos"><dia:point val="20.43,21.77"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="20.38,21.72;23.18,24.02"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="20.43,21.77"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="2.7"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#itype2#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" 
name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O12"><dia:attribute name="obj_pos"><dia:point val="23.98,21.77"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="23.93,21.72;27.63,24.02"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="23.98,21.77"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.6"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#withrefm#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean 
val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O13"><dia:attribute name="obj_pos"><dia:point val="8.8,7.47"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="8.75,7.42;12.4,9.72"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="8.8,7.47"/></dia:attribute><dia:attribute name="elem_width"><dia:real 
val="3.55"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#EnzymeI#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="true"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real 
val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Generalization" version="0" id="O14"><dia:attribute name="obj_pos"><dia:point val="10.575,13.75"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="8.38,13.7;11.425,20.22"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="10.575,13.75"/><dia:point val="10.575,18.9"/><dia:point val="8.43,18.9"/><dia:point val="8.43,20.17"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O3" connection="6"/><dia:connection handle="1" to="O8" connection="1"/></dia:connections></dia:object><dia:object type="UML - Generalization" version="0" id="O15"><dia:attribute name="obj_pos"><dia:point val="10.575,13.75"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="9.725,13.7;12.505,20.22"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="10.575,13.75"/><dia:point val="10.575,18.92"/><dia:point val="12.455,18.92"/><dia:point val="12.455,20.17"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O3" connection="6"/><dia:connection handle="1" to="O7" connection="1"/></dia:connections></dia:object><dia:object type="UML - Generalization" version="0" id="O16"><dia:attribute name="obj_pos"><dia:point 
val="23.69,13.75"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="22.84,13.7;24.54,17.75"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="23.69,13.75"/><dia:point val="23.69,17"/><dia:point val="23.69,17"/><dia:point val="23.69,17.67"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O5" connection="6"/><dia:connection handle="1" to="O10" connection="1"/></dia:connections></dia:object><dia:object type="UML - Generalization" version="0" id="O17"><dia:attribute name="obj_pos"><dia:point val="23.69,19.87"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="22.84,19.82;25.83,22"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="23.69,19.87"/><dia:point val="23.69,21.25"/><dia:point val="25.78,21.25"/><dia:point val="25.78,21.77"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O10" connection="6"/><dia:connection handle="1" to="O12" connection="1"/></dia:connections></dia:object><dia:object type="UML - Generalization" version="0" id="O18"><dia:attribute name="obj_pos"><dia:point val="23.69,19.87"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="21.73,19.82;24.54,22"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="23.69,19.87"/><dia:point val="23.69,21.25"/><dia:point val="21.78,21.25"/><dia:point val="21.78,21.77"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum 
val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O10" connection="6"/><dia:connection handle="1" to="O11" connection="1"/></dia:connections></dia:object><dia:object type="UML - Realizes" version="0" id="O19"><dia:attribute name="obj_pos"><dia:point val="10.575,9.67"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="9.725,9.62;11.425,11.8"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="10.575,9.67"/><dia:point val="10.575,11.05"/><dia:point val="10.575,11.05"/><dia:point val="10.575,11.55"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O13" connection="6"/><dia:connection handle="1" to="O3" connection="1"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O20"><dia:attribute name="obj_pos"><dia:point val="23.69,9.67"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="22.465,9.62;24.915,12.07"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="23.69,9.67"/><dia:point val="23.69,11.1551"/><dia:point val="23.69,11.1551"/><dia:point val="23.69,11.55"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>#creates#</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute 
name="arrow"><dia:boolean val="true"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O4" connection="6"/><dia:connection handle="1" to="O5" connection="1"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O21"><dia:attribute name="obj_pos"><dia:point val="12.1366,12.3551"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="12.1366,8.92;20.89,14.7551"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="12.1366,12.3551"/><dia:point val="12.1366,12.3551"/><dia:point val="20.14,12.3551"/><dia:point val="20.14,9.67"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>#contains#</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="true"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="1"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="1" 
to="O4" connection="5"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O22"><dia:attribute name="obj_pos"><dia:point val="23.69,7.47"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="19,5.07;24.49,7.52"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="23.69,7.47"/><dia:point val="23.69,5.95"/><dia:point val="19.05,5.95"/><dia:point val="19.05,5.35"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>#uses#</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="true"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O4" connection="1"/><dia:connection handle="1" to="O2" connection="4"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O23"><dia:attribute name="obj_pos"><dia:point val="20.415,0.8"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="20.365,0.75;27.015,3.05"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="20.415,0.8"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="6.55"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute 
name="name"><dia:string>#Bio::PrimarySeqI#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="true"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="monospace" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="monospace" style="88" name="Courier"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="sans" style="80" name="Courier"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="sans" style="88" name="Courier"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real 
val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Association" version="0" id="O24"><dia:attribute name="obj_pos"><dia:point val="23.69,3"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="19,2.95;24.49,5.95"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="23.69,3"/><dia:point val="23.69,5.15"/><dia:point val="19.05,5.15"/><dia:point val="19.05,5.35"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>#cuts#</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="true"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O23" connection="6"/><dia:connection handle="1" to="O2" connection="4"/></dia:connections></dia:object><dia:object type="Standard - Box" version="0" id="O25"><dia:attribute name="obj_pos"><dia:point val="3.2,0.55"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="3.15,0.5;4.68658,1.9"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="3.2,0.55"/></dia:attribute><dia:attribute name="elem_width"><dia:real 
val="1.43658"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="1.3"/></dia:attribute><dia:attribute name="border_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="show_background"><dia:boolean val="true"/></dia:attribute></dia:object></dia:layer></dia:diagram>

Added: trunk/packages/bioperl/branches/upstream/current/models/bioperl.dia
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/models/bioperl.dia
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/models/coordinatemapper.dia
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/models/coordinatemapper.dia	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/models/coordinatemapper.dia	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/"><dia:diagramdata><dia:attribute name="background"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="paper"><dia:composite type="paper"><dia:attribute name="name"><dia:string>#A4#</dia:string></dia:attribute><dia:attribute name="tmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="bmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="lmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="rmargin"><dia:real val="2.8222"/></dia:attribute><dia:attribute name="is_portrait"><dia:boolean val="true"/></dia:attribute><dia:attribute name="scaling"><dia:real val="1"/></dia:attribute><dia:attribute name="fitto"><dia:boolean val="false"/></dia:attribute></dia:composite></dia:attribute><dia:attribute name="grid"><dia:composite type="grid"><dia:attribute name="width_x"><dia:real val="1"/></dia:attribute><dia:attribute name="width_y"><dia:real val="1"/></dia:attribute><dia:attribute name="visible_x"><dia:int val="1"/></dia:attribute><dia:attribute name="visible_y"><dia:int val="1"/></dia:attribute></dia:composite></dia:attribute><dia:attribute name="guides"><dia:composite type="guides"><dia:attribute name="hguides"/><dia:attribute name="vguides"/></dia:composite></dia:attribute></dia:diagramdata><dia:layer name="Background" visible="true"><dia:object type="UML - LargePackage" version="0" id="O0"><dia:attribute name="obj_pos"><dia:point val="1.70187,11.5356"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="1.65187,9.6856;52.4798,34.4655"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="1.70187,11.5356"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="50.7279"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="22.8799"/></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute 
name="name"><dia:string>#Bio::Coordinate#</dia:string></dia:attribute></dia:object><dia:object type="Standard - Box" version="0" id="O1"><dia:attribute name="obj_pos"><dia:point val="2.71269,22.9925"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="2.66269,22.9425;31.8999,33.7214"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="2.71269,22.9925"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="29.1372"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="10.6789"/></dia:attribute><dia:attribute name="inner_color"><dia:color val="#e5e5e5"/></dia:attribute><dia:attribute name="show_background"><dia:boolean val="true"/></dia:attribute></dia:object><dia:object type="UML - Class" version="0" id="O2"><dia:attribute name="obj_pos"><dia:point val="16.2505,14.8379"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="16.2005,14.7879;19.7005,19.2879"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="16.2505,14.8379"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.4"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="4.4"/></dia:attribute><dia:attribute name="name"><dia:string>#MapperI#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="true"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute 
name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="courier new" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="Arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"><dia:composite type="umloperation"><dia:attribute name="name"><dia:string>#map#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="type"><dia:string>##</dia:string></dia:attribute><dia:attribute name="visibility"><dia:enum val="0"/></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="inheritance_type"><dia:enum val="2"/></dia:attribute><dia:attribute name="query"><dia:boolean val="false"/></dia:attribute><dia:attribute name="class_scope"><dia:boolean val="false"/></dia:attribute><dia:attribute 
name="parameters"/></dia:composite><dia:composite type="umloperation"><dia:attribute name="name"><dia:string>#swap#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="type"><dia:string>##</dia:string></dia:attribute><dia:attribute name="visibility"><dia:enum val="0"/></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="inheritance_type"><dia:enum val="2"/></dia:attribute><dia:attribute name="query"><dia:boolean val="false"/></dia:attribute><dia:attribute name="class_scope"><dia:boolean val="false"/></dia:attribute><dia:attribute name="parameters"/></dia:composite><dia:composite type="umloperation"><dia:attribute name="name"><dia:string>#test#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="type"><dia:string>##</dia:string></dia:attribute><dia:attribute name="visibility"><dia:enum val="0"/></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="inheritance_type"><dia:enum val="2"/></dia:attribute><dia:attribute name="query"><dia:boolean val="false"/></dia:attribute><dia:attribute name="class_scope"><dia:boolean val="false"/></dia:attribute><dia:attribute name="parameters"/></dia:composite></dia:attribute><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O3"><dia:attribute name="obj_pos"><dia:point val="7.995,24"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="7.945,23.95;10.095,26.25"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="7.995,24"/></dia:attribute><dia:attribute name="elem_width"><dia:real 
val="2.05"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Pair#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute 
name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Realizes" version="0" id="O4"><dia:attribute name="obj_pos"><dia:point val="17.9505,19.2379"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="8.97,19.1879;18.8005,24.05"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="17.9505,19.2379"/><dia:point val="17.9505,22.1472"/><dia:point val="9.02,22.1472"/><dia:point val="9.02,24"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O2" connection="6"/><dia:connection handle="1" to="O3" connection="1"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O5"><dia:attribute name="obj_pos"><dia:point val="15.8499,23.95"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="15.7999,23.9;20.1499,26.2"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="15.8499,23.95"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="4.25"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Collection#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute 
name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Realizes" version="0" id="O6"><dia:attribute name="obj_pos"><dia:point val="17.9505,19.2379"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle 
val="17.1005,19.1879;18.8005,24"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="17.9505,19.2379"/><dia:point val="17.9505,22.15"/><dia:point val="17.9749,22.15"/><dia:point val="17.9749,23.95"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O2" connection="6"/><dia:connection handle="1" to="O5" connection="1"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O7"><dia:attribute name="obj_pos"><dia:point val="16.2505,19.2379"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="15.0999,19.1879;16.5999,24.7"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="16.2505,19.2379"/><dia:point val="16.2505,20.9415"/><dia:point val="15.8499,20.9415"/><dia:point val="15.8499,23.95"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum 
val="1"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O2" connection="5"/><dia:connection handle="1" to="O5" connection="0"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O8"><dia:attribute name="obj_pos"><dia:point val="5.53784,30.7466"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="5.48784,30.6966;12.4878,32.9966"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="5.53784,30.7466"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="6.9"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#ExtrapolatingPair#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" 
name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Generalization" version="0" id="O9"><dia:attribute name="obj_pos"><dia:point val="9.02,26.2"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="8.17,26.15;9.87,30.7966"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="9.02,26.2"/><dia:point val="9.02,26.9"/><dia:point val="8.98784,26.9"/><dia:point val="8.98784,30.7466"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O3" connection="6"/><dia:connection handle="1" to="O8" connection="1"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O10"><dia:attribute name="obj_pos"><dia:point val="40.272,20.566"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.222,20.516;43.172,22.816"/></dia:attribute><dia:attribute 
name="elem_corner"><dia:point val="40.272,20.566"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="2.85"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Result#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real 
val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O11"><dia:attribute name="obj_pos"><dia:point val="42.5678,28.992"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="42.5178,28.942;48.1178,31.242"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="42.5678,28.992"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="5.5"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Result::Match#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" 
name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O12"><dia:attribute name="obj_pos"><dia:point val="35.1765,28.992"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="35.1265,28.942;40.1265,31.242"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="35.1765,28.992"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="4.9"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Result::Gap#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean 
val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O13"><dia:attribute name="obj_pos"><dia:point 
val="37.314,36.6612"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="37.264,36.6112;45.864,38.9112"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="37.314,36.6612"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="8.5"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Bio::Location::Simple#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real 
val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Generalization" version="0" id="O14"><dia:attribute name="obj_pos"><dia:point val="41.564,36.6612"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="37.5765,31.142;42.414,36.7565"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="41.564,36.6612"/><dia:point val="41.564,35.1565"/><dia:point val="37.6265,35.1565"/><dia:point val="37.6265,31.192"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O13" connection="1"/><dia:connection handle="1" to="O12" connection="6"/></dia:connections></dia:object><dia:object type="UML - Generalization" version="0" id="O15"><dia:attribute name="obj_pos"><dia:point val="41.564,36.6612"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.714,31.142;45.3678,36.7565"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="41.564,36.6612"/><dia:point val="41.564,35.1565"/><dia:point val="45.3178,35.1565"/><dia:point val="45.3178,31.192"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute 
name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O13" connection="1"/><dia:connection handle="1" to="O11" connection="6"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O16"><dia:attribute name="obj_pos"><dia:point val="45.3178,28.992"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.947,22.016;45.3678,30.592"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="45.3178,28.992"/><dia:point val="45.3178,26.3164"/><dia:point val="41.697,26.3164"/><dia:point val="41.697,22.766"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="1"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O11" connection="1"/><dia:connection handle="1" to="O10" connection="6"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O17"><dia:attribute name="obj_pos"><dia:point val="7.995,24"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle 
val="7.245,9.06849;8.745,24.75"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="7.995,24"/><dia:point val="7.995,19.315"/><dia:point val="7.98808,19.315"/><dia:point val="7.98808,9.11849"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="1"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>#2#</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O3" connection="0"/><dia:connection handle="1" to="O29" connection="6"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O18"><dia:attribute name="obj_pos"><dia:point val="19.6505,17.1379"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="19.6005,17.0879;40.2097,19.2158"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="19.6505,17.1379"/><dia:point val="19.6505,17.3348"/><dia:point val="40.1597,17.3348"/><dia:point val="40.1597,17.6158"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>#create#</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute 
name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O2" connection="9"/><dia:connection handle="1" to="O27" connection="5"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O19"><dia:attribute name="obj_pos"><dia:point val="16.6205,30.7673"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="16.5705,30.7173;19.3705,33.0173"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="16.6205,30.7673"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="2.7"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Chain#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute 
name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Generalization" version="0" id="O20"><dia:attribute name="obj_pos"><dia:point val="17.9749,26.15"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="17.1249,26.1;18.8249,30.8173"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="17.9749,26.15"/><dia:point val="17.9749,28.6526"/><dia:point val="17.9705,28.6526"/><dia:point val="17.9705,30.7673"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum 
val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O5" connection="6"/><dia:connection handle="1" to="O19" connection="1"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O21"><dia:attribute name="obj_pos"><dia:point val="24.9723,27.0825"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="24.9223,27.0325;30.2223,29.3325"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="24.9723,27.0825"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="5.2"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#GeneMapper#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" 
name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Realizes" version="0" id="O22"><dia:attribute name="obj_pos"><dia:point val="17.9505,19.2379"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="17.1005,19.1879;27.6223,27.1325"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="17.9505,19.2379"/><dia:point val="17.9505,22.1462"/><dia:point val="27.5723,22.1462"/><dia:point val="27.5723,27.0825"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O2" connection="6"/><dia:connection handle="1" to="O21" connection="1"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O23"><dia:attribute name="obj_pos"><dia:point 
val="40.7724,7.9105"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.7224,7.8605;48.4724,10.1605"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="40.7724,7.9105"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="7.65"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Bio::Location::Split#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real 
val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Generalization" version="0" id="O24"><dia:attribute name="obj_pos"><dia:point val="44.5974,10.1105"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="43.072,10.0605;45.4474,23.3902"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="44.5974,10.1105"/><dia:point val="44.5974,21.7902"/><dia:point val="43.122,21.7902"/><dia:point val="43.122,20.566"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O23" connection="6"/><dia:connection handle="1" to="O10" connection="2"/></dia:connections></dia:object><dia:object type="UML - Association" version="0" id="O25"><dia:attribute name="obj_pos"><dia:point val="37.6265,28.992"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="37.5765,22.016;42.447,30.592"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="37.6265,28.992"/><dia:point val="37.6265,26.3164"/><dia:point val="41.697,26.3164"/><dia:point val="41.697,22.766"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute 
name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="1"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O12" connection="1"/><dia:connection handle="1" to="O10" connection="6"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O26"><dia:attribute name="obj_pos"><dia:point val="9.45776,13.299"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="9.40776,13.249;14.6578,16.149"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="9.45776,13.299"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="5.15"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.8"/></dia:attribute><dia:attribute name="name"><dia:string>#Utils#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="true"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute 
name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="courier new" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="Arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"><dia:composite type="umloperation"><dia:attribute name="name"><dia:string>#from_align#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="type"><dia:string>##</dia:string></dia:attribute><dia:attribute name="visibility"><dia:enum val="0"/></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean 
val="false"/></dia:attribute><dia:attribute name="inheritance_type"><dia:enum val="2"/></dia:attribute><dia:attribute name="query"><dia:boolean val="false"/></dia:attribute><dia:attribute name="class_scope"><dia:boolean val="false"/></dia:attribute><dia:attribute name="parameters"/></dia:composite></dia:attribute><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Class" version="0" id="O27"><dia:attribute name="obj_pos"><dia:point val="40.1597,15.4158"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.1097,15.3658;43.2597,17.6658"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="40.1597,15.4158"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="3.05"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#ResultI#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" 
name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Realizes" version="0" id="O28"><dia:attribute name="obj_pos"><dia:point val="41.6847,17.6158"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.8347,17.5658;42.5347,21.8267"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="41.6847,17.6158"/><dia:point val="41.6847,20.2267"/><dia:point val="41.697,20.2267"/><dia:point val="41.697,20.566"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O27" connection="6"/><dia:connection handle="1" to="O10" connection="1"/></dia:connections></dia:object><dia:object type="UML 
- Class" version="0" id="O29"><dia:attribute name="obj_pos"><dia:point val="3.73808,6.91849"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="3.68808,6.86849;12.2881,9.16849"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="3.73808,6.91849"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="8.5"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Bio::Location::Simple#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" 
name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Realizes" version="0" id="O30"><dia:attribute name="obj_pos"><dia:point val="41.6847,17.6158"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="40.8347,17.5658;47.3879,28.9831"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="41.6847,17.6158"/><dia:point val="41.6847,19.6834"/><dia:point val="47.3379,19.6834"/><dia:point val="47.3379,28.9331"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O27" connection="6"/></dia:connections></dia:object><dia:object type="UML - Realizes" version="0" id="O31"><dia:attribute name="obj_pos"><dia:point val="41.6847,17.6158"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="36.3261,17.5658;42.5347,29.0538"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="41.6847,17.6158"/><dia:point val="41.6847,19.6624"/><dia:point val="36.3761,19.6624"/><dia:point val="36.3761,29.0038"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum 
val="1"/></dia:attribute><dia:attribute name="name"><dia:string>##</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:connections><dia:connection handle="0" to="O27" connection="6"/></dia:connections></dia:object><dia:object type="UML - Class" version="0" id="O32"><dia:attribute name="obj_pos"><dia:point val="28.6446,18.5959"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="28.5946,18.5459;31.5946,20.8459"/></dia:attribute><dia:attribute name="elem_corner"><dia:point val="28.6446,18.5959"/></dia:attribute><dia:attribute name="elem_width"><dia:real val="2.9"/></dia:attribute><dia:attribute name="elem_height"><dia:real val="2.2"/></dia:attribute><dia:attribute name="name"><dia:string>#Graph#</dia:string></dia:attribute><dia:attribute name="stereotype"><dia:string>##</dia:string></dia:attribute><dia:attribute name="comment"><dia:string>##</dia:string></dia:attribute><dia:attribute name="abstract"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_attributes"><dia:boolean val="false"/></dia:attribute><dia:attribute name="suppress_operations"><dia:boolean val="false"/></dia:attribute><dia:attribute name="visible_attributes"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_operations"><dia:boolean val="true"/></dia:attribute><dia:attribute name="visible_comments"><dia:boolean val="false"/></dia:attribute><dia:attribute name="foreground_color"><dia:color val="#000000"/></dia:attribute><dia:attribute name="background_color"><dia:color val="#ffffff"/></dia:attribute><dia:attribute name="normal_font"><dia:font family="Courier New" style="0" name="Courier"/></dia:attribute><dia:attribute name="abstract_font"><dia:font family="Courier New" style="4" name="Courier-Oblique"/></dia:attribute><dia:attribute name="polymorphic_font"><dia:font family="monospace" style="8" name="Courier"/></dia:attribute><dia:attribute name="classname_font"><dia:font family="arial" 
style="80" name="Helvetica-Bold"/></dia:attribute><dia:attribute name="abstract_classname_font"><dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/></dia:attribute><dia:attribute name="comment_font"><dia:font family="sans" style="8" name="Courier"/></dia:attribute><dia:attribute name="font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="polymorphic_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="abstract_font_height"><dia:real val="0.8"/></dia:attribute><dia:attribute name="classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="abstract_classname_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="comment_font_height"><dia:real val="1"/></dia:attribute><dia:attribute name="attributes"/><dia:attribute name="operations"/><dia:attribute name="template"><dia:boolean val="false"/></dia:attribute><dia:attribute name="templates"/></dia:object><dia:object type="UML - Association" version="0" id="O33"><dia:attribute name="obj_pos"><dia:point val="30.1723,27.0825"/></dia:attribute><dia:attribute name="obj_bb"><dia:rectangle val="27.8585,20.7459;32.4085,28.6825"/></dia:attribute><dia:attribute name="orth_points"><dia:point val="30.1723,27.0825"/><dia:point val="30.1723,21.9316"/><dia:point val="30.0946,21.9316"/><dia:point val="30.0946,20.7959"/></dia:attribute><dia:attribute name="orth_orient"><dia:enum val="1"/><dia:enum val="0"/><dia:enum val="1"/></dia:attribute><dia:attribute name="name"><dia:string>#shortest path#</dia:string></dia:attribute><dia:attribute name="direction"><dia:enum val="0"/></dia:attribute><dia:attribute name="ends"><dia:composite><dia:attribute name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite><dia:composite><dia:attribute 
name="role"><dia:string>##</dia:string></dia:attribute><dia:attribute name="multiplicity"><dia:string>##</dia:string></dia:attribute><dia:attribute name="arrow"><dia:boolean val="false"/></dia:attribute><dia:attribute name="aggregate"><dia:enum val="0"/></dia:attribute></dia:composite></dia:attribute><dia:connections><dia:connection handle="0" to="O21" connection="2"/><dia:connection handle="1" to="O32" connection="6"/></dia:connections></dia:object></dia:layer></dia:diagram>

Added: trunk/packages/bioperl/branches/upstream/current/models/map_proposal.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/models/map_proposal.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/models/map_proposal.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,150 @@
+
+Map data is a critical component in many aspects of genetics and
+biological research.   Well defined toolkits for manipulating map data
+do not exist at this point, so we propose to build a system for
+manipulating most types of map data (Genetic, RH, RFLP, Sequence, and
+LD).  
+
+Map Proposal
+
+This document proposes an object hierarchy for maps, markers, and
+their manipulation. 
+
+Key Points
+* A Map is an object which contains mappable elements.
+* A Map can be defined for a given organism or population of individuals.
+* A Mappable element is an element with a position within a map.
+
+Background information
+ Maps are made up of elements which are mappable.  This includes
+ genetic and physical markers.  
+
+ A genetic map consists of markers which have a given recombination
+ distance between them.  This distance is usually given as
+ centi-morgans or 1% recombination between them.  Other distances
+ include ... Examples of these are the publicly available
+ Marshfield and Genethon maps.  
+
+ Radiation hybrid maps consist of markers which have been mapped to
+ radiation hybrid panels.  Typically these markers are STSes which
+ have been processed on RH panels.  The distance between markers is
+ calculated in centi-Rads which represent .  Examples of these include
+ Whitehead STS, GeneMap '99.
+
+ Restriction Enzyme (RE) maps are used to describe RE cut points in a
+ given sequence and can be used to "fingerprint" sections of DNA
+ (typically BAC clones).  Clones which share a statistically (based
+ on known frequency of RE cutting) significant collection of fingerprints
+ are likely to overlap.  Additionally 
+
+ Physical maps or BAC/PAC/YAC maps represent clone fragment overlap.
+ These maps are used to represent how clones overlap and form a
+ consensus sequence of a genomic or cDNA region.
+ 
+ Sequence maps represent the known consensus sequence for a given
+ region of typically genomic DNA.
+
+ LD and Haplotype maps ...
+ 
+ Comparisons between maps from different organisms can yield useful
+ observations about trends in evolution.  Additionally comparisons of
+ maps for the same species can provide insight into information such
+ as recombination hot spots and DNA stability.
+
+Object proposal
+ Maps are objects which are made up of mappable elements.  A mappable
+ element has a position on a map and can be tested for equality and
+ relative position to other mappable element positions. 
+ 
+ These are some baseline interface and object definitions.  Other work
+ has been done by Philip Lijnzaad, Emmanuel Barillot and OMG folks to
+ create definitions for maps.
+
+ Interfaces
+  Bio::IdentifiableI 
+    string    getID // unique identifier -- this goes with Juha's
+                    // identifiable property?
+   
+  Bio::NameableI
+    string    getName
+    
+  Bio::AliasableI isa Bio::NameableI
+    string    getAliases
+
+
+  Bio::Map::MapI isa Bio::NameableI isa Bio::IdentifiableI
+    MapIterator	       getAllElements // for in-order iterator access
+    ?Bio::ChromosomeI? chromosome     // Should maps be built one per
+                                      // chromosome aggregated for
+				      // a whole report set.
+    Bio::SpeciesI      species        // use existing BP species object
+				      // which may need to be more robust
+    numeric	       length         // not sure what to return for
+                                      // relative or RFLP maps
+    string	       units          // Map units
+    string	       name	      // Map Name
+
+
+  Bio::Map::MappableI 
+   // Where to handle the fact that RFLP 
+   // Markers have multiple Map positions
+    PositionI position(MapI) 
+    boolean   equals(MappableI)
+    boolean   less_than(MappableI)
+    boolean   greater_than(MappableI)
+
+    Bio::Map::PositionI 
+     // may be undef to handle relative maps [RE].  
+     // This is where a known position for a marker can be retrieved  
+     // Multiple positions are possible for RE on a sequence map
+     Array<string>  positionValues  
+  
+  Bio::MarkerI isa Bio::MappableI isa Bio::AliasableI
+
+  // heikki to help fill in Variant and Allele information
+  Bio::LiveSeq::AlleleI
+
+  Bio::LiveSeq::VariantI isa Bio::MarkerI
+    Bio::PrimarySeqI getFwdPrimer()
+    Bio::PrimarySeqI getRevPrimer()
+    // I assume there should always be a primary set of 
+    // markers which define start/end points 
+    // should this be hidden inside more methods to 
+    // handle RFLP, etc?
+    Bio::LiveSeq::AlleleI getAlleles()
+     
+ Implementations
+   Bio::Marker::RestrictionEnzyme isa Bio::MarkerI
+   Bio::Marker::STS isa Bio::MarkerI
+   Bio::Marker::Microsat isa Bio::LiveSeq::VariantI
+   Bio::Marker::CytogeneticBand isa Bio::MarkerI
+   Bio::Marker::VLTR isa Bio::MarkerI
+   Bio::Marker::SNP
+   Bio::Bin 
+   
+   Bio::Map::Cytogenetic isa Bio::Map::MapI 
+     
+   Bio::Map::RadiationHybrid
+   Bio::Map::Genetic
+   Bio::Map::GeneticMap 
+     string	       getSex         // code as a string? - only 
+   Bio::Map::RFLP
+   Bio::Map::Sequence // Should probably be Bio::Assembly or these two
+                      // need to work together; Sequence Map could be
+		      // built with Bio::Assemblies
+   Bio::Map::Haplotype // what would this entail -- SNP components?
+   
+
+Caveats, questions, etc
+-----------------------
+Namespace is very flexible here.  
+
+An important useful result of this toolkit will be the ability to
+programmatically go from one map to another.  So Querying Maps for a
+marker - perhaps based on that marker's unique id will allow one to
+compare distances on different maps or go from genetic to sequence
+maps very easily.  
+
+Not sure if we should be doing a Bio::ChromosomeI or can just code
+with a string/numeric?  Does Polyploidy cause any problems in maps or
+just in population/allele issues?

Added: trunk/packages/bioperl/branches/upstream/current/models/maps_and_markers.dia
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/models/maps_and_markers.dia
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/models/popgen.dia
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/models/popgen.dia	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/models/popgen.dia	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4571 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">
+  <dia:diagramdata>
+    <dia:attribute name="background">
+      <dia:color val="#ffffff"/>
+    </dia:attribute>
+    <dia:attribute name="pagebreak">
+      <dia:color val="#000099"/>
+    </dia:attribute>
+    <dia:attribute name="paper">
+      <dia:composite type="paper">
+        <dia:attribute name="name">
+          <dia:string>#A4#</dia:string>
+        </dia:attribute>
+        <dia:attribute name="tmargin">
+          <dia:real val="2.82"/>
+        </dia:attribute>
+        <dia:attribute name="bmargin">
+          <dia:real val="2.82"/>
+        </dia:attribute>
+        <dia:attribute name="lmargin">
+          <dia:real val="2.82"/>
+        </dia:attribute>
+        <dia:attribute name="rmargin">
+          <dia:real val="2.82"/>
+        </dia:attribute>
+        <dia:attribute name="is_portrait">
+          <dia:boolean val="false"/>
+        </dia:attribute>
+        <dia:attribute name="scaling">
+          <dia:real val="0.265177"/>
+        </dia:attribute>
+        <dia:attribute name="fitto">
+          <dia:boolean val="true"/>
+        </dia:attribute>
+        <dia:attribute name="fitwidth">
+          <dia:int val="1"/>
+        </dia:attribute>
+        <dia:attribute name="fitheight">
+          <dia:int val="1"/>
+        </dia:attribute>
+      </dia:composite>
+    </dia:attribute>
+    <dia:attribute name="grid">
+      <dia:composite type="grid">
+        <dia:attribute name="width_x">
+          <dia:real val="1"/>
+        </dia:attribute>
+        <dia:attribute name="width_y">
+          <dia:real val="1"/>
+        </dia:attribute>
+        <dia:attribute name="visible_x">
+          <dia:int val="1"/>
+        </dia:attribute>
+        <dia:attribute name="visible_y">
+          <dia:int val="1"/>
+        </dia:attribute>
+        <dia:composite type="color"/>
+      </dia:composite>
+    </dia:attribute>
+    <dia:attribute name="color">
+      <dia:color val="#d8e5e5"/>
+    </dia:attribute>
+    <dia:attribute name="guides">
+      <dia:composite type="guides">
+        <dia:attribute name="hguides"/>
+        <dia:attribute name="vguides"/>
+      </dia:composite>
+    </dia:attribute>
+  </dia:diagramdata>
+  <dia:layer name="Background" visible="true">
+    <dia:object type="UML - LargePackage" version="0" id="O0">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-42.4451,-26.8545"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-42.4951,-27.9045;48.2367,5.431"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-42.4451,-26.8545"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="90.6318"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="32.2355"/>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen#</dia:string>
+      </dia:attribute>
+    </dia:object>
+    <dia:object type="Standard - Box" version="0" id="O1">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-42.4451,-26.8545"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-42.4951,-26.9045;48.1875,5.431"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-42.4451,-26.8545"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="90.5826"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="32.2355"/>
+      </dia:attribute>
+      <dia:attribute name="inner_color">
+        <dia:color val="#e5e5e5"/>
+      </dia:attribute>
+      <dia:attribute name="show_background">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O2">
+      <dia:attribute name="obj_pos">
+        <dia:point val="16.5939,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="16.5439,-13.8265;27.7439,0.8735"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="16.5939,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="11.1"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="14.6"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::Population#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes">
+        <dia:composite type="umlattribute">
+          <dia:attribute name="name">
+            <dia:string>#self#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#new#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#name#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#description#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#source#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#set_Allele_Frequency#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#add_Individual#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#remove_Individuals#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Individuals#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Genotypes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_marker_names#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Marker#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_number_individuals#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#set_number_individuals#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Frequency_Homozygotes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Frequency_Heterozygotes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O3">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-40.7856,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-40.8356,-13.8265;-31.0356,-2.1265"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-40.7856,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.7"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="11.6"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::Statistics#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#fu_and_li_D#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#fu_and_li_D_star#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#fu_and_li_F#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#fu_and_li_F_star#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#tajima_D#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#pi#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#theta#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#singleton_count#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#segregating_sites_count#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#heterozygosity#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#derived_mutations#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#composite_LD#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O4">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-12.3681,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-12.4181,-13.8265;-2.9681,-3.9265"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-12.3681,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.35"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="9.8"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::Marker#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes">
+        <dia:composite type="umlattribute">
+          <dia:attribute name="name">
+            <dia:string>#self#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#new#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#name#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#description#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#type#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#unique_id#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Alleles#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Allele_Frequencies#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#add_Allele_Frequency#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#reset_alleles#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O5">
+      <dia:attribute name="obj_pos">
+        <dia:point val="33.9341,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="33.8841,-13.8265;43.3341,-3.9265"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="33.9341,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.35"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="9.8"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::Individual#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes">
+        <dia:composite type="umlattribute">
+          <dia:attribute name="name">
+            <dia:string>#self#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#new#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#unique_id#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#num_of_results#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#add_Genotype#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#reset_Genotypes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#remove_Genotype#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Genotypes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#has_Marker#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_marker_names#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O6">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-40.4856,-1.10183"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-40.5356,-1.15183;-31.3356,3.94817"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-40.4856,-1.10183"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.1"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="5"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::PopStats#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes">
+        <dia:composite type="umlattribute">
+          <dia:attribute name="name">
+            <dia:string>#self#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#new#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#haploid_status#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#Fst#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O7">
+      <dia:attribute name="obj_pos">
+        <dia:point val="2.69832,-25.1315"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="2.64832,-25.1815;12.1983,-20.6815"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="2.69832,-25.1315"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.45"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="4.4"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::GenotypeI#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#marker_name#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#individual_id#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Alleles#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O8">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-12.3681,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-12.4181,-25.2813;-2.9681,-17.5813"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-12.3681,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.35"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="7.6"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::MarkerI#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#name#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#description#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#type#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#unique_id#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#annotation#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Alleles#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Allele_Frequencies#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O9">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-25.8138,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-25.8638,-13.8265;-17.4638,0.8735"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-25.8138,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="8.3"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="14.6"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::IO#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes">
+        <dia:composite type="umlattribute">
+          <dia:attribute name="name">
+            <dia:string>#self#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#new#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#_initialize#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="1"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#next_individual#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#next_population#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#write_individual#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#write_population#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#newFh#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#fh#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#_load_format_module#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="1"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#_guess_format#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="1"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#close#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#DESTROY#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#TIEHANDLE#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#READLINE#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#PRINT#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O10">
+      <dia:attribute name="obj_pos">
+        <dia:point val="17.1689,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="17.1189,-25.2813;27.1689,-15.9813"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="17.1689,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.95"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="9.2"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::PopulationI#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#name#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#description#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#source#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Individuals#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Genotypes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Marker#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_marker_names#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Markers#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_number_individuals#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O11">
+      <dia:attribute name="obj_pos">
+        <dia:point val="33.8341,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="33.7841,-25.2813;43.4341,-18.3813"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="33.8341,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.55"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="6.8"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::IndividualI#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#unique_id#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#num_genotypes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#num_of_results#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Genotypes#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#has_Marker#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_marker_names#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O12">
+      <dia:attribute name="obj_pos">
+        <dia:point val="2.79832,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="2.74832,-13.8265;12.0983,-6.3265"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="2.79832,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="9.25"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="7.4"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::PopGen::Genotype#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="courier new" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes">
+        <dia:composite type="umlattribute">
+          <dia:attribute name="name">
+            <dia:string>#self#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="operations">
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#new#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#marker_name#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#individual_id#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#get_Alleles#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#add_Allele#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+        <dia:composite type="umloperation">
+          <dia:attribute name="name">
+            <dia:string>#reset_Alleles#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="stereotype">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="visibility">
+            <dia:enum val="0"/>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="abstract">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="inheritance_type">
+            <dia:enum val="2"/>
+          </dia:attribute>
+          <dia:attribute name="query">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="class_scope">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="parameters"/>
+        </dia:composite>
+      </dia:attribute>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O13">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-11.1431,-34.5551"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-11.1931,-34.6051;-4.1931,-32.3051"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-11.1431,-34.5551"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="6.9"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="2.2"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::AnnotatableI#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="Courier New" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations"/>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Class" version="0" id="O14">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-24.3638,-34.5551"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-24.4138,-34.6051;-18.9138,-32.3051"/>
+      </dia:attribute>
+      <dia:attribute name="elem_corner">
+        <dia:point val="-24.3638,-34.5551"/>
+      </dia:attribute>
+      <dia:attribute name="elem_width">
+        <dia:real val="5.4"/>
+      </dia:attribute>
+      <dia:attribute name="elem_height">
+        <dia:real val="2.2"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>#Bio::Root::IO#</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="comment">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="abstract">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_attributes">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="suppress_operations">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="visible_attributes">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_operations">
+        <dia:boolean val="true"/>
+      </dia:attribute>
+      <dia:attribute name="visible_comments">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="foreground_color">
+        <dia:color val="#000000"/>
+      </dia:attribute>
+      <dia:attribute name="background_color">
+        <dia:color val="#ffffff"/>
+      </dia:attribute>
+      <dia:attribute name="normal_font">
+        <dia:font family="Courier New" style="0" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font">
+        <dia:font family="Courier New" style="4" name="Courier-Oblique"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font">
+        <dia:font family="monospace" style="8" name="Courier"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font">
+        <dia:font family="arial" style="80" name="Helvetica-Bold"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font">
+        <dia:font family="Arial" style="84" name="Helvetica-BoldOblique"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font">
+        <dia:font family="sans" style="8" name="Helvetica"/>
+      </dia:attribute>
+      <dia:attribute name="font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="polymorphic_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_font_height">
+        <dia:real val="0.8"/>
+      </dia:attribute>
+      <dia:attribute name="classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="abstract_classname_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="comment_font_height">
+        <dia:real val="1"/>
+      </dia:attribute>
+      <dia:attribute name="attributes"/>
+      <dia:attribute name="operations"/>
+      <dia:attribute name="template">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="templates"/>
+    </dia:object>
+    <dia:object type="UML - Generalization" version="0" id="O15">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-21.6638,-32.3551"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-22.5138,-32.4051;-20.8138,-13.7265"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="-21.6638,-32.3551"/>
+        <dia:point val="-21.6638,-29.2647"/>
+        <dia:point val="-21.6638,-29.2647"/>
+        <dia:point val="-21.6638,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+      </dia:attribute>
+      <dia:attribute name="orth_autoroute">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O14" connection="6"/>
+        <dia:connection handle="1" to="O9" connection="1"/>
+      </dia:connections>
+    </dia:object>
+    <dia:object type="UML - Generalization" version="0" id="O16">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-7.6931,-32.3551"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-8.5431,-32.4051;-6.8431,-25.1813"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="-7.6931,-32.3551"/>
+        <dia:point val="-7.6931,-30.0086"/>
+        <dia:point val="-7.6931,-30.0086"/>
+        <dia:point val="-7.6931,-25.2313"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+      </dia:attribute>
+      <dia:attribute name="orth_autoroute">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O13" connection="6"/>
+        <dia:connection handle="1" to="O8" connection="1"/>
+      </dia:connections>
+    </dia:object>
+    <dia:object type="UML - Association" version="0" id="O17">
+      <dia:attribute name="obj_pos">
+        <dia:point val="16.5939,-5.2765"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-3.08881,-5.3265;16.6939,-3.6765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="16.5939,-5.2765"/>
+        <dia:point val="16.5939,-5.2765"/>
+        <dia:point val="-3.0181,-5.2765"/>
+        <dia:point val="-3.0181,-5.2765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="direction">
+        <dia:enum val="0"/>
+      </dia:attribute>
+      <dia:attribute name="ends">
+        <dia:composite>
+          <dia:attribute name="role">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="multiplicity">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="arrow">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="aggregate">
+            <dia:enum val="0"/>
+          </dia:attribute>
+        </dia:composite>
+        <dia:composite>
+          <dia:attribute name="role">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="multiplicity">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="arrow">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="aggregate">
+            <dia:enum val="0"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O2" connection="24"/>
+        <dia:connection handle="1" to="O4" connection="25"/>
+      </dia:connections>
+    </dia:object>
+    <dia:object type="Standard - Text" version="0" id="O18">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-32.2657,-31.1198"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-36.9407,-32.4198;-27.5907,-29.9198"/>
+      </dia:attribute>
+      <dia:attribute name="text">
+        <dia:composite type="text">
+          <dia:attribute name="string">
+            <dia:string>#Bio::PopGen#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="font">
+            <dia:font family="courier new" style="80" name="Courier-Bold"/>
+          </dia:attribute>
+          <dia:attribute name="height">
+            <dia:real val="2"/>
+          </dia:attribute>
+          <dia:attribute name="pos">
+            <dia:point val="-32.2657,-31.1198"/>
+          </dia:attribute>
+          <dia:attribute name="color">
+            <dia:color val="#000000"/>
+          </dia:attribute>
+          <dia:attribute name="alignment">
+            <dia:enum val="1"/>
+          </dia:attribute>
+        </dia:composite>
+      </dia:attribute>
+    </dia:object>
+    <dia:object type="UML - Realizes" version="0" id="O19">
+      <dia:attribute name="obj_pos">
+        <dia:point val="7.42332,-20.7315"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="6.57332,-20.7815;8.27332,-13.7265"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="7.42332,-20.7315"/>
+        <dia:point val="7.42332,-16.4134"/>
+        <dia:point val="7.42332,-16.4134"/>
+        <dia:point val="7.42332,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+      </dia:attribute>
+      <dia:attribute name="orth_autoroute">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O7" connection="6"/>
+        <dia:connection handle="1" to="O12" connection="1"/>
+      </dia:connections>
+    </dia:object>
+    <dia:object type="UML - Realizes" version="0" id="O20">
+      <dia:attribute name="obj_pos">
+        <dia:point val="-7.6931,-17.6313"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="-8.5431,-17.6813;-6.8431,-12.1318"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="-7.6931,-17.6313"/>
+        <dia:point val="-7.6931,-13.7318"/>
+        <dia:point val="-7.6931,-13.7318"/>
+        <dia:point val="-7.6931,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+      </dia:attribute>
+      <dia:attribute name="orth_autoroute">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O8" connection="6"/>
+        <dia:connection handle="1" to="O4" connection="1"/>
+      </dia:connections>
+    </dia:object>
+    <dia:object type="UML - Realizes" version="0" id="O21">
+      <dia:attribute name="obj_pos">
+        <dia:point val="38.6091,-18.4313"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="37.7591,-18.4813;39.4591,-12.434"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="38.6091,-18.4313"/>
+        <dia:point val="38.6091,-14.034"/>
+        <dia:point val="38.6091,-14.034"/>
+        <dia:point val="38.6091,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+      </dia:attribute>
+      <dia:attribute name="orth_autoroute">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O11" connection="6"/>
+        <dia:connection handle="1" to="O5" connection="1"/>
+      </dia:connections>
+    </dia:object>
+    <dia:object type="UML - Realizes" version="0" id="O22">
+      <dia:attribute name="obj_pos">
+        <dia:point val="22.1439,-16.0313"/>
+      </dia:attribute>
+      <dia:attribute name="obj_bb">
+        <dia:rectangle val="21.2939,-16.0813;22.9939,-11.3755"/>
+      </dia:attribute>
+      <dia:attribute name="orth_points">
+        <dia:point val="22.1439,-16.0313"/>
+        <dia:point val="22.1439,-12.9755"/>
+        <dia:point val="22.1439,-12.9755"/>
+        <dia:point val="22.1439,-13.7765"/>
+      </dia:attribute>
+      <dia:attribute name="orth_orient">
+        <dia:enum val="1"/>
+        <dia:enum val="0"/>
+        <dia:enum val="1"/>
+      </dia:attribute>
+      <dia:attribute name="orth_autoroute">
+        <dia:boolean val="false"/>
+      </dia:attribute>
+      <dia:attribute name="name">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:attribute name="stereotype">
+        <dia:string>##</dia:string>
+      </dia:attribute>
+      <dia:connections>
+        <dia:connection handle="0" to="O10" connection="6"/>
+        <dia:connection handle="1" to="O2" connection="1"/>
+      </dia:connections>
+    </dia:object>
+  </dia:layer>
+</dia:diagram>

Added: trunk/packages/bioperl/branches/upstream/current/models/population_proposal.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/models/population_proposal.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/models/population_proposal.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,29 @@
+A toolkit for manipulating population data is critical for population
+geneticists, epidemiologists, evo-devo and others.
+
+
+This proposal will outline a basic set of objects which are useful for
+manipulating population data.  Our primary driving force will be to
+process the data generated from the SNP haplotype project.
+
+
+Population Objects
+
+Bio::Population namespace
+
+Bio::Population::HaplotypeCohortI -- a collection of (likely) co-inherited
+			             markers
+Bio::Population::HaplotypeI  -- a score for a particular haplotype
+			        cohort or an individual
+Bio::Population::GenotypeI   -- a score value for a marker for an individual 
+Bio::Population::FrequencyI  -- an allele frequency in a Population
+Bio::Population::PopulationI -- group of unrelated individuals
+Bio::Population::PedigreeI   -- group of related individuals w/ relationships
+Bio::Population::IndividualI -- a single identifiable entity with distinct
+				genotypes for markers
+Bio::Population::PhenotypeI  -- a trait associated with an individual 
+
+Relationships
+--------------- 
+A PedigreeI isa PopulationI
+A PopulationI can contain other PopulationIs

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,35 @@
+These are utilities that go with the Bio::DB::GFF module.
+
+examples/Bio-DB-GFF/load_gff.PLS:
+
+Slow but foolproof GFF table to database loader.
+
+examples/Bio-DB-GFF/bulk_load_gff.PLS:      
+
+Fast loader, but wipes database clean each time.
+
+examples/Bio-DB-GFF/fast_load_gff.PLS:
+
+EXPERIMENTAL incremental loader that is as fast as bulk loader, but only works on Unix platforms.
+
+scripts/Bio-DB-GFF/generate_histogram.PLS: 
+
+Generate histogram of sequence features, for use with generic genome browser.
+
+scripts/Bio-DB-GFF/process_gadfly.PLS:     
+
+Tweak GadFly GFF files for loading (drosophila).
+
+scripts/Bio-DB-GFF/process_wormbase.PLS:   
+
+Tweak WormBase GFF files for loading (elegans).
+
+scripts/Bio-DB-GFF/process_sgd.PLS:        
+
+Turn SGD feature dumps into GFF files for loading (saccharomyces).
+
+scripts/Bio-DB-GFF/process_ncbi_human.PLS: 
+
+Turn NCBI feature dumps into GFF files for loading (human).
+
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4 @@
+These are scripts that go with the Bio::DB::GFF module, a basic
+seqfeature database.  Install these scripts if you wish to use the
+LDAS distributed annotation server or the Generic Genome Browser.
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bp_genbank2gff.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bp_genbank2gff.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bp_genbank2gff.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,309 @@
+#!/usr/bin/perl -w
+
+use lib '.';
+
+# $Id: bp_genbank2gff.PLS,v 1.14 2004/05/05 13:37:12 scain Exp $
+use strict;
+use Bio::DB::GFF;
+use Getopt::Long;
+
+=head1 NAME
+
+bp_genbank2gff.pl - Load a Bio::DB::GFF database from GENBANK files.
+
+=head1 SYNOPSIS
+
+  % bp_genbank2gff.pl -d genbank -f localfile.gb
+  % bp_genbank2gff.pl -d genbank --accession AP003256
+  % bp_genbank2gff.pl --accession AP003256 --stdout
+
+=head1 DESCRIPTION
+
+This script loads a Bio::DB::GFF database with the features contained
+in either a local genbank file or an accession that is fetched from
+genbank.  Various command-line options allow you to control which
+database to load and whether to allow an existing database to be
+overwritten.
+
+The database must already have been created and the current user must
+have appropriate INSERT and UPDATE privileges.  The --create option
+will initialize a new database with the appropriate schema, deleting
+any tables that were already there.
+
+=head1 COMMAND-LINE OPTIONS
+
+Command-line options can be abbreviated to single-letter options.
+e.g. -d instead of --database.
+
+   --create                 Force creation and initialization of database
+   --dsn       <dsn>        Data source (default dbi:mysql:test)
+   --user      <user>       Username for mysql authentication
+   --pass      <password>   Password for mysql authentication
+   --proxy     <proxy>      Proxy server to use for remote access
+   --stdout                 direct output to STDOUT
+   --adaptor   <adaptor>    adaptor to use (eg dbi::mysql, dbi::pg, dbi::oracle)
+   --viral                  the genome you are loading is viral (changes tag
+                                 choices)
+   --source    <source>     source field for features ['genbank']
+    EITHER --file           Arguments that follow are Genbank/EMBL file names
+    OR --gb_folder          What follows is a folder full of gb files to process
+    OR --accession          Arguments that follow are genbank accession numbers
+                                 (not gi!)
+    OR --acc_file           Accession numbers (not gi!) in a file (one per line,
+                                 no punc.)
+    OR --acc_pipe           Accession numbers (not gi!) from a STDIN pipe (one
+                                 per line)
+
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Scott Cain, cain at cshl.org
+
+Copyright (c) 2003 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+package Bio::DB::GFF::Adaptor::biofetch_to_stdout;
+
+# Subclass of the biofetch adaptor whose load hooks print GFF3 lines and
+# FASTA records to STDOUT instead of storing them.
+use CGI 'escape';
+use Bio::DB::GFF::Util::Rearrange;
+use Bio::DB::GFF::Adaptor::biofetch;
+use vars '@ISA';
+@ISA = 'Bio::DB::GFF::Adaptor::biofetch';
+
+# load_gff_line($options)
+#
+# Print one feature as a tab-separated, nine-column line on STDOUT.
+# $options is a hash reference; the keys read here are ref, source,
+# method, start, stop, score, strand, phase, gname, tstart, tstop and
+# attributes (an array reference of [tag, value] pairs).
+# Side effect: false score/strand/phase entries are overwritten with "."
+# in the hash that was passed in.
+sub load_gff_line {
+  my ($self,$options) = @_;
+  # synthesize GFF3-compatible line
+  my @attributes;
+  if (my $id = $options->{gname}) {
+    my $parent = $id;
+    # For these methods the parent name is the group name with a trailing
+    # ".<one character><digits>" suffix removed.
+    $parent    =~ s/\..\d+$//                  if     $options->{method} =~ /^(mRNA|transcript|exon|gene)$/;
+    push @attributes,"Parent=".escape($parent) if     $options->{method} =~ /^(variation|exon|CDS|transcript|mRNA|coding)$/;
+    push @attributes,"ID=".escape($id)         unless $options->{method} =~ /^(exon|CDS)$/;
+  }
+  if (my $tstart = $options->{tstart}) {
+    my $tstop    = $options->{tstop};
+    my $target   = escape($options->{gname});
+    push @attributes,"Target=$target+$tstart+$tstop";
+  }
+  # Group the escaped values by escaped tag so that a repeated tag is
+  # emitted once, as "tag=value1,value2".
+  my %values_for;
+  if (my $pairs = $options->{attributes}) {
+    for my $pair (@$pairs) {
+      my ($tag,$value) = @$pair;
+      push @{$values_for{escape($tag)}},escape($value);
+    }
+    for my $tag (keys %values_for) {
+       push @attributes,"$tag=".join(',',@{$values_for{$tag}});
+    }
+  }
+  # "." is the placeholder for an empty column
+  ${$options}{'score'} = "." unless ${$options}{'score'};
+  ${$options}{'strand'} = "." unless ${$options}{'strand'};
+  ${$options}{'phase'} = "." unless ${$options}{'phase'};
+  my $last_column = join ';',@attributes;
+  # an "origin" feature is preceded by a sequence-region directive
+  if ($options->{method} eq 'origin') {
+     print "##sequence-region $options->{gname} $options->{start} $options->{stop}\n";
+  }
+  print join("\t",@{$options}{qw(ref source method start stop score strand phase)},$last_column),"\n";
+}
+
+# load_sequence_string($acc,$seq)
+#
+# Print a FASTA record for $acc on STDOUT: a ">$acc" header followed by
+# the sequence, lower-cased and broken into lines of at most 60
+# characters.  Prints nothing when $seq is false.
+sub load_sequence_string {
+  my $self = shift;
+  my ($acc,$seq)  = @_;
+  return unless $seq;
+  $seq =~ s/(.{1,60})/$1\n/g;
+  print ">$acc\n\L$seq\U\n";
+}
+
+# setup_load()
+#
+# Print the GFF version pragma.
+sub setup_load {
+   my $self = shift;
+   print "##gff-version 3\n";
+}
+
+# finish_load()
+#
+# Deliberately empty: nothing to do once the output has been printed.
+sub finish_load { }
+
+1;
+
+package main;
+
+# Help text; the script dies with it when option parsing fails or when
+# neither a DSN nor --stdout is given.
+my $USAGE = <<USAGE;
+
+Usage: $0 [options] [<file or accession 1> <file or accession 2>] ...
+Load a Bio::DB::GFF database from Genbank/EMBL files or accessions.
+
+ Options:
+   --create                 Force creation and initialization of database
+   --dsn       <dsn>        Data source (default dbi:mysql:test)
+   --user      <user>       Username for mysql authentication
+   --pass      <password>   Password for mysql authentication
+   --proxy     <proxy>      Proxy server to use for remote access
+   --stdout                 direct output to STDOUT
+   --adaptor   <adaptor>    adaptor to use (eg dbi::mysql, dbi::pg, dbi::oracle)
+   --viral                  the genome you are loading is viral (changes tag
+                                 choices)
+   --source    <source>     source field for features ['genbank']
+    EITHER --file           Arguments that follow are Genbank/EMBL file names
+    OR --gb_folder          What follows is a folder full of gb files to process
+    OR --accession          Arguments that follow are genbank accession numbers
+                                 (not gi!)
+    OR --acc_file           Accession numbers (not gi!) in a file (one per line,
+                                 no punc.) 
+    OR --acc_pipe           Accession numbers (not gi!) from a STDIN pipe (one
+                                 per line)   
+
+
+This script loads a Bio::DB::GFF database with the features contained
+in a either a local genbank file or an accession that is fetched from
+genbank.  Various command-line options allow you to control which
+database to load and whether to allow an existing database to be
+overwritten.
+
+USAGE
+;
+
+# Command-line state.  The names are shared with the rest of the script.
+my ($DSN,$ADAPTOR,$CREATE,$USER,$VIRAL,$PASSWORD,$gbFOLDER,
+    $FASTA,$ACC,$accFILE, $accPIPE, $FILE,$PROXY,$STDOUT,$SOURCE);
+
+
+GetOptions (
+            'dsn:s'       => \$DSN,
+            'user:s'      => \$USER,
+            'password:s'  => \$PASSWORD,
+            'adaptor:s'   => \$ADAPTOR,
+            'accession'   => \$ACC,
+            'file'        => \$FILE,
+            'viral'       => \$VIRAL,
+            'acc_file'    => \$accFILE,
+            'acc_pipe'    => \$accPIPE,
+            'source:s'    => \$SOURCE,
+            'gb_folder=s' => \$gbFOLDER,
+            'proxy:s'     => \$PROXY,
+            'stdout'      => \$STDOUT,
+            'create'      => \$CREATE) or die $USAGE;
+
+
+die $USAGE unless ($DSN || $STDOUT);  # at a minimum we need to have a place to write to!
+
+# some local defaults
+$DSN     ||= 'dbi:mysql:test';
+$ADAPTOR ||= $STDOUT ? 'memory' : 'dbi::mysql';
+
+# The adaptor name is interpolated into a string eval below, so only
+# accept something shaped like a package-name suffix (e.g. dbi::mysql).
+die "Invalid adaptor name '$ADAPTOR'\n"
+  unless $ADAPTOR =~ /\A\w+(?:::\w+)*\z/;
+
+# Ensure that biofetch inherits from the "right" adaptor.
+# This is a horrible hack and should be fixed.
+eval "use Bio::DB::GFF::Adaptor::${ADAPTOR}";
+# Report a module that failed to load here, rather than leaving biofetch
+# with a parent class that was never compiled.
+die "Can't load adaptor Bio::DB::GFF::Adaptor::${ADAPTOR}: $@" if $@;
+local @Bio::DB::GFF::Adaptor::biofetch::ISA = "Bio::DB::GFF::Adaptor::${ADAPTOR}";
+
+# With --stdout the printing subclass is used and no DSN is passed.
+my $biofetch = $STDOUT ? 'biofetch_to_stdout' : 'biofetch';
+my @dsn      = $STDOUT ? () : (-dsn => $DSN);
+
+# Optional connection arguments, passed only when given on the command line.
+my @auth;
+push @auth,(-user=>$USER)     if defined $USER;
+push @auth,(-pass=>$PASSWORD) if defined $PASSWORD;
+push @auth,(-proxy=>$PROXY)   if defined $PROXY;
+
+# Tag => number table handed to the adaptor as -preferred_tags.
+my %preferred_tags = (
+    strain        => 10,
+    organism      => 20,
+    protein_id    => 40,
+    locus_tag     => 50,
+    locus         => 60,
+    gene          => 70,
+    standard_name => 80,
+    # added this to the default list for viral genomes
+    # since most functions come from post-translational processing, so the default labels are c**p!
+    ($VIRAL ? ('product' => 90) : ()),
+);
+
+# Constructor arguments, in the same order as they were always passed.
+my @db_args = (
+    -adaptor        => $biofetch,
+    @dsn,
+    @auth,
+    -preferred_tags => \%preferred_tags,
+    -source         => ($SOURCE || 'Genbank'),
+);
+
+my $db = Bio::DB::GFF->new(@db_args)
+  or die "Can't open database: ",Bio::DB::GFF->error,"\n";
+
+# --create: initialize the database before loading
+$db->initialize(1) if $CREATE;
+
+die "you must specify either an accession to retrieve from\nembl or a local file containing data in embl format\n" if (($FILE || $ACC) && !scalar(@ARGV));
+
+# Choose the input mode from the command-line flags and load every item.
+# NOTE(review): most branches end with "exit 1" even after a normal run;
+# that exit status is left unchanged here -- confirm whether it is intended.
+if ($ACC) {
+  # remaining arguments are accession numbers
+  while ($_ = shift) {
+    status(loading => $_);
+    my $result = $db->load_from_embl(/^NC_/?'refseq':'embl' => $_);
+    status(done    => $result);
+  }
+  exit 1;
+}
+elsif ($FILE) {
+  # remaining arguments are local file names
+  while ($_ = shift) {
+    status('loading' => $_);
+    my $result = $db->load_from_file($_);
+    status (done => $result);
+  }
+  exit 1;
+}
+elsif ($accFILE){
+    # next argument is a file holding one accession number per line
+    my $filename = shift;
+    die "you must supply a filename after the --acc_file command line flag\n" unless $filename;
+    die "file $filename does not exist\n" unless (-e $filename && !(-d $filename));
+    # "or" rather than "||": the die must apply to open(), not to the file name
+    open my $in, '<', $filename
+        or die "Can't open file $filename for reading accession numbers: $!\n";
+    while (<$in>){
+        chomp;
+        status(loading => $_);
+        my $result = $db->load_from_embl(/^NC_/?'refseq':'embl' => $_);
+        status(done => $result);
+    }
+    close $in;
+    exit 1;
+}
+elsif ($gbFOLDER){
+    # every plain file found in the folder is loaded
+    my $dir = $gbFOLDER;
+    die "folder $dir does not exist\n" unless (-e $dir && -d $dir);
+    # "or" rather than "||": the die must apply to opendir(), not to the name
+    opendir my $dh, $dir
+        or die "can't open directory $dir for reading: $!\n";
+    my @files = readdir $dh;
+    closedir $dh;
+    foreach my $file(@files){
+        # skips sub-directories, including "." and ".."
+        if (!(-e "$gbFOLDER/$file") || (-d "$gbFOLDER/$file")){
+            print STDERR " $gbFOLDER/$file is not a filename!  Skipping...\n";
+            next
+        }
+        my $result = $db->load_from_file("$gbFOLDER/$file");
+        print STDERR $result ? "ok\n" : "failed\n";
+    }
+}
+elsif ($accPIPE){
+    # accession numbers arrive on STDIN, one per line
+    my @accessions = <STDIN>;
+    chomp @accessions;
+    foreach (@accessions){
+      status(loading => $_);
+      my $result = $db->load_from_embl(/^NC_/?'refseq':'embl' => $_);
+      status(done => $result);
+    }
+    exit 1;
+}
+else {
+  # no mode flag: treat the remaining arguments as local files
+  my $done;
+  while ($_ = shift) {
+    $done = 1;
+    status(loading => $_);
+    my $result = $db->load_from_file($_);
+    status(done => $result);
+  }
+
+  $done || die "\n\nno source of data provided\n\n";
+  exit 1;
+}
+
+# status($state,$msg)
+#
+# Progress report on STDERR; silent when --stdout is in effect.
+#   loading => $msg   prints "Loading $msg..." without a newline
+#   done    => $msg   prints "ok" or "failed", depending on the truth of $msg
+# Any other state prints nothing.
+sub status {
+  my ($phase,$detail) = @_;
+  return if $STDOUT;
+  if ($phase eq 'done') {
+    my $verdict = $detail ? "ok\n" : "failed\n";
+    print STDERR $verdict;
+  }
+  elsif ($phase eq 'loading') {
+    print STDERR "Loading $detail...";
+  }
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bulk_load_gff.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bulk_load_gff.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bulk_load_gff.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,665 @@
+#!/usr/bin/perl
+# $Id: bulk_load_gff.PLS,v 1.43 2006/06/26 17:40:27 lstein Exp $
+
+
+use strict;
+# use lib './blib/lib';
+use DBI;
+use IO::File;
+use Getopt::Long;
+use Bio::DB::GFF;
+use Bio::DB::GFF::Util::Binning 'bin';
+use constant MYSQL => 'mysql';
+
+use constant FDATA      => 'fdata';
+use constant FTYPE      => 'ftype';
+use constant FGROUP     => 'fgroup';
+use constant FDNA       => 'fdna';
+use constant FATTRIBUTE => 'fattribute';
+use constant FATTRIBUTE_TO_FEATURE => 'fattribute_to_feature';
+
+=head1 NAME
+
+bulk_load_gff.pl - Bulk-load a Bio::DB::GFF database from GFF files.
+
+=head1 SYNOPSIS
+
+  % bulk_load_gff.pl -d testdb dna1.fa dna2.fa features1.gff features2.gff ...
+
+=head1 DESCRIPTION
+
+This script loads a Bio::DB::GFF database with the features contained
+in a list of GFF files and/or FASTA sequence files.  You must use the
+exact variant of GFF described in L<Bio::DB::GFF>.  Various
+command-line options allow you to control which database to load and
+whether to allow an existing database to be overwritten.
+
+This script differs from bp_load_gff.pl in that it is hard-coded to use
+MySQL and cannot perform incremental loads.  See L<bp_load_gff.pl> for an
+incremental loader that works with all databases supported by
+Bio::DB::GFF, and L<bp_fast_load_gff.pl> for a MySQL loader that supports
+fast incremental loads.
+
+=head2 NOTES
+
+If the filename is given as "-" then the input is taken from standard
+input. Compressed files (.gz, .Z, .bz2) are automatically
+uncompressed.
+
+FASTA format files are distinguished from GFF files by their filename
+extensions.  Files ending in .fa, .fasta, .fast, .seq, .dna and their
+uppercase variants are treated as FASTA files.  Everything else is
+treated as a GFF file.  If you wish to load -fasta files from STDIN,
+then use the -f command-line switch with an argument of '-', as in 
+
+    gunzip my_data.fa.gz | bp_fast_load_gff.pl -d test -f -
+
+The nature of the bulk load requires that the database be on the local
+machine and that the indicated user have the "file" privilege to load
+the tables and have enough room in /usr/tmp (or whatever is specified
+by the \$TMPDIR environment variable), to hold the tables transiently.
+
+Local data may now be uploaded to a remote server via the --local option
+with the database host specified in the dsn, e.g. dbi:mysql:test:db_host
+
+The adaptor used is dbi::mysqlopt.  There is currently no way to
+change this.
+
+About maxfeature: the default value is 100,000,000 bases.  If you have
+features that are close to or greater than 100Mb in length, then the
+value of maxfeature should be increased to 1,000,000,000. This value
+must be a power of 10.
+
+Note that Windows users must use the --create option.
+
+If the list of GFF or fasta files exceeds the kernel limit for the 
+maximum number of command-line arguments, use the 
+--long_list /path/to/files option. 
+
+
+=head1 COMMAND-LINE OPTIONS
+
+Command-line options can be abbreviated to single-letter options.
+e.g. -d instead of --database.
+
+   --database <dsn>      Database name (default dbi:mysql:test)
+   --adaptor             Adaptor name (default mysql)
+   --create              Reinitialize/create data tables without asking
+   --user                Username to log in as
+   --fasta               File or directory containing fasta files to load
+   --long_list           Directory containing a very large number of 
+                         GFF and/or FASTA files
+   --password            Password to use for authentication
+                           (Doesn't work with Postgres, password must be
+                           supplied interactively)
+   --maxbin              Set the value of the maximum bin size
+   --local               Flag to indicate that the data source is local
+   --maxfeature          Set the value of the maximum feature size (power of 10)
+   --group               A list of one or more tag names (comma or space separated)
+                         to be used for grouping in the 9th column.
+   --gff3_munge          Activate GFF3 name munging (see Bio::DB::GFF)
+   --Temporary           Location of a writable scratch directory
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<fast_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+# Loader-side stand-in for the dbi::mysqlopt adaptor: it inherits from
+# Bio::DB::GFF::Adaptor::dbi::mysqlopt and overrides insert_sequence.
+package Bio::DB::GFF::Adaptor::fauxmysql;
+
+use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
+use vars '@ISA';
+@ISA = 'Bio::DB::GFF::Adaptor::dbi::mysqlopt';
+
+# Print one sequence chunk as a tab-separated "id, offset, sequence" line,
+# newline-terminated, on the currently selected output handle.
+sub insert_sequence {
+  my ($self, @fields) = @_[0 .. 3];
+  print join("\t", @fields), "\n";
+};
+
+# Loader-side stand-in for the dbi::mysqlcmap adaptor: it inherits from
+# Bio::DB::GFF::Adaptor::dbi::mysqlcmap and overrides insert_sequence.
+package Bio::DB::GFF::Adaptor::fauxmysqlcmap;
+
+use Bio::DB::GFF::Adaptor::dbi::mysqlcmap;
+use vars '@ISA';
+@ISA = 'Bio::DB::GFF::Adaptor::dbi::mysqlcmap';
+
+# Write "id<TAB>offset<TAB>sequence" followed by a newline to the currently
+# selected output handle.
+sub insert_sequence {
+  my (undef, $seq_id, $seq_offset, $residues) = @_;
+  my $row = join("\t", $seq_id, $seq_offset, $residues);
+  print $row, "\n";
+};
+
+# Loader-side stand-in for the dbi::pg adaptor: it inherits from
+# Bio::DB::GFF::Adaptor::dbi::pg and overrides the subs defined in this package.
+package Bio::DB::GFF::Adaptor::fauxpg;
+
+use Bio::DB::GFF::Adaptor::dbi::pg;
+use vars '@ISA';
+@ISA = 'Bio::DB::GFF::Adaptor::dbi::pg';
+
+# Table creation (do_initialize) is kept apart from index creation
+# (_create_indexes_etc).
+#
+# do_initialize($erase): optionally drops everything first, then runs the
+# "table" statement of every table in the schema.  A failing statement only
+# produces a warning.  Always returns 1.
+sub do_initialize {
+  my ($self, $erase) = @_;
+  $self->drop_all if $erase;
+
+  my $dbh    = $self->features_db;
+  my $schema = $self->schema;
+  for my $table ($self->tables) {
+    # Tables only: create_other_schema_objects is not called from here.
+    $dbh->do($schema->{$table}{table}) or warn $dbh->errstr;
+  }
+  return 1;
+}
+
+# Second half of initialisation: hands each table's schema entry to
+# create_other_schema_objects, one table at a time.
+sub _create_indexes_etc {
+  my ($self) = @_;
+
+  my $dbh    = $self->features_db;   # fetched but not used further in this sub
+  my $schema = $self->schema;
+  my @table_names = $self->tables;
+  for my $name (@table_names) {
+    my $table_schema = \%{ $schema->{$name} };
+    $self->create_other_schema_objects($table_schema);
+  }
+}
+
+# Print "id<TAB>offset<TAB>sequence<NEWLINE>" as a single string on the
+# currently selected output handle.
+sub insert_sequence {
+  my (undef, $seq_id, $seq_offset, $residues) = @_;
+  print join("\t", $seq_id, $seq_offset, $residues) . "\n";
+}
+
+package main;
+
+# Time::HiRes is optional: try to load it, discard any load error, and
+# remember whether Time::HiRes::time is available for the timing output.
+eval "use Time::HiRes";
+undef $@;
+my $timer = defined(&Time::HiRes::time);
+
+# Boolean: 1 when $^O reports an MSWindows operating system, otherwise 0.
+my $bWINDOWS = ($^O =~ /MSWin32/i) ? 1 : 0;
+
+# Command-line settings.  Each variable stays undef unless its option is given.
+my ($DSN,$ADAPTOR,$FORCE,$USER,$PASSWORD,$FASTA,$LOCAL,$MAX_BIN,$GROUP_TAG,$LONG_LIST,$MUNGE,$TMPDIR);
+
+# Parse the options; --maxbin and --maxfeature are two names for the same
+# setting.  If parsing fails, the script's POD is shown through pod2text and
+# the script exits with -1.
+GetOptions ('database:s'    => \$DSN,
+	    'adaptor:s'     => \$ADAPTOR,
+	    'create'        => \$FORCE,
+	    'user:s'        => \$USER,
+	    'password:s'    => \$PASSWORD,
+	    'fasta:s'       => \$FASTA,
+	    'local'         => \$LOCAL,
+	    'maxbin|maxfeature:s'    => \$MAX_BIN,
+	    'group:s'       => \$GROUP_TAG,
+	    'long_list:s'   => \$LONG_LIST,
+	    'gff3_munge'    => \$MUNGE,
+	    'Temporary:s'   => \$TMPDIR,
+	   ) or (system('pod2text', $0), exit -1);
+
+# If called as pg_bulk_load_gff.pl behave as that did: default to the pg
+# adaptor and a database simply named 'test'.
+if ($0 =~/pg_bulk_load_gff.pl/){
+    $ADAPTOR ||= 'pg';
+    $DSN     ||= 'test';
+}
+# Default DSN for every other invocation name.
+$DSN     ||= 'dbi:mysql:test';
+
+
+# On Windows the script refuses to run without --create.
+if ($bWINDOWS && not $FORCE) {
+  die "Note that Windows users must use the --create option.\n";
+}
+
+# Without --create, ask for confirmation on the controlling terminal before
+# the existing data is deleted.
+unless ($FORCE) {
+  # The prompt below reads /dev/tty, so Windows users are told to use
+  # --create instead.
+  die "This will delete all existing data in database $DSN.  If you want to do this, rerun with the --create option.\n"
+    if $bWINDOWS;
+  # Lexical handle and three-argument open; read from the terminal itself,
+  # not from STDIN.
+  open(my $tty, '<', '/dev/tty') or die "/dev/tty: $!\n";
+  print STDERR "This operation will delete all existing data in database $DSN.  Continue? ";
+  my $answer = <$tty>;
+  close $tty;   # closed before a possible abort so the handle is never left open
+  # End-of-file (undef) counts as a refusal, like any answer not starting
+  # with y or Y.
+  die "Aborted\n" unless defined $answer && $answer =~ /^[yY]/;
+}
+
+# Normalise the DSN to the colon-separated form "dbi:driver:dbname[:host]".
+$DSN=~s/database=//i;
+$DSN=~s/;host=/:/i; #cater for dsn in the form of "dbi:mysql:database=$dbname;host=$host"
+
+
+# Split the DSN into its parts.  A DSN without any colon is taken to be the
+# bare database name.
+my($DBI,$DBD,$DBNAME,$HOST)=split /:/,$DSN;
+$DBNAME=$DSN unless $DSN=~/:/;
+# Adaptor precedence: --adaptor, then the driver part of the DSN, then 'mysql'.
+$ADAPTOR ||= $DBD; 
+$ADAPTOR ||= 'mysql';
+
+# Exactly one of these flags is set; 'mysqlcmap' is tested first because it
+# would also match the /^mysql/ pattern.  Any other adaptor name is fatal.
+my ($use_mysql,$use_mysqlcmap,$use_pg) = (0,0,0);
+if ( $ADAPTOR eq 'mysqlcmap' ) {
+  $use_mysqlcmap = 1;
+}
+elsif ( $ADAPTOR =~ /^mysql/ ) {
+  $use_mysql = 1;
+}
+elsif ( $ADAPTOR eq "pg" ) {
+  $use_pg = 1;
+}
+else{
+    die "$ADAPTOR is not an acceptable database adaptor.";
+}
+
+
+# Two parallel representations of the connection settings are built here:
+#   @auth - -user/-pass pairs for the Bio::DB::GFF constructor
+#   $AUTH - a string of command-line switches for the mysql/psql client
+my (@auth,$AUTH);
+if (defined $USER) {
+  push @auth,(-user=>$USER);
+  if ( $use_mysql or $use_mysqlcmap ) {
+    $AUTH .= " -u$USER";
+  }
+  elsif ( $use_pg ) {
+    $AUTH .= " -U $USER ";
+  }
+}
+if (defined $PASSWORD) {
+  push @auth,(-pass=>$PASSWORD);
+  # NOTE(review): the password becomes part of the client command string.
+  if ( $use_mysql or $use_mysqlcmap ) {
+    $AUTH .= " -p$PASSWORD";
+  }
+# No password switch is added for Postgres; the branch below is disabled.
+#  elsif ( $use_pg ) {
+#    $AUTH .= " -W $PASSWORD ";
+#  }
+}
+
+# The host switch is added for every adaptor; the database switch only for
+# the MySQL adaptors.
+if (defined $HOST) {
+  $AUTH .= " -h$HOST";  
+}
+if (defined $DBNAME) {
+  if ( $use_mysql or $use_mysqlcmap ) {
+    $AUTH .= " -D$DBNAME ";
+  }
+}
+# From here on $LOCAL is a string: 'local' when --local was given, otherwise
+# the empty string.
+if (defined $LOCAL) {
+  $LOCAL='local';
+  $AUTH.=' --local-infile=1';
+}else {
+  $LOCAL='';
+}
+
+# Pick the faux adaptor class that matches the selected backend.
+my $faux_adaptor;
+if ( $use_mysqlcmap ) {
+  $faux_adaptor = "fauxmysqlcmap";
+}
+elsif ( $use_mysql ) {
+  $faux_adaptor = "fauxmysql";
+}
+elsif ( $use_pg ) {
+  $faux_adaptor = "fauxpg";
+}
+
+# @auth carries the optional -user/-pass pairs collected from the command line.
+my $db = Bio::DB::GFF->new(-adaptor=>$faux_adaptor,-dsn => $DSN,@auth)
+  or die "Can't open database: ",Bio::DB::GFF->error,"\n";
+
+$db->gff3_name_munging(1) if $MUNGE;
+
+# Initialise the database (erasing it), passing the maximum bin size through
+# when one was given.  $MAX_BIN then falls back to the database's own
+# 'max_bin' value, or to 100,000,000.
+$MAX_BIN ? $db->initialize(-erase=>1,-MAX_BIN=>$MAX_BIN) : $db->initialize(1);
+$MAX_BIN ||= $db->meta('max_bin') || 100_000_000;
+
+# deal with really long lists of files: with --long_list the input files are
+# read from a directory instead of the command line
+if ($LONG_LIST) {
+  -d $LONG_LIST or die "The --long_list argument must be a directory\n";
+  opendir GFFDIR,$LONG_LIST or die "Could not open $LONG_LIST for reading: $!";
+  # readdir also returns "." and ".."; they are directories, not input files
+  @ARGV = map { "$LONG_LIST\/$_" } grep { !/^\.\.?\z/ } readdir GFFDIR;
+  closedir GFFDIR;
+
+  if (defined $FASTA && -d $FASTA) {
+    opendir FASTA,$FASTA or die "Could not open $FASTA for reading: $!";
+    push @ARGV, map { "$FASTA\/$_" } grep { !/^\.\.?\z/ } readdir FASTA;
+    closedir FASTA;
+  }
+  elsif (defined $FASTA && -f $FASTA) {
+    push @ARGV, $FASTA;
+  }
+}
+
+# Compressed inputs are turned into "command |" entries.
+foreach (@ARGV) {
+  $_ = "gunzip -c $_ |" if /\.gz$/;
+  $_ = "uncompress -c $_ |" if /\.Z$/;
+  $_ = "bunzip2 -c $_ |" if /\.bz2$/;
+}
+
+# Separate FASTA inputs (recognised by extension, case-insensitively) from GFF
+# inputs.  Only the GFF inputs remain in @ARGV.
+my (@gff,@fasta);
+foreach (@ARGV) {
+  if (/\.(fa|fasta|dna|seq|fast)(?:$|\.)/i) {
+    push @fasta,$_;
+  } else {
+    push @gff,$_;
+  }
+}
+@ARGV = @gff;
+push @fasta,$FASTA if defined $FASTA;
+
+# drop everything that was there before
+my %FH;
+my $tmpdir = $TMPDIR || $ENV{TMPDIR} || $ENV{TMP} || '/usr/tmp';
+$tmpdir =~ s!\\!\\\\!g if $bWINDOWS; #eliminates backslash mis-interpretation
+-d $tmpdir or die <<END;
+I could not find a suitable temporary directory to write scratch files into ($tmpdir by default).
+Please select a directory and indicate its location by setting the TMP environment variable, or
+by using the --Temporary switch.
+END
+my @fasta_files_to_be_unlinked;
+my @files = (FDATA,FTYPE,FGROUP,FDNA,FATTRIBUTE,FATTRIBUTE_TO_FEATURE);
+foreach (@files) {
+  $FH{$_} = IO::File->new(">$tmpdir/$_.$$") or die $_,": $!";
+  $FH{$_}->autoflush;
+}
+
+if ( $use_pg ) {
+  $FH{FDATA()                }->print("COPY fdata (fid, fref, fstart, fstop, fbin, ftypeid, fscore, fstrand, fphase, gid, ftarget_start, ftarget_stop) FROM stdin;\n");
+  $FH{FTYPE()                }->print("COPY ftype (ftypeid, fmethod, fsource) FROM stdin;\n");
+  $FH{FGROUP()               }->print("COPY fgroup (gid, gclass, gname) FROM stdin;\n");
+  $FH{FATTRIBUTE()           }->print("COPY fattribute (fattribute_id, fattribute_name) FROM stdin;\n");
+  $FH{FATTRIBUTE_TO_FEATURE()}->print("COPY fattribute_to_feature (fid, fattribute_id, fattribute_value) FROM stdin;\n");
+}
+my $FID     = 1;
+my $GID     = 1;
+my $FTYPEID = 1;
+my $ATTRIBUTEID = 1;
+my %GROUPID     = ();
+my %FTYPEID     = ();
+my %ATTRIBUTEID = ();
+my %DONE        = ();
+my $FEATURES    = 0;
+
+my %tmpfiles; # keep track of temporary fasta files
+my $count;
+my $fasta_sequence_id;
+my $gff3;
+my $current_file; #used to reset GFF3 flag in mix of GFF and GFF3 files
+
+$db->preferred_groups(split (/[,\s]+/,$GROUP_TAG)) if defined $GROUP_TAG;
+
+# Reference points for the timing output; both stay undef when Time::HiRes is
+# not available.  A ternary is used because a `my` declaration combined with a
+# statement-modifier `if` has undefined behaviour in Perl.
+my $last  = $timer ? Time::HiRes::time() : undef;
+my $start = $last;
+
+  # avoid hanging on standalone --fasta load
+if (!@ARGV) {
+    $FH{NULL} = IO::File->new(">$tmpdir/null");
+    push @ARGV, "$tmpdir/null";
+}
+
+my ($cmap_db);
+if ($use_mysqlcmap){
+  my $options = {
+		 AutoCommit       => 1,
+		 FetchHashKeyName => 'NAME_lc',
+		 LongReadLen      => 3000,
+		 LongTruncOk      => 1,
+		 RaiseError       => 1,
+		};
+
+  $cmap_db = DBI->connect( $DSN, $USER, $PASSWORD, $options );
+}
+# Only load CMap::Utils if using cmap
+unless (!$use_mysqlcmap or
+	eval {
+	  require Bio::GMOD::CMap::Utils;
+	  Bio::GMOD::CMap::Utils->import('next_number');
+	  1;
+	} 
+       ) {
+  print STDERR "Error loading Bio::GMOD::CMap::Utils\n";
+}
+
+
+while (<>) {
+
+  $current_file ||= $ARGV;
+
+  # reset GFF3 flag if new filehandle
+  unless($current_file eq $ARGV){
+    undef $gff3;
+    $current_file = $ARGV;
+  }
+
+  chomp;
+  my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group);
+
+  # close sequence filehandle if required
+  if ( /^\#|\s+|^$|^>|\t/ && defined $FH{FASTA}) {
+    $FH{FASTA}->close;
+    delete $FH{FASTA};
+  }
+
+  # print to fasta file if the handle is open
+  if ( defined $FH{FASTA} ) {
+    $FH{FASTA}->print("$_\n");
+    next;
+  }
+
+  elsif (/^>(\S+)/) {  # uh oh, sequence coming
+    $FH{FASTA} = IO::File->new(">$tmpdir/$1\.fa") or die "FASTA: $!\n";
+    $FH{FASTA}->print("$_\n");
+    print STDERR "Preparing embedded sequence $1\n";
+    push @fasta, "$tmpdir/$1\.fa";
+    push @fasta_files_to_be_unlinked,"$tmpdir/$1\.fa";
+    $tmpfiles{"$tmpdir/$1\.fa"}++;
+    next;
+  }
+
+  elsif (/^\#\#\s*gff-version\s+(\d+)/) {
+    $gff3 = ($1 >= 3);
+    next;
+  }
+
+  elsif (/^\#\#\s*group-tags\s+(.+)/) {
+    $db->preferred_groups(split(/\s+/,$1));
+    next;
+  }
+
+  elsif (/^\#\#\s*sequence-region\s+(\S+)\s+(\d+)\s+(\d+)/i) { # header line
+    ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) =
+	($1,'reference','Component',$2,$3,'.','.','.',$gff3 ? "ID=Sequence:$1": qq(Sequence "$1"));
+  }
+
+  elsif (/^\#/) {
+    next;
+  }
+
+  else {
+    ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split "\t";
+  }
+  if ( not defined( $ref ) or length ($ref) == 0) {
+    warn "\$ref is null.  source = $source, method = $method, group = $group\n";
+    next;
+  }
+  $FEATURES++;
+  my $size = $stop-$start+1;
+  warn "Feature $group ($size) is larger than $MAX_BIN. You will have trouble retrieving this feature.\nRerun script with --maxfeature set to a higher power of 10.\n" if $size > $MAX_BIN;
+
+  $source = '\N' unless defined $source;
+  $score  = '\N' if $score  eq '.';
+  $strand = '\N' if $strand eq '.';
+  $phase  = '\N' if $phase  eq '.';
+
+  my ($group_class,$group_name,$target_start,$target_stop,$attributes) = $db->split_group($group,$gff3);
+
+  # GFF2/3 transition
+  $group_class = [$group_class] unless ref $group_class;
+  $group_name  = [$group_name]  unless ref $group_name;
+
+  # Emit one feature row per group the feature belongs to, plus the group,
+  # type and attribute rows that have not been written yet.
+  for (my $i=0; $i < @$group_name; $i++) {
+    # Empty fields are written as \N.
+    $group_class->[$i]  ||= '\N';
+    $group_name->[$i]   ||= '\N';
+    $target_start ||= '\N';
+    $target_stop  ||= '\N';
+    $method       ||= '\N';
+    $source       ||= '\N';
+
+    my $fid     = $FID++;
+    # The two key parts are joined with $; (the subscript separator) rather
+    # than the empty string, so that e.g. ("ab","c") and ("a","bc") do not
+    # share an id.
+    my $gid     = $GROUPID{lc join($;,$group_class->[$i],$group_name->[$i])}  ||= $GID++;
+    my $ftypeid = $FTYPEID{lc join($;,$source,$method)}                       ||= $FTYPEID++;
+
+    my $bin = bin($start,$stop,$db->min_bin);
+    $FH{ FDATA()  }->print(    join("\t",$fid,$ref,$start,$stop,$bin,$ftypeid,$score,$strand,$phase,$gid,$target_start,$target_stop),"\n"   );
+    if ($use_mysqlcmap){
+      # For the cmap adaptor the group row is written in a different column
+      # layout and is keyed by an id obtained from next_number.
+      my $feature_id    = next_number(
+                                      db         => $cmap_db,
+                                      table_name => 'cmap_feature',
+                                      id_field   => 'feature_id',
+                                     )
+        or die 'No feature id';
+      my $direction = $strand eq '-' ? -1:1;
+      $FH{ FGROUP() }->print(
+                             join("\t",$feature_id,$feature_id,'NULL',0, $group_name->[$i],0,0,'NULL',1,$direction, $group_class->[$i],)
+                             ,"\n"
+                            ) unless $DONE{"G$gid"}++;
+    }
+    else {
+      $FH{ FGROUP() }->print(    join("\t",$gid,$group_class->[$i],$group_name->[$i]),"\n") unless $DONE{"G$gid"}++;
+    }
+    $FH{ FTYPE()  }->print(    join("\t",$ftypeid,$method,$source),"\n"                   ) unless $DONE{"T$ftypeid"}++;
+
+    # Each attribute name is written once; each (feature, attribute, value)
+    # triple is always written.
+    foreach (@$attributes) {
+      my ($key,$value) = @$_;
+      my $attributeid = $ATTRIBUTEID{$key}   ||= $ATTRIBUTEID++;
+      $FH{ FATTRIBUTE() }->print( join("\t",$attributeid,$key),"\n"                       ) unless $DONE{"A$attributeid"}++;
+      $FH{ FATTRIBUTE_TO_FEATURE() }->print( join("\t",$fid,$attributeid,$value),"\n");
+    }
+
+    # Progress report every 1000 feature ids.
+    if ( $fid % 1000 == 0) {
+      # ternary instead of `my ... if`, whose behaviour is undefined
+      my $now     = $timer ? Time::HiRes::time() : undef;
+      my $elapsed = $timer ? sprintf(" in %5.2fs",$now - $last) : '';
+      $last = $now;
+      print STDERR "$fid features parsed$elapsed...";
+      print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
+    }
+  }
+}
+
+$FH{FASTA}->close if exists $FH{FASTA};
+
+for my $file (@fasta) {
+  warn "Preparing DNA file $file....\n";
+  if ($use_pg){
+    $FH{FDNA() }->print("COPY fdna (fref, foffset, fdna) FROM stdin;\n");
+  }
+  my $old = select($FH{FDNA()});
+  $db->load_fasta($file) or warn "Couldn't load fasta file $file: $!";
+  if ($use_pg){
+    $FH{FDNA() }->print("\\.\n\n");
+  }
+  warn "done...\n";
+  select $old;
+  unlink $file if $tmpfiles{$file};
+}
+
+if ($use_pg) { 
+  $FH{FDATA()                }->print("\\.\n\n");
+  $FH{FTYPE()                }->print("\\.\n\n");
+  $FH{FGROUP()               }->print("\\.\n\n");
+  $FH{FATTRIBUTE()           }->print("\\.\n\n");
+  $FH{FATTRIBUTE_TO_FEATURE()}->print("\\.\n\n");
+}
+
+
+$_->close foreach values %FH;
+printf STDERR "Total parse time %5.2fs\n",(Time::HiRes::time() - $start) if $timer;
+warn "Loading feature data and analyzing tables.  You may see RDBMS messages here...\n";
+
+# Feed the scratch files to the database client and remove each one afterwards.
+if ($use_pg){
+  warn "Loading feature data.  You may see Postgres comments...\n";
+
+  foreach (@files) {
+    my $file = "$tmpdir/$_.$$";
+
+    # With switches in $AUTH the command goes through the shell as one
+    # string; otherwise psql is started with a plain argument list.
+    $AUTH ? system("psql $AUTH -f $file $DSN")
+          : system('psql','-f', $file, $DSN);
+
+    unlink $file;
+  }
+
+  warn "Updating sequences ...\n";
+  $db->update_sequences();
+
+  warn "Creating indexes ...\n";
+  $db->_create_indexes_etc();
+
+  warn "done...\n";
+
+}
+
+elsif( $use_mysql or $use_mysqlcmap ) {
+  $start = time();
+
+  my $success = 1;
+  my $TERMINATEDBY = $bWINDOWS ? q( LINES TERMINATED BY '\r\n') : '';
+  for my $f (@files) {
+    my $table = function_to_table($f,$ADAPTOR);
+    my $sql = join ('; ',
+                    "lock tables $table write",
+                    "delete from $table",
+                    "load data $LOCAL infile '$tmpdir/$f.$$' replace into table $table $TERMINATEDBY",
+                    "unlock tables");
+    my $command = MYSQL . qq[$AUTH -s -e "$sql"];
+    $command =~ s/\n/ /g;
+    # &&= short-circuits: once one load has failed, the remaining load
+    # commands are not run (their scratch files are still removed).
+    $success &&= system($command) == 0;
+    unlink "$tmpdir/$f.$$";
+  }
+  printf STDERR "Total load time %5.2fs\n",(time() - $start) if $timer;
+  print STDERR "done...\n";
+
+  print STDERR "Analyzing/optimizing tables. You will see database messages...\n";
+  $start = time();
+  my $sql = '';
+  for my $f (@files) {
+    my $table = function_to_table($f,$ADAPTOR);
+    $sql       .= "analyze table $table;";
+  }
+  my $command = MYSQL . qq[$AUTH -N -s -e "$sql"];
+  $success &&= system($command) == 0;
+  printf STDERR "Optimization time %5.2fs\n",(time() - $start);
+
+  # The MySQL path ends the script here, so statements after this if/elsif
+  # are only reached on the Postgres path.
+  if ($success) {
+    print "$FEATURES features successfully loaded\n";
+    exit 0;
+  } else {
+    print "FAILURE: Please see standard error for details\n";
+    exit -1;
+  }
+}
+
+# Remove the scratch FASTA files written for sequences embedded in the GFF
+# input.  The list already holds complete paths ("$tmpdir/<id>.fa"), so they
+# are unlinked exactly as recorded.  Prefixing $tmpdir and appending the
+# process id again would name files that were never created.  Entries that
+# no longer exist are simply not removed by unlink.
+unlink @fasta_files_to_be_unlinked;
+
+exit 0;
+
+# Map a scratch-file role to the database table it is loaded into.
+#   $function - one of fdata, ftype, fgroup, fdna, fattribute,
+#               fattribute_to_feature
+#   $adaptor  - adaptor name; with 'mysqlcmap' the fgroup rows go to
+#               the cmap_feature table
+# Returns the table name, or the empty string for an unknown role.
+sub function_to_table {
+    my ($function, $adaptor) = @_;
+
+    my %table_for = map { $_ => $_ }
+        qw(fdata ftype fgroup fdna fattribute fattribute_to_feature);
+
+    return '' unless exists $table_for{$function};
+    return 'cmap_feature'
+        if $function eq 'fgroup' && $adaptor eq 'mysqlcmap';
+    return $table_for{$function};
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/bulk_load_gff.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/fast_load_gff.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/fast_load_gff.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/fast_load_gff.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,485 @@
+#!/usr/bin/perl
+# $Id: fast_load_gff.PLS,v 1.30.4.1 2006/11/08 17:25:55 sendu Exp $
+
+use strict;
+# use lib './blib/lib';
+use DBI;
+use IO::File;
+use Getopt::Long;
+use Bio::DB::GFF::Util::Binning 'bin';
+use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
+
+use constant MYSQL => 'mysql';
+
+use constant FDATA      => 'fdata';
+use constant FTYPE      => 'ftype';
+use constant FGROUP     => 'fgroup';
+use constant FDNA       => 'fdna';
+use constant FATTRIBUTE => 'fattribute';
+use constant FATTRIBUTE_TO_FEATURE => 'fattribute_to_feature';
+
+my $DO_FAST = eval "use POSIX 'WNOHANG'; 1;";
+
+=head1 NAME
+
+bp_fast_load_gff.pl - Fast-load a Bio::DB::GFF database from GFF files.
+
+=head1 SYNOPSIS
+
+  % bp_fast_load_gff.pl -d testdb dna1.fa dna2.fa features1.gff features2.gff ...
+
+=head1 DESCRIPTION
+
+This script loads a Bio::DB::GFF database with the features contained
+in a list of GFF files and/or FASTA sequence files.  You must use the
+exact variant of GFF described in L<Bio::DB::GFF>.  Various
+command-line options allow you to control which database to load and
+whether to allow an existing database to be overwritten.
+
+This script is similar to load_gff.pl, but is much faster.  However,
+it is hard-coded to use MySQL and probably only works on Unix
+platforms due to its reliance on pipes.  See L<bp_load_gff.pl> for an
+incremental loader that works with all databases supported by
+Bio::DB::GFF, and L<bp_bulk_load_gff.pl> for a fast MySQL loader that
+supports all platforms.
+
+=head2 NOTES
+
+If the filename is given as "-" then the input is taken from
+standard input. Compressed files (.gz, .Z, .bz2) are automatically
+uncompressed.
+
+FASTA format files are distinguished from GFF files by their filename
+extensions.  Files ending in .fa, .fasta, .fast, .seq, .dna and their
+uppercase variants are treated as FASTA files.  Everything else is
+treated as a GFF file.  If you wish to load -fasta files from STDIN,
+then use the -f command-line switch with an argument of '-', as in 
+
+    gunzip my_data.fa.gz | bp_fast_load_gff.pl -d test -f -
+
+The nature of the load requires that the database be on the local
+machine and that the indicated user have the "file" privilege to load
+the tables and have enough room in /usr/tmp (or whatever is specified
+by the \$TMPDIR environment variable), to hold the tables transiently.
+If your MySQL is version 3.22.6 and was compiled using the "load local
+file" option, then you may be able to load remote databases with local
+data using the --local option.
+
+About maxfeature: the default value is 100,000,000 bases.  If you have
+features that are close to or greater than 100Mb in length, then the
+value of maxfeature should be increased to 1,000,000,000. This value
+must be a power of 10.
+
+If the list of GFF or fasta files exceeds the kernel limit for the
+maximum number of command-line arguments, use the
+--long_list /path/to/files option.
+
+The adaptor used is dbi::mysqlopt.  There is currently no way to
+change this.
+
+=head1 COMMAND-LINE OPTIONS
+
+Command-line options can be abbreviated to single-letter options.
+e.g. -d instead of --database.
+
+   --database <dsn>      Mysql database name
+   --create              Reinitialize/create data tables without asking
+   --local               Try to load a remote database using local data.
+   --user                Username to log in as
+   --fasta               File or directory containing fasta files to load
+   --password            Password to use for authentication
+   --long_list           Directory containing a very large number of
+                         GFF and/or FASTA files
+   --maxfeature          Set the value of the maximum feature size (default 100Mb; must be a power of 10)
+   --group               A list of one or more tag names (comma or space separated)
+                         to be used for grouping in the 9th column.
+   --gff3_munge          Activate GFF3 name munging (see Bio::DB::GFF)
+   --Temporary           Location of a writable scratch directory
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+# Loader-side stand-in for the dbi::mysqlopt adaptor: it inherits from
+# Bio::DB::GFF::Adaptor::dbi::mysqlopt and overrides insert_sequence.
+package Bio::DB::GFF::Adaptor::faux;
+
+use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
+use vars '@ISA';
+@ISA = 'Bio::DB::GFF::Adaptor::dbi::mysqlopt';
+
+# Print one sequence chunk as "id<TAB>offset<TAB>sequence" followed by a
+# newline on the currently selected output handle.
+#   $id     - sequence identifier
+#   $offset - offset of this chunk within the sequence
+#   $seq    - the sequence text of the chunk
+# The newline is printed as a separate list item.  If it were part of the
+# join list, every row would end in a stray tab, i.e. an extra empty column.
+sub insert_sequence {
+  my $self = shift;
+  my ($id,$offset,$seq) = @_;
+  print join("\t",$id,$offset,$seq),"\n";
+}
+
+package main;
+
+# Time::HiRes is optional: try to load it, discard any load error, and
+# remember whether Time::HiRes::time is available for the timing output.
+eval "use Time::HiRes"; undef $@;
+my $timer = defined &Time::HiRes::time;
+
+# Command-line settings plus loader state: %PID is keyed by child process id,
+# $FAILED counts children that ended with a non-zero status.
+my ($DSN,$CREATE,$USER,$PASSWORD,$FASTA,$FAILED,$LOCAL,%PID,$MAX_BIN,$GROUP_TAG,$LONG_LIST,$MUNGE,$TMPDIR);
+
+# Only when the POSIX WNOHANG import succeeded ($DO_FAST): reap finished
+# children without blocking.  Children not recorded in %PID are ignored.
+if ($DO_FAST) {
+  $SIG{CHLD} = sub {
+    while ((my $child = waitpid(-1,&WNOHANG)) > 0) {
+      delete $PID{$child} or next;
+      $FAILED++ if $? != 0;
+    }
+  }
+};
+
+# On INT or TERM, call cleanup() and exit with -1.
+$SIG{INT} = $SIG{TERM} = sub {cleanup(); exit -1};
+
+GetOptions ('database:s'    => \$DSN,
+	    'create'        => \$CREATE,
+	    'user:s'        => \$USER,
+            'local'         => \$LOCAL,
+	    'password:s'    => \$PASSWORD,
+	    'fasta:s'       => \$FASTA,
+	    'group:s'       => \$GROUP_TAG,
+	    'long_list:s'   => \$LONG_LIST,
+            'maxbin|maxfeature:s'    => \$MAX_BIN,
+	    'gff3_munge'    => \$MUNGE,
+	    'Temporary:s'   => \$TMPDIR,
+	   ) or (system('pod2text',$0), exit -1);
+
+$DSN ||= 'test';
+
+# Two parallel representations of the connection settings:
+#   @args - named arguments for the Bio::DB::GFF constructor
+#   $AUTH - switches for the mysql command line (starts empty so that it can
+#           be interpolated even when no user or password was given)
+my @args;
+my $AUTH = '';
+if (defined $USER) {
+  push @args,(-user=>$USER);
+  $AUTH .= " -u$USER";
+}
+if (defined $PASSWORD) {
+  push @args,(-pass=>$PASSWORD);
+  # NOTE(review): the password becomes part of the client command string.
+  $AUTH .= " -p$PASSWORD";
+}
+# Group tags are separated by commas and/or whitespace.
+push @args,(-preferred_groups=>[split(/[,\s]+/,$GROUP_TAG)]) if defined $GROUP_TAG;
+
+my $db = Bio::DB::GFF->new(-adaptor=>'faux',-dsn => $DSN,@args)
+  or die "Can't open database: ",Bio::DB::GFF->error,"\n";
+
+$db->gff3_name_munging(1) if $MUNGE;
+
+# The database is only reinitialised (erased) when --create was given.
+if ($CREATE) {
+  $MAX_BIN ? $db->initialize(-erase=>1,-MAX_BIN=>$MAX_BIN) : $db->initialize(1);
+}
+
+# Fall back to the database's own 'max_bin' value, or to 100,000,000.
+$MAX_BIN ||= $db->meta('max_bin') || 100_000_000;
+
+# deal with really long lists of files: with --long_list the input files are
+# read from a directory instead of the command line
+if ($LONG_LIST) {
+  -d $LONG_LIST or die "The --long_list argument must be a directory\n";
+  opendir GFFDIR,$LONG_LIST or die "Could not open $LONG_LIST for reading: $!";
+  # readdir also returns "." and ".."; they are directories, not input files
+  @ARGV = map { "$LONG_LIST\/$_" } grep { !/^\.\.?\z/ } readdir GFFDIR;
+  closedir GFFDIR;
+
+  if (defined $FASTA && -d $FASTA) {
+    opendir FASTA,$FASTA or die "Could not open $FASTA for reading: $!";
+    push @ARGV, map { "$FASTA\/$_" } grep { !/^\.\.?\z/ } readdir FASTA;
+    closedir FASTA;
+  }
+}
+
+# Compressed inputs are turned into "command |" entries.
+foreach (@ARGV) {
+  $_ = "gunzip -c $_ |" if /\.gz$/;
+  $_ = "uncompress -c $_ |" if /\.Z$/;
+  $_ = "bunzip2 -c $_ |" if /\.bz2$/;
+}
+# Separate FASTA inputs (recognised by extension, case-insensitively) from GFF
+# inputs.  Only the GFF inputs remain in @ARGV.
+my(@fasta,@gff);
+foreach (@ARGV) {
+  if (/\.(fa|fasta|dna|seq|fast)(?:\.|$)/i) {
+    push @fasta,$_;
+  } else {
+    push @gff,$_;
+  }
+}
+@ARGV = @gff;
+push @fasta,$FASTA if defined $FASTA;
+
+# initialize state variables
+my $FID     = 1;
+my $GID     = 1;
+my $FTYPEID = 1;
+my $ATTRIBUTEID = 1;
+my %GROUPID     = ();
+my %FTYPEID     = ();
+my %ATTRIBUTEID = ();
+my %DONE        = ();
+my $FEATURES    = 0;
+
+load_tables($db->dbh) unless $CREATE;
+my ($major,$minor,$sub) = split /\./,$db->dbh->get_info(18); # SQL_DBMS_VER
+my $can_disable_indexes = ($major >= 4 and $minor >= 0);
+
+# open up pipes to the database
+my (%FH,%COMMAND);
+my $MYSQL = MYSQL;
+my $tmpdir = $TMPDIR || $ENV{TMPDIR} || $ENV{TMP} || '/usr/tmp';
+-d $tmpdir or die <<END;
+I could not find a suitable temporary directory to write scratch files into ($tmpdir by default).
+Please select a directory and indicate its location by setting the TMP environment variable, or
+by using the --Temporary switch.
+END
+
+my @fasta_files_to_be_unlinked;
+my @files = (FDATA,FTYPE,FGROUP,FDNA,FATTRIBUTE,FATTRIBUTE_TO_FEATURE);
+# For every table: create the load file (a named pipe when mkfifo works),
+# build the mysql command that loads it, start that command in a child
+# process when fast loading is possible, and open the file for writing.
+foreach (@files) {
+  my $file = "$tmpdir/$_.$$";
+  print STDERR "creating load file $file...";
+  # A failing mkfifo switches fast loading off for the rest of the run.
+  $DO_FAST &&= (system("mkfifo $file") == 0);  # for system(), 0 = success
+  print STDERR "ok\n";
+  my $delete = $CREATE ? "delete from $_" : '';
+  my $local  = $LOCAL ? 'local' : '';
+  my $analyze = "analyze table $_";
+  my $command =<<END;
+$MYSQL $AUTH
+-N
+-s
+-e "lock tables $_ write; $delete; load data $local infile '$file' replace into table $_; unlock tables; $analyze"
+$DSN
+END
+;
+  $command =~ s/\n/ /g;
+  # Kept per table; presumably run later when fast loading is off -- the code
+  # that uses %COMMAND is not part of this section.
+  $COMMAND{$_} = $command;
+
+  if ($DO_FAST) {
+    if (my $pid = fork) {
+      $PID{$pid} = $_;
+      print STDERR "pausing for 0.5 sec..." if $DO_FAST;
+      select(undef,undef,undef,0.50); # work around a race condition
+      print STDERR "ok\n";
+    } else {  # child process, or the parent when fork itself failed
+      die "Couldn't fork: $!" unless defined $pid;
+      # Low-precedence `or`: with `||` the die is attached to $command and
+      # never runs, so a failed exec would end the child with status 0.
+      exec($command) or die "Couldn't exec: $!";
+    }
+  }
+  print STDERR "opening load file for writing...";
+  $FH{$_} = IO::File->new($file,'>') or die $_,": $!";
+  print STDERR "ok\n";
+  $FH{$_}->autoflush;
+}
+
+print STDERR "Fast loading enabled\n"    if $DO_FAST;
+
+my ($count,$gff3,$last,$start,$beginning,$current_file);
+
+$last  = Time::HiRes::time() if $timer;
+$beginning = $start = $last;
+
+# avoid hanging on standalone --fasta load
+if (!@ARGV) {
+    $FH{NULL} = IO::File->new(">$tmpdir/null");
+    push @ARGV, "$tmpdir/null";
+}
+
+while (<>) {
+
+  # reset GFF3 flag if new filehandle
+  $current_file ||= $ARGV;
+  unless ($current_file eq $ARGV) {
+    undef $gff3;
+    $current_file = $ARGV;
+  }
+
+  chomp;
+  my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group);
+
+  # close sequence filehandle if required
+  if ( /^\#|\s+|^$|^>|\t/ && defined $FH{FASTA}) {
+      $FH{FASTA}->close;
+      delete $FH{FASTA};
+  }
+
+  # print to fasta file if the handle is open
+  if ( defined $FH{FASTA} ) {
+      $FH{FASTA}->print("$_\n");
+      next;
+  }
+
+  elsif (/^>(\S+)/) {  # uh oh, sequence coming
+      $FH{FASTA} = IO::File->new(">$tmpdir/$1\.fa") or die "FASTA: $!\n";
+      $FH{FASTA}->print("$_\n");
+      push @fasta, "$tmpdir/$1\.fa";
+      push @fasta_files_to_be_unlinked,"$tmpdir/$1\.fa";
+      print STDERR "Processing embedded sequence $1\n";
+      next;
+  }
+
+  elsif (/^\#\#\s*group-tags\s+(.+)/) {
+    $db->preferred_groups(split(/\s+/,$1));
+    next;
+  }
+
+  elsif (/^\#\#\s*gff-version\s+(\d+)/) {
+    $gff3 = ($1 >= 3);
+    next;
+  }
+
+  elsif (/^\#\#\s*sequence-region\s+(\S+)\s+(\d+)\s+(\d+)/i) { # header line
+    ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = 
+      ($1,'reference','Component',$2,$3,'.','.','.',$gff3 ? "ID=Sequence:$1": qq(Sequence "$1"));
+  }
+
+  elsif (/^\#/) {
+    next;
+  }
+
+  else {
+    ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split "\t";
+  }
+  next unless defined $ref;
+  $FEATURES++;
+
+  warn "Feature $group is larger than $MAX_BIN. You will have trouble retrieving this feature.\nRerun script with --maxfeature set to a higher power of 10.\n" if $stop-$start+1 > $MAX_BIN;
+
+  $source = '\N' unless defined $source;
+  $score  = '\N' if $score  eq '.';
+  $strand = '\N' if $strand eq '.';
+  $phase  = '\N' if $phase  eq '.';
+
+  my ($gclass,$gname,$target_start,$target_stop,$attributes) = $db->split_group($group,$gff3);
+  # GFF2/3 transition
+  $gclass = [$gclass] unless ref $gclass;
+  $gname  = [$gname]  unless ref $gname;
+
+  for (my $i=0; $i < @$gname; $i++) {
+    my $group_class = $gclass->[$i];
+    my $group_name  = $gname->[$i];
+    $group_class  ||= '\N';
+    $group_name   ||= '\N';
+    $target_start ||= '\N';
+    $target_stop  ||= '\N';
+    $method       ||= '\N';
+    $source       ||= '\N';
+
+    my $fid     = $FID++;
+    my $gid     = $GROUPID{lc join($;,$group_class,$group_name)} ||= $GID++;
+    my $ftypeid = $FTYPEID{lc join($;,$source,$method)}          ||= $FTYPEID++;
+
+    my $bin = bin($start,$stop,$db->min_bin);
+    $FH{ FDATA()  }->print(    join("\t",$fid,$ref,$start,$stop,$bin,$ftypeid,$score,$strand,$phase,$gid,$target_start,$target_stop),"\n"   );
+    $FH{ FGROUP() }->print(    join("\t",$gid,$group_class,$group_name),"\n"              ) unless $DONE{"fgroup$;$gid"}++;
+    $FH{ FTYPE()  }->print(    join("\t",$ftypeid,$method,$source),"\n"                   ) unless $DONE{"ftype$;$ftypeid"}++;
+
+    foreach (@$attributes) {
+      my ($key,$value) = @$_;
+      my $attributeid = $ATTRIBUTEID{$key}   ||= $ATTRIBUTEID++;
+      $FH{ FATTRIBUTE() }->print( join("\t",$attributeid,$key),"\n"                       ) unless $DONE{"fattribute$;$attributeid"}++;
+      $FH{ FATTRIBUTE_TO_FEATURE() }->print( join("\t",$fid,$attributeid,$value),"\n");
+    }
+
+    if ( $FEATURES % 1000 == 0) {
+      my $now    = Time::HiRes::time() if $timer;
+      my $elapsed = $timer ? sprintf(" in %5.2fs",$now - $last) : '';
+      $last = $now;
+      print STDERR "$fid features parsed$elapsed...";
+      print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
+    }
+  }
+}
+
+$FH{FASTA}->close if exists $FH{FASTA};
+
+printf STDERR "Feature load time %5.2fs\n",(Time::HiRes::time() - $start) if $timer;
+$start = time();
+
+for my $fasta (@fasta) {
+  warn "Loading fasta ",(-d $fasta?"directory":"file"), " $fasta\n";
+  my $old = select($FH{FDNA()});
+  my $loaded = $db->load_fasta($fasta);
+  warn "$fasta: $loaded records loaded\n";
+  select $old;
+}
+
+printf STDERR "Fasta load time %5.2fs\n",(Time::HiRes::time() - $start) if $timer;
+$start = time();
+
+my $success = 1;
+if ($DO_FAST) {
+  warn "Indexing and analyzing tables.  This may take some time (you may see database messages during the process)...\n";
+}
+
+$_->close foreach values %FH;
+
+if (!$DO_FAST) {
+  warn "Loading feature data and analyzing tables.  You may see database messages here...\n";
+  $success &&= system($COMMAND{$_}) == 0 foreach @files;
+}
+
+# wait for children
+while (%PID) {
+  sleep;
+}
+$success &&= !$FAILED;
+
+cleanup();
+
+printf STDERR "Total parse & load time %5.2fs\n",(Time::HiRes::time() - $beginning) if $timer;
+
+if ($success) {
+  print "SUCCESS: $FEATURES features successfully loaded\n";
+  exit 0;
+} else {
+  print "FAILURE: Please see standard error for details\n";
+  exit -1;
+}
+
+exit 0;
+
+# Remove the temporary files created during the load.
+#
+# Entries of @files are bare names whose temporary file is addressed as
+# "$tmpdir/<name>.<pid>".  Entries of @fasta_files_to_be_unlinked are pushed
+# by the main loop as complete paths ("$tmpdir/<id>.fa"), so they have to be
+# unlinked as-is; prefixing them with $tmpdir and appending the pid (as was
+# done before) produces a path that never exists and leaves the files behind.
+sub cleanup {
+  unlink "$tmpdir/$_.$$" foreach @files;
+  unlink $_              foreach @fasta_files_to_be_unlinked;
+}
+
+# Prime the in-memory id counters and name->id lookup tables from the
+# database so that newly written rows continue after the existing ones.
+#   $handle - database handle passed through to get_max_id() and get_ids()
+sub load_tables {
+  my ($handle) = @_;
+  print STDERR "loading normalized group, type and attribute information...";
+
+  # next unused primary key for each table
+  $FID         = get_max_id($handle,'fdata','fid')                + 1;
+  $GID         = get_max_id($handle,'fgroup','gid')               + 1;
+  $FTYPEID     = get_max_id($handle,'ftype','ftypeid')            + 1;
+  $ATTRIBUTEID = get_max_id($handle,'fattribute','fattribute_id') + 1;
+
+  # existing rows: [ lookup hash, table, id column, key column(s)... ]
+  my @lookups = (
+    [ \%GROUPID,     'fgroup',     'gid',           'gclass', 'gname'    ],
+    [ \%FTYPEID,     'ftype',      'ftypeid',       'fsource', 'fmethod' ],
+    [ \%ATTRIBUTEID, 'fattribute', 'fattribute_id', 'fattribute_name'    ],
+  );
+  get_ids($handle,\%DONE,@$_) foreach @lookups;
+
+  print STDERR "ok\n";
+}
+
+# Return the largest value stored in column $id of $table.
+#   $dbh   - database handle providing selectcol_arrayref() and errstr()
+#   $table - table name, interpolated into the SQL as-is
+#   $id    - column name, interpolated into the SQL as-is
+# Dies with the handle's error string when the query fails.  An empty table
+# makes max() yield NULL (undef); 0 is returned in that case so that callers
+# computing "1 + get_max_id(...)" do not add to an undefined value.
+sub get_max_id {
+  my $dbh = shift;
+  my ($table,$id) = @_;
+  my $sql = "select max($id) from $table";
+  my $result = $dbh->selectcol_arrayref($sql) or die $dbh->errstr;
+  my $max = $result->[0];
+  return defined $max ? $max : 0;
+}
+
+# Load the existing rows of $table into the in-memory lookup tables.
+#   $dbh     - database handle
+#   $done    - hash ref; gets an entry keyed "$table$;$id" for every row seen,
+#              marking that row as already present in the database
+#   $idhash  - hash ref; maps the row's key column values (joined with $;)
+#              to the row id
+#   $table   - table name
+#   $id      - name of the id column
+#   @columns - names of the column(s) that make up the lookup key
+# Dies with the handle's error string when prepare or execute fails.
+#
+# NOTE(review): the main loop looks groups and types up with
+# "lc join($;,...)", whereas the keys stored here are not lower-cased, so
+# rows with mixed-case values may not be matched - confirm whether intended.
+sub get_ids {
+  my $dbh = shift;
+  my ($done,$idhash,$table,$id,@columns) = @_;
+  my $columns = join ',',$id,@columns;
+  my $sql = "select $columns from $table";
+  my $sth = $dbh->prepare($sql) or die $dbh->errstr;
+  $sth->execute or die $dbh->errstr;
+  while (my($id,@cols) = $sth->fetchrow_array) {
+    my $key = join $;,@cols;
+    $idhash->{$key} = $id;
+    $done->{$table,$id}++;
+  }
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/fast_load_gff.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/genbank2gff3.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/genbank2gff3.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/genbank2gff3.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,456 @@
+#!/usr/bin/perl -w
+
+#$Id: genbank2gff3.PLS,v 1.9 2005/07/15 16:54:01 matsallac Exp $;
+
+=pod
+
+=head1 NAME 
+
+bp_genbank2gff3.pl -- Genbank-E<gt>gbrowse-friendly GFF3
+
+=head1 SYNOPSIS
+
+  bp_genbank2gff3.pl [options] filename(s)
+
+  # process a directory containing GenBank flatfiles
+  perl bp_genbank2gff3.pl --dir path_to_files --zip
+
+  # process a single file, ignore explicit exons and introns
+  perl bp_genbank2gff3.pl --filter exon --filter intron file.gbk.gz
+
+  # process a list of files 
+  perl bp_genbank2gff3.pl *gbk.gz
+
+
+    Options:
+        --dir     -d  path to a list of genbank flatfiles
+        --outdir  -o  location to write GFF files
+        --zip     -z  compress GFF3 output files with gzip
+        --summary -s  print a summary of the features in each contig
+        --filter  -x  genbank feature type(s) to ignore
+        --split   -y  split output to separate GFF and fasta files for
+                      each genbank record
+        --nolump  -n  separate file for each reference sequence
+                      (default is to lump all records together into one 
+                       output file for each input file)
+        --ethresh -e  error threshold for unflattener
+                      set this high (>2) to ignore all unflattener errors
+        --help    -h  display this message
+
+
+=head1 DESCRIPTION
+
+This script uses Bio::SeqFeature::Tools::Unflattener and
+Bio::Tools::GFF to convert GenBank flatfiles to GFF3 with gene
+containment hierarchies mapped for optimal display in gbrowse.
+
+The input files are assumed to be gzipped GenBank flatfiles for refseq
+contigs.  The files may contain multiple GenBank records.  Either a
+single file or an entire directory can be processed.  By default, the
+DNA sequence is embedded in the GFF but it can be saved into a separate
+fasta file with the --split(-y) option.
+
+If an input file contains multiple records, the default behaviour is
+to dump all GFF and sequence to a file of the same name (with .gff
+appended).  Using the 'nolump' option will create a separate file for
+each genbank record.  Using the 'split' option will create separate
+GFF and Fasta files for each genbank record.
+
+
+=head2 Notes
+
+=head3 Note1:
+
+In cases where the input files contain many GenBank records (for
+example, the chromosome files for the mouse genome build), a very
+large number of output files will be produced if the 'split' or
+'nolump' options are selected.  If you do have lists of files E<gt> 6000,
+use the --long_list option in bp_bulk_load_gff.pl or
+bp_fast_load_gff.pl to load the gff and/or fasta files.
+
+=head3 Note2:
+
+This script is designed for refseq genomic sequence entries.  It may
+work for third party annotations but this has not been tested.
+
+=head1 AUTHOR 
+
+Sheldon McKay (mckays at cshl.edu)
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory.
+
+=cut
+
+use strict;
+
+use lib "$ENV{HOME}/bioperl-live";
+# chad put this here to enable situations when this script is tested
+# against bioperl compiled into blib along with other programs using blib
+BEGIN {
+	unshift(@INC,'blib/lib');
+};
+use Pod::Usage;
+use Bio::Root::RootI;
+use Bio::SeqIO;
+use File::Spec;
+use Bio::SeqFeature::Tools::Unflattener;
+use Bio::SeqFeature::Tools::TypeMapper;
+use Bio::SeqFeature::Tools::IDHandler;
+use Bio::Tools::GFF;
+use Getopt::Long;
+
+use vars qw/$split @filter $zip $outdir $help $ethresh
+            $file @files $dir $summary $nolump
+            $gene_id $rna_id $tnum %method %id %seen/;
+
+# ---------------------------------------------------------------------------
+# Option parsing and input/output discovery.
+#
+# After this section:
+#   $lump   - true when all records of an input file go into one GFF file
+#   $filter - space-joined list of feature types to drop ('' when none)
+#   @files  - input files, each usable as a path from the current directory
+#   $outdir - output directory, always ending in '/'
+# ---------------------------------------------------------------------------
+$| = 1;
+
+GetOptions( 'd|dir:s'    => \$dir,
+            'z|zip'      => \$zip,
+            'h|help'     => \$help,
+            's|summary'  => \$summary,
+            'o|outdir:s' => \$outdir,
+            'x|filter:s' => \@filter,
+            'y|split'    => \$split,
+            "ethresh|e=s"=> \$ethresh,
+            'n|nolump'   => \$nolump);
+
+# Lumping is the default; --nolump and --split both turn it off.  (Written
+# as a plain expression: "my $x = ... unless ..." has undefined behaviour.)
+my $lump = ( $nolump || $split ) ? 0 : 1;
+
+# look for help request
+pod2usage(2) if $help;
+
+# initialize handlers
+my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
+$unflattener->error_threshold($ethresh) if $ethresh;
+my $tm  = Bio::SeqFeature::Tools::TypeMapper->new;
+my $idh = Bio::SeqFeature::Tools::IDHandler->new;
+
+# stringify filter list if applicable
+my $filter = @filter ? join( ' ', @filter ) : '';
+
+# determine input files
+if ( $file ) {
+    -e $file or die "file $file does not exist\n";
+
+    if ( $file =~ m|/|) {
+        ($dir) = $file =~ m|(\S+)/\S+$|;
+    }
+
+    $dir ||= '.';
+    @files = ($file);
+}
+elsif ( $dir ) {
+    if ( -d $dir ) {
+        opendir DIR, $dir or die "could not open $dir for reading: $!";
+        # readdir returns bare names; prefix them with the directory so the
+        # existence test and open() in the main loop work from any cwd
+        @files = map  { File::Spec->catfile($dir, $_) }
+                 grep { /\.gb.*/ } readdir DIR;
+        closedir DIR;
+    }
+    else {
+        die "$dir is not a directory\n";
+    }
+}
+else {
+    @files = @ARGV;
+    $dir = '';
+}
+
+# we should have some files by now
+pod2usage(2) unless @files;
+
+if ( $outdir && !-e $outdir ) {
+    mkdir($outdir) or die "could not create directory $outdir: $!\n";
+}
+elsif ( !$outdir ) {
+    $outdir = $dir || '.';
+}
+
+$outdir .= '/' unless $outdir =~ m|/$|;
+
+# ---------------------------------------------------------------------------
+# Main loop: convert every GenBank record of every input file to GFF3.
+#
+# Output handles are closed before their files are handed to gzip, so that
+# buffered data is on disk when the file is compressed.
+# ---------------------------------------------------------------------------
+for my $file ( @files ) {
+    chomp $file;
+    die "$! $file" unless -e $file;
+    print "Processing file $file...\n";
+
+    # In lump mode every record of this input file goes into one output
+    # file named after the input file (directory stripped, '.gff' appended).
+    # $lump starts out as a plain flag and afterwards holds that file name.
+    my $lump_fh;
+    if ( $lump ) {
+        my ($vol,$dirs,$fileonly) = File::Spec->splitpath($file);
+        $lump = $outdir . $fileonly . '.gff';
+        open $lump_fh, '>', $lump
+            or die "Could not create a lump outfile called ($lump) because ($!)\n";
+    }
+
+    # open input file, unzip if req'd; the list form of the pipe open keeps
+    # the file name away from the shell
+    my $in_fh;
+    if ( $file =~ /\.gz/ ) {
+        open $in_fh, '-|', 'gunzip', '-c', $file
+            or die "Could not start gunzip for $file: $!\n";
+    }
+    else {
+        open $in_fh, '<', $file
+            or die "Could not open $file for reading: $!\n";
+    }
+
+    my $in    = Bio::SeqIO->new(-fh => $in_fh, -format => 'GenBank');
+    my $gffio = Bio::Tools::GFF->new( -noparse => 1, -gff_version => 3 );
+
+    while ( my $seq = $in->next_seq ) {
+        my $seq_name = $seq->accession;
+        my $end      = $seq->length;
+        my @to_print;
+
+        # the summary is documented as per contig, so restart the counters
+        %method = ();
+
+        # arrange disposition of GFF output
+        my ($outfile, $out);
+        if ( $lump ) {
+            $outfile = $lump;
+            $out     = $lump_fh;
+        }
+        else {
+            $outfile = $outdir . $seq->accession . ".gff";
+            open $out, '>', $outfile
+                or die "Could not create $outfile: $!\n";
+        }
+
+        # filter out unwanted features
+        filter($seq);
+
+        # abort if there are no features
+        warn "$seq_name has no features, skipping\n" and next
+            if !$seq->all_SeqFeatures;
+
+        # unflatten gene graphs, apply SO types, etc
+        unflatten_seq($seq);
+
+        # construct a GFF header
+        print $out gff_header($seq_name, $end);
+
+        # Note that we use our own get_all_SeqFeatures function
+        # to rescue cloned exons
+        for my $feature ( get_all_SeqFeatures($seq) ) {
+
+            $feature->source_tag('GenBank');
+            my $method = $feature->primary_tag;
+
+            # current gene name.  The unflattened gene features should be in
+            # order so any exons, CDSs, etc that follow will belong to this gene
+            if ( $method eq 'gene' ) {
+                gene_name($feature);
+            }
+
+            if ( $feature->has_tag('gene') || $method =~ /CDS|exon|RNA|UTR|gene/ ) {
+                my $unique = gene_features($feature);
+                push @to_print, $feature if $unique;
+            }
+
+            # otherwise handle as generic feats with IDHandler labels
+            else {
+                my $gff = generic_features($feature,$gffio,$seq_name);
+                print $out "$gff\n" if $gff;
+            }
+        }
+
+        # gene-related features are written after the generic ones
+        for my $printme ( @to_print ) {
+            my $gff = $gffio->gff_string($printme);
+            print $out "$gff\n";
+        }
+
+        # deal with the corresponding DNA, wrapped at 60 columns
+        my $dna = $seq->seq;
+        $dna    =~ s/(\S{60})/$1\n/g;
+        $dna   .= "\n";
+
+        my ($fa_out,$fa_outfile);
+        if ($split) {
+            $fa_outfile = $outfile;
+            $fa_outfile =~ s/gff$/fa/;
+            open $fa_out, '>', $fa_outfile or die $!;
+            print $fa_out ">$seq_name\n$dna";
+            close $fa_out or die "Could not write $fa_outfile: $!\n";
+        }
+        else {
+            print $out ">$seq_name\n$dna";
+        }
+
+        # A per-record output file is complete at this point; close it so
+        # everything is on disk before gzip reads it.  The shared lump file
+        # stays open until all records of this input file are written.
+        if ( !$lump ) {
+            close $out or die "Could not write $outfile: $!\n";
+        }
+
+        if ( $zip && !$lump ) {
+            system('gzip', '-f', $outfile);
+            $outfile .= '.gz';
+            # there is a fasta file to compress only in split mode
+            if ( $split ) {
+                system('gzip', '-f', $fa_outfile);
+                $fa_outfile .= '.gz';
+            }
+        }
+
+        print " GFF3 saved to $outfile";
+        print $split ? "; DNA saved to $fa_outfile\n" : "\n";
+
+        if ( $summary ) {
+            print "Summary:\nFeature\tCount\n-------\t-----\n";
+
+            for ( keys %method ) {
+                print "$_  $method{$_}\n";
+            }
+            print "\n";
+        }
+
+    }
+
+    if ( $lump ) {
+        close $lump_fh or die "Could not write $lump: $!\n";
+        system('gzip', '-f', $lump) if $zip;
+    }
+
+    close $in_fh;
+}
+
+# Attach ID/Parent tags to a gene-related feature and count its type.
+#   $feat - feature object (primary_tag / add_tag_value are called on it)
+# Relies on the script globals $gene_id (set by gene_name), $rna_id, $tnum,
+# %method and %seen.  Returns 1 when the feature should be printed, 0 when
+# it has no usable parent or is an exon that was already seen.
+sub gene_features {
+    my ($feat) = @_;
+    my $type = $feat->primary_tag;
+    $method{$type}++;
+
+    if ( $type =~ /gene/ ) {
+        # a new gene starts: label it and restart the transcript counter
+        $feat->add_tag_value( ID => $gene_id );
+        $tnum = 0;
+    }
+    elsif ( $type =~ /mRNA/ ) {
+        return 0 unless $gene_id;
+        # transcript id is derived from the gene id plus a running number
+        ( $rna_id = $gene_id ) =~ s/gene/mRNA/;
+        $rna_id .= '.t0' . ++$tnum;
+        $feat->add_tag_value( ID => $rna_id );
+        $feat->add_tag_value( Parent => $gene_id );
+    }
+    elsif ( $type =~ /exon|CDS/ ) {
+        # exons and CDSs hang off the current transcript
+        return 0 unless $rna_id;
+        $feat->add_tag_value( Parent => $rna_id );
+    }
+    else {
+        # everything else hangs off the current gene
+        return 0 unless $gene_id;
+        $feat->add_tag_value( Parent => $gene_id );
+    }
+
+    # now we can skip cloned exons
+    if ( $type =~ /exon/ && ++$seen{$feat} > 1 ) {
+        return 0;
+    }
+
+    return 1;
+}
+
+# Give a non-gene feature an ID and render it as a GFF line.
+#   $feat   - feature object
+#   $writer - object whose gff_string() produces the GFF text
+#   $refseq - accepted for the caller's benefit, not used here
+# The feature type is counted in the script global %method.  Returns
+# whatever $writer->gff_string($feat) returns.
+sub generic_features {
+    my ($feat, $writer, $refseq) = @_;
+    my $type = $feat->primary_tag;
+    $method{$type}++;
+
+    if ( !$feat->has_tag($type) ) {
+        # no tag named after the type: let the ID handler invent an ID
+        $idh->generate_unique_persistent_id($feat);
+    }
+    else {
+        # use "<type>:<first value of that tag>" unless an ID is present
+        my ($label) = $feat->get_tag_values($type);
+        unless ( $feat->has_tag('ID') ) {
+            $feat->add_tag_value( ID => "$type:$label" );
+        }
+    }
+
+    return $writer->gff_string($feat);
+}
+
+# Build the GFF3 header for one sequence.
+#   $name - sequence name
+#   $end  - sequence length (the region always starts at 1)
+# Returns three pragma lines followed by a 'region' feature line covering
+# the whole sequence, each terminated by a newline.
+sub gff_header {
+    my ($name, $end) = @_;
+
+    my @pragmas = (
+        '##gff-version 3',
+        "##sequence-region $name 1 $end",
+        '##source bp_genbank2gff3.pl',
+    );
+    my $region = join "\t",
+        $name, 'GenBank', 'region', 1, $end, '.', '.', '.', "ID=$name";
+
+    return join '', map { $_ . "\n" } @pragmas, $region;
+}
+
+# Rebuild the gene hierarchy of a sequence and map its feature types to
+# Sequence Ontology terms, using the script-level $unflattener and $tm.
+#   $seq - sequence object
+# An unflattening failure is reported (details on STDERR, a short notice on
+# STDOUT) but is not fatal.  Returns 0 when the sequence has no features
+# afterwards, 1 otherwise.
+sub unflatten_seq {
+    my $seq = shift;
+
+    print "working on contig ", $seq->accession, "...";
+    # note the blank after "with": the accession used to be glued onto it
+    my $uh_oh = "Possible gene unflattening error with " .  $seq->accession .
+                ": consult STDERR\n";
+
+    eval {
+        $unflattener->unflatten_seq( -seq => $seq,
+                                     -use_magic => 1 );
+    };
+
+    # deal with unflattening errors
+    if ( $@ ) {
+        warn $seq->accession . " Unflattening error:\n";
+        warn "Details: $@\n";
+        print $uh_oh;
+    }
+
+    return 0 if !$seq || !$seq->all_SeqFeatures;
+
+    # map feature types to the sequence ontology
+    $tm->map_types_to_SO( -seq => $seq );
+
+    1;
+}
+
+# Drop the feature types listed in the script-level $filter string from a
+# sequence.
+#   $seq - sequence object (remove_SeqFeatures / add_SeqFeature are used)
+# Does nothing when no filter was given.
+#
+# Types are compared as whole words, ignoring case.  The previous test used
+# the feature type as an unquoted regex against the filter string, so a type
+# that merely occurred inside a filter word was dropped as well (filtering
+# 'pseudogene' also removed 'gene').
+sub filter {
+    my $seq = shift;
+    return unless $filter;
+
+    # the filter words may be separated by blanks, commas, ';' or '|'
+    my %unwanted = map { lc($_) => 1 } grep { length } split /[\s,;|]+/, $filter;
+
+    my @feats = grep { !$unwanted{ lc $_->primary_tag } }
+                $seq->remove_SeqFeatures;
+
+    $seq->add_SeqFeature(@feats) if @feats;
+}
+
+
+# Bio::FeatureHolderI::get_all_SeqFeatures now filters out cloned features
+# by default, which this script must not do.  This sub and
+# _add_flattened_SeqFeatures re-implement the old behaviour (adapted from
+# the v1.4 Bio::FeatureHolderI).
+#   $holder - object whose get_SeqFeatures() lists the top-level features
+# Returns a flat list: each top-level feature immediately followed by all
+# of its descendants, depth first.
+sub get_all_SeqFeatures  {
+    my ($holder) = @_;
+
+    return map {
+        my @descendants;
+        _add_flattened_SeqFeatures(\@descendants, $_);
+        ($_, @descendants);
+    } $holder->get_SeqFeatures;
+}
+
+# Remember the name of the current gene in the script global $gene_id.
+#   $gene - gene feature object
+# The first value of the 'gene' tag is used, or failing that the first
+# value of 'locus_tag'.  When neither tag is present $gene_id keeps its
+# previous value.  Returns $gene_id.
+sub gene_name {
+    my ($gene) = @_;
+
+    for my $tag (qw(gene locus_tag)) {
+        next unless $gene->has_tag($tag);
+        ($gene_id) = $gene->get_tag_values($tag);
+        last;
+    }
+
+    return $gene_id;
+}
+
+# Append all descendants of a feature to a list, depth first.
+#   $collected - array ref that receives the sub-features
+#   $parent    - feature whose children are to be collected
+# Children come from get_SeqFeatures() for a Bio::FeatureHolderI and from
+# sub_SeqFeature() for a Bio::SeqFeatureI; anything else only triggers a
+# warning.  $parent itself is not added.
+sub _add_flattened_SeqFeatures  {
+    my ($collected, $parent) = @_;
+    my @children;
+
+    if ( $parent->isa("Bio::FeatureHolderI") ) {
+        @children = $parent->get_SeqFeatures;
+    }
+    elsif ( $parent->isa("Bio::SeqFeatureI") ) {
+        @children = $parent->sub_SeqFeature;
+    }
+    else {
+        warn ref($parent)." is neither a FeatureHolderI nor a SeqFeatureI. ".
+            "Don't know how to flatten.";
+    }
+
+    foreach my $child (@children) {
+        push @$collected, $child;
+        _add_flattened_SeqFeatures($collected, $child);
+    }
+
+}
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/genbank2gff3.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/generate_histogram.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/generate_histogram.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/generate_histogram.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,137 @@
+#!/usr/bin/perl
+# $Id: generate_histogram.PLS,v 1.1 2003/02/24 17:44:04 bosborne Exp $
+
+use strict;
+use lib '.','./blib','../../blib/lib';
+use Bio::DB::GFF;
+use Getopt::Long;
+
+my $usage = <<USAGE;
+Usage: $0 [options] feature_type1 feature_type2...
+
+Dump out a GFF-formatted histogram of the density of the indicated set
+of feature types.
+
+ Options:
+   --dsn        <dsn>       Data source (default dbi:mysql:test)
+   --adaptor    <adaptor>   Schema adaptor (default dbi::mysqlopt)
+   --user       <user>      Username for mysql authentication
+   --pass       <password>  Password for mysql authentication
+   --bin        <bp>        Bin size in base pairs.
+   --aggregator <list>      Comma-separated list of aggregators
+   --sort                   Sort the resulting list by type and bin
+   --merge                  Merge features with same method but different sources
+USAGE
+;
+
+# ---------------------------------------------------------------------------
+# Parse the options, fetch the binned features of the requested types and
+# print them as GFF, optionally merged and/or sorted.
+# ---------------------------------------------------------------------------
+my ($DSN,$ADAPTOR,$AGG,$USER,$PASSWORD,$BINSIZE,$SORT,$MERGE);
+GetOptions ('dsn:s'         => \$DSN,
+            'adaptor:s'     => \$ADAPTOR,
+            'user:s'        => \$USER,
+            'password:s'    => \$PASSWORD,
+            'aggregators:s' => \$AGG,
+            'bin:i'         => \$BINSIZE,
+            'sort'          => \$SORT,
+            'merge'         => \$MERGE,
+           ) or die $usage;
+
+my @types = @ARGV or die $usage;
+
+# some local defaults
+$DSN     ||= 'dbi:mysql:test';
+$ADAPTOR ||= 'dbi::mysqlopt';
+$BINSIZE ||= 1_000_000;   # 1 megabase bins
+
+my @options;
+push @options,(-user=>$USER)     if defined $USER;
+push @options,(-pass=>$PASSWORD) if defined $PASSWORD;
+# the usage text promises a comma-separated list; blanks are accepted too
+push @options,(-aggregator=>[split /[,\s]+/,$AGG]) if defined $AGG;
+
+my $db = Bio::DB::GFF->new(-adaptor=>$ADAPTOR,-dsn => $DSN,@options)
+  or die "Can't open database: ",Bio::DB::GFF->error,"\n";
+
+my @features = $db->features(-binsize=>$BINSIZE,-types=>\@types);
+
+# --merge: strip the trailing ":part" from each feature's name and source,
+# then add up the scores of features that end up with the same source,
+# reference sequence and start
+if ($MERGE) {
+  my %MERGE;
+  for my $f (@features) {
+    my $name  = $f->name;
+    my $class = $name->class;
+    $name =~ s/^(.+:.+):.+$/$1/;
+    $f->group(Bio::DB::GFF::Featname->new($class,$name));
+    my $source = $f->source;
+    $source =~ s/:.+$//;
+    $f->source($source);
+    if (my $already_there = $MERGE{$f->source,$f->abs_ref,$f->abs_start}) {
+      $already_there->score($already_there->score + $f->score);
+    } else {
+      $MERGE{$f->source,$f->abs_ref,$f->abs_start} = $f;
+    }
+  }
+  @features = values %MERGE;
+}
+
+# sort features by type, ref and start if requested
+if ($SORT) {
+  @features = sort {
+    $a->type cmp $b->type
+      || $a->abs_ref cmp $b->abs_ref
+        || $a->start <=> $b->start
+      }
+    @features;
+}
+
+for my $f (@features) {
+  print $f->gff_string,"\n";
+}
+
+
+__END__
+
+=head1 NAME
+
+generate_histogram.pl -- Generate a histogram of Bio::DB::GFF features
+
+=head1 SYNOPSIS
+
+  generate_histogram.pl -d gadfly variation gene:curated
+
+=head1 DESCRIPTION
+
+Use this utility to generate feature density histograms from
+Bio::DB::GFF databases.  The result is a GFF data file that is
+suitable for loading with load_gff.pl.
+
+=head2 OPTIONS
+
+The following options are recognized:
+
+  Option        Description
+  ------        -----------
+
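+   --dsn        <dsn>       Data source (default dbi:mysql:test)
+   --adaptor    <adaptor>   Schema adaptor (default dbi::mysqlopt)
+   --user       <user>      Username for mysql authentication
+   --pass       <password>  Password for mysql authentication
+   --bin        <bp>        Bin size in base pairs (default 1,000,000)
+   --aggregator <list>      Comma-separated list of aggregators
+   --sort                   Sort the resulting list by type and bin
+   --merge                  Merge features with same method but different sources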
+
+=head1 BUGS
+
+Please report them.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2001 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/generate_histogram.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/load_gff.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/load_gff.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/load_gff.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+#!/usr/bin/perl
+
+use strict;
+use lib '../blib/lib';
+use Bio::DB::GFF;
+use Getopt::Long;
+
+=head1 NAME
+
+bp_load_gff.pl - Load a Bio::DB::GFF database from GFF files.
+
+=head1 SYNOPSIS
+
+  % bp_load_gff.pl -d testdb dna1.fa dna2.fa features1.gff features2.gff ...
+
+=head1 DESCRIPTION
+
+This script loads a Bio::DB::GFF database with the features contained
+in a list of GFF files and/or FASTA sequence files.  You must use the
+exact variant of GFF described in L<Bio::DB::GFF>.  Various
+command-line options allow you to control which database to load and
+whether to allow an existing database to be overwritten.
+
+This script uses the Bio::DB::GFF interface, and so works with all
+database adaptors currently supported by that module (MySQL, Oracle,
+PostgreSQL soon).  However, it is slow.  For faster loading, see the
+MySQL-specific L<bp_bulk_load_gff.pl> and L<bp_fast_load_gff.pl> scripts.
+
+=head2 NOTES
+
+If the filename is given as "-" then the input is taken from standard
+input. Compressed files (.gz, .Z, .bz2) are automatically
+uncompressed.
+
+FASTA format files are distinguished from GFF files by their filename
+extensions.  Files ending in .fa, .fasta, .fast, .seq, .dna and their
+uppercase variants are treated as FASTA files.  Everything else is
+treated as a GFF file.  If you wish to load -fasta files from STDIN,
+then use the -f command-line switch with an argument of '-', as in
+
+    gunzip -c my_data.fa.gz | bp_load_gff.pl -d test -f -
+
+On the first load of a database, you will see a number of "unknown
+table" errors.  This is normal.
+
+About maxfeature: the default value is 100,000,000 bases.  If you have
+features that are close to or greater than 100Mb in length, then the
+value of maxfeature should be increased to 1,000,000,000, or another
+power of 10.
+
+=head1 COMMAND-LINE OPTIONS
+
+Command-line options can be abbreviated to single-letter options.
+e.g. -d instead of --database.
+
+   --dsn     <dsn>       Data source (default dbi:mysql:test)
+   --adaptor <adaptor>   Schema adaptor (default dbi::mysqlopt)
+   --user    <user>      Username for mysql authentication
+   --pass    <password>  Password for mysql authentication
+   --fasta   <path>      Fasta file or directory containing fasta files for the DNA
+   --create              Force creation and initialization of database
+   --maxfeature          Set the value of the maximum feature size (default 100 Mb; must be a power of 10)
+   --group               A list of one or more tag names (comma or space separated)
+                          to be used for grouping in the 9th column.
+   --upgrade             Upgrade existing database to current schema
+   --gff3_munge          Activate GFF3 name munging (see Bio::DB::GFF)
+   --quiet               No progress reports
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+# ---------------------------------------------------------------------------
+# Parse the options, open (and optionally create or upgrade) the database,
+# then load the GFF files followed by the FASTA files.
+# ---------------------------------------------------------------------------
+my ($DSN,$ADAPTOR,$CREATE,$USER,$PASSWORD,$FASTA,$UPGRADE,$MAX_BIN,$GROUP_TAG,$MUNGE,$QUIET);
+
+GetOptions ('dsn:s'       => \$DSN,
+            'adaptor:s'   => \$ADAPTOR,
+            'u|user:s'      => \$USER,
+            'p|password:s'  => \$PASSWORD,
+            'fasta:s'     => \$FASTA,
+            'upgrade'     => \$UPGRADE,
+            'maxbin|maxfeature:s'    => \$MAX_BIN,
+            'group:s'     => \$GROUP_TAG,
+            'gff3_munge'    => \$MUNGE,
+            'quiet'       => \$QUIET,
+            create        => \$CREATE) or (system('pod2text',$0), exit -1);
+
+# some local defaults
+$DSN     ||= 'dbi:mysql:test';
+$ADAPTOR ||= 'dbi::mysqlopt';
+
+my @args;
+push @args,(-user=>$USER)     if defined $USER;
+push @args,(-pass=>$PASSWORD) if defined $PASSWORD;
+# tag names may be separated by commas and/or whitespace
+push @args,(-preferred_groups=>[split(/[,\s]+/,$GROUP_TAG)]) if defined $GROUP_TAG;
+push @args,(-create=>1)       if $CREATE;
+push @args,(-write=>1);
+
+my $db = Bio::DB::GFF->new(-adaptor=>$ADAPTOR,-dsn => $DSN,@args)
+  or die "Can't open database: ",Bio::DB::GFF->error,"\n";
+
+$db->gff3_name_munging(1) if $MUNGE;
+
+if ($CREATE) {
+  $MAX_BIN ? $db->initialize(-erase=>1,-MAX_BIN=>$MAX_BIN) :
+             $db->initialize(1);
+} elsif ($UPGRADE) {
+  warn qq(expect to see several "table already exists" messages\n);
+  $db->initialize(0);
+  my $dbi = $db->dbh;  # get the raw database handle
+  # copy the rows of an old fnote table into the attribute tables,
+  # as attribute number 1 ('Note')
+  my ($count) = $dbi->selectrow_array('SELECT COUNT(*) FROM fnote');
+  if (defined($count) && $count > 0) {
+    warn qq(fnote table detected.  Translating into fattribute table.  This may take a while.\n);
+    $dbi->do("INSERT INTO fattribute VALUES (1,'Note')") or die "failed: ",$dbi->errstr;
+    $dbi->do("INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) SELECT fnote.fid,1,fnote FROM fnote") or die "failed: ",$dbi->errstr;
+    warn qq(Schema successfully upgraded.  You might want to drop the fnote table when you're sure everything's working.\n);
+  }
+}
+
+# sort the command-line files into FASTA and GFF by file name extension
+my (@gff,@fasta);
+foreach (@ARGV) {
+  if (/\.(fa|fasta|dna|seq|fast)$/i) {
+    push @fasta,$_;
+  } else {
+    push @gff,$_;
+  }
+}
+
+for my $file (@gff) {
+  warn "$file: loading...\n";
+  my $loaded = $db->load_gff($file,!$QUIET);
+  warn "$file: $loaded records loaded\n";
+}
+
+# a file or directory given with --fasta is loaded before the others
+unshift @fasta,$FASTA if defined $FASTA;
+
+for my $file (@fasta) {
+  warn "Loading fasta ",(-d $file?"directory":"file"), " $file\n";
+  my $loaded = $db->load_fasta($file,!$QUIET);
+  warn "$file: $loaded records loaded\n";
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/load_gff.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/meta_gff.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/meta_gff.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/meta_gff.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,86 @@
+#!/usr/bin/perl
+
+use strict;
+use DBI;
+use Getopt::Long;
+use Bio::DB::GFF;
+
+=head1 NAME
+
+bp_meta_gff.pl - Get/set Bio::DB::GFF meta-data
+
+=head1 SYNOPSIS
+
+  # set the following meta data values
+  % bp_meta_gff.pl -d testdb tag1=value1 tag2=value2
+
+  # get the indicated meta data value
+  % bp_meta_gff.pl -d testdb tag1 tag2
+
+=head1 DESCRIPTION
+
+This script gets or sets metadata in a Bio::DB::GFF database.  Not all
+adaptors support this operation!  To set a series of tags, pass a set
+of tag=value pairs to the script.  To get the contents of a series of
+tags, pass the bare tag names.
+
+The output from the get operation will be an easily parseable set of
+tag=value pairs, one per line.
+
+=head1 COMMAND-LINE OPTIONS
+
+Command-line options can be abbreviated to single-letter options.
+e.g. -d instead of --database.
+
+   --database <dsn>      Mysql database name (default dbi:mysql:test)
+   --adaptor <adaptor>   Mysql adaptor (default dbi::mysqlopt)
+   --user    <user>      Username for mysql authentication
+   --pass    <password>  Password for mysql authentication
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+# ---------------------------------------------------------------------------
+# Parse the options, open the database, then process each command-line
+# argument: "tag=value" stores a value, a bare "tag" prints the stored one.
+# ---------------------------------------------------------------------------
+my ($DSN,$ADAPTOR,$USER,$PASSWORD);
+
+GetOptions ('database:s'    => \$DSN,
+            'adaptor:s'     => \$ADAPTOR,
+            'user:s'      => \$USER,
+            'password:s'  => \$PASSWORD,
+           ) or (system('pod2text', $0), exit -1);
+
+$DSN     ||= 'dbi:mysql:test';
+$ADAPTOR ||= 'dbi::mysqlopt';
+
+my @args;
+push @args,(-user=>$USER)     if defined $USER;
+push @args,(-pass=>$PASSWORD) if defined $PASSWORD;
+
+my $db = Bio::DB::GFF->new(-adaptor=>$ADAPTOR,-dsn => $DSN,@args)
+  or die "Can't open database: ",Bio::DB::GFF->error,"\n";
+
+for my $pair (@ARGV) {
+  # split at the first '=' only, so a value may itself contain '='
+  my ($tag,$value) = split /=/,$pair,2;
+  # test for a non-empty value rather than a true one, so "tag=0" is a set
+  if (defined $value && length $value) {  # set operation
+    $db->meta($tag,$value);
+    my $stored = $db->meta($tag);
+    unless (defined $stored && $stored eq $value) {
+      print STDERR "value for '$tag' not set; perhaps this adaptor does not support meta data?\n";
+    }
+  } else {
+    print "$tag=",$db->meta($tag),"\n";
+  }
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/meta_gff.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_gadfly.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_gadfly.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_gadfly.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,194 @@
+#!/usr/bin/perl
+# Show the usage text and stop when no arguments are given or help is requested.
+# The argument count is tested first so the pattern never sees an undefined value.
+if (@ARGV < 1 || $ARGV[0]=~/^-?-h/) {
+die <<'USAGE';
+
+This script massages the RELEASE 3 Flybase/Gadfly GFF files located at
+http://www.fruitfly.org/sequence/release3download.shtml into the
+"correct" version of the GFF format.
+
+To use this script, download the whole genome FASTA file and save it
+to disk.  (The downloaded file will be called something like
+"na_whole-genome_genomic_dmel_RELEASE3.FASTA", but the link on the
+HTML page doesn't give the filename.)  Do the same for the whole
+genome GFF annotation file (the saved file will be called something
+like "whole-genome_annotation-feature-region_dmel_RELEASE3.GFF".)  If
+you wish you can download the ZIP compressed versions of these files.
+
+Next run this script on the two files, indicating the name of the
+downloaded FASTA file first, followed by the gff file:
+
+ % process_gadfly.pl na_whole-genome_genomic_dmel_RELEASE3.FASTA whole-genome_annotation-feature-region_dmel_RELEASE3.GFF > fly.gff
+
+The fly.gff file and the fasta file can now be loaded into a Bio::DB::GFF database
+using the following command:
+
+  % bulk_load_gff.pl -d fly -fasta na_whole-genome_genomic_dmel_RELEASE3.FASTA fly.gff 
+
+(Where "fly" is the name of the database.  Change it as appropriate.
+The database must already exist and be writable by you!)
+
+The resulting database will have the following feature types
+(represented as "method:source"):
+
+  Component:arm              A chromosome arm
+  Component:scaffold	     A chromosome scaffold (accession #)
+  Component:gap	             A gap in the assembly
+  clone:clonelocator         A BAC clone
+  gene:gadfly                A gene accession number
+  transcript:gadfly          A transcript accession number
+  translation:gadfly         A translation
+  codon:gadfly               Significance unknown
+  exon:gadfly                An exon
+  symbol:gadfly              A classical gene symbol
+  similarity:blastn          A BLASTN hit
+  similarity:blastx          A BLASTX hit
+  similarity:sim4            EST->genome using SIM4
+  similarity:groupest        EST->genome using GROUPEST
+  similarity:repeatmasker    A repeat
+
+IMPORTANT NOTE: This script will *only* work with the RELEASE3 gadfly
+files and will not work with earlier releases.
+
+USAGE
+;
+}
+
+use strict;
+
+# Turn gzip-compressed inputs into "gunzip -c FILE |" pipe specifications so
+# that both the FASTA reader and the <> loop below decompress them on the fly.
+for my $input (@ARGV) {
+  next unless $input =~ /\.gz$/;
+  $input = "gunzip -c $input |";
+}
+
+# The first argument must be the FASTA file; process_fasta() removes it from @ARGV.
+die "call as process_gadfly.pl \"release3_dna.FASTA\" \"release3_features.GFF\""
+  unless $ARGV[0] =~ /fasta/i;
+process_fasta();
+
+# Whatever is left in @ARGV is GFF input: rewrite column 9 of every feature
+# line and emit an extra 'symbol' line when the group mentions a symbol.
+while (my $line = <>) {
+  next if $line =~ /^\#/;
+  chomp $line;
+  my ($ref,$csource,$cmethod,$start,$stop,$cscore,$strand,$cphase,$cgroup) = split "\t", $line;
+  next if $start > $stop;  # inverted coordinates: drop the line rather than repair it
+
+  print join("\t",$ref,$csource,$cmethod,$start,$stop,$cscore,$strand,$cphase,
+             fix_group($csource,$cmethod,$cgroup)),"\n";
+  next unless $cgroup =~ /symbol/i;
+  dump_symbol($ref,$csource,$cmethod,$start,$stop,$cscore,$strand,$cphase,$cgroup);
+}
+
+# Build the GFF group (column 9) string for one feature.
+#   $source - GFF source column (accepted but not consulted)
+#   $method - GFF method column; a "Gene" entry is added only for method 'gene'
+#   $group  - raw Gadfly attribute string (transgrp=, genegrp=, dbxref=, symbol=)
+# Returns the collected entries joined with ' ; ' (empty string if none apply).
+sub fix_group {
+  my ($source,$method,$group) = @_;
+  my (@group,$gene);
+  push @group,"Transcript $1" if $group =~ /transgrp=([^; ]+)/;
+  push @group,"Gene $1"       if $method eq 'gene' && $group =~ /genegrp=([^; ]+)/;
+
+  # The FlyBase cross-reference takes precedence over the Gadfly gene group.
+  $gene ||= qq(Note "FlyBase $1")  if $group =~ /dbxref=FlyBase:(\w+)/;
+  $gene ||= qq(Note "GadFly $1")   if $group =~ /genegrp=([^; ]+)/;
+  # Skip the symbol note when it merely repeats the gene named in the first entry.
+  push @group,qq(Note "Symbol $1")
+    if $group =~ /symbol=([^; ]+)/ && !(defined $group[0] && "Gene $1" eq $group[0]);
+  # Append the note only when one was found, so no dangling ' ; ' is produced.
+  push @group,$gene if defined $gene;
+  return join ' ; ',@group;
+}
+
+# Emit an additional 'symbol' feature line for a record whose group carries a
+# gene symbol.  Takes the nine GFF columns of the record; prints nothing when
+# the symbol is identical to the gene group name.
+sub dump_symbol {
+  my @fields     = @_[0..8];   # ref, source, method, start, stop, score, strand, phase, group
+  my $attributes = $fields[8];
+  my ($name)     = $attributes =~ /symbol=([^;]+)/;
+  my ($gene_id)  = $attributes =~ /genegrp=([^;]+)/;
+  return if $name eq $gene_id;
+  # Same coordinates as the source record, but method 'symbol' and a Symbol group.
+  @fields[2,8] = ('symbol', qq(Symbol "$name"));
+  print join("\t",@fields),"\n";
+}
+
+# Read the FASTA file named by the first remaining command-line argument and
+# print one arm/Component GFF line per sequence, spanning 1..sequence length.
+# The file name is shifted off @ARGV, so a later <> loop no longer sees it.
+sub process_fasta {
+  my $file = shift @ARGV;
+  # NOTE: the two-argument open is deliberate.  The caller rewrites *.gz names
+  # into "gunzip -c FILE |" pipe specifications, which only this form honours.
+  open my $fh,$file or die "Can't open $file: $!";
+  print STDERR "Reading big FASTA file, please be patient...\n";
+  my ($current_id,%lengths);
+  while (<$fh>) {
+    if (/^>(\S+)/) {
+      $current_id = $1;
+      next;
+    }
+    # defined() rather than truth, so a sequence legitimately named "0" is accepted
+    die "this doesn't look like a fasta file to me" unless defined $current_id;
+    chomp;
+    $lengths{$current_id} += length;
+  }
+  # For a pipe, close is where a failing gunzip is reported.
+  close $fh or die "Error while reading $file: ",($! || "exit status $?"),"\n";
+  foreach (sort keys %lengths) {
+    print join("\t",$_,'arm','Component',1,$lengths{$_},'.','+','.',qq(Sequence "$_")),"\n";
+  }
+}
+
+__END__
+
+=head1 NAME
+
+process_gadfly.pl - Massage Gadfly/FlyBase GFF files into a version suitable for the Generic Genome Browser
+
+=head1 SYNOPSIS
+
+  % process_gadfly.pl na_whole-genome_genomic_dmel_RELEASE3.FASTA whole-genome_annotation-feature-region_dmel_RELEASE3.GFF > fly.gff
+
+=head1 DESCRIPTION
+
+This script massages the RELEASE 3 Flybase/Gadfly GFF files located at
+http://www.fruitfly.org/sequence/release3download.shtml into the "correct"
+version of the GFF format.
+
+To use this script, download the whole genome FASTA file and save it
+to disk.  (The downloaded file will be called something like
+"na_whole-genome_genomic_dmel_RELEASE3.FASTA", but the link on the
+HTML page doesn't give the filename.)  Do the same for the whole
+genome GFF annotation file (the saved file will be called something
+like "whole-genome_annotation-feature-region_dmel_RELEASE3.GFF".)  If
+you wish you can download the ZIP compressed versions of these files.
+
+Next run this script on the two files, indicating the name of the
+downloaded FASTA file first, followed by the gff file:
+
+ % process_gadfly.pl na_whole-genome_genomic_dmel_RELEASE3.FASTA whole-genome_annotation-feature-region_dmel_RELEASE3.GFF > fly.gff
+
+The fly.gff file and the fasta file can now be loaded into a Bio::DB::GFF database
+using the following command:
+
+  % bulk_load_gff.pl -d fly -fasta na_whole-genome_genomic_dmel_RELEASE3.FASTA fly.gff 
+
+(Where "fly" is the name of the database.  Change it as appropriate.
+The database must already exist and be writable by you!)
+
+The resulting database will have the following feature types
+(represented as "method:source"):
+
+  Component:arm              A chromosome arm
+  Component:scaffold	     A chromosome scaffold (accession #)
+  Component:gap	             A gap in the assembly
+  clone:clonelocator         A BAC clone
+  gene:gadfly                A gene accession number
+  transcript:gadfly          A transcript accession number
+  translation:gadfly         A translation
+  codon:gadfly               Significance unknown
+  exon:gadfly                An exon
+  symbol:gadfly              A classical gene symbol
+  similarity:blastn          A BLASTN hit
+  similarity:blastx          A BLASTX hit
+  similarity:sim4            EST->genome using SIM4
+  similarity:groupest        EST->genome using GROUPEST
+  similarity:repeatmasker    A repeat
+
+IMPORTANT NOTE: This script will *only* work with the RELEASE3 gadfly
+files and will not work with earlier releases.
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_gadfly.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_sgd.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_sgd.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_sgd.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+#!/usr/bin/perl -w
+
+# $Id: process_sgd.PLS,v 1.2.8.1 2006/10/02 23:10:39 sendu Exp $
+# This script will convert from SGD format to GFF format
+# See http://db.yeastgenome.org/schema/Schema.html
+
+use strict;
+
+# Chromosome lengths keyed by roman-numeral name ("Mit" included).  They are
+# hard-coded because they could not be obtained from the data directly.
+my %CHROMOSOMES = (
+    I    =>   230_203,
+    II   =>   813_139,
+    III  =>   316_613,
+    IV   => 1_531_929,
+    V    =>   576_869,
+    VI   =>   270_148,
+    VII  => 1_090_937,
+    VIII =>   562_639,
+    IX   =>   439_885,
+    X    =>   745_444,
+    XI   =>   666_445,
+    XII  => 1_078_173,
+    XIII =>   924_430,
+    XIV  =>   784_328,
+    XV   => 1_091_284,
+    XVI  =>   948_061,
+    Mit  =>    85_779,
+);
+# Chromosome names in order; the main loop indexes this list with the
+# chromosome column minus one.
+my @ROMAN = (
+    qw(I II III IV V VI VII VIII),
+    qw(IX X XI XII XIII XIV XV XVI),
+    'Mit',
+);
+
+# Print the usage text when help is requested.  @ARGV is tested first: with no
+# arguments the <> loop further down reads standard input, and under -w
+# matching an undefined $ARGV[0] would emit a warning.
+if (@ARGV && $ARGV[0] =~ /^--?h/) {
+  die <<USAGE;
+ Usage: $0 <SGD features file>
+
+This script massages the SGD sequence annotation flat files located at
+ftp://genome-ftp.stanford.edu/pub/yeast/data_dump/feature/chromosomal_features.tab
+into a version of the GFF format suitable for display by the generic
+genome browser.
+
+To use this script, get the SGD chromosomal_features.tab file from the
+FTP site listed above, and run the following command:
+
+  % process_sgd.pl chromosomal_features.tab > yeast.gff
+
+The yeast.gff file can then be loaded into a Bio::DB::GFF database
+using the following command:
+
+  % bulk_load_gff.pl -d <databasename> yeast.gff
+
+USAGE
+;
+}
+
+# first print out chromosomes
+# We hard coded the lengths because they are not available in the features table.
+for my $chrom (sort keys %CHROMOSOMES) {
+  print join("\t",$chrom,'chromosome','Component',1,$CHROMOSOMES{$chrom},'.','.','.',qq(Sequence "$chrom")),"\n";
+}
+
+# this is hard because the SGD idea of a feature doesn't really map onto the GFF idea.
+# Every input row yields one line typed by its own feature type and, when the
+# row names a gene, an additional 'gene' line carrying the aliases.
+while (<>) {
+  chomp;
+  next unless /\S/;   # a blank line would otherwise be printed as a bogus feature
+  my($id,$gene,$aliases,$type,$chromosome,$start,$stop,$strand,$sgdid,$sgdid2,$description,$date) = split "\t";
+  # Rows too short to carry coordinates and strand cannot describe a feature.
+  unless (defined $strand) {
+    warn "skipping malformed line $.\n";
+    next;
+  }
+  # Columns missing from a short row come back undefined; give them a value.
+  $description = '' unless defined $description;
+  my $ref = $ROMAN[$chromosome-1];
+  $description =~ s/"/\\"/g;
+  $description =~ s/;/\\;/g;
+
+  # 'W' becomes the plus strand, anything else the minus strand; minus-strand
+  # rows have their coordinates swapped so that start <= stop is expected.
+  $strand = $strand eq 'W' ? '+' : '-';
+  ($start,$stop) = ($stop,$start) if $strand eq '-';
+  die "Strand logic is messed up" if $stop < $start;
+
+  if ($gene) {
+     my @aliases = split(/\|/,$aliases);
+     my $alias_attrs = join " ; ",map {qq(Alias "$_")} @aliases;
+     my $group = qq(Gene "$gene" ; Note "$description");
+     $group .= " ; $alias_attrs" if $alias_attrs;
+     print join("\t",$ref,'sgd','gene',$start,$stop,'.',$strand,'.',$group),"\n";
+     $description .= "\\; AKA @aliases" if @aliases;
+  }
+
+  print join("\t",$ref,'sgd',$type,$start,$stop,'.',$strand,'.',qq($type "$id" ; Note "$description")),"\n";
+}
+
+__END__
+
+=head1 NAME
+
+process_sgd.pl - Massage SGD annotation flat files into a version suitable for the Generic Genome Browser
+
+=head1 SYNOPSIS
+
+  % process_sgd.pl chromosomal_features.tab > yeast.gff
+
+=head1 DESCRIPTION
+
+This script massages the SGD sequence annotation flat files located at
+ftp://genome-ftp.stanford.edu/pub/yeast/data_dump/feature/chromosomal_features.tab
+into a version of the GFF format suitable for display by the generic
+genome browser.
+
+To use this script, get the SGD chromosomal_features.tab file from the
+FTP site listed above, and run the following command:
+
+  % process_sgd.pl chromosomal_features.tab > yeast.gff
+
+The yeast.gff file can then be loaded into a Bio::DB::GFF database
+using the following command:
+
+  % bulk_load_gff.pl -d <databasename> yeast.gff
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_sgd.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_wormbase.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_wormbase.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_wormbase.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+#!/usr/bin/perl
+
+use constant ACEDB => 'sace://aceserver.cshl.org:2005'; 
+use strict;
+use Ace;
+
+# Loci that the main loop additionally emits with source 'framework'.
+my @framework = qw(mex-3 spe-15 lin-17 unc-11 dhc-1 unc-40 smg-5
+		   unc-13 unc-29 eat-16 lin-11 spe-9 par-6 unc-59 unc-54 mab-9 lin-42
+		   sri-71 smu-2 vab-1 bli-2 dpy-10 him-14 mig-5 unc-4 bli-1 sqt-1 rol-1
+		   his-14 unc-52 unc-45 par-2 let-805 sel-8 mab-21 daf-4 sma-3 lin-39
+		   unc-32 tax-4 ced-9 tra-1 nob-1 daf-1 ced-2 lin-1 unc-17 dpy-13 unc-5
+		   smg-7 dif-1 lin-49 elt-1 daf-14 dpy-20 dpy-26 unc-30 tra-3 sup-24
+		   rho-1 egl-8 unc-60 srh-36 apx-1 unc-62 let-418 dpy-11 let-413 sel-9
+		   unc-42 egl-9 sma-1 sqt-3 odr-3 hda-1 unc-76 gcy-20 skr-5 par-4 unc-51
+		   egl-17 lim-6 fox-1 fax-1 lon-2 unc-97 unc-6 unc-18 mec-10 sop-1 mab-18
+		   sdc-2 odr-7 unc-9 unc-3 gas-1 ace-1);
+# Lookup set built from the list above, for constant-time membership tests.
+my %framework = map {$_=>1} @framework;
+# Counts how often each framework locus was seen, so it is printed only once.
+my %framework_seen = ();
+
+my $USAGE = <<USAGE;
+This script massages the Wormbase GFF files located at
+ftp://www.wormbase.org/pub/wormbase/GENE_DUMPS into a version of the
+GFF format suitable for display by the generic genome browser.  It
+mainly adds comments to the annotations and designates certain
+well-spaced genetic loci as framework landmarks.
+
+This script requires the AcePerl distribution, which is available on
+CPAN (look for the "Ace" module).
+
+To use this script, get the WormBase GFF files from the FTP site
+listed above and place them in a directory.  It might be a good idea
+to name the directory after the current release, such as WS61.  You do
+not need to uncompress the files.
+
+Then give that directory as the argument to this script and capture
+the script's output to a file:
+
+  % process_wormbase.pl ./WS61 > wormbase.gff
+
+It may take a while before you see output from this script, since it
+must first fetch gene and protein database from the remote AceDB
+running at www.wormbase.org.
+The wormbase.gff file can then be loaded into a Bio::DB::GFF database
+using the following command:
+
+  % bulk_load_gff.pl -d <databasename> wormbase.gff
+USAGE
+;
+#'
+
+die $USAGE if $ARGV[0]=~/^-?-h/i;
+
+# A directory argument stands for all the gzipped GFF files inside it.
+if (-d $ARGV[0]) {
+  @ARGV = <$ARGV[0]/*.gff.gz>;
+}
+
+# Validate the input before opening the remote database connection.
+@ARGV || die $USAGE;
+
+# Compressed files are rewritten into pipe specifications that the <> loop
+# opens, so they are decompressed on the fly.
+foreach (@ARGV) { # GFF FILES
+  $_ = "gunzip -c $_ |" if /\.gz$/;
+}
+
+my $db = Ace->connect(-url=>ACEDB,
+		      -query_timeout=>500) or die "Can't open ace database:",Ace->error;
+
+# Lookup tables filled from AceDB once, before any GFF line is processed.
+my (%NOTES,%LOCUS,%GENBANK,%CONFIRMED,%ORFEOME);
+get_confirmed($db,\%CONFIRMED);
+get_genbank($db,\%GENBANK);
+get_loci($db,\%LOCUS);
+get_notes($db,\%NOTES);
+get_orfeome($db,\%ORFEOME);
+
+# Rewrite every GFF line of the input, adding notes, locus/framework gene
+# lines and GenBank lines from the lookup tables loaded above.
+while (<>) {
+  chomp;
+  next if /^\#/;
+  my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split /\t/;
+  next if $source eq 'assembly_tag';  # don't want 'em, don't need 'em
+  $ref    =~ s/^CHROMOSOME_//;
+  $group  =~ s/CHROMOSOME_//;
+
+  $source ='' if $source eq '*UNKNOWN*';
+
+  if ($method eq 'Sequence' && ($source eq 'curated' || $source eq 'RNA') && $group =~ /Sequence "(\w+\.\d+[a-z]?)"/) {
+    # Copy the capture at once; $1 is overwritten by the next successful match.
+    my $name = $1;
+    my @notes;
+    push @notes,map { qq(Note "$_")        } @{$NOTES{$name}}     if $NOTES{$name};
+    push @notes,map { qq(Note "$_")        } @{$LOCUS{$name}}     if $LOCUS{$name};
+    push @notes,qq(Confirmed_by "$CONFIRMED{$name}")              if $CONFIRMED{$name};
+    $group = join ' ; ',$group,@notes;
+    if (my $loci = $LOCUS{$name}) {
+      foreach (@$loci) {
+        print join("\t",$ref,$source,'gene',$start,$stop,$score,$strand,$phase,"Locus $_"),"\n";
+        # A framework locus gets one extra line, the first time it is seen.
+        print join("\t",$ref,'framework','gene',$start,$stop,$score,$strand,$phase,"Locus $_"),"\n" 
+          if $framework{$_} && !$framework_seen{$_}++;
+      }
+    }
+  }
+
+  if ($method eq 'Sequence' && $source eq 'Genomic_canonical' && $group =~ /Sequence "(\w+)"/) {
+    if (my $accession = $GENBANK{$1}) {
+      $group .= qq( ; Note "Genbank $accession");
+      print join("\t",$ref,'Genbank',$method,$start,$stop,$score,$strand,$phase,"Genbank \"$accession\""),"\n";
+    }
+  }
+
+  if ($method eq 'reagent' && $source eq 'Orfeome_project' && $group =~ /PCR_product "([^\"]+)"/) {
+    my $amp = $ORFEOME{$1};
+    $group .= qq( ; Amplified $amp) if defined $amp;
+  }
+
+  # fix variant fields: Variant "T" => Note "T"
+  $group =~ s/(?:Variant|Insert) "(\w+)"/Note "$1"/;
+
+  # fix UTR fields
+  if ($group =~ /UTR "([35])_UTR:(\S+)"/) {
+    $method = 'UTR';
+    $source = "$1_UTR";
+    $group = qq(Sequence "$2");
+  }
+
+  print join("\t",$ref,$source,$method,$start,$stop,$score,$strand,$phase,$group),"\n";
+}
+
+# Fill %$hash so that each genomic sequence name maps to the list of Locus
+# objects that reference it (one or more loci per predicted gene).
+sub get_loci {
+  my ($db,$hash) = @_;
+  my @loci = $db->fetch(-query=>'find Locus Genomic_sequence',-filltag=>'Genomic_sequence');
+  for my $locus (@loci) {
+    my @sequences = $locus->Genomic_sequence;
+    push @{$hash->{$_}},$locus for @sequences;
+  }
+}
+
+# Fill %$hash so that each Sequence object (used as key) maps to an array
+# reference holding its brief identifications; sequences without any are skipped.
+sub get_notes {
+  my ($db,$hash) = @_;
+  for my $sequence ($db->fetch(-query=>'find Sequence Brief_identification',-filltag=>'Brief_identification')) {
+    my @brief = $sequence->Brief_identification;
+    next unless @brief;
+    $hash->{$sequence} = \@brief;
+  }
+}
+
+# Fill %$hash with cosmid => GenBank accession (one to one).  The accession
+# is the third element of the cosmid's first Database row; cosmids without
+# one are left out.
+sub get_genbank {
+  my ($db,$hash) = @_;
+  my @cosmids = $db->fetch(-query=>'find Genome_Sequence Database',-filltag=>'Database');
+  for my $cosmid (@cosmids) {
+    my @row = $cosmid->Database(1)->row;
+    my $accession = $row[2];
+    $hash->{$cosmid} = $accession if $accession;
+  }
+}
+
+# Fill %$hash with predicted gene => confirmation type, substituting
+# 'Unknown' when the Confirmed_by tag yields a false value.
+sub get_confirmed {
+  my ($db,$hash) = @_;
+  my @sequences = $db->fetch(-query=>'find Sequence Confirmed_by',-filltag=>'Confirmed_by');
+  $hash->{$_} = ($_->Confirmed_by || 'Unknown') for @sequences;
+}
+
+# Fill %$hash with PCR product => value of its Amplified tag, for every
+# PCR_Product whose name matches 'mv*'.
+sub get_orfeome {
+  my ($db,$hash) = @_;
+  for my $product ($db->fetch(-query=>'find PCR_Product mv*',-filltag=>'Amplified')) {
+    $hash->{$product} = $product->Amplified;
+  }
+}
+
+__END__
+
+=head1 NAME
+
+process_wormbase.pl - Massage WormBase GFF files into a version suitable for the Generic Genome Browser
+
+=head1 SYNOPSIS
+
+  % process_wormbase.pl ./WS61 > wormbase.gff
+
+=head1 DESCRIPTION
+
+This script massages the Wormbase GFF files located at
+ftp://www.wormbase.org/pub/wormbase/GENE_DUMPS into a version of the
+GFF format suitable for display by the generic genome browser.  It
+mainly adds comments to the annotations and designates certain
+well-spaced genetic loci as framework landmarks.
+
+This script requires the AcePerl distribution, which is available on
+CPAN (look for the "Ace" module).
+
+To use this script, get the WormBase GFF files from the FTP site
+listed above and place them in a directory.  It might be a good idea
+to name the directory after the current release, such as WS61.  You do
+not need to uncompress the files.
+
+Then give that directory as the argument to this script and capture
+the script's output to a file:
+
+  % process_wormbase.pl ./WS61 > wormbase.gff
+
+It may take a while before you see output from this script, since it
+must first fetch gene and protein database from the remote AceDB
+running at www.wormbase.org.
+The wormbase.gff file can then be loaded into a Bio::DB::GFF database
+using the following command:
+
+  % bulk_load_gff.pl -d <databasename> wormbase.gff
+
+=head1 SEE ALSO
+
+L<Bio::DB::GFF>, L<bulk_load_gff.pl>, L<load_gff.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein E<lt>lstein at cshl.orgE<gt>
+
+Copyright (c) 2002 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-DB-GFF/process_wormbase.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store/bp_seqfeature_load.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store/bp_seqfeature_load.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store/bp_seqfeature_load.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,81 @@
+#!/usr/bin/perl
+# $Id: bp_seqfeature_load.PLS,v 1.7 2006/07/05 15:53:50 lstein Exp $
+
+use strict;
+
+use lib '/home/lstein/projects/bioperl-live';
+
+use Getopt::Long;
+use File::Spec;
+use Bio::DB::SeqFeature::Store::GFF3Loader;
+use Bio::DB::SeqFeature::Store;
+
+# Defaults; each can be overridden on the command line and most are shown
+# in the usage text below.
+my $DSN         = 'dbi:mysql:test';
+my $SFCLASS     = 'Bio::DB::SeqFeature';
+my $ADAPTOR     = 'DBI::mysql';
+my $VERBOSE  = 1;
+my $FAST     = 0;
+my $TMP      = File::Spec->tmpdir();
+my $CREATE   = 0;
+my $USER     = '';
+my $PASS     = '';
+
+# The single-letter forms listed in the usage text are accepted as abbreviations
+# of the long option names.
+GetOptions(
+	   'dsn=s'       => \$DSN,
+	   'seqfeature=s'  => \$SFCLASS,
+	   'adaptor=s'   => \$ADAPTOR,
+	   'verbose!'    => \$VERBOSE,
+	   'fast'       => \$FAST,
+	   # "=s" is required here: without it the option is a bare flag and
+	   # -T DIR would set $TMP to 1 instead of to the directory name.
+	   'T|temporary-directory=s' => \$TMP,
+	   'create'      => \$CREATE,
+	   'user=s'      => \$USER,
+	   'password=s'  => \$PASS
+	   ) || die <<END;
+Usage: $0 [options] gff_file1 gff_file2...
+  Options:
+          -d --dsn        The database name ($DSN)
+          -s --seqfeature The type of SeqFeature to create ($SFCLASS)
+          -a --adaptor    The storage adaptor to use ($ADAPTOR)
+          -v --verbose    Turn on verbose progress reporting
+             --noverbose  Turn off verbose progress reporting
+          -f --fast       Activate fast loading (only some adaptors)
+          -T --temporary-directory  Specify temporary directory for fast loading ($TMP)
+          -c --create     Create the database and reinitialize it (will erase contents)
+          -u --user       User to connect to database as
+          -p --password   Password to use to connect to database
+END
+
+# When fast loading is requested, insist that $TMP is an existing, writable directory.
+if ($FAST) {
+  -d $TMP && -w $TMP
+    or die "Fast loading is requested, but I cannot write into the directory $TMP";
+}
+
+# Credentials are passed straight through; they are empty strings when not given.
+my $store = Bio::DB::SeqFeature::Store->new(
+					    -dsn     => $DSN,
+					    -adaptor => $ADAPTOR,
+					    -tmpdir  => $TMP,
+					    -user    => $USER,
+					    -pass    => $PASS,
+					    -write    => 1,
+					    -create   => $CREATE)
+  or die "Couldn't create connection to the database";
+
+# --create wipes and re-initialises the database before loading.
+$store->init_database('erase') if $CREATE;
+
+my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store    => $store,
+							 -sf_class => $SFCLASS,
+							 -verbose  => $VERBOSE,
+							 -tmpdir   => $TMP,
+							 -fast     => $FAST)
+  or die "Couldn't create GFF3 loader";
+
+# on signals, give objects a chance to call their DESTROY methods
+$SIG{TERM} = $SIG{INT} = sub {  undef $loader; undef $store; die "Aborted..."; };
+
+# Every remaining command-line argument is handed to the loader.
+$loader->load(@ARGV);
+
+exit 0;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/Bio-SeqFeature-Store/bp_seqfeature_load.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/DB/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/DB/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/DB/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3 @@
+These are scripts to fetch sequence data from local and remote
+sequence repositories using the Open Bio Database Access registry
+protocol (http://obda.open-bio.org).

Added: trunk/packages/bioperl/branches/upstream/current/scripts/DB/biofetch_genbank_proxy.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/DB/biofetch_genbank_proxy.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/DB/biofetch_genbank_proxy.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,312 @@
+#!perl -w
+# $Id: biofetch_genbank_proxy.PLS,v 1.4.6.1 2006/11/17 09:32:42 sendu Exp $
+
+# dbfetch style caching proxy for GenBank
+use strict;
+use CGI qw(:standard);
+use HTTP::Request::Common;
+use LWP::UserAgent;
+use Cache::FileCache;
+
+use vars qw(%GOT $BUFFER %MAPPING $CACHE);
+
+use constant CACHE_LOCATION => '/usr/tmp/dbfetch_cache';
+use constant MAX_SIZE   => 100_000_000;  # 100 megs, roughly
+use constant CACHE_DEPTH => 4;
+use constant EXPIRATION => "1 week";
+use constant PURGE      => "1 hour";
+
+# BioFetch database name => parameters sent to NCBI efetch for that database.
+%MAPPING = (genbank => {db=>'nucleotide',
+			rettype => 'gb'},
+	    genpep  => {db=>'protein',
+			rettype => 'gp'});
+# we're doing everything in callbacks, so initialize globals.
+$BUFFER = '';
+%GOT    = ();
+
+print header('text/plain');
+
+# Called without any CGI parameters: show the protocol summary and stop.
+param() or print_usage();
+
+my $db     = param('db');
+my $style  = param('style');
+my $format = param('format');
+my $id     = param('id');
+# Absent parameters arrive as undef.  Turn them into empty strings so the
+# checks below answer with the protocol's ERROR lines instead of -w warnings.
+for ($db,$style,$id) { $_ = '' unless defined }
+my @ids    = split /\s+/,$id;
+
+# "format" is documented as optional, so a missing value means the default.
+$format = 'genbank' if !defined $format || $format eq 'default';  #h'mmmph
+
+$MAPPING{$db}        or error(1=>"Unknown database [$db]");
+$style  eq 'raw'     or error(2=>"Unknown style [$style]");
+$format eq 'genbank' or error(3=>"Format [$format] not known for database [$db]");
+
+# The option key is spelt "cache_depth" in lower case so that CACHE_DEPTH
+# is actually applied.
+$CACHE = Cache::FileCache->new({cache_root          => CACHE_LOCATION,
+				default_expires_in  => EXPIRATION,
+				cache_depth         => CACHE_DEPTH,
+				namespace           => 'dbfetch',
+				auto_purge_interval => PURGE});
+
+# handle cached entries
+foreach (@ids) {
+  if (my $obj = $CACHE->get($_)) {
+    $GOT{$_}++;
+    print $obj,"//\n";
+  }
+}
+
+# handle the remainder
+@ids = grep {!$GOT{$_}} @ids;
+if (@ids) {
+  my $request = POST('http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
+		     [rettype    => $MAPPING{$db}{rettype},
+		      db         => $MAPPING{$db}{db},
+		      tool       => 'bioperl',
+		      retmode    => 'text',
+		      usehistory => 'n',
+		      id         => join(',',@ids),
+		     ]
+		    );
+
+  # The response body is consumed chunk by chunk in callback(), which prints
+  # and caches each complete record and marks its identifiers in %GOT.
+  my $ua = LWP::UserAgent->new;
+  my $response = $ua->request($request,\&callback);
+
+  if ($response->is_error) {
+    my $status = $response->status_line;
+    error(6 => "HTTP error from GenBank [$status]");
+  }
+}
+
+# Anything still unmarked was neither cached nor returned; report each one
+# without exiting, so every missing ID gets its own ERROR line.
+my @missing_ids = grep {!$GOT{$_}} @ids;
+foreach (@missing_ids) {
+  error(4=>"ID [$_] not found in database [$db]",1);
+}
+
+# my $response = $response->content;
+
+# Print one GenBank record (re-terminated with "//") and register it, both in
+# %GOT and in the file cache, under every identifier found in the record:
+# LOCUS name, ACCESSION, VERSION and, when present, the GI number.
+sub process_record {
+  my $record = shift;
+  print "$record//\n";
+  my ($locus)       = $record =~ /^LOCUS\s+(\S+)/m;
+  my ($accession)   = $record =~ /^ACCESSION\s+(\S+)/m;
+  # The GI part is optional, so a VERSION line without "GI:" still yields the version.
+  my ($version,$gi) = $record =~ /^VERSION\s+(\S+)(?:\s+GI:(\d+))?/m;
+  foreach ($locus,$accession,$version,$gi) {
+    next unless defined;   # a field that did not match must not become a key
+    $GOT{$_}++;
+    $CACHE->set($_,$record);
+  }
+}
+
+# Content callback for the HTTP request: append the received chunk to $BUFFER
+# and hand every complete record (terminated by "//\n\n") to process_record().
+# An incomplete trailing record stays in $BUFFER for the next chunk.
+sub callback {
+  my $data = shift;
+  $BUFFER .= $data;
+  my $terminator = "//\n\n";
+  # Always search from the start: each finished record is cut off the front of
+  # the buffer, so a carried-over offset would skip the records that follow it.
+  while ((my $end = index($BUFFER,$terminator)) >= 0) {
+    my $record = substr($BUFFER,0,$end);
+    substr($BUFFER,0,$end + length($terminator)) = '';
+    process_record($record);
+  }
+}
+
+
+
+# Print a plain-text summary of the BioFetch query interface and exit with
+# status 0.  Called when the script is invoked without any CGI parameters.
+sub print_usage {
+  print <<'END';
+This script is intended to be used non-interactively.
+
+Brief summary of arguments:
+URL
+
+This interface does not specify what happens when biofetch is called
+in interactive context. The implementations can return the entries
+decorated with HTML tags and hypertext links.
+
+A URL for biofetch consists of four sections:
+
+			e.g.
+1. protocol		http://
+2. host			www.ebi.ac.uk
+3. path to program	/cgi-bin/dbfetch
+4. query string		?style=raw;format=embl;db=embl;id=J00231
+
+
+QUERY STRING
+
+The query string options are separated from the base URL (protocol +
+host + path) by a question mark (?) and from each other by a semicolon
+';' (or by ampersand '&'). See CGI GET documents at
+http://www.w3.org/CGI/. The order of options is not critical. It is
+recommended to leave the ID to be the last item.
+
+Input for options should be case insensitive.
+
+
+option: db
+
+  Option  : db
+  Descr   : database name
+  Type    : required
+  Usage   : db=genpep | db=genbank
+  Arg     : string 
+
+Currently this server accepts "genbank" and "genpep"
+
+option: style
+
+  Option  : style
+  Descr   : +/- HTML tags
+  Type    : required
+  Usage   : style=raw | style=html
+  Arg     : enum (raw|html)
+
+In non-interactive context, always give "style=raw". This uses
+"Content-Type: text/plain". If other content types are needed (XML),
+this part of the specifications can be extended to accommodate them.
+
+This server only accepts "raw".
+
+
+option: format
+
+  Option  : format
+  Descr   : format of the database entries returned
+  Type    : optional
+  Usage   : format=genbank
+  Arg     : enum
+
+Format defaults to the distribution format of the database (embl for
+EMBL database). If some other supported format is needed this option
+is needed (E.g. formats for EMBL: fasta, bsml, agave).
+
+This server only accepts "genbank" format.
+
+option: id
+
+  Option  : id
+  Descr   : unique database identifier(s)
+  Type    : required
+  Usage   : id=J00231 | id=J00231+BUM
+  Arg     : string 
+
+The ID option should be able to process all UIDS in a database. It
+should not be necessary to know if the UID is an ID, accession number
+or accession.version.
+
+The number of entry UIDs allowed is implementation specific. If the
+limit is exceeded, the program reports an error. The UIDs should
+be separated by spaces (use '+' in a GET method string).
+
+
+ERROR MESSAGES
+
+The following standardized one line messages should be printed out in
+case of an error.
+
+ERROR 1 Unknown database [$db].
+ERROR 2 Unknown style [$style].
+ERROR 3 Format [$format] not known for database [$db].
+ERROR 4 ID [$id] not found in database [$db].
+ERROR 5 Too many IDs [$count]. Max [$MAXIDS] allowed.
+
+END
+;
+
+exit 0;
+}
+
+# Print one protocol error line of the form "ERROR <code> <message>".
+# The script then exits with status 0 unless a true $noexit is passed, which
+# lets the caller report several errors in a row.
+sub error {
+  my $code    = shift;
+  my $message = shift;
+  my $noexit  = shift;
+  print "ERROR $code $message\n";
+  $noexit or exit 0;
+}
+
+__END__
+
+=head1 NAME
+
+biofetch_genbank_proxy.pl - Caching BioFetch-compatible web proxy for GenBank
+
+=head1 SYNOPSIS
+
+  Install in cgi-bin directory of a Web server.  Stand back.
+
+=head1 DESCRIPTION
+
+This CGI script acts as the server side of the BioFetch protocol as
+described in http://obda.open-bio.org/Specs/.  It provides two
+database access services, one for data source "genbank" (nucleotide
+entries) and the other for data source "genpep" (protein entries).
+
+This script works by forwarding its requests to NCBI's eutils script,
+which lives at http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi.
+It then reformats the output according to the BioFetch format so the
+sequences can be processed and returned by the Bio::DB::BioFetch
+module.  Returned entries are temporarily cached on the Web server's
+file system, allowing frequently-accessed entries to be retrieved
+without another round trip to NCBI.
+
+=head2 INSTALLATION
+
+You must have the following installed in order to run this script:
+
+   1) perl
+   2) the perl modules LWP and Cache::FileCache
+   3) a web server (Apache recommended)
+
+To install this script, copy it into the web server's cgi-bin
+directory.  You might want to shorten its name; "dbfetch" is
+recommended.
+
+There are several constants located at the top of the script that you
+may want to adjust.  These are:
+
+CACHE_LOCATION
+
+This is the location on the filesystem where the cached files will be
+located.  The default is /usr/tmp/dbfetch_cache.
+
+MAX_SIZE
+
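+This is meant to be the maximum size that the cache can grow to, with
+older entries being deleted automatically once it is exceeded.  The
+default setting is 100,000,000 bytes (100 MB).  Note that the script as
+shipped only defines this constant; it is never handed to the cache
+object, so the limit is not actually enforced.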
+
+EXPIRATION
+
+Entries that haven't been accessed in this length of time will be
+removed from the cache.  The default is 1 week.
+
+PURGE
+
+This constant specifies how often the cache will be purged for older
+entries.  The default is 1 hour.
+
+=head1 TESTING
+
+To see if this script is performing as expected, you may test it with
+this script:
+
+ use Bio::DB::BioFetch;
+ my $db = Bio::DB::BioFetch->new(-baseaddress=>'http://localhost/cgi-bin/dbfetch',
+	 			 -format     =>'genbank',
+				 -db         =>'genbank');
+ my $seq = $db->get_Seq_by_id('DDU63596');
+ print $seq->seq,"\n";
+
+This should print out a DNA sequence.
+
+=head1 SEE ALSO
+
+L<Bio::DB::BioFetch>, L<Bio::DB::Registry>
+
+=head1 AUTHOR
+
+Lincoln Stein, E<lt>lstein-at-cshl.orgE<gt>
+
+Copyright (c) 2003 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/DB/bioflat_index.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/DB/bioflat_index.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/DB/bioflat_index.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,117 @@
+#!/usr/bin/perl -w
+#$Id: bioflat_index.PLS,v 1.5.6.1 2006/11/17 09:32:42 sendu Exp $
+
+=head1 NAME
+
+bioflat_index.pl - index sequence files using Bio::DB::Flat
+
+=head1 DESCRIPTION
+
+ Create or update a biological sequence database indexed with the
+ Bio::DB::Flat indexing scheme.  The arguments are a list of flat files
+ containing the sequence information to be indexed.
+
+=head1 USAGE
+
+ bioflat_index.pl <options> file1 file2 file3...
+
+ Options:
+
+     --create              Create or reinitialize the index.  If not specified,
+                           the index must already exist.
+
+     --format   <format>   The format of the sequence files.  Must be one
+                           of "genbank", "swissprot", "embl" or "fasta".
+
+     --location <path>     Path to the directory in which the index files
+                           are stored.
+
+     --dbname <name>       The symbolic name of the database to be created.
+
+     --indextype <type>    Type of index to create.  Either "bdb" or "flat".
+                           "binarysearch" is the same as "flat".
+
+Options can be abbreviated.  For example, use -i for --indextype.
+
+The following environment variables will be used as defaults if the 
+corresponding options are not provided:
+
+     OBDA_FORMAT      format of sequence file
+     OBDA_LOCATION    path to directory in which index files are stored
+     OBDA_DBNAME      name of database
+     OBDA_INDEXTYPE   type of index to create
+
+=cut
+
+use strict;
+use Bio::Root::Root;
+use Bio::Root::IO;
+use Bio::DB::Flat;
+use Getopt::Long;
+use File::Path qw(mkpath rmtree);
+
+my ($CREATE,$FORMAT,$LOCATION,$DBNAME,$INDEXTYPE);
+
+# Command-line options.  Every value is optional at this point because the
+# OBDA_* environment variables below may supply it instead.
+GetOptions( 'create'      => \$CREATE,
+            'format:s'    => \$FORMAT,
+            'location:s'  => \$LOCATION,
+            'dbname:s'    => \$DBNAME,
+            'indextype:s' => \$INDEXTYPE );
+
+# Fall back to the environment for anything not given on the command line.
+$FORMAT    = $ENV{OBDA_FORMAT}    unless defined $FORMAT;
+$LOCATION  = $ENV{OBDA_LOCATION}  unless defined $LOCATION;
+$DBNAME    = $ENV{OBDA_DBNAME}    unless defined $DBNAME;
+$INDEXTYPE = $ENV{OBDA_INDEXTYPE} unless defined $INDEXTYPE;
+# This variable has also been referred to as OBDA_INDEX, so accept that
+# spelling as a fallback; OBDA_INDEXTYPE wins when both are set.
+$INDEXTYPE = $ENV{OBDA_INDEX}     unless defined $INDEXTYPE;
+
+# Both are used as class names only: throw/warn and catfile are called as
+# class methods.
+my $root = 'Bio::Root::Root';
+my $io   = 'Bio::Root::IO';
+
+# confirm that database directory is there (or create it when asked to)
+defined $LOCATION or
+  $root->throw("please provide a base directory with the --location option");
+
+unless (-d $LOCATION) {
+  # The message below promises that --create makes a new index, so create
+  # the missing base directory in that case instead of failing.
+  $CREATE or
+    $root->throw("$LOCATION is not a valid directory; use --create to create a new index");
+  mkpath($LOCATION);
+  -d $LOCATION or
+    $root->throw("could not create directory $LOCATION");
+}
+
+defined $DBNAME or
+  $root->throw("please provide a database name with the --dbname option");
+
+defined $FORMAT or
+  $root->throw("please specify the format for the input files with the --format option");
+
+unless (defined $INDEXTYPE) {
+  $INDEXTYPE = 'flat';
+  $root->warn('setting index type to "flat", use the --indextype option to override');
+}
+
+# Confirm that database is there and that --create flag is sensible.
+my $path = $io->catfile($LOCATION,$DBNAME,'config.dat');
+if (-e $path) {
+  if ($CREATE) {
+    $root->warn("existing index detected; deleting.");
+    rmtree($io->catfile($LOCATION,$DBNAME),1,1);
+  } else {
+    # An existing index is updated as it is; leaving $INDEXTYPE undefined
+    # keeps the -index argument out of the constructor call below.
+    $root->warn("existing index detected; ignoring --indextype and --format options.");
+    undef $INDEXTYPE;
+  }
+}
+elsif (!$CREATE) {
+  $root->throw("Cannot find database config file at location $path; use --create to create a new index");
+}
+
+# open for writing/updating
+my @index_arg = $INDEXTYPE ? (-index => $INDEXTYPE) : ();
+my $db = Bio::DB::Flat->new(-directory  => $LOCATION,
+                            -dbname     => $DBNAME,
+                            @index_arg,
+                            -write_flag => 1,
+                            -format     => $FORMAT) or
+  $root->throw("can't create Bio::DB::Flat object");
+
+# The remaining command-line arguments are the flat files to index.
+my $entries = $db->build_index(@ARGV);
+
+print STDERR "(Re)indexed $entries entries.\n";
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/DB/bioflat_index.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/DB/biogetseq.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/DB/biogetseq.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/DB/biogetseq.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,58 @@
+#!perl -w
+# $Id: biogetseq.PLS,v 1.4 2005/11/17 09:54:54 heikki Exp $
+#
+# OBDA Registry compliant sequence retrieval script
+#
+# Copyright Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+# You may distribute this program under the same terms as perl itself
+
+
+use Bio::DB::Registry;
+use Bio::SeqIO;
+use Getopt::Long;
+use strict;
+
+my ($help, $format, $namespace, $dbname) = ('', 'embl', 'acc', 'embl');
+GetOptions ("help" => \$help, "format=s" => \$format,
+            "namespace=s" => \$namespace, "dbname=s" => \$dbname );
+if ($help || !@ARGV) {
+  # List form, so the script path is never interpreted by a shell.
+  system('perldoc', $0);
+  exit 0;
+}
+
+# The retrieval method is picked at run time from --namespace,
+# e.g. 'acc' selects get_Seq_by_acc.
+my $get_function = 'get_Seq_by_'. $namespace;
+
+# The database handle does not depend on the identifier, so look it up once
+# and fail with a readable message when the registry does not know it.
+my $registry = Bio::DB::Registry->new();
+my $db = $registry->get_database($dbname)
+    or die "Could not find database [$dbname] in the registry\n";
+
+my $out;   # output stream, created when the first sequence is found
+
+# 'defined' keeps the loop going for an identifier that is false in
+# boolean context, such as "0".
+while (defined(my $id = shift)) {
+    my $seq = $db->$get_function($id);
+
+    if ($seq) {
+        $out ||= Bio::SeqIO->new('-format' => $format);
+        $out->write_seq($seq);
+    } else {
+        print STDERR "Could not find sequence with identifier [$id]\n";
+    }
+}
+
+=head1 NAME
+
+biogetseq - sequence retrieval using OBDA registry
+
+=head1 DESCRIPTION
+
+This script retrieves sequences from the source defined by the user's
+registry setup.  The current alternatives are from a local indexed
+file, sql database or over the web.
+
+=head1 USAGE
+
+  Usage: biogetseq --dbname embl --format embl
+                   --namespace acc id [ ids... ]
+         * dbname defaults to embl
+         * format defaults to embl
+         * namespace defaults to 'acc' ['id', 'acc', 'version']
+         * unnamed arguments are ids in the given namespace
+
+=cut
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/DB/biogetseq.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/DB/flanks.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/DB/flanks.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/DB/flanks.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,314 @@
+#!perl
+# -*-Perl-*-
+# $Id: flanks.PLS,v 1.7 2006/07/04 22:23:29 mauricio Exp $
+#
+# Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
+# Finding flanking sequences for a variant.
+#
+#
+#   v. 1     16 Mar 2001
+#   v. 1.1    9 Aug 2001 interface change, more info in fasta header
+#   v. 2.0   23 Nov 2001 new code from the flanks CGI program
+#                         support for EMBL-like positions
+#   v. 3.0   21 Feb 2003 new command line interface
+
+
+use Bio::PrimarySeq;
+use Bio::SeqIO;
+use Bio::DB::EMBL;
+use Bio::DB::GenBank;
+use Getopt::Long;
+use strict;
+
+use constant VERSION => '3.0';
+
+my $help = '';
+my $flank = 100;              # flank length on both sides of the region
+my $in_format = 'EMBL';       # format of the file to read in
+my @pos;                      # position(s) in the sequence
+
+
+GetOptions ("help" => \$help, "flanklength=i" => \$flank,
+            "position=s" => \@pos );
+
+# --position may be repeated and each value may be a comma separated list;
+# flatten everything into one list of positions.
+@pos = split(/,/,join(',',@pos));
+
+# Show the embedded documentation and stop.  An optional message is printed
+# on STDERR first and turns the exit status into a failure.
+sub usage {
+    my ($message) = @_;
+    print STDERR "\n$message\n" if defined $message;
+    system('perldoc', $0);    # list form: no shell involved
+    exit(defined $message ? 1 : 0);
+}
+
+usage() if $help;
+usage('You need to provide an accession or a file name') unless @ARGV;
+usage('You need to provide --position option') unless @pos;
+
+# $file and $seq stay file-scoped lexicals: the subroutines further down
+# refer to $file in their messages.
+my $file = shift;
+usage('You need to provide an accession or a file name')
+    unless defined $file && length $file;
+
+my $seq = get_seq($file);
+exit 0 unless $seq;
+
+extract($seq, \@pos, $flank);
+
+#
+## end main
+#
+
+# Fetch the sequence named by the unnamed command-line argument.
+#
+#   $file - path of an existing local file in EMBL format; otherwise an
+#           identifier containing a '.' is looked up in GenBank by version,
+#           and anything else is looked up in EMBL by accession
+#
+# Returns the sequence object.  When no sequence could be obtained a
+# message is written to STDERR and nothing is returned.
+sub get_seq {
+    my ($file) = @_;
+    my $IN_FORMAT = 'EMBL';     # format of the local file on disk
+    my $seq;                    # local, so no file-scoped state is touched
+
+    if (-e $file ) {            # local file
+        my $in = Bio::SeqIO->new('-file'   => $file,
+                                 '-format' => $IN_FORMAT);
+        $seq = $in->next_seq();
+    }
+    elsif ($file =~ /\./) {     # sequence version from GenBank
+        # Retrieval errors are trapped on purpose; they end up in the
+        # "Could not find" message below.
+        eval {
+            my $gb = Bio::DB::GenBank->new;
+            $seq = $gb->get_Seq_by_version($file);
+        };
+    }
+    else {                      # plain accession number from more reliable EMBL
+        eval {
+            my $embl = Bio::DB::EMBL->new;
+            $seq = $embl->get_Seq_by_acc($file);
+        };
+    }
+
+    # Written as a block: with 'print ... && return' the return ran before
+    # the print, so the message was never shown.
+    unless ($seq) {
+        print STDERR "Could not find sequence [$file]\n";
+        return;
+    }
+    return $seq;
+}
+
+# Write one FASTA entry to STDOUT for every requested position.
+#
+#   $seq   - source sequence object; its length and subseq methods are used
+#   $pos   - array reference of position strings of the form
+#            [id:] int | int..int | int^int [-]
+#   $flank - number of flanking nucleotides wanted on each side
+#
+# The region of interest is written in upper case and the flanks in lower
+# case; an in-between (int^int) position is written all in lower case.  A
+# trailing '-' selects the reverse complement.  Positions that cannot be
+# used are skipped, most of them with a message on STDERR.  The messages
+# name the source through the file-scoped $file variable.
+sub extract {
+    my ($seq, $pos, $flank) = @_;
+    my $OUT_FORMAT = 'FASTA';   # output format, going into STDOUT
+
+    my $out = Bio::SeqIO->new('-format' => $OUT_FORMAT);
+
+    my $count = 1;              # running number for positions without an id
+    foreach my $idpos (@$pos) {
+
+        my ($id, $pos_range, $start, $end, $allele_len);
+        my $inbetween = 0;      # handle 23^24 notation as well as plain integer (24)
+                                # but set flag and make corrections when needed
+        # Reset for every position: a trailing '-' must only reverse the
+        # entry it belongs to, not all the positions that follow it.
+        my $strand = 1;
+
+        if ($idpos =~ /:/ ) {   # id and position separator
+            ($id, $pos_range) = split /:/, $idpos;
+        } else {                # no id
+            $id = $count;
+            $count++;
+            $pos_range = $idpos;
+        }
+        next unless defined $pos_range && length $pos_range;
+
+        $strand = -1 if $pos_range =~ /-$/; # opposite strand
+        $pos_range = $1 if $pos_range =~ /(.+)-/; # remove trailing '-'
+
+        if ($pos_range =~ /\^/) { # notation 23^24 used
+            ($start, $end) = split /\^/, $pos_range;
+            unless ($end == $start + 1) {
+                print STDERR $id, ": Give adjacent nucleotides flanking '^' character, not [",
+                    $start, "] and [", $end, "]\n";
+                next;
+            }
+            $end = $start;
+            $inbetween = 1;
+        } else {                #  notation 23..24 used
+            ($start, $end) = split /\.\./, $pos_range;
+        }
+        $end ||= $start;        # notation 23 used
+        if ($start > $end) {
+            print STDERR $id, ": Start can not be larger than end. Not [",
+                $start, "] and [", $end, "]\n";
+            next;
+        }
+        $allele_len = $end - $start;
+
+        # sanity checks
+        next unless defined $start && $start =~ /\d+/ && $start != 0;
+        if ($start < 1 or $start > $seq->length) {
+            print STDERR "Position '$start' not in sequence '$file'\n";
+            next;
+        }
+        if ($end < 1 or $end > $seq->length) {
+            print STDERR "End position '$end' not in sequence '$file'\n";
+            next;
+        }
+
+        # determine nucleotide positions
+        # left edge
+        my $five_start = $start - $flank;
+        $five_start = 1 if $five_start < 1; # not outside the sequence
+        # right edge
+        my $three_end = $start + $allele_len + $flank;
+        $three_end = $seq->length if $three_end > $seq->length;
+        $three_end-- if $inbetween;
+
+        # extract the sequences; subseq is only called for a non-empty
+        # range, so a position at the very start or end of the sequence
+        # (or a very small flank) yields an empty flank instead
+        my $five_prime = '';    # left flank
+        if ($five_start <= $start - 1) {
+            $five_prime = lc $seq->subseq($five_start , $start - 1);
+        }
+        my $snp = uc $seq->subseq($start, $end); # allele (always > 0 length)
+        $snp = lc $snp if $inbetween;
+
+        my $three_prime = '';   # right flank
+        if ($end < $seq->length && $end + 1 <= $three_end) {
+            $three_prime = lc $seq->subseq($end + 1, $three_end);
+        }
+
+        # allele positions in local, extracted coordinates
+        my $locpos = length($five_prime) + 1;
+        my $loc_end;
+        if ($allele_len) {
+            $loc_end = "..". ($locpos+$allele_len);
+        } else {
+            $loc_end = '';
+            $loc_end = '^'. ($locpos+1) if $inbetween;
+        }
+        # build FASTA id and description line
+        my $fastaid = uc($id). "_". uc($file).
+            " oripos=$pos_range strand=$strand allelepos=$locpos$loc_end";
+
+        # build the output string; reverse-complementing the joined string
+        # gives the same result as reverse-complementing the three parts
+        # and joining them in the opposite order
+        my $str = $five_prime . $snp . $three_prime;
+        if ($strand == -1) {
+            $str = Bio::PrimarySeq->new(-alphabet => 'dna',
+                                        -seq      => $str)->revcom->seq;
+        }
+
+        # build BioPerl sequence object
+        my $out_seq = Bio::PrimarySeq->new(-id       => $fastaid,
+                                           -alphabet => 'dna',
+                                           -seq      => $str);
+        $out->write_seq($out_seq); # print sequence out
+    }
+}
+
+
+
+=head1 NAME
+
+flanks - finding flanking sequences for a variant in a sequence position
+
+=head1 SYNOPSIS
+
+  flanks --position POS [-p POS ...]  [--flanklen INT]
+         accession | filename
+
+=head1 DESCRIPTION
+
+This script allows you to extract a subsequence around a region of
+interest from an existing sequence. The output is a fasta formatted
+sequence entry where the header line contains additional information
+about the location.
+
+=head1 OPTIONS
+
+The script takes one unnamed argument which can be either a file name in
+the local file system or a nucleotide sequence accession number.
+
+
+  -p         Position uses simple nucleotide sequence feature table
+  --position notation to define the region of interest, typically a
+             SNP or microsatellite repeat around which the flanks are
+             defined.
+
+             There can be more than one position option or you can
+             give a comma separated list to one position option.
+
+             The format of a position is:
+
+                 [id:] int | range | in-between [-]
+
+             The optional id is the name you want to call the new
+             sequence. If it is not given, a running number is joined
+             to the entry name with an underscore.
+
+             The position is either a point (e.g. 234), a range (e.g
+             250..300) or insertion point between nucleotides
+             (e.g. 234^235)
+
+             If the position is not completely within the source
+             sequence the output sequence will be truncated and it
+             will print a warning.
+
+             The optional hyphen [-] at the end of the position
+             indicates that you want the retrieved sequence to be
+             in the opposite strand.
+
+
+  -f         Defaults to 100. This is the length of the nucleotides
+  --flanklen sequence retrieved on both sides of the given position.
+
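+             If the source sequence does not contain that many
+             nucleotides on one side of the position, the flank on
+             that side is truncated at the end of the sequence.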
+
+=head1 OUTPUT FORMAT
+
+The output is a fasta formatted entry where the description file
+contains tag=value pairs for information about where in the original
+sequence the subsequence was taken.
+
+The ID of the fasta entry is the name given at the command line joined
+by an underscore to the filename or accession of the source sequence.
+If no id is given a series of consecutive integers is used.
+
+The tag=value pairs are:
+
+=over 3
+
+=item oripos=int
+
+position in the source file
+
+=item strand=1|-1
+
+strand of the sequence compared to the source sequence
+
+=item allelepos=int
+
+position of the region of interest in the current entry.
+The tag is the same as used by dbSNP at NCBI
+
+=back
+
+The sequence highlights the allele variant position by showing it in
+upper case and rest of the sequence in lower case characters.
+
+=head1 EXAMPLE
+
+  % flanks -p 100 ~/seq/ar.embl
+
+  >1_/HOME/HEIKKI/SEQ/AR.EMBL oripos=100 strand=1 allelepos=100
+  taataactcagttcttatttgcacctacttcagtggacactgaatttggaaggtggagga
+  ttttgtttttttcttttaagatctgggcatcttttgaatCtacccttcaagtattaagag
+  acagactgtgagcctagcagggcagatcttgtccaccgtgtgtcttcttctgcacgagac
+  tttgaggctgtcagagcgct
+
+
+=head1 TODO
+
+The input files are assumed to be in EMBL format and the sequences are
+retrieved only from the EMBL database. Make this more generic and use
+the registry.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  E<lt>heikki-at-bioperl-dot-orgE<gt>
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/scripts/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,14 @@
+These scripts have been contributed by the developers and users of
+Bioperl. The scripts in scripts/ are production quality scripts that 
+have POD documentation and accept command-line arguments, and all of 
+these scripts have the PLS suffix.
+
+You can install the scripts in the scripts/ directory if you'd like,
+simply follow the instructions on 'make install'. The installation
+directory is specified by the INSTALLSCRIPT variable in the Makefile,
+the default directory is /usr/bin. Installation will copy the scripts
+to the specified directory, change the 'PLS' suffix to 'pl' and
+prepend 'bp_' to the script name if it isn't so named already.
+
+Please contact bioperl-l at bioperl.org if you are interested in
+contributing your own script.


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/README
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/biblio/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/biblio/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/biblio/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+These are scripts to manipulate bibliographic repositories using the
+Bio::Biblio modules.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/biblio/biblio.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/biblio/biblio.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/biblio/biblio.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,493 @@
+#!perl -w
+#
+#   A client showing how to use Bio::Biblio module, a module for
+#   accessing and querying a bibliographic repository.
+#   It also shows how to use modules Bio::Biblio::IO::medlinexml
+#   Bio::Biblio::IO::medline2ref which converts XML MEDLINE
+#   citations into a simple hash table and into full Perl objects.
+#
+#   It has many options in order to cover as many methods as
+#   possible.  Because of that, it can be also used as a fully
+#   functional command-line client for querying repository and
+#   retrieving citations from it.
+#
+#   Usage: biblio.PLS -h
+#
+#   martin.senger at gmail.com
+#   February 2002
+#
+#   $Id: biblio.PLS,v 1.5 2006/07/04 22:23:29 mauricio Exp $
+#-----------------------------------------------------------------------------
+
+use strict;
+
+# Show this script's documentation through perldoc.  exec replaces the
+# running process, so control only comes back here when perldoc could not
+# be started.
+sub get_usage {
+    my @viewer = ('perldoc', $0);
+    exec @viewer;
+}
+
+# Compile-time setup: extend @INC, declare the $opt_* option variables and
+# parse the single-letter options before the rest of the script is compiled.
+BEGIN {
+    # add path to the directory with this script
+    # (the substitution strips the last '/'-separated component from $0)
+    my $mylib;
+    ($mylib = $0) =~ s|/[^/]+$||;
+    unshift @INC, $mylib;
+
+    # be prepared for command-line options/arguments
+    use Getopt::Std;
+
+    # The option variables are declared as package variables with
+    # 'use vars' so that they stay visible under 'use strict' outside
+    # this block as well.
+
+    # general options
+    use vars qw/ $opt_h $opt_v $opt_q /;
+    # specialized options
+    use vars qw/ $opt_a $opt_b $opt_c $opt_d $opt_D $opt_e $opt_k $opt_n $opt_p $opt_r $opt_s /;
+    # options with a value
+    use vars qw/ $opt_f $opt_F $opt_g $opt_i $opt_l $opt_m $opt_O $opt_V /;
+    my $switches = 'fFgilmOV';   # these are switches taking an argument (a value)
+    getopt ($switches);
+
+    # help wanted?
+    # get_usage (defined above) execs perldoc, so the print and the exit
+    # are only reached when that exec fails.
+    if ($opt_h) {
+	print get_usage;
+	exit 0;
+    }
+}
+
+use Bio::Biblio;       # to read data via SOAP
+use Bio::Biblio::IO;   # to convert resulting XML to Biblio objects
+use Data::Dumper;      # to print resulting data in a raw form
+
+# --- report the Bio::Biblio version and revision, then stop
+if ($opt_v) {
+    print "$Bio::Biblio::VERSION\n", "$Bio::Biblio::Revision\n";
+    exit 0;
+}
+
+# --- a local file was named with -f: hand it to the converter
+if ($opt_f) {
+    convert_and_print ($opt_f);
+}
+
+# --- create a Biblio object;
+#     the new() method understands the following parameters
+#     (but none of them is mandatory - unless the default service location
+#      is not where you want to go today):
+#
+#      -location           (taken from '-l' option if given)
+#      -collection_id      (taken from '-i' option if given)
+#      -destroy_on_exit    (set to false if '-k' or '-p' or '-i' are given)
+#
+#      And just for information (these can be used from your script but
+#      they are not set-able by this script):
+#
+#      -access => 'soap'   (not set-able here, a default value will be used)
+#      -namespace => '...' (not set-able here, a default value will be used)
+#      -soap               (not set-able here)
+#
+#      Additionally, it uses env. variable HTTPPROXY to create parameter
+#      '-httpproxy'.
+#
+# Each optional constructor argument is either a (name, value) pair or an
+# empty list.  A conditional expression is used rather than
+# 'my @x = (...) if COND', whose behaviour perlsyn declares undefined.
+my @location   = defined $opt_l ? ('-location', $opt_l) : ();
+my @collection = defined $opt_i ? ('-collection_id', $opt_i) : ();
+my @destroy    = ($opt_k or $opt_p or $opt_i) ? ('-destroy_on_exit', 0) : ();
+my @httpproxy  = defined $ENV{'HTTPPROXY'} ? ('-httpproxy', $ENV{'HTTPPROXY'}) : ();
+my $biblio = Bio::Biblio->new (@location, @collection, @destroy, @httpproxy);
+
+die "Stopped. No success in accessing the bibliographic repository.\n" unless $biblio;
+
+#
+# all remaining command-line arguments (if any remains after getopts) are:
+#     -find <keywords> [-attrs <attributes>]
+# and these (up-to-)pairs can be repeated...
+#
+# ...and it creates a query collection (perhaps more than one) and
+# assigns it (or the last one in case on 'chained' finds) to $biblio
+#
+# Walk the remaining arguments.  Every '-find' starts a new query; the
+# query collected so far is run first, so chained finds refine each other.
+# 'defined' is tested instead of truth so that an argument such as "0"
+# neither ends the loop early nor gets dropped as a keyword.
+my ($keywords, $attrs, $next);
+while (defined ($next = shift)) {
+    if ($next eq '-find') {
+        $biblio = _find ($biblio, $keywords, $attrs)
+            if defined $keywords && length $keywords;
+        $keywords = shift;
+        undef $attrs;
+    } elsif ($next eq '-attrs') {
+        $attrs = shift;
+    }
+}
+# run the last (or only) query that is still pending
+$biblio = _find ($biblio, $keywords, $attrs)
+    if defined $keywords && length $keywords;
+
+#
+# now we have either the top-level collection (if there were no -find
+# arguments), or a resulting collection from the -find queries above
+# ...let's do with it what was asked by options
+#
+
+# ...print the number of citations
+print $biblio->get_count . "\n" if $opt_c;
+
+# ...get one particular citation (this method does not use any -finds above)
+convert_and_print ($biblio->get_by_id ($opt_g)) if $opt_g;
+
+# ...print all citation IDs
+print join ("\n", @{ $biblio->get_all_ids }) . "\n" if $opt_d;
+
+# ...print all citations - returned as one big string from the server
+convert_and_print ($biblio->get_all) if $opt_s;
+
+# ... reset iteration in the collection again to the first citation
+if ($opt_r) {
+    $biblio->reset_retrieval;
+    print "Reset OK.\n" unless $opt_q;
+}
+
+# ...print more citations (perhaps all) - returned as an array of citations
+# ('-a' is implemented as '-m' with a very large count)
+$opt_m = 100000000 if $opt_a;
+if (defined $opt_m) {
+    foreach my $cit (@{ $biblio->get_more ($opt_m) }) {
+        convert_and_print ($cit);
+    }
+}
+
+# ...print next citation from the current collection
+convert_and_print ($biblio->get_next) if $opt_n;
+
+# ...check existence of a collection and completeness of its iterator
+if ($opt_e) {
+    my $exists = $biblio->exists;
+    # has_next is only asked of an existing collection; a conditional
+    # expression avoids the undefined 'my $x = ... if COND' construct
+    my $has_next = $exists ? $biblio->has_next : undef;
+    # report any false value as a literal 0
+    $exists = '0' unless $exists;
+    $has_next = '0' unless $has_next;
+
+    if ($opt_q) {
+        print "$exists\n$has_next\n";
+    } else {
+        print "Exists: $exists\tHas next: $has_next\n";
+    }
+}
+
+# ...destroy collection
+if ($opt_D) {
+    $biblio->destroy;
+    print "Destroyed OK.\n" unless $opt_q;
+}
+
+# ...print the collection ID
+if ($opt_p) {
+    my $id = $biblio->get_collection_id;
+    print "$id\n" if $id;
+}
+
+# ...controlled vocabularies: the first letter of the -V value selects the
+# request, the vocabulary name and the value follow, separated by '::'
+if ($opt_V) {
+    my (undef, $name, $value) = split (/\:\:/, $opt_V, 3);
+
+    if ($opt_V =~ /^n/) {
+        # -Vn: all vocabulary names
+        my $names = $biblio->get_vocabulary_names;
+        print join ("\n", @{ $names }) . "\n";
+    }
+    elsif ($opt_V =~ /^v/) {
+        # -Vv::<name>: all values from a given vocabulary
+        my $values = $biblio->get_all_values ($name);
+        print join ("\n", @{ $values }) . "\n";
+    }
+    elsif ($opt_V =~ /^a/) {
+        # -Va::<name>: all entries from a given vocabulary
+        print Data::Dumper->Dump ( [$biblio->get_all_entries ($name)], ['All entries']);
+    }
+    elsif ($opt_V =~ /^d/) {
+        # -Vd::<name>::<value>: description of a given vocabulary entry
+        print $biblio->get_entry_description ($name, $value) . "\n";
+    }
+    elsif ($opt_V =~ /^e/) {
+        # -Ve::<name>::<value>: does the vocabulary contain the value?
+        my $contains = $biblio->contains ($name, $value) || '0';
+        if ($opt_q) {
+            print "$contains\n";
+        } else {
+            print "Value '$value' in vocabulary '$name': $contains\n";
+        }
+    }
+}
+
+# Run one query against a collection and return the resulting collection.
+#
+#   $biblio   - collection to search in
+#   $keywords - keywords, passed on to find unchanged
+#   $attrs    - attributes to restrict the search to (may be undefined)
+#
+# Progress is reported on STDOUT unless -q was given; the ID of the new
+# collection is reported as well when -k was given.
+sub _find {
+    my ($biblio, $keywords, $attrs) = @_;
+    my $chatty = !$opt_q;
+
+    $| = 1;   # unbuffered, so the "Looking for" text shows before the query runs
+    if ($chatty) {
+        my $where = $attrs ? " in attributes '$attrs'..." : "...";
+        print "Looking for '$keywords'" . $where;
+    }
+
+    my ($result) = $biblio->find ($keywords, $attrs);
+
+    if ($chatty) {
+        print "\tFound " . $result->get_count . "\n";
+        print "\tReturned collection is '" . $result->get_collection_id . "'.\n"
+            if $opt_k;
+    }
+    return $result;
+}
+
+# Print a citation, converting it first when an output format other than
+# XML was requested with -O.
+#
+#   $citation - the citation data, or the name of the file to read when
+#               -f was given
+#
+# Without -O, or with -Ox, the citation is printed as it is; a file named
+# with -f is not read in that case.
+sub convert_and_print {
+    my ($citation) = @_;
+
+    # no conversion wanted: we are happy with the XML string
+    my $convert = (defined $opt_O and $opt_O !~ /^x/);
+    if (!$convert) {
+        print_one ($citation) unless $opt_f;   # we do not do a simple file reading
+        return;
+    }
+
+    # -Fp selects the PubMed flavour of XML, everything else means MEDLINE
+    my $is_pubmed = ($opt_F and $opt_F =~ /^p/);
+
+    # -Or asks for a raw hash, everything else for Biblio objects, whose
+    # kind follows the XML flavour (the converter's own default result
+    # type is used when no -result is passed)
+    my @args;
+    if ($opt_O =~ /^r/) {
+        @args = ('-result' => 'raw');
+    } elsif ($is_pubmed) {
+        @args = ('-result' => 'pubmed2ref');
+    }
+
+    # which XML format to parse, and where to take the citation from
+    push @args, '-format' => ($is_pubmed ? 'pubmedxml' : 'medlinexml');
+    push @args, ($opt_f ? '-file' : '-data'), $citation;
+
+    # make an instance of a converter and print everything it yields
+    my $converter = Bio::Biblio::IO->new (@args);
+    while (my $bibref = $converter->next_bibref) {
+        print_one ($bibref);
+    }
+}
+
+# Print a single citation to STDOUT in the form that suits its type.
+#
+#   $citation - a plain string (printed as it is), an unblessed hash, array
+#               or scalar reference (dumped with Data::Dumper), or any
+#               other reference (its print_me method is called and the
+#               result printed)
+#
+# An undefined citation prints nothing.
+sub print_one {
+    my ($citation) = @_;
+    return unless defined $citation;
+    if (ref (\$citation) eq 'SCALAR') {
+        # a reference to $citation is a plain SCALAR reference only when
+        # $citation itself holds no reference
+        print $citation;
+    } elsif (ref ($citation) =~ /^(?:HASH|ARRAY|SCALAR)$/) {
+        # The alternation is grouped so that both anchors apply to every
+        # alternative; ungrouped, a class whose name merely starts with
+        # HASH or contains ARRAY was dumped instead of printed.
+        print Data::Dumper->Dump ( [$citation], ['Citation']);
+    } else {
+        print $citation->print_me;
+    }
+}
+
+
+__END__
+
+
+=head1 NAME
+
+biblio.PLS - bioperl client for accessing and querying a bibliographic
+repository.
+
+=head1 SYNOPSIS
+
+Usage:
+
+  biblio.PLS [vh]
+  biblio.PLS [bcFgOpq]         [-l <URL>]
+  biblio.PLS [abcdDeFknmOpqrs] [-l <URL>] -i <collection-ID>
+  biblio.PLS [abcdDeFknmOpqrs] [-l <URL>] - -find <keywords> \
+         [-attrs <attrs>]...
+  biblio.PLS [Vq]              [-l <URL>]
+  biblio.PLS [FOq]             [-f <filename>]
+
+=head1 DESCRIPTION
+
+A client showing how to use Bio::Biblio module, a module for accessing
+and querying a bibliographic repository.  It also shows how to use
+modules Bio::Biblio::IO::medlinexml Bio::Biblio::IO::medline2ref which
+converts XML MEDLINE citations into a simple hash table and into full
+Perl objects.
+
+It has many options in order to cover as many methods as possible.
+Because of that, it can be also used as a fully functional
+command-line client for querying repository and retrieving citations
+from it.
+
+=head1 OPTIONS
+
+=head2 What service to contact:
+
+  -l <URL> ... a location where a Bibliographic Query service is
+               provided as a WebService
+               (default: http://www.ebi.ac.uk/openbqs/services/MedlineSRS)
+
+=head2 What query collection to use:
+
+Some options do not need to specify a collection, some do.
+
+  -i <collection_id>  ... the collection ID can be obtained in a
+                          previous invocation by specifying argument
+                          '-p' (print ID)
+  -find <keywords> [-attrs <attrs>]
+                      ... create a collection from citations
+                          containing given keywords - either in all
+                          default attributes, or only in the given
+                          attributes;
+
+                          it is possible to repeat it, for example:
+                             -find brazma -attrs authors -find -study
+                          (the repetitions refine previous results)
+                          both <keywords> and <attrs> may be
+                          comma-delimited multi-values;
+                          note that '-find' must be separated from
+                          the rest of options by '-';
+
+                          note that this script is a bit stupid
+                          regarding quoted keywords, or keywords
+                          containing commas... TBD better
+
+  what XML format is used for citations:
+  -Fm     ... MEDLINE (default)
+  -Fp     ... PubMed
+
+=head2 What to do (with the query collection):
+
+  -g <id>    ... get citation <id>
+  -c         ... get count (a number of citations)
+  -p         ... print collection ID (which may be used in the next
+                 invocation as an '-i' argument); it implies also '-k'
+  -b         ... print citations in a non-XML format (TBD)
+
+Other options can be used only on a sub-collection - which can be
+obtained directly by specifying '-i' argument, or indirectly by
+specifying one or more queries by '-find' arguments:
+
+  -d         ... get all citation IDs
+  -n         ... get next citation
+  -m [<how_many>] ... get 'how_many' more
+  -r         ... reset iteration to the first citation in the collection
+                 (now you can use '-n' or '-m' again)
+  -a         ... get all citations - as an array
+  -s         ... as '-a' but get it as one string
+  -e         ... check if given collection exists and has more citations
+  -k         ... keep resulting collection persistent (makes sense only
+                 when collection IDs are being printed otherwise you
+                 would not know how to contact the persistent collection
+                 next time)
+  -D         ... destroy given collection (makes sense together with '-i')
+
+Options specifying output format of the results:
+
+  -Ox        ... output in XML format (default)
+  -Oo        ... output as Biblio objects
+  -Or        ... output as a raw hashtable
+
+  The options above can be used also for converting an XML MEDLINE
+  local file without using any SOAP connection at all;
+
+  -f <filename> ... an XML file to be read and converted
+
+Options dealing with controlled vocabularies:
+
+  -Vn                  ... get all vocabulary names
+  -Vv::<name>          ... get all values from vocabulary <name>
+  -Va::<name>          ... get everything from vocabulary <name>
+  -Vd::<name>::<value> ... get description of <value>
+                           from vocabulary <name>
+  -Ve::<name>::<value> ... return 1 if <value> exists
+                           in vocabulary <name>
+
+And the remaining options:
+
+  -h  ... get help
+  -v  ... get version
+  -q  ... be quiet (less verbose)
+
+=head1 EXAMPLES
+
+  biblio.PLS - -find Java -attrs abstract -find perl
+
+Several separate invocations sharing the same query collection:
+
+  biblio.PLS -p -q - -find Brazma,Robinson > b.tmp
+  biblio.PLS -i `cat b.tmp` -d
+  MEDLINE2005/10693778
+  MEDLINE2005/10977099
+  MEDLINE2005/11726920
+  MEDLINE2005/12225585
+  MEDLINE2005/12227734
+  biblio.PLS -i `cat b.tmp` -g 10693778
+  <MedlineCitation Status="Completed">
+   ...
+  </MedlineCitation>
+
+  biblio.PLS -i `cat b.tmp` -e
+  Exists: 1       Has next: 1
+
+  biblio.PLS -i `cat b.tmp` -D
+  Destroyed OK.
+
+  biblio.PLS -i `cat b.tmp` -e
+  Exists: 0       Has next: 0
+
+Access to controlled vocabularies:
+
+  biblio.PLS -Vn
+  MEDLINE2005/JournalArticle/properties
+  MEDLINENEW/resource_types
+  MEDLINE2005/resource_types
+  MEDLINE2005/Person/properties
+  MEDLINE2005/*/publication_type
+  MEDLINENEW/JournalArticle/properties
+  repository_subsets
+  MEDLINE2005/*/citation_subset
+
+  biblio.PLS -Vv::MEDLINE2005/JournalArticle/properties
+  AllText
+  ID
+  PMID
+  ISSN
+  ...
+
+Converting local XML MEDLINE file:
+
+  biblio.PLS -g 10693778 > a_file.xml
+  biblio.PLS -f a_file.xml -Oo   ... to Perl objects
+  biblio.PLS -f a_file.xml -Or   ... as a raw hash
+
+=head1 ENVIRONMENT VARIABLES
+
+  HTTPPROXY = <HTTP proxy server>
+
+Use this if you use this script on a machine which needs to access
+remote HTTP targets via a proxy server.  For example:
+
+  export HTTPPROXY=http://128.243.220.41:3128
+  biblio.PLS -c
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Martin Senger
+
+Email martin.senger at gmail.com
+
+=head1 HISTORY
+
+Written February 2002
+Updated July 2005
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/biblio/biblio.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_embl2picture.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_embl2picture.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_embl2picture.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,175 @@
+#!/usr/bin/perl
+
+# file: embl2picture.pl
+# This is code example 6 in the Graphics-HOWTO
+# Author: Lincoln Stein
+
+use strict;
+use lib "$ENV{HOME}/projects/bioperl-live";
+use Bio::Graphics;
+use Bio::SeqIO;
+
+=head1 NAME
+
+bp_embl2picture.pl - Render a Genbank/EMBL sequence file graphically as a png image
+
+=head1 SYNOPSIS
+
+  % bp_embl2picture.pl factor7.embl | display -
+
+=head1 DESCRIPTION
+
+Render a GenBank/EMBL entry into drawable form.  Return as a GIF or
+PNG image on standard output.
+
+The file must be in embl, genbank, or another SeqIO-recognized format.
+Only the first entry will be rendered.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics>, the BioGraphics HOWTO.
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.edu
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+use constant USAGE =><<END;
+Usage: $0 <file>
+   Render a GenBank/EMBL entry into drawable form.
+   Return as a GIF or PNG image on standard output.
+
+   File must be in embl, genbank, or another SeqIO-
+   recognized format.  Only the first entry will be 
+   rendered.
+
+Example to try:
+   embl2picture.pl factor7.embl | display -
+
+END
+
+# The sequence file is the first command-line argument.  Each step below
+# falls back to the usage message when it yields a false value.
+my $file = shift or die USAGE;
+my $io   = Bio::SeqIO->new( -file => $file ) or die USAGE;
+my $seq  = $io->next_seq or die USAGE;
+
+# One feature spanning the whole entry; it is handed to the first two
+# tracks of the picture.
+my $wholeseq = Bio::SeqFeature::Generic->new(
+    -start        => 1,
+    -end          => $seq->length,
+    -display_name => $seq->display_name,
+);
+
+my @features = $seq->all_SeqFeatures;
+
+# Bucket the features by primary tag so that every tag can be drawn as
+# a track of its own.
+my %sorted_features;
+push @{ $sorted_features{ $_->primary_tag } }, $_ for @features;
+
+# Drawing surface: as long as the sequence, 800 pixels wide, with a
+# 10-pixel pad on both sides.
+my $panel = Bio::Graphics::Panel->new(
+    -length    => $seq->length,
+    -key_style => 'between',
+    -width     => 800,
+    -pad_left  => 10,
+    -pad_right => 10,
+);
+
+# Track 1: the whole entry drawn with the 'arrow' glyph.
+$panel->add_track(
+    $wholeseq,
+    -glyph  => 'arrow',
+    -bump   => 0,
+    -double => 1,
+    -tick   => 2,
+);
+
+# Track 2: the whole entry again, as a labelled blue 'generic' glyph.
+$panel->add_track(
+    $wholeseq,
+    -glyph   => 'generic',
+    -bgcolor => 'blue',
+    -label   => 1,
+);
+
+# Special cases: CDS and tRNA features get dedicated 'transcript2'
+# tracks and are removed from %sorted_features so that the general loop
+# below does not draw them a second time.
+if ( my $cds = delete $sorted_features{CDS} ) {
+    $panel->add_track(
+        $cds,
+        -glyph       => 'transcript2',
+        -bgcolor     => 'orange',
+        -fgcolor     => 'black',
+        -font2color  => 'red',
+        -key         => 'CDS',
+        -bump        => 1,
+        -height      => 12,
+        -label       => \&gene_label,
+        -description => \&gene_description,
+    );
+}
+
+if ( my $trnas = delete $sorted_features{tRNA} ) {
+    $panel->add_track(
+        $trnas,
+        -glyph      => 'transcript2',
+        -bgcolor    => 'red',
+        -fgcolor    => 'black',
+        -font2color => 'red',
+        -key        => 'tRNAs',
+        -bump       => 1,
+        -height     => 12,
+        -label      => \&gene_label,
+    );
+}
+
+# General case: one 'generic' track per remaining primary tag, in
+# alphabetical order, cycling through the colour list.
+my @colors = qw(cyan orange blue purple green chartreuse magenta yellow aqua);
+my $idx    = 0;
+for my $tag ( sort keys %sorted_features ) {
+    my $bgcolor = $colors[ $idx++ % @colors ];
+    $panel->add_track(
+        $sorted_features{$tag},
+        -glyph       => 'generic',
+        -bgcolor     => $bgcolor,
+        -fgcolor     => 'black',
+        -font2color  => 'red',
+        -key         => "${tag}s",
+        -bump        => 1,
+        -height      => 8,
+        -description => \&generic_description,
+    );
+}
+
+# The PNG is binary data: put STDOUT into binary mode first so that
+# platforms which translate line endings do not corrupt the image.
+binmode STDOUT;
+print $panel->png;
+exit 0;
+
+# Label callback: returns the first value of the feature's "product"
+# tag or, when the feature has no such tag, the first value of its
+# "gene" tag.  Returns undef when the feature carries neither tag.
+sub gene_label {
+  my $feature = shift;
+  for my $tag (qw(product gene)) {
+    if ($feature->has_tag($tag)) {
+      my @values = $feature->each_tag_value($tag);
+      return $values[0];
+    }
+  }
+  return undef;
+}
+
+# Description callback: returns the first "note" value of the feature,
+# cut down to its first 30 characters followed by '...' when it is
+# longer than that.  Returns nothing when the feature has no note.
+sub gene_description {
+  my $feature = shift;
+  return unless $feature->has_tag('note');
+
+  my @notes = $feature->each_tag_value('note');
+  return unless @notes;
+
+  my $text = $notes[0];
+  $text = substr($text, 0, 30) . '...' if length($text) > 30;
+  return $text;
+}
+
+# Description callback for the general-case tracks.
+#
+# Builds one string from all tags of the feature, in the order in which
+# all_tags() returns them.  "note" values are shown bare, every other
+# tag as "tag=values"; the values of one tag are separated by a space
+# and the individual entries by "; ".  Returns the empty string for a
+# feature without tags.
+#
+# The entries are joined rather than concatenated with a trailing "; "
+# so that a note is always separated from the entry that follows it.
+sub generic_description {
+  my $feature = shift;
+  my @parts;
+  foreach my $tag ($feature->all_tags) {
+    my @values = $feature->each_tag_value($tag);
+    push @parts, $tag eq 'note' ? "@values" : "$tag=@values";
+  }
+  return join '; ', @parts;
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs1-demo.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs1-demo.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs1-demo.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,173 @@
+#!/usr/bin/perl
+
+#Thu Aug 19 22:29:23 EDT 2004
+#Simon Ilyushchenko - demonstrating several new glyphs for gbrowse, part 1.
+
+use strict;
+
+use Bio::Graphics::Panel;
+
+=head1 NAME
+
+bp_glyphs1-demo.pl - First demo of Bio::Graphics glyphs
+
+=head1 SYNOPSIS
+
+  % bp_glyphs1-demo.pl && display glyphs1.png
+
+=head1 DESCRIPTION
+
+Generates a PNG image of some of the more esoteric Bio::Graphics glyphs.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics>, the BioGraphics HOWTO.
+
+=head1 AUTHOR
+
+Simon Ilyushchenko
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+my $ftr = 'Bio::Graphics::Feature';
+my $segment = $ftr->new(-start=>1,-end=>1000,-name=>'ZK154',-type=>'clone');
+my $subseg1 = $ftr->new(-start=>100,-end=>600,-name=>'saw teeth');
+
+my $panel = Bio::Graphics::Panel->new(
+                    -grid => 1,
+                    -segment => $segment,
+                    -key_style => 'bottom');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-interval => 20,
+			-width => 20,
+			-glyph => 'saw_teeth');
+
+$subseg1->name('frequent saw teeth');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-interval => 0,
+			-width => 10,
+			-fgcolor => 'red',
+			-glyph => 'saw_teeth');
+
+$subseg1->name('dashed line');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-dash_size => 10,
+			-space_size => 5,
+			-glyph => 'dashed_line');
+
+$subseg1->name('thick colored dashed line with shear');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-fgcolor => 'red',
+			-dash_size => 20,
+			-space_size => 5,
+			-space_color => 'blue',
+			-shear => 'yes',
+			-linewidth => 2,
+			-glyph => 'dashed_line');
+
+$subseg1->name('three letters');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-width => 20,
+			-interval => 10,
+			-pad_top => 30,
+			-glyph => 'three_letters');
+
+$subseg1->name('flag');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-width => 20,
+			-text => "ori",
+			-height => 30,
+			-glyph => 'flag');
+
+$subseg1->name('dumbbell - square ');
+
+# Dumbbell with square ends.  The size option is spelled -shape_size,
+# as in every other dumbbell track of this demo.
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+            -shape_size => 20,
+            -end_shape => "square",
+            -height => 20,
+            -glyph => 'dumbbell');
+
+$subseg1->name('dumbbell - diamond');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 20,
+			-end_shape => "diamond",
+			-fgcolor => 'orange',
+			-height => 20,
+			-glyph => 'dumbbell');
+
+$subseg1->name('dumbbell - tree');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 20,
+			-end_shape => "tree",
+			-fgcolor => 'green',
+			-height => 20,
+			-glyph => 'dumbbell');
+
+$subseg1->name('dumbbell - clover');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 20,
+			-end_shape => "clover",
+			-fgcolor => 'pink',
+			-height => 20,
+			-glyph => 'dumbbell');
+
+
+$subseg1->name('dumbbell - star with text');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 40,
+			-end_shape => "star",
+			-fgcolor => 'red',
+			-height => 40,
+			-caption => 'Back in USSR',
+			-glyph => 'dumbbell');
+
+
+$subseg1->name('dumbbell - bubble text');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 20,
+			-end_shape => "bubble",
+			-bubble_text => 'CpG',
+			-fgcolor => 'red',
+			-height => 20,
+			-glyph => 'dumbbell');
+
+
+
+
+
+
+
+# Write the rendered panel to glyphs1.png in the current directory.
+# PNG is binary data, so the handle is switched to binary mode; close
+# is checked because buffered write errors only show up there.
+open my $out, '>', 'glyphs1.png' or die "Couldn't open glyphs1.png for writing: $!";
+binmode $out;
+print {$out} $panel->gd->png;
+close $out or die "Couldn't close glyphs1.png: $!";
+
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs2-demo.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs2-demo.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/biographics/bp_glyphs2-demo.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,140 @@
+#!/usr/bin/perl
+
+#Wed Sep  1 18:54:18 EDT 2004
+#Simon Ilyushchenko - demonstrating several new glyphs for gbrowse, part 2.
+
+use strict;
+
+use Bio::Graphics::Panel;
+
+=head1 NAME
+
+bp_glyphs2-demo.pl - Second demo of Bio::Graphics glyphs
+
+=head1 SYNOPSIS
+
+  % bp_glyphs2-demo.pl && display glyphs2.png
+
+=head1 DESCRIPTION
+
+Generates a PNG image of some of the more esoteric Bio::Graphics glyphs.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics>, the BioGraphics HOWTO.
+
+=head1 AUTHOR
+
+Simon Ilyushchenko
+
+Copyright (c) 2004 Cold Spring Harbor Laboratory
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.  See DISCLAIMER.txt for
+disclaimers of warranty.
+
+=cut
+
+my $ftr = 'Bio::Graphics::Feature';
+my $segment = $ftr->new(-start=>1,-end=>400,-name=>'ZK154',-type=>'clone');
+my $subseg1 = $ftr->new(-start=>100,-end=>300,-name=>'glyphs 2');
+
+my $panel = Bio::Graphics::Panel->new(
+                    -grid => 1,
+                    -segment => $segment,
+                    -key_style => 'bottom');
+
+$subseg1->name('dumbbell - arrows with arc ');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-width => 50,
+			-height => 30,
+			-arc => 1,
+			-shape_size => 20,
+			-fgcolor => 'crimson',
+			-end_shape => "arrow",
+			-glyph => 'dumbbell');
+
+$subseg1->name('dumbbell - wave ');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 20,
+			-end_shape => "wave",
+			-height => 20,
+			-fgcolor => 'green',
+			-glyph => 'dumbbell');
+
+$subseg1->name('two bolts');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-shape_size => 40,
+			-bolt_color => 'violet',
+			-height => 20,
+			-glyph => 'two_bolts');
+
+$subseg1->name('wave');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 10,
+			-circle => 1,
+			-glyph => 'wave');
+
+$subseg1->name('broken line');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 20,
+			-glyph => 'broken_line');
+
+$subseg1->name('tic_tac_toe');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 20,
+			-glyph => 'tic_tac_toe');
+
+$subseg1->name('text_in_box');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 20,
+			-text_bgcolor => 'yellow',
+			-glyph => 'text_in_box');
+
+
+$subseg1->name('christmas arrow');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 20,
+			-fgcolor => 'steelblue',
+			-glyph => 'christmas_arrow');
+
+
+$subseg1->name('pentagram');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 20,
+			-glyph => 'pentagram');
+
+
+$subseg1->name('weighted arrow');
+
+$panel->add_track(segments=>[$subseg1],
+            -label => 1,
+			-height => 20,
+			-fgcolor => 'sienna',
+			-glyph => 'weighted_arrow');
+
+
+
+# Write the rendered panel to glyphs2.png in the current directory.
+# PNG is binary data, so the handle is switched to binary mode; close
+# is checked because buffered write errors only show up there.
+open my $out, '>', 'glyphs2.png' or die "Couldn't open glyphs2.png for writing: $!";
+binmode $out;
+print {$out} $panel->gd->png;
+close $out or die "Couldn't close glyphs2.png: $!";
+
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/das/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/das/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/das/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3 @@
+For a lightweight Distributed Annotation System (DAS) server, see
+http://www.biodas.org/servers/
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/das/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/das/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/das/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+This directory is currently empty.  For the Lightweight Distributed
+Annotation System (LDAS) server, see http://www.biodas.org/servers/

Added: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/graphics/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/graphics/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,17 @@
+These are scripts that run on top of Bio::Graphics.
+
+examples/graphics/render_sequence.pl:
+
+fetch a GenBank or EMBL entry off the net (fetching by accession
+number) and render into a graphical picture
+
+scripts/graphics/feature_draw.PLS:
+
+render a set of features into a drawing, using
+Bio::Graphics::FeatureFile's idiosyncratic (but relatively friendly)
+feature notation.
+
+scripts/graphics/frend.PLS:		
+
+the same thing, but a CGI front end
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/graphics/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/graphics/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+These are scripts to generate graphical images from sequence data.
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/contig_draw.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/graphics/contig_draw.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/graphics/contig_draw.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,165 @@
+#!/usr/bin/perl -w
+# $Id: contig_draw.PLS,v 1.1 2004/05/20 19:49:28 matsallac Exp $
+
+=head1 NAME
+
+contig_draw -- Render a contig from a Bio::Assembly::IO parseable assembly file into a simple overview graphic
+
+=head1 SYNOPSIS
+
+contig_draw [-i filename] [-f format] [-o outputfilename] [--labels]
+
+=head1 DESCRIPTION
+
+This script will take any Bio::SearchIO parseable report and turn it
+into a simple overview graphic of the report.  For our purposes we are
+assuming BLAST and the BLAST scores when assigning colors.  Output is
+a PNG format file.
+
+This is not intended to be an overly customized script, rather it
+should probably just be either a quick and dirty look at a report or a
+starting point for more complicated implementations.
+
+The color is determined by the hit score which is currently pegged to the NCBI 
+scheme which looks like this
+
+ RED     E<gt>= 200 
+ PURPLE  80-200
+ GREEN   50-80
+ BLUE    40-50
+ BLACK   E<lt>40
+
+Options:
+ -i/--input        The input filename, otherwise input is assumed from STDIN
+ -o/--output       The output filename, this is optional, if you do not
+                   provide the output filename the script will create a file
+                   using the name of the query sequence and will process
+                   all the sequences in the file.  If an output filename
+                   IS provided the script will only display an image for the 
+                   first one.
+ -f/--format       The SearchIO format parser to use, if not provided
+                   SearchIO will guess based on the file extension.
+ -l/--labels       Display the hit sequence name as a label in the overview.
+                   For lots of sequences this will make the image very long
+                   so by default it is turned off.
+
+=head1 AUTHOR Jason Stajich
+
+Jason Stajich, jason[-at-]open-bio[-dot-]org.
+
+=cut
+
+use strict;
+
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+use Bio::Graphics::FeatureFile;
+use Bio::Assembly::IO;
+use Getopt::Long;
+use Dumpvalue();
+my $dumper = new Dumpvalue();
+
+use constant WIDTH          => 600;  # default width
+
+my ($in,$format,$out);
+
+my $showlabels = 0;
+
+# This defines the color order
+# For NCBI it is typically defined like this
+# Score
+# RED     >= 200 
+# PURPLE  80-200
+# GREEN   50-80
+# BLUE    40-50
+# BLACK   <40
+my @COLORS = qw(red magenta green blue black);
+my @SCORES = (200,80,50,40,0);
+
+GetOptions(
+	   'i|in|input:s'   => \$in,
+	   'f|format:s'     => \$format,
+	   'o|output:s'     => \$out,
+	   'l|labels'       => \$showlabels
+	   );
+
+# Default input and output paths, used when -i / -o are not given.
+# Alternative sample input:
+#   ../../t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1
+$in  ||= "../../t/data/acefile.ace.1";
+$out ||= "web/contig.png";
+
+print "Parsing this file: ($in)\n";
+
+# Read the first assembly of the file and list its contigs.
+my $parser  = Bio::Assembly::IO->new( -file => $in );
+my $ass     = $parser->next_assembly;
+my @contigs = $ass->all_contigs;
+
+# For demo purposes work on a single contig only.  NOTE: pop() takes
+# the LAST contig returned by all_contigs(), not the first one.
+my $contig = pop(@contigs);
+die "No contigs found in $in\n" unless $contig;
+
+my (@sequences, @features, @configs);
+
+# Start with the consensus sequence: it is the first feature to be
+# drawn and provides the initial coordinate range of the picture.
+my $cs = $contig->get_consensus_sequence();
+print STDERR "Adding a consensus with start(".$cs->start().") and end(".$cs->end().")\n";
+$cs->display_name("Consensus sequence(".$cs->start().",".length($cs->seq()).")");
+my $min = $cs->start();
+my $max = $cs->end();
+push @features, $cs;
+$dumper->dumpValue($cs);    # debugging dump of the consensus object
+
+# Now add the members of this contig, widening [$min, $max] so that it
+# covers every member, and append "(start,end)" to each display name.
+foreach my $feat ($contig->each_seq()) {
+     print STDERR "Adding a member with name(".$feat->display_id().") start(".$feat->start().") and end(".$feat->end().")\n";
+     print(ref($feat)."\n");
+     # $dumper->dumpValue($feat) ;
+     # my @fs = $feat->get_all_tags();
+     # print("These are the seqfeatures:\n");
+     # $dumper->dumpValue(\@fs);
+     # my @tag_values = $feat->get_tag_values('contig');
+     # my $locatable_seq = $feat->get_tag_values('contig');
+     # print("These are the tagged values:\n");
+     # $dumper->dumpValue(\@tag_values);
+     # help bioperlers! how do i not do this:
+     push @features, $feat;
+     $min = MIN($min,$feat->start());
+     $max = MAX($max,$feat->end());
+     $feat->display_name($feat->display_name()."(".$feat->start().",".$feat->end().")");
+}
+    # Fixed-size canvas on a white background.
+    my $panel = Bio::Graphics::Panel->new(
+        -length    => 2000,
+        -width     => 900,
+        -bgcolor   => 'white',
+        -pad_left  => 10,
+        -pad_right => 10,
+    );
+
+    # Arrow track running from 0 to the width of the contig plus 100.
+    my $ruler = Bio::Graphics::Feature->new(
+        -start => 0,
+        -end   => $max-$min + 100,
+    );
+    $panel->add_track(
+        'arrow' => $ruler,
+        -bump   => 0,
+        -double => 1,
+        -tick   => 2,
+    );
+    # my $invisible_track = $panel->add_track(-glyph    =>   '');
+    # $invisible_track->add_feature(new Bio::SeqFeature::Generic(-start    =>   $min-500,     -end =>   $max+500));
+
+    # One labelled track holding every collected sequence, shifted so
+    # that the smallest start coordinate is drawn at position 0.
+    my $track = $panel->add_track( -glyph => 'generic', -label => 1 );
+    for my $f (@features) {
+        $track->add_feature(
+            Bio::SeqFeature::Generic->new(
+                -start        => $f->start()-$min,
+                -end          => $f->end()-$min,
+                -display_name => $f->display_name(),
+            )
+        );
+    }
+    # Write the PNG to $out.  $out always holds a file name here (a
+    # default is assigned when -o is not given), so no fallback name is
+    # needed.  The three-argument open keeps special characters in the
+    # name from being read as an open mode, and close is checked
+    # because buffered write errors only show up there.
+    open(my $png_fh, '>', $out) || die("cannot open $out: $!");
+    binmode($png_fh);
+    print {$png_fh} $panel->png;
+    close($png_fh) || die("cannot close $out: $!");
+ 
+# Numerically larger of two values; the first argument wins a tie.
+sub MAX {
+    my ($first, $second) = @_;
+    return $second if $first < $second;
+    return $first;
+}
+# Numerically smaller of two values; the first argument wins a tie.
+sub MIN {
+    my ($first, $second) = @_;
+    return $second if $first > $second;
+    return $first;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/contig_draw.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/feature_draw.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/graphics/feature_draw.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/graphics/feature_draw.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,362 @@
+#!/usr/bin/perl -w
+
+use strict;
+use lib './blib/lib','../blib/lib';
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+use Bio::Graphics::FeatureFile;
+
+use Getopt::Long;
+
+use constant WIDTH => 600;
+my ($WIDTH,$RANGE,$BOXES);
+
+GetOptions ('width:i'  => \$WIDTH,
+            'range:s'  => \$RANGE,
+	    'boxes'    => \$BOXES,
+	   ) || die <<USAGE;
+Usage: $0 [feature file 1] [feature file 2] ...
+
+ Options:
+    --width  <pixels>     Set width of image (${\WIDTH} pixels default)
+    --range  <start-stop> Set range of region (base pairs, start-stop)
+    --boxes               Draw grey boxes around the features (for debugging)
+
+Render a Bio::Graphics feature file and produce a PNG image.
+See the manual page for Bio::Graphics::FeatureFile for a
+description of the file format.
+USAGE
+
+my @COLORS = qw(cyan blue red yellow green wheat turquoise orange);  # default colors
+my $color = 0;      # position in color cycle
+
+my $data = Bio::Graphics::FeatureFile->new(-file => '-');
+
+# general configuration of the image here
+my $width         = $WIDTH || $data->setting(general => 'pixels')
+                           || $data->setting(general => 'width')
+                           || WIDTH;
+
+# Work out the region to draw.  Precedence: the --range option, then
+# the "bases" setting of the [general] section, then the extent of the
+# data itself.
+my ($start,$stop);
+
+# start and stop may be negative; they are separated by "-", ".." or
+# ",", matching the three --range forms listed in the manual page.
+my $range_expr = '(-?\d+)(?:-|\.\.|,)(-?\d+)';
+
+if (defined $RANGE) {
+   ($start,$stop) = $RANGE =~ /$range_expr/o or die "$RANGE: invalid range specification";
+} elsif (my $bases = $data->setting(general => 'bases')) {
+   ($start,$stop) =  $bases =~ /([\d-]+)(?:-|\.\.)([\d-]+)/;
+}
+
+# Whatever is still undefined is taken from the data.
+$start = $data->min unless defined $start;
+$stop  = $data->max unless defined $stop;
+
+# Use the order of the stylesheet to determine features.  Whatever is left
+# over is presented in alphabetic order
+my %types = map {$_=>1} $data->configured_types;
+
+my @configured_types   = grep {exists $data->features->{$_}} $data->configured_types;
+my @unconfigured_types = sort grep {!exists $types{$_}}      $data->types;
+
+# create the segment,the panel and the arrow with tickmarks
+my $segment = Bio::Graphics::Feature->new(-start=>$start,-stop=>$stop);
+my $panel = Bio::Graphics::Panel->new(-segment   => $segment,
+				      -width     => $width,
+				      -key_style => 'between');
+$panel->add_track($segment,-glyph=>'arrow',-tick=>2);
+
+# Options of the [general] section; they are passed to every track.
+my @base_config = $data->style('general');
+
+# One track per feature type: configured types first, then the
+# unconfigured ones.  The option list starts with built-in defaults
+# (cycling through @COLORS for the background), followed by the
+# [general] options and finally the options of the type itself.
+# NOTE(review): presumably later entries override earlier ones inside
+# add_track -- confirm against Bio::Graphics::Panel.
+for my $type (@configured_types,@unconfigured_types) {
+  my @config = ( -glyph   => 'segments',         # really generic
+                 -bgcolor => $COLORS[$color++ % @COLORS],
+                 -label   => 1,
+                 -key     => $type,
+                 @base_config,             # global
+                 $data->style($type),      # feature-specific
+               );
+  my $features = $data->features($type);
+  $panel->add_track($features,@config);
+}
+
+my $gd = $panel->gd;
+
+if ($BOXES) {  # debugging code
+  my $boxes = $panel->boxes;
+  debugging_rectangles($gd,$boxes);
+}
+
+# The image is binary data: put STDOUT into binary mode so that
+# platforms which translate line endings do not corrupt it.  PNG is
+# emitted when the image object can produce it, GIF otherwise.
+binmode STDOUT;
+print $gd->can('png') ? $gd->png : $gd->gif;
+
+# Debugging aid for --boxes: outlines every box in grey on the image.
+#
+#   $image - object providing colorClosest() and rectangle()
+#   $boxes - array ref of array refs; elements 1..4 of each entry are
+#            passed to rectangle() (presumably the corner coordinates
+#            of the box -- confirm against Bio::Graphics::Panel->boxes)
+sub debugging_rectangles {
+  my ($image,$boxes) = @_;
+  my $grey = $image->colorClosest(100,100,100);
+  foreach my $box (@$boxes) {
+    my @rect = @{$box}[1,2,3,4];
+    $image->rectangle(@rect,$grey);
+  }
+}
+
+=head1 NAME
+
+feature_draw.pl -- Render a Bio::Graphics Feature File
+
+=head1 SYNOPSIS
+
+ feature_draw.pl [options] file.txt [file2.txt...] > rendering.png
+ feature_draw.pl [options] file.txt [file2.txt...] | display -
+
+=head1 DESCRIPTION
+
+The feature_draw.pl script is a thin front end around the
+Bio::Graphics module.  It accepts a list of files containing sequence
+(protein, nucleotide) feature coordinates from the file(s) listed on
+the command line or on standard input, renders them, and produces a
+PNG file on standard output.
+
+=head2 Options
+
+This script uses GNU-style long options.  This allows you to specify
+the image width option, for example, with any of the following alternative
+forms:
+
+ --width=800
+ --width 800
+ -width 800
+ -w 800
+
+=over 4
+
+=item --width
+
+This sets the width of the image, in pixels.  The default is 600
+pixels.
+
+=item --range
+
+This sets the range of the region displayed, in base pairs from start
+to stop. Any of the following formats are accepted:
+
+  --range 1..1000
+  --range 1,1000
+  --range 1-1000
+
+Negative ranges are allowed.
+
+=back
+
+=head1 Feature Files Format
+
+This script accepts and processes sequence annotations in a simple
+tab-delimited format or in GFF format.
+
+The feature file format has a configuration section and a data
+section. The configuration section sets up the size and overall
+properties of the image, and the data section gives the feature
+data itself.
+
+=head2 Configuration Section
+
+If not provided, this script generates a reasonable default
+configuration section for you, so you do not need to provide a
+configuration section to get a reasonable image. However, to tune the
+appearance of the image, you will probably want to tweak the
+configuration. Here is an excerpt from the configuration section:
+
+
+ # example file
+ [general]
+ bases = -1000..21000
+ height = 12
+
+ [EST]
+ glyph = segments
+ bgcolor= yellow
+ connector = dashed
+ height = 5
+
+ [FGENES]
+ glyph = transcript2
+ bgcolor = green
+ description = 1
+
+
+The configuration section is divided into a set of sections, each one
+labeled with a [section title]. The [general] section specifies global
+options for the entire image. Other sections apply to particular
+feature types. In the example above, the configuration in the [EST]
+section applies to features labeled as ESTs, while the configuration
+in the [FGENES] section applies to features labeled as predictions
+from the FGENES gene prediction program.
+
+Inside each section is a series of name=value pairs, where the name is
+the name of an option to set. You can put whitespace around the = sign
+to make it more readable, or even use a colon (:) if you prefer. The
+following option names are recognized:
+
+ Option     Value                                       Example
+ ------     -----                                       -------
+
+ bases      Min & max of the sequence range (bp)           1200..60000
+ width      width of the image (pixels)                    600
+ height     Height of each graphical element (pixels)      10
+ glyph      Style of each graphical element (see below)    transcript
+ fgcolor    Foreground color of each element               yellow
+ bgcolor    Background color of each element               blue
+ linewidth  Width of lines                                 3
+ label      Print the feature's name                       1
+ description Whether to print the feature's description    0
+ bump       Elements are not allowed to collide            1
+ ticks      Print tick marks on arrows                     1
+ connector  Type of group connector (dashed, hat or solid) dashed
+
+The "bases" and "width" options are only relevant in the [general]
+section. They are overridden by the like-named command-line options.
+
+The remainder of the options can be located in any section, but if
+present in the [general] section will set defaults for the others.
+
+Colors are English-language color names or Web-style #RRGGBB colors
+(see a book on HTML for an explanation). True/false values are 1 for
+true, and 0 for false. Numeric ranges can be expressed in start..end
+fashion with two dots, or as start-end with a hyphen.
+
+The "glyph" option controls how the features are rendered. The
+following glyphs are implemented:
+
+  Name                Description
+  ----                -----------
+
+  box                 A filled rectangle, nondirectional.
+  ellipse             An oval.
+  arrow               An arrow; can be unidirectional or
+		      bidirectional.  It is also capable of displaying
+                      a scale with major and minor tickmarks, and can 
+                      be oriented horizontally or vertically. 
+  segments            A set of filled rectangles connected by solid
+		      lines. Used for interrupted features, such as 
+		      gapped alignments and exon groups.
+  transcript          Similar to segments, but the connecting line is
+		      a "hat" shape, and the direction of
+		      transcription is indicated by a small arrow. 
+  transcript2         Similar to transcript, but the direction of
+		      transcription is indicated by a terminal segment
+		      in the shape of an arrow. 
+  primers             Two inward pointing arrows connected by a line. Used for STSs. 
+
+The bump option is the most important option for controlling the look
+of the image. If set to false (the number 0), then the features are
+allowed to overlap. If set to true (the number 1), then the features
+will move vertically to avoid colliding. If not specified, bump is
+turned on if the number of any given type of sequence feature is
+greater than 50.
+
+=head2 Data Section
+
+The data section can follow or precede the configuration section. The
+two sections can also be intermixed. The data section is a tab or
+whitespace-delimited file which you can export from a spreadsheet
+application or word processor file (be sure to save as text only!)
+
+Here is an example data section:
+
+
+Cosmid     B0511        .       516-619
+Cosmid     B0511        .       3185-3294
+Cosmid     B0511        .       10946-11208
+Cosmid     B0511        .       13126-13511
+Cosmid     B0511        .       66-208
+Cosmid     B0511        .       6354-6499
+Cosmid     B0511        .       13955-14115
+EST        yk595e6.5    +       3187-3294
+EST        yk846e07.3   -       11015-11208
+EST        yk53c10
+           yk53c10.5    +       18892-19154
+           yk53c10.3    -       15000-15500,15700-15800
+EST        yk53c10.5    +       16032-16105
+SwissProt  PECANEX      +       13153-13656     Swedish fish
+FGENESH    "Gene 1"     -       1-205,518-616,661-735,3187-3365,3436-3846       Transmembrane domain
+FGENESH    "Gene 2"     -       16626-17396,17451-17597 Kinase and sushi domains
+
+
+Each line of the file contains five columns. The columns are: 
+
+ Column #   Description
+ --------   -----------
+
+ 1          feature type
+ 2          feature name
+ 3          strand
+ 4          coordinates
+ 5          description
+
+=over 4
+
+=item Feature type
+
+The feature type should correspond to one of the [feature type]
+headings in the configuration section. If it doesn't, the [general]
+options will be applied to the feature when rendering it. The feature
+name is a name for the feature. Use a "." or "-" if this is not
+relevant. If the name contains whitespace, put single or double quotes
+("") around the name.
+
+=item Strand
+
+The strand indicates which strand the feature is on. It is one of "+"
+for the forward strand, "-" for the reverse strand, or "." for
+features that are not stranded.
+
+=item Coordinates
+
+The coordinates column is a set of one or more ranges that the feature
+occupies. Ranges are written using ".." as in start..stop, or with
+hyphens, as in start-stop. For features that are composed of multiple
+ranges -- for example transcripts that have multiple exons -- you
+can either put the ranges on the same line separated by commas or
+spaces, or put the ranges on individual lines and just use the same
+feature name and type to group them. In the example above, the Cosmid
+B0511 features use the individual line style, while the FGENESH
+features use the all-ranges-on-one-line style.
+
+=item Description
+
+The last column contains some descriptive text. If the description
+option is set to true, this text will be printed underneath the
+feature in the rendering.
+
+=back
+
+Finally, it is possible to group related features together. An example
+is the ESTs yk53c10.5 and yk53c10.3, which are related by being reads
+from the two ends of the clone yk53c10. To indicate this relationship,
+generate a section that looks like this:
+
+ EST        yk53c10
+            yk53c10.5    +       18892-19154
+            yk53c10.3    -       15000-15500,15700-15800
+
+
+The group is indicated by a line that contains just two columns
+containing the feature type and a unique name for the group. Follow
+this line with all the features that form the group, but leave the
+first column (the feature type) blank. The group will be rendered by
+drawing a dashed line between all the members of the group. You can
+change this by specifying a different connector option in the
+configuration section for this feature type.
+
+=head1 BUGS
+
+Please report them to the author.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+=cut
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/feature_draw.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/frend.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/graphics/frend.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/graphics/frend.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,755 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+frend.pl -- Render a Bio::Graphics Feature File on the web
+
+=head1 SYNOPSIS
+
+ http://your.host.com/cgi-bin/frend.pl
+
+=head1 DESCRIPTION
+
+The frend.pl script is a thin web front end around the Bio::Graphics
+module.  It accepts sequence (protein, nucleotide) feature coordinates
+that are either pasted into a web form or uploaded as a file, renders
+them, and displays the resulting image (GIF or PNG, depending on what
+the installed GD library supports) on the returned page.
+
+=head1 INSTALLATION
+
+Copy this script into your web site's cgi-bin directory.  Name it
+whatever you want.
+
+=head1 Feature Files Format
+
+This script accepts and processes sequence annotations in a simple
+tab-delimited format or in GFF format.
+
+The feature file format has a configuration section and a data
+section. The configuration section sets up the size and overall
+properties of the image, and the data section gives the feature
+data itself.
+
+=head2 Configuration Section
+
+If not provided, this script generates a reasonable default
+configuration section for you, so you do not need to provide a
+configuration section to get a reasonable image. However, to tune the
+appearance of the image, you will probably want to tweak the
+configuration. Here is an excerpt from the configuration section:
+
+
+ # example file
+ [general]
+ bases = -1000..21000
+ height = 12
+
+ [EST]
+ glyph = segments
+ bgcolor= yellow
+ connector = solid
+ height = 5
+
+ [FGENES]
+ glyph = transcript2
+ bgcolor = green
+ description = 1
+
+
+The configuration section is divided into a set of sections, each one
+labeled with a [section title]. The [general] section specifies global
+options for the entire image. Other sections apply to particular
+feature types. In the example above, the configuration in the [EST]
+section applies to features labeled as ESTs, while the configuration
+in the [FGENES] section applies to features labeled as predictions
+from the FGENES gene prediction program.
+
+Inside each section is a series of name=value pairs, where the name is
+the name of an option to set. You can put whitespace around the = sign
+to make it more readable, or even use a colon (:) if you prefer. The
+following option names are recognized:
+
+ Option     Value                                       Example
+ ------     -----                                       -------
+
+ bases      Min & max of the sequence range (bp)           1200..60000
+ width      width of the image (pixels)                    600
+ height     Height of each graphical element (pixels)      10
+ glyph      Style of each graphical element (see below)    transcript
+ fgcolor    Foreground color of each element               yellow
+ bgcolor    Background color of each element               blue
+ linewidth  Width of lines                                 3
+ label      Print the feature's name                       1
+ description Whether to print the feature's description    0
+ bump       Elements are not allowed to collide            1
+ ticks      Print tick marks on arrows                     1
+ connector  Type of group connector (dashed, hat or solid) dashed
+
+The "bases" and "width" options are only relevant in the [general]
+section. They are overridden by the like-named command-line options.
+
+The remainder of the options can be located in any section, but if
+present in the [general] section will set defaults for the others.
+
+Colors are English-language color names or Web-style #RRGGBB colors
+(see a book on HTML for an explanation). True/false values are 1 for
+true, and 0 for false. Numeric ranges can be expressed in start..end
+fashion with two dots, or as start-end with a hyphen.
+
+The "glyph" option controls how the features are rendered. The
+following glyphs are implemented:
+
+  Name                Description
+  ----                -----------
+
+  box                 A filled rectangle, nondirectional.
+  ellipse             An oval.
+  arrow               An arrow; can be unidirectional or
+		      bidirectional.  It is also capable of displaying
+                      a scale with major and minor tickmarks, and can 
+                      be oriented horizontally or vertically. 
+  segments            A set of filled rectangles connected by solid
+		      lines. Used for interrupted features, such as 
+		      gapped alignments and exon groups.
+  transcript          Similar to segments, but the connecting line is
+		      a "hat" shape, and the direction of
+		      transcription is indicated by a small arrow. 
+  transcript2         Similar to transcript, but the direction of
+		      transcription is indicated by a terminal segment
+		      in the shape of an arrow. 
+  primers             Two inward pointing arrows connected by a line. Used for STSs. 
+
+The bump option is the most important option for controlling the look
+of the image. If set to false (the number 0), then the features are
+allowed to overlap. If set to true (the number 1), then the features
+will move vertically to avoid colliding. If not specified, bump is
+turned on if the number of any given type of sequence feature is
+greater than 50.
+
+=head2 Data Section
+
+The data section can follow or precede the configuration section. The
+two sections can also be intermixed. The data section is a tab or
+whitespace-delimited file which you can export from a spreadsheet
+application or word processor file (be sure to save as text only!)
+
+Here is an example data section:
+
+
+ Cosmid     B0511        .       516-619
+ Cosmid     B0511        .       3185-3294
+ Cosmid     B0511        .       10946-11208
+ Cosmid     B0511        .       13126-13511
+ Cosmid     B0511        .       66-208
+ Cosmid     B0511        .       6354-6499
+ Cosmid     B0511        .       13955-14115
+ EST        yk595e6.5    +       3187-3294
+ EST        yk846e07.3   -       11015-11208
+ EST        yk53c10
+            yk53c10.5    +       18892-19154
+            yk53c10.3    -       15000-15500,15700-15800
+ EST        yk53c10.5    +       16032-16105
+ SwissProt  PECANEX      +       13153-13656     Swedish fish
+ FGENESH    "Gene 1"     -       1-205,518-616,661-735,3187-3365,3436-3846       Transmembrane domain
+ FGENESH    "Gene 2"     -       16626-17396,17451-17597 Kinase and sushi domains
+
+
+Each line of the file contains five columns. The columns are: 
+
+ Column #   Description
+ --------   -----------
+
+ 1          feature type
+ 2          feature name
+ 3          strand
+ 4          coordinates
+ 5          description
+
+=over 4
+
+=item Feature type
+
+The feature type should correspond to one of the [feature type]
+headings in the configuration section. If it doesn't, the [general]
+options will be applied to the feature when rendering it. The feature
+name is a name for the feature. Use a "." or "-" if this is not
+relevant. If the name contains whitespace, put single or double quotes
+("") around the name.
+
+=item Strand
+
+The strand indicates which strand the feature is on. It is one of "+"
+for the forward strand, "-" for the reverse strand, or "." for
+features that are not stranded.
+
+=item Coordinates
+
+The coordinates column is a set of one or more ranges that the feature
+occupies. Ranges are written using ".." as in start..stop, or with
+hyphens, as in start-stop. For features that are composed of multiple
+ranges -- for example transcripts that have multiple exons -- you
+can either put the ranges on the same line separated by commas or
+spaces, or put the ranges on individual lines and just use the same
+feature name and type to group them. In the example above, the Cosmid
+B0511 features use the individual line style, while the FGENESH
+features use the all-ranges-on-one-line style.
+
+=item Description
+
+The last column contains some descriptive text. If the description
+option is set to true, this text will be printed underneath the
+feature in the rendering.
+
+=back
+
+Finally, it is possible to group related features together. An example
+is the ESTs yk53c10.5 and yk53c10.3, which are related by being reads
+from the two ends of the clone yk53c10. To indicate this relationship,
+generate a section that looks like this:
+
+ EST        yk53c10
+            yk53c10.5    +       18892-19154
+            yk53c10.3    -       15000-15500,15700-15800
+
+
+The group is indicated by a line that contains just two columns
+containing the feature type and a unique name for the group. Follow
+this line with all the features that form the group, but leave the
+first column (the feature type) blank. The group will be rendered by
+drawing a dashed line between all the members of the group. You can
+change this by specifying a different connector option in the
+configuration section for this feature type.
+
+=head1 BUGS
+
+Please report them to the author.
+
+=head1 SEE ALSO
+
+L<Bio::Graphics>, L<feature_draw.pl>
+
+=head1 AUTHOR
+
+Lincoln Stein, lstein at cshl.org
+
+=cut
+
+use strict;
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+use Bio::Graphics::FeatureFile;
+use CGI qw(:standard);
+use CGI::Carp;
+use File::Temp ':mktemp';
+use File::Spec;
+use File::Basename 'basename';
+use File::Path 'mkpath';
+use vars '@COLORS';
+
+# Rendering defaults.
+use constant WIDTH          => 600;  # default width
+use constant BUMP_THRESHOLD => 50;  # if more than this # of features, will stop bumping
+@COLORS = qw(cyan blue red yellow green wheat turquoise orange);  # default colors
+
+# A "cat" request asks for a previously rendered image: stream it and stop
+# before any HTML is emitted.
+if (param('cat')) {
+  catfile(scalar param('cat'));
+  exit 0;
+}
+
+print header,start_html('Sequence Feature Renderer');
+print h1('Sequence Feature Renderer');
+
+print p('This is a front end to the Bio::Graphics package, a part of the',
+        a({-href=>'http://www.bioperl.org'},'BioPerl library.'),
+        'Cut and paste your sequence annotation data into the text field below, or upload it using the',
+        'upload button.',
+        'The format of the annotation data is explained',a({-href=>'#format'},'below.'));
+
+# The example link has to send a real name=value pair.  A query string
+# without "=" is parsed by CGI.pm as a keyword list, in which case
+# param('Example 1') below would never be set.
+my $self = url(-relative=>1);
+print h3('Instant examples'),
+  p('For the impatient, you can paste in an',
+    b(a({-href=>"$self?Example+1=1"},'example file.')));
+
+# An uploaded file replaces the contents of the "text" parameter.
+read_file() if param('file');
+
+# The example buttons replace the text with built-in sample data:
+# "Example 1" is feature data only, "Example 2" adds a configuration section.
+my $example = param('Example 1')
+  ? test_data(0)
+  : param('Example 2')
+  ? test_data(1)
+  : '';
+param(text => $example) if length $example;
+
+# Guard the pattern match so that an absent upload field does not trigger an
+# "uninitialized value" warning under -w.
+my $upload = param('file');
+render() if param('text') || (defined $upload && $upload =~ /\w/);
+
+# Submission form: pasted text, image width, file upload, example/clear
+# buttons and the final "Render..." button.  Each TR() is its own table row.
+print start_multipart_form(),
+  table({-border=>0,-width=>300,-cellspacing=>0,-cellpadding=>0},
+        TR({-class=>'resultsbody'},
+           td({-colspan=>1},
+              'Cut and Paste the annotation file...'
+             ),
+           td({-colspan=>2},
+              'Image width: ',
+              popup_menu(-name=>'width',-values=>[480,640,800,1024,1280,1600],-default=>800)
+             )
+          ),
+        TR({-class=>'resultsbody'},
+           td({-colspan=>3},
+              pre(
+                  # scalar() keeps an absent "Clear" parameter from collapsing
+                  # to an empty list in the middle of this argument list.
+                  textarea(-name=>'text',-value=>$example,
+                           -cols=>80,-rows=>10,-wrap=>'off',
+                           -override=>(length($example) || scalar(param('Clear'))))
+                 )
+             )
+          ),
+        TR({-class=>'resultsbody'},
+           td({-colspan=>1},'Upload it... ',filefield(-name=>'file',-size=>30)),
+           td({-align=>'left',-colspan=>2},
+              'Or paste one of the example files...',
+              submit('Example 1'),
+              submit('Example 2'),
+              submit('Clear'),
+             )
+          ),
+        TR({-class=>'resultstitle'},
+           td({-align=>'left',-colspan=>3},
+              "Press",b('Render'),'when ready...',
+              b(submit('Render...'))
+             ),
+          )),
+  end_form;
+
+# Format documentation (target of the "#format" anchor) and page footer.
+print_format();
+
+print hr(),a({-href=>'http://www.bioperl.org'},'www.bioperl.org'),end_html();
+
+exit 0;
+
+# Copy the contents of the uploaded "file" field into the "text" parameter.
+# Does nothing when no upload is present.
+sub read_file {
+  my $upload = param('file');
+  return unless $upload;
+
+  my $contents;
+  while (<$upload>) {
+    $contents .= $_;
+  }
+  param(text => $contents);
+}
+
+# Render the submitted annotation data and print an <img> tag that points
+# back at this script (through the "cat" parameter) to fetch the image.
+#
+# Arguments:    optional annotation text.  Defaults to the "text" parameter
+#               and, when that is empty, to the uploaded "file" parameter.
+# Side effects: writes the image into tmpdir() and prints HTML to STDOUT.
+#               Finishes the page and exits when the data contain no usable
+#               coordinate range.
+sub render {
+  my $text = shift;
+
+  $text ||= param('text');
+  my $data = $text ? Bio::Graphics::FeatureFile->new(-text => $text)
+                   : Bio::Graphics::FeatureFile->new(-file => scalar param('file'));
+
+  # Without a non-empty coordinate range there is nothing to draw.  Report
+  # the problem on the page itself and close the document before exiting.
+  my ($min,$max) = ($data->min,$data->max);
+  unless (defined $min && defined $max && $min < $max) {
+    print p(b("This doesn't look like a valid annotation file.  No annotations found.")),
+          end_html();
+    exit 0;
+  }
+
+  # adjust the width if requested
+  my $width = param('width');
+  $data->setting(general => 'width',$width) if $width;
+
+  # render the panel
+  my $panel = $data->new_panel;
+  $data->render($panel);
+
+  # we create the file and write it out; GIF is used when GD offers it
+  my $gd      = $panel->gd;
+  my $use_gif = $gd->can('gif');
+  my $suffix  = $use_gif ? '.gif' : '.png';
+  my $dir     = tmpdir();
+  mkpath($dir) unless -e $dir;
+  my($fh,$filename) = mkstemps(tmpfile('XXXXXXXX'),$suffix);
+
+  binmode $fh;   # image data must not undergo newline translation
+  print $fh ($use_gif ? $gd->gif : $gd->png);
+  close $fh or die "Couldn't write $filename: $!";
+
+  # now we send the link to the user; the extra path component only gives
+  # the image URL a file extension
+  my $self = url(-relative=>1);
+  my $base = basename($filename);
+  my $url  = "$self/features$suffix?cat=$base";
+  my ($w,$h) = $gd->getBounds;
+
+  print hr(),h2('Rendering');
+  print a({-name=>'rendering'},
+          img({-src=>$url,-alt=>'Right-click and "Save As..." to save this image',
+               -border=>0,-width=>$w,-height=>$h})
+          );
+}
+
+# Directory that holds rendered images: a "frend" entry beneath the
+# temporary directory reported by File::Spec.
+sub tmpdir {
+  my $system_tmp = File::Spec->tmpdir;
+  return File::Spec->catfile($system_tmp,'frend');
+}
+
+# Build the full path of the named file inside tmpdir().
+sub tmpfile {
+  my $name = shift;
+  return File::Spec->catfile(tmpdir(),$name);
+}
+
+# Stream a previously rendered image back to the browser, then delete it.
+#
+# Argument: the bare file name of an image inside tmpdir().
+#
+# The name comes straight from the "cat" request parameter, so it is checked
+# before it is turned into a path: only the characters File::Temp uses in
+# generated names, followed by ".gif" or ".png", are accepted.  This keeps a
+# request from reading or unlinking files outside tmpdir().
+# Dies if an accepted file cannot be opened.
+sub catfile {
+  my $file = shift;
+
+  unless (defined $file && $file =~ /\A[A-Za-z0-9_]+\.(?:gif|png)\z/) {
+    print header(-type=>'text/plain',-status=>'400 Bad Request'),
+      "Invalid image name\n";
+    return;
+  }
+
+  my $path = tmpfile($file);
+  # Open before sending the header so a failure is not reported after an
+  # image content type has already been promised.
+  open my $in,'<',$path or die "Couldn't open $file for reading: $!";
+  binmode $in;
+  binmode STDOUT;
+
+  print header($path =~ /\.gif$/ ? 'image/gif' : 'image/png');
+  while (my $chunk = <$in>) {
+    print $chunk;
+  }
+  close $in;
+  unlink $path;   # each image is served once
+}
+
+# Print the "Annotation file format" help section.  The section is wrapped
+# in a named anchor ("format") and consists of static HTML, apart from the
+# BUMP_THRESHOLD constant that is interpolated into the text.
+sub print_format {
+  print hr();
+  print a({-name=>'format'},h2('Annotation file format'));
+  print <<END;
+<p>
+The annotation file format has a configuration section and a data section.  The configuration section
+sets up the size and overall properties of the image, and the data section gives the annotation data 
+itself.
+<p>
+<h3>Configuration Section</h3>
+<p>
+If not provided, this page generates a reasonable default configuration section for you, so you 
+do not need to provide a configuration section to get a reasonable image.  However, to tune the
+appearance of the image, you will probably want to tweak the configuration.  Here is an excerpt 
+from the configuration section:
+<blockquote>
+<pre>
+# example file
+[general]
+bases = -1000..21000
+height = 12
+
+[EST]
+glyph = segments
+bgcolor= yellow
+connector = solid
+height = 5
+
+[FGENES]
+glyph = transcript2
+bgcolor = green
+description = 1
+</pre>
+</blockquote>
+
+<p>
+The configuration section is divided into a set of sections, each one labeled with a [section title].
+The [general] section specifies global options for the entire image.  Other sections apply to particular
+feature types.  In the example above, the configuration in the [EST] section applies to features labeled
+as ESTs, while the configuration in the [FGENES] section applies to features labeled as predictions from
+the FGENES gene prediction program.
+<p>
+Inside each section is a series of <i>name</i>=<i>value</i> pairs, where the name is the name of
+an option to set.  You can put whitespace around the = sign to make it more readable, or even use
+a colon (:) if you prefer.  The following option names are recognized:
+<p>
+<table border="1">
+<tr>
+  <th>Option</th><th>Value</th><th>Example</th>
+</tr>
+<tr>
+  <th>bases</th><td>Min &amp; max of the sequence range (bp)</td><td>1200..60000</td>
+</tr>
+<tr>
+  <th>width</th><td>width of the image (pixels)</td>                 <td>600</td>
+</tr>
+<tr>
+  <th>height</th><td>Height of each graphical element (pixels)</td><td>10</td>
+</tr>
+<tr>
+  <th>glyph</th><td>Style of each graphical element (see below)</td><td>transcript</td>
+</tr>
+<tr>
+  <th>fgcolor</th>      <td>Foreground color of each element</td>            <td>yellow</td>
+</tr>
+<tr>
+  <th>bgcolor</th>      <td>Background color of each element</td>            <td>blue</td>
+</tr>
+<tr>
+  <th>linewidth</th>      <td>Width of lines</td>            <td>3</td>
+</tr>
+<tr>
+  <th>label</th>        <td>Print the feature's name</td>         <td>1</td>
+</tr>
+<tr>
+  <th>description</th>  <td>Whether to print the feature's description </td> <td>0</td>
+</tr>
+<tr>
+  <th>bump</th>         <td>Elements are not allowed to collide</td> <td>1</td>
+</tr>
+<tr>
+  <th>ticks</th>        <td>Print tick marks on arrows</td>       <td>1</td>
+</tr>
+<tr>
+  <th>connector</th>    <td>Type of group connector (dashed, hat or solid)</td>       <td>dashed</td>
+</tr>
+</table>
+<p>
+
+The "bases" and "width" options are only relevant in the [general]
+section.  The rest can be located in any section, but if present in
+the [general] section will set defaults for the others.
+
+<p>
+
+Colors are English-language color names or Web-style #RRGGBB colors
+(see a book on HTML for an explanation).  True/false values are 1 for
+true, and 0 for false.  Numeric ranges can be expressed in
+<i>start</i>..<i>end</i> fashion with two dots, or as
+<i>start</i>-<i>end</i> with a hyphen.
+
+<p>
+The "glyph" option controls how the features are rendered.  The
+following glyphs are implemented:
+
+<p>
+
+<table border="1">
+
+<tr><th>Name</th><th>Description</th></tr>
+<tr>
+  <th>
+  box
+  </th>
+  <td>A filled rectangle, nondirectional.</td>
+</tr>
+<tr>
+  <th>ellipse</th><td>An oval.</td>
+</tr>
+<tr>
+<th>arrow</th>
+<td>	      An arrow; can be unidirectional or bidirectional.
+	      It is also capable of displaying a scale with
+	      major and minor tickmarks, and can be oriented
+	      horizontally or vertically.
+</td>
+</tr>
+<tr>
+  <th>segments</th>
+  <td>    A set of filled rectangles connected by solid lines.
+  Used for interrupted features, such as gapped
+  alignments.
+</td>
+</tr>
+<tr>
+  <th>transcript</th>
+<td>
+  Similar to segments, but the connecting line is
+  a "hat" shape, and the direction of transcription
+  is indicated by a small arrow.
+  </td>
+</tr>
+<tr>
+<th>
+  transcript2</th>
+<td>  Similar to transcript, but the direction of
+  transcription is indicated by a terminal segment
+  in the shape of an arrow.
+</td>
+</tr>
+<tr>
+<th>
+  primers
+</th>
+<td>     Two inward pointing arrows connected by a line.
+	      Used for STSs.
+</td>
+</tr>
+</table>
+<p>
+
+The <b>bump</b> option is the most important option for controlling the look
+of the image.  If set to false (the number 0), then the features are allowed
+to overlap.  If set to true (the number 1), then the features will move
+vertically to avoid colliding.  If not specified, bump is turned on
+if the number of any given type of sequence feature is greater than
+${\BUMP_THRESHOLD}.
+
+<h3>Data Section</h3>
+<p>
+
+The data section can follow or precede the configuration section.  The two sections
+can also be intermixed.  The data section is a tab or whitespace-delimited file which you can
+export from a spreadsheet application or word processor file (be sure to save as text only!)
+
+<p>
+
+Here is an example data section:
+
+<p>
+
+<blockquote>
+<pre>
+Cosmid	   B0511	.	516-619
+Cosmid	   B0511	.	3185-3294
+Cosmid	   B0511	.	10946-11208
+Cosmid	   B0511	.	13126-13511
+Cosmid	   B0511	.	66-208
+Cosmid	   B0511	.	6354-6499
+Cosmid	   B0511	.	13955-14115
+EST	   yk595e6.5	+	3187-3294
+EST	   yk846e07.3	-	11015-11208
+EST	   yk53c10
+	   yk53c10.5	+	18892-19154
+	   yk53c10.3	-	15000-15500,15700-15800
+EST	   yk53c10.5	+	16032-16105
+SwissProt  PECANEX	+	13153-13656	Swedish fish
+FGENESH	   "Gene 1"	-	1-205,518-616,661-735,3187-3365,3436-3846	Transmembrane domain
+FGENESH	   "Gene 2"	-	16626-17396,17451-17597	Kinase and sushi domains
+</pre>
+</blockquote>
+
+<p>
+
+Each line of the file contains five columns.  The columns are:
+
+<p>
+
+<table border="1">
+<tr><th>Column #</th><th>Column Description</th></tr>
+<tr><td align="right">1</td><td>feature type</td></tr>
+<tr><td align="right">2</td><td>feature name</td></tr>
+<tr><td align="right">3</td><td>strand</td></tr>
+<tr><td align="right">4</td><td>coordinates</td></tr>
+<tr><td align="right">5</td><td>description</td></tr>
+</table>
+<p>
+
+The <b>feature type</b> should correspond to one of the [feature type] headings
+in the configuration section.  If it doesn't, the [general] options will
+be applied to the feature when rendering it.  The <b>feature name</b> is a
+name for the feature.  Use a "." or "-" if this is not relevant.  If
+the name contains whitespace, put single or double quotes ("") around
+the name.
+
+<p>
+
+The <b>strand</b>
+indicates which strand the feature is on.  It is one of "+" for the 
+forward strand, "-" for the reverse strand, or "." for features that are not
+stranded.  
+
+<p>
+
+The <b>coordinates</b> column is a set of one or more ranges that the
+feature occupies.  Ranges are written using ".." as in <i>start</i>..<i>stop</i>,
+or with hyphens, as in <i>start</i>-<i>stop</i>. For features that are composed
+of multiple ranges &mdash; for example transcripts that have multiple exons &mdash;
+you can either put the ranges on the same line separated by commas or spaces,
+or put the ranges on individual lines and just use the same feature name and
+type to group them.  In the example above, the Cosmid B0511 features use
+the individual line style, while the FGENESH features use the all-ranges-on-one-line
+style.
+
+<p>
+
+The last column contains some descriptive text.  If the <b>description</b> option
+is set to true, this text will be printed underneath the feature in the rendering.
+
+<p>
+
+Finally, it is possible to group related features together.  An example is
+the ESTs yk53c10.5 and yk53c10.3, which are related by being reads from 
+the two ends of the clone yk53c10.  To indicate this relationship, generate
+a section that looks like this:
+
+<p>
+
+<blockquote>
+<pre>
+EST	   yk53c10
+	   yk53c10.5	+	18892-19154
+	   yk53c10.3	-	15000-15500,15700-15800
+</pre>
+</blockquote>
+
+<p>
+
+The group is indicated by a line that contains just two columns 
+containing the feature type and a unique name for the group.
+Follow this line with all 
+the features that form the group, but leave the first column 
+(the feature type) blank.  The group will be rendered by
+drawing a dashed line between all the members of the group.  
+You can change this by specifying a different <b>connector</b>
+option in the configuration section for this feature type.
+
+END
+
+}
+
+# Return the built-in example annotation file.
+#
+# With a true argument the result is a sample configuration section followed
+# by the sample feature lines; with a false argument only the feature lines
+# are returned.
+sub test_data {
+  my $with_config = shift;
+
+  my $config_section = <<'END';
+[general]
+bases = -1000..21000
+height = 12
+reference = B0511
+
+[Cosmid]
+glyph = segments
+fgcolor = blue
+key = C. elegans conserved regions
+
+[EST]
+glyph = segments
+bgcolor= yellow
+connector = solid
+height = 5
+
+[FGENESH]
+glyph = transcript2
+bgcolor = green
+description = 1
+
+[SwissProt]
+glyph = arrow
+base  = 1
+linewidth = 2
+fgcolor = red
+description = 1
+
+[P-element]
+glyph = triangle
+orient = S
+bgcolor = red
+fgcolor = white
+label = 1
+point = 1
+
+END
+
+  my $feature_lines = <<'END';
+Cosmid	B0511	516-619
+Cosmid	B0511	3185-3294
+Cosmid	B0511	10946-11208
+Cosmid	B0511	13126-13511
+Cosmid	B0511	11394-11539
+Cosmid	B0511	14383-14490
+Cosmid	B0511	15569-15755
+Cosmid	B0511	18879-19178
+Cosmid	B0511	15850-16110
+Cosmid	B0511	66-208
+Cosmid	B0511	6354-6499
+Cosmid	B0511	13955-14115
+Cosmid	B0511	7985-8042
+Cosmid	B0511	11916-12046
+P-element	""	500-500
+P-element	MrQ	700-700
+P-element	MrR	10000-10000
+EST	yk260e10.5	15569-15724
+EST	yk672a12.5	537-618,3187-3294
+EST	yk595e6.5	552-618
+EST	yk595e6.5	3187-3294
+EST	yk846e07.3	11015-11208
+EST	yk53c10
+	yk53c10.3	12876-13577,13882-14121,14169-14535
+	yk53c10.5	18892-19154,15853-16219
+SwissProt	"PECANEX Protein"	5513-16656	"From SwissProt"
+FGENESH	"Predicted gene 1"	-1200--500,518-616,661-735,3187-3365,3436-3846	Pfam domain
+FGENESH	"Predicted gene 2"	5513-6497,7968-8136,8278-8383,8651-8839,9462-9515,10032-10705,10949-11340,11387-11524,11765-12067,12876-13577,13882-14121,14169-14535,15006-15209,15259-15462,15513-15753,15853-16219	Mysterious
+FGENESH	"Predicted gene 3"	16626-17396,17451-17597
+FGENESH	"Predicted gene 4"	18459-18722,18882-19176,19221-19513,19572-30000	"Transmembrane protein"
+END
+
+  if ($with_config) {
+    return $config_section . $feature_lines;
+  }
+  return $feature_lines;
+}
+
+__END__
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/frend.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/graphics/search_overview.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/graphics/search_overview.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/graphics/search_overview.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,154 @@
+#!/usr/bin/perl -w
+# $Id: search_overview.PLS,v 1.3 2003/11/25 17:11:43 jason Exp $
+
+=head1 NAME
+
+search_overview -- Render a SearchIO parser report into a simple overview graphic
+
+=head1 SYNOPSIS
+
+search_overview -i filename [-f format] [-o outputfilename] [--labels]
+
+=head1 DESCRIPTION
+
+This script will take any Bio::SearchIO parseable report and turn it
+into a simple overview graphic of the report.  For our purposes we are
+assuming BLAST and the BLAST scores when assigning colors.  Output is
+a PNG format file.
+
+This is not intended to be an overly customized script, rather it
+should probably just be either a quick and dirty look at a report or a
+starting point for more complicated implementations.
+
+The color is determined by the hit score which is currently pegged to the NCBI 
+scheme which looks like this
+
+ RED     E<gt>= 200 
+ PURPLE  80-200
+ GREEN   50-80
+ BLUE    40-50
+ BLACK   E<lt>40
+
+Options:
+ -i/--input        The input filename, otherwise input is assumed from STDIN
+ -o/--output       The output filename, this is optional, if you do not
+                   provide the output filename the script will create a file
+                   using the name of the query sequence and will process
+                   all the sequences in the file.  If an output filename
+                   IS provided the script will only display an image for the 
+                   first one.
+ -f/--format       The SearchIO format parser to use, if not provided
+                   SearchIO will guess based on the file extension.
+ -l/--labels       Display the hit sequence name as a label in the overview.
+                   For lots of sequences this will make the image very long
+                   so by default it is turned off.
+
+=head1 AUTHOR Jason Stajich
+
+Jason Stajich, jason[-at-]open-bio[-dot-]org.
+
+=cut
+
+use strict;
+
+use Bio::Graphics::Panel;
+use Bio::Graphics::Feature;
+use Bio::Graphics::FeatureFile;
+use Bio::SearchIO;
+use Getopt::Long;
+
+use constant WIDTH          => 600;  # default width
+
+# Command-line settings; see the Options list in the POD above.
+my ($in,$format,$out);
+
+my $showlabels = 0;
+
+# This defines the color order
+# For NCBI it is typically defined like this
+# Score
+# RED     >= 200 
+# PURPLE  80-200
+# GREEN   50-80
+# BLUE    40-50
+# BLACK   <40
+# @SCORES lists the lower bound of each bin, highest first, and is
+# index-aligned with @COLORS.
+my @COLORS = qw(red magenta green blue black);
+my @SCORES = (200,80,50,40,0);
+
+GetOptions(
+	   'i|in|input:s'   => \$in,
+	   'f|format:s'     => \$format,
+	   'o|output:s'     => \$out,
+	   'l|labels'       => \$showlabels
+	   );
+
+
+# Direct method call rather than indirect-object "new Bio::SearchIO(...)".
+my $parser = Bio::SearchIO->new(-file   => $in,
+				-format => $format);
+
+# Draw one overview image per parsed result: a scale arrow for the query,
+# followed by one track per hit coloured according to the hit score.
+# When an output file name was given, only the first result is drawn.
+while( my $r = $parser->next_result ) {
+
+    my ($qname,$qlen) = ($r->query_name, $r->query_length);
+    my $max = 0;   # fallback panel length when the query length is unknown
+    my (@features,@configs);
+    while( my $h = $r->next_hit ) {
+        next if $h->num_hsps == 0;
+        my ($left,$right) = ( $h->start('query'),
+                              $h->end  ('query') );
+
+        # NOTE(review): this tracks the longest hit span, not the largest
+        # end coordinate -- confirm that is the intended fallback length.
+        if( ! $qlen ) {
+            $max = MAX($max,abs($right-$left));
+        }
+
+        # Pick the first bin whose lower bound the score reaches (">=", to
+        # match the documented scheme where RED is ">= 200").  A score below
+        # every bound is clamped into the last bin so that it always maps to
+        # a defined colour.
+        my $score = $h->score;
+        my $bin   = 0;
+        for my $s ( @SCORES ) {
+            last if( $score >= $s );
+            $bin++;
+        }
+        $bin = $#COLORS if $bin > $#COLORS;
+
+        push @features, Bio::Graphics::Feature->new(-start   => $left,
+                                                    -stop    => $right,
+                                                    -type    => 'similarity',
+                                                    -name    => $h->name,
+                                                    -desc    => $h->description
+                                                    );
+        push @configs, [ ( -glyph   => 'segments',
+                           -bgcolor => $COLORS[$bin],
+                           -fgcolor => $COLORS[$bin],
+                           -label   => $showlabels,
+                           -height  => 1,
+                           )];
+
+    }
+    my $panel = Bio::Graphics::Panel->new(-length => $qlen || $max,
+                                          -bgcolor => 'white',
+                                          -pad_left=> 10,
+                                          -pad_right=> 10);
+    $panel->add_track('arrow' => Bio::Graphics::Feature->new
+                      (-start => 1,
+                       -end   => $qlen || $max),
+                      -bump   => 0,
+                      -double => 1,
+                      -tick   => 2,
+                      );
+    # @features and @configs were filled in lock-step, one entry per hit.
+    foreach my $f ( @features ) {
+        my $c = shift @configs;
+        $panel->add_track($f, @$c);
+    }
+    if( $out ) {
+        # Three-argument open: the file name is never parsed for a mode.
+        open(my $fh,'>',$out) || die("cannot open $out: $!");
+        binmode($fh);
+        print $fh $panel->png;
+        close($fh) || die("cannot write $out: $!");
+        if( $parser->result_count > 1 ) {
+            print STDERR "only printing the first result, do not provide a outfile name if you want to see them all\n";
+        }
+        last;
+    } else {
+        open(my $fh,'>',"$qname.png") || die("$qname: $!");
+        binmode($fh);
+        print $fh $panel->png;
+        close($fh) || die("$qname: $!");
+    }
+}
+ 
+# Return the numerically larger of two values.
+sub MAX {
+    my ($first,$second) = @_;
+    return $second if $first < $second;
+    return $first;
+}
+# Return the numerically smaller of two values.
+sub MIN {
+    my ($first,$second) = @_;
+    return $second if $first > $second;
+    return $first;
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/index/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/index/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/index/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+These are scripts to create and maintain flatfile databases indexed
+with the Bio::Index or Bio::DB::Fasta modules.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_fetch.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_fetch.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_fetch.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,318 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+bp_fetch.pl - fetches sequences from bioperl indexed databases
+
+=head1 SYNOPSIS 
+
+  bp_fetch.pl swiss:ROA1_HUMAN
+
+  bp_fetch.pl net::genbank:X47072
+
+  bp_fetch.pl net::genpept:ROA1_HUMAN
+
+  bp_fetch.pl ace::myserver.somewhere.edu,21000:X56676
+
+  bp_fetch.pl -fmt GCG swiss:ROA1_HUMAN
+
+=head1 DESCRIPTION
+
+Fetches sequences using the DB access systems in Bioperl. The most
+common use of this is to fetch sequences from bioperl indices built
+using bp_index.pl, or to fetch sequences from the NCBI website.
+
+The format for retrieving sequences is deliberately like the
+GCG/EMBOSS format like the following:
+
+  db:name
+
+with the potential of putting in a 'meta' database type, being
+
+  meta::db:name
+
+The meta information can be one of three types
+
+  local - local indexed flat file database
+  net   - networked http: based database
+  ace   - ACeDB database
+
+This information defaults to 'local' for database names with no meta
+db information
+
+=head1 OPTIONS
+
+  -fmt  <format> - Output format
+                   Fasta (default), EMBL, Raw, swiss or GCG
+  -acc           - string is an accession number, not an
+                   id. 
+
+options only for expert use
+
+  -dir  <dir>    - directory to find the index files
+                  (overrides BIOPERL_INDEX environment variable)
+  -type <type>   - type of DBM file to open 
+                  (overrides BIOPERL_INDEX_TYPE environment variable)
+
+=head1 ENVIRONMENT
+
+bp_index and bp_fetch coordinate where the databases lie using the
+environment variable BIOPERL_INDEX. This can be overridden using the
+-dir option. The index type (SDBM or DB_File or another index file)
+is controlled by the BIOPERL_INDEX_TYPE variable. This defaults to 
+SDBM_File 
+
+=head1 USING IT YOURSELF
+
+bp_fetch is a wrapper around the bioperl modules which support 
+the Bio::DB::BioSeqI abstract interface. These include:
+
+  Author          Code
+
+  James Gilbert - Fasta indexer, Abstract indexer
+  Aaron Mackay  - GenBank and GenPept DB access
+  Ewan Birney   - EMBL .dat indexer
+  Many people   - SeqIO code
+
+These modules can be used directly, which is far better than using
+this script as a system call or a pipe to read from. Read the
+source code for bp_fetch to see how it is used.
+
+=head1 EXTENDING IT
+
+bp_fetch uses a number of different modules to provide access to
+databases. Any module which subscribes to the Bio::DB::BioSeqI
+interface can be used here. For flat file indexers, this is
+best done by extending Bio::Index::Abstract, as is done in
+Bio::Index::EMBL and Bio::Index::Fasta. For access to other
+databases you will need to roll your own interface.
+
+For new output formats, you need to add a new SeqIO module. The
+easiest thing is to look at Bio::SeqIO::Fasta and figure out
+how to hack it for your own format (call it something different
+obviously).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Ewan Birney E<lt>birney at ebi.ac.ukE<gt>
+
+=cut
+
+use strict;
+use Getopt::Long;
+
+#
+# Dofus catcher for people who are trying this script without
+# installing bioperl. In your own script, you can just go
+#
+# use Bio::Index::Fasta etc, rather than this
+#
+
+# Load the bioperl modules at compile time and give a friendly diagnosis
+# when they are missing:
+#   1. try the modules as installed;
+#   2. on failure, append ".." to @INC (the script is presumably being run
+#      from bioperl/scripts) and retry;
+#   3. exit(1) if the index modules still cannot be loaded.
+# The network database modules are optional: failing to load them only
+# prints a notice, which BIOPERL_SAVVY in the environment silences.
+BEGIN {
+    eval {
+        require Bio::Index::Fasta;
+        require Bio::Index::EMBL;
+        require Bio::Index::GenBank;
+        require Bio::Index::Swissprot;
+        require Bio::SeqIO;
+    };
+    if ( $@ ) {
+        # one up from here is Bio directory - we hope!
+        push(@INC,"..");
+        eval {
+            require Bio::Index::Fasta;
+            require Bio::Index::EMBL;
+            require Bio::Index::Swissprot;
+            require Bio::SeqIO;
+        };
+        if ( $@ ) {
+            # The messages name this script (bp_fetch); they used to say
+            # bp_index because they were copied from the sibling script.
+            print STDERR ("\nbp_fetch cannot find Bio::Index::Fasta and Bio::Index::EMBL\nbp_fetch needs to have bioperl installed for it to run.\nBioperl is very easy to install\nSee http://bio.perl.org for more information\n\n");
+            exit(1);
+        } else {
+            print STDERR ("\nYou are running bp_fetch.pl without installing bioperl.\nYou have done it from bioperl/scripts, and so we can find the necessary information\nbut it is much better to install bioperl\n\nPlease read the README in the bioperl distribution\n\n");
+        }
+    }
+
+    # Remote (net::) databases: optional, so only warn when unavailable.
+    eval {
+        require Bio::DB::GenBank;
+        require Bio::DB::GenPept;
+        require Bio::DB::EMBL;
+    };
+
+    if( $@ ) {
+        if( !exists $ENV{'BIOPERL_SAVVY'} ) {
+            print STDERR ("\nbp_fetch cannot find Bio::DB::GenBank and Bio::DB::EMBL modules\nThis is most likely because LWP has not been installed\nThis does not effect local indexing\nset environment variable BIOPERL_SAVVY to supress this message\n\n");
+        }
+    }
+}
+
+#
+# Start processing the command line
+#
+
+# Defaults come from the environment and may be overridden on the command
+# line.  The documented variable is BIOPERL_INDEX_TYPE; the misspelled
+# BIOPER_INDEX_TYPE that this script used to read is still honoured as a
+# fallback so existing setups keep working.
+my $dir = $ENV{'BIOPERL_INDEX'};
+my $type = $ENV{'BIOPERL_INDEX_TYPE'} || $ENV{'BIOPER_INDEX_TYPE'};
+my $fmt = 'Fasta';
+my $useacc = 0;
+my $ret = GetOptions('dir=s' => \$dir,'fmt=s' => \$fmt , 'type=s' => \$type , 'acc!' => \$useacc);
+
+#
+# print pod documentation if we have no arguments
+#
+
+exec('perldoc',$0) unless @ARGV;
+
+my($isnet,$db,$dbobj,$id,$seq,$seqio,$out,$meta);
+
+#
+# Reset the type if needed
+#
+
+if( $type ) {
+   $Bio::Index::Abstract::USE_DBM_TYPE = $type;
+}
+
+#
+# Build at run time the SeqIO output
+#
+
+$out = Bio::SeqIO->new(-fh => \*STDOUT , -format => $fmt);
+
+#
+# Main loop over remaining arguments
+#
+
+# Handle each "[meta::]db:id" argument independently: resolve a database
+# object for it, fetch the sequence and write it to the output stream.
+# Any failure is reported on STDERR and the argument is skipped.
+ARG: foreach my $arg ( @ARGV ) {
+    my $spec = $arg;
+
+    # strip out meta:: if there, defaulting to a local index
+    if( $spec =~ s/^(\w+)::// ) {
+        $meta = $1;
+    } else {
+        $meta = 'local';
+    }
+
+    # what is left must look like db:id
+    unless( $spec =~ /^(\S+)\:(\S+)$/ ) {
+        print STDERR "$spec is not parsed as db:name\n";
+        next ARG;
+    }
+    ($db,$id) = split(/:/,$spec,2);
+
+    #
+    # Build the database object.  Everything that can throw lives in
+    # the eval so the exception ends up in $@.
+    #
+    eval {
+        if( $meta =~ /^net$/ ) {
+            if( $db =~ /genbank/i ) {
+                $dbobj = Bio::DB::GenBank->new();
+            } elsif( $db =~ /genpept/i ) {
+                $dbobj = Bio::DB::GenPept->new();
+            } elsif( $db =~ /embl/i ) {
+                $dbobj = Bio::DB::EMBL->new();
+            } else {
+                die "Net database $db not available";
+            }
+        } elsif( $meta =~ /^ace$/ ) {
+            # yank in Bio::DB::Ace at runtime
+            eval {
+                require Bio::DB::Ace;
+            };
+            if ( $@ ) {
+                die "Unable to load Bio::DB::Ace for ace::$db\n\n$@\n";
+            }
+
+            # db is server,port
+            unless( $db =~ /(\S+)\,(\d+)/ ) {
+                die "$db is not server.name,port for acedb database";
+            }
+            my ($server,$port) = ($1,$2);
+
+            $dbobj = Bio::DB::Ace->new(-host => $server, -port => $port);
+        } elsif( $meta =~ /^local$/ ) {
+            if( !$dir ) {
+                die "\nNo directory specified for index\nDirectory must be specified by the environment varaible BIOPERL_INDEX or --dir option\ngo bp_index with no arguments for more help\n\n";
+            }
+
+            # The abstract constructor hands back an object re-blessed
+            # into the index class that matches the file.
+            $dbobj = Bio::Index::Abstract->new("$dir/$db");
+        } else {
+            die "Meta database $meta is not valid";
+        }
+    }; # end of eval to get db
+    if( $@ ) {
+        warn("Database $db in $arg is not loadable. Skipping\n\nError $@");
+        next ARG;
+    }
+
+    #
+    # Databases are expected to implement Bio::DB::RandomAccessI;
+    # anything else only gets a warning and is tried regardless.
+    #
+    unless( $dbobj->isa('Bio::DB::RandomAccessI') ) {
+        warn("$db in $arg does not inherit from Bio::DB::RandomAccessI, so is not expected to work under the DB guidlines. Going to try it anyway");
+    }
+
+    # Look the entry up by id, or by accession when -acc was given.
+    eval {
+        $seq = ( $useacc == 0 )
+            ? $dbobj->get_Seq_by_id($id)
+            : $dbobj->get_Seq_by_acc($id);
+    };
+    if( $@ ) {
+        warn("Sequence $id in Database $db in $arg is not loadable. Skipping.\n\nError $@");
+        next ARG;
+    }
+    unless( defined $seq ) {
+        warn("Sequence $id in Database $db is not present\n");
+        next ARG;
+    }
+
+    $out->write_seq($seq);
+}
+
+
+	
+
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_fetch.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_index.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_index.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_index.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,196 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+bp_index.pl - indexes files for use by bp_fetch.pl
+
+=head1 SYNOPSIS
+
+bp_index.pl index_name file1 file2 etc.
+
+=head1 DESCRIPTION
+
+bp_index.pl builds a bioperl index for the sequence files given in the
+argument list, under the index name. For example
+
+   bp_index.pl nrdb /data/nrdb/nrdb.fasta
+
+would build an index called 'nrdb' as the index name for the file
+nrdb.fasta, and
+
+   bp_index.pl -fmt EMBL swiss /data/swiss/*.dat
+
+would build an index called swiss for all the files in /data/swiss
+which end in .dat which are in EMBL format.
+
+The indexes are built using the Bio/Index/* modules, in particular,
+Bio::Index::EMBL and the Bio::Index::Fasta modules. Any script which
+uses these modules can use the index. A good example script is bp_fetch
+which fetches sequences and pipes them to STDOUT, for example
+
+   bp_fetch swiss:ROA1_HUMAN
+
+gets the ROA1_HUMAN sequence from the swiss index and writes it as
+fasta format on STDOUT.
+
+=head1 OPTIONS
+
+  -fmt  <format>   - Fasta (default), swiss or EMBL
+  -dir  <dir>      - directory where the index files are found
+                     (overrides BIOPERL_INDEX environment variable)
+
+Options for expert use
+
+  -type <db_type>  - DBM_file type. 
+                     (overrides BIOPERL_INDEX_TYPE environment variable)
+  -v               - report every index addition (debugging)
+
+=head1 ENVIRONMENT
+
+bp_index and bp_fetch coordinate where the databases lie using the
+environment variable BIOPERL_INDEX. This can be overridden using the
+-dir option. There is no default value, so you must use the -dir option 
+or set BIOPERL_INDEX.
+
+The DB type is coordinated with BIOPERL_INDEX_TYPE which if it
+is not there, defaults to whatever the bioperl modules have installed,
+which itself defaults to SDBM_File.
+
+=head1 USING IT YOURSELF
+
+bp_index.pl is a script that drives the Index modules. If you want to 
+use this script heavily in your work, if it is Perl based, it is 
+almost certainly better to look at the code in this script and copy
+it across (probably you will be more likely to want to use the bp_fetch
+code).
+
+=head1 EXTENDING IT
+
+bp_index is just a wrapper around James Gilbert's excellent Index modules
+found in bioperl
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Ewan Birney
+
+Ewan Birney E<lt>birney at ebi.ac.ukE<gt>
+
+=cut
+
+#'
+use strict;
+
+#
+# Doofus catcher for people who are trying this script without
+# installing bioperl
+#
+
+# Load the index modules at compile time.  If they are not installed, retry
+# with ".." appended to @INC (the script is presumably being run from
+# bioperl/scripts); exit(1) with an explanation if that fails as well.
+BEGIN {
+    eval {
+        require Bio::Index::Fasta;
+        require Bio::Index::EMBL;
+        require Bio::Index::Swissprot;
+        require Bio::Index::GenBank;
+    };
+    if ( $@ ) {
+        # one up from here is Bio directory - we hope!
+        push(@INC,"..");
+        eval {
+            require Bio::Index::Fasta;
+            require Bio::Index::EMBL;
+        };
+        if ( $@ ) {
+            print STDERR ("\nbp_index cannot find Bio::Index::Fasta and Bio::Index::EMBL\nbp_index needs to have bioperl installed for it to run.\nBioperl is very easy to install\nSee http://bio.perl.org for more information\n\n");
+            exit(1);
+        } else {
+            # The -fmt dispatch further down also constructs Swissprot and
+            # GenBank indexes, so load those too.  This is best-effort:
+            # Fasta/EMBL indexing must keep working if one is absent.
+            eval { require Bio::Index::Swissprot; };
+            eval { require Bio::Index::GenBank; };
+            print STDERR ("\nYou are running bp_index.pl without installing bioperl.\nYou have done it from bioperl/scripts, and so we can find the necessary information\nbut it is much better to install bioperl\n\nPlease read the README in the bioperl distribution\n\n");
+        }
+    }
+}
+
+# Defaults come from the environment and may be overridden on the command
+# line.  The documented variable is BIOPERL_INDEX_TYPE; the misspelled
+# BIOPER_INDEX_TYPE that this script used to read is still honoured as a
+# fallback so existing setups keep working.
+my $dir = $ENV{'BIOPERL_INDEX'};
+my $type = $ENV{'BIOPERL_INDEX_TYPE'} || $ENV{'BIOPER_INDEX_TYPE'};
+my $fmt = 'Fasta';
+my $verbose = 0;
+
+use Getopt::Long;
+GetOptions("fmt=s" => \$fmt,"dir=s" => \$dir,"type=s" => \$type, "v!" => \$verbose);
+
+# With nothing left on the command line, show the documentation instead.
+exec('perldoc',$0) unless @ARGV;
+
+# First remaining argument is the index name; the rest are files to index.
+my $name = shift;
+
+if( !$dir ) {
+    print STDERR "\nNo directory specified for index\nDirectory must be specified by the environment varaible BIOPERL_INDEX or -dir option\ngo bp_index with no arguments for more help\n\n";
+    exit(1);
+}
+
+#
+# Reset the type if needed
+#
+
+if( $type ) {
+   $Bio::Index::Abstract::USE_DBM_TYPE = $type;
+}
+#
+# Rock and roll...
+# 
+# Choose the indexer class from -fmt.  Patterns are tried in this order and
+# match case-insensitively anywhere in the format name; the first hit wins.
+my $index;
+my $index_path = "$dir/$name";
+if( $fmt =~ /Fasta/i ) {
+    $index = Bio::Index::Fasta->new($index_path, 'WRITE');
+} elsif( $fmt =~ /EMBL/i ) {
+    $index = Bio::Index::EMBL->new($index_path, 'WRITE');
+} elsif( $fmt =~ /swiss/i ) {
+    $index = Bio::Index::Swissprot->new($index_path, 'WRITE');
+} elsif( $fmt =~ /GenBank/i ) {
+    $index = Bio::Index::GenBank->new($index_path, 'WRITE');
+} else {
+    die("No index format called $fmt");
+}
+
+# -v asks the index object to be verbose.
+$index->verbose(1) if $verbose != 0;
+
+# Every remaining command line argument is a file to add to the index.
+$index->make_index(@ARGV);
+
+# finished. Neat eh.
+
+#
+# if you are using this in a script, to 
+# to force deallocation + closing of the index, go
+# $index = undef;
+#
+
+	
+
+
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_index.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_seqret.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_seqret.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/index/bp_seqret.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,96 @@
+#!/usr/bin/perl -w
+# $Id: bp_seqret.PLS,v 1.3 2005/08/30 13:05:23 jason Exp $
+# -*-Perl-*- mode (for emacs)
+
+=head1 NAME
+
+bp_seqret - bioperl implementation of sequence fetch from local db (like EMBOSS seqret)
+
+=head1 USAGE
+
+bp_seqret [-f/--format outputformat] [-o/--out/--outfile outfile] [-d/--db dbname] [-i/--id/--seqname seqname1] [-s/--start start] [-e/--end end]
+
+Example usage:
+
+   bp_seqret -f fasta -db db.fa -i seq1 -i seq2 > output.fas
+   bp_seqret db.fa:seq1 output.fas
+   bp_seqret db.fa:seq1 -o output.fas
+   bp_seqret -db db.fa -o output.fas seq1 seq2 seq3
+   bp_seqret -db db.fa seq1 seq2 seq3 output.fas
+   bp_seqret -db db.fa seq1 seq2 seq3 - > output.fas  
+
+The DB is expected to be a Fasta formatted sequence file with multiple
+sequences.
+
+Output format is Fasta by default.
+
+If no output filename is provided then output is written to STDOUT.
+Providing '-' as the output filename will accomplish the same thing.
+
+
+=head1 AUTHOR
+
+Jason Stajich jason_AT_bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::DB::Fasta;
+use Bio::SeqIO;
+use Getopt::Long;
+
+# Command line state: database file, requested sequence names, output
+# format/file and an optional start/end for a subsequence.
+my ($dbname, $outfile, $start, $end);
+my @names;
+my $format = 'fasta';
+GetOptions(
+    'd|db|dbname:s'          => \$dbname,
+    'i|id|seqname:s'         => \@names,
+    's|sbegin|begin|start:s' => \$start,
+    'e|send|end|stop:s'      => \$end,
+    'f|format:s'             => \$format,
+    'o|out|outfile:s'        => \$outfile,
+);
+
+# Without -d the first positional argument names the database, optionally
+# written as "dbname:seqname".
+unless( $dbname ) {
+    die "need a dbname\n" unless @ARGV;
+    $dbname = shift @ARGV;
+    if( $dbname =~ /^([^:]+):(.*)$/s ) {
+        push @names, $2;
+        $dbname = $1;
+    }
+}
+
+# Open (indexing on first use) the Fasta database.
+my $db = Bio::DB::Fasta->new($dbname, -glob => "*.{fa,fas,fsa,fasta,pep,aa,seq,cds,peps}");
+
+# Without -o the last positional argument is the output file name.
+if( ! $outfile ) {
+    $outfile = pop @ARGV;
+}
+
+# Whatever positional arguments remain are sequence names, as shown in the
+# usage examples ("bp_seqret -db db.fa seq1 seq2 seq3 output.fas").  They
+# were previously dropped without any message.
+push @names, @ARGV;
+
+my $out;
+if( $outfile ) {
+    $out = Bio::SeqIO->new(-format => $format,
+                           -file   => ">$outfile");
+} else {
+    # no file name: Bio::SeqIO is left to use its default output
+    $out = Bio::SeqIO->new(-format => $format);
+}
+
+for my $nm ( @names ) {
+    # Fetch the whole sequence, or a subsequence when -s and/or -e is given.
+    my $seq;
+    if( $start || $end ) {
+        $seq = $db->seq($nm, $start => $end);
+    } else {
+        $seq = $db->seq($nm);
+    }
+    if( $seq ) {
+        # the header line splits into id and free-text description
+        my ($id,$desc) = split(/\s+/,$db->header($nm),2);
+        if( $start && $end ) {
+            # record the requested range in the id
+            $id = sprintf("%s_%d-%d",$id,$start || 0,$end || 0);
+        }
+
+        $out->write_seq(Bio::PrimarySeq->new(-display_id => $id,
+                                             -description => $desc,
+                                             -seq => $seq));
+    } else {
+        warn("$nm not found\n");
+    }
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/popgen/composite_LD.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/popgen/composite_LD.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/popgen/composite_LD.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,107 @@
+#!/usr/bin/perl -w
+# -*-Perl-*- 
+# $Id: composite_LD.PLS,v 1.8 2003/11/13 00:32:09 jason Exp $
+
+=head1 NAME 
+
+composite_LD -i filename.prettybase.txt --sortbyld E<gt> outfile
+
+=head1 SYNOPSIS
+
+  composite_LD -i filename.prettybase [-o out.LD] [-f prettybase/csv] [--sortbyld] [--noconvert]
+
+=head2 DESCRIPTION
+
+This is a script which provides an easy way to calculate composite LD.
+
+=head2 OPTIONS
+
+-i or --in     filename
+
+-f or --format genotype format (prettybase or CSV)
+
+--sortbyld     To see data sorted by LD instead of just all the 
+               site1/site2 pair LD values.
+
+-o or --out    output filename, otherwise will print to STDOUT
+
+--noconvert    (applicable for prettybase format file only)
+               if specified will NOT attempt to convert indel
+               states to 'I' and delete states ('-') to 'D'.
+
+-h or --help   see this documentation
+
+=head2 AUTHOR Jason Stajich, Matthew Hahn
+
+For more information contact:
+
+Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
+Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
+
+
+=cut
+
+use strict;
+
+use Bio::PopGen::IO;
+use Bio::PopGen::Statistics;
+use Bio::PopGen::Population;
+
+use Getopt::Long;
+
+# Option state; the genotype format defaults to prettybase.
+my $format = 'prettybase';
+my ($file, $outfile, $sortbyld, $noconvert, $verbose);
+GetOptions(
+    'h|help'     => sub {
+        system('perldoc', $0);
+        exit;
+    },
+    'v|verbose'  => \$verbose,
+    'noconvert'  => \$noconvert,
+    'sortbyld'   => \$sortbyld,
+    'f|format:s' => \$format,
+    'o|out:s'    => \$outfile,
+    'i|in:s'     => \$file,
+);
+
+# if no -i specified, take the input file from the first argument
+$file = shift @ARGV unless $file;
+
+# Read the first population from the input and compute the pairwise
+# composite LD matrix for it.
+my $io = Bio::PopGen::IO->new(-file   => $file,
+                              -format => $format,
+                              -verbose => $verbose,
+                              -CONVERT_INDEL_STATES => ! $noconvert);
+
+my $stats = Bio::PopGen::Statistics->new(-verbose => $verbose);
+my $pop = $io->next_population;
+
+my %LD_matrix = $stats->composite_LD($pop);
+
+# sites can be ordered by sorting their names
+
+# Report every site pair.  Unsorted output is printed while walking the
+# matrix; with --sortbyld the rows are collected first and printed sorted.
+my @sites;
+my $out;
+if( $outfile ) {
+    # three-argument open: the file name is never parsed for a mode
+    open($out, '>', $outfile) || die("$outfile: $!");
+} else {
+    $out = \*STDOUT;
+}
+foreach my $site1 ( sort keys %LD_matrix ) {
+    foreach my $site2 ( sort keys %{ $LD_matrix{$site1} } ) {
+        my $LD = $LD_matrix{$site1}->{$site2}; # LD for site1,site2
+        if( $sortbyld ) {
+            # row layout: site1, site2, then the elements of @$LD
+            push @sites, [ $site1,$site2,@$LD];
+        } else {
+            printf $out "%s,%s - LD=%.4f chisq=%.4f\n",$site1,$site2,@$LD;
+        }
+    }
+}
+
+if( $sortbyld ) {
+    # NOTE(review): column 3 is the value printed as chisq, not the LD
+    # value in column 2, although the option is named --sortbyld.
+    # Confirm which ordering is intended before changing it.
+    foreach my $s ( sort { $b->[3] <=> $a->[3] } @sites ) {
+        my ($site1,$site2,$ld,$chisq) = @$s;
+        print $out "$site1,$site2 - LD=$ld, chisq=$chisq\n";
+    }
+}
+
+# Buffered write errors only surface at close, so check it for real files.
+if( $outfile ) {
+    close($out) || die("$outfile: $!");
+}
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/popgen/heterogeneity_test.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/popgen/heterogeneity_test.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/popgen/heterogeneity_test.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+#!/usr/bin/perl -w
+# -*-Perl-*- (for my emacs)
+# $Id: heterogeneity_test.PLS,v 1.2 2003/08/13 14:46:12 jason Exp $
+
+use strict;
+
+=head1 NAME 
+
+heterogeneity_test - a test for distinguishing between selection and population expansion.
+
+=head1 SYNOPSIS
+
+heterogeneity_test -mut_1/--mutsyn synonymous_mut_count -mut_2/--mutnon nonsyn_mut_count -s/--samplesize sample_size [-i/--iterations iterations] [-o/--observed observed_D] [-v/--verbose] [--silent ] [-m/--method tajimaD or fuD] [--precision]
+
+=head2 DESCRIPTION
+
+This is an implementation of the Heterogeneity test as described in
+Hahn MW, Rausher MD, and Cunningham CW. 2002. Genetics 161(1):11-20. 
+
+=head2 OPTIONS
+
+ Options in brackets above are optional
+
+ -s or --samplesize samplesize 
+ -mut_1 or --mutsyn synonymous mutation count 
+ -mut_2 or --mutnon nonsynonymous mutation count 
+ -i or --iterations number of iterations 
+ -o or --observed   observed D 
+ -m or --method     tajimaD or fuD  for Tajima's D or Fu and Li's D
+ -v or --verbose    print out extra verbose messages
+ --silent           Be extra quiet
+ --precision        Level of precision - specify the number of digits 
+                   (default 4)
+
+=head2 AUTHOR Matthew Hahn E<lt>matthew.hahn-at-duke.eduE<gt>
+
+For more information contact:
+
+Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
+Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
+
+=cut
+
+use Getopt::Long;
+use Bio::PopGen::Simulation::Coalescent;
+use Bio::PopGen::Statistics;
+use Bio::PopGen::Individual;
+use Bio::PopGen::Genotype;
+
+# Defaults for the simulation; each can be overridden on the command line.
+my $sample_size = 4;
+my $mut_count_1 = 10; # synonymous
+my $mut_count_2 = 20; # non-synonymous
+my $iterations = 1;
+my $verbose = 0;      # --silent sets this to -1, which mutes the per-D lines
+my $observedD = undef;
+my $method = 'fuD';
+my $help = 0;
+my $precision = '4';  # number of digits, handed to add_Mutations
+
+GetOptions(
+            's|samplesize|samp_size:i' => \$sample_size,
+            'mut_1|mutsyn:i'           => \$mut_count_1,
+            'mut_2|mutnon:i'           => \$mut_count_2,
+            'i|iterations:i'           => \$iterations,
+            # "observed" is the documented spelling; the old misspelling
+            # "obsered" is kept so existing command lines still parse.
+            'o|observed|obsered|observedD:f' => \$observedD,
+            'v|verbose'                => \$verbose,
+            'm|method:s'               => \$method,
+            'h|help'                   => \$help,
+            'silent'                   => sub { $verbose = -1; },
+            'p|precision:i'            => \$precision,
+            );
+
+if( $help ) {
+    system("perldoc",$0);
+    exit(0);
+}
+
+# Only the two statistics below are implemented.
+if( $method ne 'fuD' and $method ne 'tajimaD' ) {
+    die("available methods are [fu and li's D] (fuD) and Tajima's D (tajimaD)");
+}
+# One entry per iteration: D(synonymous) - D(non-synonymous).
+my @D_distribution;
+printf("sample size is %d iteration count = %d\n", $sample_size,
+       $iterations);
+
+my $sim = Bio::PopGen::Simulation::Coalescent->new
+    (-sample_size => $sample_size);
+
+# _simulated_D($sim, $tree, $mut_count, $precision, $method, $verbose)
+#
+# Adds $mut_count mutations to $tree and returns the D statistic selected by
+# $method ('fuD' or 'tajimaD') computed over the tree's leaves.  Returns 0
+# without touching the tree when $mut_count is not positive.  The result is
+# echoed on STDOUT unless $verbose is negative.
+sub _simulated_D {
+    my ($sim, $tree, $mut_count, $precision, $method, $verbose) = @_;
+    return 0 unless $mut_count > 0;
+
+    $sim->add_Mutations($tree,$mut_count,$precision);
+    my @leaves = $tree->get_leaf_nodes;
+
+    # the outgroup is just an individual with the ancestral state
+    # (no mutations)
+    my $outgroup = Bio::PopGen::Individual->new();
+    foreach my $m ( $leaves[0]->get_marker_names ) {
+        $outgroup->add_Genotype(Bio::PopGen::Genotype->new
+                                (-marker_name=> $m,
+                                 -alleles    => [ 0 ]));
+    }
+
+    my $D = 0;
+    if( $method eq 'fuD' ) {
+        $D = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
+    } elsif( $method eq 'tajimaD' ) {
+        $D = Bio::PopGen::Statistics->tajima_D(\@leaves);
+    }
+    print "(mutation count = $mut_count) D=$D\n" if( $verbose >= 0);
+    return $D;
+}
+
+for(my $iter = 0; $iter < $iterations; $iter++ ) {
+    my $tree = $sim->next_tree($sample_size);
+
+    # Both mutation rounds are applied to the same tree, synonymous first,
+    # exactly as the original inline code did.
+    my $f1 = _simulated_D($sim,$tree,$mut_count_1,$precision,$method,$verbose);
+    my $f2 = _simulated_D($sim,$tree,$mut_count_2,$precision,$method,$verbose);
+
+    my $deltaD = ( $f1 - $f2 );
+    push @D_distribution, $deltaD;
+
+    # progress report every tenth iteration
+    if( $iter % 10 == 0 && $iter > 0 ) {
+        print STDERR "iter = $iter\n";
+    }
+}
+
+# Locate the observed D within the sorted simulated distribution: $i ends up
+# as the index of the first simulated value greater than the observed one,
+# or as the number of values when none is greater.
+if( defined $observedD && $iterations > 1 ) {
+    my @sortedD = sort { $a <=> $b } @D_distribution;
+    my $i;
+    for($i = 0; $i < scalar @sortedD; $i++ ) {
+        if( $sortedD[$i] > $observedD ) {
+            last;
+        }
+    }
+
+    if( $i < scalar @sortedD ) {
+        printf( "index %d value=%.4f out of %d total (obs=%.4f)\n",
+                $i, $sortedD[$i], scalar @sortedD, $observedD);
+    } else {
+        # Every simulated value is <= the observed D, so there is no element
+        # at index $i; report that rather than reading past the array end.
+        printf( "index %d (no simulated value exceeds the observed D) out of %d total (obs=%.4f)\n",
+                $i, scalar @sortedD, $observedD);
+    }
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+These are scripts that run on top of Bio::SearchIO or generally fit
+into the Database search result parsing framework.
+
+fastam9_to_table.PLS -- script which doesn't use Bio::SearchIO but provides fast parsing of FASTA -m9 output to an NCBI m9-like table.
+hmmer_to_table.PLS -- script which doesn't use Bio::SearchIO but provides fast parsing of HMMER output into a tabular format.
+
+search2table.PLS -- Turn any Bio::SearchIO parseable report into NCBI m9-like tabular output
+
+filter_search.PLS - produce tabular output, filtering first. 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+These are scripts related to parsing Search Result output like HMMER, FASTA, BLAST.
+Not all of these scripts use Bioperl objects.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/fastam9_to_table.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/fastam9_to_table.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/fastam9_to_table.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,139 @@
+#!/usr/bin/perl -w
+
+=head1 NAME 
+
+fastam9_to_table  - turn FASTA -m 9 output into NCBI -m 9 tabular output
+
+=head1 SYNOPSIS
+
+ fastam9_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ... 
+
+=head1 DESCRIPTION
+
+Command line options:
+  -e/--evalue   evalue    -- filter by evalue
+  -b/--bitscore bitscore  -- filter by bitscore
+  --header                -- boolean flag to print column header 
+  -o/--out                -- optional outputfile to write data, 
+                             otherwise will write to STDOUT
+  -h/--help               -- show this documentation
+
+Not technically a SearchIO script as this doesn't use any Bioperl
+components, but it is useful and fast.  The output is tabular output
+with the standard NCBI -m9 columns.
+
+ queryname
+ hit name
+ percent identity
+ alignment length
+ number mismatches 
+ number gaps
+ query start  (if on rev-strand start > end)
+ query end 
+ hit start (if on rev-strand start > end)
+ hit end 
+ evalue
+ bit score
+
+Additionally, the following columns are provided
+ fasta score
+ sw-score
+ percent similar
+ query length
+ hit length
+ query gaps
+ hit gaps
+
+=head1 AUTHOR - Jason Stajich
+
+Jason Stajich jason_at_bioperl-dot-org
+
+=cut
+
+use strict;
+use Getopt::Long;
+# Parser state: true while inside a "The best scores are:" hit table, plus
+# the column values collected for the current query/hit.
+my $hitsection = 0;
+my %data;
+
+# NOTE(review): -e and -b are parsed here, but the filter that would use
+# them is commented out in the parsing loop, so they currently have no
+# effect on the output.
+my ($evalue,$bitscore,$header,$outfile) = ( 10, 0);
+GetOptions(
+           'b|bitscore|bits:f'   => \$bitscore,
+           'e|evalue:f'          => \$evalue,
+           'header'              => \$header,
+           'o|out|outfile:s'     => \$outfile,
+           'h|help'              => sub { exec('perldoc',$0); exit; }
+           );
+
+# Write to the named file when -o is given, otherwise to STDOUT.
+my $outfh;
+if( $outfile ) {
+    # three-argument open: the file name is never parsed for a mode
+    open($outfh, '>', $outfile) || die("$outfile: $!");
+} else {
+    $outfh = \*STDOUT;
+}
+	   
+# query start -- an0
+# query en    -- ax0
+# hit start   -- an1
+# hit end     -- ax1
+
+# Output columns, in print order.
+my @fields = qw(qname hname percid alen mmcount gapcount
+                qstart qend hstart hend evalue score bits fs sw-score
+                percsim qlen hlen qgap hgap);
+if( $header ) {
+    my $banner = join("", map { sprintf("%-10s",$_) } @fields);
+    print $outfh "#", uc($banner), "\n";
+}
+
+# Whitespace-separated values that follow "name description (length)" on a
+# hit line, in the order they appear.
+my @hit_columns = qw(score bits evalue percid percsim sw-score alen
+                     qstart qend pn0 px0 hstart hend pn1 px1
+                     qgap hgap fs);
+
+while(<>) {
+    # query header: remember the name and, when present, the length
+    if( /^\s*\d+>>>(\S+).+/ ) {
+        $data{'qname'} = $1;
+        if( /\-\s+(\d+)\s+(aa|nt)\s+$/ ){
+            $data{'qlen'} = $1;
+        }
+        next;
+    }
+    # the alignment section for this query ends the hit table
+    if( $hitsection && /^>>>\Q$data{'qname'}/ ) {
+        $hitsection = 0;
+        next;
+    }
+    if( /^The best scores are:/ ) {
+        $hitsection = 1;
+        next;
+    }
+    next if /^\s+$/;
+    next unless $hitsection;
+
+    # peel "hitname description (length)" off the front; lines that do not
+    # look like that are skipped silently
+    next unless s/^(\S+)\s+(.+)\(\s*(\d+)\)\s+//;
+    my ($hit, $hitlen) = ($1, $3);
+
+    # drop the optional [f]/[r] marker before splitting the columns
+    s/^\[(r|f)\]\s+//;
+
+    my @line = split(/\s+/,$_);
+    $data{'hname'} = $hit;
+    $data{'hlen'}  = $hitlen;
+    @data{@hit_columns} = @line;
+
+    # query + hit gaps
+    $data{'gapcount'} = $data{'qgap'} + $data{'hgap'};
+    $data{'mmcount'}  = $data{'alen'} - ( int($data{'percid'} * $data{'alen'}) + $data{'gapcount'});
+
+    # (the evalue/bitscore filter is disabled, so every parsed hit is printed)
+
+    # identities and similarities are fractions in the input; print percentages
+    $data{$_} = sprintf("%.2f",$data{$_}*100) for qw(percid percsim);
+
+    print $outfh join( "\t",map { $data{$_} } @fields),"\n";
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/filter_search.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/filter_search.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/filter_search.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,79 @@
+#!perl -w
+use strict;
+
+# $Id: filter_search.PLS,v 1.3 2006/07/04 22:23:29 mauricio Exp $
+
+
+=head1 NAME
+
+filter_search - filters searchio results, outputting a tab delimited summary
+
+=head1 SYNOPSIS
+
+  #filter_search -format blast -score 200 < search.bl > search.tab
+
+=head1 DESCRIPTION 
+
+This script filters searchio results allowing a number of different
+filters to be applied before outputting to stdout in a tab delimited
+format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+  Ewan Birney <birney at ebi.ac.uk>
+
+=cut
+
+use Bio::SearchIO;
+use Getopt::Long;
+
+# Defaults: BLAST input, and hits with a raw score below 150 are dropped.
+my ($format,$score) = ('blast', 150);
+
+# Both options take an optional value; a false -score (e.g. 0) disables
+# the score filter entirely.
+GetOptions(
+	   'format:s'  => \$format,
+	   'score:s'  => \$score,
+	);
+
+# No -file or -fh is passed; the SYNOPSIS feeds the report on standard input.
+my $searchin = Bio::SearchIO->new( -format => $format);
+
+# One tab-delimited row per HSP:
+#   query name, hit score, query start, query end, query strand,
+#   hit id, hit start, hit end, hit strand
+while( (my $result = $searchin->next_result()) ) { 
+  while( (my $hit = $result->next_hit())) {
+
+      # Skip hits that fall below the requested raw-score threshold.
+      next if( $score && $hit->raw_score < $score );
+
+      foreach my $hsp ( $hit->hsps() ) {
+         # Column 5 is the query strand; column 9 must be the hit strand
+         # so that it belongs to the hit coordinates printed before it.
+         print $result->query_name,"\t",$hit->score,"\t",$hsp->start,"\t",$hsp->end,"\t",$hsp->strand,"\t",$hsp->hseq_id,"\t",$hsp->hstart,"\t",$hsp->hend,"\t",$hsp->strand('hit'),"\n";
+      }
+    }
+}
+
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/hmmer_to_table.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/hmmer_to_table.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/hmmer_to_table.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,97 @@
+#!perl -w
+use strict;
+
+=head1 NAME 
+
+hmmer_to_table  - turn HMMER output into tabular format
+
+=head1 SYNOPSIS
+
+ hmmer_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ... 
+
+=head1 DESCRIPTION
+
+Command line options:
+  -e/--evalue   evalue    -- filter by evalue
+  -b/--bitscore bitscore  -- filter by bitscore
+  --header                -- boolean flag to print column header 
+  -o/--out                -- optional outputfile to write data, 
+                             otherwise will write to STDOUT
+  -h/--help               -- show this documentation
+
+Not technically a SearchIO script as this doesn't use any Bioperl
+components but is useful and fast.  The output is tabular output.
+
+  query sequence/domain (these are flip-flopped for hmmsearch / hmmpfam)  
+  query start
+  query end
+  domain/sequence name or PFAM accession 
+  hit start
+  hit end
+  score
+  e-value
+  domain/sequence name (these are flip-flopped for hmmsearch / hmmpfam)  
+
+=head1 AUTHOR - Jason Stajich
+
+Jason Stajich jason_at_bioperl-dot-org
+
+=cut
+
+use Getopt::Long;
+
+my ($evalue,$bitscore,$header,$outfile);
+GetOptions(
+	   'b|bitscore|bits:f'   => \$bitscore,
+	   'e|evalue:f'          => \$evalue,
+	   'header'              => \$header,
+	   'o|out|outfile:s'     => \$outfile,
+	   'h|help'              => sub { exec('perldoc',$0); exit; }
+	   );
+# Write to the file named by -o/--out when given, otherwise to STDOUT.
+my $outfh;
+if( $outfile ) {
+    # three-argument open: the file name can never be read as an open mode
+    open($outfh, '>', $outfile) || die("$outfile: $!");
+} else {
+    $outfh = \*STDOUT;
+}
+my @fields = qw(QNAME QSTART QEND HACCESSION HSTART HEND SCORE EVALUE HNAME);
+if( $header ) {
+    print $outfh join("\t", @fields), "\n";
+}
+my %dat;	# holds the name taken from the most recent "Query" line
+while(<>) {
+    if( s/^Query(\s+(sequence|HMM))?:\s+// ) {
+	s/\s+$//;
+	$dat{'Query'} = $_;
+    } elsif( /^Parsed for domains:/ ) {
+	# 0 = before the table header, 1 = header seen, 2 = reading rows
+	my $ready = 0;
+	while(<>) {
+	    # both header spellings must start the line; the alternation is
+	    # grouped so that the anchor applies to "Sequence" as well
+	    if(/^(?:Model|Sequence)\s+Domain/ ) { $ready = 1; }
+	    elsif( $ready && /^\-\-/) { $ready = 2; }
+	    elsif( /^Alignments of/ ) { undef %dat; last; }
+	    elsif( $ready == 2 ) {
+		if( my ($n,$domainnum,$domainct, @vals) = 
+		    (m!^(\S+)\s+      # domain name
+		     (\d+)\/(\d+)\s+  # num/num (ie 1 of 2) 
+		     (\d+)\s+(\d+).+? # sequence start and end
+		     (\d+)\s+(\d+)\s+ # hmm start and end
+		     \S+\s+           # []
+		     (\S+)\s+         # score
+		     (\S+)            # evalue
+		     \s*$!ox)  ) {
+		    next if( defined $bitscore && $vals[4] < $bitscore );
+		    next if (defined $evalue && $vals[5] > $evalue);
+		    print $outfh join("\t",
+				      $dat{'Query'},
+				      $vals[0], $vals[1],
+				      $n,
+				      $vals[2],$vals[3],
+				      $vals[4],$vals[5],
+				      $n),"\n";
+		}
+	    }
+	}
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/parse_hmmsearch.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/parse_hmmsearch.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/parse_hmmsearch.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,206 @@
+#!/usr/bin/perl -w
+# $Id: parse_hmmsearch.PLS,v 1.3 2006/07/04 22:23:29 mauricio Exp $
+
+use strict;
+
+=head1 NAME
+
+parse_hmmsearch - parse single/multiple HMMSEARCH results file(s) with
+                  different output options
+
+=head1 SYNOPSIS
+
+parse_hmmsearch [--po] [--ps] -s hmmsearch_file
+
+parse_hmmsearch [--po] [--ps] -m index_file
+
+=head1 DESCRIPTION
+
+=head2 Mandatory Options:
+
+  -s  HMMSEARCH file to parse.
+  -m  INDEX file that contains a list of HMMSEARCH files for multiple
+      parsing.
+
+=head2 Special Options:
+
+  --po    Print only the hits that have positive scores.
+  --ps    Print the total of positive scores found.
+  --help  Show this documentation.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the
+Bioperl mailing list. Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+  Mauricio Herrera Cuadra <arareko-at-campus.iztacala.unam.mx>
+
+=cut
+
+# Modules, pragmas and variables to use
+use Bio::SearchIO;
+use Getopt::Long;
+use vars qw($opt_s $opt_m $opt_po $opt_ps $opt_help);
+
+# Gets options from the command line
+GetOptions qw(-s:s -m:s --po --ps --help);
+
+# Shows the documentation on --help; exec() only returns if perldoc failed
+exec('perldoc', $0) or die("Unable to run perldoc: $!\n") if $opt_help;
+
+# If no mandatory options are given prints an error and exits
+if (!$opt_s && !$opt_m) {
+    print "ERROR: No HMMSEARCH or INDEX file has been specified.\n       Use
'--help' switch for documentation.\n" and exit();
+} elsif ($opt_s && $opt_m) {
+    print "ERROR: You must select only one option (-s or -m) for input.\n      
Use '--help' switch for documentation.\n" and exit();
+}
+
+# Counter for the domains with a score of zero or more; declared
+# unconditionally because "my ... if ..." has undefined behaviour
+my $pos_scores = 0;
+
+# If single file mode was selected
+if ($opt_s) {
+    parse_hmmsearch($opt_s);
+
+    # Prints the total domain positive scores if the option was given
+    if ($opt_ps) {
+        print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - -\n";
+        print "Total domain positive scores: $pos_scores\n";
+    }
+
+# If multiple files mode was selected
+} elsif ($opt_m) {
+
+    # Opens the INDEX file (each line is passed on as one file name)
+    open(my $index_fh, "<", $opt_m) or die("Unable to open INDEX file: $opt_m ($!)");
+
+    # Reads the INDEX file line by line until the end of file
+    while (my $line = <$index_fh>) {
+        # Deletes the new line characters and skips blank lines, which
+        # would otherwise be handed to the parser as an empty file name
+        chomp $line;
+        next unless $line =~ /\S/;
+
+        # Parses the result file in turn
+        parse_hmmsearch($line);
+        print "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
= = = =\n";
+    }
+
+    # Prints the total domain positive scores if the option was given
+    print "Total domain positive scores: $pos_scores\n" if $opt_ps;
+
+    # Closes INDEX files
+    close($index_fh);
+}
+
+# Exits the program
+exit();
+
+# Subroutine that parses one HMMSEARCH results file and prints a report
+sub parse_hmmsearch {
+
+    # $file: name of the HMMSEARCH results file to read
+    my ($file) = @_;
+
+    # Creates a Bio::SearchIO object that reads $file in 'hmmer' format
+    my $in = new Bio::SearchIO(
+        -format => 'hmmer',
+        -file   => $file,
+    );
+
+    # Loops through every result found in the file
+    while (my $result = $in->next_result()) {
+
+        # Prints program name and version (these are values from
+        # Bio::Search::Result::GenericResult methods)
+        print $result->algorithm(), " ", $result->algorithm_version(), "\n";
+        print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - -\n";
+
+        # Prints HMM file and sequence database (these are values from
+        # Bio::Search::Result::HMMERResult methods)
+        print "HMM file:\t\t\t", $result->hmm_name(), "\n";
+        print "Sequence database:\t\t", $result->sequence_file(), "\n";
+        print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-\n";
+
+        # Prints some values from Bio::Search::Result::GenericResult
+        # methods
+        print "Query HMM:\t\t\t", $result->query_name(), "\n";
+        print "Accession:\t\t\t", $result->query_accession(), "\n";
+        print "Description:\t\t\t", $result->query_description(), "\n";
+        print "Total hits:\t\t\t", $result->num_hits(), "\n";
+
+        # Loops through the hits of this result
+        while (my $hit = $result->next_hit()) {
+
+            # If the --po option was given, prints the hit only when
+            # its score is zero or greater
+            if ($opt_po) {
+                printHits($hit) if ($hit->score() >= 0);
+
+            # Prints all hits otherwise
+            } else {
+                printHits($hit);
+            }
+        }
+    }
+}
+
+# Subroutine that prints one Bio::Search::Hit::HitI object and each of
+# its domains; also counts domains for --ps in $pos_scores
+sub printHits {
+
+    # $hit: the hit object to report
+    my ($hit) = @_;
+
+    # Prints some values from Bio::Search::Hit::HitI methods
+    print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
+    print "Hit ", $hit->rank(), "\n";
+    print "Sequence:\t\t\t", $hit->name(), "\n";
+    print "Description:\t\t\t", $hit->description(), "\n";
+    print "Score:\t\t\t\t", $hit->score(), "\n";
+    print "E-value:\t\t\t", $hit->significance(), "\n";
+    print "Number of domains:\t\t", $hit->num_hsps(), "\n";
+
+    # Loops through the domains (HSPs) of the hit
+    while (my $hsp = $hit->next_hsp()) {
+
+        # Prints some values from Bio::Search::HSP::HSPI methods
+        print "   - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
+        print "   Domain:\t\t\t", $hsp->rank(), " of ", $hit->num_hsps(), "\n";
+        print "   seq-f:\t\t\t", $hsp->start('hit'), "\n";
+        print "   seq-t:\t\t\t", $hsp->end('hit'), "\n";
+        print "   hmm-f:\t\t\t", $hsp->start(), "\n";
+        print "   hmm-t:\t\t\t", $hsp->end(), "\n";
+        print "   score:\t\t\t", $hsp->score(), "\n";
+        $pos_scores++ if ($hsp->score() >= 0) && $opt_ps; # zero counts too
+        print "   E-value:\t\t\t", $hsp->evalue(), "\n";
+        my $hmm_string = $hsp->query_string();
+        $hmm_string =~ s/<-\*$//; # drops a trailing "<-*" from the string
+        print "   hmm string:\t\t\t", $hmm_string, "\n";
+        print "   homology string:\t\t", $hsp->homology_string(), "\n";
+        print "   hit string:\t\t\t", $hsp->hit_string(), "\n";
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/searchio/search2table.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/searchio/search2table.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/searchio/search2table.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,94 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+search2table - turn SearchIO parseable reports into tab delimited format like NCBI's -m 9 
+
+=head1 SYNOPSIS
+
+  search2table -f fasta -i file.FASTA -o output.table
+
+=head1 DESCRIPTION 
+
+Turn SearchIO reports into a tabular format like NCBI's -m 9 output.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+  Jason Stajich jason_at_bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::SearchIO;
+use Getopt::Long;
+
+my ($format, $file,$output) = ('blast');
+# -f report format (default blast), -i input report, -o output table
+GetOptions(
+	   'f|format:s'   => \$format,
+	   'i|input:s'    => \$file,
+	   'o|output:s'   => \$output);
+# A remaining positional argument overrides -i as the input file
+if( @ARGV ) {
+    $file = shift;
+}
+
+my $in = Bio::SearchIO->new(-format => $format,
+			    -file   => $file);
+my $out;
+if( $output ) {
+    open($out, '>', $output) || die "cannot open $output for writing: $!";
+} else {
+    $out = \*STDOUT;
+}
+# One tab-delimited row per HSP of every hit of every result
+while( my $r = $in->next_result ) {
+    while( my $hit = $r->next_hit ) {
+	while( my $hsp = $hit->next_hsp ) {
+	    my $mismatchcount = $hsp->length('total') - 
+		($hsp->num_conserved + $hsp->gaps('total'));
+	    print $out join("\t", ( $r->query_name,
+				    $hit->name,
+				    sprintf("%.2f",$hsp->percent_identity),
+				    $hsp->length('total'),
+				    $mismatchcount,
+				    $hsp->gaps('total'),
+				    # flip start/end on rev strand
+				    $hsp->query->strand < 0 ?
+				    ( $hsp->query->end,
+				      $hsp->query->start ) :
+				    ( $hsp->query->start,
+				      $hsp->query->end ),
+				    $hsp->hit->strand < 0 ?
+				    ( $hsp->hit->end,
+				      $hsp->hit->start ) :
+				    ( $hsp->hit->start,
+				      $hsp->hit->end ),
+
+				    $hsp->evalue,
+				    # change this to $hsp->sw_score 
+				    # if you would rather have that
+				    # it will only work for FASTA parsing though!
+				    $hsp->bits)),"\n";
+	}
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+These are scripts to interconvert sequence formats and to perform
+other common sequence manipulations.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/extract_feature_seq.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/extract_feature_seq.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/extract_feature_seq.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,84 @@
+#!perl -w
+use strict;
+# $Id: extract_feature_seq.PLS,v 1.5 2006/07/04 22:23:29 mauricio Exp $
+# Author Jason Stajich <jason at bioperl.org>
+
+=head1 NAME
+
+extract_feature_seq - extract the corresponding sequence for a specified feature type
+
+=head1 SYNOPSIS
+
+extract_feature_seq.PLS -i file --format genbank --feature=CDS -o output.fa
+
+=head1 DESCRIPTION 
+
+This script will extract the sequence for all the features you specify.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
+
+=cut
+
+use Bio::SeqIO;
+use Getopt::Long;
+
+my ($input,$format,$featuretype,$output);
+$featuretype ='CDS';
+GetOptions(
+	   'i|input:s' => \$input,
+	   'format:s'  => \$format,
+	   'feature:s' => \$featuretype,
+	   'o|output:s'=> \$output);
+# Without -i/--input, take the input file from the first remaining argument
+$input ||= shift @ARGV if @ARGV;
+
+my $in = Bio::SeqIO->new(-file => $input,
+			-format => $format);
+my $out;
+if ($output ) {
+    $out = Bio::SeqIO->new(-file => ">$output");
+} else {
+    $out = Bio::SeqIO->new(); # use STDOUT for output
+}
+# Running number used to build ids for features other than gene/CDS
+my $count = 1;
+while( my $seq = $in->next_seq ) {
+    foreach my $f ( grep { $_->primary_tag =~ /$featuretype/i } 
+		    $seq->get_SeqFeatures ) {
+	my $s = $f->spliced_seq;
+	if( $featuretype =~ /gene|CDS/i ) { # /i matches the feature filter above
+	    $s->display_id($f->has_tag('gene') ? join(',',sort $f->each_tag_value('gene')) :
+			   $f->has_tag('label') ? join(',',$f->each_tag_value('label')): 
+			   $s->display_id);
+	} else {
+	    $s->display_id(sprintf("%s_%s_%d",
+				   $seq->display_id, 
+				   $f->primary_tag,
+				   $count++));
+	}
+	$out->write_seq($s);
+    }
+}
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/make_mrna_protein.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/make_mrna_protein.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/make_mrna_protein.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+#!perl -w
+# $Id: make_mrna_protein.PLS,v 1.3 2005/09/30 18:32:29 bosborne Exp $
+#
+=head1 NAME
+
+make_mrna_protein - Convert an input mRNA/cDNA sequence into protein
+
+=head1 DESCRIPTION
+
+Convert an input mRNA/cDNA sequence into protein using translate()
+
+  -f/--frame           Specifies frame [0,1,2]
+
+One can also specify:
+
+  -t/--terminator      Stop Codon character (defaults to '*')
+  -u/--unknown         Unknown Protein character (defaults to 'X')
+  -cds/--fullcds       Expected Full CDS (with start and Stop codon)
+  -throwOnError        Throw error if no Full CDS (defaults to 0)
+  -if/--iformat        Input format (defaults to FASTA/Pearson)
+  -of/--oformat        Output format (defaults to FASTA/Pearson)
+  -o/--output          Output Filename (defaults to STDOUT)
+  -i/--input           Input Filename (defaults to STDIN)
+  -ct/--codontable     Codon table to use (defaults to '1')
+
+See L<Bio::PrimarySeq> for more information on codon tables
+and the translate() method
+
+=head1 AUTHOR - Jason Stajich
+
+  Email jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::SeqIO;
+use Getopt::Long;
+
+use vars qw($USAGE);
+
+BEGIN {    # builds the usage text that the script dies with on -h/--help
+    $USAGE =
+qq{make_mrna_protein.pl < file.fa > file.prots
+-f/--frame            Translation Frame (0,1,2) are valid (defaults to '0')
+-t/--terminator	    Stop Codon Character ('*' by default)
+-u/--unknown          Unknown Protein character (defaults to 'X')
+-ct/--codontable      Codon table to use (defaults to '1')
+                      (see Bio::PrimarySeq for more information)
+-cds/--fullcds        Expected Full CDS (with start and Stop codon)
+-throwOnError         Throw an error if no Full CDS (defaults to 0)
+-if/--iformat         Input format (defaults to FASTA/Pearson)
+-of/--oformat         Output format (defaults to FASTA/Pearson)
+-o/--output           Output Filename (defaults to STDOUT)
+-i/--input            Input Filename (defaults to STDIN)
+};
+
+}
+my ($iformat, $oformat, $frame, $termchar, $unknownProt, $codontable, $fullCDS,
+    $throw_on_Incomp_CDS, $help) = ('fasta','fasta', 0, undef, undef, 1, 0, 0);
+my ($input,$output);
+# Stop with the usage text when the command line cannot be parsed
+GetOptions('f|frame:s'       => \$frame,
+			  't|terminator:s'  => \$termchar,
+			  'u|unknown:s'     => \$unknownProt,
+			  'ct|codontable:s' => \$codontable,
+			  'cds|fullcds'     => \$fullCDS,
+			  'throwOnError'    => \$throw_on_Incomp_CDS,
+			  'h|help'          => \$help,
+			  'i|input:s'       => \$input,
+			  'if|iformat:s'    => \$iformat,
+			  'of|oformat:s'    => \$oformat,
+			  'o|output:s'      => \$output,
+			 ) or die $USAGE;
+
+die $USAGE if( $help );
+# Read from -i/--input when given, otherwise from STDIN
+my ($in,$out);
+if( $input ) {
+	$in = Bio::SeqIO->new('-format' => $iformat, '-file' => $input);
+} else {
+	$in = Bio::SeqIO->new('-format' => $iformat, '-fh' => \*STDIN);
+}
+# Write to -o/--output when given; the POD names STDOUT as the default
+if( $output ) {
+	$out = Bio::SeqIO->new('-format' => $oformat, '-file' => ">$output" );
+} else {
+	$out = Bio::SeqIO->new('-format' => $oformat );
+}
+# Translate every input sequence with the requested settings
+while( my $seq = $in->next_seq ) {
+    my $protseq = $seq->translate(-terminator => $termchar,
+											 -unknown => $unknownProt,
+											 -frame => $frame,
+											 -codontable_id => $codontable,
+											 -complete => $fullCDS,
+											 -throw => $throw_on_Incomp_CDS );
+    $out->write_seq($protseq);
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/seq/make_mrna_protein.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/seqconvert.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/seqconvert.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/seqconvert.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,87 @@
+#!perl -w
+# $Id: seqconvert.PLS,v 1.8 2006/07/04 22:23:29 mauricio Exp $
+
+use strict;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my $help;
+my $from=undef;
+my $to=undef;
+# Formats accepted for --from and --to; each name is listed once
+### please add to this list (see the modules under Bio/SeqIO):
+my @known_formats=
+  qw(gcg fasta ace raw fastq phd pir scf swiss genbank locuslink
+     embl game qual bsml tab abi chado alf ctf exp ztr pln
+     chaosxml chadoxml yaml tigr tigrxml agave chaos kegg interpro
+     lasergene strider);
+# Script name without its directory part, for the usage message
+my $script=substr($0, 1+rindex($0,'/'));
+my $usage="Usage:
+
+  $script --from in-format --to out-format < file.in-format > file.out-format
+
+Known formats:\n  " . join(' ', @known_formats) . "\n\n";
+# Usage unless parsing succeeds, no help is asked, and both formats are known
+die $usage unless
+  GetOptions( 'from:s'   => \$from,
+               'to:s'     => \$to,
+               'h|help'   => \$help
+	     )
+  && !$help &&  $from && $to
+  && grep($from eq $_, @known_formats)
+  && grep($to eq $_, @known_formats);
+# Tied filehandles: reading yields sequence objects, printing writes them
+my $in  = Bio::SeqIO->newFh(-fh => \*STDIN , '-format' => $from);
+my $out = Bio::SeqIO->newFh(-fh=> \*STDOUT, '-format' => $to);
+
+print $out $_ while <$in>;
+
+
+__END__
+
+=head1 NAME
+
+seqconvert - generic BioPerl sequence format converter
+
+=head1 SYNOPSIS
+
+  seqconvert --from in-format --to out-format < file.in-format > file.out-format
+  # or
+  seqconvert -f in-format -t out-format < file.in-format > file.out-format
+
+=head1 DESCRIPTION
+
+This script gives command line interface to BioPerl Bio::SeqIO. 
+
+=head1 SEE ALSO
+
+L<Bio::SeqIO>
+L<bp_sreformat.PLS> for similar functionality which also supports AlignIO.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Philip Lijnzaad
+
+Email E<lt>p.lijnzaad-at-med.uu.nlE<gt>
+
+=cut
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/split_seq.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/split_seq.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/split_seq.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,141 @@
+#!perl -w
+use strict;
+# $Id: split_seq.PLS,v 1.7 2006/07/04 22:23:29 mauricio Exp $
+
+=head1 NAME
+
+split_seq - splits a sequence into equal sized chunks with an optional
+            overlapping range
+
+=head1 SYNOPSIS
+
+split_seq -c 10000 [-o 1000] [-i] -f seq.in
+
+=head1 DESCRIPTION 
+
+The script will split sequences into chunks
+
+Mandatory Options:
+
+  -c  Desired length of the resulting sequences.
+  -f  Input file (must be FASTA format).
+
+Special Options:
+
+  -o  Overlapping range between the resulting sequences.
+  -i  Create an index file with the resulting sequence files. This is
+      useful if you want to pass this list as input arguments into
+      other programs (e.g. CLUSTAL, HMMER, etc.).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHORS
+
+  Ewan Birney E<lt>birney-at-ebi.ac.ukE<gt>
+  Mauricio Herrera Cuadra E<lt>arareko-at-campus.iztacala.unam.mxE<gt>
+  (some enhancements)
+
+=cut
+
+# Modules, pragmas and variables to use
+use Bio::Seq;
+use Bio::SeqIO;
+use Getopt::Long;
+use vars qw($opt_c $opt_o $opt_i $opt_f $index_file);
+
+# Gets options from the command line
+GetOptions qw(-c=i -o:i -i -f=s);
+
+# If no mandatory options are given prints an error and exits
+if (!$opt_c) {
+    print "ERROR: No chunk size has been specified.\n" and exit();
+} elsif (!$opt_f) {
+    print "ERROR: No FASTA file has been specified.\n" and exit();
+}
+
+# Overlap size; it must stay below the chunk size or the loop never advances
+my $offset = $opt_o ? $opt_o : "0";
+(print "ERROR: Overlap must be smaller than the chunk size.\n" and exit()) if ($offset >= $opt_c);
+
+# Opens the FASTA file
+my $in = Bio::SeqIO->new(
+    -file   => "$opt_f",
+    -format => "Fasta",
+);
+print "==> Opening FASTA file:\t\t\t\t$opt_f\n";
+
+# Reads the next sequence object
+while (my $seq = $in->next_seq()) {
+
+    # Reads the ID for the sequence and prints it
+    my $id = $seq->id();
+    print "--> The ID for this sequence is:\t\t$id\n";
+
+    # Reads the description for the sequence and prints it
+    my $desc = $seq->desc();
+    print "--> The description for this sequence is:\t$desc\n";
+
+    # Gets sequence length and prints it
+    my $seq_length = $seq->length();
+    print "--> The length of this sequence is:\t\t$seq_length\n";
+
+    # If the chunk size is bigger than the sequence length prints the error and exits
+    (print "ERROR: Specified chunk size is bigger than sequence length.\n" and exit()) if ($opt_c > $seq_length);
+
+    # Creates a directory for writing the resulting files
+    mkdir("split", 0755) unless -e "split" and -d "split";
+
+    # Creates the INDEX file if the option was given
+    if ($opt_i) {
+        $index_file = "$id.c$opt_c.o$offset.INDEX";
+        open(FH, ">", $index_file) or die("Unable to create file: $index_file ($!)");
+    }
+
+    # Steps by (chunk size - overlap); "<=" keeps a chunk starting on the last base
+    for (my $i = 1; $i <= $seq_length; $i += $opt_c - $offset) {
+        # Last base of this chunk, clipped to the end of the sequence
+        my $last = $i + $opt_c - 1;
+        $last = $seq_length if $last > $seq_length;
+        my $seq_range = "$i-$last";
+        my $id = $seq->id() . "_$seq_range";
+        # Stores chunk into its corresponding FASTA file
+        my $out = Bio::SeqIO->new(
+            -file   => ">split/$id.faa",
+            -format => "Fasta",
+        );
+        my $trunc_seq = $seq->trunc($i, $last);
+        $trunc_seq->id($id);
+        $out->write_seq($trunc_seq);
+        print "==> Sequence chunk:\t$seq_range\tstored in file:\tsplit/$id.faa\n";
+        # Prints the current file name into the INDEX file if the option was given
+        print FH "split/$id.faa\n" if $opt_i;
+
+        # Stops once the end is written: later chunks would only repeat bases
+        last if $last >= $seq_length;
+    }
+
+    # Closes the INDEX file if the option was given
+    if ($opt_i) {
+        print "==> INDEX stored in file:\t\t\t$index_file\n";
+        close(FH);
+    }
+}
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/translate_seq.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/translate_seq.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/translate_seq.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,63 @@
+#!perl -w
+use strict;
+# $Id: translate_seq.PLS,v 1.7 2006/07/04 22:23:29 mauricio Exp $
+
+=head1 NAME
+
+translate_seq - translates a sequence
+
+=head1 SYNOPSIS
+
+translate_seq E<lt> cdna_cds.fa E<gt> protein.fa
+
+=head1 DESCRIPTION 
+
+The script will translate one fasta file (on stdin) to protein on stdout
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+  Ewan Birney E<lt>birney at ebi.ac.ukE<gt>
+
+=cut
+
+use Bio::SeqIO;
+use Getopt::Long;
+
+my ($format) = 'fasta';
+# --format sets the input format only; the output format is fixed below
+GetOptions(
+	   'format:s'  => \$format,
+	   );
+
+my $oformat = 'fasta';
+# -file is the first remaining argument, or undef when there is none;
+# NOTE(review): the POD expects input on stdin in that case - confirm
+my $seqin = Bio::SeqIO->new( -format => $format, -file => shift); 
+my $seqout = Bio::SeqIO->new( -format => $oformat, -file => ">-" );
+
+# Translates each input sequence and writes the result through $seqout
+while( (my $seq = $seqin->next_seq()) ) {
+	my $pseq = $seq->translate();
+	$seqout->write_seq($pseq);
+}
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seq/unflatten_seq.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seq/unflatten_seq.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seq/unflatten_seq.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,254 @@
+#!perl -w
+use strict;
+# $Id: unflatten_seq.PLS,v 1.7 2006/07/04 22:23:29 mauricio Exp $
+# Author Chris Mungall <cjm-at-bioperl.org>
+
+=head1 NAME
+
+unflatten_seq - unflatten a genbank or genbank-style feature file into
+a nested SeqFeature hierarchy
+
+=head1 SYNOPSIS
+
+  unflatten_seq.PLS -e 3 -gff ~/cvs/bioperl-live/t/data/AE003644_Adh-genomic.gb
+
+  unflatten_seq.PLS --detail ~/cvs/bioperl-live/t/data/AE003644_Adh-genomic.gb
+
+  unflatten_seq.PLS -i foo.embl --from embl --to chadoxml -o out.chado.xml
+
+  unflatten_seq.PLS --notypemap --detail --to asciitree -ethresh 2 AE003644_Adh-genomic.gb
+
+=head1 DESCRIPTION 
+
+This script will B<unflatten> a genbank or genbank-style file of
+SeqFeatures into a nested hierarchy.
+
+See L<Bio::SeqFeature::Tools::Unflattener>
+
+In a GenBank/EMBL representation, features are 'flat' - for example,
+there is no link between an mRNA and a CDS, other than implicit links
+(eg via tags or via splice site coordinates) which may be hard to code
+for.
+
+This is most easily illustrated with the default output format,
+B<asciitree>
+
+An unflattened genbank feature set may look like this (AB077698)
+
+  Seq: AB077698
+    databank_entry                                   1..2701[+]
+    gene                                             
+      mRNA                                           
+        CDS hCHCR-G                                  80..1144[+]
+        exon                                         80..1144[+]
+      five_prime_UTR                                 1..79[+]
+      located_sequence_feature                       137..196[+]
+      located_sequence_feature                       239..292[+]
+      located_sequence_feature                       617..676[+]
+      located_sequence_feature                       725..778[+]
+      three_prime_UTR                                1145..2659[+]
+      polyA_site                                     1606..1606[+]
+      polyA_site                                     2660..2660[+]
+
+Or like this (portion of AE003734)
+
+
+  gene                                             
+    mRNA CG3320-RA                                 
+      CDS CG3320-PA                                53126..54971[-]
+      exon                                         52204..53323[-]
+      exon                                         53404..53631[-]
+      exon                                         53688..53735[-]
+      exon                                         53798..53918[-]
+      exon                                         54949..55287[-]
+    mRNA CG3320-RB                                 
+      CDS CG3320-PB                                53383..54971[-]
+      exon                                         52204..53631[-]
+      exon                                         53688..53735[-]
+      exon                                         53798..53918[-]
+      exon                                         54949..55287[-]
+
+The unflattening will also 'normalize' the containment hierarchy (in
+the sense of standardising it - e.g. making sure there is always a
+transcript record, even if genbank just specifies CDS and gene)
+
+By default, the GenBank types will be mapped to SO types
+
+See L<Bio::SeqFeature::Tools::TypeMapper>
+
+=head1 COMMAND LINE ARGUMENTS
+
+=over
+
+=item -i|input FILE
+
+input file (can also be specified as last argument)
+
+=item -from FORMAT
+
+input format (defaults to genbank)
+
+probably doesn't make much sense to use this for non-flat formats,
+i.e. anything other than embl/genbank
+
+=item -to FORMAT
+
+output format (defaults to asciitree)
+
+should really be a format that is nested SeqFeature aware; I think
+this is only asciitree, chadoxml and gff3
+
+=item -gff
+
+with export to GFF3 format (pre-3 GFFs make no sense with unflattened
+sequences, as they have no set way of representing feature graphs)
+
+=item -o|output FILE
+
+outfile defaults to STDOUT
+
+=item -detail
+
+show extra detail on features (asciitree mode only)
+
+=item -e|ethresh INT
+
+sets the error threshold on unflattening
+
+by default this script will throw a wobbly if it encounters weird
+stuff in the genbank file - raise the error threshold to signal these
+to be ignored (and reported on STDERR)
+
+=item -nomagic
+
+suppress use_magic in unflattening (see
+L<Bio::SeqFeature::Tools::Unflattener>)
+
+=item -notypemap
+
+suppress type mapping (see
+L<Bio::SeqFeature::Tools::TypeMapper>)
+
+
+=back
+
+=head1 TODO
+
+L<Bio::SeqFeature::Tools::Unflattener> allows fine-grained control
+over the unflattening process - need to add more options to allow this
+control at the command line
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+email or the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Chris Mungall E<lt>cjm-at-bioperl.orgE<gt>
+
+=cut
+
+use Bio::SeqIO;
+use Bio::SeqFeature::Tools::Unflattener;
+use Bio::SeqFeature::Tools::TypeMapper;
+use Bio::SeqFeature::Tools::IDHandler;
+use Bio::Tools::GFF;
+
+use Getopt::Long;
+
+# Option state.  $group_tag is the NAME of the tag used for grouping;
+# $gff selects GFF3 output (Bio::Tools::GFF) instead of a SeqIO writer.
+my ($input,$from,$to,$output,$verbosity,$ethresh,$nomagic,$group_tag,$detail,
+    $notypemap,$gff);
+($from, $to, $ethresh) = ('genbank', 'asciitree', 3);
+my @remove_types = ();
+
+GetOptions(
+	   'i|input:s' => \$input,
+	   'from:s'  => \$from,
+	   'to:s'  => \$to,
+	   'o|output:s'=> \$output,
+	   "verbosity|v=s"=>\$verbosity,
+	   "ethresh|e=s"=>\$ethresh,
+	   "remove_type=s@"=>\@remove_types,
+	   "nomagic"=>\$nomagic,
+	   "notypemap"=>\$notypemap,
+	   "group_tag=s"=>\$group_tag,   # takes the tag NAME, not a bare flag
+	   "detail"=>\$detail,
+           "gff"=>\$gff,
+	   "h|help"=>sub {
+	       system('perldoc', $0);    # list form: $0 never reaches a shell
+	       exit 0;
+	   },
+	  );
+
+# --to gff* is shorthand for --gff.
+$gff = 1 if $to =~ /^gff/i;
+
+# The input file may also be given as the last bare argument.
+$input = $input || shift if @ARGV;
+
+my $in = Bio::SeqIO->new(-file => $input,
+			 -format => $from);
+my @out_opt = $output ? (-file => ">$output") : ();
+
+# Exactly one writer is opened, once, so that a multi-record input does not
+# re-open (and truncate) the output file for every sequence.
+my ($out, $gffio);
+if ($gff) {
+    $gffio = Bio::Tools::GFF->new(@out_opt, -noparse=>1, -gff_version => 3);
+}
+else {
+    $out = Bio::SeqIO->new(-format=>$to, @out_opt);
+    $out->show_detail($detail) if $out->can("show_detail") && $detail;
+}
+
+my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
+$unflattener->verbose($verbosity);
+$unflattener->error_threshold($ethresh);
+my $tm = Bio::SeqFeature::Tools::TypeMapper->new;
+my $idhandler = Bio::SeqFeature::Tools::IDHandler->new;
+
+while( my $seq = $in->next_seq ) {
+    $unflattener->remove_types(-seq=>$seq,
+                               -types=>\@remove_types)
+      if @remove_types;
+
+    $unflattener->unflatten_seq(-seq=>$seq,
+				-use_magic=>!$nomagic,
+				-group_tag=>$group_tag,
+			       );
+    $unflattener->report_problems(\*STDERR);
+    $tm->map_types_to_SO(-seq=>$seq) unless $notypemap;
+
+    if ($gff) {
+        # Parent IDs are set from the feature hierarchy before writing.
+        $idhandler->set_ParentIDs_from_hierarchy($seq);
+        $gffio->write_feature($_) foreach $seq->get_all_SeqFeatures;
+    }
+    else {
+        # chadoxml gets named arguments, including -nounflatten
+        my @seq_args = $to eq 'chadoxml' ? (-seq=>$seq, -nounflatten=>1)
+                                         : ($seq);
+        $out->write_seq(@seq_args);
+    }
+}
+$gffio->close() if $gffio;
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/seq/unflatten_seq.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3 @@
+These are scripts to generate common statistics on protein and
+nucleotide sequences.
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/aacomp.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/aacomp.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/aacomp.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+#!perl -w
+use strict;
+use Carp;
+# $Id: aacomp.PLS,v 1.5 2006/07/04 22:23:29 mauricio Exp $
+
+use Bio::SeqIO;
+use Getopt::Long;
+use Bio::SeqUtils;
+use Bio::Tools::IUPAC;
+
+# Residue tables: @BASES is the list of symbols reported (in this order);
+# %all maps each symbol to the label printed beside it.
+my $table = Bio::SeqUtils->new;
+my @BASES = $table->valid_aa(0);
+my %all = $table->valid_aa(2);
+my ($file,$format,$help) = ( undef, 'fasta');
+GetOptions(
+	   'i|in:s'  => \$file,
+	   'f|format:s' => \$format,
+	   'h|help|?'  => \$help,
+	   );
+
+my $USAGE = "usage: aacomp [-f format] filename\n\tdefault format is fasta\n";
+$file = shift unless $file;    # a bare argument is the input file
+
+die $USAGE if $help;
+# Read the named file when one was given, otherwise standard input.
+my $seqin;
+if( defined $file ) {
+    print "Could not open file [$file]\n$USAGE" and exit unless -e $file;
+    $seqin = Bio::SeqIO->new(-format => $format, -file => $file);
+} else {
+    $seqin = Bio::SeqIO->new(-format => $format, -fh => \*STDIN);
+}
+
+# Start every reported symbol at zero so absent residues still print, and
+# start $total at zero so empty input prints "0 aa" without a warning.
+my %composition = map { $_ => 0 } @BASES;
+my $total = 0;
+while ( my $seq = $seqin->next_seq ) {
+    if( $seq->alphabet ne 'protein' ) {
+	# carp, not confess: confess dies, so the record was never skipped
+	carp("Must only provide amino acid sequences to aacomp...skipping this seq");
+	next;
+    }
+    foreach my $base ( split(//,$seq->seq()) ) {
+	$composition{uc $base}++;
+	$total++;
+    }
+}
+
+printf("%d aa\n",$total);
+printf("%5s %4s\n", 'aa', '#' );
+my $ct = 0;    # sum over the reported symbols only
+foreach my $base ( @BASES ) {
+    printf(" %s %s %3d\n", $base, $all{$base}, $composition{$base} );
+    $ct += $composition{$base};
+}
+printf( "%6s %s\n", '','-'x5);
+printf( "%6s %3d\n", '',$ct);
+
+
+__END__
+
+
+=head1 NAME
+
+aacomp - amino acid composition of protein sequences
+
+=head1 SYNOPSIS
+
+  aacomp [-f/--format FORMAT] [-h/--help] filename
+  or
+  aacomp [-f/--format FORMAT] < filename
+  or
+  aacomp [-f/--format FORMAT] -i filename
+
+=head1 DESCRIPTION
+
+This script prints out the count of amino acids over all protein
+sequences from the input file.
+
+=head1 OPTIONS
+
+The default sequence format is fasta.
+
+The sequence input can be provided using any of the three methods:
+
+=over 3
+
+=item unnamed argument
+
+  aacomp filename
+
+=item named argument
+
+  aacomp -i filename
+
+=item standard input
+
+  aacomp < filename
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 HISTORY
+
+Based on aacomp.c from an old version of EMBOSS
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/chaos_plot.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/chaos_plot.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/chaos_plot.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,168 @@
+#!perl -w
+# $Id: chaos_plot.PLS,v 1.6 2006/07/04 22:23:29 mauricio Exp $
+
+use strict;
+
+use Bio::SeqIO;
+use Getopt::Long;
+use GD;
+
+use vars qw( $USAGE %VALIDFORMATS);
+
+# Output formats accepted for -g; the chosen name is invoked as a method
+# on the GD image at the end of the script.
+%VALIDFORMATS = ( 'png'  => 1,
+		  'jpeg' => 1,
+		  'gd2'  => 1,
+		  'gd'   => 1,
+		  'gif'	 => 1,
+		  'wbmp' => 1 );
+
+$USAGE = "usage:\tchaos_plot -i/--input=INPUTFILE -f/--format=SEQFORMAT \n".
+    "\t-o/--output=OUTPUTFILE -g/--graphics=GRAPHIC TYPE\n".
+    "\t-w/--width=600 -h/--height=400\n";
+
+$USAGE .= "\tValid graphics formats: (" . join(",", ( keys %VALIDFORMATS )) .")\n";
+$USAGE .= "\tImage size defaults to 600x400, SEQFORMAT to fasta\n";
+$USAGE .= "\tINPUTFILE can also be read from STDIN\n";
+
+my ($format,$graph,$width,$height,$seqfile,$output) = ('fasta', 'png', 600, 400);
+GetOptions( "i|input:s"           => \$seqfile,
+	    "f|format:s"          => \$format,
+	    "o|output:s"          => \$output,
+	    "g|graph|graphics:s"  => \$graph,
+            "width:i"             => \$width,
+            "height:i"            => \$height
+	    );
+
+# Map the "jpg" spelling to "jpeg" BEFORE validating; done afterwards (as
+# it used to be) "jpg" was already rejected and the alias never applied.
+$graph =~ s/jpg/jpeg/;
+die $USAGE if ! $output || ! $VALIDFORMATS{$graph};
+
+# Read the named file when one was given, otherwise standard input.
+$seqfile = shift unless $seqfile;
+my $seqin;
+if( defined $seqfile ) {
+    print "Could not open file [$seqfile]\n$USAGE" and exit unless -e $seqfile;
+    $seqin = Bio::SeqIO->new(-format => $format, -file => $seqfile);
+} else {
+    $seqin = Bio::SeqIO->new(-format => $format, -fh => \*STDIN);
+}
+
+my $img = GD::Image->new($width,$height);
+my $white = $img->colorAllocate(255,255,255);    # not referenced again below
+my $black = $img->colorAllocate(0,0,0);
+
+# Only the first sequence of the input is plotted.
+my $seq = $seqin->next_seq;
+die("No sequence found in input\n") unless defined $seq;
+# The test used to be "ne 'dna' or ne 'rna'", which is true for every
+# alphabet, so non-nucleotide input was never rejected.
+die("Sequence type must be DNA not " . $seq->alphabet())
+    unless $seq->alphabet eq 'dna' or $seq->alphabet eq 'rna';
+my $len = $seq->length();
+
+my ($x,$y) = ( 0.5, 0.5);    # the walk starts in the centre
+$img->string(gdGiantFont, 1,1, 'A', $black);
+$img->string(gdGiantFont, 0,$height - 15, 'C', $black);
+$img->string(gdGiantFont, $width - 15,1, 'T', $black);
+$img->string(gdGiantFont, $width - 15,$height -20, 'G', $black);
+
+# Corner (in unit coordinates) that each base pulls the point towards.
+my %corner = ( 'a' => [0, 0], 'c' => [0, 1], 'g' => [1, 1],
+	       't' => [1, 0], 'u' => [1, 0] );
+
+for( my $i = 1; $i <= $len; $i++ ) {
+    # Move halfway to the base's corner; other symbols leave the point
+    # where it is, and it is simply plotted again.
+    if( my $target = $corner{ lc $seq->subseq($i,$i) } ) {
+	$x = ( $x + $target->[0] ) * 0.5;
+	$y = ( $y + $target->[1] ) * 0.5;
+    }
+    $img->setPixel($x * $width,$y * $height, $black);
+}
+
+# Checked open; two-argument form kept so existing -o values behave as before.
+open(my $out_fh, ">$output") or die "Could not write file [$output]: $!\n";
+binmode $out_fh;
+print {$out_fh} $img->$graph();
+close($out_fh) or die "Could not close file [$output]: $!\n";
+
+
+__END__
+
+=head1 NAME
+
+chaos_plot - a chaos plot from DNA and RNA sequences
+
+=head1 SYNOPSIS
+
+  chaos_plot.pl -i/--input=INPUTFILE -f/--format=SEQFORMAT
+        -o/--output=OUTPUTFILE -g/--graphics=GRAPHIC FORMAT
+        -w/--width=WIDTH -h/--height=HEIGHT
+
+=head1 DESCRIPTION
+
+This script generates image files, using the GD image library, to
+visualize nucleotide sequences as a chaos plot.
+
+=head1 OPTIONS
+
+Valid graphics formats are currently gd, gd2, png, wbmp, jpeg and gif.
+
+The default size of the image file is 600x400.
+
+The sequence input can be provided using any of the three methods:
+
+=over 3
+
+=item unnamed argument
+
+  chaos_plot filename
+
+=item named argument
+
+  chaos_plot -i filename
+
+=item standard input
+
+  chaos_plot < filename
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 HISTORY
+
+This code is based on EMBOSS C code for chaos.c by Ian Longden.
+Included are documentation from EMBOSS code:
+
+Chaos produces a chaos plot.  The original application is part of the
+ACEDB genome database package, written by ** Richard Durbin (MRC LMB,
+UK) rd at mrc-lmba.cam.ac.uk, and Jean Thierry-Mieg (CRBM du CNRS,
+France) mieg at crbm1.cnusc.fr
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/gccalc.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/gccalc.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/gccalc.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,142 @@
+#!perl -w
+# $Id: gccalc.PLS,v 1.5 2006/07/04 22:23:29 mauricio Exp $
+
+use strict;
+
+use Bio::SeqIO;
+use Bio::Tools::SeqStats;
+use Getopt::Long;
+my ($format, $file, $help) = ('fasta', undef, 0);
+GetOptions(
+	    'f|format:s' => \$format,
+	    'i|in:s'     => \$file,
+	    'h|help|?'    => \$help,
+	    );
+
+my $USAGE = "usage: gccalc.pl -f format -i filename\n";
+die $USAGE if $help;
+
+# A bare argument names the input file; with neither -i nor an argument
+# the sequences are read from standard input.
+$file = shift unless $file;
+my $seqin;
+if( ! defined $file ) {
+    $seqin = Bio::SeqIO->new(-format => $format,
+			     -fh     => \*STDIN);
+} else {
+    print "Could not open file [$file]\n$USAGE" and exit unless -e $file;
+    $seqin = Bio::SeqIO->new(-format => $format,
+			     -file   => $file);
+}
+
+# One report per non-empty, non-protein record: header line, GC fraction,
+# then the count of every monomer, terminated by a "--" line.
+while( my $record = $seqin->next_seq ) {
+    my $length = $record->length;
+    next if $length == 0;
+    if( $record->alphabet eq 'protein' ) {
+	warn("gccalc does not work on amino acid sequences ...skipping this seq");
+	next;
+    }
+
+    my $stats  = Bio::Tools::SeqStats->new('-seq'=>$record);
+    my $counts = $stats->count_monomers();    # hash ref: symbol => count
+    my $desc   = $record->desc;
+    print "Seq: ", $record->display_id, " ";
+    print $desc if $desc;
+    print " Len:", $length, "\n";
+    printf "GC content is %.4f\n", ($counts->{'G'} + $counts->{'C'}) / $length;
+
+    for my $monomer (sort keys %$counts) {
+	print "Number of bases of type ", $monomer, "= ", $counts->{$monomer},"\n";
+    }
+    print "--\n";
+}
+
+# alternatively one could use code submitted by
+# cckim at stanford.edu
+#
+# calcgc(STRING) returns the fraction (0..1) of characters in STRING that
+# are G or C in either case.  Empty or undefined input yields 0 instead of
+# dividing by zero.  Nothing in this script calls it.
+sub calcgc {
+    my ($seq) = @_;
+    return 0 unless defined $seq && length $seq;
+
+    # tr/// counts the characters; the old class [G|C] also matched '|'.
+    my $count = ( $seq =~ tr/GCgc// );
+    return $count / length($seq);
+}
+
+
+__END__
+
+=head1 NAME
+
+gccalc - GC content of nucleotide sequences
+
+=head1 SYNOPSIS
+
+  gccalc [-f/--format FORMAT] [-h/--help] filename
+  or
+  gccalc [-f/--format FORMAT] < filename
+  or
+  gccalc [-f/--format FORMAT] -i filename
+
+=head1 DESCRIPTION
+
+This script prints out the GC content for every nucleotide sequence
+from the input file.
+
+=head1 OPTIONS
+
+The default sequence format is fasta.
+
+The sequence input can be provided using any of the three methods:
+
+=over 3
+
+=item unnamed argument
+
+  gccalc filename
+
+=item named argument
+
+  gccalc -i filename
+
+=item standard input
+
+  gccalc < filename
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason at bioperl.org
+
+=head1 HISTORY
+
+Based on script code (see bottom) submitted by cckim at stanford.edu
+
+Submitted as part of bioperl script project 2001/08/06
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/oligo_count.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/oligo_count.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/seqstats/oligo_count.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,249 @@
+#!perl -w
+# $Id: oligo_count.PLS,v 1.4 2006/07/04 22:23:29 mauricio Exp $
+#
+# oligomer_freq.pl
+# We use this to determine what primers are useful for frequent priming of
+# nucleic acid for random labeling
+# Input: Sequence file, oligomer length
+# Output: Tab-delimited text file of oligomer frequencies
+# Written July 2, 2001
+# Charles C. Kim
+
+###########
+# MODULES #
+###########
+use Bio::Seq;
+use Bio::SeqIO;
+use Getopt::Long;
+
+#########################
+# VARIABLES & FILENAMES #
+#########################
+
+use strict;
+
+my ($format, $infile, $help, $outfile, $oligomerlength) = ('fasta');
+GetOptions(
+           'f|format:s'            => \$format,
+           'i|in|s|sequence:s'     => \$infile,
+           'h|help|?'              => \$help,
+           'o|out:s'               => \$outfile,
+           'length:i'              => \$oligomerlength
+          );
+
+my $USAGE = "Usage:\toligo_count [-h/--help] [-l/--length OLIGOLENGTH]\n".
+    "\t[-f/--format SEQFORMAT] [-i/--in/-s/--sequence SEQFILE]\n".
+    "\t[-o/--out OUTFILE]\n".
+    "\tDefault SEQFORMAT is fasta\n";
+
+print $USAGE and exit if $help;
+
+# Anything not given on the command line is asked for interactively.
+# $read_answer returns one chomped line from STDIN and dies at end-of-file;
+# without that check an exhausted STDIN made the length prompt loop forever.
+my $read_answer = sub {
+    my $line = <STDIN>;
+    die "Unexpected end of input\n" unless defined $line;
+    chomp $line;
+    return $line;
+};
+
+unless ($infile ) {
+    print 'Enter your concatenated FASTA sequence filename: ';
+    $infile = $read_answer->();
+}
+unless (-e $infile) { die "$infile not found\n"; }
+
+# An existing output file is only overwritten after confirmation.
+if ($outfile && -e $outfile) {
+    print "$outfile already exists!  Overwrite (Y/N)? ";
+    my $answer = $read_answer->();
+    while ($answer =~ /[^yn]/i) {
+	print 'Y or N, please: ';
+	$answer = $read_answer->();
+    }
+    if ($answer =~ /n/i) { die "$outfile not overwritten.\n"; }
+}
+
+# The length must be a positive integer.  The old test (/\d/) accepted any
+# answer that merely contained a digit, e.g. "3x", and also accepted 0.
+unless ($oligomerlength) {
+    while (1) {
+	print 'Enter an oligomer length to count: ';
+	$oligomerlength = $read_answer->();
+	last if $oligomerlength =~ /^[1-9]\d*$/;
+	print "Value is non-numeric!\n";
+    }
+}
+
+# A negative --length is not caught by the prompt above; reject it here.
+die "Oligomer length must be positive\n" if $oligomerlength < 1;
+
+
+########
+# MAIN #
+########
+
+# 4**k table entries are created up front, so large k needs confirmation.
+if ($oligomerlength >= 9) {
+    print "An oligomer length of $oligomerlength will generate ";
+    print 4 ** $oligomerlength, " combinations,\nwhich could cause ";
+    print "an out of memory error.  Proceed? (y/n) ";
+    my $answer = <STDIN>;
+    die "Program terminated\n" unless defined $answer && $answer =~ /y/i;
+}
+
+# Pre-seed every possible oligomer with zero so unseen ones are reported.
+my %oligos = map { $_ => 0 } generate_all_oligos($oligomerlength);
+
+my $in = Bio::SeqIO->new( -file => $infile,
+		       -format => $format);
+my $seqnumber = 0;
+my $oligocounts = 0;
+my $exception;
+while (my $seq = $in->next_seq() ) {
+    my $len = $seq->length();
+    # Too short only when strictly shorter than the oligomer: the old test
+    # (1 + length > len) also discarded sequences of exactly that length.
+    if ($oligomerlength > $len) {
+	$exception = 2;
+	next;
+    }
+    $seq = uc $seq->seq; #string
+    $exception = 1 if $seq =~ /[^GATC]/;
+
+    my $position = 1;
+    while ($position + $oligomerlength-1 <= $len) {
+	$oligos{substr $seq, $position-1, $oligomerlength}++;
+	$position++;
+	if ($position%250000 == 0) {print "$position\n";}
+    }
+    $oligocounts += $position-1;
+    $seqnumber++;
+}
+
+# Two-argument open is kept on purpose: '>-' is how STDOUT is selected.
+open(my $outfh, $outfile ? ">$outfile" : '>-')
+    or die "Can't open " . ($outfile || 'STDOUT') . ": $!\n";
+print {$outfh} "$seqnumber sequences analyzed\n";
+print {$outfh} "$oligocounts total $oligomerlength-mers counted\n";
+print {$outfh} "$oligomerlength-mer\tNumber\tFrequency\n";
+foreach my $key (sort keys %oligos) {
+    # With nothing counted every frequency is 0, not a division by zero.
+    my $frequency = $oligocounts ? $oligos{$key}/$oligocounts : 0;
+    print {$outfh} "$key\t$oligos{$key}\t", $frequency, "\n";
+}
+close($outfh) or die "Can't close output: $!\n";
+
+# Only the most recently recorded problem is reported.
+if ($exception) {
+    print "Non-standard (non-GATC) bases were found in sequence\n"
+	if $exception == 1;
+    print "Oligomer length greater than sequence length\n"
+	if $exception == 2;
+}
+
+#&notify();
+
+###############
+# SUBROUTINES #
+###############
+
+# generate_all_oligos(LENGTH) returns the list of every oligomer of that
+# length over the bases A, C, G and T.  Each round appends the four bases
+# to every oligomer built so far, which leaves the list in A, C, G, T
+# sort order.  A LENGTH below 2 gives just the four single bases.
+sub generate_all_oligos {
+    my ($oligolength) = @_;
+    my @alphabet = qw{A C G T};
+    my @oligos   = @alphabet;
+
+    for ( my $built = 1; $built < $oligolength; $built++ ) {
+	my @longer;
+	for my $prefix (@oligos) {
+	    push @longer, map { $prefix . $_ } @alphabet;
+	}
+	@oligos = @longer;
+    }
+    return @oligos;
+}
+
+# if you wanted to be notified about status of running
+#my $EMAILADDRESS = undef;
+#die("Must change script to a valid email addres for notification") 
+#    unless( defined $EMAILADDRESS );
+
+#sub notify {
+#    $address = $EMAILADDRESS;
+#    $address = $_[0] if $_[0];
+#    open(SENDMAIL, "|/usr/lib/sendmail -oi -t") or die "Can't fork for sendmail: $!\n";
+#    print SENDMAIL <<"EOF";
+#From: Computer
+#To: $address
+#Subject: Program Finished
+#	
+#EOF
+#    close(SENDMAIL) or warn "sendmail didn't close nicely";
+#}
+
+__END__
+
+=head1 NAME
+
+oligo_count - oligo count and frequency
+
+=head1 SYNOPSIS
+
+  Usage:  oligo_count [-h/--help] [-l/--length OLIGOLENGTH]
+          [-f/--format SEQFORMAT] [-i/--in/-s/--sequence SEQFILE]
+          [-o/--out OUTFILE]
+
+=head1 DESCRIPTION
+
+This script counts the occurrence and frequency of all oligonucleotides
+of a given length.
+
+It can be used to determine what primers are useful for
+frequent priming of nucleic acid for random labeling.
+
+Note that this script could be run by utilizing the compseq
+program which is part of EMBOSS.
+
+=head1 OPTIONS
+
+The default sequence format is fasta. If no outfile is given, the
+results will be printed to standard out. All other options can be entered
+interactively.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Charles C. Kim
+
+Email cckim at stanford.edu
+
+=head1 HISTORY
+
+Written July 2, 2001
+
+Submitted to bioperl scripts project 2001/08/06
+
+E<gt>E<gt> 100 x speed optimization by Heikki Lehvaslaiho
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/scripts/taxa/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/taxa/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/taxa/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+These are scripts to create and query taxonomic trees.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/taxa/classify_hits_kingdom.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/taxa/classify_hits_kingdom.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/taxa/classify_hits_kingdom.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,212 @@
+#!/usr/bin/perl -w
+# $Id: classify_hits_kingdom.PLS,v 1.2 2005/08/25 07:21:26 heikki Exp $
+
+=head1 NAME
+
+classify_hits_kingdom - classify BLAST hits by taxonomic kingdom
+
+=head2 USAGE
+
+classify_hits_kingdom [-i tab_file] [-i second_BLAST_file] [-e evalue_cutoff]
+                      [-t dir_where_TAXONOMY_files_are] [-g gi2taxid] 
+                      [-z PATH_TO_zcat] [-v]
+
+=head2 DESCRIPTION
+
+Will print out the taxonomic distribution (at the kingdom level) for a
+set of hits against the NR database.  This script assumes you've done
+a search against the protein database; for a nucleotide database, make
+the gi_taxid part point to the gi_taxid_nucl.dmp file instead.
+
+This expects BLAST files in tabbed -m9 or -m8 format.  Output with -m
+8 or use blast2table.pl to convert (or fastam9_to_table.PLS if using
+FASTA).
+
+  Input values:
+   -t/--taxonomy  directory where the taxonomy .dmp files are (from NCBI)
+   -g/--gi        Location of gi_taxid_prot.dmp (or gi_taxid_nucl.dmp if 
+                  the search was against a NT db)
+   -i/--in        The name of the tab delimited -m8/-m9 output files to 
+                  process.
+
+    -e/--evalue   Provide an E-value cutoff for hits to be considered
+    -z/--zcat     Path to the 'zcat' executable, can also be 'gunzip -c'
+                  if no zcat on your system.
+   Flags
+    -v/--verbose  To turn on verbose messages
+    -h/--help     Display this helpful information
+
+This is intended to be a useful starting script, but users may want to
+customize the output and parameters.  Note that I am summarizing the
+kingdoms here, and Eukaryota not falling into Metazoa, Viridiplantae,
+or Fungi get grouped into the general superkingdom Eukaryota for
+simplicity.  There are comments in the code directing you to where
+changes can be made if you wanted to display hits by phylum, for
+example.  Note that you must wipe out the cache file 'gi2class' that
+is created in your directory after making these changes.
+
+=head2 AUTHOR
+
+Jason Stajich jason_at_bioperl_dot_org
+
+=cut
+
+use strict;
+use Bio::DB::Taxonomy;
+use DB_File;
+use Env;
+use File::Spec;
+use vars qw($SEP);
+my $DEBUG = 0;
+use Getopt::Long;
+$SEP = '_';
+
+my $evalue_filter = 1e-3;
+my @files;
+my $zcat = 'zcat'; # or gunzip -c 
+my $prefix = File::Spec->catfile($HOME,'taxonomy');
+my $gi2taxidfile = "$prefix/gi_taxid_prot.dmp.gz";
+GetOptions(
+	   'v|verbose|debug' => \$DEBUG,
+	   'z|zcat:s'    => \$zcat,
+	   'i|in:s'      => \@files,
+	   'e|evalue:f'  => \$evalue_filter,
+	   't|taxonomy:s'=> \$prefix,
+	   'g|gi|gi2taxid:s' => \$gi2taxidfile,
+	   'h|help'      => sub { system('perldoc', $0);
+				  exit },
+	   );
+
+# ensure the idx location exists; stop here if it cannot be created
+my $idxdir = File::Spec->catfile($prefix,'idx');
+-d $idxdir or mkdir($idxdir) or die "cannot create $idxdir: $!\n";
+
+# these files came from ftp://ftp.ncbi.nih.gov/pub/taxonomy
+my $taxdb = Bio::DB::Taxonomy->new
+    (-source => 'flatfile',
+     -directory => $idxdir,
+     -nodesfile => File::Spec->catfile($prefix,'nodes.dmp'),
+     -namesfile => File::Spec->catfile($prefix,'names.dmp')
+     );
+my %query;    # file prefix => query name => kingdom => hit count
+
+# gi2class caches gi => kingdom name; idx/gi2taxid holds gi => taxid.
+my (%taxid4gi,%gi2node);
+my $dbh = tie(%gi2node, 'DB_File', 'gi2class')
+    or die "cannot tie gi2class: $!\n";
+my $giidxfile = File::Spec->catfile($prefix,'idx','gi2taxid');
+my $done = -e $giidxfile;    # checked before the tie below opens the file
+my $dbh2 = tie(%taxid4gi, 'DB_File', $giidxfile)
+    or die "cannot tie $giidxfile: $!\n";
+
+if( ! $done ) {
+    my $fh;
+    # this file came from ftp://ftp.ncbi.nih.gov/pub/taxonomy
+    # I'm interested in protein hits, therefore the _prot file.
+    if( $gi2taxidfile =~ /\.gz$/ ) {
+	open($fh, "$zcat $gi2taxidfile |" ) || die "$zcat $gi2taxidfile: $!";
+    } else {
+	open($fh, $gi2taxidfile ) || die "$gi2taxidfile: $!";
+    }
+    my $i = 0;
+    while(<$fh>) {
+	my ($gi,$taxid) = split;
+	next unless defined $taxid;    # blank or malformed line
+	$taxid4gi{$gi} = $taxid;
+	$i++;
+	# progress only when verbose, every 100000 records (was: every line when quiet)
+	warn "$i\n" if $DEBUG && $i % 100000 == 0;
+    }
+    $dbh2->sync;
+}
+
+for my $file ( @files ) {
+    warn("$file\n");
+    # results are grouped under the file name up to its first '.'
+    my ($spname) = split(/\./,$file);
+    my ($fh,$i);
+    if( $file =~ /\.gz$/ ) {
+	open($fh, "$zcat $file |")  || die "$zcat $file: $!";
+    } else {
+	open($fh, $file) || die "$file: $!";
+    }
+    while(<$fh>) {
+	# -m9 output carries '#' comment lines; they are not hits
+	next if /^\s*#/;
+	my ($qname,$hname,$pid,$qaln,$mismatch,$gaps,
+	    $qstart,$qend,$hstart,$hend,
+	    $evalue,$bits,$score) = split(/\t/,$_);
+	next unless defined $evalue;    # not a complete tabular hit line
+	# Was "$evalue > $evalue" (never true), so -e had no effect.
+	next if( $evalue > $evalue_filter );
+	if( ! exists $query{$spname}->{$qname} ) {
+	    $query{$spname}->{$qname} = {};
+	}
+
+	if( $hname =~ /gi\|(\d+)/) {
+	    my $gi = $1;
+	    if( ! $gi2node{$gi} ){ # see if we cached the results from before
+		my $taxid = $taxid4gi{$gi};
+		if( ! $taxid ) {
+		    warn("no taxid for $gi\n");
+		    next;
+		}
+		my $node = $taxdb->get_Taxonomy_Node($taxid);
+		if( ! $node ) {
+		    warn("cannot find node for gi=$gi ($hname) (taxid=$taxid)\n");
+		    next;
+		}
+		my $parent = $taxdb->get_Taxonomy_Node($node->parent_id);
+
+		# THIS IS WHERE THE KINGDOM DECISION IS MADE
+		# DON'T FORGET TO WIPE OUT YOUR CACHE FILE
+		# gi2class after you make changes here
+		while( defined $parent && $parent->node_name ne 'root' ) {
+		    # this is walking up the taxonomy hierarchy
+		    # can be a little slow, but works...
+		    #warn( "\t",$parent->rank, " ", $parent->node_name, "\n");
+		    # deal with Eubacteria, Archaea separately from
+		    # Metazoa, Fungi, Viridiplantae
+		    # (everything else Eukaryotic goes in Eukaryota)
+		    if( $parent->rank eq 'kingdom') {
+			# caching in ...
+			($gi2node{$gi}) = $parent->node_name;
+			last;
+		    } elsif( $parent->rank eq 'superkingdom' ) {
+			# caching in ...
+			($gi2node{$gi}) = $parent->node_name;
+			$gi2node{$gi} =~ s/ \<(bacteria|archaea)\>//g;
+			last;
+		    }
+		    $parent = $taxdb->get_Taxonomy_Node($parent->parent_id);
+		}
+		$dbh->sync;
+	    }
+	    my ($kingdom) = $gi2node{$gi};
+	    # a gi whose lineage reached no kingdom/superkingdom is not counted
+	    if( defined $kingdom && length($kingdom) ) {
+		$query{$spname}->{$qname}->{$kingdom}++;
+	    }
+	} else {
+	    warn("no GI in $hname\n");
+	}
+    }
+    # NOTE(review): $i is only touched here, so this cut-off never fires; left as found.
+    last if ( $DEBUG && $i++ > 10000);
+}
+
+# Per input file: how many queries hit each combination of kingdoms,
+# least common combination first, with its share of all queries.
+for my $sp ( keys %query ) {
+    my $hits_of = $query{$sp};
+    my $total = keys %$hits_of;
+    print "$sp total=$total\n";
+    my %seen;
+    $seen{ join(",", sort keys %$_) }++ for values %$hits_of;
+
+    for my $combo ( sort { $seen{$a} <=> $seen{$b} } keys %seen ) {
+	my $n = $seen{$combo};
+	printf " %-20s\t%d\t%.2f%%\n", $combo, $n, 100 * $n / $total;
+    }
+    print "\n\n";
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/taxa/local_taxonomydb_query.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/taxa/local_taxonomydb_query.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/taxa/local_taxonomydb_query.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,59 @@
+#!/usr/bin/perl -w
+use strict;
+use Bio::DB::Taxonomy;
+
+use strict;
+use Getopt::Long;
+my $verbose = 0;
+my ($nodesfile, $namesfile);
+my $idx_dir = '/tmp/idx';
+
+# Command-line options; -h/--help replaces this process with perldoc.
+GetOptions(
+    'v|verbose' => \$verbose,
+    'nodes:s'   => \$nodesfile,
+    'names:s'   => \$namesfile,
+    'idx:s'     => \$idx_dir,
+    'h|help'    => sub { exec('perldoc', $0); exit(0) },
+);
+
+# Nothing to look up and no dump files given: show the documentation.
+unless ( @ARGV || $nodesfile || $namesfile ) {
+    exec('perldoc', $0);
+    exit(0);
+}
+
+# The index directory is handed to Bio::DB::Taxonomy as -directory below;
+# report a failure to create it here rather than letting it surface later.
+unless ( -d $idx_dir ) {
+    mkdir($idx_dir) or die("cannot create index directory $idx_dir: $!\n");
+}
+
+my $db = Bio::DB::Taxonomy->new(-source    => 'flatfile',
+                                -nodesfile => $nodesfile,
+                                -namesfile => $namesfile,
+                                -directory => $idx_dir);
+
+# Every remaining argument is a name to look up.
+for my $sp ( @ARGV ) {
+    my $node = $db->get_Taxonomy_Node(-name => $sp);
+    unless ( defined $node ) {
+        warn("no node found for query $sp");
+        next;
+    }
+    print "id is ", $node->id, "\n"; # 9606
+    print "rank is ", $node->rank, "\n"; # species
+    print "scientific name is ", $node->scientific_name, "\n"; # Homo sapiens
+    print "division is ", $node->division, "\n"; # Primates
+}
+
+=head1 NAME
+
+local_taxonomydb_query - query a local TaxonomyDB for species or taxonid
+
+=head1 DESCRIPTION
+
+This script provides an example implementation of access to a local
+Taxonomy database implemented with Berkeley DB (DB_File module is needed).
+
+Usage:
+
+ local_taxonomydb_query.PLS: [-v] --nodes nodes.dmp --names names.dmp "Genus1 species1" "Genus2 species2"
+
+Providing the nodes.dmp and names.dmp files from the NCBI Taxonomy
+dump (see Bio::DB::Taxonomy::flatfile for more info) is only necessary
+on the first time running.  This will create the local indexes and may
+take quite a long time.  However once created, these indexes will
+allow fast access for species to taxon id OR taxon id to species name
+lookups.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/taxa/query_entrez_taxa.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/taxa/query_entrez_taxa.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/taxa/query_entrez_taxa.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,80 @@
+#!/usr/bin/perl -w
+# This is a -*-Perl-* file (make my emacs happy)
+
+=head1 NAME
+
+query_entrez_taxa - query Entrez taxonomy database and print out information 
+
+=head1 USAGE
+
+query_entrez_taxa "Homo sapiens" "Saccharomyces cerevisiae" Rhizopus Metazoa
+query_entrez_taxa -gi 28800981 -gi 54301680 -db nucleotide
+query_entrez_taxa -gi 71836523 -db protein
+
+ Provide the genus and species name in quotes, you can also query for
+ a non-species node like Family or Order
+
+Command-line options:
+   -v or --verbose  : print verbose debugging info
+   -gi              : one or many GI numbers to lookup taxon id for
+   -db              : the sequence db (nucleotide or protein) the GI is for
+
+   other arguments are assumed to be species names to lookup in taxonomy db
+
+
+=head1 AUTHOR
+
+Jason Stajich jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::DB::Taxonomy;
+use Getopt::Long;
+
+my $verbose = 0;
+my (@gi, $dbname);
+GetOptions('v|verbose' => \$verbose,
+           'gi:i'      => \@gi,
+           'db:s'      => \$dbname);
+
+my $db = Bio::DB::Taxonomy->new(-source => 'entrez', -verbose => $verbose);
+
+# GI lookups: all GI numbers are sent in one request and the returned
+# nodes are reported in order, each paired with the next GI from the list.
+if( @gi ) {
+    my @nodes = $db->get_Taxonomy_Node(-gi => \@gi,
+                                       -db => $dbname);
+    for my $node ( @nodes ) {
+        my $gi = shift @gi;
+        print " for gi $gi:\n";
+        print " taxonid is ",$node->ncbi_taxid,"\n";
+        print " node is ", join(", ",$node->classification), "\n";
+        print " species is ", $node->species,"\n";
+        print " parent is ", $node->parent_id, "\n";
+        print " rank is ", $node->rank, "\n";
+        print " genetic_code  ", $node->genetic_code, "\n";
+        print " mito_genetic_code  ", $node->mitochondrial_genetic_code, "\n";
+        print " scientfic name is ", $node->binomial, "\n";
+    }
+}
+
+print "\n\n";
+
+# Name lookups: every remaining argument is resolved to a taxon id first,
+# then to the full node.  A name that cannot be resolved is reported and
+# skipped instead of aborting on a method call on an undefined value.
+for my $name ( @ARGV ) {
+    my $taxonid = $db->get_taxonid($name);
+    unless( defined $taxonid ) {
+        warn("no taxon id found for '$name'\n");
+        next;
+    }
+    my $node = $db->get_Taxonomy_Node(-taxonid => $taxonid);
+    unless( defined $node ) {
+        warn("no taxonomy node found for '$name' (taxonid $taxonid)\n");
+        next;
+    }
+    print "taxonid is $taxonid\n";
+
+    print " node is ", join(", ",$node->classification), "\n";
+    print " species is ", $node->species,"\n";
+    print " parent is ", $node->parent_id, "\n";
+    print " rank is ", $node->rank, "\n";
+    print " genetic_code  ", $node->genetic_code, "\n";
+    print " mito_genetic_code  ", $node->mitochondrial_genetic_code, "\n";
+    print " scientfic name is ", $node->binomial, "\n";
+    print " common name is ", $node->common_name, "\n";
+    print " create date is ", $node->create_date, "\n";
+    print " update date is ", $node->update_date, "\n";
+    print " pub date is ", ($node->pub_date || ''), "\n";
+    print " variant is ", $node->variant, "\n";
+    print " sub_species is ", $node->sub_species, "\n";
+    print " organelle is ", $node->organelle, "\n";
+    print " division is ", $node->division, "\n";
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxid4species.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxid4species.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxid4species.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+#!perl -w
+# Author:  Jason Stajich <jason at bioperl.org>
+# Purpose: Retrieve the NCBI Taxa ID for organism(s)
+
+# TODO: add rest of POD
+#
+
+use LWP::UserAgent;
+use XML::Twig;
+use strict;
+use Getopt::Long;
+my $verbose = 0;
+my $plain   = 0;
+my $help    = 0;
+my $USAGE = "taxid4species: [-v] [-p] \"Genus1 species1\" \"Genus2 species2\"";
+
+GetOptions('v|verbose' => \$verbose,
+	   'p|plain'   => \$plain,
+	   'h|help'    => \$help);
+die("$USAGE\n") if $help;
+
+my $ua = LWP::UserAgent->new();
+
+my $urlbase = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=taxonomy&term=';
+
+# Build one query of the form "Name1 OR Name2 ..." with whitespace
+# replaced by '+' so it can be appended to the URL.
+my (@organisms) = @ARGV;
+die("must provide valid organism") unless @organisms;
+my $organismstr = join(" OR ", @organisms);
+$organismstr =~ s/\s/\+/g;
+
+my $response = $ua->get($urlbase.$organismstr);
+print $response->content,"\n"if($verbose);
+
+# Do not try to parse an error page as a search result.
+die("query to NCBI failed: " . $response->status_line . "\n")
+    unless $response->is_success;
+
+my $t = XML::Twig->new();
+$t->parse($response->content);
+my $root = $t->root;
+
+# Collect the taxon ids; in plain mode they are printed as they are found.
+my $list = $root->first_child('IdList');
+die("unexpected response from NCBI: no IdList element\n") unless $list;
+my @data;
+foreach my $child ($list->children('Id') ) {
+    push @data, $child->text;
+    if( $plain ) { print $child->text, "\n" }
+}
+
+# Otherwise print each search term next to the next id from the list.
+unless( $plain  ) {
+    $list = $root->first_child('TranslationStack');
+    die("unexpected response from NCBI: no TranslationStack element\n")
+        unless $list;
+    foreach my $set ($list->children('TermSet') ) {
+        foreach my $term ( $set->children('Term') ) {
+            print "\"",$term->text(), "\", ", shift @data, "\n";
+        }
+    }
+}
+
+=head1 NAME
+
+taxid4species: Simple script which returns the NCBI Taxonomic id for a requested species
+
+=head1 DESCRIPTION
+
+This simple script shows how to get the taxa id from NCBI Entrez and
+will return a list of taxa ids for requested organisms.
+
+=cut
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxonomy2tree.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxonomy2tree.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/taxa/taxonomy2tree.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,85 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+taxonomy2tree - Building a taxonomic tree based on the full lineages of a set of species names
+
+=head1 DESCRIPTION
+
+taxonomy2tree.PLS: -s Orangutan -s Gorilla -s Chimpanzee -s Human
+taxonomy2tree.PLS: -s Orangutan -s Gorilla -s Chimpanzee -s "Homo Sapiens"
+
+Can also provide -d to specify the directory to store index files in, -o to
+specify the location of your nodes file, and -a for the names file.
+Or the option -e to use the web-based entrez taxonomy database if you don't
+have the flatfiles installed.
+
+This script requires that the bioperl-run pkg be also installed.
+
+Providing the nodes.dmp and names.dmp files from the NCBI Taxonomy
+dump (see Bio::DB::Taxonomy::flatfile for more info) is only necessary
+on the first time running.  This will create the local indexes and may
+take quite a long time.  However once created, these indexes will
+allow fast access for species to taxon id OR taxon id to species name
+lookups.
+
+=head1 AUTHOR - Gabriel Valiente, reimplemented by Sendu Bala
+
+Email valiente at lsi.upc.edu
+Email bix at sendu.me.uk
+
+=cut
+
+use strict;
+use Bio::DB::Taxonomy;
+use Bio::TreeIO;
+use Bio::Tree::Compatible;
+use Getopt::Long;
+
+my @species;
+my $index_dir = "./db/";
+my $nodesfile = "nodes.dmp";
+my $namesfile = "names.dmp";
+my $use_entrez = 0;
+
+# the input to the script is an array of species names
+GetOptions('s|species=s'   => \@species,
+           'd|dir:s'       => \$index_dir,
+           'o|nodesfile:s' => \$nodesfile,
+           'a|namesfile:s' => \$namesfile,
+           'e|entrez'      => \$use_entrez);
+
+
+my $db = Bio::DB::Taxonomy->new(-source => $use_entrez ? 'entrez' : 'flatfile',
+                                -directory => $index_dir,
+                                -nodesfile => $nodesfile,
+                                -namesfile => $namesfile);
+
+# the full lineages of the species are merged into a single tree
+my $tree;
+for my $name (@species) {
+  my $ncbi_id = $db->get_taxonid($name);
+  unless ($ncbi_id) {
+    warn "no NCBI Taxonomy node for species ",$name,"\n";
+    next;
+  }
+  my $node = $db->get_taxon(-taxonid => $ncbi_id);
+
+  # the first resolved species starts the tree, later ones are merged in
+  if ($tree) {
+    $tree->merge_lineage($node);
+  }
+  else {
+    $tree = Bio::Tree::Tree->new(-node => $node);
+  }
+}
+
+# without at least one resolved species there is no tree to work on
+die "no tree could be built: none of the requested species were found\n"
+  unless $tree;
+
+# simple paths are contracted by removing degree one nodes
+$tree->contract_linear_paths;
+
+# convert tree ids to their names for nice output with TreeIO
+foreach my $node ($tree->get_nodes) {
+  $node->id($node->node_name);
+}
+
+# the tree is output in Newick format
+my $output = Bio::TreeIO->new(-format => 'newick');
+$output->write_tree($tree);
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/scripts/tree/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/tree/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/tree/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+These are utilities to manipulate and create trees.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/tree/blast2tree.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/tree/blast2tree.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/tree/blast2tree.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,148 @@
+#!/usr/bin/perl -w
+# Author:  Jason Stajich <jason at bioperl.org>
+# Purpose: Blast Report -> MSA -> Tree 
+
+
+# This needs lots more error checking, cmdline input of parameters
+# and support for other treebuilding -- only Phylip Neighbor for 
+# protein data is supported
+
+# Also proper pulling in of the correct sequence data from the
+# alignment - multiple hits on different parts of a protein aren't
+# going to work properly right now.  So this is mostly an example
+# starting point which needs a lot more work to be made robust.
+
+use strict;
+use Bio::AlignIO;
+use Bio::Tools::Run::Alignment::Clustalw;
+use Bio::Tools::Run::Phylo::Phylip::ProtDist;
+use Bio::Tools::Run::Phylo::Phylip::Neighbor;
+use Bio::Tools::Run::Phylo::Molphy::ProtML;
+use Bio::Tools::Run::Phylo::Phylip::ProtPars;
+use Bio::SearchIO;
+use Bio::SimpleAlign;
+use Bio::PrimarySeq;
+use Bio::TreeIO;
+use Getopt::Long;
+
+my $IDLENGTH = 12;
+
+
+# we could in fact pull the tree out of the guide tree calculated
+# by Clustalw in the alignment, but I believe that is UPGMA 
+# which would *NOT* be a good thing to be giving people.
+
+my $aln_factory = Bio::Tools::Run::Alignment::Clustalw->new(
+    'ktuple' => 2,
+    "quiet"  => 1,
+    'matrix' => 'BLOSUM',
+);
+
+# defaults, overridable from the command line
+my $format      = 'blast';
+my $tree_method = 'neighbor';
+my $cutoff      = '0.01';
+my ($report, $keepall);
+
+GetOptions(
+    'h|help'      => sub { exec('perldoc', $0); exit(0); },
+    'i|input:s'   => \$report,
+    'f|format:s'  => \$format,
+    'm|method:s'  => \$tree_method,
+    'e|evalue:s'  => \$cutoff,
+    'k|keepall:s' => \$keepall, # keep all HSPs not just best
+);
+
+# reject report formats and tree methods the script does not know
+die("Must request a valid search report format (fasta,blast,hmmer)")
+    if $format !~ /blast|fasta|hmmer/i;
+
+die("Must request a valid tree building method (neighbor,protml)")
+    if $tree_method !~ /nj|neighbor/i && $tree_method !~ /protml|ml/i;
+
+my (@alns, @seqs);
+
+# Read the search report and build one multiple alignment per result
+# from the hit sequences that pass the e-value cutoff.
+my $in = Bio::SearchIO->new(-file   => $report,
+                            -format => $format);
+while( my $r = $in->next_result ) {
+    # Let's build the simplest system first
+    die("Cannot process report that does not contain protein sequence") 
+        unless ($r->algorithm =~ /HMMER|BLASTP|FASTP/i );
+    my @seqs;
+    while( my $hit = $r->next_hit ) {
+        next if $hit->significance > $cutoff;
+        while( my $hsp = $hit->next_hsp ) {
+            next if $hsp->evalue > $cutoff;
+            # second sequence of the HSP alignment, i.e. the hit side
+            my $seq = $hsp->get_aln->get_seq_by_pos(2)->seq();
+            push @seqs, Bio::PrimarySeq->new(-seq => $seq,
+                                             -id  => $hsp->hit->seq_id,
+                                             -desc => $r->algorithm . " best align to ". $hsp->query->seq_id );
+            # only the first passing HSP per hit unless -k was given
+            last unless $keepall;
+        }
+    }
+    # a multiple alignment needs at least two sequences
+    if( @seqs < 2 ) {
+        warn("skipping result for " . $r->query_name .
+             ": fewer than two sequences passed the cutoff\n");
+        next;
+    }
+    push @alns, $aln_factory->align(\@seqs);    
+}
+
+# Write all alignments to a single interleaved PHYLIP file.
+my $out = Bio::AlignIO->new(-format => 'phylip',
+                            -interleaved => 1,
+                            -idlength => $IDLENGTH,
+                            -file => ">alignfile.phy");
+$out->write_aln(@alns);
+# drop the writer; presumably this closes and flushes alignfile.phy
+$out = undef;
+
+# these need to be parameterized for cmdline arguments
+my @params = ('idlength'=>$IDLENGTH,
+              'model'=>'cat',
+              'gencode'=>'U',
+              'category'=>'H',
+              'probchange'=>'0.4',
+              'trans'=>'5',
+              'freq'=>'0.25,0.5,0.125,0.125');
+my $dist_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new(@params);
+$dist_factory->quiet(1);
+
+# the same variable is reused for the neighbor-joining parameters
+@params = ('type'=>'NJ',
+           'outgroup'=>1,
+           'upptri'=>1,
+           'jumble'=>17);
+
+my $tree_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new(@params);
+$tree_factory->quiet(1);
+my $count = 1;
+my $outtrees = Bio::TreeIO->new(-file => ">trees.tre",
+                                -format => 'newick');
+foreach my $aln ( @alns ) {
+# NOTE NOTE NOTE 
+# This interface is probably going to change from create_tree to
+# next_tree per some discussions I'm having with Shawn - we may need
+# to tweak any scripts before you publish
+
+# also may move the create_distance_matrix method around some
+# and need to write in the switched support for Molphy's ProtML
+    
+    my $matrix = $dist_factory->create_distance_matrix($aln);
+    my @seqnames = keys %$matrix; 
+
+    # Write the upper triangle of the distance matrix to GroupN.dist.
+    my $distfile = "Group$count.dist";
+    open(my $matrix_fh, '>', $distfile)
+        or die("cannot open $distfile for writing: $!\n");
+    printf {$matrix_fh} "%4d\n",scalar @seqnames;
+    # NOTE(review): the last sequence never gets a row of its own although
+    # it is counted in the header line - confirm this is what neighbor expects
+    for my $i ( 0 .. $#seqnames - 1 ) {
+        printf {$matrix_fh} "%-12s     ", $seqnames[$i];
+        for my $j ( $i + 1 .. $#seqnames ) {
+            print {$matrix_fh} $matrix->{$seqnames[$i]}->{$seqnames[$j]},"  ";
+        }
+        print {$matrix_fh} "\n";
+    }
+    # buffered write errors only show up when the handle is closed
+    close($matrix_fh) or die("cannot close $distfile: $!\n");
+
+    my $tree = $tree_factory->create_tree($distfile);
+    $outtrees->write_tree($tree);   
+    $count++;
+}
+
+=head1 NAME
+
+tree_from_seqsearch - builds a phylogenetic tree based on a sequence
+search (FastA,BLAST,HMMER)
+
+=head1 DESCRIPTION
+
+This script requires that the bioperl-run pkg be also installed.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/scripts/tree/nexus2nh.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/tree/nexus2nh.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/tree/nexus2nh.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,46 @@
+#!perl -w
+
+=head1 NAME
+
+nexus2nh - convert nexus format trees (from PAUP* and MrBayes) to new hampshire
+
+=head1 SYNOPSIS
+
+ nexus2nh file.nexus > file.nh
+
+ # OR pipe in through STDIN
+
+ cat file.nexus | nexus2nh > file.nh
+
+ # OR specify an output
+
+ nexus2nh -o file.nh file.nexus
+
+=head1 DESCRIPTION
+
+Convert Nexus Tree files into Newick/New Hampshire format tree files.
+
+
+=cut
+
+use strict;
+use Bio::TreeIO;
+use Getopt::Long;
+
+my $outfile;
+
+GetOptions('o|out|outfile:s' => \$outfile);
+
+# Nexus trees are read through the ARGV handle: the files named on the
+# command line, or STDIN when there are none.
+my $in = Bio::TreeIO->new(-format => 'nexus', -fh => \*ARGV);
+
+# Newick output goes to the -o file when one is given, otherwise no
+# destination is passed (write to STDOUT).
+my @destination = $outfile ? (-file => ">$outfile") : ();
+my $out = Bio::TreeIO->new(-format => 'newick', @destination);
+
+# copy every tree across, one at a time
+while( my $tree = $in->next_tree ) {
+    $out->write_tree($tree);
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/tree/tree2pag.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/tree/tree2pag.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/tree/tree2pag.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,59 @@
+#!perl -w
+
+=head1 NAME
+
+tree2pag - convert Bio::TreeIO parseable format trees to pagel format
+
+=head1 SYNOPSIS
+
+ tree2pag -if nexus -i file.nexus > file.pag
+
+ # OR pipe in through STDIN, and use newick format instead
+
+ cat file.newick | tree2pag -if newick > file.pag
+
+ # OR specify an output and input
+
+ tree2pag -o file.pag -i file.newick
+
+=head1 DESCRIPTION
+
+Convert TreeIO parseable files into Pagel format tree files.  Be
+warned that Pagel format only really supports a single tree per file.
+Also, Pagel parsing is not yet available in bioperl.
+
+=cut
+
+use strict;
+use Bio::TreeIO;
+use Getopt::Long;
+my ($iformat,$oformat) = ('newick', 'pag');
+my ($outfile,$infile);
+GetOptions(
+    'if|informat:s'  => \$iformat,
+    'of|outformat:s' => \$oformat,
+    'i|in:s'         => \$infile,
+    'o|out:s'        => \$outfile,
+    'h|help'         => sub { exec('perldoc', $0); exit(0); },
+);
+
+# Input comes from the -i file when given, otherwise from the ARGV handle
+# (files named on the command line, or STDIN).
+my @source = $infile ? (-file => $infile) : (-fh => \*ARGV);
+my $in = Bio::TreeIO->new(-format => $iformat, @source);
+
+# Output goes to the -o file when given; with no destination the writer
+# prints to STDOUT instead.
+my @destination = $outfile ? (-file => ">$outfile") : ();
+my $out = Bio::TreeIO->new(-format => $oformat, @destination);
+
+# copy every tree across, one at a time
+while( my $tree = $in->next_tree ) {
+    $out->write_tree($tree);
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/README
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/README	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/README	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+This directory is for robust scripts which have documentation,
+cmdline arguments, and can be used in a production environment.
+Their extensions will be renamed .pl and will be installed in
+the SCRIPT_INSTALL directory as defined in the Makefile.PL 
+configuration.
+
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/TAG
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/TAG	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/TAG	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3 @@
+These are various sequence-related scripts that were difficult to
+classify more specifically but are considered general purpose
+utilities.

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_mrtrans.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_mrtrans.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_mrtrans.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+#!perl
+# $Id: bp_mrtrans.PLS,v 1.6 2005/04/26 15:03:53 jason Exp $
+use strict;
+
+# Author:      Jason Stajich <jason-at-bioperl-dot-org>
+# Description: Perl implementation of Bill Pearson's mrtrans
+#              to project protein alignment back into cDNA coordinates
+#
+
+=head1 NAME
+
+bp_mrtrans - implement a transformer of alignments from protein to mrna coordinates
+
+=head1 SYNOPSIS
+
+Usage:
+  bp_mrtrans -i inputfile -o outputfile [-if input format] [-of output format] [-s cDNA sequence database]  [-sf cDNA sequence format] [-h]
+
+=head1 DESCRIPTION
+
+This script will convert a protein alignment back into a cDNA.  Loosely
+based on Bill Pearson's mrtrans.
+
+The options are:
+
+   -o filename          - the output filename [default STDOUT]
+   -of format           - output sequence format
+                          (multiple sequence alignment)
+                          [default phylip]
+   -i filename          - the input filename [required]
+   -if format           - input sequence format
+                          (multiple sequence alignment)
+                          [ default clustalw]
+   -s --seqdb filename  - the cDNA sequence database file
+   -sf --seqformat      - the cDNA seq db format (flatfile sequence format)
+   -h                   - this help menu
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::Align::Utilities qw(aa_to_dna_aln);
+use Bio::AlignIO;
+use Bio::SeqIO;
+use Getopt::Long;
+
+# TODO - finish documentation,
+#      - add support for extra options in output alignment formats
+#        such as idnewline in phylip out to support Molphy input files
+
+# format defaults; the file names have no default
+my ($iformat, $seqformat, $oformat) = ('clustalw', 'fasta', 'phylip');
+my ($seqdb, $input, $output);
+my ($help,$usage);
+
+# -sf takes the format of the cDNA database, not the database itself
+$usage = "usage: bp_mrtrans.pl -i prot_alignment -if align_format -o out_dna_align -of output_format -s cDNA_seqdb -sf cDNA_seqdb_format\n".
+"defaults: -if clustalw\n".
+"          -of phylip\n".
+"          -sf fasta\n";
+
+GetOptions(
+    'if|iformat:s'   => \$iformat,
+    'i|input:s'      => \$input,
+    'o|output:s'     => \$output,
+    'of|outformat:s' => \$oformat,
+    's|seqdb|db:s'   => \$seqdb,
+    'sf|seqformat:s' => \$seqformat,
+    'h|help'         => sub { exec('perldoc', $0); exit(0) },
+);
+
+# anything not given as an option is taken from the remaining
+# arguments, in the order input, seqdb, output
+$input ||= shift;
+$seqdb ||= shift;
+$output ||= shift;
+if( ! defined $seqdb ) {
+    die("cannot proceed without a valid seqdb\n$usage");
+}
+if( ! defined $input ) {
+    die("cannot proceed without an input file\n$usage");
+}
+
+# load every cDNA sequence into memory, keyed by its id
+my $indb = Bio::SeqIO->new(-file => $seqdb,
+                           -format=>$seqformat);
+my %seqs;
+while( my $seq = $indb->next_seq ) {
+    $seqs{$seq->id} = $seq;
+}
+
+my $in = Bio::AlignIO->new(-format => $iformat,
+                           -file   => $input);
+# no -file argument is passed when no output name was given
+my $out = Bio::AlignIO->new(-format => $oformat,
+                            -idlength => 22,
+                            -interleaved => 0,
+                            defined $output ? (-file   => ">$output") : () );
+
+# project each protein alignment onto the cDNA sequences and write it out
+while( my $aln = $in->next_aln ) {
+    my $dnaaln = aa_to_dna_aln($aln,\%seqs);
+    $dnaaln->set_displayname_flat(1);
+    $out->write_aln($dnaaln);
+}
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_nrdb.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_nrdb.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_nrdb.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,135 @@
+#!perl -w
+# $Id: bp_nrdb.PLS,v 1.4 2005/04/26 15:03:53 jason Exp $
+
+# Author Jason Stajich <jason-at-bioperl-dot-org>
+# 
+# Make a non-redundant database based on sequence (not on ID!)
+# This script is still in progress but is intended to mimic what 
+# Warren Gish's nrdb does
+
+# It requires that Digest::MD5 is installed (for now)
+
+=head1 NAME
+
+bp_nrdb.PLS - a script to emulate Warren Gish's nrdb, make a unique sequence database from a set of input databases
+
+=head1 SYNOPSIS
+
+
+Usage: 
+  bp_nrdb.PLS [options] file1 file2 file3
+
+Alternative usage
+  bp_nrdb.PLS -p [options] file1 id1 file2 id2 file3 id3
+
+=head1 DESCRIPTION
+
+This script will create a unique database of sequences
+(quasi-nonredundant).  The options are:
+
+   -o filename          - the filename the db is written (STDOUT by default)
+   -a filename          - the filename to append the db to
+   -l#                  - minimum required sequence length
+   -i                   - do not check for duplicates
+   -n#                  - max number of descriptions to report per seq
+   -d#                  - delimiter to use between consecutive descriptions
+   -p                   - use database id prefixes from command line
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::SeqIO;
+use Getopt::Long;
+
+use Digest::MD5 qw(md5_hex);
+my ($output,$append,$min_len,
+    $no_duplicate_check,$desc_count,
+    $delimiter, $expect_prefixes,$help);
+$delimiter = ';';
+
+GetOptions(
+    'o|output:s' => \$output,
+    'a|append:s' => \$append,
+    'n:s'        => \$desc_count,
+    'l:s'        => \$min_len,
+    'd:s'        => \$delimiter,
+    'p'          => \$expect_prefixes,
+    'i'          => \$no_duplicate_check,
+    'h'          => \$help,
+);
+
+# -n (description count) and -l (minimum length) must be whole numbers
+# of at least 1 when they are supplied
+die("must supply a positive integer for -n") if ( defined $desc_count &&
+                                                  ( $desc_count !~ /^\d+$/ ||
+                                                    $desc_count < 1) );
+die("must supply a positive integer for -l") if ( defined $min_len &&
+                                                  ( $min_len !~ /^\d+$/ ||
+                                                    $min_len < 1) );
+my @files;
+
+if( $help || ! @ARGV ) {
+    exec('perldoc',$0);
+    exit(0);
+}
+
+# Turn the argument list into [file, id] pairs.  With -p every file name
+# is followed by its database id; without it the id is the empty string.
+while( @ARGV ) {
+    my $file = shift @ARGV;
+    my $id   = '';
+    if( $expect_prefixes ) {
+        $id = shift @ARGV;
+        die("Must provide 'name id' pairing of dbfile and id") unless $id;
+    }
+    push @files, [ $file,$id];
+}
+
+
+# append to an existing db, write a new file, or fall back to STDOUT
+my $out = $append ? Bio::SeqIO->new(-file => ">>$append")
+        : $output ? Bio::SeqIO->new(-file => ">$output")
+        :           Bio::SeqIO->new(); # use STDOUT
+
+my %unique;    # key (sequence digest or running counter) => first sequence seen
+my %seqcount;  # key => number of later duplicates seen so far
+my $counter = 0;
+foreach my $pair ( @files ) {
+    my ($file,$id) = @$pair;
+    my $in = Bio::SeqIO->new(-file => $file);
+    while( my $seq = $in->next_seq ) {
+        next if defined $min_len && $seq->length < $min_len;
+        if( $id ) {
+            $seq->display_id("$id:".$seq->display_id);
+        }
+
+        # Sequences are compared by the MD5 of their lower-cased residues;
+        # with -i every sequence gets its own key so nothing is merged.
+        my $md5sum = $no_duplicate_check ? $counter++
+                                         : md5_hex(lc($seq->seq()));
+
+        unless( defined $unique{$md5sum} ) {
+            $unique{$md5sum} = $seq;
+            next;
+        }
+
+        # Duplicate: append its id and description to the first copy,
+        # unless the -n limit has been passed.  The key itself must not be
+        # modified here, it is still needed for the lookups below.
+        $seqcount{$md5sum}++;
+        next if defined $desc_count && $seqcount{$md5sum} > $desc_count;
+        my $desc = $unique{$md5sum}->description;
+        my $id2 = sprintf("%s %s:%s %s",$delimiter,
+                          $id,$seq->display_id,$seq->description);
+        $unique{$md5sum}->desc($desc . $id2);
+    }
+}
+
+foreach my $seq ( values %unique ) {
+    $out->write_seq($seq);
+}
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_sreformat.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_sreformat.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/bp_sreformat.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+#!perl
+# Author:  Jason Stajich <jason-at-bioperl-dot-org>
+# Purpose: Bioperl implementation of Sean Eddy's sreformat
+#          We're not as clever as Sean's squid library though so
+#          you have to specify the input format rather than letting
+#          the application guess.
+
+use strict;
+use Bio::SeqIO;
+use Bio::AlignIO;
+use Getopt::Long;
+
+my $USAGE = "bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
+
+-h/--help               Print this help
+-if/--informat          Specify the input format
+-of/--outformat         Specify the output format
+-i/--input              Specify the input file name
+                        (to pass in data on STDIN use minus sign as filename)
+-o/--output             Specify the output file name
+                        (to pass data out on STDOUT use minus sign as filename)
+--msa                   Specify this is multiple sequence alignment data
+--special=specialparams Specify special params supported by some formats
+                        Comma or space separated please.
+                        These include:
+                        nointerleaved   -- for phylip,non-interleaved format
+                        idlinebreak     -- for phylip, makes it molphy format
+                        percentages     -- for clustalw, show % id per line
+                        flat            -- don't show start-end in seqid
+                        linelength      -- line length for clustalw
+";
+
+
+my ($input,$output,$informat,$outformat,$msa,$special);
+
+GetOptions(
+    'h|help'         => sub { print STDERR ($USAGE); exit(0) },
+    'i|input:s'      => \$input,
+    'o|output:s'     => \$output,
+    'if|informat:s'  => \$informat,
+    'of|outformat:s' => \$outformat,
+    'msa'            => \$msa,
+    's|special:s'    => \$special,
+);
+
+die("Cannot proceed without a defined input and output you gave ($informat,$outformat)\n")
+    unless defined $informat && defined $outformat;
+
+my ($in,$out);
+
+# Translate the --special list (split on whitespace or commas) into extra
+# constructor arguments: "nointerleaved" switches interleaving off,
+# "key=value" is passed through as -key => value, anything else becomes a
+# flag set to 1.
+my @extra;
+if( $special ) {
+    for my $param ( split(/[\s,]/,$special) ) {
+        if( $param =~ /nointerleaved/ ) {
+            push @extra, '-interleaved' => '0';
+        } elsif( $param =~ /(\S+)\=(\S+)/ ) {
+            push @extra, "-$1" => $2;
+        } else {
+            push @extra, "-$param" => 1;
+        }
+    }
+}
+
+# guess we're talking about MSA if any of the standard MSA names are used
+my $msa_formats = qr/nexus|phylip|clustal|maf|stockholm|bl2seq|msf/;
+$msa = 1 if $informat =~ $msa_formats || $outformat =~ $msa_formats;
+
+# Alignment data goes through Bio::AlignIO, plain sequence data through
+# Bio::SeqIO.  The constructors run inside eval so that a failure can be
+# reported as a short message.
+if( $msa ) {
+    eval {
+        if( defined $input ) {
+            $in = Bio::AlignIO->new(-format => $informat, -file => $input);
+        } else {
+            $in = Bio::AlignIO->new(-format => $informat, -fh => \*ARGV);
+        }
+    };
+    if( $@ ) {
+        die("Unknown MSA format to bioperl $informat\n");
+    }
+    eval {
+        if( $output ) {
+            $out = Bio::AlignIO->new(-format => $outformat,
+                                     -file => ">$output", @extra);
+        } else {
+            # default to STDOUT for output
+            $out = Bio::AlignIO->new(-format => $outformat,@extra);
+        }
+    };
+    if( $@ ) {
+        die("Unknown MSA format to bioperl $outformat\n");
+    }
+    while( my $aln = $in->next_aln) { 
+        # --special is optional, so only test it when it was given
+        if( defined $special && $special =~ /flat/ ) {
+            $aln->set_displayname_flat(1);
+        }
+        $out->write_aln($aln);
+    }
+
+} else {
+    eval {
+        if( defined $input ) {
+            $in = Bio::SeqIO->new(-format => $informat, -file => $input);
+        } else { 
+            $in = Bio::SeqIO->new(-format => $informat, -fh => \*ARGV);
+        }
+    };
+    if( $@ ) {
+        if( $@ =~ /Could not open/ ) {
+            die("Could not open input file: $input\n");
+        } else { 
+            die("Unknown sequence format to bioperl $informat\n");
+        }
+    }
+    eval {
+        if( $output ) {
+            $out = Bio::SeqIO->new(-format => $outformat,
+                                   -file => ">$output");
+        } else {
+            # default to STDOUT for output
+            $out = Bio::SeqIO->new(-format => $outformat);
+        }
+    };
+    if( $@ ) {
+        if( $@ =~ /Could not open/ ) {
+            die("Could not open output file: $output\n");
+        } else { 
+            die("Unknown sequence format to bioperl $outformat: $@\n");
+        }
+    }
+    while( my $seq = $in->next_seq ) {
+        $out->write_seq($seq);
+    }
+}
+
+=head1 NAME
+
+bpsreformat - convert sequence formats
+
+=head1 DESCRIPTION
+
+This script uses the SeqIO system that allows conversion of sequence
+formats either sequence data or multiple sequence alignment data.  The
+name comes from the fact that Sean Eddy's program sreformat (part of
+the HMMER pkg) already does this.  Sean's program tries to guess the
+input formats while in our code we currently require you to specify what
+the input and output formats are and if the data is from a multiple
+sequence alignment or from straight sequence files.
+
+Usage:
+
+bpsreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
+
+  -h/--help        Print this help
+
+  -if/--informat   Specify the input format
+
+  -of/--outformat  Specify the output format
+
+  -i/--input       Specify the input file name
+                   (to pass in data on STDIN use minus sign as filename)
+  -o/--output      Specify the output file name
+                   (to pass data out on STDOUT use minus sign as filename)
+
+  --msa            Specify this is multiple sequence alignment data
+
+  --special        Will pass on special parameters to the AlignIO/SeqIO
+                   object -- most of these are for Bio::AlignIO objects
+                   Comma separated list of the following
+                   nointerleaved   -- for phylip,non-interleaved format
+                   idlinebreak     -- for phylip, makes it molphy format
+                   percentages     -- for clustalw, show % id per line
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/dbsplit.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/dbsplit.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/dbsplit.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,107 @@
+#!perl -w
+#-*-Perl-*-
+
+=head1 NAME
+
+dbsplit - script to split an input set of database(s) into smaller pieces
+
+=head1 SYNOPSIS
+
+  dbsplit.PLS --size 50 [-i inputfile] [-if inputformat] [-of outputformat]
+              [--prefix outputprefix] [ < file1 file 2  OR file1 file2]
+
+=head1 DESCRIPTION
+
+This script will take as input a list of filenames or a single file or
+from STDIN a sequence database and split the database into separate
+files of X numbers of sequences.  You specify X with the C<--size/-s>
+parameter.  The input and output sequence format is any that is
+supported by bioperl (fasta,embl,genbank,gcg, swissprot, etc).
+
+You can specify the input data either as a single file with -i
+filename, or as one or more files given as arguments, like
+
+  % dbsplit file1 file2
+
+or as a list of sequence data with 
+
+  % cat file1 file2 file3 | dbsplit
+
+You'll want to use the C<--prefix> to specify what the output prefix will
+be.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::SeqIO;
+use Bio::SeqIO::MultiFile;
+
+use Getopt::Long;
+# Defaults; each can be overridden on the command line.
+my $dbsize = 100;    # number of sequences written to each output file
+my $prefix;          # output files are named "$prefix.1", "$prefix.2", ...
+my ($informat,$outformat,$infile) = ( 'fasta', 'fasta');
+GetOptions (
+            's|size:s'     => \$dbsize,
+            'if:s'         => \$informat,
+            'of:s'         => \$outformat,
+            'i:s'          => \$infile,
+            'p|prefix:s'   => \$prefix,
+);
+die("--size must be a positive integer\n") unless $dbsize =~ /^[1-9]\d*$/;
+# A single leftover argument is taken as the input file (overriding -i);
+# several leftover arguments are read one after another via MultiFile.
+$infile = shift @ARGV if @ARGV == 1;
+$prefix ||= $infile || $ARGV[0] || 'db';
+
+my $in;
+if( @ARGV ) {
+    $in = Bio::SeqIO::MultiFile->new(-files  => [@ARGV],
+                                     -format => $informat || 'fasta');
+} elsif( $infile ) {
+    $in = Bio::SeqIO->new(-file => $infile, -format => $informat);
+} else {
+    # no file name at all: the POD says input then comes from STDIN
+    $in = Bio::SeqIO->new(-format => $informat);
+}
+my $count  = 1;    # number of the output file currently being written
+my $scount = 0;    # sequences written to that file so far
+my $out = Bio::SeqIO->new(-format => $outformat, -file => ">$prefix.$count");
+while( my $seq = $in->next_seq ) {
+    # Move on to the next file only when the current one already holds
+    # $dbsize sequences, so that the first file is not one sequence short.
+    if( $scount >= $dbsize ) {
+        $out->close();
+        $count++;
+        $out = Bio::SeqIO->new(-format => $outformat, -file => ">$prefix.$count");
+        $scount = 0;
+    }
+    $out->write_seq($seq);
+    $scount++;
+}
+
+
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mask_by_search.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mask_by_search.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mask_by_search.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,155 @@
+#!perl -w
+# $Id: mask_by_search.PLS,v 1.5 2005/04/26 15:03:53 jason Exp $
+# Author:  Jason Stajich <jason-at-bioperl-dot-org>
+
+
+=head1 NAME
+
+mask_by_search - mask sequence(s) based on its alignment results
+
+=head1 SYNOPSIS 
+
+  mask_by_search.pl -f blast genomefile blastfile.bls > maskedgenome.fa
+
+=head1 DESCRIPTION
+
+Mask sequence based on significant alignments of another sequence.
+You need to provide the report file and the entire sequence data which
+you want to mask.  By default this will assume you have done a TBLASTN
+(or TFASTY) and try and mask the hit sequence assuming you've provided
+the sequence file for the hit database.  If you would like to do the
+reverse and mask the query sequence specify the -t/--type query flag.
+
+This is going to read in the whole sequence file into memory so for
+large genomes this may fall over.  I'm using DB_File to prevent
+keeping everything in memory, one solution is to split the genome into
+pieces (BEFORE you run the DB search though, you want to use the exact
+file you BLASTed with as input to this program).
+
+Below the double dash (--) options are of the form
+--format=fasta or --format fasta
+or you can just say
+-f fasta
+
+By -f/--format I mean either are acceptable options.  The =s or =n
+or =c specify these arguments expect a 'string'
+
+Options:
+    -f/--format=s    Search report format (fasta,blast,axt,hmmer,etc)
+    -sf/--sformat=s  Sequence format (fasta,genbank,embl,swissprot)
+    --hardmask       (boolean) Hard mask the sequence
+                     with the maskchar [default is lowercase mask]
+    --maskchar=c     Character to mask with [default is N], change 
+                     to 'X' for protein sequences
+    -e/--evalue=n    Evalue cutoff for HSPs and Hits, only 
+                     mask sequence if alignment has specified evalue 
+                     or better
+    -o/--out/
+    --outfile=file   Output file to save the masked sequence to.
+    -t/--type=s      Alignment seq type you want to mask, the 
+                     'hit' or the 'query' sequence. [default is 'hit']
+    --minlen=n       Minimum length of an HSP for it to be used 
+                     in masking [default 50]
+    -h/--help        See this help information
+
+=head1 AUTHOR - Jason Stajich
+
+Jason Stajich, jason-at-bioperl-dot-org.
+
+=cut 
+
+
+use strict;
+use Bio::SeqIO;
+use Bio::SearchIO;
+use Getopt::Long;
+use Bio::Seq;
+use DB_File;
+# assuming tblastn or tfasty type alignment
+
+my $format = 'blast';   # search report format, passed to Bio::SearchIO
+my $sformat= undef;     # sequence format, passed to Bio::SeqIO for input and output
+my $evalue = undef;     # E-value cutoff; no E-value filtering while undefined
+my $type   = 'hit';     # side of each HSP whose sequence gets masked
+my $minlen = 50;        # HSPs with a smaller total length are skipped
+my $hardmask = 0; # mask with $maskchar instead of lowercase
+my $maskchar = 'N'; # if we hard mask, mask with this char
+my $outfile;
+GetOptions(
+           'f|format:s'  => \$format,
+           'sf|sformat:s'=> \$sformat,
+           'hardmask'    => \$hardmask,
+           'maskchar:s'  => \$maskchar,
+           'e|evalue:s'  => \$evalue,
+           'o|out|outfile:s' => \$outfile,
+           't|type:s'    => \$type,
+           'minlen:s'    => \$minlen,
+           'h|help'      => sub { system('perldoc', $0);
+                                  exit; },
+           );
+# anchored at both ends: $type is used as a method name on each HSP below
+if( $type !~ /^(hit|query)$/i ) {
+    die("type must be query or hit[default] not $type") ;
+}
+$type = lc($type);
+
+if(length($maskchar) != 1 ) {
+    die("expected a mask character, not a string (you gave $maskchar)");
+}
+my $genomefile = shift || die('need a file containing the genome');
+my $reportfile = shift;
+
+# Keep the sequence strings, keyed by display id, in a DB_File hash on disk.
+# NOTE(review): fixed index path -- concurrent runs would share this file.
+my $genomeparser = Bio::SeqIO->new(-file  => $genomefile,
+                                   -format=> $sformat);
+my %seqs;
+unlink('/tmp/genome.idx');
+tie(%seqs,'DB_File','/tmp/genome.idx')
+    || warn("cannot tie /tmp/genome.idx ($!), keeping sequences in memory\n");
+while( my $seq = $genomeparser->next_seq ) {
+    # should we pre-force to upper case?
+    $seqs{$seq->display_id} = $seq->seq();
+}
+
+my $parser = Bio::SearchIO->new(-file   => $reportfile,
+                                -format => $format);
+
+while( my $r = $parser->next_result ) {
+    while( my $h = $r->next_hit ) {
+        last if( defined $evalue && $h->significance > $evalue );
+        # look up the sequence of whichever side is being masked
+        my $name = $type eq 'query' ? $r->query_name : $h->name;
+        if( ! $seqs{$name} ) {
+            die("Cannot find sequence $name in genome seq");
+        }
+        while( my $hsp = $h->next_hsp ) {
+            last if( defined $evalue && $hsp->evalue > $evalue );
+            next if( $hsp->length('total') < $minlen);
+            # HSP coordinates start at 1, substr() offsets start at 0
+            my ($s,$len) = ( $hsp->$type()->start - 1,
+                             $hsp->$type()->length);
+            my $str = $seqs{$name};
+            if( $hardmask ) {
+                substr($str, $s, $len, $maskchar x $len);
+            } else {
+                substr($str, $s, $len, lc(substr($str, $s, $len)));
+            }
+            $seqs{$name} = $str;    # store the masked string back
+        }
+    }
+}
+
+# write to --outfile when given; otherwise no -file is passed to Bio::SeqIO
+my %outargs = ( -format => $sformat );
+$outargs{-file} = ">$outfile" if $outfile;
+my $out = Bio::SeqIO->new(%outargs);
+
+while( my ($seqname,$seq) = each %seqs ) {
+    $out->write_seq(Bio::Seq->new(-seq        => $seq,
+                                  -display_id => $seqname,
+                                  -description=> 'MASKED'));
+}
+END { 
+    unlink('/tmp/genome.idx');
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mask_by_search.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mutate.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mutate.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mutate.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+mutate.pl - randomly mutagenize a single protein or DNA sequence
+
+=head1 SYNOPSIS
+
+  ./mutate.pl -p 25 -i test.fa -n 5 -f swiss -o muts.swiss
+
+  #or
+
+  ./mutate.pl --percent=25 --input=test.fa --number=5 --output=x.fa
+
+=head1 DESCRIPTION
+
+Randomly mutagenize a single protein or DNA sequence one or more times.
+Specify percentage mutated and number of resulting mutant sequences.
+Print mutagenized sequences to STDOUT or write to an output file.
+
+  -h|--help    Help
+  -p|--percent Percent mutagenized
+  -n|--number  Number of mutant sequences created
+  -o|--output  Output file (optional)
+  -f|--format  Output format (default: fasta)
+  -i|--input   Input file
+
+=head1 FEEDBACK
+
+User feedback is an integral part of the evolution of this and other
+Bioperl scripts. Send your comments and suggestions to the Bioperl 
+mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+Brian Osborne, bosborne at alum.mit.edu
+
+=cut
+
+use strict;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my ($help,$in_file,$percent,$out_file,$number);
+my $format = "fasta";
+my @dna = qw(a g c t);    # characters drawn for non-protein sequences
+my @amino = qw(a c d e f g h i k l m n p q r s t v w y);
+
+GetOptions(  "h|help"      => \$help,
+             "p|percent:i" => \$percent,
+             "n|number:i"  => \$number,
+             "o|output:s"  => \$out_file,
+             "f|format:s"  => \$format,
+             "i|input:s"   => \$in_file );
+usage() if ($help || !$percent || !$in_file || !$number || $percent > 100);
+
+# Seed the random number generator. "time|$$" combines the
+# current time with the current process id
+srand(time|$$);
+my $seqio = Bio::SeqIO->new(-file => $in_file);
+my $seqobj = $seqio->next_seq or die("no sequence found in $in_file\n");
+my $num_mut = percent_to_num($percent);
+# with no position to mutate, the distinct-mutant loop below could not finish
+die("$percent% of the sequence is less than one position\n") if $num_mut < 1;
+my @seq_arr = ();
+# don't keep a mutant that's already been made
+while ($number > @seq_arr) {
+   my $mut_seq = mutate_all($seqobj,$num_mut);
+   push @seq_arr, $mut_seq unless grep { $_ eq $mut_seq } @seq_arr;
+}
+
+# all mutants go through one writer (appending when -o names a file)
+my %args = (-format => $format );
+$args{-file} = ">>$out_file" if $out_file;
+my $seqout = Bio::SeqIO->new(%args);
+foreach my $mut_seq (@seq_arr) {
+   my $name = $seqobj->display_id . "-" . "mutant" . $number;
+   my $outseq = Bio::Seq->new(-seq        => $mut_seq,
+                              -display_id => $name,
+                              -desc       => $seqobj->desc );
+   $seqout->write_seq($outseq);
+   $number--;
+}
+
+# Return the sequence string of $seq_obj with $num distinct positions
+# mutated; the sequence object itself is only read.
+sub mutate_all {
+   my ($seq_obj,$num) = @_;
+   my $type = $seq_obj->alphabet;
+   my $str = $seq_obj->seq;
+   # space-separated list of the positions mutated so far
+   my $positions = "";
+   # honour the $num argument rather than the file-level $num_mut
+   ($str,$positions) = mutate_one($str,$type,$positions) for 1 .. $num;
+   return $str;
+}
+
+# Mutate one position of $str that is not yet listed in $positions and
+# return the new string together with the extended position list.
+sub mutate_one {
+   my ($str,$type,$positions) = @_;
+   my ($position,$new_char);
+   # pick a random position that isn't already mutagenized
+   do { $position = random_position($str);
+   } until ( $positions !~ /\b$position\b/ );
+   $positions .= "$position ";
+   my $current_char = substr($str,$position,1);
+
+   # pick a different char; compared case-insensitively because the
+   # alphabets are lower case and 'A' -> 'a' would be no mutation at all
+   do { $new_char = random_char($type);
+   } until (lc($new_char) ne lc($current_char));
+   substr($str,$position,1,$new_char);
+   return ($str,$positions);
+}
+
+# return a random 0-based index into the given string
+sub random_position {
+   my ($string) = @_;
+   return int(rand(length($string)));
+}
+
+# draw a random character: from @amino for "protein", from @dna otherwise
+sub random_char {
+   my ($type) = @_;
+   my $alphabet = $type eq "protein" ? \@amino : \@dna;
+   return $alphabet->[rand @$alphabet];
+}
+
+sub percent_to_num {    # $percent percent of the length of $seqobj, truncated
+   my ($percent) = @_;
+   return int($percent/100 * length($seqobj->seq));
+}
+
+sub usage {
+   # hand over to perldoc for this script's POD; exit if that fails
+   exec('perldoc',$0) or exit(0);
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/mutate.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/pairwise_kaks.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/pairwise_kaks.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/pairwise_kaks.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,211 @@
+#!perl -w
+use strict;
+# $Id: pairwise_kaks.PLS,v 1.12 2006/07/04 22:23:29 mauricio Exp $
+# Author Jason Stajich <jason-at-bioperl-dot-org>
+
+=head1 NAME
+
+pairwise_kaks - script to calculate pairwise Ka,Ks for a set of sequences
+
+=head1 SYNOPSIS
+
+pairwise_kaks.PLS -i t/data/worm_fam_2785.cdna [-f fasta/genbank/embl...] [-msa tcoffee/clustal] [-kaks yn00/codeml]
+
+=head1 DESCRIPTION 
+
+  This script will take as input a dataset of cDNA sequences, verify
+ that they contain no stop codons, align them in protein space,
+ project the alignment back into cDNA and estimate the Ka
+ (non-synonymous) and Ks (synonymous) substitutions based on the ML
+ method of Yang with the PAML package.
+
+ Requires:
+ * bioperl-run package
+ * PAML program codeml or yn00
+ * Multiple sequence alignment programs Clustalw OR T-Coffee
+
+ Often there are specific parameters you want to use when you are
+ computing Ka/Ks ratios, so consider this script a starting point and
+ do not rely on it for every situation.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR
+
+ Jason Stajich jason-at-bioperl-dot-org
+
+=cut
+
+# Everything required here belongs to the bioperl-run package; loading
+# at run time lets a missing installation be reported in one message.
+eval {
+    # Ka/Ks estimators
+    require Bio::Tools::Run::Phylo::PAML::Codeml;
+    require Bio::Tools::Run::Phylo::PAML::Yn00;
+
+    # Multiple Sequence Alignment programs
+    require Bio::Tools::Run::Alignment::Clustalw;
+    require Bio::Tools::Run::Alignment::TCoffee;
+};
+die("Must have bioperl-run pkg installed to run this script") if $@;
+# for projecting alignments from protein to R/DNA space
+use Bio::Align::Utilities qw(aa_to_dna_aln);
+
+# for input of the sequence data
+use Bio::SeqIO;
+use Bio::AlignIO;
+
+# for the command line argument parsing
+use Getopt::Long;
+
+# defaults: alignment program, Ka/Ks program, input format; rest undefined
+my ($aln_prog, $kaks_prog,$format, $output,
+    $cdna,$verbose,$help) = qw(clustalw codeml fasta);
+GetOptions(
+           'i|input:s'      => \$cdna,
+           'o|output:s'     => \$output,
+           'f|format:s'     => \$format,
+           'msa:s'          => \$aln_prog,
+           'kaks:s'         => \$kaks_prog,
+           'v|verbose'      => \$verbose,
+           'h|help'         => \$help,
+           );
+
+if( $help ) {
+    exec('perldoc',$0);
+    exit(0);
+}
+$verbose = -1 unless $verbose;
+my ($aln_factory,$kaks_factory);
+if( $aln_prog =~ /clus/i ) {
+    $aln_factory = Bio::Tools::Run::Alignment::Clustalw->new(-verbose => $verbose);
+} elsif( $aln_prog =~ /t\_?cof/i ) {
+    $aln_factory = Bio::Tools::Run::Alignment::TCoffee->new(-verbose => $verbose);
+} else { 
+    warn("Did not provide either 'clustalw' or 'tcoffee' as alignment program names");
+    exit(0);
+}
+unless( $aln_factory->executable ) {
+    warn("Could not find the executable for $aln_prog, make sure you have installed it and have either set ".uc($aln_prog)."DIR or it is in your PATH");
+    exit(0);
+}
+
+# pick the Ka/Ks estimator
+if( $kaks_prog =~ /yn00/i ) {
+    $kaks_factory = Bio::Tools::Run::Phylo::PAML::Yn00->new(-verbose => $verbose);
+} elsif( $kaks_prog =~ /codeml/i ) {
+    # change the parameters here if you want to tweak your Codeml running!
+    $kaks_factory = Bio::Tools::Run::Phylo::PAML::Codeml->new(-verbose => $verbose,
+        -params => { 'runmode' => -2, 'seqtype' => 1 });
+} else {
+    # otherwise $kaks_factory stays undefined and ->executable below dies
+    warn("Did not provide either 'yn00' or 'codeml' as Ka/Ks program names");
+    exit(0);
+}
+unless ( $kaks_factory->executable ) {
+    warn("Could not find the executable for $kaks_prog, make sure you have installed it and you have defined PAMLDIR or it is in your PATH");
+    exit(0);
+}
+
+unless ( $cdna && -f $cdna && -r $cdna &&  ! -z $cdna ) {
+    warn("Did not specify a valid cDNA sequence file as input"); 
+    exit(0);
+}
+
+my $seqin = Bio::SeqIO->new(-format => $format, -file => $cdna);
+
+my %seqs;     # cDNA sequence objects keyed by display id
+my @prots;    # their translations, in input order
+while( my $cdna_seq = $seqin->next_seq ) {
+    my $id = $cdna_seq->display_id;
+    $seqs{$id} = $cdna_seq;
+    my $protein = $cdna_seq->translate();
+
+    # a single trailing stop is dropped (Tcoffee can't handle '*'); a
+    # stop anywhere else ends the run because PAML cannot use it
+    (my $pseq = $protein->seq()) =~ s/\*$//;
+    if( $pseq =~ /\*/ ) {
+        warn("provided a cDNA (".$id.") sequence with a stop codon, PAML will choke!");
+        exit(0);
+    }
+    $protein->seq($pseq);
+    push @prots, $protein;
+}
+
+unless( @prots >= 2 ) {
+    warn("Need at least 2 cDNA sequences to proceed");
+    exit(0);
+}
+
+# Results go to the -o file when one was given, otherwise to STDOUT.
+my $outfh;
+if( $output ) {
+    open($outfh, '>', $output) || die("cannot open output $output for writing: $!");
+} else {
+    $outfh = \*STDOUT;
+}
+
+# align the proteins, then project that alignment back onto the cDNAs
+my $aa_aln = $aln_factory->align(\@prots);
+my $dna_aln = aa_to_dna_aln($aa_aln, \%seqs);
+my @each = $dna_aln->each_seq();
+$kaks_factory->alignment($dna_aln);
+my ($rc,$parser) = $kaks_factory->run();
+if( $rc <= 0 ) { 
+    warn($kaks_factory->error_string,"\n");
+    exit;
+}
+my $result = $parser->next_result;
+
+if ($result->version =~ m/3\.15/) {
+    warn("This script does not work with v3.15 of PAML!  Please use 3.14 instead.");
+    exit(0);
+}
+my $MLmatrix = $result->get_MLmatrix();
+my @otus = $result->get_seqs();
+
+# for each OTU, the 1-based position of the sequence with the same
+# display id in the cDNA alignment, as handed to select_noncont() below
+my @pos = map { 
+    my $c= 1;
+    foreach my $s ( @each ) {
+        last if( $s->display_id eq $_->display_id );
+        $c++;
+    }
+    $c; 
+} @otus; 
+
+print {$outfh} join("\t", qw(SEQ1 SEQ2 Ka Ks Ka/Ks PROT_PERCENTID CDNA_PERCENTID)), "\n";
+for my $i ( 0 .. $#otus - 1 ) {
+    for my $j ( $i+1 .. $#otus ) {
+        my $sub_aa_aln = $aa_aln->select_noncont($pos[$i],$pos[$j]);
+        my $sub_dna_aln = $dna_aln->select_noncont($pos[$i],$pos[$j]);
+        print {$outfh} join("\t",  
+                       $otus[$i]->display_id,
+                       $otus[$j]->display_id,$MLmatrix->[$i]->[$j]->{'dN'},
+                       $MLmatrix->[$i]->[$j]->{'dS'},
+                       $MLmatrix->[$i]->[$j]->{'omega'},
+                       sprintf("%.2f",$sub_aa_aln->percentage_identity),
+                       sprintf("%.2f",$sub_dna_aln->percentage_identity),
+                       ), "\n";
+    }
+}
+# buffered write errors only surface when the handle is closed
+close($outfh) || die("error writing results: $!");

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/remote_blast.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/remote_blast.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/remote_blast.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,176 @@
+#!/usr/bin/perl -w
+# $Id: remote_blast.PLS,v 1.8 2006/07/02 05:35:50 mauricio Exp $
+#
+# BioPerl module for remote_blast.pl
+#
+# Revived by Evan Weaver for bioperl-1.5.1
+# 3/14/2006
+#
+# Copyright Jason Stajich, Evan Weaver
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs after the code
+
+use strict;
+use vars qw($USAGE);
+
+use Bio::Tools::Run::RemoteBlast;
+use Bio::SeqIO;
+use Getopt::Long;
+
+$USAGE = "remote_blast.pl [-h] [-p prog] [-d db] [-e expect] [-mod
+Blast] [-f seqformat] -z=\"entrez query\" -v 1 -t output_format -i
+seqfile\n";
+
+# search settings with their defaults
+my $prog   = 'blastp';
+my $db     = 'nr';
+my $expect = '10';
+my $method = 'Blast';
+# input/output settings; those not preset start out undefined
+my ($sequencefile, $help, $entrez, $outputformat);
+my $sequenceformat = 'fasta';
+my $verbose        = 1;
+
+GetOptions(
+    'prog|p=s'               => \$prog,
+    'db|d=s'                 => \$db,
+    'expect|e=s'             => \$expect,
+    'blsmod|module|method=s' => \$method,
+    'input|i=s'              => \$sequencefile,
+    'format|f=s'             => \$sequenceformat,
+    'help|h'                 => \$help,
+    'entrez|z=s'             => \$entrez,
+    'output_format|t=s'      => \$outputformat,
+    'verbose|v=s'            => \$verbose
+);
+
+if( $help ) {
+    exec('perldoc', $0);
+    die;
+}
+# refuse to run without a program, a database and an input file
+die($USAGE . "\n\tMust specify a valid program name ([t]blast[pxn])\n") unless defined $prog;
+die($USAGE . "\n\tMust specify a db to search\n") unless defined $db;
+die($USAGE . "\n\tMust specify an input file\n") unless defined $sequencefile;
+
+my $blastfactory = Bio::Tools::Run::RemoteBlast->new(
+    '-prog'      => $prog,
+    '-data'      => $db,
+    '-expect'    => $expect,
+    'readmethod' => $method,
+);
+
+# request headers are set through the module's package-level hashes
+if ($entrez) {
+  print "Entrez query (submission side): $entrez\n" if $verbose;
+  #$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{ FORMAT_ENTREZ_QUERY} = $entrez;
+  $Bio::Tools::Run::RemoteBlast::HEADER{ ENTREZ_QUERY } = $entrez;
+}
+
+if ($outputformat) {
+  print "Don't use output format type; it doesn't work.\n";
+  $Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{ FORMAT_TYPE } = $outputformat;
+}
+
+# submit_blast can only currently handle fasta format files, so any
+# other format is parsed here and handed over as a list of sequence
+# objects; a fasta file is handed over by name
+my $input = $sequencefile;
+if( $sequenceformat !~ /fasta/ ) {
+    my $seqio = Bio::SeqIO->new('-format' => $sequenceformat,
+                                '-file'   => $sequencefile );
+    my @seqs;
+    while( my $seq = $seqio->next_seq() ) {
+        push @seqs, $seq;
+    }
+    # a reference to the parsed sequence objects replaces the file name
+    $input = \@seqs;
+}
+
+my $r = $blastfactory->submit_blast($input);
+
+print STDERR "waiting...\n" if( $verbose > 0 );
+# keep polling for as long as any request id is outstanding
+while ( my @rids = $blastfactory->each_rid ) {
+  foreach my $rid ( @rids ) {
+    my $rc = $blastfactory->retrieve_blast($rid);
+    unless( ref($rc) ) {
+      # no report object yet: drop the request on a negative return
+      # code, then pause before looking at the next request id
+      $blastfactory->remove_rid($rid) if( $rc < 0 );
+      print STDERR "   checking $rid\n" if ( $verbose > 0 );
+      sleep 5;
+      next;
+    }
+    # a report came back: save it under the query's name and list it
+    my $result = $rc->next_result();
+    my $query_name = $result->query_name();
+    $blastfactory->save_output($query_name."\.out");
+    $blastfactory->remove_rid($rid);
+    print "\nQuery Name: ", $query_name, "\n";
+    while ( my $hit = $result->next_hit ) {
+      next unless ( $verbose > 0);
+      print "\thit name is ", $hit->name, "\n";
+      while( my $hsp = $hit->next_hsp ) {
+        print "\t\tscore is ", $hsp->score, "\n";
+      }
+    }
+  }
+  print STDERR scalar(@rids) . " left\n";
+}
+	
+
+__END__
+
+#
+# BioPerl module for remote_blast.pl
+#
+# Cared for by Jason Stajich <jason-at-bioperl-dot-org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+remote_blast.pl - script for submitting jobs to a remote blast server
+(ncbi blast queue at this time)
+
+=head1 SYNOPSIS
+
+% remote_blast.pl -p blastp -d ecoli -e 1e-5 -i myseqs.fa
+
+=head1 DESCRIPTION
+
+This module will run a remote blast on a set of sequences by
+submitting them to the NCBI blast queue and printing the output of the
+request.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution.  Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason-at-bioperl-dot-org
+
+=cut


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/remote_blast.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2BSML.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2BSML.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2BSML.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,70 @@
+#!perl -w
+# $Id: search2BSML.PLS,v 1.2 2005/04/26 15:03:53 jason Exp $
+
+# Author:      Jason Stajich <jason-at-bioperl-dot-org>
+# Description: Turn SearchIO parseable report(s) into a BSML report
+#
+=head1 NAME
+
+search2bsml - Turn SearchIO parseable reports(s) into a BSML report
+
+=head1 SYNOPSIS
+
+Usage:
+  search2bsml [-o outputfile] [-f reportformat] [-i inputfilename]  OR file1 file2 ..
+
+=head1 DESCRIPTION
+
+This script will turn a protein Search report (BLASTP, FASTP, SSEARCH, 
+AXT, WABA, SIM4) into a BSML File.
+
+The options are:
+
+   -i infilename        - (optional) inputfilename, will read
+                          either ARGV files or from STDIN
+   -o filename          - the output filename [default STDOUT]
+   -f format            - search result format (blast, fasta,waba,axt)
+                          (ssearch is fasta format). default is blast.
+   -h                   - this help menu
+
+Additionally specify the filenames you want to process on the
+command-line.  If no files are specified then STDIN input is assumed.
+You specify this by doing: search2bsml E<lt> file1 file2 file3
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Getopt::Long;
+use Bio::SearchIO;
+
+# $type and $help are declared but not used by this script; $cutoff
+# receives the -c/--cutoff value, which is not used afterwards either
+my ($output,$input,$type,$help,$cutoff);
+my $format = 'blast';    # report format unless -f says otherwise
+GetOptions(
+           'i|input:s'  => \$input,
+           'o|output:s' => \$output,
+           'f|format:s' => \$format,
+           'c|cutoff:s' => \$cutoff,
+           'h|help'     => sub { exec('perldoc',$0); exit(0) },
+           );
+
+# if no input is provided STDIN will be used
+my $parser = Bio::SearchIO->new(-format => $format, -file => $input);
+
+# the BSML writer is given -file only when -o names an output file;
+# without it the results go to STDOUT
+my %writer_args = ( -output_format => 'BSMLResultWriter' );
+if( defined $output ) {
+    $writer_args{-file} = ">$output";
+}
+my $out = Bio::SearchIO->new(%writer_args);
+# pass every parsed result straight to the writer
+while( my $result = $parser->next_result ) {
+    $out->write_result($result);
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2alnblocks.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2alnblocks.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2alnblocks.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+#!perl -w
+# $Id: search2alnblocks.PLS,v 1.3 2005/04/26 15:03:53 jason Exp $
+# Author:      Jason Stajich <jason-at-bioperl-dot-org>
+# Description: Turn SearchIO parseable report(s) into a set of Aligned blocks
+#
+
+=head1 NAME
+
+search2alnblocks - Turn SearchIO parseable reports(s) into a set of aligned blocks
+
+=head1 SYNOPSIS
+
+  search2alnblocks --minid PERCENTID --minlen LEN --minevalue EVALUE \
+    file1.blast file2.blast ... > out.fas
+
+=head1 DESCRIPTION
+
+This script will parse and filter BLAST (or other formats
+L<Bio::SearchIO> can parse) output and format the alignment as blocks
+of alignments based on the HSPs.  Note this can only work if the input
+file parsed contains the actual alignment sequences.
+
+Typically this can be used to turn BLAST output into a FASTA alignment format for input into the QRNA comparative gene finder for RNA genes (E.Rivas).
+
+=head1 OPTIONS
+
+ --maxevalue   Maximum significance: skip HSPs with an E-value below this
+ --minevalue   Minimum significance: skip HSPs with an E-value above this
+ --minlen      Minimum length of an HSP [default 0] 
+ --maxid       Maximum Percent Id [default 100]
+               (to help remove sequences which are really close)
+ --minid       Minimum Percent Identity for an HSP [default 0]
+ -i/--input    An optional input filename (expects input on STDIN by default)
+ -o/--output   An optional output filename (exports to STDOUT by default)
+ -f/--format   Specify a different Search Alignment format- 
+               {fasta, axt, waba, blast, blastxml} are all permitted
+               although the format must have actual alignment 
+               sequence for this script to work
+               See L<Bio::SearchIO> for more information.
+ -of/--oformat Output format for the alignment blocks, anything
+               L<Bio::AlignIO> supports.
+ -v/--verbose  Turn on debugging
+
+=head1 AUTHOR - Jason Stajich
+
+Jason Stajich, jason-at-bioperl-dot-org.
+
+=cut
+
+
+use strict;
+
+use Bio::SearchIO;
+use Bio::AlignIO;
+use Math::BigFloat;
+use Getopt::Long;
+
+my $Usage = 'search2alnblocks --minid PERCENTID --minlen LEN --maxevalue EVALUE file1.blast file2.blast ... > blocks.fas';
+
+my ($min_id,$min_len,$max_id,$max_len,$max_evalue,$min_evalue,$format,
+    $outformat,$verbose,$input,$output);
+$min_id  = 0;
+$max_evalue = 0;
+$min_evalue = undef;
+$min_len = 0;
+$format = 'blast';
+$outformat= 'fasta';
+GetOptions(
+           'minid:s'      => \$min_id,
+           'maxid:s'      => \$max_id,
+           'minlen:s'     => \$min_len,
+           'maxlen:s'     => \$max_len,
+           'minevalue:s'  => \$min_evalue,
+           'maxevalue:s'  => \$max_evalue,
+           'f|format:s'   => \$format,
+           'i|input:s'    => \$input,
+           'o|output:s'   => \$output,
+           'of|oformat:s' => \$outformat,
+           'v|verbose'    => \$verbose,
+           'h|help'       => sub { system('perldoc', $0);
+                                   exit(0) },
+           );
+# allow a bare exponent such as "e-5" for either E-value cutoff
+foreach my $cutoff ( $max_evalue, $min_evalue ) {
+    $cutoff =~ s/^e/1e/ if defined $cutoff;
+}
+
+# if $input is undef then will read from STDIN
+my $parser = Bio::SearchIO->new(-format => $format,
+                                -file   => $input,
+                                -verbose=> $verbose);
+# -file is only passed to the writer when -o names an output file
+my %outargs = ( -format => $outformat );
+$outargs{-file} = ">$output" if $output;
+my $out = Bio::AlignIO->new(%outargs);
+
+my $id = 1;    # running number of the HSPs written, shown as HSP=
+while( my $r = $parser->next_result ) {
+    while( my $hit = $r->next_hit ) {
+        while( my $hsp = $hit->next_hsp ) {
+            my $hsplen = $hsp->length('total');
+            next if( $min_len && $hsplen < $min_len);
+            my $pid = $hsp->percent_identity;
+            next if( ($min_id && $pid < $min_id) || 
+                     ($max_id && $pid > $max_id ) );
+            # NOTE: as written, --minevalue is an upper bound on the
+            # E-value and --maxevalue a lower bound
+            next if( defined $min_evalue && 
+                     $hsp->evalue > $min_evalue ); 
+            next if( $max_evalue && 
+                     $hsp->evalue < $max_evalue);
+            $verbose && $hsp->verbose($verbose);
+            my $aln = $hsp->get_aln();
+            my @seqs;
+            foreach my $seq ( $aln->each_seq ) {
+                if( $seq->display_id =~ /(\S+)[\/\_](\d+)\-(\d+)/ ) {
+                    $seq->display_id($1);
+                    $seq->start($seq->start + $2 - 1);
+                    $seq->end($seq->end + $2 - 1);
+                }
+                $seq->description(sprintf("PID=%.2f LEN=%d HSP=%d",
+                                          $pid,$hsplen,$id));
+                push @seqs, $seq;
+            }
+            $aln = Bio::SimpleAlign->new();
+            $aln->add_seq(shift @seqs);
+            $aln->add_seq(shift @seqs);
+            $id++;
+            $out->write_aln($aln);
+        }
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2gff.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2gff.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2gff.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,332 @@
+#!perl -w
+# $Id: search2gff.PLS,v 1.14 2006/01/19 22:56:02 lapp Exp $
+
+# Author:      Jason Stajich <jason-at-bioperl-dot-org>
+# Description: Turn SearchIO parseable report(s) into a GFF report
+#
+=head1 NAME
+
+search2gff - Turn SearchIO parseable report(s) into a GFF report
+
+=head1 SYNOPSIS
+
+Usage:
+  search2gff [-o outputfile] [-f reportformat] [-i inputfilename]  OR file1 file2 ..
+
+=head1 DESCRIPTION
+
+This script will turn a protein Search report (BLASTP, FASTP, SSEARCH, 
+AXT, WABA) into a GFF File.
+
+The options are:
+
+   -i infilename      - (optional) inputfilename, will read
+                        either ARGV files or from STDIN
+   -o filename        - the output filename [default STDOUT]
+   -f format          - search result format (blast, fasta,waba,axt)
+                        (ssearch is fasta format). default is blast.
+   -t/--type seqtype  - if you want to see query or hit information
+                        in the GFF report
+   -s/--source        - specify the source (will be algorithm name
+                        otherwise like BLASTN)
+   --method           - the method tag (primary_tag) of the features
+                        (default is similarity)
+   --scorefunc        - a string or a file that when parsed evaluates
+                        to a closure which will be passed a feature
+                        object and that returns the score to be printed
+   --locfunc          - a string or a file that when parsed evaluates
+                        to a closure which will be passed two
+                        features, query and hit, and returns the
+                        location (Bio::LocationI compliant) for the
+                        GFF3 feature created for each HSP; the closure
+                        may use the clone_loc() and create_loc()
+                        functions for convenience, see their PODs
+   --onehsp           - only print the first HSP feature for each hit
+   -p/--parent        - the parent to which HSP features should refer
+                        if not the name of the hit or query (depending
+                        on --type)
+   --target/--notarget - whether to always add the Target tag or not
+   -h                 - this help menu
+   --version          - GFF version to use (put a 3 here to use gff 3)
+   --component        - generate GFF component fields (chromosome)
+   -m/--match         - generate a 'match' line which is a container
+                        of all the similarity HSPs
+   --addid            - add ID tag in the absence of --match
+   -c/--cutoff        - specify an evalue cutoff
+
+Additionally specify the filenames you want to process on the
+command-line.  If no files are specified then STDIN input is assumed.
+You specify this by doing: search2gff E<lt> file1 file2 file3
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=head1 Contributors
+
+Hilmar Lapp, hlapp-at-gmx-dot-net
+
+=cut
+
+use strict;
+use Bio::Tools::GFF;
+use Getopt::Long;
+use Bio::SearchIO;
+use Bio::Location::Simple; # pre-declare to simplify $locfunc implementations
+use Bio::Location::Atomic; # pre-declare to simplify $locfunc implementations
+use Storable qw(dclone);   # for cloning location objects
+use Bio::Factory::FTLocationFactory;
+
+my ($output,       # output file (if not stdout)
+    $input,        # name of the input file
+    $format,       # format of the input file, default is blast
+    $type,         # 'query' or 'hit' ('target' is mapped to 'hit')
+    $cutoff,       # cut-off value for e-value filter
+    $sourcetag,    # explicit source tag (will be taken from the
+                   # feature's own source tag otherwise)
+    $methodtag,    # primary tag (a.k.a. method), default 'similarity'
+    $gffver,       # GFF version (dialect) to write, default 2
+    $scorefunc,    # closure returning the score for a passed feature
+    $locfunc,      # closure returning a location object for a passed
+                   # query and hit feature
+    $addid,        # flag: whether to always add the ID for $match == 0
+    $parent,       # the name of the parent to use; if set, GFF3 output
+                   # gets the Parent tag even for $match == 0
+    $comp,         # flag: whether to print a component feature
+    $addtarget,    # flag: whether to always add the Target tag, default
+                   # is true
+    $match,        # flag: whether to print match lines as containers
+    $onehsp,       # flag: whether to consider only the first HSP for a hit
+    $quiet,        # flag: run quietly
+    $help);        # not set by GetOptions; -h is handled by a callback
+
+# Defaults; each of them can be overridden on the command line.
+($format, $type, $gffver, $methodtag, $addtarget) =
+    ('blast', 'query', 2, "similarity", 1);
+
+# Bind every command line switch to its variable. Asking for help
+# replaces this process with perldoc showing the POD of this script.
+GetOptions(
+    'i|input:s'   => \$input,
+    'component'   => \$comp,
+    'm|match'     => \$match,
+    'o|output:s'  => \$output,
+    'f|format:s'  => \$format,
+    's|source:s'  => \$sourcetag,
+    'method=s'    => \$methodtag,
+    'addid'       => \$addid,
+    'scorefunc=s' => \$scorefunc,
+    'locfunc=s'   => \$locfunc,
+    'p|parent=s'  => \$parent,
+    'target!'     => \$addtarget,
+    'onehsp'      => \$onehsp,
+    't|type:s'    => \$type,
+    'c|cutoff:s'  => \$cutoff,
+    'v|version:i' => \$gffver,
+    'q|quiet'     => \$quiet,
+    'h|help'      => sub{
+        exec('perldoc',$0);
+        exit(0)
+    },
+);
+
+# Normalise the sequence type: case is ignored, anything containing
+# 'target' means 'hit', and everything but 'query' or 'hit' is fatal.
+$type = lc($type);
+$type = 'hit' if $type =~ /target/;
+unless( $type eq 'query' || $type eq 'hit' ) {
+    die("seqtype must be either 'query' or 'hit'");
+}
+
+# Score callback: user supplied code if --scorefunc was given, otherwise
+# simply the score of the feature passed in.
+if( defined($scorefunc) ) {
+    $scorefunc = parse_code($scorefunc);
+} else {
+    $scorefunc = sub { my $feat = shift; $feat->score };
+}
+
+# Location callback: user supplied code if --locfunc was given, otherwise
+# the location of the first feature passed in.
+if( defined($locfunc) ) {
+    $locfunc = parse_code($locfunc);
+} else {
+    $locfunc = sub { my $feat = shift; $feat->location };
+}
+
+# --match wins over --addid
+undef $addid if $match && $addid;
+
+# Report parser; if no input is provided STDIN will be used.
+my $parser = Bio::SearchIO->new(-format  => $format,
+                                -verbose => ($quiet ? -1 : 0),
+                                -file    => $input);
+
+# GFF writer; it only gets a -file argument when an output file was named
+# (STDOUT otherwise).
+my @gff_args = (-gff_version => $gffver);
+push @gff_args, (-file => ">$output") if defined $output;
+my $out = Bio::Tools::GFF->new(@gff_args);
+# Main conversion loop: every HSP of every hit of every result becomes one
+# GFF feature, optionally framed by component and match features.
+#
+# %seen counts how often each query/hit name pair has been encountered,
+# %seen_hit remembers the hits that already got a component feature.
+my (%seen_hit,%seen);
+# the side of the alignment that is NOT being reported on
+my $other = $type eq 'query' ? 'hit' : 'query';
+
+while( my $result = $parser->next_result ) {
+    my $qname = $result->query_name;
+    # --component with type 'query': one feature spanning the whole query
+    if ( $comp && $type eq 'query' && 
+	 $result->query_length ) {
+	$out->write_feature(Bio::SeqFeature::Generic->new
+			    (-start       => 1,
+			     -end         => $result->query_length,
+			     -seq_id      => $qname,
+			     -source_tag  => 'chromosome',
+			     -primary_tag => 'Component',
+			     -tag         => {
+				 'Sequence' => $qname
+				 }));
+    }
+    while( my $hit = $result->next_hit ) {
+	# --cutoff: skip hits whose significance exceeds the cut-off
+	next if( defined $cutoff && $hit->significance > $cutoff);
+	# a repeated query/hit pair gets the running count appended
+	my $acc = $qname;
+	if( $seen{$qname."-".$hit->name}++ ) {
+	    $acc = $qname."-".$seen{$qname.'-'.$hit->name};
+	}
+	
+	# --component with type 'hit': one feature spanning the whole hit,
+	# written only the first time this hit name shows up
+	if( $comp && $type eq 'hit' && $hit->length &&
+	    ! $seen_hit{$hit->name}++ ) {
+	    $out->write_feature(Bio::SeqFeature::Generic->new
+				(-start       => 1,
+				 -end         => $hit->length,
+				 -seq_id      => $hit->name,
+				 -source_tag  => 'chromosome',
+				 -primary_tag => 'Component',
+				 -tag         => {
+				     'Sequence' => $hit->name
+				     }));
+	}
+	# per-hit state collected over the HSPs; used by the match feature
+	my (%min,%max,$seqid,$name,$st);
+	while( my $hsp = $hit->next_hsp ) {
+	    my $feature = new Bio::SeqFeature::Generic;
+	    # $proxyfor is the side being reported ($type), $otherf the
+	    # opposite side; $name is fixed by the first HSP of the hit
+	    my ($proxyfor,$otherf);
+	    if( $type eq 'query' ) {
+		($proxyfor,$otherf) = ($hsp->query,
+				      $hsp->hit);
+		$name  ||= $hit->name;
+	    } else {
+		($otherf,$proxyfor) = ($hsp->query,
+				      $hsp->hit);
+		$name ||= $acc;
+	    }
+	    # fall back to the bit score of the hit if there is no score
+	    $proxyfor->score($hit->bits) unless( $proxyfor->score );
+	    # the Parent tag is written for GFF3 only
+	    if (($gffver == 3) && ($match || $parent)) {
+		$feature->add_tag_value('Parent', $parent || $name);
+	    }
+	    
+	    # track the smallest start and the largest end seen so far,
+	    # separately for both sides of the alignment
+	    $min{$type} = $proxyfor->start 
+                unless defined $min{$type} && $min{$type} < $proxyfor->start;
+	    $max{$type} = $proxyfor->end 
+                unless defined $max{$type} && $max{$type} > $proxyfor->end;
+	    $min{$other} = $otherf->start 
+                unless defined $min{$other} && $min{$other} < $otherf->start;
+	    $max{$other} = $otherf->end 
+                unless defined $max{$other} && $max{$other} > $otherf->end;
+	    # Target carries three values: name, start and end on the
+	    # opposite side
+	    if ($addtarget || $match) {
+                $feature->add_tag_value('Target', 'Sequence:'.$name);
+                $feature->add_tag_value('Target', $otherf->start);
+                $feature->add_tag_value('Target', $otherf->end);
+            }
+            if ($addid) {
+                $feature->add_tag_value('ID', $name);
+            }
+
+	    # location comes from the --locfunc closure or its default
+	    $feature->location(&$locfunc($proxyfor,$otherf));
+	    #  strand for feature is always going to be product of
+	    #  query & hit strands so that target can always be just
+	    #  '+'
+	    $feature->strand ( $proxyfor->strand * $otherf->strand);
+	    if( $sourcetag ) { 
+		$feature->source_tag($sourcetag);
+	    } else {
+		$feature->source_tag($proxyfor->source_tag);
+	    }
+	    # score comes from the --scorefunc closure or its default
+	    $feature->score(&$scorefunc($proxyfor));
+	    $feature->frame($proxyfor->frame);
+	    $feature->seq_id($proxyfor->seq_id );
+	    $feature->primary_tag($methodtag);
+            # add annotation if encoded in the query description
+            # (every "/key=value" found there becomes a tag)
+            my $desc = $result->query_description;
+            while ($desc =~ /\/([^=]+)=(\S+)/g) {
+                $feature->add_tag_value($1,$2);
+            }
+	    # remember seq id and source tag of the first HSP
+	    $seqid ||= $proxyfor->seq_id;
+	    $out->write_feature($feature);
+	    $st ||= $sourcetag || $proxyfor->source_tag;
+            last if $onehsp;
+	}
+	# --match: one container feature spanning all HSPs written above;
+	# note that it is written after the HSP features
+	if( $match ) { 
+	    
+	    my $matchf = Bio::SeqFeature::Generic->new
+		(-start => $min{$type},
+		 -end   => $max{$type},
+		 -strand=> $hit->strand($type)*$hit->strand($other),
+		 -primary_tag => 'match',
+		 -source_tag  => $st,
+		 -score => $hit->bits,
+		 -seq_id => $seqid);
+	    if( $gffver == 3 ) { 
+		$matchf->add_tag_value('ID', $name);
+	    }
+	    $matchf->add_tag_value('Target', "Sequence:$name");
+	    $out->write_feature($matchf);
+	}
+    }
+}
+
+=head2 parse_code
+
+ Title   : parse_code
+ Usage   : my $closure = parse_code('sub { shift->score }');
+           my $closure = parse_code('myscorefunc.pl');
+ Function: Turns the value of --scorefunc or --locfunc into a code
+           reference. If the argument names a readable file, the
+           file is executed and its return value is used; otherwise
+           the argument itself is evaluated as Perl source.
+
+           Note that the code is executed as is, so it must come
+           from a trusted source.
+ Example :
+ Returns : A code reference
+ Args    : A string holding either the name of a file or Perl
+           source; both must evaluate to a code reference.
+ Throws  : Dies if the code does not compile, if the file cannot be
+           read, or if the result is not a code reference.
+
+
+=cut
+
+sub parse_code{
+    my $src = shift;
+    my $code;
+
+    # file or subroutine?
+    if(-r $src) {
+        # do FILE looks a relative name up in @INC, which no longer
+        # contains the current directory as of perl 5.26; an absolute
+        # path is used as is
+        require File::Spec;
+        my $path = File::Spec->rel2abs($src);
+        # start from a clean errno so that a stale $! left behind by an
+        # earlier system call is not reported as a read failure
+        $! = 0;
+        $code = do $path;
+        if(! ($code && (ref($code) eq "CODE"))) {
+            die "error in parsing code block $src: $@" if $@;
+            die "unable to read file $src: $!" if !defined($code) && $!;
+            die "failed to run $src, or it failed to return a closure";
+        }
+    } else {
+        $code = eval $src;
+        die "error in parsing code block \"$src\": $@" if $@;
+        die "\"$src\" fails to return a closure"
+            unless ref($code) eq "CODE";
+    }
+    return $code;
+}
+
+=head2 clone_loc
+
+ Title   : clone_loc
+ Usage   : my $copy = clone_loc($feature->location);
+ Function: Convenience helper for --locfunc closures that need a
+           private copy of a location, e.g. before modifying it.
+
+           The copy is made with Storable::dclone().
+ Example :
+ Returns : The deep copy Storable::dclone() produces for the
+           argument: an object of the same class holding the same
+           data, with all nested structures copied as well.
+ Args    : A L<Bio::LocationI> compliant object
+
+
+=cut
+
+sub clone_loc{
+    my ($loc) = @_;
+    return dclone($loc);
+}
+
+=head2 create_loc
+
+ Title   : create_loc
+ Usage   : my $loc = create_loc("10..12");
+ Function: Convenience helper for --locfunc closures that need to
+           build a location from scratch. The string is handed to
+           Bio::Factory::FTLocationFactory->from_string().
+
+ Example :
+ Returns : Whatever from_string() returns for the string, i.e. a
+           L<Bio::LocationI> object representing the location.
+ Args    : A GenBank feature-table formatted string.
+
+
+=cut
+
+sub create_loc{
+    my ($ft_string) = @_;
+    return Bio::Factory::FTLocationFactory->from_string($ft_string);
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2gff.PLS
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2tribe.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2tribe.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/search2tribe.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,93 @@
+#!perl -w
+# $Id: search2tribe.PLS,v 1.6 2005/04/26 15:03:53 jason Exp $
+
+# Author:      Jason Stajich <jason-at-bioperl-dot-org>
+# Description: Turn SearchIO parseable report(s) into a TRIBE matrix
+#
+=head1 NAME
+
+search2tribe - Turn SearchIO parseable report(s) into TRIBE matrix
+
+=head1 SYNOPSIS
+
+Usage:
+  search2tribe [-o outputfile] [-f reportformat] [-w/--weight] file1 file2 ..
+
+=head1 DESCRIPTION
+
+This script is probably too slow for most people's uses.  It is better
+to use something like scripts/searchio/fastam9_to_table, -m 9 output
+from BLAST, or the blast2table from the BLAST O'Reilly book to get a
+tabular output from these programs and then feed the table into MCL
+with the mcxdeblast script and the --m9 option.
+
+This script will turn a protein Search report (BLASTP, FASTP, SSEARCH)
+into a Markov Matrix for TribeMCL clustering.
+
+The options are:
+
+   -o filename          - the output filename [default STDOUT]
+   -f format            - search result format (blast, fasta)
+                          (ssearch is fasta format). default is blast.
+   -w or --weight VALUE - Change the default weight for E(0.0) hits
+                          to VALUE (default=200 (i.e. 1e-200) )
+   -h                   - this help menu
+
+Additionally specify the filenames you want to process on the
+command-line.  If no files are specified then STDIN input is assumed.
+You specify this by doing: search2tribe E<lt> file1 file2 file3
+
+=head1 AUTHOR
+
+Jason Stajich, jason-at-bioperl-dot-org
+
+=cut
+
+use strict;
+use Bio::SearchIO;
+use Bio::SearchIO::FastHitEventBuilder; # employ a speedup
+use Getopt::Long;
+use constant DEFAULT_WEIGHT => 200;
+use constant DEFAULT_FORMAT => 'blast';
+
+# Option variables; weight and format start out with their defaults.
+my ($format,@files,$output,$weight);
+$weight = DEFAULT_WEIGHT; # default weight value
+$format = DEFAULT_FORMAT;
+
+my ($help);
+
+# -h replaces this process with perldoc showing the POD of this script
+GetOptions(
+	   'f|format:s'    => \$format,
+	   'o|output:s'    => \$output,
+	   'w|weight:i'  => \$weight,
+	   'h|help'        => sub{ exec('perldoc',$0);
+				   exit(0)
+				   },
+	   );
+
+# Output goes to the file named with -o, or to STDOUT without it.
+my $outfh;
+if( $output ) { 
+    # three-argument open: the file name is never parsed for a mode
+    open($outfh, '>', $output) || die("Error opening output file $output. $!");
+} else {
+    $outfh = *STDOUT;
+}
+
+# The reports are read through the ARGV handle, i.e. from the files named
+# on the command line or from STDIN.
+my $parser = new Bio::SearchIO(-format => $format, -fh => \*ARGV);
+
+# Let's throw away HSP events
+$parser->attach_EventHandler(new Bio::SearchIO::FastHitEventBuilder);
+# One output line per hit: query name, hit name, and the e-value split
+# into the parts before and after 'e-', all separated by tabs.
+while( my $report = $parser->next_result ) {
+    my $q = $report->query_name;
+    while( my $hit = $report->next_hit ) {
+	my $evalue = $hit->significance;
+	# an e-value written without mantissa ("e-100") gets a leading 1
+	$evalue =~ s/^e/1e/i;
+
+	# an e-value of zero is replaced by 1e-<weight> (see -w/--weight),
+	# everything else is normalised to exponential notation
+	if( $evalue == 0 ) {	    
+	    $evalue = "1e-$weight";
+	} else { 
+	    $evalue = sprintf("%e",$evalue);
+	}
+
+	# NOTE(review): for e-values >= 1 sprintf("%e") yields "e+NN", which
+	# contains no 'e-', so split returns a single field and the line has
+	# three columns instead of four - confirm whether that is intended.
+	print $outfh join("\t",$q,$hit->name, split('e-',$evalue)), "\n"; 
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/scripts/utilities/seq_length.PLS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/scripts/utilities/seq_length.PLS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/scripts/utilities/seq_length.PLS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+#!perl -w
+
+=head1 NAME
+
+seq_length.pl - lists the number of bases and number of sequences in specified sequence database files
+
+=head1 SYNOPSIS
+
+seq_length.pl *.fa
+
+=head1 DESCRIPTION
+
+seq_length.pl will report the total number of residues and total number of individual sequences contained within a specified sequence database file.
+
+=head1 OPTIONS
+
+ -f/--format          - Specify the database format ('fasta' is default).
+                        This script uses SeqIO and as such formats are 
+                        limited to those which SeqIO system supports.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Jason Stajich E<lt>jason at bioperl.orgE<gt>
+
+=cut
+
+use strict;
+use Bio::SeqIO;
+use Getopt::Long;
+
+# Sequence format of the input files; -f/--format overrides the default.
+my $format = 'fasta';
+GetOptions('f|format:s' => \$format);
+
+# Without any file argument show the documentation instead.
+exec('perldoc',$0) unless @ARGV;
+
+# Print one summary line per file: its name, the sum of all sequence
+# lengths, and the number of sequences read.
+for my $f ( @ARGV ) {
+    my $reader = Bio::SeqIO->new(-file => $f, -format => $format);
+    my ($len, $count) = (0, 0);
+    while( my $seq = $reader->next_seq ) {
+        $count++;
+        $len += $seq->length();
+    }
+
+    printf "%-10s %d bp $count sequences\n",$f,$len;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/AAChange.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/AAChange.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/AAChange.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,105 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: AAChange.t,v 1.5 2001/08/03 10:11:24 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+# Test plan: 25 checks on the accessors of Bio::Variation::AAChange.
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    # NOTE(review): 'use' takes effect at compile time, so 'use lib' and
+    # 'use Test' below run regardless of the outcome of the eval.
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 25;
+}
+use Bio::Variation::Allele;
+use Bio::Variation::AAChange;
+use Bio::Variation::RNAChange;
+
+# construction
+my $obj = Bio::Variation::AAChange -> new;
+ok(1);
+ok defined $obj;
+ok ref($obj), 'Bio::Variation::AAChange';
+
+# plain get/set accessors: each value set must be read back unchanged
+$obj->start(3);           
+ok $obj->start, 3;
+
+$obj->end(3); 
+ok $obj->end, 3;
+
+$obj->length(3);
+
+ok $obj->length, 3;
+
+$obj->strand('1');  
+ok $obj->strand, '1';
+
+# primary_tag is checked without having been set
+ok $obj->primary_tag, 'Variation';
+
+$obj->source_tag('source');
+ok $obj->source_tag, 'source';
+
+$obj->frame(2);   
+ok $obj->frame,2;
+
+$obj->score(2);   
+ok $obj->score, 2;
+
+$obj->isMutation(1); 
+ok $obj->isMutation;
+
+# original and mutated allele (V to A)
+my $a1 = Bio::Variation::Allele->new(-seq => 'V');
+$obj->allele_ori($a1);
+
+ok $obj->allele_ori->seq, 'V';
+
+my $a2 = Bio::Variation::Allele->new('-seq' => 'A');
+$obj->add_Allele($a2);
+
+ok $obj->allele_mut->seq, 'A';
+
+ok $obj->similarity_score, 0;
+
+$obj->upStreamSeq('upStreamSeq'); 
+ok $obj->upStreamSeq, 'upStreamSeq';
+
+$obj->dnStreamSeq('dnStreamSeq'); 
+ok $obj->dnStreamSeq, 'dnStreamSeq' ;
+
+# the label is checked without having been set
+ok $obj->label, 'substitution, conservative';
+
+$obj->status('proven'); 
+ok $obj->status, 'proven';
+
+$obj->proof('experimental'); 
+ok $obj->proof, 'experimental';
+
+$obj->region('region'); 
+ok $obj->region, 'region';
+
+$obj->region_value('region_value'); 
+ok $obj->region_value, 'region_value';
+
+$obj->numbering('coding'); 
+ok $obj->numbering, 'coding';
+
+# attach an RNA level change before asking for the trivial name
+my $obj2 = Bio::Variation::RNAChange -> new(-start => 7, 
+					  -end => 7,
+					  -cds_end => 100,
+					  -codon_pos => 1,
+					  -upStreamSeq => 'acgcgcgcgc',
+					  -dnStreamSeq => 'acgcgcgcgc'
+					  );
+$obj2->label('missense');
+$obj->RNAChange($obj2);
+
+ok $obj->trivname, 'V3A', "Trivial name is !". $obj->trivname. "!\n";
+
+$obj->mut_number(2);
+ok $obj->mut_number, 2;

Added: trunk/packages/bioperl/branches/upstream/current/t/AAReverseMutate.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/AAReverseMutate.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/AAReverseMutate.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: AAReverseMutate.t,v 1.4 2001/01/25 22:13:39 jason Exp $
+
+use strict;
+# Test plan: 16 checks on Bio::Variation::AAReverseMutate.
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 16;
+}
+
+use Bio::Variation::AAReverseMutate;
+ok(1);
+
+# reverse-mutate phenylalanine (F) into serine (S)
+my $obj = new Bio::Variation::AAReverseMutate
+    ('-aa_ori' => 'F', 
+     '-aa_mut' => 'S'
+     );
+ok defined $obj;
+ok $obj->isa('Bio::Variation::AAReverseMutate');
+
+ok $obj->aa_ori, 'F';
+
+ok $obj->aa_mut, 'S';
+
+my @points = $obj->each_Variant;
+# F>S has two solutions
+ok scalar @points, 2;
+
+$obj->codon_ori('ttc');
+ok defined $obj;
+
+#now there should be only one left
+@points = $obj->each_Variant;
+ok scalar @points, 1;
+
+$obj->codon_table(3);
+ok $obj->codon_table, 3;
+
+#Check the returned object
+my $rna = pop @points;
+ok $rna->isa('Bio::Variation::RNAChange');
+
+ok $rna->length, 1;
+ok $rna->allele_ori->seq, 't';
+ok $rna->allele_mut->seq, 'c';
+
+
+ok $rna->codon_ori, 'ttc', "Codon_ori is |". $rna->codon_ori. "|";
+
+ok $rna->codon_pos, 2;
+
+$obj->codon_table(11);
+ok $obj->codon_table, 11;

Added: trunk/packages/bioperl/branches/upstream/current/t/AlignIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/AlignIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/AlignIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,662 @@
+# This is -*-Perl-*- code
+# $Id: AlignIO.t,v 1.63.2.1 2006/11/08 19:25:18 cjfields Exp $
+use strict;
+use Data::Dumper;
+my $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+	eval { require Test::More; };
+	if( $@ ) {
+		use lib 't/lib';
+	}
+	use Test::More;
+	plan tests => 221;
+}
+use_ok('Bio::SimpleAlign');
+use_ok('Bio::AlignIO');
+use_ok('Bio::Root::IO');
+
+END {
+    unlink(Bio::Root::IO->catfile("t","data","testout2.pfam"),
+	   Bio::Root::IO->catfile("t","data","testout.selex"),
+	   Bio::Root::IO->catfile("t","data","testout.pfam"),
+	   Bio::Root::IO->catfile("t","data","testout.msf"),
+	   Bio::Root::IO->catfile("t","data","testout.fasta"),
+	   Bio::Root::IO->catfile("t","data","testout.clustal"),
+	   Bio::Root::IO->catfile("t","data","testout.phylip"),
+	   Bio::Root::IO->catfile("t","data","testout.nexus"),
+	   Bio::Root::IO->catfile("t","data","testout.mega"),
+	   Bio::Root::IO->catfile("t","data","testout.po"),
+	   Bio::Root::IO->catfile("t","data","testout.largemultifasta"),
+       Bio::Root::IO->catfile("t","data","testout.stockholm")
+	  );
+}
+
+my ($str,$aln,$strout,$status);
+
+#PSI format  
+$str  = new Bio::AlignIO (
+    '-file'	=> Bio::Root::IO->catfile("t","data","testaln.psi"),
+    '-format'	=> 'psi');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'QUERY/1-798');
+is($aln->no_sequences, 56);
+
+# STOCKHOLM (multiple concatenated files)
+# Rfam
+$str  = new Bio::AlignIO (
+    '-file'	=> Bio::Root::IO->catfile("t","data","rfam_tests.stk"),
+    '-format'	=> 'stockholm');
+$strout = Bio::AlignIO->new('-file'  => ">".
+		  Bio::Root::IO->catfile("t", "data", "testout.stockholm"),
+		'-format' => 'stockholm', );
+
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'Z11765.1/1-89');
+is($aln->accession, 'RF00006');
+is($aln->id, 'Vault');
+is($aln->description,'Vault RNA');
+# annotation
+my ($ann) = $aln->annotation->get_Annotations('alignment_comment');
+isa_ok($ann, 'Bio::Annotation::Comment');
+is($ann->text,'This family of RNAs are found as part of the enigmatic vault'.
+   ' ribonucleoprotein complex. The complex consists of a major vault protein'.
+   ' (MVP), two minor vault proteins (VPARP and TEP1), and several small '.
+   'untranslated RNA molecules. It has been suggested that the vault complex '.
+   'is involved in drug resistance. We have identified a putative novel vault '.
+   'RNA on chromosome 5 EMBL:AC005219.','Stockholm annotation');
+is($ann->tagname,'alignment_comment','Stockholm annotation');
+
+# test output
+$status = $strout->write_aln($aln);
+is $status, 1, "stockholm output test";
+
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'L43844.1/2-149');
+is($aln->accession, 'RF00007');
+is($aln->id, 'U12');
+is($aln->description,'U12 minor spliceosomal RNA');
+my @anns = $aln->annotation->get_Annotations('reference');
+$ann = shift @anns;
+isa_ok($ann, 'Bio::Annotation::Reference', 'Stockholm annotation');
+$ann = shift @anns;
+is($ann->pubmed,'9149533', 'Stockholm annotation');
+is($ann->title,
+   'Pre-mRNA splicing: the discovery of a new spliceosome doubles the challenge.',
+   'Stockholm annotation');
+is($ann->authors,'Tarn WY, Steitz JA;', 'Stockholm annotation');
+is($ann->location,'Trends Biochem Sci 1997;22:132-137.', 'Stockholm annotation');
+# alignment meta data
+my $meta = $aln->consensus_meta;
+isa_ok($meta, 'Bio::Seq::MetaI');
+my ($name) = $meta->meta_names;
+is($name,'SS_cons', 'Rfam meta data');
+my $meta_str = $meta->named_meta($name);
+is($meta_str, '...<<<<<..........>>>>>........<<<<......<<<<......>>>>>>>>'.
+   '<<<<<.......>>>>>...........<<<<<<<...<<<<<<<.....>>>>>>>.>>>>>>>..<<<'.
+   '<<<<<<.........>>>>>>>>>...', 'Rfam meta data');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'AJ295015.1/58-1');
+is($aln->accession, 'RF00008');
+is($aln->id, 'Hammerhead_3');
+is($aln->description,'Hammerhead ribozyme (type III)');
+# alignment meta data
+$meta = $aln->consensus_meta;
+isa_ok($meta, 'Bio::Seq::MetaI');
+($name) = $meta->meta_names;
+is($name,'SS_cons', 'Rfam meta data');
+$meta_str = $meta->named_meta($name);
+is($meta_str, '.<<<<<<..<<<<<.........>>>>>.......<<<<.....................'.
+   '...........>>>>...>>>>>>.', 'Rfam meta data');
+
+# STOCKHOLM (Pfam)
+$str  = new Bio::AlignIO (
+    '-file'	=> Bio::Root::IO->catfile("t","data","pfam_tests.stk"),
+    '-format'	=> 'stockholm');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'RAD25_SCHPO/5-240');
+is($aln->accession, 'PF00244.9');
+is($aln->id, '14-3-3');
+is($aln->description,'14-3-3 protein');
+($ann) = $aln->annotation->get_Annotations('gathering_threshold');
+isa_ok($ann, 'Bio::Annotation::SimpleValue');
+is($ann, '25.00 25.00; 25.00 25.00;', 'Pfam annotation');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'COMB_CLOAB/6-235');
+is($aln->accession, 'PF04029.4');
+is($aln->id, '2-ph_phosp');
+is($aln->description,'2-phosphosulpholactate phosphatase');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, 'Y278_HAEIN/174-219');
+is($aln->accession, 'PF03475.3');
+is($aln->id, '3-alpha');
+is($aln->description,'3-alpha domain');
+# alignment meta data
+$meta = $aln->consensus_meta;
+isa_ok($meta, 'Bio::Seq::MetaI');
+my %test_data = ('SA_cons'  => '6000320010013274....3372052026033.108303630350385563',
+                 'SS_cons'  => 'SCBHHHHHHHHHTSCC....CHHHHHHHHTSTT.CCHHHHHHHHHHHHHSSC',
+                 'seq_cons' => 'plTVtclsclhasc......stphLcphLshss.Lupsa+cohpK+lspshs',);
+for my $name ($meta->meta_names) {
+    ok(exists $test_data{$name}, 'Pfam aln meta data');
+    $meta_str = $meta->named_meta($name);
+    is($meta_str, $test_data{$name}, 'Pfam aln meta data');
+}
+%test_data = ();
+# sequence meta data
+%test_data = ('SA'  => '6000320010013274....3372052026033.108303630350385563',
+              'SS'  => 'SCBHHHHHHHHHTSCC....CHHHHHHHHTSTT.CCHHHHHHHHHHHHHSSC');
+for my $seq ($aln->each_seq) {
+    for my $name ($seq->meta_names) {
+        ok(exists $test_data{$name}, 'Pfam seq meta data');
+        is($seq->named_meta($name), $test_data{$name}, 'Pfam seq meta data');
+    }
+}
+
+# PFAM format (no annotation)
+$str = Bio::AlignIO->new(
+	  '-file' => Bio::Root::IO->catfile("t","data","testaln.pfam"));
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse, '1433_LYCES/9-246');
+
+$strout = Bio::AlignIO->new(
+   '-file' => ">".Bio::Root::IO->catfile("t","data","testout.pfam"), 
+			    '-format' => 'pfam');
+$status = $strout->write_aln($aln);
+is($status, 1, " pfam output test");
+
+# MAF
+$str = Bio::AlignIO->new(
+	  '-file' => Bio::Root::IO->catfile("t","data","humor.maf"));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'NM_006987/1-5000', "maf input test";
+is $aln->get_seq_by_pos(1)->strand, '-';
+
+# MSF
+$str = Bio::AlignIO->new(
+    '-file' => Bio::Root::IO->catfile("t","data","testaln.msf"));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, '1433_LYCES/9-246', "msf input test";
+
+$strout = Bio::AlignIO->new(
+   '-file' => ">".Bio::Root::IO->catfile("t","data","testout.msf"), 
+			    '-format' => 'msf');
+$status = $strout->write_aln($aln);
+is $status, 1, "msf output test";
+
+
+# FASTA
+$str = Bio::AlignIO->new(
+		 -file => Bio::Root::IO->catfile("t","data","testaln.fasta"), 
+		 -format => 'fasta');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'AK1H_ECOLI/114-431', 
+  "fasta input test ";
+is ($aln->get_seq_by_pos(1)->description, 'DESCRIPTION HERE', 
+    "fasta input test for description");
+is ($aln->get_seq_by_pos(11)->display_id, 'AK_YEAST',
+    "fasta input test for id");
+
+is ($aln->get_seq_by_pos(2)->end, 318,
+    "fasta input test for end");
+
+is ($aln->get_seq_by_pos(11)->description, 'A COMMENT FOR YEAST', 
+    "fasta input test for description");
+
+$strout = Bio::AlignIO->new(
+   '-file' => ">".Bio::Root::IO->catfile("t","data","testout.fasta"), 
+			      '-format' => 'fasta');
+$status = $strout->write_aln($aln);
+is $status, 1,"fasta output test";
+
+my $in = Bio::AlignIO->newFh(
+   '-file'  => Bio::Root::IO->catfile("t","data","testaln.fasta"), 
+			       '-format' => 'fasta');
+my $out = Bio::AlignIO->newFh(
+   '-file' => ">".Bio::Root::IO->catfile("t","data","testout2.pfam"), 
+				'-format' => 'pfam');
+while ( $aln = <$in>) {
+    is $aln->get_seq_by_pos(1)->get_nse, 'AK1H_ECOLI/114-431',
+     "filehandle input test  ";
+    $status = print $out $aln;
+    last;
+}
+is $status, 1, "filehandle output test";
+
+
+# SELEX
+$str = Bio::AlignIO->new(
+    '-file' => Bio::Root::IO->catfile("t","data","testaln.selex"),
+			   '-format' => 'selex');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'AK1H_ECOLI/114-431', "selex format test ";
+
+$strout = Bio::AlignIO->new(
+   '-file' => ">".Bio::Root::IO->catfile("t","data","testout.selex"), 
+			      '-format' => 'selex');
+$status = $strout->write_aln($aln);
+is $status, 1, "selex output test";
+
+
+# MASE
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","testaln.mase"),
+			   '-format' => 'mase');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'AK1H_ECOLI/1-318', "mase input test ";
+
+
+# PRODOM
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","testaln.prodom"),
+			   '-format' => 'prodom');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'P04777/1-33', "prodom input test ";
+
+
+# CLUSTAL
+$strout = Bio::AlignIO->new(
+   '-file' => ">".Bio::Root::IO->catfile("t","data","testout.clustal"), 
+			      '-format' => 'clustalw');
+$status = $strout->write_aln($aln);
+is $status, 1, "clustalw (.aln) output test";
+undef $strout;
+$str = Bio::AlignIO->new(
+   '-file'=> Bio::Root::IO->catfile("t","data","testout.clustal"), 
+			   '-format' => 'clustalw');
+$aln = $str->next_aln($aln);
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'P04777/1-33', "clustalw (.aln) input test";
+my $io = Bio::AlignIO->new(
+   -file => Bio::Root::IO->catfile("t","data","testaln.aln") );
+$aln = $io->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->consensus_string, "MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSWEPKDLPHRHEQIEA".
+"LAQILVPVLRGETMKIIFCGHHACELGEDRGTKGFVIDELKDVDEDRNGKVDVIEINCEHMDTHYRVLPNIAKLF".
+"DDCTGIGVPMHGGPTDEVTAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTRINTELKRAKVSVIGISND".
+"LKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAFYPGVLDEGVIPLCAALAAREHGDARKALDLLRV".
+"AGEIAEREGASKVTEKHVWKAQEKIEQDMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYI".
+"DLEPLTQRRISDLINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRRLI",
+"clustalw consensus_string test";
+
+# BL2SEQ
+$str = Bio::AlignIO->new(
+   '-file'   => Bio::Root::IO->catfile("t","data","bl2seq.out"),
+			 '-format' => 'bl2seq',
+			 '-report_type' => 'blastp');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(2)->get_nse, 'ALEU_HORVU/60-360', 
+    "BLAST bl2seq format test";
+
+# PHYLIP interleaved
+$str = Bio::AlignIO->new(
+    '-file' => Bio::Root::IO->catfile("t","data","testaln.phylip"),
+    '-format' => 'phylip');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'Homo_sapie/1-45';
+
+$strout = Bio::AlignIO->new(
+    '-file'  => ">".Bio::Root::IO->catfile("t","data","testout.phylip"),
+    '-format' => 'phylip');
+$status = $strout->write_aln($aln);
+is $status, 1, "phylip output test";
+
+
+# METAFASTA
+#print STDERR "Better Metafasta tests needed\n" if $DEBUG;
+$io = Bio::AlignIO->new(-verbose => -1, 
+   -file => Bio::Root::IO->catfile("t","data","testaln.metafasta"));
+$aln = $io->next_aln;
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->consensus_string,'CDEFHIJKLMNOPQRSTUVWXYZ', "consensus_string on metafasta";
+is $aln->symbol_chars,'23',"symbol_chars() using metafasta";
+
+# NEXUS
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","testaln.nexus"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'Homo_sapiens/1-45';
+$strout = Bio::AlignIO->new('-file'  => ">".
+			  Bio::Root::IO->catfile("t", "data", "testout.nexus"),
+			    '-format' => 'nexus', );
+$status = $strout->write_aln($aln);
+is $status, 1, "nexus output test";
+
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","Bird_Ovomucoids.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","basic-ladder.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","Kingdoms_DNA.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+  '-file' => Bio::Root::IO->catfile("t","data","char-interleave.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","Primate_mtDNA.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","char-matrix-spaces.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","SPAN_Family4nl.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","intrablock-comment.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","SPAN_Family7n.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","long-names.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","SPAN_Family8a.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","multiline-intrablock-comment.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+  '-file' => Bio::Root::IO->catfile("t","data","Treebase-chlamy-dna.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+  '-file' => Bio::Root::IO->catfile("t","data","quoted-strings1.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","UnaSmithHIV-both.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+  '-file' => Bio::Root::IO->catfile("t","data","quoted-strings2.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","barns-combined.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","radical-whitespace.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t","data","basic-bush.nex"),
+			  '-format' => 'nexus');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+$str = Bio::AlignIO->new(
+  '-file' => Bio::Root::IO->catfile("t","data","radical-whitespace_02.nex"),
+			  '-format' => 'nexus');
+
+
+# EMBOSS water
+$str = new Bio::AlignIO('-format' => 'emboss',
+		 '-file'   => Bio::Root::IO->catfile("t", "data", 'cysprot.water'));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->score,'501.50');
+is($aln->get_seq_by_pos(1)->get_nse,'PAPA_CARPA/3-342');
+is($aln->get_seq_by_pos(2)->get_nse,'CATL_HUMAN/1-331');
+is(sprintf("%.1f",$aln->overall_percentage_identity),33.8);
+is(sprintf("%.1f",$aln->average_percentage_identity),40.1);
+
+is($aln->get_seq_by_pos(1)->start, 3);
+is($aln->length,364);
+
+
+# EMBOSS needle
+$str = new Bio::AlignIO('-format' => 'emboss',
+	  '-file'   => Bio::Root::IO->catfile("t", "data", 'cysprot.needle'));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->score,'499.50');
+is($aln->get_seq_by_pos(1)->get_nse,'PAPA_CARPA/1-345');
+is($aln->get_seq_by_pos(2)->get_nse,'CATL_HUMAN/1-333');
+
+
+# EMBOSS water 2.2.x
+$str = new Bio::AlignIO('-format' => 'emboss',
+	 '-file'   => Bio::Root::IO->catfile("t", "data", 'cys1_dicdi.water'));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse,'CYS1_DICDI/1-343');
+is($aln->get_seq_by_pos(2)->get_nse,'CYS1_DICDI-1/1-343');
+is($aln->score,'1841.0');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse,'CYS1_DICDI/29-343');
+is($aln->get_seq_by_pos(2)->get_nse,'ALEU_HORVU/61-360');
+
+
+# EMBOSS water 2.2.x sparse needle
+$str = new Bio::AlignIO(-verbose => $DEBUG,
+	  '-format' => 'emboss',
+   	'-file'   => Bio::Root::IO->catfile("t", "data", 'sparsealn.needle'));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->score,'18.0');
+is(sprintf("%.1f",$aln->overall_percentage_identity), 2.1);
+is(sprintf("%.1f",$aln->average_percentage_identity), 38.5);
+is($aln->get_seq_by_pos(1)->length, 238);
+is($aln->length,238);
+is($aln->get_seq_by_pos(1)->get_nse,'KV1K_HUMAN/1-108');
+is($aln->get_seq_by_pos(2)->get_nse,'IF1Y_HUMAN/1-143');
+is($aln->get_seq_by_pos(1)->seq(), 'DIQMTQSPSTLSVSVGDRVTITCEASQTVLSYLNWYQQK'.
+   'PGKAPKLLIYAASSLETGVPSRFSGQGSGTBFTFTISSVZPZBFATYYCQZYLDLPRTFGQGTKVDLKR'.
+   '-'x130);
+is($aln->get_seq_by_pos(2)->seq(), ('-'x94).'PKNKGKGGK-NRRRGKNENESEKRELVFKE'.
+   'DGQEYAQVIKMLGNGRLEALCFDGVKRLCHIRGKLRKKVWINTSDIILVGLRDYQDNKADVILKYNADEAR'.
+   'SLKAYGGLPEHAKINETDTFGPGDDDEIQFDDIGDDDEDIDDI');
+is($aln->is_flush, 1);
+
+
+# MEGA
+$str = new Bio::AlignIO('-format' => 'mega',
+  	'-file'   => Bio::Root::IO->catfile("t","data","hemoglobinA.meg"));
+
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(1)->get_nse,'Human/1-141');
+is($aln->get_seq_by_pos(2)->get_nse,'Horse/1-144');
+$aln->unmatch();
+is($aln->get_seq_by_pos(3)->subseq(1,10), 'V-LSAADKGN');
+
+$strout = new Bio::AlignIO('-format' => 'mega',
+	  '-file'   => ">" .Bio::Root::IO->catfile("t","data","testout.mega"));
+
+$status = $strout->write_aln($aln);
+is $status, 1, "mega output test";
+
+
+# EMBOSS needle
+$str = new Bio::AlignIO('-format' => 'emboss',
+	  '-file'   => Bio::Root::IO->catfile('t','data','gf-s71.needle'));
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(2)->seq(), 'MEDVTLFQFTWRKPI-RLQGEIVYKTSETQTIETNKKDVECVANFQENKEVQTDS-VDNGVGENVKKDITISKEVLNLLYDFVRDDSKVNYDRLLEFHKFDKVALETVQKYHVETRNENIILMISSSSRKTLILFGGISHETFCSHQARALLCSSSTSFSIPLPVCAISAVFYSSTQFILGDVSGNISMCSKDKIIFEKKITDGAVTCLEMCRHGLLSGSDDGNIILWQIGTSGLEKLGGTKLTVSDLSRKIRRSSTSNKPVAIVSMQVYVWPSGEEACVATETGGLYLLTLPTLDYKPLSHQTATSINKILFENQFVAVIYHTSNAAVFNSEGLVDEIPFVATLAVR----------PKLVLF--YTSVCVQDITLNCTSPFREFNNEYNPVIKFSKIRFSADLSVING-FRTSSPNSNN-----------------------------------------------');
+is($aln->get_seq_by_pos(1)->seq(), 'MEDVTLHHFRWRKPVENKNGEIVYKTSETQTAEISRKDVECVANFQKSQESQTDDFMQNGVGDGIKKEIRISKEVLGHIYDFLRDDSKVNYDRLLEFHKFDKVSLETVQKYHVETRNENIILMISNSSRKTLILFGGLSHETFCSHQARAVLCSSSTTSSLPLPVCAISAVFYSSTQFLLGDISGNISMWTKEKMIFENKVTDGSVTSLELCRYGLLSGSDDGNVILWKVEESKIEKIEGIKLTVSDLSRKIRRSSTSNKPVAIVSMQV----SGDEVCVATETGGLYLLTLPTLESKPLT-QSATSIFKILYEHPYIAVVYHTSNSAIFNSEGLVDEIPFVATLAVRCGAYFIFSNQSRLIIWSMNTRSTVIDENLNCHS-ICSLSND--------------TLQVLDGDFNLNSQSENSATSESENLRISDLQNLRMLKLQNLRTSEFQNFRTSESQYFKKDNGEL');
+is($aln->is_flush(), 1);
+is($aln->get_seq_by_pos(1)->get_nse,'gf.s71.44/1-448');
+is($aln->get_seq_by_pos(2)->get_nse,'Y50C1A.2/1-406');
+
+
+# PHYLIP sequential/non-interleaved
+$strout = Bio::AlignIO->new('-file'  =>
+			    Bio::Root::IO->catfile("t","data",
+						   "noninterleaved.phy"),
+			    '-format' => 'phylip');
+$aln = $strout->next_aln($aln);
+isa_ok($aln,'Bio::Align::AlignI');
+is($aln->get_seq_by_pos(2)->seq(), 'CCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAATAA'.
+   'AGGTAGGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGACATGAATT'.
+   'TGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGGTTTATCAAAGTAAGACAGTATGATCAGA'.
+   'TACCCATAGAGATCTGTGGACATAAAGCTATAGGTACAGTATTAGTAGGACCCACACCTGTCAATATAATTG'.
+   'GAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTT' );
+
+
+# LARGEMULTIFASTA
+$str = Bio::AlignIO->new(
+   '-file' => Bio::Root::IO->catfile("t", "data","little.largemultifasta"),
+                         '-format' => 'largemultifasta');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'Human:/1-81', "fasta input test ";
+is ($aln->get_seq_by_pos(1)->description,
+    '72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;',
+    "fasta input test for description");
+is ($aln->get_seq_by_pos(3)->display_id, 'Rat:',
+    "fasta input test for id");
+
+is ($aln->get_seq_by_pos(3)->description,
+    '72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;',
+    "fasta input test for description");
+
+$strout = Bio::AlignIO->new(
+   '-file' => ">".Bio::Root::IO->catfile("t", "data",
+                                       "testout.largemultifasta"),
+                            '-format' => 'largemultifasta');
+$status = $strout->write_aln($aln);
+is $status, 1,"fasta output test";
+
+
+# POA
+# just skip on perl 5.6.0 and earlier as it causes a crash on 
+# default perl with OS X 10.2
+# fink perl 5.6.0 does not seem to have the problem
+# can't figure out what it is so just skip for now
+if( $^O ne 'darwin' || $] > 5.006 ) {
+	$str = new Bio::AlignIO(
+			  -file   => Bio::Root::IO->catfile("t", "data", "testaln.po"),
+			  -format => 'po',
+			  );
+	isa_ok($str, 'Bio::AlignIO');
+	$aln = $str->next_aln();
+	isa_ok($aln,'Bio::Align::AlignI');
+	is $aln->no_sequences, 6;
+
+# output ok? i.e. does conversion from clustalw to po give the same alignment?
+	$str = new Bio::AlignIO(
+		  '-file'   => Bio::Root::IO->catfile("t", "data", "testaln.aln"),
+		  '-format' => 'clustalw');
+	isa_ok($str,'Bio::AlignIO');
+	$aln = $str->next_aln();
+	isa_ok($aln,'Bio::Align::AlignI');
+	$strout = Bio::AlignIO->new(
+		 '-file'   => ">" . Bio::Root::IO->catfile("t", "data", "testout.po"),
+		 '-format' => 'po');
+	$status = $strout->write_aln($aln);
+	is $status, 1, "po output test";
+
+	$str = new Bio::AlignIO(
+		 '-file'   => Bio::Root::IO->catfile("t", "data", "testaln.po"),
+		 '-format' => 'po');
+	isa_ok($str,'Bio::AlignIO');
+	my $aln2 = $str->next_aln();
+	isa_ok($aln2,'Bio::Align::AlignI');
+	is $aln2->no_sequences, $aln->no_sequences;
+	is $aln2->length, $aln->length;
+} else {
+	SKIP: {  # Test::More's skip() takes (reason, count) and must run inside a SKIP block
+		skip("skipping due to bug in perl 5.6.0 that comes with OS X 10.2", 10);  # the if-branch above runs 10 tests
+	}
+}
+
+
+# MEME
+# this file has no Strand column
+$str = new Bio::AlignIO(
+		-file => Bio::Root::IO->catfile("t", "data", "test.meme"),
+		-format => 'meme');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');is $aln->length,25;
+is $aln->no_sequences,4;
+is $aln->get_seq_by_pos(3)->seq(),"CCTTAAAATAAAATCCCCACCACCA";
+is $aln->get_seq_by_pos(3)->strand,"1";
+
+# this file has a Strand column
+$str = new Bio::AlignIO(
+		-file => Bio::Root::IO->catfile("t", "data", "test.meme2"),
+		-format => 'meme');
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');is $aln->length,20;
+is $aln->no_sequences,8;
+is $aln->get_seq_by_pos(8)->seq(),"CCAGTCTCCCCTGAATACCC";
+is $aln->get_seq_by_pos(7)->strand,"-1";
+is $aln->get_seq_by_pos(6)->strand,"1";

Added: trunk/packages/bioperl/branches/upstream/current/t/AlignStats.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/AlignStats.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/AlignStats.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,147 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: AlignStats.t,v 1.12 2006/06/14 03:50:46 tseemann Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+
+    use Test;
+    plan tests => 36; 
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+my $debug = -1;
+
+use Bio::Align::DNAStatistics;
+use Bio::Align::ProteinStatistics;
+use Bio::AlignIO;
+use Bio::Root::IO;
+
+my $in = new Bio::AlignIO(-format => 'emboss',
+			  -file   => Bio::Root::IO->catfile('t', 'data',
+							    'insulin.water'));
+my $aln = $in->next_aln();
+ok($aln);
+my $stats = new Bio::Align::DNAStatistics(-verbose => $debug);
+ok( $stats->transversions($aln),4);
+ok( $stats->transitions($aln),9);
+ok( $stats->pairwise_stats->number_of_gaps($aln),21);
+ok( $stats->pairwise_stats->number_of_comparable_bases($aln),173);
+ok( $stats->pairwise_stats->number_of_differences($aln),13);
+
+my $d = $stats->distance(-align => $aln,
+			 -method=> 'f81');
+ok(  $d->get_entry('hs_insulin','seq2'), '0.07918');
+
+$d = $stats->distance(-align=> $aln,
+		      -method => 'JC');
+ok( $d->get_entry('hs_insulin','seq2'), '0.07918');
+
+$d = $stats->distance(-align=> $aln,
+		      -method => 'Kimura');
+ok( $d->get_entry('hs_insulin','seq2'), '0.07984');
+
+$d = $stats->distance(-align=> $aln,
+		      -method => 'TajimaNei');
+ok( $d->get_entry('seq2','hs_insulin'), '0.08106');
+
+$d = $stats->distance(-align=> $aln,
+		      -method => 'Tamura');
+ok( $d->get_entry('seq2','hs_insulin'), '0.08037');
+
+#$d =  $stats->distance(-align => $aln,
+#		       -method => 'JinNei');
+#ok( $d->get_entry('seq2','hs_insulin'), 0.0850);
+
+$in = new Bio::AlignIO(-format => 'clustalw',
+		       -file   => Bio::Root::IO->catfile('t','data',
+							 'hs_owlmonkey.aln'));
+
+$aln = $in->next_aln();
+ok($aln);
+
+ok( $stats->transversions($aln),10);
+ok( $stats->transitions($aln),17);
+ok( $stats->pairwise_stats->number_of_gaps($aln),19);
+ok( $stats->pairwise_stats->number_of_comparable_bases($aln),170);
+ok( $stats->pairwise_stats->number_of_differences($aln),27);
+
+# now test the distance calculations
+$d = $stats->distance(-align => $aln, -method => 'jc');
+ok( $d->get_entry('human','owlmonkey'), 0.17847);
+
+$d = $stats->distance(-align => $aln,
+		      -method=> 'f81');
+ok(  $d->get_entry('human','owlmonkey'), '0.17847');
+
+$d = $stats->distance(-align => $aln, -method => 'uncorrected');
+ok( $d->get_entry('human','owlmonkey'), 0.15882);
+
+$d =  $stats->distance(-align => $aln, -method => 'Kimura');
+ok( $d->get_entry('human','owlmonkey'), 0.18105);
+
+$d =  $stats->distance(-align => $aln, -method => 'TajimaNei');
+ok( $d->get_entry('human','owlmonkey'), 0.18489);
+
+$d =  $stats->distance(-align => $aln,
+		       -method => 'Tamura');
+
+ok( $d->get_entry('human','owlmonkey'), 0.18333);
+#$d =  $stats->distance(-align => $aln,
+#		       -method => 'JinNei');
+#ok( $d->get_entry('human','owlmonkey'), 0.2079);
+
+### now test Nei_gojobori methods ##
+$in = Bio::AlignIO->new(-format => 'fasta',
+			-file   => Bio::Root::IO->catfile('t','data',
+							  'nei_gojobori_test.aln'));
+my $alnobj = $in->next_aln();
+ok($alnobj);
+my $result = $stats->calc_KaKs_pair($alnobj, 'seq1', 'seq2');
+ok (sprintf ("%.1f", $result->[0]{'S'}), 40.5);
+ok (sprintf ("%.1f", $result->[0]{'z_score'}), '4.5');
+$result = $stats->calc_all_KaKs_pairs($alnobj);
+ok (int( $result->[1]{'S'}), 41);
+ok (int( $result->[1]{'z_score'}), 4);
+$result = $stats->calc_average_KaKs($alnobj, 100);
+ok (sprintf ("%.4f", $result->{'D_n'}), 0.1628);
+
+
+# now test Protein Distances
+my $pstats = Bio::Align::ProteinStatistics->new();
+$in = Bio::AlignIO->new(-format => 'clustalw',
+			-file   => Bio::Root::IO->catfile('t','data',
+							  'testaln.aln'));
+$alnobj = $in->next_aln();
+ok($alnobj);
+$result = $pstats->distance(-method => 'Kimura',
+			    -align  => $alnobj);
+ok($result);
+
+ok ($result->get_entry('P84139','P814153'),   '0.01443');
+ok ($result->get_entry('P841414','P851414'),  '0.01686');
+ok ($result->get_entry('P84139','P851414'),   '3.58352');
+
+my $seq = Bio::Seq->new(-id=>'NOT3MUL', -seq=>'gatac');
+ok($seq);
+eval { 
+  Bio::Align::DNAStatistics->count_syn_sites($seq); 
+};
+ok($@ =~ m/not integral number of codons/);
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/AlignUtil.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/AlignUtil.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/AlignUtil.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,63 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: AlignUtil.t,v 1.2 2004/01/24 20:16:08 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+
+    use Test;
+    plan tests => 16; 
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+my $debug = -1;
+
+use Bio::Align::Utilities qw(aa_to_dna_aln bootstrap_replicates);
+use Bio::AlignIO;
+use Bio::Root::IO;
+use Bio::SeqIO;
+
+my $in = new Bio::AlignIO(-format => 'clustalw',
+			  -file   => Bio::Root::IO->catfile
+			  ('t','data','pep-266.aln'));
+my $aln = $in->next_aln();
+ok($aln);
+$in->close();
+
+my $seqin = new Bio::SeqIO(-format => 'fasta',
+			   -file   => Bio::Root::IO->catfile
+			   ('t','data','cds-266.fas'));
+# get the cds sequences
+my %cds_seq;
+while( my $seq = $seqin->next_seq ) {
+    $cds_seq{$seq->display_id} = $seq;
+}
+
+my $cds_aln = &aa_to_dna_aln($aln,\%cds_seq);
+
+my @aa_seqs = $aln->each_seq;
+
+for my $cdsseq ( $cds_aln->each_seq ) {
+    my $peptrans = $cdsseq->translate();
+    my $aaseq = shift @aa_seqs;
+    ok($peptrans->seq(),$aaseq->seq());
+}
+
+my $bootstraps = &bootstrap_replicates($aln,10);
+
+ok(scalar @$bootstraps, 10);

Added: trunk/packages/bioperl/branches/upstream/current/t/Allele.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Allele.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Allele.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,52 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Allele.t,v 1.6 2001/10/22 08:22:58 heikki Exp $
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 15 }
+
+use Bio::Variation::Allele;
+
+ok(1);
+
+my($a,$trunc,$rev);
+
+$a = Bio::Variation::Allele->new(-seq=>'ACTGACTGACTG',
+			-display_id => 'new-id',
+			-alphabet => 'dna',
+			-accession_number => 'X677667',
+                        -desc=>'Sample Bio::Seq object');
+ok defined $a;  # was a trailing comma, which passed the next ok() call as this test's expected value
+ok ref($a), 'Bio::Variation::Allele';
+
+ok $a->accession_number(), 'X677667';
+ok $a->seq(), 'ACTGACTGACTG';
+ok $a->display_id(),'new-id' ;
+ok $a->desc, 'Sample Bio::Seq object';
+ok $a->alphabet(), 'dna';
+
+ok defined($trunc = $a->trunc(1,4));
+ok $trunc->seq(), 'ACTG', "Expecting ACTG. Got ". $trunc->seq();
+
+ok defined($rev = $a->revcom());
+ok $rev->seq(), 'CAGTCAGTCAGT';
+
+$a->is_reference(1);
+ok $a->is_reference;
+
+$a->repeat_unit('ACTG');
+ok $a->repeat_unit, 'ACTG';
+
+$a->repeat_count(3);
+ok $a->repeat_count, 3;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Alphabet.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Alphabet.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Alphabet.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,71 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Alphabet.t,v 1.4 2006/08/16 22:25:59 cjfields Exp $
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 96;
+}
+
+use Bio::Symbol::Alphabet;
+use Bio::Symbol::Symbol;
+use Bio::Symbol::DNAAlphabet;
+use Bio::Symbol::ProteinAlphabet;
+
+my $A = new Bio::Symbol::Symbol(-token => 'A' );
+my $U = new Bio::Symbol::Symbol(-token => 'U' );
+my $G = new Bio::Symbol::Symbol(-token => 'G' );
+my $T = new Bio::Symbol::Symbol(-token => 'T' );
+
+my $rna = new Bio::Symbol::Alphabet( -symbols => [ $A, $U, $G, $T ] );
+				     
+ok($rna);
+my @symbols = $rna->symbols;
+ok(scalar @symbols, 4);
+
+ok($rna->contains($A));
+ok($rna->contains($T));
+ok($rna->contains($U));
+ok($rna->contains($G));
+
+
+my $dna = new Bio::Symbol::DNAAlphabet();
+ok($dna->isa('Bio::Symbol::AlphabetI'));
+my $count = 0;
+
+my @dnasymbols = sort qw( A B C D G H K M N R S T U V W X Y );
+foreach my $s ( sort { $a->name cmp $b->name } $dna->symbols ) {
+    ok($s->name, $dnasymbols[$count]);    
+    ok($s->token, $dnasymbols[$count++]);    
+}
+
+my $prot = new Bio::Symbol::ProteinAlphabet();
+ok($prot->isa('Bio::Symbol::AlphabetI'));
+
+my @protsymbols = sort qw( * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
+my %h = (
+	 'Ala' => 'A',     'Asx' => 'B',     'Cys' => 'C',     'Asp' => 'D',
+	 'Glu' => 'E',     'Phe' => 'F',     'Gly' => 'G',     'His' => 'H',
+	 'Ile' => 'I',     'Lys' => 'K',     'Leu' => 'L',     'Met' => 'M',
+	 'Asn' => 'N',     'Pro' => 'P',     'Gln' => 'Q',     'Arg' => 'R',
+	 'Ser' => 'S',     'Thr' => 'T',     'Val' => 'V',     'Trp' => 'W',
+	 'Xaa' => 'X',     'Tyr' => 'Y',     'Glx' => 'Z',     'Ter' => '*',     
+	 'Sec' => 'U',     'Pyl' => 'O',     'Xle' => 'J',     
+	 );
+my @protnms = sort { $h{$a} cmp $h{$b} } keys %h;
+$count = 0;
+foreach my $s ( sort { $a->token cmp $b->token } $prot->symbols ) {
+    ok($s->name, $protnms[$count]);
+    ok($s->token, $protsymbols[$count++]);    
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Annotation.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Annotation.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Annotation.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,239 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Annotation.t,v 1.18.4.1 2006/11/30 09:24:00 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($HAVEGRAPHDIRECTED $DEBUG $NUMTESTS);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if( $@ ) { 
+	use lib 't/lib';
+    }
+    use Test::More;
+    plan tests => ($NUMTESTS = 101);
+	use_ok('Bio::Annotation::Collection');
+	use_ok('Bio::Annotation::DBLink');
+	use_ok('Bio::Annotation::Comment');
+	use_ok('Bio::Annotation::Reference');
+	use_ok('Bio::Annotation::SimpleValue');
+	use_ok('Bio::Annotation::Target');
+	use_ok('Bio::Annotation::AnnotationFactory');
+	use_ok('Bio::Annotation::StructuredValue');
+	use_ok('Bio::Seq');
+	use_ok('Bio::SeqFeature::Generic');
+	use_ok('Bio::SimpleAlign');
+	use_ok('Bio::Cluster::UniGene');
+}
+
+#simple value
+
+my $simple = Bio::Annotation::SimpleValue->new(
+						  -tagname => 'colour',
+						  -value   => '1'
+						 ), ;
+
+isa_ok($simple, 'Bio::AnnotationI');
+is $simple, 1;
+is $simple->value, 1;
+is $simple->tagname, 'colour';
+
+is $simple->value(0), 0;
+is $simple->value, 0;
+is $simple, 0;
+
+# link
+
+my $link1 = new Bio::Annotation::DBLink(-database => 'TSC',
+					-primary_id => 'TSC0000030'
+					);
+isa_ok($link1,'Bio::AnnotationI');
+is $link1->database(), 'TSC';
+is $link1->primary_id(), 'TSC0000030';
+is $link1->as_text, 'Direct database link to TSC0000030 in database TSC';
+my $ac = Bio::Annotation::Collection->new();
+isa_ok($ac,'Bio::AnnotationCollectionI');
+
+$ac->add_Annotation('dblink',$link1);
+$ac->add_Annotation('dblink',
+		    Bio::Annotation::DBLink->new(-database => 'TSC',
+						 -primary_id => 'HUM_FABV'));
+
+my $comment = Bio::Annotation::Comment->new( '-text' => 'sometext');
+is $comment->text, 'sometext';
+is $comment->as_text, 'Comment: sometext';
+$ac->add_Annotation('comment', $comment);
+
+
+
+my $target = new Bio::Annotation::Target(-target_id  => 'F321966.1',
+                                         -start      => 1,
+                                         -end        => 200,
+                                         -strand     => 1,
+					 );
+isa_ok($target,'Bio::AnnotationI');
+ok $ac->add_Annotation('target', $target);
+
+
+my $ref = Bio::Annotation::Reference->new( '-authors' => 'author line',
+					   '-title'   => 'title line',
+					   '-location'=> 'location line',
+					   '-start'   => 12);
+isa_ok($ref,'Bio::AnnotationI');
+is $ref->authors, 'author line';
+is $ref->title,  'title line';
+is $ref->location, 'location line';
+is $ref->start, 12;
+is $ref->database, 'MEDLINE';
+is $ref->as_text, 'Reference: title line';
+$ac->add_Annotation('reference', $ref);
+
+
+my $n = 0;
+foreach my $link ( $ac->get_Annotations('dblink') ) {
+    is $link->database, 'TSC';
+    is $link->tagname(), 'dblink';
+    $n++;
+}
+is ($n, 2);
+
+$n = 0;
+my @keys = $ac->get_all_annotation_keys();
+is (scalar(@keys), 4);
+foreach my $ann ( $ac->get_Annotations() ) {
+    shift(@keys) if ($n > 0) && ($ann->tagname ne $keys[0]);
+    is $ann->tagname(), $keys[0];
+    $n++;
+}
+is ($n, 5);
+
+$ac->add_Annotation($link1);
+
+$n = 0;
+foreach my $link ( $ac->get_Annotations('dblink') ) {
+    is $link->tagname(), 'dblink';
+    $n++;
+}
+is ($n, 3);
+
+# annotation of structured simple values (like swissprot's GN line)
+my $ann = Bio::Annotation::StructuredValue->new();
+isa_ok($ann, "Bio::AnnotationI");
+
+$ann->add_value([-1], "val1");
+is ($ann->value(), "val1");
+$ann->value("compat test");
+is ($ann->value(), "compat test");
+$ann->add_value([-1], "val2");
+is ($ann->value(-joins => [" AND "]), "compat test AND val2");
+$ann->add_value([0], "val1");
+is ($ann->value(-joins => [" AND "]), "val1 AND val2");
+$ann->add_value([-1,-1], "val3", "val4");
+$ann->add_value([-1,-1], "val5", "val6");
+$ann->add_value([-1,-1], "val7");
+is ($ann->value(-joins => [" AND "]), "val1 AND val2 AND (val3 AND val4) AND (val5 AND val6) AND val7");  # is(): Test::More's ok() would take the 2nd argument as a test name
+is ($ann->value(-joins => [" AND ", " OR "]), "val1 AND val2 AND (val3 OR val4) AND (val5 OR val6) AND val7");
+
+$n = 1;
+foreach ($ann->get_all_values()) {
+    is ($_, "val".$n++);
+}
+
+# nested collections
+my $nested_ac = Bio::Annotation::Collection->new();
+$nested_ac->add_Annotation('nested', $ac);
+
+is (scalar($nested_ac->get_Annotations()), 1);
+($ac) = $nested_ac->get_Annotations();
+isa_ok($ac, "Bio::AnnotationCollectionI");
+is (scalar($nested_ac->get_all_Annotations()), 6);
+$nested_ac->add_Annotation('gene names', $ann);
+is (scalar($nested_ac->get_Annotations()), 2);
+is (scalar($nested_ac->get_all_Annotations()), 7);
+is (scalar($nested_ac->get_Annotations('dblink')), 0);
+my @anns = $nested_ac->get_Annotations('gene names');
+isa_ok($anns[0], "Bio::Annotation::StructuredValue");
+@anns = map { $_->get_Annotations('dblink');
+	  } $nested_ac->get_Annotations('nested');
+is (scalar(@anns), 3);
+is (scalar($nested_ac->flatten_Annotations()), 2);
+is (scalar($nested_ac->get_Annotations()), 7);
+is (scalar($nested_ac->get_all_Annotations()), 7);
+
+SKIP: {
+	eval {require Graph::Directed; 
+	  require Bio::Annotation::OntologyTerm; };
+	skip('Graph::Directed not installed cannot test'.
+		 ' Bio::Annotation::OntologyTerm module',6) if $@;
+	# OntologyTerm annotation
+    my $termann = Bio::Annotation::OntologyTerm->new(-label => 'test case',
+						     -identifier => 'Ann:00001',
+						     -ontology => 'dumpster');
+    isa_ok($termann->term,'Bio::Ontology::Term');
+    is ($termann->term->name, 'test case');
+    is ($termann->term->identifier, 'Ann:00001');
+    is ($termann->tagname, 'dumpster');
+    is ($termann->ontology->name, 'dumpster');
+    is ($termann->as_text, "dumpster|test case|");
+}
+
+# AnnotatableI
+my $seq = Bio::Seq->new();
+isa_ok($seq,"Bio::AnnotatableI");
+my $fea = Bio::SeqFeature::Generic->new();
+isa_ok($fea, "Bio::AnnotatableI");
+my $clu = Bio::Cluster::UniGene->new();
+isa_ok($clu, "Bio::AnnotatableI");
+my $aln = Bio::SimpleAlign->new();
+isa_ok($aln,"Bio::AnnotatableI");  # check the alignment just built, not $clu a second time
+
+# tests for Bio::Annotation::AnnotationFactory
+
+my $factory = Bio::Annotation::AnnotationFactory->new;
+isa_ok($factory, 'Bio::Factory::ObjectFactoryI');
+
+# defaults to SimpleValue
+$ann = $factory->create_object(-value => 'peroxisome',
+                                  -tagname => 'cellular component');
+like(ref $ann, qr(Bio::Annotation::SimpleValue));
+
+$factory->type('Bio::Annotation::OntologyTerm');
+
+$ann = $factory->create_object(-name => 'peroxisome',
+			       -tagname => 'cellular component');
+ok(defined $ann);
+like(ref($ann), qr(Bio::Annotation::OntologyTerm));
+
+SKIP: {
+	skip("TODO: Create Annotation::Comment based on parameter only",2);
+	ok $ann = $factory->create_object(-text => 'this is a comment');
+	like(ref $ann, qr(Bio::Annotation::Comment));
+}
+
+ok $factory->type('Bio::Annotation::Comment');
+ok $ann = $factory->create_object(-text => 'this is a comment');
+like(ref $ann, qr(Bio::Annotation::Comment));
+
+
+# factory guessing the type: Comment
+$factory = new Bio::Annotation::AnnotationFactory();
+ok $ann = $factory->create_object(-text => 'this is a comment');
+like(ref $ann, qr(Bio::Annotation::Comment));
+
+# factory guessing the type: Target
+$factory = new Bio::Annotation::AnnotationFactory();
+ok $ann = $factory->create_object(-target_id => 'F1234', -start => 1, -end => 10);
+like(ref $ann, qr(Bio::Annotation::Target));
+
+# factory guessing the type: OntologyTerm
+$factory = new Bio::Annotation::AnnotationFactory();
+ok(defined ($ann = $factory->create_object(-name => 'peroxisome',
+					  -tagname => 'cellular component')));
+like(ref $ann, qr(Bio::Annotation::OntologyTerm));

Added: trunk/packages/bioperl/branches/upstream/current/t/AnnotationAdaptor.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/AnnotationAdaptor.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/AnnotationAdaptor.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: AnnotationAdaptor.t,v 1.2 2004/11/24 02:14:06 allenday Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 19;
+}
+
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::AnnotationAdaptor;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Comment;
+use Bio::Annotation::SimpleValue;
+
+ok(1);
+
+my $feat = Bio::SeqFeature::Generic->new();
+$feat->add_tag_value("tag1", "value of tag1");
+$feat->add_tag_value("tag1", "another value of tag1");
+$feat->add_tag_value("tag2", "some value for a tag");
+
+my $link1 = new Bio::Annotation::DBLink(-database => 'TSC',
+                                        -primary_id => 'TSC0000030',
+                                        #cannot use "tag2", already claimed by Bio::Annotation::SimpleValue by above $feat->add_tag_value() call.
+                                        -tagname => "tag3"
+                                       );
+$feat->annotation->add_Annotation($link1);
+
+ok(1);
+
+my $anncoll = Bio::SeqFeature::AnnotationAdaptor->new(-feature => $feat);
+
+ok ($anncoll->get_num_of_annotations(), 4);
+ok (scalar($anncoll->get_all_annotation_keys()), 3);
+
+my @anns = $anncoll->get_Annotations("tag1");
+my @vals = $feat->each_tag_value("tag1");
+
+ok (scalar(@anns), scalar(@vals));
+for(my $i = 0; $i < @anns; $i++) {
+  ok ($anns[$i]->value(), $vals[$i]);
+}
+
+@anns = $anncoll->get_Annotations("tag3");
+my @fanns = $feat->annotation->get_Annotations("tag3");
+@vals = $feat->each_tag_value("tag3");
+
+ok (scalar(@fanns), 1);
+ok (scalar(@anns), 1);
+ok (scalar(@vals), 1);
+ok ($anns[0]->primary_id(), $vals[0]);
+
+ok ($anns[0]->primary_id(), $fanns[0]->primary_id());
+
+my $comment = Bio::Annotation::Comment->new( '-text' => 'sometext');
+$anncoll->add_Annotation('comment', $comment);
+
+@fanns = $feat->annotation->get_Annotations("comment");
+ok (scalar(@fanns), 1);
+ok ($fanns[0]->text(), "sometext");
+
+my $tagval = Bio::Annotation::SimpleValue->new(-value => "boring value",
+					       -tagname => "tag2");
+$anncoll->add_Annotation($tagval);
+
+@anns = $anncoll->get_Annotations("tag3");
+@fanns = $feat->annotation->get_Annotations("tag3");
+@vals = $feat->each_tag_value("tag3");
+
+ok (scalar(@fanns), 1);
+ok (scalar(@anns), 1);
+ok (scalar(@vals), 1);
+ok ($anns[0]->primary_id(), $vals[0]);
+
+ok ($anncoll->get_num_of_annotations(), 6);
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Assembly.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Assembly.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Assembly.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Assembly.t,v 1.7.2.1 2006/10/02 23:10:39 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $error;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 19;
+    plan tests => $NUMTESTS;
+    eval { require DB_File };
+    if( $@ ) {
+		 print STDERR "DB_File not installed. This means the Assembly modules are not available.  Skipping tests.\n";
+		 for( 1..$NUMTESTS ) {
+			 skip("DB_File not installed",1);
+		 }
+		 $error = 1; 
+    }
+}
+
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('unable to run all of the DB tests',1);
+	}
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+#syntax test
+
+require Bio::Assembly::IO;
+require Bio::Assembly::Scaffold;
+require Bio::Assembly::Contig;
+require Bio::Assembly::ContigAnalysis;
+
+use Data::Dumper;
+
+ok 1;
+
+#
+# Testing IO
+#
+
+# -file => ">".Bio::Root::IO->catfile("t","data","primaryseq.embl")
+
+ok my $in = Bio::Assembly::IO->new
+    (-file=>Bio::Root::IO->catfile
+     ("t","data","consed_project","edit_dir","test_project.phrap.out"));
+
+ok my $sc = $in->next_assembly;
+#print Dumper $sc;
+
+#
+# Testing Scaffold
+#
+
+
+ok $sc->id, "NoName";
+ok $sc->id('test'), "test";
+
+ok $sc->annotation;
+skip "no annotations in Annotation collection?", $sc->annotation->get_all_annotation_keys, 0;
+ok $sc->get_nof_contigs, 1;
+ok $sc->get_nof_sequences_in_contigs, 2;
+skip "should return a number", $sc->get_nof_singlets, 0;
+skip $sc->get_seq_ids, 2;
+skip $sc->get_contig_ids, 1;
+skip "nothing to test", $sc->get_singlet_ids;
+
+
+#
+# Testing Contig
+#
+
+#
+# Testing ContigAnalysis
+#
+
+#
+# Testing Ace 
+#
+
+my $aio = Bio::Assembly::IO->new(
+    -file=>Bio::Root::IO->catfile
+     ("t","data","consed_project","edit_dir","test_project.fasta.screen.ace.2"),
+    -format=>'ace',
+);
+
+my $assembly = $aio->next_assembly();
+my @contigs = $assembly->all_contigs();
+
+my $direction = $contigs[0]->strand;
+ok $direction, 1;
+
+my $features =  $contigs[0]->get_features_collection;
+my @contig_features = $features->get_all_features;
+ok @contig_features, 8;
+
+my @annotations = grep {$_->primary_tag eq 'Annotation'} @contig_features;
+ok @annotations, 2;
+my $had_tag = 0;
+foreach my $an (@annotations) {
+	if ($an->has_tag('extra_info')) {
+		$had_tag++;
+		ok (($an->get_tag_values('extra_info'))[0], "contig extra\ninfo\n");
+	}
+	elsif ($an->has_tag('comment')){
+		$had_tag++;
+		ok (($an->get_tag_values('comment'))[0], "contig tag\ncomment\n");
+	}
+}
+ok $had_tag, 2;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/Biblio.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Biblio.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Biblio.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,205 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Biblio.t,v 1.10 2002/03/08 20:48:47 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl Biblio.t'
+
+use strict;
+use vars qw($NUMTESTS);
+
+my $error;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 24;
+}
+
+my $testnum;
+my $verbose = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my $serror = 0; my $serror2 = 0;
+my $ferror = 0; my $ferror2 = 0;
+my $xerror = 0;
+
+my $format = ($ENV{'TEST_DETAILS'} ? '%-25s' : '');
+
+unless (eval "require SOAP::Lite; 1;") {
+    print STDERR "SOAP::Lite not installed. Skipping some tests.\n";
+    $serror = 1;
+}
+
+unless (eval "require IO::String; 1;") {
+    print STDERR "IO::String not installed. Skipping some tests.\n";
+    $serror2 = 1;
+}
+
+unless (eval "require XML::Parser; 1;") {
+    print STDERR "XML::Parser not installed. Skipping some tests.\n";
+    $xerror = 1;
+}
+
+use Bio::Root::IO;
+my $testfile = Bio::Root::IO->catfile ('t','data','stress_test_medline.xml');
+unless (-e $testfile) {
+    print STDERR "Cannot find testing data '$testfile'. Skipping some tests.\n";
+    $ferror = 1;
+}
+my $testfile2 = Bio::Root::IO->catfile ('t','data','stress_test_pubmed.xml');
+unless (-e $testfile2) {
+    print STDERR "Cannot find testing data '$testfile2'. Skipping some tests.\n";
+    $ferror2 = 1;
+}
+
+
+# check 'use ...'
+eval { require Bio::Biblio };
+print sprintf ($format, 'use Bio::Biblio'); ok (%Bio::Biblio::);
+print $@ if $@;
+
+# check 'new...'
+my $biblio;
+eval { $biblio = new Bio::Biblio (-location => 'http://localhost:4567'); };
+print sprintf ($format, "new Bio::Biblio "); skip ($serror, defined $biblio);
+#print $@ if $@;
+
+
+# check 'use ...IO...'
+eval { require Bio::Biblio::IO };
+print sprintf ($format, "use Bio::Biblio::IO "); ok (%Bio::Biblio::IO::);
+
+my $io;
+
+# check MEDLINE XML parser
+print sprintf ($format, "new Bio::Biblio::IO (1)");
+skip ($ferror || $xerror,
+      defined (eval { $io = new Bio::Biblio::IO ('-format' => 'medlinexml',
+						 '-file'   => $testfile,
+						 '-result' => 'raw') }));
+print $@ if $@;
+
+print "Reading and parsing MEDLINE XML file...\n";
+print sprintf ($format, "    citation 1 "); skip ($ferror || $xerror, eval { $io->next_bibref->{'medlineID'} }, 'Text1');
+print sprintf ($format, "    citation 2 "); skip ($ferror || $xerror, eval { $io->next_bibref->{'medlineID'} }, 'Text248');
+print sprintf ($format, "    citation 3 "); skip ($ferror || $xerror, eval { $io->next_bibref->{'medlineID'} }, 'Text495');
+
+print "Getting citations using callback...\n";
+my (@ids) = ('Text1', 'Text248', 'Text495');
+my $callback_used = 'no';
+if ($ferror || $xerror) {
+    foreach my $i (1..3) {
+	print sprintf ($format, "    citation $i "); skip (1,1);
+    }
+} else {
+    $io = new Bio::Biblio::IO ('-format'   => 'medlinexml',
+			       '-file'     => $testfile,
+#			       '-result'   => 'medline2ref',  # this is default
+			       '-callback' => \&callback);
+}
+print sprintf ($format, "    calling callback "); skip ($ferror || $xerror, $callback_used, 'yes');
+
+sub callback {    # handler given as '-callback' => \&callback to the Bio::Biblio::IO constructor above
+    my $citation = shift;    # first argument: the citation object handed to the handler
+    $callback_used = 'yes';    # records that the handler ran; checked by the "calling callback" test
+    print sprintf ($format, '    citation ' . (@ids+0) . ' '); skip ($ferror, $citation->{'_identifier'}, shift @ids);    # label shows how many expected ids remain; compares against the next expected id, skipped when $ferror is set
+}
+
+print "Reading and parsing XML string...\n";
+if ($xerror) {
+    print sprintf ($format, "    citation 1 "); skip (1, 1);
+    print sprintf ($format, "    citation 2 "); skip (1, 1);
+} else {
+    $io = new Bio::Biblio::IO ('-format'   => 'medlinexml',
+			       '-data'     => <<XMLDATA,
+<MedlineCitationSet>
+<MedlineCitation>
+<MedlineID>12345678</MedlineID>
+<Article><Journal></Journal></Article>
+</MedlineCitation>
+<MedlineCitation>
+<MedlineID>abcdefgh</MedlineID>
+<Article><Journal></Journal></Article>
+</MedlineCitation>
+</MedlineCitationSet>
+XMLDATA
+			       '-result'   => 'medline2ref',
+			       );
+    print sprintf ($format, "    citation 1 "); ok ($io->next_bibref->{'_identifier'}, '12345678');
+    print sprintf ($format, "    citation 2 "); ok ($io->next_bibref->{'_identifier'}, 'abcdefgh');
+}
+
+print "Reading and parsing XML string handle...\n";
+#use IO::String;
+if ($xerror || $serror2) {
+    print sprintf ($format, "    citation 1 "); skip (1,1);
+    print sprintf ($format, "    citation 2 "); skip (1,1);
+} else {
+    my $data = <<XMLDATA;
+<MedlineCitationSet>
+<MedlineCitation>
+<MedlineID>87654321</MedlineID>
+<Article><Journal></Journal></Article>
+</MedlineCitation>
+<MedlineCitation>
+<MedlineID>hgfedcba</MedlineID>
+<Article><Journal></Journal></Article>
+</MedlineCitation>
+</MedlineCitationSet>
+XMLDATA
+
+    $io = new Bio::Biblio::IO ('-format' => 'medlinexml',
+			       '-fh'     => IO::String->new ($data),
+			       );
+    print sprintf ($format, "    citation 1 "); ok (eval { $io->next_bibref->identifier }, '87654321');
+    print sprintf ($format, "    citation 2 "); ok (eval { $io->next_bibref->identifier }, 'hgfedcba');
+}
+
+# check PUBMED XML parser
+print sprintf ($format, "new Bio::Biblio::IO (2)");
+skip ($ferror2 || $xerror,
+      defined (eval { $io = new Bio::Biblio::IO ('-format' => 'pubmedxml',
+						 '-file'   => $testfile2,
+						 '-result' => 'pubmed2ref') }));
+print "Reading and parsing PUBMED XML file...\n";
+if ($xerror) {
+    foreach my $i (1..4) {
+	print sprintf ($format, "    citation $i "); skip (1,"Can't read citation from PUBMED XML");
+    }
+} else {
+    print sprintf ($format, "    citation 1 "); skip ($ferror2, eval { $io->next_bibref->identifier }, '11223344');
+    print sprintf ($format, "    citation 2 "); skip ($ferror2, eval { $io->next_bibref->identifier }, '21583752');
+    print sprintf ($format, "    citation 3 "); skip ($ferror2, eval { $io->next_bibref->identifier }, '21465135');
+    print sprintf ($format, "    citation 4 "); skip ($ferror2, eval { $io->next_bibref->identifier }, '21138228');
+}
+
+# test for FH
+my $fh;
+my @expvals = qw(11223344 21583752 21465135 21138228);
+print "Testing FH\n";
+eval { 
+    $fh = Bio::Biblio::IO->newFh('-format' => 'pubmedxml',
+				  '-file'   => $testfile2,
+				  '-result' => 'pubmed2ref');
+    while(<$fh>) {
+	ok($_->identifier,shift @expvals);
+    }
+};
+if( $@) {
+    foreach ( 1..4 ) { skip(1,"unable to use pubmedxml"); }
+}
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/t/BiblioReferences.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/BiblioReferences.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/BiblioReferences.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,656 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: BiblioReferences.t,v 1.9 2002/03/08 13:58:00 senger Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+
+my $error;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 537;
+    plan tests => $NUMTESTS;
+}
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my ($biblio, $count, $str, @args);
+my ($citation, $provider);
+
+my $format = ($ENV{'TEST_DETAILS'} ? "\t%-45s" : '');
+
+print "Testing 'use Bio::Biblio:: ...'\n";
+
+eval { require Bio::Biblio::Article };
+print sprintf ($format, "use Bio::Biblio::Article"); ok (%Bio::Biblio::Article::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Book };
+print sprintf ($format, "use Bio::Biblio::Book"); ok (%Bio::Biblio::Book::);
+print $@ if $@;
+
+eval { require Bio::Biblio::BookArticle };
+print sprintf ($format, "use Bio::Biblio::BookArticle"); ok (%Bio::Biblio::BookArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Journal };
+print sprintf ($format, "use Bio::Biblio::Journal"); ok (%Bio::Biblio::Journal::);
+print $@ if $@;
+
+eval { require Bio::Biblio::JournalArticle };
+print sprintf ($format, "use Bio::Biblio::JournalArticle"); ok (%Bio::Biblio::JournalArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::MedlineArticle };
+print sprintf ($format, "use Bio::Biblio::MedlineArticle"); ok (%Bio::Biblio::MedlineArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::MedlineBook };
+print sprintf ($format, "use Bio::Biblio::MedlineBook"); ok (%Bio::Biblio::MedlineBook::);
+print $@ if $@;
+
+eval { require Bio::Biblio::MedlineBookArticle };
+print sprintf ($format, "use Bio::Biblio::MedlineBookArticle"); ok (%Bio::Biblio::MedlineBookArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::MedlineJournal };
+print sprintf ($format, "use Bio::Biblio::MedlineJournal"); ok (%Bio::Biblio::MedlineJournal::);
+print $@ if $@;
+
+eval { require Bio::Biblio::MedlineJournalArticle };
+print sprintf ($format, "use Bio::Biblio::MedlineJournalArticle"); ok (%Bio::Biblio::MedlineJournalArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Organisation };
+print sprintf ($format, "use Bio::Biblio::Organisation"); ok (%Bio::Biblio::Organisation::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Patent };
+print sprintf ($format, "use Bio::Biblio::Patent"); ok (%Bio::Biblio::Patent::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Person };
+print sprintf ($format, "use Bio::Biblio::Person"); ok (%Bio::Biblio::Person::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Proceeding };
+print sprintf ($format, "use Bio::Biblio::Proceeding"); ok (%Bio::Biblio::Proceeding::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Provider };
+print sprintf ($format, "use Bio::Biblio::Provider"); ok (%Bio::Biblio::Provider::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Ref };
+print sprintf ($format, "use Bio::Biblio::Ref"); ok (%Bio::Biblio::Ref::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Service };
+print sprintf ($format, "use Bio::Biblio::Service"); ok (%Bio::Biblio::Service::);
+print $@ if $@;
+
+eval { require Bio::Biblio::TechReport };
+print sprintf ($format, "use Bio::Biblio::TechReport"); ok (%Bio::Biblio::TechReport::);
+print $@ if $@;
+
+eval { require Bio::Biblio::Thesis };
+print sprintf ($format, "use Bio::Biblio::Thesis"); ok (%Bio::Biblio::Thesis::);
+print $@ if $@;
+
+eval { require Bio::Biblio::WebResource };
+print sprintf ($format, "use Bio::Biblio::WebResource"); ok (%Bio::Biblio::WebResource::);
+print $@ if $@;
+
+eval { require Bio::Biblio::PubmedArticle };
+print sprintf ($format, "use Bio::Biblio::PubmedArticle"); ok (%Bio::Biblio::PubmedArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::PubmedBookArticle };
+print sprintf ($format, "use Bio::Biblio::PubmedBookArticle"); ok (%Bio::Biblio::PubmedBookArticle::);
+print $@ if $@;
+
+eval { require Bio::Biblio::PubmedJournalArticle };
+print sprintf ($format, "use Bio::Biblio::PubmedJournalArticle"); ok (%Bio::Biblio::PubmedJournalArticle::);
+print $@ if $@;
+
+print "Testing 'new Bio::Biblio:: ...'\n";
+foreach my $object (
+		    qw(
+		     Bio::Biblio::Article
+		     Bio::Biblio::Book
+		     Bio::Biblio::BookArticle
+		     Bio::Biblio::Journal
+		     Bio::Biblio::JournalArticle
+		     Bio::Biblio::MedlineArticle
+		     Bio::Biblio::MedlineBook
+		     Bio::Biblio::MedlineBookArticle
+		     Bio::Biblio::MedlineJournal
+		     Bio::Biblio::MedlineJournalArticle
+		     Bio::Biblio::Organisation
+		     Bio::Biblio::Patent
+		     Bio::Biblio::Person
+		     Bio::Biblio::Proceeding
+		     Bio::Biblio::Provider
+		     Bio::Biblio::Ref
+		     Bio::Biblio::Service
+		     Bio::Biblio::TechReport
+		     Bio::Biblio::Thesis
+		     Bio::Biblio::WebResource
+		     Bio::Biblio::PubmedArticle
+		     Bio::Biblio::PubmedBookArticle
+		     Bio::Biblio::PubmedJournalArticle
+		       )) {
+    print sprintf ($format, "new $object"); ok defined ($biblio = new $object);
+}
+
+my @scalar_methods_for_ref =
+    qw(
+     abstract
+     abstract_language
+     abstract_type
+     author_list_complete
+     cross_references_list_complete
+     date
+     date_completed
+     date_created
+     date_revised
+     format
+     identifier
+     language
+     last_modified_date
+     repository_subset
+     rights
+     spatial_location
+     subject_headings_source
+     temporal_period
+     title
+     toc
+     toc_type
+     type
+     );
+my @other_methods_for_ref =
+    qw(
+     authors
+     cross_references
+     codes
+     contributors
+     keywords
+     publisher
+     subject_headings
+    );
+
+my @scalar_methods_for_book =
+    qw(
+     edition
+     isbn
+     series
+     volume
+     );
+my @other_methods_for_book =
+    qw(
+     editor
+     );
+
+my @scalar_methods_for_bookarticle =
+    qw(
+     );
+my @other_methods_for_bookarticle =
+    qw(
+     book
+     );
+
+my @scalar_methods_for_article =
+    qw(
+     first_page
+     last_page
+     );
+my @other_methods_for_article =
+    qw(
+     );
+
+my @scalar_methods_for_journalarticle =
+    qw(
+     issue
+     issue_supplement
+     volume
+     );
+my @other_methods_for_journalarticle =
+    qw(
+     journal
+     );
+
+my @scalar_methods_for_medlinearticle =
+    qw(
+     affiliation
+     citation_owner
+     date_of_electronic_publication
+     gene_symbols
+     grant_list_complete
+     medline_date
+     medline_id
+     medline_page
+     number_of_references
+     other_languages
+     pmid
+     season
+     status
+     vernacular_title
+     );
+my @other_methods_for_medlinearticle =
+    qw(
+     chemicals
+     comment_ins
+     comment_ons
+     erratum_fors
+     erratum_ins
+     general_notes
+     grants
+     mesh_headings
+     original_report_ins
+     other_abstracts
+     other_ids
+     republished_froms
+     republished_ins
+     retraction_ins
+     retraction_ofs
+     summary_for_patients_ins
+     update_ins
+     update_ofs
+     );
+
+my @scalar_methods_for_medlinejournalarticle =
+    qw(
+     );
+my @other_methods_for_medlinejournalarticle =
+    qw(
+     journal
+     );
+
+my @scalar_methods_for_medlinebookarticle =
+    qw(
+     );
+my @other_methods_for_medlinebookarticle =
+    qw(
+     book
+     );
+
+my @scalar_methods_for_medlinebook =
+    qw(
+     );
+my @other_methods_for_medlinebook =
+    qw(
+     );
+
+
+
+my @scalar_methods_for_pubmedarticle =
+    qw(
+     pubmed_status
+     pubmed_provider_id
+     );
+my @other_methods_for_pubmedarticle =
+    qw(
+     pubmed_history_list
+     pubmed_article_id_list
+     pubmed_url_list
+     );
+
+
+my @scalar_methods_for_journal =
+    qw(
+     abbreviation
+     issn
+     name
+     );
+my @other_methods_for_journal =
+    qw(
+     );
+
+my @scalar_methods_for_medlinejournal =
+    qw(
+     coden
+     country
+     medline_code
+     medline_ta
+     nlm_unique_id
+     );
+my @other_methods_for_medlinejournal =
+    qw(
+     );
+
+my @scalar_methods_for_patent =
+    qw(
+     doc_number
+     doc_office
+     doc_type
+     );
+my @other_methods_for_patent =
+    qw(
+     applicants
+     );
+
+my @scalar_methods_for_webresource =
+    qw(
+     url
+     estimated_size
+     cost
+     );
+my @other_methods_for_webresource =
+    qw(
+     );
+
+my @scalar_methods_for_provider =
+    qw(
+     type
+     );
+
+my @scalar_methods_for_person =
+    qw(
+     affiliation
+     email
+     firstname
+     forename
+     initials
+     lastname
+     middlename
+     postal_address
+     suffix
+     );
+
+my @scalar_methods_for_organisation =
+    qw(
+     name
+     );
+
+my @scalar_methods_for_service =
+    qw(
+     name
+     );
+
+#
+# Bio::Biblio::MedlineJournalArticle
+#
+print "Testing Bio::Biblio::MedlineJournalArticle ...\n";
+$citation = new Bio::Biblio::MedlineJournalArticle;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_ref,
+                    @scalar_methods_for_article,
+                    @scalar_methods_for_journalarticle,
+                    @scalar_methods_for_medlinearticle,
+                    @scalar_methods_for_medlinejournalarticle) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::MedlineJournalArticle (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_ref,
+                    @other_methods_for_article,
+                    @other_methods_for_journalarticle,
+                    @other_methods_for_medlinearticle,
+                    @other_methods_for_medlinejournalarticle) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+my ($me) = new Bio::Biblio::Person (-lastname => 'me');
+my ($you) = new Bio::Biblio::Person (-lastname => 'you');
+print sprintf ($format, "add_author 1"); ok $citation->add_author ($me);
+print sprintf ($format, "add_author 2"); ok $citation->add_author ($you);
+print sprintf ($format, "get authors");  ok ${ $citation->authors }[1]->lastname, 'you';
+
+print sprintf ($format, "add_contributor 1"); ok $citation->add_contributor ($me);
+print sprintf ($format, "add_contributor 2"); ok $citation->add_contributor ($you);
+print sprintf ($format, "get contributors");  ok ${ $citation->contributors }[1]->lastname, 'you';
+
+use Bio::Annotation::DBLink;
+my $link1 = new Bio::Annotation::DBLink(-database => 'here',
+				        -primary_id => '001'
+				        );
+my $link2 = new Bio::Annotation::DBLink(-database => 'there',
+				        -primary_id => '002'
+				        );
+print sprintf ($format, "add_cross_reference 1"); ok $citation->add_cross_reference ($link1);
+print sprintf ($format, "add_cross_reference 2"); ok $citation->add_cross_reference ($link2);
+print sprintf ($format, "get cross_references");  ok ${ $citation->cross_references }[0]->database, 'here';
+print sprintf ($format, "get cross_references");  ok ${ $citation->cross_references }[1]->primary_id, '002';
+
+
+#
+# Bio::Biblio::MedlineBookArticle
+#
+print "Testing Bio::Biblio::MedlineBookArticle ...\n";
+$citation = new Bio::Biblio::MedlineBookArticle;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_ref,
+                    @scalar_methods_for_article,
+                    @scalar_methods_for_bookarticle,
+                    @scalar_methods_for_medlinearticle,
+                    @scalar_methods_for_medlinebookarticle) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::MedlineBookArticle (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_ref,
+                    @other_methods_for_article,
+                    @other_methods_for_bookarticle,
+                    @other_methods_for_medlinearticle,
+                    @other_methods_for_medlinebookarticle) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+
+
+#
+# Bio::Biblio::MedlineBook
+#
+print "Testing Bio::Biblio::MedlineBook ...\n";
+$citation = new Bio::Biblio::MedlineBook;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_ref,
+                    @scalar_methods_for_book,
+                    @scalar_methods_for_medlinebook) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::MedlineBook (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_ref,
+                    @other_methods_for_book,
+                    @other_methods_for_medlinebook) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+
+#
+# Bio::Biblio::MedlineJournal
+#
+print "Testing Bio::Biblio::MedlineJournal ...\n";
+$citation = new Bio::Biblio::MedlineJournal;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_journal,
+                    @scalar_methods_for_medlinejournal) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::MedlineJournal (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_journal,
+                    @other_methods_for_medlinejournal) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+
+#
+# Bio::Biblio::Patent
+#
+print "Testing Bio::Biblio::Patent ...\n";
+$citation = new Bio::Biblio::Patent;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_patent) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::Patent (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_patent) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+
+#
+# Bio::Biblio::WebResource
+#
+print "Testing Bio::Biblio::WebResource ...\n";
+$citation = new Bio::Biblio::WebResource;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_webresource) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::WebResource (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_webresource) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+
+
+#
+# Bio::Biblio::Person
+#
+print "Testing Bio::Biblio::Person ...\n";
+$provider = new Bio::Biblio::Person;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_provider,
+                    @scalar_methods_for_person) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $provider->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $provider->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::Person (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $provider->$method(), $args[$i+1];
+}
+
+#
+# Bio::Biblio::Organisation
+#
+print "Testing Bio::Biblio::Organisation ...\n";
+$provider = new Bio::Biblio::Organisation;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_provider,
+                    @scalar_methods_for_organisation) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $provider->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $provider->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::Organisation (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $provider->$method(), $args[$i+1];
+}
+
+#
+# Bio::Biblio::Service
+#
+print "Testing Bio::Biblio::Service ...\n";    # section: exercise each scalar accessor as setter and getter, then via constructor args
+$provider = new Bio::Biblio::Service;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_provider,
+                    @scalar_methods_for_service) {    # Service's own attribute list (same contents as the organisation list today)
+    $str = 'string' . ($count++);    # distinct value per attribute: string1, string2, ...
+    print sprintf ($format, "set '$method' "); ok $provider->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $provider->$method(), $str;
+    push (@args, ("-$method" => $str));    # collect -attr => value pairs for the constructor test below
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::Service (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);    # strip the leading '-' to recover the accessor name
+    print sprintf ($format, "   $method"); ok $provider->$method(), $args[$i+1];    # NOTE(review): reads $provider, not the newly built $biblio -- confirm intent
+}
+
+#
+# Bio::Biblio::PubmedJournalArticle
+#
+print "Testing Bio::Biblio::PubmedJournalArticle ...\n";
+$citation = new Bio::Biblio::PubmedJournalArticle;
+@args = ();
+$count = 1;
+foreach my $method (@scalar_methods_for_pubmedarticle) {
+    $str = 'string' . ($count++);
+    print sprintf ($format, "set '$method' "); ok $citation->$method ($str), $str;
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), $str;
+    push (@args, ("-$method" => $str));
+}
+print sprintf ($format, "set all attributes in a constructor");
+ok defined ($biblio = new Bio::Biblio::PubmedJournalArticle (@args));
+for (my $i = 0; $i < @args; $i += 2) {
+    my $method = substr ($args[$i], 1);
+    print sprintf ($format, "   $method"); ok $citation->$method(), $args[$i+1];
+}
+foreach my $method (@other_methods_for_pubmedarticle) {
+    print sprintf ($format, "get '$method' "); ok $citation->$method(), undef;
+}
+__END__

Added: trunk/packages/bioperl/branches/upstream/current/t/Biblio_biofetch.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Biblio_biofetch.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Biblio_biofetch.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Biblio_biofetch.t,v 1.11.6.2 2006/10/02 23:10:39 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $error $msg);
+
+BEGIN { 
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+ 
+	plan tests => ($NUMTESTS = 11);
+	eval { require IO::String; };
+	if( $@ ) {
+		warn( "IO::String not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n") if $DEBUG;
+    	$msg .= 'IO::String not installed. ';
+		$error = 1;
+	}
+	eval { require LWP::Simple; };
+	if( $@ ) {
+		warn( "LWP::Simple not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n") if $DEBUG;
+		$msg .= 'LWP::Simple not installed. ';
+		$error = 1; 
+	}
+	eval { require HTTP::Request::Common; };
+	if( $@ ) {
+		warn( "HTTP::Request::Common not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n") if $DEBUG;
+		$msg .= 'HTTP::Request::Common not installed. ';
+		$error = 1; 
+	}
+}
+
+END{
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip($msg,1);
+	}
+}
+
+exit if $error;
+
+use Bio::Biblio;
+use Bio::Biblio::IO;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the $NUMTESTS count set in the BEGIN block to reflect the
+## total number of tests that will be run.
+
+my ($db,$ref,$refio);
+# get a single ref
+
+my $verbose =  $DEBUG || 0;
+
+$ref = $refio = undef;
+
+# check BioFetch access method
+
+eval { 
+	ok ($db = new Bio::Biblio (-access => 'biofetch',
+										# -verbose => $verbose,
+									  ));
+	ok(defined($ref = $db->get_by_id('10592273')));
+	ok $ref->identifier, '10592273';
+};
+
+if ($@) {
+	warn( "Warning: Couldn't connect to BioFetch server with Bio::DB::Biblio::biofetch!\n$@\n") 
+	  if $DEBUG;
+	$msg = "Couldn't connect to BioFetch server with Bio::DB::Biblio::biofetch";
+	exit(0);
+}
+
+$ref = $refio = undef;
+
+eval {
+	ok defined($db = new Bio::Biblio(-access => 'biofetch',
+												# -verbose => $verbose,
+											   )); 
+
+	my $ids = ['10592273', '9613206'];
+	ok(defined($refio = $db->get_all($ids)));
+	ok($refio->next_bibref->identifier, '9613206');
+	ok($refio->next_bibref->identifier, '10592273');
+};
+
+if ($@) {    
+	warn "Batch access test failed.Error: $@\n" if $DEBUG;
+	$msg = 'No network access';
+	exit(0);
+}
+
+eval {
+	ok defined($db = new Bio::Biblio(-access => 'biofetch',
+												# -verbose => $verbose,
+											  )); 
+
+	ok(defined($refio = $db->get_Stream_by_id(['10592273', '9613206'])));
+	ok($refio->next_bibref->identifier, '9613206');
+	ok($refio->next_bibref->identifier, '10592273');
+};
+
+if ($@) {    
+	warn "Batch access test failed.Error: $@\n" if $DEBUG;
+	$msg = 'No network access';
+	exit(0);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Biblio_eutils.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Biblio_eutils.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Biblio_eutils.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,72 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Biblio_eutils.t,v 1.2 2004/10/27 21:40:44 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $error $msg);
+
+BEGIN { 
+    $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    
+    plan tests => ($NUMTESTS = 5);
+    eval { require IO::String; };
+    if( $@ ) {
+	warn( "IO::String not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n") if $DEBUG;
+    	$msg .= 'IO::String not installed. ';
+	$error = 1;
+    } 
+    eval { require LWP::Simple; };
+    if( $@ ) {
+	warn( "LWP::Simple not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n") if $DEBUG;
+	$msg .= 'LWP::Simple not installed. ';
+	$error = 1; 
+    }
+}
+
+exit(0) if $error;
+
+END { 
+    foreach ( $Test::ntest..$NUMTESTS) {
+	skip($msg,1);
+    }
+}
+use Bio::Biblio;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the $NUMTESTS count set in the BEGIN block to reflect the
+## total number of tests that will be run.
+
+my $db;
+
+my $verbose =  $DEBUG || 0;
+
+eval { 
+    ok ($db = new Bio::Biblio (-access => 'eutils',
+			       -verbose=>$verbose));
+    ok(defined($db->find('"Day A"[AU] AND ("Database Management Systems"[MH] OR "Databases, Genetic"[MH] OR "Software"[MH] OR "Software Design"[MH])')));
+};
+
+if ($@) {
+    warn "Warning: Couldn't connect to Eutils server!\n$@\n" if $DEBUG;
+    $msg = 'No network access - could not connect to PubMed Eutils';
+    exit(0);
+}
+
+while(my $xml = $db->get_next) {
+    ok(1);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/BioDBGFF.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/BioDBGFF.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/BioDBGFF.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,470 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use ExtUtils::MakeMaker;
+use Bio::Root::IO;
+use FindBin '$Bin';
+use constant TEST_COUNT => 276;
+use constant FASTA_FILES => Bio::Root::IO->catfile('t','data','dbfa');
+use constant GFF_FILE1    => Bio::Root::IO->catfile('t','data',
+						   'biodbgff','test.gff');
+use constant GFF_FILE2    => Bio::Root::IO->catfile('t','data',
+						   'biodbgff','test.gff3');
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan test => TEST_COUNT;
+}
+
+sub bail ($;$);
+sub user_prompt ($;$);
+sub fail ($);
+use lib '.','..','./blib/lib';
+use Bio::DB::GFF;
+use Bio::SeqIO;
+
+my $tests_file = Bio::Root::IO->catfile('t','do_biodbgff.tests');
+
+my $adaptor = -e $tests_file ? 'dbi::mysql' : 'memory';
+$adaptor    = shift if @ARGV;
+my @args;
+
+if ($adaptor =~ /^dbi/) {
+  # DBI adaptors take their connection settings from tab-separated "key<TAB>value" lines.
+  open my $cfg_fh,'<',$tests_file or bail(TEST_COUNT,"Couldn't read configuration: $tests_file");
+  my $cfg = {};
+  while (my $line = <$cfg_fh>) {
+    chomp $line;
+    my ($key,$value) = split "\t",$line;
+    $cfg->{$key}     = $value if defined $key;   # an empty line yields no key
+  }
+  close $cfg_fh;
+  $adaptor = "dbi::$cfg->{dbd_driver}" if $cfg->{dbd_driver};
+  @args = ('-adaptor' => $adaptor, '-dsn' => $cfg->{test_dsn});
+  push @args,('-user' => $cfg->{test_user}) if $cfg->{test_user};
+  push @args,('-pass' => $cfg->{test_pass}) if $cfg->{test_pass};
+} else {
+  # Any other adaptor is asked to create its store from scratch.
+  @args = ('-adaptor' => $adaptor,
+	   '-create'  => 1);
+}
+
+push @args,('-aggregators' => ['transcript','processed_transcript']);
+
+for my $FILE (GFF_FILE1,GFF_FILE2) {
+
+  my $db = eval { Bio::DB::GFF->new(@args) };
+  warn $@ if $@;
+  ok($db);
+  fail(TEST_COUNT - 1) unless $db;
+
+  $db->debug(0);
+  $db->gff3_name_munging(1);
+
+  # set the preferred groups
+  $db->preferred_groups( [ 'transcript', 'gene', 'mRNA' ] );
+  my @pg = $db->preferred_groups;
+  ok(scalar(@pg), 3);
+  ok($pg[1], 'gene'); 
+
+  # exercise the loader
+  ok($db->initialize(1));
+  ok($db->load_gff($FILE));
+  ok($db->load_fasta(FASTA_FILES));
+
+  # exercise db->types
+  my @types = sort $db->types;
+  ok(scalar @types,11);
+  ok($types[0],'CDS:confirmed');
+  ok($types[-1],'transposon:tc1');
+  my %types = $db->types('-enumerate'=>1);
+  ok($types{'transposon:tc1'},2);
+
+  # exercise segment
+  my $segment1 = $db->segment('Contig1');
+
+  ok($segment1);
+  ok($segment1->length,37450);
+  ok($segment1->start,1);
+  ok($segment1->end,37450);
+  ok($segment1->strand,1);
+  
+  my $segment2  = $db->segment('Contig1',1=>1000);
+  ok($segment2->length,1000);
+  ok($segment2->start,1);
+  ok($segment2->end,1000);
+  ok($segment2->strand,1);
+  
+  my $segment3 = $db->segment('Contig1',10=>1);
+  ok($segment3->start,10);
+  ok($segment3->end,1);
+  ok($segment3->strand,-1);
+
+  # exercise attribute fetching
+  my @t = $db->fetch_feature_by_name(Transcript => 'trans-1');
+  my ($t) = grep {$_->type eq 'transcript:confirmed'} @t;
+  ok($t->attributes('Note'),'function unknown');
+  ok(join(' ',sort $t->attributes('Gene')),'abc-1 xyz-2');
+  my $att = $t->attributes;
+  ok(scalar @{$att->{Gene}},2);
+  @t = sort {$a->display_name cmp $b->display_name} $db->fetch_feature_by_attribute('Gene'=>'abc-1');
+  ok(@t>0);
+  ok($t[0] eq $t);
+  my $seg = $db->segment('Contig1');
+  @t = $seg->features(-attributes=>{'Gene'=>'abc-1'});
+  ok(@t>0);
+  ok($seg->feature_count, 17);
+  @t = $seg->features(-attributes=>{'Gene'=>'xyz-2',Note=>'Terribly interesting'});
+  ok(@t==1);
+
+  # exercise dna() a bit
+  my $dna = $segment2->dna;
+  ok(length $dna,1000);
+  ok(substr($dna,0,10),'gcctaagcct');
+  ok($segment3->dna,'aggcttaggc');
+  ok($segment1->dna eq $db->dna($segment1->ref));
+
+  # exercise ref()
+  my $segment4 = $db->segment('-name'=>'c128.1','-class'=>'Transposon');
+  ok($segment4->length,1000);
+  ok($segment4->start,1);
+  ok($segment4->end,1000);
+  ok($segment4->ref,'c128.1');
+  ok($segment4->strand,1);
+  ok(!$segment4->absolute);
+
+  $segment4->absolute(1);
+  ok($segment4->absolute);
+  ok($segment4->ref,'Contig1');
+  ok($segment4->start,5001);
+  $segment4->absolute(0);
+  my $tmp = $db->segment('Contig1',5001=>6000);
+  ok($segment4->dna,$tmp->dna);
+
+  $segment4->ref('Contig1');
+  ok($segment4->ref,'Contig1');
+  ok($segment4->start,5001);
+  ok($segment4->end,6000);
+
+  my $segment5 = $db->segment('-name'=>'c128.2','-class'=>'Transposon');
+  ok($segment5->length,1000);
+  ok($segment5->start,1);
+  ok($segment5->end,1000);
+  ok($segment5->ref,'c128.2');
+  ok($segment5->strand,1);
+
+  $tmp = $db->segment('Contig1',9000,8001);
+  ok($segment5->dna,$tmp->dna);
+  $segment5->absolute(1);
+  ok($segment5->strand,-1);
+
+  # rel/rel addressing
+  # first two positive strand features
+  $segment4 = $db->segment('-name'=>'c128.1','-class'=>'Transposon');
+  my $start4 = $segment4->abs_start;
+  $segment5  = $db->segment('Transcript' => 'trans-1');
+  my $start5 = $segment5->abs_start;
+  $segment4->ref($segment5);
+  ok($segment4->strand,1);
+  ok($segment4->start,$start4-$start5+1);
+  ok($segment4->stop,$start4-$start5+$segment4->length);
+
+  $segment4->ref('Transposon' => 'c128.1');
+  $segment5->ref('Transcript' => 'trans-1');
+  $segment5->ref($segment4);
+  ok($segment5->start,$start5-$start4+1);
+
+  # now a positive on a negative strand feature
+  my $segment6 = $db->segment('Transcript'=>'trans-2');
+  my $start6 = $segment6->abs_start;
+  ok($segment6->strand,1);
+  ok($segment6->abs_strand,-1);
+  $segment6->ref($segment4);
+  ok($segment6->start,$start6-$start4+1);
+  ok($segment6->strand,-1);
+
+  $segment4->ref($segment6);
+  ok($segment4->start,$start6-$start4+1);
+  ok($segment4->strand,-1);
+  ok($segment4->ref eq $segment6);
+
+  # the reference sequence shouldn't affect the dna
+  $segment6 = $db->segment('Transcript'=>'trans-2');
+  $dna = $segment6->dna;
+  $segment6->ref($segment4);
+  ok($segment6->dna,$dna);
+
+  # segments should refuse to accept a reference sequence on a foreign segment
+  undef $@;
+  my $result = eval { $segment6->ref('Contig2') };
+  ok(!$result);
+  ok("$@" =~ /are on different sequence segments/);
+
+  # types across a segment
+  $segment1 = $db->segment('Contig1');
+  @types = sort $segment1->types;
+  ok(scalar @types,6);
+  ok($types[0],'CDS:confirmed');
+  ok($types[-1],'transposon:tc1');
+  %types = $segment1->types('-enumerate'=>1);
+  ok($types{'similarity:est'},3);
+
+  # features across a segment
+  my @features = $segment1->features('-automerge'=>0);
+  ok(scalar @features,17);
+  my %types_seen;
+  foreach (@features) {
+    $types_seen{$_->type}++;
+  }
+  my $inconsistency = 0;
+  foreach (keys %types,keys %types_seen) {
+    $inconsistency++ unless $types_seen{$_} == $types{$_};
+  }
+  ok(!$inconsistency);
+
+  @features = sort {$a->start<=>$b->start} @features;
+  ok($features[0]->type,'Component:reference');
+  ok($features[-1]->type,'exon:confirmed');
+
+  # make sure that we can use features to get at dna
+  ok($features[0]->dna,$db->segment('Contig1',$features[0]->start,$features[0]->end)->dna);
+
+  # check three forward features and three reverse features
+  # (This depends on the test.gff data)
+  for (1..3,-3..-1) {
+    $segment2 = $db->segment($features[$_],50,100);
+    if ($features[$_]->strand >= 0) {
+      ok($segment2->dna,$db->segment('Contig1',
+				     $features[$_]->start+50-1,
+				     $features[$_]->start+100-1)->dna)
+    } else {
+      ok($segment2->dna,$db->segment('Contig1',
+				     $features[$_]->start-50+1,
+				     $features[$_]->start-100+1)->dna)
+    }
+  }
+
+  # exercise the aggregator
+  my $aggregator = Bio::DB::GFF::Aggregator->new('-method'      => 'aggregated_transcript',
+						 '-main_method' => 'transcript',
+						 '-sub_parts'   => ['exon','CDS']);
+  $db->add_aggregator($aggregator);
+  $segment1 = $db->segment('Contig1');
+  @features = sort $segment1->features('aggregated_transcript');  # sort so that trans-1 comes first
+  ok(scalar @features,2);
+  ok($features[0]->Exon > 0);
+  ok($features[0]->Cds > 0);
+
+  # Test that sorting is correct.  The way that test.gff is set up, the lower one is
+  # on the + strand and the higher is on the -.
+  @features = sort {$a->start <=> $b->start} @features;
+  ok($features[0]->strand,1);
+  ok($features[1]->strand,-1);
+
+  my $last = 0;
+  $inconsistency = 0;
+  foreach ($features[0]->Exon) {
+    $inconsistency++ if $_->start > $_->end;
+    $inconsistency++ if $last && $_->start < $last;
+    $last = $_->start;
+  }
+  ok(!$inconsistency);
+
+  $inconsistency = $last = 0;
+  foreach ($features[1]->Exon) {
+    $inconsistency++ if $_->start < $_->end;
+    $inconsistency++ if $last && $_->start > $last;
+    $last = $_->start;
+  }
+  ok(!$inconsistency);
+  
+  # relative addressing in aggregated features
+  my $transcript1 = $db->segment($features[0]);
+  $transcript1->ref($features[0]);
+  my @overlap     = sort {$a->start <=> $b->start } $transcript1->features;
+  ok(scalar(@overlap),5);
+  ok($overlap[0]->start,-999);
+
+  $transcript1 = $db->segment('Transcript' => 'trans-1');
+  @overlap     = sort {$a->start <=> $b->start } $transcript1->features;
+  ok($overlap[0]->start,-999);
+
+  # test strandedness of features
+  $segment1 = $db->segment('-class' => 'Transcript',
+			   '-name'  => 'trans-3',
+			   '-start' => 1,
+			   '-stop'  => 6000);
+  ok($segment1->strand,1);
+  @overlap  = sort {$a->start <=> $b->start} $segment1->features('transcript');
+  ok(scalar(@overlap),2);
+  ok($overlap[0]->name,'trans-3');
+  ok($overlap[1]->name,'trans-4');
+  ok($overlap[0]->strand,1);
+  ok($overlap[1]->strand,-1);
+
+  # testing feature id and group_id
+  my $tf = $overlap[0];
+  ok(defined $tf->id);
+  my $t1 = $db->fetch_feature_by_id($tf->id);
+  ok($t1->id,$tf->id);
+
+  if (defined $tf->group_id) {
+    my $t2 = $db->fetch_feature_by_gid($tf->group_id);
+    ok($t2->group_id,$tf->group_id);
+    ok($t2->group_id,$t1->group_id);
+  } else {
+    skip("fetch_feature_by_gid() not implemented by this adaptor",1);
+    skip("fetch_feature_by_gid() not implemented by this adaptor",1);
+  }
+
+  $segment1 = $db->segment('-class' => 'Transcript',
+			   '-name'  => 'trans-4',
+			   '-start' => 1,
+			   '-stop'  => 6000);
+  ok($segment1->strand,1);
+  @overlap = sort {$a->start <=> $b->start} $segment1->features('transcript');
+  ok($overlap[0]->name,'trans-4');
+  ok($overlap[1]->name,'trans-3');
+  ok($overlap[0]->strand,1);
+  ok($overlap[1]->strand,-1);
+
+  @overlap = sort {$a->start <=> $b->start} $segment1->features('Component');
+  ok($overlap[0]->strand,0);
+
+  # test preferred group assignments
+  if ($FILE =~ /\.gff$/) {
+    my @gene = $db->get_feature_by_name( gene => 'gene-9' );
+    my @mrna = $db->get_feature_by_name( mRNA => 'trans-9' );
+    ok($gene[0]->ref, 'Contig4');
+    ok(scalar(@gene), 2);
+    ok(scalar(@mrna), 1);
+  } else {
+    skip('preferred groups are not supported by gff3',1) for 1..3;
+  }
+
+  # test iterator across a segment
+  $segment1 = $db->segment('Contig1');
+  my $i = $segment1->features('-automerge'=>0,'-iterator'=>1);
+  my %strand;
+  while (my $s = $i->next_feature) {
+    $strand{$s->strand}++;
+  }
+  ok(keys %strand == 3);
+
+  # test iterator across entire database
+  $i = $db->features('-automerge'=>0,'-iterator'=>1);
+  %strand = ();
+  while (my $s = $i->next_feature) {
+    $strand{$s->strand}++;
+  }
+  ok(keys %strand == 3);
+
+  # test iterator across a segment, limited by an attribute
+  $i = $seg->get_feature_stream(-attributes=>{'Gene'=>'abc-1',Note=>'function unknown'});
+  my $count = 0;
+  while ($i->next_seq) {
+    $count++;
+  }
+  ok($count,2);
+
+  # test that aliases work
+  my $st1 = $db->segment(Transcript => 'trans-3');
+  ok($st1);
+  my $st2 = $db->segment(Transcript => 'trans-18');  # this is an alias!
+  ok($st2);
+  ok($st1 eq $st2);
+  my @transcripts = $st1->features('transcript');
+  ok(($transcripts[0]->aliases)[0] eq 'trans-18');
+
+  # test truncation
+  $db->strict_bounds_checking(1);
+  my $tseg = $db->segment(-name=>'trans-1',-class=>'Transcript',-start=>1,-stop=>500);
+  ok(!$tseg->truncated);
+  $tseg    = $db->segment(-name=>'trans-1',-class=>'Transcript',-start=>1,-stop=>50000);
+  ok($tseg->truncated);
+  $db->strict_bounds_checking(0);
+  $tseg    = $db->segment(-name=>'trans-1',-class=>'Transcript',-start=>1,-stop=>50000);
+  ok(!$tseg->truncated);
+
+  # test the processed_transcript aggregator
+  $db->clear_aggregators;
+  $db->add_aggregator('processed_transcript');
+  my @f = $db->fetch_feature_by_name(mRNA => 'trans-8');
+  ok(scalar @f,1);
+  ok($f[0]->length,35000-32000+1);
+  ok(scalar $f[0]->CDS,3);
+  ok(scalar $f[0]->UTR,2);
+
+  # test deletions
+  # segment delete() method
+  my $clone = $db->segment(Clone=>'M7.3');
+  my $overlapping_feature_count = $clone->features(-range_type =>'overlaps');
+  my $contained_feature_count   = $clone->features(-range_type =>'contains');
+  ok(scalar $clone->delete(-range_type=>'contains'),$contained_feature_count);
+  ok(scalar $clone->features,$overlapping_feature_count - $contained_feature_count);
+
+  # database delete() method
+  ok($db->delete(-type=>['mRNA:confirmed','transposon:tc1']),4);
+  ok($db->delete(-type=>'UTR',-ref=>'Contig29'),undef);
+  ok($db->delete(-type=>'CDS',-ref=>'AL12345.2',-class=>'Clone'),3);
+  ok($db->delete_features(1,2,3),3);
+
+  $result = eval {
+    ok($db->delete_groups(1,2,3,4,5),5);
+    my @features = $db->get_feature_by_name(Sequence => 'Contig2');
+    ok($db->delete_groups(@features),1);
+    1;
+  };
+  if (!$result && $@ =~ /not implemented/i) {
+    skip("delete_groups() not implemented by this adaptor",1);
+    skip("delete_groups() not implemented by this adaptor",1);
+  }
+  ok(!defined eval{$db->delete()});
+  ok($db->delete(-force=>1));
+  ok(scalar $db->features,0);
+  ok(!$db->segment('Contig1'));
+}
+
+END {
+  unlink FASTA_FILES."/directory.index";
+}
+
+# Call skip($reason, 1) once for each of the $remaining tests, then stop
+# the script with exit status 0.
+sub bail ($;$) {
+  my ($remaining, $reason) = @_;
+  # one skip() call per outstanding test keeps the reported count in line with the plan
+  skip($reason,1) foreach 1..$remaining;
+  exit 0;
+}
+
+# Record $remaining failed tests by calling ok(0) that many times, then
+# stop the script with exit status 0.
+sub fail ($) {
+  my ($remaining) = @_;
+  ok(0) foreach 1..$remaining;
+  exit 0;
+}
+
+# Prompt with $mess on STDERR; return the STDIN reply, or $def ('' if none) on empty reply/EOF.
+sub user_prompt ($;$) {
+    my($mess,$def)=@_;
+    unless (defined $mess) { require Carp; Carp::confess("prompt function called without an argument") }
+    local $|=1;
+    print STDERR "$mess ", (defined $def ? "[$def] " : " ");
+    $def = "" unless defined $def;
+    my $ans = <STDIN>;
+    chomp $ans if defined $ans;   # $ans is undef when STDIN is at end-of-file
+    return (defined $ans && $ans ne '') ? $ans : $def;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/BioDBSeqFeature.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/BioDBSeqFeature.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/BioDBSeqFeature.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,203 @@
+##-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use Bio::Root::IO;
+use FindBin '$Bin';
+use constant TEST_COUNT => 52;
+use constant GFF_FILE    => Bio::Root::IO->catfile('t','data',
+					   'seqfeaturedb','test.gff3');
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan test => TEST_COUNT;
+    $ENV{ORACLE_HOME} ||= '/home/oracle/Home';
+}
+
+use lib "$Bin/..","$Bin/../blib/lib";
+use Bio::DB::SeqFeature::Store;
+use Bio::DB::SeqFeature::Store::GFF3Loader;
+
+# Call skip($reason, 1) once for each of the $remaining tests, then stop
+# the script with exit status 0.
+sub bail ($;$) {
+  my ($remaining, $reason) = @_;
+  # one skip() call per outstanding test keeps the reported count in line with the plan
+  skip($reason,1) foreach 1..$remaining;
+  exit 0;
+}
+
+# Record $remaining failed tests by calling ok(0) that many times, then
+# stop the script with exit status 0.
+sub fail ($) {
+  my ($remaining) = @_;
+  ok(0) foreach 1..$remaining;
+  exit 0;
+}
+
+my (@f,$f,@s,$s,$seq1,$seq2);
+
+# Store constructor arguments come from the command line; default to the in-memory adaptor.
+my @args = @ARGV ? @ARGV : (-adaptor => 'memory');
+
+my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
+warn $@ if $@;
+ok($db);
+fail(TEST_COUNT - 1) unless $db;
+
+my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
+warn $@ if $@;
+ok($loader);
+fail(TEST_COUNT - 2) unless $loader;
+
+# exercise the loader
+ok($loader->load(GFF_FILE));
+
+# there should be one gene named 'abc-1'
+@f = $db->get_features_by_name('abc-1');
+ok(@f==1);
+
+$f = $f[0];
+# there should be three subfeatures of type "exon" and three of type "CDS"
+ok($f->get_SeqFeatures('exon')==3);
+ok($f->get_SeqFeatures('CDS')==3);
+
+# the sequence of feature abc-1 should match the sequence of the first exon at the beginning
+$seq1 = $f->seq->seq;
+$seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
+ok(substr($seq1,0,length $seq2) eq $seq2);
+
+# sequence lengths should match
+ok(length $seq1 == $f->length);
+
+# if we pull out abc-1 again we should get the same object
+($s) = $db->get_features_by_name('abc-1');
+ok($f eq $s);
+
+# we should get two objects when we ask for abc-1 using get_features_by_alias
+# this also depends on selective subfeature indexing
+@f = $db->get_features_by_alias('abc-1');
+ok(@f==2);
+
+# the two features should be different
+ok($f[0] ne $f[1]);
+
+# test that targets are working
+($f) = $db->get_features_by_name('match1');
+ok(defined $f);
+$s = $f->target;
+ok(defined $s);
+ok($s->seq_id  eq 'CEESC13F');
+$seq1 = $s->seq->seq;
+ok(substr($seq1,0,10) eq 'ttgcgttcgg');
+
+# can we fetch subfeatures?
+# gene3.a has the Index=1 attribute, so we should fetch it
+($f) = $db->get_features_by_name('gene3.a');
+ok($f);
+
+# gene 3.b doesn't have an index, so we shouldn't get it
+($f) = $db->get_features_by_name('gene3.b');
+ok(!$f);
+
+# test three-tiered genes
+($f) = $db->get_features_by_name('gene3');
+ok($f);
+my @transcripts = $f->get_SeqFeatures;
+ok(@transcripts == 2);
+ok($transcripts[0]->method eq 'mRNA');
+ok($transcripts[0]->source eq 'confirmed');
+
+# test that exon #2 is shared between the two transcripts
+my @exons1      = $transcripts[0]->get_SeqFeatures('CDS');
+ok(@exons1 == 3);
+my @exons2      = $transcripts[1]->get_SeqFeatures('CDS');
+my ($shared1)   = grep {($_->display_name||'') eq 'shared_exon'} @exons1;   # parens: 'eq' binds tighter than '||'
+my ($shared2)   = grep {($_->display_name||'') eq 'shared_exon'} @exons2;
+ok($shared1 && $shared2);
+ok($shared1 eq $shared2);
+ok($shared1->primary_id eq $shared2->primary_id);
+
+# test attributes
+ok($shared1->phase == 0);
+ok($shared1->strand eq +1);
+ok(($f->attributes('expressed'))[0] eq 'yes');
+
+# test autoloading
+my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
+my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
+ok($gene3a);
+ok($gene3b);
+ok($gene3a->Is_expressed);
+ok(!$gene3b->Is_expressed);
+
+# the representation of the 3'-UTR in the two transcripts a and b is
+# different (not recommended but supported by the GFF3 spec). In the
+# first case, there are two 3'UTRs existing as independent
+# features. In the second, there is one UTR with a split location.
+ok($gene3a->Three_prime_UTR == 2);
+ok($gene3b->Three_prime_UTR == 1);
+my ($utr) = $gene3b->Three_prime_UTR;
+ok($utr->segments == 2);
+my $location = $utr->location;
+ok($location->isa('Bio::Location::Split'));
+ok($location->sub_Location == 2);
+
+# ok, test that queries are working properly.
+# find all features with the attribute "expressed"
+@f = $db->get_features_by_attribute({expressed=>'yes'});
+ok(@f == 2);
+
+# find all top-level features on Contig3 -- there should be two
+@f = $db->get_features_by_location(-seq_id=>'Contig3');
+ok(@f == 2);
+
+# find all top-level features on Contig3 of type 'assembly_component'
+@f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
+ok(@f==1);
+
+# test iteration
+@f = $db->features;
+my $feature_count = @f;
+ok($feature_count > 0);
+
+my $i = $db->get_seq_stream;
+ok($i);
+
+my $count = 0;   # start at 0 so an empty stream compares as a number, not undef
+while ($i->next_seq) { $count++ }
+ok($feature_count == $count);
+
+# regression test on bug in which get_SeqFeatures('type') did not filter inline segments
+@f = $db->get_features_by_name('agt830.3');
+ok(@f && !$f[0]->get_SeqFeatures('exon'));
+ok(@f && $f[0]->get_SeqFeatures('EST_match'));
+
+# regression test on bug in which the load_id disappeared
+ok(@f && $f[0]->load_id eq 'Match2');
+
+# regress on proper handling of multiple ID features
+my ($alignment) = $db->get_features_by_name('agt830.5');
+ok($alignment);
+ok($alignment->target->start == 1 && $alignment->target->end == 654);
+ok($alignment->get_SeqFeatures == 2);
+my $gff3 = $alignment->gff3_string(1);
+my @lines = split "\n",$gff3;
+ok (@lines == 2);
+ok ("@lines" !~ /Parent=/s);
+ok ("@lines" =~ /ID=/s);
+
+1;
+
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/t/BioFetch_DB.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/BioFetch_DB.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/BioFetch_DB.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: BioFetch_DB.t,v 1.13 2006/08/12 11:00:02 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+use lib '.','./blib/lib';
+
+my $error;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 27;
+    plan tests => $NUMTESTS;
+    eval { require IO::String; require LWP::UserAgent; 1; };
+   if ( $@ ) {
+      warn("No LWP::UserAgent or IO::String installed\n");
+      $error = 1;
+    }
+}
+
+END { 
+    foreach ( $Test::ntest..$NUMTESTS) {
+	skip('unable to run all of the Biblio/Biofetch tests - probably no network or LWP not installed',1);
+    }
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+
+require Bio::DB::BioFetch;
+
+my $verbose = -1;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the $NUMTESTS count set in the BEGIN block to reflect the
+## total number of tests that will be run.
+
+my ($db,$db2,$seq,$seqio);
+# get a single seq
+
+$seq = $seqio = undef;
+
+ok defined($db = new Bio::DB::BioFetch(-verbose => $verbose));
+eval {
+    # get a RefSeq entry
+    ok $db->db('refseq');
+    $seq = $db->get_Seq_by_acc('NM_006732'); # RefSeq VERSION
+    $seq ? ok 1 : exit;
+    ok $seq->accession_number;
+
+    # EMBL
+    $db->db('embl');
+    ok(defined($seq = $db->get_Seq_by_acc('J00522')));
+    ok( $seq->length, 408);
+    ok(defined($seq = $db->get_Seq_by_acc('J02231')));
+	ok $seq->id, 'J02231';
+    ok( $seq->length, 200); 
+    ok(defined($seqio = $db->get_Stream_by_id(['BUM'])));
+    undef $db; # testing to see if we can remove gb
+    ok( defined($seq = $seqio->next_seq()));
+    ok( $seq->length, 200);
+
+    #swissprot
+    ok defined($db2 = new Bio::DB::BioFetch(-db => 'swissprot'));
+    ok(defined($seq = $db2->get_Seq_by_id('YNB3_YEAST')));
+    ok( $seq->length, 125);
+    ok($seq->division, 'YEAST');
+    $db2->request_format('fasta');
+    ok(defined($seq = $db2->get_Seq_by_acc('P43780')));
+    ok($seq->length,103); 
+};
+
+if ($@) {
+    if( $DEBUG ) {
+	print STDERR "Warning: Couldn't connect to EMBL with Bio::DB::EMBL.pm!\n" . $@;
+    }
+    foreach ( $Test::ntest..$NUMTESTS) { 
+	skip('Could not open database, probably no network access',1);
+    }
+    exit(0);
+}
+
+
+$seq = $seqio = undef;
+
+eval {
+    $db = new Bio::DB::BioFetch(-retrievaltype => 'tempfile',
+				 -format => 'fasta',
+				 -verbose => $verbose
+				);
+	$db->db('embl');
+    ok( defined($seqio = $db->get_Stream_by_id('J00522 AF303112 J02231')));
+    my %seqs;
+    # don't assume anything about the order of the sequences
+    while ( my $s = $seqio->next_seq ) {
+	my ($type,$x,$name) = split(/\|/,$s->display_id);
+	$seqs{$x} = $s->length;
+    }
+    ok($seqs{'J00522'},408);
+    ok($seqs{'AF303112'},1611);
+    ok($seqs{'J02231'},200);
+};
+
+if ($@) {
+    if( $DEBUG ) { warn "Batch access test failed.\nError: $@\n"; }
+    foreach ( $Test::ntest..$NUMTESTS ) { skip('no network access skipping fasta retrieval',1); }
+    exit(0);
+}
+
+$verbose = -1;
+ok $db = new Bio::DB::BioFetch(-db => 'EMBL',
+			       -verbose => $verbose);
+eval {
+    $seq = $db->get_Seq_by_acc('NT_006732');
+};
+ok $@;
+
+eval {
+    ok $seq = $db->get_Seq_by_acc('NM_006732');
+    ok($seq );
+    ok($seq->length, 3775);
+};
+
+if ($@) {
+    if( $DEBUG ) { 
+	print STDERR "Warning: Couldn't connect to BioFetch server with Bio::DB::BioFetch.pm!\n" . $@;
+    }
+    foreach ( $Test::ntest..$NUMTESTS) { 
+	skip('Could not open database, probably no network access',1);
+    }
+    exit(0);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/BioGraphics.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/BioGraphics.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/BioGraphics.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,359 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: BioGraphics.t,v 1.17.4.2 2006/11/30 09:24:00 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+use File::Spec;
+use constant IMAGES => File::Spec->catfile(qw(t data biographics));
+use constant FILES => File::Spec->catfile(qw(t data biographics));
+use constant IMAGE_TESTS => 0;
+
+my $error;
+
+BEGIN {
+    $error = 0;
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';   # NB: 'use' takes effect at compile time, whatever the test above says
+    }
+    use Test;
+
+    $NUMTESTS = 14 + (IMAGE_TESTS ? 3 : 0);   # 14 data tests, plus one per image when enabled
+    plan tests => $NUMTESTS;
+
+    # Bio::Graphics needs GD and Text::Shellwords; probe for them first
+    eval { require GD; require Text::Shellwords; };
+    if( $@ ) {
+	print STDERR "GD or Text::Shellwords modules are not installed. This means that Bio::Graphics module is unusable. Skipping tests.\n";
+        $error = 1;
+    }
+
+    # Only load the modules under test when the prerequisites were found; otherwise
+    # skip them so the script can exit and the END block can mark the tests as skipped.
+    require Bio::Graphics::FeatureFile unless $error;
+    require Bio::Graphics unless $error;
+}
+
+END { 
+    foreach ( $Test::ntest..$NUMTESTS) {
+	skip('unable to run all of the Bio::Graphics tests',1);
+    }
+}
+
+exit 0 if $error;
+
+my $verbose = -1;
+my $write   = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my @images = IMAGE_TESTS ? qw(t1 t2 t3) : ();
+
+# parse command line arguments
+while (@ARGV && $ARGV[0] =~ /^--?(\w+)/) {
+  my $arg = $1;
+  if ($arg eq 'write') {
+    warn "Writing regression test images into ",IMAGES,".........\n";
+    $write++;
+  }
+  shift;
+}
+
+
+foreach (@images) {
+  if ($write) { warn "$_...\n"; do_write($_) } else { eval { do_compare($_) } }
+}
+
+my $data  = Bio::Graphics::FeatureFile->new(-file => FILES . "/feature_data.txt") or die;
+ok defined $data;
+ok $data->render == 5;
+ok $data->setting(general=>'pixels') == 750;
+ok $data->setting('general') == 4;
+ok $data->setting == 6;
+ok $data->glyph('EST') eq 'segments';
+
+my %style = $data->style('EST');
+ok $style{-connector} eq 'solid';
+ok $style{-height} == 5;
+ok $style{-bgcolor} eq 'yellow';
+
+ok $data->configured_types == 5;
+ok @{$data->features('EST')} == 5;
+
+my $thing = $data->features('EST');
+
+my ($feature) = grep {$_->name eq 'Predicted gene 1'} @{$data->features('FGENESH')};
+ok $feature;
+ok $feature->desc eq "Pfam";
+ok $feature->score == 20;
+
+sub do_write {   # run panel sub $test (e.g. t1) and save its image under IMAGES
+  my $test = shift;
+  my $canpng = GD::Image->can('png');   # write GIF instead when this GD has no png method
+  my $output_file = IMAGES . ($canpng ? "/$test.png" : "/$test.gif");
+  my $panel       = eval "$test()" or die "Couldn't run test: $@";   # $test names the sub to call
+  open my $out, '>', $output_file or die "Couldn't open $output_file for writing: $!";
+  binmode($out);   # image data is binary (read_file() uses binmode as well)
+  print {$out} $canpng ? $panel->gd->png : $panel->gd->gif;
+  close $out or die "Couldn't close $output_file: $!";   # buffered write errors show up at close
+}
+
+# Run panel sub $test; the recorded test passes only if the image equals a reference file.
+sub do_compare {
+  my $test = shift;
+  my $canpng = GD::Image->can('png');
+  # every file in IMAGES/$test/ with the matching extension is an acceptable rendering
+  my @input_files = glob(IMAGES . ($canpng ? "/$test/*.png" : "/$test/*.gif"));
+  # keep $@ in the message so the reason the panel sub failed is not lost (as in do_write)
+  my $panel       = eval "$test()" or die "Couldn't run test: $@";
+  my $test_data = $canpng ? $panel->gd->png : $panel->gd->gif;
+  my $ok = 0;
+  foreach my $file (@input_files) {
+    next unless read_file($file) eq $test_data;
+    $ok = 1;
+    last;
+  }
+  ok($ok);
+}
+
+# Return the raw bytes of file $f; dies if it cannot be opened.
+sub read_file {
+  my $f = shift;
+  open my $fh, '<', $f or die "Can't open $f: $!";   # 3-arg open: $f cannot smuggle in a mode
+  binmode($fh);                                      # no newline translation of the data
+  my $data = do { local $/; <$fh> };                 # slurp the whole file in one read
+  close $fh;
+  return defined $data ? $data : '';                 # an empty file still yields ''
+}
+
+
+sub t1 {
+  # Panel showing one set of overlapping 'gene' features four times, once
+  # per bump setting (+1, -1, +2, -2), over a 1..1000 'clone' segment.
+  my $class = 'Bio::Graphics::Feature';
+
+  my $clone = $class->new(-start=>1,-end=>1000,-name=>'ZK154',-type=>'clone');
+
+  # seg1 .. seg7 are plain start/end features; listed as [start, end]
+  my @spans = ([1,500],[250,500],[250,500],[1,400],[400,800],[550,800],[550,800]);
+  my @genes;
+  foreach my $i (0..$#spans) {
+    my ($start,$end) = @{$spans[$i]};
+    push @genes, $class->new(-start => $start,
+                             -end   => $end,
+                             -name  => 'seg' . ($i + 1),
+                             -type  => 'gene');
+  }
+
+  # seg8 is the only feature built from several sub-segments
+  push @genes, $class->new(-segments=>[[100,200],[300,400],[420,800]],-name=>'seg8',-type=>'gene');
+
+  my $panel = Bio::Graphics::Panel->new(-grid      => 1,
+                                        -segment   => $clone,
+                                        -key_style => 'bottom');
+
+  # one track per bump setting: [bump, bgcolor (undef = option not passed), key]
+  my @tracks = ([ 1, undef,    '+1 bumping'],
+                [-1, 'blue',   '-1 bumping'],
+                [ 2, 'orange', '+2 bumping'],
+                [-2, 'yellow', '-2 bumping']);
+  foreach my $spec (@tracks) {
+    my ($bump,$bgcolor,$key) = @$spec;
+    my @colour = defined $bgcolor ? (-bgcolor => $bgcolor) : ();
+    # every track gets its own fresh array of the same eight features
+    $panel->add_track(segments => [@genes],
+                      -bump    => $bump,
+                      -label   => 1,
+                      @colour,
+                      -key     => $key);
+  }
+
+  return $panel;
+}
+
+
+sub t2 {   # builds and returns a panel with many tracks, glyphs and option callbacks
+  my $ftr = 'Bio::Graphics::Feature';
+
+  my $segment = $ftr->new(-start=>-100,-end=>1000,-name=>'ZK154',-type=>'clone');
+  my $zk154_1 = $ftr->new(-start=>-50,-end=>800,-name=>'ZK154.1',-type=>'gene');
+  my $zk154_2 = $ftr->new(-start=>380,-end=>500,-name=>'ZK154.2',-type=>'gene');
+  my $zk154_3 = $ftr->new(-start=>900,-end=>1200,-name=>'ZK154.3',-type=>'gene');
+  # multi-part features, given as lists of [start,end] pairs
+  my $zed_27 = $ftr->new(-segments=>[[400,500],[550,600],[800,950]],
+			 -name=>'zed-27',
+			 -subtype=>'exon',-type=>'transcript');
+  my $abc3 = $ftr->new(-segments=>[[100,200],[350,400],[500,550]],
+		       -name=>'abc53',   # sic: the name differs from the variable ($abc3)
+		       -strand => -1,
+		       -subtype=>'exon',-type=>'transcript');
+  my $xyz4 = $ftr->new(-segments=>[[40,80],[100,120],[200,280],[300,320]],
+		       -name=>'xyz4',
+		       -subtype=>'predicted',-type=>'alignment');
+
+  my $m3 = $ftr->new(-segments=>[[20,40],[30,60],[90,270],[290,300]],   # $m3 is not referenced again in this sub
+		     -name=>'M3',
+		     -subtype=>'predicted',-type=>'alignment');
+
+  my $bigone = $ftr->new(-segments=>[[-200,-120],[90,270],[290,300]],
+			 -name=>'big one',
+			 -subtype=>'predicted',-type=>'alignment');
+
+  my $fred_12 = $ftr->new(-segments=>[$xyz4,$zed_27],   # parts are features themselves; $fred_12 is not referenced again in this sub
+			  -type => 'group',
+			  -name =>'fred-12');
+  # four exon features (-stop is used here where the features above use -end)
+  my $confirmed_exon1 = $ftr->new(-start=>1,-stop=>20,
+				  -type=>'exon',
+				  -desc=>'confirmed',
+				  -name => 'confirmed1',
+				 );
+  my $predicted_exon1 = $ftr->new(-start=>30,-stop=>50,
+				  -type=>'exon',
+				  -name=>'predicted1',
+				  -desc=>'predicted');
+  my $predicted_exon2 = $ftr->new(-start=>60,-stop=>100,
+				  -name=>'predicted2',
+				  -type=>'exon',-desc=>'predicted');
+
+  my $confirmed_exon3 = $ftr->new(-start=>150,-stop=>190,
+				  -type=>'exon',-desc=>'confirmed',
+				  -name=>'abc123');
+  my $partial_gene = $ftr->new(-segments=>[$confirmed_exon1,$predicted_exon1,$predicted_exon2,$confirmed_exon3],
+			       -name => 'partial gene',
+			       -type => 'transcript',
+			       -desc => '(from a big annotation pipeline)'
+			    );
+  my @segments = $partial_gene->segments;
+  my $score = 10;   # first sub-feature returned by ->segments gets 10, each following one 10 more
+  foreach (@segments) {
+    $_->score($score);
+    $score += 10;
+  }
+
+  my $panel = Bio::Graphics::Panel->new(
+					-gridcolor => 'lightcyan',
+					-grid => 1,
+					-segment => $segment,
+					-spacing => 15,
+					-width   => 600,
+					-pad_top  => 20,
+					-pad_bottom  => 20,
+					-pad_left => 20,
+					-pad_right=> 20,
+					-key_style => 'between',
+					-empty_tracks => 'suppress',
+				       );
+  my @colors = $panel->color_names();   # the returned list is not used below
+
+  my $t = $panel->add_track(
+			    transcript2 => [$abc3,$zed_27],
+			    -label => 1,
+			    -bump => 1,
+			    -key => 'Prophecies',
+			   );
+  $t->configure(-bump=>1);   # configure() is called on the track object add_track returned
+  $panel->add_track($segment,
+		    -glyph => 'arrow',
+		    -label => 'base pairs',
+		    -double => 1,
+		    -bump => 0,
+		    -height => 10,
+		    -arrowstyle=>'regular',
+		    -linewidth=>1,
+		    -tick => 2,
+		   );
+  $panel->unshift_track(generic => [$segment,$zk154_1,$zk154_2,$zk154_3,[$xyz4,$zed_27]],
+			-label     => sub { my $feature = shift; $feature->sub_SeqFeature>0},
+			-bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'olive' : 'red'},
+			-connector => sub { my $feature = shift;
+					    my $type = $feature->primary_tag;
+					    $type eq 'group'      ? 'dashed'
+					      : $type eq 'transcript' ? 'hat'
+						: $type eq 'alignment'  ? 'solid'
+						  : undef},
+			-all_callbacks => 1,
+			-connector_color => 'black',
+			-height => 10,
+			-bump => 1,
+			-linewidth=>2,
+			-key => 'Signs',
+			-empty_tracks => 'suppress',
+		       );
+  # this track is created from options only; its features are attached afterwards
+  my $track = $panel->add_track(-glyph=> sub { shift->primary_tag =~ /transcript|alignment/ ? 'transcript2': 'generic'},
+				-label   => sub { $_[-1]->level == 0 } ,
+				-connector => sub { return shift->type eq 'group' ? 'dashed' : 'hat'},
+				-point  => 0,
+				-orient => 'N',
+				-height => 8,
+				-base => 1,
+				-relative_coords => 1,
+				-tick  => 2,
+				-all_callbacks => 1,
+				-bgcolor => 'red',
+				-key     => 'Dynamically Added');
+  $track->add_feature($bigone,$zed_27,$abc3);
+  $track->add_group($predicted_exon1,$predicted_exon2,$confirmed_exon3);
+
+  $panel->add_track(
+		    [$abc3,$zed_27,$partial_gene],
+		    -bgcolor   => sub { shift->source_tag eq 'predicted' ? 'green' : 'blue'},
+		    -glyph   => 'transcript',
+		    -label       => sub { shift->sub_SeqFeature > 0 },
+		    -description => sub {
+		      my $feature = shift;
+		      return 1   if $feature->primary_tag eq 'transcript';
+		      return '*' if $feature->source_tag eq 'predicted';
+		      return;
+		    },
+		    -font2color  => 'red',
+		    -bump => +1,
+		    -key => 'Portents',
+		   );
+  $panel->add_track(segments => [$segment,$zk154_1,[$zk154_2,$xyz4]],
+		    -label     => 1,
+		    -bgcolor   => sub { shift->primary_tag eq 'predicted' ? 'green' : 'blue'},
+		    -connector => sub { my $primary_tag = shift->primary_tag;
+					$primary_tag eq 'transcript' ? 'hat'
+					  : $primary_tag eq 'alignment'  ? 'solid'
+					    : undef},
+		    -connector_color => 'black',
+		    -height => 10,
+		    -bump => 1,
+		    -key => 'Signals',
+		   );
+  $panel->add_track(generic => [],   # a track with no features in it
+		    -key => 'Empty');
+
+  $panel->add_track(graded_segments => $partial_gene,
+		    -bgcolor =>'blue',
+		    -vary_fg => 1,
+		    -label   => 1,
+		    -key     => 'Scored thing');
+
+  $panel->add_track(diamond => [$segment,$zk154_1,$zk154_2,$zk154_3,$xyz4,$zed_27],
+		    -bgcolor =>'blue',
+		    -label   => 1,
+		    -key     => 'pointy thing');
+  return $panel;
+}
+
+sub t3 {   # render the feature_data.txt fixture and return only the resulting panel
+  my $feature_file = Bio::Graphics::FeatureFile->new(-file => FILES . "/feature_data.txt") or die;
+  my (undef,$panel) = $feature_file->render;   # the first value render returns is not needed here
+  $panel;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/BlastIndex.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/BlastIndex.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/BlastIndex.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: BlastIndex.t,v 1.6 2003/05/19 16:27:06 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+my $error;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    $NUMTESTS = 13;
+    plan tests => $NUMTESTS;
+    eval { require 'IO/String.pm' };
+    if( $@ ) {
+        for( $Test::ntest..$NUMTESTS ) {
+            skip("IO::String not installed. This means the Bio::Index::Blast modules are not usable. Skipping tests",1);
+        }
+       $error = 1;
+    }
+}
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require Bio::SearchIO;
+require Bio::Index::Blast;
+require Bio::Root::IO;
+
+use Cwd;
+END {  unlink qw( Wibbl Wibbl.pag Wibbl.dir ); }
+
+ok(1);
+
+my $index = new Bio::Index::Blast(-filename => 'Wibbl',
+				  -write_flag => 1);
+ok($index);
+
+$index->make_index(Bio::Root::IO->catfile(cwd,"t","data","multi_blast.bls"));
+($index->dbm_package eq 'SDBM_File') ? 
+    (ok(-e "Wibbl.pag" && -e "Wibbl.dir")) :
+    (ok(-e "Wibbl"));
+
+foreach my $id ( qw(CATH_RAT PAPA_CARPA) ) {
+    my $fh = $index->get_stream($id);
+    ok($fh);
+    ok( ! eof($fh) );
+    my $report = new Bio::SearchIO(-noclose => 1,
+				   -format  => 'blast',
+				   -fh      => $fh);
+    my $result = $report->next_result;
+    ok($result->query_name, qr/$id/);
+    ok( $result->next_hit);
+    
+    ok( $index->fetch_report($id)->query_name, qr/$id/);
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Chain.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Chain.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Chain.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Chain.t,v 1.7 2001/01/25 22:13:40 jason Exp $
+# Created: Wed Dec 13 15:52:33 GMT 2000
+# By Joseph A.L. Insana, <insana at ebi.ac.uk>
+#
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 45;
+}
+
+use Bio::LiveSeq::Chain;
+
+ok(1);
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my $chain = Bio::LiveSeq::Chain::string2chain("abcdefghijklmnopqrstuvwxyz");
+ok defined $chain;
+ok( Bio::LiveSeq::Chain::down_chain2string($chain), 
+    "abcdefghijklmnopqrstuvwxyz");
+ok( Bio::LiveSeq::Chain::down_chain2string($chain,undef,4),
+    "abcd"); # default start=1
+my ($warning,$output);
+eval {
+  local $SIG{__WARN__}=sub{ $warning=$_[0]};
+  $output=Bio::LiveSeq::Chain::down_chain2string($chain,1,4,6);
+};
+ok (((index($warning,"Warning chain2string: argument LAST:6 overriding LEN:4!")==0)&&($output eq "abcdef")),1);
+my $arrayref=Bio::LiveSeq::Chain::down_labels($chain,1,4);
+ok $arrayref->[1], 2;
+$arrayref=Bio::LiveSeq::Chain::up_labels($chain,4,1);
+ok $arrayref->[1], 3;
+$arrayref=Bio::LiveSeq::Chain::up_labels($chain);
+ok scalar(@{$arrayref}), 26; # total number of labels should be 26
+ok Bio::LiveSeq::Chain::start($chain), '1';
+ok Bio::LiveSeq::Chain::end($chain), '26';
+ok Bio::LiveSeq::Chain::label_exists($chain,'4');
+ok Bio::LiveSeq::Chain::label_exists($chain,'28'), '0';
+ok Bio::LiveSeq::Chain::down_get_pos_of_label($chain,4), '4';
+ok Bio::LiveSeq::Chain::down_get_pos_of_label($chain,4,4), '1';
+ok Bio::LiveSeq::Chain::up_get_pos_of_label($chain,26,1), '1';
+ok Bio::LiveSeq::Chain::down_subchain_length($chain,1,4), '4';
+ok Bio::LiveSeq::Chain::up_subchain_length($chain,4,1), '4';
+ok Bio::LiveSeq::Chain::invert_chain($chain);
+ok Bio::LiveSeq::Chain::invert_chain($chain);
+ok Bio::LiveSeq::Chain::down_get_value_at_pos($chain,4), 'd';
+ok Bio::LiveSeq::Chain::down_get_value_at_pos($chain,1,4), 'd';
+ok Bio::LiveSeq::Chain::up_get_value_at_pos($chain,4), 'w';
+
+ok Bio::LiveSeq::Chain::up_set_value_at_pos($chain,'W',4);
+ok Bio::LiveSeq::Chain::up_get_value_at_pos($chain,4), 'W';
+
+ok Bio::LiveSeq::Chain::down_set_value_at_pos($chain,'D',4); 
+ok Bio::LiveSeq::Chain::down_get_value_at_pos($chain,4), 'D';
+
+ok Bio::LiveSeq::Chain::set_value_at_label($chain,'d',4);
+ok Bio::LiveSeq::Chain::get_value_at_label($chain,4), 'd';
+
+ok Bio::LiveSeq::Chain::down_get_label_at_pos($chain,1,4), '4';
+ok Bio::LiveSeq::Chain::up_get_label_at_pos($chain,4), '23';
+ok Bio::LiveSeq::Chain::is_downstream($chain,3,4);
+ok Bio::LiveSeq::Chain::is_downstream($chain,4,3), '0';
+ok Bio::LiveSeq::Chain::is_upstream($chain,4,3);
+ok Bio::LiveSeq::Chain::is_upstream($chain,3,4), '0';
+ok Bio::LiveSeq::Chain::splice_chain($chain,4,2), 'de';
+ok Bio::LiveSeq::Chain::splice_chain($chain,7,undef,9), 'ghi';
+
+my @array=Bio::LiveSeq::Chain::praeinsert_string($chain,"ghi",10);
+ok $array[0],27;
+ok $array[1],29;
+
+@array=Bio::LiveSeq::Chain::postinsert_string($chain,"de",3);
+ok $array[0], 30;
+ok $array[1], 31;
+ok Bio::LiveSeq::Chain::up_chain2string($chain), "zyxWvutsrqponmlkjihgfedcba";
+
+@array=Bio::LiveSeq::Chain::check_chain($chain);
+ok $array[0], 1;
+ok $array[1], 1;
+ok $array[2], 1;
+ok $array[3], 1;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/ClusterIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ClusterIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ClusterIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,77 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: ClusterIO.t,v 1.5 2006/06/08 08:44:26 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error;
+
+use vars qw($SKIPXML $LASTXMLTEST); 
+use strict;
+use lib '.';
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use vars qw($NTESTS);
+    $NTESTS = 10;
+    $LASTXMLTEST = 8;
+    $error = 0;
+
+    use Test;
+    plan tests => $NTESTS; 
+
+    eval { require XML::Parser::PerlSAX; };
+    if( $@ ) {
+	$SKIPXML = 1;
+	print STDERR "XML::Parser::PerlSAX not loaded. This means ClusterIO::dbsnp test cannot be executed. Skipping\n";
+	foreach ( $Test::ntest..$LASTXMLTEST ) {
+	    skip('No XML::Parser::PerlSAX loaded',1);
+	}
+    }
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+use Bio::ClusterIO;
+use Bio::Root::IO;
+use Bio::Cluster::ClusterFactory;
+
+my ($clusterio, $result,$hit,$hsp);
+if( ! $SKIPXML ) {
+	$clusterio = new Bio::ClusterIO ('-tempfile' => 0,
+					'-format' => 'dbsnp',
+					'-file'   => Bio::Root::IO->catfile('t','data','LittleChrY.dbsnp.xml'));
+    
+	$result = $clusterio->next_cluster;
+	ok($result);    
+	ok($result->observed eq 'C/T');
+	ok($result->type eq 'notwithdrawn');
+	ok($result->seq_5);
+	ok($result->seq_3);
+	my @ss = $result->each_subsnp;
+	ok scalar @ss,  5;
+	ok($ss[0]->handle eq 'CGAP-GAI');
+	ok($ss[1]->handle eq 'LEE');
+#	ok($result->heterozygous == 0.208738461136818);
+#	ok($result->heterozygous_SE == 0.0260274689436777);
+}
+
+###################################
+# ClusterFactory tests            #
+###################################
+
+my $fact = Bio::Cluster::ClusterFactory->new();
+# auto-recognize implementation class
+my $clu = $fact->create_object(-display_id => 'Hs.2');
+ok $clu->isa("Bio::Cluster::UniGeneI");
+$clu = $fact->create_object(-namespace => "UNIGENE");
+ok $clu->isa("Bio::Cluster::UniGeneI");

Added: trunk/packages/bioperl/branches/upstream/current/t/Coalescent.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Coalescent.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Coalescent.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,73 @@
+# -*-Perl-*-
+# $Id: Coalescent.t,v 1.5 2003/12/16 01:12:34 jason Exp $
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 11;
+}
+
+use Bio::PopGen::Simulation::Coalescent;
+use Bio::PopGen::Statistics;
+use Bio::TreeIO;
+ok(1);
+
+use vars qw($FILE1);
+$FILE1 = 'out.tre';
+END { unlink $FILE1; }
+ 
+my $ssize = 5;
+my $sim = new Bio::PopGen::Simulation::Coalescent(-sample_size => $ssize);
+my $stats = Bio::PopGen::Statistics->new();
+my $tree = $sim->next_tree;
+
+ok($tree->get_nodes, ($ssize * 2 - 1));
+
+my $treeio = new Bio::TreeIO(-format => 'newick', -file => ">$FILE1");
+
+$treeio->write_tree($tree);
+undef $treeio;
+
+ok(-s $FILE1);
+my $mutcount = 100;
+$sim->add_Mutations($tree, $mutcount);
+
+my $leaves = [$tree->get_leaf_nodes];
+# $stats->verbose(1);
+my $pi = $stats->pi($leaves);
+ok( $pi > 0 , 1, 'pi');
+
+
+# theta is num seg sites / sum(1/(numsams-1))
+my $theta = $stats->theta(scalar @$leaves, $mutcount);
+ok($theta,48, 'theta');
+
+my $tD = Bio::PopGen::Statistics->tajima_D($leaves);
+ok(defined $tD,1, 'tajimaD');
+
+my $seg_sites = Bio::PopGen::Statistics->segregating_sites_count($leaves);
+ok($seg_sites,$mutcount,
+   'all the mutations should be polymorphic (by definition)');
+my $single = Bio::PopGen::Statistics->singleton_count($leaves);
+my $flD = Bio::PopGen::Statistics->fu_and_li_D($leaves,$single);
+ok(defined $flD,1,'fu and li D');
+
+my $flD_star = $stats->fu_and_li_D_star($leaves);
+ok(defined $flD_star,1,'fu and li D*');
+
+my $flF = $stats->fu_and_li_F($leaves,$single);
+ok(defined $flF, 1,'fu and li F');
+
+my $flFstar = $stats->fu_and_li_F_star($leaves);
+ok(defined $flFstar, 1,'fu and li F*');   # check the F* result itself rather than $flF again

Added: trunk/packages/bioperl/branches/upstream/current/t/CodonTable.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/CodonTable.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/CodonTable.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,196 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: CodonTable.t,v 1.20 2006/08/16 21:07:01 cjfields Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use lib './';
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 51;
+}
+use Bio::Tools::CodonTable;
+use vars qw($DEBUG);
+ok(1);
+
+# create a table object by giving an ID
+$DEBUG = 0;
+my $myCodonTable = Bio::Tools::CodonTable -> new ( -id => 16);
+ok defined $myCodonTable;
+ok $myCodonTable->isa('Bio::Tools::CodonTable');
+
+# defaults to ID 1 "Standard"
+$myCodonTable = Bio::Tools::CodonTable->new();
+ok $myCodonTable->id(), 1;
+
+# change codon table
+$myCodonTable->id(10);
+ok $myCodonTable->id, 10;
+ok $myCodonTable->name(), 'Euplotid Nuclear';
+
+# enumerate tables as object method
+my $table = $myCodonTable->tables();
+ok (keys %{$table} >= 17); # currently 17 known tables
+ok $table->{11}, q{"Bacterial"};
+
+# enumerate tables as class method
+$table = Bio::Tools::CodonTable->tables;
+ok (values %{$table} >= 17); # currently 17 known tables
+ok $table->{23}, 'Thraustochytrium Mitochondrial';
+
+# translate codons
+$myCodonTable->id(1);
+
+eval {
+    $myCodonTable->translate();
+};
+ok ($@ =~ /EX/) ;
+
+ok $myCodonTable->translate(''), '';
+
+my @ii  = qw(ACT acu ATN gt ytr sar);
+my @res = qw(T   T   X   V  L   Z  );
+my $test = 1;
+for my $i (0..$#ii) {
+    if ($res[$i] ne $myCodonTable->translate($ii[$i]) ) {
+	$test = 0; 
+	print $ii[$i], ": |", $res[$i], "| ne |", $myCodonTable->translate($ii[$i]), "|\n" if( $DEBUG);
+	last ;
+    }
+}
+ok ($test);
+ok $myCodonTable->translate('ag'), '';
+ok $myCodonTable->translate('jj'), '';
+ok $myCodonTable->translate('jjg'), 'X';
+ok $myCodonTable->translate('gt'), 'V'; 
+ok $myCodonTable->translate('g'), '';
+
+# a more comprehensive test on ambiguous codes
+my $seq = <<SEQ;
+atgaaraayacmacracwackacyacsacvachacdacbacxagragyatmatwatyathcarcayc
+cmccrccwcckccyccsccvcchccdccbccxcgmcgrcgwcgkcgycgscgvcghcgdcgbcgxctmctrct
+wctkctyctsctvcthctdctbctxgargaygcmgcrgcwgckgcygcsgcvgchgcdgcbgcxggmggrggw
+ggkggyggsggvgghggdggbggxgtmgtrgtwgtkgtygtsgtvgthgtdgtbgtxtartaytcmtcrtcwt
+cktcytcstcvtchtcdtcbtcxtgyttrttytramgamggmgrracratrayytaytgytrsaasagsartaa;
+SEQ
+    $seq =~ s/\s+//g;
+@ii = grep { length == 3 } split /(.{3})/, $seq; 
+print join (' ', @ii), "\n" if( $DEBUG);
+my $prot = <<PROT;
+MKNTTTTTTTTTTTRSIIIIQHPPPPPPPPPPPRRRRRRRRRRRLLLLLLLLLLLEDAAAAAAAAAAAGGG
+GGGGGGGGVVVVVVVVVVV*YSSSSSSSSSSSCLF*RRRBBBLLLZZZ*
+PROT
+
+    $prot =~ s/\s//g;   # strip every newline of the heredoc, not only the first one
+@res = split //, $prot;
+print join (' ', @res), "\n" if( $DEBUG );
+$test = 1;
+for my $i (0..$#ii) {
+    if ($res[$i] ne $myCodonTable->translate($ii[$i]) ) {
+	$test = 0; 
+	print $ii[$i], ": |", $res[$i], "| ne |", 
+	  $myCodonTable->translate($ii[$i]),  "| @ $i\n" if( $DEBUG);
+	last ;
+    }
+}
+ok $test;
+
+# reverse translate amino acids 
+
+ok $myCodonTable->revtranslate('U'), 0;
+ok $myCodonTable->revtranslate('O'), 0;
+ok $myCodonTable->revtranslate('J'), 9;
+ok $myCodonTable->revtranslate('I'), 3;
+
+@ii = qw(A l ACN Thr sER ter Glx);
+@res = (
+	[qw(gct gcc gca gcg)],
+	[qw(ggc gga ggg act acc aca acg)],
+	[qw(tct tcc tca tcg agt agc)],
+	[qw(act acc aca acg)],
+	[qw(tct tcc tca tcg agt agc)],
+	[qw(taa tag tga)],
+	[qw(gaa gag caa cag)]
+	);
+
+$test = 1;
+ TESTING: {
+     for my $i (0..$#ii) {
+	 my @codonres = $myCodonTable->revtranslate($ii[$i]);
+	 for my $j (0..$#codonres) {
+	     if ($codonres[$j] ne $res[$i][$j]) {
+		 $test = 0;
+		 print $ii[$i], ': ', $codonres[$j], " ne ", 
+		 $res[$i][$j], "\n" if( $DEBUG);
+		 last TESTING;
+	     }
+	 }
+     }
+ }
+ok $test;
+
+#  boolean tests
+$myCodonTable->id(1);
+
+ok $myCodonTable->is_start_codon('ATG');  
+ok $myCodonTable->is_start_codon('GGH'), 0;
+ok $myCodonTable->is_start_codon('HTG');
+ok $myCodonTable->is_start_codon('CCC'), 0;
+
+ok $myCodonTable->is_ter_codon('UAG');
+ok $myCodonTable->is_ter_codon('TaG');
+ok $myCodonTable->is_ter_codon('TaR');
+ok $myCodonTable->is_ter_codon('tRa');
+ok $myCodonTable->is_ter_codon('ttA'), 0;
+
+ok $myCodonTable->is_unknown_codon('jAG');
+ok $myCodonTable->is_unknown_codon('jg');
+ok $myCodonTable->is_unknown_codon('UAG'), 0;
+
+ok $myCodonTable->translate_strict('ATG'), 'M';
+
+
+
+#
+# adding a custom codon table
+#
+
+
+my @custom_table =
+    ( 'test1',
+      'FFLLSSSSYY**CC*WLLLL**PPHHQQR*RRIIIMT*TT*NKKSSRRV*VVAA*ADDEE*GGG'
+    );
+
+ok my $custct = $myCodonTable->add_table(@custom_table);
+ok $custct, 24;
+ok $myCodonTable->translate('atgaaraayacmacracwacka'), 'MKNTTTT';
+ok $myCodonTable->id($custct);
+ok $myCodonTable->translate('atgaaraayacmacracwacka'), 'MKXXTTT';
+
+# test doing this via Bio::PrimarySeq object
+
+use Bio::PrimarySeq;
+ok $seq = Bio::PrimarySeq->new(-seq=>'atgaaraayacmacracwacka', -alphabet=>'dna');
+ok $seq->translate()->seq, 'MKNTTTT';
+ok $seq->translate(undef, undef, undef, undef, undef, undef, $myCodonTable)->seq, 'MKXXTTT';
+
+# test gapped translated
+
+ok $seq = Bio::PrimarySeq->new(-seq      => 'atg---aar------aay',
+			                   -alphabet => 'dna');
+ok $seq->translate->seq, 'M-K--N';
+
+ok $seq = Bio::PrimarySeq->new(-seq=>'ASDFGHKL');
+ok $myCodonTable->reverse_translate_all($seq), 'GCBWSNGAYTTYGGVCAYAARYTN';
+ok $seq = Bio::PrimarySeq->new(-seq=>'ASXFHKL');
+ok $myCodonTable->reverse_translate_all($seq), 'GCBWSNNNNTTYCAYAARYTN';

Added: trunk/packages/bioperl/branches/upstream/current/t/Compatible.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Compatible.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Compatible.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,93 @@
+# -*-Perl-*-
+# $Id: Compatible.t,v 1.4 2006/06/16 04:41:47 lapp Exp $
+# Bioperl Test Harness Script for Modules
+#
+
+my $error;
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN { 
+  # to handle systems with no installed Test module
+  # we include the t dir (where a copy of Test.pm is located)
+  # as a fallback
+  $error = 0; 
+  eval { require Test; };
+  if( $@ ) {
+    use lib 't';
+  }
+  use Test;
+  use vars qw($TESTCOUNT);
+  $TESTCOUNT = 3;
+  plan tests => $TESTCOUNT;
+
+  eval {
+	  require Set::Scalar;
+  };
+  if( $@ ) {
+	  $error = 1;
+	  warn("No Set::Scalar. Unable to test Bio::Tree::Compatible\n");
+  }
+}
+
+END {
+   foreach ( $Test::ntest..$TESTCOUNT) {
+      skip('No Set::Scalar: unable to run tests',1);
+   }
+}
+
+exit if $error;
+
+# we have to protect Bio::Tree::Compatible from being compiled because
+# Set::Scalar may not be installed.
+eval { require Bio::Tree::Compatible; };
+die "failed to load Bio::Tree::Compatible: $@\n" if $@;
+
+use Bio::TreeIO;
+my $verbose = 0;
+
+my $in = new Bio::TreeIO(-format => 'newick',
+								 -fh     => \*DATA);
+
+# the common labels of (((A,B)C,D),(E,F,G)); and ((A,B)H,E,(J,(K)G)I);
+# are [A,B,E,G]
+
+my $t1 = $in->next_tree;
+my $t2 = $in->next_tree;
+my $common = Bio::Tree::Compatible::common_labels($t1,$t2);
+my $labels = Set::Scalar->new(qw(A B E G));
+ok($common->is_equal($labels));
+
+# the topological restrictions of (((A,B)C,D),(E,F,G)); and
+# ((A,B)H,E,(J,(K)G)I); to their common labels, [A,B,E,G], are,
+# respectively, ((A,B),(E,G)); and ((A,B),E,(G));
+
+Bio::Tree::Compatible::topological_restriction($t1,$common);
+Bio::Tree::Compatible::topological_restriction($t2,$common);
+my $t3 = $in->next_tree;
+my $t4 = $in->next_tree;
+# ok($t1->is_equal($t3)); # is_equal method missing in Bio::Tree::Tree
+# ok($t2->is_equal($t4)); # is_equal method missing in Bio::Tree::Tree
+
+# the topological restrictions of (((A,B)C,D),(E,F,G)); and
+# ((A,B)H,E,(J,(K)G)I); to their common labels, [A,B,E,G], are
+# compatible
+
+my ($incompat, $ilabels, $inodes) = Bio::Tree::Compatible::is_compatible($t3,$t4);
+ok(!$incompat);
+
+# (((B,A),C),D); and ((A,(D,B)),C); are incompatible
+
+my $t5 = $in->next_tree;
+my $t6 = $in->next_tree;
+($incompat, $ilabels, $inodes) = Bio::Tree::Compatible::is_compatible($t5,$t6);
+ok($incompat);
+
+__DATA__
+(((A,B)C,D),(E,F,G));
+((A,B)H,E,(J,(K)G)I);
+((A,B),(E,G));
+((A,B),E,(G));
+(((B,A),C),D);
+((A,(D,B)),C);


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/Compatible.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/CoordinateGraph.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/CoordinateGraph.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/CoordinateGraph.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: CoordinateGraph.t,v 1.1 2002/10/29 13:52:23 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 7;
+}
+
+
+use Bio::Coordinate::Graph;
+
+ok(1);
+
+ok my $graph = new Bio::Coordinate::Graph;
+
+# graph structure
+my $dag = {
+	   9  => [],
+	   8  => [9],
+	   7  => [],
+	   6  => [7, 8],
+	   5  => [],
+	   4  => [5],
+	   3  => [6],
+	   2  => [3, 4, 6],
+	   1  => [2]
+	  };
+
+ok $graph->hash_of_arrays($dag);
+
+
+my $a = 1;
+my $b = 6;
+ok my @a = $graph->shortest_path($a, $b), 3;
+#print join (", ", @a), "\n";
+
+$a = 7;
+$b = 8;
+ok @a = $graph->shortest_path($a, $b), 1;
+
+
+$a = 8;
+$b = 9;
+ok @a = $graph->shortest_path($a, $b), 2;
+$b = 2;
+ok @a = $graph->shortest_path($a, $b), 3;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/CoordinateMapper.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/CoordinateMapper.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/CoordinateMapper.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,703 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: CoordinateMapper.t,v 1.13 2003/10/16 16:34:19 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 170;
+}
+
+use Bio::Location::Simple;
+use Bio::Coordinate::Pair;
+use Bio::Coordinate::Result;
+use Bio::Coordinate::Result::Match;
+use Bio::Coordinate::Result::Gap;
+use Bio::Coordinate::Chain;
+use Bio::Coordinate::Collection;
+
+use vars qw($DEBUG);
+ok(1);
+
+my ($c, $value);
+
+ok $c = Bio::Coordinate::Result::Match-> new;
+ok $c = Bio::Coordinate::Result::Gap-> new;
+
+# propeptide
+my $match1 = Bio::Location::Simple->new 
+    (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
+# peptide
+my $match2 = Bio::Location::Simple->new
+    (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
+
+ok my $pair = Bio::Coordinate::Pair->new(-in => $match1,
+					 -out => $match2,
+					 -negative => 0, # false, default
+					);
+
+ok $pair->test;
+ok $pair->strand(), 1; #  = in->strand * out->strand
+ok $pair->in->seq_id(), 'propeptide';
+
+
+my ($count, $pos, $pos2, $res, $match, $res2);
+
+
+#
+# match within
+#
+$pos = Bio::Location::Simple->new 
+    (-start => 25, -end => 25, -strand=> -1 );
+
+# results are in Bio::Coordinate::Result
+# they can be Matches and Gaps; both are Bio::LocationIs
+ok $res = $pair->map($pos);
+ok $res->isa('Bio::Coordinate::Result');
+ok $res->isa('Bio::Location::SplitLocationI');
+ok $res->each_match, 1;
+ok $res->each_gap, 0;
+ok $res->each_Location, 1;
+
+ok $res->match->isa('Bio::LocationI');
+ok $res->match->isa('Bio::Coordinate::Result::Match');
+ok $res->match->start, 5;
+ok $res->match->end, 5;
+ok $res->match->strand, -1;
+ok $res->match->seq_id, 'peptide';
+ok $res->start, 5;
+ok $res->end, 5;
+ok $res->strand, -1;
+#ok $res->seq_id, 'peptide';
+
+# let's do the reverse
+$match = $res->match;
+ok $pair->swap;
+$res2 = $pair->map($match);
+ok $res2->match->start, $pos->start;
+ok $res2->match->end, $pos->end;
+ok $res2->match->strand, $pos->strand;
+ok $res2->match->seq_id, $pair->out->seq_id;
+ok $pair->swap;
+
+#
+# match outside = Gap
+#
+$pos = Bio::Location::Simple->new (-start => 5, -end => 5 );
+
+ok $res = $pair->map($pos);
+#$res->verbose(2);
+ok $res->each_Location, 1;
+ok $res->each_gap, 1;
+
+ok $res->gap->isa('Bio::Coordinate::Result::Gap');
+ok $res->gap->isa('Bio::LocationI');
+ok $res->gap->strand, 1;
+ok $res->gap->start, 5;
+ok $res->gap->length, $pos->length;
+ok $res->gap->seq_id, 'propeptide';
+
+
+#
+# partial match = gap & match
+#
+$pos2 = Bio::Location::Simple->new
+    (-start => 20, -end => 22, -strand=> -1 );
+
+ok $res = $pair->map($pos2);
+
+ok $res->each_match, 1;
+ok $res->each_gap, 1;
+ok $res->each_Location, 2;
+ok $res->match->length + $res->gap->length, $pos2->length;
+
+ok $res->match->start, 1;
+ok $res->match->end, 2;
+ok $res->match->seq_id, 'peptide';
+ok $res->match->strand, -1;
+ok $res->gap->start, 20;
+ok $res->gap->end, 20;
+ok $res->gap->seq_id, 'propeptide';
+ok $res->gap->strand, -1;
+
+#
+# partial match =  match & gap
+#
+$pos2 = Bio::Location::Simple->new (-start => 40, -end => 41, -strand=> 1 );
+ok $res = $pair->map($pos2);
+ok $res->match->length + $res->gap->length, $pos2->length;
+
+#
+#enveloping
+#
+$pos2 = Bio::Location::Simple->new (-start => 19, -end => 41, -strand=> 1 );
+ok $res = $pair->map($pos2);
+$count = 0; map {$count += $_->length} $res->each_Location;
+ok $count, $pos2->length;
+
+
+
+
+#
+# Testing insertions
+#
+#out
+$pos = Bio::Location::Simple->new (-start => 5, -end => 6, -location_type=>'^');
+$res = $pair->map($pos);
+ok $res->each_gap, 1;
+ok $res->each_Location, 1;
+
+#in
+$pos = Bio::Location::Simple->new (-start => 21, -end => 22, -location_type=>'^');
+$res = $pair->map($pos);
+ok $res->each_match, 1;
+ok $res->each_Location, 1;
+
+#just before
+$pos = Bio::Location::Simple->new (-start => 20, -end => 21, -location_type=>'^');
+$res = $pair->map($pos);
+ok $res->each_gap, 1;
+ok $res->each_Location, 1;
+
+#just after
+$pos = Bio::Location::Simple->new (-start => 40, -end => 41, -location_type=>'^');
+$res = $pair->map($pos);
+ok $res->each_gap, 1;
+ok $res->each_Location, 1;
+
+#
+# strandness
+#
+#   11   6 4 2
+#  -|--------|-
+#  -|--------|-
+#   2    7 9 11
+#
+
+# from
+$match1 = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 2, -end => 11, -strand=>1 );
+# to
+$match2 = Bio::Location::Simple->new
+    (-seq_id => 'to', -start => 2, -end => 11, -strand=>-1 );
+$pair = Bio::Coordinate::Pair->new(-in => $match1,
+				   -out => $match2
+				  );
+#
+# match within
+#
+
+ok $pair->test;
+ok $pair->strand(), -1;
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 7, -end => 9, -strand=>1 );
+$res = $pair->map($pos);
+ok $res->match->start, 4;
+ok $res->match->end, 6;
+ok $res->match->strand, -1;
+
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 3, -end => 10, -strand=>-1 );
+$res = $pair->map($pos);
+ok $res->match->start, 3;
+ok $res->match->end, 10;
+ok $res->match->strand, 1;
+
+#
+# match outside = Gap
+#
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 1, -end => 1, -strand=>1 );
+$res = $pair->map($pos);
+ok $res->gap->start, 1;
+ok $res->gap->end, 1;
+ok $res->gap->strand, 1;
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 12, -end => 12, -strand=>-1 );
+$res = $pair->map($pos);
+ok $res->gap->start, 12;
+ok $res->gap->end, 12;
+ok $res->gap->strand, -1;
+
+
+#
+# partial match1 = gap & match
+#
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 1, -end => 7, -strand=>-1 );
+$res = $pair->map($pos);
+ok $res->gap->start, 1;
+ok $res->gap->end, 1;
+ok $res->gap->strand, -1;
+ok $res->match->start, 6;
+ok $res->match->end, 11;
+ok $res->match->strand, 1;
+
+#
+# partial match2 =  match & gap 
+#
+
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 9, -end => 12, -strand=>-1 );
+$res = $pair->map($pos);
+ok $res->match->start, 2;
+ok $res->match->end, 4;
+ok $res->match->strand, 1;
+ok $res->gap->start, 12;
+ok $res->gap->end, 12;
+ok $res->gap->strand, -1;
+
+#
+#enveloping
+#
+
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 1, -end => 12, -strand=>-1 );
+$res = $pair->map($pos);
+ok $res->match->start, 2;
+ok $res->match->end, 11;
+ok $res->match->strand, 1;
+
+my ($gap1, $gap2) = $res->each_gap;
+ok $gap1->start, 1;
+ok $gap1->end, 1;
+ok $gap1->strand, -1;
+ok $gap2->start, 12;
+ok $gap2->end, 12;
+ok $gap2->strand, -1;
+
+#
+# Chain
+#
+# chain (two) mappers together
+#
+
+# propeptide
+$match1 = Bio::Location::Simple->new 
+    (-seq_id => 'propeptide', -start => 5, -end => 40, -strand=>1 );
+# peptide
+$match2 = Bio::Location::Simple->new
+    (-seq_id => 'peptide', -start => 1, -end => 36, -strand=>1 );
+
+ok $pair = Bio::Coordinate::Pair->new(-in => $match1,
+					 -out => $match2
+					);
+
+
+ok my $chain = Bio::Coordinate::Chain->new;
+ok $chain->add_mapper($pair);
+$chain->add_mapper($pair);
+
+
+$pos = Bio::Location::Simple->new
+    (-seq_id => 'from', -start => 6, -end => 21, -strand=> 1 );
+
+#  6 ->  2 ->  1
+# 21 -> 17 -> 13
+$match = $chain->map($pos);
+ok $match->isa('Bio::Coordinate::Result::Match');
+ok $match->start, 1;
+ok $match->end, 13;
+ok $match->strand, 1;
+
+
+
+#
+# Collection
+#
+#         1   5     6   10
+#         |---|     |---|
+#-----|-----------------------
+#     1   5   9     15  19
+#         pair1     pair2
+
+# gene
+$match1 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 5, -end => 9, -strand=>1 );
+# exon1
+$match2 = Bio::Location::Simple->new
+    (-seq_id => 'exon1', -start => 1, -end => 5, -strand=>1 );
+
+ok my $pair1 = Bio::Coordinate::Pair->new(-in => $match1,
+					  -out => $match2,
+					);
+# gene
+my $match3 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 15, -end => 19, -strand=>1 );
+# exon
+my $match4 = Bio::Location::Simple->new
+    (-seq_id => 'exon2', -start => 6, -end => 10, -strand=>1 );
+
+ok my $pair2 = Bio::Coordinate::Pair->new(-in => $match3,
+					  -out => $match4,
+					 );
+
+ok my $transcribe = Bio::Coordinate::Collection->new;
+ok $transcribe->add_mapper($pair1);
+ok $transcribe->add_mapper($pair2);
+
+
+# simple match
+$pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
+ok $res = $transcribe->map($pos);
+ok $res->match->start, 1;
+ok $res->match->end, 5;
+ok $res->match->seq_id, 'exon1';
+
+# flank pre
+$pos = Bio::Location::Simple->new (-start => 2, -end => 9 );
+ok $res = $transcribe->map($pos);
+ok $res->each_gap, 1;
+ok $res->each_match, 1;
+ok $res->match->start, 1;
+ok $res->match->end, 5;
+
+# flank post
+$pos = Bio::Location::Simple->new (-start => 5, -end => 12 );
+ok $res = $transcribe->map($pos);
+ok $res->each_gap, 1;
+ok $res->each_match, 1;
+ok $res->match->start, 1;
+ok $res->match->end, 5;
+
+# match more than two
+$pos = Bio::Location::Simple->new (-start => 5, -end => 19 );
+ok $res = $transcribe->map($pos);
+ok $res->each_gap, 2;
+ok $res->each_match, 2;
+
+
+
+# testing sorting
+#
+#         1   5     6   10    11  15
+#         |---|     |---|     |---|
+#-----|-----------------------|---|--
+#     1   5   9     15  19    25  29
+#         pair1     pair2     pair3
+#
+#
+# create the third pair
+# gene
+my $match5 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 25, -end => 29, -strand=>1 );
+# exon
+my $match6 = Bio::Location::Simple->new
+    (-seq_id => 'exon3', -start => 11, -end => 15, -strand=>1 );
+
+my $pair3 = Bio::Coordinate::Pair->new(-in => $match5,
+				       -out => $match6
+				      );
+
+# create a new collection in wrong order
+$transcribe = Bio::Coordinate::Collection->new;
+$transcribe->add_mapper($pair3);
+$transcribe->add_mapper($pair1);
+$transcribe->add_mapper($pair2);
+ok $transcribe->sort;
+my @res;
+map {push @res, $_->in->start } $transcribe->each_mapper;
+ok compare_arrays ([5, 15, 25], \@res);
+
+
+#
+# Test using genomic data
+#
+
+my $mapper = Bio::Coordinate::Collection->new;
+
+load_data($mapper, undef );
+
+# transform a segment entirely within the first rawcontig
+#test_transform ($mapper,
+#		[627012, 2, 5, -1, "rawcontig"],
+#		["chr1", 2, 5, -1]);
+$pos = Bio::Location::Simple->new (-start => 2, -end => 5, -strand => -1);
+$res = $mapper->map($pos);
+ok $res->match->start, 2;
+ok $res->match->end, 5;
+ok $res->match->strand, -1;
+ok $res->match->seq_id, '627012';
+
+## now a split coord
+my @testres = (
+	     [314696, 31917, 31937, -1],
+	     [341, 126, 59773, -1],
+	     [315843, 5332, 5963, +1]
+);
+$pos = Bio::Location::Simple->new (-start => 383700, -end => 444000, -strand => 1);
+$res = $mapper->map($pos);
+ @res =  $res->each_match;
+compare (shift @res, shift @testres);
+compare (shift @res, shift @testres);
+compare (shift @res, shift @testres);
+
+## now a simple gap
+@testres = (
+	    [627011, 7447, 7507, +1],
+	    ["chr1", 273762, 273781, 1]
+	   );
+$pos = Bio::Location::Simple->new (-start => 273701, -end => 273781, -strand => 1);
+$res = $mapper->map($pos);
+ok $res->each_match, 1;
+ok $res->each_gap, 1;
+@res =  $res->each_Location;
+compare (shift @res, shift @testres);
+compare (shift @res, shift @testres);
+
+ok $mapper->swap;
+$pos = Bio::Location::Simple->new 
+    (-start => 2, -end => 5, -strand => -1, -seq_id => '627012');
+$res = $mapper->map($pos);
+ok $res->match->start, 2;
+ok $res->match->end, 5;
+ok $res->match->strand, -1;
+ok $res->match->seq_id, 'chr1';
+
+#
+# tests for split locations
+#
+
+# testing a  simple pair
+$match1 = Bio::Location::Simple->new 
+    (-seq_id => 'a', -start => 5, -end => 17, -strand=>1 );
+$match2 = Bio::Location::Simple->new
+    (-seq_id => 'b', -start => 1, -end => 13, -strand=>-1 );
+
+$pair = Bio::Coordinate::Pair->new(-in => $match1,
+					 -out => $match2,
+					);
+
+# split location
+
+ok my $split = new Bio::Location::Split;
+ok $split->add_sub_Location(new Bio::Location::Simple(-start=>6,
+                                                      -end=>8,
+                                                      -strand=>1));
+$split->add_sub_Location(new Bio::Location::Simple(-start=>15,
+                                                   -end=>16,
+                                                   -strand=>1));
+
+$res=$pair->map($split);
+ok my @sublocs = $res->each_Location(1);
+ok @sublocs, 2;
+
+#print Dumper \@sublocs;
+ok $sublocs[0]->start, 2;
+ok $sublocs[0]->end, 3;
+ok $sublocs[1]->start, 10;
+ok $sublocs[1]->end, 12;
+
+
+
+#
+# from Align
+#
+
+use Bio::Coordinate::Utils;
+use Bio::LocatableSeq;
+use Bio::SimpleAlign;
+
+my $string;
+#y $out = IO::String->new($string);
+
+#AAA/3-10    --wtatgtng
+#BBB/1-7     -aaaat-tt-
+
+my $s1 = new Bio::LocatableSeq (-id => 'AAA',
+			    -seq => '--wtatgtng',
+			    -start => 3,
+			    -end => 10,
+  			    -alphabet => 'dna'
+			    );
+my $s2 = new Bio::LocatableSeq (-id => 'BBB',
+			    -seq => '-aaaat-tt-',
+			    -start => 1,
+			    -end => 7,
+  			    -alphabet => 'dna'
+			    );
+$a = new Bio::SimpleAlign;
+$a->add_seq($s1);
+$a->add_seq($s2);
+#use Data::Dumper;
+
+ok my $uti = Bio::Coordinate::Utils->new;
+$mapper = $uti->from_align($a);
+#print Dumper $mapper;
+ok $mapper->return_match, 1;
+ok $mapper->return_match(1), 1;
+
+
+$pos = Bio::Location::Simple->new 
+    (-start => 4, -end => 8, -strand => 1);
+$res = $mapper->map($pos);
+#print Dumper $res;
+
+exit; # end of tests
+#
+# subroutines only after this
+#
+
+sub compare_arrays {
+    my ($first, $second) = @_;
+
+    return 0 unless @$first == @$second;
+    for (my $i = 0; $i < @$first; $i++) {
+	return 0 if $first->[$i] ne $second->[$i];
+    }
+    return 1;
+}
+
+
+sub compare {
+    my ($match, $test) = @_;
+    ok $match->seq_id eq $test->[0], 1,
+	"Match: |". $match->seq_id. "| Test: ". $test->[0]. "|\n";
+    ok $match->start  == $test->[1];
+    ok $match->end == $test->[2];
+    ok $match->strand == $test->[3];
+}
+
+
+sub load_data {
+    my ($map, $reverse) = @_;
+
+#chr_name	raw_id	chr_start	chr_end	raw_start	raw_end	raw_ori
+    my @sgp_dump = split ( /\n/, qq {
+chr1	627012	1	31276	1	31276	1
+chr1	627010	31377	42949	72250	83822	-1
+chr1	2768	42950	180950	251	138251	1
+chr1	10423	180951	266154	1	85204	-1
+chr1	627011	266255	273761	1	7507	1
+chr1	314698	273862	283122	1	9261	-1
+chr1	627009	283223	331394	251	48422	-1
+chr1	314695	331395	352162	1	20768	-1
+chr1	314697	352263	359444	1	7182	-1
+chr1	314696	359545	383720	31917	56092	-1
+chr1	341	383721	443368	126	59773	-1
+chr1	315843	443369	444727	5332	6690	1
+chr1	315844	444828	453463	1	8636	-1
+chr1	315834	453564	456692	1	3129	1
+chr1	315831	456793	458919	1	2127	1
+chr1	315827	459020	468965	251	10196	-1
+chr1	544782	468966	469955	1	990	-1
+chr1	315837	470056	473446	186	3576	-1
+chr1	544807	473447	474456	1	1010	-1
+chr1	315832	474557	477289	1	2733	1
+chr1	544806	477390	477601	1086	1297	-1
+chr1	315840	477602	482655	21	5074	1
+chr1	544802	482656	483460	1	805	-1
+chr1	544811	483561	484162	6599	7200	-1
+chr1	315829	484163	498439	15	14291	-1
+chr1	544813	498440	500980	1	2541	-1
+chr1	544773	501081	502190	1217	2326	-1
+chr1	315828	502191	513296	72	11177	1
+chr1	544815	513297	517276	2179	6158	1
+chr1	315836	517277	517662	2958	3343	1
+chr1	544805	517663	520643	299	3279	1
+chr1	315835	520744	521682	2462	3400	-1
+chr1	544784	521683	526369	54	4740	1
+chr1	544796	526470	527698	1	1229	1
+chr1	315833	527799	528303	2530	3034	-1
+chr1	544803	528304	531476	1	3173	-1
+chr1	544821	531577	532691	1	1115	1
+chr1	544810	532792	533843	1	1052	1
+chr1	544800	533944	535249	1	1306	1
+chr1	544786	535350	536652	1	1303	1
+chr1	544814	536753	538358	1	1606	1
+chr1	544812	538459	540004	1	1546	1
+chr1	544818	540105	541505	1	1401	1
+chr1	544816	541606	542693	1	1088	1
+chr1	544778	542794	544023	1	1230	1
+chr1	544779	544124	545709	1	1586	1
+chr1	544804	545810	547660	1	1851	1
+chr1	544774	547761	550105	1	2345	1
+chr1	544817	550206	552105	1	1900	1
+chr1	544781	552206	553640	1	1435	1
+chr1	315830	553741	555769	1	2029	-1
+chr1	544819	555870	558904	1	3035	-1
+chr1	544777	559005	560670	1	1666	1
+chr1	544795	560771	563092	1	2322	1
+chr1	544809	563193	565523	1	2331	1
+chr1	544808	565624	568113	1	2490	1
+chr1	544798	568214	570324	1	2111	1
+chr1	544783	570425	574640	1	4216	1
+chr1	544824	574741	578101	1	3361	1
+chr1	544775	578202	580180	1	1979	-1
+chr1	544825	580281	581858	1	1578	-1
+chr1	544772	581959	585312	1	3354	1
+chr1	544793	585413	588740	1	3328	1
+chr1	544785	588841	591656	1	2816	-1
+chr1	544791	591757	594687	1	2931	1
+chr1	544820	594788	597671	1	2884	1
+chr1	544790	597772	601587	1	3816	1
+chr1	544794	601688	603324	1	1637	-1
+chr1	544823	603425	607433	1	4009	1
+chr1	544789	607534	610856	1	3323	1
+chr1	544799	610957	614618	1	3662	1
+chr1	544776	614719	618674	1	3956	-1
+chr1	544797	618775	624522	1	5748	-1
+chr1	544787	624623	629720	1	5098	-1
+chr1	544792	629821	637065	1	7245	1
+chr1	622020	837066	851064	1	13999	-1
+chr1	622021	851165	854101	1	2937	-1
+chr1	622016	854202	856489	1	2288	-1
+chr1	625275	856590	888524	420	32354	-1
+chr1	622015	888525	891483	1	2959	-1
+chr1	622024	891584	896208	8871	13495	-1
+chr1	625537	896209	952170	1	55962	-1
+chr1	625538	952271	1051812	251	99792	-1
+chr1	625277	1051813	1055193	1	3381	-1
+chr1	625266	1055294	1062471	1	7178	-1
+chr1	598266	1062572	1086504	11	23943	-1
+chr1	625271	1086505	1096571	3943	14009	1
+chr1	625265	1096572	1100161	2436	6025	-1
+chr1	173125	1100162	1106067	3329	9234	-1
+chr1	598265	1106068	1112101	286	6319	1
+chr1	625360	1112102	1172572	251	60721	1
+chr1	173111	1172573	1172716	1	144	-1
+chr1	173103	1172817	1173945	1	1129	1
+chr1	170531	1174046	1174188	8791	8933	-1
+chr1	625363	1174189	1183590	67	9468	1
+chr1	173120	1183591	1183929	153	491	-1
+chr1	170509	1183930	1184112	864	1046	1
+chr1	173119	1184213	1189703	1	5491	-1
+chr1	625357	1189804	1213915	1	24112	1
+chr1	625359	1214016	1216330	1	2315	1
+} );
+    # test the auto-sorting feature
+    #	@sgp_dump = reverse (@sgp_dump) if defined $reverse; 
+
+    my $first = 1;
+    for my $line ( @sgp_dump ) {
+	if( $first ) { $first = 0; next; }
+	my ( $chr_name, $contig_id, $chr_start, $chr_end,
+	     $contig_start, $contig_end, $contig_strand ) =
+		 split ( /\t/, $line );
+
+	my $match1 = Bio::Location::Simple->new
+	    (-seq_id => $chr_name, -start => $chr_start,
+	     -end => $chr_end, -strand=>1 );
+	my $match2 = Bio::Location::Simple->new
+	    (-seq_id => $contig_id, -start => $contig_start,
+	     -end => $contig_end, -strand=>$contig_strand );
+
+	my $pair = Bio::Coordinate::Pair->new(-in => $match1,
+					      -out => $match2,
+					     );
+	$map->add_mapper($pair);
+    }
+    return $map;
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Correlate.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Correlate.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Correlate.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,58 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $ Id: Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 15;
+}
+
+use Bio::Phenotype::Correlate;
+use Bio::Species;
+
+my $mouse = Bio::Species->new();
+  
+$mouse->classification( qw( musculus Mus ) );
+
+my $co = Bio::Phenotype::Correlate->new( -name        => "4(Tas1r3)",
+                                         -description => "mouse correlate of human phenotype MIM 605865",
+                                         -species     => $mouse,
+                                         -type        => "homolog",
+                                         -comment     => "type=homolog is putative" );
+
+ok( $co->isa( "Bio::Phenotype::Correlate" ) );
+
+ok( $co->to_string() );
+
+ok( $co->name(), "4(Tas1r3)" );
+ok( $co->description(), "mouse correlate of human phenotype MIM 605865" );
+ok( $co->species()->binomial(), "Mus musculus" );
+ok( $co->type(), "homolog" );
+ok( $co->comment(), "type=homolog is putative" );
+
+$co->init();
+
+ok( $co->name(), "" );
+ok( $co->description(), "" );
+ok( $co->type(), "" );
+ok( $co->comment(), "" );
+
+ok( $co->name( "A" ), "A" );
+ok( $co->description( "B" ), "B" );
+ok( $co->type( "C" ), "C" );
+ok( $co->comment( "D" ), "D" );
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/CytoMap.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/CytoMap.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/CytoMap.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,266 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: CytoMap.t,v 1.5 2006/07/05 18:20:53 sendu Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG);
+    $DEBUG = $ENV{'BIOPERLDEBUG'};
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 110;
+}
+
+END {
+}
+
+#
+# Let's test first the map class : Bio::Map::CytoMap
+#
+
+use Bio::Map::CytoMap;
+ok 1;
+
+ok my $map = new Bio::Map::CytoMap(-name  => 'my');
+ok $map->type, 'cyto'; 
+ok $map->units, ''; 
+ok $map->length, 0;
+ok $map->name, 'my';
+ok $map->species('human'), 'human';
+ok $map->species, 'human';
+ok $map->unique_id, '1';
+#
+#
+# Secondly, we make sure the location calculations in
+#           Bio::Map::CytoPosition make sense
+#
+
+use Bio::Map::CytoPosition;
+use Bio::Range;
+
+ok(1);
+
+my($a, $b, $r);
+my $string = 'b';
+ok Bio::Map::CytoPosition::_pad($string, 5, 'z'), 'bzzzz';
+
+ok $a = Bio::Map::CytoPosition->new();
+ok $a->isa('Bio::Map::CytoPosition');
+ok $a->cytorange, undef;
+
+
+$a->verbose(2);
+eval {
+    ok $a->value('C'), 'C'; 
+    ok $a->cytorange, undef ;
+};
+ok $@;
+$a->verbose(0);
+
+ok $a->value('X'), 'X';
+$r = $a->cytorange;
+ok $r->isa('Bio::Range');
+ok $r->start, 100000000;
+ok $r->end, 100200000;
+
+$a->value('1p');
+ok $a->cytorange->start, 1000000;
+ok $a->cytorange->end, 1100000;
+
+$a->value('2qter');
+ok $a->cytorange->start, 2200000;
+ok $a->cytorange->end, 2200000;
+
+$a->value('2qcen');
+ok $a->cytorange->start, 2100000;
+ok $a->cytorange->end, 2100000;
+
+eval {
+    $a->value('2qcen2');
+    $a->cytorange->start;
+};
+ok 1 if $@;
+
+$a->value('2q22');
+ok $a->cytorange->start, 2122000;
+ok $a->cytorange->end, 2122999;
+
+$a->value('2p22');
+ok $a->cytorange->start, 2077001;
+ok $a->cytorange->end, 2078000;
+
+$a->value('2p21');
+ok $a->cytorange->start, 2078001;
+ok $a->cytorange->end, 2079000;
+
+$a->value('10p22.1-cen');
+ok $a->cytorange->start, 10022199;
+ok $a->cytorange->end, 10100000;
+
+eval {
+    $a->value('10q22.1-cen');
+    $a->cytorange->start;
+};
+ok 1 if $@;
+
+$a->value('10q22.1-ter');
+ok $a->cytorange->start, 10122100;
+ok $a->cytorange->end, 10200000;
+
+
+eval {
+    $a->value('10q22.1-p');
+    $a->cytorange->start;
+};
+ok 1 if $@;
+
+$a->value('10qcen-qter');
+ok $a->cytorange->start, 10100000;
+ok $a->cytorange->end, 10200000;
+
+$a->value('10pcen-qter');
+ok $a->cytorange->start, 10100000;
+ok $a->cytorange->end, 10200000;
+
+$a->value('10q22.1-q23');
+ok $a->cytorange->start, 10122100;
+ok $a->cytorange->end, 10123999;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+$a->value('10p22.1-p23');
+ok $a->cytorange->start, 10076001;
+ok $a->cytorange->end,  10077900;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+$a->value('10cen-p23');
+ok $a->cytorange->start, 10076001;
+ok $a->cytorange->end, 10100000;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+$a->value('10q22.1-p23');
+ok $a->cytorange->start, 10076001;
+ok $a->cytorange->end, 10122199;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+$a->value('10p22.1-q23');
+ok $a->cytorange->start, 10077801;
+ok $a->cytorange->end, 10123999;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+$a->value('10q22.1-p22');
+ok $a->cytorange->start, 10077001 ;
+ok $a->cytorange->end, 10122199 ;
+
+$b = Bio::Map::CytoPosition->new();
+$b->value('10p22-p22.1');
+ok $b->cytorange->start, 10077801 ;
+ok $b->cytorange->end, 10078000;
+ok $a->cytorange->overlaps($b->cytorange);
+
+
+$a->value('10p22.1-q23');
+ok $a->cytorange->start, 10077801;
+ok $a->cytorange->end, 10123999;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+
+$a->value('17p13-pter');
+ok $a->cytorange->start, 17000000;
+ok $a->cytorange->end, 17087000;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+$a->value('17cen-pter');
+ok $a->cytorange->start, 17000000;
+ok $a->cytorange->end, 17100000;
+ok ($a->cytorange->start < $a->cytorange->end );
+
+
+#-----------------------------------------
+#my $s;
+
+sub test {
+    my ($s) = @_;
+    my $a = Bio::Map::CytoPosition->new();
+    $a->value($s);
+    $r = $a->cytorange;
+    ok $a->range2value($r), $s;
+}
+
+test '1';
+test '2p';
+test '3q';
+test '4cen';
+
+test '5pter';
+test '6qter';
+test '7p21';
+test '8q11.1';
+
+test '9q13.13-15';
+test '10p13.13-q15';
+test '11p13.13-qter';
+test '12p13.13-qter';
+test '13p13.13-14';
+test '14p13.13-pter';
+test '15cen-q2';
+test '16cen-p2';
+#test '17cen-pter'; eq 17p
+#test '18cen-qter'; eq 18q 
+
+
+# by now we should be convinced that band conversion to a range works
+# so let's try to use it for comparing markers.
+
+
+use Bio::Map::CytoMarker;
+ok 1;
+
+ok my $marker1 = new Bio::Map::CytoMarker();
+ok $marker1->name('gene1'), 'gene1' ;
+ok $marker1->position($map, '10p33.13-q15');
+
+ok my $marker2 = new Bio::Map::CytoMarker(-name => 'gene2' );
+ok $marker2->position($map, '10p10-15');
+ok $marker1->get_chr, 10;
+
+ok my $marker3 = new Bio::Map::CytoMarker(-name => '3' );
+ok $marker3->position($map, '10p1');
+
+ok my $marker4 = new Bio::Map::CytoMarker(-name => '4' );
+ok $marker4->position($map, '10q2');
+
+#
+# Lastly, let's test the comparison methods
+#
+
+ok $marker1->equals($marker1);
+ok ! $marker1->equals($marker2);
+
+ok $marker3->less_than($marker4);
+ok ! $marker3->greater_than($marker4);
+ok ! $marker4->less_than($marker3);
+ok $marker4->greater_than($marker3);
+
+ok ! $marker4->overlaps($marker3);
+ok $marker1->overlaps($marker3);
+
+ok ! $marker4->contains($marker3);
+ok $marker1->contains($marker3);
+
+# 
+# Test throw() in some private functions
+#
+
+eval { Bio::Map::CytoPosition::_pad('string', -1, 'x'); };
+ok($@ =~ m/positive integer/);
+eval { Bio::Map::CytoPosition::_pad('string', +1, 'toolong'); };
+ok($@ =~ m/single character/);
+

Added: trunk/packages/bioperl/branches/upstream/current/t/DB.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/DB.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/DB.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,393 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: DB.t,v 1.67.2.4 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+    $NUMTESTS = 113;
+    $DEBUG    = $ENV{'BIOPERLDEBUG'} || 0;
+
+    eval { require Test::More; };
+    if ($@) {
+        use lib 't/lib';    # NB: 'use' acts at compile time
+    }
+    use Test::More;
+
+    eval {
+        require IO::String;
+        require LWP::UserAgent;
+        require HTTP::Request::Common;
+    };
+    if ($@) {
+        plan(skip_all => 'IO::String or LWP::UserAgent or HTTP::Request not installed. This means the Bio::DB::* modules are not usable. Skipping tests');
+    }
+    elsif (!$DEBUG) {
+        plan(skip_all => 'Skipping all tests since they require network access, set BIOPERLDEBUG=1 to test');
+    }
+    else {
+        plan(tests => $NUMTESTS);
+    }
+
+    # load every database class under test
+    for my $module (qw(Bio::DB::GenBank Bio::DB::GenPept Bio::DB::SwissProt
+                       Bio::DB::GDB Bio::DB::MeSH)) {
+        use_ok($module);
+    }
+}
+
+# Expected sequence lengths, keyed by the display id or accession used below.
+my %expected_lengths = ('NDP_MOUSE' => 131,
+                        'NDP_HUMAN' => 133,
+                        'MUSIGHBA1' => 408,
+                        'AF303112'  => 1611,
+                        'J00522'    => 408,
+                        'AF303112.1' => 1611,
+                        '2981014'   => 1156,
+                        'AF041456'  => 1156,
+                        'AY080910'  => 798,
+                        'AY080909'  => 1042,
+                        'AF155220'  => 1172,
+                        '405830'    => 1743,
+                        'CELRABGDI' => 1743,
+                        '195055'    => 136,
+                        'AAD15290'  => 136,
+                        'AAC06201'  => 353,
+                        'P43780'    => 103,
+                        'BOLA_HAEIN'=> 103,
+                        'YNB3_YEAST'=> 125,
+                        'O39869'    => 56,
+                        'P18584'    => 497,
+                        'DEGP_CHLTR'=> 497,
+                        'AF442768'  => 2547,
+                        'P31383'    => 635,
+                        'CH402638'  => 5041);
+
+my ($gb, $seq, $seqio, $seqin, $query);
+
+#
+# Bio::DB::GenBank
+#
+ok($gb = Bio::DB::GenBank->new('-delay' => 0));
+
+# fetch one record at a time: by id, accession, version and gi
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_id('MUSIGHBA1'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenBank.pm. Do you have network access? Skipping GenBank tests", 4) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seq = $gb->get_Seq_by_acc('AF303112'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 3) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seq = $gb->get_Seq_by_version('AF303112.1'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 2) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seq = $gb->get_Seq_by_gi('405830'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 1) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+}
+
+($seq, $seqio) = ();
+
+# the same kind of lookup, several ids per request
+SKIP: {
+    eval { $seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]); };
+    skip("Batch access test failed for Genbank. Skipping those tests", 4) if $@;
+    my $fetched = 0;
+    while (my $record = $seqio->next_seq) {
+        is($record->length, $expected_lengths{$record->display_id});
+        $fetched++;
+    }
+    is($fetched, 3);
+}
+
+($seq, $seqio) = ();
+
+# FASTA output retrieved by way of a temporary file
+ok($gb = Bio::DB::GenBank->new('-format' => 'fasta', '-retrievaltype' => 'tempfile', '-delay' => 0));
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_id('MUSIGHBA1'); };
+    skip("Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 6) if $@;
+    # the lookup key is the last '|'-separated field of the id
+    is($seq->length, $expected_lengths{(split(/\|/, $seq->display_id))[-1]});
+    eval { $seq = $gb->get_Seq_by_acc('AF303112'); };
+    skip("Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 5) if $@;
+    # the lookup key is the last '|'-separated field of the id
+    is($seq->length, $expected_lengths{(split(/\|/, $seq->display_id))[-1]});
+    # batch mode requires genbank format
+    $gb->request_format("gb");
+    eval { $seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]); };
+    skip("Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 4) if $@;
+    my $fetched = 0;
+    while (my $record = $seqio->next_seq) {
+        is($record->length, $expected_lengths{$record->display_id});
+        undef $gb;    # the db goes away here, yet the temp file
+        # should remain until seqio goes away.
+        $fetched++;
+    }
+    is($fetched, 3);
+}
+
+($seq, $seqio) = ();
+
+# retrieval through a pipeline instead of a temporary file
+ok($gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline', '-delay' => 0));
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_id('MUSIGHBA1'); };
+    skip("Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 6) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seq = $gb->get_Seq_by_acc('AF303112'); };
+    skip("Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 5) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]); };
+    skip("Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 4) if $@;
+    my $fetched = 0;
+    while (my $record = $seqio->next_seq) {
+        is($record->length, $expected_lengths{$record->display_id});
+        undef $gb;    # the db goes away here, yet the pipeline
+        # should remain until seqio goes away
+        $fetched++;
+    }
+    is($fetched, 3);
+}
+
+($seq, $seqio) = ();
+
+# query objects: search by organism within a date window
+ok($query = Bio::DB::Query::GenBank->new(
+    '-db'      => 'nucleotide',
+    '-query'   => 'Onchocerca volvulus[Organism]',
+    '-mindate' => '2002/1/1', '-maxdate' => '2002/12/31',
+));
+SKIP: {
+    ok($query->count > 0);
+    my @ids = $query->ids;
+    ok(@ids > 0);
+    ok(@ids == $query->count);
+    ok($gb = Bio::DB::GenBank->new('-delay' => 0));
+    eval { $seqio = $gb->get_Stream_by_query($query); };
+    skip("Couldn't connect to complete GenBank query tests. Skipping those tests", 5) if $@;
+    my $fetched = 0;
+    while (my $record = $seqio->next_seq) {
+        is($record->length, $expected_lengths{$record->display_id});
+        undef $gb;    # the db goes away here, yet the pipeline
+        # should remain until seqio goes away
+        $fetched++;
+    }
+    is($fetched, 4);
+}
+
+($seq, $seqio) = ();
+
+# query objects again, this time built from an explicit id list
+ok($query = Bio::DB::Query::GenBank->new('-db'  => 'nucleotide',
+                                         '-ids' => [qw(J00522 AF303112 2981014)]));
+SKIP: {
+    ok($query->count > 0);
+    my @ids = $query->ids;
+    ok(@ids > 0);
+    ok(@ids == $query->count);
+    $gb = Bio::DB::GenBank->new('-delay' => 0);
+    eval { $seqio = $gb->get_Stream_by_query($query); };
+    skip("Couldn't connect to complete GenBank query tests. Skipping those tests: $@", 4) if $@;
+    my $fetched = 0;
+    while (my $record = $seqio->next_seq) {
+        is($record->length, $expected_lengths{$record->display_id});
+        $fetched++;
+    }
+    is($fetched, 3);
+    $seqio->close();    # the key to preventing errors during make test, no idea why
+}
+
+($seq, $seqio) = ();
+
+# contig record, requested first as 'gbwithparts' and then as plain 'gb'
+ok($gb = Bio::DB::GenBank->new('-delay'  => 0, '-format' => 'gbwithparts'));
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_id('CH402638'); };
+    skip("Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 3) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    # now to check that postprocess_data in NCBIHelper catches CONTIG...
+    ok($gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gb'));
+    eval { $seq = $gb->get_Seq_by_id('CH402638'); };
+    skip("Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 1) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+}
+
+($seq, $seqio) = ();
+
+# bug 1405: -seq_start / -seq_stop select a sub-sequence
+my @result;
+ok($gb = Bio::DB::GenBank->new(-format => 'Fasta', -seq_start => 2, -seq_stop => 7));
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_acc("A11111"); };
+    skip("Couldn't connect to complete GenBank tests. Skipping those tests", 14) if $@;
+    is($seq->length, 6);
+    # complexity tests
+    ok($gb = Bio::DB::GenBank->new(-format => 'Fasta', -complexity => 0));
+    eval { $seqin = $gb->get_Stream_by_acc("5"); };
+    skip("Couldn't connect to complete GenBank tests. Skipping those tests", 12) if $@;
+    @result = (1136, 'dna', 342, 'protein');
+    while ($seq = $seqin->next_seq) {
+        is($seq->length, shift @result);
+        is($seq->alphabet, shift @result);
+    }
+    is(scalar(@result), 0);
+    # Real batch retrieval using epost/efetch
+    # these tests may change if integrated further into Bio::DB::Gen*
+    # Currently only useful for retrieving GI's via get_seq_stream
+    $gb = Bio::DB::GenBank->new();
+    eval { $seqin = $gb->get_seq_stream(-uids => [4887706, 431229, 147460], -mode => 'batch'); };
+    skip("Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests", 7) if $@;
+    @result = ('M59757', 12611, 'X76083', 3140, 'J01670', 1593);
+    while ($seq = $seqin->next_seq) {
+        is($seq->accession, shift @result);
+        is($seq->length, shift @result);
+    }
+    is(scalar(@result), 0);
+}
+
+($seq, $seqin) = ();
+
+#
+# Bio::DB::GenPept: batch-mode retrieval through get_seq_stream()
+#
+ok $gb = Bio::DB::GenPept->new();
+SKIP: {
+    eval {$seqin = $gb->get_seq_stream(-uids => [2981015, 1621261, 195055], -mode => 'batch');};
+    skip "Couldn't connect to complete GenPept tests. Skipping those tests", 7 if $@;
+    # accession/length pairs, consumed in the order the records come back
+    @result = ('AAC06201', 353, 'CAB02640', 193, 'AAD15290', 136);
+    while ($seq = $seqin->next_seq) {
+        is $seq->accession, shift(@result);
+        is $seq->length, shift(@result);
+    }
+    is @result, 0;    # every expected value must have been consumed
+}
+$seq = $seqin = undef;    # release the stream used above ($seqin, not $seqio)
+
+ok($gb = Bio::DB::GenPept->new('-delay' => 0));
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_id('195055'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 10) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seq = $gb->get_Seq_by_acc('AAC06201'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 9) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seqio = $gb->get_Stream_by_id([qw(AAC06201 195055)]); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 8) if $@;
+    my $fetched = 0;
+    while (my $record = $seqio->next_seq) {
+        is($record->length, $expected_lengths{$record->display_id});
+        $fetched++;
+    }
+    is($fetched, 2);
+
+    # a swissprot entry parsed from its genpept form
+    eval { $seq = $gb->get_Seq_by_acc('2AAA_YEAST'); };
+    skip("Couldn't connect to Genbank with Bio::DB::GenPept.pm. Skipping those tests", 5) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+
+    # dbsource annotations
+    # (the values may change upstream, but hopefully will not)
+    my @annot = $seq->annotation->get_Annotations('dblink');
+    is(scalar(@annot), 31);    # changed 16-Aug-06
+    is($annot[0]->database, 'swissprot');
+    is($annot[0]->primary_id, '2AAA_YEAST');
+    is(($seq->annotation->get_Annotations('swissprot_dates'))[0]->value, 'Jul 1, 1993');
+}
+
+($seq, $seqio) = ();
+
+#
+# Bio::DB::SwissProt: lookups by id and accession, then a tempfile stream
+#
+ok($gb = Bio::DB::SwissProt->new(-retrievaltype => 'pipeline', -delay => 0));
+SKIP: {
+    eval { $seq = $gb->get_Seq_by_id('YNB3_YEAST'); };
+    skip("Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 14) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    is($seq->division, 'YEAST');
+
+    eval { $seq = $gb->get_Seq_by_acc('P43780'); };
+    skip("Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 12) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    eval { $seq = $gb->get_Seq_by_acc('O39869'); };
+    skip("Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 11) if $@;
+    is($seq->length, $expected_lengths{$seq->accession_number});
+    is($seq->accession_number, 'O39869');
+    is($seq->division, '9PICO');
+
+    # test for bug #958
+    eval { $seq = $gb->get_Seq_by_id('P18584'); };
+    skip("Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 8) if $@;
+    is($seq->length, $expected_lengths{$seq->display_id});
+    is($seq->display_id, 'DEGP_CHLTR');
+    is($seq->division, 'CHLTR');
+
+    ok($gb = Bio::DB::SwissProt->new('-retrievaltype' => 'tempfile', '-delay' => 0));
+    eval { $seqio = $gb->get_Stream_by_id(['NDP_MOUSE', 'NDP_HUMAN']); };
+    skip("Couldn't connect to SwissProt with Bio::DB::SwissProt.pm. Skipping those tests", 4) if $@;
+    undef $gb;    # testing to see if we can remove gb
+    ok($seq = $seqio->next_seq());
+    is($seq->length, $expected_lengths{$seq->display_id});
+    ok($seq = $seqio->next_seq());
+    is($seq->length, $expected_lengths{$seq->display_id});
+}
+
+$seq = $seqio = undef;
+
+#
+# Bio::DB::GDB: look up one marker
+#
+ok(my $gdb = Bio::DB::GDB->new());
+SKIP: {
+    my $info;
+    eval { $info = $gdb->get_info(-type => 'marker', -id => 'D1S243'); };
+    skip("Couldn't connect to GDB with Bio::DB::GDB.pm. Skipping those tests", 1) if $@;
+    is($info->{gdbid}, 'GDB:188393');
+}
+
+#
+# Bio::DB::EntrezGene (only when Bio::ASN1::EntrezGene can be loaded)
+#
+SKIP: {
+    eval { require Bio::ASN1::EntrezGene; };
+    skip("Bio::ASN1::EntrezGene not found, Bio::DB::EntrezGene module is not usable. Skipping those tests", 8) if $@;
+    use_ok('Bio::DB::EntrezGene');
+    ok($gb = Bio::DB::EntrezGene->new(-retrievaltype => 'tempfile', -delay => 0));
+    eval { $seqio = $gb->get_Stream_by_id([2, 3064]); };
+    skip("Couldn't connect to Entrez with Bio::DB::EntrezGene. Skipping those tests", 6) if $@;
+    $seq = $seqio->next_seq;
+    is($seq->display_id, "A2M");
+    is($seq->accession_number, 2);
+    $seq = $seqio->next_seq;
+    is($seq->display_id, "HD");
+    is($seq->accession_number, 3064);
+    eval { $seq = $gb->get_Seq_by_id(6099); };
+    skip("Couldn't connect to Entrez with Bio::DB::EntrezGene. Skipping those tests", 2) if $@;
+    is($seq->display_id, "RP");
+    is($seq->accession_number, 6099);
+}
+
+($seq, $seqio) = ();
+
+#
+# Bio::DB::MeSH: exact term lookups
+#
+ok(my $mesh = Bio::DB::MeSH->new());
+SKIP: {
+    my $term;
+    eval { $term = $mesh->get_exact_term('Dietary Fats'); };
+    skip("Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 3) if $@;
+    is($term->each_twig(), 2);
+    eval { $term = $mesh->get_exact_term("Sinus Thrombosis, Intracranial"); };
+    skip("Couldn't connect to MeSH with Bio::DB::MeSH. Skipping those tests", 2) if $@;
+    is($term->description, "Thrombus formation in an intracranial venous sinus, including the superior sagittal, cavernous, lateral, and petrous sinuses. Etiologies include thrombosis due to infection,  DEHYDRATION, coagulation disorders (see  THROMBOPHILIA), and  CRANIOCEREBRAL TRAUMA.");
+    is($term->id, "D012851");
+}
+
+($seq, $seqio) = ();

Added: trunk/packages/bioperl/branches/upstream/current/t/DBCUTG.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/DBCUTG.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/DBCUTG.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,104 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: DBCUTG.t,v 1.20.4.5 2006/11/17 09:32:42 sendu Exp $ 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+    $NUMTESTS = 35;
+    $DEBUG    = $ENV{'BIOPERLDEBUG'} || 0;
+
+    eval { require Test::More; };
+    if ($@) {
+        use lib 't/lib';    # NB: 'use' acts at compile time
+    }
+    use Test::More;
+
+    eval {
+        require IO::String;
+        require LWP::UserAgent;
+    };
+    if ($@) {
+        plan(skip_all => 'IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests');
+    }
+    else {
+        plan(tests => $NUMTESTS);
+    }
+
+    # load every module exercised below
+    for my $module (qw(Bio::DB::CUTG Bio::CodonUsage::Table
+                       Bio::CodonUsage::IO Bio::SeqIO
+                       Bio::Tools::SeqStats Bio::Root::IO)) {
+        use_ok($module);
+    }
+}
+
+END {
+	unlink(Bio::Root::IO->catfile("t","data","cutg.out"));    # remove the file written below
+}
+# "my $x = VALUE if COND" has undefined behaviour (see perlsyn), so assign unconditionally
+my $outfile = Bio::Root::IO->catfile("t","data","cutg.out");
+my $verbose = $DEBUG ? 1 : 0;
+
+# read a codon usage table from a file
+ok(my $io = Bio::CodonUsage::IO->new(
+    -file => Bio::Root::IO->catfile("t", "data", "MmCT")));
+ok(my $cut2 = $io->next_data());
+is(int($cut2->aa_frequency('LEU')), 10);
+
+# write it out again
+ok($io = Bio::CodonUsage::IO->new(-file => ">$outfile"));
+$io->write_data($cut2);
+ok(-e $outfile);
+
+# and read back what has just been written
+ok($io = Bio::CodonUsage::IO->new(-file => $outfile));
+ok($cut2 = $io->next_data());
+is(int($cut2->aa_frequency('LEU')), 10);
+
+# now build a user defined CUT from a sequence
+ok(my $seqobj = Bio::SeqIO->new(
+    -file   => Bio::Root::IO->catfile("t", "data", "HUMBETGLOA.fa"),
+    -format => 'fasta')->next_seq);
+is($seqobj->subseq(10, 20), 'TTGACACCACT');
+ok(my $codcont_Ref = Bio::Tools::SeqStats->count_codons($seqobj));
+is($codcont_Ref->{'TGA'}, 16);
+ok(my $cut = Bio::CodonUsage::Table->new(-data => $codcont_Ref));
+is($cut->codon_rel_frequency('CTG'), 0.18);
+is($cut->codon_abs_frequency('CTG'), 2.6);
+is($cut->codon_count('CTG'), 26);
+is($cut->get_coding_gc(1), "39.70");
+ok(my $ref = $cut->probable_codons(20));
+
+# these need Internet access; set env BIOPERLDEBUG to 1 to run them
+SKIP: {
+    skip("Skipping tests which require remote servers, set BIOPERLDEBUG=1 to test", 11) unless $DEBUG;
+    ok(my $tool = Bio::WebAgent->new(-verbose => $verbose));
+    ok($tool->sleep);
+    is($tool->delay(1), 1);
+    ok($tool->sleep);
+
+    # fetch a CUT from the web
+    ok(my $db = Bio::DB::CUTG->new());
+    ok($db->verbose(1));
+    my $cdtable;
+    eval { $cdtable = $db->get_request(-sp =>'Pan troglodytes'); };
+    skip("Could not connect to server, server/network problems? Skipping those tests", 5) if $@;
+
+    # tests for Table.pm, the answers seem to change with time, so not specific
+    cmp_ok($cdtable->cds_count(), '>', 600);
+    cmp_ok(int($cdtable->aa_frequency('LEU')), '>', 1);
+    ok($cdtable->get_coding_gc('all'));
+    cmp_ok($cdtable->codon_rel_frequency('ttc'), '<', 1);
+
+    ## now enter a non-existent species and check the handling..
+    ## should default to human...
+    my $db2 = Bio::DB::CUTG->new();
+    eval { $cut2 = $db2->get_request(-sp =>'Wookie magnus'); };
+    skip("Could not connect to server, server/network problems? Skipping those tests", 1) if $@;
+    is($cut2->species(), 'Homo sapiens');
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/DBCUTG.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/DBFasta.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/DBFasta.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/DBFasta.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,81 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use lib '.','./blib/lib';
+use vars qw($DEBUG $NUMTESTS $exit);
+
+BEGIN {
+    # Assigned here, not at file level, so it is set when warn() below runs.
+    $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+    # Fall back to the copy of Test.pm shipped in t/ ('use' statements
+    # act at compile time, so t is added to @INC unconditionally).
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    eval { require Bio::DB::Fasta; };
+    if ( $@ ) {
+        warn("A dependency for Bio::DB::Fasta is not installed - skipping tests. $@\n") if $DEBUG;
+        $exit = 1;
+    }
+    plan test => ($NUMTESTS = 12);
+}
+# Without Bio::DB::Fasta nothing below can run: leave with a clean exit
+# status and let the END block report the planned tests as skipped.
+exit(0) if $exit;
+require Bio::DB::Fasta;
+use Bio::Root::IO;
+
+# The db files are copied to a temporary directory first: GDBM_File lock
+# files can apparently only be created on local filesystems, so the test
+# would block and then fail when the testdir is on an NFS mounted system.
+
+use File::Copy;
+my $io = Bio::Root::IO->new(-verbose => $DEBUG);
+my $tempdir = $io->tempdir('CLEANUP' => 1);
+my $test_dbdir = $io->catfile($tempdir, 'dbfa');
+mkdir($test_dbdir) or die("cannot create dir $test_dbdir: $!");
+my $indir = $io->catfile(qw(. t data dbfa));
+opendir(my $in_dh, $indir) or die("cannot open dir $indir: $!");
+# effectively do a cp -r but only copy the files that are in there, no subdirs
+for my $file ( map { $io->catfile($indir,$_) } readdir($in_dh) ) {
+    next unless (-f $file );
+    copy($file, $test_dbdir) or die("cannot copy $file to $test_dbdir: $!");
+}
+closedir($in_dh);
+
+# index the copied files and query them through the object interface
+my $db = Bio::DB::Fasta->new($test_dbdir, -reindex => 1);
+ok($db);
+ok($db->length('CEESC13F') > 0);
+ok(length($db->seq('CEESC13F:1,10')) == 10);
+ok(length($db->seq('AW057119', 1, 10)) == 10);
+my $primary_seq = $db->get_Seq_by_id('AW057119');
+ok($primary_seq);
+ok(length($primary_seq->seq) > 0);
+ok(!defined($db->get_Seq_by_id('foobarbaz')));
+undef $db;
+undef $primary_seq;
+
+my (%h, $dna1, $dna2);
+ok(tie(%h, 'Bio::DB::Fasta', $test_dbdir));
+ok($h{'AW057146'});
+ok($dna1 = $h{'AW057146:1,10'});
+ok($dna2 = $h{'AW057146:10,1'});
+# the reversed coordinate pair must give the reverse complement
+my $revcom = scalar reverse($dna1);
+$revcom =~ tr/gatcGATC/ctagCTAG/;
+ok($dna2 eq $revcom);
+
+# every planned test that has not run is reported as skipped
+END {
+    for my $test_no ($Test::ntest .. $NUMTESTS) {
+        skip('Unable to complete DBFasta tests', 1);
+    }
+    # test dir is cleaned up automagically by tempdir(CLEANUP => 1)
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/DNAMutation.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/DNAMutation.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/DNAMutation.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,146 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: DNAMutation.t,v 1.7 2002/03/20 13:08:04 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # Systems without an installed Test module fall back to the
+    # copy of Test.pm kept in the t dir.
+    eval { require Test; };
+    if ($@) {
+        use lib 't';
+    }
+    use Test;
+
+    plan(tests => 36);
+}
+
+use Bio::Variation::DNAMutation;
+use Bio::Variation::Allele;
+
+ok(1);
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the 'plan tests' count in the BEGIN block to reflect the
+## total number of tests that will be run.
+
+my ($obj, $a1, $a2, $obj2);
+$obj = Bio::Variation::DNAMutation->new();
+
+ok(defined $obj);
+
+$obj->start(3);
+ok($obj->start, 3);
+
+$obj->end(3);
+ok($obj->end, 3);
+
+$obj->length(2);
+ok($obj->length, 2);
+
+$obj->strand('1');
+ok($obj->strand, '1');
+
+ok($obj->primary_tag, 'Variation');
+
+$obj->source_tag('source');
+ok($obj->source_tag, 'source');
+
+$obj->frame(2);
+ok($obj->frame, 2);
+
+$obj->score(2);
+ok($obj->score, 2);
+
+if ($obj->can('dna_mut')) {
+    # NOTE(review): the plan of 36 appears to leave this test out - confirm
+    $obj->dna_mut('dna_mut');
+    ok($obj->dna_mut, 'dna_mut');
+}
+
+$a1 = Bio::Variation::Allele->new(-seq => 'c');
+$obj->allele_ori($a1);
+
+ok($obj->allele_ori->seq, 'c');
+
+$a2 = Bio::Variation::Allele->new('-seq' => 'g');
+$obj->allele_mut($a2);
+
+ok($obj->allele_mut->seq, 'g');
+
+$obj->upStreamSeq('agcacctcccggcgccagtttgctg');
+ok($obj->upStreamSeq, 'agcacctcccggcgccagtttgctg');
+
+$obj->dnStreamSeq('tgctgcagcagcagcagcagcagca');
+ok($obj->dnStreamSeq, 'tgctgcagcagcagcagcagcagca');
+
+
+ok($obj->label, 'point, transversion');
+
+$obj->status('proven');
+ok($obj->status, 'proven');
+
+
+$obj->proof('experimental');
+ok($obj->proof, 'experimental');
+
+
+ok($obj->restriction_changes, '-BbvI, +BstXI, -Fnu4HI, -TseI');
+
+$obj->region('region');
+ok($obj->region, 'region');
+
+$obj->region_value('region_value');
+ok($obj->region_value, 'region_value');
+
+$obj->region_dist(-5);
+ok($obj->region_dist, -5);
+
+$obj->numbering('coding');
+ok($obj->numbering, 'coding');
+
+ok(!$obj->CpG);
+
+$obj->mut_number(2);
+ok($obj->mut_number, 2);
+
+
+ok(defined($obj2 = Bio::Variation::DNAMutation->new(
+    '-mut_number' => 2)));
+
+ok($obj2->mut_number, 2);
+
+
+$obj->isMutation(1);
+ok($obj->isMutation);
+
+$obj->add_Allele($a1);
+$obj->add_Allele($a2);
+
+ok(scalar($obj->each_Allele), 2);
+
+
+$obj = Bio::Variation::DNAMutation->new(
+    '-start'       => 23,
+    '-end'         => 24,
+    '-length'      => 2,
+    '-upStreamSeq' => 'gt',
+    '-dnStreamSeq' => 'at',
+    '-proof'       => 'experimental',
+    '-isMutation'  => 1,
+    '-mut_number'  => 2,
+);
+# every constructor argument must be readable through its accessor
+ok($obj->start(), 23);
+ok($obj->end(), 24);
+ok($obj->length(), 2);
+ok($obj->upStreamSeq(), 'gt');
+ok($obj->dnStreamSeq(), 'at');
+ok($obj->proof(), 'experimental');
+ok($obj->mut_number(), 2);
+ok($obj->isMutation);

Added: trunk/packages/bioperl/branches/upstream/current/t/Domcut.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Domcut.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Domcut.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,121 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Domcut.t,v 1.1 2003/07/23 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR $METAERROR);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+    # Systems without an installed Test module fall back to the
+    # copy of Test.pm kept in the t dir.
+    $ERROR = 0;
+
+    eval { require Test; };
+    if ($@) {
+        use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 25;
+    plan(tests => $NUMTESTS);
+
+    eval {
+        require IO::String;
+        require LWP::UserAgent;
+    };
+    if ($@) {
+        warn("IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests");
+        $ERROR = 1;
+    }
+    # check that meta sequences are available, set a flag if not
+    eval {
+        require Bio::Seq::Meta::Array;
+    };
+    if ($@) {
+        warn("Bio::Seq::Meta::Array not installed - will skip tests using meta sequences");
+        $METAERROR = 1;
+    }
+}
+
+END {
+    for my $test_no ($Test::ntest .. $NUMTESTS) {
+        skip('unable to complete all Domcut tests', 1);
+    }
+}
+
+exit(0) if $ERROR == 1;
+
+use Data::Dumper;
+
+use Bio::PrimarySeq;
+require Bio::Tools::Analysis::Protein::Domcut;
+
+ok(1);
+
+# verbose output only when BIOPERLDEBUG is set
+my $verbose = $DEBUG ? 1 : 0;
+
+ok(my $tool = Bio::WebAgent->new(-verbose => $verbose));
+
+
+######## test using PrimarySeq object ##############
+my $seq = Bio::PrimarySeq->new(-seq        => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQPPPPPPPPPPPPPDQRS',
+                               -display_id => 'test2');
+
+ok($tool = Bio::Tools::Analysis::Protein::Domcut->new(-seq => $seq));
+if ($DEBUG) {
+    ok($tool->run());
+    exit if $tool->status eq 'TERMINATED_BY_ERROR';
+    ok(my $raw    = $tool->result(''));
+    ok(my $parsed = $tool->result('parsed'));
+    ok($parsed->[23]{'score'}, '-0.209');
+    my @res = $tool->result('Bio::SeqFeatureI');
+    if (@res) {
+        ok(1);
+    } else {
+        skip('No network access - could not connect to Domcut server', 1);
+    }
+    ok(my $meta = $tool->result('meta'));
+
+    if (!$METAERROR) {    # if Bio::Seq::Meta::Array available
+        ok($meta->named_submeta_text('Domcut', 1, 2), "0.068 0.053");
+        ok($meta->seq, "MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQPPPPPPPPPPPPPDQRS");
+    }
+
+
+########## test using Bio::Seq object ##############
+    ok(my $tool2 = Bio::WebAgent->new(-verbose => $verbose));
+
+    ok(my $seq2 = Bio::Seq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS',
+                                -display_id => 'test2'));
+
+    ok($tool2 = Bio::Tools::Analysis::Protein::Domcut->new(-seq => $seq2->primary_seq));
+    ok($tool2->run());
+
+    ok(my $parsed2 = $tool2->result('parsed'));
+    ok($parsed2->[23]{'score'}, '-0.209');
+
+    @res = $tool2->result('Bio::SeqFeatureI');
+
+    if (@res) {
+        ok(1);
+    } else {
+        skip('No network access - could not connect to Domcut server', 1);
+    }
+    ok(my $meta2 = $tool2->result('meta'));
+
+    if (!$METAERROR) {    # if Bio::Seq::Meta::Array available
+        ok($meta2->named_submeta_text('Domcut', 1, 2), "0.068 0.053");
+        ok($meta2->seq, "MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS");
+    }
+    ok(my $seq4 = Bio::Seq->new());
+    ok($seq2->primary_seq($meta2));
+    ok($seq2->add_SeqFeature(@res));
+    ok($seq2->primary_seq->named_submeta_text('Domcut', 1, 2));
+} else {
+    for my $test_no ($Test::ntest .. $NUMTESTS) {
+        skip("Skipping tests which require remote servers - set env variable BIOPERLDEBUG to test", 1);
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/ECnumber.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ECnumber.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ECnumber.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,96 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: ECnumber.t,v 1.2 2002/09/11 22:29:18 czmasek Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # Systems without an installed Test module fall back to the
+    # copy of Test.pm kept in the t dir.
+    # (that copy is found through the 'use lib' below)
+    eval { require Test; };
+    if ($@) {
+        use lib 't';
+    }
+    use Test;
+    plan(tests => 26);
+}
+
+use Bio::Tools::ECnumber;
+
+
+my $EC1 = Bio::Tools::ECnumber->new( -ec_string => " EC  01. 02.03.00022  ",
+                                     -comment   => "is 1.2.3.22" );
+
+# a class-level number: the serial number is left open
+my $EC2 = Bio::Tools::ECnumber->new( -ec_string => "ec:1.2.3.-",
+                                     -comment   => "is 1.2.3.-" );
+
+# $EC3 is a copy of $EC1 and must report the same values
+my $EC3 = $EC1->copy();
+
+ok( $EC1->isa( "Bio::Tools::ECnumber" ) );
+
+ok( $EC3->isa( "Bio::Tools::ECnumber" ) );
+
+ok( $EC1->EC_string(), "1.2.3.22" );
+# check the copy too, rather than asserting the same thing on $EC1 twice
+ok( $EC3->EC_string(), "1.2.3.22" );
+
+ok( $EC1->to_string(), "1.2.3.22" );
+
+ok( $EC1->comment(),   "is 1.2.3.22" );
+
+ok($EC1->enzyme_class(), "1");
+
+ok($EC1->sub_class(), "2");
+
+ok($EC1->sub_sub_class(), "3");
+
+ok($EC1->serial_number(), "22");
+
+ok($EC3->is_equal($EC1));
+
+ok($EC3->is_equal("1.2.3.22"));
+
+ok(!$EC3->is_equal("1.2.3.-"));
+
+ok(!$EC3->is_equal("1.2.3.23"));
+
+ok($EC1->is_member($EC2));
+
+ok($EC1->is_member("1.2.3.-"));
+
+$EC1->init();
+
+ok($EC2->is_member($EC1));
+
+ok($EC1->to_string(), "-.-.-.-");
+
+$EC1->enzyme_class(44);
+
+$EC1->sub_class("033");
+
+$EC1->sub_sub_class(22);
+
+$EC1->serial_number("-");
+
+ok($EC1->to_string(), "44.33.22.-");
+
+ok(!$EC1->is_member("44.33.23.-"));
+
+ok(!$EC1->is_member("44.33.22.1"));
+
+ok($EC1->is_member("-.-.-.-"));
+
+ok($EC1->is_member("44.-.-.-"));
+
+ok($EC1->is_member("44.33.-.-"));
+
+ok($EC1->is_member("EC 44.33.22.-"));
+
+ok(!$EC1->is_member("45.33.22.-"));
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/ELM.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ELM.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ELM.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,77 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: ELM.t,v 1.5 2005/09/18 00:57:50 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+    # To handle systems with no installed Test module we put the t dir
+    # (where a copy of Test.pm is located) on @INC. NOTE: 'use lib' and
+    # 'use Test' take effect at compile time, whatever the eval reports.
+    eval { require Test; };
+    $ERROR = 0;
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 13;
+    plan tests => $NUMTESTS;
+
+    eval {    # probe the modules whose absence makes the tested module unusable
+        require IO::String;
+        require LWP::UserAgent;
+        require HTML::HeadParser;
+    };
+    if( $@ ) {    # name every probed module so the reported reason is accurate
+        warn("IO::String, LWP::UserAgent or HTML::HeadParser not installed. This means that the module is not usable. Skipping tests");
+        $ERROR = 1;
+    }
+}
+
+END {	# runs at exit: report every test that has not run yet as skipped
+	foreach ( $Test::ntest..$NUMTESTS) {	# from the next test number up to the planned total
+		skip('unable to complete all of the ELM tests',1);
+	}
+}
+
+exit 0 if $ERROR ==  1;    # set in BEGIN when a required module failed to load
+
+use Data::Dumper;
+
+require Bio::Tools::Analysis::Protein::ELM;
+use Bio::SeqIO;
+use Bio::PrimarySeq;
+require Bio::WebAgent;
+
+ok 1;
+
+my $verbose = 0;
+$verbose = 1 if $DEBUG;
+
+ok my $tool = Bio::WebAgent->new(-verbose =>$verbose);
+
+# the first entry of t/data/swiss.dat is used as the query sequence
+my $seqio = Bio::SeqIO->new( -verbose => $verbose,
+                             -format  => 'swiss',
+                             -file    => Bio::Root::IO->catfile('t','data', 'swiss.dat'));
+
+my $seq = $seqio->next_seq();
+ok $tool = Bio::Tools::Analysis::Protein::ELM->new(
+                                        -seq=>$seq->primary_seq);
+ok $tool->compartment(['golgi', 'er']);
+ok my $cmp = $tool->compartment();
+ok $cmp->[1], 'GO:0005783';
+ok $tool->species(9606);
+ok $tool->species, 9606;
+
+ok $tool->run ();
+exit if $tool->status eq 'TERMINATED_BY_ERROR';    # the END block skips what is left
+ok my $raw = $tool->result('');
+print $raw if $verbose;
+ok my $parsed = $tool->result('parsed');
+ok $parsed->{'CLV_NDR_NDR_1'}{'locus'}[0], '54-56';
+ok my @res = $tool->result('Bio::SeqFeatureI');


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/ELM.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/EMBL_DB.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/EMBL_DB.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/EMBL_DB.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: EMBL_DB.t,v 1.16 2006/06/29 15:20:44 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $error;
+
+BEGIN {
+    # To handle systems with no installed Test module we put the t dir
+    # (where a copy of Test.pm is located) on @INC. NOTE: 'use lib' and
+    # 'use Test' take effect at compile time, whatever the eval reports.
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 15;
+    plan tests => $NUMTESTS;
+    eval { require IO::String;
+           require HTTP::Request::Common; };
+    if( $@ ) {    # either module may be the missing one, so name both
+        for( $Test::ntest..$NUMTESTS ) {
+            skip("IO::String or HTTP::Request::Common not installed. This means the Bio::DB::* modules are not usable. Skipping tests",1);
+        }
+        $error = 1;
+    }
+}
+
+END {    # runs at exit: report every test that has not run yet as skipped
+    foreach ( $Test::ntest..$NUMTESTS) {
+        skip('unable to run all of the EMBL_DB tests',1);
+    }
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require Bio::DB::EMBL;
+
+my $verbose = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## $NUMTESTS in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my ($db,$seq,$seqio);
+# get a single seq
+
+$seq = $seqio = undef;
+
+eval {    # any exception in here is treated as "EMBL not reachable" below
+    ok defined($db = Bio::DB::EMBL->new(-verbose=>$verbose));
+    ok(defined($seq = $db->get_Seq_by_acc('J00522')));
+    ok( $seq->length, 408);
+    ok defined ($db->request_format('fasta'));
+    ok(defined($seq = $db->get_Seq_by_acc('J02231')));
+    ok( $seq->id, 'embl|J02231|J02231');
+    ok( $seq->length, 200);
+    ok( defined($db = Bio::DB::EMBL->new(-verbose=>$verbose,
+                                         -retrievaltype => 'tempfile')));
+    ok(defined($seqio = $db->get_Stream_by_id(['BUM'])));
+    undef $db; # the stream must keep working once the db handle is gone
+    ok( defined($seq = $seqio->next_seq()));
+    ok( $seq->length, 200);
+};
+
+if ($@) {
+    if( $DEBUG ) {
+        warn "Warning: Couldn't connect to EMBL with Bio::DB::EMBL.pm!\n$@";
+    }
+    foreach ( $Test::ntest..$NUMTESTS) {    # skip everything not yet reported
+        skip('could not connect to embl',1);
+    }
+    exit(0);
+}
+
+$seq = $seqio = undef;
+
+eval {    # batch retrieval of several accessions through one stream
+    $db = Bio::DB::EMBL->new(-verbose => $verbose,
+                             -retrievaltype => 'tempfile',
+                             -format => 'fasta'
+                             );
+    ok( defined($seqio = $db->get_Stream_by_acc(['J00522 AF303112 J02231'])));
+    my %seqs;
+    # don't assume anything about the order of the sequences
+    while ( my $s = $seqio->next_seq ) {
+        my ($type,$x,$name) = split(/\|/,$s->display_id);
+        $seqs{$x} = $s->length;    # keyed by the second '|'-separated field of the id
+    }
+    ok($seqs{'J00522'},408);
+    ok($seqs{'AF303112'},1611);
+    ok($seqs{'J02231'},200);
+};
+
+if ($@) {
+    if( $DEBUG ) {
+        warn "Batch access test failed.\nError: $@\n";
+    }
+    foreach ( $Test::ntest..$NUMTESTS ) { skip('no network access',1); }
+}
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/EMBOSS_Tools.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/EMBOSS_Tools.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/EMBOSS_Tools.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+# -*-Perl-*-
+# $Id: EMBOSS_Tools.t,v 1.2 2003/10/13 12:15:32 jason Exp $
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+my $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';  # NOTE: 'use' acts at compile time, so 't' is added to @INC whatever $@ holds
+    }
+    use Test;  # also compile time: Test is loaded before the eval above is run
+    plan tests => 12;  # matches the 12 ok() calls in the script body
+}
+
+use Bio::Tools::EMBOSS::Palindrome;
+use Bio::Tools::GFF;
+ok(1);
+
+my $parser = Bio::Tools::EMBOSS::Palindrome->new(
+    -file => Bio::Root::IO->catfile(qw( t data humts1.pal)));
+
+my $seq = $parser->next_seq;
+ok($seq);
+ok($seq->display_id, 'HUMTS1');
+ok($seq->length, 18596);
+my @features = $seq->get_SeqFeatures();
+ok(scalar @features, 23);
+
+ok($features[0]->feature1->start, 126);
+ok($features[0]->feature1->end, 142);
+ok($features[0]->feature1->strand, 1);
+ok($features[0]->feature1->seq_id, 'HUMTS1');
+
+# second member of the first feature pair
+ok($features[0]->feature2->start, 201);
+ok($features[0]->feature2->end, 217);
+ok($features[0]->feature2->strand, -1);
+
+if( $DEBUG ) {    # only with BIOPERLDEBUG set: write the first feature through the GFF writer
+    my $out = Bio::Tools::GFF->new(-gff_version => 2);
+    $out->write_feature($features[0]);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/ESEfinder.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ESEfinder.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ESEfinder.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,63 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: ESEfinder.t,v 1.13.6.5 2006/11/08 17:25:55 sendu Exp $ 
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 14;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';  # NOTE: 'use' acts at compile time, so 't/lib' is added whatever $@ holds
+	}
+	use Test::More;
+	
+	eval {  # probe the modules listed in the skip_all message below
+		require IO::String; 
+		require LWP::UserAgent;
+		require Bio::WebAgent;
+		require HTML::HeadParser;
+		require HTTP::Request::Common;
+	};
+	if ($@) {
+		plan skip_all => 'IO::String, LWP::UserAgent, Bio::WebAgent, HTML::HeadParser, or HTTP::Request::Common not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+	
+	use_ok('Bio::Tools::Analysis::DNA::ESEfinder');  # the four use_ok calls count towards $NUMTESTS
+	use_ok('Data::Dumper');
+	use_ok('Bio::PrimarySeq');
+    use_ok('Bio::Seq');
+}
+
+#######all these tests work with 1ary seq########
+my $seq = Bio::PrimarySeq->new(-id=>'bioperl',
+                               -seq=>'atcgatgctatgcatgctatgggtgtgattcgatgcgactgttcatcgtagccccccccccccccctttt');
+ok my $tool = Bio::Tools::Analysis::DNA::ESEfinder->new(-seq => $seq);
+
+SKIP: {
+	skip "Skipping tests which require remote servers, set BIOPERLDEBUG=1 to test", 9 unless $DEBUG;
+	eval {$tool->run;};  # an exception from run() is reported as a connection failure
+	skip "Could not connect to ESEfinder server, skipping those tests", 9 if $@;
+    ok my @res = $tool->result('Bio::SeqFeatureI');
+	ok @res > 0;
+    ok my $raw = $tool->result('');
+    ok my $parsed = $tool->result('parsed');
+    ok my $meta = $tool->result('all');
+    is $parsed->[0][1], 41;
+	
+    eval {require Bio::Seq::Meta::Array;};  # needed only by the last three tests
+	skip "Bio::Seq::Meta::Array not installed. Skipping tests using meta sequences", 3 if $@;
+	is $meta->{'seq'}, "atcgatgctatgcatgctatgggtgtgattcgatgcgactgttcatcgtagccccccccccccccctttt";
+	is $meta->named_submeta_text('ESEfinder_SRp55', 1,2), "-3.221149 -1.602223";
+	is $meta->seq, "atcgatgctatgcatgctatgggtgtgattcgatgcgactgttcatcgtagccccccccccccccctttt";
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/EUtilities.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/EUtilities.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/EUtilities.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,690 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: EUtilities.t,v 1.10.2.4 2006/11/30 09:24:00 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+# Note this uses Test::More; this should catch the few perl versions w/o
+# this test suite
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $error);
+
+BEGIN { 
+	$NUMTESTS = 453;
+	$error = 0;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	# this seems to work for perl 5.6 and perl 5.8
+	eval {require Test::More;};
+	
+	if ($@) {
+		use lib 't/lib';
+	}
+	
+	use Test::More;
+	
+	eval {
+		require XML::Simple;
+		require LWP::UserAgent;
+	};
+	
+	if (!$DEBUG) {
+		plan skip_all => 'Set BIOPERLDEBUG=1 to run tests';
+	} elsif ($@) {
+		plan skip_all => 'Requires LWP::UserAgent and XML::Simple; skipping...';
+	} else {
+		plan tests => $NUMTESTS;
+	}
+	use_ok('Bio::DB::EUtilities');
+}
+
+require_ok('LWP::UserAgent');
+require_ok('XML::Simple');
+
+# NOTE : Bio::DB::EUtilities is just a specialized pipeline to get any 
+# data available via NCBI's Entrez interface, with a few convenience methods
+# to get UIDs and other additional information.  All data returned
+# using EFetch is raw (not Bioperl objects) and is meant to be piped into
+# other Bioperl modules at a later point for further processing
+
+# protein acc
+my @acc = qw(MUSIGHBA1 P18584 CH402638);
+
+# protein GI
+my @ids = sort qw(1621261 89318838 68536103 20807972 730439);
+
+# test search term
+my $term = 'dihydroorotase AND human';
+
+my ($eutil, $response);
+
+my %dbs = (taxonomy => 1,
+		   nucleotide =>1,
+		   pubmed => 1);
+my %links = (protein_taxonomy => 1,
+			 protein_nucleotide => 1,
+			 protein_nucleotide_wgs => 1,
+			 protein_pubmed => 1,
+			 protein_pubmed_refseq => 1
+			 );
+my %scores = (   1621261 =>   2147483647,
+				20807972 =>          423,
+				68536103 =>          554,
+				  730439 =>          411,
+				89318838 =>          725,);
+
+# Simple EFetch
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -db         => 'protein',
+                                    -id         => $ids[0],
+									-rettype 	=> 'fasta'
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("EFetch HTTP error: $@", 2) if $@;
+	isa_ok($response, 'HTTP::Response');
+	my $content = $response->content;
+	like($content, qr(PYRR \[Mycobacterium tuberculosis H37Rv\]),
+		 'EFetch: Fasta format');
+	
+	# reuse the EUtilities webagent
+	$eutil->id($ids[1]);
+	$eutil->rettype('gb');
+	eval {$response = $eutil->get_response; };
+	skip("EFetch HTTP error: $@", 2) if $@;
+	isa_ok($response, 'HTTP::Response');
+	$content = $response->content;
+	like($content, qr(^LOCUS\s+NP_623143),'EFetch: GenBank format');
+}
+
+# EPost->EFetch with History (Cookie)
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'epost',
+                                    -db         => 'protein',
+                                    -id         => \@ids,
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("EPost HTTP error: $@", 12) if $@;
+	isa_ok($response, 'HTTP::Response');
+	my $cookie = $eutil->next_cookie;
+	isa_ok($cookie, 'Bio::DB::EUtilities::Cookie');
+	
+	# set for epost, esearch, elink
+	is($cookie->eutil, 'epost', '$epost->cookie->eutil()');
+	is($cookie->database, 'protein', '$epost->cookie->database()');
+	
+	# these are not set using epost
+	is($cookie->elink_dbfrom, undef, '$epost->cookie->elink_dbfrom()');
+	is($cookie->esearch_total, undef, '$epost->cookie->esearch_total()');
+	is($cookie->esearch_query, undef, '$epost->cookie->esearch_query()');
+	is($cookie->elink_queryids, undef, '$epost->cookie->elink_queryids()');
+	is($cookie->elink_linkname, undef, '$epost->cookie->elink_linkname()');
+	
+	# check the actual cookie
+	my ($webenv, $key) = @{ $cookie->cookie };
+	like($webenv, qr{^\S{50}}, '$epost->cookie->cookie() WebEnv');
+	like($key, qr{^\d+}, '$epost->cookie->cookie() query key');
+	
+	# can we fetch the sequences using the cookie
+	my $efetch = Bio::DB::EUtilities->new(
+								-cookie		=> $cookie,
+								-rettype  	=> 'fasta'
+								  );
+	# look for fasta headers
+	my $total = grep(m{^>.*$}, split "\n", $efetch->get_response->content);
+	is($total, 5, 'EPost to EFetch');
+}
+
+# ESearch, ESearch History
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'esearch',
+                                    -db         => 'protein',
+                                    -term       => $term,
+									-retmax		=> 100
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("ESearch HTTP error:$@", 3) if $@;
+	isa_ok($response, 'HTTP::Response');
+	
+	# can't really check for specific ID's but can check total ID's returned
+	my @esearch_ids = $eutil->get_ids;
+	is(scalar(@esearch_ids), 100, '$esearch->get_ids()');
+	
+	cmp_ok($eutil->esearch_count, '>', 117, '$esearch->esearch_count()');
+
+	# usehistory (get a cookie)
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'esearch',
+                                    -db         => 'protein',
+									-usehistory => 'y',
+                                    -term       => $term,
+                                      );
+	
+	eval {$response = $eutil->get_response; };
+	skip("ESearch HTTP error:$@", 11) if $@;
+	my $cookie = $eutil->next_cookie;
+	isa_ok($cookie, 'Bio::DB::EUtilities::Cookie');
+	is($cookie->eutil, 'esearch', '$esearch->cookie->eutil()');
+	is($cookie->database, 'protein', '$esearch->cookie->database()');
+	cmp_ok($cookie->esearch_total, '>', 117, '$esearch->cookie->esearch_total()');
+	is($cookie->esearch_query, $term, '$esearch->cookie->esearch_query()');
+	
+	# these are not set using esearch
+	is($cookie->elink_dbfrom, undef, '$esearch->cookie->elink_dbfrom()');
+	is($cookie->elink_queryids, undef, '$esearch->cookie->elink_queryids()');
+	is($cookie->elink_linkname, undef, '$esearch->cookie->elink_linkname()');
+	
+	# check the actual cookie
+	my ($webenv, $key) = @{ $cookie->cookie };
+	like($webenv, qr{^\S{50}}, '$esearch->cookie->cookie() WebEnv');
+	like($key, qr{^\d+}, '$esearch->cookie->cookie() query key');
+	
+	# can we fetch the sequences using the cookie?
+	my $efetch = Bio::DB::EUtilities->new(
+								-cookie		=> $cookie,
+								-rettype  	=> 'fasta',
+								-retmax 	=> 5
+								  );
+	# look for the fasta headers
+	my $total = grep(m{^>.*$}, split "\n", $efetch->get_response->content);
+	is($total, 5, 'ESearch to EFetch'); 
+}
+
+# EInfo
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'einfo',
+                                    -db  		=> 'protein',
+                                      );
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("EInfo HTTP error:$@", 10) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eInfoResult>), 'EInfo response');
+	is($eutil->einfo_dbs->[0], 'protein', '$einfo->einfo_dbs()');
+	like($eutil->einfo_db_lastupdate, qr(\d{4}\/\d{2}\/\d{2}\s\d{2}:\d{2}),
+		 '$einfo->einfo_db_lastupdate()');
+	cmp_ok($eutil->einfo_db_count, '>', 9200000, '$einfo->einfo_db_count()');
+	is($eutil->einfo_db_desc, 'Protein sequence record', '$einfo->einfo_db_desc()');
+	my @links = $eutil->einfo_dblink_info;
+	my @fields = $eutil->einfo_dbfield_info;
+	cmp_ok(scalar(@links), '>',30, '$einfo->einfo_dblink_info()');
+	is(scalar(@fields), 24, '$einfo->einfo_dbfield_info()');
+
+	my %field = ('SingleToken' => 'Y',
+				'Hierarchy' => 'N',
+				'IsDate' => 'N',
+				'TermCount' => '0',
+				'Description' => 'Unique number assigned to each sequence',
+				'Name' => 'UID',
+				'IsNumerical' => 'Y');
+	my %link = ('DbTo' => 'cdd',
+				'Description' => 'Link to conserved domains within a protein',
+				'Name' => 'protein_cdd',
+				'Menu' => 'Conserved Domain Links');
+
+	eq_hash($fields[1], \%field, '$einfo->einfo_dbfield_info()');  # NOTE(review): eq_hash only returns a boolean; in void context nothing is reported
+	eq_hash($links[1], \%link, '$einfo->einfo_dblink_info()');  # NOTE(review): same here; is_deeply would assert, but changes the test count - confirm against $NUMTESTS
+	
+	# all databases (list)
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'einfo',
+                                      );
+	
+	eval {$response = $eutil->get_response; };
+	skip("EInfo HTTP error:$@", 1) if $@;
+	
+	my @db = sort qw(pubmed  protein  nucleotide  nuccore  nucgss  nucest  structure
+	genome  books  cancerchromosomes  cdd  domains  gene  genomeprj  gensat
+	geo  gds  homologene  journals  mesh  ncbisearch  nlmcatalog  omia  omim
+	pmc  popset  probe  pcassay  pccompound  pcsubstance  snp  taxonomy toolkit
+	unigene  unists);
+	
+	my @einfo_dbs = sort $eutil->einfo_dbs;
+	cmp_ok(scalar(@einfo_dbs), '>=', scalar(@db), 'All EInfo databases');  # only the number of databases is compared, not their names
+}
+
+# ELink - normal (single ID array) - single db - ElinkData tests
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy',
+									-dbfrom		=> 'protein',
+									-id			=> \@ids,
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("ELink HTTP error:$@", 10) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');
+	my @ids2 = qw(350054 306537 273068 83332 1394);
+	is_deeply([sort $eutil->get_ids], [sort @ids2],'$elink->get_ids()');
+	
+	# Linkset tests
+	is($eutil->get_linkset_count, 1, '$elink->get_linkset_count()');
+	my $linkobj = $eutil->next_linkset;
+	isa_ok($linkobj, 'Bio::DB::EUtilities::ElinkData');
+	is($linkobj->elink_dbfrom, 'protein', '$linkdata->elink_dbfrom()');
+	is_deeply([sort $linkobj->elink_queryids],
+			  [sort @ids], '$linkdata->elink_queryids()');
+	is($linkobj->elink_command, 'neighbor', '$linkdata->elink_command()');
+	my $db = $linkobj->next_linkdb;
+	is($db, 'taxonomy', '$linkdata->next_linkdb()');
+	is_deeply([sort $linkobj->get_LinkIds_by_db($db)],
+			  [sort @ids2], '$linkdata->get_LinkIds_by_db($db)');	
+}
+
+# ELink - normal (single ID array), multiple dbs 
+
+SKIP: {
+	# can use 'all' for db, but takes a long time; use named dbs instead
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy,nucleotide,pubmed',
+									-dbfrom		=> 'protein',
+									-id			=> \@ids,
+                                      );
+	
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("ELink HTTP error:$@", 14) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');;
+	
+	# This is designed to fail; grabbing IDs w/o knowing which DB
+	# they belong to in a multiple DB search is fatal
+	my @ids2;
+	eval {@ids2 = $eutil->get_ids;};
+	ok($@,'$elink->get_ids()');
+	
+	# Must grab the linkset first...
+	is($eutil->get_linkset_count, 1, '$elink->get_linkset_count()');
+	my $linkobj = $eutil->next_linkset;
+	isa_ok($linkobj, 'Bio::DB::EUtilities::ElinkData');
+	
+	# then iterate through each database, grabbing the IDs for each database
+	my %ids = (
+			'taxonomy' => [sort qw(350054 306537 273068 83332 1394)],
+			'nucleotide' => [sort qw(89318678 68535062 38490250 20806542)],
+			'pubmed' => [sort qw(15968079 12368430 11997336 9634230 8206848)],
+		   );
+	
+	while (my $db = $linkobj->next_linkdb) {
+		ok(exists $ids{$db}, "ElinkData database: $db");
+		@ids2 = sort $linkobj->get_LinkIds_by_db($db);
+		is_deeply($ids{$db}, \@ids2, "ElinkData database IDs: $db")
+	}
+	# other ElinkData methods
+	is($linkobj->elink_dbfrom, 'protein', '$linkdata->elink_dbfrom()');
+	is_deeply([sort $linkobj->elink_queryids],
+			  [sort @ids], '$linkdata->elink_queryids()');
+	is($linkobj->elink_command, 'neighbor', '$linkdata->elink_command()');
+}
+
+# ELink - normal (single ID array), multiple dbs, cookies
+
+SKIP: {
+	# can use 'all' for db, but takes a long time; use named dbs instead
+	# this retrieves cookies instead (no ElinkData objects are stored)
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy,nucleotide,pubmed',
+									-dbfrom		=> 'protein',
+									-id			=> \@ids,
+									-cmd		=> 'neighbor_history'
+                                      );
+	
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("ELink HTTP error:$@", 49) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');;
+	
+	# This is designed to fail; grabbing IDs w/o knowing which DB
+	# they belong to in a multiple DB search is fatal
+	my @ids2;
+	eval {@ids2 = $eutil->get_ids;};
+	ok($@,'$elink->get_ids()');
+	
+	# No ElinkData objs
+	is($eutil->get_linkset_count, 0, '$elink->get_linkset_count()');
+	
+	# There are ELink cookies instead
+	is($eutil->get_cookie_count, 5, '$elink->get_cookie_count()');
+	my $ct = 0;
+	while (my $cookie = $eutil->next_cookie) {
+		isa_ok($cookie, 'Bio::DB::EUtilities::Cookie');
+		is($cookie->eutil, 'elink', '$elink->cookie->eutil()');
+		ok(exists $dbs{$cookie->database},  '$elink->cookie->database()');
+		is($cookie->elink_dbfrom, 'protein', '$elink->cookie->elink_dbfrom()');
+		@ids2 = sort $cookie->elink_queryids;
+		is_deeply(\@ids2, \@ids, '$elink->cookie->elink_queryids()');
+		ok(exists $links{$cookie->elink_linkname}, '$elink->cookie->elink_linkname()');
+		
+		# these are not set using elink
+		is($cookie->esearch_query, undef, '$elink->cookie->esearch_query()');
+		is($cookie->esearch_total, undef, '$elink->cookie->esearch_total()');
+		
+		# check the actual cookie data
+		my ($webenv, $key) = @{ $cookie->cookie };
+		like($webenv, qr{^\S{50}}, '$elink->cookie->cookie() WebEnv');
+		like($key, qr{^\d+}, '$elink->cookie->cookie() query key');
+		
+		# can we retrieve the data via efetch?  Test one...
+		# Note the cookie has all the information contained to
+		# retrieve data; no additional parameters needed
+		if($cookie->database eq 'taxonomy') {
+			my $efetch = Bio::DB::EUtilities->new(-cookie => $cookie);
+			my $content = $efetch->get_response->content;
+			like($content, qr(<TaxaSet>), 'ELink to EFetch : taxonomy');
+		}
+		last if $ct == 4;
+		$ct++;
+	}
+}
+
+# ELink (multi_id), single db
+# this is a flag set to get one-to-one correspondence for ELink data
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy',
+									-dbfrom		=> 'protein',
+									-multi_id 	=> 1,
+									-id			=> \@ids,
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	
+	skip("ELink HTTP error:$@", 26) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');
+	my @ids2 = qw(350054 306537 273068 83332 1394);
+	
+	# This is designed to fail; IDs present in individual ElinkData objects
+	# for one-to-one correspondence with ID groups
+	eval{$eutil->get_ids;};
+	ok($@,'$elink->get_ids()');
+	
+	# Linkset tests
+	is($eutil->get_linkset_count, 5, '$elink->get_linkset_count()');
+	my @qids;
+	my @retids;
+	my $ct = 0;
+	# ids may not be returned in same order as array, so need to grab and sort
+	while (	my $linkobj = $eutil->next_linkset) {
+		isa_ok($linkobj, 'Bio::DB::EUtilities::ElinkData');
+		is($linkobj->elink_dbfrom, 'protein', '$linkdata->elink_dbfrom()');
+		is($linkobj->elink_command, 'neighbor', '$linkdata->elink_command()');
+		push @qids, $linkobj->elink_queryids;
+		while (	my $db = $linkobj->next_linkdb) {
+			is($db, 'taxonomy', '$linkdata->next_linkdb()');
+			push @retids, $linkobj->get_LinkIds_by_db($db);
+		}
+		last if $ct == 4;
+		$ct++
+	}
+	is_deeply([sort @qids], [sort @ids], '$linkdata->elink_queryids()');
+	is_deeply([sort @retids], [sort @ids2], '$linkdata->get_LinkIds_by_db($db)');
+}
+
+# ELink (multi_id, cookies)
+
+# these need to be cleaned up
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy',
+									-dbfrom		=> 'protein',
+									-multi_id 	=> 1,
+									-id			=> \@ids,
+									-cmd		=> 'neighbor_history'
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	
+	# check this number, likely wrong
+	skip("ELink HTTP error:$@", 20) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');
+	my @ids2 = qw(350054 306537 273068 83332 1394);
+	
+	# This is designed to fail; IDs present in individual ElinkData objects
+	# for one-to-one correspondence with ID groups
+	eval{$eutil->get_ids;};
+	ok($@,'$elink->get_ids()');
+	
+	# Linkset tests (there aren't any)
+	is($eutil->get_linkset_count, 0, '$elink->get_linkset_count()');
+	is($eutil->get_cookie_count, 5, '$elink->get_cookie_count()');
+	
+	my $efetch = Bio::DB::EUtilities->new();
+	my $ct = 0;
+	while (my $cookie = $eutil->next_cookie) {
+		isa_ok($cookie, 'Bio::DB::EUtilities::Cookie');
+		is($cookie->eutil, 'elink', '$elink->cookie->eutil()');
+		is($cookie->database, 'taxonomy',  '$elink->cookie->database()');
+		is($cookie->elink_dbfrom, 'protein', '$elink->cookie->elink_dbfrom()');
+		@ids2 = $cookie->elink_queryids;
+		
+		# should be single IDs, one per ElinkData obj
+		is(scalar(@ids2), 1, '$elink->cookie->elink_queryids()');
+		is($cookie->elink_linkname, 'protein_taxonomy',
+		   '$elink->cookie->elink_linkname()');
+		# these are not set using elink
+		is($cookie->esearch_query, undef, '$elink->cookie->esearch_query()');
+		is($cookie->esearch_total, undef, '$elink->cookie->esearch_total()');
+		
+		# check the actual cookie data
+		my ($webenv, $key) = @{ $cookie->cookie };
+		like($webenv, qr{^\S{50}}, '$elink->cookie->cookie() WebEnv');
+		like($key, qr{^\d+}, '$elink->cookie->cookie() query key');
+		
+		# can we retrieve the data via efetch?  Test one...
+		# Note the cookie has all the information contained to
+		# retrieve data; no additional parameters needed
+		
+		if($cookie->database eq 'taxonomy') {
+			$efetch->add_cookie($cookie);
+			my $content = $efetch->get_response->content;
+			like($content, qr(<TaxaSet>), 'ELink to EFetch : taxonomy');
+		}
+		last if $ct ==4;
+		$ct++;
+	}
+}
+
+
+
+# ELink (multi_id, multidbs)
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy,nucleotide,pubmed',
+									-dbfrom		=> 'protein',
+									-multi_id 	=> 1,
+									-id			=> \@ids,
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	
+	# check this number, likely wrong
+	skip("ELink HTTP error:$@", 20) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');
+	my @ids2 = qw(350054 306537 273068 83332 1394);
+	
+	# This is designed to fail; IDs present in individual ElinkData objects
+	# for one-to-one correspondence with ID groups
+	eval{$eutil->get_ids;};
+	ok($@,'$elink->get_ids()');
+	
+	# Linksets are expected here; it is the cookies that are absent
+	is($eutil->get_linkset_count, 5, '$elink->get_linkset_count()');
+	is($eutil->get_cookie_count, 0, '$elink->get_cookie_count()');
+	
+	# Linkset tests
+	my $ct = 0;
+	while (	my $linkobj = $eutil->next_linkset) {
+		isa_ok($linkobj, 'Bio::DB::EUtilities::ElinkData');
+		is($linkobj->elink_dbfrom, 'protein', '$linkdata->elink_dbfrom()');
+		is($linkobj->elink_command, 'neighbor', '$linkdata->elink_command()');
+		my @dbs = $linkobj->get_all_linkdbs;
+		cmp_ok(scalar(@dbs), '>=' , 2, '$linkobj->get_all_linkdbs()');
+		while (	my $db = $linkobj->next_linkdb) {
+			is($dbs{$db}, 1, '$linkdata->next_linkdb()');
+			my @ids2 = $linkobj->get_LinkIds_by_db($db);
+			cmp_ok(scalar(@ids2), '>=', 1, '$linkdata->get_LinkIds_by_db($db)');
+		}
+		last if $ct == 4;
+		$ct++;
+	}
+}
+
+# ELink (multi_id, multidb, cookies)
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'taxonomy,nucleotide,pubmed',
+									-dbfrom		=> 'protein',
+									-multi_id 	=> 1,
+									-id			=> \@ids,
+									-cmd		=> 'neighbor_history'
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	
+	# check this number, likely wrong
+	skip("ELink HTTP error:$@", 20) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');
+	my @ids2 = qw(350054 306537 273068 83332 1394);
+	
+	# This is designed to fail; IDs present in individual ElinkData objects
+	# for one-to-one correspondence with ID groups
+	eval{$eutil->get_ids;};
+	ok($@,'$elink->get_ids()');
+	
+	# Linkset tests (there aren't any)
+	is($eutil->get_linkset_count, 0, '$elink->get_linkset_count()');
+	cmp_ok($eutil->get_cookie_count, '>', 15, '$elink->get_cookie_count()');
+	my $ct = 0;
+	while (my $cookie = $eutil->next_cookie) {
+		isa_ok($cookie, 'Bio::DB::EUtilities::Cookie');
+		is($cookie->eutil, 'elink', '$elink->cookie->eutil()');
+		is($dbs{$cookie->database}, 1,  '$elink->cookie->database()');
+		is($cookie->elink_dbfrom, 'protein', '$elink->cookie->elink_dbfrom()');
+		@ids2 = $cookie->elink_queryids;
+		
+		# should be single IDs, one per ElinkData obj
+		is(scalar(@ids2), 1, '$elink->cookie->elink_queryids()');
+		is($links{$cookie->elink_linkname}, 1,
+		   '$elink->cookie->elink_linkname()');
+		# these are not set using elink
+		is($cookie->esearch_query, undef, '$elink->cookie->esearch_query()');
+		is($cookie->esearch_total, undef, '$elink->cookie->esearch_total()');
+		
+		# check the actual cookie data
+		my ($webenv, $key) = @{ $cookie->cookie };
+		like($webenv, qr{^\S{50}}, '$elink->cookie->cookie() WebEnv');
+		like($key, qr{^\d+}, '$elink->cookie->cookie() query key');
+		last if $ct == 14;
+		$ct++;
+	}
+}
+
+# ELink (scores)
+
+SKIP: {
+	# an elink back to the same db (db eq dbfrom) returns similarity scores
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'elink',
+                                    -db  		=> 'protein',
+									-dbfrom		=> 'protein',
+									-id			=> $ids[0],
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	
+	# check this number, likely wrong
+	skip("ELink HTTP error:$@", 20) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eLinkResult>), 'ELink response');
+	
+	# only one linkset, so this actually works (not recommended)
+	my @ids2 = $eutil->get_ids;
+	cmp_ok(scalar(@ids2), '>' ,765 ,'$elink->get_ids()');
+	
+	# One linkset is expected here; it is the cookies that are absent
+	is($eutil->get_linkset_count, 1, '$elink->get_linkset_count()');
+	is($eutil->get_cookie_count, 0, '$elink->get_cookie_count()');
+	
+	while (	my $linkobj = $eutil->next_linkset) {
+		isa_ok($linkobj, 'Bio::DB::EUtilities::ElinkData');
+		is($linkobj->elink_dbfrom, 'protein', '$linkdata->elink_dbfrom()');
+		is($linkobj->elink_command, 'neighbor', '$linkdata->elink_command()');
+		
+		# get db with scores
+		while (	my $db = $linkobj->next_scoredb) {
+			is($db,'protein', '$linkdata->next_scoredb()');
+			my @ids2 = $linkobj->get_LinkIds_by_db($db);
+			cmp_ok(scalar(@ids2), '>', 765, '$linkdata->get_LinkIds_by_db($db)');
+			for my $id (@ids) {
+				is($linkobj->get_score($id), $scores{$id}, '$linkdata->get_score()');
+			}
+		}
+	}
+}
+
+# Although the other EUtilities are available, no postprocessing is done on the
+# returned XML yet
+
+SKIP: {
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'esummary',
+                                    -db         => 'protein',
+                                    -id		    => \@ids,
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("ESummary HTTP error:$@", 2) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eSummaryResult>), 'ESummary response');
+	
+	$eutil = Bio::DB::EUtilities->new(
+                                    -eutil      => 'egquery',
+                                    -term		=> $term,
+                                      );
+		  
+	isa_ok($eutil, 'Bio::DB::GenericWebDBI');
+	eval {$response = $eutil->get_response; };
+	skip("EGQuery HTTP error:$@", 2) if $@;
+	isa_ok($response, 'HTTP::Response');
+	like($response->content, qr(<eGQueryResult>), 'EGQuery response');
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/EUtilities.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/EncodedSeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/EncodedSeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/EncodedSeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,83 @@
+
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: EncodedSeq.t,v 1.4 2003/11/18 16:32:27 amackey Exp $
+use strict;
+use constant NUMTESTS => 37;
+
+BEGIN {     # runs at compile time: load Test and declare the plan up front
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';	# NOTE: 'use' is itself compile-time, so 't' is added to @INC regardless of this condition
+    }
+    use Test;
+    
+    plan tests => NUMTESTS;	# NUMTESTS is the constant declared just above this block
+}
+use Bio::Seq::EncodedSeq;
+
+
+ok(1);
+use Bio::SimpleAlign;
+use Bio::AlignIO;
+use Bio::Root::IO;
+
+my ($str, $aln, $seq, $loc);
+
+ok $seq = Bio::Seq::EncodedSeq->new(	# direct class-method call instead of indirect-object "new Class(...)"
+			     -seq => '--atg---gta--',
+			     -start => 1,
+			     -end => 6,
+			     -strand => 1
+			     );
+ok $seq->alphabet, 'dna';
+ok $seq->start, 1;
+ok $seq->end, 6;
+ok $seq->strand, 1;
+ok $seq->no_gaps, 1;
+ok $seq->column_from_residue_number(4), 9;
+
+# this should fail
+eval {
+    $seq->column_from_residue_number(8);
+};
+ok $@;
+
+ok $loc = $seq->location_from_column(4);
+ok $loc->isa('Bio::Location::Simple');
+ok $loc->to_FTstring, "2";
+
+ok $loc = $seq->location_from_column(6);
+ok $loc->isa('Bio::Location::Simple');
+ok $loc->start, 3;
+ok $loc->location_type, 'IN-BETWEEN';
+ok $loc->to_FTstring, '3^4';
+
+ok $loc = $seq->location_from_column(2), undef;
+
+ok $seq->encoding, "GGCCCGGGCCCGG";
+ok $seq->encoding(-explicit => 1), "GGCDEGGGCDEGG";
+
+ok $seq = Bio::Seq::EncodedSeq->new(	# direct class-method call instead of indirect-object "new Class(...)"
+			     -seq => 'atcgta',
+			     -start => 10,
+			     -end => 15,
+			     -strand => -1,
+			     );
+ok $seq->encoding('CCGGG'), 'CCGGGCCCC';
+ok $seq->seq, 'atcg---ta';
+ok $seq->column_from_residue_number(14), 2;
+ok $seq->encoding('3C2GCG'), 'CCCGGCGCC';
+ok $seq->seq, 'at-c--gta';
+ok $seq->no_gaps, 2;
+ok $seq->location_from_column(2)->to_FTstring, 14;
+ok $seq->location_from_column(5)->to_FTstring, "12^13";
+ok $seq->encoding("B", Bio::Location::Simple->new(-start => 10, -end => 11,
+						  -location_type => 'IN-BETWEEN')), 'B';
+ok $seq->seq, 'at-c--gt-a';
+ok $seq->encoding, 'CBCCGGCGCC';
+ok $seq->cds(-nogaps => 1)->seq, 'tacgat';
+ok $seq->translate->seq, 'YD';
+ok $seq = $seq->trunc(4,10); # kinda testing LocatableSeq's new trunc() here as well.
+ok $seq->seq, 'c--gt-a';
+ok $seq->encoding, 'CBCCGGC';

Added: trunk/packages/bioperl/branches/upstream/current/t/Exception.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Exception.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Exception.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,88 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Exception.t,v 1.8.4.1 2006/11/17 16:03:06 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+eval {require Test::More;};
+if ($@) {
+	use lib 't/lib';
+}
+use Test::More;
+
+eval {require Error;};
+if ($@) {
+	use lib 't/lib';
+}
+use_ok("Error");
+
+use lib './examples/root/lib';
+
+BEGIN {
+	$NUMTESTS = 8;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+    plan tests => $NUMTESTS; 
+}
+
+use Bio::Root::Exception;
+use TestObject;
+use Error qw(:try);
+
+$Error::Debug = 1; 
+
+# Set up a tester object.
+ok my $test = TestObject->new();
+
+is $test->data('Eeny meeny miney moe.'), 'Eeny meeny miney moe.';
+
+# This demonstrates what happens when a method defined in an
+# interface is not implemented in the implementing object.
+
+eval { 
+    try {
+		$test->foo();
+    }
+    catch Bio::Root::NotImplemented with {
+		my $err = shift;
+		is ref $err, 'Bio::Root::NotImplemented';
+    };
+};
+
+# TestObject::bar() deliberately throws a Bio::TestException, 
+# which is defined in TestObject.pm
+try {
+    $test->bar;
+}
+catch Bio::TestException with {
+    my $err = shift;
+    is ref $err, 'Bio::TestException';
+};
+
+
+# Use the non-object-oriented syntax to throw a generic Bio::Root::Exception.
+try {
+    throw Bio::Root::Exception( "A generic error", 42 );
+}
+catch Bio::Root::Exception with {
+    my $err = shift;
+    is ref $err, 'Bio::Root::Exception';
+    is $err->value, 42;
+};
+
+# Try to call a subroutine that doesn't exist. But because it occurs
+# within a try block, the Error module will create a Error::Simple to
+# capture it. Handy eh?
+
+try {
+	$test->foobar();
+}
+otherwise {
+	my $err = shift;
+	is ref $err, 'Error::Simple';
+}; 
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Exonerate.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Exonerate.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Exonerate.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,84 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Exonerate.t,v 1.5 2003/03/29 20:25:02 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error;
+
+use strict;
+use lib '.';
+
+BEGIN {     # runs at compile time: load Test and declare the plan up front
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';	# NOTE: 'use' is itself compile-time, so this runs regardless of the condition
+    }
+    use vars qw($NTESTS);
+    $NTESTS = 45;	# 1 initial ok + 2 expected rows in @data of 4 + 3x6 values each
+    $error = 0;	# the only assignment to $error in this script
+
+    use Test;
+    plan tests => $NTESTS; 
+}
+
+if( $error == 1 ) {	# never true as written: $error is only ever set to 0 above
+    exit(0);
+}
+
+use Bio::SearchIO;
+use Bio::Root::IO;
+use Bio::SearchIO::Writer::HitTableWriter;
+use Bio::SearchIO::Writer::HTMLResultWriter;
+
+ok(1);
+my ($searchio, $result,$hit,$hsp);
+
+$searchio = Bio::SearchIO->new(-file =>	# direct class-method call instead of indirect-object syntax
+			      Bio::Root::IO->catfile(qw(t data 
+							testdat.exonerate)),
+			      -format => 'exonerate');
+my @data = ( [qw(ln27 416 Contig124 939
+		 293 416 -1 
+		 1   124 1 
+		 
+		 107 292 -1 
+		 178 363 1 
+		 
+		 66 106 -1
+		 899 939 1
+		 )],
+	     [qw(ln74 644 Contig275 1296 
+		 601 644 -1
+		 901 944 1
+		 
+		 436 600 -1
+		 998 1162    1
+
+		 386 435 -1
+		 1247 1296 1
+		 
+		 )] );
+
+while( my $res = $searchio->next_result ) {
+    my $want = shift @data;    # expected values for this result, consumed in test order
+    ok($res->query_name, shift @{$want});
+    skip( 'no query length available in default output',
+	  $res->query_length, shift @{$want});
+    my $first_hit = $res->next_hit;
+    ok($first_hit->name, shift @{$want});
+    skip( 'no hit length available in default output',$first_hit->length, shift @{$want});
+    while( my $pair = $first_hit->next_hsp ) {
+	ok($pair->query->start, shift @{$want});
+	ok($pair->query->end, shift @{$want});
+	ok($pair->query->strand, shift @{$want});
+	
+	ok($pair->hit->start, shift @{$want});
+	ok($pair->hit->end, shift @{$want});
+	ok($pair->hit->strand, shift @{$want});
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/FeatureHolder.x
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/FeatureHolder.x	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/FeatureHolder.x	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,96 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: FeatureHolder.x,v 1.1 2004/03/06 02:02:28 cjm Exp $
+
+use strict;
+use vars qw($DEBUG $TESTCOUNT);
+BEGIN {     
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    $TESTCOUNT = 6;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Root::IO;
+use Bio::SeqFeature::Tools::Unflattener;
+use Bio::SeqFeature::Tools::TypeMapper;
+use Bio::Tools::GFF;
+
+ok(1);
+
+my $verbosity = -1;   # Set to -1 for release version, so warnings aren't printed
+
+my ($seq, @sfs);
+my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
+my $tm = Bio::SeqFeature::Tools::TypeMapper->new;
+
+
+if (1) {    # single scenario: GenBank -> unflatten -> GFF3 -> re-read -> chado/chaos XML
+    my @path = ("t","data","AE003644_Adh-genomic.gb");
+    # allow cmd line override
+    if (@ARGV) {
+	@path = (shift @ARGV);
+    }
+    $seq = getseq(@path);
+    
+    ok ($seq->accession_number, 'AE003644');
+    my @topsfs = $seq->get_SeqFeatures;
+    
+    # UNFLATTEN
+    $unflattener->verbose(1);
+    $unflattener->unflatten_seq(-seq=>$seq,
+                                -use_magic=>1);
+    $tm->map_types_to_SO(-seq=>$seq);
+    $path[-1] .= ".gff3";
+    # $o is the output file name; its extension is rewritten below so
+    # that each output format is written to its own file
+    my $o = Bio::Root::IO->catfile(@path);
+
+    my $gffio = Bio::Tools::GFF->new(-file=>">$o" , -noparse=>1, -gff_version => 3);
+    $seq->set_ParentIDs_from_hierarchy();
+    foreach my $feature ($seq->get_all_SeqFeatures) {
+        $gffio->write_feature($feature);
+    }
+    $gffio->close();
+
+    $gffio = Bio::Tools::GFF->new(-file=>"$o", -gff_version => 3);
+    my $gff_seq = Bio::Seq->new;    # rebuilt from the GFF3 just written; no longer shadows the file-level $seq
+    my $feature;
+    # loop over the input stream
+    while($feature = $gffio->next_feature()) {
+        $gff_seq->add_SeqFeature($feature);
+    }
+    $gffio->close();
+    $gff_seq->create_hierarchy_from_ParentIDs;
+
+    $o =~ s/\.gff3$/.chado-xml/;    # keep the dot so the next substitution can match
+    my $outio = Bio::SeqIO->new(-format=>'chadoxml', -file=>">$o");
+    $outio->write_seq($gff_seq);
+
+    # no way to check chado output for now
+
+    $o =~ s/\.chado-xml$/.chaos-xml/;    # separate file: do not overwrite the chado output
+    $outio = Bio::SeqIO->new(-format=>'chaosxml', -file=>">$o");
+    $outio->write_seq($gff_seq);
+
+    # no way to check chaos output for now
+
+}
+
+
+# Return the first sequence read from a GenBank file. The arguments are
+# the path components of the file, joined with Bio::Root::IO->catfile.
+sub getseq {
+    my @parts = @_;
+    my $reader = Bio::SeqIO->new(
+        '-file'   => Bio::Root::IO->catfile(@parts),
+        '-format' => 'GenBank',
+    );
+    # $verbosity is the file-level setting shared by the whole script
+    $reader->verbose($verbosity);
+    return scalar $reader->next_seq();
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/FeatureIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/FeatureIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/FeatureIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,210 @@
+# -*-Perl-*-
+# $Id: FeatureIO.t,v 1.6.4.1 2006/10/02 23:10:40 sendu Exp $
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+use constant NUMTESTS => 33;
+my $error;
+BEGIN {
+  eval { require Test; };
+  if( $@ ) {
+	  use lib 't';
+  }
+  $error = 0;
+  use Test;
+  plan tests => NUMTESTS;
+  unless( eval "require Graph; require Bio::FeatureIO; 1;" ) {
+	  warn("Graph not installed.  Bio::FeatureIO is not installed.\n");
+	  $error = 1;
+	  for ( 1..NUMTESTS ) {
+		  skip("Graph is not installed. Bio::FeatureIO::gff cannot be run",1);
+	  }
+  }
+}
+
+if( $error ==  1 ) {
+	exit(0);
+}
+END {
+	foreach ( $Test::ntest..NUMTESTS) {
+		skip('Cannot complete FeatureIO tests',1);
+	}
+}
+
+use Bio::Root::IO;
+use Data::Dumper;
+ok(1);
+
+my $io;
+my $f;
+my $s;
+my $fcount;
+my $scount;
+
+################################################################################
+#
+# use FeatureIO::gff to read a FASTA file.
+#
+$fcount = 0;
+$scount = 0;
+
+ok( $io = Bio::FeatureIO->new( -file => Bio::Root::IO->catfile('t','data','dna1.fa') ) );
+
+#read features
+while($f = $io->next_feature()){
+warn $f;
+  $fcount++;
+}
+ok($fcount == 0);
+
+#then try to read sequences again.  should get seqs now
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 1);
+
+################################################################################
+#
+# use FeatureIO::gff to read a GFF3 file.
+#
+$fcount = 0;
+$scount = 0;
+
+ok( $io = Bio::FeatureIO->new( -file => Bio::Root::IO->catfile('t','data','knownGene.gff3') ) );
+
+#try to read sequences first.  should be undef
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 0);
+
+#then read features
+while($f = $io->next_feature()){
+  $fcount++;
+}
+ok($fcount == 15);
+
+#then try to read sequences again.  should still be undef
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 0);
+
+################################################################################
+#
+# use FeatureIO::gff to read a GFF3 file w/ directivized FASTA tail
+#
+$fcount = 0;
+$scount = 0;
+
+ok( $io = Bio::FeatureIO->new( -file => Bio::Root::IO->catfile('t','data','hybrid1.gff3') ) );
+
+#try to read sequences first.  should be undef
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 0);
+
+#then read features
+while($f = $io->next_feature()){
+  $fcount++;
+}
+ok($fcount == 6);
+
+#then try to read sequences again.
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 1);
+
+################################################################################
+#
+# use FeatureIO::gff to read a GFF3 file w/ non-directivized FASTA tail
+#
+$fcount = 0;
+$scount = 0;
+
+ok( $io = Bio::FeatureIO->new( -file => Bio::Root::IO->catfile('t','data','hybrid2.gff3') ) );
+
+#try to read sequences first.  should be undef
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 0);
+
+#then read features
+while($f = $io->next_feature()){
+  $fcount++;
+}
+ok($fcount == 6);
+
+################################################################################
+#
+# use FeatureIO::gff to read a GFF3 file of directives
+#
+$fcount = 0;
+$scount = 0;
+
+ok( $io = Bio::FeatureIO->new( -file => Bio::Root::IO->catfile('t','data','directives.gff3') ) );
+
+#read features
+while($f = $io->next_feature()){
+  $fcount++;
+}
+ok($fcount == 1); #sequence-region
+
+################################################################################
+#
+# use FeatureIO::gff to read a GFF3 file as aggregated feature groups
+#
+$fcount = 0;
+$scount = 0;
+
+ok( $io = Bio::FeatureIO->new( -file => Bio::Root::IO->catfile('t','data','hybrid1.gff3') ) );
+
+#try to read sequences first.  should be undef
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 0);
+
+#read feature groups
+$f = $io->next_feature_group();
+ok($f == 1);
+$f = $io->next_feature_group();
+ok($f == 0);
+
+#then try to read sequences again.
+while($s = $io->next_seq()){
+  $scount++;
+}
+ok($scount == 1);
+
+################################################################################
+#
+# use FeatureIO::gff to read a PTT file.
+#
+$fcount = 0;
+
+my $ptt_in = Bio::FeatureIO->new(
+  -file   => Bio::Root::IO->catfile('t','data','test.ptt'),
+  -format => 'ptt',
+);
+ok($ptt_in);
+
+while (my $feat = $ptt_in->next_feature) {
+  # every record is counted, but only the second one is inspected in detail
+  next unless ++$fcount == 2;
+  # 2491..3423  + 310 24217063  metF  LB002 - COG0685E  5,10-methylenetetrahydrofolate reductase
+  ok( $feat->start == 2491 );
+  ok( $feat->end == 3423 );
+  ok( $feat->strand > 0 );
+  ok( ($feat->get_tag_values('PID'))[0] eq '24217063' );
+  ok( ($feat->get_tag_values('Gene'))[0] eq 'metF' );
+  ok( ($feat->get_tag_values('Synonym'))[0] eq 'LB002' );
+  ok( !$feat->has_tag('Code') );
+  ok( ($feat->get_tag_values('COG'))[0] eq 'COG0685E' );
+  ok( ($feat->get_tag_values('Product'))[0] eq '5,10-methylenetetrahydrofolate reductase' );
+}
+
+ok($fcount == 367);

Added: trunk/packages/bioperl/branches/upstream/current/t/FootPrinter.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/FootPrinter.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/FootPrinter.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,72 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 26;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::FootPrinter;
+use Bio::SeqIO;
+
+END {
+	for ( $Test::ntest..$NTESTS ) {
+		skip("FootPrinter parser failed.",1);
+	}
+}
+
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","footprinter.out");
+my $parser = Bio::Tools::FootPrinter->new(-file => $inputfilename);
+my @sub;
+my @species = qw(TETRAODON CHICKEN MOUSE HAMSTER HUMAN PIG);
+while (my $feat = $parser->next_feature){
+    ok($feat->seq_id, shift @species);
+    foreach my $sub ($feat->sub_SeqFeature){
+      push @sub,$sub;
+    }
+}
+
+ok $sub[0]->seq_id, 'TETRAODON-motif1';
+ok $sub[0]->start,352;
+ok $sub[0]->end,362;
+ok $sub[0]->seq->seq,'tacaggatgca';
+ok $sub[1]->seq_id, 'TETRAODON-motif2';
+ok $sub[1]->start,363;
+ok $sub[1]->end,373;
+ok $sub[1]->seq->seq,'ccatatttgga';
+
+ok $sub[2]->seq_id, 'CHICKEN-motif1';
+ok $sub[2]->start,363;
+ok $sub[2]->end,373;
+ok $sub[2]->seq->seq,'cacaggatgta';
+
+ok $sub[3]->seq_id, 'CHICKEN-motif2';
+ok $sub[3]->start,376;
+ok $sub[3]->end,386;
+ok $sub[3]->seq->seq,'ccatataagga';
+
+ok $sub[4]->seq_id, 'MOUSE-motif1';
+ok $sub[4]->start,234;
+ok $sub[4]->end,243;
+ok $sub[4]->seq->seq,'cacaggatgt';
+
+
+
+
+
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/GDB.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GDB.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GDB.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: GDB.t,v 1.13 2002/05/31 18:51:02 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+my $error;
+my $SKIPTEST; BEGIN { $SKIPTEST = 1; }  # set this to 0 if you want to run the GDB test
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    $error = 0;
+    use Test;    
+    use vars qw($NUMTESTS);
+    $NUMTESTS = 11;
+    plan tests => $NUMTESTS;
+    # $SKIPTEST is assigned in its own BEGIN above, so it already has its value here
+    if( $SKIPTEST ) {
+	foreach ( $Test::ntest..$NUMTESTS ) {
+	    skip('GDB test skipped to avoid timeouts',1);
+	}
+	$error = 1;
+    }
+    eval { require ('LWP/UserAgent.pm'); require('HTML/Parser.pm');	
+	   require ('HTTP/Request/Common.pm');
+	 };
+    if( $@ ) {
+	print STDERR "Cannot load LWP::UserAgent or HTML::Parser, skipping tests\n";
+	foreach ( $Test::ntest..$NUMTESTS) { skip('LWP::UserAgent or HTML::Parser not installed',1); }
+	$error = 1;
+    } 
+    if( $] < 5.005 ) {
+	print STDERR "GDB parsing does not work with 5.005 or lower Perl versions.\n";
+	foreach ( $Test::ntest..$NUMTESTS) { skip('need perl > 5.005',1); }
+	$error = 1;
+    }
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+require Bio::DB::GDB;
+my $verbose = -1;
+
+my ($gdb, $marker, $info);
+# get a single seq
+$marker = 'D1S234';
+eval { 
+    ok defined ( $gdb = new Bio::DB::GDB(-verbose=>$verbose) );     
+    ok($info = $gdb->get_info(-type=>'marker',
+			      -id  => $marker));
+};
+if( $@ || ! defined $info) {
+    warn "Warning: Couldn't connect to GDB website with Bio::DB::GDB.pm!\nError: Do you have network access? Skipping all other tests";
+    foreach ( $Test::ntest..$NUMTESTS ) { skip('no network access',1); }
+    exit(0);
+}
+
+
+ok $info->{gdbid}, 'GDB:188296', 'value was ' . $info->{gdbid};
+ok $info->{primers}->[0], 'GCCCAGGAGGTTGAGG', 'value was ' . $info->{primers}->[0];
+ok $info->{primers}->[1], 'AAGGCAGGCTTGAATTACAG', 'value was ' . $info->{primers}->[1];
+ok $info->{'length'}, 226, 'value was '. $info->{'length'};
+
+$marker = 'UT497';
+$info = undef;
+eval { 
+ok ($info = $gdb->get_info(-type=>'marker',
+			     -id  => $marker));
+};
+if( $@ || ! defined $info ) {
+    warn "Warning: Couldn't connect to GDB website with Bio::DB::GDB.pm!\nError: Do you have network access? Skipping all other tests";
+    foreach ( $Test::ntest..$NUMTESTS ) { skip('no network access',1); }
+    exit(0);
+}
+ok $info->{gdbid}, 'GDB:198271', 'value was ' . $info->{gdbid};
+ok $info->{primers}->[0], 'GGGTGACAGAACAAGACCT', 'value was ' . $info->{primers}->[0];
+ok $info->{primers}->[1], 'ACCCATTAGCCTTGAACTGA', 'value was ' . $info->{primers}->[1];
+ok $info->{'length'}, 155, 'value was '. $info->{'length'};

Added: trunk/packages/bioperl/branches/upstream/current/t/GFF.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GFF.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GFF.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,121 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan test => 32;
+}
+
+use Bio::Seq;
+use Bio::Tools::GFF;
+use Bio::SeqFeatureI;
+use Bio::SeqFeature::Generic;
+my $feat = new Bio::SeqFeature::Generic( -start => 10, -end => 100,
+				-strand => -1, -primary => 'repeat',
+				-source => 'repeatmasker',
+				-score  => 1000,
+				-tag    => {
+				    new => 1,
+				    author => 'someone',
+				    sillytag => 'this is silly!;breakfast' } );
+ok($feat);
+my $gff1out = Bio::Tools::GFF->new(-gff_version => 1, -file => ">out1.gff");
+ok($gff1out);
+my $gff2out = Bio::Tools::GFF->new(-gff_version => 2, -file => ">out2.gff");
+ok($gff2out);
+
+$gff1out->write_feature($feat);
+$gff2out->write_feature($feat);
+
+$gff1out->close();
+$gff2out->close();
+
+my $gff1in = Bio::Tools::GFF->new(-gff_version => 1,  -file => "out1.gff");
+ok($gff1in);
+my $gff2in = Bio::Tools::GFF->new(-gff_version => 2, -file => "out2.gff");
+ok($gff2in);
+
+my $feat1 = $gff1in->next_feature();
+ok($feat1);
+ok($feat1->start, $feat->start);
+ok($feat1->end, $feat->end);
+ok($feat1->primary_tag, $feat->primary_tag);
+ok($feat1->score, $feat->score);
+
+my $feat2 = $gff2in->next_feature();
+ok($feat2);
+ok($feat2->start, $feat->start);
+ok($feat2->end, $feat->end);
+ok($feat2->primary_tag, $feat->primary_tag);
+ok($feat2->score, $feat->score);
+ok(($feat2->each_tag_value('sillytag'))[0], 'this is silly!;breakfast');
+
+#test sequence-region parsing
+$gff2in = Bio::Tools::GFF->new(-gff_version => 2, -file => Bio::Root::IO->catfile("t","data","hg16_chroms.gff"));
+ok($gff2in->next_feature(),undef);
+my $seq = $gff2in->next_segment;
+ok($seq->display_id, 'chr1');
+ok($seq->end, 246127941);
+ok($seq->start, 1);
+
+
+# GFF3: write one feature to an in-memory handle, read it back and compare
+eval { require IO::String };
+unless( $@ ) {
+    my $str = IO::String->new;
+    my $gffout = Bio::Tools::GFF->new(-fh => $str, -gff_version => 3);
+    my $feat_test = Bio::SeqFeature::Generic->new
+	(-primary_tag => 'tag',
+	 -source_tag  => 'exon',
+	 -seq_id      => 'testseq',
+	 -score       => undef,
+	 -start       => 10,
+	 -end         => 120,
+	 -strand      => 1,
+	 -tag         => { 
+	     'bungle' => 'jungle;mumble',
+	     'lion'   => 'snake=tree'
+	     });
+    $feat_test->add_tag_value('giant_squid', 'lakeshore manor');
+    $gffout->write_feature($feat_test);
+    seek($str,0,0);    # rewind so the reader below starts at the feature just written
+    my $in = Bio::Tools::GFF->new(-fh          => $str,
+				 -gff_version => 3);
+    my $f_recon = $in->next_feature;
+    ok($f_recon->primary_tag, $feat_test->primary_tag);
+    ok($f_recon->source_tag,  $feat_test->source_tag);
+    ok($f_recon->score, $feat_test->score);
+    ok($f_recon->start, $feat_test->start);
+    ok($f_recon->end, $feat_test->end);
+    ok($f_recon->strand, $feat_test->strand);
+    for my $tag ( $feat_test->get_all_tags ) {
+	ok($f_recon->has_tag($tag));
+	if( $f_recon->has_tag($tag) ) {
+	    my @v = $feat_test->get_tag_values($tag);    # expected values
+	    my @g = $f_recon->get_tag_values($tag);      # values read back
+	    while( @v && @g ) {
+		ok(shift @g, shift @v);    # ok(got, expected)
+	    }
+	}
+    }
+} else { 
+    for ( 21..32 ) {    # the 12 GFF3 tests above are tests 21 to 32 of the plan
+	skip('cannot verify GFF3 writing tests without IO::String installed',1);
+    }
+}
+
+END {
+    unlink("out1.gff", "out2.gff");
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/GOR4.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GOR4.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GOR4.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,67 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: GOR4.t,v 1.1 2003/07/23 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 14;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+	
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if ($@) {
+		plan skip_all => 'IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+	
+	use_ok("Data::Dumper");
+	use_ok("Bio::Seq");
+	use_ok("Bio::Tools::Analysis::Protein::GOR4");
+}
+
+#	eval {require Bio::Seq::Meta::Array;};
+#	"Bio::Seq::Meta::Array not installed - will skip tests using meta sequences"
+
+my $verbose = $DEBUG;
+
+my $seq = Bio::Seq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS',
+                        -display_id => 'test2');
+ok my $tool = Bio::Tools::Analysis::Protein::GOR4->new(-seq=>$seq->primary_seq);
+
+SKIP: {	# each skip() below leaves this block, so its count covers every test still ahead of it
+	skip "Skipping tests which require remote servers, set BIOPERLDEBUG=1 to test", 10 unless $DEBUG;	# all 10 tests in this block
+    ok $tool->run();
+	skip "Skipping tests since we got terminated by a server error", 9 if $tool->status eq 'TERMINATED_BY_ERROR';	# the 9 tests after run()
+    ok my $raw = $tool->result('');
+    ok my $parsed = $tool->result('parsed');
+    is $parsed->[0]{'coil'}, '999';
+    my @res = sort {$a->start <=> $b->start} $tool->result('Bio::SeqFeatureI');	# features ordered by start
+    if (scalar @res > 0) {
+		ok 1;
+    }
+	else {
+		skip 'No results - could not connect to GOR4 server?', 6;	# the ok in the other branch plus the 5 tests below
+    }
+	is $res[0]->start, 1;
+	is $res[0]->end, 43;
+    ok my $meta = $tool->result('meta');
+    
+    eval {require Bio::Seq::Meta::Array;};
+	skip "Bio::Seq::Meta::Array not installed - will skip tests using meta sequences", 2 if $@;	# the last 2 tests
+	is $meta->named_submeta_text('GOR4_coil',1,2), '999 999';
+	is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS';
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/GOterm.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GOterm.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GOterm.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,158 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: GOterm.t,v 1.10 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($HAVEGRAPHDIRECTED $NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {    # runs at compile time: load Test, probe optional modules, declare the plan
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';    # NOTE: 'use' is itself compile-time, so this runs regardless of the condition
+    }
+    use Test;
+    eval {require Graph::Directed; 
+			 $HAVEGRAPHDIRECTED=1;    # reached only if Graph::Directed loaded
+			 require Bio::Ontology::GOterm;
+			 require Bio::Ontology::Ontology;
+		 };
+    if ($@) {    # any of the three requires above failed
+		 $HAVEGRAPHDIRECTED = 0;
+    }
+    plan tests => ($NUMTESTS = 59);    # $NUMTESTS is also read by the END block of this script
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Unable to run all of the GOterm tests ',1);
+	}
+}
+
+exit(0) unless $HAVEGRAPHDIRECTED;
+  
+my $obj = Bio::Ontology::GOterm->new();
+
+ok( $obj->isa( "Bio::Ontology::GOterm" ) );
+
+$obj->init();
+
+ok( $obj->to_string() );
+
+
+ok( $obj->GO_id( "GO:0003947" ), "GO:0003947" );
+ok( $obj->GO_id(), "GO:0003947" );
+
+
+ok( $obj->get_dblinks(), 0 );
+
+$obj->add_dblink( ( "dAA", "dAB" ) );
+ok( scalar($obj->get_dblinks()), 2 );
+my @df1 = $obj->get_dblinks();
+ok( $df1[ 0 ], "dAA" );
+ok( $df1[ 1 ], "dAB" );
+ok( $obj->get_dblinks(), 2 );
+
+my @df2 = $obj->remove_dblinks();
+ok( $df2[ 0 ], "dAA" );
+ok( $df2[ 1 ], "dAB" );
+
+ok( $obj->get_dblinks(), 0 );
+ok( $obj->remove_dblinks(), 0 );
+
+
+ok( $obj->get_secondary_GO_ids(), 0 );
+
+$obj->add_secondary_GO_id( ( "GO:0000000", "GO:1234567" ) );
+ok( scalar($obj->get_secondary_GO_ids()), 2 );
+my @si1 = $obj->get_secondary_GO_ids();
+ok( $si1[ 0 ], "GO:0000000" );
+ok( $si1[ 1 ], "GO:1234567" );
+ok( $obj->get_secondary_GO_ids(), 2 );
+
+my @si2 = $obj->remove_secondary_GO_ids();
+ok( $si2[ 0 ], "GO:0000000" );
+ok( $si2[ 1 ], "GO:1234567" );
+
+ok( $obj->get_secondary_GO_ids(), 0 );
+ok( $obj->remove_secondary_GO_ids(), 0 );
+
+
+
+ok( $obj->identifier( "0003947" ), "0003947" );
+ok( $obj->identifier(), "0003947" );
+
+ok( $obj->name( "N-acetylgalactosaminyltransferase" ), "N-acetylgalactosaminyltransferase" );
+ok( $obj->name(), "N-acetylgalactosaminyltransferase" );
+
+ok( $obj->definition( "Catalysis of ..." ), "Catalysis of ..." );
+ok( $obj->definition(), "Catalysis of ..." );
+
+ok( $obj->version( "666" ), "666" );
+ok( $obj->version(), "666" );
+
+ok( $obj->ontology( "category 1 name" ) );
+ok( $obj->ontology()->name(), "category 1 name" );
+
+my $ont = Bio::Ontology::Ontology->new();
+ok( $ont->name( "category 2 name" ) );
+
+ok( $obj->ontology( $ont ) );
+ok( $obj->ontology()->name(), "category 2 name" );
+
+ok( $obj->is_obsolete( 1 ), 1 );
+ok( $obj->is_obsolete(), 1 );
+
+ok( $obj->comment( "Consider the term ..." ), "Consider the term ..." );
+ok( $obj->comment(), "Consider the term ..." );
+
+ok( $obj->get_synonyms(), 0 );
+
+$obj->add_synonym( ( "AA", "AB" ) );
+my @al1 = $obj->get_synonyms();
+ok( scalar(@al1), 2 );
+ok( $al1[ 0 ], "AA" );
+ok( $al1[ 1 ], "AB" );
+
+my @al2 = $obj->remove_synonyms();
+ok( $al2[ 0 ], "AA" );
+ok( $al2[ 1 ], "AB" );
+
+ok( $obj->get_synonyms(), 0 );
+ok( $obj->remove_synonyms(), 0 );
+
+
+
+$obj->add_synonym( ( "AA", "AB" ) );
+$obj->add_dblink( ( "dAA", "dAB" ) );
+$obj->add_secondary_GO_id( ( "GO:1234567", "GO:1234567" ) );
+
+$obj->init();
+ok( $obj->identifier(), undef ); # don't make up identifiers
+ok( $obj->name(), undef );
+ok( $obj->definition(), undef );
+ok( $obj->is_obsolete(), 0 );
+ok( $obj->comment(), undef );
+
+
+$obj = Bio::Ontology::GOterm->new( -go_id       => "0016847",
+                                   -name        => "1-aminocyclopropane-1-carboxylate synthase",
+                                   -definition  => "Catalysis of ...",
+                                   -is_obsolete => 0,
+                                   -version     => "6.6.6",
+                                   -ontology    => "cat",
+                                   -comment     => "X" );  
+
+ok( $obj->identifier(), "GO:0016847" );
+ok( $obj->name(), "1-aminocyclopropane-1-carboxylate synthase" );
+ok( $obj->definition(), "Catalysis of ..." );
+ok( $obj->is_obsolete(), 0 );
+ok( $obj->comment(), "X" );
+ok( $obj->version(), "6.6.6" );
+ok( $obj->ontology()->name(), "cat" );
+

Added: trunk/packages/bioperl/branches/upstream/current/t/GbrowseGFF.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GbrowseGFF.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GbrowseGFF.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan test => 2;
+}
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::GbrowseGFF;
+use Bio::Root::IO;
+
+END {
+    unlink(Bio::Root::IO->catfile(qw(t data gbrowsegff.out)) );
+}
+my $in = Bio::SearchIO->new(-format => 'blast',
+			    -file   => Bio::Root::IO->catfile(
+				 qw(t data brassica_ATH.WUBLASTN)));
+my $out = new Bio::SearchIO(-output_format  => 'GbrowseGFF',
+			    -prefix => 'Sequence',
+			    -output_cigar   => 1,
+			    -output_signif  => 1,
+			    -file           => ">".Bio::Root::IO->catfile
+			    (qw(t data gbrowsegff.out) ));
+ok($out);
+while( my $r = $in->next_result ) {
+    ok($out->write_result($r));
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Gel.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Gel.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Gel.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Gel.t,v 1.1 2006/01/27 10:52:04 heikki Exp $
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan test => 7 }
+
+use Bio::PrimarySeq;
+use Bio::Restriction::Analysis;
+use Bio::Tools::Gel;
+ok(1);
+
+
+
+my $seq1 = Bio::PrimarySeq->new(-id=>'groundhog day',
+                                -seq=>'AAAAAAAAAGAATTCTTTTTTTTTTTTTTGAATTCGGGGGGGGGGGGGGGGGGGG');
+
+
+my $ra=Bio::Restriction::Analysis->new(-seq=>$seq1);
+ok my @cuts = $ra->fragments('EcoRI'), 3;
+
+
+ok my $gel = Bio::Tools::Gel->new(-seq=>\@cuts,-dilate=>10);
+ok my %bands = $gel->bands;
+my @bands = (26, 27, 30);
+my $c = 0;
+foreach my $band (sort {$b <=> $a} keys %bands){
+    #print $band,"\t",  sprintf("%.1f", $bands{$band}), "\n";
+    ok $bands[$c],  sprintf("%.0f", $bands{$band});
+    $c++;
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/GeneCoordinateMapper.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GeneCoordinateMapper.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GeneCoordinateMapper.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,629 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: GeneCoordinateMapper.t,v 1.15 2004/04/21 14:01:47 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 113;
+}
+
+use Bio::Location::Simple;
+use Bio::Coordinate::Pair;
+use Bio::Coordinate::Result;
+use Bio::Coordinate::Result::Match;
+use Bio::Coordinate::Result::Gap;
+use Bio::Coordinate::ExtrapolatingPair;
+use Bio::Coordinate::Collection;
+
+
+use vars qw($DEBUG);
+ok(1);
+
+
+
+#
+# Extrapolating pairs
+#
+#    No gaps returned, matches extrapolated
+#     returns always a match or undef
+#     -strict
+#
+
+
+# the  reverse strand pair
+my $inr = new Bio::Location::Simple(-start=>2, -end=>5, -strand=>1);
+my $outr = new Bio::Location::Simple(-start=>10, -end=>13, -strand=>-1);
+ok my $pairr = Bio::Coordinate::ExtrapolatingPair->
+    new(-in => $inr,
+	-out => $outr
+       );
+
+my $posr = Bio::Location::Simple->new 
+    (-start => 3, -end => 4, -strand=> 1 );
+my $resr = $pairr->map($posr);
+ok $resr->start, 11;
+ok $resr->end, 12;
+ok $resr->strand, -1;
+
+
+
+# propeptide
+my $match1 = Bio::Location::Simple->new 
+    (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
+# peptide
+my $match2 = Bio::Location::Simple->new
+    (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
+
+ok my $pair = Bio::Coordinate::ExtrapolatingPair->
+    new(-in => $match1,
+	-out => $match2,
+	-strict => 1
+       );
+
+ok $pair->test;
+ok $pair->strand(), 1; #  = in->strand * out->strand
+ok $pair->in->seq_id(), 'propeptide';
+ok $pair->strict(), 1;
+
+my ($count, $pos, $pos2, $res, $match, $res2);
+
+# match within
+$pos = Bio::Location::Simple->new 
+    (-start => 25, -end => 25, -strand=> -1 );
+$res = $pair->map($pos);
+
+ok $res->isa('Bio::Location::Simple');
+ok $res->start, 5;
+ok $res->end, 5;
+ok $res->strand, -1;
+ok $res->seq_id, 'peptide';
+
+
+# match outside = undef
+$pos = Bio::Location::Simple->new (-start => 5, -end => 5 );
+$res = $pair->map($pos);
+
+ok $res, undef;
+
+#
+# partial match = match
+#
+$pos2 = Bio::Location::Simple->new
+    (-start => 20, -end => 22, -strand=> -1 );
+
+ok $res = $pair->map($pos2);
+
+ok $res->start, 0;
+ok $res->end, 2;
+ok $res->seq_id, 'peptide';
+ok $res->strand, -1;
+
+
+#
+# partial match2 =  match & gap
+#
+$pos2 = Bio::Location::Simple->new (-start => 40, -end => 41, -strand=> 1 );
+ok $res = $pair->map($pos2);
+ok $res->start, 20;
+ok $res->end, 20;
+
+#
+#enveloping
+#
+$pos2 = Bio::Location::Simple->new (-start => 19, -end => 41, -strand=> 1 );
+ok $res = $pair->map($pos2);
+ok $res->start, 1;
+ok $res->end, 20;
+
+#
+# testing the changing the strand
+#
+
+# chr
+$match1 = Bio::Location::Simple->new 
+    (-seq_id => 'chr', -start => 21, -end => 40, -strand=>1 );
+# gene
+$match2 = Bio::Location::Simple->new
+    (-seq_id => 'gene', -start => 1, -end => 20, -strand=>-1 );
+
+ $pair = Bio::Coordinate::ExtrapolatingPair->
+#my $pair = Bio::Coordinate::Pair->
+    new(-in => $match1,
+	-out => $match2,
+	-strict => 0
+       );
+
+$pos = Bio::Location::Simple->new 
+    (-start => 38, -end => 40, -strand=> 1 );
+$res = $pair->map($pos);
+ok $res->start, 1;
+ok $res->end, 3;
+ok $res->strand, -1;
+
+$pos = Bio::Location::Simple->new 
+    (-start => 1, -end => 3, -strand=> 1 );
+$res = $pair->map($pos);
+ok $res->start, 38;
+ok $res->end, 40;
+ok $res->strand, -1;
+
+
+#
+#
+# Gene Mapper
+#
+#
+
+use Bio::Coordinate::GeneMapper;
+
+ok my $m = new Bio::Coordinate::GeneMapper(-in => 'propeptide',
+					   -out => 'peptide');
+#$m->verbose(2);
+
+ok $m->peptide_offset(5), 5;
+
+
+# match within
+$pos = Bio::Location::Simple->new 
+    (-start => 25, -end => 25, -strand=> 1 );
+$res = $m->map($pos);
+
+ok $res->start, 20;
+ok $res->end, 20;
+ok $res->strand, 1;
+ok $res->seq_id, 'peptide';
+
+
+#
+# nozero
+#
+
+# match within
+$pos = Bio::Location::Simple->new 
+    (-start => 4, -end => 5, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, -1;
+ok $res->end, 0;
+
+ok $m->nozero('in&out'), 'in&out';
+$res = $m->map($pos);
+ok $res->start, -2;
+ok $res->end, -1;
+ok $m->nozero(0), 0;
+
+
+
+ok $m->swap;
+$pos = Bio::Location::Simple->new 
+    (-start => 5, -end => 5, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, 10;
+
+# cds -> propeptide
+ok $m->in('cds'), 'cds';
+ok $m->out('propeptide'), 'propeptide';
+
+$res = $m->map($pos);
+ok $res->start, 2;
+ok $res = $m->_translate($pos);
+ok $res->start, 2;
+ok $res = $m->_reverse_translate($pos);
+ok $res->start, 13;
+ok $res->end, 15;
+
+$pos = Bio::Location::Simple->new 
+    (-start => 26, -end => 26, -strand=> 1 );
+$m->out('peptide');
+$res = $m->map($pos);
+ok $res->start, 4;
+
+
+#
+# frame
+#
+
+$pos = Bio::Location::Simple->new 
+    (-start => 1, -end => 3, -strand=> 1 );
+$res = $m->_frame($pos);
+ok $res->start, 1;
+ok $res->end, 3;
+
+
+# Collection representing exons
+#
+#  cds    1   5     6   10    11  15
+#  exon   1   5     1   5     1   5
+#  gene   1   5    11   15   21   25
+#         |---|     |---|     |---|
+#-----|-----------------------|---|--
+# chr 1   5   9    15   19   25   29
+#         pair1     pair2     pair3
+
+# gene
+my $e1 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 5, -end => 9, -strand=>1 );
+my $e2 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 15, -end => 19, -strand=>1 );
+my $e3 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 25, -end => 29, -strand=>1 );
+my @cexons = ($e1, $e2, $e3);
+
+$m= new Bio::Coordinate::GeneMapper;
+
+$m->in('chr');
+$m->out('gene');
+my $off = $m->cds(5);
+ok $off->start, 5; # start of the coding region
+ok $m->exons(@cexons), 3;
+
+$m->out('exon');
+$pos = Bio::Location::Simple->new
+    (-start => 6, -end => 7, -strand=> 1 );
+$res = $m->map($pos);
+
+ok $res->start, 2;
+ok $res->end, 3;
+
+$m->out('negative_intron');
+$pos = Bio::Location::Simple->new 
+    (-start => 12, -end => 14, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, -3;
+ok $res->end, -1;
+ok $res->seq_id, 'intron1';
+
+
+# cds
+$m->out('cds');
+$pos = Bio::Location::Simple->new
+    (-start => 5, -end => 9, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, 1;
+ok $res->end, 5;
+
+$pos = Bio::Location::Simple->new
+    (-start => 15, -end => 25, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, 6;
+ok $res->end, 11;
+
+$pos = Bio::Location::Simple->new
+    (-start => 5, -end => 19, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, 1;
+ok $res->end, 10;
+
+
+#
+# chr to cds ; ranges into one
+#
+my $exons = new Bio::Location::Split(-seq_id => 'gene');
+$exons->add_sub_Location($e1);
+$exons->add_sub_Location($e2);
+$exons->add_sub_Location($e3);
+
+$res = $m->map($exons);
+ok $res->isa('Bio::Location::Simple');
+ok $res->start, 1;
+ok $res->end, 15;
+
+#
+# cds to chr; single range into two
+#
+$m->in('cds');
+$m->out('gene');
+
+$pos = Bio::Location::Simple->new
+    (-start => 4, -end => 7, -strand=> 1 );
+$res = $m->map($pos);
+ok $res->start, 4;
+ok $res->end, 12;
+
+
+
+# Collection representing exons
+#
+#  cds  -11  -7    -6  -2    -1   3  :27
+#  cds   -6  -2    -1 1 3     4   8  :17
+#  exon   1   5     1   5     1   5
+#  gene -21  -17  -11  -7    -1 1 3  :27
+#  gene -11  -7    -1 1 3     9   13 :17
+#         |---|     |---|     |---|
+#-----|-----------------------|---|--
+# chr 1   5   9    15   19   25   29
+#         pair1     pair2     pair3
+
+$m= new Bio::Coordinate::GeneMapper;
+
+$m->in('chr');
+$m->out('gene');
+$off = $m->cds(17);
+ok $off->start, 17; # start of the coding region
+ok $m->exons(@cexons), 3;
+
+# testing parameter handling in the constructor
+ok $m = new Bio::Coordinate::GeneMapper(-in => 'gene',
+					-out => 'peptide',
+					-cds => 3,
+					-exons => @cexons,
+					-utr => 7,
+					-peptide_offset => 5
+				       );
+
+
+#
+# Real life data
+# Mapping SNPs into  human serum protein MSE55 and
+# human galectin LGALS2 from Ensembl:
+#
+
+#Ensembl Gene ID	Exon Start (Chr bp)	Exon End (Chr bp)	Exon Coding Start (Chr bp)
+#	Exon Coding End (Chr bp)	Strand
+
+my @gene1_dump = split ( /\n/, qq {
+ENSG00000128283	34571058	34571126			1
+ENSG00000128283	34576610	34577350	34576888	34577350	1
+ENSG00000128283	34578646	34579858	34578646	34579355	1
+});
+
+
+my @gene2_dump = split ( /\n/, qq {
+ENSG00000100079	34590438	34590464			-1
+ENSG00000100079	34582387	34582469	34582387	34582469	-1
+ENSG00000100079	34581114	34581273	34581114	34581273	-1
+ENSG00000100079	34580784	34580950	34580804	34580950	-1
+}); # exon start should be less than end or is this intentional?
+
+#Chromosome Name	Location (bp)	Strand	Reference ID
+my @snp_dump = split ( /\n/, qq {
+22	34572694	1	2235335
+22	34572799	1	2235336
+22	34572843	1	2235337
+22	34574896	1	2076087
+22	34575256	1	2076088
+22	34578830	1	2281098
+22	34579111	1	2281099
+22	34580411	1	2235338
+22	34580591	1	2281097
+22	34580845	1	2235339
+22	34581963	1	2281100
+22	34583722	1	140057
+22	34585003	1	140058
+22	34587726	1	968725
+22	34588207	1	2284055
+22	34591507	1	1969639
+22	34591949	1	140059
+});
+shift @snp_dump;
+
+my ($cdsr, @exons) = read_gene_data(@gene1_dump);
+
+ok my $g1 = new Bio::Coordinate::GeneMapper(-in=>'chr', -out=>'gene');
+$g1->cds($cdsr);
+
+#$pos = Bio::Location::Simple->new
+#    (-start => 34576888, -end => 34579355, -strand=> 1 );
+$res = $g1->map($cdsr);
+ok $res->start, 1;
+ok $res->end, 2468;
+
+$g1->exons(@exons);
+$g1->in('gene');
+$g1->out('cds');
+$res = $g1->map($res);
+ok $res->start, 1;
+ok $res->end, 1173;
+
+#map_snps($g1, @snp_dump);
+
+
+#gene 2 in reverse strand
+($cdsr, @exons) = read_gene_data(@gene2_dump);
+ok my $g2 = new Bio::Coordinate::GeneMapper(-in=>'chr', -out=>'gene');
+$g2->cds($cdsr);
+
+$pos = Bio::Location::Simple->new
+    (-start => $cdsr->end-2, -end => $cdsr->end, -strand=> 1 );
+$res = $g2->map($pos);
+ok $res->start, 1;
+ok $res->end, 3;
+ok $res->strand, -1;
+
+
+$g2->exons(@exons);
+
+#map_snps($g2, @snp_dump);
+
+
+$match1 = Bio::Location::Simple->new 
+    (-seq_id => 'a', -start => 5, -end => 17, -strand=>1 );
+$match2 = Bio::Location::Simple->new
+    (-seq_id => 'b', -start => 1, -end => 13, -strand=>-1 );
+ok $pair = Bio::Coordinate::Pair->new(-in => $match1,
+					 -out => $match2,
+					);
+
+#
+# split location
+#
+
+ok my $split = new Bio::Location::Split;
+ok $split->add_sub_Location(new Bio::Location::Simple(-start=>6,
+                                                      -end=>8,
+                                                      -strand=>1));
+$split->add_sub_Location(new Bio::Location::Simple(-start=>15,
+                                                   -end=>16,
+                                                   -strand=>1));
+
+$res=$pair->map($split);
+ok my @sublocs = $res->each_Location(1);
+ok @sublocs, 2;
+
+#print Dumper \@sublocs;
+ok $sublocs[0]->start, 2;
+ok $sublocs[0]->end, 3;
+ok $sublocs[1]->start, 10;
+ok $sublocs[1]->end, 12;
+
+# testing  cds -> gene/chr which generates a split location from a simple one
+# exons in reverse strand!
+#
+#  pept   33222     111
+#  cds    8   4     3 1-1
+#  exon   5   1     5   1
+#  gene  13   9     3 1-2
+#         |---|     |---|
+#-----|-------------------
+# chr 1   5   9    15   19
+#           e1        e2
+
+# gene
+$e1 = Bio::Location::Simple->new
+    (-seq_id => 'gene', -start => 5, -end => 9, -strand=>-1 );
+$e2 = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 15, -end => 19, -strand=>-1 );
+@cexons = ($e1, $e2);
+my $cds= Bio::Location::Simple->new
+    (-seq_id => 'gene', -start => 5, -end => 17, -strand=>-1 );
+
+$m = new Bio::Coordinate::GeneMapper(-in=>'cds', -out=>'chr');
+
+$m->cds($cds); # this has to be set first!?
+ok $m->exons(@cexons), 2;
+
+
+my $cds_f= Bio::Location::Simple->new
+    (-start => 2, -end => 7, );
+$res = $m->map($cds_f);
+
+ok @sublocs = $res->each_Location(1);
+ok @sublocs, 2;
+
+ok $sublocs[0]->start, 6;
+ok $sublocs[0]->end, 9;
+ok $sublocs[1]->start, 15;
+ok $sublocs[1]->end, 16;
+
+
+# test inex, exon & negative_intron
+
+$m->in('gene');
+$m->out('inex');
+
+$pos = Bio::Location::Simple->new 
+    (-seq_id => 'gene', -start => 2, -end => 10, -strand=> 1 );
+
+$res = $m->map($pos);
+ok $res->each_Location, 3;
+
+
+$m->out('intron');
+$res = $m->map($pos);
+ok $res->start, 1;
+ok $res->end, 5;
+ok $res->strand, 1;
+
+$m->out('negative_intron');
+$res = $m->map($pos);
+ok $res->start, -5;
+ok $res->end, -1;
+ok $res->strand, 1;
+
+ok $m->_mapper_code2string('1-2'), 'chr-gene';
+ok $m->_mapper_string2code('chr-gene'), '1-2';
+
+
+#todo:
+#  strict mapping mode
+#  extrapolating pair code into Bio::Coordinate::Pair ?
+
+
+
+
+
+
+sub read_gene_data {
+    my ($self,@gene_dump) = @_;
+    my ($cds_start, $cds_end, $strand, @exons);
+
+    #one line per exon
+    my ($first, $first_line);
+    for my $line ( @gene_dump ) {
+
+	my ($geneid, $exon_start, $exon_end, $exon_cstart,
+	    $exon_cend, $exon_strand) = split /\t/, $line;
+
+	$strand = $exon_strand if $exon_strand;
+	#print join (' ', $geneid, $exon_start, $exon_strand), "\n";
+
+	# CDS location in chromosome coordinates
+	$cds_start = $exon_cstart if !$cds_start and $exon_cstart;
+	$cds_end = $exon_cend if $exon_cend;
+
+
+	if ($exon_start > $exon_end) {
+	    ($exon_start, $exon_end) = ($exon_end, $exon_start);
+	}
+
+	my $exon = Bio::Location::Simple->new
+	    (-seq_id => 'gene', -start => $exon_start,
+	     -end => $exon_end, -strand=>$strand, -verbose=>2);
+	push @exons, $exon;
+    }
+
+    if ($cds_start > $cds_end) {
+	($cds_start, $cds_end) = ($cds_end, $cds_start);
+    }
+
+    my $cdsr = Bio::Location::Simple->new (-start => $cds_start,
+					   -end => $cds_end,
+					   -strand=> $strand);
+
+    return ($cdsr, @exons);
+}
+
+
+sub map_snps {
+    my ($mapper, @snps) =@_;
+    $mapper->in('chr');
+    $mapper->out('cds');
+    foreach my $line (@snps) {
+	$mapper->out('cds');
+
+	my ($chr, $start, $strand, $id) = split /\t/, $line;
+	my $loc = Bio::Location::Simple->new
+	    ( -start => $start,
+	     -end => $start, -strand=>$strand );
+
+	my $res = $mapper->map($loc);
+	my $cds_start = 0;
+	$cds_start = $res->start if defined $res;#defined $res->start;
+	print $id, "\t", $cds_start, "\n";
+
+	# coding
+	if ($cds_start) {
+	    $mapper->out('propeptide');
+	    my $frame_obj = $mapper->_frame($res);
+	    my $res = $mapper->map($loc);
+	    my $cds_start = 0;
+	    $cds_start = $res->start if defined $res;#defined $res->start;
+	    print  "\t\t", $cds_start, " (", $frame_obj->start, ")\n";
+
+	}
+
+    }
+
+
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Geneid.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Geneid.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Geneid.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,80 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+use strict;
+
+BEGIN
+{
+    eval { require Test; };
+    if ($@)
+    {
+        use lib 't';
+    }
+
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 26;
+    plan tests => $NTESTS;
+}
+
+use Bio::Tools::Geneid;
+use Bio::SeqIO;
+ok(1);
+
+my $inputfilename = Bio::Root::IO->catfile("t", "data", "geneid_1.0.out");
+my $parser = Bio::Tools::Geneid->new(-file => $inputfilename);
+my @genes;
+
+while (my $gene= $parser->next_prediction)
+{
+    push(@genes, $gene);
+}
+
+my @transcripts = $genes[0]->transcripts;
+my @exons = $transcripts[0]->exons;
+
+ok($transcripts[0]->seq_id, '10');
+ok($exons[0]->seq_id, '10');
+ok($transcripts[0]->source_tag, 'geneid');
+ok($exons[0]->source_tag, 'geneid');
+ok($transcripts[0]->primary_tag, 'transcript');
+ok($exons[0]->primary_tag, 'Initial');
+
+ok(scalar($transcripts[0]->exons), 2);
+ok($transcripts[0]->start, 6090);
+ok($transcripts[0]->end, 7276);
+ok($transcripts[0]->score, 36.87);
+ok($transcripts[0]->strand, 1);
+ok($exons[0]->start, 6090);
+ok($exons[0]->end, 6155);
+ok($exons[0]->score, '1.40');
+ok($exons[0]->strand, 1);
+
+my ($type) = $exons[0]->get_tag_values('Type');
+ok($type, 'Initial');
+
+my ($phase) = $exons[0]->get_tag_values('phase');
+ok($phase, 0);
+
+my ($end_phase) = $exons[0]->get_tag_values('end_phase');
+ok($end_phase, 0);
+
+my ($start_signal_score) = $exons[0]->get_tag_values('start_signal_score');
+ok($start_signal_score, 2.15);
+
+my ($end_signal_score) = $exons[0]->get_tag_values('end_signal_score');
+ok($end_signal_score, 3.63);
+
+my ($coding_potential_score) = $exons[0]->get_tag_values('coding_potential_score');
+ok($coding_potential_score, 12.34);
+
+my ($homology_score) = $exons[0]->get_tag_values('homology_score');
+ok($homology_score, '0.00');
+
+ok(scalar(@genes), 3);
+
+@transcripts = $genes[1]->transcripts;
+ok(scalar($transcripts[0]->exons), 5);
+
+@transcripts = $genes[2]->transcripts;
+ok(scalar($transcripts[0]->exons), 1);

Added: trunk/packages/bioperl/branches/upstream/current/t/Genewise.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Genewise.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Genewise.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 51;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::Genewise;
+use Bio::SeqIO;
+use Bio::SearchIO;
+use Bio::Root::IO;
+
+END {
+	for ( $Test::ntest..$NTESTS ) {
+		skip("Cannot run remaining Genewise tests, skipping.",1);
+	}
+}
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","genewise.out");
+my $parser = Bio::Tools::Genewise->new(-file => $inputfilename);
+my @gene;
+while (my $gene= $parser->next_prediction){
+    push @gene, $gene;
+}
+my @t = $gene[0]->transcripts;
+my @e = $t[0]->exons;
+
+ok ($t[0]->seq_id, 'Scaffold_2042.1');
+ok ($e[0]->seq_id, 'Scaffold_2042.1');
+ok ($t[0]->source_tag, 'genewise');
+ok ($e[0]->source_tag, 'genewise');
+ok ($t[0]->primary_tag, 'transcript');
+ok ($e[0]->primary_tag, 'exon');
+
+ok (scalar($t[0]->exons), 18);
+ok ($t[0]->start, 22265);
+ok ($t[0]->end, 37062);
+ok ($e[0]->start,22265);
+ok ($e[0]->end, 22396);
+my ($phase) = $e[0]->each_tag_value('phase');
+ok ($phase,0);
+my ($sf)= $e[0]->each_tag_value('supporting_feature');
+ok ($sf->feature1->seq_id,'Scaffold_2042.1');
+ok ($sf->feature1->start,22265);
+ok ($sf->feature1->end,22396);
+ok ($sf->feature2->seq_id,'SINFRUP00000067802');
+ok ($sf->feature2->start,1);
+ok ($sf->feature2->end,44);
+ok ($sf->feature1->end,22396);
+
+open(FH,$inputfilename);
+$parser = Bio::Tools::Genewise->new(-fh=>\*FH);
+while (my $gene= $parser->next_prediction){
+    push @gene, $gene;
+}
+@t = $gene[0]->transcripts;
+@e = $t[0]->exons;
+
+ok (scalar($t[0]->exons), 18);
+ok ($t[0]->start, 22265);
+ok ($t[0]->end, 37062);
+ok ($e[0]->start,22265);
+ok ($e[0]->end, 22396);
+($phase) = $e[0]->each_tag_value('phase');
+ok ($phase,0);
+($sf)= $e[0]->each_tag_value('supporting_feature');
+ok ($sf->feature1->seq_id,'Scaffold_2042.1');
+ok ($sf->feature1->start,22265);
+ok ($sf->feature1->end,22396);
+ok ($sf->feature2->seq_id,'SINFRUP00000067802');
+ok ($sf->feature2->start,1);
+ok ($sf->feature2->end,44);
+ok ($sf->feature1->end,22396);
+
+$parser = new Bio::SearchIO(-file => 
+			    Bio::Root::IO->catfile(qw(t data genewise.out)),
+			    -format   => 'wise',
+			    -wisetype => 'genewise');
+my $result = $parser->next_result;
+skip(1,'swapping query/name need to reconsider how this done');
+#ok($result->query_name, 'SINFRUP00000067802');
+my $hit = $result->next_hit;
+skip(1,'swapping query/name need to reconsider how this done');
+#ok($hit->name, 'Scaffold_2042.1');
+ok($hit->score, 2054.68);
+my $hsp = $hit->next_hsp;
+
+ok($hsp->query->start,22265);
+ok($hsp->query->end,22396);
+ok($hsp->query->strand,1);
+ok($hsp->query->score, 2054.68);
+
+ok($hsp->hit->start,1);
+ok($hsp->hit->end,44);
+ok($hsp->hit->strand,0);
+ok($hsp->hit->score, 2054.68);
+
+$hsp = $hit->next_hsp;
+
+ok($hsp->query->start,24224);
+ok($hsp->query->end,24328);
+
+ok($hsp->hit->start,45);
+ok($hsp->hit->end,79);
+
+$hsp = $hit->next_hsp;
+
+ok($hsp->query->start,24471);
+ok($hsp->query->end,24513);
+
+ok($hsp->hit->start,80);
+ok($hsp->hit->end,93);

Added: trunk/packages/bioperl/branches/upstream/current/t/Genomewise.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Genomewise.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Genomewise.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,77 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 20;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::Genomewise;
+
+END {
+	for ( $Test::ntest..$NTESTS ) {
+		skip("Cannot complete genomewise tests, skipping.",1);
+	}
+}
+
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","genomewise.out");
+my $parser = Bio::Tools::Genomewise->new(-file => $inputfilename);
+my @gene;
+while (my $gene= $parser->next_prediction){
+    push @gene, $gene;
+}
+my @t = $gene[0]->transcripts;
+my @e = $t[0]->exons;
+
+ok ($t[0]->source_tag, 'genomewise');
+ok ($e[0]->source_tag, 'genomewise');
+ok ($t[0]->primary_tag, 'transcript');
+ok ($e[0]->primary_tag, 'exon');
+
+ok (scalar($t[0]->exons), 5);
+ok ($t[0]->start, 4761);
+ok ($t[0]->end, 6713);
+ok ($e[0]->start,4761);
+ok ($e[0]->end, 4874);
+my ($phase) = $e[0]->each_tag_value('phase');
+ok ($phase,0);
+
+open(FH,$inputfilename);
+$parser = Bio::Tools::Genomewise->new(-fh=>\*FH);
+while (my $gene= $parser->next_prediction){
+    push @gene, $gene;
+}
+@t = $gene[1]->transcripts;
+@e = $t[0]->exons;
+
+ok ($t[0]->source_tag, 'genomewise');
+ok ($e[0]->source_tag, 'genomewise');
+ok ($t[0]->primary_tag, 'transcript');
+ok ($e[0]->primary_tag, 'exon');
+
+ok (scalar($t[0]->exons), 3);
+ok ($t[0]->start, 9862);
+ok ($t[0]->end, 10316);
+ok ($e[1]->start,10024);
+ok ($e[1]->end, 10211);
+
+($phase) = $e[2]->each_tag_value('phase');
+ok ($phase,2);
+
+
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Genpred.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Genpred.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Genpred.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,175 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Genpred.t,v 1.15.8.1 2006/10/02 23:10:40 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 79;
+}
+
+use Bio::Tools::Genscan;
+use Bio::Tools::Genemark;
+use Bio::Tools::Glimmer;
+use Bio::Tools::MZEF;  
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+# Genscan report
+my $genscan = Bio::Tools::Genscan->new('-file' => Bio::Root::IO->catfile("t","data","genomic-seq.genscan"));
+ok $genscan;
+
+# original sequence
+my $seqin = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data","genomic-seq.fasta"),
+			    '-format' => "fasta");
+ok $seqin;
+my $seq = $seqin->next_seq();
+$seqin->close();
+ok $seq;
+
+# scan through the report
+my $fea;
+my $pred_num = 0;
+my ($prtseq, $cds, $tr_cds);
+while(my $gene = $genscan->next_prediction()) {
+    $gene->attach_seq($seq) if $seq;
+    $pred_num++;
+
+    if($pred_num == 1) {
+	$fea = ($gene->exons())[0];
+	ok $fea->strand(), -1, 
+	     "strand mismatch (".$fea->strand()." instead of -1)";
+	$fea = ($gene->poly_A_site());
+	ok $fea->score(), 1.05, 
+             "score mismatch (".$fea->score()." instead of 1.05)";
+    }
+    if($pred_num == 2) {
+	$fea = ($gene->exons("Initial"))[0];
+	ok $fea->strand(), 1, 
+	"strand mismatch (".$fea->strand()." instead of 1)";
+	ok $fea->score(), 4.46, 
+             "score mismatch (".$fea->score()." instead of 4.46)";
+    }
+    if($pred_num == 3) {
+	my @exons = $gene->exons("Initial");
+	ok scalar(@exons), 0, 
+	     "initial exons (".scalar(@exons)." instead of 0)";
+	$fea = ($gene->exons())[0];
+	ok $fea->score(),  1.74, 
+             "score mismatch (".$fea->score()." instead of 1.74)";
+    }
+    if($seq) {
+	$prtseq = $gene->predicted_protein()->seq();
+        $cds = $gene->cds();
+	ok($cds) || print STDERR "# no CDS for prediction $pred_num; protein: $prtseq\n";
+	$tr_cds = $cds->translate()->seq();
+	$tr_cds =~ s/\*$//;
+	ok( lc($prtseq), lc($tr_cds),
+	    "predicted and extracted protein seqs don't match");
+    }
+}
+
+# Genscan report with no genes predicted
+my $null_genscan = Bio::Tools::Genscan->new('-file' => Bio::Root::IO->catfile("t","data","no-genes.genscan"));
+ok $null_genscan;
+my $no_gene = $null_genscan->next_prediction;
+my @exons = $no_gene->exons;
+ok($#exons,-1);
+
+# MZEF report
+my $mzef = Bio::Tools::MZEF->new('-file' => Bio::Root::IO->catfile("t","data","genomic-seq.mzef"));
+ok $mzef;
+
+my $exon_num = 0;
+my $gene = $mzef->next_prediction();
+
+ok($gene->exons, 23);
+
+# Genemark testing:
+my $genemark = Bio::Tools::Genemark->new('-file' => Bio::Root::IO->catfile(qw(t data genemark.out)));
+
+my $gmgene = $genemark->next_prediction();
+ok $gmgene->seq_id(), "Hvrn.contig8";
+ok $genemark->analysis_date(), "Thu Mar 22 10:25:00 2001";
+
+my $i = 0;
+my @num_exons = (1,5,2,1,9,5,3,2,3,2,1,2,7);
+while($gmgene = $genemark->next_prediction()) {
+    $i++;
+    my @gmexons = $gmgene->exons();
+    ok scalar(@gmexons), $num_exons[$i];
+
+    if($i == 5) {
+	my $gmstart = $gmexons[0]->start();
+	ok $gmstart, 23000;
+
+	my $gmend = $gmexons[0]->end();
+	ok $gmend, 23061;
+    }
+}
+
+# Glimmer testing (GlimmerM)
+my $glimmer = new Bio::Tools::Glimmer('-file' => Bio::Root::IO->catfile(qw(t data glimmer.out)));
+my $glimmergene = $glimmer->next_prediction;
+
+ok($glimmergene);
+ok($glimmergene->seq_id, 'BAC1Contig11');
+ok($glimmergene->source_tag, 'GlimmerM_3.0');
+ok($glimmergene->primary_tag, 'transcript');
+ok(($glimmergene->get_tag_values('Group'))[0], 'GenePrediction1');
+my @glim_exons = $glimmergene->exons;
+ok(scalar (@glim_exons), 5);
+ok($glim_exons[0]->start, 13907);
+ok($glim_exons[0]->end, 13985);
+ok($glim_exons[0]->strand, 1);
+ok(($glim_exons[0]->get_tag_values('Group'))[0], 'GenePrediction1');
+
+@num_exons = (0,5,3, 1, 6, 3);
+$i = 1;
+while($glimmergene = $glimmer->next_prediction()) {
+    $i++;
+    ok(($glimmergene->get_tag_values('Group'))[0],"GenePrediction$i");
+    @glim_exons = $glimmergene->exons();    
+    ok scalar(@glim_exons), $num_exons[$i];
+    if($i == 5) {
+	ok $glim_exons[1]->start, 30152;
+	ok $glim_exons[1]->end, 30235;
+	ok $glim_exons[1]->strand, -1;
+    }
+}
+
+# Glimmer testing (GlimmerHMM)
+my $ghmm = Bio::Tools::Glimmer->new('-file' => Bio::Root::IO->catfile(qw(t data GlimmerHMM.out)));
+my $ghmmgene = $ghmm->next_prediction;
+
+ok($ghmmgene);
+ok($ghmmgene->seq_id, 'gi|23613028|ref|NC_004326.1|');
+ok($ghmmgene->source_tag, 'GlimmerHMM');
+ok($ghmmgene->primary_tag, 'transcript');
+ok($ghmmgene->exons == 1);
+
+@num_exons = qw(0 1 2 4 2 2 1 1 1 2 2 2 10 4 1 1); # only first few tested
+$i = 1;
+while ($ghmmgene = $ghmm->next_prediction) {
+  $i++;
+  my @ghmm_exons = $ghmmgene->exons;    
+  ok(scalar(@ghmm_exons), $num_exons[$i]) if $i <= $#num_exons;
+  if ($i == 9) {
+    ok( $ghmm_exons[1]->start, 5538 );
+    ok( $ghmm_exons[1]->end,   5647 );
+    ok( $ghmm_exons[1]->strand > 0  );
+  }
+}
+ok($i, 44);
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/GraphAdaptor.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GraphAdaptor.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GraphAdaptor.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,79 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+# $Id: GraphAdaptor.t,v 1.2 2005/04/17 02:05:48 lapp Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    eval { require 'Graph.pm' };
+    if( $@ ) {
+	    print STDERR "\nGraph.pm doesn't seem to be installed on this system -- the GO Parser needs it...\n\n";
+	    plan tests => 1;
+	    ok( 1 );
+	    exit( 0 );
+    }
+
+    plan tests => 18;
+}
+
+use Bio::Ontology::SimpleGOEngine::GraphAdaptor;
+
+my $g=new Bio::Ontology::SimpleGOEngine::GraphAdaptor;
+my $graph_version=( defined($Graph::VERSION) && $Graph::VERSION >= 0.5 )  ? 'new' : 'old';
+my $adaptor_class=$graph_version eq 'new' ? 
+  'Bio::Ontology::SimpleGOEngine::GraphAdaptor' : 'Bio::Ontology::SimpleGOEngine::GraphAdaptor02';
+ok (ref $g, $adaptor_class);
+
+$g->add_vertex('vertex0');
+ok($g->has_vertex('vertex0'));
+ok(!$g->has_vertex('vertex1'));
+my @v=$g->vertices;
+ok (@v==1 && $v[0] eq 'vertex0') ;
+
+$g->add_edge('vertex0','vertex1');
+ok($g->has_edge('vertex0','vertex1'));
+ok(!$g->has_edge('vertex0','vertex'));
+my @e=$g->edges;
+ok (@e==1 && $e[0]->[0] eq 'vertex0' && $e[0]->[1] eq 'vertex1') ;
+
+@e=$g->edges_at('vertex0');
+ok (@e==1 && $e[0]->[0] eq 'vertex0' && $e[0]->[1] eq 'vertex1') ;
+
+@v=$g->predecessors('vertex1');
+ok (@v==1 && $v[0] eq 'vertex0');
+
+@v=$g->successors('vertex0');
+ok (@v==1 && $v[0] eq 'vertex1');
+
+@v=$g->source_vertices;
+ok (@v==1 && $v[0] eq 'vertex0');
+
+@v=$g->sink_vertices;
+ok (@v==1 && $v[0] eq 'vertex1');
+
+$g->set_vertex_attribute('vertex0','ATTR0','vertex0_ATTR0');
+$g->set_vertex_attribute('vertex0','ATTR1','vertex0_ATTR1');
+$g->set_vertex_attribute('vertex1','ATTR0','vertex1_ATTR0');
+$g->set_vertex_attribute('vertex1','ATTR1','vertex1_ATTR1');
+ok ($g->get_vertex_attribute('vertex0','ATTR0'),'vertex0_ATTR0');
+ok ($g->get_vertex_attribute('vertex0','ATTR1'),'vertex0_ATTR1');
+ok ($g->get_vertex_attribute('vertex1','ATTR0'),'vertex1_ATTR0');
+ok ($g->get_vertex_attribute('vertex1','ATTR1'),'vertex1_ATTR1');
+
+$g->set_edge_attribute('vertex0','vertex1','ATTR0','vertex0_vertex1_ATTR0');
+$g->set_edge_attribute('vertex0','vertex1','ATTR1','vertex0_vertex1_ATTR1');
+ok ($g->get_edge_attribute('vertex0','vertex1','ATTR0'),'vertex0_vertex1_ATTR0');
+ok ($g->get_edge_attribute('vertex0','vertex1','ATTR1'),'vertex0_vertex1_ATTR1');
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/GraphAdaptor.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/GuessSeqFormat.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/GuessSeqFormat.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/GuessSeqFormat.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,133 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: GuessSeqFormat.t,v 1.8.6.2 2006/12/04 14:17:26 sendu Exp $
+# test for Bio::Tools::GuessSeqFormat
+# written by Heikki Lehvaslaiho
+
+use strict;
+my $NUMTESTS;
+my $error;
+
+BEGIN {
+   eval { require Test; };
+   if( $@ ) {
+      use lib 't';
+   }
+   use Test;
+   $error = 0;
+   # SeqIO::game needs XML::Writer and XML::Parser::PerlSAX
+   eval {require XML::Writer; require XML::Parser::PerlSAX;};
+   if ($@) {
+      print STDERR "XML::Writer or XML::Parser::PerlSAX not found, skipping game test\n";
+      $error = 1;
+   }
+   $NUMTESTS = ($error == 1) ? 44 : 46;
+   plan tests => $NUMTESTS;
+}
+
+my @seqformats = qw{ ace embl fasta gcg genbank mase
+                        pfam pir raw swiss tab };
+
+push @seqformats,"game" if ($error == 0);
+
+use Bio::SeqIO;
+use Bio::AlignIO;
+use Bio::Tools::GuessSeqFormat;
+use Data::Dumper;
+
+ok 1;
+
+my $format;
+my $verbose =1;
+#
+# Seqio formats
+#
+
+#not tested:  waba
+
+my %no_seqio_module = map {$_=>1} qw {gcgblast gcgfasta mase pfam};
+
+my $guessed_format = new Bio::Tools::GuessSeqFormat
+        (-file => Bio::Root::IO->catfile("t","data","test.waba"))->guess;
+ok $guessed_format, undef ;
+
+eval {
+    my $input = Bio::SeqIO->new
+        (-file=>Bio::Root::IO->catfile("t","data","test.waba"));
+    ok my $seq = $input->next_seq();
+};
+$@ ? ok 1 : ok 0;
+
+foreach $format (@seqformats) {
+    my $guessed_format = new Bio::Tools::GuessSeqFormat
+        (-file => Bio::Root::IO->catfile("t","data","test.$format"),
+         #-verbose=> $verbose;
+        )->guess;
+    $format =~ s/\..*$//;
+    ok $guessed_format, $format;
+    next if $no_seqio_module{$format};
+
+    eval {
+        my $input = Bio::SeqIO->new
+            (-file=>Bio::Root::IO->catfile("t","data","test.$format"));
+        ok my $seq = $input->next_seq();
+    };
+    ok 0, 1, $@ if $@;
+}
+
+
+#
+# AlignIO formats
+#
+
+@seqformats = qw{ aln:clustalw fasta mase msf nexus pfam phylip
+                  prodom stockholm}; # not selex (same as pfam, mainly)
+
+my %no_alignio_module = map {$_=>1} qw {};
+
+foreach my $ext (@seqformats) {
+    my $format;
+    ($ext, $format) = split /:/, $ext;
+    my $guesser = new Bio::Tools::GuessSeqFormat
+        (-file => Bio::Root::IO->catfile("t","data","testaln.$ext"));
+    $format ||= $ext;
+    ok $guesser->guess(), $format;
+
+    next if $no_alignio_module{$format};
+
+    eval {
+        my $input = Bio::AlignIO->new
+            (-file=>Bio::Root::IO->catfile("t","data","testaln.$ext"));
+        ok my $seq = $input->next_aln();
+    };
+    ok 0, 1, $@ if $@;
+}
+
+
+#
+# File handle tests
+#
+if( eval 'require IO::String; 1' ) {
+
+    my $string = ">test1 no comment
+agtgctagctagctagctagct
+>test2 no comment
+gtagttatgc
+";
+
+    my $stringfh = new IO::String($string);
+    
+    my $seqio = new Bio::SeqIO(-fh => $stringfh);
+    while( my $seq = $seqio->next_seq ) {
+	ok $seq->id =~ /test/;
+    }
+    
+#
+# text guessing
+#
+
+    ok new Bio::Tools::GuessSeqFormat( -text => $string )->guess, 'fasta';
+} else {
+    for (1..3) {
+	skip("skipping guessing format from string, IO::String not installed",1);
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/HNN.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/HNN.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/HNN.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: HNN.t,v 1.1 2003/07/23 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 13;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+	
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if ($@) {
+		plan skip_all => 'IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+	
+	use_ok("Bio::Seq");
+	use_ok("Bio::Tools::Analysis::Protein::HNN");
+}
+
+#	eval {require Bio::Seq::Meta::Array;};
+#	"Bio::Seq::Meta::Array not installed - will skip tests using meta sequences"
+
+my $verbose = $DEBUG;
+
+my $seq = Bio::Seq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS',
+                        -display_id => 'test2');
+ok my $tool = Bio::Tools::Analysis::Protein::HNN->new(-seq=>$seq->primary_seq);
+
+SKIP: {
+	skip "Skipping tests which require remote servers, set BIOPERLDEBUG=1 to test", 10 unless $DEBUG;
+    ok $tool->run();
+	skip "Skipping tests since we got terminated by a server error", 9 if $tool->status eq 'TERMINATED_BY_ERROR';
+    ok my $raw = $tool->result('');
+    ok my $parsed = $tool->result('parsed');
+    is $parsed->[0]{'coil'}, '1000';
+    my @res = $tool->result('Bio::SeqFeatureI');
+    if (scalar @res > 0) {
+		ok 1;
+    }
+	else {
+		skip 'No results - could not connect to HNN server?', 6;
+    }
+    
+    ok my $meta = $tool->result('meta');
+    ok my $seqobj = Bio::Seq->new(-primary_seq => $meta, display_id=>"a");
+    ok $seqobj->add_SeqFeature($tool->result('Bio::SeqFeatureI'));
+    
+    eval {require Bio::Seq::Meta::Array;};
+	skip "Bio::Seq::Meta::Array not installed - will skip tests using meta sequences", 2 if $@;
+	is $meta->named_submeta_text('HNN_helix',1,2), '0 111';
+	is $meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS';
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/HtSNP.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/HtSNP.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/HtSNP.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,92 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+        use lib 't';
+    }
+    use Test;
+
+    plan tests => 7;
+}
+
+use Bio::PopGen::HtSNP;
+
+my $hap = [
+     'acgt?cact',
+     'acgt?ca-t',
+     'cg?tag?gc',
+     'cactcgtgc',
+     'cgctcgtgc',
+     'cggtag?gc',
+     'ac?t?cact',
+     ];
+
+my $snp = [qw/s1 s2 s3 s4 s5 s6 s7 s8 s9/];
+
+my $pop = [
+     [qw/ uno    0.20/],
+     [qw/ dos    0.20/],
+     [qw/ tres   0.15/],
+     [qw/ cuatro 0.15/],
+     [qw/ cinco  0.10/],
+     [qw/ seis   0.10/],
+     [qw/ siete  0.10/],
+       ];
+
+my $obj = Bio::PopGen::HtSNP->new(-haplotype_block => $hap,
+                                   -snp_ids         => $snp,
+                                   -pattern_freq    => $pop,
+);
+
+
+# check lenght of the haplotype
+ok($obj->hap_length,9); # length of the haplotype must be 9 
+
+# check silent SNPs
+ok( (join ' ', @{$obj->silent_snp}) ,'s4'); # the silent snp is in position 4 (counting from 1)
+
+# check degenerated SNPs 
+ok( (join ' ', @{$obj->deg_snp}) ,'s7 s5 s3'); # degenerate SNPs 
+
+# check useful SNP's
+ok( (join ' ', @{$obj->useful_snp}) ,'s1 s2 s6 s8 s9'); # degenerate SNPs 
+
+# check the SNP code
+ok( (join ' ',@{$obj->snp_type_code}),'36 63 36 75 36'); # code for SNPs
+
+# check the HtType 
+ok( (join ' ',@{$obj->ht_type}),'36 63 75'); # min snp_code 
+
+my $tmp = $obj->deg_pattern();
+my $err=0;
+
+foreach my $family (keys %$tmp){
+    if ($family eq '0'){
+       unless ( (join ' ', @{$tmp->{$family}}) eq '0 6'){
+           $err=1;
+       }
+    }
+    if ($family eq '1'){
+       unless ( (join ' ', @{$tmp->{$family}}) eq '1'){
+           $err=1;
+       }
+    }
+    if ($family eq '2'){
+       unless ( (join ' ', @{$tmp->{$family}}) eq '2 4 5'){
+           $err=1;
+       }
+    }
+    if ($family eq '3'){
+       unless ( (join ' ', @{$tmp->{$family}}) eq '3'){
+           $err=1;
+       }
+    }
+}
+
+ok(! $err); # clustering degenerated haplotypes 

Added: trunk/packages/bioperl/branches/upstream/current/t/IUPAC.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/IUPAC.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/IUPAC.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan test => 2;
+}
+
+#use Bio::Tools::SeqAnal; # deprecated, don't use any more
+use Bio::Tools::IUPAC;
+use Bio::Seq;
+
+# test IUPAC
+
+my $ambiseq = new Bio::Seq (-seq => 'ARTCGTTGR',
+			    -alphabet => 'dna'); 
+
+my $stream  = new Bio::Tools::IUPAC('-seq' => $ambiseq);
+ok $stream->count(), 4;
+
+my $b = 1; 
+while (my $uniqueseq = $stream->next_seq()) {
+    if( ! $uniqueseq->isa('Bio::Seq') ) {
+	$b = 0;
+	last; # no point continuing if we get here
+    }
+}
+ok $b;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Index.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Index.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Index.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,218 @@
+# -*-Perl-*-
+# $Id: Index.t,v 1.44.2.3 2006/10/02 23:10:40 sendu Exp $
+
+use strict;
+use vars qw($exit $DEBUG);
+BEGIN {
+   eval { require Test; };
+   use vars qw($NUMTESTS);
+   $DEBUG = $ENV{"BIOPERLDEBUG"} || 0;
+   $NUMTESTS = 50;
+   if ( $@ ) {
+      use lib 't';
+   }
+   use Test;
+   eval {
+		require Bio::Index::Fasta;
+		require Bio::Index::Qual;
+		require Bio::Index::SwissPfam;
+		require Bio::Index::EMBL;
+		require Bio::Index::GenBank;
+		require Bio::Index::Swissprot;
+		require DB_File;
+		require Storable;
+		require File::Temp;
+		require Fcntl;
+	};
+   if ( $@ ) {
+      warn("Module DB_File or Fcntl or Storable or File::Temp not installed - skipping tests\n") if $DEBUG;
+      $exit = 1;
+   }
+   plan tests => $NUMTESTS;
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+	  skip("Cannot complete Index.t tests, skipping",1);
+	}
+}
+
+exit(0) if $exit;
+
+use Bio::Root::IO;
+use Bio::DB::InMemoryCache;
+
+eval { require Bio::DB::FileCache };
+
+use vars qw ($dir);
+
+($Bio::Root::IO::FILESPECLOADED && File::Spec->can('curdir') &&
+($dir = File::Spec->curdir) ) || ($dir = `pwd`) || ($dir = '.');
+chomp( $dir );
+
+my $ind = Bio::Index::Fasta->new(-filename => 'Wibbl',
+											-write_flag => 1,
+											-verbose => 0);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","multifa.seq"));
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","seqs.fas"));
+
+ok ( -e "Wibbl" || -e "Wibbl.pag" );
+my $seq = $ind->fetch('HSEARLOBE');
+ok($seq->length,321);
+$seq = $ind->fetch('HSMETOO');
+ok($seq->length,134);
+$seq = $ind->fetch('MMWHISK');
+ok($seq->length,62);
+$seq = $ind->fetch('gi|238775|bbs|65126');
+ok($seq->length,70);
+
+my $stream = $ind->get_PrimarySeq_stream();
+$seq = $stream->next_seq;
+ok ($seq->isa('Bio::PrimarySeqI'));
+
+$ind = Bio::Index::Fasta->new(-filename => 'multifa_index',
+										-write_flag => 1,
+										-verbose => 0);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","multifa.seq.qual"));
+
+ok ( -e "multifa_index" );
+
+$ind = Bio::Index::Qual->new(-filename => 'multifa_qual_index',
+									  -write_flag => 1,
+									  -verbose => 0);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","multifa.seq.qual"));
+
+ok ( -e "multifa_qual_index" );
+
+ok ( defined($seq) && $seq->isa('Bio::SeqI'));
+$seq = $ind->fetch('HSEARLOBE');
+ok($seq->length,321);
+$seq = $ind->fetch('HSMETOO');
+ok($seq->length,134);
+$seq = $ind->fetch('MMWHISK');
+ok($seq->length,62);
+$seq = $ind->fetch('NONEXISTENT_SEQ');
+ok(! defined $seq);
+
+$ind = Bio::Index::SwissPfam->new(-filename => 'Wibbl2',
+											 -write_flag =>1);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","swisspfam.data"));
+
+ok ( -e "Wibbl2" || -e "Wibbl2.pag" );
+
+$ind = Bio::Index::EMBL->new(-filename   => 'Wibbl3',
+			     -write_flag =>1);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","test.embl"));
+ok ( -e "Wibbl3" || -e "Wibbl3.pag" );
+ok ($ind->fetch('AL031232')->length, 4870);
+
+$ind = Bio::Index::Swissprot->new(-filename   => 'Wibbl4',
+											 -write_flag => 1);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","roa1.swiss"));
+ok ( -e "Wibbl4" || -e "Wibbl4.pag" );
+$seq = $ind->fetch('ROA1_HUMAN');
+ok ($seq->display_id(), 'ROA1_HUMAN');
+$seq = $ind->fetch('P09651');
+ok ($seq->display_id(), 'ROA1_HUMAN');
+
+# test id_parser
+$ind = Bio::Index::Swissprot->new(-filename   => 'Wibbl4',
+											 -write_flag => 1);
+$ind->id_parser(\&get_id);
+$ind->make_index(Bio::Root::IO->catfile($dir,"t","data","roa1.swiss"));
+ok ( -e "Wibbl4" || -e "Wibbl4.pag" );
+$seq = $ind->fetch('X12671');
+ok ($seq->length,371);
+
+
+my $gb_ind = Bio::Index::GenBank->new(-filename => 'Wibbl5',
+												  -write_flag =>1,
+												  -verbose    => 0);
+$gb_ind->make_index(Bio::Root::IO->catfile($dir,"t","data","roa1.genbank"));
+ok ( -e "Wibbl5" || -e "Wibbl5.pag" );
+$seq = $gb_ind->fetch('AI129902');
+ok ($seq->length, 37);
+ok ($seq->species->binomial, 'Homo sapiens');
+$seq = $gb_ind->fetch(3598416);
+ok ($seq->seq,"CTCCGCGCCAACTCCCCCCACCCCCCCCCCACACCCC");
+
+my $cache = Bio::DB::InMemoryCache->new( -seqdb => $gb_ind );
+
+ok ( $cache->get_Seq_by_id('AI129902') );
+
+if (Bio::DB::FileCache->can('new')) {
+   $cache = Bio::DB::FileCache->new(-seqdb => $gb_ind,
+												-keep  => 1,
+												-file  => 'filecache.idx');
+   # problem:
+   my $seq = $cache->get_Seq_by_id('AI129902');
+   ok ( $seq);
+   ok ( $seq->length, 37);
+   ok ( lc($seq->seq()), 'ctccgcgccaactccccccaccccccccccacacccc');
+
+   my ( $f1 ) = $seq->get_SeqFeatures();
+   ok ( ($f1->each_tag_value('sex'))[0], 'female');
+   ok ( ($f1->each_tag_value('lab_host'))[0], 'DH10B');
+   my $species = $seq->species;
+   ok( $species );
+   ok( $species->binomial, 'Homo sapiens');
+   ok( $species->species(), 'sapiens');
+   ok( $species->genus(), 'Homo');
+   # changes in GenBank file SOURCE line
+   # this is now the abbreviated name
+   ok defined($species->name('abbreviated'));
+   ok ($species->name('abbreviated')->[0], 'human');
+
+   $cache = undef;
+   $cache = Bio::DB::FileCache->new(-seqdb => $gb_ind,
+												-keep  => 0,
+												-file  => 'filecache.idx');
+   $seq = $cache->get_Seq_by_id('AI129902');
+   ok ( $seq);
+   ok ( $seq->length, 37);
+   ok ( lc($seq->seq()), 'ctccgcgccaactccccccaccccccccccacacccc');
+
+   ( $f1 ) = $seq->get_SeqFeatures();
+   ok ( ($f1->each_tag_value('sex'))[0], 'female');
+   ok ( ($f1->each_tag_value('lab_host'))[0], 'DH10B');
+   $species = $seq->species;
+   ok( $species );
+   ok( $species->binomial, 'Homo sapiens');
+   ok( $species->species(), 'sapiens');
+   ok( $species->genus(), 'Homo');
+   # changes in GenBank file SOURCE line
+   # this is now the abbreviated name
+   ok defined($species->name('abbreviated'));
+   ok ($species->name('abbreviated')->[0], 'human');
+} else {
+   skip('Bio::DB::FileCache not loaded because one or more of Storable, Fcntl, DB_File or File::Temp not installed',1);
+}
+
+# test id_parser
+$gb_ind = Bio::Index::GenBank->new(-filename => 'Wibbl5',
+											  -write_flag =>1,
+											  -verbose    => 0);
+$gb_ind->id_parser(\&get_id);
+$gb_ind->make_index(Bio::Root::IO->catfile($dir,"t","data","roa1.genbank"));
+ok ( -e "Wibbl5" || -e "Wibbl5.pag" );
+$seq = $gb_ind->fetch('alpha D-globin');
+ok ($seq->length,141);
+
+sub get_id {
+	my $line = shift;
+	return $1 if ($line =~ /product="([^"]+)"/);
+	return $1 if ($line =~ /^DR\s+EMBL;\s+([^;]+)/);
+}
+
+END {
+	cleanup();
+}
+
+sub cleanup {
+	for my $root ( qw( Wibbl Wibbl2 Wibbl3 Wibbl4 Wibbl5
+                      multifa_index multifa_qual_index ) ) {
+		unlink $root if( -e $root );
+		unlink "$root.pag" if( -e "$root.pag");
+		unlink "$root.dir" if( -e "$root.dir");
+	}
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/InstanceSite.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/InstanceSite.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/InstanceSite.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+#-*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: InstanceSite.t,v 1.5 2004/11/01 21:09:49 bosborne Exp $
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 6;
+}
+
+use Bio::Matrix::PSM::InstanceSite;
+ok(1);
+
+my %params=(-seq=>'TATAAT',-id=>"TATAbox1", -accession_number=>'ENSG00000122304', -mid=>'TB1',
+            -desc=>'TATA box, experimentally verified in PRM1 gene',-relpos=>-35, -start=>1965);
+
+ok my $instance=new  Bio::Matrix::PSM::InstanceSite(%params);
+ok $instance->seq, 'TATAAT';
+ok $instance->subseq(1,3),'TAT';
+ok $instance->accession_number, 'ENSG00000122304';
+ok $instance->end(1999), 1999;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/InterProParser.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/InterProParser.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/InterProParser.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,121 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $GNF: projects/gi/symgene/src/perl/seqproc/t/InterProParser.t,v 1.7 2003/02/07 22:05:58 pdimitro Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+my $error;
+use vars qw($NUMTESTS $DEBUG $HAVEGRAPHDIRECTED $errmsg);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	$errmsg = 'Unable to run Interpro Tests';
+   # to handle systems with no installed Test module
+   # we include the t dir (where a copy of Test.pm is located)
+   # as a fallback
+   eval { require Test; };
+   $error = 0;
+   if( $@ ) {
+      use lib 't';
+   }
+   use Test; 
+   $NUMTESTS = 47;
+   plan tests => $NUMTESTS;
+   eval {
+      require XML::Parser::PerlSAX;
+   };
+   if ( $@ ) {
+      warn( "XML::Parser::PerlSAX not installed. This means that InterPro Ontology Parsing module is not usable. Skipping tests.\n") if $DEBUG;
+      $errmsg .= ', XML::Parser::PerlSAX not installed';
+      $error = 1;
+   }
+
+   eval {
+      require XML::Parser;
+   };
+   if( $@ ) {
+      warn "XML::Parser not installed. This means that InterPro Ontology Parsing module is not usable. Skipping tests.\n" if $DEBUG;
+      $error = 1;
+      $errmsg .= ', XML::Parser not installed';
+   }
+	eval {require Graph::Directed; 
+			$HAVEGRAPHDIRECTED=1;
+		};
+	if ($@) {
+		$errmsg .= ', Graph::Directed not installed';
+		$HAVEGRAPHDIRECTED = 0;
+		$error = 1;
+	}
+}
+
+END {
+   foreach ( $Test::ntest..$NUMTESTS) {
+      skip($errmsg,1);
+   }
+}
+if ( $error ) {
+    exit(0);
+}
+
+require Bio::OntologyIO;
+require Bio::Root::IO;
+
+my $io = Bio::Root::IO->new();
+
+ok (1);
+
+my $ipp = Bio::OntologyIO->new( -format => 'interpro',
+										  -file =>
+										  $io->catfile('t','data','interpro_short.xml'),
+										  -ontology_engine => 'simple' );
+ok ($ipp);
+
+my $ip;
+while(my $ont = $ipp->next_ontology()) {
+    # there should be only one ontology
+    ok ($ip, undef);
+    $ip = $ont;
+}
+#print $ip->to_string."\n";
+my @rt = sort { $a->name cmp $b->name; } $ip->get_root_terms();
+
+# there should be 4 root terms: InterPro Domain, InterPro Family,
+# InterPro Repeat, and InterPro PTM (Post Translational Modification)
+#
+# I added 2 more terms, Active_site and Binding_site. -Juguang
+ok (scalar(@rt), 6);
+
+# every InterPro term should have an ontology,
+foreach ($ip->get_leaf_terms, @rt) {
+	ok ($_->ontology);
+	ok ($_->ontology->name, "InterPro",
+		 "term ".$_->name." not in ontology InterPro");
+}
+
+# there are 6 fully instantiated InterPro terms in total, which should be returned as the leafs
+ok (scalar($ip->get_leaf_terms()), 8);
+# roots and leafs together:
+ok (scalar($ip->get_all_terms()), 13);
+
+# descendants and children (synonymous here because of depth 1)
+# note that the sort should have placed Domain first and Family second
+ok (scalar($ip->get_descendant_terms($rt[2])), 4); # 4 InterPro Domains
+ok (scalar($ip->get_child_terms($rt[2])), 4);      # dto.
+ok (scalar($ip->get_descendant_terms($rt[3])), 3); # 3 Interpro Family
+ok (scalar($ip->get_child_terms($rt[3])), 3);      # dto.
+
+# test for ancestors and parents (synonymous here because of depth 1)
+foreach my $t ($ip->get_leaf_terms) {
+	# every InterPro term has exactly one parent - namely either 
+	# Domain, Family, Repeat, or PTM(Post Transl. Modification)
+	if (!($t->identifier eq "Repeat" || $t->identifier eq "PTM" 
+			|| $t->identifier eq'Active_site' || $t->identifier eq'Binding_site')) {
+		ok (scalar($ip->get_parent_terms($t)), 1);
+		ok (scalar($ip->get_ancestor_terms($t)), 1);
+	}
+}
+
+# test for secondary accession map
+ok(scalar(keys %{$ipp->secondary_accessions_map}), 2);

Added: trunk/packages/bioperl/branches/upstream/current/t/LargeLocatableSeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/LargeLocatableSeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/LargeLocatableSeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,35 @@
+# This is -*-Perl-*- code
+# $Id: LargeLocatableSeq.t,v 1.1 2004/02/02 11:19:43 heikki Exp $
+use strict;
+
+my $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 8;
+}
+
+
+use Bio::Seq::LargeLocatableSeq;
+use Data::Dumper;
+ok 1;
+
+ok my $llseq  = Bio::Seq::LargeLocatableSeq->new(-seq => 'at-cg',
+                                                 -display_id => 'seq1');
+
+print Dumper $llseq if $DEBUG;
+
+ok $llseq->isa("Bio::Seq::LargeSeqI");
+
+ok $llseq->seq, 'at-cg';
+ok $llseq->add_sequence_as_string('atcc'), 9;
+
+ok $llseq->start, 1;
+
+ok $llseq->end, 8;
+ok $llseq->length, 9;
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/LinkageMap.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/LinkageMap.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/LinkageMap.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,86 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: LinkageMap.t,v 1.2 2005/09/16 12:22:36 bosborne Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG);
+    $DEBUG = $ENV{'BIOPERLDEBUG'};
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 16;
+}
+
+# END {
+# }
+
+require 'dumpvar.pl';
+
+use Bio::Map::LinkagePosition;
+use Bio::Map::Microsatellite;
+use Bio::Map::LinkageMap;
+
+ok 1 ;
+my $verbose = 0;
+ok my $map = new Bio::Map::LinkageMap('-verbose' => $verbose,
+				   '-name'    => 'Leviathon',
+				   '-type'    => 'Genetic',
+				   '-units'   => 'cM',
+				   '-species' => "Brassica");
+ok ref($map), 'Bio::Map::LinkageMap';
+ok $map->name, 'Leviathon';
+ok $map->type, 'Genetic';
+ok $map->units, 'cM';
+ok $map->species, 'Brassica';
+ok $map->unique_id, '1';
+
+ok my $position = new Bio::Map::LinkagePosition('-order' => 2,
+						'-map' =>  $map, 
+						'-value' => 22.3
+						);
+
+ok $position->order, 2;
+ok $position->map, $map,
+ok $position->value, 22.3;
+
+ok my $o_usat = new Bio::Map::Microsatellite('-name'     => "Chad marker",
+					     '-position' => $position);
+
+ok $o_usat->name, 'Chad marker';
+ok $o_usat->position, $position ;
+ok $map->add_element($o_usat);
+
+#use Data::Dumper; print Dumper($map);
+#----------------------------
+#ok my $position2 = new Bio::Map::LinkagePosition(-order => qw(3 4 5),
+#						 );
+# print("position2 looks like this:\n");
+# dumpValue($position2);
+#ok(($position2->each_position_value('fakemap'))[0] == 0);
+#ok $position2->order, 3;
+
+#-------------
+#ok($position->order, 2);
+#ok(($position->each_position_value($map))[0], 22.3);
+	# what should be printed if this was ok?
+	# ok(1);
+
+#ok my $o_usat = new Bio::Map::Microsatellite('-name'     => "Chad marker",
+#					      '-position' => $position);
+#
+#ok $o_usat->name, 'Chad marker';
+#ok $o_usat->position, $position ;
+#ok $map->add_element($o_usat);
+# what should be printed if this is ok?
+#dumpValue($map);
+
+# add more tests
+# see also t/microsatellite.t and t/linkageposition.t

Added: trunk/packages/bioperl/branches/upstream/current/t/LiveSeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/LiveSeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/LiveSeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,103 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: LiveSeq.t,v 1.14 2002/01/08 01:20:46 jason Exp $
+# Created: Thu Dec 14 13:57:04 GMT 2000
+# By Joseph A.L. Insana, <insana at ebi.ac.uk>
+#
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+my $error;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    $NUMTESTS = 48;
+    plan tests => $NUMTESTS;
+    $error = 0;
+    eval { require 'IO/String.pm' };
+    if( $@ ) {
+	print STDERR "IO::String not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n";
+	for( 1..$NUMTESTS ) {
+	    skip("IO::String not installed",1);
+	}
+	$error = 1; 
+    }
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require Bio::LiveSeq::IO::BioPerl;
+require Bio::Root::IO;
+
+ok(1);
+
+my $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"EMBL", 
+					   -file=>Bio::Root::IO->catfile("t","data","factor7.embl"));
+ok $loader;
+my $gene=$loader->gene2liveseq(-gene_name => "factor7");
+ok $gene;
+ok ref($gene), "Bio::LiveSeq::Gene";
+ok $gene->name, "factor7";
+ok $gene->get_DNA->alphabet, "dna";
+ok $gene->get_DNA->display_id, "HSCFVII";
+ok $gene->get_DNA->accession_number, "J02933";
+ok $gene, $gene->get_DNA->gene;
+ok $gene->get_DNA->desc, "Human blood coagulation factor VII gene, complete cds.";
+ok $gene->get_DNA->source, "Homo sapiens";
+ok $gene->get_DNA->start, 1;
+ok $gene->get_DNA->end, 12850;
+ok $gene->maxtranscript->start, 487;
+ok $gene->maxtranscript->end, 12686;
+ok $gene->upbound, 487;
+ok $gene->downbound, 12686;
+ok not(defined($gene->get_Repeat_Units));
+
+my @exons   = @{$gene->get_Exons};
+my @introns = @{$gene->get_Introns};
+ok scalar(@exons), 9;
+ok scalar(@introns), 8;
+ok $introns[4]->desc, "Intron D";
+ok $introns[4]->start, 6592;
+ok $introns[4]->end, 8306;
+ok $exons[1]->desc, "optional";
+ok $exons[4]->end, 6591;
+
+my $transcript  = $gene->get_Transcripts->[0];
+my $translation = $gene->get_Translations->[0];
+ok $transcript , $translation->get_Transcript;
+ok $translation , $transcript->get_Translation;
+
+@exons = $transcript->all_Exons;
+ok $exons[4]->end , 6591;
+ok $exons[4]->length , 114;
+ok $transcript->upstream_seq, "tcaacaggcaggggcagcactgcagagatttcatc";
+ok substr($transcript->downstream_seq,0,16), "cccagcagccctggcc";
+ok $transcript->position($transcript->label(666)), 666;
+ok $transcript->position($transcript->label(666),9419), 95;
+ok $transcript->labelsubseq(8447,undef,9419), "gt";
+ok $transcript->labelsubseq(8447,2), "gt";
+ok $gene->get_DNA->labelsubseq(8447,2), "gg";
+ok substr($gene->get_DNA->labelsubseq(8447,undef,9419),0,16), "ggtgaccaggcttcat";
+ok $gene->get_DNA, $transcript->{seq};
+my ($nothing,$whichexon) = $transcript->in_which_Exon(9419);
+ok $whichexon , 7;
+ok $transcript->frame(9419) , 1;
+ok $transcript->frame(9420) , 2;
+ok substr($translation->seq,0,16), "MVSQALRLLCLLLGLQ";
+ok substr($transcript->seq,0,32), "atggtctcccaggccctcaggctcctctgcct";
+ok $transcript->translation_table(2);
+ok $transcript->translation_table , 2;
+ok substr($translation->seq,0,16), "MVSQAL*"; # mitochondrial table creates stop codon
+ok $gene->verbose(2), 2;
+ok $gene->delete_Obj(); # to free all memory, deleting circular references
+

Added: trunk/packages/bioperl/branches/upstream/current/t/LocatableSeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/LocatableSeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/LocatableSeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,160 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: LocatableSeq.t,v 1.11 2003/11/13 13:44:56 heikki Exp $
+use strict;
+use constant NUMTESTS => 80;
+
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => NUMTESTS;
+}
+use Bio::LocatableSeq;
+ok(1);
+use Bio::SimpleAlign;
+use Bio::AlignIO;
+use Bio::Root::IO;
+
+my ($str, $aln, $seq, $loc);
+
+ok $seq = new Bio::LocatableSeq(
+			     -seq => '--atg---gta--',
+			     -strand => 1,
+			     -alphabet => 'dna'
+			     );
+ok $seq->alphabet, 'dna';
+ok $seq->start, 1;
+ok $seq->end, 6;
+ok $seq->strand, 1;
+ok $seq->no_gaps, 1;
+ok $seq->column_from_residue_number(4), 9;
+
+ok $loc = $seq->location_from_column(4);
+ok $loc->isa('Bio::Location::Simple');
+ok $loc->to_FTstring, 2;
+
+ok $loc = $seq->location_from_column(6);
+ok $loc->isa('Bio::Location::Simple');
+ok $loc->start, 3;
+ok $loc->location_type, 'IN-BETWEEN';
+ok $loc->to_FTstring, '3^4';
+
+
+ok $loc = $seq->location_from_column(2), undef;
+
+
+$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile("t","data","testaln.pfam"));
+ok defined($str) && ref($str) && $str->isa('Bio::AlignIO');
+$aln = $str->next_aln();
+ok $seq = $aln->get_seq_by_pos(1);
+ok ref($seq), 'Bio::LocatableSeq';
+
+ok $seq->get_nse, '1433_LYCES/9-246';
+ok $seq->id, '1433_LYCES';
+
+# test revcom and trunc
+
+$seq = new Bio::LocatableSeq(
+			     -seq => '--atg---gta--',
+			     -strand => 1,
+			     -alphabet => 'dna'
+			     );
+
+my $seq2 = $seq->trunc(1,9);
+ok $seq2->seq, '--atg---g';
+ok $seq2->start, 1;
+ok $seq2->end, 4;
+ok $seq2->strand, $seq->strand;
+
+$seq2 = $seq->trunc(3,8);
+ok $seq2->seq, 'atg---';
+ok $seq2->start, 1;
+ok $seq2->end, 3;
+
+ok $seq->strand(-1), -1;
+ok $seq->start, 1;
+ok $seq->end, 6;
+$seq2 = $seq->trunc(3,8);
+ok $seq2->seq, 'atg---';
+ok $seq2->start, 4;
+ok $seq2->end, 6;
+#use Data::Dumper;
+#print Dumper $seq;
+#print Dumper $seq2;
+#exit;
+$seq2 = $seq->revcom();
+ok $seq2->seq, '--tac---cat--';
+ok $seq2->start, $seq->start;
+ok $seq2->end, $seq->end;
+ok $seq2->strand, $seq->strand * -1;
+
+# test column-mapping for -1 strand sequence
+$seq = new Bio::LocatableSeq(
+			     -seq => '--atg---gtaa-',
+			     -strand => -1,
+			     -alphabet => 'dna'
+			     );
+ok $seq->column_from_residue_number(5),5;
+ok $seq->column_from_residue_number(4),9;
+ok $loc = $seq->location_from_column(4);
+ok $loc->isa('Bio::Location::Simple');
+ok $loc->to_FTstring, 6;
+ok $loc = $seq->location_from_column(6);
+ok $loc->isa('Bio::Location::Simple');
+ok $loc->start, 4;
+ok $loc->location_type, 'IN-BETWEEN';
+ok $loc->to_FTstring, '4^5';
+
+
+# more tests for trunc() with strand -1
+
+
+ok $seq = new Bio::LocatableSeq(
+			     -seq => '--atg---gta--',
+			     -strand => -1,
+			     -alphabet => 'dna'
+			     );
+ok $seq->alphabet, 'dna';
+ok $seq->start, 1;
+ok $seq->end, 6;
+ok $seq->strand, -1;
+ok $seq->no_gaps, 1;
+ok $seq->column_from_residue_number(4), 5;
+
+
+ok $seq2 = $seq->trunc(1,9);
+ok $seq2->seq, '--atg---g';
+ok $seq2->start, 3;
+ok $seq2->end, 6;
+ok $seq2->strand, $seq->strand;
+
+ok $seq->location_from_column(3)->start, 6;
+ok $seq->location_from_column(11)->start, 1;
+ok $seq->location_from_column(9)->start, 3;
+
+
+
+ok $seq2 = $seq->trunc(7,12);
+ok $seq2->seq, '--gta-';
+ok $seq2->start, 1;
+ok $seq2->end, 3;
+
+
+ok $seq2 = $seq->trunc(2,6);
+ok $seq2->seq, '-atg-';
+ok $seq2->start, 4;
+ok $seq2->end, 6;
+
+ok $seq2 = $seq->trunc(4,7);
+ok $seq2->seq, 'tg--';
+ok $seq2->start, 4;
+ok $seq2->end, 5;
+
+ok $seq = new Bio::LocatableSeq();
+ok $seq->seq, undef;
+ok $seq->start, undef;
+ok $seq->end, undef;

Added: trunk/packages/bioperl/branches/upstream/current/t/Location.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Location.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Location.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,285 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Location.t,v 1.29.4.3 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if( $@ ) {
+		use lib 't/lib';
+    }
+    use Test::More;
+    plan tests => 104;
+}
+
+use_ok('Bio::Location::Simple');
+use_ok('Bio::Location::Split');
+use_ok('Bio::Location::Fuzzy');
+
+use_ok('Bio::SeqFeature::Generic');
+use_ok('Bio::SeqFeature::SimilarityPair');
+use_ok('Bio::SeqFeature::FeaturePair');
+
+ok(1);
+
+my $simple = new Bio::Location::Simple('-start' => 10, '-end' => 20,
+				       '-strand' => 1, -seq_id => 'my1');
+isa_ok($simple, 'Bio::LocationI');
+isa_ok($simple, 'Bio::RangeI');
+
+is($simple->start, 10, 'Bio::Location::Simple tests');
+is($simple->end, 20);
+is($simple->seq_id, 'my1');
+
+my ($loc) = $simple->each_Location();
+ok($loc);
+is("$loc", "$simple");
+
+my $generic = new Bio::SeqFeature::Generic('-start' => 5, '-end' => 30, 
+					   '-strand' => 1);
+
+isa_ok($generic,'Bio::SeqFeatureI', 'Bio::SeqFeature::Generic' );
+isa_ok($generic,'Bio::RangeI');
+is($generic->start, 5);
+is($generic->end, 30);
+
+my $similarity = new Bio::SeqFeature::SimilarityPair();
+
+my $feat1 = new Bio::SeqFeature::Generic('-start' => 30, '-end' => 43, 
+					 '-strand' => -1);
+my $feat2 = new Bio::SeqFeature::Generic('-start' => 80, '-end' => 90, 
+					 '-strand' => -1);
+
+my $featpair = new Bio::SeqFeature::FeaturePair('-feature1' => $feat1,
+						'-feature2' => $feat2 );
+
+my $feat3 = new Bio::SeqFeature::Generic('-start' => 35, '-end' => 50, 
+					 '-strand' => -1);
+
+is($featpair->start, 30,'Bio::SeqFeature::FeaturePair tests');
+is($featpair->end,  43);
+
+is($featpair->length, 14);
+
+ok($featpair->overlaps($feat3));
+ok($generic->overlaps($simple), 'Bio::SeqFeature::Generic tests');
+ok($generic->contains($simple));
+
+# fuzzy location tests
+my $fuzzy = new Bio::Location::Fuzzy('-start'  =>'<10', 
+				     '-end'    => 20,
+				     -strand   =>1, 
+				     -seq_id   =>'my2');
+
+is($fuzzy->strand, 1, 'Bio::Location::Fuzzy tests');
+is($fuzzy->start, 10);
+is($fuzzy->end,20);
+ok(!defined $fuzzy->min_start);
+is($fuzzy->max_start, 10);
+is($fuzzy->min_end, 20);
+is($fuzzy->max_end, 20);
+is($fuzzy->location_type, 'EXACT');
+is($fuzzy->start_pos_type, 'BEFORE');
+is($fuzzy->end_pos_type, 'EXACT');
+is($fuzzy->seq_id, 'my2');
+is($fuzzy->seq_id('my3'), 'my3');
+
+($loc) = $fuzzy->each_Location();
+ok($loc);
+is("$loc", "$fuzzy");
+
+# split location tests
+my $splitlocation = new Bio::Location::Split;
+my $f = new Bio::Location::Simple(-start  => 13,
+				  -end    => 30,
+				  -strand => 1);
+$splitlocation->add_sub_Location($f);
+is($f->start, 13, 'Bio::Location::Split tests');
+is($f->min_start, 13);
+is($f->max_start,13);
+
+
+$f = new Bio::Location::Simple(-start  =>30,
+			       -end    =>90,
+			       -strand =>1);
+$splitlocation->add_sub_Location($f);
+
+$f = new Bio::Location::Simple(-start  =>18,
+			       -end    =>22,
+			       -strand =>1);
+$splitlocation->add_sub_Location($f);
+
+$f = new Bio::Location::Simple(-start  =>19,
+			       -end    =>20,
+			       -strand =>1);
+
+$splitlocation->add_sub_Location($f);
+
+$f = new Bio::Location::Fuzzy(-start  =>"<50",
+			      -end    =>61,
+			      -strand =>1);
+is($f->start, 50);
+ok(! defined $f->min_start);
+is($f->max_start, 50);
+
+is(scalar($splitlocation->each_Location()), 4);
+
+$splitlocation->add_sub_Location($f);
+
+is($splitlocation->max_end, 90);
+is($splitlocation->min_start, 13);
+is($splitlocation->end, 90);
+is($splitlocation->start, 13);
+is($splitlocation->sub_Location(),5);
+
+
+is($fuzzy->to_FTstring(), '<10..20');
+$fuzzy->strand(-1);
+is($fuzzy->to_FTstring(), 'complement(<10..20)');
+is($simple->to_FTstring(), '10..20');
+$simple->strand(-1);
+is($simple->to_FTstring(), 'complement(10..20)');
+is( $splitlocation->to_FTstring(), 
+    'join(13..30,30..90,18..22,19..20,<50..61)');
+
+# test for bug #1074
+$f = new Bio::Location::Simple(-start  => 5,
+			       -end    => 12,
+			       -strand => -1);
+$splitlocation->add_sub_Location($f);
+is( $splitlocation->to_FTstring(), 
+    'join(13..30,30..90,18..22,19..20,<50..61,complement(5..12))',
+	'Bugfix 1074');
+$splitlocation->strand(-1);
+is( $splitlocation->to_FTstring(), 
+    'complement(join(13..30,30..90,18..22,19..20,<50..61,5..12))');
+
+$f = new Bio::Location::Fuzzy(-start => '45.60',
+			      -end   => '75^80');
+
+is($f->to_FTstring(), '(45.60)..(75^80)');
+$f->start('20>');
+is($f->to_FTstring(), '>20..(75^80)');
+
+# test that even when end < start that length is always positive
+
+$f = new Bio::Location::Simple(-verbose => -1,
+			       -start   => 100, 
+			       -end     => 20, 
+			       -strand  => 1);
+
+is($f->length, 81, 'Positive length');
+is($f->strand,-1);
+
+# test that can call seq_id() on a split location;
+$splitlocation = new Bio::Location::Split(-seq_id => 'mysplit1');
+is($splitlocation->seq_id,'mysplit1', 'seq_id() on Bio::Location::Split');
+is($splitlocation->seq_id('mysplit2'),'mysplit2');
+
+
+# Test Bio::Location::Exact
+
+ok(my $exact = new Bio::Location::Simple(-start    => 10, 
+					 -end      => 20,
+					 -strand   => 1, 
+					 -seq_id   => 'my1'));
+isa_ok($exact, 'Bio::LocationI');
+isa_ok($exact, 'Bio::RangeI');
+
+is( $exact->start, 10, 'Bio::Location::Simple EXACT');
+is( $exact->end, 20);
+is( $exact->seq_id, 'my1');
+is( $exact->length, 11);
+is( $exact->location_type, 'EXACT');
+
+ok ($exact = new Bio::Location::Simple(-start         => 10, 
+				      -end           => 11,
+				      -location_type => 'IN-BETWEEN',
+				      -strand        => 1, 
+				      -seq_id        => 'my2'));
+
+is($exact->start, 10, 'Bio::Location::Simple IN-BETWEEN');
+is($exact->end, 11);
+is($exact->seq_id, 'my2');
+is($exact->length, 0);
+is($exact->location_type, 'IN-BETWEEN');
+
+eval {
+    $exact = new Bio::Location::Simple(-start         => 10, 
+				       -end           => 12,
+				       -location_type => 'IN-BETWEEN');
+};
+ok( $@, 'Testing error handling' );
+
+# testing error when assigning 10^11 simple location into fuzzy
+eval {
+    ok $fuzzy = new Bio::Location::Fuzzy(-start         => 10, 
+					 -end           => 11,
+					 -location_type => '^',
+					 -strand        => 1, 
+					 -seq_id        => 'my2');
+};
+ok( $@ );
+
+$fuzzy = new Bio::Location::Fuzzy(-location_type => '^',
+				  -strand        => 1, 
+				  -seq_id        => 'my2');
+
+$fuzzy->start(10);
+eval { $fuzzy->end(11) };
+ok($@);
+
+$fuzzy = new Bio::Location::Fuzzy(-location_type => '^',
+				  -strand        => 1, 
+				  -seq_id        =>'my2');
+
+$fuzzy->end(11);
+eval {
+    $fuzzy->start(10);
+};
+ok($@);
+
+# testing coordinate policy modules
+
+use_ok('Bio::Location::WidestCoordPolicy');
+use_ok('Bio::Location::NarrowestCoordPolicy');
+use_ok('Bio::Location::AvWithinCoordPolicy');
+
+$f = new Bio::Location::Fuzzy(-start => '40.60',
+			      -end   => '80.100');
+is $f->start, 40, 'Default coodinate policy';
+is $f->end, 100;
+is $f->length, 61;
+is $f->to_FTstring, '(40.60)..(80.100)';
+isa_ok($f->coordinate_policy, 'Bio::Location::WidestCoordPolicy');
+
+# this gives an odd location string; is it legal?
+$f->coordinate_policy(new Bio::Location::NarrowestCoordPolicy);
+is $f->start, 60, 'Narrowest coodinate policy';
+is $f->end, 80;
+is $f->length, 21;
+is $f->to_FTstring, '(60.60)..(80.80)';
+isa_ok($f->coordinate_policy, 'Bio::Location::NarrowestCoordPolicy');
+
+# this gives an odd location string
+$f->coordinate_policy(new Bio::Location::AvWithinCoordPolicy);
+is $f->start, 50, 'Average coodinate policy';
+is $f->end, 90;
+is $f->length, 41;
+is $f->to_FTstring, '(50.60)..(80.90)';
+isa_ok($f->coordinate_policy, 'Bio::Location::AvWithinCoordPolicy');
+
+# to complete the circle
+$f->coordinate_policy(new Bio::Location::WidestCoordPolicy);
+is $f->start, 40, 'Widest coodinate policy';
+is $f->end, 100;
+is $f->length, 61;
+is $f->to_FTstring, '(40.60)..(80.100)';
+isa_ok($f->coordinate_policy, 'Bio::Location::WidestCoordPolicy');
+

Added: trunk/packages/bioperl/branches/upstream/current/t/LocationFactory.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/LocationFactory.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/LocationFactory.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,165 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: LocationFactory.t,v 1.12.4.4 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if ( $@ ) {
+		use lib 't/lib';
+    }
+    use Test::More;
+    plan tests => 275;
+}
+
+use_ok('Bio::Factory::FTLocationFactory');
+use_ok('Bio::Factory::LocationFactoryI');
+use_ok('Bio::Location::Simple');
+use_ok('Bio::Location::Split');
+use_ok('Bio::Location::Fuzzy');
+
+my $simple_impl = "Bio::Location::Simple";
+my $fuzzy_impl = "Bio::Location::Fuzzy";
+my $split_impl = "Bio::Location::Split";
+
+# Holds strings and results. The latter is an array of expected class name,
+# min/max start position and position type, min/max end position and position
+# type, location type, the number of locations, and the strand.
+#
+my %testcases = (
+   # note: the following are directly taken from 
+   # http://www.ncbi.nlm.nih.gov/collab/FT/#location
+   "467" => [$simple_impl,
+	    467, 467, "EXACT", 467, 467, "EXACT", "EXACT", 1, 1],
+	"340..565" => [$simple_impl,
+		 340, 340, "EXACT", 565, 565, "EXACT", "EXACT", 1, 1],
+	"<345..500" => [$fuzzy_impl,
+		 undef, 345, "BEFORE", 500, 500, "EXACT", "EXACT", 1, 1],
+	"<1..888" => [$fuzzy_impl,
+		 undef, 1, "BEFORE", 888, 888, "EXACT", "EXACT", 1, 1],
+	"(102.110)" => [$fuzzy_impl,
+		 102, 102, "EXACT", 110, 110, "EXACT", "WITHIN", 1, 1],
+	"(23.45)..600" => [$fuzzy_impl,
+		 23, 45, "WITHIN", 600, 600, "EXACT", "EXACT", 1, 1],
+	"(122.133)..(204.221)" => [$fuzzy_impl,
+		 122, 133, "WITHIN", 204, 221, "WITHIN", "EXACT", 1, 1],
+	"123^124" => [$simple_impl,
+		 123, 123, "EXACT", 124, 124, "EXACT", "IN-BETWEEN", 1, 1],
+	"145^177" => [$fuzzy_impl,
+		 145, 145, "EXACT", 177, 177, "EXACT", "IN-BETWEEN", 1, 1],
+	"join(12..78,134..202)" => [$split_impl,
+		 12, 12, "EXACT", 202, 202, "EXACT", "EXACT", 2, 1],
+	"complement(join(4918..5163,2691..4571))" => [$split_impl,
+		 2691, 2691, "EXACT", 5163, 5163, "EXACT", "EXACT", 2, -1],
+	"complement(34..(122.126))" => [$fuzzy_impl,
+		 34, 34, "EXACT", 122, 126, "WITHIN", "EXACT", 1, -1],
+	"J00194:100..202" => [$simple_impl,
+		 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 1, 1],
+	# this variant is not really allowed by the FT definition
+	# document but we want to be able to cope with it
+	"J00194:(100..202)" => [$simple_impl,
+		 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 1, 1],
+	"((122.133)..(204.221))" => [$fuzzy_impl,
+		 122, 133, "WITHIN", 204, 221, "WITHIN", "EXACT", 1, 1],
+	"join(AY016290.1:108..185,AY016291.1:1546..1599)"=> [$split_impl,
+		 108, 108, "EXACT", 185, 185, "EXACT", "EXACT", 2, undef],
+
+	# UNCERTAIN locations and positions (Swissprot)
+   "?2465..2774" => [$fuzzy_impl,
+       2465, 2465, "UNCERTAIN", 2774, 2774, "EXACT", "EXACT", 1, 1],
+   "22..?64" => [$fuzzy_impl,
+       22, 22, "EXACT", 64, 64, "UNCERTAIN", "EXACT", 1, 1],
+   "?22..?64" => [$fuzzy_impl,
+       22, 22, "UNCERTAIN", 64, 64, "UNCERTAIN", "EXACT", 1, 1],
+   "?..>393" => [$fuzzy_impl,
+       undef, undef, "UNCERTAIN", 393, undef, "AFTER", "UNCERTAIN", 1, 1],
+   "<1..?" => [$fuzzy_impl,
+       undef, 1, "BEFORE", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
+   "?..536" => [$fuzzy_impl,
+       undef, undef, "UNCERTAIN", 536, 536, "EXACT", "UNCERTAIN", 1, 1],
+   "1..?" => [$fuzzy_impl,
+       1, 1, "EXACT", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
+   "?..?" => [$fuzzy_impl,
+       undef, undef, "UNCERTAIN", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
+   # Not working yet:
+   #"12..?1" => [$fuzzy_impl,
+   #    1, 1, "UNCERTAIN", 12, 12, "EXACT", "EXACT", 1, 1]
+		 );
+
+my $locfac = Bio::Factory::FTLocationFactory->new();
+isa_ok($locfac,'Bio::Factory::LocationFactoryI');
+
+# sorting is to keep the order constant from one run to the next
+foreach my $locstr (keys %testcases) { 
+	my $loc = $locfac->from_string($locstr);
+	if($locstr eq "join(AY016290.1:108..185,AY016291.1:1546..1599)") {
+		$loc->seq_id("AY016295.1");
+	}
+	my @res = @{$testcases{$locstr}};
+	is(ref($loc), $res[0], $res[0]);
+	is($loc->min_start(), $res[1]);
+	is($loc->max_start(), $res[2]);
+	is($loc->start_pos_type(), $res[3]);
+	is($loc->min_end(), $res[4]);
+	is($loc->max_end(), $res[5]);
+	is($loc->end_pos_type(), $res[6]);
+	is($loc->location_type(), $res[7]);
+	my @locs = $loc->each_Location();
+	is(@locs, $res[8]);
+	my $ftstr = $loc->to_FTstring();
+	# this is a somewhat ugly hack, but we want clean output from to_FTstring()
+	# Umm, then these should really fail, correct?
+	# Should we be engineering workarounds for tests?
+	$locstr = "J00194:100..202" if $locstr eq "J00194:(100..202)";
+	$locstr = "(122.133)..(204.221)" if $locstr eq "((122.133)..(204.221))";
+	# now test
+	is($ftstr, $locstr, "Location String: $locstr");
+	# test strand production
+	is($loc->strand(), $res[9]);
+}
+
+if ($^V gt v5.6.0) {
+	# bug #1674, #1765, 2101
+	# EMBL-like 
+	# join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))
+	# GenBank-like
+	# join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))
+	# Note that
+	# join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)
+	# is the same as
+	# join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)
+	# But I don't want to bother with it at this point
+	my @expected = (# intentionally testing same expected string twice
+					# as I am providing two different encodings
+					# that should mean the same thing
+	'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
+	'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
+	# ditto
+	'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
+	'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
+	# this is just seen once
+	'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)'
+   );
+
+	for my $locstr (
+		'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))',
+		'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
+		'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
+		'join(20464..20694,21548..22763,join(complement(231520..231669),complement(232596..232990),complement(314652..314672)))',
+		'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)' 
+	   ) {
+		my $loc = $locfac->from_string($locstr);
+		my $ftstr = $loc->to_FTstring();
+		is($ftstr, shift @expected, $locstr);
+	}
+} else {
+	foreach (1..3) {
+		skip('nested matches in regex only supported in v5.6.1 and higher',1);
+	}
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/LocusLink.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/LocusLink.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/LocusLink.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: LocusLink.t,v 1.8 2006/06/08 02:11:12 bosborne Exp $
+
+use strict;
+use vars qw($DEBUG $NUMTESTS $HAVEGRAPHDIRECTED);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	eval {
+		require Graph::Directed; 
+		$HAVEGRAPHDIRECTED=1;
+	};
+	if ($@) {
+		$HAVEGRAPHDIRECTED = 0;
+		warn "Graph::Directed not installed, skipping tests\n";
+	}
+	plan tests => ($NUMTESTS = 23);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot complete LocusLink tests, skipping',1);
+	}
+	unlink("locuslink-test.out.embl") if -e "locuslink-test.out.embl";
+}
+
+exit(0) unless $HAVEGRAPHDIRECTED;
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::AnnotationAdaptor;
+
+ok(1);
+
+my $seqin = Bio::SeqIO->new(-file => Bio::Root::IO->catfile("t","data",
+							 "LL-sample.seq"),
+			    -format => 'locuslink');
+ok $seqin;
+my $seqout = Bio::SeqIO->new(-file => ">locuslink-test.out.embl",
+			     -format => 'embl');
+
+# process and write to output
+my @seqs = ();
+
+while(my $seq = $seqin->next_seq()) {
+    push(@seqs, $seq);
+    
+    # create an artificial feature to stick the annotation on
+    my $fea = Bio::SeqFeature::Generic->new(-start => 1, -end => 9999,
+					    -strand => 1,
+					    -primary => 'annotation');
+    my $ac = Bio::SeqFeature::AnnotationAdaptor->new(-feature => $fea);
+    foreach my $k ($seq->annotation->get_all_annotation_keys()) {
+	foreach my $ann ($seq->annotation->get_Annotations($k)) {
+	    next unless $ann->isa("Bio::Annotation::SimpleValue");
+	    $ac->add_Annotation($ann);
+	}
+    }
+    $seq->add_SeqFeature($fea);
+    $seqout->write_seq($seq);
+}
+
+ok (scalar(@seqs), 2);
+
+ok ($seqs[0]->desc,
+    "amiloride binding protein 1 (amine oxidase (copper-containing))");
+ok ($seqs[0]->accession, "26");
+ok ($seqs[0]->display_id, "ABP1");
+ok ($seqs[0]->species->binomial, "Homo sapiens");
+
+
+my @dblinks = $seqs[0]->annotation->get_Annotations('dblink');
+my %counts = map { ($_->database(),0) } @dblinks;
+foreach (@dblinks) { $counts{$_->database()}++; }
+
+ok ($counts{GenBank}, 11);
+ok ($counts{RefSeq}, 4);
+ok ($counts{UniGene}, 1);
+ok ($counts{Pfam}, 1);
+ok ($counts{STS}, 2);
+ok ($counts{MIM}, 1);
+ok ($counts{PUBMED}, 6);
+ok (scalar(@dblinks), 27);
+
+ok ($seqs[1]->desc, "v-abl Abelson murine leukemia viral oncogene homolog 2 (arg, Abelson-related gene)");
+ok ($seqs[1]->display_id, "ABL2");
+
+my $ac = $seqs[1]->annotation;
+my @keys = $ac->get_all_annotation_keys();
+ok (scalar(@keys), 19);
+
+my ($cmt) = $ac->get_Annotations('comment');
+ok (length($cmt->text), 403);
+
+my @isoforms = qw(a b);
+foreach ($ac->get_Annotations('PRODUCT')) {
+    ok ($_->value,
+	"v-abl Abelson murine leukemia viral oncogene homolog 2 isoform ".
+	shift(@isoforms));
+}
+
+my @goann = ();
+foreach my $k (@keys) {
+    foreach my $ann ($ac->get_Annotations($k)) {
+	next unless $ann->isa("Bio::Ontology::TermI");
+	push(@goann, $ann);
+    }
+}
+ok (scalar(@goann), 4);
+@goann = sort { $a->as_text() cmp $b->as_text() } @goann;
+ok ($goann[2]->as_text, "cellular component|cytoplasm|");
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Map.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Map.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Map.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,359 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Map.t,v 1.8.4.1 2006/10/17 09:35:22 sendu Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG);
+    $DEBUG = $ENV{'BIOPERLDEBUG'};
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 141;
+}
+
+###
+# We explicitly test Bio::Map::SimpleMap, Bio::Map::Mappable, Bio::Map::Position,
+# Bio::Map::Marker and Bio::Map::Relative.
+#
+# We implicitly test Bio::Map::MapI, Bio::Map::MappableI, Bio::Map::PositionI,
+# and Bio::Map::PositionHandler.
+###
+
+# Test map basics
+my $map;
+{
+    use Bio::Map::SimpleMap;
+    ok 1;
+    
+    ok $map = new Bio::Map::SimpleMap(-name  => 'my');
+    ok $map->type('cyto');
+    ok $map->type, 'cyto';
+    ok $map->units, '';
+    ok $map->length, 0, "Length is ". $map->length;
+    ok $map->name, 'my';
+    ok $map->species('human'), 'human';
+    ok $map->species, 'human';
+    ok $map->unique_id, '1';
+}
+
+# Test marker basics
+my $marker;
+{
+    use Bio::Map::Marker;
+    ok 1;
+    
+    # make a plain one and add details after
+    ok $marker = new Bio::Map::Marker();
+    ok $marker->name('gene1'), 'gene1';
+    ok $marker->position($map, 100);
+    ok $marker->position->value, 100;
+    ok $marker->map, $map;
+    
+    # make positions a little easier to add by setting a default map first
+    ok my $marker2 = new Bio::Map::Marker(-name => 'gene3');
+    ok $map->add_element($marker2); # one way of setting default
+    ok $marker2->default_map, $map;
+    $marker2 = new Bio::Map::Marker(-name => 'gene3');
+    ok $marker2->default_map($map); # the other way of setting default
+    ok $marker2->default_map, $map;
+    ok $marker2->position(300);
+    ok $marker2->position->value, 300;
+    ok my $position = $marker2->position();
+    ok $position->value, 300;
+    
+    # make one with details set in new()
+    ok my $marker3 = new Bio::Map::Marker(-name => 'gene2', -position => [$map, 200]);
+    ok $marker3->default_map, $map;
+    ok $marker3->position->value, 200;
+    
+    # make one with multiple positions on multiple maps
+    my $map2 = new Bio::Map::SimpleMap();
+    $marker2->positions([[$map, 150], [$map, 200], [$map2, 200], [$map2, 400]]);
+    my @p = map($_->numeric, $marker2->each_position);
+    ok $p[0] == 150 && $p[1] == 200 && $p[2] == 200 && $p[3] == 300 && $p[4] == 400;
+    $marker2->purge_positions($map2);
+    @p = map($_->numeric, $marker2->each_position);
+    ok $p[0] == 150 && $p[1] == 200 && $p[2] == 300;
+}
+
+# Test position basics
+my $pos;
+{
+    use Bio::Map::Position;
+    ok 1;
+    
+    ok $pos = new Bio::Map::Position();
+    ok $pos->map($map);
+    ok $pos->map(), $map;
+    ok $pos->element($marker);
+    ok $pos->element(), $marker;
+    
+    ok $pos->value('10');
+    ok $pos->value(), '10';
+    ok $pos->numeric, 10;
+    ok $pos->sortable, 10;
+    ok $pos->start, 10;
+    ok $pos->end, 10;
+    
+    # give a marker a single position with explicit position creation
+    ok $pos = new Bio::Map::Position(-map => $map, -value => 500);
+    ok $marker->position($pos);
+    ok my $got_pos = $marker->position();
+    ok $got_pos eq $pos;
+    ok $marker->position->value, 500;
+    
+    # add a position
+    my $map2 = new Bio::Map::SimpleMap(-name => 'genethon', -type => 'Genetic');
+    my $pos2 = new Bio::Map::Position(-map => $map2, -value => 100);
+    $marker->add_position($pos2);
+    ok my @positions = $marker->get_positions($map2);
+    ok @positions, 1;
+    ok $positions[0]->value, 100;
+}
+
+# Test interaction of Markers and Maps via Positions
+{
+    # markers know what maps they are on
+    $marker->purge_positions;
+    ok $marker->known_maps, 0;
+    $pos->element($marker);
+    ok $marker->known_maps, 1;
+    ok $marker->in_map(1);
+    ok $marker->in_map($map);
+    
+    # maps know what markers are on themselves
+    $map->purge_positions;
+    my @els = $map->get_elements;
+    ok @els, 0;
+    $pos->map($map);
+    ok my @elements = $map->get_elements;
+    ok @elements, 1;
+    ok $elements[0], $marker;
+    
+    # positions know what marker they are for and what map they are on
+    ok $pos->map, $map;
+    ok $pos->element, $marker;
+}
+
+# We can compare Map objects to their own kind
+{
+    # positions to positions
+    {
+        ok $pos->equals($pos);
+        # these get tested properly when testing Relative, later
+    }
+    
+    # markers to markers
+    {
+        ok $marker->equals($marker);
+        # these get tested properly when testing Mappables, later
+    }
+    
+    # maps to maps
+    {
+        my $human = new Bio::Map::SimpleMap;
+        my $mouse = new Bio::Map::SimpleMap;
+        my $chicken = new Bio::Map::SimpleMap;
+        my $aardvark = new Bio::Map::SimpleMap;
+        
+        # scenario 1: we have information about where some factors bind upstream
+        # of a gene in 4 different species. Which factors are found in all the
+        # species?
+        my $fac1 = new Bio::Map::Mappable;
+        my $pos1 = new Bio::Map::Position(-map => $human, -element => $fac1);
+        my $pos2 = new Bio::Map::Position(-map => $mouse, -element => $fac1);
+        my $pos3 = new Bio::Map::Position(-map => $chicken, -element => $fac1);
+        my $pos4 = new Bio::Map::Position(-map => $aardvark, -element => $fac1);
+        my $fac2 = new Bio::Map::Mappable;
+        my $pos5 = new Bio::Map::Position(-map => $human, -element => $fac2);
+        my $pos6 = new Bio::Map::Position(-map => $mouse, -element => $fac2);
+        my $pos7 = new Bio::Map::Position(-map => $chicken, -element => $fac2);
+        my $fac3 = new Bio::Map::Mappable;
+        my $pos8 = new Bio::Map::Position(-map => $human, -element => $fac3);
+        my $pos9 = new Bio::Map::Position(-map => $mouse, -element => $fac3);
+        
+        # scenario 1 answer:
+        ok my @factors = $human->common_elements([$mouse, $chicken, $aardvark]);
+        ok @factors, 1;
+        ok @factors = $human->common_elements([$mouse, $chicken, $aardvark], -min_percent => 50);
+        ok @factors, 3;
+        ok @factors = $human->common_elements([$mouse, $chicken, $aardvark], -min_percent => 50, -min_num => 3);
+        ok @factors, 2;
+        ok @factors = $chicken->common_elements([$mouse, $human, $aardvark], -min_percent => 50, -require_self => 1);
+        ok @factors, 2;
+        ok @factors = Bio::Map::SimpleMap->common_elements([$human, $mouse, $human, $aardvark], -min_percent => 50, -required => [$aardvark]);
+        ok @factors, 1;
+    }
+}
+
+# Test relative positions
+{
+    use Bio::Map::Relative;
+    ok 1;
+    
+    my $map = new Bio::Map::SimpleMap;
+    my $pos1 = new Bio::Map::Position(-map => $map, -start => 50, -length => 5);
+    my $pos2 = new Bio::Map::Position(-map => $map, -start => 100, -length => 5);
+    ok my $relative = new Bio::Map::Relative (-position => $pos2);
+    ok $pos1->relative($relative);
+    ok $pos1->start, 50;
+    ok $pos1->absolute(1), 1;
+    ok $pos1->start, 150;
+    ok $pos1->absolute(0), 0;
+    ok my $relative2 = new Bio::Map::Relative (-map => 10);
+    my $pos3 = new Bio::Map::Position(-map => $map, -element => $marker, -start => -5, -length => 5);
+    $pos3->relative($relative2);
+    my $relative3 = new Bio::Map::Relative (-position => $pos3);
+    ok $pos1->start($relative3), 145;
+    ok $pos1->numeric($relative3), 145;
+    ok $pos1->end($relative3), 149;
+    
+    # Test the RangeI-related methods on relative positions
+    {
+        my $pos1 = new Bio::Map::Position(-map => $map, -start => 50, -length => 10);
+        my $pos2 = new Bio::Map::Position(-map => $map, -start => 100, -length => 10);
+        my $pos3 = new Bio::Map::Position(-map => $map, -start => 45, -length => 1);
+        my $pos4 = new Bio::Map::Position(-map => $map, -start => 200, -length => 1);
+        my $relative = new Bio::Map::Relative (-position => $pos3);
+        my $relative2 = new Bio::Map::Relative (-position => $pos4);
+        ok ! $pos1->overlaps($pos2);
+        $pos1->relative($relative);
+        ok $pos1->overlaps($pos2);
+        ok $pos2->overlaps($pos1);
+        ok $pos1->overlaps($pos2, undef, $relative2);
+        
+        # Make sure it works with normal Ranges
+        use Bio::Range;
+        my $range = new Bio::Range(-start => 100, -end => 109);
+        ok $pos1->overlaps($range);
+        ok ! $range->overlaps($pos1);
+        $pos1->absolute(1);
+        ok $range->overlaps($pos1);
+        $pos1->absolute(0);
+        
+        # Try the other methods briefly
+        ok my $i = $pos1->intersection($pos2); # returns a mappable
+        ($i) = $i->get_positions; # but we're just interested in the first (and only) position of mappable
+        ok $i->toString, '100..104';
+        ok $i = $pos1->intersection($pos2, undef, $relative2);
+        ($i) = $i->get_positions;
+        ok $i->toString, '-100..-96';
+        ok $i->map, $map;
+        ok $i->relative, $relative2;
+        $i->absolute(1);
+        ok $i->toString, '100..104';
+        
+        ok my $u = $pos1->union($pos2);
+        ($u) = $u->get_positions;
+        ok $u->toString, '95..109';
+        ok $u = $pos1->union($pos2, $relative2);
+        ($u) = $u->get_positions;
+        ok $u->toString, '-105..-91';
+        ok $u->map, $map;
+        ok $u->relative, $relative2;
+        $u->absolute(1);
+        ok $u->toString, '95..109';
+        
+        ok ! $pos1->contains($pos2);
+        $pos2->end(104);
+        ok $pos1->contains($pos2);
+        ok $pos1->contains(100);
+        
+        ok ! $pos1->equals($pos2);
+        $pos2->start(95);
+        ok $pos1->equals($pos2);
+    }
+}
+
+# Test Mappables
+{
+    use Bio::Map::Mappable;
+    ok 1;
+    
+    ok my $gene = new Bio::Map::Mappable;
+    my $human = new Bio::Map::SimpleMap;
+    my $mouse = new Bio::Map::SimpleMap;
+    ok my $pos1 = new Bio::Map::Position(-map => $human, -element => $gene, -start => 50, -length => 1000);
+    my $pos2 = new Bio::Map::Position(-map => $mouse, -start => 100, -length => 1000);
+    $gene->add_position($pos2);
+    my $gene_rel = new Bio::Map::Relative(-element => $gene);
+    
+    # scenario 1a: we know where a TF binds upstream of a gene in human.
+    # we use four different programs to predict the site; how good were they?
+    # scenario 1b: to what extent do the predictions and the known site agree?
+    my $factor = new Bio::Map::Mappable;
+    my $pos3 = new Bio::Map::Position(-map => $human, -element => $factor, -start => -25, -length => 10, -relative => $gene_rel);
+    my $perfect_prediction = new Bio::Map::Mappable;
+    my $pos4 = new Bio::Map::Position(-map => $human, -element => $perfect_prediction, -start => 25, -length => 10);
+    my $good_prediction = new Bio::Map::Mappable;
+    my $pos5 = new Bio::Map::Position(-map => $human, -element => $good_prediction, -start => 24, -length => 10);
+    my $ok_prediction = new Bio::Map::Mappable;
+    my $pos6 = new Bio::Map::Position(-map => $human, -element => $ok_prediction, -start => 20, -length => 10);
+    my $bad_prediction = new Bio::Map::Mappable;
+    my $pos7 = new Bio::Map::Position(-map => $human, -element => $bad_prediction, -start => 10, -length => 10);
+    
+    # scenario 2: we have the same program making a prediction for a site
+    # in two different species; is the predicted site conserved in terms of
+    # its position relative to the gene?
+    my $human_prediction = new Bio::Map::Mappable;
+    my $pos8 = new Bio::Map::Position(-map => $human, -element => $human_prediction, -start => 25, -length => 10);
+    my $mouse_prediction = new Bio::Map::Mappable;
+    my $pos9 = new Bio::Map::Position(-map => $mouse, -element => $mouse_prediction, -start => 75, -length => 10);
+    
+    # Test the RangeI-related methods
+    {
+        # scenario 1a answers:
+        ok $perfect_prediction->equals($factor);
+        ok $perfect_prediction->contains($factor);
+        ok ! $ok_prediction->equals($factor);
+        ok $ok_prediction->overlaps($factor);
+        ok ! $bad_prediction->overlaps($factor);
+        ok $bad_prediction->less_than($factor);
+        ok ! $bad_prediction->greater_than($factor);
+        ok $factor->greater_than($bad_prediction);
+        
+        # scenario 1b answer:
+        my $predictions = [$perfect_prediction, $good_prediction, $ok_prediction, $bad_prediction];
+        ok my @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel);
+        ok @groups, 2;
+        ok ${$groups[0]}[0], $pos7;
+        ok ${$groups[1]}[0], $pos6;
+        ok ${$groups[1]}[1], $pos5;
+        ok ${$groups[1]}[2]->toString($gene_rel), $pos4->toString($gene_rel);
+        ok ${$groups[1]}[3]->toString($gene_rel), $pos3->toString($gene_rel);
+        ok my $di = $factor->disconnected_intersections($predictions, -relative => $gene_rel, -min_mappables_num => 3);
+        my @di = $di->get_positions;
+        ok @di, 1;
+        ok $di[0]->toString, '-25..-21';
+        ok my $du = $factor->disconnected_unions($predictions, -relative => $gene_rel, -min_mappables_num => 3);
+        my @du = $du->get_positions;
+        ok @du, 1;
+        ok $du[0]->toString, '-30..-16';
+        
+        # test the flags on overlapping_groups a bit more
+        @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel, -min_pos_num => 2);
+        ok @groups, 1;
+        @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel, -min_pos_num => 1, -min_mappables_num => 2);
+        ok @groups, 1;
+        @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel, -min_pos_num => 1, -min_mappables_num => 1, -min_mappables_percent => 50);
+        ok @groups, 1;
+        @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel, -min_pos_num => 1, -min_mappables_num => 1, -min_mappables_percent => 5);
+        ok @groups, 2;
+        @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel, -require_self => 1);
+        ok @groups, 1;
+        @groups = $factor->overlapping_groups($predictions, -relative => $gene_rel, -required => [$factor]);
+        ok @groups, 1;
+        
+        # scenario 2 answer:
+        ok ! $human_prediction->overlaps($mouse_prediction);
+        ok $human_prediction->overlaps($mouse_prediction, -relative => $gene_rel);
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/MapIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MapIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MapIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,112 @@
+# -*-Perl-*-
+# Bioperl Test Harness Script for Modules
+# $Id: MapIO.t,v 1.4.4.3 2006/10/02 23:10:40 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {     
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+
+	use Test;
+	plan tests => 51; 
+}
+
+if( $error == 1 ) {
+	exit(0);
+}
+
+use Bio::MapIO;
+use Bio::Root::IO;
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok 1;
+
+ok my $mapio = new Bio::MapIO(-verbose => $verbose,
+										-format => 'mapmaker',
+										-file   => Bio::Root::IO->catfile
+										('t','data','mapmaker.out'));
+
+my $map = $mapio->next_map;
+
+ok(ref($map) && $map->isa('Bio::Map::MapI'));
+
+ok $map->units, 'cM';
+ok $map->type, 'Genetic';
+ok $map->name('test map'), 'test map'; # map name is unset for this data type
+
+my $count;
+foreach my $marker ( $map->each_element ) {
+	$count++;
+	ok($marker->position->order,$count);
+}
+ok $count,18;
+
+ok $mapio = new Bio::MapIO(-format => 'mapmaker',
+									-file   => Bio::Root::IO->catfile
+									('t','data','mapmaker.txt'));
+
+$map = $mapio->next_map;
+ok $map->length,382.5;
+
+$count = 0;
+foreach my $marker ( $map->each_element ) {
+	$count++;
+	ok($marker->position->order,$count);
+}
+ok $count,13;
+
+ok $mapio = Bio::MapIO->new(-format => 'fpc',
+									-file   => Bio::Root::IO->catfile
+									('t','data','ctgdemo.fpc'));
+
+$map = $mapio->next_map;
+
+ok($map->length, 0);
+ok($map->version, 7.2);
+ok($map->modification_user, 'cari');
+ok($map->group_type, 'Chromosome');
+ok($map->group_abbr, 'Chr');
+ok($map->core_exists, 0);
+
+$count = 0;
+foreach my $marker ($map->each_markerid) {
+	$count++;
+}
+
+ok($count,150);
+
+# add tests for get_markerobj
+
+$count = 0;
+foreach my $clone ($map->each_cloneid) {
+	$count++;
+}
+
+ok($count,618);
+
+# add tests for get_cloneobj
+
+$count = 0;
+foreach my $contig ($map->each_contigid) {
+	$count++;
+}
+
+ok($count,2);
+
+# add tests for get_contigobj
+
+# need tests for
+# matching_bands
+# coincidence_score
+# print_contiglist
+# print_markerlist
+# print_gffstyle

Added: trunk/packages/bioperl/branches/upstream/current/t/Matrix.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Matrix.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Matrix.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,155 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Matrix.t,v 1.8 2006/02/15 21:10:09 jason Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG);
+    $DEBUG = $ENV{'BIOPERLDEBUG'};
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 68;
+}
+
+#END {
+#}
+
+use Bio::Matrix::Generic;
+use Bio::Matrix::IO;
+use Bio::Root::IO;
+
+my $raw = [ [ 0, 10, 20],
+	    [ 2, 17,  4],
+	    [ 3,  4,  5] ];
+
+my $matrix = new Bio::Matrix::Generic(-values => $raw,
+				      -matrix_id  => 'fakeid00',
+				      -matrix_name=> 'matname',
+				      -rownames   => [qw(A B C)],
+				      -colnames   => [qw(D E F)] );
+
+ok($matrix->matrix_name, 'matname');
+ok($matrix->matrix_id,   'fakeid00');
+ok($matrix->entry('A','F'), $raw->[0]->[2]);
+my @colE = $matrix->get_column('E');
+ok($colE[0], $raw->[0]->[1]);
+ok($colE[1], $raw->[1]->[1]);
+ok($colE[2], $raw->[2]->[1]);
+
+my @rowC = $matrix->get_row('C');
+ok($rowC[0], $raw->[2]->[0]);
+ok($rowC[1], $raw->[2]->[1]);
+ok($rowC[2], $raw->[2]->[2]);
+
+ok($matrix->row_num_for_name('A'),0);
+ok($matrix->column_num_for_name('D'),0);
+
+ok($matrix->row_header(1),'B');
+ok($matrix->column_header(0),'D');
+
+ok($matrix->add_row(1, 'b', [qw(21 13 14)]),4);
+ok($matrix->add_column(2, 'f', [qw(71 81 14 3)]),4);
+
+ok($matrix->add_row(4, 'c', [qw(22 11 17)]),5);
+ok($matrix->remove_row(4),4);
+
+ok($matrix->add_column(4, 'g', [qw(11 10 100 71)]),5);
+ok($matrix->remove_column(4),4);
+
+ok($matrix->row_num_for_name('B'),2);
+ok($matrix->row_num_for_name('b'),1);
+
+ok($matrix->column_num_for_name('D'),0);
+ok($matrix->column_num_for_name('F'),3);
+ok($matrix->column_num_for_name('f'),2);
+
+ok($matrix->row_header(2),'B');
+
+ok($matrix->column_header(3),'F');
+
+ok($matrix->get_entry('b', 'f'), 81);
+
+
+# read in a scoring matrix
+
+my $io = Bio::Matrix::IO->new(-format => 'scoring',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data BLOSUM50)));
+my $blosum_matrix = $io->next_matrix;
+ok($blosum_matrix->isa('Bio::Matrix::Scoring'));
+ok($blosum_matrix->entropy, 0.4808);
+ok($blosum_matrix->expected_score, -0.3573);
+ok($blosum_matrix->scale, '1/3');
+ok($blosum_matrix->get_entry('*','A'), -5);
+ok($blosum_matrix->get_entry('V','Y'), -1);
+ok($blosum_matrix->get_entry('Y','V'), -1);
+ok($blosum_matrix->get_entry('L','I'), 2);
+my @diag = $blosum_matrix->get_diagonal;
+ok($diag[2],7);
+my @row = $blosum_matrix->get_row('D');
+ok($row[5], $blosum_matrix->get_entry('D','Q'));
+ok($blosum_matrix->num_rows,24);
+ok($blosum_matrix->num_columns,24);
+ 
+$io = Bio::Matrix::IO->new(-format => 'scoring',
+			   -file   => Bio::Root::IO->catfile
+			   (qw(t data PAM250)));
+my $pam_matrix = $io->next_matrix;
+ok($pam_matrix->isa('Bio::Matrix::Scoring'));
+ok($pam_matrix->entropy, 0.354);
+ok($pam_matrix->expected_score, -0.844,);
+ok($pam_matrix->scale, 'ln(2)/3');
+ok($pam_matrix->num_rows,24);
+ok($pam_matrix->get_entry('G','*'), -8);
+ok($pam_matrix->get_entry('V','Y'), -2);
+ok($pam_matrix->get_entry('Y','V'), -2);
+ok($pam_matrix->get_entry('L','I'), 2);
+@diag = $pam_matrix->get_diagonal;
+ok($diag[2],2);
+@row = $pam_matrix->get_row('D');
+ok($row[5], $pam_matrix->get_entry('D','Q'));
+
+# test Phylip parsing
+
+$io = new Bio::Matrix::IO(-format  => 'phylip',
+			  -program => 'phylipdist',
+			  -file    => Bio::Root::IO->catfile
+			  (qw(t data phylipdist.out)));
+
+my $phy = $io->next_matrix;
+ok $phy->program, 'phylipdist';
+ok $phy->get_entry('Alpha','Beta'), '4.23419';
+ok $phy->get_entry('Gamma','Alpha'),'3.63330';
+
+my @column =  $phy->get_column('Alpha');
+ok $column[0], '0.00000';
+ok $column[1], '4.23419';
+ok $column[2], '3.63330';
+ok $column[3], '6.20865';
+ok $column[4], '3.45431';
+
+@row    = $phy->get_row('Gamma');
+ok $row[0], '3.63330';
+ok $row[1], '3.49289';
+ok $row[2], '0.00000';
+ok $row[3], '3.68733';
+ok $row[4], '5.84929';
+
+@diag   = $phy->get_diagonal;
+
+ok $diag[0], '0.00000';
+ok $diag[1], '0.00000';
+ok $diag[2], '0.00000';
+ok $diag[3], '0.00000';
+ok $diag[4], '0.00000';
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/MeSH.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MeSH.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MeSH.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,88 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: MeSH.t,v 1.6 2006/03/27 21:07:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $error;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	$NUMTESTS = 23;
+
+	plan tests => $NUMTESTS;
+
+	eval { require IO::String; 
+			 require LWP::UserAgent;
+			 require HTTP::Request::Common;
+       };
+	if( $@ ) {
+		print STDERR "IO::String or LWP::UserAgent or HTTP::Request not installed. This means the MeSH modules are not usable. Skipping tests.\n";
+		for( 1..$NUMTESTS ) {
+			skip("IO::String, LWP::UserAgent,or HTTP::Request not installed",1);
+		}
+		$error = 1;
+	}
+}
+# For tests of Bio::DB::MeSH see t/DB.t
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('unable to run all of the MeSH.t tests, skipping',1);
+    }
+}
+
+require Bio::Phenotype::MeSH::Term;
+require Bio::Phenotype::MeSH::Twig;
+require Bio::DB::MeSH;
+ok 1;
+
+my $verbose = 0;
+
+ok my $term = Bio::Phenotype::MeSH::Term->new(-verbose =>$verbose);
+ok $term->id('D000001'), 'D000001';
+ok $term->id, 'D000001';
+ok $term->name('Dietary Fats'), 'Dietary Fats';
+ok $term->name, 'Dietary Fats';
+ok $term->description('dietary fats are...'), 'dietary fats are...';
+ok $term->description, 'dietary fats are...';
+
+ok my $twig = Bio::Phenotype::MeSH::Twig->new(-verbose =>$verbose);
+ok $twig->parent('Fats'), 'Fats';
+ok $twig->parent(), 'Fats';
+
+
+ok $term->add_twig($twig);
+ok $term->each_twig(), 1;
+ok $twig->term, $term;
+
+ok $twig->add_sister('Bread', 'Candy', 'Cereals'), 3;
+ok $twig->add_sister('Condiments', 'Dairy Products'), 2;
+ok $twig->each_sister(), 5;
+ok $twig->purge_sisters();
+ok $twig->each_sister(), 0;
+
+ok $twig->add_child('Butter', 'Margarine'), 2;
+ok $twig->each_child(), 2;
+ok $twig->purge_children();
+ok $twig->each_child(), 0;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Measure.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Measure.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Measure.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,59 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Measure.t,v 1.1 2002/09/10 06:50:18 czmasek Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 20;
+}
+
+use Bio::Phenotype::Measure;
+  
+my $measure = Bio::Phenotype::Measure->new( -context     => "height",
+                                            -description => "desc",
+                                            -start       => 10,
+                                            -end         => 150,
+                                            -unit        => "cm",
+                                            -comment     => "comment" );
+
+ok( $measure->isa( "Bio::Phenotype::Measure" ) );
+
+ok( $measure->to_string() );
+
+ok( $measure->context(), "height" );
+ok( $measure->description(), "desc" );
+ok( $measure->start(), 10 );
+ok( $measure->end(), 150 );
+ok( $measure->unit(), "cm" );
+ok( $measure->comment(), "comment" );
+
+$measure->init();
+
+ok( $measure->context(), "" );
+ok( $measure->description(), "" );
+ok( $measure->start(), "" );
+ok( $measure->end(), "" );
+ok( $measure->unit(), "" );
+ok( $measure->comment(), "" );
+
+ok( $measure->context( "A" ), "A" );
+ok( $measure->description( "B" ), "B" );
+ok( $measure->start( "C" ), "C" );
+ok( $measure->end( "D" ), "D" );
+ok( $measure->unit( "E" ), "E" );
+ok( $measure->comment( "F" ), "F" );
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/MetaSeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MetaSeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MetaSeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,357 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: MetaSeq.t,v 1.5 2005/08/16 11:51:04 heikki Exp $
+
+use strict;
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 124;
+}
+
+my $DEBUG = $ENV{'BIOPERLDEBUG'};
+
+use Bio::Seq::Meta;
+use Bio::Seq::Meta::Array;
+use Bio::SeqIO;
+use Bio::AlignIO;
+use Bio::Root::IO;
+
+
+use Bio::Seq::Quality;
+
+use Data::Dumper;
+
+ok(1);
+
+
+ok my $seq = Bio::Seq::Meta->new( -seq => "AT-CGATCGA");
+ok $seq->meta, "";
+ok $seq->force_flush(1);
+ok $seq->meta, "          ";
+$seq->seq("AT-CGATCGATT");
+ok $seq->meta, "            ";
+ok not $seq->force_flush(0);
+#print Dumper $seq;
+
+ok $seq = Bio::Seq::Meta::Array->new( -seq => "AT-CGATCGA");
+ok $seq->meta_text, "";
+ok $seq->force_flush(1);
+$seq->seq("AT-CGATCGATT");
+ok $seq->meta_text, "0 0 0 0 0 0 0 0 0 0 0 0";
+ok not $seq->force_flush(0);
+#print Dumper $seq;
+
+ok $seq = Bio::Seq::Quality->new( -seq => "AT-CGATCGA");
+ok $seq->meta_text, "";
+ok $seq->force_flush(1);
+ok $seq->meta_text, "0 0 0 0 0 0 0 0 0 0";
+$seq->seq("AT-CGATCGATT");
+ok $seq->meta_text, "0 0 0 0 0 0 0 0 0 0 0 0";
+ok not $seq->force_flush(0);
+#print Dumper $seq;
+#exit;
+
+ok $seq = Bio::Seq::Meta->new
+    ( -seq => "",
+      -meta => "",
+      -alphabet => 'dna',
+      -id => 'myid'
+    );
+
+# create a sequence object
+ok $seq = Bio::Seq::Meta->new( -seq => "AT-CGATCGA",
+                               -id => 'test',
+                               -verbose => 2,
+                               -force_flush => 1
+                             );
+
+ok $seq->meta, "          ";
+ok $seq->meta_length, 10;
+
+# Create some random meta values, but gap in the wrong place
+my $metastring = "a-abb  bb ";
+eval {
+    $seq->meta($metastring);
+};
+ok 1 if $@ =~ 'column [2]';
+$seq->verbose(1);
+
+# create some random meta values, but not for the last residue
+$metastring = "aa-bb  bb";
+ok $seq->meta($metastring), $metastring. " ";
+#print Dumper $seq;
+#exit;
+
+# truncate the sequence by assignment
+$seq->force_flush(1);
+$seq->seq('AT-CGA');
+$seq->alphabet('dna');
+ok $seq->meta, 'aa-bb ';
+ok $seq->start, 1;
+ok $seq->end, 5;
+$seq->force_flush(0);
+
+# truncate the sequence with trunc()
+ok $seq->strand(-1), -1;
+ok $seq = $seq->trunc(1,5);
+ok $seq->start, 2;
+ok $seq->end, 5;
+ok $seq->seq, 'AT-CG';
+ok $seq->meta, 'aa-bb';
+ok $seq->strand, -1;
+
+# revcom
+ok $seq = $seq->revcom;
+ok $seq->seq, 'CG-AT';
+ok $seq->meta, 'bb-aa';
+ok $seq->strand, 1;
+
+# submeta
+ok $seq->subseq(2,4), 'G-A';
+ok $seq->submeta(2,4), 'b-a';
+ok $seq->submeta(2,undef, 'c-c'), 'c-ca';
+ok $seq->submeta(2,4), 'c-c';
+ok $seq->meta, 'bc-ca';
+ok $seq->meta(''), '     ';
+ok $seq->submeta(2,undef, 'c-c'), 'c-c ';
+ok $seq->meta, ' c-c ';
+
+# add named meta annotations
+
+my $first = '11-22';
+ok $seq->named_meta('first', $first), $first;
+ok $seq->named_meta('first'), $first;
+
+my $second = '[[-]]';
+ok $seq->named_meta('second', $second);
+
+# undefined range arguments
+ok $seq->named_submeta('second', 3, 4), '-]';
+ok $seq->named_submeta('second', 3), '-]]';
+ok $seq->named_submeta('second'), '[[-]]';
+
+my @names =  $seq->meta_names;
+ok @names, 3;
+ok $names[0], 'DEFAULT';
+
+
+
+#
+# IO tests
+#
+
+sub diff {
+    my ($infile, $outfile) = @_;
+    my ($in, $out);
+    open FH, $infile;
+    $in .= $_ while (<FH>);
+    close FH;
+
+    open FH, $outfile;
+    $out .= $_ while (<FH>);
+    close FH;
+    print "|$in||$out|\n" if $DEBUG;
+    ok $in, $out;
+
+}
+
+
+# SeqIO
+my $str = Bio::SeqIO->new
+    ( '-file'=> Bio::Root::IO->catfile("t","data","test.metafasta"),
+      '-format' => 'metafasta');
+ok  $seq = $str->next_seq;
+
+my $strout = Bio::SeqIO->new
+    ('-file'=> ">". Bio::Root::IO->catfile("t","data","test.metafasta.out"),
+     '-format' => 'metafasta');
+ok $strout->write_seq($seq);
+
+diff (Bio::Root::IO->catfile("t","data","test.metafasta"),
+      Bio::Root::IO->catfile("t","data","test.metafasta.out")
+     );
+
+#exit;
+# AlignIO
+
+$str = Bio::AlignIO->new
+    ( '-file'=> Bio::Root::IO->catfile("t","data","testaln.metafasta"),
+      '-format' => 'metafasta');
+ok my $aln = $str->next_aln;
+
+$strout = Bio::AlignIO->new
+    ('-file'=> ">". Bio::Root::IO->catfile("t","data","testaln.metafasta.out"),
+     '-format' => 'metafasta');
+ok $strout->write_aln($aln);
+
+diff (Bio::Root::IO->catfile("t","data","testaln.metafasta"),
+      Bio::Root::IO->catfile("t","data","testaln.metafasta.out")
+     );
+
+
+END {
+    unlink(Bio::Root::IO->catfile("t","data","test.metafasta.out"));
+    unlink(Bio::Root::IO->catfile("t","data","testaln.metafasta.out"));
+}
+
+
+#
+##
+### tests for Meta::Array
+##
+#
+
+ok $seq = Bio::Seq::Meta::Array->new
+    ( -seq => "",
+      -meta => "",
+      -alphabet => 'dna',
+      -id => 'myid'
+    );
+
+# create a sequence object
+ok $seq = Bio::Seq::Meta::Array->new( -seq => "AT-CGATCGA",
+                                      -id => 'test',
+                                      -force_flush => 1,
+                                      -verbose => 2
+                             );
+
+ok $seq->is_flush, 1;
+#ok $seq->meta_text, "          ";
+ok $seq->meta_text, '0 0 0 0 0 0 0 0 0 0';
+#print Dumper $seq; exit;
+# create some random meta values, but not for the last residue
+$metastring = "a a - b b 0 b b 0";
+ok join (' ',  @{$seq->meta($metastring)}), $metastring. ' 0';
+ok $seq->meta_text, $metastring. ' 0';
+
+# truncate the sequence by assignment
+$seq->seq('AT-CGA');
+$seq->alphabet('dna');
+ok $seq->meta_text, 'a a - b b 0';
+
+# truncate the sequence with trunc()
+ok $seq->strand(-1), -1;
+ok $seq = $seq->trunc(1,5);
+ok $seq->seq, 'AT-CG';
+ok $seq->meta_text, 'a a - b b';
+ok $seq->strand, -1;
+
+#ok $seq->length, 5;
+#ok $seq->meta_length, 6;
+#ok $seq->force_flush(1);
+#ok $seq->meta_length, 5;
+#print Dumper $seq; exit;
+#exit;
+# revcom
+ok $seq = $seq->revcom;
+ok $seq->seq, 'CG-AT';
+ok $seq->meta_text, 'b b - a a';
+ok $seq->strand, 1;
+
+# submeta
+
+ok $seq->subseq(2,4), 'G-A';
+
+ok $seq->submeta_text(2,4), 'b - a';
+ok $seq->submeta_text(2,undef, 'c - c'), 'c - c';
+ok $seq->submeta_text(2,4), 'c - c';
+ok $seq->meta_text, 'b c - c a';
+
+ok $seq->meta_text(''), '0 0 0 0 0';
+ok $seq->submeta_text(2,undef, 'c - c'), 'c - c';
+ok $seq->meta_text, '0 c - c 0';
+
+# add named meta annotations
+$first = '1 10 - 222 23';
+ok $seq->named_meta_text('first', $first), $first;
+ok $seq->named_meta_text('first'), $first;
+$second = '[ [ - ] ]';
+ok $seq->named_meta_text('second', $second);
+
+# undefined range arguments
+ok $seq->named_submeta_text('second', 3, 4), '- ]';
+ok $seq->named_submeta_text('second', 3), '- ] ]';
+ok $seq->named_submeta_text('second'), '[ [ - ] ]';
+
+@names =  $seq->meta_names;
+ok @names, 3;
+ok $names[0], 'DEFAULT';
+
+
+
+
+#
+# testing the forcing of flushed meta values
+#
+
+
+
+
+ok $seq = Bio::Seq::Meta->new( -seq =>  "AT-CGATCGA",
+                                  -id => 'test',
+                                  -verbose => 2
+                             );
+ok $seq->submeta(4, 6, '456'), '456';
+ok $seq->meta_length, 6;
+ok $seq->length, 10;
+
+ok $seq->meta, "   456";
+
+ok $seq->force_flush(1);
+ok $seq->meta, "   456    ";
+ok $seq->seq('aaatttc');
+ok $seq->meta, "   456 ";
+
+ok $seq = Bio::Seq::Meta::Array->new( -seq =>  "AT-CGATCGA",
+                                  -id => 'test',
+                                  -verbose => 2
+                             );
+ok join (' ', @{$seq->submeta(4, 6, '4 5 6')}), '4 5 6';
+ok $seq->meta_length, 6;
+ok $seq->length, 10;
+
+ok $seq->meta_text, "0 0 0 4 5 6";
+ok $seq->force_flush(1);
+ok $seq->meta_text, "0 0 0 4 5 6 0 0 0 0";
+
+ok $seq->seq('aaatttc');
+ok $seq->meta_text, "0 0 0 4 5 6 0";
+ok $seq->meta_length, 7;
+
+
+ok  $seq = Bio::Seq::Quality->new( -seq =>  "AT-CGATCGA",
+                                  -id => 'test',
+                                  -verbose => 2
+                             );
+ok join (' ', @{$seq->submeta(4, 6, '4 5 6')}), '4 5 6';
+ok $seq->meta_length, 6;
+ok $seq->length, 10;
+
+ok $seq->meta_text, "0 0 0 4 5 6";
+#print Dumper $seq;
+ok $seq->force_flush(1);
+
+ok $seq->meta_text, "0 0 0 4 5 6 0 0 0 0";
+
+ok $seq->seq('aaatttc');
+ok $seq->meta_text, "0 0 0 4 5 6 0";
+ok $seq->meta_length, 7;
+ok $seq->trace_length, 7;
+#ok $seq->quality_length, 7;
+
+ok $seq->is_flush, 1;
+ok $seq->trace_is_flush, 1;
+ok $seq->quality_is_flush, 1;
+
+#print Dumper $seq;
+
+
+# quality: trace_lengths, trace_is_flush, quality_is_flush

Added: trunk/packages/bioperl/branches/upstream/current/t/MicrosatelliteMarker.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MicrosatelliteMarker.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MicrosatelliteMarker.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,54 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: MicrosatelliteMarker.t,v 1.2 2005/09/16 12:44:34 bosborne Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG);
+    $DEBUG = $ENV{'BIOPERLDEBUG'};
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 6;
+}
+
+#END {
+#}
+
+require 'dumpvar.pl';
+use Bio::Map::SimpleMap;
+use Bio::Map::Position;
+use Bio::Map::Microsatellite;
+ok(1);
+
+my $map = new Bio::Map::SimpleMap(-units => 'MB',
+				  -type  => 'oo-121');
+my $position = new Bio::Map::Position(-map => $map,
+				      -value => 20
+				      );
+
+my $o_usat = new Bio::Map::Microsatellite
+    (-name=>'Chad Super Marker 2',
+     -sequence => 'gctgactgatcatatatatatatatatatatatatatatatcgcgatcgtgatttt',
+     -motif => 'at',
+     -repeats => 15,
+     -repeat_start_position => 12,
+     -position => $position,
+     );
+
+ok($o_usat->get_leading_flank(), "gctgactgatc");
+ok($o_usat->get_trailing_flank(), "cgcgatcgtgatttt");
+ok($o_usat->motif(), 'at');
+ok($o_usat->repeats(), 15);
+ok($o_usat->repeat_start_position, 12);
+
+
+#dumpValue($o_usat);
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/MicrosatelliteMarker.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/MiniMIMentry.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MiniMIMentry.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MiniMIMentry.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,55 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: MiniMIMentry.t,v 1.1 2002/09/10 06:50:18 czmasek Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 14;
+}
+
+use Bio::Phenotype::OMIM::MiniMIMentry;
+  
+my  $mm = Bio::Phenotype::OMIM::MiniMIMentry->new( -description  => "The central form of ...",
+                                                   -created      => "Victor A. McKusick: 6/4/1986",
+                                                   -contributors => "Kelly A. Przylepa - revised: 03/18/2002",
+                                                   -edited       => "alopez: 06/03/1997" );
+
+ok( $mm->isa( "Bio::Phenotype::OMIM::MiniMIMentry" ) );
+
+ok( $mm->to_string() );
+
+
+ok( $mm->description(), "The central form of ..." );
+ok( $mm->created(), "Victor A. McKusick: 6/4/1986" );
+ok( $mm->contributors(), "Kelly A. Przylepa - revised: 03/18/2002" );
+ok( $mm->edited(), "alopez: 06/03/1997" );
+
+$mm->init();
+
+ok( $mm->description(), "" );
+ok( $mm->created(), "" );
+ok( $mm->contributors(), "" );
+ok( $mm->edited(), "" );
+
+
+ok( $mm->description( "A" ), "A" );
+ok( $mm->created( "B" ), "B" );
+ok( $mm->contributors( "C" ), "C" );
+ok( $mm->edited( "D" ), "D" );
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/MitoProt.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MitoProt.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MitoProt.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,71 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: MitoProt.t,v 1.1 2003/07/26 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+	eval { require Test; };
+	$ERROR = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 8;
+	plan tests => $NUMTESTS;
+
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if( $@ ) {
+		warn("IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests\n");
+		$ERROR = 1;
+	}
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('unable to complete MitoProt tests, skipping',1);
+	}
+}
+
+exit 0 if $ERROR ==  1;
+
+use Data::Dumper;
+
+require Bio::Tools::Analysis::Protein::Mitoprot;
+use Bio::PrimarySeq;
+require Bio::WebAgent;
+
+ok 1;
+
+my $verbose = 0;
+$verbose = 1 if $DEBUG;
+
+ok my $tool = Bio::WebAgent->new(-verbose =>$verbose);
+
+my $seq = Bio::PrimarySeq->new(-seq => 'MSADQRWRQDSQDSFGDSFDGDSFFGSDFDGDS'.
+                               'DFGSDFGSDGDFGSDFGDSFGDGFSDRSRQDQRS',
+                               -display_id => 'test2');
+
+ok $tool = Bio::Tools::Analysis::Protein::Mitoprot->new( -seq=>$seq);
+if( $DEBUG ) { 
+    ok $tool->run ();
+    exit if $tool->status eq 'TERMINATED_BY_ERROR';
+    ok my $raw = $tool->result('');
+    ok my $parsed = $tool->result('parsed');
+    ok ($parsed->{'charge'}, -13);
+    ok my @res = $tool->result('Bio::SeqFeatureI');
+} else { 
+    for ( $Test::ntest..$NUMTESTS) {
+	skip("Skipping tests which require remote servers - set env variable BIOPERLDEBUG to test",1);
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Molphy.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Molphy.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Molphy.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,86 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Molphy.t,v 1.2 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $error);
+
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 17;
+	plan tests => $NUMTESTS;
+	eval { require IO::String;
+			 require Bio::Tools::Phylo::Molphy;};
+	if( $@ ) {
+		warn "No IO::String installed\n";
+		$error = 1;
+	}
+}
+
+END { 
+	foreach ( $Test::ntest .. $NUMTESTS ) {
+		skip("Unable to run the Molphy tests",1);
+	}
+}
+
+
+exit(0) if( $error );
+
+my $testnum;
+my $verbose = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my $inmolphy = new Bio::Tools::Phylo::Molphy(-file => 't/data/lysozyme6.simple.protml');
+ok($inmolphy);
+my $r = $inmolphy->next_result;
+ok($r);
+ok($r->model, 'JTT');
+ok($r->search_space,50);
+my @trees;
+while( my $t = $r->next_tree ) { 
+    push @trees, $t;
+}
+ok(@trees,5);
+ $inmolphy = new Bio::Tools::Phylo::Molphy(-file => 't/data/lysozyme6.protml');
+ok($inmolphy);
+$r = $inmolphy->next_result;
+ok($r->model, 'JTT');
+ok($r->search_space,50);
+@trees = ();
+while( my $t = $r->next_tree ) { 
+    push @trees, $t;
+}
+ok(@trees,5);
+
+ok($trees[0]->score, -1047.8);
+ok($trees[-1]->id, 9);
+
+my $tpm = $r->transition_probability_matrix;
+ok($tpm->{'Val'}->{'Val'}, -122884);
+ok($tpm->{'Ala'}->{'Arg'}, 2710);
+
+my $sub_mat = $r->substitution_matrix;
+ok($sub_mat->{'Val'}->{'Tyr'}, 50);
+ok($sub_mat->{'Arg'}->{'Ile'}, 72);
+ok($sub_mat->{'Met'}->{'Met'}, '');
+
+my %fmat = $r->residue_frequencies();
+ok($fmat{'D'}->[0], 0.052);

Added: trunk/packages/bioperl/branches/upstream/current/t/MultiFile.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/MultiFile.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/MultiFile.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: MultiFile.t,v 1.1 2005/08/28 03:41:56 bosborne Exp $
+
+use strict;
+
+BEGIN {
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => 3;
+}
+
+use Bio::SeqIO;
+use Bio::SeqIO::MultiFile;
+
+ok(1);
+
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $mf = Bio::SeqIO::MultiFile->new(-format => 'Fasta' ,
+												-verbose => $verbose,
+												-files =>
+												[ Bio::Root::IO->catfile
+												("t","data","multi_1.fa"),
+												Bio::Root::IO->catfile
+												("t","data","multi_2.fa")]);
+ok defined $mf;
+my $count = 0;
+eval {
+	while (my $seq = $mf->next_seq() ) {
+		$count++;
+		# $temp = $seq->display_id;
+	}
+};
+ok( $count,12 );

Added: trunk/packages/bioperl/branches/upstream/current/t/Mutation.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Mutation.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Mutation.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,73 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Mutation.t,v 1.4 2001/01/25 22:13:40 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 18;
+}
+
+use Bio::LiveSeq::Mutation;
+
+my $a = Bio::LiveSeq::Mutation->new();
+ok defined $a;
+
+$a->seq('aaa');
+ok $a->seq, 'aaa';
+
+$a->seqori('ggg');
+ok $a->seqori, 'ggg';
+
+$a->pos(-4);
+ok $a->pos, -4;
+
+$a->pos(5);
+ok $a->pos, 5;
+
+ok ($a->len, 3);
+
+$a->len(9);
+ok ($a->len, 9);
+
+$a->transpos(55);
+ok $a->transpos, 55;
+
+$a->issue(1);
+ok $a->issue, 1;
+
+$a->label(57);
+ok $a->label, '57';
+
+$a->prelabel(57);
+ok $a->prelabel, '57';
+
+$a->postlabel(57);
+ok $a->postlabel, '57';
+
+$a->lastlabel(57);
+ok $a->lastlabel, '57';
+
+#constructor test
+$b = Bio::LiveSeq::Mutation->new('-seq'=>'AC',
+				 '-seqori' => 'GG',
+				 '-pos' => 5,
+				 '-len' => 2,
+				 );
+ok  defined $b;
+ok $b->seqori, 'GG';
+ok $b->len, 2;
+ok $b->seq, 'AC';
+ok $b->pos, 5;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Mutator.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Mutator.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Mutator.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,117 @@
+# -*-Perl-*-
+# $Id: Mutator.t,v 1.9.14.3 2006/10/02 23:10:40 sendu Exp $
+## Bioperl Test Harness Script for Modules
+##
+
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+my $error;
+use vars qw($NUMTESTS);
+
+BEGIN { 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    $error=0;
+    use Test;
+    $NUMTESTS=21;
+    plan tests => $NUMTESTS;
+    eval { require IO::String; };
+    if( $@ ) {
+	print STDERR "IO::String not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n";
+	for( 1..$NUMTESTS ) {
+	    skip("IO::String not installed",1);
+	}
+	$error = 1; 
+    }
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require Bio::LiveSeq::Mutator;
+require Bio::LiveSeq::IO::BioPerl;
+require Bio::LiveSeq::Gene;
+require Bio::Root::IO;
+
+
+$a = Bio::LiveSeq::Mutator->new();
+ok $a;
+
+ok $a->numbering, 'coding';
+ok $a->numbering('coding 1');
+ok $a->numbering, 'coding 1';
+
+require Bio::LiveSeq::Mutation;
+my $mt = new Bio::LiveSeq::Mutation;
+ok $mt->seq('g');
+$mt->pos(100);
+ok ($a->add_Mutation($mt));
+my @each = $a->each_Mutation;
+ok( (scalar @each), 1 );
+my $mt_b = pop @each;
+ok($mt_b->seq, 'g');
+my $filename=Bio::Root::IO->catfile("t","data","ar.embl");
+my $loader=Bio::LiveSeq::IO::BioPerl->load('-file' => "$filename");
+my $gene_name='AR'; # was G6PD
+
+my $gene=$loader->gene2liveseq('-gene_name' => $gene_name);
+ok($gene);
+ok $a->gene($gene);
+
+my $results = $a->change_gene();
+ok($results);
+
+# bug 1701 - mutations on intron/exon boundaries where codon is split 
+
+$loader = Bio::LiveSeq::IO::BioPerl->load( -db   => 'EMBL',
+                                -file => Bio::Root::IO->catfile('t','data','ssp160.embl.1')
+					    );
+# move across intron/exon boundaries, check expected mutations
+my @positions = (3128..3129,3188..3189);
+my @bases = (qw(C C T T));
+my @expected = (qw(T683T T684P T684I T684T));
+my $ct = 0;
+
+for my $pos (@positions) {
+    # reset gene
+    my $gene = $loader->gene2liveseq( -gene_name => 'ssp160');
+    my $mutation = Bio::LiveSeq::Mutation->new( -seq => $bases[$ct],
+                                                -pos => $pos,
+                          );
+    my $mutate = Bio::LiveSeq::Mutator->new( -gene      => $gene,
+                                             -numbering => 'entry',
+                           );
+	
+    $mutate->add_Mutation( $mutation );
+
+    my $results = $mutate->change_gene();
+    
+	ok(defined($results));
+	ok($expected[$ct] eq $results->trivname);
+    $ct++;
+}
+
+eval { require IO::String };
+if( $@ ) {
+    print STDERR "IO::String not installed. Skipping output test.\n";
+    skip("IO::String not installed",1);
+
+} else {
+
+    use Bio::Variation::IO;
+    require IO::String;    
+    my $s;
+    my $io = IO::String->new($s);
+    my $out = Bio::Variation::IO->new('-fh'   => $io,
+				      '-format' => 'flat'
+				      );
+    ok($out->write($results));
+    #print $s;
+    ok ($s=~/DNA/ && $s=~/RNA/ && $s=~/AA/);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/NetPhos.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/NetPhos.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/NetPhos.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,65 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: NetPhos.t,v 1.8.4.4 2006/11/08 17:25:55 sendu Exp $ 
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 14;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+	
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if ($@) {
+		plan skip_all => 'IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+	
+	use_ok('Bio::Tools::Analysis::Protein::NetPhos');
+	use_ok('Bio::PrimarySeq');
+	use_ok('Bio::WebAgent');
+}
+
+my $verbose = 0;
+$verbose = 1 if $DEBUG;
+
+ok my $tool = Bio::WebAgent->new(-verbose =>$verbose);
+
+SKIP: {
+	skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 10 unless $DEBUG;
+	ok $tool->sleep;
+	is $tool->delay(1), 1;
+	ok $tool->sleep;
+	ok $tool->timeout(120); # LWP::UserAgent method
+	is $tool->url('http://a.b.c/'), 'http://a.b.c/';
+	
+	
+	my $seq = Bio::PrimarySeq->new(-id=>'bioperl',
+								   -seq=>'ABCDEFGHIJKLLKJFHSAKNDJFPSINCSJNDSKNSN');
+	
+	ok $tool = Bio::Tools::Analysis::Protein::NetPhos->new(-verbose =>$verbose);
+	$tool->timeout(15);
+	ok $tool->run ( {seq=>$seq, threshold=>0.9} );
+	if ($tool->status eq 'TERMINATED_BY_ERROR') {
+		skip "Running of the tool was terminated by an error, probably network/ NetPhos server error", 3;
+	}
+	my @res = $tool->result('Bio::SeqFeatureI');
+	unless (@res) {
+		skip "Didn't get any results from NetPhos server, probable network/server error", 3;
+	}
+	#new tests here in v 1.2
+	ok my $raw = $tool->result('');
+	ok my $parsed = $tool->result('parsed');
+	is $parsed->[0][1], '0.934';
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Node.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Node.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Node.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,71 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 21;
+}
+
+use Bio::Tree::Node;
+use Bio::Tree::AlleleNode;
+
+ok(1);
+
+my $node1 = new Bio::Tree::Node();
+my $node2 = new Bio::Tree::Node();
+ok($node1->is_Leaf() );
+ok($node1->ancestor, undef);
+
+my $pnode = new Bio::Tree::Node();
+$pnode->add_Descendent($node1);
+ok($node1->ancestor, $pnode);
+$pnode->add_Descendent($node2);
+ok($node2->ancestor, $pnode);
+
+ok(! $pnode->is_Leaf);
+
+my $phylo_node = new Bio::Tree::Node(-bootstrap => 0.25,
+				     -id => 'ADH_BOV',
+				     -desc => 'Taxon 1');
+$node1->add_Descendent($phylo_node);
+ok(! $node1->is_Leaf);
+ok($phylo_node->ancestor, $node1);
+ok($phylo_node->id, 'ADH_BOV');
+ok($phylo_node->bootstrap, 0.25);
+ok($phylo_node->description, 'Taxon 1');
+
+ok $phylo_node->ancestor($node2), $node2;
+ok $node1->is_Leaf;
+ok my @descs = $node2->each_Descendent, 1;
+ok $descs[0], $phylo_node;
+
+my $allele_node = new Bio::Tree::AlleleNode();
+$allele_node->add_Genotype(new Bio::PopGen::Genotype(-marker_name => 'm1',
+						     -alleles=>  [ 0 ]));
+$allele_node->add_Genotype(new Bio::PopGen::Genotype(-marker_name => 'm3',
+						     -alleles=>  [ 1,1 ]));
+$allele_node->add_Genotype(new Bio::PopGen::Genotype(-marker_name => 'm4',
+						     -alleles=>  [ 0,4 ]));
+ok($allele_node);
+my @mkrs = $allele_node->get_marker_names;
+
+ok(@mkrs, 3);
+my ($m3) = $allele_node->get_Genotypes(-marker => 'm3');
+ok($m3->get_Alleles, 2);
+my ($a1) = $allele_node->get_Genotypes(-marker => 'm1')->get_Alleles;
+ok($a1, 0);
+
+my ($a2,$a3) = $allele_node->get_Genotypes(-marker => 'm4')->get_Alleles;
+ok($a2, 0);
+ok($a3, 4);

Added: trunk/packages/bioperl/branches/upstream/current/t/OMIMentry.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/OMIMentry.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/OMIMentry.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,327 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: OMIMentry.t,v 1.2 2003/11/04 02:47:23 juguang Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 145;
+}
+
+use Bio::Phenotype::OMIM::OMIMentry;
+use Bio::Phenotype::OMIM::MiniMIMentry;
+use Bio::Species;
+use Bio::Annotation::Reference;
+use Bio::Map::CytoPosition;
+use Bio::Phenotype::Correlate;
+use Bio::Phenotype::Measure;
+use Bio::Annotation::DBLink;
+
+
+my $obj = Bio::Phenotype::OMIM::OMIMentry->new();
+
+ok( $obj->isa( "Bio::Phenotype::OMIM::OMIMentry" ) );
+
+ok( $obj->to_string() );
+
+ok( $obj->MIM_number( "100050" ) );
+ok( $obj->MIM_number(), "100050" );
+
+ok( $obj->title( "AARSKOG SYNDROME" ) );
+ok( $obj->title(), "AARSKOG SYNDROME" );
+
+
+ok( $obj->more_than_two_genes( 1 ) );
+ok( $obj->more_than_two_genes(), 1 );
+
+ok( $obj->is_separate( 1 ) );
+ok( $obj->is_separate(), 1 );
+
+ok( $obj->alternative_titles_and_symbols( "AORTIC ANEURYSM, ABDOMINAL" ) );
+ok( $obj->alternative_titles_and_symbols(), "AORTIC ANEURYSM, ABDOMINAL" );
+
+ok( $obj->mapping_method( "PCR of somatic cell hybrid DNA" ) );
+ok( $obj->mapping_method(), "PCR of somatic cell hybrid DNA" );
+
+ok( $obj->gene_status( "I" ) );
+ok( $obj->gene_status(), "I" );
+
+
+ok( $obj->clinical_symptoms_raw( "Patients with ..." ) );
+ok( $obj->clinical_symptoms_raw(), "Patients with ..." );
+
+
+ok( $obj->created( "Victor A. McKusick: 6/4/1986" ) );
+ok( $obj->created(), "Victor A. McKusick: 6/4/1986" );
+
+ok( $obj->contributors( "Kelly A. Przylepa - revised: 03/18/2002" ) );
+ok( $obj->contributors(), "Kelly A. Przylepa - revised: 03/18/2002" );
+
+ok( $obj->edited( "alopez: 06/03/1997" ) );
+ok( $obj->edited(), "alopez: 06/03/1997" );
+
+
+my $mm = Bio::Phenotype::OMIM::MiniMIMentry->new();
+
+ok( $obj->miniMIM( $mm ) );
+ok( $obj->miniMIM(), $mm );
+
+
+
+my $av1 = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new();
+my $av2 = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new();
+
+ok( $av1->description( "dedxsc" ) );
+
+ok( $obj->each_AllelicVariant(), 0 );
+
+ok( $obj->add_AllelicVariants( ( $av1, $av2 ) ) );
+ok( $obj->each_AllelicVariant(), 2 );
+my @avs = $obj->each_AllelicVariant();
+ok( $avs[ 0 ], $av1 );
+ok( $avs[ 1 ], $av2 );
+ok( $avs[ 0 ]->description, "dedxsc" );
+ok( $obj->each_AllelicVariant(), 2 );
+
+my @avs2 = $obj->remove_AllelicVariants();
+ok( $avs2[ 0 ], $av1 );
+ok( $avs2[ 1 ], $av2 );
+
+ok( $obj->each_AllelicVariant(), 0 );
+ok( $obj->remove_AllelicVariants(), 0 );
+
+
+
+
+
+
+ok( $obj->name( "r1" ) );
+ok( $obj->name(), "r1" );
+
+ok( $obj->description( "This is ..." ) );
+ok( $obj->description(), "This is ..." );
+
+my $mouse = Bio::Species->new();
+$mouse->classification( qw( musculus Mus ) );
+ok( $obj->species( $mouse ) );
+ok( $obj->species()->binomial(), "Mus musculus" );
+
+ok( $obj->comment( "putative" ) );
+ok( $obj->comment(), "putative" );
+
+
+
+ok( $obj->each_gene_symbol(), 0 );
+
+ok( $obj->add_gene_symbols( ( "A", "B" ) ) );
+ok( $obj->each_gene_symbol(), 2 );
+my @gs = $obj->each_gene_symbol();
+ok( $gs[ 0 ], "A" );
+ok( $gs[ 1 ], "B" );
+ok( $obj->each_gene_symbol(), 2 );
+
+my @gs2 = $obj->remove_gene_symbols();
+ok( $gs2[ 0 ], "A" );
+ok( $gs2[ 1 ], "B" );
+
+ok( $obj->each_gene_symbol(), 0 );
+ok( $obj->remove_gene_symbols(), 0 );
+
+
+
+my $v1 = Bio::Variation::VariantI->new();
+my $v2 = Bio::Variation::VariantI->new();
+
+$v1->length( "123" );
+
+ok( $obj->each_Variant(), 0 );
+
+ok( $obj->add_Variants( ( $v1, $v2 ) ) );
+ok( $obj->each_Variant(), 2 );
+my @vs = $obj->each_Variant();
+ok( $vs[ 0 ], $v1 );
+ok( $vs[ 1 ], $v2 );
+ok( $vs[ 0 ]->length(), "123" );
+ok( $obj->each_Variant(), 2 );
+
+my @vs2 = $obj->remove_Variants();
+ok( $vs2[ 0 ], $v1 );
+ok( $vs2[ 1 ], $v2 );
+
+ok( $obj->each_Variant(), 0 );
+ok( $obj->remove_Variants(), 0 );
+
+
+
+
+my $r1 = Bio::Annotation::Reference->new();
+my $r2 = Bio::Annotation::Reference->new();
+
+$r1->title( "title" );
+
+ok( $obj->each_Reference(), 0 );
+
+ok( $obj->add_References( ( $r1, $r2 ) ) );
+ok( $obj->each_Reference(), 2 );
+my @rs = $obj->each_Reference();
+ok( $rs[ 0 ], $r1 );
+ok( $rs[ 1 ], $r2 );
+ok( $rs[ 0 ]->title(), "title" );
+ok( $obj->each_Reference(), 2 );
+
+my @rs2 = $obj->remove_References();
+ok( $rs2[ 0 ], $r1 );
+ok( $rs2[ 1 ], $r2 );
+
+ok( $obj->each_Reference(), 0 );
+ok( $obj->remove_References(), 0 );
+
+
+
+
+my $c1 = Bio::Map::CytoPosition->new();
+my $c2 = Bio::Map::CytoPosition->new();
+
+$c1->chr( "12" );
+
+ok( $obj->each_CytoPosition(), 0 );
+
+ok( $obj->add_CytoPositions( ( $c1, $c2 ) ) );
+ok( $obj->each_CytoPosition(), 2 );
+my @cs = $obj->each_CytoPosition();
+ok( $cs[ 0 ], $c1 );
+ok( $cs[ 1 ], $c2 );
+ok( $cs[ 0 ]->chr(), "12" );
+ok( $obj->each_CytoPosition(), 2 );
+
+my @cs2 = $obj->remove_CytoPositions();
+ok( $cs2[ 0 ], $c1 );
+ok( $cs2[ 1 ], $c2 );
+
+ok( $obj->each_CytoPosition(), 0 );
+ok( $obj->remove_CytoPositions(), 0 );
+
+
+
+
+my $co1 = Bio::Phenotype::Correlate->new();
+my $co2 = Bio::Phenotype::Correlate->new();
+
+ok( $co1->name( "name" ) );
+
+ok( $obj->each_Correlate(), 0 );
+
+ok( $obj->add_Correlates( ( $co1, $co2 ) ) );
+ok( $obj->each_Correlate(), 2 );
+my @cos = $obj->each_Correlate();
+ok( $cos[ 0 ], $co1 );
+ok( $cos[ 1 ], $co2 );
+ok( $cos[ 0 ]->name, "name" );
+ok( $obj->each_Correlate(), 2 );
+
+my @cos2 = $obj->remove_Correlates();
+ok( $cos2[ 0 ], $co1 );
+ok( $cos2[ 1 ], $co2 );
+
+ok( $obj->each_Correlate(), 0 );
+ok( $obj->remove_Correlates(), 0 );
+
+
+
+
+my $m1 = Bio::Phenotype::Measure->new();
+my $m2 = Bio::Phenotype::Measure->new();
+
+ok( $m1->description( "desc" ) );
+
+ok( $obj->each_Measure(), 0 );
+
+ok( $obj->add_Measures( ( $m1, $m2 ) ) );
+ok( $obj->each_Measure(), 2 );
+my @ms = $obj->each_Measure();
+ok( $ms[ 0 ], $m1 );
+ok( $ms[ 1 ], $m2 );
+ok( $ms[ 0 ]->description, "desc" );
+ok( $obj->each_Measure(), 2 );
+
+my @ms2 = $obj->remove_Measures();
+ok( $ms2[ 0 ], $m1 );
+ok( $ms2[ 1 ], $m2 );
+
+ok( $obj->each_Measure(), 0 );
+ok( $obj->remove_Measures(), 0 );
+
+
+
+ok( $obj->each_keyword(), 0 );
+
+ok( $obj->add_keywords( ( "A", "B" ) ) );
+ok( $obj->each_keyword(), 2 );
+my @ks = $obj->each_keyword();
+ok( $ks[ 0 ], "A" );
+ok( $ks[ 1 ], "B" );
+ok( $obj->each_keyword(), 2 );
+
+my @ks2 = $obj->remove_keywords();
+ok( $ks2[ 0 ], "A" );
+ok( $ks2[ 1 ], "B" );
+
+ok( $obj->each_keyword(), 0 );
+ok( $obj->remove_keywords(), 0 );
+
+
+
+my $l1 = Bio::Annotation::DBLink->new();
+my $l2 = Bio::Annotation::DBLink->new();
+
+ok( $l1->comment( "comment" ) );
+
+ok( $obj->each_DBLink(), 0 );
+
+ok( $obj->add_DBLinks( ( $l1, $l2 ) ) );
+ok( $obj->each_DBLink(), 2 );
+my @ls = $obj->each_DBLink();
+ok( $ls[ 0 ], $l1 );
+ok( $ls[ 1 ], $l2 );
+ok( $ls[ 0 ]->comment(), "comment" );
+ok( $obj->each_DBLink(), 2 );
+
+my @ls2 = $obj->remove_DBLinks();
+ok( $ls2[ 0 ], $l1 );
+ok( $ls2[ 1 ], $l2 );
+
+ok( $obj->each_DBLink(), 0 );
+ok( $obj->remove_DBLinks(), 0 );
+
+
+
+ok( $obj->each_Genotype(), 0 );
+
+ok( $obj->add_Genotypes( ( "A", "B" ) ) );
+ok( $obj->each_Genotype(), 2 );
+my @gts = $obj->each_Genotype();
+ok( $gts[ 0 ], "A" );
+ok( $gts[ 1 ], "B" );
+ok( $obj->each_Genotype(), 2 );
+
+my @gts2 = $obj->remove_Genotypes();
+ok( $gts2[ 0 ], "A" );
+ok( $gts2[ 1 ], "B" );
+
+ok( $obj->each_Genotype(), 0 );
+ok( $obj->remove_Genotypes(), 0 );
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/OMIMentryAllelicVariant.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/OMIMentryAllelicVariant.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/OMIMentryAllelicVariant.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: OMIMentryAllelicVariant.t,v 1.2 2002/09/17 01:41:43 czmasek Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 26;
+}
+
+use Bio::Phenotype::OMIM::OMIMentryAllelicVariant;
+
+my $av = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new( -number               => ".0001",
+                                                             -title                => "ALCOHOL INTOLERANCE",
+                                                             -symbol               => "ALDH2*2",
+                                                             -description          => "The ALDH2*2-encoded ...",
+                                                             -aa_ori               => "GLU",
+                                                             -aa_mut               => "LYS",
+                                                             -position             => 487,
+                                                             -additional_mutations => "IVS4DS, G-A, +1" );  
+
+ok( $av->isa( "Bio::Phenotype::OMIM::OMIMentryAllelicVariant" ) );
+
+ok( $av->to_string() );
+
+ok( $av->number(), ".0001" );
+ok( $av->title(), "ALCOHOL INTOLERANCE" );
+ok( $av->symbol(), "ALDH2*2" );
+ok( $av->description(), "The ALDH2*2-encoded ..." );
+ok( $av->aa_ori(), "GLU" );
+ok( $av->aa_mut(), "LYS" );
+ok( $av->position(), 487 );
+ok( $av->additional_mutations(), "IVS4DS, G-A, +1" );
+
+$av->init();
+
+ok( $av->number(), "" );
+ok( $av->title(), "" );
+ok( $av->symbol(), "" );
+ok( $av->description(), "" );
+ok( $av->aa_ori(), "" );
+ok( $av->aa_mut(), "" );
+ok( $av->position(), "" );
+ok( $av->additional_mutations(), "" );
+
+ok( $av->number( "A" ), "A" );
+ok( $av->title( "B" ), "B" );
+ok( $av->symbol( "C" ), "C" );
+ok( $av->description( "D" ), "D" );
+ok( $av->aa_ori( "E" ), "E" );
+ok( $av->aa_mut( "F" ), "F" );
+ok( $av->position( "G" ), "G" );
+ok( $av->additional_mutations( "H" ), "H" );
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/OMIMparser.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/OMIMparser.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/OMIMparser.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,304 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: OMIMparser.t,v 1.5.8.1 2006/10/16 17:08:15 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 173;
+}
+
+use File::Spec;
+use Bio::Phenotype::OMIM::OMIMparser;
+
+
+my $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap  => File::Spec->catfile(qw(t data omim_genemap_test)),
+                                                         -omimtext => File::Spec->catfile(qw(t data omim_text_test)) );
+
+
+ok( $omim_parser->isa( "Bio::Phenotype::OMIM::OMIMparser" ) );
+
+my $omim_entry = $omim_parser->next_phenotype();
+
+
+ok( $omim_entry->isa( "Bio::Phenotype::OMIM::OMIMentry" ) );
+
+ok( $omim_entry->MIM_number(), "100500" );
+ok( $omim_entry->title(), "*100500 title" );
+ok( $omim_entry->alternative_titles_and_symbols(), ";;title1;;\ntitle2;;\ntitle3" );
+ok( $omim_entry->more_than_two_genes(), 0 );
+ok( $omim_entry->is_separate(), 1 );
+ok( $omim_entry->description(), undef); # "DESCRIPTION1\nDESCRIPTION2" );
+ok( $omim_entry->mapping_method(), "M method 1" );
+ok( $omim_entry->gene_status(), "C" );
+ok( $omim_entry->comment(), "comment1" );
+ok( $omim_entry->edited(), undef); # "ed1\ned2\ned3" );
+ok( $omim_entry->created(), undef); # "cd1\ncd2\ncd3" );
+ok( $omim_entry->contributors, undef); # "cn1\ncn2\ncn3" );
+ok( $omim_entry->additional_references(), "sa" );
+ok( ref($omim_entry->clinical_symptoms()), 'HASH' );
+ok( $omim_entry->species()->binomial(), "Homo sapiens" );
+
+
+my $mini_mim = $omim_entry->miniMIM();
+
+ok( $mini_mim->isa( "Bio::Phenotype::OMIM::MiniMIMentry" ) );
+ok( $mini_mim->description(), "Mini MIM text" );
+ok( $mini_mim->created(), "Mini MIM - cd" );
+ok( $mini_mim->contributors(), "Mini MIM - cn" );
+ok( $mini_mim->edited(), "Mini MIM - ed" );
+
+
+my @corrs      = $omim_entry->each_Correlate();
+
+ok( $corrs[ 0 ]->name(), "mousecorrelate1" );
+ok( $corrs[ 0 ]->type(), "OMIM mouse correlate" );
+ok( $corrs[ 0 ]->species()->binomial(), "Mus musculus" );
+
+
+my @cps        = $omim_entry->each_CytoPosition();
+
+ok( $cps[ 0 ]->value(), "1pter-p36.14" );
+
+
+my @gss        = $omim_entry->each_gene_symbol();
+
+ok( $gss[ 0 ], "gene-symbol1" );
+
+
+my @refs       = $omim_entry->each_Reference();
+
+ok( $refs[ 0 ]->authors(), "Author11, A. A.; Author12, A. A." );
+ok( $refs[ 0 ]->title(), "Title 1." );
+ok( $refs[ 0 ]->location(), "Am. J. Med. Genet1. 11 11-111 \(1981\)" );
+
+ok( $refs[ 1 ]->authors(), "Author21, A. A.; Author22, A. A." );
+ok( $refs[ 1 ]->title(), "Title 2." );
+ok( $refs[ 1 ]->location(), "Am. J. Med. Genet2. 12 22-222 \(1982\)" );
+
+ok( $refs[ 2 ]->authors(), "Author31, A. A.; Author32, A. A." );
+ok( $refs[ 2 ]->title(), "Title 3." );
+ok( $refs[ 2 ]->location(), "Am. J. Med. Genet3. 13 33-333 \(1983\)" );
+
+ok( $refs[ 3 ]->authors(), "" );
+ok( $refs[ 3 ]->title(), "other reference undef format" );
+ok( $refs[ 3 ]->location(), "" );
+
+
+
+my @avs        = $omim_entry->each_AllelicVariant();
+
+ok( $avs[ 0 ]->number(), ".0001" );
+ok( $avs[ 0 ]->title(), "ALCOHOL INTOLERANCE, ACUTE" );
+ok( $avs[ 0 ]->symbol(), "ALDH2" );
+ok( $avs[ 0 ]->description(), "AV1-text" );
+ok( $avs[ 0 ]->aa_ori(), "GLU" );
+ok( $avs[ 0 ]->aa_mut(), "LYS" );
+ok( $avs[ 0 ]->position(), "487" );
+ok( $avs[ 0 ]->additional_mutations(), "" );
+
+
+ok( $avs[ 1 ]->number(), ".0002" );
+ok( $avs[ 1 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 1 ]->symbol(), "CHRNA1" );
+ok( $avs[ 1 ]->description(), "AV2-text" );
+ok( $avs[ 1 ]->aa_ori(), "VAL" );
+ok( $avs[ 1 ]->aa_mut(), "MET" );
+ok( $avs[ 1 ]->position(), "156" );
+ok( $avs[ 1 ]->additional_mutations(), "" );
+
+
+ok( $avs[ 2 ]->number(), ".0003" );
+ok( $avs[ 2 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 2 ]->symbol(), "CHRNE" );
+ok( $avs[ 2 ]->description(), "AV2-text a\nAV2-text b" );
+ok( $avs[ 2 ]->aa_ori(), "ARG" );
+ok( $avs[ 2 ]->aa_mut(), "LEU" );
+ok( $avs[ 2 ]->position(), "147" );
+ok( $avs[ 2 ]->additional_mutations(), "" );
+
+
+ok( $avs[ 3 ]->number(), ".0004" );
+ok( $avs[ 3 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 3 ]->symbol(), "CHRNE" );
+ok( $avs[ 3 ]->description(), "Sieb et al. (2000) found that a brother and sister with congenital\nmyasthenic syndrome (601462) were compound heterozygotes for a deletion\nof 911T and a splicing mutation (IVS4+1G-A; 100725.0007)." );
+ok( $avs[ 3 ]->aa_ori(), "" );
+ok( $avs[ 3 ]->aa_mut(), "" );
+ok( $avs[ 3 ]->position(), "" );
+ok( $avs[ 3 ]->additional_mutations(), "1-BP DEL, 911T" );
+
+
+ok( $avs[ 4 ]->number(), ".0005" );
+ok( $avs[ 4 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 4 ]->symbol(), "CHRNE" );
+ok( $avs[ 4 ]->description(), "See 100725.0006 and Sieb et al. (2000)." );
+ok( $avs[ 4 ]->aa_ori(), "" );
+ok( $avs[ 4 ]->aa_mut(), "" );
+ok( $avs[ 4 ]->position(), "" );
+ok( $avs[ 4 ]->additional_mutations(), "IVS4DS, G-A, +1" );
+
+
+
+ok( $avs[ 5 ]->number(), ".0006" );
+ok( $avs[ 5 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 5 ]->symbol(), "CHRNE" );
+ok( $avs[ 5 ]->description(), "AV6-text" );
+ok( $avs[ 5 ]->aa_ori(), "" );
+ok( $avs[ 5 ]->aa_mut(), "" );
+ok( $avs[ 5 ]->position(), "" );
+ok( $avs[ 5 ]->additional_mutations(), "1-BP DEL, 1030C" );
+
+
+
+
+
+my $omim_entry2 = $omim_parser->next_phenotype();
+
+
+ok( $omim_entry2->isa( "Bio::Phenotype::OMIM::OMIMentry" ) );
+
+ok( $omim_entry2->MIM_number(), "100501" );
+ok( $omim_entry2->title(), "#100501 second entry" );
+ok( $omim_entry2->alternative_titles_and_symbols(), ";;title1;;\ntitle2;;\ntitle3" );
+ok( $omim_entry2->more_than_two_genes(), 1 );
+ok( $omim_entry2->is_separate(), 0 );
+ok( $omim_entry2->description(), undef); # "DESCRIPTION1\nDESCRIPTION2" );
+ok( $omim_entry2->mapping_method(), "M method 2" );
+ok( $omim_entry2->gene_status(), "C" );
+ok( $omim_entry2->comment(), "comment2" );
+ok( $omim_entry2->edited(), undef); # "ed1\ned2\ned3" );
+ok( $omim_entry2->created(), undef); # "cd1\ncd2\ncd3" );
+ok( $omim_entry2->contributors(), undef); # "cn1\ncn2\ncn3" );
+ok( $omim_entry2->additional_references(), "sa" );
+
+my $cs = $omim_entry2->clinical_symptoms();
+ok( ref($cs), 'HASH' );
+ok( $omim_entry2->species()->binomial(), "Homo sapiens" );
+
+
+$mini_mim   = $omim_entry2->miniMIM();
+
+ok( $mini_mim->isa( "Bio::Phenotype::OMIM::MiniMIMentry" ) );
+ok( $mini_mim->description(), "Mini MIM text" );
+ok( $mini_mim->created(), "Mini MIM - cd" );
+ok( $mini_mim->contributors(), "Mini MIM - cn" );
+ok( $mini_mim->edited(), "Mini MIM - ed" );
+
+
+@corrs      = $omim_entry2->each_Correlate();
+
+ok( $corrs[ 0 ]->name(), "mousecorrelate2" );
+ok( $corrs[ 0 ]->type(), "OMIM mouse correlate" );
+ok( $corrs[ 0 ]->species()->binomial(), "Mus musculus" );
+
+
+@cps        = $omim_entry2->each_CytoPosition();
+
+ok( $cps[ 0 ]->value(), "1pter-p36.15" );
+
+
+@gss        = $omim_entry2->each_gene_symbol();
+
+ok( $gss[ 0 ], "gene-symbol2" );
+
+
+@refs       = $omim_entry2->each_Reference();
+
+ok( $refs[ 0 ]->authors(), "Author11, A. A.; Author12, A. A." );
+ok( $refs[ 0 ]->title(), "Title 1." );
+ok( $refs[ 0 ]->location(), "Am. J. Med. Genet1. 11 11-111 \(1981\)" );
+
+ok( $refs[ 1 ]->authors(), "Author21, A. A.; Author22, A. A." );
+ok( $refs[ 1 ]->title(), "Title 2." );
+ok( $refs[ 1 ]->location(), "Am. J. Med. Genet2. 12 22-222 \(1982\)" );
+
+ok( $refs[ 2 ]->authors(), "Author31, A. A.; Author32, A. A." );
+ok( $refs[ 2 ]->title(), "Title 3." );
+ok( $refs[ 2 ]->location(), "Am. J. Med. Genet3. 13 33-333 \(1983\)" );
+
+ok( $refs[ 3 ]->authors(), "" );
+ok( $refs[ 3 ]->title(), "other reference undef format" );
+ok( $refs[ 3 ]->location(), "" );
+
+
+
+@avs        = $omim_entry2->each_AllelicVariant();
+
+ok( $avs[ 0 ]->number(), ".0001" );
+ok( $avs[ 0 ]->title(), "ALCOHOL INTOLERANCE, ACUTE" );
+ok( $avs[ 0 ]->symbol(), "ALDH2" );
+ok( $avs[ 0 ]->description(), "AV1-text" );
+ok( $avs[ 0 ]->aa_ori(), "GLU" );
+ok( $avs[ 0 ]->aa_mut(), "LYS" );
+ok( $avs[ 0 ]->position(), "487" );
+ok( $avs[ 0 ]->additional_mutations(), "" );
+
+
+ok( $avs[ 1 ]->number(), ".0002" );
+ok( $avs[ 1 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 1 ]->symbol(), "CHRNA1" );
+ok( $avs[ 1 ]->description(), "AV2-text" );
+ok( $avs[ 1 ]->aa_ori(), "VAL" );
+ok( $avs[ 1 ]->aa_mut(), "MET" );
+ok( $avs[ 1 ]->position(), "156" );
+ok( $avs[ 1 ]->additional_mutations(), "" );
+
+
+ok( $avs[ 2 ]->number(), ".0003" );
+ok( $avs[ 2 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 2 ]->symbol(), "CHRNE" );
+ok( $avs[ 2 ]->description(), "AV2-text a\nAV2-text b" );
+ok( $avs[ 2 ]->aa_ori(), "ARG" );
+ok( $avs[ 2 ]->aa_mut(), "LEU" );
+ok( $avs[ 2 ]->position(), "147" );
+ok( $avs[ 2 ]->additional_mutations(), "" );
+
+
+ok( $avs[ 3 ]->number(), ".0004" );
+ok( $avs[ 3 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 3 ]->symbol(), "CHRNE" );
+ok( $avs[ 3 ]->description(), "Sieb et al. (2000) found that a brother and sister with congenital\nmyasthenic syndrome (601462) were compound heterozygotes for a deletion\nof 911T and a splicing mutation (IVS4+1G-A; 100725.0007)." );
+ok( $avs[ 3 ]->aa_ori(), "" );
+ok( $avs[ 3 ]->aa_mut(), "" );
+ok( $avs[ 3 ]->position(), "" );
+ok( $avs[ 3 ]->additional_mutations(), "1-BP DEL, 911T" );
+
+
+ok( $avs[ 4 ]->number(), ".0005" );
+ok( $avs[ 4 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 4 ]->symbol(), "CHRNE" );
+ok( $avs[ 4 ]->description(), "See 100725.0006 and Sieb et al. (2000)." );
+ok( $avs[ 4 ]->aa_ori(), "" );
+ok( $avs[ 4 ]->aa_mut(), "" );
+ok( $avs[ 4 ]->position(), "" );
+ok( $avs[ 4 ]->additional_mutations(), "IVS4DS, G-A, +1" );
+
+
+
+ok( $avs[ 5 ]->number(), ".0006" );
+ok( $avs[ 5 ]->title(), "MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL" );
+ok( $avs[ 5 ]->symbol(), "CHRNE" );
+ok( $avs[ 5 ]->description(), "AV6-text" );
+ok( $avs[ 5 ]->aa_ori(), "" );
+ok( $avs[ 5 ]->aa_mut(), "" );
+ok( $avs[ 5 ]->position(), "" );
+ok( $avs[ 5 ]->additional_mutations(), "1-BP DEL, 1030C" );
+
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/OddCodes.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/OddCodes.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/OddCodes.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##$Id: OddCodes.t,v 1.5 2001/10/22 08:22:58 heikki Exp $
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 10;
+}
+
+use Bio::PrimarySeq;
+use Bio::Tools::OddCodes;
+ok 1;
+
+my ($seqobj, $oddcode_obj);
+
+$seqobj = Bio::PrimarySeq->new('-seq'=>'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
+			       '-alphabet'=>'protein', 
+			       '-id'=>'test');
+$oddcode_obj  =  Bio::Tools::OddCodes->new('-seq' => $seqobj);
+
+ok defined($oddcode_obj) && ref($oddcode_obj) && 
+    $oddcode_obj->isa('Bio::Tools::OddCodes');
+
+ok ${$oddcode_obj->structural()}, 'ABAEEIAEIJEIIEOAEEAAUIAXAZ';
+ok ${$oddcode_obj->chemical()}, 'LBSAARLCLJCLSMOIMCHHULRXRZ';
+ok ${$oddcode_obj->functional()}, 'HBPAAHPCHJCHHPOHPCPPUHHXPZ';
+ok ${$oddcode_obj->charge()}, 'NBNAANNCNJCNNNONNCNNUNNXNZ';
+ok ${$oddcode_obj->hydrophobic()}, 'IBOOOIOOIJOIIOOIOOOOUIIXOZ';
+ok ${$oddcode_obj->Dayhoff()}, 'CBADDGCEFJEFFDOCDECCUFGXGZ';
+ok ${$oddcode_obj->Sneath()}, 'CBEFFHCHAJGADDOCDGEEUAHXHZ';
+ok ${$oddcode_obj->Stanfel()}, 'ABACCDAEAJEAACOACEAAUADXDZ';

Added: trunk/packages/bioperl/branches/upstream/current/t/Ontology.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Ontology.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Ontology.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: Ontology.t,v 1.3 2003/05/24 23:05:35 lapp Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    eval { require 'Graph.pm' };
+    if( $@ ) {
+	    print STDERR "\nGraph.pm doesn't seem to be installed on this system -- the GO Parser needs it...\n\n";
+	    plan tests => 1;
+	    ok( 1 );
+	    exit( 0 );
+    }
+
+    plan tests => 50;
+}
+
+
+use Bio::OntologyIO;
+use Bio::Ontology::RelationshipType;
+use Bio::Root::IO;
+
+my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+
+my $io = Bio::Root::IO->new(); # less typing from now on
+my $parser = Bio::OntologyIO->new(
+                      -format    => "soflat",
+		      -file      => $io->catfile("t", "data",
+						 "sofa.ontology"));
+
+my $ont = $parser->next_ontology();
+ok ($ont);
+ok ($ont->name, "Sequence Feature Ontology");
+
+my @roots = $ont->get_root_terms();
+ok (scalar(@roots), 1);
+ok ($roots[0]->name(), "Sequence_Feature_Ontology");
+ok ($roots[0]->identifier(), "SO:0000000");
+
+my @terms = $ont->get_child_terms($roots[0]);
+ok (scalar(@terms), 1);
+ok ($terms[0]->name(), "sofa");
+@terms = $ont->get_child_terms($terms[0]);
+ok (scalar(@terms), 1);
+ok ($terms[0]->name(), "feature");
+my $featterm = $terms[0];
+@terms = $ont->get_child_terms($featterm);
+ok (scalar(@terms), 10);
+
+# oligonucleotide has two parents, see whether this is handled
+@terms = $ont->get_descendant_terms($featterm);
+my ($term) = grep { $_->name() eq "oligonucleotide"; } @terms;
+ok $term;
+skip(! $term, $term->identifier(), "SO:0000696");
+
+@terms = $ont->get_ancestor_terms($term);
+ok (scalar(@terms), 7);
+ok (scalar(grep { $_->name() eq "remark"; } @terms), 1);
+ok (scalar(grep { $_->name() eq "reagent"; } @terms), 1);
+
+# processed_transcript has part-of and is-a children
+@terms = $ont->get_descendant_terms($featterm);
+($term) = grep { $_->name() eq "processed_transcript"; } @terms;
+ok $term;
+skip(! $term, $term->identifier(), "SO:0000233");
+
+@terms = $ont->get_child_terms($term);
+ok (scalar(@terms), 4);
+@terms = $ont->get_child_terms($term, $PART_OF);
+ok (scalar(@terms), 2);
+@terms = $ont->get_child_terms($term, $IS_A);
+ok (scalar(@terms), 2);
+@terms = $ont->get_child_terms($term, $PART_OF, $IS_A);
+ok (scalar(@terms), 4);
+
+# now all descendants:
+@terms = $ont->get_descendant_terms($term);
+ok (scalar(@terms), 13);
+@terms = $ont->get_descendant_terms($term, $PART_OF);
+ok (scalar(@terms), 2);
+@terms = $ont->get_descendant_terms($term, $IS_A);
+ok (scalar(@terms), 5);
+@terms = $ont->get_descendant_terms($term, $PART_OF, $IS_A);
+ok (scalar(@terms), 13);
+
+# TF_binding_site has 2 parents and different relationships in the two
+# paths up (although the relationships to its two parents are of the
+# same type, namely is-a)
+@terms = $ont->get_descendant_terms($featterm);
+($term) = grep { $_->name() eq "TF_binding_site"; } @terms;
+ok $term;
+skip(! $term, $term->identifier(), "SO:0000235");
+
+@terms = $ont->get_parent_terms($term);
+ok (scalar(@terms), 2);
+my ($pterm) = grep { $_->name eq "regulatory_region"; } @terms;
+ok $pterm;
+@terms = $ont->get_parent_terms($term, $PART_OF);
+ok (scalar(@terms), 0);
+@terms = $ont->get_parent_terms($term, $IS_A);
+ok (scalar(@terms), 2);
+@terms = $ont->get_parent_terms($term, $PART_OF, $IS_A);
+ok (scalar(@terms), 2);
+
+# now all ancestors:
+@terms = $ont->get_ancestor_terms($term);
+ok (scalar(@terms), 6);
+@terms = $ont->get_ancestor_terms($term, $PART_OF);
+ok (scalar(@terms), 0);
+@terms = $ont->get_ancestor_terms($pterm, $PART_OF);
+ok (scalar(@terms), 1);
+@terms = $ont->get_ancestor_terms($term, $IS_A);
+ok (scalar(@terms), 5);
+@terms = $ont->get_ancestor_terms($pterm, $IS_A);
+ok (scalar(@terms), 0);
+@terms = $ont->get_ancestor_terms($term, $PART_OF, $IS_A);
+ok (scalar(@terms), 6);
+
+# pull out all relationships
+my @rels = $ont->get_relationships();
+my @relset = grep { $_->object_term->name eq "sofa"; } @rels;
+ok (scalar(@relset), 1);
+@relset = grep { $_->subject_term->name eq "sofa"; } @rels;
+ok (scalar(@relset), 1);
+@relset = grep { $_->object_term->name eq "feature"; } @rels;
+ok (scalar(@relset), 10);
+@relset = grep { $_->subject_term->name eq "feature"; } @rels;
+ok (scalar(@relset), 1);
+@relset = grep { $_->object_term->identifier eq "SO:0000233"; } @rels;
+ok (scalar(@relset), 4);
+@relset = grep { $_->predicate_term->name eq "IS_A" } @relset;
+ok (scalar(@relset), 2);
+
+# relationships for a specific term only
+($term) = $ont->find_terms(-identifier => "SO:0000233");
+ok ($term);
+ok ($term->identifier, "SO:0000233");
+ok ($term->name, "processed_transcript");
+@rels = $ont->get_relationships($term);
+ok (scalar(@rels), 5);
+@relset = grep { $_->predicate_term->name eq "IS_A"; } @rels;
+ok (scalar(@relset), 3);
+@relset = grep { $_->object_term->identifier eq "SO:0000233"; } @rels;
+ok (scalar(@relset), 4);

Added: trunk/packages/bioperl/branches/upstream/current/t/OntologyEngine.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/OntologyEngine.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/OntologyEngine.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: OntologyEngine.t,v 1.8 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($HAVEGRAPHDIRECTED $DEBUG $NUMTESTS);
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	eval {
+		require Graph::Directed;
+		$HAVEGRAPHDIRECTED=1;
+	};
+	if ($@) {
+		$HAVEGRAPHDIRECTED = 0;
+		warn "Cannot run tests, Graph::Directed not installed\n";
+	}
+	plan tests => ($NUMTESTS = 22);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot run OntologyEngine tests, skipping',1);
+	}
+}
+exit(0) unless $HAVEGRAPHDIRECTED;
+require Bio::Ontology::Term;
+require Bio::Ontology::Relationship;
+require Bio::Ontology::RelationshipType;
+require Bio::Ontology::SimpleOntologyEngine;
+require Bio::Ontology::Ontology;
+
+my $ont = Bio::Ontology::Ontology->new(-name => "My Ontology");
+
+my $eng = Bio::Ontology::SimpleOntologyEngine->new();
+$ont->engine($eng);
+ok( $eng->isa( "Bio::Ontology::OntologyEngineI" ) );
+ok ($ont->engine, $eng);
+
+my @terms = (
+	     [-identifier => "IPR000001",
+	      -name => "Kringle",
+	      -definition => "Kringles are autonomous structural domains ...",
+	      -ontology => $ont
+	      ],
+	     [-identifier => "IPR000002",
+	      -name => "Cdc20/Fizzy",
+	      -definition => "The Cdc20/Fizzy region is almost always ...",
+	      -ontology => $ont
+	      ],
+	     [-identifier => "IPR000003",
+	      -name => "Retinoid X receptor",
+	      -definition => "Steroid or nuclear hormone receptors ...",
+	      -ontology => $ont
+	      ],
+	     [-identifier => "IPR000004",
+	      -name => "Test4",
+	      -definition => "Test4 definition ...",
+	      -ontology => $ont
+	      ],
+	     );
+
+for(my $i = 0; $i < @terms; $i++) {
+    $terms[$i] = Bio::Ontology::Term->new(@{$terms[$i]});
+    $ont->add_term($terms[$i]);
+}
+
+my $rel_type = Bio::Ontology::RelationshipType->get_instance("IS_A", $ont);
+my $rel_type1 = Bio::Ontology::RelationshipType->get_instance("PART_OF", $ont);
+
+my @rels = (
+	    [-object_term => $terms[0],
+	     -subject_term => $terms[1],
+	     -predicate_term => $rel_type,
+	     -ontology => $ont,
+	     ],
+	    [-object_term => $terms[1],
+	     -subject_term => $terms[2],
+	     -predicate_term => $rel_type,
+	     -ontology => $ont,
+	     ],
+	    [-object_term => $terms[0],
+	     -subject_term => $terms[3],
+	     -predicate_term => $rel_type,
+	     -ontology => $ont,
+	     ],
+	    [-object_term => $terms[3],
+	     -subject_term => $terms[2],
+	     -predicate_term => $rel_type,
+	     -ontology => $ont,
+	     ],
+	    );
+
+for(my $i = 0; $i < @rels; $i++) {
+    $rels[$i] = Bio::Ontology::Relationship->new(@{$rels[$i]});
+    $ont->add_relationship($rels[$i]);
+}
+
+my @child_terms = sort { $a->identifier() cmp $b->identifier();
+		     } $ont->get_child_terms($terms[0]);
+ok (scalar(@child_terms), 2);
+ok( $child_terms[0], $terms[1] );
+my @child_terms1 = sort { $a->identifier() cmp $b->identifier();
+		      } $ont->get_child_terms($terms[0], $rel_type);
+ok (scalar(@child_terms), 2);
+ok( $child_terms1[0], $terms[1] );
+ok (scalar($ont->get_child_terms($terms[0], $rel_type1)), 0);
+
+my @descendant_terms = sort { $a->identifier() cmp $b->identifier();
+			  } $ont->get_descendant_terms($terms[0]);
+ok( scalar(@descendant_terms), 3);
+ok( $descendant_terms[1], $terms[2] );
+
+my @descendant_terms1 = sort { $a->identifier() cmp $b->identifier();
+			   } $ont->get_descendant_terms($terms[0], $rel_type);
+ok( $descendant_terms1[1], $terms[2] );
+ok (scalar(@descendant_terms1), 3);
+ok (scalar($ont->get_descendant_terms($terms[0], $rel_type1)), 0);
+
+my @parent_terms = sort { $a->identifier() cmp $b->identifier();
+		      } $ont->get_parent_terms($terms[1]);
+ok (scalar(@parent_terms), 1);
+ok( $parent_terms[0], $terms[0] );
+
+my @ancestor_terms = sort { $a->identifier() cmp $b->identifier();
+			} $ont->get_ancestor_terms($terms[2]);
+ok( $ancestor_terms[0], $terms[0] );
+ok (scalar(@ancestor_terms), 3);
+ok (scalar($ont->get_ancestor_terms($terms[2], $rel_type)), 3);
+ok (scalar($ont->get_ancestor_terms($terms[2], $rel_type1)), 0);
+
+my @leaf_terms = $ont->get_leaf_terms();
+# print scalar(@leaf_terms)."\n";
+ok (scalar(@leaf_terms), 1);
+ok( $leaf_terms[0], $terms[2]);
+
+my @root_terms = $ont->get_root_terms();
+# print scalar(@root_terms)."\n";
+ok (scalar(@root_terms), 1);
+ok( $root_terms[0], $terms[0]);
+
+#print $ont->engine->to_string();

Added: trunk/packages/bioperl/branches/upstream/current/t/OntologyStore.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/OntologyStore.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/OntologyStore.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,45 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: OntologyStore.t,v 1.1.6.4 2006/11/09 10:10:59 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 7;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+	
+	eval {
+		require Graph;
+	};
+	if ($@) {
+		plan skip_all => 'Graph not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+	
+	use_ok('Bio::Ontology::OntologyStore');
+}
+
+ok my $store = Bio::Ontology::OntologyStore->get_instance;
+
+SKIP: {
+	my $ontology;
+	eval {$ontology = $store->get_ontology(-name => 'Sequence Ontology');};
+	skip "Couldn't get sequence ontology, network problems? Skipping these tests", 5 if $@;
+	ok('got file okay');
+	ok(scalar($ontology->get_root_terms()) == 1);
+	my($txt) = $ontology->find_terms(-name => 'transcript');
+	is $txt->identifier, 'SO:0000673';
+	is $txt->name, 'transcript';
+	is $txt->definition, 'An RNA synthesized on a DNA or RNA template by an RNA polymerase.';
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/PAML.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/PAML.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/PAML.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,425 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: PAML.t,v 1.24.2.1 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $error);
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 192;
+    plan tests => $NUMTESTS;
+    eval { require IO::String; 
+	   require Bio::Tools::Phylo::PAML;}; 
+    if( $@ ) {
+	print STDERR "no IO::String installed\n"; 
+	$error = 1;
+    }
+}
+
+END {
+	foreach ( $Test::ntest .. $NUMTESTS ) {
+		skip("Unable to run all of the PAML tests",1);
+	}
+}
+
+
+exit(0) if( $error );
+
+my $testnum;
+my $verbose = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+use Bio::Root::IO;
+
+my $inpaml = new Bio::Tools::Phylo::PAML(-file => Bio::Root::IO->catfile(qw(t data codeml.mlc)));
+ok($inpaml);
+my $result = $inpaml->next_result;
+ok($result);
+ok($result->model, 'several dN/dS ratios for branches');
+ok($result->version, qr'3\.12');
+my $MLmat = $result->get_MLmatrix;
+my $NGmat = $result->get_NGmatrix;
+
+ok($NGmat->[0]->[1]->{'omega'}, 0.2507);
+ok($NGmat->[0]->[1]->{'dN'}, 0.0863);
+ok($NGmat->[0]->[1]->{'dS'}, 0.3443);
+
+ok($NGmat->[2]->[3]->{'omega'}, 0.2178);
+ok($NGmat->[2]->[3]->{'dN'}, 0.1348);
+ok($NGmat->[2]->[3]->{'dS'}, 0.6187);
+
+ok($MLmat->[0]->[1]->{'omega'}, 0.19479);
+ok($MLmat->[0]->[1]->{'dN'}, 0.0839);
+ok($MLmat->[0]->[1]->{'dS'}, 0.4309);
+ok($MLmat->[0]->[1]->{'lnL'}, -1508.607268);
+ok($MLmat->[0]->[1]->{'t'}, 0.47825);
+ok($MLmat->[0]->[1]->{'kappa'}, 2.29137);
+
+ok($MLmat->[2]->[3]->{'omega'}, 0.16114);
+ok($MLmat->[2]->[3]->{'dN'}, 0.1306);
+ok($MLmat->[2]->[3]->{'dS'}, 0.8105);
+ok($MLmat->[2]->[3]->{'lnL'},-1666.440696);
+ok($MLmat->[2]->[3]->{'t'}, 0.85281);
+ok($MLmat->[2]->[3]->{'kappa'}, 2.21652);
+
+my @codonposfreq = $result->get_codon_pos_basefreq();
+ok($codonposfreq[0]->{'A'}, 0.23579);
+ok($codonposfreq[0]->{'T'}, 0.14737);
+ok($codonposfreq[1]->{'C'}, 0.25123);
+ok($codonposfreq[2]->{'G'}, 0.32842);
+
+# AAML parsing - Empirical model
+$inpaml = new Bio::Tools::Phylo::PAML(-file => Bio::Root::IO->catfile
+				      (qw(t data aaml.mlc)));
+
+ok($inpaml);
+$result = $inpaml->next_result;
+ok($result);
+ok($result->model, 'Empirical (wag.dat)');
+my @trees = $result->get_trees;
+ok(@trees, 1);
+ok($trees[0]->score, -1042.768973);
+
+ok((scalar grep { $_->is_Leaf } $trees[0]->get_nodes), $result->get_seqs);
+
+my $aadistmat = $result->get_AADistMatrix();
+ok($aadistmat);
+ok($aadistmat->get_entry('Cow', 'Horse'), 0.5462);
+ok($aadistmat->get_entry('Baboon', 'Langur'), 0.1077);
+
+my %aafreq = %{$result->get_AAFreqs()};
+ok(%aafreq);
+ok($aafreq{'Human'}->{'N'}, 0.0769);
+ok($aafreq{'Human'}->{'R'}, 0.1077);
+
+my %ratfreqs = %{$result->get_AAFreqs('Rat')};
+ok($ratfreqs{'R'},0.0923);
+ok($ratfreqs{'F'},0.0154);
+my %avgfreqs = %{$result->get_AAFreqs('Average')};
+ok($avgfreqs{'Q'},0.0411);
+
+ok($result->get_AAFreqs('Average')->{'I'},0.0424);
+
+my $patterns = $result->patterns;
+my @pat = @{$patterns->{'-patterns'}};
+ok(scalar @pat, 98);
+ok($patterns->{'-ns'}, 6);
+ok($patterns->{'-ls'}, 130);
+
+ok((sort $result->get_stat_names)[0], 'constant_sites');
+ok($result->get_stat('constant_sites'), 46);
+ok($result->get_stat('constant_sites_percentage'), 35.38);
+
+# AAML parsing - pairwise model
+$inpaml = new Bio::Tools::Phylo::PAML(-file => Bio::Root::IO->catfile
+				      (qw(t data aaml_pairwise.mlc)));
+
+ok($inpaml);
+$result = $inpaml->next_result;
+ok($result);
+ok($result->model, 'Empirical_F (wag.dat)');
+ok($result->get_stat('loglikelihood'),-1189.106658);
+ok($result->get_stat('constant_sites'), 170);
+ok($result->get_stat('constant_sites_percentage'), 59.65);
+
+ok($result->get_AAFreqs('Average')->{'R'},0.0211);
+ok($result->get_AAFreqs('rabbit')->{'L'},0.1228);
+
+$aadistmat = $result->get_AADistMatrix();
+ok($aadistmat);
+ok($aadistmat->get_entry('rabbit', 'marsupial'), 0.2877);
+ok($aadistmat->get_entry('human', 'goat-cow'), 0.1439);
+
+$aadistmat = $result->get_AAMLDistMatrix();
+ok($aadistmat);
+ok($aadistmat->get_entry('rabbit', 'marsupial'), 0.3392);
+ok($aadistmat->get_entry('human', 'goat-cow'), 0.1551);
+
+my @seqs = $result->get_seqs;
+ok($seqs[0]->display_id, 'human');
+
+# YN00 parsing, pairwise Ka/Ks from Yang & Nielsen 2000
+$inpaml = new Bio::Tools::Phylo::PAML(-file => Bio::Root::IO->catfile
+				      (qw(t data yn00.mlc)));
+
+ok($inpaml);
+$result = $inpaml->next_result;
+
+ok($result);
+$MLmat = $result->get_MLmatrix;
+$NGmat = $result->get_NGmatrix;
+
+ok($NGmat->[0]->[1]->{'omega'}, 0.251);
+ok($NGmat->[0]->[1]->{'dN'}, 0.0863);
+ok($NGmat->[0]->[1]->{'dS'}, 0.3443);
+ok($NGmat->[2]->[3]->{'omega'}, 0.218);
+ok($NGmat->[2]->[3]->{'dN'}, 0.1348);
+ok($NGmat->[2]->[3]->{'dS'}, 0.6187);
+
+ok($MLmat->[0]->[1]->{'omega'}, 0.1625);
+ok($MLmat->[0]->[1]->{'dN'}, 0.0818);
+ok($MLmat->[0]->[1]->{'dS'}, 0.5031);
+ok($MLmat->[2]->[3]->{'omega'}, 0.1262);
+ok($MLmat->[2]->[3]->{'dN'}, 0.1298);
+ok($MLmat->[2]->[3]->{'dN_SE'}, 0.0149);
+ok($MLmat->[2]->[3]->{'dS'}, 1.0286);
+ok($MLmat->[2]->[3]->{'dS_SE'}, 0.2614);
+
+# codeml NSSites parsing
+
+$inpaml = new Bio::Tools::Phylo::PAML
+    (-file => Bio::Root::IO->catfile(qw(t data codeml_nssites.mlc)));
+
+ok($inpaml);
+$result = $inpaml->next_result;
+
+ok($result);
+ok($result->model, 'One dN/dS ratio dGamma (ncatG=11)');
+ok($result->version, 'paml 3.13, August 2002');
+$NGmat = $result->get_NGmatrix;
+ok($NGmat);
+
+ok($NGmat->[0]->[1]->{'omega'}, 0.2782);
+ok($NGmat->[0]->[1]->{'dN'}, 0.0133);
+ok($NGmat->[0]->[1]->{'dS'}, 0.0478);
+ok($NGmat->[1]->[2]->{'omega'}, 1.1055);
+ok($NGmat->[1]->[2]->{'dN'}, 0.0742);
+ok($NGmat->[1]->[2]->{'dS'}, 0.0671);
+          # this is
+          #   model num  description
+          #   kappa   log-likelihood tree length time used
+          #   shape   alpha/gamma r          f
+my @tstr = ([qw(0 one-ratio 0
+		4.54006 -906.017440    0.55764
+		)],
+	    [qw(1 neutral 2
+		4.29790 -902.503869    0.56529
+		)],
+	    [qw(2 selection 3 
+		5.12250 -900.076500    0.6032
+		)],
+	     );
+my $iter = 0;
+my $lastmodel;
+foreach my $model ( $result->get_NSSite_results ) {    
+    my $i = 0;
+    my $r = shift @tstr;
+    ok($model->model_num, $r->[$i++]);
+    ok($model->model_description, qr/$r->[$i++]/);
+    ok($model->num_site_classes,$r->[$i++]);
+    my $tree = $model->next_tree;
+    ok($model->kappa, $r->[$i++]);
+    ok($model->likelihood,$r->[$i]);
+    ok($tree->score, $r->[$i++]);
+    ok($tree->total_branch_length, $r->[$i++]);
+    if( $iter == 0 ) {
+	my $params = $model->shape_params;
+	ok($params->{'shape'}, 'alpha');
+	ok($params->{'gamma'},   '0.50000');
+	ok($params->{'r'}->[0], '1.00000');
+	ok($params->{'f'}->[0], '1.00000');
+    } elsif( $iter == 2 ) {
+	my $class = $model->dnds_site_classes;
+	ok($class->{'p'}->[0], '0.38160');
+	ok($class->{'w'}->[1], '1.00000');
+    }
+    $iter++;
+    $lastmodel = $model;
+}
+
+my ($firstsite) = $lastmodel->get_pos_selected_sites;
+ok($firstsite->[0], 15);
+ok($firstsite->[1], 'L');
+ok($firstsite->[2], 0.6588);
+
+# codeml NSSites parsing
+# for M0 model
+
+my $codeml_m0 = new Bio::Tools::Phylo::PAML
+    (-file => Bio::Root::IO->catfile(qw/t data M0.mlc/));
+ok($codeml_m0);
+my $result_m0 = $codeml_m0->next_result;
+my ($nssite_m0,$nssite_m1) = $result_m0->get_NSSite_results;
+ok($nssite_m0->num_site_classes,1);
+my $class_m0 = $nssite_m0->dnds_site_classes;
+ok($class_m0->{q/p/}->[0],q/1.00000/);
+ok($class_m0->{q/w/}->[0],0.09213);
+
+ok($nssite_m0->model_num, "0");
+@trees= $nssite_m0->get_trees;
+ok (@trees , 1 ); 
+# model 0
+ok($trees[0]->score, -30.819156);
+ok($nssite_m1->model_num, "1");
+@trees= $nssite_m1->get_trees;
+ok($trees[0]->score, -30.819157);
+
+# test BASEML
+# pairwise first
+
+my $baseml_p = Bio::Tools::Phylo::PAML->new
+    (-file => Bio::Root::IO->catfile(qw(t data baseml.pairwise)));
+ok($baseml_p);
+my $baseml = $baseml_p->next_result;
+my @b_seqs =  $baseml->get_seqs;
+ok($b_seqs[0]->seq, 'GTAGAGTACTTT');
+ok($b_seqs[1]->seq, 'GTAAGAGACGAT');
+
+my @otus = map { $_->display_id } @b_seqs;
+ok(scalar @otus, 3);
+my $ntfreq = $baseml->get_NTFreqs;
+ok($ntfreq);
+ok($ntfreq->{$otus[0]}->{'A'}, '0.3333');
+ok($ntfreq->{$otus[1]}->{'G'}, '0.2105');
+my $kappaM = $baseml->get_KappaMatrix;
+ok($kappaM);
+ok($kappaM->get_entry($otus[1],$otus[0]), '0.3240');
+ok($kappaM->get_entry($otus[0],$otus[1]), 
+   $kappaM->get_entry($otus[1],$otus[0]));
+ok($kappaM->get_entry($otus[1],$otus[2]), '0.1343');
+my $alphaM = $baseml->get_AlphaMatrix;
+ok($alphaM);
+ok($alphaM->get_entry($otus[1],$otus[0]), '9.3595');
+ok($alphaM->get_entry($otus[0],$otus[1]), 
+   $alphaM->get_entry($otus[1],$otus[0]));
+ok($alphaM->get_entry($otus[1],$otus[2]), '1.1101');
+ok($alphaM->get_entry($otus[0],$otus[2]), '33.1197');
+
+# codeml NSSites parsing
+# for only 1 model
+
+my $codeml_single = new Bio::Tools::Phylo::PAML
+    (-file => Bio::Root::IO->catfile(qw/t data singleNSsite.mlc/));
+ok($codeml_single);
+my $result_single = $codeml_single->next_result;
+my ($nssite_single) = $result_single->get_NSSite_results;
+ok($nssite_single->num_site_classes,q/3/);
+ok($nssite_single->kappa, q/5.28487/);
+ok($nssite_single->likelihood,q/-30.819156/);
+
+ok($baseml->get_stat('loglikelihood'),-110.532715);
+ok($baseml->get_stat('constant_sites'),46);
+ok($baseml->get_stat('constant_sites_percentage'),'80.70');
+ok($baseml->model,'HKY85 dGamma (ncatG=5)');
+
+# user trees
+$baseml_p = Bio::Tools::Phylo::PAML->new
+    (-file => Bio::Root::IO->catfile(qw(t data baseml.usertree)));
+$baseml = $baseml_p->next_result;
+
+@trees = $baseml->get_trees;
+ok(@trees, 1);
+ok($trees[0]->score, -129.328757);
+
+# codeml NSSites parsing
+# for branch site model/clade model
+
+my $codeml_bs = new Bio::Tools::Phylo::PAML
+    (-file => Bio::Root::IO->catfile(qw/t data branchSite.mlc/));
+ok($codeml_bs);
+my $result_bs = $codeml_bs->next_result;
+my ($nssite_bs) = $result_bs->get_NSSite_results;
+ok($nssite_bs->num_site_classes,q/4/);
+my $class_bs = $nssite_bs->dnds_site_classes;
+ok($class_bs->{q/p/}->[1],q/0.65968/);
+ok($class_bs->{q/w/}->[1]->{q/background/},q/0.00000/);
+ok($class_bs->{q/w/}->[2]->{q/foreground/},q/999.00000/);
+
+# Let's parse the RST file
+
+my $paml = Bio::Tools::Phylo::PAML->new
+    (-file => Bio::Root::IO->catfile(qw(t data codeml_lysozyme mlc)),
+     -dir  => Bio::Root::IO->catfile(qw(t data codeml_lysozyme)));
+
+$result = $paml->next_result;
+
+my ($rst) = grep {$_->id eq 'node#8'} $result->get_rst_seqs;
+ok($rst);
+ok($rst->seq, join('',qw(
+AAGGTCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAGATTGGGACTGGATGGCTAC
+AGGGGAATCAGCCTAGCAAACTGGATGTGTTTGGCCAAATGGGAGAGTGATTATAACACA
+CGAGCTACAAACTACAATCCTGGAGACCAAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCACTACTGGTGTAATAATGGCAAAACCCCAGGAGCAGTTAATGCCTGTCATATATCC
+TGCAATGCTTTGCTGCAAGATAACATCGCTGATGCTGTAGCTTGTGCAAAGAGGGTTGTC
+CGTGATCCACAAGGCATTAGAGCATGGGTGGCATGGAGAAATCATTGTCAAAACAGAGAT
+GTCAGTCAGTATGTTCAAGGTTGTGGAGTG)),
+   'node#8 reconstructed seq');
+
+my ($first_tree) = $result->get_rst_trees;
+my ($node) = $first_tree->find_node(-id => '5_Mmu_rhesus');
+my @changes = $node->get_tag_values('changes');
+my ($site) = grep { $_->{'site'} == 94 } @changes;
+ok($site->{'anc_aa'}, 'A','ancestral AA');
+ok($site->{'anc_prob'}, '0.947','ancestral AA prob');
+ok($site->{'derived_aa'}, 'T','derived AA');
+
+($node) = $first_tree->find_node(-id => '12');
+@changes = $node->get_tag_values('changes');
+($site) = grep { $_->{'site'} == 88 } @changes;
+ok($site->{'anc_aa'}, 'N','ancestral AA');
+ok($site->{'anc_prob'}, '0.993','ancestral AA prob');
+ok($site->{'derived_aa'}, 'D','derived AA');
+ok($site->{'derived_prob'}, '0.998','derived AA prob');
+
+my $persite = $result->get_rst_persite;
+# minus 1 because we have shifted so that array index matches site number
+# there are 130 sites in this seq file
+ok(scalar @$persite -1, $result->patterns->{'-ls'}); 
+# let's score site 1
+$site = $persite->[2]; 
+# so site 2, node 2 (extant)
+ok($site->[2]->{'codon'}, 'GTC');
+ok($site->[2]->{'aa'}, 'V');
+# site 2, node 3
+ok($site->[3]->{'codon'}, 'ATC');
+ok($site->[3]->{'aa'}, 'I');
+
+# ancestral node 9
+ok($site->[9]->{'codon'}, 'GTC');
+ok($site->[9]->{'aa'},    'V');
+ok($site->[9]->{'prob'},  '1.000');
+ok($site->[9]->{'Yang95_aa'},'V');
+ok($site->[9]->{'Yang95_aa_prob'},'1.000');
+
+# ancestral node 10
+ok($site->[10]->{'codon'}, 'ATC');
+ok($site->[10]->{'aa'},    'I');
+ok($site->[10]->{'prob'},  '0.992');
+ok($site->[10]->{'Yang95_aa'},'I');
+ok($site->[10]->{'Yang95_aa_prob'},'0.992');
+
+
+## PAML 3.15
+$paml = Bio::Tools::Phylo::PAML->new(-file => Bio::Root::IO->catfile(qw(t data codeml315.mlc)) );
+$result = $paml->next_result;
+
+ok($result->model, 'One dN/dS ratio');
+ok($result->version, qr'3\.15');
+$MLmat = $result->get_MLmatrix;
+$NGmat = $result->get_NGmatrix;
+
+ok($NGmat->[0]->[1]->{'omega'}, 0.2264);
+ok($NGmat->[0]->[1]->{'dN'}, 0.0186);
+ok($NGmat->[0]->[1]->{'dS'}, 0.0821);
+
+ok($MLmat->[0]->[1]->{'omega'}, 0.32693);
+ok($MLmat->[0]->[1]->{'dN'}, '0.0210');
+ok($MLmat->[0]->[1]->{'dS'}, 0.0644);

Added: trunk/packages/bioperl/branches/upstream/current/t/Perl.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Perl.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Perl.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,142 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Perl.t,v 1.8.6.1 2006/10/16 17:08:15 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $BIODBTESTS);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $error;
+
+BEGIN { 
+	# Test-harness setup: load Test, announce the plan and probe for IO::String.
+	# To handle systems with no installed Test module we include the t dir
+	# (where a copy of Test.pm is located) as a fallback.
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';	# NOTE: 'use' is a compile-time statement, so it is not really conditional
+	}
+	use Test;
+
+	$NUMTESTS = 14;		# total number of tests announced in the plan
+	$BIODBTESTS = 5;	# number of tests skipped below when IO::String is missing
+	plan tests => $NUMTESTS;
+	eval { require IO::String };
+	if( $@ ) {
+		print STDERR "IO::String not installed. This means the Bio::DB::* modules are not usable. Skipping some tests.\n";
+		for( 1..$BIODBTESTS ) {
+			skip("IO::String not installed. This means the Bio::DB::* modules are not usable. Skipping some tests",1);
+		}
+		$error = 1;	# checked later by the 'unless ( $error )' guard around the get_sequence tests
+	}
+}
+
+END {
+	# Remove the scratch file the script writes, then record every test
+	# that was never reached as skipped.
+	unlink 'Perltmp';
+	skip("Unable to run database access tests",1)
+		for $Test::ntest..$NUMTESTS;
+}
+
+use Bio::Perl;
+use File::Spec;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my ($seq_object,$filename,@seq_object_array);
+
+# Local tests: read sequences from files in several ways, write one back
+# out, then build and translate a sequence given as a literal string.
+# will guess file format from extension
+$filename = File::Spec->catfile(qw(t data cysprot1.fa));
+ok ($seq_object = read_sequence($filename)); 
+# forces genbank format
+$filename = File::Spec->catfile(qw(t data AF165282.gb));
+ok  ($seq_object = read_sequence($filename,'genbank')); 
+# reads an array of sequences
+$filename = File::Spec->catfile(qw(t data amino.fa));
+ok (@seq_object_array = read_all_sequences($filename,'fasta'), 2); 
+$filename = 'Perltmp';    # scratch file, removed again in the END block
+ok write_sequence(">$filename",'genbank',$seq_object);
+ok ($seq_object = new_sequence("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA","myname","AL12232"));
+
+my $trans;
+
+ok ($trans = translate($seq_object));
+
+ok ($trans = translate("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA"));
+
+ok ($trans = translate_as_string($seq_object));
+
+ok ($trans = translate_as_string("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA"));
+
+
+# we need to keep tests that depend on net connection at the end
+
+unless ( $error ) {	# $error is set in BEGIN when IO::String could not be loaded
+    # swissprot
+    eval {
+	ok ($seq_object = get_sequence('swissprot',"ROA1_HUMAN"));
+    };
+    if ($@) {
+	if($DEBUG) {
+	    warn "Warning: Couldn't connect to SWISS-PROT! Do you have network access?\n";
+        }
+	exit 0;	# stop quietly; the END block records the tests that never ran as skipped
+    }
+
+    # embl
+    eval {
+	ok ($seq_object = get_sequence('embl',"BUM"));
+    };
+    if ($@) {
+	if($DEBUG ) {
+	    warn "Warning: Couldn't connect to EMBL! Do you have network access?\n";
+	}
+        exit 0;
+    }
+
+    # genbank
+    eval {
+	ok ($seq_object = get_sequence('genbank',"AI129902"));
+    };
+    if ($@) {
+	if($DEBUG) {
+	    warn "Warning: Couldn't connect to GenBank! Do you have network access?\n";
+	}
+        exit 0;
+    }
+
+    # refseq
+    eval {
+	ok ($seq_object = get_sequence('genbank',"NM_006732"));
+    };
+    if ($@) {
+	if( $DEBUG ) {
+	    warn "Warning: Couldn't connect to RefSeq! Do you have network access?\n";
+	}
+        exit 0;
+    }
+
+        # genpept
+    eval {
+	ok ($seq_object = get_sequence('genpept',"AAC06201"));
+    };
+    if ($@) {
+	if($DEBUG) {
+	    warn "Warning: Couldn't connect to GenPept! Do you have network access?\n";
+	}
+        exit 0;
+    }
+
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Phenotype.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Phenotype.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Phenotype.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,262 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Phenotype.t,v 1.1 2002/09/25 22:34:21 czmasek Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # Test-harness setup. To handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback.
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';    # NOTE: 'use' is a compile-time statement, so it is not really conditional
+    }
+    use Test;
+    plan tests => 109;  # number of tests announced for this script
+}
+
+use Bio::Phenotype::Phenotype;
+use Bio::Species;
+use Bio::Annotation::Reference;
+use Bio::Map::CytoPosition;
+use Bio::Phenotype::Correlate;
+use Bio::Phenotype::Measure;
+use Bio::Annotation::DBLink;
+
+# Scalar attributes: each setter must return a true value and the getter must echo what was set.
+my $obj = Bio::Phenotype::Phenotype->new();
+
+ok( $obj->isa( "Bio::Phenotype::PhenotypeI" ) );
+ok( $obj->isa( "Bio::Phenotype::Phenotype" ) );
+
+ok( $obj->name( "r1" ) );
+ok( $obj->name(), "r1" );
+
+ok( $obj->description( "This is ..." ) );
+ok( $obj->description(), "This is ..." );
+
+my $mouse = Bio::Species->new();
+$mouse->classification( qw( musculus Mus ) );
+ok( $obj->species( $mouse ) );
+ok( $obj->species()->binomial(), "Mus musculus" );
+
+ok( $obj->comment( "putative" ) );
+ok( $obj->comment(), "putative" );
+
+
+# Gene symbols: expect 0 entries, add "A" and "B", read them back in order, remove them, expect 0 again.
+ok( $obj->each_gene_symbol(), 0 );
+
+ok( $obj->add_gene_symbols( ( "A", "B" ) ) );
+ok( $obj->each_gene_symbol(), 2 );
+my @gs = $obj->each_gene_symbol();
+ok( $gs[ 0 ], "A" );
+ok( $gs[ 1 ], "B" );
+ok( $obj->each_gene_symbol(), 2 );
+
+my @gs2 = $obj->remove_gene_symbols();
+ok( $gs2[ 0 ], "A" );
+ok( $gs2[ 1 ], "B" );
+
+ok( $obj->each_gene_symbol(), 0 );
+ok( $obj->remove_gene_symbols(), 0 );
+
+
+# Variants: same add / list / remove cycle, with two variant objects ($v1 carries length "123").
+my $v1 = Bio::Variation::VariantI->new();
+my $v2 = Bio::Variation::VariantI->new();
+
+$v1->length( "123" );
+
+ok( $obj->each_Variant(), 0 );
+
+ok( $obj->add_Variants( ( $v1, $v2 ) ) );
+ok( $obj->each_Variant(), 2 );
+my @vs = $obj->each_Variant();
+ok( $vs[ 0 ], $v1 );
+ok( $vs[ 1 ], $v2 );
+ok( $vs[ 0 ]->length(), "123" );
+ok( $obj->each_Variant(), 2 );
+
+my @vs2 = $obj->remove_Variants();
+ok( $vs2[ 0 ], $v1 );
+ok( $vs2[ 1 ], $v2 );
+
+ok( $obj->each_Variant(), 0 );
+ok( $obj->remove_Variants(), 0 );
+
+
+
+# References: same add / list / remove cycle, with two reference objects ($r1 carries title "title").
+my $r1 = Bio::Annotation::Reference->new();
+my $r2 = Bio::Annotation::Reference->new();
+
+$r1->title( "title" );
+
+ok( $obj->each_Reference(), 0 );
+
+ok( $obj->add_References( ( $r1, $r2 ) ) );
+ok( $obj->each_Reference(), 2 );
+my @rs = $obj->each_Reference();
+ok( $rs[ 0 ], $r1 );
+ok( $rs[ 1 ], $r2 );
+ok( $rs[ 0 ]->title(), "title" );
+ok( $obj->each_Reference(), 2 );
+
+my @rs2 = $obj->remove_References();
+ok( $rs2[ 0 ], $r1 );
+ok( $rs2[ 1 ], $r2 );
+
+ok( $obj->each_Reference(), 0 );
+ok( $obj->remove_References(), 0 );
+
+
+
+# Cytogenetic positions: same add / list / remove cycle, with two position objects ($c1 carries chr "12").
+my $c1 = Bio::Map::CytoPosition->new();
+my $c2 = Bio::Map::CytoPosition->new();
+
+$c1->chr( "12" );
+
+ok( $obj->each_CytoPosition(), 0 );
+
+ok( $obj->add_CytoPositions( ( $c1, $c2 ) ) );
+ok( $obj->each_CytoPosition(), 2 );
+my @cs = $obj->each_CytoPosition();
+ok( $cs[ 0 ], $c1 );
+ok( $cs[ 1 ], $c2 );
+ok( $cs[ 0 ]->chr(), "12" );
+ok( $obj->each_CytoPosition(), 2 );
+
+my @cs2 = $obj->remove_CytoPositions();
+ok( $cs2[ 0 ], $c1 );
+ok( $cs2[ 1 ], $c2 );
+
+ok( $obj->each_CytoPosition(), 0 );
+ok( $obj->remove_CytoPositions(), 0 );
+
+
+
+# Correlates: same add / list / remove cycle, with two correlate objects ($co1 is named "name").
+my $co1 = Bio::Phenotype::Correlate->new();
+my $co2 = Bio::Phenotype::Correlate->new();
+
+ok( $co1->name( "name" ) );
+
+ok( $obj->each_Correlate(), 0 );
+
+ok( $obj->add_Correlates( ( $co1, $co2 ) ) );
+ok( $obj->each_Correlate(), 2 );
+my @cos = $obj->each_Correlate();
+ok( $cos[ 0 ], $co1 );
+ok( $cos[ 1 ], $co2 );
+ok( $cos[ 0 ]->name, "name" );
+ok( $obj->each_Correlate(), 2 );
+
+my @cos2 = $obj->remove_Correlates();
+ok( $cos2[ 0 ], $co1 );
+ok( $cos2[ 1 ], $co2 );
+
+ok( $obj->each_Correlate(), 0 );
+ok( $obj->remove_Correlates(), 0 );
+
+
+
+# Measures: same add / list / remove cycle, with two measure objects ($m1 has description "desc").
+my $m1 = Bio::Phenotype::Measure->new();
+my $m2 = Bio::Phenotype::Measure->new();
+
+ok( $m1->description( "desc" ) );
+
+ok( $obj->each_Measure(), 0 );
+
+ok( $obj->add_Measures( ( $m1, $m2 ) ) );
+ok( $obj->each_Measure(), 2 );
+my @ms = $obj->each_Measure();
+ok( $ms[ 0 ], $m1 );
+ok( $ms[ 1 ], $m2 );
+ok( $ms[ 0 ]->description, "desc" );
+ok( $obj->each_Measure(), 2 );
+
+my @ms2 = $obj->remove_Measures();
+ok( $ms2[ 0 ], $m1 );
+ok( $ms2[ 1 ], $m2 );
+
+ok( $obj->each_Measure(), 0 );
+ok( $obj->remove_Measures(), 0 );
+
+
+# Keywords: expect 0 entries, add "A" and "B", read them back in order, remove them, expect 0 again.
+ok( $obj->each_keyword(), 0 );
+
+ok( $obj->add_keywords( ( "A", "B" ) ) );
+ok( $obj->each_keyword(), 2 );
+my @ks = $obj->each_keyword();
+ok( $ks[ 0 ], "A" );
+ok( $ks[ 1 ], "B" );
+ok( $obj->each_keyword(), 2 );
+
+my @ks2 = $obj->remove_keywords();
+ok( $ks2[ 0 ], "A" );
+ok( $ks2[ 1 ], "B" );
+
+ok( $obj->each_keyword(), 0 );
+ok( $obj->remove_keywords(), 0 );
+
+
+# Database links: same add / list / remove cycle, with two link objects ($l1 carries comment "comment").
+my $l1 = Bio::Annotation::DBLink->new();
+my $l2 = Bio::Annotation::DBLink->new();
+
+ok( $l1->comment( "comment" ) );
+
+ok( $obj->each_DBLink(), 0 );
+
+ok( $obj->add_DBLinks( ( $l1, $l2 ) ) );
+ok( $obj->each_DBLink(), 2 );
+my @ls = $obj->each_DBLink();
+ok( $ls[ 0 ], $l1 );
+ok( $ls[ 1 ], $l2 );
+ok( $ls[ 0 ]->comment(), "comment" );
+ok( $obj->each_DBLink(), 2 );
+
+my @ls2 = $obj->remove_DBLinks();
+ok( $ls2[ 0 ], $l1 );
+ok( $ls2[ 1 ], $l2 );
+
+ok( $obj->each_DBLink(), 0 );
+ok( $obj->remove_DBLinks(), 0 );
+
+
+# Genotypes: same add / list / remove cycle; plain strings "A" and "B" stand in for genotype objects here.
+ok( $obj->each_Genotype(), 0 );
+
+ok( $obj->add_Genotypes( ( "A", "B" ) ) );
+ok( $obj->each_Genotype(), 2 );
+my @gts = $obj->each_Genotype();
+ok( $gts[ 0 ], "A" );
+ok( $gts[ 1 ], "B" );
+ok( $obj->each_Genotype(), 2 );
+
+my @gts2 = $obj->remove_Genotypes();
+ok( $gts2[ 0 ], "A" );
+ok( $gts2[ 1 ], "B" );
+
+ok( $obj->each_Genotype(), 0 );
+ok( $obj->remove_Genotypes(), 0 );
+
+
+
+
+
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/PhylipDist.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/PhylipDist.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/PhylipDist.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,69 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };     # probe for an installed Test module
+    if( $@ ) {
+        use lib 't';            # NOTE: 'use' is a compile-time statement, so it is not really conditional
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 19;               # also read by the END block to skip whatever did not run
+    plan tests => $NTESTS;
+}
+use Bio::Matrix::PhylipDist;
+
+END {
+    # Every test that was never reached is recorded as skipped.
+    my @unrun = $Test::ntest..$NTESTS;
+    skip("Error in PhylipDist.pm",1) foreach @unrun;
+}
+use Bio::Tools::Phylo::Phylip::ProtDist;
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","phylipdist.out");
+my $parser = Bio::Tools::Phylo::Phylip::ProtDist->new(-program => 'phylipdist',
+                                                      -file => $inputfilename);
+
+my $phy = $parser->next_matrix;
+ok $phy->program, 'phylipdist';
+ok $phy->get_entry('Alpha','Beta'), '4.23419';
+ok $phy->get_entry('Gamma','Alpha'),'3.63330';
+my @column =  $phy->get_column('Alpha');
+ok $column[0], '0.00000';    # compare (','), not assign ('='): an assignment made the test always pass
+ok $column[1], '4.23419';
+ok $column[2], '3.63330';
+ok $column[3], '6.20865';
+ok $column[4], '3.45431';
+
+my @row    = $phy->get_row('Gamma');
+ok $row[0], '3.63330';
+ok $row[1], '3.49289';
+ok $row[2], '0.00000';
+ok $row[3], '3.68733';
+ok $row[4], '5.84929';
+
+my @diag   = $phy->get_diagonal;
+
+# every diagonal entry is expected to be zero
+ok $diag[0], '0.00000';
+ok $diag[1], '0.00000';
+ok $diag[2], '0.00000';
+ok $diag[3], '0.00000';
+ok $diag[4], '0.00000';
+
+my $matrix =<<END;
+    5
+Alpha          0.00000  4.23419  3.63330  6.20865  3.45431
+Beta           4.23419  0.00000  3.49289  3.36540  4.29179
+Gamma          3.63330  3.49289  0.00000  3.68733  5.84929
+Delta          6.20865  3.36540  3.68733  0.00000  4.43345
+Epsilon        3.45431  4.29179  5.84929  4.43345  0.00000
+END
+;
+ok $phy->print_matrix , $matrix;
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/PhysicalMap.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/PhysicalMap.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/PhysicalMap.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,223 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: PhysicalMap.t,v 1.2.4.4 2006/12/05 20:54:38 sendu Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG);
+    $DEBUG = $ENV{'BIOPERLDEBUG'};      # debug switch taken from the environment
+    # To handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback. NOTE: 'use' is compile-time, so the 'if' does not gate it.
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 38;                   # number of tests announced for this script
+}
+
+use Bio::Map::Clone;
+use Bio::Map::Contig;
+use Bio::Map::FPCMarker;
+use Bio::Map::OrderedPositionWithDistance;
+
+use Bio::Map::Physical;
+ok 1;    # all modules above loaded
+
+ok my $phm = Bio::Map::Physical->new;
+ok $phm->version(2), 2;
+ok $phm->version(), 2;
+ok $phm->modification_user('me'), 'me';
+ok $phm->modification_user(), 'me';
+
+ok $phm->group_type('xx'), 'xx';
+ok $phm->group_type(), 'xx';
+
+ok $phm->group_abbr('xx'), 'xx';
+ok $phm->group_abbr(), 'xx';
+
+ok $phm->core_exists, undef, 'code holds and returns a string, definition requires a boolean';
+
+ok $phm->core_exists(3), 1, 'code holds and returns a string, definition requires a boolean';
+
+ok $phm->core_exists(1), 1;
+ok $phm->core_exists(), 1;
+
+
+use Bio::MapIO::fpc;
+
+my $fpcpath = Bio::Root::IO->catfile('t','data','biofpc.fpc');
+
+my $mapio = Bio::MapIO->new(-format => "fpc", -species => 'demo', -readcor => 1, -file => $fpcpath);
+my $fobj = $mapio->next_map();
+
+ok $fobj->group_abbr(), "Chr";
+ok $fobj->core_exists(), 1;
+
+test_clones($fobj);     # the three helpers are defined further down in this script
+test_contigs($fobj);
+test_markers($fobj);
+
+#########################################################
+
+sub test_markers
+{
+    # Marker-level checks: every marker id of the map is visited, a few
+    # per-marker properties are accumulated, and the totals are compared
+    # with the expected values at the end.
+    # Argument: the map object whose markers are inspected.
+    my ($map) = @_;
+    my ($marker_total, $remark_total) = (0, 0);
+    my ($anchored, $framework)        = (0, 0);
+    my ($global_sum, $contig_sum)     = (0, 0);
+    my (%type_seen, %group_seen);
+
+    for my $marker_id ($map->each_markerid()) {
+        $marker_total++;
+        my $marker = $map->get_markerobj($marker_id);
+        unless (defined $marker) {
+            ok 1, 0;    # forced failure: id without an object
+            next;
+        }
+
+        my @remark_lines = split /\n/, $marker->remark();
+        $remark_total += @remark_lines;
+        $type_seen{$marker->type()} = 1;
+
+        # anchored markers contribute their group and their global position
+        if ($marker->anchor()) {
+            $anchored++;
+            $group_seen{$marker->group()} = 1;
+            $global_sum += $marker->global();
+        }
+        $framework++ if $marker->framework();
+
+        # position of this marker, summed over every contig id of the map
+        for my $contig_id ($map->each_contigid()) {
+            $contig_sum += $marker->position($contig_id);
+        }
+    }
+
+    # totals expected by the test
+    ok $marker_total, 15;
+    ok $remark_total, 17;
+    ok scalar(keys %type_seen), 2;
+    ok $anchored, 9;
+    ok $framework, 7;
+    ok scalar (keys %group_seen), 4;
+    ok $global_sum, 36;
+    ok $contig_sum, 1249;
+}
+
+#########################################################
+
+sub test_contigs
+{
+    # Contig-level checks: every contig id of the map is visited, a few
+    # per-contig properties are accumulated, and the totals are compared
+    # with the expected values at the end.
+    #
+    # Argument: the map object whose contigs are inspected.
+    # Results are reported through ok().
+
+    my ($map) = @_;
+    my ($contig_total, $with_chr_remark) = (0, 0);
+    my ($user_test, $trace_test)         = (0, 0);
+    my ($covered, $position_sum)         = (0, 0);
+    my %group_seen;
+
+    for my $contig_id ($map->each_contigid()) {
+        $contig_total++;
+        my $contig = $map->get_contigobj($contig_id);
+        unless (defined $contig) {
+            ok 1, 0;    # forced failure: id without an object
+            next;
+        }
+
+        # remark counters
+        $with_chr_remark++ if $contig->chr_remark() ne "";
+        $user_test++       if $contig->user_remark() eq "test";
+        $trace_test++      if $contig->trace_remark() eq "test";
+
+        # only ids greater than 0 add their range length to the total
+        if ($contig_id > 0) {
+            $covered += $contig->range()->end()
+                      - $contig->range()->start() + 1;
+        }
+
+        # anchored contigs contribute their position and their group
+        if ($contig->anchor()) {
+            $position_sum += $contig->position(); 
+            $group_seen{$contig->group()} = 1;
+        }
+    }
+
+    # totals expected by the test
+    ok $contig_total, 11;
+    ok $with_chr_remark, 3;
+    ok $user_test, 1;
+    ok $trace_test, 1;
+    ok $covered, 880; 
+    ok $position_sum, 15.55;
+    ok scalar(keys %group_seen), 3;
+}
+
+#########################################################
+
+sub test_clones
+{
+    # Clone-level checks: every clone id of the map is visited, per-clone
+    # properties are accumulated, and the totals are compared with the
+    # expected values. Argument: the map object whose clones are inspected.
+    my ($map) = @_;
+    my ($clone_total, $band_total, $marker_hits) = (0, 0, 0);
+    my ($remark_total, $fpc_remark_total)        = (0, 0);
+    my (%contig_seen, %status_seen);
+
+    for my $clone_id ($map->each_cloneid()) {
+        $clone_total++;
+        my $clone = $map->get_cloneobj($clone_id);
+        unless (defined $clone) {
+            ok 1, 0;    # forced failure: id without an object
+            next;
+        }
+
+        my $bands = $clone->bands();
+        $band_total += @$bands;
+        $contig_seen{$clone->contigid()} = 1;
+
+        # a clone with a contig id above 0 needs a defined, ordered range
+        if ($clone->contigid() > 0) {
+            unless (defined $clone->range()->start()
+                    and defined $clone->range()->end()
+                    and not $clone->range()->end() < $clone->range()->start()) {
+                ok 1, 0;    # forced failure: missing or reversed range
+            }
+        }
+
+        $marker_hits++ for $clone->each_markerid();
+
+        if ($clone->remark) {
+            my @lines = split /\n/, $clone->remark();
+            $remark_total += @lines;
+        }
+        if ($clone->fpc_remark) {
+            my @lines = split /\n/, $clone->fpc_remark();
+            $fpc_remark_total += @lines;
+        }
+        if ($clone->sequence_status) {
+            $status_seen{$clone->sequence_status()} = 1;
+        }
+    }
+
+    ok $clone_total, 355;
+    ok $band_total, 9772;
+    ok scalar(keys %contig_seen), 11;
+    ok $marker_hits, 46;
+    ok $remark_total, 12;
+    ok $fpc_remark_total, 162;
+    ok scalar(keys %status_seen), 5;
+}
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/Pictogram.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Pictogram.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Pictogram.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,75 @@
+# -*-Perl-*-
+# $Id: Pictogram.t,v 1.6 2005/09/17 02:11:21 bosborne Exp $
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+use vars qw($NTESTS $SVG_AVAIL);
+
+BEGIN {
+    eval { require Test; };     # probe for an installed Test module
+    if( $@ ) {
+        use lib 't';            # NOTE: 'use' is a compile-time statement, so it is not really conditional
+    }
+    use Test;
+    $NTESTS = 3;                # also read by the END block to skip whatever did not run
+    plan tests => $NTESTS;
+    eval {
+		 require Bio::Graphics::Pictogram;
+		 require SVG;
+	 };
+    $SVG_AVAIL = $@ ? 0 : 1;    # false when either of the two requires above failed
+}
+
+END {
+	# Every test that was never reached is recorded as skipped.
+	my @unrun = $Test::ntest..$NTESTS;
+	skip("Cannot complete Pictogram tests. Skipping. ",1) foreach @unrun;
+}
+
+if(!$SVG_AVAIL){
+	warn("SVG not installed, skipping tests");
+	exit;    # the END block records the planned tests as skipped
+}
+
+use Bio::SeqIO;
+use Bio::Matrix::PSM::IO;
+
+my $file =  Bio::Root::IO->catfile("t","data","pictogram.fa");
+my $sio = Bio::SeqIO->new(-file=>$file,-format=>'fasta');
+my @seq;
+while(my $seq = $sio->next_seq){
+  push @seq, $seq;
+}
+my $picto = Bio::Graphics::Pictogram->new(-width=>"800",
+                                          -fontsize=>"80",
+                                          -plot_bits=>1,
+                                          -color=>{'A'=>'red',
+                                                   'G'=>'blue',
+                                                   'C'=>'green',
+                                                   'T'=>'magenta'});
+ok $picto->isa("Bio::Graphics::Pictogram");
+
+my $svg = $picto->make_svg(\@seq);    # first input form: a list of sequences
+ok $svg->xmlify;
+
+my $psmIO =  Bio::Matrix::PSM::IO->new(-format=>'meme', 
+                                   -file=> Bio::Root::IO->catfile(qw(t data meme.dat)));
+$picto = Bio::Graphics::Pictogram->new(-width=>"800",
+                                          -normalize=>1,
+                                          -fontsize=>"80",
+                                          -plot_bits=>1,
+                                          -color=>{'A'=>'red',
+                                                   'G'=>'blue',
+                                                   'C'=>'green',
+                                                   'T'=>'magenta'});
+
+my $psm = $psmIO->next_psm;
+$svg = $picto->make_svg($psm);        # second input form: a matrix read from the meme file
+ok $svg->xmlify;
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/PopGen.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/PopGen.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/PopGen.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,463 @@
+# -*-Perl-*- mode for emacs
+# $Id: PopGen.t,v 1.27 2006/06/08 02:11:21 jason Exp $
+
+# This will outline many tests for the population genetics
+# objects in the Bio::PopGen namespace
+
+my $error;
+
+use vars qw($SKIPXML $LASTXMLTEST); 
+use strict;
+use lib '.';
+
+BEGIN {     
+    # Test-harness setup. To handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback. NOTE: 'use' is compile-time, so the 'if' does not gate it.
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use vars qw($NTESTS);
+    $NTESTS = 89;       # number of tests announced in the plan
+    $error = 0;         # nothing in this block sets it to 1
+
+    use Test;
+    plan tests => $NTESTS; 
+
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+
+use Bio::PopGen::Individual;
+use Bio::PopGen::Genotype;
+use Bio::PopGen::Population;
+use Bio::PopGen::IO;
+use Bio::PopGen::PopStats;
+use Bio::AlignIO;
+use Bio::PopGen::Statistics;
+use Bio::PopGen::Utilities;
+
+
+# test Fu and Li's D, D*, F and F* (the *_counts class methods) using data from the paper
+
+ok(sprintf("%.3f",
+	   Bio::PopGen::Statistics->fu_and_li_D_counts(24, 18, 9)),
+   -1.529);
+
+ok(sprintf("%.3f",
+	   Bio::PopGen::Statistics->fu_and_li_D_star_counts(24, 18, 10)),
+   -1.558);
+
+ok(sprintf("%.3f",
+	   Bio::PopGen::Statistics->fu_and_li_F_counts(24, 3.16, 18, 9)),
+   -1.735);
+
+ok(sprintf("%.2f",
+	   Bio::PopGen::Statistics->fu_and_li_F_star_counts(24, 3.16, 18, 10)),
+   -1.71);
+
+my ($FILE1) = qw(popgentst1.out);    # scratch file used by the write tests further down
+
+END { 
+    # unlink($FILE1);
+}
+my @individuals = ( Bio::PopGen::Individual->new(-unique_id => '10a'));
+ok($individuals[0]);
+
+my @genotypes = ( Bio::PopGen::Genotype->new(-marker_name    => 'Mkr1',
+                                             -individual_id  => '10a',
+                                             -alleles => [ qw(A a)]),
+                  Bio::PopGen::Genotype->new(-marker_name    => 'Mkr2',
+                                             -individual_id  => '10a',
+                                             -alleles => [ qw(B B)]),
+                  Bio::PopGen::Genotype->new(-marker_name    => 'Mkr3',
+                                             -individual_id  => '10a',
+                                             -alleles => [ qw(A a)]));
+ok(($genotypes[1]->get_Alleles)[0], 'B');
+
+$individuals[0]->add_Genotype(@genotypes);    # attach all three genotypes to individual '10a'
+ok($individuals[0]->get_Genotypes,3);
+ok($individuals[0]->get_Genotypes(-marker => 'Mkr3')->get_Alleles(),2);
+my @alleles = $individuals[0]->get_Genotypes(-marker => 'Mkr2')->get_Alleles();
+ok($alleles[0], 'B');
+
+					     
+my $population = Bio::PopGen::Population->new(-name        => 'TestPop1',
+                                              -source      => 'testjasondata',
+                                              -description => 'throw away example',
+                                              -individuals => \@individuals);
+
+ok(scalar ($population->get_Individuals()), 1);
+ok($population->name, 'TestPop1');
+ok($population->source, 'testjasondata');
+ok($population->description, 'throw away example');
+
+my @genotypes2 = ( Bio::PopGen::Genotype->new(-marker_name   => 'Mkr1',
+                                              -individual_id => '11',
+                                              -alleles       => [ qw(A A)]),
+                   Bio::PopGen::Genotype->new(-marker_name   => 'Mkr2',
+                                              -individual_id => '11',
+                                              -alleles       => [ qw(B B)]),
+                   Bio::PopGen::Genotype->new(-marker_name   => 'Mkr3',
+                                              -individual_id => '11',
+                                              -alleles       => [ qw(a a)]),
+                   Bio::PopGen::Genotype->new(-marker_name   => 'Mkr4',
+                                              -individual_id => '11',
+                                              -alleles       => [ qw(C C)])
+                   );
+push @individuals, Bio::PopGen::Individual->new(-genotypes   => \@genotypes2,
+                                                -unique_id   => '11');
+$population->add_Individual($individuals[1]);
+
+ok(scalar ($population->get_Individuals()), 2);
+my ($found_ind) = $population->get_Individuals(-unique_id => '10a');
+ok($found_ind->unique_id, '10a');
+ok(scalar($population->get_Individuals(-marker => 'Mkr4')) , 1);    # only individual '11' has Mkr4
+ok(scalar($population->get_Individuals(-marker => 'Mkr3')) , 2);
+
+my @g = $population->get_Genotypes(-marker => 'Mkr4');
+
+ok($g[0]->individual_id, '11');
+ok(($g[0]->get_Alleles())[0], 'C');
+
+my $marker = $population->get_Marker('Mkr3');
+ok($marker);
+
+@alleles = $marker->get_Alleles;
+ok(@alleles,2);
+my %af = $marker->get_Allele_Frequencies();    # Mkr3 alleles in the population: A a / a a
+ok($af{'a'}, 0.75);
+ok($af{'A'}, 0.25);
+
+
+# Read in data from a file
+my $io = Bio::PopGen::IO->new(-format => 'csv',
+                              -file   => Bio::Root::IO->catfile(qw(t data
+                                                                   popgen_saureus.dat)));
+
+my @inds;
+while( my $ind = $io->next_individual ) {
+    push @inds, $ind;
+}
+
+my @mrsainds = grep { $_->unique_id =~ /^MRSA/ } @inds;
+my @mssainds = grep { $_->unique_id =~ /^MSSA/ } @inds;
+my @envinds = grep { $_->unique_id =~ /^NC/ } @inds;
+
+ok(scalar @mrsainds, 9);
+ok(scalar @mssainds, 10);
+ok(scalar @envinds, 5);
+
+my $mrsapop = Bio::PopGen::Population->new(-name        => 'MRSA',
+                                           -description => 'Resistant S.aureus',
+                                           -individuals => \@mrsainds);
+
+my $mssapop = Bio::PopGen::Population->new(-name        => 'MSSA',
+                                           -description =>'Suceptible S.aureus',
+                                           -individuals => \@mssainds);
+
+my $envpop = Bio::PopGen::Population->new(-name        => 'NC',
+                                          -description => 'WT isolates',
+                                          -individuals => \@envinds);
+
+my $stats = Bio::PopGen::PopStats->new(-haploid => 1);
+my $fst = $stats->Fst([$mrsapop,$mssapop],[qw(AFLP1)]);
+# We're going to check the values against other programs first
+ok(sprintf("%.3f",$fst),0.077,'mrsa,mssa aflp1'); 
+  
+$fst = $stats->Fst([$envpop,$mssapop,$mrsapop],[qw(AFLP1 )]);
+ok(sprintf("%.3f",$fst),0.035,'all pops, aflp1'); 
+
+$fst = $stats->Fst([$mrsapop,$envpop],[qw(AFLP1 AFLP2)]);
+ok(sprintf("%.3f",$fst),0.046,'mrsa,envpop aflp1,aflp2');
+
+# Read in data from a file
+$io = Bio::PopGen::IO->new(-format => 'csv',
+                           -file   => Bio::Root::IO->catfile
+                           (qw(t data popgen_saureus.multidat)));
+
+@inds = ();
+while( my $ind = $io->next_individual ) {
+    push @inds, $ind;
+}
+
+@mrsainds = grep { $_->unique_id =~ /^MRSA/ } @inds;
+@mssainds = grep { $_->unique_id =~ /^MSSA/ } @inds;
+@envinds = grep { $_->unique_id =~ /^NC/ } @inds;
+
+ok(scalar @mrsainds, 7);
+ok(scalar @mssainds, 10);
+ok(scalar @envinds, 5);
+
+$mrsapop = Bio::PopGen::Population->new(-name        => 'MRSA',
+                                        -description => 'Resistant S.aureus',
+                                        -individuals => \@mrsainds);
+
+$mssapop = Bio::PopGen::Population->new(-name        => 'MSSA',
+                                        -description =>'Suceptible S.aureus',
+                                        -individuals => \@mssainds);
+
+$envpop = Bio::PopGen::Population->new(-name        => 'NC',
+                                       -description => 'WT isolates',
+                                       -individuals => \@envinds);
+
+$stats = Bio::PopGen::PopStats->new(-haploid => 1);
+my @all_bands = map { 'B' . $_ } 1..20;    # marker names B1 .. B20
+my @mkr1     = map { 'B' . $_ } 1..13;
+my @mkr2     = map { 'B' . $_ } 14..20;
+
+# still wrong ?
+$fst = $stats->Fst([$mrsapop,$mssapop],[@all_bands ]);
+skip(sprintf("%.3f",$fst),'-0.001','mssa,mrsa all_bands'); # We're going to check the values against other programs first
+$fst = $stats->Fst([$envpop,$mssapop],[ @mkr1 ]);
+ok(sprintf("%.3f",$fst),0.023,'env,mssa mkr1'); # We're going to check the values against other programs first
+
+$fst = $stats->Fst([$envpop,$mssapop,$mrsapop],[ @all_bands ]);
+ok(sprintf("%.3f",$fst),0.071,'env,mssa,mrsa all bands'); # We're going to check the values against other programs first
+
+$fst = $stats->Fst([$envpop,$mssapop,$mrsapop],[ @mkr2 ]);
+ok(sprintf("%.3f",$fst),0.076, 'env,mssa,mrsa mkr2'); # We're going to check the values against other programs first
+
+$fst = $stats->Fst([$mrsapop,$envpop],[@all_bands ]);
+ok(sprintf("%.3f",$fst),0.241,'mrsa,nc all_bands'); # We're going to check the values against other programs first
+
+# test overall allele freq setting for a population
+
+my $poptst1 = Bio::PopGen::Population->new(-name => 'tst1');
+my $poptst2 = Bio::PopGen::Population->new(-name => 'tst2');
+
+$poptst1->set_Allele_Frequency(-frequencies => 
+                               { 'marker1' => { 'a' => '0.20',
+                                                'A' => '0.80' },
+                                 'marker2' => { 'A' => '0.10',
+                                                'B' => '0.20',
+                                                'C' => '0.70' }
+                             });
+
+my $mk1 = $poptst1->get_Marker('marker1');
+my %f1 = $mk1->get_Allele_Frequencies;
+ok($f1{'a'}, '0.20');
+ok($f1{'A'}, '0.80');
+my $mk2 = $poptst1->get_Marker('marker2');
+my %f2 = $mk2->get_Allele_Frequencies;
+ok($f2{'C'}, '0.70');
+
+$poptst2->set_Allele_Frequency(-name      => 'marker1',    # second calling form: one allele per call
+                               -allele    => 'A',
+                               -frequency => '0.60');
+$poptst2->set_Allele_Frequency(-name      => 'marker1',
+                               -allele    => 'a',
+                               -frequency => '0.40');
+
+#$fst = $stats->Fst([$poptst1,$poptst2],[qw(marker1 marker2) ]);
+skip('Fst not calculated yet for just allele freqs',1,'marker1 test'); # 
+
+$io = Bio::PopGen::IO->new(-format => 'csv',
+                           -file   => ">$FILE1");
+
+$io->write_individual(@inds);
+$io->close();
+ok( -e $FILE1);     # the writer must have created the file
+unlink($FILE1);
+$io = Bio::PopGen::IO->new(-format => 'csv',
+                           -file   => ">$FILE1");
+
+$io->write_population(($mssapop,$mrsapop));
+$io->close();
+ok( -e $FILE1);
+unlink($FILE1);
+
+$io = Bio::PopGen::IO->new(-format => 'prettybase',
+                           -file   => ">$FILE1");
+
+$io->write_individual(@inds);
+$io->close();
+ok( -e $FILE1);
+unlink($FILE1);
+
+$io = Bio::PopGen::IO->new(-format => 'prettybase',
+                           -file   => ">$FILE1");
+
+$io->write_population(($mssapop,$mrsapop));
+$io->close();
+ok( -e $FILE1);
+unlink($FILE1);
+
+
+# Let's do PopGen::Statistics tests here
+
+$io = Bio::PopGen::IO->new(-format          => 'prettybase',
+                           -no_header       => 1,
+                           -file            => Bio::Root::IO->catfile
+                           (qw(t data popstats.prettybase )));
+my (@ingroup,@outgroup);
+my $sitecount;
+while( my $ind = $io->next_individual ) {
+    if($ind->unique_id =~ /out/) {
+        push @outgroup, $ind;
+    } else { 
+        push @ingroup, $ind;
+        $sitecount = scalar $ind->get_marker_names() unless defined $sitecount;
+    }
+}
+$stats = Bio::PopGen::Statistics->new();
+
+# Real data and values courtesy M.Hahn and DNASP
+
+ok($stats->pi(\@ingroup),2);
+ok(Bio::PopGen::Statistics->pi(\@ingroup,$sitecount),0.4);
+
+ok(Bio::PopGen::Statistics->theta(\@ingroup),1.92);
+ok(Bio::PopGen::Statistics->theta(\@ingroup,$sitecount),0.384);
+
+# Test with a population object
+my $ingroup  = Bio::PopGen::Population->new(-individuals => \@ingroup);
+my $outgroup = Bio::PopGen::Population->new(-individuals => \@outgroup);
+
+ok($stats->pi($ingroup),2);
+ok(Bio::PopGen::Statistics->pi($ingroup,$sitecount),0.4);
+
+ok(Bio::PopGen::Statistics->theta($ingroup),1.92);
+ok(Bio::PopGen::Statistics->theta($ingroup,$sitecount),0.384);
+
+my $haploidpop = $ingroup->haploid_population;
+ok(sprintf("%.5f",Bio::PopGen::Statistics->tajima_D($haploidpop)), 0.27345);
+
+# to fix
+ok(sprintf("%.5f",Bio::PopGen::Statistics->tajima_D(\@ingroup)),0.27345);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->tajima_D($ingroup)),0.27345);
+
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D_star(\@ingroup)),
+   0.27345);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D_star($ingroup)),
+   0.27345);
+
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_F_star(\@ingroup)),
+     0.27834);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_F_star($ingroup)),
+   0.27834);
+
+ok((Bio::PopGen::Statistics->derived_mutations(\@ingroup,\@outgroup))[0], 1);
+ok((Bio::PopGen::Statistics->derived_mutations($ingroup,\@outgroup))[0], 1);
+ok((Bio::PopGen::Statistics->derived_mutations(\@ingroup,$outgroup))[0], 1);
+ok((Bio::PopGen::Statistics->derived_mutations($ingroup,$outgroup))[0], 1);
+
+# expect to have 1 external mutation
+@ingroup = $haploidpop->get_Individuals;
+
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D(\@ingroup,1)),0.75653);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D($ingroup,1)),0.75653);
+
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D(\@ingroup,
+                                                       \@outgroup)),0.75653);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D($ingroup,
+                                                       \@outgroup)),0.75653);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D($ingroup,
+                                                       $outgroup)),0.75653);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_D(\@ingroup,
+                                                       $outgroup)),0.75653);
+
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_F(\@ingroup,1)),
+     0.77499);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_F($haploidpop,1)),0.77499);
+ok(sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_F($ingroup,
+                                                       \@outgroup)),0.77499);
+ok( sprintf("%.5f",Bio::PopGen::Statistics->fu_and_li_F($ingroup,
+                                                         $outgroup)),0.77499);
+
+
+# Test composite LD
+
+$io = Bio::PopGen::IO->new(-format => 'prettybase',
+                           -file   => Bio::Root::IO->catfile
+                           (qw(t data compLD_test.prettybase)));
+
+my $pop = $io->next_population;
+
+my %LD = $stats->composite_LD($pop);    # read as $LD{marker1}->{marker2}->[index] below
+
+ok($LD{'01'}->{'02'}->[1], 10);
+ok($LD{'01'}->{'03'}->[1], 0);
+ok($LD{'02'}->{'03'}->[1], 0);
+
+# Test composite LD on the compLD_missingtest data set
+
+$io = Bio::PopGen::IO->new(-format => 'prettybase',
+                           -file   => Bio::Root::IO->catfile
+                           (qw(t data compLD_missingtest.prettybase)));
+
+$pop = $io->next_population;
+
+%LD = $stats->composite_LD($pop);
+
+ok(sprintf("%.4f",$LD{'ProC9198EA'}->{'ProcR2973EA'}->[0]), -0.0375);
+ok(sprintf("%.2f",$LD{'ProC9198EA'}->{'ProcR2973EA'}->[1]), 2.56);
+
+
+
+# build a population from an alignment
+
+my $alnin = Bio::AlignIO->new(-format => 'clustalw',
+			      -file   => Bio::Root::IO->catfile(qw(t data T7.aln)));
+my $aln = $alnin->next_aln;
+$population = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln);
+ok($population->get_number_individuals,9);
+#warn($aln->match_line,"\n");
+my $matchline = $aln->match_line;
+ok( $population->get_marker_names, $matchline =~ tr/ //);   # tr/ // counts the blanks in the match line
+for my $name ( $population->get_marker_names ) {            # only checks that each marker can be fetched
+    my $marker = $population->get_Marker($name); 
+#    warn("$name ",join(" ",$marker->get_Alleles()),"\n");
+}
+
+
+# test Rich's phase and hap parsers
+
+$io = Bio::PopGen::IO->new(-format   => 'hapmap',
+                           -verbose  => 1,
+                           -no_header=> 1,
+                           -starting_column => 10,
+                           -file     => Bio::Root::IO->catfile(qw(t data
+                                                                 example.hap)));
+
+# Some IO might support reading in a population at a time
+
+my @population;
+while( my $ind = $io->next_individual ) {
+    push @population, $ind;
+}
+ok(@population, 90);
+ok($population[3]->unique_id, 'NA06994');
+ok($population[3]->get_Genotypes, 34);
+$population = Bio::PopGen::Population->new(-individuals => \@population);
+
+ok(sprintf("%.3f",$stats->pi($population)),12.335);
+# if forced haploid population is called within pi
+# need to decide about that...
+# ok(sprintf("%.3f",$stats->pi($population)),12.266);
+
+ok(sprintf("%.3f",$stats->theta($population)),5.548);
+skip(1,'tjd inconsistency, need to recalculate');
+skip(1,'tjd inconsistency, need to recalculate');
+#ok(sprintf("%.3f",$stats->tajima_D($population)),2.926);
+#ok(sprintf("%.3f",$stats->tajima_D($population->haploid_population)),3.468);
+
+$io = Bio::PopGen::IO->new(-format => 'phase',
+                           -file   => Bio::Root::IO->catfile(qw(t data
+                                                                example.phase)));
+
+# Some IO might support reading in a population at a time
+
+@population = ();
+while( my $ind = $io->next_individual ) {
+    push @population, $ind;
+}
+ok(@population, 4);
+
+
+# test diploid data
+

Added: trunk/packages/bioperl/branches/upstream/current/t/PopGenSims.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/PopGenSims.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/PopGenSims.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,62 @@
+# -*-Perl-*- mode for emacs
+# $Id: PopGenSims.t,v 1.3 2003/10/25 14:52:22 heikki Exp $
+
+# This will outline tests for the population genetics simulation
+# in the Bio::PopGen::Simulation namespace
+# Coalescent has its own tests though in t/Coalescent.t
+
+my $error;
+
+use vars qw($SKIPXML $LASTXMLTEST); 
+use strict;
+use lib '.';
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use vars qw($NTESTS);
+    $NTESTS = 22;
+    $error = 0;
+
+    use Test;
+    plan tests => $NTESTS; 
+
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+
+use Bio::PopGen::Simulation::GeneticDrift;
+
+my $sim = new Bio::PopGen::Simulation::GeneticDrift(-popsize => 40,
+						    -alleles => {A => 0.2,
+								 B => 0.8});
+
+my (@Afreqs, @Bfreqs);  # frequencies of alleles A and B, one entry per generation
+for(my $i =0 ;$i < 10; $i++ ) {  # step the simulation through ten generations
+    my %f = $sim->next_generation;
+    push @Afreqs, $f{'A'};
+    push @Bfreqs, $f{'B'};
+    ok(($f{'A'}||0) + ($f{'B'}||0), 1, 'Allele freqs should sum to 1');
+}
+
+ok(@Afreqs, 10);
+ok(($Afreqs[9]||0) <= 1, 1, 'All frequencies should be <= 1');
+
+$sim = new Bio::PopGen::Simulation::GeneticDrift(-popsize => 50,
+						 -alleles => {A => 0.2,
+							      B => 0.3,
+							      C => 0.5,
+							  });
+
+for(my $i =0 ;$i < 10; $i++ ) {
+    my %f = $sim->next_generation;
+    ok(($f{'A'}||0) + ($f{'B'}||0) + ($f{'C'}||0), 1, 'Allele freqs should sum to 1');
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/PrimarySeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/PrimarySeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/PrimarySeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,187 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: PrimarySeq.t,v 1.23 2006/01/23 13:29:18 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl t/test.t'
+
+use strict;
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => 48;
+}
+use Bio::PrimarySeq;
+use Bio::Location::Simple;
+use Bio::Location::Fuzzy;
+use Bio::Location::Split;
+
+ok(1);
+
+my $seq = Bio::PrimarySeq->new(
+					 '-seq'              => 'TTGGTGGCGTCAACT',
+			       '-display_id'       => 'new-id',
+			       '-alphabet'         => 'dna',
+			       '-accession_number' => 'X677667',
+			       '-desc'             => 'Sample Bio::Seq object');
+ok defined $seq;
+ok $seq->isa('Bio::PrimarySeqI');
+ok $seq->accession_number(), 'X677667';
+ok $seq->seq(), 'TTGGTGGCGTCAACT';
+ok $seq->display_id(), 'new-id';
+ok $seq->alphabet(), 'dna';
+ok $seq->is_circular(), undef;
+ok $seq->is_circular(1);
+ok $seq->is_circular(0), 0;
+
+# check IdentifiableI and DescribableI interfaces
+ok $seq->isa('Bio::IdentifiableI');
+ok $seq->isa('Bio::DescribableI');
+# make sure all methods are implemented
+ok $seq->authority("bioperl.org"), "bioperl.org";
+ok $seq->namespace("t"), "t";
+ok $seq->version(0), 0;
+ok $seq->lsid_string(), "bioperl.org:t:X677667";
+ok $seq->namespace_string(), "t:X677667.0";
+ok $seq->description(), 'Sample Bio::Seq object';
+ok $seq->display_name(), "new-id";
+
+my $location = new Bio::Location::Simple('-start' => 2, 
+													  '-end' => 5,
+													  '-strand' => -1);
+ok ($seq->subseq($location), 'ACCA');
+
+my $splitlocation = new Bio::Location::Split();
+$splitlocation->add_sub_Location( new Bio::Location::Simple(
+								 '-start' => 1,
+							    '-end'   => 4,
+							    '-strand' => 1));
+
+$splitlocation->add_sub_Location( new Bio::Location::Simple(
+                         '-start' => 7,
+							    '-end'   => 12,
+							    '-strand' => -1));
+
+ok( $seq->subseq($splitlocation), 'TTGGTGACGC');
+
+my $fuzzy = new Bio::Location::Fuzzy(-start => '<3',
+												 -end   => '8',
+												 -strand => 1);
+
+ok( $seq->subseq($fuzzy), 'GGTGGC');
+
+my $trunc = $seq->trunc(1,4);
+ok defined $trunc;
+ok $trunc->seq(), 'TTGG', "Expecting TTGG. Got ".$trunc->seq();
+
+$trunc = $seq->trunc($splitlocation);
+ok( defined $trunc);
+ok( $trunc->seq(), 'TTGGTGACGC');
+
+$trunc = $seq->trunc($fuzzy);
+ok( defined $trunc);
+ok( $trunc->seq(), 'GGTGGC');
+
+my $rev = $seq->revcom();
+ok defined $rev; 
+
+ok $rev->seq(), 'AGTTGACGCCACCAA', 'revcom() failed, was ' . $rev->seq();
+
+#
+# Translate
+#
+
+my $aa = $seq->translate(); # TTG GTG GCG TCA ACT
+ok $aa->seq, 'LVAST', "Translation: ". $aa->seq;
+
+# tests for non-standard initiator codon coding for
+# M by making translate() look for an initiator codon and
+# terminator codon ("complete", the 5th argument below)
+$seq->seq('TTGGTGGCGTCAACTTAA'); # TTG GTG GCG TCA ACT TAA
+$aa = $seq->translate(undef, undef, undef, undef, 1);
+ok $aa->seq, 'MVAST', "Translation: ". $aa->seq;
+
+# same test as previous, but using named parameter
+$aa = $seq->translate(-complete => 1);
+ok $aa->seq, 'MVAST', "Translation: ". $aa->seq;
+
+# find ORF, ignore codons outside the ORF or CDS
+$seq->seq('TTTTATGGTGGCGTCAACTTAATTT'); # ATG GTG GCG TCA ACT
+$aa = $seq->translate(-orf => 1);
+ok $aa->seq, 'MVAST*', "Translation: ". $aa->seq;
+
+# smallest possible ORF
+$seq->seq("ggggggatgtagcccc"); # atg tga
+$aa = $seq->translate(-orf => 1);
+ok $aa->seq, 'M*', "Translation: ". $aa->seq;
+
+# same as previous but complete, so * is removed
+$aa = $seq->translate(-orf => 1,
+                      -complete => 1);
+ok $aa->seq, 'M', "Translation: ". $aa->seq;
+
+# ORF without termination codon
+# should warn, let's change it into throw for testing
+$seq->verbose(2);
+$seq->seq("ggggggatgtggcccc"); # atg tgg ccc
+
+eval {$aa = $seq->translate(-orf => 1);};
+# Always emit this test: when it was guarded by "if ($@)" it silently vanished
+# from the planned count if translate() did not throw or threw another message.
+ok( ($@ =~ /atgtggcccc\n/) ? 1 : 0 );
+#ok $aa->seq, 'MWP', "Translation: ". $aa->seq;
+$seq->verbose(0);
+
+# use non-standard codon table where terminator is read as Q
+$seq->seq('ATGGTGGCGTCAACTTAG'); # ATG GTG GCG TCA ACT TAG
+$aa = $seq->translate(-codontable_id => 6);
+ok $aa->seq, 'MVASTQ', "Translation: ". $aa->seq;
+
+# insert an odd character instead of terminating with *
+$aa = $seq->translate(-terminator => 'X');
+ok $aa->seq, 'MVASTX', "Translation: ". $aa->seq;
+
+# change frame from default
+$aa = $seq->translate(-frame => 1); # TGG TGG CGT CAA CTT AG
+ok $aa->seq, 'WWRQL', "Translation: ". $aa->seq;
+
+# TTG is initiator in Standard codon table? Afraid so.
+$seq->seq("ggggggttgtagcccc"); # ttg tag
+$aa = $seq->translate(-orf => 1);
+ok $aa->seq, 'L*', "Translation: ". $aa->seq;
+
+# Replace L at 1st position with M by setting complete to 1 
+$seq->seq("ggggggttgtagcccc"); # ttg tag
+$aa = $seq->translate(-orf => 1,
+							 -complete => 1);
+ok $aa->seq, 'M', "Translation: ". $aa->seq;
+
+# Ignore non-ATG initiators (e.g. TTG) in codon table
+$seq->seq("ggggggttgatgtagcccc"); # atg tag
+$aa = $seq->translate(-orf => 1,
+							 -start => "atg",
+							 -complete => 1);
+ok $aa->seq, 'M', "Translation: ". $aa->seq;
+
+
+
+# test for character '?' in the sequence string
+ok $seq->seq('TTGGTGGCG?CAACT'), 'TTGGTGGCG?CAACT';
+
+# test for some aliases
+$seq = Bio::PrimarySeq->new(-id          => 'aliasid',
+									 -description => 'Alias desc');
+ok($seq->description, 'Alias desc');
+ok($seq->display_id, 'aliasid');
+
+# test that x's are ignored and n's are assumed to be 'dna'
+$seq->seq('atgxxxxxx');
+ok($seq->alphabet,'dna');
+$seq->seq('atgnnnnnn');
+ok($seq->alphabet,'dna');

Added: trunk/packages/bioperl/branches/upstream/current/t/Primer.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Primer.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Primer.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+## $Id: Primer.t,v 1.2.6.1 2006/11/30 09:24:00 sendu Exp $
+
+# test for Bio::SeqFeature::Primer
+# written by Rob Edwards
+
+use strict;
+use constant NUMTESTS => 18;
+
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    plan tests => NUMTESTS;
+}
+
+use Bio::SeqFeature::Primer;
+ok(1);
+
+my ($primer, $location, $start, $end, $strand, $id, $tm, $tme);
+
+ok $primer=Bio::SeqFeature::Primer->new(-seq=>'CTTTTCATTCTGACTGCAACG');
+ok $primer->seq->seq eq "CTTTTCATTCTGACTGCAACG";
+ok $primer->primary_tag eq "Primer";
+ok $location=$primer->location(500);
+ok $location==500;
+ok $start=$primer->start(2);
+ok $start == 2;
+ok $end=$primer->end(19);
+ok $end == 19;
+ok $strand=$primer->strand(-1);
+ok $strand == -1;
+ok $id=$primer->display_id('test');
+ok $id eq "test";
+ok $tm = $primer->Tm;
+ok $tme = $primer->Tm_estimate;
+ok int($tm) == 52;
+ok int($tme) == 58;

Added: trunk/packages/bioperl/branches/upstream/current/t/Promoterwise.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Promoterwise.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Promoterwise.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+#!/usr/local/bin/perl
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 6;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::Promoterwise;
+use Bio::Root::IO;
+use Bio::Seq;
+
+END {
+    for ( $Test::ntest..$NTESTS ) {
+        skip("promoterwise parser not working properly. Skipping.",1);
+    }
+}
+
+my $file = Bio::Root::IO->catfile(qw(t data promoterwise.out));
+my  $parser = Bio::Tools::Promoterwise->new(-file=>$file);
+ok $parser->isa('Bio::Tools::Promoterwise');
+my @fp;
+while (my $fp = $parser->next_result){
+  push @fp,$fp;
+}
+my $first = $fp[0]->feature1;
+my $second = $fp[0]->feature2;
+
+my @sub = $first->sub_SeqFeature;
+my @sub2 = $second->sub_SeqFeature;
+# check coordinates and score of the first sub-feature of each paired feature
+ok $sub[0]->start,4;
+ok $sub2[0]->start,29;
+ok $sub[0]->end,18;
+ok $sub2[0]->end,43;
+ok $sub[0]->score,1596.49;
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/ProtDist.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ProtDist.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ProtDist.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 46;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::Phylo::Phylip::ProtDist;
+
+END {
+    for ( $Test::ntest..$NTESTS ) {
+        skip("Error in ProtDist.pm",1);
+    }
+}
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","phylipdist.out");
+my $tool= Bio::Tools::Phylo::Phylip::ProtDist->new(-file => $inputfilename);
+my $phy = $tool->next_matrix;
+ok(@{$phy->names}, 5);
+ok $phy->get_entry('Alpha','Beta'), '4.23419';
+ok $phy->get_entry('Gamma','Alpha'),'3.63330';
+# Compare each cell with its expected value (second argument of ok); the
+# former "ok $x = '...'" form only assigned a true string and always passed.
+my @column =  $phy->get_column('Alpha');
+ok $column[0], '0.00000';
+ok $column[1], '4.23419';
+ok $column[2], '3.63330';
+ok $column[3], '6.20865';
+ok $column[4], '3.45431';
+
+my @row    = $phy->get_row('Gamma');
+ok $row[0], '3.63330';
+ok $row[1], '3.49289';
+ok $row[2], '0.00000';
+ok $row[3], '3.68733';
+ok $row[4], '5.84929';
+
+my @diag   = $phy->get_diagonal;
+ok $diag[0], '0.00000';
+ok $diag[1], '0.00000';
+ok $diag[2], '0.00000';
+ok $diag[3], '0.00000';
+ok $diag[4], '0.00000';
+
+my $matrix =<<END;
+    5
+Alpha          0.00000  4.23419  3.63330  6.20865  3.45431
+Beta           4.23419  0.00000  3.49289  3.36540  4.29179
+Gamma          3.63330  3.49289  0.00000  3.68733  5.84929
+Delta          6.20865  3.36540  3.68733  0.00000  4.43345
+Epsilon        3.45431  4.29179  5.84929  4.43345  0.00000
+END
+;
+ok $phy->print_matrix , $matrix;
+
+# now parse Phylip 3.6 output
+
+$inputfilename= Bio::Root::IO->catfile("t","data","phylipdist-36.out");
+$tool= Bio::Tools::Phylo::Phylip::ProtDist->new(-file => $inputfilename);
+$phy = $tool->next_matrix;
+
+ok(@{$phy->names}, 39);
+ok $phy->get_entry('CBG01299','CBG00435'), '4.7793';
+ok $phy->get_entry('CBG22788','CBG22521'),'5.3195';
+ok $phy->get_entry('CBG01466', 'CBG01473'), '3.3944';
+# NOTE(review): the "ok $x[$i] = '...'" lines below assign rather than compare and so always pass.
+@row = $phy->get_row('CBG01473');
+ok(scalar @row, 39);
+@column =  $phy->get_column('CBG01300');
+ok $column[0] = '0.0817';
+ok $column[1] = '0.0000';
+ok $column[2] = '0.0950';
+ok $column[3] = '0.3111';
+ok $column[37] = '4.7190';
+ok $column[38] = '4.7592';
+
+@row    = $phy->get_row('CBG17433');
+ok $row[0] = '4.8451';
+ok $row[1] = '4.5982';
+ok $row[2] = '4.0620';
+ok $row[3] = '5.9673';
+ok $row[4] = '4.6224';
+ok $row[5] = '5.1993';
+ok $row[6] = '5.4427';
+ok $row[7] = '4.2783';
+
+@diag   = $phy->get_diagonal;
+ok $diag[0] = '0.00000';
+ok $diag[1] = '0.00000';
+ok $diag[2] = '0.00000';
+ok $diag[3] = '0.00000';
+ok $diag[4] = '0.00000';
+ok $diag[5] = '0.00000';
+ok $diag[37] = '0.00000';
+ok $diag[38] = '0.00000';
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/ProtMatrix.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ProtMatrix.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ProtMatrix.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,144 @@
+# $Id: ProtMatrix.t,v 1.4 2004/11/02 20:23:22 jthompson Exp $
+#---------------------------------------------------------
+
+# tests for ProtMatrix.pm
+# written by James Thompson <tex at biosysadmin.com>
+
+use strict;
+
+BEGIN {
+   # to handle systems with no installed Test module
+   # we include the t dir (where a copy of Test.pm is located)
+   # as a fallback
+   eval { require Test; };
+   if( $@ ) {
+	use lib 't';
+   }
+   use Test;
+
+   plan tests => 14;
+}
+
+use Bio::Matrix::PSM::ProtMatrix;
+ok(1);
+
+my %param = (
+   '-id' => 'A. thaliana protein atp1',
+   '-e_val' => 0.0001,
+   '-ic' => [ '0.28', '0.40', '0.64', '0.68', '0.68', '0.70', '0.72', '0.72' ],
+   '-lS' => [ '-2', '3', '-3', '2', '-3', '1', '1', '3' ],
+   '-lF' => [ '-1', '-4', '0', '-5', '0', '-5', '-4', '-4' ],
+   '-lT' => [ '-1', '1', '0', '1', '-2', '-1', '0', '1' ],
+   '-lN' => [ '-3', '-1', '-2', '3', '-5', '5', '-2', '0' ],
+   '-lK' => [ '-2', '0', '-3', '2', '-3', '2', '-3', '-1' ],
+   '-lY' => [ '-2', '-3', '-3', '-4', '-3', '-4', '-4', '-4' ],
+   '-lE' => [ '-3', '4', '-3', '2', '-4', '-2', '-3', '2' ],
+   '-lV' => [ '0', '-2', '1', '-4', '1', '-4', '-1', '-3' ],
+   '-lQ' => [ '-1', '0', '-2', '3', '-4', '1', '-3', '0' ],
+   '-lM' => [ '8', '-3', '8', '-3', '1', '-3', '-3', '-3' ],
+   '-lC' => [ '-2', '-3', '-3', '-4', '-3', '-4', '-3', '-3' ],
+   '-lL' => [ '1', '-3', '1', '-4', '3', '-4', '-2', '-4' ],
+   '-lA' => [ '-2', '1', '-2', '0', '-2', '-2', '2', '2' ],
+   '-lW' => [ '-2', '-4', '-3', '-5', '-4', '-5', '-5', '-5' ],
+   '-lP' => [ '-3', '-2', '-4', '-3', '-1', '-3', '6', '-3' ],
+   '-lH' => [ '-2', '-2', '-3', '-2', '-5', '-2', '-2', '-3' ],
+   '-lD' => [ '-4', '-1', '-3', '1', '-3', '-1', '-3', '4' ],
+   '-lR' => [ '-2', '-1', '-3', '0', '-4', '4', '-4', '-3' ],
+   '-lI' => [ '0', '-3', '0', '-4', '6', '-4', '-2', '-2' ],
+   '-lG' => [ '-4', '-2', '-4', '-2', '-5', '-3', '-1', '-2' ],
+   '-pS' => [ '0', '33', '0', '16', '1', '12', '11', '25' ],
+   '-pF' => [ '0', '0', '2', '0', '3', '0', '0', '0' ],
+   '-pT' => [ '0', '8', '7', '10', '1', '2', '7', '8' ],
+   '-pN' => [ '0', '0', '2', '13', '0', '36', '1', '4' ],
+   '-pK' => [ '0', '5', '0', '13', '1', '15', '0', '2' ],
+   '-pY' => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+   '-pE' => [ '0', '41', '1', '12', '0', '0', '0', '15' ],
+   '-pV' => [ '0', '3', '9', '0', '2', '0', '3', '1' ],
+   '-pQ' => [ '0', '0', '0', '15', '0', '4', '0', '3' ],
+   '-pM' => [ '100', '0', '66', '0', '2', '0', '0', '0' ],
+   '-pC' => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+   '-pL' => [ '0', '0', '8', '0', '25', '0', '4', '0' ],
+   '-pA' => [ '0', '10', '1', '9', '2', '0', '22', '16' ],
+   '-pW' => [ '0', '0', '0', '0', '0', '0', '0', '0' ],
+   '-pP' => [ '0', '0', '0', '0', '3', '1', '45', '0' ],
+   '-pH' => [ '0', '0', '0', '0', '0', '0', '1', '0' ],
+   '-pD' => [ '0', '0', '1', '7', '2', '2', '0', '22' ],
+   '-pR' => [ '0', '0', '0', '3', '0', '27', '0', '0' ],
+   '-pI' => [ '0', '0', '3', '0', '59', '1', '2', '3' ],
+   '-pG' => [ '0', '0', '0', '1', '0', '0', '4', '1' ],
+);
+
+my $matrix = new Bio::Matrix::PSM::ProtMatrix(%param);
+ok $matrix;
+
+#Simple methods here
+ok $matrix->IUPAC,'MEMSINPS';
+
+ok $matrix->consensus,'MEMSINPS';
+
+ok $matrix->width,8;
+
+ok $matrix->curpos,0;
+
+ok $matrix->get_string('A'), '0100a90200220160';
+
+my %x1 = (
+         'base' => 'M', 'prob' => 100, 'rel' => 0,
+         'lC' => '-2',
+         'pM' => '100',
+         'lY' => '-2',
+         'lK' => '-2',
+         'lR' => '-2',
+         'lM' => '8',
+         'pP' => '0',
+         'lV' => '0',
+         'pK' => '0',
+         'lH' => '-2',
+         'pI' => '0',
+         'pT' => '0',
+         'lE' => '-3',
+         'lN' => '-3',
+         'lQ' => '-1',
+         'lW' => '-2',
+         'pH' => '0',
+         'pC' => '0',
+         'lI' => '0',
+         'pA' => '0',
+         'lA' => '-2',
+         'pV' => '0',
+         'pF' => '0',
+         'lS' => '-2',
+         'pY' => '0',
+         'lL' => '1',
+         'lG' => '-4',
+         'pE' => '0',
+         'pL' => '0',
+         'lF' => '-1',
+         'pS' => '0',
+         'pD' => '0',
+         'pN' => '0',
+         'lP' => '-3',
+         'lT' => '-1',
+         'pQ' => '0',
+         'pR' => '0',
+         'lD' => '-4',
+         'pW' => '0',
+         'pG' => '0'
+);
+
+my %x2 = $matrix->next_pos;
+ok %x1, %x2;
+
+ok $matrix->curpos,1;
+
+ok $matrix->e_val(0.0001);
+ok $matrix->e_val,0.0001;
+
+#Now some PSM specific methods like regexp and matrix info
+
+my @a = ('0', '10', '1', '9', '2', '0', '22', '16');
+ok $matrix->get_array('A'), @a;
+
+my $regexp = '[Mm][EeSs][Mm]\.[IiLl][RrNn][AaPp][DdSs]';
+ok $matrix->regexp, $regexp;
+ok $matrix->sequence_match_weight('MSMPLRPD'), 33;

Added: trunk/packages/bioperl/branches/upstream/current/t/ProtPsm.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ProtPsm.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ProtPsm.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,85 @@
+# $Id: ProtPsm.t,v 1.4 2004/10/21 18:39:33 bosborne Exp $
+#---------------------------------------------------------
+
+# tests for Bio::Matrix::PSM::ProtPsm
+# written by James Thompson <tex at biosysadmin.com>
+
+use strict;
+
+BEGIN {
+   # to handle systems with no installed Test module
+   # we include the t dir (where a copy of Test.pm is located)
+   # as a fallback
+   eval { require Test; };
+   if( $@ ) {
+	use lib 't';
+   }
+   use Test;
+
+   plan tests => 5;
+}
+
+use Bio::Matrix::PSM::ProtPsm;
+ok 1;
+use Bio::Matrix::PSM::IO;
+ok 2;
+
+# Test psiblast reading functionality.
+my $psmIO =  new Bio::Matrix::PSM::IO(-format => 'psiblast', 
+			      -file   => Bio::Root::IO->catfile(qw(t data atp1.matrix)));
+ok $psmIO;
+
+my $psm = $psmIO->next_psm;
+ok $psm;
+
+# Verify that getting IUPAC sequence is functional
+my $IUPAC = 'MEMSINPSEISSIIKEQIENYDTKAEVSEVGTVLSVGDGIARVYGLDNVMAGEMVEFPSGVKGMALNLEEDNVGVVLLGDDTGIKEGDLVKRTGKIVEVPVGEALLGRVVDPLGNPIDAKGPIKTDERRPVEVKAPGIIPRKSVHEPLQTGLKAIDSLVPIGRGQRELIIGDRQTGKTAIAIDTIINQKRINDESTDEGKKVYCIYVAIGQKRSTVAQVVQTLREAGALEYTIIVAATAAAPAPAQYLSAYAGCAIGEAFADNGAAACIIHDDLSRQAVAYAIISLLLRRPPGREAYPGDVFYLHSRLLERAAKLSDELGGGSLTALPIIETQAGDVSAYIPTNVISITDGQIFLETDLFNSGIRPAINVGLSVSRVGSAAQIKAMKKVAGSLKLELAQYRELAAFAQFGSDLDAATQAQLNRGARLTELLKQPQYSPLPVEEQVVILYAGVNGYLDDIPVEDIRDFEKELLEYLKSNHPEILESIRTGKLSDEIEKALKEAIKEFV';
+ok $psm->IUPAC, $IUPAC;
+
+## Lets try to compress and uncompress the log odds and the
+## frequencies, see if there is no considerable loss of data.
+
+#my $fA=$psm->get_compressed_freq('A');
+#my @check=Bio::Matrix::PSM::SiteMatrix::_uncompress_string($fA,1,1);
+#my @A=$psm->get_array('A');
+#my ($var,$max) = (0,0);
+
+#for (my $i = 0; $i<@check;$i++) {
+#  my $diff=abs(abs($check[$i])-abs($A[$i]));
+#  $var += $diff;
+#  $max=$diff if ($diff>$max);
+#}
+#my $avg=$var/@check;
+#ok $avg<0.01; #Loss of data under 1 percent
+#ok $psm->sequence_match_weight('CAGAAAAATAAAATGGCCACCACCC'),2015;
+#
+#my $lA=$psm->get_compressed_logs('A');
+#@check=Bio::Matrix::PSM::SiteMatrix::_uncompress_string($lA,1000,2);
+#@A=$psm->get_logs_array('A');
+#($var,$max) = (0,0);
+#for (my $i = 0;$i<@check;$i++) {
+#  my $diff=abs(abs($check[$i])-abs($A[$i]));
+#  $var += $diff;
+#  $max=$diff if ($diff>$max);
+#}
+#$avg=$var/@check;
+#ok $avg<10; #Loss of data under 1 percent
+#
+#my $matrix=$psm->matrix;
+#ok $matrix;
+#my $psm2=$psm;
+#$psm2->matrix($matrix);
+#ok $psm,$psm2;
+
+#ok $IUPAC,'CAGAAAAATWVAATYCCCACCHCCC';
+#ok $IUPAC,$psm2->IUPAC;
+#ok $IUPAC,$matrix->IUPAC;
+#
+#my $instances=$psm->instances;
+#ok $instances;
+#
+#foreach my $instance (@{$instances}) {
+#  my $id=$instance->primary_id;
+#  ok $instance->strand,1;
+#  last if (ok $id);
+#}

Added: trunk/packages/bioperl/branches/upstream/current/t/Pseudowise.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Pseudowise.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Pseudowise.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,54 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 20;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::Pseudowise;
+use Bio::Root::IO;
+
+END {
+    for ( $Test::ntest..$NTESTS ) {
+	#skip("Cannot run remaining pseudowise tests, skipping.",1);
+    }
+}
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","pseudowise.out");
+my $parser = Bio::Tools::Pseudowise->new(-file => $inputfilename);
+my @gene;
+while (my $gene= $parser->next_prediction){
+    push @gene, $gene;
+}
+my ($g) = @gene;
+my @e = $g->sub_SeqFeature;
+
+ok ($g->primary_tag, 'pseudogene');
+ok ($g->source_tag, 'pseudowise');
+
+ok(($g->get_tag_values('Synonymous'))[0],7);
+ok(($g->get_tag_values('Nonsynonymous'))[0],18);
+ok(($g->get_tag_values('Ka/Ks'))[0],2.57);
+ok(($g->get_tag_values('Unlikely'))[0],0);
+ok(($g->get_tag_values('Identical'))[0],5);
+ok(($g->get_tag_values('Stop'))[0],0);
+ok(($g->get_tag_values('Total codons'))[0],30);
+ok(($g->get_tag_values('Frameshift'))[0],0);
+ok(($g->get_tag_values('Intron'))[0],1);
+
+ok($g->start,163);
+ok($g->end,626);
+ok($g->strand,1);
+ok($e[0]->start, 163);
+ok($e[0]->end,213);
+ok($e[0]->strand,1);
+ok($e[1]->start,585);
+ok($e[1]->end,626);
+ok($e[1]->strand,1);


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/Pseudowise.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/QRNA.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/QRNA.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/QRNA.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,65 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+# $Id: QRNA.t,v 1.1 2003/07/25 16:21:07 jason Exp $
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 29;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::QRNA;
+use Bio::Root::IO;
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","ecoli-trna-qrna.out");
+my $parser = new Bio::Tools::QRNA(-file => $inputfilename);
+ok($parser);
+my $rnacount = 0;
+while( my $f = $parser->next_feature ) {
+    if( $f->primary_tag eq 'RNA' ) { # winning model is primary tag
+	if( ! $rnacount ) { # 1st time through let's test
+	    ok($f->feature1->start,4);
+	    ok($f->feature1->end,  70);
+	    ok($f->score, 22.147);
+	    ok($f->feature1->seq_id,'DA0780-1-');
+	    
+	    ok($f->feature2->start, 4);
+	    ok($f->feature2->end,  70);
+	    ok($f->feature2->seq_id, 'ECOLI-3979754-');
+	    ok(($f->get_tag_values('alignment_len'))[0], 70);
+	    ok(($f->get_tag_values('alignment_pid'))[0], '72.86');
+	    ok(($f->get_tag_values('COD_score'))[0], '16.954');
+	    ok(($f->get_tag_values('COD_logoddspost'))[0], '-4.365');
+	    ok(($f->get_tag_values('OTH_score'))[0], '21.319');
+	    ok(($f->get_tag_values('OTH_logoddspost'))[0], '0.000');
+	}
+	$rnacount++;
+    }
+}
+ok($rnacount, 21);
+$inputfilename= Bio::Root::IO->catfile("t","data","qrna-relloc.out");
+$parser = new Bio::Tools::QRNA(-file => $inputfilename);
+
+my $qrna = $parser->next_feature;
+ok($qrna->primary_tag, 'COD');
+ok($qrna->source_tag, 'qrna');
+ok($qrna->feature1->seq_id, 'Contig1');
+ok($qrna->feature2->seq_id, 'chr5.pseudo');
+ok($qrna->feature1->start, 24732);
+ok($qrna->feature1->end, 24881);
+
+ok($qrna->feature2->start, 527251);
+ok($qrna->feature2->end, 527400);
+
+ok($parser->seq_file,'tst.out');
+ok($parser->RNA_model, '/mix_tied_linux.cfg');
+ok($parser->PAM_model, 'BLOSUM62 scaled by 1.000');
+ok($parser->program_name, 'qrna');
+ok($parser->program_version, '1.2b');
+ok($parser->program_date, 'Tue Dec 18 15:04:38 CST 2001');
+

Added: trunk/packages/bioperl/branches/upstream/current/t/RNAChange.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RNAChange.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RNAChange.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,123 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: RNAChange.t,v 1.5 2001/10/09 15:11:17 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;    
+    plan tests => 29;
+}
+
+use Bio::Variation::Allele;
+use Bio::Variation::RNAChange;
+use Bio::Variation::DNAMutation;
+use Bio::Variation::AAChange;
+ 
+my $obj = Bio::Variation::RNAChange -> new;
+
+ok $obj;
+
+$obj->start(4);           
+ok $obj->start, 4;
+
+$obj->end(4); 
+ok $obj->end, 4;
+
+$obj->length(1);
+
+ok $obj->length, 1;
+
+$obj->strand('1');  
+ok $obj->strand, '1';
+
+ok ($obj->primary_tag, 'Variation' );
+
+$obj->source_tag('source');
+ok ($obj->source_tag, 'source' );
+
+$obj->frame(2);   
+ok ($obj->frame, 2 );
+
+$obj->score(2);   
+ok ($obj->score, 2 );
+
+#test gff string
+#$obj->dna_mut('dna_mut'); 
+#if ($obj->dna_mut eq 'dna_mut' ) {
+#    print "ok 11\n";  
+#} else {
+#    print "not ok 11\n";
+#} 
+ok(1);
+
+my $a1 = Bio::Variation::Allele->new(-seq => 'g');
+$obj->allele_ori($a1);
+
+ok( $obj->allele_ori->seq, 'g' );
+
+my $a2 = Bio::Variation::Allele->new('-seq' => 'a');
+$obj->allele_mut($a2);
+
+ok ($obj->allele_mut->seq, 'a' );
+
+$obj->upStreamSeq('gaagattcagccaagctcaaggatg'); 
+ok ($obj->upStreamSeq, 'gaagattcagccaagctcaaggatg' );
+
+$obj->cds_end(1000); 
+ok ($obj->cds_end, 1000 );
+
+$obj->dnStreamSeq('aagtgcagttagggctgggaagggt'); 
+ok ($obj->dnStreamSeq, 'aagtgcagttagggctgggaagggt' );
+
+$obj->codon_pos(1); 
+ok ($obj->codon_pos, 1 );
+
+my $obj3 = Bio::Variation::AAChange -> new;
+$obj3->start(2);
+$obj->AAChange($obj3);
+$obj3->allele_ori($a1);
+$obj3->allele_mut($a2);
+
+ok ($obj->label, 'missense' , "label is". $obj->label);
+
+
+$obj->status('proven'); 
+ok ($obj->status, 'proven' );
+
+$obj->proof('experimental'); 
+ok ($obj->proof, 'experimental' );
+ok ($obj->restriction_changes, '-BccI' );
+
+$obj->region('coding'); 
+ok ($obj->region, 'coding' );
+$obj->numbering('coding'); 
+ok ($obj->numbering, 'coding' );
+
+ok ($obj->codon_ori, 'gaa', "Codon_ori is |". $obj->codon_ori. "|");
+
+ok ($obj->codon_mut, 'aaa' , "Codon_mut is |". $obj->codon_mut. "|");
+
+
+$obj->codon_pos(1); 
+ok ($obj->codon_pos, 1 );
+ok( $obj->codon_table, 1 );
+
+$obj->codon_table(3);
+ok ( $obj->codon_table, 3 );
+
+$obj->mut_number(2);
+ok ( $obj->mut_number, 2 );
+
+$obj->verbose(2);
+ok ( $obj->verbose, 2 );

Added: trunk/packages/bioperl/branches/upstream/current/t/RandDistFunctions.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RandDistFunctions.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RandDistFunctions.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,27 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 4;
+}
+
+use Bio::Tools::RandomDistFunctions;
+
+my $dist = Bio::Tools::RandomDistFunctions->new;
+ok(1);
+
+ok($dist->rand_exponentional_distribution(1.0));
+ok($dist->rand_geometric_distribution(0.9));
+ok($dist->rand_normal_distribution);

Added: trunk/packages/bioperl/branches/upstream/current/t/RandomTreeFactory.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RandomTreeFactory.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RandomTreeFactory.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 3;
+}
+
+use Bio::Tree::RandomFactory;
+use Bio::TreeIO;
+use Bio::Tree::Statistics;
+ok(1);
+
+use vars qw($FILE1);
+$FILE1 = 'out.tre';
+END { unlink $FILE1; }
+ 
+my $ssize = 5;
+my $factory = new Bio::Tree::RandomFactory(-sample_size => $ssize);
+my $stats = new Bio::Tree::Statistics();
+
+my $tree = $factory->next_tree;
+
+ok($tree->get_nodes, ($ssize * 2 - 1));
+
+my $treeio = new Bio::TreeIO(-format => 'newick', -file => ">$FILE1");
+
+$treeio->write_tree($tree);
+undef $treeio;
+
+ok(-s $FILE1);

Added: trunk/packages/bioperl/branches/upstream/current/t/Range.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Range.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Range.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Range.t,v 1.11 2006/06/26 14:46:45 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => 40;
+}
+
+use Bio::Range;
+
+ok(1);
+
+my $range = Bio::Range->new(-start=>10,
+                            -end=>20,
+									 -strand=>1);
+ok(defined $range);
+ok($range->strand, 1);
+
+my $range2 = Bio::Range->new(-start=>15,
+                             -end=>25,
+									  -strand=>1);
+
+ok(defined $range2);
+ok($range2->strand, 1);
+
+my $r = Bio::Range->new();
+ok($r->strand(0), 0);
+ok($r->start(27), 27);
+ok($r->end(28), 28);
+
+ok(! defined $r->intersection($range2));
+
+$r = $range->union($range2);
+ok($r->start, 10);
+ok($r->end, 25);
+
+$r = $range->intersection($range2);
+ok($r->start, 15);
+ok($r->end, 20);
+ok($r->strand, 1);
+
+# intersection and union can also take lists
+my $range3 = Bio::Range->new(-start=>18,-end=>30);
+$r = $range->intersection([$range2, $range3]);
+ok($r->start == 18 && $r->end == 20);
+$r = Bio::Range->intersection([$range, $range2, $range3]);
+ok($r->start == 18 && $r->end == 20);
+$r = $range->union($range2, $range3);
+ok($r->start == 10 && $r->end == 30);
+$r = Bio::Range->union($range, $range2, $range3);
+ok($r->start == 10 && $r->end == 30);
+$range3->start(21);
+ok(! defined $range->intersection([$range2, $range3]));
+
+ok !($range->contains($range2));
+ok !($range2->contains($range));
+ok ($range->overlaps($range2));
+ok ($range2->overlaps($range));
+
+# testing strand
+$range3 = Bio::Range->new(-start => 15,
+                             -end => 25,
+									  -strand => 1);
+
+my $range4 = Bio::Range->new(-start => 15,
+									  -end => 25,
+									  -strand => -1);
+
+my $range5 = Bio::Range->new(-start => 15,
+                             -end => 25,
+									  -strand => 0);
+
+my $range6 = Bio::Range->new(-start => 20,
+									  -end => 30,
+									  -strand => -1);
+
+ok $range3->_ignore($range4);     # 1 & -1
+ok $range3->_weak($range3);       # 1 & 1 true
+ok $range3->_weak($range5);       # 1 & 0 true
+ok ! ($range3->_weak($range4));   # 1 & -1 false
+ok $range3->_strong($range3);     # 1 & 1 true
+ok ! ($range3->_strong($range5)); # 1 & 0 false
+ok ! ($range3->_strong($range4)); # 1 & -1 false
+
+ok ! ( $range3->overlaps($range4,'weak'));
+ok ! ( $range4->overlaps($range3,'weak'));
+ok ! ( $range3->overlaps($range4,'strong')); 
+ok ! ( $range4->overlaps($range3,'strong')); 
+
+$range3->strand(0);
+
+ok  ( $range3->overlaps($range4,'weak'));
+ok  ( $range4->overlaps($range3,'weak')); 
+ok ! ( $range3->overlaps($range4,'strong'));
+ok ! ( $range4->overlaps($range3,'strong')); 
+
+# if strands are different then intersection() should return 0...
+$r = $range3->intersection($range4);
+ok($r->strand, 0);
+
+# or if both strands are -1 then -1 should be returned
+$r = $range6->intersection($range4);
+ok($r->strand, -1);

Added: trunk/packages/bioperl/branches/upstream/current/t/RangeI.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RangeI.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RangeI.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: RangeI.t,v 1.5 2001/09/19 16:43:10 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw(@funcs);
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    @funcs = qw(start end length strand overlaps contains 
+		equals intersection union overlap_extent);
+    plan tests => 19;
+}
+
+use Bio::RangeI;
+
+my $i = 1;
+my $func;
+while ($func = shift @funcs) {
+    $i++;
+  if(exists $Bio::RangeI::{$func}) {
+    ok(1);
+    next if $func eq 'union';
+    eval {
+      $Bio::RangeI::{$func}->();
+    };
+    ok( $@ );
+  } else {
+    ok(0);
+  }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/RefSeq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RefSeq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RefSeq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,114 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: RefSeq.t,v 1.12 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+my $error;
+
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 13;
+	plan tests => $NUMTESTS;
+	eval { require IO::String; 
+			 require LWP::UserAgent; 
+			 require HTTP::Request::Common;
+		 };
+	if( $@ ) {
+		for( $Test::ntest..$NUMTESTS ) {
+			skip("IO::String,LWP::UserAgent, or HTTP::Request::Common not installed. This means the Bio::DB::* modules are not usable. Skipping tests",1);
+		}
+		$error = 1;
+	}
+}
+
+END {
+	for ( $Test::ntest..$NUMTESTS ) {
+		skip("Unable to complete RefSeq tests - set env variable BIOPERLDEBUG to test",1);
+	}
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require  Bio::DB::RefSeq;
+require  Bio::DB::GenBank;
+require  Bio::DB::EMBL;
+
+my $testnum;
+my $verbose = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my ($db,$seq,$db2,$seq2,$seqio);
+# get a single seq
+
+$seq = $seqio = undef;
+
+#test redirection from GenBank and EMBL
+$verbose = -1;
+#GenBank
+ok $db = new Bio::DB::GenBank('-verbose'=>$verbose) ;     
+#EMBL
+ok $db2 = new Bio::DB::EMBL('-verbose'=>$verbose) ;     
+
+eval {
+    $seq = $db->get_Seq_by_acc('NT_006732');
+    $seq2 = $db2->get_Seq_by_acc('NT_006732');
+};
+ok $@;
+
+exit unless $DEBUG;
+eval {   # remote lookups may die; the check of $@ that follows reports and exits
+    ok($seq = $db->get_Seq_by_acc('NM_006732'));
+    ok($seq && $seq->length == 3775);     # length is a number: compare with ==
+    ok $seq2 = $db2->get_Seq_by_acc('NM_006732');
+    ok($seq2 && $seq2->length == 3775);   # guard on $seq2 avoids a method call on undef
+};
+
+if ($@) {
+    if( $DEBUG ) {
+	print STDERR "Warning: Couldn't connect to RefSeq with Bio::DB::RefSeq.pm!\n" . $@;
+    }
+    exit(0);
+}
+
+
+
+$verbose = 0;
+
+eval { 
+    ok defined($db = new Bio::DB::RefSeq(-verbose=>$verbose)); 
+    ok(defined($seq = $db->get_Seq_by_acc('NM_006732')));
+    ok( $seq->length, 3775);
+    ok defined ($db->request_format('fasta'));
+    ok(defined($seq = $db->get_Seq_by_acc('NM_006732')));
+    ok( $seq->length, 3775); 
+};
+
+if ($@) {
+    if( $DEBUG ) {
+	print STDERR "Warning: Couldn't connect to RefSeq with Bio::DB::RefSeq.pm!\n" . $@;
+    }
+    exit(0);
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Registry.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Registry.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Registry.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+##-*-Perl-*-
+# $Id: Registry.t,v 1.10.6.2 2006/11/30 09:24:00 sendu Exp $
+# test for Bio::DB::Registry, Flat::BinarySearch, and Flat::BDB
+# written by Brian Osborne
+
+use strict;
+use vars qw($NUMTESTS $old_search_path $no_DB_File $no_LWP $DEBUG);
+
+use File::Spec;
+
+BEGIN {
+	$DEBUG = $ENV{BIOPERLDEBUG} || 0;
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	$no_DB_File = 0;
+	$old_search_path = $ENV{OBDA_SEARCH_PATH} if defined $ENV{OBDA_SEARCH_PATH};
+	$ENV{OBDA_SEARCH_PATH} = 't/data/registry/flat;t/data/registry/bdb';
+	eval { require DB_File;
+			 require BerkeleyDB;
+		 };
+	if ($@) {
+		$ENV{OBDA_SEARCH_PATH} = 't/data/registry/flat';
+		$no_DB_File = 1;
+	}
+	eval { require LWP::UserAgent;
+			 require HTTP::Request::Common; };
+	if( $@ ) {
+		$no_LWP =1;
+	}
+	plan tests => ($NUMTESTS=13);
+}
+
+require Bio::DB::Registry;
+use Bio::DB::Flat;
+use Bio::Root::IO;
+
+my $tmpdir = File::Spec->catfile(qw(t tmp));
+mkdir($tmpdir,0777);
+ok (-d $tmpdir);
+
+my $flat = Bio::DB::Flat->new(-directory  => $tmpdir,
+			      -dbname     => 'testflat',
+			      -format     => 'fasta',
+			      -index      => 'binarysearch',
+                              -write_flag => 1 );
+my $entries = $flat->build_index(File::Spec->catfile(qw(t data cysprot.fa)));
+ok $entries == 7;
+
+if ($no_DB_File){
+    for (1..2) {
+	skip("DB_File or BerkeleyDB not found, skipping DB_File tests",1);
+    }
+} else {
+    my $bdb = Bio::DB::Flat->new(-directory  => $tmpdir,
+				 -dbname     => 'testbdb',
+				 -format     => 'fasta',
+				 -index      => 'bdb',
+				 -write_flag => 1 );
+    ok defined($bdb);
+    $entries = $bdb->build_index(File::Spec->catfile(qw(t data cysprot.fa)));
+    ok $entries == 7;
+}
+
+if( $no_LWP ) {
+    for (1..9 ) {
+	skip("No LWP::UserAgent or HTTP::Request::Common modules installed skipping tests",1);
+    }
+} else {
+    my $registry = Bio::DB::Registry->new;
+    ok defined($registry);
+    my @available_services = $registry->services;
+    
+    ok grep /testflat/, @available_services;
+    my $db = $registry->get_database('testflat');
+    ok defined($db);
+    my $seq = $db->get_Seq_by_id("ALEU_HORVU");
+    ok defined($seq);
+    my $sequence = $seq->seq;
+    ok $sequence eq "MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRDAAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNGGLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAFQVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCAIATCASYPVVAA";
+
+    if ($no_DB_File) {
+	for (1..4) {
+	    skip("DB_File or BerkeleyDB not found, skipping DB_File tests",1);
+	}
+    } else {
+	ok grep /testbdb/, @available_services;
+	$db = $registry->get_database('testbdb');
+	ok defined($db);
+	$seq = $db->get_Seq_by_id("ALEU_HORVU");
+	ok defined($seq);
+	$sequence = $seq->seq;
+	ok $sequence eq "MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRDAAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNGGLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAFQVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCAIATCASYPVVAA";
+    }
+}
+END {
+	cleanup();
+}
+
+# Remove the scratch directory held in $tmpdir, if it exists.
+sub cleanup {
+	# called from the END block; eval traps any die raised by rmtree
+	eval { -d $tmpdir and Bio::Root::IO->rmtree($tmpdir) };
+}
+
+__END__


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/Registry.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/Relationship.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Relationship.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Relationship.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,74 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Relationship.t,v 1.4 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($HAVEGRAPHDIRECTED $NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	eval {
+		require Graph::Directed;
+		$HAVEGRAPHDIRECTED = 1;
+	};
+	if ($@) {
+		$HAVEGRAPHDIRECTED = 0;
+		warn "Cannot run tests, Graph::Directed is not installed\n";
+	}
+	plan tests => ($NUMTESTS = 9);
+}
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot complete Relationship tests',1);
+   }
+}
+exit(0) unless $HAVEGRAPHDIRECTED;
+
+require Bio::Ontology::Relationship;
+require Bio::Ontology::GOterm;  
+require Bio::Ontology::RelationshipType;
+
+my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+ok( $IS_A->isa( "Bio::Ontology::RelationshipType" ) );
+
+my $parent = Bio::Ontology::GOterm->new();
+ok( $parent->isa( "Bio::Ontology::GOterm" ) );
+
+my $child = Bio::Ontology::GOterm->new();
+ok( $child->isa( "Bio::Ontology::GOterm" ) );
+
+$parent->name( "parent" );
+
+$child->name( "child" );
+
+
+my $rel = Bio::Ontology::Relationship->new( -identifier        => "16847",
+                                            -parent_term       => $parent,
+                                            -child_term        => $child,
+                                            -relationship_type => $IS_A ); 
+
+ok( $rel->isa( "Bio::Ontology::Relationship" ) );
+
+ok( $rel->identifier(), "16847" );
+
+ok( $rel->parent_term()->name(), "parent" );
+
+ok( $rel->child_term()->name(), "child" );
+
+ok( $rel->relationship_type()->name(), "IS_A" );
+
+ok( $rel->to_string() );
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/RelationshipType.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RelationshipType.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RelationshipType.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,87 @@
+
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: RelationshipType.t,v 1.5 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($HAVEGRAPHDIRECTED $DEBUG $NUMTESTS);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    eval {require Graph::Directed; 
+			 $HAVEGRAPHDIRECTED=1;
+	 };
+
+    if ($@) {
+		 $HAVEGRAPHDIRECTED = 0;
+		 warn "Cannot run tests as Graph::Directed is not installed\n";
+    }
+    plan tests => ($NUMTESTS = 21);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot complete RelationshipType tests',1);
+	}
+}
+
+exit 0 unless $HAVEGRAPHDIRECTED;
+require Bio::Ontology::RelationshipType;
+require Bio::Ontology::Ontology;
+
+my $ont = Bio::Ontology::Ontology->new(-name => "relationship type");
+  
+my $IS_A     = Bio::Ontology::RelationshipType->get_instance("IS_A", $ont);
+my $PART_OF  = Bio::Ontology::RelationshipType->get_instance("PART_OF", $ont);
+my $CONTAINS = Bio::Ontology::RelationshipType->get_instance("CONTAINS", $ont);
+my $FOUND_IN = Bio::Ontology::RelationshipType->get_instance("FOUND_IN", $ont);
+my $IS_A2    = Bio::Ontology::RelationshipType->get_instance("IS_A", $ont);
+
+ok( $IS_A->isa( "Bio::Ontology::RelationshipType" ) );
+ok( $IS_A->isa( "Bio::Ontology::TermI" ) );
+
+
+ok( ! $IS_A->equals( $PART_OF ) );
+ok( $IS_A->equals( $IS_A2 ) );
+ok( $PART_OF->equals( $PART_OF ) );
+
+
+ok( $IS_A->identifier(), undef ); # don't make up identifiers
+ok( $IS_A->name(), "IS_A" );
+ok( $IS_A->definition(), "IS_A relationship predicate (type)" );
+ok( $IS_A->ontology()->name(), "relationship type" );
+
+
+ok( $PART_OF->identifier(), undef ); # don't make up identifiers
+ok( $PART_OF->name(), "PART_OF" );
+ok( $PART_OF->definition(), "PART_OF relationship predicate (type)" );
+ok( $PART_OF->ontology()->name(), "relationship type" );
+
+
+ok( $CONTAINS->identifier(), undef ); # don't make up identifiers
+ok( $CONTAINS->name(), "CONTAINS" );
+ok( $CONTAINS->definition(), "CONTAINS relationship predicate (type)" );
+ok( $CONTAINS->ontology()->name(), "relationship type" );
+
+
+ok( $FOUND_IN->identifier(), undef ); # don't make up identifiers
+ok( $FOUND_IN->name(), "FOUND_IN" );
+ok( $FOUND_IN->definition(), "FOUND_IN relationship predicate (type)" );
+ok( $FOUND_IN->ontology()->name(), "relationship type" );
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/RemoteBlast.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RemoteBlast.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RemoteBlast.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,218 @@
+# -*-Perl-*- for my emacs
+# $Id: RemoteBlast.t,v 1.15.6.2 2006/11/30 18:45:52 sendu Exp $
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+my $error;
+BEGIN { 
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	$error = 0;
+	$NUMTESTS = 13; 
+	plan tests => $NUMTESTS;
+	eval { require IO::String;
+			 require LWP;
+			 require LWP::UserAgent;
+			 1;
+		 };
+	if( $@ ) {
+		print STDERR "IO::String,LWP,LWP::UserAgent not installed. This means cannot query remote webserver. Skipping tests.\n";
+		for( $Test::ntest..$NUMTESTS ) {
+			skip("IO::String or LWP::UserAgent not installed. This means the Bio::Tools::Run::RemoteBlast is not usable. Skipping tests",1);
+		}
+		$error = 1; 
+	}
+}
+
+END {     
+	for ( $Test::ntest..$NUMTESTS ) {
+		skip("Unable to complete RemoteBlast tests",1);
+	}
+}
+
+my $actually_submit = $DEBUG > 0;
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require Bio::Tools::Run::RemoteBlast;
+
+require Bio::SeqIO;
+require Bio::AlignIO;
+require Bio::Seq;
+require Bio::Root::IO;
+use Env;
+
+ok(1);
+
+my $prog = 'blastp';
+my $db   = 'swissprot';
+my $e_val= '1e-10';
+my $v = $DEBUG > 1;
+my  $remote_blast = Bio::Tools::Run::RemoteBlast->new('-verbose' => $v,
+																		'-prog' => $prog,
+																		'-data' => $db,
+																		'-expect' => $e_val,
+						      );
+$remote_blast->submit_parameter('ENTREZ_QUERY', 
+										  'Escherichia coli[ORGN]');
+my $inputfilename = Bio::Root::IO->catfile("t","data","ecolitst.fa");
+ok( -e $inputfilename);
+
+if( $actually_submit == 0 ) {
+	print STDERR "Skipping submitting remote BLAST to avoid Time-out\n" if( $DEBUG );
+	foreach( $Test::ntest..$NUMTESTS) { 
+		skip('Skip to avoid timeout',1);
+	}
+} else {
+	my $r = $remote_blast->submit_blast($inputfilename);
+	ok($r);
+	print STDERR "waiting..." if( $v > 0 );
+	while ( my @rids = $remote_blast->each_rid ) {
+		foreach my $rid ( @rids ) {
+			my $rc = $remote_blast->retrieve_blast($rid);
+			if( !ref($rc) ) {
+				if( $rc < 0 ) { 		
+					$remote_blast->remove_rid($rid);
+					ok(0);
+				}
+				print STDERR "." if ( $v > 0 );
+				sleep 5;
+			} else { 
+				ok(1);
+				$remote_blast->remove_rid($rid);
+				my $result = $rc->next_result;
+				ok($result->database_name, qr/swissprot/i);
+				my $count = 0;
+				while( my $hit = $result->next_hit ) {		
+					$count++;
+					next unless ( $v > 0);
+					print "sbjct name is ", $hit->name, "\n";
+					while( my $hsp = $hit->next_hsp ) {
+						print "score is ", $hsp->score, "\n";
+					} 
+				}
+				ok($count, 3);
+			}
+		}
+	}
+}
+
+# test blasttable
+
+my $remote_blast2 = Bio::Tools::Run::RemoteBlast->new
+  ('-verbose'    => $v,
+	'-prog'       => $prog,
+	'-data'       => $db,
+	'-readmethod' => 'blasttable',
+	'-expect'     => $e_val,     
+  );
+$remote_blast2->submit_parameter('ENTREZ_QUERY', 
+											'Escherichia coli[ORGN]');
+
+$remote_blast2->retrieve_parameter('ALIGNMENT_VIEW', 'Tabular');
+
+$inputfilename = Bio::Root::IO->catfile("t","data","ecolitst.fa");
+
+if( $actually_submit == 0 ) {
+	print STDERR "Skipping submitting remote BLAST to avoid Time-out\n" if( $DEBUG );
+	foreach( $Test::ntest..$NUMTESTS) { 
+		skip('Skip to avoid timeout',1);
+	}
+} else {
+	my $r = $remote_blast2->submit_blast($inputfilename);
+	ok($r);
+	print STDERR "waiting..." if( $v > 0 );
+	while ( my @rids = $remote_blast2->each_rid ) {
+		foreach my $rid ( @rids ) {
+			my $rc = $remote_blast2->retrieve_blast($rid);
+			if( !ref($rc) ) {
+				if( $rc < 0 ) { 		
+					$remote_blast2->remove_rid($rid);
+					ok(0);
+				}
+				print STDERR "." if ( $v > 0 );
+				sleep 5;
+			} else { 
+				ok(1);
+				$remote_blast2->remove_rid($rid);
+				my $result = $rc->next_result;
+				my $count = 0;
+				while( my $hit = $result->next_hit ) {		
+					$count++;
+					next unless ( $v > 0);
+					print "sbjct name is ", $hit->name, "\n";
+					while( my $hsp = $hit->next_hsp ) {
+						print "score is ", $hsp->score, "\n";
+					} 
+				}
+				ok($count, 3);
+			}
+		}
+	}
+}
+
+
+my $remote_blastxml = Bio::Tools::Run::RemoteBlast->new
+  ('-verbose'    => $v,
+	'-prog'       => $prog,
+	'-data'       => $db,
+	'-readmethod' => 'xml',       # third run: results are read back as XML
+	'-expect'     => $e_val,
+  );
+# Restrict the XML run itself to E. coli, as is done for the other two runs.
+$remote_blastxml->submit_parameter('ENTREZ_QUERY', 'Escherichia coli[ORGN]');
+
+$remote_blastxml->retrieve_parameter('FORMAT_TYPE', 'XML');
+$inputfilename = Bio::Root::IO->catfile("t","data","ecolitst.fa");
+
+eval {require Bio::SearchIO::blastxml;};
+if ($@) {
+	foreach( $Test::ntest..$NUMTESTS) { 
+		skip('Skip blastxml tests probably because XML::SAX not installed',1);
+	}
+}
+elsif( $actually_submit == 0 ) {
+	print STDERR "Skipping submitting remote BLAST to avoid Time-out\n" if( $DEBUG );
+	foreach( $Test::ntest..$NUMTESTS) { 
+		skip('Skip to avoid timeout',1);
+	}
+}
+else {
+	my $r = $remote_blastxml->submit_blast($inputfilename);
+	ok($r);
+	print STDERR "waiting..." if( $v > 0 );
+	while ( my @rids = $remote_blastxml->each_rid ) {
+		foreach my $rid ( @rids ) {
+			my $rc = $remote_blastxml->retrieve_blast($rid);
+			if( !ref($rc) ) {
+				if( $rc < 0 ) { 		
+					$remote_blastxml->remove_rid($rid);
+					ok(0);
+				}
+				print STDERR "." if ( $v > 0 );
+				sleep 5;
+			} else { 
+				ok(1);
+				$remote_blastxml->remove_rid($rid);
+				my $result = $rc->next_result;
+				ok($result->database_name, qr/swissprot/i);
+				my $count = 0;
+				while( my $hit = $result->next_hit ) {		
+					$count++;
+					next unless ( $v > 0);
+					print "sbjct name is ", $hit->name, "\n";
+					while( my $hsp = $hit->next_hsp ) {
+						print "score is ", $hsp->score, "\n";
+					} 
+				}
+				ok($count, 3);
+			}
+		}
+	}
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/RepeatMasker.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RepeatMasker.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RepeatMasker.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 13;
+    plan tests => $NTESTS;
+}
+use Bio::Tools::RepeatMasker;
+use Bio::SeqIO;
+
+END {
+    for ( $Test::ntest..$NTESTS ) {
+        skip("Cannot complete RepeatMasker tests",1);
+    }
+}
+
+my $inputfilename= Bio::Root::IO->catfile("t","data","repeatmasker.fa.out");
+my $parser = Bio::Tools::RepeatMasker->new(-file => $inputfilename);
+my $i = 0;
+while (my $rpt = $parser->next_result){
+    unless( $i++ ) {
+	ok ($rpt->feature1->seq_id, "contig11600");
+	ok ($rpt->feature1->start, 1337);
+	ok ($rpt->feature1->end, 1407);
+	ok ($rpt->feature1->strand, 1);
+	ok ($rpt->feature1->primary_tag, "Simple_repeat");
+	ok ($rpt->feature1->source_tag, "RepeatMasker");
+	ok (scalar $rpt->feature1->get_tag_values('Target'), 3);
+
+	ok ($rpt->feature2->seq_id, "(TTAGGG)n");
+	ok ($rpt->feature2->start, 2);
+	ok ($rpt->feature2->end, 76);
+	ok ($rpt->feature1->primary_tag, "Simple_repeat");
+	ok ($rpt->feature1->source_tag, "RepeatMasker");
+	ok (scalar $rpt->feature2->get_tag_values('Target'), 3);
+    }
+}
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/RestrictionAnalysis.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RestrictionAnalysis.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RestrictionAnalysis.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,298 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: RestrictionAnalysis.t,v 1.6.8.3 2006/11/30 09:24:00 sendu Exp $
+
+# test for Bio::Restriction::Analysis.pm
+# written by Rob Edwards & Heikki Lehvaslaiho
+
+use strict;
+my $NUMTESTS;
+my $error;
+
+BEGIN {
+    eval { require Test::More; };
+    if( $@ ) {
+        use lib 't/lib';
+    }
+    use Test::More;
+    $NUMTESTS = 173;
+    $error  = 0;
+
+    plan tests => $NUMTESTS;
+
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+require_ok('Bio::Restriction::Enzyme');
+require_ok('Bio::Restriction::Enzyme::MultiCut');
+require_ok('Bio::Restriction::Enzyme::MultiSite');
+require_ok('Bio::Restriction::EnzymeCollection');
+require_ok('Bio::Restriction::Analysis');
+use_ok('Bio::Root::IO');
+use_ok('Bio::SeqIO');
+use Data::Dumper;
+
+#
+# Bio::Restriction::Enzyme
+#
+
+my ($re, $seq, $iso, %meth, $microbe, $source, @vendors, @refs, $name);
+ok $re=Bio::Restriction::Enzyme->new(-enzyme=>'EcoRI', -site=>'G^AATTC');
+isa_ok($re, 'Bio::Restriction::EnzymeI');
+is $re->cut, 1;
+ok ! $re->cut(0);
+is $re->complementary_cut, 6;
+ok $re->cut(1);
+
+is $re->complementary_cut,5;
+is $re->site,'G^AATTC';
+ok $seq = $re->seq;
+isa_ok($seq, 'Bio::PrimarySeqI');
+is $seq->seq, 'GAATTC';
+is $re->string,'GAATTC';
+is $re->revcom, 'GAATTC';
+is $re->recognition_length, 6;
+is $re->cutter, 6;
+is $re->palindromic, 1;
+is $re->overhang, "5'";
+is $re->overhang_seq, 'AATT';
+is $re->is_ambiguous, 0;
+
+ok $re->compatible_ends($re);
+
+ok $re->isoschizomers('BamHI', 'AvaI'); # not really true :)
+
+is my @isos=$re->isoschizomers, 2;
+is $isos[0],'BamHI';
+ok $re->purge_isoschizomers;
+is scalar($re->isoschizomers), 0;
+ok $re->methylation_sites(2,5); # not really true :)
+ok %meth = $re->methylation_sites;
+is $meth{2}, 5;
+ok $re->purge_methylation_sites;
+is scalar($re->methylation_sites), 0;
+
+
+ok $re->microbe('E. coli');
+ok $microbe = $re->microbe;
+is $microbe, "E. coli";
+ok $re->source("Rob"); # not really true :)
+
+ok $source = $re->source;
+is $source, "Rob";
+
+ok !$re->vendor;
+ok $re->vendors('NEB'); # my favorite
+ok @vendors = $re->vendors;
+is $vendors[0], "NEB";
+$re->purge_vendors;
+is scalar($re->vendors), 0;
+
+ok $re->references('Rob et al');
+ok @refs = $re->references;
+is $refs[0], "Rob et al";
+$re->purge_references;
+is scalar($re->references), 0;
+
+ok $re->name('BamHI');
+ok $name = $re->name;
+is $name, "BamHI";
+
+is $re->is_prototype, 0;
+ok $re->is_prototype(1);
+is $re->is_prototype, 1;
+
+is $re->prototype_name, $re->name;
+ok ! $re->is_prototype(0);
+is $re->prototype_name('XxxI'), 'XxxI';
+is $re->prototype_name, 'XxxI';
+
+
+is $re->cutter, 6;
+ok $re->seq->seq('RCATGY');
+is $re->cutter, 5;
+
+ok my $re2 = $re->clone;
+isnt $re, $re2;
+is $re->name, $re2->name;
+
+ok $re = Bio::Restriction::Enzyme->new(-enzyme=>'AciI', 
+										-site=>'C^CGC');
+is $re->palindromic, 0;
+is $re->is_palindromic, 0;
+
+#
+# Bio::Restriction::Enzyme::MultiSite
+#
+
+ok $re=new Bio::Restriction::Enzyme::MultiSite(-enzyme=>'TaqII',
+                                              -site=>'GACCGA',
+                                              -cut=>17,
+                                              -complementary_cut=>15
+                                             );
+ok $re2=new Bio::Restriction::Enzyme::MultiSite(-enzyme=>'TaqII',
+                                                -site=>'CACCCA',
+                                                -cut=>17,
+                                                -complementary_cut=>15
+                                               );
+isa_ok( $re, 'Bio::Restriction::EnzymeI');
+isa_ok( $re2, 'Bio::Restriction::EnzymeI');
+ok $re->others($re2);
+ok $re2->others($re);
+
+is $re->others, 1;
+is $re2->others, 1;
+
+ok my $re3 = $re->clone;
+isnt $re, $re3;
+is $re->name , $re3->name; # wouldn't this be a circular reference???
+#print Dumper $re, $re3;exit;
+
+#
+# Bio::Restriction::Enzyme::MultiCut
+#
+#Hin4I has four cut sites [(8/13)GAYNNNNNVTC(13/8)],
+
+ok $re = Bio::Restriction::Enzyme::MultiCut->new(-enzyme=>'Hin4I',
+                                              -site=>'GAYNNNNNVTC',
+                                              -cut=>-8,
+                                              -complementary_cut=>-13
+                                             );
+ok $re2 = Bio::Restriction::Enzyme::MultiCut->new(-enzyme=>'Hin4I',
+                                               -site=>'GAYNNNNNVTC',
+                                               -cut=>13,
+                                               -complementary_cut=>8
+                                              );
+isa_ok($re, 'Bio::Restriction::EnzymeI');
+isa_ok($re2, 'Bio::Restriction::EnzymeI');
+ok $re->others($re2);
+ok $re2->others($re);
+
+ok $re3 = $re->clone;
+isnt $re, $re3;
+is $re->name, $re3->name;
+#print Dumper $re, $re3;exit;
+
+#
+# Bio::Restriction::EnzymeCollection
+#
+
+my ($collection, $enz, $new_set);
+
+ok $collection = Bio::Restriction::EnzymeCollection->new(-empty=>1);
+is $collection->each_enzyme, 0;
+# default set
+$collection = Bio::Restriction::EnzymeCollection->new;
+is $collection->each_enzyme, 532;
+is $collection->each_enzyme, 532;
+
+ok $enz = $collection->get_enzyme('AclI');
+isa_ok($enz, 'Bio::Restriction::Enzyme');
+is my @enzymes=$collection->available_list, 532;
+
+ok $new_set = $collection->blunt_enzymes;
+isa_ok($enz, 'Bio::Restriction::Enzyme');
+is $new_set->each_enzyme, 114;
+
+#map {print $_->name, ": ", $_->cutter, "\n"; } $collection->each_enzyme;
+
+ok $new_set = $collection->cutters(8);
+is $new_set->each_enzyme, 17;
+
+ok $new_set=$collection->cutters(-start => 8, -end => 8);
+is $new_set->each_enzyme, 17;
+
+ok $new_set=$collection->cutters(-start => 6, -end => 8);
+is $new_set->each_enzyme, 293;
+
+ok $new_set=$collection->cutters(-start => 6, -end => 8,  -exclusive => 1);
+is $new_set->each_enzyme, 10;
+
+
+#
+# Restriction::Analysis
+#
+
+
+ok my $seqio=Bio::SeqIO->new(-file=>Bio::Root::IO->catfile("t","data","dna1.fa"),
+                         -format=>'fasta');
+ok $seq=$seqio->next_seq;
+
+ok my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);
+ok my $uniq = $ra->unique_cutters;
+
+# test most objects
+is $ra->unique_cutters->each_enzyme, 42, 'number of unique cutters';
+is $ra->fragments('RsaI'), 2, 'number of RsaI fragments';
+is $ra->max_cuts, 9, 'number of maximum cutters';
+is $ra->zero_cutters->each_enzyme, 477, 'number of zero cutters';
+is $ra->cutters->each_enzyme, 55, 'number of cutters';
+is $ra->cutters(3)->each_enzyme, 8, 'number of 3x cutters';
+is $ra->fragments('MseI'), 4, '4 MseI fragments';
+is $ra->cuts_by_enzyme('MseI'), 3, '3 MseI cut sites';
+
+#my $z = $ra->cutters(3);
+#my $out=Bio::Restriction::IO->new;
+#$out->write($z);
+
+
+is $ra->fragments('PspEI'), 2, 'expected 2 PspEI fragments';
+is $ra->cuts_by_enzyme('PspEI'), 1;
+is $ra->cuts_by_enzyme('XxxI'), undef;
+
+
+is my @ss = $ra->sizes('PspEI'), 2, 'expected 2 sizes for PspEI';
+is $ss[0] + $ss[1], $seq->length;
+
+is $ra->fragments('MwoI'), 1, 'not circular expected 1 fragments for MwoI as it doesnt cut';
+
+# circularise the sequence, regenerate the cuts and test again
+# note that there is one less fragment now!
+ok $seq->is_circular(1);
+
+# we need to regenerate all the cuts
+ok $ra->cut;
+
+is $ra->fragments('RsaI'), 1, 'number of RsaI fragments';
+is $ra->fragments('MseI'), 3, '3 circular MseI fragments';
+is $ra->cuts_by_enzyme('MseI'), 3, '3 circular MseI cut sites';
+is $ra->fragments('AciI'), 1, 'number for AciI a non-palindromic enzyme';
+
+is $ra->fragments('MwoI'), 1, '1 fragment for MwoI as it cuts across the circ point';
+
+ok my @rb=($collection->get_enzyme("AluI"), $collection->get_enzyme("MseI"), $collection->get_enzyme("MaeIII"));
+
+# test multiple digests
+ok my $rbc=Bio::Restriction::EnzymeCollection->new(-empty=>1);
+ok $rbc->enzymes(@rb);
+ok $ra->cut('multiple', $rbc);
+is $ra->fragments('multiple_digest'),7, '7 fragments in the multiple digest';
+is my @pos=$ra->positions('multiple_digest'),7, '7 positions in the multiple digest';
+is my @ssm = $ra->sizes('multiple_digest'),7, '7 sizes in the multiple digest';
+my $check_len;
+map {$check_len+=$_}@ssm;
+is $check_len, $seq->length;
+
+# now test the non-palindromic part
+# HindI is a non palindromic enzyme that cuts 9 times
+is $ra->positions('HindI'), 9, ' expected 9 cuts for HindI';
+
+# now we need to test the fragment maps
+# lets do this for HindI
+is my @fm=$ra->fragment_maps('HindI'), 9, 'expect 9 fragment maps for HindI';
+foreach my $fm (@fm) {
+ is exists $fm->{'seq'}, 1, "sequence for ".$fm->{'seq'};
+ is exists $fm->{'start'}, 1, "start at ".$fm->{'start'};
+ is exists $fm->{'end'}, 1, "end at ".$fm->{'end'};
+}
+
+# bug 2139
+
+eval {$re = Bio::Restriction::Enzyme->new(
+        -name    => 'Invalid',
+        -site    => 'G^AATTE' );};
+
+ok $@;
+like($@, qr(Unrecognized characters in site), 'bug 2139');

Added: trunk/packages/bioperl/branches/upstream/current/t/RestrictionIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RestrictionIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RestrictionIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,76 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: RestrictionIO.t,v 1.9.4.2 2006/11/30 09:24:00 sendu Exp $
+
+# test for Bio::Restriction::IO.pm
+# written by Rob Edwards
+
+use strict;
+
+my $NUMTESTS;
+
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    $NUMTESTS = 14;
+    plan tests => $NUMTESTS;
+}
+
+require Bio::Restriction::IO;
+use Bio::Root::IO;
+require Bio::Restriction::EnzymeCollection;
+
+my $tmpdir = File::Spec->catfile(qw(t tmp));
+mkdir($tmpdir,0777);
+
+ok(1);
+
+#
+# default enz set
+#
+ok my $in  = Bio::Restriction::IO->new();
+ok my $renzs = $in->read;
+ok $renzs->each_enzyme, 532;
+
+ok my $e = $renzs->get_enzyme('AccI');
+ok $e->name, 'AccI';
+
+ok my $out = Bio::Restriction::IO->new(-format => 'base', -file   => ">".File::Spec->catfile($tmpdir,"r"));
+#$out->write($renzs);
+#map {print $_->name, "\t", $_->site, "\t", $_->overhang, "\n"} $renzs->each_enzyme;
+
+#
+# withrefm, 31
+#
+
+ok $in  = Bio::Restriction::IO->new
+  (-format=> 'withrefm',
+	-verbose => 0,
+	-file => Bio::Root::IO->catfile("t","data","rebase.withrefm"));
+ok $renzs = $in->read;
+ok $renzs->each_enzyme, 11;
+
+#
+# itype2, 8
+#
+#enzyme name [tab] prototype [tab] recognition sequence with cleavage site
+#  [tab] methylation site and type [tab] commercial source [tab] references
+
+ok $in  = Bio::Restriction::IO->new
+    (-format=> 'itype2', -verbose => 0,
+     -file => Bio::Root::IO->catfile("t","data","rebase.itype2"));
+
+ok $renzs = $in->read;
+ok $renzs->each_enzyme, 16;
+
+ok  $out  = Bio::Restriction::IO->new(-format=>'base');
+
+END { cleanup(); }
+
+sub cleanup {   # removes the temporary directory; called from the END block
+   eval {       # any error raised while removing the directory is trapped and ignored
+      Bio::Root::IO->rmtree($tmpdir) if (-d $tmpdir);   # only attempted when the directory exists
+   };
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/RootI.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RootI.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RootI.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,94 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: RootI.t,v 1.8 2006/07/12 18:46:22 sac Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;    
+    plan tests => 11;
+}
+
+
+use Bio::Root::Root;
+
+my $obj = Bio::Root::Root->new();   # direct method call instead of indirect-object syntax
+ok defined($obj) && $obj->isa('Bio::Root::RootI');
+
+eval { $obj->throw('Testing throw') };
+ok $@ =~ /Testing throw/;# 'throw failed';
+
+# doesn't work in perl 5.00405
+#my $val;
+#eval {
+#    my ($tfh,$tfile) = $obj->tempfile();
+#    local * STDERR = $tfh;
+#    $obj->warn('Testing warn');
+#    close $tfh;    
+#    open(IN, $tfile) or die("cannot open $tfile");    
+#    $val = join("", <IN>) ;
+#    close IN;
+#    unlink $tfile;
+#};
+#ok $val =~ /Testing warn/;
+#'verbose(0) warn did not work properly' . $val;
+
+$obj->verbose(-1);
+eval { $obj->throw('Testing throw') };
+ok $@=~ /Testing throw/;# 'verbose(-1) throw did not work properly' . $@;
+
+eval { $obj->warn('Testing warn') };
+ok !$@;
+
+$obj->verbose(1);
+eval { $obj->throw('Testing throw') };
+ok $@ =~ /Testing throw/;# 'verbose(1) throw did not work properly' . $@;
+
+# doesn't work in perl 5.00405
+#undef $val;
+#eval {
+#    my ($tfh,$tfile) = $obj->tempfile();
+#    local * STDERR = $tfh;
+#    $obj->warn('Testing warn');
+#    close $tfh;
+#    open(IN, $tfile) or die("cannot open $tfile");    
+#    $val = join("", <IN>);
+#    close IN;
+#    unlink $tfile;
+#};
+#ok $val =~ /Testing warn/;# 'verbose(1) warn did not work properly' . $val;
+
+my @stack = $obj->stack_trace();
+ok scalar @stack, 2;
+
+my $verbobj = Bio::Root::Root->new(-verbose=>1,-strict=>1);
+ok $verbobj->verbose(), 1;
+
+$Bio::Root::Root::DEBUG = 1;   # set before constructing the next object, whose verbose() is then expected to be 1
+require Bio::Seq;
+my $seq = Bio::Seq->new;   # class is only loaded at runtime, so use the unambiguous Class->new form
+ok($seq->verbose, 1);
+
+# test for bug #1343
+my @vals = Bio::Root::RootI->_rearrange([qw(apples pears)], 
+					-apples => 'up the',
+					-pears  => 'stairs');
+eval { $obj->throw_not_implemented() };
+ok $@ =~ /Bio::Root::NotImplemented/;
+
+ok(shift @vals, 'up the');
+ok(shift @vals, 'stairs');
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/RootIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RootIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RootIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: RootIO.t,v 1.7.6.2 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test::More module
+    # we include the t dir (where a copy of Test/More.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if( $@ ) {
+		use lib 't/lib';
+    }
+    use Test::More;
+    plan tests => 31;
+}
+
+my $DEBUG = $ENV{BIOPERLDEBUG} || 0;
+$| = 1;
+
+use_ok('Bio::Root::IO');
+
+my $obj = Bio::Root::IO->new();
+ok defined($obj) && $obj->isa('Bio::Root::IO');
+
+#############################################
+# tests for exceptions/debugging/verbosity
+#############################################
+
+eval { $obj->throw('Testing throw') };
+like $@, qr/Testing throw/, 'throw()'; # 'throw failed';
+
+$obj->verbose(-1);
+eval { $obj->throw('Testing throw') };
+like $@, qr/Testing throw/, 'throw() verbose(-1)'; # 'verbose(-1) throw did not work properly' . $@;
+
+eval { $obj->warn('Testing warn') };
+ok !$@, 'warn()';
+
+$obj->verbose(1);
+eval { $obj->throw('Testing throw') };
+like $@, qr/Testing throw/, 'throw() verbose(1)'; # 'verbose(1) throw did not work properly' . $@;
+
+my @stack = $obj->stack_trace();
+ok scalar @stack == 2, 'stack_trace()';
+
+my $verbobj = Bio::Root::IO->new(-verbose=>1,-strict=>1);
+ok $verbobj->verbose() == 1, 'set verbosity to 1';
+
+ok $obj->verbose(-1);
+
+#############################################
+# tests for handle read and write abilities
+#############################################
+
+ok my $TESTINFILE = Bio::Root::IO->catfile(qw(t data test.waba));
+
+my($handle,$file) = $obj->tempfile;
+ok $handle;
+ok $file;
+
+#test with files
+
+ok my $rio = Bio::Root::IO->new(-file=>$TESTINFILE);
+ok $rio->mode eq 'r', 'filename, read';
+
+ok my $wio = Bio::Root::IO->new(-file=>">$file");
+ok $wio->mode eq 'w', 'filename, write';
+
+# test with handles
+
+ok open(my $I, '<', $TESTINFILE);   # explicit read mode: three-argument open, matching the write handle below
+ok open(my $O, '>', $file);
+
+ok $rio = Bio::Root::IO->new(-fh=>$I);
+ok $rio->mode eq 'r', 'handle, read';
+
+ok $wio = Bio::Root::IO->new(-fh=>$O);
+ok $wio->mode eq 'w', 'handle, write';
+
+##############################################
+# tests _pushback for multi-line buffering
+##############################################
+
+my $line1 = $rio->_readline;
+my $line2 = $rio->_readline;
+
+ok $rio->_pushback($line1);
+ok $rio->_pushback($line2);
+
+my $line3 = $rio->_readline;
+my $line4 = $rio->_readline;
+my $line5 = $rio->_readline;
+
+ok $line1 eq $line3;
+ok $line2 eq $line4;
+ok $line5 ne $line4;
+
+ok close($I);
+ok close($O);
+
+##############################################
+# tests http retrieval
+##############################################
+
+SKIP: {
+  skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 2 unless $DEBUG;
+
+  my $TESTURL = 'http://www.google.com/index.html';
+  
+  ok $rio = Bio::Root::IO->new(-url=>$TESTURL), 'default -url method';
+  
+  if ($Bio::Root::IO::HAS_LWP) {
+    $Bio::Root::IO::HAS_LWP = 0;
+    ok $rio = Bio::Root::IO->new(-url=>$TESTURL), 'non-LWP -url method';
+  } 
+  else {
+    ok 1, 'non-LWP -url method not needed as non-LWP was default';
+  }
+}  
+  
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/RootStorable.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/RootStorable.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/RootStorable.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,90 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: RootStorable.t,v 1.3.6.2 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {
+    eval { require Test::More; };
+    if( $@ ) {
+        use lib 't/lib';
+    }
+    use Test::More;
+    plan tests => 35;
+
+}
+
+$| = 1;
+
+use_ok('Bio::Root::Storable');
+
+foreach my $mode( "BINARY", "ASCII" ){
+    if( $mode eq "ASCII" ){
+		no warnings;
+        $Bio::Root::Storable::BINARY = 0;
+    }
+
+    #------------------------------
+    # Test the easy bits that don't need file IO
+    my $obj = Bio::Root::Storable->new();
+    ok defined($obj) && $obj->isa('Bio::Root::Storable');
+
+    eval { $obj->throw('Testing throw') };
+    ok $@ =~ /Testing throw/;   # 'throw failed';
+
+    $obj->{_test}  = "_TEST";   # Provide test attributes
+    $obj->{__test} = "__TEST";  # 
+
+    my $state = $obj->serialise;
+    ok length($state) > 0;
+
+    my $clone = $obj->clone;
+    ok defined($clone) && $clone->isa('Bio::Root::Storable');   # && (not "and") so the isa check is part of the tested expression
+    ok $clone->{_test} eq "_TEST" && $clone->{__test}  eq "__TEST";
+
+    #------------------------------
+    # Test standard file IO 
+    my $file = $obj->store;
+    ok $file && -f $obj->statefile;
+
+    my $retrieved;
+    eval { $retrieved = Bio::Root::Storable->retrieve( $file ) };
+    ok defined($retrieved) && $retrieved->isa('Bio::Root::Storable');
+    ok $retrieved->{_test} eq "_TEST" && ! exists $retrieved->{__test};
+
+    my $skel = $obj->new_retrievable;
+    ok defined($skel) && $skel->isa('Bio::Root::Storable');
+    ok ! exists $skel->{_test} && ! exists $skel->{__test};
+    ok $skel->retrievable;
+
+    eval { $skel->retrieve };
+    ok ! $skel->retrievable;
+    ok $skel->{_test} eq "_TEST" && ! exists $skel->{__test};
+
+    my $obj2 = Bio::Root::Storable->new();
+    $obj2->template('TEST_XXXXXX');
+    $obj2->suffix('.state');
+    my $file2 = $obj2->store;
+    ok $file2 =~ /TEST_\w{6}?\.state$/ && -f $file2;   # && (not "and") so the file-existence check is actually tested
+
+    #------------------------------
+    # Test recursive file IO
+    $obj->{_test_lazy} = $obj2;
+    $obj->store;
+    my $retrieved2;
+    eval { $retrieved2 = Bio::Root::Storable->retrieve( $obj->token ) };
+    ok $retrieved2->{_test_lazy} && $retrieved2->{_test_lazy}->retrievable;
+
+    #------------------------------
+    # Clean up
+    # Should only be 2 object files; all others were clones in one way or another
+    $obj->remove;
+    ok ! -f $obj->statefile;
+    $obj2->remove;
+    ok ! -f $obj2->statefile;
+}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/SNP.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SNP.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SNP.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,44 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: SNP.t,v 1.1 2003/05/14 12:04:06 heikki Exp $
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 13 }
+
+
+use Bio::Variation::SNP;
+
+ok(1);
+
+my($a);
+
+#
+# SNP
+#
+
+ok $a = new Bio::Variation::SNP;
+ok $a->id('123'), 123;
+eval { $a->di('123'); };
+ok( $@ ? 1 : 0 );   # always emit a test: fails explicitly if the di() call did not throw
+ok $a->validated('by-cluster'), 'by-cluster';
+my @alleles = ('A', 'T');
+ok $a->validated(\@alleles), \@alleles;
+ok $a->desc('abc'), 'abc'; # Bio::Variation::Allele method
+ok $a->chromosome('X'), 'X'; # Bio::Variation::Allele method
+ok my $s = $a->add_subsnp;
+ok $s->is_subsnp;
+ok $s->handle('HGBASE'), 'HGBASE';
+ok $a->add_subsnp;
+ok $a->each_subsnp, 2;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Scansite.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Scansite.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Scansite.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,82 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Scansite.t,v 1.1 2003/11/18 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$ERROR = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 12;
+	plan tests => $NUMTESTS;
+
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if( $@ ) {
+		warn("IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests");
+		$ERROR = 1;
+	}
+}
+
+END {
+	if ($DEBUG) {
+		foreach ( $Test::ntest..$NUMTESTS) {
+			skip('unable to run all of the Scansite tests',1);
+		}
+	} else {
+		foreach ( $Test::ntest..$NUMTESTS) {
+			skip('set env BIOPERLDEBUG to run tests over web',1);
+		}
+	}
+}
+
+exit 0 if $ERROR ==  1;
+
+use Data::Dumper;
+
+require Bio::Tools::Analysis::Protein::Scansite;
+use Bio::SeqIO;
+use Bio::PrimarySeq;
+require Bio::WebAgent;
+
+ok 1;
+
+my $verbose = 0;
+$verbose = 1 if $DEBUG;
+
+ok my $tool = Bio::WebAgent->new(-verbose =>$verbose);
+
+
+my $seqio=new Bio::SeqIO( -verbose => $verbose,
+                  -format => 'swiss',
+                  -file   => Bio::Root::IO->catfile('t','data', 'swiss.dat'));
+
+my $seq = $seqio->next_seq();
+ok $tool = Bio::Tools::Analysis::Protein::Scansite->new( 
+					-seq=>$seq->primary_seq);
+ok $tool->stringency('Low');
+ok $tool->stringency(), 'Low';
+ok $tool->protein_id(), $tool->seq->display_id();
+exit unless $DEBUG;
+ok $tool->run ();
+exit if $tool->status eq 'TERMINATED_BY_ERROR';
+ok my $raw = $tool->result('');
+print $raw if $verbose;
+ok my $parsed = $tool->result('parsed');
+ok $parsed->[0]{'site'}, 'T101';
+ok my @res = $tool->result('Bio::SeqFeatureI');
+ok $res[0]->start, 101;


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/Scansite.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/SearchDist.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SearchDist.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SearchDist.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,52 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 3;
+
+    eval { local * STDERR; require Bio::Ext::Align };
+    if ( $@ ) {
+	foreach ( 1..3) {
+	    skip('Bio::Ext::Align not loaded',1);
+	}
+        $error = 1;
+    }
+}
+
+if( $error == 1 ) { 
+    exit(0);
+}
+
+require Bio::SearchDist;
+ok(1);
+
+my $dist = new Bio::SearchDist;
+
+ok ref($dist), 'Bio::SearchDist';
+
+my @scores;
+foreach my $i ( 1..5000 ) {
+    my $score = rand(1300);
+    #print STDERR "Got $score\n";
+    $dist->add_score($score);
+    push(@scores,$score);
+}
+
+# this just checks that this routine runs ;)
+# as the distribution is not gaussian, it gives
+# non-sensical results    
+
+ok $dist->fit_Gaussian(1200), 1;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/SearchIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SearchIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SearchIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1941 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: SearchIO.t,v 1.96.4.6 2006/11/30 09:24:00 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error;
+
+use vars qw($LASTXMLTEST); 
+use strict;
+use Dumpvalue();
+my $dumper = new Dumpvalue();
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test::More; };
+	if( $@ ) {
+		use lib 't/lib';
+	}
+	use Test::More;
+    
+	use vars qw($NTESTS);
+	$NTESTS = 1340;
+	$LASTXMLTEST = 67;
+	$error = 0;
+
+	plan tests => $NTESTS; 
+}
+
+use_ok('Bio::SearchIO');
+use_ok('Bio::Root::IO');
+use_ok('Bio::SearchIO::Writer::HitTableWriter');
+use_ok('Bio::SearchIO::Writer::HTMLResultWriter');
+
+END { 
+    unlink 'searchio.out';
+    unlink 'searchio.html';
+}
+
+my ($searchio, $result,$iter,$hit,$hsp);
+
+SKIP: {
+	eval {
+		require XML::SAX;
+		require HTML::Entities;
+	};
+	skip("XML::SAX or HTML::Entities not loaded.  Skipping XML tests",68) if $@;
+    eval {
+		# test with RPSBLAST data first
+		# this needs to be eval'd b/c the XML::SAX parser object is
+		# instantiated in the constructor
+		$searchio = new Bio::SearchIO ('-tempfile' => 1,
+			   '-format' => 'blastxml',
+			   '-file'   => Bio::Root::IO->catfile('t','data','ecoli_domains.rps.xml'),
+			   '-verbose' => 2); # promote warn to throw so we can skip over problems
+		$result = $searchio->next_result;
+	};
+	if ($@ && $@ =~ m{Handler couldn't resolve external entity}) {
+		skip("XML::SAX::Expat does not work with XML tests; skipping",68);
+	} elsif ($@) {
+		skip("Problem with XML::SAX setup: check ParserDetails.ini; skipping XML tests",68);
+	}
+    isa_ok($result, 'Bio::Search::Result::ResultI');    
+    is($result->database_name, '/data_2/jason/db/cdd/cdd/Pfam', 'database_name()');
+    is($result->query_name,'gi|1786182|gb|AAC73112.1|','query_name()');
+    is($result->query_description, '(AE000111) thr operon leader peptide [Escherichia coli]');
+    is($result->query_accession, 'AAC73112.1');
+    is($result->query_length, 21);
+    is($result->algorithm, 'BLASTP');
+    is($result->algorithm_version, 'blastp 2.1.3 [Apr-1-2001]');
+
+    is($result->available_parameters, 8);
+    is($result->get_parameter('gapext'), 1);
+    is($result->available_statistics, 5);
+    is($result->get_statistic('lambda'), 0.267);
+
+# this result actually has a hit
+    $result = $searchio->next_result;
+    $hit = $result->next_hit;
+    is($hit->name, 'gnl|Pfam|pfam00742');
+    is($hit->description(), 'HomoS_dh, HomoS dehydrogenase');
+    is($hit->accession, 'pfam00742');
+    is($hit->length, 310);
+
+    $hsp = $hit->next_hsp;
+    is($hsp->query->seq_id, $result->query_name,'query name on HSP');
+    is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
+    is($hsp->hit->seq_id, $hit->name,'hitname');
+    is($hsp->hit->seqdesc, $hit->description,'hitdesc');
+    is($hsp->pvalue, undef);
+    is(sprintf("%g",$hsp->evalue), sprintf("%g",'1.46134e-90'));
+    is($hsp->score, 838);
+    is($hsp->bits,327.405);
+    is($hsp->query->start, 498);
+    is($hsp->query->end,815);
+    is($hsp->hit->start, 3);
+    is($hsp->hit->end, 310);
+    is($hsp->query->frame,0);
+    is($hsp->hit->frame,0);
+    is(sprintf("%.2f", $hsp->percent_identity), 37.73);
+    is(sprintf("%.4f", $hsp->frac_identical('hit')), 0.3994);
+    is(sprintf("%.4f", $hsp->frac_identical('query')), 0.3868);
+    is(sprintf("%.4f",$hsp->query->frac_identical), 0.3868);
+
+    is(sprintf("%.4f",$hsp->frac_conserved('total')),0.5245);
+    is(sprintf("%.4f",$hsp->frac_conserved('hit')),0.5552);
+    is(sprintf("%.4f",$hsp->frac_conserved('query')),0.5377);
+    is($hsp->gaps('total'), 26);
+    is($hsp->length('hsp'), 326);
+    is($hsp->query_string, 'LRVCGVANSKALLTNVHGLNLENWQEELAQAKEPF-NLGRLIRLVKEYHLLN----PVIVDCTSSQAVAD-QYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE-GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARET-GRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS');
+    is($hsp->hit_string, 'GVVTGITDSREMLLSRIGLPLEIWKVALRDLEKPRKDLGKLDLTDDAFAVVDDPDIDVVVELTGGIEVARELYLDALEEGKHVVTANKALNASHGDEYLAL---AEKSGVDVLYEAAVAGGIPIIKTLRELLATGDRILKIEGIFNGTTNFILSEMDEKGLPFSDVLAEAQELGYTEADPRDDVEGIDAARKLAILARIAFGIELELDDVYVEGISPITAEDISSADEFGYTLKLLDEAMRQRVEDAESGGEVLRYPTLIPE-------------DHPLASVKGSDNAVAVEGEAYG--PLMFYGPGAGAEPTASAVVADIVRIAR');
+    is($hsp->homology_string, '  V G+ +S+ +L +  GL LE W+  L   ++P  +LG+L      + +++     V+V+ T    VA   Y D L EG HVVT NK  N S  D Y  L   AEKS    LY+  V  G+P+I+ L+ LL  GD ++K  GI +G+ ++I  ++DE G+ FS+    A+E+GYTE DPRDD+ G+D ARKL ILAR   G ELEL D+ +E + P           F   L  LD+    RV  A   G+VLRY   I E             + PL  VK  +NA+A     Y   PL+  G GAG + TA+ V AD++R   ');
+    is(join(' ', $hsp->seq_inds('query', 'gap',1)), '532 548-551 562 649 690');
+# one more 
+    $hit = $result->next_hit;
+    isa_ok($hit,'Bio::Search::Hit::HitI');
+    
+    my $results_left = 8;
+    while( $result = $searchio->next_result ) { ok($result); $results_left--; }
+    is($results_left, 0);
+
+
+    $searchio = new Bio::SearchIO(-format => 'blastxml', 
+				  -file => Bio::Root::IO->catfile('t','data','plague_yeast.bls.xml'));
+
+    $result = $searchio->next_result;
+
+    is($result->database_name, 'yeast.aa');
+    is($result->query_name, 'gi|5763811|emb|CAB53164.1|');
+    is($result->query_description,  'putative transposase [Yersinia pestis]');
+    is($result->query_accession, 'CAB53164.1');
+    is($result->query_length, 340);
+
+    $hit = $result->next_hit;
+    ok(! $hit);
+
+    $searchio = new Bio::SearchIO(-format => 'blastxml', 
+				  -file => Bio::Root::IO->catfile('t','data','mus.bls.xml'));
+
+    $result = $searchio->next_result;
+
+    is($result->database_name,'Hs15_up1000');
+    is($result->query_name,'NM_011441_up_1000_chr1_4505586_r');
+    is($result->query_description,'chr1:4505586-4506585');
+    is($result->query_accession,'NM_011441_up_1000_chr1_4505586_r');
+    is($result->query_length,'1000');
+    $hit = $result->next_hit;
+    is($hit->name,'NM_001938_up_1000_chr1_93161154_f');
+    is($hit->description,'chr1:93161154-93162153');
+    is($hit->accession,'3153');
+    is($hit->length,'1000');
+}
+
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+				  '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.bls'));
+
+$result = $searchio->next_result;
+# $dumper->dumpValue($result);
+
+is($result->database_name, 'ecoli.aa', 'database_name()');
+is($result->database_entries, 4289);
+is($result->database_letters, 1358990);
+
+is($result->algorithm, 'BLASTP');
+like($result->algorithm_version, qr/^2\.1\.3/);
+like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/); # NOTE(review): the unescaped | characters make this an alternation, so the pattern already matches on the substring gi alone - confirm intent
+is($result->query_length, 820);
+is($result->get_statistic('kappa'), '0.135');
+is($result->get_statistic('kappa_gapped'), '0.0410');
+is($result->get_statistic('lambda'), '0.319');
+is($result->get_statistic('lambda_gapped'), '0.267');
+is($result->get_statistic('entropy'), '0.383');
+is($result->get_statistic('entropy_gapped'), '0.140');
+
+is($result->get_statistic('dbletters'), 1358990);
+is($result->get_statistic('dbentries'), 4289);
+is($result->get_statistic('effective_hsplength'), 47);
+is($result->get_statistic('effectivespace'), 894675611);
+is($result->get_parameter('matrix'), 'BLOSUM62');
+is($result->get_parameter('gapopen'), 11);
+is($result->get_parameter('gapext'), 1);
+is($result->get_statistic('S2'), '92');
+is($result->get_statistic('S2_bits'), '40.0');
+is($result->get_parameter('expect'), '1.0e-03');
+is($result->get_statistic('num_extensions'), '82424');
+
+
+my @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 1567],
+	      [ 'gb|AAC76922.1|', 810, 'AAC76922', '1e-91', 332],
+	      [ 'gb|AAC76994.1|', 449, 'AAC76994', '3e-47', 184]);
+my $count = 0;
+while( $hit = $result->next_hit ) {
+    my $d = shift @valid;
+
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1);
+            is($hsp->query->end, 820);
+            is($hsp->hit->start, 1);
+            is($hsp->hit->end, 820);
+            is($hsp->length('hsp'), 820);
+            is($hsp->start('hit'), $hsp->hit->start);
+            is($hsp->end('query'), $hsp->query->end);
+            is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
+            is($hsp->evalue, '0.0');
+            is($hsp->score, 4058);
+            is($hsp->bits,1567);	    	    
+            is(sprintf("%.2f",$hsp->percent_identity), 98.29);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.9829);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.9829);
+            is($hsp->gaps, 0);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+			       '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.wublastp'));
+
+$result = $searchio->next_result;
+
+is($result->database_name, 'ecoli.aa');
+is($result->database_letters, 1358990);
+is($result->database_entries, 4289);
+is($result->algorithm, 'BLASTP');
+like($result->algorithm_version, qr/^2\.0MP\-WashU/);
+like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/); # NOTE(review): the unescaped | characters make this an alternation, so the pattern already matches on the substring gi alone - confirm intent
+is($result->query_accession, 'AAC73113.1');
+
+is($result->query_length, 820);
+is($result->get_statistic('kappa'), 0.136);
+is($result->get_statistic('lambda'), 0.319);
+is($result->get_statistic('entropy'), 0.384);
+is($result->get_statistic('dbletters'), 1358990);
+is($result->get_statistic('dbentries'), 4289);
+is($result->get_parameter('matrix'), 'BLOSUM62');
+is($result->get_statistic('Frame+0_lambda_used'), '0.319');
+is($result->get_statistic('Frame+0_kappa_used'), '0.136');
+is($result->get_statistic('Frame+0_entropy_used'), '0.384');
+
+is($result->get_statistic('Frame+0_lambda_computed'), '0.319');
+is($result->get_statistic('Frame+0_kappa_computed'), '0.136');
+is($result->get_statistic('Frame+0_entropy_computed'), '0.384');
+
+is($result->get_statistic('Frame+0_lambda_gapped'), '0.244');
+is($result->get_statistic('Frame+0_kappa_gapped'), '0.0300');
+is($result->get_statistic('Frame+0_entropy_gapped'), '0.180');
+
+@valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141],   # per hit: name, length, accession, significance, raw score
+	   [ 'gb|AAC76922.1|', 810, 'AAC76922', '3.1e-86', 844],
+	   [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483]);
+$count = 0;
+while( $hit = $result->next_hit ) {
+    my $d = shift @valid;
+
+    if ($count==1) {
+        # Test HSP contig data returned by SearchUtils::tile_hsps()
+        # Second hit has two hsps that overlap.
+        my($qcontigs, $scontigs) = Bio::Search::SearchUtils::tile_hsps($hit);
+        # Query contigs
+        is($qcontigs->[0]->{'start'}, 5);
+        is($qcontigs->[0]->{'stop'}, 812);
+        is($qcontigs->[0]->{'iden'}, 250);
+        is($qcontigs->[0]->{'cons'}, 413);
+        # Subject contigs
+        is($scontigs->[0]->{'start'}, 16);
+        is($scontigs->[0]->{'stop'}, 805);
+        is($scontigs->[0]->{'iden'}, 248);
+        is($scontigs->[0]->{'cons'}, 410);
+    }
+
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1);
+            is($hsp->query->end, 820);
+            is($hsp->hit->start, 1);
+            is($hsp->hit->end, 820);
+            is($hsp->length('hsp'), 820);
+            
+            is($hsp->evalue, '0.0');
+            is($hsp->pvalue, '0.0');
+            is($hsp->score, 4141);
+            is($hsp->bits,1462.8);	    	    
+            is($hsp->percent_identity, 100);
+            is($hsp->frac_identical('query'), 1.00);
+            is($hsp->frac_identical('hit'), 1.00);
+            is($hsp->gaps, 0);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+# test that add hit really works properly for BLAST objects
+# bug 1611
+my @hits = $result->hits;
+$result->add_hit($hits[0]);
+is($result->num_hits, @hits + 1);
+
+# test WU-BLAST -noseqs option
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+			       '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.noseqs.wublastp'));
+
+$result = $searchio->next_result;
+
+is($result->database_name, 'ecoli.aa');
+is($result->database_letters, 1358990);
+is($result->database_entries, 4289);
+is($result->algorithm, 'BLASTP');
+like($result->algorithm_version, qr/^2\.0MP\-WashU/);
+like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/); # NOTE(review): the unescaped | characters make this an alternation, so the pattern already matches on the substring gi alone - confirm intent
+is($result->query_accession, 'AAC73113.1');
+
+is($result->query_length, 820);
+is($result->get_statistic('kappa'), 0.135);
+is($result->get_statistic('lambda'), 0.319);
+is($result->get_statistic('entropy'), 0.384);
+is($result->get_statistic('dbletters'), 1358990);
+is($result->get_statistic('dbentries'), 4289);
+is($result->get_parameter('matrix'), 'BLOSUM62');
+is($result->get_statistic('Frame+0_lambda_used'), '0.319');
+is($result->get_statistic('Frame+0_kappa_used'), '0.135');
+is($result->get_statistic('Frame+0_entropy_used'), '0.384');
+
+is($result->get_statistic('Frame+0_lambda_computed'), '0.319');
+is($result->get_statistic('Frame+0_kappa_computed'), '0.135');
+is($result->get_statistic('Frame+0_entropy_computed'), '0.384');
+
+is($result->get_statistic('Frame+0_lambda_gapped'), '0.244');
+is($result->get_statistic('Frame+0_kappa_gapped'), '0.0300');
+is($result->get_statistic('Frame+0_entropy_gapped'), '0.180');
+
+@valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141],   # per hit: name, length, accession, significance, raw score
+	   [ 'gb|AAC76922.1|', 810, 'AAC76922', '6.6e-93', 907],
+	   [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483]);
+$count = 0;
+while( $hit = $result->next_hit ) {
+    my $d = shift @valid;
+
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1);
+            is($hsp->query->end, 820);
+            is($hsp->hit->start, 1);
+            is($hsp->hit->end, 820);
+            is($hsp->length('hsp'), 820);
+            
+            is($hsp->evalue , '0.');
+            is($hsp->pvalue , '0.');
+            is($hsp->score, 4141);
+            is($hsp->bits,1462.8);	    	    
+            is($hsp->percent_identity, 100);
+            is($hsp->frac_identical('query'), 1.00);
+            is($hsp->frac_identical('hit'), 1.00);
+            is($hsp->gaps, 0);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+# test tblastx 
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+			       '-file'   => Bio::Root::IO->catfile('t','data','HUMBETGLOA.tblastx'));
+
+$result = $searchio->next_result;
+is($result->database_name, 'ecoli.nt');
+is($result->database_letters, 4662239);
+is($result->database_entries, 400);
+is($result->algorithm, 'TBLASTX');
+like($result->algorithm_version, qr/^2\.1\.2/);
+is($result->query_name, 'HUMBETGLOA');
+is($result->query_description, 'Human haplotype C4 beta-globin gene, complete cds.');
+is($result->query_length, 3002);
+is($result->get_statistic('kappa'), 0.135);
+is($result->get_statistic('lambda'), 0.318);
+is($result->get_statistic('entropy'), 0.401);
+is($result->get_statistic('dbletters'), 4662239);
+is($result->get_statistic('dbentries'), 400);
+is($result->get_statistic('T'), 13);
+is($result->get_statistic('X1'), 16);
+is($result->get_statistic('X1_bits'), 7.3);
+is($result->get_statistic('X2'), 0);
+is($result->get_statistic('X2_bits'), '0.0');
+is($result->get_statistic('S1'), 41);
+is($result->get_statistic('S1_bits'), 21.7);
+is($result->get_statistic('S2'), 53);
+is($result->get_statistic('S2_bits'), 27.2);
+
+is($result->get_statistic('decayconst'), 0.1);
+
+is($result->get_parameter('matrix'), 'BLOSUM62');
+
+@valid = ( [ 'gb|AE000479.1|AE000479', 10934, 'AE000479', '0.13', 34],
+	   [ 'gb|AE000302.1|AE000302', 10264, 'AE000302', '0.61', 31],
+	   [ 'gb|AE000277.1|AE000277', 11653, 'AE000277', '0.84', 31]);
+$count = 0;
+
+while( $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is($hit->significance, shift @$d );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1057);
+            is($hsp->query->end, 1134);
+            is($hsp->query->strand, 1);
+            is($hsp->strand('query'), $hsp->query->strand);
+            is($hsp->hit->end, 5893);
+            is($hsp->hit->start, 5816);
+            is($hsp->hit->strand, -1);
+            is($hsp->strand('sbjct'), $hsp->subject->strand);
+            is($hsp->length('hsp'), 26);
+            
+            is($hsp->evalue , 0.13);
+            is($hsp->score, 67);
+            is($hsp->bits,33.6);
+            is(sprintf("%.2f",$hsp->percent_identity), 42.31);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), '0.4231');
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.4231');
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 1);
+            is($hsp->gaps, 0);	    
+            is($hsp->query_string, 'SAYWSIFPPLGCWWSTLGPRGSLSPL');
+            is($hsp->hit_string, 'AAVWALFPPVGSQWGCLASQWRTSPL');
+            is($hsp->homology_string, '+A W++FPP+G  W  L  +   SPL');
+            is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '355 364 365 367 368 370 371 373-375');
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+$searchio = new Bio::SearchIO(-format => 'fasta',
+				 -file   => File::Spec->catfile(qw(t data HUMBETGLOA.FASTA)) );
+$result = $searchio->next_result;
+like($result->database_name, qr/dros_clones.2.5/);
+is($result->database_letters, 112936249);
+is($result->database_entries, 657);
+is($result->algorithm, 'FASTN');
+is($result->algorithm_version, '3.3t08');
+is($result->query_name, "HUMBETGLOA");
+is($result->query_description, "Human haplotype C4 beta-globin gene, complete cds.");
+is($result->query_length, 3002);
+is($result->get_parameter('gapopen'), -16);
+is($result->get_parameter('gapext'), -4);
+is($result->get_parameter('ktup'), 6);
+
+is($result->get_statistic('lambda'), 0.0823);
+is($result->get_statistic('dbletters'), 112936249);
+is($result->get_statistic('dbentries'), 657);
+
+@valid = ( [ 'BACR21I23', 73982, 'BACR21I23', '0.017', 44.2],
+	   [ 'BACR40P19', 73982, 'BACR40P19', '0.017', 44.2],
+	   [ 'BACR30L17', 32481, 'BACR30L17', '0.018', 44.1]);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is($hit->significance, shift @$d );
+    is($hit->raw_score, shift @$d );
+    is($hit->rank, $count + 1);
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 31);
+            is($hsp->query->end, 289);
+            is($hsp->query->strand, -1);
+            is($hsp->hit->end, 65167);
+            is($hsp->hit->start, 64902);
+            is($hsp->hit->strand, 1);
+            is($hsp->length('hsp'), 267);	    
+            is($hsp->evalue , 0.017);
+            is($hsp->score, 134.5);
+            is($hsp->bits,44.2);
+            is(sprintf("%.2f",$hsp->percent_identity), '57.30');
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5907); 
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.5752); 
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 0);
+            is($hsp->gaps, 159);
+            is($hsp->gaps('query'), 8);
+            is($hsp->gaps('hit'),1);
+            is($hsp->query_string, 'GATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTA----ATAAATTATGTCTAAAAATAGAAT---AAATACAAATCAATGTGCTCTGTGCATTA-GTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCAGTCAAAAATACAAATAAATAAAAAGTCACTTACAACCCAAAGTGTGACTATCAATGGGGTAATCAGTGGTGTCAAATAGGAGGT');
+            is($hsp->hit_string, 'GATGTCCTTGGTGGATTATGGTGTTAGGGTATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATATAAAATATAATATAAAATATAATATAAAATAAAATATAAAATAAAATATAAAATAAAATATAAAATAAAATATAAAATAAAATAT-AATATAAAATATAAAATAAAATATAATATAAAATATAATATAAAATATAATATAAAATATAATATAAAATA');
+            is($hsp->homology_string, '                              :::::::::::::::::: : ::::: :: : : ::: ::::: ::::::::  ::  :: : :   : : : : :  ::    : :: ::   ::    : ::: :::     :::::: :::   ::::: ::  :::  :    :    : ::   :::  : ::   : :   : : :: :   :: : : :: : :       ::  : : ::: ::: ::  ::::: ::: : :  :: ::   ::: : : : ::: ::   '.' 'x60);
+            is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '33 37 39 41 43 47-49 52 55 56 58 60 64 70 71 74 78 82 84 86 87 90-96 98 100 103 105 107 110-112 114 117 119 121-123 125 127-129 132 134 135 139-141 143 145-148 150-153 155 156 160 161 164 170 173 180-184 188 192 194 196-198 201 204 206-209 212 213 215 217 219 221 223-225 227 229 232 233 236 237 246 252 256 258 260 263 269 271');
+            is(join(' ', $hsp->seq_inds('query', 'conserved',1)), '31 32 34-36 38 40 42 44-46 50 51 53 54 57 59 61-63 65-69 72 73 75-77 79-81 83 85 88 89 97 99 101 102 104 106 108 109 113 115 116 118 120 124 126 130 131 133 136-138 141 142 144 149 154 157-159 162 163 165-172 174-179 185-187 189-191 193-195 199 200 202 203 205 210 211 214 216 218 220 222 226 228 230 231 234 235 238-245 247-251 253-255 257 259 261 262 264-268 270 272-289');
+            # note: the reason this is not the same percent id above
+            # is we are calculating average percent id
+            is(sprintf("%.2f",$hsp->get_aln->percentage_identity()), '59.30');
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+$searchio = new Bio::SearchIO(-format => 'fasta',
+				 -file   => File::Spec->catfile(qw(t data cysprot1.FASTA)));
+$result = $searchio->next_result;
+like($result->database_name, qr/ecoli.aa/);
+is($result->database_letters, 1358987);
+is($result->database_entries, 4289);
+is($result->algorithm, 'FASTP');
+is($result->algorithm_version, '3.3t08');
+is($result->query_name, 'CYS1_DICDI');
+is($result->query_length, 343);
+is($result->get_parameter('gapopen'), -12);
+is($result->get_parameter('gapext'), -2);
+is($result->get_parameter('ktup'), 2);
+
+is($result->get_statistic('lambda'), 0.1456);
+is($result->get_statistic('dbletters'), 1358987);
+is($result->get_statistic('dbentries'), 4289);
+
+
+@valid = ( [ 'gi|1787478|gb|AAC74309.1|', 512, 'AAC74309', 1.2, 29.2],
+	   [ 'gi|1790635|gb|AAC77148.1|', 251, 'AAC77148', 2.1, 27.4],
+	   [ 'gi|1786590|gb|AAC73494.1|', 94, 'AAC73494',  2.1, 25.9]);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is($hit->significance, shift @$d );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 125);
+            is($hsp->query->end, 305);
+            is($hsp->query->strand, 0);
+            is($hsp->hit->start, 2);
+            is($hsp->hit->end, 181);
+            is($hsp->hit->strand, 0);
+            is($hsp->length('hsp'), 188);	    
+            is($hsp->evalue , 1.2);
+            is($hsp->score, 109.2);
+            is($hsp->bits,29.2);
+            is(sprintf("%.2f",$hsp->percent_identity), 23.94);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.2486);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.2500');
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 0);
+            is($hsp->gaps('query'), 7);
+            is($hsp->gaps, 49);	    
+            is($hsp->query_string, 'NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTT-GNV----EGQHFISQNKLVSLSEQNLVDCDHECME-YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGP-LAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII');
+            is($hsp->hit_string, (' 'x29).'MKIRSQVGMVLNLDKCIGCHTCSVTCKNVWTSREGVEYAWFNNVETKPGQGF-PTDWENQEKYKGGWI--RKINGKLQPRMGNRAMLLGKIFANPHLPGIDDYYEPFDFDYQNLHTAPEG----SKSQPIARPRSLITGERMAKIEKGPNWEDDLGGEFDKLAKDKNFDN-IQKAMYSQFENTFMMYLPRLCEHCLNPACVATCPSGAIYKREEDGIVLIDQDKCRGWRMCITGCPYKKIYFNWKSGKSEKCIFCYPRIEAGQPTVCSETC');
+            is($hsp->homology_string, '                              . :. :  : :  .: .: . :.:  ::    :: ..   :.. .   :..   : : .: :.:     .  :: :::   :  .  : : ..   :   .     .:.  :. .   .     :.. .     . ::  .:    . .:.  .:: ::   . ...:. :  . ::  .. :   .:                      '.' 'x60);
+            # note: the reason this is not the same percent id above
+            # is we are calculating average percent id
+            is(sprintf("%.2f",$hsp->get_aln->percentage_identity()), 26.01);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+is($result->hits, 8);
+$searchio = new Bio::SearchIO(-format => 'fasta',
+				 -file   => File::Spec->catfile(qw(t data cysprot_vs_gadfly.FASTA)) );
+$result = $searchio->next_result;
+like($result->database_name, qr/gadflypep2/);
+is($result->database_letters, 7177762);
+is($result->database_entries, 14334);
+is($result->algorithm, 'FASTP');
+is($result->algorithm_version, '3.3t08');
+is($result->query_name, 'cysprot.fa');
+is($result->query_length, 2385);
+is($result->get_parameter('gapopen'), -12);
+is($result->get_parameter('gapext'), -2);
+is($result->get_parameter('ktup'), 2);
+is($result->get_parameter('matrix'), 'BL50');
+
+is($result->get_statistic('lambda'), 0.1397);
+is($result->get_statistic('dbletters'), 7177762 );
+is($result->get_statistic('dbentries'), 14334);
+
+
+@valid = ( [ 'Cp1|FBgn0013770|pp-CT20780|FBan0006692', 341, 
+	     'FBan0006692', '3.1e-59', 227.8],
+	   [ 'CG11459|FBgn0037396|pp-CT28891|FBan0011459', 336, 
+	     'FBan0011459', '6.4e-41',  166.9],
+	   [ 'CG4847|FBgn0034229|pp-CT15577|FBan0004847', 390, 
+	     'FBan0004847',  '2.5e-40', 165.2]);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1373);
+            is($hsp->query->end, 1706);
+            is($hsp->query->strand, 0);
+            is($hsp->hit->start, 5);
+            is($hsp->hit->end, 341);
+            is($hsp->hit->strand, 0);
+            is($hsp->length('hsp'), 345);	    
+            is(sprintf("%g",$hsp->evalue), sprintf("%g",'3.1e-59') );
+            is($hsp->score, 1170.6);
+            is($hsp->bits,227.8);
+            is(sprintf("%.2f",$hsp->percent_identity), 53.04);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5479);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.5430');
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 0);
+            is($hsp->gaps('query'), 11);
+            is($hsp->gaps, 194);
+            is($hsp->hit_string, (' 'x26).'MRTAVLLPLLAL----LAVAQA-VSFADVVMEEWHTFKLEHRKNYQDETEERFRLKIFNENKHKIAKHNQRFAEGKVSFKLAVNKYADLLHHEFRQLMNGFNYTLHKQLRAADESFKGVTFISPAHVTLPKSVDWRTKGAVTAVKDQGHCGSCWAFSSTGALEGQHFRKSGVLVSLSEQNLVDCSTKYGNNGCNGGLMDNAFRYIKDNGGIDTEKSYPYEAIDDSCHFNKGTVGATDRGFTDIPQGDEKKMAEAVATVGPVSVAIDASHESFQFYSEGVYNEPQCDAQNLDHGVLVVGFGTDESGED---YWLVKNSWGTTWGDKGFIKMLRNKENQCGIASASSYPLV');
+            is($hsp->query_string, 'SNWGNNGYFLIERGKNMCGLAACASYPIPQVMNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLY-GMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQ---NRKPR------KGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPK-QEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTNEE');
+            # note: the reason this is not the same percent id above
+            # is we are calculating average percent id
+            is(sprintf("%.2f",$hsp->get_aln->percentage_identity()), 56.13);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+is($result->hits, 21);
+
+# test on TFASTXY
+$searchio = new Bio::SearchIO(-format => 'fasta',
+			      -file   => File::Spec->catfile(qw(t data 5X_1895.FASTXY)));
+$result = $searchio->next_result;
+like($result->database_name, qr/yeast_nrpep.fasta/);
+is($result->database_letters, 4215311);
+is($result->database_entries, 9190);
+is($result->algorithm, 'FASTY');
+is($result->algorithm_version, '3.4t07');
+is($result->query_name, '5X_1895.fa');
+is($result->query_length, 7972);
+is($result->get_parameter('gapopen'), -14);
+is($result->get_parameter('gapext'), -2);
+is($result->get_parameter('ktup'), 2);
+is($result->get_parameter('matrix'), 'BL50');
+
+is($result->get_statistic('lambda'), 0.1711);
+is($result->get_statistic('dbletters'), 4215311);
+is($result->get_statistic('dbentries'), 9190);
+
+
+@valid = ( [ 'NR_SC:SW-YNN2_YEAST', 1056, 'NR_SC:SW-YNN2_YEAST','1.6e-154', '547.0'],
+	   [ 'NR_SC:SW-MPCP_YEAST', 311, 'NR_SC:SW-MPCP_YEAST', '1.3e-25', 117.1],
+	   [ 'NR_SC:SW-YEO3_YEAST', 300, 'NR_SC:SW-YEO3_YEAST', '5.7e-05', 48.5]);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 2180);
+            is($hsp->query->end, 5623);
+            is($hsp->query->strand, 1);
+            is($hsp->hit->start, 3);
+            is($hsp->hit->end, 1053);
+            is($hsp->hit->strand, 0);
+            is($hsp->length('hsp'), 1165);
+            
+            is(sprintf("%g",$hsp->evalue), sprintf("%g",'1.6e-154'));
+            is($hsp->score, 2877.6);
+            is($hsp->bits,'547.0');
+            is(sprintf("%.2f",$hsp->percent_identity), 51.67);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5244);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.5728);
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 0);
+            is($hsp->gaps, 678);	    
+            is($hsp->query_string, 'RKQLDPRIPALINNGVKANHRSFFVMVGDKGRDQVCPGMQAAMRFD*HRCR/LVNLHFLLSQARVSSRPSVLWCYKKD-LGFTT*VAASENLQQTIYFRPIATSHRKKREAKIKRDVKRGIRDANEQDPFELFVTVTDIRYTYYKDSAKILGQTFGMLVLQDYEAITPNLLARTIETVEGGGIVVLLLKTMSSLKQLYAMAM/DKL*CRDGVE*SDFS*LLI*DVHSRYRTDAHQFVQPRFNERFILSLGSNPDCLVLDDELNVLPLSKGKDIQIGKAGEEDDRGRKRKAEELKEMKENLEGVDIVGSLAKLAKTVDQAKAILTFVEAISEKNLSSTVALTAGRGRGKSAALGLAIGAALAHDYSNIFVTSPDPENLKTLFEFVFKALDALGYEEHIDYDVVQSTNPDFKKAIVRVNIFRGHRQTIQYISPEDSHVLGQAELVIIDEAAAIPLPLVRKLIGPYLVFMASTINGYEGTGRSLSIKLIQQLREQTRPSITKDSENAAASSAGSSSKAAAAGRSGAGLVRSLREIKLDEPIRYSPGDNVEKWLNNLLCLDATIVSK---SIQGCPHPSKCELYYVNRDTLFSYHPASEVFLQRMMALYVASHYKNSPNDLQMLSDAPAHHLFVLLPPIDEND-NTLPDPLVVLQVALEGNISREAILKEMAQSGMRSSGDMIPWIISTQFQDNDFATLSGARVVRIATHPDYARMGYGSRAMEALESFYNGTSYNFDDVPVDMGESFAD\VPRSDL*VTSFIPFPQNRTSTECVSQNANLQNDTIAIRDPSRMPPLLQRLSERKPETLDYLGVSFGLTRDLLRFWKKGGFTPLYASQKENALTGEYTFVMLKVLASAGGGGEWLGAFAQGMSCLLLQDEVHMGND*RL*TDFRQRFMNLLSYEAFKKFDASIALSILESTVPRNSPSPAP----KLLTNTELSSLLTPFDIKRLESYADSMLDYHVVLDLVPTIASLFFGKRLETS--LPPAQQAILLALGLQRKNVEALENELGITSTQTLALFGKVLRKMTKSLEDIRKASIASELP-----AEPTLAGRSANGSNKFVALQQTIEQDLADSAVQLNGEDDDASKKEQRELLNTLNMEEFAI-DQGGDWTEAEKQVERLASGKGGTRLSSTVSVKVDKLDD\AKRRRRRARMRVPRMRRR');
+            is($hsp->hit_string, 'KKAIDSRIPSLIRNGVQTKQRSIFVIVGDRARNQ------------------LPNLHYLMMSADLKMNKSVLWAYKKKLLGFT--------------------SHRKKRENKIKKEIKRGTREVNEMDPFESFISNQNIRYVYYKESEKILGNTYGMCILQDFEALTPNLLARTIETVEGGGIVVILLKSMSSLKQLYTMTM-D--------------------VHARYRTEAHGDVVARFNERFILSLGSNPNCLVVDDELNVLPLSGAKNVKPLPPKEDDELPPKQL--ELQELKESLEDVQPAGSLVSLSKTVNQAHAILSFIDAISEKTLNFTVALTAGRGRGKSAALGISIAAAVSHGYSNIFVTSPSPENLKTLFEFIFKGFDALGYQEHIDYDIIQSTNPDFNKAIVRVDIKRDHRQTIQYIVPQDHQVLGQAELVVIDEAAAIPLPIVKNLLGPYLVFMASTINGYEGTGRSLSLKLIQQLRNQNNTSGRESTQTAVVSRDNKEKDSHLHSQS-----RQLREISLDEPIRYAPGDPIEKWLNKLLCLDVTLIKNPRFATRGTPHPSQCNLFVVNRDTLFSYHPVSENFLEKMMALYVSSHYKNSPNDLQLMSDAPAHKLFVLLPPIDPKDGGRIPDPLCVIQIALEGEISKESVRNSLSR-GQRAGGDLIPWLISQQFQDEEFASLSGARIVRIATNPEYASMGYGSRAIELLRDYFEGKF-------TDMSE---D-VRPKDYSI--------KRVSDKELAKT-NLLKDDVKLRDAKTLPPLLLKLSEQPPHYLHYLGVSYGLTQSLHKFWKNNSFVPVYLRQTANDLTGEHTCVMLNVLE--GRESNWLVEFAK---------------------DFRKRFLSLLSYD-FHKFTAVQALSVIESSKKAQDLSDDEKHDNKELTRTHLDDIFSPFDLKRLDSYSNNLLDYHVIGDMIPMLALLYFGDKMGDSVKLSSVQSAILLAIGLQRKNIDTIAKELNLPSNQTIAMFAKIMRKMSQYFRQLLSQSIEETLPNIKDDAIAEMDGEEIKNYNAAEALDQ-MEEDLEEAG----SEAVQAMREKQKELINSLNLDKYAINDNSEEWAESQKSLEIAAKAKGVVSLKTGKKRTTEKAED-IYRQEMKA-MKKPRKSKK');
+            is($hsp->homology_string
+            # note: the reason this is not the same percent id above
+            # is we are calculating average percent id
+            is(sprintf("%.2f",$hsp->get_aln->overall_percentage_identity()),
+               '51.77');
+            is(sprintf("%.2f",$hsp->get_aln->average_percentage_identity()),
+               '58.41');
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+is($result->hits, 58);
+# test for MarkW bug in blastN
+
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data','a_thaliana.blastn'));
+
+
+$result = $searchio->next_result;
+is($result->database_name, 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,or phase 0, 1 or 2 HTGS sequences) ');
+is($result->database_letters, 4677375331);
+is($result->database_entries, 1083200);
+is($result->algorithm, 'BLASTN');
+like($result->algorithm_version, qr/^2\.2\.1/);
+is($result->query_name, '');
+is($result->query_length, 60);
+is($result->get_parameter('gapopen'), 5);
+is($result->get_parameter('gapext'), 2);
+is($result->get_parameter('ktup'), undef);
+
+is($result->get_statistic('lambda'), 1.37);
+is($result->get_statistic('kappa'), 0.711);
+is($result->get_statistic('entropy'),1.31 );
+is($result->get_statistic('T'), 0);
+is($result->get_statistic('A'), 30);
+is($result->get_statistic('X1'), '6');
+is($result->get_statistic('X1_bits'), 11.9);
+is($result->get_statistic('X2'), 15);
+is($result->get_statistic('X2_bits'), 29.7);
+is($result->get_statistic('S1'), 12);
+is($result->get_statistic('S1_bits'), 24.3);
+is($result->get_statistic('S2'), 17);
+is($result->get_statistic('S2_bits'), 34.2);
+
+is($result->get_statistic('dbentries'), 1083200);
+
+@valid = ( [ 'gb|AY052359.1|', 2826, 'AY052359', '3e-18', 96, 1, 60, 
+	     '1.0000'],
+	   [ 'gb|AC002329.2|AC002329', 76170, 'AC002329', '3e-18', 96, 1, 60, 
+	     '1.0000' ],
+	   [ 'gb|AF132318.1|AF132318', 5383, 'AF132318', '0.04', 42, 35, 55, 
+	     '0.3500']);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+    is($hit->start, shift @$d);
+    is($hit->end,shift @$d);    
+    is(sprintf("%.4f",$hit->frac_aligned_query), shift @$d);
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1);
+            is($hsp->query->end, 60);
+            is($hsp->query->strand, 1);
+            is($hsp->hit->start, 154);
+            is($hsp->hit->end, 212);
+            is($hsp->hit->strand, 1);
+            is($hsp->length('hsp'), 60);	    
+            is(sprintf("%g",$hsp->evalue), sprintf("%g",'3e-18'));
+            is($hsp->score, 48);
+            is($hsp->bits,95.6);
+            is(sprintf("%.2f",$hsp->percent_identity), 96.67);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.9667);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.9831);
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 0);
+            is($hsp->gaps('query'), 0);
+            is($hsp->gaps('hit'), 1);
+            is($hsp->gaps, 1);	    
+            is($hsp->query_string, 'aggaatgctgtttaattggaatcgtacaatggagaatttgacggaaatagaatcaacgat');
+            is($hsp->hit_string, 'aggaatgctgtttaattggaatca-acaatggagaatttgacggaaatagaatcaacgat');
+            is($hsp->homology_string, '|||||||||||||||||||||||  |||||||||||||||||||||||||||||||||||');
+            is(sprintf("%.2f",$hsp->get_aln->overall_percentage_identity), 96.67);
+            is(sprintf("%.2f",$hsp->get_aln->percentage_identity), 98.31);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+} 
+is(@valid, 0);
+
+#WU-BlastX test
+
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data','dnaEbsub_ecoli.wublastx'));
+
+$result = $searchio->next_result;
+is($result->database_name, 'ecoli.aa');
+is($result->database_letters, 1358990);
+is($result->database_entries, 4289);
+is($result->algorithm, 'BLASTX');
+like($result->algorithm_version, qr/^2\.0MP\-WashU/);
+is($result->query_name, 'gi|142864|gb|M10040.1|BACDNAE');
+is($result->query_description, 'B.subtilis dnaE gene encoding DNA primase, complete cds');
+is($result->query_accession, 'M10040.1');
+is($result->query_length, 2001);
+is($result->get_parameter('matrix'), 'blosum62');
+
+is($result->get_statistic('lambda'), 0.318);
+is($result->get_statistic('kappa'), 0.135);
+is($result->get_statistic('entropy'),0.401 );
+
+is($result->get_statistic('dbentries'), 4289);
+
+@valid = ( [ 'gi|1789447|gb|AAC76102.1|', 581, 'AAC76102', '1.1e-74', 671]);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+    is(sprintf("%.4f",$hit->frac_identical('query')), '0.3640');
+    is(sprintf("%.4f",$hit->frac_identical('hit')), '0.3660');
+    is(sprintf("%.4f",$hit->frac_conserved('query')), '0.5370');
+    is(sprintf("%.4f",$hit->frac_conserved('hit')), '0.5400');
+    is(sprintf("%.4f",$hit->frac_aligned_query), '0.6200');
+    is(sprintf("%.4f",$hit->frac_aligned_hit), '0.7100');
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 21);
+            is($hsp->query->end, 1265);
+            is($hsp->query->strand, 1);
+            is($hsp->hit->start, 1);
+            is($hsp->hit->end, 413);
+            is($hsp->hit->strand, 0);
+            is($hsp->length('hsp'), 421);	    
+            is(sprintf("%g",$hsp->evalue), sprintf("%g",'1.1e-74'));
+            is(sprintf("%g",$hsp->pvalue), sprintf("%g",'1.1e-74'));
+            is($hsp->score,671);
+            is($hsp->bits,265.8);
+            is(sprintf("%.2f",$hsp->percent_identity), 35.87);
+            
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.3639);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.3656);
+            is(sprintf("%.4f",$hsp->frac_conserved('query')), 0.5373);
+            is(sprintf("%.2f",$hsp->frac_conserved('hit')), 0.54);
+            
+            is(sprintf("%.4f",$hsp->frac_identical('hsp')), 0.3587);
+            is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.5297);
+            
+            is($hsp->query->frame(), 2);
+            is($hsp->hit->frame(), 0);
+            is($hsp->gaps('query'), 6);
+            is($hsp->gaps('hit'), 8);
+            is($hsp->gaps, 14);	    
+            is($hsp->query_string, 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ');
+            is($hsp->hit_string, 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ');
+            is($hsp->homology_string, 'M  RIP   ++ +    DIV++I   V+LKKQG+N+   CPFH E TPSF+V+ +KQ +HCFGCGA GN   FL   +   F E+V  LA  + ++ P +    +G+ P   E    Q + +  + L  FY   L        A  YL  RG + E+I  F IG+A   WD + K       +   +  AG+L+  + G  Y DRFR RVMFPI D  G V+ F GR LG+  PKY+NSPET +FHK + LY  Y+A+    +  R ++ EG+ DV       +  ++A++GTS T DH+++L R    +I CYD D+AG +A  +A E        G ++R   +PDG DPD  ++K G E F+  + + ++ + AF         +LS    R       L  IS + G   + +Y++Q');
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+#Trickier WU-Blast
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data','tricky.wublast'));
+$result = $searchio->next_result;
+my $hits_left = 1;
+while (my $hit = $result->next_hit) {
+	# frac_aligned_hit used to be over 1, frac_identical & frac_conserved are still too wrong
+	#is(sprintf("%.3f",$hit->frac_identical), '0.852');
+    #is(sprintf("%.3f",$hit->frac_conserved), '1.599');
+    is(sprintf("%.2f",$hit->frac_aligned_query), '0.92');
+    is(sprintf("%.2f",$hit->frac_aligned_hit), '0.91');
+    $hits_left--;
+}
+is($hits_left, 0);
+
+# More frac_ method testing, this time on ncbi blastn
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data','frac_problems.blast'));
+my @expected = ("1.000", "0.943");
+while (my $result = $searchio->next_result) {
+    my $hit = $result->next_hit;
+    is($hit->frac_identical, shift @expected);
+}
+is(@expected, 0);
+
+#WU-TBlastN test
+
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data','dnaEbsub_ecoli.wutblastn'));
+
+$result = $searchio->next_result;
+is($result->database_name, 'ecoli.nt');
+is($result->database_letters, 4662239);
+is($result->database_entries, 400);
+is($result->algorithm, 'TBLASTN');
+like($result->algorithm_version, qr/^2\.0MP\-WashU/);
+is($result->query_name, 'gi|142865|gb|AAA22406.1|');
+is($result->query_description, 'DNA primase');
+is($result->query_accession, 'AAA22406.1');
+is($result->query_length, 603);
+is($result->get_parameter('matrix'), 'blosum62');
+
+is($result->get_statistic('lambda'), '0.320');
+is($result->get_statistic('kappa'), 0.136);
+is($result->get_statistic('entropy'),0.387 );
+
+is($result->get_statistic('dbentries'), 400);
+
+@valid = ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '1.4e-73', 671]);
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 1);
+            is($hsp->query->end, 415);
+            is($hsp->query->strand, 0);
+            is($hsp->hit->start, 4778);
+            is($hsp->hit->end, 6016);
+            is($hsp->hit->strand, 1);
+            is($hsp->length('hsp'), 421);	    
+            cmp_ok($hsp->evalue,'==',1.4e-73);
+            cmp_ok($hsp->pvalue,'==',1.4e-73);
+            is($hsp->score,671);
+            is($hsp->bits,265.8);
+            is(sprintf("%.2f",$hsp->percent_identity), 35.87);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.3656);	    
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.3639);
+            is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.5297);
+            is($hsp->query->frame(), 0);
+            is($hsp->hit->frame(), 1);
+            is($hsp->gaps('query'), 6);
+            is($hsp->gaps('hit'), 8);
+            is($hsp->gaps, 14);	    
+            is($hsp->query_string, 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ');
+            is($hsp->hit_string, 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ');
+            is($hsp->homology_string, 'M  RIP   ++ +    DIV++I   V+LKKQG+N+   CPFH E TPSF+V+ +KQ +HCFGCGA GN   FL   +   F E+V  LA  + ++ P +    +G+ P   E    Q + +  + L  FY   L        A  YL  RG + E+I  F IG+A   WD + K       +   +  AG+L+  + G  Y DRFR RVMFPI D  G V+ F GR LG+  PKY+NSPET +FHK + LY  Y+A+    +  R ++ EG+ DV       +  ++A++GTS T DH+++L R    +I CYD D+AG +A  +A E        G ++R   +PDG DPD  ++K G E F+  + + ++ + AF         +LS    R       L  IS + G   + +Y++Q');
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is($count, 1);
+
+# WU-BLAST TBLASTX
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data','dnaEbsub_ecoli.wutblastx'));
+
+$result = $searchio->next_result;
+is($result->database_name, 'ecoli.nt');
+is($result->database_letters, 4662239);
+is($result->database_entries, 400);
+is($result->algorithm, 'TBLASTX');
+like($result->algorithm_version, qr/^2\.0MP\-WashU/);
+is($result->query_name, 'gi|142864|gb|M10040.1|BACDNAE');
+is($result->query_description, 'B.subtilis dnaE gene encoding DNA primase, complete cds');
+is($result->query_accession, 'M10040.1');
+is($result->query_length, 2001);
+is($result->get_parameter('matrix'), 'blosum62');
+
+is($result->get_statistic('lambda'), 0.318);
+is($result->get_statistic('kappa'), 0.135);
+is($result->get_statistic('entropy'),0.401 );
+is($result->get_statistic('dbentries'), 400);
+
+@valid = ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '6.4e-70', 318],
+	   [ 'gi|2367383|gb|AE000509.1|AE000509', 10589, 'AE000509', '0.9992', 59]
+	   );
+$count = 0;
+
+while( my $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    # using e here to deal with 0.9992 coming out right here as well
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hspcounter = 0;
+        while( my $hsp = $hit->next_hsp ) {
+            $hspcounter++;
+            if( $hspcounter == 3 ) {
+                # let's actually look at the 3rd HSP
+                is($hsp->query->start, 441);
+                is($hsp->query->end, 617);
+                is($hsp->query->strand, 1);
+                is($hsp->hit->start, 5192);
+                is($hsp->hit->end, 5368);
+                is($hsp->hit->strand, 1);
+                is($hsp->length('hsp'), 59);	    
+                cmp_ok($hsp->evalue,'==',6.4e-70);
+                cmp_ok($hsp->pvalue,'==',6.4e-70);
+                is($hsp->score,85);
+                is($hsp->bits,41.8);
+                is(sprintf("%.2f",$hsp->percent_identity), '32.20');
+                is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.322);
+                is(sprintf("%.3f",$hsp->frac_identical('query')), 0.322);
+                is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.4746);
+                is($hsp->query->frame(), 2);
+                is($hsp->hit->frame(), 1);
+                is($hsp->gaps('query'), 0);
+                is($hsp->gaps('hit'), 0);
+                is($hsp->gaps, 0);	    
+                is($hsp->query_string, 'ALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGY');
+                is($hsp->hit_string, 'ARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY');
+                is($hsp->homology_string, 'A  YL  RG + E+I  F IG+A   WD + K       +   +  AG+L+  + G  Y');
+                last;
+            }
+        }
+        is($hspcounter, 3);
+    }
+    elsif( $count == 1 ) {
+        my $hsps_to_do = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 587);
+            is($hsp->query->end, 706);
+            is($hsp->query->strand, -1);
+            is($hsp->hit->start, 4108);
+            is($hsp->hit->end, 4227);
+            is($hsp->hit->strand, -1);
+            is($hsp->length('hsp'), 40);	    
+            is($hsp->evalue , 7.1);
+            is($hsp->pvalue , '1.00');
+            is($hsp->score,59);
+            is($hsp->bits,29.9);
+            is(sprintf("%.2f",$hsp->percent_identity), '37.50');
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.3750');
+            is(sprintf("%.4f",$hsp->frac_identical('query')), '0.3750');
+            is(sprintf("%.4f",$hsp->frac_conserved('hsp')), '0.4750');
+            is($hsp->query->frame(), 2);
+            is($hsp->hit->frame(), 2);
+            is($hsp->gaps('query'), 0);
+            is($hsp->gaps('hit'), 0);
+            is($hsp->gaps, 0);
+            is($hsp->query_string, 'WLPRALPEKATTAP**SWIGNMTRFLKRSKYPLPSSRLIR');
+            is($hsp->hit_string, 'WLSRTTVGSSTVSPRTFWITRMKVKLSSSKVTLPSTKSTR');
+            is($hsp->homology_string, 'WL R     +T +P   WI  M   L  SK  LPS++  R');
+            $hsps_to_do--;
+            last;
+        }
+        is($hsps_to_do, 0);
+    }
+    last if( $count++ > @valid );
+}
+is($count, 2);
+
+# Do a multiblast report test
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+			       '-file'   => Bio::Root::IO->catfile('t','data','multi_blast.bls'));
+
+@expected = qw(CATH_RAT CATL_HUMAN CATL_RAT PAPA_CARPA);
+my $results_left = 4;
+while( my $result = $searchio->next_result ) {
+    is($result->query_name, shift @expected, "Multiblast query test");
+    $results_left--;
+}
+is($results_left, 0);
+
+# Test GCGBlast parsing
+
+$searchio = new Bio::SearchIO('-format' => 'blast',
+			      '-file'   => Bio::Root::IO->catfile('t','data', 'test.gcgblast'));
+$result = $searchio->next_result();
+
+is($result->query_name, '/v0/people/staji002/test.gcg');
+is($result->algorithm, 'BLASTP');
+is($result->algorithm_version, '2.2.1 [Apr-13-2001]');
+is($result->database_name, 'pir');
+is($result->database_entries, 274514);
+is($result->database_letters, 93460074);
+
+$hit = $result->next_hit;
+is($hit->name, 'PIR2:S44629');
+is($hit->length, 628);
+is($hit->accession, 'PIR2:S44629');
+
+#TODO: {
+	#skip('Significance parsing and raw score parsing broken for GCG-BLAST Hits -- see HSP',2);
+	is($hit->significance, '2e-08' );
+	is($hit->raw_score, 57 );
+#}
+
+$hsp = $hit->next_hsp;
+cmp_ok($hsp->evalue, '==', 2e-08);
+is($hsp->bits, '57.0');
+is($hsp->score, 136);
+is(int($hsp->percent_identity), 28);
+is(sprintf("%.2f",$hsp->frac_identical('query')), 0.29);
+is($hsp->frac_conserved('total'), 69/135);
+is($hsp->gaps('total'), 8);
+is($hsp->gaps('hit'), 6);
+is($hsp->gaps('query'), 2);
+
+is($hsp->hit->start, 342);
+is($hsp->hit->end, 470);
+is($hsp->query->start, 3);
+is($hsp->query->end, 135);
+
+is($hsp->query_string, 'CAAEFDFMEKETPLRYTKTXXXXXXXXXXXXXXRKIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYTALGILIMRLKLFLTPYMCVMASLICSRQLFGW--LFCKVHPGAIVFVILAAMSIQGSANLQTQ');
+is($hsp->hit_string, 'CSAEFDFIQYSTIEKLCGTLLIPLALISLVTFVFNFVKNT-NLLWRNSEEIG----ENGEILYNVVQLCCSTVMAFLIMRLKLFMTPHLCIVAALFANSKLLGGDRISKTIRVSALVGVI-AILFYRGIPNIRQQ');
+is($hsp->homology_string, 'C+AEFDF++  T  +   T                 + +   +L +    +     ++GE++Y+ +QL   T +  LIMRLKLF+TP++C++A+L  + +L G   +   +   A+V VI A +  +G  N++ Q');
+
+
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+			       '-file'   => Bio::Root::IO->catfile('t','data','HUMBETGLOA.tblastx'));
+
+$result = $searchio->next_result;
+
+isa_ok($result,'Bio::Search::Result::ResultI');
+$hit = $result->next_hit;
+is($hit->accession, 'AE000479');
+is($hit->bits, 33.6);
+$hsp = $hit->next_hsp;
+is($hit->hsp->bits,$hsp->bits);
+
+isa_ok($hsp->get_aln,'Bio::Align::AlignI');
+my $writer = Bio::SearchIO::Writer::HitTableWriter->new( 
+                                  -columns => [qw(
+                                                  query_name
+                                                  query_length
+                                                  hit_name
+                                                  hit_length
+						  bits
+						  score
+                                                  frac_identical_query
+                                                  expect
+                                                  )]  );
+
+my $out = new Bio::SearchIO(-writer => $writer,
+			 -file   => ">searchio.out");
+$out->write_result($result, 1);
+ok(-e 'searchio.out');
+my $writerhtml = new Bio::SearchIO::Writer::HTMLResultWriter();
+my $outhtml = new Bio::SearchIO(-writer => $writerhtml,
+				-file   => ">searchio.html");
+$outhtml->write_result($result, 1);
+ok(-e "searchio.html");
+
+unlink 'searchio.out';
+unlink 'searchio.html';
+
+#test all the database accession number formats
+$searchio = new Bio::SearchIO(-format => 'blast',
+				 -file   => File::Spec->catfile(qw(t data testdbaccnums.out)) );
+$result = $searchio->next_result;
+
+@valid = ( ['pir||T14789','T14789','T14789','CAB53709','AAH01726'],
+	   ['gb|NP_065733.1|CYT19', 'NP_065733','CYT19'],
+	   ['emb|XP_053690.4|Cyt19','XP_053690'],
+	   ['dbj|NP_056277.2|DKFZP586L0724','NP_056277'],
+	   ['prf||XP_064862.2','XP_064862'],
+	   ['pdb|BAB13968.1|1','BAB13968'],
+	   ['sp|Q16478|GLK5_HUMAN','Q16478'],
+	   ['pat|US|NP_002079.2','NP_002079'],
+	   ['bbs|NP_079463.2|','NP_079463'],
+	   ['gnl|db1|NP_002444.1','NP_002444'],
+	   ['ref|XP_051877.1|','XP_051877'],
+	   ['lcl|AAH16829.1|','AAH16829'],
+	   ['gi|1|gb|NP_065733.1|CYT19','NP_065733'],
+	   ['gi|2|emb|XP_053690.4|Cyt19','XP_053690'],
+	   ['gi|3|dbj|NP_056277.2|DKFZP586L0724','NP_056277'],
+	   ['gi|4|pir||T14789','T14789'],
+	   ['gi|5|prf||XP_064862.2','XP_064862'],
+	   ['gi|6|pdb|BAB13968.1|1','BAB13968'],
+	   ['gi|7|sp|Q16478|GLK5_HUMAN','Q16478'],
+	   ['gi|8|pat|US|NP_002079.2','NP_002079'],
+	   ['gi|9|bbs|NP_079463.2|','NP_079463'],
+	   ['gi|10|gnl|db1|NP_002444.1','NP_002444'],
+	   ['gi|11|ref|XP_051877.1|','XP_051877'],
+	   ['gi|12|lcl|AAH16829.1|','AAH16829'],
+	   ['MY_test_ID','MY_test_ID']
+	   );
+
+$hit = $result->next_hit;
+my $d = shift @valid;
+is($hit->name, shift @$d);
+is($hit->accession, shift @$d);
+my @accnums = $hit->each_accession_number;
+foreach my $a (@accnums) {
+	is($a, shift @$d);
+}
+$d = shift @valid;
+$hit = $result->next_hit;
+is($hit->name, shift @$d);
+is($hit->accession, shift @$d);
+is($hit->locus, shift @$d);
+
+$hits_left = 23;
+while( $hit = $result->next_hit ) {
+    my $d = shift @valid;
+    is($hit->name, shift @$d);
+    is($hit->accession, shift @$d);
+    $hits_left--;
+}
+is($hits_left, 0);
+
+# Parse MEGABLAST
+
+# parse the BLAST-like output
+my $infile = Bio::Root::IO->catfile(qw(t data 503384.MEGABLAST.2));
+my $in = new Bio::SearchIO(-file => $infile,
+			   -format => 'blast'); # this is megablast 
+                                                # blast-like output
+my $r = $in->next_result;
+my @dcompare = ( ['Contig3700', 5631, 785, '0.0', 785, '0.0', 396, 639, 12, 
+		  8723,9434, 1, 4083, 4794, -1],
+                 ['Contig3997', 12734, 664, '0.0', 664, '0.0', 335, 401, 0, 
+		  1282, 1704, 1, 1546, 1968,-1 ],
+                 ['Contig634', 858, 486, '1e-136', 486, '1e-136', 245, 304, 3, 
+		  7620, 7941, 1, 1, 321, -1],
+                 ['Contig1853', 2314, 339, '1e-91',339, '1e-91', 171, 204, 0,
+		  6406, 6620, 1, 1691, 1905, 1]
+    );
+
+is($r->query_name, '503384');
+is($r->query_description, '11337 bp 2 contigs');
+is($r->query_length, 11337);
+is($r->database_name, 'cneoA.nt ');
+is($r->database_letters, 17206226);
+is($r->database_entries, 4935);
+
+$hits_left = 4;
+while( my $hit = $r->next_hit ) {
+    my $d = shift @dcompare;
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->raw_score, shift @$d);
+    is($hit->significance, shift @$d);
+    
+    my $hsp = $hit->next_hsp;
+    is($hsp->bits, shift @$d);
+    cmp_ok($hsp->evalue, '==', shift @$d);
+    is($hsp->score, shift @$d);
+    is($hsp->num_identical, shift @$d);
+    is($hsp->gaps('total'), shift @$d);
+    is($hsp->query->start, shift @$d);
+    is($hsp->query->end, shift @$d);
+    is($hsp->query->strand, shift @$d);
+    is($hsp->hit->start, shift @$d);
+    is($hsp->hit->end, shift @$d);
+    is($hsp->hit->strand, shift @$d);
+    $hits_left--;
+}
+is($hits_left, 0);
+
+# parse another megablast format
+
+$infile =  Bio::Root::IO->catfile(qw(t data 503384.MEGABLAST.0));
+
+# this is megablast output type 0 
+$in = new Bio::SearchIO(-file          => $infile,
+			-report_format => 0,
+			-format        => 'megablast'); 
+$r = $in->next_result;
+@dcompare = ( 
+	      ['Contig634', 7620, 7941, 1, 1, 321, -1],
+	      ['Contig1853', 6406, 6620, 1, 1691, 1905, 1],  
+	      ['Contig3700', 8723,9434, 1, 4083, 4794, -1],
+	      ['Contig3997', 1282, 1704, 1, 1546, 1968,-1 ],
+	      );
+
+is($r->query_name, '503384');
+
+while( my $hit = $r->next_hit ) {
+    my $d = shift @dcompare;
+    is($hit->name, shift @$d);
+    my $hsp = $hit->next_hsp;
+    is($hsp->query->start, shift @$d);
+    is($hsp->query->end, shift @$d);
+    is($hsp->query->strand, shift @$d);
+    is($hsp->hit->start, shift @$d);
+    is($hsp->hit->end, shift @$d);
+    is($hsp->hit->strand, shift @$d);
+}
+is(@dcompare, 0);
+
+
+# Let's test RPS-BLAST
+
+my $parser = new Bio::SearchIO(-format => 'blast',
+			       -file   => Bio::Root::IO->catfile(qw(t data ecoli_domains.rpsblast)));
+
+$r = $parser->next_result;
+is($r->query_name, 'gi|1786183|gb|AAC73113.1|');
+is($r->num_hits, 7);
+$hit = $r->next_hit;
+is($hit->name, 'gnl|CDD|3919');
+is($hit->significance, 0.064);
+is($hit->raw_score, 28);
+$hsp = $hit->next_hsp;
+is($hsp->query->start, 599);
+is($hsp->query->end,655);
+is($hsp->hit->start,23);
+is($hsp->hit->end,76);
+
+
+# Test PSI-BLAST parsing
+
+$searchio = new Bio::SearchIO ('-format' => 'blast',
+			       '-file'   => Bio::Root::IO->catfile('t','data','psiblastreport.out'));
+
+$result = $searchio->next_result;
+
+is($result->database_name, '/home/peter/blast/data/swissprot.pr');
+is($result->database_entries, 88780);
+is($result->database_letters, 31984247);
+
+is($result->algorithm, 'BLASTP');
+like($result->algorithm_version, qr/^2\.0\.14/);
+is($result->query_name, 'CYS1_DICDI');
+is($result->query_length, 343);
+is($result->get_statistic('kappa') , 0.0491);
+cmp_ok($result->get_statistic('lambda'), '==', 0.270);
+cmp_ok($result->get_statistic('entropy'), '==', 0.230);
+is($result->get_statistic('dbletters'), 31984247);
+is($result->get_statistic('dbentries'), 88780);
+is($result->get_statistic('effective_hsplength'), 49);
+is($result->get_statistic('effectivespace'), 8124403938);
+is($result->get_parameter('matrix'), 'BLOSUM62');
+is($result->get_parameter('gapopen'), 11);
+is($result->get_parameter('gapext'), 1);
+
+my @valid_hit_data = ( [ 'sp|P04988|CYS1_DICDI', 343, 'P04988', '0', 721],
+		       [ 'sp|P43295|A494_ARATH', 313, 'P43295', '1e-75', 281],
+		       [ 'sp|P25804|CYSP_PEA', 363, 'P25804', '1e-74', 278]);
+my @valid_iter_data = ( [ 127, 127, 0, 109, 18, 0, 0, 0, 0],
+			[ 157, 40, 117, 2, 38, 0, 109, 3, 5]);
+my $iter_count = 0;
+while( $iter = $result->next_iteration ) {
+    $iter_count++;
+    my $di = shift @valid_iter_data;
+    is($iter->number, $iter_count);
+
+    is($iter->num_hits, shift @$di);
+    is($iter->num_hits_new, shift @$di);
+    is($iter->num_hits_old, shift @$di);
+    is(scalar($iter->newhits_below_threshold), shift @$di);
+    is(scalar($iter->newhits_not_below_threshold), shift @$di);
+    is(scalar($iter->newhits_unclassified), shift @$di);
+    is(scalar($iter->oldhits_below_threshold), shift @$di);
+    is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
+    is(scalar($iter->oldhits_not_below_threshold), shift @$di);
+
+    my $hit_count = 0;
+    if ($iter_count == 1) {
+        while( $hit = $result->next_hit ) {
+            my $d = shift @valid_hit_data;
+            
+            is($hit->name, shift @$d);
+            is($hit->length, shift @$d);
+            is($hit->accession, shift @$d);
+            is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+            is($hit->bits, shift @$d );
+            
+            if( $hit_count == 1 ) {
+                my $hsps_left = 1;
+                while( my $hsp = $hit->next_hsp ){
+                    is($hsp->query->start, 32);
+                    is($hsp->query->end, 340);
+                    is($hsp->hit->start, 3);
+                    is($hsp->hit->end, 307);
+                    is($hsp->length('hsp'), 316);
+                    is($hsp->start('hit'), $hsp->hit->start);
+                    is($hsp->end('query'), $hsp->query->end);
+                    is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
+                    cmp_ok($hsp->evalue, '==', 1e-75);
+                    is($hsp->score, 712);
+                    is($hsp->bits, 281);
+                    is(sprintf("%.1f",$hsp->percent_identity), 46.5);
+                    is(sprintf("%.4f",$hsp->frac_identical('query')), 0.4757);
+                    is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.482);
+                    is($hsp->gaps, 18);
+                    $hsps_left--;
+                }
+                is($hsps_left, 0);
+            }
+            last if( $hit_count++ > @valid_hit_data );
+        }
+    }
+}
+is(@valid_hit_data, 0);
+is(@valid_iter_data, 0);
+
+# Test filtering
+
+$searchio = new Bio::SearchIO ( '-format' => 'blast', 
+                                '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.bls'),
+                                '-signif' => 1e-100);
+
+@valid = qw(gb|AAC73113.1|);
+$r = $searchio->next_result;
+
+while( my $hit = $r->next_hit ) {
+    is($hit->name, shift @valid);
+}
+
+$searchio = new Bio::SearchIO ( '-format' => 'blast', 
+                                '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.bls'),
+                                '-score' => 100);
+
+@valid = qw(gb|AAC73113.1| gb|AAC76922.1| gb|AAC76994.1|);
+$r = $searchio->next_result;
+
+while( my $hit = $r->next_hit ) {
+    is($hit->name, shift @valid);
+}
+is(@valid, 0);
+
+$searchio = new Bio::SearchIO ( '-format' => 'blast', 
+                                '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.bls'),
+                                '-bits' => 200);
+
+@valid = qw(gb|AAC73113.1| gb|AAC76922.1|);
+$r = $searchio->next_result;
+
+while( my $hit = $r->next_hit ) {
+    is($hit->name, shift @valid);
+}
+is(@valid, 0);
+
+
+my $filt_func = sub{ my $hit=shift; 
+                     $hit->frac_identical('query') >= 0.31
+                     };
+
+$searchio = new Bio::SearchIO ( '-format' => 'blast', 
+                                '-file'   => Bio::Root::IO->catfile('t','data','ecolitst.bls'),
+                                '-hit_filter' => $filt_func);
+
+@valid = qw(gb|AAC73113.1| gb|AAC76994.1|);
+$r = $searchio->next_result;
+
+while( my $hit = $r->next_hit ) {
+    is($hit->name, shift @valid);
+}
+is(@valid, 0);
+
+
+
+
+# bl2seq parsing testing
+
+# this is blastp bl2seq
+$searchio = new Bio::SearchIO(-format => 'blast',
+			      -file   => Bio::Root::IO->catfile(qw(t data
+								   bl2seq.out)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->query_name, '');
+is($result->algorithm, 'BLASTP');
+$hit = $result->next_hit;
+is($hit->name, 'ALEU_HORVU');
+is($hit->length, 362);
+$hsp = $hit->next_hsp;
+is($hsp->score, 481);
+is($hsp->bits, 191);
+is(int $hsp->percent_identity, 34);
+cmp_ok($hsp->evalue, '==', 2e-53);
+is(int($hsp->frac_conserved*$hsp->length), 167);
+is($hsp->query->start, 28);
+is($hsp->query->end, 343);
+is($hsp->hit->start, 60);
+is($hsp->hit->end,360);
+is($hsp->gaps, 27);
+
+# this is blastn bl2seq 
+$searchio = new Bio::SearchIO(-format => 'blast',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data bl2seq.blastn.rev)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->query_name, '');
+is($result->algorithm, 'BLASTN');
+is($result->query_length, 180);
+$hit = $result->next_hit;
+is($hit->length, 179);
+is($hit->name, 'human');
+$hsp = $hit->next_hsp;
+is($hsp->score, 27);
+is($hsp->bits, '54.0');
+is(int $hsp->percent_identity, 88);
+cmp_ok($hsp->evalue, '==', 2e-12);
+is(int($hsp->frac_conserved*$hsp->length), 83);
+is($hsp->query->start, 94);
+is($hsp->query->end, 180);
+is($hsp->query->strand, 1);
+is($hsp->hit->strand, -1);
+is($hsp->hit->start, 1);
+is($hsp->hit->end,94);
+is($hsp->gaps, 7);
+
+# this is blastn bl2seq 
+$searchio = new Bio::SearchIO(-format => 'blast',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data bl2seq.blastn)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->query_name, '');
+is($result->query_length, 180);
+is($result->algorithm, 'BLASTN');
+$hit = $result->next_hit;
+is($hit->name, 'human');
+is($hit->length, 179);
+$hsp = $hit->next_hsp;
+is($hsp->score, 27);
+is($hsp->bits, '54.0');
+is(int $hsp->percent_identity, 88);
+cmp_ok($hsp->evalue,'==', 2e-12);
+is(int($hsp->frac_conserved*$hsp->length), 83);
+is($hsp->query->start, 94);
+is($hsp->query->end, 180);
+is($hsp->query->strand, 1);
+is($hsp->hit->strand, 1);
+is($hsp->hit->start, 86);
+is($hsp->hit->end,179);
+is($hsp->gaps, 7);
+
+# this is blastp bl2seq
+$searchio = new Bio::SearchIO(-format => 'blast',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data bl2seq.bug940.out)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->query_name, 'zinc');
+is($result->algorithm, 'BLASTP');
+is($result->query_description, 'finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0');
+is($result->query_length, 469);
+$hit = $result->next_hit;
+is($hit->name, 'gi|4507985|');
+is($hit->description,'zinc finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0');
+is($hit->length, 469);
+$hsp = $hit->next_hsp;
+is($hsp->score, 1626);
+is($hsp->bits, 637);
+is(int $hsp->percent_identity, 66);
+cmp_ok($hsp->evalue, '==', 0.0);
+is(int($hsp->frac_conserved*$hsp->length), 330);
+is($hsp->query->start, 121);
+is($hsp->query->end, 469);
+is($hsp->hit->start, 1);
+is($hsp->hit->end,469);
+is($hsp->gaps, 120);
+ok($hit->next_hsp); # there is more than one HSP here, 
+                    # make sure it is parsed at least
+
+# cannot distinguish between blastx and tblastn reports
+# so we're only testing a blastx report for now
+
+# this is blastx bl2seq
+$searchio = new Bio::SearchIO(-format => 'blast',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data bl2seq.blastx.out)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->query_name, 'AE000111.1');
+is($result->query_description, 'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome');
+is($result->algorithm, 'BLASTX');
+is($result->query_length, 720);
+$hit = $result->next_hit;
+is($hit->name, 'AK1H_ECOLI');
+is($hit->description,'P00561 Bifunctional aspartokinase/homoserine dehydrogenase I (AKI-HDI) [Includes: Aspartokinase I ; Homoserine dehydrogenase I ]');
+is($hit->length, 820);
+$hsp = $hit->next_hsp;
+is($hsp->score, 634);
+is($hsp->bits, 248);
+is(int $hsp->percent_identity, 100);
+cmp_ok($hsp->evalue, '==' ,2e-70);
+is(int($hsp->frac_conserved*$hsp->length), 128);
+is($hsp->query->start, 1);
+is($hsp->query->end, 384);
+is($hsp->hit->start, 1);
+is($hsp->hit->end,128);
+is($hsp->gaps, 0);
+is($hsp->query->frame,0);
+is($hsp->hit->frame,0);
+is($hsp->query->strand,-1);
+is($hsp->hit->strand,0);
+
+# this is tblastx bl2seq (self against self)
+$searchio = new Bio::SearchIO(-format => 'blast',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data bl2seq.tblastx.out)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->query_name, 'Escherichia');
+is($result->algorithm, 'TBLASTX');
+is($result->query_description, 'coli K-12 MG1655 section 1 of 400 of the complete genome');
+is($result->query_length, 720);
+$hit = $result->next_hit;
+is($hit->name, 'gi|1786181|gb|AE000111.1|AE000111');
+
+is($hit->description,'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome');
+is($hit->length, 720);
+$hsp = $hit->next_hsp;
+is($hsp->score, 1118);
+is($hsp->bits, 515);
+is(int $hsp->percent_identity, 95);
+cmp_ok($hsp->evalue, '==', 1e-151);
+is(int($hsp->frac_conserved*$hsp->length), 229);
+is($hsp->query->start, 1);
+is($hsp->query->end, 720);
+is($hsp->hit->start, 1);
+is($hsp->hit->end,720);
+is($hsp->gaps, 0);
+is($hsp->query->frame,0);
+is($hsp->hit->frame,0);
+is($hsp->query->strand,1);
+is($hsp->hit->strand,1);
+
+# this is NCBI tblastn
+$searchio = new Bio::SearchIO(-format => 'blast',
+										-file   => Bio::Root::IO->catfile
+										(qw(t data tblastn.out)));
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+is($result->algorithm, 'TBLASTN');
+$hit = $result->next_hit;
+is($hit->name,'gi|10040111|emb|AL390796.6|AL390796');
+
+# test blasttable output
+my @eqset = qw( 
+		
+		c200-vs-yeast.BLASTN.m9);
+$searchio = new Bio::SearchIO(-file => Bio::Root::IO->catfile
+			      (qw(t data c200-vs-yeast.BLASTN)),
+			      -format => 'blast');
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+my %ref = &result2hash($result);
+is( scalar keys %ref, 67);
+$searchio = new Bio::SearchIO(-file => Bio::Root::IO->catfile
+			      (qw(t data c200-vs-yeast.BLASTN.m8)),
+			      -program_name => 'BLASTN',
+			      -format => 'blasttable');
+$result = $searchio->next_result;
+my %tester = &result2hash($result);
+is( scalar keys %tester, 67);
+foreach my $key ( sort keys %ref ) {
+    is($tester{$key}, $ref{$key},$key);
+}      
+
+# Test Blast parsing with B=0 (WU-BLAST)
+$searchio = new Bio::SearchIO(-file   => Bio::Root::IO->catfile
+			      (qw(t data no_hsps.blastp)),
+			      -format => 'blast');
+$result = $searchio->next_result;
+is($result->query_name, 'mgri:MG00189.3');
+$hit = $result->next_hit;
+is($hit->name, 'mgri:MG00189.3');
+is($hit->description, 'hypothetical protein 6892 8867 +');
+is($hit->score, 3098);
+is($hit->significance, '0.');
+
+$hit = $result->next_hit;
+is($hit->name, 'fgram:FG01141.1');
+is($hit->description, 'hypothetical protein 47007 48803 -');
+is($hit->score, 2182);
+is($hit->significance, '4.2e-226');
+is($result->num_hits, 415);
+# Let's now test if _guess_format is doing its job correctly
+my %pair = ( 'filename.blast'  => 'blast',
+	     'filename.bls'    => 'blast',
+	     'f.blx'           => 'blast',
+	     'f.tblx'          => 'blast',
+	     'fast.bls'        => 'blast',
+	     'f.fasta'         => 'fasta',
+	     'f.fa'            => 'fasta',
+	     'f.fx'            => 'fasta',
+	     'f.fy'            => 'fasta',
+	     'f.ssearch'       => 'fasta',
+	     'f.SSEARCH.m9'    => 'fasta',
+	     'f.m9'            => 'fasta',
+	     'f.psearch'       => 'fasta',
+	     'f.osearch'       => 'fasta',
+	     'f.exon'          => 'exonerate',
+	     'f.exonerate'     => 'exonerate',
+	     'f.blastxml'      => 'blastxml',
+	     'f.xml'           => 'blastxml');
+while( my ($file,$expformat) = each %pair ) {
+    is(Bio::SearchIO->_guess_format($file),$expformat, "$expformat for $file");
+}
+
+
+# Test Wes Barris's reported bug when parsing blastcl3 output which
+# has integer overflow
+
+$searchio = new Bio::SearchIO(-file => Bio::Root::IO->catfile
+			      (qw(t data hsinsulin.blastcl3.blastn)),
+			      -format => 'blast');
+$result = $searchio->next_result;
+is($result->query_name, 'human');
+is($result->database_letters(), '-24016349'); 
+# this is of course not the right length, but it is what blastcl3
+# reports, the correct value is
+is($result->get_statistic('dbletters'),'192913178');
+is($result->get_statistic('dbentries'),'1867771');
+
+
+# test for links and groups being parsed out of WU-BLAST properly
+$searchio = Bio::SearchIO->new(-format => 'blast',
+			       -file   => Bio::Root::IO->catfile
+			       (qw(t data brassica_ATH.WUBLASTN) ));
+ok($result = $searchio->next_result);
+ok($hit = $result->next_hit);
+ok($hsp = $hit->next_hsp);
+is($hsp->links,'(1)-3-2');
+is($hsp->query->strand, 1);
+is($hsp->hit->strand, 1);
+is($hsp->hsp_group, '1');
+
+## Web blast result parsing
+
+$searchio = Bio::SearchIO->new(-format => 'blast',
+			       -file   => Bio::Root::IO->catfile
+			       (qw(t data catalase-webblast.BLASTP)));
+ok($result = $searchio->next_result);
+ok($hit = $result->next_hit);
+is($hit->name, 'gi|40747822|gb|EAA66978.1|', 'full hit name');
+is($hit->accession, 'EAA66978', 'hit accession');
+ok($hsp = $hit->next_hsp);
+is($hsp->query->start, 1, 'query start');
+is($hsp->query->end, 528, 'query start');
+
+# tests for new BLAST 2.2.13 output
+$searchio = new Bio::SearchIO(-format => 'blast',
+							  -file   => Bio::Root::IO->catfile
+							  (qw(t data new_blastn.txt)));
+
+$result = $searchio->next_result;
+is($result->database_name, 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS,GSS,environmental samples or phase 0, 1 or 2 HTGS sequences)');
+is($result->database_entries, 3742891);
+is($result->database_letters, 16670205594);
+is($result->algorithm, 'BLASTN');
+is($result->algorithm_version, '2.2.13 [Nov-27-2005]');
+is($result->query_name, 'pyrR,');
+is($result->query_length, 558);
+is($result->get_statistic('kappa'), '0.711');
+is($result->get_statistic('kappa_gapped'), '0.711');
+is($result->get_statistic('lambda'), '1.37');
+is($result->get_statistic('lambda_gapped'), '1.37');
+is($result->get_statistic('entropy'), '1.31');
+is($result->get_statistic('entropy_gapped'), '1.31');
+is($result->get_statistic('dbletters'), '-509663586');
+is($result->get_statistic('dbentries'), 3742891);
+is($result->get_statistic('effective_hsplength'), undef);
+is($result->get_statistic('effectivespace'), undef);
+is($result->get_parameter('matrix'), 'blastn matrix:1 -3');
+is($result->get_parameter('gapopen'), 5);
+is($result->get_parameter('gapext'), 2);
+is($result->get_statistic('S2'), '60');
+is($result->get_statistic('S2_bits'), '119.4');
+is($result->get_parameter('expect'), '1e-23');
+is($result->get_statistic('num_extensions'), '117843');
+
+
+@valid = ( [ 'gi|41400296|gb|AE016958.1|', 4829781, 'AE016958', '6e-059', 236],
+	      [ 'gi|54013472|dbj|AP006618.1|', 6021225, 'AP006618', '4e-026', 127],
+	      [ 'gi|57546753|dbj|BA000030.2|', 9025608, 'BA000030', '1e-023', 119]);
+$count = 0;
+while( $hit = $result->next_hit ) {
+    my $d = shift @valid;
+
+    is($hit->name, shift @$d);
+    is($hit->length, shift @$d);
+    is($hit->accession, shift @$d);
+    is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
+    is($hit->raw_score, shift @$d );
+
+    if( $count == 0 ) {
+        my $hsps_left = 1;
+        while( my $hsp = $hit->next_hsp ) {
+            is($hsp->query->start, 262);
+            is($hsp->query->end, 552);
+            is($hsp->hit->start, 1166897);
+            is($hsp->hit->end, 1167187);
+            is($hsp->length('hsp'), 291);
+            is($hsp->start('hit'), $hsp->hit->start);
+            is($hsp->end('query'), $hsp->query->end);
+            is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
+            cmp_ok($hsp->evalue, '==', 6e-59);
+            is($hsp->score, 119);
+            is($hsp->bits,236);	    	    
+            is(sprintf("%.2f",$hsp->percent_identity), 85.22);
+            is(sprintf("%.4f",$hsp->frac_identical('query')), 0.8522);
+            is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.8522);
+            is($hsp->gaps, 0);
+            $hsps_left--;
+        }
+        is($hsps_left, 0);
+    }
+    last if( $count++ > @valid );
+}
+is(@valid, 0);
+
+# result2hash($result): flatten a result object into a flat hash (query name,
+# per-hit name/bits, per-HSP bits/coords/strands/gaps) so two parses can be compared
+sub result2hash {
+    my ($result) = @_;
+    my %hash;
+    $hash{'query_name'} = $result->query_name;
+    my $hitcount = 1;
+    my $hspcount = 1;   # never reset per hit: HSPs are numbered across the whole result
+    foreach my $hit ( $result->hits ) {
+	$hash{"hit$hitcount\_name"}   =  $hit->name;
+	# hit significance is deliberately not recorded (code kept below, disabled):
+	# too hard to compare, as the values don't always match
+#	$hash{"hit$hitcount\_signif"} =  
+#	    ( sprintf("%.0e",$hit->significance) =~ /e\-?(\d+)/ );
+	$hash{"hit$hitcount\_bits"}   =  sprintf("%d",$hit->bits);
+
+	foreach my $hsp ( $hit->hsps ) {
+	    $hash{"hsp$hspcount\_bits"}   = sprintf("%d",$hsp->bits);
+	    # HSP e-value is deliberately not recorded (code kept below, disabled):
+ 	    # too hard to compare, as the values don't always match
+#	    $hash{"hsp$hspcount\_evalue"} =  
+#		( sprintf("%.0e",$hsp->evalue) =~ /e\-?(\d+)/ );
+	    $hash{"hsp$hspcount\_qs"}     = $hsp->query->start;
+	    $hash{"hsp$hspcount\_qe"}     = $hsp->query->end;
+	    $hash{"hsp$hspcount\_qstr"}   = $hsp->query->strand;
+	    $hash{"hsp$hspcount\_hs"}     = $hsp->hit->start;
+	    $hash{"hsp$hspcount\_he"}     = $hsp->hit->end;
+	    $hash{"hsp$hspcount\_hstr"}   = $hsp->hit->strand;
+
+	    #$hash{"hsp$hspcount\_pid"}     = sprintf("%d",$hsp->percent_identity);
+	    #$hash{"hsp$hspcount\_fid"}     = sprintf("%.2f",$hsp->frac_identical);
+	    $hash{"hsp$hspcount\_gaps"}    = $hsp->gaps('total');
+	    $hspcount++;
+	}
+	$hitcount++;
+    }
+    return %hash;   # returned as a flat key/value list; callers assign it to a hash
+}
+
+
+__END__
+
+Useful for debugging:
+
+    if ($iter_count == 3) {
+	print "NEWHITS:\n";
+	foreach ($iter->newhits) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nOLDHITS:\n";
+	foreach ($iter->oldhits) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nNEWHITS BELOW:\n";
+	foreach ($iter->newhits_below_threshold) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nNEWHITS NOT BELOW:\n";
+	foreach ($iter->newhits_not_below_threshold) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nNEWHITS UNCLASSIFIED:\n";
+	foreach ($iter->newhits_unclassified) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nOLDHITS BELOW:\n";
+	foreach ($iter->oldhits_below_threshold) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nOLDHITS NEWLY BELOW:\n";
+	foreach ($iter->oldhits_newly_below_threshold) {
+	    print "  " . $_->name . "\n";
+	}
+	print "\nOLDHITS NOT BELOW:\n";
+	foreach ($iter->oldhits_not_below_threshold) {
+	    print "  " . $_->name . "\n";
+	}
+    }
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Seg.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Seg.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Seg.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,43 @@
+# Bio::Tools::Seg test script
+
+use strict;
+use Test;
+
+BEGIN {	plan tests => 15 }	# 3 setup checks + 4 field checks for each of 3 features
+
+use Bio::Tools::Seg;
+use File::Spec;
+
+ok my $infile = File::Spec->catfile(qw(t data seg.out));
+ok my $parser = Bio::Tools::Seg->new(-file=>$infile);
+
+my @feat;
+while ( my $feat = $parser->next_result ) {
+  push @feat, $feat;
+}
+
+ok @feat == 3;
+
+#>LBL_0012(32-46) complexity=2.47 (12/2.20/2.50)
+#gdggwtfegwggppe
+
+ok $feat[0]->seq_id, 'LBL_0012';
+ok $feat[0]->start,  32;
+ok $feat[0]->end,    46;
+ok $feat[0]->score,  2.47;
+
+#>LBL_0012(66-80) complexity=2.31 (12/2.20/2.50)
+#kfssrasakavakks
+
+ok $feat[1]->seq_id, 'LBL_0012';
+ok $feat[1]->start,  66;
+ok $feat[1]->end,    80;
+ok $feat[1]->score,  2.31;
+
+#>LBL_0012(123-138) complexity=2.31 (12/2.20/2.50)
+#svivsqsqgvvkgvgv
+
+ok $feat[2]->seq_id, 'LBL_0012';
+ok $feat[2]->start,  123;
+ok $feat[2]->end,    138;
+ok $feat[2]->score,  2.31;

Added: trunk/packages/bioperl/branches/upstream/current/t/Seq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Seq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Seq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,201 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Seq.t,v 1.29 2004/08/11 07:34:20 lapp Exp $
+
+use strict;
+
+BEGIN { 
+    # To cope with systems that have no installed Test module, the t dir
+    # (where a copy of Test.pm is located) is meant to act as a fallback.
+    # NOTE: 'use' is a compile-time statement, so 'use lib' runs regardless of $@.
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 57;
+}
+
+use Bio::Seq;
+use Bio::Seq::RichSeq;
+use Bio::SeqFeature::Generic;
+use Bio::Species;
+use Bio::Annotation::SimpleValue;
+
+ok(1);
+
+ok my $seq = Bio::Seq->new(-seq=>'ACTGTGGCGTCAACT',
+                        -desc=>'Sample Bio::Seq object',
+			-alphabet => 'dna',
+                        -is_circular => 1
+                       );
+
+ok $seq->is_circular;
+ok not $seq->is_circular(0);
+ok not $seq->is_circular;
+
+my $trunc = $seq->trunc(1,4);
+ok $trunc->length,  4, 'truncated sequence was not of length 4';
+
+ok $trunc->seq, 'ACTG', 'truncated sequence was not ACTG instead was '. $trunc->seq();
+
+# test ability to get str function
+ok  $seq->seq(),  'ACTGTGGCGTCAACT' ;
+
+ok $seq = Bio::Seq->new(-seq=>'actgtggcgtcaact',
+		     -desc=>'Sample Bio::Seq object',
+		     -display_id => 'something',
+		     -accession_number => 'accnum',
+		     -alphabet => 'dna' );
+
+ok uc $seq->alphabet, 'DNA' , 'alphabet was ' .$seq->alphabet();
+
+# basic methods
+
+ok $seq->id(), 'something',  "saw ".$seq->id;
+ok $seq->accession_number, 'accnum', "saw ". $seq->accession_number ;
+ok $seq->subseq(5, 9),  'tggcg', "subseq(5,9) was ". $seq->subseq(5,9);
+
+# check IdentifiableI and DescribableI interfaces
+ok $seq->isa('Bio::IdentifiableI');
+ok $seq->isa('Bio::DescribableI');
+# make sure all methods are implemented
+ok $seq->authority("bioperl.org"), "bioperl.org";
+ok $seq->namespace("t"), "t";
+ok $seq->version(0), 0;
+ok $seq->lsid_string(), "bioperl.org:t:accnum";
+ok $seq->namespace_string(), "t:accnum.0";
+ok $seq->description(), 'Sample Bio::Seq object';
+ok $seq->display_name(), "something";
+
+# check that feature accession works regardless of lazy things going on
+ok scalar($seq->top_SeqFeatures()), 0;
+ok scalar($seq->flush_SeqFeatures()), 0;
+
+my $newfeat = Bio::SeqFeature::Generic->new( -start => 10,
+					     -end => 12,
+					     -primary => 'silly',
+					     -source => 'stuff');
+
+
+$seq->add_SeqFeature($newfeat);
+ok $seq->feature_count, 1;
+
+my $species = new Bio::Species
+    (-verbose => 1, 
+     -classification => [ qw( sapiens Homo Hominidae
+			      Catarrhini Primates Eutheria
+			      Mammalia Vertebrata Chordata
+			      Metazoa Eukaryota )]);
+$seq->species($species);
+ok $seq->species->binomial, 'Homo sapiens';
+$seq->annotation->add_Annotation('description',
+		 Bio::Annotation::SimpleValue->new(-value => 'desc-here'));
+my ($descr) = $seq->annotation->get_Annotations('description');
+ok $descr->value(), 'desc-here';
+ok $descr->tagname(), 'description';
+
+#
+#  translation tests
+#
+
+my $trans = $seq->translate();
+ok  $trans->seq(), 'TVAST' , 'translated sequence was ' . $trans->seq();
+
+# unambiguous two character codons like 'ACN' and 'GTN' should give out an amino acid
+$seq->seq('ACTGTGGCGTCAAC');
+$trans = $seq->translate();
+ok $trans->seq(), 'TVAST', 'translated sequence was ' . $trans->seq();
+
+$seq->seq('ACTGTGGCGTCAACA');
+$trans = $seq->translate();
+ok $trans->seq(), 'TVAST', 'translated sequence was ' . $trans->seq();
+
+$seq->seq('ACTGTGGCGTCAACAG');
+$trans = $seq->translate();
+ok $trans->seq(), 'TVAST', 'translated sequence was ' . $trans->seq();
+
+$seq->seq('ACTGTGGCGTCAACAGT');
+$trans = $seq->translate();
+ok $trans->seq(), 'TVASTV', 'translated sequence was ' . $trans->seq();
+
+$seq->seq('ACTGTGGCGTCAACAGTA');
+$trans = $seq->translate();
+ok $trans->seq(), 'TVASTV', 'translated sequence was ' . $trans->seq();
+
+$seq->seq('AC');
+ok $seq->translate->seq , 'T', 'translated sequence was ' . $seq->translate->seq();
+
+#difference between the default and full CDS translation
+
+$seq->seq('atgtggtaa');
+$trans = $seq->translate();
+ok $trans->seq(), 'MW*' , 'translated sequence was ' . $trans->seq();
+
+$seq->seq('atgtggtaa');
+$trans = $seq->translate(undef,undef,undef,undef,1);
+ok $trans->seq(), 'MW', 'translated sequence was ' . $trans->seq();
+
+#frame 
+my $string;
+my @frames = (0, 1, 2);
+foreach my $frame (@frames) {
+    $string .= $seq->translate(undef, undef, $frame)->seq;
+    $string .= $seq->revcom->translate(undef, undef, $frame)->seq;
+}
+ok $string, 'MW*LPHCGYHVVTT';
+
+#Translating with all codon tables using method defaults
+$string = '';
+my @codontables = qw( 1 2 3 4 5 6 9 10 11 12 13 14 15 16 21 22 23);
+foreach my $ct (@codontables) {
+    $string .= $seq->translate(undef, undef, undef, $ct)->seq;
+}
+ok $string, 'MW*MW*MW*MW*MW*MWQMW*MW*MW*MW*MW*MWYMW*MW*MW*MW*MW*';
+
+# CDS translation set to throw an exception for internal stop codons
+$seq->seq('atgtggtaataa');
+eval {
+    $seq->translate(undef, undef, undef, undef, 'CDS' , 'throw');
+};
+ok ($@ =~ /EX/) ;
+
+$seq->seq('atgtggtaataa');
+ok( $seq->translate('J', '-',)->seq, 'MWJJ');
+
+# tests for RichSeq
+my $richseq = Bio::Seq::RichSeq->new( -seq => 'atgtggtaataa',
+				      -accession_number => 'AC123',
+				      -alphabet => 'rna',
+				      -molecule => 'mRNA',		
+				      -id => 'id1',
+				      -dates => [ '2001/1/1' ],
+				      -pid => '887821',
+				      -keywords => 'JUNK1;JUNK2',
+				      -division => 'Fungi',
+				      -secondary_accessions => 'AC1152' );
+				 
+ok ($richseq);
+ok ($richseq->seq, 'atgtggtaataa');
+ok ($richseq->display_id, 'id1');
+ok (($richseq->get_dates)[0], '2001/1/1');
+ok (($richseq->get_secondary_accessions)[0], 'AC1152');
+ok ($richseq->accession_number, 'AC123');
+ok ($richseq->alphabet, 'rna');
+ok ($richseq->molecule, 'mRNA');
+ok ($richseq->pid, 887821);
+ok ($richseq->division, 'Fungi');
+ok ($richseq->keywords, 'JUNK1; JUNK2');
+$richseq->seq_version('2');
+ok ($richseq->seq_version, 2);
+
+# tests for subtle misbehaviors
+$seq = Bio::Seq->new(-primary_id => 'blah', -accession_number => 'foo');
+ok ($seq->accession_number, $seq->primary_seq->accession_number);
+ok ($seq->primary_id, $seq->primary_seq->primary_id);
+$seq->accession_number('blurb');
+$seq->primary_id('bar');
+ok ($seq->accession_number, $seq->primary_seq->accession_number);
+ok ($seq->primary_id, $seq->primary_seq->primary_id);
+

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqAnalysisParser.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqAnalysisParser.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqAnalysisParser.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,80 @@
+# -*-Perl-*-
+
+use strict;
+BEGIN { 
+    eval { require Test; };  # probe for an installed Test module
+    if( $@ ) {
+	use lib 't';  # NOTE: 'use' is compile-time, so this runs regardless of $@
+    }
+    use Test;
+    plan tests => 11;  # one per ok() call in this script
+}
+
+use Bio::Factory::SeqAnalysisParserFactory;
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+my ($seqio,$seq,$factory,$parser, $gene_seen, $exon_seen);
+
+$seqio = new Bio::SeqIO('-format'=>'fasta', '-file' => Bio::Root::IO->catfile("t","data","genomic-seq.fasta"));
+ok $seqio->isa('Bio::SeqIO');# 'seqio was not created';
+$seq = $seqio->next_seq;
+ok $seq->isa('Bio::PrimarySeqI');#'could not read sequence';
+
+$factory = new Bio::Factory::SeqAnalysisParserFactory();
+
+# let's test the genscan factory
+$parser = $factory->get_parser(-input => Bio::Root::IO->catfile("t","data","genomic-seq.genscan"),
+				  -method => 'genscan');
+ok $parser->isa('Bio::SeqAnalysisParserI');#'noSeqAnalysisParserI created';
+while( my $feat = $parser->next_feature() ){
+    $seq->add_SeqFeature($feat);
+}
+($gene_seen, $exon_seen)  = (0,0);
+foreach my $feat (  $seq->top_SeqFeatures() ) {
+    if( $feat->isa("Bio::Tools::Prediction::Gene") ) {
+	foreach my $exon ( $feat->exons ) {
+	    $exon_seen++;
+	}
+	$gene_seen++;
+    } 
+}
+ok $exon_seen, 37;
+ok $gene_seen, 3;
+
+# let's test the mzef factory
+$parser = $factory->get_parser(-input => Bio::Root::IO->catfile("t","data","genomic-seq.mzef"),
+			       -method=> 'mzef');
+$seqio = new Bio::SeqIO('-format'=>'fasta', '-file' => Bio::Root::IO->catfile("t","data","genomic-seq.fasta"));
+$seq = $seqio->next_seq();
+ok(defined $seq && $seq->isa('Bio::PrimarySeqI'));
+
+ok $parser->isa('Bio::SeqAnalysisParserI');#'noSeqAnalysisParserI created';
+while( my $feat = $parser->next_feature() ){
+    $seq->add_SeqFeature($feat);
+}
+($gene_seen, $exon_seen)  = (0,0);
+foreach my $feat (  $seq->top_SeqFeatures() ) {
+    if( $feat->isa("Bio::Tools::Prediction::Gene") ) {
+	foreach my $exon ( $feat->exons ) { 
+	    $exon_seen++;
+	}
+	$gene_seen++;
+    } 
+}
+ok $exon_seen, 23;
+ok $gene_seen, 1;
+
+# let's test the ePCR factory
+
+$parser = $factory->get_parser(-input => Bio::Root::IO->catfile("t","data", "genomic-seq.epcr"),
+			       -method => 'epcr');
+
+$seq->flush_SeqFeatures;
+
+ok $parser->isa('Bio::SeqAnalysisParserI');#'noSeqAnalysisParserI created';
+while( my $feat = $parser->next_feature() ){
+    $seq->add_SeqFeature($feat);
+}
+
+ok $seq->top_SeqFeatures(), 7;

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqBuilder.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqBuilder.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqBuilder.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,141 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: SeqBuilder.t,v 1.2 2003/02/18 03:37:58 lapp Exp $
+
+use strict;
+use vars qw($DEBUG $TESTCOUNT);
+BEGIN {     
+    eval { require Test; };  # probe for an installed Test module
+    if( $@ ) {
+	use lib 't';  # NOTE: 'use' is compile-time, so this runs regardless of $@
+    }
+    use Test;
+    $TESTCOUNT = 101;  # planned number of ok() checks
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+ok(1);
+
+my $verbosity = -1;   # Set to -1 for release version, so warnings aren't printed
+
+my ($seqio,$seq); # predeclare variables for strict
+
+# default mode
+$seqio = Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
+						   "t","data","test.genbank"), 
+			 '-format' => 'GenBank');
+ok $seqio;
+$seqio->verbose($verbosity);
+
+my $numseqs = 0;
+my @loci = qw(U63596 U63595 M37762 NT_010368 L26462);
+my @numfeas = (3,1,6,3,26);
+
+while($seq = $seqio->next_seq()) {
+    ok ($seq->accession_number, $loci[$numseqs++]);
+    ok ($seq->annotation->get_Annotations());
+    ok (scalar($seq->top_SeqFeatures), $numfeas[$numseqs-1]);
+    ok ($seq->species->binomial);
+    ok ($seq->seq);
+}
+ok ($numseqs, 5);
+
+# minimalistic mode
+$seqio = Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
+						   "t","data","test.genbank"), 
+			 '-format' => 'GenBank');
+ok $seqio;
+$seqio->verbose($verbosity);
+my $seqbuilder = $seqio->sequence_builder();
+ok $seqbuilder;
+ok $seqbuilder->isa("Bio::Factory::ObjectBuilderI");
+$seqbuilder->want_none();
+$seqbuilder->add_wanted_slot('display_id','accession_number','desc');
+
+$numseqs = 0;
+
+while($seq = $seqio->next_seq()) {
+    ok ($seq->accession_number, $loci[$numseqs++]);
+    ok (scalar(grep { ! ($_->tagname eq "keyword" ||
+			 $_->tagname eq "date_changed" ||
+			 $_->tagname eq "secondary_accession"); }
+	       $seq->annotation->get_Annotations()), 0);
+    if($numseqs <= 3) {
+	ok (scalar($seq->top_SeqFeatures), 0);
+    } else {
+	ok (scalar($seq->top_SeqFeatures), $numfeas[$numseqs-1]);
+    }
+    ok ($seq->species, undef);
+    ok ($seq->seq, undef);
+    # switch on features for the last 2 seqs
+    $seqbuilder->add_wanted_slot('features') if $numseqs == 3;
+}
+ok ($numseqs, 5);
+
+# everything but no sequence, and no features
+$seqio = Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
+						   "t","data","test.genbank"), 
+			 '-format' => 'GenBank');
+ok $seqio;
+$seqio->verbose($verbosity);
+$seqbuilder = $seqio->sequence_builder();
+# want-all is default
+$seqbuilder->add_unwanted_slot('seq','features');
+
+$numseqs = 0;
+
+while($seq = $seqio->next_seq()) {
+    ok ($seq->accession_number, $loci[$numseqs++]);
+    ok scalar($seq->annotation->get_Annotations());
+    if($numseqs <= 3) {
+	ok (scalar($seq->top_SeqFeatures), 0);
+    } else {
+	ok (scalar($seq->top_SeqFeatures), $numfeas[$numseqs-1]);
+    }
+    ok $seq->species->binomial;
+    ok ($seq->seq, undef);
+    # switch on features for the last 2 seqs
+    if($numseqs == 3) {
+	$seqbuilder->add_unwanted_slot(
+	     grep { $_ ne 'features'; } $seqbuilder->remove_unwanted_slots());
+    }
+}
+ok ($numseqs, 5);
+
+# skip sequences less than 100bp or accession like 'NT_*'
+$seqio = Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
+						   "t","data","test.genbank"), 
+			 '-format' => 'GenBank');
+ok $seqio;
+$seqio->verbose($verbosity);
+$seqbuilder = $seqio->sequence_builder();
+# we could have as well combined the two conditions into one, but we want to
+# test the implicit AND here
+$seqbuilder->add_object_condition(sub {
+    my $h = shift;
+    return 0 if($h->{'-length'} < 100);
+    return 1;
+});
+$seqbuilder->add_object_condition(sub {
+    my $h = shift;
+    return 0 if($h->{'-display_id'} =~ /^NT_/);
+    return 1;
+});
+
+$numseqs = 0;
+my $i = 0;
+
+while($seq = $seqio->next_seq()) {
+    $numseqs++;
+    ok ($seq->accession_number, $loci[$i]);
+    ok scalar($seq->annotation->get_Annotations());
+    ok (scalar($seq->top_SeqFeatures), $numfeas[$i]);
+    ok $seq->species->binomial;
+    ok $seq->seq;
+    $i += 2;
+}
+ok ($numseqs, 3);
+

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqDiff.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqDiff.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqDiff.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: SeqDiff.t,v 1.6 2002/06/10 08:38:54 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # To cope with systems that have no installed Test module, the t dir
+    # (where a copy of Test.pm is located) is meant to act as a fallback.
+    # NOTE: 'use' is a compile-time statement, so 'use lib' runs regardless of $@.
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 42;
+}
+
+use Bio::Variation::SeqDiff;
+use Bio::Variation::DNAMutation;
+use Bio::Variation::Allele;
+
+ok 1;
+my ($obj, $mm, $aa, $dna, $m);
+
+ok $obj = Bio::Variation::SeqDiff -> new;
+
+ok $obj->id('id');
+ok $obj->id, 'id';
+
+ok $obj->sysname('sysname');
+ok $obj->sysname, 'sysname';
+
+$obj->trivname('trivname'); 
+ok $obj->trivname eq 'trivname';
+
+ok $obj->chromosome('chr');
+ok $obj->chromosome, 'chr';
+
+ok $obj->description('desc');
+ok $obj->description, 'desc';
+
+ok $obj->numbering('numbering');
+ok $obj->numbering, 'numbering';
+
+ok $obj->offset(100);
+ok $obj->offset, 100;
+#                  12345678901234567890
+ok $obj->dna_ori ('gctgctgatcgatcgtagctagctag');
+ok $obj->dna_ori, 'gctgctgatcgatcgtagctagctag';
+
+# generate mutated DNA seq from the mutations (c->g at positions 14 and 19)
+ok $m = Bio::Variation::DNAMutation->new(-isMutation => 1, -start=>14, -end=>14);
+ok my $a1 = Bio::Variation::Allele->new(-seq=>'c');  # lexical, not sort's global $a
+my $b1 = Bio::Variation::Allele->new(-seq=>'g');     # lexical, not sort's global $b
+ok $m->allele_ori($a1);
+ok $m->allele_mut($b1);
+ok $obj->add_Variant($m);
+my $m2 = Bio::Variation::DNAMutation->new(-isMutation => 1, -start=>19, -end=>19);
+my $a2 = Bio::Variation::Allele->new(-seq=>'c');
+my $b2 = Bio::Variation::Allele->new(-seq=>'g');
+$m2->allele_ori($a2);
+$m2->allele_mut($b2);
+$obj->add_Variant($m2);
+
+#ok $obj->dna_mut('gctgctgatcggtcgtagctagctag');
+ok $obj->dna_mut, 'gctgctgatcgatggtaggtagctag';
+
+ok $obj->rna_ori('gctgctgatcgatcgtagctagctag');
+ok $obj->rna_ori, 'gctgctgatcgatcgtagctagctag';
+
+$obj->rna_mut('gctgctgatcgatcgtagctagctag'); 
+ok $obj->rna_mut, 'gctgctgatcgatcgtagctagctag';
+
+ok $obj->aa_ori('MHYTRD');
+ok $obj->aa_ori, 'MHYTRD';
+
+ok $obj->aa_mut('MHGTRD');
+ok $obj->aa_mut, 'MHGTRD';
+
+foreach $mm ( $obj->each_Variant ) {
+    $mm->primary_tag('a');
+    ok $mm->isa('Bio::Variation::VariantI');
+}
+
+
+ok $obj->gene_symbol('fos');
+ok $obj->gene_symbol, 'fos';
+
+ok $obj->rna_offset(10);
+ok $obj->rna_offset == 10;
+
+ok $obj->rna_id('transcript#3');
+ok $obj->rna_id, 'transcript#3';
+
+ok $dna = $obj->seqobj('dna_ori');
+ok $dna->isa('Bio::PrimarySeq');
+
+$obj->aa_mut(''); 
+$aa = $obj->seqobj('aa_mut');
+ok not defined $aa;
+
+eval {
+    $dna = $obj->seqobj('dna_ri');
+};
+ok $@;

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqFeatCollection.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqFeatCollection.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqFeatCollection.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,207 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: SeqFeatCollection.t,v 1.11 2005/08/01 21:29:10 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+
+my $error;
+BEGIN { 
+    # To cope with systems that have no installed Test module, the t dir
+    # (where a copy of Test.pm is located) is meant to act as a fallback.
+    # NOTE: 'use' is a compile-time statement, so 'use lib' runs regardless of $@.
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 20;
+    plan tests => $NUMTESTS;
+
+    eval { require DB_File; };  # without DB_File every test is skipped below
+    if( $@ ) {
+	print STDERR "DB_File not installed. This means the SeqFeatCollection wont work\n";
+	for( 1..$NUMTESTS ) {  # one skip() per planned test
+	    skip("DB_File",1);
+	}
+       $error = 1; 
+    }
+
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+my $testnum;
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## $NUMTESTS in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+
+# First of all we need to create a flat db
+require Bio::SeqFeature::Collection;
+use Bio::Root::IO;
+use Bio::Location::Simple;
+use Bio::Tools::GFF;
+use Bio::SeqIO;
+
+my $simple = new Bio::SeqIO(-format => 'genbank',
+			    -file   =>  Bio::Root::IO->catfile
+			    ("t","data","AB077698.gb"));
+
+my @features;
+my $seq = $simple->next_seq();
+@features = $seq->top_SeqFeatures();
+ok(scalar @features, 11);
+
+my $col = new Bio::SeqFeature::Collection(-verbose => $verbose);
+
+ok($col);
+ok($col->add_features( \@features), 11);
+my @feat = $col->features_in_range(-range => ( new Bio::Location::Simple
+					       (-start => 100,
+						-end   => 300,
+						-strand => 1) ),
+				   -contain => 0);
+ok(scalar @feat, 5);
+if( $verbose ) {    
+    foreach my $f ( @feat ) {
+	print "location: ", $f->location->to_FTstring(), "\n";    	
+    }
+}
+
+ok(scalar $col->features_in_range(-range => ( new Bio::Location::Simple
+						   (-start => 100,
+						    -end   => 300,
+						    -strand => -1) ),
+				      -strandmatch => 'ignore',
+				      -contain => 1), 2);
+
+@feat = $col->features_in_range(-start => 79,
+				-end   => 1145,
+				-strand => 1,
+				-strandmatch => 'strong',
+				-contain => 1);
+ok(scalar @feat, 5);
+if( $verbose ) {    
+    foreach my $f ( sort { $a->start <=> $b->start} @feat ) {
+	print $f->primary_tag, " ", $f->location->to_FTstring(), "\n";
+    }
+}
+
+ok($feat[0]->primary_tag, 'CDS');
+ok($feat[0]->has_tag('gene'));
+
+$verbose = 0;
+# specify input via -fh or -file
+my $gffio = Bio::Tools::GFF->new(-file => Bio::Root::IO->catfile
+				 ("t","data","myco_sites.gff"), 
+				 -gff_version => 2);
+@features = ();
+# loop over the input stream
+while(my $feature = $gffio->next_feature()) {
+    # do something with feature
+    push @features, $feature;
+}
+$gffio->close();
+
+ok(scalar @features, 412);
+$col = new Bio::SeqFeature::Collection(-verbose => $verbose,
+				       -usefile => 1);
+
+ok($col);
+
+ok($col->add_features( \@features), 412);
+
+my $r = new Bio::Location::Simple(-start => 67700,
+				  -end   => 150000,
+				  -strand => 1);
+
+@feat = $col->features_in_range(-range => $r,
+				-strandmatch => 'ignore',
+				-contain => 0);
+
+ok(scalar @feat, 56);
+ok($col->feature_count, 412);
+my $count = $col->feature_count;
+$col->remove_features( [$features[58], $features[60]]);
+
+ok( $col->feature_count, 410);
+@feat = $col->features_in_range(-range => $r,
+				-strandmatch => 'ignore',
+				-contain => 0);
+ok( scalar @feat, 54);
+# add the removed features back in in order to get the collection back to size 
+
+$col->add_features([$features[58], $features[60]]);
+
+# let's randomize so we aren't removing and adding in the same order
+# and hopefully randomly deal with a bin's expiration
+fy_shuffle(\@features);
+
+foreach my $f ( @features ) {
+    $count--, next unless defined $f;
+    $col->remove_features([$f]);
+#    ok( $col->feature_count, --$count);
+}
+ok($col->feature_count, 0);
+my $filename = 'featcol.idx';
+my $newcollection = new Bio::SeqFeature::Collection(-verbose => $verbose,
+						    -keep    => 1,
+						    -file    => $filename);
+$newcollection->add_features(\@feat);
+ok($newcollection->feature_count, 54);
+undef $newcollection;
+ok(-e $filename);
+$newcollection = new Bio::SeqFeature::Collection(-verbose => $verbose,
+						 -file    => $filename);
+ok($newcollection->feature_count, 54);
+undef $newcollection;
+ok( ! -e $filename);
+if( $verbose ) {
+    my @fts =  sort { $a->start <=> $b->start}  
+    grep { $r->overlaps($_,'ignore') } @features;
+    
+    if( $verbose ) {
+	foreach my $f ( @fts ) {
+	    print $f->primary_tag, "    ", $f->location->to_FTstring(), "\n";
+	}
+	print "\n";
+    }
+
+    my %G = map { ($_,1) } @feat; 
+    my $c = 0;
+    foreach my $A ( @fts ) {
+	if( ! $G{$A} ) {
+	    print "missing ", $A->primary_tag, " ", $A->location->to_FTstring(), "\n";
+	} else { 
+	    $c++;
+	}
+    }
+    print "Number of features correctly retrieved $c\n";
+    foreach my $f ( sort { $a->start <=> $b->start} @feat ) {
+	print $f->primary_tag, "    ", $f->location->to_FTstring(), "\n";
+    }
+}
+
+
+
+# In-place Fisher-Yates shuffle of the array referenced by the argument.
+sub fy_shuffle {
+    my ($list) = @_;
+    foreach my $pos ( reverse 0 .. $#{$list} ) {
+	my $pick = int rand( $pos + 1 );    # random index in 0..$pos
+	next if $pick == $pos;               # element stays where it is
+	@{$list}[ $pos, $pick ] = @{$list}[ $pick, $pos ];
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqFeature.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqFeature.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqFeature.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,461 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# CVS Version
+# $Id: SeqFeature.t,v 1.41.4.4 2006/11/30 09:24:00 sendu Exp $
+
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+my $skipdbtests;
+my $skip_all;
+BEGIN { 
+	# To cope with systems that have no installed Test module, the t dir
+	# (where a copy of Test.pm is located) is meant to act as a fallback.
+	# NOTE: 'use' is a compile-time statement, so 'use lib' runs regardless of $@.
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	$NUMTESTS = 211;
+	plan tests => $NUMTESTS;
+
+	eval { 
+		require IO::String; 
+		require LWP::UserAgent;
+		require HTTP::Request::Common;
+		require Bio::DB::GenBank;
+	};
+	if( $@ ) {
+		print STDERR "IO::String, LWP::UserAgent or HTTP::Request not installed - skipping DB tests...\n";
+		$skipdbtests = 1;  # any of the four requires above failed
+	} else {
+		$skipdbtests = 0;
+	}
+	eval {
+		require URI::Escape;
+	};
+	if( $@ ) {
+		print STDERR "URI::Escape not installed, so Bio::SeqFeature::Annotated not usable - skipping all tests...\n";
+		$skip_all = 1;  # checked by 'exit(0) if $skip_all' after this block
+	}
+}
+
+# At exit, call skip() once for each value in $Test::ntest..$NUMTESTS
+# (presumably pads the run out to the planned count; confirm against Test.pm).
+END {
+	skip('Skipping tests which need the Bio::DB::GenBank module',1) for $Test::ntest..$NUMTESTS;
+}
+
+exit(0) if $skip_all;
+
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::SeqFeature::Generic;
+use Bio::SeqFeature::FeaturePair;
+use Bio::SeqFeature::SimilarityPair;
+use Bio::SeqFeature::Computation;
+require Bio::SeqFeature::Annotated;
+use Bio::SeqFeature::Gene::Transcript;
+use Bio::SeqFeature::Gene::UTR;
+use Bio::SeqFeature::Gene::Exon;
+use Bio::SeqFeature::Gene::Poly_A_site;
+use Bio::SeqFeature::Gene::GeneStructure;
+
+use Bio::Location::Fuzzy;
+use Env qw(BIOPERLDEUG); # for importing bioperldebug var
+ok(1);
+
+# predeclare variables for strict
+my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft); 
+
+
+my $verbose = 0;
+
+$feat = new Bio::SeqFeature::Generic ( -start => 40,
+				       -end => 80,
+				       -strand => 1,
+				       -primary => 'exon',
+				       -source  => 'internal',
+				       -tag => {
+					   silly => 20,
+					   new => 1
+					   }
+				       );
+
+ok $feat->start, 40;
+
+ok $feat->end, 80;
+
+ok $feat->primary_tag, 'exon';
+
+ok $feat->source_tag, 'internal';
+
+$str = $feat->gff_string() || ""; # placate -w
+
+# we need to figure out the correct mapping of this stuff
+# soon
+
+#if( $str ne "SEQ\tinternal\texon\t40\t80\t1\t.\t." ) {
+#    print "not ok 3\n";
+#} else {
+#    print "ok 3\n";
+#}
+
+ok(1);
+
+$pair = new Bio::SeqFeature::FeaturePair();
+
+ok defined $pair;
+
+$feat2 = new Bio::SeqFeature::Generic ( -start => 400,
+				       -end => 440,
+				       -strand => 1,
+				       -primary => 'other',
+				       -source  => 'program_a',
+				       -tag => {
+					   silly => 20,
+					   new => 1
+					   }
+				       );
+
+ok defined $feat2;
+$pair->feature1($feat);
+$pair->feature2($feat2);
+
+ok $pair->feature1, $feat;
+ok $pair->feature2, $feat2;
+ok $pair->start, 40;
+ok $pair->end, 80;
+ok $pair->primary_tag, 'exon';
+ok $pair->source_tag, 'internal';
+ok $pair->hstart, 400;
+ok $pair->hend, 440;
+ok $pair->hprimary_tag, 'other' ;
+ok $pair->hsource_tag, 'program_a';
+
+$pair->invert;
+ok $pair->end, 440;
+
+# Test attaching a SeqFeature::Generic to a Bio::Seq
+{
+    # Make the parent sequence object
+    my $seq = Bio::Seq->new(
+        '-seq'          => 'aaaaggggtttt',
+        '-display_id'   => 'test',
+        '-alphabet'      => 'dna',
+        );
+    
+    # Make a SeqFeature
+    my $sf1 = Bio::SeqFeature::Generic->new(
+        '-start'    => 4,
+        '-end'      => 9,
+        '-strand'   => 1,
+        );
+    
+    # Add the SeqFeature to the parent
+    ok ($seq->add_SeqFeature($sf1));
+    
+    # Test that it gives the correct sequence
+    my $sf_seq1 = $sf1->seq->seq;
+    ok $sf_seq1, 'aggggt';
+    ok $sf1->end,9;
+    ok $sf1->start,4;
+
+    # Make a second seqfeature on the opposite strand
+    my $sf2 = Bio::SeqFeature::Generic->new(
+        '-start'    => 4,
+        '-end'      => 9,
+        '-strand'   => -1,
+        );
+    
+    # This time add the PrimarySeq to the seqfeature
+    # before adding it to the parent
+    ok ($sf2->attach_seq($seq->primary_seq));
+    $seq->add_SeqFeature($sf2);
+    
+    # Test again that we have the correct sequence
+    my $sf_seq2 = $sf2->seq->seq;
+    ok $sf_seq2, 'acccct';
+}
+
+#Do some tests for computation.pm
+
+ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
+							    '-end'   => 10) );
+ok ( $comp_obj1->computation_id(332),332 );
+ok ( $comp_obj1->add_score_value('P', 33) );
+{
+    $comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
+						   '-end'   => 10);
+    ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
+    ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
+    ok ($sft[0], 'exon');
+}
+
+ok defined ( $comp_obj1 = new Bio::SeqFeature::Computation 
+	     (
+	      -start => 10, -end => 100,
+	      -strand => -1, -primary => 'repeat',
+	      -program_name => 'GeneMark',
+	      -program_date => '12-5-2000',
+	      -program_version => 'x.y',
+	      -database_name => 'Arabidopsis',
+	      -database_date => '12-dec-2000',
+	      -computation_id => 2231,
+	      -score    => { no_score => 334 } )
+	     );
+
+ok ( $comp_obj1->computation_id, 2231 );
+ok ( $comp_obj1->add_score_value('P', 33) );
+ok ( ($comp_obj1->each_score_value('no_score'))[0], '334');
+
+# some tests for bug #947
+
+my $sfeat = new Bio::SeqFeature::Generic(-primary => 'test');
+
+$sfeat->add_sub_SeqFeature(new Bio::SeqFeature::Generic(-start => 2,
+							-end   => 4,
+							-primary => 'sub1'),
+			   'EXPAND');
+
+$sfeat->add_sub_SeqFeature(new Bio::SeqFeature::Generic(-start => 6,
+							-end   => 8,
+							-primary => 'sub2'),
+			   'EXPAND');
+
+ok($sfeat->start, 2);
+ok($sfeat->end, 8);
+
+# some tests to see if we can set a feature to start at 0
+$sfeat = new Bio::SeqFeature::Generic(-start => 0, -end => 0 );
+
+ok(defined $sfeat->start);
+ok($sfeat->start,0);
+ok(defined $sfeat->end);
+ok($sfeat->end,0);
+
+
+# tests for Bio::SeqFeature::Gene::* objects
+# using information from acc: AB077698 as a guide
+
+ok my $seqio = new Bio::SeqIO(-format => 'genbank',
+			 -file   => Bio::Root::IO->catfile("t","data","AB077698.gb"));
+ok my $geneseq = $seqio->next_seq();
+
+ok my $gene = new Bio::SeqFeature::Gene::GeneStructure(-primary => 'gene',
+						    -start   => 1,
+						    -end     => 2701,
+						    -strand  => 1);
+
+ok my $transcript = new Bio::SeqFeature::Gene::Transcript(-primary => 'CDS',
+						       -start   => 80,
+						       -end     => 1144,
+						       -tag     => { 
+							   'gene' => "CHCR",
+							   'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
+							   'codon_start' => 1,
+							   'protein_id'  => 'BAB85648.1',
+						       });
+
+ok my $poly_A_site1 = new Bio::SeqFeature::Gene::Poly_A_site
+    (-primary => 'polyA_site',
+     -start => 2660,
+     -end   => 2660,
+     -tag   => { 
+	 'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
+	 });
+
+ok my $poly_A_site2 = new Bio::SeqFeature::Gene::Poly_A_site
+    (-primary => 'polyA_site',
+     -start => 1606,
+     -end   => 1606,
+     -tag   => { 
+	 'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
+     });
+
+ok my $fiveprimeUTR = new Bio::SeqFeature::Gene::UTR(-primary => "utr5prime");
+ok $fiveprimeUTR->location(new Bio::Location::Fuzzy(-start => "<1",
+						 -end   => 79));
+ok my $threeprimeUTR = new Bio::SeqFeature::Gene::UTR(-primary => "utr3prime",
+						   -start   => 1145,
+						   -end     => 2659);
+
+# Did a quick est2genome against genomic DNA (this is on Chr X) to
+# get the gene structure by hand since it is not in the file
+# --Jason
+
+ok my $exon1 = new Bio::SeqFeature::Gene::Exon(-primary => 'exon',
+					       -start => 80,
+					       -end   => 177);
+ok $geneseq->add_SeqFeature($exon1);
+
+ok $geneseq->add_SeqFeature($fiveprimeUTR);
+ok $geneseq->add_SeqFeature($threeprimeUTR);
+ok $geneseq->add_SeqFeature($poly_A_site1);
+ok $geneseq->add_SeqFeature($poly_A_site2);
+
+ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
+ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
+
+ok $transcript->add_exon($exon1);
+
+# API only supports a single poly-A site per transcript at this point 
+$transcript->poly_A_site($poly_A_site2);
+$geneseq->add_SeqFeature($transcript);
+$gene->add_transcript($transcript);
+$geneseq->add_SeqFeature($gene);
+
+my ($t) = $gene->transcripts(); # get 1st transcript
+ok(defined $t); 
+ok($t->mrna->length, 1693);
+ok($gene->utrs, 2);
+
+
+
+# Test for bug when Locations are not created explicitly
+
+my $feat1 = new Bio::SeqFeature::Generic(-start => 1,
+					 -end   => 15,
+					 -strand=> 1);
+
+$feat2 = new Bio::SeqFeature::Generic(-start => 10,
+					 -end   => 25,
+					 -strand=> 1);
+
+my $overlap = $feat1->location->union($feat2->location);
+ok($overlap->start, 1);
+ok($overlap->end,   25);
+
+my $intersect = $feat1->location->intersection($feat2->location);
+ok($intersect->start, 10);
+ok($intersect->end,   15);
+
+
+# now let's test spliced_seq
+
+ok  $seqio = new Bio::SeqIO(-file => Bio::Root::IO->catfile
+			    (qw(t data AY095303S1.gbk)),
+                            -format  => 'genbank');
+
+ok $geneseq = $seqio->next_seq();
+my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
+my $db;
+
+unless( $skipdbtests ) {
+ $db = new Bio::DB::GenBank(-verbose=> $ENV{BIOPERLDEBUG});
+ $CDS->verbose(-1);
+ my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
+ # Expected text must be ok()'s 2nd argument (not subseq's 3rd); it is cut to 60 nt.
+ ok($cdsseq->subseq(1,60), 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATC'.
+		    'TAGACCAGATGCATTGCATGTG');
+ ok($cdsseq->translate->subseq(1,100), 'MQPYASVSGRCLSRPDALHVIPFGRP'.
+    'LQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
+} else {
+    # $skipdbtests is set: report both comparisons above as skipped instead.
+    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
+    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
+}
+ok  $seqio = new Bio::SeqIO(-file => Bio::Root::IO->catfile
+			    (qw(t data AF032047.gbk)),
+                            -format  => 'genbank');
+ok $geneseq = $seqio->next_seq();
+($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
+unless ($skipdbtests ) {
+    my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
+    ok($cdsseq->subseq(1,60), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTG'.
+		       'TCTGGCCTGGAGGCT'); # expected text is ok()'s 2nd argument, cut to 60 nt
+    ok($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFL'.
+       'NCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
+} else { # $skipdbtests is set: report both comparisons above as skipped instead
+    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
+    skip('Cannot test for remote loc with spliced_seq w/o LWP installed',1);
+}
+
+
+# trans-spliced 
+
+ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
+									  -file   => 
+			    Bio::Root::IO->catfile(qw(t data NC_001284.gbk))));
+my $genome = $seqio->next_seq;
+
+foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
+   my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
+   chop($spliced); # remove stop codon
+   ok($spliced,($cds->get_tag_values('translation'))[0],'spliced seq translation matches expected');
+}
+
+my $sfa = Bio::SeqFeature::Annotated->new(-start => 1,
+					  -end => 5,
+					  -strand => "+",
+					  -frame => 2,
+					  -phase => 2,
+					  -score => 12,
+					  -display_name => 'test.annot',
+					  -seq_id => 'test.displayname' );
+
+ok (defined $sfa);
+my $loc = $sfa->location;
+ok $loc->isa("Bio::Location::Simple");
+
+ok $sfa->display_name eq 'test.annot';
+
+
+#test bsfa::from_feature
+{
+  my $sfg = Bio::SeqFeature::Generic->new ( -start => 400,
+					    -end => 440,
+					    -strand => 1,
+					    -primary => 'nucleotide_motif',
+					    -source  => 'program_a',
+					    -tag => {
+						     silly => 20,
+						     new => 1
+						    }
+					  );
+	my $sfa2;
+	eval {
+		$sfa2 = Bio::SeqFeature::Annotated->new(-feature => $sfg);
+	};
+	if ($@) {
+		foreach ( $Test::ntest..$NUMTESTS ) { skip('Could not get sofa definitions from external server',1); }
+		exit(0);
+	}
+  ok $sfa2->type->name,'nucleotide_motif';
+  ok $sfa2->primary_tag, 'nucleotide_motif';
+  ok $sfa2->source,'program_a';
+  ok $sfa2->strand,1;
+  ok $sfa2->start,400;
+  ok $sfa2->end,440;
+  ok $sfa2->get_Annotations('silly')->value,20;
+  ok $sfa2->get_Annotations('new')->value,1;
+
+  my $sfa3 = Bio::SeqFeature::Annotated->new( -start => 1,
+					      -end => 5,
+					      -strand => "+",
+					      -frame => 2,
+					      -phase => 2,
+					      -score => 12,
+					      -display_name => 'test.annot',
+					      -seq_id => 'test.displayname' );
+  eval {
+	$sfa3->from_feature($sfg);
+  };
+  if ($@) {
+	foreach ( $Test::ntest..$NUMTESTS ) { skip('Could not get sofa definitions from external server',1); }
+	exit(0);
+  }
+  ok $sfa3->type->name,'nucleotide_motif';
+  ok $sfa3->primary_tag, 'nucleotide_motif';
+  ok $sfa3->source,'program_a';
+  ok $sfa3->strand,1;
+  ok $sfa3->start,400;
+  ok $sfa3->end,440;
+  ok $sfa3->get_Annotations('silly')->value,20;
+  ok $sfa3->get_Annotations('new')->value,1;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqHound_DB.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqHound_DB.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqHound_DB.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,78 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# This test file is adapted from EMBL_DB.t
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl SeqHound_DB.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 15;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+	
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if ($@) {
+		plan skip_all => 'IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+	
+	use_ok('Bio::DB::SeqHound');
+}
+
+END {
+	unlink $Bio::DB::SeqHound::LOGFILENAME if -f $Bio::DB::SeqHound::LOGFILENAME;
+}
+
+my $verbose = -1;
+
+my ($db,$seq,$seqio);
+# get a single seq
+
+$seq = $seqio = undef;
+
+SKIP: {
+    $db = new Bio::DB::SeqHound(-verbose=>$verbose);
+    eval {ok(defined($seq = $db->get_Seq_by_acc('J00522')));};
+	skip('Could not connect to seqhound, skipping tests', 10) if $@;
+    is( $seq->length, 408); 
+    ok defined ($db->request_format('fasta'));
+    eval {ok(defined($seq = $db->get_Seq_by_acc('NP_862707')));};
+	skip('Could not connect to seqhound, skipping tests', 7) if $@;
+    is( $seq->accession, 'NP_862707');
+    is( $seq->length, 227); 
+    ok( defined($db = new Bio::DB::SeqHound(-verbose=>$verbose, 
+					-retrievaltype => 'tempfile')));
+    eval {ok(defined($seqio = $db->get_Stream_by_id(['BTACHRE'])));};
+	skip('Could not connect to seqhound, skipping tests', 3) if $@;
+    undef $db; # testing to see if we can remove db
+    ok( defined($seq = $seqio->next_seq()));
+    is( $seq->length, 1621);
+}
+
+$seq = $seqio = undef;
+
+SKIP: {
+    $db = Bio::DB::SeqHound->new(-verbose => $verbose,
+			    -retrievaltype => 'tempfile',
+			    -format => 'genbank'
+			    ); 
+    eval {ok( defined($seqio = $db->get_Stream_by_acc(['J00522', 'AF303112', 'J02231'])));};
+	skip('Could not connect to seqhound for batch test, skipping tests', 4) if $@;
+	is($seqio->next_seq->length, 408);
+    is($seqio->next_seq->length, 1611);
+    is($seqio->next_seq->length, 200);
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: SeqIO.t,v 1.99 2005/08/28 13:56:45 bosborne Exp $
+
+use strict;
+
+BEGIN {
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => 29;
+}
+
+use Bio::SeqIO;
+
+ok(1);
+
+# Set to -1 for release version, so warnings aren't printed
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+# Basic read and/or write tests for SeqIO. Specific tests for
+# given module should go into their own file.
+
+my @formats = qw(gcg fasta raw pir tab ace );
+# The following files or formats are failing: swiss genbank interpro embl
+
+foreach my $format (@formats) {
+	print "======== $format ========\n" if $verbose;
+	read_write($format);
+}
+
+sub read_write {	# read t/data/test.$format, then write the last sequence read
+	my $format = shift;	# SeqIO format name; also the extension of the input file
+	my $seq;
+	my $str = Bio::SeqIO->new(-file=> Bio::Root::IO->catfile
+									  ("t","data","test.$format"),
+									  -format => $format);
+	ok $seq = $str->next_seq();
+	print "Sequence 1 of 2 from $format stream:\n", $seq->seq, "\n\n"
+	  if  $verbose;
+	unless ($format eq 'raw') {	# id and length are not checked for 'raw'
+		ok $seq->id, 'roa1_drome',"ID for format $format";
+		ok $seq->length, 358;
+	}
+	# Debug output only; each print shows the sequence text once.
+	unless ($format eq 'gcg') { # GCG file can contain only one sequence
+		ok $seq = $str->next_seq();
+		print "Sequence 2 of 2 from $format stream:\n", $seq->seq, "\n"
+		  if $verbose;
+	}
+	# Write whichever sequence was read last to t/data/$format.out.
+	my $out = Bio::SeqIO->new(-file => ">". Bio::Root::IO->catfile
+									  ("t","data","$format.out"),
+									  -format => $format);
+	ok $out->write_seq($seq);
+	if ($format eq 'fasta') {	# extra check run for the fasta writer only
+		my $id_type;
+		ok($id_type = $out->preferred_id_type('accession.version'),
+			'accession.version');
+	}
+}
+
+END {	# delete the t/data/<format>.out file of every format listed in @formats
+	unlink Bio::Root::IO->catfile("t","data","$_.out") for @formats;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqPattern.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqPattern.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqPattern.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,43 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 6;
+}
+
+use Bio::Tools::SeqPattern;
+
+my ( $pattern,$pattern_obj,$pattern_obj2, $pattern_obj3);
+
+$pattern     = '(CCCCT)N{1,200}(agyyg)N{1,80}(ag)';
+$pattern_obj = new Bio::Tools::SeqPattern(-SEQ =>$pattern, -TYPE =>'dna');
+ok defined($pattern_obj) && ref($pattern_obj) && $pattern_obj->isa('Bio::Tools::SeqPattern');
+
+$pattern_obj2  = $pattern_obj->revcom();
+ok $pattern_obj2->str, '(CT)N(CRRCT){1,80}N(AGGGG){1,200}';
+
+$pattern_obj3 = $pattern_obj->revcom(1);
+ok $pattern_obj3->str, '(CT).{1,80}(C[GA][GA]CT).(AGGGG){1,200}';
+
+$pattern     = '(CCCCT)N{1,200}(agyyg)N{1,80}(bb)'; # test protein object expand
+$pattern_obj = new Bio::Tools::SeqPattern(-SEQ =>$pattern, -TYPE =>'protein');
+ok defined($pattern_obj) && ref($pattern_obj) && $pattern_obj->isa('Bio::Tools::SeqPattern');
+
+ok $pattern_obj2->expand, '(CT).(C[AG][AG]CT){1,80}.(AGGGG){1,200}';
+
+# amino patterns
+
+$pattern = 'ABZH';
+$pattern_obj2 = new Bio::Tools::SeqPattern(-SEQ =>$pattern, 
+					   -TYPE =>'amino');
+ok $pattern_obj2->expand, 'A[EQ][DN]H';

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqStats.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqStats.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqStats.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,121 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { use lib 't'; }
+    use Test;
+    plan tests => 36;
+}
+
+use Bio::PrimarySeq;
+use Bio::Tools::SeqStats;
+use vars ('$DEBUG');
+
+my ($seqobj, $count, $seqobj_stats, $wt);
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTG',
+			       -alphabet=>'dna', -id=>'test');
+$seqobj_stats  =  Bio::Tools::SeqStats->new(-seq=>$seqobj);
+
+ok defined($seqobj_stats) && ref($seqobj_stats) &&
+    $seqobj_stats->isa('Bio::Tools::SeqStats');
+
+$count = $seqobj_stats->count_monomers();  # for DNA sequence
+ok $count->{'A'}, 3;
+ok $count->{'C'}, 4;
+ok $count->{'G'}, 5;
+ok $count->{'T'}, 4;
+
+$count = $seqobj_stats->count_codons();
+ok $count->{'ACT'}, 2;
+ok $count->{'GTG'}, 1;
+ok $count->{'GCG'}, 1;
+ok $count->{'TCA'}, 1;
+
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'ACTACTTCA', -alphabet=>'dna',
+			       -id=>'test');
+$seqobj_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
+$wt = $seqobj_stats->get_mol_wt();  # for DNA sequence
+ok &round($$wt[0]), 2738 ;
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'ACXACNNCA',
+			       -alphabet=>'dna', -id=>'test');
+$wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+ok &round($$wt[0]), 2693;
+ok &round($$wt[1]), 2813;
+
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTG',
+			       -alphabet=>'dna', -id=>'test');
+$count = Bio::Tools::SeqStats->count_monomers($seqobj);  # for DNA sequence
+ok $count->{'A'}, 3;
+ok $count->{'C'}, 4;
+ok $count->{'G'}, 5;
+ok $count->{'T'}, 4;
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT',
+                               -alphabet=>'protein', -id=>'test');
+$seqobj_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
+$count = $seqobj_stats->count_monomers();  # for amino sequence
+ok $$count{'M'}, 1;
+ok $$count{'I'}, 3;
+ok $$count{'Y'}, 2;
+ok $$count{'T'}, 3;
+$wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+ok int $$wt[0], 2896;
+ok int $$wt[1], 2896;
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'UYXUYNNYU', -alphabet=>'rna');
+$wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+ok &round($$wt[0]), 2768;
+ok &round($$wt[1]), 2891;
+
+ok $seqobj = Bio::PrimarySeq->new(-seq=>'TGCCGTGTGTGCTGCTGCT', -alphabet=>'rna');
+$wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+ok &round($$wt[0]), 6104 ;
+
+# selenocysteine (NOTE: the sequence below contains no 'U' residue)
+ok $seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVT',
+                                  -alphabet=>'protein');
+$wt = Bio::Tools::SeqStats->get_mol_wt($seqobj);
+ok &round($$wt[0]), 2896 ;
+
+#
+# hydropathicity aka "gravy" score
+#
+
+# normal seq (should succeed)
+ok $seqobj = Bio::PrimarySeq->new(-seq=>'MSFVLVAPDMLATAAADVVQIGSAVSAGS',
+                                  -alphabet=>'protein');
+my $gravy = Bio::Tools::SeqStats->hydropathicity($seqobj);
+ok int($gravy*1000) == 1224;  # check to nearest 0.1%
+
+# ambiguous sequence (should fail)
+ok $seqobj = Bio::PrimarySeq->new(-seq=>'XXXB**BS', -alphabet=>'protein');
+eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
+ok $@ =~ /ambiguous amino acids/i;
+
+# empty sequence (should fail)
+ok $seqobj = Bio::PrimarySeq->new(-seq=>'', -alphabet=>'protein');
+eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
+ok $@ =~ /hydropathicity not defined/i;
+
+# DNA sequence (should fail)
+ok $seqobj = Bio::PrimarySeq->new(-seq=>'GATTACA', -alphabet=>'dna');
+eval { Bio::Tools::SeqStats->hydropathicity($seqobj) };
+ok $@ =~ /only meaningful for protein/;
+
+
+#
+# Extra functions
+#
+
+# Perl has no built-in round(): add 0.5 to the argument and truncate with int().
+sub round { my ($value) = @_; return int($value + 0.5); }

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqUtils.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqUtils.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqUtils.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,279 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##$Id: SeqUtils.t,v 1.20 2006/08/16 16:48:59 cjfields Exp $
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 37;
+}
+
+use Bio::PrimarySeq;
+use Bio::SeqUtils;
+use Bio::LiveSeq::Mutation;
+ok 1;
+
+my ($seq, $util, $ascii, $ascii_aa, $ascii3);
+
+# Entire alphabet now IUPAC-endorsed and used in GenBank (Oct 2006)          
+$ascii =    'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+$ascii_aa = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+
+$ascii3 = 
+    'AlaAsxCysAspGluPheGlyHisIleXleLysLeuMetAsnPylProGlnArgSerThrSecValTrpXaaTyrGlx';
+
+$seq = Bio::PrimarySeq->new('-seq'=> $ascii,
+			    '-alphabet'=>'protein', 
+			       '-id'=>'test');
+
+# one letter amino acid code to three letter code
+ok $util = new Bio::SeqUtils;
+ok $util->seq3($seq), $ascii3;
+
+# same conversion, calling seq3 as a class method instead of on $util
+ok (Bio::SeqUtils->seq3($seq), $ascii3); 
+ok (Bio::SeqUtils->seq3($seq, undef, ','), 
+    'Ala,Asx,Cys,Asp,Glu,Phe,Gly,His,Ile,Xle,Lys,'.
+    'Leu,Met,Asn,Pyl,Pro,Gln,Arg,Ser,Thr,Sec,Val,Trp,Xaa,Tyr,Glx');
+
+$seq->seq('asd-KJJK-');
+ok (Bio::SeqUtils->seq3($seq, '-', ':'), 
+    'Ala:Ser:Asp:Ter:Lys:Xle:Xle:Lys:Ter');
+
+# three letter amino acid code to one letter code ('PYH' is expected to come back as 'X')
+ok (Bio::SeqUtils->seq3in($seq, 'AlaPYHCysAspGlu'));
+ok  $seq->seq, 'AXCDE';
+ok (Bio::SeqUtils->seq3in($seq, $ascii3)->seq, $ascii_aa);
+#ok ();
+
+#
+# Tests for multiframe translations
+#
+
+$seq = Bio::PrimarySeq->new('-seq'=> 'agctgctgatcggattgtgatggctggatggcttgggatgctgg',
+			    '-alphabet'=>'dna', 
+			    '-id'=>'test2');
+
+my @a = $util->translate_3frames($seq);
+ok scalar @a, 3;
+#foreach $a (@a) {
+#    print 'ID: ', $a->id, ' ', $a->seq, "\n";
+#}
+# six-frame translation: @a is reused and must now hold six results
+@a = $util->translate_6frames($seq);
+ok scalar @a, 6;
+#foreach $a (@a) {
+#    print 'ID: ', $a->id, ' ', $a->seq, "\n";
+#}
+
+#
+# test for valid AA return
+#
+
+my @valid_aa = sort Bio::SeqUtils->valid_aa;
+ok(@valid_aa, 27);
+ok ($valid_aa[1], 'A');
+# valid_aa(1): the checks below expect 27 three-letter codes ('Arg' sorts second)
+@valid_aa = sort Bio::SeqUtils->valid_aa(1);
+ok(@valid_aa, 27);
+ok ($valid_aa[1], 'Arg');
+
+my %valid_aa = Bio::SeqUtils->valid_aa(2);
+ok keys %valid_aa, 54;
+ok($valid_aa{'C'}, 'Cys');
+ok( $valid_aa{'Cys'}, 'C');
+
+
+#
+# Mutate
+#
+
+my $string1 = 'aggt';
+$seq = Bio::PrimarySeq->new('-seq'=> 'aggt',
+			    '-alphabet'=>'dna',
+			    '-id'=>'test3');
+
+# point
+Bio::SeqUtils->mutate($seq,
+                      Bio::LiveSeq::Mutation->new(-seq => 'c',
+                                                  -pos => 3
+                                                 )
+                     );
+ok $seq->seq, 'agct';
+
+# insertion and deletion
+my @mutations = (
+                 Bio::LiveSeq::Mutation->new(-seq => 'tt',
+                                             -pos => 2,
+                                             -len => 0
+                                            ),
+                 Bio::LiveSeq::Mutation->new(-pos => 2,
+                                             -len => 2
+                                            )
+);
+
+Bio::SeqUtils->mutate($seq, @mutations);
+ok $seq->seq, 'agct';
+
+# insertion to the end of the sequence
+Bio::SeqUtils->mutate($seq,
+                      Bio::LiveSeq::Mutation->new(-seq => 'aa',
+                                                  -pos => 5,
+                                                  -len => 0
+                                                 )
+                     );
+ok $seq->seq, 'agctaa';
+
+
+
+#
+# testing Bio::SeqUtils->cat
+#
+
+use Bio::Annotation::SimpleValue;
+use Bio::Seq::RichSeq;;
+
+
+# PrimarySeqs
+
+my $primseq1 = new Bio::PrimarySeq(-id => 1, -seq => 'acgt', -description => 'master');
+my $primseq2 = new Bio::PrimarySeq(-id => 2, -seq => 'tgca');
+
+Bio::SeqUtils->cat($primseq1, $primseq2);
+ok $primseq1->seq, 'acgttgca';
+ok $primseq1->description, 'master';
+
+#should work for Bio::LocatableSeq
+#should work for Bio::Seq::MetaI Seqs?
+
+
+# Bio::SeqI
+
+my $seq1 = new Bio::Seq(-id => 1, -seq => 'aaaa', -description => 'first');
+my $seq2 = new Bio::Seq(-id => 2, -seq => 'tttt', -description => 'second');
+my $seq3 = new Bio::Seq(-id => 3, -seq => 'cccc', -description => 'third');
+
+
+#  annotations
+my $ac2 = new Bio::Annotation::Collection;
+my $simple1 = Bio::Annotation::SimpleValue->new(
+                                                -tagname => 'colour',
+                                                -value   => 'blue'
+                                               ), ;
+my $simple2 = Bio::Annotation::SimpleValue->new(
+                                                -tagname => 'colour',
+                                                -value   => 'black'
+                                               ), ;
+$ac2->add_Annotation('simple',$simple1);
+$ac2->add_Annotation('simple',$simple2);
+$seq2->annotation($ac2);
+
+my $ac3 = new Bio::Annotation::Collection;
+my $simple3 = Bio::Annotation::SimpleValue->new(
+                                                -tagname => 'colour',
+                                                -value   => 'red'
+						 ), ;
+$ac3->add_Annotation('simple',$simple3);
+$seq3->annotation($ac3);
+
+
+ok (Bio::SeqUtils->cat($seq1, $seq2, $seq3));
+ok $seq1->seq, 'aaaattttcccc';
+ok scalar $seq1->get_Annotations, 3;
+
+
+# seq features
+use Bio::SeqFeature::Generic;
+
+my $ft2 = new Bio::SeqFeature::Generic ( -start => 1,
+                                      -end => 4,
+                                      -strand => 1,
+                                      -primary => 'source',
+				       );
+
+
+my $ft3 = new Bio::SeqFeature::Generic ( -start => 3,
+                                      -end => 3,
+                                      -strand => 1,
+                                      -primary => 'hotspot',
+				       );
+
+$seq2->add_SeqFeature($ft2);
+$seq2->add_SeqFeature($ft3);
+
+
+ok (Bio::SeqUtils->cat($seq1, $seq2));
+ok $seq1->seq, 'aaaattttcccctttt';
+ok scalar $seq1->get_Annotations, 5;
+
+
+my $protseq = new Bio::PrimarySeq(-id => 2, -seq => 'MVTF'); # protein seq
+
+eval {
+    Bio::SeqUtils->cat($seq1, $protseq);
+};
+ok( $@ ? 1 : 0 ); # cat() with the protein seq must throw; always emits a test result
+
+#use Data::Dumper; print Dumper $seq1;
+
+
+
+
+
+
+#
+# evolve()
+#
+
+$seq = Bio::PrimarySeq->new('-seq'=> 'aaaaaaaaaa',
+                            '-id'=>'test');
+
+
+
+$util = new Bio::SeqUtils(-verbose => 0);
+ok my $newseq = $util->evolve($seq, 60, 4);
+
+#  annotations
+
+$seq2 = new Bio::Seq(-id => 2, -seq => 'ggttaaaa', -description => 'second');
+$ac3 = new Bio::Annotation::Collection;
+$simple3 = Bio::Annotation::SimpleValue->new(
+                                                -tagname => 'colour',
+                                                -value   => 'red'
+                                                 ), ;
+$ac3->add_Annotation('simple',$simple3);
+$seq2->annotation($ac3);
+$ft2 = new Bio::SeqFeature::Generic ( -start => 1,
+                                      -end => 4,
+                                      -strand => 1,
+                                      -primary => 'source',
+                                       );
+
+
+$ft3 = new Bio::SeqFeature::Generic ( -start => 5,
+                                      -end => 8,
+                                      -strand => -1,
+                                      -primary => 'hotspot',
+                                       );
+$seq2->add_SeqFeature($ft2);
+$seq2->add_SeqFeature($ft3);
+
+my $trunc=Bio::SeqUtils->trunc_with_features($seq2, 2, 7);
+ok $trunc->seq, 'gttaaa';
+my @feat=$trunc->get_SeqFeatures;
+ok $feat[0]->location->to_FTstring, '<1..3';
+ok $feat[1]->location->to_FTstring, 'complement(4..>6)';
+
+my $revcom=Bio::SeqUtils->revcom_with_features($seq2);
+ok $revcom->seq, 'ttttaacc';
+my @revfeat=$revcom->get_SeqFeatures;
+ok $revfeat[0]->location->to_FTstring, 'complement(5..8)';
+ok $revfeat[1]->location->to_FTstring, '1..4';

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqVersion.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqVersion.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqVersion.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,57 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($DEBUG $NUMTESTS);
+
+BEGIN {
+  $NUMTESTS = 10;
+  $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+  eval { require Test::More; };
+  if ($@) {
+    use lib 't/lib';
+  }
+  use Test::More;
+  
+  eval {
+	require LWP::UserAgent;
+  };
+  if ($@) {
+	plan skip_all => 'LWP::UserAgent not installed. This means that the module is not usable. Skipping tests';
+  }
+  else {
+	plan tests => $NUMTESTS;
+  }
+  
+  use_ok('Bio::DB::SeqVersion');
+}
+
+ok my $query = Bio::DB::SeqVersion->new(-type => 'gi');
+
+SKIP: {
+	skip "Skipping tests which require remote servers - set env variable BIOPERLDEBUG to test", 8 unless $DEBUG;
+
+        eval { $query->get_history('DODGY_ID_WHICH_SHOULD_FAIL') };
+        like($@, qr/could not parse/i, 'throw on bad ID');
+
+	my $latest_gi = $query->get_recent(2);
+	is($latest_gi, 2, 'get_recent');
+
+	my @all_gis = $query->get_all(2);
+	is(scalar @all_gis, 8, 'get_all');
+
+	$latest_gi = $query->get_recent('A00002');
+	is($latest_gi, 2, 'get_recent, string');
+
+	$latest_gi = $query->get_recent(27478738);
+	is($latest_gi, 42659163, 'get_recent, integer');
+
+	# check that default type is "gi"
+	ok $query = Bio::DB::SeqVersion->new();
+	ok my $ref = $query->get_history(3245);
+	is($ref->[0]->[0], 578167, 'get_history');
+} 
+

Added: trunk/packages/bioperl/branches/upstream/current/t/SeqWords.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SeqWords.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SeqWords.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: SeqWords.t,v 1.1 2003/11/12 17:14:09 heikki Exp $
+
+## SeqWords.t, based on SeqStats.t
+# Derek Gatherer, 11th November 2003
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { use lib 't'; }
+    use Test;
+    plan tests => 16;
+}
+
+use Bio::PrimarySeq;
+use Bio::Tools::SeqWords;
+use vars ('$DEBUG');
+
+ok 1;
+
+my ($seqobj, $count, $seqobj_stats, $wt);
+
+$seqobj = Bio::PrimarySeq->new(-seq=>'ACTGTGGCGTCAACTGACTGGC',
+			       -alphabet=>'dna', -id=>'test');
+$seqobj_stats  =  Bio::Tools::SeqWords->new(-seq=>$seqobj);
+
+ok defined($seqobj_stats) && ref($seqobj_stats) &&
+    $seqobj_stats->isa('Bio::Tools::SeqWords');
+
+$count = $seqobj_stats->count_words(4);
+ok $count->{'ACTG'}, 3;
+ok $count->{'TGGC'}, 1;
+ok $count->{'GTCA'}, 1;
+
+$count = $seqobj_stats->count_overlap_words(4);
+ok $count->{'ACTG'}, 3;
+ok $count->{'TGGC'}, 2;
+ok $count->{'GTCA'}, 1;
+ok $count->{'GTGG'}, 1;
+
+# now test a protein
+$seqobj = Bio::PrimarySeq->new(-seq=>'MQSERGITIDISLWKFETSKYYVTIDISSLWKF',
+                               -alphabet=>'protein', -id=>'test');
+$seqobj_stats  =  Bio::Tools::SeqWords->new('-seq' => $seqobj);
+ok defined($seqobj_stats) && ref($seqobj_stats) &&
+    $seqobj_stats->isa('Bio::Tools::SeqWords');
+
+$count = $seqobj_stats->count_words(4);
+ok $count->{'MQSE'}, 1;
+ok $count->{'LWKF'}, 1;
+ok $count->{'IDIS'}, 2;
+
+$count = $seqobj_stats->count_overlap_words(4);
+ok $count->{'MQSE'}, 1;
+ok $count->{'LWKF'}, 2;
+ok $count->{'IDIS'}, 2;

Added: trunk/packages/bioperl/branches/upstream/current/t/SequenceFamily.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SequenceFamily.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SequenceFamily.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,69 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+
+use strict;
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    use vars qw($NTESTS);
+    $NTESTS = 17;
+    plan tests => $NTESTS;
+}
+
+use Bio::Root::IO;
+use Bio::SeqIO;
+use Bio::Cluster::SequenceFamily;
+
+
+my $seqio= new Bio::SeqIO('-format' => 'swiss',
+                           '-file'   => Bio::Root::IO->catfile('t','data','sequencefamily.dat'));
+my @mem;
+while(my $seq = $seqio->next_seq){
+    push @mem, $seq;
+}
+my $family = Bio::Cluster::SequenceFamily->new(-family_id=>"Family_1",
+                                       -description=>"SomeFamily",
+                                       -annotation_score=>"100",
+                                       -family_score=>"50",
+                                       -version=>"1.0",
+                                       -members=>\@mem);
+ok $family->description, "SomeFamily";
+ok $family->annotation_score,100;
+ok $family->size, 5;
+ok $family->family_id,"Family_1";
+ok $family->version, "1.0";
+
+$family->add_members($mem[0]);
+$family->add_members($mem[1]);
+ok $family->size, 7;
+ok $family->cluster_score, "50";
+ok $family->family_score, "50";
+
+my @members = $family->get_members(-ncbi_taxid=>9606);
+# every member returned for the taxid query must carry that taxid
+foreach my $mem(@members){
+    ok $mem->species->ncbi_taxid, 9606;
+}
+
+@members = $family->get_members(-binomial=>"Homo sapiens");
+# every member returned for the binomial query must carry that binomial
+foreach my $mem(@members){
+    ok $mem->species->binomial, "Homo sapiens";
+}
+
+
+$family->flush_members();
+
+ok $family->size, 0;
+
+
+
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Sigcleave.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Sigcleave.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Sigcleave.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for various modules
+## $Id: Sigcleave.t,v 1.10 2002/10/02 14:16:50 heikki Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 16;
+}
+use Bio::PrimarySeq;
+use Bio::Tools::Sigcleave;
+
+#load n-terminus of MGR5_HUMAN as test seq
+my $protein = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
+
+ok my $seq= Bio::PrimarySeq->new(-seq => $protein);
+
+ok my $sig = new Bio::Tools::Sigcleave;
+ok $sig->seq($seq);
+ok my $sout = $sig->seq;
+ok $sout->seq eq $protein;
+ok $sig->threshold, 3.5;
+ok $sig->threshold(5), 5;
+ok $sig->matrix, 'eucaryotic';
+ok $sig->matrix('procaryotic'), 'procaryotic';
+ok $sig->matrix('eucaryotic'), 'eucaryotic';
+
+ok $sig->pretty_print =~ /Maximum score 7/;
+ok my %results = $sig->signals;
+
+ok $results{9}, 5.2, "unable to get raw sigcleave results";
+
+
+$sig = new Bio::Tools::Sigcleave(-seq=>$protein,
+				 -threshold=>5);
+ok %results = $sig->signals;
+ok $results{9}, 5.2, "unable to get raw sigcleave results";
+ok $sig->result_count, 5;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Signalp.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Signalp.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Signalp.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+# Bio::Tools::Signalp test script
+
+use strict;
+use Test;
+
+BEGIN {	plan tests => 7 }
+
+use Bio::Tools::Signalp;
+use File::Spec;
+
+# global setting
+
+my $verbose = $ENV{BIOPERLDEBUG} || 0;
+
+# shared variables
+
+my $infile;
+my $parser;
+my @feat;
+
+# negative example without "YES" features
+
+ok $infile = File::Spec->catfile(qw(t data signalp.negative.out));
+ok $parser = Bio::Tools::Signalp->new(-file=>$infile, -verbose=>$verbose);
+
+while ( my $feat = $parser->next_result ) {
+  push @feat, $feat;
+}
+ok @feat == 0;
+ok $parser->_seqname, 'my_fasta_id';
+ok $parser->_fact1,   'NO';
+
+# positive example with "YES" features
+
+ok $infile = File::Spec->catfile(qw(t data signalp.positive.out));
+ok $parser = Bio::Tools::Signalp->new(-file=>$infile, -verbose=>$verbose);
+
+#
+#  The current module does NOT parse stuff properly
+#  It is probably from version 2 but version 3 is used today
+#  This has to be investigated!!!! --Torsten
+#  FIXME / TODO / BUG / *** 
+# 
+
+#while ( my $feat = $parser->next_result ) {
+#  push @feat, $feat;
+#}
+#ok @feat == 1;
+#ok $parser->_seqname, 'my_fasta_id';
+#ok $parser->_fact1,   'YES';
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Sim4.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Sim4.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Sim4.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,245 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Sim4.t,v 1.13 2003/09/16 21:00:33 jason Exp $
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 128;
+}
+
+use Bio::Tools::Sim4::Results;
+use Bio::Root::IO;
+use Bio::SearchIO;
+
+ok(1);
+my $sim4 = new Bio::Tools::Sim4::Results(-file=> Bio::Root::IO->catfile("t","data","sim4.rev"), -estisfirst=>0);
+ok $sim4;
+
+
+my $exonset = $sim4->next_exonset;
+my @exons = $exonset->sub_SeqFeature(); 
+
+ok @exons, 10;
+my $exon = 1;
+ok $exons[$exon]->est_hit()->seq_id(), 'HSHNCPA1';
+ok($exons[$exon]->seq_id(), qr/human/);
+ok $exons[$exon]->strand(), -1;
+ok $exons[$exon]->start(), 1048;
+ok $exons[$exon]->end(), 1117;
+ok $exons[$exon]->score, 93;
+ok $exons[$exon]->est_hit()->seqlength(), 1198;
+
+
+$sim4 = new Bio::Tools::Sim4::Results(-file=> Bio::Root::IO->catfile("t","data","sim4.for.for"), -estisfirst=>0);
+ok $sim4;
+
+$exonset = $sim4->next_exonset;
+@exons = $exonset->sub_SeqFeature(); 
+
+ok @exons, 4;
+$exon = 1;
+ok $exons[$exon]->est_hit()->seq_id(), 'hs_est';
+ok $exons[$exon]->seq_id(), 'human';
+ok $exons[$exon]->strand(), 1;
+ok $exons[$exon]->start(), 1377;
+ok $exons[$exon]->end(), 1500;
+ok $exons[$exon]->score, 99;
+ok $exons[$exon]->est_hit()->seqlength(), 479;
+
+ok($sim4->next_exonset);
+@exons = $exonset->sub_SeqFeature();
+
+ok $exons[$exon]->est_hit()->seq_id(), 'hs_est';
+ok $exons[$exon]->seq_id(), 'human';
+ok $exons[$exon]->strand(), 1;
+ok $exons[$exon]->est_hit->start(), 120;
+ok $exons[$exon]->est_hit->end(), 243;
+ok $exons[$exon]->score, 99;
+ok $exons[$exon]->est_hit()->seqlength(), 479;
+
+
+# new SearchIO parser for Sim4
+
+# parse align format 0
+my $parser = new Bio::SearchIO(-format => 'sim4',
+			       -file   => 
+			       Bio::Root::IO->catfile(qw(t data crypto.sim4-0))
+			       );
+my $r = $parser->next_result;
+ok($r->query_name, 'cn416');
+ok($r->query_length, 630);
+
+my $hit = $r->next_hit;
+ok($hit->name, 'Contig147');
+ok($hit->description, 'Contig147.fa');
+ok($hit->length, 1086);
+
+my $hsp = $hit->next_hsp;
+ok($hsp->query->start, 36);
+ok($hsp->query->end, 132);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 191);
+ok($hsp->hit->end, 286);
+ok($hsp->hit->strand, 1);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 133);
+ok($hsp->query->end, 191);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 343);
+ok($hsp->hit->end, 401);
+ok($hsp->hit->strand, 1);
+
+# parse align format 3
+$parser = new Bio::SearchIO(-format => 'sim4',
+			    -file   => 
+			    Bio::Root::IO->catfile(qw(t data crypto.sim4-3))
+			    );
+$r = $parser->next_result;
+ok($r->query_name, 'cn416');
+ok($r->query_length, 630);
+$hit = $r->next_hit;
+ok($hit->name, 'Contig147');
+ok($hit->description, 'Contig147.fa');
+ok($hit->length, 1086);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 36);
+ok($hsp->query->end, 132);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 191);
+ok($hsp->hit->end, 286);
+ok($hsp->hit->strand, 1);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 133);
+ok($hsp->query->end, 191);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 343);
+ok($hsp->hit->end, 401);
+ok($hsp->hit->strand, 1);
+
+# parse align format 4
+$parser = new Bio::SearchIO(-format => 'sim4',
+			    -file   => 
+			    Bio::Root::IO->catfile(qw(t data crypto.sim4-4))
+			    );
+$r = $parser->next_result;
+ok($r->query_name, 'cn416');
+ok($r->query_length, 630);
+
+$hit = $r->next_hit;
+ok($hit->name, 'Contig147');
+ok($hit->length, 1086);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 36);
+ok($hsp->query->end, 132);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 191);
+ok($hsp->hit->end, 286);
+ok($hsp->hit->strand, 1);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 133);
+ok($hsp->query->end, 191);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 343);
+ok($hsp->hit->end, 401);
+ok($hsp->hit->strand, 1);
+
+
+# do the other sim4 files
+$parser = new Bio::SearchIO(-format => 'sim4',
+			    -file   => 
+			    Bio::Root::IO->catfile(qw(t data sim4.rev))
+			    );
+$r = $parser->next_result;
+ok($r->query_name, '/nfs/disk21/birney/prog/wise2/example/human.rev');
+ok($r->query_length, 5368);
+$hit = $r->next_hit;
+ok($hit->name, 'HSHNCPA1');
+ok($hit->description, 'temp.cdna');
+ok($hit->length, 1198);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 486);
+ok($hsp->query->end, 503);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 10);
+ok($hsp->hit->end, 27);
+ok($hsp->hit->strand, -1);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 1048);
+ok($hsp->query->end, 1117);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 194);
+ok($hsp->hit->end, 265);
+ok($hsp->hit->strand, -1);
+
+# do the other sim4 files fwd
+$parser = new Bio::SearchIO(-format => 'sim4',
+			    -file   => 
+			    Bio::Root::IO->catfile(qw(t data sim4.for.for))
+			    );
+$r = $parser->next_result;
+ok($r->query_name, 'human.genomic');
+ok($r->query_length, 5368);
+$hit = $r->next_hit;
+ok($hit->name, 'hs_est');
+ok($hit->description, 'est.for');
+ok($hit->length, 479);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 695);
+ok($hsp->query->end, 813);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 1);
+ok($hsp->hit->end, 119);
+ok($hsp->hit->strand, 1);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 1377);
+ok($hsp->query->end, 1500);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 120);
+ok($hsp->hit->end, 243);
+ok($hsp->hit->strand, 1);
+
+# do the other sim4 files fwd rev
+$parser = new Bio::SearchIO(-format => 'sim4',
+			    -file   => 
+			    Bio::Root::IO->catfile(qw(t data sim4.for.rev))
+			    );
+$r = $parser->next_result;
+ok($r->query_name, 'human.genomic');
+ok($r->query_length, 5368);
+$hit = $r->next_hit;
+ok($hit->name, 'REVCOMP');
+ok($hit->description, 'hn_est.rev');
+ok($hit->length, 479);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 695);
+ok($hsp->query->end, 813);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 1);
+ok($hsp->hit->end, 119);
+ok($hsp->hit->strand, -1);
+
+$hsp = $hit->next_hsp;
+ok($hsp->query->start, 1377);
+ok($hsp->query->end, 1500);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->start, 120);
+ok($hsp->hit->end, 243);
+ok($hsp->hit->strand, -1);

Added: trunk/packages/bioperl/branches/upstream/current/t/SimilarityPair.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SimilarityPair.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SimilarityPair.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,44 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# CVS Version
+# $Id: SimilarityPair.t,v 1.5 2002/03/10 17:28:10 jason Exp $
+
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 5;
+}
+use Bio::Seq;
+use Bio::SeqFeature::SimilarityPair;
+use Bio::SearchIO;
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+# test SimilarityPair
+
+my $seq = (new Bio::SeqIO('-format' => 'fasta',
+			  '-file' => Bio::Root::IO->catfile("t","data","AAC12660.fa")))->next_seq();
+ok defined( $seq) && $seq->isa('Bio::SeqI');
+my $blast = new Bio::SearchIO('-file'=>Bio::Root::IO->catfile("t","data","blast.report"), '-format' => 'blast');
+ok defined ($blast) && $blast->isa('Bio::SearchIO');
+my $r = $blast->next_result;
+my $hit = $r->next_hit;
+ok defined ($hit) && $hit->isa('Bio::Search::Hit::HitI'), 1, ' hit is ' . ref($hit);
+my $sim_pair = $hit->next_hsp;
+ok defined($sim_pair) && $sim_pair->isa('Bio::SeqFeatureI');
+$seq->add_SeqFeature($sim_pair);
+ok $seq->all_SeqFeatures() == 1;

Added: trunk/packages/bioperl/branches/upstream/current/t/SimpleAlign.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SimpleAlign.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SimpleAlign.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,306 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: SimpleAlign.t,v 1.43.2.1 2006/10/02 23:10:40 sendu Exp $
+use strict;
+use constant NUMTESTS => 75;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	eval { require Test::More; };
+	if( $@ ) {
+		use lib 't/lib';
+	}
+	use Test::More;
+
+	plan tests => NUMTESTS;
+}
+
+use_ok('Bio::SimpleAlign');
+use_ok('Bio::AlignIO');
+use_ok('Bio::Root::IO');
+
+my ($str, $aln, @seqs, $seq);
+
+$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile(
+                        "t","data","testaln.pfam"));
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+is $aln->get_seq_by_pos(1)->get_nse, '1433_LYCES/9-246', 
+            "pfam input test";
+
+my $aln1 = $aln->remove_columns(['mismatch']);
+is($aln1->match_line, '::*::::*:**:*:*:***:**.***::*.*::**::**:***..**:'.
+   '*:*.::::*:.:*.*.**:***.**:*.:.**::**.*:***********:::*:.:*:**.*::*:'.
+   '.*.:*:**:****************::', 'match_line');
+
+my $aln2 = $aln->select(1,3);
+isa_ok($aln2, 'Bio::Align::AlignI');
+is($aln2->no_sequences, 3, 'no_sequences');
+
+# test select non continous
+$aln2 = $aln->select_noncont(8,2,7);
+is($aln2->no_sequences, 3, 'no_sequences');
+is($aln2->get_seq_by_pos(2)->id, $aln->get_seq_by_pos(7)->id, 'get+seq_by_pos');
+
+@seqs = $aln->each_seq();
+is scalar @seqs, 16, 'each_seq';
+is $seqs[0]->get_nse, '1433_LYCES/9-246', 'get_nse';
+is $seqs[0]->id, '1433_LYCES', 'id';
+is $seqs[0]->no_gaps, 3, 'no_gaps';
+@seqs = $aln->each_alphabetically();
+is scalar @seqs, 16, 'each_alphabetically';
+
+is $aln->column_from_residue_number('1433_LYCES', 10), 2, 'column_from_residue_number';
+is $aln->displayname('1433_LYCES/9-246', 'my_seq'), 'my_seq', 'display_name get/set';
+is $aln->displayname('1433_LYCES/9-246'), 'my_seq', 'display_name get';
+is substr ($aln->consensus_string(50), 0, 60),
+    "RE??VY?AKLAEQAERYEEMV??MK?VAE??????ELSVEERNLLSVAYKNVIGARRASW", 'consensus_string';
+is substr ($aln->consensus_string(100), 0, 60),
+    "?????????L????E????M???M????????????L??E?RNL?SV?YKN??G??R??W", 'consensus_string';
+is substr ($aln->consensus_string(0), 0, 60), 
+    "REDLVYLAKLAEQAERYEEMVEFMKKVAELGAPAEELSVEERNLLSVAYKNVIGARRASW", 'consensus_string';
+
+ok(@seqs = $aln->each_seq_with_id('143T_HUMAN'));
+is scalar @seqs, 1, 'each_seq_with_id';
+
+is $aln->is_flush, 1,'is_flush';
+ok($aln->id('x') && $aln->id eq 'x','id get/set');
+
+is $aln->length, 242, 'length';
+is $aln->no_residues, 3769, 'no_residues';
+is $aln->no_sequences, 16, 'no_sequences';
+is (sprintf("%.2f",$aln->overall_percentage_identity()), 33.06, 'overall_percentage_identity');
+is (sprintf("%.2f",$aln->average_percentage_identity()), 66.91, 'overall_percentage_identity');
+
+ok $aln->set_displayname_count;
+is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES_1', 'set_displayname_count';
+ok $aln->set_displayname_flat;
+is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES', 'set_displayname_flat';
+ok $aln->set_displayname_normal;
+is $aln->displayname('1433_LYCES/9-246'), '1433_LYCES/9-246', 'set_displayname_normal';
+ok $aln->uppercase;
+ok $aln->map_chars('\.','-');
+@seqs = $aln->each_seq_with_id('143T_HUMAN');
+is substr($seqs[0]->seq, 0, 60),
+    'KTELIQKAKLAEQAERYDDMATCMKAVTEQGA---ELSNEERNLLSVAYKNVVGGRRSAW', 'uppercase, map_chars';
+
+is($aln->match_line, '       ::*::::*  : *   *:           *: *:***:**.***::*.'.
+   ' *::**::**:***      .  .      **  :* :*   .  :: ::   *:  .     :* .*. **:'.
+   '***.** :*.            :  .*  *   :   : **.*:***********:::* : .: *  :** .'.
+   '*::*: .*. : *: **:****************::     ', 'match_line');
+ok $aln->remove_seq($seqs[0]),'remove_seqs';
+is $aln->no_sequences, 15, 'remove_seqs';
+ok $aln->add_seq($seqs[0]), 'add_seq';
+is $aln->no_sequences, 16, 'add_seq';
+ok $seq = $aln->get_seq_by_pos(1), 'get_seq_by_pos';
+is( $seq->id, '1433_LYCES', 'get_seq_by_pos');
+ok (($aln->missing_char(), 'P') and  ($aln->missing_char('X'), 'X')) ;
+ok (($aln->match_char(), '.') and  ($aln->match_char('-'), '-')) ;
+ok (($aln->gap_char(), '-') and  ($aln->gap_char('.'), '.')) ;
+
+is $aln->purge(0.7), 12, 'purge';
+is $aln->no_sequences, 4, 'purge';
+
+SKIP:{
+	eval { require IO::String };
+	skip("IO::String not installed. Skipping tests.\n", 24) if $@;
+
+	my $string;
+	my $out = IO::String->new($string);
+	
+	my $s1 = new Bio::LocatableSeq (-id => 'AAA',
+					-seq => 'aawtat-tn-',
+					-start => 1,
+					-end => 8,
+					-alphabet => 'dna'
+					);
+	my $s2 = new Bio::LocatableSeq (-id => 'BBB',
+					-seq => '-aaaat-tt-',
+					-start => 1,
+					-end => 7,
+					-alphabet => 'dna'
+					);
+	$a = new Bio::SimpleAlign;
+	$a->add_seq($s1);           
+	$a->add_seq($s2);
+	
+	is ($a->consensus_iupac, "aAWWAT-TN-", 'IO::String consensus_iupac');
+	$s1->seq('aaaaattttt');
+	$s1->alphabet('dna');
+	$s1->end(10);
+	$s2->seq('-aaaatttt-');
+	$s2->end(8);
+	$a = new Bio::SimpleAlign;
+	$a->add_seq($s1);
+	$a->add_seq($s2);
+	
+	my $strout = Bio::AlignIO->new(-fh => $out, -format => 'pfam');
+	$strout->write_aln($a);
+	is ($string,
+		"AAA/1-10    aaaaattttt\n".
+		"BBB/1-8     -aaaatttt-\n",
+		'IO::String write_aln normal');
+	
+	$out->setpos(0); 
+	$string ='';
+	my $b = $a->slice(2,9);
+	$strout->write_aln($b);
+	is $string,
+	"AAA/2-9    aaaatttt\n".
+	"BBB/1-8    aaaatttt\n",
+	'IO::String write_aln slice';
+	
+	$out->setpos(0); 
+	$string ='';
+	$b = $a->slice(9,10);
+	$strout->write_aln($b);
+	is $string,
+	"AAA/9-10    tt\n".
+	"BBB/8-8     t-\n",
+	'IO::String write_aln slice';
+	
+	$a->verbose(-1);
+	$out->setpos(0); 
+	$string ='';
+	$b = $a->slice(1,2);
+	$strout->write_aln($b);
+	is $string,
+	"AAA/1-2    aa\n".
+	"BBB/1-1    -a\n",
+	'IO::String write_aln slice';
+	
+	# not sure what coordinates this should return...
+	$a->verbose(-1);
+	$out->setpos(0); 
+	$string ='';
+	$b = $a->slice(1,1,1);
+	$strout->write_aln($b);
+	is $string,
+	"AAA/1-1    a\n".
+	"BBB/1-0    -\n",
+	'IO::String write_aln slice';
+	
+	$a->verbose(-1);
+	$out->setpos(0); 
+	$string ='';
+	$b = $a->slice(2,2);
+	$strout->write_aln($b);
+	is $string,
+	"AAA/2-2    a\n".
+	"BBB/1-1    a\n",
+	'IO::String write_aln slice';
+	
+	eval {
+		$b = $a->slice(11,13);
+	};
+	
+	like($@, qr/EX/ );
+	
+	# remove_columns by position
+	$out->setpos(0); 
+	$string ='';
+	$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile(
+												"t","data","mini-align.aln"));
+	$aln1 = $str->next_aln;
+	$aln2 = $aln1->remove_columns([0,0]);
+	$strout->write_aln($aln2);
+	is $string,
+	"P84139/1-33              NEGEHQIKLDELFEKLLRARLIFKNKDVLRRC\n".
+	"P814153/1-33             NEGMHQIKLDVLFEKLLRARLIFKNKDVLRRC\n".
+	"BAB68554/1-14            ------------------AMLIFKDKQLLQQC\n".
+	"gb|443893|124775/1-32    MRFRFQIKVPPAVEGARPALLIFKSRPELGGC\n",
+	'remove_columns by position';
+	
+	# and when arguments are entered in "wrong order"?
+	$out->setpos(0); 
+	$string ='';
+	my $aln3 = $aln1->remove_columns([1,1],[30,30],[5,6]);
+	$strout->write_aln($aln3);
+	is $string,
+	"P84139/1-33              MEGEIKLDELFEKLLRARLIFKNKDVLRC\n".
+	"P814153/1-33             MEGMIKLDVLFEKLLRARLIFKNKDVLRC\n".
+	"BAB68554/1-14            ----------------AMLIFKDKQLLQC\n".
+	"gb|443893|124775/1-32    -RFRIKVPPAVEGARPALLIFKSRPELGC\n",
+	'remove_columns by position (wrong order)';
+	
+	my %cigars = $aln1->cigar_line;
+	is $cigars{'gb|443893|124775/1-32'},'19,19:21,24:29,29:32,32','cigar_line';
+	is $cigars{'P814153/1-33'},'20,20:22,25:30,30:33,33','cigar_line';
+	is $cigars{'BAB68554/1-14'},'1,1:3,6:11,11:14,14','cigar_line';
+	is $cigars{'P84139/1-33'},'20,20:22,25:30,30:33,33','cigar_line';
+	
+	
+	# sort_alphabetically
+	my $s3 = new Bio::LocatableSeq (-id => 'ABB',
+											  -seq => '-attat-tt-',
+											  -start => 1,
+											  -end => 7,
+											  -alphabet => 'dna'
+											 );
+	$a->add_seq($s3);
+	
+	is $a->get_seq_by_pos(2)->id,"BBB", 'sort_alphabetically - before';
+	ok $a->sort_alphabetically;
+	is $a->get_seq_by_pos(2)->id,"ABB", 'sort_alphabetically - after';
+	
+	$b = $a->remove_gaps();
+	is $b->consensus_string, "aaaattt", 'remove_gaps';
+	
+	$s1->seq('aaaaattt--');
+	
+	$b = $a->remove_gaps(undef, 'all_gaps_only');
+	is $b->consensus_string, "aaaaatttt",'remove_gaps all_gaps_only';
+	
+	# test set_new_reference:
+	$str = Bio::AlignIO->new(-file=> Bio::Root::IO->catfile(
+							"t","data","testaln.aln"));
+	$aln=$str->next_aln();
+	my $new_aln=$aln->set_new_reference(3);
+	$a=$new_aln->get_seq_by_pos(1)->display_id;
+	$new_aln=$aln->set_new_reference('P851414');
+	$b=$new_aln->get_seq_by_pos(1)->display_id;
+	is $a, 'P851414','set_new_reference';
+	is $b, 'P851414','set_new_reference';
+	
+	# test uniq_seq:
+	$str = Bio::AlignIO->new(-verbose => $DEBUG,
+							 -file=> Bio::Root::IO->catfile(
+							"t","data","testaln2.fasta"));
+	$aln=$str->next_aln();
+	$new_aln=$aln->uniq_seq();
+	$a=$new_aln->no_sequences;
+	is $a, 11,'uniq_seq';
+		
+	# check if slice works well with a LocateableSeq in its negative strand
+	my $seq1 = Bio::LocatableSeq->new(
+	  -SEQ    => "ATGCTG-ATG",
+	  -START  => 1,
+	  -END    => 9,
+	  -ID     => "test1",
+	  -STRAND => -1
+	);
+	
+	my $seq2 = Bio::LocatableSeq->new(
+	  -SEQ    => "A-GCTGCATG",
+	  -START  => 1,
+	  -END    => 9,
+	  -ID     => "test2",
+	  -STRAND => 1
+	);
+	
+	$string ='';
+	my $aln_negative = Bio::SimpleAlign->new();
+	$aln_negative->add_seq($seq1);
+	$aln_negative->add_seq($seq2);
+	my $start_column =
+	   $aln_negative->column_from_residue_number($aln_negative->get_seq_by_pos(1)->display_id,2);
+	my $end_column =
+	   $aln_negative->column_from_residue_number($aln_negative->get_seq_by_pos(1)->display_id,5);
+	$aln_negative = $aln_negative->slice($end_column,$start_column);
+	my $seq_negative = $aln_negative->get_seq_by_pos(1);
+	is($seq_negative->start,2,"bug 2099");
+	is($seq_negative->end,5,"bug 2099");
+}
+
+1;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/SiteMatrix.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/SiteMatrix.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/SiteMatrix.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,78 @@
+# -*-Perl-*-
+# $Id: SiteMatrix.t,v 1.4.6.3 2006/10/02 23:10:40 sendu Exp $
+#Some simple test, nothing fancy...
+
+use strict;
+
+CHECK {
+  $ENV{PERL_HASH_SEED} = 0;
+}
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 15;
+}
+use Bio::Matrix::PSM::SiteMatrix;
+
+ok(1);
+
+my $score;
+my $A='a0501';
+my $C='014a0';
+my $G='01103';
+my $T='08006';
+my $eval=0.0001;
+my %param=(pA=>$A,pC=>$C,pG=>$G,pT=>$T,e_val=>$eval, correction =>0);
+my $matrix=new Bio::Matrix::PSM::SiteMatrix(%param);
+ok $matrix;
+
+#Simple methods here
+ok $matrix->IUPAC,'ABVCD';
+
+ok $matrix->consensus,'ATACT';
+
+ok $matrix->width,5;
+
+ok $matrix->curpos,0;
+
+ok $matrix->get_string('A'),$A;
+
+my %x= (base=>'A',pA=>1,pC=>0,pG=>0,pT=>0,prob=>10,rel=>0, 
+        lA=>undef,lC=>undef,lG=>undef,lT=>undef);
+my %pos = $matrix->next_pos;
+my ($all) = 1;
+while(my ($k,$v) = each %x ) {
+    my $r =$pos{$k};
+    if( ! defined $v && ! defined $r) {
+    } elsif($pos{$k} ne $v ) { 
+	$all = 0;
+	last;
+    }
+}
+ok($all);
+
+ok $matrix->curpos,1;
+
+ok $matrix->e_val(0.0001);
+
+ok $matrix->e_val,0.0001;
+
+#Now some PSM specific methods like regexp and matrix info
+ok $matrix->regexp,'[Aa][CcGgTtBb][AaCcGgVv][Cc][AaGgTtDd]';
+my $regexp=$matrix->regexp;
+ok 'ATCCT',"/$regexp/";
+
+my @x=(1,0,0.5,0,0.1);
+ok $matrix->get_array('A'),@x;
+
+@x=qw(Aa Tt AaCc Cc GgTt);
+ok $matrix->regexp_array,@x;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Sopma.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Sopma.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Sopma.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,90 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Sopma.t,v 1.1 2003/07/23 
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR $METAERROR);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$ERROR = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 15;
+	plan tests => $NUMTESTS;
+
+	eval {
+		require IO::String; 
+		require LWP::UserAgent;
+	};
+	if( $@ ) {
+		warn("IO::String or LWP::UserAgent not installed. This means that the module is not usable. Skipping tests");
+	$ERROR = 1;
+	}
+	# check this is available, set error flag if not.
+	eval {
+		require Bio::Seq::Meta::Array;
+	};
+	if ($@) {
+		warn ("Bio::Seq::Meta::Array not installed - will skip tests using meta sequences");
+		$METAERROR = 1;
+	}
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('unable to run all of the Sopma tests',1);
+	}
+}
+
+exit 0 if $ERROR ==  1;
+
+use Data::Dumper;
+use Bio::PrimarySeq;
+require Bio::Tools::Analysis::Protein::Sopma;
+
+ok 1;
+
+my $verbose = 0;
+$verbose = 1 if $DEBUG;
+
+ok my $tool = Bio::WebAgent->new(-verbose =>$verbose);
+
+my $seq = Bio::PrimarySeq->new(
+  -seq => 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS',
+  -display_id => 'test2');
+ok $tool = Bio::Tools::Analysis::Protein::Sopma->new( -seq=>$seq,
+                                                      -window_width => 15);
+if( $DEBUG ) {
+	ok $tool->run ();
+	exit if $tool->status eq 'TERMINATED_BY_ERROR';
+	ok my $raw = $tool->result('');
+	ok my $parsed = $tool->result('parsed');
+	ok ($parsed->[0]{'helix'}, '104');
+	ok my @res = $tool->result('Bio::SeqFeatureI');
+	ok my $meta = $tool->result('meta', "ww15");
+
+	ok $tool->window_width(21);
+	ok $tool->clear();
+	ok $tool->run;
+	ok my $meta2 = $tool->result('meta', "ww21");
+	if (!$METAERROR) { 
+		# if Bio::Seq::Meta::Array available
+		# meta sequence contains data...
+		# but not available thru method call...??
+		ok ($meta->named_submeta_text('Sopma_helix|ww15',1,2), '104 195');
+		ok ($meta->seq, 'MSADQRWRQDSQDSFGDSFDGDPPPPPPPPFGDSFGDGFSDRSRQDQRS');
+	}
+} else {
+	for ( $Test::ntest..$NUMTESTS) {
+		skip("Skipping tests which require remote servers - set env variable BIOPERLDEBUG to test",1);
+	}
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Species.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Species.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Species.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+# -*-Perl-*-
+## $Id: Species.t,v 1.11.4.3 2006/11/08 17:25:55 sendu Exp $
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+
+BEGIN {
+	$NUMTESTS = 20;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+    
+	plan tests => $NUMTESTS;
+}
+
+use_ok('Bio::Species');
+use_ok('Bio::DB::Taxonomy');
+
+ok my $sps = Bio::Species->new();
+$sps->classification(qw( sapiens Homo Hominidae
+			 Catarrhini Primates Eutheria Mammalia Vertebrata
+			 Chordata Metazoa Eukaryota));
+
+is $sps->binomial, 'Homo sapiens';
+
+ok $sps->sub_species('sapiensis');
+is $sps->binomial, 'Homo sapiens';
+is $sps->binomial('FULL'), 'Homo sapiens sapiensis';
+is $sps->sub_species, 'sapiensis';
+
+$sps->classification(qw( sapiens Homo Hominidae
+			 Catarrhini Primates Eutheria Mammalia Vertebrata
+			 Chordata Metazoa Eukaryota));
+is $sps->binomial, 'Homo sapiens';
+
+
+# test cmd line initializtion
+ok my $species = new Bio::Species( -classification => 
+				[ qw( sapiens Homo Hominidae
+				      Catarrhini Primates Eutheria 
+				      Mammalia Vertebrata
+				      Chordata Metazoa Eukaryota) ] );
+is $species->binomial, 'Homo sapiens';
+is $species->species, 'sapiens';
+is $species->genus, 'Homo';
+
+
+# A Bio::Species isa Bio::Taxon, so test some things from there briefly
+is $species->scientific_name, 'sapiens';
+is $species->rank, 'species';
+
+# We can make a species object from just an id an db handle
+SKIP: {
+    skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 5 unless $DEBUG;
+    $species = new Bio::Species(-id => 51351);
+    my $taxdb = new Bio::DB::Taxonomy(-source => 'entrez');
+    eval {$species->db_handle($taxdb);};
+    skip "Unable to connect to entrez database; no network or server busy?", 5 if $@;
+    is $species->binomial, 'Brassica rapa subsp.';
+    is $species->binomial('FULL'), 'Brassica rapa subsp. pekinensis';
+    is $species->genus, 'Brassica';
+    is $species->species, 'rapa subsp.';
+    is $species->sub_species, 'pekinensis';
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Spidey.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Spidey.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Spidey.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,62 @@
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+		  	use lib 't';
+    }
+    use Test;
+    plan tests => 28;
+}
+
+use Bio::Tools::Spidey::Results;
+ok(1);
+
+my $spidey = new Bio::Tools::Spidey::Results(-file=> Bio::Root::IO->catfile("t", "data",
+"spidey.noalignment"));
+ok $spidey;
+
+$spidey->close();
+ok(1);
+
+my $exonset = $spidey->next_exonset;
+ok(!defined($exonset));
+
+$spidey = new Bio::Tools::Spidey::Results(-file=> Bio::Root::IO->catfile("t", "data",
+"spidey.test1"));
+$exonset = $spidey->next_exonset;
+my @exons = $exonset->sub_SeqFeature(); 
+ok @exons, 6;
+
+ok $spidey->genomic_dna_length(), 145732769;
+ok $spidey->splicesites(), 4;
+ok $spidey->est_coverage(), 100;
+ok $spidey->overall_percentage_id(), 99.7;
+ok $spidey->missing_mrna_ends(), 'neither';
+
+ok $exonset->seq_id(), 'lcl|chr2';
+ok $exonset->start(), 36356457;
+ok $exonset->end(), 36375798;
+
+my $exon = 0;
+ok $exons[$exon]->est_hit()->seq_id(), 'lcl|tmpseq_0';
+ok $exons[$exon]->start(), 36375691;
+ok $exons[$exon]->end(), 36375798;
+ok $exons[$exon]->strand(), -1;
+ok $exons[$exon]->est_hit()->start(), 1;
+ok $exons[$exon]->est_hit()->end(), 108;
+ok $exons[$exon]->donor(), 1;
+ok $exons[$exon]->acceptor(), 0;
+
+$exon = 1;
+ok $exons[$exon]->start(), 36369345;
+ok $exons[$exon]->end(), 36369492;
+ok $exons[$exon]->est_hit()->start(), 109;
+ok $exons[$exon]->est_hit()->end(), 256;
+ok $exons[$exon]->donor(), 0;
+ok $exons[$exon]->acceptor(), 1;
+
+$spidey->close();
+ok(1);

Added: trunk/packages/bioperl/branches/upstream/current/t/StandAloneBlast.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/StandAloneBlast.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/StandAloneBlast.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,199 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: StandAloneBlast.t,v 1.28 2006/06/28 16:45:18 bosborne Exp $
+#
+
+use strict;
+use constant NUMTESTS => 32;
+BEGIN { 
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => NUMTESTS; 
+}
+
+END { 
+	foreach( $Test::ntest..NUMTESTS) {
+		skip('Blast or env variables not installed correctly', 1);
+	}
+	unlink('blastreport.out') if -e 'blastreport.out';
+}
+
+use Bio::Tools::BPlite;
+use Bio::Tools::Run::StandAloneBlast;
+use Bio::SeqIO;
+use Bio::AlignIO;
+use Bio::Seq;
+use Bio::Root::IO;
+use Bio::SearchIO;
+
+# Note: the swissprot and ecoli.nt data sets may be downloaded from
+# ftp://ftp.ncbi.nih.gov/blast/db/FASTA
+my $verbose = -1;
+my $nt_database = 'ecoli.nt';
+my $amino_database = 'swissprot';
+my $evalue = 0.001;
+my ($seq1,$seq2,$seq3,$seq4);
+
+# Tests to check that "-attr" and "attr" and "a" all do the same thing
+# http://bugzilla.open-bio.org/show_bug.cgi?id=1912
+
+for my $p (qw(database db -d -database d)) {
+  my $f = Bio::Tools::Run::StandAloneBlast->new($p => $nt_database);
+  ok $f->d() eq $nt_database;
+}
+for my $p (qw(expect evalue -e -expect e)) {
+  my $f = Bio::Tools::Run::StandAloneBlast->new($p => $evalue);
+  ok $f->e() eq $evalue;
+}
+
+# Let's continue...
+
+my @params = ('program'     => 'blastn',
+				   'database'    => $nt_database , 
+				   '_READMETHOD' => 'SearchIO', 
+				   'output'      => 'blastreport.out',
+				   'verbose'     => 0 );
+my  $factory = Bio::Tools::Run::StandAloneBlast->new('-verbose' => $verbose,
+						     @params);
+ok $factory;
+
+my $inputfilename = Bio::Root::IO->catfile("t","data","test.txt");
+
+$factory->quiet(0);
+$factory->q(-3);
+
+ok($factory->q, -3);
+ok($factory->quiet, 0);
+
+if( ! $factory->executable('blastall') ) {
+    skip('blastall not installed',1);
+    exit;
+}
+
+if( ! defined $Bio::Tools::Run::StandAloneBlast::DATADIR ) {
+    print STDERR "must have BLASTDIR and BLASTDB or BLASTDATADIR env variable set\n";
+    exit(0);
+}
+
+my $nt_database_file = 
+    Bio::Root::IO->catfile($Bio::Tools::Run::StandAloneBlast::DATADIR, 
+			   $nt_database);
+ok($nt_database_file, qr/$nt_database/);
+
+my @testresults = qw(37 182 182  253 167 2);
+my $testcount = 0;
+
+# use ecoli.nt
+if (-e $nt_database_file) {
+	my $parser = $factory->blastall($inputfilename);
+	my $blast_report = $parser->next_result;
+	ok($blast_report->num_hits,$testresults[$testcount++]);
+
+	$factory->_READMETHOD('BPlite');  # Note required leading underscore in _READMETHOD
+	my $str = Bio::SeqIO->new('-file'  => Bio::Root::IO->catfile(qw(t data dna2.fa)),
+									  '-format' => 'fasta');
+	$seq1 = $str->next_seq();
+	$seq2 = $str->next_seq();
+
+	my $BPlite_report = $factory->blastall($seq1);
+	my $sbjct = $BPlite_report->nextSbjct;
+	my $hsp = $sbjct->nextHSP;
+	ok($hsp->score, $testresults[$testcount]);
+
+	$factory->_READMETHOD('Blast');
+	my $searchio_report = $factory->blastall($seq1);
+	$sbjct = $searchio_report->next_result->next_hit;
+	$hsp = $sbjct->next_hsp;
+	ok($hsp->score, $testresults[$testcount++]);
+
+	my @seq_array =($seq1,$seq2);
+	my $seq_array_ref = \@seq_array;
+	$factory->_READMETHOD('BPlite');
+	my $BPlite_report2 = $factory->blastall($seq_array_ref);
+	$sbjct = $BPlite_report2->nextSbjct;
+	$hsp = $sbjct->nextHSP;
+	ok($hsp->score, $testresults[$testcount]);
+
+	$factory->_READMETHOD('Blast');
+	$searchio_report = $factory->blastall($seq_array_ref);
+	$sbjct = $searchio_report->next_result->next_hit;
+	$hsp = $sbjct->next_hsp;
+	ok($hsp->score, $testresults[$testcount++]);
+
+	$sbjct = $searchio_report->next_result->next_hit;
+	ok($sbjct);
+	$hsp = $sbjct->next_hsp;
+	ok($hsp->score, $testresults[$testcount++]);
+
+	@params = ('-verbose' => $verbose,
+				  'program'  => 'blastp'); 
+	$factory = Bio::Tools::Run::StandAloneBlast->new(@params);
+
+	$str = Bio::SeqIO->new(-file => Bio::Root::IO->catfile(qw(t data amino.fa)),
+								  -format => 'Fasta' );
+	$seq3 = $str->next_seq();
+	$seq4 = $str->next_seq();
+	$factory->_READMETHOD('BPlite');
+	my $bl2seq_report = $factory->bl2seq($seq3, $seq4);
+	$hsp = $bl2seq_report->next_feature;
+	ok ($hsp->hit->start, $testresults[$testcount], 
+		 " failed creating or parsing BPlite bl2seq report object");
+
+	$factory->_READMETHOD('Blast');
+	$bl2seq_report = $factory->bl2seq($seq3, $seq4);
+	$hsp = $bl2seq_report->next_result->next_hit->next_hsp;
+	ok( $hsp->hit->start, $testresults[$testcount++], 
+		 " failed creating or parsing SearchIO bl2seq report object");
+} else {
+	for (1..14) {
+		skip("Database $nt_database not found, skipping",1);
+	}	
+}
+
+# use the protein database named in $amino_database (swissprot), located in DATADIR
+my $amino_database_file = 
+    Bio::Root::IO->catfile($Bio::Tools::Run::StandAloneBlast::DATADIR, 
+			   $amino_database);
+
+if (-e $amino_database_file) {    # check the DATADIR path, as the nt section does
+	@params = ('database' => $amino_database,
+				  '-verbose' => $verbose);
+	$factory = Bio::Tools::Run::StandAloneBlast->new(@params);
+
+	my $iter = 2;
+	$factory->j($iter);    # 'j' is blast parameter for # of iterations
+	my $new_iter = $factory->j();
+	
+	ok $new_iter, 2, " failed setting blast parameter";
+	my $blast_report = $factory->blastpgp($seq3)->next_result;
+	ok($blast_report->number_of_iterations, $testresults[$testcount]);
+
+	$factory->_READMETHOD('BPlite');
+	$iter = 2;
+	$factory->j($iter);    # 'j' is blast parameter for # of iterations
+	$new_iter = $factory->j();
+
+	ok($new_iter, $iter, " failed setting blast parameter");
+
+	$blast_report = $factory->blastpgp($seq3);
+	ok($blast_report->number_of_iterations, $testresults[$testcount]);
+} else {
+	for (1..4) {
+		skip("Database $amino_database not found, skipping",1);
+	}
+}
+
+__END__
+
+
+$factory = Bio::Tools::Run::StandAloneBlast->new(
+												  -verbose     => $verbose,
+												  -program     => 'blastn',
+												  -database    => $nt_database , 
+												  -_READMETHOD => 'SearchIO', 
+												  -output      => 'blastreport.out',
+												  -verbose     => 0	 );
+ok(defined $factory);

Added: trunk/packages/bioperl/branches/upstream/current/t/StructIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/StructIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/StructIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,96 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: StructIO.t,v 1.8.4.2 2006/10/02 23:10:40 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) { 
+		use lib 't';
+	}
+	use Test;
+	plan tests => 14;
+}
+
+use Bio::Structure::Entry;
+use Bio::Structure::IO;
+use Bio::Root::IO;
+ok(1);
+#
+# test reading PDB format - single model, single chain
+#
+my $pdb_file = Bio::Root::IO->catfile("t","data","1BPT.pdb");
+my $structin = Bio::Structure::IO->new(-file => $pdb_file, 
+													-format => 'pdb');
+ok(1);
+my $struc = $structin->next_structure;
+ok(1);
+ok(ref($struc), "Bio::Structure::Entry");
+
+# some basic checks, Structure objects are tested in Structure.t
+my ($chain) = $struc->chain;
+ok($struc->residue, 97);
+my ($atom) = $struc->get_atom_by_serial(367);
+ok($atom->id, "NZ");
+ok($struc->parent($atom)->id, "LYS-46");
+my ($ann) = $struc->annotation->get_Annotations("author");
+ok($ann->as_text,
+	"Value: D.HOUSSET,A.WLODAWER,F.TAO,J.FUCHS,C.WOODWARD              ");
+($ann) = $struc->annotation->get_Annotations("header");
+ok($ann->as_text,
+	"Value: PROTEINASE INHIBITOR (TRYPSIN)          11-DEC-91   1BPT");
+my $pseq = $struc->seqres;
+ok($pseq->subseq(1,20), "RPDFCLEPPYTGPCKARIIR");
+
+#
+# test reading PDB format - single model, multiple chains
+#
+$pdb_file = Bio::Root::IO->catfile("t","data","1A3I.pdb");
+$structin = Bio::Structure::IO->new(-file => $pdb_file, 
+												-format => 'pdb');
+$struc = $structin->next_structure;
+
+my ($chaincount,$rescount,$atomcount);
+for my $chain ($struc->get_chains) {
+	$chaincount++;
+   for my $res ($struc->get_residues($chain)) {
+		$rescount++;
+      for my $atom ($struc->get_atoms($res)) {
+			$atomcount++;
+		}
+   }
+}
+
+ok($chaincount, 4);  # 3 polypeptides and a group of hetero-atoms
+ok($rescount, 60);   # amino acid residues and solvent molecules
+ok($atomcount, 171); # ATOM and HETATM
+
+#
+# test reading PDB format - multiple models, single chain
+#
+$pdb_file = Bio::Root::IO->catfile("t","data","1A11.pdb");
+
+#
+# test reading PDB format - chains with ATOMs plus HETATMs
+#
+$pdb_file = Bio::Root::IO->catfile("t","data","8HVP.pdb");
+
+#
+# test writing PDB format
+#
+my $out_file = Bio::Root::IO->catfile("t","data","temp-pdb1bpt.ent");
+my $structout = Bio::Structure::IO->new(-file => ">$out_file", 
+                                        -format => 'PDB');
+$structout->write_structure($struc);
+ok(1);
+
+END {
+	unlink $out_file if -e $out_file;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Structure.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Structure.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Structure.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,149 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Structure.t,v 1.5 2001/12/14 17:12:20 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+	use lib 't';
+    }
+    use Test;
+    plan tests => 52;
+}
+use Bio::Structure::Entry;
+use Bio::Structure::Model;
+use Bio::Structure::Chain;
+use Bio::Structure::Residue;
+use Bio::Structure::Atom;
+
+ok(1);
+
+
+my $entry = Bio::Structure::Entry->new;
+ok(1);
+ok defined  $entry;
+ok ref($entry), 'Bio::Structure::Entry';
+
+my $model = Bio::Structure::Model->new;
+ok(1);
+ok defined $model;
+ok ref($model), 'Bio::Structure::Model';
+
+my $chain = Bio::Structure::Chain->new;
+ok(1);
+ok defined $chain;
+ok ref($chain), 'Bio::Structure::Chain';
+
+my $residue = Bio::Structure::Residue->new;
+ok(1);
+ok defined $residue;
+ok ref($residue), 'Bio::Structure::Residue';
+
+my $atom = Bio::Structure::Atom->new;
+ok(1);
+ok defined $atom;
+ok ref($atom), 'Bio::Structure::Atom';
+
+# adding/removing to Entry
+my $m1 = Bio::Structure::Model->new;
+$entry->model($m1);
+ok $entry->model,1;
+my $m2 = Bio::Structure::Model->new;
+$entry->add_model($m2);
+ok $entry->get_models, 2;
+
+$entry->model($m2);
+ok $entry->model, 1;
+my @m = ($m1, $m2);
+$entry->model(\@m);
+ok $entry->model, 2;
+
+# does $m2 get orphaned?
+$entry->model($m1);
+ok $entry->parent($m2), undef;
+
+
+# adding/removing to Model 
+my $c1 = Bio::Structure::Chain->new;
+$entry->add_chain($model,$c1);
+ok $entry->get_chains($model),1;
+my $c2 = Bio::Structure::Chain->new;
+$entry->add_chain($model,$c2);
+ok $entry->get_chains($model), 2;
+ok ref($entry->parent($c1)), 'Bio::Structure::Model';
+ok $entry->parent($c1), $entry->parent($c2);
+
+
+# adding/removing to Chain 
+my $r1 = Bio::Structure::Residue->new;
+$entry->add_residue($chain,$r1);
+ok $entry->get_residues($chain),1;
+my $r2 = Bio::Structure::Residue->new;
+$entry->add_residue($chain,$r2);
+ok $entry->get_residues($chain), 2;
+
+ok ref($entry->parent($r2)), 'Bio::Structure::Chain';
+ok $entry->parent($r1), $entry->parent($r2);
+
+# adding/removing to Residue 
+$entry->add_atom($residue,$atom);
+ok $entry->get_atoms($residue),1;
+my $a2 = Bio::Structure::Atom->new;
+my $a3 = Bio::Structure::Atom->new;
+my $a4 = Bio::Structure::Atom->new;
+my $a5 = Bio::Structure::Atom->new;
+my $a6 = Bio::Structure::Atom->new;
+$entry->add_atom($residue,$a2);
+ok $entry->get_atoms($residue), 2;
+
+my @a = ($a3, $a4, $a5);
+$entry->add_atom($r2,\@a);
+ok $entry->get_atoms($r2), 3;
+
+ok ref($entry->parent($a2)), 'Bio::Structure::Residue';
+ok $entry->parent($a3), $entry->parent($a5);
+
+
+
+$atom->x(10.234);
+ok $atom->x, 10.234;
+my $y = 12.345;
+$atom->y($y);
+ok $atom->y, $y;
+my $z = $atom->x - $y;
+$atom->z($z);
+ok $atom->z, -2.111;
+my @xyz = $atom->xyz;
+ok (scalar (@xyz), 3);
+
+ok $xyz[0], 10.234;
+ok $xyz[1], 12.345;
+ok $xyz[2], -2.111;
+
+my $e2 = Bio::Structure::Entry->new(-id => "Entry 2");
+ok (1);
+ok $e2->id, "Entry 2";
+my $m3 = Bio::Structure::Model->new(-id => "Model 2");
+ok (1);
+ok $m3->id, "Model 2";
+my $c3 = Bio::Structure::Chain->new(-id => "Chain 2");
+ok (1);
+ok $c3->id, "Chain 2";
+my $r3 = Bio::Structure::Residue->new(-id => "Residue 2");
+ok (1);
+ok $r3->id, "Residue 2";
+$a2 = Bio::Structure::Atom->new(-id => "Atom 2");
+ok (1);
+ok $a2->id, "Atom 2";
+
+$entry->add_atom($r3,$a6);
+$entry->add_residue($c3,$r3);
+ok $entry->parent( $entry->parent($a6) )->id , "Chain 2";

Added: trunk/packages/bioperl/branches/upstream/current/t/Symbol.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Symbol.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Symbol.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Symbol.t,v 1.1 2001/10/01 15:36:15 jason Exp $
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 8;
+}
+
+use Bio::Symbol::Symbol;
+
+my $thymine = new Bio::Symbol::Symbol(-name => 'Arg',
+				      -token=> 'R');
+my $a = new Bio::Symbol::Symbol(-token => 'A' );
+my $u = new Bio::Symbol::Symbol(-token => 'U' );
+my $g = new Bio::Symbol::Symbol(-token => 'G' );
+
+ok($thymine);
+ok($thymine->name, "Arg");
+ok($thymine->token, "R");
+my $M = new Bio::Symbol::Symbol(-name  => 'Met',
+				-token => 'M',
+				-symbols => [ $a, $u, $g ]);
+
+ok($M->name, "Met");
+ok($M->token, 'M');
+my @symbols = $M->symbols;
+my @expected = ($a, $u, $g);
+foreach ( 0..2 ) {
+    ok($expected[$_], $symbols[$_]);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/TagHaplotype.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/TagHaplotype.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/TagHaplotype.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+        use lib 't';
+    }
+    use Test;
+
+    plan tests => 2;
+}
+
+use Bio::PopGen::TagHaplotype;
+
+my $hap = [
+             [qw/0       0       0/],
+             [qw/1       1       1/],
+             [qw/0       0       1/],
+             [qw/1       2       1/]
+          ];
+
+my $obj = Bio::PopGen::TagHaplotype -> new(-haplotype_block => $hap);
+
+
+# check haplotype length 
+ok( $obj->tag_length ,1); # Tag length for this set must be 1
+ 
+# check the tag list
+ok( (join ' ', @{($obj->tag_list)->[0]}) ,'1 2'); # the SNPs are 1 and 2 (1 2)

Added: trunk/packages/bioperl/branches/upstream/current/t/TaxonTree.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/TaxonTree.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/TaxonTree.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+# These modules are now deprecated, don't bother testing them.
+
+## I am pretty sure this module is going the way of the dodo bird so 
+## I am not sure how much work to put into fixing the tests/module
+## --jasonstajich
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 1;
+}
+
+ok(1);
+
+if (0) {
+	use Bio::Taxonomy::Taxon;
+	ok(1);
+	
+	
+	ok my $taxonL = Bio::Taxonomy::Taxon->new;
+	ok $taxonL->description('this could be anything');
+	ok $taxonL->taxon('could this be called name?');
+	ok $taxonL->id('could this be called taxid?');
+	skip 1, $taxonL->branch_length('should accept only numerical values?');
+	ok  $taxonL->branch_length(5);
+	
+	ok $taxonL->id('could this be called taxid?');
+	ok $taxonL->rank('species');
+	ok $taxonL->rank, 'species';
+	# ok $taxonL->has_rank, 'species'; #why two methods that do mostly the same thing, but work differently?
+	
+	skip 1, $taxonL->rank('foo is not a rank, class variable @RANK not initialised'); 
+	ok $taxonL->to_string, '"could this be called taxid?":5';
+	
+	my $taxonR = new Bio::Taxonomy::Taxon;
+	
+	my $taxon = new Bio::Taxonomy::Taxon(-id =>'ancient', -taxon => 'genus');
+	ok $taxon->id(), 'ancient'; 
+	ok $taxon->taxon(), 'genus'; 
+	ok $taxon->internal_id, 2;
+	ok $taxonL->internal_id, 0; # would not it be better to start numebering from 1?
+	ok $taxon->add_Descendent($taxonL);
+	$taxon->add_Descendent($taxonR);
+	
+	ok  scalar $taxon->each_Descendent, 2;  # dies
+	ok $taxon->remove_Descendent($taxonR); # better to return number of Descendants removed
+	
+	ok $taxon->remove_all_Descendents();
+	
+	
+	$taxon->add_Descendent($taxonL);
+	ok $taxonL->ancestor->id, 'ancient';
+	ok $taxonL->branch_length(5);
+	
+	
+	ok $taxonL->is_Leaf, 1;
+	ok $taxon->is_Leaf, 0;
+	ok $taxon->height, 6;
+	ok $taxonL->height, 5;
+	ok $taxon->invalidate_height, undef;
+	ok $taxonL->classify(1), 2;
+	skip(1,"skip classify weirdness");
+	# ok $taxonL->classify(0), 2, 'ancestor has rank, but implementation prevents showing anything more than one value';
+	skip(1,"skip classify weirdness");
+	#ok $taxonL->has_rank, 1, 'documentation claims this returns a boolean; and that it queries ancestors rank?, needs an agrument but does not test it';
+	skip(1,"skip classify weirdness");
+	#ok $taxonL->has_rank('species'), 1;
+	
+	#ok $taxon->has_taxon(); # why docs and code talk about ancestor?
+	#ok $taxonL->has_taxon('genus');  returns undef or an object, not boolean
+	
+	ok $taxon->distance_to_root, 0;
+	ok $taxonL->distance_to_root, 1;
+	#ok $taxonL->recent_common_ancestor($taxon)->id, 'ancient';
+	
+	
+	
+	#use Data::Dumper;
+	#print Dumper  $taxonL->classify();
+	skip(1, 'Skip this weird function');
+	# ok $taxonL->has_rank('species'), 1;
+	#ok my $species = $taxonL->species;
+	
+	
+	
+	
+	
+	##################################################################################################
+	
+	# tests for Bio::Taxonomy::Tree;
+	# code from synopsis
+	
+	use Bio::Species;
+	use Bio::Taxonomy::Tree;
+	use Bio::Taxonomy;
+	
+	my $human=new Bio::Species;
+	my $chimp=new Bio::Species;
+	my $bonobo=new Bio::Species;
+	
+	$human->classification(qw( sapiens Homo Hominidae
+							   Catarrhini Primates Eutheria
+							   Mammalia Euteleostomi Vertebrata 
+							   Craniata Chordata
+							   Metazoa Eukaryota ));
+	$chimp->classification(qw( troglodytes Pan Hominidae
+							   Catarrhini Primates Eutheria
+							   Mammalia Euteleostomi Vertebrata 
+							   Craniata Chordata
+							   Metazoa Eukaryota ));
+	$bonobo->classification(qw( paniscus Pan Hominidae
+								Catarrhini Primates Eutheria
+								Mammalia Euteleostomi Vertebrata 
+								Craniata Chordata
+								Metazoa Eukaryota ));
+	
+	# ranks passed to $taxonomy match ranks of species
+	my @ranks = ('superkingdom','kingdom','phylum','subphylum',
+				 'no rank 1','no rank 2','class','no rank 3','order',
+				 'suborder','family','genus','species');
+	
+	my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks,
+								   -method => 'trust',
+								   -order => -1);
+	
+	
+	ok my $tree1=new Bio::Taxonomy::Tree;
+	my $tree2=new Bio::Taxonomy::Tree;
+	
+	$tree1->make_species_branch($human,$taxonomy);
+	$tree2->make_species_branch($chimp,$taxonomy);
+	
+	my ($homo_sapiens) = $tree1->get_leaves;
+	ok ref $homo_sapiens, 'Bio::Taxonomy::Taxon';
+	
+	ok $tree1->splice($tree2);
+	
+	ok $tree1->add_species($bonobo,$taxonomy);
+	
+	
+	ok join (", ", map {$_->taxon} $tree1->get_leaves), 'Homo sapiens, Pan troglodytes, Pan paniscus';
+	ok $tree1->remove_branch($homo_sapiens);
+	ok join (", ", map {$_->taxon} $tree1->get_leaves), 'Pan troglodytes, Pan paniscus';
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Taxonomy.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Taxonomy.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Taxonomy.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,236 @@
+# This is -*-Perl-*- code
+# $Id: Taxonomy.t,v 1.11.4.4 2006/11/08 17:25:55 sendu Exp $
+
+use strict;
+use vars qw($NUMTESTS $DEBUG);
+use File::Spec;
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	$NUMTESTS = 98;
+	$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+	
+	eval {require Test::More;};
+	if ($@) {
+		use lib 't/lib';
+	}
+	use Test::More;
+    
+	eval {
+		require XML::Twig;
+	};
+	if ($@) {
+		plan skip_all => 'Requires XML::Twig; skipping...';
+	}
+        else {
+		plan tests => $NUMTESTS;
+	}
+}
+
+END {
+    for my $filename (qw(nodes parents id2names names2id)) {
+	unlink File::Spec->catfile('t','data','taxdump', $filename);
+    }
+}
+
+use_ok('Bio::DB::Taxonomy');
+use_ok('Bio::Tree::Tree');
+
+# we're actually testing Bio::Taxon and Bio::DB::Taxonomy::* here, not
+# Bio::Taxonomy
+
+ok my $db_entrez = Bio::DB::Taxonomy->new(-source => 'entrez');
+
+ok my $db_flatfile = Bio::DB::Taxonomy->new(-source => 'flatfile',
+                               -directory => File::Spec->catdir ('t','data','taxdump'),
+                               -nodesfile => File::Spec->catfile('t','data','taxdump','nodes.dmp'),
+                               -namesfile => File::Spec->catfile('t','data','taxdump','names.dmp'),
+                               -force => 1);
+
+my $n;
+foreach my $db ($db_entrez, $db_flatfile) {
+    SKIP: {
+        skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 38 if ($db eq $db_entrez && ! $DEBUG); 
+        my $id;
+        eval { $id = $db->get_taxonid('Homo sapiens');};
+        skip "Unable to connect to entrez database; no network or server busy?", 38 if $@;
+        
+        is $id, 9606;
+        
+        # easy test on human, try out the main Taxon methods
+        ok $n = $db->get_taxon(9606);
+        is $n->id, 9606;
+        is $n->object_id, $n->id;
+        is $n->ncbi_taxid, $n->id;
+        is $n->parent_id, 9605;
+        is $n->rank, 'species';
+        
+        is $n->node_name, 'Homo sapiens';
+        is $n->scientific_name, $n->node_name;
+        is ${$n->name('scientific')}[0], $n->node_name;
+        
+        my %common_names = map { $_ => 1 } $n->common_names;
+        is keys %common_names, 2;
+        ok exists $common_names{human};
+        ok exists $common_names{man};
+        
+        is $n->division, 'Primates';
+        is $n->genetic_code, 1;
+        is $n->mitochondrial_genetic_code, 2;
+        # these are entrez-only, data not available in dmp files
+        if ($db eq $db_entrez) {
+            ok defined $n->pub_date;
+            ok defined $n->create_date;
+            ok defined $n->update_date;
+        }
+        
+        # briefly test some Bio::Tree::NodeI methods
+        ok my $ancestor = $n->ancestor;
+        is $ancestor->scientific_name, 'Homo';
+        # unless set explicitly, Bio::Taxon doesn't return anything for
+        # each_Descendent; must ask the database directly
+        ok my @children = $ancestor->db_handle->each_Descendent($ancestor); 
+        ok @children > 0;
+        
+        sleep(3) if $db eq $db_entrez;
+        
+        # do some trickier things...
+        ok my $n2 = $db->get_Taxonomy_Node('89593');
+        is $n2->scientific_name, 'Craniata';
+        
+        # briefly check we can use some Tree methods
+        my $tree = Bio::Tree::Tree->new();
+        is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';
+        
+        # can we actually form a Tree and use other Tree methods?
+        ok $tree = Bio::Tree::Tree->new(-node => $n);
+        is $tree->number_nodes, 30;
+        is $tree->get_nodes, 30;
+        is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';
+        
+        # check that getting the ancestor still works now we have explicitly set
+        # the ancestor by making a Tree
+        is $n->ancestor->scientific_name, 'Homo';
+        
+        sleep(3) if $db eq $db_entrez;
+        
+        ok $n = $db->get_Taxonomy_Node('1760');
+        is $n->scientific_name, 'Actinobacteria';
+        
+        sleep(3) if $db eq $db_entrez;
+        
+        # entrez isn't as good at searching as flatfile, so we have to special-case
+        my @ids = $db->get_taxonids('Chloroflexi');
+        $db eq $db_entrez ? (is @ids, 1) : (is @ids, 2);
+        $id = $db->get_taxonids('Chloroflexi (class)');
+        is $id, 32061;
+        
+        @ids = $db->get_taxonids('Rhodotorula');
+        is @ids, 8;
+        @ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
+        is @ids, 1;
+        is $ids[0], 231509;
+    }
+}
+
+# Test the list database
+my @ranks = qw(superkingdom class genus species);
+my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
+my $db_list = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
+                                                       -ranks => \@ranks);
+ok $db_list;
+
+ok my $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
+ok my $h_flat = $db_flatfile->get_taxon(-name => 'Homo sapiens');
+
+is $h_list->ancestor->scientific_name, 'Homo';
+
+my @names = $h_list->common_names;
+is @names, 0;    # nothing expected from the list db before we add one
+$h_list->common_names('woman');
+@names = $h_list->common_names;
+is @names, 1;
+@names = $h_flat->common_names;
+is @names, 2;
+
+# you can switch to another database when you need more information, which also
+# merges information in the node from the two different dbs
+$h_list->db_handle($db_flatfile);
+@names = $h_list->common_names;
+is @names, 3;    # the name added above plus the two seen on the flatfile node
+
+# form a tree with the list lineage first, preventing a subsequent database
+# change from giving us all those extra ranks
+$h_list->db_handle($db_list);
+my $ancestors_ancestor = $h_list->ancestor->ancestor;
+is $ancestors_ancestor->scientific_name, 'Mammalia';
+
+my $tree = Bio::Tree::Tree->new(-node => $h_list);
+$h_list->db_handle($db_flatfile);
+$ancestors_ancestor = $h_list->ancestor->ancestor;
+is $ancestors_ancestor->scientific_name, 'Mammalia';
+
+# or we can get the flatfile database's idea of the ancestors by removing
+# ourselves from the tree
+is $h_flat->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
+$h_list->ancestor(undef);
+is $h_list->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
+
+# get_lca should work on nodes from different databases
+SKIP: {
+    skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 5 unless $DEBUG;
+    skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 5 unless $DEBUG;
+    $h_flat = $db_flatfile->get_taxon(-name => 'Homo');
+    my $h_entrez;
+    eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
+    skip "Unable to connect to entrez database; no network or server busy?", 5 if $@;
+    
+    ok my $tree_functions = Bio::Tree::Tree->new();
+    is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo';
+    
+    # even though the species taxa for Homo sapiens from list and flat databases
+    # have the same internal id, get_lca won't work because they have different
+    # roots and descendents
+    $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
+    is $h_list->ancestor->internal_id, $h_flat->internal_id;
+    ok ! $tree_functions->get_lca($h_flat, $h_list);
+
+    # but we can form a tree with the flat node then remove all the ranks we're
+    # not interested in and try again
+    $tree = Bio::Tree::Tree->new(-node => $h_flat);
+    $tree->splice(-keep_rank => \@ranks);
+    is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
+}
+
+# ideas from taxonomy2tree.PLS that let us make nice tree, using
+# Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
+# because our test flatfile database only has the full lineage of one species
+undef $tree;
+for my $name ('Human', 'Hominidae') {
+  my $ncbi_id = $db_flatfile->get_taxonid($name);
+  if ($ncbi_id) {
+    my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);
+    
+    if ($tree) {
+		$tree->merge_lineage($node);
+    }
+    else {
+		ok $tree = Bio::Tree::Tree->new(-node => $node);
+    }
+  }
+}
+is $tree->get_nodes, 30;
+$tree->contract_linear_paths;
+my $ids = join(",", map { $_->id } $tree->get_nodes);
+is $ids, '131567,9606';
+
+# we can recursively fetch all descendents of a taxon
+SKIP: {
+    skip "Skipping tests which require network access, set BIOPERLDEBUG=1 to test", 1 unless $DEBUG;
+    eval {$db_entrez->get_taxon(10090);};
+    skip "Unable to connect to entrez database; no network or server busy?", 1 if $@;
+    
+    my $lca = $db_entrez->get_taxon(314146);
+    my @descs = $db_entrez->get_all_Descendents($lca);
+    is @descs, 17;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Tempfile.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Tempfile.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Tempfile.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,93 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Tempfile.t,v 1.3.12.1 2006/11/08 17:25:55 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test::More module
+    # we include the t dir (where a copy of Test/More.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if( $@ ) {
+		use lib 't/lib';
+    }
+    use Test::More tests => 18;
+}
+
+use_ok('Bio::Root::IO');
+
+ok my $obj = Bio::Root::IO->new(-verbose => 0);
+
+isa_ok($obj, 'Bio::Root::IO');
+
+my $TEST_STRING = "Bioperl rocks!\n";
+
+my ($tfh,$tfile);
+
+eval {
+    ($tfh,$tfile) = $obj->tempfile();
+    print $tfh $TEST_STRING; 
+    close($tfh);
+    open(my $IN, $tfile) or die("cannot open $tfile");    
+    my $val = join("", <$IN>) ;
+    ok( $val eq $TEST_STRING );
+    close $IN;
+    ok( -e $tfile );
+    undef $obj; 
+};
+undef $obj;
+if( $@ ) {
+    ok(0);
+} else { 
+   ok( ! -e $tfile, 'auto UNLINK => 1' );
+}
+
+$obj = Bio::Root::IO->new();
+
+eval {
+    my $tdir = $obj->tempdir(CLEANUP=>1);
+    ok( -d $tdir );
+    ($tfh, $tfile) = $obj->tempfile(dir => $tdir);
+    close $tfh;
+    ok( -e $tfile );
+    undef $obj; # see Bio::Root::IO::_io_cleanup
+};
+
+if( $@ ) { ok(0); } 
+else { ok( ! -e $tfile, 'tempfile deleted' ); }
+
+eval {
+    $obj = Bio::Root::IO->new(-verbose => 0);
+    ($tfh, $tfile) = $obj->tempfile(UNLINK => 0);
+    close $tfh;
+    ok( -e $tfile );   
+    undef $obj; # see Bio::Root::IO::_io_cleanup
+};
+
+if( $@ ) { ok(0) }
+else { ok( -e $tfile, 'UNLINK => 0') }
+
+ok unlink( $tfile) == 1 ;
+
+
+ok $obj = Bio::Root::IO->new;
+
+# check suffix is applied
+my($fh1, $fn1) = $obj->tempfile(SUFFIX => '.bioperl');
+ok $fh1;
+like $fn1, qr/\.bioperl$/, 'tempfile suffix';
+ok close $fh1;
+
+# check single return value mode of File::Temp
+my $fh2 = $obj->tempfile;
+ok $fh2, 'tempfile() in scalar context';
+ok close $fh2;
+
+
+1;
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Term.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Term.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Term.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,147 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Term.t,v 1.11 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($HAVEGRAPHDIRECTED $DEBUG $NUMTESTS);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	eval {require Graph::Directed;
+			$HAVEGRAPHDIRECTED=1;
+		};
+
+	if ($@) {
+		$HAVEGRAPHDIRECTED = 0;
+		warn "Cannot run tests as Graph::Directed is not installed\n";
+	}
+	plan tests => ($NUMTESTS = 51);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot complete Term tests',1);
+	}
+}
+
+exit 0 unless $HAVEGRAPHDIRECTED;
+
+require Bio::Ontology::Term;
+use Bio::Ontology::TermFactory;
+use Bio::Annotation::DBLink;
+use Bio::Annotation::Reference;
+
+my $obj = Bio::Ontology::Term->new();
+
+ok( $obj->isa( "Bio::Ontology::TermI" ) );
+
+ok( $obj->identifier( "0003947" ), "0003947" );
+ok( $obj->identifier(), "0003947" );
+
+ok( $obj->name( "N-acetylgalactosaminyltransferase" ), "N-acetylgalactosaminyltransferase" );
+ok( $obj->name(), "N-acetylgalactosaminyltransferase" );
+
+ok( $obj->definition( "Catalysis of ..." ), "Catalysis of ..." );
+ok( $obj->definition(), "Catalysis of ..." );
+
+ok( $obj->version( "666" ), "666" );
+ok( $obj->version(), "666" );
+
+ok( $obj->ontology( "category 1 name" ) );
+ok( $obj->ontology()->name(), "category 1 name" );
+
+my $ont = Bio::Ontology::Ontology->new();
+ok( $ont->name( "category 2 name" ) );
+
+ok( $obj->ontology( $ont ) );
+ok( $obj->ontology()->name(), "category 2 name" );
+
+ok( $obj->is_obsolete( 1 ), 1 );
+ok( $obj->is_obsolete(), 1 );
+
+ok( $obj->comment( "Consider the term ..." ), "Consider the term ..." );
+ok( $obj->comment(), "Consider the term ..." );
+
+ok( $obj->get_synonyms(), 0 );
+
+$obj->add_synonym( ( "AA", "AB" ) );
+my @al1 = $obj->get_synonyms();
+ok( scalar(@al1), 2 );
+ok( $al1[ 0 ], "AA" );
+ok( $al1[ 1 ], "AB" );
+
+my @al2 = $obj->remove_synonyms();
+ok( $al2[ 0 ], "AA" );
+ok( $al2[ 1 ], "AB" );
+
+ok( $obj->get_synonyms(), 0 );
+ok( $obj->remove_synonyms(), 0 );
+
+$obj->add_synonyms( ( "AA", "AB" ) );
+
+ok( $obj->identifier(undef), undef );
+ok( $obj->name(undef), undef );
+ok( $obj->definition(undef), undef );
+ok( $obj->is_obsolete(0), 0 );
+ok( $obj->comment(undef), undef );
+
+
+$obj = Bio::Ontology::Term->new( 
+    -identifier  => "0016847",
+    -name        => "1-aminocyclopropane-1-carboxylate synthase",
+    -definition  => "Catalysis of ...",
+    -is_obsolete => 0,
+    -version     => "6.6.6",
+    -ontology    => "cat",
+    -comment     => "X",
+    -dblinks    => [
+        Bio::Annotation::DBLink->new(-database => 'db1'),
+        Bio::Annotation::DBLink->new(-database => 'db2')
+    ],
+    -references => []
+);  
+
+ok( $obj->identifier(), "0016847" );
+ok( $obj->name(), "1-aminocyclopropane-1-carboxylate synthase" );
+ok( $obj->definition(), "Catalysis of ..." );
+ok( $obj->is_obsolete(), 0);
+ok( $obj->comment(), "X" );
+ok( $obj->version(), "6.6.6" );
+ok( $obj->ontology()->name(), "cat" );
+ok( scalar($obj->get_dblinks), 2);
+ok( scalar($obj->get_references), 0);
+
+# test object factory for terms
+my $fact = Bio::Ontology::TermFactory->new();
+$obj = $fact->create_object(-name => "some ontology term");
+ok $obj->isa("Bio::Ontology::TermI");
+ok ($obj->name, "some ontology term");
+
+$fact->type("Bio::Ontology::GOterm");
+$obj = $fact->create_object(-name => "some ontology term",
+			    -identifier => "GO:987654");
+ok $obj->isa("Bio::Ontology::TermI");
+ok (ref($obj), "Bio::Ontology::GOterm");
+ok ($obj->name, "some ontology term");
+ok ($obj->identifier, "GO:987654");
+
+$fact->type("Bio::Annotation::OntologyTerm");
+$obj = $fact->create_object(-name => "some ontology term",
+			    -identifier => "GO:987654",
+			    -ontology => "nonsense");
+ok $obj->isa("Bio::Ontology::TermI");
+ok $obj->isa("Bio::AnnotationI");
+ok ($obj->name, "some ontology term");
+ok ($obj->identifier, "GO:987654");
+ok ($obj->tagname, "nonsense");

Added: trunk/packages/bioperl/branches/upstream/current/t/Test.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Test.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Test.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,253 @@
+use strict;
+package Test;
+use Carp;
+use vars (qw($VERSION @EXPORT @EXPORT_OK $ntest $TestLevel), #public-ish
+	  qw($TESTOUT $ONFAIL %todo %history $planned @FAILDETAIL)); #private-ish
+$VERSION = '1.15';
+require Exporter;
+use base qw(Exporter);
+@EXPORT=qw(&plan &ok &skip);
+@EXPORT_OK=qw($ntest $TESTOUT);
+
+$TestLevel = 0;		# how many extra stack frames to skip
+$|=1;
+#$^W=1;  ?
+$ntest=1;
+$TESTOUT = *STDOUT{IO};
+
+# Use of this variable is strongly discouraged.  It is set mainly to
+# help test coverage analyzers know which test is running.
+$ENV{REGRESSION_TEST} = $0;
+
+sub plan {
+    croak "Test::plan(%args): odd number of arguments" if @_ & 1;
+    croak "Test::plan(): should not be called more than once" if $planned;
+    my $max=0;
+    for (my $x=0; $x < @_; $x+=2) {
+	my ($k,$v) = @_[$x,$x+1];
+	if ($k =~ /^test(s)?$/) { $max = $v; }
+	elsif ($k eq 'todo' or 
+	       $k eq 'failok') { for (@$v) { $todo{$_}=1; }; }
+	elsif ($k eq 'onfail') { 
+	    ref $v eq 'CODE' or croak "Test::plan(onfail => $v): must be CODE";
+	    $ONFAIL = $v; 
+	}
+	else { carp "Test::plan(): skipping unrecognized directive '$k'" }
+    }
+    my @todo = sort { $a <=> $b } keys %todo;
+    if (@todo) {
+	print $TESTOUT "1..$max todo ".join(' ', @todo).";\n";
+    } else {
+	print $TESTOUT "1..$max\n";
+    }
+    ++$planned;
+}
+
+sub to_value {
+    my ($v) = @_;
+    (ref $v or '') eq 'CODE' ? $v->() : $v;
+}
+
+sub ok ($;$$) {
+    croak "ok: plan before you test!" if !$planned;
+    my ($pkg,$file,$line) = caller($TestLevel);
+    my $repetition = ++$history{"$file:$line"};
+    my $context = ("$file at line $line".
+		   ($repetition > 1 ? " fail \#$repetition" : ''));
+    my $ok=0;
+    my $result = to_value(shift);
+    my ($expected,$diag);
+    if (@_ == 0) {
+	$ok = $result;
+    } else {
+	$expected = to_value(shift);
+	my ($regex,$ignore);
+	if (!defined $expected) {
+	    $ok = !defined $result;
+	} elsif (!defined $result) {
+	    $ok = 0;
+	} elsif ((ref($expected)||'') eq 'Regexp') {
+	    $ok = $result =~ /$expected/;
+	} elsif (($regex) = ($expected =~ m,^ / (.+) / $,sx) or
+	    ($ignore, $regex) = ($expected =~ m,^ m([^\w\s]) (.+) \1 $,sx)) {
+	    $ok = $result =~ /$regex/;
+	} else {
+	    $ok = $result eq $expected;
+	}
+    }
+    my $todo = $todo{$ntest};
+    if ($todo and $ok) {
+	$context .= ' TODO?!' if $todo;
+	print $TESTOUT "ok $ntest # ($context)\n";
+    } else {
+	print $TESTOUT "not " if !$ok;
+	print $TESTOUT "ok $ntest\n";
+	
+	if (!$ok) {
+	    my $detail = { 'repetition' => $repetition, 'package' => $pkg,
+			   'result' => $result, 'todo' => $todo };
+	    $$detail{expected} = $expected if defined $expected;
+	    $diag = $$detail{diagnostic} = to_value(shift) if @_;
+	    $context .= ' *TODO*' if $todo;
+	    if (!defined $expected) {
+		if (!$diag) {
+		    print $TESTOUT "# Failed test $ntest in $context\n";
+		} else {
+		    print $TESTOUT "# Failed test $ntest in $context: $diag\n";
+		}
+	    } else {
+		my $prefix = "Test $ntest";
+		print $TESTOUT "# $prefix got: ".
+		    (defined $result? "'$result'":'<UNDEF>')." ($context)\n";
+		$prefix = ' ' x (length($prefix) - 5);
+		if ((ref($expected)||'') eq 'Regexp') {
+		    $expected = 'qr/'.$expected.'/'
+		} else {
+		    $expected = "'$expected'";
+		}
+		if (!$diag) {
+		    print $TESTOUT "# $prefix Expected: $expected\n";
+		} else {
+		    print $TESTOUT "# $prefix Expected: $expected ($diag)\n";
+		}
+	    }
+	    push @FAILDETAIL, $detail;
+	}
+    }
+    ++ $ntest;
+    $ok;
+}
+
+sub skip ($$;$$) {
+    my $whyskip = to_value(shift);
+    if ($whyskip) {
+	$whyskip = 'skip' if $whyskip =~ m/^\d+$/;
+	print $TESTOUT "ok $ntest # $whyskip\n";
+	++ $ntest;
+	1;
+    } else {
+	local($TestLevel) = $TestLevel+1;  #ignore this stack frame
+	&ok;
+    }
+}
+
+END {
+    $ONFAIL->(\@FAILDETAIL) if @FAILDETAIL && $ONFAIL;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Test - provides a simple framework for writing test scripts
+
+=head1 SYNOPSIS
+
+  use strict;
+  use Test;
+
+  # use a BEGIN block so we print our plan before MyModule is loaded
+  BEGIN { plan tests => 14, todo => [3,4] }
+
+  # load your module...
+  use MyModule;
+
+  ok(0); # failure
+  ok(1); # success
+
+  ok(0); # ok, expected failure (see todo list, above)
+  ok(1); # surprise success!
+
+  ok(0,1);             # failure: '0' ne '1'
+  ok('broke','fixed'); # failure: 'broke' ne 'fixed'
+  ok('fixed','fixed'); # success: 'fixed' eq 'fixed'
+  ok('fixed',qr/x/);   # success: 'fixed' =~ qr/x/
+
+  ok(sub { 1+1 }, 2);  # success: '2' eq '2'
+  ok(sub { 1+1 }, 3);  # failure: '2' ne '3'
+  ok(0, int(rand(2));  # (just kidding :-)
+
+  my @list = (0,0);
+  ok @list, 3, "\@list=".join(',',@list);      #extra diagnostics
+  ok 'segmentation fault', '/(?i)success/';    #regex match
+
+  skip($feature_is_missing, ...);    #do platform specific test
+
+=head1 DESCRIPTION
+
+L<Test::Harness|Test::Harness> expects to see particular output when it
+executes tests.  This module aims to make writing proper test scripts just
+a little bit easier (and less error prone :-).
+
+=head1 TEST TYPES
+
+=over 4
+
+=item * NORMAL TESTS
+
+These tests are expected to succeed.  If they don't something's
+screwed up!
+
+=item * SKIPPED TESTS
+
+Skip is for tests that might or might not be possible to run depending
+on the availability of platform specific features.  The first argument
+should evaluate to true (think "yes, please skip") if the required
+feature is not available.  After the first argument, skip works
+exactly the same way as do normal tests.
+
+=item * TODO TESTS
+
+TODO tests are designed for maintaining an B<executable TODO list>.
+These tests are expected NOT to succeed.  If a TODO test does succeed,
+the feature in question should not be on the TODO list, now should it?
+
+Packages should NOT be released with succeeding TODO tests.  As soon
+as a TODO test starts working, it should be promoted to a normal test
+and the newly working feature should be documented in the release
+notes or change log.
+
+=back
+
+=head1 RETURN VALUE
+
+Both C<ok> and C<skip> return true if their test succeeds and false
+otherwise in a scalar context.
+
+=head1 ONFAIL
+
+  BEGIN { plan test => 4, onfail => sub { warn "CALL 911!" } }
+
+While test failures should be enough, extra diagnostics can be
+triggered at the end of a test run.  C<onfail> is passed an array ref
+of hash refs that describe each test failure.  Each hash will contain
+at least the following fields: C<package>, C<repetition>, and
+C<result>.  (The file, line, and test number are not included because
+their correspondence to a particular test is tenuous.)  If the test
+had an expected value or a diagnostic string, these will also be
+included.
+
+The B<optional> C<onfail> hook might be used simply to print out the
+version of your package and/or how to report problems.  It might also
+be used to generate extremely sophisticated diagnostics for a
+particularly bizarre test failure.  However it's not a panacea.  Core
+dumps or other unrecoverable errors prevent the C<onfail> hook from
+running.  (It is run inside an C<END> block.)  Besides, C<onfail> is
+probably over-kill in most cases.  (Your test code should be simpler
+than the code it is testing, yes?)
+
+=head1 SEE ALSO
+
+L<Test::Harness> and, perhaps, test coverage analysis tools.
+
+=head1 AUTHOR
+
+Copyright (c) 1998-1999 Joshua Nathaniel Pritikin.  All rights reserved.
+
+This package is free software and is provided "as is" without express
+or implied warranty.  It may be used, redistributed and/or modified
+under the terms of the Perl Artistic License (see
+http://www.perl.com/perl/misc/Artistic.html)
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/t/Tmhmm.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Tmhmm.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Tmhmm.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,32 @@
+use strict;
+use Test;
+
+BEGIN {	plan tests => 12 }
+
+use Bio::Root::IO;
+use Bio::Tools::Tmhmm;
+
+ok my $infile = Bio::Root::IO->catfile(qw(t data tmhmm.out));
+ok my $parser = Bio::Tools::Tmhmm->new(-file=>$infile);
+
+my @feat;
+while ( my $feat = $parser->next_result ) {
+  push @feat, $feat;
+}
+
+ok @feat==3;
+
+ok $feat[0]->seq_id,      'my_sequence_id';
+ok $feat[0]->source_tag,  'TMHMM2.0';
+ok $feat[0]->primary_tag, 'transmembrane';
+
+ok $feat[0]->start,  54;
+ok $feat[0]->end,    76;
+
+ok $feat[1]->start,  116;
+ok $feat[1]->end,    138;
+
+ok $feat[2]->start,  151;
+ok $feat[2]->end,    173;
+
+  

Added: trunk/packages/bioperl/branches/upstream/current/t/Tools.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Tools.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Tools.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# $Id: Tools.t,v 1.7 2001/04/30 15:12:33 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 8; 
+}
+
+use Bio::SeqIO;
+use Bio::Tools::SeqWords;
+use Bio::Tools::SeqStats;
+use Bio::Root::IO;
+
+ok(1);
+my $str = Bio::SeqIO->new(-file=> Bio::Root::IO->catfile("t","data","multifa.seq"), '-format' => 'Fasta');
+my $seqobj= $str->next_seq();
+ok $seqobj;
+
+my $words = Bio::Tools::SeqWords->new('-seq' => $seqobj);
+my $hash = $words->count_words(6);
+ok ($words);
+ok ($hash);
+
+my $seq_stats  =  Bio::Tools::SeqStats->new('-seq' => $seqobj);
+
+ok $seq_stats;
+
+my $hash_ref = $seq_stats->count_monomers();  # eg for DNA sequence
+
+ok ( $hash_ref->{'A'}, 80 );
+
+$hash_ref = $seq_stats-> count_codons();  
+
+ok $hash_ref;
+
+my $weight = $seq_stats->get_mol_wt();
+ok $weight;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Tree.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Tree.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Tree.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,218 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    use vars qw($TESTCOUNT);
+    $TESTCOUNT = 29;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::TreeIO;
+my $verbose = 0;
+
+my $treeio = new Bio::TreeIO(-verbose => $verbose,
+			     -format => 'nhx',
+			     -file   => Bio::Root::IO->catfile('t','data', 
+							       'test.nhx'));
+my $tree = $treeio->next_tree;
+
+my @nodes = $tree->find_node('ADH2');
+ok(@nodes, 2);
+
+if( $verbose ) {
+    $treeio = new Bio::TreeIO(-verbose => $verbose,
+			      -format => 'nhx',
+			      );
+    $treeio->write_tree($tree);
+    print "nodes are: \n",
+    join(", ", map {  $_->id . ":". (defined $_->branch_length ? 
+				     $_->branch_length : '' ) } @nodes), "\n";
+}
+
+$treeio = new Bio::TreeIO(-format => 'newick',
+			  -file   => Bio::Root::IO->catfile('t','data',
+							    'test.nh'));
+$tree = $treeio->next_tree;
+
+
+if( $verbose ) { 
+    my $out = new Bio::TreeIO(-format => 'tabtree');
+    
+    $out->write_tree($tree);
+}
+
+my @hADH = ( $tree->find_node('hADH1'),
+	     $tree->find_node('hADH2') );
+my ($n4) = $tree->find_node('yADH4');
+
+ok($tree->is_monophyletic(-nodes    => \@hADH,
+			  -outgroup => $n4));
+
+my @mixgroup = ( $tree->find_node('hADH1'),
+		 $tree->find_node('yADH2'),
+		 $tree->find_node('yADH3'),
+		 );
+
+my ($iADHX) = $tree->find_node('iADHX');
+
+# test height
+ok($iADHX->height, 0);
+ok($iADHX->depth,0.22);
+ok(! $tree->is_monophyletic(-nodes   => \@mixgroup,
+			    -outgroup=> $iADHX));
+
+my $in = new Bio::TreeIO(-format => 'newick',
+			 -fh     => \*DATA);
+$tree = $in->next_tree;
+my ($a,$b,$c,$d) = ( $tree->find_node('A'),
+			   $tree->find_node('B'),
+			   $tree->find_node('C'),
+			   $tree->find_node('D'));
+
+ok($tree->is_monophyletic(-nodes => [$b,$c],
+			  -outgroup => $d));
+
+ok($tree->is_monophyletic(-nodes => [$b,$a],
+			  -outgroup => $d) );
+
+$tree = $in->next_tree;
+my ($e,$f,$i);
+($a,$b,$c,$d,$e,$f,$i) = ( $tree->find_node('A'),
+			   $tree->find_node('B'),
+			   $tree->find_node('C'),
+			   $tree->find_node('D'),
+			   $tree->find_node('E'),
+			   $tree->find_node('F'),
+			   $tree->find_node('I'),
+			   );
+ok(! $tree->is_monophyletic(-nodes => [$b,$f],
+			    -outgroup => $d) );
+
+ok($tree->is_monophyletic(-nodes => [$b,$a],
+			  -outgroup => $f));
+
+# test for paraphyly
+
+ok(  $tree->is_paraphyletic(-nodes => [$a,$b,$c],
+			   -outgroup => $d), 0);
+
+ok(  $tree->is_paraphyletic(-nodes => [$a,$f,$e],
+			   -outgroup => $i), 1);
+
+
+# test for rerooting the tree
+my $out = Bio::TreeIO->new(-format => 'newick', -fh => \*STDERR, -noclose => 1);
+$tree = $in->next_tree;
+$tree->verbose( -1 ) unless $DEBUG;
+my $node_cnt_orig = scalar($tree->get_nodes);
+# reroot on an internal node: should work fine
+$a = $tree->find_node('A');
+# removing node_count checks because re-rooting can change the
+# number of internal nodes (if it is done correctly)
+my $total_length_orig = $tree->total_branch_length;
+$out->write_tree($tree) if $DEBUG;
+ok($tree->reroot($a));
+$out->write_tree($tree) if $DEBUG;
+ok($node_cnt_orig, scalar($tree->get_nodes));
+my $total_length_new = $tree->total_branch_length;
+my $eps = 0.001 * $total_length_new;	# tolerance for checking length
+warn("orig total len ", $total_length_orig, "\n") if $DEBUG;
+warn("new  total len ", $tree->total_branch_length,"\n") if $DEBUG;
+# according to retree in phylip these branch lengths actually get larger
+# go figure...
+#ok(($total_length_orig >= $tree->total_branch_length - $eps)
+#   and ($total_length_orig <= $tree->total_branch_length + $eps));
+ok($tree->get_root_node, $a->ancestor);
+
+# try to reroot on an internal, will result in there being 1 less node
+$a = $tree->find_node('C')->ancestor;
+$out->write_tree($tree) if $DEBUG;
+ok($tree->reroot($a));
+$out->write_tree($tree) if $DEBUG;
+ok($node_cnt_orig, scalar($tree->get_nodes));
+warn("orig total len ", $total_length_orig, "\n") if $DEBUG;
+warn("new  total len ", $tree->total_branch_length,"\n") if $DEBUG;
+ok(($total_length_orig >= $tree->total_branch_length - $eps)
+   and ($total_length_orig <= $tree->total_branch_length + $eps));
+ok($tree->get_root_node, $a->ancestor);
+
+# try to reroot on existing root: should fail
+$a = $tree->get_root_node;
+ok(! $tree->reroot($a));
+
+# try a more realistic tree
+$tree = $in->next_tree;
+$a = $tree->find_node('VV');
+$node_cnt_orig = scalar($tree->get_nodes);
+$total_length_orig = $tree->total_branch_length;
+$out->write_tree($tree) if $DEBUG;
+ok($tree->reroot($a->ancestor) eq '1');
+$out->write_tree($tree) if $DEBUG;
+ok($node_cnt_orig+1, scalar($tree->get_nodes));
+$total_length_new = $tree->total_branch_length;
+$eps = 0.001 * $total_length_new;    # tolerance for checking length
+ok(($total_length_orig >= $tree->total_branch_length - $eps)
+   and ($total_length_orig <= $tree->total_branch_length + $eps));
+ok($tree->get_root_node, $a->ancestor->ancestor);
+
+# BFS and DFS search testing
+$treeio = new Bio::TreeIO(-verbose => $verbose,
+			     -format => 'newick',
+			     -file   => Bio::Root::IO->catfile('t','data', 
+							       'test.nh'));
+$tree = $treeio->next_tree;
+my $ct =0;
+my $let = ord('A');
+for my $n (  $tree->get_leaf_nodes ) {
+    $n->id(chr($let++));
+}
+
+for my $n ( grep {! $_->is_Leaf } $tree->get_nodes ) {
+    $n->id($ct++);
+}
+# enable for debugging
+Bio::TreeIO->new(-format => 'newick')->write_tree($tree) if( $DEBUG );
+
+my $BFSorder = join(",", map { $_->id } ( $tree->get_nodes(-order => 'b')));
+ok($BFSorder, '0,1,3,2,C,D,E,F,G,H,A,B');
+my $DFSorder = join(",", map { $_->id } ( $tree->get_nodes(-order => 'd')));
+ok($DFSorder, '0,1,2,A,B,C,D,3,E,F,G,H');
+
+
+# test some Bio::Tree::TreeFunctionI methods
+#find_node tested extensively already
+$tree->remove_Node('H');
+$DFSorder = join(",", map { $_->id } ( $tree->get_nodes(-order => 'd')));
+ok($DFSorder, '0,1,2,A,B,C,D,3,E,F,G');
+#get_lineage_nodes tested during get_lca
+$tree->splice(-remove_id => 'G');
+$DFSorder = join(",", map { $_->id } ( $tree->get_nodes(-order => 'd')));
+ok($DFSorder, '0,1,2,A,B,C,D,3,E,F');
+$tree->splice(-remove_id => [('E', 'F')], -keep_id => 'F');
+$DFSorder = join(",", map { $_->id } ( $tree->get_nodes(-order => 'd')));
+ok($DFSorder, '0,1,2,A,B,C,D,F');
+$tree->splice(-keep_id => [qw(0 1 2 A B C D)]);
+$DFSorder = join(",", map { $_->id } ( $tree->get_nodes(-order => 'd')));
+ok($DFSorder, '0,1,2,A,B,C,D');
+#get_lca, merge_lineage, contract_linear_paths tested in Taxonomy.t
+
+__DATA__
+(D,(C,(A,B)));
+(I,((D,(C,(A,B))),(E,(F,G))));
+(((A:0.3,B:2.1):0.45,C:0.7),D:4);
+(A:0.031162,((((((B:0.022910,C:0.002796):0.010713,(D:0.015277,E:0.020484):0.005336):0.005588,((F:0.013293,(G:0.018374,H:0.003108):0.005318):0.006047,I:0.014607):0.001677):0.004196,(((((J:0.003307,K:0.001523):0.011884,L:0.006960):0.006514,((M:0.001683,N:0.000100):0.002226,O:0.007085):0.014649):0.008004,P:0.037422):0.005201,(Q:0.000805,R:0.000100):0.015280):0.005736):0.004612,S:0.042283):0.017979,(T:0.006883,U:0.016655):0.040226):0.014239,((((((V:0.000726,W:0.000100):0.028490,((((X:0.011182,Y:0.001407):0.005293,Z:0.011175):0.004701,AA:0.007825):0.016256,BB:0.029618):0.008146):0.004279,CC:0.035012):0.060215,((((((DD:0.014933,(EE:0.008148,FF:0.000100):0.015458):0.003891,GG:0.010996):0.001489,(HH:0.000100,II:0.000100):0.054265):0.003253,JJ:0.019722):0.013796,((KK:0.001960,LL:0.004924):0.013034,MM:0.010071):0.043273):0.011912,(NN:0.031543,OO:0.018307):0.059182):0.026517):0.011087,((PP:0.000100,QQ:0.002916):0.067214,(RR:0.064486,SS:0.013444):0.011613):0.050846):0.015644,((TT:0.000100,UU:0.009287):0.072710,(VV:0.009242,WW:0.009690):0.035346):0.042993):0.060365);

Added: trunk/packages/bioperl/branches/upstream/current/t/TreeBuild.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/TreeBuild.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/TreeBuild.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: TreeBuild.t,v 1.2 2004/01/24 20:16:08 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+
+    use Test;
+    plan tests => 7;
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+my $debug = -1;
+
+use Bio::Align::DNAStatistics;
+use Bio::Align::ProteinStatistics;
+use Bio::Align::Utilities qw(:all);
+use Bio::AlignIO;
+use Bio::Root::IO;
+use Bio::Tree::DistanceFactory;
+use Bio::TreeIO;
+
+my $in = new Bio::AlignIO(-format => 'clustalw',
+       			  -file   => Bio::Root::IO->catfile('t', 'data',
+							    'pep-266.aln'));
+my $aln = $in->next_aln();
+ok($aln);
+my $pstats = new Bio::Align::ProteinStatistics(-verbose => $debug);
+my $matrix = $pstats->distance(-method => 'Kimura',
+			       -align  => $aln);
+ok($matrix);
+
+my $treebuilder = Bio::Tree::DistanceFactory->new(-method => 'NJ');
+my $tree = $treebuilder->make_tree($matrix);
+ok($tree);
+
+my ($cn) = $tree->find_node('183.m01790');
+
+# brlens checked against tree generated by PHYLIP NJ
+ok($tree->find_node('AN2438.1')->branch_length, '0.28221');
+ok($tree->find_node('FG05298.1')->branch_length, '0.20593');
+
+# simple topology test - make sure these 2 are sister
+ok($tree->find_node('YOR262W')->ancestor->id, 
+   $tree->find_node('Smik_Contig1103.1')->ancestor->id);
+
+# TODO 
+# UPGMA tests
+
+
+# test the bootstrap
+my $replicates = &bootstrap_replicates($aln,10);
+ok(scalar @$replicates, 10);

Added: trunk/packages/bioperl/branches/upstream/current/t/TreeIO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/TreeIO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/TreeIO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,298 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: TreeIO.t,v 1.32.6.3 2006/10/16 17:08:15 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+use vars qw($NUMTESTS);
+use strict;
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+
+	use Test;
+	$NUMTESTS = 60;
+	plan tests => $NUMTESTS;
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+use vars qw($FILE1 $FILE2 $FILE3);
+use File::Spec;
+$FILE1= 'testnewick.phylip';
+$FILE2= 'testlarge.phy';
+$FILE3= File::Spec->catfile(qw(t testsvg.svg));
+
+END {
+	unlink $FILE1;
+	unlink $FILE2;
+	unlink $FILE3;
+}
+use Bio::TreeIO;
+use Bio::Root::IO;
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $treeio = new Bio::TreeIO(-verbose => $verbose,
+			     -format => 'newick',
+			     -file   => File::Spec->catfile
+			     (qw(t data cysprot1b.newick)));
+
+ok($treeio);
+my $tree = $treeio->next_tree;
+ok(ref($tree) && $tree->isa('Bio::Tree::TreeI'));
+
+my @nodes = $tree->get_nodes;
+ok(@nodes, 6);
+my ($rat) = $tree->find_node('CATL_RAT');
+ok($rat);
+ok($rat->branch_length, '0.12788');
+# move the id to the bootstrap
+ ok($rat->ancestor->bootstrap($rat->ancestor->id), '95');
+ $rat->ancestor->id('');
+# maybe this can be auto-detected, but then can't distinguish
+# between internal node labels and bootstraps...
+ok($rat->ancestor->bootstrap, '95');
+ok($rat->ancestor->branch_length, '0.18794');
+ok($rat->ancestor->id, '');
+
+if($verbose ) {
+	foreach my $node ( $tree->get_root_node()->each_Descendent() ) {
+		print "node: ", $node->to_string(), "\n";
+		my @ch = $node->each_Descendent();
+		if( @ch ) {
+			print "\tchildren are: \n";
+			foreach my $node ( $node->each_Descendent() ) {
+				print "\t\t ", $node->to_string(), "\n";
+			}
+		}
+	}
+}
+$treeio = new Bio::TreeIO(-verbose => $verbose,
+			  -format => 'newick',
+			  -file   => ">$FILE1");
+$treeio->write_tree($tree);
+undef $treeio;
+ok( -s $FILE1 );
+$treeio = new Bio::TreeIO(-verbose => $verbose,
+			  -format => 'newick',
+			  -file   => Bio::Root::IO->catfile('t','data', 
+							    'LOAD_Ccd1.dnd'));
+ok($treeio);
+$tree = $treeio->next_tree;
+
+ok(ref($tree) && $tree->isa('Bio::Tree::TreeI'));
+
+@nodes = $tree->get_nodes;
+ok(@nodes, 52);
+
+if( $verbose ) { 
+	foreach my $node ( @nodes ) {
+		print "node: ", $node->to_string(), "\n";
+		my @ch = $node->each_Descendent();
+		if( @ch ) {
+			print "\tchildren are: \n";
+			foreach my $node ( $node->each_Descendent() ) {
+				print "\t\t ", $node->to_string(), "\n";
+			}
+		}
+	}
+}
+
+ok($tree->total_branch_length, 7.12148);
+$treeio = new Bio::TreeIO(-verbose => $verbose,
+			  -format => 'newick', 
+			  -file   => ">$FILE2");
+$treeio->write_tree($tree);
+undef $treeio;
+ok(-s $FILE2);
+$treeio = new Bio::TreeIO(-verbose => $verbose,
+			  -format  => 'newick',
+			  -file    => Bio::Root::IO->catfile('t','data','hs_fugu.newick'));
+$tree = $treeio->next_tree();
+@nodes = $tree->get_nodes();
+ok(@nodes, 5);
+# no reliable order for the bottom nodes because they have no branchlen
+my @vals = qw(SINFRUP0000006110);
+my $saw = 0;
+foreach my $node ( $tree->get_root_node()->each_Descendent() ) {
+	foreach my $v ( @vals ) {
+	   if( defined $node->id && 
+	       $node->id eq $v ){ $saw = 1; last; }
+	}
+	last if $saw;
+}
+ok($saw, 1, "Did not see $vals[0] as expected\n");
+if( $verbose ) {
+	foreach my $node ( @nodes ) {
+		print "\t", $node->id, "\n";
+	}
+}
+
+$treeio = new Bio::TreeIO(-format => 'newick', 
+								  -fh => \*DATA);
+my $treeout = new Bio::TreeIO(-format => 'tabtree');
+my $treeout2 = new Bio::TreeIO(-format => 'newick');
+
+$tree = $treeio->next_tree;
+
+if( $verbose > 0  ) {
+    $treeout->write_tree($tree);
+    $treeout2->write_tree($tree);
+}
+
+$treeio = new Bio::TreeIO(-verbose => $verbose,
+			  -file   => Bio::Root::IO->catfile('t','data', 
+							    'test.nhx'));
+
+if( eval "require SVG::Graph; 1;" ) {
+	my $treeout3 = new Bio::TreeIO(-format => 'svggraph',
+											 -file => ">$FILE3");
+	ok($treeout3);
+	eval {$treeout3->write_tree($tree);};
+	ok (-e $FILE3);
+} else {
+    for ( 1..2 ) {
+	skip("skipping SVG::Graph output, SVG::Graph not installed",2);
+    }
+}
+
+ok($treeio);
+$tree = $treeio->next_tree;
+
+ok(ref($tree) && $tree->isa('Bio::Tree::TreeI'));
+
+@nodes = $tree->get_nodes;
+ok(@nodes, 13, scalar @nodes);
+
+my $adhy = $tree->find_node('ADHY');
+ok($adhy->branch_length, 0.1);
+ok(($adhy->get_tag_values('S'))[0], 'nematode');
+ok(($adhy->get_tag_values('E'))[0], '1.1.1.1');
+
+# try lintree parsing
+$treeio = new Bio::TreeIO(-format => 'lintree',
+			      -file   => Bio::Root::IO->catfile
+			      (qw(t data crab.njb)));
+
+my (@leaves, $node);
+while( $tree = $treeio->next_tree ) {
+
+	ok(ref($tree) && $tree->isa('Bio::Tree::TreeI'));
+
+	@nodes = $tree->get_nodes;
+
+	@leaves = $tree->get_leaf_nodes;
+	ok(@leaves, 13);
+	ok(@nodes, 25);
+	($node) = $tree->find_node(-id => '18');
+	ok($node);
+	ok($node->id, '18');
+	ok($node->branch_length, '0.030579');
+	ok($node->bootstrap, 998);
+}
+
+$treeio = new Bio::TreeIO(-format => 'lintree',
+			   -file   => Bio::Root::IO->catfile
+			   (qw(t data crab.nj)));
+
+$tree = $treeio->next_tree;
+
+ok(ref($tree) && $tree->isa('Bio::Tree::TreeI'));
+
+@nodes = $tree->get_nodes;
+@leaves = $tree->get_leaf_nodes;
+ok(@leaves, 13);
+ok(@nodes, 25);
+($node) = $tree->find_node('18');
+ok($node->id, '18');
+ok($node->branch_length, '0.028117');
+
+($node) = $tree->find_node(-id => 'C-vittat');
+ok($node->id, 'C-vittat');
+ok($node->branch_length, '0.087619');
+ok($node->ancestor->id, '14');
+
+$treeio = new Bio::TreeIO(-format => 'lintree',
+			  -file   => Bio::Root::IO->catfile
+			  (qw(t data crab.dat.cn)));
+
+$tree = $treeio->next_tree;
+
+ok(ref($tree) && $tree->isa('Bio::Tree::TreeI'));
+
+@nodes = $tree->get_nodes;
+@leaves = $tree->get_leaf_nodes;
+ok(@leaves, 13, scalar @leaves);
+
+ok(@nodes, 25, scalar @nodes);
+($node) = $tree->find_node('18');
+ok($node->id, '18');
+
+ok($node->branch_length, '0.029044');
+
+($node) = $tree->find_node(-id => 'C-vittat');
+ok($node->id, 'C-vittat');
+ok($node->branch_length, '0.097855');
+ok($node->ancestor->id, '14');
+
+if( eval "require IO::String; 1;" ) {
+# test nexus tree parsing
+    $treeio = Bio::TreeIO->new(-format => 'nexus',
+			       -file   => Bio::Root::IO->catfile
+			       (qw(t data urease.tre.nexus) ));
+    
+    $tree = $treeio->next_tree;
+    ok($tree);
+    ok($tree->id, 'PAUP_1');
+    ok($tree->get_leaf_nodes, 6);
+    ($node) = $tree->find_node(-id => 'Spombe');
+    ok($node->branch_length,0.221404);
+    
+# test nexus MrBayes tree parsing
+    $treeio = Bio::TreeIO->new(-format => 'nexus',
+			       -file   => Bio::Root::IO->catfile
+			       (qw(t data adh.mb_tree.nexus) ));
+    
+    $tree = $treeio->next_tree;
+    ok($tree);
+    ok($tree->id, 'rep.1');
+    ok($tree->get_leaf_nodes, 54);
+    ($node) = $tree->find_node(-id => 'd.madeirensis');
+    ok($node->branch_length,0.039223);
+} else{
+    for ( 1..8 ) {
+	skip("skipping nexus tree parsing, IO::String not installed",1);
+    }
+}
+
+# bug #1854
+# process no-newlined tree
+$treeio = Bio::TreeIO->new(-format => 'nexus',
+			   -file   => Bio::Root::IO->catfile
+			   (qw(t data tree_nonewline.nexus) ));
+
+$tree = $treeio->next_tree;
+ok($tree);
+ok($tree->find_node('TRXHomo'));
+
+
+# parse trees with scores
+
+$treeio = Bio::TreeIO->new(-format => 'newick',
+			   -file   => Bio::Root::IO->catfile
+			   (qw(t data puzzle.tre)));
+$tree = $treeio->next_tree;
+ok($tree);
+ok($tree->score, '-2673.059726');
+							     
+__DATA__
+(((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1);

Added: trunk/packages/bioperl/branches/upstream/current/t/UCSCParsers.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/UCSCParsers.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/UCSCParsers.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,109 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    use vars qw($TESTCOUNT);
+    $TESTCOUNT = 48;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::SearchIO;
+use Bio::Root::IO;
+
+my $pslparser = new Bio::SearchIO(-format => 'psl',
+				  -file   => Bio::Root::IO->catfile
+				  (qw(t data sbay_c545-yeast.BLASTZ.PSL)));
+
+my $result = $pslparser->next_result;
+ok($result->query_name, 'I');
+ok($result->query_length, 230203);
+
+my $hit    = $result->next_hit;
+ok($hit->name, 'sbay_c545');
+ok($hit->length, 28791);
+my $hsp    = $hit->next_hsp;
+ok($hsp->query->start,139871);
+ok($hsp->query->end,141472);
+ok($hsp->query->length, 1602);
+ok($hsp->query->strand, 1);
+ok($hsp->hit->strand, 1);
+my $q_gapblocks = $hsp->gap_blocks('query');
+ok(scalar @$q_gapblocks, 24);
+ok($q_gapblocks->[0]->[1],45);
+ok($q_gapblocks->[1]->[1],10);
+ok($q_gapblocks->[1]->[0],139921);
+
+
+$hsp       = $hit->next_hsp;
+$hsp       = $hit->next_hsp;
+ok($hsp->hit->start,27302);
+ok($hsp->hit->end,27468);
+ok($hsp->hit->length,167);
+ok($hsp->query->start, 123814);
+ok($hsp->query->end, 123972);
+ok($hsp->query->length, 159);
+ok($hsp->query->strand,-1);
+
+$q_gapblocks = $hsp->gap_blocks('query');
+ok(scalar @$q_gapblocks, 4);
+ok($q_gapblocks->[0]->[1],116);
+ok($q_gapblocks->[1]->[1],4);
+ok($q_gapblocks->[1]->[0],123856);
+
+
+
+#-----------------------------------
+
+
+$pslparser = new Bio::SearchIO(-format => 'psl',
+			       -file   => Bio::Root::IO->catfile
+			       (qw(t data blat.psLayout3)));
+
+$result = $pslparser->next_result;
+ok($result->query_name, 'sequence_10');
+ok($result->query_length, 1775);
+
+$hit    = $result->next_hit;
+ok($hit->name, 'sequence_10');
+ok($hit->length, 1775);
+$hsp    = $hit->next_hsp;
+ok($hsp->query->start,1);
+ok($hsp->query->end,1775);
+ok($hsp->query->length,1775);
+ok($hsp->query->strand,1);
+ok($hsp->hit->strand,1);
+$q_gapblocks = $hsp->gap_blocks('query');
+ok(scalar @$q_gapblocks, 1);
+ok($q_gapblocks->[0]->[1],1775);
+ok($q_gapblocks->[1]->[1],undef);
+ok($q_gapblocks->[1]->[0],undef);
+
+
+$hsp       = $hit->next_hsp;
+ok($hsp->hit->start,841);
+ok($hsp->hit->end,1244);
+ok($hsp->query->start, 841);
+ok($hsp->query->end, 1244);
+ok($hsp->query->length, 404);
+ok($hsp->query->strand,-1);
+ok($hsp->hit->strand,1);
+
+$q_gapblocks = $hsp->gap_blocks('query');
+ok(scalar @$q_gapblocks, 4);
+ok($q_gapblocks->[0]->[1],14);
+ok($q_gapblocks->[1]->[1],21);
+ok($q_gapblocks->[1]->[0],1152);
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/Unflattener.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Unflattener.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Unflattener.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,208 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: Unflattener.t,v 1.8.4.2 2006/10/02 23:10:40 sendu Exp $
+
+use strict;
+use vars qw($DEBUG $TESTCOUNT);
+BEGIN {     
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    $TESTCOUNT = 8;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Root::IO;
+use Bio::SeqFeature::Tools::Unflattener;
+
+ok(1);
+
+my $verbosity = -1;   # Set to -1 for release version, so warnings aren't printed
+
+my ($seq, @sfs);
+my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
+
+
+if (1) {
+    my @path = ("t","data","AE003644_Adh-genomic.gb");
+    # allow cmd line override
+    if (@ARGV) {
+	@path = (shift @ARGV);
+    }
+    $seq = getseq(@path);
+    
+    ok ($seq->accession_number, 'AE003644');
+    my @topsfs = $seq->get_SeqFeatures;
+    if( $verbosity > 0 ) {
+	warn sprintf "TOP:%d\n", scalar(@topsfs);
+	write_hier(@topsfs);
+    }
+    
+    # UNFLATTEN
+    $unflattener->verbose($verbosity);
+    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
+				       -group_tag=>'locus_tag');
+    if( $verbosity > 0 ) {
+	warn "\n\nPOST PROCESSING:\n";
+	write_hier(@sfs);
+	warn sprintf "PROCESSED:%d\n", scalar(@sfs);
+    }
+    ok(@sfs == 21);
+}
+
+# now try again, using a custom subroutine to link together features
+$seq = getseq("t","data","AE003644_Adh-genomic.gb");
+@sfs = $unflattener->unflatten_seq
+    (-seq=>$seq,
+     -group_tag=>'locus_tag',
+     -resolver_method => 
+     sub {
+	 my $self = shift;
+	 my ($sf, @candidate_container_sfs) = @_;
+	 if ($sf->has_tag('note')) {
+	     my @notes = $sf->get_tag_values('note');
+	     my @trnames = map {/from transcript\s+(.*)/;
+				$1} @notes;
+	     @trnames = grep {$_} @trnames;
+	     my $trname;
+	     if (@trnames == 0) {
+		 $self->throw("UNRESOLVABLE");
+	     }
+	     elsif (@trnames == 1) {
+		 $trname = $trnames[0];
+	     }
+	     else {
+		 $self->throw("AMBIGUOUS: @trnames");
+	     }
+	     my @container_sfs =
+		 grep {
+		     my ($product) =
+			 $_->has_tag('product') ?
+			 $_->get_tag_values('product') :
+			 ('');
+		     $product eq $trname;
+		 } @candidate_container_sfs;
+	     if (@container_sfs == 0) {
+		 $self->throw("UNRESOLVABLE");
+	     }
+	     elsif (@container_sfs == 1) {
+		 # we got it!
+		 return ($container_sfs[0]=>0);
+	     }
+	     else {
+		 $self->throw("AMBIGUOUS");
+	     }
+                                         
+	 }
+     });
+$unflattener->feature_from_splitloc(-seq=>$seq);
+if( $verbosity > 0 ) {
+    warn "\n\nPOST PROCESSING:\n";
+    write_hier(@sfs);
+    warn sprintf "PROCESSED2:%d\n", scalar(@sfs);
+}
+ok(@sfs == 21);
+
+# try again; different sequence
+# this is an E-Coli seq with no mRNA features;
+# we just want to link all features directly with gene
+
+$seq = getseq("t","data","D10483.gbk");
+
+# UNFLATTEN
+@sfs = $unflattener->unflatten_seq(-seq=>$seq,
+				   -partonomy=>{'*'=>'gene'},
+                                );
+if( $verbosity > 0 ) {
+    warn "\n\nPOST PROCESSING:\n";
+    write_hier(@sfs);
+    warn sprintf "PROCESSED:%d\n", scalar(@sfs);
+}
+ok(@sfs == 98);
+
+# this sequence has no locus_tag or or gene tags
+$seq = getseq("t","data","AY763288.gb");
+
+# UNFLATTEN
+@sfs = $unflattener->unflatten_seq(-seq=>$seq,
+				   -use_magic=>1
+                                  );
+if( $verbosity > 0 ) {
+    warn "\n\nPOST PROCESSING:\n";
+    write_hier(@sfs);
+    warn sprintf "PROCESSED:%d\n", scalar(@sfs);
+}
+ok(@sfs == 3);
+
+
+# try again; different sequence - dicistronic gene, mRNA record
+
+$seq = getseq("t","data","X98338_Adh-mRNA.gb");
+
+# UNFLATTEN
+@sfs = $unflattener->unflatten_seq(-seq=>$seq,
+                                 -partonomy=>{'*'=>'gene'},
+                                );
+if( $verbosity > 0 ) {                                 
+    warn "\n\nPOST PROCESSING:\n";
+    write_hier(@sfs);
+    warn sprintf "PROCESSED:%d\n", scalar(@sfs);
+}
+ok(@sfs == 7);
+
+# try again; this sequence has no CDSs but rRNA present
+
+$seq = getseq("t","data","no_cds_example.gb");
+
+# UNFLATTEN
+@sfs = $unflattener->unflatten_seq(-seq=>$seq,
+                                 use_magic=>1
+                                );
+if( $verbosity > 0 ) {
+    warn "\n\nPOST PROCESSING:\n";
+    write_hier(@sfs);
+    warn sprintf "PROCESSED:%d\n", scalar(@sfs);
+}
+
+my @all_sfs = $seq->get_all_SeqFeatures;
+
+my @exons = grep { $_-> primary_tag eq 'exon' }  @all_sfs ; 
+
+ok(@exons == 2);
+
+
+
+sub write_hier {
+    my @sfs = @_;
+    _write_hier(0, @sfs);
+}
+
+sub _write_hier {
+    my $indent = shift;
+    my @sfs = @_;
+    foreach my $sf (@sfs) {
+        my $label = '?';
+        if ($sf->has_tag('product')) {
+            ($label) = $sf->get_tag_values('product');
+        }
+        warn sprintf "%s%s $label\n", '  ' x $indent, $sf->primary_tag;
+        my @sub_sfs = $sf->sub_SeqFeature;
+        _write_hier($indent+1, @sub_sfs);
+    }
+}
+
+sub getseq {
+    my @path = @_;
+    my $seqio =
+      Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
+                                                       @path
+                                                      ), 
+                      '-format' => 'GenBank');
+    $seqio->verbose($verbosity);
+
+    my $seq = $seqio->next_seq();
+    return $seq;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/Unflattener2.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Unflattener2.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Unflattener2.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,184 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: Unflattener2.t,v 1.9 2005/01/12 17:15:04 jason Exp $
+
+use strict;
+use vars qw($DEBUG $TESTCOUNT);
+BEGIN {     
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    $TESTCOUNT = 11;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Root::IO;
+use Bio::SeqFeature::Tools::Unflattener;
+
+ok(1);
+
+my $verbosity = -1;   # Set to -1 for release version, so warnings aren't printed
+#$verbosity = 1;
+
+my ($seq, @sfs);
+my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
+$unflattener->verbose($verbosity);
+
+if (1) {
+    
+    # this is an arabidopsise gbk record. it has no mRNA features.
+    # it has explicit exon/intron records
+
+    my @path = ("t","data","ATF14F8.gbk");
+    $seq = getseq(@path);
+    
+    ok ($seq->accession_number, 'AL391144');
+    my @topsfs = $seq->get_SeqFeatures;
+    my @cdss = grep {$_->primary_tag eq 'CDS'} @topsfs;
+    my $n = scalar(@topsfs);
+    if( $verbosity > 0 ) {
+	warn sprintf "TOP:%d\n", scalar(@topsfs);
+	write_hier(@topsfs);
+    }
+    # UNFLATTEN
+    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
+				       -use_magic=>1,
+				      );
+    @sfs = $seq->get_SeqFeatures;
+    if( $verbosity > 0 ) {
+	warn "\n\nPOST PROCESSING:\n";
+	write_hier(@sfs);
+	warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
+    }
+    ok(@sfs == 28);
+    my @allsfs = $seq->get_all_SeqFeatures;
+    ok(@allsfs == 202);
+    my @mrnas = grep {$_->primary_tag eq 'mRNA'} @allsfs;
+    if( $verbosity > 0 ) {
+	warn sprintf "ALL:%d\n", scalar(@allsfs);
+	warn sprintf "mRNAs:%d\n", scalar(@mrnas);
+    }
+
+    # relationship between mRNA and CDS should be one-one
+    ok(@mrnas == @cdss);
+}
+
+if (1) {
+    
+    # this is a record from FlyBase
+    # it has mRNA features, and explicit exon/intron records
+
+    my @path = ("t","data","AnnIX-v003.gbk");
+    $seq = getseq(@path);
+    
+    my @topsfs = $seq->get_SeqFeatures;
+    if( $verbosity > 0 ) {
+	warn sprintf "TOP:%d\n", scalar(@topsfs);
+	write_hier(@topsfs);
+    }
+    # UNFLATTEN
+    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
+				       -use_magic=>1,
+				      );
+    @sfs = $seq->get_SeqFeatures;
+    if( $verbosity > 0 ) {
+	warn "\n\nPOST PROCESSING:\n";
+	write_hier(@sfs);
+	warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
+    }
+    ok scalar(@sfs), 1;
+    my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
+    ok scalar(@exons), 6;    # total number of exons per splice
+    my %numberh = map {$_->get_tag_values("number") => 1} @exons;
+    my @numbers = keys %numberh;
+    if( $verbosity > 0 ) {
+	warn sprintf "DISTINCT EXONS: %d [@numbers]\n", scalar(@numbers);
+    }
+    ok scalar(@numbers), 6;  # distinct exons
+}
+
+if (1) {
+    
+    # example of a BAD genbank entry
+
+    my @path = ("t","data","dmel_2Lchunk.gb");
+    $seq = getseq(@path);
+    
+    my @topsfs = $seq->get_SeqFeatures;
+    if( $verbosity > 0 ) {
+	warn sprintf "TOP:%d\n", scalar(@topsfs);
+	write_hier(@topsfs);
+    }
+    # UNFLATTEN
+    #
+    # we EXPECT problems with this erroneous record
+    $unflattener->error_threshold(2);
+    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
+                                       -use_magic=>1,
+                                      );
+    my @probs = $unflattener->get_problems;
+    $unflattener->report_problems(\*STDERR) if $verbosity > 0;
+    $unflattener->clear_problems;
+    @sfs = $seq->get_SeqFeatures;
+    if( $verbosity > 0 ) {
+	warn "\n\nPOST PROCESSING:\n";
+	write_hier(@sfs);
+	warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
+    }
+    ok scalar(@sfs), 2;
+    my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
+    ok scalar(@exons), 2;    # total number of exons per splice
+    if( $verbosity > 0 ) {
+	warn sprintf "PROBLEMS ENCOUNTERED: %d (EXPECTED: 6)\n", scalar(@probs);
+    }
+    ok scalar(@probs), 6;
+}
+
+
+sub write_hier {
+    my @sfs = @_;
+    _write_hier(0, @sfs);
+}
+
+sub _write_hier {
+    my $indent = shift;
+    my @sfs = @_;
+    foreach my $sf (@sfs) {
+        my $label = '?';
+        if ($sf->has_tag('gene')) {
+            ($label) = $sf->get_tag_values('gene');
+        }
+        if ($sf->has_tag('product')) {
+            ($label) = $sf->get_tag_values('product');
+        }
+        if ($sf->has_tag('number')) {
+            $label = join("; ", $sf->get_tag_values('number'));
+        }
+        printf "%s%s $label\n", '  ' x $indent, $sf->primary_tag;
+        my @sub_sfs = $sf->sub_SeqFeature;
+        _write_hier($indent+1, @sub_sfs);
+    }
+}
+
+sub getseq {
+    my @path = @_;
+    my $seqio =
+      Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
+                                                       @path
+                                                      ), 
+                      '-format' => 'GenBank');
+    $seqio->verbose($verbosity);
+
+    my $seq = $seqio->next_seq();
+    return $seq;
+}
+
+# 1 2,3
+# 2 1,2
+# 3 4,5
+# 4 1,4,5,6
+# 5 1,4,5,6
+# 6 1,4,5,6

Added: trunk/packages/bioperl/branches/upstream/current/t/UniGene.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/UniGene.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/UniGene.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,225 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# $Id: UniGene.t,v 1.17 2005/10/21 17:28:19 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+#use lib '.','./blib/lib'; # make test should take care of this
+
+my $error;
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 73;
+	plan tests => $NUMTESTS;
+}
+
+use Bio::Cluster::UniGene;
+use Bio::ClusterIO;
+
+my ($str, $unigene); # predeclare variables for strict
+
+
+$str = Bio::ClusterIO->new('-file' => Bio::Root::IO->catfile(
+				"t","data","unigene.data"), '-format' => "unigene");
+ok $str;
+
+ok ( defined ($unigene = $str->next_cluster()));
+
+# check interface implementations to be sure
+ok $unigene->isa("Bio::Cluster::UniGeneI");
+ok $unigene->isa("Bio::ClusterI");
+ok $unigene->isa("Bio::IdentifiableI");
+ok $unigene->isa("Bio::DescribableI");
+
+
+# test specific instance of unigene record provided in the unigene.data file
+ok($unigene->unigene_id, 'Hs.2');
+ok($unigene->title, 'N-acetyltransferase 2 (arylamine N-acetyltransferase)');
+ok($unigene->gene, 'NAT2');
+ok($unigene->cytoband,'8p22');
+ok($unigene->gnm_terminus,'S');
+ok($unigene->homol,'YES');
+ok($unigene->restr_expr,'liver');
+ok($unigene->scount,26);
+ok(scalar @{ $unigene->locuslink }, 1);
+ok(scalar @{ $unigene->chromosome }, 1);
+ok(scalar @{ $unigene->express }, 7);
+ok(scalar @{ $unigene->sts }, 8);
+ok(scalar @{ $unigene->txmap }, 0);
+ok(scalar @{ $unigene->protsim } , 4);
+ok(scalar @{ $unigene->sequences },26);
+
+ok($unigene->locuslink->[0], '10');
+ok($unigene->chromosome->[0], '8');
+ok($unigene->express->[0], 'liver');
+ok($unigene->sts->[0], 'ACC=G59899 UNISTS=137181');
+ok($unigene->protsim->[0], 'ORG=Escherischia coli; PROTGI=16129422; PROTID=ref:NP_415980.1; PCT=24.81; ALN=255');
+
+my ($seq1) = $unigene->next_seq;
+ok($seq1->display_id, 'BX095770');
+#ok($seq1->desc, 'ACC=D90042 NID=g219415 PID=g219416');
+
+# test recognition of species
+ok $unigene->species;
+skip (! $unigene->species, $unigene->species->binomial, "Homo sapiens");
+
+# test accessors of interfaces
+ok ($seq1->namespace, "GenBank");
+ok ($seq1->authority, "NCBI");
+ok ($seq1->alphabet, "dna");
+my $n = 1; # we've seen already one seq
+while($seq1 = $unigene->next_seq()) {
+    $n++;
+}
+ok ($n, 26);
+ok ($unigene->size(), 26);
+ok (scalar($unigene->get_members()), 26);
+ok ($unigene->description, 'N-acetyltransferase 2 (arylamine N-acetyltransferase)');
+ok ($unigene->display_id, "Hs.2");
+ok ($unigene->namespace, "UniGene");
+ok ($unigene->authority, "NCBI");
+
+$unigene->unigene_id('Hs.50');
+ok($unigene->unigene_id, 'Hs.50', 'unigene_id was ' . $unigene->unigene_id);
+
+$unigene->title('title_test');
+ok($unigene->title, 'title_test', 'title was ' . $unigene->title);
+
+$unigene->gene('gene_test');
+ok($unigene->gene, 'gene_test', 'gene was ' . $unigene->gene);
+
+$unigene->cytoband('cytoband_test');
+ok($unigene->cytoband, 'cytoband_test', 'cytoband was ' . $unigene->cytoband);
+
+$unigene->gnm_terminus('gnm_terminus_test');
+ok($unigene->gnm_terminus, 'gnm_terminus_test', 'gnm_terminus was ' . $unigene->gnm_terminus);
+
+$unigene->homol('homol_test');
+ok($unigene->homol, 'homol_test', 'homol was ' . $unigene->homol);
+
+$unigene->restr_expr('restr_expr_test');
+ok($unigene->restr_expr, 'restr_expr_test', 'restr_expr was ' . $unigene->restr_expr);
+
+$unigene->scount('scount_test');
+ok($unigene->scount, 'scount_test', 'scount was ' . $unigene->scount);
+
+my $seq = $unigene->next_seq;
+$seq = $unigene->next_seq;
+ok($seq->isa('Bio::PrimarySeqI'), 1,'expected a Bio::PrimarySeq object but got a ' . ref($seq));
+my $accession = $seq->accession_number;
+ok($accession, 'AI262683');
+my $version = $seq->seq_version();
+ok($version, "1");
+
+# test the sequence parsing is working
+my $ac = $seq->annotation();
+my $simple_ann_object;
+($simple_ann_object) = $ac->get_Annotations('seqtype');
+ok $simple_ann_object;
+ok($simple_ann_object->value(), 'EST', 'seqtype was ' . $simple_ann_object->value);	
+
+# test PERIPHERAL, bug 1708
+$seq = $unigene->next_seq;
+$accession = $seq->accession_number;
+ok($accession, 'CB161982');
+#$ac = $seq->annotation();
+my @acs = $seq->annotation->get_Annotations('peripheral');
+ok $acs[0], 1;
+
+# tests not specific to unigene record provided in the unigene.data file
+my @locuslink_test = ( "58473", "5354" );
+$unigene->locuslink(\@locuslink_test);
+my @locuslink_results;
+while (my $locuslink = $unigene->next_locuslink) {
+	push @locuslink_results, $locuslink;
+}
+ok scalar(@locuslink_results), 2, 'expected locuslink to have 2 entries but it had ' . scalar(@locuslink_results);
+my $locuslink = shift @locuslink_results;
+ok $locuslink, '58473', 'expected 58473 but got ' . $locuslink;
+
+
+my @express_test = qw( kidney heart liver spleen );
+$unigene->express(\@express_test);
+my @express_results;
+while (my $tissue = $unigene->next_express) {
+	push @express_results, $tissue;
+}
+ok scalar(@express_results), 4, 'expected express to have 4 entries but it had ' . scalar(@express_results);
+
+my @chromosome_test = ( "7", "11" );
+$unigene->chromosome(\@chromosome_test);
+my @chromosome_results;
+while (my $chromosome = $unigene->next_chromosome) {
+	push @chromosome_results, $chromosome;
+}
+ok scalar(@chromosome_results), 2, 'expected chromosome to have 2 entries but it had ' . scalar(@chromosome_results);
+my $chromosome = shift @chromosome_results;
+ok $chromosome, '7', 'expected 7 but got ' . $chromosome;
+
+my @sts_test = ( "ACC=- NAME=sts-D90276 UNISTS=37687", "ACC=G29786 NAME=SHGC-35230 UNISTS=58455" );
+$unigene->sts(\@sts_test);
+my @sts_results;
+while (my $sts = $unigene->next_sts) {
+	push @sts_results, $sts;
+}
+ok scalar(@sts_results), 2, 'expected sts to have 2 entries but it had ' . scalar(@sts_results);
+my $sts = shift @sts_results;
+ok $sts, 'ACC=- NAME=sts-D90276 UNISTS=37687', 'expected ACC=- NAME=sts-D90276 UNISTS=37687 but got ' . $sts;
+
+my @txmap_test = ("D19S425-D19S418; MARKER=sts-D90276; RHPANEL=GB4" , "D19S425-D19S418; MARKER=stSG41396; RHPANEL=GB4");
+$unigene->txmap(\@txmap_test);
+my @txmap_results;
+while (my $txmap = $unigene->next_txmap) {
+	push @txmap_results, $txmap;
+}
+ok scalar(@txmap_results), 2, 'expected txmap to have 2 entries but it had ' . scalar(@txmap_results);
+my $txmap = shift @txmap_results;
+ok $txmap, 'D19S425-D19S418; MARKER=sts-D90276; RHPANEL=GB4', 'expected D19S425-D19S418; MARKER=sts-D90276; RHPANEL=GB4 but got ' . $txmap;
+
+my @protsim_test = ("ORG=Homo sapiens; PROTGI=107211; PROTID=pir:A40428; PCT=100; ALN=243" , "ORG=Mus musculus; PROTGI=2497288; PROTID=sp:Q61400; PCT=42; ALN=143");
+$unigene->protsim(\@protsim_test);
+my @protsim_results;
+while (my $protsim = $unigene->next_protsim) {
+    push @protsim_results, $protsim;
+}
+ok scalar(@protsim_results), 2, 'expected protsim to have 2 entries but it had ' . scalar(@protsim_results);
+my $protsim = shift @protsim_results;
+ok $protsim, 'ORG=Homo sapiens; PROTGI=107211; PROTID=pir:A40428; PCT=100; ALN=243', 'expected ORG=Homo sapiens; PROTGI=107211; PROTID=pir:A40428; PCT=100; ALN=243 but got ' . $protsim;
+
+
+
+# do a quick test on Rn record included as the next cluster in the
+# test data file because it has version numbers tacked on the end of
+# the accession numbers in each seq line - NCBI has started doing this
+# now (Sept 2003).
+
+$unigene = $str->next_cluster();
+$seq = $unigene->next_seq;
+ok($seq->isa('Bio::PrimarySeqI'), 1,'expected a Bio::PrimarySeq object but got a ' . ref($seq));
+$version = $seq->seq_version();
+ok($version, '1');
+
+# next cluster contains a // in the title - yes NCBI did that. Nonetheless,
+# this should not trip up the parser:
+
+$unigene = $str->next_cluster();
+ok ($unigene); # previously this would have been undef
+ok ($unigene->unigene_id, "Mm.340763");
+ok ($unigene->title, 'Transcribed locus, strongly similar to NP_003008.1 splicing factor, arginine/serine-rich 3; splicing factor, arginine//serine-rich, 20-kD [Homo sapiens]');
+ok ($unigene->homol, 'YES');
+ok ($unigene->scount, 31);
+ok (scalar($unigene->get_members()), 31);

Added: trunk/packages/bioperl/branches/upstream/current/t/Variation_IO.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/Variation_IO.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/Variation_IO.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: Variation_IO.t,v 1.20 2005/09/17 02:11:21 bosborne Exp $
+
+use strict;
+
+BEGIN {
+	use vars qw($NUMTESTS $error);
+	$error = 0;
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	eval {
+		require Text::Wrap;
+		require XML::Writer;
+	};
+	if ( $@ || $Text::Wrap::VERSION < 98 ) {
+		print STDERR "Skip tests - missing Text::Wrap 98 installed or XML::Writer\n";
+		$error = 1;
+	}
+	$NUMTESTS = 25;
+	plan tests => $NUMTESTS;
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip("Cannot complete Variation_IO tests",1);
+	}
+}
+
+if ($error == 1 ) {
+	exit(0);
+}
+
+use Bio::Variation::IO;
+use Bio::Root::IO;
+
+sub fileformat ($) {
+	my ($file) = shift;
+	my $format;
+	if ($file =~ /.*dat$/) {
+		$format = 'flat';
+	}
+	elsif ($file =~ /.*xml$/ ) {
+		$format = 'xml';
+	} else {
+		print "Wrong extension! [$file]";
+		exit;
+	}
+	return $format;
+}
+
+sub ext ($) {
+	my ($file) = @_;
+	my ($name) = $file =~ /.*.(...)$/;
+	return $name;
+}
+
+sub filename ($) {
+	my ($file) = @_;
+	my ($name) = $file =~ /(.*)....$/;
+	return $name;
+}
+
+sub io {
+    my ($t_file, $o_file) = @_; 
+    my $res;
+
+    my ($t_ext) = ext ($t_file);
+    my ($o_ext) = ext ($o_file);
+    my ($t_format) = fileformat ($t_file);
+    my ($o_format) = fileformat ($o_file);
+    my ($t_name) = filename($t_file);
+    my ($o_name) = filename($o_file);
+
+    my( $before );
+    {
+        local $/ = undef;
+        local *BEFORE;
+        open BEFORE, "$t_name.$o_ext";
+        $before = <BEFORE>;
+        close BEFORE;
+    }
+
+    ok $before;#"Error in reading input file [$t_name.$o_ext]";
+
+    my $in = Bio::Variation::IO->new( -file => $t_file);
+    my  @entries ;
+    while (my $e = $in->next) {
+        push @entries, $e;
+    }
+    my $count = scalar @entries;
+    ok @entries > 0;# "No SeqDiff objects [$count]";
+
+    my $out = Bio::Variation::IO->new( -FILE => "> $o_file", 
+				       -FORMAT => $o_format);
+    my $out_ok = 1;
+    foreach my $e (@entries) {
+        $out->write($e) or $out_ok = 0;
+    }
+    undef($out);  # Flush to disk
+    ok $out_ok;#  "error writing variants";
+
+    my( $after );
+    {
+        local $/ = undef;
+        local *AFTER;
+        open AFTER, $o_file;
+        $after = <AFTER>;
+        close AFTER;
+    }
+
+    ok $after;# "Error in reading in again the output file [$o_file]";
+    ok $before, $after, "test output file differs from input";
+    print STDERR `diff $t_file $o_file` if $before ne $after;
+    unlink($o_file); 
+}
+
+io  (Bio::Root::IO->catfile("t","data","mutations.dat"), 
+     Bio::Root::IO->catfile("t","data","mutations.out.dat")); #1..5
+io  (Bio::Root::IO->catfile("t","data","polymorphism.dat"), 
+     Bio::Root::IO->catfile("t","data","polymorphism.out.dat")); #6..10
+
+eval {
+    require Bio::Variation::IO::xml;
+};
+
+if( $@ ) {
+    print STDERR
+	 "\nThe XML-format conversion requires the CPAN modules ",
+	 "XML::Twig, XML::Writer, and IO::String to be installed ",
+	 "on your system, which they probably aren't. Skipping these tests.\n";
+    for( $Test::ntest..$NUMTESTS) {
+	 skip("No XML::Twig installed", 1);
+    }
+    exit(0);
+}
+
+eval {
+    if( $XML::Writer::VERSION >= 0.5 ) { 
+	io  (Bio::Root::IO->catfile("t","data","mutations.xml"), 
+	     Bio::Root::IO->catfile("t","data","mutations.out.xml")); #10..12
+    } else { 
+	io  (Bio::Root::IO->catfile("t","data","mutations.old.xml"), 
+	     Bio::Root::IO->catfile("t","data","mutations.out.xml")); #10..12
+    }
+};
+
+eval {
+    if( $XML::Writer::VERSION >= 0.5 ) { 
+	io  (Bio::Root::IO->catfile("t","data","polymorphism.xml"), 
+	     Bio::Root::IO->catfile("t","data","polymorphism.out.xml")); #13..14
+    } else { 
+	io  (Bio::Root::IO->catfile("t","data","polymorphism.old.xml"), 
+	     Bio::Root::IO->catfile("t","data","polymorphism.out.xml")); #13..14
+
+    }
+};
+
+eval { 
+    if( $XML::Writer::VERSION >= 0.5 ) { 
+	io  (Bio::Root::IO->catfile("t","data","mutations.dat"), 
+	     Bio::Root::IO->catfile("t","data","mutations.out.xml")); #15..25
+    } else { 
+	io  (Bio::Root::IO->catfile("t","data","mutations.old.dat"), 
+	     Bio::Root::IO->catfile("t","data","mutations.old.out.xml")); #15..25
+    }
+};

Added: trunk/packages/bioperl/branches/upstream/current/t/WABA.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/WABA.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/WABA.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,84 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: WABA.t,v 1.3 2002/09/16 22:13:31 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error;
+
+use strict;
+use lib '.';
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use vars qw($NTESTS);
+    $NTESTS = 62;
+    $error = 0;
+
+    use Test;
+    plan tests => $NTESTS; 
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+use Bio::SearchIO;
+use Bio::Root::IO;
+
+my $wabain = new Bio::SearchIO(-format => 'waba',
+			       -file   => Bio::Root::IO->catfile('t','data',
+								 'test.waba'));
+
+# These won't look the same as the WABA file because Jim's code is 0 based
+# while we (bioperl) are 1 based.
+my @results = ( 
+		[ 'U57623', 'pair1_hs.fa', 'pair1_mm.fa',
+		  [ 'U02884', 3, 
+		    [qw(3833 34 2972 1 243 3688 1 40.9)],
+		    [qw(4211 3022 6914 1 3705 6848 1 43.7)],
+		    [qw(2218 7004 9171 1 6892 8712 1 50.3)],
+		    ], 
+		  ],
+		[ 'X57152', 'pair9_hs.fa', 'pair9_mm.fa',
+		  [ 'X80685', 1, 
+		    [qw(7572 4 5845 1 632 7368 1 46.8)],
+		    ], 
+		  ]
+		);
+while( my $wabar = $wabain->next_result )  {
+    my @r = @{shift @results};
+    ok($wabar->query_name, shift @r);
+    ok($wabar->query_database, shift @r);
+    ok($wabar->database_name, shift @r);
+    while( my $wabah = $wabar->next_hit ) {
+	my (@h) = @{shift @r};
+	ok( $wabah->name, shift @h);
+	ok( $wabah->hsps(), shift @h);
+	while( my $wabahsp = $wabah->next_hsp  ) {
+	    my ( @hsp) = @{shift @h};
+	    ok($wabahsp->length('total'), shift @hsp);
+	    ok($wabahsp->query->start, shift @hsp);
+	    ok($wabahsp->query->end, shift @hsp);
+	    ok($wabahsp->strand('query'), shift @hsp);
+	    ok($wabahsp->start('hit'), shift @hsp);
+	    ok($wabahsp->end('subject'), shift @hsp);
+	    ok($wabahsp->subject->strand, shift @hsp);
+	    ok(length($wabahsp->query_string), $wabahsp->length('total'));
+	    ok(length($wabahsp->hit_string), $wabahsp->length('total'));
+	    ok(length($wabahsp->hmmstate_string), $wabahsp->length('total'));
+	    my $hs = $wabahsp->hit_string;
+	    ok($wabahsp->gaps('hit'), $hs  =~ tr/\-//);
+	    my $qs = $wabahsp->query_string;
+	    ok($wabahsp->gaps('query'),  $qs =~ tr/\-//);
+	    ok(sprintf("%.1f",$wabahsp->percent_identity),shift @hsp);
+	}
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/XEMBL_DB.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/XEMBL_DB.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/XEMBL_DB.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,93 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: XEMBL_DB.t,v 1.5.6.1 2006/10/02 23:10:40 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use constant NUMTESTS => 9;
+my $error;
+
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	plan tests => NUMTESTS;
+
+	unless( eval "require SOAP::Lite; require XML::DOM; 1;" ) {
+      print STDERR "SOAP::Lite and/or XML::DOM not installed. This means that Bio::DB::XEMBL module is not usable. Skipping tests.\n";
+      for( 1..NUMTESTS ) {
+			skip("SOAP::Lite and/or XML::DOM not installed. This means that Bio::DB::XEMBL module is not usable. Skipping tests.\n",1);
+      }
+      $error = 1;
+	}
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+END {
+	foreach ( $Test::ntest..NUMTESTS) {
+		skip('Cannot run XEMBL_DB tests',1);
+	}
+}
+
+require Bio::DB::XEMBL;
+
+my $testnum;
+my $verbose = 1;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+my ($db,$seq,$seqio);
+# get a single seq
+
+$seq = $seqio = undef;
+eval {
+ok defined($db = new Bio::DB::XEMBL(-verbose=>$verbose)); 
+ok(defined($seq = $db->get_Seq_by_acc('J00522')));
+ok( $seq->length, 408);
+ok(defined($seq = $db->get_Seq_by_acc('J02231')));
+ok $seq->id, 'BUM';
+ok( $seq->length, 200); 
+ok(defined($seqio = $db->get_Stream_by_batch(['BUM'])));
+undef $db; # testing to see if we can remove gb
+ok( defined($seq = $seqio->next_seq()));
+ok( $seq->length, 200);
+};
+if( $@ ) { 
+  skip('Skip server may be down',1);
+  exit(0);
+}
+
+$seq = $seqio = undef;
+
+eval {
+    $db = new Bio::DB::XEMBL(-verbose => $verbose,
+			    -retrievaltype => 'tempfile',
+			    -format => 'fasta'
+			    ); 
+    ok( defined($seqio = $db->get_Stream_by_batch(['J00522 AF303112 J02231'])));
+    ok($seqio->next_seq->length, 408);
+    ok($seqio->next_seq->length, 1611);
+    ok($seqio->next_seq->length, 200);
+};
+
+if ($@) {
+    warn "Batch access test failed.\nError: $@\n";
+    foreach ( $Test::ntest..NUMTESTS ) { skip('no network access',1); }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/abi.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/abi.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/abi.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+# -*-Perl-*-
+# $Id: abi.t,v 1.4 2005/09/17 02:11:21 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS);
+BEGIN {
+	$NUMTESTS = 3;
+	$error = 0;
+	# To handle systems with no installed Test module we add the t dir
+	# (where a copy of Test.pm is located) to @INC as a fallback.
+	# NOTE: `use lib` executes at compile time, so it runs whether or not the eval fails.
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	# SeqIO modules abi.pm, ctf.pm, exp.pm, pln.pm, ztr.pm
+	# all require Bio::SeqIO::staden::read, part of bioperl-ext
+	eval {
+		require Bio::SeqIO::staden::read;
+	};
+	if ( $@ ) {
+		$error = 1; # the script exits right after this BEGIN block when set
+		warn "Bio::SeqIO::staden::read from bioperl-ext is not installed or is installed incorrectly - skipping abi.t tests\n";
+   }
+	use Test;
+	plan tests => $NUMTESTS; # the plan is declared even when $error is set
+}
+
+END { 
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Unable to run all of the abi tests',1);
+   }
+}
+
+exit(0) if ( $error == 1 );
+
+use Bio::SeqIO::abi;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => 'abi',
+								 -verbose => $verbose,
+								 -file => Bio::Root::IO->catfile
+								 (qw(t data readtest.abi) ));
+ok(my $seq = $io->next_seq);
+ok($seq->seq, "GCNTATGACGTGGATTNCGAATTCTNNNNNCGGTAGNNGAAAATCCCCGGNCAAGNTTNNCCCTGCAAANGGAANAANNTGGCCGAGCGCTACGGGCTGATCTGGGTGTGCCTGTTTCCCCCGGCCGGGGGGAGNGATGCAGGACATCCAAGTATCCCGCCNATGGNGGGCTGAGGACGAGGACGGCTTCCATCAGATCAGTGTGCCCGGNCTTCGACATCGGCGGCAGCGCCGCGCGCCAACTGGAAGGCTTCATCGACGTGNAGCATTTTGNCTTCNTGCGCACCGCTACCTTCACCCANCCGGACAAGCGCNAANTGCNGNCCTACACCACCACNGAAACACCGACCGGNTTNAATGCCGATTACCTGAGNNGCGTGGCAAATTATTCGGNGGACNTGCCGCTGNCGGACGTGGACCCGAACTTCCAATGGCTGCGTCATTNCTAGGTGAATCTGCCTTTCACCGCCACGCTCACCATCCACTTCCCGGTGCCGGGCAAGCGGTTGGTGATNATGAATGCCGCCAGACCGGTGTCCAAGCACACCANCCGCCTGNTGGTGCCGATCGNCCGCTAATTTCGACACCCATCTGCCNGNGGGAAGACGTACATGNGTTCAACCTTGCACNTNGTTCNAAAAAAACCNTGCCATGGTGGNAANCGCAAGCGGNCCGGAAATATCNGCCGGNTTGACCCGCNTGNTTGGAAAGTGCATATTCCCCNCCGATNCNCAATTTCGAT");
+

Added: trunk/packages/bioperl/branches/upstream/current/t/ace.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ace.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ace.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,84 @@
+# -*-Perl-*-
+# Bioperl Test Harness Script for Modules
+# $Id: ace.t,v 1.1 2005/08/28 03:41:56 bosborne Exp $
+#
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+
+	use Test;
+	plan tests => 7;
+}
+
+if( $error == 1 ) {
+	exit(0);
+}
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $t_file = Bio::Root::IO->catfile("t","data","test.ace");
+my( $before );
+{
+	local $/ = undef;
+	local *BEFORE;
+	open BEFORE, $t_file;
+	$before = <BEFORE>;
+	close BEFORE;
+}
+
+my $a_in = Bio::SeqIO->new( -FILE => $t_file,
+									 -verbose => $verbose,
+									 -FORMAT => 'ace');
+my( @a_seq );
+while (my $a = $a_in->next_seq) {
+	push(@a_seq, $a);
+}
+
+ok @a_seq, 3, 'wrong number of sequence objects';
+
+my $esc_name = $a_seq[1]->display_id;
+ok( $esc_name , 'Name; 4% strewn with \ various / escaped characters',
+	 "bad unescaping of characters, $esc_name");
+
+ok $a_seq[0]->alphabet, 'protein', 'alphabets incorrectly detected';
+ok $a_seq[1]->alphabet, 'dna', 'alphabets incorrectly detected';
+
+my $o_file = Bio::Root::IO->catfile("t","data","test.out.ace");
+my $a_out = Bio::SeqIO->new(-FILE => "> $o_file",
+									 -verbose => $verbose,
+									 -FORMAT => 'ace');
+my $a_out_ok = 1;
+foreach my $a (@a_seq) {
+	$a_out->write_seq($a) or $a_out_ok = 0;
+}
+undef($a_out);  # Flush to disk
+ok $a_out_ok,1,'error writing sequence';
+
+my( $after );
+{
+	local $/ = undef;
+	local *AFTER;
+	open AFTER, $o_file;
+	$after = <AFTER>;
+	close AFTER;
+}
+unlink($o_file);
+
+ok( ($before and $after and ($before eq $after)),1,
+	 'test output file differs from input');
+

Added: trunk/packages/bioperl/branches/upstream/current/t/alignUtilities.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/alignUtilities.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/alignUtilities.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,70 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: alignUtilities.t,v 1.2 2006/06/07 20:26:32 jason Exp $
+use strict;
+use constant NUMTESTS => 9;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	plan tests => NUMTESTS;
+}
+
+
+use Bio::Align::Utilities qw(:all);
+ok(1);
+use Bio::SimpleAlign;
+use Bio::PrimarySeq;
+use Bio::LocatableSeq;
+
+use Bio::AlignIO;
+
+
+# hand crafting the simple input data
+use Data::Dumper;
+
+my $aa_align = new Bio::SimpleAlign;
+$aa_align->add_seq(Bio::LocatableSeq->new(-id => "n1", -seq => "MLIDVG-MLVLR"));
+$aa_align->add_seq(Bio::LocatableSeq->new(-id => "n2", -seq => "MLIDVRTPLALR"));
+$aa_align->add_seq(Bio::LocatableSeq->new(-id => "n3", -seq => "MLI-VR-SLALR"));
+
+my %dnaseqs = ();
+$dnaseqs{'n1'} = Bio::PrimarySeq->new(-id => "n1", -seq => 'atgctgatagacgtaggcatgctagtactgaga');
+$dnaseqs{'n2'} = Bio::PrimarySeq->new(-id => "n2", -seq => 'atgctgatcgacgtacgcaccccgctagcactcaga');
+$dnaseqs{'n3'} = Bio::PrimarySeq->new(-id => "n3", -seq => 'atgttgattgtacgctcgcttgcacttaga');
+my $dna_aln;
+
+ok( $dna_aln = &aa_to_dna_aln($aa_align, \%dnaseqs));
+if( $DEBUG ) {
+    Bio::AlignIO->new(-format=>'clustalw')->write_aln($dna_aln);
+  }
+#print Dumper $dna_aln;
+
+ok $dna_aln->length, 36;
+ok $dna_aln->no_residues, 99;
+ok $dna_aln->no_sequences, 3;
+ok $dna_aln->consensus_string(50), "atgctgat?gacgtacgc????cgctagcact?aga";
+
+$dna_aln->verbose(-1);
+my $replicates;
+ok $replicates = &bootstrap_replicates($dna_aln,3);
+
+ok scalar @$replicates, 3;
+my $repl_aln = pop @$replicates;
+ok $repl_aln->no_sequences, 3;
+
+
+#use IO::String;
+#use Bio::AlignIO;
+#my $string;
+#my $out = IO::String->new($string);
+#
+#my $strout = Bio::AlignIO->new(-fh   => $out,'-format' => 'pfam');
+#$strout->write_aln($repl_aln);
+#ok $string, "";

Added: trunk/packages/bioperl/branches/upstream/current/t/asciitree.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/asciitree.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/asciitree.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,46 @@
+# -*-Perl-*-
+# $Id: asciitree.t,v 1.1.6.1 2006/10/16 17:08:15 sendu Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS $out_file);
+
+BEGIN {
+	$NUMTESTS = 2;
+	use File::Spec;
+	$out_file = File::Spec->catfile(qw(t data tmp-asciitree));
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+use Bio::SeqIO::asciitree;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+
+ok(1); # test 1: reached only if the modules above loaded
+
+# asciitree is a write-only format, so the input record is read as GenBank
+my $in = Bio::SeqIO->new(-format => 'genbank',
+								 -verbose => $verbose,
+								 -file => Bio::Root::IO->catfile
+								 (qw(t data AE003644_Adh-genomic.gb)) ); # explicit parens: qw() used as parentheses is rejected by perl >= 5.18
+my $seq = $in->next_seq;
+
+my $out = Bio::SeqIO->new(-file => ">$out_file",
+								  -verbose => $verbose,
+								  -format => 'asciitree');
+$out->write_seq($seq);
+ok (-e $out_file); # test 2: the output file exists after write_seq
+
+END {
+	unlink $out_file if -e $out_file;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/bsml_sax.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/bsml_sax.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/bsml_sax.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+# -*-Perl-*-
+# Bioperl Test Harness Script for Modules
+# $Id: bsml_sax.t,v 1.2.6.1 2006/11/30 09:24:00 sendu Exp $
+#
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS);
+BEGIN {
+	$NUMTESTS = 16;
+	$error = 0;
+    # To handle systems with no installed Test::More we add t/lib to @INC
+    # (presumably where a fallback copy is bundled - confirm against the tree).
+    # `use lib` executes at compile time, so it runs whether or not the eval fails.
+    eval { require Test::More; };
+    if ( $@ ) {
+		use lib 't/lib'; # forward slash: 't\lib' is not a valid path on non-Windows systems
+    }
+    use Test::More;
+	
+	eval {
+		require XML::SAX;
+		require XML::SAX::Writer;
+		require XML::SAX::Base;
+	};
+	if ($@) {
+		plan skip_all => 'XML::SAX::Base or XML::SAX or XML::SAX::Writer not found - skipping bsml_sax tests';
+	}
+	else {
+		plan tests => $NUMTESTS;
+	}
+}
+
+use_ok('Bio::SeqIO');
+use_ok('Bio::Root::IO');
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+
+my $str = Bio::SeqIO->new(-format => 'bsml_sax',
+			  -verbose => $verbose,
+			  -file => Bio::Root::IO->catfile
+			  (qw(t data U83300.bsml) ));
+my $seq = $str->next_seq;
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+my @refs = $seq->annotation->get_Annotations('reference');
+is(@refs, 2);
+is($seq->display_id,'MIVN83300');
+is($seq->molecule ,'dna');
+ok(! $seq->is_circular);
+is($seq->get_dates,2);
+is($seq->accession_number, 'U83300');
+is($seq->seq_version,1);
+my @feats = $seq->get_SeqFeatures;
+is(@feats, 2);
+is($feats[1]->start, 1);
+is($feats[1]->end, 946);
+is($feats[1]->get_tag_values('db_xref'), 3);
+is($seq->annotation->get_Annotations('reference'),2);
+is($seq->annotation->get_Annotations('dblink'),2);

Added: trunk/packages/bioperl/branches/upstream/current/t/chaosxml.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/chaosxml.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/chaosxml.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,65 @@
+# -*-Perl-*-
+# $Id: chaosxml.t,v 1.3.4.2 2006/11/08 17:25:55 sendu Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS $out_file);
+BEGIN {
+	$NUMTESTS = 2;
+	$error = 0;
+	use File::Spec;
+	$out_file = File::Spec->catfile(qw(t data tmp-chaosxml));
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	#
+	eval {
+		require Data::Stag;
+	};
+	if ( $@ ) {
+		$error = 1;
+		warn "Data::Stag::XMLWriter not installed, cannot perform chaosxml tests\n";
+   } 
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+END { 
+   foreach ( $Test::ntest..$NUMTESTS) {
+      skip('Unable to run all of the chaosxml tests',1);
+   }
+}
+
+
+if ( $error == 1 ) {
+  exit(0);
+}
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1); # test 1: reached only if the modules above loaded
+
+# currently chaosxml is write-only, so the input record is read as GenBank
+my $in = Bio::SeqIO->new(-format => 'genbank',
+								 -verbose => $verbose,
+								 -file => Bio::Root::IO->catfile
+								 (qw(t data AE003644_Adh-genomic.gb)) ); # explicit parens: qw() used as parentheses is rejected by perl >= 5.18
+
+my $seq = $in->next_seq;
+
+my $out = Bio::SeqIO->new(-file => ">$out_file",
+								  -verbose => $verbose,
+								  -format => 'chaosxml');
+$out->write_seq($seq);
+ok (-e $out_file); # test 2: the output file exists after write_seq
+
+END {
+	unlink $out_file if -e $out_file;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/cigarstring.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/cigarstring.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/cigarstring.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,39 @@
+# -*-Perl-*-
+# $Id: cigarstring.t,v 1.2.6.1 2006/10/16 17:08:15 sendu Exp $
+# written by Juguang Xiao
+
+use strict;
+
+BEGIN {
+    use lib 't';
+    use Test;
+    plan tests => 3;
+}
+
+# END { }
+
+use Bio::SearchIO;
+
+my $searchio = new Bio::SearchIO(
+	 -format => 'blast',
+    -file => File::Spec->catfile(qw(t data blast.report))
+);
+
+my @hsps = ();
+while(my $result = $searchio->next_result){
+    while(my $hit = $result->next_hit){
+        while(my $hsp = $hit->next_hsp){
+            push @hsps, $hsp;
+        }
+    }
+}
+
+my $first_hsp = shift @hsps;
+my $first_hsp_cigar_string = '504M'; 
+ok $first_hsp->cigar_string, $first_hsp_cigar_string;
+ok $first_hsp->cigar_string, $first_hsp_cigar_string; # fetch from hash
+
+my $second_hsp = $hsps[0];
+my $second_hsp_cigar_string = '29M18I22M11I20MD33M4I22M3I25M5I21MI33MD14M';
+ok $second_hsp->cigar_string, $second_hsp_cigar_string;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/consed.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/consed.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/consed.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,133 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: consed.t,v 1.5 2002/10/30 14:21:59 heikki Exp $
+#
+#####
+#
+# this script simply tests parsing ace* files
+# - it cares nothing about the chromat_dir,phd_dir,edit_dir types of things
+#
+#####
+
+use strict;
+use vars qw($TESTCOUNT);
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    $TESTCOUNT = 16;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::Root::IO;
+use Bio::Tools::Alignment::Consed;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || -1;
+
+if( $^O =~ /mswin/i ) {
+    for ( $Test::ntest..$TESTCOUNT ) {
+	skip("Cannot run consed module on windows",1,1);	
+    }
+    exit(0);
+}
+print("Checking if the Bio::Tools::Alignment::Consed module could be used...\n") if $DEBUG > 0;
+	# test 1
+ok(1);
+
+	# scope some variables
+my($o_consed,@singlets,@singletons,@pairs,@doublets,@multiplets,$invoker);
+
+# instantiate a new object
+my $passed_in_acefile = Bio::Root::IO->catfile("t","data","acefile.ace.1");
+$o_consed = Bio::Tools::Alignment::Consed->new(-acefile => $passed_in_acefile);
+print("Checking if a new CSM::Consed object was created...\n") if( $DEBUG > 0);
+ok defined $o_consed;
+
+	# set the verbosity to a valid value (1)
+ok my $verbosity = $o_consed->verbose(1);
+
+# set the verbosity to "none"
+$o_consed->verbose(0);
+#
+print("Checking if the new object is a reference to a Bio::Tools::Alignment::Consed object...\n") if($DEBUG > 0);
+# test 3
+ok( ref($o_consed),'Bio::Tools::Alignment::Consed');
+
+print("Checking if singlets can be successfully set...\n"), if( $DEBUG > 0);
+# test 4
+ok ($o_consed->set_singlets() != 1);
+
+print("Checking if the number of singlets can be retrieved and if that number is correct (65)...\n") if($DEBUG > 0);	
+@singlets = $o_consed->get_singlets();
+ok (scalar(@singlets), 65);
+
+print("Checking if the doublets can be set...\n"), if( $DEBUG> 0);
+ok ($o_consed->set_doublets() != 1);
+
+print("Checking if the doublets can be retreived...\n") if($DEBUG > 0);
+ok @doublets = $o_consed->get_doublets();
+
+print(scalar(@doublets)." doublets were found\n") if ($DEBUG > 0);
+print("Checking if the number of doublets can be retrieved and if that number is correct (45)...\n") if($DEBUG > 0);
+ok (scalar(@doublets), 45);
+
+print("Checking if the number of pairs can be retrieved and if that number is correct (1)...\n") if($DEBUG > 0);
+@pairs = $o_consed->get_pairs();
+ok (scalar(@pairs),1);
+
+print("Checking if the number of multiplets can be retrieved and if that number is correct (4)...\n") if($DEBUG > 0);
+@multiplets = $o_consed->get_multiplets();
+ok (scalar(@multiplets), 4);
+
+print("Checking if the number of singletons can be retrieved and if that number is correct (3)...\n") if($DEBUG > 0);
+@singletons = $o_consed->get_singletons();
+ok (scalar(@singletons), 3);
+my($total_object_sequences, $total_grep_sequences);
+print("Finding out, via grep, how many sequences there are in the acefile _and_ in the singlets file...\n") if $DEBUG > 0; 
+ok($total_grep_sequences = $o_consed->count_sequences_with_grep(), 179);
+
+print("Getting the statistics from the Bio::Tools::Alignment::Consed object to compare the total number of sequences accounted for there to the number of sequences found via grep...\n") if($DEBUG > 0);
+ok($total_object_sequences = $o_consed->sum_lets("total_only"),179);
+print("Match?\n") if($DEBUG > 0) ;
+ok ($total_object_sequences, $total_grep_sequences);
+
+print("These are the statistics. Look right? ".$o_consed->sum_lets()."\n") if($DEBUG > 0);
+ok($o_consed->sum_lets(),'Singt/singn/doub/pair/mult/total : 65,3,45(90),1(2),4(19),179');
+
+print("Dumping out the hash in a compact way...\n")if($DEBUG > 0)  ;
+$o_consed->dump_hash_compact() if($DEBUG > 0)  ;
+
+# print("Dumping out the hash in an ugly way...\n");
+# $o_consed->dump_hash();
+
+sub allele_script {
+	my($a,$trunc,$rev); # NOTE(review): $a is never assigned and this sub is not called in this script; looks like leftover Bio::Variation::Allele checks - confirm
+	ok defined $a; # was a trailing comma, which passed the next ok() call as an argument to this one
+	ok ref($a), 'Bio::Variation::Allele';
+	
+	ok $a->accession_number(), 'X677667';
+	ok $a->seq(), 'ACTGACTGACTG';
+	ok $a->display_id(),'new-id' ;
+	ok $a->desc, 'Sample Bio::Seq object';
+	ok $a->moltype(), 'dna';
+
+	ok defined($trunc = $a->trunc(1,4));
+	ok $trunc->seq(), 'ACTG', "Expecting ACTG. Got ". $trunc->seq();
+
+	ok defined($rev = $a->revcom());
+	ok $rev->seq(), 'CAGTCAGTCAGT';
+
+	$a->is_reference(1);
+	ok $a->is_reference;
+
+	$a->repeat_unit('ACTG');
+	ok $a->repeat_unit, 'ACTG';
+	
+	$a->repeat_count(3);
+	ok $a->repeat_count, 3;
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/consed.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/ctf.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ctf.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ctf.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+# -*-Perl-*-
+# $Id: ctf.t,v 1.3 2005/09/17 02:11:21 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS);
+BEGIN {
+	$NUMTESTS = 3;
+	$error = 0;
+	# To handle systems with no installed Test module we add the t dir
+	# (where a copy of Test.pm is located) to @INC as a fallback.
+	# NOTE: `use lib` executes at compile time, so it runs whether or not the eval fails.
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	# SeqIO modules abi.pm, ctf.pm, exp.pm, pln.pm, ztr.pm
+	# all require Bio::SeqIO::staden::read, part of bioperl-ext
+	eval {
+		require Bio::SeqIO::staden::read;
+	};
+	if ( $@ ) {
+		$error = 1; # the script exits right after the END block definition when set
+		warn "Bio::SeqIO::staden::read of bioperl-ext is not installed or is installed incorrectly - skipping ctf.t tests\n";
+   }
+	use Test;
+	plan tests => $NUMTESTS; # the plan is declared even when $error is set
+}
+
+END { 
+    foreach ( $Test::ntest..$NUMTESTS) {
+		 skip('Unable to run all of the ctf tests',1);
+	 }
+}
+
+exit(0) if ( $error == 1 );
+
+use Bio::SeqIO::ctf;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => 'ctf',
+			 -verbose => $verbose,
+			 -file => Bio::Root::IO->catfile
+			 (qw(t data readtest.ctf) ));
+ok(my $seq = $io->next_seq);
+ok($seq->seq, "GATGATTCCGGCTTCGGACGACTCTAGAGGATCCCCATTTTTATAGTTTTTATCTTGTAATAGATGTTTAGATTTTTCGTTGTAATTATTTTCTTTATTGTTGAAATTAGTATCTCTGGGTAATTTATCATATTCTCTGGAAAATGATTTACTATCACTAGATACTTCATAAGATTTATAATCTTTATTATGAAAATCATCTCTATTTTTCAAATTATTATTATATCTATCAAAGTTTCTGTCTTCATTATATCTATTAGCATATCTATCTTTATCTTTATCCCTATCACTATATCTATCATATGGTTCATCTTGTTCAACCGATCAGACTCGATTCGCCATCGCCTCTAACGGATGGCCGCTCCCCCTCTCATACCTCGCTCCCCTCGACATCCCCCGTCTCGCCACCCTATCCGCCCCCTTCATCACCCCCCCTTATCCACACCCTCACCCCCCGCATCGCGCACCCACGACCACCCGAAGAACCGCCCTTACTCCCAAGTACGCCCCGACCTCCATCACCCTATGCGGTACCACTCCCACCACACCCAGTCCTACTTTCGCCCGCACATCGGCCCCGCTTCAGACAGCTCCCAACTACGCAACCCACGCTTGTTCTTGTTCACACTCGAATACTCGAATCTCTCATTACTCCGCGGACTCCGCCGCACCTGTGCACCATTAACTGTGTAGCGCCTGAACCGGCACCTCTGATTACCACTTCCTCCACCAGCACAGTCCTATTACCGCATGTCGCTCTGCTAAGACAGTGCAAGACTCTGCGGTCGCTCTGACCCGCATCCGCCAGGGCACCTCTCACCCTCGCTGGCCACCCCGCCCCCCTCTCCCTGCCCCTTCATTCCCCCAAACCGCTTTCAACGGGACACACCCCTCCGCGGCGGACCACAACTCGCCGTCGGCCACCACTCACACCTTCCCTCCTCCTTCCCCCACATCACGCCAACCCCGTGGGACGGCTCTCCCGCGGCTACGACGCGCAACCCCCCCTCGCCGCTTCCCCCCCAACTTCCCACGGGCTCCCCTCCGCCCCTTACCCGCGAGGAGCTTCACCCGCGAACCACCTCCCCCCTTTCCCAACAGCACCG");
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/1A11.pdb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/1A11.pdb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/1A11.pdb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4028 @@
+HEADER    ACETYLCHOLINE RECEPTOR                  19-DEC-97   1A11              
+TITLE     NMR STRUCTURE OF MEMBRANE SPANNING SEGMENT 2 OF THE                   
+TITLE    2 ACETYLCHOLINE RECEPTOR IN DPC MICELLES, 10 STRUCTURES                
+COMPND    MOL_ID: 1;                                                            
+COMPND   2 MOLECULE: ACETYLCHOLINE RECEPTOR M2;                                 
+COMPND   3 CHAIN: NULL;                                                         
+COMPND   4 SYNONYM: ACHR M2;                                                    
+COMPND   5 ENGINEERED: YES                                                      
+SOURCE    MOL_ID: 1;                                                            
+SOURCE   2 ORGANISM_SCIENTIFIC: RATTUS NORVEGICUS;                              
+SOURCE   3 ORGANISM_COMMON: RAT;                                                
+SOURCE   4 ORGAN: BRAIN;                                                        
+SOURCE   5 EXPRESSION_SYSTEM: ESCHERICHIA COLI;                                 
+SOURCE   6 EXPRESSION_SYSTEM_STRAIN: BL21;                                      
+SOURCE   7 EXPRESSION_SYSTEM_PLASMID: PGEX FUSION                               
+KEYWDS    ACETYLCHOLINE RECEPTOR, M2, MICELLE                                   
+EXPDTA    NMR, 10 STRUCTURES                                                    
+AUTHOR    J.J.GESELL,W.SUN,M.MONTAL,S.J.OPELLA                                  
+REVDAT   1   08-APR-98 1A11    0                                                
+JRNL        AUTH   S.J.OPELLA,J.GESELL,A.P.VALENTE,F.M.MARASSI,                 
+JRNL        AUTH 2 M.OBLATT-MONTAL,W.SUN,A.FERRER-MONTIEL,M.MONTAL              
+JRNL        TITL   STRUCTURAL STUDIES OF THE PORE-LINING SEGMENTS OF            
+JRNL        TITL 2 NEUROTRANSMITTER-GATED CHANNELS                              
+JRNL        REF    CHEMTRACTS: BIOCHEM.MOL.      V.  10   153 1997              
+JRNL        REF  2 BIOL.                                                        
+JRNL        REFN   ASTM CMBIE5  US ISSN 1045-2680                 2176          
+REMARK   1                                                                      
+REMARK   2                                                                      
+REMARK   2 RESOLUTION. NOT APPLICABLE.                                          
+REMARK   3                                                                      
+REMARK   3 REFINEMENT.                                                          
+REMARK   3   PROGRAM     : X-PLOR 3.1                                           
+REMARK   3   AUTHORS     : BRUNGER                                              
+REMARK   3                                                                      
+REMARK   3  OTHER REFINEMENT REMARKS: REFINEMENT DETAILS CAN BE                 
+REMARK   3  FOUND IN THE JRNL CITATION ABOVE.                                   
+REMARK   4                                                                      
+REMARK   4 1A11 COMPLIES WITH FORMAT V. 2.2, 16-DEC-1996                        
+REMARK 210                                                                      
+REMARK 210 EXPERIMENTAL DETAILS                                                 
+REMARK 210  EXPERIMENT TYPE                : NMR                                
+REMARK 210  TEMPERATURE           (KELVIN) : 313                                
+REMARK 210  PH                             : 5.5                                
+REMARK 210                                                                      
+REMARK 210  NMR EXPERIMENTS CONDUCTED      : HMQC-NOESY, HMQC-TOCSY,            
+REMARK 210   HNCA, HNCOCA, HNHA                                                 
+REMARK 210  SPECTROMETER FIELD STRENGTH    : 600, 750                           
+REMARK 210  SPECTROMETER MODEL             : DMX600, DMX750                     
+REMARK 210  SPECTROMETER MANUFACTURER      : BRUKER                             
+REMARK 210                                                                      
+REMARK 210  STRUCTURE DETERMINATION.                                            
+REMARK 210   SOFTWARE USED                 : X-PLOR                             
+REMARK 210   METHOD USED                   : DISTANCE GEOMETRY,                 
+REMARK 210   SIMULATED ANNEALING                                                
+REMARK 210                                                                      
+REMARK 210 CONFORMERS, NUMBER CALCULATED   : 30                                 
+REMARK 210 CONFORMERS, NUMBER SUBMITTED    : 10                                 
+REMARK 210 CONFORMERS, SELECTION CRITERIA  : LOWEST ENERGY                      
+REMARK 210                                                                      
+REMARK 210 REMARK:                                                              
+REMARK 210 IONIC_STRENGTH: 450MM DPC                                            
+REMARK 210 PRESSURE: 1 ATM                                                      
+REMARK 210 SOLVENT SYSTEM: WATER                                                
+REMARK 210                                                                      
+REMARK 210 HNCA AND HNCOCA WERE USED TO OBTAIN BACKBONE RESONANCE               
+REMARK 210 ASSIGNMENTS.  15N AND 13C EDITED HMQC-NOESY SPECTRA WERE             
+REMARK 210 USED TO TABULATE NOE RESTRAINTS.  AN HNHA SPECTRUM WAS               
+REMARK 210 TO OBTAIN HNCAH COUPLING CONSTANTS.                                  
+REMARK 215                                                                      
+REMARK 215 NMR STUDY                                                            
+REMARK 215 THE COORDINATES IN THIS ENTRY WERE GENERATED FROM SOLUTION           
+REMARK 215 NMR DATA.  PROTEIN DATA BANK CONVENTIONS REQUIRE THAT                
+REMARK 215 CRYST1 AND SCALE RECORDS BE INCLUDED, BUT THE VALUES ON              
+REMARK 215 THESE RECORDS ARE MEANINGLESS.                                       
+REMARK 999                                                                      
+REMARK 999 SEQUENCE                                                             
+REMARK 999 1A11       SWS     P25110       1 -   273 NOT IN ATOMS LIST          
+REMARK 999 1A11       SWS     P25110     299 -   517 NOT IN ATOMS LIST          
+DBREF  1A11      1    25  SWS    P25110   ACHD_RAT       274    298             
+SEQADV 1A11 GLY      1  SWS  P25110    CYS   274 CONFLICT                       
+SEQADV 1A11 SER      2  SWS  P25110    GLY   275 CONFLICT                       
+SEQADV 1A11 MET      5  SWS  P25110    THR   278 CONFLICT                       
+SEQADV 1A11 THR      7  SWS  P25110    VAL   280 CONFLICT                       
+SEQADV 1A11 ALA     16  SWS  P25110    SER   289 CONFLICT                       
+SEQADV 1A11 THR     22  SWS  P25110    ILE   295 CONFLICT                       
+SEQADV 1A11 GLN     24  SWS  P25110    LYS   297 CONFLICT                       
+SEQRES   1     25  GLY SER GLU LYS MET SER THR ALA ILE SER VAL LEU LEU          
+SEQRES   2     25  ALA GLN ALA VAL PHE LEU LEU LEU THR SER GLN ARG              
+HELIX    1   1 SER      2  SER     23  1                                  22    
+CRYST1    1.000    1.000    1.000  90.00  90.00  90.00 P 1           1          
+ORIGX1      1.000000  0.000000  0.000000        0.00000                         
+ORIGX2      0.000000  1.000000  0.000000        0.00000                         
+ORIGX3      0.000000  0.000000  1.000000        0.00000                         
+SCALE1      1.000000  0.000000  0.000000        0.00000                         
+SCALE2      0.000000  1.000000  0.000000        0.00000                         
+SCALE3      0.000000  0.000000  1.000000        0.00000                         
+MODEL        1                                                                  
+ATOM      1  N   GLY     1      15.933   2.572  -9.465  1.00  0.00           N  
+ATOM      2  CA  GLY     1      15.248   3.793  -8.849  1.00  0.00           C  
+ATOM      3  C   GLY     1      14.402   4.670  -9.708  1.00  0.00           C  
+ATOM      4  O   GLY     1      13.527   4.207 -10.413  1.00  0.00           O  
+ATOM      5 1H   GLY     1      16.550   2.876 -10.244  1.00  0.00           H  
+ATOM      6 2H   GLY     1      16.502   2.091  -8.738  1.00  0.00           H  
+ATOM      7 3H   GLY     1      15.212   1.917  -9.829  1.00  0.00           H  
+ATOM      8 1HA  GLY     1      14.396   3.552  -8.438  1.00  0.00           H  
+ATOM      9 2HA  GLY     1      16.004   4.260  -8.092  1.00  0.00           H  
+ATOM     10  N   SER     2      14.637   5.953  -9.674  1.00  0.00           N  
+ATOM     11  CA  SER     2      13.825   6.883 -10.510  1.00  0.00           C  
+ATOM     12  C   SER     2      12.385   6.909  -9.995  1.00  0.00           C  
+ATOM     13  O   SER     2      12.011   6.147  -9.125  1.00  0.00           O  
+ATOM     14  CB  SER     2      13.840   6.409 -11.963  1.00  0.00           C  
+ATOM     15  OG  SER     2      13.830   7.539 -12.826  1.00  0.00           O  
+ATOM     16  H   SER     2      15.348   6.305  -9.099  1.00  0.00           H  
+ATOM     17  HA  SER     2      14.245   7.878 -10.452  1.00  0.00           H  
+ATOM     18 1HB  SER     2      14.731   5.831 -12.147  1.00  0.00           H  
+ATOM     19 2HB  SER     2      12.970   5.794 -12.150  1.00  0.00           H  
+ATOM     20  HG  SER     2      13.081   7.454 -13.419  1.00  0.00           H  
+ATOM     21  N   GLU     3      11.572   7.782 -10.525  1.00  0.00           N  
+ATOM     22  CA  GLU     3      10.156   7.859 -10.065  1.00  0.00           C  
+ATOM     23  C   GLU     3       9.466   6.514 -10.300  1.00  0.00           C  
+ATOM     24  O   GLU     3       8.448   6.217  -9.708  1.00  0.00           O  
+ATOM     25  CB  GLU     3       9.424   8.950 -10.848  1.00  0.00           C  
+ATOM     26  CG  GLU     3       9.409   8.589 -12.334  1.00  0.00           C  
+ATOM     27  CD  GLU     3       9.028   9.823 -13.154  1.00  0.00           C  
+ATOM     28  OE1 GLU     3       7.864  10.186 -13.137  1.00  0.00           O  
+ATOM     29  OE2 GLU     3       9.909  10.385 -13.784  1.00  0.00           O  
+ATOM     30  H   GLU     3      11.893   8.388 -11.225  1.00  0.00           H  
+ATOM     31  HA  GLU     3      10.133   8.095  -9.011  1.00  0.00           H  
+ATOM     32 1HB  GLU     3       8.409   9.033 -10.486  1.00  0.00           H  
+ATOM     33 2HB  GLU     3       9.932   9.893 -10.713  1.00  0.00           H  
+ATOM     34 1HG  GLU     3      10.390   8.247 -12.630  1.00  0.00           H  
+ATOM     35 2HG  GLU     3       8.686   7.807 -12.507  1.00  0.00           H  
+ATOM     36  N   LYS     4      10.011   5.698 -11.161  1.00  0.00           N  
+ATOM     37  CA  LYS     4       9.385   4.373 -11.430  1.00  0.00           C  
+ATOM     38  C   LYS     4       9.476   3.506 -10.174  1.00  0.00           C  
+ATOM     39  O   LYS     4       8.477   3.109  -9.607  1.00  0.00           O  
+ATOM     40  CB  LYS     4      10.116   3.686 -12.585  1.00  0.00           C  
+ATOM     41  CG  LYS     4       9.392   3.987 -13.898  1.00  0.00           C  
+ATOM     42  CD  LYS     4       9.734   5.407 -14.356  1.00  0.00           C  
+ATOM     43  CE  LYS     4       9.186   5.637 -15.765  1.00  0.00           C  
+ATOM     44  NZ  LYS     4      10.318   5.708 -16.733  1.00  0.00           N  
+ATOM     45  H   LYS     4      10.833   5.956 -11.628  1.00  0.00           H  
+ATOM     46  HA  LYS     4       8.347   4.514 -11.694  1.00  0.00           H  
+ATOM     47 1HB  LYS     4      11.131   4.055 -12.639  1.00  0.00           H  
+ATOM     48 2HB  LYS     4      10.129   2.619 -12.419  1.00  0.00           H  
+ATOM     49 1HG  LYS     4       9.704   3.279 -14.652  1.00  0.00           H  
+ATOM     50 2HG  LYS     4       8.326   3.906 -13.748  1.00  0.00           H  
+ATOM     51 1HD  LYS     4       9.290   6.121 -13.676  1.00  0.00           H  
+ATOM     52 2HD  LYS     4      10.806   5.535 -14.363  1.00  0.00           H  
+ATOM     53 1HE  LYS     4       8.531   4.821 -16.036  1.00  0.00           H  
+ATOM     54 2HE  LYS     4       8.633   6.564 -15.790  1.00  0.00           H  
+ATOM     55 1HZ  LYS     4      11.214   5.546 -16.232  1.00  0.00           H  
+ATOM     56 2HZ  LYS     4      10.195   4.979 -17.464  1.00  0.00           H  
+ATOM     57 3HZ  LYS     4      10.333   6.648 -17.179  1.00  0.00           H  
+ATOM     58  N   MET     5      10.667   3.212  -9.730  1.00  0.00           N  
+ATOM     59  CA  MET     5      10.817   2.375  -8.507  1.00  0.00           C  
+ATOM     60  C   MET     5      10.231   3.125  -7.310  1.00  0.00           C  
+ATOM     61  O   MET     5       9.942   2.546  -6.281  1.00  0.00           O  
+ATOM     62  CB  MET     5      12.300   2.093  -8.259  1.00  0.00           C  
+ATOM     63  CG  MET     5      12.474   0.643  -7.803  1.00  0.00           C  
+ATOM     64  SD  MET     5      13.033   0.615  -6.081  1.00  0.00           S  
+ATOM     65  CE  MET     5      14.531  -0.366  -6.347  1.00  0.00           C  
+ATOM     66  H   MET     5      11.461   3.544 -10.198  1.00  0.00           H  
+ATOM     67  HA  MET     5      10.290   1.442  -8.642  1.00  0.00           H  
+ATOM     68 1HB  MET     5      12.854   2.254  -9.171  1.00  0.00           H  
+ATOM     69 2HB  MET     5      12.669   2.757  -7.491  1.00  0.00           H  
+ATOM     70 1HG  MET     5      11.531   0.125  -7.883  1.00  0.00           H  
+ATOM     71 2HG  MET     5      13.208   0.155  -8.428  1.00  0.00           H  
+ATOM     72 1HE  MET     5      14.648  -0.564  -7.404  1.00  0.00           H  
+ATOM     73 2HE  MET     5      15.389   0.181  -5.982  1.00  0.00           H  
+ATOM     74 3HE  MET     5      14.450  -1.300  -5.816  1.00  0.00           H  
+ATOM     75  N   SER     6      10.055   4.411  -7.436  1.00  0.00           N  
+ATOM     76  CA  SER     6       9.488   5.201  -6.309  1.00  0.00           C  
+ATOM     77  C   SER     6       7.971   4.999  -6.259  1.00  0.00           C  
+ATOM     78  O   SER     6       7.371   4.988  -5.202  1.00  0.00           O  
+ATOM     79  CB  SER     6       9.798   6.683  -6.522  1.00  0.00           C  
+ATOM     80  OG  SER     6       9.626   7.380  -5.295  1.00  0.00           O  
+ATOM     81  H   SER     6      10.295   4.858  -8.276  1.00  0.00           H  
+ATOM     82  HA  SER     6       9.926   4.870  -5.380  1.00  0.00           H  
+ATOM     83 1HB  SER     6      10.818   6.795  -6.853  1.00  0.00           H  
+ATOM     84 2HB  SER     6       9.132   7.084  -7.274  1.00  0.00           H  
+ATOM     85  HG  SER     6      10.254   7.025  -4.662  1.00  0.00           H  
+ATOM     86  N   THR     7       7.349   4.835  -7.394  1.00  0.00           N  
+ATOM     87  CA  THR     7       5.873   4.632  -7.414  1.00  0.00           C  
+ATOM     88  C   THR     7       5.562   3.154  -7.171  1.00  0.00           C  
+ATOM     89  O   THR     7       4.458   2.792  -6.815  1.00  0.00           O  
+ATOM     90  CB  THR     7       5.317   5.058  -8.775  1.00  0.00           C  
+ATOM     91  OG1 THR     7       6.248   4.712  -9.792  1.00  0.00           O  
+ATOM     92  CG2 THR     7       5.087   6.570  -8.782  1.00  0.00           C  
+ATOM     93  H   THR     7       7.852   4.843  -8.235  1.00  0.00           H  
+ATOM     94  HA  THR     7       5.417   5.229  -6.636  1.00  0.00           H  
+ATOM     95  HB  THR     7       4.381   4.554  -8.956  1.00  0.00           H  
+ATOM     96  HG1 THR     7       5.969   5.138 -10.606  1.00  0.00           H  
+ATOM     97 1HG2 THR     7       4.638   6.870  -7.847  1.00  0.00           H  
+ATOM     98 2HG2 THR     7       6.031   7.077  -8.909  1.00  0.00           H  
+ATOM     99 3HG2 THR     7       4.427   6.829  -9.598  1.00  0.00           H  
+ATOM    100  N   ALA     8       6.530   2.298  -7.352  1.00  0.00           N  
+ATOM    101  CA  ALA     8       6.294   0.848  -7.125  1.00  0.00           C  
+ATOM    102  C   ALA     8       6.505   0.543  -5.645  1.00  0.00           C  
+ATOM    103  O   ALA     8       5.770  -0.210  -5.037  1.00  0.00           O  
+ATOM    104  CB  ALA     8       7.279   0.031  -7.962  1.00  0.00           C  
+ATOM    105  H   ALA     8       7.416   2.609  -7.630  1.00  0.00           H  
+ATOM    106  HA  ALA     8       5.282   0.597  -7.404  1.00  0.00           H  
+ATOM    107 1HB  ALA     8       8.123   0.649  -8.228  1.00  0.00           H  
+ATOM    108 2HB  ALA     8       7.619  -0.818  -7.390  1.00  0.00           H  
+ATOM    109 3HB  ALA     8       6.789  -0.313  -8.861  1.00  0.00           H  
+ATOM    110  N   ILE     9       7.510   1.131  -5.059  1.00  0.00           N  
+ATOM    111  CA  ILE     9       7.779   0.884  -3.620  1.00  0.00           C  
+ATOM    112  C   ILE     9       6.757   1.641  -2.774  1.00  0.00           C  
+ATOM    113  O   ILE     9       6.393   1.212  -1.701  1.00  0.00           O  
+ATOM    114  CB  ILE     9       9.190   1.359  -3.268  1.00  0.00           C  
+ATOM    115  CG1 ILE     9       9.314   2.855  -3.562  1.00  0.00           C  
+ATOM    116  CG2 ILE     9      10.212   0.591  -4.109  1.00  0.00           C  
+ATOM    117  CD1 ILE     9       9.457   3.623  -2.247  1.00  0.00           C  
+ATOM    118  H   ILE     9       8.087   1.736  -5.570  1.00  0.00           H  
+ATOM    119  HA  ILE     9       7.693  -0.172  -3.420  1.00  0.00           H  
+ATOM    120  HB  ILE     9       9.379   1.178  -2.220  1.00  0.00           H  
+ATOM    121 1HG1 ILE     9      10.183   3.030  -4.179  1.00  0.00           H  
+ATOM    122 2HG1 ILE     9       8.431   3.195  -4.081  1.00  0.00           H  
+ATOM    123 1HG2 ILE     9       9.697   0.013  -4.862  1.00  0.00           H  
+ATOM    124 2HG2 ILE     9      10.883   1.289  -4.586  1.00  0.00           H  
+ATOM    125 3HG2 ILE     9      10.777  -0.073  -3.470  1.00  0.00           H  
+ATOM    126 1HD1 ILE     9      10.205   3.147  -1.631  1.00  0.00           H  
+ATOM    127 2HD1 ILE     9       9.757   4.640  -2.454  1.00  0.00           H  
+ATOM    128 3HD1 ILE     9       8.510   3.626  -1.726  1.00  0.00           H  
+ATOM    129  N   SER    10       6.283   2.761  -3.250  1.00  0.00           N  
+ATOM    130  CA  SER    10       5.278   3.530  -2.465  1.00  0.00           C  
+ATOM    131  C   SER    10       3.914   2.861  -2.617  1.00  0.00           C  
+ATOM    132  O   SER    10       3.209   2.640  -1.653  1.00  0.00           O  
+ATOM    133  CB  SER    10       5.206   4.965  -2.986  1.00  0.00           C  
+ATOM    134  OG  SER    10       5.884   5.827  -2.082  1.00  0.00           O  
+ATOM    135  H   SER    10       6.582   3.092  -4.123  1.00  0.00           H  
+ATOM    136  HA  SER    10       5.561   3.534  -1.423  1.00  0.00           H  
+ATOM    137 1HB  SER    10       5.676   5.025  -3.953  1.00  0.00           H  
+ATOM    138 2HB  SER    10       4.168   5.264  -3.073  1.00  0.00           H  
+ATOM    139  HG  SER    10       6.543   5.307  -1.615  1.00  0.00           H  
+ATOM    140  N   VAL    11       3.538   2.529  -3.822  1.00  0.00           N  
+ATOM    141  CA  VAL    11       2.224   1.866  -4.032  1.00  0.00           C  
+ATOM    142  C   VAL    11       2.216   0.535  -3.277  1.00  0.00           C  
+ATOM    143  O   VAL    11       1.182   0.048  -2.867  1.00  0.00           O  
+ATOM    144  CB  VAL    11       2.012   1.616  -5.526  1.00  0.00           C  
+ATOM    145  CG1 VAL    11       0.862   0.625  -5.727  1.00  0.00           C  
+ATOM    146  CG2 VAL    11       1.667   2.938  -6.217  1.00  0.00           C  
+ATOM    147  H   VAL    11       4.125   2.710  -4.586  1.00  0.00           H  
+ATOM    148  HA  VAL    11       1.435   2.499  -3.654  1.00  0.00           H  
+ATOM    149  HB  VAL    11       2.917   1.209  -5.956  1.00  0.00           H  
+ATOM    150 1HG1 VAL    11       0.068   0.850  -5.031  1.00  0.00           H  
+ATOM    151 2HG1 VAL    11       0.491   0.705  -6.737  1.00  0.00           H  
+ATOM    152 3HG1 VAL    11       1.219  -0.379  -5.552  1.00  0.00           H  
+ATOM    153 1HG2 VAL    11       1.797   3.752  -5.519  1.00  0.00           H  
+ATOM    154 2HG2 VAL    11       2.320   3.083  -7.065  1.00  0.00           H  
+ATOM    155 3HG2 VAL    11       0.641   2.912  -6.553  1.00  0.00           H  
+ATOM    156  N   LEU    12       3.367  -0.056  -3.086  1.00  0.00           N  
+ATOM    157  CA  LEU    12       3.423  -1.350  -2.354  1.00  0.00           C  
+ATOM    158  C   LEU    12       2.879  -1.157  -0.937  1.00  0.00           C  
+ATOM    159  O   LEU    12       2.063  -1.926  -0.466  1.00  0.00           O  
+ATOM    160  CB  LEU    12       4.872  -1.833  -2.286  1.00  0.00           C  
+ATOM    161  CG  LEU    12       5.136  -2.814  -3.429  1.00  0.00           C  
+ATOM    162  CD1 LEU    12       6.604  -3.243  -3.406  1.00  0.00           C  
+ATOM    163  CD2 LEU    12       4.242  -4.044  -3.259  1.00  0.00           C  
+ATOM    164  H   LEU    12       4.193   0.353  -3.423  1.00  0.00           H  
+ATOM    165  HA  LEU    12       2.823  -2.083  -2.872  1.00  0.00           H  
+ATOM    166 1HB  LEU    12       5.538  -0.987  -2.374  1.00  0.00           H  
+ATOM    167 2HB  LEU    12       5.042  -2.329  -1.342  1.00  0.00           H  
+ATOM    168  HG  LEU    12       4.917  -2.333  -4.372  1.00  0.00           H  
+ATOM    169 1HD1 LEU    12       6.943  -3.313  -2.383  1.00  0.00           H  
+ATOM    170 2HD1 LEU    12       6.704  -4.206  -3.886  1.00  0.00           H  
+ATOM    171 3HD1 LEU    12       7.201  -2.513  -3.934  1.00  0.00           H  
+ATOM    172 1HD2 LEU    12       4.200  -4.318  -2.216  1.00  0.00           H  
+ATOM    173 2HD2 LEU    12       3.246  -3.817  -3.612  1.00  0.00           H  
+ATOM    174 3HD2 LEU    12       4.647  -4.866  -3.832  1.00  0.00           H  
+ATOM    175  N   LEU    13       3.318  -0.134  -0.251  1.00  0.00           N  
+ATOM    176  CA  LEU    13       2.814   0.099   1.134  1.00  0.00           C  
+ATOM    177  C   LEU    13       1.360   0.568   1.069  1.00  0.00           C  
+ATOM    178  O   LEU    13       0.541   0.198   1.885  1.00  0.00           O  
+ATOM    179  CB  LEU    13       3.658   1.176   1.825  1.00  0.00           C  
+ATOM    180  CG  LEU    13       5.082   0.662   2.070  1.00  0.00           C  
+ATOM    181  CD1 LEU    13       5.045  -0.794   2.541  1.00  0.00           C  
+ATOM    182  CD2 LEU    13       5.881   0.759   0.771  1.00  0.00           C  
+ATOM    183  H   LEU    13       3.973   0.480  -0.646  1.00  0.00           H  
+ATOM    184  HA  LEU    13       2.869  -0.820   1.697  1.00  0.00           H  
+ATOM    185 1HB  LEU    13       3.699   2.055   1.197  1.00  0.00           H  
+ATOM    186 2HB  LEU    13       3.205   1.433   2.770  1.00  0.00           H  
+ATOM    187  HG  LEU    13       5.554   1.270   2.829  1.00  0.00           H  
+ATOM    188 1HD1 LEU    13       4.267  -0.915   3.280  1.00  0.00           H  
+ATOM    189 2HD1 LEU    13       4.844  -1.439   1.698  1.00  0.00           H  
+ATOM    190 3HD1 LEU    13       5.998  -1.056   2.975  1.00  0.00           H  
+ATOM    191 1HD2 LEU    13       5.409   1.473   0.114  1.00  0.00           H  
+ATOM    192 2HD2 LEU    13       6.888   1.083   0.991  1.00  0.00           H  
+ATOM    193 3HD2 LEU    13       5.909  -0.208   0.293  1.00  0.00           H  
+ATOM    194  N   ALA    14       1.037   1.386   0.104  1.00  0.00           N  
+ATOM    195  CA  ALA    14      -0.362   1.885  -0.014  1.00  0.00           C  
+ATOM    196  C   ALA    14      -1.338   0.718   0.143  1.00  0.00           C  
+ATOM    197  O   ALA    14      -2.271   0.777   0.920  1.00  0.00           O  
+ATOM    198  CB  ALA    14      -0.556   2.534  -1.386  1.00  0.00           C  
+ATOM    199  H   ALA    14       1.715   1.673  -0.541  1.00  0.00           H  
+ATOM    200  HA  ALA    14      -0.548   2.616   0.759  1.00  0.00           H  
+ATOM    201 1HB  ALA    14       0.300   2.321  -2.010  1.00  0.00           H  
+ATOM    202 2HB  ALA    14      -1.447   2.137  -1.850  1.00  0.00           H  
+ATOM    203 3HB  ALA    14      -0.658   3.602  -1.267  1.00  0.00           H  
+ATOM    204  N   GLN    15      -1.135  -0.341  -0.591  1.00  0.00           N  
+ATOM    205  CA  GLN    15      -2.055  -1.510  -0.487  1.00  0.00           C  
+ATOM    206  C   GLN    15      -1.875  -2.190   0.873  1.00  0.00           C  
+ATOM    207  O   GLN    15      -2.832  -2.497   1.556  1.00  0.00           O  
+ATOM    208  CB  GLN    15      -1.731  -2.509  -1.601  1.00  0.00           C  
+ATOM    209  CG  GLN    15      -2.937  -2.645  -2.533  1.00  0.00           C  
+ATOM    210  CD  GLN    15      -3.686  -3.940  -2.215  1.00  0.00           C  
+ATOM    211  OE1 GLN    15      -4.235  -4.089  -1.142  1.00  0.00           O  
+ATOM    212  NE2 GLN    15      -3.730  -4.890  -3.109  1.00  0.00           N  
+ATOM    213  H   GLN    15      -0.379  -0.368  -1.213  1.00  0.00           H  
+ATOM    214  HA  GLN    15      -3.076  -1.175  -0.591  1.00  0.00           H  
+ATOM    215 1HB  GLN    15      -0.879  -2.157  -2.164  1.00  0.00           H  
+ATOM    216 2HB  GLN    15      -1.505  -3.471  -1.168  1.00  0.00           H  
+ATOM    217 1HG  GLN    15      -3.598  -1.802  -2.392  1.00  0.00           H  
+ATOM    218 2HG  GLN    15      -2.598  -2.669  -3.558  1.00  0.00           H  
+ATOM    219 1HE2 GLN    15      -4.208  -5.723  -2.914  1.00  0.00           H  
+ATOM    220 2HE2 GLN    15      -3.287  -4.771  -3.975  1.00  0.00           H  
+ATOM    221  N   ALA    16      -0.655  -2.432   1.265  1.00  0.00           N  
+ATOM    222  CA  ALA    16      -0.404  -3.099   2.575  1.00  0.00           C  
+ATOM    223  C   ALA    16      -1.338  -2.527   3.645  1.00  0.00           C  
+ATOM    224  O   ALA    16      -2.023  -3.255   4.336  1.00  0.00           O  
+ATOM    225  CB  ALA    16       1.050  -2.865   2.991  1.00  0.00           C  
+ATOM    226  H   ALA    16       0.100  -2.180   0.693  1.00  0.00           H  
+ATOM    227  HA  ALA    16      -0.580  -4.160   2.476  1.00  0.00           H  
+ATOM    228 1HB  ALA    16       1.263  -1.805   2.976  1.00  0.00           H  
+ATOM    229 2HB  ALA    16       1.205  -3.248   3.988  1.00  0.00           H  
+ATOM    230 3HB  ALA    16       1.708  -3.374   2.303  1.00  0.00           H  
+ATOM    231  N   VAL    17      -1.365  -1.231   3.797  1.00  0.00           N  
+ATOM    232  CA  VAL    17      -2.249  -0.619   4.831  1.00  0.00           C  
+ATOM    233  C   VAL    17      -3.673  -0.484   4.286  1.00  0.00           C  
+ATOM    234  O   VAL    17      -4.628  -0.423   5.033  1.00  0.00           O  
+ATOM    235  CB  VAL    17      -1.713   0.766   5.202  1.00  0.00           C  
+ATOM    236  CG1 VAL    17      -2.561   1.358   6.329  1.00  0.00           C  
+ATOM    237  CG2 VAL    17      -0.261   0.642   5.670  1.00  0.00           C  
+ATOM    238  H   VAL    17      -0.801  -0.660   3.236  1.00  0.00           H  
+ATOM    239  HA  VAL    17      -2.260  -1.245   5.710  1.00  0.00           H  
+ATOM    240  HB  VAL    17      -1.760   1.413   4.338  1.00  0.00           H  
+ATOM    241 1HG1 VAL    17      -3.591   1.063   6.199  1.00  0.00           H  
+ATOM    242 2HG1 VAL    17      -2.200   0.994   7.280  1.00  0.00           H  
+ATOM    243 3HG1 VAL    17      -2.489   2.435   6.306  1.00  0.00           H  
+ATOM    244 1HG2 VAL    17      -0.107  -0.330   6.114  1.00  0.00           H  
+ATOM    245 2HG2 VAL    17       0.402   0.760   4.825  1.00  0.00           H  
+ATOM    246 3HG2 VAL    17      -0.052   1.409   6.401  1.00  0.00           H  
+ATOM    247  N   PHE    18      -3.825  -0.436   2.992  1.00  0.00           N  
+ATOM    248  CA  PHE    18      -5.189  -0.304   2.406  1.00  0.00           C  
+ATOM    249  C   PHE    18      -6.092  -1.405   2.965  1.00  0.00           C  
+ATOM    250  O   PHE    18      -7.064  -1.141   3.643  1.00  0.00           O  
+ATOM    251  CB  PHE    18      -5.106  -0.438   0.885  1.00  0.00           C  
+ATOM    252  CG  PHE    18      -6.146   0.447   0.241  1.00  0.00           C  
+ATOM    253  CD1 PHE    18      -5.969   1.835   0.221  1.00  0.00           C  
+ATOM    254  CD2 PHE    18      -7.285  -0.122  -0.340  1.00  0.00           C  
+ATOM    255  CE1 PHE    18      -6.932   2.655  -0.379  1.00  0.00           C  
+ATOM    256  CE2 PHE    18      -8.249   0.698  -0.940  1.00  0.00           C  
+ATOM    257  CZ  PHE    18      -8.072   2.087  -0.959  1.00  0.00           C  
+ATOM    258  H   PHE    18      -3.042  -0.485   2.404  1.00  0.00           H  
+ATOM    259  HA  PHE    18      -5.599   0.662   2.660  1.00  0.00           H  
+ATOM    260 1HB  PHE    18      -4.123  -0.140   0.552  1.00  0.00           H  
+ATOM    261 2HB  PHE    18      -5.283  -1.466   0.605  1.00  0.00           H  
+ATOM    262  HD1 PHE    18      -5.089   2.274   0.669  1.00  0.00           H  
+ATOM    263  HD2 PHE    18      -7.422  -1.193  -0.325  1.00  0.00           H  
+ATOM    264  HE1 PHE    18      -6.795   3.726  -0.393  1.00  0.00           H  
+ATOM    265  HE2 PHE    18      -9.128   0.259  -1.388  1.00  0.00           H  
+ATOM    266  HZ  PHE    18      -8.815   2.720  -1.422  1.00  0.00           H  
+ATOM    267  N   LEU    19      -5.775  -2.640   2.685  1.00  0.00           N  
+ATOM    268  CA  LEU    19      -6.613  -3.758   3.200  1.00  0.00           C  
+ATOM    269  C   LEU    19      -6.606  -3.735   4.730  1.00  0.00           C  
+ATOM    270  O   LEU    19      -7.561  -4.124   5.371  1.00  0.00           O  
+ATOM    271  CB  LEU    19      -6.047  -5.092   2.704  1.00  0.00           C  
+ATOM    272  CG  LEU    19      -5.837  -5.028   1.190  1.00  0.00           C  
+ATOM    273  CD1 LEU    19      -5.401  -6.400   0.673  1.00  0.00           C  
+ATOM    274  CD2 LEU    19      -7.148  -4.624   0.511  1.00  0.00           C  
+ATOM    275  H   LEU    19      -4.986  -2.831   2.137  1.00  0.00           H  
+ATOM    276  HA  LEU    19      -7.626  -3.642   2.844  1.00  0.00           H  
+ATOM    277 1HB  LEU    19      -5.103  -5.286   3.191  1.00  0.00           H  
+ATOM    278 2HB  LEU    19      -6.741  -5.886   2.935  1.00  0.00           H  
+ATOM    279  HG  LEU    19      -5.073  -4.299   0.962  1.00  0.00           H  
+ATOM    280 1HD1 LEU    19      -5.007  -6.985   1.491  1.00  0.00           H  
+ATOM    281 2HD1 LEU    19      -6.250  -6.910   0.242  1.00  0.00           H  
+ATOM    282 3HD1 LEU    19      -4.636  -6.275  -0.079  1.00  0.00           H  
+ATOM    283 1HD2 LEU    19      -7.479  -3.674   0.906  1.00  0.00           H  
+ATOM    284 2HD2 LEU    19      -6.992  -4.536  -0.553  1.00  0.00           H  
+ATOM    285 3HD2 LEU    19      -7.901  -5.375   0.705  1.00  0.00           H  
+ATOM    286  N   LEU    20      -5.536  -3.277   5.320  1.00  0.00           N  
+ATOM    287  CA  LEU    20      -5.469  -3.225   6.806  1.00  0.00           C  
+ATOM    288  C   LEU    20      -6.577  -2.310   7.332  1.00  0.00           C  
+ATOM    289  O   LEU    20      -7.113  -2.516   8.402  1.00  0.00           O  
+ATOM    290  CB  LEU    20      -4.106  -2.678   7.238  1.00  0.00           C  
+ATOM    291  CG  LEU    20      -3.842  -3.056   8.697  1.00  0.00           C  
+ATOM    292  CD1 LEU    20      -3.948  -4.574   8.861  1.00  0.00           C  
+ATOM    293  CD2 LEU    20      -2.437  -2.599   9.096  1.00  0.00           C  
+ATOM    294  H   LEU    20      -4.777  -2.965   4.783  1.00  0.00           H  
+ATOM    295  HA  LEU    20      -5.603  -4.219   7.208  1.00  0.00           H  
+ATOM    296 1HB  LEU    20      -3.334  -3.099   6.610  1.00  0.00           H  
+ATOM    297 2HB  LEU    20      -4.103  -1.604   7.142  1.00  0.00           H  
+ATOM    298  HG  LEU    20      -4.573  -2.575   9.331  1.00  0.00           H  
+ATOM    299 1HD1 LEU    20      -3.614  -5.058   7.955  1.00  0.00           H  
+ATOM    300 2HD1 LEU    20      -3.330  -4.891   9.688  1.00  0.00           H  
+ATOM    301 3HD1 LEU    20      -4.976  -4.844   9.056  1.00  0.00           H  
+ATOM    302 1HD2 LEU    20      -1.877  -2.341   8.209  1.00  0.00           H  
+ATOM    303 2HD2 LEU    20      -2.510  -1.736   9.740  1.00  0.00           H  
+ATOM    304 3HD2 LEU    20      -1.933  -3.398   9.620  1.00  0.00           H  
+ATOM    305  N   LEU    21      -6.923  -1.297   6.584  1.00  0.00           N  
+ATOM    306  CA  LEU    21      -7.994  -0.367   7.039  1.00  0.00           C  
+ATOM    307  C   LEU    21      -9.364  -0.982   6.748  1.00  0.00           C  
+ATOM    308  O   LEU    21     -10.155  -1.209   7.642  1.00  0.00           O  
+ATOM    309  CB  LEU    21      -7.867   0.964   6.291  1.00  0.00           C  
+ATOM    310  CG  LEU    21      -8.987   1.906   6.732  1.00  0.00           C  
+ATOM    311  CD1 LEU    21      -8.587   2.601   8.034  1.00  0.00           C  
+ATOM    312  CD2 LEU    21      -9.227   2.958   5.647  1.00  0.00           C  
+ATOM    313  H   LEU    21      -6.478  -1.148   5.723  1.00  0.00           H  
+ATOM    314  HA  LEU    21      -7.895  -0.193   8.100  1.00  0.00           H  
+ATOM    315 1HB  LEU    21      -6.910   1.412   6.513  1.00  0.00           H  
+ATOM    316 2HB  LEU    21      -7.945   0.789   5.228  1.00  0.00           H  
+ATOM    317  HG  LEU    21      -9.893   1.338   6.893  1.00  0.00           H  
+ATOM    318 1HD1 LEU    21      -7.718   2.112   8.451  1.00  0.00           H  
+ATOM    319 2HD1 LEU    21      -8.356   3.637   7.833  1.00  0.00           H  
+ATOM    320 3HD1 LEU    21      -9.404   2.545   8.738  1.00  0.00           H  
+ATOM    321 1HD2 LEU    21      -8.317   3.111   5.086  1.00  0.00           H  
+ATOM    322 2HD2 LEU    21     -10.007   2.618   4.982  1.00  0.00           H  
+ATOM    323 3HD2 LEU    21      -9.526   3.889   6.108  1.00  0.00           H  
+ATOM    324  N   THR    22      -9.651  -1.251   5.504  1.00  0.00           N  
+ATOM    325  CA  THR    22     -10.971  -1.848   5.154  1.00  0.00           C  
+ATOM    326  C   THR    22     -11.292  -2.997   6.113  1.00  0.00           C  
+ATOM    327  O   THR    22     -12.434  -3.228   6.458  1.00  0.00           O  
+ATOM    328  CB  THR    22     -10.922  -2.381   3.720  1.00  0.00           C  
+ATOM    329  OG1 THR    22      -9.763  -3.186   3.556  1.00  0.00           O  
+ATOM    330  CG2 THR    22     -10.876  -1.207   2.740  1.00  0.00           C  
+ATOM    331  H   THR    22      -8.998  -1.058   4.798  1.00  0.00           H  
+ATOM    332  HA  THR    22     -11.738  -1.092   5.230  1.00  0.00           H  
+ATOM    333  HB  THR    22     -11.803  -2.972   3.525  1.00  0.00           H  
+ATOM    334  HG1 THR    22      -9.520  -3.170   2.628  1.00  0.00           H  
+ATOM    335 1HG2 THR    22     -10.880  -0.277   3.289  1.00  0.00           H  
+ATOM    336 2HG2 THR    22      -9.976  -1.270   2.145  1.00  0.00           H  
+ATOM    337 3HG2 THR    22     -11.739  -1.246   2.092  1.00  0.00           H  
+ATOM    338  N   SER    23     -10.296  -3.722   6.543  1.00  0.00           N  
+ATOM    339  CA  SER    23     -10.547  -4.858   7.475  1.00  0.00           C  
+ATOM    340  C   SER    23     -10.857  -4.319   8.874  1.00  0.00           C  
+ATOM    341  O   SER    23     -11.807  -4.729   9.510  1.00  0.00           O  
+ATOM    342  CB  SER    23      -9.306  -5.747   7.539  1.00  0.00           C  
+ATOM    343  OG  SER    23      -9.694  -7.105   7.374  1.00  0.00           O  
+ATOM    344  H   SER    23      -9.382  -3.521   6.250  1.00  0.00           H  
+ATOM    345  HA  SER    23     -11.385  -5.437   7.119  1.00  0.00           H  
+ATOM    346 1HB  SER    23      -8.623  -5.475   6.752  1.00  0.00           H  
+ATOM    347 2HB  SER    23      -8.819  -5.617   8.497  1.00  0.00           H  
+ATOM    348  HG  SER    23      -8.986  -7.561   6.913  1.00  0.00           H  
+ATOM    349  N   GLN    24     -10.063  -3.404   9.358  1.00  0.00           N  
+ATOM    350  CA  GLN    24     -10.312  -2.843  10.714  1.00  0.00           C  
+ATOM    351  C   GLN    24     -11.356  -1.726  10.624  1.00  0.00           C  
+ATOM    352  O   GLN    24     -11.574  -0.991  11.566  1.00  0.00           O  
+ATOM    353  CB  GLN    24      -9.006  -2.280  11.274  1.00  0.00           C  
+ATOM    354  CG  GLN    24      -7.913  -3.347  11.192  1.00  0.00           C  
+ATOM    355  CD  GLN    24      -8.257  -4.506  12.128  1.00  0.00           C  
+ATOM    356  OE1 GLN    24      -8.854  -5.481  11.713  1.00  0.00           O  
+ATOM    357  NE2 GLN    24      -7.906  -4.442  13.382  1.00  0.00           N  
+ATOM    358  H   GLN    24      -9.301  -3.087   8.831  1.00  0.00           H  
+ATOM    359  HA  GLN    24     -10.676  -3.625  11.365  1.00  0.00           H  
+ATOM    360 1HB  GLN    24      -8.711  -1.417  10.695  1.00  0.00           H  
+ATOM    361 2HB  GLN    24      -9.150  -1.992  12.303  1.00  0.00           H  
+ATOM    362 1HG  GLN    24      -7.841  -3.711  10.177  1.00  0.00           H  
+ATOM    363 2HG  GLN    24      -6.968  -2.917  11.487  1.00  0.00           H  
+ATOM    364 1HE2 GLN    24      -8.122  -5.181  13.990  1.00  0.00           H  
+ATOM    365 2HE2 GLN    24      -7.427  -3.657  13.717  1.00  0.00           H  
+ATOM    366  N   ARG    25     -12.002  -1.593   9.498  1.00  0.00           N  
+ATOM    367  CA  ARG    25     -13.030  -0.525   9.350  1.00  0.00           C  
+ATOM    368  C   ARG    25     -14.278  -1.110   8.687  1.00  0.00           C  
+ATOM    369  O   ARG    25     -15.290  -0.429   8.670  1.00  0.00           O  
+ATOM    370  CB  ARG    25     -12.473   0.604   8.480  1.00  0.00           C  
+ATOM    371  CG  ARG    25     -12.008   1.756   9.370  1.00  0.00           C  
+ATOM    372  CD  ARG    25     -13.227   2.465   9.965  1.00  0.00           C  
+ATOM    373  NE  ARG    25     -12.793   3.340  11.090  1.00  0.00           N  
+ATOM    374  CZ  ARG    25     -13.195   3.089  12.306  1.00  0.00           C  
+ATOM    375  NH1 ARG    25     -12.406   2.467  13.138  1.00  0.00           N  
+ATOM    376  NH2 ARG    25     -14.385   3.463  12.689  1.00  0.00           N  
+ATOM    377  OXT ARG    25     -14.200  -2.229   8.207  1.00  0.00           O  
+ATOM    378  H   ARG    25     -11.812  -2.195   8.749  1.00  0.00           H  
+ATOM    379  HA  ARG    25     -13.288  -0.136  10.324  1.00  0.00           H  
+ATOM    380 1HB  ARG    25     -11.637   0.235   7.904  1.00  0.00           H  
+ATOM    381 2HB  ARG    25     -13.243   0.957   7.810  1.00  0.00           H  
+ATOM    382 1HG  ARG    25     -11.390   1.369  10.168  1.00  0.00           H  
+ATOM    383 2HG  ARG    25     -11.438   2.459   8.782  1.00  0.00           H  
+ATOM    384 1HD  ARG    25     -13.701   3.065   9.203  1.00  0.00           H  
+ATOM    385 2HD  ARG    25     -13.928   1.729  10.331  1.00  0.00           H  
+ATOM    386  HE  ARG    25     -12.207   4.105  10.916  1.00  0.00           H  
+ATOM    387 1HH1 ARG    25     -11.493   2.181  12.844  1.00  0.00           H  
+ATOM    388 2HH1 ARG    25     -12.714   2.276  14.070  1.00  0.00           H  
+ATOM    389 1HH2 ARG    25     -14.989   3.942  12.051  1.00  0.00           H  
+ATOM    390 2HH2 ARG    25     -14.694   3.271  13.621  1.00  0.00           H  
+TER     391      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        2                                                                  
+ATOM    392  N   GLY     1      16.481   4.923  -4.827  1.00  0.00           N  
+ATOM    393  CA  GLY     1      16.036   3.978  -5.946  1.00  0.00           C  
+ATOM    394  C   GLY     1      15.368   4.536  -7.157  1.00  0.00           C  
+ATOM    395  O   GLY     1      14.605   3.863  -7.821  1.00  0.00           O  
+ATOM    396 1H   GLY     1      17.036   5.703  -5.232  1.00  0.00           H  
+ATOM    397 2H   GLY     1      15.643   5.307  -4.344  1.00  0.00           H  
+ATOM    398 3H   GLY     1      17.064   4.399  -4.144  1.00  0.00           H  
+ATOM    399 1HA  GLY     1      16.767   3.796  -6.564  1.00  0.00           H  
+ATOM    400 2HA  GLY     1      15.621   3.016  -5.432  1.00  0.00           H  
+ATOM    401  N   SER     2      15.634   5.773  -7.479  1.00  0.00           N  
+ATOM    402  CA  SER     2      14.997   6.384  -8.680  1.00  0.00           C  
+ATOM    403  C   SER     2      13.485   6.484  -8.463  1.00  0.00           C  
+ATOM    404  O   SER     2      12.918   5.781  -7.651  1.00  0.00           O  
+ATOM    405  CB  SER     2      15.279   5.513  -9.904  1.00  0.00           C  
+ATOM    406  OG  SER     2      16.590   4.972  -9.801  1.00  0.00           O  
+ATOM    407  H   SER     2      16.253   6.300  -6.931  1.00  0.00           H  
+ATOM    408  HA  SER     2      15.404   7.372  -8.840  1.00  0.00           H  
+ATOM    409 1HB  SER     2      14.565   4.707  -9.946  1.00  0.00           H  
+ATOM    410 2HB  SER     2      15.194   6.114 -10.800  1.00  0.00           H  
+ATOM    411  HG  SER     2      16.723   4.375 -10.540  1.00  0.00           H  
+ATOM    412  N   GLU     3      12.828   7.353  -9.183  1.00  0.00           N  
+ATOM    413  CA  GLU     3      11.354   7.495  -9.017  1.00  0.00           C  
+ATOM    414  C   GLU     3      10.670   6.174  -9.371  1.00  0.00           C  
+ATOM    415  O   GLU     3       9.507   5.969  -9.084  1.00  0.00           O  
+ATOM    416  CB  GLU     3      10.839   8.600  -9.944  1.00  0.00           C  
+ATOM    417  CG  GLU     3       9.488   9.106  -9.433  1.00  0.00           C  
+ATOM    418  CD  GLU     3       9.704  10.354  -8.574  1.00  0.00           C  
+ATOM    419  OE1 GLU     3      10.507  10.286  -7.659  1.00  0.00           O  
+ATOM    420  OE2 GLU     3       9.061  11.355  -8.846  1.00  0.00           O  
+ATOM    421  H   GLU     3      13.304   7.911  -9.833  1.00  0.00           H  
+ATOM    422  HA  GLU     3      11.131   7.754  -7.992  1.00  0.00           H  
+ATOM    423 1HB  GLU     3      11.547   9.415  -9.961  1.00  0.00           H  
+ATOM    424 2HB  GLU     3      10.719   8.206 -10.942  1.00  0.00           H  
+ATOM    425 1HG  GLU     3       8.854   9.352 -10.273  1.00  0.00           H  
+ATOM    426 2HG  GLU     3       9.018   8.340  -8.838  1.00  0.00           H  
+ATOM    427  N   LYS     4      11.382   5.274  -9.992  1.00  0.00           N  
+ATOM    428  CA  LYS     4      10.770   3.967 -10.364  1.00  0.00           C  
+ATOM    429  C   LYS     4      10.603   3.106  -9.111  1.00  0.00           C  
+ATOM    430  O   LYS     4       9.530   2.614  -8.823  1.00  0.00           O  
+ATOM    431  CB  LYS     4      11.677   3.244 -11.364  1.00  0.00           C  
+ATOM    432  CG  LYS     4      11.878   4.120 -12.601  1.00  0.00           C  
+ATOM    433  CD  LYS     4      13.299   4.688 -12.601  1.00  0.00           C  
+ATOM    434  CE  LYS     4      13.404   5.802 -13.643  1.00  0.00           C  
+ATOM    435  NZ  LYS     4      14.363   6.838 -13.167  1.00  0.00           N  
+ATOM    436  H   LYS     4      12.319   5.458 -10.215  1.00  0.00           H  
+ATOM    437  HA  LYS     4       9.803   4.139 -10.814  1.00  0.00           H  
+ATOM    438 1HB  LYS     4      12.633   3.045 -10.902  1.00  0.00           H  
+ATOM    439 2HB  LYS     4      11.217   2.312 -11.655  1.00  0.00           H  
+ATOM    440 1HG  LYS     4      11.727   3.525 -13.490  1.00  0.00           H  
+ATOM    441 2HG  LYS     4      11.168   4.933 -12.586  1.00  0.00           H  
+ATOM    442 1HD  LYS     4      13.527   5.084 -11.622  1.00  0.00           H  
+ATOM    443 2HD  LYS     4      14.000   3.902 -12.844  1.00  0.00           H  
+ATOM    444 1HE  LYS     4      13.754   5.389 -14.578  1.00  0.00           H  
+ATOM    445 2HE  LYS     4      12.432   6.251 -13.789  1.00  0.00           H  
+ATOM    446 1HZ  LYS     4      14.077   7.168 -12.223  1.00  0.00           H  
+ATOM    447 2HZ  LYS     4      15.317   6.430 -13.115  1.00  0.00           H  
+ATOM    448 3HZ  LYS     4      14.364   7.640 -13.830  1.00  0.00           H  
+ATOM    449  N   MET     5      11.657   2.915  -8.366  1.00  0.00           N  
+ATOM    450  CA  MET     5      11.561   2.083  -7.134  1.00  0.00           C  
+ATOM    451  C   MET     5      10.733   2.819  -6.080  1.00  0.00           C  
+ATOM    452  O   MET     5      10.184   2.219  -5.177  1.00  0.00           O  
+ATOM    453  CB  MET     5      12.966   1.824  -6.587  1.00  0.00           C  
+ATOM    454  CG  MET     5      13.021   0.429  -5.965  1.00  0.00           C  
+ATOM    455  SD  MET     5      12.772   0.555  -4.177  1.00  0.00           S  
+ATOM    456  CE  MET     5      14.219  -0.410  -3.678  1.00  0.00           C  
+ATOM    457  H   MET     5      12.514   3.320  -8.618  1.00  0.00           H  
+ATOM    458  HA  MET     5      11.088   1.141  -7.371  1.00  0.00           H  
+ATOM    459 1HB  MET     5      13.684   1.890  -7.391  1.00  0.00           H  
+ATOM    460 2HB  MET     5      13.200   2.561  -5.834  1.00  0.00           H  
+ATOM    461 1HG  MET     5      12.246  -0.188  -6.394  1.00  0.00           H  
+ATOM    462 2HG  MET     5      13.985  -0.016  -6.163  1.00  0.00           H  
+ATOM    463 1HE  MET     5      14.356  -1.233  -4.366  1.00  0.00           H  
+ATOM    464 2HE  MET     5      15.094   0.224  -3.688  1.00  0.00           H  
+ATOM    465 3HE  MET     5      14.069  -0.799  -2.684  1.00  0.00           H  
+ATOM    466  N   SER     6      10.642   4.116  -6.182  1.00  0.00           N  
+ATOM    467  CA  SER     6       9.853   4.889  -5.181  1.00  0.00           C  
+ATOM    468  C   SER     6       8.363   4.808  -5.525  1.00  0.00           C  
+ATOM    469  O   SER     6       7.514   4.816  -4.654  1.00  0.00           O  
+ATOM    470  CB  SER     6      10.301   6.351  -5.200  1.00  0.00           C  
+ATOM    471  OG  SER     6       9.533   7.090  -4.259  1.00  0.00           O  
+ATOM    472  H   SER     6      11.094   4.583  -6.915  1.00  0.00           H  
+ATOM    473  HA  SER     6      10.017   4.476  -4.197  1.00  0.00           H  
+ATOM    474 1HB  SER     6      11.343   6.413  -4.935  1.00  0.00           H  
+ATOM    475 2HB  SER     6      10.161   6.756  -6.193  1.00  0.00           H  
+ATOM    476  HG  SER     6       9.847   6.868  -3.380  1.00  0.00           H  
+ATOM    477  N   THR     7       8.040   4.728  -6.786  1.00  0.00           N  
+ATOM    478  CA  THR     7       6.606   4.648  -7.182  1.00  0.00           C  
+ATOM    479  C   THR     7       6.129   3.202  -7.051  1.00  0.00           C  
+ATOM    480  O   THR     7       4.948   2.935  -6.939  1.00  0.00           O  
+ATOM    481  CB  THR     7       6.451   5.110  -8.631  1.00  0.00           C  
+ATOM    482  OG1 THR     7       7.571   4.672  -9.389  1.00  0.00           O  
+ATOM    483  CG2 THR     7       6.369   6.636  -8.676  1.00  0.00           C  
+ATOM    484  H   THR     7       8.739   4.719  -7.472  1.00  0.00           H  
+ATOM    485  HA  THR     7       6.017   5.282  -6.535  1.00  0.00           H  
+ATOM    486  HB  THR     7       5.547   4.693  -9.049  1.00  0.00           H  
+ATOM    487  HG1 THR     7       7.250   4.094 -10.086  1.00  0.00           H  
+ATOM    488 1HG2 THR     7       6.049   7.008  -7.714  1.00  0.00           H  
+ATOM    489 2HG2 THR     7       7.342   7.042  -8.912  1.00  0.00           H  
+ATOM    490 3HG2 THR     7       5.660   6.937  -9.433  1.00  0.00           H  
+ATOM    491  N   ALA     8       7.039   2.268  -7.054  1.00  0.00           N  
+ATOM    492  CA  ALA     8       6.641   0.842  -6.921  1.00  0.00           C  
+ATOM    493  C   ALA     8       6.483   0.520  -5.439  1.00  0.00           C  
+ATOM    494  O   ALA     8       5.561  -0.157  -5.031  1.00  0.00           O  
+ATOM    495  CB  ALA     8       7.726  -0.047  -7.530  1.00  0.00           C  
+ATOM    496  H   ALA     8       7.984   2.505  -7.133  1.00  0.00           H  
+ATOM    497  HA  ALA     8       5.705   0.677  -7.430  1.00  0.00           H  
+ATOM    498 1HB  ALA     8       8.684   0.446  -7.446  1.00  0.00           H  
+ATOM    499 2HB  ALA     8       7.757  -0.988  -7.004  1.00  0.00           H  
+ATOM    500 3HB  ALA     8       7.503  -0.224  -8.572  1.00  0.00           H  
+ATOM    501  N   ILE     9       7.371   1.019  -4.630  1.00  0.00           N  
+ATOM    502  CA  ILE     9       7.271   0.761  -3.171  1.00  0.00           C  
+ATOM    503  C   ILE     9       6.054   1.507  -2.627  1.00  0.00           C  
+ATOM    504  O   ILE     9       5.396   1.060  -1.708  1.00  0.00           O  
+ATOM    505  CB  ILE     9       8.535   1.263  -2.475  1.00  0.00           C  
+ATOM    506  CG1 ILE     9       8.713   2.756  -2.763  1.00  0.00           C  
+ATOM    507  CG2 ILE     9       9.748   0.495  -3.003  1.00  0.00           C  
+ATOM    508  CD1 ILE     9       8.165   3.571  -1.590  1.00  0.00           C  
+ATOM    509  H   ILE     9       8.099   1.572  -4.981  1.00  0.00           H  
+ATOM    510  HA  ILE     9       7.156  -0.297  -3.000  1.00  0.00           H  
+ATOM    511  HB  ILE     9       8.446   1.108  -1.410  1.00  0.00           H  
+ATOM    512 1HG1 ILE     9       9.763   2.973  -2.896  1.00  0.00           H  
+ATOM    513 2HG1 ILE     9       8.175   3.015  -3.663  1.00  0.00           H  
+ATOM    514 1HG2 ILE     9       9.588   0.241  -4.041  1.00  0.00           H  
+ATOM    515 2HG2 ILE     9      10.630   1.111  -2.915  1.00  0.00           H  
+ATOM    516 3HG2 ILE     9       9.880  -0.409  -2.427  1.00  0.00           H  
+ATOM    517 1HD1 ILE     9       7.211   3.167  -1.286  1.00  0.00           H  
+ATOM    518 2HD1 ILE     9       8.858   3.522  -0.763  1.00  0.00           H  
+ATOM    519 3HD1 ILE     9       8.040   4.600  -1.895  1.00  0.00           H  
+ATOM    520  N   SER    10       5.747   2.641  -3.197  1.00  0.00           N  
+ATOM    521  CA  SER    10       4.569   3.419  -2.726  1.00  0.00           C  
+ATOM    522  C   SER    10       3.291   2.651  -3.065  1.00  0.00           C  
+ATOM    523  O   SER    10       2.449   2.424  -2.220  1.00  0.00           O  
+ATOM    524  CB  SER    10       4.547   4.780  -3.421  1.00  0.00           C  
+ATOM    525  OG  SER    10       5.669   5.543  -2.998  1.00  0.00           O  
+ATOM    526  H   SER    10       6.291   2.978  -3.940  1.00  0.00           H  
+ATOM    527  HA  SER    10       4.633   3.560  -1.658  1.00  0.00           H  
+ATOM    528 1HB  SER    10       4.594   4.643  -4.489  1.00  0.00           H  
+ATOM    529 2HB  SER    10       3.630   5.297  -3.168  1.00  0.00           H  
+ATOM    530  HG  SER    10       6.372   4.931  -2.764  1.00  0.00           H  
+ATOM    531  N   VAL    11       3.141   2.244  -4.297  1.00  0.00           N  
+ATOM    532  CA  VAL    11       1.918   1.487  -4.686  1.00  0.00           C  
+ATOM    533  C   VAL    11       1.762   0.275  -3.765  1.00  0.00           C  
+ATOM    534  O   VAL    11       0.665  -0.125  -3.431  1.00  0.00           O  
+ATOM    535  CB  VAL    11       2.050   1.019  -6.139  1.00  0.00           C  
+ATOM    536  CG1 VAL    11       1.008  -0.063  -6.433  1.00  0.00           C  
+ATOM    537  CG2 VAL    11       1.821   2.208  -7.076  1.00  0.00           C  
+ATOM    538  H   VAL    11       3.834   2.435  -4.965  1.00  0.00           H  
+ATOM    539  HA  VAL    11       1.053   2.126  -4.591  1.00  0.00           H  
+ATOM    540  HB  VAL    11       3.041   0.618  -6.300  1.00  0.00           H  
+ATOM    541 1HG1 VAL    11       0.318  -0.134  -5.604  1.00  0.00           H  
+ATOM    542 2HG1 VAL    11       0.466   0.193  -7.331  1.00  0.00           H  
+ATOM    543 3HG1 VAL    11       1.504  -1.013  -6.570  1.00  0.00           H  
+ATOM    544 1HG2 VAL    11       1.207   2.944  -6.578  1.00  0.00           H  
+ATOM    545 2HG2 VAL    11       2.771   2.649  -7.338  1.00  0.00           H  
+ATOM    546 3HG2 VAL    11       1.322   1.869  -7.972  1.00  0.00           H  
+ATOM    547  N   LEU    12       2.852  -0.313  -3.351  1.00  0.00           N  
+ATOM    548  CA  LEU    12       2.764  -1.497  -2.451  1.00  0.00           C  
+ATOM    549  C   LEU    12       2.077  -1.095  -1.144  1.00  0.00           C  
+ATOM    550  O   LEU    12       1.191  -1.772  -0.662  1.00  0.00           O  
+ATOM    551  CB  LEU    12       4.172  -2.014  -2.148  1.00  0.00           C  
+ATOM    552  CG  LEU    12       4.711  -2.775  -3.360  1.00  0.00           C  
+ATOM    553  CD1 LEU    12       6.172  -3.155  -3.115  1.00  0.00           C  
+ATOM    554  CD2 LEU    12       3.883  -4.045  -3.572  1.00  0.00           C  
+ATOM    555  H   LEU    12       3.729   0.025  -3.633  1.00  0.00           H  
+ATOM    556  HA  LEU    12       2.192  -2.275  -2.935  1.00  0.00           H  
+ATOM    557 1HB  LEU    12       4.822  -1.177  -1.930  1.00  0.00           H  
+ATOM    558 2HB  LEU    12       4.138  -2.674  -1.296  1.00  0.00           H  
+ATOM    559  HG  LEU    12       4.645  -2.148  -4.237  1.00  0.00           H  
+ATOM    560 1HD1 LEU    12       6.719  -2.286  -2.783  1.00  0.00           H  
+ATOM    561 2HD1 LEU    12       6.222  -3.922  -2.356  1.00  0.00           H  
+ATOM    562 3HD1 LEU    12       6.606  -3.527  -4.031  1.00  0.00           H  
+ATOM    563 1HD2 LEU    12       2.994  -4.001  -2.963  1.00  0.00           H  
+ATOM    564 2HD2 LEU    12       3.605  -4.124  -4.612  1.00  0.00           H  
+ATOM    565 3HD2 LEU    12       4.470  -4.908  -3.291  1.00  0.00           H  
+ATOM    566  N   LEU    13       2.480   0.003  -0.565  1.00  0.00           N  
+ATOM    567  CA  LEU    13       1.849   0.446   0.715  1.00  0.00           C  
+ATOM    568  C   LEU    13       0.433   0.945   0.428  1.00  0.00           C  
+ATOM    569  O   LEU    13      -0.406   1.003   1.304  1.00  0.00           O  
+ATOM    570  CB  LEU    13       2.663   1.584   1.346  1.00  0.00           C  
+ATOM    571  CG  LEU    13       4.116   1.532   0.866  1.00  0.00           C  
+ATOM    572  CD1 LEU    13       4.973   2.439   1.747  1.00  0.00           C  
+ATOM    573  CD2 LEU    13       4.635   0.095   0.952  1.00  0.00           C  
+ATOM    574  H   LEU    13       3.197   0.533  -0.969  1.00  0.00           H  
+ATOM    575  HA  LEU    13       1.804  -0.388   1.400  1.00  0.00           H  
+ATOM    576 1HB  LEU    13       2.228   2.532   1.066  1.00  0.00           H  
+ATOM    577 2HB  LEU    13       2.639   1.485   2.421  1.00  0.00           H  
+ATOM    578  HG  LEU    13       4.169   1.876  -0.156  1.00  0.00           H  
+ATOM    579 1HD1 LEU    13       4.356   2.881   2.515  1.00  0.00           H  
+ATOM    580 2HD1 LEU    13       5.758   1.857   2.204  1.00  0.00           H  
+ATOM    581 3HD1 LEU    13       5.409   3.220   1.141  1.00  0.00           H  
+ATOM    582 1HD2 LEU    13       3.884  -0.532   1.410  1.00  0.00           H  
+ATOM    583 2HD2 LEU    13       4.851  -0.269  -0.042  1.00  0.00           H  
+ATOM    584 3HD2 LEU    13       5.536   0.071   1.547  1.00  0.00           H  
+ATOM    585  N   ALA    14       0.165   1.312  -0.792  1.00  0.00           N  
+ATOM    586  CA  ALA    14      -1.195   1.812  -1.139  1.00  0.00           C  
+ATOM    587  C   ALA    14      -2.227   0.707  -0.903  1.00  0.00           C  
+ATOM    588  O   ALA    14      -3.097   0.825  -0.064  1.00  0.00           O  
+ATOM    589  CB  ALA    14      -1.224   2.233  -2.609  1.00  0.00           C  
+ATOM    590  H   ALA    14       0.861   1.261  -1.480  1.00  0.00           H  
+ATOM    591  HA  ALA    14      -1.433   2.663  -0.518  1.00  0.00           H  
+ATOM    592 1HB  ALA    14      -0.433   2.944  -2.795  1.00  0.00           H  
+ATOM    593 2HB  ALA    14      -1.082   1.365  -3.235  1.00  0.00           H  
+ATOM    594 3HB  ALA    14      -2.178   2.687  -2.835  1.00  0.00           H  
+ATOM    595  N   GLN    15      -2.140  -0.368  -1.640  1.00  0.00           N  
+ATOM    596  CA  GLN    15      -3.119  -1.477  -1.460  1.00  0.00           C  
+ATOM    597  C   GLN    15      -2.831  -2.212  -0.148  1.00  0.00           C  
+ATOM    598  O   GLN    15      -3.694  -2.855   0.416  1.00  0.00           O  
+ATOM    599  CB  GLN    15      -2.999  -2.456  -2.629  1.00  0.00           C  
+ATOM    600  CG  GLN    15      -3.790  -1.923  -3.826  1.00  0.00           C  
+ATOM    601  CD  GLN    15      -3.390  -2.692  -5.086  1.00  0.00           C  
+ATOM    602  OE1 GLN    15      -2.608  -3.619  -5.023  1.00  0.00           O  
+ATOM    603  NE2 GLN    15      -3.898  -2.344  -6.236  1.00  0.00           N  
+ATOM    604  H   GLN    15      -1.432  -0.443  -2.314  1.00  0.00           H  
+ATOM    605  HA  GLN    15      -4.120  -1.072  -1.433  1.00  0.00           H  
+ATOM    606 1HB  GLN    15      -1.958  -2.562  -2.904  1.00  0.00           H  
+ATOM    607 2HB  GLN    15      -3.395  -3.417  -2.338  1.00  0.00           H  
+ATOM    608 1HG  GLN    15      -4.848  -2.052  -3.643  1.00  0.00           H  
+ATOM    609 2HG  GLN    15      -3.573  -0.874  -3.962  1.00  0.00           H  
+ATOM    610 1HE2 GLN    15      -3.648  -2.829  -7.049  1.00  0.00           H  
+ATOM    611 2HE2 GLN    15      -4.529  -1.595  -6.286  1.00  0.00           H  
+ATOM    612  N   ALA    16      -1.624  -2.124   0.342  1.00  0.00           N  
+ATOM    613  CA  ALA    16      -1.283  -2.820   1.616  1.00  0.00           C  
+ATOM    614  C   ALA    16      -1.935  -2.088   2.791  1.00  0.00           C  
+ATOM    615  O   ALA    16      -2.884  -2.562   3.381  1.00  0.00           O  
+ATOM    616  CB  ALA    16       0.236  -2.830   1.801  1.00  0.00           C  
+ATOM    617  H   ALA    16      -0.942  -1.602  -0.129  1.00  0.00           H  
+ATOM    618  HA  ALA    16      -1.647  -3.836   1.579  1.00  0.00           H  
+ATOM    619 1HB  ALA    16       0.657  -1.935   1.366  1.00  0.00           H  
+ATOM    620 2HB  ALA    16       0.471  -2.865   2.854  1.00  0.00           H  
+ATOM    621 3HB  ALA    16       0.653  -3.698   1.311  1.00  0.00           H  
+ATOM    622  N   VAL    17      -1.430  -0.935   3.137  1.00  0.00           N  
+ATOM    623  CA  VAL    17      -2.015  -0.170   4.276  1.00  0.00           C  
+ATOM    624  C   VAL    17      -3.510   0.056   4.035  1.00  0.00           C  
+ATOM    625  O   VAL    17      -4.281   0.205   4.961  1.00  0.00           O  
+ATOM    626  CB  VAL    17      -1.310   1.182   4.389  1.00  0.00           C  
+ATOM    627  CG1 VAL    17      -2.077   2.082   5.358  1.00  0.00           C  
+ATOM    628  CG2 VAL    17       0.114   0.971   4.908  1.00  0.00           C  
+ATOM    629  H   VAL    17      -0.662  -0.575   2.649  1.00  0.00           H  
+ATOM    630  HA  VAL    17      -1.877  -0.725   5.192  1.00  0.00           H  
+ATOM    631  HB  VAL    17      -1.274   1.651   3.416  1.00  0.00           H  
+ATOM    632 1HG1 VAL    17      -3.116   2.124   5.065  1.00  0.00           H  
+ATOM    633 2HG1 VAL    17      -2.002   1.681   6.359  1.00  0.00           H  
+ATOM    634 3HG1 VAL    17      -1.656   3.076   5.337  1.00  0.00           H  
+ATOM    635 1HG2 VAL    17       0.453  -0.018   4.638  1.00  0.00           H  
+ATOM    636 2HG2 VAL    17       0.770   1.710   4.470  1.00  0.00           H  
+ATOM    637 3HG2 VAL    17       0.124   1.074   5.983  1.00  0.00           H  
+ATOM    638  N   PHE    18      -3.922   0.088   2.798  1.00  0.00           N  
+ATOM    639  CA  PHE    18      -5.365   0.311   2.497  1.00  0.00           C  
+ATOM    640  C   PHE    18      -6.197  -0.845   3.062  1.00  0.00           C  
+ATOM    641  O   PHE    18      -7.036  -0.656   3.919  1.00  0.00           O  
+ATOM    642  CB  PHE    18      -5.556   0.387   0.978  1.00  0.00           C  
+ATOM    643  CG  PHE    18      -7.024   0.278   0.641  1.00  0.00           C  
+ATOM    644  CD1 PHE    18      -7.987   0.759   1.537  1.00  0.00           C  
+ATOM    645  CD2 PHE    18      -7.423  -0.304  -0.568  1.00  0.00           C  
+ATOM    646  CE1 PHE    18      -9.348   0.656   1.223  1.00  0.00           C  
+ATOM    647  CE2 PHE    18      -8.783  -0.406  -0.882  1.00  0.00           C  
+ATOM    648  CZ  PHE    18      -9.745   0.073   0.014  1.00  0.00           C  
+ATOM    649  H   PHE    18      -3.282  -0.030   2.066  1.00  0.00           H  
+ATOM    650  HA  PHE    18      -5.686   1.239   2.945  1.00  0.00           H  
+ATOM    651 1HB  PHE    18      -5.172   1.330   0.617  1.00  0.00           H  
+ATOM    652 2HB  PHE    18      -5.018  -0.423   0.508  1.00  0.00           H  
+ATOM    653  HD1 PHE    18      -7.681   1.208   2.470  1.00  0.00           H  
+ATOM    654  HD2 PHE    18      -6.680  -0.674  -1.259  1.00  0.00           H  
+ATOM    655  HE1 PHE    18     -10.091   1.026   1.915  1.00  0.00           H  
+ATOM    656  HE2 PHE    18      -9.090  -0.856  -1.815  1.00  0.00           H  
+ATOM    657  HZ  PHE    18     -10.795  -0.005  -0.228  1.00  0.00           H  
+ATOM    658  N   LEU    19      -5.979  -2.037   2.579  1.00  0.00           N  
+ATOM    659  CA  LEU    19      -6.765  -3.200   3.080  1.00  0.00           C  
+ATOM    660  C   LEU    19      -6.378  -3.509   4.529  1.00  0.00           C  
+ATOM    661  O   LEU    19      -7.086  -4.200   5.233  1.00  0.00           O  
+ATOM    662  CB  LEU    19      -6.481  -4.418   2.201  1.00  0.00           C  
+ATOM    663  CG  LEU    19      -7.062  -4.183   0.808  1.00  0.00           C  
+ATOM    664  CD1 LEU    19      -6.748  -5.383  -0.087  1.00  0.00           C  
+ATOM    665  CD2 LEU    19      -8.579  -4.009   0.911  1.00  0.00           C  
+ATOM    666  H   LEU    19      -5.302  -2.167   1.882  1.00  0.00           H  
+ATOM    667  HA  LEU    19      -7.818  -2.965   3.035  1.00  0.00           H  
+ATOM    668 1HB  LEU    19      -5.413  -4.568   2.127  1.00  0.00           H  
+ATOM    669 2HB  LEU    19      -6.939  -5.293   2.637  1.00  0.00           H  
+ATOM    670  HG  LEU    19      -6.624  -3.291   0.380  1.00  0.00           H  
+ATOM    671 1HD1 LEU    19      -6.925  -6.296   0.460  1.00  0.00           H  
+ATOM    672 2HD1 LEU    19      -7.383  -5.357  -0.961  1.00  0.00           H  
+ATOM    673 3HD1 LEU    19      -5.713  -5.342  -0.393  1.00  0.00           H  
+ATOM    674 1HD2 LEU    19      -8.952  -4.592   1.741  1.00  0.00           H  
+ATOM    675 2HD2 LEU    19      -8.812  -2.966   1.068  1.00  0.00           H  
+ATOM    676 3HD2 LEU    19      -9.044  -4.348  -0.003  1.00  0.00           H  
+ATOM    677  N   LEU    20      -5.264  -3.004   4.984  1.00  0.00           N  
+ATOM    678  CA  LEU    20      -4.848  -3.275   6.389  1.00  0.00           C  
+ATOM    679  C   LEU    20      -5.818  -2.579   7.345  1.00  0.00           C  
+ATOM    680  O   LEU    20      -6.359  -3.186   8.246  1.00  0.00           O  
+ATOM    681  CB  LEU    20      -3.432  -2.744   6.619  1.00  0.00           C  
+ATOM    682  CG  LEU    20      -2.776  -3.523   7.760  1.00  0.00           C  
+ATOM    683  CD1 LEU    20      -3.647  -3.425   9.014  1.00  0.00           C  
+ATOM    684  CD2 LEU    20      -2.627  -4.992   7.357  1.00  0.00           C  
+ATOM    685  H   LEU    20      -4.706  -2.445   4.404  1.00  0.00           H  
+ATOM    686  HA  LEU    20      -4.868  -4.340   6.570  1.00  0.00           H  
+ATOM    687 1HB  LEU    20      -2.850  -2.868   5.717  1.00  0.00           H  
+ATOM    688 2HB  LEU    20      -3.477  -1.698   6.878  1.00  0.00           H  
+ATOM    689  HG  LEU    20      -1.800  -3.104   7.967  1.00  0.00           H  
+ATOM    690 1HD1 LEU    20      -4.060  -2.430   9.089  1.00  0.00           H  
+ATOM    691 2HD1 LEU    20      -4.451  -4.144   8.949  1.00  0.00           H  
+ATOM    692 3HD1 LEU    20      -3.048  -3.632   9.887  1.00  0.00           H  
+ATOM    693 1HD2 LEU    20      -2.760  -5.088   6.289  1.00  0.00           H  
+ATOM    694 2HD2 LEU    20      -1.644  -5.344   7.630  1.00  0.00           H  
+ATOM    695 3HD2 LEU    20      -3.374  -5.583   7.867  1.00  0.00           H  
+ATOM    696  N   LEU    21      -6.044  -1.308   7.151  1.00  0.00           N  
+ATOM    697  CA  LEU    21      -6.983  -0.574   8.046  1.00  0.00           C  
+ATOM    698  C   LEU    21      -8.401  -1.107   7.835  1.00  0.00           C  
+ATOM    699  O   LEU    21      -9.116  -1.385   8.778  1.00  0.00           O  
+ATOM    700  CB  LEU    21      -6.943   0.919   7.713  1.00  0.00           C  
+ATOM    701  CG  LEU    21      -7.964   1.663   8.575  1.00  0.00           C  
+ATOM    702  CD1 LEU    21      -7.586   1.524  10.050  1.00  0.00           C  
+ATOM    703  CD2 LEU    21      -7.972   3.144   8.188  1.00  0.00           C  
+ATOM    704  H   LEU    21      -5.599  -0.838   6.416  1.00  0.00           H  
+ATOM    705  HA  LEU    21      -6.691  -0.722   9.074  1.00  0.00           H  
+ATOM    706 1HB  LEU    21      -5.954   1.306   7.912  1.00  0.00           H  
+ATOM    707 2HB  LEU    21      -7.183   1.062   6.670  1.00  0.00           H  
+ATOM    708  HG  LEU    21      -8.946   1.242   8.413  1.00  0.00           H  
+ATOM    709 1HD1 LEU    21      -6.548   1.789  10.182  1.00  0.00           H  
+ATOM    710 2HD1 LEU    21      -8.204   2.182  10.643  1.00  0.00           H  
+ATOM    711 3HD1 LEU    21      -7.741   0.503  10.367  1.00  0.00           H  
+ATOM    712 1HD2 LEU    21      -8.009   3.234   7.112  1.00  0.00           H  
+ATOM    713 2HD2 LEU    21      -8.838   3.625   8.619  1.00  0.00           H  
+ATOM    714 3HD2 LEU    21      -7.075   3.618   8.558  1.00  0.00           H  
+ATOM    715  N   THR    22      -8.814  -1.257   6.605  1.00  0.00           N  
+ATOM    716  CA  THR    22     -10.185  -1.778   6.338  1.00  0.00           C  
+ATOM    717  C   THR    22     -10.259  -3.244   6.768  1.00  0.00           C  
+ATOM    718  O   THR    22     -11.326  -3.810   6.898  1.00  0.00           O  
+ATOM    719  CB  THR    22     -10.494  -1.668   4.843  1.00  0.00           C  
+ATOM    720  OG1 THR    22      -9.636  -0.700   4.254  1.00  0.00           O  
+ATOM    721  CG2 THR    22     -11.951  -1.246   4.649  1.00  0.00           C  
+ATOM    722  H   THR    22      -8.221  -1.030   5.857  1.00  0.00           H  
+ATOM    723  HA  THR    22     -10.905  -1.199   6.899  1.00  0.00           H  
+ATOM    724  HB  THR    22     -10.334  -2.625   4.371  1.00  0.00           H  
+ATOM    725  HG1 THR    22      -9.986  -0.483   3.387  1.00  0.00           H  
+ATOM    726 1HG2 THR    22     -12.211  -0.501   5.386  1.00  0.00           H  
+ATOM    727 2HG2 THR    22     -12.078  -0.834   3.660  1.00  0.00           H  
+ATOM    728 3HG2 THR    22     -12.593  -2.106   4.767  1.00  0.00           H  
+ATOM    729  N   SER    23      -9.132  -3.863   6.990  1.00  0.00           N  
+ATOM    730  CA  SER    23      -9.134  -5.291   7.411  1.00  0.00           C  
+ATOM    731  C   SER    23      -9.466  -5.383   8.902  1.00  0.00           C  
+ATOM    732  O   SER    23     -10.509  -5.875   9.285  1.00  0.00           O  
+ATOM    733  CB  SER    23      -7.754  -5.901   7.156  1.00  0.00           C  
+ATOM    734  OG  SER    23      -7.745  -6.526   5.879  1.00  0.00           O  
+ATOM    735  H   SER    23      -8.282  -3.386   6.879  1.00  0.00           H  
+ATOM    736  HA  SER    23      -9.877  -5.832   6.843  1.00  0.00           H  
+ATOM    737 1HB  SER    23      -7.007  -5.126   7.177  1.00  0.00           H  
+ATOM    738 2HB  SER    23      -7.537  -6.629   7.927  1.00  0.00           H  
+ATOM    739  HG  SER    23      -7.509  -7.448   6.002  1.00  0.00           H  
+ATOM    740  N   GLN    24      -8.587  -4.916   9.745  1.00  0.00           N  
+ATOM    741  CA  GLN    24      -8.852  -4.978  11.210  1.00  0.00           C  
+ATOM    742  C   GLN    24      -9.824  -3.862  11.606  1.00  0.00           C  
+ATOM    743  O   GLN    24     -10.231  -3.758  12.745  1.00  0.00           O  
+ATOM    744  CB  GLN    24      -7.537  -4.802  11.969  1.00  0.00           C  
+ATOM    745  CG  GLN    24      -6.500  -5.792  11.432  1.00  0.00           C  
+ATOM    746  CD  GLN    24      -5.476  -6.104  12.524  1.00  0.00           C  
+ATOM    747  OE1 GLN    24      -4.828  -7.130  12.488  1.00  0.00           O  
+ATOM    748  NE2 GLN    24      -5.302  -5.256  13.500  1.00  0.00           N  
+ATOM    749  H   GLN    24      -7.751  -4.525   9.417  1.00  0.00           H  
+ATOM    750  HA  GLN    24      -9.285  -5.937  11.457  1.00  0.00           H  
+ATOM    751 1HB  GLN    24      -7.176  -3.793  11.833  1.00  0.00           H  
+ATOM    752 2HB  GLN    24      -7.699  -4.989  13.019  1.00  0.00           H  
+ATOM    753 1HG  GLN    24      -6.997  -6.703  11.130  1.00  0.00           H  
+ATOM    754 2HG  GLN    24      -5.996  -5.358  10.581  1.00  0.00           H  
+ATOM    755 1HE2 GLN    24      -5.825  -4.427  13.529  1.00  0.00           H  
+ATOM    756 2HE2 GLN    24      -4.648  -5.447  14.205  1.00  0.00           H  
+ATOM    757  N   ARG    25     -10.198  -3.027  10.675  1.00  0.00           N  
+ATOM    758  CA  ARG    25     -11.143  -1.922  11.003  1.00  0.00           C  
+ATOM    759  C   ARG    25     -12.049  -1.652   9.801  1.00  0.00           C  
+ATOM    760  O   ARG    25     -13.030  -2.362   9.652  1.00  0.00           O  
+ATOM    761  CB  ARG    25     -10.351  -0.656  11.338  1.00  0.00           C  
+ATOM    762  CG  ARG    25     -10.705  -0.189  12.752  1.00  0.00           C  
+ATOM    763  CD  ARG    25      -9.587  -0.587  13.715  1.00  0.00           C  
+ATOM    764  NE  ARG    25      -8.725   0.596  13.994  1.00  0.00           N  
+ATOM    765  CZ  ARG    25      -7.545   0.432  14.527  1.00  0.00           C  
+ATOM    766  NH1 ARG    25      -7.227  -0.712  15.067  1.00  0.00           N  
+ATOM    767  NH2 ARG    25      -6.683   1.412  14.519  1.00  0.00           N  
+ATOM    768  OXT ARG    25     -11.747  -0.739   9.051  1.00  0.00           O  
+ATOM    769  H   ARG    25      -9.860  -3.126   9.761  1.00  0.00           H  
+ATOM    770  HA  ARG    25     -11.746  -2.205  11.853  1.00  0.00           H  
+ATOM    771 1HB  ARG    25      -9.293  -0.869  11.282  1.00  0.00           H  
+ATOM    772 2HB  ARG    25     -10.600   0.121  10.632  1.00  0.00           H  
+ATOM    773 1HG  ARG    25     -10.820   0.886  12.757  1.00  0.00           H  
+ATOM    774 2HG  ARG    25     -11.629  -0.653  13.064  1.00  0.00           H  
+ATOM    775 1HD  ARG    25     -10.017  -0.945  14.639  1.00  0.00           H  
+ATOM    776 2HD  ARG    25      -8.990  -1.370  13.270  1.00  0.00           H  
+ATOM    777  HE  ARG    25      -9.044   1.497  13.776  1.00  0.00           H  
+ATOM    778 1HH1 ARG    25      -7.888  -1.463  15.072  1.00  0.00           H  
+ATOM    779 2HH1 ARG    25      -6.324  -0.838  15.476  1.00  0.00           H  
+ATOM    780 1HH2 ARG    25      -6.927   2.288  14.105  1.00  0.00           H  
+ATOM    781 2HH2 ARG    25      -5.780   1.285  14.928  1.00  0.00           H  
+TER     782      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        3                                                                  
+ATOM    783  N   GLY     1      17.294   2.900  -8.114  1.00  0.00           N  
+ATOM    784  CA  GLY     1      15.844   3.111  -8.549  1.00  0.00           C  
+ATOM    785  C   GLY     1      15.346   4.497  -8.779  1.00  0.00           C  
+ATOM    786  O   GLY     1      14.493   4.730  -9.613  1.00  0.00           O  
+ATOM    787 1H   GLY     1      17.917   3.517  -8.673  1.00  0.00           H  
+ATOM    788 2H   GLY     1      17.391   3.134  -7.106  1.00  0.00           H  
+ATOM    789 3H   GLY     1      17.561   1.907  -8.268  1.00  0.00           H  
+ATOM    790 1HA  GLY     1      15.747   3.029  -9.516  1.00  0.00           H  
+ATOM    791 2HA  GLY     1      15.196   2.340  -7.957  1.00  0.00           H  
+ATOM    792  N   SER     2      15.859   5.450  -8.050  1.00  0.00           N  
+ATOM    793  CA  SER     2      15.404   6.857  -8.231  1.00  0.00           C  
+ATOM    794  C   SER     2      13.950   6.984  -7.773  1.00  0.00           C  
+ATOM    795  O   SER     2      13.426   6.124  -7.093  1.00  0.00           O  
+ATOM    796  CB  SER     2      15.508   7.246  -9.706  1.00  0.00           C  
+ATOM    797  OG  SER     2      15.662   8.656  -9.811  1.00  0.00           O  
+ATOM    798  H   SER     2      16.546   5.239  -7.382  1.00  0.00           H  
+ATOM    799  HA  SER     2      16.025   7.515  -7.640  1.00  0.00           H  
+ATOM    800 1HB  SER     2      16.364   6.763 -10.148  1.00  0.00           H  
+ATOM    801 2HB  SER     2      14.612   6.931 -10.225  1.00  0.00           H  
+ATOM    802  HG  SER     2      16.580   8.869  -9.631  1.00  0.00           H  
+ATOM    803  N   GLU     3      13.292   8.049  -8.142  1.00  0.00           N  
+ATOM    804  CA  GLU     3      11.872   8.225  -7.730  1.00  0.00           C  
+ATOM    805  C   GLU     3      11.002   7.209  -8.471  1.00  0.00           C  
+ATOM    806  O   GLU     3       9.861   6.981  -8.119  1.00  0.00           O  
+ATOM    807  CB  GLU     3      11.412   9.642  -8.073  1.00  0.00           C  
+ATOM    808  CG  GLU     3      11.580   9.882  -9.574  1.00  0.00           C  
+ATOM    809  CD  GLU     3      10.265  10.399 -10.161  1.00  0.00           C  
+ATOM    810  OE1 GLU     3       9.446   9.578 -10.540  1.00  0.00           O  
+ATOM    811  OE2 GLU     3      10.100  11.606 -10.223  1.00  0.00           O  
+ATOM    812  H   GLU     3      13.731   8.730  -8.692  1.00  0.00           H  
+ATOM    813  HA  GLU     3      11.785   8.065  -6.665  1.00  0.00           H  
+ATOM    814 1HB  GLU     3      10.372   9.760  -7.803  1.00  0.00           H  
+ATOM    815 2HB  GLU     3      12.009  10.358  -7.527  1.00  0.00           H  
+ATOM    816 1HG  GLU     3      12.360  10.613  -9.737  1.00  0.00           H  
+ATOM    817 2HG  GLU     3      11.848   8.956 -10.058  1.00  0.00           H  
+ATOM    818  N   LYS     4      11.531   6.593  -9.493  1.00  0.00           N  
+ATOM    819  CA  LYS     4      10.735   5.589 -10.251  1.00  0.00           C  
+ATOM    820  C   LYS     4      10.544   4.340  -9.389  1.00  0.00           C  
+ATOM    821  O   LYS     4       9.444   4.010  -8.991  1.00  0.00           O  
+ATOM    822  CB  LYS     4      11.472   5.219 -11.540  1.00  0.00           C  
+ATOM    823  CG  LYS     4      10.803   5.915 -12.727  1.00  0.00           C  
+ATOM    824  CD  LYS     4      11.055   7.420 -12.643  1.00  0.00           C  
+ATOM    825  CE  LYS     4      10.219   8.138 -13.704  1.00  0.00           C  
+ATOM    826  NZ  LYS     4      10.995   8.226 -14.973  1.00  0.00           N  
+ATOM    827  H   LYS     4      12.455   6.788  -9.758  1.00  0.00           H  
+ATOM    828  HA  LYS     4       9.770   6.008 -10.496  1.00  0.00           H  
+ATOM    829 1HB  LYS     4      12.502   5.536 -11.469  1.00  0.00           H  
+ATOM    830 2HB  LYS     4      11.432   4.149 -11.683  1.00  0.00           H  
+ATOM    831 1HG  LYS     4      11.213   5.530 -13.648  1.00  0.00           H  
+ATOM    832 2HG  LYS     4       9.739   5.728 -12.700  1.00  0.00           H  
+ATOM    833 1HD  LYS     4      10.777   7.778 -11.660  1.00  0.00           H  
+ATOM    834 2HD  LYS     4      12.102   7.620 -12.815  1.00  0.00           H  
+ATOM    835 1HE  LYS     4       9.307   7.586 -13.878  1.00  0.00           H  
+ATOM    836 2HE  LYS     4       9.977   9.133 -13.361  1.00  0.00           H  
+ATOM    837 1HZ  LYS     4      11.988   7.976 -14.792  1.00  0.00           H  
+ATOM    838 2HZ  LYS     4      10.596   7.566 -15.671  1.00  0.00           H  
+ATOM    839 3HZ  LYS     4      10.943   9.197 -15.345  1.00  0.00           H  
+ATOM    840  N   MET     5      11.608   3.643  -9.091  1.00  0.00           N  
+ATOM    841  CA  MET     5      11.480   2.422  -8.247  1.00  0.00           C  
+ATOM    842  C   MET     5      10.933   2.817  -6.875  1.00  0.00           C  
+ATOM    843  O   MET     5      10.316   2.027  -6.188  1.00  0.00           O  
+ATOM    844  CB  MET     5      12.852   1.762  -8.083  1.00  0.00           C  
+ATOM    845  CG  MET     5      12.669   0.314  -7.624  1.00  0.00           C  
+ATOM    846  SD  MET     5      14.023  -0.146  -6.515  1.00  0.00           S  
+ATOM    847  CE  MET     5      13.735  -1.932  -6.542  1.00  0.00           C  
+ATOM    848  H   MET     5      12.488   3.927  -9.416  1.00  0.00           H  
+ATOM    849  HA  MET     5      10.800   1.728  -8.720  1.00  0.00           H  
+ATOM    850 1HB  MET     5      13.373   1.777  -9.030  1.00  0.00           H  
+ATOM    851 2HB  MET     5      13.426   2.303  -7.346  1.00  0.00           H  
+ATOM    852 1HG  MET     5      11.728   0.218  -7.102  1.00  0.00           H  
+ATOM    853 2HG  MET     5      12.671  -0.339  -8.483  1.00  0.00           H  
+ATOM    854 1HE  MET     5      13.310  -2.214  -7.493  1.00  0.00           H  
+ATOM    855 2HE  MET     5      14.674  -2.448  -6.403  1.00  0.00           H  
+ATOM    856 3HE  MET     5      13.050  -2.197  -5.749  1.00  0.00           H  
+ATOM    857  N   SER     6      11.149   4.039  -6.473  1.00  0.00           N  
+ATOM    858  CA  SER     6      10.638   4.489  -5.147  1.00  0.00           C  
+ATOM    859  C   SER     6       9.124   4.696  -5.234  1.00  0.00           C  
+ATOM    860  O   SER     6       8.404   4.510  -4.272  1.00  0.00           O  
+ATOM    861  CB  SER     6      11.316   5.805  -4.761  1.00  0.00           C  
+ATOM    862  OG  SER     6      10.447   6.890  -5.065  1.00  0.00           O  
+ATOM    863  H   SER     6      11.646   4.663  -7.044  1.00  0.00           H  
+ATOM    864  HA  SER     6      10.858   3.739  -4.402  1.00  0.00           H  
+ATOM    865 1HB  SER     6      11.529   5.806  -3.705  1.00  0.00           H  
+ATOM    866 2HB  SER     6      12.241   5.906  -5.313  1.00  0.00           H  
+ATOM    867  HG  SER     6       9.918   7.072  -4.284  1.00  0.00           H  
+ATOM    868  N   THR     7       8.636   5.074  -6.384  1.00  0.00           N  
+ATOM    869  CA  THR     7       7.170   5.287  -6.539  1.00  0.00           C  
+ATOM    870  C   THR     7       6.488   3.931  -6.710  1.00  0.00           C  
+ATOM    871  O   THR     7       5.307   3.779  -6.466  1.00  0.00           O  
+ATOM    872  CB  THR     7       6.909   6.150  -7.775  1.00  0.00           C  
+ATOM    873  OG1 THR     7       7.537   7.414  -7.611  1.00  0.00           O  
+ATOM    874  CG2 THR     7       5.403   6.346  -7.955  1.00  0.00           C  
+ATOM    875  H   THR     7       9.233   5.211  -7.148  1.00  0.00           H  
+ATOM    876  HA  THR     7       6.779   5.781  -5.661  1.00  0.00           H  
+ATOM    877  HB  THR     7       7.310   5.659  -8.648  1.00  0.00           H  
+ATOM    878  HG1 THR     7       7.195   8.005  -8.285  1.00  0.00           H  
+ATOM    879 1HG2 THR     7       4.898   6.133  -7.025  1.00  0.00           H  
+ATOM    880 2HG2 THR     7       5.205   7.366  -8.248  1.00  0.00           H  
+ATOM    881 3HG2 THR     7       5.042   5.676  -8.722  1.00  0.00           H  
+ATOM    882  N   ALA     8       7.230   2.941  -7.119  1.00  0.00           N  
+ATOM    883  CA  ALA     8       6.640   1.592  -7.296  1.00  0.00           C  
+ATOM    884  C   ALA     8       6.558   0.924  -5.928  1.00  0.00           C  
+ATOM    885  O   ALA     8       5.608   0.237  -5.608  1.00  0.00           O  
+ATOM    886  CB  ALA     8       7.532   0.767  -8.225  1.00  0.00           C  
+ATOM    887  H   ALA     8       8.184   3.083  -7.297  1.00  0.00           H  
+ATOM    888  HA  ALA     8       5.651   1.679  -7.719  1.00  0.00           H  
+ATOM    889 1HB  ALA     8       7.723   1.328  -9.129  1.00  0.00           H  
+ATOM    890 2HB  ALA     8       8.466   0.555  -7.730  1.00  0.00           H  
+ATOM    891 3HB  ALA     8       7.035  -0.158  -8.473  1.00  0.00           H  
+ATOM    892  N   ILE     9       7.550   1.142  -5.112  1.00  0.00           N  
+ATOM    893  CA  ILE     9       7.539   0.542  -3.756  1.00  0.00           C  
+ATOM    894  C   ILE     9       6.414   1.185  -2.945  1.00  0.00           C  
+ATOM    895  O   ILE     9       5.740   0.535  -2.171  1.00  0.00           O  
+ATOM    896  CB  ILE     9       8.884   0.797  -3.072  1.00  0.00           C  
+ATOM    897  CG1 ILE     9       9.176   2.299  -3.060  1.00  0.00           C  
+ATOM    898  CG2 ILE     9       9.989   0.072  -3.840  1.00  0.00           C  
+ATOM    899  CD1 ILE     9      10.384   2.577  -2.161  1.00  0.00           C  
+ATOM    900  H   ILE     9       8.297   1.710  -5.391  1.00  0.00           H  
+ATOM    901  HA  ILE     9       7.367  -0.519  -3.835  1.00  0.00           H  
+ATOM    902  HB  ILE     9       8.847   0.427  -2.057  1.00  0.00           H  
+ATOM    903 1HG1 ILE     9       9.391   2.631  -4.066  1.00  0.00           H  
+ATOM    904 2HG1 ILE     9       8.318   2.831  -2.681  1.00  0.00           H  
+ATOM    905 1HG2 ILE     9       9.699  -0.032  -4.875  1.00  0.00           H  
+ATOM    906 2HG2 ILE     9      10.903   0.645  -3.780  1.00  0.00           H  
+ATOM    907 3HG2 ILE     9      10.147  -0.905  -3.409  1.00  0.00           H  
+ATOM    908 1HD1 ILE     9      10.330   1.950  -1.283  1.00  0.00           H  
+ATOM    909 2HD1 ILE     9      11.293   2.362  -2.702  1.00  0.00           H  
+ATOM    910 3HD1 ILE     9      10.380   3.615  -1.864  1.00  0.00           H  
+ATOM    911  N   SER    10       6.199   2.460  -3.128  1.00  0.00           N  
+ATOM    912  CA  SER    10       5.110   3.148  -2.381  1.00  0.00           C  
+ATOM    913  C   SER    10       3.761   2.614  -2.863  1.00  0.00           C  
+ATOM    914  O   SER    10       2.843   2.425  -2.090  1.00  0.00           O  
+ATOM    915  CB  SER    10       5.183   4.654  -2.639  1.00  0.00           C  
+ATOM    916  OG  SER    10       6.519   5.098  -2.452  1.00  0.00           O  
+ATOM    917  H   SER    10       6.750   2.964  -3.765  1.00  0.00           H  
+ATOM    918  HA  SER    10       5.218   2.955  -1.324  1.00  0.00           H  
+ATOM    919 1HB  SER    10       4.877   4.863  -3.650  1.00  0.00           H  
+ATOM    920 2HB  SER    10       4.522   5.167  -1.952  1.00  0.00           H  
+ATOM    921  HG  SER    10       6.512   6.059  -2.430  1.00  0.00           H  
+ATOM    922  N   VAL    11       3.634   2.367  -4.138  1.00  0.00           N  
+ATOM    923  CA  VAL    11       2.347   1.841  -4.673  1.00  0.00           C  
+ATOM    924  C   VAL    11       2.020   0.511  -3.989  1.00  0.00           C  
+ATOM    925  O   VAL    11       0.881   0.234  -3.666  1.00  0.00           O  
+ATOM    926  CB  VAL    11       2.476   1.628  -6.183  1.00  0.00           C  
+ATOM    927  CG1 VAL    11       1.207   0.965  -6.722  1.00  0.00           C  
+ATOM    928  CG2 VAL    11       2.672   2.980  -6.872  1.00  0.00           C  
+ATOM    929  H   VAL    11       4.389   2.525  -4.745  1.00  0.00           H  
+ATOM    930  HA  VAL    11       1.557   2.551  -4.474  1.00  0.00           H  
+ATOM    931  HB  VAL    11       3.326   0.993  -6.385  1.00  0.00           H  
+ATOM    932 1HG1 VAL    11       0.342   1.515  -6.384  1.00  0.00           H  
+ATOM    933 2HG1 VAL    11       1.233   0.963  -7.802  1.00  0.00           H  
+ATOM    934 3HG1 VAL    11       1.151  -0.052  -6.361  1.00  0.00           H  
+ATOM    935 1HG2 VAL    11       2.704   3.762  -6.127  1.00  0.00           H  
+ATOM    936 2HG2 VAL    11       3.599   2.972  -7.426  1.00  0.00           H  
+ATOM    937 3HG2 VAL    11       1.850   3.161  -7.549  1.00  0.00           H  
+ATOM    938  N   LEU    12       3.006  -0.314  -3.759  1.00  0.00           N  
+ATOM    939  CA  LEU    12       2.740  -1.618  -3.087  1.00  0.00           C  
+ATOM    940  C   LEU    12       2.177  -1.355  -1.687  1.00  0.00           C  
+ATOM    941  O   LEU    12       1.213  -1.966  -1.267  1.00  0.00           O  
+ATOM    942  CB  LEU    12       4.044  -2.413  -2.973  1.00  0.00           C  
+ATOM    943  CG  LEU    12       4.195  -3.326  -4.191  1.00  0.00           C  
+ATOM    944  CD1 LEU    12       5.679  -3.617  -4.426  1.00  0.00           C  
+ATOM    945  CD2 LEU    12       3.453  -4.640  -3.937  1.00  0.00           C  
+ATOM    946  H   LEU    12       3.921  -0.073  -4.023  1.00  0.00           H  
+ATOM    947  HA  LEU    12       2.022  -2.182  -3.664  1.00  0.00           H  
+ATOM    948 1HB  LEU    12       4.879  -1.730  -2.928  1.00  0.00           H  
+ATOM    949 2HB  LEU    12       4.022  -3.014  -2.076  1.00  0.00           H  
+ATOM    950  HG  LEU    12       3.781  -2.837  -5.061  1.00  0.00           H  
+ATOM    951 1HD1 LEU    12       6.160  -3.822  -3.482  1.00  0.00           H  
+ATOM    952 2HD1 LEU    12       5.779  -4.475  -5.076  1.00  0.00           H  
+ATOM    953 3HD1 LEU    12       6.144  -2.759  -4.889  1.00  0.00           H  
+ATOM    954 1HD2 LEU    12       2.996  -4.613  -2.959  1.00  0.00           H  
+ATOM    955 2HD2 LEU    12       2.689  -4.773  -4.688  1.00  0.00           H  
+ATOM    956 3HD2 LEU    12       4.150  -5.463  -3.986  1.00  0.00           H  
+ATOM    957  N   LEU    13       2.772  -0.446  -0.965  1.00  0.00           N  
+ATOM    958  CA  LEU    13       2.272  -0.136   0.408  1.00  0.00           C  
+ATOM    959  C   LEU    13       0.883   0.495   0.305  1.00  0.00           C  
+ATOM    960  O   LEU    13       0.085   0.415   1.217  1.00  0.00           O  
+ATOM    961  CB  LEU    13       3.220   0.847   1.109  1.00  0.00           C  
+ATOM    962  CG  LEU    13       4.639   0.708   0.550  1.00  0.00           C  
+ATOM    963  CD1 LEU    13       5.625   1.405   1.484  1.00  0.00           C  
+ATOM    964  CD2 LEU    13       5.003  -0.775   0.435  1.00  0.00           C  
+ATOM    965  H   LEU    13       3.544   0.035  -1.327  1.00  0.00           H  
+ATOM    966  HA  LEU    13       2.211  -1.049   0.981  1.00  0.00           H  
+ATOM    967 1HB  LEU    13       2.870   1.857   0.950  1.00  0.00           H  
+ATOM    968 2HB  LEU    13       3.233   0.635   2.168  1.00  0.00           H  
+ATOM    969  HG  LEU    13       4.686   1.170  -0.427  1.00  0.00           H  
+ATOM    970 1HD1 LEU    13       5.215   1.436   2.482  1.00  0.00           H  
+ATOM    971 2HD1 LEU    13       6.557   0.862   1.493  1.00  0.00           H  
+ATOM    972 3HD1 LEU    13       5.798   2.413   1.135  1.00  0.00           H  
+ATOM    973 1HD2 LEU    13       4.226  -1.372   0.890  1.00  0.00           H  
+ATOM    974 2HD2 LEU    13       5.096  -1.044  -0.609  1.00  0.00           H  
+ATOM    975 3HD2 LEU    13       5.939  -0.957   0.938  1.00  0.00           H  
+ATOM    976  N   ALA    14       0.593   1.122  -0.801  1.00  0.00           N  
+ATOM    977  CA  ALA    14      -0.742   1.761  -0.971  1.00  0.00           C  
+ATOM    978  C   ALA    14      -1.838   0.753  -0.614  1.00  0.00           C  
+ATOM    979  O   ALA    14      -2.476   0.851   0.418  1.00  0.00           O  
+ATOM    980  CB  ALA    14      -0.908   2.206  -2.427  1.00  0.00           C  
+ATOM    981  H   ALA    14       1.256   1.172  -1.522  1.00  0.00           H  
+ATOM    982  HA  ALA    14      -0.816   2.619  -0.320  1.00  0.00           H  
+ATOM    983 1HB  ALA    14      -0.796   1.353  -3.080  1.00  0.00           H  
+ATOM    984 2HB  ALA    14      -1.888   2.637  -2.563  1.00  0.00           H  
+ATOM    985 3HB  ALA    14      -0.155   2.943  -2.665  1.00  0.00           H  
+ATOM    986  N   GLN    15      -2.061  -0.218  -1.455  1.00  0.00           N  
+ATOM    987  CA  GLN    15      -3.112  -1.231  -1.159  1.00  0.00           C  
+ATOM    988  C   GLN    15      -2.794  -1.921   0.169  1.00  0.00           C  
+ATOM    989  O   GLN    15      -3.677  -2.255   0.933  1.00  0.00           O  
+ATOM    990  CB  GLN    15      -3.148  -2.274  -2.280  1.00  0.00           C  
+ATOM    991  CG  GLN    15      -1.904  -3.162  -2.191  1.00  0.00           C  
+ATOM    992  CD  GLN    15      -1.905  -4.155  -3.355  1.00  0.00           C  
+ATOM    993  OE1 GLN    15      -2.341  -5.279  -3.209  1.00  0.00           O  
+ATOM    994  NE2 GLN    15      -1.430  -3.784  -4.512  1.00  0.00           N  
+ATOM    995  H   GLN    15      -1.536  -0.283  -2.279  1.00  0.00           H  
+ATOM    996  HA  GLN    15      -4.074  -0.743  -1.091  1.00  0.00           H  
+ATOM    997 1HB  GLN    15      -4.034  -2.882  -2.177  1.00  0.00           H  
+ATOM    998 2HB  GLN    15      -3.163  -1.773  -3.237  1.00  0.00           H  
+ATOM    999 1HG  GLN    15      -1.018  -2.546  -2.242  1.00  0.00           H  
+ATOM   1000 2HG  GLN    15      -1.914  -3.703  -1.258  1.00  0.00           H  
+ATOM   1001 1HE2 GLN    15      -1.426  -4.412  -5.264  1.00  0.00           H  
+ATOM   1002 2HE2 GLN    15      -1.079  -2.876  -4.629  1.00  0.00           H  
+ATOM   1003  N   ALA    16      -1.538  -2.141   0.447  1.00  0.00           N  
+ATOM   1004  CA  ALA    16      -1.164  -2.812   1.724  1.00  0.00           C  
+ATOM   1005  C   ALA    16      -1.896  -2.143   2.891  1.00  0.00           C  
+ATOM   1006  O   ALA    16      -2.329  -2.797   3.818  1.00  0.00           O  
+ATOM   1007  CB  ALA    16       0.348  -2.700   1.932  1.00  0.00           C  
+ATOM   1008  H   ALA    16      -0.840  -1.865  -0.185  1.00  0.00           H  
+ATOM   1009  HA  ALA    16      -1.443  -3.853   1.675  1.00  0.00           H  
+ATOM   1010 1HB  ALA    16       0.839  -2.616   0.973  1.00  0.00           H  
+ATOM   1011 2HB  ALA    16       0.566  -1.825   2.525  1.00  0.00           H  
+ATOM   1012 3HB  ALA    16       0.707  -3.580   2.444  1.00  0.00           H  
+ATOM   1013  N   VAL    17      -2.039  -0.846   2.854  1.00  0.00           N  
+ATOM   1014  CA  VAL    17      -2.743  -0.143   3.964  1.00  0.00           C  
+ATOM   1015  C   VAL    17      -4.257  -0.261   3.771  1.00  0.00           C  
+ATOM   1016  O   VAL    17      -5.005  -0.379   4.719  1.00  0.00           O  
+ATOM   1017  CB  VAL    17      -2.343   1.334   3.968  1.00  0.00           C  
+ATOM   1018  CG1 VAL    17      -3.279   2.116   4.893  1.00  0.00           C  
+ATOM   1019  CG2 VAL    17      -0.904   1.466   4.471  1.00  0.00           C  
+ATOM   1020  H   VAL    17      -1.682  -0.336   2.097  1.00  0.00           H  
+ATOM   1021  HA  VAL    17      -2.465  -0.592   4.907  1.00  0.00           H  
+ATOM   1022  HB  VAL    17      -2.415   1.729   2.965  1.00  0.00           H  
+ATOM   1023 1HG1 VAL    17      -4.298   2.003   4.552  1.00  0.00           H  
+ATOM   1024 2HG1 VAL    17      -3.191   1.734   5.899  1.00  0.00           H  
+ATOM   1025 3HG1 VAL    17      -3.008   3.161   4.879  1.00  0.00           H  
+ATOM   1026 1HG2 VAL    17      -0.699   0.682   5.184  1.00  0.00           H  
+ATOM   1027 2HG2 VAL    17      -0.222   1.380   3.636  1.00  0.00           H  
+ATOM   1028 3HG2 VAL    17      -0.774   2.427   4.944  1.00  0.00           H  
+ATOM   1029  N   PHE    18      -4.712  -0.226   2.550  1.00  0.00           N  
+ATOM   1030  CA  PHE    18      -6.178  -0.333   2.291  1.00  0.00           C  
+ATOM   1031  C   PHE    18      -6.755  -1.518   3.071  1.00  0.00           C  
+ATOM   1032  O   PHE    18      -7.667  -1.370   3.865  1.00  0.00           O  
+ATOM   1033  CB  PHE    18      -6.415  -0.539   0.795  1.00  0.00           C  
+ATOM   1034  CG  PHE    18      -7.544   0.350   0.337  1.00  0.00           C  
+ATOM   1035  CD1 PHE    18      -8.577   0.685   1.223  1.00  0.00           C  
+ATOM   1036  CD2 PHE    18      -7.560   0.842  -0.974  1.00  0.00           C  
+ATOM   1037  CE1 PHE    18      -9.624   1.510   0.796  1.00  0.00           C  
+ATOM   1038  CE2 PHE    18      -8.608   1.667  -1.399  1.00  0.00           C  
+ATOM   1039  CZ  PHE    18      -9.639   2.001  -0.514  1.00  0.00           C  
+ATOM   1040  H   PHE    18      -4.088  -0.126   1.800  1.00  0.00           H  
+ATOM   1041  HA  PHE    18      -6.666   0.577   2.610  1.00  0.00           H  
+ATOM   1042 1HB  PHE    18      -5.516  -0.289   0.251  1.00  0.00           H  
+ATOM   1043 2HB  PHE    18      -6.674  -1.571   0.609  1.00  0.00           H  
+ATOM   1044  HD1 PHE    18      -8.563   0.306   2.234  1.00  0.00           H  
+ATOM   1045  HD2 PHE    18      -6.763   0.584  -1.655  1.00  0.00           H  
+ATOM   1046  HE1 PHE    18     -10.419   1.768   1.479  1.00  0.00           H  
+ATOM   1047  HE2 PHE    18      -8.619   2.046  -2.410  1.00  0.00           H  
+ATOM   1048  HZ  PHE    18     -10.447   2.637  -0.843  1.00  0.00           H  
+ATOM   1049  N   LEU    19      -6.232  -2.695   2.857  1.00  0.00           N  
+ATOM   1050  CA  LEU    19      -6.753  -3.879   3.596  1.00  0.00           C  
+ATOM   1051  C   LEU    19      -6.557  -3.647   5.093  1.00  0.00           C  
+ATOM   1052  O   LEU    19      -7.313  -4.128   5.914  1.00  0.00           O  
+ATOM   1053  CB  LEU    19      -5.988  -5.134   3.166  1.00  0.00           C  
+ATOM   1054  CG  LEU    19      -4.560  -5.076   3.713  1.00  0.00           C  
+ATOM   1055  CD1 LEU    19      -4.482  -5.852   5.030  1.00  0.00           C  
+ATOM   1056  CD2 LEU    19      -3.601  -5.703   2.698  1.00  0.00           C  
+ATOM   1057  H   LEU    19      -5.497  -2.799   2.218  1.00  0.00           H  
+ATOM   1058  HA  LEU    19      -7.805  -4.005   3.383  1.00  0.00           H  
+ATOM   1059 1HB  LEU    19      -6.487  -6.010   3.553  1.00  0.00           H  
+ATOM   1060 2HB  LEU    19      -5.955  -5.186   2.088  1.00  0.00           H  
+ATOM   1061  HG  LEU    19      -4.281  -4.046   3.885  1.00  0.00           H  
+ATOM   1062 1HD1 LEU    19      -5.188  -5.437   5.735  1.00  0.00           H  
+ATOM   1063 2HD1 LEU    19      -4.718  -6.891   4.852  1.00  0.00           H  
+ATOM   1064 3HD1 LEU    19      -3.483  -5.774   5.434  1.00  0.00           H  
+ATOM   1065 1HD2 LEU    19      -3.733  -5.228   1.737  1.00  0.00           H  
+ATOM   1066 2HD2 LEU    19      -2.583  -5.565   3.031  1.00  0.00           H  
+ATOM   1067 3HD2 LEU    19      -3.811  -6.758   2.609  1.00  0.00           H  
+ATOM   1068  N   LEU    20      -5.548  -2.901   5.449  1.00  0.00           N  
+ATOM   1069  CA  LEU    20      -5.293  -2.620   6.888  1.00  0.00           C  
+ATOM   1070  C   LEU    20      -6.445  -1.787   7.449  1.00  0.00           C  
+ATOM   1071  O   LEU    20      -6.783  -1.876   8.613  1.00  0.00           O  
+ATOM   1072  CB  LEU    20      -3.984  -1.843   7.025  1.00  0.00           C  
+ATOM   1073  CG  LEU    20      -3.106  -2.510   8.083  1.00  0.00           C  
+ATOM   1074  CD1 LEU    20      -3.931  -2.757   9.347  1.00  0.00           C  
+ATOM   1075  CD2 LEU    20      -2.589  -3.846   7.546  1.00  0.00           C  
+ATOM   1076  H   LEU    20      -4.958  -2.519   4.766  1.00  0.00           H  
+ATOM   1077  HA  LEU    20      -5.221  -3.551   7.432  1.00  0.00           H  
+ATOM   1078 1HB  LEU    20      -3.468  -1.841   6.074  1.00  0.00           H  
+ATOM   1079 2HB  LEU    20      -4.197  -0.826   7.321  1.00  0.00           H  
+ATOM   1080  HG  LEU    20      -2.271  -1.867   8.319  1.00  0.00           H  
+ATOM   1081 1HD1 LEU    20      -4.629  -1.946   9.485  1.00  0.00           H  
+ATOM   1082 2HD1 LEU    20      -4.474  -3.686   9.247  1.00  0.00           H  
+ATOM   1083 3HD1 LEU    20      -3.272  -2.816  10.201  1.00  0.00           H  
+ATOM   1084 1HD2 LEU    20      -3.101  -4.086   6.624  1.00  0.00           H  
+ATOM   1085 2HD2 LEU    20      -1.527  -3.773   7.359  1.00  0.00           H  
+ATOM   1086 3HD2 LEU    20      -2.773  -4.624   8.272  1.00  0.00           H  
+ATOM   1087  N   LEU    21      -7.056  -0.981   6.624  1.00  0.00           N  
+ATOM   1088  CA  LEU    21      -8.192  -0.144   7.099  1.00  0.00           C  
+ATOM   1089  C   LEU    21      -9.399  -1.044   7.353  1.00  0.00           C  
+ATOM   1090  O   LEU    21     -10.008  -1.005   8.403  1.00  0.00           O  
+ATOM   1091  CB  LEU    21      -8.550   0.894   6.032  1.00  0.00           C  
+ATOM   1092  CG  LEU    21      -7.282   1.354   5.308  1.00  0.00           C  
+ATOM   1093  CD1 LEU    21      -7.581   2.633   4.524  1.00  0.00           C  
+ATOM   1094  CD2 LEU    21      -6.176   1.631   6.331  1.00  0.00           C  
+ATOM   1095  H   LEU    21      -6.769  -0.930   5.691  1.00  0.00           H  
+ATOM   1096  HA  LEU    21      -7.915   0.358   8.015  1.00  0.00           H  
+ATOM   1097 1HB  LEU    21      -9.231   0.453   5.318  1.00  0.00           H  
+ATOM   1098 2HB  LEU    21      -9.022   1.744   6.501  1.00  0.00           H  
+ATOM   1099  HG  LEU    21      -6.959   0.582   4.624  1.00  0.00           H  
+ATOM   1100 1HD1 LEU    21      -8.650   2.775   4.463  1.00  0.00           H  
+ATOM   1101 2HD1 LEU    21      -7.135   3.478   5.029  1.00  0.00           H  
+ATOM   1102 3HD1 LEU    21      -7.170   2.550   3.528  1.00  0.00           H  
+ATOM   1103 1HD2 LEU    21      -6.621   1.866   7.286  1.00  0.00           H  
+ATOM   1104 2HD2 LEU    21      -5.551   0.755   6.430  1.00  0.00           H  
+ATOM   1105 3HD2 LEU    21      -5.577   2.465   5.997  1.00  0.00           H  
+ATOM   1106  N   THR    22      -9.749  -1.855   6.396  1.00  0.00           N  
+ATOM   1107  CA  THR    22     -10.918  -2.759   6.579  1.00  0.00           C  
+ATOM   1108  C   THR    22     -10.541  -3.899   7.531  1.00  0.00           C  
+ATOM   1109  O   THR    22     -11.369  -4.707   7.902  1.00  0.00           O  
+ATOM   1110  CB  THR    22     -11.331  -3.337   5.224  1.00  0.00           C  
+ATOM   1111  OG1 THR    22     -10.167  -3.658   4.473  1.00  0.00           O  
+ATOM   1112  CG2 THR    22     -12.167  -2.306   4.462  1.00  0.00           C  
+ATOM   1113  H   THR    22      -9.242  -1.870   5.554  1.00  0.00           H  
+ATOM   1114  HA  THR    22     -11.742  -2.200   6.997  1.00  0.00           H  
+ATOM   1115  HB  THR    22     -11.918  -4.229   5.376  1.00  0.00           H  
+ATOM   1116  HG1 THR    22     -10.010  -2.946   3.847  1.00  0.00           H  
+ATOM   1117 1HG2 THR    22     -12.875  -1.848   5.135  1.00  0.00           H  
+ATOM   1118 2HG2 THR    22     -11.517  -1.547   4.052  1.00  0.00           H  
+ATOM   1119 3HG2 THR    22     -12.698  -2.796   3.659  1.00  0.00           H  
+ATOM   1120  N   SER    23      -9.300  -3.971   7.929  1.00  0.00           N  
+ATOM   1121  CA  SER    23      -8.877  -5.061   8.854  1.00  0.00           C  
+ATOM   1122  C   SER    23      -9.161  -4.650  10.301  1.00  0.00           C  
+ATOM   1123  O   SER    23      -9.931  -5.283  10.997  1.00  0.00           O  
+ATOM   1124  CB  SER    23      -7.380  -5.319   8.687  1.00  0.00           C  
+ATOM   1125  OG  SER    23      -7.180  -6.656   8.248  1.00  0.00           O  
+ATOM   1126  H   SER    23      -8.645  -3.310   7.618  1.00  0.00           H  
+ATOM   1127  HA  SER    23      -9.424  -5.961   8.623  1.00  0.00           H  
+ATOM   1128 1HB  SER    23      -6.973  -4.641   7.954  1.00  0.00           H  
+ATOM   1129 2HB  SER    23      -6.880  -5.162   9.636  1.00  0.00           H  
+ATOM   1130  HG  SER    23      -7.461  -6.712   7.332  1.00  0.00           H  
+ATOM   1131  N   GLN    24      -8.543  -3.597  10.763  1.00  0.00           N  
+ATOM   1132  CA  GLN    24      -8.771  -3.150  12.161  1.00  0.00           C  
+ATOM   1133  C   GLN    24     -10.150  -2.482  12.259  1.00  0.00           C  
+ATOM   1134  O   GLN    24     -10.833  -2.597  13.258  1.00  0.00           O  
+ATOM   1135  CB  GLN    24      -7.650  -2.166  12.551  1.00  0.00           C  
+ATOM   1136  CG  GLN    24      -8.187  -1.066  13.469  1.00  0.00           C  
+ATOM   1137  CD  GLN    24      -7.017  -0.330  14.126  1.00  0.00           C  
+ATOM   1138  OE1 GLN    24      -5.872  -0.691  13.934  1.00  0.00           O  
+ATOM   1139  NE2 GLN    24      -7.257   0.695  14.896  1.00  0.00           N  
+ATOM   1140  H   GLN    24      -7.925  -3.100  10.190  1.00  0.00           H  
+ATOM   1141  HA  GLN    24      -8.742  -4.006  12.820  1.00  0.00           H  
+ATOM   1142 1HB  GLN    24      -6.868  -2.705  13.064  1.00  0.00           H  
+ATOM   1143 2HB  GLN    24      -7.245  -1.717  11.657  1.00  0.00           H  
+ATOM   1144 1HG  GLN    24      -8.771  -0.369  12.885  1.00  0.00           H  
+ATOM   1145 2HG  GLN    24      -8.808  -1.506  14.232  1.00  0.00           H  
+ATOM   1146 1HE2 GLN    24      -6.515   1.175  15.319  1.00  0.00           H  
+ATOM   1147 2HE2 GLN    24      -8.180   0.986  15.051  1.00  0.00           H  
+ATOM   1148  N   ARG    25     -10.564  -1.791  11.229  1.00  0.00           N  
+ATOM   1149  CA  ARG    25     -11.894  -1.122  11.261  1.00  0.00           C  
+ATOM   1150  C   ARG    25     -12.114  -0.478  12.631  1.00  0.00           C  
+ATOM   1151  O   ARG    25     -12.959  -0.966  13.364  1.00  0.00           O  
+ATOM   1152  CB  ARG    25     -12.982  -2.162  10.999  1.00  0.00           C  
+ATOM   1153  CG  ARG    25     -14.357  -1.552  11.281  1.00  0.00           C  
+ATOM   1154  CD  ARG    25     -15.262  -1.753  10.066  1.00  0.00           C  
+ATOM   1155  NE  ARG    25     -16.689  -1.663  10.488  1.00  0.00           N  
+ATOM   1156  CZ  ARG    25     -17.624  -1.476   9.595  1.00  0.00           C  
+ATOM   1157  NH1 ARG    25     -17.575  -0.438   8.804  1.00  0.00           N  
+ATOM   1158  NH2 ARG    25     -18.608  -2.328   9.492  1.00  0.00           N  
+ATOM   1159  OXT ARG    25     -11.433   0.490  12.924  1.00  0.00           O  
+ATOM   1160  H   ARG    25     -10.002  -1.718  10.433  1.00  0.00           H  
+ATOM   1161  HA  ARG    25     -11.933  -0.361  10.496  1.00  0.00           H  
+ATOM   1162 1HB  ARG    25     -12.930  -2.480   9.967  1.00  0.00           H  
+ATOM   1163 2HB  ARG    25     -12.826  -3.012  11.646  1.00  0.00           H  
+ATOM   1164 1HG  ARG    25     -14.794  -2.036  12.142  1.00  0.00           H  
+ATOM   1165 2HG  ARG    25     -14.249  -0.496  11.478  1.00  0.00           H  
+ATOM   1166 1HD  ARG    25     -15.056  -0.987   9.333  1.00  0.00           H  
+ATOM   1167 2HD  ARG    25     -15.075  -2.725   9.633  1.00  0.00           H  
+ATOM   1168  HE  ARG    25     -16.925  -1.745  11.435  1.00  0.00           H  
+ATOM   1169 1HH1 ARG    25     -16.822   0.215   8.882  1.00  0.00           H  
+ATOM   1170 2HH1 ARG    25     -18.291  -0.296   8.121  1.00  0.00           H  
+ATOM   1171 1HH2 ARG    25     -18.647  -3.123  10.098  1.00  0.00           H  
+ATOM   1172 2HH2 ARG    25     -19.324  -2.185   8.810  1.00  0.00           H  
+TER    1173      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        4                                                                  
+ATOM   1174  N   GLY     1      14.467  -2.646  -9.742  1.00  0.00           N  
+ATOM   1175  CA  GLY     1      13.304  -1.738 -10.143  1.00  0.00           C  
+ATOM   1176  C   GLY     1      13.580  -0.331 -10.550  1.00  0.00           C  
+ATOM   1177  O   GLY     1      12.896   0.228 -11.385  1.00  0.00           O  
+ATOM   1178 1H   GLY     1      15.345  -2.297 -10.177  1.00  0.00           H  
+ATOM   1179 2H   GLY     1      14.568  -2.645  -8.707  1.00  0.00           H  
+ATOM   1180 3H   GLY     1      14.279  -3.615 -10.070  1.00  0.00           H  
+ATOM   1181 1HA  GLY     1      13.073  -1.853 -11.083  1.00  0.00           H  
+ATOM   1182 2HA  GLY     1      12.419  -1.980  -9.421  1.00  0.00           H  
+ATOM   1183  N   SER     2      14.581   0.279  -9.975  1.00  0.00           N  
+ATOM   1184  CA  SER     2      14.904   1.687 -10.341  1.00  0.00           C  
+ATOM   1185  C   SER     2      13.689   2.577 -10.073  1.00  0.00           C  
+ATOM   1186  O   SER     2      12.914   2.332  -9.170  1.00  0.00           O  
+ATOM   1187  CB  SER     2      15.271   1.757 -11.825  1.00  0.00           C  
+ATOM   1188  OG  SER     2      15.981   0.581 -12.188  1.00  0.00           O  
+ATOM   1189  H   SER     2      15.120  -0.190  -9.304  1.00  0.00           H  
+ATOM   1190  HA  SER     2      15.739   2.031  -9.748  1.00  0.00           H  
+ATOM   1191 1HB  SER     2      14.373   1.825 -12.416  1.00  0.00           H  
+ATOM   1192 2HB  SER     2      15.883   2.630 -12.002  1.00  0.00           H  
+ATOM   1193  HG  SER     2      16.430   0.251 -11.406  1.00  0.00           H  
+ATOM   1194  N   GLU     3      13.514   3.610 -10.852  1.00  0.00           N  
+ATOM   1195  CA  GLU     3      12.348   4.513 -10.642  1.00  0.00           C  
+ATOM   1196  C   GLU     3      11.071   3.680 -10.536  1.00  0.00           C  
+ATOM   1197  O   GLU     3      10.204   3.954  -9.731  1.00  0.00           O  
+ATOM   1198  CB  GLU     3      12.231   5.475 -11.825  1.00  0.00           C  
+ATOM   1199  CG  GLU     3      12.746   6.856 -11.414  1.00  0.00           C  
+ATOM   1200  CD  GLU     3      11.719   7.536 -10.507  1.00  0.00           C  
+ATOM   1201  OE1 GLU     3      10.622   7.794 -10.976  1.00  0.00           O  
+ATOM   1202  OE2 GLU     3      12.045   7.786  -9.359  1.00  0.00           O  
+ATOM   1203  H   GLU     3      14.150   3.792 -11.575  1.00  0.00           H  
+ATOM   1204  HA  GLU     3      12.488   5.077  -9.732  1.00  0.00           H  
+ATOM   1205 1HB  GLU     3      12.818   5.102 -12.652  1.00  0.00           H  
+ATOM   1206 2HB  GLU     3      11.196   5.554 -12.124  1.00  0.00           H  
+ATOM   1207 1HG  GLU     3      13.681   6.747 -10.882  1.00  0.00           H  
+ATOM   1208 2HG  GLU     3      12.901   7.460 -12.295  1.00  0.00           H  
+ATOM   1209  N   LYS     4      10.948   2.662 -11.344  1.00  0.00           N  
+ATOM   1210  CA  LYS     4       9.728   1.810 -11.286  1.00  0.00           C  
+ATOM   1211  C   LYS     4       9.563   1.260  -9.870  1.00  0.00           C  
+ATOM   1212  O   LYS     4       8.488   1.289  -9.303  1.00  0.00           O  
+ATOM   1213  CB  LYS     4       9.870   0.650 -12.273  1.00  0.00           C  
+ATOM   1214  CG  LYS     4      10.383   1.180 -13.613  1.00  0.00           C  
+ATOM   1215  CD  LYS     4      11.718   0.513 -13.949  1.00  0.00           C  
+ATOM   1216  CE  LYS     4      11.752   0.153 -15.435  1.00  0.00           C  
+ATOM   1217  NZ  LYS     4      11.160  -1.200 -15.632  1.00  0.00           N  
+ATOM   1218  H   LYS     4      11.659   2.457 -11.986  1.00  0.00           H  
+ATOM   1219  HA  LYS     4       8.862   2.402 -11.547  1.00  0.00           H  
+ATOM   1220 1HB  LYS     4      10.569  -0.073 -11.880  1.00  0.00           H  
+ATOM   1221 2HB  LYS     4       8.908   0.179 -12.419  1.00  0.00           H  
+ATOM   1222 1HG  LYS     4       9.663   0.958 -14.388  1.00  0.00           H  
+ATOM   1223 2HG  LYS     4      10.524   2.249 -13.548  1.00  0.00           H  
+ATOM   1224 1HD  LYS     4      12.527   1.193 -13.724  1.00  0.00           H  
+ATOM   1225 2HD  LYS     4      11.829  -0.386 -13.361  1.00  0.00           H  
+ATOM   1226 1HE  LYS     4      11.181   0.880 -15.994  1.00  0.00           H  
+ATOM   1227 2HE  LYS     4      12.774   0.153 -15.782  1.00  0.00           H  
+ATOM   1228 1HZ  LYS     4      10.212  -1.229 -15.209  1.00  0.00           H  
+ATOM   1229 2HZ  LYS     4      11.092  -1.404 -16.650  1.00  0.00           H  
+ATOM   1230 3HZ  LYS     4      11.764  -1.913 -15.176  1.00  0.00           H  
+ATOM   1231  N   MET     5      10.621   0.761  -9.291  1.00  0.00           N  
+ATOM   1232  CA  MET     5      10.525   0.212  -7.909  1.00  0.00           C  
+ATOM   1233  C   MET     5      10.263   1.355  -6.928  1.00  0.00           C  
+ATOM   1234  O   MET     5       9.792   1.145  -5.828  1.00  0.00           O  
+ATOM   1235  CB  MET     5      11.838  -0.483  -7.541  1.00  0.00           C  
+ATOM   1236  CG  MET     5      11.564  -1.955  -7.224  1.00  0.00           C  
+ATOM   1237  SD  MET     5      10.796  -2.087  -5.591  1.00  0.00           S  
+ATOM   1238  CE  MET     5      12.316  -1.980  -4.616  1.00  0.00           C  
+ATOM   1239  H   MET     5      11.479   0.748  -9.764  1.00  0.00           H  
+ATOM   1240  HA  MET     5       9.713  -0.499  -7.860  1.00  0.00           H  
+ATOM   1241 1HB  MET     5      12.527  -0.414  -8.371  1.00  0.00           H  
+ATOM   1242 2HB  MET     5      12.267  -0.005  -6.674  1.00  0.00           H  
+ATOM   1243 1HG  MET     5      10.899  -2.366  -7.970  1.00  0.00           H  
+ATOM   1244 2HG  MET     5      12.494  -2.503  -7.230  1.00  0.00           H  
+ATOM   1245 1HE  MET     5      12.924  -1.170  -4.984  1.00  0.00           H  
+ATOM   1246 2HE  MET     5      12.066  -1.800  -3.579  1.00  0.00           H  
+ATOM   1247 3HE  MET     5      12.865  -2.908  -4.703  1.00  0.00           H  
+ATOM   1248  N   SER     6      10.564   2.564  -7.318  1.00  0.00           N  
+ATOM   1249  CA  SER     6      10.332   3.723  -6.410  1.00  0.00           C  
+ATOM   1250  C   SER     6       8.833   4.014  -6.334  1.00  0.00           C  
+ATOM   1251  O   SER     6       8.273   4.178  -5.267  1.00  0.00           O  
+ATOM   1252  CB  SER     6      11.061   4.950  -6.955  1.00  0.00           C  
+ATOM   1253  OG  SER     6      12.296   4.544  -7.532  1.00  0.00           O  
+ATOM   1254  H   SER     6      10.943   2.710  -8.210  1.00  0.00           H  
+ATOM   1255  HA  SER     6      10.704   3.489  -5.423  1.00  0.00           H  
+ATOM   1256 1HB  SER     6      10.457   5.425  -7.710  1.00  0.00           H  
+ATOM   1257 2HB  SER     6      11.241   5.648  -6.148  1.00  0.00           H  
+ATOM   1258  HG  SER     6      12.777   4.034  -6.876  1.00  0.00           H  
+ATOM   1259  N   THR     7       8.175   4.071  -7.459  1.00  0.00           N  
+ATOM   1260  CA  THR     7       6.712   4.342  -7.450  1.00  0.00           C  
+ATOM   1261  C   THR     7       5.981   3.067  -7.036  1.00  0.00           C  
+ATOM   1262  O   THR     7       4.829   3.093  -6.648  1.00  0.00           O  
+ATOM   1263  CB  THR     7       6.259   4.765  -8.850  1.00  0.00           C  
+ATOM   1264  OG1 THR     7       5.966   3.606  -9.622  1.00  0.00           O  
+ATOM   1265  CG2 THR     7       7.375   5.569  -9.522  1.00  0.00           C  
+ATOM   1266  H   THR     7       8.642   3.926  -8.308  1.00  0.00           H  
+ATOM   1267  HA  THR     7       6.494   5.130  -6.745  1.00  0.00           H  
+ATOM   1268  HB  THR     7       5.376   5.378  -8.772  1.00  0.00           H  
+ATOM   1269  HG1 THR     7       6.539   3.602 -10.393  1.00  0.00           H  
+ATOM   1270 1HG2 THR     7       8.116   5.842  -8.785  1.00  0.00           H  
+ATOM   1271 2HG2 THR     7       7.836   4.970 -10.292  1.00  0.00           H  
+ATOM   1272 3HG2 THR     7       6.959   6.464  -9.961  1.00  0.00           H  
+ATOM   1273  N   ALA     8       6.653   1.953  -7.107  1.00  0.00           N  
+ATOM   1274  CA  ALA     8       6.019   0.670  -6.710  1.00  0.00           C  
+ATOM   1275  C   ALA     8       6.130   0.518  -5.197  1.00  0.00           C  
+ATOM   1276  O   ALA     8       5.280  -0.067  -4.555  1.00  0.00           O  
+ATOM   1277  CB  ALA     8       6.743  -0.486  -7.399  1.00  0.00           C  
+ATOM   1278  H   ALA     8       7.583   1.961  -7.414  1.00  0.00           H  
+ATOM   1279  HA  ALA     8       4.978   0.674  -7.000  1.00  0.00           H  
+ATOM   1280 1HB  ALA     8       6.869  -0.258  -8.446  1.00  0.00           H  
+ATOM   1281 2HB  ALA     8       7.712  -0.625  -6.942  1.00  0.00           H  
+ATOM   1282 3HB  ALA     8       6.162  -1.389  -7.294  1.00  0.00           H  
+ATOM   1283  N   ILE     9       7.171   1.052  -4.619  1.00  0.00           N  
+ATOM   1284  CA  ILE     9       7.335   0.950  -3.145  1.00  0.00           C  
+ATOM   1285  C   ILE     9       6.387   1.942  -2.471  1.00  0.00           C  
+ATOM   1286  O   ILE     9       5.853   1.684  -1.412  1.00  0.00           O  
+ATOM   1287  CB  ILE     9       8.782   1.269  -2.760  1.00  0.00           C  
+ATOM   1288  CG1 ILE     9       9.166   2.652  -3.296  1.00  0.00           C  
+ATOM   1289  CG2 ILE     9       9.712   0.214  -3.361  1.00  0.00           C  
+ATOM   1290  CD1 ILE     9      10.368   3.183  -2.513  1.00  0.00           C  
+ATOM   1291  H   ILE     9       7.840   1.526  -5.155  1.00  0.00           H  
+ATOM   1292  HA  ILE     9       7.090  -0.053  -2.828  1.00  0.00           H  
+ATOM   1293  HB  ILE     9       8.877   1.259  -1.684  1.00  0.00           H  
+ATOM   1294 1HG1 ILE     9       9.422   2.575  -4.342  1.00  0.00           H  
+ATOM   1295 2HG1 ILE     9       8.336   3.330  -3.177  1.00  0.00           H  
+ATOM   1296 1HG2 ILE     9       9.277  -0.173  -4.271  1.00  0.00           H  
+ATOM   1297 2HG2 ILE     9      10.670   0.664  -3.583  1.00  0.00           H  
+ATOM   1298 3HG2 ILE     9       9.847  -0.591  -2.655  1.00  0.00           H  
+ATOM   1299 1HD1 ILE     9      10.458   2.645  -1.581  1.00  0.00           H  
+ATOM   1300 2HD1 ILE     9      11.267   3.045  -3.096  1.00  0.00           H  
+ATOM   1301 3HD1 ILE     9      10.229   4.235  -2.309  1.00  0.00           H  
+ATOM   1302  N   SER    10       6.166   3.074  -3.083  1.00  0.00           N  
+ATOM   1303  CA  SER    10       5.241   4.074  -2.479  1.00  0.00           C  
+ATOM   1304  C   SER    10       3.816   3.532  -2.557  1.00  0.00           C  
+ATOM   1305  O   SER    10       3.135   3.398  -1.559  1.00  0.00           O  
+ATOM   1306  CB  SER    10       5.330   5.399  -3.241  1.00  0.00           C  
+ATOM   1307  OG  SER    10       6.077   5.212  -4.435  1.00  0.00           O  
+ATOM   1308  H   SER    10       6.601   3.262  -3.942  1.00  0.00           H  
+ATOM   1309  HA  SER    10       5.507   4.230  -1.443  1.00  0.00           H  
+ATOM   1310 1HB  SER    10       4.340   5.738  -3.494  1.00  0.00           H  
+ATOM   1311 2HB  SER    10       5.811   6.140  -2.616  1.00  0.00           H  
+ATOM   1312  HG  SER    10       5.957   5.990  -4.985  1.00  0.00           H  
+ATOM   1313  N   VAL    11       3.359   3.204  -3.735  1.00  0.00           N  
+ATOM   1314  CA  VAL    11       1.983   2.654  -3.865  1.00  0.00           C  
+ATOM   1315  C   VAL    11       1.900   1.359  -3.057  1.00  0.00           C  
+ATOM   1316  O   VAL    11       0.846   0.962  -2.605  1.00  0.00           O  
+ATOM   1317  CB  VAL    11       1.677   2.367  -5.336  1.00  0.00           C  
+ATOM   1318  CG1 VAL    11       0.216   1.932  -5.476  1.00  0.00           C  
+ATOM   1319  CG2 VAL    11       1.912   3.632  -6.164  1.00  0.00           C  
+ATOM   1320  H   VAL    11       3.925   3.307  -4.530  1.00  0.00           H  
+ATOM   1321  HA  VAL    11       1.271   3.369  -3.478  1.00  0.00           H  
+ATOM   1322  HB  VAL    11       2.323   1.576  -5.690  1.00  0.00           H  
+ATOM   1323 1HG1 VAL    11      -0.420   2.644  -4.971  1.00  0.00           H  
+ATOM   1324 2HG1 VAL    11      -0.048   1.891  -6.522  1.00  0.00           H  
+ATOM   1325 3HG1 VAL    11       0.088   0.955  -5.034  1.00  0.00           H  
+ATOM   1326 1HG2 VAL    11       2.276   4.420  -5.521  1.00  0.00           H  
+ATOM   1327 2HG2 VAL    11       2.642   3.428  -6.933  1.00  0.00           H  
+ATOM   1328 3HG2 VAL    11       0.984   3.940  -6.621  1.00  0.00           H  
+ATOM   1329  N   LEU    12       3.015   0.702  -2.863  1.00  0.00           N  
+ATOM   1330  CA  LEU    12       3.011  -0.561  -2.074  1.00  0.00           C  
+ATOM   1331  C   LEU    12       2.527  -0.263  -0.654  1.00  0.00           C  
+ATOM   1332  O   LEU    12       1.698  -0.964  -0.109  1.00  0.00           O  
+ATOM   1333  CB  LEU    12       4.432  -1.131  -2.017  1.00  0.00           C  
+ATOM   1334  CG  LEU    12       4.597  -2.206  -3.091  1.00  0.00           C  
+ATOM   1335  CD1 LEU    12       6.087  -2.474  -3.321  1.00  0.00           C  
+ATOM   1336  CD2 LEU    12       3.916  -3.497  -2.629  1.00  0.00           C  
+ATOM   1337  H   LEU    12       3.855   1.046  -3.231  1.00  0.00           H  
+ATOM   1338  HA  LEU    12       2.351  -1.278  -2.540  1.00  0.00           H  
+ATOM   1339 1HB  LEU    12       5.144  -0.337  -2.190  1.00  0.00           H  
+ATOM   1340 2HB  LEU    12       4.606  -1.565  -1.045  1.00  0.00           H  
+ATOM   1341  HG  LEU    12       4.146  -1.866  -4.013  1.00  0.00           H  
+ATOM   1342 1HD1 LEU    12       6.637  -2.242  -2.421  1.00  0.00           H  
+ATOM   1343 2HD1 LEU    12       6.231  -3.514  -3.572  1.00  0.00           H  
+ATOM   1344 3HD1 LEU    12       6.443  -1.856  -4.130  1.00  0.00           H  
+ATOM   1345 1HD2 LEU    12       3.323  -3.295  -1.749  1.00  0.00           H  
+ATOM   1346 2HD2 LEU    12       3.276  -3.868  -3.416  1.00  0.00           H  
+ATOM   1347 3HD2 LEU    12       4.666  -4.237  -2.396  1.00  0.00           H  
+ATOM   1348  N   LEU    13       3.040   0.775  -0.051  1.00  0.00           N  
+ATOM   1349  CA  LEU    13       2.614   1.123   1.333  1.00  0.00           C  
+ATOM   1350  C   LEU    13       1.200   1.705   1.300  1.00  0.00           C  
+ATOM   1351  O   LEU    13       0.477   1.664   2.275  1.00  0.00           O  
+ATOM   1352  CB  LEU    13       3.579   2.164   1.909  1.00  0.00           C  
+ATOM   1353  CG  LEU    13       4.904   1.503   2.317  1.00  0.00           C  
+ATOM   1354  CD1 LEU    13       5.252   0.355   1.361  1.00  0.00           C  
+ATOM   1355  CD2 LEU    13       6.017   2.551   2.266  1.00  0.00           C  
+ATOM   1356  H   LEU    13       3.710   1.329  -0.510  1.00  0.00           H  
+ATOM   1357  HA  LEU    13       2.628   0.238   1.950  1.00  0.00           H  
+ATOM   1358 1HB  LEU    13       3.773   2.921   1.163  1.00  0.00           H  
+ATOM   1359 2HB  LEU    13       3.130   2.625   2.776  1.00  0.00           H  
+ATOM   1360  HG  LEU    13       4.818   1.119   3.323  1.00  0.00           H  
+ATOM   1361 1HD1 LEU    13       5.248   0.719   0.345  1.00  0.00           H  
+ATOM   1362 2HD1 LEU    13       6.231  -0.030   1.601  1.00  0.00           H  
+ATOM   1363 3HD1 LEU    13       4.520  -0.433   1.462  1.00  0.00           H  
+ATOM   1364 1HD2 LEU    13       5.613   3.516   2.538  1.00  0.00           H  
+ATOM   1365 2HD2 LEU    13       6.800   2.279   2.956  1.00  0.00           H  
+ATOM   1366 3HD2 LEU    13       6.418   2.600   1.264  1.00  0.00           H  
+ATOM   1367  N   ALA    14       0.803   2.250   0.185  1.00  0.00           N  
+ATOM   1368  CA  ALA    14      -0.563   2.840   0.087  1.00  0.00           C  
+ATOM   1369  C   ALA    14      -1.612   1.724   0.125  1.00  0.00           C  
+ATOM   1370  O   ALA    14      -2.407   1.638   1.038  1.00  0.00           O  
+ATOM   1371  CB  ALA    14      -0.691   3.613  -1.227  1.00  0.00           C  
+ATOM   1372  H   ALA    14       1.404   2.276  -0.589  1.00  0.00           H  
+ATOM   1373  HA  ALA    14      -0.725   3.512   0.916  1.00  0.00           H  
+ATOM   1374 1HB  ALA    14       0.195   3.458  -1.823  1.00  0.00           H  
+ATOM   1375 2HB  ALA    14      -1.556   3.260  -1.769  1.00  0.00           H  
+ATOM   1376 3HB  ALA    14      -0.804   4.666  -1.015  1.00  0.00           H  
+ATOM   1377  N   GLN    15      -1.621   0.874  -0.865  1.00  0.00           N  
+ATOM   1378  CA  GLN    15      -2.621  -0.233  -0.892  1.00  0.00           C  
+ATOM   1379  C   GLN    15      -2.471  -1.099   0.362  1.00  0.00           C  
+ATOM   1380  O   GLN    15      -3.441  -1.436   1.010  1.00  0.00           O  
+ATOM   1381  CB  GLN    15      -2.395  -1.092  -2.137  1.00  0.00           C  
+ATOM   1382  CG  GLN    15      -3.744  -1.427  -2.777  1.00  0.00           C  
+ATOM   1383  CD  GLN    15      -3.732  -1.001  -4.247  1.00  0.00           C  
+ATOM   1384  OE1 GLN    15      -3.853   0.168  -4.554  1.00  0.00           O  
+ATOM   1385  NE2 GLN    15      -3.589  -1.908  -5.174  1.00  0.00           N  
+ATOM   1386  H   GLN    15      -0.975   0.965  -1.594  1.00  0.00           H  
+ATOM   1387  HA  GLN    15      -3.616   0.185  -0.922  1.00  0.00           H  
+ATOM   1388 1HB  GLN    15      -1.786  -0.548  -2.845  1.00  0.00           H  
+ATOM   1389 2HB  GLN    15      -1.895  -2.007  -1.859  1.00  0.00           H  
+ATOM   1390 1HG  GLN    15      -3.920  -2.490  -2.710  1.00  0.00           H  
+ATOM   1391 2HG  GLN    15      -4.529  -0.896  -2.258  1.00  0.00           H  
+ATOM   1392 1HE2 GLN    15      -3.580  -1.647  -6.119  1.00  0.00           H  
+ATOM   1393 2HE2 GLN    15      -3.491  -2.851  -4.926  1.00  0.00           H  
+ATOM   1394  N   ALA    16      -1.268  -1.463   0.711  1.00  0.00           N  
+ATOM   1395  CA  ALA    16      -1.073  -2.306   1.924  1.00  0.00           C  
+ATOM   1396  C   ALA    16      -1.837  -1.686   3.096  1.00  0.00           C  
+ATOM   1397  O   ALA    16      -2.245  -2.366   4.016  1.00  0.00           O  
+ATOM   1398  CB  ALA    16       0.417  -2.382   2.265  1.00  0.00           C  
+ATOM   1399  H   ALA    16      -0.494  -1.183   0.177  1.00  0.00           H  
+ATOM   1400  HA  ALA    16      -1.451  -3.300   1.735  1.00  0.00           H  
+ATOM   1401 1HB  ALA    16       0.944  -1.587   1.757  1.00  0.00           H  
+ATOM   1402 2HB  ALA    16       0.548  -2.277   3.332  1.00  0.00           H  
+ATOM   1403 3HB  ALA    16       0.811  -3.336   1.947  1.00  0.00           H  
+ATOM   1404  N   VAL    17      -2.036  -0.396   3.066  1.00  0.00           N  
+ATOM   1405  CA  VAL    17      -2.776   0.273   4.174  1.00  0.00           C  
+ATOM   1406  C   VAL    17      -4.281   0.189   3.909  1.00  0.00           C  
+ATOM   1407  O   VAL    17      -5.075   0.069   4.820  1.00  0.00           O  
+ATOM   1408  CB  VAL    17      -2.354   1.741   4.257  1.00  0.00           C  
+ATOM   1409  CG1 VAL    17      -3.306   2.495   5.186  1.00  0.00           C  
+ATOM   1410  CG2 VAL    17      -0.929   1.832   4.807  1.00  0.00           C  
+ATOM   1411  H   VAL    17      -1.699   0.133   2.314  1.00  0.00           H  
+ATOM   1412  HA  VAL    17      -2.547  -0.221   5.107  1.00  0.00           H  
+ATOM   1413  HB  VAL    17      -2.389   2.181   3.270  1.00  0.00           H  
+ATOM   1414 1HG1 VAL    17      -4.018   1.803   5.610  1.00  0.00           H  
+ATOM   1415 2HG1 VAL    17      -2.740   2.960   5.980  1.00  0.00           H  
+ATOM   1416 3HG1 VAL    17      -3.830   3.254   4.625  1.00  0.00           H  
+ATOM   1417 1HG2 VAL    17      -0.295   1.130   4.284  1.00  0.00           H  
+ATOM   1418 2HG2 VAL    17      -0.551   2.833   4.665  1.00  0.00           H  
+ATOM   1419 3HG2 VAL    17      -0.934   1.595   5.861  1.00  0.00           H  
+ATOM   1420  N   PHE    18      -4.680   0.254   2.667  1.00  0.00           N  
+ATOM   1421  CA  PHE    18      -6.133   0.179   2.345  1.00  0.00           C  
+ATOM   1422  C   PHE    18      -6.677  -1.189   2.762  1.00  0.00           C  
+ATOM   1423  O   PHE    18      -7.545  -1.293   3.606  1.00  0.00           O  
+ATOM   1424  CB  PHE    18      -6.331   0.369   0.840  1.00  0.00           C  
+ATOM   1425  CG  PHE    18      -7.039   1.677   0.584  1.00  0.00           C  
+ATOM   1426  CD1 PHE    18      -8.436   1.741   0.651  1.00  0.00           C  
+ATOM   1427  CD2 PHE    18      -6.299   2.826   0.279  1.00  0.00           C  
+ATOM   1428  CE1 PHE    18      -9.093   2.954   0.413  1.00  0.00           C  
+ATOM   1429  CE2 PHE    18      -6.956   4.039   0.041  1.00  0.00           C  
+ATOM   1430  CZ  PHE    18      -8.353   4.103   0.106  1.00  0.00           C  
+ATOM   1431  H   PHE    18      -4.023   0.353   1.947  1.00  0.00           H  
+ATOM   1432  HA  PHE    18      -6.662   0.955   2.879  1.00  0.00           H  
+ATOM   1433 1HB  PHE    18      -5.367   0.378   0.350  1.00  0.00           H  
+ATOM   1434 2HB  PHE    18      -6.926  -0.444   0.450  1.00  0.00           H  
+ATOM   1435  HD1 PHE    18      -9.006   0.855   0.887  1.00  0.00           H  
+ATOM   1436  HD2 PHE    18      -5.221   2.776   0.227  1.00  0.00           H  
+ATOM   1437  HE1 PHE    18     -10.171   3.003   0.463  1.00  0.00           H  
+ATOM   1438  HE2 PHE    18      -6.386   4.924  -0.195  1.00  0.00           H  
+ATOM   1439  HZ  PHE    18      -8.860   5.038  -0.077  1.00  0.00           H  
+ATOM   1440  N   LEU    19      -6.169  -2.242   2.179  1.00  0.00           N  
+ATOM   1441  CA  LEU    19      -6.655  -3.601   2.548  1.00  0.00           C  
+ATOM   1442  C   LEU    19      -6.392  -3.840   4.035  1.00  0.00           C  
+ATOM   1443  O   LEU    19      -7.134  -4.531   4.705  1.00  0.00           O  
+ATOM   1444  CB  LEU    19      -5.910  -4.652   1.721  1.00  0.00           C  
+ATOM   1445  CG  LEU    19      -6.264  -4.485   0.243  1.00  0.00           C  
+ATOM   1446  CD1 LEU    19      -5.438  -5.464  -0.594  1.00  0.00           C  
+ATOM   1447  CD2 LEU    19      -7.753  -4.774   0.040  1.00  0.00           C  
+ATOM   1448  H   LEU    19      -5.468  -2.137   1.503  1.00  0.00           H  
+ATOM   1449  HA  LEU    19      -7.715  -3.671   2.353  1.00  0.00           H  
+ATOM   1450 1HB  LEU    19      -4.845  -4.526   1.854  1.00  0.00           H  
+ATOM   1451 2HB  LEU    19      -6.200  -5.639   2.048  1.00  0.00           H  
+ATOM   1452  HG  LEU    19      -6.045  -3.474  -0.068  1.00  0.00           H  
+ATOM   1453 1HD1 LEU    19      -5.284  -6.375  -0.033  1.00  0.00           H  
+ATOM   1454 2HD1 LEU    19      -5.964  -5.689  -1.510  1.00  0.00           H  
+ATOM   1455 3HD1 LEU    19      -4.482  -5.020  -0.827  1.00  0.00           H  
+ATOM   1456 1HD2 LEU    19      -8.183  -5.119   0.969  1.00  0.00           H  
+ATOM   1457 2HD2 LEU    19      -8.255  -3.871  -0.275  1.00  0.00           H  
+ATOM   1458 3HD2 LEU    19      -7.872  -5.535  -0.717  1.00  0.00           H  
+ATOM   1459  N   LEU    20      -5.342  -3.268   4.558  1.00  0.00           N  
+ATOM   1460  CA  LEU    20      -5.034  -3.456   6.003  1.00  0.00           C  
+ATOM   1461  C   LEU    20      -6.090  -2.730   6.837  1.00  0.00           C  
+ATOM   1462  O   LEU    20      -6.374  -3.101   7.959  1.00  0.00           O  
+ATOM   1463  CB  LEU    20      -3.651  -2.877   6.309  1.00  0.00           C  
+ATOM   1464  CG  LEU    20      -3.365  -3.001   7.805  1.00  0.00           C  
+ATOM   1465  CD1 LEU    20      -2.688  -4.343   8.086  1.00  0.00           C  
+ATOM   1466  CD2 LEU    20      -2.438  -1.863   8.240  1.00  0.00           C  
+ATOM   1467  H   LEU    20      -4.758  -2.711   4.001  1.00  0.00           H  
+ATOM   1468  HA  LEU    20      -5.046  -4.511   6.240  1.00  0.00           H  
+ATOM   1469 1HB  LEU    20      -2.901  -3.421   5.751  1.00  0.00           H  
+ATOM   1470 2HB  LEU    20      -3.626  -1.836   6.024  1.00  0.00           H  
+ATOM   1471  HG  LEU    20      -4.293  -2.942   8.356  1.00  0.00           H  
+ATOM   1472 1HD1 LEU    20      -2.759  -4.972   7.211  1.00  0.00           H  
+ATOM   1473 2HD1 LEU    20      -1.649  -4.179   8.329  1.00  0.00           H  
+ATOM   1474 3HD1 LEU    20      -3.180  -4.827   8.917  1.00  0.00           H  
+ATOM   1475 1HD2 LEU    20      -2.560  -1.024   7.572  1.00  0.00           H  
+ATOM   1476 2HD2 LEU    20      -2.685  -1.560   9.247  1.00  0.00           H  
+ATOM   1477 3HD2 LEU    20      -1.413  -2.202   8.208  1.00  0.00           H  
+ATOM   1478  N   LEU    21      -6.678  -1.701   6.292  1.00  0.00           N  
+ATOM   1479  CA  LEU    21      -7.720  -0.948   7.044  1.00  0.00           C  
+ATOM   1480  C   LEU    21      -8.984  -1.802   7.144  1.00  0.00           C  
+ATOM   1481  O   LEU    21      -9.522  -2.008   8.214  1.00  0.00           O  
+ATOM   1482  CB  LEU    21      -8.042   0.351   6.304  1.00  0.00           C  
+ATOM   1483  CG  LEU    21      -9.269   1.009   6.935  1.00  0.00           C  
+ATOM   1484  CD1 LEU    21      -9.061   1.136   8.445  1.00  0.00           C  
+ATOM   1485  CD2 LEU    21      -9.467   2.400   6.329  1.00  0.00           C  
+ATOM   1486  H   LEU    21      -6.435  -1.425   5.384  1.00  0.00           H  
+ATOM   1487  HA  LEU    21      -7.357  -0.721   8.034  1.00  0.00           H  
+ATOM   1488 1HB  LEU    21      -7.198   1.022   6.373  1.00  0.00           H  
+ATOM   1489 2HB  LEU    21      -8.245   0.134   5.267  1.00  0.00           H  
+ATOM   1490  HG  LEU    21     -10.142   0.402   6.742  1.00  0.00           H  
+ATOM   1491 1HD1 LEU    21      -8.034   0.907   8.687  1.00  0.00           H  
+ATOM   1492 2HD1 LEU    21      -9.288   2.145   8.756  1.00  0.00           H  
+ATOM   1493 3HD1 LEU    21      -9.714   0.445   8.958  1.00  0.00           H  
+ATOM   1494 1HD2 LEU    21      -8.934   2.462   5.392  1.00  0.00           H  
+ATOM   1495 2HD2 LEU    21     -10.519   2.572   6.157  1.00  0.00           H  
+ATOM   1496 3HD2 LEU    21      -9.087   3.147   7.010  1.00  0.00           H  
+ATOM   1497  N   THR    22      -9.461  -2.301   6.038  1.00  0.00           N  
+ATOM   1498  CA  THR    22     -10.687  -3.144   6.072  1.00  0.00           C  
+ATOM   1499  C   THR    22     -10.350  -4.495   6.703  1.00  0.00           C  
+ATOM   1500  O   THR    22     -11.222  -5.257   7.071  1.00  0.00           O  
+ATOM   1501  CB  THR    22     -11.200  -3.356   4.645  1.00  0.00           C  
+ATOM   1502  OG1 THR    22     -12.427  -4.072   4.688  1.00  0.00           O  
+ATOM   1503  CG2 THR    22     -10.172  -4.151   3.840  1.00  0.00           C  
+ATOM   1504  H   THR    22      -9.010  -2.125   5.186  1.00  0.00           H  
+ATOM   1505  HA  THR    22     -11.448  -2.651   6.660  1.00  0.00           H  
+ATOM   1506  HB  THR    22     -11.359  -2.398   4.174  1.00  0.00           H  
+ATOM   1507  HG1 THR    22     -12.861  -3.867   5.520  1.00  0.00           H  
+ATOM   1508 1HG2 THR    22      -9.512  -4.675   4.516  1.00  0.00           H  
+ATOM   1509 2HG2 THR    22     -10.681  -4.864   3.209  1.00  0.00           H  
+ATOM   1510 3HG2 THR    22      -9.595  -3.475   3.225  1.00  0.00           H  
+ATOM   1511  N   SER    23      -9.087  -4.797   6.835  1.00  0.00           N  
+ATOM   1512  CA  SER    23      -8.689  -6.096   7.446  1.00  0.00           C  
+ATOM   1513  C   SER    23      -8.860  -6.016   8.964  1.00  0.00           C  
+ATOM   1514  O   SER    23      -9.327  -6.943   9.595  1.00  0.00           O  
+ATOM   1515  CB  SER    23      -7.226  -6.387   7.113  1.00  0.00           C  
+ATOM   1516  OG  SER    23      -7.164  -7.235   5.974  1.00  0.00           O  
+ATOM   1517  H   SER    23      -8.400  -4.166   6.534  1.00  0.00           H  
+ATOM   1518  HA  SER    23      -9.313  -6.885   7.053  1.00  0.00           H  
+ATOM   1519 1HB  SER    23      -6.712  -5.465   6.897  1.00  0.00           H  
+ATOM   1520 2HB  SER    23      -6.754  -6.869   7.959  1.00  0.00           H  
+ATOM   1521  HG  SER    23      -7.961  -7.770   5.960  1.00  0.00           H  
+ATOM   1522  N   GLN    24      -8.490  -4.913   9.553  1.00  0.00           N  
+ATOM   1523  CA  GLN    24      -8.633  -4.767  11.024  1.00  0.00           C  
+ATOM   1524  C   GLN    24     -10.092  -4.424  11.348  1.00  0.00           C  
+ATOM   1525  O   GLN    24     -10.576  -4.679  12.432  1.00  0.00           O  
+ATOM   1526  CB  GLN    24      -7.688  -3.650  11.501  1.00  0.00           C  
+ATOM   1527  CG  GLN    24      -8.281  -2.923  12.711  1.00  0.00           C  
+ATOM   1528  CD  GLN    24      -7.154  -2.351  13.571  1.00  0.00           C  
+ATOM   1529  OE1 GLN    24      -6.051  -2.859  13.565  1.00  0.00           O  
+ATOM   1530  NE2 GLN    24      -7.386  -1.304  14.316  1.00  0.00           N  
+ATOM   1531  H   GLN    24      -8.119  -4.175   9.028  1.00  0.00           H  
+ATOM   1532  HA  GLN    24      -8.366  -5.697  11.506  1.00  0.00           H  
+ATOM   1533 1HB  GLN    24      -6.737  -4.081  11.777  1.00  0.00           H  
+ATOM   1534 2HB  GLN    24      -7.540  -2.942  10.699  1.00  0.00           H  
+ATOM   1535 1HG  GLN    24      -8.918  -2.120  12.367  1.00  0.00           H  
+ATOM   1536 2HG  GLN    24      -8.863  -3.618  13.295  1.00  0.00           H  
+ATOM   1537 1HE2 GLN    24      -6.671  -0.931  14.871  1.00  0.00           H  
+ATOM   1538 2HE2 GLN    24      -8.276  -0.894  14.320  1.00  0.00           H  
+ATOM   1539  N   ARG    25     -10.793  -3.847  10.412  1.00  0.00           N  
+ATOM   1540  CA  ARG    25     -12.216  -3.485  10.663  1.00  0.00           C  
+ATOM   1541  C   ARG    25     -13.118  -4.664  10.291  1.00  0.00           C  
+ATOM   1542  O   ARG    25     -13.190  -4.982   9.116  1.00  0.00           O  
+ATOM   1543  CB  ARG    25     -12.588  -2.270   9.812  1.00  0.00           C  
+ATOM   1544  CG  ARG    25     -12.498  -1.004  10.665  1.00  0.00           C  
+ATOM   1545  CD  ARG    25     -13.853  -0.298  10.674  1.00  0.00           C  
+ATOM   1546  NE  ARG    25     -14.432  -0.352  12.045  1.00  0.00           N  
+ATOM   1547  CZ  ARG    25     -14.884  -1.480  12.519  1.00  0.00           C  
+ATOM   1548  NH1 ARG    25     -15.709  -2.202  11.810  1.00  0.00           N  
+ATOM   1549  NH2 ARG    25     -14.510  -1.888  13.701  1.00  0.00           N  
+ATOM   1550  OXT ARG    25     -13.723  -5.227  11.189  1.00  0.00           O  
+ATOM   1551  H   ARG    25     -10.384  -3.648   9.543  1.00  0.00           H  
+ATOM   1552  HA  ARG    25     -12.347  -3.247  11.708  1.00  0.00           H  
+ATOM   1553 1HB  ARG    25     -11.906  -2.193   8.979  1.00  0.00           H  
+ATOM   1554 2HB  ARG    25     -13.597  -2.383   9.446  1.00  0.00           H  
+ATOM   1555 1HG  ARG    25     -12.222  -1.269  11.676  1.00  0.00           H  
+ATOM   1556 2HG  ARG    25     -11.752  -0.342  10.250  1.00  0.00           H  
+ATOM   1557 1HD  ARG    25     -13.726   0.732  10.376  1.00  0.00           H  
+ATOM   1558 2HD  ARG    25     -14.520  -0.791   9.981  1.00  0.00           H  
+ATOM   1559  HE  ARG    25     -14.475   0.462  12.591  1.00  0.00           H  
+ATOM   1560 1HH1 ARG    25     -15.996  -1.889  10.904  1.00  0.00           H  
+ATOM   1561 2HH1 ARG    25     -16.056  -3.067  12.173  1.00  0.00           H  
+ATOM   1562 1HH2 ARG    25     -13.877  -1.336  14.243  1.00  0.00           H  
+ATOM   1563 2HH2 ARG    25     -14.855  -2.754  14.064  1.00  0.00           H  
+TER    1564      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        5                                                                  
+ATOM   1565  N   GLY     1      16.815   2.235  -5.715  1.00  0.00           N  
+ATOM   1566  CA  GLY     1      15.942   1.905  -6.926  1.00  0.00           C  
+ATOM   1567  C   GLY     1      15.450   3.008  -7.802  1.00  0.00           C  
+ATOM   1568  O   GLY     1      14.431   2.887  -8.452  1.00  0.00           O  
+ATOM   1569 1H   GLY     1      16.464   3.103  -5.262  1.00  0.00           H  
+ATOM   1570 2H   GLY     1      16.776   1.448  -5.036  1.00  0.00           H  
+ATOM   1571 3H   GLY     1      17.796   2.377  -6.024  1.00  0.00           H  
+ATOM   1572 1HA  GLY     1      16.486   1.675  -7.701  1.00  0.00           H  
+ATOM   1573 2HA  GLY     1      15.222   1.045  -6.602  1.00  0.00           H  
+ATOM   1574  N   SER     2      16.155   4.106  -7.843  1.00  0.00           N  
+ATOM   1575  CA  SER     2      15.713   5.241  -8.701  1.00  0.00           C  
+ATOM   1576  C   SER     2      14.299   5.665  -8.298  1.00  0.00           C  
+ATOM   1577  O   SER     2      13.570   4.914  -7.680  1.00  0.00           O  
+ATOM   1578  CB  SER     2      15.716   4.805 -10.166  1.00  0.00           C  
+ATOM   1579  OG  SER     2      17.059   4.664 -10.610  1.00  0.00           O  
+ATOM   1580  H   SER     2      16.975   4.184  -7.311  1.00  0.00           H  
+ATOM   1581  HA  SER     2      16.388   6.075  -8.573  1.00  0.00           H  
+ATOM   1582 1HB  SER     2      15.211   3.858 -10.263  1.00  0.00           H  
+ATOM   1583 2HB  SER     2      15.204   5.547 -10.764  1.00  0.00           H  
+ATOM   1584  HG  SER     2      17.412   5.543 -10.770  1.00  0.00           H  
+ATOM   1585  N   GLU     3      13.905   6.860  -8.640  1.00  0.00           N  
+ATOM   1586  CA  GLU     3      12.538   7.327  -8.272  1.00  0.00           C  
+ATOM   1587  C   GLU     3      11.496   6.376  -8.864  1.00  0.00           C  
+ATOM   1588  O   GLU     3      10.351   6.363  -8.458  1.00  0.00           O  
+ATOM   1589  CB  GLU     3      12.314   8.739  -8.818  1.00  0.00           C  
+ATOM   1590  CG  GLU     3      12.771   9.767  -7.781  1.00  0.00           C  
+ATOM   1591  CD  GLU     3      12.388  11.173  -8.249  1.00  0.00           C  
+ATOM   1592  OE1 GLU     3      11.396  11.292  -8.949  1.00  0.00           O  
+ATOM   1593  OE2 GLU     3      13.093  12.104  -7.900  1.00  0.00           O  
+ATOM   1594  H   GLU     3      14.507   7.451  -9.138  1.00  0.00           H  
+ATOM   1595  HA  GLU     3      12.441   7.339  -7.196  1.00  0.00           H  
+ATOM   1596 1HB  GLU     3      12.883   8.866  -9.728  1.00  0.00           H  
+ATOM   1597 2HB  GLU     3      11.264   8.881  -9.026  1.00  0.00           H  
+ATOM   1598 1HG  GLU     3      12.295   9.559  -6.833  1.00  0.00           H  
+ATOM   1599 2HG  GLU     3      13.843   9.709  -7.664  1.00  0.00           H  
+ATOM   1600  N   LYS     4      11.883   5.575  -9.820  1.00  0.00           N  
+ATOM   1601  CA  LYS     4      10.913   4.624 -10.432  1.00  0.00           C  
+ATOM   1602  C   LYS     4      10.674   3.460  -9.469  1.00  0.00           C  
+ATOM   1603  O   LYS     4       9.597   3.297  -8.932  1.00  0.00           O  
+ATOM   1604  CB  LYS     4      11.480   4.090 -11.748  1.00  0.00           C  
+ATOM   1605  CG  LYS     4      11.261   5.124 -12.852  1.00  0.00           C  
+ATOM   1606  CD  LYS     4      12.353   6.192 -12.771  1.00  0.00           C  
+ATOM   1607  CE  LYS     4      12.958   6.409 -14.158  1.00  0.00           C  
+ATOM   1608  NZ  LYS     4      11.948   7.052 -15.044  1.00  0.00           N  
+ATOM   1609  H   LYS     4      12.812   5.598 -10.132  1.00  0.00           H  
+ATOM   1610  HA  LYS     4       9.980   5.132 -10.620  1.00  0.00           H  
+ATOM   1611 1HB  LYS     4      12.537   3.901 -11.634  1.00  0.00           H  
+ATOM   1612 2HB  LYS     4      10.976   3.171 -12.011  1.00  0.00           H  
+ATOM   1613 1HG  LYS     4      11.301   4.637 -13.816  1.00  0.00           H  
+ATOM   1614 2HG  LYS     4      10.295   5.589 -12.723  1.00  0.00           H  
+ATOM   1615 1HD  LYS     4      11.925   7.118 -12.414  1.00  0.00           H  
+ATOM   1616 2HD  LYS     4      13.124   5.866 -12.090  1.00  0.00           H  
+ATOM   1617 1HE  LYS     4      13.824   7.049 -14.076  1.00  0.00           H  
+ATOM   1618 2HE  LYS     4      13.251   5.458 -14.576  1.00  0.00           H  
+ATOM   1619 1HZ  LYS     4      10.995   6.748 -14.763  1.00  0.00           H  
+ATOM   1620 2HZ  LYS     4      12.019   8.085 -14.959  1.00  0.00           H  
+ATOM   1621 3HZ  LYS     4      12.126   6.772 -16.031  1.00  0.00           H  
+ATOM   1622  N   MET     5      11.673   2.651  -9.241  1.00  0.00           N  
+ATOM   1623  CA  MET     5      11.500   1.504  -8.307  1.00  0.00           C  
+ATOM   1624  C   MET     5      11.039   2.030  -6.948  1.00  0.00           C  
+ATOM   1625  O   MET     5      10.503   1.301  -6.137  1.00  0.00           O  
+ATOM   1626  CB  MET     5      12.833   0.770  -8.148  1.00  0.00           C  
+ATOM   1627  CG  MET     5      12.573  -0.694  -7.784  1.00  0.00           C  
+ATOM   1628  SD  MET     5      13.996  -1.354  -6.881  1.00  0.00           S  
+ATOM   1629  CE  MET     5      13.548  -3.103  -6.996  1.00  0.00           C  
+ATOM   1630  H   MET     5      12.536   2.802  -9.679  1.00  0.00           H  
+ATOM   1631  HA  MET     5      10.758   0.826  -8.703  1.00  0.00           H  
+ATOM   1632 1HB  MET     5      13.383   0.817  -9.078  1.00  0.00           H  
+ATOM   1633 2HB  MET     5      13.411   1.237  -7.364  1.00  0.00           H  
+ATOM   1634 1HG  MET     5      11.691  -0.760  -7.165  1.00  0.00           H  
+ATOM   1635 2HG  MET     5      12.423  -1.268  -8.687  1.00  0.00           H  
+ATOM   1636 1HE  MET     5      12.504  -3.189  -7.264  1.00  0.00           H  
+ATOM   1637 2HE  MET     5      14.151  -3.582  -7.751  1.00  0.00           H  
+ATOM   1638 3HE  MET     5      13.722  -3.581  -6.041  1.00  0.00           H  
+ATOM   1639  N   SER     6      11.243   3.293  -6.695  1.00  0.00           N  
+ATOM   1640  CA  SER     6      10.817   3.872  -5.390  1.00  0.00           C  
+ATOM   1641  C   SER     6       9.330   4.227  -5.457  1.00  0.00           C  
+ATOM   1642  O   SER     6       8.612   4.118  -4.483  1.00  0.00           O  
+ATOM   1643  CB  SER     6      11.632   5.134  -5.104  1.00  0.00           C  
+ATOM   1644  OG  SER     6      11.746   5.308  -3.698  1.00  0.00           O  
+ATOM   1645  H   SER     6      11.677   3.862  -7.364  1.00  0.00           H  
+ATOM   1646  HA  SER     6      10.981   3.151  -4.605  1.00  0.00           H  
+ATOM   1647 1HB  SER     6      12.615   5.034  -5.530  1.00  0.00           H  
+ATOM   1648 2HB  SER     6      11.135   5.989  -5.544  1.00  0.00           H  
+ATOM   1649  HG  SER     6      10.860   5.342  -3.331  1.00  0.00           H  
+ATOM   1650  N   THR     7       8.860   4.639  -6.603  1.00  0.00           N  
+ATOM   1651  CA  THR     7       7.418   4.990  -6.732  1.00  0.00           C  
+ATOM   1652  C   THR     7       6.608   3.699  -6.823  1.00  0.00           C  
+ATOM   1653  O   THR     7       5.411   3.684  -6.610  1.00  0.00           O  
+ATOM   1654  CB  THR     7       7.203   5.823  -7.998  1.00  0.00           C  
+ATOM   1655  OG1 THR     7       5.877   6.335  -8.004  1.00  0.00           O  
+ATOM   1656  CG2 THR     7       7.421   4.950  -9.234  1.00  0.00           C  
+ATOM   1657  H   THR     7       9.453   4.709  -7.380  1.00  0.00           H  
+ATOM   1658  HA  THR     7       7.104   5.555  -5.867  1.00  0.00           H  
+ATOM   1659  HB  THR     7       7.904   6.642  -8.013  1.00  0.00           H  
+ATOM   1660  HG1 THR     7       5.883   7.180  -7.549  1.00  0.00           H  
+ATOM   1661 1HG2 THR     7       7.596   3.929  -8.928  1.00  0.00           H  
+ATOM   1662 2HG2 THR     7       6.545   4.992  -9.864  1.00  0.00           H  
+ATOM   1663 3HG2 THR     7       8.277   5.312  -9.784  1.00  0.00           H  
+ATOM   1664  N   ALA     8       7.261   2.611  -7.125  1.00  0.00           N  
+ATOM   1665  CA  ALA     8       6.548   1.313  -7.218  1.00  0.00           C  
+ATOM   1666  C   ALA     8       6.397   0.751  -5.810  1.00  0.00           C  
+ATOM   1667  O   ALA     8       5.378   0.195  -5.452  1.00  0.00           O  
+ATOM   1668  CB  ALA     8       7.364   0.347  -8.076  1.00  0.00           C  
+ATOM   1669  H   ALA     8       8.227   2.647  -7.278  1.00  0.00           H  
+ATOM   1670  HA  ALA     8       5.574   1.462  -7.657  1.00  0.00           H  
+ATOM   1671 1HB  ALA     8       8.392   0.678  -8.110  1.00  0.00           H  
+ATOM   1672 2HB  ALA     8       7.318  -0.642  -7.649  1.00  0.00           H  
+ATOM   1673 3HB  ALA     8       6.960   0.328  -9.078  1.00  0.00           H  
+ATOM   1674  N   ILE     9       7.407   0.912  -5.001  1.00  0.00           N  
+ATOM   1675  CA  ILE     9       7.326   0.410  -3.608  1.00  0.00           C  
+ATOM   1676  C   ILE     9       6.312   1.264  -2.846  1.00  0.00           C  
+ATOM   1677  O   ILE     9       5.642   0.801  -1.948  1.00  0.00           O  
+ATOM   1678  CB  ILE     9       8.699   0.517  -2.942  1.00  0.00           C  
+ATOM   1679  CG1 ILE     9       9.172   1.971  -2.987  1.00  0.00           C  
+ATOM   1680  CG2 ILE     9       9.701  -0.366  -3.686  1.00  0.00           C  
+ATOM   1681  CD1 ILE     9      10.348   2.154  -2.026  1.00  0.00           C  
+ATOM   1682  H   ILE     9       8.211   1.377  -5.310  1.00  0.00           H  
+ATOM   1683  HA  ILE     9       7.002  -0.618  -3.614  1.00  0.00           H  
+ATOM   1684  HB  ILE     9       8.627   0.192  -1.913  1.00  0.00           H  
+ATOM   1685 1HG1 ILE     9       9.485   2.215  -3.992  1.00  0.00           H  
+ATOM   1686 2HG1 ILE     9       8.364   2.623  -2.692  1.00  0.00           H  
+ATOM   1687 1HG2 ILE     9       9.326  -0.580  -4.677  1.00  0.00           H  
+ATOM   1688 2HG2 ILE     9      10.646   0.150  -3.763  1.00  0.00           H  
+ATOM   1689 3HG2 ILE     9       9.837  -1.290  -3.145  1.00  0.00           H  
+ATOM   1690 1HD1 ILE     9      10.365   1.339  -1.318  1.00  0.00           H  
+ATOM   1691 2HD1 ILE     9      11.272   2.164  -2.585  1.00  0.00           H  
+ATOM   1692 3HD1 ILE     9      10.238   3.089  -1.496  1.00  0.00           H  
+ATOM   1693  N   SER    10       6.191   2.512  -3.211  1.00  0.00           N  
+ATOM   1694  CA  SER    10       5.213   3.398  -2.521  1.00  0.00           C  
+ATOM   1695  C   SER    10       3.796   2.927  -2.844  1.00  0.00           C  
+ATOM   1696  O   SER    10       3.001   2.669  -1.960  1.00  0.00           O  
+ATOM   1697  CB  SER    10       5.397   4.836  -3.007  1.00  0.00           C  
+ATOM   1698  OG  SER    10       6.672   5.311  -2.598  1.00  0.00           O  
+ATOM   1699  H   SER    10       6.737   2.864  -3.947  1.00  0.00           H  
+ATOM   1700  HA  SER    10       5.376   3.353  -1.455  1.00  0.00           H  
+ATOM   1701 1HB  SER    10       5.335   4.867  -4.082  1.00  0.00           H  
+ATOM   1702 2HB  SER    10       4.617   5.459  -2.587  1.00  0.00           H  
+ATOM   1703  HG  SER    10       7.068   5.776  -3.339  1.00  0.00           H  
+ATOM   1704  N   VAL    11       3.471   2.804  -4.102  1.00  0.00           N  
+ATOM   1705  CA  VAL    11       2.106   2.341  -4.474  1.00  0.00           C  
+ATOM   1706  C   VAL    11       1.805   1.035  -3.735  1.00  0.00           C  
+ATOM   1707  O   VAL    11       0.688   0.780  -3.332  1.00  0.00           O  
+ATOM   1708  CB  VAL    11       2.035   2.101  -5.984  1.00  0.00           C  
+ATOM   1709  CG1 VAL    11       0.714   1.413  -6.331  1.00  0.00           C  
+ATOM   1710  CG2 VAL    11       2.117   3.441  -6.719  1.00  0.00           C  
+ATOM   1711  H   VAL    11       4.127   3.011  -4.802  1.00  0.00           H  
+ATOM   1712  HA  VAL    11       1.380   3.090  -4.192  1.00  0.00           H  
+ATOM   1713  HB  VAL    11       2.859   1.471  -6.287  1.00  0.00           H  
+ATOM   1714 1HG1 VAL    11       0.627   0.493  -5.772  1.00  0.00           H  
+ATOM   1715 2HG1 VAL    11      -0.108   2.066  -6.079  1.00  0.00           H  
+ATOM   1716 3HG1 VAL    11       0.689   1.194  -7.389  1.00  0.00           H  
+ATOM   1717 1HG2 VAL    11       2.624   4.164  -6.097  1.00  0.00           H  
+ATOM   1718 2HG2 VAL    11       2.663   3.313  -7.642  1.00  0.00           H  
+ATOM   1719 3HG2 VAL    11       1.119   3.792  -6.938  1.00  0.00           H  
+ATOM   1720  N   LEU    12       2.796   0.207  -3.549  1.00  0.00           N  
+ATOM   1721  CA  LEU    12       2.565  -1.076  -2.828  1.00  0.00           C  
+ATOM   1722  C   LEU    12       2.023  -0.776  -1.429  1.00  0.00           C  
+ATOM   1723  O   LEU    12       0.967  -1.239  -1.048  1.00  0.00           O  
+ATOM   1724  CB  LEU    12       3.884  -1.841  -2.710  1.00  0.00           C  
+ATOM   1725  CG  LEU    12       4.006  -2.834  -3.867  1.00  0.00           C  
+ATOM   1726  CD1 LEU    12       5.461  -3.290  -3.997  1.00  0.00           C  
+ATOM   1727  CD2 LEU    12       3.116  -4.048  -3.593  1.00  0.00           C  
+ATOM   1728  H   LEU    12       3.691   0.431  -3.877  1.00  0.00           H  
+ATOM   1729  HA  LEU    12       1.848  -1.672  -3.374  1.00  0.00           H  
+ATOM   1730 1HB  LEU    12       4.708  -1.143  -2.745  1.00  0.00           H  
+ATOM   1731 2HB  LEU    12       3.905  -2.378  -1.773  1.00  0.00           H  
+ATOM   1732  HG  LEU    12       3.695  -2.356  -4.785  1.00  0.00           H  
+ATOM   1733 1HD1 LEU    12       6.039  -2.885  -3.180  1.00  0.00           H  
+ATOM   1734 2HD1 LEU    12       5.503  -4.368  -3.969  1.00  0.00           H  
+ATOM   1735 3HD1 LEU    12       5.866  -2.937  -4.934  1.00  0.00           H  
+ATOM   1736 1HD2 LEU    12       2.184  -3.720  -3.154  1.00  0.00           H  
+ATOM   1737 2HD2 LEU    12       2.915  -4.564  -4.520  1.00  0.00           H  
+ATOM   1738 3HD2 LEU    12       3.619  -4.717  -2.911  1.00  0.00           H  
+ATOM   1739  N   LEU    13       2.736   0.002  -0.661  1.00  0.00           N  
+ATOM   1740  CA  LEU    13       2.259   0.335   0.710  1.00  0.00           C  
+ATOM   1741  C   LEU    13       0.852   0.922   0.624  1.00  0.00           C  
+ATOM   1742  O   LEU    13       0.056   0.789   1.531  1.00  0.00           O  
+ATOM   1743  CB  LEU    13       3.202   1.363   1.339  1.00  0.00           C  
+ATOM   1744  CG  LEU    13       4.452   0.662   1.877  1.00  0.00           C  
+ATOM   1745  CD1 LEU    13       4.931  -0.394   0.878  1.00  0.00           C  
+ATOM   1746  CD2 LEU    13       5.557   1.699   2.085  1.00  0.00           C  
+ATOM   1747  H   LEU    13       3.585   0.370  -0.987  1.00  0.00           H  
+ATOM   1748  HA  LEU    13       2.242  -0.559   1.315  1.00  0.00           H  
+ATOM   1749 1HB  LEU    13       3.491   2.089   0.593  1.00  0.00           H  
+ATOM   1750 2HB  LEU    13       2.696   1.865   2.151  1.00  0.00           H  
+ATOM   1751  HG  LEU    13       4.221   0.187   2.821  1.00  0.00           H  
+ATOM   1752 1HD1 LEU    13       4.150  -1.124   0.723  1.00  0.00           H  
+ATOM   1753 2HD1 LEU    13       5.169   0.081  -0.059  1.00  0.00           H  
+ATOM   1754 3HD1 LEU    13       5.810  -0.886   1.266  1.00  0.00           H  
+ATOM   1755 1HD2 LEU    13       5.121   2.688   2.109  1.00  0.00           H  
+ATOM   1756 2HD2 LEU    13       6.064   1.506   3.018  1.00  0.00           H  
+ATOM   1757 3HD2 LEU    13       6.264   1.639   1.271  1.00  0.00           H  
+ATOM   1758  N   ALA    14       0.541   1.573  -0.462  1.00  0.00           N  
+ATOM   1759  CA  ALA    14      -0.817   2.172  -0.613  1.00  0.00           C  
+ATOM   1760  C   ALA    14      -1.878   1.080  -0.458  1.00  0.00           C  
+ATOM   1761  O   ALA    14      -2.612   1.046   0.512  1.00  0.00           O  
+ATOM   1762  CB  ALA    14      -0.943   2.815  -1.995  1.00  0.00           C  
+ATOM   1763  H   ALA    14       1.202   1.668  -1.179  1.00  0.00           H  
+ATOM   1764  HA  ALA    14      -0.963   2.923   0.148  1.00  0.00           H  
+ATOM   1765 1HB  ALA    14      -0.152   3.540  -2.129  1.00  0.00           H  
+ATOM   1766 2HB  ALA    14      -0.865   2.054  -2.755  1.00  0.00           H  
+ATOM   1767 3HB  ALA    14      -1.899   3.309  -2.076  1.00  0.00           H  
+ATOM   1768  N   GLN    15      -1.969   0.186  -1.406  1.00  0.00           N  
+ATOM   1769  CA  GLN    15      -2.987  -0.899  -1.307  1.00  0.00           C  
+ATOM   1770  C   GLN    15      -2.854  -1.596   0.050  1.00  0.00           C  
+ATOM   1771  O   GLN    15      -3.793  -2.181   0.553  1.00  0.00           O  
+ATOM   1772  CB  GLN    15      -2.770  -1.915  -2.433  1.00  0.00           C  
+ATOM   1773  CG  GLN    15      -1.551  -2.787  -2.120  1.00  0.00           C  
+ATOM   1774  CD  GLN    15      -1.248  -3.690  -3.318  1.00  0.00           C  
+ATOM   1775  OE1 GLN    15      -1.308  -4.898  -3.212  1.00  0.00           O  
+ATOM   1776  NE2 GLN    15      -0.926  -3.150  -4.461  1.00  0.00           N  
+ATOM   1777  H   GLN    15      -1.370   0.228  -2.180  1.00  0.00           H  
+ATOM   1778  HA  GLN    15      -3.974  -0.472  -1.394  1.00  0.00           H  
+ATOM   1779 1HB  GLN    15      -3.646  -2.542  -2.525  1.00  0.00           H  
+ATOM   1780 2HB  GLN    15      -2.605  -1.390  -3.363  1.00  0.00           H  
+ATOM   1781 1HG  GLN    15      -0.699  -2.155  -1.922  1.00  0.00           H  
+ATOM   1782 2HG  GLN    15      -1.758  -3.398  -1.255  1.00  0.00           H  
+ATOM   1783 1HE2 GLN    15      -0.730  -3.720  -5.235  1.00  0.00           H  
+ATOM   1784 2HE2 GLN    15      -0.879  -2.175  -4.547  1.00  0.00           H  
+ATOM   1785  N   ALA    16      -1.695  -1.537   0.645  1.00  0.00           N  
+ATOM   1786  CA  ALA    16      -1.502  -2.193   1.969  1.00  0.00           C  
+ATOM   1787  C   ALA    16      -2.349  -1.475   3.022  1.00  0.00           C  
+ATOM   1788  O   ALA    16      -3.014  -2.097   3.827  1.00  0.00           O  
+ATOM   1789  CB  ALA    16      -0.027  -2.118   2.365  1.00  0.00           C  
+ATOM   1790  H   ALA    16      -0.951  -1.059   0.222  1.00  0.00           H  
+ATOM   1791  HA  ALA    16      -1.806  -3.227   1.906  1.00  0.00           H  
+ATOM   1792 1HB  ALA    16       0.588  -2.220   1.483  1.00  0.00           H  
+ATOM   1793 2HB  ALA    16       0.172  -1.166   2.834  1.00  0.00           H  
+ATOM   1794 3HB  ALA    16       0.200  -2.916   3.057  1.00  0.00           H  
+ATOM   1795  N   VAL    17      -2.333  -0.170   3.022  1.00  0.00           N  
+ATOM   1796  CA  VAL    17      -3.140   0.584   4.023  1.00  0.00           C  
+ATOM   1797  C   VAL    17      -4.624   0.300   3.795  1.00  0.00           C  
+ATOM   1798  O   VAL    17      -5.382   0.124   4.729  1.00  0.00           O  
+ATOM   1799  CB  VAL    17      -2.877   2.084   3.868  1.00  0.00           C  
+ATOM   1800  CG1 VAL    17      -3.742   2.857   4.864  1.00  0.00           C  
+ATOM   1801  CG2 VAL    17      -1.400   2.377   4.143  1.00  0.00           C  
+ATOM   1802  H   VAL    17      -1.794   0.314   2.365  1.00  0.00           H  
+ATOM   1803  HA  VAL    17      -2.861   0.271   5.019  1.00  0.00           H  
+ATOM   1804  HB  VAL    17      -3.127   2.390   2.862  1.00  0.00           H  
+ATOM   1805 1HG1 VAL    17      -4.174   2.169   5.576  1.00  0.00           H  
+ATOM   1806 2HG1 VAL    17      -3.132   3.579   5.385  1.00  0.00           H  
+ATOM   1807 3HG1 VAL    17      -4.533   3.368   4.335  1.00  0.00           H  
+ATOM   1808 1HG2 VAL    17      -0.860   1.446   4.234  1.00  0.00           H  
+ATOM   1809 2HG2 VAL    17      -0.989   2.953   3.327  1.00  0.00           H  
+ATOM   1810 3HG2 VAL    17      -1.310   2.937   5.062  1.00  0.00           H  
+ATOM   1811  N   PHE    18      -5.047   0.251   2.562  1.00  0.00           N  
+ATOM   1812  CA  PHE    18      -6.485  -0.026   2.280  1.00  0.00           C  
+ATOM   1813  C   PHE    18      -6.897  -1.325   2.975  1.00  0.00           C  
+ATOM   1814  O   PHE    18      -7.751  -1.337   3.842  1.00  0.00           O  
+ATOM   1815  CB  PHE    18      -6.688  -0.163   0.769  1.00  0.00           C  
+ATOM   1816  CG  PHE    18      -8.151  -0.389   0.470  1.00  0.00           C  
+ATOM   1817  CD1 PHE    18      -9.088   0.611   0.757  1.00  0.00           C  
+ATOM   1818  CD2 PHE    18      -8.570  -1.598  -0.097  1.00  0.00           C  
+ATOM   1819  CE1 PHE    18     -10.444   0.401   0.477  1.00  0.00           C  
+ATOM   1820  CE2 PHE    18      -9.926  -1.809  -0.376  1.00  0.00           C  
+ATOM   1821  CZ  PHE    18     -10.863  -0.809  -0.089  1.00  0.00           C  
+ATOM   1822  H   PHE    18      -4.420   0.395   1.822  1.00  0.00           H  
+ATOM   1823  HA  PHE    18      -7.089   0.789   2.652  1.00  0.00           H  
+ATOM   1824 1HB  PHE    18      -6.357   0.740   0.278  1.00  0.00           H  
+ATOM   1825 2HB  PHE    18      -6.114  -1.002   0.404  1.00  0.00           H  
+ATOM   1826  HD1 PHE    18      -8.765   1.544   1.194  1.00  0.00           H  
+ATOM   1827  HD2 PHE    18      -7.847  -2.370  -0.318  1.00  0.00           H  
+ATOM   1828  HE1 PHE    18     -11.166   1.173   0.698  1.00  0.00           H  
+ATOM   1829  HE2 PHE    18     -10.249  -2.742  -0.814  1.00  0.00           H  
+ATOM   1830  HZ  PHE    18     -11.908  -0.971  -0.306  1.00  0.00           H  
+ATOM   1831  N   LEU    19      -6.296  -2.423   2.603  1.00  0.00           N  
+ATOM   1832  CA  LEU    19      -6.651  -3.721   3.244  1.00  0.00           C  
+ATOM   1833  C   LEU    19      -6.276  -3.673   4.727  1.00  0.00           C  
+ATOM   1834  O   LEU    19      -6.749  -4.461   5.522  1.00  0.00           O  
+ATOM   1835  CB  LEU    19      -5.885  -4.855   2.556  1.00  0.00           C  
+ATOM   1836  CG  LEU    19      -4.412  -4.808   2.968  1.00  0.00           C  
+ATOM   1837  CD1 LEU    19      -4.205  -5.660   4.222  1.00  0.00           C  
+ATOM   1838  CD2 LEU    19      -3.547  -5.355   1.832  1.00  0.00           C  
+ATOM   1839  H   LEU    19      -5.609  -2.392   1.904  1.00  0.00           H  
+ATOM   1840  HA  LEU    19      -7.713  -3.891   3.146  1.00  0.00           H  
+ATOM   1841 1HB  LEU    19      -6.312  -5.804   2.849  1.00  0.00           H  
+ATOM   1842 2HB  LEU    19      -5.961  -4.742   1.484  1.00  0.00           H  
+ATOM   1843  HG  LEU    19      -4.130  -3.786   3.179  1.00  0.00           H  
+ATOM   1844 1HD1 LEU    19      -5.161  -5.859   4.685  1.00  0.00           H  
+ATOM   1845 2HD1 LEU    19      -3.737  -6.595   3.949  1.00  0.00           H  
+ATOM   1846 3HD1 LEU    19      -3.571  -5.131   4.917  1.00  0.00           H  
+ATOM   1847 1HD2 LEU    19      -3.861  -4.917   0.896  1.00  0.00           H  
+ATOM   1848 2HD2 LEU    19      -2.512  -5.106   2.015  1.00  0.00           H  
+ATOM   1849 3HD2 LEU    19      -3.656  -6.429   1.781  1.00  0.00           H  
+ATOM   1850  N   LEU    20      -5.429  -2.755   5.105  1.00  0.00           N  
+ATOM   1851  CA  LEU    20      -5.023  -2.656   6.534  1.00  0.00           C  
+ATOM   1852  C   LEU    20      -6.192  -2.114   7.360  1.00  0.00           C  
+ATOM   1853  O   LEU    20      -6.394  -2.499   8.494  1.00  0.00           O  
+ATOM   1854  CB  LEU    20      -3.827  -1.709   6.659  1.00  0.00           C  
+ATOM   1855  CG  LEU    20      -3.383  -1.635   8.121  1.00  0.00           C  
+ATOM   1856  CD1 LEU    20      -2.484  -2.829   8.445  1.00  0.00           C  
+ATOM   1857  CD2 LEU    20      -2.608  -0.335   8.351  1.00  0.00           C  
+ATOM   1858  H   LEU    20      -5.060  -2.129   4.447  1.00  0.00           H  
+ATOM   1859  HA  LEU    20      -4.747  -3.635   6.899  1.00  0.00           H  
+ATOM   1860 1HB  LEU    20      -3.010  -2.079   6.054  1.00  0.00           H  
+ATOM   1861 2HB  LEU    20      -4.110  -0.724   6.320  1.00  0.00           H  
+ATOM   1862  HG  LEU    20      -4.254  -1.655   8.763  1.00  0.00           H  
+ATOM   1863 1HD1 LEU    20      -1.806  -3.002   7.623  1.00  0.00           H  
+ATOM   1864 2HD1 LEU    20      -1.918  -2.620   9.341  1.00  0.00           H  
+ATOM   1865 3HD1 LEU    20      -3.094  -3.707   8.601  1.00  0.00           H  
+ATOM   1866 1HD2 LEU    20      -2.981   0.430   7.687  1.00  0.00           H  
+ATOM   1867 2HD2 LEU    20      -2.735  -0.016   9.375  1.00  0.00           H  
+ATOM   1868 3HD2 LEU    20      -1.558  -0.503   8.154  1.00  0.00           H  
+ATOM   1869  N   LEU    21      -6.964  -1.225   6.799  1.00  0.00           N  
+ATOM   1870  CA  LEU    21      -8.119  -0.661   7.552  1.00  0.00           C  
+ATOM   1871  C   LEU    21      -9.262  -1.677   7.569  1.00  0.00           C  
+ATOM   1872  O   LEU    21      -9.913  -1.878   8.574  1.00  0.00           O  
+ATOM   1873  CB  LEU    21      -8.593   0.626   6.873  1.00  0.00           C  
+ATOM   1874  CG  LEU    21      -9.909   1.080   7.504  1.00  0.00           C  
+ATOM   1875  CD1 LEU    21      -9.704   1.306   9.003  1.00  0.00           C  
+ATOM   1876  CD2 LEU    21     -10.366   2.385   6.850  1.00  0.00           C  
+ATOM   1877  H   LEU    21      -6.785  -0.928   5.882  1.00  0.00           H  
+ATOM   1878  HA  LEU    21      -7.816  -0.442   8.565  1.00  0.00           H  
+ATOM   1879 1HB  LEU    21      -7.847   1.396   7.000  1.00  0.00           H  
+ATOM   1880 2HB  LEU    21      -8.745   0.441   5.820  1.00  0.00           H  
+ATOM   1881  HG  LEU    21     -10.662   0.318   7.355  1.00  0.00           H  
+ATOM   1882 1HD1 LEU    21      -8.678   1.096   9.261  1.00  0.00           H  
+ATOM   1883 2HD1 LEU    21      -9.934   2.332   9.246  1.00  0.00           H  
+ATOM   1884 3HD1 LEU    21     -10.358   0.649   9.558  1.00  0.00           H  
+ATOM   1885 1HD2 LEU    21     -10.476   2.236   5.785  1.00  0.00           H  
+ATOM   1886 2HD2 LEU    21     -11.313   2.688   7.271  1.00  0.00           H  
+ATOM   1887 3HD2 LEU    21      -9.630   3.156   7.031  1.00  0.00           H  
+ATOM   1888  N   THR    22      -9.512  -2.318   6.460  1.00  0.00           N  
+ATOM   1889  CA  THR    22     -10.615  -3.320   6.412  1.00  0.00           C  
+ATOM   1890  C   THR    22     -10.262  -4.515   7.303  1.00  0.00           C  
+ATOM   1891  O   THR    22     -11.128  -5.216   7.787  1.00  0.00           O  
+ATOM   1892  CB  THR    22     -10.806  -3.801   4.971  1.00  0.00           C  
+ATOM   1893  OG1 THR    22      -9.893  -4.854   4.699  1.00  0.00           O  
+ATOM   1894  CG2 THR    22     -10.552  -2.641   4.006  1.00  0.00           C  
+ATOM   1895  H   THR    22      -8.975  -2.141   5.658  1.00  0.00           H  
+ATOM   1896  HA  THR    22     -11.529  -2.866   6.764  1.00  0.00           H  
+ATOM   1897  HB  THR    22     -11.817  -4.157   4.840  1.00  0.00           H  
+ATOM   1898  HG1 THR    22     -10.357  -5.687   4.813  1.00  0.00           H  
+ATOM   1899 1HG2 THR    22     -10.930  -1.726   4.436  1.00  0.00           H  
+ATOM   1900 2HG2 THR    22      -9.491  -2.545   3.830  1.00  0.00           H  
+ATOM   1901 3HG2 THR    22     -11.057  -2.835   3.071  1.00  0.00           H  
+ATOM   1902  N   SER    23      -8.997  -4.753   7.518  1.00  0.00           N  
+ATOM   1903  CA  SER    23      -8.590  -5.906   8.372  1.00  0.00           C  
+ATOM   1904  C   SER    23      -8.605  -5.491   9.846  1.00  0.00           C  
+ATOM   1905  O   SER    23      -8.904  -6.281  10.718  1.00  0.00           O  
+ATOM   1906  CB  SER    23      -7.180  -6.351   7.986  1.00  0.00           C  
+ATOM   1907  OG  SER    23      -7.247  -7.620   7.348  1.00  0.00           O  
+ATOM   1908  H   SER    23      -8.315  -4.178   7.114  1.00  0.00           H  
+ATOM   1909  HA  SER    23      -9.279  -6.724   8.224  1.00  0.00           H  
+ATOM   1910 1HB  SER    23      -6.747  -5.635   7.307  1.00  0.00           H  
+ATOM   1911 2HB  SER    23      -6.568  -6.415   8.876  1.00  0.00           H  
+ATOM   1912  HG  SER    23      -6.612  -8.200   7.775  1.00  0.00           H  
+ATOM   1913  N   GLN    24      -8.278  -4.259  10.133  1.00  0.00           N  
+ATOM   1914  CA  GLN    24      -8.271  -3.805  11.553  1.00  0.00           C  
+ATOM   1915  C   GLN    24      -9.702  -3.511  12.008  1.00  0.00           C  
+ATOM   1916  O   GLN    24      -9.977  -3.401  13.186  1.00  0.00           O  
+ATOM   1917  CB  GLN    24      -7.426  -2.537  11.679  1.00  0.00           C  
+ATOM   1918  CG  GLN    24      -6.111  -2.868  12.390  1.00  0.00           C  
+ATOM   1919  CD  GLN    24      -4.987  -2.002  11.817  1.00  0.00           C  
+ATOM   1920  OE1 GLN    24      -5.177  -0.829  11.563  1.00  0.00           O  
+ATOM   1921  NE2 GLN    24      -3.815  -2.534  11.601  1.00  0.00           N  
+ATOM   1922  H   GLN    24      -8.036  -3.636   9.417  1.00  0.00           H  
+ATOM   1923  HA  GLN    24      -7.849  -4.580  12.175  1.00  0.00           H  
+ATOM   1924 1HB  GLN    24      -7.215  -2.144  10.695  1.00  0.00           H  
+ATOM   1925 2HB  GLN    24      -7.966  -1.799  12.253  1.00  0.00           H  
+ATOM   1926 1HG  GLN    24      -6.214  -2.673  13.447  1.00  0.00           H  
+ATOM   1927 2HG  GLN    24      -5.872  -3.910  12.237  1.00  0.00           H  
+ATOM   1928 1HE2 GLN    24      -3.089  -1.988  11.236  1.00  0.00           H  
+ATOM   1929 2HE2 GLN    24      -3.662  -3.479  11.806  1.00  0.00           H  
+ATOM   1930  N   ARG    25     -10.616  -3.382  11.086  1.00  0.00           N  
+ATOM   1931  CA  ARG    25     -12.026  -3.095  11.474  1.00  0.00           C  
+ATOM   1932  C   ARG    25     -12.841  -4.389  11.435  1.00  0.00           C  
+ATOM   1933  O   ARG    25     -13.999  -4.349  11.817  1.00  0.00           O  
+ATOM   1934  CB  ARG    25     -12.627  -2.083  10.495  1.00  0.00           C  
+ATOM   1935  CG  ARG    25     -13.772  -1.333  11.177  1.00  0.00           C  
+ATOM   1936  CD  ARG    25     -13.279   0.039  11.642  1.00  0.00           C  
+ATOM   1937  NE  ARG    25     -12.848   0.836  10.459  1.00  0.00           N  
+ATOM   1938  CZ  ARG    25     -12.757   2.135  10.545  1.00  0.00           C  
+ATOM   1939  NH1 ARG    25     -12.610   2.703  11.711  1.00  0.00           N  
+ATOM   1940  NH2 ARG    25     -12.813   2.865   9.466  1.00  0.00           N  
+ATOM   1941  OXT ARG    25     -12.293  -5.399  11.026  1.00  0.00           O  
+ATOM   1942  H   ARG    25     -10.375  -3.473  10.141  1.00  0.00           H  
+ATOM   1943  HA  ARG    25     -12.048  -2.685  12.472  1.00  0.00           H  
+ATOM   1944 1HB  ARG    25     -11.865  -1.380  10.191  1.00  0.00           H  
+ATOM   1945 2HB  ARG    25     -13.006  -2.602   9.628  1.00  0.00           H  
+ATOM   1946 1HG  ARG    25     -14.585  -1.206  10.478  1.00  0.00           H  
+ATOM   1947 2HG  ARG    25     -14.115  -1.898  12.031  1.00  0.00           H  
+ATOM   1948 1HD  ARG    25     -14.078   0.555  12.152  1.00  0.00           H  
+ATOM   1949 2HD  ARG    25     -12.444  -0.087  12.315  1.00  0.00           H  
+ATOM   1950  HE  ARG    25     -12.634   0.385   9.616  1.00  0.00           H  
+ATOM   1951 1HH1 ARG    25     -12.568   2.143  12.539  1.00  0.00           H  
+ATOM   1952 2HH1 ARG    25     -12.539   3.698  11.776  1.00  0.00           H  
+ATOM   1953 1HH2 ARG    25     -12.928   2.431   8.573  1.00  0.00           H  
+ATOM   1954 2HH2 ARG    25     -12.742   3.861   9.532  1.00  0.00           H  
+TER    1955      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        6                                                                  
+ATOM   1956  N   GLY     1      14.522   7.109 -14.316  1.00  0.00           N  
+ATOM   1957  CA  GLY     1      13.576   6.049 -13.753  1.00  0.00           C  
+ATOM   1958  C   GLY     1      13.511   5.838 -12.279  1.00  0.00           C  
+ATOM   1959  O   GLY     1      12.711   5.067 -11.787  1.00  0.00           O  
+ATOM   1960 1H   GLY     1      14.426   7.987 -13.767  1.00  0.00           H  
+ATOM   1961 2H   GLY     1      15.503   6.768 -14.252  1.00  0.00           H  
+ATOM   1962 3H   GLY     1      14.283   7.294 -15.312  1.00  0.00           H  
+ATOM   1963 1HA  GLY     1      12.675   6.403 -13.636  1.00  0.00           H  
+ATOM   1964 2HA  GLY     1      13.616   5.136 -14.480  1.00  0.00           H  
+ATOM   1965  N   SER     2      14.345   6.515 -11.538  1.00  0.00           N  
+ATOM   1966  CA  SER     2      14.332   6.350 -10.057  1.00  0.00           C  
+ATOM   1967  C   SER     2      12.939   6.680  -9.517  1.00  0.00           C  
+ATOM   1968  O   SER     2      12.397   5.969  -8.694  1.00  0.00           O  
+ATOM   1969  CB  SER     2      15.357   7.297  -9.432  1.00  0.00           C  
+ATOM   1970  OG  SER     2      16.624   7.087 -10.041  1.00  0.00           O  
+ATOM   1971  H   SER     2      14.981   7.132 -11.956  1.00  0.00           H  
+ATOM   1972  HA  SER     2      14.584   5.330  -9.806  1.00  0.00           H  
+ATOM   1973 1HB  SER     2      15.051   8.318  -9.592  1.00  0.00           H  
+ATOM   1974 2HB  SER     2      15.420   7.106  -8.369  1.00  0.00           H  
+ATOM   1975  HG  SER     2      17.177   7.847  -9.841  1.00  0.00           H  
+ATOM   1976  N   GLU     3      12.356   7.755  -9.973  1.00  0.00           N  
+ATOM   1977  CA  GLU     3      10.999   8.131  -9.482  1.00  0.00           C  
+ATOM   1978  C   GLU     3      10.025   6.978  -9.734  1.00  0.00           C  
+ATOM   1979  O   GLU     3       9.148   6.709  -8.937  1.00  0.00           O  
+ATOM   1980  CB  GLU     3      10.515   9.378 -10.224  1.00  0.00           C  
+ATOM   1981  CG  GLU     3       9.942  10.379  -9.220  1.00  0.00           C  
+ATOM   1982  CD  GLU     3       9.323  11.559  -9.970  1.00  0.00           C  
+ATOM   1983  OE1 GLU     3       9.977  12.077 -10.861  1.00  0.00           O  
+ATOM   1984  OE2 GLU     3       8.207  11.925  -9.642  1.00  0.00           O  
+ATOM   1985  H   GLU     3      12.810   8.316 -10.635  1.00  0.00           H  
+ATOM   1986  HA  GLU     3      11.044   8.338  -8.423  1.00  0.00           H  
+ATOM   1987 1HB  GLU     3      11.346   9.828 -10.749  1.00  0.00           H  
+ATOM   1988 2HB  GLU     3       9.748   9.101 -10.932  1.00  0.00           H  
+ATOM   1989 1HG  GLU     3       9.184   9.893  -8.621  1.00  0.00           H  
+ATOM   1990 2HG  GLU     3      10.733  10.737  -8.577  1.00  0.00           H  
+ATOM   1991  N   LYS     4      10.171   6.295 -10.836  1.00  0.00           N  
+ATOM   1992  CA  LYS     4       9.250   5.162 -11.134  1.00  0.00           C  
+ATOM   1993  C   LYS     4       9.461   4.054 -10.101  1.00  0.00           C  
+ATOM   1994  O   LYS     4       8.540   3.638  -9.427  1.00  0.00           O  
+ATOM   1995  CB  LYS     4       9.541   4.614 -12.534  1.00  0.00           C  
+ATOM   1996  CG  LYS     4       9.862   5.768 -13.489  1.00  0.00           C  
+ATOM   1997  CD  LYS     4       8.845   6.896 -13.299  1.00  0.00           C  
+ATOM   1998  CE  LYS     4       8.183   7.221 -14.639  1.00  0.00           C  
+ATOM   1999  NZ  LYS     4       8.055   8.698 -14.784  1.00  0.00           N  
+ATOM   2000  H   LYS     4      10.884   6.527 -11.466  1.00  0.00           H  
+ATOM   2001  HA  LYS     4       8.229   5.505 -11.086  1.00  0.00           H  
+ATOM   2002 1HB  LYS     4      10.384   3.941 -12.488  1.00  0.00           H  
+ATOM   2003 2HB  LYS     4       8.676   4.081 -12.898  1.00  0.00           H  
+ATOM   2004 1HG  LYS     4      10.855   6.140 -13.283  1.00  0.00           H  
+ATOM   2005 2HG  LYS     4       9.816   5.415 -14.508  1.00  0.00           H  
+ATOM   2006 1HD  LYS     4       8.091   6.585 -12.591  1.00  0.00           H  
+ATOM   2007 2HD  LYS     4       9.348   7.775 -12.926  1.00  0.00           H  
+ATOM   2008 1HE  LYS     4       8.790   6.831 -15.443  1.00  0.00           H  
+ATOM   2009 2HE  LYS     4       7.203   6.768 -14.675  1.00  0.00           H  
+ATOM   2010 1HZ  LYS     4       8.986   9.142 -14.643  1.00  0.00           H  
+ATOM   2011 2HZ  LYS     4       7.703   8.924 -15.735  1.00  0.00           H  
+ATOM   2012 3HZ  LYS     4       7.388   9.061 -14.074  1.00  0.00           H  
+ATOM   2013  N   MET     5      10.667   3.574  -9.969  1.00  0.00           N  
+ATOM   2014  CA  MET     5      10.929   2.495  -8.976  1.00  0.00           C  
+ATOM   2015  C   MET     5      10.517   2.981  -7.585  1.00  0.00           C  
+ATOM   2016  O   MET     5      10.309   2.199  -6.680  1.00  0.00           O  
+ATOM   2017  CB  MET     5      12.419   2.146  -8.980  1.00  0.00           C  
+ATOM   2018  CG  MET     5      12.679   1.026  -9.990  1.00  0.00           C  
+ATOM   2019  SD  MET     5      13.008  -0.519  -9.107  1.00  0.00           S  
+ATOM   2020  CE  MET     5      11.291  -0.913  -8.691  1.00  0.00           C  
+ATOM   2021  H   MET     5      11.398   3.922 -10.521  1.00  0.00           H  
+ATOM   2022  HA  MET     5      10.353   1.619  -9.237  1.00  0.00           H  
+ATOM   2023 1HB  MET     5      12.993   3.020  -9.253  1.00  0.00           H  
+ATOM   2024 2HB  MET     5      12.713   1.815  -7.995  1.00  0.00           H  
+ATOM   2025 1HG  MET     5      11.811   0.903 -10.621  1.00  0.00           H  
+ATOM   2026 2HG  MET     5      13.533   1.282 -10.598  1.00  0.00           H  
+ATOM   2027 1HE  MET     5      10.631  -0.435  -9.396  1.00  0.00           H  
+ATOM   2028 2HE  MET     5      11.149  -1.984  -8.735  1.00  0.00           H  
+ATOM   2029 3HE  MET     5      11.072  -0.555  -7.695  1.00  0.00           H  
+ATOM   2030  N   SER     6      10.393   4.269  -7.410  1.00  0.00           N  
+ATOM   2031  CA  SER     6       9.991   4.807  -6.081  1.00  0.00           C  
+ATOM   2032  C   SER     6       8.464   4.795  -5.968  1.00  0.00           C  
+ATOM   2033  O   SER     6       7.912   4.631  -4.897  1.00  0.00           O  
+ATOM   2034  CB  SER     6      10.501   6.241  -5.935  1.00  0.00           C  
+ATOM   2035  OG  SER     6      10.243   6.699  -4.614  1.00  0.00           O  
+ATOM   2036  H   SER     6      10.563   4.882  -8.156  1.00  0.00           H  
+ATOM   2037  HA  SER     6      10.414   4.192  -5.300  1.00  0.00           H  
+ATOM   2038 1HB  SER     6      11.561   6.270  -6.119  1.00  0.00           H  
+ATOM   2039 2HB  SER     6       9.997   6.876  -6.652  1.00  0.00           H  
+ATOM   2040  HG  SER     6      10.546   6.025  -4.002  1.00  0.00           H  
+ATOM   2041  N   THR     7       7.777   4.964  -7.065  1.00  0.00           N  
+ATOM   2042  CA  THR     7       6.287   4.959  -7.018  1.00  0.00           C  
+ATOM   2043  C   THR     7       5.791   3.515  -7.006  1.00  0.00           C  
+ATOM   2044  O   THR     7       4.651   3.240  -6.682  1.00  0.00           O  
+ATOM   2045  CB  THR     7       5.731   5.682  -8.247  1.00  0.00           C  
+ATOM   2046  OG1 THR     7       6.290   6.985  -8.322  1.00  0.00           O  
+ATOM   2047  CG2 THR     7       4.208   5.782  -8.135  1.00  0.00           C  
+ATOM   2048  H   THR     7       8.240   5.089  -7.919  1.00  0.00           H  
+ATOM   2049  HA  THR     7       5.954   5.463  -6.122  1.00  0.00           H  
+ATOM   2050  HB  THR     7       5.986   5.128  -9.137  1.00  0.00           H  
+ATOM   2051  HG1 THR     7       5.614   7.580  -8.655  1.00  0.00           H  
+ATOM   2052 1HG2 THR     7       3.924   5.789  -7.092  1.00  0.00           H  
+ATOM   2053 2HG2 THR     7       3.871   6.694  -8.606  1.00  0.00           H  
+ATOM   2054 3HG2 THR     7       3.754   4.933  -8.626  1.00  0.00           H  
+ATOM   2055  N   ALA     8       6.642   2.588  -7.344  1.00  0.00           N  
+ATOM   2056  CA  ALA     8       6.229   1.161  -7.341  1.00  0.00           C  
+ATOM   2057  C   ALA     8       6.401   0.620  -5.927  1.00  0.00           C  
+ATOM   2058  O   ALA     8       5.531  -0.036  -5.384  1.00  0.00           O  
+ATOM   2059  CB  ALA     8       7.113   0.373  -8.309  1.00  0.00           C  
+ATOM   2060  H   ALA     8       7.558   2.829  -7.588  1.00  0.00           H  
+ATOM   2061  HA  ALA     8       5.197   1.080  -7.638  1.00  0.00           H  
+ATOM   2062 1HB  ALA     8       8.101   0.810  -8.331  1.00  0.00           H  
+ATOM   2063 2HB  ALA     8       7.180  -0.653  -7.981  1.00  0.00           H  
+ATOM   2064 3HB  ALA     8       6.682   0.409  -9.299  1.00  0.00           H  
+ATOM   2065  N   ILE     9       7.516   0.909  -5.322  1.00  0.00           N  
+ATOM   2066  CA  ILE     9       7.751   0.434  -3.937  1.00  0.00           C  
+ATOM   2067  C   ILE     9       6.803   1.181  -2.999  1.00  0.00           C  
+ATOM   2068  O   ILE     9       6.411   0.679  -1.965  1.00  0.00           O  
+ATOM   2069  CB  ILE     9       9.203   0.710  -3.539  1.00  0.00           C  
+ATOM   2070  CG1 ILE     9       9.475   2.215  -3.599  1.00  0.00           C  
+ATOM   2071  CG2 ILE     9      10.141  -0.012  -4.507  1.00  0.00           C  
+ATOM   2072  CD1 ILE     9      10.123   2.669  -2.290  1.00  0.00           C  
+ATOM   2073  H   ILE     9       8.193   1.451  -5.778  1.00  0.00           H  
+ATOM   2074  HA  ILE     9       7.553  -0.624  -3.884  1.00  0.00           H  
+ATOM   2075  HB  ILE     9       9.375   0.349  -2.534  1.00  0.00           H  
+ATOM   2076 1HG1 ILE     9      10.143   2.426  -4.422  1.00  0.00           H  
+ATOM   2077 2HG1 ILE     9       8.548   2.745  -3.742  1.00  0.00           H  
+ATOM   2078 1HG2 ILE     9       9.572  -0.699  -5.115  1.00  0.00           H  
+ATOM   2079 2HG2 ILE     9      10.628   0.713  -5.144  1.00  0.00           H  
+ATOM   2080 3HG2 ILE     9      10.886  -0.558  -3.947  1.00  0.00           H  
+ATOM   2081 1HD1 ILE     9      10.099   1.860  -1.576  1.00  0.00           H  
+ATOM   2082 2HD1 ILE     9      11.148   2.956  -2.474  1.00  0.00           H  
+ATOM   2083 3HD1 ILE     9       9.579   3.515  -1.894  1.00  0.00           H  
+ATOM   2084  N   SER    10       6.420   2.375  -3.364  1.00  0.00           N  
+ATOM   2085  CA  SER    10       5.486   3.154  -2.509  1.00  0.00           C  
+ATOM   2086  C   SER    10       4.078   2.581  -2.668  1.00  0.00           C  
+ATOM   2087  O   SER    10       3.322   2.491  -1.723  1.00  0.00           O  
+ATOM   2088  CB  SER    10       5.494   4.620  -2.945  1.00  0.00           C  
+ATOM   2089  OG  SER    10       6.117   5.406  -1.938  1.00  0.00           O  
+ATOM   2090  H   SER    10       6.741   2.756  -4.209  1.00  0.00           H  
+ATOM   2091  HA  SER    10       5.792   3.081  -1.476  1.00  0.00           H  
+ATOM   2092 1HB  SER    10       6.046   4.721  -3.865  1.00  0.00           H  
+ATOM   2093 2HB  SER    10       4.477   4.953  -3.101  1.00  0.00           H  
+ATOM   2094  HG  SER    10       7.008   5.611  -2.229  1.00  0.00           H  
+ATOM   2095  N   VAL    11       3.724   2.182  -3.861  1.00  0.00           N  
+ATOM   2096  CA  VAL    11       2.370   1.604  -4.080  1.00  0.00           C  
+ATOM   2097  C   VAL    11       2.211   0.363  -3.200  1.00  0.00           C  
+ATOM   2098  O   VAL    11       1.166   0.122  -2.630  1.00  0.00           O  
+ATOM   2099  CB  VAL    11       2.211   1.210  -5.551  1.00  0.00           C  
+ATOM   2100  CG1 VAL    11       0.978   0.318  -5.716  1.00  0.00           C  
+ATOM   2101  CG2 VAL    11       2.040   2.472  -6.399  1.00  0.00           C  
+ATOM   2102  H   VAL    11       4.353   2.258  -4.610  1.00  0.00           H  
+ATOM   2103  HA  VAL    11       1.617   2.333  -3.816  1.00  0.00           H  
+ATOM   2104  HB  VAL    11       3.090   0.673  -5.877  1.00  0.00           H  
+ATOM   2105 1HG1 VAL    11       0.352   0.405  -4.841  1.00  0.00           H  
+ATOM   2106 2HG1 VAL    11       0.423   0.628  -6.589  1.00  0.00           H  
+ATOM   2107 3HG1 VAL    11       1.291  -0.709  -5.835  1.00  0.00           H  
+ATOM   2108 1HG2 VAL    11       2.287   3.340  -5.806  1.00  0.00           H  
+ATOM   2109 2HG2 VAL    11       2.697   2.423  -7.256  1.00  0.00           H  
+ATOM   2110 3HG2 VAL    11       1.015   2.545  -6.734  1.00  0.00           H  
+ATOM   2111  N   LEU    12       3.245  -0.426  -3.084  1.00  0.00           N  
+ATOM   2112  CA  LEU    12       3.156  -1.649  -2.238  1.00  0.00           C  
+ATOM   2113  C   LEU    12       2.668  -1.268  -0.838  1.00  0.00           C  
+ATOM   2114  O   LEU    12       1.834  -1.934  -0.258  1.00  0.00           O  
+ATOM   2115  CB  LEU    12       4.536  -2.300  -2.138  1.00  0.00           C  
+ATOM   2116  CG  LEU    12       4.950  -2.827  -3.512  1.00  0.00           C  
+ATOM   2117  CD1 LEU    12       6.449  -3.133  -3.511  1.00  0.00           C  
+ATOM   2118  CD2 LEU    12       4.170  -4.106  -3.823  1.00  0.00           C  
+ATOM   2119  H   LEU    12       4.082  -0.213  -3.552  1.00  0.00           H  
+ATOM   2120  HA  LEU    12       2.461  -2.345  -2.685  1.00  0.00           H  
+ATOM   2121 1HB  LEU    12       5.255  -1.569  -1.799  1.00  0.00           H  
+ATOM   2122 2HB  LEU    12       4.498  -3.121  -1.437  1.00  0.00           H  
+ATOM   2123  HG  LEU    12       4.737  -2.080  -4.263  1.00  0.00           H  
+ATOM   2124 1HD1 LEU    12       6.868  -2.879  -2.549  1.00  0.00           H  
+ATOM   2125 2HD1 LEU    12       6.602  -4.185  -3.705  1.00  0.00           H  
+ATOM   2126 3HD1 LEU    12       6.936  -2.552  -4.280  1.00  0.00           H  
+ATOM   2127 1HD2 LEU    12       3.189  -4.047  -3.375  1.00  0.00           H  
+ATOM   2128 2HD2 LEU    12       4.069  -4.216  -4.893  1.00  0.00           H  
+ATOM   2129 3HD2 LEU    12       4.699  -4.957  -3.422  1.00  0.00           H  
+ATOM   2130  N   LEU    13       3.182  -0.200  -0.291  1.00  0.00           N  
+ATOM   2131  CA  LEU    13       2.747   0.223   1.070  1.00  0.00           C  
+ATOM   2132  C   LEU    13       1.317   0.767   1.002  1.00  0.00           C  
+ATOM   2133  O   LEU    13       0.510   0.530   1.878  1.00  0.00           O  
+ATOM   2134  CB  LEU    13       3.681   1.320   1.590  1.00  0.00           C  
+ATOM   2135  CG  LEU    13       5.055   0.724   1.909  1.00  0.00           C  
+ATOM   2136  CD1 LEU    13       4.886  -0.561   2.723  1.00  0.00           C  
+ATOM   2137  CD2 LEU    13       5.789   0.409   0.604  1.00  0.00           C  
+ATOM   2138  H   LEU    13       3.853   0.325  -0.776  1.00  0.00           H  
+ATOM   2139  HA  LEU    13       2.776  -0.624   1.738  1.00  0.00           H  
+ATOM   2140 1HB  LEU    13       3.789   2.086   0.837  1.00  0.00           H  
+ATOM   2141 2HB  LEU    13       3.264   1.753   2.487  1.00  0.00           H  
+ATOM   2142  HG  LEU    13       5.630   1.437   2.482  1.00  0.00           H  
+ATOM   2143 1HD1 LEU    13       3.938  -0.539   3.238  1.00  0.00           H  
+ATOM   2144 2HD1 LEU    13       4.916  -1.413   2.059  1.00  0.00           H  
+ATOM   2145 3HD1 LEU    13       5.687  -0.640   3.443  1.00  0.00           H  
+ATOM   2146 1HD2 LEU    13       5.687   1.242  -0.075  1.00  0.00           H  
+ATOM   2147 2HD2 LEU    13       6.836   0.239   0.811  1.00  0.00           H  
+ATOM   2148 3HD2 LEU    13       5.363  -0.476   0.155  1.00  0.00           H  
+ATOM   2149  N   ALA    14       0.998   1.495  -0.033  1.00  0.00           N  
+ATOM   2150  CA  ALA    14      -0.377   2.055  -0.158  1.00  0.00           C  
+ATOM   2151  C   ALA    14      -1.404   0.942   0.058  1.00  0.00           C  
+ATOM   2152  O   ALA    14      -2.312   1.070   0.857  1.00  0.00           O  
+ATOM   2153  CB  ALA    14      -0.557   2.650  -1.557  1.00  0.00           C  
+ATOM   2154  H   ALA    14       1.665   1.675  -0.729  1.00  0.00           H  
+ATOM   2155  HA  ALA    14      -0.521   2.827   0.582  1.00  0.00           H  
+ATOM   2156 1HB  ALA    14       0.399   2.696  -2.056  1.00  0.00           H  
+ATOM   2157 2HB  ALA    14      -1.232   2.029  -2.128  1.00  0.00           H  
+ATOM   2158 3HB  ALA    14      -0.969   3.646  -1.475  1.00  0.00           H  
+ATOM   2159  N   GLN    15      -1.271  -0.146  -0.648  1.00  0.00           N  
+ATOM   2160  CA  GLN    15      -2.242  -1.265  -0.484  1.00  0.00           C  
+ATOM   2161  C   GLN    15      -2.082  -1.883   0.906  1.00  0.00           C  
+ATOM   2162  O   GLN    15      -3.047  -2.241   1.552  1.00  0.00           O  
+ATOM   2163  CB  GLN    15      -1.973  -2.330  -1.550  1.00  0.00           C  
+ATOM   2164  CG  GLN    15      -3.146  -3.312  -1.598  1.00  0.00           C  
+ATOM   2165  CD  GLN    15      -3.820  -3.235  -2.969  1.00  0.00           C  
+ATOM   2166  OE1 GLN    15      -5.014  -3.022  -3.060  1.00  0.00           O  
+ATOM   2167  NE2 GLN    15      -3.103  -3.402  -4.047  1.00  0.00           N  
+ATOM   2168  H   GLN    15      -0.534  -0.227  -1.288  1.00  0.00           H  
+ATOM   2169  HA  GLN    15      -3.247  -0.889  -0.597  1.00  0.00           H  
+ATOM   2170 1HB  GLN    15      -1.860  -1.854  -2.513  1.00  0.00           H  
+ATOM   2171 2HB  GLN    15      -1.068  -2.865  -1.304  1.00  0.00           H  
+ATOM   2172 1HG  GLN    15      -2.781  -4.316  -1.431  1.00  0.00           H  
+ATOM   2173 2HG  GLN    15      -3.861  -3.056  -0.832  1.00  0.00           H  
+ATOM   2174 1HE2 GLN    15      -3.528  -3.355  -4.929  1.00  0.00           H  
+ATOM   2175 2HE2 GLN    15      -2.142  -3.574  -3.974  1.00  0.00           H  
+ATOM   2176  N   ALA    16      -0.870  -2.014   1.372  1.00  0.00           N  
+ATOM   2177  CA  ALA    16      -0.650  -2.613   2.719  1.00  0.00           C  
+ATOM   2178  C   ALA    16      -1.538  -1.907   3.746  1.00  0.00           C  
+ATOM   2179  O   ALA    16      -2.116  -2.532   4.613  1.00  0.00           O  
+ATOM   2180  CB  ALA    16       0.819  -2.447   3.115  1.00  0.00           C  
+ATOM   2181  H   ALA    16      -0.104  -1.721   0.835  1.00  0.00           H  
+ATOM   2182  HA  ALA    16      -0.897  -3.664   2.690  1.00  0.00           H  
+ATOM   2183 1HB  ALA    16       1.421  -2.334   2.225  1.00  0.00           H  
+ATOM   2184 2HB  ALA    16       0.928  -1.572   3.737  1.00  0.00           H  
+ATOM   2185 3HB  ALA    16       1.145  -3.321   3.661  1.00  0.00           H  
+ATOM   2186  N   VAL    17      -1.651  -0.611   3.657  1.00  0.00           N  
+ATOM   2187  CA  VAL    17      -2.503   0.130   4.630  1.00  0.00           C  
+ATOM   2188  C   VAL    17      -3.966   0.045   4.193  1.00  0.00           C  
+ATOM   2189  O   VAL    17      -4.871   0.131   4.999  1.00  0.00           O  
+ATOM   2190  CB  VAL    17      -2.069   1.596   4.675  1.00  0.00           C  
+ATOM   2191  CG1 VAL    17      -2.959   2.360   5.656  1.00  0.00           C  
+ATOM   2192  CG2 VAL    17      -0.613   1.682   5.137  1.00  0.00           C  
+ATOM   2193  H   VAL    17      -1.177  -0.125   2.951  1.00  0.00           H  
+ATOM   2194  HA  VAL    17      -2.394  -0.309   5.610  1.00  0.00           H  
+ATOM   2195  HB  VAL    17      -2.163   2.030   3.690  1.00  0.00           H  
+ATOM   2196 1HG1 VAL    17      -3.942   1.912   5.675  1.00  0.00           H  
+ATOM   2197 2HG1 VAL    17      -2.525   2.319   6.644  1.00  0.00           H  
+ATOM   2198 3HG1 VAL    17      -3.039   3.391   5.342  1.00  0.00           H  
+ATOM   2199 1HG2 VAL    17      -0.212   0.685   5.254  1.00  0.00           H  
+ATOM   2200 2HG2 VAL    17      -0.032   2.218   4.402  1.00  0.00           H  
+ATOM   2201 3HG2 VAL    17      -0.565   2.202   6.083  1.00  0.00           H  
+ATOM   2202  N   PHE    18      -4.205  -0.125   2.922  1.00  0.00           N  
+ATOM   2203  CA  PHE    18      -5.609  -0.217   2.434  1.00  0.00           C  
+ATOM   2204  C   PHE    18      -6.280  -1.452   3.041  1.00  0.00           C  
+ATOM   2205  O   PHE    18      -7.213  -1.349   3.811  1.00  0.00           O  
+ATOM   2206  CB  PHE    18      -5.612  -0.333   0.909  1.00  0.00           C  
+ATOM   2207  CG  PHE    18      -6.786   0.432   0.349  1.00  0.00           C  
+ATOM   2208  CD1 PHE    18      -8.087   0.116   0.759  1.00  0.00           C  
+ATOM   2209  CD2 PHE    18      -6.575   1.456  -0.582  1.00  0.00           C  
+ATOM   2210  CE1 PHE    18      -9.176   0.824   0.239  1.00  0.00           C  
+ATOM   2211  CE2 PHE    18      -7.664   2.164  -1.101  1.00  0.00           C  
+ATOM   2212  CZ  PHE    18      -8.965   1.849  -0.691  1.00  0.00           C  
+ATOM   2213  H   PHE    18      -3.461  -0.193   2.288  1.00  0.00           H  
+ATOM   2214  HA  PHE    18      -6.152   0.669   2.730  1.00  0.00           H  
+ATOM   2215 1HB  PHE    18      -4.694   0.077   0.516  1.00  0.00           H  
+ATOM   2216 2HB  PHE    18      -5.693  -1.372   0.628  1.00  0.00           H  
+ATOM   2217  HD1 PHE    18      -8.249  -0.674   1.477  1.00  0.00           H  
+ATOM   2218  HD2 PHE    18      -5.572   1.699  -0.898  1.00  0.00           H  
+ATOM   2219  HE1 PHE    18     -10.180   0.581   0.557  1.00  0.00           H  
+ATOM   2220  HE2 PHE    18      -7.501   2.955  -1.819  1.00  0.00           H  
+ATOM   2221  HZ  PHE    18      -9.805   2.396  -1.092  1.00  0.00           H  
+ATOM   2222  N   LEU    19      -5.810  -2.620   2.697  1.00  0.00           N  
+ATOM   2223  CA  LEU    19      -6.419  -3.861   3.253  1.00  0.00           C  
+ATOM   2224  C   LEU    19      -6.446  -3.781   4.781  1.00  0.00           C  
+ATOM   2225  O   LEU    19      -7.185  -4.488   5.437  1.00  0.00           O  
+ATOM   2226  CB  LEU    19      -5.591  -5.072   2.822  1.00  0.00           C  
+ATOM   2227  CG  LEU    19      -4.172  -4.941   3.378  1.00  0.00           C  
+ATOM   2228  CD1 LEU    19      -4.073  -5.685   4.710  1.00  0.00           C  
+ATOM   2229  CD2 LEU    19      -3.178  -5.546   2.383  1.00  0.00           C  
+ATOM   2230  H   LEU    19      -5.056  -2.681   2.075  1.00  0.00           H  
+ATOM   2231  HA  LEU    19      -7.427  -3.964   2.880  1.00  0.00           H  
+ATOM   2232 1HB  LEU    19      -6.047  -5.974   3.202  1.00  0.00           H  
+ATOM   2233 2HB  LEU    19      -5.550  -5.115   1.744  1.00  0.00           H  
+ATOM   2234  HG  LEU    19      -3.940  -3.897   3.531  1.00  0.00           H  
+ATOM   2235 1HD1 LEU    19      -4.989  -5.550   5.267  1.00  0.00           H  
+ATOM   2236 2HD1 LEU    19      -3.917  -6.737   4.524  1.00  0.00           H  
+ATOM   2237 3HD1 LEU    19      -3.244  -5.294   5.281  1.00  0.00           H  
+ATOM   2238 1HD2 LEU    19      -3.555  -6.494   2.029  1.00  0.00           H  
+ATOM   2239 2HD2 LEU    19      -3.051  -4.874   1.547  1.00  0.00           H  
+ATOM   2240 3HD2 LEU    19      -2.227  -5.696   2.871  1.00  0.00           H  
+ATOM   2241  N   LEU    20      -5.641  -2.929   5.356  1.00  0.00           N  
+ATOM   2242  CA  LEU    20      -5.618  -2.808   6.841  1.00  0.00           C  
+ATOM   2243  C   LEU    20      -6.890  -2.105   7.322  1.00  0.00           C  
+ATOM   2244  O   LEU    20      -7.597  -2.600   8.176  1.00  0.00           O  
+ATOM   2245  CB  LEU    20      -4.394  -1.994   7.267  1.00  0.00           C  
+ATOM   2246  CG  LEU    20      -3.540  -2.821   8.229  1.00  0.00           C  
+ATOM   2247  CD1 LEU    20      -2.059  -2.578   7.934  1.00  0.00           C  
+ATOM   2248  CD2 LEU    20      -3.848  -2.404   9.669  1.00  0.00           C  
+ATOM   2249  H   LEU    20      -5.050  -2.370   4.810  1.00  0.00           H  
+ATOM   2250  HA  LEU    20      -5.563  -3.793   7.280  1.00  0.00           H  
+ATOM   2251 1HB  LEU    20      -3.811  -1.738   6.394  1.00  0.00           H  
+ATOM   2252 2HB  LEU    20      -4.718  -1.090   7.762  1.00  0.00           H  
+ATOM   2253  HG  LEU    20      -3.766  -3.869   8.098  1.00  0.00           H  
+ATOM   2254 1HD1 LEU    20      -1.895  -2.600   6.867  1.00  0.00           H  
+ATOM   2255 2HD1 LEU    20      -1.768  -1.614   8.324  1.00  0.00           H  
+ATOM   2256 3HD1 LEU    20      -1.467  -3.350   8.405  1.00  0.00           H  
+ATOM   2257 1HD2 LEU    20      -4.550  -1.584   9.664  1.00  0.00           H  
+ATOM   2258 2HD2 LEU    20      -4.275  -3.241  10.202  1.00  0.00           H  
+ATOM   2259 3HD2 LEU    20      -2.935  -2.095  10.156  1.00  0.00           H  
+ATOM   2260  N   LEU    21      -7.184  -0.953   6.784  1.00  0.00           N  
+ATOM   2261  CA  LEU    21      -8.407  -0.218   7.217  1.00  0.00           C  
+ATOM   2262  C   LEU    21      -9.654  -1.013   6.818  1.00  0.00           C  
+ATOM   2263  O   LEU    21     -10.670  -0.963   7.482  1.00  0.00           O  
+ATOM   2264  CB  LEU    21      -8.439   1.161   6.552  1.00  0.00           C  
+ATOM   2265  CG  LEU    21      -8.824   1.019   5.078  1.00  0.00           C  
+ATOM   2266  CD1 LEU    21     -10.345   1.101   4.936  1.00  0.00           C  
+ATOM   2267  CD2 LEU    21      -8.176   2.145   4.272  1.00  0.00           C  
+ATOM   2268  H   LEU    21      -6.598  -0.568   6.099  1.00  0.00           H  
+ATOM   2269  HA  LEU    21      -8.390  -0.098   8.290  1.00  0.00           H  
+ATOM   2270 1HB  LEU    21      -9.164   1.785   7.055  1.00  0.00           H  
+ATOM   2271 2HB  LEU    21      -7.463   1.617   6.623  1.00  0.00           H  
+ATOM   2272  HG  LEU    21      -8.481   0.063   4.708  1.00  0.00           H  
+ATOM   2273 1HD1 LEU    21     -10.755   1.647   5.774  1.00  0.00           H  
+ATOM   2274 2HD1 LEU    21     -10.593   1.613   4.018  1.00  0.00           H  
+ATOM   2275 3HD1 LEU    21     -10.760   0.106   4.918  1.00  0.00           H  
+ATOM   2276 1HD2 LEU    21      -8.081   3.024   4.893  1.00  0.00           H  
+ATOM   2277 2HD2 LEU    21      -7.198   1.833   3.938  1.00  0.00           H  
+ATOM   2278 3HD2 LEU    21      -8.792   2.375   3.416  1.00  0.00           H  
+ATOM   2279  N   THR    22      -9.586  -1.745   5.740  1.00  0.00           N  
+ATOM   2280  CA  THR    22     -10.770  -2.540   5.304  1.00  0.00           C  
+ATOM   2281  C   THR    22     -10.977  -3.715   6.262  1.00  0.00           C  
+ATOM   2282  O   THR    22     -12.091  -4.104   6.549  1.00  0.00           O  
+ATOM   2283  CB  THR    22     -10.535  -3.072   3.889  1.00  0.00           C  
+ATOM   2284  OG1 THR    22      -9.282  -3.737   3.838  1.00  0.00           O  
+ATOM   2285  CG2 THR    22     -10.542  -1.910   2.894  1.00  0.00           C  
+ATOM   2286  H   THR    22      -8.758  -1.773   5.217  1.00  0.00           H  
+ATOM   2287  HA  THR    22     -11.648  -1.910   5.311  1.00  0.00           H  
+ATOM   2288  HB  THR    22     -11.321  -3.765   3.630  1.00  0.00           H  
+ATOM   2289  HG1 THR    22      -9.332  -4.509   4.407  1.00  0.00           H  
+ATOM   2290 1HG2 THR    22      -9.862  -1.143   3.233  1.00  0.00           H  
+ATOM   2291 2HG2 THR    22     -10.229  -2.266   1.924  1.00  0.00           H  
+ATOM   2292 3HG2 THR    22     -11.539  -1.502   2.824  1.00  0.00           H  
+ATOM   2293  N   SER    23      -9.913  -4.285   6.756  1.00  0.00           N  
+ATOM   2294  CA  SER    23     -10.052  -5.434   7.693  1.00  0.00           C  
+ATOM   2295  C   SER    23     -10.689  -4.955   8.998  1.00  0.00           C  
+ATOM   2296  O   SER    23     -11.684  -5.487   9.445  1.00  0.00           O  
+ATOM   2297  CB  SER    23      -8.672  -6.025   7.985  1.00  0.00           C  
+ATOM   2298  OG  SER    23      -8.827  -7.295   8.603  1.00  0.00           O  
+ATOM   2299  H   SER    23      -9.022  -3.957   6.511  1.00  0.00           H  
+ATOM   2300  HA  SER    23     -10.678  -6.190   7.243  1.00  0.00           H  
+ATOM   2301 1HB  SER    23      -8.126  -6.143   7.063  1.00  0.00           H  
+ATOM   2302 2HB  SER    23      -8.127  -5.358   8.640  1.00  0.00           H  
+ATOM   2303  HG  SER    23      -9.459  -7.201   9.319  1.00  0.00           H  
+ATOM   2304  N   GLN    24     -10.123  -3.952   9.612  1.00  0.00           N  
+ATOM   2305  CA  GLN    24     -10.699  -3.441  10.888  1.00  0.00           C  
+ATOM   2306  C   GLN    24     -12.071  -2.821  10.615  1.00  0.00           C  
+ATOM   2307  O   GLN    24     -12.874  -2.650  11.510  1.00  0.00           O  
+ATOM   2308  CB  GLN    24      -9.767  -2.380  11.480  1.00  0.00           C  
+ATOM   2309  CG  GLN    24      -9.728  -1.161  10.557  1.00  0.00           C  
+ATOM   2310  CD  GLN    24      -9.853   0.115  11.391  1.00  0.00           C  
+ATOM   2311  OE1 GLN    24     -10.482   1.067  10.975  1.00  0.00           O  
+ATOM   2312  NE2 GLN    24      -9.275   0.174  12.560  1.00  0.00           N  
+ATOM   2313  H   GLN    24      -9.319  -3.535   9.235  1.00  0.00           H  
+ATOM   2314  HA  GLN    24     -10.805  -4.257  11.588  1.00  0.00           H  
+ATOM   2315 1HB  GLN    24     -10.132  -2.085  12.453  1.00  0.00           H  
+ATOM   2316 2HB  GLN    24      -8.773  -2.790  11.577  1.00  0.00           H  
+ATOM   2317 1HG  GLN    24      -8.791  -1.149  10.017  1.00  0.00           H  
+ATOM   2318 2HG  GLN    24     -10.547  -1.213   9.856  1.00  0.00           H  
+ATOM   2319 1HE2 GLN    24      -9.350   0.988  13.102  1.00  0.00           H  
+ATOM   2320 2HE2 GLN    24      -8.768  -0.593  12.896  1.00  0.00           H  
+ATOM   2321  N   ARG    25     -12.348  -2.485   9.384  1.00  0.00           N  
+ATOM   2322  CA  ARG    25     -13.670  -1.879   9.056  1.00  0.00           C  
+ATOM   2323  C   ARG    25     -14.492  -2.867   8.228  1.00  0.00           C  
+ATOM   2324  O   ARG    25     -14.477  -4.043   8.556  1.00  0.00           O  
+ATOM   2325  CB  ARG    25     -13.459  -0.593   8.253  1.00  0.00           C  
+ATOM   2326  CG  ARG    25     -14.682   0.313   8.409  1.00  0.00           C  
+ATOM   2327  CD  ARG    25     -14.514   1.555   7.532  1.00  0.00           C  
+ATOM   2328  NE  ARG    25     -14.617   2.779   8.377  1.00  0.00           N  
+ATOM   2329  CZ  ARG    25     -13.576   3.205   9.040  1.00  0.00           C  
+ATOM   2330  NH1 ARG    25     -13.357   2.781  10.255  1.00  0.00           N  
+ATOM   2331  NH2 ARG    25     -12.755   4.057   8.488  1.00  0.00           N  
+ATOM   2332  OXT ARG    25     -15.125  -2.433   7.279  1.00  0.00           O  
+ATOM   2333  H   ARG    25     -11.687  -2.633   8.676  1.00  0.00           H  
+ATOM   2334  HA  ARG    25     -14.197  -1.650   9.971  1.00  0.00           H  
+ATOM   2335 1HB  ARG    25     -12.581  -0.081   8.619  1.00  0.00           H  
+ATOM   2336 2HB  ARG    25     -13.325  -0.838   7.210  1.00  0.00           H  
+ATOM   2337 1HG  ARG    25     -15.569  -0.225   8.106  1.00  0.00           H  
+ATOM   2338 2HG  ARG    25     -14.779   0.614   9.442  1.00  0.00           H  
+ATOM   2339 1HD  ARG    25     -13.547   1.530   7.052  1.00  0.00           H  
+ATOM   2340 2HD  ARG    25     -15.289   1.572   6.779  1.00  0.00           H  
+ATOM   2341  HE  ARG    25     -15.467   3.264   8.436  1.00  0.00           H  
+ATOM   2342 1HH1 ARG    25     -13.987   2.128  10.677  1.00  0.00           H  
+ATOM   2343 2HH1 ARG    25     -12.561   3.109  10.763  1.00  0.00           H  
+ATOM   2344 1HH2 ARG    25     -12.924   4.384   7.559  1.00  0.00           H  
+ATOM   2345 2HH2 ARG    25     -11.958   4.382   8.997  1.00  0.00           H  
+TER    2346      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        7                                                                  
+ATOM   2347  N   GLY     1      16.431   1.053 -12.751  1.00  0.00           N  
+ATOM   2348  CA  GLY     1      15.090   1.313 -12.065  1.00  0.00           C  
+ATOM   2349  C   GLY     1      15.068   1.961 -10.723  1.00  0.00           C  
+ATOM   2350  O   GLY     1      14.501   1.443  -9.782  1.00  0.00           O  
+ATOM   2351 1H   GLY     1      17.206   1.281 -12.094  1.00  0.00           H  
+ATOM   2352 2H   GLY     1      16.492   0.052 -13.025  1.00  0.00           H  
+ATOM   2353 3H   GLY     1      16.510   1.649 -13.599  1.00  0.00           H  
+ATOM   2354 1HA  GLY     1      14.699   2.156 -12.361  1.00  0.00           H  
+ATOM   2355 2HA  GLY     1      14.410   0.392 -12.289  1.00  0.00           H  
+ATOM   2356  N   SER     2      15.681   3.107 -10.597  1.00  0.00           N  
+ATOM   2357  CA  SER     2      15.694   3.803  -9.279  1.00  0.00           C  
+ATOM   2358  C   SER     2      14.330   4.448  -9.026  1.00  0.00           C  
+ATOM   2359  O   SER     2      13.482   3.888  -8.359  1.00  0.00           O  
+ATOM   2360  CB  SER     2      16.774   4.886  -9.286  1.00  0.00           C  
+ATOM   2361  OG  SER     2      16.610   5.720  -8.148  1.00  0.00           O  
+ATOM   2362  H   SER     2      16.133   3.508 -11.367  1.00  0.00           H  
+ATOM   2363  HA  SER     2      15.907   3.090  -8.497  1.00  0.00           H  
+ATOM   2364 1HB  SER     2      17.748   4.426  -9.253  1.00  0.00           H  
+ATOM   2365 2HB  SER     2      16.686   5.474 -10.191  1.00  0.00           H  
+ATOM   2366  HG  SER     2      17.430   5.706  -7.648  1.00  0.00           H  
+ATOM   2367  N   GLU     3      14.110   5.623  -9.551  1.00  0.00           N  
+ATOM   2368  CA  GLU     3      12.800   6.300  -9.338  1.00  0.00           C  
+ATOM   2369  C   GLU     3      11.666   5.312  -9.614  1.00  0.00           C  
+ATOM   2370  O   GLU     3      10.598   5.403  -9.043  1.00  0.00           O  
+ATOM   2371  CB  GLU     3      12.681   7.493 -10.290  1.00  0.00           C  
+ATOM   2372  CG  GLU     3      12.986   7.040 -11.720  1.00  0.00           C  
+ATOM   2373  CD  GLU     3      13.318   8.259 -12.581  1.00  0.00           C  
+ATOM   2374  OE1 GLU     3      13.420   9.341 -12.029  1.00  0.00           O  
+ATOM   2375  OE2 GLU     3      13.465   8.090 -13.782  1.00  0.00           O  
+ATOM   2376  H   GLU     3      14.807   6.060 -10.085  1.00  0.00           H  
+ATOM   2377  HA  GLU     3      12.736   6.647  -8.317  1.00  0.00           H  
+ATOM   2378 1HB  GLU     3      11.677   7.891 -10.244  1.00  0.00           H  
+ATOM   2379 2HB  GLU     3      13.385   8.257 -10.000  1.00  0.00           H  
+ATOM   2380 1HG  GLU     3      13.828   6.363 -11.711  1.00  0.00           H  
+ATOM   2381 2HG  GLU     3      12.123   6.536 -12.130  1.00  0.00           H  
+ATOM   2382  N   LYS     4      11.889   4.366 -10.484  1.00  0.00           N  
+ATOM   2383  CA  LYS     4      10.822   3.372 -10.792  1.00  0.00           C  
+ATOM   2384  C   LYS     4      10.511   2.558  -9.535  1.00  0.00           C  
+ATOM   2385  O   LYS     4       9.394   2.534  -9.061  1.00  0.00           O  
+ATOM   2386  CB  LYS     4      11.304   2.436 -11.902  1.00  0.00           C  
+ATOM   2387  CG  LYS     4      10.651   2.836 -13.227  1.00  0.00           C  
+ATOM   2388  CD  LYS     4      11.147   4.224 -13.642  1.00  0.00           C  
+ATOM   2389  CE  LYS     4      11.536   4.203 -15.121  1.00  0.00           C  
+ATOM   2390  NZ  LYS     4      11.476   5.586 -15.671  1.00  0.00           N  
+ATOM   2391  H   LYS     4      12.758   4.309 -10.934  1.00  0.00           H  
+ATOM   2392  HA  LYS     4       9.932   3.889 -11.117  1.00  0.00           H  
+ATOM   2393 1HB  LYS     4      12.379   2.509 -11.993  1.00  0.00           H  
+ATOM   2394 2HB  LYS     4      11.031   1.420 -11.661  1.00  0.00           H  
+ATOM   2395 1HG  LYS     4      10.912   2.116 -13.988  1.00  0.00           H  
+ATOM   2396 2HG  LYS     4       9.578   2.861 -13.106  1.00  0.00           H  
+ATOM   2397 1HD  LYS     4      10.361   4.948 -13.484  1.00  0.00           H  
+ATOM   2398 2HD  LYS     4      12.008   4.490 -13.048  1.00  0.00           H  
+ATOM   2399 1HE  LYS     4      12.540   3.819 -15.223  1.00  0.00           H  
+ATOM   2400 2HE  LYS     4      10.852   3.569 -15.664  1.00  0.00           H  
+ATOM   2401 1HZ  LYS     4      10.535   5.991 -15.488  1.00  0.00           H  
+ATOM   2402 2HZ  LYS     4      12.202   6.174 -15.214  1.00  0.00           H  
+ATOM   2403 3HZ  LYS     4      11.649   5.561 -16.696  1.00  0.00           H  
+ATOM   2404  N   MET     5      11.492   1.891  -8.991  1.00  0.00           N  
+ATOM   2405  CA  MET     5      11.251   1.082  -7.762  1.00  0.00           C  
+ATOM   2406  C   MET     5      10.652   1.977  -6.676  1.00  0.00           C  
+ATOM   2407  O   MET     5       9.975   1.515  -5.780  1.00  0.00           O  
+ATOM   2408  CB  MET     5      12.576   0.496  -7.270  1.00  0.00           C  
+ATOM   2409  CG  MET     5      12.469  -1.030  -7.212  1.00  0.00           C  
+ATOM   2410  SD  MET     5      11.903  -1.533  -5.568  1.00  0.00           S  
+ATOM   2411  CE  MET     5      10.915  -2.956  -6.091  1.00  0.00           C  
+ATOM   2412  H   MET     5      12.387   1.924  -9.387  1.00  0.00           H  
+ATOM   2413  HA  MET     5      10.563   0.280  -7.989  1.00  0.00           H  
+ATOM   2414 1HB  MET     5      13.368   0.777  -7.948  1.00  0.00           H  
+ATOM   2415 2HB  MET     5      12.794   0.877  -6.283  1.00  0.00           H  
+ATOM   2416 1HG  MET     5      11.762  -1.369  -7.955  1.00  0.00           H  
+ATOM   2417 2HG  MET     5      13.437  -1.466  -7.410  1.00  0.00           H  
+ATOM   2418 1HE  MET     5      10.180  -2.640  -6.813  1.00  0.00           H  
+ATOM   2419 2HE  MET     5      11.562  -3.697  -6.541  1.00  0.00           H  
+ATOM   2420 3HE  MET     5      10.415  -3.381  -5.233  1.00  0.00           H  
+ATOM   2421  N   SER     6      10.893   3.259  -6.749  1.00  0.00           N  
+ATOM   2422  CA  SER     6      10.334   4.181  -5.721  1.00  0.00           C  
+ATOM   2423  C   SER     6       8.821   4.302  -5.916  1.00  0.00           C  
+ATOM   2424  O   SER     6       8.067   4.384  -4.968  1.00  0.00           O  
+ATOM   2425  CB  SER     6      10.980   5.559  -5.863  1.00  0.00           C  
+ATOM   2426  OG  SER     6      10.980   6.210  -4.598  1.00  0.00           O  
+ATOM   2427  H   SER     6      11.440   3.614  -7.481  1.00  0.00           H  
+ATOM   2428  HA  SER     6      10.540   3.787  -4.736  1.00  0.00           H  
+ATOM   2429 1HB  SER     6      11.995   5.450  -6.205  1.00  0.00           H  
+ATOM   2430 2HB  SER     6      10.421   6.145  -6.581  1.00  0.00           H  
+ATOM   2431  HG  SER     6      10.136   6.038  -4.176  1.00  0.00           H  
+ATOM   2432  N   THR     7       8.370   4.303  -7.142  1.00  0.00           N  
+ATOM   2433  CA  THR     7       6.906   4.409  -7.396  1.00  0.00           C  
+ATOM   2434  C   THR     7       6.264   3.044  -7.156  1.00  0.00           C  
+ATOM   2435  O   THR     7       5.066   2.926  -6.989  1.00  0.00           O  
+ATOM   2436  CB  THR     7       6.664   4.843  -8.845  1.00  0.00           C  
+ATOM   2437  OG1 THR     7       7.909   5.144  -9.461  1.00  0.00           O  
+ATOM   2438  CG2 THR     7       5.768   6.081  -8.866  1.00  0.00           C  
+ATOM   2439  H   THR     7       8.994   4.227  -7.894  1.00  0.00           H  
+ATOM   2440  HA  THR     7       6.473   5.136  -6.723  1.00  0.00           H  
+ATOM   2441  HB  THR     7       6.180   4.043  -9.384  1.00  0.00           H  
+ATOM   2442  HG1 THR     7       8.171   4.387  -9.988  1.00  0.00           H  
+ATOM   2443 1HG2 THR     7       6.197   6.847  -8.237  1.00  0.00           H  
+ATOM   2444 2HG2 THR     7       5.687   6.449  -9.879  1.00  0.00           H  
+ATOM   2445 3HG2 THR     7       4.785   5.821  -8.499  1.00  0.00           H  
+ATOM   2446  N   ALA     8       7.060   2.012  -7.128  1.00  0.00           N  
+ATOM   2447  CA  ALA     8       6.513   0.652  -6.891  1.00  0.00           C  
+ATOM   2448  C   ALA     8       6.372   0.437  -5.387  1.00  0.00           C  
+ATOM   2449  O   ALA     8       5.449  -0.199  -4.920  1.00  0.00           O  
+ATOM   2450  CB  ALA     8       7.470  -0.389  -7.472  1.00  0.00           C  
+ATOM   2451  H   ALA     8       8.023   2.133  -7.255  1.00  0.00           H  
+ATOM   2452  HA  ALA     8       5.546   0.562  -7.362  1.00  0.00           H  
+ATOM   2453 1HB  ALA     8       7.676  -0.149  -8.505  1.00  0.00           H  
+ATOM   2454 2HB  ALA     8       8.393  -0.381  -6.911  1.00  0.00           H  
+ATOM   2455 3HB  ALA     8       7.018  -1.368  -7.413  1.00  0.00           H  
+ATOM   2456  N   ILE     9       7.280   0.978  -4.623  1.00  0.00           N  
+ATOM   2457  CA  ILE     9       7.199   0.819  -3.149  1.00  0.00           C  
+ATOM   2458  C   ILE     9       6.110   1.745  -2.609  1.00  0.00           C  
+ATOM   2459  O   ILE     9       5.480   1.464  -1.610  1.00  0.00           O  
+ATOM   2460  CB  ILE     9       8.543   1.184  -2.513  1.00  0.00           C  
+ATOM   2461  CG1 ILE     9       8.910   2.622  -2.888  1.00  0.00           C  
+ATOM   2462  CG2 ILE     9       9.627   0.233  -3.022  1.00  0.00           C  
+ATOM   2463  CD1 ILE     9       9.859   3.198  -1.837  1.00  0.00           C  
+ATOM   2464  H   ILE     9       8.011   1.495  -5.022  1.00  0.00           H  
+ATOM   2465  HA  ILE     9       6.955  -0.204  -2.912  1.00  0.00           H  
+ATOM   2466  HB  ILE     9       8.466   1.099  -1.438  1.00  0.00           H  
+ATOM   2467 1HG1 ILE     9       9.392   2.628  -3.853  1.00  0.00           H  
+ATOM   2468 2HG1 ILE     9       8.014   3.223  -2.929  1.00  0.00           H  
+ATOM   2469 1HG2 ILE     9       9.615   0.215  -4.101  1.00  0.00           H  
+ATOM   2470 2HG2 ILE     9      10.593   0.572  -2.678  1.00  0.00           H  
+ATOM   2471 3HG2 ILE     9       9.440  -0.761  -2.643  1.00  0.00           H  
+ATOM   2472 1HD1 ILE     9      10.603   2.458  -1.578  1.00  0.00           H  
+ATOM   2473 2HD1 ILE     9      10.347   4.075  -2.235  1.00  0.00           H  
+ATOM   2474 3HD1 ILE     9       9.298   3.468  -0.954  1.00  0.00           H  
+ATOM   2475  N   SER    10       5.881   2.849  -3.266  1.00  0.00           N  
+ATOM   2476  CA  SER    10       4.829   3.792  -2.793  1.00  0.00           C  
+ATOM   2477  C   SER    10       3.451   3.200  -3.091  1.00  0.00           C  
+ATOM   2478  O   SER    10       2.582   3.175  -2.242  1.00  0.00           O  
+ATOM   2479  CB  SER    10       4.981   5.131  -3.517  1.00  0.00           C  
+ATOM   2480  OG  SER    10       4.513   4.999  -4.852  1.00  0.00           O  
+ATOM   2481  H   SER    10       6.399   3.056  -4.072  1.00  0.00           H  
+ATOM   2482  HA  SER    10       4.933   3.941  -1.729  1.00  0.00           H  
+ATOM   2483 1HB  SER    10       4.402   5.884  -3.010  1.00  0.00           H  
+ATOM   2484 2HB  SER    10       6.024   5.422  -3.518  1.00  0.00           H  
+ATOM   2485  HG  SER    10       5.127   5.458  -5.429  1.00  0.00           H  
+ATOM   2486  N   VAL    11       3.244   2.718  -4.286  1.00  0.00           N  
+ATOM   2487  CA  VAL    11       1.920   2.123  -4.624  1.00  0.00           C  
+ATOM   2488  C   VAL    11       1.680   0.896  -3.742  1.00  0.00           C  
+ATOM   2489  O   VAL    11       0.561   0.583  -3.389  1.00  0.00           O  
+ATOM   2490  CB  VAL    11       1.904   1.703  -6.095  1.00  0.00           C  
+ATOM   2491  CG1 VAL    11       0.650   0.871  -6.374  1.00  0.00           C  
+ATOM   2492  CG2 VAL    11       1.893   2.950  -6.983  1.00  0.00           C  
+ATOM   2493  H   VAL    11       3.958   2.742  -4.958  1.00  0.00           H  
+ATOM   2494  HA  VAL    11       1.142   2.852  -4.448  1.00  0.00           H  
+ATOM   2495  HB  VAL    11       2.782   1.112  -6.311  1.00  0.00           H  
+ATOM   2496 1HG1 VAL    11      -0.217   1.385  -5.983  1.00  0.00           H  
+ATOM   2497 2HG1 VAL    11       0.537   0.735  -7.439  1.00  0.00           H  
+ATOM   2498 3HG1 VAL    11       0.743  -0.091  -5.894  1.00  0.00           H  
+ATOM   2499 1HG2 VAL    11       1.259   3.703  -6.539  1.00  0.00           H  
+ATOM   2500 2HG2 VAL    11       2.897   3.334  -7.077  1.00  0.00           H  
+ATOM   2501 3HG2 VAL    11       1.514   2.692  -7.962  1.00  0.00           H  
+ATOM   2502  N   LEU    12       2.724   0.200  -3.382  1.00  0.00           N  
+ATOM   2503  CA  LEU    12       2.556  -1.006  -2.521  1.00  0.00           C  
+ATOM   2504  C   LEU    12       2.058  -0.578  -1.140  1.00  0.00           C  
+ATOM   2505  O   LEU    12       1.098  -1.114  -0.623  1.00  0.00           O  
+ATOM   2506  CB  LEU    12       3.900  -1.725  -2.379  1.00  0.00           C  
+ATOM   2507  CG  LEU    12       4.085  -2.699  -3.543  1.00  0.00           C  
+ATOM   2508  CD1 LEU    12       5.577  -2.921  -3.791  1.00  0.00           C  
+ATOM   2509  CD2 LEU    12       3.422  -4.035  -3.200  1.00  0.00           C  
+ATOM   2510  H   LEU    12       3.620   0.470  -3.677  1.00  0.00           H  
+ATOM   2511  HA  LEU    12       1.837  -1.673  -2.973  1.00  0.00           H  
+ATOM   2512 1HB  LEU    12       4.699  -0.998  -2.386  1.00  0.00           H  
+ATOM   2513 2HB  LEU    12       3.918  -2.271  -1.448  1.00  0.00           H  
+ATOM   2514  HG  LEU    12       3.629  -2.287  -4.431  1.00  0.00           H  
+ATOM   2515 1HD1 LEU    12       6.130  -2.052  -3.465  1.00  0.00           H  
+ATOM   2516 2HD1 LEU    12       5.910  -3.787  -3.237  1.00  0.00           H  
+ATOM   2517 3HD1 LEU    12       5.747  -3.081  -4.845  1.00  0.00           H  
+ATOM   2518 1HD2 LEU    12       3.525  -4.226  -2.140  1.00  0.00           H  
+ATOM   2519 2HD2 LEU    12       2.374  -3.993  -3.457  1.00  0.00           H  
+ATOM   2520 3HD2 LEU    12       3.899  -4.828  -3.756  1.00  0.00           H  
+ATOM   2521  N   LEU    13       2.700   0.387  -0.538  1.00  0.00           N  
+ATOM   2522  CA  LEU    13       2.257   0.848   0.806  1.00  0.00           C  
+ATOM   2523  C   LEU    13       0.845   1.419   0.700  1.00  0.00           C  
+ATOM   2524  O   LEU    13       0.092   1.435   1.654  1.00  0.00           O  
+ATOM   2525  CB  LEU    13       3.213   1.935   1.305  1.00  0.00           C  
+ATOM   2526  CG  LEU    13       4.494   1.302   1.868  1.00  0.00           C  
+ATOM   2527  CD1 LEU    13       4.860   0.040   1.076  1.00  0.00           C  
+ATOM   2528  CD2 LEU    13       5.636   2.314   1.758  1.00  0.00           C  
+ATOM   2529  H   LEU    13       3.471   0.811  -0.973  1.00  0.00           H  
+ATOM   2530  HA  LEU    13       2.262   0.017   1.495  1.00  0.00           H  
+ATOM   2531 1HB  LEU    13       3.469   2.589   0.484  1.00  0.00           H  
+ATOM   2532 2HB  LEU    13       2.730   2.508   2.081  1.00  0.00           H  
+ATOM   2533  HG  LEU    13       4.342   1.043   2.904  1.00  0.00           H  
+ATOM   2534 1HD1 LEU    13       4.955   0.284   0.030  1.00  0.00           H  
+ATOM   2535 2HD1 LEU    13       5.796  -0.356   1.440  1.00  0.00           H  
+ATOM   2536 3HD1 LEU    13       4.083  -0.701   1.202  1.00  0.00           H  
+ATOM   2537 1HD2 LEU    13       5.444   2.984   0.932  1.00  0.00           H  
+ATOM   2538 2HD2 LEU    13       5.704   2.881   2.673  1.00  0.00           H  
+ATOM   2539 3HD2 LEU    13       6.566   1.791   1.588  1.00  0.00           H  
+ATOM   2540  N   ALA    14       0.482   1.887  -0.460  1.00  0.00           N  
+ATOM   2541  CA  ALA    14      -0.882   2.459  -0.644  1.00  0.00           C  
+ATOM   2542  C   ALA    14      -1.932   1.393  -0.324  1.00  0.00           C  
+ATOM   2543  O   ALA    14      -2.578   1.435   0.704  1.00  0.00           O  
+ATOM   2544  CB  ALA    14      -1.047   2.920  -2.093  1.00  0.00           C  
+ATOM   2545  H   ALA    14       1.109   1.863  -1.214  1.00  0.00           H  
+ATOM   2546  HA  ALA    14      -1.010   3.302   0.019  1.00  0.00           H  
+ATOM   2547 1HB  ALA    14      -0.088   2.900  -2.589  1.00  0.00           H  
+ATOM   2548 2HB  ALA    14      -1.731   2.258  -2.606  1.00  0.00           H  
+ATOM   2549 3HB  ALA    14      -1.439   3.926  -2.110  1.00  0.00           H  
+ATOM   2550  N   GLN    15      -2.107   0.438  -1.196  1.00  0.00           N  
+ATOM   2551  CA  GLN    15      -3.117  -0.627  -0.938  1.00  0.00           C  
+ATOM   2552  C   GLN    15      -2.676  -1.473   0.258  1.00  0.00           C  
+ATOM   2553  O   GLN    15      -3.435  -2.260   0.787  1.00  0.00           O  
+ATOM   2554  CB  GLN    15      -3.247  -1.520  -2.174  1.00  0.00           C  
+ATOM   2555  CG  GLN    15      -4.634  -2.165  -2.196  1.00  0.00           C  
+ATOM   2556  CD  GLN    15      -5.535  -1.404  -3.171  1.00  0.00           C  
+ATOM   2557  OE1 GLN    15      -5.748  -0.218  -3.020  1.00  0.00           O  
+ATOM   2558  NE2 GLN    15      -6.077  -2.043  -4.171  1.00  0.00           N  
+ATOM   2559  H   GLN    15      -1.576   0.422  -2.020  1.00  0.00           H  
+ATOM   2560  HA  GLN    15      -4.072  -0.172  -0.723  1.00  0.00           H  
+ATOM   2561 1HB  GLN    15      -3.112  -0.923  -3.064  1.00  0.00           H  
+ATOM   2562 2HB  GLN    15      -2.493  -2.293  -2.142  1.00  0.00           H  
+ATOM   2563 1HG  GLN    15      -4.547  -3.195  -2.512  1.00  0.00           H  
+ATOM   2564 2HG  GLN    15      -5.065  -2.126  -1.206  1.00  0.00           H  
+ATOM   2565 1HE2 GLN    15      -6.656  -1.566  -4.802  1.00  0.00           H  
+ATOM   2566 2HE2 GLN    15      -5.905  -3.000  -4.294  1.00  0.00           H  
+ATOM   2567  N   ALA    16      -1.455  -1.315   0.691  1.00  0.00           N  
+ATOM   2568  CA  ALA    16      -0.973  -2.109   1.856  1.00  0.00           C  
+ATOM   2569  C   ALA    16      -1.725  -1.666   3.112  1.00  0.00           C  
+ATOM   2570  O   ALA    16      -2.533  -2.395   3.654  1.00  0.00           O  
+ATOM   2571  CB  ALA    16       0.526  -1.875   2.050  1.00  0.00           C  
+ATOM   2572  H   ALA    16      -0.857  -0.674   0.254  1.00  0.00           H  
+ATOM   2573  HA  ALA    16      -1.155  -3.158   1.678  1.00  0.00           H  
+ATOM   2574 1HB  ALA    16       0.716  -0.815   2.137  1.00  0.00           H  
+ATOM   2575 2HB  ALA    16       0.856  -2.374   2.949  1.00  0.00           H  
+ATOM   2576 3HB  ALA    16       1.065  -2.268   1.201  1.00  0.00           H  
+ATOM   2577  N   VAL    17      -1.472  -0.473   3.574  1.00  0.00           N  
+ATOM   2578  CA  VAL    17      -2.179   0.021   4.788  1.00  0.00           C  
+ATOM   2579  C   VAL    17      -3.677   0.110   4.493  1.00  0.00           C  
+ATOM   2580  O   VAL    17      -4.504  -0.027   5.373  1.00  0.00           O  
+ATOM   2581  CB  VAL    17      -1.646   1.406   5.159  1.00  0.00           C  
+ATOM   2582  CG1 VAL    17      -2.502   2.001   6.278  1.00  0.00           C  
+ATOM   2583  CG2 VAL    17      -0.197   1.282   5.635  1.00  0.00           C  
+ATOM   2584  H   VAL    17      -0.821   0.101   3.119  1.00  0.00           H  
+ATOM   2585  HA  VAL    17      -2.011  -0.663   5.607  1.00  0.00           H  
+ATOM   2586  HB  VAL    17      -1.689   2.051   4.293  1.00  0.00           H  
+ATOM   2587 1HG1 VAL    17      -3.001   1.204   6.811  1.00  0.00           H  
+ATOM   2588 2HG1 VAL    17      -1.870   2.550   6.962  1.00  0.00           H  
+ATOM   2589 3HG1 VAL    17      -3.238   2.667   5.853  1.00  0.00           H  
+ATOM   2590 1HG2 VAL    17       0.097   0.243   5.624  1.00  0.00           H  
+ATOM   2591 2HG2 VAL    17       0.447   1.846   4.977  1.00  0.00           H  
+ATOM   2592 3HG2 VAL    17      -0.114   1.670   6.639  1.00  0.00           H  
+ATOM   2593  N   PHE    18      -4.030   0.334   3.257  1.00  0.00           N  
+ATOM   2594  CA  PHE    18      -5.473   0.427   2.899  1.00  0.00           C  
+ATOM   2595  C   PHE    18      -6.173  -0.877   3.281  1.00  0.00           C  
+ATOM   2596  O   PHE    18      -6.983  -0.916   4.187  1.00  0.00           O  
+ATOM   2597  CB  PHE    18      -5.608   0.656   1.392  1.00  0.00           C  
+ATOM   2598  CG  PHE    18      -6.644   1.724   1.135  1.00  0.00           C  
+ATOM   2599  CD1 PHE    18      -6.300   3.076   1.255  1.00  0.00           C  
+ATOM   2600  CD2 PHE    18      -7.947   1.362   0.777  1.00  0.00           C  
+ATOM   2601  CE1 PHE    18      -7.260   4.066   1.016  1.00  0.00           C  
+ATOM   2602  CE2 PHE    18      -8.907   2.352   0.538  1.00  0.00           C  
+ATOM   2603  CZ  PHE    18      -8.564   3.704   0.657  1.00  0.00           C  
+ATOM   2604  H   PHE    18      -3.346   0.438   2.564  1.00  0.00           H  
+ATOM   2605  HA  PHE    18      -5.925   1.251   3.432  1.00  0.00           H  
+ATOM   2606 1HB  PHE    18      -4.657   0.972   0.989  1.00  0.00           H  
+ATOM   2607 2HB  PHE    18      -5.915  -0.262   0.915  1.00  0.00           H  
+ATOM   2608  HD1 PHE    18      -5.293   3.355   1.531  1.00  0.00           H  
+ATOM   2609  HD2 PHE    18      -8.212   0.320   0.685  1.00  0.00           H  
+ATOM   2610  HE1 PHE    18      -6.995   5.109   1.109  1.00  0.00           H  
+ATOM   2611  HE2 PHE    18      -9.913   2.073   0.262  1.00  0.00           H  
+ATOM   2612  HZ  PHE    18      -9.305   4.467   0.474  1.00  0.00           H  
+ATOM   2613  N   LEU    19      -5.865  -1.947   2.601  1.00  0.00           N  
+ATOM   2614  CA  LEU    19      -6.512  -3.246   2.932  1.00  0.00           C  
+ATOM   2615  C   LEU    19      -6.318  -3.542   4.420  1.00  0.00           C  
+ATOM   2616  O   LEU    19      -7.042  -4.319   5.009  1.00  0.00           O  
+ATOM   2617  CB  LEU    19      -5.877  -4.362   2.099  1.00  0.00           C  
+ATOM   2618  CG  LEU    19      -4.460  -4.634   2.604  1.00  0.00           C  
+ATOM   2619  CD1 LEU    19      -4.516  -5.609   3.780  1.00  0.00           C  
+ATOM   2620  CD2 LEU    19      -3.627  -5.243   1.474  1.00  0.00           C  
+ATOM   2621  H   LEU    19      -5.207  -1.896   1.876  1.00  0.00           H  
+ATOM   2622  HA  LEU    19      -7.568  -3.189   2.711  1.00  0.00           H  
+ATOM   2623 1HB  LEU    19      -6.470  -5.260   2.189  1.00  0.00           H  
+ATOM   2624 2HB  LEU    19      -5.837  -4.060   1.063  1.00  0.00           H  
+ATOM   2625  HG  LEU    19      -4.010  -3.706   2.926  1.00  0.00           H  
+ATOM   2626 1HD1 LEU    19      -5.540  -5.896   3.961  1.00  0.00           H  
+ATOM   2627 2HD1 LEU    19      -3.931  -6.487   3.547  1.00  0.00           H  
+ATOM   2628 3HD1 LEU    19      -4.114  -5.132   4.662  1.00  0.00           H  
+ATOM   2629 1HD2 LEU    19      -3.595  -4.558   0.640  1.00  0.00           H  
+ATOM   2630 2HD2 LEU    19      -2.622  -5.428   1.827  1.00  0.00           H  
+ATOM   2631 3HD2 LEU    19      -4.074  -6.175   1.159  1.00  0.00           H  
+ATOM   2632  N   LEU    20      -5.347  -2.922   5.036  1.00  0.00           N  
+ATOM   2633  CA  LEU    20      -5.115  -3.163   6.488  1.00  0.00           C  
+ATOM   2634  C   LEU    20      -6.322  -2.656   7.277  1.00  0.00           C  
+ATOM   2635  O   LEU    20      -6.848  -3.340   8.133  1.00  0.00           O  
+ATOM   2636  CB  LEU    20      -3.857  -2.417   6.939  1.00  0.00           C  
+ATOM   2637  CG  LEU    20      -3.387  -2.977   8.283  1.00  0.00           C  
+ATOM   2638  CD1 LEU    20      -1.901  -3.326   8.201  1.00  0.00           C  
+ATOM   2639  CD2 LEU    20      -3.603  -1.927   9.376  1.00  0.00           C  
+ATOM   2640  H   LEU    20      -4.775  -2.295   4.545  1.00  0.00           H  
+ATOM   2641  HA  LEU    20      -4.988  -4.223   6.661  1.00  0.00           H  
+ATOM   2642 1HB  LEU    20      -3.078  -2.545   6.201  1.00  0.00           H  
+ATOM   2643 2HB  LEU    20      -4.082  -1.366   7.048  1.00  0.00           H  
+ATOM   2644  HG  LEU    20      -3.952  -3.867   8.519  1.00  0.00           H  
+ATOM   2645 1HD1 LEU    20      -1.560  -3.211   7.183  1.00  0.00           H  
+ATOM   2646 2HD1 LEU    20      -1.339  -2.666   8.845  1.00  0.00           H  
+ATOM   2647 3HD1 LEU    20      -1.753  -4.348   8.516  1.00  0.00           H  
+ATOM   2648 1HD2 LEU    20      -4.377  -1.240   9.068  1.00  0.00           H  
+ATOM   2649 2HD2 LEU    20      -3.900  -2.417  10.292  1.00  0.00           H  
+ATOM   2650 3HD2 LEU    20      -2.683  -1.385   9.540  1.00  0.00           H  
+ATOM   2651  N   LEU    21      -6.771  -1.466   6.989  1.00  0.00           N  
+ATOM   2652  CA  LEU    21      -7.953  -0.923   7.716  1.00  0.00           C  
+ATOM   2653  C   LEU    21      -9.168  -1.784   7.380  1.00  0.00           C  
+ATOM   2654  O   LEU    21      -9.968  -2.113   8.233  1.00  0.00           O  
+ATOM   2655  CB  LEU    21      -8.219   0.525   7.288  1.00  0.00           C  
+ATOM   2656  CG  LEU    21      -6.900   1.238   6.979  1.00  0.00           C  
+ATOM   2657  CD1 LEU    21      -7.156   2.740   6.831  1.00  0.00           C  
+ATOM   2658  CD2 LEU    21      -5.906   1.008   8.123  1.00  0.00           C  
+ATOM   2659  H   LEU    21      -6.338  -0.937   6.290  1.00  0.00           H  
+ATOM   2660  HA  LEU    21      -7.769  -0.959   8.780  1.00  0.00           H  
+ATOM   2661 1HB  LEU    21      -8.843   0.528   6.405  1.00  0.00           H  
+ATOM   2662 2HB  LEU    21      -8.727   1.047   8.086  1.00  0.00           H  
+ATOM   2663  HG  LEU    21      -6.490   0.854   6.057  1.00  0.00           H  
+ATOM   2664 1HD1 LEU    21      -8.175   2.960   7.111  1.00  0.00           H  
+ATOM   2665 2HD1 LEU    21      -6.481   3.285   7.473  1.00  0.00           H  
+ATOM   2666 3HD1 LEU    21      -6.992   3.034   5.804  1.00  0.00           H  
+ATOM   2667 1HD2 LEU    21      -5.918  -0.032   8.411  1.00  0.00           H  
+ATOM   2668 2HD2 LEU    21      -4.913   1.279   7.798  1.00  0.00           H  
+ATOM   2669 3HD2 LEU    21      -6.186   1.618   8.969  1.00  0.00           H  
+ATOM   2670  N   THR    22      -9.305  -2.160   6.138  1.00  0.00           N  
+ATOM   2671  CA  THR    22     -10.463  -3.009   5.741  1.00  0.00           C  
+ATOM   2672  C   THR    22     -10.376  -4.347   6.476  1.00  0.00           C  
+ATOM   2673  O   THR    22     -11.373  -4.993   6.731  1.00  0.00           O  
+ATOM   2674  CB  THR    22     -10.426  -3.255   4.230  1.00  0.00           C  
+ATOM   2675  OG1 THR    22     -10.647  -2.028   3.547  1.00  0.00           O  
+ATOM   2676  CG2 THR    22     -11.515  -4.258   3.849  1.00  0.00           C  
+ATOM   2677  H   THR    22      -8.642  -1.886   5.467  1.00  0.00           H  
+ATOM   2678  HA  THR    22     -11.383  -2.510   6.004  1.00  0.00           H  
+ATOM   2679  HB  THR    22      -9.461  -3.652   3.952  1.00  0.00           H  
+ATOM   2680  HG1 THR    22     -10.454  -2.166   2.617  1.00  0.00           H  
+ATOM   2681 1HG2 THR    22     -12.267  -4.283   4.622  1.00  0.00           H  
+ATOM   2682 2HG2 THR    22     -11.968  -3.960   2.915  1.00  0.00           H  
+ATOM   2683 3HG2 THR    22     -11.078  -5.240   3.739  1.00  0.00           H  
+ATOM   2684  N   SER    23      -9.189  -4.764   6.823  1.00  0.00           N  
+ATOM   2685  CA  SER    23      -9.034  -6.056   7.547  1.00  0.00           C  
+ATOM   2686  C   SER    23      -9.580  -5.906   8.968  1.00  0.00           C  
+ATOM   2687  O   SER    23     -10.299  -6.753   9.460  1.00  0.00           O  
+ATOM   2688  CB  SER    23      -7.552  -6.432   7.603  1.00  0.00           C  
+ATOM   2689  OG  SER    23      -7.365  -7.700   6.988  1.00  0.00           O  
+ATOM   2690  H   SER    23      -8.399  -4.225   6.610  1.00  0.00           H  
+ATOM   2691  HA  SER    23      -9.583  -6.828   7.029  1.00  0.00           H  
+ATOM   2692 1HB  SER    23      -6.971  -5.694   7.078  1.00  0.00           H  
+ATOM   2693 2HB  SER    23      -7.231  -6.471   8.636  1.00  0.00           H  
+ATOM   2694  HG  SER    23      -8.176  -8.202   7.091  1.00  0.00           H  
+ATOM   2695  N   GLN    24      -9.249  -4.830   9.629  1.00  0.00           N  
+ATOM   2696  CA  GLN    24      -9.755  -4.620  11.014  1.00  0.00           C  
+ATOM   2697  C   GLN    24     -11.255  -4.330  10.959  1.00  0.00           C  
+ATOM   2698  O   GLN    24     -11.953  -4.415  11.950  1.00  0.00           O  
+ATOM   2699  CB  GLN    24      -9.031  -3.430  11.649  1.00  0.00           C  
+ATOM   2700  CG  GLN    24      -7.853  -3.931  12.487  1.00  0.00           C  
+ATOM   2701  CD  GLN    24      -6.798  -4.549  11.569  1.00  0.00           C  
+ATOM   2702  OE1 GLN    24      -7.110  -5.377  10.736  1.00  0.00           O  
+ATOM   2703  NE2 GLN    24      -5.552  -4.178  11.686  1.00  0.00           N  
+ATOM   2704  H   GLN    24      -8.672  -4.157   9.211  1.00  0.00           H  
+ATOM   2705  HA  GLN    24      -9.579  -5.508  11.603  1.00  0.00           H  
+ATOM   2706 1HB  GLN    24      -8.667  -2.774  10.872  1.00  0.00           H  
+ATOM   2707 2HB  GLN    24      -9.716  -2.889  12.284  1.00  0.00           H  
+ATOM   2708 1HG  GLN    24      -7.420  -3.103  13.030  1.00  0.00           H  
+ATOM   2709 2HG  GLN    24      -8.201  -4.677  13.187  1.00  0.00           H  
+ATOM   2710 1HE2 GLN    24      -4.867  -4.570  11.103  1.00  0.00           H  
+ATOM   2711 2HE2 GLN    24      -5.301  -3.511  12.356  1.00  0.00           H  
+ATOM   2712  N   ARG    25     -11.755  -3.984   9.804  1.00  0.00           N  
+ATOM   2713  CA  ARG    25     -13.208  -3.683   9.679  1.00  0.00           C  
+ATOM   2714  C   ARG    25     -13.669  -3.984   8.251  1.00  0.00           C  
+ATOM   2715  O   ARG    25     -13.689  -5.149   7.890  1.00  0.00           O  
+ATOM   2716  CB  ARG    25     -13.447  -2.205   9.991  1.00  0.00           C  
+ATOM   2717  CG  ARG    25     -14.937  -1.971  10.248  1.00  0.00           C  
+ATOM   2718  CD  ARG    25     -15.255  -0.485  10.078  1.00  0.00           C  
+ATOM   2719  NE  ARG    25     -14.358   0.318  10.957  1.00  0.00           N  
+ATOM   2720  CZ  ARG    25     -14.812   0.815  12.074  1.00  0.00           C  
+ATOM   2721  NH1 ARG    25     -15.070   0.028  13.082  1.00  0.00           N  
+ATOM   2722  NH2 ARG    25     -15.008   2.100  12.185  1.00  0.00           N  
+ATOM   2723  OXT ARG    25     -13.994  -3.045   7.545  1.00  0.00           O  
+ATOM   2724  H   ARG    25     -11.172  -3.919   9.018  1.00  0.00           H  
+ATOM   2725  HA  ARG    25     -13.765  -4.293  10.374  1.00  0.00           H  
+ATOM   2726 1HB  ARG    25     -12.880  -1.927  10.868  1.00  0.00           H  
+ATOM   2727 2HB  ARG    25     -13.130  -1.605   9.152  1.00  0.00           H  
+ATOM   2728 1HG  ARG    25     -15.518  -2.548   9.543  1.00  0.00           H  
+ATOM   2729 2HG  ARG    25     -15.181  -2.277  11.254  1.00  0.00           H  
+ATOM   2730 1HD  ARG    25     -15.101  -0.199   9.048  1.00  0.00           H  
+ATOM   2731 2HD  ARG    25     -16.284  -0.304  10.351  1.00  0.00           H  
+ATOM   2732  HE  ARG    25     -13.426   0.471  10.695  1.00  0.00           H  
+ATOM   2733 1HH1 ARG    25     -14.920  -0.957  12.998  1.00  0.00           H  
+ATOM   2734 2HH1 ARG    25     -15.418   0.408  13.938  1.00  0.00           H  
+ATOM   2735 1HH2 ARG    25     -14.810   2.705  11.414  1.00  0.00           H  
+ATOM   2736 2HH2 ARG    25     -15.357   2.480  13.042  1.00  0.00           H  
+TER    2737      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        8                                                                  
+ATOM   2738  N   GLY     1      16.516   1.283 -12.918  1.00  0.00           N  
+ATOM   2739  CA  GLY     1      15.094   1.484 -12.396  1.00  0.00           C  
+ATOM   2740  C   GLY     1      14.882   1.808 -10.956  1.00  0.00           C  
+ATOM   2741  O   GLY     1      14.030   1.240 -10.302  1.00  0.00           O  
+ATOM   2742 1H   GLY     1      17.172   1.204 -12.113  1.00  0.00           H  
+ATOM   2743 2H   GLY     1      16.558   0.414 -13.487  1.00  0.00           H  
+ATOM   2744 3H   GLY     1      16.788   2.096 -13.507  1.00  0.00           H  
+ATOM   2745 1HA  GLY     1      14.789   2.399 -12.537  1.00  0.00           H  
+ATOM   2746 2HA  GLY     1      14.421   0.692 -12.928  1.00  0.00           H  
+ATOM   2747  N   SER     2      15.646   2.722 -10.424  1.00  0.00           N  
+ATOM   2748  CA  SER     2      15.483   3.088  -8.989  1.00  0.00           C  
+ATOM   2749  C   SER     2      14.271   4.006  -8.828  1.00  0.00           C  
+ATOM   2750  O   SER     2      13.416   3.782  -7.994  1.00  0.00           O  
+ATOM   2751  CB  SER     2      16.739   3.813  -8.502  1.00  0.00           C  
+ATOM   2752  OG  SER     2      16.573   5.212  -8.689  1.00  0.00           O  
+ATOM   2753  H   SER     2      16.330   3.166 -10.969  1.00  0.00           H  
+ATOM   2754  HA  SER     2      15.337   2.192  -8.403  1.00  0.00           H  
+ATOM   2755 1HB  SER     2      16.890   3.611  -7.454  1.00  0.00           H  
+ATOM   2756 2HB  SER     2      17.596   3.462  -9.061  1.00  0.00           H  
+ATOM   2757  HG  SER     2      15.957   5.530  -8.025  1.00  0.00           H  
+ATOM   2758  N   GLU     3      14.189   5.042  -9.618  1.00  0.00           N  
+ATOM   2759  CA  GLU     3      13.031   5.974  -9.507  1.00  0.00           C  
+ATOM   2760  C   GLU     3      11.735   5.210  -9.791  1.00  0.00           C  
+ATOM   2761  O   GLU     3      10.719   5.442  -9.166  1.00  0.00           O  
+ATOM   2762  CB  GLU     3      13.188   7.110 -10.520  1.00  0.00           C  
+ATOM   2763  CG  GLU     3      14.509   7.840 -10.268  1.00  0.00           C  
+ATOM   2764  CD  GLU     3      15.101   8.302 -11.601  1.00  0.00           C  
+ATOM   2765  OE1 GLU     3      14.332   8.524 -12.524  1.00  0.00           O  
+ATOM   2766  OE2 GLU     3      16.311   8.428 -11.678  1.00  0.00           O  
+ATOM   2767  H   GLU     3      14.888   5.207 -10.284  1.00  0.00           H  
+ATOM   2768  HA  GLU     3      12.995   6.384  -8.508  1.00  0.00           H  
+ATOM   2769 1HB  GLU     3      13.186   6.703 -11.521  1.00  0.00           H  
+ATOM   2770 2HB  GLU     3      12.369   7.804 -10.412  1.00  0.00           H  
+ATOM   2771 1HG  GLU     3      14.330   8.699  -9.636  1.00  0.00           H  
+ATOM   2772 2HG  GLU     3      15.203   7.173  -9.781  1.00  0.00           H  
+ATOM   2773  N   LYS     4      11.761   4.301 -10.726  1.00  0.00           N  
+ATOM   2774  CA  LYS     4      10.531   3.525 -11.043  1.00  0.00           C  
+ATOM   2775  C   LYS     4      10.218   2.581  -9.881  1.00  0.00           C  
+ATOM   2776  O   LYS     4       9.102   2.511  -9.405  1.00  0.00           O  
+ATOM   2777  CB  LYS     4      10.755   2.710 -12.319  1.00  0.00           C  
+ATOM   2778  CG  LYS     4      10.393   3.560 -13.538  1.00  0.00           C  
+ATOM   2779  CD  LYS     4      11.301   4.790 -13.592  1.00  0.00           C  
+ATOM   2780  CE  LYS     4      11.330   5.344 -15.018  1.00  0.00           C  
+ATOM   2781  NZ  LYS     4       9.974   5.834 -15.390  1.00  0.00           N  
+ATOM   2782  H   LYS     4      12.591   4.126 -11.217  1.00  0.00           H  
+ATOM   2783  HA  LYS     4       9.703   4.204 -11.189  1.00  0.00           H  
+ATOM   2784 1HB  LYS     4      11.793   2.417 -12.380  1.00  0.00           H  
+ATOM   2785 2HB  LYS     4      10.132   1.829 -12.299  1.00  0.00           H  
+ATOM   2786 1HG  LYS     4      10.523   2.975 -14.437  1.00  0.00           H  
+ATOM   2787 2HG  LYS     4       9.364   3.879 -13.462  1.00  0.00           H  
+ATOM   2788 1HD  LYS     4      10.924   5.547 -12.919  1.00  0.00           H  
+ATOM   2789 2HD  LYS     4      12.302   4.512 -13.296  1.00  0.00           H  
+ATOM   2790 1HE  LYS     4      12.036   6.160 -15.073  1.00  0.00           H  
+ATOM   2791 2HE  LYS     4      11.631   4.563 -15.700  1.00  0.00           H  
+ATOM   2792 1HZ  LYS     4       9.567   6.367 -14.594  1.00  0.00           H  
+ATOM   2793 2HZ  LYS     4      10.044   6.453 -16.222  1.00  0.00           H  
+ATOM   2794 3HZ  LYS     4       9.361   5.025 -15.615  1.00  0.00           H  
+ATOM   2795  N   MET     5      11.199   1.856  -9.414  1.00  0.00           N  
+ATOM   2796  CA  MET     5      10.961   0.922  -8.279  1.00  0.00           C  
+ATOM   2797  C   MET     5      10.619   1.727  -7.025  1.00  0.00           C  
+ATOM   2798  O   MET     5      10.082   1.208  -6.068  1.00  0.00           O  
+ATOM   2799  CB  MET     5      12.224   0.093  -8.027  1.00  0.00           C  
+ATOM   2800  CG  MET     5      12.078  -1.275  -8.696  1.00  0.00           C  
+ATOM   2801  SD  MET     5      12.329  -1.106 -10.482  1.00  0.00           S  
+ATOM   2802  CE  MET     5      10.586  -0.955 -10.943  1.00  0.00           C  
+ATOM   2803  H   MET     5      12.093   1.929  -9.809  1.00  0.00           H  
+ATOM   2804  HA  MET     5      10.140   0.263  -8.520  1.00  0.00           H  
+ATOM   2805 1HB  MET     5      13.080   0.608  -8.440  1.00  0.00           H  
+ATOM   2806 2HB  MET     5      12.360  -0.040  -6.966  1.00  0.00           H  
+ATOM   2807 1HG  MET     5      12.815  -1.953  -8.293  1.00  0.00           H  
+ATOM   2808 2HG  MET     5      11.089  -1.665  -8.508  1.00  0.00           H  
+ATOM   2809 1HE  MET     5      10.129  -0.162 -10.366  1.00  0.00           H  
+ATOM   2810 2HE  MET     5      10.512  -0.726 -11.998  1.00  0.00           H  
+ATOM   2811 3HE  MET     5      10.076  -1.883 -10.742  1.00  0.00           H  
+ATOM   2812  N   SER     6      10.926   2.996  -7.025  1.00  0.00           N  
+ATOM   2813  CA  SER     6      10.615   3.838  -5.836  1.00  0.00           C  
+ATOM   2814  C   SER     6       9.138   4.232  -5.872  1.00  0.00           C  
+ATOM   2815  O   SER     6       8.480   4.315  -4.854  1.00  0.00           O  
+ATOM   2816  CB  SER     6      11.480   5.099  -5.863  1.00  0.00           C  
+ATOM   2817  OG  SER     6      12.118   5.257  -4.603  1.00  0.00           O  
+ATOM   2818  H   SER     6      11.356   3.396  -7.809  1.00  0.00           H  
+ATOM   2819  HA  SER     6      10.818   3.279  -4.935  1.00  0.00           H  
+ATOM   2820 1HB  SER     6      12.231   5.007  -6.630  1.00  0.00           H  
+ATOM   2821 2HB  SER     6      10.857   5.958  -6.073  1.00  0.00           H  
+ATOM   2822  HG  SER     6      12.449   4.398  -4.329  1.00  0.00           H  
+ATOM   2823  N   THR     7       8.610   4.467  -7.042  1.00  0.00           N  
+ATOM   2824  CA  THR     7       7.176   4.848  -7.150  1.00  0.00           C  
+ATOM   2825  C   THR     7       6.321   3.586  -7.067  1.00  0.00           C  
+ATOM   2826  O   THR     7       5.151   3.633  -6.744  1.00  0.00           O  
+ATOM   2827  CB  THR     7       6.932   5.547  -8.488  1.00  0.00           C  
+ATOM   2828  OG1 THR     7       7.782   6.681  -8.590  1.00  0.00           O  
+ATOM   2829  CG2 THR     7       5.471   5.992  -8.577  1.00  0.00           C  
+ATOM   2830  H   THR     7       9.157   4.385  -7.851  1.00  0.00           H  
+ATOM   2831  HA  THR     7       6.917   5.514  -6.340  1.00  0.00           H  
+ATOM   2832  HB  THR     7       7.144   4.864  -9.297  1.00  0.00           H  
+ATOM   2833  HG1 THR     7       8.258   6.622  -9.422  1.00  0.00           H  
+ATOM   2834 1HG2 THR     7       5.120   6.272  -7.595  1.00  0.00           H  
+ATOM   2835 2HG2 THR     7       5.392   6.838  -9.244  1.00  0.00           H  
+ATOM   2836 3HG2 THR     7       4.869   5.178  -8.956  1.00  0.00           H  
+ATOM   2837  N   ALA     8       6.902   2.453  -7.350  1.00  0.00           N  
+ATOM   2838  CA  ALA     8       6.135   1.183  -7.282  1.00  0.00           C  
+ATOM   2839  C   ALA     8       6.154   0.673  -5.844  1.00  0.00           C  
+ATOM   2840  O   ALA     8       5.175   0.154  -5.343  1.00  0.00           O  
+ATOM   2841  CB  ALA     8       6.782   0.149  -8.205  1.00  0.00           C  
+ATOM   2842  H   ALA     8       7.850   2.438  -7.598  1.00  0.00           H  
+ATOM   2843  HA  ALA     8       5.117   1.359  -7.589  1.00  0.00           H  
+ATOM   2844 1HB  ALA     8       7.556   0.623  -8.789  1.00  0.00           H  
+ATOM   2845 2HB  ALA     8       7.211  -0.645  -7.612  1.00  0.00           H  
+ATOM   2846 3HB  ALA     8       6.032  -0.261  -8.866  1.00  0.00           H  
+ATOM   2847  N   ILE     9       7.259   0.829  -5.172  1.00  0.00           N  
+ATOM   2848  CA  ILE     9       7.337   0.361  -3.765  1.00  0.00           C  
+ATOM   2849  C   ILE     9       6.485   1.280  -2.889  1.00  0.00           C  
+ATOM   2850  O   ILE     9       5.913   0.858  -1.904  1.00  0.00           O  
+ATOM   2851  CB  ILE     9       8.791   0.385  -3.287  1.00  0.00           C  
+ATOM   2852  CG1 ILE     9       9.359   1.798  -3.425  1.00  0.00           C  
+ATOM   2853  CG2 ILE     9       9.620  -0.581  -4.134  1.00  0.00           C  
+ATOM   2854  CD1 ILE     9       9.642   2.372  -2.037  1.00  0.00           C  
+ATOM   2855  H   ILE     9       8.033   1.257  -5.593  1.00  0.00           H  
+ATOM   2856  HA  ILE     9       6.955  -0.646  -3.704  1.00  0.00           H  
+ATOM   2857  HB  ILE     9       8.833   0.078  -2.251  1.00  0.00           H  
+ATOM   2858 1HG1 ILE     9      10.277   1.761  -3.995  1.00  0.00           H  
+ATOM   2859 2HG1 ILE     9       8.646   2.427  -3.934  1.00  0.00           H  
+ATOM   2860 1HG2 ILE     9       9.268  -0.557  -5.156  1.00  0.00           H  
+ATOM   2861 2HG2 ILE     9      10.658  -0.284  -4.107  1.00  0.00           H  
+ATOM   2862 3HG2 ILE     9       9.522  -1.582  -3.743  1.00  0.00           H  
+ATOM   2863 1HD1 ILE     9       9.412   1.631  -1.287  1.00  0.00           H  
+ATOM   2864 2HD1 ILE     9      10.686   2.644  -1.966  1.00  0.00           H  
+ATOM   2865 3HD1 ILE     9       9.031   3.247  -1.878  1.00  0.00           H  
+ATOM   2866  N   SER    10       6.383   2.532  -3.246  1.00  0.00           N  
+ATOM   2867  CA  SER    10       5.553   3.468  -2.439  1.00  0.00           C  
+ATOM   2868  C   SER    10       4.077   3.131  -2.658  1.00  0.00           C  
+ATOM   2869  O   SER    10       3.346   2.860  -1.724  1.00  0.00           O  
+ATOM   2870  CB  SER    10       5.826   4.907  -2.881  1.00  0.00           C  
+ATOM   2871  OG  SER    10       4.598   5.621  -2.956  1.00  0.00           O  
+ATOM   2872  H   SER    10       6.843   2.854  -4.051  1.00  0.00           H  
+ATOM   2873  HA  SER    10       5.796   3.357  -1.393  1.00  0.00           H  
+ATOM   2874 1HB  SER    10       6.473   5.388  -2.166  1.00  0.00           H  
+ATOM   2875 2HB  SER    10       6.309   4.898  -3.850  1.00  0.00           H  
+ATOM   2876  HG  SER    10       4.302   5.794  -2.059  1.00  0.00           H  
+ATOM   2877  N   VAL    11       3.635   3.128  -3.887  1.00  0.00           N  
+ATOM   2878  CA  VAL    11       2.211   2.791  -4.158  1.00  0.00           C  
+ATOM   2879  C   VAL    11       1.892   1.448  -3.500  1.00  0.00           C  
+ATOM   2880  O   VAL    11       0.763   1.169  -3.149  1.00  0.00           O  
+ATOM   2881  CB  VAL    11       1.984   2.692  -5.670  1.00  0.00           C  
+ATOM   2882  CG1 VAL    11       0.635   2.025  -5.943  1.00  0.00           C  
+ATOM   2883  CG2 VAL    11       1.988   4.097  -6.278  1.00  0.00           C  
+ATOM   2884  H   VAL    11       4.241   3.338  -4.629  1.00  0.00           H  
+ATOM   2885  HA  VAL    11       1.570   3.557  -3.743  1.00  0.00           H  
+ATOM   2886  HB  VAL    11       2.774   2.103  -6.114  1.00  0.00           H  
+ATOM   2887 1HG1 VAL    11      -0.131   2.508  -5.356  1.00  0.00           H  
+ATOM   2888 2HG1 VAL    11       0.395   2.115  -6.992  1.00  0.00           H  
+ATOM   2889 3HG1 VAL    11       0.689   0.980  -5.674  1.00  0.00           H  
+ATOM   2890 1HG2 VAL    11       1.233   4.701  -5.796  1.00  0.00           H  
+ATOM   2891 2HG2 VAL    11       2.957   4.550  -6.133  1.00  0.00           H  
+ATOM   2892 3HG2 VAL    11       1.775   4.032  -7.335  1.00  0.00           H  
+ATOM   2893  N   LEU    12       2.885   0.618  -3.321  1.00  0.00           N  
+ATOM   2894  CA  LEU    12       2.641  -0.700  -2.674  1.00  0.00           C  
+ATOM   2895  C   LEU    12       2.189  -0.469  -1.231  1.00  0.00           C  
+ATOM   2896  O   LEU    12       1.106  -0.858  -0.841  1.00  0.00           O  
+ATOM   2897  CB  LEU    12       3.928  -1.524  -2.683  1.00  0.00           C  
+ATOM   2898  CG  LEU    12       3.983  -2.368  -3.958  1.00  0.00           C  
+ATOM   2899  CD1 LEU    12       5.307  -3.132  -4.008  1.00  0.00           C  
+ATOM   2900  CD2 LEU    12       2.819  -3.362  -3.962  1.00  0.00           C  
+ATOM   2901  H   LEU    12       3.791   0.864  -3.606  1.00  0.00           H  
+ATOM   2902  HA  LEU    12       1.869  -1.229  -3.213  1.00  0.00           H  
+ATOM   2903 1HB  LEU    12       4.780  -0.861  -2.651  1.00  0.00           H  
+ATOM   2904 2HB  LEU    12       3.945  -2.175  -1.821  1.00  0.00           H  
+ATOM   2905  HG  LEU    12       3.910  -1.721  -4.819  1.00  0.00           H  
+ATOM   2906 1HD1 LEU    12       5.833  -3.004  -3.073  1.00  0.00           H  
+ATOM   2907 2HD1 LEU    12       5.111  -4.182  -4.169  1.00  0.00           H  
+ATOM   2908 3HD1 LEU    12       5.913  -2.751  -4.817  1.00  0.00           H  
+ATOM   2909 1HD2 LEU    12       2.296  -3.310  -3.019  1.00  0.00           H  
+ATOM   2910 2HD2 LEU    12       2.139  -3.117  -4.764  1.00  0.00           H  
+ATOM   2911 3HD2 LEU    12       3.200  -4.362  -4.107  1.00  0.00           H  
+ATOM   2912  N   LEU    13       3.005   0.172  -0.435  1.00  0.00           N  
+ATOM   2913  CA  LEU    13       2.606   0.432   0.976  1.00  0.00           C  
+ATOM   2914  C   LEU    13       1.196   1.022   0.986  1.00  0.00           C  
+ATOM   2915  O   LEU    13       0.438   0.836   1.918  1.00  0.00           O  
+ATOM   2916  CB  LEU    13       3.582   1.426   1.612  1.00  0.00           C  
+ATOM   2917  CG  LEU    13       4.840   0.686   2.071  1.00  0.00           C  
+ATOM   2918  CD1 LEU    13       4.446  -0.467   2.994  1.00  0.00           C  
+ATOM   2919  CD2 LEU    13       5.578   0.132   0.851  1.00  0.00           C  
+ATOM   2920  H   LEU    13       3.873   0.486  -0.767  1.00  0.00           H  
+ATOM   2921  HA  LEU    13       2.616  -0.494   1.532  1.00  0.00           H  
+ATOM   2922 1HB  LEU    13       3.852   2.180   0.885  1.00  0.00           H  
+ATOM   2923 2HB  LEU    13       3.113   1.897   2.461  1.00  0.00           H  
+ATOM   2924  HG  LEU    13       5.484   1.370   2.605  1.00  0.00           H  
+ATOM   2925 1HD1 LEU    13       3.395  -0.397   3.229  1.00  0.00           H  
+ATOM   2926 2HD1 LEU    13       4.643  -1.407   2.499  1.00  0.00           H  
+ATOM   2927 3HD1 LEU    13       5.023  -0.411   3.905  1.00  0.00           H  
+ATOM   2928 1HD2 LEU    13       5.831   0.944   0.185  1.00  0.00           H  
+ATOM   2929 2HD2 LEU    13       6.482  -0.364   1.171  1.00  0.00           H  
+ATOM   2930 3HD2 LEU    13       4.944  -0.573   0.336  1.00  0.00           H  
+ATOM   2931  N   ALA    14       0.839   1.728  -0.053  1.00  0.00           N  
+ATOM   2932  CA  ALA    14      -0.521   2.329  -0.118  1.00  0.00           C  
+ATOM   2933  C   ALA    14      -1.569   1.212  -0.114  1.00  0.00           C  
+ATOM   2934  O   ALA    14      -2.444   1.170   0.729  1.00  0.00           O  
+ATOM   2935  CB  ALA    14      -0.648   3.148  -1.406  1.00  0.00           C  
+ATOM   2936  H   ALA    14       1.469   1.859  -0.794  1.00  0.00           H  
+ATOM   2937  HA  ALA    14      -0.674   2.972   0.735  1.00  0.00           H  
+ATOM   2938 1HB  ALA    14       0.316   3.563  -1.664  1.00  0.00           H  
+ATOM   2939 2HB  ALA    14      -0.990   2.510  -2.207  1.00  0.00           H  
+ATOM   2940 3HB  ALA    14      -1.356   3.949  -1.256  1.00  0.00           H  
+ATOM   2941  N   GLN    15      -1.485   0.304  -1.049  1.00  0.00           N  
+ATOM   2942  CA  GLN    15      -2.473  -0.810  -1.099  1.00  0.00           C  
+ATOM   2943  C   GLN    15      -2.545  -1.490   0.269  1.00  0.00           C  
+ATOM   2944  O   GLN    15      -3.584  -1.962   0.686  1.00  0.00           O  
+ATOM   2945  CB  GLN    15      -2.033  -1.832  -2.151  1.00  0.00           C  
+ATOM   2946  CG  GLN    15      -2.985  -1.780  -3.347  1.00  0.00           C  
+ATOM   2947  CD  GLN    15      -2.272  -2.319  -4.590  1.00  0.00           C  
+ATOM   2948  OE1 GLN    15      -2.263  -1.679  -5.623  1.00  0.00           O  
+ATOM   2949  NE2 GLN    15      -1.671  -3.476  -4.534  1.00  0.00           N  
+ATOM   2950  H   GLN    15      -0.770   0.356  -1.717  1.00  0.00           H  
+ATOM   2951  HA  GLN    15      -3.445  -0.421  -1.360  1.00  0.00           H  
+ATOM   2952 1HB  GLN    15      -1.029  -1.602  -2.478  1.00  0.00           H  
+ATOM   2953 2HB  GLN    15      -2.052  -2.822  -1.721  1.00  0.00           H  
+ATOM   2954 1HG  GLN    15      -3.856  -2.384  -3.140  1.00  0.00           H  
+ATOM   2955 2HG  GLN    15      -3.288  -0.759  -3.522  1.00  0.00           H  
+ATOM   2956 1HE2 GLN    15      -1.213  -3.829  -5.325  1.00  0.00           H  
+ATOM   2957 2HE2 GLN    15      -1.679  -3.992  -3.700  1.00  0.00           H  
+ATOM   2958  N   ALA    16      -1.446  -1.547   0.971  1.00  0.00           N  
+ATOM   2959  CA  ALA    16      -1.449  -2.199   2.312  1.00  0.00           C  
+ATOM   2960  C   ALA    16      -2.325  -1.393   3.273  1.00  0.00           C  
+ATOM   2961  O   ALA    16      -3.083  -1.944   4.047  1.00  0.00           O  
+ATOM   2962  CB  ALA    16      -0.020  -2.256   2.854  1.00  0.00           C  
+ATOM   2963  H   ALA    16      -0.618  -1.163   0.617  1.00  0.00           H  
+ATOM   2964  HA  ALA    16      -1.840  -3.202   2.223  1.00  0.00           H  
+ATOM   2965 1HB  ALA    16       0.378  -1.254   2.925  1.00  0.00           H  
+ATOM   2966 2HB  ALA    16      -0.024  -2.712   3.833  1.00  0.00           H  
+ATOM   2967 3HB  ALA    16       0.596  -2.840   2.187  1.00  0.00           H  
+ATOM   2968  N   VAL    17      -2.226  -0.094   3.233  1.00  0.00           N  
+ATOM   2969  CA  VAL    17      -3.052   0.744   4.147  1.00  0.00           C  
+ATOM   2970  C   VAL    17      -4.535   0.464   3.896  1.00  0.00           C  
+ATOM   2971  O   VAL    17      -5.322   0.363   4.818  1.00  0.00           O  
+ATOM   2972  CB  VAL    17      -2.756   2.223   3.887  1.00  0.00           C  
+ATOM   2973  CG1 VAL    17      -3.778   3.088   4.628  1.00  0.00           C  
+ATOM   2974  CG2 VAL    17      -1.350   2.555   4.391  1.00  0.00           C  
+ATOM   2975  H   VAL    17      -1.608   0.332   2.603  1.00  0.00           H  
+ATOM   2976  HA  VAL    17      -2.809   0.503   5.171  1.00  0.00           H  
+ATOM   2977  HB  VAL    17      -2.818   2.421   2.827  1.00  0.00           H  
+ATOM   2978 1HG1 VAL    17      -3.922   2.700   5.626  1.00  0.00           H  
+ATOM   2979 2HG1 VAL    17      -3.414   4.103   4.686  1.00  0.00           H  
+ATOM   2980 3HG1 VAL    17      -4.718   3.071   4.095  1.00  0.00           H  
+ATOM   2981 1HG2 VAL    17      -1.082   1.876   5.187  1.00  0.00           H  
+ATOM   2982 2HG2 VAL    17      -0.644   2.456   3.581  1.00  0.00           H  
+ATOM   2983 3HG2 VAL    17      -1.331   3.569   4.762  1.00  0.00           H  
+ATOM   2984  N   PHE    18      -4.926   0.333   2.658  1.00  0.00           N  
+ATOM   2985  CA  PHE    18      -6.358   0.055   2.355  1.00  0.00           C  
+ATOM   2986  C   PHE    18      -6.758  -1.278   2.991  1.00  0.00           C  
+ATOM   2987  O   PHE    18      -7.682  -1.350   3.780  1.00  0.00           O  
+ATOM   2988  CB  PHE    18      -6.553  -0.021   0.838  1.00  0.00           C  
+ATOM   2989  CG  PHE    18      -8.029  -0.054   0.518  1.00  0.00           C  
+ATOM   2990  CD1 PHE    18      -8.810   1.095   0.697  1.00  0.00           C  
+ATOM   2991  CD2 PHE    18      -8.616  -1.231   0.039  1.00  0.00           C  
+ATOM   2992  CE1 PHE    18     -10.178   1.065   0.398  1.00  0.00           C  
+ATOM   2993  CE2 PHE    18      -9.982  -1.261  -0.260  1.00  0.00           C  
+ATOM   2994  CZ  PHE    18     -10.765  -0.113  -0.081  1.00  0.00           C  
+ATOM   2995  H   PHE    18      -4.276   0.414   1.928  1.00  0.00           H  
+ATOM   2996  HA  PHE    18      -6.972   0.846   2.759  1.00  0.00           H  
+ATOM   2997 1HB  PHE    18      -6.103   0.845   0.375  1.00  0.00           H  
+ATOM   2998 2HB  PHE    18      -6.084  -0.917   0.461  1.00  0.00           H  
+ATOM   2999  HD1 PHE    18      -8.359   2.004   1.067  1.00  0.00           H  
+ATOM   3000  HD2 PHE    18      -8.013  -2.117  -0.099  1.00  0.00           H  
+ATOM   3001  HE1 PHE    18     -10.781   1.950   0.536  1.00  0.00           H  
+ATOM   3002  HE2 PHE    18     -10.435  -2.170  -0.630  1.00  0.00           H  
+ATOM   3003  HZ  PHE    18     -11.820  -0.137  -0.312  1.00  0.00           H  
+ATOM   3004  N   LEU    19      -6.067  -2.333   2.661  1.00  0.00           N  
+ATOM   3005  CA  LEU    19      -6.404  -3.659   3.250  1.00  0.00           C  
+ATOM   3006  C   LEU    19      -6.227  -3.598   4.769  1.00  0.00           C  
+ATOM   3007  O   LEU    19      -6.704  -4.446   5.495  1.00  0.00           O  
+ATOM   3008  CB  LEU    19      -5.475  -4.727   2.669  1.00  0.00           C  
+ATOM   3009  CG  LEU    19      -5.572  -4.713   1.143  1.00  0.00           C  
+ATOM   3010  CD1 LEU    19      -4.187  -4.964   0.540  1.00  0.00           C  
+ATOM   3011  CD2 LEU    19      -6.533  -5.814   0.686  1.00  0.00           C  
+ATOM   3012  H   LEU    19      -5.324  -2.254   2.027  1.00  0.00           H  
+ATOM   3013  HA  LEU    19      -7.429  -3.907   3.017  1.00  0.00           H  
+ATOM   3014 1HB  LEU    19      -4.457  -4.520   2.968  1.00  0.00           H  
+ATOM   3015 2HB  LEU    19      -5.769  -5.699   3.038  1.00  0.00           H  
+ATOM   3016  HG  LEU    19      -5.939  -3.752   0.813  1.00  0.00           H  
+ATOM   3017 1HD1 LEU    19      -3.478  -4.270   0.967  1.00  0.00           H  
+ATOM   3018 2HD1 LEU    19      -3.879  -5.975   0.758  1.00  0.00           H  
+ATOM   3019 3HD1 LEU    19      -4.229  -4.823  -0.529  1.00  0.00           H  
+ATOM   3020 1HD2 LEU    19      -7.222  -6.045   1.484  1.00  0.00           H  
+ATOM   3021 2HD2 LEU    19      -7.085  -5.475  -0.179  1.00  0.00           H  
+ATOM   3022 3HD2 LEU    19      -5.970  -6.699   0.428  1.00  0.00           H  
+ATOM   3023  N   LEU    20      -5.546  -2.596   5.255  1.00  0.00           N  
+ATOM   3024  CA  LEU    20      -5.342  -2.478   6.726  1.00  0.00           C  
+ATOM   3025  C   LEU    20      -6.672  -2.117   7.390  1.00  0.00           C  
+ATOM   3026  O   LEU    20      -7.099  -2.754   8.333  1.00  0.00           O  
+ATOM   3027  CB  LEU    20      -4.313  -1.382   7.014  1.00  0.00           C  
+ATOM   3028  CG  LEU    20      -4.045  -1.310   8.518  1.00  0.00           C  
+ATOM   3029  CD1 LEU    20      -3.061  -2.412   8.916  1.00  0.00           C  
+ATOM   3030  CD2 LEU    20      -3.447   0.055   8.863  1.00  0.00           C  
+ATOM   3031  H   LEU    20      -5.172  -1.918   4.653  1.00  0.00           H  
+ATOM   3032  HA  LEU    20      -4.987  -3.419   7.118  1.00  0.00           H  
+ATOM   3033 1HB  LEU    20      -3.394  -1.608   6.493  1.00  0.00           H  
+ATOM   3034 2HB  LEU    20      -4.695  -0.431   6.673  1.00  0.00           H  
+ATOM   3035  HG  LEU    20      -4.973  -1.447   9.056  1.00  0.00           H  
+ATOM   3036 1HD1 LEU    20      -3.348  -3.339   8.445  1.00  0.00           H  
+ATOM   3037 2HD1 LEU    20      -2.066  -2.138   8.596  1.00  0.00           H  
+ATOM   3038 3HD1 LEU    20      -3.072  -2.534   9.990  1.00  0.00           H  
+ATOM   3039 1HD2 LEU    20      -2.524   0.191   8.319  1.00  0.00           H  
+ATOM   3040 2HD2 LEU    20      -4.146   0.832   8.590  1.00  0.00           H  
+ATOM   3041 3HD2 LEU    20      -3.251   0.104   9.923  1.00  0.00           H  
+ATOM   3042  N   LEU    21      -7.333  -1.103   6.902  1.00  0.00           N  
+ATOM   3043  CA  LEU    21      -8.637  -0.708   7.505  1.00  0.00           C  
+ATOM   3044  C   LEU    21      -9.669  -1.805   7.236  1.00  0.00           C  
+ATOM   3045  O   LEU    21     -10.604  -1.988   7.990  1.00  0.00           O  
+ATOM   3046  CB  LEU    21      -9.111   0.608   6.884  1.00  0.00           C  
+ATOM   3047  CG  LEU    21      -9.572   0.360   5.448  1.00  0.00           C  
+ATOM   3048  CD1 LEU    21     -11.101   0.358   5.395  1.00  0.00           C  
+ATOM   3049  CD2 LEU    21      -9.034   1.470   4.542  1.00  0.00           C  
+ATOM   3050  H   LEU    21      -6.973  -0.604   6.139  1.00  0.00           H  
+ATOM   3051  HA  LEU    21      -8.516  -0.580   8.571  1.00  0.00           H  
+ATOM   3052 1HB  LEU    21      -9.933   1.004   7.463  1.00  0.00           H  
+ATOM   3053 2HB  LEU    21      -8.298   1.319   6.881  1.00  0.00           H  
+ATOM   3054  HG  LEU    21      -9.199  -0.596   5.110  1.00  0.00           H  
+ATOM   3055 1HD1 LEU    21     -11.494   0.651   6.358  1.00  0.00           H  
+ATOM   3056 2HD1 LEU    21     -11.437   1.055   4.642  1.00  0.00           H  
+ATOM   3057 3HD1 LEU    21     -11.451  -0.633   5.151  1.00  0.00           H  
+ATOM   3058 1HD2 LEU    21      -8.252   2.008   5.057  1.00  0.00           H  
+ATOM   3059 2HD2 LEU    21      -8.635   1.034   3.637  1.00  0.00           H  
+ATOM   3060 3HD2 LEU    21      -9.834   2.150   4.290  1.00  0.00           H  
+ATOM   3061  N   THR    22      -9.505  -2.540   6.168  1.00  0.00           N  
+ATOM   3062  CA  THR    22     -10.477  -3.628   5.857  1.00  0.00           C  
+ATOM   3063  C   THR    22     -10.119  -4.879   6.663  1.00  0.00           C  
+ATOM   3064  O   THR    22     -10.938  -5.752   6.867  1.00  0.00           O  
+ATOM   3065  CB  THR    22     -10.417  -3.955   4.362  1.00  0.00           C  
+ATOM   3066  OG1 THR    22     -10.667  -2.777   3.609  1.00  0.00           O  
+ATOM   3067  CG2 THR    22     -11.471  -5.012   4.028  1.00  0.00           C  
+ATOM   3068  H   THR    22      -8.742  -2.378   5.574  1.00  0.00           H  
+ATOM   3069  HA  THR    22     -11.474  -3.304   6.115  1.00  0.00           H  
+ATOM   3070  HB  THR    22      -9.439  -4.339   4.116  1.00  0.00           H  
+ATOM   3071  HG1 THR    22     -10.311  -2.908   2.727  1.00  0.00           H  
+ATOM   3072 1HG2 THR    22     -11.761  -5.529   4.930  1.00  0.00           H  
+ATOM   3073 2HG2 THR    22     -12.336  -4.534   3.594  1.00  0.00           H  
+ATOM   3074 3HG2 THR    22     -11.060  -5.721   3.323  1.00  0.00           H  
+ATOM   3075  N   SER    23      -8.899  -4.973   7.120  1.00  0.00           N  
+ATOM   3076  CA  SER    23      -8.489  -6.170   7.909  1.00  0.00           C  
+ATOM   3077  C   SER    23      -8.929  -5.998   9.365  1.00  0.00           C  
+ATOM   3078  O   SER    23      -9.625  -6.828   9.916  1.00  0.00           O  
+ATOM   3079  CB  SER    23      -6.969  -6.322   7.848  1.00  0.00           C  
+ATOM   3080  OG  SER    23      -6.606  -6.892   6.598  1.00  0.00           O  
+ATOM   3081  H   SER    23      -8.253  -4.259   6.943  1.00  0.00           H  
+ATOM   3082  HA  SER    23      -8.956  -7.051   7.495  1.00  0.00           H  
+ATOM   3083 1HB  SER    23      -6.504  -5.355   7.946  1.00  0.00           H  
+ATOM   3084 2HB  SER    23      -6.639  -6.961   8.657  1.00  0.00           H  
+ATOM   3085  HG  SER    23      -7.231  -6.582   5.939  1.00  0.00           H  
+ATOM   3086  N   GLN    24      -8.531  -4.926   9.994  1.00  0.00           N  
+ATOM   3087  CA  GLN    24      -8.928  -4.701  11.409  1.00  0.00           C  
+ATOM   3088  C   GLN    24     -10.371  -4.199  11.451  1.00  0.00           C  
+ATOM   3089  O   GLN    24     -11.081  -4.402  12.416  1.00  0.00           O  
+ATOM   3090  CB  GLN    24      -8.003  -3.654  12.032  1.00  0.00           C  
+ATOM   3091  CG  GLN    24      -8.207  -2.310  11.330  1.00  0.00           C  
+ATOM   3092  CD  GLN    24      -8.772  -1.294  12.322  1.00  0.00           C  
+ATOM   3093  OE1 GLN    24      -9.877  -1.448  12.805  1.00  0.00           O  
+ATOM   3094  NE2 GLN    24      -8.057  -0.254  12.650  1.00  0.00           N  
+ATOM   3095  H   GLN    24      -7.973  -4.267   9.532  1.00  0.00           H  
+ATOM   3096  HA  GLN    24      -8.850  -5.626  11.960  1.00  0.00           H  
+ATOM   3097 1HB  GLN    24      -8.233  -3.551  13.082  1.00  0.00           H  
+ATOM   3098 2HB  GLN    24      -6.978  -3.966  11.915  1.00  0.00           H  
+ATOM   3099 1HG  GLN    24      -7.260  -1.955  10.951  1.00  0.00           H  
+ATOM   3100 2HG  GLN    24      -8.899  -2.433  10.511  1.00  0.00           H  
+ATOM   3101 1HE2 GLN    24      -8.409   0.404  13.285  1.00  0.00           H  
+ATOM   3102 2HE2 GLN    24      -7.166  -0.130  12.260  1.00  0.00           H  
+ATOM   3103  N   ARG    25     -10.809  -3.554  10.403  1.00  0.00           N  
+ATOM   3104  CA  ARG    25     -12.204  -3.041  10.361  1.00  0.00           C  
+ATOM   3105  C   ARG    25     -12.591  -2.468  11.727  1.00  0.00           C  
+ATOM   3106  O   ARG    25     -12.333  -1.296  11.949  1.00  0.00           O  
+ATOM   3107  CB  ARG    25     -13.140  -4.190   9.999  1.00  0.00           C  
+ATOM   3108  CG  ARG    25     -14.568  -3.663   9.848  1.00  0.00           C  
+ATOM   3109  CD  ARG    25     -15.269  -4.406   8.708  1.00  0.00           C  
+ATOM   3110  NE  ARG    25     -16.378  -5.232   9.262  1.00  0.00           N  
+ATOM   3111  CZ  ARG    25     -17.618  -4.921   8.998  1.00  0.00           C  
+ATOM   3112  NH1 ARG    25     -18.049  -3.711   9.228  1.00  0.00           N  
+ATOM   3113  NH2 ARG    25     -18.427  -5.820   8.506  1.00  0.00           N  
+ATOM   3114  OXT ARG    25     -13.135  -3.210  12.526  1.00  0.00           O  
+ATOM   3115  H   ARG    25     -10.220  -3.416   9.636  1.00  0.00           H  
+ATOM   3116  HA  ARG    25     -12.281  -2.267   9.613  1.00  0.00           H  
+ATOM   3117 1HB  ARG    25     -12.815  -4.632   9.068  1.00  0.00           H  
+ATOM   3118 2HB  ARG    25     -13.111  -4.934  10.779  1.00  0.00           H  
+ATOM   3119 1HG  ARG    25     -15.110  -3.821  10.769  1.00  0.00           H  
+ATOM   3120 2HG  ARG    25     -14.540  -2.607   9.622  1.00  0.00           H  
+ATOM   3121 1HD  ARG    25     -15.669  -3.691   8.004  1.00  0.00           H  
+ATOM   3122 2HD  ARG    25     -14.558  -5.046   8.206  1.00  0.00           H  
+ATOM   3123  HE  ARG    25     -16.177  -6.010   9.823  1.00  0.00           H  
+ATOM   3124 1HH1 ARG    25     -17.430  -3.023   9.605  1.00  0.00           H  
+ATOM   3125 2HH1 ARG    25     -19.000  -3.472   9.027  1.00  0.00           H  
+ATOM   3126 1HH2 ARG    25     -18.095  -6.747   8.331  1.00  0.00           H  
+ATOM   3127 2HH2 ARG    25     -19.376  -5.582   8.304  1.00  0.00           H  
+TER    3128      ARG    25                                                      
+ENDMDL                                                                          
+MODEL        9                                                                  
+ATOM   3129  N   GLY     1      12.763  -1.936 -12.970  1.00  0.00           N  
+ATOM   3130  CA  GLY     1      12.611  -0.676 -12.119  1.00  0.00           C  
+ATOM   3131  C   GLY     1      12.141   0.588 -12.754  1.00  0.00           C  
+ATOM   3132  O   GLY     1      11.047   0.663 -13.276  1.00  0.00           O  
+ATOM   3133 1H   GLY     1      11.869  -2.130 -13.465  1.00  0.00           H  
+ATOM   3134 2H   GLY     1      13.521  -1.794 -13.668  1.00  0.00           H  
+ATOM   3135 3H   GLY     1      13.002  -2.742 -12.357  1.00  0.00           H  
+ATOM   3136 1HA  GLY     1      11.744  -0.652 -11.679  1.00  0.00           H  
+ATOM   3137 2HA  GLY     1      13.504  -0.663 -11.367  1.00  0.00           H  
+ATOM   3138  N   SER     2      12.953   1.609 -12.730  1.00  0.00           N  
+ATOM   3139  CA  SER     2      12.547   2.901 -13.353  1.00  0.00           C  
+ATOM   3140  C   SER     2      11.453   3.566 -12.514  1.00  0.00           C  
+ATOM   3141  O   SER     2      11.072   3.082 -11.465  1.00  0.00           O  
+ATOM   3142  CB  SER     2      12.018   2.641 -14.763  1.00  0.00           C  
+ATOM   3143  OG  SER     2      12.582   1.435 -15.261  1.00  0.00           O  
+ATOM   3144  H   SER     2      13.832   1.526 -12.305  1.00  0.00           H  
+ATOM   3145  HA  SER     2      13.404   3.557 -13.408  1.00  0.00           H  
+ATOM   3146 1HB  SER     2      10.946   2.546 -14.735  1.00  0.00           H  
+ATOM   3147 2HB  SER     2      12.287   3.469 -15.406  1.00  0.00           H  
+ATOM   3148  HG  SER     2      12.117   1.201 -16.068  1.00  0.00           H  
+ATOM   3149  N   GLU     3      10.944   4.678 -12.973  1.00  0.00           N  
+ATOM   3150  CA  GLU     3       9.876   5.383 -12.214  1.00  0.00           C  
+ATOM   3151  C   GLU     3       8.805   4.381 -11.784  1.00  0.00           C  
+ATOM   3152  O   GLU     3       8.163   4.544 -10.766  1.00  0.00           O  
+ATOM   3153  CB  GLU     3       9.243   6.454 -13.105  1.00  0.00           C  
+ATOM   3154  CG  GLU     3      10.197   7.643 -13.231  1.00  0.00           C  
+ATOM   3155  CD  GLU     3      10.056   8.266 -14.621  1.00  0.00           C  
+ATOM   3156  OE1 GLU     3      10.430   7.615 -15.581  1.00  0.00           O  
+ATOM   3157  OE2 GLU     3       9.576   9.385 -14.701  1.00  0.00           O  
+ATOM   3158  H   GLU     3      11.268   5.048 -13.821  1.00  0.00           H  
+ATOM   3159  HA  GLU     3      10.304   5.851 -11.340  1.00  0.00           H  
+ATOM   3160 1HB  GLU     3       9.052   6.039 -14.084  1.00  0.00           H  
+ATOM   3161 2HB  GLU     3       8.313   6.784 -12.667  1.00  0.00           H  
+ATOM   3162 1HG  GLU     3       9.954   8.380 -12.479  1.00  0.00           H  
+ATOM   3163 2HG  GLU     3      11.213   7.307 -13.091  1.00  0.00           H  
+ATOM   3164  N   LYS     4       8.606   3.343 -12.550  1.00  0.00           N  
+ATOM   3165  CA  LYS     4       7.575   2.333 -12.179  1.00  0.00           C  
+ATOM   3166  C   LYS     4       7.914   1.741 -10.811  1.00  0.00           C  
+ATOM   3167  O   LYS     4       7.139   1.823  -9.879  1.00  0.00           O  
+ATOM   3168  CB  LYS     4       7.550   1.219 -13.227  1.00  0.00           C  
+ATOM   3169  CG  LYS     4       6.405   1.470 -14.211  1.00  0.00           C  
+ATOM   3170  CD  LYS     4       6.690   2.741 -15.013  1.00  0.00           C  
+ATOM   3171  CE  LYS     4       6.385   2.492 -16.492  1.00  0.00           C  
+ATOM   3172  NZ  LYS     4       7.605   2.762 -17.304  1.00  0.00           N  
+ATOM   3173  H   LYS     4       9.133   3.228 -13.367  1.00  0.00           H  
+ATOM   3174  HA  LYS     4       6.606   2.808 -12.135  1.00  0.00           H  
+ATOM   3175 1HB  LYS     4       8.488   1.205 -13.760  1.00  0.00           H  
+ATOM   3176 2HB  LYS     4       7.399   0.268 -12.738  1.00  0.00           H  
+ATOM   3177 1HG  LYS     4       6.318   0.629 -14.884  1.00  0.00           H  
+ATOM   3178 2HG  LYS     4       5.481   1.592 -13.665  1.00  0.00           H  
+ATOM   3179 1HD  LYS     4       6.069   3.545 -14.646  1.00  0.00           H  
+ATOM   3180 2HD  LYS     4       7.730   3.010 -14.903  1.00  0.00           H  
+ATOM   3181 1HE  LYS     4       6.082   1.465 -16.628  1.00  0.00           H  
+ATOM   3182 2HE  LYS     4       5.588   3.148 -16.809  1.00  0.00           H  
+ATOM   3183 1HZ  LYS     4       8.438   2.373 -16.819  1.00  0.00           H  
+ATOM   3184 2HZ  LYS     4       7.508   2.313 -18.236  1.00  0.00           H  
+ATOM   3185 3HZ  LYS     4       7.720   3.790 -17.424  1.00  0.00           H  
+ATOM   3186  N   MET     5       9.068   1.145 -10.680  1.00  0.00           N  
+ATOM   3187  CA  MET     5       9.455   0.551  -9.370  1.00  0.00           C  
+ATOM   3188  C   MET     5       9.334   1.610  -8.274  1.00  0.00           C  
+ATOM   3189  O   MET     5       9.067   1.302  -7.129  1.00  0.00           O  
+ATOM   3190  CB  MET     5      10.900   0.053  -9.439  1.00  0.00           C  
+ATOM   3191  CG  MET     5      10.914  -1.472  -9.597  1.00  0.00           C  
+ATOM   3192  SD  MET     5       9.764  -1.965 -10.908  1.00  0.00           S  
+ATOM   3193  CE  MET     5       8.997  -3.347 -10.028  1.00  0.00           C  
+ATOM   3194  H   MET     5       9.681   1.091 -11.445  1.00  0.00           H  
+ATOM   3195  HA  MET     5       8.799  -0.277  -9.144  1.00  0.00           H  
+ATOM   3196 1HB  MET     5      11.397   0.509 -10.284  1.00  0.00           H  
+ATOM   3197 2HB  MET     5      11.416   0.322  -8.531  1.00  0.00           H  
+ATOM   3198 1HG  MET     5      11.912  -1.796  -9.855  1.00  0.00           H  
+ATOM   3199 2HG  MET     5      10.617  -1.931  -8.666  1.00  0.00           H  
+ATOM   3200 1HE  MET     5       8.549  -2.986  -9.112  1.00  0.00           H  
+ATOM   3201 2HE  MET     5       8.233  -3.789 -10.648  1.00  0.00           H  
+ATOM   3202 3HE  MET     5       9.748  -4.089  -9.800  1.00  0.00           H  
+ATOM   3203  N   SER     6       9.529   2.855  -8.609  1.00  0.00           N  
+ATOM   3204  CA  SER     6       9.422   3.928  -7.579  1.00  0.00           C  
+ATOM   3205  C   SER     6       7.956   4.099  -7.171  1.00  0.00           C  
+ATOM   3206  O   SER     6       7.645   4.350  -6.022  1.00  0.00           O  
+ATOM   3207  CB  SER     6       9.949   5.242  -8.154  1.00  0.00           C  
+ATOM   3208  OG  SER     6      10.159   6.168  -7.095  1.00  0.00           O  
+ATOM   3209  H   SER     6       9.746   3.087  -9.539  1.00  0.00           H  
+ATOM   3210  HA  SER     6      10.006   3.653  -6.713  1.00  0.00           H  
+ATOM   3211 1HB  SER     6      10.883   5.068  -8.661  1.00  0.00           H  
+ATOM   3212 2HB  SER     6       9.229   5.641  -8.856  1.00  0.00           H  
+ATOM   3213  HG  SER     6       9.303   6.387  -6.720  1.00  0.00           H  
+ATOM   3214  N   THR     7       7.052   3.958  -8.103  1.00  0.00           N  
+ATOM   3215  CA  THR     7       5.608   4.109  -7.771  1.00  0.00           C  
+ATOM   3216  C   THR     7       5.104   2.815  -7.136  1.00  0.00           C  
+ATOM   3217  O   THR     7       4.072   2.784  -6.494  1.00  0.00           O  
+ATOM   3218  CB  THR     7       4.816   4.395  -9.049  1.00  0.00           C  
+ATOM   3219  OG1 THR     7       5.098   5.714  -9.493  1.00  0.00           O  
+ATOM   3220  CG2 THR     7       3.319   4.258  -8.769  1.00  0.00           C  
+ATOM   3221  H   THR     7       7.323   3.749  -9.019  1.00  0.00           H  
+ATOM   3222  HA  THR     7       5.480   4.927  -7.076  1.00  0.00           H  
+ATOM   3223  HB  THR     7       5.100   3.688  -9.814  1.00  0.00           H  
+ATOM   3224  HG1 THR     7       5.389   5.664 -10.406  1.00  0.00           H  
+ATOM   3225 1HG2 THR     7       3.149   4.277  -7.702  1.00  0.00           H  
+ATOM   3226 2HG2 THR     7       2.789   5.076  -9.232  1.00  0.00           H  
+ATOM   3227 3HG2 THR     7       2.961   3.322  -9.173  1.00  0.00           H  
+ATOM   3228  N   ALA     8       5.831   1.748  -7.306  1.00  0.00           N  
+ATOM   3229  CA  ALA     8       5.407   0.455  -6.709  1.00  0.00           C  
+ATOM   3230  C   ALA     8       5.907   0.392  -5.268  1.00  0.00           C  
+ATOM   3231  O   ALA     8       5.277  -0.181  -4.402  1.00  0.00           O  
+ATOM   3232  CB  ALA     8       6.008  -0.697  -7.515  1.00  0.00           C  
+ATOM   3233  H   ALA     8       6.663   1.799  -7.820  1.00  0.00           H  
+ATOM   3234  HA  ALA     8       4.330   0.385  -6.723  1.00  0.00           H  
+ATOM   3235 1HB  ALA     8       6.822  -0.325  -8.121  1.00  0.00           H  
+ATOM   3236 2HB  ALA     8       6.380  -1.453  -6.839  1.00  0.00           H  
+ATOM   3237 3HB  ALA     8       5.250  -1.125  -8.153  1.00  0.00           H  
+ATOM   3238  N   ILE     9       7.038   0.986  -5.008  1.00  0.00           N  
+ATOM   3239  CA  ILE     9       7.588   0.973  -3.625  1.00  0.00           C  
+ATOM   3240  C   ILE     9       6.762   1.910  -2.744  1.00  0.00           C  
+ATOM   3241  O   ILE     9       6.422   1.584  -1.625  1.00  0.00           O  
+ATOM   3242  CB  ILE     9       9.046   1.439  -3.644  1.00  0.00           C  
+ATOM   3243  CG1 ILE     9       9.144   2.802  -4.339  1.00  0.00           C  
+ATOM   3244  CG2 ILE     9       9.900   0.423  -4.404  1.00  0.00           C  
+ATOM   3245  CD1 ILE     9      10.597   3.280  -4.322  1.00  0.00           C  
+ATOM   3246  H   ILE     9       7.523   1.444  -5.723  1.00  0.00           H  
+ATOM   3247  HA  ILE     9       7.535  -0.029  -3.230  1.00  0.00           H  
+ATOM   3248  HB  ILE     9       9.409   1.525  -2.629  1.00  0.00           H  
+ATOM   3249 1HG1 ILE     9       8.807   2.711  -5.361  1.00  0.00           H  
+ATOM   3250 2HG1 ILE     9       8.526   3.517  -3.817  1.00  0.00           H  
+ATOM   3251 1HG2 ILE     9       9.447   0.216  -5.362  1.00  0.00           H  
+ATOM   3252 2HG2 ILE     9      10.892   0.824  -4.553  1.00  0.00           H  
+ATOM   3253 3HG2 ILE     9       9.966  -0.492  -3.831  1.00  0.00           H  
+ATOM   3254 1HD1 ILE     9      10.983   3.227  -3.315  1.00  0.00           H  
+ATOM   3255 2HD1 ILE     9      11.191   2.650  -4.969  1.00  0.00           H  
+ATOM   3256 3HD1 ILE     9      10.645   4.301  -4.673  1.00  0.00           H  
+ATOM   3257  N   SER    10       6.429   3.069  -3.242  1.00  0.00           N  
+ATOM   3258  CA  SER    10       5.618   4.016  -2.427  1.00  0.00           C  
+ATOM   3259  C   SER    10       4.199   3.465  -2.295  1.00  0.00           C  
+ATOM   3260  O   SER    10       3.652   3.382  -1.214  1.00  0.00           O  
+ATOM   3261  CB  SER    10       5.579   5.381  -3.115  1.00  0.00           C  
+ATOM   3262  OG  SER    10       5.786   6.400  -2.147  1.00  0.00           O  
+ATOM   3263  H   SER    10       6.707   3.315  -4.150  1.00  0.00           H  
+ATOM   3264  HA  SER    10       6.057   4.115  -1.446  1.00  0.00           H  
+ATOM   3265 1HB  SER    10       6.357   5.434  -3.858  1.00  0.00           H  
+ATOM   3266 2HB  SER    10       4.617   5.514  -3.593  1.00  0.00           H  
+ATOM   3267  HG  SER    10       6.519   6.133  -1.586  1.00  0.00           H  
+ATOM   3268  N   VAL    11       3.601   3.079  -3.387  1.00  0.00           N  
+ATOM   3269  CA  VAL    11       2.223   2.523  -3.322  1.00  0.00           C  
+ATOM   3270  C   VAL    11       2.238   1.259  -2.461  1.00  0.00           C  
+ATOM   3271  O   VAL    11       1.234   0.864  -1.904  1.00  0.00           O  
+ATOM   3272  CB  VAL    11       1.744   2.177  -4.733  1.00  0.00           C  
+ATOM   3273  CG1 VAL    11       0.387   1.475  -4.657  1.00  0.00           C  
+ATOM   3274  CG2 VAL    11       1.607   3.461  -5.554  1.00  0.00           C  
+ATOM   3275  H   VAL    11       4.062   3.148  -4.250  1.00  0.00           H  
+ATOM   3276  HA  VAL    11       1.558   3.253  -2.882  1.00  0.00           H  
+ATOM   3277  HB  VAL    11       2.462   1.520  -5.205  1.00  0.00           H  
+ATOM   3278 1HG1 VAL    11       0.120   1.317  -3.623  1.00  0.00           H  
+ATOM   3279 2HG1 VAL    11      -0.362   2.090  -5.132  1.00  0.00           H  
+ATOM   3280 3HG1 VAL    11       0.445   0.523  -5.164  1.00  0.00           H  
+ATOM   3281 1HG2 VAL    11       2.528   4.025  -5.498  1.00  0.00           H  
+ATOM   3282 2HG2 VAL    11       1.401   3.210  -6.584  1.00  0.00           H  
+ATOM   3283 3HG2 VAL    11       0.798   4.057  -5.159  1.00  0.00           H  
+ATOM   3284  N   LEU    12       3.374   0.624  -2.343  1.00  0.00           N  
+ATOM   3285  CA  LEU    12       3.452  -0.609  -1.512  1.00  0.00           C  
+ATOM   3286  C   LEU    12       3.193  -0.249  -0.049  1.00  0.00           C  
+ATOM   3287  O   LEU    12       2.381  -0.861   0.616  1.00  0.00           O  
+ATOM   3288  CB  LEU    12       4.846  -1.228  -1.646  1.00  0.00           C  
+ATOM   3289  CG  LEU    12       4.840  -2.263  -2.773  1.00  0.00           C  
+ATOM   3290  CD1 LEU    12       6.242  -2.374  -3.374  1.00  0.00           C  
+ATOM   3291  CD2 LEU    12       4.420  -3.625  -2.211  1.00  0.00           C  
+ATOM   3292  H   LEU    12       4.175   0.961  -2.797  1.00  0.00           H  
+ATOM   3293  HA  LEU    12       2.709  -1.318  -1.849  1.00  0.00           H  
+ATOM   3294 1HB  LEU    12       5.563  -0.452  -1.872  1.00  0.00           H  
+ATOM   3295 2HB  LEU    12       5.115  -1.710  -0.719  1.00  0.00           H  
+ATOM   3296  HG  LEU    12       4.142  -1.956  -3.538  1.00  0.00           H  
+ATOM   3297 1HD1 LEU    12       6.923  -1.750  -2.815  1.00  0.00           H  
+ATOM   3298 2HD1 LEU    12       6.573  -3.400  -3.327  1.00  0.00           H  
+ATOM   3299 3HD1 LEU    12       6.219  -2.050  -4.403  1.00  0.00           H  
+ATOM   3300 1HD2 LEU    12       5.091  -3.907  -1.414  1.00  0.00           H  
+ATOM   3301 2HD2 LEU    12       3.412  -3.561  -1.828  1.00  0.00           H  
+ATOM   3302 3HD2 LEU    12       4.460  -4.366  -2.995  1.00  0.00           H  
+ATOM   3303  N   LEU    13       3.875   0.742   0.457  1.00  0.00           N  
+ATOM   3304  CA  LEU    13       3.660   1.141   1.877  1.00  0.00           C  
+ATOM   3305  C   LEU    13       2.231   1.661   2.036  1.00  0.00           C  
+ATOM   3306  O   LEU    13       1.640   1.577   3.094  1.00  0.00           O  
+ATOM   3307  CB  LEU    13       4.649   2.245   2.271  1.00  0.00           C  
+ATOM   3308  CG  LEU    13       5.936   2.123   1.449  1.00  0.00           C  
+ATOM   3309  CD1 LEU    13       7.041   2.944   2.109  1.00  0.00           C  
+ATOM   3310  CD2 LEU    13       6.366   0.654   1.377  1.00  0.00           C  
+ATOM   3311  H   LEU    13       4.523   1.226  -0.099  1.00  0.00           H  
+ATOM   3312  HA  LEU    13       3.803   0.283   2.518  1.00  0.00           H  
+ATOM   3313 1HB  LEU    13       4.198   3.210   2.088  1.00  0.00           H  
+ATOM   3314 2HB  LEU    13       4.887   2.154   3.320  1.00  0.00           H  
+ATOM   3315  HG  LEU    13       5.761   2.498   0.450  1.00  0.00           H  
+ATOM   3316 1HD1 LEU    13       6.642   3.450   2.977  1.00  0.00           H  
+ATOM   3317 2HD1 LEU    13       7.843   2.290   2.411  1.00  0.00           H  
+ATOM   3318 3HD1 LEU    13       7.414   3.674   1.407  1.00  0.00           H  
+ATOM   3319 1HD2 LEU    13       5.888   0.100   2.172  1.00  0.00           H  
+ATOM   3320 2HD2 LEU    13       6.072   0.240   0.424  1.00  0.00           H  
+ATOM   3321 3HD2 LEU    13       7.438   0.587   1.485  1.00  0.00           H  
+ATOM   3322  N   ALA    14       1.675   2.201   0.987  1.00  0.00           N  
+ATOM   3323  CA  ALA    14       0.285   2.732   1.066  1.00  0.00           C  
+ATOM   3324  C   ALA    14      -0.710   1.570   1.009  1.00  0.00           C  
+ATOM   3325  O   ALA    14      -1.819   1.667   1.493  1.00  0.00           O  
+ATOM   3326  CB  ALA    14       0.035   3.674  -0.113  1.00  0.00           C  
+ATOM   3327  H   ALA    14       2.174   2.258   0.146  1.00  0.00           H  
+ATOM   3328  HA  ALA    14       0.157   3.272   1.992  1.00  0.00           H  
+ATOM   3329 1HB  ALA    14       0.981   3.995  -0.523  1.00  0.00           H  
+ATOM   3330 2HB  ALA    14      -0.529   3.156  -0.874  1.00  0.00           H  
+ATOM   3331 3HB  ALA    14      -0.522   4.534   0.225  1.00  0.00           H  
+ATOM   3332  N   GLN    15      -0.322   0.473   0.419  1.00  0.00           N  
+ATOM   3333  CA  GLN    15      -1.246  -0.691   0.330  1.00  0.00           C  
+ATOM   3334  C   GLN    15      -1.250  -1.442   1.662  1.00  0.00           C  
+ATOM   3335  O   GLN    15      -2.217  -2.081   2.023  1.00  0.00           O  
+ATOM   3336  CB  GLN    15      -0.778  -1.631  -0.784  1.00  0.00           C  
+ATOM   3337  CG  GLN    15      -1.781  -1.594  -1.940  1.00  0.00           C  
+ATOM   3338  CD  GLN    15      -1.960  -0.150  -2.413  1.00  0.00           C  
+ATOM   3339  OE1 GLN    15      -1.486   0.773  -1.782  1.00  0.00           O  
+ATOM   3340  NE2 GLN    15      -2.630   0.086  -3.509  1.00  0.00           N  
+ATOM   3341  H   GLN    15       0.577   0.415   0.034  1.00  0.00           H  
+ATOM   3342  HA  GLN    15      -2.245  -0.343   0.110  1.00  0.00           H  
+ATOM   3343 1HB  GLN    15       0.192  -1.314  -1.138  1.00  0.00           H  
+ATOM   3344 2HB  GLN    15      -0.711  -2.639  -0.402  1.00  0.00           H  
+ATOM   3345 1HG  GLN    15      -1.413  -2.199  -2.756  1.00  0.00           H  
+ATOM   3346 2HG  GLN    15      -2.731  -1.980  -1.604  1.00  0.00           H  
+ATOM   3347 1HE2 GLN    15      -2.750   1.006  -3.821  1.00  0.00           H  
+ATOM   3348 2HE2 GLN    15      -3.013  -0.658  -4.019  1.00  0.00           H  
+ATOM   3349  N   ALA    16      -0.174  -1.372   2.394  1.00  0.00           N  
+ATOM   3350  CA  ALA    16      -0.115  -2.082   3.702  1.00  0.00           C  
+ATOM   3351  C   ALA    16      -1.279  -1.626   4.587  1.00  0.00           C  
+ATOM   3352  O   ALA    16      -1.980  -2.429   5.169  1.00  0.00           O  
+ATOM   3353  CB  ALA    16       1.209  -1.761   4.399  1.00  0.00           C  
+ATOM   3354  H   ALA    16       0.598  -0.852   2.084  1.00  0.00           H  
+ATOM   3355  HA  ALA    16      -0.186  -3.147   3.537  1.00  0.00           H  
+ATOM   3356 1HB  ALA    16       1.755  -1.035   3.815  1.00  0.00           H  
+ATOM   3357 2HB  ALA    16       1.011  -1.358   5.380  1.00  0.00           H  
+ATOM   3358 3HB  ALA    16       1.795  -2.664   4.490  1.00  0.00           H  
+ATOM   3359  N   VAL    17      -1.490  -0.342   4.693  1.00  0.00           N  
+ATOM   3360  CA  VAL    17      -2.605   0.161   5.543  1.00  0.00           C  
+ATOM   3361  C   VAL    17      -3.912   0.141   4.746  1.00  0.00           C  
+ATOM   3362  O   VAL    17      -4.983  -0.023   5.296  1.00  0.00           O  
+ATOM   3363  CB  VAL    17      -2.302   1.594   5.985  1.00  0.00           C  
+ATOM   3364  CG1 VAL    17      -3.249   1.989   7.120  1.00  0.00           C  
+ATOM   3365  CG2 VAL    17      -0.856   1.681   6.475  1.00  0.00           C  
+ATOM   3366  H   VAL    17      -0.912   0.290   4.216  1.00  0.00           H  
+ATOM   3367  HA  VAL    17      -2.706  -0.469   6.414  1.00  0.00           H  
+ATOM   3368  HB  VAL    17      -2.443   2.265   5.150  1.00  0.00           H  
+ATOM   3369 1HG1 VAL    17      -4.264   1.747   6.844  1.00  0.00           H  
+ATOM   3370 2HG1 VAL    17      -2.981   1.450   8.017  1.00  0.00           H  
+ATOM   3371 3HG1 VAL    17      -3.170   3.051   7.302  1.00  0.00           H  
+ATOM   3372 1HG2 VAL    17      -0.618   0.798   7.050  1.00  0.00           H  
+ATOM   3373 2HG2 VAL    17      -0.192   1.748   5.625  1.00  0.00           H  
+ATOM   3374 3HG2 VAL    17      -0.737   2.557   7.095  1.00  0.00           H  
+ATOM   3375  N   PHE    18      -3.836   0.309   3.455  1.00  0.00           N  
+ATOM   3376  CA  PHE    18      -5.075   0.303   2.629  1.00  0.00           C  
+ATOM   3377  C   PHE    18      -5.828  -1.013   2.842  1.00  0.00           C  
+ATOM   3378  O   PHE    18      -6.948  -1.030   3.312  1.00  0.00           O  
+ATOM   3379  CB  PHE    18      -4.704   0.444   1.151  1.00  0.00           C  
+ATOM   3380  CG  PHE    18      -5.616   1.455   0.499  1.00  0.00           C  
+ATOM   3381  CD1 PHE    18      -5.638   2.777   0.960  1.00  0.00           C  
+ATOM   3382  CD2 PHE    18      -6.441   1.070  -0.565  1.00  0.00           C  
+ATOM   3383  CE1 PHE    18      -6.483   3.714   0.355  1.00  0.00           C  
+ATOM   3384  CE2 PHE    18      -7.286   2.008  -1.169  1.00  0.00           C  
+ATOM   3385  CZ  PHE    18      -7.308   3.331  -0.709  1.00  0.00           C  
+ATOM   3386  H   PHE    18      -2.963   0.442   3.029  1.00  0.00           H  
+ATOM   3387  HA  PHE    18      -5.706   1.129   2.921  1.00  0.00           H  
+ATOM   3388 1HB  PHE    18      -3.680   0.777   1.068  1.00  0.00           H  
+ATOM   3389 2HB  PHE    18      -4.815  -0.510   0.659  1.00  0.00           H  
+ATOM   3390  HD1 PHE    18      -5.002   3.073   1.781  1.00  0.00           H  
+ATOM   3391  HD2 PHE    18      -6.423   0.051  -0.920  1.00  0.00           H  
+ATOM   3392  HE1 PHE    18      -6.500   4.734   0.711  1.00  0.00           H  
+ATOM   3393  HE2 PHE    18      -7.922   1.712  -1.991  1.00  0.00           H  
+ATOM   3394  HZ  PHE    18      -7.959   4.054  -1.175  1.00  0.00           H  
+ATOM   3395  N   LEU    19      -5.220  -2.116   2.498  1.00  0.00           N  
+ATOM   3396  CA  LEU    19      -5.902  -3.428   2.679  1.00  0.00           C  
+ATOM   3397  C   LEU    19      -6.251  -3.624   4.156  1.00  0.00           C  
+ATOM   3398  O   LEU    19      -7.203  -4.297   4.495  1.00  0.00           O  
+ATOM   3399  CB  LEU    19      -4.972  -4.551   2.218  1.00  0.00           C  
+ATOM   3400  CG  LEU    19      -4.832  -4.502   0.696  1.00  0.00           C  
+ATOM   3401  CD1 LEU    19      -3.772  -5.510   0.247  1.00  0.00           C  
+ATOM   3402  CD2 LEU    19      -6.173  -4.854   0.048  1.00  0.00           C  
+ATOM   3403  H   LEU    19      -4.318  -2.080   2.119  1.00  0.00           H  
+ATOM   3404  HA  LEU    19      -6.808  -3.446   2.091  1.00  0.00           H  
+ATOM   3405 1HB  LEU    19      -4.002  -4.427   2.676  1.00  0.00           H  
+ATOM   3406 2HB  LEU    19      -5.387  -5.505   2.508  1.00  0.00           H  
+ATOM   3407  HG  LEU    19      -4.534  -3.508   0.394  1.00  0.00           H  
+ATOM   3408 1HD1 LEU    19      -3.725  -6.323   0.956  1.00  0.00           H  
+ATOM   3409 2HD1 LEU    19      -4.030  -5.896  -0.728  1.00  0.00           H  
+ATOM   3410 3HD1 LEU    19      -2.809  -5.021   0.197  1.00  0.00           H  
+ATOM   3411 1HD2 LEU    19      -6.956  -4.798   0.791  1.00  0.00           H  
+ATOM   3412 2HD2 LEU    19      -6.382  -4.155  -0.748  1.00  0.00           H  
+ATOM   3413 3HD2 LEU    19      -6.128  -5.855  -0.354  1.00  0.00           H  
+ATOM   3414  N   LEU    20      -5.489  -3.037   5.039  1.00  0.00           N  
+ATOM   3415  CA  LEU    20      -5.782  -3.189   6.492  1.00  0.00           C  
+ATOM   3416  C   LEU    20      -7.224  -2.756   6.764  1.00  0.00           C  
+ATOM   3417  O   LEU    20      -8.020  -3.506   7.295  1.00  0.00           O  
+ATOM   3418  CB  LEU    20      -4.824  -2.312   7.301  1.00  0.00           C  
+ATOM   3419  CG  LEU    20      -4.471  -3.016   8.612  1.00  0.00           C  
+ATOM   3420  CD1 LEU    20      -3.578  -4.221   8.319  1.00  0.00           C  
+ATOM   3421  CD2 LEU    20      -3.728  -2.043   9.529  1.00  0.00           C  
+ATOM   3422  H   LEU    20      -4.726  -2.496   4.746  1.00  0.00           H  
+ATOM   3423  HA  LEU    20      -5.656  -4.222   6.780  1.00  0.00           H  
+ATOM   3424 1HB  LEU    20      -3.924  -2.140   6.729  1.00  0.00           H  
+ATOM   3425 2HB  LEU    20      -5.299  -1.366   7.519  1.00  0.00           H  
+ATOM   3426  HG  LEU    20      -5.378  -3.349   9.096  1.00  0.00           H  
+ATOM   3427 1HD1 LEU    20      -3.866  -4.663   7.376  1.00  0.00           H  
+ATOM   3428 2HD1 LEU    20      -2.547  -3.901   8.266  1.00  0.00           H  
+ATOM   3429 3HD1 LEU    20      -3.688  -4.951   9.107  1.00  0.00           H  
+ATOM   3430 1HD2 LEU    20      -2.862  -1.653   9.014  1.00  0.00           H  
+ATOM   3431 2HD2 LEU    20      -4.384  -1.227   9.796  1.00  0.00           H  
+ATOM   3432 3HD2 LEU    20      -3.413  -2.559  10.423  1.00  0.00           H  
+ATOM   3433  N   LEU    21      -7.568  -1.551   6.401  1.00  0.00           N  
+ATOM   3434  CA  LEU    21      -8.957  -1.071   6.635  1.00  0.00           C  
+ATOM   3435  C   LEU    21      -9.936  -1.978   5.889  1.00  0.00           C  
+ATOM   3436  O   LEU    21     -10.857  -2.522   6.466  1.00  0.00           O  
+ATOM   3437  CB  LEU    21      -9.097   0.363   6.121  1.00  0.00           C  
+ATOM   3438  CG  LEU    21     -10.516   0.865   6.389  1.00  0.00           C  
+ATOM   3439  CD1 LEU    21     -10.636   1.297   7.852  1.00  0.00           C  
+ATOM   3440  CD2 LEU    21     -10.812   2.061   5.481  1.00  0.00           C  
+ATOM   3441  H   LEU    21      -6.911  -0.962   5.972  1.00  0.00           H  
+ATOM   3442  HA  LEU    21      -9.174  -1.098   7.692  1.00  0.00           H  
+ATOM   3443 1HB  LEU    21      -8.387   0.999   6.629  1.00  0.00           H  
+ATOM   3444 2HB  LEU    21      -8.905   0.385   5.058  1.00  0.00           H  
+ATOM   3445  HG  LEU    21     -11.222   0.074   6.188  1.00  0.00           H  
+ATOM   3446 1HD1 LEU    21      -9.901   0.772   8.443  1.00  0.00           H  
+ATOM   3447 2HD1 LEU    21     -10.467   2.360   7.928  1.00  0.00           H  
+ATOM   3448 3HD1 LEU    21     -11.625   1.062   8.216  1.00  0.00           H  
+ATOM   3449 1HD2 LEU    21     -10.343   1.907   4.520  1.00  0.00           H  
+ATOM   3450 2HD2 LEU    21     -11.880   2.158   5.350  1.00  0.00           H  
+ATOM   3451 3HD2 LEU    21     -10.421   2.961   5.932  1.00  0.00           H  
+ATOM   3452  N   THR    22      -9.743  -2.151   4.609  1.00  0.00           N  
+ATOM   3453  CA  THR    22     -10.662  -3.028   3.832  1.00  0.00           C  
+ATOM   3454  C   THR    22     -10.866  -4.342   4.588  1.00  0.00           C  
+ATOM   3455  O   THR    22     -11.889  -4.985   4.472  1.00  0.00           O  
+ATOM   3456  CB  THR    22     -10.052  -3.318   2.458  1.00  0.00           C  
+ATOM   3457  OG1 THR    22      -8.810  -3.988   2.626  1.00  0.00           O  
+ATOM   3458  CG2 THR    22      -9.827  -2.004   1.710  1.00  0.00           C  
+ATOM   3459  H   THR    22      -8.992  -1.707   4.162  1.00  0.00           H  
+ATOM   3460  HA  THR    22     -11.614  -2.533   3.706  1.00  0.00           H  
+ATOM   3461  HB  THR    22     -10.724  -3.941   1.890  1.00  0.00           H  
+ATOM   3462  HG1 THR    22      -8.928  -4.901   2.354  1.00  0.00           H  
+ATOM   3463 1HG2 THR    22     -10.053  -1.174   2.363  1.00  0.00           H  
+ATOM   3464 2HG2 THR    22      -8.798  -1.942   1.391  1.00  0.00           H  
+ATOM   3465 3HG2 THR    22     -10.475  -1.967   0.846  1.00  0.00           H  
+ATOM   3466  N   SER    23      -9.896  -4.742   5.366  1.00  0.00           N  
+ATOM   3467  CA  SER    23     -10.033  -6.011   6.133  1.00  0.00           C  
+ATOM   3468  C   SER    23     -11.057  -5.819   7.252  1.00  0.00           C  
+ATOM   3469  O   SER    23     -11.984  -6.590   7.399  1.00  0.00           O  
+ATOM   3470  CB  SER    23      -8.680  -6.389   6.738  1.00  0.00           C  
+ATOM   3471  OG  SER    23      -8.740  -7.724   7.225  1.00  0.00           O  
+ATOM   3472  H   SER    23      -9.080  -4.206   5.447  1.00  0.00           H  
+ATOM   3473  HA  SER    23     -10.365  -6.798   5.472  1.00  0.00           H  
+ATOM   3474 1HB  SER    23      -7.915  -6.321   5.984  1.00  0.00           H  
+ATOM   3475 2HB  SER    23      -8.447  -5.709   7.547  1.00  0.00           H  
+ATOM   3476  HG  SER    23      -9.395  -8.199   6.709  1.00  0.00           H  
+ATOM   3477  N   GLN    24     -10.900  -4.791   8.041  1.00  0.00           N  
+ATOM   3478  CA  GLN    24     -11.866  -4.547   9.147  1.00  0.00           C  
+ATOM   3479  C   GLN    24     -13.232  -4.191   8.557  1.00  0.00           C  
+ATOM   3480  O   GLN    24     -14.223  -4.124   9.257  1.00  0.00           O  
+ATOM   3481  CB  GLN    24     -11.367  -3.388  10.012  1.00  0.00           C  
+ATOM   3482  CG  GLN    24     -10.664  -3.942  11.253  1.00  0.00           C  
+ATOM   3483  CD  GLN    24     -10.704  -2.900  12.371  1.00  0.00           C  
+ATOM   3484  OE1 GLN    24     -11.202  -3.165  13.447  1.00  0.00           O  
+ATOM   3485  NE2 GLN    24     -10.197  -1.716  12.162  1.00  0.00           N  
+ATOM   3486  H   GLN    24     -10.146  -4.179   7.903  1.00  0.00           H  
+ATOM   3487  HA  GLN    24     -11.954  -5.437   9.752  1.00  0.00           H  
+ATOM   3488 1HB  GLN    24     -10.673  -2.786   9.443  1.00  0.00           H  
+ATOM   3489 2HB  GLN    24     -12.206  -2.780  10.318  1.00  0.00           H  
+ATOM   3490 1HG  GLN    24     -11.166  -4.842  11.579  1.00  0.00           H  
+ATOM   3491 2HG  GLN    24      -9.637  -4.170  11.012  1.00  0.00           H  
+ATOM   3492 1HE2 GLN    24     -10.217  -1.041  12.871  1.00  0.00           H  
+ATOM   3493 2HE2 GLN    24      -9.795  -1.502  11.293  1.00  0.00           H  
+ATOM   3494  N   ARG    25     -13.294  -3.963   7.273  1.00  0.00           N  
+ATOM   3495  CA  ARG    25     -14.596  -3.613   6.641  1.00  0.00           C  
+ATOM   3496  C   ARG    25     -15.306  -4.893   6.192  1.00  0.00           C  
+ATOM   3497  O   ARG    25     -15.229  -5.207   5.016  1.00  0.00           O  
+ATOM   3498  CB  ARG    25     -14.348  -2.715   5.427  1.00  0.00           C  
+ATOM   3499  CG  ARG    25     -15.368  -1.573   5.419  1.00  0.00           C  
+ATOM   3500  CD  ARG    25     -15.792  -1.278   3.978  1.00  0.00           C  
+ATOM   3501  NE  ARG    25     -17.265  -1.064   3.928  1.00  0.00           N  
+ATOM   3502  CZ  ARG    25     -17.802  -0.063   4.570  1.00  0.00           C  
+ATOM   3503  NH1 ARG    25     -18.507  -0.280   5.646  1.00  0.00           N  
+ATOM   3504  NH2 ARG    25     -17.631   1.157   4.137  1.00  0.00           N  
+ATOM   3505  OXT ARG    25     -15.912  -5.537   7.032  1.00  0.00           O  
+ATOM   3506  H   ARG    25     -12.483  -4.022   6.726  1.00  0.00           H  
+ATOM   3507  HA  ARG    25     -15.215  -3.090   7.356  1.00  0.00           H  
+ATOM   3508 1HB  ARG    25     -13.350  -2.307   5.480  1.00  0.00           H  
+ATOM   3509 2HB  ARG    25     -14.454  -3.295   4.523  1.00  0.00           H  
+ATOM   3510 1HG  ARG    25     -16.233  -1.859   5.999  1.00  0.00           H  
+ATOM   3511 2HG  ARG    25     -14.922  -0.689   5.848  1.00  0.00           H  
+ATOM   3512 1HD  ARG    25     -15.287  -0.390   3.629  1.00  0.00           H  
+ATOM   3513 2HD  ARG    25     -15.527  -2.114   3.347  1.00  0.00           H  
+ATOM   3514  HE  ARG    25     -17.833  -1.675   3.413  1.00  0.00           H  
+ATOM   3515 1HH1 ARG    25     -18.638  -1.214   5.978  1.00  0.00           H  
+ATOM   3516 2HH1 ARG    25     -18.918   0.488   6.138  1.00  0.00           H  
+ATOM   3517 1HH2 ARG    25     -17.091   1.323   3.312  1.00  0.00           H  
+ATOM   3518 2HH2 ARG    25     -18.041   1.924   4.630  1.00  0.00           H  
+TER    3519      ARG    25                                                      
+ENDMDL                                                                          
+MODEL       10                                                                  
+ATOM   3520  N   GLY     1      16.683   2.046  -8.868  1.00  0.00           N  
+ATOM   3521  CA  GLY     1      15.286   2.014  -9.487  1.00  0.00           C  
+ATOM   3522  C   GLY     1      14.709   3.267 -10.054  1.00  0.00           C  
+ATOM   3523  O   GLY     1      13.652   3.257 -10.653  1.00  0.00           O  
+ATOM   3524 1H   GLY     1      17.287   2.694  -9.410  1.00  0.00           H  
+ATOM   3525 2H   GLY     1      16.619   2.374  -7.883  1.00  0.00           H  
+ATOM   3526 3H   GLY     1      17.094   1.090  -8.889  1.00  0.00           H  
+ATOM   3527 1HA  GLY     1      15.318   1.733 -10.420  1.00  0.00           H  
+ATOM   3528 2HA  GLY     1      14.635   1.310  -8.821  1.00  0.00           H  
+ATOM   3529  N   SER     2      15.381   4.372  -9.883  1.00  0.00           N  
+ATOM   3530  CA  SER     2      14.859   5.659 -10.426  1.00  0.00           C  
+ATOM   3531  C   SER     2      13.542   6.011  -9.729  1.00  0.00           C  
+ATOM   3532  O   SER     2      12.997   5.228  -8.978  1.00  0.00           O  
+ATOM   3533  CB  SER     2      14.620   5.521 -11.930  1.00  0.00           C  
+ATOM   3534  OG  SER     2      15.846   5.192 -12.571  1.00  0.00           O  
+ATOM   3535  H   SER     2      16.232   4.357  -9.395  1.00  0.00           H  
+ATOM   3536  HA  SER     2      15.579   6.442 -10.247  1.00  0.00           H  
+ATOM   3537 1HB  SER     2      13.902   4.740 -12.115  1.00  0.00           H  
+ATOM   3538 2HB  SER     2      14.238   6.455 -12.319  1.00  0.00           H  
+ATOM   3539  HG  SER     2      15.736   5.337 -13.514  1.00  0.00           H  
+ATOM   3540  N   GLU     3      13.032   7.188  -9.970  1.00  0.00           N  
+ATOM   3541  CA  GLU     3      11.752   7.594  -9.321  1.00  0.00           C  
+ATOM   3542  C   GLU     3      10.645   6.615  -9.712  1.00  0.00           C  
+ATOM   3543  O   GLU     3       9.648   6.486  -9.031  1.00  0.00           O  
+ATOM   3544  CB  GLU     3      11.373   9.004  -9.781  1.00  0.00           C  
+ATOM   3545  CG  GLU     3      10.769   9.784  -8.610  1.00  0.00           C  
+ATOM   3546  CD  GLU     3      11.890  10.438  -7.801  1.00  0.00           C  
+ATOM   3547  OE1 GLU     3      12.651  11.194  -8.384  1.00  0.00           O  
+ATOM   3548  OE2 GLU     3      11.968  10.174  -6.613  1.00  0.00           O  
+ATOM   3549  H   GLU     3      13.489   7.806 -10.577  1.00  0.00           H  
+ATOM   3550  HA  GLU     3      11.876   7.588  -8.248  1.00  0.00           H  
+ATOM   3551 1HB  GLU     3      12.255   9.515 -10.137  1.00  0.00           H  
+ATOM   3552 2HB  GLU     3      10.649   8.940 -10.579  1.00  0.00           H  
+ATOM   3553 1HG  GLU     3      10.105  10.546  -8.990  1.00  0.00           H  
+ATOM   3554 2HG  GLU     3      10.217   9.109  -7.975  1.00  0.00           H  
+ATOM   3555  N   LYS     4      10.809   5.924 -10.807  1.00  0.00           N  
+ATOM   3556  CA  LYS     4       9.763   4.955 -11.239  1.00  0.00           C  
+ATOM   3557  C   LYS     4       9.649   3.834 -10.204  1.00  0.00           C  
+ATOM   3558  O   LYS     4       8.598   3.601  -9.639  1.00  0.00           O  
+ATOM   3559  CB  LYS     4      10.146   4.363 -12.598  1.00  0.00           C  
+ATOM   3560  CG  LYS     4      10.007   5.439 -13.677  1.00  0.00           C  
+ATOM   3561  CD  LYS     4      11.395   5.945 -14.079  1.00  0.00           C  
+ATOM   3562  CE  LYS     4      11.290   6.747 -15.377  1.00  0.00           C  
+ATOM   3563  NZ  LYS     4      11.674   5.884 -16.530  1.00  0.00           N  
+ATOM   3564  H   LYS     4      11.621   6.043 -11.345  1.00  0.00           H  
+ATOM   3565  HA  LYS     4       8.815   5.464 -11.322  1.00  0.00           H  
+ATOM   3566 1HB  LYS     4      11.167   4.014 -12.564  1.00  0.00           H  
+ATOM   3567 2HB  LYS     4       9.490   3.538 -12.828  1.00  0.00           H  
+ATOM   3568 1HG  LYS     4       9.512   5.020 -14.541  1.00  0.00           H  
+ATOM   3569 2HG  LYS     4       9.424   6.262 -13.292  1.00  0.00           H  
+ATOM   3570 1HD  LYS     4      11.788   6.576 -13.293  1.00  0.00           H  
+ATOM   3571 2HD  LYS     4      12.054   5.104 -14.229  1.00  0.00           H  
+ATOM   3572 1HE  LYS     4      10.274   7.090 -15.506  1.00  0.00           H  
+ATOM   3573 2HE  LYS     4      11.953   7.599 -15.329  1.00  0.00           H  
+ATOM   3574 1HZ  LYS     4      12.323   5.140 -16.205  1.00  0.00           H  
+ATOM   3575 2HZ  LYS     4      10.820   5.447 -16.935  1.00  0.00           H  
+ATOM   3576 3HZ  LYS     4      12.145   6.461 -17.254  1.00  0.00           H  
+ATOM   3577  N   MET     5      10.723   3.136  -9.948  1.00  0.00           N  
+ATOM   3578  CA  MET     5      10.673   2.035  -8.946  1.00  0.00           C  
+ATOM   3579  C   MET     5      10.385   2.617  -7.562  1.00  0.00           C  
+ATOM   3580  O   MET     5       9.928   1.931  -6.671  1.00  0.00           O  
+ATOM   3581  CB  MET     5      12.016   1.303  -8.921  1.00  0.00           C  
+ATOM   3582  CG  MET     5      11.791  -0.163  -8.547  1.00  0.00           C  
+ATOM   3583  SD  MET     5      12.095  -1.209  -9.992  1.00  0.00           S  
+ATOM   3584  CE  MET     5      13.031  -2.504  -9.144  1.00  0.00           C  
+ATOM   3585  H   MET     5      11.562   3.340 -10.411  1.00  0.00           H  
+ATOM   3586  HA  MET     5       9.890   1.340  -9.215  1.00  0.00           H  
+ATOM   3587 1HB  MET     5      12.476   1.361  -9.898  1.00  0.00           H  
+ATOM   3588 2HB  MET     5      12.664   1.764  -8.190  1.00  0.00           H  
+ATOM   3589 1HG  MET     5      12.468  -0.440  -7.753  1.00  0.00           H  
+ATOM   3590 2HG  MET     5      10.773  -0.297  -8.214  1.00  0.00           H  
+ATOM   3591 1HE  MET     5      13.257  -2.188  -8.139  1.00  0.00           H  
+ATOM   3592 2HE  MET     5      12.442  -3.410  -9.110  1.00  0.00           H  
+ATOM   3593 3HE  MET     5      13.954  -2.687  -9.677  1.00  0.00           H  
+ATOM   3594  N   SER     6      10.650   3.882  -7.376  1.00  0.00           N  
+ATOM   3595  CA  SER     6      10.389   4.509  -6.051  1.00  0.00           C  
+ATOM   3596  C   SER     6       8.883   4.700  -5.867  1.00  0.00           C  
+ATOM   3597  O   SER     6       8.354   4.543  -4.784  1.00  0.00           O  
+ATOM   3598  CB  SER     6      11.087   5.869  -5.988  1.00  0.00           C  
+ATOM   3599  OG  SER     6      11.217   6.270  -4.631  1.00  0.00           O  
+ATOM   3600  H   SER     6      11.017   4.419  -8.109  1.00  0.00           H  
+ATOM   3601  HA  SER     6      10.771   3.872  -5.267  1.00  0.00           H  
+ATOM   3602 1HB  SER     6      12.065   5.793  -6.432  1.00  0.00           H  
+ATOM   3603 2HB  SER     6      10.502   6.597  -6.536  1.00  0.00           H  
+ATOM   3604  HG  SER     6      11.349   5.482  -4.099  1.00  0.00           H  
+ATOM   3605  N   THR     7       8.186   5.030  -6.920  1.00  0.00           N  
+ATOM   3606  CA  THR     7       6.714   5.223  -6.806  1.00  0.00           C  
+ATOM   3607  C   THR     7       6.034   3.857  -6.838  1.00  0.00           C  
+ATOM   3608  O   THR     7       4.901   3.703  -6.426  1.00  0.00           O  
+ATOM   3609  CB  THR     7       6.213   6.079  -7.972  1.00  0.00           C  
+ATOM   3610  OG1 THR     7       7.059   5.888  -9.098  1.00  0.00           O  
+ATOM   3611  CG2 THR     7       6.227   7.553  -7.565  1.00  0.00           C  
+ATOM   3612  H   THR     7       8.630   5.144  -7.786  1.00  0.00           H  
+ATOM   3613  HA  THR     7       6.488   5.716  -5.871  1.00  0.00           H  
+ATOM   3614  HB  THR     7       5.205   5.790  -8.225  1.00  0.00           H  
+ATOM   3615  HG1 THR     7       7.083   6.711  -9.593  1.00  0.00           H  
+ATOM   3616 1HG2 THR     7       6.913   7.693  -6.741  1.00  0.00           H  
+ATOM   3617 2HG2 THR     7       6.545   8.156  -8.404  1.00  0.00           H  
+ATOM   3618 3HG2 THR     7       5.235   7.853  -7.262  1.00  0.00           H  
+ATOM   3619  N   ALA     8       6.726   2.858  -7.313  1.00  0.00           N  
+ATOM   3620  CA  ALA     8       6.130   1.498  -7.360  1.00  0.00           C  
+ATOM   3621  C   ALA     8       6.294   0.850  -5.988  1.00  0.00           C  
+ATOM   3622  O   ALA     8       5.477   0.059  -5.559  1.00  0.00           O  
+ATOM   3623  CB  ALA     8       6.855   0.658  -8.415  1.00  0.00           C  
+ATOM   3624  H   ALA     8       7.642   3.002  -7.628  1.00  0.00           H  
+ATOM   3625  HA  ALA     8       5.082   1.567  -7.607  1.00  0.00           H  
+ATOM   3626 1HB  ALA     8       7.294   1.311  -9.156  1.00  0.00           H  
+ATOM   3627 2HB  ALA     8       7.631   0.077  -7.941  1.00  0.00           H  
+ATOM   3628 3HB  ALA     8       6.149  -0.004  -8.893  1.00  0.00           H  
+ATOM   3629  N   ILE     9       7.342   1.194  -5.291  1.00  0.00           N  
+ATOM   3630  CA  ILE     9       7.558   0.614  -3.940  1.00  0.00           C  
+ATOM   3631  C   ILE     9       6.594   1.279  -2.960  1.00  0.00           C  
+ATOM   3632  O   ILE     9       6.023   0.637  -2.101  1.00  0.00           O  
+ATOM   3633  CB  ILE     9       9.000   0.864  -3.496  1.00  0.00           C  
+ATOM   3634  CG1 ILE     9       9.314   2.359  -3.590  1.00  0.00           C  
+ATOM   3635  CG2 ILE     9       9.956   0.087  -4.403  1.00  0.00           C  
+ATOM   3636  CD1 ILE     9      10.686   2.633  -2.972  1.00  0.00           C  
+ATOM   3637  H   ILE     9       7.980   1.842  -5.655  1.00  0.00           H  
+ATOM   3638  HA  ILE     9       7.366  -0.448  -3.971  1.00  0.00           H  
+ATOM   3639  HB  ILE     9       9.124   0.532  -2.474  1.00  0.00           H  
+ATOM   3640 1HG1 ILE     9       9.316   2.662  -4.628  1.00  0.00           H  
+ATOM   3641 2HG1 ILE     9       8.563   2.919  -3.053  1.00  0.00           H  
+ATOM   3642 1HG2 ILE     9       9.679   0.245  -5.436  1.00  0.00           H  
+ATOM   3643 2HG2 ILE     9      10.966   0.436  -4.244  1.00  0.00           H  
+ATOM   3644 3HG2 ILE     9       9.898  -0.966  -4.172  1.00  0.00           H  
+ATOM   3645 1HD1 ILE     9      11.319   1.769  -3.102  1.00  0.00           H  
+ATOM   3646 2HD1 ILE     9      11.136   3.486  -3.460  1.00  0.00           H  
+ATOM   3647 3HD1 ILE     9      10.571   2.842  -1.919  1.00  0.00           H  
+ATOM   3648  N   SER    10       6.397   2.563  -3.091  1.00  0.00           N  
+ATOM   3649  CA  SER    10       5.458   3.266  -2.176  1.00  0.00           C  
+ATOM   3650  C   SER    10       4.038   2.786  -2.469  1.00  0.00           C  
+ATOM   3651  O   SER    10       3.229   2.631  -1.577  1.00  0.00           O  
+ATOM   3652  CB  SER    10       5.550   4.775  -2.404  1.00  0.00           C  
+ATOM   3653  OG  SER    10       4.251   5.344  -2.306  1.00  0.00           O  
+ATOM   3654  H   SER    10       6.860   3.062  -3.797  1.00  0.00           H  
+ATOM   3655  HA  SER    10       5.712   3.036  -1.151  1.00  0.00           H  
+ATOM   3656 1HB  SER    10       6.188   5.216  -1.658  1.00  0.00           H  
+ATOM   3657 2HB  SER    10       5.964   4.964  -3.386  1.00  0.00           H  
+ATOM   3658  HG  SER    10       4.308   6.262  -2.576  1.00  0.00           H  
+ATOM   3659  N   VAL    11       3.733   2.540  -3.715  1.00  0.00           N  
+ATOM   3660  CA  VAL    11       2.370   2.056  -4.065  1.00  0.00           C  
+ATOM   3661  C   VAL    11       2.123   0.733  -3.340  1.00  0.00           C  
+ATOM   3662  O   VAL    11       1.083   0.522  -2.747  1.00  0.00           O  
+ATOM   3663  CB  VAL    11       2.278   1.844  -5.581  1.00  0.00           C  
+ATOM   3664  CG1 VAL    11       1.068   0.966  -5.911  1.00  0.00           C  
+ATOM   3665  CG2 VAL    11       2.122   3.198  -6.274  1.00  0.00           C  
+ATOM   3666  H   VAL    11       4.406   2.664  -4.418  1.00  0.00           H  
+ATOM   3667  HA  VAL    11       1.636   2.783  -3.754  1.00  0.00           H  
+ATOM   3668  HB  VAL    11       3.179   1.360  -5.931  1.00  0.00           H  
+ATOM   3669 1HG1 VAL    11       0.192   1.360  -5.420  1.00  0.00           H  
+ATOM   3670 2HG1 VAL    11       0.909   0.959  -6.979  1.00  0.00           H  
+ATOM   3671 3HG1 VAL    11       1.251  -0.042  -5.568  1.00  0.00           H  
+ATOM   3672 1HG2 VAL    11       2.638   3.957  -5.706  1.00  0.00           H  
+ATOM   3673 2HG2 VAL    11       2.542   3.144  -7.268  1.00  0.00           H  
+ATOM   3674 3HG2 VAL    11       1.074   3.450  -6.341  1.00  0.00           H  
+ATOM   3675  N   LEU    12       3.075  -0.159  -3.377  1.00  0.00           N  
+ATOM   3676  CA  LEU    12       2.898  -1.461  -2.682  1.00  0.00           C  
+ATOM   3677  C   LEU    12       2.458  -1.204  -1.241  1.00  0.00           C  
+ATOM   3678  O   LEU    12       1.435  -1.688  -0.799  1.00  0.00           O  
+ATOM   3679  CB  LEU    12       4.223  -2.228  -2.688  1.00  0.00           C  
+ATOM   3680  CG  LEU    12       4.409  -2.916  -4.041  1.00  0.00           C  
+ATOM   3681  CD1 LEU    12       5.898  -3.159  -4.290  1.00  0.00           C  
+ATOM   3682  CD2 LEU    12       3.669  -4.254  -4.038  1.00  0.00           C  
+ATOM   3683  H   LEU    12       3.909   0.033  -3.857  1.00  0.00           H  
+ATOM   3684  HA  LEU    12       2.143  -2.042  -3.192  1.00  0.00           H  
+ATOM   3685 1HB  LEU    12       5.038  -1.538  -2.519  1.00  0.00           H  
+ATOM   3686 2HB  LEU    12       4.212  -2.972  -1.906  1.00  0.00           H  
+ATOM   3687  HG  LEU    12       4.011  -2.285  -4.824  1.00  0.00           H  
+ATOM   3688 1HD1 LEU    12       6.469  -2.762  -3.464  1.00  0.00           H  
+ATOM   3689 2HD1 LEU    12       6.079  -4.220  -4.377  1.00  0.00           H  
+ATOM   3690 3HD1 LEU    12       6.195  -2.667  -5.204  1.00  0.00           H  
+ATOM   3691 1HD2 LEU    12       3.751  -4.709  -3.062  1.00  0.00           H  
+ATOM   3692 2HD2 LEU    12       2.627  -4.091  -4.271  1.00  0.00           H  
+ATOM   3693 3HD2 LEU    12       4.104  -4.909  -4.778  1.00  0.00           H  
+ATOM   3694  N   LEU    13       3.215  -0.436  -0.505  1.00  0.00           N  
+ATOM   3695  CA  LEU    13       2.827  -0.144   0.902  1.00  0.00           C  
+ATOM   3696  C   LEU    13       1.434   0.486   0.912  1.00  0.00           C  
+ATOM   3697  O   LEU    13       0.662   0.299   1.831  1.00  0.00           O  
+ATOM   3698  CB  LEU    13       3.837   0.823   1.521  1.00  0.00           C  
+ATOM   3699  CG  LEU    13       5.096   0.055   1.926  1.00  0.00           C  
+ATOM   3700  CD1 LEU    13       4.710  -1.116   2.830  1.00  0.00           C  
+ATOM   3701  CD2 LEU    13       5.791  -0.481   0.671  1.00  0.00           C  
+ATOM   3702  H   LEU    13       4.034  -0.047  -0.878  1.00  0.00           H  
+ATOM   3703  HA  LEU    13       2.810  -1.064   1.469  1.00  0.00           H  
+ATOM   3704 1HB  LEU    13       4.094   1.585   0.800  1.00  0.00           H  
+ATOM   3705 2HB  LEU    13       3.403   1.286   2.396  1.00  0.00           H  
+ATOM   3706  HG  LEU    13       5.766   0.715   2.457  1.00  0.00           H  
+ATOM   3707 1HD1 LEU    13       3.958  -0.793   3.535  1.00  0.00           H  
+ATOM   3708 2HD1 LEU    13       4.316  -1.921   2.228  1.00  0.00           H  
+ATOM   3709 3HD1 LEU    13       5.582  -1.459   3.367  1.00  0.00           H  
+ATOM   3710 1HD2 LEU    13       5.989   0.336  -0.007  1.00  0.00           H  
+ATOM   3711 2HD2 LEU    13       6.721  -0.952   0.948  1.00  0.00           H  
+ATOM   3712 3HD2 LEU    13       5.152  -1.204   0.187  1.00  0.00           H  
+ATOM   3713  N   ALA    14       1.103   1.225  -0.112  1.00  0.00           N  
+ATOM   3714  CA  ALA    14      -0.243   1.858  -0.168  1.00  0.00           C  
+ATOM   3715  C   ALA    14      -1.307   0.774   0.002  1.00  0.00           C  
+ATOM   3716  O   ALA    14      -2.127   0.829   0.896  1.00  0.00           O  
+ATOM   3717  CB  ALA    14      -0.424   2.552  -1.520  1.00  0.00           C  
+ATOM   3718  H   ALA    14       1.737   1.357  -0.847  1.00  0.00           H  
+ATOM   3719  HA  ALA    14      -0.335   2.583   0.628  1.00  0.00           H  
+ATOM   3720 1HB  ALA    14       0.452   3.144  -1.740  1.00  0.00           H  
+ATOM   3721 2HB  ALA    14      -0.559   1.808  -2.291  1.00  0.00           H  
+ATOM   3722 3HB  ALA    14      -1.292   3.193  -1.483  1.00  0.00           H  
+ATOM   3723  N   GLN    15      -1.294  -0.218  -0.847  1.00  0.00           N  
+ATOM   3724  CA  GLN    15      -2.298  -1.310  -0.726  1.00  0.00           C  
+ATOM   3725  C   GLN    15      -2.156  -1.964   0.649  1.00  0.00           C  
+ATOM   3726  O   GLN    15      -3.117  -2.428   1.229  1.00  0.00           O  
+ATOM   3727  CB  GLN    15      -2.054  -2.353  -1.819  1.00  0.00           C  
+ATOM   3728  CG  GLN    15      -2.915  -2.023  -3.039  1.00  0.00           C  
+ATOM   3729  CD  GLN    15      -4.091  -3.001  -3.117  1.00  0.00           C  
+ATOM   3730  OE1 GLN    15      -3.988  -4.044  -3.733  1.00  0.00           O  
+ATOM   3731  NE2 GLN    15      -5.210  -2.706  -2.516  1.00  0.00           N  
+ATOM   3732  H   GLN    15      -0.619  -0.248  -1.557  1.00  0.00           H  
+ATOM   3733  HA  GLN    15      -3.292  -0.901  -0.831  1.00  0.00           H  
+ATOM   3734 1HB  GLN    15      -1.011  -2.342  -2.100  1.00  0.00           H  
+ATOM   3735 2HB  GLN    15      -2.319  -3.331  -1.447  1.00  0.00           H  
+ATOM   3736 1HG  GLN    15      -3.291  -1.014  -2.954  1.00  0.00           H  
+ATOM   3737 2HG  GLN    15      -2.319  -2.110  -3.935  1.00  0.00           H  
+ATOM   3738 1HE2 GLN    15      -5.967  -3.326  -2.560  1.00  0.00           H  
+ATOM   3739 2HE2 GLN    15      -5.293  -1.864  -2.021  1.00  0.00           H  
+ATOM   3740  N   ALA    16      -0.962  -1.997   1.179  1.00  0.00           N  
+ATOM   3741  CA  ALA    16      -0.758  -2.611   2.520  1.00  0.00           C  
+ATOM   3742  C   ALA    16      -1.614  -1.864   3.543  1.00  0.00           C  
+ATOM   3743  O   ALA    16      -2.164  -2.448   4.455  1.00  0.00           O  
+ATOM   3744  CB  ALA    16       0.718  -2.507   2.913  1.00  0.00           C  
+ATOM   3745  H   ALA    16      -0.201  -1.612   0.696  1.00  0.00           H  
+ATOM   3746  HA  ALA    16      -1.053  -3.650   2.491  1.00  0.00           H  
+ATOM   3747 1HB  ALA    16       1.292  -2.155   2.068  1.00  0.00           H  
+ATOM   3748 2HB  ALA    16       0.824  -1.815   3.735  1.00  0.00           H  
+ATOM   3749 3HB  ALA    16       1.080  -3.479   3.214  1.00  0.00           H  
+ATOM   3750  N   VAL    17      -1.740  -0.573   3.390  1.00  0.00           N  
+ATOM   3751  CA  VAL    17      -2.569   0.213   4.343  1.00  0.00           C  
+ATOM   3752  C   VAL    17      -4.044   0.023   3.988  1.00  0.00           C  
+ATOM   3753  O   VAL    17      -4.905   0.020   4.845  1.00  0.00           O  
+ATOM   3754  CB  VAL    17      -2.201   1.694   4.243  1.00  0.00           C  
+ATOM   3755  CG1 VAL    17      -3.030   2.496   5.249  1.00  0.00           C  
+ATOM   3756  CG2 VAL    17      -0.714   1.871   4.554  1.00  0.00           C  
+ATOM   3757  H   VAL    17      -1.294  -0.123   2.642  1.00  0.00           H  
+ATOM   3758  HA  VAL    17      -2.392  -0.137   5.350  1.00  0.00           H  
+ATOM   3759  HB  VAL    17      -2.407   2.049   3.243  1.00  0.00           H  
+ATOM   3760 1HG1 VAL    17      -2.938   2.050   6.228  1.00  0.00           H  
+ATOM   3761 2HG1 VAL    17      -2.669   3.514   5.281  1.00  0.00           H  
+ATOM   3762 3HG1 VAL    17      -4.067   2.491   4.947  1.00  0.00           H  
+ATOM   3763 1HG2 VAL    17      -0.396   1.097   5.238  1.00  0.00           H  
+ATOM   3764 2HG2 VAL    17      -0.144   1.800   3.641  1.00  0.00           H  
+ATOM   3765 3HG2 VAL    17      -0.554   2.839   5.006  1.00  0.00           H  
+ATOM   3766  N   PHE    18      -4.339  -0.146   2.727  1.00  0.00           N  
+ATOM   3767  CA  PHE    18      -5.755  -0.348   2.310  1.00  0.00           C  
+ATOM   3768  C   PHE    18      -6.316  -1.573   3.034  1.00  0.00           C  
+ATOM   3769  O   PHE    18      -7.273  -1.482   3.776  1.00  0.00           O  
+ATOM   3770  CB  PHE    18      -5.813  -0.573   0.797  1.00  0.00           C  
+ATOM   3771  CG  PHE    18      -7.239  -0.846   0.379  1.00  0.00           C  
+ATOM   3772  CD1 PHE    18      -7.748  -2.149   0.436  1.00  0.00           C  
+ATOM   3773  CD2 PHE    18      -8.051   0.205  -0.071  1.00  0.00           C  
+ATOM   3774  CE1 PHE    18      -9.069  -2.402   0.045  1.00  0.00           C  
+ATOM   3775  CE2 PHE    18      -9.371  -0.050  -0.462  1.00  0.00           C  
+ATOM   3776  CZ  PHE    18      -9.880  -1.353  -0.405  1.00  0.00           C  
+ATOM   3777  H   PHE    18      -3.625  -0.148   2.055  1.00  0.00           H  
+ATOM   3778  HA  PHE    18      -6.336   0.525   2.570  1.00  0.00           H  
+ATOM   3779 1HB  PHE    18      -5.451   0.308   0.290  1.00  0.00           H  
+ATOM   3780 2HB  PHE    18      -5.196  -1.419   0.536  1.00  0.00           H  
+ATOM   3781  HD1 PHE    18      -7.123  -2.958   0.783  1.00  0.00           H  
+ATOM   3782  HD2 PHE    18      -7.659   1.210  -0.114  1.00  0.00           H  
+ATOM   3783  HE1 PHE    18      -9.461  -3.407   0.088  1.00  0.00           H  
+ATOM   3784  HE2 PHE    18      -9.996   0.759  -0.808  1.00  0.00           H  
+ATOM   3785  HZ  PHE    18     -10.898  -1.549  -0.707  1.00  0.00           H  
+ATOM   3786  N   LEU    19      -5.720  -2.718   2.833  1.00  0.00           N  
+ATOM   3787  CA  LEU    19      -6.217  -3.941   3.522  1.00  0.00           C  
+ATOM   3788  C   LEU    19      -6.304  -3.661   5.024  1.00  0.00           C  
+ATOM   3789  O   LEU    19      -7.027  -4.313   5.749  1.00  0.00           O  
+ATOM   3790  CB  LEU    19      -5.248  -5.101   3.274  1.00  0.00           C  
+ATOM   3791  CG  LEU    19      -5.092  -5.324   1.767  1.00  0.00           C  
+ATOM   3792  CD1 LEU    19      -3.610  -5.475   1.421  1.00  0.00           C  
+ATOM   3793  CD2 LEU    19      -5.841  -6.594   1.359  1.00  0.00           C  
+ATOM   3794  H   LEU    19      -4.944  -2.772   2.238  1.00  0.00           H  
+ATOM   3795  HA  LEU    19      -7.195  -4.199   3.144  1.00  0.00           H  
+ATOM   3796 1HB  LEU    19      -4.286  -4.865   3.705  1.00  0.00           H  
+ATOM   3797 2HB  LEU    19      -5.637  -6.000   3.729  1.00  0.00           H  
+ATOM   3798  HG  LEU    19      -5.500  -4.476   1.235  1.00  0.00           H  
+ATOM   3799 1HD1 LEU    19      -3.023  -5.441   2.327  1.00  0.00           H  
+ATOM   3800 2HD1 LEU    19      -3.452  -6.421   0.925  1.00  0.00           H  
+ATOM   3801 3HD1 LEU    19      -3.309  -4.670   0.766  1.00  0.00           H  
+ATOM   3802 1HD2 LEU    19      -5.916  -7.257   2.209  1.00  0.00           H  
+ATOM   3803 2HD2 LEU    19      -6.832  -6.334   1.017  1.00  0.00           H  
+ATOM   3804 3HD2 LEU    19      -5.304  -7.089   0.564  1.00  0.00           H  
+ATOM   3805  N   LEU    20      -5.571  -2.686   5.492  1.00  0.00           N  
+ATOM   3806  CA  LEU    20      -5.609  -2.352   6.944  1.00  0.00           C  
+ATOM   3807  C   LEU    20      -6.993  -1.810   7.301  1.00  0.00           C  
+ATOM   3808  O   LEU    20      -7.656  -2.308   8.188  1.00  0.00           O  
+ATOM   3809  CB  LEU    20      -4.552  -1.289   7.251  1.00  0.00           C  
+ATOM   3810  CG  LEU    20      -4.150  -1.374   8.724  1.00  0.00           C  
+ATOM   3811  CD1 LEU    20      -2.922  -0.496   8.971  1.00  0.00           C  
+ATOM   3812  CD2 LEU    20      -5.308  -0.883   9.596  1.00  0.00           C  
+ATOM   3813  H   LEU    20      -4.998  -2.171   4.886  1.00  0.00           H  
+ATOM   3814  HA  LEU    20      -5.408  -3.241   7.524  1.00  0.00           H  
+ATOM   3815 1HB  LEU    20      -3.684  -1.455   6.629  1.00  0.00           H  
+ATOM   3816 2HB  LEU    20      -4.958  -0.309   7.047  1.00  0.00           H  
+ATOM   3817  HG  LEU    20      -3.917  -2.399   8.975  1.00  0.00           H  
+ATOM   3818 1HD1 LEU    20      -3.003   0.409   8.388  1.00  0.00           H  
+ATOM   3819 2HD1 LEU    20      -2.864  -0.245  10.019  1.00  0.00           H  
+ATOM   3820 3HD1 LEU    20      -2.031  -1.034   8.680  1.00  0.00           H  
+ATOM   3821 1HD2 LEU    20      -5.935  -0.217   9.022  1.00  0.00           H  
+ATOM   3822 2HD2 LEU    20      -5.892  -1.729   9.930  1.00  0.00           H  
+ATOM   3823 3HD2 LEU    20      -4.914  -0.356  10.453  1.00  0.00           H  
+ATOM   3824  N   LEU    21      -7.437  -0.794   6.612  1.00  0.00           N  
+ATOM   3825  CA  LEU    21      -8.779  -0.225   6.910  1.00  0.00           C  
+ATOM   3826  C   LEU    21      -9.848  -1.275   6.600  1.00  0.00           C  
+ATOM   3827  O   LEU    21     -10.928  -1.262   7.158  1.00  0.00           O  
+ATOM   3828  CB  LEU    21      -9.008   1.027   6.054  1.00  0.00           C  
+ATOM   3829  CG  LEU    21      -9.379   0.623   4.624  1.00  0.00           C  
+ATOM   3830  CD1 LEU    21     -10.902   0.576   4.488  1.00  0.00           C  
+ATOM   3831  CD2 LEU    21      -8.812   1.649   3.641  1.00  0.00           C  
+ATOM   3832  H   LEU    21      -6.887  -0.408   5.898  1.00  0.00           H  
+ATOM   3833  HA  LEU    21      -8.830   0.041   7.956  1.00  0.00           H  
+ATOM   3834 1HB  LEU    21      -9.810   1.612   6.480  1.00  0.00           H  
+ATOM   3835 2HB  LEU    21      -8.105   1.618   6.034  1.00  0.00           H  
+ATOM   3836  HG  LEU    21      -8.967  -0.353   4.409  1.00  0.00           H  
+ATOM   3837 1HD1 LEU    21     -11.357   0.830   5.433  1.00  0.00           H  
+ATOM   3838 2HD1 LEU    21     -11.218   1.283   3.736  1.00  0.00           H  
+ATOM   3839 3HD1 LEU    21     -11.206  -0.419   4.196  1.00  0.00           H  
+ATOM   3840 1HD2 LEU    21      -7.762   1.802   3.845  1.00  0.00           H  
+ATOM   3841 2HD2 LEU    21      -8.934   1.287   2.631  1.00  0.00           H  
+ATOM   3842 3HD2 LEU    21      -9.339   2.586   3.753  1.00  0.00           H  
+ATOM   3843  N   THR    22      -9.553  -2.187   5.714  1.00  0.00           N  
+ATOM   3844  CA  THR    22     -10.545  -3.240   5.366  1.00  0.00           C  
+ATOM   3845  C   THR    22     -10.833  -4.094   6.603  1.00  0.00           C  
+ATOM   3846  O   THR    22     -11.971  -4.330   6.956  1.00  0.00           O  
+ATOM   3847  CB  THR    22      -9.981  -4.127   4.254  1.00  0.00           C  
+ATOM   3848  OG1 THR    22      -9.205  -3.333   3.367  1.00  0.00           O  
+ATOM   3849  CG2 THR    22     -11.131  -4.780   3.486  1.00  0.00           C  
+ATOM   3850  H   THR    22      -8.674  -2.178   5.278  1.00  0.00           H  
+ATOM   3851  HA  THR    22     -11.460  -2.776   5.028  1.00  0.00           H  
+ATOM   3852  HB  THR    22      -9.360  -4.895   4.686  1.00  0.00           H  
+ATOM   3853  HG1 THR    22      -9.788  -2.689   2.957  1.00  0.00           H  
+ATOM   3854 1HG2 THR    22     -12.072  -4.498   3.938  1.00  0.00           H  
+ATOM   3855 2HG2 THR    22     -11.111  -4.448   2.458  1.00  0.00           H  
+ATOM   3856 3HG2 THR    22     -11.022  -5.853   3.521  1.00  0.00           H  
+ATOM   3857  N   SER    23      -9.808  -4.554   7.269  1.00  0.00           N  
+ATOM   3858  CA  SER    23     -10.027  -5.387   8.484  1.00  0.00           C  
+ATOM   3859  C   SER    23     -10.629  -4.516   9.588  1.00  0.00           C  
+ATOM   3860  O   SER    23     -11.600  -4.882  10.221  1.00  0.00           O  
+ATOM   3861  CB  SER    23      -8.691  -5.963   8.958  1.00  0.00           C  
+ATOM   3862  OG  SER    23      -7.643  -5.071   8.602  1.00  0.00           O  
+ATOM   3863  H   SER    23      -8.897  -4.351   6.971  1.00  0.00           H  
+ATOM   3864  HA  SER    23     -10.706  -6.193   8.251  1.00  0.00           H  
+ATOM   3865 1HB  SER    23      -8.707  -6.083  10.028  1.00  0.00           H  
+ATOM   3866 2HB  SER    23      -8.532  -6.927   8.493  1.00  0.00           H  
+ATOM   3867  HG  SER    23      -6.978  -5.104   9.293  1.00  0.00           H  
+ATOM   3868  N   GLN    24     -10.062  -3.364   9.822  1.00  0.00           N  
+ATOM   3869  CA  GLN    24     -10.607  -2.470  10.882  1.00  0.00           C  
+ATOM   3870  C   GLN    24     -11.941  -1.881  10.414  1.00  0.00           C  
+ATOM   3871  O   GLN    24     -12.618  -1.192  11.151  1.00  0.00           O  
+ATOM   3872  CB  GLN    24      -9.615  -1.337  11.153  1.00  0.00           C  
+ATOM   3873  CG  GLN    24      -8.342  -1.906  11.782  1.00  0.00           C  
+ATOM   3874  CD  GLN    24      -8.711  -2.782  12.980  1.00  0.00           C  
+ATOM   3875  OE1 GLN    24      -8.958  -3.963  12.831  1.00  0.00           O  
+ATOM   3876  NE2 GLN    24      -8.758  -2.253  14.171  1.00  0.00           N  
+ATOM   3877  H   GLN    24      -9.282  -3.086   9.299  1.00  0.00           H  
+ATOM   3878  HA  GLN    24     -10.762  -3.039  11.787  1.00  0.00           H  
+ATOM   3879 1HB  GLN    24      -9.369  -0.844  10.223  1.00  0.00           H  
+ATOM   3880 2HB  GLN    24     -10.061  -0.623  11.830  1.00  0.00           H  
+ATOM   3881 1HG  GLN    24      -7.814  -2.499  11.050  1.00  0.00           H  
+ATOM   3882 2HG  GLN    24      -7.711  -1.096  12.113  1.00  0.00           H  
+ATOM   3883 1HE2 GLN    24      -8.992  -2.806  14.945  1.00  0.00           H  
+ATOM   3884 2HE2 GLN    24      -8.558  -1.301  14.292  1.00  0.00           H  
+ATOM   3885  N   ARG    25     -12.324  -2.146   9.194  1.00  0.00           N  
+ATOM   3886  CA  ARG    25     -13.613  -1.600   8.681  1.00  0.00           C  
+ATOM   3887  C   ARG    25     -14.293  -2.645   7.795  1.00  0.00           C  
+ATOM   3888  O   ARG    25     -15.267  -3.230   8.242  1.00  0.00           O  
+ATOM   3889  CB  ARG    25     -13.341  -0.336   7.862  1.00  0.00           C  
+ATOM   3890  CG  ARG    25     -14.572   0.571   7.899  1.00  0.00           C  
+ATOM   3891  CD  ARG    25     -14.146   1.999   8.248  1.00  0.00           C  
+ATOM   3892  NE  ARG    25     -15.342   2.888   8.251  1.00  0.00           N  
+ATOM   3893  CZ  ARG    25     -15.540   3.721   7.266  1.00  0.00           C  
+ATOM   3894  NH1 ARG    25     -15.476   3.299   6.033  1.00  0.00           N  
+ATOM   3895  NH2 ARG    25     -15.804   4.975   7.514  1.00  0.00           N  
+ATOM   3896  OXT ARG    25     -13.829  -2.842   6.683  1.00  0.00           O  
+ATOM   3897  H   ARG    25     -11.765  -2.704   8.614  1.00  0.00           H  
+ATOM   3898  HA  ARG    25     -14.257  -1.360   9.514  1.00  0.00           H  
+ATOM   3899 1HB  ARG    25     -12.494   0.188   8.279  1.00  0.00           H  
+ATOM   3900 2HB  ARG    25     -13.128  -0.609   6.840  1.00  0.00           H  
+ATOM   3901 1HG  ARG    25     -15.054   0.564   6.932  1.00  0.00           H  
+ATOM   3902 2HG  ARG    25     -15.263   0.211   8.647  1.00  0.00           H  
+ATOM   3903 1HD  ARG    25     -13.688   2.008   9.226  1.00  0.00           H  
+ATOM   3904 2HD  ARG    25     -13.436   2.352   7.515  1.00  0.00           H  
+ATOM   3905  HE  ARG    25     -15.981   2.847   8.993  1.00  0.00           H  
+ATOM   3906 1HH1 ARG    25     -15.274   2.338   5.844  1.00  0.00           H  
+ATOM   3907 2HH1 ARG    25     -15.628   3.936   5.278  1.00  0.00           H  
+ATOM   3908 1HH2 ARG    25     -15.855   5.298   8.459  1.00  0.00           H  
+ATOM   3909 2HH2 ARG    25     -15.953   5.613   6.759  1.00  0.00           H  
+TER    3910      ARG    25                                                      
+ENDMDL                                                                          
+MASTER       52    0    0    1    0    0    0    6 3900   10    0    2          
+END                                                                             

Added: trunk/packages/bioperl/branches/upstream/current/t/data/1A3I.pdb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/1A3I.pdb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/1A3I.pdb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,454 @@
+HEADER    EXTRACELLULAR MATRIX                    22-JAN-98   1A3I              
+TITLE     X-RAY CRYSTALLOGRAPHIC DETERMINATION OF A COLLAGEN-LIKE               
+TITLE    2 PEPTIDE WITH THE REPEATING SEQUENCE (PRO-PRO-GLY)                    
+COMPND    MOL_ID: 1;                                                            
+COMPND   2 MOLECULE: COLLAGEN-LIKE PEPTIDE;                                     
+COMPND   3 CHAIN: A, B, C;                                                      
+COMPND   4 ENGINEERED: YES                                                      
+SOURCE    MOL_ID: 1;                                                            
+SOURCE   2 SYNTHETIC: YES                                                       
+KEYWDS    COLLAGEN, EXTRACELLULAR MATRIX                                        
+EXPDTA    X-RAY DIFFRACTION                                                     
+AUTHOR    R.Z.KRAMER,L.VITAGLIANO,J.BELLA,R.BERISIO,L.MAZZARELLA,               
+AUTHOR   2 B.BRODSKY,A.ZAGARI,H.M.BERMAN                                        
+REVDAT   2   23-JUN-99 1A3I    1       JRNL                                     
+REVDAT   1   06-MAY-98 1A3I    0                                                
+JRNL        AUTH   R.Z.KRAMER,L.VITAGLIANO,J.BELLA,R.BERISIO,                   
+JRNL        AUTH 2 L.MAZZARELLA,B.BRODSKY,A.ZAGARI,H.M.BERMAN                   
+JRNL        TITL   X-RAY CRYSTALLOGRAPHIC DETERMINATION OF A                    
+JRNL        TITL 2 COLLAGEN-LIKE PEPTIDE WITH THE REPEATING SEQUENCE            
+JRNL        TITL 3 (PRO-PRO-GLY)                                                
+JRNL        REF    J.MOL.BIOL.                   V. 280   623 1998              
+JRNL        REFN   ASTM JMOBAK  UK ISSN 0022-2836                 0070          
+REMARK   1                                                                      
+REMARK   2                                                                      
+REMARK   2 RESOLUTION. 1.97 ANGSTROMS.                                          
+REMARK   3                                                                      
+REMARK   3 REFINEMENT.                                                          
+REMARK   3   PROGRAM     : X-PLOR 3.1                                           
+REMARK   3   AUTHORS     : BRUNGER                                              
+REMARK   3                                                                      
+REMARK   3  DATA USED IN REFINEMENT.                                            
+REMARK   3   RESOLUTION RANGE HIGH (ANGSTROMS) : 1.97                           
+REMARK   3   RESOLUTION RANGE LOW  (ANGSTROMS) : 8.                             
+REMARK   3   DATA CUTOFF            (SIGMA(F)) : 2.                             
+REMARK   3   DATA CUTOFF HIGH         (ABS(F)) : 1000000.                       
+REMARK   3   DATA CUTOFF LOW          (ABS(F)) : 0.1                            
+REMARK   3   COMPLETENESS (WORKING+TEST)   (%) : 76.7                           
+REMARK   3   NUMBER OF REFLECTIONS             : 861                            
+REMARK   3                                                                      
+REMARK   3  FIT TO DATA USED IN REFINEMENT.                                     
+REMARK   3   CROSS-VALIDATION METHOD          : NULL                            
+REMARK   3   FREE R VALUE TEST SET SELECTION  : NULL                            
+REMARK   3   R VALUE            (WORKING SET) : 0.181                           
+REMARK   3   FREE R VALUE                     : NULL                            
+REMARK   3   FREE R VALUE TEST SET SIZE   (%) : NULL                            
+REMARK   3   FREE R VALUE TEST SET COUNT      : NULL                            
+REMARK   3   ESTIMATED ERROR OF FREE R VALUE  : NULL                            
+REMARK   3                                                                      
+REMARK   3  FIT IN THE HIGHEST RESOLUTION BIN.                                  
+REMARK   3   TOTAL NUMBER OF BINS USED           : 10                           
+REMARK   3   BIN RESOLUTION RANGE HIGH       (A) : 1.97                         
+REMARK   3   BIN RESOLUTION RANGE LOW        (A) : 2.04                         
+REMARK   3   BIN COMPLETENESS (WORKING+TEST) (%) : 63.3                         
+REMARK   3   REFLECTIONS IN BIN    (WORKING SET) : 69.                          
+REMARK   3   BIN R VALUE           (WORKING SET) : 0.22                         
+REMARK   3   BIN FREE R VALUE                    : NULL                         
+REMARK   3   BIN FREE R VALUE TEST SET SIZE  (%) : NULL                         
+REMARK   3   BIN FREE R VALUE TEST SET COUNT     : NULL                         
+REMARK   3   ESTIMATED ERROR OF BIN FREE R VALUE : NULL                         
+REMARK   3                                                                      
+REMARK   3  NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.                    
+REMARK   3   PROTEIN ATOMS            : 126                                     
+REMARK   3   NUCLEIC ACID ATOMS       : 0                                       
+REMARK   3   HETEROGEN ATOMS          : 0                                       
+REMARK   3   SOLVENT ATOMS            : 45                                      
+REMARK   3                                                                      
+REMARK   3  B VALUES.                                                           
+REMARK   3   FROM WILSON PLOT           (A**2) : NULL                           
+REMARK   3   MEAN B VALUE      (OVERALL, A**2) : 15.9                           
+REMARK   3   OVERALL ANISOTROPIC B VALUE.                                       
+REMARK   3    B11 (A**2) : NULL                                                 
+REMARK   3    B22 (A**2) : NULL                                                 
+REMARK   3    B33 (A**2) : NULL                                                 
+REMARK   3    B12 (A**2) : NULL                                                 
+REMARK   3    B13 (A**2) : NULL                                                 
+REMARK   3    B23 (A**2) : NULL                                                 
+REMARK   3                                                                      
+REMARK   3  ESTIMATED COORDINATE ERROR.                                         
+REMARK   3   ESD FROM LUZZATI PLOT        (A) : NULL                            
+REMARK   3   ESD FROM SIGMAA              (A) : NULL                            
+REMARK   3   LOW RESOLUTION CUTOFF        (A) : NULL                            
+REMARK   3                                                                      
+REMARK   3  CROSS-VALIDATED ESTIMATED COORDINATE ERROR.                         
+REMARK   3   ESD FROM C-V LUZZATI PLOT    (A) : NULL                            
+REMARK   3   ESD FROM C-V SIGMAA          (A) : NULL                            
+REMARK   3                                                                      
+REMARK   3  RMS DEVIATIONS FROM IDEAL VALUES.                                   
+REMARK   3   BOND LENGTHS                 (A) : 0.01                            
+REMARK   3   BOND ANGLES            (DEGREES) : 2.07                            
+REMARK   3   DIHEDRAL ANGLES        (DEGREES) : NULL                            
+REMARK   3   IMPROPER ANGLES        (DEGREES) : 2.11                            
+REMARK   3                                                                      
+REMARK   3  ISOTROPIC THERMAL MODEL : GROUP                                     
+REMARK   3                                                                      
+REMARK   3  ISOTROPIC THERMAL FACTOR RESTRAINTS.    RMS    SIGMA                
+REMARK   3   MAIN-CHAIN BOND              (A**2) : NULL  ; NULL                 
+REMARK   3   MAIN-CHAIN ANGLE             (A**2) : NULL  ; NULL                 
+REMARK   3   SIDE-CHAIN BOND              (A**2) : NULL  ; NULL                 
+REMARK   3   SIDE-CHAIN ANGLE             (A**2) : NULL  ; NULL                 
+REMARK   3                                                                      
+REMARK   3  NCS MODEL : NONE                                                    
+REMARK   3                                                                      
+REMARK   3  NCS RESTRAINTS.                         RMS   SIGMA/WEIGHT          
+REMARK   3   GROUP  1  POSITIONAL            (A) : NULL  ; NULL                 
+REMARK   3   GROUP  1  B-FACTOR           (A**2) : NULL  ; NULL                 
+REMARK   3                                                                      
+REMARK   3  PARAMETER FILE  1  : PARHCSDX.PRO                                   
+REMARK   3  PARAMETER FILE  2  : NULL                                           
+REMARK   3  TOPOLOGY FILE  1   : TOPHCSDX.PRO                                   
+REMARK   3  TOPOLOGY FILE  2   : NULL                                           
+REMARK   3                                                                      
+REMARK   3  OTHER REFINEMENT REMARKS: DUE TO THE QUASI-INFINITE,                
+REMARK   3  AVERAGED NATURE OF THE TRIPLE HELIX, DURING REFINEMENT              
+REMARK   3  COVALENT BONDS ARE NECESSARY TO JOIN THE MOLECULE WITH              
+REMARK   3  ITS SYMMETRY MATES BOTH ABOVE IT AND BELOW IT ALONG THE             
+REMARK   3  HELICAL AXIS AND TIGHT REFINEMENT CONSTRAINTS WERE                  
+REMARK   3  MAINTAINED.                                                         
+REMARK   4                                                                      
+REMARK   4 1A3I COMPLIES WITH FORMAT V. 2.2, 16-DEC-1996                        
+REMARK   6                                                                      
+REMARK   6 THE 21 RESIDUE ASYMMETRIC UNIT CORRESPONDS TO ONE                    
+REMARK   6 TRIPLE-HELICAL REPEAT AND IS SMALLER THAN THE ENTIRE 90              
+REMARK   6 RESIDUE PEPTIDE DUE TO TRANSLATIONAL DISORDER ALONG THE              
+REMARK   6 HELICAL AXIS.  THE RESULT IS A POLYMER-LIKE STRUCTURE WITH           
+REMARK   6 NO DEFINED ENDS.                                                     
+REMARK   7                                                                      
+REMARK   7 THE POLYMER STRUCTURE IS FORMED BY CONTINUATION OF THE               
+REMARK   7 CHAINS USING THE SYMMETRY-RELATED MOLECULES ALONG THE                
+REMARK   7 HELICAL AXIS.  THE TVECT RECORD BELOW PRESENTS THE                   
+REMARK   7 TRANSLATION THAT WILL GENERATE THE POLYMER.  NOTE:                   
+REMARK   7 THEREFORE, CLOSE CONTACTS BETWEEN SYMMETRY-RELATED                   
+REMARK   7 MOLECULES ARE INTENTIONAL AND NECESSARY.  INTERCHAIN                 
+REMARK   7 HYDROGEN BONDING AT THE END OF CHAINS ALSO UTILIZES                  
+REMARK   7 SYMMETRY-RELATED MOLECULES.                                          
+REMARK   8                                                                      
+REMARK   8 THE ENTIRE 30 RESIDUE LONG PEPTIDE CAN BE GENERATED FROM             
+REMARK   8 THE SUBMITTED ASYMMETRIC UNIT BY APPLYING THE FOLLOWING              
+REMARK   8 TRANSLATIONS (USING FRACTIONAL COORDINATES):                         
+REMARK   8 CHAIN A: TRANSLATE RESIDUES 1 - 9 BY (0 0 1), (0 0 2), AND           
+REMARK   8          (0 0 3) AND RESIDUES 7 - 9 BY (0 0 4).                      
+REMARK   8 CHAIN B: TRANSLATE RESIDUES 31 - 36 BY (0 0 1), (0 0 2),             
+REMARK   8          AND (0 0 3).                                                
+REMARK   8 CHAIN C: TRANSLATE RESIDUES 61 - 66 BY (0 0 1), (0 0 2),             
+REMARK   8          AND (0 0 3) AND RESIDUES 64 - 66 BY (004).                  
+REMARK   8 THIS WILL RESULT IN A MOLECULE WITH A TOTAL OF 90 RESIDUES,          
+REMARK   8 30 IN EACH CHAIN.                                                    
+REMARK   9                                                                      
+REMARK   9 HYDROGEN BONDS BETWEEN PEPTIDE CHAINS FOLLOW THE RICH AND            
+REMARK   9 CRICK MODEL II FOR COLLAGEN.                                         
+REMARK  10                                                                      
+REMARK  10 THE UNIT CELL AXES WERE CHOSEN TO COINCIDE WITH A                    
+REMARK  10 PREVIOUS STRUCTURE DETERMINATION (OKUYAMA 1981) OF THIS              
+REMARK  10 PEPTIDE.                                                             
+REMARK  11                                                                      
+REMARK  11 FOR EACH CHAIN, RESIDUE NUMBERING CORRESPONDS TO THE ENTIRE          
+REMARK  11 MOLECULE RATHER THAN THE SHORTER ASYMMETRIC UNIT.                    
+REMARK 200                                                                      
+REMARK 200 EXPERIMENTAL DETAILS                                                 
+REMARK 200  EXPERIMENT TYPE                : X-RAY DIFFRACTION                  
+REMARK 200  DATE OF DATA COLLECTION        : OCT-1991                           
+REMARK 200  TEMPERATURE           (KELVIN) : 259.0                              
+REMARK 200  PH                             : NULL                               
+REMARK 200  NUMBER OF CRYSTALS USED        : 1                                  
+REMARK 200                                                                      
+REMARK 200  SYNCHROTRON              (Y/N) : N                                  
+REMARK 200  RADIATION SOURCE               : NULL                               
+REMARK 200  BEAMLINE                       : NULL                               
+REMARK 200  X-RAY GENERATOR MODEL          : NULL                               
+REMARK 200  MONOCHROMATIC OR LAUE    (M/L) : M                                  
+REMARK 200  WAVELENGTH OR RANGE        (A) : 1.542                              
+REMARK 200  MONOCHROMATOR                  : NULL                               
+REMARK 200  OPTICS                         : NULL                               
+REMARK 200                                                                      
+REMARK 200  DETECTOR TYPE                  : CAD4 DIFFRACTOMETER                
+REMARK 200  DETECTOR MANUFACTURER          : ENRAF-NONIUS                       
+REMARK 200  INTENSITY-INTEGRATION SOFTWARE : MOLEN                              
+REMARK 200  DATA SCALING SOFTWARE          : NULL                               
+REMARK 200                                                                      
+REMARK 200  NUMBER OF UNIQUE REFLECTIONS   : 1136                               
+REMARK 200  RESOLUTION RANGE HIGH      (A) : 1.97                               
+REMARK 200  RESOLUTION RANGE LOW       (A) : INFINITY                           
+REMARK 200  REJECTION CRITERIA  (SIGMA(I)) : 0.                                 
+REMARK 200                                                                      
+REMARK 200 OVERALL.                                                             
+REMARK 200  COMPLETENESS FOR RANGE     (%) : 99.5                               
+REMARK 200  DATA REDUNDANCY                : NULL                               
+REMARK 200  R MERGE                    (I) : NULL                               
+REMARK 200  R SYM                      (I) : NULL                               
+REMARK 200  <I/SIGMA(I)> FOR THE DATA SET  : NULL                               
+REMARK 200                                                                      
+REMARK 200 IN THE HIGHEST RESOLUTION SHELL.                                     
+REMARK 200  HIGHEST RESOLUTION SHELL, RANGE HIGH (A) : 1.97                     
+REMARK 200  HIGHEST RESOLUTION SHELL, RANGE LOW  (A) : 2.2                      
+REMARK 200  COMPLETENESS FOR SHELL     (%) : 98.                                
+REMARK 200  DATA REDUNDANCY IN SHELL       : NULL                               
+REMARK 200  R MERGE FOR SHELL          (I) : NULL                               
+REMARK 200  R SYM FOR SHELL            (I) : NULL                               
+REMARK 200  <I/SIGMA(I)> FOR SHELL         : NULL                               
+REMARK 200                                                                      
+REMARK 200 METHOD USED TO DETERMINE THE STRUCTURE: MOLECULAR                    
+REMARK 200    REPLACEMENT                                                       
+REMARK 200 SOFTWARE USED: LALS                                                  
+REMARK 200 STARTING MODEL: IDEALIZED SEVEN-FOLD TRIPLE-HELIX                    
+REMARK 200                                                                      
+REMARK 200 REMARK: NULL                                                         
+REMARK 280                                                                      
+REMARK 280 CRYSTAL                                                              
+REMARK 280 SOLVENT CONTENT, VS   (%): NULL                                      
+REMARK 280 MATTHEWS COEFFICIENT, VM (ANGSTROMS**3/DA): NULL                     
+REMARK 280                                                                      
+REMARK 280 CRYSTALLIZATION CONDITIONS: PEPTIDE WAS CRYSTALLIZED FROM            
+REMARK 280 4.0 MG/ML PEPTIDE IN 10% ACETIC ACID, 0.1% SODIUM AZIDE,             
+REMARK 280 AND 3.0% PEG400.                                                     
+REMARK 290                                                                      
+REMARK 290 CRYSTALLOGRAPHIC SYMMETRY                                            
+REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP: P 21 21 21                       
+REMARK 290                                                                      
+REMARK 290      SYMOP   SYMMETRY                                                
+REMARK 290     NNNMMM   OPERATOR                                                
+REMARK 290       1555   X,Y,Z                                                   
+REMARK 290       2555   1/2-X,-Y,1/2+Z                                          
+REMARK 290       3555   -X,1/2+Y,1/2-Z                                          
+REMARK 290       4555   1/2+X,1/2-Y,-Z                                          
+REMARK 290                                                                      
+REMARK 290     WHERE NNN -> OPERATOR NUMBER                                     
+REMARK 290           MMM -> TRANSLATION VECTOR                                  
+REMARK 290                                                                      
+REMARK 290 CRYSTALLOGRAPHIC SYMMETRY TRANSFORMATIONS                            
+REMARK 290 THE FOLLOWING TRANSFORMATIONS OPERATE ON THE ATOM/HETATM             
+REMARK 290 RECORDS IN THIS ENTRY TO PRODUCE CRYSTALLOGRAPHICALLY                
+REMARK 290 RELATED MOLECULES.                                                   
+REMARK 290   SMTRY1   1  1.000000  0.000000  0.000000        0.00000            
+REMARK 290   SMTRY2   1  0.000000  1.000000  0.000000        0.00000            
+REMARK 290   SMTRY3   1  0.000000  0.000000  1.000000        0.00000            
+REMARK 290   SMTRY1   2 -1.000000  0.000000  0.000000       13.40986            
+REMARK 290   SMTRY2   2  0.000000 -1.000000  0.000000        0.00000            
+REMARK 290   SMTRY3   2  0.000000  0.000000  1.000000       10.09000            
+REMARK 290   SMTRY1   3 -1.000000  0.000000  0.000000        0.00000            
+REMARK 290   SMTRY2   3  0.000000  1.000000  0.000000       13.14510            
+REMARK 290   SMTRY3   3  0.000000  0.000000 -1.000000       10.09000            
+REMARK 290   SMTRY1   4  1.000000  0.000000  0.000000       13.40986            
+REMARK 290   SMTRY2   4  0.000000 -1.000000  0.000000       13.14510            
+REMARK 290   SMTRY3   4  0.000000  0.000000 -1.000000        0.00000            
+REMARK 290                                                                      
+REMARK 290 REMARK: NULL                                                         
+REMARK 900                                                                      
+REMARK 900 RELATED ENTRIES                                                      
+REMARK 900 THIS ENTRY IS RELATED TO PDB ENTRY 1A3J.                             
+DBREF  1A3I A    1     9  PDB    1A3I     1A3I             1      9             
+DBREF  1A3I B   31    36  PDB    1A3I     1A3I            31     36             
+DBREF  1A3I C   61    66  PDB    1A3I     1A3I            61     66             
+SEQRES   1 A    9  PRO PRO GLY PRO PRO GLY PRO PRO GLY                          
+SEQRES   1 B    6  PRO PRO GLY PRO PRO GLY                                      
+SEQRES   1 C    6  PRO PRO GLY PRO PRO GLY                                      
+HET    ACY    401       4                                                       
+HET    ACY    402       4                                                       
+HETNAM     ACY ACETIC ACID                                                      
+FORMUL   4  ACY    2(C2 H4 O2)                                                  
+FORMUL   5  HOH   *37(H2 O1)                                                    
+LINK         N   PRO C  61                 C   GLY A   9            1556        
+LINK         N   PRO A   1                 C   GLY B  36            1556        
+LINK         N   PRO B  31                 C   GLY C  66            1556        
+CRYST1   26.820   26.290   20.180  90.00  90.00  90.00 P 21 21 21    4          
+ORIGX1      1.000000  0.000000  0.000000        0.00000                         
+ORIGX2      0.000000  1.000000  0.000000        0.00000                         
+ORIGX3      0.000000  0.000000  1.000000        0.00000                         
+SCALE1      0.037286  0.000000  0.000000        0.00000                         
+SCALE2      0.000000  0.038037  0.000000        0.00000                         
+SCALE3      0.000000  0.000000  0.049554        0.00000                         
+TVECT    1   0.00000   0.00000  20.18000                                        
+ATOM      1  N   PRO A   1       8.316  21.206  21.530  1.00 17.44           N  
+ATOM      2  CA  PRO A   1       7.608  20.729  20.336  1.00 17.44           C  
+ATOM      3  C   PRO A   1       8.487  20.707  19.092  1.00 17.44           C  
+ATOM      4  O   PRO A   1       9.466  21.457  19.005  1.00 17.44           O  
+ATOM      5  CB  PRO A   1       6.460  21.723  20.211  1.00 22.26           C  
+ATOM      6  CG  PRO A   1       7.110  23.002  20.661  1.00 22.26           C  
+ATOM      7  CD  PRO A   1       7.873  22.569  21.889  1.00 22.26           C  
+ATOM      8  N   PRO A   2       8.177  19.849  18.107  1.00 13.49           N  
+ATOM      9  CA  PRO A   2       9.057  19.896  16.936  1.00 13.49           C  
+ATOM     10  C   PRO A   2       9.087  21.308  16.377  1.00 13.49           C  
+ATOM     11  O   PRO A   2       8.318  22.186  16.816  1.00 13.49           O  
+ATOM     12  CB  PRO A   2       8.426  18.899  15.970  1.00 18.26           C  
+ATOM     13  CG  PRO A   2       7.776  17.914  16.886  1.00 18.26           C  
+ATOM     14  CD  PRO A   2       7.166  18.797  17.954  1.00 18.26           C  
+ATOM     15  N   GLY A   3       9.981  21.517  15.426  1.00 12.07           N  
+ATOM     16  CA  GLY A   3      10.060  22.799  14.777  1.00 12.07           C  
+ATOM     17  C   GLY A   3       9.119  22.720  13.589  1.00 12.07           C  
+ATOM     18  O   GLY A   3       8.531  21.655  13.334  1.00 12.07           O  
+ATOM     19  N   PRO A   4       8.954  23.832  12.847  1.00 14.22           N  
+ATOM     20  CA  PRO A   4       8.095  23.907  11.675  1.00 14.22           C  
+ATOM     21  C   PRO A   4       8.733  23.310  10.437  1.00 14.22           C  
+ATOM     22  O   PRO A   4       9.963  23.078  10.402  1.00 14.22           O  
+ATOM     23  CB  PRO A   4       7.863  25.403  11.527  1.00  8.61           C  
+ATOM     24  CG  PRO A   4       9.218  25.934  11.895  1.00  8.61           C  
+ATOM     25  CD  PRO A   4       9.530  25.158  13.143  1.00  8.61           C  
+ATOM     26  N   PRO A   5       7.918  23.027   9.412  1.00 14.03           N  
+ATOM     27  CA  PRO A   5       8.416  22.493   8.135  1.00 14.03           C  
+ATOM     28  C   PRO A   5       9.621  23.306   7.672  1.00 14.03           C  
+ATOM     29  O   PRO A   5       9.876  24.379   8.181  1.00 14.03           O  
+ATOM     30  CB  PRO A   5       7.229  22.641   7.193  1.00 10.90           C  
+ATOM     31  CG  PRO A   5       6.082  22.382   8.123  1.00 10.90           C  
+ATOM     32  CD  PRO A   5       6.452  23.069   9.415  1.00 10.90           C  
+ATOM     33  N   GLY A   6      10.369  22.765   6.725  1.00 10.80           N  
+ATOM     34  CA  GLY A   6      11.517  23.464   6.204  1.00 10.80           C  
+ATOM     35  C   GLY A   6      11.139  24.138   4.928  1.00 10.80           C  
+ATOM     36  O   GLY A   6      10.028  23.948   4.418  1.00 10.80           O  
+ATOM     37  N   PRO A   7      12.050  24.922   4.355  1.00 11.55           N  
+ATOM     38  CA  PRO A   7      11.797  25.641   3.111  1.00 11.55           C  
+ATOM     39  C   PRO A   7      11.484  24.709   1.970  1.00 11.55           C  
+ATOM     40  O   PRO A   7      11.783  23.500   2.038  1.00 11.55           O  
+ATOM     41  CB  PRO A   7      13.091  26.440   2.901  1.00 10.61           C  
+ATOM     42  CG  PRO A   7      14.119  25.540   3.516  1.00 10.61           C  
+ATOM     43  CD  PRO A   7      13.456  25.102   4.796  1.00 10.61           C  
+ATOM     44  N   PRO A   8      10.853  25.225   0.900  1.00 14.31           N  
+ATOM     45  CA  PRO A   8      10.571  24.322  -0.220  1.00 14.31           C  
+ATOM     46  C   PRO A   8      11.897  23.973  -0.901  1.00 14.31           C  
+ATOM     47  O   PRO A   8      12.973  24.425  -0.473  1.00 14.31           O  
+ATOM     48  CB  PRO A   8       9.623  25.131  -1.124  1.00 16.14           C  
+ATOM     49  CG  PRO A   8       9.080  26.188  -0.206  1.00 16.14           C  
+ATOM     50  CD  PRO A   8      10.279  26.556   0.633  1.00 16.14           C  
+ATOM     51  N   GLY A   9      11.816  23.125  -1.918  1.00 11.65           N  
+ATOM     52  CA  GLY A   9      12.996  22.720  -2.639  1.00 11.65           C  
+ATOM     53  C   GLY A   9      13.234  23.577  -3.852  1.00 11.65           C  
+ATOM     54  O   GLY A   9      12.433  24.459  -4.180  1.00 11.65           O  
+TER      55      GLY A   9                                                      
+ATOM     56  N   PRO B  31      12.731  18.403  18.599  1.00  8.71           N  
+ATOM     57  CA  PRO B  31      13.374  17.891  17.389  1.00  8.71           C  
+ATOM     58  C   PRO B  31      13.142  18.745  16.166  1.00  8.71           C  
+ATOM     59  O   PRO B  31      12.207  19.526  16.121  1.00  8.71           O  
+ATOM     60  CB  PRO B  31      12.784  16.471  17.255  1.00 15.38           C  
+ATOM     61  CG  PRO B  31      11.434  16.598  17.873  1.00 15.38           C  
+ATOM     62  CD  PRO B  31      11.660  17.490  19.065  1.00 15.38           C  
+ATOM     63  N   PRO B  32      14.011  18.647  15.178  1.00 13.67           N  
+ATOM     64  CA  PRO B  32      13.833  19.426  13.967  1.00 13.67           C  
+ATOM     65  C   PRO B  32      12.523  19.099  13.336  1.00 13.67           C  
+ATOM     66  O   PRO B  32      12.038  17.968  13.473  1.00 13.67           O  
+ATOM     67  CB  PRO B  32      15.025  19.023  13.125  1.00  9.03           C  
+ATOM     68  CG  PRO B  32      16.077  18.799  14.163  1.00  9.03           C  
+ATOM     69  CD  PRO B  32      15.320  17.975  15.152  1.00  9.03           C  
+ATOM     70  N   GLY B  33      11.928  20.070  12.640  1.00 14.26           N  
+ATOM     71  CA  GLY B  33      10.661  19.801  12.008  1.00 14.26           C  
+ATOM     72  C   GLY B  33      10.802  18.911  10.797  1.00 14.26           C  
+ATOM     73  O   GLY B  33      11.882  18.368  10.516  1.00 14.26           O  
+ATOM     74  N   PRO B  34       9.708  18.736  10.046  1.00 11.50           N  
+ATOM     75  CA  PRO B  34       9.665  17.922   8.836  1.00 11.50           C  
+ATOM     76  C   PRO B  34      10.227  18.620   7.654  1.00 11.50           C  
+ATOM     77  O   PRO B  34      10.284  19.836   7.639  1.00 11.50           O  
+ATOM     78  CB  PRO B  34       8.192  17.649   8.657  1.00  9.66           C  
+ATOM     79  CG  PRO B  34       7.586  18.930   9.132  1.00  9.66           C  
+ATOM     80  CD  PRO B  34       8.350  19.245  10.373  1.00  9.66           C  
+ATOM     81  N   PRO B  35      10.677  17.860   6.651  1.00 16.25           N  
+ATOM     82  CA  PRO B  35      11.228  18.392   5.413  1.00 16.25           C  
+ATOM     83  C   PRO B  35      10.219  19.317   4.794  1.00 16.25           C  
+ATOM     84  O   PRO B  35       9.010  19.249   5.127  1.00 16.25           O  
+ATOM     85  CB  PRO B  35      11.468  17.160   4.542  1.00 10.89           C  
+ATOM     86  CG  PRO B  35      11.668  16.066   5.531  1.00 10.89           C  
+ATOM     87  CD  PRO B  35      10.798  16.385   6.703  1.00 10.89           C  
+ATOM     88  N   GLY B  36      10.709  20.223   3.948  1.00 19.34           N  
+ATOM     89  CA  GLY B  36       9.836  21.154   3.260  1.00 19.34           C  
+ATOM     90  C   GLY B  36       9.207  20.491   2.050  1.00 19.34           C  
+ATOM     91  O   GLY B  36       9.565  19.355   1.710  1.00 19.34           O  
+TER      92      GLY B  36                                                      
+ATOM     93  N   PRO C  61      14.309  23.332  15.622  1.00 10.63           N  
+ATOM     94  CA  PRO C  61      14.643  24.095  14.422  1.00 10.63           C  
+ATOM     95  C   PRO C  61      13.859  23.751  13.161  1.00 10.63           C  
+ATOM     96  O   PRO C  61      13.189  22.726  13.091  1.00 10.63           O  
+ATOM     97  CB  PRO C  61      16.144  23.846  14.270  1.00 19.41           C  
+ATOM     98  CG  PRO C  61      16.308  22.475  14.820  1.00 19.41           C  
+ATOM     99  CD  PRO C  61      15.451  22.510  16.056  1.00 19.41           C  
+ATOM    100  N   PRO C  62      13.910  24.631  12.163  1.00 15.75           N  
+ATOM    101  CA  PRO C  62      13.234  24.402  10.876  1.00 15.75           C  
+ATOM    102  C   PRO C  62      13.686  23.039  10.262  1.00 15.75           C  
+ATOM    103  O   PRO C  62      14.874  22.653  10.369  1.00 15.75           O  
+ATOM    104  CB  PRO C  62      13.668  25.600  10.024  1.00  9.87           C  
+ATOM    105  CG  PRO C  62      13.859  26.675  11.052  1.00  9.87           C  
+ATOM    106  CD  PRO C  62      14.515  25.976  12.213  1.00  9.87           C  
+ATOM    107  N   GLY C  63      12.761  22.316   9.639  1.00 16.50           N  
+ATOM    108  CA  GLY C  63      13.167  21.066   9.028  1.00 16.50           C  
+ATOM    109  C   GLY C  63      14.105  21.312   7.849  1.00 16.50           C  
+ATOM    110  O   GLY C  63      14.435  22.472   7.518  1.00 16.50           O  
+ATOM    111  N   PRO C  64      14.571  20.246   7.184  1.00 15.53           N  
+ATOM    112  CA  PRO C  64      15.458  20.417   6.037  1.00 15.53           C  
+ATOM    113  C   PRO C  64      14.684  20.927   4.823  1.00 15.53           C  
+ATOM    114  O   PRO C  64      13.463  20.829   4.772  1.00 15.53           O  
+ATOM    115  CB  PRO C  64      16.001  19.018   5.816  1.00  5.63           C  
+ATOM    116  CG  PRO C  64      14.806  18.201   6.163  1.00  5.63           C  
+ATOM    117  CD  PRO C  64      14.354  18.810   7.450  1.00  5.63           C  
+ATOM    118  N   PRO C  65      15.378  21.485   3.840  1.00 10.50           N  
+ATOM    119  CA  PRO C  65      14.622  21.927   2.689  1.00 10.50           C  
+ATOM    120  C   PRO C  65      14.079  20.706   1.955  1.00 10.50           C  
+ATOM    121  O   PRO C  65      14.505  19.531   2.221  1.00 10.50           O  
+ATOM    122  CB  PRO C  65      15.615  22.734   1.892  1.00 11.55           C  
+ATOM    123  CG  PRO C  65      16.951  22.189   2.279  1.00 11.55           C  
+ATOM    124  CD  PRO C  65      16.808  21.850   3.742  1.00 11.55           C  
+ATOM    125  N   GLY C  66      13.102  20.952   1.076  1.00  9.96           N  
+ATOM    126  CA  GLY C  66      12.479  19.874   0.334  1.00  9.96           C  
+ATOM    127  C   GLY C  66      13.226  19.430  -0.900  1.00  9.96           C  
+ATOM    128  O   GLY C  66      14.206  20.054  -1.286  1.00  9.96           O  
+TER     129      GLY C  66                                                      
+HETATM  130  C   ACY   401       3.682  22.541  11.236  1.00 21.19           C  
+HETATM  131  O   ACY   401       2.807  23.097  10.553  1.00 21.19           O  
+HETATM  132  OXT ACY   401       4.306  23.101  12.291  1.00 21.19           O  
+HETATM  133  CH3 ACY   401       4.134  21.141  10.915  1.00 21.19           C  
+HETATM  134  C   ACY   402      19.091  22.160   7.837  1.00 38.32           C  
+HETATM  135  O   ACY   402      19.334  21.755   6.694  1.00 38.32           O  
+HETATM  136  OXT ACY   402      18.633  21.338   8.768  1.00 38.32           O  
+HETATM  137  CH3 ACY   402      19.293  23.618   8.266  1.00 38.32           C  
+HETATM  138  O   HOH   101       5.594  21.889  15.805  1.00 28.97           O  
+HETATM  139  O   HOH   102       7.355  25.253  16.293  1.00 22.47           O  
+HETATM  140  O   HOH   103       6.457  20.993  12.327  1.00 29.02           O  
+HETATM  141  O   HOH   104       7.423  26.270   8.119  1.00 30.44           O  
+HETATM  142  O   HOH   105      11.373  26.588   6.950  1.00 23.32           O  
+HETATM  143  O   HOH   106       7.408  25.371   4.091  1.00  5.05           O  
+HETATM  144  O   HOH   107      13.153  27.296  -0.172  1.00 21.02           O  
+HETATM  145  O   HOH   108      15.664  24.810  -1.390  1.00 11.17           O  
+HETATM  146  O   HOH   109      12.152  26.921  -4.540  1.00  9.23           O  
+HETATM  147  O   HOH   111      13.443  15.615  12.881  1.00 13.59           O  
+HETATM  148  O   HOH   112       9.758  16.818  12.994  1.00 27.99           O  
+HETATM  149  O   HOH   113      12.963  16.325   9.228  1.00 23.25           O  
+HETATM  150  O   HOH   115       5.878  20.610   4.485  1.00 24.12           O  
+HETATM  151  O   HOH   116       8.848  16.145   2.532  1.00 47.48           O  
+HETATM  152  O   HOH   121      18.638  24.296  11.397  1.00 27.03           O  
+HETATM  153  O   HOH   122      16.444  20.463  10.447  1.00 25.94           O  
+HETATM  154  O   HOH   123      16.295  24.230   6.930  1.00 19.70           O  
+HETATM  155  O   HOH   124      17.331  17.619   2.413  1.00 31.73           O  
+HETATM  156  O   HOH   125      13.985  16.977   0.817  1.00 53.43           O  
+HETATM  157  O   HOH   126      16.914  19.363  -0.910  1.00  9.43           O  
+HETATM  158  O   HOH   201       5.473  23.666   4.098  1.00 14.40           O  
+HETATM  159  O   HOH   202      21.595  24.992   9.644  1.00 17.29           O  
+HETATM  160  O   HOH   204       4.187  25.027   5.974  1.00  7.43           O  
+HETATM  161  O   HOH   205      13.466  14.688   2.583  1.00  7.90           O  
+HETATM  162  O   HOH   208      16.622  16.376  -0.560  1.00 22.05           O  
+HETATM  163  O   HOH   209      17.417  23.063  -0.970  1.00 23.97           O  
+HETATM  164  O   HOH   210      17.034  27.356   9.718  1.00 20.07           O  
+HETATM  165  O   HOH   218       3.280  23.836   8.294  1.00 17.32           O  
+HETATM  166  O   HOH   220      16.159  26.500  -3.605  1.00 26.00           O  
+HETATM  167  O   HOH   241      17.060  26.756   1.465  1.00 16.81           O  
+HETATM  168  O   HOH   242       6.144  15.504   5.728  1.00 23.11           O  
+HETATM  169  O   HOH   243      18.532  26.812  12.585  1.00 13.40           O  
+HETATM  170  O   HOH   244       9.783  28.384   5.130  1.00 21.62           O  
+HETATM  171  O   HOH   245      10.552  28.648  13.053  1.00 23.05           O  
+HETATM  172  O   HOH   246       9.269  14.976  15.672  1.00 28.60           O  
+HETATM  173  O   HOH   247       6.015  15.569  15.283  1.00 26.87           O  
+HETATM  174  O   HOH   248      20.598  27.021   8.450  1.00 22.69           O  
+CONECT  130  131  132  133                                                      
+CONECT  131  130                                                                
+CONECT  132  130                                                                
+CONECT  133  130                                                                
+CONECT  134  135  136  137                                                      
+CONECT  135  134                                                                
+CONECT  136  134                                                                
+CONECT  137  134                                                                
+MASTER      226    0    2    0    0    0    0    6  171    3    8    3          
+END                                                                             

Added: trunk/packages/bioperl/branches/upstream/current/t/data/1BPT.pdb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/1BPT.pdb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/1BPT.pdb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,602 @@
+HEADER    PROTEINASE INHIBITOR (TRYPSIN)          11-DEC-91   1BPT      1BPT   2
+COMPND    BOVINE PANCREATIC TRYPSIN INHIBITOR (/BPTI$) MUTANT (TYR 23   1BPT   3
+COMPND   2 REPLACED BY ALA) (/Y23A$)                                    1BPT   4
+SOURCE    BOVINE (BOS $TAURUS) PANCREAS                                 1BPT   5
+AUTHOR    D.HOUSSET,A.WLODAWER,F.TAO,J.FUCHS,C.WOODWARD                 1BPT   6
+REVDAT   1   15-JAN-93 1BPT    0                                        1BPT   7
+JRNL        AUTH   A.T.DANISHEFSKY,D.HOUSSET,K.-S.KIM,F.TAO,J.FUCHS,    1BPT   8
+JRNL        AUTH 2 C.WOODWARD,A.WLODAWER                                1BPT   9
+JRNL        TITL   CREVICE-FORMING MUTANTS OF BPTI: CRYSTAL STRUCTURES  1BPT  10
+JRNL        TITL 2 OF F22A, Y23A, N43G, AND F45A                        1BPT  11
+JRNL        REF    TO BE PUBLISHED                                      1BPT  12
+JRNL        REFN                                                   353  1BPT  13
+REMARK   1                                                              1BPT  14
+REMARK   1 REFERENCE 1                                                  1BPT  15
+REMARK   1  AUTH   K.S.KIM,F.TAO,J.FUCHS,A.T.DANISHEFSKY,D.HOUSSET,     1BPT  16
+REMARK   1  AUTH 2 A.WLODAWER,C.WOODWARD                                1BPT  17
+REMARK   1  TITL   CREVICE-FORMING MUTANTS OF BPTI: STABILITY CHANGES   1BPT  18
+REMARK   1  TITL 2 AND NEW HYDROPHOBIC SURFACE                          1BPT  19
+REMARK   1  REF    TO BE PUBLISHED                                      1BPT  20
+REMARK   1  REFN                                                   353  1BPT  21
+REMARK   1 REFERENCE 2                                                  1BPT  22
+REMARK   1  AUTH   D.HOUSSET,K.-*S.KIM,J.FUCHS,C.WOODWARD,A.WLODAWER    1BPT  23
+REMARK   1  TITL   CRYSTAL STRUCTURE OF A /Y35G$ MUTANT OF BOVINE       1BPT  24
+REMARK   1  TITL 2 PANCREATIC TRYPSIN INHIBITOR                         1BPT  25
+REMARK   1  REF    J.MOL.BIOL.                   V. 220   757 1991      1BPT  26
+REMARK   1  REFN   ASTM JMOBAK  UK ISSN 0022-2836                  070  1BPT  27
+REMARK   1 REFERENCE 3                                                  1BPT  28
+REMARK   1  AUTH   C.EIGENBROT,M.RANDAL,A.A.KOSSIAKOFF                  1BPT  29
+REMARK   1  TITL   STRUCTURAL EFFECTS INDUCED BY REMOVAL OF A           1BPT  30
+REMARK   1  TITL 2 DISULFIDE-BRIDGE. THE X-RAY STRUCTURE OF THE         1BPT  31
+REMARK   1  TITL 3 /C30A(SLASH)C51A$ MUTANT OF BASIC PANCREATIC         1BPT  32
+REMARK   1  TITL 4 TRYPSIN INHIBITOR AT 1.6 ANGSTROMS                   1BPT  33
+REMARK   1  REF    PROTEIN ENG.                  V.   3   591 1990      1BPT  34
+REMARK   1  REFN   ASTM PRENE9  UK ISSN 0269-2139                  859  1BPT  35
+REMARK   1 REFERENCE 4                                                  1BPT  36
+REMARK   1  AUTH   T.R.HYNES,M.RANDAL,L.A.KENNEDY,C.EIGENBROT,          1BPT  37
+REMARK   1  AUTH 2 A.A.KOSSIAKOFF                                       1BPT  38
+REMARK   1  TITL   X-RAY CRYSTAL STRUCTURE OF THE PROTEASE INHIBITOR    1BPT  39
+REMARK   1  TITL 2 DOMAIN OF ALZHEIMER'S AMYLOID BETA-*PROTEIN          1BPT  40
+REMARK   1  TITL 3 PRECURSOR                                            1BPT  41
+REMARK   1  REF    BIOCHEMISTRY                  V.  29 10018 1990      1BPT  42
+REMARK   1  REFN   ASTM BICHAW  US ISSN 0006-2960                  033  1BPT  43
+REMARK   1 REFERENCE 5                                                  1BPT  44
+REMARK   1  AUTH   A.WLODAWER,J.NACHMAN,G.L.GILLILAND,W.GALLAGHER       1BPT  45
+REMARK   1  AUTH 2 C.WOODWARD                                           1BPT  46
+REMARK   1  TITL   STRUCTURE OF FORM /III$ CRYSTALS OF BOVINE           1BPT  47
+REMARK   1  TITL 2 PANCREATIC TRYPSIN INHIBITOR.                        1BPT  48
+REMARK   1  REF    J.MOL.BIOL.                   V. 198   469 1987      1BPT  49
+REMARK   1  REFN   ASTM JMOBAK  UK ISSN 0022-2836                  070  1BPT  50
+REMARK   1 REFERENCE 6                                                  1BPT  51
+REMARK   1  AUTH   A.WLODAWER,J.WALTER,R.HUBER,L.SJOLIN                 1BPT  52
+REMARK   1  TITL   STRUCTURE OF BOVINE PANCREATIC TRYPSIN INHIBITOR.    1BPT  53
+REMARK   1  TITL 2 RESULTS OF JOINT NEUTRON AND X-RAY REFINEMENT OF     1BPT  54
+REMARK   1  TITL 3 CRYSTAL FORM /II$                                    1BPT  55
+REMARK   1  REF    J.MOL.BIOL.                   V. 180   307 1984      1BPT  56
+REMARK   1  REFN   ASTM JMOBAK  UK ISSN 0022-2836                  070  1BPT  57
+REMARK   1 REFERENCE 7                                                  1BPT  58
+REMARK   1  AUTH   A.WLODAWER,J.DEISENHOFER,R.HUBER                     1BPT  59
+REMARK   1  TITL   COMPARISON OF TWO HIGHLY REFINED STRUCTURES OF       1BPT  60
+REMARK   1  TITL 2 BOVINE PANCREATIC TRYPSIN INHIBITOR                  1BPT  61
+REMARK   1  REF    J.MOL.BIOL.                   V. 193   145 1987      1BPT  62
+REMARK   1  REFN   ASTM JMOBAK  UK ISSN 0022-2836                  070  1BPT  63
+REMARK   1 REFERENCE 8                                                  1BPT  64
+REMARK   1  AUTH   J.WALTER,R.HUBER                                     1BPT  65
+REMARK   1  TITL   PANCREATIC TRYPSIN INHIBITOR. A NEW CRYSTAL FORM     1BPT  66
+REMARK   1  TITL 2 AND ITS ANALYSIS                                     1BPT  67
+REMARK   1  REF    J.MOL.BIOL.                   V. 167   911 1983      1BPT  68
+REMARK   1  REFN   ASTM JMOBAK  UK ISSN 0022-2836                  070  1BPT  69
+REMARK   2                                                              1BPT  70
+REMARK   2 RESOLUTION. 2.0  ANGSTROMS.                                  1BPT  71
+REMARK   3                                                              1BPT  72
+REMARK   3 REFINEMENT.                                                  1BPT  73
+REMARK   3   PROGRAM                    XPLOR                           1BPT  74
+REMARK   3   AUTHORS                    BRUENGER                        1BPT  75
+REMARK   3   R VALUE                    0.165                           1BPT  76
+REMARK   3   RMSD BOND DISTANCES        0.018  ANGSTROMS                1BPT  77
+REMARK   4                                                              1BPT  78
+REMARK   4 THERE WAS NO DENSITY FOR THE FINAL TWO CARBOXY-TERMINAL      1BPT  79
+REMARK   4 RESIDUES (GLY 57 AND ALA 58) IN THE FINAL MAPS.              1BPT  80
+REMARK   5                                                              1BPT  81
+REMARK   5 COORDINATES FOR 40 WATER MOLECULES ARE GIVEN FOLLOWING THE   1BPT  82
+REMARK   5 MAIN BODY OF THE PROTEIN.  THE NOMENCLATURE OF THE WATER     1BPT  83
+REMARK   5 MOLECULES IS THAT OF THE DEPOSITORS.                         1BPT  84
+SEQRES   1     58  ARG PRO ASP PHE CYS LEU GLU PRO PRO TYR THR GLY PRO  1BPT  85
+SEQRES   2     58  CYS LYS ALA ARG ILE ILE ARG TYR PHE ALA ASN ALA LYS  1BPT  86
+SEQRES   3     58  ALA GLY LEU CYS GLN THR PHE VAL TYR GLY GLY CYS ARG  1BPT  87
+SEQRES   4     58  ALA LYS ARG ASN ASN PHE LYS SER ALA GLU ASP CYS MET  1BPT  88
+SEQRES   5     58  ARG THR CYS GLY GLY ALA                              1BPT  89
+HET    PO4     70       5     PHOSPHATE ION                             1BPT  90
+FORMUL   2  PO4    O4 P1 --                                             1BPT  91
+FORMUL   3  HOH   *40(H2 O1)                                            1BPT  92
+HELIX    1  H1 PRO      2  GLU      7  5 ALL DONORS,ACCEPTORS INCLUDED  1BPT  93
+HELIX    2  H2 SER     47  GLY     56  1 ALL DONORS,ACCEPTORS INCLUDED  1BPT  94
+SHEET    1  S1 3 LEU    29  TYR    35  0                                1BPT  95
+SHEET    2  S1 3 ILE    18  ASN    24 -1  N  ILE    18   O  TYR    35   1BPT  96
+SHEET    3  S1 3 PHE    45  PHE    45 -1  N  PHE    45   O  TYR    21   1BPT  97
+SSBOND   1 CYS      5    CYS     55                                     1BPT  98
+SSBOND   2 CYS     14    CYS     38                                     1BPT  99
+SSBOND   3 CYS     30    CYS     51                                     1BPT 100
+CRYST1   69.469   23.346   29.188  90.00  90.00  90.00 P 21 21 21    4  1BPT 101
+ORIGX1      1.000000  0.000000  0.000000        0.00000                 1BPT 102
+ORIGX2      0.000000  1.000000  0.000000        0.00000                 1BPT 103
+ORIGX3      0.000000  0.000000  1.000000        0.00000                 1BPT 104
+SCALE1      0.014395  0.000000  0.000000        0.00000                 1BPT 105
+SCALE2      0.000000  0.042834  0.000000        0.00000                 1BPT 106
+SCALE3      0.000000  0.000000  0.034261        0.00000                 1BPT 107
+ATOM      1  N   ARG     1      29.292  13.212 -12.751  1.00 33.78      1BPT 108
+ATOM      2  CA  ARG     1      29.057  12.019 -11.946  1.00 27.63      1BPT 109
+ATOM      3  C   ARG     1      30.419  11.466 -11.478  1.00 29.64      1BPT 110
+ATOM      4  O   ARG     1      31.064  10.670 -12.174  1.00 27.67      1BPT 111
+ATOM      5  CB  ARG     1      28.297  10.939 -12.700  1.00 28.82      1BPT 112
+ATOM      6  CG  ARG     1      27.925   9.695 -11.892  1.00 25.04      1BPT 113
+ATOM      7  CD  ARG     1      26.990  10.102 -10.814  1.00 36.35      1BPT 114
+ATOM      8  NE  ARG     1      25.671   9.509 -10.875  1.00 43.70      1BPT 115
+ATOM      9  CZ  ARG     1      25.299   8.414 -10.207  1.00 44.64      1BPT 116
+ATOM     10  NH1 ARG     1      26.105   7.749  -9.392  1.00 44.80      1BPT 117
+ATOM     11  NH2 ARG     1      24.055   7.949 -10.340  1.00 51.16      1BPT 118
+ATOM     12  N   PRO     2      30.790  11.942 -10.291  1.00 26.52      1BPT 119
+ATOM     13  CA  PRO     2      32.005  11.512  -9.585  1.00 25.70      1BPT 120
+ATOM     14  C   PRO     2      32.010   9.993  -9.473  1.00 22.30      1BPT 121
+ATOM     15  O   PRO     2      30.968   9.316  -9.390  1.00 22.80      1BPT 122
+ATOM     16  CB  PRO     2      31.892  12.166  -8.209  1.00 25.93      1BPT 123
+ATOM     17  CG  PRO     2      31.000  13.378  -8.442  1.00 26.12      1BPT 124
+ATOM     18  CD  PRO     2      29.997  12.890  -9.480  1.00 27.15      1BPT 125
+ATOM     19  N   ASP     3      33.188   9.438  -9.467  1.00 21.36      1BPT 126
+ATOM     20  CA  ASP     3      33.386   7.984  -9.444  1.00 21.85      1BPT 127
+ATOM     21  C   ASP     3      32.859   7.313  -8.187  1.00 18.86      1BPT 128
+ATOM     22  O   ASP     3      32.477   6.121  -8.298  1.00  9.03      1BPT 129
+ATOM     23  CB  ASP     3      34.849   7.659  -9.800  1.00 30.83      1BPT 130
+ATOM     24  CG  ASP     3      35.257   8.386 -11.083  1.00 41.90      1BPT 131
+ATOM     25  OD1 ASP     3      34.882   7.832 -12.153  1.00 42.04      1BPT 132
+ATOM     26  OD2 ASP     3      35.864   9.473 -10.980  1.00 43.78      1BPT 133
+ATOM     27  N   PHE     4      32.841   8.042  -7.067  1.00 14.13      1BPT 134
+ATOM     28  CA  PHE     4      32.406   7.481  -5.780  1.00 12.89      1BPT 135
+ATOM     29  C   PHE     4      30.894   7.286  -5.768  1.00 12.89      1BPT 136
+ATOM     30  O   PHE     4      30.393   6.509  -4.938  1.00 12.42      1BPT 137
+ATOM     31  CB  PHE     4      32.998   8.208  -4.578  1.00 13.83      1BPT 138
+ATOM     32  CG  PHE     4      32.588   9.662  -4.482  1.00 10.93      1BPT 139
+ATOM     33  CD1 PHE     4      31.311   9.973  -3.991  1.00 12.72      1BPT 140
+ATOM     34  CD2 PHE     4      33.414  10.650  -4.951  1.00 13.53      1BPT 141
+ATOM     35  CE1 PHE     4      30.896  11.308  -3.928  1.00 15.82      1BPT 142
+ATOM     36  CE2 PHE     4      33.041  12.001  -4.878  1.00 14.51      1BPT 143
+ATOM     37  CZ  PHE     4      31.765  12.300  -4.388  1.00 10.66      1BPT 144
+ATOM     38  N   CYS     5      30.213   7.889  -6.711  1.00 11.30      1BPT 145
+ATOM     39  CA  CYS     5      28.751   7.756  -6.894  1.00  8.91      1BPT 146
+ATOM     40  C   CYS     5      28.333   6.353  -7.318  1.00  9.64      1BPT 147
+ATOM     41  O   CYS     5      27.197   5.876  -7.108  1.00  7.73      1BPT 148
+ATOM     42  CB  CYS     5      28.387   8.851  -7.898  1.00 11.12      1BPT 149
+ATOM     43  SG  CYS     5      28.579  10.549  -7.280  1.00 12.16      1BPT 150
+ATOM     44  N   LEU     6      29.193   5.558  -7.897  1.00  4.74      1BPT 151
+ATOM     45  CA  LEU     6      29.077   4.189  -8.326  1.00  6.59      1BPT 152
+ATOM     46  C   LEU     6      29.483   3.225  -7.187  1.00  9.26      1BPT 153
+ATOM     47  O   LEU     6      29.266   1.982  -7.343  1.00 13.32      1BPT 154
+ATOM     48  CB  LEU     6      29.823   3.914  -9.616  1.00 11.74      1BPT 155
+ATOM     49  CG  LEU     6      29.250   4.089 -11.024  1.00 15.14      1BPT 156
+ATOM     50  CD1 LEU     6      28.437   5.346 -11.272  1.00 16.39      1BPT 157
+ATOM     51  CD2 LEU     6      30.480   4.138 -11.961  1.00 10.37      1BPT 158
+ATOM     52  N   GLU     7      29.975   3.690  -6.075  1.00  9.37      1BPT 159
+ATOM     53  CA  GLU     7      30.343   2.830  -4.930  1.00 13.11      1BPT 160
+ATOM     54  C   GLU     7      29.096   2.241  -4.249  1.00 10.70      1BPT 161
+ATOM     55  O   GLU     7      28.116   2.999  -4.165  1.00  9.68      1BPT 162
+ATOM     56  CB  GLU     7      30.967   3.613  -3.776  1.00 19.20      1BPT 163
+ATOM     57  CG  GLU     7      32.359   3.902  -3.452  1.00 23.47      1BPT 164
+ATOM     58  CD  GLU     7      32.867   5.001  -2.582  1.00 27.28      1BPT 165
+ATOM     59  OE1 GLU     7      32.230   5.418  -1.584  1.00 20.35      1BPT 166
+ATOM     60  OE2 GLU     7      33.979   5.507  -2.848  1.00 29.04      1BPT 167
+ATOM     61  N   PRO     8      29.242   1.021  -3.736  1.00 10.63      1BPT 168
+ATOM     62  CA  PRO     8      28.178   0.413  -2.939  1.00 11.56      1BPT 169
+ATOM     63  C   PRO     8      28.214   1.124  -1.566  1.00  8.44      1BPT 170
+ATOM     64  O   PRO     8      29.236   1.677  -1.137  1.00  6.60      1BPT 171
+ATOM     65  CB  PRO     8      28.549  -1.058  -2.811  1.00 11.83      1BPT 172
+ATOM     66  CG  PRO     8      30.015  -1.151  -3.101  1.00 11.19      1BPT 173
+ATOM     67  CD  PRO     8      30.413   0.125  -3.811  1.00 10.52      1BPT 174
+ATOM     68  N   PRO     9      27.095   1.120  -0.895  1.00  8.82      1BPT 175
+ATOM     69  CA  PRO     9      27.024   1.732   0.456  1.00  7.22      1BPT 176
+ATOM     70  C   PRO     9      27.918   0.926   1.409  1.00  8.58      1BPT 177
+ATOM     71  O   PRO     9      28.136  -0.308   1.289  1.00  9.21      1BPT 178
+ATOM     72  CB  PRO     9      25.565   1.704   0.830  1.00  8.50      1BPT 179
+ATOM     73  CG  PRO     9      24.820   0.922  -0.252  1.00  8.80      1BPT 180
+ATOM     74  CD  PRO     9      25.831   0.488  -1.289  1.00  8.43      1BPT 181
+ATOM     75  N   TYR    10      28.489   1.597   2.363  1.00  5.10      1BPT 182
+ATOM     76  CA  TYR    10      29.355   0.981   3.369  1.00  7.77      1BPT 183
+ATOM     77  C   TYR    10      28.862   1.292   4.801  1.00  6.47      1BPT 184
+ATOM     78  O   TYR    10      29.004   2.426   5.260  1.00  5.08      1BPT 185
+ATOM     79  CB  TYR    10      30.776   1.548   3.188  1.00 10.29      1BPT 186
+ATOM     80  CG  TYR    10      31.812   0.865   4.029  1.00  3.16      1BPT 187
+ATOM     81  CD1 TYR    10      32.181  -0.458   3.705  1.00  8.41      1BPT 188
+ATOM     82  CD2 TYR    10      32.424   1.521   5.086  1.00  6.18      1BPT 189
+ATOM     83  CE1 TYR    10      33.149  -1.084   4.464  1.00 15.04      1BPT 190
+ATOM     84  CE2 TYR    10      33.412   0.896   5.832  1.00  6.33      1BPT 191
+ATOM     85  CZ  TYR    10      33.728  -0.425   5.565  1.00  8.46      1BPT 192
+ATOM     86  OH  TYR    10      34.706  -1.029   6.302  1.00 18.91      1BPT 193
+ATOM     87  N   THR    11      28.424   0.231   5.434  1.00  6.57      1BPT 194
+ATOM     88  CA  THR    11      27.936   0.236   6.812  1.00  8.79      1BPT 195
+ATOM     89  C   THR    11      29.135   0.441   7.754  1.00 11.72      1BPT 196
+ATOM     90  O   THR    11      29.083   1.299   8.664  1.00  8.91      1BPT 197
+ATOM     91  CB  THR    11      27.128  -1.066   7.132  1.00  9.23      1BPT 198
+ATOM     92  OG1 THR    11      25.915  -0.877   6.315  1.00 11.90      1BPT 199
+ATOM     93  CG2 THR    11      26.632  -1.251   8.598  1.00  9.84      1BPT 200
+ATOM     94  N   GLY    12      30.177  -0.374   7.495  1.00  7.42      1BPT 201
+ATOM     95  CA  GLY    12      31.386  -0.224   8.306  1.00  5.96      1BPT 202
+ATOM     96  C   GLY    12      31.203  -1.147   9.527  1.00 12.57      1BPT 203
+ATOM     97  O   GLY    12      30.172  -1.792   9.683  1.00  8.90      1BPT 204
+ATOM     98  N   PRO    13      32.259  -1.145  10.334  1.00  9.49      1BPT 205
+ATOM     99  CA  PRO    13      32.384  -1.978  11.478  1.00 11.26      1BPT 206
+ATOM    100  C   PRO    13      31.761  -1.570  12.787  1.00 10.42      1BPT 207
+ATOM    101  O   PRO    13      31.739  -2.455  13.698  1.00  9.05      1BPT 208
+ATOM    102  CB  PRO    13      33.930  -2.112  11.625  1.00 12.73      1BPT 209
+ATOM    103  CG  PRO    13      34.418  -0.761  11.150  1.00 12.37      1BPT 210
+ATOM    104  CD  PRO    13      33.504  -0.375  10.028  1.00  9.04      1BPT 211
+ATOM    105  N   CYS    14      31.284  -0.361  12.916  1.00  9.07      1BPT 212
+ATOM    106  CA  CYS    14      30.645   0.049  14.197  1.00 10.05      1BPT 213
+ATOM    107  C   CYS    14      29.209  -0.417  14.208  1.00 13.02      1BPT 214
+ATOM    108  O   CYS    14      28.598  -0.805  13.198  1.00 11.90      1BPT 215
+ATOM    109  CB  CYS    14      30.904   1.510  14.446  1.00  7.60      1BPT 216
+ATOM    110  SG  CYS    14      32.618   1.798  14.807  1.00 11.36      1BPT 217
+ATOM    111  N   LYS    15      28.649  -0.419  15.375  1.00 15.92      1BPT 218
+ATOM    112  CA  LYS    15      27.336  -0.923  15.756  1.00 15.34      1BPT 219
+ATOM    113  C   LYS    15      26.228   0.042  16.097  1.00 15.21      1BPT 220
+ATOM    114  O   LYS    15      25.303  -0.309  16.854  1.00 19.24      1BPT 221
+ATOM    115  CB  LYS    15      27.580  -1.878  16.945  1.00 11.46      1BPT 222
+ATOM    116  CG  LYS    15      27.956  -3.321  16.572  1.00 19.41      1BPT 223
+ATOM    117  CD  LYS    15      29.204  -3.436  15.742  1.00 19.90      1BPT 224
+ATOM    118  CE  LYS    15      29.694  -4.836  15.472  1.00 20.37      1BPT 225
+ATOM    119  NZ  LYS    15      30.440  -5.364  16.680  1.00 25.63      1BPT 226
+ATOM    120  N   ALA    16      26.223   1.260  15.569  1.00 11.74      1BPT 227
+ATOM    121  CA  ALA    16      25.109   2.169  15.768  1.00  7.48      1BPT 228
+ATOM    122  C   ALA    16      24.174   1.935  14.571  1.00  4.01      1BPT 229
+ATOM    123  O   ALA    16      24.497   1.277  13.592  1.00  5.07      1BPT 230
+ATOM    124  CB  ALA    16      25.536   3.634  15.859  1.00  5.63      1BPT 231
+ATOM    125  N   ARG    17      22.998   2.500  14.683  1.00  8.74      1BPT 232
+ATOM    126  CA  ARG    17      21.990   2.475  13.592  1.00 11.74      1BPT 233
+ATOM    127  C   ARG    17      21.767   3.981  13.235  1.00  7.15      1BPT 234
+ATOM    128  O   ARG    17      21.022   4.658  13.964  1.00 10.78      1BPT 235
+ATOM    129  CB  ARG    17      20.614   1.926  13.959  1.00  6.42      1BPT 236
+ATOM    130  CG  ARG    17      20.105   0.880  12.983  1.00 15.99      1BPT 237
+ATOM    131  CD  ARG    17      19.624  -0.289  13.808  1.00  9.00      1BPT 238
+ATOM    132  NE  ARG    17      18.355   0.098  14.439  1.00 18.94      1BPT 239
+ATOM    133  CZ  ARG    17      17.708  -0.673  15.324  1.00 17.07      1BPT 240
+ATOM    134  NH1 ARG    17      18.221  -1.843  15.720  1.00 10.96      1BPT 241
+ATOM    135  NH2 ARG    17      16.495  -0.327  15.731  1.00 15.74      1BPT 242
+ATOM    136  N   ILE    18      22.497   4.428  12.250  1.00  8.62      1BPT 243
+ATOM    137  CA  ILE    18      22.423   5.788  11.715  1.00 10.26      1BPT 244
+ATOM    138  C   ILE    18      21.950   5.730  10.266  1.00  9.38      1BPT 245
+ATOM    139  O   ILE    18      22.480   5.004   9.421  1.00  8.61      1BPT 246
+ATOM    140  CB  ILE    18      23.762   6.554  11.785  1.00 13.44      1BPT 247
+ATOM    141  CG1 ILE    18      24.274   6.512  13.260  1.00 10.02      1BPT 248
+ATOM    142  CG2 ILE    18      23.602   8.014  11.254  1.00 10.50      1BPT 249
+ATOM    143  CD1 ILE    18      25.764   6.903  13.344  1.00 10.74      1BPT 250
+ATOM    144  N   ILE    19      20.867   6.472  10.022  1.00 15.71      1BPT 251
+ATOM    145  CA  ILE    19      20.335   6.459   8.640  1.00 13.86      1BPT 252
+ATOM    146  C   ILE    19      20.970   7.606   7.855  1.00 13.32      1BPT 253
+ATOM    147  O   ILE    19      20.910   8.763   8.262  1.00 11.39      1BPT 254
+ATOM    148  CB  ILE    19      18.788   6.485   8.581  1.00 13.72      1BPT 255
+ATOM    149  CG1 ILE    19      18.252   5.154   9.150  1.00  6.44      1BPT 256
+ATOM    150  CG2 ILE    19      18.282   6.719   7.120  1.00 12.60      1BPT 257
+ATOM    151  CD1 ILE    19      18.299   5.180  10.698  1.00 16.15      1BPT 258
+ATOM    152  N   ARG    20      21.588   7.204   6.751  1.00 11.35      1BPT 259
+ATOM    153  CA  ARG    20      22.236   8.068   5.785  1.00  9.74      1BPT 260
+ATOM    154  C   ARG    20      21.695   7.783   4.382  1.00  6.30      1BPT 261
+ATOM    155  O   ARG    20      20.944   6.791   4.218  1.00  9.59      1BPT 262
+ATOM    156  CB  ARG    20      23.771   7.817   5.751  1.00  9.39      1BPT 263
+ATOM    157  CG  ARG    20      24.296   7.949   7.188  1.00  2.00      1BPT 264
+ATOM    158  CD  ARG    20      24.046   9.346   7.751  1.00 11.19      1BPT 265
+ATOM    159  NE  ARG    20      25.357   9.939   7.653  1.00 16.42      1BPT 266
+ATOM    160  CZ  ARG    20      26.326  10.113   8.491  1.00 17.82      1BPT 267
+ATOM    161  NH1 ARG    20      26.238  10.069   9.811  1.00 20.52      1BPT 268
+ATOM    162  NH2 ARG    20      27.584  10.026   7.945  1.00 11.96      1BPT 269
+ATOM    163  N   TYR    21      22.153   8.566   3.449  1.00  3.50      1BPT 270
+ATOM    164  CA  TYR    21      21.802   8.399   2.032  1.00  4.17      1BPT 271
+ATOM    165  C   TYR    21      23.053   8.021   1.197  1.00  9.33      1BPT 272
+ATOM    166  O   TYR    21      24.153   8.520   1.489  1.00  9.71      1BPT 273
+ATOM    167  CB  TYR    21      21.233   9.689   1.363  1.00  2.47      1BPT 274
+ATOM    168  CG  TYR    21      19.856   9.977   1.902  1.00  5.78      1BPT 275
+ATOM    169  CD1 TYR    21      18.714   9.405   1.346  1.00  8.74      1BPT 276
+ATOM    170  CD2 TYR    21      19.722  10.802   3.013  1.00  5.70      1BPT 277
+ATOM    171  CE1 TYR    21      17.462   9.666   1.913  1.00  8.01      1BPT 278
+ATOM    172  CE2 TYR    21      18.482  11.081   3.558  1.00 11.15      1BPT 279
+ATOM    173  CZ  TYR    21      17.360  10.496   2.999  1.00  9.06      1BPT 280
+ATOM    174  OH  TYR    21      16.131  10.758   3.524  1.00 17.41      1BPT 281
+ATOM    175  N   PHE    22      22.829   7.168   0.207  1.00  4.91      1BPT 282
+ATOM    176  CA  PHE    22      23.752   6.692  -0.783  1.00  4.40      1BPT 283
+ATOM    177  C   PHE    22      23.022   6.846  -2.150  1.00  7.34      1BPT 284
+ATOM    178  O   PHE    22      21.797   6.782  -2.278  1.00  6.82      1BPT 285
+ATOM    179  CB  PHE    22      24.275   5.290  -0.661  1.00  2.00      1BPT 286
+ATOM    180  CG  PHE    22      23.364   4.150  -0.996  1.00  5.18      1BPT 287
+ATOM    181  CD1 PHE    22      22.275   3.834  -0.223  1.00  2.00      1BPT 288
+ATOM    182  CD2 PHE    22      23.583   3.377  -2.136  1.00  5.44      1BPT 289
+ATOM    183  CE1 PHE    22      21.466   2.719  -0.446  1.00  6.36      1BPT 290
+ATOM    184  CE2 PHE    22      22.782   2.296  -2.447  1.00  5.66      1BPT 291
+ATOM    185  CZ  PHE    22      21.709   1.949  -1.597  1.00 10.45      1BPT 292
+ATOM    186  N   ALA    23      23.845   7.069  -3.162  1.00  9.17      1BPT 293
+ATOM    187  CA  ALA    23      23.410   7.217  -4.539  1.00  7.19      1BPT 294
+ATOM    188  C   ALA    23      23.463   5.773  -5.115  1.00  9.37      1BPT 295
+ATOM    189  O   ALA    23      24.506   5.134  -4.936  1.00  8.29      1BPT 296
+ATOM    190  CB  ALA    23      24.352   8.087  -5.360  1.00  4.05      1BPT 297
+ATOM    191  N   ASN    24      22.400   5.354  -5.693  1.00  7.55      1BPT 298
+ATOM    192  CA  ASN    24      22.126   4.088  -6.326  1.00 15.15      1BPT 299
+ATOM    193  C   ASN    24      22.120   4.301  -7.848  1.00 14.72      1BPT 300
+ATOM    194  O   ASN    24      21.083   4.569  -8.463  1.00 22.72      1BPT 301
+ATOM    195  CB  ASN    24      20.788   3.517  -5.804  1.00 19.37      1BPT 302
+ATOM    196  CG  ASN    24      20.802   1.994  -5.932  1.00 26.23      1BPT 303
+ATOM    197  OD1 ASN    24      21.039   1.448  -7.013  1.00 21.38      1BPT 304
+ATOM    198  ND2 ASN    24      20.644   1.338  -4.770  1.00 31.83      1BPT 305
+ATOM    199  N   ALA    25      23.301   4.247  -8.411  1.00 17.58      1BPT 306
+ATOM    200  CA  ALA    25      23.706   4.425  -9.774  1.00 17.08      1BPT 307
+ATOM    201  C   ALA    25      22.839   3.573 -10.736  1.00 21.07      1BPT 308
+ATOM    202  O   ALA    25      22.702   3.982 -11.887  1.00 16.65      1BPT 309
+ATOM    203  CB  ALA    25      25.187   4.093  -9.933  1.00 11.69      1BPT 310
+ATOM    204  N   LYS    26      22.312   2.473 -10.242  1.00 19.28      1BPT 311
+ATOM    205  CA  LYS    26      21.466   1.487 -10.891  1.00 22.03      1BPT 312
+ATOM    206  C   LYS    26      20.014   1.952 -10.989  1.00 24.09      1BPT 313
+ATOM    207  O   LYS    26      19.442   1.857 -12.084  1.00 23.97      1BPT 314
+ATOM    208  CB  LYS    26      21.491   0.169 -10.130  1.00 22.95      1BPT 315
+ATOM    209  CG  LYS    26      21.106  -1.100 -10.859  1.00 28.77      1BPT 316
+ATOM    210  CD  LYS    26      22.013  -1.383 -12.055  1.00 29.92      1BPT 317
+ATOM    211  CE  LYS    26      22.610  -2.772 -11.994  1.00 25.89      1BPT 318
+ATOM    212  NZ  LYS    26      23.796  -2.860 -12.900  1.00 25.18      1BPT 319
+ATOM    213  N   ALA    27      19.497   2.488  -9.874  1.00 22.92      1BPT 320
+ATOM    214  CA  ALA    27      18.103   2.961  -9.823  1.00 19.83      1BPT 321
+ATOM    215  C   ALA    27      17.967   4.434 -10.123  1.00 20.29      1BPT 322
+ATOM    216  O   ALA    27      16.824   4.929 -10.240  1.00 23.87      1BPT 323
+ATOM    217  CB  ALA    27      17.441   2.549  -8.518  1.00 16.72      1BPT 324
+ATOM    218  N   GLY    28      19.057   5.153 -10.273  1.00 16.11      1BPT 325
+ATOM    219  CA  GLY    28      19.062   6.550 -10.586  1.00 17.38      1BPT 326
+ATOM    220  C   GLY    28      18.483   7.415  -9.466  1.00 18.84      1BPT 327
+ATOM    221  O   GLY    28      18.119   8.579  -9.690  1.00 16.95      1BPT 328
+ATOM    222  N   LEU    29      18.443   6.832  -8.287  1.00 21.25      1BPT 329
+ATOM    223  CA  LEU    29      17.947   7.407  -7.024  1.00 21.57      1BPT 330
+ATOM    224  C   LEU    29      18.945   7.497  -5.866  1.00 18.12      1BPT 331
+ATOM    225  O   LEU    29      19.896   6.727  -5.858  1.00 21.34      1BPT 332
+ATOM    226  CB  LEU    29      17.024   6.228  -6.528  1.00 23.29      1BPT 333
+ATOM    227  CG  LEU    29      15.567   6.158  -6.693  1.00 19.66      1BPT 334
+ATOM    228  CD1 LEU    29      15.044   6.893  -7.909  1.00 21.45      1BPT 335
+ATOM    229  CD2 LEU    29      15.225   4.662  -6.813  1.00 20.64      1BPT 336
+ATOM    230  N   CYS    30      18.644   8.243  -4.834  1.00 15.30      1BPT 337
+ATOM    231  CA  CYS    30      19.386   8.332  -3.584  1.00 13.07      1BPT 338
+ATOM    232  C   CYS    30      18.461   7.549  -2.587  1.00 11.85      1BPT 339
+ATOM    233  O   CYS    30      17.266   7.847  -2.545  1.00  9.49      1BPT 340
+ATOM    234  CB  CYS    30      19.618   9.746  -3.030  1.00  7.99      1BPT 341
+ATOM    235  SG  CYS    30      20.895  10.605  -3.992  1.00 11.01      1BPT 342
+ATOM    236  N   GLN    31      19.006   6.552  -1.970  1.00  8.48      1BPT 343
+ATOM    237  CA  GLN    31      18.317   5.700  -1.038  1.00  6.54      1BPT 344
+ATOM    238  C   GLN    31      19.100   5.705   0.281  1.00  5.06      1BPT 345
+ATOM    239  O   GLN    31      20.258   6.027   0.325  1.00 10.97      1BPT 346
+ATOM    240  CB  GLN    31      18.127   4.265  -1.486  1.00  6.08      1BPT 347
+ATOM    241  CG  GLN    31      17.649   4.129  -2.917  1.00 10.54      1BPT 348
+ATOM    242  CD  GLN    31      17.592   2.652  -3.291  1.00 15.80      1BPT 349
+ATOM    243  OE1 GLN    31      18.591   1.933  -3.214  1.00 13.15      1BPT 350
+ATOM    244  NE2 GLN    31      16.345   2.282  -3.638  1.00 16.64      1BPT 351
+ATOM    245  N   THR    32      18.377   5.323   1.264  1.00  6.87      1BPT 352
+ATOM    246  CA  THR    32      18.655   5.165   2.680  1.00  7.62      1BPT 353
+ATOM    247  C   THR    32      19.331   3.843   2.925  1.00  4.54      1BPT 354
+ATOM    248  O   THR    32      19.028   2.743   2.403  1.00  9.34      1BPT 355
+ATOM    249  CB  THR    32      17.212   5.330   3.366  1.00 11.68      1BPT 356
+ATOM    250  OG1 THR    32      17.146   6.586   4.121  1.00 16.76      1BPT 357
+ATOM    251  CG2 THR    32      16.592   4.169   4.025  1.00 11.13      1BPT 358
+ATOM    252  N   PHE    33      20.325   3.905   3.792  1.00  6.01      1BPT 359
+ATOM    253  CA  PHE    33      21.049   2.679   4.214  1.00  5.51      1BPT 360
+ATOM    254  C   PHE    33      21.391   2.964   5.697  1.00  5.03      1BPT 361
+ATOM    255  O   PHE    33      21.307   4.127   6.134  1.00  2.00      1BPT 362
+ATOM    256  CB  PHE    33      22.237   2.311   3.301  1.00  6.21      1BPT 363
+ATOM    257  CG  PHE    33      23.481   3.115   3.586  1.00  7.37      1BPT 364
+ATOM    258  CD1 PHE    33      23.551   4.451   3.178  1.00  3.42      1BPT 365
+ATOM    259  CD2 PHE    33      24.548   2.490   4.242  1.00  5.10      1BPT 366
+ATOM    260  CE1 PHE    33      24.689   5.209   3.481  1.00  4.87      1BPT 367
+ATOM    261  CE2 PHE    33      25.680   3.270   4.545  1.00  3.64      1BPT 368
+ATOM    262  CZ  PHE    33      25.736   4.604   4.188  1.00  4.11      1BPT 369
+ATOM    263  N   VAL    34      21.791   1.844   6.303  1.00  7.77      1BPT 370
+ATOM    264  CA  VAL    34      22.179   1.847   7.727  1.00  7.30      1BPT 371
+ATOM    265  C   VAL    34      23.698   2.016   7.870  1.00  5.82      1BPT 372
+ATOM    266  O   VAL    34      24.383   1.128   7.464  1.00  7.30      1BPT 373
+ATOM    267  CB  VAL    34      21.710   0.602   8.491  1.00 12.10      1BPT 374
+ATOM    268  CG1 VAL    34      22.162   0.650   9.973  1.00  9.39      1BPT 375
+ATOM    269  CG2 VAL    34      20.174   0.525   8.409  1.00 15.65      1BPT 376
+ATOM    270  N   TYR    35      24.091   3.158   8.410  1.00  5.38      1BPT 377
+ATOM    271  CA  TYR    35      25.512   3.474   8.630  1.00  4.86      1BPT 378
+ATOM    272  C   TYR    35      25.853   3.141  10.089  1.00  6.21      1BPT 379
+ATOM    273  O   TYR    35      25.096   3.564  11.002  1.00  5.98      1BPT 380
+ATOM    274  CB  TYR    35      25.736   4.938   8.236  1.00  5.98      1BPT 381
+ATOM    275  CG  TYR    35      27.123   5.423   8.667  1.00  8.66      1BPT 382
+ATOM    276  CD1 TYR    35      28.268   4.755   8.295  1.00  5.52      1BPT 383
+ATOM    277  CD2 TYR    35      27.254   6.531   9.488  1.00  5.03      1BPT 384
+ATOM    278  CE1 TYR    35      29.512   5.168   8.668  1.00  4.71      1BPT 385
+ATOM    279  CE2 TYR    35      28.510   6.966   9.884  1.00  5.81      1BPT 386
+ATOM    280  CZ  TYR    35      29.623   6.289   9.481  1.00  3.57      1BPT 387
+ATOM    281  OH  TYR    35      30.830   6.742   9.901  1.00 13.53      1BPT 388
+ATOM    282  N   GLY    36      26.877   2.383  10.299  1.00  9.83      1BPT 389
+ATOM    283  CA  GLY    36      27.457   1.871  11.546  1.00 11.89      1BPT 390
+ATOM    284  C   GLY    36      28.028   2.897  12.527  1.00 10.62      1BPT 391
+ATOM    285  O   GLY    36      28.257   2.623  13.728  1.00  6.24      1BPT 392
+ATOM    286  N   GLY    37      28.306   4.113  12.087  1.00 10.90      1BPT 393
+ATOM    287  CA  GLY    37      28.806   5.132  13.062  1.00  8.57      1BPT 394
+ATOM    288  C   GLY    37      30.264   5.436  12.901  1.00 11.39      1BPT 395
+ATOM    289  O   GLY    37      30.788   6.346  13.587  1.00 12.05      1BPT 396
+ATOM    290  N   CYS    38      30.958   4.608  12.100  1.00 11.97      1BPT 397
+ATOM    291  CA  CYS    38      32.402   4.846  11.895  1.00 10.71      1BPT 398
+ATOM    292  C   CYS    38      32.879   4.309  10.558  1.00  7.26      1BPT 399
+ATOM    293  O   CYS    38      32.377   3.284  10.069  1.00 10.85      1BPT 400
+ATOM    294  CB  CYS    38      33.254   4.254  13.025  1.00  5.40      1BPT 401
+ATOM    295  SG  CYS    38      33.463   2.472  13.064  1.00 11.31      1BPT 402
+ATOM    296  N   ARG    39      33.888   5.024  10.044  1.00  7.90      1BPT 403
+ATOM    297  CA  ARG    39      34.612   4.645   8.827  1.00  9.16      1BPT 404
+ATOM    298  C   ARG    39      33.852   4.820   7.536  1.00  5.75      1BPT 405
+ATOM    299  O   ARG    39      33.993   4.050   6.590  1.00  7.37      1BPT 406
+ATOM    300  CB  ARG    39      35.284   3.233   8.942  1.00 13.97      1BPT 407
+ATOM    301  CG  ARG    39      36.652   3.311   9.588  1.00 23.19      1BPT 408
+ATOM    302  CD  ARG    39      37.108   2.346  10.584  1.00 26.99      1BPT 409
+ATOM    303  NE  ARG    39      37.383   1.012  10.071  1.00 34.96      1BPT 410
+ATOM    304  CZ  ARG    39      38.017   0.015  10.712  1.00 34.64      1BPT 411
+ATOM    305  NH1 ARG    39      38.490   0.185  11.953  1.00 32.55      1BPT 412
+ATOM    306  NH2 ARG    39      38.178  -1.162  10.082  1.00 31.33      1BPT 413
+ATOM    307  N   ALA    40      33.087   5.902   7.525  1.00  6.82      1BPT 414
+ATOM    308  CA  ALA    40      32.280   6.346   6.408  1.00  6.05      1BPT 415
+ATOM    309  C   ALA    40      33.174   6.463   5.167  1.00 10.08      1BPT 416
+ATOM    310  O   ALA    40      34.349   6.857   5.267  1.00  9.96      1BPT 417
+ATOM    311  CB  ALA    40      31.763   7.754   6.764  1.00  2.00      1BPT 418
+ATOM    312  N   LYS    41      32.572   6.106   4.046  1.00  8.14      1BPT 419
+ATOM    313  CA  LYS    41      33.210   6.218   2.716  1.00  6.07      1BPT 420
+ATOM    314  C   LYS    41      32.548   7.399   2.044  1.00 12.83      1BPT 421
+ATOM    315  O   LYS    41      31.646   8.090   2.693  1.00 10.68      1BPT 422
+ATOM    316  CB  LYS    41      33.223   4.867   2.018  1.00 10.37      1BPT 423
+ATOM    317  CG  LYS    41      34.398   4.021   2.455  1.00 16.66      1BPT 424
+ATOM    318  CD  LYS    41      34.882   2.814   1.760  1.00 24.01      1BPT 425
+ATOM    319  CE  LYS    41      35.299   2.810   0.338  1.00 30.41      1BPT 426
+ATOM    320  NZ  LYS    41      36.465   3.643  -0.102  1.00 35.07      1BPT 427
+ATOM    321  N   ARG    42      32.884   7.785   0.815  1.00 10.98      1BPT 428
+ATOM    322  CA  ARG    42      32.283   8.966   0.211  1.00  9.54      1BPT 429
+ATOM    323  C   ARG    42      30.805   8.864  -0.204  1.00  6.66      1BPT 430
+ATOM    324  O   ARG    42      30.082   9.903  -0.070  1.00  3.97      1BPT 431
+ATOM    325  CB  ARG    42      33.012   9.583  -0.935  1.00 15.86      1BPT 432
+ATOM    326  CG  ARG    42      34.425  10.106  -0.842  1.00 20.34      1BPT 433
+ATOM    327  CD  ARG    42      35.312   9.161  -1.625  1.00 22.25      1BPT 434
+ATOM    328  NE  ARG    42      36.086   9.790  -2.633  1.00 21.50      1BPT 435
+ATOM    329  CZ  ARG    42      36.767   9.117  -3.589  1.00 27.18      1BPT 436
+ATOM    330  NH1 ARG    42      36.779   7.793  -3.734  1.00 24.88      1BPT 437
+ATOM    331  NH2 ARG    42      37.511   9.871  -4.403  1.00 23.05      1BPT 438
+ATOM    332  N   ASN    43      30.347   7.774  -0.685  1.00  4.96      1BPT 439
+ATOM    333  CA  ASN    43      28.913   7.618  -1.084  1.00  5.93      1BPT 440
+ATOM    334  C   ASN    43      28.074   7.439   0.214  1.00  6.01      1BPT 441
+ATOM    335  O   ASN    43      27.554   6.351   0.508  1.00  5.41      1BPT 442
+ATOM    336  CB  ASN    43      28.824   6.464  -2.040  1.00  2.00      1BPT 443
+ATOM    337  CG  ASN    43      27.504   6.530  -2.797  1.00  3.64      1BPT 444
+ATOM    338  OD1 ASN    43      26.760   7.532  -2.643  1.00  2.67      1BPT 445
+ATOM    339  ND2 ASN    43      27.301   5.537  -3.633  1.00  4.58      1BPT 446
+ATOM    340  N   ASN    44      27.999   8.522   0.982  1.00  9.31      1BPT 447
+ATOM    341  CA  ASN    44      27.314   8.619   2.297  1.00  2.00      1BPT 448
+ATOM    342  C   ASN    44      26.927  10.073   2.517  1.00  4.30      1BPT 449
+ATOM    343  O   ASN    44      27.772  10.923   2.793  1.00  4.82      1BPT 450
+ATOM    344  CB  ASN    44      28.276   8.091   3.338  1.00  8.38      1BPT 451
+ATOM    345  CG  ASN    44      27.789   8.066   4.782  1.00  2.00      1BPT 452
+ATOM    346  OD1 ASN    44      28.115   7.107   5.498  1.00  3.04      1BPT 453
+ATOM    347  ND2 ASN    44      27.094   9.111   5.123  1.00  7.22      1BPT 454
+ATOM    348  N   PHE    45      25.640  10.394   2.440  1.00  6.11      1BPT 455
+ATOM    349  CA  PHE    45      25.077  11.713   2.604  1.00  6.35      1BPT 456
+ATOM    350  C   PHE    45      24.138  11.761   3.822  1.00  5.91      1BPT 457
+ATOM    351  O   PHE    45      23.569  10.743   4.220  1.00  7.40      1BPT 458
+ATOM    352  CB  PHE    45      24.338  12.182   1.306  1.00  6.75      1BPT 459
+ATOM    353  CG  PHE    45      25.243  12.109   0.104  1.00  8.57      1BPT 460
+ATOM    354  CD1 PHE    45      26.165  13.134  -0.110  1.00  4.49      1BPT 461
+ATOM    355  CD2 PHE    45      25.243  10.988  -0.731  1.00  7.57      1BPT 462
+ATOM    356  CE1 PHE    45      27.037  13.057  -1.206  1.00  5.81      1BPT 463
+ATOM    357  CE2 PHE    45      26.116  10.886  -1.828  1.00  6.32      1BPT 464
+ATOM    358  CZ  PHE    45      27.017  11.915  -2.031  1.00  5.88      1BPT 465
+ATOM    359  N   LYS    46      24.017  12.955   4.372  1.00  5.18      1BPT 466
+ATOM    360  CA  LYS    46      23.137  13.170   5.515  1.00  7.22      1BPT 467
+ATOM    361  C   LYS    46      21.747  13.494   5.037  1.00  8.50      1BPT 468
+ATOM    362  O   LYS    46      20.775  13.277   5.777  1.00  8.81      1BPT 469
+ATOM    363  CB  LYS    46      23.719  14.097   6.569  1.00 17.67      1BPT 470
+ATOM    364  CG  LYS    46      24.807  13.401   7.450  1.00 17.14      1BPT 471
+ATOM    365  CD  LYS    46      25.667  14.430   8.162  1.00 22.39      1BPT 472
+ATOM    366  CE  LYS    46      26.849  13.763   8.823  1.00 33.75      1BPT 473
+ATOM    367  NZ  LYS    46      27.771  13.182   7.778  1.00 39.51      1BPT 474
+ATOM    368  N   SER    47      21.567  13.946   3.793  1.00  6.62      1BPT 475
+ATOM    369  CA  SER    47      20.237  14.215   3.241  1.00  7.92      1BPT 476
+ATOM    370  C   SER    47      20.291  13.788   1.753  1.00  9.35      1BPT 477
+ATOM    371  O   SER    47      21.389  13.808   1.198  1.00  6.05      1BPT 478
+ATOM    372  CB  SER    47      19.755  15.634   3.248  1.00  8.12      1BPT 479
+ATOM    373  OG  SER    47      20.635  16.520   2.615  1.00 10.87      1BPT 480
+ATOM    374  N   ALA    48      19.110  13.576   1.248  1.00 13.39      1BPT 481
+ATOM    375  CA  ALA    48      18.819  13.216  -0.137  1.00 14.31      1BPT 482
+ATOM    376  C   ALA    48      19.222  14.327  -1.097  1.00  9.87      1BPT 483
+ATOM    377  O   ALA    48      19.693  14.049  -2.219  1.00 10.59      1BPT 484
+ATOM    378  CB  ALA    48      17.317  12.910  -0.257  1.00 14.70      1BPT 485
+ATOM    379  N   GLU    49      19.096  15.564  -0.717  1.00 12.65      1BPT 486
+ATOM    380  CA  GLU    49      19.425  16.790  -1.459  1.00 16.19      1BPT 487
+ATOM    381  C   GLU    49      20.919  16.933  -1.769  1.00 13.71      1BPT 488
+ATOM    382  O   GLU    49      21.385  17.262  -2.881  1.00 12.61      1BPT 489
+ATOM    383  CB  GLU    49      18.987  18.000  -0.638  1.00 24.93      1BPT 490
+ATOM    384  CG  GLU    49      19.603  19.360  -0.977  1.00 32.70      1BPT 491
+ATOM    385  CD  GLU    49      18.593  20.478  -0.869  1.00 33.74      1BPT 492
+ATOM    386  OE1 GLU    49      17.501  20.301  -0.357  1.00 37.68      1BPT 493
+ATOM    387  OE2 GLU    49      19.011  21.524  -1.382  1.00 40.58      1BPT 494
+ATOM    388  N   ASP    50      21.678  16.685  -0.752  1.00  8.77      1BPT 495
+ATOM    389  CA  ASP    50      23.154  16.618  -0.749  1.00  5.33      1BPT 496
+ATOM    390  C   ASP    50      23.557  15.495  -1.741  1.00  7.91      1BPT 497
+ATOM    391  O   ASP    50      24.418  15.687  -2.574  1.00 10.59      1BPT 498
+ATOM    392  CB  ASP    50      23.469  16.208   0.652  1.00  7.72      1BPT 499
+ATOM    393  CG  ASP    50      24.677  16.768   1.310  1.00 22.77      1BPT 500
+ATOM    394  OD1 ASP    50      25.784  16.205   1.218  1.00 29.45      1BPT 501
+ATOM    395  OD2 ASP    50      24.437  17.805   1.976  1.00 27.42      1BPT 502
+ATOM    396  N   CYS    51      22.889  14.372  -1.606  1.00  7.15      1BPT 503
+ATOM    397  CA  CYS    51      23.118  13.191  -2.471  1.00  9.78      1BPT 504
+ATOM    398  C   CYS    51      22.817  13.463  -3.927  1.00 10.99      1BPT 505
+ATOM    399  O   CYS    51      23.624  13.202  -4.878  1.00 10.34      1BPT 506
+ATOM    400  CB  CYS    51      22.338  12.044  -1.832  1.00  7.72      1BPT 507
+ATOM    401  SG  CYS    51      22.556  10.517  -2.797  1.00  9.28      1BPT 508
+ATOM    402  N   MET    52      21.658  14.043  -4.168  1.00 11.69      1BPT 509
+ATOM    403  CA  MET    52      21.202  14.431  -5.526  1.00 18.65      1BPT 510
+ATOM    404  C   MET    52      22.077  15.525  -6.147  1.00 19.65      1BPT 511
+ATOM    405  O   MET    52      22.331  15.528  -7.377  1.00 21.47      1BPT 512
+ATOM    406  CB  MET    52      19.733  14.888  -5.520  1.00 18.64      1BPT 513
+ATOM    407  CG  MET    52      18.820  13.802  -5.029  1.00 25.08      1BPT 514
+ATOM    408  SD  MET    52      18.593  12.518  -6.298  1.00 36.21      1BPT 515
+ATOM    409  CE  MET    52      17.140  13.185  -7.164  1.00 31.81      1BPT 516
+ATOM    410  N   ARG    53      22.488  16.491  -5.340  1.00 15.68      1BPT 517
+ATOM    411  CA  ARG    53      23.357  17.550  -5.895  1.00 14.21      1BPT 518
+ATOM    412  C   ARG    53      24.665  16.936  -6.309  1.00 16.12      1BPT 519
+ATOM    413  O   ARG    53      25.128  17.198  -7.444  1.00 20.73      1BPT 520
+ATOM    414  CB  ARG    53      23.476  18.743  -4.963  1.00 18.66      1BPT 521
+ATOM    415  CG  ARG    53      24.712  19.585  -4.904  1.00 20.71      1BPT 522
+ATOM    416  CD  ARG    53      24.526  20.881  -4.194  1.00 20.48      1BPT 523
+ATOM    417  NE  ARG    53      24.137  20.759  -2.782  1.00 23.67      1BPT 524
+ATOM    418  CZ  ARG    53      22.949  21.116  -2.283  1.00 19.89      1BPT 525
+ATOM    419  NH1 ARG    53      21.956  21.580  -3.045  1.00 21.51      1BPT 526
+ATOM    420  NH2 ARG    53      22.797  21.076  -0.984  1.00 19.67      1BPT 527
+ATOM    421  N   THR    54      25.282  16.106  -5.486  1.00 15.94      1BPT 528
+ATOM    422  CA  THR    54      26.565  15.457  -5.745  1.00 15.33      1BPT 529
+ATOM    423  C   THR    54      26.567  14.390  -6.831  1.00 14.34      1BPT 530
+ATOM    424  O   THR    54      27.430  14.320  -7.727  1.00 15.03      1BPT 531
+ATOM    425  CB  THR    54      27.103  14.813  -4.378  1.00 15.64      1BPT 532
+ATOM    426  OG1 THR    54      26.939  15.842  -3.350  1.00 14.11      1BPT 533
+ATOM    427  CG2 THR    54      28.533  14.325  -4.493  1.00 13.03      1BPT 534
+ATOM    428  N   CYS    55      25.605  13.478  -6.742  1.00 14.03      1BPT 535
+ATOM    429  CA  CYS    55      25.487  12.381  -7.678  1.00 18.90      1BPT 536
+ATOM    430  C   CYS    55      24.327  12.243  -8.624  1.00 14.94      1BPT 537
+ATOM    431  O   CYS    55      24.416  11.321  -9.464  1.00 17.17      1BPT 538
+ATOM    432  CB  CYS    55      25.558  11.073  -6.799  1.00 13.60      1BPT 539
+ATOM    433  SG  CYS    55      27.108  10.842  -5.962  1.00 11.32      1BPT 540
+ATOM    434  N   GLY    56      23.250  12.959  -8.573  1.00 21.33      1BPT 541
+ATOM    435  CA  GLY    56      22.055  12.828  -9.404  1.00 21.71      1BPT 542
+ATOM    436  C   GLY    56      22.203  13.318 -10.815  1.00 29.25      1BPT 543
+ATOM    437  O   GLY    56      22.062  14.551 -11.007  1.00 36.32      1BPT 544
+TER     438      GLY    56                                              1BPT 545
+HETATM  439  P   PO4    70      30.706  10.577  10.314  1.00 40.72      1BPT 546
+HETATM  440  O1  PO4    70      31.880  11.494  10.318  1.00 40.10      1BPT 547
+HETATM  441  O2  PO4    70      29.700  11.094  11.269  1.00 36.93      1BPT 548
+HETATM  442  O3  PO4    70      30.132  10.507   8.949  1.00 36.50      1BPT 549
+HETATM  443  O4  PO4    70      31.190   9.240  10.783  1.00 33.43      1BPT 550
+HETATM  444  O   HOH    80      20.819  13.100   8.431  1.00 24.07      1BPT 551
+HETATM  445  O   HOH   102      30.510   0.396  17.550  1.00 19.35      1BPT 552
+HETATM  446  O   HOH   110      35.009   6.459  -0.855  1.00 12.67      1BPT 553
+HETATM  447  O   HOH   111      30.611   3.654  -0.155  1.00 13.18      1BPT 554
+HETATM  448  O   HOH   112      28.828   4.406   1.730  1.00  8.47      1BPT 555
+HETATM  449  O   HOH   113      29.841   4.909   4.406  1.00  7.19      1BPT 556
+HETATM  450  O   HOH   117      21.848  -0.746   5.060  1.00 16.62      1BPT 557
+HETATM  451  O   HOH   119      21.823   5.979 -12.630  1.00 24.84      1BPT 558
+HETATM  452  O   HOH   122      30.511   1.654  10.647  1.00  7.89      1BPT 559
+HETATM  453  O   HOH   138      25.621  15.094   3.493  1.00 23.48      1BPT 560
+HETATM  454  O   HOH   143      26.160   3.139  -5.420  1.00 34.25      1BPT 561
+HETATM  455  O   HOH   145      33.316   1.413  18.097  1.00 25.90      1BPT 562
+HETATM  456  O   HOH   159      28.583  -2.632   3.791  1.00 39.81      1BPT 563
+HETATM  457  O   HOH   160      33.706   3.567  -8.762  1.00 24.53      1BPT 564
+HETATM  458  O   HOH   200      14.641   3.645 -11.171  1.00 43.15      1BPT 565
+HETATM  459  O   HOH   203      36.110   7.939   7.187  1.00 26.10      1BPT 566
+HETATM  460  O   HOH   204      37.573   6.383   4.119  1.00 34.45      1BPT 567
+HETATM  461  O   HOH   205      17.114   1.480   5.071  1.00 25.78      1BPT 568
+HETATM  462  O   HOH   210      29.293  -0.141  -8.753  1.00 31.97      1BPT 569
+HETATM  463  O   HOH   220      38.003  -0.043   7.446  1.00 39.37      1BPT 570
+HETATM  464  O   HOH   310      27.894  18.497  -3.763  1.00 43.65      1BPT 571
+HETATM  465  O   HOH   400      21.958   7.437  -8.430  1.00 19.20      1BPT 572
+HETATM  466  O   HOH   401      31.962   7.677 -12.750  1.00 38.68      1BPT 573
+HETATM  467  O   HOH   402      17.664   6.716 -13.231  1.00 33.38      1BPT 574
+HETATM  468  O   HOH   403      13.919   2.897  -4.054  1.00 16.38      1BPT 575
+HETATM  469  O   HOH   404      21.409  17.309  -9.673  1.00 36.53      1BPT 576
+HETATM  470  O   HOH   405      13.834   0.430  16.064  1.00 32.73      1BPT 577
+HETATM  471  O   HOH   406      28.339  15.647 -11.514  1.00 41.64      1BPT 578
+HETATM  472  O   HOH   407      15.124   9.979  -1.705  1.00 32.62      1BPT 579
+HETATM  473  O   HOH   408      15.714   0.107  18.383  1.00 27.81      1BPT 580
+HETATM  474  O   HOH   409      28.278  14.368 -14.656  1.00 28.15      1BPT 581
+HETATM  475  O   HOH   410      15.105   5.054   0.515  1.00 23.38      1BPT 582
+HETATM  476  O   HOH   411      23.761  -2.671   7.342  1.00 35.33      1BPT 583
+HETATM  477  O   HOH   412      32.010  -0.303  -0.690  1.00 43.06      1BPT 584
+HETATM  478  O   HOH   413      25.967  19.801  -1.055  1.00 37.89      1BPT 585
+HETATM  479  O   HOH   414      35.741   2.717  -2.480  1.00 39.48      1BPT 586
+HETATM  480  O   HOH   415      22.383  20.755   1.804  1.00 31.50      1BPT 587
+HETATM  481  O   HOH   416      26.289  11.963 -15.860  1.00 40.37      1BPT 588
+HETATM  482  O   HOH   417      28.681  15.680  -0.316  1.00 48.97      1BPT 589
+HETATM  483  O   HOH   418      28.804  19.336  -6.207  1.00 36.73      1BPT 590
+CONECT   43   42  433                                                   1BPT 591
+CONECT  110  109  295                                                   1BPT 592
+CONECT  235  234  401                                                   1BPT 593
+CONECT  295  110  294                                                   1BPT 594
+CONECT  401  235  400                                                   1BPT 595
+CONECT  433   43  432                                                   1BPT 596
+CONECT  439  440  441  442  443                                         1BPT 597
+CONECT  440  439                                                        1BPT 598
+CONECT  441  439                                                        1BPT 599
+CONECT  442  439                                                        1BPT 600
+CONECT  443  439                                                        1BPT 601
+MASTER       71    0    1    2    3    0    0    6  482    1   11    5  1BPT 602
+END                                                                     1BPT 603


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/1BPT.pdb
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.0
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.0	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.0	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,21 @@
+'Contig634'=='-503384' (1 7941 321 7620) 19
+'Contig634'=='-503384' (374 7576 540 7410) 5
+'Contig634'=='-503384' (579 7384 783 7180) 23
+'Contig1853'=='+503384' (1691 6406 1905 6620) 11
+'Contig1853'=='+503384' (1933 6630 2201 6897) 31
+'Contig1853'=='+503384' (2277 6963 2312 6998) 0
+'Contig3700'=='-503384' (4083 9434 4794 8723) 79
+'Contig3700'=='-503384' (2161 11335 2753 10744) 53
+'Contig3700'=='-503384' (3198 10310 3753 9759) 47
+'Contig3700'=='-503384' (2839 10666 3169 10338) 32
+'Contig3700'=='-503384' (5241 8482 5355 8368) 4
+'Contig3700'=='-503384' (3622 9890 3753 9759) 9
+'Contig3997'=='-503384' (1546 1704 1968 1282) 22
+'Contig3997'=='-503384' (2060 1205 2498 767) 28
+'Contig3997'=='-503384' (2560 723 2931 351) 22
+'Contig3997'=='-503384' (3052 233 3284 4) 20
+'Contig3997'=='-503384' (1315 1942 1502 1755) 10
+'Contig3997'=='-503384' (1033 2215 1279 1969) 30
+'Contig3997'=='-503384' (865 2374 960 2279) 6
+'Contig3997'=='-503384' (1 3329 165 3164) 26
+'Contig3997'=='-503384' (198 3116 273 3041) 6

Added: trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.2
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.2	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/503384.MEGABLAST.2	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,710 @@
+MEGABLAST 2.2.4 [Aug-26-2002]
+
+
+Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), 
+"A greedy algorithm for aligning DNA sequences", 
+J Comput Biol 2000; 7(1-2):203-14.
+
+Database: cneoA.nt 
+           4935 sequences; 17,206,226 total letters
+
+Searching.......... done
+Query= 503384 11337 bp 2 contigs
+         (11,337 letters)
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+Contig3700                                                            785   0.0  
+Contig3997                                                            664   0.0  
+Contig634                                                             486   e-136
+Contig1853                                                            339   1e-91
+
+>Contig3700
+          Length = 5631
+
+ Score =  785 bits (396), Expect = 0.0
+ Identities = 639/718 (88%), Gaps = 12/718 (1%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 8723 gacggcagcccaccgcgccaatcaggttcatccaaattcgtcatcctcgggccgaacatt 8782
+            |||||||||||||| || ||||||||||||||||||||  ||||||||||||||||||||
+Sbjct: 4794 gacggcagcccaccacgtcaatcaggttcatccaaatttatcatcctcgggccgaacatt 4735
+
+                                                                        
+Query: 8783 ggacagtactttgaagtatcaagctcagaaggatccagctgcagttcttcgccttccact 8842
+            ||||||||||||||||||||||||||||||||||||| |||||||||||| |||||||| 
+Sbjct: 4734 ggacagtactttgaagtatcaagctcagaaggatccaactgcagttcttctccttccacg 4675
+
+                                                                        
+Query: 8843 ccgtcgcattcgcctaaccgtctcaatcatcaacactcatttgacacttctaaaagccgt 8902
+            || ||||||||||||||||||||||||||||  |||||||||||| |||| ||  |||||
+Sbjct: 4674 ccatcgcattcgcctaaccgtctcaatcatcggcactcatttgacccttccaacggccgt 4615
+
+                                                                        
+Query: 8903 tcaccctctccgactgaatgtgacgagattgaccgggtagaatattcatctccagccata 8962
+            |||| ||||||||| |||||||||||||||||| |||| ||||||||||||| | |||||
+Sbjct: 4614 tcacgctctccgaccgaatgtgacgagattgactgggtggaatattcatctctatccata 4555
+
+                                                                        
+Query: 8963 gaaatggaagcatgccaaatgtcgggctgggacagtagtgtctccgagttgtcagagggc 9022
+            |  ||||||||||| || ||| || |||||| |||||||||||| || ||||||||||| 
+Sbjct: 4554 gggatggaagcatgtcagatgccgagctgggccagtagtgtctctgaattgtcagagggt 4495
+
+                                                                        
+Query: 9023 gactcggagacagagacttccgtgattattacccctgatagagatattcgggat-g--gt 9079
+            ||||| |||||||||| ||||||||||||||| ||||||||||||||||||||| |  ||
+Sbjct: 4494 gactcagagacagagagttccgtgattattactcctgatagagatattcgggatggtcgt 4435
+
+                                                                        
+Query: 9080 caaagtcagcgcttggatatccgggatatcgtaaggtggaaatacgctttgataagcctt 9139
+            |||||||||||||||||||||| |||||||||||||||| | |||  |||| |||| |||
+Sbjct: 4434 caaagtcagcgcttggatatccaggatatcgtaaggtggcagtacaatttggtaagtctt 4375
+
+                                                                        
+Query: 9140 gttaaacatatgtttcataggcct-tacttctcaaacagacctccatcaaatctgaacat 9198
+            ||||||||||||||||||||| || |||||||||||||||| |||||||||||||||  |
+Sbjct: 4374 gttaaacatatgtttcatagg-ctctacttctcaaacagacttccatcaaatctgaatgt 4316
+
+                                                                        
+Query: 9199 ccggctttgagaccgaatcgcttattatcttgccgtacaacttcgagccatacggtggta 9258
+            | |||||||||||||||||||    |||||| ||| |||||||||||||||||||| || 
+Sbjct: 4315 ctggctttgagaccgaatcgc---ctatcttcccgcacaacttcgagccatacggtagtg 4259
+
+                                                                        
+Query: 9259 cctcgtgactttaatcgtcgctgtagcgaatcttgctctgttacatc-ggcctcaagctc 9317
+            ||| | |||||||||||||||||||||||| || | |||||| |||| || ||| |||||
+Sbjct: 4258 ccttgcgactttaatcgtcgctgtagcgaacctcgttctgttgcatcagg-ctctagctc 4200
+
+                                                                        
+Query: 9318 tagttttcatgcccctcagccgggagagttgaa-ccaacatggagatactgtggcttgtg 9376
+            ||||| | |||||||||||  ||||||    ||  |||||||||||||| ||||||||||
+Sbjct: 4199 tagttctaatgcccctcagttgggaga-cccaagtcaacatggagataccgtggcttgtg 4141
+
+                                                                      
+Query: 9377 aagcttgcggtagaggcacgacaatcactaacgcaaaaaagattatgccttgtcaagt 9434
+            |||||||||||||||||||||| |||||||||||||| ||||||||||||||||||||
+Sbjct: 4140 aagcttgcggtagaggcacgaccatcactaacgcaaagaagattatgccttgtcaagt 4083
+
+
+
+ Score =  753 bits (380), Expect = 0.0
+ Identities = 541/594 (91%), Gaps = 3/594 (0%)
+ Strand = Plus / Minus
+
+                                                                         
+Query: 10744 taccatatttttctccgttctaccgtattctt-cacatgtatctgcatatcaacgtaaca 10802
+             ||||||| |||||||| || ||||||| | || ||||||||||||| |||  | | || |
+Sbjct: 2753  taccatacttttctccattttaccgtactattccacatgtatctgcgtatttatgcaata 2694
+
+                                                                         
+Query: 10803 tacaatttccaagatacaacaggaattcaaacaagaaaaatcagatcctcaaccccgcac 10862
+             ||||||||||||| |||||||||||| ||||||| |||||||||||||||||||||||||
+Sbjct: 2693  tacaatttccaagttacaacaggaatgcaaacaacaaaaatcagatcctcaaccccgcac 2634
+
+                                                                         
+Query: 10863 caaacttctttacacagataaggtttacctcatctctcatttgtctccatattctccacc 10922
+             ||||||||||||||||||||||||| |||||||| |||||||| ||||||||||||||||
+Sbjct: 2633  caaacttctttacacagataaggttcacctcatccctcatttgcctccatattctccacc 2574
+
+                                                                         
+Query: 10923 attcccttccagcctcactcgcaccctcgtttaacccttcgtcctgtgcccacttt-tct 10981
+             |||||||||||||||| ||| ||| ||| ||||| ||||| |||||| |||| ||| |||
+Sbjct: 2573  attcccttccagcctcgctcacactctcatttaatccttcatcctgtcccca-tttctct 2515
+
+                                                                         
+Query: 10982 tttatgatcattctcatccatcgcaacacctcgacgactttctcaactccatccccaacc 11041
+             ||||||||||  |||||||||| |||||||||||||||||||||||||||||| ||||||
+Sbjct: 2514  tttatgatcaccctcatccatctcaacacctcgacgactttctcaactccatctccaacc 2455
+
+                                                                         
+Query: 11042 ctgccctcgccatcattcaaacattttttgagaaatgcagccactttaatagcatcctta 11101
+              ||||||||||||||||||||||||||||||||||||| ||||||||| |||||||||| 
+Sbjct: 2454  ttgccctcgccatcattcaaacattttttgagaaatgcggccactttagtagcatccttg 2395
+
+                                                                         
+Query: 11102 tctgccggcggactatcttctctggactcgatccataattcaatagtcctgagcacatcg 11161
+             ||||||||||||||||||||||||||||||||||||||||||||||| ||||||||||||
+Sbjct: 2394  tctgccggcggactatcttctctggactcgatccataattcaatagttctgagcacatcg 2335
+
+                                                                         
+Query: 11162 ggaagctcggtcgagttcatgagagcgggtttgggcggcctgatgatcttgacactctgt 11221
+             ||||||||||| ||||||||||| ||||||||||| ||| | |||||||||||| | |||
+Sbjct: 2334  ggaagctcggttgagttcatgagggcgggtttggggggcttaatgatcttgacattttgt 2275
+
+                                                                         
+Query: 11222 ctctgctgcggcgggattgaccgtgatgtggagcggcctctgtacgcgccgacacctccc 11281
+             ||||||||||| |||||||||||||||||||| |||||||||||| |||||||| |||||
+Sbjct: 2274  ctctgctgcggtgggattgaccgtgatgtggaacggcctctgtacccgccgacaactccc 2215
+
+                                                                   
+Query: 11282 aacttgacaggtgagacactggtactcgtccctcttgatctggatctatcccca 11335
+             || |||||||| |||||||| |||||||||||||| || ||||||||| |||||
+Sbjct: 2214  aatttgacaggagagacactagtactcgtccctctcgacctggatctaccccca 2161
+
+
+
+ Score =  726 bits (366), Expect = 0.0
+ Identities = 511/558 (91%), Gaps = 8/558 (1%)
+ Strand = Plus / Minus
+
+                                                                         
+Query: 9759  ctaaagatatcaggacatcactccggccattgttgaggccttccttccgagaaattcgtt 9818
+             |||| |||||||||||||||||||||||| |||||||||||| |||||||||||||| ||
+Sbjct: 3753  ctaatgatatcaggacatcactccggccactgttgaggcctttcttccgagaaattcatt 3694
+
+                                                                         
+Query: 9819  atctcaccgtacccttcaaccaatccatattctccttgacagattcgacgggcgtactaa 9878
+             ||||||  ||||||||||||||||||||||||||||||||||||||||||| ||||| ||
+Sbjct: 3693  atctcatagtacccttcaaccaatccatattctccttgacagattcgacggacgtacaaa 3634
+
+                                                                         
+Query: 9879  ggacta-c--ctg-g-tacgcccccttttaaagtctgctgctgtgctgaatactcgcttc 9933
+             |||||| |   || | || ||| || |||| |||||  ||||||||||| ||||| ||||
+Sbjct: 3633  ggactatctggtgtgctatgccgcc-tttagagtctattgctgtgctgattactcacttc 3575
+
+                                                                         
+Query: 9934  gaaaacagtatatcgaagccgcgacaccgcaagccgcacaaactatactcaagacctgtc 9993
+             |||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||
+Sbjct: 3574  gaaaacagtatatcgaagccgcgacaccgcaagcggcacaaactatactcaagacctgtc 3515
+
+                                                                         
+Query: 9994  agaatactttcttgtccggaggagcgctcacttcgggaaggagacggcctgttaccatca 10053
+             |||||| |||| |||||||||||||||||||||||||||||| |||||||||||| ||||
+Sbjct: 3514  agaataatttcatgtccggaggagcgctcacttcgggaaggaaacggcctgttactatca 3455
+
+                                                                         
+Query: 10054 ctcccgtatcgcattcggagctcatagaagaactgcgtccacattcggctcaggaactcc 10113
+             |||||||||| |||||||||||||||||||||||||||||||| ||||| ||||||||||
+Sbjct: 3454  ctcccgtatctcattcggagctcatagaagaactgcgtccacactcggcccaggaactcc 3395
+
+                                                                         
+Query: 10114 attcgctactcgctctttgtcaagctgctatagactcgaattctggatccaaac-aaagt 10172
+             ||||||||||| ||||||||||||||||| | ||||| ||||||||||  |||| ||| |
+Sbjct: 3394  attcgctactcactctttgtcaagctgctgtggactcaaattctggatgtaaactaaa-t 3336
+
+                                                                         
+Query: 10173 acttcaaagtatatcaagtctcgccatgggccatactacgccttgatgtcaacaatgagc 10232
+             ||||||||||||||||||||||||||||| ||||||||||| ||||||||| ||||||||
+Sbjct: 3335  acttcaaagtatatcaagtctcgccatggaccatactacgctttgatgtcagcaatgagc 3276
+
+                                                                         
+Query: 10233 aaaatgaatggggagcaaagtccctcttactgggacatgttctgtaagtggaatttgagt 10292
+             ||| ||||||||||||||||||| ||||||||||||||||||||||||||| ||| ||| 
+Sbjct: 3275  aaattgaatggggagcaaagtccttcttactgggacatgttctgtaagtgggattcgagc 3216
+
+                               
+Query: 10293 atggtcgtagagctcttg 10310
+             ||| ||||||||||||||
+Sbjct: 3215  atgatcgtagagctcttg 3198
+
+
+
+ Score =  400 bits (202), Expect = e-110
+ Identities = 300/332 (90%), Gaps = 4/332 (1%)
+ Strand = Plus / Minus
+
+                                                                         
+Query: 10338 agacgtcgcctctggtaagaaggactttttggacttttcatcaggtccattatggcattc 10397
+             ||||||||||||||||||||| || |||||| ||||| |||||| |||||||||||| ||
+Sbjct: 3169  agacgtcgcctctggtaagaa-gaatttttgaactttccatcagttccattatggcagtc 3111
+
+                                                                         
+Query: 10398 gctgaccacttgaccgctgttcgtgatggggctaacaggagccatctccgcattaagtaa 10457
+             ||||||| | |||||| ||||||||| |||||||| ||||||||||||||||||||||||
+Sbjct: 3110  gctgaccgcatgaccgttgttcgtgacggggctaataggagccatctccgcattaagtaa 3051
+
+                                                                         
+Query: 10458 agcaataactcgccaggcctactttcgttcctcacaaggctcccctgctcgctcatatcc 10517
+             |||||||||||||||| ||||||||| |||||||||| |||| |||||||||||||||||
+Sbjct: 3050  agcaataactcgccagacctactttcattcctcacaaagctcgcctgctcgctcatatcc 2991
+
+                                                                         
+Query: 10518 c---accgacgccataactcttgtctctgaggaagatctgctcgtacgcaataaattact 10574
+             |   |||||||||| |||||  ||||||||||||||| | || |||||||| ||||||||
+Sbjct: 2990  cgccaccgacgccacaactcacgtctctgaggaagatatacttgtacgcaacaaattact 2931
+
+                                                                         
+Query: 10575 gatgttgttcgagaagtgttttggtatgatgccaagagctaattaactcagaggatcact 10634
+             |||||||||||||| |||||| |||||||||||||||||||||||||||||||||| |||
+Sbjct: 2930  gatgttgttcgagaggtgtttcggtatgatgccaagagctaattaactcagaggattact 2871
+
+                                             
+Query: 10635 atatcatcggggtgtacaggaatttgtacttg 10666
+              |||||| ||| |||||||||||| |||||||
+Sbjct: 2870  gtatcattgggatgtacaggaattcgtacttg 2839
+
+
+
+ Score =  196 bits (99), Expect = 1e-48
+ Identities = 112/116 (96%), Gaps = 2/116 (1%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 8368 gaccgggaacgtacgagggaatggagacggcctttgagagcctgttgatgtagtcgtcct 8427
+            ||||||| ||||||||||||||||| ||||||||||||||||||||||||||||||||||
+Sbjct: 5355 gaccggg-acgtacgagggaatggacacggcctttgagagcctgttgatgtagtcgtcct 5297
+
+                                                                    
+Query: 8428 tgtggtcgtcgac-gtactggaggaacttgggatcggccattgctgtgcttgtggg 8482
+            ||||||||||||| |||||||||||||||||||||||||||||||||| |||||||
+Sbjct: 5296 tgtggtcgtcgacagtactggaggaacttgggatcggccattgctgtgtttgtggg 5241
+
+
+>Contig3997
+          Length = 12734
+
+ Score =  664 bits (335), Expect = 0.0
+ Identities = 401/423 (94%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 1282 accttgcgaatgtccaataaacccatcacgacgtcacctctgaccacttccttttcatct 1341
+            |||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||
+Sbjct: 1968 accttgcgaatgtccaataaacccatgacgacgtcacctctgaccacttccttttcatct 1909
+
+                                                                        
+Query: 1342 gttccaaccaccagcgccctccccacaaaactccttcccggcacatacttgcccacatcc 1401
+            || || |||||||||||||| ||||||||||||||||| |||||||||||||||||||||
+Sbjct: 1908 gtaccgaccaccagcgccctgcccacaaaactccttccaggcacatacttgcccacatcc 1849
+
+                                                                        
+Query: 1402 cctcttcccttttcgtccacggcacgtacatccacttgatccaccgcaacagcataaacc 1461
+            ||||||||||||||||||||||||||||||||||||||||||||||| || |||||||||
+Sbjct: 1848 cctcttcccttttcgtccacggcacgtacatccacttgatccaccgccacggcataaacc 1789
+
+                                                                        
+Query: 1462 tgtacgagcacttggttctttcccgtctttcctgggggttttgagtgagagctaaactct 1521
+            ||||||||||||||||||||||||||||| || || |||||||||||||| |||||||||
+Sbjct: 1788 tgtacgagcacttggttctttcccgtcttaccaggaggttttgagtgagaactaaactct 1729
+
+                                                                        
+Query: 1522 acaaggggcgggagctggtcacggaggtgtgcgggggtagcgttgtggtggtcaaatgtg 1581
+            ||||||||||||||||||||||| |||||||||||||||||||| ||||| || ||||||
+Sbjct: 1728 acaaggggcgggagctggtcacgaaggtgtgcgggggtagcgttatggtgatcgaatgtg 1669
+
+                                                                        
+Query: 1582 gaatggagtgtgatggatgccatggtggcgtgtggaagaccaaggatacgtgcgtctagg 1641
+            |||||||||||||||||||||||| ||||||||||||||||||||||||| |||||||||
+Sbjct: 1668 gaatggagtgtgatggatgccatgctggcgtgtggaagaccaaggatacgggcgtctagg 1609
+
+                                                                        
+Query: 1642 tcgggcgctgatttggccgggtttgactgtttgtaagcgcccagaggtaaaatcaagttg 1701
+            || || |||||||| ||||||||||| |||||||||||||||| ||||||||||||||||
+Sbjct: 1608 tcaggtgctgatttagccgggtttgattgtttgtaagcgcccaaaggtaaaatcaagttg 1549
+
+               
+Query: 1702 cta 1704
+            |||
+Sbjct: 1548 cta 1546
+
+
+
+ Score =  648 bits (327), Expect = 0.0
+ Identities = 412/440 (93%), Gaps = 2/440 (0%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 767  actcactttccgccactcttcaatgctctttttgcagtgtcatacaccctttgcccaccc 826
+            ||||||||||||||||||||||||||||||  ||||||||||||||||| ||||||||| 
+Sbjct: 2498 actcactttccgccactcttcaatgctcttcgtgcagtgtcatacacccgttgcccacct 2439
+
+                                                                        
+Query: 827  tgggtgtcaaacacaaagtcatacccattttcctccagattgatcatcactgccgccgga 886
+            || || ||||||||||||||||| ||||||||||||||||||||||||||||||||||| 
+Sbjct: 2438 tgcgtatcaaacacaaagtcatatccattttcctccagattgatcatcactgccgccggc 2379
+
+                                                                        
+Query: 887  gaacccatgagcactcccttggccccattctcaatacacttt-tggtgagcctcatgcga 945
+            ||||||||||||||||||||||||||||| |||||||| ||| |||||||||||||||||
+Sbjct: 2378 gaacccatgagcactcccttggccccattttcaataca-tttctggtgagcctcatgcga 2320
+
+                                                                        
+Query: 946  gtcgtccccaccggggacgatagccgtcacgtatacacctgcgcgggacatgacctgaca 1005
+            ||||||||| || |||| |||||||||||| || ||||||||||| ||||| ||||| ||
+Sbjct: 2319 gtcgtccccgccagggatgatagccgtcacatagacacctgcgcgagacatcacctggca 2260
+
+                                                                        
+Query: 1006 gacgagtgcggcgataccagtatgggcgttgatgataatggcgcggaactggcgattgag 1065
+            |||||||||||||||||||||||||||||||||||  ||||||||||| |||||||||||
+Sbjct: 2259 gacgagtgcggcgataccagtatgggcgttgatgacgatggcgcggaattggcgattgag 2200
+
+                                                                        
+Query: 1066 agagccacgaacgcatcgcgctgcagcgatgccttggagagggagaatcgagagttgttc 1125
+            ||||||||| |||||||||||||||||||||||||||||||||||||| |||||||||||
+Sbjct: 2199 agagccacggacgcatcgcgctgcagcgatgccttggagagggagaatagagagttgttc 2140
+
+                                                                        
+Query: 1126 gagtgtgagaagggtagggaatggtgctcgagagacacgtcgacggtcgacaagaatgta 1185
+            ||||||||||||||||||||||||||| ||||| ||||||||||| || |||||||||||
+Sbjct: 2139 gagtgtgagaagggtagggaatggtgcacgagaaacacgtcgacgatccacaagaatgta 2080
+
+                                
+Query: 1186 ctctgccaatgcgccgctct 1205
+            ||||||||||||||||||||
+Sbjct: 2079 ctctgccaatgcgccgctct 2060
+
+
+
+ Score =  563 bits (284), Expect = e-159
+ Identities = 351/373 (94%), Gaps = 1/373 (0%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 351  aaaaagcagaacactagttgataatcctcacgaccctcacgccacactcatcccatcctt 410
+            ||||||||| |||||| |||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2931 aaaaagcag-acactaattgataatcctcacgaccctcacgccacactcatcccatcctt 2873
+
+                                                                        
+Query: 411  ggcgtttaaagacatctgctcccctttcaaagggcaccactgcgttctcaccttccagca 470
+            | ||||||||||| |||||||||||||||||||||||||||||| |||||||||||| ||
+Sbjct: 2872 gacgtttaaagacctctgctcccctttcaaagggcaccactgcgctctcaccttccaaca 2813
+
+                                                                        
+Query: 471  ctggcctaaaggatgccatacacggttcctccatcacatcgcggcaatccatcccggatg 530
+            |||| ||||||| ||||||||||||||||||||||||||| ||||| ||||||||||| |
+Sbjct: 2812 ctggtctaaaggttgccatacacggttcctccatcacatcacggcagtccatcccggacg 2753
+
+                                                                        
+Query: 531  cgtcaatctctggctcaccagacccgatgggagagacgtattcaaaggcgataaactttg 590
+            ||||||||||||||||||||||||| ||||| ||||| |||||||||| |||||||||||
+Sbjct: 2752 cgtcaatctctggctcaccagacccaatgggggagacatattcaaaggagataaactttg 2693
+
+                                                                        
+Query: 591  agtctttgcgttttgaaccaaacactgttctcagtgtcttgatgcccgacgggcgggaag 650
+            | |||||||||||||||||||||||||||||||||||||||||||| |||||||||||||
+Sbjct: 2692 aatctttgcgttttgaaccaaacactgttctcagtgtcttgatgccggacgggcgggaag 2633
+
+                                                                        
+Query: 651  ccaagtggggcgggacgcggttcagagttgtttcggggcgcttggtcgataccaatctac 710
+            |||| ||||||||||||||||||||||||| |||||| ||||||||||||||||||||||
+Sbjct: 2632 ccaaatggggcgggacgcggttcagagttgcttcgggccgcttggtcgataccaatctac 2573
+
+                         
+Query: 711  agaatgcacattt 723
+            | ||| |||||||
+Sbjct: 2572 aaaatacacattt 2560
+
+
+
+ Score =  299 bits (151), Expect = 1e-79
+ Identities = 214/234 (91%), Gaps = 5/234 (2%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 4    ttgatagatctccagtcatagggcctctagtccggcgcacaagaagtgcaaggtttttag 63
+            |||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||
+Sbjct: 3284 ttgatagatctccagtcatagggcctctggtccggcgcacaagaagtgcaaggtttttag 3225
+
+                                                                        
+Query: 64   ggcgaagctggggtagttctacagggagagaagttgatggggaagcagagctcgggcaag 123
+            |||||||||||| |||||||  |||| ||||||||||||||||||||||||| ||||| |
+Sbjct: 3224 ggcgaagctgggatagttctgtagggggagaagttgatggggaagcagagctggggcagg 3165
+
+                                                                        
+Query: 124  cgggcagtagtatcggacgagattggtgtacaagagagatggatgaaggg-tatgggacg 182
+            ||||| ||||| ||||||||| | |||||||||||||||||||||| ||| |||||||| 
+Sbjct: 3164 cgggcggtagtgtcggacgaggtcggtgtacaagagagatggatga-gggatatgggact 3106
+
+                                                                  
+Query: 183  tttaatgattgatgac-att-gtatcgattttgtacaagtatcttg-gactgtt 233
+            ||||||||||||| |  ||| ||| ||||||||||||||||||||| |||||||
+Sbjct: 3105 tttaatgattgattaagatttgtaacgattttgtacaagtatcttgagactgtt 3052
+
+
+
+ Score =  293 bits (148), Expect = 7e-78
+ Identities = 179/189 (94%), Gaps = 2/189 (1%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 1755 actcacgtgtacccaggagcactttcttgatcccttccatgcgattccatcaacatc-cc 1813
+            ||||||||||||||||||||||| |||||||||||||||||||| |||||||||| | ||
+Sbjct: 1502 actcacgtgtacccaggagcactctcttgatcccttccatgcgactccatcaaca-cacc 1444
+
+                                                                        
+Query: 1814 atcaccatacgaactcccttcccactttgacccgcctgtaaatctatacccagcagcgct 1873
+            |||||||||||||||||||||||||||||||||||||  ||| |||||||||||||||||
+Sbjct: 1443 atcaccatacgaactcccttcccactttgacccgcctccaaacctatacccagcagcgct 1384
+
+                                                                        
+Query: 1874 cttcgacctcttgatcatggacggtaaaggcaattcagccaatccatcatctttttcgcc 1933
+            ||| ||||||||||||||||||||||||||||||||||||||||||||||||| ||| ||
+Sbjct: 1383 ctttgacctcttgatcatggacggtaaaggcaattcagccaatccatcatcttcttcccc 1324
+
+                     
+Query: 1934 tgcctcact 1942
+            |||||||||
+Sbjct: 1323 tgcctcact 1315
+
+
+
+ Score =  252 bits (127), Expect = 2e-65
+ Identities = 220/250 (88%), Gaps = 6/250 (2%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 1969 gaccgtggccgacttcgctcctcgagcctctaaagatgctagactttctcgaatgcgaca 2028
+            |||||||||| ||||| ||| |||||||||| || | ||||||| || ||||||||||||
+Sbjct: 1279 gaccgtggcccacttcactcttcgagcctctgaatacgctagaccttgtcgaatgcgaca 1220
+
+                                                                        
+Query: 2029 aatgtcctcctccattatcgtggtacatggataaccttgagcggctcctctcgggagggg 2088
+            | ||  | || ||||||||||| |||| ||||||||| ||||| |||||||||||||| |
+Sbjct: 1219 actg--c-ccaccattatcgtgatacacggataacctcgagcgactcctctcgggaggtg 1163
+
+                                                                        
+Query: 2089 gtgaaga---gcgatagagacgttcagagtcttggggggtcatctctggaaggatggtga 2145
+            |||||||   |||||||||||||||||||||||| || ||||| ||||||||||||||||
+Sbjct: 1162 gtgaagaggggcgatagagacgttcagagtcttgcggcgtcatttctggaaggatggtga 1103
+
+                                                                        
+Query: 2146 ggatgtcggggtccaggaaggaaggcggttcaaaccctgcccgcggagttgttggacgtg 2205
+            |||||||||||||||||||||| || ||||| ||||||||||| || |||||||| ||||
+Sbjct: 1102 ggatgtcggggtccaggaaggaggggggttcgaaccctgcccggggggttgttgggcgtg 1043
+
+                      
+Query: 2206 tagggatagg 2215
+            ||||||||||
+Sbjct: 1042 tagggatagg 1033
+
+
+
+ Score =  143 bits (72), Expect = 1e-32
+ Identities = 90/96 (93%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 2279 tgagggagatgtaggacgagagtattgagagagacggccaggtgatgacgatgatggggg 2338
+            |||||||||||||||||||||||||||||||||||| | ||| |||||| |||| |||||
+Sbjct: 960  tgagggagatgtaggacgagagtattgagagagacgacaaggcgatgacaatgacggggg 901
+
+                                                
+Query: 2339 taaaggcgatggggtccttggcggatagacgggagc 2374
+            |||||||||||||||||||||||| |||||||||||
+Sbjct: 900  taaaggcgatggggtccttggcgggtagacgggagc 865
+
+
+
+ Score =  121 bits (61), Expect = 5e-26
+ Identities = 142/168 (84%), Gaps = 5/168 (2%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 3164 ttcgtcatctgagcctgcttggcctgatgcactctccatatcctcgtcgtcctttgaagg 3223
+            |||||| ||||||||||||||||||||||||||||||||||| ||||| ||||| | |||
+Sbjct: 165  ttcgtcgtctgagcctgcttggcctgatgcactctccatatcgtcgtcatccttgggagg 106
+
+                                                                        
+Query: 3224 aagaggcggcggctgttccg-ccgtgttgcgaggggggttgattactctaggtgcaagag 3282
+            |||||| || || | ||||  || |||||||||||||| |||| || ||| |||| || |
+Sbjct: 105  aagaggtggaggtt-ttcccaccatgttgcgaggggggatgatcaccctaagtgccag-g 48
+
+                                                            
+Query: 3283 -tgggacgagaggtggtatgcggtaggtggtcgggatggagggcggat 3329
+             |||| | ||||||||||| |||||| ||||| ||||| |||| ||||
+Sbjct: 47   ctggggccagaggtggtat-cggtagatggtctggatgaagggaggat 1
+
+
+
+ Score =  103 bits (52), Expect = 1e-20
+ Identities = 70/76 (92%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 3041 gtgcgaacgccgcgacgaggagcgaatggactgtgctctgctacaaggccgggaagacga 3100
+            |||||| |||||||| |||||||||||||||||||||||||||||||||| ||| || ||
+Sbjct: 273  gtgcgagcgccgcgatgaggagcgaatggactgtgctctgctacaaggcctggaggaaga 214
+
+                            
+Query: 3101 tttgggagtgcgtgag 3116
+            ||||||||||| ||||
+Sbjct: 213  tttgggagtgcatgag 198
+
+
+>Contig634
+          Length = 858
+
+ Score =  486 bits (245), Expect = e-136
+ Identities = 304/323 (94%), Gaps = 3/323 (0%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 7620 tatatatatacgtaccagaaatacacatgcagtcaacaccggcgaaaaacttgtcctttt 7679
+            ||||||| |||||||||||||| ||||||||||||||||||||||| |||||||||||||
+Sbjct: 321  tatatat-tacgtaccagaaatgcacatgcagtcaacaccggcgaagaacttgtcctttt 263
+
+                                                                        
+Query: 7680 cagattcaataaacttgtccaggttgacagaaccattctcctccataccctcaaaacaga 7739
+            ||||||||||||||||||| ||||||||||||||||||||||||||||||||||||||||
+Sbjct: 262  cagattcaataaacttgtcaaggttgacagaaccattctcctccataccctcaaaacaga 203
+
+                                                                        
+Query: 7740 ctttgaggttgacaggaagctcgataccaagcttcttgtgcgcctcgaggacgtttaacc 7799
+            ||||||| |||||||||||||| |||||||||||||| ||||||||||| ||||| ||||
+Sbjct: 202  ctttgagattgacaggaagctcaataccaagcttcttatgcgcctcgagaacgttcaacc 143
+
+                                                                        
+Query: 7800 agcccatgactggacccttgtcatcggttgagccacggccgtaaagtcggccactgccgt 7859
+            |||||||||| |||||||||||||| ||||||||||||||||| ||||||||||||||||
+Sbjct: 142  agcccatgacaggacccttgtcatccgttgagccacggccgtagagtcggccactgccgt 83
+
+                                                                        
+Query: 7860 tgggatcgggggtgagctcaaagggagggtagagccagccgtcttcgagtaga-gcgggt 7918
+            ||||||| |||||||||||||| |||||||||||||||||||||||||| | | ||||||
+Sbjct: 82   tgggatccggggtgagctcaaacggagggtagagccagccgtcttcgagca-atgcgggt 24
+
+                                   
+Query: 7919 tggacatcgtagtggccatagac 7941
+            ||||| ||||||||||| |||||
+Sbjct: 23   tggacgtcgtagtggccgtagac 1
+
+
+
+ Score =  291 bits (147), Expect = 3e-77
+ Identities = 162/167 (97%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 7410 cttactgagagcgataagatccgtcatcggttcgtggacggtaccgccaaagacaccaga 7469
+            |||||||||||||| ||| |||||||||||||||||||||||||||||||||||||||||
+Sbjct: 540  cttactgagagcgacaaggtccgtcatcggttcgtggacggtaccgccaaagacaccaga 481
+
+                                                                        
+Query: 7470 gtgaagatccctgtcaggcccagaaatcttgatctcgtaatagttgatacctctgagacc 7529
+            ||| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 480  gtggagatccctgtcaggcccagaaatcttgatctcgtaatagttgatacctctgagacc 421
+
+                                                           
+Query: 7530 ataagtgagacagggggtcttggtgtcaagccagtaattgtctatat 7576
+            |||||||||||| ||||||||||||||||||||||| ||||||||||
+Sbjct: 420  ataagtgagacacggggtcttggtgtcaagccagtagttgtctatat 374
+
+
+
+ Score =  224 bits (113), Expect = 5e-57
+ Identities = 183/206 (88%), Gaps = 2/206 (0%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 7180 aaacccacagtcgtaacagtgtcgtcggagatggtgacctcaccaccaacagc-ggcatg 7238
+            |||| ||||||| |||| || ||| | || |||||||| |||||||| |||||  |||||
+Sbjct: 783  aaactcacagtcttaacggtatcgcccgatatggtgacttcaccaccgacagcatgcatg 724
+
+                                                                        
+Query: 7239 aatatcttccatctggaagtggatagcctcaaacttggctcgctcatcgtcggtgacagg 7298
+            |||||||| ||||| ||| |||||||| ||||||||||||||||||||||||||||||||
+Sbjct: 723  aatatcttgcatctcgaaatggatagcttcaaacttggctcgctcatcgtcggtgacagg 664
+
+                                                                        
+Query: 7299 cgcgatgaggtccttgataccggtaacgaggatctggccgtcgggagtgacaagcttgga 7358
+             |||||||||||||||| |||| ||||||||||||||||||||||||| |||||| ||||
+Sbjct: 663  ggcgatgaggtccttgacaccgctaacgaggatctggccgtcgggagtaacaagcctgga 604
+
+                                      
+Query: 7359 cactatatataacactcgtcagctct 7384
+            ||||| ||| |||||| |||||||||
+Sbjct: 603  cactagata-aacacttgtcagctct 579
+
+
+>Contig1853
+          Length = 2314
+
+ Score =  339 bits (171), Expect = 1e-91
+ Identities = 204/215 (94%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 6406 aaatactcactcggatgtagttgtcagtgtcaatcttctcgttggtggaatgagctccgt 6465
+            |||||| |||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 1691 aaatacgcactcggatgtagttgtcagtgtcaatcttctcgttggtggaatgagctccgt 1750
+
+                                                                        
+Query: 6466 catcaccccttccgacaggtaacaaaaggacgttgaggttgagaatgttggcaaagtcga 6525
+            ||||||||||||||||||| ||||  ||||| ||||||||||||||||||||||||||||
+Sbjct: 1751 catcaccccttccgacaggcaacaggaggacattgaggttgagaatgttggcaaagtcga 1810
+
+                                                                        
+Query: 6526 gggtgacagggatagaaccgccttcacgggtgtagtcgggcacttggccgtaaacagact 6585
+            | ||||||||||| |||||||||||||||||||| || |||||||||||||||||||| |
+Sbjct: 1811 gagtgacagggatggaaccgccttcacgggtgtaatcaggcacttggccgtaaacagatt 1870
+
+                                               
+Query: 6586 cggtagccttgtgcgccgctcgataagagtagtgc 6620
+            ||||||||||||||||||| |||||||||||||||
+Sbjct: 1871 cggtagccttgtgcgccgcacgataagagtagtgc 1905
+
+
+
+ Score =  285 bits (144), Expect = 2e-75
+ Identities = 243/274 (88%), Gaps = 11/274 (4%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 6630 ggtcagtctttatcatccacttgcatt-gttgccaaccgtgctcacattggggtcagcct 6688
+            ||||||||||||| || ||||||||||  || |||| | ||||||| |||||||||||||
+Sbjct: 1933 ggtcagtctttat-at-cacttgcattattttccaatcatgctcacgttggggtcagcct 1990
+
+                                                                        
+Query: 6689 gaaag-aaaacgattgtcagcc---tctttacattcatcaa-cactcgccgggggcataa 6743
+            ||||| ||||| ||||||||||   | |||||||||||||| || || || |||| |  |
+Sbjct: 1991 gaaagcaaaaccattgtcagccttttttttacattcatcaacca-tc-cc-ggggaacca 2047
+
+                                                                        
+Query: 6744 ctcacaatccaaggctcaccaccgtgagtgagatacacctccatcttgttcttggaacca 6803
+            |||||||||||||||||||| || ||||| ||||||||||||||||||||||||||||||
+Sbjct: 2048 ctcacaatccaaggctcaccgccatgagtaagatacacctccatcttgttcttggaacca 2107
+
+                                                                        
+Query: 6804 agttttttgaattcttcctcaacgtacttgacgacgaggtcagtcacactggcaacagtg 6863
+            ||||| ||||| ||||||||||||||||||||||| ||||| || || ||||| ||||||
+Sbjct: 2108 agtttcttgaactcttcctcaacgtacttgacgacaaggtcggtgacgctggcgacagtg 2167
+
+                                              
+Query: 6864 aggttgggtacaagacggatagagaatttgccta 6897
+            ||||||||||||||||||||||||||||||||||
+Sbjct: 2168 aggttgggtacaagacggatagagaatttgccta 2201
+
+
+
+ Score = 71.9 bits (36), Expect = 5e-11
+ Identities = 36/36 (100%)
+ Strand = Plus / Plus
+
+                                                
+Query: 6963 acccttgacacagcagggaatcacagtcttggaacc 6998
+            ||||||||||||||||||||||||||||||||||||
+Sbjct: 2277 acccttgacacagcagggaatcacagtcttggaacc 2312
+
+
+  Database: cneoA.nt
+    Posted date:  Dec 12, 2002  3:26 PM
+  Number of letters in database: 17,206,226
+  Number of sequences in database:  4935
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 0, Extension: 0
+Number of Hits to DB: 9213
+Number of Sequences: 4935
+Number of extensions: 21
+Number of successful extensions: 21
+Number of sequences better than 1.0e-01: 4
+length of query: 11337
+length of database: 17,206,226
+effective HSP length: 19
+effective length of query: 11318
+effective length of database: 17,112,461
+effective search space: 193678833598
+effective search space used:        0
+S2: 21 (42.1 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/5X_1895.FASTXY
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/5X_1895.FASTXY	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/5X_1895.FASTXY	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1487 @@
+ FASTXY compares a DNA sequence to a protein sequence data bank
+ version 3.4t07 Nov 21, 2001
+Please cite:
+ Pearson et al, Genomics (1997) 46:24-36
+
+ 5X_1895.fa, 7972 aa
+ vs /home/jason/genomes/S_cerevisea/pep/yeast_nrpep.fasta library
+
+       opt      E()
+< 20    53     0:====
+  22     0     0:           one = represents 17 library sequences
+  24     0     0:
+  26     0     0:
+  28     0     2:*
+  30     4    13:*
+  32    11    49:= *
+  34    55   132:====   *
+  36   202   270:============   *
+  38   373   447:======================    *
+  40   619   623:====================================*
+  42   895   762:============================================*========
+  44  1008   840:=================================================*==========
+  46  1000   856:==================================================*========
+  48   915   819:================================================*=====
+  50   820   748:===========================================*=====
+  52   694   657:======================================*==
+  54   552   562:=================================*
+  56   428   469:========================== *
+  58   381   385:======================*
+  60   279   312:================= *
+  62   194   250:============  *
+  64   163   199:========== *
+  66   117   157:=======  *
+  68    62   124:====   *
+  70    50    97:===  *
+  72    36    76:=== *
+  74    26    59:== *
+  76    17    46:= *
+  78    26    36:==*
+  80    18    28:=*
+  82    11    21:=*
+  84    25    17:*=
+  86    19    13:*=
+  88    11    10:*          inset = represents 1 library sequences
+  90    11     8:*
+  92     9     6:*         :=====*===
+  94     9     5:*         :====*====
+  96    13     4:*         :===*=========
+  98     5     3:*         :==*==
+ 100     6     2:*         :=*====
+ 102    13     2:*         :=*===========
+ 104     5     1:*         :*====
+ 106     5     1:*         :*====
+ 108     4     1:*         :*===
+ 110     1     1:*         :*
+ 112     0     0:          *
+ 114     4     0:=         *====
+ 116     1     0:=         *=
+ 118     5     0:=         *=====
+>120    32     0:==        *================================
+4215311 residues in  9190 sequences
+  Expectation_n fit: rho(ln(x))= 4.0381+/-0.00065; mu= 23.8752+/- 0.039
+ mean_var=56.2146+/-11.145, 0's: 53 Z-trim: 75  B-trim: 416 in 2/58
+ Lambda= 0.1711
+ Kolmogorov-Smirnov  statistic: 0.0414 (N=29) at  52
+
+FASTY (3.42 Sept 2001) function [optimized, BL50 matrix (15:-5)] ktup: 2
+ join: 62, opt: 62, gap-pen: -14/ -2 shift: -20, subs: -20 width:  16
+The best scores are:                                       opt bits E(9190)
+NR_SC:SW-YNN2_YEAST SW:YNN2_YEAST P53914 sacch (1056) [f] 2172  547 1.6e-154
+NR_SC:SW-MPCP_YEAST SW:MPCP_YEAST P23641 sacch ( 311) [r]  432  117 1.3e-25
+NR_SC:SW-YEO3_YEAST SW:YEO3_YEAST P40035 sacch ( 300) [r]  154   48 5.7e-05
+NR_SC:SW-RIM2_YEAST SW:RIM2_YEAST P38127 sacch ( 377) [r]  134   44   0.002
+NR_SC:SW-Q05330 SW:Q05330 Q05330 saccharomyces ( 302) [f]  122   41   0.014
+NR_SC:SW-AMYH_YEAST SW:AMYH_YEAST P08640 sacch (1367) [f]  128   43   0.014
+NR_SC:SW-PET8_YEAST SW:PET8_YEAST P38921 sacch ( 284) [r]  121   40   0.016
+NR_SC:SW-AGA1_YEAST SW:AGA1_YEAST P32323 sacch ( 725) [f]  124   41   0.018
+NR_SC:SW-PMT_YEAST SW:PMT_YEAST P32332 sacchar ( 324) [r]  120   40    0.02
+NR_SC:GP-CAA81388_1 gi|439289|emb|CAA81388.1 ( ( 751) [r]  115   39   0.085
+NR_SC:SW-Q07229 SW:Q07229 Q07229 saccharomyces ( 817) [r]  115   39    0.09
+NR_SC:SW-VRP1_YEAST SW:VRP1_YEAST P37370 sacch ( 817) [r]  115   39    0.09
+NR_SC:SW-Q12215 SW:Q12215 Q12215 saccharomyces ( 556) [f]  112   38    0.12
+NR_SC:SW-YH17_YEAST SW:YH17_YEAST P38898 sacch ( 153) [r]  106   36    0.13
+NR_SC:GP-CAB58511_1 gi|6064291|emb|CAB58511.1  ( 894) [f]  113   39    0.13
+NR_SC:SW-YN23_YEAST SW:YN23_YEAST P53832 sacch ( 503) [f]  110   38    0.15
+NR_SC:SW-YHC8_YEAST SW:YHC8_YEAST P38739 sacch ( 605) [f]  110   38    0.17
+NR_SC:SW-FLO1_YEAST SW:FLO1_YEAST P32768 sacch (1537) [f]  113   39     0.2
+NR_SC:PIR-S53465 PIR:S53465 flocculation prote (1537) [f]  113   39     0.2
+NR_SC:SW-Q06143 SW:Q06143 Q06143 saccharomyces ( 298) [r]  105   36    0.25
+NR_SC:PIR-S58652 PIR:S58652 hypothetical prote ( 216) [r]  103   36    0.28
+NR_SC:SW-YKT9_YEAST SW:YKT9_YEAST P36045 sacch ( 187) [f]  102   35     0.3
+NR_SC:SW-YAG3_YEAST SW:YAG3_YEAST P39712 sacch (1322) [f]  109   38    0.35
+NR_SC:PIR-S51959 PIR:S51959 hypothetical prote (1367) [f]  109   38    0.36
+NR_SC:SW-Q12444 SW:Q12444 Q12444 saccharomyces ( 126) [f]   99   34    0.38
+NR_SC:SW-TOA1_YEAST SW:TOA1_YEAST P32773 sacch ( 286) [r]  102   36     0.4
+NR_SC:SW-SIM1_YEAST SW:SIM1_YEAST P40472 sacch ( 475) [f]  104   36    0.41
+NR_SC:GP-CAA47602_1 gi|4824|emb|CAA47602.1 (X6 ( 307) [r]  102   36    0.42
+NR_SC:SW-YMC1_YEAST SW:YMC1_YEAST P32331 sacch ( 307) [r]  102   36    0.42
+NR_SC:SW-YG5F_YEAST SW:YG5F_YEAST P53320 sacch ( 366) [r]  102   36    0.48
+NR_SC:SW-TIR3_YEAST SW:TIR3_YEAST P40552 sacch ( 269) [f]  100   35    0.54
+NR_SC:SW-Q08428 SW:Q08428 Q08428 saccharomyces ( 113) [f]   96   34    0.59
+NR_SC:SW-FLO5_YEAST SW:FLO5_YEAST P38894 sacch (1075) [f]  104   37    0.71
+NR_SC:SW-YK82_YEAST SW:YK82_YEAST P36170 sacch (1169) [f]  104   37    0.76
+NR_SC:SW-YOD0_YEAST SW:YOD0_YEAST Q08193 sacch ( 484) [f]  100   35    0.82
+NR_SC:SW-CW14_YEAST SW:CW14_YEAST O13547 sacch ( 238) [f]   97   34    0.84
+NR_SC:SW-TIR1_YEAST SW:TIR1_YEAST P10863 sacch ( 254) [r]   97   34    0.87
+NR_SC:SW-Q08873 SW:Q08873 Q08873 saccharomyces ( 200) [f]   95   34       1
+NR_SC:SW-TIR2_YEAST SW:TIR2_YEAST P33890 sacch ( 251) [f]   95   34     1.2
+NR_SC:SW-CBF5_YEAST SW:CBF5_YEAST P33322 sacch ( 483) [f]   97   35     1.4
+NR_SC:SW-KRE1_YEAST SW:KRE1_YEAST P17260 sacch ( 313) [f]   95   34     1.4
+NR_SC:GP-CAA52447_1 gi|396560|emb|CAA52447.1 ( ( 250) [f]   94   34     1.4
+NR_SC:GP-CAA81388_1 gi|439289|emb|CAA81388.1 ( ( 751) [f]   98   35     1.6
+NR_SC:SW-VRP1_YEAST SW:VRP1_YEAST P37370 sacch ( 817) [f]   98   35     1.7
+NR_SC:SW-Q07229 SW:Q07229 Q07229 saccharomyces ( 817) [f]   98   35     1.7
+NR_SC:SW-Q12218 SW:Q12218 Q12218 saccharomyces ( 487) [f]   94   34     2.3
+NR_SC:GP-AAA35091_1 gi|295671|gb|AAA35091.1 (L ( 406) [f]   92   33     2.8
+NR_SC:SW-SR40_YEAST SW:SR40_YEAST P32583 sacch ( 406) [f]   92   33     2.8
+NR_SC:SW-TIP1_YEAST SW:TIP1_YEAST P27654 sacch ( 210) [f]   89   32       3
+NR_SC:SW-O94086 SW:O94086 O94086 saccharomyces ( 168) [f]   88   32     3.1
+NR_SC:SW-YM8Z_YEAST SW:YM8Z_YEAST Q04951 sacch ( 389) [f]   91   33     3.3
+NR_SC:SW-YGC8_YEAST SW:YGC8_YEAST P53189 sacch ( 542) [f]   92   33     3.5
+NR_SC:SW-YG1F_YEAST SW:YG1F_YEAST P53214 sacch ( 551) [f]   92   33     3.5
+NR_SC:GP-AAA35015_1 gi|172526|gb|AAA35015.1 (M ( 570) [f]   92   33     3.6
+NR_SC:SW-HRB1_YEAST SW:HRB1_YEAST P38922 sacch ( 429) [f]   90   33     4.2
+NR_SC:SW-MID2_YEAST SW:MID2_YEAST P36027 sacch ( 376) [f]   89   33     4.5
+NR_SC:SW-Q08438 SW:Q08438 Q08438 saccharomyces ( 674) [r]   91   33     4.8
+NR_SC:SW-CCC1_YEAST SW:CCC1_YEAST P47818 sacch ( 322) [f]   88   32     4.8
+
+
+>>NR_SC:SW-YNN2_YEAST SW:YNN2_YEAST P53914 saccharomyces  (1056 aa)
+ initn: 3325 init1: 1027 opt: 2172  Z-score: 2877.6  bits: 547.0 E(): 1.6e-154
+Smith-Waterman score: 3401;  51.588% identity (58.124% ungapped) in 1165 aa overlap (2180-5623:3-1053)
+
+    2180      2210      2240      2270      2300      2330         
+5X_189 RKQLDPRIPALINNGVKANHRSFFVMVGDKGRDQVCPGMQAAMRFD*HRCR/LVNLHFLL
+       .: .: :::.:: :::....::.::.:::..:.:                  : :::.:.
+NR_SC: KKAIDSRIPSLIRNGVQTKQRSIFVIVGDRARNQ------------------LPNLHYLM
+             10        20        30                          40    
+
+     2360      2390       2420      2450      2480      2510       
+5X_189 SQARVSSRPSVLWCYKKD-LGFTT*VAASENLQQTIYFRPIATSHRKKREAKIKRDVKRG
+        .: ..   :::: :::  ::::                    ::::::: :::...:::
+NR_SC: MSADLKMNKSVLWAYKKKLLGFT--------------------SHRKKRENKIKKEIKRG
+           50        60                            70        80    
+
+      2540      2570      2600      2630      2660      2690       
+5X_189 IRDANEQDPFELFVTVTDIRYTYYKDSAKILGQTFGMLVLQDYEAITPNLLARTIETVEG
+        :..::.:::: :..  .:::.:::.: ::::.:.:: .:::.::.::::::::::::::
+NR_SC: TREVNEMDPFESFISNQNIRYVYYKESEKILGNTYGMCILQDFEALTPNLLARTIETVEG
+           90       100       110       120       130       140    
+
+      2720      2750       2780      2810      2840      2870      
+5X_189 GGIVVLLLKTMSSLKQLYAMAM/DKL*CRDGVE*SDFS*LLI*DVHSRYRTDAHQFVQPR
+       :::::.:::.::::::::.:.: :                    ::.::::.::  :  :
+NR_SC: GGIVVILLKSMSSLKQLYTMTM-D--------------------VHARYRTEAHGDVVAR
+          150       160                            170       180   
+
+       2900      2930      2960      2990      3020      3050      
+5X_189 FNERFILSLGSNPDCLVLDDELNVLPLSKGKDIQIGKAGEEDDRGRKRKAEELKEMKENL
+       :::::::::::::.:::.:::::::::: .:...     :.:.   :.   ::.:.::.:
+NR_SC: FNERFILSLGSNPNCLVVDDELNVLPLSGAKNVKPLPPKEDDELPPKQL--ELQELKESL
+           190       200       210       220       230         240 
+
+       3080      3110      3140      3170      3200      3230      
+5X_189 EGVDIVGSLAKLAKTVDQAKAILTFVEAISEKNLSSTVALTAGRGRGKSAALGLAIGAAL
+       : :. .:::..:.:::.::.:::.:..:::::.:. :::::::::::::::::..:.::.
+NR_SC: EDVQPAGSLVSLSKTVNQAHAILSFIDAISEKTLNFTVALTAGRGRGKSAALGISIAAAV
+             250       260       270       280       290       300 
+
+       3260      3290      3320      3350      3380      3410      
+5X_189 AHDYSNIFVTSPDPENLKTLFEFVFKALDALGYEEHIDYDVVQSTNPDFKKAIVRVNIFR
+       .: :::::::::.::::::::::.::..:::::.::::::..:::::::.::::::.: :
+NR_SC: SHGYSNIFVTSPSPENLKTLFEFIFKGFDALGYQEHIDYDIIQSTNPDFNKAIVRVDIKR
+             310       320       330       340       350       360 
+
+       3440      3470      3500      3530      3560      3590      
+5X_189 GHRQTIQYISPEDSHVLGQAELVIIDEAAAIPLPLVRKLIGPYLVFMASTINGYEGTGRS
+        :::::::: :.: .::::::::.::::::::::.:..:.::::::::::::::::::::
+NR_SC: DHRQTIQYIVPQDHQVLGQAELVVIDEAAAIPLPIVKNLLGPYLVFMASTINGYEGTGRS
+             370       380       390       400       410       420 
+
+       3620      3650      3680      3710      3740      3770      
+5X_189 LSIKLIQQLREQTRPSITKDSENAAASSAGSSSKAAAAGRSGAGLVRSLREIKLDEPIRY
+       ::.:::::::.:.  :  .....:..:  .. . .   ..:     :.::::.:::::::
+NR_SC: LSLKLIQQLRNQNNTSGRESTQTAVVSRDNKEKDSHLHSQS-----RQLREISLDEPIRY
+             430       440       450       460            470      
+
+       3800      3830         3860      3890      3920      3950   
+5X_189 SPGDNVEKWLNNLLCLDATIVSK---SIQGCPHPSKCELYYVNRDTLFSYHPASEVFLQR
+       .::: .:::::.:::::.:....   . .: ::::.:.:. :::::::::::.:: ::..
+NR_SC: APGDPIEKWLNKLLCLDVTLIKNPRFATRGTPHPSQCNLFVVNRDTLFSYHPVSENFLEK
+        480       490       500       510       520       530      
+
+          3980      4010      4040      4070       4100      4130  
+5X_189 MMALYVASHYKNSPNDLQMLSDAPAHHLFVLLPPIDEND-NTLPDPLVVLQVALEGNISR
+       ::::::.:::::::::::..::::::.::::::::: .: . .:::: :.:.::::.::.
+NR_SC: MMALYVSSHYKNSPNDLQLMSDAPAHKLFVLLPPIDPKDGGRIPDPLCVIQIALEGEISK
+        540       550       560       570       580       590      
+
+           4160      4190      4220      4250      4280      4310  
+5X_189 EAILKEMAQSGMRSSGDMIPWIISTQFQDNDFATLSGARVVRIATHPDYARMGYGSRAME
+       :.. . ... :.:..::.:::.:: ::::..::.:::::.:::::.:.:: :::::::.:
+NR_SC: ESVRNSLSR-GQRAGGDLIPWLISQQFQDEEFASLSGARIVRIATNPEYASMGYGSRAIE
+        600        610       620       630       640       650     
+
+           4340      4370       4400      4430      4460      4490 
+5X_189 ALESFYNGTSYNFDDVPVDMGESFAD\VPRSDL*VTSFIPFPQNRTSTECVSQNANLQND
+        :.....:         .::.:   : :  .:  .        .:.: . .... :: .:
+NR_SC: LLRDYFEGKF-------TDMSE---D-VRPKDYSI--------KRVSDKELAKT-NLLKD
+         660              670                   680       690      
+
+            4520      4550      4580      4610      4640      4670 
+5X_189 TIAIRDPSRMPPLLQRLSERKPETLDYLGVSFGLTRDLLRFWKKGGFTPLYASQKENALT
+        . .:: . .:::: .:::. :. : :::::.:::..: .:::...:.:.:  :  : ::
+NR_SC: DVKLRDAKTLPPLLLKLSEQPPHYLHYLGVSYGLTQSLHKFWKNNSFVPVYLRQTANDLT
+         700       710       720       730       740       750     
+
+            4700      4730      4760      4790      4820      4850 
+5X_189 GEYTFVMLKVLASAGGGGEWLGAFAQGMSCLLLQDEVHMGND*RL*TDFRQRFMNLLSYE
+       ::.: :::.::   :  ..::  ::.                     :::.::..::::.
+NR_SC: GEHTCVMLNVLE--GRESNWLVEFAK---------------------DFRKRFLSLLSYD
+         760         770                            780       790  
+
+            4880      4910          4940      4970      5000       
+5X_189 AFKKFDASIALSILESTVPRNSPSPAP----KLLTNTELSSLLTPFDIKRLESYADSMLD
+        :.:: :  :::..::.   .. :       : :: :.:.....:::.:::.::....::
+NR_SC: -FHKFTAVQALSVIESSKKAQDLSDDEKHDNKELTRTHLDDIFSPFDLKRLDSYSNNLLD
+             800       810       820       830       840       850 
+
+      5030      5060      5090        5120      5150      5180     
+5X_189 YHVVLDLVPTIASLFFGKRLETS--LPPAQQAILLALGLQRKNVEALENELGITSTQTLA
+       :::. :..: .: :.:: ..  :  :  .:.:::::.::::::.... .::.. :.::.:
+NR_SC: YHVIGDMIPMLALLYFGDKMGDSVKLSSVQSAILLAIGLQRKNIDTIAKELNLPSNQTIA
+             860       870       880       890       900       910 
+
+        5210      5240      5270           5300      5330      5360
+5X_189 LFGKVLRKMTKSLEDIRKASIASELP-----AEPTLAGRSANGSNKFVALQQTIEQDLAD
+       .:.:..:::.. .... . ::   ::     :   . :.  .. :   ::.: .:.:: .
+NR_SC: MFAKIMRKMSQYFRQLLSQSIEETLPNIKDDAIAEMDGEEIKNYNAAEALDQ-MEEDLEE
+             920       930       940       950       960        970
+
+             5390      5420      5450       5480      5510         
+5X_189 SAVQLNGEDDDASKKEQRELLNTLNMEEFAI-DQGGDWTEAEKQVERLASGKGGTRLSST
+       ..    .:  .: ...:.::.:.::....:: :.. .:.:..:..:  :..:: . :.. 
+NR_SC: AG----SEAVQAMREKQKELINSLNLDKYAINDNSEEWAESQKSLEIAAKAKGVVSLKTG
+                  980       990      1000      1010      1020      
+
+    5540       5570      5600       
+5X_189 VSVKVDKLDD\AKRRRRRARMRVPRMRRR
+        .  ..: .:   :.. .: :. ::  ..
+NR_SC: KKRTTEKAED-IYRQEMKA-MKKPRKSKK
+       1030       1040       1050   
+
+>>NR_SC:SW-MPCP_YEAST SW:MPCP_YEAST P23641 saccharomyces  (311 aa)
+rev-comp initn: 1094 init1: 429 opt: 432  Z-score: 563.4  bits: 117.1 E(): 1.3e-25
+Smith-Waterman score: 964;  54.242% identity (66.543% ungapped) in 330 aa overlap (7938-6947:19-287)
+
+      7930      7900      7870      7840      7810      7780       
+5X_18- RFALAGALGCAVTHGALTPVDVVKTRIQLEPEVYNRVGRFFNSS*GF*EL*GVVLMSQT\
+       .::::::.::. ::....:.::::::::::: :::                         
+NR_SC: KFALAGAIGCGSTHSSMVPIDVVKTRIQLEPTVYN-------------------------
+       20        30        40        50                            
+
+       7750      7720      7690      7660      7630      7600      
+5X_18- KGMVASFRQIIAKEGAGALLTGFGPTAVGYAIQGAFKFGG*VMMSLQITA*SRANLPISY
+       ::::.::.:::: ::::::::::::: .::.:::::::::                   :
+NR_SC: KGMVGSFKQIIAGEGAGALLTGFGPTLLGYSIQGAFKFGG-------------------Y
+            60        70        80        90                       
+
+       7570      7540      7510      7480      7450      7420      
+5X_18- EFWKKKAIDLVGVDKARENRQAIYLGASAIAEFFADIALCPLEATRIRLVSQPSFANGLS
+       : .::  :: .: : : . ....:.:..:.:::.:::::::::::::::::::.::::: 
+NR_SC: EVFKKFFIDNLGYDTASRYKNSVYMGSAAMAEFLADIALCPLEATRIRLVSQPQFANGLV
+          100       110       120       130       140       150    
+
+       7390      7360      7330      7300       7270      7240     
+5X_18- GGFLRILREEGPAAFYAGFGPILFKQVPYTMAKFAV*VDRTA*\YQTFG*YYRSYEVAVE
+       ::: :::.::: ..::.:: ::::::.::..::: :    .   :  :.           
+NR_SC: GGFSRILKEEGIGSFYSGFTPILFKQIPYNIAKFLVFERASEF-YYGFAG----------
+          160       170       180       190        200             
+
+        7210      7180      7150      7120      7090      7060     
+5X_18- KILKATGKSKDSLTGGQLTGLNLTSGLIAGLAAAVISQPADTLLSKINKTKGAPGQSTTS
+                :..:.. . : ::: ::: ::::::..::::::::::.:::: ::::::..
+NR_SC: --------PKEKLSSTSTTLLNLLSGLTAGLAAAIVSQPADTLLSKVNKTKKAPGQSTVG
+                   210       220       230       240       250     
+
+        7030      7000      6970       
+5X_18- RLVQMAGQLGVSGLFTGMTTRLVMIGTLTAGQ
+        :.:.: :::  : :.:. :::::.::::. :
+NR_SC: LLAQLAKQLGFFGSFAGLPTRLVMVGTLTSLQ
+         260       270       280       
+
+>>NR_SC:SW-YEO3_YEAST SW:YEO3_YEAST P40035 saccharomyces  (300 aa)
+rev-comp initn: 412 init1: 136 opt: 154  Z-score: 192.8  bits: 48.5 E(): 5.7e-05
+Smith-Waterman score: 469;  33.043% identity (41.455% ungapped) in 345 aa overlap (7943-6915:18-294)
+
+            7920      7890      7860       7830      7800          
+5X_18- YAD/CALAGALGCAVTHGALTPVDVVKTRIQLEPEVY\TGLVVFSTVPKDFENCKAWC*C
+       ::  :.:.: ..:. ::...::.:.:: :.:..:..: :.                    
+NR_SC: YAT-CTLGGIIACGPTHSSITPLDLVKCRLQVNPKLY-TS--------------------
+        20         30        40        50                          
+
+   7770      7740      7710      7680      7650      7620          
+5X_18- HKQ*GMVASFRQIIAKEGAGALLTGFGPTAVGYAIQGAFKFGG*VMMSLQITA*SRANLP
+             . .::.:::.::   . :::: : :::..::: :.::  ...            
+NR_SC: -----NLQGFRKIIANEGWKKVYTGFGATFVGYSLQGAGKYGGYEYFKHL----------
+               60        70        80        90       100          
+
+   7590      7560      7530      7500      7470      7440          
+5X_18- ISYEFWKKKAIDLVGVDKARENRQAIYLGASAIAEFFADIALCPLEATRIR-LVSQPSFA
+         :  : . ..             ..:: ::: :::.::: :::.:: ...  ...: : 
+NR_SC: --YSSWLSPGV-------------TVYLMASATAEFLADIMLCPFEAIKVKQQTTMPPFC
+                             110       120       130       140     
+
+    7410      7380       7350      7320      7290      7260        
+5X_18- NGLSGGFLRILREEGP-AAFYAGFGPILFKQVPYTMAKFAV*VDRTA**SNIWLILS\SY
+       :..  :. ..  : :   ::: :. :.  .:.:::: ::                 . :.
+NR_SC: NNVVDGWKKMYAESGGMKAFYKGIVPLWCRQIPYTMCKF-----------------T-SF
+         150       160       170       180                         
+
+      7230      7200      7170      7140      7110      7080       
+5X_18- EVAVEKILKATGKSKDSLTGGQLTGLNLTSGLIAGLAAAVISQPADTLLSKINKTKGAPG
+       :  :.:: ..  :.:. ... :  ......: .::.  :..:.:::...::::. . : .
+NR_SC: EKIVQKIYSVLPKKKEEMNALQQISVSFVGGYLAGILCAAVSHPADVMVSKINSERKA-N
+       190       200       210       220       230       240       
+
+      7050      7020      6990      6960       6930     
+5X_18- QSTTSRLVQMAGQLGVSGLFTGMTTRLVMIGTLTAGQ/W*VSNAFDAGV
+       .: .    ..  ..: .::..:. .:.:::::::. : : . ..: : :
+NR_SC: ESMSVASKRIYQKIGFTGLWNGLMVRIVMIGTLTSFQ-WLIYDSFKAYV
+        250       260       270       280        290    
+
+>>NR_SC:SW-RIM2_YEAST SW:RIM2_YEAST P38127 saccharomyces  (377 aa)
+rev-comp initn:  79 init1:  79 opt: 134  Z-score: 164.9  bits: 43.6 E(): 0.002
+Smith-Waterman score: 145;  26.047% identity (30.769% ungapped) in 215 aa overlap (7929-7299:180-366)
+
+   7930      7900      7870      7840      7810       7780         
+5X_18- LAGALGCAVTHGALTPVDVVKTRIQLEPEVYNRVGRFFNSS*GF\WNCKAWC*CHKQ*GM
+       .:.: .  .:  : .:. ..:::.::.    . : .. ::     :.:            
+NR_SC: MAAATAGWATATATNPIWLIKTRVQLDKAGKTSVRQYKNS-----WDC------------
+     180       190       200       210            220              
+
+    7750      7720      7690      7660      7630      7600         
+5X_18- VASFRQIIAKEGAGALLTGFGPTAVGYAIQGAFKFGG*VMMSLQITA*SRANLPISYEFW
+          ....: .::  .:  :.. . .: ...: ...    .:.  :   :  ..  . :  
+NR_SC: ---LKSVIRNEGFTGLYKGLSASYLG-SVEGILQWLLYEQMKRLIKERSIEKFGYQAEGT
+               230       240        250       260       270        
+
+    7570      7540      7510      7480      7450      7420         
+5X_18- KKKAIDLVGVDKARENRQAIYLGASAIAEFFADIALCPLEATRIRLVSQPSFA-----NG
+       :. .      .:..:  :    :....:.: :.::  : :..: :: . :.       .:
+NR_SC: KSTS------EKVKEWCQ--RSGSAGLAKFVASIATYPHEVVRTRLRQTPKENGKRKYTG
+      280             290         300       310       320       330
+
+         7390      7360      7330          
+5X_18- LSGGFLRILREEGPAAFYAGFGPILFKQVPYTMAKF
+       :  .:  :..:::  ..:.:. : :.. :: ..  :
+NR_SC: LVQSFKVIIKEEGLFSMYSGLTPHLMRTVPNSIIMF
+              340       350       360      
+
+>>NR_SC:SW-Q05330 SW:Q05330 Q05330 saccharomyces cerevis  (302 aa)
+ initn:  76 init1:  76 opt: 122  Z-score: 150.1  bits: 40.6 E(): 0.014
+Smith-Waterman score: 123;  24.713% identity (28.667% ungapped) in 174 aa overlap (896-1372:55-219)
+
+             920       950       980      1010      1040      1070 
+5X_189 RTHRKRYQRHFHPHHRRHHL*THHRSHLDRNR*RRPLAANLQRLVPVCDRPQPIPNNH*C
+       :::..... : : :. :.:   :::.:: ..: ..      ::   . .: . : . :  
+NR_SC: RTHQHHHRTHQHHHRTRQH---HHRTHLHHHRIHQRHHHIRQRHHHIRQRRHRILQRHHR
+           60        70           80        90       100       110 
+
+            1100      1130                    1160       1190      
+5X_189 QRHEKSKEGRRNENTPLRLL--------------TA*PRQGPSSL*FHPVVV-HSALVPH
+        :...     : . : ::::              ..  :.  ..  ..:.   :...: :
+NR_SC: IRQRRLPTLPRRQATALRLLLILLHLHHTLLRHQVTAQRHQVTAQRLQPIPQHHQVIVLH
+             120       130       140       150       160       170 
+
+       1220      1250      1280      1310      1340      1370
+5X_189 R*QTHLCPARLLQEHQAWQHSSLGLAPAEGMGTYRPIGNLKDQRGVKKLRALIQ
+       : .:      : ..:   ::  : : : . ..  . .   . :  .. :  ..:
+NR_SC: RLHT------LQHHHPIPQHHLLTLPPLQTIALLHLLTPQHLQATAQDLLHILQ
+                   180       190       200       210         
+
+>>NR_SC:SW-AMYH_YEAST SW:AMYH_YEAST P08640 saccharomyces  (1367 aa)
+ initn: 142 init1:  95 opt: 128  Z-score: 150.0  bits: 42.7 E(): 0.014
+Smith-Waterman score: 152;  26.792% identity (28.629% ungapped) in 265 aa overlap (3295-4068:302-556)
+
+         3310      3340      3370      3400      3430      3460    
+5X_189 TSKLCLNLSSKPLMPWVTRSTLIMMSCKAQTPTSKRLL*GSTSSEVTDKPSNTSPPKILT
+       ::: :   ..:   :  : :.    : .: .:: .   . :.:. ::.. ...:   . :
+NR_SC: TSKTC---TKKTTTPVPTPSSSTTESSSAPVPTPSSSTTESSSAPVTSSTTESSSAPVPT
+                310       320       330       340       350        
+
+         3490      3520      3550      3580      3610      3640    
+5X_189 FSAKLSLSSLMKLPPSLFPLFVNSSAPISCLWPPPSTVTRVLAVHCPSSSFSNSVNR/PR
+        :.. . ::   .  :      .::::..      .: .    :  :::: ..: .  : 
+NR_SC: PSSSTTESSSAPVTSSTTE---SSSAPVTS----STTESSSAPVPTPSSSTTESSSA-PV
+      360       370          380           390       400        410
+
+          3670      3700       3730      3760      3790      3820  
+5X_189 PSITKDSENAAASSAGSSSKAAA\VVDRALVSCDLFVRSSLMSLSVTPPETMLKSG*TTS
+        : : .: .: ..:. . :..:  :.. .  : .  : ::    : .:  :  .: . .:
+NR_SC: TSSTTESSSAPVTSSTTESSSAP-VTSSTTESSSAPVTSSTTESSSAPVPTPSSSTTESS
+              420       430        440       450       460         
+
+           3850      3880            3910      3940      3970      
+5X_189 SASMPPSSPNLSKVALTLPNASFT------MSTATLSSLITPLQKCSCKG*WRSTS-LPT
+       :: .  :. . :.. .  :..: :      ....:  :  .:.   : . .  :..  ::
+NR_SC: SAPVTSSTTESSSAPVPTPSSSTTESSSAPVTSSTTESSSAPVPTPSSSTTESSSAPAPT
+     470       480       490       500       510       520         
+
+        4000      4030      4060  
+5X_189 TRTLLTTCRCFPMLPLIIFSFSSPLST
+         .  :     :.      : :.:. :
+NR_SC: PSSSTTESSSAPVTSSTTESSSAPVPT
+     530       540       550      
+
+>>NR_SC:SW-PET8_YEAST SW:PET8_YEAST P38921 saccharomyces  (284 aa)
+rev-comp initn:  91 init1:  91 opt: 121  Z-score: 149.1  bits: 40.3 E(): 0.016
+Smith-Waterman score: 121;  34.146% identity (35.897% ungapped) in 82 aa overlap (7583-7338:172-249)
+
+           7560      7530      7500      7470      7440      7410  
+5X_18- YEFWKKKAIDLVGVDKARENRQAIYLGASAIAEFFADIALCPLEATRIRLVSQPSFANGL
+       ::. ::      : ....  . ::    ..::  .:  .  ::.  . ::. . . :. :
+NR_SC: YEYLKKTWAKANGQSQVEPWKGAI---CGSIAGGIAAATTTPLDFLKTRLMLNKTTAS-L
+             180       190          200       210       220        
+
+           7380      7350    
+5X_18- SGGFLRILREEGPAAFYAGFGP
+       .. ..:: ::::::.:..: ::
+NR_SC: GSVIIRIYREEGPAVFFSGVGP
+       230       240         
+
+>>NR_SC:SW-AGA1_YEAST SW:AGA1_YEAST P32323 saccharomyces  (725 aa)
+ initn: 156 init1:  88 opt: 124  Z-score: 148.1  bits: 41.4 E(): 0.018
+Smith-Waterman score: 144;  23.699% identity (26.032% ungapped) in 346 aa overlap (3286-4232:176-520)
+
+            3310      3340      3370      3400      3430      3460 
+5X_189 TPKTSKLCLNLSSKPLMPWVTRSTLIMMSCKAQTPTSKRLL*GSTSSEVTDKPSNTSPPK
+       : ...    .:::    :  : ..    : .... ...   ....:: .. .::.::  .
+NR_SC: TTSSNPTTTSLSSTSTSPSSTSTSPSSTSTSSSSTSTSSSSTSTSSSSTSTSPSSTSTSS
+         180       190       200       210       220       230     
+
+            3490      3520      3550      3580      3610       3640
+5X_189 ILTFSAKLSLSSLMKLPPSLFPLFVNSSAPISCLWPPPSTVTRVLAVHCPSSSFSN/YRE
+        :: ... : :. ..   .      .: .  :      ::     ..   :.: :. :  
+NR_SC: SLTSTSSSSTSTSQSSTSTSSSSTSTSPSSTSTSSSSTSTSPSSKSTSASSTSTSS-YST
+         240       250       260       270       280       290     
+
+             3670      3700      3730       3760      3790         
+5X_189 QTRPSITKDSENAAASSAGSSSKAAAAGRSGAGL\GDLFVRSSLMSLSVTPPETMLKSG*
+       .: ::.:..: . :..: .:.: ...   : ..: :. .. ::  :.:.  : : . :  
+NR_SC: STSPSLTSSSPTLASTSPSSTSISSTFTDSTSSL-GSSIASSST-SVSLYSPSTPVYSVP
+          300       310       320        330        340       350  
+
+    3820                     3850      3880      3910       3940   
+5X_189 TTSSASMPP---------------SSPNLSKVALTLPNASFTMSTA-TLSSLITPLQKCS
+       .:::    :               ::  ..: ...    ::.:::  :  : .: .    
+NR_SC: STSSNVATPSMTSSTVETTVSSQSSSEYITKSSISTTIPSFSMSTYFTTVSGVTTMYTTW
+            360       370       380       390       400       410  
+
+          3970               4000      4030      4060         4090 
+5X_189 CKG*WRS---------TSLPTTRTLLTTCRCFPMLPLIIFSFSSPLSTRMIIPSL---TL
+       :    .:          .. :  :. :   :.:     ... :  .::. .  :.   :.
+NR_SC: CPYSSESETSTLTSMHETVTTDATVCTHESCMPSQTTSLITSSIKMSTKNVATSVSTSTV
+            420       430       440       450       460       470  
+
+               4120      4150      4180      4210       
+5X_189 LSSFK/CAL--EGNISREAILKEMAQSGMRSSGDMIPWIISTQFQDNDF
+        ::.  :.   : . :  ..    ..:  ... .   :. :   .:.::
+NR_SC: ESSYA-CSTCAETSHSYSSVQTASSSSVTQQTTSTKSWVSSMTTSDEDF
+             480       490       500       510       520
+
+>>NR_SC:SW-PMT_YEAST SW:PMT_YEAST P32332 saccharomyces c  (324 aa)
+rev-comp initn:  86 init1:  86 opt: 120  Z-score: 147.1  bits: 40.1 E(): 0.02
+Smith-Waterman score: 120;  29.630% identity (29.630% ungapped) in 81 aa overlap (7162-6920:231-311)
+
+    7160      7130      7100      7070      7040      7010         
+5X_18- LNLTSGLIAGLAAAVISQPADTLLSKINKTKGAPGQSTTSRLVQMAGQLGVSGLFTGMTT
+       :.::.. :.::..::. .: :..:..: . ::   ..  . ::. .   ::..:. :...
+NR_SC: LHLTASTISGLGVAVVMNPWDVILTRIYNQKGDLYKGPIDCLVKTVRIEGVTALYKGFAA
+              240       250       260       270       280       290
+
+    6980      6950          
+5X_18- RLVMIGTLTAGQCKFQMRLMQ
+       ..  :.  :     :. . :.
+NR_SC: QVFRIAPHTIMCLTFMEQTMK
+              300       310 
+
+>>NR_SC:GP-CAA81388_1 gi|439289|emb|CAA81388.1 (Z26645)   (751 aa)
+rev-comp initn:  83 init1:  83 opt: 115  Z-score: 135.9  bits: 39.2 E(): 0.085
+Smith-Waterman score: 139;  26.996% identity (29.218% ungapped) in 263 aa overlap (7747-6985:222-472)
+
+         7730      7700      7670      7640      7610              
+5X_18- PLSDKSLPRRVPVLFSLVSAPPPSVTPSRVPSSSVGK**CLFK*PPRAALTCQ----LVT
+       :: . ::: .:    .  .::::  ::.   .:.  : .     :: . .       :. 
+NR_SC: PLPSASLPTHVS---NPPQAPPPPPTPTIGLDSKNIKPTDNAVSPPSSEVPAGGLPFLAE
+             230          240       250       260       270        
+
+   7580        7550      7520      7490      7460       7430       
+5X_18- SFGRR--RPLTSLVSTRPVRTDRPSTLVPLPSPSSSPTLLSVPLRPLESGLS/PQPS\LP
+         .::  :  .  ::.  ..:.  ..    : :::.: . .    ::      : :: ::
+NR_SC: INARRSERGAVEGVSSTKIQTENHKSPSQRPLPSSAPPIPTSHAPPLPPTAP-PPPS-LP
+      280       290       300       310       320       330        
+
+       7400      7370      7340      7310      7280        7250    
+5X_18- TVFPVVSLGF*GRKVPLPSTPVSALSSSSRFLIPWPSSPCKSTVL/P**SNIW/PNTIAA
+       .:  . . .  . . : :  :..  :.:.  .   :  :  .  : : ....  ::  ..
+NR_SC: NVTSAPKKATSAPRPPPPPLPAAMSSASTNSVKATPVPPTLAPPL-PNTTSVP-PNKASS
+        340       350       360       370       380         390    
+
+         7220      7190      7160      7130      7100      7070    
+5X_18- TRSPSRRSSRPLASPRTLLLVDSSLVLTLLPVLSPVWPPPLSLNPPTPSCLRSTRPRAPP
+         .:      :   :   . ..:.:  . .: :.:. :::    ::. .    . :  ::
+NR_SC: MPAPPP----PPPPPPGAFSTSSALSASSIP-LAPLPPPP----PPSVATSVPSAPPPPP
+          400           410       420            430       440     
+
+           7040      7010         
+5X_18- --ASLPPLGSSRWLVSSVFPVSSPV*P
+         ..  : .::.    :    :: :.:
+NR_SC: TLTTNKPSASSKQSKISSSSSSSAVTP
+         450       460       470  
+
+>>NR_SC:SW-Q07229 SW:Q07229 Q07229 saccharomyces cerevis  (817 aa)
+rev-comp initn:  83 init1:  83 opt: 115  Z-score: 135.4  bits: 39.3 E(): 0.09
+Smith-Waterman score: 134;  26.996% identity (29.218% ungapped) in 263 aa overlap (7747-6985:222-472)
+
+         7730      7700      7670      7640      7610              
+5X_18- PLSDKSLPRRVPVLFSLVSAPPPSVTPSRVPSSSVGK**CLFK*PPRAALTCQ----LVT
+       :: . ::: .:    .  .::::  ::.   .:.  : .     :: . .       :. 
+NR_SC: PLPSASLPTHVS---NPPQAPPPPPTPTIGLDSKNIKPTDNAVSPPSSEVPAGGLPFLAE
+             230          240       250       260       270        
+
+   7580        7550      7520      7490      7460       7430       
+5X_18- SFGRR--RPLTSLVSTRPVRTDRPSTLVPLPSPSSSPTLLSVPLRPLESGLS/PQPS\LP
+         .::  :  .  ::.  ..:.  ..    : :::.: . .    ::      : :: ::
+NR_SC: INARRSERGAVEGVSSTKIQTENHKSPSQPPLPSSAPPIPTSHAPPLPPTAP-PPPS-LP
+      280       290       300       310       320       330        
+
+       7400      7370      7340      7310      7280        7250    
+5X_18- TVFPVVSLGF*GRKVPLPSTPVSALSSSSRFLIPWPSSPCKSTVL/P**SNIW/PNTIAA
+       .:  . . .  .   : :  :..  :.:.  .   :  :  .  : : ....  ::  ..
+NR_SC: NVTSAPKKATSAPAPPPPPLPAAMSSASTNSVKATPVPPTLAPPL-PNTTSVP-PNKASS
+        340       350       360       370       380         390    
+
+         7220      7190      7160      7130      7100      7070    
+5X_18- TRSPSRRSSRPLASPRTLLLVDSSLVLTLLPVLSPVWPPPLSLNPPTPSCLRSTRPRAPP
+         .:      :   :   . ..:.:  . .: :.:. :::    ::. .    . :  ::
+NR_SC: MPAPPP----PPPPPPGAFSTSSALSASSIP-LAPLPPPP----PPSVATSVPSAPPPPP
+          400           410       420            430       440     
+
+           7040      7010         
+5X_18- --ASLPPLGSSRWLVSSVFPVSSPV*P
+         ..  : .::.    :    :: :.:
+NR_SC: TLTTNKPSASSKQSKISSSSSSSAVTP
+         450       460       470  
+
+>>NR_SC:SW-VRP1_YEAST SW:VRP1_YEAST P37370 saccharomyces  (817 aa)
+rev-comp initn:  83 init1:  83 opt: 115  Z-score: 135.4  bits: 39.3 E(): 0.09
+Smith-Waterman score: 134;  26.996% identity (29.218% ungapped) in 263 aa overlap (7747-6985:222-472)
+
+         7730      7700      7670      7640      7610              
+5X_18- PLSDKSLPRRVPVLFSLVSAPPPSVTPSRVPSSSVGK**CLFK*PPRAALTCQ----LVT
+       :: . ::: .:    .  .::::  ::.   .:.  : .     :: . .       :. 
+NR_SC: PLPSASLPTHVS---NPPQAPPPPPTPTIGLDSKNIKPTDNAVSPPSSEVPAGGLPFLAE
+             230          240       250       260       270        
+
+   7580        7550      7520      7490      7460       7430       
+5X_18- SFGRR--RPLTSLVSTRPVRTDRPSTLVPLPSPSSSPTLLSVPLRPLESGLS/PQPS\LP
+         .::  :  .  ::.  ..:.  ..    : :::.: . .    ::      : :: ::
+NR_SC: INARRSERGAVEGVSSTKIQTENHKSPSQPPLPSSAPPIPTSHAPPLPPTAP-PPPS-LP
+      280       290       300       310       320       330        
+
+       7400      7370      7340      7310      7280        7250    
+5X_18- TVFPVVSLGF*GRKVPLPSTPVSALSSSSRFLIPWPSSPCKSTVL/P**SNIW/PNTIAA
+       .:  . . .  .   : :  :..  :.:.  .   :  :  .  : : ....  ::  ..
+NR_SC: NVTSAPKKATSAPAPPPPPLPAAMSSASTNSVKATPVPPTLAPPL-PNTTSVP-PNKASS
+        340       350       360       370       380         390    
+
+         7220      7190      7160      7130      7100      7070    
+5X_18- TRSPSRRSSRPLASPRTLLLVDSSLVLTLLPVLSPVWPPPLSLNPPTPSCLRSTRPRAPP
+         .:      :   :   . ..:.:  . .: :.:. :::    ::. .    . :  ::
+NR_SC: MPAPPP----PPPPPPGAFSTSSALSASSIP-LAPLPPPP----PPSVATSVPSAPPPPP
+          400           410       420            430       440     
+
+           7040      7010         
+5X_18- --ASLPPLGSSRWLVSSVFPVSSPV*P
+         ..  : .::.    :    :: :.:
+NR_SC: TLTTNKPSASSKQSKISSSSSSSAVTP
+         450       460       470  
+
+>>NR_SC:SW-Q12215 SW:Q12215 Q12215 saccharomyces cerevis  (556 aa)
+ initn:  89 init1:  89 opt: 112  Z-score: 133.5  bits: 38.4 E(): 0.12
+Smith-Waterman score: 115;  27.381% identity (29.677% ungapped) in 168 aa overlap (789-1284:154-311)
+
+            810       840       870        900       930       960 
+5X_189 PSSTCSLLFGPISTKTRDQHM*LATEGNDETAT\TLSYTQKTISTSFSSSPPSTSPLNSS
+       ::.: ::  . ::. ::     . .  ..  :: :.: :. : :.. :..  .::  .::
+NR_SC: PSTTSSLSSAQISSTTRRTSTDMKS--SEMIAT-TVSTTSTTSSSTSSTTSSTTSSTTSS
+           160       170         180        190       200       210
+
+             990      1020      1050      1080      1110      1140 
+5X_189 SESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLMSAS*KEQRGPQK*EHTAPPLNRIT
+       . :: . : :::  :. ..  .. . ::. . ..  ..:        ..  ..  .  .:
+NR_SC: TTSSTTSSSTSSTTSSTTSSTTSSTTSSTTSSTTSSTTS--------STTSSTTSIFSVT
+              220       230       240               250       260  
+
+            1170      1200      1230         1260       
+5X_189 SSGSIFTLVSPCCCPLGTCTPPLTNPFVP---SAPTPGAPGMAALEPWT
+       ::.: .:: :     . . :   .. .::   :. : ..: .... : :
+NR_SC: SSSSSITLSSSEHTTVDSRTSSPSSTLVPVSSSSSTLSTPKVTSMTPST
+            270       280       290       300       310 
+
+>>NR_SC:SW-YH17_YEAST SW:YH17_YEAST P38898 saccharomyces  (153 aa)
+rev-comp initn:  71 init1:  71 opt: 106  Z-score: 132.4  bits: 36.3 E(): 0.13
+Smith-Waterman score: 106;  26.897% identity (28.889% ungapped) in 145 aa overlap (5788-5367:6-144)
+
+      5780      5750      5720      5690      5660      5630       
+5X_18- CIPFRTMQESYILLNQI*VKE*EP*SQI*I*NQKHPLTPSLPSSFHPSPLHSLSSSSSHP
+       :.   .:: : :    : . . .:  .    .. :: ::.     ::   :  .. .  :
+NR_SC: CLTPSSMQYSDIY---IHTPHPHPHPHPHTPTHTHPHTPTPTPHPHPHTPHPHTTPTPTP
+          10           20        30        40        50        60  
+
+         5600      5570       5540      5510      5480      5450   
+5X_18- WH---PHPCPSPPSLCH/PSSLSTFTLTVLDSLAPPLPDASLSTCFSASVQSPP*SIANS
+        :   ::   :  ::   ::   :  :..:   . :::    .  .:.    ::  :. .
+NR_SC: HHTHTPHTTLSNLSLNL-PSHYPTSPLVTLPHSTIPLPT---TIHLSTYYYHPPPIITVT
+             70         80        90       100          110        
+
+          5420       5390        
+5X_18- SMLRVFNSSLCS-FFDASSSSPFSCT
+        .: . ::.  . ..      :  ::
+NR_SC: LQLPISNSTTITLLLPYHPPCPTHCT
+      120       130       140    
+
+>>NR_SC:GP-CAB58511_1 gi|6064291|emb|CAB58511.1 (A74265)  (894 aa)
+ initn:  70 init1:  70 opt: 113  Z-score: 132.3  bits: 38.8 E(): 0.13
+Smith-Waterman score: 113;  27.820% identity (28.244% ungapped) in 133 aa overlap (892-1290:514-644)
+
+           910       940       970      1000      1030      1060   
+5X_189 LSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SL
+       .: .  : .. :: :  :.   .::: :. ::: ::: ::  ..   .   :.. :  : 
+NR_SC: ISSSTTTSTSIFSESSKSSVIPTSSSTSGSSESETSSAGSVSSSSFISSESSKSPTYSSS
+           520       530       540       550       560       570   
+
+          1090      1120      1150      1180      1210      1240   
+5X_189 MSAS*KEQRGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTPG
+                  :..  . :: .   .: .  :::.   :   .::  .. : . :. :  
+NR_SC: SLPLVTSATTSQETASSLPPATTTKTSEQT-TLVTVTSCESHVCTESIS-PAIVSTATVT
+           580       590       600        610       620        630 
+
+          1270      
+5X_189 APGMAALEPWTCP
+       . :...     ::
+NR_SC: VSGVTTEYTTWCP
+             640    
+
+>>NR_SC:SW-YN23_YEAST SW:YN23_YEAST P53832 saccharomyces  (503 aa)
+ initn: 117 init1:  72 opt: 110  Z-score: 131.4  bits: 37.8 E(): 0.15
+Smith-Waterman score: 110;  29.752% identity (30.252% ungapped) in 121 aa overlap (895-1256:165-283)
+
+          910       940       970      1000      1030      1060    
+5X_189 SYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLM
+       : ...: ::. :::  .::  .::: :: : . :.:  :. .. .:.  ::.. . .:  
+NR_SC: SSSSSTTSTTTSSSETTTSSSSSSSSSSTSTTSTTSTTSSTTSTSSS--PSTTSSSTSAS
+          170       180       190       200       210         220  
+
+          1090      1120      1150      1180      1210      1240   
+5X_189 SAS*/TRAKRAAEMRTHRSAS*PHNLVRVHLHSSFTLLLSTRHLYPTVDKPICAQRAYSR
+       :.:  : . .:.   :  ..:   . . .   :: ..  ::..   .: . . .:   . 
+NR_SC: SSSE-TSSTQATSSSTTSTSSSTSTATVTSTPSSTSIGTSTHYTTRVVTQSVVSQANQQA
+             230       240       250       260       270       280 
+
+         
+5X_189 ST
+       ::
+NR_SC: ST
+         
+
+>>NR_SC:SW-YHC8_YEAST SW:YHC8_YEAST P38739 saccharomyces  (605 aa)
+ initn:  90 init1:  90 opt: 110  Z-score: 130.4  bits: 37.9 E(): 0.17
+Smith-Waterman score: 110;  29.545% identity (29.545% ungapped) in 88 aa overlap (4832-5095:216-303)
+
+          4850      4880      4910      4940      4970      5000   
+5X_189 S*TFSHMRHSKSLTLLSLSPSSNLPSLATLPPPHPNFSPTPSSLRSSLRLTSNVLNRTPT
+       . . :    . : ::.: : ::.  :  :     :  . : ::  .:   :: . . .::
+NR_SC: TSSTSTTTSTTSSTLISTSTSSSSSSTPTTTSSAPISTSTTSSTSTSTSTTSPTSSSAPT
+         220       230       240       250       260       270     
+
+          5030      5060      5090 
+5X_189 ACSTITSSSTLFLPSLPYSSARGLKPAY
+       . :. : .:: :  . : ..  .   .:
+NR_SC: SSSNTTPTSTTFTTTSPSTAPSSTTVTY
+         280       290       300   
+
+>>NR_SC:SW-FLO1_YEAST SW:FLO1_YEAST P32768 saccharomyces  (1537 aa)
+ initn:  70 init1:  70 opt: 113  Z-score: 129.4  bits: 39.1 E():  0.2
+Smith-Waterman score: 113;  27.820% identity (28.244% ungapped) in 133 aa overlap (892-1290:1144-1274)
+
+           910       940       970      1000      1030      1060   
+5X_189 LSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SL
+       .: .  : .. :: :  :.   .::: :. ::: ::: ::  ..   .   :.. :  : 
+NR_SC: ISSSTTTSTSIFSESSKSSVIPTSSSTSGSSESETSSAGSVSSSSFISSESSKSPTYSSS
+          1150      1160      1170      1180      1190      1200   
+
+          1090      1120      1150      1180      1210      1240   
+5X_189 MSAS*KEQRGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTPG
+                  :..  . :: .   .: .  :::.   :   .::  .. : . :. :  
+NR_SC: SLPLVTSATTSQETASSLPPATTTKTSEQT-TLVTVTSCESHVCTESIS-PAIVSTATVT
+          1210      1220      1230       1240      1250       1260 
+
+          1270      
+5X_189 APGMAALEPWTCP
+       . :...     ::
+NR_SC: VSGVTTEYTTWCP
+            1270    
+
+>>NR_SC:PIR-S53465 PIR:S53465 flocculation protein FLO1   (1537 aa)
+ initn:  70 init1:  70 opt: 113  Z-score: 129.4  bits: 39.1 E():  0.2
+Smith-Waterman score: 113;  27.820% identity (28.244% ungapped) in 133 aa overlap (892-1290:1144-1274)
+
+           910       940       970      1000      1030      1060   
+5X_189 LSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SL
+       .: .  : .. :: :  :.   .::: :. ::: ::: ::  ..   .   :.. :  : 
+NR_SC: ISSSTTTSTSIFSESSKSSVIPTSSSTSGSSESETSSAGSVSSSSFISSESSKSPTYSSS
+          1150      1160      1170      1180      1190      1200   
+
+          1090      1120      1150      1180      1210      1240   
+5X_189 MSAS*KEQRGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTPG
+                  :..  . :: .   .: .  :::.   :   .::  .. : . :. :  
+NR_SC: SLPLVTSATTSQETASSLPPATTTKTSEQT-TLVTVTSCESHVCTESIS-PAIVSTATVT
+          1210      1220      1230       1240      1250       1260 
+
+          1270      
+5X_189 APGMAALEPWTCP
+       . :...     ::
+NR_SC: VSGVTTEYTTWCP
+            1270    
+
+>>NR_SC:SW-Q06143 SW:Q06143 Q06143 saccharomyces cerevis  (298 aa)
+rev-comp initn: 153 init1: 102 opt: 105  Z-score: 127.5  bits: 36.4 E(): 0.25
+Smith-Waterman score: 105;  30.986% identity (30.986% ungapped) in 71 aa overlap (7509-7299:16-86)
+
+   7510        7480      7450      7420      7390      7360        
+5X_18- PW/W/HSAIAEFFADIALCPLEATRIRLVSQPSFANGLSGGFLRILREEGPAAFYAGFGP
+       :: : ... : .:: ..  ::. ...:: . :     :   .  :: .:: ...:.:.. 
+NR_SC: PW-W-YGGAAGIFATMVTHPLDLAKVRLQAAPMPKPTLFRMLESILANEGVVGLYSGLSA
+            20        30        40        50        60        70   
+
+     7330      7300 
+5X_18- ILFKQVPYTMAKF
+        ...:  :: ..:
+NR_SC: AVLRQCTYTTVRF
+            80      
+
+>>NR_SC:PIR-S58652 PIR:S58652 hypothetical protein YFR03  (216 aa)
+rev-comp initn:  77 init1:  77 opt: 103  Z-score: 126.6  bits: 35.7 E(): 0.28
+Smith-Waterman score: 103;  44.444% identity (44.444% ungapped) in 36 aa overlap (5677-5570:44-79)
+
+         5660      5630      5600          
+5X_18- TPSLPSSFHPSPLHSLSSSSSHPWHPHPCPSPPSLC
+       .::::. : : :.  : ::::  :       : : :
+NR_SC: SPSLPTRFTPCPVAVLPSSSSPSWSFSTSCMPESTC
+            50        60        70         
+
+>>NR_SC:SW-YKT9_YEAST SW:YKT9_YEAST P36045 saccharomyces  (187 aa)
+ initn:  81 init1:  81 opt: 102  Z-score: 126.0  bits: 35.4 E():  0.3
+Smith-Waterman score: 102;  28.235% identity (28.571% ungapped) in 85 aa overlap (4862-5116:96-179)
+
+          4880      4910      4940      4970      5000      5030   
+5X_189 KSLTLLSLSPSSNLPSLATLPPPHPNFSPTPSSLRSSLRLTSNVLNRTPTACSTITSSST
+       .:.. ..  :: .   ::. ::: :  . .:.    .:: :      : :      .. .
+NR_SC: RSMVDIAAHPSPTATVLASSPPPPPPATHVPAEALFTLRETPPPQLATLTLSEEPPATPA
+         100       110       120       130       140       150     
+
+          5060      5090        
+5X_189 LFLPSLPYSSARGLKPAYRLPSRPS
+          :: : . .:: .: .:. . ::
+NR_SC: PSAPSAPSARVRGHSP-HRVGASPS
+         160       170          
+
+>>NR_SC:SW-YAG3_YEAST SW:YAG3_YEAST P39712 saccharomyces  (1322 aa)
+ initn:  71 init1:  71 opt: 109  Z-score: 124.8  bits: 38.0 E(): 0.35
+Smith-Waterman score: 117;  27.612% identity (28.906% ungapped) in 134 aa overlap (892-1290:933-1061)
+
+           910       940        970      1000      1030      1060  
+5X_189 LSYTQKTISTSFSSSPPSTSPLNSS-SESSRSESITSSPGSKPAAFGSGL*PSSAYTE*S
+       .: :  . :    ::  :. : .:: : ::.::. ..: .:. ....:   :.:.:.  :
+NR_SC: ISSTTTSASILSESSKSSVIPTSSSTSGSSESETGSASSASSSSSISSE-SPKSTYSSSS
+            940       950       960       970       980        990 
+
+           1090      1120      1150      1180      1210      1240  
+5X_189 LMSAS*KEQRGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTP
+       :  ..       :.   . ::..   .: .  :::.   :   .::  ... .: .: . 
+NR_SC: LPPVTSATTS--QEITSSLPPVTTTKTSEQT-TLVTVTSCESHVCTESISSAIVSTATVT
+            1000        1010      1020       1030      1040        
+
+           1270      
+5X_189 GAPGMAALEPWTCP
+        . . .    : ::
+NR_SC: VSGATTEYTTW-CP
+     1050       1060 
+
+>>NR_SC:PIR-S51959 PIR:S51959 hypothetical protein YAL06  (1367 aa)
+ initn:  71 init1:  71 opt: 109  Z-score: 124.6  bits: 38.0 E(): 0.36
+Smith-Waterman score: 117;  27.612% identity (28.906% ungapped) in 134 aa overlap (892-1290:978-1106)
+
+           910       940        970      1000      1030      1060  
+5X_189 LSYTQKTISTSFSSSPPSTSPLNSS-SESSRSESITSSPGSKPAAFGSGL*PSSAYTE*S
+       .: :  . :    ::  :. : .:: : ::.::. ..: .:. ....:   :.:.:.  :
+NR_SC: ISSTTTSASILSESSKSSVIPTSSSTSGSSESETGSASSASSSSSISSE-SPKSTYSSSS
+       980       990      1000      1010      1020       1030      
+
+           1090      1120      1150      1180      1210      1240  
+5X_189 LMSAS*KEQRGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTP
+       :  ..       :.   . ::..   .: .  :::.   :   .::  ... .: .: . 
+NR_SC: LPPVTSATTS--QEITSSLPPVTTTKTSEQT-TLVTVTSCESHVCTESISSAIVSTATVT
+       1040        1050      1060       1070      1080      1090   
+
+           1270      
+5X_189 GAPGMAALEPWTCP
+        . . .    : ::
+NR_SC: VSGATTEYTTW-CP
+          1100       
+
+>>NR_SC:SW-Q12444 SW:Q12444 Q12444 saccharomyces cerevis  (126 aa)
+ initn:  79 init1:  79 opt:  99  Z-score: 124.2  bits: 34.5 E(): 0.38
+Smith-Waterman score: 99;  25.472% identity (25.962% ungapped) in 106 aa overlap (5480-5790:19-124)
+
+    5480      5510      5540      5570      5600      5630         
+5X_189 LRSKLRDLHLVRAARDCPAQSA*KWTSLTMTKRRRRRARMRVPRMRRRRGERVEGRRVER
+       .. : :  .  :  :.   .      .:   .: ::. : :  : ::.: .: . :  ..
+NR_SC: MKRKKRKKRKKRRERETMMKIPRILKKLRRKRRTRRKRRKRRKRRRRKRRKRRRKRSPRK
+       20        30        40        50        60        70        
+
+    5660      5690       5720        5750      5780   
+5X_189 RR*GESKRMFLVSYL-DL*LWLLF-FNSYLV/LVKCMILAWF*MVYN
+       ::  ..:  : .  . :    ::: : .. . ...:.    : ....
+NR_SC: RRKRRNKDAFYILIISDPSRSLLFGFRKFSI-IIQCLTYFSFHILFH
+       80        90       100        110       120    
+
+>>NR_SC:SW-TOA1_YEAST SW:TOA1_YEAST P32773 saccharomyces  (286 aa)
+rev-comp initn:  66 init1:  66 opt: 102  Z-score: 123.7  bits: 35.6 E():  0.4
+Smith-Waterman score: 124;  34.091% identity (47.619% ungapped) in 88 aa overlap (998-736:216-278)
+
+       990       960       930       900       870       840       
+5X_18- DVIDSDLDDSDDEFRGDVDGGEDENDVDIVFCVYDKVCCCLIIPLCG*LHMLIARFCGNR
+       : . :.::::::..  . .: ::  : ....:.::::                       
+NR_SC: DEVGSELDDSDDDYLIS-EGEEDGPDENLMLCLYDKV-----------------------
+         220       230        240       250                        
+
+        810       780       750     
+5X_18- S/TRVKNKWKTVFKDGMIHLNGKDYLFAK
+         ::.: .::  .:::.. .: .:: : :
+NR_SC: --TRTKARWKCSLKDGVVTINRNDYTFQK
+               260       270        
+
+>>NR_SC:SW-SIM1_YEAST SW:SIM1_YEAST P40472 saccharomyces  (475 aa)
+ initn: 145 init1:  99 opt: 104  Z-score: 123.7  bits: 36.3 E(): 0.41
+Smith-Waterman score: 120;  27.632% identity (30.216% ungapped) in 152 aa overlap (916-1359:109-251)
+
+             940       970      1000      1030      1060      1090 
+5X_189 STSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLMSAS*KEQ
+       :.. :.:  ..:  .::: .:  :: . : .:. ::  : .  .:. :. :  ::. .  
+NR_SC: SATASTSQGASSSSSSSSATSTLESSSVSSSSEEAAPTSTVVSTSSATQSSASSATKSST
+      110       120       130       140       150       160        
+
+            1120      1150      1180      1210      1240      1270 
+5X_189 RGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTPGAPGMAALE
+        . .    :.   .  .::.:  .  :      :.    :..   ::           ..
+NR_SC: SSTSPSTSTSTSTSSTSSSSSSSSSSSSSSSGSGSIYGDLADFSGPSEK---------FQ
+      170       180       190       200       210                  
+
+                1300      1330         
+5X_189 PWTCPC----RGNGNISADWEFEGPAGGEEAS
+         : ::     :.: :: ::  ::  .: : .
+NR_SC: DGTIPCDKFPSGQGVISIDWIGEGGWSGVENT
+     220       230       240       250 
+
+>>NR_SC:GP-CAA47602_1 gi|4824|emb|CAA47602.1 (X67122) mi  (307 aa)
+rev-comp initn:  77 init1:  77 opt: 102  Z-score: 123.4  bits: 35.6 E(): 0.42
+Smith-Waterman score: 102;  32.609% identity (32.609% ungapped) in 46 aa overlap (7433-7296:252-297)
+
+           7410      7380      7350      7320        
+5X_18- VSQPSFANGLSGGFLRILREEGPAAFYAGFGPILFKQVPYTMAKFA
+       ...:.:.:..:.    .  . : .::. :::: ... .: . : ::
+NR_SC: LQKPKFGNSISSVAKTLYANGGIGAFFKGFGPTMLRAAPANGATFA
+             260       270       280       290       
+
+>>NR_SC:SW-YMC1_YEAST SW:YMC1_YEAST P32331 saccharomyces  (307 aa)
+rev-comp initn:  77 init1:  77 opt: 102  Z-score: 123.4  bits: 35.6 E(): 0.42
+Smith-Waterman score: 102;  32.609% identity (32.609% ungapped) in 46 aa overlap (7433-7296:252-297)
+
+           7410      7380      7350      7320        
+5X_18- VSQPSFANGLSGGFLRILREEGPAAFYAGFGPILFKQVPYTMAKFA
+       ...:.:.:..:.    .  . : .::. :::: ... .: . : ::
+NR_SC: LQKPKFGNSISSVAKTLYANGGIGAFFKGFGPTMLRAAPANGATFA
+             260       270       280       290       
+
+>>NR_SC:SW-YG5F_YEAST SW:YG5F_YEAST P53320 saccharomyces  (366 aa)
+rev-comp initn:  64 init1:  64 opt: 102  Z-score: 122.4  bits: 35.7 E(): 0.48
+Smith-Waterman score: 103;  19.597% identity (21.935% ungapped) in 347 aa overlap (7944-6968:12-342)
+
+        7930      7900      7870      7840             7810        
+5X_18- IRRFALAGALGCAVTHGALTPVDVVKTRIQLEPEVYN-------RVGRFFNSS*GF*EL*
+       ...  :... : ..:   :::.:::. :.: .  . .       .:    .:.  .  ..
+NR_SC: LKERMLSAGAGSVLTSLILTPMDVVRIRLQQQQMIPDCSCDGAAEVPNAVSSGSKMKTFT
+              20        30        40        50        60        70 
+
+     7780      7750        7720       7690          7660      7630 
+5X_18- GVVLMSQTIGYGCLFPTNHC--QGGCRCSS\TGFGPTAVGY----AIQGAFKFGG*VMMS
+       .:   .:... . .:  . :  .  :. ::   :. :  ..    ...:  ..   . ..
+NR_SC: NV--GGQNLNNAKIFWESACFQELHCKNSS-LKFNGTLEAFTKIASVEGITSLWRGISLT
+                80        90        100       110       120        
+
+            7600      7570      7540      7510      7480      7450 
+5X_18- LQITA*SRANLPISYEFWKKKAIDLVGVDKARENRQAIYLGASAIAEFFADIALCPLEAT
+       : ..  .      .::. .    :.  . ..  . . .. ::  ::. ::  .. ::: .
+NR_SC: LLMAIPANMVYFSGYEYIR----DVSPIASTYPTLNPLFCGA--IARVFAATSIAPLELV
+      130       140           150       160         170       180  
+
+             7420      7390      7360      7330      7300          
+5X_18- RIRLVSQPS/SRQRSFRWFP*DFEGGRSRCLLRRFRPYPLQAGSLYHGQVRRVSRPY/WR
+       . .: : :  : . .  :.      ...:  ..   :    . .:..:    .     ::
+NR_SC: KTKLQSIPR-SSKSTKTWMMVKDLLNETRQEMKMVGP----SRALFKG----LEITL-WR
+            190        200       210           220           230   
+
+    7270      7240      7210      7180      7150      7120         
+5X_18- NNQTFG*YYRSYEVAVEKILKATGKSKDSLTGGQLTGLNLTSGLIAGLAAAVISQPAD--
+       .    . :. :::.  :..   . .  .. ..      ...:: :.:. ::. ..: :  
+NR_SC: DVPFSAIYWSSYELCKERLWLDSTRFASKDANWVHFINSFASGCISGMIAAICTHPFDVG
+            240       250       260       270       280       290  
+
+            7090      7060      7030      7000      6970 
+5X_18- ------TLLSKINKTKGAPGQSTTSRLVQMAGQLGVSGLFTGMTTRLVMI
+             ..... .   :  ...  . :  .    :...:.::...:.. :
+NR_SC: KTRWQISMMNNSDPKGGNRSRNMFKFLETIWRTEGLAALYTGLAARVIKI
+            300       310       320       330       340  
+
+>>NR_SC:SW-TIR3_YEAST SW:TIR3_YEAST P40552 saccharomyces  (269 aa)
+ initn:  91 init1:  91 opt: 100  Z-score: 121.4  bits: 35.1 E(): 0.54
+Smith-Waterman score: 100;  29.091% identity (29.358% ungapped) in 110 aa overlap (829-1158:112-220)
+
+            850       880       910       940       970      1000  
+5X_189 QKRAISICN*PQRGMMRQQHTLSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPG
+       :. .::: .  :      ... . .. . :.: :::  :.:  .::: .: : : .:: .
+NR_SC: QSAGISITSLGQTVSESGSESATASSDASSASESSSAASSSASESSSAASSSASESSSAA
+             120       130       140       150       160       170 
+
+           1030      1060      1090      1120      1150  
+5X_189 SKPAAFGSGL*PSSAYTE*SLMSAS*KEQRGPQK*EHTAPPLNRITSSGS
+       :. :. .:.   ::: .: .  :.: : . .      ..   .. .:..:
+NR_SC: SSSASESSSAASSSA-SEAAKSSSSAKSSGSSAASSAASSASSKASSAAS
+             180        190       200       210       220
+
+>>NR_SC:SW-Q08428 SW:Q08428 Q08428 saccharomyces cerevis  (113 aa)
+ initn:  95 init1:  95 opt:  96  Z-score: 120.7  bits: 33.7 E(): 0.59
+Smith-Waterman score: 121;  54.545% identity (60.000% ungapped) in 33 aa overlap (902-1000:31-60)
+
+           920       950       980      
+5X_189 HRKRYQRHFHPHHRRHHL*THHRSHLDRNR*RR
+       ::.: .:: . ::::::   ::: .  : : ::
+NR_SC: HRRRRRRHHRRHHRRHH---HHRRRRRRRRRRR
+               40           50        60
+
+>>NR_SC:SW-FLO5_YEAST SW:FLO5_YEAST P38894 saccharomyces  (1075 aa)
+ initn:  72 init1:  72 opt: 104  Z-score: 119.3  bits: 36.7 E(): 0.71
+Smith-Waterman score: 121;  28.030% identity (31.624% ungapped) in 132 aa overlap (895-1290:707-823)
+
+          910       940       970      1000      1030      1060    
+5X_189 SYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLM
+       : :...: :: :.:  : :  .:.: :: : ::.:   ..:.  .:.: : .. :     
+NR_SC: SSTSSVIPTSSSTSGSSESKTSSASSSSSSSSISSESPKSPTNSSSSLPPVTSATTG---
+        710       720       730       740       750       760      
+
+         1090      1120      1150      1180      1210      1240    
+5X_189 SAS*KEQRGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPFVPSAPTPGA
+                 :..  . :: .   .: .  :::.   :   .::  ... .: .: .  .
+NR_SC: ----------QETASSLPPATTTKTSEQT-TLVTVTSCESHVCTESISSAIVSTATVTVS
+                     770       780        790       800       810  
+
+         1270      
+5X_189 PGMAALEPWTCP
+          .    : ::
+NR_SC: GVTTEYTTW-CP
+            820    
+
+>>NR_SC:SW-YK82_YEAST SW:YK82_YEAST P36170 saccharomyces  (1169 aa)
+ initn:  64 init1:  64 opt: 104  Z-score: 118.8  bits: 36.7 E(): 0.76
+Smith-Waterman score: 104;  25.532% identity (26.471% ungapped) in 141 aa overlap (880-1290:839-978)
+
+     880       910         940       970      1000      1030       
+5X_189 QQHTLSYTQKTISTSF--SSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSA
+       .. . . :.  ::..:   ::: ::    :.. .:..:: .::  .  .  .... :::.
+NR_SC: HETSTASTSVQISSQFVTPSSPISTVAPRSTGLNSQTESTNSSKETMSSENSASVMPSSS
+      840       850       860       870       880       890        
+
+        1060       1090      1120       1150      1180      1210   
+5X_189 YT--E*SLMSAS\EKSKEGRRNENTPLRLLTA*/PSSGSIFTLVSPCCCPLGTCTPPLTN
+        :  ... .... : :.   :...:  :. . . ::..   ::..   :  ..:.  ...
+NR_SC: ATSPKTGKVTSD-ETSSGFSRDRTTVYRMTSET-PSTNEQTTLITVSSCESNSCSNTVSS
+      900       910        920       930        940       950      
+
+          1240      1270      
+5X_189 PFVPSAPTPGAPGMAALEPWTCP
+         : .: :      .    : ::
+NR_SC: AVVSTATTTINGITTEYTTW-CP
+        960       970         
+
+>>NR_SC:SW-YOD0_YEAST SW:YOD0_YEAST Q08193 saccharomyces  (484 aa)
+ initn: 116 init1:  69 opt: 100  Z-score: 118.2  bits: 35.3 E(): 0.82
+Smith-Waterman score: 100;  46.429% identity (48.148% ungapped) in 56 aa overlap (916-1083:405-458)
+
+             940       970      1000      1030      1060       
+5X_189 STSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLMSAS
+       :.: :::  :.:  .:::::: : : .::  :.:.:  ..:  :.: .  : .:.:
+NR_SC: SSSSSSSSSSSSSASSSSESSSSTSKASS--SSPSASETSLLKSAASATSSSQSSS
+          410       420       430         440       450        
+
+>>NR_SC:SW-CW14_YEAST SW:CW14_YEAST O13547 saccharomyces  (238 aa)
+ initn:  77 init1:  77 opt:  97  Z-score: 118.1  bits: 34.3 E(): 0.84
+Smith-Waterman score: 97;  24.183% identity (24.667% ungapped) in 153 aa overlap (784-1234:53-205)
+
+       790        820       850       880       910         940    
+5X_189 EHRLPLVLYSLD-LFPQKRAISICN*PQRGMMRQQHTLSYTQKTISTSFSSS--PPSTSP
+       ::.   :   :: . :.. : .  .  . .  .:. .:. .... :.: :::    :.. 
+NR_SC: EHENSAVKKCLDSICPNNDADAAYSAFKSSCSEQNASLGDSSSSASSSASSSSKASSSTK
+             60        70        80        90       100       110  
+
+          970      1000      1030      1060      1090      1120    
+5X_189 LNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLMSAS*KEQRGPQK*EHTAPPL
+        .::: :: ... .:: .:.  : .:.  :::. .  .  :.: . .    . :...   
+NR_SC: ASSSSASSSTKASSSSASSSTKASSSSAAPSSSKASSTESSSSSSSSTKAPSSEESSSTY
+            120       130       140       150       160       170  
+
+          1150      1180      1210       
+5X_189 \TA*PRQGPSSL*FHPVVVHSALVPHR*QTHLCP
+        ..  .:. :.   :   . :. : ... .   :
+NR_SC: -VSSSKQASSTSEAHSSSAASSTVSQETVSSALP
+             180       190       200     
+
+>>NR_SC:SW-TIR1_YEAST SW:TIR1_YEAST P10863 saccharomyces  (254 aa)
+rev-comp initn:  78 init1:  78 opt:  97  Z-score: 117.7  bits: 34.3 E(): 0.87
+Smith-Waterman score: 97;  39.241% identity (41.892% ungapped) in 79 aa overlap (1483-1263:100-178)
+
+           1460      1430           1400       1370      1340      
+5X_18- SRLLTC*KTRNGKLSCSKHES----PT/SARAGSSSRSP-SPTLESKPEASSPPAGPSNS
+       :::    :. ::  : :   :    :: :. : ::: .: : .  :. ::.:  :.::.:
+NR_SC: SRLEPALKSLNGDASSSAAPSSSAAPT-SSAAPSSSAAPTSSAASSSSEAKSSSAAPSSS
+     100       110       120        130       140       150        
+
+       1310      1280      
+5X_18- QSADMFPFPRQGQVQGSSAA
+       .. .    : ......::::
+NR_SC: EAKSSSAAPSSSEAKSSSAA
+      160       170        
+
+>>NR_SC:SW-Q08873 SW:Q08873 Q08873 saccharomyces cerevis  (200 aa)
+ initn:  85 init1:  85 opt:  95  Z-score: 116.3  bits: 33.7 E():    1
+Smith-Waterman score: 95;  31.868% identity (32.222% ungapped) in 91 aa overlap (7-275:63-153)
+
+       10        40        70        100        130       160      
+5X_189 LVNIIYGVKRGDVNTRYEVIPVLISIQI/DSIS-FLSPTRSYGCLFPSLYYFIDIMNVSA
+       :.::.: .  :..:        .  .:. :.:: ::: .:.::     :.  ::... . 
+NR_SC: LANILYEADTGEANHISWKSSKMPFVQM-DQISQFLSFSRKYGVPEDELFQTIDLFEKKD
+             70        80        90        100       110       120 
+
+        190       220       250        
+5X_189 LFT*T*MLSELSVQLSKVHCDEFPNES*EIST
+              :. ::   .: : :.::  . ..::
+NR_SC: PAIVFQTLKSLSRYANKKHTDRFPVLGPQLST
+             130       140       150   
+
+>>NR_SC:SW-TIR2_YEAST SW:TIR2_YEAST P33890 saccharomyces  (251 aa)
+ initn:  92 init1:  92 opt:  95  Z-score: 115.1  bits: 33.8 E():  1.2
+Smith-Waterman score: 95;  33.846% identity (33.846% ungapped) in 65 aa overlap (889-1083:120-184)
+
+            910       940       970      1000      1030      1060  
+5X_189 TLSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*S
+       : . .  : :.  :::  ..: . ::::.. :   .:: .:. :: .:..  ::  :  .
+NR_SC: TEASSAATSSAVASSSETTSSAVASSSEATSSAVASSSEASSSAATSSAVASSSEATSST
+     120       130       140       150       160       170         
+
+            
+5X_189 LMSAS
+       . :..
+NR_SC: VASST
+     180    
+
+>>NR_SC:SW-CBF5_YEAST SW:CBF5_YEAST P33322 saccharomyces  (483 aa)
+ initn:  77 init1:  77 opt:  97  Z-score: 114.2  bits: 34.6 E():  1.4
+Smith-Waterman score: 97;  20.635% identity (21.757% ungapped) in 252 aa overlap (4951-5685:225-470)
+
+       4960      4990      5020      5050      5080      5110      
+5X_189 TELSSLLTPFDIKRLESYADSMLDYHVVLDLVPTIASLFFGKRLETSLPPAQQAI-----
+       .: ....:  :.   .   :.  :   . ...  . .:. : .  .    : .:.     
+NR_SC: SENDNMVTLHDVMDAQWVYDNTRDESYLRSIIQPLETLLVGYKRIVVKDSAVNAVCYGAK
+          230       240       250       260       270       280    
+
+              5140      5170      5200      5230      5260         
+5X_189 LLALGLQR--KNVEALENELGITSTQTLALFGKVLRKMTKSLEDIRKASIAS/VTSC*AD
+       :.  :: :  ...: : .:. . .:.  :.   . .  : .: .  .. .:: :  :  .
+NR_SC: LMIPGLLRYEEGIE-LYDEIVLITTKGEAIAVAIAQMSTVDLASCDHGVVAS-VKRCIME
+          290        300       310       320       330        340  
+
+     5290      5320       5350      5380      5410      5440       
+5X_189 PRRPISQRV*QVC\PLQQTIEQDLADSAVQLNGEDDDASKKEQRELLNTLNMEEFAIDQG
+         : .  :   .  :. :  .:  ::. ..  :. .. . .. ..    :.  : . ...
+NR_SC: --RDLYPRRWGLG-PVAQKKKQMKADGKLDKYGRVNENTPEQWKKEYVPLDNAEQSTSSS
+              350        360       370       380       390         
+
+      5470      5500      5530      5560      5590      5620       
+5X_189 GDWTEAEKQVERLASGKGGTRLSSTVSVKVDKLDDDKAKAEKGKDAGAKDAKKKRRESGG
+        .  :.:.. ..   .:  . .. . . : .  .::. : .: :    .  .::....  
+NR_SC: QETKETEEEPKK---AKEDSLIKEVETEKEEVKEDDSKKEKKEKKDKKEKKEKKEKKDKK
+     400       410          420       430       440       450      
+
+      5650      5680 
+5X_189 EKGGKKKVRRE*ED
+       ::  ::. .:. ::
+NR_SC: EKKEKKEKKRKSED
+        460       470
+
+>>NR_SC:SW-KRE1_YEAST SW:KRE1_YEAST P17260 saccharomyces  (313 aa)
+ initn:  88 init1:  88 opt:  95  Z-score: 113.9  bits: 33.9 E():  1.4
+Smith-Waterman score: 95;  39.655% identity (40.351% ungapped) in 58 aa overlap (889-1062:139-195)
+
+            910       940       970      1000      1030      1060
+5X_189 TLSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE
+       : ..:... :.. ::.  :.:   ::: :: : . :.: ::  :  :.   ::. .::
+NR_SC: TQTFTHSSTSAT-SSASSSVSSSVSSSGSSSSVKTTTSTGSAVAETGTRPDPSTDFTE
+      140       150        160       170       180       190     
+
+>>NR_SC:GP-CAA52447_1 gi|396560|emb|CAA52447.1 (X74428)   (250 aa)
+ initn:  91 init1:  91 opt:  94  Z-score: 113.8  bits: 33.6 E():  1.4
+Smith-Waterman score: 94;  33.846% identity (33.846% ungapped) in 65 aa overlap (889-1083:119-183)
+
+            910       940       970      1000      1030      1060  
+5X_189 TLSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*S
+       : . .  : :.  :::  ..: . ::::.. :   .:: .:. :: .:..  ::  :  .
+NR_SC: TEASSAATSSAVASSSETTSSAVASSSEATSSAVASSSEASSSAATSSAVASSSEATSSA
+      120       130       140       150       160       170        
+
+            
+5X_189 LMSAS
+       . :..
+NR_SC: VASST
+      180   
+
+>>NR_SC:GP-CAA81388_1 gi|439289|emb|CAA81388.1 (Z26645)   (751 aa)
+ initn:  76 init1:  76 opt:  98  Z-score: 113.2  bits: 35.0 E():  1.6
+Smith-Waterman score: 105;  23.759% identity (24.723% ungapped) in 282 aa overlap (3283-4117:140-414)
+
+             3310      3340      3370      3400       3430         
+5X_189 LTPKTSKLCLNLSSKPLMPWVTRSTLIMMSCKAQTPTSKRLL*GSTS\PRSPTNHPIHLP
+       :.:  .   .  :: : .: .  :.   .    ..:.    : :... :. : :.: :.:
+NR_SC: LSPAPAVPSIPSSSAPPIPDIPSSAAPPIPIVPSSPAPPLPLSGASA-PKVPQNRP-HMP
+     140       150       160       170       180        190        
+
+    3460       3490      3520      3550          3580       3610   
+5X_189 RRFSRSRPS*/RLSSLMKLPPSLFPLFVNSSAPISC----LWPPPSTVTRVLA/DSLSIK
+             :    : :: ..::    : . ..: :         :::  .  .   :: .::
+NR_SC: SVRPAHRSHQ-RKSSNISLPSVSAPPLPSASLPTHVSNPPQAPPPPPTPTIGL-DSKNIK
+       200        210       220       230       240        250     
+
+           3640      3670      3700       3730      3760      3790 
+5X_189 \PFSNSVNRHDHRLPRIARMRLPALLVHLPR\RGCW*IGRWSRAISS*DQA**AYPLLPR
+        : .:.:.  . ..:  .   :  . ..  . ::       ..  .   ..    ::   
+NR_SC: -PTDNAVSPPSSEVPAGGLPFLAEINARRSE-RGAVEGVSSTKIQTENHKSPSQRPLPSS
+          260       270       280        290       300       310   
+
+            3820       3850      3880      3910      3940      3970
+5X_189 RQC*KVVEQPPLP/PMPPSSPNLSKVALTLPNASFTMSTATLSSLITPLQKCSCKG*WRS
+            . . :::: :  :  :.: .:. . :. . .        : . ... : ..   .
+NR_SC: APPIPTSHAPPLP-PTAPPPPSLPNVT-SAPKKATSAPRPPPPPLPAAMSSASTNSVKAT
+           320        330        340       350       360       370 
+
+             4000       4030      4060      4090        
+5X_189 TSLPTTRTLLTTCRCFP\SSRSSSFRSPPPYRRE**YPP*PSCRPSSRA
+          ::    : .    :  ...::. .:::       :: :.   .: :
+NR_SC: PVPPTLAPPLPNTTSVP-PNKASSMPAPPP-----PPPPPPGAFSTSSA
+             380        390       400            410    
+
+>>NR_SC:SW-VRP1_YEAST SW:VRP1_YEAST P37370 saccharomyces  (817 aa)
+ initn:  76 init1:  76 opt:  98  Z-score: 112.7  bits: 35.1 E():  1.7
+Smith-Waterman score: 110;  25.088% identity (26.296% ungapped) in 283 aa overlap (3283-4117:140-414)
+
+             3310      3340      3370      3400       3430         
+5X_189 LTPKTSKLCLNLSSKPLMPWVTRSTLIMMSCKAQTPTSKRLL*GSTS\PRSPTNHPIHLP
+       :.:  .   .  :: : .: .  :.   .    ..:.    : :... :. : :.: :.:
+NR_SC: LSPAPAVPSIPSSSAPPIPDIPSSAAPPIPIVPSSPAPPLPLSGASA-PKVPQNRP-HMP
+     140       150       160       170       180        190        
+
+    3460       3490      3520      3550          3580       3610   
+5X_189 RRFSRSRPS*/RLSSLMKLPPSLFPLFVNSSAPISC----LWPPPSTVTRVLA/DSLSIK
+             :    : :: ..::    : . ..: :         :::  .  .   :: .::
+NR_SC: SVRPAHRSHQ-RKSSNISLPSVSAPPLPSASLPTHVSNPPQAPPPPPTPTIGL-DSKNIK
+       200        210       220       230       240        250     
+
+           3640      3670      3700       3730      3760      3790 
+5X_189 \PFSNSVNRHDHRLPRIARMRLPALLVHLPR\RGCW*IGRWSRAISS*DQA**AYPLLPR
+        : .:.:.  . ..:  .   :  . ..  . ::    :  :  :.. ..   . : :: 
+NR_SC: -PTDNAVSPPSSEVPAGGLPFLAEINARRSE-RGAVE-GVSSTKIQTENHKSPSQPPLPS
+          260       270       280        290        300       310  
+
+             3820       3850      3880      3910      3940         
+5X_189 R-QC*KVVEQPPLP/PMPPSSPNLSKVALTLPNASFTMSTATLSSLITPLQKCSCKG*WR
+             . . :::: :  :  :.: .:. . :. . .  .     : . ... : ..   
+NR_SC: SAPPIPTSHAPPLP-PTAPPPPSLPNVT-SAPKKATSAPAPPPPPLPAAMSSASTNSVKA
+            320        330        340       350       360       370
+
+    3970      4000       4030      4060      4090        
+5X_189 STSLPTTRTLLTTCRCFP\SSRSSSFRSPPPYRRE**YPP*PSCRPSSRA
+       .   ::    : .    :  ...::. .:::       :: :.   .: :
+NR_SC: TPVPPTLAPPLPNTTSVP-PNKASSMPAPPP-----PPPPPPGAFSTSSA
+              380        390       400            410    
+
+>>NR_SC:SW-Q07229 SW:Q07229 Q07229 saccharomyces cerevis  (817 aa)
+ initn:  76 init1:  76 opt:  98  Z-score: 112.7  bits: 35.1 E():  1.7
+Smith-Waterman score: 102;  24.735% identity (25.926% ungapped) in 283 aa overlap (3283-4117:140-414)
+
+             3310      3340      3370      3400       3430         
+5X_189 LTPKTSKLCLNLSSKPLMPWVTRSTLIMMSCKAQTPTSKRLL*GSTS\PRSPTNHPIHLP
+       :.:  .   .  :: : .: .   .   .    ..:.    : :... :. : :.: :.:
+NR_SC: LSPAPAVPSIPSSSAPPIPDIPSFAAPPIPIVPSSPAPPLPLSGASA-PKVPQNRP-HMP
+     140       150       160       170       180        190        
+
+    3460       3490      3520      3550          3580       3610   
+5X_189 RRFSRSRPS*/RLSSLMKLPPSLFPLFVNSSAPISC----LWPPPSTVTRVLA/DSLSIK
+             :    : :: ..::    : . ..: :         :::  .  .   :: .::
+NR_SC: SVRPAHRSHQ-RKSSNISLPSVSAPPLPSASLPTHVSNPPQAPPPPPTPTIGL-DSKNIK
+       200        210       220       230       240        250     
+
+           3640      3670      3700       3730      3760      3790 
+5X_189 \PFSNSVNRHDHRLPRIARMRLPALLVHLPR\RGCW*IGRWSRAISS*DQA**AYPLLPR
+        : .:.:.  . ..:  .   :  . ..  . ::    :  :  :.. ..   . : :: 
+NR_SC: -PTDNAVSPPSSEVPAGGLPFLAEINARRSE-RGAVE-GVSSTKIQTENHKSPSQPPLPS
+          260       270       280        290        300       310  
+
+             3820       3850      3880      3910      3940         
+5X_189 R-QC*KVVEQPPLP/PMPPSSPNLSKVALTLPNASFTMSTATLSSLITPLQKCSCKG*WR
+             . . :::: :  :  :.: .:. . :. . .  .     : . ... : ..   
+NR_SC: SAPPIPTSHAPPLP-PTAPPPPSLPNVT-SAPKKATSAPAPPPPPLPAAMSSASTNSVKA
+            320        330        340       350       360       370
+
+    3970      4000       4030      4060      4090        
+5X_189 STSLPTTRTLLTTCRCFP\SSRSSSFRSPPPYRRE**YPP*PSCRPSSRA
+       .   ::    : .    :  ...::. .:::       :: :.   .: :
+NR_SC: TPVPPTLAPPLPNTTSVP-PNKASSMPAPPP-----PPPPPPGAFSTSSA
+              380        390       400            410    
+
+>>NR_SC:SW-Q12218 SW:Q12218 Q12218 saccharomyces cerevis  (487 aa)
+ initn:  63 init1:  63 opt:  94  Z-score: 110.2  bits: 33.9 E():  2.3
+Smith-Waterman score: 97;  27.778% identity (28.302% ungapped) in 108 aa overlap (775-1092:87-194)
+
+          790        820       850       880       910       940   
+5X_189 SVFEHRLPLV-LYSLDLFPQKRAISICN*PQRGMMRQQHTLSYTQKTISTSFSSSPPSTS
+       :. :: : .:  ::  :.:. .:..   . . .   ..  .. .. . ::: : .: :. 
+NR_SC: SAVEHMLTMVPWYSSRLLPELEAMDASLTTSSSAATSSSEVASSSIASSTSSSVAPSSSE
+         90       100       110       120       130       140      
+
+           970      1000       1030      1060      1090
+5X_189 PLNSSSESSRSESITSS-PGSKPAAFGSGL*PSSAYTE*SLMSAS*KE
+        ..::   : :: ..::   :.  . .:..  ::. .  : .. : .:
+NR_SC: VVSSSVAPSSSEVVSSSVAPSSSEVVSSSVASSSSEVASSSVAPSSSE
+        150       160       170       180       190    
+
+>>NR_SC:GP-AAA35091_1 gi|295671|gb|AAA35091.1 (L11275) s  (406 aa)
+ initn:  78 init1:  78 opt:  92  Z-score: 108.5  bits: 33.3 E():  2.8
+Smith-Waterman score: 109;  30.682% identity (30.682% ungapped) in 88 aa overlap (895-1158:26-113)
+
+          910       940       970      1000      1030      1060    
+5X_189 SYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLM
+       : .... :.: :::  :.:  .::.::: : : .:: .:. .. .:    ::. .  :  
+NR_SC: SSSSSSSSSSSSSSSSSSSSSSSSGESSSSSSSSSSSSSSDSSDSSDSESSSSSSSSSSS
+          30        40        50        60        70        80     
+
+         1090      1120      1150  
+5X_189 SAS*KEQRGPQK*EHTAPPLNRITSSGS
+       :.: ..... .. . ..   .  .::.:
+NR_SC: SSSSSDSESSSESDSSSSGSSSSSSSSS
+          90       100       110   
+
+>>NR_SC:SW-SR40_YEAST SW:SR40_YEAST P32583 saccharomyces  (406 aa)
+ initn:  78 init1:  78 opt:  92  Z-score: 108.5  bits: 33.3 E():  2.8
+Smith-Waterman score: 109;  30.682% identity (30.682% ungapped) in 88 aa overlap (895-1158:26-113)
+
+          910       940       970      1000      1030      1060    
+5X_189 SYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLM
+       : .... :.: :::  :.:  .::.::: : : .:: .:. .. .:    ::. .  :  
+NR_SC: SSSSSSSSSSSSSSSSSSSSSSSSGESSSSSSSSSSSSSSDSSDSSDSESSSSSSSSSSS
+          30        40        50        60        70        80     
+
+         1090      1120      1150  
+5X_189 SAS*KEQRGPQK*EHTAPPLNRITSSGS
+       :.: ..... .. . ..   .  .::.:
+NR_SC: SSSSSDSESSSESDSSSSGSSSSSSSSS
+          90       100       110   
+
+>>NR_SC:SW-TIP1_YEAST SW:TIP1_YEAST P27654 saccharomyces  (210 aa)
+ initn:  77 init1:  77 opt:  89  Z-score: 108.1  bits: 32.3 E():    3
+Smith-Waterman score: 89;  35.000% identity (35.593% ungapped) in 60 aa overlap (913-1089:104-163)
+
+              940       970       1000      1030      1060         
+5X_189 ISTSFSSSPPSTSPLNSSSE-SSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLMSAS*K
+       :.....:  :..:   :::: .: :.. .:: ... :: .:.  :::. .  :   .: :
+NR_SC: IAAALASVSPASSEAASSSEAASSSKAASSSEATSSAAPSSSAAPSSSAAPSSSAESSSK
+           110       120       130       140       150       160   
+
+>>NR_SC:SW-O94086 SW:O94086 O94086 saccharomyces cerevis  (168 aa)
+ initn:  74 init1:  74 opt:  88  Z-score: 107.9  bits: 31.9 E():  3.1
+Smith-Waterman score: 88;  29.348% identity (29.348% ungapped) in 92 aa overlap (7217-7491:5-96)
+
+     7220      7250       7280      7310      7340      7370       
+5X_189 DLLDGNLVAAIVLAKYSI/TYAVRSTYTANLAMV*GTCLKRIGPKPA*KAAGPSSLKILR
+       .. .:.  : .:  . .: :..  ..:. ::..:   ::.  . .:  . . ::  ::: 
+NR_SC: NMEQGKRYAMLVNCSRGI-TFSFNKSYAPNLTVVYVHCLNTAALRPEYNPTTPSFAKILS
+           10        20         30        40        50        60   
+
+      7400      7430      7460      7490
+5X_189 KPPERPLAKDG*ETSLIRVASRGQRAMSAKNSA
+       .  .      :   ::  . :::  .: ::  :
+NR_SC: NILKSVGLGIGAACSLTLAKSRGCVTMVAKIPA
+            70        80        90      
+
+>>NR_SC:SW-YM8Z_YEAST SW:YM8Z_YEAST Q04951 saccharomyces  (389 aa)
+ initn:  69 init1:  69 opt:  91  Z-score: 107.4  bits: 33.0 E():  3.3
+Smith-Waterman score: 91;  47.222% identity (47.222% ungapped) in 36 aa overlap (895-1002:99-134)
+
+          910       940       970      1000
+5X_189 SYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSS
+       : . .:...: :::: :.:  .::. :: : ::..:
+NR_SC: SEAASTVGSSTSSSPSSSSSTSSSASSSASSSISAS
+      100       110       120       130    
+
+>>NR_SC:SW-YGC8_YEAST SW:YGC8_YEAST P53189 saccharomyces  (542 aa)
+ initn:  76 init1:  76 opt:  92  Z-score: 107.0  bits: 33.4 E():  3.5
+Smith-Waterman score: 93;  27.551% identity (28.125% ungapped) in 98 aa overlap (829-1122:192-287)
+
+            850       880       910       940       970      1000  
+5X_189 QKRAISICN*PQRGMMRQQHTLSYTQKTISTSFSSSPPSTSPLNSSSESSRSESITSSPG
+       :  : :  :      . .:..   :.   :.: :::  :..  .::. :: : : .:: .
+NR_SC: QAAATSTLNQQTSTSIASQESTESTNTPTSSSTSSSTSSST--SSSTSSSTSSSTSSSTS
+             200       210       220       230         240         
+
+           1030      1060      1090      1120
+5X_189 SKPAAFGSGL*PSSAYTE*SLMSAS*KEQRGPQK*EHT
+       :. ..  :.   ..: :  .  :.:     .:.   ..
+NR_SC: SSTSSSTSSTQETAATTSEGSSSSSAAITSSPKAIAYS
+     250       260       270       280       
+
+>>NR_SC:SW-YG1F_YEAST SW:YG1F_YEAST P53214 saccharomyces  (551 aa)
+ initn:  63 init1:  63 opt:  92  Z-score: 106.9  bits: 33.4 E():  3.5
+Smith-Waterman score: 123;  27.010% identity (27.815% ungapped) in 311 aa overlap (3034-3951:8-313)
+
+      3040      3070        3100      3130       3160      3190    
+5X_189 RQRSSKR*RKTWK-V*TSLVLLP-SWQRLSTRPRLS*LLS/TAISEKNLSSTVALTAGRG
+       :..::    . :. .  .:. :: :       :  :.  : :: :  .::.. ..:..  
+NR_SC: RKKSSASSLSMWRTILMALTTLPLSVLSQELVPANSTTSS-TAPSITSLSAVESFTSSTD
+        10        20        30        40         50        60      
+
+         3220       3250      3280      3310      3340      3370   
+5X_189 RGKSAALG\SPSVQLLLMTTLTSLLLLLTPKTSKLCLNLSSKPLMPWVTRSTLIMMSCKA
+         .::.:. .::.  . .:.. .   :::  .: :  .:::. ..  :. :.    : ..
+NR_SC: ATSSASLS-TPSIASVSFTSFPQSSSLLT-LSSTLSSELSSSSMQ--VSSSSTSSSSSEV
+         70         80        90        100         110       120  
+
+          3400      3430      3460      3490      3520      3550   
+5X_189 QTPTSKRLL*GSTSSEVTDKPSNTSPPKILTFSAKLSLSSLMKLPPSLFPLFVNSSAPIS
+        . .:.  .  :.:: .  . :.. :   .. ...   :: ..   ::     .:.  .:
+NR_SC: TSSSSSSSISPSSSSSTIISSSSSLPTFTVASTSSTVASSTLSTSSSLVISTSSSTFTFS
+            130       140       150       160       170       180  
+
+            3580      3610      3640      3670      3700      3730 
+5X_189 CL/SAST-INGYEGTGRSLSIKLIQQLREQTRPSITKDSENAAASSAGSSSKAAAAGRSG
+          :.:. :..  .:. : :   . .   .. :: ...  ... ::..:::   . . : 
+NR_SC: SE-SSSSLISSSISTSVSTSSVYVPSSSTSSPPSSSSELTSSSYSSSSSSSTLFSYSSSF
+             190       200       210       220       230       240 
+
+              3760      3790      3820      3850       3880        
+5X_189 A/ASRAISS/SRSSLMSLSVTPPETMLKSG*TTSSASMPPSS-PNLSKVALTLPNASFTM
+       . .: . :: : ::  : : .   ...  ....::. .  :: :..:. . . :..:.: 
+NR_SC: S-SSSSSSS-SSSSSSSSSSSSSSSYFTLSTSSSSSIYSSSSYPSFSSSSSSNPTSSITS
+               250       260       270       280       290         
+
+     3910      3940    
+5X_189 STATLSSLITPLQKCS
+       ..:  :: ::: .. :
+NR_SC: TSA--SSSITPASEYS
+     300         310   
+
+>>NR_SC:GP-AAA35015_1 gi|172526|gb|AAA35015.1 (M16165) S  (570 aa)
+ initn:  79 init1:  79 opt:  92  Z-score: 106.7  bits: 33.4 E():  3.6
+Smith-Waterman score: 113;  26.923% identity (27.723% ungapped) in 104 aa overlap (916-1227:24-124)
+
+             940       970      1000      1030      1060      1090 
+5X_189 STSFSSSPPSTSPLNSSSESSRSESITSSPGSKPAAFGSGL*PSSAYTE*SLMSAS*KEQ
+       ::. ::: :  .: .:..::: .   : : .:. .. . . .: :. :: : . . .  .
+NR_SC: STTESSSAPVPTPSSSTTESSVAPVPTPSSSSNITSSAPSSTPFSSSTESSSVPVPTPSS
+            30        40        50        60        70        80   
+
+            1120      1150      1180      1210     
+5X_189 RGPQK*EHTAPPLNRITSSGSIFTLVSPCCCPLGTCTPPLTNPF
+          . .: .. :..  :. .:.  . .:      : . : . ::
+NR_SC: ---STTESSSAPVSSSTTESSVAPVPTPSSSSNITSSAPSSIPF
+               90       100       110       120    
+
+>>NR_SC:SW-HRB1_YEAST SW:HRB1_YEAST P38922 saccharomyces  (429 aa)
+ initn:  68 init1:  68 opt:  90  Z-score: 105.5  bits: 32.8 E():  4.2
+Smith-Waterman score: 90;  28.947% identity (29.730% ungapped) in 76 aa overlap (6937-7158:28-103)
+
+     6940        6970      7000      7030      7060      7090      
+5X_189 ETYTGQQ*--GYRS*QDEWSYR*RDRKHRADQPSGRA*RW*TGRGRPWSC*S*TRGCRRV
+       .:: :..    ::. ... .:: :.: .  :.: .:       :::  .    .:     
+NR_SC: DTYRGSRDRGEYRGGRERSDYRERERFNNRDNPRSRDRYDDRRRGRDVTGRYGNRRDDYP
+        30        40        50        60        70        80       
+
+       7120      7150  
+5X_189 ER*RRRPNQR*DRK*G
+       .  : : : : : . :
+NR_SC: RSFRSRHNTRDDSRRG
+        90       100   
+
+>>NR_SC:SW-MID2_YEAST SW:MID2_YEAST P36027 saccharomyces  (376 aa)
+ initn:  73 init1:  73 opt:  89  Z-score: 104.9  bits: 32.5 E():  4.5
+Smith-Waterman score: 97;  24.742% identity (27.273% ungapped) in 194 aa overlap (3349-3930:11-186)
+
+           3370      3400      3430      3460      3490      3520  
+5X_189 RSTLIMMSCKAQTPTSKRLL*GSTSSEVTDKPSNTSPPKILTFSAKLSLSSLMKLPPSLF
+       :  :...::  .:  .. .. .:.:.  . . . .:  .. . :. :: :          
+NR_SC: RLLLLILSC-ISTIRAQFFVQSSSSNSSAVSTARSSVSRVSSSSSILSSS----------
+                20        30        40        50                   
+
+           3550      3580      3610      3640       3670      3700 
+5X_189 PLFVNSSAPISCLWPPPSTVTRVLAVHCPSSSFSNSVN/MTRPSITKDSENAAASSAGSS
+        .  .:::  : :    :: .: :. :  ::.   :.. .:  :...:: ....:::.:.
+NR_SC: -MVSSSSADSSSL--TSSTSSRSLVSHTSSSTSIASIS-FTSFSFSSDSSTSSSSSASSD
+       60        70          80        90        100       110     
+
+            3730      3760       3790      3820      3850      3880
+5X_189 SKAAAAGRSGAGLVRSLRE\TSLMSLSVTPPETMLKSG*TTSSASMPPSSPNLSKVALTL
+       :.....   ..  . :    ::  . :..   .. ..  ..:: :   :.:. :    . 
+NR_SC: SSSSSSFSISSTSATSESS-TSSTQTSTSSSSSLSSTPSSSSSPSTITSAPSTS----ST
+         120       130        140       150       160           170
+
+             3910      
+5X_189 PNASFTMSTATLSSLI
+       :...   . .:..:.:
+NR_SC: PSTTAYNQGSTITSII
+              180      
+
+>>NR_SC:SW-Q08438 SW:Q08438 Q08438 saccharomyces cerevis  (674 aa)
+rev-comp initn:  78 init1:  78 opt:  91  Z-score: 104.4  bits: 33.3 E():  4.8
+Smith-Waterman score: 103;  35.714% identity (35.714% ungapped) in 56 aa overlap (1082-915:586-641)
+
+    1080      1050      1020       990       960       930     
+5X_18- DADINDYSV*AEDGHKPEPNAAGLLPGDDVIDSDLDDSDDEFRGDVDGGEDENDVD
+       : : .: .   . : : : :       ::  :.: ::.::.   : :  .:..: :
+NR_SC: DNDEEDDNKKNDTGGKDEDNDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
+         590       600       610       620       630       640 
+
+>>NR_SC:SW-CCC1_YEAST SW:CCC1_YEAST P47818 saccharomyces  (322 aa)
+ initn:  81 init1:  81 opt:  88  Z-score: 104.4  bits: 32.2 E():  4.8
+Smith-Waterman score: 88;  23.188% identity (23.188% ungapped) in 69 aa overlap (2052-2258:247-315)
+
+          2070      2100      2130      2160      2190      2220   
+5X_189 GIIPLFFFFFVPEFQSVGVLNIIFVYIQLLSFSTTYSKHHKQ*GSNSTHASQRLSTMVSK
+       :..::  .::: .  .  . .:: . . :. :. . .:     ::....   .   ::  
+NR_SC: GLVPLVPYFFVSDVGTGLIYSIIVMVVTLFWFGYVKTKLSMGSGSSTSKKVTEGVEMVVV
+        250       260       270       280       290       300      
+
+          2250  
+5X_189 RTTGVSSSW
+         ......:
+NR_SC: GGVAAGAAW
+        310     
+
+
+
+
+7972 residues in 1 query   sequences
+4215311 residues in 9190 library sequences
+ Scomplib [34t10]
+ start: Tue Jan  8 20:41:56 2002 done: Tue Jan  8 20:43:22 2002
+ Scan time: 25.790 Display time: 58.770
+
+Function used was FASTXY [version 3.4t07 Nov 21, 2001]

Added: trunk/packages/bioperl/branches/upstream/current/t/data/8HVP.pdb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/8HVP.pdb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/8HVP.pdb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1864 @@
+HEADER    HYDROLASE(ACID PROTEINASE)              26-OCT-90   8HVP      8HVP   2
+COMPND    HIV-1 PROTEASE (HIV-1 PR) COMPLEX WITH INHIBITOR              8HVP   3
+COMPND   2 VAL-SER-GLN-ASN-LEU-PSI(CH(OH)-CH2)-VAL-ILE-VAL (U-85548E)   8HVP   4
+SOURCE    SYNTHETIC, BASED ON SEQUENCE FROM HUMAN (HOMO SAPIENS)        8HVP   5
+SOURCE   2 IMMUNODEFICIENCY VIRUS TYPE 1 PROTEASE                       8HVP   6
+AUTHOR    M.JASKOLSKI,M.MILLER,A.G.TOMASSELLI,T.K.SAWYER,D.G.STAPLES,   8HVP   7
+AUTHOR   2 R.L.HEINRIKSON,J.SCHNEIDER,S.B.H.KENT,A.WLODAWER             8HVP   8
+REVDAT   1   31-OCT-93 8HVP    0                                        8HVP   9
+JRNL        AUTH   M.JASKOLSKI,A.G.TOMASSELLI,T.K.SAWYER,D.G.STAPLES,   8HVP  10
+JRNL        AUTH 2 R.L.HEINRIKSON,J.SCHNEIDER,S.B.H.KENT,A.WLODAWER     8HVP  11
+JRNL        TITL   STRUCTURE AT 2.5-*ANGSTROMS RESOLUTION OF            8HVP  12
+JRNL        TITL 2 CHEMICALLY SYNTHESIZED HUMAN IMMUNODEFICIENCY VIRUS  8HVP  13
+JRNL        TITL 3 TYPE 1 PROTEASE COMPLEXED WITH A                     8HVP  14
+JRNL        TITL 4 HYDROXYETHYLENE*-BASED INHIBITOR                     8HVP  15
+JRNL        REF    BIOCHEMISTRY                  V.  30  1600 1991      8HVP  16
+JRNL        REFN   ASTM BICHAW  US ISSN 0006-2960                  033  8HVP  17
+REMARK   1                                                              8HVP  18
+REMARK   1 REFERENCE 1                                                  8HVP  19
+REMARK   1  AUTH   A.WLODAWER,M.MILLER,M.JASKOLSKI,B.K.SATHYANARAYANA,  8HVP  20
+REMARK   1  AUTH 2 E.BALDWIN,I.T.WEBER,L.M.SELK,L.CLAWSON,J.SCHNEIDER,  8HVP  21
+REMARK   1  AUTH 3 S.B.H.KENT                                           8HVP  22
+REMARK   1  TITL   CONSERVED FOLDING IN RETROVIRAL PROTEASES. CRYSTAL   8HVP  23
+REMARK   1  TITL 2 STRUCTURE OF A SYNTHETIC /HIV$-1 PROTEASE            8HVP  24
+REMARK   1  REF    SCIENCE                       V. 245   616 1989      8HVP  25
+REMARK   1  REFN   ASTM SCIEAS  US ISSN 0036-8075                  038  8HVP  26
+REMARK   1 REFERENCE 2                                                  8HVP  27
+REMARK   1  AUTH   I.T.WEBER,M.MILLER,M.JASKOLSKI,J.LEIS,A.M.SKALKA,    8HVP  28
+REMARK   1  AUTH 2 A.WLODAWER                                           8HVP  29
+REMARK   1  TITL   MOLECULAR MODELING OF THE /HIV$-1 PROTEASE AND ITS   8HVP  30
+REMARK   1  TITL 2 SUBSTRATE BINDING SITE                               8HVP  31
+REMARK   1  REF    SCIENCE                       V. 243   928 1989      8HVP  32
+REMARK   1  REFN   ASTM SCIEAS  US ISSN 0036-8075                  038  8HVP  33
+REMARK   1 REFERENCE 3                                                  8HVP  34
+REMARK   1  AUTH   M.MILLER,M.JASKOLSKI,J.K.M.RAO,J.LEIS,A.WLODAWER     8HVP  35
+REMARK   1  TITL   CRYSTAL STRUCTURE OF A RETROVIRAL PROTEASE PROVES    8HVP  36
+REMARK   1  TITL 2 RELATIONSHIP TO ASPARTIC PROTEASE FAMILY             8HVP  37
+REMARK   1  REF    NATURE                        V. 337   576 1989      8HVP  38
+REMARK   1  REFN   ASTM NATUAS  UK ISSN 0028-0836                  006  8HVP  39
+REMARK   1 REFERENCE 4                                                  8HVP  40
+REMARK   1  AUTH   J.SCHNEIDER,S.B.H.KENT                               8HVP  41
+REMARK   1  TITL   ENZYMATIC ACTIVITY OF A SYNTHETIC 99 RESIDUE         8HVP  42
+REMARK   1  TITL 2 PROTEIN CORRESPONDING TO THE PUTATIVE /HIV$-1        8HVP  43
+REMARK   1  TITL 3 PROTEASE                                             8HVP  44
+REMARK   1  REF    CELL(CAMBRIDGE,MASS.)         V.  54   363 1988      8HVP  45
+REMARK   1  REFN   ASTM CELLB5  US ISSN 0092-8674                  998  8HVP  46
+REMARK   2                                                              8HVP  47
+REMARK   2 RESOLUTION. 2.5  ANGSTROMS.                                  8HVP  48
+REMARK   3                                                              8HVP  49
+REMARK   3 REFINEMENT. REFINEMENT BY THE                                8HVP  50
+REMARK   3  RESTRAINED LEAST-SQUARES PROCEDURE OF J. KONNERT AND W.     8HVP  51
+REMARK   3  HENDRICKSON (PROGRAM *PROFFT*).  THE R VALUE IS 0.138 FOR   8HVP  52
+REMARK   3  4768 REFLECTIONS IN THE RESOLUTION RANGE 10 TO 2.5          8HVP  53
+REMARK   3  ANGSTROMS.                                                  8HVP  54
+REMARK   3                                                              8HVP  55
+REMARK   3  RMS DEVIATIONS FROM IDEAL VALUES (THE VALUES OF             8HVP  56
+REMARK   3      SIGMA, IN PARENTHESES, ARE THE INPUT ESTIMATED          8HVP  57
+REMARK   3      STANDARD DEVIATIONS THAT DETERMINE THE RELATIVE         8HVP  58
+REMARK   3      WEIGHTS OF THE CORRESPONDING RESTRAINTS)                8HVP  59
+REMARK   3    DISTANCE RESTRAINTS (ANGSTROMS)                           8HVP  60
+REMARK   3      BOND DISTANCE                            0.014(0.018)   8HVP  61
+REMARK   3      ANGLE DISTANCE                           0.053(0.035)   8HVP  62
+REMARK   3      PLANAR 1-4 DISTANCE                      0.067(0.050)   8HVP  63
+REMARK   3    PLANE RESTRAINT (ANGSTROMS)                0.020(0.025)   8HVP  64
+REMARK   3    CHIRAL-CENTER RESTRAINT (ANGSTROMS**3)     0.179(0.150)   8HVP  65
+REMARK   3    NON-BONDED CONTACT RESTRAINTS (ANGSTROMS)                 8HVP  66
+REMARK   3      SINGLE TORSION CONTACT                   0.217(0.300)   8HVP  67
+REMARK   3      MULTIPLE TORSION CONTACT                 0.264(0.300)   8HVP  68
+REMARK   3      POSSIBLE HYDROGEN BOND                   0.306(0.300)   8HVP  69
+REMARK   3    CONFORMATIONAL TORSION ANGLE RESTRAINT (DEGREES)          8HVP  70
+REMARK   3      PLANAR (OMEGA)                             3.7(4.0)     8HVP  71
+REMARK   3      STAGGERED                                 20.0(10.0)    8HVP  72
+REMARK   3      ORTHONORMAL                               18.2(20.0)    8HVP  73
+REMARK   3    ISOTROPIC THERMAL FACTOR RESTRAINTS (ANGSTROMS**2)        8HVP  74
+REMARK   3      MAIN-CHAIN BOND                          2.449(3.000)   8HVP  75
+REMARK   3      MAIN-CHAIN ANGLE                         3.577(3.500)   8HVP  76
+REMARK   3      SIDE-CHAIN BOND                          6.239(5.000)   8HVP  77
+REMARK   3      SIDE-CHAIN ANGLE                         8.633(8.000)   8HVP  78
+REMARK   3      HYDROGEN BOND                           12.014(25.000)  8HVP  79
+REMARK   4                                                              8HVP  80
+REMARK   4 THE DIMER INTERFACE IS COMPOSED OF INTERDIGITATED N- AND     8HVP  81
+REMARK   4 C-TERMINI FROM BOTH SUBUNITS FORMING A FOUR-STRANDED         8HVP  82
+REMARK   4 ANTIPARALLEL BETA-SHEET.                                     8HVP  83
+REMARK   5                                                              8HVP  84
+REMARK   5 THE PEPTIDE BOND BETWEEN RESIDUES LEU I 5 AND VAL I 6 HAS    8HVP  85
+REMARK   5 BEEN REPLACED BY A HYDROXYETHYLENE GROUP CH(OH)-CH2.  THESE  8HVP  86
+REMARK   5 RESIDUES HAVE BEEN RENAMED AS LOV I 5 TO DENOTE THIS         8HVP  87
+REMARK   5 MODIFICATION.                                                8HVP  88
+SEQRES   1 A   99  PRO GLN ILE THR LEU TRP GLN ARG PRO LEU VAL THR ILE  8HVP  89
+SEQRES   2 A   99  ARG ILE GLY GLY GLN LEU LYS GLU ALA LEU LEU ASP THR  8HVP  90
+SEQRES   3 A   99  GLY ALA ASP ASP THR VAL LEU GLU GLU MET ASN LEU PRO  8HVP  91
+SEQRES   4 A   99  GLY LYS TRP LYS PRO LYS MET ILE GLY GLY ILE GLY GLY  8HVP  92
+SEQRES   5 A   99  PHE ILE LYS VAL ARG GLN TYR ASP GLN ILE PRO VAL GLU  8HVP  93
+SEQRES   6 A   99  ILE ABA GLY HIS LYS ALA ILE GLY THR VAL LEU VAL GLY  8HVP  94
+SEQRES   7 A   99  PRO THR PRO VAL ASN ILE ILE GLY ARG ASN LEU LEU THR  8HVP  95
+SEQRES   8 A   99  GLN ILE GLY ABA THR LEU ASN PHE                      8HVP  96
+SEQRES   1 B   99  PRO GLN ILE THR LEU TRP GLN ARG PRO LEU VAL THR ILE  8HVP  97
+SEQRES   2 B   99  ARG ILE GLY GLY GLN LEU LYS GLU ALA LEU LEU ASP THR  8HVP  98
+SEQRES   3 B   99  GLY ALA ASP ASP THR VAL LEU GLU GLU MET ASN LEU PRO  8HVP  99
+SEQRES   4 B   99  GLY LYS TRP LYS PRO LYS MET ILE GLY GLY ILE GLY GLY  8HVP 100
+SEQRES   5 B   99  PHE ILE LYS VAL ARG GLN TYR ASP GLN ILE PRO VAL GLU  8HVP 101
+SEQRES   6 B   99  ILE ABA GLY HIS LYS ALA ILE GLY THR VAL LEU VAL GLY  8HVP 102
+SEQRES   7 B   99  PRO THR PRO VAL ASN ILE ILE GLY ARG ASN LEU LEU THR  8HVP 103
+SEQRES   8 B   99  GLN ILE GLY ABA THR LEU ASN PHE                      8HVP 104
+SEQRES   1 I    7  VAL SER GLN ASN LOV ILE VAL                          8HVP 105
+FTNOTE   1                                                              8HVP 106
+FTNOTE   1 THE INHIBITOR RESIDUE LOV I 5 IS LEU-VAL WITH THE PEPTIDE    8HVP 107
+FTNOTE   1 BOND REPLACED BY CHOH-CH2.                                   8HVP 108
+HET    ABA  A  67       6     ALPHA-AMINO-N-BUTYRIC ACID                8HVP 109
+HET    ABA  A  95       6     ALPHA-AMINO-N-BUTYRIC ACID                8HVP 110
+HET    ABA  B  67       6     ALPHA-AMINO-N-BUTYRIC ACID                8HVP 111
+HET    ABA  B  95       6     ALPHA-AMINO-N-BUTYRIC ACID                8HVP 112
+HET    LOV  I   5      15     LEU-PSI(CHOH-CH2)-VAL                     8HVP 113
+FORMUL   1  ABA    2(C4 H9 N1 O2)                                       8HVP 114
+FORMUL   2  ABA    2(C4 H9 N1 O2)                                       8HVP 115
+FORMUL   3  LOV    C12 H21 N1 O2                                        8HVP 116
+FORMUL   4  HOH   *80(H2 O1)                                            8HVP 117
+HELIX    1  HA GLY A   86  GLY A   94  1                                8HVP 118
+HELIX    2  HB GLY B   86  GLY B   94  1                                8HVP 119
+SHEET    1 COA 8 LYS A  43  GLY A  49  0                                8HVP 120
+SHEET    2 COA 8 GLY A  52  ILE A  66 -1                                8HVP 121
+SHEET    3 COA 8 HIS A  69  GLY A  78 -1                                8HVP 122
+SHEET    4 COA 8 THR A  31  GLU A  34  1                                8HVP 123
+SHEET    5 COA 8 ASN A  83  ILE A  85 -1                                8HVP 124
+SHEET    6 COA 8 GLN A  18  ASP A  25  1                                8HVP 125
+SHEET    7 COA 8 PRO A   9  ILE A  15 -1                                8HVP 126
+SHEET    8 COA 8 GLU A  65  ILE A  66 -1                                8HVP 127
+SHEET    1 COB 8 LYS B  43  GLY B  49  0                                8HVP 128
+SHEET    2 COB 8 GLY B  52  ILE B  66 -1                                8HVP 129
+SHEET    3 COB 8 HIS B  69  GLY B  78 -1                                8HVP 130
+SHEET    4 COB 8 THR B  31  GLU B  34  1                                8HVP 131
+SHEET    5 COB 8 ASN B  83  ILE B  85 -1                                8HVP 132
+SHEET    6 COB 8 GLN B  18  ASP B  25  1                                8HVP 133
+SHEET    7 COB 8 PRO B   9  ILE B  15 -1                                8HVP 134
+SHEET    8 COB 8 GLU B  65  ILE B  66 -1                                8HVP 135
+SHEET    1 INT 4 PRO A   1  THR A   4  0                                8HVP 136
+SHEET    2 INT 4 THR B  96  PHE B  99 -1                                8HVP 137
+SHEET    3 INT 4 THR A  96  PHE A  99 -1                                8HVP 138
+SHEET    4 INT 4 PRO B   1  THR B   4 -1                                8HVP 139
+TURN     1 S1A GLY A  16  GLN A  18     SURFACE                         8HVP 140
+TURN     2 FLA ILE A  50  GLY A  52     FLAP                            8HVP 141
+TURN     3 S2A ILE A  66  HIS A  69     SURFACE                         8HVP 142
+TURN     4 S1B GLY B  16  GLN B  18     SURFACE                         8HVP 143
+TURN     5 FLB ILE B  50  GLY B  52     FLAP                            8HVP 144
+TURN     6 S2B ILE B  66  HIS B  69     SURFACE                         8HVP 145
+SITE     1 DTA  3 ASP A  25  THR A  26  GLY A  27                       8HVP 146
+SITE     1 DTB  3 ASP B  25  THR B  26  GLY B  27                       8HVP 147
+CRYST1   51.650   58.760   61.620  90.00  90.00  90.00 P 21 21 21    4  8HVP 148
+ORIGX1      1.000000  0.000000  0.000000        0.00000                 8HVP 149
+ORIGX2      0.000000  1.000000  0.000000        0.00000                 8HVP 150
+ORIGX3      0.000000  0.000000  1.000000        0.00000                 8HVP 151
+SCALE1      0.019361  0.000000  0.000000        0.00000                 8HVP 152
+SCALE2      0.000000  0.017018  0.000000        0.00000                 8HVP 153
+SCALE3      0.000000  0.000000  0.016228        0.00000                 8HVP 154
+ATOM      1  N   PRO A   1      -2.473   7.901  33.687  1.00  4.25      8HVP 155
+ATOM      2  CA  PRO A   1      -1.739   6.714  34.100  1.00  4.33      8HVP 156
+ATOM      3  C   PRO A   1      -0.269   6.799  33.769  1.00  9.25      8HVP 157
+ATOM      4  O   PRO A   1      -0.005   7.325  32.661  1.00 12.86      8HVP 158
+ATOM      5  CB  PRO A   1      -2.443   5.724  33.177  1.00  2.00      8HVP 159
+ATOM      6  CG  PRO A   1      -3.860   6.187  32.991  1.00  2.00      8HVP 160
+ATOM      7  CD  PRO A   1      -3.897   7.440  33.874  1.00  5.50      8HVP 161
+ATOM      8  N   GLN A   2       0.530   6.256  34.695  1.00 10.03      8HVP 162
+ATOM      9  CA  GLN A   2       1.979   6.233  34.355  1.00 13.56      8HVP 163
+ATOM     10  C   GLN A   2       2.193   4.743  34.046  1.00 13.76      8HVP 164
+ATOM     11  O   GLN A   2       1.946   3.952  34.978  1.00 18.02      8HVP 165
+ATOM     12  CB  GLN A   2       2.943   6.797  35.362  1.00 16.19      8HVP 166
+ATOM     13  CG  GLN A   2       4.417   6.732  34.939  1.00 23.62      8HVP 167
+ATOM     14  CD  GLN A   2       5.358   7.151  36.052  1.00 25.05      8HVP 168
+ATOM     15  OE1 GLN A   2       5.681   8.310  36.371  1.00 26.46      8HVP 169
+ATOM     16  NE2 GLN A   2       5.839   6.106  36.720  1.00  9.24      8HVP 170
+ATOM     17  N   ILE A   3       2.514   4.430  32.820  1.00  9.51      8HVP 171
+ATOM     18  CA  ILE A   3       2.669   3.034  32.417  1.00  3.63      8HVP 172
+ATOM     19  C   ILE A   3       4.120   2.826  31.951  1.00  2.00      8HVP 173
+ATOM     20  O   ILE A   3       4.386   3.445  30.877  1.00  2.00      8HVP 174
+ATOM     21  CB  ILE A   3       1.767   2.617  31.198  1.00  3.69      8HVP 175
+ATOM     22  CG1 ILE A   3       0.258   2.901  31.456  1.00 10.88      8HVP 176
+ATOM     23  CG2 ILE A   3       1.934   1.127  30.775  1.00  2.00      8HVP 177
+ATOM     24  CD1 ILE A   3      -0.505   2.898  30.060  1.00  2.00      8HVP 178
+ATOM     25  N   THR A   4       4.824   1.984  32.617  1.00  2.00      8HVP 179
+ATOM     26  CA  THR A   4       6.176   1.497  32.422  1.00  2.00      8HVP 180
+ATOM     27  C   THR A   4       6.348   0.565  31.229  1.00  2.00      8HVP 181
+ATOM     28  O   THR A   4       5.347  -0.027  30.769  1.00  2.00      8HVP 182
+ATOM     29  CB  THR A   4       6.444   0.709  33.777  1.00  8.70      8HVP 183
+ATOM     30  OG1 THR A   4       5.107   0.099  33.936  1.00 16.25      8HVP 184
+ATOM     31  CG2 THR A   4       6.918   1.568  34.954  1.00 15.09      8HVP 185
+ATOM     32  N   LEU A   5       7.603   0.350  30.815  1.00  2.00      8HVP 186
+ATOM     33  CA  LEU A   5       7.782  -0.501  29.624  1.00  2.00      8HVP 187
+ATOM     34  C   LEU A   5       8.206  -1.934  29.728  1.00  2.00      8HVP 188
+ATOM     35  O   LEU A   5       8.198  -2.583  28.652  1.00  2.00      8HVP 189
+ATOM     36  CB  LEU A   5       8.554   0.400  28.653  1.00  2.11      8HVP 190
+ATOM     37  CG  LEU A   5       7.838   1.671  28.247  1.00 13.00      8HVP 191
+ATOM     38  CD1 LEU A   5       8.765   2.681  27.561  1.00 15.68      8HVP 192
+ATOM     39  CD2 LEU A   5       6.655   1.336  27.358  1.00  9.29      8HVP 193
+ATOM     40  N   TRP A   6       8.464  -2.513  30.877  1.00  2.00      8HVP 194
+ATOM     41  CA  TRP A   6       8.824  -3.837  31.313  1.00  2.00      8HVP 195
+ATOM     42  C   TRP A   6       7.847  -4.872  30.752  1.00  2.00      8HVP 196
+ATOM     43  O   TRP A   6       8.306  -6.024  30.814  1.00  2.00      8HVP 197
+ATOM     44  CB  TRP A   6       8.899  -4.179  32.830  1.00 12.98      8HVP 198
+ATOM     45  CG  TRP A   6       9.272  -3.021  33.684  1.00 21.96      8HVP 199
+ATOM     46  CD1 TRP A   6       8.422  -2.112  34.276  1.00 30.51      8HVP 200
+ATOM     47  CD2 TRP A   6      10.596  -2.571  33.963  1.00 18.88      8HVP 201
+ATOM     48  NE1 TRP A   6       9.134  -1.114  34.904  1.00 33.17      8HVP 202
+ATOM     49  CE2 TRP A   6      10.473  -1.376  34.713  1.00 28.60      8HVP 203
+ATOM     50  CE3 TRP A   6      11.850  -3.073  33.668  1.00 17.70      8HVP 204
+ATOM     51  CZ2 TRP A   6      11.587  -0.673  35.140  1.00 25.35      8HVP 205
+ATOM     52  CZ3 TRP A   6      12.971  -2.388  34.070  1.00 16.22      8HVP 206
+ATOM     53  CH2 TRP A   6      12.816  -1.205  34.770  1.00 24.66      8HVP 207
+ATOM     54  N   GLN A   7       6.701  -4.474  30.281  1.00  3.35      8HVP 208
+ATOM     55  CA  GLN A   7       5.777  -5.391  29.605  1.00  4.83      8HVP 209
+ATOM     56  C   GLN A   7       5.131  -4.473  28.544  1.00  6.60      8HVP 210
+ATOM     57  O   GLN A   7       5.124  -3.274  28.779  1.00 10.09      8HVP 211
+ATOM     58  CB  GLN A   7       4.599  -5.998  30.279  1.00 15.11      8HVP 212
+ATOM     59  CG  GLN A   7       4.607  -7.028  31.358  1.00 25.54      8HVP 213
+ATOM     60  CD  GLN A   7       3.219  -7.169  31.970  1.00 27.66      8HVP 214
+ATOM     61  OE1 GLN A   7       2.758  -6.275  32.677  1.00 34.29      8HVP 215
+ATOM     62  NE2 GLN A   7       2.530  -8.274  31.717  1.00 33.00      8HVP 216
+ATOM     63  N   ARG A   8       4.599  -5.119  27.534  1.00 10.31      8HVP 217
+ATOM     64  CA  ARG A   8       3.859  -4.416  26.470  1.00  7.99      8HVP 218
+ATOM     65  C   ARG A   8       2.767  -3.614  27.169  1.00  9.42      8HVP 219
+ATOM     66  O   ARG A   8       2.109  -4.182  28.074  1.00 10.32      8HVP 220
+ATOM     67  CB  ARG A   8       3.114  -5.384  25.554  1.00  2.44      8HVP 221
+ATOM     68  CG  ARG A   8       4.035  -6.115  24.616  1.00  7.69      8HVP 222
+ATOM     69  CD  ARG A   8       3.548  -7.457  24.224  1.00 10.72      8HVP 223
+ATOM     70  NE  ARG A   8       4.459  -7.893  23.140  1.00 17.94      8HVP 224
+ATOM     71  CZ  ARG A   8       4.365  -9.137  22.652  1.00 24.69      8HVP 225
+ATOM     72  NH1 ARG A   8       3.648 -10.046  23.327  1.00 28.41      8HVP 226
+ATOM     73  NH2 ARG A   8       4.780  -9.377  21.409  1.00 16.19      8HVP 227
+ATOM     74  N   PRO A   9       2.644  -2.399  26.655  1.00  8.38      8HVP 228
+ATOM     75  CA  PRO A   9       1.582  -1.480  27.170  1.00  4.47      8HVP 229
+ATOM     76  C   PRO A   9       0.252  -1.823  26.524  1.00  6.26      8HVP 230
+ATOM     77  O   PRO A   9      -0.263  -1.296  25.504  1.00  5.60      8HVP 231
+ATOM     78  CB  PRO A   9       2.282  -0.150  26.990  1.00  2.17      8HVP 232
+ATOM     79  CG  PRO A   9       3.222  -0.285  25.805  1.00  2.00      8HVP 233
+ATOM     80  CD  PRO A   9       3.440  -1.777  25.604  1.00  2.00      8HVP 234
+ATOM     81  N   LEU A  10      -0.348  -2.883  27.099  1.00  6.46      8HVP 235
+ATOM     82  CA  LEU A  10      -1.701  -3.352  26.712  1.00 10.06      8HVP 236
+ATOM     83  C   LEU A  10      -2.741  -2.850  27.746  1.00  6.81      8HVP 237
+ATOM     84  O   LEU A  10      -2.726  -3.162  28.930  1.00  8.01      8HVP 238
+ATOM     85  CB  LEU A  10      -1.805  -4.859  26.486  1.00 11.47      8HVP 239
+ATOM     86  CG  LEU A  10      -1.168  -5.664  25.387  1.00  6.53      8HVP 240
+ATOM     87  CD1 LEU A  10      -1.627  -7.116  25.317  1.00  3.53      8HVP 241
+ATOM     88  CD2 LEU A  10      -1.448  -5.076  24.022  1.00  3.83      8HVP 242
+ATOM     89  N   VAL A  11      -3.704  -2.079  27.315  1.00 10.23      8HVP 243
+ATOM     90  CA  VAL A  11      -4.870  -1.494  28.000  1.00 10.17      8HVP 244
+ATOM     91  C   VAL A  11      -6.200  -1.883  27.305  1.00 10.78      8HVP 245
+ATOM     92  O   VAL A  11      -6.155  -2.128  26.085  1.00  8.04      8HVP 246
+ATOM     93  CB  VAL A  11      -4.647   0.027  28.000  1.00  2.00      8HVP 247
+ATOM     94  CG1 VAL A  11      -3.356   0.398  28.688  1.00  2.00      8HVP 248
+ATOM     95  CG2 VAL A  11      -4.777   0.526  26.562  1.00  2.00      8HVP 249
+ATOM     96  N   THR A  12      -7.321  -1.952  28.016  1.00  9.58      8HVP 250
+ATOM     97  CA  THR A  12      -8.636  -2.332  27.473  1.00  9.66      8HVP 251
+ATOM     98  C   THR A  12      -9.284  -1.117  26.787  1.00  6.66      8HVP 252
+ATOM     99  O   THR A  12      -9.279  -0.073  27.455  1.00  7.20      8HVP 253
+ATOM    100  CB  THR A  12      -9.705  -2.879  28.514  1.00  4.40      8HVP 254
+ATOM    101  OG1 THR A  12      -9.016  -3.028  29.802  1.00  6.88      8HVP 255
+ATOM    102  CG2 THR A  12     -10.282  -4.234  28.065  1.00  2.00      8HVP 256
+ATOM    103  N   ILE A  13      -9.821  -1.318  25.613  1.00  6.50      8HVP 257
+ATOM    104  CA  ILE A  13     -10.428  -0.306  24.756  1.00  5.69      8HVP 258
+ATOM    105  C   ILE A  13     -11.897  -0.668  24.616  1.00  6.09      8HVP 259
+ATOM    106  O   ILE A  13     -12.166  -1.865  24.447  1.00 11.04      8HVP 260
+ATOM    107  CB  ILE A  13      -9.658  -0.150  23.397  1.00  6.87      8HVP 261
+ATOM    108  CG1 ILE A  13     -10.082  -0.976  22.187  1.00  2.00      8HVP 262
+ATOM    109  CG2 ILE A  13      -8.149  -0.446  23.606  1.00  2.63      8HVP 263
+ATOM    110  CD1 ILE A  13      -9.521  -0.698  20.775  1.00  2.00      8HVP 264
+ATOM    111  N   ARG A  14     -12.800   0.270  24.732  1.00  4.95      8HVP 265
+ATOM    112  CA  ARG A  14     -14.234   0.099  24.650  1.00  2.00      8HVP 266
+ATOM    113  C   ARG A  14     -14.585   0.804  23.314  1.00  2.00      8HVP 267
+ATOM    114  O   ARG A  14     -14.408   2.018  23.365  1.00  7.49      8HVP 268
+ATOM    115  CB  ARG A  14     -15.240   0.751  25.610  1.00  2.00      8HVP 269
+ATOM    116  CG  ARG A  14     -16.495  -0.153  25.613  1.00 13.07      8HVP 270
+ATOM    117  CD  ARG A  14     -17.390   0.154  26.767  1.00 25.60      8HVP 271
+ATOM    118  NE  ARG A  14     -17.828   1.546  26.751  1.00 36.29      8HVP 272
+ATOM    119  CZ  ARG A  14     -18.341   2.165  25.680  1.00 36.27      8HVP 273
+ATOM    120  NH1 ARG A  14     -18.644   1.522  24.555  1.00 34.49      8HVP 274
+ATOM    121  NH2 ARG A  14     -18.324   3.505  25.698  1.00 36.80      8HVP 275
+ATOM    122  N   ILE A  15     -14.972  -0.035  22.412  1.00  8.77      8HVP 276
+ATOM    123  CA  ILE A  15     -15.300   0.406  21.054  1.00  3.96      8HVP 277
+ATOM    124  C   ILE A  15     -16.698  -0.064  20.662  1.00  8.76      8HVP 278
+ATOM    125  O   ILE A  15     -16.961  -1.125  20.072  1.00  8.48      8HVP 279
+ATOM    126  CB  ILE A  15     -14.189  -0.175  20.117  1.00  4.59      8HVP 280
+ATOM    127  CG1 ILE A  15     -14.445   0.353  18.694  1.00  2.67      8HVP 281
+ATOM    128  CG2 ILE A  15     -14.116  -1.716  20.057  1.00  8.19      8HVP 282
+ATOM    129  CD1 ILE A  15     -13.982   1.820  18.564  1.00 19.57      8HVP 283
+ATOM    130  N   GLY A  16     -17.652   0.774  21.052  1.00 11.70      8HVP 284
+ATOM    131  CA  GLY A  16     -19.075   0.587  20.773  1.00 12.24      8HVP 285
+ATOM    132  C   GLY A  16     -19.543  -0.807  21.211  1.00 11.42      8HVP 286
+ATOM    133  O   GLY A  16     -19.625  -1.776  20.426  1.00 13.44      8HVP 287
+ATOM    134  N   GLY A  17     -19.839  -0.786  22.503  1.00  7.14      8HVP 288
+ATOM    135  CA  GLY A  17     -20.345  -1.959  23.199  1.00  7.99      8HVP 289
+ATOM    136  C   GLY A  17     -19.544  -3.205  22.882  1.00  7.25      8HVP 290
+ATOM    137  O   GLY A  17     -20.097  -4.287  22.977  1.00  8.85      8HVP 291
+ATOM    138  N   GLN A  18     -18.286  -3.052  22.572  1.00  9.70      8HVP 292
+ATOM    139  CA  GLN A  18     -17.295  -4.100  22.321  1.00  8.42      8HVP 293
+ATOM    140  C   GLN A  18     -16.040  -3.785  23.133  1.00  2.58      8HVP 294
+ATOM    141  O   GLN A  18     -15.570  -2.635  23.044  1.00  7.27      8HVP 295
+ATOM    142  CB  GLN A  18     -16.983  -4.271  20.825  1.00 15.99      8HVP 296
+ATOM    143  CG  GLN A  18     -18.009  -5.084  20.044  1.00 18.33      8HVP 297
+ATOM    144  CD  GLN A  18     -18.240  -6.375  20.808  1.00 30.10      8HVP 298
+ATOM    145  OE1 GLN A  18     -17.295  -6.798  21.495  1.00 33.38      8HVP 299
+ATOM    146  NE2 GLN A  18     -19.461  -6.908  20.792  1.00 30.23      8HVP 300
+ATOM    147  N   LEU A  19     -15.525  -4.704  23.914  1.00  2.00      8HVP 301
+ATOM    148  CA  LEU A  19     -14.294  -4.432  24.669  1.00  2.00      8HVP 302
+ATOM    149  C   LEU A  19     -13.078  -5.133  24.062  1.00  3.47      8HVP 303
+ATOM    150  O   LEU A  19     -13.214  -6.341  23.784  1.00  6.90      8HVP 304
+ATOM    151  CB  LEU A  19     -14.544  -4.830  26.126  1.00  5.54      8HVP 305
+ATOM    152  CG  LEU A  19     -15.446  -3.905  26.934  1.00 12.65      8HVP 306
+ATOM    153  CD1 LEU A  19     -15.808  -4.594  28.261  1.00 11.26      8HVP 307
+ATOM    154  CD2 LEU A  19     -14.638  -2.638  27.230  1.00 11.67      8HVP 308
+ATOM    155  N   LYS A  20     -11.926  -4.503  23.901  1.00  3.26      8HVP 309
+ATOM    156  CA  LYS A  20     -10.754  -5.222  23.378  1.00  4.92      8HVP 310
+ATOM    157  C   LYS A  20      -9.509  -4.758  24.142  1.00  7.51      8HVP 311
+ATOM    158  O   LYS A  20      -9.547  -3.736  24.811  1.00  9.95      8HVP 312
+ATOM    159  CB  LYS A  20     -10.492  -5.064  21.911  1.00  2.00      8HVP 313
+ATOM    160  CG  LYS A  20     -11.596  -4.904  20.908  1.00  2.81      8HVP 314
+ATOM    161  CD  LYS A  20     -12.439  -6.133  20.701  1.00 14.45      8HVP 315
+ATOM    162  CE  LYS A  20     -13.681  -5.897  19.858  1.00 20.29      8HVP 316
+ATOM    163  NZ  LYS A  20     -14.671  -6.960  20.258  1.00 13.89      8HVP 317
+ATOM    164  N   GLU A  21      -8.450  -5.528  24.023  1.00 11.23      8HVP 318
+ATOM    165  CA  GLU A  21      -7.121  -5.258  24.584  1.00 11.12      8HVP 319
+ATOM    166  C   GLU A  21      -6.306  -4.595  23.490  1.00  7.90      8HVP 320
+ATOM    167  O   GLU A  21      -6.198  -5.254  22.433  1.00 14.97      8HVP 321
+ATOM    168  CB  GLU A  21      -6.396  -6.524  25.010  1.00 22.94      8HVP 322
+ATOM    169  CG  GLU A  21      -6.511  -6.993  26.451  1.00 45.49      8HVP 323
+ATOM    170  CD  GLU A  21      -6.037  -6.153  27.597  1.00 59.44      8HVP 324
+ATOM    171  OE1 GLU A  21      -5.411  -5.103  27.298  1.00 66.60      8HVP 325
+ATOM    172  OE2 GLU A  21      -6.236  -6.426  28.782  1.00 64.09      8HVP 326
+ATOM    173  N   ALA A  22      -5.842  -3.384  23.557  1.00  4.69      8HVP 327
+ATOM    174  CA  ALA A  22      -5.083  -2.822  22.433  1.00  2.00      8HVP 328
+ATOM    175  C   ALA A  22      -3.771  -2.331  23.023  1.00  2.00      8HVP 329
+ATOM    176  O   ALA A  22      -3.717  -2.079  24.211  1.00  2.00      8HVP 330
+ATOM    177  CB  ALA A  22      -5.778  -1.670  21.762  1.00  2.00      8HVP 331
+ATOM    178  N   LEU A  23      -2.856  -2.120  22.138  1.00  2.17      8HVP 332
+ATOM    179  CA  LEU A  23      -1.487  -1.667  22.382  1.00  6.32      8HVP 333
+ATOM    180  C   LEU A  23      -1.200  -0.210  22.007  1.00  2.87      8HVP 334
+ATOM    181  O   LEU A  23      -1.368   0.186  20.853  1.00  2.00      8HVP 335
+ATOM    182  CB  LEU A  23      -0.598  -2.622  21.548  1.00  5.69      8HVP 336
+ATOM    183  CG  LEU A  23       0.896  -2.607  21.807  1.00 12.05      8HVP 337
+ATOM    184  CD1 LEU A  23       1.271  -3.607  22.904  1.00  7.79      8HVP 338
+ATOM    185  CD2 LEU A  23       1.656  -2.921  20.509  1.00  2.00      8HVP 339
+ATOM    186  N   LEU A  24      -0.718   0.497  23.015  1.00  3.16      8HVP 340
+ATOM    187  CA  LEU A  24      -0.279   1.898  22.890  1.00  3.72      8HVP 341
+ATOM    188  C   LEU A  24       1.112   1.953  22.222  1.00  3.04      8HVP 342
+ATOM    189  O   LEU A  24       2.176   1.939  22.804  1.00  2.00      8HVP 343
+ATOM    190  CB  LEU A  24      -0.197   2.459  24.308  1.00  2.00      8HVP 344
+ATOM    191  CG  LEU A  24      -1.339   2.182  25.232  1.00  2.07      8HVP 345
+ATOM    192  CD1 LEU A  24      -0.927   2.514  26.671  1.00  4.92      8HVP 346
+ATOM    193  CD2 LEU A  24      -2.489   2.999  24.687  1.00  2.00      8HVP 347
+ATOM    194  N   ASP A  25       1.103   1.961  20.945  1.00  2.09      8HVP 348
+ATOM    195  CA  ASP A  25       2.174   1.976  19.989  1.00  2.00      8HVP 349
+ATOM    196  C   ASP A  25       2.377   3.416  19.493  1.00  3.53      8HVP 350
+ATOM    197  O   ASP A  25       1.640   3.918  18.627  1.00  2.00      8HVP 351
+ATOM    198  CB  ASP A  25       1.836   0.942  18.934  1.00  2.00      8HVP 352
+ATOM    199  CG  ASP A  25       2.875   0.696  17.889  1.00 15.58      8HVP 353
+ATOM    200  OD1 ASP A  25       4.067   1.036  18.055  1.00 20.52      8HVP 354
+ATOM    201  OD2 ASP A  25       2.571   0.117  16.827  1.00 21.05      8HVP 355
+ATOM    202  N   THR A  26       3.380   4.060  20.113  1.00  2.00      8HVP 356
+ATOM    203  CA  THR A  26       3.768   5.394  19.656  1.00  5.15      8HVP 357
+ATOM    204  C   THR A  26       4.638   5.366  18.381  1.00  6.87      8HVP 358
+ATOM    205  O   THR A  26       5.138   6.438  17.990  1.00  7.76      8HVP 359
+ATOM    206  CB  THR A  26       4.647   6.055  20.772  1.00  2.00      8HVP 360
+ATOM    207  OG1 THR A  26       5.776   5.124  20.726  1.00 17.12      8HVP 361
+ATOM    208  CG2 THR A  26       3.999   6.139  22.139  1.00  3.59      8HVP 362
+ATOM    209  N   GLY A  27       4.828   4.213  17.743  1.00  5.41      8HVP 363
+ATOM    210  CA  GLY A  27       5.606   4.072  16.516  1.00  2.00      8HVP 364
+ATOM    211  C   GLY A  27       4.672   3.955  15.314  1.00  2.00      8HVP 365
+ATOM    212  O   GLY A  27       5.098   3.911  14.154  1.00  2.00      8HVP 366
+ATOM    213  N   ALA A  28       3.413   3.936  15.597  1.00  2.00      8HVP 367
+ATOM    214  CA  ALA A  28       2.284   3.786  14.690  1.00  2.00      8HVP 368
+ATOM    215  C   ALA A  28       1.672   5.159  14.463  1.00  2.00      8HVP 369
+ATOM    216  O   ALA A  28       1.406   5.901  15.423  1.00  2.25      8HVP 370
+ATOM    217  CB  ALA A  28       1.255   2.844  15.307  1.00  2.00      8HVP 371
+ATOM    218  N   ASP A  29       1.436   5.455  13.207  1.00  2.00      8HVP 372
+ATOM    219  CA  ASP A  29       0.844   6.762  12.826  1.00  2.00      8HVP 373
+ATOM    220  C   ASP A  29      -0.661   6.737  13.095  1.00  2.61      8HVP 374
+ATOM    221  O   ASP A  29      -1.276   7.740  13.500  1.00  3.70      8HVP 375
+ATOM    222  CB  ASP A  29       1.148   6.901  11.319  1.00  2.00      8HVP 376
+ATOM    223  CG  ASP A  29       2.630   7.176  11.147  1.00  9.16      8HVP 377
+ATOM    224  OD1 ASP A  29       3.604   6.862  11.837  1.00 11.36      8HVP 378
+ATOM    225  OD2 ASP A  29       2.799   7.898  10.186  1.00  2.00      8HVP 379
+ATOM    226  N   ASP A  30      -1.221   5.573  12.797  1.00  2.00      8HVP 380
+ATOM    227  CA  ASP A  30      -2.617   5.259  12.873  1.00  3.29      8HVP 381
+ATOM    228  C   ASP A  30      -2.903   4.122  13.882  1.00  3.40      8HVP 382
+ATOM    229  O   ASP A  30      -1.955   3.505  14.335  1.00  2.00      8HVP 383
+ATOM    230  CB  ASP A  30      -3.141   4.849  11.482  1.00 14.10      8HVP 384
+ATOM    231  CG  ASP A  30      -3.037   5.898  10.398  1.00  5.79      8HVP 385
+ATOM    232  OD1 ASP A  30      -3.903   6.799  10.369  1.00 15.54      8HVP 386
+ATOM    233  OD2 ASP A  30      -2.104   5.769   9.590  1.00  4.06      8HVP 387
+ATOM    234  N   THR A  31      -4.207   3.934  14.032  1.00  2.00      8HVP 388
+ATOM    235  CA  THR A  31      -4.872   2.952  14.847  1.00  2.00      8HVP 389
+ATOM    236  C   THR A  31      -5.409   1.865  13.930  1.00  3.56      8HVP 390
+ATOM    237  O   THR A  31      -6.279   2.174  13.098  1.00  2.00      8HVP 391
+ATOM    238  CB  THR A  31      -6.027   3.608  15.691  1.00  2.00      8HVP 392
+ATOM    239  OG1 THR A  31      -5.350   4.649  16.454  1.00  4.30      8HVP 393
+ATOM    240  CG2 THR A  31      -6.747   2.629  16.643  1.00  4.26      8HVP 394
+ATOM    241  N   VAL A  32      -4.846   0.678  14.061  1.00  3.84      8HVP 395
+ATOM    242  CA  VAL A  32      -5.180  -0.486  13.241  1.00  3.65      8HVP 396
+ATOM    243  C   VAL A  32      -5.806  -1.599  14.089  1.00  5.26      8HVP 397
+ATOM    244  O   VAL A  32      -5.175  -2.213  14.958  1.00  8.26      8HVP 398
+ATOM    245  CB  VAL A  32      -3.978  -1.037  12.451  1.00  2.00      8HVP 399
+ATOM    246  CG1 VAL A  32      -4.397  -2.340  11.729  1.00  2.00      8HVP 400
+ATOM    247  CG2 VAL A  32      -3.290  -0.152  11.453  1.00  2.00      8HVP 401
+ATOM    248  N   LEU A  33      -7.055  -1.927  13.825  1.00  8.12      8HVP 402
+ATOM    249  CA  LEU A  33      -7.689  -3.029  14.570  1.00  9.44      8HVP 403
+ATOM    250  C   LEU A  33      -7.695  -4.318  13.767  1.00 11.79      8HVP 404
+ATOM    251  O   LEU A  33      -7.567  -4.386  12.525  1.00 13.91      8HVP 405
+ATOM    252  CB  LEU A  33      -8.986  -2.441  15.102  1.00 13.84      8HVP 406
+ATOM    253  CG  LEU A  33      -8.958  -1.273  16.070  1.00 10.78      8HVP 407
+ATOM    254  CD1 LEU A  33      -9.958  -1.540  17.202  1.00 14.10      8HVP 408
+ATOM    255  CD2 LEU A  33      -7.596  -1.040  16.703  1.00  8.06      8HVP 409
+ATOM    256  N   GLU A  34      -7.755  -5.399  14.526  1.00 13.04      8HVP 410
+ATOM    257  CA  GLU A  34      -7.818  -6.752  13.935  1.00 20.97      8HVP 411
+ATOM    258  C   GLU A  34      -9.196  -6.789  13.246  1.00 26.23      8HVP 412
+ATOM    259  O   GLU A  34     -10.068  -6.058  13.775  1.00 26.64      8HVP 413
+ATOM    260  CB  GLU A  34      -7.809  -7.851  14.958  1.00 24.11      8HVP 414
+ATOM    261  CG  GLU A  34      -6.716  -8.047  15.995  1.00 33.56      8HVP 415
+ATOM    262  CD  GLU A  34      -7.117  -8.936  17.145  1.00 39.68      8HVP 416
+ATOM    263  OE1 GLU A  34      -8.277  -9.103  17.521  1.00 50.11      8HVP 417
+ATOM    264  OE2 GLU A  34      -6.154  -9.480  17.715  1.00 38.53      8HVP 418
+ATOM    265  N   GLU A  35      -9.368  -7.573  12.190  1.00 29.97      8HVP 419
+ATOM    266  CA  GLU A  35     -10.640  -7.631  11.456  1.00 30.84      8HVP 420
+ATOM    267  C   GLU A  35     -11.838  -7.488  12.388  1.00 33.78      8HVP 421
+ATOM    268  O   GLU A  35     -11.952  -8.112  13.465  1.00 35.01      8HVP 422
+ATOM    269  CB  GLU A  35     -10.787  -8.899  10.639  1.00 37.69      8HVP 423
+ATOM    270  CG  GLU A  35     -12.124  -9.032   9.878  1.00 47.54      8HVP 424
+ATOM    271  CD  GLU A  35     -12.190  -8.046   8.737  1.00 54.43      8HVP 425
+ATOM    272  OE1 GLU A  35     -11.147  -7.728   8.170  1.00 59.79      8HVP 426
+ATOM    273  OE2 GLU A  35     -13.347  -7.647   8.490  1.00 51.62      8HVP 427
+ATOM    274  N   MET A  36     -12.742  -6.600  11.998  1.00 35.17      8HVP 428
+ATOM    275  CA  MET A  36     -13.938  -6.213  12.769  1.00 34.80      8HVP 429
+ATOM    276  C   MET A  36     -14.900  -5.424  11.884  1.00 33.96      8HVP 430
+ATOM    277  O   MET A  36     -14.593  -4.843  10.834  1.00 35.00      8HVP 431
+ATOM    278  CB  MET A  36     -13.557  -5.476  14.052  1.00 39.18      8HVP 432
+ATOM    279  CG  MET A  36     -14.633  -4.659  14.686  1.00 43.69      8HVP 433
+ATOM    280  SD  MET A  36     -14.210  -4.109  16.366  1.00 42.64      8HVP 434
+ATOM    281  CE  MET A  36     -13.318  -2.583  16.030  1.00 43.05      8HVP 435
+ATOM    282  N   ASN A  37     -16.139  -5.505  12.341  1.00 33.16      8HVP 436
+ATOM    283  CA  ASN A  37     -17.299  -4.872  11.708  1.00 30.97      8HVP 437
+ATOM    284  C   ASN A  37     -17.597  -3.625  12.537  1.00 25.69      8HVP 438
+ATOM    285  O   ASN A  37     -17.679  -3.665  13.758  1.00 24.78      8HVP 439
+ATOM    286  CB  ASN A  37     -18.481  -5.810  11.509  1.00 39.66      8HVP 440
+ATOM    287  CG  ASN A  37     -18.341  -6.766  10.331  1.00 47.27      8HVP 441
+ATOM    288  OD1 ASN A  37     -18.080  -7.975  10.496  1.00 45.86      8HVP 442
+ATOM    289  ND2 ASN A  37     -18.535  -6.233   9.113  1.00 51.14      8HVP 443
+ATOM    290  N   LEU A  38     -17.615  -2.544  11.789  1.00 24.97      8HVP 444
+ATOM    291  CA  LEU A  38     -17.905  -1.242  12.405  1.00 24.99      8HVP 445
+ATOM    292  C   LEU A  38     -19.052  -0.710  11.559  1.00 26.54      8HVP 446
+ATOM    293  O   LEU A  38     -19.182  -1.171  10.426  1.00 27.63      8HVP 447
+ATOM    294  CB  LEU A  38     -16.654  -0.412  12.533  1.00  9.19      8HVP 448
+ATOM    295  CG  LEU A  38     -15.633  -0.869  13.564  1.00  7.97      8HVP 449
+ATOM    296  CD1 LEU A  38     -14.307  -0.141  13.325  1.00  3.35      8HVP 450
+ATOM    297  CD2 LEU A  38     -16.116  -0.603  14.984  1.00  2.00      8HVP 451
+ATOM    298  N   PRO A  39     -19.822   0.125  12.205  1.00 28.28      8HVP 452
+ATOM    299  CA  PRO A  39     -20.957   0.755  11.496  1.00 31.06      8HVP 453
+ATOM    300  C   PRO A  39     -20.293   1.736  10.537  1.00 31.57      8HVP 454
+ATOM    301  O   PRO A  39     -19.269   2.307  10.965  1.00 34.27      8HVP 455
+ATOM    302  CB  PRO A  39     -21.788   1.345  12.625  1.00 29.25      8HVP 456
+ATOM    303  CG  PRO A  39     -21.118   1.006  13.920  1.00 28.19      8HVP 457
+ATOM    304  CD  PRO A  39     -19.672   0.683  13.557  1.00 28.38      8HVP 458
+ATOM    305  N   GLY A  40     -20.732   1.877   9.307  1.00 32.44      8HVP 459
+ATOM    306  CA  GLY A  40     -20.110   2.849   8.410  1.00 33.97      8HVP 460
+ATOM    307  C   GLY A  40     -19.616   2.402   7.052  1.00 34.68      8HVP 461
+ATOM    308  O   GLY A  40     -19.398   1.219   6.758  1.00 35.21      8HVP 462
+ATOM    309  N   LYS A  41     -19.469   3.412   6.198  1.00 35.81      8HVP 463
+ATOM    310  CA  LYS A  41     -18.925   3.262   4.838  1.00 36.30      8HVP 464
+ATOM    311  C   LYS A  41     -17.386   3.281   4.987  1.00 33.40      8HVP 465
+ATOM    312  O   LYS A  41     -16.797   4.227   5.542  1.00 35.77      8HVP 466
+ATOM    313  CB  LYS A  41     -19.254   4.390   3.899  1.00 49.20      8HVP 467
+ATOM    314  CG  LYS A  41     -20.408   4.437   2.919  1.00 58.64      8HVP 468
+ATOM    315  CD  LYS A  41     -20.336   5.775   2.162  1.00 64.83      8HVP 469
+ATOM    316  CE  LYS A  41     -21.688   6.194   1.630  1.00 72.01      8HVP 470
+ATOM    317  NZ  LYS A  41     -21.910   7.657   1.792  1.00 73.29      8HVP 471
+ATOM    318  N   TRP A  42     -16.732   2.248   4.522  1.00 29.59      8HVP 472
+ATOM    319  CA  TRP A  42     -15.253   2.248   4.608  1.00 22.94      8HVP 473
+ATOM    320  C   TRP A  42     -14.773   2.518   3.176  1.00 20.11      8HVP 474
+ATOM    321  O   TRP A  42     -15.536   2.544   2.183  1.00 14.17      8HVP 475
+ATOM    322  CB  TRP A  42     -14.842   0.942   5.282  1.00 28.31      8HVP 476
+ATOM    323  CG  TRP A  42     -15.282  -0.273   4.525  1.00 33.50      8HVP 477
+ATOM    324  CD1 TRP A  42     -16.362  -1.083   4.754  1.00 32.54      8HVP 478
+ATOM    325  CD2 TRP A  42     -14.651  -0.797   3.348  1.00 33.46      8HVP 479
+ATOM    326  NE1 TRP A  42     -16.448  -2.076   3.808  1.00 20.81      8HVP 480
+ATOM    327  CE2 TRP A  42     -15.404  -1.915   2.937  1.00 33.44      8HVP 481
+ATOM    328  CE3 TRP A  42     -13.563  -0.401   2.582  1.00 41.85      8HVP 482
+ATOM    329  CZ2 TRP A  42     -15.056  -2.635   1.800  1.00 39.65      8HVP 483
+ATOM    330  CZ3 TRP A  42     -13.247  -1.112   1.437  1.00 42.57      8HVP 484
+ATOM    331  CH2 TRP A  42     -13.974  -2.224   1.039  1.00 34.13      8HVP 485
+ATOM    332  N   LYS A  43     -13.475   2.752   3.099  1.00 15.53      8HVP 486
+ATOM    333  CA  LYS A  43     -12.789   2.960   1.779  1.00 12.02      8HVP 487
+ATOM    334  C   LYS A  43     -11.555   2.127   2.029  1.00  7.46      8HVP 488
+ATOM    335  O   LYS A  43     -11.421   1.942   3.270  1.00  3.03      8HVP 489
+ATOM    336  CB  LYS A  43     -12.666   4.432   1.446  1.00 28.19      8HVP 490
+ATOM    337  CG  LYS A  43     -14.101   4.997   1.265  1.00 40.71      8HVP 491
+ATOM    338  CD  LYS A  43     -14.354   6.419   1.710  1.00 44.03      8HVP 492
+ATOM    339  CE  LYS A  43     -15.819   6.760   1.913  1.00 40.79      8HVP 493
+ATOM    340  NZ  LYS A  43     -16.296   6.538   3.299  1.00 31.16      8HVP 494
+ATOM    341  N   PRO A  44     -10.964   1.530   1.017  1.00  2.00      8HVP 495
+ATOM    342  CA  PRO A  44      -9.809   0.651   1.204  1.00  2.00      8HVP 496
+ATOM    343  C   PRO A  44      -8.531   1.468   1.232  1.00  2.00      8HVP 497
+ATOM    344  O   PRO A  44      -8.449   2.240   0.235  1.00  3.68      8HVP 498
+ATOM    345  CB  PRO A  44      -9.677  -0.160  -0.080  1.00  2.00      8HVP 499
+ATOM    346  CG  PRO A  44     -10.913   0.196  -0.881  1.00  2.00      8HVP 500
+ATOM    347  CD  PRO A  44     -11.263   1.632  -0.429  1.00  5.60      8HVP 501
+ATOM    348  N   LYS A  45      -7.634   1.190   2.136  1.00  2.35      8HVP 502
+ATOM    349  CA  LYS A  45      -6.334   1.863   2.191  1.00  2.00      8HVP 503
+ATOM    350  C   LYS A  45      -5.187   0.879   2.204  1.00  2.00      8HVP 504
+ATOM    351  O   LYS A  45      -5.554  -0.283   2.399  1.00  7.09      8HVP 505
+ATOM    352  CB  LYS A  45      -6.290   2.747   3.456  1.00  2.00      8HVP 506
+ATOM    353  CG  LYS A  45      -7.303   3.866   3.198  1.00 13.15      8HVP 507
+ATOM    354  CD  LYS A  45      -6.860   4.811   2.087  1.00 22.65      8HVP 508
+ATOM    355  CE  LYS A  45      -6.052   5.987   2.595  1.00 35.40      8HVP 509
+ATOM    356  NZ  LYS A  45      -6.802   6.967   3.422  1.00 34.17      8HVP 510
+ATOM    357  N   MET A  46      -3.938   1.263   2.040  1.00  2.93      8HVP 511
+ATOM    358  CA  MET A  46      -2.761   0.436   2.137  1.00  2.00      8HVP 512
+ATOM    359  C   MET A  46      -1.770   1.228   3.060  1.00  4.68      8HVP 513
+ATOM    360  O   MET A  46      -1.271   2.308   2.692  1.00  2.00      8HVP 514
+ATOM    361  CB  MET A  46      -1.897   0.104   0.967  1.00  2.00      8HVP 515
+ATOM    362  CG  MET A  46      -2.491   0.275  -0.400  1.00 15.83      8HVP 516
+ATOM    363  SD  MET A  46      -1.717  -1.122  -1.330  1.00 17.98      8HVP 517
+ATOM    364  CE  MET A  46      -2.553  -2.456  -0.465  1.00 19.37      8HVP 518
+ATOM    365  N   ILE A  47      -1.468   0.467   4.116  1.00  5.17      8HVP 519
+ATOM    366  CA  ILE A  47      -0.489   1.037   5.099  1.00  4.11      8HVP 520
+ATOM    367  C   ILE A  47       0.681   0.049   5.176  1.00  2.00      8HVP 521
+ATOM    368  O   ILE A  47       0.362  -1.114   4.842  1.00  2.00      8HVP 522
+ATOM    369  CB  ILE A  47      -1.228   1.212   6.477  1.00  5.25      8HVP 523
+ATOM    370  CG1 ILE A  47      -1.328  -0.200   7.109  1.00  2.75      8HVP 524
+ATOM    371  CG2 ILE A  47      -2.625   1.852   6.260  1.00  2.00      8HVP 525
+ATOM    372  CD1 ILE A  47      -1.695  -0.199   8.619  1.00  2.00      8HVP 526
+ATOM    373  N   GLY A  48       1.771   0.502   5.750  1.00  2.00      8HVP 527
+ATOM    374  CA  GLY A  48       2.986  -0.256   5.916  1.00  2.00      8HVP 528
+ATOM    375  C   GLY A  48       3.429  -0.381   7.358  1.00  2.00      8HVP 529
+ATOM    376  O   GLY A  48       3.114   0.432   8.206  1.00  2.00      8HVP 530
+ATOM    377  N   GLY A  49       4.183  -1.399   7.611  1.00  2.62      8HVP 531
+ATOM    378  CA  GLY A  49       4.834  -2.034   8.737  1.00  5.23      8HVP 532
+ATOM    379  C   GLY A  49       6.290  -2.440   8.424  1.00  5.87      8HVP 533
+ATOM    380  O   GLY A  49       6.831  -2.026   7.383  1.00  2.00      8HVP 534
+ATOM    381  N   ILE A  50       6.948  -3.231   9.277  1.00  7.18      8HVP 535
+ATOM    382  CA  ILE A  50       8.337  -3.622   9.004  1.00  8.75      8HVP 536
+ATOM    383  C   ILE A  50       8.353  -4.671   7.862  1.00 10.38      8HVP 537
+ATOM    384  O   ILE A  50       9.457  -4.857   7.293  1.00 14.18      8HVP 538
+ATOM    385  CB  ILE A  50       9.222  -4.290  10.104  1.00  3.20      8HVP 539
+ATOM    386  CG1 ILE A  50       8.369  -5.376  10.819  1.00  7.88      8HVP 540
+ATOM    387  CG2 ILE A  50      10.011  -3.433  11.122  1.00  2.00      8HVP 541
+ATOM    388  CD1 ILE A  50       9.330  -6.431  11.442  1.00  7.33      8HVP 542
+ATOM    389  N   GLY A  51       7.241  -5.358   7.718  1.00  5.35      8HVP 543
+ATOM    390  CA  GLY A  51       7.196  -6.459   6.747  1.00  7.47      8HVP 544
+ATOM    391  C   GLY A  51       6.899  -6.022   5.330  1.00  5.78      8HVP 545
+ATOM    392  O   GLY A  51       7.236  -6.807   4.460  1.00  5.82      8HVP 546
+ATOM    393  N   GLY A  52       6.242  -4.902   5.158  1.00  9.54      8HVP 547
+ATOM    394  CA  GLY A  52       5.832  -4.260   3.901  1.00  9.17      8HVP 548
+ATOM    395  C   GLY A  52       4.413  -3.690   4.108  1.00 14.31      8HVP 549
+ATOM    396  O   GLY A  52       3.987  -3.499   5.276  1.00 11.31      8HVP 550
+ATOM    397  N   PHE A  53       3.704  -3.538   2.983  1.00 13.41      8HVP 551
+ATOM    398  CA  PHE A  53       2.331  -3.040   2.907  1.00  7.22      8HVP 552
+ATOM    399  C   PHE A  53       1.207  -4.078   2.933  1.00  5.57      8HVP 553
+ATOM    400  O   PHE A  53       1.265  -5.145   2.325  1.00  2.00      8HVP 554
+ATOM    401  CB  PHE A  53       2.177  -2.160   1.660  1.00 14.11      8HVP 555
+ATOM    402  CG  PHE A  53       2.851  -0.839   1.755  1.00 13.69      8HVP 556
+ATOM    403  CD1 PHE A  53       4.229  -0.754   1.952  1.00 22.88      8HVP 557
+ATOM    404  CD2 PHE A  53       2.103   0.330   1.689  1.00 18.71      8HVP 558
+ATOM    405  CE1 PHE A  53       4.887   0.484   2.043  1.00 14.77      8HVP 559
+ATOM    406  CE2 PHE A  53       2.697   1.573   1.833  1.00 21.08      8HVP 560
+ATOM    407  CZ  PHE A  53       4.082   1.649   1.998  1.00 23.64      8HVP 561
+ATOM    408  N   ILE A  54       0.140  -3.688   3.614  1.00  4.36      8HVP 562
+ATOM    409  CA  ILE A  54      -1.133  -4.337   3.819  1.00  3.22      8HVP 563
+ATOM    410  C   ILE A  54      -2.253  -3.369   3.329  1.00  2.00      8HVP 564
+ATOM    411  O   ILE A  54      -2.125  -2.159   3.486  1.00  2.00      8HVP 565
+ATOM    412  CB  ILE A  54      -1.614  -4.762   5.245  1.00  5.34      8HVP 566
+ATOM    413  CG1 ILE A  54      -1.592  -3.583   6.253  1.00  2.80      8HVP 567
+ATOM    414  CG2 ILE A  54      -1.058  -6.054   5.886  1.00  2.77      8HVP 568
+ATOM    415  CD1 ILE A  54      -2.307  -3.981   7.605  1.00  2.00      8HVP 569
+ATOM    416  N   LYS A  55      -3.275  -3.967   2.753  1.00  2.00      8HVP 570
+ATOM    417  CA  LYS A  55      -4.444  -3.221   2.267  1.00  2.00      8HVP 571
+ATOM    418  C   LYS A  55      -5.389  -3.178   3.475  1.00  2.00      8HVP 572
+ATOM    419  O   LYS A  55      -5.530  -4.334   3.882  1.00  2.00      8HVP 573
+ATOM    420  CB  LYS A  55      -5.247  -3.838   1.121  1.00  2.00      8HVP 574
+ATOM    421  CG  LYS A  55      -6.371  -2.871   0.727  1.00 19.34      8HVP 575
+ATOM    422  CD  LYS A  55      -7.458  -3.479  -0.139  1.00 31.54      8HVP 576
+ATOM    423  CE  LYS A  55      -7.086  -3.526  -1.608  1.00 41.63      8HVP 577
+ATOM    424  NZ  LYS A  55      -6.541  -2.204  -2.039  1.00 43.89      8HVP 578
+ATOM    425  N   VAL A  56      -5.863  -2.045   3.935  1.00  2.00      8HVP 579
+ATOM    426  CA  VAL A  56      -6.766  -1.944   5.091  1.00  3.51      8HVP 580
+ATOM    427  C   VAL A  56      -8.103  -1.307   4.731  1.00  2.00      8HVP 581
+ATOM    428  O   VAL A  56      -8.239  -0.704   3.640  1.00  2.00      8HVP 582
+ATOM    429  CB  VAL A  56      -6.058  -1.152   6.206  1.00 13.97      8HVP 583
+ATOM    430  CG1 VAL A  56      -4.901  -1.905   6.855  1.00 20.53      8HVP 584
+ATOM    431  CG2 VAL A  56      -5.578   0.210   5.724  1.00 20.25      8HVP 585
+ATOM    432  N   ARG A  57      -9.117  -1.478   5.563  1.00  2.00      8HVP 586
+ATOM    433  CA  ARG A  57     -10.425  -0.817   5.243  1.00  2.00      8HVP 587
+ATOM    434  C   ARG A  57     -10.705   0.111   6.440  1.00  2.00      8HVP 588
+ATOM    435  O   ARG A  57     -10.834  -0.364   7.571  1.00  2.00      8HVP 589
+ATOM    436  CB  ARG A  57     -11.596  -1.712   4.948  1.00 10.35      8HVP 590
+ATOM    437  CG  ARG A  57     -12.503  -2.169   6.043  1.00 21.42      8HVP 591
+ATOM    438  CD  ARG A  57     -13.604  -3.058   5.659  1.00 33.04      8HVP 592
+ATOM    439  NE  ARG A  57     -13.279  -4.410   5.254  1.00 46.89      8HVP 593
+ATOM    440  CZ  ARG A  57     -14.189  -5.376   5.028  1.00 58.68      8HVP 594
+ATOM    441  NH1 ARG A  57     -15.506  -5.234   5.228  1.00 63.21      8HVP 595
+ATOM    442  NH2 ARG A  57     -13.791  -6.553   4.522  1.00 60.66      8HVP 596
+ATOM    443  N   GLN A  58     -10.744   1.371   6.167  1.00  2.00      8HVP 597
+ATOM    444  CA  GLN A  58     -10.864   2.541   6.987  1.00  6.11      8HVP 598
+ATOM    445  C   GLN A  58     -12.217   3.163   7.270  1.00  6.18      8HVP 599
+ATOM    446  O   GLN A  58     -13.067   3.354   6.382  1.00  8.69      8HVP 600
+ATOM    447  CB  GLN A  58     -10.075   3.622   6.187  1.00  2.00      8HVP 601
+ATOM    448  CG  GLN A  58     -10.251   5.006   6.805  1.00  2.00      8HVP 602
+ATOM    449  CD  GLN A  58      -9.557   6.086   6.013  1.00  7.49      8HVP 603
+ATOM    450  OE1 GLN A  58      -8.836   5.872   5.041  1.00  2.00      8HVP 604
+ATOM    451  NE2 GLN A  58      -9.815   7.325   6.445  1.00 13.29      8HVP 605
+ATOM    452  N   TYR A  59     -12.355   3.582   8.522  1.00  7.47      8HVP 606
+ATOM    453  CA  TYR A  59     -13.547   4.225   9.066  1.00  4.02      8HVP 607
+ATOM    454  C   TYR A  59     -13.052   5.500   9.761  1.00  6.05      8HVP 608
+ATOM    455  O   TYR A  59     -12.153   5.352  10.594  1.00  7.99      8HVP 609
+ATOM    456  CB  TYR A  59     -14.302   3.395  10.112  1.00  3.56      8HVP 610
+ATOM    457  CG  TYR A  59     -14.644   1.994   9.688  1.00  3.34      8HVP 611
+ATOM    458  CD1 TYR A  59     -15.789   1.814   8.903  1.00  4.20      8HVP 612
+ATOM    459  CD2 TYR A  59     -13.841   0.884   9.984  1.00  8.25      8HVP 613
+ATOM    460  CE1 TYR A  59     -16.103   0.554   8.425  1.00  2.00      8HVP 614
+ATOM    461  CE2 TYR A  59     -14.175  -0.394   9.527  1.00  5.59      8HVP 615
+ATOM    462  CZ  TYR A  59     -15.329  -0.548   8.767  1.00  2.59      8HVP 616
+ATOM    463  OH  TYR A  59     -15.798  -1.764   8.361  1.00  3.97      8HVP 617
+ATOM    464  N   ASP A  60     -13.652   6.572   9.380  1.00  5.89      8HVP 618
+ATOM    465  CA  ASP A  60     -13.507   7.951   9.779  1.00  6.71      8HVP 619
+ATOM    466  C   ASP A  60     -14.482   8.235  10.951  1.00  7.90      8HVP 620
+ATOM    467  O   ASP A  60     -15.503   7.548  11.133  1.00  8.18      8HVP 621
+ATOM    468  CB  ASP A  60     -13.742   8.895   8.611  1.00  9.56      8HVP 622
+ATOM    469  CG  ASP A  60     -13.156   8.619   7.249  1.00 22.85      8HVP 623
+ATOM    470  OD1 ASP A  60     -13.378   7.490   6.734  1.00 32.24      8HVP 624
+ATOM    471  OD2 ASP A  60     -12.560   9.477   6.558  1.00 19.65      8HVP 625
+ATOM    472  N   GLN A  61     -14.164   9.259  11.713  1.00  5.23      8HVP 626
+ATOM    473  CA  GLN A  61     -14.911   9.652  12.914  1.00  8.26      8HVP 627
+ATOM    474  C   GLN A  61     -15.543   8.417  13.566  1.00  7.43      8HVP 628
+ATOM    475  O   GLN A  61     -16.629   8.040  13.126  1.00 11.00      8HVP 629
+ATOM    476  CB  GLN A  61     -15.890  10.784  12.685  1.00 12.02      8HVP 630
+ATOM    477  CG  GLN A  61     -15.524  12.023  11.914  1.00 22.18      8HVP 631
+ATOM    478  CD  GLN A  61     -16.578  13.116  11.902  1.00 31.81      8HVP 632
+ATOM    479  OE1 GLN A  61     -16.590  14.122  11.172  1.00 32.58      8HVP 633
+ATOM    480  NE2 GLN A  61     -17.559  12.972  12.808  1.00 33.12      8HVP 634
+ATOM    481  N   ILE A  62     -14.984   7.777  14.564  1.00  7.95      8HVP 635
+ATOM    482  CA  ILE A  62     -15.452   6.605  15.301  1.00  3.16      8HVP 636
+ATOM    483  C   ILE A  62     -14.889   6.781  16.720  1.00  5.26      8HVP 637
+ATOM    484  O   ILE A  62     -13.654   6.804  16.818  1.00  6.64      8HVP 638
+ATOM    485  CB  ILE A  62     -14.904   5.234  14.757  1.00  2.00      8HVP 639
+ATOM    486  CG1 ILE A  62     -15.353   4.874  13.323  1.00  7.65      8HVP 640
+ATOM    487  CG2 ILE A  62     -15.263   4.060  15.735  1.00  2.00      8HVP 641
+ATOM    488  CD1 ILE A  62     -16.862   4.480  13.149  1.00  2.00      8HVP 642
+ATOM    489  N   PRO A  63     -15.727   6.880  17.728  1.00  7.38      8HVP 643
+ATOM    490  CA  PRO A  63     -15.324   7.094  19.130  1.00  3.35      8HVP 644
+ATOM    491  C   PRO A  63     -14.771   5.819  19.712  1.00  3.57      8HVP 645
+ATOM    492  O   PRO A  63     -15.206   4.705  19.380  1.00  2.00      8HVP 646
+ATOM    493  CB  PRO A  63     -16.544   7.574  19.883  1.00  3.75      8HVP 647
+ATOM    494  CG  PRO A  63     -17.642   7.706  18.862  1.00  7.00      8HVP 648
+ATOM    495  CD  PRO A  63     -17.205   6.901  17.626  1.00  8.19      8HVP 649
+ATOM    496  N   VAL A  64     -13.778   6.036  20.575  1.00  3.62      8HVP 650
+ATOM    497  CA  VAL A  64     -13.112   4.851  21.174  1.00  2.00      8HVP 651
+ATOM    498  C   VAL A  64     -12.778   5.200  22.592  1.00  2.00      8HVP 652
+ATOM    499  O   VAL A  64     -12.370   6.340  22.771  1.00  2.00      8HVP 653
+ATOM    500  CB  VAL A  64     -11.862   4.551  20.328  1.00  4.74      8HVP 654
+ATOM    501  CG1 VAL A  64     -11.044   3.381  20.852  1.00  7.63      8HVP 655
+ATOM    502  CG2 VAL A  64     -12.216   4.410  18.854  1.00  3.77      8HVP 656
+ATOM    503  N   GLU A  65     -12.940   4.276  23.508  1.00  2.00      8HVP 657
+ATOM    504  CA  GLU A  65     -12.593   4.737  24.880  1.00  4.11      8HVP 658
+ATOM    505  C   GLU A  65     -11.357   4.018  25.366  1.00  2.94      8HVP 659
+ATOM    506  O   GLU A  65     -11.406   2.812  25.550  1.00  3.01      8HVP 660
+ATOM    507  CB  GLU A  65     -13.809   4.650  25.764  1.00 10.50      8HVP 661
+ATOM    508  CG  GLU A  65     -13.607   5.129  27.203  1.00 20.47      8HVP 662
+ATOM    509  CD  GLU A  65     -14.912   5.221  27.953  1.00 26.07      8HVP 663
+ATOM    510  OE1 GLU A  65     -15.671   6.055  27.394  1.00 24.43      8HVP 664
+ATOM    511  OE2 GLU A  65     -15.121   4.544  28.944  1.00 26.09      8HVP 665
+ATOM    512  N   ILE A  66     -10.262   4.769  25.350  1.00  4.40      8HVP 666
+ATOM    513  CA  ILE A  66      -8.950   4.279  25.736  1.00  2.43      8HVP 667
+ATOM    514  C   ILE A  66      -8.793   4.502  27.245  1.00  3.70      8HVP 668
+ATOM    515  O   ILE A  66      -8.297   5.504  27.736  1.00  2.15      8HVP 669
+ATOM    516  CB  ILE A  66      -7.662   4.768  25.027  1.00  5.92      8HVP 670
+ATOM    517  CG1 ILE A  66      -7.599   4.255  23.562  1.00 15.82      8HVP 671
+ATOM    518  CG2 ILE A  66      -6.359   4.383  25.791  1.00  2.00      8HVP 672
+ATOM    519  CD1 ILE A  66      -8.540   5.033  22.584  1.00 21.51      8HVP 673
+HETATM  520  N   ABA A  67      -9.180   3.408  27.866  1.00  2.00      8HVP 674
+HETATM  521  CA  ABA A  67      -9.148   3.211  29.308  1.00  2.00      8HVP 675
+HETATM  522  C   ABA A  67      -9.795   4.420  29.938  1.00  2.00      8HVP 676
+HETATM  523  O   ABA A  67      -9.344   5.181  30.820  1.00  2.00      8HVP 677
+HETATM  524  CB  ABA A  67      -7.885   2.703  29.913  1.00  2.00      8HVP 678
+HETATM  525  CG  ABA A  67      -6.489   3.137  29.574  1.00  2.00      8HVP 679
+ATOM    526  N   GLY A  68     -11.010   4.625  29.439  1.00  2.00      8HVP 680
+ATOM    527  CA  GLY A  68     -11.923   5.629  29.949  1.00  2.00      8HVP 681
+ATOM    528  C   GLY A  68     -11.592   7.068  29.742  1.00  2.00      8HVP 682
+ATOM    529  O   GLY A  68     -11.979   7.847  30.699  1.00  2.00      8HVP 683
+ATOM    530  N   HIS A  69     -10.964   7.281  28.598  1.00  2.00      8HVP 684
+ATOM    531  CA  HIS A  69     -10.611   8.538  27.948  1.00  2.00      8HVP 685
+ATOM    532  C   HIS A  69     -11.134   8.384  26.498  1.00  2.67      8HVP 686
+ATOM    533  O   HIS A  69     -10.822   7.471  25.750  1.00  2.00      8HVP 687
+ATOM    534  CB  HIS A  69      -9.162   9.000  27.663  1.00  2.00      8HVP 688
+ATOM    535  CG  HIS A  69      -8.434   9.125  28.962  1.00  4.09      8HVP 689
+ATOM    536  ND1 HIS A  69      -8.028  10.287  29.581  1.00  2.00      8HVP 690
+ATOM    537  CD2 HIS A  69      -8.278   8.108  29.849  1.00  2.00      8HVP 691
+ATOM    538  CE1 HIS A  69      -7.573  10.005  30.777  1.00  2.00      8HVP 692
+ATOM    539  NE2 HIS A  69      -7.627   8.709  30.902  1.00  2.00      8HVP 693
+ATOM    540  N   LYS A  70     -11.904   9.346  26.079  1.00  6.44      8HVP 694
+ATOM    541  CA  LYS A  70     -12.526   9.291  24.767  1.00  8.83      8HVP 695
+ATOM    542  C   LYS A  70     -11.741   9.907  23.623  1.00  7.79      8HVP 696
+ATOM    543  O   LYS A  70     -11.516  11.125  23.688  1.00 10.27      8HVP 697
+ATOM    544  CB  LYS A  70     -13.843  10.049  24.944  1.00 22.25      8HVP 698
+ATOM    545  CG  LYS A  70     -14.883   9.579  25.948  1.00 29.40      8HVP 699
+ATOM    546  CD  LYS A  70     -15.966   8.698  25.354  1.00 37.56      8HVP 700
+ATOM    547  CE  LYS A  70     -16.817   9.292  24.261  1.00 37.79      8HVP 701
+ATOM    548  NZ  LYS A  70     -16.128   9.251  22.942  1.00 41.52      8HVP 702
+ATOM    549  N   ALA A  71     -11.503   9.158  22.563  1.00  3.86      8HVP 703
+ATOM    550  CA  ALA A  71     -10.970   9.668  21.300  1.00  2.78      8HVP 704
+ATOM    551  C   ALA A  71     -12.079   9.535  20.237  1.00  2.59      8HVP 705
+ATOM    552  O   ALA A  71     -13.038   8.782  20.491  1.00  4.78      8HVP 706
+ATOM    553  CB  ALA A  71      -9.727   8.913  20.899  1.00  4.57      8HVP 707
+ATOM    554  N   ILE A  72     -12.030  10.206  19.107  1.00  2.00      8HVP 708
+ATOM    555  CA  ILE A  72     -12.956  10.132  17.989  1.00  2.00      8HVP 709
+ATOM    556  C   ILE A  72     -12.213  10.270  16.652  1.00  2.52      8HVP 710
+ATOM    557  O   ILE A  72     -11.966  11.415  16.204  1.00  5.79      8HVP 711
+ATOM    558  CB  ILE A  72     -14.141  11.152  18.026  1.00  6.76      8HVP 712
+ATOM    559  CG1 ILE A  72     -15.012  10.896  19.279  1.00  3.79      8HVP 713
+ATOM    560  CG2 ILE A  72     -15.063  11.154  16.767  1.00  2.00      8HVP 714
+ATOM    561  CD1 ILE A  72     -15.934  12.133  19.516  1.00  8.03      8HVP 715
+ATOM    562  N   GLY A  73     -11.986   9.137  15.984  1.00  2.00      8HVP 716
+ATOM    563  CA  GLY A  73     -11.254   9.270  14.752  1.00  3.83      8HVP 717
+ATOM    564  C   GLY A  73     -11.100   8.060  13.882  1.00  8.34      8HVP 718
+ATOM    565  O   GLY A  73     -11.824   7.096  14.087  1.00  9.36      8HVP 719
+ATOM    566  N   THR A  74     -10.204   8.214  12.900  1.00 10.69      8HVP 720
+ATOM    567  CA  THR A  74      -9.923   7.185  11.888  1.00 12.72      8HVP 721
+ATOM    568  C   THR A  74      -9.556   5.963  12.729  1.00 11.88      8HVP 722
+ATOM    569  O   THR A  74      -9.029   6.181  13.819  1.00 15.49      8HVP 723
+ATOM    570  CB  THR A  74      -8.837   7.541  10.804  1.00  7.19      8HVP 724
+ATOM    571  OG1 THR A  74      -9.107   8.917  10.330  1.00  2.00      8HVP 725
+ATOM    572  CG2 THR A  74      -8.823   6.625   9.549  1.00  2.00      8HVP 726
+ATOM    573  N   VAL A  75      -9.883   4.817  12.264  1.00 12.35      8HVP 727
+ATOM    574  CA  VAL A  75      -9.637   3.501  12.863  1.00  9.52      8HVP 728
+ATOM    575  C   VAL A  75      -9.761   2.580  11.630  1.00  9.87      8HVP 729
+ATOM    576  O   VAL A  75     -10.825   2.592  10.989  1.00 11.30      8HVP 730
+ATOM    577  CB  VAL A  75     -10.642   3.268  13.978  1.00  2.00      8HVP 731
+ATOM    578  CG1 VAL A  75     -10.584   1.893  14.631  1.00  5.44      8HVP 732
+ATOM    579  CG2 VAL A  75     -10.617   4.277  15.098  1.00  2.00      8HVP 733
+ATOM    580  N   LEU A  76      -8.698   1.941  11.264  1.00  8.47      8HVP 734
+ATOM    581  CA  LEU A  76      -8.485   1.037  10.143  1.00  8.80      8HVP 735
+ATOM    582  C   LEU A  76      -8.671  -0.403  10.608  1.00  6.70      8HVP 736
+ATOM    583  O   LEU A  76      -8.303  -0.703  11.759  1.00  5.67      8HVP 737
+ATOM    584  CB  LEU A  76      -7.063   1.316   9.584  1.00 10.20      8HVP 738
+ATOM    585  CG  LEU A  76      -6.536   2.738   9.571  1.00 12.70      8HVP 739
+ATOM    586  CD1 LEU A  76      -5.017   2.742   9.427  1.00 20.05      8HVP 740
+ATOM    587  CD2 LEU A  76      -7.172   3.539   8.442  1.00 10.50      8HVP 741
+ATOM    588  N   VAL A  77      -9.237  -1.283   9.797  1.00  7.51      8HVP 742
+ATOM    589  CA  VAL A  77      -9.467  -2.667  10.311  1.00  8.48      8HVP 743
+ATOM    590  C   VAL A  77      -8.655  -3.482   9.296  1.00  8.66      8HVP 744
+ATOM    591  O   VAL A  77      -8.707  -3.060   8.148  1.00  3.51      8HVP 745
+ATOM    592  CB  VAL A  77     -10.848  -3.182  10.711  1.00  2.00      8HVP 746
+ATOM    593  CG1 VAL A  77     -11.601  -2.130  11.543  1.00  2.00      8HVP 747
+ATOM    594  CG2 VAL A  77     -11.772  -3.758   9.676  1.00  2.00      8HVP 748
+ATOM    595  N   GLY A  78      -7.917  -4.370   9.966  1.00  9.07      8HVP 749
+ATOM    596  CA  GLY A  78      -7.027  -5.213   9.216  1.00 10.45      8HVP 750
+ATOM    597  C   GLY A  78      -6.278  -6.298   9.987  1.00 13.11      8HVP 751
+ATOM    598  O   GLY A  78      -6.391  -6.475  11.207  1.00  8.30      8HVP 752
+ATOM    599  N   PRO A  79      -5.496  -6.967   9.126  1.00 13.87      8HVP 753
+ATOM    600  CA  PRO A  79      -4.648  -8.082   9.501  1.00 16.71      8HVP 754
+ATOM    601  C   PRO A  79      -3.373  -7.754  10.248  1.00 16.35      8HVP 755
+ATOM    602  O   PRO A  79      -2.271  -8.046   9.729  1.00 21.24      8HVP 756
+ATOM    603  CB  PRO A  79      -4.357  -8.828   8.183  1.00 15.48      8HVP 757
+ATOM    604  CG  PRO A  79      -4.513  -7.802   7.104  1.00 17.22      8HVP 758
+ATOM    605  CD  PRO A  79      -5.426  -6.726   7.664  1.00 15.67      8HVP 759
+ATOM    606  N   THR A  80      -3.508  -7.329  11.491  1.00 13.99      8HVP 760
+ATOM    607  CA  THR A  80      -2.388  -7.007  12.382  1.00  7.93      8HVP 761
+ATOM    608  C   THR A  80      -2.093  -8.073  13.416  1.00  7.15      8HVP 762
+ATOM    609  O   THR A  80      -2.972  -8.837  13.836  1.00  7.15      8HVP 763
+ATOM    610  CB  THR A  80      -2.810  -5.655  13.104  1.00  3.57      8HVP 764
+ATOM    611  OG1 THR A  80      -1.958  -5.424  14.269  1.00  3.18      8HVP 765
+ATOM    612  CG2 THR A  80      -4.299  -5.646  13.443  1.00  3.91      8HVP 766
+ATOM    613  N   PRO A  81      -0.864  -8.073  13.929  1.00  7.89      8HVP 767
+ATOM    614  CA  PRO A  81      -0.489  -8.998  15.016  1.00  5.91      8HVP 768
+ATOM    615  C   PRO A  81      -1.384  -8.738  16.238  1.00  4.90      8HVP 769
+ATOM    616  O   PRO A  81      -1.567  -9.712  17.002  1.00  2.72      8HVP 770
+ATOM    617  CB  PRO A  81       0.999  -8.797  15.310  1.00  2.00      8HVP 771
+ATOM    618  CG  PRO A  81       1.464  -7.910  14.202  1.00  5.30      8HVP 772
+ATOM    619  CD  PRO A  81       0.284  -7.232  13.546  1.00  4.56      8HVP 773
+ATOM    620  N   VAL A  82      -1.821  -7.500  16.368  1.00  2.00      8HVP 774
+ATOM    621  CA  VAL A  82      -2.634  -7.088  17.513  1.00  2.23      8HVP 775
+ATOM    622  C   VAL A  82      -3.237  -5.715  17.207  1.00  4.19      8HVP 776
+ATOM    623  O   VAL A  82      -3.001  -5.045  16.193  1.00  7.31      8HVP 777
+ATOM    624  CB  VAL A  82      -1.794  -7.114  18.814  1.00  7.05      8HVP 778
+ATOM    625  CG1 VAL A  82      -0.584  -6.170  18.866  1.00  2.42      8HVP 779
+ATOM    626  CG2 VAL A  82      -2.637  -6.925  20.078  1.00  2.00      8HVP 780
+ATOM    627  N   ASN A  83      -4.043  -5.282  18.137  1.00  2.00      8HVP 781
+ATOM    628  CA  ASN A  83      -4.725  -4.010  18.139  1.00  2.00      8HVP 782
+ATOM    629  C   ASN A  83      -3.712  -2.932  18.472  1.00  2.00      8HVP 783
+ATOM    630  O   ASN A  83      -3.102  -3.058  19.533  1.00  2.00      8HVP 784
+ATOM    631  CB  ASN A  83      -5.890  -4.091  19.148  1.00  2.77      8HVP 785
+ATOM    632  CG  ASN A  83      -6.999  -5.034  18.701  1.00  3.99      8HVP 786
+ATOM    633  OD1 ASN A  83      -7.292  -5.238  17.505  1.00  2.00      8HVP 787
+ATOM    634  ND2 ASN A  83      -7.656  -5.622  19.696  1.00  3.16      8HVP 788
+ATOM    635  N   ILE A  84      -3.572  -1.899  17.667  1.00  2.00      8HVP 789
+ATOM    636  CA  ILE A  84      -2.627  -0.855  18.109  1.00  2.00      8HVP 790
+ATOM    637  C   ILE A  84      -3.331   0.495  18.104  1.00  2.00      8HVP 791
+ATOM    638  O   ILE A  84      -4.139   0.805  17.202  1.00  3.31      8HVP 792
+ATOM    639  CB  ILE A  84      -1.337  -0.942  17.265  1.00  5.68      8HVP 793
+ATOM    640  CG1 ILE A  84      -1.582  -0.361  15.856  1.00  2.00      8HVP 794
+ATOM    641  CG2 ILE A  84      -0.704  -2.369  17.165  1.00  2.00      8HVP 795
+ATOM    642  CD1 ILE A  84      -0.535  -0.914  14.813  1.00 14.28      8HVP 796
+ATOM    643  N   ILE A  85      -3.119   1.204  19.200  1.00  2.00      8HVP 797
+ATOM    644  CA  ILE A  85      -3.629   2.562  19.300  1.00  2.00      8HVP 798
+ATOM    645  C   ILE A  85      -2.456   3.427  18.810  1.00  5.58      8HVP 799
+ATOM    646  O   ILE A  85      -1.316   3.339  19.342  1.00  6.20      8HVP 800
+ATOM    647  CB  ILE A  85      -4.238   2.871  20.688  1.00  5.40      8HVP 801
+ATOM    648  CG1 ILE A  85      -5.468   2.004  21.077  1.00  5.42      8HVP 802
+ATOM    649  CG2 ILE A  85      -4.563   4.413  20.787  1.00 10.10      8HVP 803
+ATOM    650  CD1 ILE A  85      -6.502   1.744  19.986  1.00  2.00      8HVP 804
+ATOM    651  N   GLY A  86      -2.685   4.160  17.715  1.00  5.21      8HVP 805
+ATOM    652  CA  GLY A  86      -1.546   4.956  17.199  1.00  3.26      8HVP 806
+ATOM    653  C   GLY A  86      -1.677   6.387  17.647  1.00  2.00      8HVP 807
+ATOM    654  O   GLY A  86      -2.687   6.774  18.224  1.00  4.46      8HVP 808
+ATOM    655  N   ARG A  87      -0.753   7.212  17.229  1.00  4.56      8HVP 809
+ATOM    656  CA  ARG A  87      -0.726   8.647  17.515  1.00  4.66      8HVP 810
+ATOM    657  C   ARG A  87      -1.927   9.421  17.003  1.00  5.62      8HVP 811
+ATOM    658  O   ARG A  87      -1.997  10.624  17.295  1.00  6.22      8HVP 812
+ATOM    659  CB  ARG A  87       0.560   9.251  16.954  1.00  3.84      8HVP 813
+ATOM    660  CG  ARG A  87       1.799   8.465  17.393  1.00  5.19      8HVP 814
+ATOM    661  CD  ARG A  87       3.062   9.249  17.176  1.00  5.47      8HVP 815
+ATOM    662  NE  ARG A  87       3.301   9.385  15.743  1.00  2.00      8HVP 816
+ATOM    663  CZ  ARG A  87       2.800  10.279  14.915  1.00  7.29      8HVP 817
+ATOM    664  NH1 ARG A  87       2.082  11.345  15.275  1.00  2.00      8HVP 818
+ATOM    665  NH2 ARG A  87       2.986  10.036  13.612  1.00  2.00      8HVP 819
+ATOM    666  N   ASN A  88      -2.856   8.785  16.310  1.00  7.72      8HVP 820
+ATOM    667  CA  ASN A  88      -4.072   9.472  15.825  1.00  3.58      8HVP 821
+ATOM    668  C   ASN A  88      -5.002   9.692  17.029  1.00  3.98      8HVP 822
+ATOM    669  O   ASN A  88      -5.394  10.821  17.322  1.00  2.18      8HVP 823
+ATOM    670  CB  ASN A  88      -4.685   8.771  14.638  1.00  2.00      8HVP 824
+ATOM    671  CG  ASN A  88      -5.413   7.486  14.820  1.00  2.00      8HVP 825
+ATOM    672  OD1 ASN A  88      -4.900   6.432  15.179  1.00  4.63      8HVP 826
+ATOM    673  ND2 ASN A  88      -6.741   7.466  14.648  1.00  9.19      8HVP 827
+ATOM    674  N   LEU A  89      -5.319   8.597  17.694  1.00  3.27      8HVP 828
+ATOM    675  CA  LEU A  89      -6.191   8.594  18.868  1.00  2.44      8HVP 829
+ATOM    676  C   LEU A  89      -5.324   8.934  20.075  1.00  2.55      8HVP 830
+ATOM    677  O   LEU A  89      -5.871   9.591  20.952  1.00  3.74      8HVP 831
+ATOM    678  CB  LEU A  89      -6.970   7.280  19.019  1.00  2.00      8HVP 832
+ATOM    679  CG  LEU A  89      -7.632   6.620  17.823  1.00  2.00      8HVP 833
+ATOM    680  CD1 LEU A  89      -8.393   5.334  18.111  1.00  2.00      8HVP 834
+ATOM    681  CD2 LEU A  89      -8.555   7.677  17.200  1.00  2.00      8HVP 835
+ATOM    682  N   LEU A  90      -4.090   8.493  20.116  1.00  4.39      8HVP 836
+ATOM    683  CA  LEU A  90      -3.089   8.713  21.163  1.00  3.40      8HVP 837
+ATOM    684  C   LEU A  90      -2.852  10.152  21.590  1.00  3.55      8HVP 838
+ATOM    685  O   LEU A  90      -2.420  10.305  22.734  1.00  7.46      8HVP 839
+ATOM    686  CB  LEU A  90      -1.762   8.046  20.789  1.00  2.00      8HVP 840
+ATOM    687  CG  LEU A  90      -1.440   6.613  21.147  1.00  2.96      8HVP 841
+ATOM    688  CD1 LEU A  90       0.014   6.252  20.880  1.00  2.00      8HVP 842
+ATOM    689  CD2 LEU A  90      -1.605   6.435  22.661  1.00  5.53      8HVP 843
+ATOM    690  N   THR A  91      -3.069  11.165  20.783  1.00  2.00      8HVP 844
+ATOM    691  CA  THR A  91      -2.847  12.561  21.080  1.00  2.00      8HVP 845
+ATOM    692  C   THR A  91      -4.221  13.159  21.410  1.00  2.00      8HVP 846
+ATOM    693  O   THR A  91      -4.409  14.271  21.937  1.00  3.25      8HVP 847
+ATOM    694  CB  THR A  91      -2.236  13.400  19.903  1.00  4.96      8HVP 848
+ATOM    695  OG1 THR A  91      -3.365  13.663  18.960  1.00 17.62      8HVP 849
+ATOM    696  CG2 THR A  91      -1.080  12.973  19.042  1.00  2.00      8HVP 850
+ATOM    697  N   GLN A  92      -5.294  12.504  21.000  1.00  2.02      8HVP 851
+ATOM    698  CA  GLN A  92      -6.596  13.066  21.374  1.00  3.23      8HVP 852
+ATOM    699  C   GLN A  92      -6.603  12.983  22.917  1.00  5.73      8HVP 853
+ATOM    700  O   GLN A  92      -6.911  13.969  23.600  1.00  5.39      8HVP 854
+ATOM    701  CB  GLN A  92      -7.860  12.421  20.850  1.00  7.97      8HVP 855
+ATOM    702  CG  GLN A  92      -7.866  11.997  19.398  1.00  4.65      8HVP 856
+ATOM    703  CD  GLN A  92      -9.225  12.013  18.760  1.00  5.71      8HVP 857
+ATOM    704  OE1 GLN A  92     -10.220  11.988  19.488  1.00 16.81      8HVP 858
+ATOM    705  NE2 GLN A  92      -9.287  12.137  17.436  1.00  2.61      8HVP 859
+ATOM    706  N   ILE A  93      -6.271  11.804  23.441  1.00  5.24      8HVP 860
+ATOM    707  CA  ILE A  93      -6.314  11.492  24.869  1.00  3.10      8HVP 861
+ATOM    708  C   ILE A  93      -5.177  12.056  25.735  1.00  4.44      8HVP 862
+ATOM    709  O   ILE A  93      -5.169  11.736  26.956  1.00  3.21      8HVP 863
+ATOM    710  CB  ILE A  93      -6.503   9.960  25.137  1.00  2.00      8HVP 864
+ATOM    711  CG1 ILE A  93      -5.111   9.269  25.304  1.00  2.32      8HVP 865
+ATOM    712  CG2 ILE A  93      -7.287   9.152  24.091  1.00  2.00      8HVP 866
+ATOM    713  CD1 ILE A  93      -5.310   7.869  25.962  1.00  2.00      8HVP 867
+ATOM    714  N   GLY A  94      -4.273  12.804  25.180  1.00  3.49      8HVP 868
+ATOM    715  CA  GLY A  94      -3.074  13.462  25.609  1.00  3.23      8HVP 869
+ATOM    716  C   GLY A  94      -1.929  12.631  26.127  1.00  5.18      8HVP 870
+ATOM    717  O   GLY A  94      -1.345  13.046  27.151  1.00  6.47      8HVP 871
+HETATM  718  N   ABA A  95      -1.543  11.513  25.567  1.00  6.81      8HVP 872
+HETATM  719  CA  ABA A  95      -0.435  10.616  26.015  1.00  7.01      8HVP 873
+HETATM  720  C   ABA A  95       0.920  11.278  25.762  1.00  7.48      8HVP 874
+HETATM  721  O   ABA A  95       1.150  11.979  24.750  1.00  8.83      8HVP 875
+HETATM  722  CB  ABA A  95      -0.535   9.266  25.311  1.00  4.08      8HVP 876
+HETATM  723  CG  ABA A  95       0.190   7.994  25.563  1.00  2.00      8HVP 877
+ATOM    724  N   THR A  96       1.891  11.127  26.657  1.00  8.14      8HVP 878
+ATOM    725  CA  THR A  96       3.209  11.673  26.385  1.00  6.16      8HVP 879
+ATOM    726  C   THR A  96       4.297  10.662  26.760  1.00  8.82      8HVP 880
+ATOM    727  O   THR A  96       4.046   9.894  27.710  1.00 10.15      8HVP 881
+ATOM    728  CB  THR A  96       3.586  12.991  27.148  1.00  2.00      8HVP 882
+ATOM    729  OG1 THR A  96       3.868  12.461  28.475  1.00  2.00      8HVP 883
+ATOM    730  CG2 THR A  96       2.600  14.110  27.001  1.00  2.00      8HVP 884
+ATOM    731  N   LEU A  97       5.429  10.771  26.063  1.00  4.92      8HVP 885
+ATOM    732  CA  LEU A  97       6.537   9.909  26.444  1.00  3.00      8HVP 886
+ATOM    733  C   LEU A  97       7.472  10.829  27.262  1.00  4.93      8HVP 887
+ATOM    734  O   LEU A  97       8.019  11.760  26.678  1.00  2.00      8HVP 888
+ATOM    735  CB  LEU A  97       7.413   9.402  25.337  1.00  5.81      8HVP 889
+ATOM    736  CG  LEU A  97       6.810   8.527  24.265  1.00 17.78      8HVP 890
+ATOM    737  CD1 LEU A  97       7.531   8.868  22.950  1.00 15.15      8HVP 891
+ATOM    738  CD2 LEU A  97       6.989   7.069  24.694  1.00  2.00      8HVP 892
+ATOM    739  N   ASN A  98       7.600  10.406  28.472  1.00  2.00      8HVP 893
+ATOM    740  CA  ASN A  98       8.451  10.928  29.503  1.00  2.00      8HVP 894
+ATOM    741  C   ASN A  98       9.592   9.944  29.781  1.00  2.00      8HVP 895
+ATOM    742  O   ASN A  98       9.567   8.718  29.800  1.00  2.00      8HVP 896
+ATOM    743  CB  ASN A  98       7.683  11.180  30.790  1.00  2.00      8HVP 897
+ATOM    744  CG  ASN A  98       6.342  11.853  30.762  1.00  2.00      8HVP 898
+ATOM    745  OD1 ASN A  98       6.256  13.075  30.810  1.00  2.00      8HVP 899
+ATOM    746  ND2 ASN A  98       5.257  11.097  30.774  1.00  2.00      8HVP 900
+ATOM    747  N   PHE A  99      10.683  10.600  30.112  1.00  2.30      8HVP 901
+ATOM    748  CA  PHE A  99      12.018  10.064  30.493  1.00  2.00      8HVP 902
+ATOM    749  C   PHE A  99      13.022  11.183  30.820  1.00  2.00      8HVP 903
+ATOM    750  O   PHE A  99      14.206  10.802  30.941  1.00  2.00      8HVP 904
+ATOM    751  CB  PHE A  99      12.574   9.271  29.336  1.00  5.09      8HVP 905
+ATOM    752  CG  PHE A  99      13.135   9.982  28.143  1.00  3.95      8HVP 906
+ATOM    753  CD1 PHE A  99      14.475  10.327  28.094  1.00 13.06      8HVP 907
+ATOM    754  CD2 PHE A  99      12.355  10.142  26.995  1.00 13.99      8HVP 908
+ATOM    755  CE1 PHE A  99      15.075  10.890  26.971  1.00 15.26      8HVP 909
+ATOM    756  CE2 PHE A  99      12.936  10.661  25.858  1.00 16.96      8HVP 910
+ATOM    757  CZ  PHE A  99      14.267  11.058  25.842  1.00 18.46      8HVP 911
+ATOM    758  OXT PHE A  99      12.728  12.395  30.861  1.00  8.17      8HVP 912
+TER     759      PHE A  99                                              8HVP 913
+ATOM    760  N   PRO B   1      13.362  14.768  29.011  1.00 22.23      8HVP 914
+ATOM    761  CA  PRO B   1      12.150  15.030  28.252  1.00 19.96      8HVP 915
+ATOM    762  C   PRO B   1      10.839  14.677  28.948  1.00 20.06      8HVP 916
+ATOM    763  O   PRO B   1      10.645  14.321  30.108  1.00 17.36      8HVP 917
+ATOM    764  CB  PRO B   1      12.333  14.144  27.001  1.00 21.73      8HVP 918
+ATOM    765  CG  PRO B   1      13.829  14.070  26.828  1.00 20.36      8HVP 919
+ATOM    766  CD  PRO B   1      14.266  13.841  28.287  1.00 22.67      8HVP 920
+ATOM    767  N   GLN B   2       9.851  14.769  28.117  1.00 21.74      8HVP 921
+ATOM    768  CA  GLN B   2       8.434  14.690  27.889  1.00 20.96      8HVP 922
+ATOM    769  C   GLN B   2       8.208  14.858  26.363  1.00 19.81      8HVP 923
+ATOM    770  O   GLN B   2       8.087  16.028  25.925  1.00 20.13      8HVP 924
+ATOM    771  CB  GLN B   2       7.647  15.813  28.601  1.00 11.77      8HVP 925
+ATOM    772  CG  GLN B   2       6.163  15.754  28.480  1.00 21.59      8HVP 926
+ATOM    773  CD  GLN B   2       5.211  16.418  29.425  1.00 29.24      8HVP 927
+ATOM    774  OE1 GLN B   2       4.849  15.901  30.495  1.00 30.58      8HVP 928
+ATOM    775  NE2 GLN B   2       4.626  17.557  29.014  1.00 33.48      8HVP 929
+ATOM    776  N   ILE B   3       8.245  13.802  25.569  1.00 18.52      8HVP 930
+ATOM    777  CA  ILE B   3       8.062  13.949  24.104  1.00 15.36      8HVP 931
+ATOM    778  C   ILE B   3       6.545  13.969  23.859  1.00 15.34      8HVP 932
+ATOM    779  O   ILE B   3       5.743  13.463  24.671  1.00 14.19      8HVP 933
+ATOM    780  CB  ILE B   3       8.794  12.957  23.149  1.00  4.31      8HVP 934
+ATOM    781  CG1 ILE B   3      10.325  13.000  23.437  1.00  2.00      8HVP 935
+ATOM    782  CG2 ILE B   3       8.554  13.117  21.635  1.00  7.74      8HVP 936
+ATOM    783  CD1 ILE B   3      10.587  12.109  24.687  1.00  2.00      8HVP 937
+ATOM    784  N   THR B   4       6.286  14.583  22.716  1.00 13.82      8HVP 938
+ATOM    785  CA  THR B   4       4.895  14.834  22.268  1.00 11.46      8HVP 939
+ATOM    786  C   THR B   4       4.650  13.952  21.069  1.00  7.87      8HVP 940
+ATOM    787  O   THR B   4       5.597  13.673  20.332  1.00  6.67      8HVP 941
+ATOM    788  CB  THR B   4       4.683  16.389  22.049  1.00 14.61      8HVP 942
+ATOM    789  OG1 THR B   4       3.283  16.733  22.322  1.00  2.27      8HVP 943
+ATOM    790  CG2 THR B   4       5.274  16.924  20.736  1.00  9.91      8HVP 944
+ATOM    791  N   LEU B   5       3.394  13.552  20.937  1.00  7.96      8HVP 945
+ATOM    792  CA  LEU B   5       3.055  12.678  19.798  1.00  8.46      8HVP 946
+ATOM    793  C   LEU B   5       2.487  13.354  18.578  1.00  6.62      8HVP 947
+ATOM    794  O   LEU B   5       2.051  12.623  17.675  1.00  5.80      8HVP 948
+ATOM    795  CB  LEU B   5       2.218  11.576  20.479  1.00 14.06      8HVP 949
+ATOM    796  CG  LEU B   5       2.985  10.781  21.553  1.00 13.25      8HVP 950
+ATOM    797  CD1 LEU B   5       2.022   9.789  22.194  1.00  2.00      8HVP 951
+ATOM    798  CD2 LEU B   5       4.211  10.126  20.907  1.00  2.00      8HVP 952
+ATOM    799  N   TRP B   6       2.529  14.670  18.435  1.00  8.33      8HVP 953
+ATOM    800  CA  TRP B   6       1.928  15.413  17.312  1.00  6.13      8HVP 954
+ATOM    801  C   TRP B   6       2.582  14.965  15.991  1.00  7.83      8HVP 955
+ATOM    802  O   TRP B   6       2.001  14.765  14.926  1.00  5.80      8HVP 956
+ATOM    803  CB  TRP B   6       1.974  16.899  17.526  1.00  2.70      8HVP 957
+ATOM    804  CG  TRP B   6       1.085  17.568  18.496  1.00  8.01      8HVP 958
+ATOM    805  CD1 TRP B   6       1.502  18.226  19.617  1.00  4.80      8HVP 959
+ATOM    806  CD2 TRP B   6      -0.353  17.645  18.504  1.00 12.18      8HVP 960
+ATOM    807  NE1 TRP B   6       0.424  18.693  20.322  1.00 13.10      8HVP 961
+ATOM    808  CE2 TRP B   6      -0.728  18.360  19.666  1.00 12.26      8HVP 962
+ATOM    809  CE3 TRP B   6      -1.349  17.193  17.640  1.00  9.93      8HVP 963
+ATOM    810  CZ2 TRP B   6      -2.052  18.623  20.007  1.00  5.70      8HVP 964
+ATOM    811  CZ3 TRP B   6      -2.669  17.461  17.968  1.00 11.20      8HVP 965
+ATOM    812  CH2 TRP B   6      -3.007  18.182  19.115  1.00  4.84      8HVP 966
+ATOM    813  N   GLN B   7       3.860  14.795  16.175  1.00  9.46      8HVP 967
+ATOM    814  CA  GLN B   7       4.910  14.331  15.306  1.00  6.51      8HVP 968
+ATOM    815  C   GLN B   7       5.473  13.072  15.957  1.00  6.44      8HVP 969
+ATOM    816  O   GLN B   7       5.491  12.888  17.193  1.00 14.42      8HVP 970
+ATOM    817  CB  GLN B   7       6.018  15.376  15.205  1.00 10.65      8HVP 971
+ATOM    818  CG  GLN B   7       5.548  16.697  14.609  1.00 11.70      8HVP 972
+ATOM    819  CD  GLN B   7       4.947  17.582  15.683  1.00 18.36      8HVP 973
+ATOM    820  OE1 GLN B   7       5.412  17.694  16.813  1.00 18.82      8HVP 974
+ATOM    821  NE2 GLN B   7       3.842  18.206  15.310  1.00 16.66      8HVP 975
+ATOM    822  N   ARG B   8       5.926  12.182  15.138  1.00  2.00      8HVP 976
+ATOM    823  CA  ARG B   8       6.531  10.933  15.569  1.00  2.47      8HVP 977
+ATOM    824  C   ARG B   8       7.555  11.268  16.617  1.00  2.00      8HVP 978
+ATOM    825  O   ARG B   8       8.108  12.376  16.572  1.00  4.59      8HVP 979
+ATOM    826  CB  ARG B   8       7.357  10.413  14.377  1.00  2.00      8HVP 980
+ATOM    827  CG  ARG B   8       6.402  10.086  13.256  1.00  4.99      8HVP 981
+ATOM    828  CD  ARG B   8       7.280   9.765  12.065  1.00 11.69      8HVP 982
+ATOM    829  NE  ARG B   8       6.366   9.124  11.145  1.00 18.34      8HVP 983
+ATOM    830  CZ  ARG B   8       6.534   8.935   9.847  1.00 20.17      8HVP 984
+ATOM    831  NH1 ARG B   8       7.634   9.201   9.166  1.00 29.34      8HVP 985
+ATOM    832  NH2 ARG B   8       5.440   8.519   9.212  1.00 18.36      8HVP 986
+ATOM    833  N   PRO B   9       7.902  10.289  17.418  1.00  4.14      8HVP 987
+ATOM    834  CA  PRO B   9       8.905  10.504  18.481  1.00  2.00      8HVP 988
+ATOM    835  C   PRO B   9      10.327  10.229  18.002  1.00  3.36      8HVP 989
+ATOM    836  O   PRO B   9      11.066   9.312  18.449  1.00  4.57      8HVP 990
+ATOM    837  CB  PRO B   9       8.345   9.633  19.605  1.00  2.00      8HVP 991
+ATOM    838  CG  PRO B   9       7.101   8.985  19.042  1.00  2.00      8HVP 992
+ATOM    839  CD  PRO B   9       7.321   8.943  17.528  1.00  2.00      8HVP 993
+ATOM    840  N   LEU B  10      10.819  11.046  17.082  1.00  2.00      8HVP 994
+ATOM    841  CA  LEU B  10      12.209  10.938  16.611  1.00  2.00      8HVP 995
+ATOM    842  C   LEU B  10      13.190  11.593  17.589  1.00  2.00      8HVP 996
+ATOM    843  O   LEU B  10      13.077  12.784  17.952  1.00  2.00      8HVP 997
+ATOM    844  CB  LEU B  10      12.381  11.504  15.230  1.00  2.00      8HVP 998
+ATOM    845  CG  LEU B  10      12.146  10.576  14.064  1.00  2.00      8HVP 999
+ATOM    846  CD1 LEU B  10      12.269  11.376  12.775  1.00  8.81      8HVP1000
+ATOM    847  CD2 LEU B  10      13.183   9.464  14.152  1.00 12.85      8HVP1001
+ATOM    848  N   VAL B  11      14.234  10.887  17.891  1.00  2.00      8HVP1002
+ATOM    849  CA  VAL B  11      15.287  11.363  18.798  1.00  6.05      8HVP1003
+ATOM    850  C   VAL B  11      16.641  11.041  18.188  1.00  4.35      8HVP1004
+ATOM    851  O   VAL B  11      16.652  10.633  17.020  1.00 10.00      8HVP1005
+ATOM    852  CB  VAL B  11      15.046  10.704  20.164  1.00  2.00      8HVP1006
+ATOM    853  CG1 VAL B  11      13.763  11.201  20.825  1.00  2.00      8HVP1007
+ATOM    854  CG2 VAL B  11      15.103   9.194  20.040  1.00  2.00      8HVP1008
+ATOM    855  N   THR B  12      17.661  11.184  18.991  1.00  3.92      8HVP1009
+ATOM    856  CA  THR B  12      19.029  10.968  18.446  1.00  6.27      8HVP1010
+ATOM    857  C   THR B  12      19.788  10.030  19.346  1.00  7.20      8HVP1011
+ATOM    858  O   THR B  12      19.728  10.174  20.565  1.00  2.07      8HVP1012
+ATOM    859  CB  THR B  12      19.610  12.432  18.259  1.00  2.00      8HVP1013
+ATOM    860  OG1 THR B  12      20.461  12.429  17.101  1.00  5.09      8HVP1014
+ATOM    861  CG2 THR B  12      20.265  12.929  19.561  1.00  2.00      8HVP1015
+ATOM    862  N   ILE B  13      20.471   9.059  18.789  1.00 11.31      8HVP1016
+ATOM    863  CA  ILE B  13      21.235   8.044  19.558  1.00 16.39      8HVP1017
+ATOM    864  C   ILE B  13      22.720   8.005  19.256  1.00 17.43      8HVP1018
+ATOM    865  O   ILE B  13      23.115   8.508  18.157  1.00 16.05      8HVP1019
+ATOM    866  CB  ILE B  13      20.491   6.678  19.202  1.00 17.54      8HVP1020
+ATOM    867  CG1 ILE B  13      20.494   6.535  17.660  1.00 18.07      8HVP1021
+ATOM    868  CG2 ILE B  13      19.058   6.601  19.789  1.00 11.02      8HVP1022
+ATOM    869  CD1 ILE B  13      20.418   5.094  17.098  1.00 19.66      8HVP1023
+ATOM    870  N   ARG B  14      23.576   7.463  20.130  1.00 19.20      8HVP1024
+ATOM    871  CA  ARG B  14      25.006   7.365  19.723  1.00 25.30      8HVP1025
+ATOM    872  C   ARG B  14      25.243   5.828  19.659  1.00 26.26      8HVP1026
+ATOM    873  O   ARG B  14      24.886   5.226  20.683  1.00 28.15      8HVP1027
+ATOM    874  CB  ARG B  14      26.194   7.762  20.539  1.00 34.20      8HVP1028
+ATOM    875  CG  ARG B  14      26.381   8.789  21.594  1.00 46.14      8HVP1029
+ATOM    876  CD  ARG B  14      27.682   8.861  22.313  1.00 43.75      8HVP1030
+ATOM    877  NE  ARG B  14      28.833   9.135  21.480  1.00 43.33      8HVP1031
+ATOM    878  CZ  ARG B  14      29.471   8.225  20.734  1.00 45.45      8HVP1032
+ATOM    879  NH1 ARG B  14      29.249   6.920  20.723  1.00 38.71      8HVP1033
+ATOM    880  NH2 ARG B  14      30.324   8.673  19.806  1.00 51.01      8HVP1034
+ATOM    881  N   ILE B  15      25.820   5.334  18.598  1.00 28.90      8HVP1035
+ATOM    882  CA  ILE B  15      26.161   3.902  18.521  1.00 31.28      8HVP1036
+ATOM    883  C   ILE B  15      27.548   3.841  17.877  1.00 31.86      8HVP1037
+ATOM    884  O   ILE B  15      27.550   4.139  16.675  1.00 34.27      8HVP1038
+ATOM    885  CB  ILE B  15      25.242   2.986  17.647  1.00 27.40      8HVP1039
+ATOM    886  CG1 ILE B  15      23.801   3.550  17.659  1.00 30.83      8HVP1040
+ATOM    887  CG2 ILE B  15      25.353   1.501  18.118  1.00 23.72      8HVP1041
+ATOM    888  CD1 ILE B  15      23.181   3.677  16.232  1.00 31.58      8HVP1042
+ATOM    889  N   GLY B  16      28.579   3.493  18.599  1.00 33.66      8HVP1043
+ATOM    890  CA  GLY B  16      29.920   3.439  17.998  1.00 36.17      8HVP1044
+ATOM    891  C   GLY B  16      30.272   4.691  17.191  1.00 38.23      8HVP1045
+ATOM    892  O   GLY B  16      30.783   4.588  16.042  1.00 38.72      8HVP1046
+ATOM    893  N   GLY B  17      30.023   5.875  17.760  1.00 37.56      8HVP1047
+ATOM    894  CA  GLY B  17      30.371   7.125  17.058  1.00 36.56      8HVP1048
+ATOM    895  C   GLY B  17      29.469   7.488  15.882  1.00 34.87      8HVP1049
+ATOM    896  O   GLY B  17      29.718   8.398  15.074  1.00 32.49      8HVP1050
+ATOM    897  N   GLN B  18      28.403   6.723  15.803  1.00 31.88      8HVP1051
+ATOM    898  CA  GLN B  18      27.332   7.006  14.853  1.00 32.64      8HVP1052
+ATOM    899  C   GLN B  18      26.368   7.826  15.739  1.00 31.59      8HVP1053
+ATOM    900  O   GLN B  18      26.015   7.341  16.831  1.00 29.27      8HVP1054
+ATOM    901  CB  GLN B  18      26.622   5.781  14.308  1.00 47.57      8HVP1055
+ATOM    902  CG  GLN B  18      27.596   4.851  13.567  1.00 57.60      8HVP1056
+ATOM    903  CD  GLN B  18      27.932   5.467  12.218  1.00 61.19      8HVP1057
+ATOM    904  OE1 GLN B  18      28.763   6.373  12.162  1.00 64.28      8HVP1058
+ATOM    905  NE2 GLN B  18      27.207   4.959  11.223  1.00 59.60      8HVP1059
+ATOM    906  N   LEU B  19      26.148   9.042  15.302  1.00 29.82      8HVP1060
+ATOM    907  CA  LEU B  19      25.149   9.917  15.979  1.00 29.89      8HVP1061
+ATOM    908  C   LEU B  19      23.936   9.593  15.112  1.00 27.07      8HVP1062
+ATOM    909  O   LEU B  19      24.220   9.604  13.892  1.00 30.34      8HVP1063
+ATOM    910  CB  LEU B  19      25.656  11.339  16.015  1.00 29.62      8HVP1064
+ATOM    911  CG  LEU B  19      26.168  11.974  17.294  1.00 32.57      8HVP1065
+ATOM    912  CD1 LEU B  19      25.019  12.444  18.202  1.00 26.91      8HVP1066
+ATOM    913  CD2 LEU B  19      27.095  11.019  18.059  1.00 31.34      8HVP1067
+ATOM    914  N   LYS B  20      22.804   9.170  15.621  1.00 26.45      8HVP1068
+ATOM    915  CA  LYS B  20      21.700   8.810  14.674  1.00 22.28      8HVP1069
+ATOM    916  C   LYS B  20      20.315   9.136  15.205  1.00 20.11      8HVP1070
+ATOM    917  O   LYS B  20      20.097   9.503  16.388  1.00 16.11      8HVP1071
+ATOM    918  CB  LYS B  20      21.865   7.380  14.163  1.00 19.56      8HVP1072
+ATOM    919  CG  LYS B  20      21.341   7.177  12.725  1.00 27.74      8HVP1073
+ATOM    920  CD  LYS B  20      22.260   6.479  11.751  1.00 26.62      8HVP1074
+ATOM    921  CE  LYS B  20      23.498   5.759  12.178  1.00 18.42      8HVP1075
+ATOM    922  NZ  LYS B  20      23.364   4.515  12.946  1.00 16.64      8HVP1076
+ATOM    923  N   GLU B  21      19.343   9.036  14.287  1.00 17.43      8HVP1077
+ATOM    924  CA  GLU B  21      17.937   9.319  14.635  1.00 13.51      8HVP1078
+ATOM    925  C   GLU B  21      17.078   8.089  14.826  1.00  8.57      8HVP1079
+ATOM    926  O   GLU B  21      17.075   7.126  14.079  1.00  3.48      8HVP1080
+ATOM    927  CB  GLU B  21      17.226  10.239  13.644  1.00 26.89      8HVP1081
+ATOM    928  CG  GLU B  21      17.343  11.745  13.974  1.00 33.60      8HVP1082
+ATOM    929  CD  GLU B  21      16.331  12.542  13.170  1.00 40.53      8HVP1083
+ATOM    930  OE1 GLU B  21      15.879  11.992  12.167  1.00 35.62      8HVP1084
+ATOM    931  OE2 GLU B  21      16.073  13.654  13.684  1.00 34.83      8HVP1085
+ATOM    932  N   ALA B  22      16.282   8.155  15.875  1.00  8.69      8HVP1086
+ATOM    933  CA  ALA B  22      15.408   7.042  16.232  1.00  8.62      8HVP1087
+ATOM    934  C   ALA B  22      14.029   7.471  16.690  1.00  7.93      8HVP1088
+ATOM    935  O   ALA B  22      13.780   8.599  17.108  1.00  4.79      8HVP1089
+ATOM    936  CB  ALA B  22      16.186   6.273  17.291  1.00  2.50      8HVP1090
+ATOM    937  N   LEU B  23      13.131   6.527  16.570  1.00  8.97      8HVP1091
+ATOM    938  CA  LEU B  23      11.715   6.576  16.855  1.00  6.82      8HVP1092
+ATOM    939  C   LEU B  23      11.585   5.806  18.182  1.00 10.95      8HVP1093
+ATOM    940  O   LEU B  23      11.945   4.613  18.112  1.00 10.10      8HVP1094
+ATOM    941  CB  LEU B  23      11.013   5.782  15.759  1.00  2.00      8HVP1095
+ATOM    942  CG  LEU B  23       9.562   5.834  15.401  1.00  2.00      8HVP1096
+ATOM    943  CD1 LEU B  23       9.296   7.140  14.617  1.00  2.00      8HVP1097
+ATOM    944  CD2 LEU B  23       9.141   4.802  14.364  1.00  2.16      8HVP1098
+ATOM    945  N   LEU B  24      11.191   6.493  19.227  1.00 10.43      8HVP1099
+ATOM    946  CA  LEU B  24      10.892   5.811  20.501  1.00 12.28      8HVP1100
+ATOM    947  C   LEU B  24       9.608   5.010  20.194  1.00  9.36      8HVP1101
+ATOM    948  O   LEU B  24       8.578   5.662  19.993  1.00 12.60      8HVP1102
+ATOM    949  CB  LEU B  24      10.632   6.813  21.639  1.00  7.17      8HVP1103
+ATOM    950  CG  LEU B  24      11.653   7.929  21.812  1.00  5.54      8HVP1104
+ATOM    951  CD1 LEU B  24      11.357   8.652  23.109  1.00  9.55      8HVP1105
+ATOM    952  CD2 LEU B  24      13.056   7.317  21.731  1.00  2.00      8HVP1106
+ATOM    953  N   ASP B  25       9.701   3.708  20.205  1.00  8.50      8HVP1107
+ATOM    954  CA  ASP B  25       8.569   2.830  19.890  1.00  5.50      8HVP1108
+ATOM    955  C   ASP B  25       7.962   2.057  21.057  1.00  2.68      8HVP1109
+ATOM    956  O   ASP B  25       8.249   0.858  21.174  1.00  2.00      8HVP1110
+ATOM    957  CB  ASP B  25       9.094   1.887  18.758  1.00  2.00      8HVP1111
+ATOM    958  CG  ASP B  25       7.855   1.344  18.064  1.00 12.58      8HVP1112
+ATOM    959  OD1 ASP B  25       6.760   1.311  18.713  1.00  2.00      8HVP1113
+ATOM    960  OD2 ASP B  25       7.891   1.001  16.863  1.00  2.00      8HVP1114
+ATOM    961  N   THR B  26       7.021   2.612  21.812  1.00  2.88      8HVP1115
+ATOM    962  CA  THR B  26       6.460   1.826  22.933  1.00  4.04      8HVP1116
+ATOM    963  C   THR B  26       5.802   0.529  22.459  1.00  4.48      8HVP1117
+ATOM    964  O   THR B  26       5.428  -0.322  23.290  1.00  2.00      8HVP1118
+ATOM    965  CB  THR B  26       5.410   2.570  23.847  1.00  2.00      8HVP1119
+ATOM    966  OG1 THR B  26       4.403   3.014  22.912  1.00  2.00      8HVP1120
+ATOM    967  CG2 THR B  26       6.022   3.695  24.670  1.00  2.00      8HVP1121
+ATOM    968  N   GLY B  27       5.722   0.437  21.144  1.00  2.00      8HVP1122
+ATOM    969  CA  GLY B  27       5.087  -0.711  20.511  1.00  2.00      8HVP1123
+ATOM    970  C   GLY B  27       6.094  -1.684  19.944  1.00  3.33      8HVP1124
+ATOM    971  O   GLY B  27       5.613  -2.651  19.311  1.00  7.98      8HVP1125
+ATOM    972  N   ALA B  28       7.374  -1.463  20.157  1.00  3.25      8HVP1126
+ATOM    973  CA  ALA B  28       8.441  -2.340  19.672  1.00  2.00      8HVP1127
+ATOM    974  C   ALA B  28       9.025  -3.158  20.823  1.00  2.00      8HVP1128
+ATOM    975  O   ALA B  28       9.481  -2.652  21.845  1.00  2.00      8HVP1129
+ATOM    976  CB  ALA B  28       9.618  -1.683  19.001  1.00  2.00      8HVP1130
+ATOM    977  N   ASP B  29       8.954  -4.439  20.553  1.00  2.00      8HVP1131
+ATOM    978  CA  ASP B  29       9.483  -5.475  21.428  1.00  4.80      8HVP1132
+ATOM    979  C   ASP B  29      11.009  -5.367  21.606  1.00  5.96      8HVP1133
+ATOM    980  O   ASP B  29      11.496  -5.585  22.727  1.00  5.93      8HVP1134
+ATOM    981  CB  ASP B  29       9.022  -6.840  20.899  1.00  7.65      8HVP1135
+ATOM    982  CG  ASP B  29       7.845  -7.432  21.624  1.00 16.24      8HVP1136
+ATOM    983  OD1 ASP B  29       6.883  -6.685  21.858  1.00 26.97      8HVP1137
+ATOM    984  OD2 ASP B  29       7.868  -8.631  21.991  1.00 19.75      8HVP1138
+ATOM    985  N   ASP B  30      11.716  -5.174  20.516  1.00  7.34      8HVP1139
+ATOM    986  CA  ASP B  30      13.124  -5.121  20.191  1.00  4.67      8HVP1140
+ATOM    987  C   ASP B  30      13.557  -3.784  19.597  1.00  2.00      8HVP1141
+ATOM    988  O   ASP B  30      12.700  -2.963  19.244  1.00  2.00      8HVP1142
+ATOM    989  CB  ASP B  30      13.428  -6.156  19.051  1.00  2.00      8HVP1143
+ATOM    990  CG  ASP B  30      12.960  -7.523  19.518  1.00 19.35      8HVP1144
+ATOM    991  OD1 ASP B  30      13.301  -7.714  20.722  1.00 28.54      8HVP1145
+ATOM    992  OD2 ASP B  30      12.257  -8.296  18.837  1.00 18.48      8HVP1146
+ATOM    993  N   THR B  31      14.855  -3.654  19.335  1.00  6.35      8HVP1147
+ATOM    994  CA  THR B  31      15.480  -2.459  18.741  1.00  2.45      8HVP1148
+ATOM    995  C   THR B  31      16.107  -2.874  17.421  1.00  2.57      8HVP1149
+ATOM    996  O   THR B  31      16.989  -3.703  17.343  1.00  2.38      8HVP1150
+ATOM    997  CB  THR B  31      16.541  -1.792  19.696  1.00  8.53      8HVP1151
+ATOM    998  OG1 THR B  31      15.856  -1.705  20.992  1.00  2.64      8HVP1152
+ATOM    999  CG2 THR B  31      17.128  -0.445  19.245  1.00  2.00      8HVP1153
+ATOM   1000  N   VAL B  32      15.639  -2.267  16.360  1.00  3.43      8HVP1154
+ATOM   1001  CA  VAL B  32      16.019  -2.557  14.989  1.00  2.00      8HVP1155
+ATOM   1002  C   VAL B  32      16.527  -1.268  14.365  1.00  3.55      8HVP1156
+ATOM   1003  O   VAL B  32      15.737  -0.339  14.189  1.00  5.13      8HVP1157
+ATOM   1004  CB  VAL B  32      14.832  -3.093  14.175  1.00  3.44      8HVP1158
+ATOM   1005  CG1 VAL B  32      15.292  -3.729  12.866  1.00  2.00      8HVP1159
+ATOM   1006  CG2 VAL B  32      13.975  -4.061  15.009  1.00  2.00      8HVP1160
+ATOM   1007  N   LEU B  33      17.793  -1.326  14.034  1.00  2.00      8HVP1161
+ATOM   1008  CA  LEU B  33      18.476  -0.151  13.433  1.00  4.39      8HVP1162
+ATOM   1009  C   LEU B  33      18.702  -0.677  12.018  1.00  2.00      8HVP1163
+ATOM   1010  O   LEU B  33      18.716  -1.893  11.949  1.00  2.00      8HVP1164
+ATOM   1011  CB  LEU B  33      19.648   0.118  14.374  1.00  2.00      8HVP1165
+ATOM   1012  CG  LEU B  33      19.431   0.854  15.671  1.00  2.00      8HVP1166
+ATOM   1013  CD1 LEU B  33      19.625  -0.019  16.915  1.00  2.00      8HVP1167
+ATOM   1014  CD2 LEU B  33      20.578   1.874  15.710  1.00 13.97      8HVP1168
+ATOM   1015  N   GLU B  34      18.668   0.153  11.026  1.00 11.86      8HVP1169
+ATOM   1016  CA  GLU B  34      18.921  -0.217   9.609  1.00 19.66      8HVP1170
+ATOM   1017  C   GLU B  34      20.269  -0.927   9.455  1.00 21.70      8HVP1171
+ATOM   1018  O   GLU B  34      20.770  -1.586  10.388  1.00 23.50      8HVP1172
+ATOM   1019  CB  GLU B  34      19.178   1.123   8.828  1.00 24.09      8HVP1173
+ATOM   1020  CG  GLU B  34      20.154   1.911   9.701  1.00 41.99      8HVP1174
+ATOM   1021  CD  GLU B  34      21.007   3.034   9.293  1.00 54.75      8HVP1175
+ATOM   1022  OE1 GLU B  34      20.792   3.376   8.110  1.00 66.31      8HVP1176
+ATOM   1023  OE2 GLU B  34      21.836   3.539  10.050  1.00 60.98      8HVP1177
+ATOM   1024  N   GLU B  35      20.890  -0.735   8.281  1.00 23.86      8HVP1178
+ATOM   1025  CA  GLU B  35      22.198  -1.316   8.006  1.00 25.01      8HVP1179
+ATOM   1026  C   GLU B  35      23.404  -0.584   8.615  1.00 24.91      8HVP1180
+ATOM   1027  O   GLU B  35      23.690   0.564   8.213  1.00 24.28      8HVP1181
+ATOM   1028  CB  GLU B  35      22.593  -1.469   6.535  1.00 30.71      8HVP1182
+ATOM   1029  CG  GLU B  35      21.865  -2.546   5.752  1.00 26.17      8HVP1183
+ATOM   1030  CD  GLU B  35      21.271  -3.614   6.626  1.00 30.32      8HVP1184
+ATOM   1031  OE1 GLU B  35      22.050  -4.030   7.510  1.00 30.03      8HVP1185
+ATOM   1032  OE2 GLU B  35      20.109  -3.919   6.410  1.00 30.07      8HVP1186
+ATOM   1033  N   MET B  36      24.050  -1.359   9.474  1.00 20.99      8HVP1187
+ATOM   1034  CA  MET B  36      25.262  -0.922  10.165  1.00 21.40      8HVP1188
+ATOM   1035  C   MET B  36      26.132  -2.128  10.553  1.00 21.96      8HVP1189
+ATOM   1036  O   MET B  36      25.723  -3.207  11.032  1.00 18.06      8HVP1190
+ATOM   1037  CB  MET B  36      24.977   0.070  11.272  1.00 12.31      8HVP1191
+ATOM   1038  CG  MET B  36      24.552  -0.590  12.536  1.00 20.81      8HVP1192
+ATOM   1039  SD  MET B  36      24.155   0.798  13.659  1.00 41.04      8HVP1193
+ATOM   1040  CE  MET B  36      25.800   1.572  13.710  1.00 21.74      8HVP1194
+ATOM   1041  N   ASN B  37      27.413  -1.843  10.318  1.00 22.96      8HVP1195
+ATOM   1042  CA  ASN B  37      28.526  -2.803  10.481  1.00 26.45      8HVP1196
+ATOM   1043  C   ASN B  37      29.058  -3.002  11.889  1.00 24.22      8HVP1197
+ATOM   1044  O   ASN B  37      30.257  -2.750  12.146  1.00 27.66      8HVP1198
+ATOM   1045  CB  ASN B  37      29.608  -2.306   9.499  1.00 35.13      8HVP1199
+ATOM   1046  CG  ASN B  37      30.495  -3.456   9.064  1.00 43.47      8HVP1200
+ATOM   1047  OD1 ASN B  37      30.062  -4.594   9.339  1.00 51.18      8HVP1201
+ATOM   1048  ND2 ASN B  37      31.615  -3.120   8.422  1.00 42.91      8HVP1202
+ATOM   1049  N   LEU B  38      28.241  -3.500  12.795  1.00 22.53      8HVP1203
+ATOM   1050  CA  LEU B  38      28.712  -3.604  14.184  1.00 21.71      8HVP1204
+ATOM   1051  C   LEU B  38      29.813  -4.653  14.215  1.00 21.57      8HVP1205
+ATOM   1052  O   LEU B  38      29.954  -5.430  13.278  1.00 20.06      8HVP1206
+ATOM   1053  CB  LEU B  38      27.544  -3.677  15.174  1.00 22.38      8HVP1207
+ATOM   1054  CG  LEU B  38      26.588  -2.485  15.107  1.00 17.47      8HVP1208
+ATOM   1055  CD1 LEU B  38      25.171  -2.817  15.535  1.00 18.42      8HVP1209
+ATOM   1056  CD2 LEU B  38      27.118  -1.340  15.949  1.00 13.96      8HVP1210
+ATOM   1057  N   PRO B  39      30.589  -4.547  15.271  1.00 23.87      8HVP1211
+ATOM   1058  CA  PRO B  39      31.705  -5.450  15.533  1.00 24.03      8HVP1212
+ATOM   1059  C   PRO B  39      31.177  -6.642  16.331  1.00 22.54      8HVP1213
+ATOM   1060  O   PRO B  39      30.523  -6.397  17.364  1.00 18.45      8HVP1214
+ATOM   1061  CB  PRO B  39      32.671  -4.638  16.389  1.00 24.63      8HVP1215
+ATOM   1062  CG  PRO B  39      31.828  -3.570  17.034  1.00 24.98      8HVP1216
+ATOM   1063  CD  PRO B  39      30.461  -3.575  16.383  1.00 24.62      8HVP1217
+ATOM   1064  N   GLY B  40      31.474  -7.812  15.802  1.00 20.94      8HVP1218
+ATOM   1065  CA  GLY B  40      31.097  -9.019  16.495  1.00 28.11      8HVP1219
+ATOM   1066  C   GLY B  40      30.325 -10.166  15.898  1.00 29.39      8HVP1220
+ATOM   1067  O   GLY B  40      30.129 -10.333  14.677  1.00 33.81      8HVP1221
+ATOM   1068  N   LYS B  41      29.914 -10.980  16.864  1.00 28.89      8HVP1222
+ATOM   1069  CA  LYS B  41      29.099 -12.177  16.775  1.00 27.88      8HVP1223
+ATOM   1070  C   LYS B  41      27.619 -11.718  16.829  1.00 27.43      8HVP1224
+ATOM   1071  O   LYS B  41      27.198 -10.864  17.628  1.00 28.52      8HVP1225
+ATOM   1072  CB  LYS B  41      29.182 -13.120  17.945  1.00 36.31      8HVP1226
+ATOM   1073  CG  LYS B  41      30.254 -13.963  18.544  1.00 30.73      8HVP1227
+ATOM   1074  CD  LYS B  41      30.984 -14.877  17.594  1.00 29.28      8HVP1228
+ATOM   1075  CE  LYS B  41      30.202 -15.339  16.383  1.00 31.88      8HVP1229
+ATOM   1076  NZ  LYS B  41      30.925 -16.450  15.716  1.00 27.56      8HVP1230
+ATOM   1077  N   TRP B  42      26.882 -12.441  16.040  1.00 27.58      8HVP1231
+ATOM   1078  CA  TRP B  42      25.433 -12.279  15.901  1.00 28.24      8HVP1232
+ATOM   1079  C   TRP B  42      24.929 -13.670  15.533  1.00 27.17      8HVP1233
+ATOM   1080  O   TRP B  42      25.696 -14.610  15.362  1.00 25.95      8HVP1234
+ATOM   1081  CB  TRP B  42      25.069 -11.149  14.956  1.00 37.00      8HVP1235
+ATOM   1082  CG  TRP B  42      25.665 -11.312  13.604  1.00 46.23      8HVP1236
+ATOM   1083  CD1 TRP B  42      26.672 -10.578  13.038  1.00 49.04      8HVP1237
+ATOM   1084  CD2 TRP B  42      25.321 -12.335  12.655  1.00 49.52      8HVP1238
+ATOM   1085  NE1 TRP B  42      26.965 -11.059  11.780  1.00 49.97      8HVP1239
+ATOM   1086  CE2 TRP B  42      26.160 -12.136  11.531  1.00 52.47      8HVP1240
+ATOM   1087  CE3 TRP B  42      24.387 -13.359  12.630  1.00 47.48      8HVP1241
+ATOM   1088  CZ2 TRP B  42      26.077 -12.948  10.410  1.00 56.71      8HVP1242
+ATOM   1089  CZ3 TRP B  42      24.320 -14.162  11.521  1.00 51.28      8HVP1243
+ATOM   1090  CH2 TRP B  42      25.135 -13.968  10.417  1.00 54.84      8HVP1244
+ATOM   1091  N   LYS B  43      23.640 -13.766  15.434  1.00 29.09      8HVP1245
+ATOM   1092  CA  LYS B  43      22.759 -14.900  15.205  1.00 26.73      8HVP1246
+ATOM   1093  C   LYS B  43      21.470 -14.400  14.572  1.00 26.40      8HVP1247
+ATOM   1094  O   LYS B  43      20.741 -13.573  15.156  1.00 25.74      8HVP1248
+ATOM   1095  CB  LYS B  43      22.374 -15.506  16.587  1.00 28.42      8HVP1249
+ATOM   1096  CG  LYS B  43      22.805 -14.615  17.747  1.00 30.96      8HVP1250
+ATOM   1097  CD  LYS B  43      22.046 -14.565  19.021  1.00 33.44      8HVP1251
+ATOM   1098  CE  LYS B  43      22.143 -15.700  19.992  1.00 33.13      8HVP1252
+ATOM   1099  NZ  LYS B  43      21.336 -15.432  21.218  1.00 27.94      8HVP1253
+ATOM   1100  N   PRO B  44      21.198 -14.885  13.376  1.00 27.94      8HVP1254
+ATOM   1101  CA  PRO B  44      19.947 -14.533  12.677  1.00 27.06      8HVP1255
+ATOM   1102  C   PRO B  44      18.740 -14.831  13.575  1.00 23.74      8HVP1256
+ATOM   1103  O   PRO B  44      18.699 -15.616  14.543  1.00 23.37      8HVP1257
+ATOM   1104  CB  PRO B  44      19.990 -15.297  11.359  1.00 27.32      8HVP1258
+ATOM   1105  CG  PRO B  44      20.901 -16.460  11.652  1.00 28.58      8HVP1259
+ATOM   1106  CD  PRO B  44      21.943 -15.906  12.624  1.00 28.32      8HVP1260
+ATOM   1107  N   LYS B  45      17.710 -14.060  13.249  1.00 18.28      8HVP1261
+ATOM   1108  CA  LYS B  45      16.424 -14.073  13.919  1.00 10.05      8HVP1262
+ATOM   1109  C   LYS B  45      15.410 -13.403  12.980  1.00  5.88      8HVP1263
+ATOM   1110  O   LYS B  45      15.736 -12.676  12.016  1.00  3.74      8HVP1264
+ATOM   1111  CB  LYS B  45      16.469 -13.321  15.250  1.00  5.89      8HVP1265
+ATOM   1112  CG  LYS B  45      15.077 -13.238  15.847  1.00  3.10      8HVP1266
+ATOM   1113  CD  LYS B  45      14.828 -12.229  16.904  1.00  3.91      8HVP1267
+ATOM   1114  CE  LYS B  45      15.080 -12.795  18.299  1.00  3.48      8HVP1268
+ATOM   1115  NZ  LYS B  45      14.644 -11.709  19.238  1.00  2.00      8HVP1269
+ATOM   1116  N   MET B  46      14.172 -13.713  13.353  1.00  2.04      8HVP1270
+ATOM   1117  CA  MET B  46      13.017 -13.163  12.665  1.00  2.00      8HVP1271
+ATOM   1118  C   MET B  46      12.014 -12.324  13.423  1.00  2.00      8HVP1272
+ATOM   1119  O   MET B  46      11.701 -12.644  14.579  1.00  2.00      8HVP1273
+ATOM   1120  CB  MET B  46      12.370 -14.279  11.814  1.00  3.38      8HVP1274
+ATOM   1121  CG  MET B  46      13.015 -14.194  10.454  1.00  2.00      8HVP1275
+ATOM   1122  SD  MET B  46      12.190 -15.349   9.346  1.00 12.32      8HVP1276
+ATOM   1123  CE  MET B  46      13.204 -15.043   7.890  1.00  2.00      8HVP1277
+ATOM   1124  N   ILE B  47      11.642 -11.224  12.801  1.00  2.00      8HVP1278
+ATOM   1125  CA  ILE B  47      10.647 -10.378  13.456  1.00  4.30      8HVP1279
+ATOM   1126  C   ILE B  47       9.698  -9.929  12.362  1.00  4.03      8HVP1280
+ATOM   1127  O   ILE B  47      10.103  -9.976  11.206  1.00  3.63      8HVP1281
+ATOM   1128  CB  ILE B  47      11.266  -9.238  14.339  1.00  9.07      8HVP1282
+ATOM   1129  CG1 ILE B  47      11.967  -8.226  13.478  1.00  2.00      8HVP1283
+ATOM   1130  CG2 ILE B  47      12.234  -9.803  15.421  1.00  2.00      8HVP1284
+ATOM   1131  CD1 ILE B  47      12.567  -6.882  13.575  1.00  2.00      8HVP1285
+ATOM   1132  N   GLY B  48       8.520  -9.579  12.813  1.00  9.14      8HVP1286
+ATOM   1133  CA  GLY B  48       7.495  -9.090  11.877  1.00 10.44      8HVP1287
+ATOM   1134  C   GLY B  48       6.462  -8.295  12.687  1.00 11.81      8HVP1288
+ATOM   1135  O   GLY B  48       6.614  -8.152  13.908  1.00  9.14      8HVP1289
+ATOM   1136  N   GLY B  49       5.420  -7.962  11.938  1.00 10.82      8HVP1290
+ATOM   1137  CA  GLY B  49       4.259  -7.215  12.439  1.00  9.96      8HVP1291
+ATOM   1138  C   GLY B  49       3.348  -6.879  11.271  1.00  8.95      8HVP1292
+ATOM   1139  O   GLY B  49       2.977  -7.831  10.575  1.00 10.25      8HVP1293
+ATOM   1140  N   ILE B  50       2.977  -5.625  11.070  1.00 10.74      8HVP1294
+ATOM   1141  CA  ILE B  50       2.065  -5.320   9.923  1.00  8.35      8HVP1295
+ATOM   1142  C   ILE B  50       2.934  -5.730   8.761  1.00 10.18      8HVP1296
+ATOM   1143  O   ILE B  50       4.071  -5.215   8.934  1.00 16.59      8HVP1297
+ATOM   1144  CB  ILE B  50       1.691  -3.801   9.905  1.00  2.25      8HVP1298
+ATOM   1145  CG1 ILE B  50       0.659  -3.526  11.033  1.00  2.00      8HVP1299
+ATOM   1146  CG2 ILE B  50       1.104  -3.143   8.635  1.00  2.00      8HVP1300
+ATOM   1147  CD1 ILE B  50      -0.606  -4.427  10.901  1.00  3.68      8HVP1301
+ATOM   1148  N   GLY B  51       2.549  -6.448   7.743  1.00  9.92      8HVP1302
+ATOM   1149  CA  GLY B  51       3.517  -6.626   6.624  1.00  7.18      8HVP1303
+ATOM   1150  C   GLY B  51       3.983  -8.042   6.403  1.00  9.03      8HVP1304
+ATOM   1151  O   GLY B  51       3.886  -8.678   5.324  1.00  7.91      8HVP1305
+ATOM   1152  N   GLY B  52       4.516  -8.536   7.516  1.00 12.04      8HVP1306
+ATOM   1153  CA  GLY B  52       5.046  -9.907   7.691  1.00  8.63      8HVP1307
+ATOM   1154  C   GLY B  52       6.357  -9.841   8.450  1.00  2.00      8HVP1308
+ATOM   1155  O   GLY B  52       6.654  -8.801   9.024  1.00  3.66      8HVP1309
+ATOM   1156  N   PHE B  53       7.168 -10.849   8.266  1.00  5.97      8HVP1310
+ATOM   1157  CA  PHE B  53       8.486 -11.020   8.918  1.00  2.93      8HVP1311
+ATOM   1158  C   PHE B  53       9.600 -10.612   7.988  1.00  6.45      8HVP1312
+ATOM   1159  O   PHE B  53       9.330 -10.629   6.780  1.00 10.93      8HVP1313
+ATOM   1160  CB  PHE B  53       8.731 -12.469   9.389  1.00  4.32      8HVP1314
+ATOM   1161  CG  PHE B  53       7.829 -12.804  10.557  1.00  6.51      8HVP1315
+ATOM   1162  CD1 PHE B  53       6.449 -12.738  10.377  1.00 10.36      8HVP1316
+ATOM   1163  CD2 PHE B  53       8.304 -13.121  11.813  1.00  8.98      8HVP1317
+ATOM   1164  CE1 PHE B  53       5.561 -13.038  11.367  1.00  2.00      8HVP1318
+ATOM   1165  CE2 PHE B  53       7.401 -13.420  12.840  1.00 12.75      8HVP1319
+ATOM   1166  CZ  PHE B  53       6.017 -13.366  12.645  1.00  2.00      8HVP1320
+ATOM   1167  N   ILE B  54      10.745 -10.320   8.523  1.00  7.04      8HVP1321
+ATOM   1168  CA  ILE B  54      11.958  -9.948   7.787  1.00  7.76      8HVP1322
+ATOM   1169  C   ILE B  54      13.116 -10.492   8.653  1.00  7.51      8HVP1323
+ATOM   1170  O   ILE B  54      12.882 -10.557   9.868  1.00  3.94      8HVP1324
+ATOM   1171  CB  ILE B  54      12.079  -8.397   7.617  1.00  2.00      8HVP1325
+ATOM   1172  CG1 ILE B  54      12.250  -7.836   9.045  1.00  2.00      8HVP1326
+ATOM   1173  CG2 ILE B  54      10.884  -7.810   6.871  1.00  2.00      8HVP1327
+ATOM   1174  CD1 ILE B  54      13.120  -6.569   9.130  1.00  2.00      8HVP1328
+ATOM   1175  N   LYS B  55      14.226 -10.783   8.026  1.00  4.94      8HVP1329
+ATOM   1176  CA  LYS B  55      15.310 -11.368   8.834  1.00  9.23      8HVP1330
+ATOM   1177  C   LYS B  55      16.257 -10.246   9.271  1.00  9.31      8HVP1331
+ATOM   1178  O   LYS B  55      16.554  -9.326   8.485  1.00  6.93      8HVP1332
+ATOM   1179  CB  LYS B  55      16.077 -12.445   8.077  1.00 24.98      8HVP1333
+ATOM   1180  CG  LYS B  55      17.423 -12.772   8.729  1.00 43.02      8HVP1334
+ATOM   1181  CD  LYS B  55      18.300 -13.675   7.872  1.00 53.33      8HVP1335
+ATOM   1182  CE  LYS B  55      19.689 -13.826   8.488  1.00 57.93      8HVP1336
+ATOM   1183  NZ  LYS B  55      20.300 -15.107   8.046  1.00 57.35      8HVP1337
+ATOM   1184  N   VAL B  56      16.739 -10.399  10.488  1.00  2.00      8HVP1338
+ATOM   1185  CA  VAL B  56      17.636  -9.397  11.048  1.00  2.00      8HVP1339
+ATOM   1186  C   VAL B  56      18.747 -10.115  11.799  1.00  3.32      8HVP1340
+ATOM   1187  O   VAL B  56      18.425 -11.224  12.256  1.00  6.46      8HVP1341
+ATOM   1188  CB  VAL B  56      16.915  -8.401  11.980  1.00  7.22      8HVP1342
+ATOM   1189  CG1 VAL B  56      15.835  -7.629  11.242  1.00  9.44      8HVP1343
+ATOM   1190  CG2 VAL B  56      16.426  -9.056  13.283  1.00  2.54      8HVP1344
+ATOM   1191  N   ARG B  57      19.878  -9.457  11.914  1.00  2.00      8HVP1345
+ATOM   1192  CA  ARG B  57      20.953 -10.051  12.721  1.00  4.67      8HVP1346
+ATOM   1193  C   ARG B  57      20.907  -9.356  14.096  1.00  5.22      8HVP1347
+ATOM   1194  O   ARG B  57      20.774  -8.130  14.141  1.00  4.28      8HVP1348
+ATOM   1195  CB  ARG B  57      22.321  -9.885  12.141  1.00 12.84      8HVP1349
+ATOM   1196  CG  ARG B  57      22.619 -10.531  10.797  1.00 12.46      8HVP1350
+ATOM   1197  CD  ARG B  57      23.827  -9.835  10.266  1.00 21.00      8HVP1351
+ATOM   1198  NE  ARG B  57      23.544  -8.448   9.903  1.00 35.91      8HVP1352
+ATOM   1199  CZ  ARG B  57      24.355  -7.415  10.205  1.00 41.63      8HVP1353
+ATOM   1200  NH1 ARG B  57      25.344  -7.628  11.082  1.00 36.55      8HVP1354
+ATOM   1201  NH2 ARG B  57      24.147  -6.171   9.742  1.00 42.40      8HVP1355
+ATOM   1202  N   GLN B  58      20.958 -10.164  15.132  1.00  4.24      8HVP1356
+ATOM   1203  CA  GLN B  58      20.929  -9.868  16.548  1.00  2.00      8HVP1357
+ATOM   1204  C   GLN B  58      22.342  -9.804  17.176  1.00  2.00      8HVP1358
+ATOM   1205  O   GLN B  58      22.962 -10.910  17.183  1.00  2.00      8HVP1359
+ATOM   1206  CB  GLN B  58      20.304 -11.052  17.328  1.00  2.00      8HVP1360
+ATOM   1207  CG  GLN B  58      20.132 -10.685  18.794  1.00 10.91      8HVP1361
+ATOM   1208  CD  GLN B  58      19.611 -11.801  19.668  1.00 14.93      8HVP1362
+ATOM   1209  OE1 GLN B  58      20.247 -12.176  20.650  1.00 21.50      8HVP1363
+ATOM   1210  NE2 GLN B  58      18.463 -12.419  19.366  1.00  8.34      8HVP1364
+ATOM   1211  N   TYR B  59      22.711  -8.672  17.773  1.00  2.46      8HVP1365
+ATOM   1212  CA  TYR B  59      23.981  -8.488  18.487  1.00  2.00      8HVP1366
+ATOM   1213  C   TYR B  59      23.860  -8.440  20.013  1.00  2.00      8HVP1367
+ATOM   1214  O   TYR B  59      22.999  -7.699  20.501  1.00  2.00      8HVP1368
+ATOM   1215  CB  TYR B  59      24.663  -7.193  17.998  1.00  2.00      8HVP1369
+ATOM   1216  CG  TYR B  59      25.115  -7.295  16.566  1.00  6.62      8HVP1370
+ATOM   1217  CD1 TYR B  59      24.334  -6.931  15.475  1.00  2.00      8HVP1371
+ATOM   1218  CD2 TYR B  59      26.472  -7.625  16.426  1.00  7.16      8HVP1372
+ATOM   1219  CE1 TYR B  59      24.924  -7.013  14.197  1.00 13.21      8HVP1373
+ATOM   1220  CE2 TYR B  59      27.053  -7.668  15.184  1.00  2.00      8HVP1374
+ATOM   1221  CZ  TYR B  59      26.266  -7.379  14.061  1.00 10.93      8HVP1375
+ATOM   1222  OH  TYR B  59      26.862  -7.420  12.834  1.00 18.08      8HVP1376
+ATOM   1223  N   ASP B  60      24.633  -9.148  20.812  1.00  2.88      8HVP1377
+ATOM   1224  CA  ASP B  60      24.403  -8.973  22.274  1.00 10.69      8HVP1378
+ATOM   1225  C   ASP B  60      25.131  -7.704  22.730  1.00 12.02      8HVP1379
+ATOM   1226  O   ASP B  60      25.650  -6.906  21.932  1.00 11.21      8HVP1380
+ATOM   1227  CB  ASP B  60      24.563 -10.226  23.106  1.00 23.41      8HVP1381
+ATOM   1228  CG  ASP B  60      23.408 -11.152  22.704  1.00 35.13      8HVP1382
+ATOM   1229  OD1 ASP B  60      22.330 -10.514  22.641  1.00 39.29      8HVP1383
+ATOM   1230  OD2 ASP B  60      23.547 -12.357  22.451  1.00 40.70      8HVP1384
+ATOM   1231  N   GLN B  61      25.017  -7.501  24.024  1.00  9.58      8HVP1385
+ATOM   1232  CA  GLN B  61      25.528  -6.383  24.782  1.00  8.24      8HVP1386
+ATOM   1233  C   GLN B  61      26.170  -5.216  24.081  1.00  6.58      8HVP1387
+ATOM   1234  O   GLN B  61      27.297  -4.831  24.407  1.00  9.84      8HVP1388
+ATOM   1235  CB  GLN B  61      26.343  -6.903  25.975  1.00 16.61      8HVP1389
+ATOM   1236  CG  GLN B  61      25.415  -7.298  27.125  1.00 26.01      8HVP1390
+ATOM   1237  CD  GLN B  61      26.090  -7.394  28.471  1.00 32.51      8HVP1391
+ATOM   1238  OE1 GLN B  61      25.901  -6.537  29.345  1.00 38.87      8HVP1392
+ATOM   1239  NE2 GLN B  61      26.890  -8.447  28.635  1.00 31.15      8HVP1393
+ATOM   1240  N   ILE B  62      25.451  -4.465  23.264  1.00  4.26      8HVP1394
+ATOM   1241  CA  ILE B  62      25.901  -3.266  22.573  1.00  4.05      8HVP1395
+ATOM   1242  C   ILE B  62      25.487  -2.015  23.327  1.00  4.67      8HVP1396
+ATOM   1243  O   ILE B  62      24.297  -1.891  23.661  1.00  8.81      8HVP1397
+ATOM   1244  CB  ILE B  62      25.358  -3.272  21.084  1.00  8.11      8HVP1398
+ATOM   1245  CG1 ILE B  62      26.214  -4.274  20.264  1.00  9.08      8HVP1399
+ATOM   1246  CG2 ILE B  62      25.307  -1.837  20.493  1.00  2.00      8HVP1400
+ATOM   1247  CD1 ILE B  62      25.909  -4.298  18.753  1.00  5.72      8HVP1401
+ATOM   1248  N   PRO B  63      26.441  -1.145  23.610  1.00  6.99      8HVP1402
+ATOM   1249  CA  PRO B  63      26.257   0.127  24.321  1.00  6.15      8HVP1403
+ATOM   1250  C   PRO B  63      25.557   1.142  23.435  1.00 10.75      8HVP1404
+ATOM   1251  O   PRO B  63      25.783   1.158  22.192  1.00 18.39      8HVP1405
+ATOM   1252  CB  PRO B  63      27.658   0.603  24.697  1.00  4.93      8HVP1406
+ATOM   1253  CG  PRO B  63      28.565  -0.542  24.385  1.00  5.11      8HVP1407
+ATOM   1254  CD  PRO B  63      27.881  -1.296  23.236  1.00  7.62      8HVP1408
+ATOM   1255  N   VAL B  64      24.704   1.984  23.980  1.00  9.55      8HVP1409
+ATOM   1256  CA  VAL B  64      24.008   2.929  23.077  1.00 11.32      8HVP1410
+ATOM   1257  C   VAL B  64      23.721   4.095  24.018  1.00 16.28      8HVP1411
+ATOM   1258  O   VAL B  64      23.720   3.938  25.259  1.00 12.69      8HVP1412
+ATOM   1259  CB  VAL B  64      22.849   2.191  22.397  1.00 19.36      8HVP1413
+ATOM   1260  CG1 VAL B  64      21.563   3.000  22.225  1.00 23.40      8HVP1414
+ATOM   1261  CG2 VAL B  64      23.047   1.598  20.998  1.00  8.04      8HVP1415
+ATOM   1262  N   GLU B  65      23.494   5.231  23.349  1.00 17.51      8HVP1416
+ATOM   1263  CA  GLU B  65      23.096   6.447  24.099  1.00 16.89      8HVP1417
+ATOM   1264  C   GLU B  65      21.817   7.080  23.554  1.00 12.66      8HVP1418
+ATOM   1265  O   GLU B  65      21.791   7.691  22.462  1.00  9.45      8HVP1419
+ATOM   1266  CB  GLU B  65      24.275   7.407  24.126  1.00 24.37      8HVP1420
+ATOM   1267  CG  GLU B  65      24.133   8.725  24.886  1.00 30.75      8HVP1421
+ATOM   1268  CD  GLU B  65      25.488   9.323  25.173  1.00 39.91      8HVP1422
+ATOM   1269  OE1 GLU B  65      26.063   8.614  26.038  1.00 44.11      8HVP1423
+ATOM   1270  OE2 GLU B  65      25.943  10.307  24.621  1.00 37.32      8HVP1424
+ATOM   1271  N   ILE B  66      20.735   6.933  24.320  1.00 11.61      8HVP1425
+ATOM   1272  CA  ILE B  66      19.445   7.540  23.954  1.00 13.42      8HVP1426
+ATOM   1273  C   ILE B  66      19.282   8.909  24.610  1.00 11.36      8HVP1427
+ATOM   1274  O   ILE B  66      18.767   9.003  25.728  1.00 15.10      8HVP1428
+ATOM   1275  CB  ILE B  66      18.189   6.630  24.167  1.00  9.27      8HVP1429
+ATOM   1276  CG1 ILE B  66      18.336   5.338  23.325  1.00  2.00      8HVP1430
+ATOM   1277  CG2 ILE B  66      16.862   7.369  23.809  1.00  8.85      8HVP1431
+ATOM   1278  CD1 ILE B  66      19.244   4.315  24.056  1.00  7.34      8HVP1432
+HETATM 1279  N   ABA B  67      19.675   9.925  23.907  1.00 13.30      8HVP1433
+HETATM 1280  CA  ABA B  67      19.686  11.326  24.264  1.00 17.39      8HVP1434
+HETATM 1281  C   ABA B  67      20.287  11.692  25.627  1.00 21.03      8HVP1435
+HETATM 1282  O   ABA B  67      19.628  12.392  26.429  1.00 22.00      8HVP1436
+HETATM 1283  CB  ABA B  67      18.331  12.009  24.095  1.00 17.35      8HVP1437
+HETATM 1284  CG  ABA B  67      17.117  11.103  24.237  1.00  2.14      8HVP1438
+ATOM   1285  N   GLY B  68      21.561  11.352  25.809  1.00 20.94      8HVP1439
+ATOM   1286  CA  GLY B  68      22.410  11.595  26.957  1.00 19.29      8HVP1440
+ATOM   1287  C   GLY B  68      22.128  10.707  28.162  1.00 21.25      8HVP1441
+ATOM   1288  O   GLY B  68      22.270  11.054  29.350  1.00 19.60      8HVP1442
+ATOM   1289  N   HIS B  69      21.648   9.524  27.841  1.00 21.09      8HVP1443
+ATOM   1290  CA  HIS B  69      21.241   8.476  28.776  1.00 19.62      8HVP1444
+ATOM   1291  C   HIS B  69      21.902   7.215  28.214  1.00 22.76      8HVP1445
+ATOM   1292  O   HIS B  69      21.499   6.919  27.070  1.00 26.88      8HVP1446
+ATOM   1293  CB  HIS B  69      19.712   8.249  28.887  1.00  6.31      8HVP1447
+ATOM   1294  CG  HIS B  69      18.965   9.364  29.578  1.00  6.71      8HVP1448
+ATOM   1295  ND1 HIS B  69      18.239   9.336  30.775  1.00  2.00      8HVP1449
+ATOM   1296  CD2 HIS B  69      18.872  10.645  29.079  1.00  4.67      8HVP1450
+ATOM   1297  CE1 HIS B  69      17.797  10.537  30.965  1.00  2.00      8HVP1451
+ATOM   1298  NE2 HIS B  69      18.136  11.362  29.966  1.00  2.00      8HVP1452
+ATOM   1299  N   LYS B  70      22.882   6.618  28.881  1.00 21.04      8HVP1453
+ATOM   1300  CA  LYS B  70      23.479   5.386  28.360  1.00 15.05      8HVP1454
+ATOM   1301  C   LYS B  70      22.499   4.241  28.662  1.00 14.48      8HVP1455
+ATOM   1302  O   LYS B  70      21.698   4.282  29.589  1.00 17.68      8HVP1456
+ATOM   1303  CB  LYS B  70      24.764   4.931  28.983  1.00 19.55      8HVP1457
+ATOM   1304  CG  LYS B  70      26.061   5.280  28.284  1.00 26.69      8HVP1458
+ATOM   1305  CD  LYS B  70      27.064   5.766  29.331  1.00 32.24      8HVP1459
+ATOM   1306  CE  LYS B  70      26.499   6.941  30.115  1.00 37.76      8HVP1460
+ATOM   1307  NZ  LYS B  70      25.683   7.818  29.224  1.00 41.20      8HVP1461
+ATOM   1308  N   ALA B  71      22.681   3.244  27.867  1.00 11.10      8HVP1462
+ATOM   1309  CA  ALA B  71      21.983   1.964  27.880  1.00  9.54      8HVP1463
+ATOM   1310  C   ALA B  71      22.726   1.051  26.855  1.00  7.05      8HVP1464
+ATOM   1311  O   ALA B  71      23.279   1.503  25.829  1.00  2.34      8HVP1465
+ATOM   1312  CB  ALA B  71      20.501   2.157  27.721  1.00 11.33      8HVP1466
+ATOM   1313  N   ILE B  72      22.740  -0.219  27.216  1.00  2.00      8HVP1467
+ATOM   1314  CA  ILE B  72      23.436  -1.261  26.493  1.00  5.49      8HVP1468
+ATOM   1315  C   ILE B  72      22.535  -2.468  26.329  1.00  6.66      8HVP1469
+ATOM   1316  O   ILE B  72      22.105  -2.907  27.440  1.00  8.44      8HVP1470
+ATOM   1317  CB  ILE B  72      24.713  -1.760  27.296  1.00 16.05      8HVP1471
+ATOM   1318  CG1 ILE B  72      25.370  -2.891  26.447  1.00 22.97      8HVP1472
+ATOM   1319  CG2 ILE B  72      24.456  -2.172  28.756  1.00  4.00      8HVP1473
+ATOM   1320  CD1 ILE B  72      26.364  -3.766  27.265  1.00 29.98      8HVP1474
+ATOM   1321  N   GLY B  73      22.338  -2.923  25.093  1.00  2.00      8HVP1475
+ATOM   1322  CA  GLY B  73      21.437  -4.104  25.069  1.00  2.00      8HVP1476
+ATOM   1323  C   GLY B  73      21.562  -4.865  23.788  1.00  2.00      8HVP1477
+ATOM   1324  O   GLY B  73      22.599  -4.727  23.158  1.00  2.00      8HVP1478
+ATOM   1325  N   THR B  74      20.498  -5.596  23.498  1.00  5.86      8HVP1479
+ATOM   1326  CA  THR B  74      20.407  -6.405  22.281  1.00  4.20      8HVP1480
+ATOM   1327  C   THR B  74      19.924  -5.357  21.268  1.00 10.15      8HVP1481
+ATOM   1328  O   THR B  74      19.176  -4.428  21.619  1.00 13.28      8HVP1482
+ATOM   1329  CB  THR B  74      19.452  -7.629  22.317  1.00  6.75      8HVP1483
+ATOM   1330  OG1 THR B  74      19.460  -8.272  23.630  1.00  6.45      8HVP1484
+ATOM   1331  CG2 THR B  74      19.772  -8.692  21.245  1.00  8.22      8HVP1485
+ATOM   1332  N   VAL B  75      20.406  -5.565  20.074  1.00 10.13      8HVP1486
+ATOM   1333  CA  VAL B  75      20.079  -4.601  18.997  1.00 10.03      8HVP1487
+ATOM   1334  C   VAL B  75      20.060  -5.449  17.734  1.00 11.32      8HVP1488
+ATOM   1335  O   VAL B  75      21.000  -6.240  17.598  1.00 11.34      8HVP1489
+ATOM   1336  CB  VAL B  75      21.142  -3.496  19.106  1.00 13.15      8HVP1490
+ATOM   1337  CG1 VAL B  75      21.682  -3.092  17.736  1.00  2.00      8HVP1491
+ATOM   1338  CG2 VAL B  75      20.687  -2.271  19.902  1.00  4.38      8HVP1492
+ATOM   1339  N   LEU B  76      19.015  -5.307  16.930  1.00 14.07      8HVP1493
+ATOM   1340  CA  LEU B  76      18.838  -6.041  15.685  1.00  9.94      8HVP1494
+ATOM   1341  C   LEU B  76      19.164  -5.080  14.518  1.00 10.85      8HVP1495
+ATOM   1342  O   LEU B  76      18.576  -3.981  14.537  1.00 11.91      8HVP1496
+ATOM   1343  CB  LEU B  76      17.392  -6.465  15.439  1.00  2.00      8HVP1497
+ATOM   1344  CG  LEU B  76      16.542  -7.066  16.480  1.00  2.00      8HVP1498
+ATOM   1345  CD1 LEU B  76      15.141  -7.466  16.026  1.00  9.50      8HVP1499
+ATOM   1346  CD2 LEU B  76      17.233  -8.266  17.113  1.00  2.00      8HVP1500
+ATOM   1347  N   VAL B  77      19.930  -5.635  13.604  1.00  7.40      8HVP1501
+ATOM   1348  CA  VAL B  77      20.229  -4.897  12.378  1.00  7.40      8HVP1502
+ATOM   1349  C   VAL B  77      19.442  -5.598  11.245  1.00  8.21      8HVP1503
+ATOM   1350  O   VAL B  77      19.282  -6.837  11.310  1.00  7.68      8HVP1504
+ATOM   1351  CB  VAL B  77      21.701  -4.837  12.004  1.00 13.92      8HVP1505
+ATOM   1352  CG1 VAL B  77      21.869  -4.214  10.605  1.00 11.45      8HVP1506
+ATOM   1353  CG2 VAL B  77      22.529  -4.150  13.037  1.00  2.00      8HVP1507
+ATOM   1354  N   GLY B  78      19.006  -4.810  10.277  1.00  2.90      8HVP1508
+ATOM   1355  CA  GLY B  78      18.214  -5.472   9.211  1.00  4.68      8HVP1509
+ATOM   1356  C   GLY B  78      17.538  -4.397   8.411  1.00  6.60      8HVP1510
+ATOM   1357  O   GLY B  78      17.738  -3.235   8.784  1.00  7.71      8HVP1511
+ATOM   1358  N   PRO B  79      16.748  -4.834   7.447  1.00 10.98      8HVP1512
+ATOM   1359  CA  PRO B  79      16.106  -3.946   6.482  1.00 11.80      8HVP1513
+ATOM   1360  C   PRO B  79      14.924  -3.116   6.916  1.00 12.03      8HVP1514
+ATOM   1361  O   PRO B  79      13.925  -3.134   6.146  1.00 16.48      8HVP1515
+ATOM   1362  CB  PRO B  79      15.733  -4.828   5.284  1.00 12.17      8HVP1516
+ATOM   1363  CG  PRO B  79      15.721  -6.236   5.798  1.00 13.14      8HVP1517
+ATOM   1364  CD  PRO B  79      16.530  -6.238   7.088  1.00 13.32      8HVP1518
+ATOM   1365  N   THR B  80      15.064  -2.344   7.971  1.00  7.91      8HVP1519
+ATOM   1366  CA  THR B  80      14.035  -1.456   8.479  1.00  2.64      8HVP1520
+ATOM   1367  C   THR B  80      14.090  -0.056   7.858  1.00  3.39      8HVP1521
+ATOM   1368  O   THR B  80      15.170   0.550   7.674  1.00  3.04      8HVP1522
+ATOM   1369  CB  THR B  80      14.154  -1.226  10.073  1.00  2.00      8HVP1523
+ATOM   1370  OG1 THR B  80      13.256  -0.108  10.331  1.00  2.00      8HVP1524
+ATOM   1371  CG2 THR B  80      15.609  -0.947  10.440  1.00  4.77      8HVP1525
+ATOM   1372  N   PRO B  81      12.872   0.473   7.706  1.00  2.00      8HVP1526
+ATOM   1373  CA  PRO B  81      12.720   1.836   7.182  1.00  2.97      8HVP1527
+ATOM   1374  C   PRO B  81      12.849   2.875   8.255  1.00  5.81      8HVP1528
+ATOM   1375  O   PRO B  81      12.603   4.034   7.866  1.00 12.28      8HVP1529
+ATOM   1376  CB  PRO B  81      11.315   1.919   6.596  1.00  4.46      8HVP1530
+ATOM   1377  CG  PRO B  81      10.562   0.987   7.528  1.00  2.00      8HVP1531
+ATOM   1378  CD  PRO B  81      11.549  -0.103   7.918  1.00  2.00      8HVP1532
+ATOM   1379  N   VAL B  82      13.092   2.533   9.501  1.00  5.50      8HVP1533
+ATOM   1380  CA  VAL B  82      13.253   3.579  10.533  1.00  6.56      8HVP1534
+ATOM   1381  C   VAL B  82      14.035   2.859  11.635  1.00  4.46      8HVP1535
+ATOM   1382  O   VAL B  82      13.744   1.691  11.882  1.00  5.73      8HVP1536
+ATOM   1383  CB  VAL B  82      12.044   4.308  11.099  1.00  2.00      8HVP1537
+ATOM   1384  CG1 VAL B  82      12.390   5.643  11.816  1.00  2.00      8HVP1538
+ATOM   1385  CG2 VAL B  82      10.922   4.609  10.118  1.00 10.74      8HVP1539
+ATOM   1386  N   ASN B  83      15.039   3.560  12.122  1.00  4.25      8HVP1540
+ATOM   1387  CA  ASN B  83      15.747   2.886  13.236  1.00  3.60      8HVP1541
+ATOM   1388  C   ASN B  83      14.616   3.030  14.285  1.00  5.95      8HVP1542
+ATOM   1389  O   ASN B  83      13.973   4.099  14.278  1.00  4.66      8HVP1543
+ATOM   1390  CB  ASN B  83      16.995   3.589  13.654  1.00 12.96      8HVP1544
+ATOM   1391  CG  ASN B  83      17.932   3.864  12.499  1.00 11.76      8HVP1545
+ATOM   1392  OD1 ASN B  83      17.831   3.133  11.507  1.00 27.72      8HVP1546
+ATOM   1393  ND2 ASN B  83      18.820   4.811  12.706  1.00  2.00      8HVP1547
+ATOM   1394  N   ILE B  84      14.506   2.010  15.091  1.00  2.00      8HVP1548
+ATOM   1395  CA  ILE B  84      13.469   1.945  16.101  1.00  2.00      8HVP1549
+ATOM   1396  C   ILE B  84      14.169   1.483  17.373  1.00  2.00      8HVP1550
+ATOM   1397  O   ILE B  84      14.819   0.450  17.231  1.00  2.00      8HVP1551
+ATOM   1398  CB  ILE B  84      12.560   0.805  15.520  1.00  2.00      8HVP1552
+ATOM   1399  CG1 ILE B  84      11.828   1.260  14.244  1.00  2.00      8HVP1553
+ATOM   1400  CG2 ILE B  84      11.560   0.129  16.485  1.00  9.06      8HVP1554
+ATOM   1401  CD1 ILE B  84      11.228  -0.085  13.628  1.00  2.00      8HVP1555
+ATOM   1402  N   ILE B  85      13.901   2.146  18.451  1.00  2.34      8HVP1556
+ATOM   1403  CA  ILE B  85      14.252   1.917  19.851  1.00  2.00      8HVP1557
+ATOM   1404  C   ILE B  85      12.981   1.300  20.488  1.00  2.00      8HVP1558
+ATOM   1405  O   ILE B  85      11.935   1.937  20.654  1.00  2.00      8HVP1559
+ATOM   1406  CB  ILE B  85      14.685   3.237  20.568  1.00  3.01      8HVP1560
+ATOM   1407  CG1 ILE B  85      15.613   3.980  19.566  1.00  2.00      8HVP1561
+ATOM   1408  CG2 ILE B  85      15.401   3.184  21.944  1.00  2.00      8HVP1562
+ATOM   1409  CD1 ILE B  85      17.057   3.398  19.444  1.00  2.00      8HVP1563
+ATOM   1410  N   GLY B  86      12.996   0.016  20.615  1.00  2.00      8HVP1564
+ATOM   1411  CA  GLY B  86      12.066  -0.875  21.226  1.00  2.00      8HVP1565
+ATOM   1412  C   GLY B  86      12.290  -0.728  22.764  1.00  2.00      8HVP1566
+ATOM   1413  O   GLY B  86      13.261  -0.115  23.155  1.00  2.00      8HVP1567
+ATOM   1414  N   ARG B  87      11.345  -1.332  23.437  1.00  2.00      8HVP1568
+ATOM   1415  CA  ARG B  87      11.186  -1.347  24.865  1.00  2.00      8HVP1569
+ATOM   1416  C   ARG B  87      12.359  -1.917  25.601  1.00  2.00      8HVP1570
+ATOM   1417  O   ARG B  87      12.493  -1.592  26.770  1.00  4.46      8HVP1571
+ATOM   1418  CB  ARG B  87      10.035  -2.294  25.221  1.00  2.00      8HVP1572
+ATOM   1419  CG  ARG B  87       8.742  -1.723  24.711  1.00  5.98      8HVP1573
+ATOM   1420  CD  ARG B  87       7.604  -2.374  25.417  1.00 11.70      8HVP1574
+ATOM   1421  NE  ARG B  87       7.475  -3.756  24.962  1.00 15.15      8HVP1575
+ATOM   1422  CZ  ARG B  87       7.955  -4.754  25.720  1.00 20.65      8HVP1576
+ATOM   1423  NH1 ARG B  87       8.651  -4.475  26.828  1.00 28.10      8HVP1577
+ATOM   1424  NH2 ARG B  87       7.624  -6.024  25.457  1.00 15.02      8HVP1578
+ATOM   1425  N   ASN B  88      13.093  -2.801  24.933  1.00  5.56      8HVP1579
+ATOM   1426  CA  ASN B  88      14.222  -3.463  25.659  1.00  4.96      8HVP1580
+ATOM   1427  C   ASN B  88      15.090  -2.404  26.326  1.00  6.07      8HVP1581
+ATOM   1428  O   ASN B  88      15.400  -2.392  27.534  1.00  5.28      8HVP1582
+ATOM   1429  CB  ASN B  88      14.893  -4.460  24.738  1.00  2.00      8HVP1583
+ATOM   1430  CG  ASN B  88      15.562  -3.844  23.543  1.00  2.00      8HVP1584
+ATOM   1431  OD1 ASN B  88      15.159  -2.730  23.191  1.00  5.31      8HVP1585
+ATOM   1432  ND2 ASN B  88      16.597  -4.483  23.018  1.00  2.22      8HVP1586
+ATOM   1433  N   LEU B  89      15.549  -1.503  25.477  1.00  7.26      8HVP1587
+ATOM   1434  CA  LEU B  89      16.385  -0.373  25.908  1.00  7.41      8HVP1588
+ATOM   1435  C   LEU B  89      15.470   0.724  26.462  1.00  7.21      8HVP1589
+ATOM   1436  O   LEU B  89      15.891   1.509  27.298  1.00  5.18      8HVP1590
+ATOM   1437  CB  LEU B  89      17.200   0.132  24.722  1.00 14.09      8HVP1591
+ATOM   1438  CG  LEU B  89      18.234  -0.671  23.985  1.00 12.91      8HVP1592
+ATOM   1439  CD1 LEU B  89      19.037   0.253  23.055  1.00 15.14      8HVP1593
+ATOM   1440  CD2 LEU B  89      19.172  -1.343  24.974  1.00  7.63      8HVP1594
+ATOM   1441  N   LEU B  90      14.248   0.749  25.906  1.00 10.04      8HVP1595
+ATOM   1442  CA  LEU B  90      13.308   1.791  26.357  1.00 12.16      8HVP1596
+ATOM   1443  C   LEU B  90      13.197   1.796  27.879  1.00 10.68      8HVP1597
+ATOM   1444  O   LEU B  90      13.085   2.850  28.500  1.00 10.90      8HVP1598
+ATOM   1445  CB  LEU B  90      11.996   1.662  25.574  1.00 14.64      8HVP1599
+ATOM   1446  CG  LEU B  90      11.977   2.391  24.240  1.00  7.36      8HVP1600
+ATOM   1447  CD1 LEU B  90      10.753   2.062  23.390  1.00  3.51      8HVP1601
+ATOM   1448  CD2 LEU B  90      12.061   3.867  24.584  1.00  2.00      8HVP1602
+ATOM   1449  N   THR B  91      13.183   0.608  28.426  1.00  8.44      8HVP1603
+ATOM   1450  CA  THR B  91      13.022   0.350  29.861  1.00  3.22      8HVP1604
+ATOM   1451  C   THR B  91      14.275   0.740  30.603  1.00  9.25      8HVP1605
+ATOM   1452  O   THR B  91      14.263   0.702  31.862  1.00 14.93      8HVP1606
+ATOM   1453  CB  THR B  91      12.746  -1.177  29.952  1.00  2.00      8HVP1607
+ATOM   1454  OG1 THR B  91      11.985  -1.448  28.683  1.00  5.45      8HVP1608
+ATOM   1455  CG2 THR B  91      11.729  -1.671  30.939  1.00  2.00      8HVP1609
+ATOM   1456  N   GLN B  92      15.364   1.062  29.885  1.00  2.00      8HVP1610
+ATOM   1457  CA  GLN B  92      16.575   1.395  30.670  1.00  2.00      8HVP1611
+ATOM   1458  C   GLN B  92      16.828   2.837  30.989  1.00  2.00      8HVP1612
+ATOM   1459  O   GLN B  92      17.685   3.073  31.889  1.00  3.27      8HVP1613
+ATOM   1460  CB  GLN B  92      17.800   0.851  29.928  1.00  9.30      8HVP1614
+ATOM   1461  CG  GLN B  92      17.766  -0.674  29.945  1.00  9.24      8HVP1615
+ATOM   1462  CD  GLN B  92      19.084  -1.220  29.396  1.00 17.32      8HVP1616
+ATOM   1463  OE1 GLN B  92      19.908  -0.523  28.794  1.00 18.99      8HVP1617
+ATOM   1464  NE2 GLN B  92      19.167  -2.526  29.698  1.00  2.00      8HVP1618
+ATOM   1465  N   ILE B  93      16.165   3.783  30.406  1.00  2.00      8HVP1619
+ATOM   1466  CA  ILE B  93      16.302   5.224  30.521  1.00  2.00      8HVP1620
+ATOM   1467  C   ILE B  93      15.160   5.863  31.278  1.00  2.00      8HVP1621
+ATOM   1468  O   ILE B  93      15.193   7.088  31.545  1.00  2.00      8HVP1622
+ATOM   1469  CB  ILE B  93      16.471   5.800  29.054  1.00  2.81      8HVP1623
+ATOM   1470  CG1 ILE B  93      15.196   5.560  28.201  1.00 10.61      8HVP1624
+ATOM   1471  CG2 ILE B  93      17.720   5.198  28.360  1.00  5.18      8HVP1625
+ATOM   1472  CD1 ILE B  93      14.935   6.429  26.938  1.00  7.21      8HVP1626
+ATOM   1473  N   GLY B  94      14.276   4.993  31.747  1.00  2.00      8HVP1627
+ATOM   1474  CA  GLY B  94      13.153   5.380  32.551  1.00  2.98      8HVP1628
+ATOM   1475  C   GLY B  94      11.954   5.851  31.761  1.00  4.43      8HVP1629
+ATOM   1476  O   GLY B  94      11.159   6.610  32.386  1.00 12.15      8HVP1630
+HETATM 1477  N   ABA B  95      11.749   5.424  30.513  1.00  3.99      8HVP1631
+HETATM 1478  CA  ABA B  95      10.532   5.892  29.810  1.00  2.00      8HVP1632
+HETATM 1479  C   ABA B  95       9.235   5.310  30.389  1.00  2.00      8HVP1633
+HETATM 1480  O   ABA B  95       8.950   4.191  30.772  1.00  2.00      8HVP1634
+HETATM 1481  CB  ABA B  95      10.573   5.581  28.338  1.00 10.70      8HVP1635
+HETATM 1482  CG  ABA B  95       9.913   6.570  27.369  1.00  2.00      8HVP1636
+ATOM   1483  N   THR B  96       8.263   6.208  30.504  1.00  2.00      8HVP1637
+ATOM   1484  CA  THR B  96       6.873   5.955  30.799  1.00  2.00      8HVP1638
+ATOM   1485  C   THR B  96       6.069   6.697  29.685  1.00  2.00      8HVP1639
+ATOM   1486  O   THR B  96       6.618   7.581  29.045  1.00  2.00      8HVP1640
+ATOM   1487  CB  THR B  96       6.405   6.322  32.261  1.00  2.00      8HVP1641
+ATOM   1488  OG1 THR B  96       6.686   7.717  32.450  1.00  2.00      8HVP1642
+ATOM   1489  CG2 THR B  96       7.012   5.382  33.287  1.00  2.00      8HVP1643
+ATOM   1490  N   LEU B  97       4.837   6.299  29.600  1.00  2.00      8HVP1644
+ATOM   1491  CA  LEU B  97       3.747   6.890  28.809  1.00  2.76      8HVP1645
+ATOM   1492  C   LEU B  97       2.804   7.571  29.829  1.00  2.00      8HVP1646
+ATOM   1493  O   LEU B  97       2.468   6.848  30.824  1.00  2.00      8HVP1647
+ATOM   1494  CB  LEU B  97       3.007   5.780  28.010  1.00  7.78      8HVP1648
+ATOM   1495  CG  LEU B  97       3.760   5.169  26.836  1.00  9.06      8HVP1649
+ATOM   1496  CD1 LEU B  97       2.939   4.176  26.020  1.00  4.90      8HVP1650
+ATOM   1497  CD2 LEU B  97       4.171   6.292  25.876  1.00  5.54      8HVP1651
+ATOM   1498  N   ASN B  98       2.345   8.784  29.722  1.00  2.00      8HVP1652
+ATOM   1499  CA  ASN B  98       1.395   9.239  30.747  1.00  2.00      8HVP1653
+ATOM   1500  C   ASN B  98       0.195   9.892  30.048  1.00  2.00      8HVP1654
+ATOM   1501  O   ASN B  98       0.433  10.583  29.061  1.00  2.00      8HVP1655
+ATOM   1502  CB  ASN B  98       1.901  10.240  31.788  1.00  2.00      8HVP1656
+ATOM   1503  CG  ASN B  98       3.196   9.814  32.407  1.00  7.17      8HVP1657
+ATOM   1504  OD1 ASN B  98       4.049   9.299  31.687  1.00  2.00      8HVP1658
+ATOM   1505  ND2 ASN B  98       3.267  10.027  33.722  1.00 14.16      8HVP1659
+ATOM   1506  N   PHE B  99      -0.920   9.629  30.739  1.00  2.00      8HVP1660
+ATOM   1507  CA  PHE B  99      -2.145  10.323  30.261  1.00  2.68      8HVP1661
+ATOM   1508  C   PHE B  99      -3.092  10.586  31.440  1.00  6.92      8HVP1662
+ATOM   1509  O   PHE B  99      -3.744  11.659  31.306  1.00 14.80      8HVP1663
+ATOM   1510  CB  PHE B  99      -2.816   9.806  29.052  1.00  2.00      8HVP1664
+ATOM   1511  CG  PHE B  99      -3.328   8.440  29.015  1.00  2.45      8HVP1665
+ATOM   1512  CD1 PHE B  99      -2.473   7.420  28.560  1.00  2.00      8HVP1666
+ATOM   1513  CD2 PHE B  99      -4.636   8.153  29.387  1.00  3.88      8HVP1667
+ATOM   1514  CE1 PHE B  99      -2.895   6.105  28.428  1.00  2.00      8HVP1668
+ATOM   1515  CE2 PHE B  99      -5.101   6.833  29.243  1.00  7.55      8HVP1669
+ATOM   1516  CZ  PHE B  99      -4.222   5.848  28.780  1.00  2.00      8HVP1670
+ATOM   1517  OXT PHE B  99      -3.162   9.808  32.396  1.00  2.00      8HVP1671
+TER    1518      PHE B  99                                              8HVP1672
+ATOM   1519  N   VAL I   1       1.002   5.256   2.453  1.00 33.69      8HVP1673
+ATOM   1520  CA  VAL I   1       1.982   6.316   2.835  1.00 33.44      8HVP1674
+ATOM   1521  C   VAL I   1       1.924   6.584   4.348  1.00 28.16      8HVP1675
+ATOM   1522  O   VAL I   1       2.567   7.494   4.904  1.00 27.53      8HVP1676
+ATOM   1523  CB  VAL I   1       1.922   7.494   1.842  1.00 41.97      8HVP1677
+ATOM   1524  CG1 VAL I   1       0.853   8.555   2.110  1.00 45.29      8HVP1678
+ATOM   1525  CG2 VAL I   1       3.267   8.164   1.591  1.00 41.45      8HVP1679
+ATOM   1526  N   SER I   2       1.230   5.717   5.055  1.00 22.94      8HVP1680
+ATOM   1527  CA  SER I   2       1.138   5.731   6.519  1.00 18.94      8HVP1681
+ATOM   1528  C   SER I   2       1.996   4.575   7.075  1.00 18.27      8HVP1682
+ATOM   1529  O   SER I   2       1.709   3.452   6.587  1.00 18.68      8HVP1683
+ATOM   1530  CB  SER I   2      -0.342   5.585   6.864  1.00  4.46      8HVP1684
+ATOM   1531  OG  SER I   2      -0.461   4.626   7.920  1.00 10.63      8HVP1685
+ATOM   1532  N   GLN I   3       2.962   4.735   7.985  1.00 12.32      8HVP1686
+ATOM   1533  CA  GLN I   3       3.735   3.601   8.529  1.00 11.88      8HVP1687
+ATOM   1534  C   GLN I   3       3.378   3.262   9.996  1.00  7.48      8HVP1688
+ATOM   1535  O   GLN I   3       3.068   4.175  10.740  1.00  4.65      8HVP1689
+ATOM   1536  CB  GLN I   3       5.258   3.756   8.428  1.00 10.50      8HVP1690
+ATOM   1537  CG  GLN I   3       5.755   4.804   9.360  1.00 18.97      8HVP1691
+ATOM   1538  CD  GLN I   3       7.152   4.776   9.912  1.00 28.03      8HVP1692
+ATOM   1539  OE1 GLN I   3       7.500   5.754  10.608  1.00 22.68      8HVP1693
+ATOM   1540  NE2 GLN I   3       7.906   3.701   9.651  1.00 26.36      8HVP1694
+ATOM   1541  N   ASN I   4       3.464   1.991  10.356  1.00  6.40      8HVP1695
+ATOM   1542  CA  ASN I   4       3.154   1.352  11.620  1.00  5.82      8HVP1696
+ATOM   1543  C   ASN I   4       4.079   0.279  12.181  1.00  3.93      8HVP1697
+ATOM   1544  O   ASN I   4       3.939  -0.926  11.913  1.00  3.17      8HVP1698
+ATOM   1545  CB  ASN I   4       1.783   0.611  11.539  1.00  2.00      8HVP1699
+ATOM   1546  CG  ASN I   4       0.795   1.717  11.224  1.00 17.75      8HVP1700
+ATOM   1547  OD1 ASN I   4       0.393   1.852  10.076  1.00  2.00      8HVP1701
+ATOM   1548  ND2 ASN I   4       0.596   2.492  12.300  1.00 23.43      8HVP1702
+HETATM 1549  CD1 LOV I   5       6.990   1.440  11.323  1.00  2.00   1  8HVP1703
+HETATM 1550  CD2 LOV I   5       9.002   1.876  12.515  1.00 11.48   1  8HVP1704
+HETATM 1551  C1G LOV I   5       7.861   0.862  12.418  1.00  3.76   1  8HVP1705
+HETATM 1552  C1B LOV I   5       7.163   0.772  13.787  1.00  4.12   1  8HVP1706
+HETATM 1553  C1A LOV I   5       5.904  -0.101  13.814  1.00  2.00   1  8HVP1707
+HETATM 1554  CS  LOV I   5       5.465  -0.675  15.137  1.00  2.00   1  8HVP1708
+HETATM 1555  N   LOV I   5       4.881   0.722  13.126  1.00  6.07   1  8HVP1709
+HETATM 1556  OS  LOV I   5       5.369   0.252  16.190  1.00  2.00   1  8HVP1710
+HETATM 1557  CT  LOV I   5       4.310  -1.642  15.109  1.00  2.00   1  8HVP1711
+HETATM 1558  CA  LOV I   5       4.567  -2.908  15.940  1.00  5.81   1  8HVP1712
+HETATM 1559  CB  LOV I   5       3.269  -3.761  15.889  1.00  5.21   1  8HVP1713
+HETATM 1560  CG1 LOV I   5       3.478  -5.277  15.902  1.00  4.64   1  8HVP1714
+HETATM 1561  CG2 LOV I   5       2.342  -3.457  17.047  1.00 12.50   1  8HVP1715
+HETATM 1562  C   LOV I   5       5.830  -3.684  15.551  1.00  7.18   1  8HVP1716
+HETATM 1563  O   LOV I   5       6.281  -3.728  14.374  1.00 10.72   1  8HVP1717
+ATOM   1564  N   ILE I   7       6.427  -4.343  16.512  1.00  3.60      8HVP1718
+ATOM   1565  CA  ILE I   7       7.551  -5.249  16.261  1.00  6.98      8HVP1719
+ATOM   1566  C   ILE I   7       7.249  -6.445  17.181  1.00  7.58      8HVP1720
+ATOM   1567  O   ILE I   7       7.022  -6.186  18.373  1.00 10.06      8HVP1721
+ATOM   1568  CB  ILE I   7       9.010  -4.748  16.351  1.00  3.23      8HVP1722
+ATOM   1569  CG1 ILE I   7      10.003  -5.658  15.565  1.00  2.00      8HVP1723
+ATOM   1570  CG2 ILE I   7       9.628  -4.887  17.793  1.00  2.11      8HVP1724
+ATOM   1571  CD1 ILE I   7       9.341  -6.707  14.634  1.00 17.88      8HVP1725
+ATOM   1572  N   VAL I   8       7.164  -7.651  16.671  1.00  8.36      8HVP1726
+ATOM   1573  CA  VAL I   8       6.932  -8.756  17.647  1.00 14.10      8HVP1727
+ATOM   1574  C   VAL I   8       8.045  -9.788  17.397  1.00 17.57      8HVP1728
+ATOM   1575  O   VAL I   8       8.550  -9.863  16.247  1.00 18.47      8HVP1729
+ATOM   1576  CB  VAL I   8       5.477  -9.194  17.710  1.00 21.86      8HVP1730
+ATOM   1577  CG1 VAL I   8       4.575  -8.212  18.485  1.00 19.23      8HVP1731
+ATOM   1578  CG2 VAL I   8       4.801  -9.569  16.397  1.00 22.28      8HVP1732
+ATOM   1579  OXT VAL I   8       8.299 -10.540  18.365  1.00 10.36      8HVP1733
+TER    1580      VAL I   8                                              8HVP1734
+HETATM 1581  O   HOH A   1       5.216  -3.028  12.131  1.00  2.00      8HVP1735
+HETATM 1582  O   HOH A   2      10.614   1.714  32.700  1.00 39.80      8HVP1736
+HETATM 1583  O   HOH A   3      12.751  11.193  35.699  1.00 26.47      8HVP1737
+HETATM 1584  O   HOH A   4       5.396  -4.251  21.781  1.00  2.00      8HVP1738
+HETATM 1585  O   HOH A   5       5.258   7.699  15.004  1.00 40.55      8HVP1739
+HETATM 1586  O   HOH A   6      15.785  -6.670  20.952  1.00 20.52      8HVP1740
+HETATM 1587  O   HOH A   7      -5.291  -9.959  12.451  1.00  2.62      8HVP1741
+HETATM 1588  O   HOH A   8       0.869 -10.774  29.130  1.00 30.76      8HVP1742
+HETATM 1589  O   HOH A   9      -7.945 -10.283  10.294  1.00  9.96      8HVP1743
+HETATM 1590  O   HOH A  10      12.572   7.951  34.543  1.00  7.75      8HVP1744
+HETATM 1591  O   HOH A  11      25.971 -12.456  20.059  1.00  3.45      8HVP1745
+HETATM 1592  O   HOH A  12      -4.522  -6.762   2.552  1.00 22.74      8HVP1746
+HETATM 1593  O   HOH A  13       4.817  12.754  11.271  1.00 28.72      8HVP1747
+HETATM 1594  O   HOH A  14      22.399   1.048   5.265  1.00 18.11      8HVP1748
+HETATM 1595  O   HOH A  15       9.276  10.929  33.431  1.00 20.98      8HVP1749
+HETATM 1596  O   HOH A  16       5.249  -8.716  27.053  1.00  3.47      8HVP1750
+HETATM 1597  O   HOH A  17     -19.140   7.249  24.019  1.00 25.75      8HVP1751
+HETATM 1598  O   HOH A  18      11.323  -9.792  22.237  1.00 47.47      8HVP1752
+HETATM 1599  O   HOH A  19      12.200  -6.659  25.388  1.00 45.69      8HVP1753
+HETATM 1600  O   HOH A  20     -12.130   2.034  28.265  1.00 13.33      8HVP1754
+HETATM 1601  O   HOH A  21      19.984  -7.962   8.693  1.00 36.85      8HVP1755
+HETATM 1602  O   HOH A  22     -17.280   5.660  23.688  1.00  8.42      8HVP1756
+HETATM 1603  O   HOH A  23      18.021 -17.711  15.693  1.00  8.89      8HVP1757
+HETATM 1604  O   HOH A  24      -3.849  14.080  15.702  1.00 23.80      8HVP1758
+HETATM 1605  O   HOH A  25      -7.116  -7.821  21.203  1.00 31.21      8HVP1759
+HETATM 1606  O   HOH A  26      -6.504   5.609  11.945  1.00  2.67      8HVP1760
+HETATM 1607  O   HOH A  27      -7.990  10.695  13.759  1.00  2.00      8HVP1761
+HETATM 1608  O   HOH A  28     -23.308   0.870  21.029  1.00 32.65      8HVP1762
+HETATM 1609  O   HOH A  29      11.743  14.669  35.485  1.00 30.26      8HVP1763
+HETATM 1610  O   HOH A  30       9.104  -9.450   4.424  1.00 12.53      8HVP1764
+HETATM 1611  O   HOH A  31       5.681   5.771  12.682  1.00 11.19      8HVP1765
+HETATM 1612  O   HOH A  32     -12.526  12.035  28.606  1.00 15.53      8HVP1766
+HETATM 1613  O   HOH A  33       3.280  -7.615   1.460  1.00 27.88      8HVP1767
+HETATM 1614  O   HOH A  34      12.685  15.267  15.689  1.00 21.25      8HVP1768
+HETATM 1615  O   HOH A  35     -21.369   4.254  23.436  1.00 21.59      8HVP1769
+HETATM 1616  O   HOH A  36     -16.670   6.393   7.864  1.00  8.51      8HVP1770
+HETATM 1617  O   HOH A  37      18.486  -6.156  25.609  1.00 17.82      8HVP1771
+HETATM 1618  O   HOH A  38      17.557  14.261  29.659  1.00 10.62      8HVP1772
+HETATM 1619  O   HOH A  39      -8.192 -12.788  12.929  1.00 16.07      8HVP1773
+HETATM 1620  O   HOH A  40      -8.920  -9.751  22.041  1.00 17.34      8HVP1774
+HETATM 1621  O   HOH A  41      -4.431   3.460  -1.908  1.00 38.28      8HVP1775
+HETATM 1622  O   HOH A  42      -5.066   7.197  37.048  1.00 17.03      8HVP1776
+HETATM 1623  O   HOH A  43       9.683 -12.586  18.588  1.00 25.99      8HVP1777
+HETATM 1624  O   HOH A  44       0.851  13.123  31.812  1.00  2.00      8HVP1778
+HETATM 1625  O   HOH A  45      34.321  -8.096  15.559  1.00  2.00      8HVP1779
+HETATM 1626  O   HOH A  46     -13.318   4.972   4.869  1.00  2.00      8HVP1780
+HETATM 1627  O   HOH A  47      -5.729  16.438  24.228  1.00  2.00      8HVP1781
+HETATM 1628  O   HOH A  48      27.210   9.667  11.528  1.00  2.00      8HVP1782
+HETATM 1629  O   HOH A  49     -15.949   0.283   0.917  1.00  2.00      8HVP1783
+HETATM 1630  O   HOH A  50      -7.424   8.402  35.088  1.00  6.93      8HVP1784
+HETATM 1631  O   HOH A  51       3.081  15.236  11.693  1.00  7.66      8HVP1785
+HETATM 1632  O   HOH A  52     -20.129   3.488  21.314  1.00  8.95      8HVP1786
+HETATM 1633  O   HOH A  53       2.983  -9.210  35.762  1.00 12.16      8HVP1787
+HETATM 1634  O   HOH A  54      22.227  -8.499  25.853  1.00 12.87      8HVP1788
+HETATM 1635  O   HOH A  55      10.098   8.441   9.848  1.00 14.24      8HVP1789
+HETATM 1636  O   HOH A  56       6.471 -10.064  28.863  1.00 16.97      8HVP1790
+HETATM 1637  O   HOH A  57       3.437 -11.087  26.858  1.00 17.91      8HVP1791
+HETATM 1638  O   HOH A  58      11.363  14.729  19.108  1.00 19.73      8HVP1792
+HETATM 1639  O   HOH A  59      -6.962  11.176  11.506  1.00 20.82      8HVP1793
+HETATM 1640  O   HOH A  60       6.623  16.089  10.621  1.00 22.95      8HVP1794
+HETATM 1641  O   HOH A  61       6.668 -11.009  25.008  1.00 22.95      8HVP1795
+HETATM 1642  O   HOH A  62       9.364  14.464  14.156  1.00 23.28      8HVP1796
+HETATM 1643  O   HOH A  63      -2.980   4.497   0.672  1.00 24.40      8HVP1797
+HETATM 1644  O   HOH A  64      22.290  10.937  22.020  1.00 24.99      8HVP1798
+HETATM 1645  O   HOH A  65      23.350   9.385  11.027  1.00 25.21      8HVP1799
+HETATM 1646  O   HOH A  66     -18.687  10.603   9.135  1.00 25.85      8HVP1800
+HETATM 1647  O   HOH A  67      25.618   7.811  33.389  1.00 26.92      8HVP1801
+HETATM 1648  O   HOH A  68      28.885  -6.845  11.462  1.00 28.02      8HVP1802
+HETATM 1649  O   HOH A  69     -23.158   4.537  25.077  1.00 29.77      8HVP1803
+HETATM 1650  O   HOH A  70      -2.599  14.244  31.864  1.00 32.04      8HVP1804
+HETATM 1651  O   HOH A  71      -0.659   9.877  12.451  1.00 34.04      8HVP1805
+HETATM 1652  O   HOH A  72       7.949  12.768  10.878  1.00 34.69      8HVP1806
+HETATM 1653  O   HOH A  73      10.081  15.243   9.186  1.00 37.44      8HVP1807
+HETATM 1654  O   HOH A  74     -19.436   8.318  11.246  1.00 38.80      8HVP1808
+HETATM 1655  O   HOH A  75       2.184  -8.186  27.493  1.00 42.83      8HVP1809
+HETATM 1656  O   HOH A  76      22.220 -16.223   7.085  1.00 44.72      8HVP1810
+HETATM 1657  O   HOH A  77     -23.272   4.114  21.546  1.00 47.20      8HVP1811
+HETATM 1658  O   HOH A  78      22.039 -14.292  24.350  1.00 52.48      8HVP1812
+HETATM 1659  O   HOH A  79       0.071  -8.208  34.109  1.00 56.14      8HVP1813
+HETATM 1660  O   HOH A  80      11.280  -7.333  31.371  1.00 59.48      8HVP1814
+CONECT  514  513  515  520                                              8HVP1815
+CONECT  520  514  521                                                   8HVP1816
+CONECT  521  520  522  524                                              8HVP1817
+CONECT  522  521  523  526                                              8HVP1818
+CONECT  523  522                                                        8HVP1819
+CONECT  524  521                                                        8HVP1820
+CONECT  525  524                                                        8HVP1821
+CONECT  526  522  527                                                   8HVP1822
+CONECT  716  715  717  718                                              8HVP1823
+CONECT  718  716  719                                                   8HVP1824
+CONECT  719  718  720  722                                              8HVP1825
+CONECT  720  719  721  724                                              8HVP1826
+CONECT  721  720                                                        8HVP1827
+CONECT  722  719                                                        8HVP1828
+CONECT  723  722                                                        8HVP1829
+CONECT  724  720  725                                                   8HVP1830
+CONECT 1273 1272 1274 1279                                              8HVP1831
+CONECT 1279 1273 1280                                                   8HVP1832
+CONECT 1280 1279 1281 1283                                              8HVP1833
+CONECT 1281 1280 1282 1285                                              8HVP1834
+CONECT 1282 1281                                                        8HVP1835
+CONECT 1283 1280                                                        8HVP1836
+CONECT 1284 1283                                                        8HVP1837
+CONECT 1285 1281 1286                                                   8HVP1838
+CONECT 1475 1474 1476 1477                                              8HVP1839
+CONECT 1477 1475 1478                                                   8HVP1840
+CONECT 1478 1477 1479 1481                                              8HVP1841
+CONECT 1479 1478 1480 1483                                              8HVP1842
+CONECT 1480 1479                                                        8HVP1843
+CONECT 1481 1478                                                        8HVP1844
+CONECT 1482 1481                                                        8HVP1845
+CONECT 1483 1479 1484                                                   8HVP1846
+CONECT 1543 1542 1544 1555                                              8HVP1847
+CONECT 1549 1551                                                        8HVP1848
+CONECT 1550 1551                                                        8HVP1849
+CONECT 1551 1549 1550 1552                                              8HVP1850
+CONECT 1552 1551 1553                                                   8HVP1851
+CONECT 1553 1552 1554 1555                                              8HVP1852
+CONECT 1554 1553 1556 1557                                              8HVP1853
+CONECT 1555 1543 1553                                                   8HVP1854
+CONECT 1556 1554                                                        8HVP1855
+CONECT 1557 1554 1558                                                   8HVP1856
+CONECT 1558 1557 1559 1562                                              8HVP1857
+CONECT 1559 1558 1560 1561                                              8HVP1858
+CONECT 1560 1559                                                        8HVP1859
+CONECT 1561 1559                                                        8HVP1860
+CONECT 1562 1558 1563 1564                                              8HVP1861
+CONECT 1563 1562                                                        8HVP1862
+CONECT 1564 1562 1565                                                   8HVP1863
+MASTER       71    3    5    2   20    6    2    6 1657    3   49   17  8HVP1864
+END                                                                     8HVP1865

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AAC12660.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AAC12660.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AAC12660.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+>AAC12660 TAK1 binding protein [Homo sapiens]. 
+MAAQRRSLLQSEQQPSWTDDLPLCHLSGVGSASNRSYSADGKGTESHPPEDSWLKFRSEN
+NCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLES
+IDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLLNNKLYV
+ANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQEST
+RRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLMSEGLYKALEAAH
+GPGQANQEIAAMIDTEFAKQTSLDAVAQAVVDRVKRIHSDTFASGGERARFCPRHEDMTL
+LVRNFGYPLGEMSQPTPSPAPAAGGRVYPVSVPYSSAQSTSKTSVTLSLVMPSQGQMVNG
+AHSASTLDEATPTLTNQSPTLTLQSTNTHTQSSSSSSDGGLFRSRPAHSLPPGEDGRVEP
+YVDFAEFYRLWSVDHGEQSVVTAP

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AB077698.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AB077698.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AB077698.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+LOCUS       AB077698                2701 bp    mRNA    linear   PRI 01-MAR-2002
+DEFINITION  Homo sapiens mRNA for hCHCR-G, complete cds.
+ACCESSION   AB077698
+VERSION     AB077698.1  GI:19032344
+KEYWORDS    .
+SOURCE      Homo sapiens cDNA to mRNA.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1
+  AUTHORS   Squillace,R.M., Chenault,D.M. and Wang,E.H.
+  TITLE     Inhibition of myogenesis by the novel Muscleblind-related protein
+            CHCR
+  JOURNAL   Unpublished
+REFERENCE   2
+  AUTHORS   Squillace,R.M. and Wang,E.H.
+  TITLE     Genomic structure, chromosomal localization, and splicing variation
+            of the human CHCR gene, cloning and characterization of mouse CHCR
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 2701)
+  AUTHORS   Squillace,R.M., Chenault,D.M. and Wang,E.H.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (10-JAN-2002) Edith H. Wang, University of Washington,
+            Pharmacology; 1959 NE Pacific Ave., Box 357280, Seattle, Washington
+            98195, USA (E-mail:ehwang at u.washington.edu, Tel:206-616-5376,
+            Fax:206-685-3822)
+FEATURES             Location/Qualifiers
+     source          1..2701
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /chromosome="X"
+                     /map="Xq26.1"
+     gene            1..2701
+                     /gene="CHCR"
+     5'UTR           <1..79
+                     /gene="CHCR"
+     CDS             80..1144
+                     /gene="CHCR"
+                     /note="Cys3His CCG1-Required
+                     Encoded on BAC clone RP5-842K24 (AL050310)
+                     The human CHCR (Cys3His CCG1-Required) protein is highly
+                     related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL
+                     (NM_005757,AF061261), which together comprise the human
+                     Muscleblind family"
+                     /codon_start=1
+                     /product="hCHCR-G"
+                     /protein_id="BAB85648.1"
+                     /db_xref="GI:19032345"
+                     /translation="MTAVNVALIRDTKWLTLEVCREFQRGTCSRADADCKFAHPPRVC
+                     HVENGRVVACFDSLKGRCTRENCKYLHPPPHLKTQLEINGRNNLIQQKTAAAMFAQQM
+                     QLMLQNAQMSSLGSFPMTPSIPANPPMAFNPYIPHPGMGLVPAELVPNTPVLIPGNPP
+                     LAMPGAVGPKLMRSDKLEVCREFQRGNCTRGENDCRYAHPTDASMIEASDNTVTICMD
+                     YIKGRCSREKCKYFHPPAHLQARLKAAHHQMNHSAASAMALQPGTLQLIPKRSALEKP
+                     NGATPVFNPTVFHCQQALTNLQLPQPAFIPAGPILCMAPASNIVPMMHGATPTTVSAA
+                     TTPATSVPFAAPTTGNQLKF"
+     misc_feature    137..196
+                     /gene="CHCR"
+                     /note="Cys3His, zinc finger
+                     Encoded on BAC clone RP5-842K24 (AL050310)"
+     misc_feature    239..292
+                     /gene="CHCR"
+                     /note="Cys3His, zinc finger
+                     Encoded on BAC clone RP5-842K24 (AL050310)"
+     misc_feature    617..676
+                     /gene="CHCR"
+                     /note="Cys3His, zinc finger
+                     Encoded on BAC clone RP5-842K24 (AL050310)"
+     misc_feature    725..778
+                     /gene="CHCR"
+                     /note="Cys3His, zinc finger
+                     Encoded on BAC clone RP5-842K24 (AL050310)"
+     3'UTR           1145..2659
+                     /gene="CHCR"
+     polyA_site      1606
+                     /gene="CHCR"
+                     /note="Encoded on BAC clone RP5-842K24 (AL050310);
+                     PolyA_site#1 used by CHCR EST clone PLACE1010202
+                     (AK002178)"
+     polyA_site      2660
+                     /gene="CHCR"
+                     /note="Encoded on BAC clone RP5-842K24 (AL050310);
+                     PolyA_site#2 used by CHCR EST clone DKFZp434G2222
+                     (AL133625)"
+BASE COUNT      817 a    570 c    525 g    789 t
+ORIGIN      
+        1 aattcatttt taatccttta atagtccaca gtaatattgt cctaaagagg gtacattgga
+       61 ttttaatttt gctttcaata tgacggctgt caatgttgcc ctgattcgtg ataccaagtg
+      121 gctgacttta gaagtctgta gagaatttca gagaggaact tgctctcgag ctgatgcaga
+      181 ttgcaagttt gcccatccac caagagtttg ccatgtggaa aatggtcgtg tggtggcctg
+      241 ttttgattct ctaaagggtc ggtgtacccg agagaactgc aagtaccttc accctcctcc
+      301 acacttaaaa acgcagctgg agattaatgg gcggaacaat ctgattcaac agaagactgc
+      361 cgcagccatg ttcgcccagc agatgcagct tatgctccaa aacgctcaaa tgtcatcact
+      421 tggttctttt cctatgactc catcaattcc agctaatcct cccatggctt tcaatcctta
+      481 cataccacat cctgggatgg gcctcgttcc tgcagaactt gtaccaaata cacctgttct
+      541 gattcctgga aacccacctc ttgcaatgcc aggagctgtt ggcccaaaac tgatgcgttc
+      601 agataaactg gaggtttgcc gagaatttca gcgtggaaat tgtacccgtg gggagaatga
+      661 ttgccgctat gctcacccta ctgatgcttc catgattgaa gcgagtgata atactgtgac
+      721 aatctgcatg gattacatca aaggtcgatg ctcgcgggag aaatgcaagt actttcatcc
+      781 tcctgcacac ttgcaagcca gactcaaggc agctcatcat cagatgaacc attcagctgc
+      841 ctctgccatg gccctgcagc ctggtacact gcaactgata ccaaagagat cagcactgga
+      901 aaagcccaat ggtgccaccc cggtctttaa tcccactgtt ttccactgcc aacaggctct
+      961 gactaacctg cagctcccac agccggcatt tatccctgca gggccaatac tgtgcatggc
+     1021 acccgcttca aatattgtgc ccatgatgca cggtgctaca cctaccactg tgtctgcagc
+     1081 aacaacacct gccaccagcg ttccgttcgc tgcaccaact acaggcaatc agctgaaatt
+     1141 ctgaacagca gagttatgga gtatcagaat ctttccatgg aaacctccat atggcctttc
+     1201 tatatatatt ctcgtatgtc ttattctacc aacacaacaa taagcgtgtt gcagtcaatg
+     1261 tattaagcaa agcaaacctg ccagccagca aattcaaata aaaaataaag cattaaaaat
+     1321 caatggagat gttaaaacaa cacaaataga aaactagtaa ctaccatcca tcctatttga
+     1381 attatcaagc agaacatgac cataaaattt ggtaacttgt tacattactc tttgtgattt
+     1441 tctaataacc atgctaagtg tatttccaca gtgagctttt ggcttactat atacattctt
+     1501 ggtggataaa ttgttcatct gtttttgaag tgttacctta ctattttgtt tacaagatag
+     1561 tctattgggt tgattcagga tgtaacaaat atattcagta ccatttcttg tgttgtattg
+     1621 tgttgtgctg tgttaggttt ttacatactg tagtgttttg ctgtatatgt gtggtgtttg
+     1681 atttcaacta aagtgttatt agtggggaac agaagtatat gtgcttaaga acatgacagg
+     1741 ttcatgcaaa tatgctctct ttctttagaa tatttctgta ggtttcttgg gactgacatt
+     1801 taaaacgcct cacttttgaa tgtgcacaaa acctgctcat taacatgcat gtgtataatt
+     1861 tgtacctgca gatctgatgt tgcataatac aatcaaatta ctagattttt taaagagaga
+     1921 aataattacc tgcacaaagc agagaacttc ataaaacatt aacccctaat tcactcttct
+     1981 taaatagctt ggcaaataag actttacctt taaatgaatt tctcagcatt tatactaaaa
+     2041 attatgtaac gtgctcatta gattttttgt gtgtgtggct tgagaatccc atctcctaaa
+     2101 ttgagtgtct aaaactgagc catttgtcat cttcagctga gaaactggta cttgggagct
+     2161 taaaaatatg ctaattacaa gttataaatc aaacggagag atgggggcat ggagatagtt
+     2221 tttacgtact ggaggaaagt gtgtaaaacc atggcaatgt caccttttac acaaatgcca
+     2281 ttttccaaat gcaaatggct catgctcttt agactactct ttgaataaca agtaagatgc
+     2341 aatctagcaa aagtcagtca gggtgaaaga gaattggttg caaatgagga cttccctccc
+     2401 caaatggaca gtcttctctg ttgatcacag agggagcctg agtacaggct tggagaaatg
+     2461 gctaggacag ggaacaggga agcacttaca attattcctt gatttattca aaagaactgg
+     2521 gaaagatggt tgtagttgtc tttagcttcg gttcaactga gtttcgtttt gttaaacagt
+     2581 tcagtgaagg agaaagcacc tgtgatatat ggcaagtgtc cccctgccca aactttaaca
+     2641 tcagaccctc tcacatcata aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa
+     2701 a
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AE003528_ecoli.bls
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AE003528_ecoli.bls	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AE003528_ecoli.bls	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,260 @@
+BLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= AE003528 Drosophila melanogaster genomic scaffold
+142000013386050 section 40 of 54, complete sequence.
+         (283,821 letters)
+
+Database: ecoli.nt
+           400 sequences; 4,662,239 total letters
+
+Searching.................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gb|AE000450.1|AE000450 Escherichia coli K-12 MG1655 section 340 ...    60  1e-06
+gb|AE000359.1|AE000359 Escherichia coli K-12 MG1655 section 249 ...    48  0.004
+gb|AE000281.1|AE000281 Escherichia coli K-12 MG1655 section 171 ...    40  1.1
+gb|AE000274.1|AE000274 Escherichia coli K-12 MG1655 section 164 ...    40  1.1
+gb|AE000117.1|AE000117 Escherichia coli K-12 MG1655 section 7 of...    40  1.1
+gb|AE000502.1|AE000502 Escherichia coli K-12 MG1655 section 392 ...    38  4.3
+gb|AE000454.1|AE000454 Escherichia coli K-12 MG1655 section 344 ...    38  4.3
+gb|AE000443.1|AE000443 Escherichia coli K-12 MG1655 section 333 ...    38  4.3
+gb|AE000404.1|AE000404 Escherichia coli K-12 MG1655 section 294 ...    38  4.3
+gb|AE000369.1|AE000369 Escherichia coli K-12 MG1655 section 259 ...    38  4.3
+gb|AE000287.1|AE000287 Escherichia coli K-12 MG1655 section 177 ...    38  4.3
+gb|AE000283.1|AE000283 Escherichia coli K-12 MG1655 section 173 ...    38  4.3
+gb|AE000253.1|AE000253 Escherichia coli K-12 MG1655 section 143 ...    38  4.3
+gb|AE000201.1|AE000201 Escherichia coli K-12 MG1655 section 91 o...    38  4.3
+
+>gb|AE000450.1|AE000450 Escherichia coli K-12 MG1655 section 340 of 400 of the complete genome
+          Length = 11414
+
+ Score = 60.0 bits (30), Expect = 1e-06
+ Identities = 36/38 (94%)
+ Strand = Plus / Minus
+
+                                                   
+Query: 79116 gacatcatcgccattctgggaatggatgaactgtctga 79153
+             |||||||||||||| ||||| |||||||||||||||||
+Sbjct: 4712  gacatcatcgccatcctgggtatggatgaactgtctga 4675
+
+
+ Score = 40.1 bits (20), Expect = 1.1
+ Identities = 23/24 (95%)
+ Strand = Plus / Minus
+
+                                     
+Query: 78617 tggccagatgaacgagcccccggg 78640
+             |||||||||||||||||| |||||
+Sbjct: 5208  tggccagatgaacgagccgccggg 5185
+
+
+>gb|AE000359.1|AE000359 Escherichia coli K-12 MG1655 section 249 of 400 of the complete genome
+          Length = 11001
+
+ Score = 48.1 bits (24), Expect = 0.004
+ Identities = 24/24 (100%)
+ Strand = Plus / Minus
+
+                                      
+Query: 193000 tgttgctgctgttgcagattgctg 193023
+              ||||||||||||||||||||||||
+Sbjct: 10696  tgttgctgctgttgcagattgctg 10673
+
+
+>gb|AE000281.1|AE000281 Escherichia coli K-12 MG1655 section 171 of 400 of the complete genome
+          Length = 11855
+
+ Score = 40.1 bits (20), Expect = 1.1
+ Identities = 23/24 (95%)
+ Strand = Plus / Plus
+
+                                      
+Query: 211974 ataaatatgtgcaccattagtaac 211997
+              |||||||||||| |||||||||||
+Sbjct: 3068   ataaatatgtgcgccattagtaac 3091
+
+
+>gb|AE000274.1|AE000274 Escherichia coli K-12 MG1655 section 164 of 400 of the complete genome
+          Length = 13793
+
+ Score = 40.1 bits (20), Expect = 1.1
+ Identities = 23/24 (95%)
+ Strand = Plus / Plus
+
+                                      
+Query: 227803 ctggagatgctggaaatgctcact 227826
+              |||||||||||||||||| |||||
+Sbjct: 3532   ctggagatgctggaaatggtcact 3555
+
+
+>gb|AE000117.1|AE000117 Escherichia coli K-12 MG1655 section 7 of 400 of the complete genome
+          Length = 13416
+
+ Score = 40.1 bits (20), Expect = 1.1
+ Identities = 20/20 (100%)
+ Strand = Plus / Minus
+
+                                
+Query: 2754 gctgctgctgttgctgccac 2773
+            ||||||||||||||||||||
+Sbjct: 3441 gctgctgctgttgctgccac 3422
+
+
+>gb|AE000502.1|AE000502 Escherichia coli K-12 MG1655 section 392 of 400 of the complete genome
+          Length = 11313
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 22/23 (95%)
+ Strand = Plus / Plus
+
+                                     
+Query: 171913 ttttatgtagattttacttgtta 171935
+              |||||||| ||||||||||||||
+Sbjct: 428    ttttatgttgattttacttgtta 450
+
+
+>gb|AE000454.1|AE000454 Escherichia coli K-12 MG1655 section 344 of 400 of the complete genome
+          Length = 12175
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Minus
+
+                                 
+Query: 176663 agacaaatttatgagcgtt 176681
+              |||||||||||||||||||
+Sbjct: 9769   agacaaatttatgagcgtt 9751
+
+
+>gb|AE000443.1|AE000443 Escherichia coli K-12 MG1655 section 333 of 400 of the complete genome
+          Length = 11577
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Plus
+
+                                 
+Query: 160348 gcatttgttgtttgcggac 160366
+              |||||||||||||||||||
+Sbjct: 8826   gcatttgttgtttgcggac 8844
+
+
+>gb|AE000404.1|AE000404 Escherichia coli K-12 MG1655 section 294 of 400 of the complete genome
+          Length = 14000
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Minus
+
+                                 
+Query: 193629 ttagcgaccaccacgtcgg 193647
+              |||||||||||||||||||
+Sbjct: 13496  ttagcgaccaccacgtcgg 13478
+
+
+>gb|AE000369.1|AE000369 Escherichia coli K-12 MG1655 section 259 of 400 of the complete genome
+          Length = 9720
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 22/23 (95%)
+ Strand = Plus / Minus
+
+                                    
+Query: 50797 catcaatattattgaatatttca 50819
+             ||||| |||||||||||||||||
+Sbjct: 869   catcactattattgaatatttca 847
+
+
+>gb|AE000287.1|AE000287 Escherichia coli K-12 MG1655 section 177 of 400 of the complete genome
+          Length = 10876
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Plus
+
+                                
+Query: 94068 tcaccagccagccgctgcc 94086
+             |||||||||||||||||||
+Sbjct: 443   tcaccagccagccgctgcc 461
+
+
+>gb|AE000283.1|AE000283 Escherichia coli K-12 MG1655 section 173 of 400 of the complete genome
+          Length = 10857
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Minus
+
+                                 
+Query: 104663 acgttagcggcactgactc 104681
+              |||||||||||||||||||
+Sbjct: 893    acgttagcggcactgactc 875
+
+
+>gb|AE000253.1|AE000253 Escherichia coli K-12 MG1655 section 143 of 400 of the complete genome
+          Length = 10582
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Minus
+
+                                 
+Query: 191578 cgttgaaaatggaaatctt 191596
+              |||||||||||||||||||
+Sbjct: 2595   cgttgaaaatggaaatctt 2577
+
+
+>gb|AE000201.1|AE000201 Escherichia coli K-12 MG1655 section 91 of 400 of the complete genome
+          Length = 11275
+
+ Score = 38.2 bits (19), Expect = 4.3
+ Identities = 19/19 (100%)
+ Strand = Plus / Plus
+
+                                 
+Query: 249230 tggctgctgctccagttgt 249248
+              |||||||||||||||||||
+Sbjct: 2297   tggctgctgctccagttgt 2315
+
+
+  Database: ecoli.nt
+    Posted date:  Jun 14, 2001  3:27 PM
+  Number of letters in database: 4,662,239
+  Number of sequences in database:  400
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 592338
+Number of Sequences: 400
+Number of extensions: 592338
+Number of successful extensions: 42599
+Number of sequences better than 10.0: 14
+length of query: 283821
+length of database: 4,662,239
+effective HSP length: 20
+effective length of query: 283801
+effective length of database: 4,654,239
+effective search space: 1320877682439
+effective search space used: 1320877682439
+T: 0
+A: 0
+X1: 6 (11.9 bits)
+X2: 10 (19.8 bits)
+S1: 12 (24.3 bits)
+S2: 19 (38.2 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AE003644_Adh-genomic.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AE003644_Adh-genomic.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AE003644_Adh-genomic.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4917 @@
+LOCUS       AE003644              263309 bp    DNA     linear   INV 14-FEB-2003
+DEFINITION  Drosophila melanogaster chromosome 2L section 53 of 83 of the
+            complete sequence.
+ACCESSION   AE003644 AE002690 AE014134
+VERSION     AE003644.3  GI:22946496
+KEYWORDS    .
+SOURCE      Drosophila melanogaster (fruit fly)
+  ORGANISM  Drosophila melanogaster
+            Eukaryota; Metazoa; Arthropoda; Hexapoda; Insecta; Pterygota;
+            Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha;
+            Ephydroidea; Drosophilidae; Drosophila.
+REFERENCE   1  (bases 1 to 263309)
+  AUTHORS   Adams,M.D., Celniker,S.E., Holt,R.A., Evans,C.A., Gocayne,J.D.,
+            Amanatides,P.G., Scherer,S.E., Li,P.W., Hoskins,R.A., Galle,R.F.,
+            George,R.A., Lewis,S.E., Richards,S., Ashburner,M., Henderson,S.N.,
+            Sutton,G.G., Wortman,J.R., Yandell,M.D., Zhang,Q., Chen,L.X.,
+            Brandon,R.C., Rogers,Y.H., Blazej,R.G., Champe,M., Pfeiffer,B.D.,
+            Wan,K.H., Doyle,C., Baxter,E.G., Helt,G., Nelson,C.R., Gabor,G.L.,
+            Abril,J.F., Agbayani,A., An,H.J., Andrews-Pfannkoch,C., Baldwin,D.,
+            Ballew,R.M., Basu,A., Baxendale,J., Bayraktaroglu,L., Beasley,E.M.,
+            Beeson,K.Y., Benos,P.V., Berman,B.P., Bhandari,D., Bolshakov,S.,
+            Borkova,D., Botchan,M.R., Bouck,J., Brokstein,P., Brottier,P.,
+            Burtis,K.C., Busam,D.A., Butler,H., Cadieu,E., Center,A.,
+            Chandra,I., Cherry,J.M., Cawley,S., Dahlke,C., Davenport,L.B.,
+            Davies,P., de Pablos,B., Delcher,A., Deng,Z., Mays,A.D., Dew,I.,
+            Dietz,S.M., Dodson,K., Doup,L.E., Downes,M., Dugan-Rocha,S.,
+            Dunkov,B.C., Dunn,P., Durbin,K.J., Evangelista,C.C., Ferraz,C.,
+            Ferriera,S., Fleischmann,W., Fosler,C., Gabrielian,A.E., Garg,N.S.,
+            Gelbart,W.M., Glasser,K., Glodek,A., Gong,F., Gorrell,J.H., Gu,Z.,
+            Guan,P., Harris,M., Harris,N.L., Harvey,D., Heiman,T.J.,
+            Hernandez,J.R., Houck,J., Hostin,D., Houston,K.A., Howland,T.J.,
+            Wei,M.H., Ibegwam,C., Jalali,M., Kalush,F., Karpen,G.H., Ke,Z.,
+            Kennison,J.A., Ketchum,K.A., Kimmel,B.E., Kodira,C.D., Kraft,C.,
+            Kravitz,S., Kulp,D., Lai,Z., Lasko,P., Lei,Y., Levitsky,A.A.,
+            Li,J., Li,Z., Liang,Y., Lin,X., Liu,X., Mattei,B., McIntosh,T.C.,
+            McLeod,M.P., McPherson,D., Merkulov,G., Milshina,N.V., Mobarry,C.,
+            Morris,J., Moshrefi,A., Mount,S.M., Moy,M., Murphy,B., Murphy,L.,
+            Muzny,D.M., Nelson,D.L., Nelson,D.R., Nelson,K.A., Nixon,K.,
+            Nusskern,D.R., Pacleb,J.M., Palazzolo,M., Pittman,G.S., Pan,S.,
+            Pollard,J., Puri,V., Reese,M.G., Reinert,K., Remington,K.,
+            Saunders,R.D., Scheeler,F., Shen,H., Shue,B.C., Siden-Kiamos,I.,
+            Simpson,M., Skupski,M.P., Smith,T., Spier,E., Spradling,A.C.,
+            Stapleton,M., Strong,R., Sun,E., Svirskas,R., Tector,C., Turner,R.,
+            Venter,E., Wang,A.H., Wang,X., Wang,Z.Y., Wassarman,D.A.,
+            Weinstock,G.M., Weissenbach,J., Williams,S.M., WoodageT,
+            Worley,K.C., Wu,D., Yang,S., Yao,Q.A., Ye,J., Yeh,R.F.,
+            Zaveri,J.S., Zhan,M., Zhang,G., Zhao,Q., Zheng,L., Zheng,X.H.,
+            Zhong,F.N., Zhong,W., Zhou,X., Zhu,S., Zhu,X., Smith,H.O.,
+            Gibbs,R.A., Myers,E.W., Rubin,G.M. and Venter,J.C.
+  TITLE     The genome sequence of Drosophila melanogaster
+  JOURNAL   Science 287 (5461), 2185-2195 (2000)
+  MEDLINE   20196006
+   PUBMED   10731132
+REFERENCE   2  (bases 1 to 263309)
+  AUTHORS   Celniker,S.E., Adams,M.D., Kronmiller,B., Wan,K.H., Holt,R.A.,
+            Evans,C.A., Gocayne,J.D., Amanatides,P.G., Brandon,R.C., Rogers,Y.,
+            Banzon,J., An,H., Baldwin,D., Banzon,J., Beeson,K.Y., Busam,D.A.,
+            Carlson,J.W., Center,A., Champe,M., Davenport,L.B., Dietz,S.M.,
+            Dodson,K., Dorsett,V., Doup,L.E., Doyle,C., Dresnek,D., Farfan,D.,
+            Ferriera,S., Frise,E., Galle,R.F., Garg,N.S., George,R.A.,
+            Gonzalez,M., Houck,J., Hoskins,R.A., Hostin,D., Howland,T.J.,
+            Ibegwam,C., Jalali,M., Kruse,D., Li,P., Mattei,B., Moshrefi,A.,
+            McIntosh,T.C., Moy,M., Murphy,B., Nelson,C., Nelson,K.A., Nunoo,J.,
+            Pacleb,J., Paragas,V., Park,S., Patel,S., Pfeiffer,B.,
+            Phouanenavong,S., Pittman,G.S., Puri,V., Richards,S., Scheeler,F.,
+            Stapleton,M., Strong,R., Svirskas,R., Tector,C., Tyler,D.,
+            Williams,S.M., Zaveri,J.S., Smith,H.O., Venter,J.C. and Rubin,G.M.
+  TITLE     Sequencing of Drosophila melanogaster genome
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 263309)
+  AUTHORS   Misra,S., Crosby,M.A., Matthews,B.B., Bayraktaroglu,L.,
+            Campbell,K., Hradecky,P., Huang,Y., Kaminker,J.S., Prochnik,S.E.,
+            Smith,C.D., Tupy,J.L., Bergman,C.M., Berman,B.P., Carlson,J.W.,
+            Celniker,S.E., Clamp,M.E., Drysdale,R.A., Emmert,D., Frise,E., de
+            Grey,A.D.N.J., Harris,N.L., Kronmiller,B., Marshall,B.,
+            Millburn,G.H., Richter,J., Russo,S., Searle,S.M.J., Smith,E.,
+            Shu,S., Smutniak,F., Whitfield,E.J., Ashburner,M., Gelbart,W.M.,
+            Rubin,G.M., Mungall,C.J. and Lewis,S.E.
+  TITLE     Annotation of Drosophila melanogaster genome
+  JOURNAL   Unpublished
+REFERENCE   4  (bases 1 to 263309)
+  AUTHORS   Adams,M.D., Celniker,S.E., Gibbs,R.A., Rubin,G.M. and Venter,C.J.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (21-MAR-2000) Celera Genomics, 45 West Gude Drive,
+            Rockville, MD 20850, USA
+REFERENCE   5  (bases 1 to 263309)
+  AUTHORS   .
+  CONSRTM   FlyBase
+  TITLE     Direct Submission
+  JOURNAL   Submitted (06-SEP-2002) University of California Berkeley, 539 Life
+            Sciences Addition, Berkeley, CA 94720, USA
+REFERENCE   6  (bases 1 to 263309)
+  AUTHORS   .
+  CONSRTM   FlyBase
+  TITLE     Direct Submission
+  JOURNAL   Submitted (23-JAN-2003) University of California Berkeley, 539 Life
+            Sciences Addition, Berkeley, CA 94720, USA
+COMMENT     On Sep 16, 2002 this sequence version replaced gi:10728786.
+FEATURES             Location/Qualifiers
+     source          1..263309
+                     /organism="Drosophila melanogaster"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:7227"
+                     /chromosome="2L"
+                     /note="genotype: y[1]; cn[1] bw[1] sp[1]; Rh6[1]"
+     gene            20111..23268
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /note="last curated on Thu Dec 13 16:51:32 PST 2001"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0005771"
+     mRNA            join(20111..20584,20887..23268)
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /product="CG4491-RA"
+                     /db_xref="FLYBASE:FBgn0005771"
+     CDS             join(20495..20584,20887..22410)
+                     /gene="noc"
+                     /locus_tag="CG4491"
+                     /note="noc gene product"
+                     /codon_start=1
+                     /product="CG4491-PA"
+                     /protein_id="AAF53399.1"
+                     /db_xref="GI:7298163"
+                     /db_xref="FLYBASE:FBgn0005771"
+                     /translation="MVVLEGGGGVVTIGNNQYLQPDYLAPLPTTMDAKKSPLALLAQT
+                     CSQIGADSSAVKPLLAMDKNKTKPGACSSSSNSSSSSGSAEISAAKSPSGQAKSPKSS
+                     TPISSTATSASLSNTSTGEIKLAFKPYETNVLSHQNQNSFKSSSSLDAEPTRPSSKNS
+                     SSAQERVPSRSKSNATPTDGGKAEISAHDSSSSRKTVSPSGSSQRGASPIVRSGMEVL
+                     NNANGTAQHPKEMSSMAAAAAAAAAAYKAAGPYGLNPLSALCCPPGMEQHANPAFRPP
+                     FAGGFSHHHAAMLAVAANGGYPGGAPGGGPAGQPNPYISYQRIKTPAGGEAIVPVCKD
+                     PYCQGCPYSAHTQQMLMGAPCPAGCTQCEHQKYGLAMASAAGLPPAHPYSQAAAAAAA
+                     NAAAARSAPYVCSWVVGDAYCGKRFQTSDELFSHLRTHTGNLSDPAAAAAALAQSQAQ
+                     SLLGTLFPPSALRAGYPTPPLSPMSAAAAAARYHPYAKPPPGALAGGPSPFGAAGAFN
+                     PAAAAAAAALGPYYSPYAMYGQRMGAAHQ"
+     gene            25127..25198
+                     /locus_tag="CR31985"
+     tRNA            25127..25198
+                     /locus_tag="CR31985"
+                     /product="tRNA-Pro"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0051985"
+     gene            47340..47390
+                     /locus_tag="CR31977"
+     tRNA            47340..47390
+                     /locus_tag="CR31977"
+                     /product="tRNA-Gly"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0051977"
+     gene            complement(47556..47626)
+                     /locus_tag="CR31978"
+     tRNA            complement(47556..47626)
+                     /locus_tag="CR31978"
+                     /product="tRNA-Gly"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0051978"
+     gene            complement(47870..47940)
+                     /locus_tag="CR31982"
+     tRNA            complement(47870..47940)
+                     /locus_tag="CR31982"
+                     /product="tRNA-Gly"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0051982"
+     gene            complement(59795..59865)
+                     /locus_tag="CR31981"
+     tRNA            complement(59795..59865)
+                     /locus_tag="CR31981"
+                     /product="tRNA-Gly"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0051981"
+     gene            complement(60103..60173)
+                     /locus_tag="CR31980"
+     tRNA            complement(60103..60173)
+                     /locus_tag="CR31980"
+                     /product="tRNA-Gly"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0051980"
+     gene            complement(76884..78324)
+                     /gene="BG:DS04641.8"
+                     /locus_tag="CG4218"
+                     /note="last curated on Thu Dec 13 17:04:18 PST 2001"
+                     /map="35B2-35B2"
+                     /db_xref="FLYBASE:FBgn0028881"
+     mRNA            complement(join(76884..77044,77116..77503,77900..78324))
+                     /gene="BG:DS04641.8"
+                     /locus_tag="CG4218"
+                     /product="CG4218-RA"
+                     /db_xref="FLYBASE:FBgn0028881"
+     CDS             complement(join(76890..77044,77116..77503,77900..78202))
+                     /gene="BG:DS04641.8"
+                     /locus_tag="CG4218"
+                     /note="BG:DS04641.8 gene product"
+                     /codon_start=1
+                     /product="CG4218-PA"
+                     /protein_id="AAF53400.1"
+                     /db_xref="GI:7298164"
+                     /db_xref="FLYBASE:FBgn0028881"
+                     /translation="MATRQPTPRQLAYLDELKMRLQELRDRQASFMEQTAKILSRMSS
+                     SSMDGSESETLPTDSIDENVTTNTTAGVGAGITKGSKSSLNIKKLIQRFEDLRKNSQE
+                     FGDLPEVPEELINVDVRRILNGYEKLIEEGHVLQQSWFLLKKSTESCARFASANGMKS
+                     EDRPPLKTNSGIVEVCNVVPEHSPEVAETPRVKKSKSLLSNMVTSSEIVDKKAEVFST
+                     KMAKGKCNFQDYGHARWGAFMQLIIRVLRPFHGGSRKTKKIPAHNANNMRIPSHLPPK
+                     RKPIY"
+     gene            complement(92321..92975)
+                     /gene="BG:DS01486.1"
+                     /locus_tag="CG3473"
+                     /note="last curated on Thu Dec 13 16:46:20 PST 2001"
+                     /map="35B3-35B3"
+                     /db_xref="FLYBASE:FBgn0028913"
+     mRNA            complement(92321..92975)
+                     /gene="BG:DS01486.1"
+                     /locus_tag="CG3473"
+                     /product="CG3473-RA"
+                     /db_xref="FLYBASE:FBgn0028913"
+     CDS             complement(92373..92828)
+                     /gene="BG:DS01486.1"
+                     /locus_tag="CG3473"
+                     /note="BG:DS01486.1 gene product"
+                     /codon_start=1
+                     /product="CG3473-PA"
+                     /protein_id="AAF53401.1"
+                     /db_xref="GI:7298165"
+                     /db_xref="FLYBASE:FBgn0028913"
+                     /translation="MAALTPRIIKETQRLLEDPVPGISATPDECNARYFHVLVTGPKD
+                     SPFEGGNFKLELFLPEDYPMKAPKVRFLTKIFHPNIDRVGRICLDILKDKWSPALQIR
+                     TVLLSIQALLSAPNPDDPLANDVAELWKVNERRAIQLARECTLKHAMQN"
+     gene            128108..128179
+                     /locus_tag="CR31983"
+     tRNA            128108..128179
+                     /locus_tag="CR31983"
+                     /product="tRNA-Pro"
+                     /map="35B3-35B3"
+                     /db_xref="FLYBASE:FBgn0051983"
+     gene            complement(128645..128716)
+                     /locus_tag="CR31979"
+     tRNA            complement(128645..128716)
+                     /locus_tag="CR31979"
+                     /product="tRNA-Pro"
+                     /map="35B3-35B3"
+                     /db_xref="FLYBASE:FBgn0051979"
+     gene            128923..128994
+                     /locus_tag="CR31984"
+     tRNA            128923..128994
+                     /locus_tag="CR31984"
+                     /product="tRNA-Pro"
+                     /map="35B3-35B3"
+                     /db_xref="FLYBASE:FBgn0051984"
+     gene            complement(129025..218563)
+                     /gene="osp"
+                     /locus_tag="CG3479"
+                     /note="last curated on Thu Sep 12 16:03:17 PDT 2002"
+                     /map="35B3-35B4"
+                     /db_xref="FLYBASE:FBgn0003016"
+     mRNA            complement(join(129025..130124,130381..130729,
+                     130936..131047,131100..131249,131362..131501,
+                     131576..131710,131840..134493,139933..140509,
+                     141100..141165,165331..165399,217718..218563))
+                     /gene="osp"
+                     /locus_tag="CG3479"
+                     /product="CG3479-RA"
+                     /db_xref="FLYBASE:FBgn0003016"
+     CDS             complement(join(129928..130124,130381..130729,
+                     130936..131047,131100..131249,131362..131501,
+                     131576..131710,131840..134493,139933..140509,
+                     141100..141165,165331..165399,217718..217930))
+                     /gene="osp"
+                     /locus_tag="CG3479"
+                     /note="osp gene product"
+                     /codon_start=1
+                     /product="CG3479-PA"
+                     /protein_id="AAF53402.3"
+                     /db_xref="GI:28380372"
+                     /db_xref="FLYBASE:FBgn0003016"
+                     /translation="MSTTTITTTVPAAPSKSAPPTPTTTGAAGATTNARTADCRKFTP
+                     NIFNKSKCSHCFRQREEHSAAALECNRVSKCGYLFVAPDWDFSNPLYRTKRWQRRWFV
+                     LYDDGELTYSVDDYPETIPQACVDMTKVLEVTSAVEVTGHPNSIAITAPERVTFVKGT
+                     SSEESQWWLNILAAFPKSKGRHKRSATLPGGQVVGSLRPTSNGDLTLSTKLGNRHSSY
+                     HKDTLTSSQSAGNLLSSLDLGPSSTKTAGSPLTTTQSALADDEEDDGVETGEDVDEDE
+                     EDETSVPRKSKTVSMGGQDENNRNAGNEITNRVSQPTTCLLIEDIRRDEKTIKDIANT
+                     ITNLSQQQNKRWSTAVNNALNNQHHFGHHSQYQVTSRDETDFQMSSNSSSTKSQNPAS
+                     ERPKSLPLASNSTPAIVSAIVKKIPTVMEQGDKTKPTARLQLHLKSPKHYQHERGDPD
+                     GGCNLDELCVNYMAKTDELRSVGKANSKSSSGQGKPPVKEESLNAKKGWLMKQDNRTC
+                     EWSKHWFTLSGAALFYYRDPLCEERGVLDGVLDVNSLTSVIPEPAASKQHAFQLTTWD
+                     KQRLVLASLSPSSRNSWLAVLRSAAGLPQLDTPPKQTDIEQDFIKAQLQQPSSSPVTP
+                     GTPAGPHFSSDEEYRTASEGGRRDSLDWGSPLSPSPPVLRSCLRNRSLASLHKRSRSS
+                     PPSSRRSTVDSVASDELPLLVVPEEMQPTESRELKQQCETLRAEASLREARMSELLAT
+                     LQRTEQQLTARLQEQQQQLNSELTQAKQSASDLMHNLGMQLTESQCQIKQLEDRLAQG
+                     IEENEGLYKRLRELQAQDHSGGAALSNLQRHKIKRMDSLSDLTTISDIDPYCLQRDSL
+                     AEEYNELRSRFEKAVNEIRAMKRELKQSQNQYDALELAQAALQQKLERRQHEDGAQLQ
+                     LMAARIQDLTLKYSSSERQVRALKQKLAKSERRRSLSLKGKEQLELKLSELQRETVER
+                     KEGTPPESSSSESSSQSPLNAHLLQRLHSLEHVLLGSKERLEQSLTQLQQIRAGQRTR
+                     RSVSPMNDRKDGLRQLERALAETCVMVSEQMELTCLQDSCHKCCDLRQRVEKLSALQQ
+                     QTETDLQRSEQLLEQRETDLAQALEKCASQEQEQELLLQQRQELSEELGRQQERCRRL
+                     EKRLELLEREHGKQLECLREVYHTEHANAADEQSFRKRYQTEIEQLRTLCEKGLSAME
+                     TSHKRLTMDLEQKHKMEIERLLAEKETALAEETQATLAALDAMRKAHQSEVQREVARF
+                     KQEFLRQVQRGEQMRGDGAKLKEEDLGELRMEILAFSEKYSIKCVENAALEEKLHMAN
+                     SKLRHFQQMQQLELRNKQFRAHLASDDPSNDVHFVQGLTSDAREDADCEDSEPAPQIL
+                     GATATRTTATTTTTATATTTSAAPSETESNPDSDRETADSSRAPPEKMEQSLFVIPSH
+                     MLNSSLVPAANASDQSQRSQLYRDLDGPEDGYEPCYRPFDIFAIYQNRLSYQGLKGSS
+                     TFGKSLRKSAAQTSPASSEITTTTTAPTTDPKKPSHKQMFKTAAVINIQQQVAQEEKA
+                     Q"
+     gene            144804..148151
+                     /locus_tag="CG32954"
+                     /note="synonyms: Adh, Adhr; last curated on Tue Nov 05
+                     07:50:23 PST 2002"
+                     /map="35B3-35B3"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(144804..144892,145552..145686,145752..146156,
+                     146227..146892,147319..147723,147775..148151)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RA"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(144804..144892,145552..145686,145752..146156,
+                     146227..146892,147319..147723,147775..148151)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RC"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(144804..145686,145752..146156,146227..146668)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RH"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(144804..144892,145378..145686,145752..146156,
+                     146227..146668)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RG"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(144804..144892,145552..145686,145752..146156,
+                     146227..146668)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RB"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(145518..145686,145752..146156,146227..146892,
+                     147319..147723,147775..148151)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RD"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(145518..145686,145752..146156,146227..146892,
+                     147319..147723,147775..148151)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RE"
+                     /db_xref="FLYBASE:FBgn0052954"
+     mRNA            join(145518..145686,145752..146156,146227..146668)
+                     /locus_tag="CG32954"
+                     /product="CG32954-RF"
+                     /db_xref="FLYBASE:FBgn0052954"
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RA"
+                     /codon_start=1
+                     /product="CG32954-PA"
+                     /protein_id="AAF53403.1"
+                     /db_xref="GI:7298167"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHTFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RB"
+                     /codon_start=1
+                     /product="CG32954-PB"
+                     /protein_id="AAO41197.1"
+                     /db_xref="GI:28380373"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHTFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RD"
+                     /codon_start=1
+                     /product="CG32954-PD"
+                     /protein_id="AAO41198.1"
+                     /db_xref="GI:28380374"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHTFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RF"
+                     /codon_start=1
+                     /product="CG32954-PF"
+                     /protein_id="AAO41199.1"
+                     /db_xref="GI:28380375"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHTFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RG"
+                     /codon_start=1
+                     /product="CG32954-PG"
+                     /protein_id="AAO41200.1"
+                     /db_xref="GI:28380376"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHTFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     CDS             join(145588..145686,145752..146156,146227..146493)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RH"
+                     /codon_start=1
+                     /product="CG32954-PH"
+                     /protein_id="AAO41201.1"
+                     /db_xref="GI:28380377"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHTFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     CDS             join(146797..146892,147319..147723,147775..148092)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RC"
+                     /codon_start=1
+                     /product="CG32954-PC"
+                     /protein_id="AAF53404.1"
+                     /db_xref="GI:7298168"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MFDLTGKHVCYVADCGGIALETSKVLMTKNIAKLAILQSTENPQ
+                     AIAQLQSIKPSTQIFFWTYDVTMAREDMKKYFDEVMVQMDYIDVLINGATLCDENNID
+                     ATINTNLTGMMNTVATVLPYMDRKMGGTGGLIVNVTSVIGLDPSPVFCAYSASKFGVI
+                     GFTRSLADPLYYSQNGVAVMAVCCGPTRVFVDRELKAFLEYGQSFADRLRRAPCQSTS
+                     VCGQNIVNAIERSENGQIWIADKGGLELVKLHWYWHMADQFVHYMQSNDEEDQD"
+     CDS             join(146797..146892,147319..147723,147775..148092)
+                     /locus_tag="CG32954"
+                     /note="CG32954 gene product from transcript CG32954-RE"
+                     /codon_start=1
+                     /product="CG32954-PE"
+                     /protein_id="AAO41202.1"
+                     /db_xref="GI:28380378"
+                     /db_xref="FLYBASE:FBgn0052954"
+                     /translation="MFDLTGKHVCYVADCGGIALETSKVLMTKNIAKLAILQSTENPQ
+                     AIAQLQSIKPSTQIFFWTYDVTMAREDMKKYFDEVMVQMDYIDVLINGATLCDENNID
+                     ATINTNLTGMMNTVATVLPYMDRKMGGTGGLIVNVTSVIGLDPSPVFCAYSASKFGVI
+                     GFTRSLADPLYYSQNGVAVMAVCCGPTRVFVDRELKAFLEYGQSFADRLRRAPCQSTS
+                     VCGQNIVNAIERSENGQIWIADKGGLELVKLHWYWHMADQFVHYMQSNDEEDQD"
+     gene            complement(148188..148550)
+                     /locus_tag="TE19174"
+     repeat_region   complement(148188..148550)
+                     /map="35B3-35B3"
+                     /transposon="jockey{}370"
+                     /db_xref="FLYBASE:FBti0019174"
+     gene            complement(171878..180982)
+                     /locus_tag="TE19175"
+     repeat_region   complement(171878..180982)
+                     /map="35B3-35B4"
+                     /transposon="roo{}371"
+                     /db_xref="FLYBASE:FBti0019175"
+     gene            241917..242519
+                     /gene="BG:DS07721.3"
+                     /locus_tag="CG15282"
+                     /note="last curated on Thu Dec 13 16:55:15 PST 2001"
+                     /map="35B4-35B4"
+                     /db_xref="FLYBASE:FBgn0028855"
+     mRNA            join(241917..242005,242077..242519)
+                     /gene="BG:DS07721.3"
+                     /locus_tag="CG15282"
+                     /product="CG15282-RA"
+                     /db_xref="FLYBASE:FBgn0028855"
+     CDS             join(241994..242005,242077..242304)
+                     /gene="BG:DS07721.3"
+                     /locus_tag="CG15282"
+                     /note="BG:DS07721.3 gene product"
+                     /codon_start=1
+                     /product="CG15282-PA"
+                     /protein_id="AAF53405.2"
+                     /db_xref="GI:22946499"
+                     /db_xref="FLYBASE:FBgn0028855"
+                     /translation="MKFLVIVFVALIAVASALPQFGYGGFGGFGGFGGQQQQQEGFGG
+                     FGGFGEQQQQQESFGGFGGFGGIEQQQQQQQGGFF"
+     gene            complement(254058..258098)
+                     /gene="BG:DS07721.6"
+                     /locus_tag="CG12636"
+                     /note="last curated on Thu Dec 13 16:58:13 PST 2001"
+                     /map="35B4-35B5"
+                     /db_xref="FLYBASE:FBgn0028854"
+     mRNA            complement(join(254058..254071,254173..255772,
+                     255972..257281,257807..257914,258002..258098))
+                     /gene="BG:DS07721.6"
+                     /locus_tag="CG12636"
+                     /product="CG12636-RA"
+                     /db_xref="FLYBASE:FBgn0028854"
+     CDS             complement(join(254058..254071,254173..255772,
+                     255972..257281,257807..257914,258002..258098))
+                     /gene="BG:DS07721.6"
+                     /locus_tag="CG12636"
+                     /note="BG:DS07721.6 gene product"
+                     /codon_start=1
+                     /product="CG12636-PA"
+                     /protein_id="AAF53406.1"
+                     /db_xref="GI:7298170"
+                     /db_xref="FLYBASE:FBgn0028854"
+                     /translation="MGHLEFFASISDACSTVEYDKITWQLCDITENKIIARLKNATGL
+                     VVKLLPYKVHILNEPDRRKVFLLRGPTLRLPPYALKENAVYDFSLTVISRFDPRNRMT
+                     TTQTIKGVAKTTFTPQILCRRNCALGVYAPLDSIHLISKCDDCPGTVKRYEWWLLDTE
+                     SQPTLESSHKYLILHTLEPLVQIRLRVWVKGQGWADAFYTLRRNNGPQHGSCTIYPFL
+                     GVEGFTMFEIDCPGFESPFPPITYRYMVSYGVVASNVPYSRIVLTLPAAETIMISICD
+                     AIDMCVEKRVEVKVLSIDKSMLTGQKEVMNYVPNFFKRGHWNRAYIMGIAAITFIETS
+                     IDGDEFYSYLTGLVASTGSQMEQITTLSSHMLIRLHPVDFRGATVMAEMFSHLGDSFS
+                     AIVQEHEWLHREGYYSLTAMHMFFMSILGKKTESHSNAMCSLHNPACMNLQIIDLEKP
+                     FVIKFDPLILVRINSWLMSTWFLYRCIYFLGVIATQRHHPYDDALTIHKSGIAYQINV
+                     TEVTENTKDIQVKTIDHIHVIKLSTKLLYELQRRLNHSSILLQIISQQNFHNIYWWYP
+                     DPFPSKTSVLIVHAYSPVQFFRSAKEFQLTNPLVYKTNITHFNDASFNQYMTNNSIQN
+                     STEVHIYSVMLNHKAMLAVRIVNCSELMYIKMRLHRWPTLGQIRQHACRITPDMQGKR
+                     IWIANSCERSPAYVAIHKPGEIRYKTEDKDQLSARGRKKGNRTHDGSETPEVRQKADF
+                     IDYDNEDIVEEVLPLNYSILLEIYQCNIWKNRSLDPGWSEDHCTTSFEHSRGSSVQCT
+                     CHTLGALSSRIFPISSQLFVEHIPVPIFTFNMILMIFFALLFLLLVFKFLLHLNIISA
+                     YLKNPEFRLQCDASVGKSDQSFVSGSEILLVIVTGGQEFAGTTSNVKFYLKSPHRQQT
+                     SYQITQDPGHPKLLRNSTIKIMVPRGHIYIPTRLALRLVPNGRYPSWYCRSITVVDLK
+                     LKVQQLFLVESWIEGGSHIQFMRSKYFTYGNYSRYPKYTWCKRFRSRAEQLYFSWYLI
+                     NAITGPSQTHGI"
+BASE COUNT    77534 a  53560 c  53766 g  78449 t
+ORIGIN      
+        1 acgtgctggc aaatgcaaat aacgaatttt agcacggcac tgactttggc acgactttgg
+       61 atcgtctgca ttgactttgg tcccctttcg ctgagttttc cggaagagaa cgccggcata
+      121 tagaccatga tatagtggca aagtaatttg aaatgaaaat taatgacatg caatcaattt
+      181 tcgtagtcgt cgttatttgt tattttgatg acttgccttg cgcacacaca aacttggctc
+      241 gctctcactc tcgcccattt gtcaagggct aaacaattgg gagcatgaaa atgaaatatg
+      301 tctgccatgc tggagaggag tagctccagt tcagtgagga ctgtttgctt ttaaccgcaa
+      361 tatgcttaag ttgcggttat ttattgtctg gcatttatag tgcggcagaa aggagcagta
+      421 ctttgctacc ctctacacgg aggaatatat tgcttgtatg tactagaaga cttgtcaaac
+      481 tcgtcagatc ttcgaattac acgtgcttat aagacaaaag tatacaaatt ctatcaaaat
+      541 attaacaata tttaatgtgc aactgtgaca tacaaactga aatcactcat acgacatgta
+      601 taccagggat aatttattaa cccaagtaca tatatggttt ttttcttatt ttttataata
+      661 tttaaaacta atatctttgt gcttgaacta gtcacttgtt ttccgttggt aagttggctt
+      721 aaaatattga aagggtattt acctagatta atgttcacct tttcttttga ttttcgcgta
+      781 ttaaaccctc gtgcagaatg ttttctgtct gattcgtatc gacatatttc gcccgcattt
+      841 tgtattccgg ctgtcaacgc caaaagttca agctgatcag accctctgtt gccctcttca
+      901 cctccttcaa tcatttcctt ccccgcagcg ctttgttcct cttttttggg tgactgacat
+      961 gagctttcgt ttataccgtc ctccgtgccc cgccatccca atccttctga ttgatgggcg
+     1021 acagcaaaaa aagttggcac ttgctttgca gtgtgttgtg ccagagtccc cagaaccacc
+     1081 caatcgccca tccatccaaa aatggagcca gttttggtcg ggaaaggtgg tcaaaacacc
+     1141 acccgaaata agtcatcgcg tgcagagttg ccatcaagct aaagccaaga ttgattagtt
+     1201 attgaaaaga catcagctgg ggaaattggg caactcgtaa aaacacgagg taataccctg
+     1261 taattaacgt atatctaagg aatgttactt tattttacac ttaaaagtat taatatgatc
+     1321 aataattatt attttttttc agtttgggtc ttatataata taaaggaaga atattatttt
+     1381 gattttctca ctgtgtaatc actttggggc cttcgcaaat aagtcataat gatttttgat
+     1441 cgatgcgccc cagtccgagg acgtcggtgg atcattaaca aaactgaaac ttgccgaggg
+     1501 agacaacctt cccctatcga ttcctcgatt ccctgtgcat aatcattttc aataaataat
+     1561 ttttcctccg tccgggactt ccactctttg ttgttggtgt ttttcgacgc tatttttttt
+     1621 gtcggttggc ataatgacag catgacagga taaattactg ccaaatgaat ggggcaaaat
+     1681 atgtcttaag cgacacactc acacacacac acacacacaa gcacacgcag acagccacgc
+     1741 ccacttggcg ctgccaaaag ccaaacatca aaaattgttt tccctctttt ctgttgtgta
+     1801 atttaccacc aaacagcgat ggccgaatga aggaaggcaa cgcccgcatg ggtaaaatta
+     1861 ataggcataa taattttttg acagcacacc caaggataac gcaggcaact catcagagtc
+     1921 ctttgcatgc ggcggtagtg aatttttttt ccaaccccgg cggttgggga attcgttgag
+     1981 tgacaggcag gttgatgata aatgatgtcg ctgatgttgg tccgggttgt gccccggcaa
+     2041 cttgcctctt gccacgtcac ggaggcaatc gtttggggcg cccccgtcct gtatatcctc
+     2101 caaaagtgct ccctgctccc ctccctttct ctgacagaca cagtcgtcgc ctgattactt
+     2161 tgatgacgtt gccacatatt tgtgtgttct tcccccatcc aggaattaaa ttcataatat
+     2221 tttgtgcact ctcactttgc gtttcaatgg gaagtttagg gtacatcagc tagggtagta
+     2281 agttgggcta caaaattttt aatcagtcaa acagaacaat ctgtttgcta agatgaccta
+     2341 aatatcattt tctaaagatt tattttaata actttttaca acttttcaca gtttaagatt
+     2401 ttattatgtt agttaaccaa tttaaaagaa aaaagtatga cgttagttga atagtatgta
+     2461 tatttggtat cttttcttct aatccctcac atttcattag agtccatgac aaaacaacga
+     2521 aaagttcgag acttttcttg atcattctat tctatcacat attttggctc ggcaaataaa
+     2581 gttgaataat taattgatga aataattcgg agcgtgtgtc gctttgacat tgaataattg
+     2641 ctttccattt ggccaaaaac cactcacgtg gccatatcgt ccgtcagctg ttatctactg
+     2701 aatagatata aacctacgag cctagaaaaa cataaagacg gcaatctata tgataagtgc
+     2761 caagtcatca ggatccaatc tatcacaggc gatgtgtcaa tttaaagtgt gagaaaattg
+     2821 ctggaattga tgaatgatga tgacggtacg gatcgcaaag aaaaagtaat tagtgcgact
+     2881 caatggtgtg tgtactcttc aaatttaggg aatattctat ttatatatcc ccttattttc
+     2941 agatgaaaag atgattttcc taaacaaata gcaaaaatat atattgaagt cattttcggc
+     3001 gtcattgtga tcgtgactcg tttacactct gtaattcatc aatttattaa aatacaaagt
+     3061 tgtattgctc gtttggaaat ggcaaattac tctacgtttg tggtagtaag cgctaaatgc
+     3121 acataaaatg tgtttttaat tgcgaattgc caaatggtaa cgagcagcat tcagtttttg
+     3181 ttgataattg agcgatttta aaatcgccag acttgcagat tacgtatacg tactatgtgt
+     3241 aattaactaa gctctcaata aaaatcaaat agaatgtact gaatgtagag tggcattgtt
+     3301 ggccgtggca tagaaattga aatcaacacc catacaatgg atactgaatc tgaaatggct
+     3361 cataaataaa acaattgagt taagccataa atcaaacact taatacaata gtttccgctt
+     3421 ttattattat gtgttcgctt tcaacttccc aactacgctg tgcactcgca taaaatagcc
+     3481 ataaattaaa ataacagtaa aataaaccac tgagtgcgga ctatttgctg gcaaaggagt
+     3541 gctttataac catatagtca tatagccacg gctgctctat gctctataca agtacagtgg
+     3601 tgtgtgtgtg tgtgtgtgtg ggccattgtg aagcggccta agctcgcaaa ttagcataca
+     3661 aactgtctgg cagaaaattc gccattgaaa gagaccaggt gagtcgcacg ataatattaa
+     3721 tatgtacctc acttaatcat aaatgcccgg cgacatgcga gaccccttcc ttgtgccccg
+     3781 tcgccatatg aaaagcaatt gatgtatggt agtaggtagg tactggcaac tgccaactgg
+     3841 caaccagttg tttgttgttt cagttgcggg ttttgttatt ttccctgaaa gctcagcagg
+     3901 cagacatttc ggaattgggt ggctcggtac caacaagttg catgtcgcta actagttaat
+     3961 aataatgctt tggacaaaaa gtggcagcct gtgtgacgtg acgcattgca tgacatgcaa
+     4021 gtggctattt gggcggtgtg aggtggccta ttgccaccca aacgatacga tccgatccgt
+     4081 tacgatacga tgcgatactt agacccagtt cgagacccgg ccaagcaaag tcgaagcgaa
+     4141 actaagccaa attagaagct tttcattttc tgtcgtataa tttatgaccg gcacttttcg
+     4201 agtcaattca gccaagcgtt aatcgccgtt tgactcgttt cccccgtgcg ttcagcaatg
+     4261 tgcataattc agactacact ggacaacaag atagcgaact atagaaatgt ctatcaatat
+     4321 gtataattta taaagatttg tttgccatga aagtatactt aatacattta caaaacattt
+     4381 aaatacaaaa caacattgta aaatagtaat tttccaaatg tgttctaaat gaatagctaa
+     4441 acaaaaataa atatattcgt gtgaaggcaa aatacattac caaaccagca agctcaaagt
+     4501 aagccaactt ttctttacga aacatttctt ttgctgggca gtgaaggtgg tagacatcag
+     4561 tccaactgca ccgaatggaa atggaacgga gagtcggcac tttgtcgcat tgtcgtccac
+     4621 atggactgta gactggccct ccttggccac gtcgccggct accggcttcc acagttatta
+     4681 ttattattgt aatttgctgg ctcagacccc acgttgacag cgtgtcattt cgaatcgttt
+     4741 tgtttagtca tagtcacagt gggtcccctt ccttgccact gccctgcact ttgtttcccg
+     4801 cttcggttta ctggttcggc tgggcttttt acacgaaaat ggtaatatgg catatttcat
+     4861 aattaattgt gaaattatac aaagatatgt atgcaaaaat gctcggaaaa gtcataattt
+     4921 tcatgtgact tggttttttg ccctgcccta tacccttcaa acaaaaggca tggcgaattg
+     4981 attatgactt tggtatcgta aaggtgattt ataaatctga gttatggttt aaaaatcctt
+     5041 atattcattg tagtaatatt taatccctta aaaactgcta cctaactgcc tattttaatt
+     5101 tttctgtgcc gacattcccc ttcaacgtga acaacacgca ctggtgtaaa actcaggcct
+     5161 tgttacatat caaagttggc tgtccaaagt taaaactctt acaattcata ggaattaatt
+     5221 ggtttccatg gaggtgccgg tgtccgtgtt gcggaaggaa ctacttgcgg acagggaatg
+     5281 ttgatgcgat gacgccagcg acaactacat aaaagtttgt gaaaggtgaa atcatgaccg
+     5341 ggccaggcaa tgcaaatcag gaagcgagtt caacacctgc tcgaacggca gaaggacgaa
+     5401 acttcactga cgcgcagtag aaaaacccag gggggctgtg ggcaacaccc accccctcac
+     5461 ggaaaaccaa ctgtcattgc cttgtaatgg aggttgattt aacgggggag gtggaggtga
+     5521 cgtgtcaact gtcaatttgt atttgaaaag ctgtcacttg cgctgcctcc acccagatat
+     5581 tcgtactgcc acatacaaat cacaacaaat aaacgccaat tgacagcgac aacccagcga
+     5641 agaagaagcc atcgcggtac agtgagcact gacaatatgg gatgatgtaa gaaaatatta
+     5701 tttggtatta cattttaccc atatggtttg ttataaaatc cataaattat tatgataata
+     5761 tttaaaacaa tctaattaaa ttagaccaaa taaacagagt aggtatatgt atgattttta
+     5821 gtcctatttc tccaaatgaa tacttctgcc tttcgagctt cacattttcc ccaactcgga
+     5881 ctcctccagg ggtcacaccc atcaattgaa ctttgtcatg ggatacgcat ttctccgttt
+     5941 gactgctaaa tgctcgtgtc ttgagagttt atttgtctta cttatttatg tttatgtgct
+     6001 cttcttaagc gttgctaaat taggcgcacg tccgtttcgg gttggtattc ggattgaatt
+     6061 ggattggatt ggggtggcaa atggtgtggg cgatacggga cctacaacac cacctcttcc
+     6121 gaatagcagc tgtgcctaat ttatttcagt ggacgtgccc ccatttcgcc tgtggctgcc
+     6181 gcccatcggc tcgaatcaaa ccgaagccca gaacgaatcc cacagtttga agatttattt
+     6241 cgttctgaag aacattaaaa ttttacaaat tgctttaatc aaataaattg ccgcgtttat
+     6301 ggtgataaat tgtgtttggg gccagcaagt ttttgattca ggacgcgtga acacaactgg
+     6361 ggaattagag taaatgaaag agaaagggag tgagttaaat gcagtgtgaa aaaataagtg
+     6421 cggccatatg tggggtatgg tgttaagtct gatttaactg atgtaacatt gacaatgaat
+     6481 aactttgatg aagactacct accactttag agctgagcct ctcaaatttg gtcttcgtct
+     6541 caacccttca gcacgtcgga cttattaaat ataatttatg gttgctttca atttattttc
+     6601 aacgccagcc gaactattat agtgtttcca tcttcactta agtttgcttc cgataacgat
+     6661 aacgataagt atattgcgtg tatatataac gctgttcacc taagtgactc ttttgcataa
+     6721 tgaatggacg aacgggtgag ttaaagaagc cgggcatgga gaacaagaag aaacttaatg
+     6781 acttgacatc tgcagacatg gccaccataa tactttgtat gtaaatttcg ccaataaagt
+     6841 aatttctttt aatgagccgc gggctagtca gaaagcggcg aaagaaattg ggaatgggaa
+     6901 tgagaatggg tgagccatta agtatcgcat aaagtaacaa aatgaattag gccccgacaa
+     6961 gatgatgatg atggtgatgg tggcgatggt agtgcggtgg tgcaacgttg atggggccca
+     7021 cggcccctat caactcggta gtctcgatgc gcggaagtaa ttgagttaaa tatgcacaag
+     7081 atgacagcat aaattatgcg cctcacaaaa aaagagttaa atgtggtctt gtggcaaaca
+     7141 aaagggtgga gggttggccg gggaaagaca atgacaagaa ccgccagttc agcgctgcga
+     7201 tggcaaacaa aaggcaacaa atgagggaag cctacttatc aacggcatgg gaaccgcgcc
+     7261 gaagcccgta cctttgagca ctcaagattc ggtccttgaa aggggttgca atctacctcc
+     7321 tcgactgctg acacttgaaa aagtgtgcat aacaaattcg ataatgaact atgaaatttt
+     7381 tgccagattg taaccctttt tggcattgtc ggaaatccct gccccacatg tcacagttga
+     7441 aatttatttt gctgtcataa attttgaaat aacaattatt gccatttata gccggcaaag
+     7501 ggaatccttg aaatgggttt ccatttgcat aataaacgct tgtcccagag gaagacgtat
+     7561 ttcatccttg gcattgtcat aatttcaggg aaggacttag tttgagggat gacgtacgac
+     7621 ggattgagga aagctagttc ttgcggttgg gaaaatatga cattgtaata atattatttc
+     7681 ataagtttgt gcttaaaagc tgaatatata ttcctccatc gatatacatt catggtaaat
+     7741 accattattt tcattcattc cttctattat aacattttaa aagaagattg tgtacattac
+     7801 ttcgacaaat tcttgctgtt ttgcactgaa ccagataaaa taccattcta tgtagaccga
+     7861 aatttcgttc tatatatttt taaaattacc cagttttttg agaatgtttc tcagatattt
+     7921 ctctatattt ttcattcttt cacgtgtgtt cgaagtgaag catagaaagt aaatcaactg
+     7981 taaaatggtt cgttcctgct cgtaaagtat aaatttgttc cacaatctgc agaaaaatat
+     8041 gttggaaata tgcatatttc ccccgttttc tctgcactcc aactactcgt aatttttatc
+     8101 ccaaaaattg ttcggctagt gagtgtgtga gaaaagtctc gagatgagaa atctgtgttt
+     8161 aggtacgcca ctactcactc ggcttcctca cctcagccgt tttccattta aatgtaaatt
+     8221 aatggtcagt taagctgtca gccatggagt tttgggtctc attcagttca gttctgcggc
+     8281 tctaacgatt tgggtggttg gttgatggga tgatgggaat gaccatcgca gcacatcaca
+     8341 gatttgtggt gttaatttgc tgtgaaatgt tgactgacca gagaccagtg accggtgacc
+     8401 ggtgaccggt ggaccagttt accagttgat gccatttgcc tgtaattaat tgtctgtgtt
+     8461 taatgtgagt taattttatt tatggccagc tcgttattta catttacgca gcgaatggag
+     8521 tccgatcttc acacggcacg caattacaga acaatatttc agatttcatt cacatttttc
+     8581 tcttagctgt ttctttttta gtcccagaag tcatcgttgt tgtcaggaat cgttgtctgc
+     8641 ccgagacgtg gcaacactga caggacactt ctgacagccc tgagacagcg gggactaagc
+     8701 ttctgctaat gtcaacagca aacagttcaa attgtgcaaa atatagcaac gagtggtgga
+     8761 aatgggtata tcagatattg taggaagaag tcgcatagct atacaacttt cagggttgcc
+     8821 aggctgacta aaattattat ttatgacagc tagtcgaaat atttattttt ctaaaaacaa
+     8881 ttctgtaagc gaaataattt catattaaat tttgtacaaa agcggtaaga gaattcaaaa
+     8941 attttggttt aattaaggaa ttaaggaaaa aataaaaaag tctgtctttc tcaatttaca
+     9001 aaccttatga gcctgcatta catccagcta ttgtagggca tttcaagata taaaattgca
+     9061 ttgccagtga acaaagaggg gcgaaggcgg ggaatacgaa caaaagggct ggaaaacaaa
+     9121 agccaaacaa taagcaacag tgacagtggc actgttgtct tctgctccac gttcccttct
+     9181 gaatctgaca gttgtcaact gctgttggct gccgcctttt tcgttactcg gctgtcatcc
+     9241 tccgaccctt tgctgttttt gaaatttttc ttatttcttg tacatatatt ttttcgggac
+     9301 tgtggtaatg ggtgtgcacc gctccagtta tgacgttctt catcagtccg gagctcacct
+     9361 acgttgtaga accgcccggt aaatcaagtt ctagtcttca cttacctgga ataaactcgc
+     9421 agcattgttg tcttcggctg cgatcataaa tttcctttta accagtttag aagtggggga
+     9481 aaaatatcat aaaatacgaa atgaagtacg ctcgacatgc ctcggacgct gtagaattta
+     9541 tgtgacaatt ttgtccagaa taaatgactg cagactggcc agagaaggat gccccatatg
+     9601 tgtatgtgcg cattctggtt tgcggtaggg tatcaagcgg gtctattaat taggcatcaa
+     9661 atctaaaaat taataaggca cataaaatat atctagataa attataaaaa aatatttaac
+     9721 aaaatctatg atgctcatat acgagacacc ttaaacatga tgtataagaa gcatgatgta
+     9781 tagttgttta gcaacacaat aaaaagttaa ttgactattt taaaaatact tcccatttca
+     9841 gggtacttca aattccaata ctatttcttt caacagcaat ttccaacttt ctccatgtcg
+     9901 tagaaattag ttgggctaaa aggcacgcac accgaggagg aaaccagcca gaggaaggaa
+     9961 gagtgatgca agtcctggtc agcgtttggc tccggcgatt gccattccac aatgcccaca
+    10021 aaatgggtgc ccactaaaaa ctaagccact ggccagcgga cggaaaaagt gggtgtggat
+    10081 gtccatgggg gttgtggaga ttgcggcttg aggatgcagg atgcagggga ggagggttcg
+    10141 ccacttcagc acttggtctg ggttcgcttt tgtctgttgt cggtcatgtt gcataaatct
+    10201 cattttgggc tttatttact cacaaaatgt tcacgtttcg agctagaccg cctctacact
+    10261 tcctgtgcag ctcgtgcgcc ctctctgtcc agcaaataaa ttcatttagt ggcatttagt
+    10321 ggcccctttc accttattgt ccgtgggcca tggccatccg atctcacccc caggatgagc
+    10381 ttgggtccag gatattgcta gtgtcccatg tgtatgtgtg cattttatgg cagtgcctgc
+    10441 gtttttgcgg ccaaacatga agtgaaaatt atgcacacac atatggcaaa tgtatcgcaa
+    10501 ttgttttgcc catccccccc attttcaccc aacccttgtt gtggctgctg gatttggctt
+    10561 tagtcggcag ttaattttca tattttccag ccgcactcac ataccagaac caccataccg
+    10621 tacccaccca ctgccaccac tgtgcggaca tttcgtcgta aataaattta acaatttatt
+    10681 tagattgcaa tgacaggcgg atgctattaa aagcgaaatt tatgaggtcc tttggcgcta
+    10741 ttcgaagggg aggaagatgg aaacggaaga tgcacaaatg aacttcatga tattgcaatt
+    10801 tgcaacagac aagacgtaca aaaacgtagt ggaatgcttc tgaaaaccaa aataacttta
+    10861 cttaaaaata tcaatttatt tctgattttc tagaccatac atacagacgc cattttaaaa
+    10921 tctcatttaa aatctctctt caaagaacag gaaatgcgtt ttccataagt caagaagatg
+    10981 ctgacagctg gcgattaaaa atcccaagtt gggacagtac aaatcaaacc acttgaccca
+    11041 acgatgtcat caattagaaa tttgaactct catcctgaca gatttttttt ttaagtatag
+    11101 cacttgacta tgttaggtca ccaaaataaa tacacacaaa tttgttttat ttcctttccc
+    11161 agccacatgt tggactcgat aacttcaact gcagaaagca gaacattttc atgaatatat
+    11221 ttcgtattaa ataatttatt ggcaacgttt aactaacccc aaacgtgaat ggcaaatgaa
+    11281 gttggaaaag ggttaaacta tgcactggga aatccacttt tgccatcttt gacaagctgg
+    11341 tggagggcaa ccgtgcgtta aacaaacttc ctgcataatc ccaaacccat gaatcttgtc
+    11401 aaccaccacc cccttttcaa ttcgaataga ggaagttaac ccccgttcgg ttttggatat
+    11461 gcttttgttt gcctgaaatt gactgattac ccacaaaatt accaacattt atttgtccgt
+    11521 aatttgagac attaataaaa gttccttttc tgtttgttcc cgccttgggt tctttggtgt
+    11581 tgcgttagtt tgccggatgt tacccctttt cataccggca actgacagtt ggcttctaat
+    11641 atttcaaaat ttgtaacatt tgttacactc aaatgacatc agctaattag cccaagacgg
+    11701 caacgaattt gttgatgtta attaggtttt atgacctcca tgggagatcg ggttcaggtt
+    11761 ctgtgggaat tgaggcatag aaaaaatgtt aaaatggaat acaatacttg cttttagaat
+    11821 tcaaatgttt tgtaggaaaa ataaaagttt ttaccacata atctgaatct gaattaaatt
+    11881 tttaccatag tgaagtttgc gaaatgcaaa ataaattgaa atggacaact taagaatagc
+    11941 accataaatt atttaaattt tccctcatct taaaatatca taaatggcac ttgatggcat
+    12001 cacataactt tttgggggct tccaggcaat tggctgtagt gcgttattta tgacccctct
+    12061 gagcccaagt gttgacggaa gttgtgagaa ccattcaatt ttcttggggt cctcattgtt
+    12121 agaaattcgg ggaactaatt gagcgtgtct catgaggttt cggcacttga aacaaacgga
+    12181 gacgagcaac cggcatcaca gccaacagac tacaactacc gagaactcaa tttacttttt
+    12241 cattagggcg agcgaggatg gctgccaagg attgaggagc ccaagggtgg agggcacttg
+    12301 agtgcagctc caattatggg gcgaaactga cgtcagctcg cggctcgact caactcataa
+    12361 tgaaaacatt taacctgtcg tcgtcagtga ttcaacatga aaatcattcc gcatccaggc
+    12421 ccggcgattt ggttttcctc tgtctatcca ctttttcaat tattcgagag cactgcgatg
+    12481 cgatgagatg cgatgagatg cccaacgcca tggttgcgga tcctttcgag gaatcggagc
+    12541 ggaggcaagc tggaaaaagc gaagggagaa aaaacgaagc acttgagtgg gaattgacag
+    12601 agaggcaggt ttcaagttga gtgcataata cacctttttc ttctgcacgg gctccttgtt
+    12661 gtcattaatg agacgtcttc ttcaggccca gcgccgattc tggtccggat cgggggttct
+    12721 tttggaccgg gtgcacaatc atttccactc caggttgtca aagactatgg acaaggtccc
+    12781 tcgtcatctt ttgtggcagg ccagccgcgt cgcggtacct ggccctcacc gcaccccaag
+    12841 aaccccacac ccttttctcg cagtgtgcgt gtgtgtgtgt gttttggaca tttcgggcat
+    12901 tatgtcgcgc ctactttttc cgccttgcgc ccttcgttaa atgagcgtcg tccgcgtcta
+    12961 gcctttgatc ttacgcttcg cttctaattt gattatgaac gcactctatc gcacgaggaa
+    13021 gtagtccggc atgttcttgg gctcacactc tagcactccg ggtcctttga taccctatat
+    13081 cgggggtaga tttctgatcg aactagaacc ttatgataat aggaagatct cagatatcgc
+    13141 tttaaagaca ggtatatatt tttatgattt gtaataacat caagggaaag taatgcatta
+    13201 atgcaataat ggtttttttt aaatggcttt ccattatcat aaacaaattt aaacatgcac
+    13261 gctgatgggt atctaagagt gccgatagct atctcagaat tccaccttgc taccattcta
+    13321 aacgtcttca ttttcagctc gaatgctctg cggctggggc agcactttca ttccactcac
+    13381 ccacttgcaa acacactcca agtccattcc gcttccactg atgttgatgc tgatgagtgc
+    13441 gcgccgcggc ttactcaatg acgcgttgaa agtgccataa aaaaggacat gaagacgtcg
+    13501 ccgggtctcg gtggcagccg gagatgggga tgtcagggtg gggttaggtg ctgacagcat
+    13561 tttaaatgaa atgaaaacga gatgctttca ctttggccca caagtaaatg tctcacttgg
+    13621 gtagatacct cctggccgct tctgttcgca tagattgcca aagtaagcag aagtaatgcc
+    13681 agcaaccgag aatggagtat ttgtattccg ggcggggtga aaatcctttt ggtaccaaaa
+    13741 aaaaaaaagg gttttgtgtg atatcccgtt caaaagacga ttacattcat atcaatgggc
+    13801 acagtaaata tacaacttta agtgtcgtat gctaattaaa aataagttta aagcttatca
+    13861 cagacgggtg gggtaagttg aactctaacc atttgtaaat tatattttat tattatattt
+    13921 ttatcgactt tggtgcttcc ccaaaaagaa cgacgagatt ctaacaaaaa taaagggtga
+    13981 tagattcgaa tttatacaga cacgtgacga tacgcaggac catcaataaa gtaacacaga
+    14041 aatcgtagag atgcgagcga aattcgaatg agtagttaac tcaaacgcag cgactgagca
+    14101 gcaataacag ctccaacaaa atgcccgaca gtcgtaaagg acccttgtaa gaacaaacac
+    14161 gccatgcata gctgatgaat cctccgaacg tgaagagtta tacgggaaga tgtgaggcaa
+    14221 accccttgcg caataaaaac taaattatat ccttttgagg ttcaagtggc ggcggaaaag
+    14281 aacaaaacgc tgcagaagca acatgttggc cagacgtgaa gtcacagacc gaccagaaag
+    14341 atccatccga agcacaagag atatatatgc atgtataaac gtataaacca accgaccgac
+    14401 agacgtcgcc gacccacaca ctatttaatt tagtacaaaa ttattatgcg caacattagc
+    14461 actcgtctcg gtttgcctgg gttttcctct cggcttgggt ttttatcgag tctgggtttt
+    14521 gggcatgggt cttttctcga gttcggagcg tgaggtgcct caaatgaggt gtgaatttcg
+    14581 ctgtcagccc gtcattctca acggactgag gagaacacac aaaagtgccg acattggtgc
+    14641 ttcttctgtg tcggttttga atttatgtta atacagtcgc gctttgaagc cagcgtaacg
+    14701 ttgagcatcg caattgtctc aataccctcg gagtattcaa attaatcggc atagtctctt
+    14761 ataataaata gcagtgcgca tctgcaagtc tgaaaacata ttaaacaccc aaaagttcat
+    14821 tcattattaa taatcttttt cccttttttc cgtaccttgg ctcacatgga tctattataa
+    14881 aacaatgcat cataagctgt gctcttacgt tacggtttgt ctaaaaatat gtattcacaa
+    14941 cgcttgccat tccatatatc ataatcgaac agaacgagca gaggaaaaca tttgcaaggc
+    15001 acccagcgag catcagcaga acaacaaaca aaatatttat ggccaaacat aaaaatcttt
+    15061 atgaccttcc cgattcaatc agagcagagc tggagctggg cgagcacaac aactgagcca
+    15121 ggtcccaatg cgactacagc tacaacgact ggcgcttcct ggcgaacaca ggatatagcc
+    15181 atccataagg tgtttccccc cggccaactc gccctgccgc cctgccgccc atgtgtgttc
+    15241 gccttgataa agagaacgta aattatggcc ctatatatca tgggaaatct atcgacaacg
+    15301 ttgaattttt atggtctccg aaggaacggc gcagtttaac gagccggcga gagtctggcc
+    15361 atgtttattg atttactcgc taatttttca gctccggcaa agacggccga ggagtgcggt
+    15421 caatcaagga catccgagga ctggcccggg ctgagatgag agtcgatcgc ggcaggcagg
+    15481 ccattgtcct tgatttatgg tccgtcattc attgccagcc accaaagttg acacttctgc
+    15541 gtatatcccg gagttcgtcc tgccattgtc atccagtcac tcccctcgat gtcgtcgtcc
+    15601 tgcgtccttc gacgtccctc ccgttgcgct catcctgaca ttttgacaaa taattttatt
+    15661 gatgctctgt tcctcgccgt ctgtgtttgg tttttaattg tcgctcagct gcgctggacg
+    15721 cacgtgataa ttgctcattt tatgacgcat ttatcaaatg ctcggccttg gtctcgtcct
+    15781 tggcattggc atcggcatcg gcattcggaa tgtcctttgt tcggtggtga ttttctgcat
+    15841 catgctgcgt ctgacagttg gactagcgta ctcctcctgt tcctcttcta ccttcccctt
+    15901 caatcctccg cagatgagac agtcacgcgt catttatttg gccgtgacat ttccactccg
+    15961 ggttgccatt ttgcattcgc gggaattagg aatggatttg gaaaaactgt ccgaactctt
+    16021 cggttcaagt acattcaatt ctgcattcag cagatggtgc tatgagctat caaaatcaat
+    16081 aagctgtgaa actagatgga aattgtatgc tttttaaaat attattaatg taaaggttat
+    16141 ttgtataaag acaaatattt accattccta gaactttctt catcatccgt acaactttta
+    16201 aaatctatat ttccttcgaa taaataatct attctcatag atgaactttt cgtgatttat
+    16261 ttgttggatt aaacctttat caataaccgc cgtatatcaa caggacatct ccgccctctg
+    16321 gacccacagc cgtcagatcg gatgttgtag gcaattaata cttaataccc aaaatggcaa
+    16381 taaaaattga tgccatcaat ttatcatttt agaatcctgc acgtgtcaac tcgtaggtat
+    16441 ttccactttt tgtgtgtgtg tccattgttc caatatttgt tttttttttt agaggggagc
+    16501 attggcaacg attaatttgc tgtattaatc cagcaagcga tgccgcacgg ggggtgttgc
+    16561 aaacacaacc cgcagttggc aaaaagaatc cccttttttt ggtgcattac cgtaaaattt
+    16621 caatgtcacc attattcttc ttccttttgg gcaactatta cgagataaat cgttgtcgat
+    16681 aaattatcga caggcagcag gcggagctcg gactcgattc agctgggtag tcgcagcggg
+    16741 acaccctctg caccccatcc ccttcataga gtacgggata cattttttag ggatcctgct
+    16801 tagttcgctt tcatacggtt ctattgaatt tattgcagtt tgtttttcaa ttttttaaat
+    16861 ggcattttcg cccttccgcg ttgggaagtg agttatggtc ttgctcgatg gcacgtagat
+    16921 gcattcaaat cggaagtaat tatggtgaaa cataagcgat atgagggaaa atagctgaat
+    16981 ggcttatcaa gcgtgaacaa ttttgaaatc acttttcaat gagggtgata tttgtaaaaa
+    17041 aaaaatatta aaaaaaaaaa aacaaataaa aaagtagcac atcaagtcaa aggcacttga
+    17101 actagcagat gagtcacaac gaatgctcag atgaaaacca aatctttaga gtaattaatt
+    17161 tcatggtaat ataattcata tattcccatg ccaaacaggg taaaatatgt cggccaagat
+    17221 aatagctttt ttaagactac gcacaaggat cttcagcagc aaaataaagc aattatcgtg
+    17281 ggtcggtttg ggagttcttc tttgtgcgta agtcaactaa aaatccgcag acagttaaac
+    17341 gactttccct taccagtggc aaccacttca cggatatata tataccttca tattgccgga
+    17401 gataatgtca aatttattgc attatttttc gctcctttcg cccgccagtt gctctgcatg
+    17461 cgtgccaccc actcttgccg caggcaaacc atccaccacc cgccgccacc cactggtgtg
+    17521 cataaatcaa aaggtaaatg gctgcatttt tatgactgcg ccgccatcgt cgtcgttcgc
+    17581 cttttgcgct gcttcgcttt tatcacacaa acatttctgt gataataaat gccataaaaa
+    17641 tgaagaaata tgtggttaat tttatgtgct gcctctgcca ttccgccgct tgttgttctt
+    17701 gctcttgttg ttgtagttgt tgttgtggca tccagtatac ttccttttta tattccccat
+    17761 ataaatgttt tcgcggctac aaatttttcc cactacattt ggatatctct cggcgctttc
+    17821 ctttacaact ttggcgaaga tatcgagagt attcgaattc gaaactcctt atagttgcat
+    17881 aaaacgagat ttttagtctt aaaggactct gatgcactat ataactagaa gctcaatata
+    17941 gttttttatt tcagatctta ttattgttaa cctaatagaa ctgctgcatc tttaactgcg
+    18001 taccttgtaa tttaaattac gctttttttt atatttgtaa tcttcgctac acgacctcct
+    18061 tttgaataaa atgaagttat caacgctgaa atgctgcagg gtgagcatta gtctgaaagc
+    18121 aaatcgcttt agtcacggta tcaaggtgtt ccgcacttcc gcttggcaat ttcttcggct
+    18181 tcgtgtaatt tataatattt ttgtattatt tatatgacac ccccggccac tggtttgaat
+    18241 ttaccttcag cccactcccg caatttgcta gtaaataatt cacatacagg agcagcgaat
+    18301 ttatgacgct gacaaagtgg ccataaaaaa taacaaataa taagagaaaa tgctatggaa
+    18361 cgtgtgcgta aactctccaa ctctgcagca gtatacatgc atatatacac gtatatacat
+    18421 tattttggca acactttttt cgctttgttg cacgcaaaac cgcaaattat gtctaagccg
+    18481 tatcgtaaag tatcgcagca cgctcttaac taataaatcc atcatttcgc cgaaggccga
+    18541 gagtgtttcc tctacctgtt ttagcctcga aggtatttcg ctgccgggtt tcggcttagg
+    18601 gattcccaca gaaaaatgtc taaacactct aaatcgggat taaaagtgtc gcttctgggt
+    18661 tttcggctta tcgtccatat ttatacagat tttaatttac ttgccagggg ctttgaaggg
+    18721 ttcttaaaac tcagcttggt gattaggtaa catatcccaa gggccagata ttccaaaatc
+    18781 ccctcggggc ttgagttaaa catcttgata catagtttat ggttagtcca accgatttgg
+    18841 ttgggattac gttggagttt taatatcggt taggtaaagt ggctagaatg atctagatca
+    18901 acaaaatcta aatgattagt cacataatat tttctaacta cttaaataat agtaatcaat
+    18961 tatgtttaga aggagctagc ctattattag ctcttaataa tttaatataa atcagcctcg
+    19021 ttttgcaatt aaacattatt agtattcagt aattatcttg aaaccaagca acgtgattaa
+    19081 cgcgatttcg tgcgaaacgt aaataagtaa gttaggcaag tgctaactgg aaaattcaat
+    19141 ccaattgtag agcgattaaa aatgcagcgg agttaaccaa accattaaac tattaccaac
+    19201 tctctgagtg caacatgcca caacacacat tcgcttgttt gatatgtgca gattaattac
+    19261 acacacacca aggaaggagg aaaaattgct tggaaaaact tttcgacgcg acaacaaaac
+    19321 aacgatttcc acttaaacag caaaacagga gccacgaaaa aaaagaggga aaaccaaccg
+    19381 cagaagttaa tcgaaaaacg ctaaacaaag gccacaacaa cagctgtgcg agtgagcggg
+    19441 tcagagggag agaggtgaag tgccagagcg ggagagcgac aaacaaatta atttttatat
+    19501 gacattaaaa cgagcgaagc gaggcgatgg aaatggttga agagtgtgtt taagtgttga
+    19561 gtggtttgct tttccgagca acaaatctcg gtgcgaagac aatgaggcct ctgccgcgcc
+    19621 gctgccgcca ccactgcagc gctgctgctg ctgacagccc catcaagaaa aagttccccc
+    19681 agagagacac agagagggag agtgagcgtg acatatgtgc caagataaaa ttaatgtgcc
+    19741 ccccaaagca gccacaacaa caacaactca ctgagcggcg ctctcagcgc tcaactgttg
+    19801 ctctctggac gagtgttgcc gtggtgcagc catattatta tgattttaca ggaagccaaa
+    19861 tcggagagcg agagagagaa aacctgtctt ggcttccatt gcaaggcgca tgcgattgtg
+    19921 attggacaaa aagtacgcca tatttgaaaa ttacgcacag tgcatctcgc tcgcatgcag
+    19981 caggcggtgc tcctctaacg caggcagcaa gccaaagtgt gcagcgctgc tcgccggaga
+    20041 gcgctctctc gcgagtgagc gaacgcttcg ctgcgagggc agttacgctc gctgccgcag
+    20101 tcgttgaatt caattcgaat tttgacttcg cagcgttcag acgtgttcgg aaaaacctga
+    20161 aaaccattcg agtgacaaga gatataacca acaaaaataa gaaacctcag tcaactgtgg
+    20221 tcactgatcg agagtgtttc gtttcgagtg aaaaagtgaa atcagagcca aaacacaaat
+    20281 cgcttggcca gcaaattgaa atagtgctac agagataaat tctctgctca aaactacaag
+    20341 tgtgttaaat cgccgatttt ctttggattt accaacgttt gccgacgcat ttctgtggat
+    20401 taacctcatc gtcgccgaag caaagctacg catttgtagc cttgtctcgc aactcacgtg
+    20461 gattaccctc gagaaaacac actaagcaac tgatatggtt gtcttggaag gaggcggcgg
+    20521 cgttgttacc atcggtaaca accagtacct tcaaccggat tatctggccc cattgccaac
+    20581 gacggttagt attgactaag ttaaaagctg taaaattttg ggatggccgt aaaatctatg
+    20641 gttttaaatt ttgcgtactt aagaattaaa tttggaaagt tcataacatc aaacaatttt
+    20701 tcaaatctaa aaatttatat cgtcattaat actttcacga acaaactttc tttggaagaa
+    20761 attgcatttt ctcatagttt taacttctca ccaaattaaa atcaatatta tcttaaaact
+    20821 cgtaaaaaac tttaaaatta caaaaaacct aattccaact ccacattttc attttattca
+    20881 taacagatgg acgccaaaaa gagtcccttg gctctgctcg cacagacttg ctcccaaatt
+    20941 ggagcggatt cgtcggcagt gaaacccctg ctggcaatgg acaagaacaa gacaaagccc
+    21001 ggagcatgct cctcgtcgtc gaactcgtcg agttcctcgg gcagtgctga gatctcggca
+    21061 gccaagtcac cctccggcca ggccaaatcg cccaagtcga gcactccaat cagctcgacg
+    21121 gcaaccagcg caagtctgag taataccagc accggtgaaa tcaaactggc cttcaagccc
+    21181 tacgagacca atgtgctgag ccaccagaac cagaactcct tcaagagcag ctcttcgctg
+    21241 gatgcggaac ccacgcgtcc cagctctaag aactcctcat ccgcccagga gcgtgtgcca
+    21301 tcgcgcagca aatcaaatgc cacgcccaca gatggcggca aggcggagat ttccgcgcac
+    21361 gattcctcat ccagccgaaa gactgtctcc ccgtcgggat cgtcgcaacg cggcgccagt
+    21421 cccattgtgc gctccggcat ggaggtgctg aacaatgcca acggaaccgc ccagcatccc
+    21481 aaggaaatga gcagcatggc tgcggcggca gcagcagcag cagcggctta caaggccgcc
+    21541 ggaccctatg ggctgaatcc cctgtccgcc ctgtgctgcc cgcccggaat ggagcagcac
+    21601 gccaatcccg ctttccggcc accgtttgcc ggaggattct cacaccatca cgccgccatg
+    21661 ttggcagttg ccgccaatgg aggatatcca ggcggagctc ccggcggtgg accagccgga
+    21721 cagcccaatc cgtacataag ttatcagcgc atcaagacac cagctggcgg cgaggctata
+    21781 gtgccagtct gcaaggatcc gtattgccag ggatgtccct actcggcgca cacgcaacaa
+    21841 atgctcatgg gtgccccctg ccccgccgga tgcacccagt gcgagcatca gaaatacggc
+    21901 ctggccatgg ccagtgccgc cggactgcca ccagctcatc catactcgca ggcagcggcc
+    21961 gctgctgcgg caaacgcggc ggcagcccgt tcggcaccct acgtgtgcag ttgggtggtg
+    22021 ggcgatgcct actgcggcaa gcgattccag acctccgacg agctcttctc ccacctgcgc
+    22081 acccatacgg gcaatctctc cgatccggcg gctgctgcgg cggccttggc ccaatcgcag
+    22141 gctcaatccc tgctgggaac cctatttccg ccatcggctc tccgagctgg ctatccaaca
+    22201 ccaccactga gtcccatgtc agcggctgcg gcagcggcga ggtatcatcc atacgccaag
+    22261 ccaccaccag gcgccttggc tggaggacct tcgcccttcg gcgccgctgg agcattcaat
+    22321 cctgccgcgg cggctgcggc agctgctctg ggaccctact actcgccata cgccatgtac
+    22381 ggccagagga tgggagcggc tcatcagtaa agtgacttcc acaagagtta aataccaaaa
+    22441 aaaaggggat tacgttgaag agacaaccag agatatagaa tgaaaccatt tccaaaccct
+    22501 gtttgagata ctttcatgga aaacaaaaaa gataaaagca caaaacgatt gcaaaggctg
+    22561 acacatctct ggcttttgcg ctattctgtg atactaaact gtagtttaaa gagcgctagt
+    22621 tacacttaag tttcgcaaac agaaaataat caaccaccta aagaacagta gtagccgcaa
+    22681 aaatagcagg atcactctct tctatcatcg taaatgtcta cattgttagg taaaaagaag
+    22741 tgcagccaac acgaagttca ctcactctta tttgttctca aagttcaacc aaagtttttt
+    22801 aagtcttagc cagaacacat tctacaactt ccaattttgt taatgaagaa cccattgctg
+    22861 agatgttcaa acattgtttt agtttaactt cgctttaagt tctattaact attattacga
+    22921 ttatctgcaa gcatacaaca aaccaacaat tgaaataatg ttctttaagt caacttatat
+    22981 ttaaagtaag tgtaatgaat atttgtaata ccaaccaatc agagataaaa aattaaaaaa
+    23041 aaataataat aattacgaaa aataaaaatg aatatttcat ataattatgt actgtgtgta
+    23101 aattctataa atataaacta gatttgctat ttactccttg atgtacgcaa accacaatac
+    23161 gatctacaca agaagccacg gaatatcaaa caaaaataac ttaaactaca tgcaacacat
+    23221 tcgtataaaa ctgagaaata tacaagaaaa aacaattttt agaagcataa accacgaatc
+    23281 ttttcttttg aaacaaactc ttttacagct ggaaaacttt ttgttgggcc ataatttcgc
+    23341 ttcggttatc tgcgctgttt ggctagctca catatgggtc ataaatattg ctgataatgc
+    23401 tccactaggt tttcaattat atttacataa ggctgagtgg caaattccgg aaaccataac
+    23461 aaggtctaat tctcttaatt ttatagttct tacttagaaa tgggactaaa ggagttttta
+    23521 attctgagaa ttgaactgct tcttataagt atcattgata ttaaatgtaa ggttctaggc
+    23581 aaataaataa ataaaatctt ataggttatt ctttaaaaat attttaatta gttttatttt
+    23641 agacaaaagg tattttaatg tgctctcaat tttttctgga tgcactcgaa gaaatctggg
+    23701 tttccccatc tcacctaaca catttgaaat tcaaattagt actattaatt tgcattgctt
+    23761 gttgcatttt aagcgcactc aatttgtatg cagcgattcg tattcagcct aaaaaatcaa
+    23821 atcaaaagca aatttcaatt tcacgagggg tggggtaaat aacttgattg ggcaagcctt
+    23881 aataaaacag tcgccggcaa cagtcataaa acacccaagc tgaaaattaa aaatcaataa
+    23941 aaatcacatt tgtaccaaca atgtgtataa caaaaatcaa ttgccattca atgagcaact
+    24001 caaatgcata tataaatgaa tatgtgtgca tatgtacgta tgtatgtaca tataaaaaaa
+    24061 tcgctgacaa agcgaaatga ctaacattat tcacaaaaaa tatgcttcag aaatatcaaa
+    24121 atctgaaacc aaccagacaa ctcaaactta agcaacaatc agccacagca acaatgaaaa
+    24181 tcaatggtga tatttcgagt gtatctcata aataaattaa ttgtgacaat attcaattga
+    24241 ttcgatttga aattatcgca ttcaactgtc aagcgatcaa cgacaacgtt ttttcaattg
+    24301 tttcaatttc cagcggattc tttcaatgtc tgttttcatt cccccttcaa cccacttgaa
+    24361 tgttgtttct gatttgattt tgattcgttg cgaacacagc cgtttggccc caatacaaat
+    24421 gtgggaatat gatattccgc agggtcgggg ctgccaaggg gtagacttat caatactcag
+    24481 caatcagtgt tgtcaaatga agtggcgaag tgcgagttct tgatttcaga atgctgaggg
+    24541 atttcaattt tgtaaagcat ttcaaagaag tgggagtgct ccaatggtat gttttatata
+    24601 ttcagcaaaa caataatgca ataaaagagc aggaaatcag aaatcaaata aaataaacca
+    24661 tgtatgcaaa aaccatttat tgaaacgcct cagtaggctt agtatagaaa atttaatatc
+    24721 ttcctctgtt ttccccgaac tttggatact tttaatcgtg ttttgattcc gttgaaagct
+    24781 gcgcttgtag ggcgttttga ttgttgttgc tgtcgttcca aatttaattt atttacatgc
+    24841 cacgcatttc tttgttttac cctgctgtcc atgttaaaca atctgttttg gatgtcaaca
+    24901 ctcgcgcggc gcctccgaac ggcagccata aaagcaaaat caacttgaaa ggcagcaaaa
+    24961 gcagcggaac accgaaacaa aaaccaaatc aacaacaaac tcagcaactt tttgttaaaa
+    25021 atagtgtgtt tctgaatggg gagcaaatag ctccaactct atcgttttct cgccaagaga
+    25081 tcttgcaatg gactgaagtt ggcggcttgg accccgttgt ccaactggct cgttggtcta
+    25141 ggggtatgat ttccgcttag ggtgcgggag gtcccgggtt caaatcccgg acgagcccaa
+    25201 gtcaaaacaa ttttttactt taatttttat ttttagttca atagaagagt tgaggtaaca
+    25261 cttgtttctt gaaaagttta attcttatgc tggattcgct aaaacgtaga aggtaacgtt
+    25321 tgcgacacta taaatggcag aatttgtcgc ttttctttct ttttgtgtta aaatacagag
+    25381 attcaaaaag aaaaattaaa tgtttggacc atgttctcca atgaactaat aacctccttt
+    25441 aggtactgtt attcaataaa ttcattaaat tattcttctt catacaatgt atggaataca
+    25501 atacaatttt aaagcacctc tgtttccatg gacttattgt tgcatttagt accagccatt
+    25561 tgaactatca gaagttgagt cataattatt aatactctac attaatgggc ctgtttagag
+    25621 ccataaatat aaatatatag acggcagagc ttataccatg gcaaaacctt tatggtatct
+    25681 tgtgactttg gggctagccc ctgtctaaat ttatgagctc gtggaaatta agtctaatca
+    25741 gtggttgtga tttctttcaa gaggtttaaa tgtgtaaatt tcgtcgatgt atgaaaacca
+    25801 ataaagttcc cttgcagtta aatgaagtat tacatttgaa ttaaatacct ctggtttatt
+    25861 tatataaata cccacttctg ggtattgttc acccctctag tcagtaagag tatttgaatg
+    25921 gctgccttac gaaacccatt gattgaaatc ttcatttgtt ccttttgtga tattggttca
+    25981 ttatttattg gcacctcaga actgacaact tgtgggatgg ttattgctac aaagtcgtaa
+    26041 aatcgatttt attgtccgag acgtatataa agatgttcaa gattatgcga catgaaaagc
+    26101 atgtgtgata atttacgatc aaaatagaaa tgtggaatat gacaaaaata gtaaaaaaaa
+    26161 ttatattgtt aggaactaga actagtagaa actattttta tttatttatt aatcgagttt
+    26221 cagaataatc aacatgtttg cagccattct gctaaaacgg acaaccaacc gcctataaat
+    26281 aatttataat agccgactga gattgcaggt ggattatttg tggccctata caatgtcgac
+    26341 agcggttcag tgctgtgatt ttccttggca gattgatata ttattgcact ggcaagttga
+    26401 cgtttaagat tcggtgattt attccatgat ttatgaagtg ccgtttgaga ttagccagag
+    26461 attagcattc gcgaactttg ccattgtaca actgatagta aataatacac accattcatg
+    26521 gctatttatg cttagctttt tgtaatacaa attaatttcg cgttcaagtg attacgtctt
+    26581 cgacgaattt gcaacaaagt caagtttaaa aagcgactgt gcagataaat gtcgcagtat
+    26641 ccaaaactag gggacttgtt gccacattcc cggcccaaaa agtccggaga cttgttctaa
+    26701 taggtggcga gacccactcg catgttggcc ataaattaaa cgcaacagca cagcagctga
+    26761 ctgatgtcag tgttgtcgtc gcagctaatt aacagttttg ttataaatta cacccagagc
+    26821 cattacaatt tttcgtattc tcggctcaag ttgccggtct ctgagggcgg cacacccatt
+    26881 tggggcgtca atcatgcgta aagtgacaca gatgtgacat ccagcggggg cccactgttg
+    26941 gtaagtgact agcggataca tttgtatctt taagtctgtg tgcccaatgt caactgtcaa
+    27001 ctccttaata attttccgta gtcttatttc ttatttcttt tttttttctg tgaaattcac
+    27061 gcccccgctg cgagtcggtc cagggtccct cctcttcgaa tatcccatgc tgtgttgtga
+    27121 cattgttgct gctgttagcc tgctttgttt taatatttat ggcttcctcg tgtcattttc
+    27181 ataatgttgc gtttgacact ttccctctgg gctccaccaa cagaaatcac acatacacag
+    27241 cccttcacgt tccgtattat cttacattat atgggcggtt gcctgggatc gcccatcttg
+    27301 ttcatttagc atctcagtct ttttttcaga ttttgtgtaa aatgtttttg tgaaatgaca
+    27361 tttgtagctg acacgacgac aactgtcaac gggcactcgg taggcattta tttcggattt
+    27421 gttttaggcg tggtaaacaa gatctatgta gtagtagaag taaggtggcg gattggaaat
+    27481 ggttgcggtt gaggcagaag ttgaagccgt ggcttattgt catgcatgaa ggttttaaag
+    27541 ccacattttt atcaattaaa tattttcatt aattttgtgt tgcgaacaaa agcgtagcca
+    27601 ctttgagtta tgtaccccca accccttttg ttttaagcct ataaagatac tacaaagaac
+    27661 gcaaagccat aaattacagc cgggaaacag ggaataaaat atgatatggg aatggaaaaa
+    27721 taaaatatac gtccattggt atctataaac aaatatatgt aatgaaaaaa aaaatatatt
+    27781 aaatctcgct gagaatttat gaccctgcaa ggccttccaa ctgaagccgc ggcataaata
+    27841 cttcagctgt gattattatt gcacccagcg agccacccaa aaggaatgcc aggcagaacc
+    27901 tggctaaaaa gccagcggaa acaactggaa agcgacaaaa aaaaacgaaa taccttgagc
+    27961 caagccgatt ttactgtgcc acacttcaaa ggccaaccgc cgacattgtg tgtttttcgg
+    28021 gggagcttgg cgctaatata ttattgatag atatatcagt gatatatgtg acactggtca
+    28081 gtggcgggct ttgcttacgt tagatctaga ttgtaaatta cccgtggcaa gaaaatgccg
+    28141 actgtccgga agggtttgtt cgcctggcgt gggctgattg gtaaaatcag gtcactggca
+    28201 gaaagccgtc agcggcaatc gccatacgca gataacttat ttatcggaaa tggctgttga
+    28261 caaatccggt aatgaatttc tacgatctcg cgtttaatcg atgattcgtt aaatttattt
+    28321 cgttttcttt gtactttaat attaattatt aatgtcaagc tgtcagatta aagtttgtgc
+    28381 gacgaggaaa gtctgcagtc gtagtcgact cagtaacgcc ctatactagc ttttcaaagt
+    28441 gaagagatat tgtgtgatca tgggatgtgt tctactaaat tttccataaa gaagaatgaa
+    28501 tgggtatata ttccagtgcg cctcaacatt tttaagtgca ttcacatacc ctttaaggct
+    28561 ctgaaataag ctttacagcg aagcctcgaa agggaataaa tatttcatgg catgtcaacc
+    28621 gataaccgcg attggccaaa gccgcctgtt aagtgtgata tagtcagagt cagaagttgc
+    28681 taaatatggg cttgacctgc tgctgtggtt gttgtggctg ctgccgctgt ccgatggcca
+    28741 cattattaag ccaattcagc ggcagcagat aaataaataa aatttgaaac attgccgcaa
+    28801 caataaaagg cgacagtaca cgatttatta aacggagtgg ctgctgctgg gttgcccggc
+    28861 tccaaatgaa gaggcaaaac acaacaccgt cagcagcaac agcaaaagca acaagaaagc
+    28921 aaaagcaacc gaaaagcaaa agcaacagaa aagcaaaagc agcagctgcc accgtgttgt
+    28981 tgctcacatt actctttttg gcgtggccca ggctggcagg cagacagagg gaaaccgaca
+    29041 gcagatacaa agatacagat acattcgcta tatagaaaac acacaatact tgaggttgag
+    29101 gccaaaatga ctaatgacca gcgattatca agcgataaaa ggcaaagaga gccaccagca
+    29161 acaacaaaga taatttatta ttgttttaaa catttgtgtt ttgcgctcaa acgagataca
+    29221 tttgtctcct ccatgccgtt tagaaacctt ttcggctttt gttcattaag ccaatgatga
+    29281 aataaaaata ccataaaaat ataaccaaag aaggtttgcc taataaatga atacccctta
+    29341 tatataaggg ctcttataca atatattgcc catatgtaaa acgtatttta ttcattctga
+    29401 gcggcgcaac tatttagcat atgtttttgg gagcctttgt ggcaacctgt aattagctgg
+    29461 ccctctctac agtccgcttt tttcatttct ttttttttta ctaccttttg agtgttaagt
+    29521 tttcgttggc taccagaatt ggtggcggca aaggcagaag ctccacagaa ccagacaaca
+    29581 ataataaact cgaagattca tgaaagcggc aaaaggaaaa taaacagtca acgcgacgta
+    29641 aatttaagac ttttagtatt tcatttcttt tgtttctgcg cttccctaac cagccttctt
+    29701 cagactacta tcaagccgaa actatacaaa aaataagcgt gcttttgaat ataggaaagc
+    29761 aacataccta ttacctaaaa tatactttta ttaggctgtc ttaaaattac atagggctca
+    29821 tacatacagc tgcggttaaa ataatagcac taaattaaac atacataaat aatatgatcc
+    29881 aaaaagttgg cacatatcac aatatggtgg ctcattctat tgccgtggtt caatatactt
+    29941 catctatagg gtatccacaa aggggtaaaa tgatcttagc ccgattgtta ctggtagcgt
+    30001 tgtaaatgtg aagtggtcga aattaaaaat atatttatga cttaagtggc aataaattat
+    30061 gtttcttgtt aaaattctat tgtttcagcg tctgctgggc tgttaaaatt tgctgcttct
+    30121 ggctttcttt ttcaccctct gaatttattt attcttttgg ttttgcctaa attattcact
+    30181 tcgtggtgtg gctacataaa tttgctcaca attgtgcgct tatgacaata tttatgactt
+    30241 gctaatgaaa attttattta taattttgta tgctgttgcg gatgcgctaa atatactttt
+    30301 taaaaatatt cacggcttat atttctactg ataacaaaat tattatagtt caagcgaaat
+    30361 catgttgcac atgaacaaaa aaaaaacaga ttttaaatat gttttcaagt gtcattatct
+    30421 gttgtcaaga ctgtttgtcc attggtgtct gaggtcatct tgagtgattt gttgttttta
+    30481 cactttaagg ctttttcgtt ttgattttat gagcctatta atttaagaac actcttttcc
+    30541 aatgagtaca tatatgaata tatacgtttt attagaggca caaatggaaa acgagaattg
+    30601 gagtacataa attcaatatg ttttaattaa gagtattttt cacttcgtta aaatgaaatg
+    30661 gtgtgatttc gatttctaga caaaacaaca ctgcacagat tactaagtca gtttccgttg
+    30721 gcatttttgt gggcactgga ataaaattga ttttataacg acacaattat gttggtctaa
+    30781 tctgaatgaa gagcaattaa aatgaaaatt tatggcccta atgtgccaca tactcagcga
+    30841 gttcaagagc cgactcggct gtcatcagct tattatattt tatttgagtt tacgaggtct
+    30901 gccagattca agtgattata ttttctgtgg ccaattaaaa atttgtttaa ctaacaatat
+    30961 ggacttttac ttaagttttt acttatgtaa acagcacctt cggaatcatt taaaaatagc
+    31021 ttgtgctttt ttgtgggcag tcttatgata aacaatggac tgcaacgtct gtgttttctt
+    31081 taataacatt actggtatta ggctactaag cttccgagct aagataataa acaaaatttg
+    31141 tgaggaattt aaaagacaaa cggcgcagga aaacaagata atagcatcag ttaagtctcg
+    31201 tgattgtcgt gccgccccat gacaaggtct ttcgttctcg aattagagag gatcgtctaa
+    31261 atttagtagt aactcagtaa gtttgttacc tgtcatttag ataacatacg tctgaaagta
+    31321 aatgacgctc atagactctt gaggtctaaa accagatggt tattgcagca ttatatatga
+    31381 tctctattta cgtttcattt agttggcaaa tatttgaaat catggctgtc gcctaactga
+    31441 cataattatt tacaaaaaaa aaagtatgtt ttttggggct taagcataaa aacctcaaat
+    31501 aataataaga cgacttcaaa ttatttaaaa cgaaatctgt agtcaagacg tgtctttaga
+    31561 tagctttaag tcttcaccca acccaaatcg gcggtcagtc acagctgtcg ggctttaatg
+    31621 ccgcccacca tttggccaac aaggctggat ctgtccgcta ctttggcagt ggcactcttc
+    31681 tcgtggccat ttaatcacag tattaactct tttgctgagc cgcctgtcac tcatgtctct
+    31741 actggtcgct cagtgggggt tgtgggacag acgtcgatgt tataaacact actgtcttcc
+    31801 agctgtcaag ttggtctaag tttttgagcg caaaataaaa gccgacttga cagctgaacg
+    31861 gtcgaccaac tcatgttgat gatgatgatt attatgtatt ttgtattttg tatttttgcc
+    31921 gtttctcgtt ttactgccca atgacactct tacactgaga aaatagccgt tgaaatatta
+    31981 catgatcatg attataacaa atttagttct gaaattgaat gttctctaat caacaaaaat
+    32041 gcacacatgc ttaattaagt tgaagttggc gcgtcgtttc tctcagtgtt ttgaggcttg
+    32101 gccggtacag gcacacacag tcgcccatgc acattcgttt aggcttttaa acgcttaggt
+    32161 cgtcttatgt tggacttaac gcgctgcccg atgccaaaag ccaacagttg gacatctgaa
+    32221 gacgcctcaa aaactcgatg gcggcggcgg cggcgaaaca tgaaatgaat ttggacgcgc
+    32281 ttcggataat gaaaacgaaa cgaggcgcgc gcgcgctcca caaaacggac agaccaaaaa
+    32341 caaaactgta gatggaaata acaccgaaaa aagtcaagta aaatcccagc cgacacaaag
+    32401 aataaaaacg tcaaaagtga ttttgcgctt tggtcagcag cagcgacagc ggcgaagcga
+    32461 aatcaaaaag tgccgccgcc gtcagcactt cccccctaaa ctaccaaccg ccttttggcc
+    32521 aacaaaatgg ccagtgtcca cagttaactg ttttgtacga gccgttgagc ttttgggcgc
+    32581 ttgggaattc tgacgtttcg tttttcggtt ctgtagcttt ctacgggggt cttcttattt
+    32641 tttaaaccaa tttggttatt tcggcacgta ctcttctcag ttgttactta cagccacgaa
+    32701 aacttattcc agggattttg atatagctcg ctacgctaaa aataatattc aatgtgtttc
+    32761 cctttttctt cttaaaacaa attgaatttt cttaaggtgg aactcattgc aaccaataga
+    32821 aatcgataat tgcgccaaca gagttcacag ctgggaaaat tcaattagtt tcccaggcaa
+    32881 tgaattcggg ttagggcctg tcattcccca cgcacacact aactcacgca aaactgcagg
+    32941 tcttgttctt cgacctcttc tcgccaaatg cgaacctgta aaaaattgtt tataaattat
+    33001 tgattaaatg acaatgcgat gtcagagttt gatttatggc ccggcagaga gacacgcaag
+    33061 cggctcagcc agggtgtgtg agagtgggca cgacagcgca cattgaaaaa ttattaagca
+    33121 aacaataaaa gatggccgac gtcccccgcc gaacgattta tgacagctaa ttatatacga
+    33181 gatggagcca cggagctcca agaaacgctt ggcccaaaag ttacacaacg gccaaaaccg
+    33241 aaaacgcaat gcagaggggg agtgtggcca ccctatatat tgccgttttt ctcgtattaa
+    33301 tttatcatcg aaaaaaggcg aaacaaaagt gacatcgagt cagtcatcaa tcaaaaatta
+    33361 cgaaatgacc aaaagaaaag tatgagaaac tttgcaataa attatcgggt aaaatagggt
+    33421 ctcaaatttt cccgtggttg ctttcaatta gcggttttag cactgttaac ctttaattaa
+    33481 aacaaataat atgctaatta cggggaatta atacgccact tcagatatca agtttctttt
+    33541 tttttagaat aagtttggta cccgtgaaaa cgaaattcat taaagcaatc tcatactcat
+    33601 tatggaaatt tttcctctgc accttgtggg gcaatcttat cacacctaat tatgggtctt
+    33661 acctcaaaga acttgttgaa tcagatgtgt ttttcggggc ttacaggcaa gatcaatggc
+    33721 actaatggaa tatttctagt ttaaaagttc ctatgtgaat tgaaagtaaa tccctcttac
+    33781 atcctttaag aaccccaacc gattgcagtt ttcgttatca gttcttcgga gctgtctgat
+    33841 ataacagagt gacattatct tgaatgaggg ctctgtgcgc cataagtcgg aaacaaatcc
+    33901 tacttaacta aggtctttca agtctttgcg gatttaacta cgttttgtca tttagtacgt
+    33961 actcgtagta caagttaaag taaccgattt tcataaacag tcaaaggggt tgtatggcat
+    34021 ccaagatgtg actgttattt gtttttactt ggaatacagt acaccttcgt atttactcct
+    34081 ataataatca gctccattta tttatgatac gagagacggg cacctcaaaa atctgaagta
+    34141 tctgccatat ataaagacga agcggacttc atgacatatg attgtgtcgg tcgcgaggtg
+    34201 taaataccgg ggattatgct gctgcgaaag ctactgggtt cttagcaaat aggaatcaga
+    34261 ggcagccact tggataacca ctttcgtaac ccttcttttc gtcatcatca tcttctgata
+    34321 taacaactgt gctaatattt gccaaacatt caacgctgtc cgggcggtac atttcgtatg
+    34381 atataagcca tgttggttgc ttttcaaaaa attctttata atacgtctct gccacggcag
+    34441 tgtttatcaa gtgtaacacg ctgtgtctct atttgttttc caacactcgc tgtctgagca
+    34501 aagttgatta cacatcccgc tttatggcaa acaacaatcc gagccgtttc tcctcactct
+    34561 tgacatacaa taactgaaag agatttacag ccgttgggcc ttgctctatc tcggcgtgta
+    34621 cccgtaattt ccagttacca gatttcactt ttttatttgt gtgttttatt taattttttg
+    34681 ttggcttgtt taaataaatg gaaataaatc tgcacgtcgg cctggtggca cggaattgat
+    34741 ttacaccgta taatgcgcat ttttctcgtg tttaacagac taaatgtctg tttataagac
+    34801 cggggcaagc cgactgggta cgacaacggt cgggtgttgt gttgtcgcgc tataaaatat
+    34861 atttaacaaa ctgatttatt gcattttgat aacctctcac tgggtcgggc cattgcctca
+    34921 actcgccatc cccgcttcat attcgtcgtc taggagctgt cgtcggtcct cggatcccgc
+    34981 ggaccagaaa acctaatggc atcgagcaca actgtgcaaa atgtcaagcc acgccagatg
+    35041 aagaagaacg cgttttataa ttgcctggta atttatgagg cactttttat atgtgtacac
+    35101 ccacacggac atttacagat ttatagaatt ttattacggc agagtgtgcc tttaatgatt
+    35161 ttgtttacaa atgaattggg gctttttacg cacttggttt atttaatttt acaaattatt
+    35221 gctaaggcgc cagaacttaa tgacgtaaat atatagcatt tcaatctatt tagcatgctt
+    35281 gaggtacata aaaattcaat ggctttaaag ttgaaattta tttctgggta aaccccaatg
+    35341 acataatcta gagcgtatag ttctcattta attgggacat atgactgtca gacttttctt
+    35401 tttttcttgt ccaaagatta gccattttga catttttgtg tcaaaatatt tgcgcattca
+    35461 aatgctgcga ctaaaaaaaa aatgaacttt aaggcgactt gagcgatagg taaacgaagc
+    35521 cctcgacatt tgacgcaatc tgcaaagttt gttcacgtgt gacaaggtta attttattca
+    35581 taaatgaact gataatgctt gaatctagtg tgtacttagg aactagtatt cgtgattgaa
+    35641 atgacagaca gctggattga cagtcaaaca gttgtcagag gaagataacg acacacttgt
+    35701 tgtgcgctag aatagatact cgtagatatt ttgacaacga gtttttgtta ttggtatggc
+    35761 caaatgagga taagtccaat gcaagaacaa gcatttaaaa ttcatgacaa gtataataaa
+    35821 tacaaatatt tggttctata aactgtattt attttataaa tgtctgctca ggctaatccc
+    35881 ccatctcagc tcttttcggg taataagtga aacagtataa agtggagaat gacatatgca
+    35941 gatgtggatg gaagaataaa agagcaccac cttctgctgg ctttcagtca ttaagtcatt
+    36001 tacatatttg ccaacaaaat gtcagccata aaatataaac taaagcaact gtgtgaagag
+    36061 cagaacccgc acaaagtggg aacccaagcg cccggcaaag gcaaaaagcc gggaagataa
+    36121 atgaaggcgc agctgctgcg gctacatcaa cgctaattgc tctgtgcatc ttacagatac
+    36181 tcgacgggct gagcagctaa gatacagata cacggtgcgc gaggactggg agatatatac
+    36241 atacatatac gtttatatat acaaaatcat aattcatatt tcttcctttc cctcatttct
+    36301 tgtcgcttct cgagctgttg tcttcgcgtc tttggcggac ttgcgatctg tttgacgggg
+    36361 acggatggcg gccagcgaca gcaaaaatgt caaaatgcat taacatttaa ttgaggctaa
+    36421 tttaaggcga aatatttcgc ttggcatata tgaaaattta tgcgtctcat ttcggtcagc
+    36481 tcttcgtcag ttctttcgct ttcgttctgc ttttttctca gtctcttgct gtcttcttgt
+    36541 ctgacagagc cggatatgtt tgagaaatga gtgcattttg taaaaatgtc acattacatt
+    36601 taatttatgc acgccaacgc tgaaagccat ttaggtacat atctgtatct tttcagtgcc
+    36661 cttttgcagc tccttgcact ggaagtgtgg catgaaattg tcttgatggc gatgatgata
+    36721 ttgcttgttt gtttctcatc gcaatgtttt gtctgcgaaa atgatttaca ttcgagcgaa
+    36781 attttttgtg gtttcttttt gttggtctgg ttttgttaag aaacagttgg cacataagta
+    36841 aataagtatc tacatttaaa ggtattcata gcccatgaag tattctgcaa tccgtccgaa
+    36901 aacaaagtac agctaaatga aagaaagaaa ttaatacaat caacttattt taaagggaat
+    36961 atgtgcgtct actctcccac tgcttagaca ttaaacacca aatatgtttt aaatttacat
+    37021 agaaatcaat tccaatctgg gaacttgagc tatcaatcat tcagtcggct gaagaaagtc
+    37081 ctcaacaaac tctgccccgc ccagtgacaa aatttgacag gaaagctggc aggaggtcgt
+    37141 acgccgcccc actcgtcggt aagtgagtgt aaacttgagt ccggacctcg aactcggact
+    37201 ttggaccacc agctgccagg aacgagatgc ggctgtcaag ctttttaagc ccaaaacgtt
+    37261 gtcgacgaat tgattttgct cctctcagca ggactcttag ccaactgaca ctggcaggcg
+    37321 gtccttgcaa gagtgatgag aaaatgaaca ttttgtagcc aaaaaaaaag agatatatac
+    37381 aaaaatgtta tgtatgaaaa caagattaga atagtacaca attttttttg gtatataata
+    37441 taatatcata cagaaattta tcctaaacgc tgattatagg acttacacat atgcatttga
+    37501 ataaatagct aaaatatttt aaggtatttg atccgactta aatcctgaag tcggcaacac
+    37561 tgccgtccat gcagacaaag catcaaccac ctcggacttg tcactgatat cagctggatg
+    37621 cggaactgga gctaaaagct gggcgaagtg gctacgccca acagctaaac aaagtttgaa
+    37681 ttgactttgt taactcgact ggtgggctca agtggtagat actcgtacgt atgtagatac
+    37741 ttacgcttgt gcatgccccc cgtgctctga aaggcgtgct gtgaaatctg caaattgccg
+    37801 tgtcattatt tgttagcatc aaaccgaaac tcagccagga actaggatga tgtcaacgcc
+    37861 ccttgggcaa ccttaacccc aagaagccct cgcctcgcac tcattcacat tgtgctcggc
+    37921 gtcaaaaagt gtaattggca gtccgcccga cagtggacag tcgattcctg gggatcggat
+    37981 cggtcctgga attgtaaaat tggggtgtaa cccactgtcg gcttacacac tcaagtcaag
+    38041 cgaccaacgc catccccatc aaccgaactg aactcaactc aactcagctc agctcaaccc
+    38101 aaccgaactg aactgaactg aaccgacgag ctggaaccgg cagacagttg gtaactgtca
+    38161 gtgggcatgt tcgacttgac agcataatat tacaaatccc tgccgaaagc agacagtaac
+    38221 gatgacagga cggccggcaa cgcaacgggg caaaaggcga aaaggacaac cgcagggcgg
+    38281 cctgcaaata gagagagaga gagagagata gagggagagg aggatggaaa aatatcctgc
+    38341 cctatgagaa ggcatatttt attcctcagg taaaatgtca gactagggga aatcatttta
+    38401 ccccaaaatt gtttgcggtt agccagagaa tgggccgcag ttgccctttt cgggcggaaa
+    38461 attgagctgg ccagttcaag ggtctggact gggtactgga ttgaatcaaa agtaaagtgg
+    38521 ccatgattac attccatatg acggttcaat caatcaaaaa ttgatttccc attgcaaatt
+    38581 tcatgcaaaa atggctgtca aggattgcct ttgttatggc aatatacata agctaaaaga
+    38641 ttaacgagcg tgattttatt tattttaaat aaattaaata cgtaatatcc ataaacgttg
+    38701 cttatgttgg gggtaccatt tattttcccc ctacttttaa atatttcgaa ataaatttct
+    38761 aagaaaatca ggttttacgt tcatctatta aaaatcaaat tcataccttg gggcttccca
+    38821 acgagcagcc attcaattaa cagacaaaca gacagatgaa gatttacctc atcgccaact
+    38881 tccatttccc ccaatacgcg aacggtaatc aattaatgtt gtgtagggga aatcacataa
+    38941 atttgcgtaa cattaaacgg atcgagtcgt cggcttgaaa cggacgcatt cgatatggtc
+    39001 acatttcaca aattttacga ggagcctttc aagttgatag aaacatttac gagccactcc
+    39061 gtccatggcc gggtctccag acttggtctg gttcgcctgg tagcaattgg taaacaagtt
+    39121 tatggccagt ctgcatccaa ctgcaactcc atatccactg cacattttct ctcttgattc
+    39181 gccgacgtgg ctataaattt attgcacttc cttttgcata taattttcaa attaagtgtt
+    39241 gcttcatgta aattgataca tttactgaat gtatttttcg agctctttgc tgatagtacc
+    39301 agccaagact agttggctgt ccggctttta agattctcag cgaacgaagt tagggaccat
+    39361 aaaacgctgc ttttcaatga ttcatggggt tctacatatt ttctattgcg gagcggcccg
+    39421 gatgtcagaa aatgtaggtg caataaatta cctaaatagt gtgtgattta agtcgttgtt
+    39481 agtcaagttt aagagagctg ttcaaaataa tattgctgac acgcagaata atggttatcg
+    39541 ttgtatttaa ttgcgaaaat tgtttgcatt aaaatgcgat tttaattaaa ctatgaaaaa
+    39601 ctatgttctt attttgaacc acgtcgtata aacacactga agcaaactag tgtgcttagt
+    39661 aacaagataa tataatactt agagcaacgt ccttcaatgg tttggctaac tctgctcatc
+    39721 tcaccttctc cacttttagt tcttgacggt ttcttcagcc atttcttggc cacttgagcg
+    39781 ctcctcgggc aacttcgggg ccttgcgttc ctttcgttca gctgacagtc acttcctgtc
+    39841 agcgtggagc agctcgatcc cctggaaccc cctctaagcc ccgcctgtga cggctcatta
+    39901 gcatgtgtgc gtggctggtc ctgctactct gcccgtcact cctttggcct gccattaatt
+    39961 tcgcaaactc gccctgctcg ttcgtctctt gtgtctcctt ggggcgttta caacttagcc
+    40021 aacttgcagt gtaacgcttt gtattgttca cgcaaaattt atttccgcta cgtcatgaac
+    40081 atcctgctgt cgtcgttgca gttgtagttg tattgtagtt gtcctcatca acatccttgt
+    40141 tgtatttgtt gttaatttac gtaataaata cgcttgaaaa tgttgcacgc acatgcaaaa
+    40201 tgaattacgt tgctggggag cagcaacggc agcagcagca acatcaacaa cagcaacaac
+    40261 agcagcaaca tcaacttaac tgtcacttgt cacagggacg tttcaggggc ctccctctgg
+    40321 atgtcctggc ggtgctgtcg tctctcacat gtcaacgggc gtcgctttgc ttggcatttt
+    40381 tgtgatgtga aatgaagcaa cgaggtgaca gcagcagcag caacagcagc ggcagcagca
+    40441 acagcaacag cagcagctaa aggggagttg ctgctccttg cagcggcagc ataaaatgaa
+    40501 ttactcgcgt gaacaacaaa acctgtcaga aggctgtcag ctgtggaggc gacacgtggg
+    40561 tggggcccca tgtcgcattt gcaatcgccc agtctcgctg atgttgctgg tgtgtgctag
+    40621 gtgtcagcgc atcgcatttg aaatcctcgc acactgaaaa acagttgaca gtcagcagtc
+    40681 agggcagcga caaagtattg ctcccgacat tcaacgccag cggcagcagc aacagcagca
+    40741 gcagcagcaa catgttgcca tcgctcgcat ccatggccat gcatatgaaa tgtgagtgtg
+    40801 cgaaatcttt tgtgggtccg tggcccatga ttatcttacc tcaaattgct tgtagcaatc
+    40861 atggaaaaag caacgttgcg gttgtcaagc tctttgggtt ctccctcgtt tcattccgtt
+    40921 ttcttcttct tttttttttt ttttttggcc acagcaattt tcattttcgc actccattat
+    40981 aaagtaattt ttttgcatta cttatgcctg tcaacgtctg gcgcttttcg ccgctttctt
+    41041 cttcctgttt ccgaggcaac gccccacgaa cgtgtgggta tccttctcgt tggcaaagat
+    41101 taatatgttg ccgctgtgcc gcatcctgta ctcactcgct cagtttgcca gcttgccgct
+    41161 gtttgaacgc tcgcatgtgt gacagtttgc aaatgtgatt gacaactgac actggcatat
+    41221 aaattcaaag ctttcgcaca cactcaaccg ttgacagtgt acacggagag gacagtatcc
+    41281 gcagcaacag caactgcctc ctgccttgaa ggcatgacaa actgactgac tgaatgacgg
+    41341 acgtgccgga ggtctgaacg gtcgcactga ctcactagct gactgactga ctgatggccc
+    41401 gactgtctgc aatgcagttg ggtgttgtct caactcaact cagttagaga actgggcaag
+    41461 tagtcgcaat tcatcagcga aagatcttgg gaaagattta gggttgtctt ttgcactcag
+    41521 cttaagcaaa taattaatta cagtccattt tgaaattggg ttttataagt tgtaagaaat
+    41581 aactagaact aagttatatt aggtttcttg ttggtttttg agtttgttgg attgttgttt
+    41641 tattttttgt cattgaagac tgcaagatca tgattaaaag tactcaaaca aatacttttc
+    41701 cattaatgca agatatacat ttcccccaat tcgaaagagc tcaagccatt tgatcgttcc
+    41761 gcagtaagta cgatcattat ggtggaccga caactctgcg aagtcatttc cccaaagtac
+    41821 aggtctccat ctatattttg ttagttgctt cccgtttctt tcaattaccc agccacggcc
+    41881 tatagaaact aataccaatt atcatgacaa cttccgtttc tcagctcaaa ttactgcggc
+    41941 ctaattgcta tttagggctc ctttcggaac gtcgagccgc gtaatgtcca attattcaaa
+    42001 caagagacgg agcccataac cgctgacaca accgtccggc ccgggtttga taatagactc
+    42061 atcagacatc ggtcgccatc atcgtcatca tcgccacgca gcaggtagtt ccttcaaaag
+    42121 gagggaaaac tcatttggga gagtacacag gaagaaatga aaatgattca aataccggac
+    42181 ctccacaaaa cttccgaact taagcttatg atgttagccc attggttgtg taatgtgata
+    42241 aggttcaact ttatggacta tatcatacta aacctagtgt attaaagcgt tagatcgact
+    42301 gttaatcgga tgctttcagg tctatctatc tcatgtattt ctctctgtgt attagtgatg
+    42361 agacgcaaac ctgtcgctcg tgctgacgac aaatgtcttc agcggaagtg acacaacaaa
+    42421 aatccctggc aaccgaaagg gaacaaaaaa cgcttgatta taaatgcccg atgccgcgaa
+    42481 gaaaataaaa ctaataaatt gatgttgatc tggcgatgct ggctaccgtc tcaaagcaaa
+    42541 acaaacgcgc catgacaatg acaaaatcaa gcggcagggc tgacaatgat ggaagtgaca
+    42601 acgacaatgg cgctgaatga caggaggatg tggatgtgat ggcactgcag aagcagaagg
+    42661 agtggggcgt cggcagaggg ttttctggag gggatccgag gggtcgacca gtggcagctg
+    42721 ttcgctaact cgacactctt ggcgtgactt gtcaacgggc atcattcggc tggcgtcgag
+    42781 gttggggttg ccgttgatgc tggagcaaga agatcatcat tcatgacgat gatgatgcgg
+    42841 tctacgcctt ttgtttatca aatttcaatg agattggacg actgatggag gtagcacatc
+    42901 actttgaaac gcatcgtctt tgtgctcagc tgggatcgat ttcactgaaa ctagcatcgg
+    42961 tcttgggggg aatatatcaa attttaaact cgtactacaa gtaatgattc taatacaaac
+    43021 tttatctaat tttagaaatt ataatattac caaacttggg tctttacttc gttgtcacca
+    43081 tttctcgtga tctccatgct tgcggtcagt cttgtataga ttttgccatc ctggccacag
+    43141 gaaaagacaa gcaattctcc gtttatccgt cagtcagtcg agtcatcgat ccatcagcca
+    43201 ttcaatcatt cactcatttc gactgttgtc actcactcgc actcgagtgg aaggaaaaga
+    43261 agggagccgg ggcagacgag gcagacgggg caggagactt caggcaacca cgttgcacga
+    43321 ttccgtggtg cgcttcaatg aaaacaaact gatatttgcc tgacacttga gattgattgt
+    43381 catccatcaa gggagctgct gctgtcgtct ggctgagtga ttgaatggca aggatgcgct
+    43441 gtttgcattt cgaatttaca tgccagcccc gttctcgttc tcgtttccat ctggactatc
+    43501 tacctaccaa ccgctcccca ggctggttgc cctgcccctc agtatttcgg cgttggcctg
+    43561 aattgatatt caattacgtt ttattttgcc aagccaaaag gcggcaagga gtagtaggtc
+    43621 cgagtagtag gtcggcttgt ggctttcaca ctcggtccac aagttgtttt gaccaacgtc
+    43681 tccattacgt gacaacctgg atacaagtga tggtccttct tggcaccgtg gatcttgttt
+    43741 gcattggcat gttggttggc cggctggaag aaccgcagaa aagcagaaaa tctgccaatt
+    43801 cagattggcc ttctcaagag tcatgtggtc aagcagcagc tgttaagttg ccatgtgata
+    43861 tccaatttac acgaaagatg cttggaatca gtgaaaggca taattgtgcg actctttttt
+    43921 atggcgagca aagcaatatg gcctcgaaaa accgtacacg catttgtaat aaaagtcgcg
+    43981 cacatttttt caaattaaat ttcgaagccg tttcaattgc aattgaagct caatttgttt
+    44041 taattaaaaa attgtttatg tcggttattc attgaaaaca ttgttttaac atgtaatatc
+    44101 aatttgctgc aatttatttc caatcaataa agatctgaaa tttcaattat ttatcaaggc
+    44161 atttgagcgg cgctcgcata agcaatctca acatactgta attcaacctt ttcgtatact
+    44221 cgaatgacat tttattatgt ttaatagtat atatagtcta ctggatttaa ttctcatgtt
+    44281 atataaattt agtgttttct tttatcaaac aagaactccg aactctcagc tccatattca
+    44341 actgggagac attcaaagac aaaagacaaa cacacatttt tcatttggtt tgactttcct
+    44401 cctcgaggca ccaaccattt tcaaatcgtc caagtagcgc ataaagaaat tgaattcatt
+    44461 aaacattagc ttcctgacac gatcgtcgcc aagcctccgc cggcagagac cttaaaacat
+    44521 ttagtgtcgc aatatgcaaa tccataaatt ttctgcccca gacaggcggg gagtgtgagg
+    44581 ggatccggtg gaggcgatgg agcctcagtc ctcggttgtc ggagacaaga catacacacg
+    44641 cgataaagaa attcgattag gcatcaattt taaatgagac gcattcccct gcctccttgc
+    44701 cacgacaggc cgcctgctgt ggatgccacg taacctgata tggtgacact cgtccatgtc
+    44761 gttcacgtcg tcgtcgcggt tgcaatctaa tttatgactt aattatcagg ccatatttaa
+    44821 tgtgtccaat aaactagtct acaaaattat accaacacgt cgttgtttag gctttgccgc
+    44881 gatctgcggt gcactgagaa cgctaatcgg atgggaatat ttaattattt ctaagaatac
+    44941 ttaaggaatg atatttagga ataaagattt cagataaaat ggaattagtt taaattaata
+    45001 atttaatatt aatgtgtttg taaatacatt ttaataggaa atctgttcag ctgaaaaacc
+    45061 ttctatgtgg tatttaagat ttaaatgagc cagtggtgta aagattaaac ttgtttcaag
+    45121 gttgccactt agatgttcta tggcaatgat tgcaatttct ttcagtgcat tggtcttggc
+    45181 tagcctttgc ttaacgcgta atcggagcgc ggggaacttg aatttcagca actcaactcg
+    45241 ccaactgtcc gtggtgtggc ctcctttaat tctgcggcca tcaatttgct tttaaatgca
+    45301 acaatttggt tgccattgcg gcggcaccac cgacagagtc tgcaccacgt catcgtcacc
+    45361 gcagcatgca gcattattat tttttttttt aatacgtatt ttgtttttat tttggcagtg
+    45421 gcgacggaac ggtcgatcgg gcttggcttg gttggcttgc tggttcagca ataaaaattt
+    45481 cgtgaagtgg caataaaatt ccatttaaaa tggcagcgat tatgtctcaa cagaccatta
+    45541 aagtcactca aatgacaact actacttagg cactcacagc caacacaccg caccactttc
+    45601 agcatcgcct tggacgcaca gcagcattta ttttgtattt attcttcaat tttttcttta
+    45661 tttttttact ccttctgtgt tgatcgtggc tcgcagatcg cgacatttga tgacgggcgc
+    45721 cggacttgga agtcaaccgc attcgcacag tggggcacat tttattacct tgggtggtgc
+    45781 cgatcccacc agtgatccac ttgcgtgggt gcctccatta gtattgtgga ttgatcggat
+    45841 atgaacttgg aaagtttggc ttgccatata acgaatcact gtaagtaata caactatcga
+    45901 aagctacatt tcagtcatgt tgtttacaat agttttagat atatattttc tttatttgca
+    45961 tacatggccc tgatttcatg ctcttaacat tcaattactt tgggacccca gtggcctcat
+    46021 caggacttaa aaactccaac caaattgcat ttttccccca gtttttcttg tgtgctcagc
+    46081 tgaaaagtgg acgaaactac cccaatgcga atttcctgct gactttgcat tttattattt
+    46141 gacggtagag tctggttgcc accactgccg cgttttttcc ctcagtatga tatgctcatt
+    46201 ttagcaaacg actttggctg cttatggtcc gacttcctgg ccattttggc actatgcact
+    46261 ctccgaggtt atgacgtctt cgtcggcacc atcatcgttt agataagtga attttttcct
+    46321 acgcaggggg ttgtataatt tctgttgata ggcttaggtt ctataatata cttagcagag
+    46381 ttttcatgac ccgtgctatg taaactccat ggttgcaaag gatgtgcttg cgaaacccaa
+    46441 tacaaaaagg atgtttgccc gtacatctga aatttaatgc caaaatttga ctttcaagtg
+    46501 gttcgaaaca aaaaagagcg agtattgtca tcctataggg gtccccattc tcccaactcg
+    46561 actccagttg atcctcatta gtggtcacat gtcagcttga cgcgcatttg ctgtataatt
+    46621 ttgtcattca tcatttttcg cccatttcat tttgcgctac atgccgaagg actcgtattt
+    46681 cacgctcatc cctctgctcc tctggccgtt tcccgcagag tggtgtcatc ttttccagcg
+    46741 tcgcatgaca tgtgacaaca atgtggaacc gttgtcattg tcgctggtag ctgttttgct
+    46801 gtgacacctc ccagcattcc cctgcagaag ttttcggggc cagtggatct ctgcccagct
+    46861 aggtgtgatt ttcaacaagt acgttcattt ttctgcttca gttttcccat tttcctacgc
+    46921 atccctgcca ccgtgagccc tgtgatatat tacccaaccg aaaacacgac gctcgagtga
+    46981 gtgatgagtg actgaatggg ccacactaat gagggaaccg agtgctggtc agctactgtc
+    47041 agtcggcatg aaaatgaatg acattcgctt gggtgtgtgg tttgattagc cagcaaaact
+    47101 gaacgacgac tagaatatat ttggtatgtc ttctgtggaa aatagttgcc taattggcgc
+    47161 ttttgatatt ttcattataa tacaccaggt aaaataatat gtttattatt ataaagatat
+    47221 agattttata aggatgcttt actatctata cgggtatttt aaacgttatc ttctagtaaa
+    47281 taagaaaaaa aaactgttta tccaattatg ttcggaattt cagctaaaga atttacgaag
+    47341 catcggtggt tcagtggtag aatgctcgcc tgccacgcgg gcggcccggg ttcgattccc
+    47401 ggccgatgca taatactttt ttaaactttt tttttgtatt tctaatgaac ttgtttaaat
+    47461 tgtatttgta gcatacttat tatagataag atataatcat atcaaaaata tgataacaaa
+    47521 ttataatatt caatatcata attaaaatat taaattgcat cggccgggaa tcgaacccgg
+    47581 gccgcccgcg tggcaggcga gcattctacc actgaaccac cgatgctgtc gtgaagatgc
+    47641 atattttccg acaaaaaatt tttcttttag cgaaaaactt gtctaggatc tccgcaaaaa
+    47701 ccatttgttg tgactgtaaa accggatttc gaatgcaacc aaccaacttc aaagacttgg
+    47761 gtgagcaggt ttgaagttta tgtttgtaaa gcaatttcgg atgatggtaa atcaaaaata
+    47821 aacagaaaaa tgtcataatt aaaataacgc aggtattaaa atacttgttt gcatcggccg
+    47881 ggaatcgaac ccgggccgcc cgcgtggcag gcgagcattc taccactgaa ccaccgatgc
+    47941 ttcttaattt gatccgccaa aatatacaaa attaagctta tgtcggacgg attgaatggt
+    48001 gtttaaaaat agacaagaga ttatgaatga attcataaac taaaactcta agtttctaaa
+    48061 tttttttata tttgttatat agtttttgat atggtctaat cgaattcaaa acaacgtcaa
+    48121 ggggtacata aatataacat ttcagtttcc agaattccca ggttatccaa acgtgaaaga
+    48181 cgagtaccac tgacagccgt ctattaactt ctgtgaactc aaacatctaa gagcctatta
+    48241 gcaaccagag ctcaactact catgaacgca aacaaacaaa gctctcagga ctcgcaggca
+    48301 tcaggagacc ccataattaa ggtaacctga gctaattttc cgagcacacc acaccgcgac
+    48361 agacaacaat tgtcgccaga ttttgaaccg ttcctgccaa acaacccttc caggcaggat
+    48421 gtccatcatc tgacaagccc agcccaaccc tttgtgttcc cggccccgtt gacaagcgct
+    48481 gaaaaatatc aaatttcatg caatttcaat gtgaagtggt ggcaggagtt gcgaagagca
+    48541 aaaccaaaag aaagccaacc aacgctagca aagaaactca agctattgga cttgtaaata
+    48601 ccacttaaat aataaattta gttgttttat aaatgtaatg atattaagtt ttgatccact
+    48661 gaaagttttt ataccataaa ctctgcaaga aaattcgcct aattaaaaaa taagaagtgt
+    48721 ataattcctg cattttcaat ttttttttta agtgatgttc aagtgtcata tactcctttg
+    48781 cccactgatt acagggtacg ataaaaaaga ctcggcgcgg gaattgaggg aaatatgacg
+    48841 acggtttttg agtgacagca gcgcggcaac acctaattca atttaacaat tcatttatca
+    48901 taatcaggca tcaagccgtt gtcactcaca attacgatca agccgtagtg aaggccgagc
+    48961 agaaaaggcc aacaaaagga cgccccgatt cctcccctca atcttctcgt atcgcctgat
+    49021 tgccccgcgc tactcgtctt actcttcctg ccagcctgcc gccaggatct ccttcctctg
+    49081 ccccattgtc ctccgactcc tcggtcttcc tgtctcagtc tcagtcgctt cctgccagcc
+    49141 tgcctgggac tcgctatctg tcacttggct gccagcggtg caacgcagca cgaagtccaa
+    49201 ccagtcggcg gcgaaacagc caacaactca ttgtcagtta aatgttgaga gctactcaca
+    49261 ttttaaatga gacaacagcg aaacagctgc gaaacgcaga agactgtcag atctcggggg
+    49321 aatatcgaga aaattcatag cagtcaagac cttggcggtt accaaaaagt taagcgaatc
+    49381 aactccccca cgatttcttt caaagtaaag cgatcagttt taaactaatt tctaacccat
+    49441 ttcttagttt cctacacaat ataattaatt catagaaatt caaaaaatgg gttataggtt
+    49501 aatcatcatt ttatgttttt agaaaggact atattttacg aaatcatttt attcttaggc
+    49561 ctaccagctc tgtcatatgt acatatattc tgttccatat tcactaaaaa ctacaagcag
+    49621 gagatacatt ctgcagcttc tagttacgtg atactctaga ctatcgagct tccagtaaca
+    49681 tttcagataa gacttaatta aaatgaaatt tcaaaaggtg gtcacaattt ggattgctgg
+    49741 taaatattcg tcgaaaatat cctcgttgct ttagtgtacg tacacatgtg catattacat
+    49801 attccgtgtt gcatttctct ttgctcagct gccacttcca ctggccatta atttaacggc
+    49861 caacagcagt gacagaacag agggaacacc catgctgccc cttgtgttac gttgctgcgt
+    49921 tacgttacgt tagtgtgccc cctccgcacg ctgtgtggca ctatctgttt ggctcgtagt
+    49981 catacaacac acccaccccc ctcttcgagc ccttttcgag ctgtcagcca acaatgcttt
+    50041 cgctgtcaga aacagcagag ttgctgctgt tgctgttgct gctgtgcaat ttgttatacg
+    50101 ttgcactgtg gatttttata acaatttcta ccatattttt ctagaccaga gtttggggcg
+    50161 agggtgtatg gggcatataa ggatggtcac atgtcaacaa caacgatgca ttctgacagc
+    50221 ccaatgacat tttctacatt ttcgtaaatt gcttcgattt gtttgggggc ccttggcgtt
+    50281 gttggttgtg ttatatgacc gaaggcaaaa tgggtcccca ctgcgttgtg caatggcaac
+    50341 atgggaagga tgtgacatcc acgtgaacgt atttatttat atagattttt tatcggctgc
+    50401 acatttttgt aaactcggta aagagcttgg tgaaactgaa aggagctttt cgatttcagc
+    50461 tccatcgctc taaccgaaac tgcgactgca tttcatactc aatcattttt aggtttctca
+    50521 catttttgct gactctttca tcacaattga caccaaacac tgccagcccc tttatgtttt
+    50581 tattataccc ataaattttc attatagtag atatgatgta cattagcttg ttcagtttcc
+    50641 tttcaagtta atttgatttg atggcttttt cgaatgtgta tttttataaa tatcagctgg
+    50701 cgtttataat gatctacacg cgcccaccat tgcaagcatt tgttttggct ataaaatatg
+    50761 agccaacacg gcacttaatt ctcaacgaag cattctgtgc cacagcccca tgcactgaaa
+    50821 ataaggccgt aaaagtatgt tgaatgcact ctaaaaatac agtttgaaca tcaaatagtt
+    50881 ccttagaaag aaataaatat cacacctatg ttaagttggt tcaatgtgta ttgttttttg
+    50941 aaactacgct ggattgctag taatttcttc tcagtgcata tatgcaacaa tgtagactgc
+    51001 aattgtgttt accacttggt tttgaggcgg tgagtttgtg ccaaaatgaa tggaccataa
+    51061 agcaaagaca caataaacgc aactcgcgcg attagcaaaa taactctagg atgaccgagt
+    51121 acagtcagta tattgaatac aataccttgc gaaaagctct atttaataat aatatttatg
+    51181 catggccagg ccgtcagact aattgactca taaattagcc aaaggacaga cccctcgtcc
+    51241 accaccattt tctcttttat aaaatcttga caaattcacc cgcatgtctg tgacacttgg
+    51301 acatcaagca cggggtctga gaaatcatga tgggaactgc gacagacgga cagtgacaca
+    51361 aagtgcattt gacaaatgac atttcacaga ctgccagaca cacaatcctg gggtggatat
+    51421 tcaagggaag gcccacattc tggggcccca tcaaaaccaa agtgttttta agtgaattag
+    51481 tttctctgat tttttttttt ttttttgcct taagtactct tcagttgggt atcgtaatta
+    51541 ccaagaagtg gacctttaaa taattaaatt tatatctcaa aaattgtaag tagaaaaatt
+    51601 gtaagtaaaa atatatcccg ttaaaagtta taagctttaa caaaatacat tttcaaagtc
+    51661 attaaatatt cgctgtgcgt tttcagatgg cttattagtg cttagtactt cgcgactgtg
+    51721 tggatctcac ccttaacaaa atttatattt atgtcttatt tttaggaagt attcagtttt
+    51781 tcgtcattct tttataagct tttcgtcgtt cagcgtttcg cgttgccatc accgtcttct
+    51841 tcgcccaaaa gaagatctca atttccactt caatttgtct gctgtcactg gtaattgagt
+    51901 tcaatttaca ataatgtgct ctgacgtttg tgcggctgag cgccagctgg gaggctggga
+    51961 catccgagca ggcggagtgg caagtgtacg aatgtcttgc gattgcaatt tgtacccgct
+    52021 gactctggac tcccgactcc cgactccggt ttccttactc tgatcccaaa cgaagccccc
+    52081 gaccgctggg tttctggagt cagcacagca tctctttggg ggtcagtggt ggtcagggag
+    52141 ttcaacagtt tccaattgag ggggcgtgac tgccaaaggc gaatggaatg gaaattgtaa
+    52201 ttgatgcccc ggatattgat ggcacttact ggatttgaag tggtttgcat ttttagagcc
+    52261 attgtgtttc ctatggtggg atattcttca tctatgtctg gacatcaact tacagtttcc
+    52321 ttacgccata aataaatcga aaagtgaatc cattatgatc tttatttcgt tgaacaaacg
+    52381 taaatttgtg aaatcacttt atatcttata tttttataca tttatcatat agcctaatag
+    52441 ccttccattt gctttaatgt ttaacgttat gcttttatca tattcatatc atatatatgc
+    52501 ggtcaattac gggcaatcct gttgactaaa tccagcttag taaacttcaa atgtaacgtt
+    52561 tcctagacac cgaattataa accttttgtt atctgcgagt ccttgcccac tttactgagg
+    52621 tggtacaatt tttgcaaact tttcaaataa agccaaagaa cataggggtt agagaatggg
+    52681 atttgctgag ctggtttcgg gttcatacat agggcataat aaatcttttt aacgaaaaca
+    52741 ataaatcgca tttcacgcat actctggcac ggcgcaaaca ctctggcagc agaggagggc
+    52801 ggccaaaaat aataatggca agacctagta acggctggga agagagaagc caacgtgagt
+    52861 ttggcccaga gcagtcaagg taaacccttt gcccgaagaa aattgtagaa tttatgcatt
+    52921 gggtggagga attctcgccg aaagacaacg aaaacccatg ggccaagtaa tttctcagcg
+    52981 taaagcagtc aacgagtttc ctctttaacg aaagaaaaaa acacttagtg caactctcat
+    53041 acccttaaaa agggttcaat caactttttc aaagccttag gcaggacctg gcaaattgaa
+    53101 atttaatttt atgattatcc tgaacagagc tttattttta atttaacttg acttaaaagt
+    53161 tatttgaatg atttctctga ataattccat tttatttcac gattttattt aagtaaataa
+    53221 ttcgagctct aaaaattcga tacaaatttg tatctatcta gaggtatctc ttgttacgat
+    53281 acgaggtatt cccaaatgcg tgtatgcctg tcacttaaga ctttatacct atccccccac
+    53341 atgtgtgtgg cagttagcca gtgtgagtgt gattacacct acttgaagtc atatatcttt
+    53401 cactttgcgc tttcaacagt ttgtgacgtc ctctcccgtc tacacattta tcctccactc
+    53461 cactcgaggg ggcggttaac aaaagctctt tgatataccg ccaccaactc gacgcgactc
+    53521 gactttcgag acatgctacc aacacttcaa ccctatatat atatatacca aacccatagc
+    53581 catagccata cccatactat agcctcgcct catttgctgc tgcctctgcc cgaaaagctc
+    53641 agaagtgggc atgtcagcga aaatcaccgt aaattttccg cattgtttaa aatttattac
+    53701 actttcagat ttgccagcga actgaaccgc accgcgaacg gggggcaaac gatgcctgaa
+    53761 tgttgggggt gttggtgcgg ttgatgagga agtcgtttgg ggcacgccct ttgtttgccg
+    53821 gggtttctcc ggggccgcct gctatctctg acatgttgct gtctaataag tgtaaccagt
+    53881 aattttctta tcacgtaaac tgactcatgt agctgccacc ttttggcctc atggagccag
+    53941 aacgctagtt tgaatatttt aaaatctcga aactgatcgg taatagcgca tttatgggaa
+    54001 ttttaaaaac aaatcgttgg atatgaaagc attaactgta attataaatc ttattagata
+    54061 gatttatatt taaaaacatt aattactaaa tattatgttc atagagatgt actctgttat
+    54121 ggtttgataa ctattagttt tacgggtttt gtgtccaact cactttaata taacatggac
+    54181 ctcgatcatt tggcactgat gatagtagtg atcctttatg gcattaacga cgtaagtata
+    54241 gttatataaa agtaaaagat actattaatt tttttttagg tcaccccgaa ggttgagttc
+    54301 accaacatca aatgttcgtc ttctgacact tcgtacgttt actacgagag ttgccgtatt
+    54361 aaatcggtga ataggacgta taagtatatt tctgtgaatt ccagactgtt aatattgccc
+    54421 cttacaaatg ccacagttcg tataaaagtg atttttattc ttgtataatt aattaaaagt
+    54481 atatcttcta gataaacgtt gccctataca agcgatataa tggctataag ccatttctct
+    54541 acaacgtttc tgtggatgcc tgtagatttt tgagaactca aaaatcgaac atagtcgtta
+    54601 aatatttatt cgatttaatt ctcttaaaat cgaatataag aagtccgacc tgcccattca
+    54661 atgtaagtta accaaaatga gtacttatac gcctatagca aaaatacttt tttagcagtc
+    54721 ctttatttct gtggataagc tgactaccaa ctttctgaat aataagctta cgcaagttct
+    54781 gccagttccc gaaggcgact atctttttgc atttagatgg ttctcttata atatttatcg
+    54841 ttcctctgta aacgtatata taacgatttc ctaaatgtta gttttgaaaa cttggacgcc
+    54901 taggtaaatg gatatataca taatgtacat tcttaagatg acacctaatc aatttttatt
+    54961 tcagttgata atctgttcaa aagggattct tcatcatttt actaaaccaa acataattat
+    55021 tagatgtata acgcttatgt atgtaaatta tatgttgagt acgttagtaa taaatgttgc
+    55081 tatgtgtttt ttttataaca tcctggacaa agtaataatt tcgaatcaat tgacaatttc
+    55141 atgaacagaa tttgacaatt ttaagcacta aattatcata tcggaaacat aaatattaaa
+    55201 ctaaatacta atgctccaat gttatcccat aagcagacaa aatataatgt gtgcataatt
+    55261 ttcggctaat ccatatttca tgtccgacaa ctgcattggc ttttggaaaa tctctacgtt
+    55321 atcgaaaaca tcctgttttt gcggaaagcc cgtttatgaa atatatacat atatagtgcc
+    55381 cctcgctatg agtatctgat agatatgggt aaacaaaaaa aggttaagcc agaaccaaac
+    55441 tcgaaattcg aatctaagct cggactgcat ccgcttgtaa atcatattaa aatatttatg
+    55501 ctgccgcccg atatcccctg ctgttgcttt attttttgtc gtttggcttt cacattttgt
+    55561 ggggccaacg ataaattgtg aacaaatgac attatgcaca tgtgctgcgg tatctgtgtg
+    55621 tgtgcggttt atttagtgca tttagataaa gcgagcgaaa atgtcaactc tagcgactat
+    55681 ttttcgtgtt cgtaagtttg ttgggactga cactgccact gccacacagc tactgcaact
+    55741 ggctgcaatt atcgtgcaac aaatcatggc aacagtgggg attcaattca gttggggatc
+    55801 ggttcggttg gactcgcttc ggttgggtta tttatgagat tttaggatgc cagctctctg
+    55861 tgacctccgc cggatgttta gtcgcagtaa cttttgtttt gttgttggcc accggaggtt
+    55921 gggacttata atatttatta aaattatgca cacggtttgg catttaatat atgtaatgac
+    55981 ggttaaagcg gtagctggtt ccattgaatg gatgctcaaa acaggtgtaa ttattcctaa
+    56041 aagcggtgac tgaataattg atatcaatag cgatggataa atgaaaaggt atttactcta
+    56101 ctagcagatt tttggacatg attagcatat ttaattttaa tccagaaacg tgtttcaaaa
+    56161 cgtaatcgca ttccaattga agcatttaat attacgtacg gaaatgtaat aatataatat
+    56221 aatatataat ataatataat tgctatttat cgcagtaatt gtagtaaata taatagctaa
+    56281 gcctataaat ctgttcatag ggcttattaa ccaaaaattg attacttatt ttattaaata
+    56341 tataatatcg gattggatgt tttgacttac atataagagc agttcgaaaa acttattcat
+    56401 atattaatat tctagataat aactctttat agtcagtaca taaacgtgtt agttttcata
+    56461 tagaaaacac attttcgaat ttttcttccg tcgcaaccta tcaaacattg accgagtcca
+    56521 gacgtggcaa gaccagaccc aatgtcacaa caatctccaa tctccaatcg attcccagca
+    56581 caattgctgt tgattaagat gctgatggtg ccccctggcc acatcgctct ccatttgttg
+    56641 gctgttgtgt aaatgtttgt gttattgatt aattacattt acaacaatca aagttcattg
+    56701 agtgatacca atcaaaacaa cgccagcaaa caacatggga tgcagtgcga tgcaatgaat
+    56761 cttcgagtgg aagttggagt ggtggggcca aatggacgcg gagcgatcgg aatggctaac
+    56821 tgaatgactg agtgactgga tgactgagtg actgaatgac tgactgagtg agtgactgaa
+    56881 cgagcaaaga acatgtctaa tgtgcttgtg gtgttgcccc ttgctggctg attgctgcat
+    56941 gttcggcttg tcgctgcttg tatggcattt gtctgttttg ggctccgcgt tgctttgatc
+    57001 ttatttcttt ctcctttttt tttttttttg atcttcactc gctttgattg atcaattaaa
+    57061 ttcatttgta aacaacttct tgccgacgat gatgcggcat aaagagtaca gatgtattcc
+    57121 ccaacagagg aagccggcgt cctcaaagag cggagaagat ttatttggtt ttctatattt
+    57181 acgcgggagt cggtcgatct gcggtccggt taaagccaca acaaacacat cccgctgata
+    57241 ataattacca gatgattggg cctatctgga taaattggtt ttgaggttgc aactagaaac
+    57301 aagaatttgt aaatttgatt gatgcttatt ccatttttat ggtagaaaaa gccttaatct
+    57361 tatatacaga tgagtgttga tttattttta ttgaattata ttttgagtat tttcggtttt
+    57421 gaaatgagca ttttattttt aagttttccc acatttgcac agtaatttca gccatcgtgt
+    57481 tggccatgta ctgcaagttt atcgtacatt tttatagctg tcagtgcgtt ggctgtcaag
+    57541 tgtcaaaaac atttgcagcg tgcaacgcgc caaatgcaaa atgcaaatgc acttacgaac
+    57601 gtctctcgcc aaatgcctga caatgacaag ccaagagaag atatacatat ataaaaagtg
+    57661 tacgtacttg ctcgacggat ttgcattgtt cgccagatat gtattacaca agtcacaggc
+    57721 agtcagcata acagcagttg ggtttacaat agttcctcgt gttcgagcag gggcacacac
+    57781 atgactaacc caccaactga ctgactaact cgccgactga ctgactgcac gtacatacat
+    57841 atagcaatga tgataatgac ggtgccatgg ggttgatgat gctctgacat tgcctgacgc
+    57901 cttgattaaa ctgtcattta acttcattta ttttgacaca aaactgacaa gagaccccca
+    57961 ttctaagtgg cagcttattc aattttgcac acccgtcagc gaacaaagag tattcaataa
+    58021 aataaaatta aatactattt attatgtagt tcgtagacgt attccatatg caatcctcga
+    58081 tgcggtcatc acactgctga gagattgcat attcagatgg gttacccgat cccaaagtaa
+    58141 caaatcaatt ccactcatcc gcaagtcatc atcgagagga ggcaactcat ttatgctaaa
+    58201 taacaagtga ttaaagttcg ttttgcgccg atataaagct gtcactaaac ataaattaca
+    58261 atatgtcagt ctcgagaacc tttgactttt attaatgtaa acatgaaaag caacacgcac
+    58321 gccgaacctc cgaacgagcg cacaacagct acaacatgct gacactgaaa gccgagataa
+    58381 gttacccagg cacacagact gggtcttggg acctcgtctc ataccccgac catatagccc
+    58441 agacccagac ctaaaaaccc caactcccaa gccgggctcc acttggaacc aacgctgaca
+    58501 ggcaaaagtt ttgtccaagc gacgagctgt gtgtttgtta ttaaagaacc cgatgtgcag
+    58561 cagtgcaaag aagatgcaac acttgaggct gcctaggatg ggattatggt atatggtata
+    58621 aaatacaggc cccaattctg ttgttggctt ggttaatggt aaacttacat attatattaa
+    58681 tttatcctac aaaaatgact cgcttttcgg aaacttcctt ctctactcct gtcccaacat
+    58741 tatttttcat atcagtatgt ttcactgtac tataacagca ccaccccact agttttgcat
+    58801 tcatctggcc atacctccgt ataacgtcct gggcttttgt gtgtttgcgc tcacttagcg
+    58861 atcatatcat tttgacatgc tcgtgtgttt aagctgtcaa attgcctgct gtctacggct
+    58921 gctgtctgcg atagcttgtg gctccctctc tgtcctcatc tccatctcgc tctggcggag
+    58981 tcttaactat cttttcgctt tacatggact agtcgctcac tcctcggggc cccggttgtc
+    59041 tatttgtatc tggacaggca cttcggtcgg cctcatgcct ccttaagttg acgtgttacg
+    59101 ctgtcagcat ggagaaaacc tcaatttcaa actgtcagtt tatatgtcag ctccaactga
+    59161 cagttctcca tgttgtcgct gtctctctct ctctcgccga ttcgtttcgt ttctttcatt
+    59221 gggttacgtg gtttggcatg cgctagcttt ggcttttcca cggacttgga tacagataca
+    59281 aagccagcga cagcgacaga tacttagatg ggcatttgaa taatgcgcac acaagtctgg
+    59341 cttacaagag gggcttgctt ggggatacta tactgcttaa ggctgctcat ttggccatga
+    59401 tcctaactct attcatggtg cgcgccttct tcaggggaaa caactcaact cgactcattc
+    59461 ggactgctgt ccgcatggtg aatgggcaaa agatacggat gtgagtacga gtccgcagaa
+    59521 tccagtgatc gtttcccaac cgatgtttga ttaagtttta caaccacgat ttctagaagt
+    59581 gagacgactt tttaagtagc atgaagttgt acggccgatg ctactgtact gattaacttc
+    59641 atatgcgtat aataggtaaa cttctttcat gcatgttata ttttatacag taattgacat
+    59701 tagtcttgta aaaattttgt acttttccac taagtagtgt taaaaagtta accttgagaa
+    59761 aggggttttt cccttaaata tcaaaaagct attttgcatc ggccgggaat cgaacccggg
+    59821 ccgcccgcgt ggcaggcgag cattctacca ctgaaccacc gatgctgttg gaaaagttag
+    59881 gttttccgca taaaattttc cttttggcaa aaaagttggc aaacatcttt gaatatgaat
+    59941 atcaaattat ggttaaaacc ggcttttttt catatgaaga agttacataa gaaaaaatca
+    60001 aacttgttca aaataacaga ttaatatgca atatttgtaa gtgaatgtga ctatttttaa
+    60061 aaaattaaaa cgtagttaaa ttatggtata aaaaatgtat tttgcatcgg ccgggaatcg
+    60121 aacccaggcc gcccgcgtgg caggcgagca ttctaccact gaaccaccga tgcttcttta
+    60181 tttcttgcgt cagcattttc caataataat aactggccat ttagtttttc catcgatgcc
+    60241 gaatcgtcgt gccaataaaa aattggcaaa cataaacgcc ctccttcctc tacgatcata
+    60301 aggatcatta tcatcgttat tataattcct ttatttgata gatgttttgt taaattatat
+    60361 cctgagttaa taaaatacta aagtagaatg cctctctctt cacctgaggg acaaacccct
+    60421 tctcaggcaa ggacagttac ctggccgtgt tcttaagcga taaataaagt tgtgaacatt
+    60481 ggctcgattt gtttgtaggt aggtccgccg ttcggttcgc ttgccaaggt atggtgtggt
+    60541 atgggttggt gtttagcagt cgccatcccc atccccacgg tcttatggct gccgcagtgt
+    60601 ctccctcccc atcaccatcc accgacccca taaccatgcc cataccatcc atcctgatcc
+    60661 gagccttggc tttgtcggcc ctgtcgttcg ttgcactgcg tcggacacaa aagatttacg
+    60721 attatggcgc gactattaaa aagtttcgcc tctcgcacac ctccgccacc gctactcctc
+    60781 atctctcact caagcccaca tccaaagtca aaatattgtg taaaaaataa aaaataaaag
+    60841 gccaacaaac agagtggggg cctggtttgc agccggctct ctggatcgat taattatgtg
+    60901 cggaatttgg tttattgagt acgaaccgcc tttaaattgt ctaaaagacc cggagagctg
+    60961 tcagggcatg ttcattcgtt tcggggggcg atcgcagatc gctgatcgcc gatcggagaa
+    61021 cggagaacgg gtttcaacgg ctgactacgc ctgtaattta tgagtttaac tataattttt
+    61081 aacatttacg ctgcgcctgc gtgcgcttgc aatatgacaa ttttactgcg cgacaaggag
+    61141 ctctacgaag ggagtaaata tgtacgtatt cagtcaggag tcgaatggac tcgagttcgg
+    61201 agagatcggg gaacaggtca tatttggact ctacatggac agctggtcaa ttgagttggc
+    61261 ccgaatgccg gcgtgttggc ggggcaacca ttttgttgtt ttcgattaga ctgttaagtt
+    61321 gtcaagctga aattgatttg tagccactgg ccggcctctt ttaaattgct caaattaatg
+    61381 ttacacaagg tgttgccttc gttgaggtta caaccgctgc accaactctg gctgctcagc
+    61441 caactggatg ctgatcgtaa atcgatctgg ctaaccatgt gcccggcctg ccaaggggtg
+    61501 ttatgactgc tgttagatgc cggactaatc gcggttttgt acgagtgtcc aagccttaat
+    61561 cgatactgat tggcgtgaac tatttgcctg taaaattgta gctggtttat ttggagcttt
+    61621 ggcaacactt gtgctgtttt gaagcaagca cttccacagc ttaacttccg atttcgttat
+    61681 ttatatccgc ttcagccact gttaatcaac attcttctca cagtaaactt ttatatagtc
+    61741 ataattacat ttattagttg gtaaaatata taagtttcga ttatattcga tttattattg
+    61801 taatcctagg agatcgggat agtcctgagg cgactgcata taccgagcac tgcgatacag
+    61861 tttaagagat gaaggcttat ttacggcatt aaaaagcatt tgaccctgta tgctcagcac
+    61921 tccgttaata tagtcattcg atagcttttt gttcaggggg gccttattct ctggcccttc
+    61981 tactttcact ggagccttat ccgatttctt ggtgccgaaa ccaaggatgg aaccctccac
+    62041 aaccaaagcc aaagctaaaa ttaataccaa agtccaacgt aactgcttcg attgcatttc
+    62101 aagagtttaa tgctatgaag ttctccgaag agtctttttt tcccagtttg atatgagtga
+    62161 ttgacacata acactcggct ggcaatggat gtaatcgtac tgatcggaca cgaacaagtg
+    62221 cttaatcgct ttatagcgtg atttctggac gcgattgcaa cccactaaaa atcccaatta
+    62281 actgtcattc tcattatatg ctggttatct gataatccaa atattgcgat tgcgattgcg
+    62341 acactcaagg ggacgccatc atggtcggcc tgactaatca ttttcaaaat ccactccgat
+    62401 taggcgttat gcacacgtcc actaatccac taacgtttcc agctcactga tctgtggatt
+    62461 tcattggggg agcgaaaagg aggttggtgc gtggaaagca gtttccagtt accaaccaac
+    62521 taactaactg actgactggc tgactgacag tcagattcgg tctctatcct ccggatctgg
+    62581 cttcaaagga gtgtgcgacg cctttgacgg cagtggattt ggattgccct ggccccgccc
+    62641 cgcaccgcca cgcctagttc ttgatccggg gttccaagtg gcggggactt gtgagtgtgt
+    62701 caggcccgac tataatctga catttaatta ataataaagt tgacagtttg ttttgagtga
+    62761 ctgtgacagt tttaattgag atttgttctc caaataccct gcacggttcg cagtaaagct
+    62821 tatgcgtttc cgcccgtgca gatgacagtc attttggata gttattaaac ataaaagatt
+    62881 atttgctaca gcttatttaa agttggtgac atgggaagat tggaaactaa tgtatgttct
+    62941 gaatagagct tatcagcagc cattttattt gtagattaaa aaagttttat attcaccacg
+    63001 aaagccctaa acatctgagt agatatagaa tagttgcagc gagctgactg attttcccca
+    63061 gcgatctccc tcccactcaa tcttcatttt tagccacttg tagttggaca tcgacaaatg
+    63121 accatcgcaa tctgcctttg atcttcaagt gcctcaatct tggccccttc ccgaaccact
+    63181 aattactggg ttgtgcaggg cattcgaatg gacatgatat tcgaaacgag cggtgagagt
+    63241 cagtgagtga atgagcaccc accagcgtca acgaggtgcc actcagaaaa gtccaaacca
+    63301 cttaagtgcc ttgtgggctg agaggggggg aatttccagc caaaaaataa gaaagaaaac
+    63361 aaaaaaaaaa aaaaaaacgc ataacgaaat ggtctttcgg cttttgtgag tgtagtcagg
+    63421 tcgattgaga ggcgattagc gaacaaaaca gaagtggcat gttttgatga tgatcagccg
+    63481 ggcacgtcgg attgccagca atcaaatctg aatggtgtgg gggtttctct tggtctacgg
+    63541 gggagctccg taagctagac acgcgtaaaa acgctcccca gatgactttt caattgagtt
+    63601 gttaagctgt ctgatgacgg ttgtcaagtt ccctccgttc gctccccatt tgccgctaca
+    63661 ttcgctatat ggcaaatgct tgcgcgataa ttgagtttta tgattgacac gacaactttg
+    63721 ctttttaaat tgacataaat ttcaatatgt cgcatacaaa agcgaggaac acaactgcga
+    63781 gttaagcgaa ccgaaagcga tctatctccg cctgccccct ccaaattcac ccctgccact
+    63841 gcccaccagt tctcaatatt ctgccagggc gataaattgt gcaataaatt tcaattactg
+    63901 ttatttcgcc cggcaaagcg cccatagaat ccaaaccagc tctcgaccaa aaacccatat
+    63961 aaactattta agtgaaccga aaaaaaatgc ggaaaaaatg attgtggaaa aggaaggcga
+    64021 aacaagaaat atgcgacaca caaattgtga taaattttat gttaaagtgc atgaaattat
+    64081 atggcgacac atttgggcac tcataaattt tatttaaaca atttacaatt gtcattcgag
+    64141 aattgcttgt cgcaattgtt gttgcaatag cttttgttgt tgttctcgtt ggctttgcag
+    64201 tcagcgagcg actggacagt gactttgttg ttgggctata gtttatgttg ttgcttatgt
+    64261 gcggctttgg cctgcgaatt atttatgttt tattgtcagt tggaggttat tatattgcca
+    64321 tattgaagca cgttagcgct ataaaaataa aaagccaatg atatgcacgt tttaagtggc
+    64381 ttaaaaaaaa taatgctttt acgacggtaa gtgaaggttg ggctgacttc aatgcagacg
+    64441 ttgattggat atttgtattg ggcacttttc aatgaggtac tgaaacgatc attacttata
+    64501 atcaatttct accttattgt ttataacccc tttaaatata ctcttaatat ccttttaatt
+    64561 tcggctttca attattcata tattaataac tttagtgcat ctgcaaactg tctaagaatg
+    64621 acagctgggt aagcccgaca gcagttcatt cggttggtta aaccaaccaa aatggttggg
+    64681 gaaaattgtt tgcatcccga gtgcgcacaa gtcatttgcc ttcgcctggc actcgtgatg
+    64741 tgcgtttgat ttgccagctt aattaaccca gcgagttgct ccgaaacatt tccagtcgaa
+    64801 aagcagcggc agccgcaagt caatcaagtg agaacatcaa agcaaatgtc gcctgcactt
+    64861 tgcactgccg acaagatgaa ggcgaaccgc tgggaaagtt tcgtagaacc cgaattccgg
+    64921 aggacttcac tcaaagttct cgccctcgtt cagtccaact gacagggcca gacattttgg
+    64981 ccctgtccat ccactgcgca ccactgcgat ccacttcaga gcatcctgct gccagcagcc
+    65041 aaaatccagc ttccaccagc caacattcag cattcagcat tcagtgccag tgacaactta
+    65101 ctttggcatt tggctgtcaa catgctgtca gacatttaaa caataaatga ccaaacactt
+    65161 ttcattatgt tgctctttgc ggtttcgcct gcaaaggaca ggaaaaccga ctttgacttt
+    65221 ggctcgcaat tggaatccga gtttgaatcg gacgtcgatt caaagccccc cttttaaaac
+    65281 tgactgattc gcacaaagaa tgttggccat tcatatgaaa atacttttca aattgatgct
+    65341 gatgtgggtc aacttgtggg gcggtcagag ttcaacgttt ttactgctgt tactgtcata
+    65401 aaaacgttgc caacatgcgc caagacccaa aaaagttata cggccggccg actgaatgtg
+    65461 cttttggtcg gcgatacaaa tgcgaaccca tttcaaaaat gttgacaccc aacatagggc
+    65521 aattaaattc atttttcatt catctgttac gtgtgtgctg cgctggacac agttggcagc
+    65581 cttgttgaaa gtaattgaca tattgcgttg caaagtttga aaggtttttc tcttatggca
+    65641 aacgcacacg cttttggccg gaattctgtg gactggtatg tgtgtcgaaa ggctggccag
+    65701 aatcaggata tttctacttt ggtaaacgtt tttgccaact gtcaacatgg atcgattatg
+    65761 gtggagactg gaaaatcata tttagaatat tagtttcgtt ttgaaattct tgagctagga
+    65821 gtttttgtga atgcttaaaa tatcttcttt tgacccataa agcaaatatt atttttctgg
+    65881 tagatttgtt gtttcttgaa taatttctca acatctttta tggactcttg aactttttag
+    65941 caattatttc ccataaaata tcactcaccg cacccattaa tgcccacaat agattccaaa
+    66001 tatccagtgc cagcgattta ttgccttacc ctcacaatct tcttgtcact tccagtgtca
+    66061 gttggctatt aagtgcgttt cgaggccatt aatggactct aaacacgttg atggcattct
+    66121 ttccttcaag acagaacatt cgaacaaatt gctctgcaca cattaggaag atttcgaatg
+    66181 gctttctcct cggtccttgg acaccgctcc tccaccgctc tccacatgcc cacgcacctt
+    66241 gggcactaag tcgcctccat cgtcgggcca taaatttcgt tttatttatg agcatttaat
+    66301 ttttgtaaca ttaatttaaa cataactgcc aaaacataaa tttgcttccg tttcaactaa
+    66361 ttatgtccgc cttcatccct ccggcaccct cgcttcgctt ttcgtggcaa tcgctcaagc
+    66421 atgaaaattg ttgactgcac aaaaggcaac atcagaaaca cccccgcaca gtcgaaaacc
+    66481 gcataaattt aaatgtatgc ccccggtcgg gatcccccaa gcacccctcc acacactctt
+    66541 ttttccgttt agggactgtg cgagcgccac gttttatctt tccgttcgcg gggaaatggg
+    66601 attatgggtc cacccatatg gaggggggca aggcggtggg gggggtggag ttctttaagt
+    66661 gccgttacac atgcatcggc accgacatcg catgcatata aattttgatc tgttttgccg
+    66721 gctgcccttt tgaactgcgt ccaaattagt tgctcatcca ggcaattaaa caaaatgggc
+    66781 cttctgcagg gctggataaa atgcatgaga attccttcaa cttaacatac tagcttttgg
+    66841 aaaataaaag gagccccaat tgaaattgta ttttttaaag attttcactg ttcctagact
+    66901 actataaagt ggtctttaga tttatattga taacttatat gtcactgata ctctaattcg
+    66961 ctagatagta aacataatct acataaatta aaagaagcaa attgtttttt aattattatt
+    67021 agttttcata attttacttt aatgtttcat agaccaaaat ggtctcaact cctttcacgg
+    67081 ttctacttgg ctgcttctgg tgccctggaa gccaaaaaca aaaaccgaat aaagaaatac
+    67141 ggccagagac acggcacaaa cataataatc ataaatatat caggcagcta actttcggct
+    67201 tggcataata aaatccaatt aaaaaccaaa gaaagcaaat gtttacgacc tgacacagtc
+    67261 caagggggcg atggaccgca gaggggaacg gggtggccga aaagggctgt ctgtctgtcg
+    67321 acggatacat tgagttatgt taatgggcgg cacaaaatat gtctgcgccc ccacaaagag
+    67381 gggcccatgt cgaattggag agtgtttttt ggggtagcaa tttctatggg cagtgcagac
+    67441 aaaaatattt tccggaaggg ttgaggcaat aatatttgaa atgatattgc caagatggat
+    67501 atattttcat atacatattt gtttaaaaat actttgagtc ttggcaccct gaatttcagc
+    67561 attgatttgg taaatatcaa gttaagagca ttcacaagaa gtttcccttg cgatcttcat
+    67621 gtatcattta agacccttag cttatacact ctccccagtt ggacatagac aaaacggtag
+    67681 tcaacaagcc ctggcagcaa cagcatccat tcgcaactaa caccacataa ttcacaggaa
+    67741 atgacaaaca gctgagctcg agccatcagg gcaccctttc tcctgggggc tgctaagccc
+    67801 caaaccccca aaatccctgc cccttggcaa gtgtcagcgt aaatcttgaa actcgccgag
+    67861 cagaaagtgt tgcctttgtg gaaattgact tggccaaaac cagcagcaga ccagactcgg
+    67921 acttgaagtg ggtgtgacga ggtgggccga gacggtggtg ctttgcactt tattatttat
+    67981 gactgtcggg ttgagtacga attcatttta tttatgaagc aagtcagcca agcgaccagc
+    68041 caaacagcca aaaaagaaag acaacccaaa acgaatggct aacggacaac tgtggtcgtt
+    68101 gtggctttct cgactactga aaaacaaaaa gcaactcttt cattctacgc ccagatggga
+    68161 aatggtttcg gttcacacag caatggttta tgtggcttac aaccttaacc attaatttca
+    68221 tattgaaaat gctctgtttg agcagtaaaa tttccttcat ttcaaaattg acacattaca
+    68281 tttaacgcct aaatttaaaa ttaacataag taaatatttt ttaatcgtac actaaatagg
+    68341 ttacgattaa gtaatagctg cctttcacaa atgataaatc agattttttt tataccatcc
+    68401 caagaaatgt tctatattta agtaaggacc aaaacttttt atttccactg tacttacagt
+    68461 gaggactccc caaagaacgt aatcccatcc gcagttggct acacttgaac tgtggctcag
+    68521 cgagacaatg aggccatccc ctttcctttt agttcctccc tcaatccacg actttggcca
+    68581 cgttcgtagc aattttcgag tcggctaatt agctttgggt gtattgttta taaatttatg
+    68641 acgcatgtta aatgggttta atttatttat ggcactttga ccgcccttac aagagccaag
+    68701 tccaagacca tggccgagtc gagtgcttaa gctgtggcca attgaatttt cgtttgtgaa
+    68761 ttatgtttat gagctgttct gttcagttct gttttggttg ggttgtggtt ctggttctag
+    68821 ttgtggcgtg atgggctttc ctggttgggg gacactgctg aggcaatgtt ccacatattt
+    68881 gtgcggttaa ttagagacgc ctcatggcgg ggcgccacca gagtaagcgc ccgacaagag
+    68941 acccaattaa aacttcaagt tgtcagctat ccagctatca gcaccaccat cctcccccgg
+    69001 cgggtcaaat ggctctatta tgcggattcg aagcgaaggt gggcggagaa atcaaccccg
+    69061 attatgaaaa tgggaataca aaattctgat gactatcatt atgagtatgg tgattattca
+    69121 aaatgtacaa aactctttaa tgtgctcgag ggataagaac gacttaattg aataattttg
+    69181 gaaaatgcag ggtatcgggt atattaactt tgaagtagga ttacggattg ttatcaaatg
+    69241 ttactttcgg ataagcattt gaggtcctgc ttcattttta tttagctgaa cattggctta
+    69301 ctatatgaaa tgataatggt agggtgtaat aaatatcgaa atagtttaca ttatataggg
+    69361 atgtttctct tagttattcc ttaaagttag ggtatttttc aagccgatct ctatacggcg
+    69421 tggctactcc ttccgaagaa attcggttgt cgcacccagg cgccggggtc gaatgatcaa
+    69481 tgggcgctaa ttagtgtgtc ctctgcttcg gaacctttac acatgcttgc tcgattgccg
+    69541 catccactcc gctggacgcc tccatgctcc tgacccgggg ggttttcagt tacaagtgct
+    69601 gattacggcc atgcctttcc ctagtattat gtatgtgttt tttgtttttc gggtttcccg
+    69661 ctgcggccgc aggtcattta ttatgttgct gttgttgggc gagacttgaa tttcaatcat
+    69721 gagctgttgt tgctgccgct gctgctgccg ttgggccttc gattagaaat tgcgataatg
+    69781 tccgatggcg acaaagtttc cgaggcgccc agaaagaatg gccagttgga gaaccgactt
+    69841 tcaccgccac atcccacatt ctacacaccc aatccccgat ccccattccc atatcccaca
+    69901 tcctcgccca tcggcgcctg tctgcagttt gtcaatgcga cggcggccag agggcttatc
+    69961 tgcacttggc tgccactaaa ttatgtttta tctgcatctg tgggtcatcg tgttactccc
+    70021 atgcacaggg agtacattta gcgtatatgt acatatatct ccgacgctgt tgtggtcact
+    70081 ttgatcgatg ggccccgagc gctaatgagc agcaattaat aaagccatgg actttgttga
+    70141 ccgcaacaat attccagtat atttgtgcgg gcattcggga gaatagagac attccggggg
+    70201 ggttggtggc caacccaatg gccagtgact tgtcggtgtc cagctgatta atgccacgcc
+    70261 acgataatga ggttttgact tggactcggg ccttggctct ggtatatgtt ttaatgccca
+    70321 tttgattgat gcccaaggcg aaggcaaaac cacttgaggc actgccaaaa actaagtttg
+    70381 gcaaaagtat tttgtggaca actgagtggc aatagcttga gggttgatgg gttaattctt
+    70441 gagaactcga aaagggttgg ccaactcaag ctgagataat tctatatcac aatttaaagt
+    70501 cctcagctgt gtagatgggt gtgcatttaa catttaaatc catagaagtt ttgaataaat
+    70561 gtttaaaagt gtaaaataaa taccggctgt tattaattaa cttgtcagct agatagacaa
+    70621 aacccagcaa atagaacagc atgtttatat aaagtgtgtt gtggccagaa aaagttattt
+    70681 tccggcacct gtcagacaaa cctttcaatg ccatttccct ttcgctgtca cgcttttctt
+    70741 ttatttataa aacgcccagg gatagggcag ctgcaacgat taccacgaaa aaagcgaata
+    70801 taaatatatt taagttatgt aaatgaaatg cgctttgcaa ttgctttttt ggccgaaccc
+    70861 ctcccgcaac actgcgcaaa aatacatatt ttttggcgca gtagtttttg gagcgcgtgc
+    70921 aaaaataatt tatgggggcc atcaacaatc cttagagctg aaatgataga tggcgtgcca
+    70981 ggatgtctgc catccagcga gagtcggact aaaactgaaa ctgaaaccga aactgagtct
+    71041 gtaactgagt tggtaaccga gagtcggggc caaagtcgaa ggcaagttaa cccaaaaagc
+    71101 cgaaatacgc tgcgggcgca atatttctcg cactcactgt caccgaggga tatggaagtt
+    71161 ggggccacgg gtccttcgac tggatggtgt cgagcacttt tcgaattgaa atgtcgtttt
+    71221 ggctggcgct agaatttttc attgccaatt gctgccagca gtggcaactc ggggtgaaga
+    71281 cgggctttgc caagagctgc catatttctc gagagcccgg cacaaatgtc acatcaatct
+    71341 tgtgtccagc gttgcgtaag aagtaaagaa aatattcatt ttgttttttt tgcgccatgt
+    71401 cccgcttctt tgccacccca aatgaaggag aaacatgaaa tcaagtcttc ccgctggctg
+    71461 cttgcgaaaa tgattaatgg cctggcgtgt tgtgcgaatt ttatatgcaa gacggctagt
+    71521 gcacaaaaag gccaagaagg gccagttggc cacgtctttt gattgaatga actgtcttat
+    71581 ggcgtccgca agttgttcat aataaactat gaagagactg attgtattag gatattatat
+    71641 ttaagttcat taaaaaaaga ccgcaatgat attaaatcta gcattactta gaaggttaat
+    71701 tggttgtcca agttaactaa tgccttactt ttgaaccttt gtcttttagc aacttaagca
+    71761 tttatcgtgt aaaataaaca ttctattaaa ttgattacac tttgtgttta aaattgaaat
+    71821 ttagcgcacc ataacgcaat taaaccagca ccctcagcat aatttgccaa gtaacgttgc
+    71881 cacacaagca caagttatca agtcatcaag tgaggttgat accacccccc tttgaggcca
+    71941 ccgcatagtt gactacacca gccccctcta gaaagtctcc tcatctccta ctccagtgct
+    72001 cacctcgaac atggccacag cgtcatttgc gctgcttaaa agttgattcg aaaagtttta
+    72061 catcgcaacc aagtggggaa agtggcggaa aagtgggtga aaaggtggga aatttgcgag
+    72121 acaggaagtg agagatgggg acgtggatcc ttgaacatgc cgccgtcgac gccttcgctt
+    72181 ttcatcctgg cttctggctc ctggctcctg gctcgcacac acagacacac acttgcacgc
+    72241 aattaggcat aatcacctac acacacaatt aaacaggaac ccgccaaaac accaactcac
+    72301 acagatactc tcgcacacat gcatcctcgg ccgggttata acctgcttgc agcgccatcg
+    72361 tcgccgtaca atttccgcaa gtcgtaaaag tcggtaattt ataccttttg ccccatatgc
+    72421 tacggcatgg taatttgtta agtagagtta aaataattgc tcgtatttat aatttacaca
+    72481 ttttgccttg gaaatttatc gtcgcgctga aagtgcgcta gtagatacag gtcccaaagg
+    72541 ccccaaagga ggatcctgca ctccgaaaac acaactgtaa agtgtaggta aggggttttg
+    72601 gggttttggg ctggtcggcc aagttgccaa catgcctccg ctatttttcg gtgtattttc
+    72661 cacttttgcg ctgtgcgcaa atttatggcc acaagttgcc cctgctaaac tgaatgaatg
+    72721 agtaagtgag cttctgctcc aggcgatgct gcagcagcac ctccctctgg caaagttgaa
+    72781 agttttagcc gctattaatt tcccacacca cccccttttt tcggggtaaa gctgggctct
+    72841 ctgtggggaa tatgaatttc acttgaagag atttaccgac cattcggggt ccagaatgaa
+    72901 agaaaaacaa tttgatttca cttgataaac gctggacata ctaaagtatg gcattcatgt
+    72961 actagtcgag tgtatttgtt tggttccaga ttctccattc acccaaaaac cttaatttta
+    73021 aaattagtaa ccttaaaaca aaaaatttga attttcaact ttcagccatg ccctgaaaat
+    73081 taacttgcca ttctagtatt atgacgaaaa atggtttagt accttccaaa aatagtatat
+    73141 tgtggcataa tattcatata gtccttgcat aaatcgtgtg tttaagggca tggggaaata
+    73201 ttaagagaca aagtaacatt ttatatgaac ccaaggcttg gtaacattca ttttccgtcc
+    73261 aaattaccaa gtaactccat tttgcacgaa aatttcgaaa accataaaac attgtgaacg
+    73321 ataaatttca ttttaatggc tggagaatgg ttggggcacg gcttttttcc ctctcagttg
+    73381 cactcagtta cattgtgtgc ttgctggaaa cattttataa aacatttatt tgcttgattc
+    73441 ctagcctcat ttccgccaga catttcgcac ccgcttcgag cgcaaagccc aaagcccgaa
+    73501 gccccttttg gccccggtag aattctaccc caaattcagc agaaaagata tatattttgt
+    73561 ctgcactccg cgctcttcct tttcgggcca taatgaggcc ataaaaaggg ttaaataaaa
+    73621 tgtgcagcaa ttgctttgct aaagccaacg aggtctagtg taccccattc acctcgtttt
+    73681 ccccccctgc cagccattag gttccgacca ctgacagcga aaatttgccc gattttcgcg
+    73741 ccgttgctca agaattatcg acgtcttttt tatttggtcg tggaatgtca gaagaagagg
+    73801 cgttggctct gggttggaaa agccggggga aaaagtgtgc agcgggtggg acgcagaaat
+    73861 tacagcttgt agctacaaat tgcttccgtt gcaggcactt tggggctctt tagctgggag
+    73921 gagggcagcc caaaatagac acatcaatga aatttattag tttgcatgtg caagtgcagc
+    73981 tggcggtggt cgggattttc catttcccgc cggaccgagg tctaattcgc ttccttaacc
+    74041 cctgcccccg cgaacggatg taaattgctg ctaatttgct ttgtacactg aaattaatgg
+    74101 aatctcaaat gatagccagc ttgacggagt aatggtagac gtgataaatg ccatttagag
+    74161 taacaattat tcataagtag gtaaaaagaa cattcaagtt tctactacta ctactattaa
+    74221 atgctggatt aaagcaatac acatagtttt ccaacttaac tttcagaaaa tgtaaaaatt
+    74281 ctttgataac agttgaaatt caaattttcc atttcatcca tccatcctaa aacgaacaca
+    74341 ttcagatttc cttgaatttt gcagagttga agtctgttag gactttgatc ttgccgccct
+    74401 cctttcacac atttatggct ttatgatgcg ccccatatgc acgacaaatg cagcgaaatg
+    74461 aaacttgaat ttggcttccc gatccgccca ccaattcacg tgccccccac tttacctttt
+    74521 ggccgccacg cctttacgat gcctcaaatg gtgtgataat taggtgaaca gacgtctgcc
+    74581 aacgtgtgtc ggcgcaacat caaagagaac actgcgcccc acgaccgctg cctcctacca
+    74641 cccccccgcc cacaatcgcc gagtagtatc cgcccctgta aaatgtgcga tgacgaattc
+    74701 ccagaactgt ttgctatgtc agtttacaat tgttgggcat tccgcggaac tagatttcta
+    74761 gattccgagt ccaatgtttg tctaggcgtc gacattttgg gtgtttatta tctgagaggt
+    74821 tggttgttta tttattccaa ccaactgctt gggatacatg tgtactttgg gaccgggcaa
+    74881 acaggagcta cgatgcaaca gaatgcaggt aggtaggtta ctagcacttt gctaaacatt
+    74941 ttaaaaagcg tttcaacacg agtaaaagac ataaaatgaa tgctttgaaa cttactaaat
+    75001 ttacgagaaa taaaaaaatg agtatggcta taacccattg tgtgccaaga tttgagctta
+    75061 tgatttatgt actggacaat caagatcaac ccaattaacc atagaaacaa cagaggttaa
+    75121 agcttgctgc agaccttgag gccgactggc caaaataata cccatgtgca acgtggactg
+    75181 gcatgccata tttggtggct ctgcgtcttt ccacctgcaa aacggcagac aatgttgcag
+    75241 gtcaaaacga ttcaagcgcc aacggattgg aaccgcggtg tcagcctaag ccactttgca
+    75301 actcgtttgt tgcccctcga tgatccgtgg acatacgttg cggcaacgtt agccgcctcg
+    75361 aatatatcta tctatatatc tgtatatatt tttgtatcta gcaggttgca tgcggccgcc
+    75421 tgtcgctgat taatatcgcc atcgtcatcg ccactcgtgt ctgttccgca gataaggcgc
+    75481 tgcctccaag acacagcccc tctagatcag tccgatggat atatctatat gtatatccac
+    75541 gccacagcaa accggcatcg atgccacaaa tcattgccaa acatgatttc ttattatggc
+    75601 cagcaaaggt gtcgctggaa ttcgtgtttg ccttccagct tagacttgtt atttctgatt
+    75661 taaatgcttg taactttaat tataattaat tagccattta tatatgtgta cagcttgatt
+    75721 tatccctcag gatgttagca gatagcaggt gaaacccttt tacgtagata ccatattata
+    75781 atgctataca atacagcttg tttaactcgt ctggttaagc ccactcactt ttttgtcatt
+    75841 tacttatggg acttcaatta atatttcgaa tggaaagtta atcaaattcg gatgatgcat
+    75901 atcccagggc tatcagataa atagccatgc gctcaagtaa atatatcact ttcaatgata
+    75961 atccaaacgt gaaagcttcc tgctgagatc cccgccatcc tcctgtccat ctatactata
+    76021 tgtatggtat atatagtttt ccgtcgccca catggagtgg gcctccgctc attagctgct
+    76081 gcattcggag atgctgaaca agccacttaa tttaaattta aatgtttact tctcgctatg
+    76141 cgtgtgtgtt gcgtgttgtg tggggccgag tattttgtct gcgttgcttt tagtgtttct
+    76201 acaccctcct caaccaatgt atcactcctt cagtacactg caaaaatata aagcatgtga
+    76261 aattaatgtt ttaaattata atgctaacca ctctagcttt atatattaat tttacgaatt
+    76321 tcttattgat acctaataac cataagtata acatctaaag ttactttacc aaagttagca
+    76381 atacaatgtg tagagtgact gtagtgacag cttatgttct gttctaatat gcagtttaag
+    76441 ttattttttt gaacagtgca tcatccgctc tgtgtcacga cagggcaaga tttattttcg
+    76501 gtattccatg ggcgactaaa atgcgctgtc agcgcagcag ttgctgccat acgtcgccgt
+    76561 cgcccggcta tggtcatgcc ttgggcttgg gccatataga tacattagct gatggtggtc
+    76621 cggatccgga ccaaggggac ccacagagcg atcgttcccc ctcattgcac ttgacactcg
+    76681 aaattgatgc gccactgcgc ccaagccggg ccacattaat taggcaaaat attgacacgc
+    76741 tgacgtgtgg tgttgcctat tgattaacgc gtcttttcaa tgggatcggt tttattggat
+    76801 tatttttagt gggattttgg ggggattggt ttggcttagt ttggatcttc acttgtggtt
+    76861 ttttgtgtgt ggttggctat tgaaaaggcc tagtagatgg gcttcctctt cggtggcagg
+    76921 tggctgggaa tcctcatgtt gttggcgttg tgagctggaa ttttctttgt cttcctgctg
+    76981 ccgccgtgga aaggacgaag aacgcgtatg atcaactgca taaatgctcc ccagcgcgca
+    77041 tgacctaggg tttagatgag gctttggatt aacctacatg ggactgctct tttcgtggtc
+    77101 ttttgatgcc cttaccatag tcctggaaat tgcatttccc ctttgccatc ttggtgctga
+    77161 aaacctctgc cttcttatcc actatctccg aactagtaac catattcgag agcagcgatt
+    77221 tagatttctt aacccgcggt gtttcagcta cctcaggact atgttctggt accacattgc
+    77281 atacctcaac gataccagag ttggtcttca atggaggacg atcctccgat ttcatgccat
+    77341 ttgcgctggc gaaccgagca caactctccg tggacttctt gagcagaaac caggattgct
+    77401 ggagaacatg accttcttcg atgagtttct cgtagccatt gagaatgcgg cgcacatcca
+    77461 cgttgatcag ttcctcggga acctcgggaa gatcgccaaa ctcctgaaat caatacaaaa
+    77521 agtaaataca taaaatagtt gcaaagaata tttcaaaatt ttgttgagct gcacttttta
+    77581 gttgcaattg tatcaatcga ctgtcatatg aacacaacct ttaaagccct taatcgcttt
+    77641 cagcattcca ataaaacatt tttgggttat atacaattta tttaaatatt aagtatttca
+    77701 cgctgaaaga caatccaggt ctgaatccaa aaatgttcgt cagtataaat ccaatatatc
+    77761 ctcatataaa aggtttgcaa tttttcaaat gacaaaagta acacttaaat gaactacttg
+    77821 tcaattaaat tttgctatga acgtagtagt ttagggtaat aagattaagc aataattatt
+    77881 gatggtaatg caaacccacc tgcgagttct tccgcagatc ttcaaagcgc tgaattagct
+    77941 tcttgatatt aagactgctc ttggaaccct tggtgatccc ggcacccaca cccgctgtag
+    78001 tattggttgt cacattttca tcgatcgaat cggttggtaa tgtctcggat tccgacccat
+    78061 ccatactgga ggagctcatc ctgctgagta tcttggccgt ttgctccatg aaggacgcct
+    78121 ggcgatcgcg tagctcctgc agtcgcatct tcagctcgtc gaggtatgcc agctggcggg
+    78181 gtgtgggctg cctggttgcc attgtcaatt ttgctatccc acttttagat tctttcgaac
+    78241 atgtgaatgg atccgaaaat gcttaagttt tcggtttttt tttttgggct atggatactt
+    78301 gtgttgggct tgtcagacca attgtctcta aataagagtt tcagtgctta atactatgac
+    78361 tgcaaagaga ctgactactc ggttaccggt tttatgacaa cgccaccata gtttgtaact
+    78421 aagtatgctg tgcattggaa cccaaggaag taagctataa ccattcgaat tatttcttaa
+    78481 gagcaaagac ccttttagcg atgtaactat tctatcatat acgtaatttg atgtaaagtg
+    78541 gattagggta tttttagagt atcgcttggt cgtatttcat aagtttggct cgctctttgg
+    78601 acatggccta ttactcgcta tttttatggg ccccaaattg agatatctcc tgcaattctg
+    78661 gctgagctaa ctaagttgta actgcccgct gcctcacgaa accgatccct cccccaactt
+    78721 tcattcgcct gtggttgaac aatggagcga tgtggcaagg acagtgggtg cggaggggca
+    78781 actcctgata cacctatata tctctggttt agcgtcttgg ctggactcac ctttgcgatg
+    78841 ctgcgggtgg ttggggctag tttggaattt cacggaggaa aacgtgccgc tgccatcggc
+    78901 tgacgcctac aatgaagaaa agggggcgtg tcggccactc tgtgactccc agactgctgt
+    78961 tgatttcccc cggaggtgga taccaacccc tccagcatcc tcctcccatg ccagagagtg
+    79021 tgcaattgtt gttttcatat tttaatgcct ttttgagttg gtaggcgtgc cacgacacga
+    79081 ttcttcttac cgataagctg ttatttttat gggcccccgc cgaatcggat cgacttgaag
+    79141 tggagtcggt ggtgcagggt attgccgggt ggagccgggt ggtgctccgt ggtgtgcggt
+    79201 gtgggctggc tgccgactta aagaagttaa aattattaca ccgcaattgt cttgatcgtt
+    79261 gatttatgtg cgcttttgtt gtcggctctc ctggctcgcg gctatggttt ttcgcttttt
+    79321 attacttttt tgaagttgcc agttggccaa caggcaaatt tgtgtggctg ttgatattta
+    79381 ttgggcatgg tccgggcatt ggcatcaatt gtgtatgcaa catgggaagt tgattatgcc
+    79441 tctcattagc caccgtattt tttccttgtg cacgaaatgg cgacaattcc attcccatgg
+    79501 acatgaaagt ggcccatttg cagtttacta gcacatcggg tttggatagt aggtaattta
+    79561 ttttgctagc atgtcttttc gattactaat agttattaaa attactaaaa tagttaagtt
+    79621 aaaataatag atttattaaa gatcatattg atcttgacat ttttcttatt tgcgtggaag
+    79681 ctaaaaacag aattttcaat gggatagcct tccatttcat ttcgaattag tcaatggctc
+    79741 ttttcggcaa ctttaggcat tccctaattg agttcttgat tcggcgactc cacttcctcc
+    79801 tcagcgtttt tcggtgaatg tctggccggt gtcataatct catcgcctcc tccggaccaa
+    79861 gcccaaccca atgcgatccg atccgagtcc cattgtgcga ctccagcacg attaaagcca
+    79921 accgcatttg cagccgcctt caattgcctg aaaacttaat gatttcatct ttcattgaat
+    79981 tcaaatttgt ttccccatcg ggcccccacg atgtccgagt gcgatttgca gctgtgcagg
+    80041 gagcacgacg ttcgatgttg gggcaccggc aaaatgtgct cgatgtttgg ggctgccgaa
+    80101 tgtgggcgtg cccccatcat acgcccctgg agttattgtc gttgctgttc tgcactgttg
+    80161 atagccaacg ccgcggactc ggttggccga gcctcgaaaa ataacgaatt tattatttat
+    80221 ttcacctgac agccccagca acagactcta taaatttccc tagccaatcc actccggacc
+    80281 ggaatcatat ttatggtccc atctgtatct ttctggcgat gcgcatatga aaacgaaatt
+    80341 attgataagg catgcatagc tccatatgta catacatata tacgtatgcc ctttattgta
+    80401 cagctctgat tgaaatacct ttcgtctggc aatgcaacag cttcatttgg acaaccaatg
+    80461 agaaaagaaa atgaaaaaaa tcaaggagaa accgaaaggt ggaaatcagc tgcagcaaca
+    80521 aagtttattg gcaacacaca aggaggtaag aataataaaa ccttaaccca atatcctgct
+    80581 tatggtcata atttttatgc tgcttccagc gaagaaacca aaaaaaaaaa cagtgcagat
+    80641 cgaccgcctg ggctcagcat atagcagttg gtccaggtcc aggtccaagt ccagggaact
+    80701 ttccctccaa tcccaatcca caacgtgccg cgtatttcat tatgaattgc tcttaacgat
+    80761 gttccatttt tcatgagcgc attcataaac ctttttctta ttttcgactt gaagcggatt
+    80821 gttgtttgca tctcgccgtt gccgttggtg gaaaagagga gattctgtct ggcggtttgg
+    80881 ctgccgccag cactcctcct ccgcttcggt tagcaagtga atgaataatt gcaccaaggt
+    80941 gttgtaagga gagctctgct atccgcagtc ctattctgta cagtggttgt gatatacata
+    81001 aaacaagtgc aaaactgtgt attaacactt gggagaaatt aaaactctca atcagattct
+    81061 tttctataat tagttgttaa aaatccacta aagtttattt aaaatcaggt tctaaaaaca
+    81121 aatagtaaat ataaagcatt gcttaaaatt taactcaata attttggaat caatctgttg
+    81181 gacaatgatt accaaactcc ttttttatat ttatgatgga atttcatata tttcgttctt
+    81241 taaaatgaat taatttttaa aaggcgaatg ctatctttat ttattaagaa aacctctgca
+    81301 cttttttttc gaacattttt gattgacaat acgctgaaaa atattttaac tagctttggt
+    81361 aaaagtaatt tttcaaatgc cagttagagg cccctaggga gacttcaagc ccttgcaatg
+    81421 ccccctgttt ccgcttcctt cacttctttc cgttttggcg aaaaccaatt cccttttaca
+    81481 attccgcggc tctgtggcct cttggtcttg ggctctgagc tgcactgaaa gaaatattta
+    81541 tgtgtattta atttttccgc attaaaatcg gcaattttaa gaacacaatg aattcttctt
+    81601 taaagttttg tgcagtcaag ttttatgcat ttcaggtaag attcaactta gttaaggaat
+    81661 ttcttaagga cttactctaa attgtttgca gtgttaggaa gggtttccgc ggaccatatc
+    81721 ggctcgggcc gctgtcaaag gcgcagcggg ccgtcgtaaa atcgcggcaa acacaattga
+    81781 aaaacgaccg ggccgtggga gcgagatggg gctaggaggt gcaagtggga tgggaatatg
+    81841 ggtgagtgaa ggagtgcggc ggcaaacact ccacacttgt cagcagcggc cttagtcgat
+    81901 ccaagcccct ctgccagcct ctccgtgacg gcccagaatc tcgcttatgc ccacaaatca
+    81961 taaattgccg ccttaaaatt tatcttcaat tttcgagacc ccatgtggtg cttagcgaat
+    82021 ggtgttttga aatttattgc tgcatttgca ataaatattg tcagttgagt cgggccaaca
+    82081 gtggaagcga gatgggggag tgctggttgg agcgggatgg ccataatggc ggcgtttgct
+    82141 tgctttttgc atgattgccg ctgtaacgcc tttgcccgaa acccgcaagc ccctccttcc
+    82201 ttttggccac actccgcctc tgaaaatccc cccattcagt tgtttcctcc tcgccgtccc
+    82261 ccggattctt tattgttttc gtgcatatta atgcacgctg attgcgggcc atggttagct
+    82321 gaaagagggg tcgttcagcc gcagggggaa tcccccgcgt ggcagttagc agctgccgta
+    82381 ctgtgtcctt tccagccaaa aagacctccg ccaccccacc gatgattgtt aatttacagc
+    82441 gcgagaaatt gattgccatt ttgttaagat agtatgtgcg aattgagaac tgtgtgtcca
+    82501 ttctgtgggg aaatcttacg cagggatgtg ctcttcttta tttgctggtt aattgtggtt
+    82561 tcttttatac caacataata tttatcgcct cggatcggga gtgggtctaa ttggagtagc
+    82621 gccttaatga aatcatcaag acgagatgat gtgaggtttg gtagaatagc ataccaattt
+    82681 tatttagtaa gataaaaatt aggtaacatg ttatttacct ataagttgtt tacctatatg
+    82741 ttatttatac atatttatac atttattgtt tgctattctt gtaagccctg ttaccgtagg
+    82801 agctcacata agctaagctc aaagtttgag gcattgttct gctcgatccg aatatcattg
+    82861 gggctcaaat ttttagcacc aacgtcatag cagatacttt gggttttttt tctgcactct
+    82921 tgctgagcct accgagcaca cagtaaatga aagtaagctc gaagagcttc gacgactcgt
+    82981 tggacgtcag gcggcattgg cagacccatc aatcaattca cagagaaccc agacacggac
+    83041 actcgaatcg taaaccccgt gaaactcgta actcgggcgc tgcctacaaa gtccgaaaga
+    83101 atggaataac tttccctacc gcccctgcgc tttctgttgc cacaatttgt ttgcactgct
+    83161 gtcaggcgtc gatgactctt cgtcttcctc tgactccttc cctttcggtt accttgtaaa
+    83221 aaacggagcg ggacaacacg tacacagaga gaaattttag caactcaact atattgtgaa
+    83281 gaattggaaa ggttaaatag gcctattttg gtaatattga acaagatttt gttagtggca
+    83341 taaatatttt ttcaagccgg gtccttgaat tttgataaag agatatatac aaaaacgtag
+    83401 ttctcagtgg catagcagtc aggccttgca caaaacccat cgattcgagc atgtgacacg
+    83461 catcaggcag agctcatcaa tctcctgtcg gcccaaacaa aatatgcaat atgttcacat
+    83521 tgcggtaggg agaaacctca acacgaaagg cgagcatcga taccacgggg ttcggttcgg
+    83581 gtttgcgagg gagcggccaa agtgagatat aaaaacaaaa attagaactc tgccgaattc
+    83641 cgaacccctt catgttactg cggcctaacc gcattcagta aatattgtac gggcgaaaca
+    83701 aaacgaagga aaaaaattag agagtatgtg ttgacagcag tcatcaatca agtgacacaa
+    83761 cttttgacat gtcaaatgaa cgagtgagcg cataaattac aatggcagca accatttata
+    83821 cgaccccgaa agttacagac ccagtgagat agtgattgta ggcctccagt agccgacgaa
+    83881 taggtaaaca tgccggcaat tggaaaaccg atttcatcag gagggatggg gctggttacg
+    83941 gaaaggggaa tgggaagggg aagtgcggcg tgttaaatgt cattacactt atcgataaga
+    84001 ttgataagtg taaacggttt tgcgtgtcgt tgcttcttct gagtcatctg ctgcacaaca
+    84061 gaagcgccgc acttaatcac tccgcaactg actcccagac tcggataccc agattcccgt
+    84121 actcacacct ctacgtaagc catcagttgc cagccagaca atctgtcaaa cagtcacgct
+    84181 gacaggcatt tccacttaaa acaatgccaa acagcaacag ctgcggaaaa actcctcatt
+    84241 tcactctcca cactacgctg taatttatcc gtattcgact tcgcgatgtg ggaaagtagt
+    84301 tgagccggcg agaaattcaa gattgtgttt atcagccgaa gagtactcaa ctcgttagtt
+    84361 tcgaattcat gccaattgag tttattacta cattacgagg agattatgta agagaattga
+    84421 ataggtaatg tccacactgt ttttcccggc aaataaacaa taaataaagg ttccatttaa
+    84481 catgtgcaag tttcatataa ttcattcctt taaacggttg caaatttata ttgcatcata
+    84541 atccttaaaa cttgggcata tgctcatacc acccatcaaa aatataatgt aggcttatag
+    84601 acttctttga tcaccgtttt acgaactctg caatctgttc tggataataa aatcaaaagt
+    84661 tgccaacagt ttggagtttg cttttcactt ttcgcacagt ttccggcgtg acagatttta
+    84721 cttttcgcta gcgagtgtga gcctctcgcc tgggtgtgcg gtgtgtgtgt ggtgtgagtg
+    84781 tgcggtgtgt gcgccgctgg cagtgacaaa tcctttagcg ttatttgcgc atgtcgacac
+    84841 tttgacaggc gccgacacat gcgagtcgct ccacgtacgc agtcgtccaa agcccctatc
+    84901 atatttcttt gggtctcagt ggattctctt tgttgccatc gcttcctgtc tgcccagctt
+    84961 gcggcaaagt ttttaataaa tttgcgtcgt tttcgttgtt tttcaatttt ttggttggtt
+    85021 ttttgccggc ttgcgggtga acgaaatcca ccgaaaacat ttatttcaat tttcccccca
+    85081 tgtcggacag gcagcgcaac atgagaaagg aaaatttctg ctgccacagt actttacaaa
+    85141 tttgtttccg tctcaagcgc ggcgacaggc gaggagcgtt gacagctgtc agtggagctg
+    85201 gctcttgagt tgaagccaaa aaatattgac agttatcgta cgtcacatgc aaatcaagtc
+    85261 ggacgcggtg cagtcgtgcg atatgtttac cgttgcccgc tcaccaaatt ggaaaatgtt
+    85321 tgtctttttc ggctctaaaa gttgatttaa ctagacagca atataccttt cagtacacag
+    85381 ccacgaattt tacgcagcaa ggtggaaagt gtaataaaac tatttgcaag ctgatttaaa
+    85441 atgattttcc attgcttagg tctaccatat accagttcta tcaacggtat ccaagcgtat
+    85501 tcttaagtat ttctgaacta cgaagagtaa aaataaaact aaattcgaga aatttccgaa
+    85561 aaattcaaag tttgccaaac aaaatgcagc gaaacgaaac gaattgaatt gtgaaaatcc
+    85621 tttgcaatta agaactgtca acggtctttt gtagacacaa cattcttgga attgtttgca
+    85681 aattccccaa cactcatttg atttacgatt cacttcgccg cgttgaaagt aatttggcgc
+    85741 tcgattttct tttccgaatt tccctgccgc tcatgccatt ttgccttgac tgttcttcgc
+    85801 ctggctgttt gttggctgtc tggggcctga catgttggca tctctttgta attgattagc
+    85861 cagcggggtt attcactgac attgcatgtc gaatccaatg agaacagtct ccgttcctcg
+    85921 ggggagatca cctcgagagt tttctgggcg tgagtctgct agtgattgat gggactgatc
+    85981 gcaaaaatca attgatcaag caacactttt atgaactcga cgcggtcatg aatcatttag
+    86041 aaatcaattt gcaactacgg ttgatgggta aaatatatag tagaagtgta acattgtatg
+    86101 ctctataaga tcacataaat aacataatac ctccttgcct ataatcttaa tatctttgta
+    86161 gaaacttgtc gaacaacttc ttcatggcaa gcccaagtaa cattctaaat tgttagattg
+    86221 tatctggcaa ataagtaaat aagtgaacaa ttgccttgtt tgcattggaa ttatgtggag
+    86281 gcttaccgtc atccgtttcc ttatcaccgc caatgttgat gttagattgt cgcattccgt
+    86341 tcactaggtg atcgatggga ccataaagct gccaaagtgt aattccagta ggcgccttag
+    86401 ttcagagtgt gcgactgcac ttaactatat acagtatcta taaatcccat gcggccatgt
+    86461 ttatcttgcg gatatagtgg atggcagata agcggccggt aagccgagtg atttacggtg
+    86521 ctttgcagcc cccatcaatc aaaaccggaa acgtcagatc gtcgccttaa attatgatcg
+    86581 ctctctctca gtagaaaggc aaccctcttc cgatccttta tgcccccagc atcgattaat
+    86641 aatacgccgc ttatcgagaa tgcaatttat aatacaaaat ggcagtcgaa agccgctcga
+    86701 gaaaaaaggg atcataatta atatttaaag gatgcaggca cagtactgtg cccccggctc
+    86761 cccatttcca gccccatctg gtgtgctctc ctgttgcttc atcggcgatt ccaatcagat
+    86821 aacattagca cgtcacatca cacagcgtgc aaaaattatt attcattgaa tttgtgataa
+    86881 atgtgacagg cccaacagca ctccgagggg tcttaacaat gccgacgagc cgatcaacca
+    86941 accggcctgc ctgcatttcc agaccaatat ggccaatatg ggccaggacc ggaccgaacc
+    87001 aggccgcatc gaagctgtca aaaatttgtt gcaaaaagat attatctgat atggattgaa
+    87061 aggaccagag atgacgatga cggacccagt ctctgcctct gtgtgcgaat cagtggtggc
+    87121 agtttggctt aaaacccatt acctgaaaat aatatatgat tataagtgtg attttaaatg
+    87181 ttcaccgcag ctggactttt tttcaagttt attcggaggg gataatataa acgtttttga
+    87241 tctctgaaga ctttactatt tttatggcat gtctttggca tcactggcca atgtatctgc
+    87301 tccgggctca tcaataatga ggcgacgagg atgcggctcg tcggcaacgt tgctgatgat
+    87361 ggcgatgacc atgatcgatg attgacctgt ttttgttgcg cccatcttta gagtgacgat
+    87421 tgccggcgag tcttcttctg gccctgacca cacccattga caggcatctg caatgtgcaa
+    87481 ttatttgcgg agaatcacaa cagcaggcgg agtagtactc aaatggaatt gccacattga
+    87541 ttgatgtgct caataaattg cacgatgagc atcaactgtt tctgcagtct ggcgcttggt
+    87601 atttggcatg tgataaggcc gatggaagct gacaacggag tcgagctgcg gagcatttaa
+    87661 atgtccaata gatgtctgca caagagcatg aaaaatagtt gaaataagca aaggattttg
+    87721 atggacagtt aggaaaaacc atttatcaat taatatacat ttgcacacca tgtctaagaa
+    87781 actgattaag tatcattcaa ttatttattc tgtaaacgaa ctgctcaaaa aaatatactg
+    87841 acatttcatt tgacacctta tggtcttgtg tttttcgcat atgcttcagg gatcggaatc
+    87901 ttaatcccca gcggaatgcc tttcagcgta tgaatcattg gcaaggcccc acagtcttcg
+    87961 gggaacattt taattatggt ttttaatttc acggcccata tggcgggctg tcaggtgatt
+    88021 tcccctgaaa tatgcgcaga actcacatct atcaagaacg cctggttggt tggaggcact
+    88081 gccgagcgaa gattgcgcac tgagcggtga aatatttcac tctatttctg ttttcgtctt
+    88141 tcagaatgaa attaagtgtt aatctcattt gatttgtcat tttcacatga cgtgatttgg
+    88201 cggccttcgc agctctggca atagaaacga tcgctctggc tcttgatgta tgggaaacaa
+    88261 atgtttctct gacacattta tttgtttaat gagacgttga ataattagtt ggatatggca
+    88321 caagctcaag aaaatacaac cgatacaaag gcgaaaacac aacttgcata gctgtggata
+    88381 cctccatccc tacaactaca cccccgacgc acacacatca gttggcacag caatttataa
+    88441 attatgattg acgactctgt gtgcggaatt gtgtgaccca tcgcagttag cccctaccca
+    88501 gaccattatt ccccactttg aaactcggct ccaccatgtg agactcgtct gacatttttc
+    88561 tatatagttt tttgccctgg cctcggtcat tatggctgac agcgcgcctt caaaatgatg
+    88621 agtttagatt gatggccgca catcactcag cgagatggca gccacaccga gtgaagacgc
+    88681 aactctgaac aggttgggcg aagtcaaaat accctatcaa aggactctga acaggtcctt
+    88741 atatgacttt atctaaggaa gccagtgaag ccattcatat ttgccatata ttcggtaaag
+    88801 tactagttta ttatagaaag agagccttta aaaatggttg ttaatgaatc tcaaaaatac
+    88861 ataactatta aacttactca aacacactta ttaacgtctt tctaaagatg aacaaaaatg
+    88921 gttgttaatg aatctcaaaa atacataact attattcaaa acttattcaa aaacacttat
+    88981 taacgtctta ctaaagatga aacaagactt atttggatca gaaaatatac ataattatat
+    89041 ttcatataaa ttaaaaatgt aaaaaacttt ttcttaatct cttcatataa aagtttcagc
+    89101 acaatccgtt ttatccatgt tgccctctgg agacagagaa ttcaaatcca atacgactgc
+    89161 aggctatctg tggtttttgg gcccaaatca ccatgctcca ctgcttcgcc tgttgacatg
+    89221 tcactaattt cagaatcgct aagctgattt tgctgaaatc tgattttttg ctgtgaacca
+    89281 gcccctggga tagttgctcc accccatctt cctttcgcaa ctccgacgat gattgatgtg
+    89341 tttggcatat gaaaaaccga actgaaaaca aaaacccttt ttccgaaaca tattctatgt
+    89401 cattttgtgt gccggaggag agcagagtag aaatgtatta atcatccgtg gcgaagccta
+    89461 gcgagtcgag gtgataaatt aagtttgaag acgattggga aatgtctgtc gagtaattag
+    89521 tgaactgagc tgggtctgga ttggattggg gtctctggtc gggtctttcc agatcccaaa
+    89581 ggccggcgaa gccttggagg cggcggcggc ctaagagagc aataatgatg tgcctgacag
+    89641 aagaaatgca agagaccgaa attaaatatg ataaaagacc cacacagtct gttggcataa
+    89701 ttaagtatga catgcaaaag tatgaattat cactgaaatg tcaaagtgtc acagctgcca
+    89761 tcaggcggcg atggagatgg agatggggaa tctggcggaa ggcataaata aaaccagaga
+    89821 agagagtgca agaagccatg acaacgaacg ccgcgtgtgg gattaactga cttgaaaatt
+    89881 tatgatatga aaaattactt tagctcggcg aacacacaca cacacgcaca cgcacactgc
+    89941 atatagcata caaaactgca aactgaaaga tgcaaagtgg cgggcgagat acaaatacac
+    90001 aaagagagat gtttattttg tattttattc atcgtcgtct cccattgagc atcagcagca
+    90061 tcgcatcgca tcgagaaatt tgtggcccga acaaatttcc caaagtctgt gttgaaaaat
+    90121 taatgtgcac aactgtcagt gcagtgacag agatgcgatg tgaatggggt tcatattcgg
+    90181 atgcggatcg gatcggaact gaactgaact gaactgaact gagcggggcg tgagggttac
+    90241 tgttgtgaaa gctcttgata aatgattttg aaatgtgaca aaatgcttac gagcgggatt
+    90301 accggggacg agacgacgac ggcccaagga ttgtttgctc cgggttcgga aaaagagtta
+    90361 tttgccagct gagggaccag caaagtttgt tttgtgccag cggaatcaga tgggattata
+    90421 gagatttgct caaatgaagt tgtaacagaa gtgaatgctc agatgagtga agggaaggtg
+    90481 cgagtcctga gctagacgaa tataagctgt acgcccatga aaatcaaata aaataggatt
+    90541 tgttttgttg gcactcggaa caaatcaata ttgaattttt gcgattttag taaagattat
+    90601 ttggagaata gaaaaggttt ctataagctg tataatatat aagtgtgcat tgtcattatt
+    90661 taagattatt ttttaaatgt gtatgtgaca tttatagaac agaaactaaa gataataaaa
+    90721 taatattttg actgaagatt atatggattt atatggccat ctcaataaat cactgccatg
+    90781 ctatcttcaa tcgacgtgaa atggttttca ttctcgatat gcctgaccca tgcagttgtg
+    90841 tcttatataa tggcctttca taagtcatac tacctttcct tttattttgg ccataaccca
+    90901 tcaaggcatc aatcaggcgt tgtcgcgaca ttataaagac tcgaaaccta agacaccatc
+    90961 agtttacctc tgcggctgac ccaaggaaat cgagctgcga cgcagacatc aacgatcacc
+    91021 ggatcggtaa tcggtgatcg acgccttttg gcgttaccat gccatcgatc ttgttagctt
+    91081 tgacgcatct gtaagtggcg gcgacacaag tcggaaagtg aaatgcaaaa gatcccccgc
+    91141 cggatcgatg gatggatcgc atcgggccaa gacatcatca gtcaagtggc gcgactgcgt
+    91201 ccgtggccgg gtcaattcgc gtgacagctc tcttgattta tgcgcagaca cttgagtaac
+    91261 cctcgagata atcaccaaat tccccgccag ggcgcacact atttatcata taattaagcg
+    91321 agtcgataat gttattacac tgtctataaa tcggacactg ccagatcgga gctgcgccga
+    91381 tagattggtc ccataagtgt gtggcaggtg tcttagcggg tctcttttag aaagtgggaa
+    91441 gtacttgggc aacatttgac acgacggtaa acatgcgggt ctaactaggc ttctgatcgg
+    91501 aaaatacgac gccattcagt taattgaaaa cccatagcaa tatatttaag taccgtgtat
+    91561 tacgaagatt tggggtttga aaaggttacc caaaaagtga aaacaaagtt aataacataa
+    91621 ataattcaca aattttatgt aattcatgaa aatatttacg ataaacagta catcaaaatt
+    91681 gagtaattaa aaaaaccgaa ctttacacat ttggttttca taacaccatt tcagtactga
+    91741 gtatcgggta gtcgataggg gagaccttag gattcctttg ttcgggggcg attaagcgaa
+    91801 agcgccactc cctaacctcc gagcagtgtc cactttccca tccagccagt ccatataaat
+    91861 caattaattt aaactgtcag gcagctgtca aattgtgttg ctaccgggag cgtttttcgt
+    91921 ttcgcttttg actcgctcga cttggccggg gttttgtttt actttgtttt gggttctcga
+    91981 gttgcggggg aagccggcaa agagactttg ggtacggcga catgtcgaga tttactactg
+    92041 agtgattgac gactttatgt gtcattaaat accgttagca gtcgcctttg tatgccgcac
+    92101 actggaccgg actcattttt catgccacta cgcccgcgtt tagtgaaagt cgatgcgatt
+    92161 tgtagcttcg gcacttgtgg ctgcacattt caattagaca tcacgccgcg ggggcggtaa
+    92221 ttactcgccg acacaaatca gttaggagtg gatactaaag cttaatatgc ggtttaagga
+    92281 aaatgcacat aggacaaaac aattccaata tttaataaag ttcacatttt aagcaccctt
+    92341 ctaaggatca aatttaattc atttttggga acttaatttt gcatcgcatg tttcaaagtg
+    92401 cattccctag caagttgaat ggccctacgc tcgttgacct tccacaattc cgcaacgtcg
+    92461 ttggccagtg gatcatctgg gtttggagca ctcaacaggg cctggatcga aagaagaacg
+    92521 gtacgaattt gcaaagcagg actccattta tccttcagga tgtccagaca aattcgaccc
+    92581 acacggtcga tgtttggatg gaaaatcttt gtcaaaaatc gaacctttgg tgctttcata
+    92641 ggatagtctt cgggtaaaaa taattccaat ttaaaatttc caccctcaaa aggggaatct
+    92701 ttggggccag taacgaggac gtggaaataa cgcgcattac actcatcagg cgtggcacta
+    92761 atcccgggaa ctgggtcctc cagaaggcgc tgtgtctcct tgatgattcg tggagtcagt
+    92821 gcggccatcc tctgtctacc acttcttgtg aattgccttt gatccgtttc ctccaaaaaa
+    92881 acgctgttaa atttgatgtg tttaagtaga ttttaagtag atttaatttg aataaaattt
+    92941 ctgaattaca ctgtcggtta cttggttttt ctagcaagac tgcttgctac aactgatgca
+    93001 taacctgtca ataaattaac ttagttgatt tgaatgccca agtcatctca ataagaccga
+    93061 atcaaatgct caattcccat acatttgcaa ttatccttca gtgggcattg aaattgaaat
+    93121 tgactagagg cacttgcccc tccataccgc tcccaccacc cacaacaata aaagcgaacc
+    93181 gcatttccat cctgcaacgc ttgagggtcc ttctgaaatc ccgggtgaac tggcaatgtt
+    93241 aactggcact caactagcag cttattagca aactttccca ttggattata tagttgaata
+    93301 ttataataat cgagttaggt tcgtgtaagt ggatgctccg ccgcccctgc aatccgcata
+    93361 cttttggggg catggttagc agtcagcttt catgcacaat ttccgttttc atggctccgg
+    93421 gcatgaaaat aagcccacag tttatgagac gacgtggagg acgacgtctc atgttggtgt
+    93481 aacaaaaagt gggcaacttt ttagagcgcc gaggaaatac tttgctaaat tgccagggca
+    93541 gtggcagtgg cgccccacat ctcctccaac catcgaactt tgggcctgcc ctcgtggaat
+    93601 ggctacaaaa accaataaca ggacgtaata cactcatgtg tgtgcgaatt ggggggattc
+    93661 caacttttac aaatcgttag catttcgcac gtttattgag ttgtttttgt atgtaaattg
+    93721 gttgcataac ttttgccatg gaaagtgtcg agtggccgga atggcaacac aaacgcaacc
+    93781 taaccctttc agtgccctcg cactattcta cactggatta ttgtctgtag attttcagat
+    93841 acgcaaacga acattccctg cccgtctgag tgtcctcgtg tgcataaact ttgattccac
+    93901 tttgaatgcg ccagcgcatg ctaaacgcgt tggaatccct ggatggggta tttcgatata
+    93961 ggagtaccct ggaaatattt aactctttta gccataaatt gtgacagatt caatgtacga
+    94021 aatccaatca aaaatgctca caaatatttt aaaatataat ttcccatctg tataaaactt
+    94081 attagggaat tatatttttt tttttaatct aaagaacatt gagttctctt aaatgacttt
+    94141 tcatttaact tgaattttat taaggggaaa tgtatgtagc tcaacatacc ctcatcatcc
+    94201 acgagtgcga agcataaaaa gtttaagcta tttgcgtatc tcgccaacga aggcttttcc
+    94261 tagtgggaaa atgatgcggg gaaaggtcgc tctgtctgtg gttgtcggtt tggtttgcat
+    94321 tggtttgctt cggtggtgca tatttgtcgt ttgcggtttt cagctgcaaa tattatagca
+    94381 aattgaactg taaatagttt ttattgccct ttttgccaca gagccggagt caaacagggt
+    94441 cgagtcgaga aaatgccggg atgaagtgca gcatagctct gatttttgtt ttccaactat
+    94501 atcgggtctg gccagcccag gccacagttt gttggcagaa attccatttt gcagccaacc
+    94561 caggatgggc cacaaaatga agttgtgtca caaagattaa ctcagagaaa taccgaagca
+    94621 aacaattgac tggaagcgcg acatctctat aagcgaaaat atttgaaaaa aacgaaacac
+    94681 tattattata ataatctata ataatctata gttaaaaaga aaattcgaaa ttaatcgttt
+    94741 attagttgtt tcaatttgag tgttttctaa tgaaatttca ttgttttaat aatgaatata
+    94801 aggcagcata aattcactta actcaagcac accccctaac gaattgccac ctttggacac
+    94861 aagacccaat gaaatcaaac aacacaggcg acacccccac atcggatgat taatggctgg
+    94921 caaattaata tgaaattccg tagccggcat cgtcatcctc gcagaacacg aaactaataa
+    94981 tttgtgtgcc gtaatttgca tagatatatc attattatta tatttcttta taaatatttc
+    95041 gtacacgcct cgcattttca ttaatcaatc ggtgtgcagg gtaggcggag gaaagacatg
+    95101 acatgaaaaa tatgtttgcc agtcgatgga aaaattataa tttataaaaa ttttaagagc
+    95161 actgcaacgt gcgccactcg cgccatcaat caaaaatctc ggtggacatt gcagacgaat
+    95221 ggaaggtgta cagatggacg ggcgcctgtc tgggatccca gattacgacg gtagccatca
+    95281 gaggctccgg accaatgacc gaccgtccat tcactttctt aataataaga gtcgaagtgt
+    95341 gttaatacct tgagaataag tttttctact tcaaatcata ctcaattgtt gagatatttt
+    95401 tcagacttgc agattgtaag ctttccgctg aatagtacca ttattataat aaaatcattc
+    95461 ataaaatgta accaaaggct ttaaaaaata tttaattaaa aataaaataa acaatattca
+    95521 agcattattg ttattataag aattttcatg attagacggg gtattcgaag ttcgtaaccc
+    95581 aaaaccgctt ctattcttca gttataaatg attcaaatta tggaaccgac aatgagatga
+    95641 agaccatgac gatgctgccc cgacttctga tgatgatgat gttcggttct cgctttttag
+    95701 ctattgtctg cccgcctcgg cggatgttga aaaatgtgaa aattgtgaaa aattctacaa
+    95761 caatccaaaa taacaaatgc cccagacgga aatgccggaa aaaagaagta tactatatta
+    95821 gttttgttcc aggacgtgac aggccggtca ggcgtccagg ggcatcaatt tgttcgctct
+    95881 caagcgagat taatgatgtt gcactctggg gcacaggggg catctaacat taaatgccag
+    95941 ctaattaatg atcaaactgt tgcaagaagc gacacgcccc cagcagcata atgtgcccac
+    96001 atttcgcacg attggagctg agaaccaagt gacacagtag tcactccata tggatgatga
+    96061 ttgctctgct cggacgcact tcccatccgg ttttgtgtgt cccatgatta atgagcttat
+    96121 tatgtacttt gccactcgct caatctctcc ttcactcggt tcagatcctg tggacaagtc
+    96181 agaatctctt ttctttttgg cacaggcctc attattcttc attgttcatt tgacgtagtg
+    96241 tgccagaaag atttattaca agcccactgg cccagggatg aaaaaggttt ccgattattt
+    96301 tcaaaaggag tgggaaagtc actcaacaca ttagagtaac acattttatt atagcatggc
+    96361 atatggctaa tagcaaggtt tcttattaat aatagcattc tgataatgta ttgagctacc
+    96421 cattgctcaa cttacactgc aacctgtatt taattatttg cccaattggt tatatggatt
+    96481 tctctctgat ttctgcaatt tacccactca acgcatcatt ccgaaccttc tgaattaaaa
+    96541 tgtttatagg cccctgagct cgacaatttg ccgtatttct cagctaaggc agcggcaaca
+    96601 ttgcagtggc aacaaaatca gcgaacgaac gcgctgacag ctgtcaacat gtcgtatgat
+    96661 taatgttggc caaatgcggt tgcatttttg tgcgtttcgc gctgtaaatg aaatgataaa
+    96721 ttgggtgtgc aaacattttt gcctcgttcg ccatccagtg gaccgatttg cagcttaata
+    96781 acgaaatgaa aacagagcag gcagcgttga agcgagaagt ccctcaatta aattgccttt
+    96841 caccagcaga atgccattca ggaaactttg ccacatatcc gttcgttcgt tccccacttg
+    96901 gagttgcaca aacagatgat attcttgttg ttgttatagc tcttacggag ttgttgcagt
+    96961 ggcacacctt tgttgacacc tttggccaac acaacctcaa cctcgggcgc tttcgtggcg
+    97021 tgtgtgaagt ggcatgtcca catccgtttc gcctcgcgtt tgattgattg aaactgcagt
+    97081 tgcaacttgc tggcaccaaa aggcgttgac agctcgcctc tggcagtcgt gtgaatcgag
+    97141 gggaagtggg agtacacctt acaaagtgca caaatcaccg ctagtaatgt gcgacctttg
+    97201 cactcgtgtc agttaaacgc gacttgggtg aaatcactgc aatcgaaatc aatgaaatgt
+    97261 ctgaatagaa aatcgctagt tggaaaaatg atagatattg actcgtgctt tccgttcgta
+    97321 gccattgata aaatagctaa tcatccacgt aaataaagat ggttataaaa agtcacgcta
+    97381 gtttttattg tgaggataaa catatactat tgcactttta catttattcc aaggaaagga
+    97441 tctacttttg ggtaaccaga aattgaaatt aaatattagg tattaggtat taaactaatc
+    97501 atctgtctat ttcaggaaat atcgactatc tgatatccgt taccgataaa aattagtcaa
+    97561 aaaatgaaaa tgatcaaaat atttttcaag ctgttttaat ttacattttt aaaatcttac
+    97621 agacaagata tccgttcctc aatttttccg ctctctaatg caaattaccc acttgcatct
+    97681 tcggatgcga aatccgtcca tcattcgaat ggcgctgcat ttggttacaa cttgagtcaa
+    97741 tttcctgctg cgacttgaaa tgcacaacta ataaccagaa tgggccggga atttcgccta
+    97801 gcaaggcaaa cattataaat gcacccataa atgcgtcata aaccaggcgg caacacggaa
+    97861 tcaaggattg cggggtggcg ggatttcaag acgttgatgg cagcgtcgca aaatatgggc
+    97921 aaaggctgca agcggcaaca acataataaa ttaattatat gcaacatcag caacagtgcc
+    97981 tccccccacc atcccctttt taaccgtact cacttgagtg atgtgtggca taaagtcgtg
+    98041 agcgtgagcg tctattagca gtcaacgttg acgacgtaca acagaaccgg caacgatccg
+    98101 acccgcccct acagtcccac cgcccccttt tctgcttctt ctccttcgtg acaaggcgca
+    98161 cacgagcgcg aacaaacaag gggcgtggca cgggggcaag ggcagggcag tgcagtgcag
+    98221 atagtttagc atgtgtggca tctcgggaag gcgagccaag tcaagacaag tcaagacagg
+    98281 agtagagtcg agttcagttg agttgcgttg ggttgggttg gattttgcgg gacttggggc
+    98341 aagctaaggg gtaaaggaga tagggaacaa cagagccagt caaacgggca atccatccat
+    98401 cagccgctgg cagacagcaa ttgcaagctg caagttgtgc tggctatgtg gcacgtttga
+    98461 tgttgctgct ttaaccacac cgagccagtt attactgctg caaaactgct ccaatttgct
+    98521 gggaatttac aaagcgaaag ggttttgaat tagtccttga aattatgtag aaattgataa
+    98581 atgctgcatc ttatataagg cgtgaaaggc agttaaattc gtgtttttct tactattaat
+    98641 tgcttttcaa ttgtttattt atttgcatta atttggtgta gtaaaatata ataatagatt
+    98701 tatggtatta ggaattcatt aatttccaat ttttaaagta aatatcacat tttaatgcca
+    98761 tgtttttaat ttttgtaaac aacttaattt gttcaaatgc ttcatatacc tcttgcctca
+    98821 ctttgaaaga gtattcgttg ttcgtcttac gcaaagcaac gttgccttaa taattctcgc
+    98881 attgttgctg caaagacgct gccacaaatg ttgctgatgt tgctgactgg ccaacttaag
+    98941 gagacgctga ccgtagattg catcgggctt cgaggggatg ggagactgtt ctgattggaa
+    99001 aatcaagtgt gtgcatcgga tggggagaac gcttgcagcc ggggcatgtt tgcctgcttt
+    99061 taatggcccc aactgacaga gactgcagct ccgctaattg cgaatgcttg acgcatgttg
+    99121 cagccactca tgggtagaca tggacgcagg gcttgttgcg tatcgtgagg gggggtgtgg
+    99181 ggccatcggg ggcggtgtgg caggtctttt gcttagacag ctccacttcg ctgcgctcct
+    99241 cactcaattc gtctcgagtc ctcgcggccc gaagcgcagc gtgattttta attagttttc
+    99301 ttagaaatta aattgttaat ttgatcgacg cattttccgc actcgccggc tgcccctcaa
+    99361 tcttaatctc cttcgccgag ctgctaagtg gcctgttgct ttgacttcat tatggtggtt
+    99421 ggtgctggct gtttgctatt tgctgtttgc tgttcggttg tttgagctct ttgtctgctc
+    99481 attaatgcca attctggcgc gccagagttg acgcctgcca ccatatcgcc atatcgccat
+    99541 atcgcccctc ctggcccatc tgcccccctc ctttggtcac ggccatcatg caagacaagt
+    99601 tgcactcgtg actgagtgcg cgagtgtcgg ttgtttatgg gtcttacggg gaacgagcta
+    99661 ttcggaaatt gcggttgcga agggcaaacg ggggtaaagt ggtctagagt caaacatatt
+    99721 ttgcagttaa gggaatgcaa cgtaagaaat tgctcgctga catgcgactt ggaaaaagta
+    99781 atctaagaac ggattgatgg atgaatcgtt agtcagcatg aggatggtgg tactcaaatg
+    99841 gcggaggact gagttatgat acggcagttg ccatctgtat aataaggttt ttgactagaa
+    99901 gaagtaaata gtgagatatt aatatataaa gaaaatatac gaaatatatg tggtcacaat
+    99961 tatgataagg aattcattct ttgcctaatt gtattaaatt ataattaaat tatcagtcat
+   100021 ttgctccgaa aaaaattaga tcacaatttt tatagctgcg aaatgataaa aaaatgtgtt
+   100081 aaacaaacta gtgggattga aaaggcttat cctttgttat tacatgcaaa gcgcggtcaa
+   100141 agtttatacg gtgtcggaat agaccaaact agtttaccag taaccccgat tgcacacggg
+   100201 cttccacccc gaaacatctc aactgattgt tgcccacgac caaacaactt tcgcctccaa
+   100261 tcagtcagtc aatcaatcaa tcaatcagtc agcgctcaac tgacaatttg tagccccctc
+   100321 ccccctgaag cgggtgccac cacctcggcg actcagtcac gccctttacg gtggtgcaac
+   100381 gtctacgttc aggtggttgg ggttgcggtg catgtgcatg tgccactcac tggcaattaa
+   100441 ggccgaaata aaataataca ccgaaaatgc caattaacct ttgaatgcca tgcatgacca
+   100501 cgtagcccct gcactcgatg cacaggaatg ggctatgtgc tgcactgtgt gtgttcgggc
+   100561 ggggggatga ggggcgaaag ataaaataat tacccagacg cataactcga aagtaaaata
+   100621 taaatttaat ttatgcatgc acacaacgat ggacaagaaa aaggaccgac cgaaaagcac
+   100681 ccggatccgt agccagtgca atttatctca cgcggatcgc agatatacac atccatccat
+   100741 ccaaccatcc agccatcccc ccagatcccc attcggccca ccaatgcatc gcctgcattt
+   100801 cgatggcgga gccctaaata ggaaataatt tatgatgcga cattttcatc agcaattcgt
+   100861 taaaattcat gcatgcatgc atttttaagc agtcgtcgca gccccctcca aacaccactc
+   100921 actccgccac agagccaccc cttttggtgt cgtatttata aatttattaa gccctaaaag
+   100981 gaggggatca ctccttggcc ctgagttcgg cagacaaagc cgggcgacga caacacgaat
+   101041 cgcatgtaaa attcttactt taatcgatgc aaatatgcag cagcagcagc agcagcggca
+   101101 acaacaattg ctgatgacgc cgatggtggt gcatgctgca tgacaggcgg tgctgcagag
+   101161 tggtaaggtg gtgcgctgca tgtgtgtgtg tcgccgggga ggcctgcagg cctgaaggga
+   101221 aacagaaact cggccatcgc gataataaat caattttcac aataaacagt tgtgtgccct
+   101281 ggcggcagac gaaggcgacg gttgactgta cagtgggacc tagaaaggag gaattacaaa
+   101341 accataacat aagcggcttg taagcttaaa ttcgaatcct gagtcagaga aatccgttgg
+   101401 tatctaaaaa catacttata aatcctttcg cacatattct tatagagttc attacgcaat
+   101461 gataaatgat cagcattgac agtccaaaac tcatgttttt cagtaaaatg caatgaaatg
+   101521 taaaaataac ttcaaaattt ccagtagcat gaaatttaat attaataaga ttttttccta
+   101581 ttaccttaat tccactgtgc aactcggcca tatttggtgc gagcggcttg catatctcgc
+   101641 accacttcat gtcttggcag cgacataata aattaactgc actgcaatta ttttcacatt
+   101701 ttcgcaaagt gaataaaaat tataaaatta tgtggaagcg atgaaagggg cgttgagccc
+   101761 tcaagggggc aggtcgatag cgtggccgcg aaagtgagag cttatgtaag ggattagctg
+   101821 ggggccgagc atgtgactcc tgacccttgg ccttccccct ttcccagaca agcaccttga
+   101881 gtgccggttt cccccagaaa aaccagtcca gtctgcagtt aatcaaaata tcaaattaac
+   101941 tgtcgtgtct gtggcaaata aatcaaaata aaagccggtc ccaggcttcg agcccagctg
+   102001 aactgaatcg ggctaaaaaa caccgccccc ttcgagaccc ccttagttct cctgccaccc
+   102061 ggggcaacct cctcaatcaa gccaactgat gacgctcccc tatgaaaatt tcacatgcag
+   102121 agactgcctg atgagtgagc tgaatttatt ataggccatg ttgatggcga tgtacgcgat
+   102181 tttatgaatg agcagcgagg tgtgagatgc ggcagagggc aagggaccga attggcgaag
+   102241 gggcgagttt ggtgtgtcgt caacgccttt ggcggtgatt attcaatgtc aaaacttaat
+   102301 gtccccaaat tgcagttgtt gtcagcggtc agccgtgaaa ttgaatgcat gtcgcccgga
+   102361 tgcagaaagc gggttcacaa gtgcccaatg cggcagctgt tgcatagttt taggcgtgac
+   102421 acatgtcaac tcttgccacc ctcgatccct caatcttgcc attcggcgat tatcccccaa
+   102481 acgcagccgg cgattcgagc aatccaaaaa aaaaaataat aaaatcgaaa ggttggcatt
+   102541 cggtagtgaa tgctgaaata ccctcttaag tcgattattt attcttaatt taaattaaat
+   102601 tgattttttt ctttagccca tgtgtttagt ttgtttgatt gataagttgt gttaactaat
+   102661 acaattaaca aaatattttc tgcacctaaa ccgctcatta aaattcaagc taaaggtgtg
+   102721 caactcacag acagggtata aaattcccca atccatggcc agccaaactc aacccgaacg
+   102781 ccactcgagt tcaactcatt tggcagcagc tgccatgggg catgtccagt tgcaaggagg
+   102841 gattgggagt aatgatcggg aagggggcga gtccgagttg ggagaagctt gttaccataa
+   102901 aattggtgta aacgaattgc agtcatcgtc atcgtcgtcg ccgacggtct gtcggcattg
+   102961 caaaagtcca aatcgctcgt ggagcggggg gtgcgtcatt gtgaaatgtc tgtaatttgg
+   103021 acaatttgat gaatggttat ttgccacgag atggcaaagt ggtcattatt ctttatctcc
+   103081 ttcgctcgtt cctttttctc cactctgctg cattttttcg gttacatttt tttcgttcaa
+   103141 cgtttactaa ctttttcatt gcacatttgg tgcagctttt tagcattttt tggtcacctc
+   103201 tgcagcagca gaactaagtg tattgttgtt cagaggatat cattagttgg ttattaagta
+   103261 gccggacagt aatctttgtt aacgaagata tcacgttttg aagtaaccag cttggaatct
+   103321 gctatttaga ggaaagtatt tcaaaaatta atgcattagg ctattggttt ttgataagct
+   103381 atgcattttt aatgaatttt taaaacaatt ttattgataa ggaacttatt tctaaaaatg
+   103441 ggtatcttcg aaacttatta tagcaagcaa acctgacatt taaagcatac caatccaatt
+   103501 aggaattatt aataattata ttgaattata tttttaatcc aattaaaaac tttttgaaac
+   103561 tggaaattgt ttcttacagg gtatttaaag cgctaggcac ataagctcag ttgcccatgt
+   103621 tccgtgcttt gttttaacaa agccgtttca tggaggaata aatggcaatt gctttgcctg
+   103681 ctgctcgttt cataattcaa taaattcata aggcgttcac ctggctgctt ggaagcggta
+   103741 atattgtcat tagcctcttt tccccacagt taataggctt ttgggtggct atgatcgctg
+   103801 ggtggagacg gattcgtgac ctgttatcat ttggccagcc gatcgcagtc cacattttgg
+   103861 tcggttggtc gttttgtttg gcctggtctg ctgatgtgct gtgtgtgcgg tcgcggtcag
+   103921 ctgaaagcca tctgtttggc caccctcttt agccacaaac cagcaacgct cctgccacgc
+   103981 ccccaccccg agaacgccct tgggctggcc agaatgcgtt ccacatggcg gcaggcctaa
+   104041 ttacatttct cggcaaatgg caaatggaga atggcgtgat tgcacacgca aatgcgctcc
+   104101 gttgtttata tgcgtagcaa atagatttat gcgctcgtat tttacacggt taattcctta
+   104161 aggcaaagcc cccacagttg gagcggaaga agcaactgag gcacttgctg cattgcactg
+   104221 aaaagaaaat acagtataat aaaaataagt ttcttgaatt ggtattttgt ttatattgta
+   104281 ttttcttagc accgctaaaa cataagttta aataagcatg tttcggacaa ttcaaatcta
+   104341 gaaccatgtt gagctctgca ttacttcact tatgaaatgt caaattttct ttcagtgccc
+   104401 taaccccttc ccagtgtctg gacacttccc actgttcatc tgaatggata tgccatttcc
+   104461 tgcctattgc tctgggcctt ttgcacatag ggtccctctc ttttggtctg ggaagtacgc
+   104521 taagcatgcg accaacgacc cactcgtgca agtacttaaa ttaccaaatg ggttatgccc
+   104581 tcgcaaatat tccagaaatg tctaagaaaa tacaaggggc taaacggacc aacatttaag
+   104641 gcaaactgca ttggaattga gtctgcgcag gcagatatat aaaaaatact tgtgacttac
+   104701 agttttgtgg caatgataac tctagtaaat atatatgtct ttctctaaat aaacaataaa
+   104761 ataatttctt ggcacatatg aaatgttgag ttttggcggt accttaaaat tcaggcccca
+   104821 aattcaacgc ccccttgaat gtcgctggca cttggtatgc tatttaattt tccggactgt
+   104881 tcattaatca aatgttgcca tatttgcatt ggctcagctt catccatttg gcccctgccc
+   104941 cttcggatcg ctcaccaact tggtatgcga atgcccgggt tttataaatg ggatatgaaa
+   105001 tggcctaggc atctttccca gccaccacat cccgattccc ctaggtcgct taatttattt
+   105061 gtaaatatta acacataaat ttttcagaac gaacgtttcg cgatgttgtt ctggggattt
+   105121 gtgtgcagct ccggggcatt tatcaatatt ttattggccc cataagctta ttaattgtcc
+   105181 gggttacctt tttgtggggg atttaattag ccgacagtgg cattgtaatt gcattgccaa
+   105241 tgctggagca atccattcga ttcatcacca tgaaggtatt ggaccaaaaa ctccccatat
+   105301 gcaaagattc gtgtggcgcg tcacttgtgc gcatggtaac actgaccaca aacgaactgc
+   105361 aaggacactg atgacgggta atggaataaa atggaatgaa tcggatggag atgggcgaac
+   105421 ggaacgaact ggcggctgct cctgacaagt gcacacgaat gagtttcatg agttgcactc
+   105481 ctccggacgt actccgggaa tcggggacgt ggagaatact ctgacctcca ttaaaaggta
+   105541 taatacgcac aatggcatat taatatgtga atgcagaagt aatcccgaca taaggttgat
+   105601 caacttaact tttgttttat atatgaatat tatattttgt aaaaattgtt attttttatt
+   105661 cttttttatt ctttttattc ttcaagtcta ttttattgtt atagtttgtg ttatgaatga
+   105721 aataatagca aatctgccat ggaggtctac tatccccagc tatgttagtg tatccaggaa
+   105781 ttcctcttcc ggcttgactg agtgacagct gcttgccagt cggacattac gtgtggctga
+   105841 agctaaccgt actcgtactc ccagtccaga tggctcctcc gcctgcctgg acacatgaag
+   105901 atgacatgag tctgcccgcc cctggctgta aactctgatg atggcctctc ctggcaggca
+   105961 actcatgctg aaaaatcctg ttgacgagga taaaatgcaa aaacaagccg agtcgagtcg
+   106021 agtcgagtag tgtcgagtcg aggaagaggc ggagctaagt gaagtgtaga cacagtgtcg
+   106081 ggctttgcat cctgggacgg gacttggctt gggataagtt tattgttgta tgtgccacac
+   106141 gaattgtttt ctggtcaaat ctgtcactcg gcctgtccaa ctgtcacagt ctaaaacgaa
+   106201 taggaagagt aggagttggg gaaagaaaag accatgacat ggaccctagt accaggacca
+   106261 gctagactcc aatcccccat tgcaggattc agattctaga gattgatgat ggcattgcgt
+   106321 tgattgtgga aaatgtgtga taacctaatg gcaggggaaa ttgtaaataa aattttcacc
+   106381 tgatgaataa aaactgggtt tagaacatgg agaaaaaggt aaactaatct caatctcatt
+   106441 ctgaaaggat agtaagttaa gcccactcga taacgacgag gcctacctga tttgaagtct
+   106501 accgaacaga ttgcgctgag aaaataagca cccgcggctt ttgcctatgg ccgattacac
+   106561 acaaagacct tccgccgacg gctgtctgga atcgatagcc gatttacccg acttcctggg
+   106621 ggggcaccat gctgatttca atttctcgca gcgcaacatg aaacgctaca aaataataat
+   106681 gatgacgatt ggcccggtcc gggcattact ttacagtcaa gtatctcggt gccttgcgca
+   106741 tttcttgact tctcaatgat agagcccggc actcgtcggc ttcctttctt tcggcaattt
+   106801 ctttattatt gctcagtgga ttttcgttcg ttttcgaatt cgacgatcca tcgaagcgac
+   106861 cgcatgtggt cacaactccc ggacgaaaag aaattaatgg gcttgggata gaaaccgttc
+   106921 gttcgattca ttcatgaaac cctcgactaa ttcaagctga aatgtgtgaa caacaatgga
+   106981 ggaaggcaac aggcaaccag gcaaccaggc agcctcacag cctcttctgt tgattaatgt
+   107041 atgaacgcac atcggtagtc ggttcgtccc catcctctcc agaaatccag tagccacgct
+   107101 gctctgtcga tcggactgag tgtcattaag gttgactgga gcgcagctgg aaccgcctac
+   107161 ggattcgatt ccggactcct aactagaact cgggcgcgga gaaaatgtca aatcgctttt
+   107221 ggcgcaaaat gcgatttggt gcaactcaat acaggcccag tttgccggcc aagtgagttc
+   107281 atttgagcga tcatgcatca tgcatgatga tgatcgaaaa tgggaacgaa atggtacgga
+   107341 tgcgggtacg agatcgggat cggaatcggc ttcggttact ggggatcatg agcactgaat
+   107401 ctcctctgct gacggggcgt aaataaatcc aatttgttgc gcgcgtgagc gttgactgtt
+   107461 tttctcggtt atttttatta ttttcttctc cagatcggtc gttgtgcctc ttcctcaagt
+   107521 tggccatcca tctgtgaacg tataatgtct tggctttccg gttgccacaa catgtcaatc
+   107581 atgctccgtg gcatatcaaa cgcatttccc attgttcgga ttgatatttt cttgttgact
+   107641 tcgttttttt atgccataat atattgtgat cggttcgttt tgcagctttg gctgatgatt
+   107701 ttataatggg atttgtgttc atgctggccg gttagatgat caggtcgatg acgaggcacg
+   107761 gtgcggatga gttatggccg caaagtgtga cgaaactgtc ttgattggaa gtaattgtat
+   107821 attaagttta aggagaaaaa cgtatttcta aagatcatta gttaaccaat gtactctcgt
+   107881 ctttcacaca cttataaaaa gtaaacacct aatgttattg aaaataaagt gtattgtata
+   107941 tatggaaaac tattaggatt tcttgatgca gatgctgaaa ctaagttaat tgtaaataat
+   108001 acttcatttt tcgaaagaaa tacagcatgt ggaaagatat atagaaaatc ataaatcgaa
+   108061 agccccttta caagcaatct ctaatgagaa tataccaaac caagaaataa acaatagaat
+   108121 aaatacggca tttcagatca taaataaccg aacatgtaag aaatggtaga gaatgccttc
+   108181 gcataatagc aaagttttcg ttaaacgatt gcgggtatgg ggagcgaata aacgcaatgg
+   108241 gaaaaatccg tacccagccc aatcccctcg agaaccactt gttttatggc cagcaatatt
+   108301 tttgcatgct gaggcagagg aaaagggtgg gaaaacaccg ccgccgcctg agagcgacaa
+   108361 aaagacgtta atcacgttga gttatgtgac attcttatat ctctgcgcca ccaaagtgtc
+   108421 acgtacgcat ttgttccaag ccaaccgctt cggcggcgtg tgtgggtggg tggttcagtg
+   108481 gtttggtggt tcggtggttc agtgggtagc tgttgctttt tctgctcctg ctgctgctgt
+   108541 cagtttgcca ttgttatttg ctgcaatttt gcagttttct tttgcctgct cgccgtctcg
+   108601 tttcttttca gccagttcgt ttgggatttt atcgccagga tagcgaacag aaactgaact
+   108661 gaacttcact actggcagtt gcttcttctt cttcttctgc agcatatgcc acacactcca
+   108721 cagaaaaatg gggtttttat tttccgctcc ttaccctttc tttcatgtcg aaattttctg
+   108781 tgattttttc cacacactcg tgttacaaaa cacaacacga gacaaaaggg atttttctta
+   108841 tttcgcctcc gtcacgtgag tgtgtgtgtt agtttgtttg ctgagttttg ccgtatttgt
+   108901 ttgtttgtca tttcctttgt tctttccctg ctctctgcca ttgtcaacac tctctcctct
+   108961 tgtttgcggc tgttttgggt gcaaattgca cttttgttgg aacaaatgcg atggaaacaa
+   109021 tgccaccatt gagaggaaac gaaagataga cataggcaga gaggttgggg tctaaaactc
+   109081 cttttcattt cgcttaactt ttcacttttt caactcattt caagccatgg aactttaccc
+   109141 ctttacccct ttaccactca ttccacagaa tccactggtc ttgtctgatt gtctgcctgc
+   109201 cgttgacagt ttgacttatg cgacatatgc aatctggaaa ttactctttg agttggttct
+   109261 cccggtaaat gacagttttc cacagatggg ttagattcct ataggaaaca ataaattcta
+   109321 aacaataatt ctatatacta ttttgagtta tgtttactat gacgacctac ataatcataa
+   109381 gtgaaagtaa atgcattaaa ttaaagtaaa tgcattaatt tatttataag ttataactat
+   109441 tatacatatg gatattatga attaacacac gttataaatt cttccgtccc acgaattaaa
+   109501 ttccaatcat cgttaatttc cccgtttttc cgcgtcaaag cgtgccgcct atggacgctt
+   109561 ttcatatttt ggtaattttg acaaatcgtt tctcatcagc ggcagaaagc gaattgtccc
+   109621 aaatggttta gaaaaatgcc cgaggcgcat gaagtaaatt aattacgcct ttcaatcatt
+   109681 ttgaaatgta tctttttcct ggtccaaatt gggcgcatca tttgtcagaa ggcaatccgc
+   109741 ccccaccccc tcctaattcg gtatccccaa aacatttgcg tgttcatgtc agttggcccg
+   109801 gtcgaaatgc gtcgatggcg tcgaaaaaca aaatggtagg aggagtgcct tcaagggagt
+   109861 gagttaatcg aaacccatca actcgacggc ggcagaagca ataatcaaaa gctgtaatta
+   109921 tgtattattt attttttatt tattgtgcat tactcacggc gagaatcggt accgatatga
+   109981 gggaagccga tcgccagctg atttattgtt gataaattta tgaaaaaaat ttccttaccg
+   110041 acccttttct ggccagaact tgcgtccgcc agagcgtccc ggcaggctga aactcatcaa
+   110101 tatcattaat cataaaatta agtatcattt atattaattt attattgtgc cacagaaatt
+   110161 tcaacagagc attcgttgcc attctcggca ccaccgctgg ctccactgat ttatcatata
+   110221 atgctcatta tcgtcctcat atttatcatg atgatgttgt gctcattttc gtcagtctca
+   110281 cgtctcgctc tggccggcta ctctgtctct gtctcattcg ctggccatca ttaatcagct
+   110341 tccaatcccc cctctcgctc gctcgtcatt agccctctct ctcgctcctt tcgtgcccag
+   110401 ctcgaagcgg tgtaattaat tatggtcttt tgtattccgc cgacaatttt cagtgcttta
+   110461 tggctctcct ctcttctggg cttatgaaat attgtgtata aattggaagc acttaatcat
+   110521 ttcgatttac attttaatta aaatattata tatcaaaaac gggtccccga tgatcggatg
+   110581 ccggttgccg gccaccaggg ggcgcttaac ccaaaacatg tgtcgatggg taatgccgca
+   110641 aagtgtctgg cttttgccgg gattccccat tataaagtaa tttgtaagtt ttataaacag
+   110701 gcaaagagag tcaattaaat aacccaaact atgaattcaa ttcccgcaaa cacgttacat
+   110761 ccggtgagct cctgttggaa ggacatttta atctacaaca ttgggtgtca aagggcattc
+   110821 caatggaaaa taataatgga acaaaaaggg gactcaaagg caggccaaga gtcagacagg
+   110881 agtcattgcc gaacaaacaa tgaatggcat gaaaggaccg actctaagcc agaacaggag
+   110941 ccaatctaac gactagctaa aaaattaaaa cgaaatgcta tggccaagcg aacacgaatc
+   111001 gaagaggcaa aacgtctgcg actccttggc caattcacaa tttaacaaac taataaatat
+   111061 tttgctttcg ccttccggcg ctacaggctc tgagcaatgt gctcgctcca aaggacatgg
+   111121 attccagtgc ctggaaaaat ggagcaattt ctttggcatc tccaaagtgc tgggtaccca
+   111181 ggggcgggga gaaagcactt gggagaatgg taaggagggg caaagcagaa atatataaat
+   111241 caaaggttgc actaaacttt agcctgctcg atgtcgttgc caggataaca acaagaacga
+   111301 tgagtattgc tggtgaaatg catctagcga tggcaacgtg agtcgtgcag acgaagtaaa
+   111361 ataatgtgat ggccctgtta aaatataatt caaactcaaa agcatggaaa gtatcgcgca
+   111421 ctaatcaaat atatcgtgca gatcgattga gatcaagtcg gatattttaa tgaaactaca
+   111481 agtggtatta aattatttat ttatttttat taatttaaaa gaaaattcac gtaactattt
+   111541 ttaatgaaat atcaaaaaag ttgtaatatt caagatttac taagtcactt aataataaag
+   111601 tcaatttaat tttttatttt tcggtgctca aaatgtgttt cgtggcacgt tgacatcacc
+   111661 aatgggatca gcactgcgga agcctttggc tgcttgataa cttgataggt gctggctgca
+   111721 ttgttggtgc caccgttgtt gttatttctc ttgttgttgt tgtgctgctg ccaacaagga
+   111781 cacttcggtg ttggcctcaa gaaggagcaa cgcgacggag ggaggggaca ggattgcgac
+   111841 tggggctccg ggatccgcac atggcttgca actgccaggc tgtcggatat gcactgacat
+   111901 aaattgcttc aactgacaac gccagcggac aagaggagcg tgggggcgag acctgcagga
+   111961 tggatactgc cgtgcccagg accttgcccc atgattactg cctctgtctc tgtctcggtc
+   112021 tcggcgctgg caataacttt tctgtgtccg gggccccgtc tgctgctttg gggtaaaatt
+   112081 aatattaaaa tggaagcatc gccttcgccc aatgtccgac gcattgcaat gccagcaagg
+   112141 acccacgatt ggaggtacac actctaaaaa agatactttc caattttttt accaaagttt
+   112201 tcgaccatta aatcgctggt taagttttaa ccgaaaagtt gtattttaac ttgatcttta
+   112261 atttaatatt attaactgaa attattaaac aggttttcca ttcttcaaga aaaagattct
+   112321 cgaagtattt tttagaacac catatcgttt tacgactggt acatttttaa aattaatcct
+   112381 tggagtgaat acagttatcc ttattctcag tgcaccttcg aactcccctc ttcgtagctg
+   112441 atgggggcac tgccagggca aaggacattt ccaatttatt tacgctcaga ggcactttgt
+   112501 ctgctccgct cttcttggcc aaactcctct gatgctcctt gttcggactg ctctctgctc
+   112561 ctcgttcact actttctctt ggtcctttcc cgctgtatat gacagtattt gtgtagcttg
+   112621 tcagggagaa acagattggt agaaaggggg gatcagaggt cgaatctctt tgcctctttg
+   112681 ctggtctgcc ttttattttt gacagcaaaa tgttccttct ggtcgtgtta gcgtgaaaag
+   112741 tgttgaaagg tgtgaaaatg aaaatttact acttggccgt caagatttat gctgcccaat
+   112801 attttcgttt tcccttcaaa atgggggcac gacggaaata gaaactttta aaccattaga
+   112861 agcggagaaa ggatgacatc ataatttgta catcaccttt acaaaataat ctaattggaa
+   112921 tatagccttc ataaatatcg tcttctagag ggtgattaca tacatttcga tctgtgtcct
+   112981 actcgtctat gttcacttat aaccactcac agtattatga aacgcaatta gttatatttt
+   113041 tataagacac ccaaccatgt cattatgctc ttcatcgtag tatatatata tatgatttta
+   113101 tatatatttt atgatatata tgtatttttt ccaattatta attcccatgc tgcaacgcct
+   113161 ctaatatcaa tttccgcttg tactggggat ccattgggaa tgttctcccc tgattgtctt
+   113221 atcgattgcc gacaacctga tccaatactt cactcccgtc tgtcactcgc actgtcgtcg
+   113281 attggcatcc cgacttgaca tatcccacag cagcagcaac aaatcttgtg tcaagcatcc
+   113341 cgacacgcac ttgataaatt attatctgca gtccgggtga tgaaaggcaa tgaagttagt
+   113401 ggaggagaat gggcgaaaag ccaccacaga gttcgcatcg gtgccctcag tgccctgcac
+   113461 tttgcacttt ggctcaattt atatttaaaa gacagggggt atttgcagtg cgatctaaag
+   113521 gattgcaatg cagagaaaaa aagcggaagg tcggaaattg gaactgacac cagttaattc
+   113581 agccatgttt ctccatctct caataattat ctctcaaatt gaagatgacc gattagttaa
+   113641 tacttttata tctgttttta aaaatcaata agcaaaaaca caaaagaaaa aatcgatctt
+   113701 ttacttttga aatactttac attgcggttg aataatataa tatagcttcg ttttatatag
+   113761 aagacagtaa tcattatatt ttttattgca aagcagagtg atatcgcttc cacaaaggag
+   113821 tgaatctgag tgcaacattt ttcttcatga aaaacacaaa aacaatttaa aatattttca
+   113881 tattttcata gtatggtagc ttttcacgga attgtttgat gatatagttt ccctgaattc
+   113941 aggcataatt tgatacttat ttaacgcatt tgtgatactt gttttcagtg catctagaat
+   114001 ggacaatctc tggattgtcc attggatctg gatctggaca agggaaaggg aaggagcttc
+   114061 tgcaggagtg gccggaaaag tgtccaacag ttctccagag cgacgcgtaa gtggagcggt
+   114121 aaacaagtga cccgaccacg cccaccgggc acccacaagg aggccgcagg atgttggcaa
+   114181 tcgaaggcag tgaaggcata aggaataagg aagctgggga agccgagagt tgtgggcaaa
+   114241 atgttggctg cagtcgctca gcgggcgggc gccatttacc aagtgtcaaa agcaaagcta
+   114301 aagctattgt cgacatgact ttggacgaca acaacagcaa gaaatgtgct ttcccaccga
+   114361 aaaagtgaaa tgctttgttg gcaattgccg aggagtttcc cggagcaagt cggagaactt
+   114421 gcagctgtgc aggatctgcc ggctctgccg gtcttgctgg aaaggatagt ggaaaaggag
+   114481 cgatggcggc agcatgtgtc aacttctttt cagaccgaga aagggatttt cctctccggc
+   114541 gggtcaagac gaaggcgtac ccgatagaaa agtgggtgac aagtaaccgg aaaatgtatt
+   114601 ttattgtttg catccggaca tttcgttttc ccgacctctg ccccctgact gttcgggtgt
+   114661 gtttcacccc gcagagtcac caaccgacca atgacttggt cttctggcac taaatttgtc
+   114721 gagtaattga tgagattgca ctttttctcc aaactcgttc aagaaggaga gctgcagatg
+   114781 ttctgagaga gttgtgggcg ttattgtggt aatgtcctgg ttgagaatag tggcaccgta
+   114841 atggatggag atatgtactc aaggaaatgc tggtgctatt ggcataaatg gagtagtggc
+   114901 atataatcaa caaaatttgt ggcttggttc aacaaaattt gatggagccc taagcgttta
+   114961 gaatttctat tataagatta accaaaatat tttactggtg cattgtcata aacaccgatc
+   115021 ctcggtgctc aacgccatgt tctctgtctc tgtccacctc ccactgacca cccatttgaa
+   115081 ccagttcgag tcaggctaac ttgatagatt gtctaaatgt gctgcgtatg aatgaatgaa
+   115141 tgaatgaatg aatgaataag cgagtgcgag tacagcccct tctgccaagc aaacccccct
+   115201 gtccaccagt tgatggccac ctcgaatacg cccgttgggt tcgcctgatg cttctacgag
+   115261 attgatggcc aaaatggctg ttgaaacaac ggcaaagcga acaggcagca gtgactgccg
+   115321 ctcaatgaag aaaaataatc accaacatca tttattttcc gttttgcctg ccgacgtcgt
+   115381 acagcacaca tcaatctttt gcaacagcaa cagccaacta tcgaccatga ccaggcctgg
+   115441 actctggctg aaaacagggg gattgagatt ggtaaccggg aactgggaac tgggactttt
+   115501 ggccgtgatg tatacttttg tgagcctgac agcttctgac gtggatgtgg acgtggacga
+   115561 gaagccaggc ttcagcttga tgtttgatgc ttgatgctag gagcctcctg gaggttggag
+   115621 gattggggag tttctggact gacagacgac ttttgacgtt tgcctgcaat taacgatacc
+   115681 atcccactgg ctcagtgttg aaaaatgact gcagctgcag gtgaaaatgg cacgccagag
+   115741 tgttggttaa tgacacgaca acagttcgag tcagccagcg aagaaaacga cccctccccc
+   115801 gcatttaaat cattaatccc aatgacaagg accccacaca cgcagcccca tcactcagcc
+   115861 agagtgatgt caaagataga cgagcagccc gaagcagcgg agaacgtttc tagaagtgga
+   115921 agttggatgt gtcaggagca gggggagcat cctcggagat ggatctgggg tcggagtcag
+   115981 tgtctaggag tcgcgtcctc gtttgctgag actaatttgt ggctctcttc tgagacgccc
+   116041 actatgcgag ccattacact ttgagaaaat actgaaataa aatacattca gatttttcag
+   116101 atattatttt ttagttttat aataaaaata aataattttc tatttgctta aatttatgga
+   116161 gattacattt catattattt cgagctatac aagaaaagat tttccacagt gcagccacga
+   116221 ttttttgatc cttgttgagg ctcgttatgt gactggtgac ttggttttgt tgccagtcac
+   116281 ttgaaaagtc attaatcatt ttgggggatt tcgctttttg cgccatttgg ggcgtagttg
+   116341 tgcgccgagc tgtcggagtc gctggatttc gactcggagt gctgcagaaa tccatcacgc
+   116401 acccaaatgg cagccactcg catcagccac caccacccaa cacaggcaac acattcaaag
+   116461 aggcaacagc caacagctaa cagccaacat caatgtcaac gtcaggcggc ggagcaggac
+   116521 gcggatgtgt tcgcggatgg gagcgcggat gtgtgcgcgg atgtgagcgc ggacaaggcg
+   116581 acggcagcga cgtctgacag gcggaagaaa agagtcaccc atcaagtgag cttggcaaaa
+   116641 aggacgaggt ggggtagagc agtcactgaa tgaaagtgct ttgtgattag gagtcaaagc
+   116701 atatgctttt taatttgcat gtcagctcgc ggtgccagat gatgttctac acgcgcggct
+   116761 tagattagga tgcctgtcta ttctaccatg cctgtctatt ctagtcagca ggttactatc
+   116821 cgcaatttgt gttcttgcca tgtgagcttt ttgggaaaat tgtagcttaa acggattcgt
+   116881 gaaaactgga aagcaaaggg ttcaagtggg catatttgtt gaaccgacgt agatgtaact
+   116941 tgcagttgat cgtcaataga ggaaaaggga tgatcatcat caaaacataa tatatcttta
+   117001 tacctaaaac aaaaatgaac gctattccta tcaggaagta tttaggtaga gtgagtgcta
+   117061 agaagaattt tcaattgtat gcttaaactc aaccttctaa ctctcacagt tcccgagatg
+   117121 ttattggttg tcgatttgtg cattcctttc taccatgtgc aaaaaactaa caaatctttt
+   117181 ctaaaatgcg ctgctttatt accacttgat gtcaattgta ttcttttcat gatcagttaa
+   117241 cctcttcttc caatgttgta atgaacaggt ttgccgtttc gcatcagctc cgtgattgga
+   117301 tagctacctc cttgggcggc gtcggcggtg ggttcgcact gtgggacatt accaaaaact
+   117361 ggagcggcag tcatcaggac aacggccaaa acggcaaaaa gaacagcctg gtgaagatgg
+   117421 ttttaattgt ttaattaact cataatggat tgagaaatct gttaaactca ccaacaactt
+   117481 catcttgcga gatttgaatt taaagaatgt gttttctgca gagtgcaatg ctttttatac
+   117541 cccagagaag tttcttatcg cttccacagt atgatcatta cacgctaaag ccgcgataat
+   117601 tgggagctgg gttatattgt aactcggtgg aagaacataa atatttccaa agaatcgcaa
+   117661 taatcaagaa tattgttttt cctggtatta ctgcattgga gaattaccaa tattcatcat
+   117721 aggctcttac tatattgggg ctttatatgg gggttctata tatgattaat caacctcttc
+   117781 gtgcggtatt gtatttaata ggtttgccct tccggtttgc acattgtggc agattcgcga
+   117841 aaactggaac ggtagtcatc aggacaacag ccaaaacggc gaaaagaaca gcttgatgaa
+   117901 attgatttag tgttaaataa aattattata atcactcaat ctgtccaact tacgaacaac
+   117961 ttcatctgca cagtaacaca tttttattgg gagttttaac tattctagtg tctttgatat
+   118021 aactattaat ttctcaaggt catgcttttc caatatgtta cgatttcttt ttttattgaa
+   118081 attcactggt caaggcaatg agatatattt tccgtccaac aaaacgaatt gcctaccctt
+   118141 aaactgtttt cttattttaa ctaattccac caacgttata attttataaa agtttaacta
+   118201 aaaatgcctt cattcaaata tattgctcca tctagcgaca atggacagca atgagataat
+   118261 aaagggttat tttacatata ggctacaaat ggttaacgtt attacgcgtg taaatttaca
+   118321 tatagctaag tttcgaacta cgatggttgg tgtattgtct acaccaactt aacgaagtag
+   118381 cggcgacacc accaaactta tcgaagggtt gacaggactg taaaacttct gttaactaag
+   118441 gaactttgca ttctgtaaca agaaaataat ccttttgtat ttaaaataag ttgttaggaa
+   118501 ggagaatcct gcgtcgggta ggtctccttt ttaatacttt ctaacttact tagaaatcag
+   118561 ccatttaacc aaggcagaaa tgaaaaaaaa atttctaaga actttgattt tttgcagaaa
+   118621 gagtggaagg aaatgtataa aatatgtaca ttttccaaga agttcctatt ttcaaaaaag
+   118681 tgtttgggat gttggtgaaa catgatttaa tggcaatata gatattaaaa gtgattttag
+   118741 taatgcttta aaatgacaaa ttatggcttg tgacttgtgc caaagaaagt caccagaaac
+   118801 gtaaaggtac taaaagttcc tgctgcctgt cacctgcctg ccactcggcc atcgagatcg
+   118861 tctgctaaaa gccgggcaat taataacgcg cacacccctc gcgaaaccca aagctagtca
+   118921 cttaacgcta atactctaga ggcgcggtga ctaattaaaa cagcagttaa attagcaaag
+   118981 taaagctggg cgagtagttg acaaattgtc agccagatct ccacacaaac caaaccgatc
+   119041 ctattcgaat tgaaacccgc cgcatctatc agcggcgaca gaaacacttt gcctgtcgtc
+   119101 tgccggcttg ttggtttctt ggtttgtttg tttggcttta gccatctggc ggacactatg
+   119161 agttggccaa ttctgcgcca taagatgccg ttaaatggac acgcagcgcg agccacatct
+   119221 ggagaacgag ctaatgccaa atccagagtc agagctagag cctcggactg gcggcgagtt
+   119281 ctcgctctgc gtagccagag atgcttcctg tttgataaat ttgaaaatca attgaaaatg
+   119341 cctgaaaggc cgacgcatct tggtgggaga cagacggaca cacacacaag ccgagagaga
+   119401 ttcgcggaaa agcagctgga agctgttccg actcgaatcg gagatgctgc cccctgggaa
+   119461 ctggaagtca actgcttatg taggacagga attaaatcag ccgaatcatt ttaaagtatt
+   119521 taggattaat actgagtttt ggatgttcta attatttcgg ctgggctttg acaagttttt
+   119581 atttcatagc aatggacatt aggaattttc taggaacaat tgatattcct gaggacagta
+   119641 atctcacatt tagtagcact caatatacct gtcactaact gaagtcactc cataagcacg
+   119701 cactggcatt cgcatctctg gcactggcca taacctagaa tattttagtt ttttattgat
+   119761 attttctagc catctattta ccgcggctgg atcatctgcc cgatgctcgg atccgccagc
+   119821 cccgcgccaa tcgatgcaaa taaatcacta tggaaaatgc agtcgatgaa aaaaagcgac
+   119881 gcgtaagtgc cgccaaataa atttcgcaat ttatttattg cagagccgcg cgggctgccg
+   119941 ttcaatttgg atggcgatgt tgggatggaa gtgtggggat ttccccctgc tgcactccaa
+   120001 ctactcggag tggagcgtcc atggtcaggt ttttggctat cggcaacaaa caggcgcgtc
+   120061 gcggaagaaa agtattttat ttcattgcca gcaaattggc aataagtgaa atttgttggc
+   120121 cccgctcggc aggtgtgcgt gtgtttggcc gagctggcca atggcgattg aaggaaatgg
+   120181 aaatttgaat cgcacgcaga ttcccttacc ggggaaaatt gggttagcaa tgccatcttt
+   120241 tcgggcgggc gtgttcgggc atttttaaca ggtcctaaag agccacttaa tatatcatcg
+   120301 gggcggttta atttgccgat taagtcgctg cagcaggcag caggcacctg cgaaattccc
+   120361 gtgaagcgca agcgtaaccg atcgactctg acccatttcg ccattgttaa cattcacttt
+   120421 tgtggccgta atttattgtt ttgcctgcac cttcaacggc atgatttctg cataaataaa
+   120481 tcagccgaat cagtcgagct ggcggggcaa agtgcaaatg aattagcatt tgtattcacc
+   120541 cgattcccat acgcatcatt agcctaagag gcggcatcaa gtgtaattta accgttaaca
+   120601 aaagcccatt gtaggctaac aacccattgt gaagagagtg tgtgcgcgtg tgtggaaagc
+   120661 cttttacccg ccagtcggag agaaaagaat ccgcgcggac gcaaattgat tgaacgccgc
+   120721 gaaggagtcg cactgcaaga tgcactgcag tcggatcttc agctggagtt ggagtctaag
+   120781 cggtagattc tggctaatga ccgcaaatta acactggcca aggagccgca agacgagtgt
+   120841 cattaacaga agtgtttgcc aggcggtcgc tcgagtggca gccactcact ttggtcttcg
+   120901 ggtttggtgc gggcttttag catatcttct aatagatcgc caacttggca tacttaactt
+   120961 acgttcgcag ttagtgtttt ccttaactct ttctggccag ttgaccgtgc taagttgggc
+   121021 cataaatcgg cagttaactc tggaagatct gcagctaccg tagtcgctgg gggcactcat
+   121081 aatttaaaga ttcttaaagt tcggtatggg tggcgagaaa agtttaggag aaagaaaaag
+   121141 tccatctctg ttctgaatgc ggtaattggt gctgatatat tttactgtca gaaaatgact
+   121201 taccaataaa cgttatgttc cctatttcac tttattatct caacattctg gaaatcaact
+   121261 aaactagccc caaaatcaag ataatcctat caatatcctg actttgatgc acttaccatc
+   121321 taagccatga cagactttta tgtgggaagt ttcaccaaac taacccaaat cttgtccaat
+   121381 tatctaacgc tatgaaaagc tatccggttc accccgctct tcaatttgca attttgaagc
+   121441 gaaaacccac cgaaaatgca acatttcgtt tgcaaattgc ttgtgtgtag gttttggaca
+   121501 actttttatt tctgttagca tctttctatt cctataatta ccacagaaat cgcataacaa
+   121561 gctgctgcaa cgatgataaa aaacgaaaag ttcgcaatgg ggggagtagc cgggaaactg
+   121621 ggtcagctgt gtggcattat tggtttagtg gatcaaaccc acagcgacta cgcaaaaggc
+   121681 aatttgagtt gcagagcaaa acgacataaa acaaaaagca tggacctcca ccggccacat
+   121741 ttattagatc aatgtctcgg caaccagagc accaaccgta gtgcaatagc tacccacaag
+   121801 accaagactt ggactgctga ataaattgca gaggctgcgg cctactcgag cagcaaggaa
+   121861 aacaaaaaaa aaaaacctgg ggaaaacaca aaatcaaacg aatgactagc atgaggccaa
+   121921 tatataataa tggttaaacg cccaggcagc ggtgcactgc accgcagcaa catctaataa
+   121981 cagcagcagc agcagcaaca tctgcaacaa cggccacaag aacaagcaaa catccaaaca
+   122041 agcagcaagc tagcaactgc ccattaaacg gagctacgga gaatccacct cgattggcac
+   122101 tggcgaaaca atttaaatag aattcgccag catcagtggg aaattcgaat tgaatttgtt
+   122161 gctgatgcat gtccatgttg ctgctgttca tgttcaattt tccagcacca cccaaccgaa
+   122221 aaatcggata gtgaagatta tcattaaaat atattgctgc ctaaatatgt gagagattta
+   122281 tacactcttg aagcagcgga atataacggt ttgaagtaga taatatttag aggaaatgac
+   122341 aaggttataa agattatagc tttccaactt gttatgatct gttttaaata taattccatt
+   122401 ttttatttcc gccaatgcat ggacatagca tacctatacc ccataaatgg cttagttaat
+   122461 cttccatgaa attgattcca tattttatcg gctacaattt agtcaaatag gatagatatc
+   122521 catagaaacc cttcttcttc tagggtagcg agcagcgatt gtaggaaacg catgggcaca
+   122581 cttgccgcaa gatttatatg catgcatgat cggtattaga aaattgtctg agcaaatgtt
+   122641 tttggcaagc atgttgcaca tgttgctcat tgctgccgca ctgccacata tactcgtagt
+   122701 ttctccagca agatcaataa aacctggccg gctactggcc ttgccttggt cggttccgcc
+   122761 tattgccgtt tgttgtctgc cagcctgcat attaatttat gtttcgacat ttgccttggc
+   122821 ccggagaatt tccaaaccgc accaaaacca cacccaacca caatttgttg ttttgcctct
+   122881 gcctcctgct tttcggctgt ttggtcagtc gcctgacagg cgaagcctaa aattcaatta
+   122941 aaaatattaa ggctgaacct gggctactac tgcttattag acttttcggc cgaacatttt
+   123001 cttatggatg ttgatttggg gttcttccgc cctgcccccg cccacgggac tagagaacat
+   123061 tttcgatcac agatctctga tcgacccctt aagccccttt aaccgatagt ttgctcgatt
+   123121 gctttggcta ccgatcaacg accgaaagca atccaaaagt gtgtttatct aagctgaact
+   123181 gggaattatc actgctcgta aagaaaaaca gcaaagtttt atgacttaat gatgaaaaat
+   123241 cacagtgtcg acgtccaaga acttttcaca ttaaaatcag aacaagcact tccttacgac
+   123301 tgctttaaaa ttggagttta attgtactga taaagattca taccataaat gataaacata
+   123361 ttaataggca tattaataag ggctttgaaa gataaaagct actagtgtta attcaatttt
+   123421 tcttttcaac attttgtctt ttgtgtcagc taaatatcta caaacaattt gagacatttt
+   123481 acttaacaca tctcttaata aggctcagta tatctgtaaa gcatagcaag atttgccctc
+   123541 ttgttttttt tttcaggccc ccaccctcct tacttctttc cgatcttgat gccttatgat
+   123601 tcgcgatgat cttgaccatg ttaattccct cgggggtgtg tgctctgtta cgttaacttt
+   123661 ggctacaagc cgaaaaaaag atacgcactg tgcgcgctcc gttaaccggc aaaaaacgag
+   123721 aaaatgtcga caaatttatg acttcacagt taaccacggt cggagatgca gatcccagaa
+   123781 gggggatctt ttgactgtgt gcccaactac cgcgcaattt acgatcggtt catttaggtg
+   123841 gaccattgtc caccgatgca ccgatgttta tgcaatgttt ttgtgatttc atatcggcca
+   123901 gctgggaatt tgtagcggca acgcctttgc catcgccatc accatcgcct ttcattgtgg
+   123961 gcgttgggat ctgggatcat ttcccttttc ccccaagagt tcgggcattt cagtgccatc
+   124021 tccgttgaat ctttctccac tttttgttgg ctgcttattc tgggagaagc gcaccctagc
+   124081 caaatatgtg cgagccaaaa acgtgaatcg aacaaattgt tactgcctta aatagtaaac
+   124141 agattttgtc gttcattcca gccacggcca aaaacgggct ctgcgtatta cggcagcgac
+   124201 aagcaagagg ctgaattcga ttcttcccag ccgattccca gagggttgag ctgggtcaat
+   124261 cctcgccaga ggtccccagg agcatttaaa aatcaatttg ggcgcattta tgtttcacac
+   124321 gatggcccag agaggaacca aaacagaaac gtcgcaatgt ctgagtcggg taggtcttga
+   124381 gtttgatgat tatctcggtt tgtgtgactt tcggtttcca agctcgtgta cgattcgtgt
+   124441 agtatatggt gtgtgcctgg aaaatggatg gaaatgtgtg tcaccgggtc tctgtgtctt
+   124501 ttcatcctcc tcctcctcct ggctggctgg caatcctcca gctttagatg caactcccat
+   124561 tcccaatcca ctggcacagc ctctgccatt tccatttcca ttcccaattc gaaacaatta
+   124621 ccaattgttt atttgttctt acaacttgtt gtcgacttcc ccagtggcct gatggtgctg
+   124681 gtggccccac cgatctatcg ttttcctctt cgcctccacg aattcatcaa ttgccacaca
+   124741 atcttgatct cgagtcggca cacagaagtt tgctctgtcg aaacggtccc aagcctaaac
+   124801 tttccttggc cacttcgaca gtcggcactt tatcttcgcc ggcttttaat ttcactaatg
+   124861 acaattgatg atcgtaaacg tttatgaaaa tggaaaacct aatgaaaccg aatttcccca
+   124921 tgtgagttaa gccaacttaa ataagcctcg cgcagtccga aataaatgaa ataaatcagt
+   124981 ggctttaaac cacgtaaaat tactgtttat tggcttggag gaagttttga tatttatatt
+   125041 ctttaggtgc aatagatact atgagatttc attgataaat atattctttt tcactttcaa
+   125101 tatgaaaaat tgatcagtcg atttaattct ttttataata atgttatcaa aaagaaacaa
+   125161 tgaaatttta aaaaatttat ttttcgcagg gtatataaat taaatataaa taaagtttca
+   125221 aaattttgca ctggaattct cttaagtgtt cggttggcat ccaattagct cagttattga
+   125281 ataaaatgtg gcactgccac acacatttgg tgaatgctcg cggaggcatt tcaagatcgc
+   125341 ggcactcgta aatcgccgac cagcccacaa tgcgatccca aagacaatcc gcaacaggga
+   125401 tgactagaaa agtatgccag cagccttttg gactcggatt tttgctgttc tgctgctgct
+   125461 gctgctgctg ccacttcgac ggcacagaag ttgtcatcgt caattagttt aaaggcagta
+   125521 taaatttatg ttaacagcaa ccaaacataa tatcccccga gagtccaagc agcaagcaga
+   125581 gcgaaaacga aacgaaacaa ttttcccttt tcaccaacat caacgccagc taccgtttct
+   125641 gccactgttg caagtcgctg atattgctgt tgcaagttgc agatgttact gcggctgctg
+   125701 ctgctgattt tgttgttgcg gcattgtttt ggggcgcgcg atttcgtggc cgttataaca
+   125761 ggcccaaaag aatccgaggc agacgttatg cctcaatgaa gtcagagttg gagttggagc
+   125821 tggagctgaa gatatctggg acggggaagc aatccccctg ccctgtgaga agaaggccaa
+   125881 ttgggtcgtg gctgggcctt tgatgttgtc gcagtttgtt agaatctgtt gcggcacaat
+   125941 gagccacaat aagaaacaaa aagttggcac atgggaccag gcgtagcatt atttaatgta
+   126001 atcgatcgtg atgggtttgt tctaatttgc ggtgattttg tttttggcgc ttgttaagaa
+   126061 cgtccgaagt gaaagagcca agcgaaatcc tttgaagttg gccaacgatt tagtgtcgta
+   126121 tcttttatgg tgtatcataa aagtttactg cccgaatgta attagtcgag gggcataaag
+   126181 aagtcatttt tctccgtcta aaattgtggt ttattaactg gattaatgaa aatgataacc
+   126241 aaccgagaaa ggtagttgct tcatttccat ttataattat tattacacaa caagattagt
+   126301 acttgttcgt acataaattt ttattttgag aacgaatata cgaatttgag tttaatacgt
+   126361 aaattcagga ttcttgctac atacattact ggtttcatgg ccatgttctt gtttccagtt
+   126421 gcgtatcaca ggctaatcag agtttcgtac ctgcattata tacatatgcg aagacttttc
+   126481 tcaatctcaa gctggaaaat tgcatggaca actagatggt aattgcaggt gtgcaacatg
+   126541 gtaatgacga ctttggccag cggcctggta ttgggtggaa atggaggccc caattggggg
+   126601 atgattggat gggatgggtc ggggccgggg cgcgatcggg aattgcgttg ggaagaggcg
+   126661 gacgacatcc acccaatcat ggtcattacc tgggacttgt tatggttgcc aatgatcaga
+   126721 tcactgctca taggtgaccc aaattctgtg cctgctactt atacatattc atagccaaaa
+   126781 cagttcattt ttgtttaaat atgtgacagt gaatacccta aaaaatggca aatctatctg
+   126841 aaacgggaca aaacagggtt caatgaccag cgatggaatc acctgcctgc gcttaacccc
+   126901 gcttgattcg atgccactga agtcggcctg ccaatcgctt gaattaagta atttcgattc
+   126961 cgtcacgtca gacggcaatc cacgcgacgg cagctgagga gataatcaaa taaatgcatt
+   127021 gttagccggg ttttcaagtt gaaagaaaag cgccactcgg gattatttgc tttcccgagt
+   127081 gtccccacaa gggtcagggc cagactcaat gaaaagccgg tcagagcgag gggcaaccca
+   127141 aacgaattgt atcgatggct tgccaatgaa gtgaaaatcg tgattccaaa ggcaaacagc
+   127201 ggcctgtaat tttcactaat ttctacacgt ttttttttcc tttggcttca attggcttgc
+   127261 ttttctagtt ttttcgtcgt caacattttg tcgatttgct tcccaagtag ctaaaacaat
+   127321 tatgcgaatt aatggcgaac gggggaatga atcgcgcaat ttcaatagtc attccaattg
+   127381 ccaaaagttc aaatcaattg caactcattt aaagtgacgt cacaatcgct ctgctgccaa
+   127441 aaaaggcaat cgatccacaa ggaaaacaca acccaagtgt ccatagaaga ttttccagag
+   127501 ttctcgcgaa aaactccata acagcaaatg cattctgcat tttaaactga ccaaagggtt
+   127561 ttttttagtc catctcaagt cgcacgtccc aagggaacgt tatccgacgt ttatggaatg
+   127621 agacttcagt cttccatcga ttgaccatgg gcgtagtcag tttgtaaagg tccaccggaa
+   127681 gtactatatt ccaagcggct gttatgaaaa aagtattgtt tgtgtggtgt gctctttgta
+   127741 aacaacaaag atagttagtt tttaattcct tctttcccct ccggatgaaa atcttgacta
+   127801 cgctcttggc cagttgattt tctttctgag ttttctgaaa tccatttgga tgcggcagac
+   127861 tgcttggaac catcgacgtc tgaacacgct tatttattca gtagttactg tttttgctgg
+   127921 ctgctgcagg aattactcga aatggcatgt aatgagttga gttctcgttg ccctttcgtt
+   127981 ttttgccggt gtcttcagtt ttcctggaaa gcttatgcca gatttagaca gcgccaaagc
+   128041 tctcttcgca cttgcgtatg aagtgcccgc catagcaaag cggattttta gcgcttgttt
+   128101 accaactggc tcgttggtct aggggtatga tttccgctta gggtgcggga ggtcccgggt
+   128161 tcaaatcccg gacgagccca agatggagta gtttttttgt gtttattttt tatttccttt
+   128221 aacttcccag aaattgagac ataacaattg ctagtacaat tatagaggat gtaagaaaaa
+   128281 cagcaagact aaaattcatc ttcggcaatc tcaacccctt tatttaatta aatcaacatc
+   128341 attttgcata tataaaaagc taagccaact tgaaaagaaa gactcaacgt ccaacgggtt
+   128401 catatacaca caaataaaaa aaaaacaatg tttgaagttt gattacgaaa tatttattat
+   128461 cggagcactt aagcgaacaa ttctaattac atttgtatat atatcatatt atctgcaagt
+   128521 gcacaaagtt aagcttttag aattaaagac taacgaaggg ctttaaatgt tattgctcta
+   128581 cccatccttt ttgtcattta ctatctttta agaccaccat tggaaaaaaa tagataatca
+   128641 tcttgggctc gtccgggatt tgaacccggg acctcccgca ccctaagcgg aaatcatacc
+   128701 cctagaccaa cgagccaagt gatcgtcgcg actcacataa tcagaccaat tttgcgcagt
+   128761 aatagttggt cttgctcaca atttccatca aaatattttt aaaatgtata agaactggtt
+   128821 tcatagatac tttcaaggct ggagctttct gttgaccatg aaagaaacct gctcttttcg
+   128881 tgctgtgata tgagtaagct ttttgaccaa cgtaagacaa ctggctcgtt ggtctagggg
+   128941 tatgatttcc gcttagggtg cgggaggtcc cgggttcaaa tcccggacga gcccaagaga
+   129001 aaagaacttt ttgtttcaaa ttataaactt tgcttatact ttttattttt gttgcacaaa
+   129061 taacgactta tgggtaaata atattaacat ttgttgtttt gccccgctta tttaataaga
+   129121 ttaaaattgc tttaattaag gtagaatata ttgattaata ccaatggtta acaaaacaaa
+   129181 aataaaatta gacgatatat tgtttgtatt ttctttatgt ataatgcatt tacatgtatg
+   129241 taagctgaaa acaatttcga tttatttaat acaaacgaaa aaagaagcaa ctaagcttga
+   129301 atacttgttg ctcgcttgaa agttgaccta tgaactacat aaacataaat aaatgacaca
+   129361 ttttgtggac atagagaatt ggataaacgt acagtagaat aacatacaga tacaaagata
+   129421 gaatagatga taaatacata aatttggccc aaaattgttg tggtttttct tgctgttgat
+   129481 gacgcagtgg atgctgctgt tgttacattt gttggggtta gttttccatt gctcgacgtc
+   129541 ccttccactt tttgcattgg aagggggtga acagtcgttg ttagctaaac tccattggaa
+   129601 atctgaagct cctcatatat gtatgtatat atttcatttg actgctacat caccaacatc
+   129661 atcatcatca tcttagttat gatcttcaac cagttttgcg ctctgttaga gtaatgttat
+   129721 tcgttttatg tacatacatc atggatgctg tgccgttcga ttttgttttg catttggttg
+   129781 cttggtttaa ttaatatatt ttaaataatt ataattattt agttattatt agcactaatt
+   129841 attctttggt tgttccactg gttctgtttg ttgttttctt gctcttggtt attgcatttc
+   129901 attttcgtta ttatttgttc gatttgttca ttgtgctttt tcttcctggg ctacctgttg
+   129961 ctgaatgtta atgacggcag ccgttttaaa catttgcttg tgactgggct tttttggatc
+   130021 ggttgttggt gctgttgttg ttgttgttat ttctgatgat gctggtgatg tctgtgcagc
+   130081 ggatttcctt aaactcttgc caaacgtgga actgcccttc agacctgcaa agagtcccaa
+   130141 aaaagaagga aatgtatcta aaattagtta tattttaatt tttacagaaa gacaaatata
+   130201 caaatgtgtg attataacga caggagcact gggaaaggat ctggagagat tgacggaaag
+   130261 actgaagaca aaagcagaga cagccgtgat gataacgatt acgtgtatag agtgtagaga
+   130321 gaagagtacg ctgccccagg agggaataga gtggatcgag aatggcgttt ttgcgtttac
+   130381 cttggtagct caaacgattc tgatagatgg caaatatatc gaagggcctg tagcatggct
+   130441 cgtaaccatc ctcgggccca tcgagatctc gatagagttg cgatctctga ctctgatcag
+   130501 aagcgtttgc ggctggcacg agggaactgt ttagcatatg ggagggtatc acgaagagac
+   130561 tttgctccat tttctcgggt ggcgcccgcg atgaatcggc ggtttctcta tcggaatcgg
+   130621 gattggactc agtttcgctc ggggccgctg atgtggttgt tgctgtggct gtagttgtgg
+   130681 ttgtggcagt ggttcttgtt gctgtggcac ccaatatttg tggtgcaggc ttgggaagtt
+   130741 ggttgtagtt gtgttgtgtt gtttgtgttg tgtttttata ttttttgtat tttttttttt
+   130801 gggatagtgg aagatggtaa caaagtggat tagttgttgt ctgcagttgt agtgattagt
+   130861 gttagtcaca atttatacta attgatctag agttaattaa ggggttctaa gtgatataat
+   130921 actgcagttt cttacctcgc tatcttcaca atcagcatct tctcgagcgt cggaggtcag
+   130981 gccctgcacg aagtgaacat catttgatgg atcatcggag gccaagtgag cacgaaattg
+   131041 tttgtttctg caagatgtca ttaaagtaag ggtgtccaat tgaatataaa ttaccgtacc
+   131101 tcagctccag ttgctgcatc tgctgaaaat gtcttagctt gctattggcc atatgcagtt
+   131161 tttcctccaa ggccgcgttc tccacacact tgatggaata cttttcggaa aatgcgagaa
+   131221 tttccattcg cagctctccg agatcctccc taaatagaag ataaggttcg ttttagtttt
+   131281 aatcgaattg tgctcaagga tttgatattt gtatgcatgt gttttaggcc atcttccaca
+   131341 aaatattcat tctatactta ctctttcagt ttggcgccat cccctcgcat ttgttcgcct
+   131401 ctttgaacct gtcgcagaaa ctcctgcttg aatcgagcca cctcccgctg aacttcactc
+   131461 tggtgtgctt ttctcatcgc atccagagca gccaaagtgg cctgtaaatg tagtaaataa
+   131521 atccaaagaa gttaagaatc caaacaaagg tggattggcg ggctatgagc tgtacctgag
+   131581 tttcctcggc cagggcagtt tccttctcgg ccagcaggcg ctctatctcc atcttgtgtt
+   131641 tctgctcaag atccatagta agccgcttat gcgaagtttc catggcactc aatcccttct
+   131701 cgcagagtgt ctgaaatgta cagtatgaaa gttcgttatt aaaacggatt taacttttat
+   131761 ttatttattt atatactata atatatgcca tataaatgat tttttttagc gtacagtggg
+   131821 ttgcagtgtt taaactcacc ctcagctgtt cgatctcagt ttggtaccgc tttcggaaac
+   131881 tctgctcatc ggctgcgttg gcatgttcgg tgtggtagac ctccctcagg cactccagtt
+   131941 gcttgccatg ttcccgctcg aggagctcca gtcgcttttc cagtcgccga caacgctcct
+   132001 gctgccttcc taattcctcg ctcagctcct gacgctgctg caggagcagt tcctgctcct
+   132061 gctcctggct ggcgcacttt tccagtgctt gggccaggtc agtctcccgc tgctccagca
+   132121 gctgctcact ccgttgcaaa tccgtttcgg tttgctgctg caaggcggat agcttttcta
+   132181 cacgctgacg cagatcgcaa cacttgtgac acgaatcctg caagcaagtc agctccatct
+   132241 gctcactgac catcacacat gtttcggcca gagctcgttc cagctggcgt aggccatctt
+   132301 tgcgatcgtt catgggcgaa acggagcgac gagtcctttg gccggcgcga atttgctgca
+   132361 attgggtgag actctgctcc agacgctcct tactgccgag gagcacgtgc tccaggctgt
+   132421 gcaatcgctg caacaggtgg gcatttagtg gtgactggct actggattcg ctgctggaag
+   132481 actcgggtgg cgtaccctcc tttctctcca ccgtttccct ttgcaactcg cttagtttca
+   132541 gttccagttg ctctttaccc ttcaaggaca acgaacgtct cctctcggat ttggccaatt
+   132601 tctgcttaag agcccgcacc tgacgctccg aactgctgta cttcagggtc agatcctgaa
+   132661 tacgggctgc catcagctgc agttgggctc catcctcgtg ctgccgcctc tccagcttct
+   132721 gctgcagcgc agcttgtgcc agctccaagg catcgtattg attttgcgac tgcttcagtt
+   132781 cccgcttcat ggctcgaatt tcgttaacgg ccttctcgaa gcgggatctc agctcattgt
+   132841 actcctcggc cagcgaatcg cgttgcaggc agtagggatc aatgtcgctg atggtggtta
+   132901 gatcgctcag tgagtccatg cgtttaattt tgtgacgctg caggttgctg agagcggcac
+   132961 caccactgtg atcctgagcc tgtaactctc tcaaacgctt gtagagtcct tcgttctcct
+   133021 cgattccctg cgccagacgg tcctccaact gcttgatttg gcactgactc tcggtgagct
+   133081 gcattcccaa attgtgcatc agatcggagg cactctgttt ggcctgggta agttcgctat
+   133141 tcaactgctg ttgttgctcc tgcagccggg cagtcaactg ttgttccgtt ctttggaggg
+   133201 tggccagtag ctccgacatg cgggcctccc gcagtgaagc ctctgcccgc aaggtctcgc
+   133261 attgttgctt gagttccctg ctctcggttg gctgcatttc ctcgggaact actaagagag
+   133321 gcaattcatc actggccaca ctgtctacag tactgcgccg tgaacttggt ggcgaactgc
+   133381 gactcctttt gtgcaagctg gccaagcttc gattgcgcag acaacttctc aaaacaggtg
+   133441 gtgatggtga caacggtgat ccccagtcca gactatcccg cctgccgccc tccgatgccg
+   133501 tcctgtactc ctcatccgag gagaaatgtg gacctgctgg agtgcctgga gtgacgggac
+   133561 tcgacgatgg ctgttgcagt tgagccttga taaagtcctg ctctatatct gtctgtttgg
+   133621 gtggtgtgtc tagttggggc aatccggcag cacttctcag gaccgccagc cagctatttc
+   133681 gcgaactggg cgacaagctg gccaaaacca agcgctgttt gtcccaagtg gtcagctgaa
+   133741 aagcgtgctg tttgcttgcc gcaggttcgg gtattacgct ggttagacta ttaacatcca
+   133801 ggacgccgtc gagaacacct cgttcctcgc acaggggatc tctatagtag aaaagcgctg
+   133861 caccgctcaa ggtgaaccag tgcttggacc actcacaagt ccgattgtcc tgcttcatga
+   133921 gccaaccctt cttggcattc agcgattcct ccttaaccgg aggcttacct tgaccagagg
+   133981 atgacttact gttggccttg cccaccgatc gcagctcatc cgttttggcc atgtagttaa
+   134041 cacacagctc gtccagattg catccgccat cgggatcgcc cctctcgtgt tggtaatgtt
+   134101 tcggtgactt taggtgtaat tgcagcctgg cagtgggttt tgtcttgtca ccctgctcca
+   134161 taaccgtggg tatctttttg actatggccg aaacaatcgc aggcgtggag ttagatgcca
+   134221 ggggcaggga tttaggtcgc tcgctggctg ggttttggga tttggtgctg ctgctgttgg
+   134281 aggacatctg gaagtccgtc tcatccctgg atgtcacttg gtactggctg tgatggccga
+   134341 aatgatgctg gttgttcagg gcattgttga ccgccgtgga ccagcgtttg ttctgctgtt
+   134401 gactcagatt cgttatggtg ttggcaatgt ccttgatcgt cttttcatca cgtcgaatat
+   134461 cctcgattag caagcatgtc gtgggctggg aaactgaaga aatgaaaaaa ttaaattaag
+   134521 gaacaagtca aattgtattt agcttagctt ttttttacca aagatcaaag caaagactat
+   134581 ttagtataat taattcaatg aacccaaatc accattaacc accatcaaag aataaagaaa
+   134641 agatcgttaa aacagataac caggcaatta taagtataca agcttgtcgg tgatcaagga
+   134701 tcataaattg tccatcggcc ttagccaaaa acaaagctca accagcaatc tacgcattct
+   134761 tttctcataa ttggctcagc atcttcatta aatactttga aatatgagca tgttggtaat
+   134821 ttcatttggc tggctggctg gccggtatct ctcttggcta agcaaaccat atcaatgcca
+   134881 ttgactgaaa ttaacttaga cacaggatcg tgtggacggt ggattgagga ctgaggattg
+   134941 aggactgagg attgaggact gaggattgag gaccgagaat tcggagccac actccacccc
+   135001 taacaatatt tatgtccagc atagggccgt aaaacgcgca gagagagcca agttatgtgc
+   135061 gggcaaatat aataaaatta ttaaaacggg cacacggtgt attgtaaggg gatccgaggg
+   135121 atcgcgataa gccaggccag aaagccctga aagagatccc gagagcgctg cagagtgcca
+   135181 taattttatt aacacccaaa caagtcgagg aggacaagcc atcaagtacg aaccagacga
+   135241 acgagaggca agcaccgagt tgtcccataa aatatgtgat tacaaaatat atatttagac
+   135301 ctgttccagc cagccgccac atttaggttt attatgatta tgatacaaat ttccgactgg
+   135361 ccataaagtc gagcgtctga gatgactgga ctgaccgaat ccgaacctcg aacacacagc
+   135421 gctctggatg cgcatcaatt cggtcagtcg aacaggttgg tcgcttttcg gctgctccag
+   135481 cggcgtttat cttgattatt gagctgttgc gaaaagcgaa aggaggtgga ccagcagcct
+   135541 ctgaactcca gcttaagata cggatgccaa agccaaggct ccagcaccga gttcgggatg
+   135601 ggtatggtta cgggggatcc agctcaaagc tctgacagaa acagagctgg ccactgactg
+   135661 actgactcac cgagcaggtt caaggttact gtgctaacag cgccaagggg aggttacggg
+   135721 ttacggtgca atggcgatgg tgatgacgat gggcatgggg tatgccttta attggtcgac
+   135781 ttgtcggccc atcgcgttca agcaaaagcc tgaccaaagt ggccctggcg actctacgtc
+   135841 gattagggtt tggggccggg actatgtcag caatggaatt tataatgctt atgatattac
+   135901 acatatccga agggaaggac aatagtactt taccctttaa aatgcggtta tgcttactac
+   135961 tctgctatca gggattctgg ggcatataag aacgaattta cgtatatata ataatataat
+   136021 aatacgtata tttacgtctg ataataagat gactcacgtt tcaagcattc gatgctgttc
+   136081 tattaaatct aaagccatat taaagaacag atttgaacaa ttgaacaatt agtttgcaat
+   136141 aaatcaaatt tacaatctca agaacgcact cgaaactata aagtttattg agggtcccct
+   136201 ggggagtaac aaaagctaaa atatttttct tagccttaag aagtaacaat gaggacctct
+   136261 ctgctggctt acaggtcatc cttcattcgt ttcttgattc cccgccgaac tggccataaa
+   136321 accatcccca cttgctgctt tattatctgt ggctctttgt ttcgctttcg tggcctgccc
+   136381 ttgatgtgtt ttgcaccatt taatatttat tgtggctttt actacgcgct ttcaagttca
+   136441 agatcaaatc agagaacaca gacttcagtg gggcgaacga aatgaaggcg cgtaaggtgc
+   136501 taagtgtaat taagttgata aaaacagtgt tcgcagatac atgggctggc tagtggcttg
+   136561 atgccgctgt tgatgcacaa agccctataa ttatggttcc tggcagttga ggaactcggg
+   136621 cgatgcgacc atataaacgg ggtggatcgg gtgaggattt tcggaattgt ctctggtggc
+   136681 gactctttat tcatttcttt aattaacagc atttgattaa ttatggccac aattaagcgc
+   136741 actcagaaca acccactcaa tcatggacgg gcttcctgca aaaaaactcc agaactcccc
+   136801 gatttccgag attcccgaca ttgccctcac tttgccaacg aagctcccgc ctcagacgta
+   136861 ataaataatc aaattgcaag ttgatttacc tgcccggctc tgccttttgg gcgaacgaaa
+   136921 gcgaaaattg cctttggatt ggaatcgctc ccgcccgtcg gaacccagcc cttccgcccg
+   136981 agggacccct cacacgatga ctttgaaaaa tttctaactg accgtacagc ccgctgctcc
+   137041 ctgtggaaaa aaaaaaacaa caaaaaacca tataccatat acaaaatcct gccagaccgg
+   137101 accccagctt gttgcataaa ttttgctttc gagtcgttac acttatgaat attgattggt
+   137161 tataggtgat ggtggcttct actgccggac tgccggtggt tctcctttcg agtttcaaag
+   137221 tctcccagtt ctggtgctca gcagtctcgt aaatcgcaca acctgcatcg ttagctgccg
+   137281 agacgcctcc tggcccatcc agtgctgtca ttttgcattt gcacagaagt tgtccggcct
+   137341 caatggagct gggtctgtcc cgcctaacca cgtttgttaa catgcttttt ctgtgtggct
+   137401 cataatgggg ctgctcaata gaaagataat gaattcgcgg tagaactaat ggaactcttt
+   137461 ttctctctct cccggcatcc acgaatcttt ttacaaagtt gcaaagggct gtaattagcg
+   137521 attgatatag tatgaaggtt agccaccgat aacatagctc ggcagatagt tcaaatgcaa
+   137581 gttctatgtc agaggcacac tgggaaaatc ggagagaaaa gtttcagaaa caaactttaa
+   137641 agtaatgtaa agaccaaatt atacaagaca taaatctttg tttacattaa taagcattct
+   137701 ttcagtgcat cagattaatg gaatgcgcag ctactttgtt gcaatcataa gtagttactt
+   137761 ctgtacatcc tccggtctct tacaatttgt cattgtaata caatcttgca cttaattctt
+   137821 gtgaaatgca aacttcttcg gaaactcgtt tgattgtaat gggccctagg ttgatgattt
+   137881 gattgctggc ctgtgttttg tattttttgg ggcgtttaat cgccggttag ctgtgtagct
+   137941 gcatttcgtc acctctaccg agagtctgct gaaatttcta aactaatcca cagagcgagg
+   138001 cttttccaca ccagagcccg agatttattt gcactgtaat tattcacttt gccgtgaaac
+   138061 tttgcccatt acaagtccct acagagagac atacatacgt acatagctag ggacatggct
+   138121 agagaagcca catggtggtg cactcatgtc cagaggggct atcatgtagc gctgacttgg
+   138181 aaaatatttc aactggcccc gtcctgacat tggcctgccg acggctatta ggtgtagaca
+   138241 gtttataatc tttgtcgttg tgcattttaa gagctttttt cttcctccat cctgcatcct
+   138301 aacacccatt cagaaagtgc tcgcccagcc aaccgtccaa tgaattgcct catttgccgc
+   138361 acaattttgc ttattgatgg aatgtaatta gtctatgtca gcacccaagc tccacgaagc
+   138421 gcgagtgagc gaaatgccct agagtcctac ctggtaactg atctcggcct cgccaaatct
+   138481 ccagacccat tactagccaa cggtgcatcg ctaatcgaaa aaggtggcaa atgtgagtgt
+   138541 gaaaactgac cattagaatg gccccagaag cggtactttc cgcaggtaag tgcaataatg
+   138601 aaatgcattc tttagatcta ttattattac ggattgtgtg gataaatgaa atgaatagtc
+   138661 ggaagcctag agaattgttc agctgcaatc gtttcatatt cctaagcttc gacaatattt
+   138721 cgtttcttcg atattttgtt taaattacaa aaagttataa agcaatgaac acttgaatca
+   138781 gtcatgatgc tgggtaaatt ttcgctttcc atttcgtggg ctttgcaatt cgaactggtt
+   138841 cacgtcttta actggcgatt tatcaactga ataatgccat ttgatgtgga tgctgagcca
+   138901 tgaatttgca accaaggagg ggagttggtc cagtccacaa cgaacaccac taccaccctc
+   138961 tttggattca tccgtgatgg cacaacaacg gatctgaggg cagacacact ataatttaag
+   139021 cgtgaatatc atttttccat gaacaagtat cctgcagcaa tgtatgtgag aacattcaca
+   139081 gtaactgcaa ttaccgaccc accaaccaac cgatcctcac aacggaccaa ctccactcga
+   139141 gagtctggct gggatctaaa gaaaatttat gaagcctacc attatacgat cttaagtagt
+   139201 ggcgaggatt cctctccagc cagccagccc tgcttcacga ttgtccctgt ctttgtgtgt
+   139261 gaattcaagg ttaatttcgt ggctgcggct ttaactagct gcacgcactg ccccccatcg
+   139321 ccatcttctt cattcgcttc cttgccccga atatagaata ttcctgcaat ccctcagtgg
+   139381 agtctctcgg actcgctgtc cgtctgttcc caagttgata attttgattg ctgcaagcca
+   139441 taaatgatca taaaaattat agcagccgat acgagctcaa ccgaacccag tgcttccaca
+   139501 tccacgaaaa gtcaagcgga acaaaaatcg cgaaaagggt tcaaaataca gagcaaaaga
+   139561 taactaagct tgccagccat tttcattaaa aagtaaaagt cagattcatc aactgtattg
+   139621 cgtaaagata aatgccattt tggccctaca accacttatt attcagttga ctgttaaatt
+   139681 gcttgagtta tttccacaac ttgatgaagt ttgttattga tagagccaat tttataatac
+   139741 taatctaaca cgagattcag ttaatgtttc attaaaataa aagtttagtt agaagaatat
+   139801 acttctagat tttaaaattt atactttcta agagcttttt gcaattatat ggtttgtgtt
+   139861 tccgctaagt tataaattag ctgatcagct ttttagattt ctctaaagaa aagttagttt
+   139921 tattttactt accccgattt gtgatctcgt tgccggcatt acgattgttc tcgtcctggc
+   139981 cccccatgct gaccgtcttt gactttctgg gcaccgacgt ctcatcctcc tcgtcctcgt
+   140041 cgacatcctc gcccgtttcc acgccgtcgt cctcctcatc atcggcgaga gctgattgcg
+   140101 tggtggtcag aggcgaacca gcggtcttcg tggagctggg acccagatca aggctgctca
+   140161 acagattgcc tgccgactgg gaagacgtta gggtgtcctt gtgataggag ctgtgccgat
+   140221 tgcccagctt agtggatagt gttaggtcgc cgttgctcgt tgggcgtaga ctgcccacca
+   140281 cctgaccacc gggcagggtg gcgcttctct tgtggcgacc cttcgacttg ggaaaggcag
+   140341 ccaggatgtt gagccaccac tgggattcct ctgaactggt gcccttcaca aaggtcacgc
+   140401 gctcgggagc ggttatggcg atggagttgg ggtgacctgt cacctccacg gcgctggtaa
+   140461 cctccagcac tttggtcata tcaacgcacg cctgcggtat ggtttcgggc tatagaaaat
+   140521 atatgtaaat taaagagtaa acaagttgta ttttaagatt ttaattagga gaattaatta
+   140581 atcggtaatc aaatgaactc ggcctatcgc gtaataatat acatttttta atttaatgac
+   140641 taataaataa tataaaatct aattaatagt tcagtaagtt agtaaaagta aatcaatctg
+   140701 gtggcaattt aagaagccac tttaattctt tcacttcata aataatcggc tggttaagga
+   140761 aaggtacatt tattgttgtt tttaccgcca gcacacttat tggttctacc gataacgtcc
+   140821 agcaatgcta taataccata attagaagct cttttgggat tgtataatat tttatgagct
+   140881 ttgttatctt ataaattcag accaccccat aacagaaagt tttattatct ttttattttt
+   140941 ttgttatcac tggagaacca acgagacggt atttaaaaca gaaaaacaca attatgccta
+   141001 tggattgatt agctattaca aactcaaaaa ttcgttttaa ttttattatt agctataaaa
+   141061 atggaaatgg tttaaaatat gttcaaatga attacttaca taatcatcaa ccgagtacgt
+   141121 cagctcgcca tcatcataga gaacaaacca tcttctttgc cagcgctaca attgaaaaag
+   141181 aagacaaatt ttattaaaaa tattaaacta ttctcaagtt tctattattt gatctttact
+   141241 aagtctaagt ctgtggctat cagtcgatga gagtgatcaa ctctaaaaca aatttacatt
+   141301 gtcgcttgca atttgcaaca tgaaaggtgg gacgagaaat ggtgaggaaa gacaagatcg
+   141361 gatgtaaata atgttcaacg cccccgacag aaatcataat tcctttataa ttcgttcttt
+   141421 cataaatttt caggcgttgt cattcccatg aaaggcaacc aagccccaaa cgccttcgcc
+   141481 tttgcattgg cactgattcg ctgtggatct ggatctctat ctgtatctgc atctgtatct
+   141541 gtatctgaat atgaatctga atcggaatct ggatctgatt ctcattgtta ttgttggttg
+   141601 ccagaatcat aacaaacgtg caacagccac aagggtatag gactcaacgt gtgtctgata
+   141661 tttatgcaaa ttgttaaaag tcaaagcaaa ttaagctcaa ccttcagcga agatgacgtt
+   141721 gaattctgtt gccccattgc gctgcaagtt gctagttgca agttgcaagt tgcacctttc
+   141781 tgcagttgat ttctcctcat ccacctatgc agtcaggtga gagggagtga gtgcgagtgg
+   141841 agtgctgagg tgtgtcaagc gaattattta taaggcctag aagaaggcag ctcgcacgcg
+   141901 aataatcaag actcagcacc aatttttagt ttatggtcta gttctttata ggttttgtac
+   141961 ctcttttttt tgcgttggct attttgcgat tgaattcata aatatggaat caaatctata
+   142021 gagtggagag tggaactaac gaggtgagag gtaacaatat agtttttggg caatcagaag
+   142081 caacaaacaa atatctgcaa taactcgttg aattcgaaac aaaattaact gcatttatac
+   142141 taaatatata attgctatag gatgagttag ccgtcttgcg gtttcccaaa ccccaaaagc
+   142201 aaagtcaagc gtgtaggaaa cctgatcaga tcgcgggaaa gattctctgc actcaattac
+   142261 gtcaaaccag gttgatttcc tccttttcgc tgtcgagaga ttggcaaatg ggtcaaatgg
+   142321 gtcaggcagt ggaatagtaa attagattat gtttgcatcg agatgcaatg caagccgcgc
+   142381 cccaaataaa tggaacgtgc gctagtaggg ttcccccttg cccctggtaa cccttccttt
+   142441 caccacccgt tttcccgctt ttccgctccc aaacactaga ggtaagctgc ttagaccccg
+   142501 gcgtttagaa gccccagttt cgtttcacta ggcagacaca ctcgcagcgg gaagacaatg
+   142561 ccaccgccac cgccaccgac ttaatcagcc cgggaaacga catctcaatg ctggcgagtg
+   142621 tgtacctaca tatggacatg ggcgtgcgtt ggtgcgggag ctggtgtaaa tcggttttgg
+   142681 caggtacgcc gctggcgtca ttaccccccc agaggttgaa tgtcaccggc ggcatgactt
+   142741 gggggccaag ccgataaggc gcacactgtc cactgcacgg tgtacactga taaaaatata
+   142801 tatcaagacc aaatattgtt aaagataatt gatgtgtaaa ggaaatacac ttgcaagtta
+   142861 aaatgttttc accttaatgt gtttttcttt taaatactct attaactaat ataaattatc
+   142921 accaaaacaa aacattaatt tgggaaatgt tatcaccaaa agcttttgcc actatagaaa
+   142981 atacagataa atctaaaaat aaattccttt gacgcatgca cgaaataaga taaacaaatt
+   143041 tgattttatt ttcttattta cacaattcat tttatttgca tgcatttcat tttgttcagt
+   143101 gtacctaata aaaacgattt cgtttgcccc aagcagtaag aagatgttag gcacgtctgc
+   143161 tgataaggaa aactgtagcc ccagactagg ccagaccata ttaaattaac gtctggaggc
+   143221 gcgaaaatca tacgattttt ttttatatta cttcgcggtc agttgccaag gcaggagagc
+   143281 aacccgttcg attagtgggt caatttggaa aatgagttat tgactctggg aaattgttga
+   143341 gctgaaaatt taatcggagc ccgaaaattt ccaatcatgc attccccaag tgaccatata
+   143401 tggattagtg ataacgctcg atgcgacccc caaagattat caaaaatatt taataagaat
+   143461 atatgaaaaa aagatttaac ttttatgaat tcctaagcgt cctcaaagct tcgggagaac
+   143521 tgggccatat atgacccgaa atacatgttt atactttagc aaatgtattt tccaattagg
+   143581 tgatagaact tgtgtgcaca cacacatata gttctatatc aacaaacagg tttaagtttt
+   143641 atgcaaattg aaagcttatt tcttccgcat gcttatctct ttccttctca tcatttgtat
+   143701 gcaaaaaata catatgaatt tgcagtagcc tcctcccaca tcatatttaa cgccctatat
+   143761 tcaaaatttg ctcaagaaaa tatttgaacc aaattgattt ttagtcaatt agtttttaag
+   143821 taattaagtg gagtaaacat atacaatttt attcttacca aacacatata ctcatatatt
+   143881 ttgaataaat aaataaacaa atatatataa aatctacgaa attggcaaac aaattttaaa
+   143941 gcattatagt attgccgatt taattaatat aattaaataa tatgtacatg tattaatctt
+   144001 gtgtgcgagc atgggttaaa tctagctgca ttcgaaaccg ctactctggc tcggccacaa
+   144061 agtgggcttg gtcgctgttg cggacaagtg agattgctaa tgagctgctt ttagggggcg
+   144121 tgttgtgctt gctttccaac ttttctagat tgattctacg ctgcctccag cagccacccc
+   144181 tcccatcacc attcccatca ccatccagtc ccgttggctc ccagtcacag tattacacgt
+   144241 atgcaaatta agccgaagtt caattgcgac cgcagcaaca acacgatctt tctacacttc
+   144301 tccttgctat gcttgacatt cacaaggtca aagctcttaa tattctggct cgtggcccta
+   144361 cactgtaaga aattactata gaaataacgg tacacggaat aagatatttt ttttagtcca
+   144421 tatgctttta acaaatgtgt tttaagttta tgttatatta ttgttagaaa accggtgttt
+   144481 ttttaaaatc ggttaaaaaa ttactacgag agaaaaatac aaattttgta aataagattg
+   144541 actctttttc gattttggaa tattttcatt cattttatgt ttttacgttt tcacttattt
+   144601 gtttctcagt gcactttctg gtgttccatt ttctattggg cactttaccc cgcatttgtt
+   144661 tgcagatcac ttgcttgcgc atttttattg cattttacat attacacatt atttgaacgc
+   144721 cgctgctgct gcatccgtcg acgtcgactg cactcgcccc cacgagagaa cagtatttaa
+   144781 ggagctgcga aggtccaagt catgcattat tgtctcagtg cagttgtcag ttgcagttca
+   144841 gcagacgggc taacgagtac ttgcatctct tcaaatttac ttaattgatc aagtaagtag
+   144901 caaaagggca cacaattgaa ggaaattctt gtttaattga atttattatg caagtgcgga
+   144961 aataaaatga cagtattaaa tagtaaatat tttgtaaaat catatataat caaatttatt
+   145021 caatcagaac taattcaagc tgtcacaagt agtgcgaact caattaattg gcatcgaatt
+   145081 aaaatttgga ggcctgttcc gcatattcct cttggaaaat cacctgttag ttaacttcta
+   145141 aaaataggaa ttttaacata actcgtccct gttaatcggc gccgtgcctt cgttagctat
+   145201 ctcaaaagcg agcgcgtgca gacgagcagt aattttccaa gcatcaggca tataatatac
+   145261 taatactaat actaatacta atataagaat actaatatag aaaaagcttt gccggtacaa
+   145321 aatcccaaac aaaaacaaac cgtgtgtgcc gaaaaataaa aataaaccat aaactaggca
+   145381 gcgctgccgt cgccggctga gcagcctgcg tacatagccg agatcgcgta acggtagata
+   145441 atgaaaagct ctacgtaacc gaagcttctg ctgtacggat cttcctataa atacggggcc
+   145501 gacacgaact ggaaaccaac aactaacgga gccctcttcc aattgaaaca gatcgaaaga
+   145561 gcctgctaaa gcaaaaaaga agtcaccatg tcgtttactt tgaccaacaa gaacgtgatt
+   145621 ttcgttgccg gtctgggagg cattggtctg gacaccagca aggagctgct caagcgcgat
+   145681 ctgaaggtaa ctatgcgatg cccacaggct ccatgcagcg atggaggtta atctcgtgta
+   145741 ttcaatccta gaacctggtg atcctcgacc gcattgagaa cccggctgcc attgccgagc
+   145801 tgaaggcaat caatccaaag gtgaccgtca ccttctaccc ctatgatgtg accgtgccca
+   145861 ttgccgagac caccaagctg ctgaagacca tcttcgccca gctgaagacc gtcgatgtcc
+   145921 tgatcaacgg agctggtatc ctggacgatc accagatcga gcgcaccatt gccgtcaact
+   145981 acactggcct ggtcaacacc acgacggcca ttctggactt ctgggacaag cgcaagggcg
+   146041 gtcccggtgg tatcatctgc aacattggat ccgtcactgg attcaatgcc atctaccagg
+   146101 tgcccgtcta ctccggcacc aaggccgccg tggtcaactt caccagctcc ctggcggtaa
+   146161 gttgatcaaa ggaaacgcaa agttttcaag aaaaaacaaa actaatttga tttataacac
+   146221 ctttagaaac tggcccccat taccggcgtg acggcttaca ctgtgaaccc cggcatcacc
+   146281 cgcaccaccc tggtgcacac gttcaactcc tggttggatg ttgagcctca ggttgccgag
+   146341 aagctcctgg ctcatcccac ccagccctcg ttggcctgcg ccgagaactt cgtcaaggct
+   146401 atcgagctga accagaacgg agccatctgg aaactggact tgggcaccct ggaggccatc
+   146461 cagtggacca agcactggga ctccggcatc taagaagtga tactcccaaa aaaaaaaaaa
+   146521 aacataacat tagttcatag ggttctgcga accagaagat attcacgcaa ggcaataagg
+   146581 ctgattcgat gcacactcac attcttctcc taatacgata ataaaacttt ccatgaaaaa
+   146641 tatggaaaaa tatatgaaaa ttgagaaatc caaaaaactg ataaacgctc tacttaatta
+   146701 aaatagataa atgggagcgg caggaatggc ggagcatggc caagttcctc cgccaatcag
+   146761 tcgtaaaaca gaagtcgtgg aaagcggata gaaagaatgt tcgatttgac gggcaagcat
+   146821 gtctgctatg tggcggattg cggaggaatt gcactggaga ccagcaaggt tctcatgacc
+   146881 aagaatatag cggtcagtga gcgggaagct cggtttctgt ccagatcgaa ctcaaaacta
+   146941 gtccagccag tcgctgtcga aactaattaa gttaatgagt ttttcatgtt agtttcgcgc
+   147001 tgagcaacaa ttaagtttat gtttcagttc ggcttagatt tcgctgaagg acttgccact
+   147061 ttcaatcaat actttagaac aaaatcaaaa ctcattctaa tagcttggtg ttcatctttt
+   147121 tttttaatga taagcatttt gtcgtttata ctttttatat ttcgatatta aaccacctat
+   147181 gaagtctatt ttaatcgcca gataagcaat atattgtgta aatatttgta ttctttatca
+   147241 ggaaattcag ggagacgggg aagttactat ctactaaaag ccaaacaatt tcttacagtt
+   147301 ttactctctc tactctagaa actggccatt ttacagagta cggaaaatcc ccaggccatc
+   147361 gctcagttgc agtcgataaa gccgagtacc caaatatttt tctggaccta cgacgtgacc
+   147421 atggcaaggg aagatatgaa gaagtacttc gatgaggtga tggtccaaat ggactacatc
+   147481 gatgtcctga tcaatggtgc tacgctgtgc gatgaaaata acattgatgc caccatcaat
+   147541 acaaatctaa cgggaatgat gaacactgtg gccacagtgt taccctatat ggacagaaaa
+   147601 atgggaggaa ctggtgggct aattgtgaac gtcacttcgg tcattggatt ggacccttcg
+   147661 ccggttttct gcgcatatag tgcatccaaa ttcggtgtaa ttggatttac cagaagtcta
+   147721 gcggtgagtt gaatacgatc ttatgcggat aaattcataa ttttttggtt tcaggaccct
+   147781 ctttactatt cccaaaacgg ggtagctgtg atggcggttt gttgtggtcc tacaagggtc
+   147841 tttgtggacc gggaactgaa agcgtttttg gaatacggac aatcctttgc cgatcgcctg
+   147901 cggcgagcgc cctgccaatc gacatcggtt tgtggtcaga atattgtcaa tgccatcgag
+   147961 agatcggaga atggtcagat atggattgcg gataagggtg gactcgagtt ggtcaaattg
+   148021 cattggtact ggcacatggc cgaccagttc gtgcactata tgcagagcaa tgatgaagag
+   148081 gatcaagatt gaattcgaat caaataaaat aatgctttac gcaaaaagta ggcaattcat
+   148141 tttcctatga taatagatat tttttttttt tttttttttt tttttttttt tttttttttt
+   148201 gatgatgttt ttattatttg atcaacgcac tgttacccat gcggcaactt aatttgatct
+   148261 gcttaaatta ttttatttta ctatgtgtct tggttactta agactaacag atttttaatc
+   148321 ctaaaaatga tagggagaat tataatagtt gatataacat agtattctaa tgaattttaa
+   148381 aactaagact ttctaggtca aaaccaggtt agggaggtca tgagggtggt gtcgcttgag
+   148441 tcttcttttg actctagtcc gagggtcagc attgaccaca gctgcagttc ctagcttcct
+   148501 tgctaggctg ttggggtgta cattaagcct ttccatatat ttttgggcga gcgaatgttg
+   148561 atgggagcta taattagatg tgcttaatta tgatggggtt acgttatgca tgttgtggga
+   148621 atgtgaacta tactgttttt tttttgacat cagtcgacta tatgttctcc taacaacctt
+   148681 tgatctaaat tgtaagtgca aagtcaagtc caactactca aggctttttc aacaacaata
+   148741 tacgtagact ttacgtgcaa catttctaga acgtctctag tgcatctcaa ctatgcgaat
+   148801 gcatgtcgaa attttgctag agcaatgccg tgcattgaac tttggcacat ttccaaacgc
+   148861 acaggcagac ccagtcagcc ataggaaacc caatcggacg accgagaatc gaatgcgaaa
+   148921 acaaacaaat ctgccgacac attgcccagg aatattggca ctgagagcct acagatacac
+   148981 caggaaaaaa aaaaaataaa tgctgggata gttttggtat tttcggtact aatctaacgc
+   149041 aatagccctt ctacggtcga gtaaactgaa aaactcttaa tatttttatg ttgcattcat
+   149101 taagtagtat acattttcta cttccaatct tgccttggct ttttctcatc gtgtagctat
+   149161 atctcagcta tatggtgggg tgagttgtgg gcacgccgcc ttggcatact tccaattcga
+   149221 gtgcgttgtc ttccttctag tcgggggcca ctcggttcgt aaaaaatcaa tagccagcag
+   149281 accaaaaaga gccacaatat catcggacat tgaggaccca acgcaacgga atggaaccaa
+   149341 cggaatcgca tggagtggaa tctggccagg ccatatcaaa gcatttttcc agagttccca
+   149401 ctccctgccg tccgccggcg ttgcaagttt gctttccttt cgatggcgca acttaattcc
+   149461 aaaaatcgat tagtcactgt aagcgaacca tgcaattgtc cagcattttt catttataac
+   149521 tccttttttt cgtttaattt ttgctggttt cctccccttt tccacttttt ttttttggca
+   149581 cacgcttgat taatttggtc agctgccggt tggcaaggag gtggggggga ggacatttaa
+   149641 aaatgcagga agtaatcgat tggctagggc acaatagcca aaatgggaga ggcagtgcaa
+   149701 tcgcaaaaaa tattggggta tgttcgcctg aaggaaatgc cggaaaatat tgaaaattac
+   149761 gataccaaga tacattgcac gaaataataa atattctggg ccggtaattg accaattttg
+   149821 tttgtccatt tgattgtagg tgtactcgta aatggtaggt tatattaaat gactaataat
+   149881 tatattgaat gcaactattc atactaagag tatctaggct tcgactagtt ctagtatttt
+   149941 ccccaacact gtagatcgaa caaaaacatg cgaccgaccg gttgccggca gtaggtggag
+   150001 attaatcaac aaaaaaaaat ttgcggaaaa atggaggaaa aaattactga tgctttttaa
+   150061 atggcaatac gtgcctggtc accaggtatt tcactaatta acctgacgga tgcacaccac
+   150121 agccaaagaa gatagtgatt atgcaactgc atggatcgct gctgttgtct gtgtatgagc
+   150181 gttgtgagat aatgaagtac tgatctgatg ggagtttata gccaacactg ataatgcact
+   150241 cattatgaaa cctaatagcc attcattaat ggagtgggaa atcaatggta ttatagtaat
+   150301 taagcaaagg ttatgagtgt tgaaaccaaa ctgcaggatg tctgattgtt ccttaagagt
+   150361 aatgctctaa taattagtgg aatataaaga attgaatttc catttgctat gtatgtaagt
+   150421 gtaaattaat ataagactat atatatgata aatgttacaa tgggtcttag aatacattgt
+   150481 gatttgctct aaagcttgaa ttctgatgtt ctgatctagt tcccctcggg cagagttcac
+   150541 caatctcgac tcgttttata cgacatgcgg caatgtggcg aaaaagaaac agatgaacac
+   150601 atgttagata tggaagcaaa taccccttgc ccgccaagac cgctctcaga ctttgaagct
+   150661 ctgaaagcca agtcccattt gtatggccga cttgttgtct cgggccagag actatataac
+   150721 tctcggcctg cgattcagat tttgactgag acaccgatgt ggagcttgga gcggcgcata
+   150781 aagattatct acggccgcac atggcaccag gcggtcagac aagattatat gcgaaatgtg
+   150841 aagccaaata acaagtgtta ctcaaggcag caaattaata gccaacattt ttctaaacta
+   150901 atcacgacaa gtgctccggg tttgaggctg acacagaaaa tcaaacagcc ggctgaaaag
+   150961 gcaaaactcc aacttggcca actggtggcc caaaaacagc aaatagacag atagatctga
+   151021 gcgcggctta aatgccaaaa aatgcagtaa gcgaaaaata aacatgaaga aaaatgacag
+   151081 gcaaaaacgt tacttgccct ccgggggtcg tcgcatccac cttatccgcc ccactccgaa
+   151141 tatagcacca ttgctccccc atggacaacc attccccatt cgggagtgga gcgaacttga
+   151201 gccgacgatc tgagattccg acacgtgccg ggcgacaatg ggaatggtcc aaaatgtcca
+   151261 taacaatggc gctggagtac aaacagcaaa cattttgtaa atttgcgtcc attgacactc
+   151321 cgaatgatac agatctcgct gctctcgatc tcgtttctga tccagagggt caccctgttt
+   151381 aaccgggagg gccacatagc cggtagattg gcccacgcaa agttcctacc cctcgccagt
+   151441 tcagtcccct acctgaacta cactctcgtg gtacccattt cagccttgag gcagcaacca
+   151501 ctttgtatgt acaagagacg aatgtgaaat gatgtcgatg cactgcaaaa atgagctgga
+   151561 tattgaatta aatcggtgaa tttagtaggt tgacgaaaat ccaaaacatt tactaaacta
+   151621 aaagatacaa tctatacccc cttttccagt tctacgaaaa ttccacgatt tttctaacta
+   151681 cccctctcgt tttttttttc agtgtaaaaa taagtaatat tctgagaggg gtaaacgggt
+   151741 gaaagggtgc tgactgaaag ggggagctcg ggaccggtcc tgggtgcatg taacacaatc
+   151801 aggaagtaat ttttatgcta tgtacccata gcatgtgtcg ccgtcatcgt cgacgtagct
+   151861 gatgctgctg ctgtcgctgt tgattttgcc cagtttgttg gtgttgccac agccgaggtg
+   151921 gatgtggata tggatgtgga tgcgagtgat ccggccagtc agcggagaca gtcgctcggt
+   151981 caggcggcag ccgggaagat ggcaagttga gctgggttct caactctctg ccgagttgtc
+   152041 gggggaattg cattcgagcg cccgagctca agcttgagct cccttctccc tttgggaggc
+   152101 aaagtcgagg aactgggtgc attggggggc attgattggc aagatgatga cgacgacgat
+   152161 gatgtgaggc aatcgctgaa tcaatcttgg ccagtagttc agcgtttact gttattagtt
+   152221 gctggaacat tgcattttga tcgttcccaa tatggtttgc tcttcgtttt ttaatagaaa
+   152281 aagaaaaacg cctggggcaa taaaatggtt aagcgatggc cagagagtta agatacaatt
+   152341 ctgaattaat ataaaaggtg atcagatatt gtttaaaagt ttggtctttt tgaagattag
+   152401 tatgagtaat cggagagcaa atttattaaa ttttttatac agttaatatt aacatagaaa
+   152461 atgccaagaa aatgttttaa aaatattgca attttataac cagtttcttg tgtatgccct
+   152521 ccggttaatt atgacacttt aatgagcaga aaatgaaatc gaaatcactc tggctgaaca
+   152581 atccaatcaa caactctatc taaataacaa gtatctcgaa tgcaatttaa gctgccccac
+   152641 gaagacctgc gaaatgggaa actaacggaa aaacaacgaa ttgtgaaata attcggcaat
+   152701 ttcaaaattg atggcaagcc aaaattgagg caatagaaat cgattagcgc cacgcttcag
+   152761 ctgaccagcg aaaagtttaa aaaagcatta aaaacaaagc ggcagagtcg gaaaaaaatt
+   152821 aagttatcga cacaaatgaa aatgttcggt agtaaacgga ctcgtgcgga atgcgttttc
+   152881 gtctacgcca tattctttgg gttttcctct ccggaaaact cattcagcca ttaaatgccg
+   152941 ttatctgctg attacacaat aaaagcacaa ccacaaagca gcagcaacaa atgctactgc
+   153001 agcagcgccc agagctaaga gacctaattc gaggcgcatc tgtagaagag ttgcggcaac
+   153061 tgcagcaaca acagcaactg caacagcaac agcagcagca gcaaaagcag ccaaagcaac
+   153121 aatagcaaca tgcaataata taacgtaaca tttagaacgt ctttgttgtt gctgctgcaa
+   153181 cttgtgttgc tgttacccac agaaaggctg accaagagaa aaggctgccg caacaatttc
+   153241 tcgcaacatt ttgctgcctt gcatcattta gagctgcacc cagagaaaat acgggccatt
+   153301 gagaatgtaa caaaaatatt ttgaataaat ttaacttttt aacaagttaa tgcacgtgaa
+   153361 tttaattttt gatgaaatta tctacaagtg ggttgcataa ttttctgaag tgtaagcact
+   153421 gctattagtt agatgaatag taaggggagt ggcacatgcg gcatgcagca gtatgtgcag
+   153481 ctgactaaga actaaaagcc gcagcaaaaa cgaaaaaaaa aaacagattg gcataaagaa
+   153541 cagtggaaag cctttcacca cgcattttga gacgtccaac caaaaatgtt tctgaaaatt
+   153601 agcttttagg attaagttac ttaacttcat taggtaactt catcaaaaac aaaatatatc
+   153661 aacacacaag tattactatt ttaaagaatt atcagatgta tctttttcgt atctaaaact
+   153721 tttgcagtaa atcagtaaat tttatcttta ttaatattaa tttatatatt tattcactgt
+   153781 gcaaatgatt aagataattc atcataaatt tttattataa ttatctgata taaattcagt
+   153841 tgattaggga taaaaaatca acacagagag ggagctaaag gtcgttttga gtgtgaagac
+   153901 ataatattga acaatgtcgt atatgtacaa tataataccc ttgaagaaag ctacaatatt
+   153961 aagccaatct agggaatagt gatcaattca ataaaaatta ataattattt ttcccttgaa
+   154021 aaaatttctt ttacattttc ttaaggatac cgatgtaaaa taaatgaaga tcactaccga
+   154081 cgaaagggta tccatcgttc cttgcagcca gctatccttt ctggtttcgg agctcagaaa
+   154141 atgcggtttg tgttcgcctt cctacctggg ctgccttgta tctaggtgca agaaatcgag
+   154201 tctcaaattg cgccaaagcc gcaaactggc ccgctccgct cttggccatt agttattaga
+   154261 gcttggtcag ctagcaagca gcagcgtgtg gtgtgtgctg tgtggcgatt ttgcctttgt
+   154321 attggtgtct tggaattttc aacaagtttt gctaaaatat gatattaccc aagcaggttg
+   154381 cagccgaaaa caaaaagagc aacaagcgcg gcaaaaatat ttgtatggat atttggaaat
+   154441 ggcgccccgt tcccagtcag cgtggatggg tgaacaaact ccaaaagtga ctttggttgc
+   154501 aagactcgaa aaatgcataa atgaaatgca ttcgagggga gcgcggaact atggacatcg
+   154561 taagtcagct gaccattgat tgacgttgat tgtgctggcc cgtttggttt gtcgcgcgtt
+   154621 cagtgaactc atttacagat tcggtttctt ttcttttttt aatgtttgtt ttagcaatga
+   154681 cacgacatta aatgtcagct ggagtacccc caattttttg tcagccaatc tgacagcatt
+   154741 gtgtgagttg tgaccatggc agaagcactt gtgaaggaag atgggttgaa ttaagataga
+   154801 gtcatagaga tatgcatgca cgtggccatt cggccaggca tttcgcacga taatgaagct
+   154861 tatgcaaatt accaaaggtt gaaggtcttt gctcagcatc atttgttgtc gagattttct
+   154921 cgtttgattt atgaaaattg catatgtttt tcctgtgtgg aaacaaagtg ggcaatttaa
+   154981 agttaggaat tttaggcagt tccactttag caaccatatc caaatgttaa aaactataca
+   155041 actgacaggt cgtacatatt aaccatgtta cacacgccaa ttatgttaag tatgtttcgt
+   155101 gcattacaaa gcttcttata aaaagctgta ttataaacaa cgcataagat ctgtgggtta
+   155161 tctagggtta tttcatcgat tgattgcaca tatatataca atattttttt tgctcgagac
+   155221 ttgcgcacta ttgaaatgca aatgccgtgg tcagtgagat tttaaatctc aagttaatag
+   155281 gagtaatttt acaatggacc gcaagccgag gagaggaata ttaaaatttc ttattttgct
+   155341 ttcgatcttc cttgctactg tatcagtttt ggccgagaaa aatgaaatcg cacttggaaa
+   155401 cagatgtcgg cttacacacg tgggacctcg ttgctaaaat ttgttaaagc atttaaagaa
+   155461 ttttcttatt ttgtatatat gaaaaatgta cgagcgattt tgaattcatc ttttttcctt
+   155521 tgctttggtc aaataaatat aatacatttt taattaaaaa catcaaagat aggttccttt
+   155581 tcatatgaat attcgctgaa ctaggctatt caactggagc agccaaagtt tgcgcaaata
+   155641 actcccacag ttcagagtgt aatatgccat tatatattga tgcaaatgaa cttttgacta
+   155701 acgaaggtca attgtctttt gtcagtgttt ctatatagtt tattatggtc tgttgtcata
+   155761 gtcacaacac aaatggatgg atggatggat ggagtggctg aggttccccg aagcacagat
+   155821 aagcattggc taattaagag cctccgaccc catgccactc accccccccc ctcccccctt
+   155881 ggatacaaaa acaaactaat tgaatcggcc agagtatcga aatatatata accaaaacag
+   155941 aaacagagag aaaaaatcga agaaaaatgc cataatgggt tcactaattg cttcaattag
+   156001 acatcgactg gggcttttga aggcagcata cagcaacagg tgtgcaaaac gaccgacaaa
+   156061 gttgtgcaat acctatggag ctcaattgca gtcggtccct cggttggggc caagtacttg
+   156121 aatctctcta ttgtttttaa aaagttcacg atatttctgt tgttttgctt tttgcggtca
+   156181 cacctttgtg gatttttcaa ttaaacgatc agctgacctt gatacttctt gactcgcaat
+   156241 cgagtttatg cttacacgct tcaatctata tatctcgatt tattctgtta ttcggtctta
+   156301 actcacctga aagaaaagaa aatatttgaa aagattcatt agtgaattat taaatgcaag
+   156361 aacatggact taaggagaac aaacacactt ccttattgcc aacggggaga tgcctaatct
+   156421 ggccagccat taagagccca agaaaccctc acatacacta agagaaaaat atccacaata
+   156481 cttatgatct attaaataat tttaagcaag gattgagtat gtgaattaat aatattttct
+   156541 aataagagtt gatcaaaaca ctgtctctta aatctttggc atgcacaagt tgtatttaaa
+   156601 atagatcagg aactaactgc aaattgtttt gcgtgctatc aactccttga tgcctaaaaa
+   156661 gatcccagca aagagctggc tggcataatg aattgtcaat cattatgcaa tcttcgagtg
+   156721 aaaaatgatg cagcaggttt ccctgctctg ctctcgctgg gatatctctt agccggcgaa
+   156781 tgcgtgggcc gccaacattt agacacactc tgagacagtc ggacgctcgg acggacagac
+   156841 ggacagacag acagacagac agacaggcga tcttgtgatg tgcaacatgg tcttagatct
+   156901 tttgccgtgg catgtggcgc tagagtaaaa ggcctcggag agaaagaggc ttgaactcga
+   156961 gctcgaatct cgacttggag gagttgggac ccatcattta taacacggac agctcgagat
+   157021 ggcgttcctt accctttttc ggctcgcttt ttggcatttg ttatgtggcg ttctcgtttg
+   157081 cattcagaaa tacgtaaacg catacataca catagcatag tatgcatagc atacagtata
+   157141 cagcttccaa aagacacggc ccttttagga gacaagacaa gttgcgcgat gatggatgcc
+   157201 acaagctggg ctcataaatt cagcttgttt ttaagtcggg aaacagaaga tggaaagatc
+   157261 tgcgggggcg tctattggtt acccttgtca acacacccag tctgaatggc tattgggaaa
+   157321 tgcctttgtg ttcgattgaa atgcttacga aagagtttga tatgtttaaa aaatgatagg
+   157381 aaattgagtt ggaacaccat tcattcattg aattcataaa atgaataata tgaataatag
+   157441 ctaagtgtac gaaaaactta aaattagatt tagttttatg tattaactaa gcgggaaact
+   157501 attttttttt gttagttact gtactactac tgctgttacg aaaattacta atgattgaca
+   157561 ttcatttaaa catttctttt agtttattat aaaaatagat ggctaataaa tggcaagtgt
+   157621 cttgaaactc ggaagcactt gctgtatttg agtccattct taacttgtct ttaattcggt
+   157681 ttgactattg ttcaacatgt gagaaaacat aaaaaaaaac tagtcacaaa aacacataat
+   157741 atttattgac tgcttgtagc tttaattgac aattaaaagt tgaactaata accaatcaga
+   157801 ctcaattctc accgtaaatt acaaaacaaa ctgaaaacta atcaatgcca taatgatcaa
+   157861 atccgattgc caaagcaccg caggaagtca attattggtt taatcattac caaaacagct
+   157921 cgaagctctc attttcaata tttgtaaatt gattattgca ttgttgttgc tcgcaagtga
+   157981 atgcaacttc ctttggggcg acaaaagagc acaggctgtg gtcataaaaa gttaatagag
+   158041 ttgactatta acttgtcagt gggccccata gcacacttgt tggccatttt gcaagtggcg
+   158101 actattgcaa aatatataca tacacaggta tttcgtacat gtatgtacat acgtatgtaa
+   158161 actggggctg ggagaatcca gagagttggc aagtcgtcaa ctcaccaagt cgtcaatcat
+   158221 tttcatttcg ttaaacgtgt caaatgacgg tcgcgagcgg aaatcgttca ctagtagtta
+   158281 ttttctttta tgggggaatg ggctaactgc cggcgactgg gagagcggca attggcttat
+   158341 taaatgcgcc ttaatgcgat acgtgttaaa cggggagtga gcagcaaatg tacaaaaaaa
+   158401 taaaaaatac aaagtgaaca taaaaaaaaa aacaattgca acatcgctgg acacgcccac
+   158461 aaatctgcag ctgcagaagg cgttttttcg taattagtgc gaacaagaca ctctccaaac
+   158521 agagctggaa aaacaatact cgcaaacggg ggggtagcac aaaaaacaaa gtaattacgc
+   158581 ggaaatcaaa catctaacat ctccctggtg attttgcaaa agtcgttacc gttatgatga
+   158641 aaactttagc gcgaccattt ccaccgcccg tcccctagtt ttcccaacca gccatagtat
+   158701 caccatctct tttttgtttg gctgcacaat attgcacatg ggcacagatg acaaaaacaa
+   158761 actgcggcag cggataaaaa atgaaaagca caaatgccgg cagaaacgat agacgaactt
+   158821 gcataaagca tcagccgcca gccatcaaat gaaaaaagtt aaataatacc ttacagtggg
+   158881 tttggaagag gggtgggggg tctcagaaag atgagtagag atctcatacg cgtttggatt
+   158941 cctgcatttc gaatgaggag tactctagtt ctagaatata gaaagcttat tgaaataaat
+   159001 tatctgcaaa tcaaataaat gtcgtattct tcaccaacac ctaacattta ttattattat
+   159061 taaagttaac cttctcttag aaggatttca ttgtttgagc tatttgtatc tttatgtagg
+   159121 gtattcactg tccagatata agaagtgcaa tgctttttat tagggctggc ccgctagtcc
+   159181 ctggtgagca cagtaaaacc acaccacagt tagaaatata gcgtctcttt ccgccgtcag
+   159241 acagttcgtc aatgcaacca cacatgagct gccaccacgc cccccggcgc accactgcac
+   159301 caccgccctc cgcccatcgt tcatggcggc tccattcaac gacaatgacg gcgtggaaat
+   159361 ccaatacgcg atctctccaa gtcaacgtca tcgccgttag acgctcggtc tacgtaagtc
+   159421 tcgccttgtc tcaactcagt gttttatgta atggcagaag caaaaagcga gaaaaactcc
+   159481 gaaggcgacc aaagctggac atattccatt gaccacatgg tatcgcgtac tgcactccaa
+   159541 atatgaacga ttgataatcc atcaaagttt ttacactgcc tcgaggttca agtaatcgat
+   159601 atgagctgcc tattcgaata atgaatttca attaaaaatt agtattgtaa aatgccaaga
+   159661 taaactttta aagtgtttca gttagttatg ggattatatt ctttttacaa acacttgaga
+   159721 tcttcaaaat aaatcatatt agtttgtaaa accaagagct aagcagttat gtaatttctt
+   159781 ttttcttata acgggtatat ataagagcaa tgcttttatg atcattgtca ttgtcatgaa
+   159841 atatgacctt aaacaatgtg cacaagtaat tctcttattg tctcgttttt gctgactgat
+   159901 aggaaagaat ggaccccaga gatgagtttt cattttcccc gctttggcct tttttgccat
+   159961 tgaatctgtg tgacggtgtt ggccacaatc tgcagcattt accgttggca attgtgggcc
+   160021 ccttcaagac ccgagcaatc aattaacaca cctttgggtg ggttaaacag agggaggcga
+   160081 aggtgggcag gaggagctgg agcacccacg ttgagaactc catccacatc gctccatccg
+   160141 ctgcactcca ccgttgacag ttgcaaatgc atcaggcggt cgcgagtcgt gggctaattt
+   160201 gcggtttcag cttgcagccc aactaattaa catataaatt tagaggtgtt gcacttttgt
+   160261 atttatttat gaatgaaact gacagagaca aagagacggc ctggtcgtgg aggtggagaa
+   160321 ggaggctggc agggagtggg aaagtttcaa gtgcactgag aaaaatttta agtaaattaa
+   160381 aagcaatatt ttgcgtatac aaattgctca tacatataat cctcaaaaaa tgaattgttt
+   160441 tcgttaactc aaaagaatgt tacaatatgt actgcctaac ttatgtttgg cattttgcat
+   160501 tttttgtcaa ctttttctgt cagtgtgcaa atctgcaata tgcatctcgt actcatatgc
+   160561 aagccaaggt cgatttctgt tccgttcaac tgcagcagca gcagcagcag catcgacaac
+   160621 agcggattgc agtttatggg aaagagttta gccagccaag atttattggc atgttattct
+   160681 aattgctatt tattagcggt gggagacggc agagcagctg ccccgctgtc tgacagtcag
+   160741 tcattcagtt agtcattcca ttcaattgtt catgcagcca ttttcaagtt ggcaggccgt
+   160801 caagtgtgca aattagtttg tttggtttgg tttttatgca tgtgcttaac ccacttgtac
+   160861 gctgtgtgct caaaattgat caccataaac aaataaatgc atatttgaaa aatgtcaaaa
+   160921 tgaaatgtat ttgaatgctt gcaaagggct tggtaaaata gctgttatac aattacaaaa
+   160981 gaaaaaaaaa acaagattat gaaatatttt ttagtttttt tttttgttta tcaaacttgt
+   161041 tcatcttcaa agcaattaac acctctgaaa tcgaatttgt tatgtccggt tttcgtgatt
+   161101 tcttctttta aatctccatc gttaacacac aattaacagc tatataaatt cgtaacaact
+   161161 ttttcggtgc aatagcatcg tgatccattc gtttaggggt gatcgatcgt tgtttgcccc
+   161221 aactcagact atatactcat tagttctaga ttacagatct ccccgcccat tcatccgcat
+   161281 cacccacagt ctcaccggca attcatttcc acttccaacc catttcgatc ggtacgccca
+   161341 caatcgagcg actgcaatcc gcagagaact taatcgatag actccatttc tatcatccgt
+   161401 taattgcgac taatgaattt gcttacgtgt agacaatttt cagttaatat tctaattttt
+   161461 ttttcccctc accccagcct cccccgctgg ccgtaacgta attagtttcg agaggaaatt
+   161521 acaaacgact acgcgcactg tgggcgtaaa gtgaaaaaat ctaaatcact aaaccaaata
+   161581 gtttcgcatc actctggttc gccagtgaga tgtttaagaa gagaacgggc ccagcgtcgt
+   161641 aaaatgttgg atgcttattt ttatagcgga ttctcttggc tatcaaaatg ccataaagcg
+   161701 agtctgggtc tctaagcctg accgcaaatc gcagctatta ccgctagaga aactaaatta
+   161761 atatgcagca tttcgtgtaa ttcgcccacc aagccaaaac acttccttaa taaagcccag
+   161821 ggagaccacc gaccgccggg ccagctaatg cgtgcataat ttttaataaa tatttttcaa
+   161881 gatccccagc tgcagcagct gggttggcaa gtaaggccga gactgcgaga ctgagagact
+   161941 gggagactgc ctgcggaggc aggccgaaaa tgcttcctgg tttaattaat aaagcccctg
+   162001 agagaagaca ccgagtacca gtcagactcc gactcctggc caacttgtca atcaacaaca
+   162061 ttcttatttt ccttttaaaa acggtggcca gagccatgac cgtgatgatt gtgattatca
+   162121 atttcattat caggcccagg ctcgagatca acccgaggca aacacttccg ttttagaagg
+   162181 ggaggccatc tcttcgacga gcaggtccat tgaactcccc ttgtggcgca ctcctcgccc
+   162241 caactcaatg cctcctcacg caaaagtcgt cattatgaaa atatttcccc gcaacccagg
+   162301 atctctggca aaaagcttat cggctgctgc tcagccagcg cttatgttgc ccacgtgtcc
+   162361 gtctgtccaa gtagtctttg gagcaggttt tcctttctgg aaattatgca ctttgtatgc
+   162421 agaaaagttg gcgaggcgat aaaaatctgc ataattccaa gccgtaagtt tattgccgtt
+   162481 tcctagcatt cggggcgcaa gggaaaatga tgctcgaccg ggcggcagag aatgatagct
+   162541 ggagaaaagg agacccctcc gctctgaccc gtctttcata atcgtaatca atattaagtt
+   162601 atatgataaa ttatcggata ccaaattgca gcatttaact tgtcaaagga aattatcgga
+   162661 aattatgcta aaaagggttt cagcgcggca gacgcggtga aattactttc ggaactccca
+   162721 gatgcttaac ttaactgtcg atttgtctta agggctataa gactgatcgg ggtattgggc
+   162781 caatctgaga gctacgttga accagttatg gccagcaaat tgatttgttt aacagtgatt
+   162841 atgactgtct gctgtatccc atggttttat gtcaattata tcagttacct tgtgtgagta
+   162901 tactaagact caattgtctg ctaaagtaaa aaagtttgat ttttcattgg taatgataca
+   162961 agtgtaagct ttaattaacg tggatatcta atcgaaatag gatagaaaat catgtctagc
+   163021 ccgacacctt caggccacac aatcaagttc aggtgccaga ccaacggacc caaggcaaaa
+   163081 gccatcctaa tatcgttcaa aataaaaaag tgcccatgca tagagtcgag acaataagaa
+   163141 attgactgca cctgcactgt ttgtggggtc gtaaatctgg ggaggtggta tctgggactg
+   163201 cggagcacgt aattgccaca ctggagctac attatggata cattatggcg gcaggaaagg
+   163261 gagcaggcag cagcggttct ttgtgggtaa cccagagccc cgagctgtgg atttgctgga
+   163321 ggccaggaaa ggggaaccac ttgatgactg ctcaacggag ataaagaact gatggccgat
+   163381 ggcgatactt ctcaagaagt tgggaaagtt agacaacatg atacacagga cttgggtgaa
+   163441 atggaaattg agtggcgaat tgccactgcc taaataggaa atttttaacg aaacaagatt
+   163501 atagcttgcc ttaaaaataa tgttaatttc tagagcccta tttttaaatc ctagatatta
+   163561 gacatctaga catctagaca ttaaaaataa ttaataattt gtgtgccagg caatgtctag
+   163621 tgaatttcgt tcattaattc aattacagac acacattcgc caaaagaaag tctttgctaa
+   163681 acactggcta gtttggttaa atttatttcc ttcccgttcc agcaccacca attagccccg
+   163741 ccctcgattc aagacatttg atcgttgaat ctgagtgctt ttccagtgct agcacgacac
+   163801 agctcatggc agaaaacaac atccggtttg tgagctggag attgcgactg cttctgcaca
+   163861 aatgttaatt aaatacctgt ccatctatta aaaaacatac gtaaatttat gttgttaaca
+   163921 aggaagtgcg ctctgcgcat gcgcacactt ttattattat catcagcatc ggagatggcc
+   163981 gtcgctcagc acacatcaga ggaggcagtg tggccgccac ttggccattt ctacctgtgg
+   164041 agcccacctg cctctttcct ctcaccaaaa cgcccgccaa accacctgta actgtttcgc
+   164101 agccatttgc atatgtcgcc gaacgtttgc gttgcggttt taacgatgcg ctgctgtcgc
+   164161 cacgtcgcta cgtcattcgc taattggtcg agatacaaat cgccacgaga gagtgaagtg
+   164221 acgacgttgc agcccccacc agtcccataa gccaaaccaa gaccaagtcc acatccaagt
+   164281 ccaagtccaa acccaggaag aggatgagga tgtggaagag gcgttgccgg tgttggccat
+   164341 cagctgatgc aactgcgggt gctgctggca gtacatgcac tgcaaccaaa gtagtactat
+   164401 atatgattct tatttaatta ttattaatta ttcttattta atagttataa tgttataata
+   164461 tagtcccatc tttatattac aaagtaatac aaactaccaa ccatgagatt gtattcaatt
+   164521 cattttttaa agtgtatcaa gtgccaaact gtcatcatta cctaccttac cgtacccacc
+   164581 ccgtgtcacc tttctcttgg ccaacaaaca gcggcacttt ctttgccaaa cgtccaaaaa
+   164641 caaatcggtg gctctgctgc gtcgcctctg tcccgccaga tgctgtcaaa tatttactca
+   164701 agtgtttctg cttttcacac caccccgtca aagccaccaa atcgccaacc atgaccagcg
+   164761 tacttcactc cccctaccaa cagccaccac cccacctggg tggacaaggc tggcaatttg
+   164821 ctttatggga tttacttgta gtgtggatag gcggtttggc atttttggga aagtggacat
+   164881 cggaaatttg gttagcctta atgcattcaa ggtgtgattt ttccgttcgt gtgcccccaa
+   164941 acaaactggt tcgccaagtg tggaactctt gggatgagga tctcactaag tcagtttgtt
+   165001 ggagaagcca tgaaaattag gtgctacact tgcagcggtc attaggttca caatgtatta
+   165061 gatatgaatc tatttcctgt tttaaatggg tatcatattc ggctaaacaa acaaaataag
+   165121 caaagactcg caacacctgc gatgaaaacg atttcatata tatgtttaaa caaatttcta
+   165181 ttagtctaga ctgttagtta gtctgctttg ttgggtccac attaaattta attcaattta
+   165241 ttacaccatt gaatggcata ttagtgatca aatctcgttg tcattttctc gcatatatat
+   165301 tttaatatta tcatattcgt tagaactcac ctttgttctg tacaatggat tgctaaagtc
+   165361 ccaatctggc gccacaaaca gatatccgca tttggagacc tttcgcgatg cctgcagatg
+   165421 ttgaaataga aaaaatggtt atgttaatat ttgtggtaaa caaaagtgtc acataagcct
+   165481 aataaaaaca tttcaatgct gattgctttt cgaatataaa ttatataatt ttggctcgaa
+   165541 tgtctagtga attatgaaaa tattttctag ctctttgata acaaatgatc aataaattac
+   165601 caaaactatt taatcttaat tagcaaaaca aatttattcg gaaatggcat tggcgaaaaa
+   165661 taatcgaaaa agaaaatgaa cagagttatc aaatcgcgct tatctcctga ccgtaaaaga
+   165721 aatcaaaatt tatatgtatt tttaattaaa aaacgtcaaa ttttcgtagc ctcctttttt
+   165781 gtttggcttt aaagttgtct gcttatcgcg cacgtgttat gtttacctat accgcaacca
+   165841 ggtatatctg cacacattta tatagttttt tagcaaagct attcgtctaa cccgtgacta
+   165901 atgttaatga cgtgcgccaa ttcaactgtt tgcgtggtat ttttggttct cgcttttagt
+   165961 tttttttatt tttggagggg ttaaagtaaa taaaataaat tacaagtaga aactactatg
+   166021 aaaaccagac cacttggctg ccacacattg gtggggtttc aacgggtctt ggcctttgca
+   166081 cacctctggt ctagagctgg aacaataagc gacaattggg ccgcatttaa agtggatttc
+   166141 agtcgttagt cgcgcccgcc attaggctgc tgtcggtctc ttgtttggga ttgttttggg
+   166201 tctgctttgg gtgggtggtt ggtacataca tattgtgtat tgtgtacata tatattttcg
+   166261 atttgacaaa gctgcaaacg agagtgccaa atggggggat agcacaatgc tcccaccaat
+   166321 tgaacccagc tgaactggcg tttaatttga gccaactgtg acggtggctt aaggtcgcca
+   166381 atcagcttct aattaaagaa agtgagacaa tctttaaatt aaaaacaatg aaacatataa
+   166441 acgccaacta agtaaagaaa acagtatgta tttgtagaat ttataacagg cttttaagac
+   166501 aatataataa atacattatt ggccacatta tttaaaagtt ttagttgatc aaatcaaacc
+   166561 aatattggat tttatcaaat ctacatatca gacgaatttt agcataaaaa ggaatatatc
+   166621 ataaaaaaag gaaaatactt taaattccaa attttttagt ggcctcacag tttctattcc
+   166681 cacacaacat aactttgaaa acgctcacca attaagccac cttttgccat tggcaacgac
+   166741 tttaagcatc gcctaggact cgatcccgac cgaccccgat catgtcttgt aagctgacac
+   166801 cataaaatgt ttacaaaaca ataaaatagc aaaaacgaaa atcaaagccc aaaaacgaaa
+   166861 tgtgtaatac ggtagttccc ggccactcac tcgcctccaa aactatccaa caacttaaca
+   166921 aagttggtca aaagagaaaa aaatgcctcc cactcctctg ccacggaggt ctaatcgggg
+   166981 tactctatat acatgttttc ttattttttt ggggtctcca gctgtctggt ttgcaattac
+   167041 gctggctaat tttatttatc tgtcttggat tgtttttttt tttttgcatt ttatgatatt
+   167101 ttctgtggtt ggtgctgttg tagtcggtgg tgggtcgttt aaatgcgcca ttaacattga
+   167161 tttatgtgaa aagtcgcacg tttgtattca aataactggg ggagagagag aatgagtgag
+   167221 agacttgcaa ccgttgcagc aacagtagcc gcaattgcaa tcgcaagttg cagttgcagt
+   167281 tgcagcttgc agctgctgtc aggctttcat caggctaagt cagtcgacac gttttccaca
+   167341 taaatttcaa tgggtccgaa aagttaagtg acagaaatag gttcaggttt agtgggtagc
+   167401 taaggtagca ccttctaagt aacgacccgt ttaccgatag ctggctcaca cgatcaccaa
+   167461 attgcgatct acttttgtca cattacggcc actgctaaac tgtgcgacgt gtggggactc
+   167521 cgatggatgc gatggaaaaa aagagggggt ttcaaaaata ccgttggcta ctcgtaatac
+   167581 ccgaccacgt agcccacaag accgactcca ctggcactgg cagacattca gccattcaaa
+   167641 cataaattat caatgccagc ctggtcgctg ttttgcttgt ttgggggaaa aaaaaataaa
+   167701 aaacttttac gaaaactaca ggcgaaaaaa attggcaaaa ttcgtgacta aatgagtcac
+   167761 aattgtcttc taaattgtgc gaactgtttt ccattttcgg tgtccccaaa agaattgatc
+   167821 aaacacataa gtaaagactt tgtgtgtatg gatgccagta taccctcagt aaatagtttt
+   167881 taataattgt ttggcaattg atagctgtgg gaaaacattt gcctctcata aggtcagtca
+   167941 tgcactatga ttaagattgt aggttcacct ttgaaaatac aaaaaatggc aacataattg
+   168001 gcagtaagtt attttaatta aaaataacaa taaggctacg agatgagaaa aatatgtgaa
+   168061 ctgtgatatg tttggccaaa gtaaagtgaa tgaatatgct tattgtattg ctgtgtgaat
+   168121 tgatgtgtta tatttttaag ttgtgtctac atgtttttta gtaatttctt cacttttaag
+   168181 atccaggagc caagccaact tcttgtttaa gggtatttca aatcaaaaac aagccaccaa
+   168241 gccaggctga atatttatgc atcgcgcatg tcgctgctga cacacaaatg ggagattttc
+   168301 aaatgaaatg gggcaaacca cgtggaactg gagggggggg ggcttttggg gctgcgcgtg
+   168361 cagaattttg agtgagccgc tggaatggtg ggcgtggcaa cgcccccagg cgctggctaa
+   168421 tgaagcaaag tgcgccgtgc aaatgtggac cgacacagac agaaataaaa ccacagccca
+   168481 actgattgat cgctccgagt gagtaactca gccacaaata cgaatgctaa tgacaaatca
+   168541 aaaggttgtg actgttgaga atcgcgtatg tattcggctt caaggtaatt taagaagttt
+   168601 acaaaaaatt tttaattgaa aaccattacc aatctagcgg attttctcaa cctacatatg
+   168661 ttcgagcaca ttttaacaaa caaaatttaa gcaatgaaat tctctttata taacctgtga
+   168721 tgatttctat ttaaagtatt atatattcaa cattttaaca ggtaactatg tgtaatgtgt
+   168781 tttgaaaccc aaagagttat gtaaaacgaa atgcgtgatt aaatacattc ctgtagaact
+   168841 caaaaacgtt tttacaaaag aatcacacac tgaaaacact cgatatgaag tcatgcaaac
+   168901 gcaattatgt atgagagaca cgggcaagaa aactataggc gatcgatgga tcttagatca
+   168961 tacttatccg caataaatgc agtcttatac ttactggttc tgccagattc gacgaggttc
+   169021 gctattgcgc aagcgtgcaa gatacgaata gagaatgcat tcggttagtt ggtgaattaa
+   169081 atagttatag aaaaagagaa ggggtgaaga cgaacgaaat gactcaatta gcagcgggga
+   169141 agatgtctca gatgagaggc tgatggtgat gcattttgac atttagccac atttgagatg
+   169201 acattcaaga cgctcgagat gagcccgaac cccatcttac cccttagttg cagcggcagc
+   169261 caaatgaatt taaattgcat gagaccgact tgttttcgat ttaatttgat aagaaacccg
+   169321 gggcgagacg ggtcgtccgt agtcgtcgca tacttatgac tgtgaaatgt caaatcaaaa
+   169381 gtcacgtttt tcactcgccg ccaagctagc tgatttattg gcagcagtcg gggctattaa
+   169441 ctgaatcgta ccgctggcca taaatcggct gccggctgag ctccacttgc agagttgtgg
+   169501 aaaatttcac ttttgctgcc gcagcggcta ataagccaaa aaccgattga caaataaaca
+   169561 atttgtggca tccatcccct ccacatacga gtatcacata catctatcta tctgcacata
+   169621 tggtataccg tgtttagtta gtgagtcgca cgcacacgtg gatccacatc caagtccaag
+   169681 tcaaagtcaa gccatgccaa gtcaagtccc aaacaaacct ggtccaactt cgagttggag
+   169741 tggaaagttg tttgcagctt caatctcttg aatctcgtcg aatctcttgg gcttttaact
+   169801 caatttcagc ctgctttagc tgcctagctt tgtttgttta tttattttgt gtttatttat
+   169861 acatgtgtat tttcaatctt tgccattgca gttgttgttt cgcttcgagt ttctaagttg
+   169921 cggcttaata ggcaagccac aacaaccaac atcctcaacc acagtaacaa caacaaccga
+   169981 ctaaatcatc attagccaca tccatccagt tggcaaagag atggcaacag gtttttcaac
+   170041 catcccatca tgctataaac agtttctgct aaagttatcg aatagtattt tgaatttgat
+   170101 ttcccattca atgaagccat taaattcaaa cacaaaaaac acgcgcatac tacacgcata
+   170161 attcattaat agaagtcaaa tgattaatat tctttgatta aaaaattgca aataaacaaa
+   170221 aattatattt tgttttgcag cgtttgagca tcatctcacg caatctgtta gagattgctg
+   170281 aattttttat tagcattttg gaattttcaa tcatttctgc agaactctga cacaaatgct
+   170341 aattaagtgg attccatgaa aacaacacgg cgacaaaaac aaaatattca cagtaagcgc
+   170401 tggagaatcc ccaaagttta ttttaaataa ataaatattt aacaataaaa ctattcttca
+   170461 gaaagcgaaa ccaaggcggc atttttggca tttcattggc gtaatgctta tatatgggac
+   170521 atgaaaatta cagaccatca attcttataa ttgaaccatc aaattgtaaa gtggatgaag
+   170581 tccagcaaag tctgggcttt tgtcatacag ttcatacctt cagtaaagtg ggccagcgat
+   170641 cgagaaaagc aagatcgagt gagacaattg gtattacttg actggccatc aatgaatgaa
+   170701 aacaattttt accagaaacc gtcttttttt ccggttataa atcgtatcaa tcaatcgatg
+   170761 gggaggtttc cgaggagcat aattaatgtg tgagaacaac tttcttcgct gaccatgaat
+   170821 cgaagcgcct cagtgtttcg gattccgaca cagactgcgc ctgaaacaaa aatgaaagtg
+   170881 gccctcgatt tgttggccat aaagggccgt cgccgatctt aggcccgatt aactcaatga
+   170941 cggctaatct cttctcattc ataactataa acatatatat atatatatgt gcctttacac
+   171001 accacgcatg gaagcgtctc cgacatgaaa tatgattaat aagtttcgca cgattattaa
+   171061 gtcccccaga ctctgatcaa aaagcaagag gcaacgaact gggagaagaa agtggtttgc
+   171121 gatccgccaa gacacaggtt gattgacttc aagagggttg ggacatgggt taatgccatc
+   171181 agcatattta aataaatttc tgtgggtaag ctttactcct agtatcgacg tagtttaaaa
+   171241 ataaacagta tgggaaaata tacaatttat acagtttatt aaactaatct ctccatatga
+   171301 aacatattgg taaaataagc gattaatctt aataaaccat taacaaacta agaattattt
+   171361 aattttaaat gatatatata ttatcctatt ttttagaaca tatctttata tcaaaaataa
+   171421 tcttaatatt tattattatc aaaaccaaaa cttctgatgc acctttgaca acttattcaa
+   171481 gggtatgaaa aatcgaaagt acaaaagaaa tcgaaagcga aactgcgcaa cacaaacgaa
+   171541 acttgaacag aaacagagag gccgaacatc aagtgcaaca aaccgatgaa ccgccgagcc
+   171601 gagaaggggc agatcctcgt cggaagggga agggaagggg gaggggcgct gtggcgtgga
+   171661 gtgcaacggg tgcagggggg tccgagtggc aagtgcatgg cacggcatgg catgaaaata
+   171721 ttgtgtgcta tggcactggg tagcaagtgt gactgaccag cagctgagag gcaactgcat
+   171781 ttggaaatga aagcgaatgt cgtcgattta gcggttgtgt gtcgacggtt tagtgcacca
+   171841 tgaaaaataa aacaaaaatt tttcaagaga gtataactgt aaaatcccaa ataagaagac
+   171901 tttactcgtt gagtttttgt aagaaactga ttttatttgg aaatatcttc ggtttaaata
+   171961 ggtgacatga gaatcgcatc ttaaagtaaa tggcctacgc agaggcctaa gtaaatagtc
+   172021 cccgccttat cgaggtccca cgctcgggca catctgccta tcttgagcgg cgaggacctt
+   172081 atctgtggtc tcccactaag ggactatttt aggaggcggg gaacgatctc aagtgactga
+   172141 ctcatgtagt gtgcacttaa attacatgtt tttgagcaat gcacccatgt cgccttagat
+   172201 aacaaaatcc taaatataat ttatcgctct cgattcattt acataagata tgaacggagc
+   172261 ccaaaattgt aagtctttaa atatattcgt gttcatgtgt gaacattctg ccaaagggcc
+   172321 agcaagctga gatgtacatt agtatattag ttgcatttat aacagtagtg gactgtattt
+   172381 atttgatata tgttcattta ttttgcaact aagatttcaa tgggcctagt ttttctggct
+   172441 tttaatttta ttggattaca attatggttt atattatttt taattcaaca atttgtactc
+   172501 aattaactta ttttaaacat tttgcttttt ctatgtagaa gtacattttc tattaaacaa
+   172561 cgatatagtg gactgtatat ttttatttgt tatattattt tattgtttat ttactattga
+   172621 tatttatagt actggcaacg gacctgcaaa ggttattgct tgcattcttt gttgcacagc
+   172681 acatttccgt atgaaatata ttattaatac tatcattagt attaaagcaa taattaatcc
+   172741 agcatgtccg gaaatatgat ggaaatgtaa atctttcaat ttttgatggt tctctttcat
+   172801 aaatttaatt tcattattca atcgttcaaa ttcctcagta tgatttaata ttgatagttt
+   172861 cagcggatcc caaataatct tcggcgcttc actaacttct cctatataag gtgttgataa
+   172921 taattcttca ctttcggact gaatgttatg gtgggcaact agaattttat cgtcggttct
+   172981 tgccgtacaa tatggcgcaa tgcttaaaac tccttgctga ggcaaatcaa acaattcaat
+   173041 ttgagagcca gtacattgca gacggacttt tgaattcgca ggaaccttaa acaaccaact
+   173101 acttttcttt tctaactcta cccagtaacc tttagagtcg actactgttt tatagatgca
+   173161 gttcgccgct ttatctggct ttagaggctg aatctcacat gcattatcat tcgctgaatt
+   173221 ccagggccaa cttcctttgc atattctctt atttagttgc catttctgac attgatttaa
+   173281 tgtggcttcc gtcattatgt gataggaatc tatctcaaaa ttataaatta aatattcgga
+   173341 cgttgtatgc accattatta tccgatcttc atttcgaatt ggcaccggaa taagcctgaa
+   173401 caatttggat ggatgcctgc taaacagagg cacttttgca ctaatgatca atttatcgtc
+   173461 gatgaataaa cccctggctg ttaacagtgt atacacctcc ttaagttccg tacctgaccg
+   173521 ttttcctgga attactaggt tctccgaaag actctgctga attttggcaa tttctttttt
+   173581 aagctgattt ggcctgagaa tatttgtatt tagcctaccg tgattaatat caatcaacag
+   173641 gcttataatg cctgcttgaa ttttttcgcc ttcttcaatc aatgagtgta gctgtttcgt
+   173701 aatcataaag aattttattg attccttata aacataataa ttttctttaa gaacttctgt
+   173761 catgttctca attcttattt gcatacttct aaaattggag ttaacatctt ctgttgttct
+   173821 ctttaataga ttagaagttg aatcaaccac agatgtttgt ttttgaatta gtttatcaag
+   173881 gttgttctgg ttatctaaca aattcttcat attttcttct aattgctctc tatcatcttc
+   173941 atccattata ccaaataaaa tatgatacaa ggaacccata aattcgaaag gagcacgctt
+   174001 gcttctagat ctagactgta tcataaacaa tttattgttt tcttcaagtt ccgataactg
+   174061 actttgcata ttatctaaga ctagactaca ttgctcttca aagctatgaa gtctttcgca
+   174121 aactttcctc atactttgta taagcgcatt accctttgtt aacattttaa aatatggatc
+   174181 cattttataa tagataacca aattccaaga agtactcaca atctcaacat ctcctagcgg
+   174241 gtctagatat attgctgagg ttttatttat tttgtctata gaatatcttg gtgctatatc
+   174301 tttaggtaat gcgctagaaa cttgacaact taacacaaac aacaacattg ccataatgat
+   174361 tccgatcttg gacattcccg atgtcgctct agttcgtctt tttggctctt ggtcagcctc
+   174421 atttttgtca acagacttta ttccttccaa gggacaaatt ttagtaatgg gtctagtgat
+   174481 atatccttcc tgcatcttta ctttagccac tcggacctta tcatcattcc ccttatgcac
+   174541 cttttccacc tttcctaaag gccatcttgc aggatgacaa ttctcatcct ttaataaaac
+   174601 tatttgccct tcttctatat taggaatttc ctttttccat ttattccttt gctggagcgt
+   174661 atgcaaatat tcacttttcc acttaaccca gaaatctttc ttcatttttt ggataagtct
+   174721 ccacctatcc aaatttccga ttttttcatc ttccattggt tcgactattt ctaaaggtgg
+   174781 tcttccaatt aaaaaatgac ctggtgttaa aacttcttgt tggtccttct cactaactat
+   174841 agtgtataat ggccttgaat ttaagcatgc ttctatttga cataaaagag ttgacatttc
+   174901 ttcgtaagtc aaaatagtgt cgccgattat acgctttaaa tggtatttca ttgacttaac
+   174961 tccagcttcc caaatacctc cgaagtgagg tcctgccggg ggaataaaat gccaatcaat
+   175021 cctgtccttt tcaagctgcg ctgcaatcgt tatattttct tgtattgcat taaataactc
+   175081 ttgatctaat tttcttgcag ctcctacaaa atttgttccg ttgtctgaat agatattgga
+   175141 acattttccc cgtctagcaa taaatcttct gagtgctgct aaaaatgcgt ctgaagttag
+   175201 atcgcttacc atttctaagt gtatggcttt ggtggccatg caaacgaata cagcaacgta
+   175261 tcctttaaat gttttttggc cacgattttt tgaacattta acataataag gacctgcgta
+   175321 atctattcca gtattaagaa acgggaatgt catcgtcact ctatattttg gcaagttacc
+   175381 cattatttgc tgagctgtat tttgtttata ccttgcacac gttacacatt ctcttaaata
+   175441 ctttttcaac gaatttttca acccgaaaat ccaatacttt ctttggatat agtttcgcat
+   175501 taggtttatc cctccatgca atgtttcctt atgagcattt tttattaata agcttgttag
+   175561 gtggcatttt tctaaaatga ttggatgttt aacattaaat tctgcattgg aattttgcaa
+   175621 tcttcctcca actcttagaa ccccatcctt gtccaaaaat ggattcaatg acaatatttt
+   175681 attatttgtc ttgatttcct ttttgatttt aaggcacttt atctcttgcc taaactggta
+   175741 ttcttgttgt ttcttaataa caactgtttc cgctattctt atctccttta ctgaaataat
+   175801 tgatgaatag gctttatttc ttgttttcat ctgcacgaat ctatttatgt atgctattat
+   175861 acgtataagt ttttctatac tggaatacct ttctattaat tcgtaaatag gatcatctat
+   175921 tttgtcattt aataccgtat ttattaagac aggttcttct acagactgct gccgaggcca
+   175981 aagttctttt gggtctgcta gccatttcgg acctttccac caaaaatcac agttgatcaa
+   176041 ctggttagaa tccactcccc tggatgctaa atctgctgga ttatcctctg acttaacatg
+   176101 attccattca gtatttttta atttccgaat gtcatccgtt cttcttttta taaatttgat
+   176161 cttactttga ccactgttaa tccatgctaa ggtaatcgtg gaatcactcc aagcatagat
+   176221 ctccattata ttgtcaattg atccttttag tctttggatt aattcactaa gcaggtgagc
+   176281 tgcacacagc tcgagtttgg gaattgtctt cctatttttt atagggttga ctctactttt
+   176341 gctagctatt atattaacat gaggtcctac tttagcatag actactgcag catatgcttt
+   176401 ttcggaggcg tccgcaaatc cgtgaatctg aatgactgaa gaactgtttg aattaatcca
+   176461 ccttgggatt cgaatattct ctaacaataa taaattttct ttatattttt cccaataatt
+   176521 tttatcttct atggataatt cctgatccca ttcactttta tttatccaaa gtttttgaat
+   176581 aaaaagtttt cctgaaaccg tgactggtgc caaccatcct aacggatcaa atatttttgc
+   176641 tagcgttgat aacacaacgc gcttatttat attttttgat tcatcattac aatttacgct
+   176701 gaacttaaat aaatcctttt gaggttccca ttttagtcct aaagttttaa cacattcatt
+   176761 ttcgataata ttgagaacct tattgtcccc tgtgtcctcc acagtggtta atattttgga
+   176821 attgttggaa atccatttcc ttaagttgaa tccaactttc tgcaattcat ggggaattaa
+   176881 tgttattaat ttattagctt cttctaccga atcagctcca gtcattaggt catccatata
+   176941 gaaatcattc ctaattattg cactaataac ttggttttta catttatctg caatatctac
+   177001 cagaaccctg gtagccaaat atggtgcaga tgcagttccg taagtgactg tggttaattt
+   177061 atatgtttta attttttctt ttggagaatt tctccataaa atatattgat atttttgatc
+   177121 attattatct attttaattt gtcggtacat cttttcaatg tctgccgaaa caacaaattc
+   177181 ccattttctc catttaataa taatgtcaaa aatatctttt tgaactcgtg gcccaaccca
+   177241 cattatgtcg ttcaaacttt tgttattcgt agtttttgct gaagcatcaa aaactactct
+   177301 caatttggtc gtaaggcttg aatctctaat cactgcctgg tgcggtaaaa aatatttgcc
+   177361 ttcatcactc acttcaatca tgtgtcctaa atccatgtat tcattcatga atttagtgta
+   177421 gtcaacctta agtttttcat ttctttttag ttttttctcc agattcatgt aacgagctat
+   177481 cgcttgtttc tttgaatctc ctaaggtgac atcctccttg aatggaattg acacaatgta
+   177541 tcgcccatct gaatcttttt ttgtcgtttt gataaattta ttttcacaga tttcagactc
+   177601 gatatcatct ttttcttctt cttccacttc ccagtagcga tctaactctt ttatttctat
+   177661 tgttgtggct acaatggttt cttttccttt ggatttttta catccagaaa ctatccaccc
+   177721 gaaatcagtt ttttgcccaa ggagaccgtc tatttttata actccatttt gcagaatgtg
+   177781 agtatatacg tctgctccaa tgattaaatc aatgcgaccc ggtttattaa aatcggggtc
+   177841 ggctaattta aagttcttcc attttttctg atcaacatta atcgtgttga ctggaagtgc
+   177901 cttcataagt tttgggagaa taattgcttc aatttctaaa tttttcggag aatttcttat
+   177961 cgaaataacc gctttgtgct tggagatgca cgttcctgtg gaagatactc cacttatttc
+   178021 agtatgagac cgaaattttt tcaattttag aatctgtgca gactcttctg aaataattgt
+   178081 gctttgagag ccactatcaa tcaatgctct taattgttca aagcctccat acctcgactt
+   178141 tacttgaatc aaggccgtgg ccaacaaggc ttgacctgtt gttctacacg tattcacttt
+   178201 ttctggatta tgacctgcaa agtgaagtaa cgtgtggtga ggtttacgac aagtcgaaca
+   178261 aagctgctcg cttatacatt ttttaccaaa cggatgcctc agacatctta ggcaaatccc
+   178321 attttttctt acccagtcag accgttctgc tggattcatt attttaaatt tatggcattg
+   178381 aattaaataa tgccctggta gtttgcaata tgcacaattg tcactataat ttttattctt
+   178441 gttattatta atcattttct ttacaggttt tacttcctgt gagaatgatg atatagaatt
+   178501 gagcctttgc tctaaaaagt ccatgacatc agaaagtgcc tgtatttctt ttgtcttttt
+   178561 aacatggctt tcatataaat tgagtgattc tttattgaat ttccgaagaa ttatgtgagc
+   178621 gaaaattgca tccacatctt ctggtaattg tgcctttaat tttataatat aaattgactc
+   178681 gttaatcgtg tcaataaatg tctttatttg cttattggat tctaaattta aatttggcat
+   178741 atccataagc ctattcatat gatctgagaa tatgtttctt ttattctcat atcgcttggt
+   178801 caaaaactcc caagtggctt cataattttc tccagagccg agcagtaaat gagtaaccac
+   178861 atttctggct tctcctttta atgctgactt tagataatta aatttgagag aaggactgag
+   178921 atcctctctc acatgtatga gctctgtaaa gagttcatta aaaagatccc attctttgga
+   178981 atcaccaaag aaggtgggaa tctgtatttt aggcagggtt ggtaactcct ccgccttaac
+   179041 aaccgtcgac atttcagctt tatttattgt gccactgagt cgactattaa tggctgtaag
+   179101 aatattttgt ttgtcaaatt caagttcgct aatttcttct tcgaatagcg aactctcaaa
+   179161 atgactattc acctgttcaa tcaggttatc tattttatgc cataaaaatt caattttatt
+   179221 tttccttatt ttaaggaaat ctggactact gtctattagt ttagacgtat cttctagata
+   179281 tactcgaaat tggccaacat tattacgaaa tgcctgctct acttttaaag cgttgttttg
+   179341 tgctataccc tctttttcag ggtgaccacg catttcaagg tttaatgtag ggggagggtt
+   179401 tgatttaggg acagtgtttg atttagggaa agtgttttct accgactcgc ccttaatttt
+   179461 ttcattttta tttttaattt tttctaacac catcattatt aaatcatact gcttcgcgtt
+   179521 aaaatattca tgatcgacaa cgccgatctt taacaaattg ctatgattag caatgaaaca
+   179581 tttttgaagc tcatttaatt ttagaatttc tacatctgtt aatgttggtt ttacttccaa
+   179641 ctttctaatt tccaaaataa tttcggactg cttcttaagg aattgaatag tcttttctga
+   179701 catcattttg aaaatttttt gtaatttctt aatatatata gtacgtgtat atgtatttta
+   179761 tatgtattta tatatatatg tgtgtttgga taaacagaaa attcttgttt tgacttagct
+   179821 gatgtcgttg ttgttgctgc tgctgttgct gctgttgttg ctgctgttgc tactgctgtt
+   179881 gctgcttctg ctgctgctgt tgttgctgct tctgctgctg gtagaggctc ctttgaattt
+   179941 gacttccttc tcttctttaa ggctccgtcg atgtttaaag atgatttttt tttttttttt
+   180001 tttggcacgt tttattattt ttgtagtcca gtcagatttt ttgtttttta gccatttatt
+   180061 tcggcattta tgctcttttg gcatatacac tgcactctat ttatgagctg atttaatgct
+   180121 attagagcat ttataaggca ctgttttcag gcacttttta tttaatttat gctcttatgg
+   180181 catatacact gcactctatt tatgagctga tttaatgcta ttagagcatt tataaggcac
+   180241 ttttgttatg cactttttaa tttcacaatt gctgatgtat ggcctcaagc acgccttacc
+   180301 acaatttata atggtacaca aagcaacctt tagctataga ctataaggtg cttgttttaa
+   180361 aacataaaag attcttttgt atcttttgct ttttattttt atactctgtt ccttaccttt
+   180421 ggtaggggga aacaagagtc acttattttt gctaccttta gctgtaagat gcttaaagga
+   180481 gctggccttt ctctgagttc cttacctttg gtagggggaa actgcagagt cgattaaagg
+   180541 ctcgattgac caaatgtaaa atcccaaata agaagacttt actcgttgag tttttgtaag
+   180601 aaactgattt tatttggaaa tatcttcggt ttaaataggt gacatgagaa tcgcatctta
+   180661 aagtaaatgg cctacgcaga ggcctaagta aatagtcccc gccttatcga ggtcccacgc
+   180721 tcgggcacat ctgcctatct tgagcggcga ggaccttatc tgtggtctcc cactaaggga
+   180781 ctattttagg aggcggggaa cgatctcaag tgactgactc atgtagtgtg cacttaaatt
+   180841 acatgttttt gagcaatgca cccatgtcgc cttagataac aaaatcctaa atataattta
+   180901 tcgctctcga ttcatttaca taagatatga acggagccca aaattgtaag tctttaaata
+   180961 tattcgtgtt catgtgtgaa caataacaat tccaaaatac tatgaaattt acggattttt
+   181021 caacagttct ttgtccaaaa tttgattcga aaaactacag aaaaagaagc tagctatata
+   181081 gtagaattac catcttttgt aatttttctc agtgcaatcg ctcgatcatc gcctgcggtg
+   181141 cgcggtcgcg cttcaagctc catttacgaa aaaaaaagcg gatcgaagcg agggccaaag
+   181201 agatacgacc atcgcattgg cgatactgag gggcaagcag acagtatgtc tgtttgtccg
+   181261 cgaaccgatc cgatccgatc cggaaccgct gccagcgatc gaatccgatt cgaggcgagt
+   181321 caacgtatcc ccaacatccg accagttctc tgcattgcat gccaagaatt tcgactgcat
+   181381 cgcccagcta gagtgggaaa gtttgggtcc caacattgtt gcagtcccat cggcggcctc
+   181441 cgagttatta acataacata atattgactt ggatgggcgt tcggctgcat tgtgtaatat
+   181501 ttaagaaaga caaaaacaaa aatattggaa atttcagttt cctacacctc gcgtctttct
+   181561 ctttccagtt tcactttcgt tgcacgctct gtgggaagtg catgtgtgca tcatgctgtg
+   181621 cgggaatggc ttggaatcga cttggatgag atcacctgca tgggtgacat cgtttaggac
+   181681 cggtaattag tcttcattat gagtggagaa attgagtatg aaacgcaaca taaggaaatg
+   181741 aagatgcttt ataggtttcc tctccatttt attcattttt ctatataaag ctattggcta
+   181801 gctatattat ctgatagaag gaaacctttt cccttaaaaa aaaaaaaaaa acaaacataa
+   181861 catcaattaa taacaatcaa cggccccttc agatcttttt gtaattgatc caattgcgat
+   181921 ttccgtcgat ttacgcatta tggttaccac aattaaagaa taaccgcaaa ttcaaagcaa
+   181981 caaacccata aggcaacaat taaagtcaag attacaattg ctcaaaagga aacctcgacc
+   182041 agactaaact aattaaaact ccgtgggtaa ctcgtaaaag ccaagccaaa caagtcatag
+   182101 tgaagtaaat aaaataaatg atataaaaat ctagtcattc aaccagtggc cacacactaa
+   182161 ggcggaaaat gcaatccttt tactattgtg atggctagga aattacaata gaaaaactgt
+   182221 cagaaattga tttccaaatg ggaaaacaac gcacggggca gccacgagaa aacaagaaat
+   182281 tacaaaacaa tagtttaaag gcaacatttc actttcaatg aatcggtttg cggagatgaa
+   182341 gaagggctag gtggaggctt ttattggtag cttcatcatc tttcgcatct ggattcgtag
+   182401 agacgtactg cgtggatgga tgtggatgct gctccgtggt ggtggcctct tcatttgaaa
+   182461 gctcagcatt tgaaatcgac gtccgatggt ggaaaatcgt gcacatgaat ttcagtaagt
+   182521 aaaagtaaaa atcaccttca acgttttcgg cttcttgtgc gccgcagcag cagcggcagc
+   182581 agcatatgtt tgcatttgat tgtgtttctt tttcttttcc agctgtacga gtatgtattt
+   182641 cttattttta tttttatttt tattgccgca catgagtttg caagttgccg gggactcaaa
+   182701 ctcaactcca ctctttttcg acttttgggc gatagacatc gcacatacac tcgctcgcac
+   182761 aggtcgtgtc tggcaatctg acttcacgtt ggcaatccga cgaaggggca gacggtatta
+   182821 caattcattt agaaatgcat ttaaaaatac ataaaatacc cacagcattg gcggggagtg
+   182881 cgcggtgagt gttggcaaat gaaaatgaaa gcgaaaagaa tgccgaaatg aaatggaaag
+   182941 ttttcgccaa gttttcgcag tgtgatcata acagctgtat gcatcatatg tgtaggaaaa
+   183001 ctggaaagtc ttgctatttt atcaaaagca gtttattact gagattttaa ctttaaaaga
+   183061 acaactgtgt atacatgttt tgaaatcaca acaaatttta tgaaactgaa gttaaagacc
+   183121 cagcactgca atttaatctc acaccaaaac gccgctaaca gatttcccaa cttttcaaaa
+   183181 tatatgcaat tacaagcaat tgcagtgtgg ccagcttatt tagttatgca caggacaatc
+   183241 aagatttccc caacacttgc cgctggttac attcccaatc aacagcaatc atcgttattt
+   183301 atgagagttg cacaacgatg atctgtaaga aaacatggaa aaaaatcgca gacccaaaat
+   183361 aaaggggacc aaaaggacca cctggattga ttgtctttgg gtatccgaaa ttaatcatct
+   183421 gtccgatttc aaggaaagtc cgcgatgaag ggcatttcgg gtatgagggg gctgacagat
+   183481 gaccaacagc aaatgattcg ttgtcgtgcc cttgtgtgtc aaattcatca ttaaatgtca
+   183541 acgcttcggg ggacctcatg cgaaactctc gccagaaaac ctaaaagcca agccaaacca
+   183601 aaccgaacca aaccaatcca agatcgaact gagggggtaa atcggagaag gtgcagccaa
+   183661 aaaggttaac aactcgatgc caattgagta acttaacaag gagagccaca gggcgcttaa
+   183721 acggagcaaa acaaaggcgg aaaacttgac agggctagga taaatagaag gagttcgatt
+   183781 tgaggttact ctactcttga taaatacatc attgttatat tgtaatatat caaataaaac
+   183841 attattggaa agaagtcctt ttaaaacata ttttattata ttgcttatat aatacaattt
+   183901 tgtgtttcgg ttgatacaat tttcctgatt atacattatt acagcatctt tagttcacct
+   183961 gcaaaccatt aagccctcat ctgttctcct aaccttgaca acagtattcc gcttccttgc
+   184021 atgcagtttc cttaaagata taccctctat aaatatatat atatacttta ctttatatat
+   184081 aaaacaggcc cataacaagg cgtcttgcaa cgcgagaaaa cttgcagcag gagaaaaaac
+   184141 gaaataattg cgcgactgca taaattttcg caggccaaaa gacggccaga cagaccgact
+   184201 cacagctact ttcgttttta atttaaaaaa aaaagaagca aaaacaacaa caaaaaatac
+   184261 ggtactgaag gaggaaaagc gctttgtggt tgtgtgtata ttcaagagat cgttgatcgt
+   184321 cgatcgccgt caaatggcaa cgaaccttcg acagaccgca gccaagtttt gcctttgcac
+   184381 ataaatttcg tgtggccatc gcaactggcg gaggggggga aaaagtgagg aaaaccgagg
+   184441 aaaagtgagg gacaaggggc agcatagggt actgccgatg aagatcgatg attgtcatcg
+   184501 tcatcggctg ctttgccgct tgttgccgct gctgctgctg ttgacactaa gtgaaagtga
+   184561 acttgatggg gccacgtcat gatcgccgtc attcatgatc gactccgttt gatcttaggt
+   184621 ttcctttttg gccacaacag ctctgcattt tccccgccat tcatccaatt agaaatgttt
+   184681 gacatttaac attgttccgg gatacgcaat atgcggcggg cagctcgcaa ctgggcggct
+   184741 tagatgaaaa ccaatgcttt tcgcaatttg actggaattt tcactgtatt ttcctcatgt
+   184801 gtgtgtgtga gcccgttgtc cattgccagc aatttgcgtt cgattttggc caagaaaaaa
+   184861 aggtgaaaag tgaaatatga caaggggtgg cggtaagggg agtgagtgta atgtcttatc
+   184921 tgcagctgca attgaatttc tggttgtatt cgaaccctcc tttttccgcc aaaacgatgt
+   184981 tgtattttac aatttggttt cttttttagt tttcggccag ttttgattgc acttggggag
+   185041 attagataag caaatgttaa gttatgcgat cgtaaactaa agctgcgggt tttccttttt
+   185101 tctcctatgc ggttgtgaat ataaaatcgc cgtaaagtat tgaaatgggg actgaacgac
+   185161 ttaaagggga aacttttcga gaaaatgatg cagcctaaat aagccattag tgaattatat
+   185221 attgggctta gtttaagccg ggggtcataa gattaggaaa cttacatttt tattgacatt
+   185281 tcgtgaaaga agtggtaaat ctttatgaaa agtattgtag attaaaaata atctgtgcaa
+   185341 ttaggtcaga atatatttgc ttaacctagc tttaaaaaat ccctgacgag acttttaaat
+   185401 ccaaatgata ttattataaa atttttgata atcacccatt aaatgacctc taacgtatat
+   185461 ttagaaactg gcacaacgtt tcttaaaccc ttaccaaaca agaattttat ctgaaatttt
+   185521 taattatcaa caagcttacg tatgttttca tttttgaccg acatagatta gcattggaaa
+   185581 tggctttatc gctcgggtgt actggttgta tttgcttacc tgcaatgaga taaagaacaa
+   185641 ggtagaaaaa ttagtttggt tggttaagtt atgataaata ctacgttttg tacggaacca
+   185701 cagccaacgt tcattgagtg gtgcattgag gtaaaaacta gcacaaatgt ctttattcga
+   185761 gtcagtaatt caacaaaaca atgaacacac acaaaccaaa aaggttgggt tatcttatat
+   185821 ggttttgggg ccgatccgga ctccgtgcgg ttgttgtaat tacaaaattg gttcggcaag
+   185881 cggtagagag agtttcccgg ttaccccccc gctcaaccca tcagccattc tgatgatttt
+   185941 tatgaatgaa aacagaggcc agaaacggtg ttcggcgttt gtctatttat ggataggcgc
+   186001 aatgctccgc tctacacgtc tattcctata tagactttgg ccggtttcac atttcattaa
+   186061 gtggaaaatc agttgctcaa ttagcgagaa tcgaaaaagt gggacataac agcgtcggcg
+   186121 ttgcaacttc ctttttgtct tttttttcac tcctccacct tattgtcaca ttattgttaa
+   186181 aggcagagaa accagcgaaa caaacaaccg gcaagcgaac caaattcaac agaagaagtc
+   186241 acttaagtgc caacgtaaaa acgtttaaat gccattgcgc atacgccacg ttgagcggtc
+   186301 gaggtaggag aagaggtcgt cagtcgctaa gctatttaac ataatttggt catgccattg
+   186361 cattgcaagc gttattactt ttacggcaag gtctgcatgg ctagcaaagc tttgagctca
+   186421 cttgtaaaag attttccagg tttgcatttg agatattgga aatcttcagt gattaaagtg
+   186481 ggtggtgata aaaagtaaat tttaaccgcc acataaataa gaaaacttta gtttaagtgt
+   186541 gcatattgta attaaaattt taagaatcat ctcaataaaa taaatagtaa gacaacatgt
+   186601 tttgtaggca taactttaaa agagttaagc aacaactttt tctttaaagt taattttaac
+   186661 tgtacaatgc aacacgaaaa acaaagtgtt atagcttctt tagtttggct agggcattcc
+   186721 tcattctaag tgggaattta ctaccctttt tcgctaggaa tcccaattgg gacttcaggc
+   186781 ctcaattcac ttcgctatgt gacacagata ggtttgttag gagtatgtct cattgttgcc
+   186841 tgggcagtta tgttttcttt tcggaaattt tgtgtgtcag cggggcaaag tgtaataaaa
+   186901 cgcaactgct gcagtttgtg ttacttttat ggacttttgg ttttcggctt gagtcgatgc
+   186961 agtaaacccc acgacgacgt ccagttacat aaagacctaa cttattttgg tatgggtatt
+   187021 ttttttttgg tgtcgaatgc atcaggccga ggagaaataa aattttggaa tgtgggcgaa
+   187081 gtggacttat actttcgctg ttatttacga tccaatacta atttttgaat cttcgatgcc
+   187141 ataaagatta gagggagacg gacgccaaag acttgggagc tggtcaagta ccgctagaat
+   187201 gaagtttgcg cgcgccaaaa caattccaaa ccgacaagaa gaatcgcaaa aaaaggagcc
+   187261 aagacgatgg ggccaggcaa caaattcgca tgacttccgt cattgacata caacatgaca
+   187321 gttgatttct gtatttgata tcccagtgaa tttaccactg tctcgtgtgt gagtgtgtct
+   187381 cggcaaaaga ctccaaaaaa aaaaaaaaaa aaaccaaaca aagaaccaag gaatctctgg
+   187441 aggaaaatgt gtgatgagtg ccagccagtc cgcaagtcag aagacccgga gtcatcttct
+   187501 gactacgtgc aacgttaaaa gaacatttcg ctgagccagc agcggcacca gaatcttgga
+   187561 gactgggact ccactcccgg catctcgtct cgggcactct tcttcgggaa ccgttaaatg
+   187621 gtacaatatt gaaatttatt tttatgctga tgctgatgct gatgatgatg atcatttggc
+   187681 gttggaagaa ccggttgcga tggccctgct cctggccatg ccaatgccag tgcggaattg
+   187741 cctggttcca aggaacgctg tttttgccgt gtcccaatgc caattggcca aatgtcaaag
+   187801 caacgcaaat tcatcatcaa gccagtgaca gcacagacga agtggcagaa aaatactgga
+   187861 tgaactttaa atgtgatggg aacataggaa ataaggaatg atgggaattt ttggcaatga
+   187921 ctgggcgttg acttcaatgg aattattatt gaaaaatggg aaattttata tttatttaaa
+   187981 tatatagtaa taaaagagca ttcatcactc aaaagtaaat gtacatttta atatacatcg
+   188041 tttatttttg gtgtattttt aatagttgtg tttttggtat ggtaggaata attgcatttc
+   188101 tatgaattct taattttaag aatatggtaa atatatcagt ctatgtttaa atatatgctg
+   188161 cagttccatt tcacagttgt ttttttccta tcaagtttcc ttggccagct cagcaagttc
+   188221 agtgaaaata tgcacattag ccccgtccca agtgcaatgt gtcccacaac ccgcacagtg
+   188281 gctgaaaaac attaccaatc gaggagaatt ggttgttggt gcgctgttgg ctggcactgg
+   188341 cactgcgtgc tacttatcgc cgtcacttgg ccaagaagtc gaggtggaag aagaaaattc
+   188401 aagtataaat taccaaacca agcaaagaac ctgctaccaa gacttatgac cgtatttgat
+   188461 attgcctaat ggtctgcagg ctgctttgcc ggttgctttt tctattaaat gtttttctct
+   188521 ttctgcggcg gtttaaggag agaagcagca gcaacttggc tacttttcaa ctagaagtag
+   188581 gagttgttga ctggaattgc tcaatttgtg tgtgcagcac agcagcagtc gcacaaacaa
+   188641 agccaaacaa gccaagttag atgatgctga cgtacattcg aagctcgtcc gcctcacaga
+   188701 ccggcggcgt cttaccaatc gaagtctctt ggctcgttgg tgacggcatt tccagccaac
+   188761 attgttgtcc caaacaaacg gcaacaccaa cgtagcgcgt aagcaacaac aaccacggcc
+   188821 acaacttggc aattgcaagt tgtataattt atttattttt tctatttctc ttctattgta
+   188881 cgtacacttg cttgaaaatc gggttctgcc atctaacaat gcagtttgct tcgattaaaa
+   188941 atagggacga tttaaaaaaa ttatacaaaa tcttattaag atacactata atgtcgacgg
+   189001 tttcttaaag acaatttcaa cttgtgtatt attgttaata tgtacatcac gataaacgta
+   189061 acgcgttctt tcttttaata actaattctt tttttttatg ctgcttaaat tgtaggatta
+   189121 atcagattac agtatacaaa tttgcttgag atatatttgc tgtaagtgta atcgatttcc
+   189181 gtcaaactag tgtaattagt cagccgccta tcgattccta ctcgattcag caattttttt
+   189241 cgttcgacgg attttcaatt ttaaattgta caaaaaaaaa aacgctgggg gctgcctgag
+   189301 caaagtgttg tagttgcgct gccgtagatc gattcaatga atttaaaatg catttaactt
+   189361 gaaatttagg tcacaagtag tggctgctgt tgaggcagtt gcagttgcac cgcagtagat
+   189421 gttgcagcag caaccagtaa agctgctgca gtatccactg cacgttcatc aagtcagaaa
+   189481 tattctttgc ctggcatttg gaagacaact tgcgagttag aattgttttt gctagcagtt
+   189541 ttgcagctag atgactggtt ttagttgaac attgctttgg gtagtgccgg aaagttggtt
+   189601 aagtgcggga atgccctatg ccatctattc cacactcccc acttcgattc taattacgct
+   189661 cttagctcgg tcaagctttt agcccgattc tacggacaag ttaactcagg tggaattact
+   189721 taaaacaagt ttacacttga ctacttgttg ctattgcaat ttgtggtttg taaatattta
+   189781 tgagcttaga ctagccagtg aatacaacat gattggcatc ataaaatacc atggaaatct
+   189841 aaggcaagct aatatttcgc tgatttacca gttgaaaatc atctaattag acgagtttcg
+   189901 aattctattt gtcttggcca aacttgaatg taatatatca aagctaatta gtcgacatct
+   189961 ggctactgtt gatgctgctg catttaccac ggcttctggc aactttcaca ctttccattt
+   190021 tcacactcgg aaattaatca aaagaaaaat taagcacaga cgacgcttat ttatttagtt
+   190081 tttttttcgt gttgctttta gttggttttg tctttgattc tttggaatgc tttaaatgca
+   190141 tttgaaaacc ataacaaata tttttattgt aaattgtttt gggccttaaa tggggtgagc
+   190201 gccaagcagc aacaagagta atacaatagg cattgtggaa aattcaagtg gaaaataaaa
+   190261 caaaagccat tgaaaccaaa taatacaaaa taaaagggat aggattttac taccaaatga
+   190321 attagagatg agctaaatga aaaatgatca gatatgaaga aattgtatgt tagcaaaaat
+   190381 ctattttccc ctaaattctc ctaaatctaa actaatatat gagaagctta ttacaacatg
+   190441 taaatttttg aaccatttta aattgcattt aaatttattt ttttaaattt tttaacacgt
+   190501 gtacaacact ttcatttgtt cacctagtgt attgggatct ggcattttga tctggaatcc
+   190561 tagaaataag ttggtcagtt gtgggttgtt gcatagatca ctgcacacat gcatcggtgg
+   190621 ggctgtcccc gccacctttt atcccattct caccgccaaa cattagacat cagctaactg
+   190681 cggttttcac cagaactctt tgttcaatgg aaacaatgct ttgaactgtt accgagtcca
+   190741 acgaaaaaaa aagaggtcgt aaatgggttg agattcgggg gcgttgtcgt gtggacaaat
+   190801 gtgttttgtg tcgaatccca aaaggccaac tgggtcagtc acactatcac aaccaaaaaa
+   190861 aaaaaaaaaa aaaccgaaaa aatgcagaat ggtgttcaca taaatgaaaa gtattccaga
+   190921 aagaagcaga agaaaaactt tctcgcactc gaatctaatg acttcagatg cctggactct
+   190981 cgaggaattc taaaccgtca gagcaggaag taaagatgca ggaatcaaac ggagacaaga
+   191041 tcacttttct ggactggcat tatcaaaaaa gaaaacaaca aagaatgggg gactggcaat
+   191101 ggactgcatc gacttgtcaa tcaagcgatt gacttttgcg caaaatccgc agacaatctg
+   191161 gatgccaatg gagtagcgaa gtccacgaag acgactacta cgatgatgat gatgatgata
+   191221 tctaagctgg gctcaggctc aagctcagca ctccacagac catcgatgga tcgcagtata
+   191281 cagtatccag tgtatcccaa ctaacagttg aagagcttag cgtagttgtg gcattttagc
+   191341 ttggccttaa gcgaaaaaat tattcctttg cctaatttgg tttaaatgga aattccgcac
+   191401 accatattgt acgaaggact ttaaatgact gaacgacgac gagggccaag aacccaatgg
+   191461 cagggcaatg ccaaagactt ctcggattcg tgtcttgttc taagtgaact atttttggta
+   191521 tttctcctcg tagcggaaag ttggcgattt ggtttgggct ccccgaccga agggccaaat
+   191581 tgtactggag atgcgtccaa tcactgcgaa atgatttacg atcacgataa agagattagt
+   191641 gcggccagag ttcgctcaac cgaaactcaa agaaaaagcg aaataaatga gtttggcaat
+   191701 caatttaaat caagagcgaa tgccgtgcag attggaaaag tcagctacaa cctattttga
+   191761 acttgtaaaa tattctttgg tatattttgc atattaaata aatcggttta catgcaaaac
+   191821 tgcaccgata acgcatataa agtagcgaaa tgattttata aactgtatgc cgatataaag
+   191881 taaatagttt atttaaaaac ttttcaacat tgtccatatg tgtttaaatt tatataactt
+   191941 caatgcattg agtttgtaat cctttctctt ggaactcatc ctccgactta cgataaattc
+   192001 gctgcaataa gccggattaa atattgcagg tcgagtcaaa gaccaataaa aatgcattta
+   192061 atcccccaac catataattt ataacattcg ccgagccaaa ggcaaaaggc aaacaaaagt
+   192121 aaaagtacag aggtccccgc cgcagagcct ataaataccg catacacata ctatatggcg
+   192181 catatgggta tggttgcggc ctggaaaacc cgaagagatt gagatacctt cgcgtatata
+   192241 aactcgtata tatttaggga cacaacacac cgattgcagc gacgcatgtg gcatggtggc
+   192301 atggggcact ggcacatttg aactttaaca cacgaccgag gcacaataaa aatttcaagt
+   192361 ttgtccaaga gacatatggc caaagatccg ctgatccgcg gggtcaacgt tataaaatcg
+   192421 caagcggcag cagcagccac ggtaacaaca acaaaaaaaa aaaggcagca acaagcaaat
+   192481 aatttatgac caacttgatg catggtggcc atttaagtta tggactgact tctccgaagg
+   192541 gaaccgtcga cggggtgtga actgcccgat cagcaattgc aattgcaatt gcaccattta
+   192601 taccggggta tatatgtatc tatatatata tgcgcacata tgaaaacaga tgacgcatcg
+   192661 tctgcgtctg caatgaatgc actttttctg caatttgact tttccgaaga caaagcagac
+   192721 gagctgtctt ctcgtctgcg gatcgcagtt tggggatcgg aaatcgggaa tggtgggggg
+   192781 tttgtttttg ggctcttttg ttttcagtta gttgtcaaaa gtcagtcgga ggctgaaatg
+   192841 ggtcccaaac tgacagacgc agctgactgg gcccctcttc tttcccagct gaaaacgaaa
+   192901 aaagaaaaat aaactagtca gccttccaag tgggtgtgcc agactttact tagacgcact
+   192961 caaaatttcg taagacaaca atgctgcgag aaaaatatta caaaaacttt tcaacgagat
+   193021 actcgtctaa agtttgcatg cagaaagtta aaataatatt gtttgaaaag aaagcatact
+   193081 gtgaaaaaaa tgaatattaa atattgaagt acagcacttt aattgataag aggaaactga
+   193141 actactaaac tggtaaagtc cgaacatgaa ccgagcattt taaattaact caaataagtt
+   193201 tccgccactg atcaagcgaa cttaaatatg attaatggct cgatgtccag gcgatttagt
+   193261 aaatgactcg tcatccgcct ctctgcagcc tcattttgca tctttaattt tgggttaatt
+   193321 agcatatacg cataaatgca tttcaccgac caaaaatctg aaatctgtgt catccgaaat
+   193381 gatggggcta aaaaaatgtt ggcaccagaa tcgtgcgcaa gaaatttacc caactgcaaa
+   193441 atgtctgccc aataaaaata aacagctcgt cacacgaaaa accaaacaaa acgcaaaaga
+   193501 cgaggaaaaa aatgcacatt gatgttaatt attttggcag ttgaatgcag cagccaggca
+   193561 gcaaggcata atgtttattt ttacaattct ccttcttatt gtgaaatggg cgcaaagtgc
+   193621 atattaaaat gtcagccggc agacaaagat ggagaggaca aactgaaagc ggcaggttaa
+   193681 gaaaataata ataaaaatat acaaaataag atgcgcaact tttcaaaatt acgaacagcc
+   193741 agcgttattt ttcgcaaaat ttgccaaatt cggcagaaac aaggcaacag ccaaagaatt
+   193801 aaacgacaca aaaaaaaccg gagaacaaat aaataaacaa attcagtgct gcgttgagtg
+   193861 cgctcactca gaaaacagtt gattcagcag aaaaaaaata tgaaccaagc aaaatcgaaa
+   193921 cggtttttga tgcaagatat caagttgtgt ctggaggtaa cctcctcctc ggctgccgaa
+   193981 gcccaaagct ggccaagtca atgcaatggc caatgggttg ggcactggcc aaagtagacc
+   194041 cattgtcgac attcatctgt ccgactgtct ttatggtttc cgacgacggc ctccgagcgg
+   194101 cccaattaac ccttatgtct ggtggaaaca tcctttaaat ttaattccat ttttgagcag
+   194161 gcctcataac atgacaaatg aagtggaaca ttaagattaa tcgctgtagt cagcaaatta
+   194221 tattacaaat ataaactcaa tgtcatgcaa aatgcttaat gattaatgca ataaaacaag
+   194281 agctatttat cccctcataa ataatgtatt atgtacgatt tttaataatt ttaaaagctt
+   194341 aagtttcata tttcactaac tcacttcccc ttaacacaca aattcttaag catcttagaa
+   194401 ggtcgcttga cccatatgca ttttaattaa aaatggctga aaatgcattt cggccaacaa
+   194461 gttctcgccc agtactgggt taaggcattc cttttggcta gtcaaaagac aatccgacaa
+   194521 tgtcgtaact tggcgctaag ttaagcggct atgaaagcca gttactccac ctcctgatcg
+   194581 tgtcgctttt gtggccactt ggcccggtgg cagttccagt ttttagctgt tttgtttttt
+   194641 ttttcttact tttctgtaag gaaaaataaa acagaaaaaa aaaaacagag agccagagga
+   194701 actgtagtct ccgctcatca attgacaggt aaccgctacg ccttggacat tgatatgatt
+   194761 agcgactttt aatttaacgc ttctctgaag cgcgttagag cagcaagacc tcggatgcat
+   194821 ttgtgcctat gactcctccg ggccagatca gatctgtagc ttgagtttgc atttccgcaa
+   194881 ccagatgcaa cacaaagtaa tcgagtatac ggagtcgcgt aaccgaaact ctggccagtt
+   194941 cctccacgag tccagccagg ctcgactgga gtcgtaattt gtcagctttt agtaataact
+   195001 ttggacattt accgtaagcg acagcgatcg cgataagttt tcaacctgtt gtcttggggc
+   195061 agccggagct gagcagagca gagctctttg ctctcctgga gcagataaaa gaccagatag
+   195121 ccacgatccg ccattcatcc tcgatccccg gattgtccac cagatacttt ttcgagtccc
+   195181 gaactgcgcc taattgattt tcgcatgcga cgtgacggtc tgacggtctc cttgcctgcc
+   195241 tttcggtatt ttcgcttggg attttcttat ttggtggcat aattattgtc attcataaat
+   195301 tgtgggctcg ttatctatag gccagtgagt gagtggagtg gcgcggactg cagtgaagta
+   195361 aagtgaagtg gtttcgtttg ggagataaat gtgcaaatga tttgccaact gcgatttatg
+   195421 ccacaacggc tttttctcca caagcctctt tctctcgctt tttatgacta ccataaaaat
+   195481 gttccacgaa gcaattcctc ctccactcac caaaactaaa tgacaagatt ttcgaattcg
+   195541 ctggcggacc aactccaccc gttgatatat atttgataat ttcataaatg ttcagcagtc
+   195601 aaaggaactg gcttcttagt catgaattcg tgcggttact attactaagc aaagaacata
+   195661 atgaaatgta taccattaac aatttgtaag cttcttgatc gatacaatat tgaccaatat
+   195721 cggttcaact ttgtatataa tgtgttgtaa cacaaatgga ataagcctga ctgacatttt
+   195781 cagcaatcga aagtgcagaa gattattctg taatcaattt ccctttcttt cttgctgtca
+   195841 gcctataatt tgttccctac gaaatgatta tagcatttgc tgctgtgcga aaaaaaaaca
+   195901 accagtttgt attactcccc acttttgaat gtcggttatc gctgaaagct catcgcgatt
+   195961 acacaattcg ctgctcagta caaaaaaccg aaattgcaat tgcagcagca actgaaaaac
+   196021 tccagactcc gagggttaac attttgtaaa tatttgctgc tgcatgtgag gctttcttat
+   196081 ttattgagtt cgcttttttt gtccgctctg gccgtcgttt tgtcttgctg cattttgtat
+   196141 tttggccgca actcttgtta atttcaatgt gaaaaagaag tcaagtgagg tcgaatctgg
+   196201 gtatgaacat ttagtcgttg gggctaataa caacaacctc gaaaagagtc tgcgacccat
+   196261 tgaggtgggt gagatgttgt ggtttttttc aactcgtttt ttcttcttgg ctgaaaactt
+   196321 ttgaaattgt tttctgcttg tttttgcggc ttgtaattgt tgttattata caatttgcaa
+   196381 aaagtagcaa aaaatgccaa taacaatgta aaccgctaag caacaataaa attttacaag
+   196441 ctccaccagt ggcaagtttt ttgttttttt tttttttttt ggtgcacttc ttactgcata
+   196501 tttgacattt cacttgtgca gctggtaaag ttgcaagttg caactgagga gtatctagag
+   196561 ttttatttat ttactttttt atgtgaattt caatgatatt ttgtctagat ttttcatggc
+   196621 aaaatatttc agatttttgc gatagattaa acgaataaaa attgcaacaa taatagcgac
+   196681 agactcgagg gtctttgttt tgttgttgtt gttgtagttc cctgatttat tgattttcca
+   196741 agcagataat gtgtgatttt tctcatagac tcttgcaatt gcagttgttg ttgttattgt
+   196801 tattgtttcc taatgtgaga aacttgtgtg tcaagagagc tttacctatt gttgcataat
+   196861 cgggcgccaa aaaagcccgc tgaagccccc accgaaaccc aactgaatcc aaccgaaccg
+   196921 aacccagcca acctacgacg atgtcggtgc aacttatatc ttatgtgaac tccaaaaggc
+   196981 agtcttggca tgttgaaaag aagaaaaatg gaatgaaaag atacagaaac gttttaaaca
+   197041 ggctaatatg acaaaatttt cagacagctt gtctactgag ttaaatagtg gggggcccta
+   197101 cagtcgaggg tggtctgaat attttgtgat aatgtgttac caatgtaaat taaaatgcta
+   197161 aattcaaatc aggtatttag atgtttataa atacaagttt tggttaagta tcttgattaa
+   197221 aacaaaccgt tcaataaaag aaaataagta caaattgaat ccgatacagc caactatcgt
+   197281 caaatctatg attttctttt cgtccgaccg ccttgagtga tatatatttc atgacatata
+   197341 caagccttaa tttaaaatat tacttaataa ttaacttttc aaatgcacgt acatgcaaac
+   197401 tttttaaacg cccctcgcca agggggagta acctaagctg ttggaaacta tcttacccag
+   197461 cctgcagcca agtaacttaa gcatcgccaa aagccaccat aaaaggtaat ttgcataatt
+   197521 tctaattaaa atatttttaa actcgcaaga caaagcggca aagacgaaga gaaggctggc
+   197581 aaagttccat tcggtttcag gttttttttt cctgttcgga aaaactctat tgggcgtcta
+   197641 gataaaaatt taaattaatt taaatttgat ttttatagcc attcacctgc aggagaagaa
+   197701 aacgagccga caaaagagtg tccaaaaaaa aaaaaaagtt aaagtcaatc tatgcattcg
+   197761 taattaatta aatcgctttg agcctgattt tttcgctttt cgatttcaaa ttagaggagt
+   197821 ggcatttaaa ggggcggagg ggaaagactt ttctttttgt caattatttg aagtgggaac
+   197881 gtgctgtggc aatgaagtga aagttgtttc aatatcgaat caaacagtct tcgctggaag
+   197941 tgaaaaaatg aagtaatttt tcagttaaaa atagaaagaa aataataata agaaaataca
+   198001 ttacagagtt atgggtttct ctatacattt gaaatataaa aaacttgtta cagtttagtt
+   198061 gggtaaaaaa aaatgccggg tttacaatga aaaataataa caaaagctgc ggcccgatca
+   198121 tcatcacttc acctgttttt cgtttatttc tcattttcat tttcttattt gttcaatgct
+   198181 ataatttcga tttttttttt tttaagcaga tcccattgat aatgatcttg cgccagtcac
+   198241 tcatccgaac ttgactcgtc agccggtttt cgtagtgttt tccattgtgt gatcgacttt
+   198301 gtccgtttgt ccgtcgctca gttagtcagt cagtcaagtc agaccgtcag tttgtcactc
+   198361 tgtcagtctg tcaggtgtat tgaggcattc gaaaattgta tcgaattata gacccaaacg
+   198421 tattgtaatc aacttgactg gacttgaaag caaagatcat aaaggaaaat atactatcga
+   198481 atgtgtacgg tatcaagaaa aactaattag atatgcaaat ctggataatc tgtgggattt
+   198541 tgatttgaaa tgtgtaacat ctcttttagt ttatcttaat gttcgtgttc atactacagg
+   198601 tgagtgatgt taacatacac aaagttacca attacttaat aaaactcgaa tttaatatag
+   198661 tttaaggctt tcatctctga catctcaatg aacctgtaaa atgttttgta aaaactgacc
+   198721 taattagggt atttaacctt cgtcggtaaa acgtttcaac cttttttgct gcccggctcg
+   198781 ccataataat aagcgcccct tttttttttg tttgcaatgc aacacaattt agtacttcta
+   198841 cctctccgcc tgaaccccaa tctctggctc cgtttataaa tcatttctcc ggtggatcag
+   198901 caattacaaa cacatggcag atgtttaaaa ctgggcgagc gagagagaaa acccctgatt
+   198961 ttgattacaa tcgtcggata atgggtagat acgtacagat gtacggctac agacatataa
+   199021 tttaatagaa ttgttgtaaa aatgtttacc cagtgcggtt tgaggttttc gtttcgattg
+   199081 catgagttgc tttgctgcga atgcgatgta agttggcccg atatatagca ttcagttgtc
+   199141 agtatactca ggtttttatt acggctaatg ataatgctat atggcatgag tatgaaattc
+   199201 cagatacgca tacgacgaag gttgcttcga ctgaggaaaa gtcctacacg atttgcatgt
+   199261 ttaacggcgg tttcgccgat ccaaatatgg ccaaaaacgt atctgccgac aatggtgaat
+   199321 gttgtttaac atcctacgat tatttattat tgccacttga gacaataaag tcaataattc
+   199381 aattgcacat tgtatattgt atgctgtata ttgcacacac gtgaaagtgc agcggcagaa
+   199441 acattttggc gaaaggctcg gccacaaaaa tctaattagt aatgcattag tgagtccaaa
+   199501 catggtaaat cgggaagtgg aaaattgtaa atggtaactg tatgccttat tattttccta
+   199561 ttttcctggc catgtttgtg ccgcattgaa atcaaaagta aaagcttaaa ttccccgaaa
+   199621 aaaaaacgaa actcaatctc aaagtttcaa gccgcattta aaagagatca ttgatcagtg
+   199681 ggtgttcctc cataaatggg tcatggctat atccctcctc ctacccttta aagccatgtc
+   199741 tcttaacaat aattgtacag ttgggggctc ctcattacac ccgccatgac acactttcga
+   199801 tcgtgccaag agatcgtgaa atgggagcac acagcttttg tttgataaaa ctcgaattga
+   199861 aaacgaaaag aaatgattga ttgatacaat gataattgaa aaagagagta gccagatctg
+   199921 tgatagcgga aaagaggctg aacgaagtga attaatgaga tctaaatacg acattgatgt
+   199981 gatgttctaa ataattagtt ttgatgggaa agccagctga agcgaatgca tgaaaatggt
+   200041 acaaagaaag tgcatttgat tcgttgtttt tgtttatctc ggaaagggag tagggattaa
+   200101 gctagatgga tttataatcc aatttctcca gtcgtaatct agtaatgaca tcattccaaa
+   200161 ccactaatat gatggcaacc atataatagt tcccatttca attggactag attcgatttt
+   200221 tctagattac gtttccttgc ccctctgaga acttctctat cgcgctggac gtgtctgcat
+   200281 aatgacaagc cacaaaagtg aaaaaaaaaa agtaggaacc aaaatcatta caatgcccaa
+   200341 gacttgggct ttcgaaaatg ccaatggaga cgcaccgggc caggcggtcg ccaacacaat
+   200401 gcatttgatt gaataatgaa gcccaagtaa tggcaaagcc aaacaacatc gacagagaga
+   200461 cacacacata ccaaataaaa cccgattcca gttgaatcca aggcaaaacg cactcgtaag
+   200521 cttggcaaag gcaaacacac gggcaaaagt cgaatgataa acaaatgcag atataaagga
+   200581 aaataaagag accaaaagaa aacattcgaa tacaaaatag agtagttaca attttgagga
+   200641 aattacataa cattaggcat gagcagttta aagcagccgc gcgattgttg ggcaaggcaa
+   200701 aagtcatcgc gcgaattaat gaagtggaaa ataaacaaat tcgtataata cgtacggcaa
+   200761 gcgggggaaa aactcgagta aggcagccta gttttccgct gcttgccgac aaggctggcg
+   200821 gaaaatgaaa ttagcttcgt aaacatacag agacaaaagg cgatgacaat tttccaaaaa
+   200881 ggttacataa ctctacttct tctgtcatta gggcttccga cagattattc cattgttttt
+   200941 caccgtggtc actggccccc taatgtggtt ttcggtgggt ggagatgggg ggcaaggttc
+   201001 gcctggttgt tctggcaaat tatctaacaa attgccaatg ttacacttgt taactcgttt
+   201061 cgatttgctg agcaagaaag tgaactccag cgctgtaaca aagacctttc gtgtactttt
+   201121 tgcggtaaac aaaaagactt ttttttcgtt gaaaaatggt gtgaaattaa cattgggatt
+   201181 tgaatgagaa atgagcattt gagcatttct ttaataagca aataaaatta acataagtag
+   201241 tagtaaaaac gtttagagac catttaaaaa tatatattat aatataataa gtatttagtt
+   201301 tatttaggtg attcttcagg cttagcttag aattgttaga tgaaattgtg ttggaattta
+   201361 aaagatttga tataataaat agagtctttt ataatattaa agcttcaagt tgggcgacgt
+   201421 cttactgatc attcagaacg cgcaatggag tgaggcattt agaaatgagg tagactcttc
+   201481 agttctcagc tttcttcaaa cgcttttatt gcttaacgct ttgccacggc gattctaact
+   201541 gtcatagtag acaaaccgaa acgggacgga aataggtaaa gaaattgagg gaagagtgct
+   201601 tggcgggttc tcaatgagtt ttacagttaa atgtgtttat tttttccatt ataatttctt
+   201661 aaaaatacga aaatgggaat gggtcagaaa ctcccgctcc attttgacca cgtgtgctgc
+   201721 tggccgggga gaagcgaagg cccctgactc caaaaatcac ttgctacatg atgcccctcc
+   201781 tttcgacacg ttttactgat cgtgccaata aatcaccgcc ttttgtcgcc atctctttcg
+   201841 catttaggct agccgtccct ctctgttttc gcctgctgcc agccaaagcc aataaatcaa
+   201901 attgcaaccg tcaattgctg ttaatttcct tggcgttgca tgaatgtttt gaatttactt
+   201961 tttatctttt tcatttcggt tggatatata tattcggaca tacattttcc ccacttttat
+   202021 atgtgtcaat atataaaatg cataaattat ttattgctaa aaagcaaaat gtcataagtg
+   202081 tctgtccatg tggacactgt gggaaccttt cagttgatat tttctgcatg aattgggttt
+   202141 gggtttgttt ttgttttttt tttgttggga ccctctcttt ctgcggcttg tgttgaaata
+   202201 tttcgtcgtt ttgttgtgtc aattggcggt ctatagtcca tagcagatag atcgtttggt
+   202261 cactggaaac cccaaaagag gccagctggg cggttttgtt agtcatgagg tccaggaccc
+   202321 ggagagcttg ttgtaaattt tgatgtattg attagagtag gttcaaaaac ccacttccac
+   202381 cgattgacca gggggtgaga ggccagactc tggcaaagac ataaattcaa tagtggccta
+   202441 atgcagtgaa cggatataaa aaccatttat tctcgactga ttttattgtg cataacatgt
+   202501 cactttgtgt caacaatttt tcgttctttt gttcagctgc aagagataca ttttgtagaa
+   202561 ccactaatac atataattta ttttcttccc aaatcaacac tttcttctca ctaaaaaaaa
+   202621 aataggaaat tccccacagt tctattctta gagcagctcg tattaaaatt aattgcttca
+   202681 catgtacggg aaaaacctgt tagcgccgct ctggccggct ttctgagccc tgaaacggca
+   202741 acaaacaaaa aaggtcagca ctttatattg acccaaaaaa aggcaacaaa cgaaaaacga
+   202801 aaaaccaaaa caaaaaacac aaacaacttg aacgccaacg ccaagaaaag tcaagcattt
+   202861 ctatgtacat acttatatac aagttttata gcgacaacaa acttcgagat tccgttgata
+   202921 attctcagat gtctgtggaa aggtcattta atagactagg gatcagctga taagaacacg
+   202981 atgtcgtcgt gggtttttag aggtctgccc ggccatttga attcaattga attaatttgg
+   203041 attttatggt tcatttagat gggccatttt cgggcgctaa ttgaagagcc agatccgata
+   203101 agctcgtccg tccagatgtg ctgggaaaat aaatagagca cacaagtgac ttcaatttga
+   203161 agagggaaac aagaaattac tttcttaagt gtcaacacag ataacggggc tttcgtagag
+   203221 gatcaattac aacctgtcaa tcagtgatta tgtttgaaat gtttggttga tgcaagttcg
+   203281 tgtgcctgaa agttcaaggt tttgataagc atgataaagt gggtaatttc aagagtaatg
+   203341 accattgtaa agaaattttc atgtgatcga attcccttac tgttaatttt cctaaccctt
+   203401 aattttctga tcgtaaagca agcactaaat tcaatatttt ggccaacacg caacagaaag
+   203461 caatcaagac gaattaccag aaaatgcacg tttataaaaa cacctttcaa gtgcagaaca
+   203521 caatctggtc ttccatgttc ttcggagcaa tttttcaaga atttcgcata ttttggcatg
+   203581 agaaccgaag taggaagaaa ccttatgaac gaacatcatc acctgggagt aatcataaaa
+   203641 atttgcataa ggaaaataaa gcctgtgtgt aataacagta aaaatgcctg aaataagaaa
+   203701 aacacaagac ccaaagaaaa catgaaaaaa aagcgagtgg agtacataat aataatataa
+   203761 ttttctataa tagaactcaa tgtgccgagt gaaagagatg gatagagaac aaacgagcac
+   203821 agcagcaact gattgaggga acacaacaat aataataaca acatcagaac acacaagcac
+   203881 acgtaacaca cacacacaca cacacgggga ccagttcaac aacctttttc tctgaaggag
+   203941 cctctgaaga aaatcagcaa cagagagaca gcacacagaa aaaacaagtt gaataataat
+   204001 tattaacaaa atttgtttcg tataaaaata ttacaataaa tactgcgagc actaataact
+   204061 aattctagac gaagatttga tttcatatgt taataaaata ctacacttaa atattttttt
+   204121 aatttttttc aagtaaatct ttatgatcac tgaccagtta ctatttattc ttctaactgt
+   204181 tgttattaat ttaattgaat taatttaaat ttgttattag ccccatcttt tgtatttctg
+   204241 tgtagaagaa gagagggaga gagtaagatc aattgggtgg gggagacccg aatggggcga
+   204301 ttacttcact cacaccacgc attcggggca tccacatcca catcgatggt gatcatcggg
+   204361 aagaatctca agaaactcaa gaagtgcagg aagaagaaga aaaatgatga gaagactggg
+   204421 cgaaaattgc ccacaaaagg gcgccaacag aaaccacaat aacaacaacg aaacggtacc
+   204481 ttgaagagaa tcttggaaat ttcacaaatc aaatcaatat aattgttttt cccttactca
+   204541 ctttccagct ggatattgat atatgaagag gatagatggg ttagatgggc tacaattacc
+   204601 tagtaaacat gagaaaaaag attttcgaac ggaagttgat agctgcattg gagttgtttg
+   204661 ttcttttatc gatttttttt ttttatattt tagcctttat ttttaccttc agtccgccgc
+   204721 gtgcgtactt tctatttgca aagtgcggaa gccgtgctca ctatcagata cacagatatt
+   204781 aactcttgga ttgccttgcc ctaggttgtc agtcatttgc atttgacgat gattgcgcca
+   204841 tgaaccgttc catgagtact cattcacact cattcgcact cacttcactt cagtcacatt
+   204901 tccaatgact ttttgtccgc cgactggcgc gtgcaagaaa tcgaaaatga ctgacaaaat
+   204961 atatattcat attcaaatca gttgtccaac tgctccatct cggctgacaa acgggctgca
+   205021 cctagacctt gtcaacgggt tcggtttgaa aaagagaccg atcgcaaatt caaaagccaa
+   205081 acttctccaa tgcgcggttc aaattgaacg caaaaaaaca atgagaattg atttgaaaaa
+   205141 aaaaaatcaa ttcaagctac cattgcgcaa aacttattca acgtgcttta agattgattt
+   205201 taaaaggcta agtttaatga tttaggaata aattaaaaaa aaaaatgtat gtacttagga
+   205261 taaaatgcgt acataatcct attttatgct tatactagca ctaataaata attaatattt
+   205321 tattttgtac gaaatgaatt tgtctagtat tctaagtttc tcgtttacat ttacaatttc
+   205381 tagtcattaa tttaggttta acccttttca gtgtatttca agtttccatt taataaaagt
+   205441 gctacttttt catcttgtta gtggattttt attttgttgt ttttgctact tctcctattg
+   205501 ctcgaacaag ccactcaggc cataatctaa acgaattgaa tggaaataaa gtgggccaca
+   205561 cacacacaca ctcacgtcac actgccacat ttgtcaactt aaatgaaaat gaatgaaacg
+   205621 caatgaaatg tatcgagtgt gtgagtatat ttgtacaacc gcatcaagtc agaggccaag
+   205681 cccacctaac gcctcaaaag tgagttggaa ttggaatttc tgtgtcagac tcaacctcaa
+   205741 ttaggtctgt cattgtcgag ccaagaagtg atcatgttaa atgcaaaata atttgattaa
+   205801 ttaaattaaa attacttaat ttgtgtgaag aaaaaacata cgtatacgag gtgtgtgtgt
+   205861 ggagagcgtc tgactttcac acgaaattca gctcagctga tgaagcgacg acattagccc
+   205921 aattcgcttt gatgatttat tagatttttt ctaggttctt taaattattt ttctgctgct
+   205981 atttcttcat tttgaccgct ttggcagcag caaaaaatta atttctcttc ttttcttttg
+   206041 acggaagtgg gttaataact gataacgctg cgaaaatgta aacatgacta aaaatgaacg
+   206101 tgcaactttg cggacaaaat ggttaaacag taaaataaaa aaaaatatcg cgtaaataac
+   206161 ttgattttat ttgacattta atttccggcg agagaataaa catattcatt ttactttgcg
+   206221 gaaacacaac ttcttatgga ccttttattt atgtattatt tatatatttc ttgctggaac
+   206281 atcagctaat taaattattt ctatacgcca gccttatagc tttcattttc ccgtaaaaaa
+   206341 aataataaat ccatataata tttttagtat tctcagctga tttataaatt ggtaggagaa
+   206401 aaaagaatga ataatagttg atgaatctgt cagcgaaact atttgttcta cttaactgat
+   206461 ttattgaaac caatctttta ttgttttacc ttataagata catgtaatcg aacgaagcac
+   206521 ggctgaaaaa ttcaaagtca acgtgctaag aaaatttaag cagattttaa agaccaccta
+   206581 ggatttgtga cttttgcaaa aacctttaaa atgtttgtct gaacaatggt ttcattcatt
+   206641 atccacaggc aattagcata aatctaaaaa tacctttgtt tacaaaatag ggttctcgga
+   206701 taagcgtaga atattttatg acttttcgtt ttcgtacata cctgaaacga gaacaagaag
+   206761 agaaacgtaa aaataattta gtaagaagat gtgaaaactt agtaaacgtg tttgaaagcc
+   206821 ccttggcaaa atatttctgc gggatggaaa gaacgggcta atcaccatat aacgccaaac
+   206881 acatcatcag tagcatcatc atcaccaggt aaacccgtag agacaaagca aaaggttctt
+   206941 atcttaaatg attcataatt gtgcagaatg aaaacgtgtt gtctcctttt gttttacgtt
+   207001 ttttggggtc aaatatttgc tttcgaaaaa tattgttatt taaaacttca acaacaacta
+   207061 attatgcccc gaaaatgcct attaatggcc accatgataa atggtttcgg ataaggggta
+   207121 aaaagacaaa ttagggccgc aatttgcaag tcaacattta ggcagccaag tgatagttaa
+   207181 ttttcattct tttttttttt tgcttctgcg agtttttgtt tgcggcatgc cccacatcac
+   207241 aacaatcgca ctcgcaatct agctagccaa atatggccaa acaagaaata tatctaaaat
+   207301 atctttaatt aaaagtacaa acgtacaaat tatgcaagac gtgctcaaat atggcaaaca
+   207361 cctaaatatt tcatgagttg ataaattgca cgcatattgc cgagccaaaa aatgccacaa
+   207421 gccacagaca cccgcaaaca aaacatttcc gtacatacat atgaatagta tctgcgctct
+   207481 gttctgtaca tgatatctgc aggcgaagct cgcccattta aagcccaaaa gatacagata
+   207541 ccgaaagctt aagccaaatc ggcaattcca caacaaccca cgtactgtgg gaggctagaa
+   207601 acaaatcgaa gacaaaacat aaacattgca catggtgggt gactgacttg gtggagcagg
+   207661 gcttatcggg gaattgggtg gttcgatgga tttggtggtt tggtggttgg gtagtcccaa
+   207721 cacaagtaca cccacccgtt gacatcggca cagcatgcgg aatccaaatc agtttggtaa
+   207781 aatgcatttt tctcgagtgc tttgtatttt tctgcctgca gctatgttaa atacttgagc
+   207841 tgcgaaaatt ggaaaagagt tttgacaatt tctcgaagcg caaaacaata atctatatat
+   207901 caccacagtc tattttgcat atttattatt tatgtgcaga ataggaagtg aacatatggg
+   207961 ttcatatatg agttataatt gataaacttg aggggttgat tcatatgtag ttttaggtta
+   208021 cttagtcgct tttactacta tgcagtatat atatatatat atatatatat atatatatat
+   208081 atattgatat atatatatat caatatattc aatattttat tcccttctac aatattgaat
+   208141 ttaagtcagc tttttagctc ttttaatcac attcatattg aataaaaccc ccttcgcttc
+   208201 gtattttgaa tccattgttt caatttctcc catcagaaaa cggcatgtat ctttgctcca
+   208261 tttaaacgcc ttccttttcg actttaagcc ctctttttac caagtttcga tcactcatca
+   208321 tttgcattgt agccgccaca atttacagta attgcagact gaaatcgctc tttttttgag
+   208381 cagcaagaca acaaatttat tttatttaaa tgaaattccc ccgagggcaa tggcggcgtt
+   208441 gtttgctatt ttttattcat tttttttttt agggcgcggc ttgaagctcg gatttttgct
+   208501 tcataggggg ctgtgtgttc agggtgtctg cagcgaagcg aaattatcat gtccacctac
+   208561 gtaatttagc atgtgttgtt gccgttgttg caattgctaa aattattggt tataaataaa
+   208621 tgcttttggc attgtcgtcg gcaacggagt ctgggccttg ttcattttgc ctattttcgg
+   208681 gtatgtaatt gccaatttct tgagcttatt gtttttgccc atgctcccct atatgaaaca
+   208741 actctgcgta tcaatttttt tttattttgg gactgtctgg gcaggtctct gaaatcatac
+   208801 caaaagatct tcaaacttcc attgttttca aaaacgacgt ttccaagaag ttcacgaaaa
+   208861 aaaacccatg ccaaacattt tacgcacaaa aaatcaacga agagaagcca aattctaaac
+   208921 tgccaaataa atggaatgta ttttagatga tttatctaga cgcgcggcaa cagcgcacaa
+   208981 caagctgaga agagcataaa attgaatctt ctacctggaa gtgttaaagc tcctcagctt
+   209041 tataggccgc acccggttcg cttcctcacc tgcccgtgga ttcctattga atttcgactt
+   209101 gactcgactt caactcgacg cggctcgatc accttgaccg cacacaaaac acaaacatac
+   209161 acaaaaaaaa aaccaaaaca cagatactga gagaccgaga gagagagaaa aaatttacat
+   209221 atcgccattt gccacggccc caaaacccga agaatcttaa aaaacgagct gagctcaaag
+   209281 aactacggtg atcgaagcaa taaaaagaaa ttggcaaggc gaatttacgc ttggaaaagc
+   209341 caaagaaaac ccatgaaagc atcgaagata atcataatca ttaaaggtga aatgggagat
+   209401 actaacaata gatttactga tatgcataag catcttagaa ttgaaatgaa aatcaatgca
+   209461 tattaatctt ctactcaaag aattattttt ttatggcaag aaacttgcac ttcacactta
+   209521 cttccatctt ctaatctcct tattcaaatt acgcttaatc ataaaaaata gcttttgatg
+   209581 catcttcatt ttactcgttt gttttacaag attaattaaa aaacatctga ttcgatttta
+   209641 gagttttctt aatatcattc cacaataata actcattaat caaaactttg gtcgcaataa
+   209701 aaatcttcta gctcatggct gattgctatt tgtacacatt ttgacaggtg agtttttgat
+   209761 ttataagatc tgtcttatgt ttattcatga agcgcacagg catttttcac attcagtaac
+   209821 cctcaaaaaa cttgactaat gcatacataa atatttattt atttcgtttt aatacgaatt
+   209881 ctgtgttagt tatacaggaa attagcattt gatagttaat tacaaacgcc cgaggggcgg
+   209941 atattttttt ttttaaatcg gcacatattt tcggttagcc cgtttcggtg ctaaaatatg
+   210001 cgcaccaata aatatttggc cagacaattt acaataattg cggcgacggg aggtgtggaa
+   210061 atgtggaatt atggcatgca gtttggaccg cgacgggtct aaactaacga tttctaccga
+   210121 gctctggcaa agcttccgat atcgtgcaaa ttactttcca attaaacagc aatttcaatg
+   210181 gtcgaggaag agaaagagtg gaatggcaaa atacaggttg cagtcgtggc aaaatcataa
+   210241 ttataaacta atttgaaatc aagatgccct tctcaacacg tcattatttc aggaaaatgc
+   210301 caaggcgcgt taccccatcc ccccaagatc ttatggatcc actgaatctt ccacggcctc
+   210361 cgtctttatc gctggcaatc taatggtgtt tgcccggtgt taccccctct ttggttacct
+   210421 gggtcttatt attccagctc tatctctttc aaagcaaaga accgtgccca ggggcaggtg
+   210481 taaagatgcc ataactcaga agatttcgca gcgcgccatt tgaactggaa cttgggctcc
+   210541 agcccggagt tggatagtct agtattgtgt cttgattgag tgagcccggc gaagagaata
+   210601 agaattcaat tatgcagcaa aatgtcacat ggaacggctg gcgaagggaa caatggacgg
+   210661 aggcagtgca tctgtgcccc ttttcctccg gtcctcccct tcctccagct gacgatgatg
+   210721 gctgatgtat gaagtggcaa aatcgctgca gtgaggcagc aacaagtgaa tgtgcaagtt
+   210781 ggggatctac ctggacttct ttcttagcca aatgccgaga ctacggggag cagcagtaga
+   210841 tggatgactg gatggcatgg cttggaactg cggcggctac tgaacagaat tagctttcaa
+   210901 tttgagcagc agcaggaagc agggctacgg cgaatagatt gtgcgtttgt atcttgccac
+   210961 agtggttaga attagatttg cgaaaattaa ttaagtgtca gctgcgatga ctaatcaaaa
+   211021 ggaaaaaatg tttcactttc gagaaataag aaatttcgta tttataaggt ttaatgcatt
+   211081 aaaatgaaga tattgaaaat atcgaaaata gcaaacagtt tacggcgatt tgtttcttac
+   211141 catttggttt ttcgatcgat ttctaaataa aacgataaca gagcttttcc ccttatctta
+   211201 tcccactgtt ttcgaagact tgaaaagccg acttaattaa ggcatagcgc atagatagaa
+   211261 taggtagacg aaaaaaaaag aaataaataa ataaaatgac gccgcagcgg aagatgtcgt
+   211321 cgggagttta taaactaaac tcttgagctc aaactcaatc taagggggga ccctcaacta
+   211381 aatccgccaa tggggctctc tattccacat cttggtttta gacttttgcc gacttcagct
+   211441 tgttttttat gaggaaatct aatagctagg gctagcgatg aagctgaatc taaagcagtc
+   211501 gcagagatga agctgttggc gttaagaagt cgatttggtt ttgccgctgc tgtagatgtt
+   211561 gctggtggaa tgaaatggaa tgattgatga tttgttggct gtgtatcgcg ataaggttgg
+   211621 tgggcccatt tgttgtttgc ccccatagct gaacaaaggc aaaaagagcg ccagagattg
+   211681 aagaagaagc agccaacttt ctgggcattg tcttcggtta cctctgctca tgtttttgag
+   211741 cgacagagaa gttcgtgctg ataattgtga cacggcccca tgcaaaaaaa aaaaaaaaaa
+   211801 agaaagacag cgcagaatcg agatccaaag tgcagagtgg ctgaagaata ttcagacgat
+   211861 gccccctgct aattgcaacc acagacaggc aactgcagac agacaataaa gtacaaatag
+   211921 cagcaacaaa gttaaaaaga taagctccat cgccaagttg gttgaattct taacctatac
+   211981 ggtgatccgt agatgctctt ctgttctatg aatatcttga ataattttgt aggtgctttc
+   212041 tcaaatacaa gcagaccaaa tagcgataac ttgttaaatg ttttgaaata catttcgttc
+   212101 ggaatgtgtg ttgcaatatc ttataataat taatatatta agataacgac atggaaacat
+   212161 ggaaaaacat ggcagagcac atatacaata gattagaacc aataccgctc gaataacaaa
+   212221 ctccagtata caatctggtc aaggattttg tcttcacagc aaaagagttg acttctctta
+   212281 tctggttgat ggtgcttagt atgcgcaact gccaactact cagtatcttg cagactgtct
+   212341 actgcaggtg aatgtatctt cgagatacgt tttagtttcc tttacttatc aggtcttcag
+   212401 ccagttcttt tgttttcaca cttgcacaat agtattcctt ggccttatcg ctttgaagtg
+   212461 ttaccttgat gtttaacaat gcaactaacc aactatagat ttagatagca acgagatgac
+   212521 cccaaaagcg atcgtggatg atgcaggtga gtggtgagtg gacgggttca atgcacttga
+   212581 ctcggctggg tggcatatac acaactaatt aacctagaaa aatgcaaatt tctgcaacac
+   212641 acccacttca catacctacc tatgtactta gaagggggcc ccgagtgctg ggaacaattt
+   212701 ctaagccaaa caatcaatca acgttggttg ttggccactt gagcacgtta agactggaaa
+   212761 cgaggataca cgaaggtatg tacacatata tatatgctat aggtgcgggg atgcatgtac
+   212821 aggatgggtg tgcaaaagta atatttatta tgtgtatact tctctgacaa acaacacaag
+   212881 aacttggccg aggcccaggc ccaggccaag gcacactgtc agaaaccgag aactaactat
+   212941 cctcaaaggg ccaaagggga acacgcgaga ccgagaggcc gagtgaatga ggtctatagc
+   213001 aactccactg gagacctgga gacggaatac agacggtata caggatggta tgggactccc
+   213061 agaacgggcc tcgtctcctc tccgatggca tgctgccaat agctcaataa caccccgggg
+   213121 accaattgag atctttttgg tgggataggc aggcacctag gacaccgatg gccaagcgtc
+   213181 taagacaaac agtgaaacag agagggtaat acaggcaaaa ataggagaat cacagacaaa
+   213241 tgctcaacgg gggcacacag ctcgaaaatg acacacgatg gctaacagtg agtcaatctt
+   213301 ggttaattct attgatttta agcacttttg ttatacaatt ttttaaacaa aaaggtgtga
+   213361 aatatttttc ttattcacaa actataaaat gttatttctt taactttagt taagttttaa
+   213421 atgaaggcat ggacacagct ttaccttcaa gttgttaata acttgttagc aaatttagcg
+   213481 tatttgtaaa aaacaatata tttcgaagct tagttgtcaa caatgatgca ataaataaac
+   213541 tatttattag tacttaaaaa atatttctta ttatttactt tgcctagtgt ttttattttt
+   213601 ctatgtccct tggcccacgg tgtcccatgc caattgtttt gccagacaaa tagacgaccg
+   213661 ccaaaagcag caaaaagtcc acaattactg cctataggtt agacctcttg gtggggccgc
+   213721 agctttagtt ttagtctaag tcctagtggg gtgcccagtc acgcggccgg tccaaaaatg
+   213781 aaaaaacgct agcaagatga gattttctcg gggttatggg ctggctgctt tttggcgttt
+   213841 ggggccacca gaaccacgag aaatgctgcc agcctttgac ctgacctgtc aacatggcac
+   213901 agcccccaaa aacacgagct tcatcttctc gaatataatg ggttttgaag tggatggtcc
+   213961 ctgtgatgat aactacaatg ttccgctcca ttgacttgac aatgcattct gacatccgtt
+   214021 ttgcacctcg tcgctttttc ttggcctttg tttttttttt tttttttttt tttttgctgt
+   214081 gcgcattttc ctgtctgtgt accgccaact acaaagggaa cccctaacga taggtggtcc
+   214141 tatgccatgc catctcgtcc cattcgatcc catggccagg taatccgaga gccgtgttag
+   214201 ggacgctttt gccggcttca ttagccttca gatcggcgac agacaatgca acaattatgg
+   214261 cccagcattt atgcactagt caaaaccaaa acagaagaca ggcgaacatg gacactcttc
+   214321 tggagagcaa gaaacaaaag caaaagtgtt gctaaattta gaacgactgc caagtagaaa
+   214381 agaaaatctg tgcgcctagt cggggtgcca agcctgtgag gtctgtcggt tgagtaaccg
+   214441 caaaagggga ccgctggagg ggcatcttga ggggtctgag gtggaactga ctgctgccga
+   214501 cgccttctaa ttgccgctcg acgagcggaa gccaagagtt gagttctatt tttacataca
+   214561 tatacatatt agctgcggag caactgcatc cacaggaagg gggagccaaa tgtggaacac
+   214621 acactgaacc gaaggagtgc ggagaacaca cgcttcccag atttgacttt aaaatgtctg
+   214681 ccggcggagg ggttccttaa tgacaagcgt aacgatattc caatgattta aaaatagttt
+   214741 ctaaggtttt gcattcaagt agctccagct gaagttttat ctttatgcaa cgaatgcgtt
+   214801 tacaattatt attgttaata ggcttttgcc gaaacttttc agtaaaataa aaagttagaa
+   214861 aaactgcata tatacataac gtattggttt cacaataagc ccacacctga acgtattata
+   214921 ggaaacagtc gaatggttta ttagtaaggg tgtttaacct tagatctgcc tacttatgaa
+   214981 aaacttaccc ctgctgtgct ccacttcctc atccacttcc cgatacccgc catatccatc
+   215041 gacttgttga cacagttgtt ctacccactg gctattgact caaatgactt tcacactcaa
+   215101 atagtaaagt ttatttattt tcttctttgt gaacactcag ctccaacgct tcgctcagct
+   215161 agggtaattc aatattgtaa ctcgagctat ttataaatta ggaaatgctg ccaggaacag
+   215221 ctaagccaac aaatcgatca actagttctc cagtttatgg atcatttaca taccccctgc
+   215281 actctgtact atcatttcta gatctattct caccgaaagg cgacattttt cacggctgtc
+   215341 aagtgctgtt ggcctccggc cttttcagag ggtatttcaa tatgagttct cgttgttttt
+   215401 tttgggtctg tgacttcctc aataaatggt taacaataca ataaagtcgg tcggtgtgtg
+   215461 agcgttttgt gctaatttca tgggggtcaa tctggcactg ttttttaaga tcagtcacgc
+   215521 agttgaagtt cccccggctc acggacacat ttgaagcatt tggaaaacaa aagtttgatt
+   215581 tctacgcgct cttagcctac aaaaagcaat tttcgcctga ggccaagctg cgctagaaat
+   215641 ctctcgcgta tccgaatcat ttttgggtga tcaattaaaa aaagagccat tccaatttgg
+   215701 caagcggaca agcggcgagc cagaaattgc ttttgacaat cgcataatta attttcgcag
+   215761 gcttctcgaa aaaccaggca cttgtggccg tcaatgtaat atatcagaaa aacaactaaa
+   215821 cagaataata aaacatttgt cgacctataa aactgcacaa aattgcctgt caactgaatt
+   215881 gagaacgaga acagcaataa tagaataatc agatcagaat ttattagggt cagttgtgta
+   215941 aaaatatatt ttcacaaaat tattacgaaa cgagaacaaa aattgtaaaa attaaaaaat
+   216001 gactttacgg ggcaaacgaa acaaaaagag ggcaggttaa taatagacag agaaatgttg
+   216061 cactggtcag ctagctacgt gtaaaatcca tttatttaaa attttatatt gagtttttta
+   216121 aatttttaca atattaactt gattgcaaat caaaatcatg tttattttga gtcagcaaaa
+   216181 taaatttaaa actttcatat atgtaaccaa gtgcaacagc gctgcttttt gttctctatc
+   216241 tcaactcttt cattctaaac actatcttaa tctcctaaag aaataagatt agatattgtc
+   216301 ggccaaaatg atgcttaatc ttggatttaa tctttcgaca agtgtttaga agcagttgac
+   216361 atatctttcc cgctgttctt caaccacctt catttgcata gttcaacgaa aaatccaact
+   216421 ttcccaatgg taaactagtc ttctggtttt atttttcata tagtcgtctc agtttccttc
+   216481 gctcttttac gacagcgatt ttatgtttat taaatacaaa ttccaaaact tatcgtgcgg
+   216541 cttataagaa agcaggttgg cctttcagct cagacttcaa ggcggtgggc atggtgtttt
+   216601 tttttttagt tcggggcgtc cgtatagtat tgtcgtaaat ttttggtaac ccagccccat
+   216661 cgcaggtaaa caaaataaac gattttcact tttttcttcg gcttctcgct cggtcagttg
+   216721 agtccaggtg ggtttttatg ttttctatgc tttgattagt ttttgttatt ttaattagtt
+   216781 ttcagtggca ttttatttcg tatcgtcatt ttctatcccg aattctaaaa ccacaagact
+   216841 acagtgaaac gagaaagcga gagttttcct ttgtcgtgcc gcaattttgt ttattgttta
+   216901 aatgctggct gtcttctggc gatttttgta acttgattga acgaaagttt caagattaga
+   216961 agatgttttt gcccggggca aaccgatgcc ttaggtcata ttaatgaatt aatcttggtc
+   217021 aagaacttag tttcttcctt aaaaagtcgt tgtaatcaaa ctaaatgtca gctttgatca
+   217081 tgtttaccga tcagattatg ctgtcttctt taaaaatgtt ttaattaaaa ttaatgatga
+   217141 aagtccagtc atagctcatt aataagttta aactgatatg tctgcgaacg ttctgcattt
+   217201 ctaactttct cactgcccat ctgcaaaatg ttaaagacat aaaaatatct ataaaatgtt
+   217261 cacaactaag tttaatccca acaagatata tttcccataa atgtcaccca aaactgcagc
+   217321 aagatgcaca agaagcctga gatgagacga agaaccaaat ggaactataa ttttcggaac
+   217381 agaggaataa caaagaaaag cccgcacaga attggaaact aaataaagat ttattcagca
+   217441 cacacaaaac taaatttatc ctatagtttt tccagggtga cttccctcaa ctccaagtac
+   217501 agtgaatgtt ctcattaggc actttatctt aattttgggc aaggtgttag aagttttaac
+   217561 agaacatcga actaaaatta ggtagttctt ttgaacttaa aacagttata gatagacatg
+   217621 aaataaatat ttataccact gtttcctata agttaacaat atcttctatt cttaggattt
+   217681 ataacttgtc caaaaaggtg aaaaatccga gactcaccct attgcattcc aaggcggcgg
+   217741 ccgagtgctc ctcccgctgt cgaaaacagt ggctgcactt gctcttgttg aaaatgttcg
+   217801 gcgtgaactt gcggcagtcg gcggtgcgag cgtttgttgt tgctcctgcc gcgcctgttg
+   217861 ttgttggagt gggtggtgcc gatttcgagg gggcggcagg caccgttgtt gttattgtcg
+   217921 tggttgacat ttcgctagtg cgctgctgcg tcggttcaac aaacaaaaag agtggggaaa
+   217981 cttgcagagg agaaagagag tgcggaaatg gtggtacttt tagagttttc gcagaaatgg
+   218041 catggctatt atttttaact cgatttgttt tgtttttcgt ttactttttg caaggacgcg
+   218101 aagttacgtt aaccaaaaca ttggcttgtt tttgttttgg ctctaaaagc cagcctttct
+   218161 taactatttc ggcacttcag aatttatcag ctctgataaa cttcttattt tcgggtattt
+   218221 ggttacacgc tcagtgagag agcgaaaaag agagatgggg aaattattca atttcgcttt
+   218281 gcacttcgct ttcttagctt tggcacacat gcacacaatt acgcacaaaa acacacacgg
+   218341 acgcccgcgt aagcccaaca aatacacata cactcgcgca cgcgagaacc agagcagaag
+   218401 tttggggctt tttcttcttt ggccttcttc gccgcctttt gttttgtttt ttttttcttt
+   218461 cgtattactc gtatttttgt tgttattttt agtggtatat tcgtcgaccc ggtattattt
+   218521 gttttatgct cgcgcggaaa acgcgctctt tttttgaagc accgttcgcg cacgctgctc
+   218581 aaaacggaat gagcaatcgc accgattcac cgaaaaatac tgaaacatgt tgagacacaa
+   218641 aaaatactga aacatgctga aacagggaga aataccgcaa acaagtaact atgggaatta
+   218701 attttcgtca agtttgcgta tatataccct ttcgaataat atgtatttaa tattcatatt
+   218761 tatttatact tcatatggaa tgtctttcgt actttagaaa ataagtcaaa aatagtttag
+   218821 gaactataaa ctatatagct caatctatta atcttcagta aactttctga aagggtataa
+   218881 cattccgctc tctctctgca catgttctct tttgtcggca atgttaaccg tttctgtttg
+   218941 cagcaacagc ttctgttgag gtaaatgtta agttaacagt ttacgatttt tcttttaaag
+   219001 tttgttgaat tgcccaaaaa ctaaagataa aaggataaaa tgctacaaaa ataacttact
+   219061 taggtaaagc tgactaatct tgttaatcat gagttacaga atagccaagt tacaaataat
+   219121 atgatttttt ggtttattat tgctttttaa agtattattc gtatatttct tctaaatctt
+   219181 aatttgttat ccagggatta catacttttg ttaagcaata aaatgcatca acaaatagta
+   219241 tgttctattt attcaaacgt taagctacga atctgcaaaa gcaatcgaca gactagttag
+   219301 ttgcatccat ttccaactgc catatgaata aacgttaatt taaataaatt aaaaataatt
+   219361 taaagtaaag gggtgaactt aaaatagagt attttgaaat tgtgcgaata aatctatgat
+   219421 atgcgaacaa tgattaaaat ttttgtattt gtcaacgaga caaaaatttc cttaagtgat
+   219481 actcaaagtt tgttgataaa taaaaatccg gcaaacaaaa atatccgaat cggtggatgt
+   219541 taaccgattg aatctctaag cgcgcaatgt tagtttcaca gtcgaattcc tctcatagtg
+   219601 ttggtagagc ctcatgcact tgttgcttca tgttgcaagg taaagcaata aatgttgctg
+   219661 gcgattttaa caacgaataa taaattctat tgtagttata atttctatat atataaaggt
+   219721 tcctgaattt acgtcaaggg aatttaatag ttttaaaata aacttatttt aggtgtatgt
+   219781 tgttaaaaac tataaaacta gactttcaga agttttcaac attttcttta gcaacatgct
+   219841 gctaagcttg ccacacgaat gttgccacac ttccgtcttg taattgatga ggtttttgac
+   219901 tctcgaactg gaaaaatgaa ttatgctggt ggaaaaatca aagccggcaa aggcaatcat
+   219961 ttcatgccac agacaatttg tttacagatc tcgccagaaa aggccaatgc aacaacaaga
+   220021 gcaagcggta cccataacaa taacaattaa ctgcacactt cattaacaac ctcggtcaac
+   220081 aaattttcaa gtgcgatgca gaacagaagg agaaggcaat agagataaac ccttttcagg
+   220141 caccacaaaa atataaataa aaggcaatgc actgcggaaa aaatgacaca tatttaaagc
+   220201 acattatata aaaacaatat gcttttggta aatgtataaa cagtataaca attaataata
+   220261 aattggattg aacttaataa aaaaaattag gagtaatgtc aaattacaac atatggaaat
+   220321 ttatgtgctt ataccacgtt tttttctctg tgtagaacga aaattgatta attacgcttg
+   220381 tcaagggttc tggcatttga tggggcttgc ggggagaggt aaaggccgca gggaacgagg
+   220441 taaaaaggag tcggccctcg atcaaatatt ttgactgcca cctaccatat ggggggctct
+   220501 gtgtgtaatg aggggtaaag atgggggggg ggggttcggg atctcgcaac ggaaagacta
+   220561 atgtctggcg ctgccctcaa agaattaagg aggaggagcc ggtgagaata aaggtgaaat
+   220621 gaagaagcga cgtaagtccc aacattattc acttccatac gttttattga gtttggatga
+   220681 tcgaagtggt atttataaca tttttatatt atacaaatta atacttatga attttaaaac
+   220741 taaaacaaaa cgattatatt gtacatacat acctatgtat attttaaaca agccaaaatt
+   220801 aatttagagc taactttgga acactgttat attaaatagt ttccatctga gataaactat
+   220861 ttgtatacta aatactagac atttcgaacg atgtgtttat tgagctaaat atttactagt
+   220921 gatacgtgat aatcattggc aagcagcgag ttcattgacc atttcgcttc tgggtctggc
+   220981 atttacggta attagttgat tttaaatttg attcggattt ggctgactct tgtgcaattg
+   221041 ccgaaaatac ctcgattgca ttggcagctg ccttgcaaca aaacattgca tgataatggc
+   221101 ttctttttcg ggtccattgt tgcgctctaa gttgttattg ttgttgtggt aatacctgca
+   221161 tagccgaaaa atgcattacc gaatattgat ttcgatgtgg aagggggaca cactttgtgt
+   221221 gcgagttttt tttttttttg gttttatttt tggtagccac cgaacgccac tcgttttgca
+   221281 cattttccac tctcattcaa atgtcaatcg agtcacgagt tgagttgtcc ccttttaacg
+   221341 cccccctcca tcaccttctc tcaccaactc cttgcactcc aaatgaattt aaagtaatgt
+   221401 ggaagacaaa caatgaagat gaaaaaccta tttccctaaa tatttccata gtccttaccg
+   221461 cacgccaggt cgttaaaatt ctgcgccttt caaaattacc caagatgact ttattagctt
+   221521 tccgactaaa tctggaatca gttaaactac ttaatacacc atttgaaatc gccttttcaa
+   221581 accatattgg ttttatagtc tttcaaaaca ttttgagatt tttgaatgta tctcaagccc
+   221641 attaaagtta agttttagct cagacaggct aagcaaatgt ttcccaaaca ttaagtttta
+   221701 tgtatcatta actgatccaa cggcaattag ttcgaccgac ttgggactgg caaccaaaaa
+   221761 aaaaaaacat gcaacatggg gcaggagcaa caacaagtgc atccgatccc gccttcgggg
+   221821 tgtgtgacca aaaacttgga tcacgcttct tgtcggagca cttgggttaa taagtatctc
+   221881 gtctagccat gcaaatcgtg taccgaatcg aaaatcaaag cactgaactt gaggcaatct
+   221941 aagctgtcag ctccaccgcc cccgttggtt ccgttgttgc tgagtttttc ataatctgcg
+   222001 cgtgggctgg cactttccat attttcccaa agaaaaacag cctttaaaat tcgaacaatt
+   222061 ttgcgccctt ttgacagcaa gcgagtctcg attcatgggt aattttgtga gaagagggcg
+   222121 aggtctaaat gctgcacatg acgtctggtt tggttgttgc tgttgctgct acatataaaa
+   222181 ttgcagctgc aacactttca cttttttcta tgctaatttt ctaaaaaaaa aaaattgaaa
+   222241 aaaattggtt tcggaatttt gaaaagtgac aatgtatttt ggaaagataa aaagtaagct
+   222301 attcaattta acttagtata caagaactgg ccacaatatt aaattcttct tttgtctaac
+   222361 taattatgta tttaaagctt gaatgtctta tgcgggtcaa aagaaaagca catatgtagt
+   222421 ttcgaaaacg aaccaagcac taagaatact ataaatatct ttagaaaaga caatgattta
+   222481 tgttgaactt gccaccaaat tccaatggag ctgcaagtct agacttaatt caaagataag
+   222541 ttatgaatag attatgtggg atagcgacga aaatagaact acaaagatat atggcatatt
+   222601 tatgaagaca ttcgaactag acattttatg aattttttag tatttctagt tgttattttt
+   222661 agatccaaat aaattaggcc tctttcagga cacgattctt tcttactacg attcaaactc
+   222721 tttattttaa caatacttgc attttctata tatattcgtt ttcctttcaa agcaatgaaa
+   222781 accacttcct atatacaact agtttgcggt tgcaaataag caaatatttt tacatgcagc
+   222841 catgtttttg ggcgtcaaaa agactccata actgaccgta gcaagctaaa acctataacg
+   222901 aaaccgatcc aaggccccaa ctaacggaaa ccacctgcga acaattttca ttcggcagca
+   222961 acaaaacgaa gaaaagccaa cgaaaaatgc cgcaaatcga cgtctgacag tctttggctg
+   223021 gtactggtac caatttggca gcgattcgcg gactcacgcc aattccaggg tgtggacccc
+   223081 gggggaaaaa aaggggctga aagacacggc tttgggggaa aaattagggg gatggggcag
+   223141 cgggggcttc aaacgtccat agaccttttc gtgaaagtga aatttttcgc aaaatccgct
+   223201 ccagagtttt gcgagtgtcg gtgtgtctgg caagcctcga ctggcagttg ggtttcctgc
+   223261 acttttatta tgctagtcgc actcgtctcg agctgtaaaa actcagcttt tgggagaaaa
+   223321 ttaattttat tcaagccaac taatatatat tttaatacga ttttatgctg tttgcttaac
+   223381 atgaacttta atatgaacat tcaaaagaca ggaactatac tgccagcgaa tgccttcact
+   223441 gtttcttatt ctaataaaaa cccattctaa ttaaaaccca tttaatttcc ttttctttat
+   223501 tttttggtca aaaggggtaa tagtttgact tcatgttgga actccatttt aagaacattg
+   223561 acgtgtcatt agtagctctt ggaatccata ctccttacct atggccttat atttttccac
+   223621 tgctaacttt gtccagtctt acagtcaaag atacaaaaac gcggatatga ctcacattgg
+   223681 gtattatatt taaaatatac tatacaaata tcctccaaga gcaccctgtg gtttgataac
+   223741 caccgcgttc ggatttgata attggcttaa agcgcgtgaa taataacaac cagcagcggc
+   223801 agtagttgca accacagacg tcagtggcag caggctgcta ggaaatcgcg tttcattaac
+   223861 atggaagaag gaaaattgaa aattgggaaa aggagaaaag cggcgcaacc atgacatgag
+   223921 acgagtggca aggaaaagcg ttgcaggccg gacgtccaac actgcgtata cttgttgcgc
+   223981 ttggcaaact tatctattca agttgtttta ttttttctct cattttcctc tcgatataat
+   224041 acctttcgtt tgtttgtttt tcggaaaagc agaaggaaat atatatacta tatatttttt
+   224101 tcacttttca ttatgtcggt tggttttctt acgggtgacc gcgacaacat aatttaccgt
+   224161 taactcttca tcaacgcaac tcggcacgca aagtaacaaa attgtggcgc aaggagtttt
+   224221 ccattttccg agtttcccat ttgctcgctc agttttgtcg tgtcatatga tttatttgac
+   224281 atgacagcca aacgaacaat gatttatttg tcaatacagc atttttaatg aattttattt
+   224341 tcaattattt atttgttgtt tgttttatat ctagttttta gtttttttta agtactcttt
+   224401 ttgtgtttgc tgtgcaatcg caaaagtcca gatgcagacc aaacacgcaa tttcgacgcc
+   224461 cagttgagtt ccgagtgtga gcacagtttt ctacactatg aatcccacaa acgtgggttt
+   224521 cctaccagca ccaattttgt gcacctgctg accagacatc gaataaatac agcaaaaaca
+   224581 caaacacaat taaaaataag aatgaacgct tgattatctc gactacttga tacccgttac
+   224641 tcaactagta agggtgcgcc gtaattggct ttgaattgga aattagtaag aaaatgagtc
+   224701 aaataaaaag aaatgtgtgt tttaggatcg aaaacgcttt cttctacctg tttttttttt
+   224761 tttttacagc aaaacagaaa tcaattatgg ctcactgagc gttaacattt tgaataaaaa
+   224821 ctcagtttaa ttggcagaac caaaaattaa ggagttttat tacagaaatc tcatgcactc
+   224881 agtgaaggga aacatgtaat tagcttagag ctataaaact aatattaatg tttttaccta
+   224941 ttataaaact ttccttataa atgaatatgc acaaaattaa tatattgtag tattaaatat
+   225001 aaattatata taattttatt aaggcttttg caactaatcc cgcttaaacg atatgtgccc
+   225061 aagtgcatct cgctgcagca agggcgagca aatattaatt tcacttttga ctgcacttga
+   225121 cccactctct ttttaactca aagtgaagtg catcttggcg aagcaccagc tacggggccc
+   225181 agagctttat gcagataaat catataataa acagaacaca gcaggatatc acttcgctgg
+   225241 gtaggatgcg gccgcaagag acttgaagat ttgattcgat gacgagtttc ttccacttgc
+   225301 tggcgtagag atttcgcgat gtgtttgctt tgctgctgag tcacggatgc taaggaagga
+   225361 gctcaaaaca aattgaaagc cgccataaaa ttgcacaaga aatcgataaa cttaaagctg
+   225421 caaaacaaat taaattatgc tcttgaggat ttaaaaaacc aaaactgaaa ggcaaagtaa
+   225481 aagtctaggc cataaagact cattcgatat gcgtatttaa ttgaaacttt tttcgttcgt
+   225541 cgtttcctgt ttgttaaggc gatgttaccc cccccccccc catcataatc ccaacggata
+   225601 acaatttcgc cctgtgggct gtcgttcctt ccccttgcca gccaacttat gactacaact
+   225661 tttgccattt accaaaccca aatctcctgc tggatatatc tttgtttttt ttttttctat
+   225721 cttacataag ccgaatgcaa gttgcatttg aatacaatta gcagtttatg attttcactt
+   225781 ttacaacatt ttcaacatga ggcaacccca ctccttccca acggagtcca tcgataaagt
+   225841 gcattggcca gcggacaaca cgtgtgttgc tgttgctgtt gcatttgccc gctgttgttg
+   225901 gccatggtaa tttcgactcg aacaactcat tttcgattgc gaaacaagtg gcaaaagggc
+   225961 aggcatgggc atccagagag atagacaaaa acccaactta ataggacaat tgattgcttt
+   226021 gggagtgtta caaaagctgg tggaacaaac accaaatgta accaattaca atcgggaggg
+   226081 cagggaagca aagtttttgg tcgaaaatga aaagtcaaaa tgatgcatga gcatgtggaa
+   226141 acttctctga ggggtccgat cggacatgct taagttggca aaacgttgct tgtcttttgt
+   226201 aattatttag ttatattatt tcgtaaattt tctttaaaga aaacattaat tatgctgcat
+   226261 ttacaagcgc agcaaacccg catcaaaaag agacacttaa gcgttcgaaa attgttttat
+   226321 ccattagtcc gtcgtcaact tgacccaaac acctcatcaa gtcggaaagt atcactttaa
+   226381 agagctttac aaattacgac ttaatcgcgt gccaaccaca aaaaaccaaa agactacaag
+   226441 aaaactacag gcagaacaac acgaactctt acgaagtcag cagaaacacc atcaagttca
+   226501 aagtaacaaa cacaaaacca caaagtgaag gaaaactctt tttagacctg ggttgtgcat
+   226561 gatggggtca ttgaaatttt ctggttttaa acgttaaact taaagagcgg taggaaccta
+   226621 aaacttcaaa ttgtcatagt ctgtcagctt cattaagatc attcactttt ataagtgcat
+   226681 tttatctaat taaaatgctt ttcaatagat gtgtttcctt tttgacttcg gcgctccaac
+   226741 ggcttgacct tgtctctaaa attttaatgc ttttggcagc tgaaaggtga ctttgattac
+   226801 ttgatttgat tttaataatt ctaattgctc tacggatgca tttccttctc aggcagactc
+   226861 ttggcattga ttgatgaaac gtaactttga ctttgaaatt ggttcttgac gtttttgtcg
+   226921 ttggcgtgcc aaaagcatca tggtcaacta atacagaccg caaaaaaacg caggcaacaa
+   226981 tttaatatga aaagagaaaa ttcacgcttg attggtagac catttcactt gtctaagaaa
+   227041 caaaaagaaa acacaagtgt caggggaaaa cttcacacgc cagcgaatgg caaacggcaa
+   227101 tcaaattaga acaccacatg aatatgaata ttcgattaaa tgatgaatgc actgctggcc
+   227161 atataccacc gtatttcgcc cccacttaca gtcaaacccc cttgtcgtga ataaaaacgt
+   227221 gccaaatgta tgaatgaatg actggttgac tgaatgaggt ggcaaaatat aaacgtcgtg
+   227281 cttgcacggt gggtaaataa gttgtttaat acaaatacaa aatacaaaaa tcaaatacat
+   227341 taatgcaaca tgatttgtat aattaatatg tagctcctca ctaaaacgaa cttatttttt
+   227401 ataggttgaa atcttaactt gttatctaaa atcaaatttg tttcaatttt cttgtaaatt
+   227461 tagcataata tgaattttta gccaataaaa agtgatcttg gcccactgta tttccgacag
+   227521 ttgctcgagt aggagtcaag tggaagtgga gtagtgtaaa gtgaagaagt gtaagtagca
+   227581 gtagcctctg tctttcggtt gcctttggct tcttggtgcc actggatttt cgctggctga
+   227641 catacgccaa aaaatgccgg cccctcgaac aaaaagaaaa aaaaaaacac aaaacagaaa
+   227701 tcgtattttg ctataattta atcaaaattg gaattgcggc ctgtcccaca tgaatatttc
+   227761 aatggggaga gtgctttttt ttctccagcc cgccccgccg caatatttgc attcgttttc
+   227821 attcggcgat tcgcttttga cagtgtaacg gccgcaacct gtcataacac atggggtaaa
+   227881 ataaaaaata tttacaaaaa atgaaaaaaa aaaattggca aaaagcgcaa gttacaattt
+   227941 atgtttgatg gacattcgtc ggattggttg ggtgaaattt tgatggtggc gacgacgttg
+   228001 atgacgtcca cagttggaca ttgatgggag tgagtgtgct ctttaattag ctgcacacaa
+   228061 caacaaaaaa acacctgggc aaaaaaggaa aacttttcag ctcggcttgt caccgcttga
+   228121 atgggaaaac gctgttcgaa atgcatggaa tatggtttac aatctgatcg cagcttgtag
+   228181 attagccacg agctcaacac attcagccaa tctgtcgaca cttgtcaggg agtcataaaa
+   228241 aggccacacc cagtcgagct taggcactca taagtattca tattccgagc tctaaactct
+   228301 atccaaagtt atgttcatgt tttacatcaa gcgctttatg gactttggtt ttacagctaa
+   228361 agttattcta atgtgatgtt tatagatatt caaaagttga taacgataaa aaaaaatggc
+   228421 tttctttaaa atattccatg tatgtttggg tcgcacaata aacatttgat cagatatttt
+   228481 tcaggttctg ttaaatcaag acaacatttt tgaaacaatt atctttttat tgtaaacatt
+   228541 caccaagaaa tatgatacat attattaagg ttataaacta aataccttta taaattataa
+   228601 tataatattt aatatgatta agccgtgaat tatgtcttga tattactaca aatttcttag
+   228661 attaacaaat ggctcagttg aacaaaatat cagaagttcc ttgcttgaaa tgtttgaact
+   228721 ttttccccct tattcgtaca attctccacc caccgctaag aactataaat aaatacaata
+   228781 tgcacataaa atttattcac ataggcaaat agaaatcttt agcgcggctg catagaatta
+   228841 aaaaccgatg aggcaaagat ttgaatgcac gtgtgttaat cataaatttt aaacatgttt
+   228901 caaatgcagt cggcggtgga aaagtggaca ggaggacagt gggacagatg gacggactga
+   228961 tggctggaaa cgattcggcg gtgtcggcgt ccaaaaaagc gaacgcagct ataataaacg
+   229021 catgaaaata tgcctgaggc gtgcgccaag cgatttcacc tacagataca tgtatatata
+   229081 gaaatatata tgtatgcact gcagtggtgg cgcaaggggg gattttcggg gcaaaacggg
+   229141 ggctggtcgc aaccacgtcg acttgtcact tttttgcata aacgcaccca attttcgatg
+   229201 cgtatatgag tgagagcgaa cgagactttg gctttgcctt tggttttgtt tttggtttgg
+   229261 tttttggttt ggttttggcc acatgccagt gcgtagtgaa ctttttcgtg gaaagggagg
+   229321 cgttaagagc ttgccgcgcc tcagtttcag tctattaaaa gagctgcaag ctgggctcgg
+   229381 tggttgtcaa ttttattcca ttattttccc tttcccccca ctgttgtttt tttgtagcct
+   229441 taatgctctt gtaatgggcc aacaaattgc cattggggcc gttgttatcc gaggtctttg
+   229501 cgcctgatcg tgttgtgaat tggcaattcg atgtgttttt agcccagcta cagtggcaaa
+   229561 tgtcacagaa ctctgcaaac gtgatggatc aactgggata gaacacgcca ctagtgattt
+   229621 caattagttt tgcgaatatt ttaactcagg cattaactaa tttgtgtgta tttacacgtt
+   229681 aaatctcaac aagctaaaat atgtaagagg tcaacaaaca attgtgaaac aattattttt
+   229741 taaagcttaa cggcttatta tgattatggt tttcttaggg taaataagta tattaagttc
+   229801 cggaaatagt atgtatgagg aagaacggat cgtttccact ctacaatttc tataacttta
+   229861 ataaccagaa tttttgaatt tagttaatac taatttgttg atactaatct aaatgtcaaa
+   229921 taattcaatc cggacgaaca ataaagaatg atgaactaaa tatgcaactg catctcctga
+   229981 taacattata tcagcagtgt atgcagtttt agtcagctta ctaatttaaa tattaataat
+   230041 caacttttag tgatatcaaa atcactttta tattgccatg caaataggtt taaccagaaa
+   230101 gcaaataatt cttatgtgca ctgcatatga ctctaattaa gtgctttcat ttttttttgg
+   230161 aatgaaattc ttctgttttg aagtctacgt acaattaact tttttttttg ccaagccacc
+   230221 tataattata aggcaattat tttctgttcg aaagaagccc tagtaatgtc atttttataa
+   230281 aagaaatctc aacgctctgt cacagacaat gtgcattttc gcgtgcaacg gttttgccca
+   230341 tttcccattg tgaaaatgaa agtaattgta ggtaagtttt tcttgtgatt gcatttttgt
+   230401 tgccacagac tgtggacaaa acgattttca actagttttg agagattgca ttggcctgca
+   230461 catttccaca cttcaaagtg aacgaaagaa attgaaatta attaaataca taagcaacac
+   230521 gaaagcggct tttccccctt cttttttttt ttttttgttt tcttcctatc acctgcgagt
+   230581 cactcaatca actgtgacgc atcgtgaaca aaaattgttt gtcaacaaat tggagcgcga
+   230641 ccaagcccac aacaaacaac aactaaaaat gcaattaaaa atcgaaatgg gaaaacaata
+   230701 aaatcaccag agaaatatgt gcttaaagca taaacatgaa ccaagccaga tggagtaaaa
+   230761 aacgaacaaa atattttaca acactttcgc tcgttagaag cgggcgggtg cagcctcagc
+   230821 caattcgact acagtgctcc acgaaagtgt acggctaact cttggctgca ctgagaaaaa
+   230881 tatttcaaat tgagcagaaa aagagttggt gctttgcaat ggctagcaat ctttacaaaa
+   230941 tataaataga ataaaatata gtgtatatat cgataggtat ttgtgaagtt gtgtttatcg
+   231001 gacactagtt tttctcagtg tggctatctg tctcccctct atgcactcat tgccgcttta
+   231061 attgcccggt gaaaatgtgc agcaactgca gttggggcta caaaacactg caacactaag
+   231121 gcgcttacct gccgctctac tctaattacc cgaaaactct tgggagtcga tgcctcttga
+   231181 tcatctgtga tccctttctg taagattttt tgaaagatgc cgccgtgctg ggcgagcaag
+   231241 tgaaattcaa gtgaaattaa tcaaatttgt tttgaattta ggttgagacg tggatgcgat
+   231301 ttgttcgggc gagaggattg tttgtgctgc ttggctaggg tgtaattaga ttgcagttaa
+   231361 acaagtgaaa ttttaggtat aattagtgtt ttactattta ttacctaact ttaaaatgtt
+   231421 ctttatttgt atatttttta ttgtgttaat agctaataaa ttcctattca gacagcatcg
+   231481 cacataaaac actgatagcc aaataaattt ttagcaataa caattcatca aataacgccc
+   231541 attcaatgcc gcaattcgcc tttcttctgc cgttggaaac tcaatttcaa tttgctgcca
+   231601 actccccctc cgcggccaat tgacagcttc ttgggccagc accttggcga aacttgggtc
+   231661 aattggttgc cataaatctt catagaacat tgttgactgc cggcgacagt gcgaaataaa
+   231721 aatacgaatt tcattacaat ttgtgtccat accccggcta ttctctctcc cttgctattt
+   231781 taaattatag aaatgtaata aatcaaaatc tcgttaactt gctgcgacgt tcaaatttgt
+   231841 ttcaggttaa agtcaagacg gtgggaaaag gattccgaat cgaaatcgga atcggaatct
+   231901 attctaatta aataggttta ctcgaatgca atcgacggat ggctggctgc agcgagcagg
+   231961 tcgctctgat taatgccgtc gagcaagata atgacaagac aacacccaga aagccaccca
+   232021 atggttggtg gttgggggtt tggggttttg gggtggttat atgcgaatct cgatcatctt
+   232081 tgctcagtcg tgtgtgcaac agcaacagca acaggttgta acttggctgt gttgcatgca
+   232141 accgatgatt aaaacggggc aggccccaaa ctcttaattc ttggccattt tcactgaaat
+   232201 tgttaaatat aatttcgaaa gaaattcttt ctttcctctg gcttgactgg caattactac
+   232261 gtttctcttg tttattttgt ttcctttttg cactacatat atattttttt tcttgctttc
+   232321 attatgattt ttttttctat gctggcgcat ttcatgcgcg acttcattta actgtgcttt
+   232381 gcatgattat tgctccagtt atcagctgaa cgacaacaac ggctagagca tgagcatgaa
+   232441 caacagcttc tgggacggcg gccagacaaa ataatgtgca acaaactgct gcatgctaca
+   232501 aatatgggca gtggattggg ctgtgcagaa aagcaaaaaa agaaggtgca gctgggcaga
+   232561 tcgcttcggg cttcgtcgag ctgcaaatat gtttgcattc gagtcgtgga tgcattcaaa
+   232621 ataacaaata ttcattcata cacgcattag cctcaccagt tattcataaa atattcgcaa
+   232681 tcataatact taggtaatac aacaaatatt aaatgcgtaa tggaagccag cagttgttgc
+   232741 ggcttataat caaataaaag attaagtaat gctaattatt tggatacaga ccacatttat
+   232801 ttatatattt gtataggtat aggtattata aactgctaaa agcattaaac gttcatataa
+   232861 acgttctttg attgttagcc actacgaaat taaaaaaagc cataagcaga attgaatgta
+   232921 aattgttgtt tggaaaacca atgttctgca gaaacgttaa aacgaatctt gtcttgattt
+   232981 tctcagctta tccgcattta tttagaaaat gtgctcgagg tcacttccac acccaagtgc
+   233041 aatcaaaatc gtagacgtcc gagcacttaa ttaaatttac attgctgcaa acgtacttca
+   233101 accgagccat gcccaacata aaagggggga ccgtttgtga ttttacattt tagatttttt
+   233161 cgccaggcca tttgagggat actcgctcgg gctcacctgc tgctcgttat ctttagtatt
+   233221 tatgtcatcg ttaaatttgg cttccagctg cttttacctg gactcgaaac aagtgcacac
+   233281 acgaagcgtt cctgctttgc ttttttgcta ttcagcctct ctgctcgtgt cgccggcata
+   233341 ggtgtggcac aaaatattta aacacagcat aaaaattaaa aactagtcta atcacaggcc
+   233401 cgagccagat ccaagtgtct agtgttgggg ctggaaaatg gcctcaaccg agatcattta
+   233461 cggtaggcca cgtgatttga tacttaaagg aagatataaa tttaaactta atgacgattt
+   233521 aaagccaaga aaggatccat agggagaagt gtcgagatag tgaaaggttg aacgtcgaat
+   233581 ccagaggtgt gttctttacg cattgattta tttgcattca tttcgattga cacgtcttaa
+   233641 atctttattt tatatcagtc gatttgcaat tatcacttaa cttcgtcgat taaagtaagc
+   233701 caggaaaatt ccaaatattt tagcgaaagt cgagagacta aaaattaaat aataaactaa
+   233761 ttttatgtaa tctatgtaaa agtatatatt tttattcgat actccaaata taggtcatta
+   233821 tttttaaaca tatgtatttg accaaaaccc acttctcacg atagtcaaaa tccgactggc
+   233881 aaaggagagt ctggaaaatg gaccatgtgc catttacaga ggggccatgg taatctgatt
+   233941 tgtgattaag acttcaaatg aatacaaatc cctgctttgg ggactggaga tcccagtaag
+   234001 gtttagccaa cgcttccaac tctcaaacta tcgtactggc aacaggtgta aatgattccg
+   234061 ggcctggaac ttgacgtcgt tgcccccttt ccacctgacc agtgcgttct gagcaaattc
+   234121 aagtgaattt cctcaagaaa ctggttgctt tgccgagtct tagtacgtcg tattaagttt
+   234181 aatttgcatg gcgaactgaa aatggaaaaa aaaaaatgga aatactttta aaagaccccc
+   234241 aggcgccagc aacttgacac atttctgcgg gccatgagaa tgaacttttt tacgatgcga
+   234301 aatgcttttc caatagttga cccaattgcc ccagaggcag aagaaagggc gaccccgtgg
+   234361 attgacatca atcattgcac tatgagacat tattctaaac gtcattctcc tttagccgtc
+   234421 ctagctgggt caattagaaa tgacttggca gcttttgggt aagagcacca gtaccaagtg
+   234481 caagttactc tgggtgaggg agagcccagc acctctaccc attcgataga aaagaaaatc
+   234541 tgcagctaaa cgacaatcga ttagggtcag ccctcgttgg gcctggaaag ttgccccaat
+   234601 gaacgcccag caagtggcgc ccatggtgaa aaactgcagt tggtagccaa gcgtagccta
+   234661 aaaaatatga cagaaaaaaa ttagaacgaa gttcaaatcg tagcctaaaa ttatggtaag
+   234721 atttttgatt tatgttagtg gaatgttcaa gccgtatctc attttgattt agtagtttga
+   234781 aataccgcca caaagttaag tgctaatttt taaggtatta taaacatatt tatctaaatt
+   234841 ttaacaaaca aagacattac ataattttaa aaacaagaag ttccctcaat ttgttaagat
+   234901 ctaaaaaata caaattggaa aacattttcc ggtgcatatt agccagcatg gtcgagttca
+   234961 gtgggccaag gagtaaacaa ggcttggtca agtggctagt ggctgcgatt tgcatgacca
+   235021 gtcgattgcg ataggtgtgc gtgaggcgat ccccgaatcg aacgtagtgg gtcccgcaaa
+   235081 agcggaagat cgaaactgga acacttcaat gtgccagagc aattactgca gcagttccaa
+   235141 cttctcagtt tcccagtcat gcaagcgatt tttccggtgg gcatggggta tcaagtgcac
+   235201 cagaggatcg gagggaaatg gattttcttt cggttgggtt gcgttgcgtt gtctatgact
+   235261 cccatcgctg gttatactgg cttagtagct ggcacagtga gccaattgat catttgacca
+   235321 tggaatttac atacagtcca tggtggttag cactttttgt ggttttgcca atgattgcta
+   235381 aaggaaaaga aattggtaac cggctgtggg gcgcaattta ggcgcctgct cagtcggtcg
+   235441 cctgccaatg taattgttta aaataaaatt actatgcaat aattttttac tacataaaat
+   235501 gcattatttc aaacgaaatg aacggaagaa ctgtgacacc acctaccgga aatctagtga
+   235561 atataaaagc tttaaaagct ctgtaatccg cgccacctat aggcaaatat atcaaggtgc
+   235621 taaagtcgac atctactaaa atgttgcata cttttaggga tacacgataa gcacagtggt
+   235681 gtcgctctct agcggacgta ttcttaaacg tgccgcgcta cagcgacacc taggggagca
+   235741 tgtcaaaact attttaaatt tagaagaagt cagtttttcc caatactttt taacattaat
+   235801 tgggaattta tagatctaaa aaaaattagt gggagcaatg aatgtcttca atgtataaaa
+   235861 tgttaggcag gcaaaagaat tgataacacc aaacctccta ttcctcccac tacatacatg
+   235921 ctgtatcctg ataggcgagc attgaattgt gccccagaga ttgtaaatca cagattaagt
+   235981 gcataatgac tgtattttgc cattcaatgg aatgtaaagt caatctttat tcagcaacat
+   236041 ttatgaatat actgttccca gcgcttacat ttatcgattt taatttagta aatttggaac
+   236101 tacaataaaa cggttattct atacatatat agtgcctaaa acattttttg ttactaagtc
+   236161 tgtgggtgaa ttagggttcg tgaaagaaca ataggtaatt aatatttcat ttgagcttaa
+   236221 atggaaacac agtgcctcca ctttaagata cccgttattg agccaaagga tatttattca
+   236281 gatagtcagt gttaattaaa atagcagtgc ttgggacctg caagttaaag atattccatg
+   236341 aatttcggta tatcatattt tctttttttt tcagtcttgt gctctaccag atgatcaaga
+   236401 agataaatgc agtaccattt gctaccctac tattaagccc ttgttaaagc atgtgggatt
+   236461 gtgccatcaa aataatgaag tggtttacca actgcaagac aaaattcgcg aacaggagat
+   236521 cgaaataatt ttttgacgag attgtcgtcg gcggcgagcc tgaaggttat cggcttaagt
+   236581 cgtttggcaa acctaagtcc gacgaattac tctttattaa gagtcaagtt agggccaagt
+   236641 ttacgacttg tgatcggaac aatgccggaa ctaaaaccat ttatgctgtt atgaaaaaag
+   236701 ccggatggtg ggatcctacc aattaaggta ggagatgatt acacgcttta catttataag
+   236761 cattttaatt caacttcttt ttaaaacttc tgaggttgaa gaaattctga tgctaaatcg
+   236821 acccctaaaa tgtaacattt cctttcggaa tgacattgta gatatattaa attgagctaa
+   236881 gactgaaaac cataaaagaa tacttttcaa aagattgtaa ctttatttta ttatacttaa
+   236941 gaaaatggtt gtaaattatt tatactcaaa agcttcagtt ttgagattcg gggaaatttg
+   237001 gattttcttt tcatttggcg atatgttgtt gctgttataa gtattctctt cagcaattat
+   237061 aaatctaatt taagaagtat atgaaatttt aaaagctaat ttataaagtg ctaccccaac
+   237121 aaatagaaat gcctaagaat acttgcaagt gcgtcataga tacttatcaa acatgccccc
+   237181 atttgctggg ctaagtgagg cgaataattc atgtcaatat caaaactcat ttacctgcgg
+   237241 ttgccccagt ctctctgtct catcaaaaag cttgtgacgc aaaaaccaac taagaagtag
+   237301 atcaaacgag gctgtggagc ccaattccat gggttggggg tgcatggcaa ttctaaggcc
+   237361 aattgaggca aatgcgttca accgcaatgc ctcttataag ccatcgttga caacccagtt
+   237421 tggtgactgc gaaacgaatg tttcgaaatc tcgatttgaa atatggaaaa gtttttgtct
+   237481 catcttgcca cagataggcg gcaatgccga caacgaagcg attttaatta ctttctccca
+   237541 gtcattcatt catttccact tgttgcccat cagttaaaag tcagaagagg aaactcacca
+   237601 aatggtaatt ccatttccat ttcggacatt tcacgctcga aaggatttta tcccacaccg
+   237661 accaccgaaa ctgatgtcta tcgtcctgtg gccattacaa cttattgcta ctactgcggc
+   237721 tacacttaat acaccaaatt aactttttcg tggggggttt ttctcaacgc cccctttcca
+   237781 acaccgccaa gcatgccgcc cactcgaaaa gcaaggacgc gtttatctgc cggttatctc
+   237841 gccttttgtt gattaatagc tttgcggtaa ttaatttgcc atgaatgagg tgcatttgta
+   237901 tagaacgaga tggattggta tggaaataaa ctcgaacgct gaggtgcgct catggatgtc
+   237961 aatgtgatta ttatttaaat caagaggatg gtttgggaaa tatttgatct aatgagatat
+   238021 gatttactat gtgttaataa tcttaactct gatatcagaa aatattttgt gacgcaaatt
+   238081 aggatgtggc aaaaaagtat tgttatttgc cattgataaa ctgtgatgat tgtgaaatga
+   238141 attatatggg taagatataa atacactagt gaggtggagt aatgaattat tcgaagctac
+   238201 ctttcgcaaa tcagttattt aaaacaagct tatgataagg atgtttgtac tggaatagat
+   238261 ttaaccaata gtccttcatc cttggcagag tccttgcatt acctgaaact tttctcatta
+   238321 tatctgcacc ggcacagcaa cgaacagtgg aaaccgaagt actccaacgg cagagccaac
+   238381 tttctgcccg cccacagctt caggttttac caatgacttc cgtcatcacc gacacactca
+   238441 aagtgtaaat ttgcatttgc ttctaattac gagcgggtac aagaaattgc ccagaactga
+   238501 ggccggagaa ggcccgcatt tgcagcattt gcagcatcgg agcatcttgg caactcggaa
+   238561 acttggcatc tgcaaatgga ggagcacttg cggcaattgc aatttagaat ttcaattata
+   238621 tttatatttt ttatggggtt ttagggtggt gcctaggtgg aaaatgcccg cacacacgga
+   238681 agtgctggaa aactacaaat caagtgccac gcacgcaaat ttcctttaat taaaaccaaa
+   238741 cttttaagag cccctcgtct gcatcttctt cacgttcctc ggccgcagat gcatcttcac
+   238801 ttggttaccc aactgcaaac tactcgggat caacttaagc gaatccaagg cgacataatc
+   238861 ttcgagtgag gcccaaagtg cgggccgctt ttgttggtcg cagggcgaag ggattaaagg
+   238921 ccaaagtttg ctcaggtgaa gaaatacaat tagcgagtag gacgaggaca ctgtggcaca
+   238981 aatgaagtac cgaaattcat tttaaaatgt cttgtcctta tattacaatg aaacttacat
+   239041 gctatttttc ccaatttaaa ttgcacggca ttactaatta ttcgaaatta ctgtttaagt
+   239101 cacaaattag ctgcaaaata attaagttgc taagttttta atcaattctt accaaggatt
+   239161 aagcaaggtt attgcacgac tgtaatgaac tgcatagata aataaaaagt tttacaaggc
+   239221 ataattatta gtattattaa ctggttcatt tacggattta atgccctttt tttggggctc
+   239281 ttttggtttt aatgtgccta aaattagctg gggtcaggga atgccactcc tgattgggat
+   239341 tatgaatatg aagtcaaagc ctacaaaaat ctcttccaag tttggttttg taagtaaaat
+   239401 tcagttttct tcgcgttcca gccggaagtg cacttagtat aaaaaaaaaa acgcggtgag
+   239461 ggcaaaaagc gggccaaaag gcaataaaga aaacttcgcc gcccttcgaa gcgatgcgag
+   239521 tgcagtggga aacttttcta acccatatta aaacatattt gcgcaaaaat atgcagatgc
+   239581 ccgcagagtg caaggaagtt ttcacgaata tatgcaggtt cctgccatat ctatatatgg
+   239641 acaggtggcg gcggtgcgtt ggcagtcaat tgctcattga cttggctcaa aaattgttga
+   239701 gtgtcgagtt ctaagtccta agcccctgct aacatgttgg cgaaagttcc cggcaccaga
+   239761 tacggaagag ccgacatctt cctcttcgga ataccatttc tttcgggctt agtttccagt
+   239821 tgagtttacg tgagcttgga tttcaagctg tgcgcgcaca taagtttgcg gcttaagcat
+   239881 gtgaaattca tatggatgga aatggaacca cccacacgcc accccacata ttcagctcca
+   239941 ggaatttgca cccacctgca ggtggggcat aacattctga ctgggctaat gcattgctag
+   240001 ccacccaaat aacctaagac attgcgtcgc acgggggtgc caaagaagtc aagccaagtt
+   240061 actagtgcca aaagtttaaa attaaacgta gttatatcaa aaaaaaaaca gcttggttgg
+   240121 aaacgtaggg tactactgaa ctatatataa cgaagaatgt gtatcttaaa tagtttaaat
+   240181 gttttttttt tactatttta tttaaccttt ctttcattcg ccaataccaa cttaacgtaa
+   240241 atcaagaaac taacgtagtc aacagtttgt tgtttaccta tcggtgacgc aaatatttaa
+   240301 ttataaattc agctcgatta aagtttaaaa gagcaaagaa tgtcccaaaa atgttgcgtt
+   240361 gtaagcaaaa ggctaattat gtttatgcaa agtgtttttt ttttgaactt atggaaccta
+   240421 atggaatata atttgaacaa aattattgaa aattgacggt aattgaaatg aatttgaaca
+   240481 aatttcagtg cacggctttg ggtgtcgtgt tgggccacca tcaaaggagt caaagagcgt
+   240541 tgaatgttga atgttgcaac tttgacacgc gacgcagacg ttggcccaga gtgttttccc
+   240601 cggagctctg caattttaag ggcctaggcc tcccgaaaag ggttcattag acattttacc
+   240661 aaaattactg cacatttcca aaaagcaacg gctatatgat tagttttgaa gcggtggttt
+   240721 atttgaagta gagggtttac tttgatttaa gttaagttga gcgagtttgc gttgtaagtt
+   240781 acttgaacat taaacggata aagtcactag tacccacact aaacagctta cttgccaacc
+   240841 gggaagaggg acttgtggtg cccgactaaa cacaaattcg atgaaataaa aataaattag
+   240901 ctgctttaaa tgagccaaat aaattcattt agggttttat gcgctccact gaagtatgcc
+   240961 aaaaggcacc gtcttcgtcg tcgcagtctt tcgcatcacc ctcttttcgg atggcagggt
+   241021 taagagccaa gttaaaggca acacgtattg ccaaactcga ttgccgctgg cgccgcacac
+   241081 caaatgcaat ttcacttcac cctcgtcgag gtacggtaaa atttccaggg tctccggtct
+   241141 ccggtcgcca gtattcccca ttcgctgcag gtgattcccc atcgagacac ttatctattg
+   241201 agtgagcttg gttcgcttcg tttggaccgc tcttgaagtg aacgccaacg aaataagaaa
+   241261 acaacaaagt ttcttttgat tatgtcaatg gaatttgttt atctcaacga aatgtaaccg
+   241321 aattgaaaaa aatatgtttt ttttacgctc atttgcattt acttttgccc ggatataaat
+   241381 gtggcaattg gaaggggtga tttgttcaaa tgtccagctc agtaagattc aataattata
+   241441 tatacacgta acacattaat gtgattctat ggtaaacatt agagtaagga agcaaatttg
+   241501 ttttgagaac cccagaaaca aaggcatcat caacaacgtc aagtagcatc aacaacgtct
+   241561 agtttgggtt caatatattg attcttggaa acgtgcctct agaaactgaa tagcttagtc
+   241621 ataaattcag taaaaatact atttaaatat aaattggcaa actattaaaa ttcatatcaa
+   241681 aagccacgca cctggcgcct gttcccctaa aaaaaaatgg aaaccggtca ataaccacaa
+   241741 caaattcaaa accggattgg aagagaaacg ctggtcacct gtgaggaaag cccccacgtc
+   241801 tggtccagtg tctgggacaa gtttttattt cagtgcgggt aaagaacctc cgaaattcaa
+   241861 acattatatg aattcaaaaa aatcgtataa atagagaaag acgtgacaga aagggcacag
+   241921 tatcgagtgc aacatcgaag catcctcatc ttctgggata ttagtacaaa ttgaattcat
+   241981 tttcaaattc aatatgaaat tcttggtaag tttggccacc agagctacac ctcgcaggga
+   242041 ttaaggactt tataaaaata ataaacggcc attcaggtga tcgtttttgt ggcccttatc
+   242101 gccgttgctt ccgcgcttcc tcaattcgga tatggaggat tcggtggatt cggaggattc
+   242161 ggtggccaac agcagcagca agaaggattc ggcggtttcg gaggcttcgg agaacagcaa
+   242221 cagcagcagg aaagcttcgg tggattcggc ggatttggag gaatcgagca gcagcagcag
+   242281 cagcaacaag gaggcttctt ctaaggctgt tactttagtt cctcgactaa aaattaaaat
+   242341 aaattatttg aacgtattag tataacaaac acaaaacagt ttttgttttt aaatactcat
+   242401 ttacatgttt tgacaatggc ttattgtagg cctttagatc gacagagatt ttattaatga
+   242461 gtaaccttca tttatttcgc ttgcttataa atacgtttgg gtttattttt ttggacataa
+   242521 gcaacatcaa gaaaaatgaa ttattaaatt aaattaaaaa tttgtgtttc aacataaaaa
+   242581 attgttttta tttagctaat cgaaaatcac ttaatgggtt aatgacattt ttgtgctata
+   242641 acaacattaa ttgcaaacac ttgaataatt cgcaaatgat gttggcataa aaaccaaagg
+   242701 cagatactga ctcatgcaaa attgtgaaat aaccaaatgt tctgacagca ataatttaac
+   242761 aaaaaaccta atttgattag cgaagagcat acgccaagaa gagctgattg caaacctgta
+   242821 tttttagtgg gcccgtcaac cccagatact tgaacgtttt caacaacaga aaaaatcccc
+   242881 tataaacaca aacgttcaca atgatttgaa cagggaagtc cgagcggcga tcgcaccgtg
+   242941 ctcaaccagt tttgaatggg cataaagcct aacaaaaatt cagataaata ctcatagcca
+   243001 gcagccgaaa ctcaaacagt tgattttagc attccggtga aataagattg ttcgcttgag
+   243061 gaatattttc aatattcaac atgaaattca cggtattttt cgctaaaaat tggctaatat
+   243121 agacatctac gttatttttt agttgaaact aaatgtattg tactgtatgt attatttttt
+   243181 tagataatcg tttttgttgc gctcctcgcc tttgcatcgg cccagttcgg tccgtttggt
+   243241 caaattatta ggggtattga acgatttgaa ggtggtctgc aacaacaaca gcagcagcag
+   243301 caaagcggct ttggcggtgg tcagcagcag caacagcagg aggagggtgt catcttcaga
+   243361 ggtcctttcg gcggcggagt ggagttcttc caggagcaac agcagcagca gcaaggcggt
+   243421 ggaggtcagc agcaacagca gcaggagaac ctatttaact tcttcggcta aagctcctta
+   243481 gtgagggaaa ctagacatag actgaccact atctatagaa ctcaaaagca agccattaac
+   243541 taaagccatg atcgtagaga taaacacacg ataaataaat atttttaaaa acacacaatt
+   243601 ataaaccaag acataataat gcttttctat aatcctagtt taaatattta atactagttc
+   243661 atgcaaaacc aagctaattt tagtttaata aagcaataac gctttgtcat ataaaattac
+   243721 acgaaatcaa atacaaacaa tttgtttacg aggccaagga gcagacaatt tgcctgctat
+   243781 ttggatgtga attcagcaat gacttcgtgt tttgtgtcaa aatcgcgatg atggccatgg
+   243841 ccaattgtta tggccaaagt tttaagccaa aaatcgcaaa actctaatga gtgctatcgt
+   243901 tgaaatcaga ggagagcaag tgggtgtgga agggtcggtg gcatttttgg ttcctttatc
+   243961 cgaaagcacc gaaacaccat caaagtcatt aagcggaaaa cccttttggc ttcttttcca
+   244021 cgcattttca tttcgccttg taatgacttc atcccggctt cagagtcttt ggggttttgg
+   244081 ttctggtttt gttattggag acaatttcga agccatcctg tagttgcctg tctgacaaaa
+   244141 caacgccagt cagctataca gccagcgcca tccaccaaga aagccaagag caccaacact
+   244201 cgtaactaca aaacaacttt ttaattgcaa ttaaatacgt aacaagtttt tagttgtaag
+   244261 tgcgactacg gagcacacag ggaacttagc cgtggcaata gtgcccattg gtgccggaat
+   244321 attgttgttg tgggttaccc ttatctatcc tttgctatct gctgaatata tatacaaaga
+   244381 tatatatata tatagatata tatactatat acgtatctat gagattgcca ttgcagtgtt
+   244441 tgcctcactc gcactctgtg tgtattgcaa aattgagttg tgcgctttgg ggttttaatt
+   244501 gttaattgtt ggctcagcgg gcagcgcttt tgacgtcgtt gatgacagtc gactgtgggg
+   244561 ccagaacgat gtggattggg tcgtaagaaa atcgcacttt ggggattata ttgaagtcgg
+   244621 tggagagtgg aaagtgggaa gaggaaagtg gagaaagata aaagattctg ggcccaatcc
+   244681 tctgcgagga ggcggaagaa aaggatgttg cggattaggg aattaaattg aaacccctaa
+   244741 tgattttcgg gttatgcggg gaaacacaca ttttgcggga acttttaatc aaatcatcaa
+   244801 gttcatcgag gcatacgcat gcaatcatta cccgaacggc tttatgactg aacaattaag
+   244861 aagaaaaggt tgaaaactgc tggtaaattt gcttaataaa ttactgggaa agtccgattt
+   244921 gaaaatagtt tagctaaagc attgcactct gatagacgca atattgtaat ttttttatat
+   244981 gttcgtaaag atttatttga ttctctttag atgcaatcgg taatcgtatt atgtccgtgg
+   245041 aaagtgaagt atattggaca ttagcatttt ccatacattg ttaatatttg atatgccatt
+   245101 taatttgaat gcaaattaaa atttgatggg gccactgatg ttgtcgctta cccacttacc
+   245161 ttttgcatct aatggatctt tatgtgcacc cacttgggca gcggttaagt aattcgcccg
+   245221 gagtcatttt gtgaggccaa agctgtgcca gtaacctcta agtgtctgtt tttcttcacc
+   245281 tgtaccggga ttcggtattc cttgactcac taatttggcc gcaatttaca taaaggacgc
+   245341 gcggccccaa atgttgttag ccatggtaat attccgtttc ttcgtcgtta ttttgttgtg
+   245401 tattttttga agcacccgcc gaaaaagaaa gcgtaatttc ttttgccatt caacttgatt
+   245461 aaattagagt aaagggagca aggcatagcg actgtgtact aatgagaggc agctgcagtt
+   245521 tagtgcccgt ccgagaatat tatccaactt cccaggactc caacgccatc tctactctcc
+   245581 aaacagtgac agatgtatga cttggcattt atagttttga gccgcaattt tcagtgattt
+   245641 atttatgcat gaaatgcaaa caaaatctga ataattacca acaaaagcta actgaactga
+   245701 cagttagaca gccagcccaa acaaagcaag aaatgcgata cgaaccaaag cggcggcacc
+   245761 gactatgtgc gctaaggata gacaggacta caggcagcga ctcttgagct gtcagaaggc
+   245821 atctgtgtgg gaatattata atatttgcaa tctgcaaaat gctactttcc ccatattcat
+   245881 atgacgagaa atgagcgggc tcagacaaca aagcggcacc agtgacatgc tcaagtatga
+   245941 cggaagctgc aggtctttgt tttgaccatt aaaagtactt gatgtgctta aaaacgtttg
+   246001 caaaaataac tattatattc gaagtagtaa taatgtgaaa tcgtttgggt caatctttac
+   246061 gtacacaatg gttatgaatc ttcaaccaaa gttgttattc ttcaatcagt ttccaagtta
+   246121 tagtccctgt atgtatttca aataatctga taagctttac atatttataa atatatacga
+   246181 aaatttgcga cgaaaatgtt actcaaagat aatactgagt acaattataa aaacctttgg
+   246241 caaatttaca tataatttag ttagtttcaa ttaagcttaa acactgagaa gtgatttata
+   246301 attagtttag tgcgtgacat gaataacagc acacttcatt tccggccaaa gatcattgtg
+   246361 ttgaactagt ggccgctggc aatctaataa caaagtttgc gcacttaaac actttggcct
+   246421 tttggcttca actggatctg gccacgccca cttagggctc cgccccgaga aggaagtggg
+   246481 cggaagagga ggatgtggta gaggagttgg gatccccatt ataccgccga ctttctatgc
+   246541 ctttgcagtg cagcagtaaa aatgactgca cacagctgtg cttcatggct gcgatggcga
+   246601 atctggagaa ggctggcgct actgcggcag taagatttta ttagcggtgg tattagcagc
+   246661 gacagcaagc cagaaaggag cccatcctgc cactcacacc cactttgcca tggacagcca
+   246721 tctacagtgg ttggcgttgc atattaatgc aggcataaaa atatttacaa cttttttatg
+   246781 cagcaaacaa catttttcgc taccgctgcc gacgggcgtt ttgttttctt ggcaaattta
+   246841 atataatttc tggtcgttta atgagggtca ttcaaaagtg ttcgtcgaga aaacttccat
+   246901 tttcgccgag tagccggcgg gcacaacaag tgtgccggcc attatgagtg accctcggta
+   246961 gggtgtcgta aaaatcgctc aagaacatta tgcaaaataa gagatgcggc cataaagtga
+   247021 agagagtcta aaggccccga agcacttgaa gagcaaatcg cgcccactgt gccgatgatt
+   247081 ttcggaacgg cattgtgagc agtgaacagt ggaatccgca ttttggagtc gcctacggtt
+   247141 tttgtctttc gcttaatccc tgcgccggct cgcaaattcc aatgctggtt tctgtcatcc
+   247201 ccaaaagttg ctcggcccct ccacaaaaaa ggatgcgggt gtcgtggccc ataaccgaaa
+   247261 ggattataca aataaaaata aaaaatatat ataaagaaga ggccatgggt gggtgaggtg
+   247321 ttataaattt gggacagcag gtgaaaacaa tttggtgcat aaagttttcg cttagcagct
+   247381 cagcgcgttg gcgacccaag gacctcgtgc caggatatgg ctctgacttc tgttttgcgg
+   247441 cccaatgcgg tttgacacaa aacggagaaa acaaaaaggg gcctaaataa cagaaaataa
+   247501 aaaatctcaa agaagaaaaa catcaagttc aggaaaaagg gacgagcata cacaaaaatt
+   247561 actgacttgg caggcatatt tttgcatttg cagagctacg attgtcaaaa aggaaaaatg
+   247621 tggtgtaaaa aggataaagg atgtagcaat tttctgctgg tattagtttt ttttttaaac
+   247681 caaaatagtt attgcttagc aatataatga actcatactg attaataatc catatagaga
+   247741 gagtagcttg tagtgaataa tgcgttcttg aaatttttaa aaaactttaa aaatggcaag
+   247801 agtaacagcc tacacactac agtattctaa tcataggaaa tataaaagaa taatctgcac
+   247861 cattacaccg tcatgatagg aataatcttt cctgacaaag cccctaatgc atttcccctt
+   247921 ctaggtccat ggtcaatgca gccactttag gtggctgaga agaccaaccg gcaatgacag
+   247981 cgatttctca tgttagacgt gcctttttat gtggctcgga aattaaagta atcatttttt
+   248041 attgcgcgaa caaacaaagg cattatactt tacagctctg gctaagaggg agagatgaca
+   248101 aacacaatgc agaagggaga atggagacga ccacaaaatg cactgaagga caattgagtg
+   248161 cagaaatatg gccaaaagcc ggagagaaac tagcaaacgg atacgctggc cgaacaggca
+   248221 aaccaaactg aaaccatcta gaaacaacaa cagccactaa cagtgatgtc gaatctttag
+   248281 tttaaaaata ataatactca acttgtccaa tatatttagg ctcatgtcaa tgaatttgct
+   248341 aggaaggaat tgttttgttt attttatatt ttttatacac gtctaaaaac cgaattaaga
+   248401 tgtgaactgc ttttaaaaga taagtttgaa ataataaaaa tctttgccta taaataacta
+   248461 caattctatt cgatgtccaa gttcagacta ttcaggcgat gcaaatttta ttttaaaacg
+   248521 aaatcacgac atatacaaca atatggaatt ttttttgttg tattattttg actaatgagt
+   248581 gtaataaaaa tgattgaaaa catttagcta aagtacaaaa attgtcaaca ctgaccacta
+   248641 aacgtaacca gccagcggta gtggatggac ttgggttttg gacgtggacc cggatcgtcc
+   248701 aactacccac ttccacctcc cacatcccag atcccagcta accattcgct gctcgatgct
+   248761 cgaagctgga aactcggacc tgacaccact cgactcgcac cctcgacggt ccaagaacaa
+   248821 aggctttggc agctgcgaca atgacagcag cgaaaataaa aatatgtaca ctcaaacata
+   248881 catagagagt ctggggcggc agagccacca acccgccgcc attgagaaaa ttgcattaaa
+   248941 atctatggag tgtgcataga aaacggagga ggatgttccg cccgaaaagg actggggact
+   249001 ggccgggtgg cgacggagtg ccggagtgtt ggatagtttc agtttttcgg aactgcgaaa
+   249061 tggcgaattc gtgaaaaatt aaaggaaaaa ggatatgacg ccacattgca acatgaacgc
+   249121 aaaattgcaa ttttattata ttttaacaat gcacttcgtt acgtgattcc cgtcaggaat
+   249181 tgtcctcatt cccactgcac tccaagtaaa gccaagagac aaactcttac cttgtacgct
+   249241 tcttccctac agatacgctt tatatatacc tagatataca tatatatgta tatagcgttt
+   249301 tttttttatt ttggtggttt ccctgatggt tttatgcatg caatctgaat gcaggacaaa
+   249361 ttcacagtta acagtcatat tttattgagt ttttgtgaat cgtagtcata aattcattta
+   249421 gattgtgcat gaggaaatgc atttcgcatg catttgttgt aacacaaatg tgctttctga
+   249481 gatttatttc gtttcgaata aataaagacg tgttgcatag cgaaagtata agggaatgaa
+   249541 taaagaacag agaaatccat tgggaaataa ttgaacttga agtattttaa agtatttact
+   249601 taaattttga tcagaactaa actgttgaaa ttaggataaa taaaggatat agtttttcaa
+   249661 tataagaaag aggctcctgc atttaaacat cactcatgtc tgtttttgcc atgagatggg
+   249721 ctttggcaaa tttgtctctt ttattttgaa tgttttgttt accaagtaat ttttcataat
+   249781 aggctttgcc aactgattgt tgtgtgtttg ccctttaaat accagaacaa ctaaagccgt
+   249841 aaataatacg tattttcttt ttcggcctgt tacgttttcc tatggtgctt gcttttcatt
+   249901 tgcgctagaa aaatattatt tacactttat gatttcttta tatagcacta attgctgcct
+   249961 ttattttatg tttaacccct aagcttggtc tttgcctttg gcagtaaaat aaaatgctta
+   250021 aaatgcgaag acctggaata agaataagca aaacaggcca agatggaaag caagacaaag
+   250081 gaaataaaat gaatgagata gaaaactttg acaaaatgaa aaccattttc ttaaggggaa
+   250141 tatatacaat tagatttgct cagtaaggcc tcttctttgc gatcatttgg cttgaaatta
+   250201 taaatacatt ataatgtagt atataatgtc aatacaaact aagtgggtgc ataaacaggt
+   250261 tattaattgt gatgcaaagt tttttatttt ggaagagtta aaagtttgac agccgtcaga
+   250321 tgcaaagctc catcgtcgtc cttcgcggcg gatactcaga cattcgagag tgatctgcta
+   250381 aactcaaata tacacacaaa acgcacacac acatccatcg cacacacact cacatgaacg
+   250441 tagtcagata atataaaaag tttttccctt ctacggattt atttttatgc cttctgcctt
+   250501 agcatatgct tgtttatagc ctgtgctgct tatgccacag tgcttccttc gcttttcttt
+   250561 gctcgtgctg gttttgctgc ttttcttgag cgtgtaaagt ttatttttgt tacttattat
+   250621 tattattctg gctctgttcg ttgctttggt ccttctccat cttattatgc gtcagtatat
+   250681 gtgtccgcgt atgtgtgtgt atacatatct gcgtacttat ttttcgtaat taatttcgtg
+   250741 cttcgaaaca tctgcagctg gatttttact gcatccgaaa ttacgcatac gccccatgta
+   250801 atgcagactt gatggtttgc tttcagctac agccacaatt tcgcgcaaaa cgtagctgca
+   250861 aacgacttcg tgtttggctt aaagccccct ttctgtcaca ataaaaatgc caatttgtta
+   250921 accaaaattt aaggctgcca aggaatattt gccaacttcg caagtcagcc agcgcagact
+   250981 tgaagggagg gagagaaagt ttagggcatg ggtaaacaat tgacacaacg aattcttgca
+   251041 aattttatag gcaaaatacc gtcaacggaa ctccactgca ccacacggcg tatgtgcaat
+   251101 gcgccgctga gggcgttggt ggattgggaa ttggaagttg aaaaaccgag aataccgcag
+   251161 acgacgactt aggcgaagta ccacatgagc agaagaagaa atctactcag ggcatatgac
+   251221 ataagtgtac ttgttttgtg acattgttag aaaataattt aaaatggatt aggtggatta
+   251281 aaaggcttaa atacatgacc aaaacacaaa taaaaaaata gatagtcagt aaagtttact
+   251341 tcaaaataac gtaatcttta gttattagtt agttagttaa atgaagtatg ataaatgttt
+   251401 tcctaaaact aatatcatct gacagtcact caatcaaaat ctgcatcgtt ctttttaaat
+   251461 tttttccaac tgcatttgcc tatgagccac tgcagtgtcg cgtcgttgct gcgtaaacaa
+   251521 accggtggga aaatatgtgg tgggaaagcg aatatcgggc tgaagggaaa atgccaccga
+   251581 tatacatata tatttacaga cggttttcct cgagggatag ctaacgttgc tgcagcacct
+   251641 tagccaaact ttgccttaaa tggaaatttt tgctgtttgc cccgagcgca aatctttttg
+   251701 agcttgagtc cccagctccc aagctgtgag ccataagctg aaatatgtcg gccaaaagcg
+   251761 agcgagtgca gcaaatcaga aggaaatttg ccaatgccaa gcggcaacaa ttttcaggat
+   251821 aactaatcca ctcacacact tcacacttca atttccgcac ggttttctta cctgtctcat
+   251881 ggtggggaga aaattcgaag ttgtttgtta ttgccgctcg ttattgttgc catgttccat
+   251941 gtcgttttcc atttcgattg tgtcactaac atgttgcgca tccacgggcc aaagggggcg
+   252001 gcaggcggct gggcggttac cgtgggaggg gggcgtggca gcggccaatg atttgaggag
+   252061 tgcgcccagg caatttcaca acgggcacgt atgcaaacag tggccaaaaa gtgggcgttg
+   252121 ttatgccagg gcctcgaaat gggaatgtgc caaactgtcg gctgcaattt tgtcggctgg
+   252181 tggcttcaga aaaacgcata aacagaaaaa agcacgtaag aacgttgcta gatttccccg
+   252241 actctggtat acccactaat catattaaaa cctcgaacat accactttca aagctgcctt
+   252301 atattagtta gaaagatggc atcaaatctt attggcattt attttacaat ttataattta
+   252361 agaggcgctt gttttaagta caaagtagta acaattaaat tgaacacata cgcatactta
+   252421 taaataataa ataaatttaa gccctaatat tgaggtaaag gttggaaatt ctttcaagta
+   252481 tgaacggtaa cctatcattt acctgtacca agtataccca aattttagaa ttgaagtaga
+   252541 gggtacctag gcactaaccc gaaggactac gatatgacaa attcagctgc gtatgtcgtc
+   252601 tccttgggct tatggctgag tgaagagaat aatggccgtg ccctcagtac aatttccgtt
+   252661 gcatactctg tgcgacattc gaacaaagag gaaaaaatcg ctccgtgcga aatgggaggc
+   252721 agatgtgcaa atccatgcct aactacaagg taagcagaaa tcggaactgg ttgcagattg
+   252781 agctgttgcc aaaatttccg aaatccaaga ctggcttaca gcctgaaggg tgaatactga
+   252841 agtgccagat ggaaatgcat cagaattgcg accagatttg aaacgccgtc aatcatcagg
+   252901 ggaattggaa atcatttatg cttagagcta ttcttaagat tctttgtttt agacactgtc
+   252961 accaaatgtt atcagttatt aaatacgttt cgcaagtaat tgaataaatc gaataaaact
+   253021 tgaacttttg caagaatatt ttatttgggt attgtaaatt attctcatag agacaaacta
+   253081 ttcaagaccc aagccaccgc aacttaaaac atttataatg cccagtaacg ttatgaaaat
+   253141 gcatttttcg actgtcccca cgacaatgag aaacgagaac aacagaagcg aagaaaaaaa
+   253201 ctcccccgac ttctggcata aaatatcaca cgttggaaac gtttttcgca gcatttgtgt
+   253261 ggcatttaat taaatgcatt tttattttat tttttttttt taatttcacc caacgtcaca
+   253321 taaaatatgc gaacaaaaaa ataggtgtgg accacaagaa acaggtgcga aggagagaca
+   253381 gtgcaaaaag gtgttaatgg tgactatgaa attccatttg catacactct caagttacaa
+   253441 aaaataataa acgaaaagca tagtttctaa aaaaagcgat aatttttaat gcaattttac
+   253501 ctttttatac gtgtgtgtat gtctgtgttt tttgcccctt tcggctaagt aatggtaatt
+   253561 aggcgtggaa aaatccatta gcccaagccc ggtcggaaac agttgccaga acataaggcg
+   253621 gagcccttcg agtgctaatt tgaattattt aaatttcaaa attgccatca aatgctccac
+   253681 acgttatact taaaatgtaa atacagaaat cgaacagaat atatatagtt ttatacccgc
+   253741 aagaggcaaa cataatattt taattggagc ttttgataga tattagaaaa tatatacgac
+   253801 ttttaaaatt agttcaaagt tccgactgtg tgaatgccta ataatataca atatgtttta
+   253861 tttaatttat ttattaatat gaacattcaa taaagtactt gctttcttac tatgtttgtg
+   253921 gccataacca acgtaaaatg ataacttcga ataacacatg aatagctaaa ccaattaaaa
+   253981 aagcgaaaaa tcccagcata accactaagt gaactttgag ggaattatca acgttttccc
+   254041 tagtctcttc ctgaatgcta gataccgtgt gctttccgaa gtacaaggtt acaaacgcca
+   254101 atgtaatagc cgttttgcag atccacacgc aagtgcgttc gaactgattc atgatgatgc
+   254161 ccccgacctt actttgcgat ggcccggtta ttgcattaat caaataccaa ctgaaataaa
+   254221 gctgctctgc tcgactccgg aaccgcttgc accacgtata cttgggatat ctactatagt
+   254281 tgccatatgt aaagtacttg gatcgcataa actgtatgtg actcccgcct tcaatccagc
+   254341 tctctacgag gaatagctgc tgaactttca acttcagatc caccactgtt atggagcgac
+   254401 agtaccacga tggataccgt ccattgggaa cgagacgcaa tgccaaacgg gttggtatat
+   254461 agatatgtcc acggggaacc attatcttaa tggtgctatt tctgagtaat ttcggatgtc
+   254521 ctggatcctg agtaatttga tacgatgttt gctgacgatg cggcgattta aggtaaaact
+   254581 ttacgtttga tgtagttccg gcaaactcct gaccaccagt tacaatcacg agtagaatct
+   254641 ccgaaccact cacaaacgat tgatccgact ttccaactga cgcatcgcat tgcaatcgaa
+   254701 attccggatt cttcagatag gcagaaataa tgttgaggtg taagagaaac ttgaagacga
+   254761 gcagcagaaa aagcagagcg aaaaaaatca ttaggatcat attgaacgta aatatgggta
+   254821 cgggtatgtg ctcgacaaac aattgcgatg aaattggaaa aatacgagag gatagggctc
+   254881 ctaaggtatg gcaggtgcac tgaacagatg agccacgact gtgctcaaag gaggtggtgc
+   254941 aatgatcctc gctccagccg ggatccaatg aacggttctt ccagatgttg cactggtaaa
+   255001 tttccaaaag tatcgagtag ttcagaggta gaacctcctc cactatgtcc tcattgtcat
+   255061 agtcaataaa gtccgctttt tgtctcacct ctggtgtctc cgagccatcg tgcgtcctgt
+   255121 tccccttctt ccttccccgc gcacttaact gatccttgtc ttcggtttta tatcggattt
+   255181 cgcccggctt gtgtatggcc acatatgccg ggctccgttc acaggagttg gctatccaaa
+   255241 tgcgtttacc ctgcatatca ggtgttatcc gacaagcgtg ctgccttatc tgtccgagcg
+   255301 ttggccatct gtgcaatcgc atcttaatat acattagctc cgaacaattt acgattcgca
+   255361 ctgccaacat ggccttgtgg tttagcatca cactgtatat gtggacttct gtactattct
+   255421 ggatactatt attggtcatg tactgattga aggacgcgtc gttgaaatgc gtgatattcg
+   255481 tcttgtatac taaaggatta gtcagctgaa actccttggc ggatctaaaa aattgaacgg
+   255541 gactgtaagc gtgaacgata agcacactgg tcttcgaggg gaatggatct ggataccacc
+   255601 agtagatatt gtggaagttc tgctgcgaga tgatctgtag cagaatagag ctatggttca
+   255661 gtctgcgttg cagttcatag agcagctttg tcgataattt aataacgtgt atgtggtcga
+   255721 ttgtcttcac ctggatgtcc ttggtgttct ctgtcacctc agtcacatta atctggttgt
+   255781 aaacatgcaa aaaattcaga aaatgttatc aataaagcaa tccaattata tcaataagca
+   255841 aagtgaaaat gacgtttcgt attcttaaat atataatgcg aaaatataat ataatagtta
+   255901 taaatatata tattgggata tcaaaaatct tgtgatgcat tagattataa atttttcgaa
+   255961 tacatgccta cctggtacgc aatgccgctc ttgtggattg ttaatgcgtc atcatacgga
+   256021 tgatgtcttt gagtggctat gacgccaaga aagtaaatgc acctgtacaa gaaccatgta
+   256081 ctcatcagcc agctattgat ccgcaccaaa atgagtggat caaacttaat gacgaacggc
+   256141 ttctccaaat cgattatttg caaattcatg cacgcgggat tatgtagact acacatggcg
+   256201 tttgaatggc tttcggtttt ttttcctaaa atggacataa agaacatatg catggccgtc
+   256261 aacgaatagt agccctcccg atgaagccat tcatgctcct gaacgatcgc cgagaagctg
+   256321 tcgcccaagt ggctaaacat ttctgccatg acggtggctc cacgaaaatc gactggatgc
+   256381 agcctaataa gcatgtgcga gctcagcgtg gtaatctgct ccatctgcga ccccgttgag
+   256441 gcgaccaagc cagtcagata agagtagaat tcatctccat cgattgacgt ctctataaaa
+   256501 gtgatggcag ctattcccat gatataggcc ctattccaat gacctcgctt aaagaaattc
+   256561 ggcacatagt tcattacttc tttttgcccc gttagcatgc ttttatctat cgacaatact
+   256621 ttcacttcta cacgtttttc gacgcacatg tcaatggcat cacatatcga gatcatgata
+   256681 gtctctgcag cgggtagggt gagcacaatc cttgagtatg gcacattcga ggccacaact
+   256741 ccatatgaca ccatatagcg ataggttatt ggcgggaaag gcgactcaaa tccggggcaa
+   256801 tcgatctcga acattgtgaa gccctcgacg cccaaaaaag gatatattgt gcaactacca
+   256861 tgttgagggc cattgttccg gcgcagcgtg tagaacgcat ctgcccaccc ttggcccttc
+   256921 acccacactc ttagacggat ctgaacgagg ggctctagag tgtggagaat cagatattta
+   256981 tgagaggatt ccagagtggg ctgtgattct gtgtccaaca accaccactc atatctttta
+   257041 accgtacccg gacagtcatc gcatttcgag atcaggtgta tgctgtccaa cggcgcataa
+   257101 acacctagag cgcaatttcg acggcaaagg atttggggcg tgaaggttgt tttagcgacg
+   257161 ccttttattg tttgggttgt ggtcattcga ttccgcggat cgaagcgtga gatgacggtt
+   257221 aagctgaagt cgtaaacggc attctcctta agagcatatg ggggtaatct caacgtgggg
+   257281 cctaaaaaaa ataattttga tataccctaa agtaggtaat gacttattta tgtaagatgt
+   257341 ttcctatttg acagagtaaa atattgcgct tttatcaaga taatttgaaa aaaattctat
+   257401 tttacataac attttaatat ctgtcaatag tatttcccca acgcactttt caatatcacc
+   257461 gccattacaa atctagtttt gttcatcata ctcactcttg gacatatact ttttgcaata
+   257521 attgttaaat ggatcgtcta agctgataca agtccacaag aattgcttcg actttataga
+   257581 ttcccttatt ttcaacctgt ccttcgagct ggatccgtcg ataataatgg gttttctgac
+   257641 gtcgaccaga cgtcgctcat tgcctttgat aaggggttcg attgggggcg gtttgtactt
+   257701 catatagcac tgaaataaag tttaaaccct gaggggttat acatttcttc tagacttggt
+   257761 ccactcacac gagctaccgt agttacgccg tttattacag ctttacctcg cagcaaaaag
+   257821 actttccgcc tatccggttc gtttaggatg tgcaccttgt agggcagtag cttcaccacc
+   257881 agtcctgtgg cattttttag acgagcaata atttctgcgg ttagaatata tcaacattta
+   257941 ttcacttgtt tcaccatcga aatttaaaaa aaattaatag aataaagaaa ataaaactta
+   258001 ctgttctctg tgatatcaca taattgccat gttattttat catactccac tgtggagcat
+   258061 gcatcgctaa tggacgcaaa aaattctaga tgacccatgg gagagaaata tactggcttt
+   258121 tcgggatttt tgcaaaattc taactccaaa gctggtaggc acgacacatt tggtggtttg
+   258181 cgtactttaa ttttgaaacg cactacatgt gccaatagct ttcttctgtc gttcaaagaa
+   258241 ctaactttcg ccacaatgta tttatgtttc gtattgtaga atcgggccgt acatcgatgg
+   258301 aaataacaac gattgtatac gattcgctta cttcttatgc cccattttcg cattaacttt
+   258361 cgaagatttt tggtgtattt ccaatccgaa tgtatctcat caattaagta atcattggta
+   258421 tctacagtca ggttgacagc aaagtttttc tttcccaggc tcggcatagt tgtgaaaact
+   258481 gaatactctt ctccataaac tggatttcgg ttcatgaata ggtacacctt caatggacct
+   258541 ttttggccaa gcaattgttg gttttttaca gaacttgttc tgccattaac attcggtgca
+   258601 aggcaaggta ctgcgaaatt ataatcattg acagtttgat tttgatactc tgtagagctg
+   258661 ttatttatgc aggttgagct ttgattatag atgggatact catcaactag ggggaaatca
+   258721 gctaagctga atgagacatt tccattttgg gacttgcttc cttcgattcc gttttcatac
+   258781 gtttggtttt cgtatccata acttatttcg gtattgactg aactccacga atagtcttca
+   258841 tcattaaaga ggtgagttga attctcacct ggtttttgct cggaaatatt cttataagta
+   258901 cgtgcagcat cttccaaacc aaacttttca cttggtatag tgggatatgt cttagtagtc
+   258961 gtattatatt cctcaaagct agaaaagtcg tagcttttat taagaacggg atcccacaat
+   259021 ctctcaattc ttctctcctc ctcttcggga atcgttcgac gtattatttt atgacgctta
+   259081 cgacttaaca ttttagtaaa tctagtacgt tctgaaaaag agattttccc agcttcagca
+   259141 cttctccacg gatgagaagt aggcccgcct tcagggctct ctgtcgaacg attgtatgcc
+   259201 tccagaaatg tgccatccaa taaggacatt aatggggttc cgctgttttc catgatcaag
+   259261 actagcgaca tcgatatggt catgaagtac aggaacatat catagtttac acaatcgtat
+   259321 cgaaaaatat ttcacttttt gcttaatttc ctccattttc gattaaattt tgactcaaga
+   259381 agcaaggtaa ggactgtctg acgtggtttg gcaatttcag cggaccagct ctttcatttt
+   259441 tttaattgca taatactcga actgggaatg tgtggttgac aggcgacatg acattttacc
+   259501 gcagaaaatg ttcgtatatc ttgttggatt tttaattttt ttcatcacta ccctcttgtt
+   259561 tgcctttttc ctgttcctgc tgttgttgtt cttgtcgtgt cgctgcctac ataattgttt
+   259621 taattaattg cgaattgtgc atgcaaaaag cataaagtgc acttcaaatg tgtgaaaagg
+   259681 aacaaaagcg taaaaggctt ttctgcacag ctggtaattt attgaccaca atagcatcca
+   259741 gaatcggcgt catcaacaag agcgattaca gaacacggct taaccccaac ttcaaatcca
+   259801 agctcatccg ctttcccatt ccaatttcca tttacacagt ccatataaat caaatgaaat
+   259861 taactgagag tgagcgagtg caggaaaagc agcaaataga gtgttgaatt tttataaacc
+   259921 acaaaagcaa cgagcattga caatagacaa tcatacttta tttactataa acttcgcaca
+   259981 tgcatagtgt tttttatgcc aggaggagtg gccaggaagc tgttccaaga cagggcgaca
+   260041 atttcttaaa ttgttcgaac aaatgagaat ggaatagaag taatggaagc caaggagaaa
+   260101 cccatttaca ggcgagcgca cgtcgcataa aaaataaggc accgagaaaa agtggaaact
+   260161 gatttcactt cctctgaatg ctttagagat actctacccc ataaattgtt ctcatatagt
+   260221 cttgggagta attcagtttg tgacttaacc cttcttatct gttgcttttg aaccattaac
+   260281 aagtaaatct tataaatact attttgatat aggttagact atctttgagc tttatgattt
+   260341 gaaacaatta atatatattt aaaaaattga agtttattaa ataaagtttt tgacttgtct
+   260401 caattattgg ccgtgcgtac caaaccccaa aaaaaagagt ataaaaactt taacagtaaa
+   260461 gaaaaaaaag caaaaatgaa agaatggagc attttacgat ttagcatgtg cggaccaact
+   260521 gtgggtcggg tggtttgggg cctgatgaac cggcggacgg acggattcac atacggacgg
+   260581 agcggagcgc cagtggagga ccgccaagtt caggactttg gcgcgtcaag tgcttttcat
+   260641 tttccgagca cgtcgccgcg tcataaaaca aaaacgcatg aagcgcaacg gagaagacga
+   260701 agcggggcgt tcgggcacgt ttagcatata tattttcaca cttgcaatca tactttaagc
+   260761 agaaaagtac gctctcacat acggccaagt ttttacggta gagatggaat cttaaaaaaa
+   260821 aaacgcaaga aaacgcatat cttgaaaaca gcataagctc gatgaaataa atacatcgtt
+   260881 caattcacac ttctcttaaa cgaagaactt aaatttattt atttcaagtt gatccctaac
+   260941 acaataagta tgagtatctc aatcaaaaac cacagattct gcattaatcc gcgaaatagg
+   261001 attgagatgc attttatatg atattttttt taggatatcc tgccttgtat aaatcgaagt
+   261061 tccctgaacg gaatagttct ttctatgcca tccctaagtg ttacacactc atgactctga
+   261121 cacccagttg gtgtcgggtc gggactttcc cgtatcttca ttttcattca cagccaaaag
+   261181 ccgtgccata aaagtgcgtc gggattgtcg acaaactttt gctttcttta cttgctcttt
+   261241 ttttttaccc ttgtttgcca tcgtgggtgt gatgataata atttcaagag tcacatcgac
+   261301 tggcgcgttt atctgttttt ttttttattc gacggcggcg gcttcctacg actgcggctt
+   261361 ttgggttggc tttcattaaa atgtcataga ccccgttggc gcatcttaat taccaaaaat
+   261421 gataaaattt cataaataat gtgctttgct gataaattaa tattaatgcg agcggtctgg
+   261481 caggctggct gaaaagtgtc tcgcagtcgt ctaatagatg tcaactctga tatctgctca
+   261541 tttgacggtc tgacattatt aactgaatcg acttatcact tgtcgtcgag aagggttcgg
+   261601 taagaagaag gtcctggtgt atttgacatt aaaaggattt cgctagatcc gccatgattt
+   261661 ctgacactaa tgacgaataa tgttatctaa attatgtaga ttaaatattc gaaatgcgaa
+   261721 atagttgcga cgcgtgtcat tattttgctc agtcaaagtt caaagcgcta tgccactcgt
+   261781 catcctcatc gtcccttctt ccccttctcg ttattgccat cgccggcatg gaatgcttaa
+   261841 tggatctgaa agcagcgtga aaccattctg caatcctttt tgtgcccatt accctgccag
+   261901 cagctcatcc actccatcca tcacttggct gaaaactctt tatgctcaca aactttccag
+   261961 ctgaagcatt ttcgatggcc ccggggggat ggagttcggt ttactttgga ccggactggt
+   262021 tccggcggtg acgtctgttt cggccacgtt ggcaactttg catgcggttt gtctccggtc
+   262081 caaagttctc gggccctgcg ccctttaatg atccatgccg caaatgaatt gcagtccagt
+   262141 tgcaattgct gcacttgatt tgcggtgcga cttaatggtt aacaattatc tggtggcctt
+   262201 ctgcgggcag gagttccctg ttcctgaagt tgacactaag tgtgcggcag ttagcacatt
+   262261 gaccagattt aagtcttcag agttcccaaa tttaaacaaa cttaggtcct aggtctccaa
+   262321 tattatttaa agacgtcttt tttaacttat tcctatttag ataaaattca tagaattata
+   262381 tatacagcct tttcttataa atgaacttaa cttaggactt aacttgtgat tgacggacat
+   262441 tcacccacaa ctaaaccacg aaatgtgtca gcgacaatta caagaatccg aaactgtcgc
+   262501 acaccggcac ttggccacgc cccgcacatc cggagccgca tccgtagtcc catccgtttc
+   262561 cccatccgta cacccaactc caggctgcac aatatcaatt atcgggcttc aagtgtcgct
+   262621 caagtgtcac gtcaaacacc aacacaaaat acgagcaaag agcatcgagc atccagcatt
+   262681 gagtatctag catcgggaga cccgcgagca cacatgacaa tccagtttcc gccccgagca
+   262741 tcctggctgc actttgcaac ccatccgggc ccccgaaccc atcaggacat gctcctttaa
+   262801 cgcctccccc aatacgtgta tgtaagtaac attttgcgct tttgactttg attgattaca
+   262861 aaagcaattg agcatatcgt cacgctgggg ggagcaaaaa aaaaaaaaaa tctgaaatcg
+   262921 aacaaagcaa ggcaaatgaa ttaacataaa taaaagaggc caaggaacga attttcattg
+   262981 acagcttccg acaaaaggaa aagcgtaggg caattagaag tatccatttt ccgttttcca
+   263041 cctaccaatt ctaatgagca atgccttcta attttcatca aggcttctga ttttccaaat
+   263101 cgcttatggc tgcgagattt cggtgaagct tctcagacag gcttgccatt aattagcaca
+   263161 attgaagcgg gcttaacgac accctttgca aggctaatga aaataaaatt tcaattacgt
+   263221 ttaattgata aactaatcaa caaaggtcga caggcaatga ttgatggggt tttctcgttc
+   263281 tcttctttaa attttcgtag aagtacata
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AF032047.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AF032047.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AF032047.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,54 @@
+LOCUS       ALSEB2MP1                291 bp    DNA     linear   PRI 30-JUN-1998
+DEFINITION  Alouatta seniculus beta-2-microglobulin precursor, gene, exon 1.
+ACCESSION   AF032047
+VERSION     AF032047.1  GI:3264957
+KEYWORDS    .
+SEGMENT     1 of 2
+SOURCE      Alouatta seniculus (howler monkey)
+  ORGANISM  Alouatta seniculus
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Platyrrhini; Cebidae; Alouattinae;
+            Alouatta.
+REFERENCE   1  (bases 1 to 291)
+  AUTHORS   Canavez,F.C., Ladasky,J.J., Muniz,J.A., Seuanez,H.N., Parham,P. and
+            Cavanez,C.
+  TITLE     beta2-Microglobulin in neotropical primates (Platyrrhini)
+  JOURNAL   Immunogenetics 48 (2), 133-140 (1998)
+  MEDLINE   98298008
+   PUBMED   9634477
+REFERENCE   2  (bases 1 to 291)
+  AUTHORS   Canavez,F.C., Ladasky,J.J., Seuanez,H.N. and Parham,P.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (31-OCT-1997) Structural Biology, Stanford University,
+            Fairchild Building, Campus West Dr., Room #D-100, Stanford, CA
+            94305-5126, USA
+FEATURES             Location/Qualifiers
+     source          1..291
+                     /organism="Alouatta seniculus"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:9503"
+     mRNA            join(<136..202,AF032048.1:67..345,AF032048.1:1162..1189)
+                     /product="beta-2-microglobulin precursor"
+     sig_peptide     136..195
+     CDS             join(136..202,AF032048.1:67..345,AF032048.1:1162..1175)
+                     /codon_start=1
+                     /product="beta-2-microglobulin precursor"
+                     /protein_id="AAC52091.1"
+                     /db_xref="GI:3264961"
+                     /translation="MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLN
+                     CYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRV
+                     SHVTFPTPKTVKWDRTM"
+     sig_peptide     136..195
+     exon            <136..202
+                     /number=1
+     intron          203..>291
+                     /number=1
+BASE COUNT       31 a    103 c     78 g     79 t
+ORIGIN      
+        1 cggtccccgc gggccttctc ctgattggct gtccccgcgg gccttgtcct tattggctgt
+       61 gcccgactcc gtataacata aatagaggcg tcgagttgcg cgggcattac tgcagcggac
+      121 cgcacttggg tcgagatggc tcgcttcgtg gtggtagccc tgctcgcgct actctctctg
+      181 tctggcctgg aggctatcca gcgtaagtct ctcctcccgt ccggcgctgg tcctttccct
+      241 cccgccccaa ctctctgtag ccctctctgt gctctctcgt tccgttacct g
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AF165282.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AF165282.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AF165282.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+LOCUS       HSATPCB01     226 bp    DNA             PRI       17-AUG-1999
+DEFINITION  Homo sapiens ATP cassette binding transporter 1 (ABC1) gene, exon
+            12.
+ACCESSION   AF165282
+VERSION     AF165282.1  GI:5734104
+KEYWORDS    .
+SOURCE      human.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 226)
+  AUTHORS   Rust,S., Rosier,M., Funke,H., Real,J., Amoura,Z., Piette,J.C.,
+            Deleuze,J.F., Brewer,H.B., Duverger,N., Denefle,P. and Assmann,G.
+  TITLE     Tangier disease is caused by mutations in the gene encoding
+            ATP-binding cassette transporter 1
+  JOURNAL   Nat. Genet. 22 (4), 352-355 (1999)
+  MEDLINE   99364413
+   PUBMED   10431238
+REFERENCE   2  (bases 1 to 226)
+  AUTHORS   Rust,S., Rosier,M., Funke,H., Real,J., Amoura,Z., Piette,J.C.,
+            Deleuze,J.F., Brewer,H.B., Duverger,N., Denefle,P. and Assmann,G.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (06-JUL-1999) Genomics, Rhone-Poulenc Rorer, 2 rue Gaston
+            Cremieux, Evry 91006, France
+FEATURES             Location/Qualifiers
+     source          1..226
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /chromosome="9"
+                     /map="9q31"
+     gene            join(<1..226,AF165283.1:1..197,AF165284.1:1..243,
+                     AF165285.1:1..242,AF165286.1:1..225,AF165287.1:1..152,
+                     AF165288.1:1..163,AF165289.1:1..158,AF165290.1:1..241,
+                     AF165291.1:1..93,AF165292.1:1..223,AF165293.1:1..69,
+                     AF165294.1:1..134,AF165295.1:1..169,AF165296.1:1..145,
+                     AF165297.1:1..119,AF165298.1:1..209,AF165299.1:1..115,
+                     AF165300.1:1..53,AF165301.1:1..126,AF165302.1:1..95,
+                     AF165303.1:1..190,AF165304.1:1..198,AF165305.1:1..136,
+                     AF165306.1:1..165,AF165307.1:1..150,AF165308.1:1..141,
+                     AF165309.1:1..83,AF165310.1:1..>264)
+                     /gene="ABC1"
+     mRNA            join(<16..221,AF165283.1:16..192,AF165284.1:16..238,
+                     AF165285.1:16..237,AF165286.1:16..>220)
+                     /gene="ABC1"
+                     /product="ATP cassette binding transporter 1"
+     exon            16..221
+                     /gene="ABC1"
+                     /number=12
+     CDS             join(<16..221,AF165283.1:16..192,AF165284.1:16..238,
+                     AF165285.1:16..237,AF165286.1:16..>220)
+                     /gene="ABC1"
+                     /codon_start=1
+                     /db_xref="GI:5734134"
+BASE COUNT       69 a     46 c     58 g     53 t
+ORIGIN      
+        1 ctgttcttct atcagtgtgt caacctgaac aagctagaac ccatagcaac agaagtctgg
+       61 ctcatcaaca agtccatgga gctgctggat gagaggaagt tctgggctgg tattgtgttc
+      121 actggaatta ctccaggcag cattgagctg ccccatcatg tcaagtacaa gatccgaatg
+      181 gacattgaca atgtggagag gacaaataaa atcaaggatg ggtaag
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AHCYL1.kegg
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AHCYL1.kegg	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AHCYL1.kegg	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+ENTRY       10768             CDS       H.sapiens
+NAME        AHCYL1
+DEFINITION  S-adenosylhomocysteine hydrolase-like 1 [EC:3.3.1.1]
+ORTHOLOG    KO: K01251  adenosylhomocysteinase
+CLASS       Metabolism; Amino Acid Metabolism; Methionine metabolism
+            [PATH:hsa00271]
+            Metabolism; Metabolism of Other Amino Acids; Selenoamino acid
+            metabolism [PATH:hsa00450]
+POSITION    1:join(26813..26932,50794..50905,52974..53117,54122..54222,54657..
+            54759,56523..56617,57185..57291,58104..58220,58427..58490,59255..
+            59343,59706..59776,60133..60227,60312..60410,60811..60879,61308..
+            61386,62491..62611,63434..63440)
+DBLINKS     LocusLink: 10768
+            GDB: 9958257
+            NCBI: 21361647
+            SP: O43865
+CODON_USAGE       T               C               A               G
+          T   7   8   1  10   8  10   6   2   7  11   1   0  11   8   0   8
+          C   3  10   2  16   3   8   4   3   2   6   6  17   3   6   7   3
+          A   9  15   3  14   6  11  15   1  11  11  14  24   2   8   2   4
+          G  10   9   7  17  18  11  11   3  21  11  10  23   5  11   6  11
+AASEQ       530
+            MSMPDAMPLPGVGEELKQAKEIEDAEKYSFMATVTKAPKKQIQFADDMQEFTKFPTKTGR
+            RSLSRSISQSSTDSYSSAASYTDSSDDEVSPREKQQTNSKGSSNFCVKNIKQAEFGRREI
+            EIAEQDMSALISLRKRAQGEKPLAGAKIVGCTHITAQTAVLIETLCALGAQCRWSACNIY
+            STQNEVAAALAEAGVAVFAWKGESEDDFWWCIDRCVNMDGWQANMILDDGGDLTHWVYKK
+            YPNVFKKIRGIVEESVTGVHRLYQLSKAGKLCVPAMNVNDSVTKQKFDNLYCCRESILDG
+            LKRTTDVMFGGKQVVVCGYGEVGKGCCAALKALGAIVYITEIDPICALQACMDGFRVVKL
+            NEVIRQVDVVITCTGNKNVVTREHLDRMKNSCIVCNMGHSNTEIDVTSLRTPELTWERVR
+            SQVDHVIWPDGKRVVLLAEGRLLNLSCSTVPTFVLSITATTQALALIELYNAPEGRYKQD
+            VYLLPKKMDEYVASLHLPSFDAHLTELTDDQAKYLGLNKNGPFKPNYYRY
+NTSEQ       1593
+            atgtcgatgcctgacgcgatgccgctgcccggggtcggggaggagctgaagcaggccaag
+            gagatcgaggacgccgagaagtactccttcatggccaccgtcaccaaggcgcccaagaag
+            caaatccagtttgctgatgacatgcaggagttcaccaaattccccaccaaaactggccga
+            agatctttgtctcgctcgatctcacagtcctccactgacagctacagttcagctgcatcc
+            tacacagatagctctgatgatgaggtttctccccgagagaagcagcaaaccaactccaag
+            ggcagcagcaatttctgtgtgaagaacatcaagcaggcagaatttggacgccgggagatt
+            gagattgcagagcaagacatgtctgctctgatttcactcaggaaacgtgctcagggggag
+            aagcccttggctggtgctaaaatagtgggctgtacacacatcacagcccagacagcggtg
+            ttgattgagacactctgtgccctgggggctcagtgccgctggtctgcttgtaacatctac
+            tcaactcagaatgaagtagctgcagcactggctgaggctggagttgcagtgttcgcttgg
+            aagggcgagtcagaagatgacttctggtggtgtattgaccgctgtgtgaacatggatggg
+            tggcaggccaacatgatcctggatgatgggggagacttaacccactgggtttataagaag
+            tatccaaacgtgtttaagaagatccgaggcattgtggaagagagcgtgactggtgttcac
+            aggctgtatcagctctccaaagctgggaagctctgtgttccggccatgaacgtcaatgat
+            tctgttaccaaacagaagtttgataacttgtactgctgccgagaatccattttggatggc
+            ctgaagaggaccacagatgtgatgtttggtgggaaacaagtggtggtgtgtggctatggt
+            gaggtaggcaagggctgctgtgctgctctcaaagctcttggagcaattgtctacattacc
+            gaaatcgaccccatctgtgctctgcaggcctgcatggatgggttcagggtggtaaagcta
+            aatgaagtcatccggcaagtcgatgtcgtaataacttgcacaggaaataagaatgtagtg
+            acacgggagcacttggatcgcatgaaaaacagttgtatcgtatgcaatatgggccactcc
+            aacacagaaatcgatgtgaccagcctccgcactccggagctgacgtgggagcgagtacgt
+            tctcaggtggaccatgtcatctggccagatggcaaacgagttgtcctcctggcagagggt
+            cgtctactcaatttgagctgctccacagttcccacctttgttctgtccatcacagccaca
+            acacaggctttggcactgatagaactctataatgcacccgaggggcgatacaagcaggat
+            gtgtacttgcttcctaagaaaatggatgaatacgttgccagcttgcatctgccatcattt
+            gatgcccaccttacagagctgacagatgaccaagcaaaatatctgggactcaacaaaaat
+            gggccattcaaacctaattattacagatactaa
+///

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ATF14F8.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ATF14F8.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ATF14F8.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2836 @@
+LOCUS       ATF14F8     96892 bp    DNA             PLN       07-AUG-2000
+DEFINITION  Arabidopsis thaliana DNA chromosome 5, BAC clone F14F8 (ESSA
+            project).
+ACCESSION   AL391144
+VERSION     AL391144.1  GI:9755607
+KEYWORDS    .
+SOURCE      thale cress.
+  ORGANISM  Arabidopsis thaliana
+            Eukaryota; Viridiplantae; Embryophyta; Tracheophyta; Spermatophyta;
+            Magnoliophyta; eudicotyledons; core eudicots; Rosidae; eurosids II;
+            Brassicales; Brassicaceae; Arabidopsis.
+REFERENCE   1  (bases 1 to 96892)
+  AUTHORS   Sato,S., Nakamura,Y., Kaneko,T., Kato,T., Asamizu,E., Kotani,H.,
+            Tabata,S., Mewes,H.W., Rudd,S., Lemcke,K. and Mayer,K.F.X.
+  JOURNAL   Unpublished
+REFERENCE   2  (bases 1 to 96892)
+  AUTHORS   EU Arabidopsis sequencing,project.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (07-AUG-2000) MIPS, at the Max-Planck-Institut fuer
+            Biochemie, Am Klopferspitz 18a, D-82152 Martinsried, FRG, E-mail:
+            lemcke at mips.biochem.mpg.de,mayer at mips.biochem.mpg.de
+COMMENT     Information on performance of analysis and a more detailed
+            annotation of this entry and other sequences of chromosomes 3, 4
+            and 5 can be viewed at: http://www.mips.biochem.mpg.de/proj/thal/.
+FEATURES             Location/Qualifiers
+     source          1..96892
+                     /organism="Arabidopsis thaliana"
+                     /variety="Columbia"
+                     /db_xref="taxon:3702"
+                     /chromosome="5"
+     exon            626..652
+                     /gene="F14F8_10"
+                     /number=1
+     gene            626..2147
+                     /gene="F14F8_10"
+     CDS             join(626..652,743..1199,1277..1424,1504..1781,1870..2147)
+                     /gene="F14F8_10"
+                     /note="strong similarity to putative phytochelatin
+                     synthetase - Arabidopsis thaliana, EMBL:AJ006787"
+                     /codon_start=1
+                     /product="putative phytochelatin synthetase"
+                     /protein_id="CAC01762.1"
+                     /db_xref="GI:9755608"
+                     /translation="MSWTADGYVATVTMNNFQIYRHIQNPGWTLGWTWAKKEVIWSMV
+                     GAQTTEQGDCSKFKGNVPHCCKKTPTVVDLLPGVPYNQQFSNCCKGGVIGAWGQDPSA
+                     AVSQFQVSAGLAGTTNKTVKLPKNFTLLGPGPGYTCGPAKIVPSTVFLTTDKRRKTQA
+                     LMTWNVTCTYSQFLARKHPSCCVSFSSFYNDTITPCPSCACGCENKKSCVKADSKILT
+                     KKGLNTPKKDNTPLLQCTHHMCPVRVHWHVKTNYKDYWRVKIAITNFNYRMNHTLWTL
+                     AIQHPNLNNVTQVFSFDYKPVSPYGSINDTGMFYGTKFYNDLLMEAGPSGNVQSEVLL
+                     QKDQKTFTFKQGWAFPRKVYFNGDECMLPPPDSYPFLPNSAQGNFASFSLTILLLLFI
+                     SIW"
+     intron          653..742
+                     /gene="F14F8_10"
+                     /number=1
+     exon            743..1199
+                     /gene="F14F8_10"
+                     /number=2
+     intron          1200..1276
+                     /gene="F14F8_10"
+                     /number=2
+     exon            1277..1424
+                     /gene="F14F8_10"
+                     /number=3
+     intron          1425..1503
+                     /gene="F14F8_10"
+                     /number=3
+     exon            1504..1781
+                     /gene="F14F8_10"
+                     /number=4
+     intron          1782..1869
+                     /gene="F14F8_10"
+                     /number=4
+     exon            1870..2147
+                     /gene="F14F8_10"
+                     /number=5
+     gene            3192..5279
+                     /gene="F14F8_20"
+     CDS             join(3192..3253,3824..4205,4309..4524,4578..4796,
+                     4969..5125,5254..5279)
+                     /gene="F14F8_20"
+                     /note="similarity to Mitochondrial carrier protein - Ribes
+                     nigrum, EMBL:AJ007580
+                     Contains Mitochondrial energy transfer proteins signature
+                     AA284-293"
+                     /codon_start=1
+                     /product="putative mitochondrial carrier protein"
+                     /protein_id="CAC01763.1"
+                     /db_xref="GI:9755609"
+                     /translation="MDTPPTSRIASFGQTEINWDKLDKRRFYINGAGLFTGVTVALYP
+                     VSVVKTRLQVASKEIAERSAFSVVKGILKNDGVPGLYRGFGTVITGAVPARIIFLTAL
+                     ETTKISAFKLVAPLELSEPTQAAIANGIAGMTASLFSQAVFVPIDVVSQKLMVQGYSG
+                     HATYTGGIDVATKIIKSYGVRGLYRGFGLSVMTYSPSSAAWWASYGSSQRVIWRLAMN
+                     VLSFLEFGFATKATIPLIQYLLLLGRFLGYGGDSDATAAPSKSKIVMVQAAGGIIAGA
+                     TASSITTPLDTIKTRLQVMGHQENRPSAKQVVKKLLAEDGWKGFYRGLGPRFFSMSAW
+                     GTSMILTYEYLKRLCAIED"
+     exon            3192..3253
+                     /gene="F14F8_20"
+                     /number=1
+     intron          3254..3823
+                     /gene="F14F8_20"
+                     /number=1
+     exon            3824..4205
+                     /gene="F14F8_20"
+                     /number=2
+     intron          4206..4308
+                     /gene="F14F8_20"
+                     /number=2
+     exon            4309..4524
+                     /gene="F14F8_20"
+                     /number=3
+     intron          4525..4577
+                     /gene="F14F8_20"
+                     /number=3
+     exon            4578..4796
+                     /gene="F14F8_20"
+                     /number=4
+     intron          4797..4968
+                     /gene="F14F8_20"
+                     /number=4
+     exon            4969..5125
+                     /gene="F14F8_20"
+                     /number=5
+     intron          5126..5253
+                     /gene="F14F8_20"
+                     /number=5
+     exon            5254..5279
+                     /gene="F14F8_20"
+                     /number=6
+     gene            7805..9695
+                     /gene="F14F8_30"
+     CDS             join(7805..8146,8760..9010,9099..9267,9375..9695)
+                     /gene="F14F8_30"
+                     /codon_start=1
+                     /product="reversibly glycosylated polypeptide-3"
+                     /protein_id="CAC01764.1"
+                     /db_xref="GI:9755610"
+                     /translation="MVEPANTVGLPVNPTPLLKDELDIVIPTIRNLDFLEMWRPFLQP
+                     YHLIIVQDGDPSKKIHVPEGYDYELYNRNDINRILGPKASCISFKDSACRCFGYMVSK
+                     KKYIFTIDDDCFVAKDPSGKAVNALEQHIKNLLCPSSPFFFNTLYDPYREGADFVRGY
+                     PFSLREGVSTAVSHGLWLNIPDYDAPTQLVKPKERNTRYVDAVMTIPKGTLFPMCGMN
+                     LAFDRDLIGPAMYFGLMGDGQPIGRYDDMWAGWCIKVICDHLSLGVKTGLPYIYHSKA
+                     SNPFVNLKKEYKGIFWQEEIIPFFQNAKLSKEAVTVQQCYIELSKMVKEKLSSLDPYF
+                     DKLADAMVTWIEAWDELNPPAASGKA"
+     exon            7805..8146
+                     /gene="F14F8_30"
+                     /number=1
+     intron          8147..8759
+                     /gene="F14F8_30"
+                     /number=1
+     exon            8760..9010
+                     /gene="F14F8_30"
+                     /number=2
+     intron          9011..9098
+                     /gene="F14F8_30"
+                     /number=2
+     exon            9099..9267
+                     /gene="F14F8_30"
+                     /number=3
+     intron          9268..9374
+                     /gene="F14F8_30"
+                     /number=3
+     exon            9375..9695
+                     /gene="F14F8_30"
+                     /number=4
+     exon            11254..12399
+                     /gene="F14F8_40"
+                     /number=1
+     gene            11254..12643
+                     /gene="F14F8_40"
+     CDS             join(11254..12399,12473..12643)
+                     /gene="F14F8_40"
+                     /note="similarity to several hypothetical proteins -
+                     Arabidopsis thaliana"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01765.1"
+                     /db_xref="GI:9755611"
+                     /translation="MRRRSKKIKTENNSNPETSEERNKFDEIPHDLVIEILERLPLKS
+                     VARFLTVSKLWATTIRSPDFRKSYRGGSSSEPRTLIVSDLNFKEPNPKLHFFRPSISS
+                     PSFLSSLTCPFTYPRHEEYYYHHVNGLISVGYGTDQIVINPTTGKFITLPRPKTRRKL
+                     VISFFGYDSVSDQYKVLCMTERLRGHPEEASSQHQVYTLGAKQKSWKMINCSIPHRPW
+                     SWNAVCINGVVYYIAKTGEGMFRRCLMRFDLKSDNLDLCTILPEEIQTSLHDYFLINY
+                     KGKVAIPNQPNFYTYDVWVMNQEGGKIEWLKNITFTIKPRKGFVRYLFVTGTTHTGEF
+                     ILAPTSYTDEFYVFHYNPDMNSFRKIRVQAPGVKFSFAQKASVVFSDHSLLRLDNLHI
+                     RGSTHTATGEFILAPRFYSDDLNVIHFNPDTNSFRSTKVEVYEDYE"
+     intron          12400..12472
+                     /gene="F14F8_40"
+                     /number=1
+     exon            12473..12643
+                     /gene="F14F8_40"
+                     /number=2
+     CDS             join(15197..15443,15612..16009)
+                     /gene="F14F8_50"
+                     /note="similarity to several hypothetical proteins -
+                     Arabidopsis thaliana"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01766.1"
+                     /db_xref="GI:9755612"
+                     /translation="MMRRRNKKTKTVISNPETLEERNKFDEIPHDLVIEILGRLPAKS
+                     VARFLTVSKLWATSIRSLDFIKSYPLGSSSKPRTLVASKQVVANPSTGRTIPLPRVKT
+                     RRTIATSFFGYDSVSDQYKVLCMTVKAYGDLRDESSQHQVFTLGAKKKSFRMIDTSII
+                     PHRPCSNGVCIDSVVYYVAKTGAGMLHLCIMRFDLSSEILDLFTSLPQEIRPPS"
+     exon            15197..15443
+                     /gene="F14F8_50"
+                     /number=1
+     gene            15197..16009
+                     /gene="F14F8_50"
+     intron          15444..15611
+                     /gene="F14F8_50"
+                     /number=1
+     exon            15612..16009
+                     /gene="F14F8_50"
+                     /number=2
+     gene            16790..26395
+                     /gene="F14F8_60"
+     exon            complement(16790..19855)
+                     /gene="F14F8_60"
+                     /number=1
+     gene            complement(join(16790..19855,20136..20912,21378..21497,
+                     21654..21876,22204..22400,22527..23158,23335..23448,
+                     23538..23938,24175..24536,24604..24715,24889..24984,
+                     25114..25171,25257..25329,25544..25589,25900..26018,
+                     26300..26395))
+                     /gene="F14F8_60"
+     CDS             complement(join(16790..19855,20136..20912,21378..21497,
+                     21654..21876,22204..22400,22527..23158,23335..23448,
+                     23538..23938,24175..24536,24604..24715,24889..24984,
+                     25114..25171,25257..25329,25544..25589,25900..26018,
+                     26300..26395))
+                     /gene="F14F8_60"
+                     /note="Contains Prokaryotic membrane lipoprotein lipid
+                     attachment site AA801-811"
+                     /codon_start=1
+                     /product="hypothetical protein"
+                     /protein_id="CAC01767.1"
+                     /db_xref="GI:9755613"
+                     /translation="MKSGSAAKLIVEALLQRFLPLARRRIETAQAQDGQYLRPSDPAY
+                     EQVLDSLAMIARHTPVPLLEALLRWRESESPKGANDASTFQRKLAVECIFCSACIRFV
+                     ECCPQEGLTEKLWSGLENFVFDWLINADRVVSQVEYPSLVDLRGLLLDLVAQLLGALS
+                     RISSVTERFFMELNTRRIDTSVARSETLSIINGMRYLKLGVKSEGGLNASASFVAKAN
+                     PLIRDIHKRKSELYHALCNMLSNILAPLSDGGKSQWPPSVAEPALTLWYEAVGRIRVQ
+                     LIQWMEKQSKHLGVRQSDLVSLLYAYLVITVLNMYNWMNRLAIRFLFLVIIENFAPYM
+                     QDKNHRYMALDCLHRVLRFYLSVYASSQPPNRIWDYLDSVTSQLLTVLRKGMLTQDVQ
+                     QDKLVEFCVTIAEHNLDFAMNHMLLELLKQDSPSEAKIIGLRALLALVMSPSSQYVGL
+                     EIFKGHGIGHYIPKVKAAIESILRSCHRTYSQALLTSSRTTIDAVNKEKSQGSLFQSV
+                     LKCIPYLIEEVGRSDKITEIIPQHGISIDPGVRVEAVQVLNRIVRYLPHRRFAVMRGM
+                     ANFILKLPDEFPLLIQASLGRLLELMRFWRACLVDDRQDTDAEEENKTAKGNDRFKKL
+                     SFHQAADAIEFRASDIDAVGLIFLSSVDSQIRHTALELLRCVRALRNDIRDLMIQEHP
+                     DHVMKFEAEPIYMIDVLEEHGDDIVQSCYWDSARPFDLRRDSDAIPSDVTLQSIIFES
+                     LDKNKWGRCLSELVKYAAELCPRSVQEAKSEIMHRLAHITPVEFGGKANQSQDTDNKL
+                     DQWLLYAMFVCSCPPDGKDAGSIASTRDMYHLIFPYLRFGSETHNHAATMALGRSHLE
+                     ACEIMFSELASFMEEISSETETKPKWKIQKGGRREDLRVHVSNIYRTVSENVWPGMLA
+                     RKPVFRLHYLRFIEDSTRQISLAPHESFQDMQPLRYALASVLRFLAPEFVESKSEKFD
+                     VRSRKRLFDLLLSWSDDTGNTWGQDGVSDYRREVERYKTSQHNRSKDSVDKISFDKEL
+                     NEQIEAIQWASLNAMASLLYGPCFDDNARKMSGRVISWINSLFIEPAPRVPFGYSPAD
+                     PRTPSYSKYTGEGGRGTTGRDRHRGGHQRVALAKLALKNLLLTNLDLFPACIDQCYYS
+                     DAAIADGYFSVLAEVYMRQEIPKCEIQRLLSLILYKVVDPSRQIRDDALQMLETLSMR
+                     EWAEDGIEGSGGYRAAVVGNLPDSYQQFQYKLSCKLAKDHPELSQLLCEEIMQRQLDA
+                     VDIIAQHQVLTCMAPWIENLNFWKLKDSGWSERLLKSLYYVTWRHGDQFPDEIEKLWS
+                     TIASKPRNISPVLDFLITKGIEDCDSNASAEITGAFATYFSVAKRVSLYLARICPQRT
+                     IDHLVYQLSQRMLEDSIEPIGYSANRGDSNGNFVLEFSQGPATAPQVVSVADSQPHMS
+                     PLLVRGSLDGPLRNTSGSLSWRTAGMTGRSASGPLSPMPPELNIVPVATGRSGQLLPS
+                     LVNASGPLMGVRSSTGSLRSRHVSRDSGDYLIDTPNSGEDVLHSGIAMHGVNAKELQS
+                     ALQGHQQHSLTHADIALILLAEIAYENDEDFREHLPLLFHVTFVSMDSSEDIVLEHCQ
+                     HLLVNLLYSLAGRHLELYEVENSDGENKQQVVSLIKYVQSKRGSMMWENEDPTVVRTD
+                     LPSAALLSALVQSMVDAIFFQGDLRETWGTEALKWAMECTSRHLACRSHQIYRALRPS
+                     VTSDACVSLLRCLHRCLSNPIPPVLGFIMEILLTLQVMVENMEPEKVILYPQLFWGCV
+                     AMMHTDFVHVYCQVLELFSRIIDRLSFRDKTTENVLLSSMPRDEFNTNDLGEFQRSES
+                     RGYEMPPSSGTLPKFEGVQPLVLKGLMSTVSHEFSIEVLSRITVPSCDSIFGDAETRL
+                     LMHITGLLPWLCLQLTQDQVMVSALPLQQQYQKACSVASNIAVWCRAKSLDELATVFV
+                     AYARGEIKRVENLLACVSPLLCNKWFPKHSALAFGHLLRLLKKGPVDYQRVILLMLKA
+                     LLQHTPMDASQSPHMYTIVSQLVESTLCWEALSVLEALLQSCSPVQGGTGGSHPQDSS
+                     YSENGTDEKTLVPQTSFKARSGPLQYAMMAATMSQPFPLGAAAAESGIPPRDVALQNT
+                     RLMLGRVLDNCALGRRDYRRLVPFVTTIANM"
+     intron          complement(19856..20135)
+                     /gene="F14F8_60"
+                     /number=1
+     exon            complement(20136..20912)
+                     /gene="F14F8_60"
+                     /number=2
+     intron          complement(20913..21377)
+                     /gene="F14F8_60"
+                     /number=2
+     exon            complement(21378..21497)
+                     /gene="F14F8_60"
+                     /number=3
+     intron          complement(21498..21653)
+                     /gene="F14F8_60"
+                     /number=3
+     exon            complement(21654..21876)
+                     /gene="F14F8_60"
+                     /number=4
+     intron          complement(21877..22203)
+                     /gene="F14F8_60"
+                     /number=4
+     exon            complement(22204..22400)
+                     /gene="F14F8_60"
+                     /number=5
+     intron          complement(22401..22526)
+                     /gene="F14F8_60"
+                     /number=5
+     exon            complement(22527..23158)
+                     /gene="F14F8_60"
+                     /number=6
+     intron          complement(23159..23334)
+                     /gene="F14F8_60"
+                     /number=6
+     exon            complement(23335..23448)
+                     /gene="F14F8_60"
+                     /number=7
+     intron          complement(23449..23537)
+                     /gene="F14F8_60"
+                     /number=7
+     exon            complement(23538..23938)
+                     /gene="F14F8_60"
+                     /number=8
+     intron          complement(23939..24174)
+                     /gene="F14F8_60"
+                     /number=8
+     exon            complement(24175..24536)
+                     /gene="F14F8_60"
+                     /number=9
+     intron          complement(24537..24603)
+                     /gene="F14F8_60"
+                     /number=9
+     exon            complement(24604..24715)
+                     /gene="F14F8_60"
+                     /number=10
+     intron          complement(24716..24888)
+                     /gene="F14F8_60"
+                     /number=10
+     exon            complement(24889..24984)
+                     /gene="F14F8_60"
+                     /number=11
+     intron          complement(24985..25113)
+                     /gene="F14F8_60"
+                     /number=11
+     exon            complement(25114..25171)
+                     /gene="F14F8_60"
+                     /number=12
+     intron          complement(25172..25256)
+                     /gene="F14F8_60"
+                     /number=12
+     exon            complement(25257..25329)
+                     /gene="F14F8_60"
+                     /number=13
+     intron          complement(25330..25543)
+                     /gene="F14F8_60"
+                     /number=13
+     exon            complement(25544..25589)
+                     /gene="F14F8_60"
+                     /number=14
+     intron          complement(25590..25899)
+                     /gene="F14F8_60"
+                     /number=14
+     exon            complement(25900..26018)
+                     /gene="F14F8_60"
+                     /number=15
+     intron          complement(26019..26299)
+                     /gene="F14F8_60"
+                     /number=15
+     exon            complement(26300..26395)
+                     /gene="F14F8_60"
+                     /number=16
+     misc_feature    27422..29058
+                     /note="putative Mutator like transposase"
+     gene            complement(join(29425..29769,29840..30004))
+                     /gene="F14F8_70"
+     exon            complement(29425..29769)
+                     /gene="F14F8_70"
+                     /number=1
+     gene            29425..30004
+                     /gene="F14F8_70"
+     CDS             complement(join(29425..29769,29840..30004))
+                     /gene="F14F8_70"
+                     /note="similarity to hypothetical protein AT4g04780 -
+                     Arabidopsis thaliana, EMBL:AL161501"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01768.1"
+                     /db_xref="GI:9755614"
+                     /translation="MSTNSYYSSASSSGFRVCPPGVPSKCWCGEEIITFTSKTKENPY
+                     RRFYRCAIAMKRENEEHLFKWVDEALLDEIKMVNEKCKRVAENISDLRMNVMVNMELL
+                     NKNAKQMEEELIKKMEGELLTMKENVEELGRTCHGKVSFEDCRGCCCDCCIYCMALGK
+                     SVGVVEVLE"
+     intron          complement(29770..29839)
+                     /gene="F14F8_70"
+                     /number=1
+     exon            complement(29840..30004)
+                     /gene="F14F8_70"
+                     /number=2
+     exon            complement(31395..31505)
+                     /gene="F14F8_80"
+                     /number=1
+     gene            complement(join(31395..31505,31597..31700,31806..31920,
+                     32027..32074,32167..32256,32333..32407,32602..32754,
+                     33036..33120,33210..33280,33373..33423,33563..33979,
+                     34057..34188,34338..34451,34537..34654,34956..35014,
+                     35276..35342,35433..35563,35663..35872,36327..37211))
+                     /gene="F14F8_80"
+     gene            31395..37211
+                     /gene="F14F8_80"
+     CDS             complement(join(31395..31505,31597..31700,31806..31920,
+                     32027..32074,32167..32256,32333..32407,32602..32754,
+                     33036..33120,33210..33280,33373..33423,33563..33979,
+                     34057..34188,34338..34451,34537..34654,34956..35014,
+                     35276..35342,35433..35563,35663..35872,36327..37211))
+                     /gene="F14F8_80"
+                     /note="Contains Prokaryotic membrane lipoprotein lipid
+                     attachment site AA922-932;Bacteriophage-type RNA
+                     polymerase family active site signatures
+                     AA708-719;Bacteriophage-type RNA polymerase family active
+                     site signatures AA781-795"
+                     /codon_start=1
+                     /product="DNA-directed RNA polymerase (mitochondrial)"
+                     /protein_id="CAC01769.1"
+                     /db_xref="GI:9755615"
+                     /translation="MSSAQTPLFLANQTKVFDHLIPLHKPFISSPNPVSQSFPMWRNI
+                     AKQAISRSAARLNVSSQTRGLLVSSPESIFSKNLSFRFPVLGSPCHGKGFRCLSGITR
+                     REEFSKSERCLSGTLARGYTSVAEEEVLSTDVEEEPEVDELLKEMKKEKKRESHRSWR
+                     MKKQDQFGMGRTKFQNLWRRQVKIETEEWERAAAEYMELLTDMCEQKLAPNLPYVKSL
+                     FLGWFEPLRDAIAKDQELYRLGKSKATYAHYLDQLPADKISVITMHKLMGHLMTGGDN
+                     GCVKVVHAACTVGDAIEQEIRICTFLDKKKKGDDNEESGGVENETSMKEQDKLRKKVN
+                     ELIKKQKLSAVRKILQSHDYTKPWIADVRAKVGSRLIELLVRTAYIQSPADQQDNDLP
+                     DVRPAFVHTFKVAKGSMNSGRKYGVIECDPLVRKGLEKSGRYAVMPYMPMLVPPLKWS
+                     GYDKGAYLFLTSYIMKTHGAKQQREALKSAPKGQLQPVFEALDTLGSTKWRVNKRVLT
+                     VVDRIWSSGGCVADMVDRSDVPLPEKPDTEDEGILKKWKWEVKSAKKVNSERHSQRCD
+                     TELKLSVARKMKDEEAFYYPHNMDFRGRAYPMPPHLNHLGSDLCRGVLEFAEGRPMGI
+                     SGLRWLKIHLANLYAGGVDKLSLDGRLAFTENHLDDIFDSADRPLEGSRWWLQAEDPF
+                     QCLAVCISLTEALRSPSPETVLSHIPIHQDGSCNGLQHYAALGRDTLGAEAVNLVAGE
+                     KPADVYSGIATRVLDIMRRDADRDPEVFPEALRARKLLNQVDRKLVKQTVMTSVYGVT
+                     YIGARDQIKRRLKERSDFGDEKEVFGAACYAAKVTLAAIDEMFQAARAIMRWFGECAK
+                     IIASENETVRWTTPLGLPVVQPYHQMGTKLVKTSLQTLSLQHETDQVIVRRQRTAFPP
+                     NFIHSLDGSHMMMTAVACKRAGVCFAGVHDSFWTHACDVDKLNIILREKFVELYSQPI
+                     LENLLESFEQSFPHLDFPPLPERGDLDLKVVLDSPYFFN"
+     intron          complement(31506..31596)
+                     /gene="F14F8_80"
+                     /number=1
+     exon            complement(31597..31700)
+                     /gene="F14F8_80"
+                     /number=2
+     intron          complement(31701..31805)
+                     /gene="F14F8_80"
+                     /number=2
+     exon            complement(31806..31920)
+                     /gene="F14F8_80"
+                     /number=3
+     intron          complement(31921..32026)
+                     /gene="F14F8_80"
+                     /number=3
+     exon            complement(32027..32074)
+                     /gene="F14F8_80"
+                     /number=4
+     intron          complement(32075..32166)
+                     /gene="F14F8_80"
+                     /number=4
+     exon            complement(32167..32256)
+                     /gene="F14F8_80"
+                     /number=5
+     intron          complement(32257..32332)
+                     /gene="F14F8_80"
+                     /number=5
+     exon            complement(32333..32407)
+                     /gene="F14F8_80"
+                     /number=6
+     intron          complement(32408..32601)
+                     /gene="F14F8_80"
+                     /number=6
+     exon            complement(32602..32754)
+                     /gene="F14F8_80"
+                     /number=7
+     intron          complement(32755..33035)
+                     /gene="F14F8_80"
+                     /number=7
+     exon            complement(33036..33120)
+                     /gene="F14F8_80"
+                     /number=8
+     intron          complement(33121..33209)
+                     /gene="F14F8_80"
+                     /number=8
+     exon            complement(33210..33280)
+                     /gene="F14F8_80"
+                     /number=9
+     intron          complement(33281..33372)
+                     /gene="F14F8_80"
+                     /number=9
+     exon            complement(33373..33423)
+                     /gene="F14F8_80"
+                     /number=10
+     intron          complement(33424..33562)
+                     /gene="F14F8_80"
+                     /number=10
+     exon            complement(33563..33979)
+                     /gene="F14F8_80"
+                     /number=11
+     intron          complement(33980..34056)
+                     /gene="F14F8_80"
+                     /number=11
+     exon            complement(34057..34188)
+                     /gene="F14F8_80"
+                     /number=12
+     intron          complement(34189..34337)
+                     /gene="F14F8_80"
+                     /number=12
+     exon            complement(34338..34451)
+                     /gene="F14F8_80"
+                     /number=13
+     intron          complement(34452..34536)
+                     /gene="F14F8_80"
+                     /number=13
+     exon            complement(34537..34654)
+                     /gene="F14F8_80"
+                     /number=14
+     intron          complement(34655..34955)
+                     /gene="F14F8_80"
+                     /number=14
+     exon            complement(34956..35014)
+                     /gene="F14F8_80"
+                     /number=15
+     intron          complement(35015..35275)
+                     /gene="F14F8_80"
+                     /number=15
+     exon            complement(35276..35342)
+                     /gene="F14F8_80"
+                     /number=16
+     intron          complement(35343..35432)
+                     /gene="F14F8_80"
+                     /number=16
+     exon            complement(35433..35563)
+                     /gene="F14F8_80"
+                     /number=17
+     intron          complement(35564..35662)
+                     /gene="F14F8_80"
+                     /number=17
+     exon            complement(35663..35872)
+                     /gene="F14F8_80"
+                     /number=18
+     intron          complement(35873..36326)
+                     /gene="F14F8_80"
+                     /number=18
+     exon            complement(36327..37211)
+                     /gene="F14F8_80"
+                     /number=19
+     exon            38393..39739
+                     /gene="F14F8_90"
+                     /number=1
+     gene            38393..39739
+                     /gene="F14F8_90"
+     CDS             38393..39739
+                     /gene="F14F8_90"
+                     /note="similarity to unusual floral organs protein UFO -
+                     Arabidopsis thaliana, PIR:S57710
+                     Contains Prokaryotic membrane lipoprotein lipid attachment
+                     site AA422-432"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01770.1"
+                     /db_xref="GI:9755616"
+                     /translation="MERLGFWGLLMGSVEKSLDSGNSLACSASAKNGDEESSTSSKQV
+                     SPLKGSGSRNTSPLGRVGSRNTSPSRQKVVKTKPRGLEEETVASFGKQVVADVQMEDG
+                     IWAMLPEDLLNEILARVPPFMIFRIRSVCKKWNLILQDNSFLKFHSNVSSHGPCLLTF
+                     WKNSPQIPQCSVFSLPLKTWYKIPFTFLPPWAFWLVGSSGGLVCFSGLDGLTFRTLVC
+                     NPLMQSWRTLPSMHYNQQRQLIMVVDRSDKSFKVIATSDIYGDKSLPTEVYDSKTDKW
+                     SLHQIMPAVNLCSSKMAYCDSRLYLETLSPLGLMMYRLDSGQWEHIPAKFPRSLLDGY
+                     LVAGTQKRLFLVGRIGLYSTLQSMRIWELDHTKVSWVEISRMPPKYFRALLRLSAERF
+                     ECFGQDNLICFTSWNQGKGLLYNVDKKIWSWISGCALQSCNSQVCFYEPRFDASVL"
+     gene            complement(join(40286..40506,40590..40848,40971..41219,
+                     41307..41440,41520..41757))
+                     /gene="F14F8_100"
+     gene            40286..41757
+                     /gene="F14F8_100"
+     CDS             complement(join(40286..40506,40590..40848,40971..41219,
+                     41307..41440,41520..41757))
+                     /gene="F14F8_100"
+                     /note="similarity to RXF26 - Arabidopsis thaliana,
+                     EMBL:AB008020
+                     Contains Protein splicing signature AA340-347"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01771.1"
+                     /db_xref="GI:9755617"
+                     /translation="MKSLLICLVLLELVWLGNGQSRDHQPLAPAFFVFGDSLVDSGNN
+                     NYIPTLARANYFPYGIDFGFPTGRFCNGRTVVDYGATYLGLPLVPPYLSPLSIGQNAL
+                     RGVNYASAAAGILDETGRHYVRGARTTFNGQISQFEITIELRLRRFFQNPADLRKYLA
+                     KSIIGINIGSNDYINNYLMPERYSTSQTYSGEDYADLLIKTLSAQISRLYNLGARKMV
+                     LAGSGPLGCIPSQLSMVTGNNTSGCVTKINNMVSMFNSRLKDLANTLNTTLPGSFFVY
+                     QNVFDLFHDMVVNPSRYGLVVSNEACCGNGRYGGALTCLPLQQPCLDRNQYVFWDAFH
+                     PTETANKIIAHNTFSKSANYSYPISVYELAKL"
+     exon            complement(40286..40506)
+                     /gene="F14F8_100"
+                     /number=1
+     intron          complement(40507..40589)
+                     /gene="F14F8_100"
+                     /number=1
+     exon            complement(40590..40848)
+                     /gene="F14F8_100"
+                     /number=2
+     intron          complement(40849..40970)
+                     /gene="F14F8_100"
+                     /number=2
+     exon            complement(40971..41219)
+                     /gene="F14F8_100"
+                     /number=3
+     intron          complement(41220..41306)
+                     /gene="F14F8_100"
+                     /number=3
+     exon            complement(41307..41440)
+                     /gene="F14F8_100"
+                     /number=4
+     intron          complement(41441..41519)
+                     /gene="F14F8_100"
+                     /number=4
+     exon            complement(41520..41757)
+                     /gene="F14F8_100"
+                     /number=5
+     gene            46886..48648
+                     /gene="F14F8_110"
+     CDS             join(46886..47205,47285..47438,47517..47646,47727..47884,
+                     47974..48177,48304..48648)
+                     /gene="F14F8_110"
+                     /note="strong similarity to serine/threonine-specific
+                     protein kinase NPK15 - Nicotiana tabacum
+                     Contains Protein kinases signatures and profile AA235-247"
+                     /codon_start=1
+                     /product="serine/threonine-specific protein kinase-like
+                     protein"
+                     /protein_id="CAC01772.1"
+                     /db_xref="GI:9755618"
+                     /translation="MVNRSDLVVIGISVGLALGLLLALLLFFAIKWYYGRSHLRRCAN
+                     EQNSPTLPVHTAKRGVVIPDDRANTESSQPPENGAPTQHQPWWNNHTKDLTVSASGIP
+                     RYNYKDIQKATQNFTTVLGQGSFGPVYKAVMPNGELAAAKVHGSNSSQGDREFQTEVS
+                     LLGRLHHRNLVNLTGYCVDKSHRMLIYEFMSNGSLENLLYGGEGMQVLNWEERLQIAL
+                     DISHGIEYLHEGAVPPVIHRDLKSANILLDHSMRAKVADFGLSKEMVLDRMTSGLKGT
+                     HGYMDPTYISTNKYTMKSDIYSFGVIILELITAIHPQQNLMEYINLASMSPDGIDEIL
+                     DQKLVGNASIEEVRLLAKIANRCVHKTPRKRPSIGEVTQFILKIKQSRSRGRRQDTMS
+                     SSFGVGYEEDLSRVMSRIKDQHVELGLLAGVKEENHQERNIATT"
+     exon            46886..47205
+                     /gene="F14F8_110"
+                     /number=1
+     intron          47206..47284
+                     /gene="F14F8_110"
+                     /number=1
+     exon            47285..47438
+                     /gene="F14F8_110"
+                     /number=2
+     intron          47439..47516
+                     /gene="F14F8_110"
+                     /number=2
+     exon            47517..47646
+                     /gene="F14F8_110"
+                     /number=3
+     intron          47647..47726
+                     /gene="F14F8_110"
+                     /number=3
+     exon            47727..47884
+                     /gene="F14F8_110"
+                     /number=4
+     intron          47885..47973
+                     /gene="F14F8_110"
+                     /number=4
+     exon            47974..48177
+                     /gene="F14F8_110"
+                     /number=5
+     intron          48178..48303
+                     /gene="F14F8_110"
+                     /number=5
+     exon            48304..48648
+                     /gene="F14F8_110"
+                     /number=6
+     exon            complement(50390..50636)
+                     /gene="F14F8_120"
+                     /number=1
+     gene            complement(join(50390..50636,50709..50866,50944..51148,
+                     51230..51519,51628..51793,51886..51974,52074..52141,
+                     52255..52549))
+                     /gene="F14F8_120"
+     gene            50390..52549
+                     /gene="F14F8_120"
+     CDS             complement(join(50390..50636,50709..50866,50944..51148,
+                     51230..51519,51628..51793,51886..51974,52074..52141,
+                     52255..52549))
+                     /gene="F14F8_120"
+                     /note="similarity to auxin-independent growth promoter -
+                     Nicotiana tabacum, PIR:A44226
+                     Contains Prokaryotic membrane lipoprotein lipid attachment
+                     site AA101-111"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01773.1"
+                     /db_xref="GI:9755619"
+                     /translation="MEKFLYHRKLWEMNVKLLGESKVEKLKNSFVSRPRMSLWMIRAV
+                     TVLLLWSCFVHLMALGEMWGPRLFKGWPSCFNHHQLSTAAEMTSLPTKIALPPKRVYV
+                     NNGYLMVSCNGGLNQMRAAICDMVTVARYMNVTLIVPELDKTSFWNDPSEFKDIFDVD
+                     HFISSLRDEVRILKELPPRLKKRVELGVYHEMPPISWSNMSYYQNQILPLVKKHKVLH
+                     LNRTDTRLANNGLPVEVQKLRCRVNFNGLKFTPQIEELGRRVVKILREKGPFLVLHLR
+                     YEMDMLAFSGCSHGCNPEEEEELTRMRYAYPWWKEKVINSELKRKDGLCPLTPEETAL
+                     TLTALGIDRNVQIYIAAGEIYGGQRRMKALTDAFPNVVRKETLLESSDLDFCRNHSSQ
+                     MAALDYLVALESDIFVPTNDGNMARVVEGHRRFLGFKKTIQLNRRFLVKLIDEYTEGL
+                     LSWDVFSSTVKAFHSTRMGSPKRRLVIPNRPKEEDYFYANPQECLQLLDEPLRVI"
+     intron          complement(50637..50708)
+                     /gene="F14F8_120"
+                     /number=1
+     exon            complement(50709..50866)
+                     /gene="F14F8_120"
+                     /number=2
+     intron          complement(50867..50943)
+                     /gene="F14F8_120"
+                     /number=2
+     exon            complement(50944..51148)
+                     /gene="F14F8_120"
+                     /number=3
+     intron          complement(51149..51229)
+                     /gene="F14F8_120"
+                     /number=3
+     exon            complement(51230..51519)
+                     /gene="F14F8_120"
+                     /number=4
+     intron          complement(51520..51627)
+                     /gene="F14F8_120"
+                     /number=4
+     exon            complement(51628..51793)
+                     /gene="F14F8_120"
+                     /number=5
+     intron          complement(51794..51885)
+                     /gene="F14F8_120"
+                     /number=5
+     exon            complement(51886..51974)
+                     /gene="F14F8_120"
+                     /number=6
+     intron          complement(51975..52073)
+                     /gene="F14F8_120"
+                     /number=6
+     exon            complement(52074..52141)
+                     /gene="F14F8_120"
+                     /number=7
+     intron          complement(52142..52254)
+                     /gene="F14F8_120"
+                     /number=7
+     exon            complement(52255..52549)
+                     /gene="F14F8_120"
+                     /number=8
+     exon            57051..57238
+                     /gene="F14F8_130"
+                     /number=1
+     gene            57051..58303
+                     /gene="F14F8_130"
+     CDS             join(57051..57238,57324..57413,57893..57967,58072..58303)
+                     /gene="F14F8_130"
+                     /note="strong similarity to 40S RIBOSOMAL PROTEINs -
+                     different species"
+                     /codon_start=1
+                     /product="ribosomal protein-like"
+                     /protein_id="CAC01774.1"
+                     /db_xref="GI:9755620"
+                     /translation="MRKLKYHEKKLIKKVNFLEWKREGNHRENEITYRYHMGSRDDYK
+                     KLVPLFALKALFYLFFFFWMVQKLTNIMKQMDPADPFRIQMTDMLLEKLYNMGVIPTR
+                     KSLTLTERLSVSSFCRRRLSTVLVHLKFAEHHKEAVTYIEQGHVRVGPETITDPAFLV
+                     TRNMEDFITWVDSSKIKRKVLEYNDTLDDYDMLA"
+     intron          57239..57323
+                     /gene="F14F8_130"
+                     /number=1
+     exon            57324..57413
+                     /gene="F14F8_130"
+                     /number=2
+     intron          57414..57892
+                     /gene="F14F8_130"
+                     /number=2
+     exon            57893..57967
+                     /gene="F14F8_130"
+                     /number=3
+     intron          57968..58071
+                     /gene="F14F8_130"
+                     /number=3
+     exon            58072..58303
+                     /gene="F14F8_130"
+                     /number=4
+     exon            58702..58968
+                     /gene="F14F8_140"
+                     /number=1
+     gene            58702..59507
+                     /gene="F14F8_140"
+     CDS             join(58702..58968,59223..59507)
+                     /gene="F14F8_140"
+                     /note="strong similarity to plastid-specific ribosomal
+                     protein 3 precursor - Spinacia oleracea, EMBL:AF239218"
+                     /codon_start=1
+                     /product="ribosomal protein 3 precursor-like protein"
+                     /protein_id="CAC01775.1"
+                     /db_xref="GI:9755621"
+                     /translation="MAVQANQSASFGFRTASPSQKLSSKPIAHISLSTKLKPSSRPSL
+                     SCSTWNQGQIPARHSCINPGIFAYPPSNLTFSHELPESESPPLGKKKMRVLVKPLEKP
+                     KVVLKFVWMQKDIGVALDHMIPGFGTIPLSPYYFWPRKDAWEELKTLLESKPWISELH
+                     RVFLLNQATDIINLWQSSGGDLS"
+     intron          58969..59222
+                     /gene="F14F8_140"
+                     /number=1
+     exon            59223..59507
+                     /gene="F14F8_140"
+                     /number=2
+     exon            complement(59695..60144)
+                     /gene="F14F8_150"
+                     /number=1
+     gene            complement(59695..60144)
+                     /gene="F14F8_150"
+     gene            59695..60144
+                     /gene="F14F8_150"
+     CDS             complement(59695..60144)
+                     /gene="F14F8_150"
+                     /note="similarity to Glucosamine-6-phosphate
+                     acetyltransferase EMeg32 protein - Mus musculus,
+                     EMBL:AJ001006"
+                     /codon_start=1
+                     /product="acetyltransferase-like protein"
+                     /protein_id="CAC01776.1"
+                     /db_xref="GI:9755622"
+                     /translation="MAETFKIRKLEISDKRKGFIELLGQLTVTGSVTDEEFDRRFEEI
+                     RSYGDDHVICVIEEETSGKIAATGSVMIEKKFLRNCGKAGHIEDVVVDSRFRGKQLGK
+                     KVVEFLMDHCKSMGCYKVILDCSVENKVFYEKCGMSNKSIQMSKYFD"
+     exon            complement(60500..61509)
+                     /gene="F14F8_160"
+                     /number=1
+     gene            complement(join(60500..61509,61704..61899))
+                     /gene="F14F8_160"
+     gene            60500..61899
+                     /gene="F14F8_160"
+     CDS             complement(join(60500..61509,61704..61899))
+                     /gene="F14F8_160"
+                     /note="similarity to other proline-rich proteins"
+                     /codon_start=1
+                     /product="proline-rich protein"
+                     /protein_id="CAC01777.1"
+                     /db_xref="GI:9755623"
+                     /translation="MERITTLWFWFSLMIFLGISINGGLSQGQQHVMKKTRSSAVVVG
+                     TVYCDTCFNGAFSKSPNHLISGALVAVECIDENSKPSFRQEVKTDKRGEFKVKLPFSV
+                     SKHVKKIKRCSVKLLSSSQPYCSIASSATSSSLKRLKSNHHGENTRVFSAGFFTFRPE
+                     NQPEICSQKPINLRGSKPLLPDPSFPPPLQDPPNPSPLPNLPIVPPLPNLPVPKLPVP
+                     DLPLPLVPPLLPPGPQKSASLHNKKSDSLKDKKTEALKPNFFFPPNPLNPPSIIPPNP
+                     LIPSIPTPTLPPNPLIPSPPSLPPIPLIPTPPTLPTIPLLPTPPTPTLPPIPTIPTLP
+                     PLPVLPPVPIVNPPSLPPPPPSFPVPLPPVPGLPGIPPVPLIPGIPPAPLIPGIPPLS
+                     PSFSSHHQP"
+     intron          complement(61510..61703)
+                     /gene="F14F8_160"
+                     /number=1
+     exon            complement(61704..61899)
+                     /gene="F14F8_160"
+                     /number=2
+     gene            65402..66566
+                     /gene="F14F8_170"
+     exon            65402..65521
+                     /gene="F14F8_170"
+                     /number=1
+     CDS             join(65402..65521,65615..66056,66144..66253,66540..66566)
+                     /gene="F14F8_170"
+                     /note="similarity to RING-H2 finger protein RHB1a -
+                     Arabidopsis thaliana, EMBL:AF079179"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01778.1"
+                     /db_xref="GI:9755624"
+                     /translation="MGCVSSCFGVDDFEDYPNPSSSVNRSCPCPRCLVNNFLNLYISL
+                     FRRGETRSLPSSLQATNVSIATSTSYDNFMSNTFHSTPRPLPYDADPRYFRSRRDSLV
+                     SRRDKGSSHSHEEAEPLRSDADVDSESFSVEGSKWANKLIISGEDSKEEFSRSSRRIL
+                     QSRTMSTSNEGLYITSDDEDVCPTCLEEYISENPKIVTKCSHHFHLSCIYEWMERSEN
+                     CPVCGKVMEFHETP"
+     intron          65522..65614
+                     /gene="F14F8_170"
+                     /number=1
+     exon            65615..66056
+                     /gene="F14F8_170"
+                     /number=2
+     intron          66057..66143
+                     /gene="F14F8_170"
+                     /number=2
+     exon            66144..66253
+                     /gene="F14F8_170"
+                     /number=3
+     intron          66254..66539
+                     /gene="F14F8_170"
+                     /number=3
+     exon            66540..66566
+                     /gene="F14F8_170"
+                     /number=4
+     exon            complement(67196..67322)
+                     /gene="F14F8_180"
+                     /number=1
+     gene            complement(join(67196..67322,67451..67599,67737..67820,
+                     67911..68010,68100..68161,68252..68333,69185..69369))
+                     /gene="F14F8_180"
+     gene            67196..69369
+                     /gene="F14F8_180"
+     CDS             complement(join(67196..67322,67451..67599,67737..67820,
+                     67911..68010,68100..68161,68252..68333,69185..69369))
+                     /gene="F14F8_180"
+                     /note="sequence differences to mRNA EMBL:M55551
+                     Contains MADS-box domain signature and profile AA3-57"
+                     /codon_start=1
+                     /product="MADS box protein AGL2"
+                     /protein_id="CAC01779.1"
+                     /db_xref="GI:9755625"
+                     /translation="MGRGRVELKRIENKINRQVTFAKRRNGLLKKAYELSVLCDAEVA
+                     LIIFSNRGKLYEFCSSSNMLKTLDRYQKCSYGSIEVNNKPAKELENSYREYLKLKGRY
+                     ENLQRQQRNLLGEDLGPLNSKELEQLERQLDGSLKQVRSIKTQYMLDQLSDLQNKEQM
+                     LLETNRALAMKLDDMIGVRSHHMGGGGGWEGGEQNVTYAHHQAQSQGLYQPLECNPTL
+                     QMGCCFGDDDDDDRYDNPVCSEQITATTQAQAQQGNGYIPGWML"
+     intron          complement(67323..67450)
+                     /gene="F14F8_180"
+                     /number=1
+     exon            complement(67451..67599)
+                     /gene="F14F8_180"
+                     /number=2
+     intron          complement(67600..67736)
+                     /gene="F14F8_180"
+                     /number=2
+     exon            complement(67737..67820)
+                     /gene="F14F8_180"
+                     /number=3
+     intron          complement(67821..67910)
+                     /gene="F14F8_180"
+                     /number=3
+     exon            complement(67911..68010)
+                     /gene="F14F8_180"
+                     /number=4
+     intron          complement(68011..68099)
+                     /gene="F14F8_180"
+                     /number=4
+     exon            complement(68100..68161)
+                     /gene="F14F8_180"
+                     /number=5
+     intron          complement(68162..68251)
+                     /gene="F14F8_180"
+                     /number=5
+     exon            complement(68252..68333)
+                     /gene="F14F8_180"
+                     /number=6
+     intron          complement(68334..69184)
+                     /gene="F14F8_180"
+                     /number=6
+     exon            complement(69185..69369)
+                     /gene="F14F8_180"
+                     /number=7
+     tRNA            73241..73314
+                     /note="tRNA predicted as a tRNA- Ile : anticodon aat"
+                     /product="tRNA-Ile"
+     exon            73400..73789
+                     /gene="F14F8_190"
+                     /number=1
+     gene            73400..76723
+                     /gene="F14F8_190"
+     CDS             join(73400..73789,74082..74363,74478..74565,74673..74794,
+                     74868..75007,75092..75164,75307..75395,75470..75575,
+                     75662..75853,75961..76155,76247..76324,76424..76723)
+                     /gene="F14F8_190"
+                     /note="strong similarity to several N2, N2-dimethylguanine
+                     tRNA methyltransferases"
+                     /codon_start=1
+                     /product="N2, N2-dimethylguanine tRNA
+                     methyltransferase-like protein"
+                     /protein_id="CAC01780.1"
+                     /db_xref="GI:9755626"
+                     /translation="MNFSKRFVGFKQLSILRYSLPNPYGLFRVSPLCLSETATKLEAI
+                     GFIQASLKPVAKFRHLYSHIYRFKLCDLPFGFQIRVGNSISESSAPKEKNLRSAMETD
+                     LNDYTVIKEGEAEVLMHKKNQVFFNKAQVNNRDMSIAVLRAFIIKRKQEHEAMLSKRA
+                     RSSGKVVEKDVSETSKEETPTENGDDNGKTNGEHEVTTQDGPKEAAKTAYESARRELK
+                     PPRVLEALSASGLRALRYAREVEGIGQVVALDNDPASVEACQRNIKFNGLMSTSKVES
+                     HLTDARVHMLSHPKDFDVVDLDPYGAPSIFLDSAVQSVADGGLLMCTATDMAVLCGAN
+                     GEVCYSKYGSYPLKGKYCHEMALRILLASIESHANRYKRYIVPVLSVQMDFYVRVFVR
+                     VYTSASAMKNTPLKLSYVYQCIGCDSFHLQSVGRSLPKNNSVRYLPGVGPVVPQDCTH
+                     CGKKYNMGGPIWSAPIHDQEWVNSILNGVKSMKDRYPAYDRICAELPDVPLFLSLHSL
+                     SATLKCTSPSAALFRSAVINAKYRVSGSHVNPLGIKTDAPMEIIWDIMRCWVKNHPIK
+                     PQSPEHPGSVILSKEPSHQADFSRHVGSLSKAQAKKVARFLPNPEKHWGPKIRAGRTI
+                     TSKHVSLLGHEAVNGHLNNNHKEAGDEEEEEEEEEPEEDIIEGEPELKRQKTTEDFAS
+                     TS"
+     intron          73790..74081
+                     /gene="F14F8_190"
+                     /number=1
+     exon            74082..74363
+                     /gene="F14F8_190"
+                     /number=2
+     intron          74364..74477
+                     /gene="F14F8_190"
+                     /number=2
+     exon            74478..74565
+                     /gene="F14F8_190"
+                     /number=3
+     intron          74566..74672
+                     /gene="F14F8_190"
+                     /number=3
+     exon            74673..74794
+                     /gene="F14F8_190"
+                     /number=4
+     intron          74795..74867
+                     /gene="F14F8_190"
+                     /number=4
+     exon            74868..75007
+                     /gene="F14F8_190"
+                     /number=5
+     intron          75008..75091
+                     /gene="F14F8_190"
+                     /number=5
+     exon            75092..75164
+                     /gene="F14F8_190"
+                     /number=6
+     intron          75165..75306
+                     /gene="F14F8_190"
+                     /number=6
+     exon            75307..75395
+                     /gene="F14F8_190"
+                     /number=7
+     intron          75396..75469
+                     /gene="F14F8_190"
+                     /number=7
+     exon            75470..75575
+                     /gene="F14F8_190"
+                     /number=8
+     intron          75576..75661
+                     /gene="F14F8_190"
+                     /number=8
+     exon            75662..75853
+                     /gene="F14F8_190"
+                     /number=9
+     intron          75854..75960
+                     /gene="F14F8_190"
+                     /number=9
+     exon            75961..76155
+                     /gene="F14F8_190"
+                     /number=10
+     intron          76156..76246
+                     /gene="F14F8_190"
+                     /number=10
+     exon            76247..76324
+                     /gene="F14F8_190"
+                     /number=11
+     intron          76325..76423
+                     /gene="F14F8_190"
+                     /number=11
+     exon            76424..76723
+                     /gene="F14F8_190"
+                     /number=12
+     tRNA            77114..77185
+                     /note="tRNA predicted as a tRNA- Asp : anticodon gtc"
+                     /product="tRNA-Asp"
+     exon            77389..78435
+                     /gene="F14F8_200"
+                     /number=1
+     gene            77389..78435
+                     /gene="F14F8_200"
+     CDS             77389..78435
+                     /gene="F14F8_200"
+                     /note="similarity to ring finger protein - Cicer
+                     arietinum, EMBL:AB026262"
+                     /codon_start=1
+                     /product="putative protein"
+                     /protein_id="CAC01781.1"
+                     /db_xref="GI:9755627"
+                     /translation="MSSAKLFGCSINVNVEAEEEEGGDGGSSTNVEVSRSGNQPDCEA
+                     MSFSNQMEIGVRNTYYQFLESNSDSGSDSMYAEPEFIDFFDRESYEVDTVREVCVSSN
+                     QRVSTPGYFNIWDQDVDLGLGIGLGSRSGSGQLPGDSGGVGVEVGRGVTPVEYNLFGE
+                     EAMVVDEVLEWENFNNAIHLVQEPAYASMEGEEEEEEDEVVMEFAASIYSDAWEILLY
+                     DNMTNSAPMDLDVEVWLDSVDGYAPMDYNAIIGQMFDNETGIKGTPPASKSVVDGLPD
+                     VELTIEELSSVSIVCAICKDEVVFKEKVKRLPCKHYYHGECIIPWLGIRNTCPVCRHE
+                     LPTDDLEYERKRRA"
+     exon            84193..84753
+                     /gene="F14F8_210"
+                     /number=1
+     gene            84193..84753
+                     /gene="F14F8_210"
+     CDS             84193..84753
+                     /gene="F14F8_210"
+                     /note="similarity to putative bZIP DNA-binding protein -
+                     Capsicum chinense, EMBL:AF127797
+                     Contains bZIP transcription factors basic domain signature
+                     AA77-92;bZIP transcription factors basic domain signature
+                     AA78-92"
+                     /codon_start=1
+                     /product="bZIP DNA-binding protein-like"
+                     /protein_id="CAC01782.1"
+                     /db_xref="GI:9755628"
+                     /translation="MQPNYDSSSLNNMQQQDYFNLNNYYNNLNPSTNNNNLNILQYPQ
+                     IQELNLQSPVSNNSTTSDDATEEIFVINERKQRRMVSNRESARRSRMRKQRHLDELLS
+                     QVAWLRSENHQLLDKLNQVSDNNDLVIQENSSLKEENLELRQVITSMKKLGGGIHDKY
+                     SSPSSMDELDQDFSSITDDPRTHHPS"
+     exon            complement(86945..87304)
+                     /gene="F14F8_220"
+                     /number=1
+     gene            complement(join(86945..87304,87538..88299))
+                     /gene="F14F8_220"
+     gene            86945..88299
+                     /gene="F14F8_220"
+     CDS             complement(join(86945..87304,87538..88299))
+                     /gene="F14F8_220"
+                     /codon_start=1
+                     /product="CONSTANS"
+                     /protein_id="CAC01783.1"
+                     /db_xref="GI:9755629"
+                     /translation="MLKQESNDIGSGENNRARPCDTCRSNACTVYCHADSAYLCMSCD
+                     AQVHSANRVASRHKRVRVCESCERAPAAFLCEADDASLCTACDSEVHSANPLARRHQR
+                     VPILPISGNSFSSMTTTHHQSEKTMTDPEKRLVVDQEEGEEGDKDAKEVASWLFPNSD
+                     KNNNNQNNGLLFSDEYLNLVDYNSSMDYKFTGEYSQHQQNCSVPQTSYGGDRVVPLKL
+                     EESRGHQCHNQQNFQFNIKYGSSGTHYNDNGSINHNAYISSMETGVVPESTACVTTAS
+                     HPRTPKGTVEQQPDPASQMITVTQLSPMDREARVLRYREKRKTRKFEKTIRYASRKAY
+                     AEIRPRVNGRFAKREIEAEEQGFNTMLMYNTGYGIVPSF"
+     intron          complement(87305..87537)
+                     /gene="F14F8_220"
+                     /number=1
+     exon            complement(87538..88299)
+                     /gene="F14F8_220"
+                     /number=2
+     exon            complement(91899..92249)
+                     /gene="F14F8_230"
+                     /number=1
+     gene            complement(join(91899..92249,92359..93075))
+                     /gene="F14F8_230"
+     gene            91899..93075
+                     /gene="F14F8_230"
+     CDS             complement(join(91899..92249,92359..93075))
+                     /gene="F14F8_230"
+                     /codon_start=1
+                     /product="CONSTANS-like 1"
+                     /protein_id="CAC01784.1"
+                     /db_xref="GI:9755630"
+                     /translation="MLKVESNWAQACDTCRSAACTVYCRADSAYLCSSCDAQVHAANR
+                     LASRHERVRVCQSCERAPAAFFCKADAASLCTTCDSEIHSANPLARRHQRVPILPISE
+                     YSYSSTATNHSCETTVTDPENRLVLGQEEEDEDEAEAASWLLPNSGKNSGNNNGFSIG
+                     DEFLNLVDYSSSDKQFTDQSNQYQLDCNVPQRSYGEDGVVPLQIEVSKGMYQEQQNFQ
+                     LSINCGSWGALRSSNGSLSHMVNVSSMDLGVVPESTTSDATVSNPRSPKAVTDQPPYP
+                     PAQMLSPRDREARVLRYREKKKMRKFEKTIRYASRKAYAEKRPRIKGRFAKKKDVDEE
+                     ANQAFSTMITFDTGYGIVPSF"
+     intron          complement(92250..92358)
+                     /gene="F14F8_230"
+                     /number=1
+     exon            complement(92359..93075)
+                     /gene="F14F8_230"
+                     /number=2
+     exon            complement(94278..94439)
+                     /gene="F14F8_240"
+                     /number=1
+     gene            complement(join(94278..94439,94541..94631,94708..94851,
+                     94920..94985,95075..95212,95305..95406,95503..95628,
+                     95837..95917,96250..96321,96588..96865))
+                     /gene="F14F8_240"
+     gene            94278..96865
+                     /gene="F14F8_240"
+     CDS             complement(join(94278..94439,94541..94631,94708..94851,
+                     94920..94985,95075..95212,95305..95406,95503..95628,
+                     95837..95917,96250..96321,96588..96865))
+                     /gene="F14F8_240"
+                     /note="similarity to sterol esterase - Rattus norvegicus,
+                     EMBL:Z22803
+                     Contains Carboxylesterases type-B signatures AA214-229"
+                     /codon_start=1
+                     /product="Carboxylesterase-like protein"
+                     /protein_id="CAC01785.1"
+                     /db_xref="GI:9755631"
+                     /translation="MHSPLQTQQPEQRCWPMTSTVSEIEEVLPDEDSDRTTLLNGEPL
+                     RRRVSGKSPVDEGPRRIFRQQSFGRDIGHAAAETYLITGLSFKLLRYLGLSMDDKITC
+                     PYMLCYASYAWLSSRLDLYLPSNNDGLKPVVVFVTGGAWIIGWLLVNKFMFANLVLIF
+                     RYKAWGSLLGMQLAERDIIVACLDYRNFPQGTISDMVTDASQGISFVCNNISAFGGDP
+                     NRIYLMGQSAGAHIAACALLEQATKELKGESISWTVSQIKAYFGLSGGYNLYKLVDHF
+                     HNRGLYRSIFLSIMEGEESFEKFSPEVRLKDPVVGKAASLLPPIILFHGSSDYSIPCD
+                     ESKTFTDALQAVGAKAELVLYSGKTHTDLFLQDPLRGGKDELFDDIVSVIHAEDNDGL
+                     TKDSLAPPRKRLVPELLLKLAREISPF"
+     intron          complement(94440..94540)
+                     /gene="F14F8_240"
+                     /number=1
+     exon            complement(94541..94631)
+                     /gene="F14F8_240"
+                     /number=2
+     intron          complement(94632..94707)
+                     /gene="F14F8_240"
+                     /number=2
+     exon            complement(94708..94851)
+                     /gene="F14F8_240"
+                     /number=3
+     intron          complement(94852..94919)
+                     /gene="F14F8_240"
+                     /number=3
+     exon            complement(94920..94985)
+                     /gene="F14F8_240"
+                     /number=4
+     intron          complement(94986..95074)
+                     /gene="F14F8_240"
+                     /number=4
+     exon            complement(95075..95212)
+                     /gene="F14F8_240"
+                     /number=5
+     intron          complement(95213..95304)
+                     /gene="F14F8_240"
+                     /number=5
+     exon            complement(95305..95406)
+                     /gene="F14F8_240"
+                     /number=6
+     intron          complement(95407..95502)
+                     /gene="F14F8_240"
+                     /number=6
+     exon            complement(95503..95628)
+                     /gene="F14F8_240"
+                     /number=7
+     intron          complement(95629..95836)
+                     /gene="F14F8_240"
+                     /number=7
+     exon            complement(95837..95917)
+                     /gene="F14F8_240"
+                     /number=8
+     intron          complement(95918..96249)
+                     /gene="F14F8_240"
+                     /number=8
+     exon            complement(96250..96321)
+                     /gene="F14F8_240"
+                     /number=9
+     intron          complement(96322..96587)
+                     /gene="F14F8_240"
+                     /number=9
+     exon            complement(96588..96865)
+                     /gene="F14F8_240"
+                     /number=10
+BASE COUNT    30580 a  17554 c  17879 g  30879 t
+ORIGIN      
+        1 aggaagatgg taagctaagt tgggagatat gtttgtgtca gaagttgact catggccagt
+       61 ccccactttt tctttatatc ttcaacccca agttactttc tcacttctca tctctgaatc
+      121 attagggttt tcgtttcttt accaacaaaa gatagttgag atgttgtatt cggacgagaa
+      181 cacgtcagtt tctaaggtgt tctattatca atcactacca taaacattgc ttaaagttat
+      241 gggaaacaag aaagaataaa tttggagaat gcaacaacaa gtaggtgaaa gcccatcttc
+      301 agaccaaaaa gactcatcaa taatctgtaa aaaacttatg ttttaaatcc cataaacaca
+      361 tcaattcaaa ttcctgaaga agaaaaagcc tgaggttaaa cactgcacct gggaatcaat
+      421 ttccttaaat ctttactcaa aagatgaggc tcctcttcag cttctgcttc ttcttcttca
+      481 tgatcatctt taccgcaagt gagtcctgta tattcatttt tgcccgcaca ttaacttact
+      541 tttcttactc tctgattttc tttttttcac agctgcttat gatccattag atcctagtgg
+      601 taacattaca atcaaatggg atattatgtc ctggacggca gatggctatg tggtaagaat
+      661 cctctgcact tgtgcgtgag tgtatatatg cttgaaactg gcacaaactc tcatcatctc
+      721 tatacattct gtgcatttgc aggctacggt aactatgaac aacttccaaa tctaccggca
+      781 catacaaaac cctggttgga cattaggttg gacatgggca aagaaagagg tgatttggtc
+      841 aatggttggt gcacaaacaa cagaacaagg agactgttcc aagtttaagg gaaatgtacc
+      901 tcattgctgt aagaaaaccc ctacagttgt tgatctcttg ccaggtgtgc cttataatca
+      961 acagttctca aactgttgca aaggaggtgt aattggagct tggggtcaag atccatcagc
+     1021 cgctgtatcc cagtttcagg ttagtgctgg tttagctgga actacaaaca agactgtcaa
+     1081 gcttcctaag aacttcactt tgcttggtcc cggccctggt tacacttgcg gtcctgccaa
+     1141 aatcgtgccc tctaccgttt ttctcacaac tgacaaacgg cgaaaaacac aagctttgag
+     1201 taagcaatca atcaaaaacc ttaacttgtt ccatcatcga aatctaaaca tcaatttcaa
+     1261 acttatgttt ctgcagtgac atggaatgtt acctgcacat actcacagtt tttagcaaga
+     1321 aagcatccaa gctgttgtgt ctccttctct tctttctaca acgacaccat aactccttgc
+     1381 ccgtcttgtg cctgtggctg cgagaacaaa aagagctgcg tcaagtgagt aacaaatcat
+     1441 tgaaatccaa actctttctt agatatcctc tgaaaaaata tcaatatgaa tgttttcttg
+     1501 cagggctgat tctaagattc taaccaagaa aggtctcaac acaccaaaaa aggacaacac
+     1561 tcctttgttg caatgcacac atcacatgtg ccctgttaga gtccactggc acgttaaaac
+     1621 taactacaaa gactattggc gagtgaagat agcaatcaca aatttcaatt accggatgaa
+     1681 tcatacactc tggactttag caattcagca tccaaatctc aacaatgtga ctcaagtttt
+     1741 cagctttgac tacaaaccag tctctcctta cggatccata agtaaataaa tacctccctc
+     1801 aaccacaact attcaacttc aaaccaacca ttgaatgact aataacattc ttaaatcttt
+     1861 ttgatacaga tgatactgga atgttctatg gaacgaagtt ttacaatgat ttattaatgg
+     1921 aagctggacc ttcagggaat gtgcaatcag aggttttgct acagaaagat caaaagactt
+     1981 ttactttcaa gcaaggttgg gcttttccta gaaaagttta ctttaatggt gatgaatgta
+     2041 tgttacctcc accagattca tacccttttc taccaaactc tgcacaaggg aactttgctt
+     2101 cgttctcact caccattctt cttctcctat tcatctcaat atggtgattt gatctgatcc
+     2161 ttctgggttt taaaacactg aaccagaact gttttttgta aactcattgc aaagattgta
+     2221 aaagaaaaaa aggagttttg tgagaggaaa tgattagaga tgtgcaaggc ttttttacct
+     2281 tgatcactgc cattgatcaa tgactctgta cttcttggct ttttgctttg gtactaatgt
+     2341 gaaagtgaag catattgatt tgattgagaa agtggcaaaa caacgaaatg tttttaattg
+     2401 ggctcaaata aacgcatttt gaatccagcc caataaaggc ctggtttaac attcacgcgt
+     2461 ttcaatagat tcgcgtgtgt attattttcg ttggtctgaa aaggcaaaag gtcaaaaacg
+     2521 gcgttttgtt tgggagacac tttttgttcg aagatatttt ctcattctct gatctctctg
+     2581 tccgaaacgg ctccgttaat tttaaagtaa atattccaaa ataagtttat gtcaaggcta
+     2641 gtctttttga aaataagatc agaattttac ctatatatat atatttattt atcaaaaaaa
+     2701 gtagacacta cttacaaaga ttagtattat caacgagtaa tacaaaatgg tttgcaagtt
+     2761 aatttaatta aaaacaaaaa actaagattg ctaaaataca gtatttgtga acaagttgaa
+     2821 ctaaaatcta gaagccagtc ccgcacggtg gcttttaagc ccaaatcaat gcccctttga
+     2881 ttgagagatc tgaaatttag gactcaaaaa gtttgttgct gaccaaacgc gaaactttga
+     2941 ttaaaaacgc gtttgcgttt taaaattctc ctgtgtttgt gtatgtgtgg aaaattaatt
+     3001 tcttctcccg acgaatccca aaagtttgtt gaaatttgcc tccaagaaat ctgcaattag
+     3061 aaatttcatt tcgccgtcat cgtcttcttc ttctgattct gattaattcc agatgtcttc
+     3121 ctgagtctta cggagattag gtcattttcg cctcgactcg aactctgtct cgcagcttcc
+     3181 gtgagagagt catggacacg ccaccgactt ctcgtatcgc atcttttggt cagacggaga
+     3241 ttaactggga caagtacgtt cttcattttc ctcctcaatt tccatcgatt gatccgcttg
+     3301 ttttctagtt ttgattgatt ctctgtggcg ttgttgttat tagtccgaac tttgtttatt
+     3361 gttgtttcga tccagttttt gtgattctgt gattggtttc ttcggtggat ttatcgacat
+     3421 gaaatggata tcgctcgttg ttttgacgga atcatttcgt ggaggtaatt gaagatttcc
+     3481 gaatttattg gaaagaatct ggttctcggg ctcctgagaa gctcgatttc agaaattgat
+     3541 ttggtgcaga agatttgttc gctagtgtta atcaaatctt tgtttaggta aaagatctag
+     3601 taggcatttg gatagtttta gtgttcctta ttcttacggt ataatctgag gtcctacgtc
+     3661 taaggttttt ggaaatgtgt gatgagtagg gccaaaacaa acttgaaacc attacaaatt
+     3721 tggttgtaag atttccgaga ggtacctgaa gaatcataca acaaataata tcttcatact
+     3781 agctctaatt atttttgctc tctctccttc ttttatcttg cagacttgac aaaaggaggt
+     3841 tctacattaa tggagctggc ctcttcactg gtgttacagt agctctgtat cctgtatccg
+     3901 ttgtgaaaac aaggcttcaa gttgcttcta aagagattgc tgagagaagt gccttttctg
+     3961 tagttaaagg aattttaaag aatgatggtg ttcctggtct gtaccgaggt tttggtactg
+     4021 tcattacagg tgctgtacct gcaagaatca tatttctaac tgctcttgag accactaaga
+     4081 tttctgcttt taagttggtt gcacctttgg agttaagtga acctacacaa gccgccattg
+     4141 caaatggaat tgctggcatg acagcatctc ttttctcaca ggctgtgttt gtcccaattg
+     4201 atgttgtatg tattatatac aacactaccg ctttgtaaga tttgtaaatg ctgtgaatgt
+     4261 tctatcacta tgagttgctg aaaaccatat ttctcctatt ttctgcaggt tagccaaaag
+     4321 ttgatggtac aaggatactc aggtcatgct acatatactg gtggtatcga tgttgccaca
+     4381 aaaatcatta agtcatatgg tgtaagggga ttatacagag ggtttggtct gtctgttatg
+     4441 acctattctc cttcaagtgc cgcttggtgg gctagctatg gatcaagcca acgtgttatc
+     4501 tggaggttag ccatgaatgt tctagttaat tgatgatagt ttccatttta gtttgattgc
+     4561 aagaacttgc atttcagtcg ttcctggaat ttggatttgc taccaaagct acaattccgc
+     4621 tgatacaata tctgctactg ttgggcagat tcttaggtta tggtggtgac tcggatgcaa
+     4681 ctgctgctcc tagtaagtca aaaattgtta tggtccaggc tgctggagga attattgctg
+     4741 gtgcaacagc atcctcaatt acaacaccat tagacacaat caaaacgcga ctgcaggtat
+     4801 aaagtgttcc cattcatagt aaaaacttga ttttattttg catttacttt ttttaactct
+     4861 aatgaatata gaagagcttc cgtgcctgct gtcctggttc tcaaatagat aacttcgggt
+     4921 attgctacat agaaactatg ctaaacaaac ggtaaaatct ctgtgcaggt catgggacat
+     4981 caagaaaata gaccttcagc gaaacaagtg gtgaaaaaac tgctagcaga agatggctgg
+     5041 aaaggattct ataggggttt gggcccaaga ttctttagca tgtcggcttg gggaacctcg
+     5101 atgatattga cttacgaata cttaagtaag agttcttttc ttttataccc aaaatctcac
+     5161 tttgagccac tctcaaaacc aaccaagatt cttcttttga aatgctagct gctcctctga
+     5221 catgtttcac tctttcatgg tttttttttc cagagcgtct gtgtgcaata gaagattaga
+     5281 aagcttcact actgttcgtt ggtttttctc cttctactaa ctcgagtaga cggcatcacc
+     5341 ttgattcaaa tctcggctat tgggagcttc catgttgaca attggaccat tctttcaact
+     5401 ttgaagtctg agttccgtta ttgtaatcat tctttcaact taatttgcgt attggagtgt
+     5461 attcttacag cagaagaaat tctagcttac gttcttctgc tttatgtagc ttctgagatt
+     5521 taagaagctt caaaacaatg caatcacttc atgaaatctt atagtcgtac ggatgtacgt
+     5581 gttatcgtaa cagtcaaaat ttgagttgtt gcaaatggct tctcatttgc ataactgtct
+     5641 tcacaaagta tcctcattct taggatatca gccaaagcta acctctggtc attctcattg
+     5701 atagcaatat aatcatcatc aaacctctgg tcattctcat tgatagcgac atatacatca
+     5761 tcaagtactt tattataact atgatcattc tcattgatgc tgtgtctctt agcaaaaaaa
+     5821 aggccactag ccataggcta acaccaaatc accaagagcg cttactgggt tttattttag
+     5881 ctcatatacg taaatctatt actccagttc ctaagattta agagtgttta ttatcctaaa
+     5941 caaaaagcca ccgatatatc taattgttat tcaaaatcag ataaaataaa ataaatcgta
+     6001 tatatacgaa aagtttcaag ataaacaaga aaagtagtta gtatttgacc ttttttagtt
+     6061 tatttcctcg tagcaggctc atttttttcc cctctctgac tcagcagatt aatttctcca
+     6121 ttaggataat aacttaaata tttccggaga cttaaatatt cttacaacgt ctttatcaac
+     6181 aaaaacaaaa ttcttgaata gcgatctctt ttcttttaat taaaaaaaaa gactttaaaa
+     6241 atgaggataa aatcaaatat aataatgcat tattgatagt aacttaaaca tgcatggagc
+     6301 tcttaattat ttttgggaga cttttagata tttatgaaat aaaataaagt gtagatgttt
+     6361 taaaaataaa aataaaataa agtgtcgatc acctaccgac ctatacgtgc atgtattctc
+     6421 gtttatacga tagttacatg atttatgcaa taatcataga cgacgtgcat cgacgagcgg
+     6481 cccacgacca cgacgtaagg attacggtcc acgtcgtaag gattacgttt taaatttata
+     6541 gtttatcaat tgtatatcgt aaataataaa tttagaatga ctgagaattt gttgagaaat
+     6601 gaaaatatta gagaatttat cgaatgattc gaatctaaag cgaataataa tttttttgat
+     6661 gatttatgtc cttttgtttc gtcgacattt atgatttaag tcctagcata tactatattg
+     6721 tacttattac cttatacaat acgatttgta tatatacgaa ctttaactaa aaataaagtt
+     6781 aaatgttgaa gagtttaaga cactaccata gctcccccac atccacatgc actatttctt
+     6841 cgtaacatat caaattaaaa ccatgcattt attaagttgt cagttaagga aacaaaaagt
+     6901 tgtgaatgta tttgatttta atttattttt tcagtttttt tcttttaggt agaaagaatg
+     6961 aaaccttagt tttgtacata ataaaaacct tactatctta cattccagtc aacaaattaa
+     7021 atacagtgag tacattttgt tagatatacg ttcatttcat tagcacatac gttatatact
+     7081 tatatagtaa tcataatcat aaaactgtac atatgaattt atttggttgc aaaataaagt
+     7141 tagcttcaat caattataat aaatgcttca ctattttaga gttttggttt atttaggggt
+     7201 atttaaatct aaataaaact gaaaatgaag caaaagcaaa cttaaattaa agtacatctt
+     7261 ttattgggtc cgaaaaactg aaattaaatt tcgtagtagt cttaaatgat tttgaccttt
+     7321 ttaaataata tattcaaatg tgtttcaaac acgaatcaaa ctataccaaa aaaaaaaaaa
+     7381 aagttggata aaaaataaaa cctgactaca cctcaacttt ggatcaaaat ctatgaatat
+     7441 attttcaaaa ttatcttagt caaattttaa attaattaat tatttatata aaatttaata
+     7501 attatcataa ccttggatta aatttatcta cagtcaaaaa ttaattttaa atcaattaat
+     7561 taatagcatt attacaatcc ctaattgtac gggacgaata aaaaagtaga aaactcaagt
+     7621 tcctttcttt accatacagc tttttcgatt ggagttgaat aagtcttcat ctgacacgtg
+     7681 taaccctggc acatgccgtc cactaaaaca cgtgcgagat ctgtataaat caaacctacg
+     7741 cgtttcatct ctcttttcaa aactcaccga cgcgatccga tctcatctct ctcatttcga
+     7801 aaccatggtt gagccggcga atactgttgg tcttccggtg aacccgactc cgttgctgaa
+     7861 agatgagctc gatatcgtga ttccgactat cagaaacctc gatttcctcg agatgtggag
+     7921 gccttttctt cagccttacc atctgatcat cgtccaggac ggagatccat cgaagaagat
+     7981 ccatgtccct gaaggttacg actacgagct ctacaacagg aacgacatta accgaatcct
+     8041 cggacctaag gcttcttgta tctcgtttaa ggattctgct tgtcgatgct ttgggtacat
+     8101 ggtgtctaag aagaagtata tcttcaccat tgatgacgat tgcttcgtaa gttacttgaa
+     8161 ttttgagttt tgtattcgtt tttatgcttg atttgagagt tttgtcaatt ttggttctag
+     8221 atctgttttt ttgagcttat ttgtttgtgt ttgtgtggat ttttcaagtt cattgcttga
+     8281 atttcgtaga tttggtgaga gatcaattat acgattcact aaatttgacg gatcttaggt
+     8341 ttgtgagata atccttggtt cgattagcta ggcaattcaa tgttttgtac cagatccata
+     8401 gatctgcttg ttgagtctga atatgttttc acttttgtgt aattagccat gatctctaat
+     8461 gtttacttgt agattttctg tgagctgatg tctcttttgt tgacgacatt gttgttgagc
+     8521 tgatatctct gagtcattat agctaccttt acgatatggt tgcacgtcct tgttcatcac
+     8581 ttttttcttt tgttttacct ttttgagatt tgtggggcat atccaaggat gagtctcgat
+     8641 gacgcttgtg tttagtttat aattttctga gttttttttg gaggaactct ttgatcaatg
+     8701 gcttgatctg gattttaacc gctttttaat tcatgtattt ctttgatgtg tacatgtagg
+     8761 ttgccaagga tccatcaggc aaagcagtga acgctcttga gcaacacatc aagaaccttc
+     8821 tctgcccatc gtctcccttt ttcttcaaca ccttgtatga tccttaccgt gaaggtgctg
+     8881 atttcgtccg tggataccct ttcagtctcc gtgaaggtgt ttccactgct gtttcccatg
+     8941 gtctttggct caacatccct gactacgatg ccccgaccca actcgtgaag cctaaggaga
+     9001 ggaacaccag gtgacaataa ttatcatcat aacatgttta tgtgtttttt tgtcaggata
+     9061 ttcaaatgtc agtttttgct aaacgtttga tatgtcaggt atgtggatgc tgtcatgacc
+     9121 atcccaaagg gaacactttt cccaatgtgt ggtatgaact tggcttttga ccgtgatttg
+     9181 attggcccgg ctatgtactt tggtctcatg ggtgatggtc agcctattgg tcgttacgac
+     9241 gatatgtggg ctggttggtg catcaaggta atttcttctt attcccttgt aagactcata
+     9301 attgagtata gctaaatatg aagcacatgc tctgtactaa gcgatacctc catttggggt
+     9361 tgaatctttt ataggtgatc tgtgaccact tgagcttggg agtgaagacc ggtttaccgt
+     9421 atatctacca cagcaaagcg agcaaccctt ttgttaacct gaagaaggaa tacaagggaa
+     9481 tcttctggca ggaggagatc attccgttct tccagaacgc aaagctatcg aaagaagcag
+     9541 taactgttca gcaatgctac attgagctct caaagatggt caaggagaag ttgagctcct
+     9601 tagacccgta ctttgacaag cttgcagatg ccatggttac atggattgaa gcttgggatg
+     9661 agcttaaccc accagcagcc agtggcaaag cttgagagca gtatgagcca aaaagaaaaa
+     9721 gccaccaaag ttttggttat ttttagctca aattatcgtt acttttaaat ttctgatttt
+     9781 acgaaccttt cttgcttttt ttacacattt gagtagtttt catcatcagt actttctcat
+     9841 tgtccggtta tggtttttgc atttggttta aatatcaccg gtttatttat aaacagtggt
+     9901 ggattagtag tactattttc tgagtttttt tctttgtttc attaataaaa aggccttttc
+     9961 ataggtgttt gcaattagtt tttttccccc attaatcatc gattatcata ggtatgttat
+    10021 ggctttaaat ggtataagga aattgcttat agaccaaaaa aaagttgaat tgctattgag
+    10081 agagctttta caaaagaaag agcattgttc aataagcttt tcacatttgg tcgatatttt
+    10141 gatcaaccta tcataggtat ctcaattaat aaaccggaat gttaatatgt tttgcaagat
+    10201 atccctacaa tcaaaccgtg aagacccgac ccggttgcgc aagatccatt ttcctacttg
+    10261 gttaaatggt caagcccatt aaccatacta taggcctgtt gccattttcg accgacctct
+    10321 ctatttctac ttactactta aagaagcaag aggagacgta gaggttcata cagggtttaa
+    10381 aaacaagagc aagtcaccat gatgatagca agaaatcagt ccaagaagtt gaagacggag
+    10441 aacaattcac aaactttgca aactcgaagg aagggaacaa atatgtcgtt ccttttgacc
+    10501 tagtgatcga gattccgtag ctaggtctgt cctcttatcc aaggtatggg ggaaaatagt
+    10561 ccgcagtaga cttttcatga catctttccc gtttacgtca ctgtcgtctc agcctgatcg
+    10621 tctcttcgtt gttttcattg atttcattaa tatcttacgt tttttgcggt gattcctctt
+    10681 ttgtggaaaa gtattatcac catataaacc agttggattc actttatgtg atgctggcac
+    10741 tatattgttt atgcctctca tctcaagttc tcaactattc tttttgcggt ttcctcattt
+    10801 cgtgatgaga ttcttagaca ttgcatgtgt attgtgcttt tcttcttctt atcaagaacc
+    10861 aactttgttc aaattagatt ctcaaagtgg gctttctttt agcatctacg cgattgtgtg
+    10921 gttgactact tttcttttaa gtataatttc atagttttgt tttatttcat tttagtatat
+    10981 acagtactac aactgtgtag ttaacttact acagtagcat gtatttcatt tgaccaaaac
+    11041 tttggtatat caattaacgt ttttactttt tactttttac tttttacact cctggtttac
+    11101 ccaattttgt gcaaaatgat gcttatccat gaagaccgga cccggttatg taagatccac
+    11161 gaggtttatt acgttcggat ctgctccttt atctccattt ctcagctggg gtagagagag
+    11221 agatctaggg tttcgaaaga agagcgatcg atcatgagaa gacgcagcaa aaagatcaaa
+    11281 accgagaaca acagcaatcc agaaacgtcg gaagagagaa acaagtttga tgaaatccct
+    11341 catgacctag tgatcgagat acttgaaaga ttgcctttga aatctgtagc caggtttctc
+    11401 acagtatcca agttatgggc aacaactata cgcagtccag atttcagaaa atcttaccgg
+    11461 ggtggatctt cgtcggagcc tcgtacccta atcgtttccg acctaaattt taaggaacca
+    11521 aacccaaagt tgcatttctt caggccgtct atatcatcac cgtcttttct atcaagtctg
+    11581 acatgtccgt tcacatatcc tcggcacgaa gagtactatt atcatcatgt taatggattg
+    11641 ataagcgttg gatatggtac agaccaaatc gtaattaacc ccacgactgg aaaattcata
+    11701 actttaccaa gacccaaaac aaggagaaag ctcgtaataa gttttttcgg gtatgattca
+    11761 gtgagtgatc aatacaaagt gttgtgcatg acggaaagac tgcgtggcca tccagaagaa
+    11821 gcatcatctc aacatcaagt gtacacattg ggagctaaac agaaatcatg gaaaatgatt
+    11881 aattgtagca tccctcaccg tccttggtcg tggaacgccg tgtgcataaa tggtgttgtg
+    11941 tattacattg ctaaaacggg ggagggtatg tttcgtcggt gcttaatgag atttgatttg
+    12001 aagtctgaca atttggatct ttgcactatt ttacctgaag agattcaaac aagtctacat
+    12061 gattactttt tgatcaacta caaggggaaa gtagccatac ccaatcaacc taatttctat
+    12121 acatatgatg tgtgggttat gaatcaggaa ggtggaaaaa ttgaatggtt gaagaatata
+    12181 actttcacta ttaaacctcg gaagggtttt gttcgttatc tattcgtcac aggcactact
+    12241 catacgggtg agtttatttt ggcaccaacg tcctataccg atgagtttta tgtcttccat
+    12301 tacaatcccg acatgaatag ttttagaaag ataagggttc aagcacccgg agttaagttt
+    12361 agttttgctc agaaagcttc agttgttttt tcggatcacg tagagagtgt ttggttgttg
+    12421 taggagagac atggttttga tacccataat catcagtatc gaaccttgga agagtttatt
+    12481 gcgtttggat aatttacaca ttagaggctc tactcatacg gctacgggag agtttatttt
+    12541 ggcaccacgg ttctattctg atgatcttaa tgttatccat ttcaatcccg acacgaatag
+    12601 ttttagaagc actaaggttg aagtatacga agactatgag tgaaagcgtc atggtacaag
+    12661 agcaatggtt tttcgggatt acgtagtctt aagtttttct aggagagaca gtttcatgtt
+    12721 ttgataccta tcgtatttga aatgtttgca ttgatagacc cttttttagg gaaatttttg
+    12781 tcacttcatt gcttaattta gttaaaccag ttggatttgt ctactttctt ttctcctttt
+    12841 acaactatta tttatttcat ctgttttagg acatgcttta tatgaatgaa tcatttatca
+    12901 tttatcaatc atttagtatc taaaagagtt tatgctcttt gatatatcta cgaagaataa
+    12961 tactaaggtg ggagtcaatg tttgaaatat tgataagccg gaagacgtaa gtaagggctt
+    13021 gattggttct ccctctgcca cccgcaaacg ctgcgtttgc gggtgatagc ggttattagc
+    13081 ggttggcacc aatcatagaa accgctagat accgcttcgg accgctcgaa atcgccagat
+    13141 ttcaaaagct ccttcccgca agcatttgcg gttgcgggcg gtagcggtta agttaaaaaa
+    13201 aaatattaat taattaaaat aattatgtct tccacccaaa ccttatactt attcttatca
+    13261 gaaaacccta atcaatgaaa ccctattctc tcaattatgg catccaccca aactgacaaa
+    13321 aatcaaagtc aaaatcaaga atcaaacgta cgtatgatct atcctttcat ccattcatta
+    13381 gctagtgtaa ttctcggaat atttcatggc atcgttgttt ttttttattt tgtttttttt
+    13441 tgagtttttt gattgaatac atgttttgat ttgttttctc gattagaaaa tcatttggtc
+    13501 aaacgagacc acatttgcgt tacttcaacg ggttattgtg gagaaataaa cggaagaatc
+    13561 caaagacccc aaaatcagat ttctaagtga tgttcaaaaa gaaaatattc ttaagaatat
+    13621 taacgtttgg aaaaaatttg agttggagat gtgttaaaaa tcgtttggat atcttgaaga
+    13681 aactttatca tatgtataga ggcaatccgg aaaacccgcg agttcaacgt tatttacagt
+    13741 ttatcacact acttgatgca atttttggtg atgtaccaaa tgcataaagt attaaacttt
+    13801 ttcttacaaa atttatttat ttttgaattt ttagttttat ttatggtttt aattattaag
+    13861 tatgtttaat ttttcaaata ttatcaattt atcataataa tatattgtat tttttttctt
+    13921 ttaatagtaa tatattatat ttattttggt tattttttat ttaattacta tcgcacccgc
+    13981 tggtttacca gtcataaaac tcccgcaaac gcacccataa ccaaacgctc aaccagtcgt
+    14041 tcaaaacact tgataacgct tgaaaccgca accgcccgtt tccgcaaact cccgcaaccg
+    14101 caaccgcacc cgctgcgttt aaaccagtca ggccctaagt gatgcatctt gactaaataa
+    14161 aatttatgtt aaattacaaa agttgtcgat ataacagccg atttgtctat actactaagg
+    14221 ccaggactta gtagttgcgt tgattcctct atatcatgtg tatgtccgtt cccatatcag
+    14281 gtaccattct cattatgtta ataacttgat tgtggtcaaa agcaaatcat aattttaatg
+    14341 actttattaa agtatgaatc caataactcg agatttatga agattttaaa ttacatttta
+    14401 caaatcaaaa tctaataaca ttaaacttta acagagtctt ataaaatctt aattgaataa
+    14461 cacaagattt tttaacattt caagtcacta aaaatccata taaatcacaa accaataaca
+    14521 ccttttagtt aaaaatacag aaatccaaat ctcatggttt aaggtgagat ttgaattaat
+    14581 tttacaaaaa tcatatgaac ttccctaaaa ttatcaaaat catttaaaaa ctcaagaaac
+    14641 tcaaatcact ttaaattcta atttgaatac acgccctttc taagtttcaa tataaaagag
+    14701 cacttgcttt tccggattac gtagagagtc ttaggtgttt agtaggagag gcagtttcat
+    14761 ggcgttgatc tctctttttt aactttggtt gattactcta ttgattactt actcttattt
+    14821 cacaaacaat cttacctctt tttataaaat cattcggact ttgtctcctt gcttctgggg
+    14881 tattattgac actatagttt ctgtctacca aaacgagttc gtgataacat tattatacat
+    14941 atgaaaagag gtcatgattt aatcctcaac ccaatcccac aaaaggtata aagatgcata
+    15001 tatatacgtg aagacgggac ccgggtacgt aaatccactt taccagtcgg gtatagatat
+    15061 gtaggttcaa aaaaagcgca ttggttatac cttgtctcca tatttataag ttttaaatta
+    15121 ttactatata ctcagctgga aaaaaagcat acaaaagaca gagaaatcta gggtttcgaa
+    15181 accagagcga tcgatcatga tgagaagacg caacaaaaag accaaaaccg tgatcagcaa
+    15241 tccagaaacg ttggaagaga gaaacaagtt tgatgaaatc cctcatgacc tagtgatcga
+    15301 gattcttggg agattgcctg ccaaatctgt agctaggttt ctcacagtat ccaagttatg
+    15361 ggcaactagt atccgcagtc tagatttcat caaatcttac ccgcttggat cttcgtcgaa
+    15421 gcctcgtacc ttagttgctt ctagtgaacc agatctcaat atggaacacc acgaatgata
+    15481 cttcttctcg cagtcttcct catcaacgtc tcttgtatca cgtgtgagtg tgacatgtcc
+    15541 gctcccatat tactatgatc atcactaata tcatcatgtt aatgggttga taatcattgg
+    15601 acatggtcca gagcaagtcg tggctaaccc tagcacaggt cgaaccatac ctttaccaag
+    15661 agtcaaaacc aggagaacga tagcaacaag ttttttcggg tatgattctg ttagtgatca
+    15721 atacaaagtg ttgtgtatga cggtaaaagc gtatggtgat ctgagggatg agtcatctca
+    15781 acatcaagta ttcacattgg gagctaaaaa gaaatcattc agaatgatag atactagtat
+    15841 tattcctcac cgtccttgct ctaacggtgt gtgcatagat agtgttgtgt attatgttgc
+    15901 taaaacgggg gcgggaatgt tgcatctatg cataatgaga tttgacttga gttctgaaat
+    15961 attggatctt tttactagtt tgcctcaaga gattcgacct ccaagttgaa ttattgagaa
+    16021 tttgataaac tacgagggga aactagccat accaacggaa actacttcat atacatatgt
+    16081 tgtgtgggtt atagatcagg attctgaaaa acatgaatgc ttgaagaaat taactttcag
+    16141 tattgaacca gcttggaaga attcatttgt taatctacgc ctcataggct acactactca
+    16201 tacgtgtgag tttattttgc atcaccacac tataattatg agatttatgt ctcccattgc
+    16261 aatcccgaca cgaatagttt tagacgcact aaggttgaag catccgcagc gttcaagttt
+    16321 ccttttataa gagcaatggt ttttccggat tacgtagaga gtgttaggat gttgtaggag
+    16381 aggcagtttc atgctgtata atactctctt tataaaactt tagcacttat aaagagagta
+    16441 ttcattcgac gactctaatg tacaaaatcg aggatgtaat tgtacattac aattcacgat
+    16501 tacaattcac gactatacgt gcacattaca attcacgatt ttggttgact tcactatcta
+    16561 ttagttcttt atgttttcac aatttcacag aaaaaaactc tgatcataat aaagtaacaa
+    16621 ataaaacttt ccttcttaca tgtctggaat acattggact ctactaaccg cgatcataca
+    16681 agttagttgc tacgacacca aactattcac acacagacac attgcttctt taaaacttct
+    16741 ggaaagcata caaattacca aaaataaatt caaaacttta ctgaaaatgt tacatgtttg
+    16801 cgatcgttgt gacaaaaggt accaatcttc tgtaatctct tcttcctaga gcgcaattat
+    16861 caagaacccg acccaacatt agccttgtgt tctgtaaagc cacatctctc ggaggtatcc
+    16921 ctgactctgc cgcagctgca ccaagaggga aaggttgcga cattgttgca gccatcattg
+    16981 cgtactgaag aggtccgctt cgggctttaa atgaagtctg aggtacaagc gttttctcat
+    17041 cagtcccgtt ttcagagtaa ctcgaatcct gagggtggga tccaccggtg cccccttgta
+    17101 ccgggctgca gctctgaaga agagcttcga gaacactcaa tgcctcccaa cacagagtgc
+    17161 tttcgactag ctgtgaaaca attgtgtaca tgtggggact ctgcgaagca tccatgggag
+    17221 tgtgttggag caatgccttg agcataagta atatgactcg ttggtaatcc actggtcctt
+    17281 tttttagcag tcttaacaag tgaccgaaag ctaaagctga gtgtttcggg aaccatttgt
+    17341 tgcagagcag aggagagaca catgcaagga gattttcaac tcttttaatc tcgccacgag
+    17401 catatgcgac aaaaacagtc gctaattcat cgagtgattt tgcccgacac cagacagcaa
+    17461 tgttagatgc cacagaacat gccttttgat attgctgctg gagtggtaac gcagaaacca
+    17521 tgacttggtc ttgggtcagc tgcaagcaaa gccaaggaag tagacctgtg atatgcataa
+    17581 ggagtcttgt ctcagcatcg ccaaatatag agtcacatga aggcactgta atccttgata
+    17641 ggacctcaat agagaactcg tggctaacag tagacatcag tcctttgagg acaagaggtt
+    17701 gtacaccttc gaactttgga agagttccac ttgacggcgg catctcgtac cctctagatt
+    17761 ctgacctttg gaattccccc aaatcattag tgttgaattc atcccgaggc atacttgaca
+    17821 agagtacatt ttcagttgtt ttgtcacgaa atgatagccg gtcaataata cgggagaaga
+    17881 gctcaagaac ctggcaatag acatggacga agtctgtgtg catcatggcg acacagcccc
+    17941 agaagagctg tgggtagagt atgactttct ctggctccat gttctctacc atgacctgta
+    18001 gcgtcagaag gatttccata ataaaaccca aaactggagg aatgggattg ctaagacatc
+    18061 gatgaagaca gcgtagaagg gagacacatg catcacttgt cacacttggg cgtagcgcac
+    18121 ggtatatttg gtgtgaccga catgctagat gtcttgacgt gcattccata gcccatttga
+    18181 gagcttcagt tccccaagtt tctcgaagat ctccttggaa gaatatagca tcaaccatac
+    18241 tctgaacaag tgcagacaaa agagcggcac taggaagatc tgttctcaca accgttggat
+    18301 cttcattctc ccacatcatg cttcctcgtt ttgattgcac atacttaatt aaactcacaa
+    18361 cctgttgttt gttctctcca tcgctgtttt ccacctcata tagctccaaa tgccgacccg
+    18421 ctagtgagta caataaattc acaagtaaat gttgacaatg ctctagcaca atgtcttcag
+    18481 agctatccat cgaaacaaaa gtgacgtgga agagcaaagg cagatgttcc ctaaaatctt
+    18541 catcattctc atatgcgatt tctgcaagaa gaatcagagc tatgtcagca tgagtgagtg
+    18601 agtgctgttg atgtccctgc aaggctgatt gaagttcctt tgcattgacg ccgtgcattg
+    18661 cgatccctga atgcaacaca tcttctccag aatttggtgt atctatgagg taatctccgc
+    18721 tatcacgtga gacatggcga ctcctcaaac ttcctgtaga gctacgaact cccattaaag
+    18781 gtcctgatgc attcaccaat gatggcagga gctgaccaga acgaccagta gctacaggaa
+    18841 caatattcaa ctcaggaggc attggactta aaggcccaga agcacttcgc ccagtcatcc
+    18901 ctgctgttct ccagcttagg cttccacttg tattcctaag gggaccatca agggagcctc
+    18961 tgacaagaag tggtgacata tgtggttggc tgtctgcaac tgaaacaact tgaggtgctg
+    19021 tggcaggtcc ctgagagaac tccagcacaa agtttccatt ggaatcaccc cggtttgcac
+    19081 tataaccaat gggttcaata ctgtcttcca gcatccgctg tgacagttgg taaaccaggt
+    19141 gatcaatagt acgctgagga catattcgtg cgaggtacaa acttacacgc ttagcaactg
+    19201 aaaaatatgt agcaaatgca ccagttattt cggcagatgc atttgaatca cagtcctcaa
+    19261 ttccctttgt gatcaagaaa tccaatacag ggctgatgtt tctgggctta ctagcaatgg
+    19321 tactccaaag tttttcaatt tcatcaggga attgatcccc gtgtcgccat gtcacatagt
+    19381 aaagactttt tagtaatctt tcactccaac ccgaatcctt gagtttccag aaattaaggt
+    19441 tctcaatcca tggagccata caggtcaaga cctgatgctg agcaattata tcaacagcat
+    19501 caagctgacg ctgcataatt tcttcacaaa gcagctggct taactccgga tgatccttgg
+    19561 cgagtttgca ggagagcttg tattggaact gttgatatga atctggaaga ttacctacaa
+    19621 ctgctgctct atagcctcct gaaccttcta taccatcctc agcccactca cgcatagaaa
+    19681 gtgtctcaag catttgaaga gcatcatcgc gaatttgtct agatggatcg actaccttgt
+    19741 agagaattag acttaagagc ctctggattt cacattttgg tatttcttgt cgcatgtaaa
+    19801 cctcagccaa cacactgaag tacccatcag ctatagccgc atcagagtag taacactggg
+    19861 aaaagtacag atgaattagg tcatgcagtt tagaagagtc taatcaagga tgtagtcaaa
+    19921 cagatgagca aagcatagaa gtgaaaaaag cttattaaga gaattcagat gcttaaactg
+    19981 gagctaataa aacaaaacta tctgatacgg aaccatcact gcatggataa gttgtcatca
+    20041 ttggtcaaga tttatcaaac atctgaccta gtaattacca agacgacagt ataattaaac
+    20101 acaacttcca tgcattcagt cagatagaac attacctgat caatgcaggc aggaaagaga
+    20161 tccaagtttg taagtagaag attcttcagg gccaattttg ccaacgcaac acgttgatga
+    20221 ccacctctat gcctatcacg acctgttgtc cctcgtccac cttctcccgt gtatttggag
+    20281 taagatggag ttcttggatc agcaggcgag taaccaaaag ggaccctggg agcaggctca
+    20341 atgaacaaac tatttatcca agatatcaca cggccgctca tttttcttgc attgtcatca
+    20401 aaacacggtc cataaagaag ggaagccatt gcattcaatg aggcccactg gatggcttca
+    20461 atttgctcat tcagttcctt gtcaaaagaa atcttgtcta cagaatcttt ggaccggtta
+    20521 tgctgtgagg tcttatagcg ctctacttca cggcgataat cactaacacc atcctgtccc
+    20581 cacgtatttc cagtgtcatc ggaccaggaa agaagcagat cgaatagacg tttcctactt
+    20641 ctaacatcaa acttctctga ttttgattca acaaattcag gggcaaggaa tcttaaaaca
+    20701 gacgcaagtg catatcgcag aggctgcatg tcctgaaagc tttcatgagg tgccaacgat
+    20761 atttgtcttg ttgaatcttc aatgaaccgc agataatgga ggcggaaaac tggtttacga
+    20821 gcaagcattc caggccagac attctcagac actgttcgat atatattaga gacatgaaca
+    20881 cggaggtcct ctcttcgacc tcctttttgg atctgcattc atatcatagg aacacatcgt
+    20941 tttagcgtca agtctcaata ttaatcaccc agctgtaaat caatgtaaca aaaatggtga
+    21001 agaggaatcc aacggtcatt aatttcatat tcagggaaaa acttagtaat tcgacatccg
+    21061 tagcagccca tgtgtgagaa aatactggat acttccgttc cctcaactct aatgattcaa
+    21121 aatgaagctc aataaagaca catagaggta caaataatgg caatgacttc tagtactttt
+    21181 catccatgag atttccctta cttgttctcc ccttttttta ttcatttgct taggtaagtt
+    21241 ttgtaatgaa tctatagtga taatgagtac aaccgtaaat ctgtgcagcc cagcagcata
+    21301 gtgaatgctg taggatttcc aggtataatt ctgaaagaaa gtattctaca ttgttttaca
+    21361 atttgacaag tatatacctt ccactttggc ttcgtctctg tctctgatga aatctcctcc
+    21421 atgaaagatg caagctcgct aaacataatt tcacatgctt ccaaatgaga gcgcccgaga
+    21481 gccatagttg cagcatgcta catgcaagga agtataacgt aaagattgta aaaacttatc
+    21541 taggagaaaa aattcaacca aaaccacatt aacctaacaa gacaagtagg tttaaaagac
+    21601 tgacaaaact accaacttca gactaaaaat gcagagacac aaaaacaact tacgttgtgg
+    21661 gtctctgacc caaatctaag ataagggaaa ataaggtggt acatgtctct ggttgatgca
+    21721 atgctacccg catctttgcc atcaggtgga cacgaacaca caaacatagc atacagaagc
+    21781 cattgatcca gtttgttatc tgtatcttgt gactggtttg cctttccacc aaactcaaca
+    21841 ggtgtaatat gggccaggcg gtgcataatt tcagacctgg aaatgaaaag ggcactttag
+    21901 tttccttcat gttattaaga atagcaaaag agttatggca ataaaatgag acaaaaaaat
+    21961 tcttcctaaa ttttaaaatt cccacagtaa agtgaaattt atttgcacgt tatactaggg
+    22021 catgaatttt actgaaggat tgttgatcca taaagcctac tttacttttt taatctttat
+    22081 agactggtag gacagcgttg acggtcagag aagactcaag aaaacacacc aaggatgatg
+    22141 tagttaggaa aatatacttg gagcaaacac actaaagata atgcagctaa gaaatatact
+    22201 tacttggctt cttgaacaga gcgtgggcag agctcagcag catatttaac aagctcgctg
+    22261 agacatcgac cccatttgtt tttatcaagg ctctcaaaga taatggattg aagagtcaca
+    22321 tcagaaggaa tggcatcgga atctcgtctt aaatcaaaag gacgggcaga atcccaatag
+    22381 caactttgaa caatatcatc ctattatgtg aaagaaataa aggatacaaa tcttaatttt
+    22441 ctataaagcc agacggtttt aaaccataat caaccaaaag gctaaaaagc ctctgaaatg
+    22501 gtgagatcta cacagaagct acttacccca tgttcttcca agacatcgat catgtatatg
+    22561 ggttcagctt caaatttcat aacatggtct gggtgctctt gaatcatgag atcccgaata
+    22621 tcatttctca aagcacgtac acatcgtaat aattctaggg cagtgtgccg aatctgactg
+    22681 tcgactgaac taagaaatat tagcccaaca gcatcaatat cagaagcgcg aaactcaatt
+    22741 gcatctgctg cttgatgaaa agataatttc ttgaacctat catttccctt tgctgttttg
+    22801 ttttcttctt cagcatcagt atcttgtctg tcatctacta aacaagccct ccagaaacgc
+    22861 atgagttcca acagacgtcc tagtgatgct tgaatgagga gtgggaattc atcaggaagc
+    22921 tttaagatga agtttgccat tcctctcatg actgcaaaac gacgatgagg aaggtatcta
+    22981 acaatccggt tcaggacttg cactgcttcc actcgaactc caggatcaat acttatacca
+    23041 tgctgaggta ttatttccgt gatcttatca ctccggccaa cttcctcaat cagataaggt
+    23101 atgcacttca gcactgactg gaagagagat ccttgagact tttctttatt cacagcatct
+    23161 gcatataaca tgaaggttag cgaaaagtgt tttcggaaag taacagccag gggatcctag
+    23221 gtagaaaatc aaaaataata tcacacttaa atacacacaa catataaaga tttcttaacc
+    23281 tcctcacaaa accatgtaaa tcagactaag ctttttgcgt aaaactaagt ttaccgattg
+    23341 tagtcctcga tgatgttaga agagcttgac tgtaggtcct gtggcaggat cttaaaattg
+    23401 actcaattgc tgctttcacc tttggtatgt aatggcctat cccatgacct attaagtgaa
+    23461 cactaagaaa agttaagtgt atgaaaggaa aagcttattg agacctacta acaacctaat
+    23521 tcaggttaac gggttaccct taaatatctc caacccaaca tactggcttg aaggcgacat
+    23581 aacaagggca agtaaagcac ggagaccaat aatctttgct tcgcttggac tatcttgctt
+    23641 taacagttct agtaacatgt gattcatagc aaagtcaagg ttatgttcag ctatggtcac
+    23701 acagaactca acgagtttat cttgctggac atcctgagtg agcattcctt ttctcagaac
+    23761 tgttagcaat tgtgatgtca cactatccaa gtaatcccat atacgatttg gtggctggga
+    23821 cgaagcatag acactcaaat aaaacctcaa cacccggtga agacaatcaa gtgccatgta
+    23881 gcgatggttc ttatcctgca tataaggcgc aaaattctct ataatgacca agaacaagct
+    23941 tgaaaaagaa caatgaagtc acaggtgctc aacatacttg aaaaaaaata agctgctgca
+    24001 gttacaactg cacatcaggg tttgggacgt gataattggt tccatgcatt cttacttaat
+    24061 ttaaattacg aatacaggca aacttactct cagaagcttg tacagttgtt ccatgtgaga
+    24121 gcttaaatta tgatgaaata taagcgggtc accgagacag agaagaagac tcacaagcgg
+    24181 atagccaacc tattcatcca attatacatg ttaaggactg taatcacaag gtaggcatac
+    24241 aaaagtgata ctaaatcaga ttgacgtaca ccaagatgtt tactctgttt ctccatccac
+    24301 tggattagct gcactctgat gcgtccaaca gcctcatacc agagtgtcag tgcaggttct
+    24361 gcaactgaag gaggccactg gcttttgcca ccatcggaaa gtggcgccaa gatgttagag
+    24421 agcatattgc aaagcgcatg ataaagctca cttttacgtt tgtgtatatc ccggataaga
+    24481 gggtttgctt ttgctacaaa ggaggcagac gcattcagcc caccctcact cttaacctgt
+    24541 aaataaacat actgtcacgt tcactttctg aagcctgcct tttctaagat atctgagaca
+    24601 taccccaagc ttcaagtaac gcatcccatt aatgatgctg agagtttcac ttcttgcaac
+    24661 gcttgtatca atccggcgag tattgagttc catgaaaaac ctctccgtga cagagctgaa
+    24721 tctgttagtg aaaattaaat aaatggataa gagtggttgt ggcttaaaat agatcatcac
+    24781 tttcacaaat tcttttctgc caactaaact tgtgaatgga agtccacata ttcttttcat
+    24841 ttatacttta gcatgcttgg aggattgata acttacaaaa tgaattacct aatacgggac
+    24901 agtgcaccca ggagttgagc aaccaagtca agaaggagac ccctcaaatc gaccagcgat
+    24961 gggtattcca cttggctaac aaccctgtaa cggagaagca cccagaactt agaaattcag
+    25021 gtttttacaa ctcgaaaaag gaatgccaaa ggataaaaga agtgtttagc gaatcatata
+    25081 aaattcaacc gttccagatt ctgaaatcac tacctgtcag cgttgattag ccaatcaaaa
+    25141 acaaagttct caagtccaga ccatagcttc tctgccattc acgagagatg ttcttctttt
+    25201 agcaaggcag taaccacatt ttgttgaaaa gaatctaaaa agagagacta acaaaccagt
+    25261 gagcccttcc tgcgggcaac actccacaaa gcgaatgcaa gctgagcaga agatgcactc
+    25321 aacagccaac tgattccagt aatgtaaaac catattttag aaagttgaaa accaatatcg
+    25381 aaaattgatt aaataagcca ttaaaatatt agggttcata aatggaggta agagtactaa
+    25441 gaaacagaga agtaagagta aaacagtctg gtcagttgga tctagtgtac atagttacca
+    25501 aaaccccgag gaattgttcc tatgaagagt cgaaagaaaa tacctttctc tggaaagtcg
+    25561 aggcatcatt tgcaccttta ggtgattcac tgcaagccac aaacaaacac aatattagta
+    25621 atggtctaga gaagtggctt cacccaatca tagataacat ctagtcaata aattcaccta
+    25681 tacgaagaaa aagtgataat ccacataaga attatgataa tgcaccaaag agtccaaaag
+    25741 aaacactgaa acatagaaga tcaatgactc aaacctaaaa ataatgcaag atgtagaaat
+    25801 aatagcctcg tgggagacaa ggtcttctag acgtatttca tacagacaag agtcagatac
+    25861 catctaattg ttaagagaga gaaataacaa tatacccacc tttccctcca tctaagaaga
+    25921 gcttctaaaa gaggaacagg tgtatgacgg gcaatcatag ccaatgaatc caacacctgc
+    25981 tcataagcag gatctgatgg acgaaggtac tgtccatcct gcaaacatca taccaagaac
+    26041 cacatcacta ccaaagagct atttaagaac aaatacgtat actagtcagt aagctacagt
+    26101 cggaacagcg gcatttccaa ttcaatgtaa tacttccagt aaaccgtact caagactaag
+    26161 tttcaactac tgagaatgaa actatctaat ttgcagatac caactcctta gagcactgaa
+    26221 aacttaaact acctgactct atatggattc aaatgtgtgt taaaaaaaaa aaaaaaggct
+    26281 tattgagaag gagaggaacc tgagcctgag ctgtctcgat tcgtcgtctt gcgagaggca
+    26341 ggaaacgctg aagaagcgct tctactatca atttcgcagc actcccagac ttcatcacgt
+    26401 tcagcaagaa tagatcctca aaagcgaaat gtcctctaaa ttaaattaag aaaaattcaa
+    26461 caataagatt tatagacccg aatcgaacag aatacgagac tgaatctcgt gaaattttcc
+    26521 cagaaataat gagaaatacc aaaggagagc accacgaatc acgaaatcat tctagggttt
+    26581 agttcgtcga tccggcaaga gaatcgctaa gaaatttctt tgatcgacga cgagcttcct
+    26641 cgttcctctt ttcttatttg tttttgtttt caatatattt tttttgtttc tttttttttt
+    26701 ctgaaaagtg tttaacgtta agatgtttaa taaaaatcat ttgttaacta acaacattta
+    26761 aggtagattc aattaaataa aacaaacaaa atacaacaag tagtacagta tagtatcaat
+    26821 ctcgggatct tttgtctgaa actaattata tttttgtcaa cgtctcgaaa gtaaattaga
+    26881 gaagctatat agattttttg ttttttcatt tcgcataatt gacaaaaaat ttacaagact
+    26941 attaattacc atgcttattt tctttggatt ggctgatttt tgaggtataa actatttcga
+    27001 ttctcagctt tcactcccag acttttgggc ccttaaattt ccaccatcaa ctttcatatt
+    27061 cctcaaaatt acatcatcta ctttcgaatt catgcttaaa catacatttt caattgaacc
+    27121 attcatttaa ccgggattaa caacaaatct gcgtttactg taccgcgatt gcgattaacc
+    27181 cgactaatac ccagacccat taagaaaacg acgtcgtttt catttcgttt aactcatcaa
+    27241 aaagagaaag aaaacgacgt cgttttcatt cttctttaac tctcaaatcg atttggtcgt
+    27301 tcttctttaa tctcgacact gtgaaattag ggttcttgag agagaaagac aaaaggggat
+    27361 ttggagtttt cttcttcgat tcgaattatt gggtttcgat ttgggtttcg aatagggaga
+    27421 aatgaagaat tggtgggagg aaggattgat ttaccaaagc gatccagatg accccgattt
+    27481 cgaacctcca gagagtgaca ttgaagccga cgatggaagt gatagtggtg atagcggagt
+    27541 cgaggaggac gaagccaccg gagttgaggg agacgacgtt gggatagatg gagacagaga
+    27601 atctgatgga gaggagaata gagaagacga tggtgacata gcgtctgatg gagacgtcaa
+    27661 tctagaagac gatggagaca gagatgaaag tcaaaagaag aggacgagag aagagaaaag
+    27721 aaaggaggaa gaggctgaag aaggtcaaaa gaagaagaga aaacagaaga aggatgagcc
+    27781 ggaagaagat ttggccgaga ggtttgagtt cgagatagag gaagcagtag ctatgtggta
+    27841 tgatgagtta aagattagaa ggaacgaaat accagaaagt aataatgaag aagaagatca
+    27901 tgtgatcact agagataaga agattaggtt agcttcggat gataggttag ctataggcag
+    27961 aacattcttc actggttttg aattcaaaga ggttgtcttg cattatgcaa tgaagcatag
+    28021 gataaatgct aaacaaaata ggtgggaaaa ggataaaatt agctttagat gtgctcaaag
+    28081 gaaggagtgt gaatggtatg tctatgcctc atactctcac gagaggcaat tgtgggtttt
+    28141 gaagaccaaa tgtctggatc attcttgtac ttctaatgga aaatgcaagc tgttgaagag
+    28201 gaaagtgatt ggtagattgt ttatggataa gctaaggttg cagcctaatt tcatgcctct
+    28261 tgacattcag aggcatatca aggagcaatg gaagctagtt agtactattg gacaagtgca
+    28321 agatggaaga cttctagctc ttaagtggct gaaagaagaa tatgcccaac agtttgcaca
+    28381 ccttcgaggc tatgtggcag aaatcttgag tacaaacaaa ggatcaactg caattgttga
+    28441 taccattagg gatgcaaacg agaatgatgt cttcaaccgg atttatgttt gtcttggagc
+    28501 aatgaagaat gtgttttact tctgtaggcc tctcattggg atagatggaa cgtttttgaa
+    28561 gcatgcggtt aagggatgtc tgttcactgc tatagctcat gatgcgaaca accaaatcta
+    28621 tccagtggcg tgggcaactg ttcagtctaa gaatgcagac aattggctct ggtttttgaa
+    28681 tcagctgaag catgacttgg aactaaagga tggcagtggc tatgtggtca tatcagatcg
+    28741 ttgcaaggtt agtattatct tatgtctaat tgtttaatta tgtatagatg atagtgcttt
+    28801 atagatgcta atgtgtttgt atttgacagg ggattattag tgctgtcaag aagaagaatc
+    28861 aagagattaa ggtttgtttc tgttttaggc tgctgtttgg ttagccattt tggtgctcat
+    28921 gtttgaatgt tgtttctgtt ttaggaggct ggaacatctc agcctacaat ggagctccaa
+    28981 gaaacaacac atggagctga tacaatcaca ctcactcaga gaagcagtca atgggatcaa
+    29041 tcggaccagt tggactagtt ctagttctca atcctctttt gtctattttg gaacaatctt
+    29101 aacaatcttc atcctctttt gtctattttg gatgatgtat tattcggtta atgtttagga
+    29161 tgttctttat cctcttgagg ctttgtagga tgatgtatta ttcggttaat gttttggatg
+    29221 ttctttatcc ctcttttgta tggattctta tctattctta tgttatgcat cgattcacat
+    29281 tagacaaaga cacataagac ataacataga tagattcaca aaccctccat cgtttaatct
+    29341 cgacacttac acaatccaaa acaaagacaa acatagccaa acccctacaa gcttcaaaac
+    29401 agagactaat ccaatactac aatttcactc caaaacctca acaacaccta cactcttccc
+    29461 caaagccata caatagatgc aacaatcaca acagcaaccc ctacagtctt caaagctgac
+    29521 tttgccatga catgtcctac ctagctcttc cacattctcc ttcatggtca agagttcacc
+    29581 ctccatcttc ttaatcagct cttcctccat ctgcttagcg tttttattaa ggagctccat
+    29641 attaaccatc acgttcattc tgaggtcact aatattctct gcaactctct tacacttctc
+    29701 atttaccatc ttaatttcgt ccaagagagc ttcatcaacc catttgaaca agtgctcttc
+    29761 attttctctc tataaaacag aatttccaat catttactcc ttcaatcaat caaatatatc
+    29821 acaaaatcga aaggtgtacc ttcattgcta tcgcacaacg gtagaatctc cggtatggat
+    29881 tctcctttgt tttcgatgtg aatgtaatga tctcttctcc acaccaacat ttcgagggga
+    29941 caccaggagg acagacacga aatcctgaac tggaagcgga tgagtagtag ctgttggtgc
+    30001 tcattttctt ataaaattgt cgaatgaaga accctaatcg actaaatcga ttttgcagaa
+    30061 gaagaagaat gaaaacgacg tcgttttctt tctctttttg atgagttaaa cgaaatgaaa
+    30121 acgacgtcgt tttcttaaca ggtctgagta ttagtcgggt taatcgcaat cgcggtacag
+    30181 taaacgcgga tttgtggtta atctcggtta aatgaatggt tcaattggaa atgtatgttt
+    30241 aagcatgaat ccgaaagtag atgatgtaat tttaggaata tgaaagttga tggtggaaat
+    30301 ttaaggatcc aaaagtccga aggtgaaaac tgagaatcga aatagtttat ggctcgaaaa
+    30361 tcagccaacc cattttcttt taataagtct gtggatgtgt tggcaaaatt ggtttaagcg
+    30421 atttgtcgct ttaatgctta tgagccggta cgtttccgtt gagctgatga ttacagattt
+    30481 tgattttaga gatttgcaat ttgtgttgaa gaaatgaagt ggtttccgac gtttattacc
+    30541 atagccttaa gctgttattt ctttgtggag cttcgattag tttttccagc taaaattcca
+    30601 agaaaataca tgtccgattt tattatcggt ggtgatcttt taccggaatt gttgctctga
+    30661 cggcgaagga tgttatcgga atactttggt tactcatcta acacgtgtta tgttgttgac
+    30721 acgttcttaa ggatggagtc aaattgaagt tttcttcttg catcgacatt tctcttttgg
+    30781 gtctgataga tcttgttaac atctgtaatc gggcttttga agtccatttt gtatgtttgg
+    30841 tttgtttaat acaatatatt acacttatgg gaaaaaaaac aaaaaaaaaa acagacgtgt
+    30901 tacatatata ttttcaatct atattttctg tgaaaaaatg gcttccacca attcagtttc
+    30961 tcagttaata gtagtaatcc atttcgtata gtgaacacca gtcaaggctt gtggtacagt
+    31021 tcttgctatt ctacagtgta ccgacgggtt tacacatgac atcaaatagt ttctaaccaa
+    31081 cttccggtac tcccaaaccg ggatttctct cagatcagaa gctacaactc tcatctcagg
+    31141 aggtttcatt aagcttcgtc agcacaagta aattcaaagg atttcttcta ctccttctct
+    31201 ctgtctgcgt ctctgtatac aaaacccagc agcttttctt ttgttctggc ccttgcaaga
+    31261 agtttgtctt tcttctcgtc catctaacac gtacagctca agccttgggg ttgatactat
+    31321 gttctgaaag aatattgttt cttgagagct tcagagatgt ctgagaatct tagtggaaga
+    31381 cattactttc tttatcagtt gaagaaataa ggtgaatcta acaccacttt taaatccaaa
+    31441 tctcctcgtt ctggcagagg aggaaagtct aaatgaggaa acgattgctc aaagctctcc
+    31501 agcaactgtt aaaaagggaa caaaaaacaa actcagttca tgaagcagtc ttctacatgc
+    31561 catatatact gaaccatttg aacatatgac acttacattc tctagtatcg gttgtgaata
+    31621 cagctcaaca aacttttccc gtagtattat gtttagttta tccacatcac acgcatgcgt
+    31681 ccaaaaagag tcatgaactc ctgttagaat aatttttaca tgccatgtgt taattgacac
+    31741 aaagaccaag gttaagaaga agatggaggt ccaaaagatt taacttgtga aaaaaaggac
+    31801 agaacctgca aaacacacgc ctgctctttt acaggcaacc gcagtcatca tcatatgaga
+    31861 cccatccagg gagtgaataa aatttggagg aaaagctgtc ctttgtcgcc tcacaatgac
+    31921 ctacaaaagc ccacgaaact cttttaggaa taggcctctg catcgtactc tgtttcctta
+    31981 cttatctttt gtttctatgt aaaaccaagg taaagtggaa gcagacctga tcagtttcat
+    32041 gctgaagcga tagggtctgg agggatgtct ttacctgcat tcattttgat agaaagtgaa
+    32101 aacagtcaca ttcagattca acagacgaga gtaatggtgt cagtgataac ttttaggaag
+    32161 acttacgagt tttgttccca tttggtggta aggttgtaca acaggaagac ccaatggggt
+    32221 tgtccatcga actgtttcat tttctgaagc aataatctgc tcatgaaaat tattaaggat
+    32281 tattaaaagt ccaaaagcat atttaattga cgctggaaaa aagtctacac accttcgcac
+    32341 attcaccaaa ccaacgcatg atggcgcgtg cagcttgaaa catctcatct atagcagcta
+    32401 atgttacctg ccccaaacat aacaaaacaa aagattatcc cctcatagca tttttacttt
+    32461 tgagaagttt ccaacgtaca tgagaactag aaagtagtaa ttaatcattt gttaaagcac
+    32521 acgcgttctt tgaagattag atgaaggaaa ttgccaaatt aagattttct agtgccagtg
+    32581 ctttatctgt tgctaatata cctttgctgc atagcaagca gccccaaaaa cttctttttc
+    32641 atcaccaaaa tcacttcgtt ccttcaacct tctctttatt tgatcccgag cgccaatgta
+    32701 ggtgacacca tagactgatg tcataaccgt ctgctttaca agcttacgat ccacctacag
+    32761 attttgaaca accgtcaaca agttgtatca gctgaccagt tatgtgaatg agtaaacaat
+    32821 gtacccaaac aaaacagata tcaggcaact tgctcacatg ccaatactac tacatataca
+    32881 ttagaacggt tcattaaaat ttattggtga tttaacaatg ttttttgagg atgtaatgta
+    32941 tgagatttct actccactat gcaagcacgg aagcggaaac cgagaaacac aaggttagga
+    33001 caatggaaaa agaatgaaaa gaatttcata cggacctggt taagtaattt tcttgcacgc
+    33061 aatgcctctg gaaaaacttc aggatctctg tctgcatctc ggcgcataat atcaagaacc
+    33121 ctgcaggtat atgcatatga aattttgaat ggacatttat ttactcgatt aaacttaagt
+    33181 tagctttaag cagtagatgc agagcttacc tggtagctat tcctgaataa acatctgccg
+    33241 gcttctcacc tgcaactaga ttaacagctt ctgctcctaa ctgataatca tcttataaaa
+    33301 ccataatcag taaagaaaga tgacaagcag ccaacaaaaa gaaccataga agattaaaaa
+    33361 caagagaatt actgtgtctc tcccaagagc ggcataatgc tgtaaaccat tgcaggaacc
+    33421 atcctggcca gaaccaaagt gaaggaaggg aagttcattt gaagaaacac acagattgaa
+    33481 cgagaaacta caacgtaaag tcaaaaaaaa attgaagcct tcaggagaag atgccaaagt
+    33541 ggcaacttct accagaaagt acctgatgta taggaatatg tgacagaact gtctctgggg
+    33601 atgggcttct cagagcttca gtcagactta tgcagacagc caagcactga aatgggtctt
+    33661 cagcctgcag ccaccatctg cttccttcaa gtggtctgtc tgccgaatca aatatgtcat
+    33721 ccaagtgatt ttcagtgaaa gctagccgtc catcaagtga taacttatct acaccaccag
+    33781 catacaagtt tgctaagtgt atcttcagcc agcgtaagcc tgaaattccc ataggccttc
+    33841 cctcagcaaa ctccaaaaca ccccgacaca aatcagagcc aagatgattt aagtgtgggg
+    33901 gcatgggata tgcacgaccc cggaagtcca tattgtgggg atagtaaaaa gcttcctcat
+    33961 ctttcatttt ccgtgctacc tgctcacatt taaacagtta tgaacagaac tctcaaactg
+    34021 gcacattact ttaataacaa ggcggataaa gctcaccgaa agcttgagtt ctgtgtcaca
+    34081 tcgctgagaa tgtctctcgc tgttcacctt tttagcagat ttgacttccc acttccattt
+    34141 cttaagaatg ccctcatctt cagtatccgg cttttctggt aaaggaacct gcagtttgaa
+    34201 attaaagaat taaagaggcc tcgtgatgca caagggatag aaaatatgtc tgcgccaaca
+    34261 ataactaatg tcaaactgtt gtacagatga aaaacagtag ccattataag atcaaagtaa
+    34321 cacatggagt aactcacatc actccgatcc accatatcag caacacatcc gccactgctc
+    34381 catatcctat ctacaaccgt taagactcgc ttatttactc tccatttagt acttccaagc
+    34441 gtatccaggg cctaaaaatg tccataatat atttacagaa attaaaactg taactgattc
+    34501 ccacataacg cacgtgaaga aacattaatc ttctacctca aagactggtt gtagttgtcc
+    34561 tttaggtgcg ctcttaagtg cctctctttg ttgcttggct ccatgagttt tcattatata
+    34621 agacgtcaag aacaagtaag cacctttgtc atatctgcga aacacaaaat aaaagtgaaa
+    34681 tgacttacat aagtttcacc aaaagtaaag tattacaagc atcttatgaa gaaatgttga
+    34741 cagttctcga agctatacaa atctaaattt cctttctctt aaacttagat agataagctt
+    34801 taaggtttct gtcttctctg agtaactaag ctggccacaa aagaatgtac atttttaatc
+    34861 atactgaatt cattttggaa tttatgtttg agataccatg caatgcaatt atttaatcat
+    34921 ttggggtaag tatacagcaa agaaagctca tttaccccga ccatttgaga ggaggaacca
+    34981 gcattggcat gtatggcatc actgcatatc tcccctgcac cacaaagaaa aaagcgacat
+    35041 tcacatttcc aacaatatat agattagatg tctcagttat accttagtct tcaaatccca
+    35101 gaaatattta atctaattta caggggctac ccaagtcaaa ccctctttag atataacatt
+    35161 cgacaaaatg accataataa atgactccga gctctgaaaa agagtcaaca ctatttttgt
+    35221 cgattttggg ttaaatttgt tctgataaca aacaaaacgc gaatagatca cctacacttt
+    35281 tttccaagcc tttgcggacc aaagggtcac actcgattac accatatttt ctcccagaat
+    35341 tcctgttgta caagaaagac tggaatatga gcctatgaca ttcagggcaa gaagataagc
+    35401 ctagttgaaa gtcatagacc gaaatttctc acatgcttcc tttcgctacc ttgaaggtat
+    35461 gcacaaatgc aggtcggaca tcaggcagat cattatcctg ctgatcagct ggagactgta
+    35521 tataagctgt tcttacgagt aactctatta gacgacttcc aacctggaat atatattaga
+    35581 aaatatcaaa atattagcaa cttatgcagc caacataatc tttctacctc tccaacaatt
+    35641 ttgcagccaa gcataaacag accttagccc taacatctgc gatccatggt ttcgtatagt
+    35701 catgtgactg caagatcttt ctaactgctg acaacttctg tttctttatc aactcattga
+    35761 ccttttttct taacttatct tgttccttca tagaagtttc attttcaact cccccactct
+    35821 cctcattgtc atcccctttt ttcttcttat ccaagaatgt gcatattctt atctatcaat
+    35881 tcacatagag aaaaacaaca ccattagaat acaaaccatc caaaacacca tagagaagat
+    35941 caagacaatc aaggaaccct aaaggctaat aatacaagag aaaatattac ttcacacctc
+    36001 catataaagg ttgtttttat atgattaatt acaactgtaa catgctaaaa tgaaggagtt
+    36061 ccattcacaa atagtgaata ttttatcaat atcatgttcc acatattaaa gcaatcggtt
+    36121 ttgctttaag atcaggctaa cagttccagt atccagctat ggtatacata gagtttaatc
+    36181 ctcaatggtt taggttcact agtgagggaa acagtttaag agatcaaatt tcccactata
+    36241 ccggcaaacc aagaaaatta ccggcagatg atatgatata attatataaa caaagtagaa
+    36301 accagatcaa gatgaagcta cccaacctct tgttcaatgg catcgcctac tgtacacgca
+    36361 gcatgaacaa ccttaacaca accgttatca ccaccagtca tcaaatgccc catcaacttg
+    36421 tgcattgtaa taacagatat cttgtcggca ggcaactgat caaggtaatg tgcataagtt
+    36481 gctttgctct tccctaatct atacaactcc tggtctttag caattgcatc tcttaatggt
+    36541 tcgaaccaac ccagaaataa agacttcaca taaggcagat taggcgcaag cttttgctca
+    36601 cacatatctg tcaaaagctc catgtactct gcagcagctc tttcccattc ttcagtctcg
+    36661 atcttaactt gtcttctcca taaattctgg aacttggtac gacccattcc aaattggtct
+    36721 tgcttcttca tccgccacga acgatgactt tctctcttct tctctttctt catctccttc
+    36781 aacaattcat ctacctcagg ctcttcctca acatctgttg ataagacctc ctcttcggct
+    36841 acactcgtgt accctctagc caaagtccca ctaaggcacc tctcgctctt ggaaaactcc
+    36901 tctcgcctcg tgatcccact taaacaacga aaaccctttc cgtgacatgg tgatcccaag
+    36961 accggaaacc gaaagctcaa atttttcgag aaaatagact ccggagaaga cacaagaagg
+    37021 ccacgagttt gggaactgac gtttaatcta gcagctgacc tcgaaattgc ttgtttagca
+    37081 atgtttctcc acatggggaa ggattgggaa actgggtttg gagaagaaat aaagggttta
+    37141 tgcaatggaa tcaagtgatc gaatacctta gtctgatttg ccaagaacag tggggtttga
+    37201 gcactggaca tcaatcgaac ttaaaaattg agtaaaatcg ataaaccaga acaataacca
+    37261 actcgagatt gaataaattc gatcgcatga gcaaagaaaa atggcaaatt gggaagagtt
+    37321 tcatagacgg aagaaaataa aaacaaatgg tgagtgaagc gtacacgacg aagccgcctt
+    37381 aaaccctcgc ttaaaaaccc ttgctttctt aagcctctgt tttttttttc tgggttgtct
+    37441 ctgtttctca catgtcaaac aattaaaaaa taaattaaaa atatctaccc taaattcgac
+    37501 ccgtatagga ggatcttacc cgacccggga agtatctgat taaggcctct aataacggcc
+    37561 caaagtctct ttactatgca aaaaggcttg acgaatttat ggagcccaat tagttaaata
+    37621 tatatataca aaatgaattt tattatagtt tcttctatta tttttaccat ttcatacaac
+    37681 tttttagtgg aacccaatta atcaacaaca aaaaaaggtg gaacccaatt acaatgccga
+    37741 atataattaa aatatttaga tttttgtatg aataattcac ttggatttta tttttttggt
+    37801 caacaatatc aatttgtgtt aataataatt attctagtct attatttgta aataaatttg
+    37861 gactaaaaat ctatttttct tgggatgtgt cgactagtgg ttgggttgct agttgtgttt
+    37921 caccaatgat aactcacacc tgattattcg ctccaaattt gatcgtttcc acctcatagt
+    37981 tttaatatat taccatttaa ccccttaact tttgtctgag aagtaacaag cctgatttgt
+    38041 ttcctttcct cggaactgaa gctacaaaga aattgtatta ggtctctctc tctatctctc
+    38101 tcttcttcgt gttactaaaa aggacgaagc ttgttgcata atatgttgag gtaaattact
+    38161 aattactgat ccaaagttcg aatctttgct ccaactccag gctagctgat tgcgtagctt
+    38221 ccgattgatt tctacctgag ttttgagttc ctttgtggcc acttcgttgt tcttctgctg
+    38281 ggttttttgc tcgaggatct gatacttctg tttggtcgat gatcgagtga tcttcgttgg
+    38341 gttttgggga tctaagtcgt ctatatagct aatggtttgg atttgagttt gaatggagcg
+    38401 tttaggattt tggggattgc taatgggtag tgtggaaaag tcattggatt ctggaaattc
+    38461 gttggcttgc tctgcatctg ctaagaatgg agacgaagag agtagtactt catcgaagca
+    38521 agtttcacca ttgaagggtt ctgggtcgag aaatactagt cctttaggtc gagttgggtc
+    38581 gagaaacacg agtccttcta ggcagaaagt ggtgaagacg aagcctcgtg gtctagagga
+    38641 agaaacagtt gcttcatttg gtaaacaagt tgttgctgat gtgcagatgg aagatggtat
+    38701 atgggcaatg cttccagagg atttgctcaa tgagatttta gctagggttc caccgtttat
+    38761 gatatttcga atccggtctg tttgtaaaaa atggaacttg attcttcagg ataatagttt
+    38821 tctcaagttt cactcaaatg tgtcatctca tgggccttgt cttctcactt tctggaagaa
+    38881 ctcgccgcag attccgcaat gctcagtttt tagtttgcca ttgaagacat ggtacaaaat
+    38941 tccattcacg tttttgcctc catgggcttt ttggttggtt ggttcttcag gtggtctcgt
+    39001 ttgtttttcg ggtcttgatg gtctaacttt cagaacttta gtatgcaatc ctctgatgca
+    39061 gagttggagg actctaccga gtatgcacta taaccaacaa aggcaattga ttatggtcgt
+    39121 ggatcgctca gacaaatcgt tcaaagtcat agccacaagt gatatatacg gggataagtc
+    39181 acttcctact gaagtttatg attccaaaac tgacaaatgg tccttacatc agataatgcc
+    39241 tgcggtgaac ttatgctcct cgaaaatggc ttattgtgat tcccggttat atctagaaac
+    39301 tctttcgcct cttggtttga tgatgtatcg gcttgattca gggcaatggg aacacattcc
+    39361 agctaaattc ccgagatctt tgttggatgg ttacttagtt gctggaactc agaagagatt
+    39421 gtttctcgtg ggaaggattg gcctctacag tactctccaa agcatgagaa tatgggagct
+    39481 tgatcacaca aaggtctctt gggtagagat aagtagaatg ccaccaaagt acttccgagc
+    39541 acttctgaga ctttcggctg agaggttcga gtgttttgga caagataatt tgatctgctt
+    39601 tacgtcttgg aatcaaggaa aaggtcttct atacaatgtg gataagaaaa tttggtcttg
+    39661 gatttccggt tgtgctcttc agtcatgcaa cagccaagtg tgcttttatg agccaagatt
+    39721 tgatgcatct gtcctctgaa caataagtta tcgtctgtct cacatcattc ttgaaaactt
+    39781 acaagttcgc cagcaaaaca tgtcagaaat atgaaatcaa agagggtttg atgtgtacct
+    39841 tcagtgttaa tgaagacctg gtcagcaatg atatgcttca ccaatggtta acaatatcga
+    39901 ggagaaaaac tgtaagataa acttgtttct agctttctgt aaattagcat tcactcgata
+    39961 tgaaaacttt ctcaatatcc cctgtctcgt ccgcgtgaaa tatatataca cttaatttta
+    40021 gtttcctgtg ctgaattttc cttttctata tcgtaaattt aagcttctct ttgcaaatag
+    40081 tgactgctag ttcaagaata tcttgttggc taacacttat gagaatgaga aagttttaag
+    40141 ttataacatt ggaatataga tgtgatctta aacatttggg caattggtac atacaaatcc
+    40201 catacttaat gtcttctaac tagaaactag aaaaccgagg agactcttga aatggggagg
+    40261 taatctagct ttgagataca tttctctaca atttagctaa ttcatagaca ctgatagggt
+    40321 aggagtagtt tgcagacttg ctgaaggtat tgtgagctat tattttgttg gcagtttctg
+    40381 ttggatgaaa tgcatcccaa aagacatatt gattcctatc caagcaaggt tgctgcaatg
+    40441 gaaggcatgt taaggctcct ccatatctcc cgttaccgca gcacgcttcg tttgatacta
+    40501 caagacctgc aaaaagaaga actaaaactc agtactgata tatagaaata agagtaaacc
+    40561 cttataaata gggattgagg tcattttacc atatctagaa ggattcacaa ccatatcatg
+    40621 aaatagatca aagacgtttt gatagacaaa gaaagatcct ggcagagttg tgttgagagt
+    40681 atttgctaga tctttcagac ggctattgaa cattgaaacc atattgttga tttttgtcac
+    40741 acacccgctg gtgttgttgc cagttaccat agatagctga ctaggtatgc aacctaatgg
+    40801 tcctgaccca gctaacacca tttttcttgc acctaagttg tatagtctct gcaaaatgtt
+    40861 tgtaaaagaa cacagttttc actttctgag tatgagatga atatctgcat cactattgtc
+    40921 aattttgtac taagagatga tgggagagtc ataaaagatc attgacttac ggatatttga
+    40981 gctgagagag tcttgatcaa gagatctgca taatcttctc cactgtaggt ttggctggtg
+    41041 gagtatctct caggcataag gtagttgttg atatagtcat tgcttcctat attgatccca
+    41101 attatcgatt ttgcaagata ctttctcaga tctgcagggt tttgaaagaa acgccggagg
+    41161 cgtaactcga tcgtaatctc aaactgcgat atctgtccat taaatgtagt tcttgctccc
+    41221 tggaaaaaca gaggtaaatg aacctatatg taagaagaag aaaaggtttt aaaatcttta
+    41281 ttatgaaaga gtttaagttg cttgaccctc acataatgtc gaccggtttc atctaaaatc
+    41341 ccagctgctg cagatgcgta gttaacccct cgtaaggcat tttgcccaat ggataaagga
+    41401 gataagtatg gtggcaccaa tggcaagccg aggtacgttg ctacattacg acaaaattgt
+    41461 gagaaagtta agttaaaaat tgtgaaacca atattatgat ctttagtctt tagctgtacc
+    41521 tccataatca acaacggtac ggccattgca gaaacgacca gtggggaagc caaaatcaat
+    41581 tccataaggg aaataattag ctcgtgcaag agttggaatg taattgttgt ttccactatc
+    41641 aactaaagaa tctccaaaga caaagaaagc tggagcaaga ggttgatgat ctctagattg
+    41701 gccattgcct aaccagacaa gctctaacaa cacaagacaa atcaacaaac ttttcatgct
+    41761 tggttactat ctttatctct ttctctgttt tgagttcttt gagataagct tatgaagaaa
+    41821 acaacgctac tacatatctt tatggtgata tagaaagaga catggttacg tgacataaga
+    41881 gaccgtgttt ggttcagagg aactggtcgt ggtaaatgat ctcatggagt gtagtggtta
+    41941 actgttactg ttctgaaaga tttgtatttt ttctttttct catttactgc ttcttttatt
+    42001 ttcttactct attcgtcttc tttttgatag agacagcttt aagaaaaagc caacctctaa
+    42061 acctccttat ttcgtaaata tgtttactag tcattgatgg ttttgctcat agcttctgaa
+    42121 ttctgattga aatataaaag tgaccactct gttatgtaat gatggttttt cagatgtgtc
+    42181 taaaccggct tatagtttat gataaccgga tggatgtagc tagtgtaatg ggcccttatt
+    42241 tgagggctga atcttcgggt ttaggtgaat tttcatgatc catctatgtt tcattagtta
+    42301 atatcatttg ggattagtat tttgtacttc ctacgaatat taacagctag agtatcattt
+    42361 gaaaacgtta gttaaatgca tagtaggatt attttttata acaaaagtat taaatttgtg
+    42421 aaggttttca aggaacggac cagaatcgga gaactcgtcc tcgtgtctca aaaagaatat
+    42481 atcaatttcg atgtcactac aataataagg tacaatatat ttcattagaa atggtccaac
+    42541 aaagcatatc atttagcaaa tcgatggtca agggatcttt cttgggtaaa atacaaaaac
+    42601 aaaaaagatc agacttattt agaaacccta agcaaccaat gttattagtt ttcaatcaag
+    42661 caactaatca agcttttcat tataaagata gctttgaaat ttatatatca actcggaaat
+    42721 atttcaagaa aatatgtgtg actgcaaata atcattccca aatgaaacaa gagttgattc
+    42781 gaaataaatg aaaaagcaat ttcctatcgt gaaaaatgtc tccagttttt cgaaacagtt
+    42841 atgtggaata tggatgatgt tttagtttct caataaaata cttctttagt taataaacct
+    42901 cctttgcaat taccaaggag aagtatcttt ttgtcggaaa accctagcta gcctcttgtt
+    42961 tctacaaatt ctataacatc cctatatttg tataatatcg aaatcaaggt caagtattga
+    43021 agaactgata aatactttac aaaagcaacg caactacttt tcagtacaaa cgaccaagga
+    43081 tataatatga agaagacgag tttgaagtta atgaccctcg ttttagggtt ttgcttcgtc
+    43141 atttatcttc ttcaagggcc tcgaggtttg aatctactca ttcttttgat ggtcatatga
+    43201 tgttgatgtt gatgatgatg atgatgatga agatgcatat acatatactg tttacttgca
+    43261 tggttctaat atccttgaat atgatatatg tttcttctct tcgtacatgc atttatgtaa
+    43321 ttgttgcatt ctcacaagat ccatcgtttt agaataaaac tttaaatatg ttagtgagtt
+    43381 acaaatttta ttttaagacc tgtagtctta acaattatat ttataaatgt aggcggttcg
+    43441 agaaatggag atctcttgat agcacgaaag gtaatgacac atatcgaaag ttttgagtta
+    43501 aattaatatt tggttttatg tttgatcatt ttcattttag gattcagtta tttgtcagag
+    43561 aatcaacgtt gattcagcta tccatattgg aacaatttta aaaatgaatc atcatcaata
+    43621 tcttcttctt agatttggtc tattttccgc agttgatatc tctggaaccg attgaaacaa
+    43681 aaaatgcagc gagatcgttg aaagattcaa tatcaacaga tttagaggaa gaggttgatc
+    43741 gtctgatgga gcacgaatat ccttcaccag taaagccgag gaaaagaact ccggttcaca
+    43801 atggcgtgcg taatcgtcac taattgaccc ttgttctatt gttctttcaa ttagtagttt
+    43861 aataagtttg gtaaaatcat aaaatggacg tatcctgtaa tgctgaatat tagtatatta
+    43921 cttatacaaa gactttatca actatttgga gtgcttgtat tttttgagat ttaaaaaaaa
+    43981 atctcattgt tttggagata tttaaaattt tcgaaattag tatattccac ttacatatac
+    44041 ttgtatataa atattgtttt atcattcttt gtacttctta aatagatcgg aaaactagat
+    44101 cataacccgt gttatacagc acggacatta atattttgta taaattaaat tgtatacttt
+    44161 gtttgaatta cataagaaat agtatagtat aaaagtgatg ccataatatt tttcaatgtt
+    44221 ttactattta catataatgc attagattga ctttgtttaa aaatgttatt gttataataa
+    44281 atacaacata tgtagtttta agtaaatgtg taaaatttgt atagaagtta ttttggtaag
+    44341 aaacatgttt cctttgtatt agaggatcta atttttggtc taatttagaa ttagtttttg
+    44401 tatagtattt tggttatatt tgtggaccta attggggcct atattagatt atctttaata
+    44461 gggctgacga aaaaacggac ccgataacca actcaatact cgaaccaaaa aatcgggtta
+    44521 gggcgggtta aaattttagg aaatttcctt attgggtaga gttttagtaa acccgtggat
+    44581 atccgattgg accggaaatt acccgttatc taaaaagagt attcaaaaac ccaaacatta
+    44641 atttaatatc caaaatatta attatatgat attattttat ttgattttaa atatatagta
+    44701 aaatgcgagt tgtatatgtt ttcttgatat tatttatatt gtttagtgtt taaaattata
+    44761 cacttgtatt ttgattgtta attttagagt ttcacctgta atataccatc ttatattaat
+    44821 atcgatttaa acccgtcaat tctaggattt tccagcttgt attaaaaatt gaatcacatc
+    44881 atacacataa aaaaatctaa tatgttatta attattgttg tatataagat tataaattct
+    44941 taaaataata tgcatgaaat tgaatataat atttaaatta tgacccagta cttagtaata
+    45001 aattttctta aatctatttt tgacccgtta taatattttt tcatgtattg aacagtttat
+    45061 attcgttttt aaaagtttaa attatggcat atgcgaaaaa actctaatta tttttttata
+    45121 acgatgatat tattttttcg caaaaataga atcatataaa gatgagaggt gaactataat
+    45181 aattaataaa aaattaatat gataatttag atatcaaatc taatttgttg attttaattg
+    45241 gttaattttt tggaaattaa taatgtattt cattttttaa tgaaatttaa ttaattaaat
+    45301 tagtatttga ctttttaatt tttaaagaga tgaattaatt tactctttaa attttatttc
+    45361 taatggcata cctatgtaat tacttaaaaa aataaggtta tatttaaaat gtatttccca
+    45421 aataatatag taggataaaa gtgttttgtc atggaagtag tgttaattac tttgaattac
+    45481 atttcacacg agaatacgtt aacaaaaaca tagttttatg acatttctct aacttttttg
+    45541 gaattaaatg tggacataca ttacttgtat atattttgat gaaaaaaata aagcaaacca
+    45601 agttggattt ggtcaagtcg tcagttctga actatatcga ataagaaaac tcaattattt
+    45661 attatcattt tgcataccaa cttgaagtac aacttttttc ttcattttta tgtctttggg
+    45721 tcatgcttca aaagttcaaa tcattcacct aaacaaataa aaaggtaata taaactccat
+    45781 ggtttagcaa taactctcat tttgtatcaa aattagttaa acaactgtgc aagagagctt
+    45841 aaaatatcac atgtgaacta aacaaaatag cttaaaggat aaaattatta gaagatacta
+    45901 ttttagtaaa gataaaatgt gaataatata tgggctaaat ttataatgct aaaaagtatt
+    45961 aagcctacca aaaagtaata gttacaaatt aaatatttca gaataaaatg attttattaa
+    46021 acgcgctttt aaagcgaaca aatattcttt cggaccaaaa aaaaaaaagg aggaagaaaa
+    46081 aaatgttgga gttagctctg tttctttgtt catcagttta ctttatggac gaggtcctca
+    46141 tgtagatttg actttgtttc aaagcggagg aagaaggaga aggcaacact aaaagatcaa
+    46201 aaccttagat ctttgcttac gctttcgcct ggttaggtaa gtgtttaaga atgttcaatg
+    46261 gctttatgct gctcctagga ttttgatttt gcaattctct tggttggtaa aaaaggcttc
+    46321 tgggtgtttt tccttcttat ttcgagattt gatcgaaccc acttgatttc tatcttgatt
+    46381 agttggtttt actagtgtta cagaacaatt tgtgattttg ggctttgatt ctctgcgaat
+    46441 atagttgcgt gttagctcta gggtatcttc tagcaaacca aaagtgggat ctttggttta
+    46501 tcatggagat ctcgatccaa tttcggttag aattcaaaga aggaaaaaat tgaatctttg
+    46561 agaagctttt gttattgagt taattcagtg atgatcctat gcttgtgatg tgtgaaaatg
+    46621 tttttgttgc ttgtccttgt cttttgtata aatcactgag tagcttcgtt tagtgtctca
+    46681 ccaatgattt aaccttggtt tgctagttta ctgaacatgg agtacaatgt ctagatggag
+    46741 ttggaattgt atgtctagta ataatgtgtg taatgcttca tgatgcaaaa ccttgctttt
+    46801 gaattttttg taggaacggt ttttcagaaa caaaagattc catttttaga ttcaattccc
+    46861 ttctggtttg aaaaatgata tcaaaatggt gaatagaagt gatttggtgg tgattggcat
+    46921 ctcggttggg cttgcacttg gtctcttgct cgctctgctt ttgttcttcg ccataaaatg
+    46981 gtactatggc cgctctcacc tcaggcgatg cgctaatgaa cagaattccc cgactctacc
+    47041 tgttcacact gctaaaagag gtgtagtaat ccctgatgat agagcaaaca cagaatcgtc
+    47101 acagccacct gagaatggag caccaactca acatcagcca tggtggaaca accacaccaa
+    47161 agatctcact gtatctgcat ccggcatacc tagatataac tacaagtgag tattcagtct
+    47221 gaagctttag tttgcgtgaa tttgttattg ggttgttgta gcttatggtc tgaaacttct
+    47281 ttagggatat tcagaaagca acacaaaatt tcacaaccgt tctaggacaa ggatcttttg
+    47341 gtcctgtcta caaagcggtt atgcccaatg gagaattagc tgcagcgaaa gttcatggct
+    47401 ctaattcaag tcaaggagac agagagtttc aaaccgaggt aagtgtacag atccactcat
+    47461 ttccctggtc catgcacaat ggagataaca gagcagtatt ggtttcttgt ttccaggtat
+    47521 ctttacttgg gagactgcat caccggaatc ttgtgaactt gacaggatac tgtgtcgata
+    47581 aaagtcaccg gatgttgatc tatgagttca tgagtaatgg aagtttggag aatcttttgt
+    47641 atggcggtga gtcgtccttt gttttcttct atcaatataa gacgcagcaa gaaattaatc
+    47701 acaagatttt ctactttact tttcaggtga aggaatgcaa gtcttgaatt gggaagagcg
+    47761 gcttcaaatc gctcttgaca tctcccacgg cattgaatac cttcacgaag gggccgtacc
+    47821 gccagttatt caccgtgatc ttaagtcagc aaacattttg ttagatcatt ccatgagagc
+    47881 taaggtaaga gagcaaatat ttaatacgct tactcaaaac tgtttggtta aaccttataa
+    47941 aattttggtt tttttttttt cttgttggta taggtcgcgg atttcgggtt gtcgaaagag
+    48001 atggttttag atagaatgac ttccggattg aagggtactc acggctacat ggatccaaca
+    48061 tacatttcga ctaacaaata cacgatgaag agcgacattt acagtttcgg tgtcatcatt
+    48121 cttgagctca ttactgcaat ccatccccaa cagaatctga tggaatacat caacctggta
+    48181 agttaaaatc cctggtctat gcaaccaaac cgaataaccc aaaaatctga aattaaaacc
+    48241 caaatcgaac caaaactcag tgagtgattg tgagatcaat gattctgttt ttctgatgat
+    48301 caggcttcga tgagtccaga tggtatcgac gaaatactcg atcagaaact agtgggaaac
+    48361 gcaagcattg aagaagtgag gttactggcg aagattgcaa acaggtgtgt gcataagaca
+    48421 ccaagaaaaa gaccatctat tggagaagta acacagttca tactaaagat caaacaaagt
+    48481 cggtctcgag gaagaagaca ggacacgatg tcttcatcgt ttggtgttgg ttatgaggaa
+    48541 gatctgtcaa gggttatgag caggattaag gatcagcatg ttgagttagg gttattggct
+    48601 ggtgttaagg aagagaatca tcaagagagg aacattgcaa caacatagta actctttact
+    48661 ttaggagtaa gtctttttgt acatattacc acatgagtct caaaagtaag atttctccct
+    48721 cttctgcaaa aagaaaagag tttttgcttg ctgacaacaa caacaaaaat ttgaggttat
+    48781 gttataaaga ctcttaacaa aagatatatc tagttcttat ttttgggtta agtttttgtg
+    48841 tacttttcaa tccgaaaaat cattaccaac gttatagcat cattcatacc atcatcatca
+    48901 taatagtcat aacactaatc taaaaagttt gtaagaagaa ggaattacaa gttcactaag
+    48961 tactacttca taaacaatag acacctatat tttgaaacca ttaattaata atggtatctt
+    49021 catcaacttt tttgtgtact tgttgtgctg cgaaagctat gtgtctcggt ggatccaaca
+    49081 atattaatat tcaatcaata agcaattaaa gggagttgtt gacaaaaaaa caaaacaaag
+    49141 aaaaactttg gagacatatc gaaacagcta aaacatcatg tcacataact tgtaatagcg
+    49201 atgtaaaatc ctttaaagaa accaataatc atactgaaat attgttgtat ttgcaaggag
+    49261 atgattatag tgtcgataag atcaagaacc tctacgaaat ttgatgagta gccgacgaag
+    49321 tagaatttct tggatcaccc cttgatatat ttaaacccta ttgaatatat cttggagcac
+    49381 ttgtataatt atcataatag ttattttctc tcttggatca ctcgaataca cttgagacta
+    49441 catatattca tgctctttaa ataatctgta ttttctcgag atataataat ggtttgatct
+    49501 tcttaaaata aatttgaaat gaacatcatg ggttagtgag ttagggtcat tttccacttg
+    49561 ctctatcact tctcaaaatg attgcaaaaa tggaagtgtt ttggttggct ttgaactttt
+    49621 gaacccaaag tagtttcaac atattaaata ggtgaaaatt tagtgatctt gcactaacca
+    49681 atatataaga ggagctgatt ccactctagg tgtttaggta ggtagtggag catattgaat
+    49741 aagttggtga aaattagtga tcttatacta aaccaatata ataatagtca caaatttcat
+    49801 aactaaaagt tgacaaaaaa taaacataat ttttaaaaag atggatacaa aaagtaataa
+    49861 aatttcagtt gaaaatctaa atttaagatt ttgtgtagta ttgtattttc ttttctgttt
+    49921 ttgttattta taatgtcttt atctaggcaa aatcgcttga aaatattacg aaatttttat
+    49981 ttaaataaaa tactctcaga cacctagctg ctataaccga ttctaagaag tagagcaatg
+    50041 ttaattgagc aatctgataa tgccatttta gctatagagt tttttttatc ttttttatgt
+    50101 aaaacatatt gggattgatt ttatcaagag agaaaaaaat aatatgaaaa atatatatct
+    50161 gtaaccaagg tttaattaca caaaatggca cttattattt agctaacaaa actggcaagg
+    50221 ccatggatgc caaactttac agtgtaagct ttaatcaaca ttctaatctc catccatgga
+    50281 agctctttca gaaatctcta caccaagaac actaaataac ttcaatctgt aaaactcact
+    50341 gtaaattcct ctttcaagct gttctctcgc cagtggtgga gtcacattct caaatgactc
+    50401 tcaatggttc atctaacagc tgcagacatt cttgtgggtt ggcgtagaag tagtcttctt
+    50461 ccttgggtct attgggaatc actaaccgtc tcttagggct tcccattcga gtagagtgaa
+    50521 atgccttcac cgtggatgaa aatacatccc aactcaacaa tccttcagta tactcatcta
+    50581 tcagcttaac taggaacctc ctattcagct ggattgtctt cttgaacccc aagaatctgg
+    50641 aaaaaatggt gtcagcctcg ggatgataaa tatgggaaga aagaagaaga aagataacgg
+    50701 tatttcacct gcgatgacct tcaacaactc ttgccatgtt cccatcattg gttggaacaa
+    50761 atatgtcgct ctcaagtgcc actaggtaat caagtgcagc catttgagat gaatggttcc
+    50821 ggcaaaaatc cagatcagag gattcgagta gggtttcttt ccggacctga gttagttaag
+    50881 ataatggagt caatggaaga tataagtttg aaaaaattgc agaggctgaa atgacctact
+    50941 taccacattt ggaaaagcgt ctgttaaagc cttcatccgc ctttgaccac catagatttc
+    51001 tccagcagct atgtaaattt gaacatttcg gtcaataccc aacgcggtca gagtgagagc
+    51061 ggtttcctca ggagttaaag ggcaaaggcc atctttcctc ttcagctcag agtttatgac
+    51121 tttctctttc caccatggat aagcatatct gcaaacaaag agaatttaca tcaatagatc
+    51181 catcaaagtc ggaacaaaag caggaagatt tttcggggat tttttatacc tcattcttgt
+    51241 tagttcttct tcttcctcgg ggttgcaacc atgtgaacaa ccagaaaatg ctaacatatc
+    51301 catctcatat ctgagatgca ggacaagaaa gggacctttc tctctcagaa tcttgactac
+    51361 tcgtctacct aattcctcaa tctgaggagt aaacttaagc ccattgaaat ttactcggca
+    51421 cctcagcttc tgaacctcca caggcagtcc attattagca agtcgggtat ccgttctgtt
+    51481 cagatgtaaa accttatgtt ttttcaccag tggaagaatc tgtatccaga tattatttag
+    51541 tttgaaaaga attagcaaac tgatttaggt ataaaaacca tagtgaagac taatctcgaa
+    51601 caaaaagacc aaacttggat gactaacctg attttggtag taagacatgt ttgaccaact
+    51661 aataggaggc atttcgtggt acactccaag ctcaaccctt ttcttaagcc ttggaggtaa
+    51721 ctctttgagt atccgaactt catctcttaa cgaacttatg aagtgatcca catcgaatat
+    51781 gtctttaaac tcactgcccc gaagaagaaa acattgtaag agattgttga atcggaaaat
+    51841 aaaaaaaaac agagttcaca aagaagcttc aagtgagata cataccttgg atcgttccaa
+    51901 aaagaggtct tgtcaagctc cggcacaata agtgtaacat tcatgtatct tgcaacggtt
+    51961 accatatcac atatctgtag aaagtagacg aaaaacaaag taagcaaatc tatcactaga
+    52021 agatgaaacc ttgtaaagag atgttatttc caatggttta gttatcatct tacagctgct
+    52081 cgcatttgat tgagtcctcc attgcaggaa accataagat aaccattgtt cacatatacc
+    52141 cctgagaaga aaaaacaaag agcttagttt ccagattcat ctccaagacc aagtaaggta
+    52201 cattatccga ttcaaccaca aatttcatca atcaatgttc tcaaaagaac ttacttttag
+    52261 gtggaagagc aattttggta ggaagagacg tcatctctgc ggcagtggac agttgatggt
+    52321 gattgaaaca agaaggccaa cctttaaaca atcttggtcc ccacatctct cccaaagcca
+    52381 tcaaatgaac aaaacagctc caaagcaaca atacagtaac agcacgaatc atccataagc
+    52441 tcatacgagg cctcgaaaca aaagagttct tgagtttctc aactttgctc tctcctaaaa
+    52501 gcttaacatt catctcccat agtttcctgt ggtaaagaaa cttctccatt ttacacattt
+    52561 ttctatatca cccaccaacc ccactctcac agaagattct cacagttccc aaaacccctc
+    52621 agaggaattt catcgaacac ttaaagggta agcctggaaa aactaaccac agcttaaaag
+    52681 actgcagatt taaaaggaca ggaatgaaac aatataatag gtttggtgct caagctttta
+    52741 attatcctct ctaataacac tcagagaaaa tgtaatctcc acacaaatct gtgagcatta
+    52801 atcacacctc gaaaaaccca gatttttagt tttaaagtag cccacgatga tctcgactta
+    52861 ataatctgac taaaacccag atcttgctct attaaataaa agaagaatct ctctctctct
+    52921 ctcaggtttt aattaagagt caactttcaa ctctcaattg atggaagctc acacttccag
+    52981 ttacagaaaa aatgacacag agaaggcttt gaaattcagc aatggcgatc gatcaaagta
+    53041 gccgtacaaa aaaaaaaatg ggagacgaga gtctccgtgg agaaccggtc ggtgttgacc
+    53101 ggagaaggaa gaagaaggac gtcggagaaa tagggagaag atttcggtgt ggagaaaaaa
+    53161 agagacaaga atgtgcaaat ttcagaactt ttttctacca aggaggaaaa aaaaaagaag
+    53221 cagtctttag gtttcttctt ttttgtcggc aaaaactgga gtgtcattta taaacttttt
+    53281 ttttacagag aaatattttt tgtaatattc tttgcaattt aaaatatatt ttttgataca
+    53341 gagccaaagt agacctttaa ttaaatttat aaaatacatt gtattatttt gctattagtg
+    53401 gatatgtata gttatataca tacacttata tgtattttgg ataacttaaa caaggtacaa
+    53461 atttgaattt tggttgacca aatacattca agctattttt taagttttat tttccagagg
+    53521 gataaattaa tatatttgga agttaaataa cagagtttca gttatatcta aaccacatta
+    53581 gcaggttgcc cctagactaa ctctgtcatt aagcaaaact aatccacaag ttaacttcca
+    53641 gaaagtattt gacaagaaaa taaaatcaaa ttacatattt aacatacata aatatgtgtt
+    53701 aacccaatca ttttcaagat tggttgaaaa acgaaattaa attttatttg agttaactaa
+    53761 taaaataaat ttatcgataa atggaaatta atacggcaag attcattttc tgtcggttct
+    53821 ttataaaaat aaaattaaaa aattgttcaa aagggcatat tctagacttg tggatcgctc
+    53881 gttttgtgac agcaatgaca aagagagatc caaataaata ttgaatattt ggcaaataaa
+    53941 taaaataaaa aagtaggcct acaatatact atataataat taaatatata tggatggacc
+    54001 aactctattt attgctcaca gttgtcattt gtcaatctaa aataaaatat agaaaagtat
+    54061 gtggaagtgt attatgttta ttgattaaaa agaattaaac gataaaaagg tccaaaacat
+    54121 tagatcaaaa cttaagctag ctttgttacc ttgatttcga ctgaagtatg gtataaatct
+    54181 ggccccgagc cgactgaact aacattttca gcattatgtt attcgaaatt tcgacagtat
+    54241 gataatgagt atatttaaaa gaagcattat gtataatttt gagagaagaa aagattgagc
+    54301 attttttagt atgttttttt tttttagggt gaaaattgga tctaaggata tttataggaa
+    54361 acaaatggat gaatacataa gaaggaatta atatgtttcg tcatgaacca agtaaccaac
+    54421 ataaatagat acattttaaa ctttttataa aagcagccat taatagaaga tggtctttta
+    54481 aaattggatt tgttttattt gcttttcaaa ttttatcaca attttgtctt gttcatagtt
+    54541 tttaagaaaa atattaacca caactttgtc ttgttcatat tttattttac aaaattggct
+    54601 aaaatcacat tctatttatt tatgagtttt aaaatataaa ttagattttg gtgatattca
+    54661 atttataata ttttaataaa ttttcagtag tctctctaga aatatacaag tttttttaat
+    54721 gtacctctaa atattttaaa atcaagaaaa tcaatctaga tatggttctt tataagaaag
+    54781 tacacaagcc aacaaacact cgtagtctcg tagatgtttt tatcaagtat atatatatac
+    54841 attcattata ctattttcaa atcgttggta aatcaactga taattttttt ttcctatcca
+    54901 ttatatataa aacaaatata aatatgtcgc ctattacatc ataaaatttg aggaagtaaa
+    54961 aaaaaaattg aaatttcaaa cctatcaact aaattttctg atttaacaga tttaatttat
+    55021 ttttattttt ttagtttaaa aaaactcatt cattaaacat gtgaaagtga ctaaccacga
+    55081 tgtctagctg tagcggcagt ttaataaagc tttaagcaat gatctcgctc gccggccaag
+    55141 tttagaaaca atgaaagatc aaattgaatg atttgttttc ttttgtcatt cgtgaagaca
+    55201 atttaaaaag atacacgtgt agatgtgagg ttatgaacag gttattgttt taaaacacag
+    55261 cattatatca gccaatatat atcaatatat agacgatgtt cggtccaaaa atctagatca
+    55321 atctccttca tgtttgctat tcgaaaatga ttgttattta tatcaaatcg tgcgtgtaac
+    55381 atcgattgat aaatattgga aatttaagta aaggccaatt gtcacgtaat gacttgaaag
+    55441 catctccaat gtaattcagt atttatttct tttcacttga cgatagaaga atttgattag
+    55501 agatgagtta cttattgagc gaattaatgt acgttatttt ctatgaccat atacattcca
+    55561 ccgacatcgg acattacact gctgtcggaa cttaatgatt tatttttcta aatacgaata
+    55621 catttctcca atggaaaaaa tctaagagtt ttaagatttt tctataactg taattgtcat
+    55681 ttcaacgaag agtttaagaa gaaaacagag agaaaagcgt gtaaatatct gtagttttaa
+    55741 tcaaatgggt catttctatt tcagtctata tatttaatca aaattgcaat tcacaaattt
+    55801 taatgcttac gtattttaag taacattaaa tgaacatctt cgtcctaaaa gttacttgat
+    55861 attaatagaa aggtaaagta acagaacaaa gcattaacga tatatttata tttcaaaact
+    55921 tacacacaag tattcagaat taagaaaaga acaaaacaaa taaaggagac gtttctgcga
+    55981 ttatgttttt gcttgaaata gaacaaacaa acacgtattg gtaaagaaac caataacttt
+    56041 gaagatagct taaacgcaaa atatatagtg aaaaaaggga aatcaacaat tattatatat
+    56101 ccccacgaaa ctcggtgtcg gtggctcaat catcttaaca ctctaaactt ttaagcccat
+    56161 tcgtgcgcat gcgtgtatgt atatacgtgt gcattggatt tgaaattttg gtcgttatcg
+    56221 agagttctag tagattatgc attttgtgta acaagtgtaa tataattggg accctttatt
+    56281 actttctata aaacttaatt cttttaacct atagatcacc cgtttgtcaa ttatatcaat
+    56341 caagggtccc ttattagctg cattttccga tcgataatag agaagatcat ccaatgcttt
+    56401 acaatcactt ccatgcgacc tcacatacaa ttacttccat tgcatagtat gtagtaaaac
+    56461 tatatatttt gcttccaatg gatttgatgc ttgtttatgc ttagaacttc caaataatct
+    56521 tttttaacgt agcgaattga ttttgcattc aaagcaccct tagtaaatcc gtttcctagt
+    56581 ctagtcccat aattaaggta aaatctacat gggcttctca ggcccataat gagaagttta
+    56641 aaaggctcaa ctcaacaaga tttagttatg ctaaggcctt acaagttaca atcaaacaga
+    56701 ggatagggac cgtcggattt atctgacttc ctctccttcg attagggttt ctgaattact
+    56761 actcctctgc caattagggt ttaagcctct ccagcatttt ctttcagctc aaacccaaga
+    56821 acctcatcgc cgattatttc ttcatcctta ctctatcccg cgccgattaa tctctctcag
+    56881 gtaaactact tcaattttac tcatagacta attgtacaga gattgataaa tcctatgttg
+    56941 ttcccattag aatgttcaag ggtttatgat tattcaggat ttcaaatctg aattattgga
+    57001 caaataaacc ctaatttcgt ataatttgca tatcatcagt accgtccgag atgagaaagc
+    57061 tcaagtatca cgagaagaag cttataaaga aagtaaattt cttagaatgg aaaagagaag
+    57121 gtaaccatcg ggaaaacgag attacgtatc gataccacat gggctctcgt gatgattaca
+    57181 aaaagttggt acctttattt gctcttaaag ctctgttcta cttattcttc ttcttctggt
+    57241 tcgagtgttt tgtgatgaac aatggtcttt ttctgattct tgagaatgtt ttttcttttg
+    57301 ggcaggtatt caggattgtg taggatggtg cagaaactga cgaatataat gaaacaaatg
+    57361 gatccagcag atccttttcg tattcagatg actgatatgc ttttggagaa gctgtgagtt
+    57421 ttttaaatat atataccagt ttgttcaagt atagagctta ttacaagcaa tggttctttg
+    57481 ttttctctag atttaggtcg tagaacatga gacttgatca ttttctgcag ttttttacac
+    57541 cgatcttatc ttagagacct cctgttgttt ttcttgttaa ccacttttag agataattaa
+    57601 aattgggggt ccttatgctt ctatatagat tttaggaccg ttggtttgaa atccattaga
+    57661 ggttgaattc atcctggtaa aatgtgtgat ctaatgggtt tctttcagaa tctgtttctt
+    57721 gaatagcttc tacatagagc taagttacta tataatatag gcttattgta tagggaaaaa
+    57781 gagccatcct actgaaactt gttgattgcg tagttccact ctgcaatgag aaaacttgtt
+    57841 ctcttaatgg ctgactgact tgtgtttctt acatgtacgc ttgaaacggc agatataaca
+    57901 tgggtgttat accgactagg aaaagcttga ctctaactga gcggttatca gtttcatcct
+    57961 tctgtaggta agagtatatg caagagccta tataggcagt tacttaaacc cttgatagcc
+    58021 aaatgttctc atttcgtttg cctcatatcg gttctttttt tttatttgca ggcgtaggct
+    58081 atctactgtg ctggttcacc tgaagtttgc agagcaccac aaagaagctg tgacatacat
+    58141 agagcaagga cacgttcgtg taggaccaga aacaattact gatccagctt tcttagtaac
+    58201 tcggaacatg gaagatttca tcacttgggt tgattcatcc aagattaaac gaaaggttct
+    58261 tgagtacaat gacacattgg atgattatga catgcttgct tagctttgag gaagttaaaa
+    58321 aagactgatg ttttgtaaaa gccaaaaaag gttcactgct actactattg cagggtttgt
+    58381 ttggattttt gaatatgcaa atgtgctact attcccttgt ctttaatctt actttcgatc
+    58441 tatttgcaag aagaaacttg aaaacaaatg tcatgaaaat attgaatcta actcatatta
+    58501 tcgtcaaatt gtataagttg gagccacaga aatggatgaa cataaaatat taaatctaat
+    58561 tattatttga cacttcaaat tacaaaaccc gtttgattcg atccgacccg acccgaccag
+    58621 aaaatcgcaa aacctgtgtt tttgctttcc ccggtctctc aaaataaaat tgctcgctta
+    58681 gaagcttcaa acgcagctga aatggcggtt caagcaaatc agagtgcttc ttttgggttc
+    58741 agaacagctt caccttctca gaagctctct tcgaaaccca tagctcatat ctctctatcg
+    58801 acgaagctca aaccctcttc tcgaccatcg ctttcgtgct ctacttggaa tcagggtcaa
+    58861 attccggcga gacactcctg tatcaatccc ggaatattcg catatccccc ctccaatctt
+    58921 accttctctc atgaactacc agaatctgaa tctccgccac ttggaaaggt cagtcatcgt
+    58981 caaaaaactt ctcttggaac aaattggggt tataagcttg ttagacaaag ttgcagcctt
+    59041 tttttgtcat ttgaacgaaa atgtggtaaa ccaaatgtat tgtctccatc tatttaattg
+    59101 tggtagaaga ggcagtgtgt gctagtgtta aacctccctt atgttgcttc attgtgaaaa
+    59161 cttttgtgaa aggaagtaga gatgtatgat tcttcccttg ttatcgtcat cttatattgc
+    59221 agaagaagat gcgtgtgtta gtgaagccgc tagagaaacc aaaggtggta ctgaagtttg
+    59281 tatggatgca aaaggacata ggagttgcat tagaccatat gattccagga tttggaacaa
+    59341 tcccacttag tccatactac ttctggccta ggaaagatgc ttgggaagag ctcaaaactt
+    59401 tgttagagag caagccttgg atctctgagc ttcatcgtgt cttcctcctt aaccaagcta
+    59461 cagacatcat caatctttgg caatcaagcg gtggagattt gtcttgattt ggattcacca
+    59521 gatatatctg tttatcatag gtatcattaa cttttcaaag tctaaagctt ttgtgttgtt
+    59581 tagtctatca ttagtcctct gtgtaatcat cgaaactcga aaaaacttct atcattaaaa
+    59641 tggcgaattc aaaacaagaa aacaaaagat catcatcatc atcatgatga acgtttaatc
+    59701 gaagtactta gacatttgaa tcgatttatt actcatccca catttctcat agaacacttt
+    59761 gttctccaca ctacaatcta gaatcacctt atagcaaccc attgatttgc aatgatccat
+    59821 aagaaactca acaactttct tccccagctg tttcccgcga aaccttgaat ccacaacaac
+    59881 gtcttcaatg tgcccagctt taccgcaatt cctcagaaac ttcttctcta tcatcacact
+    59941 acccgtagca gcgatttttc ccgaagtttc ttcttcgatc acgcagatca cgtggtcgtc
+    60001 accatacgat ctgatttctt cgaatcgccg atcgaattct tcgtctgtta ctgatccggt
+    60061 gacggttagt tgacctagaa gctcgatgaa tccttttctc ttatcggaga tctccagttt
+    60121 tcggatcttg aatgtctcag ccatagaatc agatcggata attttgaatt tgagtttttt
+    60181 tttataagat ccagtcactc actggaactg aatataatca cccatagaaa aacaaatgtg
+    60241 ataattacga aaaggtactt attcttttag atagttacaa aaagtttccg aatctcttaa
+    60301 tgaagtcaaa atgggccatt tgttttttaa attgactttt gatgtaacaa aagcagcaag
+    60361 ataaaaactc catagtaaac aatatttatt gaaatgacaa caagccaaca agtgaaaatg
+    60421 cagaggaaaa aagtaacaat ttaaagcgga tgatacatat agattacata attggaaatg
+    60481 taaggaaagg attgctaatt tagggttgat ggtgactaga aaaggaaggt gaaagaggag
+    60541 ggattcccgg gatgagaggg gccggaggga ttccagggat aagagggacc ggagggatgc
+    60601 cagggagtcc agggaccgga gggagcggaa cagggaaaga aggtggtggt ggcggtagtg
+    60661 aaggcggatt cacgataggg acaggtggaa gaaccggaag aggtggaaga gtcggaattg
+    60721 ttggaatcgg tggcagagta ggagtcgggg gagtaggaag taacggaatt gtagggagag
+    60781 ttgggggagt agggattagt gggattggag ggagacttgg gggagaagga ataagcgggt
+    60841 ttggagggag agtaggggtt gggatagaag gaatgagcgg gtttggaggg atgatagatg
+    60901 gcgggtttaa cgggtttggc ggaaagaaga aattgggttt tagtgcttcg gttttcttgt
+    60961 ctttcaagga atcagatttc ttgttatgca aggaagctga tttctgaggt cctggtggaa
+    61021 gaagaggagg aactagagga agaggtaagt ccggaacagg taactttgga acaggtaagt
+    61081 ttggaagagg aggaacgata gggagatttg ggagagggga aggattagga ggatcttgga
+    61141 gcggtggagg aaatgaagga tccggtaaaa gaggcttgga accgcggaga ttgataggtt
+    61201 tttggctaca aatctctggt tggttttcgg gtctgaaagt gaagaaccct gcagagaaaa
+    61261 ccctagtatt ctctccatga tggtttgatt taaggcgttt gagagaagaa gaagtagcag
+    61321 acgaagctat agaacagtaa ggctgtgagc tacttagcaa cttcacagag catctcttga
+    61381 tcttcttcac atgtttgctc acagagaatg gtagcttcac tttgaattcg ccgcgcttgt
+    61441 ctgttttcac ttcttgtctg aaactcggtt tcgagttctc gtcaatgcat tccactgcaa
+    61501 ctaaagcacc tgcaacaaga tccacaaaac ttttcataat tctttctaag attttagacc
+    61561 cacaaacaag aaaaaaccct ataagtttcc aaaacagagt gctctgttct agaatccaaa
+    61621 aaacattatt ttccaagtag tgttaccaag aaaacacgag caagactgca agaggaaaaa
+    61681 aaaacaaaaa tcagaaacca gacctgagat taagtggttg ggtgatttcg agaaggcgcc
+    61741 gttaaaacaa gtgtcacagt agacagttcc tactacaact gcagaagaac gtgtcttctt
+    61801 cataacatgt tgctgtcctt gagacaaacc tccattgata gagattccta agaaaatcat
+    61861 aagactaaac caaaaccata aggtcgttat tctctccata attatcgact ctatctctca
+    61921 ctatcggagg aaagaggggg tatatatata ggagtgtggt gtgaagatga gtgagccacg
+    61981 aaaaacgaag agattgttaa aagatgagtg aagcttatct ctttttgttt tgccatgtta
+    62041 ataagaatca tgggaaaaat acaaaatcga actacaaagc tagctactat aatgtcttgt
+    62101 gaccctatct ttctacagct cagtagctga attattgtaa aaaatagtaa gaggtgagaa
+    62161 aaaagacaac actaatattt tggattcacg ttcgcgtgtc gattagattt gatcgaacag
+    62221 tacacggtaa aaacacgttc aaagtaaaat accaaacctg ttatatcatt gtttcttgaa
+    62281 ttgcggggtt gctaaaacta aaaaaattca gacacatatg attttttttt ttttttctga
+    62341 gttaaaggtt ccgaagatga acatatctga gatacttttg tgttgaaaat atctgttatc
+    62401 atcatctttc ccttatttac attctgaatt ttggatgaga ctaaaacatg acatgatgtt
+    62461 ttcatttcca aactgtgaat cactgtctgt catgtttgat gtttatagta tagtctacat
+    62521 ccatgttttt ttaaagaatg acatttacaa tcattattgt tagaaggata tcctaattgt
+    62581 caatattaca tcgaaatatt tatttttaga acatccgaga atagatgatt agttactgtg
+    62641 gtgatgattt tgatagttaa ctaatatcag taacagtcta tcatggacgg atttacatat
+    62701 ttctaatcgg taatccaaac tagatgtaag aagaggttac agagattcat taattcgcaa
+    62761 tataattgtt ctaagaaact tatacattgc ttattataca tttgtttata gatttggaga
+    62821 aacatatagc cagtgaggct aaagttgata actagactta agtaagttat agtttctatt
+    62881 ttctttaaca gaggaaacaa tacatgcaat ttaagaaagt gaaggcttat gggaatgaga
+    62941 atggggaagg gaatacagtt ggagagtctc tgtcgattca cgagacacta aggatgtcca
+    63001 cacaatcata tatacccaat tcttacttct gtcttcttat agagtactac atacatactc
+    63061 attcttgata agggacaaaa aattaaataa aatgagttaa agaatgtgaa tgttttttaa
+    63121 tggtattaaa gactctgaat gttcttctcg aattttcata ttttaacttt tgatttttgt
+    63181 gttttttttt aatcaggaac gccgtatgtg ctaaaaaaaa taccatttca tgagaataaa
+    63241 caaattctat tagtcgacga cataaaaaac aacaaacaaa cactatggtg gattatagat
+    63301 agactaggga atattgtaac tttataaaaa atattgaaaa aatatgaaaa cattgtaaac
+    63361 taaaaatcat taatttctag ataatatagg actaaataat aaaatgggcc aagatcaaat
+    63421 taacccaaac tgtagtttac aacttggttg ttcccattcc ttgatatggg ctaagtgaca
+    63481 taaaaagccc gactctttat tgtccactaa caattggtct tttaacttct tgatggatta
+    63541 attgtgggtg tagaagatta ttttgactat ataaaaacag aaaattaact aaatattttc
+    63601 gatatattaa ataactagat gatagatgat ttttttttgc agtaatttca taaatttagt
+    63661 ttcataaatt agtttggttg aattcaaatt tagtaaattg tcaaatttga gacatagatg
+    63721 atagaactaa gattagtaat tagtagcgct atatatgact caatggatgt gaacaagatg
+    63781 ataatttaaa aaaatgtttt ataaaattct ttttgtccaa tgaacaatag gttaaaacgg
+    63841 acaaatttac ttttgtgatt taattattgc catgcatttt ggcatatagt cactctgttt
+    63901 cataatataa gctgtttaat ttttgtcatt taacaatttt aatacgtgta acatttcgtc
+    63961 atgcaatgtt attttatgtg tagatatcta atttattaaa tgtactttta tttcagttat
+    64021 aaaagttata cataatctgc gattataaaa ttttatatta atataatgcg ttaaacatta
+    64081 agtatctgtt tacgataata ccaaaataaa aaacaaggga aaaattccat cccaaatttt
+    64141 tgcttttgtt acttttgaat agcaaatctc taatgtatta tttttctaaa ataaaatctt
+    64201 cctatcctaa tgtcatcatc tacaaaatca ttaaaatatg taatgtctga ttaactaaaa
+    64261 taattttatc aactagtata aaataagaga atcaaccaaa aaagtttggt ttaaattgta
+    64321 tagagagagt cctccattag cgtccagcaa caacgcccac ccactttcac accaacttga
+    64381 acacactact cctcctcctt cactttttcc ttcaatctct cttctcgctg acgacttgga
+    64441 aataaagtga gggaaaatat tcctccaact tcactttcct gagaatctgc aaataaagca
+    64501 attcaacttc cttttctttc tttctaaatg aaatttcgtt ctttgtctca gtctctctct
+    64561 ctttcttcgg tcgagtccga tcaaaattga aaacccattg agttcctcgc caaaaggtat
+    64621 agtcttcgtt atttcttctg taattggata atggggtttg atcgattctg attagtacaa
+    64681 tcccaaagtg ttgttctttg ttttctggta aattttctct gctttttttg tgttctgttt
+    64741 taaaacaatt attctgagtt aagcaaagca acttgtggtt ttgttgtgta atgatatcaa
+    64801 gtgggttatt ttttttatta taagtttctc tgttttattg cgtttgttag ggattatcta
+    64861 cctaaattag gtttcaactt ttaatcggtt agttgcttgc atttcacttt gaccactttt
+    64921 gccctttttt gtatatcccc aattgaagcc tcgagttttc tttttgacca aacacaattt
+    64981 tgtactgagt ttggtgaatg tttgtttgat gtctcttgtt gggaatctct ctgctttaaa
+    65041 gttacttact gtcaatgctg gcaataaatc tttagttttt tactattttc ataatataat
+    65101 ggattgtttt gggggttgtg atgagttttg gatagtaatt gaagaggttt aaaactccct
+    65161 taaaaaacta cagtccaaac agtgatcaga tcattctaca ggtgaaattt gagagaatga
+    65221 atcaacgaga gaagctgatt gttggtgaaa tgtttggaga gtttctgtaa atagacgcct
+    65281 cggatttggc attgttagtg aaggttaaag gagtttccta gaagtttcaa agcaatttgc
+    65341 ctccaagttc ttgacttata aaacatattg gaactaacat ttcaagaaag aagagaagga
+    65401 gatgggctgt gtttcttctt gctttggggt cgatgacttt gaggattacc ccaatccaag
+    65461 tagttctgtt aacagaagct gcccatgccc gagatgtctt gttaacaact tccttaacct
+    65521 ggtaatgctc tttttgcagt ttttactttt cacctgctac ttatatgaaa taaaggagag
+    65581 ttcattgaag cttcggattt gatctcttct gcagtatatc tctttattca gaagaggcga
+    65641 aacccgctct ctcccatcct ccttacaggc tactaatgta tcgatagcta catcaacttc
+    65701 gtatgataac tttatgtcta atacattcca ttctactcca aggcctctgc cttatgacgc
+    65761 tgatccaaga tacttccggt caaggcgtga ttcgcttgtt tctagacgcg ataagggttc
+    65821 aagtcattct catgaggaag ctgagccctt aagaagtgat gctgatgtgg attctgagtc
+    65881 tttctcagtg gaaggaagca aatgggctaa caagcttatt atctctggtg aagattccaa
+    65941 agaagagttc tctagatcct ctcggaggat tcttcagtca aggacaatgt ctactagtaa
+    66001 cgaaggcctg tatataacat ctgatgatga agatgtctgt ccaacgtgtc ttgaaggtat
+    66061 tcattttcat cttttttttt ctttttcttg gatcaatgca tttcaaagaa gcttagtatc
+    66121 agattatttc gtgtgttttg tagaatacat atcagagaac ccgaagattg ttacaaagtg
+    66181 ttctcaccat ttccacctca gttgcattta tgaatggatg gagagaagtg aaaactgccc
+    66241 tgtttgtgga aaggtaaaca cttttcatgt tcttccaagc cagttttcgg cttctcattt
+    66301 actagtgttt cagagtcaga ctcagacacc atcacgcgta aatagacgag tgatccagag
+    66361 atcaatagaa tgaaagatgt acaacaaccc tagaagcaca tgcgtaaaat ctttacttgt
+    66421 cttgtctatg acaaatatgg ctaatcattc ttcctgaaac tcagaacatt tgaattattt
+    66481 ctcattcatg ttgactaaag aatatttttt tctctgctgt gaattcgtct catttgcagg
+    66541 tgatggagtt ccacgaaaca ccgtaacttg ttgccattga tcctgtcttg tactgcctag
+    66601 accggacctg gggagattga caaggcagca gagagatgtg aatatatttt ttaacaatat
+    66661 atgatattgt atatgatggg gaaatgagga aacgtctctc tgcacgtttg gcttatggag
+    66721 ttctgaggaa accctcttga gagaaaacaa cagcagcagc agaacagatg cacacaggaa
+    66781 tctgacatgt ttcttaggat ttgcctttgg cgcagtgttt tgttcaaggc atgaaacttt
+    66841 ttacatcatt ttgtatgatt acttttaacc ttttatgaat ttatgaatat atatgtgatg
+    66901 cttgtatttt ggcatatagt ttacattggg tatttgggtc aaggagagat cacattgtaa
+    66961 cccaaatgtt tctcattaca tgataataaa ccagtgtcat ggttcagtgt taaaacaagt
+    67021 tccacacatt tgaattctta atataagcaa ggattcccaa aagatacaaa tgtcttgaag
+    67081 atcctcagga ccattatata tagccactta tgtctacaaa tccaaagtct tgtgtctata
+    67141 cttttatata tataaaagat cttttgttgg tgagctttaa cacagcacat gactctcaga
+    67201 gcatccaccc cgggatgtaa ccgtttccct gctgcgcctg agcttgggtt gtcgcagtta
+    67261 tttgctctga gcataccgga ttatcatacc tgtcatcatc atcatcatca ccaaaacagc
+    67321 aacttaaaag gttcacacac attgatgatc ctctttacac caagtaacta aagaacctta
+    67381 tataagtaca atcttagtag tgaagtacac aagctgattt gcagatgatt gtttaaggca
+    67441 aaggatttac cccatttgca gagttggatt gcattcaaga ggctggtata gtccctgaga
+    67501 ctgagcttga tgatgcgcgt aggtaacatt ctgttcacca ccttcccatc ctcctcctcc
+    67561 tcccatatga tgacttctca caccaatcat atcatccagc taaacattta catttcacca
+    67621 aaagactata atcagaaaca agaaaactag agatgtttcc aaaaatgttt agaaacttaa
+    67681 actctgtgaa agagattgat cgcaaggcac caaatgcatt tattctgtaa ttataccttc
+    67741 attgccaaag ctctattggt ttcaagcaac atttgctctt tattttgaag atccgagagc
+    67801 tggtcaagca tgtactgtgt ctgcaaacaa tcatcatacc accatagtag ttataattaa
+    67861 acaacacaaa ttaatctcat ttgaatcatt gattccatgt ataaagatac cttgatggac
+    67921 cgaacttgct tgagagagcc gtccagttga cgctcaagct gctctaactc ctttgaattc
+    67981 aaaggtccta aatcctcccc aagaagattt ctgcaccaac aaaagccaca cacacctaca
+    68041 aatatgaatc tacatggata cagaattgag taaatatacg gaggtagaca gatatgtacc
+    68101 tctgttgacg ttgaaggttc tcatatctac ccttaagctt cagatattct ctgtagctgt
+    68161 tctgaaatat ttcacgaaaa caaactttta aaaaagatct ttcttgaaat gatatgatag
+    68221 gaatctaaaa tagtatttga attaagaaca cctcaagttc tttggcaggt ttgttgttga
+    68281 cttcaatgga tccatagctg catttctggt accgatcaag tgtcttgagc atgctgaaaa
+    68341 taactcaaaa agagagaata agagtttatt aaaaagatca tacttgactt tggagatcta
+    68401 aacctataaa tcaccataaa aaaaaagctc ttcaagtttt aaagtattct tccatcaaaa
+    68461 ttgaagattg tttccattat ttatctgatt atatacgagt tgatttggat ttgattgttt
+    68521 caactttcaa acgtaaagtt tcgtatttat atttacaaat atcaatagtt caatcctatc
+    68581 atttgcaagc aatatcttac gttttcgttt gattcacaaa ttgtgatttg aaaaaaggaa
+    68641 ttaatcgcta tattttcttg aatgcatgta agcaaaacta ttttaacata taattaaaaa
+    68701 tctattaaac attgttttta agaaaaaaca aacaatcaaa gccttcacag atatacacgc
+    68761 gtttgaagtt ttaaccacca taaagtatga tgaactaaca aatggttaat gcatgtaacc
+    68821 atatattatc attctacgtt ggatacttgt aattttcata gaaatctgtg aacaactttt
+    68881 aagtttccaa tcacacagac atatattaat tatataaaac ttttcagatt gcatatcagg
+    68941 atttggaatc tattcgatta caaaattcac agttctaaat cattgacaac aaaaaaacca
+    69001 gatcttaaat ttcaagaatt ggaaaaaaac cctagaaaat gaaatcaaga aatctaatta
+    69061 actcaacttc gtagaaatca agatcgtgtg atctatatat aacaagtcct tgagacaaag
+    69121 atctaagctc agatctagca aaaactcaat caagaaactg ataaagagtg agagagttgt
+    69181 ttactttgag gagctgcaaa actcatagag ctttccacgg ttggagaaga tgatgagagc
+    69241 aacttcagca tcacagagaa cagacaattc ataagctttc ttcaacaaac cgttcctacg
+    69301 ctttgcaaac gttacttgtc tgttgatttt gttctctatc ctcttcagct ctactcttcc
+    69361 tcttcccatt tctttttgtt accttaattt ccacttttta tgtctataca caaatatggg
+    69421 tatgtaatgt ttatctatat acagacacac aaaaacataa gttgggtatc tctttttcag
+    69481 aggaaagatc tgatcttttt gagtttttat ggtctggaga attgatggta tttggaaaaa
+    69541 agaaattaac cctaatttca agaacaaaag gtactagctt aaggaagaag aagaggaaac
+    69601 ttctgtttgt ctgatttttt atttataggg ttattgattc tttttcctct ctttaattgt
+    69661 gtttggaagt gtgtggaggt tgtgaaatct taatgaaaat tttgttttga atcaatggaa
+    69721 tctattgaga gcaaaagata agaaatgtgt gagggtgagg ttagggtttg gtgtgttttc
+    69781 tcatttctag cccatctctc tgcttcttta ttcttcttta ctttcattcc cacgctctct
+    69841 ttaagcacgt gtgatacagt caccaaataa ttggctcttg tgatacaatt accaattaaa
+    69901 cttgtatata tatgtttata tgttatttaa aggataaatt tttatttcgt attacttact
+    69961 ttagagtttt cattataggg ttattattta atttgaacat ctatgtgttt ctcctcaggt
+    70021 agaattcatg tttgtgtata tatgggtgtc tactatttat ggaatattta tacaaaacat
+    70081 ctaaacatat cattgcttga aaattctatg tttatatcat aatagcaaat tgtttatgtt
+    70141 tagttatata taacacaaca atagtactct aagaaaaaca gcatcatgca cgtttgatgg
+    70201 ttgactagta actagacaca cgaggcacaa aggttatcag ttgattaaca tgtgtatgta
+    70261 tatacggtct ttcacttatc acattacaaa cttgaaagta aaatttgttt agcgatatta
+    70321 agaaagttta tatatagaga cctcgtacta gctggaaatg tatatctaaa atgaaagtac
+    70381 actccctctt acatacagtt taatatattt tcaacataaa acatatgtac atagtactag
+    70441 cttatttttt aaccaaaact tttgggtttg atagtttaac aactcggtca agaatctcat
+    70501 attttcttac ttcaaataat tgccaatata ttctagttct ttttgttttt aaattatacc
+    70561 tggtaatttt ctaacgagag tctcataatt gcaagttgta gtagtcttgc agctggatat
+    70621 gaagcgcgtg aattaaaact aaataatact tacaataaat gaaagcaacg tatcatatca
+    70681 ttttctagtt gttttcagaa gccgcagaaa agcatcttct accttaaaat gggtttccat
+    70741 gtgtcgatgc cttattggat catactttct tactttttaa ttcagtttaa aataaattga
+    70801 aaaatgagat aaacatttaa gtggttcaat tatcataaaa aacactaaaa ctgataaaat
+    70861 agccacataa tataggtcat atcaaataaa tgagaaataa tatttgtatg cctaaatcaa
+    70921 gttatccatg tttgaaggtg tttgcagtga ggtatatatg tttttgtcta ataaagatta
+    70981 cagttatgat tatgtaacat aaattagata atatttacag ctcgcgatac actacaattg
+    71041 aaacgtggtt aattagtgaa actgaaattt gaagaagagg atgaaatatg aatgatattc
+    71101 gttaaaatga tctcttagat tgtatagtca ttgtccccca ttgccaaaag atcaaccact
+    71161 tacataaagt aaaacgcatc tttagttaaa agctaaccaa aagagtggaa catatttatt
+    71221 tttggattgc atttgcgaat ttcatcaata tatggttttg ttcaattggg aaatttaaat
+    71281 aactccacta aaatgtacaa agtaattttt tttcgacaaa gaatctatga agctaaaaaa
+    71341 tctacccata agtcactttt agcagaaata ccaccatata gacaagggat tattgttgat
+    71401 tttgccattt tttatagaga tcttttgaaa ttgccatttt tactaaaatc ttacggataa
+    71461 gccattttct aagatttaaa aatatatgga tttcggactt tttttggtaa aaatacgtag
+    71521 tttacacaaa atactaaacc ctaagaacaa ccacaaccga gtaaacccaa acataatgaa
+    71581 taaaacgtat attcattcac gtaacctgaa aaatggcaaa attcacaaac tattaaaaaa
+    71641 agggcaaaat ccacaaatgc catcccaaaa aatggcattt tcataattga ttctatagac
+    71701 aattattttt ctgcctatta actttatttg ttttgataaa gataaccaac ctactatcag
+    71761 ttatggttga ttttttttat tcatattaaa aaaaacaaga aaacgaagta actttattta
+    71821 agttaaacaa aaattcaaga aaaatgtgtg tttgcaccaa ttatggtaca tcaactgcca
+    71881 aaaacccacc tatagttgct cactgaaaga aaggtccagc agagcatttg gttactagat
+    71941 gaatttctcc ggtttaatga cgataaaacc cggttaaagt taacctactc agtaattcat
+    72001 aagcatccgg tacatctaag caaagtgcct tatttgcagt aacagatcaa gaattggaat
+    72061 aagagacttc tcgaataaac aagacattct tcaactttaa cattaccatt accaatggac
+    72121 cgattacact tatcatagtg tcaagaagat gaaacaacta taaaataccg agaaaatggt
+    72181 caagattttg agcattcagg gcaagggaaa caacctttgg cttggcaatt atcgcaagcc
+    72241 actgatcttg tgtttgatga gtcactgaaa ccaaagaatc ctcctgattt gattgttcct
+    72301 gttcctttgc aacggccaca agctacattc ttaagatctc cacattttcg acacatgcca
+    72361 agagatctgc aaaacttgtg tttagaaaca tatgatgatg gcaagttata gattcaaaaa
+    72421 caagtctaat gtcattcagc catggacata caacactcta gaacatttct tatagccgtc
+    72481 agcaaccaca atgaacagtc ttaagatcat gtctcattac aaaggtcaga ttttcagaat
+    72541 ccataatcaa gatcaagaat gattcaatac ctgaatattt caaaaatcaa aagatctttc
+    72601 agtgaggaat acagaatttc aggaaaacca ttactttcac atggtgtgaa ctcagaaaaa
+    72661 aaatgtaatc cattggagat tgaaactaaa aacataactc aacaatggat tttttttttc
+    72721 attcagaatc tatccttcca aactacagat cacaaagaaa ctcttaagga tgaaaaaata
+    72781 gtttaaatca aaccctcttg gattaaaatc aagcagctca ttgcaattct aagcagctac
+    72841 tacaagcatc atcattatca cataacaaca aaacaaatcc acacggttta aatccaaaac
+    72901 aggatataac acgacatggg ttccactcag aacatgaaag attgagcaaa aagttggatt
+    72961 ttgtggaatt tcagaaatct tatgtaatga acttgaaaag ctttattacc ttctctgaga
+    73021 agcagcgata aaagcatcaa ctttaggagc agcgatggtg gcaccaagaa gcagagcccc
+    73081 aactgccaaa cccgcaactt cgtttgacgt taaactctgc atcgtcgtct tcctttttct
+    73141 cttctcttct ctcgttcctt cttcttaact tgatgataat gatatcctga atttatttca
+    73201 aacgggtaaa tattttacat atctagagct tatcagatat ggcctattag ctcagttggt
+    73261 tagagcgtcg tgctaataac gcgaaggtca caggttcgat ccctgtatag gccacttttt
+    73321 tgtttttcct tcttttaata atttcaattt tgaccccgta tcttttgatt tcaccaattt
+    73381 gtatcccaaa gttaaataaa tgaatttctc caaacgattt gtagggttta aacaacttag
+    73441 tatactccgt tactcgcttc caaacccgta cggcctcttt agagtttcac ctttgtgtct
+    73501 ctctgaaact gcaacaaagc ttgaagcgat agggtttatt caagcttcac tcaaaccagt
+    73561 cgctaaattt cgacatcttt attcacatat ctatagattc aagctttgtg atttgccatt
+    73621 cgggtttcag attcgtgttg gtaattcaat ttctgaatcc tctgctccta aagagaagaa
+    73681 tctgagatca gcaatggaaa cggatctcaa tgattatact gtcattaagg aaggagaagc
+    73741 tgaggttctt atgcacaaga agaaccaagt cttcttcaac aaagctcagg tttgctatat
+    73801 gatttgattt tctgcaaagg ttcttcctct ttggtgataa ttgattgaat tatgtcaagg
+    73861 gcaatcttag gattgatcaa gtattagttt tgagcatttt atcttggttt aagatttcaa
+    73921 atttagtgtt tttttcttgt tatggattag tgtcatgctt gtgttagtga gaaatttgaa
+    73981 ggcaactgat cccaaatcct tcttagcaat tatgttctct tgtgtgaatc catatggata
+    74041 taagtgtgag ctaaactaat tgcgtgttgt tttatggtta ggttaacaat agggacatgt
+    74101 ctattgctgt gctaagggca tttataatca aacgcaagca agagcatgag gctatgttat
+    74161 ctaaaagagc tagatcatct gggaaagtgg ttgagaagga tgtctctgaa acttccaagg
+    74221 aagaaactcc tactgaaaac ggtgacgata atggaaaaac caatggagaa cacgaagtaa
+    74281 caactcagga tggaccaaag gaagctgcta agaccgcata tgaatctgca cgaagggaac
+    74341 tcaaaccacc aagagtgctt gaggtttacc tatattcttt cctttatgac tgtaatagct
+    74401 gtacttttgt tttcttattt cgttgcttat tatgcgtttc tagttgataa ctttttcttg
+    74461 tgttatttat tttataggcc ctgtcagctt cagggctacg ggctttgagg tatgctcgtg
+    74521 aagttgaagg aattggtcaa gttgtggctt tagataatga cccaggtatt ttttccttct
+    74581 caacgtttgt gcttgtgaag aggttcgaga ctctactttt ctttcgtcct aacatcccag
+    74641 aataatatag tatttccatt ttcctattaa agcatcggtt gaagcctgcc agagaaacat
+    74701 taagttcaat ggtttgatgt ctacttcaaa ggtggagtca catcttactg atgctcgtgt
+    74761 tcacatgctc agccacccaa aagattttga tgtggtaagt atccaatttt tgcaaaatct
+    74821 tttcatctta ttgatcacat aactaacttg gttcagtgat gtcttaggtt gatcttgacc
+    74881 catatggtgc gccgtctatt ttccttgact cagctgttca atcagttgcc gatggtggtt
+    74941 tgctgatgtg tacagcaact gacatggcag tgttatgtgg agctaacggc gaggtctgct
+    75001 attccaagtg agattttact ccttcattac cttattctga aaacaaatat ctttgatttt
+    75061 gtctgaatgt cttaaatctg tctgttgtta gatatggctc ttatccactg aaagggaaat
+    75121 attgtcatga gatggctttg aggatcctcc tcgccagcat cgaggtaatt ttttgctacc
+    75181 gttgaattga ttcactctcg ccaaatcttc caggctttga ttgttttaaa gatgctcgac
+    75241 tggtatttct tttctaaaaa gcttttgttt ttgatatatt gactttggtc atatttttat
+    75301 ttgcagagcc atgcaaatcg ctacaagcgg tatattgttc ctgttctatc ggtccaaatg
+    75361 gatttctacg ttcgtgtttt tgtccgcgtc tacacgtgag ttttcctttc ttttcaaact
+    75421 gttcctctca attaaaggga agtaaagatt gaagctttat gaaatgcagt tcggcgagtg
+    75481 caatgaagaa tactccacta aagctctcat atgtctacca atgcattggt tgtgattcct
+    75541 ttcatcttca gtctgttgga agatctctcc ctaaggtttt gtggctgtct cgttgcaatg
+    75601 ttagtttagt agtttttgaa agctgccttt tctgataatc ctcttctata atttcttaca
+    75661 gaataacagt gtgagatatc tacctggagt tggtcctgtt gttcctcagg attgcactca
+    75721 ttgtggaaag aaatataata tgggtggacc aatatggtct gctccaatcc acgatcaaga
+    75781 atgggtgaat tcgatactaa acggtgttaa atccatgaaa gacagatatc ctgcttacga
+    75841 ccgaatttgc gctgttctta ccacaatctc agaggcaaaa ctctcctcca cactctgata
+    75901 gctttagttt ttggttttga tcaatttctc taaatcccta aatgtttggc ttgtctccag
+    75961 gaattgccag atgttcctct ctttttgagc ctgcatagtc tctctgcaac gctaaaatgt
+    76021 acttcaccat cagctgcttt gtttcgatca gcggtaatca atgcaaagta ccgtgtctct
+    76081 gggtcccatg tgaacccgct tgggattaaa actgatgctc caatggagat tatctgggac
+    76141 atcatgcggt gctgggtaaa ccaaccattt aagtctaaag gcacaagctt tgaatgtcat
+    76201 ttcttttcct cattgataat gaatgaactc gtttgtttga ttgtaggtga aaaatcatcc
+    76261 cataaagccg caatcacctg aacatcctgg aagtgtgatt ctatctaaag aaccatctca
+    76321 tcaggtctct ctttcactaa gccattgttg taatgcttca ttcattgttt atatatttaa
+    76381 agttccaaac ttgaatcttt aaacaccgct ttctttcgga caggctgact tttcgcgcca
+    76441 cgtcggttcg cttagtaaag cacaggcgaa gaaagtagcc cggtttctac caaacccaga
+    76501 gaagcattgg ggtcccaaga taagggcagg tcgtacaatc acaagcaaac atgtatcgct
+    76561 tcttggtcat gaagcagtaa acggtcatct caataacaat cataaagaag caggagacga
+    76621 agaagaagaa gaagaagaag aagagcccga agaagatatc atcgagggtg agccagagct
+    76681 caaacgccaa aagacaacag aagattttgc ctcaacatca tagggcgaat gtttacacaa
+    76741 gaattttata tttttgtttg caaaagattt tctcttttct tttcttaaac atttgttatc
+    76801 acagttctgc gtttttgatc ctagcacttc accatttttg cagacttcta caacaaacaa
+    76861 cttataagtc tctattggat tcttcagatc tgttagagac tatttaaaaa aaccttgtaa
+    76921 tgtgtaagtt ttcatcagta acacttttct atgaatcttt catcattatt tccagttttt
+    76981 attggcaata tgattgatac ataaacgagg aaaacaataa tcatggagta tcattaactt
+    77041 ttgatcctaa gcagcaaaca aattgtataa caaacaagtt cattaacgat aaaacagaaa
+    77101 gaagagcaaa taagtcgttg tagtatagtg gtaagtattc ccgcctgtca cgcgggtgac
+    77161 ccgggttcga tccccggcaa cggcgttact tttttctttt tccttttttc cttttcaaag
+    77221 ttttccacgg cataatcgta aagcccagcc cacgagtgaa agtaacagag acgatacacg
+    77281 tcataatgtg atcatcttaa aatctttttt tatgtataga gagatacgaa tcttttaacc
+    77341 ttgtcaaatt ttgtttttct tttctctctg aacccgaaaa ctccagcaat gtcgtcggcg
+    77401 aaacttttcg gttgttctat taacgttaac gttgaggcag aggaagaaga aggcggtgac
+    77461 ggtggtagct caaccaacgt ggaggtttct cgatctggta accaaccgga ttgtgaagct
+    77521 atgagtttct ccaatcagat ggaaatcggc gttcgcaata catattacca gtttctagag
+    77581 tcgaattcag attctggatc ggattctatg tatgcagaac ctgaattcat cgatttcttc
+    77641 gatcgagagt cgtacgaggt cgacacggtc cgtgaggttt gtgtaagttc gaatcagagg
+    77701 gttagtactc caggttattt caatatttgg gatcaggatg tagatttagg gcttggaatc
+    77761 gggttaggtt caaggtcggg ttcgggtcag ttacctggtg attcaggtgg ggttggggtc
+    77821 gaagttggta ggggtgttac tcctgttgag tataatctct ttggagagga agctatggtg
+    77881 gttgatgaag tattggaatg ggagaatttc aataacgcta tccacttggt tcaagaacct
+    77941 gcctatgcta gtatggaggg agaagaagaa gaagaagaag acgaagtagt aatggaattt
+    78001 gcagcatcca tttatagtga tgcttgggaa atattattgt acgataacat gacaaactct
+    78061 gctcctatgg atttggatgt tgaagtttgg ctagatagtg tagatggata tgctcctatg
+    78121 gattacaatg ctatcatagg acaaatgttt gataacgaga ctggaatcaa aggaactcct
+    78181 cctgcatcca aaagtgtagt agatggtctt cctgatgtgg agcttaccat tgaagaattg
+    78241 agcagtgtga gcattgtttg cgcgatatgc aaagatgagg ttgtgtttaa ggagaaagtt
+    78301 aagaggcttc cttgtaagca ttactatcat ggagagtgta taataccttg gttggggata
+    78361 aggaatactt gcccggtttg tcggcatgag cttcctactg atgatctgga gtatgaaagg
+    78421 aagaggagag catgaaggcg tggtagtggc gggttggaaa gggagtcgat gctcgggtag
+    78481 agaaggagta actgtgcgtt atatcttcga taattgtcta atcagtaacg atctgttcct
+    78541 cgttgatatg atgtacatat tggttctaga taataggaag catacatttt ttagccatat
+    78601 ctttttgcaa gttatttagg ttcattaatc ttattagact ttgcaggtaa ttgcagtcag
+    78661 aggttatatt taattgttga tccgactttg attagttcta aatatgattt ttttcggatg
+    78721 tggttttgtt gctttaacag cgcaagattc tactgtaaaa cgtgaaagaa agtgaaaagc
+    78781 tcaatcatgt tatttttacc gtttgtgtta tctaggattt gaatcaatta gagatgagat
+    78841 gttgttgcca tggggctaat ccattgtctg atgtgatgaa tccggcggat ccagagtcaa
+    78901 gtcatgtgag tctgtgacca ccttgtggtt aaccaaaaac atattaaaac gatggatttg
+    78961 gtccggttaa acgtggtctt ctgggaatgg gtctcatatg gacctgatct aatccaaagt
+    79021 acgggccttt tagtccacta aagaatctct agtttaaatt ggtgtaattt ggttttattt
+    79081 ggaagctaca aatactaaga atttccacat gaacgtgtat ttgggtatcc acacgaccat
+    79141 actctataca caattttttt tgttcgcatg cgtttgaatt ttgccaatta ggtcaatttt
+    79201 taattagttg tttaaaagct tttgaattat ttatttatat tttcatgttt cattttcact
+    79261 aactacttga aatatgaacc gaaagtccat tagcaataag attgttaaac aaaactaaag
+    79321 ttaaaagtca ttttcactgg cagtggctcc ctcttttctc ttatagtgac tctacatttc
+    79381 taaagagcca aacttcaaac attgagtgtc gatttttttt aataaaaatt ctttttaata
+    79441 atcttagatt ttttttaatt tttttttctg gctcttggat acccatttgg ctcaacacta
+    79501 tttgcttatc ctctcatatg cgataatcat gtctcttgtc tcattcgatg taacaaatac
+    79561 tccacacaga aaatttctta acttctaaaa ctataagctc ctctatgaaa tcttaatttt
+    79621 aaaccttgtg cgtcatttac gttgttcaca tcaaccatag agagttgcaa ttaactactt
+    79681 gcaaatgatc tagtatatgg aatttggcaa gaatttccat gttcctcagt ttagactcat
+    79741 cttaccaaag ctatatacag tatatcttta aatgactaaa catttgtgac gttttaaaac
+    79801 tttatttgtg tggatatgtt tcgaagtaca aacggattaa ataagtgaac tgcaatgaaa
+    79861 aaagtataag aaaatcaaaa taaaacgtaa tgtagttgga gtgggtgaat aacatcacat
+    79921 caaaacaaat tcatcacacc atttgaaatg ttttcattag ataattatta cctcaatcat
+    79981 tttgggattg acatttgcgc tcgagtcttc agtgacgttc tcccactttg tcttcttgtc
+    80041 tcttaggatt ctctctcttc tccttaaggg acattaagct ttctagacca aaatgcacat
+    80101 caactacgta ctctcaacca ataataacta gacaagatta tcaagttaaa aattcaacgt
+    80161 aaatcatatt ttagactaat aatttcatgg ggtaaacttg tgattgtgtt gtgtgattaa
+    80221 atttgcatgg gtaaaatttg ttatcttata gttcgttttc ctactcaaat tgtaaaatct
+    80281 ggctttgata tatacaaaac ataagcacac taattctttt taaaaactaa aaagaactaa
+    80341 aataaacaaa ggaaaacaca gttaggcaat gataagtggg ttatttgaga attcgtaaaa
+    80401 ttattagcat aatcataact attatgatta ggactggcaa tatatagttg taaaaattgg
+    80461 cagtaatagt ggttagtgga gcaagacctt agtaaagttt acaagtatct tagcatattt
+    80521 gttttggcct ttctccatat ctttctatta aattagagaa atgatggact acggtcgtgc
+    80581 ctaagataca ataatttaat agatcatgta gttttgtaat atttgattat ctgataaaga
+    80641 ttatttaaac aagagatcaa atttacaccg caaatattaa taaaaagtta ggtatatata
+    80701 tatatatata tagttaaaac atatttataa gtattcttaa tttggcaaat tggatagaat
+    80761 tatttaaagg tacgaaaaca aaataccata gtataaaatg gatatacaat tataatattt
+    80821 tcataaaaac aaaaggtgta gaattataaa aattatcgta aaaggtgtga tgatacaata
+    80881 gactttgatt tggtgattgg cttatcctta atccaccaat caggtgtacc atcatgacat
+    80941 catccatttt tcattaaaaa aacattataa caataaatta atatgttgat gatttttgga
+    81001 gtttgcttaa caaagaagat cgtggaattt cgaattctca tatggtatta aagattaaga
+    81061 ggccgatgaa tataatctga ccaaaaaaga gtttcaagaa caattaccaa cgagtcaaaa
+    81121 tataatatat atgttagaaa aatgtgacaa aagttatatg tttttgtttt agttagaata
+    81181 gcatgtttag atacgacaac atttacatgg catgtactat ttagattaaa ttacagttac
+    81241 acaaaattat aaatttctta acagaataga agatctaagg gaattctttg gtaaaaaaaa
+    81301 tgaaaaatca aggtaggcca atttcaggtg gtagtgtcat tgatgggacc aaaacaatga
+    81361 aacattcaaa aacactttat agtttatgat ccagtgaaaa ataatactaa aagaatatca
+    81421 gaaaaggaaa cactaagaaa aagcaaattg gaaaaaagta gaaaatgata aaataagaaa
+    81481 aagaacttga ggctgtggaa gaagaacaaa ccgtgcaaag gacatgagac tgaaataaag
+    81541 atgaaataga ctttgtctaa atgatatgac ttgatggtgt gtatttgcct ttcgtttgac
+    81601 taacacagtg atatgcacac tctattttat tctaatttgc catattatgc atcaacaggg
+    81661 aatttcaaag tttaagtcag tcaaatattt ttaggtgttg tgaatttcac attacaaagt
+    81721 tataaagtag ttttaaagtc accagatgag aatctatgag atcaacaagt acagccaaat
+    81781 gaatatccag tctgatgaat taaaaaaaac tatactacac atgttcaaca taaactatag
+    81841 tgtttcttga atatatgtgg tttcctcaaa tagtaaggtg catcagtaaa acccgaaatt
+    81901 agttattgaa ataaacaagc tccattattg tataactttc agtcgacacc caaaaagtct
+    81961 atctaaagag gaccaatatc aatatgcaca tatttttaat taacattgcc tttctatgga
+    82021 tggattatta taaacccatg catgcctaag tttttggtta tatttatctt aaagaaagat
+    82081 atgtttttaa tagtagaata gtagcaagaa agaaaagagg agaccaaaaa aacgattttt
+    82141 tcaattctcc aatcattaat tgtacaaaac tcaaaaacga ttttcctaaa gtttgatatc
+    82201 gttatataaa aatccagaca tctattctct taatcttgac caactaagag ataacactcg
+    82261 ctcattttat aacttagtta gttagttaaa tcaatgaatt ttcttttggt tttagtttgt
+    82321 aatcactaat caatgatcat tatacaagtg tgtgtggttt taaactaaat aaaaggtatt
+    82381 gttttaagtg gttgttgagt catcggctcc tcaattgtat tatagcctct gccttagcct
+    82441 ttgtaatgaa catcataaaa cgacaaggca agcctgccta acacatcttg ctttcctagt
+    82501 gcaagtaagg agtttgtttt tgtaaatagt ggtctttgac catatatcta tatataataa
+    82561 aagaagagtt gaattaataa ttctgttctt gtatcaatat gatcttcttt gtattgattc
+    82621 tataagtata aaaacatacc acttgagaca aacaaaaaac cataaatagg gttaaaggat
+    82681 gaatcatgtg aatagaccat gcatggttcg agtatttcca ttgcgctaag gactgaagac
+    82741 tggaaatgtt ttcaatgaca agaataaaca agtatttcac atatctctaa gattaattag
+    82801 tactatatat gatatgggat gattggttaa gagatcatgc ataatcaatg ggttactaag
+    82861 tactgtagat ttgggttttg tatacaatgt atattgaaac ttcggtatgc catgtttgtg
+    82921 tgtggaaaaa ataggaaaag attgtcattt tttcggtaaa attctaattt attaattatt
+    82981 gtattactaa catttatata ttaattatta agtctttagg tttgctttat ttttgtttac
+    83041 gtctgtctgt gtgatattag tattatagta gaagaccaca tttttttata gtttatctag
+    83101 gacctaatat atgcagaaat caatataata attaattaaa gaagagaagc aaatcttaaa
+    83161 ggtcctatag atatataatc ttcttctttt ttctaaagag gaagaagaag agaaattggt
+    83221 gattaatttt acgtagatgt tagaactaat tcatttagaa aaagatatat actttataaa
+    83281 ttgacttatt tggcatcatc gccactttat atattggcta acaaaagcat cgtgtttcga
+    83341 aactaaaata attttgaaaa ccccacaact aaaaactaca atttttttta tctctgatgt
+    83401 aagcacataa cattgatcta aagcttgatt ttatccacaa tatatatgta catatggtac
+    83461 ccaagtcctt atgtgtcatt atcatcataa tgcatgaaca tgatatagaa accttttcca
+    83521 gttcatgtcc tgcatgaaca tcatgatcat gtttcttgta tgtgtatcgt ttagagatac
+    83581 cactagatgg ccatggccct gttagtctgc tactcacatg tacacgacaa gatgccacca
+    83641 cttggttgtt aaactaatcc tataatgaag tagcaccttc acgtgtcatc atatatacac
+    83701 atccatatat aatttatgtg aatatgatac actatgatct atgcatacaa gccatgaagc
+    83761 caaggttact ataaaattca agcattcaat tccttcccaa gtctcattct catgcatgga
+    83821 caaccaatga atatacaata tatgtatggg gacttctaac caacatgttt atcgatcttt
+    83881 acaacagcaa agtagccata gcagatttac acagcaacgt gtaaatactt ttttaaactt
+    83941 ctctcagtaa gtcatttaca agaaaaaaaa ttatgtccat agccaatggg ttatgtctta
+    84001 tatcgacata atctaaatta ttttttaaaa agcaaaaccc tcttaagcca atgtcacgaa
+    84061 agccaccaat ttgcaaaaaa agttgtggtc cacttgtatg cctttctcag ctgtataaac
+    84121 actccttctc atccaatctc caccattaaa ctcaccctct ttgagttcta aagagagaga
+    84181 gagaaattga ccatgcagcc aaattatgat agctcaagtc ttaacaacat gcaacaacaa
+    84241 gactacttca acttgaacaa ctactacaac aacttaaacc cttcaaccaa taacaacaac
+    84301 ctcaatatcc tccaataccc tcaaattcaa gaactcaacc tacaatctcc ggtaagcaac
+    84361 aactccacaa cttccgatga cgcaactgaa gaaatcttcg tcatcaacga gaggaagcaa
+    84421 agacgtatgg tatctaacag agagtcagca agaagatcaa gaatgagaaa gcaaagacac
+    84481 ttagatgagc ttctctcaca ggttgcttgg cttcgaagcg agaaccacca gcttttagat
+    84541 aagcttaacc aagtctccga caacaatgat cttgttattc aagagaactc gagtcttaaa
+    84601 gaagaaaact tggagcttcg tcaggttatc acatccatga agaagcttgg aggaggcata
+    84661 catgacaaat attcttctcc gtcgtctatg gatgaattgg atcaagactt ttcttctatc
+    84721 acagatgatc caagaactca tcatccatca tgagttgttc tgatcgagaa aaacattact
+    84781 tcaagtttct gcctctaatc ttatctattt tctttctttg gtatcacttt ttaaaccgtt
+    84841 taactacgta ccaaagtttg ggttttattt gtttttctgt tgttttgtca gaagataatt
+    84901 aaatgatgga tatttgaaga tgatgctgta aacaaagatt tataattata tacataaatt
+    84961 tggaaaagaa gagtataatt ttataataat tatacgtaga gaaataaaaa gaagacaaac
+    85021 aactaccacc aaaccctctt cacccaatag agaagccaag tttgatatac ttgaagaaaa
+    85081 gtaaattatt caaaaggccg cggatcttgt taattattct gcattctcag tgaggatagt
+    85141 tcgaggagga tctttctgaa atcaatgaga tgattaaaaa tatcattcat ttacttaaaa
+    85201 ctatccgttg tgcaattatg aagttcgttg tcgaaatgct tagaataact catcgtgaat
+    85261 gataagatta ttatcttttg ataagataaa gatagaaaat gatcatgcaa caacaaaaaa
+    85321 ggaaacttcc tgaaagaaga taagcttagt tcggaagact acttgggccg tcccctgaaa
+    85381 cagaagttta tcgcgcaaga ataattgggc cttatatcag atcctttggc ccagcccgtt
+    85441 agtattagac aagaccagtt gcgtgcgtca ttgttgaata tttcgaagtt tgatgatcga
+    85501 tcgtcgctca aaatttaggg cttttcactt cacaatcccg atttggggtt ttctggtaaa
+    85561 ctcctttaat ctcagctcgt cagacaaaat cgatctggga tgaatttgga tctaattgag
+    85621 caaaaaattg tgttttttca atctattgaa tttcacatcc ttaaaccctt gcatattcaa
+    85681 tctattgaat ttcgcatatt tttcctcaaa tttatccaaa atcatcccaa atctattgtg
+    85741 acgggttgaa agtaagtttc tttatcgaaa atcagtttga atgaatgtta gggtttatat
+    85801 tttgatggtg actgattccg atttgtataa atgttagggt ttatactggt ttctgactct
+    85861 ctggtaagta tagagatgat tatatattaa atggtgtttt aaggcgtgtt cagatatagc
+    85921 tgtgcttgtg ttgttctgtt ttggcatttt tcatttgttc tgtctttact tcttcttagt
+    85981 cagtctgtgg atttcctttg taatggttat tggttagtta ggaactactc tcaggccttc
+    86041 ttcacatgac accaaagttt tttgtggatt atgtggagga aattgctatt ttagtatatg
+    86101 gtcattgaag caagataaga accttagaaa gcagagtgtt ataatttttc tgtcaaactc
+    86161 atgctgtagt tgcagacttt ttcctacttc agattaagga gaatggtttt ttttttgtaa
+    86221 gcaagaagtt aaaagagtga gaatctatga aggcccaggg accatgtaaa gtttgggaat
+    86281 ggttttaaaa tttgatggct ttgagaatcg atctgaacta tccttagatt tttggaaaag
+    86341 tgcaaaaaag tgcaaagtgc agcatcctta tatcacttta tgaacttttc acgtgaggtc
+    86401 tcgaatccat cacttaacat taacacatag tataagctat acacttggtt gttattactt
+    86461 atttcttctt acagttcttg aagccgagaa catatatggc tgatggtttc tacaattagg
+    86521 aacgtgatat gactcatttt agtaagaagt tgtgatttac agctgaggag tctgtgttat
+    86581 caacttgaat tgtagaatct ttttgttgtc agggaggcgt gaaagtgtca catgttcagg
+    86641 ttatcgggaa gttattggct ttgaaccttg gcttccctct tagtagttag caaggtgaaa
+    86701 tctgcgtttt cacttcttga tgatccatat ttcacaaatg gggatctgta ccaattagac
+    86761 tggttttgtt taactaatgc agagaaagaa gaatactata gttttaatta gccaaaacta
+    86821 caaacccatt tgcacaacag tagtaatact tcttgaagct aagaatttca ccaaacctaa
+    86881 cataatctca aactaaaagt aatttacagc ttgcaatcta gtttttcttt ttgccacagg
+    86941 agtatcagaa tgaaggaaca atcccatatc ctgtgttgta cattagcatc gtgttgaacc
+    87001 cttgctcctc ggcttcgatt tctctctttg cgaaccggcc attgacccgc ggtcttatct
+    87061 ctgcatatgc cttcctcgaa gcatacctta ttgtcttctc aaatttcctt gtcttcctct
+    87121 tctctctgta tctcaggacc ctggcttctc tgtccattgg actgagttgt gttactgtta
+    87181 tcatctggct tgcagggtca ggttgttgct ctactgtccc tttgggcgtt cttgggtgtg
+    87241 aagctgttgt gacacatgct gttgactccg gcacaacacc agtttccatg gatgaaatgt
+    87301 atgcctaatg acagaaacaa agacattctc aatccccctg gtaagaggac taaagagagt
+    87361 cgatgcaaaa ggaagcatat attgttttta ttatagactt ctggtgaaat ttctatgtgc
+    87421 tatagatcaa ctgttggcag tggatcagtg gatttgtgtg tatgatgaaa gaattcaccc
+    87481 tgctgcgtta tgggaagatg ctaaattgaa ggggtaacaa atatacaaaa gccttacgtt
+    87541 atggttaatg gaaccattgt cgttgtagtg agtccctgag gagccatatt tgatattgaa
+    87601 ctgaaaattc tgttggttat ggcactggtg gccccttgat tcttcaagtt taagcggaac
+    87661 aactctatct cccccgtagc tcgtctgtgg tacgctgcag ttttgttggt gttgactgta
+    87721 ttcacctgtg aatttgtagt ccatactcga gttgtaatcc acaaggttta gatactcatc
+    87781 actaaacaat aacccattgt tttggttatt gttatttttg tctgaattag ggaacagcca
+    87841 cgaagcaacc tccttggcat ccttatcacc ttcttcacct tcctcttgat ccaccaccag
+    87901 tctcttctct ggatcggtca ttgttttctc gctttggtgg tgagtagtgg tcatggagct
+    87961 gaaagagttt ccagaaattg gtagaattgg aactcgctga tggcgtctag caagtgggtt
+    88021 tgcagaatga acctctgaat cacaggctgt gcatagagag gcatcatctg cctcacacaa
+    88081 aaaagcagcc ggagcacgct cacatgactc gcagacccgg acacgtttat ggcgggaagc
+    88141 aacgcgattg gcagagtgaa cttgagcatc acagctcatg cacaagtagg cagaatctgc
+    88201 atggcaatac acggtgcagg cgtttgaccg gcatgtgtca cagggtcgtg ccctgttgtt
+    88261 ctctccacta cctatgtcgt tactctcttg tttcaacata ataactcaga tgtagtaagt
+    88321 ttgatggtgt gggagctggt atctgaaaga aggggctaat aaagcttata tctggtgtga
+    88381 gagaataagt agaggggaga tcgaatcctg agtgcctaca cgtggcacag cgttagtggc
+    88441 tcttacatgt tattggatcc tcttgcagct agttgagctt ttgggaaggc ctatgtgtaa
+    88501 agtgtaaagt gtaaagtgta aagtgtaaac atctcctcct atgcacttct tcgttgggac
+    88561 ccgttcacaa gttgcttagg ttttgccact tgccatactg agattctcaa gaggaagtgg
+    88621 tccggataat gccatggtgt tgcaggcaaa ttgttactat tttttcgtat caccttttac
+    88681 tattttcata tgcattttat ttaaagtaaa gctagattcg ttttatctct ttggctttat
+    88741 acaaagttta agtggttgaa tattcatcct gaaccttgtg attactgttc aggtatctct
+    88801 ccgagtaagg cttcgagtgt cagagccatc accaactgtt atgggttgac tacctataca
+    88861 aaggttagac aaactcgtga atatggtata gttgttgttc cattatttgt gaatgcatct
+    88921 gcatcattta gaaagatttg caattgaaac tttcgaagga ttttctgatg aaaagtgtta
+    88981 taatgtttaa tctggttcat aaagtcatgg taatgagaat catatcggaa aagtgacatg
+    89041 aaaaacatag agacggtgga ttttctagga tcaaactcga atccgccaag tagtcttgag
+    89101 actacaaatt agaatttacc tgacgcacaa gttaaaactt aaaaggcatt ttggagtcca
+    89161 catgagcaat aattagattt atcatatata gtatgtagaa atttgcactt ggtaaccatt
+    89221 atagtaagta gttacaaaac atgactcatg agtgttgtca catctctgta atttgggttt
+    89281 atgatgatac tctatactat ttattgtagc tttattagat ctcaacctgt ggaagtaatc
+    89341 taatacaaac gacacttgta tggaattttc aggacatgtt tctaagatca cagaggtaat
+    89401 tgtgaagttt tctgctacac tcatttacta gtccacatga tccatagatg caatttaacc
+    89461 atcttcactc tatttggtga cccattcatg tgtttagcat cgtgggtttt cctgtactaa
+    89521 taaccattag tacttggagt tgccttgttg gtgttatgct cataatgttt actatgtgga
+    89581 agctacagga tacgatactc tctaggtttg aggactagct ccagttacgc ttttgggtcc
+    89641 agtgaatgta tgatacccat tatgtggtcc aaaaagaatg agatttgacg gacgctctag
+    89701 tggccaaatt catatggcta ccttcactct atgccttaag agctgaaaag tgagtgtttt
+    89761 ttttttgtat aaaatgctgg tcctaggata cacttttcgc atctcttcct ctttcatatt
+    89821 tgcttacacc gcacttctct ttcccatttt gaactttaaa ctttgaaaag tttttttttg
+    89881 cttggtaatt ggaacttaac atacaagttc ttgttattta ctggttttac gatgaatgta
+    89941 atggcattta gaattgttat gaaatggtta tatgatcatt gaattctaaa gaaaagattc
+    90001 taggttcttt agtaagcatt tttggagatc tttatcctgc caagttggat aagttgaggc
+    90061 atctttggct tttagtgctt tggtgatgaa attttttgga gattgtaacg tggttcgagt
+    90121 tcatgaagag taaattgtgt attggccttt tcacttgtag ctcaagaaaa aagtagtttt
+    90181 gcttattatt gctcgttgac agtctctaat taagtttatt gttgtagcgg tttggaactt
+    90241 ctctcacatg tgtaggcctt cggataactg ttacgagtaa tagatcatat gtaacaatat
+    90301 cctcaaagtt gcatcgagat tttcaagaaa acactattgc ttatttgttt tcattcttta
+    90361 tcaggtttcc gtggtttcga atactttgct tctttcagat taacgaaaac cctatttagt
+    90421 tgcaaagact gagaagagaa taagagtcga gggaccatat aaagcttgta attgtacgtt
+    90481 tatgcggatt ctccactttc catggctgtg agaaactatc tgaggaaaag tgcagtgacc
+    90541 tttggcaaaa ggtaatcact tttatgcact ttcacgtgct agctcgaatc cacaccaaat
+    90601 agtaacattc tattggctct gaaactcata gtgctccaag tggttgtcat tgcattttaa
+    90661 aaattcctga caagaaattt agtagaaact ttgctacttc aagtttcctt ctttatgata
+    90721 tgctagctcg tattatatgc aacatgggat atctttgttt tgcttgtgtg gcttataagt
+    90781 ttatgttgca tgatcaaaac agatcactga ataaattttc tggtggtctg agaactcgct
+    90841 gcagtgaatg aatgagatcc atgtaagtgg ccccaaaaga acataattta caacactctg
+    90901 gtaggtaaat tcatgtggat actaaaactg actcatttat ggctaaaaag tattgatttg
+    90961 gtacatgaat tcatcagctt aaccgttgaa gtgggaagct atcctagtgg taaataagtt
+    91021 gatctctggc cctttccagg tggcaaaaag tgaaggcttt acagtgtcac cctttttctt
+    91081 ttggttgcgt cgttattctt tagtgccagt atataccttg gaaaaagtga gctattattc
+    91141 taaaagatag accaagaaat agatttctca cactgaatat ctttgttcat ttggatggtc
+    91201 cataggcatt gacatattct gctagtagtg gttgtgagtg atagaatctt gttatccgag
+    91261 acaacctcgt gggaatggat ttagcaccaa tgctaatcac taatcagtac aagaatatgc
+    91321 gttgatgtaa ttgtacctat tagcattgtt cactcaggat tcttggttat ttgttacctg
+    91381 actattcagt gtgtgatttt attggagttt tacatgttta cacttgtctc tgtaatctac
+    91441 ttttggtata aaatcatatt gagtacaact ttcttttctc agggatgtgc gaaattgtct
+    91501 ttggggcggc ataagtggaa tcatttatct tctacgtgat aaagtgatgt ggaaagtgtc
+    91561 accaagaaag gaatcattaa ctttgaactt aggcaaggtg agttacgtgt tgactgttct
+    91621 tggtacagct agctatacta atgcatatct ctttgtagca tcagtttgaa tttagactgt
+    91681 agggcagtat ttcatagata agaggacttc taccaagtgg gcaaaatttg tattaactta
+    91741 cagcagctaa gtatgagtgt atatctaatt aactcgtact agttaggcaa aactagtaaa
+    91801 ctacagaccc tatatgcata aaaccgtggt aataacttgc aggaatttca aaccagaagt
+    91861 aatttacaag catgcagtct agttttgcca tagcagcatc agaatgatgg aacaattcca
+    91921 tatccggtgt caaatgttat cattgtggag aaagcttggt ttgcctcttc atcgacatct
+    91981 ttcttctttg caaaccggcc cttgatccgt ggtcttttct ctgcatacgc tttccttgaa
+    92041 gcatatctta tcgtcttctc aaatttcctc atcttcttct tctctctgta tctcaggact
+    92101 ctagcttctc tgtcccttgg actgagcatc tgagctggag ggtaaggtgg ttggtctgtt
+    92161 accgctttgg gcgatcttgg gtttgatact gttgcgtcac tcgttgttga ctccggcaca
+    92221 actcccaggt ccatagatga aacattcacc taatgacagt aacaaagaca ttcttagaaa
+    92281 tttacttcat ggatttgtgt gaaagaagca gcatgctggt catagtaacg gatctgaatt
+    92341 aaatatatga aagcttacca tatgactgag ggaaccattg gagcttcgaa gagctcccca
+    92401 ggagccacag ttgatactca gctgaaagtt ctgttgctct tggtacatgc cctttgatac
+    92461 ttcaatttga agtggaacaa ctccatcttc cccatagctc ctctgaggta cgttgcagtc
+    92521 tagttgatac tgattggatt gatctgtgaa ttgcttatca ctcgaactat aatcaacaag
+    92581 gttcagaaac tcatccccaa tcgagaagcc attgttgtta ccactgtttt tccctgaatt
+    92641 aggcaacaac catgaagccg cctctgcttc atcttcatcc tcttcttctt gaccaagcac
+    92701 aagtctgttc tctggatctg tcactgttgt ctcacatgaa tggttagtgg ccgtggaact
+    92761 gtaagagtac tcagaaatgg gcagaattgg aactcgttga tggcgtctag caagtgggtt
+    92821 tgcggaatga atctctgaat cacaggttgt gcatagagat gcagcatctg ccttgcagaa
+    92881 aaaggcagcc ggggctcgct cacatgattg acagactcga acacgttcat ggcgggaagc
+    92941 aagacgattg gcagcatgaa cttgagcatc acaactggag cacaagtagg cagaatcagc
+    93001 ccggcagtac acggtgcagg cggctgatcg gcatgtatca caggcttgtg cccagttact
+    93061 ctctactttc aacatactaa ccgcagatgt tgtagttgtt tgatggtgag tgagatcgta
+    93121 tgtaaaggca gcaggggact aagaaggcta tattagttgt tgaatggaaa tggaaaggaa
+    93181 ggatagaatt ctgagggctg gacacgtggc gagaagtttg tgggtttgat gttgagtgat
+    93241 catcttgtgg ctggctgagt ttttgagaag gggtccagtg cagagatgct tacagctttg
+    93301 gaatcatcaa ccagcacgaa atgtgaaaga aagaaaaaaa ctaattgtct tcttatatct
+    93361 ttgcaatggc accaatagtt actacttttt ctctctttct tcctcaataa gtgtaaacac
+    93421 ctcttcctat gtaatcttct ataggaccca tccacatgtt acctaggttc tgaaacgtgc
+    93481 aacttctaac tatttttgta tcatctacta ctattttggt aaagctggaa actttagtag
+    93541 ctctttggct tgaccattag cctaatgttt atcttgattc ttaacttttg ccatgttcta
+    93601 actgttcagg tttctctcat cgtctcatga tttcaattat aaaaattaaa tgatcactgt
+    93661 ttagcattgt taaaaagatt atgactctta accatttgtt atcttcgaat gatgctaggt
+    93721 gtcttttttg gtgatatgtt ggagataata gatatggcat gtcctaactt tgtttccttc
+    93781 cacatcaagt cggtcaagct gatttggcca acggtggaga tccaaggagc caattggaat
+    93841 ctgccaagtc gctacgagtc tctagagatt aggagccaca agttgaagca aatggtgggt
+    93901 ccaaattaag gtgaataacg atagttatat caacatcacc tgtggtctgt ttatacaagt
+    93961 ccaatttgct tatgatcacc atcatatttc atagttacaa aacatgtata agtctatcaa
+    94021 gtgtttgtca catctcaatc acctttttga gaatatgata aaactcgata ttatctttac
+    94081 tagcttttat tagatttcaa acttgtggaa gtaatcatat tcatgtgttc aaatctacat
+    94141 gggtcaaata aatttggtgg agcaacaaag tttgttttta tttacatgat gttctactga
+    94201 tgcaattaaa aacacagcaa ctaacaaagg taaaaactga ggatcaacaa acaaagcttg
+    94261 ttttcctttc ttggcagtca gaaagggcta atctcacgag ccagtttcag taacaactct
+    94321 ggtacaagac gctttctggg aggagccaac gagtctttgg tcagcccatc attgtcctcc
+    94381 gcatgtatca cagagactat gtcatcaaag agttcgtctt tacctcctcg taagggatcc
+    94441 tataaacaat actctcatga gttttattgt actcacactg agctaagtgt ttgtgtgtgt
+    94501 gacacaatgt aaggtaagtt gactgattta tgtggcttac ctgaagaaat aagtcagtat
+    94561 gtgtctttcc gctgtataaa acaagctcgg cttttgctcc aacagcttgc agagcgtctg
+    94621 taaaagtttt gctgcaaacg aggtggtttt agtatcccaa aaggtttcat cagtaatgta
+    94681 tcacttcaag aagatgatca tatatacctt tcatcgcatg gtatggaata atctgaggat
+    94741 ccgtggaaaa gtataatagg aggtaaaaga gaagcagctt ttccaacaac tgggtctttc
+    94801 agtcttactt ccggagagaa tttctcaaag gactcttctc cttccattat gctgcaagtt
+    94861 ggagttgaac attagatctg atcagactgc gtttaggatt tggttaacca agcatttacc
+    94921 ttaggaaaat cgagcgatac agaccccgat tatggaagtg atcaaccaac ttgtatagat
+    94981 tgtacctatt gaagttcaaa atgataagca gatcaattgc tgtagaccaa aaagcacata
+    95041 gaagccaacg ggaatccaaa agcttagaag ataccctcca gataatccaa aataagcttt
+    95101 tatctgggac actgtccagg agatgctctc tccttttaat tctttagtag cttgttccaa
+    95161 tagagcacaa gcggctatat gggcaccagc tgattgcccc atcaggtaga tcctagagag
+    95221 acaagcagat acatgcagtt taatagccga gagaccaatt ggaagatcaa atggtaaata
+    95281 aaaacaaagc caatgagatg aaacctgttg gggtcacctc caaatgcaga gatgttattg
+    95341 cagacaaatg agattccttg agaagcatca gtcaccatat cactaattgt tccctgagga
+    95401 aagttcctat accataaaga acggattgct tggtaaatgc atgaaagttc ttcctatatt
+    95461 cataaaagac gactagtttt ttgaattgtg taatttgttt acctgtagtc aaggcatgct
+    95521 acaatgatat ctctttctgc tagctgcatt cccaagagcg agccccaagc tttgtaccta
+    95581 aatatcagta caagattagc aaacatgaac ttattaacaa gcaaccagct agtacaagaa
+    95641 gatgaaactt gagtgagtca gtgagataag aatacagagc ccatcatgat ctttactttt
+    95701 ttctttttgt aactcaaaca tcaacaaaag aacagtgaag gggaaaactt ctataagtgg
+    95761 gacacgagaa attgtaaaac aaacaaattt caacattatg cagaagagaa tgctgtaaat
+    95821 ctgatgtgca acccacccaa taatccaagc tccacccgtc acaaaaacca caaccggctt
+    95881 caagccatcg ttgttgcttg gcaagtacag atccagccta tcaaatcacc gttcatacat
+    95941 caataatctc aagacaaagg tgatgtgata aaagcaggca ttcaacatac cacttaaggc
+    96001 cgatgtgaaa ggagaaggta agattctatt cagttcacac acaaacctgt tccttggttg
+    96061 atctccatac actatactcc tccggacttg ctttgagaaa aaataactat atgcaactgc
+    96121 acaagataat acaaacttga gacgaggaat caaagactaa ttttccaatt aaaacaacat
+    96181 cttaatcaaa aggaaaaatc ctttctatga atttatttat cagcaaagaa aaaaaaaaaa
+    96241 aaatcaaacc ttgaagaaag ccaggcataa gaagcatagc ataacatgta agggcaagta
+    96301 attttgtcat ccatcgatag cctaccctga acaaaactta acaattagat caatccttgc
+    96361 tcaactggaa atataaaaag aagaagttaa acaagagcaa aacaacagca catggagtaa
+    96421 ctttcataat tagagtaaag ttccaaaata aaagacagag aacttttata tatcattgga
+    96481 ttctttgcag agcaaggcaa agccagctaa tatctacacg aattacctcc ctaagcgtaa
+    96541 tcatcaatat acctaaatcc aaaaacaggg acaatcgaga tcagtacccg aggtatcgaa
+    96601 gaagcttgaa gctaagtcca gtaattagat acgtttcagc tgccgcgtgg ccgatatcgc
+    96661 gaccgaacga ttgctgccgg aaaatccgac gaggtccttc gtcaaccggc gacttcccgg
+    96721 aaactcgacg acgtaacggt tcaccgttaa gaagagtcgt tctatcggag tcctcatcgg
+    96781 gtagaacttc ctcgatctcc gataccgtcg aggtcatcgg ccagcatcgc tgctccggct
+    96841 gctgagtctg aagaggcgaa tgcatctttt ccgccgtgaa atagtggatc tc
+//
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AY095303S1.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AY095303S1.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AY095303S1.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+LOCUS       AY095303S1              2375 bp    DNA     linear   PLN 21-JAN-2003
+DEFINITION  Chlamydomonas reinhardtii c-type cytochrome synthesis 1 (CCS1)
+            gene, ccs1-ac206 allele, 5'UTR and exons 1 through 6.
+ACCESSION   AY095303
+VERSION     AY095303.1  GI:25986619
+KEYWORDS    .
+SEGMENT     1 of 2
+SOURCE      Chlamydomonas reinhardtii
+  ORGANISM  Chlamydomonas reinhardtii
+            Eukaryota; Viridiplantae; Chlorophyta; Chlorophyceae; Volvocales;
+            Chlamydomonadaceae; Chlamydomonas.
+REFERENCE   1  (bases 1 to 2375)
+  AUTHORS   Dreyfuss,B.W., Hamel,P.P., Nakamoto,S.S. and Merchant,S.
+  TITLE     Functional Analysis of a Divergent System II Protein, Ccs1,
+            Involved in c-Type Cytochrome Biogenesis
+  JOURNAL   J. Biol. Chem. 278 (4), 2604-2613 (2003)
+   PUBMED   12427747
+REFERENCE   2  (bases 1 to 2375)
+  AUTHORS   Dreyfuss,B.W., Hamel,P., Nakamoto,S.S. and Merchant,S.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (11-APR-2002) Department of Chemistry & Biochemistry,
+            University of California, Los Angeles, P.O. Box 951569, 405 Hilgard
+            Avenue, Los Angeles, CA 90095-1569, USA
+FEATURES             Location/Qualifiers
+     source          1..2375
+                     /organism="Chlamydomonas reinhardtii"
+                     /mol_type="genomic DNA"
+                     /strain="ccs1-ac206"
+                     /db_xref="taxon:3055"
+     gene            order(124..2375,AY095304.1:1..1465)
+                     /gene="CCS1"
+                     /allele="ccs1-ac206"
+     mRNA            join(124..330,512..825,1045..1233,1418..1798,2000..2131,
+                     2253..2345,AY095304.1:6..303,AY095304.1:495..677,
+                     AY095304.1:863..1465)
+                     /gene="CCS1"
+                     /product="c-type cytochrome synthesis 1"
+     exon            124..330
+                     /gene="CCS1"
+                     /number=1
+     5'UTR           124..206
+                     /gene="CCS1"
+     CDS             join(207..330,512..825,1045..1233,1418..1798,2000..2131,
+                     2253..2345,AY095304.1:6..303,AY095304.1:495..677,
+                     AY095304.1:863..1098)
+                     /gene="CCS1"
+                     /codon_start=1
+                     /product="c-type cytochrome synthesis 1"
+                     /protein_id="AAM44130.1"
+                     /db_xref="GI:25986621"
+                     /translation="MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQ
+                     PGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPRTL
+                     VQSNAVQVAWRRLMKELSSLPRAIAIMALIAVLSGLGTFIPQNKSIEYYLVNYPDGAE
+                     KVLGFLTGDLILTLQLDHIYTADYFYLSMGLLAASLAACTYTRQWPAVKVAQRWRFLT
+                     QPKSLLKQGRTEVLPNARVSDLGAILLQRGYQVRESCACQRTAAALLLRFHSVAADPS
+                     PLLSFVLVAQVFVKDGSLYGFKGLAGKLGPIGVHAALLLCLFGTAWSGFGTLKGNVMC
+                     PEGQDFQVASFLQPSSPIASMPASASNVIHVNKFTIDYRPDGSVAQFYSDLSLLDPAQ
+                     GGKEMMRKTISVNDPFRFNGVTMYQTDWSLSAVTLRVLGQDAPLARAAQAAEAQAAAS
+                     TSGPTSSASSTSDALPQQRTAFNLPMASLEGKPGVAGRLWATFLPLAEPGQDGSAPKG
+                     ISILARDPQSVVFYDAKGQFVGVRRPGSGKPIEVEGLALVVEDVTGATGLELKSDPGV
+                     PAVYAGFGGLMVTTLISYLSHSQVWALQQGSSLFVSGRTNRAKLAFDRELDDILNAVP
+                     ELPPTAATTVASSASTAAPAPTAKQ"
+     exon            512..825
+                     /gene="CCS1"
+                     /number=2
+     exon            1045..1233
+                     /gene="CCS1"
+                     /number=3
+     exon            1418..1798
+                     /gene="CCS1"
+                     /number=4
+     variation       1648
+                     /gene="CCS1"
+                     /note="results in readthrough of typical intron 4 and
+                     addition of 36 amino acids within stromal loop"
+                     /replace="g"
+     exon            2000..2131
+                     /gene="CCS1"
+                     /number=5
+     exon            2253..2345
+                     /gene="CCS1"
+                     /number=6
+BASE COUNT      474 a    718 c    697 g    486 t
+ORIGIN      
+        1 cacatacgac cccaagcagg gccctgcgtg ggtgaaatgc cacacgcggc acgaatccgt
+       61 cggccactcc atgaattgcc cacaaggcca caattgagcc tacaaggatg tggaagtgat
+      121 aactacctag ctccactacc agctgcgact gcccagttct cgagctttct ttctgtccgg
+      181 ccccgcgctt gcacgcgtgc acaaaaatgc agccatacgc ttccgtgagc gggcgatgtc
+      241 tatctagacc agatgcattg catgtgatac cgtttgggcg accgctgcaa gcaattgccg
+      301 ggcggcggtt cgttcgctgt tttgcgaaag gtgcgcagcc ggggaaagaa cacacgaatg
+      361 ggtgtgcagg caattgatgg tttcactatc aattaattat cagggctggg ttgtacgcac
+      421 ttggcagtgg acttgcatgc gtggaagcga tgtcgccacg ctgctattca aatattaagt
+      481 tggagcctgc tctgctggac ccaaaacata ggaggacaac ctggtgacaa gaagaagctc
+      541 aacgtgacgg acaagctgcg cctcgggaac actcctccca cgctggacgt gcttaaagcg
+      601 ccacggccaa ccgacgctcc ctcagccatc gatgacgccc ccagcacttc gggcctgggg
+      661 ctgggcggag gagtggcgag ccccaggact ttggtgcagt ccaatgccgt gcaggtggcg
+      721 tggcgccggc tcatgaagga gctgtcgtcg ctcccacgcg ccatcgccat catggccctc
+      781 atcgccgtgc tgtcgggcct gggcaccttc atcccccaaa acaaggtgcg ggggaatgcg
+      841 acacatgaag ttaatgcaat tctgccaacc aagctgaagc acttgcattg gagccgcgcg
+      901 ggatgagcca gcacaccagc tttgacctgc gcgtaccgca accaggcacc tacgccctcc
+      961 cgcgcagtgc ctccctttcg gtctctccga ctcttccaac ctgtctgacc ctcatctcca
+     1021 ccccccaatg tatccatcca acagtccatc gagtactacc tggtcaacta cccagacggc
+     1081 gcggagaagg tgctggggtt cctgactggc gacctcattc tgaccctaca gctggaccac
+     1141 atctacaccg ctgactattt ctacctgtcc atggggctgc tggccgcctc cctggctgcc
+     1201 tgcacctaca cccgccagtg gcccgccgta aaggtgcgtg cgtaaagggt ttgggagtcg
+     1261 taggaggaag gcagggtaga tggcgggaag gggcggacag agaggaagga ttggggcgta
+     1321 ggttgtggtg acggagaagg cccggttgga gcctgcacgg gggttaggta aaggtgaaag
+     1381 gcaggcaaat accgcccact cgtcgtgttc ctgacaggtg gctcagcgct ggcgcttcct
+     1441 gacgcagccg aagtcgctgc ttaagcaggg gcgcacggag gtgttgccca acgcgcgtgt
+     1501 gtccgacctg ggcgctatcc tgctgcagcg gggctaccag gtgagggaga gctgtgcgtg
+     1561 tcagcgtacg gctgctgcat tacttcttag gtttcactcc gtcgctgctg acccttcacc
+     1621 tctgctttcc tttgttcttg tggctcaagt gtttgtgaag gacggctcct tatacggctt
+     1681 caagggcctg gccggcaaac tggggcccat cggcgtgcac gcggcgctgc tgctgtgcct
+     1741 gttcggtacg gcgtggagcg ggttcggcac cctcaagggc aacgtcatgt gcccggaggt
+     1801 gagggagcag aggacgcaca tggttgtctg gtggcggcac ggagacacgg tagtgacaag
+     1861 ccggctcagg gcagtttggg aaggtggtga tgcaagttgc tggccaggga agtacgctca
+     1921 cccatgttct ccgagtgctt gctatgttcc aacacgatcc cagaaatcgc acgaggacca
+     1981 atcaacgttg cgcctgcagg gtcaagactt ccaggtggct tccttcctgc agccgtcctc
+     2041 gccaattgcc agcatgcccg cctccgcctc caacgtcatc cacgtcaaca agttcaccat
+     2101 tgactaccgc ccagatggct cagtggcgca ggtgcgtgcg ctacaggttg gccggaacca
+     2161 tgccacactt tgcaccctca ccattcctgc gatagtgttg gttctcatca gtactgacgt
+     2221 attcatctcc ctccatctgc cctatcccgc agttctactc ggacttgtcg ctgcttgatc
+     2281 ccgcccaggg cggcaaggaa atgatgcgca agaccatcag cgtgaacgac cccttccgct
+     2341 tcaacgtgag aatacaacgc atgacggtac ctgct
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AY763288.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AY763288.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AY763288.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,64 @@
+
+LOCUS       AY763288                 723 bp    DNA     linear   INV 18-JAN-2006
+DEFINITION  Leishmania guyanensis putative elongation factor 1 beta gene,
+            complete cds.
+ACCESSION   AY763288
+VERSION     AY763288.1  GI:54300415
+KEYWORDS    .
+SOURCE      Leishmania guyanensis
+  ORGANISM  Leishmania guyanensis
+            Eukaryota; Euglenozoa; Kinetoplastida; Trypanosomatidae;
+            Leishmania; Leishmania guyanensis species complex.
+REFERENCE   1  (bases 1 to 723)
+  AUTHORS   Walker,J., Acestor,N., Gongora,R., Quadroni,M., Segura,I., Fasel,N.
+            and Saravia,N.G.
+  TITLE     Comparative protein profiling identifies elongation factor-1beta
+            and tryparedoxin peroxidase as factors associated with metastasis
+            in Leishmania guyanensis
+  JOURNAL   Mol. Biochem. Parasitol. 145 (2), 254-264 (2006)
+   PUBMED   16325936
+REFERENCE   2  (bases 1 to 723)
+  AUTHORS   Vergel,C., Gongora,R.E., Saravia,N. and Walker,J.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (24-SEP-2004) Biochemistry and Molecular Biology, Centro
+            Internacional de Entrenamiento e Investigaciones Medicas, Av 1a
+            Norte No. 3-03, Cali, Valle, Colombia
+FEATURES             Location/Qualifiers
+     source          1..723
+                     /organism="Leishmania guyanensis"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:5670"
+                     /clone="WHI/BT/78/M5313"
+                     /dev_stage="promastigote"
+                     /note="subgenus Viannia"
+     mRNA            <1..>723
+                     /product="putative elongation factor 1 beta"
+     CDS             1..723
+                     /note="similar to elongation factor 1 beta of Leishmania
+                     major (Friedlin)"
+                     /codon_start=1
+                     /product="putative elongation factor 1 beta"
+                     /protein_id="AAV32818.1"
+                     /db_xref="GI:54300416"
+                     /translation="MSVKDVSKKAAELEARLGGKLFLGGAKPTAEDVRMLNDLLGANH
+                     ASLYRWVKNMATYTEGERKAWGAPVRTAAPELRMPAPAAAAPAAAKKPVPAAAAPAAA
+                     KKPAPAPKAVAPAEDDDIGLFGETTEEEQAALEAKRAKDAEKKKAKKDVIAKSPILFD
+                     IKAWDDTVDLEALAQKLHAIQRDGLVWGDHKLAPVAFGVKKLQQLVVIEDDKVSGDDL
+                     EEMIMGFEDEVQSIDIVAWNKI"
+     misc_feature    379..381
+                     /note="putative; phosphorylation site"
+ORIGIN      
+        1 atgtctgtga aggacgtgag caagaaggcc gccgagctgg aggcgaggct gggcggcaag
+       61 ctgttcctgg gcggcgcgaa gccgacggcg gaggacgtga ggatgctcaa cgacctgctc
+      121 ggcgcgaacc acgcgagcct gtaccggtgg gtgaagaaca tggcgaccta cacggagggc
+      181 gagcgcaagg cgtggggcgc accggtgcgc actgctgcgc cggagctgcg catgcccgcg
+      241 cctgccgcgg cggcgcctgc tgccgctaag aagcccgtgc ctgccgcggc ggcgcctgct
+      301 gccgctaaga agcccgcgcc tgcgccgaag gctgttgccc ctgcagagga cgacgacatc
+      361 ggcctgttcg gcgagacgac ggaggaggag caggcggcgc tggaggcgaa gagggcaaag
+      421 gacgcggaga agaagaaggc gaagaaggac gtgattgcga agtcgcccat cctgttcgac
+      481 atcaaggcgt gggacgacac ggtggacctg gaggcgctcg cgcagaagct gcacgcgatt
+      541 cagcgcgacg gcctggtgtg gggtgaccac aagctggcgc ccgttgcgtt cggcgtgaag
+      601 aagctgcagc agctggtcgt gatcgaggat gacaaagtgt ctggcgacga cctggaggag
+      661 atgatcatgg gcttcgagga tgaggtgcag tcgattgata tcgtcgcctg gaacaagatc
+      721 tga
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/AnnIX-v003.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/AnnIX-v003.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/AnnIX-v003.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,373 @@
+LOCUS       DMSOS       14000 bp    DNA             INV       21-Aug-2001
+DEFINITION  D.melanogaster FlyBase-curated sequence: AnnIX.v003
+ACCESSION   AnnIX.v003
+SOURCE      fruit fly.
+  ORGANISM  Drosophila melanogaster
+            Eukaryotae; mitochondrial eukaryotes; Metazoa; Arthropoda;
+            Tracheata; Insecta; Pterygota; Diptera; Brachycera; Muscomorpha;
+            Ephydroidea; Drosophilidae; Drosophila.
+REFERENCE   1
+  AUTHORS   FBrf0104946 == FlyBase, 1996-, other
+COMMENT     Reference sequence of AnnIX == FBgn0000083
+COMMENT     This record is derived from the following:
+            AC009344 AC009344.8 17-FEB-2001
+            AY007377 AY007377.1 14-SEP-2000
+            AF261718 AF261718.1 28-AUG-2000
+            M34068 M34068.1 26-APR-1993
+            AA390914 AA390914.1 23-APR-2001
+            AW942105 AW942105.1 23-APR-2001
+COMMENT     The following contributed to reference sequence development:
+            bases 1..14000 == AC009344 14575..28574
+COMMENT     Reference sequence based on BDGP genomic sequence.
+FEATURES             Location/Qualifiers
+     gene            1..14000
+                     /gene="AnnIX"
+                     /organism="Drosophila melanogaster"
+     exon            4191..4247
+                     /comment="exon boundaries inferred from FlyBase alignment
+                     of cDNA to reference sequence"
+                     /evidence="experimental"
+                     /gene="AnnIX"
+                     /label="AnnIX|exon|1"
+                     /number="1"
+                     /primary="AF261718:1..57"
+                     /primary="AY007377:1..57"
+     mRNA            join(4191..4247,6408..6461,7031..7739,7800..7981,
+                     9240..9499)
+                     /gene="AnnIX"
+                     /comment="mRNA structure inferred from FlyBase alignment of
+                     cDNA to reference sequence"
+                     /evidence="experimental"
+                     /label="AnnIX-RA|mRNA"
+                     /primary="AY007377:1..1263"
+                     /primary="M34068:<1..1095"
+                     /symbol="AnnIX-RA"
+     mRNA            join(4191..4247,6408..6461,7031..7739,7800..7981,
+                     9686..9879)
+                     /gene="AnnIX"
+                     /comment="mRNA structure inferred from FlyBase alignment of
+                     EST and cDNA to reference sequence"
+                     /evidence="experimental"
+                     /label="AnnIX-RB|mRNA"
+                     /primary="AF261718:1..1197"
+                     /primary="AW942105:complement(>512..1)"
+                     /symbol="AnnIX-RB"
+     intron          4248..6407
+                     /label="AnnIX|intron|1-2"
+     exon            6408..6461
+                     /comment="exon boundaries inferred from FlyBase alignment
+                     of cDNA to reference sequence"
+                     /evidence="experimental"
+                     /gene="AnnIX"
+                     /label="AnnIX|exon|2"
+                     /number="2"
+                     /primary="AF261718:58..111"
+                     /primary="AY007377:58..111"
+     CDS             join(6432..6461,7031..7739,7800..7981,9686..9739)
+                     /gene="AnnIX"
+                     /aa_size="324"
+                     /derived_from="AnnIX-RB"
+                     /evidence="predicted"
+                     /label="AnnIX-P2|CDS"
+                     /symbol="AnnIX-P2"
+                     /translation="MSSAEYYPFKCTPTVYPADPFDPVEDAAILRKAMKGFGTDEKAII
+                     EILARRGIVQRLEIAEAFKTSYGKDLISDLKSELGGKFEDVILALMTPLPQFYAQELHD
+                     AISGLGTDEEAIIEILCTLSNYGIKTIAQFYEQSFGKSLESDLKGDTSGHFKRLCVSLV
+                     QGNRDENQGVDEAAAIADAQALHDAGEGQWGTDESTFNSILITRSYQQLRQIFLEYENL
+                     SGNDIEKAIKREFSGSVEKGFLAIVKCCKSKIDYFSERLHDSMAGMGTKDKTLIRIIVS
+                     RSEIDLGDIKEAFQNKYGKSLESWIKDDLSGDYSYVLQCLASY"
+     CDS             join(6432..6461,7031..7739,7800..7981,9240..9293)
+                     /gene="AnnIX"
+                     /aa_size="324"
+                     /derived_from="AnnIX-RA"
+                     /evidence="predicted"
+                     /label="AnnIX-P1|CDS"
+                     /symbol="AnnIX-P1"
+                     /translation="MSSAEYYPFKCTPTVYPADPFDPVEDAAILRKAMKGFGTDEKAII
+                     EILARRGIVQRLEIAEAFKTSYGKDLISDLKSELGGKFEDVILALMTPLPQFYAQELHD
+                     AISGLGTDEEAIIEILCTLSNYGIKTIAQFYEQSFGKSLESDLKGDTSGHFKRLCVSLV
+                     QGNRDENQGVDEAAAIADAQALHDAGEGQWGTDESTFNSILITRSYQQLRQIFLEYENL
+                     SGNDIEKAIKREFSGSVEKGFLAIVKCCKSKIDYFSERLHDSMAGMGTKDKTLIRIIVS
+                     RSEIDLGDIKEAFQNKYGKSLESWIKEDAETDIGYVLVTLTAW"
+     intron          6462..7030
+                     /label="AnnIX|intron|2-3"
+     exon            7031..7739
+                     /comment="exon boundaries inferred from FlyBase alignment
+                     of cDNA to reference sequence"
+                     /evidence="experimental"
+                     /gene="AnnIX"
+                     /label="AnnIX|exon|3"
+                     /number="3"
+                     /primary="AW942105:complement(512..376)"
+                     /primary="AF261718:112..820"
+                     /primary="AY007377:112..820"
+                     /primary="M34068:1..655"
+     intron          7740..7799
+                     /label="AnnIX|intron|3-4"
+     exon            7800..7981
+                     /comment="exon boundaries inferred from FlyBase alignment
+                     of cDNA to reference sequence"
+                     /evidence="experimental"
+                     /gene="AnnIX"
+                     /label="AnnIX|exon|4"
+                     /number="4"
+                     /primary="M34068:656..837"
+                     /primary="AF261718:821..1002"
+                     /primary="AW942105:complement(375..193)"
+                     /primary="AY007377:821..1002"
+     intron          7982..9239
+                     /label="AnnIX|intron|4-5"
+     intron          7982..9685
+                     /label="AnnIX|intron|4-6"
+     exon            9240..9499
+                     /comment="exon boundaries inferred from FlyBase alignment
+                     of cDNA to reference sequence"
+                     /evidence="experimental"
+                     /gene="AnnIX"
+                     /label="AnnIX|exon|5"
+                     /number="5"
+                     /primary="AY007377:1003..1263"
+                     /primary="M34068:838..1095"
+     exon            9686..9879
+                     /comment="exon boundaries inferred from FlyBase alignment
+                     of cDNA to reference sequence"
+                     /evidence="experimental"
+                     /gene="AnnIX"
+                     /label="AnnIX|exon|6"
+                     /number="6"
+                     /primary="AF261718:1003..1197"
+                     /primary="AW942105:complement(>193..1)"
+BASE COUNT     3803 a   3178 c   2983 g   4036 t
+ORIGIN
+        1 accgttagaa atgttatgcg ggatacatag ttaagttgca taccctttga gttacaatca
+       61 ctagttaata atatctacgt tattaccaac acgcacactt tatcgtaata cctccttgaa
+      121 gtttaattta tacatcaact ttatcagtca aaactttgat ttcgtctgac acttttttcg
+      181 attacgatcc gtcgccaata attgcgataa atcttatcaa gtctttttgg gattggcgct
+      241 caaatttaca atatggccgt acatcctact tatgtatgtt ttttaactaa ttaatcacca
+      301 caatgcaaag tactctttct ttgttgagcc catatgcact cacatttgca ccatgaatca
+      361 tgtcagtagc tcgtttcatg taacaatttc tactttgcca gattacgatg cgttcggaac
+      421 aggcagataa gaattcggcc catccaagaa aggccttgac agttctaccc caaaatagag
+      481 atatcctcgt gatattagaa ggaacccaac aatatgctcg ttcttatctt cttatagaaa
+      541 tttgtgaatt cccgtatcca atgaaatcat tttacttagt aaaatgattt gttaggcctt
+      601 aaaaaaaaac aaaaacaccc gaactatcag taccacaatt taagagagaa ctcgttatta
+      661 tttaacttta ttaattatgt atttctttat caaaagagca gactttttgt ttgtgactgt
+      721 cttcaacatt agatccgtct ttaacattag atcagatcac ctgacacggg aaactctcgt
+      781 agactttata caattcaaaa aaaccaaaat cgttacttga cacaaatatc ataactaatg
+      841 cataaaatat gaaatgagag atatctaaaa tagcttggca tattttcttg gtaaaataaa
+      901 tgtgttaaat acaaagaatg taaaatgcaa taaatgatac atatatcaaa aatggaatac
+      961 cacggttact ttaagtgcta gcataacaaa ttacaataat aattcaatat ctagccattc
+     1021 gttgcacata atttggaggg ttaagagggt aaacaatgga tgggaaatgg gctgggttag
+     1081 ttgcgaattg gtttatatgg tttataatcc aacatcagta ttatcgtttt cgttagtatg
+     1141 taaatacaat ttgattttgt tctatcgtga atttcaattg gaagctttta tgagttctgt
+     1201 cccaccacct gcacgtagtt ggctggaaag agtccgtagc ggttcttgca caatcccctc
+     1261 caccacccat cgtcgatctt ctcgatgtgg gtgatcacgt catctggatc aaaggatatc
+     1321 tcgtcgtcat ccgccgcctg gtagtcgtac agggcaatgg cgtggattcc ggtgtcctcc
+     1381 agatagtcgg ccaaattgtc tgagttcgcg tagattgcct cctcgggaac ggttgcggtg
+     1441 ccactcggag caacagcctc tgaaacggtt ccattcgttg gtggcaaggg cgaggcagcc
+     1501 ttaatctcgg cctggttctg gtacagtggc tcttcttcaa ctactggctc aggatcggct
+     1561 tgaggttgcg gctcaggttg gacttggact tgggcgtgga cttcgggttg aggcgtcggc
+     1621 acatacactg gtgattgcgg ctcacttctg ggaggcgtgt ccacggtctc aacctcaatt
+     1681 tggggcacca catctggtgg tggagctgca gccttggcca ctggagctgt ttccggctct
+     1741 cgagcgggga ccactgtcgg agtaggagca accgcagctg atgttgtact tgccgttggc
+     1801 tcctcctttg cttctaattc tattttaaca ggctgcgcct tgggaatgat aatgggttcc
+     1861 tttcgtgctg gaggggtttc agaaacaggc gactgcatct ggttaaatgc actgatagca
+     1921 ttgccaatgc caccggtccg acctgtctgg atggccgccc tgctgccctt tggaggcggt
+     1981 gcttccgtcg aggtacgagg agtgttctcg gccacagtct tcttggcggc ttcctcccga
+     2041 tcccgcttgt ccttggcctc acggagacgt ttctgttcct cggcccgctt acgtgactcc
+     2101 tcctcggagt ttttagccag attctcgaac ttggcacgca aattggaagg cttggcacct
+     2161 tcgatgactg gcttgacctt gtggtccacc tggctggcgt gcttctgcgg agcctctttg
+     2221 tggtcccatc ccaccgcgga cttgtccttt cggtcctcct gaacgccaaa cttgccgcca
+     2281 aagcccttgg agtaatctta tgttgggaga agatataaat aaaataatca gacatgaaca
+     2341 tactaagaaa aaatcaatga gaaattcatt tccgttaggt aacttaccct tctgagactc
+     2401 gtgcttctcc accttctcga tgtgatccca gcccacggcg gatttatcca cccgatcgga
+     2461 ctgcactcca aatttgcctc cgaagccggt ggcgtaatcc ttctgggagg cgtgcttctc
+     2521 gaccttttcg acgtggtccc agcccacggc agacttgtcc ttgcgatcct cttgcactcc
+     2581 gaactttccc ccaaaaccat cgctgtagtc cttctgcgag gcgtgctttc ccaccttgcc
+     2641 ctggtagtca tgacccacag ccgacttgtc catgcggtcc ttctcaacgc caaacttgcc
+     2701 cccgtatccg tatcctgcgt tctgatcctt cagcagctgc ttcttcttgt ccagatcggc
+     2761 ctgctccgtc tcctcgcgca gcttgtccat gctaaaaaga ataaagaaag ggaacaatga
+     2821 acccgatacc cagtgtttcg agaatttcgg atgactcact cgatggtgcc tgctgtgcga
+     2881 ccgctgccgt cgatcgtctt tgatccccag cgctgctcct gctcactgac atcgttcacg
+     2941 aagtccggat ccgtctccca gtcgtcgtcc tccgcggagg ctgcactggt ggcctgaatc
+     3001 tggtgaccgg cacttgcctt ccacattctt tttaccggtt tctggtcgct ggttactcgc
+     3061 ttatccttat ctgctgggta gatagataaa caccatgacg agtgtgagta gtcggggcag
+     3121 gttttcagtt gcaggggctc cgcttccgat tcgtggccca gataacaagt cacaaaacac
+     3181 cgaagaaggg aggggccgac aaagccggat cgggaacaag atgccactgg cgctctatgt
+     3241 tcatatgtat actcacatcc cgatgactaa ctcctactgc aagaggaact tatgatctga
+     3301 tgccgctagg gatgtctgca aatttctggg aggagaaggg ttaaacaatt gattcataaa
+     3361 gagaccaaca aacgggaaac taactgaact aaatttcatt cgatacaatg gatgcatcga
+     3421 tatgctacga aaagccgata aattattgtg atgagcttga caaaaggtag ttgctgcagt
+     3481 tttaggggcg cccaatttaa atgattaaga gatacagtcg tggttctttt actttattcg
+     3541 ataagtcaat gcaccgatat atactgcagt accaatatat gtgatcgtga tcaatagaac
+     3601 tgagtgctcg ggcaaaaggt atttgatgtt ataccaagtg gcgccatagt aatttgtata
+     3661 attaataaga ttctgccaga acggtaatta acaggtataa gagttgactt ttttcatata
+     3721 accgcaaaca tatcatcgat atatcgatag ttttggattt gaatttgctt cgatgtcggc
+     3781 tgacaattaa cgtaattggt ttttatatat agttataaaa atacaaataa aaatagttat
+     3841 aaaacggctt aggaaggaac aggatattta tttaactgta ttgccctaac ggctgaatat
+     3901 tgaagtcaaa ggttacattt tgaaattcaa tgcgtaatca gatcttattt tcccaatgtc
+     3961 gtcattatca caacggaaat atcatgaatg tctcaacagc ttccaatgcc gatgattcat
+     4021 ccatctcgga acactttaga ttctacggtg tatactatga cgacgcagaa gataccagct
+     4081 tccagttagg cgctaccacc gaatatttcc tagcaacaac aacctatcgc cggttggttc
+     4141 atctctaatc gacttccagc gagagagcgc gagtataaaa gccaaagacg gagcgcaaac
+     4201 agctatggta tttattcaca tcgcttgcag ttcgcatcgt ctgacttgta cgttgaaaat
+     4261 cgaatatcat tggtaaatca aaaggaattt gtagataaaa ctcattgttt actctcgctg
+     4321 tgccagaaat cgcaaaaaat caagcggaaa atcagtcgga aaagtgagaa aagaaagtgt
+     4381 gtgctaaaca ctcaaagata attttcattt gtgccattcc aaaatgggca ttttgtagaa
+     4441 ttctagttgt gggtgcggta tcgcgagaat tctccagaat cttgtttctc tatttacttt
+     4501 atatttcaat aataaaagcc tcgggtggga gaaaaattca aaaccgaaag atgacatcat
+     4561 caaactcaaa agcaaaagcc ccactctttt ggcgttttct ctcgcgacgc gtgtgacttt
+     4621 tctcatctcc gacgaaatct aagtaaactt ttctgttcct ttcttgcttt cccattttac
+     4681 gtaggaatga gaagcggaat ttttcgaaat atctctatcc cccagtgccc tgcttaattt
+     4741 agtaattcaa tcaacttcaa ttgcccgaca ggcacttcaa actacagtaa gcactgtgta
+     4801 tgagcgcctg tgccttgctc agcattactc accggaatcg gatgtggcca cattccaccg
+     4861 tccacaaaat atagtagtag tgtactcgag gccgtgcaat tatagtagaa taggtgtgtc
+     4921 atcaaggttt attactaagt gaattgctca gtgcatttgc aaaacatttt tgaaacattt
+     4981 cgaggtacag tccatcgagg tctccaaaag aagttggcaa aagcttttgt ttatggagtt
+     5041 tttcacacat ggtcacgagc tattccagtt tttacaccta tattataata tctctttttt
+     5101 aaagtcgtgc tcagagctta aaattgacgt cacaaatctg gtgaatcact gagtgctgta
+     5161 atttcaatgt atcatcagtc tatttcaaga acaacattac cagatacgct atgctaaagc
+     5221 atatctatca atcttgtttt ttccagaccc caaccacttt ttataatgaa acaaaagaat
+     5281 tgtaattaag ggtgttttca aacacaaatg tgttttctac attgaggtca aactattacg
+     5341 gagaactcca atcaatattt agagagtcta accaattatt attgattaca ttattctcaa
+     5401 cttgcattat aatttattta aaaagaaatc atatattaac taaagtagtc tataaacact
+     5461 agaatcttga gtttaattgt acgccatacg taaaagaacc ggtttcaacg ttggccaaaa
+     5521 cattttgttt gtttatagct cgactgcaga ggcgtttgta gttgtatctt tttgttaact
+     5581 tttttgtttt tttggatggc gtcgccacgt caagtctttt ataatgcttt tttcggctgc
+     5641 cttttgttga ctttctactt tggcggcagc atagaaaggc aaacatgtat tttcagcact
+     5701 gtcgcactgt gattactaat ttggggaagc tatcaaaaat ctcaatccat ttatcattag
+     5761 gtaatttggc cctattgatt ttagggatta tgtggactag tccctcgaga tagtgttcca
+     5821 cttgacaact gaagtggagt ggtagatcta gaacgcaaac gggtgacgag tcagacattt
+     5881 ccctgaagct ttctcttggc cacaaccaag ccgtgcccga tgttttccaa gccagaacgc
+     5941 aatgagctca tccgtttatg aggccacttc gtgtgggttt ggtctggcct atcacgaaac
+     6001 tgccgcgcca tgccatttgg ctatcagacg cacccggttc caaggttcgt tcctccgcaa
+     6061 actgcgtact gaaaagtgga aactttcact tttccccggc agtacgttga acttcgattt
+     6121 gggcaccggt cggcaaggaa aactaaaaaa aaaaacatct cggtaaataa actaggaaaa
+     6181 aaaatcaatg ggtaaaaatg ctctcagacc agcgccaggc tggctacggg gcgtatgcgt
+     6241 aatgtgaggc ttttacgagt tgatgatgtc accaggcagg aattaaccga aggccgggct
+     6301 tattgggtct gggaaacata acgtatccaa cattgctggg ggttcagttt tcatcacgat
+     6361 ttcggcgtag ccattattca ttatgatttc tcacttccta cttacagatt tttcacaaca
+     6421 aaccaatcaa aatgagttcc gctgagtact acccattcaa ggtgagttta caatggattg
+     6481 tactttatga gtctggtata aatcaattat ttcatgcgat tagcgccgtt aatgtaaaaa
+     6541 atcagatcaa attagattca acagatagat gagaatatct taatattatt ttttctaaaa
+     6601 cgtgtgttgt tgtcaagtaa gcaagatttt tctcgatgca atctataaat tattacaacg
+     6661 accagatgct acgaaattat ctataattgg gctattaaat tatcatcaga ggtgtatact
+     6721 agacaatcgt tgacaaacaa gtaccgattg gtgggagaag agagtgataa gagggtttca
+     6781 gttatgagtt ccttagataa gagtcacaac gaaaaaaaaa gtcaatttag aaagctaact
+     6841 ttattgcaga ggaattcatg caatactgag ataatacatg tagggaataa ccacatatgt
+     6901 atttgtattt agaacaagtg ccactgagta gtgttgagtc atttctttgg gattacgtgc
+     6961 cctgcattaa aatacaccca attcttcttt gttgttactc atttgctgat tgcctatatt
+     7021 cgatttgcag tgcacaccca ctgtctaccc ggcggatccc ttcgatcccg tcgaggatgc
+     7081 ggctattctg cgcaaggcga tgaaaggctt cggcaccgac gagaaggcca tcatcgagat
+     7141 cctggccagg cgtggcatcg tccagcgttt ggagatcgct gaggcgttca agacctcgta
+     7201 cggcaaggac ctgatctcgg acctcaagtc cgaactgggc ggcaaattcg aggatgttat
+     7261 cctggctctg atgacgccgc tgccccagtt ctatgcccag gagctgcacg acgccatctc
+     7321 gggactggga accgacgagg aggccatcat cgagatcctc tgcacgctgt ccaactacgg
+     7381 catcaagacc attgcccagt tctacgagca gagcttcggc aagtccctag agtccgacct
+     7441 gaagggcgac accagtggcc acttcaagcg gctgtgtgtc tcgctcgtcc agggcaaccg
+     7501 ggatgagaac cagggcgtgg acgaggccgc ggccatcgcc gatgcccagg ctctgcacga
+     7561 cgccggcgag ggacagtggg gcacagatga gtccaccttc aactcgatcc tgatcacccg
+     7621 ctcctaccag cagctgcgcc agatcttcct cgaatacgag aatctgtcgg gcaacgacat
+     7681 cgagaaggcc atcaagcggg agtttagcgg ctccgtggag aagggtttcc tggccatcgg
+     7741 tacgttctta tagcatccta ttctttaggg tcccttctaa ctgatgcatt gctctgcagt
+     7801 caagtgctgc aagtccaaga tcgactactt ttcggagcgc ctgcacgact caatggccgg
+     7861 catgggcacc aaggacaaga cgctgatccg catcattgtc agccggtcgg agatcgatct
+     7921 gggtgacatc aaggaggcat tccagaacaa gtacggcaag agcttggagt cctggatcaa
+     7981 ggtaaatacc gatttcaatt acattcatat ctgcgtgtgc ttgccagaac tttcgattct
+     8041 gcaccctgtt caatgtgcca ctaactcgca ttcgattgca cctgcaacaa atcccattaa
+     8101 ttgtggctcc atcaaagttt aataatcgtt catccaagct ggcttctcct gttgttgtta
+     8161 ctgctccttt gcccaacact ttcttgccga tttctgaagc cattatccct tcccgcccga
+     8221 ttgcttcatt tgtgtgcata aaacattaaa acttggcata ttctatattt ttagggcgat
+     8281 acatccggcg attataagcg tgccctattg gctattgttg gcttctaaaa agaaccccat
+     8341 ccaacaataa tttatctctt tcgtctgttc cacgctctaa actatatgca aacagaatgt
+     8401 acaaacaaaa ttccgatatc aaatagttga caatgtatag tttttgaatt ggaacacgtt
+     8461 ttaacgaaga cgcagtgcat ttaagtcgta gaatcagaac cccagtctcg catcctgttg
+     8521 attattataa ccattgtgac ttttattatt atgactatgc acgccacatg cacataattg
+     8581 tatctctata attactacac ctcaggctac ttgcattgct gtgtaggtat actttcagtt
+     8641 ttgttttgag tctcatttgc aagatatttt aacttttaaa aaatacgaaa ataaaaaata
+     8701 cgaaaaaatg aaatacaaaa ttcaaatcga gtttctgtta cctttagcag aggtctctgc
+     8761 actgcttgtc atgtaaataa cagcgctaca ttgggtcgcc taacatcaaa acattaaaca
+     8821 ttaaaaaggg cgtggattaa accaacttaa aaatcgattt aaatggggct aaatgagtat
+     8881 attagccctc tttaattgtc tatataaact agatcagcaa gtgtataagg tatacaaact
+     8941 gttaaatata gttcaattta gatctaaata tacttgcact gcttgctaaa agtacatgtc
+     9001 aattacatgt aaatataatg tacatacaat ttcaagatgt aaaactttaa atgttatgtt
+     9061 aaatttgaaa gacattcatt tgctgatcag gtagatatat agttgattac cccttggagg
+     9121 agtagcttcc ggcaattaac caaaccataa gccatgtata caaagtaaaa ggcgtttaat
+     9181 gctctgaccc tctgctcttt tcacgctttt ctctacccgt ttaaacgaac caacaacagg
+     9241 aggatgccga gaccgatatt ggatacgtcc tggtcactct tacggcttgg tagacggaag
+     9301 cagccggaat atccgaatat ctatgagcaa taccccactg ttcaagtaga aaatgccaaa
+     9361 aacaaaaaaa cgttgcattt ccccaaaaaa aagtataaca aaagcgaaga acaaatggag
+     9421 ttggtctata tacagtagtt gtgatgtgtt ctaaaaatcc aatctacaaa acgcttagta
+     9481 ttttccctct gtgcaataac gtctaacgtt caacgattat ttaacatttt tacgtatttt
+     9541 tattttgtat acatgtcttt ttttattgta aattatggcg catcaaagtc gtatgcgtag
+     9601 tttgtgcttg tattaactaa taaagttggc ttacactcaa cggcaaagct gggtcacatt
+     9661 caccatccac tgatctcctt tccaggacga cctctccgga gactacagct acgtcttgca
+     9721 gtgcctggcc tcctactaag gatttcctcg ttggatcgat tgttaaccat tctatttgtt
+     9781 gtaactctta ctttaaggca agcatcgttt gccaactgtt ttgcggaaga ttcatagcct
+     9841 atgttcaatt cataaatgca ctgtaaaatc gcggtaaata attggaagat tttttcactt
+     9901 atctagggta accgaaacca agggggaatg ggtattgggg aatattcgtt aagggggaat
+     9961 gctgtttggg ttttctactt ggcaactcga tcctggtagg ctgcccacag gcggtcgcgg
+    10021 tacgtctgag ccgcctgctt cgggctgctg gccttcagaa tgcccctgcc caccactccg
+    10081 atgtcggcac cccgctcctt gaccacatgc tccggactct ggtactgctg gcccaattgg
+    10141 tccacgccct cgtctatctt cacaccgggc gtcagttgca gaagtccggg gaaggcaaag
+    10201 gcatcggagg attggcatac cacaccggca acgaaatcta catcggctcc ctcggttgcg
+    10261 atcttgttgc tgttctcctt gtacttggcg tcgatcaggt tgccgctggc agacatctcc
+    10321 gccagcagga agacgccgcg ttccttgccg gctcctcctt cgccaaggcc cgccttcaga
+    10381 ccctgcagaa tactacgtcc aggtaaggtg tgggccgtga ccagatctgc ccaactggaa
+    10441 atcttataga tgcccttgcc gtactgcagg gacaccgtgt tgccgatgtc tgcaaacttg
+    10501 cgatcctcca tcagcaggaa attgtgccgc tgggccagag cttgcaggtc agcgatgaat
+    10561 ttatcactga aatcctccac aatgtctacg tgcgtcttca gcaggcaaat gtacggaccg
+    10621 cacttgtcgg ccacgtccag gatctcatcg gcgtgtgtca ggtcggcggc caagcagagg
+    10681 ttcgtctgtt tgctggctat caggttgaag aggcgcttgg ccaccgcgct cttagccaga
+    10741 ttggcgcgat tctcgtaggt cagtttagtg cgctgcaagt cgttggctgc ggggaaagtg
+    10801 atggtgatta gttaacactt caattttgaa acgtgcacgt agacctatct tatcgcctct
+    10861 tcttatcgct atatggttgt tttgtaccta tgccaatttt cttcaatagc ttcacacttt
+    10921 ggcgatccca aagtacataa ctcttttcaa ctgggaatca attttggcta ttgtttatac
+    10981 agtatcttaa gagtatatac aaaagttact ccccaaaggc aaagacgatg attatgatga
+    11041 gattagaata ctgtgacttt acaggtggaa acaaaaagca aaccggttcc catacatctc
+    11101 atcaaagcgc cataaacgaa atgtgattat ttatagaaga tgggcttgat tgaaggagca
+    11161 ggctggctct cgagtgggct gataagcgtg gtggcataat tgagtgcgga ccggacactg
+    11221 ccctgaatca gaatcacgtg ccgcattccc gggaatgatc cggtccgata gtggaatcca
+    11281 gatcggccta ccgccgttta tggcggttgt ataactttat agtagcgcga ctggagcgct
+    11341 ccctaattga atttgttcga ggccatgtgc tggccgagtg tcatgtactt attatctatc
+    11401 tatgcatcta tctatctggc agatatcacg gggatctgga gcatacgcac ctctgacaac
+    11461 gtcgccctta tcgccgccga cgaaggtacc atcgctgttg atttgcacgg cggcgatgta
+    11521 cttggccacc gcctccactg tggacttctc gatgcgcccg gcctcgtgaa gtgtgttcag
+    11581 cagaaaggag agcgtgaaga gcgagtgcat ccgcacgccg tgcttggcga tgttggccac
+    11641 tccgccctgc tcacggtcta cgacgaccac ggcgtcggtg accacaatgc cctcgccttg
+    11701 cagatcccgc accgtgtcca ggatgctgga gccggaggtg accacgtcct cgacaatcag
+    11761 acaggtgtcg ccagcattga agatgccctc gaccagcttc ttggtgccgt acgccttggc
+    11821 ctccttgcgc cgcaccagca tgggagttcc ctgctgcacg gacacaatgg tggccagcgg
+    11881 gagcgccgtg tagggaacac cgcacacgtg tttggcgctc agctgcttgt ccttgatgtg
+    11941 ttccaccagc aggtcggata cggtttgctg cgggaagacc aagttagcta ctgccattga
+    12001 catggagcac taatcccaaa tactgaccat cacatccgga taactgacga tcactcggag
+    12061 gtcgaagtag accggcgaat ttatgcccac tttcatcttg aagtcgccga acttgaaggc
+    12121 attgatctcg aagagcttca gggccagggc ccgcattttg tcggagttct gggcaaccat
+    12181 gctggcaatt ctaaatctcg atcttaattc ttcacacacg tgctagctag gctccaataa
+    12241 gaaccgtcca attgggagtc tacgcttttt aaacatgctg ccagtgtgca cgtatctgct
+    12301 gtgacattgg ggcacatttc gaacacccta attaaggtac aagttctggt tgcgccgcct
+    12361 gggtggttaa cttcgctatg ccgccaaact tatcgaagtt caaattatta taaatgtcgt
+    12421 agattttatc aacattggct tcgaattaat aaacgtttat tattagttat agggtaacaa
+    12481 agtagcataa gtgttaaggt tttgaaataa actattttgc atgtgaaata tttcccaaat
+    12541 tcataaaata tataaccttt agtttctgag aagtcttaag aaatttcaag gaaatgaatg
+    12601 gatggattat atacaatatt ttgtccgctg cattgctgta ttgttcacta cttagcattt
+    12661 gtaatctgaa agctttggct ccgcccacaa acccttgact gcaacttcaa ggggaggagt
+    12721 ctagtcattt tgcaaccacc aatgacagtc ctgtaagctc atattgcaaa atgaaagcca
+    12781 aagcgcctgc gtaagtcaac aaagtttgcg ccattcattg aaacaattcc agatcctttg
+    12841 gcgcgtgttt ccaaaattta gtttcttttc gctggtctcc aaataagtcg caaatttgtg
+    12901 ctccaaaagc ggcaacttct tagtcgaaaa atcggttttc tctcaatcca tttctcgcct
+    12961 gcgttgcgat ggccagttca agtggtgaac ctgctgatga agtggctaat aagcgtcctc
+    13021 gtcttgtggc taatcccaag gccaccaaaa tagttgaacc cacaccggcc aaggtcacca
+    13081 atcgggtgcc caagtgcgcc cgctgccgga accatgggat catttcagag ctgcggggtc
+    13141 acaagaagct ctgcacctac aagaactgca agtgcgccaa gtgtgtcctg atctttgaga
+    13201 ggcagcggat catggccgct caggtaagtt aggatttata tgcacgatga caagcaatcc
+    13261 tttctgctta attggcaatc attgacatta ctatcctcat tttatgatta ctgcccactt
+    13321 gctaacttta atgtcatcta tcctggatgg taaaatcgct atcccaaaaa tagcttttta
+    13381 aaaattcggt gcattcgaat acagaaaatt gcctggttag atccatccat agacatccaa
+    13441 accatccaga ccagatatat tgctctaaca ttcggagact ttattcccag tcctttagaa
+    13501 aatttcttct tgtaaaaaca tattcccttt attagcattt acttaaaatg acatatcaaa
+    13561 tattctcaaa agccaaaagt tttctaaaat aacttcagga tattaatgta taaatgtata
+    13621 agcataaacg taattgtgtt tcatgttgta ttgttcgcaa tggattccgt gatcgatttt
+    13681 tactaggtat aactttgaaa cccaatttaa gcctttcgat tataatttaa cttgattaat
+    13741 gtcactgtta tatttataat ttactaacct gggacgacaa acaaaaacac ctattagcaa
+    13801 ggggagctta aattaacaat agcaccgaaa actccgacat tttcttatat cgtgttttgt
+    13861 ga
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/BAB68554.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/BAB68554.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/BAB68554.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+LOCUS       BAB68554                 141 aa            linear   VRT 11-APR-2002
+DEFINITION  alpha D-globin [Aldabrachelys elephantina].
+ACCESSION   BAB68554
+PID         g15824047
+VERSION     BAB68554.1  GI:15824047
+DBSOURCE    accession AB072353.1
+KEYWORDS    .
+SOURCE      Aldabra giant tortoise.
+  ORGANISM  Aldabrachelys elephantina
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Testudines; Cryptodira; Testudinoidea; Testudinidae; Aldabrachelys.
+REFERENCE   1
+  AUTHORS   Shishikura,F.
+  TITLE     The primary structure of hemoglobin D from the Aldabra giant
+            tortoise, Geochelone gigantea
+  JOURNAL   Zoolog. Sci. 19, 197-206 (2002)
+REFERENCE   2  (residues 1 to 141)
+  AUTHORS   Shishikura,F.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (29-SEP-2001) Fumio Shishikura, Nihon University School
+            of Medicine, Biology; Oyaguchi-kamimachi, 30-1, Itabashi-ku, Tokyo
+            173-8610, Japan (E-mail:fshishi at med.nihon-u.ac.jp,
+            Tel:81-3-3972-8111(ex.2291), Fax:81-3-3972-0027)
+FEATURES             Location/Qualifiers
+     source          1..141
+                     /organism="Aldabrachelys elephantina"
+                     /db_xref="taxon:167804"
+                     /note="synonym:Dipsochelys dussumieri~synonym:Geochelone
+                     gigantea"
+     Protein         1..141
+                     /product="alpha D-globin"
+     CDS             1..141
+                     /coded_by="join(AB072353.1:1..92,AB072353.1:307..511,
+                     AB072353.1:739..>864)"
+                     /note="hemoglobin D"
+ORIGIN      
+        1 mlteddkqli qhvwekvleh qedfgaeale rmfivypstk tyfphfdlhh dseqirhhgk
+       61 kvvgalgdav khidnlsatl selsnlhayn lrvdpvnfkl lshcfqvvlg ahlgreytpq
+      121 vqvaydkfla avsavlaeky r
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/BC000007.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/BC000007.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/BC000007.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,127 @@
+LOCUS       BC000007                 981 bp    mRNA    linear   PRI 09-DEC-2005
+DEFINITION  Homo sapiens px19-like protein, mRNA (cDNA clone MGC:1082
+            IMAGE:3505068), complete cds.
+ACCESSION   BC000007
+VERSION     BC000007.2  GI:33875090
+KEYWORDS    MGC.
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 981)
+  AUTHORS   Strausberg,R.L., Feingold,E.A., Grouse,L.H., Derge,J.G.,
+            Klausner,R.D., Collins,F.S., Wagner,L., Shenmen,C.M., Schuler,G.D.,
+            Altschul,S.F., Zeeberg,B., Buetow,K.H., Schaefer,C.F., Bhat,N.K.,
+            Hopkins,R.F., Jordan,H., Moore,T., Max,S.I., Wang,J., Hsieh,F.,
+            Diatchenko,L., Marusina,K., Farmer,A.A., Rubin,G.M., Hong,L.,
+            Stapleton,M., Soares,M.B., Bonaldo,M.F., Casavant,T.L.,
+            Scheetz,T.E., Brownstein,M.J., Usdin,T.B., Toshiyuki,S.,
+            Carninci,P., Prange,C., Raha,S.S., Loquellano,N.A., Peters,G.J.,
+            Abramson,R.D., Mullahy,S.J., Bosak,S.A., McEwan,P.J.,
+            McKernan,K.J., Malek,J.A., Gunaratne,P.H., Richards,S.,
+            Worley,K.C., Hale,S., Garcia,A.M., Gay,L.J., Hulyk,S.W.,
+            Villalon,D.K., Muzny,D.M., Sodergren,E.J., Lu,X., Gibbs,R.A.,
+            Fahey,J., Helton,E., Ketteman,M., Madan,A., Rodrigues,S.,
+            Sanchez,A., Whiting,M., Madan,A., Young,A.C., Shevchenko,Y.,
+            Bouffard,G.G., Blakesley,R.W., Touchman,J.W., Green,E.D.,
+            Dickson,M.C., Rodriguez,A.C., Grimwood,J., Schmutz,J., Myers,R.M.,
+            Butterfield,Y.S., Krzywinski,M.I., Skalska,U., Smailus,D.E.,
+            Schnerch,A., Schein,J.E., Jones,S.J. and Marra,M.A.
+  CONSRTM   Mammalian Gene Collection Program Team
+  TITLE     Generation and initial analysis of more than 15,000 full-length
+            human and mouse cDNA sequences
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 99 (26), 16899-16903 (2002)
+   PUBMED   12477932
+REFERENCE   2  (bases 1 to 981)
+  CONSRTM   NIH MGC Project
+  TITLE     Direct Submission
+  JOURNAL   Submitted (03-NOV-2000) National Institutes of Health, Mammalian
+            Gene Collection (MGC), Bethesda, MD 20892-2590, USA
+  REMARK    NIH-MGC Project URL: http://mgc.nci.nih.gov
+COMMENT     On Aug 19, 2003 this sequence version replaced gi:12652536.
+            Contact: MGC help desk
+            Email: cgapbs-r at mail.nih.gov
+            Tissue Procurement: ATCC
+            cDNA Library Preparation: Rubin Laboratory
+            cDNA Library Arrayed by: The I.M.A.G.E. Consortium (LLNL)
+            DNA Sequencing by: Institute for Systems Biology
+            http://www.systemsbiology.org
+            contact: amadan at systemsbiology.org
+            Anup Madan, Jessica Fahey, Erin Helton, Mark Ketteman, Anuradha
+            Madan, Stephanie Rodrigues, Amy Sanchez and Michelle Whiting
+            
+            Clone distribution: MGC clone distribution information can be found
+            through the I.M.A.G.E. Consortium/LLNL at: http://image.llnl.gov
+            Series: IRAL Plate: 7 Row: f Column: 3
+            This clone was selected for full length sequencing because it
+            passed the following selection criteria: matched mRNA gi: 31543450.
+            
+            Differences found between this sequence and the human reference
+            genome (build 36) are described in misc_difference features below
+            and these differences were also compared to chimpanzee genomic
+            sequences available as of 09/15/2004.
+FEATURES             Location/Qualifiers
+     source          1..981
+                     /organism="Homo sapiens"
+                     /mol_type="mRNA"
+                     /db_xref="taxon:9606"
+                     /clone="MGC:1082 IMAGE:3505068"
+                     /tissue_type="Placenta, choriocarcinoma"
+                     /clone_lib="NIH_MGC_21"
+                     /lab_host="DH10B-R"
+                     /note="Vector: pOTB7"
+     gene            1..981
+                     /gene="PX19"
+                     /note="synonyms: CGI-106, PRELI"
+                     /db_xref="GeneID:27166"
+                     /db_xref="MIM:605733"
+     CDS             174..833
+                     /gene="PX19"
+                     /codon_start=1
+                     /product="PX19 protein"
+                     /protein_id="AAH00007.1"
+                     /db_xref="GI:12652537"
+                     /db_xref="GeneID:27166"
+                     /db_xref="MIM:605733"
+                     /translation="MVKYFLGQSVLRSSWDQVFAAFWQRYPNPYSKHVLTEDIVHREV
+                     TPDQKLLSRRLLTKTNRMPRWAERLFPANVAHSVYVLEDSIVDPQNQTMTTFTWNINH
+                     ARLMVVEERCVYCVNSDNSGWTEIRREAWVSSSLFGVSRAVQEFGLARFKSNVTKTMK
+                     GFEYILAKLQGEAPSKTLVETAKEAKEKAKETALAATEKAKDLASKAATKKQQQQQQF
+                     V"
+     misc_difference 623
+                     /gene="PX19"
+                     /note="'G' in cDNA is 'A' in the human genome; no amino
+                     acid change.  The chimpanzee genome agrees with the cDNA
+                     sequence, suggesting that this difference is unlikely to
+                     be due to an artifact."
+     misc_difference 878
+                     /gene="PX19"
+                     /note="'C' in cDNA is 'T' in the human genome.  The
+                     chimpanzee genome agrees with the cDNA sequence,
+                     suggesting that this difference is unlikely to be due to
+                     an artifact."
+     misc_difference 925..981
+                     /gene="PX19"
+                     /note="polyA tail: 57 bases do not align to the human
+                     genome."
+ORIGIN      
+        1 ctcatggcgg cggcggcggc ggcggcagct gcttgggcgc ggtgcggtgg tgactgagct
+       61 acgagcctgg cggcgggtgt gcgccgagcc ccggcccggc ccggccctcg cgtgcctccc
+      121 aggctccgca cccctgatgc tgcgcgggtg ctgagcccgc ttcggccggg acgatggtga
+      181 agtatttcct gggccagagc gtgctccgga gttcctggga ccaagtgttc gccgccttct
+      241 ggcagcggta cccgaatccc tatagcaaac atgtcttgac ggaagacata gtacaccggg
+      301 aggtgacccc tgaccagaaa ctgctgtccc ggcgactcct gaccaagacc aacaggatgc
+      361 cacgctgggc cgagcgacta tttcctgcca atgttgctca ctcggtgtac gtcctggagg
+      421 actctattgt ggacccacag aatcagacca tgactacctt cacctggaac atcaaccacg
+      481 cccggctgat ggtggtggag gaacgatgtg tttactgtgt gaactctgac aacagtggct
+      541 ggactgaaat ccgccgggaa gcctgggtct cctctagctt atttggtgtc tccagagctg
+      601 tccaggaatt tggtcttgcc cggttcaaaa gcaacgtgac caagactatg aagggttttg
+      661 aatatatctt ggctaagctg caaggcgagg ccccttccaa aacacttgtt gagacagcca
+      721 aggaagccaa ggagaaggca aaggagacgg cactggcagc tacagagaag gccaaggacc
+      781 tcgccagcaa ggcggccacc aagaagcagc agcagcagca acagtttgtg tagccagtct
+      841 accaccacca cagcacccca gacagctagg cttagcccct ctgccctccc ttcattgtac
+      901 tttatcatta aaaatcaact tccaaaaaaa aaaaaaaaaa aaaaaaaaaa aaaaaaaaaa
+      961 aaaaaaaaaa aaaaaaaaat a
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/BEL16-LTR_AG.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/BEL16-LTR_AG.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/BEL16-LTR_AG.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+ID   BEL16-LTR_AG repbase; DNA   ; ANG   ; 287 BP.
+XX
+AC   .
+XX
+DT   03-APR-2003 (Rel. 8.03, Created)
+DT   03-APR-2003 (Rel. 8.03, Last updated, Version 1)
+XX
+DE   BEL16-LTR_AG is a long terminal repeat of the BEL16_AG LTR
+DE   retrotransposon - a consensus sequence.
+XX
+KW   5-bp TSD; BEL16-I_AG; BEL16-LTR_AG; BEL16_AG; Bel clade;
+KW   LTR retrotransposon; reverse transcriptase.
+XX
+OS   Anopheles gambiae str. PEST
+OC   Eukaryota; Metazoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera;
+OC   Endopterygota; Diptera; Nematocera; Culicoidea; Culicidae; Anophelinae;
+OC   Anopheles; Anopheles gambiae.
+XX
+RN   [1]
+RP   1-287
+RA   Kapitonov V.V., Pavlicek A., Jurka J.;
+RT   "BEL16_AG, a nonautonomous family of Bel/Pao-like LTR retrotransposons
+RT   from African malaria mosquito.";
+RL   Repbase Reports 3(3), 40-40 (2003).
+XX
+CC   [1] (Consensus)
+XX
+CC   BEL16-LTR_AG flank an internal portion of BEL16_AG (deposited as
+CC   BEL16-I_AG).
+XX
+SQ   Sequence 287 BP; 85 A; 51 C; 75 G; 76 T; 0 others;
+     tgttggaatg taagggttat gaaacggtca ttttgaattg tttgcggttg ttttgtcagt        60
+     tgggaattaa aagttaaatg tattttctgg cagcactgcc gatcgacaat ttgtgattaa       120
+     gtatgtgtgc gaataaagcg gcactagcgc atgaaactcg atacgagccg gacgtgttct       180
+     ttactttgtc tcctttggcg atcgaagacg acacaacaaa acacaacgta gggcgtagag       240
+     gcgtcaaggg ggaaaggaac caacaaacca tgttccagaa cgcaaca                     287
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/BK000016-tpa.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/BK000016-tpa.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/BK000016-tpa.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,70 @@
+LOCUS       BK000016                1162 bp    mRNA    linear   ROD 17-MAY-2002
+DEFINITION  TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.
+ACCESSION   BK000016
+VERSION     BK000016.1  GI:20043254
+KEYWORDS    Third Party Annotation; TPA.
+SOURCE      Mus musculus (house mouse)
+  ORGANISM  Mus musculus
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Rodentia; Sciurognathi; Muridae; Murinae; Mus.
+REFERENCE   1  (bases 1 to 1162)
+  AUTHORS   Zhou,B., Westaway,S.K., Levinson,B., Johnson,M.A., Gitschier,J. and
+            Hayflick,S.J.
+  TITLE     A novel pantothenate kinase gene (PANK2) is defective in
+            Hallervorden-Spatz syndrome
+  JOURNAL   Nat. Genet. 28 (4), 345-349 (2001)
+  MEDLINE   21372465
+   PUBMED   11479594
+REFERENCE   2  (bases 1 to 1162)
+  AUTHORS   Zhou,B., Westaway,S.K., Levinson,B., Johnson,M.A., Gitschier,J. and
+            Hayflick,S.J.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (31-JUL-2001) Medicine and HHMI, University of
+            California, San Francisco, 3rd and Parnassus Avenues, San
+            Francisco, CA 94143, USA
+PRIMARY     TPA_SPAN            PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP
+            1-455               BE554781.1         1-455
+            177-687             BG146074.1         1-511
+            608-956             AA671765.1         45-393
+            656-1162            BE982078.1         1-507               c
+FEATURES             Location/Qualifiers
+     source          1..1162
+                     /organism="Mus musculus"
+                     /mol_type="mRNA"
+                     /db_xref="taxon:10090"
+     CDS             <1..1119
+                     /note="PANK4"
+                     /codon_start=1
+                     /product="pantothenate kinase 4"
+                     /protein_id="DAA00010.1"
+                     /db_xref="GI:20043255"
+                     /translation="ERGASGGGSGGDSLDKSITLPPDEIFRNLENAKRFAIDIGGSLT
+                     KLAYYSTVQHKVAKVRSFDHPGKDVEQDHEPPYEISVQEEITARLHFIKFENTYMEAC
+                     LDFIRDHLVNTETKVIQATGGGAYKFKDLIEEKLRLKVDKEDVMTCLIKGCNFVLKNI
+                     PHEAFMYQKDADPEFRFQTNHPNIFPYLLVNIGSGVSIVKVETEDRFEWIGGSSIGGG
+                     TFWGLGALLTKTKKFDELLQLASRGRHANVDMLVQDIYGGAHQTLGLSGNLIASSFGK
+                     SATADREFSKEDMAKSLLHMISNDIGQLACLYAKLHGLDRVYFGGFFIRGHPVTMRTI
+                     TYSINFFSKGEVQALFLRHEGYLGAIGAFLKGAEQDSE"
+BASE COUNT      290 a    308 c    332 g    232 t
+ORIGIN      
+        1 gagcgtggag cgagtggcgg cgggagcggc ggggacagtc tggacaagag catcacgctg
+       61 ccccccgacg agatcttccg caacctggag aacgccaagc gcttcgccat tgatataggt
+      121 ggatcattga ccaagttggc atactattcc accgtacagc acaaagtggc caaagtgaga
+      181 tcttttgacc acccgggaaa ggacgtggag caggatcatg agccacccta tgagatctca
+      241 gtccaggagg agatcacagc tcgcctgcat ttcatcaagt ttgagaatac ctacatggaa
+      301 gcctgcctgg acttcatcag agaccaccta gtcaacactg agaccaaggt catccaggcc
+      361 actgggggtg gagcctataa gttcaaggac ctcatcgagg aaaagctgcg tctgaaggtg
+      421 gacaaagagg atgtaatgac ctgcttgata aaggggtgca acttcgtgct gaagaacatc
+      481 ccgcatgagg ccttcatgta ccagaaagac gcagacccag agtttcgatt tcagacaaat
+      541 caccccaaca tcttccccta cctcctagtc aacattggct ctggcgtctc catcgtgaag
+      601 gtggagacag aggaccggtt tgagtggatt ggtggaagct ccattggagg aggcaccttc
+      661 tggggcctcg gggctctgct caccaaaaca aagaagtttg atgagctgct gcagctggct
+      721 tccagaggcc ggcatgccaa cgttgacatg ctggtccagg acatctatgg tggggcccac
+      781 cagaccctgg gcctgagtgg caatctcatc gcaagcagtt ttgggaagtc agccactgct
+      841 gacagagagt tctccaaaga agacatggcc aagagcctgc tgcacatgat cagcaatgac
+      901 atcgggcagc tcgcctgtct gtacgccaag ctccacggct tggacagggt ctactttggg
+      961 ggcttcttca tccggggtca ccccgtgacc atgcgcacaa tcacctacag cattaacttc
+     1021 ttctccaagg gtgaagtcca ggcactcttc ctgagacatg aaggctacct gggagccatc
+     1081 ggggcatttt taaaaggagc cgagcaagac agtgagtaga gtcactgctg tgagcagtgg
+     1141 ctggctgtgc aggacgcggc cg
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/BLOSUM50
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/BLOSUM50	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/BLOSUM50	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+#  Matrix made by matblas from blosum50.iij
+#  * column uses minimum score
+#  BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 50
+#  Entropy =   0.4808, Expected =  -0.3573
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
+A  5 -2 -1 -2 -1 -1 -1  0 -2 -1 -2 -1 -1 -3 -1  1  0 -3 -2  0 -2 -1 -1 -5 
+R -2  7 -1 -2 -4  1  0 -3  0 -4 -3  3 -2 -3 -3 -1 -1 -3 -1 -3 -1  0 -1 -5 
+N -1 -1  7  2 -2  0  0  0  1 -3 -4  0 -2 -4 -2  1  0 -4 -2 -3  4  0 -1 -5 
+D -2 -2  2  8 -4  0  2 -1 -1 -4 -4 -1 -4 -5 -1  0 -1 -5 -3 -4  5  1 -1 -5 
+C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2 -5 
+Q -1  1  0  0 -3  7  2 -2  1 -3 -2  2  0 -4 -1  0 -1 -1 -1 -3  0  4 -1 -5 
+E -1  0  0  2 -3  2  6 -3  0 -4 -3  1 -2 -3 -1 -1 -1 -3 -2 -3  1  5 -1 -5 
+G  0 -3  0 -1 -3 -2 -3  8 -2 -4 -4 -2 -3 -4 -2  0 -2 -3 -3 -4 -1 -2 -2 -5 
+H -2  0  1 -1 -3  1  0 -2 10 -4 -3  0 -1 -1 -2 -1 -2 -3  2 -4  0  0 -1 -5 
+I -1 -4 -3 -4 -2 -3 -4 -4 -4  5  2 -3  2  0 -3 -3 -1 -3 -1  4 -4 -3 -1 -5 
+L -2 -3 -4 -4 -2 -2 -3 -4 -3  2  5 -3  3  1 -4 -3 -1 -2 -1  1 -4 -3 -1 -5 
+K -1  3  0 -1 -3  2  1 -2  0 -3 -3  6 -2 -4 -1  0 -1 -3 -2 -3  0  1 -1 -5 
+M -1 -2 -2 -4 -2  0 -2 -3 -1  2  3 -2  7  0 -3 -2 -1 -1  0  1 -3 -1 -1 -5 
+F -3 -3 -4 -5 -2 -4 -3 -4 -1  0  1 -4  0  8 -4 -3 -2  1  4 -1 -4 -4 -2 -5 
+P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2 -5 
+S  1 -1  1  0 -1  0 -1  0 -1 -3 -3  0 -2 -3 -1  5  2 -4 -2 -2  0  0 -1 -5 
+T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  2  5 -3 -2  0  0 -1  0 -5 
+W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1  1 -4 -4 -3 15  2 -3 -5 -2 -3 -5 
+Y -2 -1 -2 -3 -3 -1 -2 -3  2 -1 -1 -2  0  4 -3 -2 -2  2  8 -1 -3 -2 -1 -5 
+V  0 -3 -3 -4 -1 -3 -3 -4 -4  4  1 -3  1 -1 -3 -2  0 -3 -1  5 -4 -3 -1 -5 
+B -2 -1  4  5 -3  0  1 -1  0 -4 -4  0 -3 -4 -2  0  0 -5 -3 -4  5  2 -1 -5 
+Z -1  0  0  1 -3  4  5 -2  0 -3 -3  1 -1 -4 -1  0 -1 -2 -2 -3  2  5 -1 -5 
+X -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1  0 -3 -1 -1 -1 -1 -1 -5 
+* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5  1 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/BN000066-tpa.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/BN000066-tpa.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/BN000066-tpa.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,263 @@
+
+ID   AGA000066  standard; DNA; INV; 5195 BP.
+XX
+AC   BN000066;
+XX
+SV   BN000066.1
+XX
+DT   07-OCT-2002 (Rel. 73, Created)
+DT   21-NOV-2002 (Rel. 73, Last updated, Version 11)
+XX
+DE   TPA: Anopheles gambiae achE1 gene for acetylcholinesterase, exons 1-6
+XX
+KW   acetylcholinesterase; achE1 gene; Third Party Annotation; TPA.
+XX
+OS   Anopheles gambiae (African malaria mosquito)
+OC   Eukaryota; Metazoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera;
+OC   Endopterygota; Diptera; Nematocera; Culicoidea; Anopheles.
+XX
+RN   [1]
+RP   1-5195
+RA   Fort P.P.;
+RT   ;
+RL   Submitted (30-NOV-2001) to the EMBL/GenBank/DDBJ databases.
+RL   Fort P.P., CRBM-UPR1086, Cnrs, 1919 route de Mende, 34293 MONTPELLIER CEDEX
+RL   5, FRANCE.
+XX
+RN   [3]
+RA   Weill M., Fort P., Berthomi eu A., Dubois M.P., Pasteur N., Raymond M.;
+RT   "A novel acetylcholinesterase gene in mosquitoes codes for the insecticide
+RT   target and is non-homologous to the ace gene in Drosophila";
+RL   Proc. R. Soc. Lond., B, Biol. Sci. 269:2007-2016(2002).
+XX
+CC   see also AJ488492 for achE-1 from Kisumu strain
+CC   Third Party Annotation Database: This TPA record uses Anopheles gambiae
+CC   trace archive data (http://trace.ensembl.org)
+XX
+AH   TPA_SPAN        PRIMARY_IDENTIFIER   PRIMARY_SPAN   COMP
+AS   1-685           TI55475028           11-685             
+AS   1-747           TI47553499           11-747             
+AS   1-800           TI55407518           11-800             
+AS   188-895         TI57630934           11-707             
+AS   223-845         TI57283198           11-622             
+AS   251-895         TI47098900           11-644             
+AS   272-999         TI56725015           11-727             
+AS   387-1193        TI57310066           11-806             
+AS   431-1195        TI56845856           11-764             
+AS   522-1220        TI55215818           11-698             
+AS   546-1208        TI57401582           11-661             
+AS   566-1208        TI56169014           11-642             
+AS   584-1306        TI59761948           11-722             
+AS   738-1306        TI55852606           11-568             
+AS   827-1634        TI57602219           11-807             
+AS   1064-1769       TI44966390           11-705             
+AS   1065-1822       TI60742945           11-757             
+AS   1129-1954       TI56042882           11-825             
+AS   1223-1979       TI60461178           11-756             
+AS   1226-1868       TI56472916           11-642             
+AS   1296-1989       TI44952509           11-693             
+AS   1364-2170       TI56006358           11-806             
+AS   1561-2341       TI58704241           11-780             
+AS   1582-2385       TI44875600           11-803             
+AS   1740-2415       TI55894404           11-675             
+AS   1794-2585       TI56056818           11-791             
+AS   1834-2644       TI47256770           11-809             
+AS   1848-2729       TI56446066           11-879             
+AS   1849-2515       TI44984669           11-665             
+AS   2216-3002       TI57417259           11-786             
+AS   2224-3029       TI56867244           11-805             
+AS   2226-3057       TI59803037           11-831             
+AS   2299-3083       TI60503347           11-784             
+AS   2614-3351       TI44891398           11-736             
+AS   2654-3419       TI55336738           11-765             
+AS   2663-3304       TI60709914           11-641             
+AS   2672-3417       TI56453591           11-745             
+AS   2691-3260       TI55690247           11-569             
+AS   2697-3494       TI55745954           11-797             
+AS   2885-3706       TI58733608           11-821             
+AS   2896-3700       TI58130218           11-803             
+AS   2946-3741       TI56432498           11-795             
+AS   2992-3702       TI47561255           11-710             
+AS   2992-3702       TI55208429           11-710             
+AS   3029-3807       TI47256418           11-778             
+AS   3030-3831       TI47233061           11-801             
+AS   3123-3892       TI55870944           11-769             
+AS   3255-4043       TI47465031           11-787             
+AS   3325-4045       TI55382589           11-720             
+AS   3535-4255       TI46993841           11-720             
+AS   3574-4244       TI56607077           11-670             
+AS   3596-4394       TI56526265           11-798             
+AS   3704-4484       TI56050293           11-780             
+AS   3724-4560       TI59807029           11-836             
+AS   3745-4549       TI55159438           11-804             
+AS   3753-4596       TI56586965           11-842             
+AS   3800-4577       TI56592754           11-777             
+AS   3833-4635       TI47243257           11-801             
+AS   3837-4272       TI59628661           11-435             
+AS   3899-4728       TI56895999           11-829             
+AS   3938-4704       TI59830483           11-766             
+AS   3942-4704       TI55700050           11-762             
+AS   4116-4912       TI56961976           11-796             
+AS   4137-4761       TI46929238           11-623             
+AS   4148-4961       TI59909853           11-813             
+AS   4285-5046       TI47085450           11-759             
+AS   4315-5069       TI58096045           11-754             
+AS   4387-5177       TI58084392           11-790             
+AS   4455-5175       TI55094688           11-720             
+AS   4455-5195       TI55297322           11-740             
+AS   4491-5195       TI56990277           11-704             
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..5195
+FT                   /db_xref="taxon:7165"
+FT                   /organism="Anopheles gambiae"
+FT   CDS             join(<1780..1914,2001..2538,2618..2815,2902..3609,
+FT                   3676..3848,3956..4077)
+FT                   /codon_start=3
+FT                   /gene="achE1"
+FT                   /product="acetylcholinesterase"
+FT                   /EC_number="3.1.1.7"
+FT                   /function="hydrolyzes acetylcholine at cholinergic
+FT                   synapses"
+FT                   /protein_id="CAD29865.2"
+FT                   /translation="AFFTPYIGHGESVRIIDAELGTLEHVHSGATPRRRGLTRRESNSD
+FT                   ANDNDPLVVNTDKGRIRGITVDAPSGKKVDVWLGIPYAQPPVGPLRFRHPRPAEKWTGV
+FT                   LNTTTPPNSCVQIVDTVFGDFPGATMWNPNTPLSEDCLYINVVAPRPRPKNAAVMLWIF
+FT                   GGGFYSGTATLDVYDHRALASEENVIVVSLQYRVASLGFLFLGTPEAPGNAGLFDQNLA
+FT                   LRWVRDNIHRFGGDPSRVTLFGESAGAVSVSLHLLSALSRDLFQRAILQSGSPTAPWAL
+FT                   VSREEATLRALRLAEAVGCPHEPSKLSDAVECLRGKDPHVLVNNEWGTLGICEFPFVPV
+FT                   VDGAFLDETPQRSLASGRFKKTEILTGSNTEEGYYFIIYYLTELLRKEEGVTVTREEFL
+FT                   QAVRELNPYVNGAARQAIVFEYTDWTEPDNPNSNRDALDKMVGDYHFTCNVNEFAQRYA
+FT                   EEGNNVYMYLYTHRSKGNPWPRWTGVMHGDEINYVFGEPLNPTLGYTEDEKDFSRKIMR
+FT                   YWSNFAKTGNPNPNTASSEFPEWPKHTAHGRHYLELGLNTSFVGRGPRLRQCAFWKKYL
+FT                   PQLVAATSNLPGPAPPSEPCESSAFFYRPDLIVLLVSLLTATVRFIQ"
+FT   mRNA            join(<1780..1914,2001..2538,2618..2815,2902..3609,
+FT                   3676..3848,3956..>4596)
+FT                   /gene="achE1"
+FT   exon            <1780..1914
+FT                   /number=1
+FT                   /gene="achE1"
+FT   intron          1915..2000
+FT                   /number=1
+FT                   /gene="achE1"
+FT   exon            2001..2538
+FT                   /number=2
+FT                   /gene="achE1"
+FT   intron          2539..2617
+FT                   /number=2
+FT                   /gene="achE1"
+FT   exon            2618..2815
+FT                   /number=3
+FT                   /gene="achE1"
+FT   intron          2816..2901
+FT                   /number=3
+FT                   /gene="achE1"
+FT   exon            2902..3609
+FT                   /number=4
+FT                   /gene="achE1"
+FT   intron          3610..3675
+FT                   /number=4
+FT                   /gene="achE1"
+FT   exon            3676..3848
+FT                   /number=5
+FT                   /gene="achE1"
+FT   intron          3849..3955
+FT                   /number=5
+FT                   /gene="achE1"
+FT   exon            3956..>4596
+FT                   /number=6
+FT                   /gene="achE1"
+FT   polyA_signal    4628..4633
+FT                   /gene="achE1"
+XX
+SQ   Sequence 5195 BP; 1326 A; 1468 C; 1183 G; 1218 T; 0 other;
+     gaggtggatt ggtacggatt ggtcatttcc gttcttcttc atgtgcgttt cttactctcc        60
+     tgccttctca aacgaacttc agaacgaaaa aaaacacgcg acggagagta agaagctgta       120
+     cagacactct agtcctcaca cacacaactt gcttactttg tccgtccgtt tgattccgct       180
+     ctttctatgt gtgactttct ggcacccttt acttcgtcac tattcatttc atttccaata       240
+     aacttttaat gtgtctttct tttttattct aaatatctat agtaaatgtt ctgtagcaag       300
+     tatcttgtag tagaattgta tagaagtaga tttttgtatg agtttgcatc atcccttccc       360
+     aatggggttg actccgtttc aaccaacgcc aaaagctatc ggcataaagt atggttcctt       420
+     gcaaaggctt ttatgaaaca cgaatgtgtt gaaagctttt gcaaatggaa atgttaaagc       480
+     ctttaagttc caatcgcttt ttgtatccat ttagtttgca tgaacaacag gaaatcaaaa       540
+     tattggtaac gacaatcgct ggcgggcgtt cctttcttgt ctaatcaaat catctacgat       600
+     tgtaattaca aacttccaag tttgcgtatg acaatgttaa atgtctaaga cgctcaaatg       660
+     caaccaatag agtataatta ctaaggcggg cagtagaaac caaaatatct taaataatgt       720
+     caagcaaaac aaaaagaaca attccgttca ctgctcaaag aaagccctaa ctaactacct       780
+     aaccttttca tcgatgaccc tgtactgaca tggtaagata ttctttatcc tttaactctt       840
+     ctgcacccta cgcactcaat gcaacacacg cactactatt actgctacta ctctcgcact       900
+     cacgagcacc tacttgcact caagccggca ctcaatgtac tagcgaaaca cgtcgcatct       960
+     aagcactcac aaggaagcac acatttgcaa atagcaccta ccggaacagc tttgaatgtg      1020
+     ccagcacagc attgaacagg ttcgcgcctt tactcctgtg ctctgttttc tcgatcggaa      1080
+     tgttcgaaag ttgaaaagcg cattttttca tctctctttt tctattcttc ttcgtatttt      1140
+     tatccctctc tcgtcgtgtt ttttctaaac attaccatac ttcttccgct acgaactcgc      1200
+     caagaaccag aacgcagcgt gcgtgcggtg cttgcggtgt gtgtgtgtgt gtgtgtattc      1260
+     cacggctgcg agaagcaaga tcggagaaca ggcatcattc ccctttcaca gacaattgca      1320
+     cttttgtact agaacagaaa acgagacagc ataatttcca acagcctcat tcactcatac      1380
+     caggctcaca ccgactttta accgaaacat gtactacaga aacaaaaaca aacaatatgg      1440
+     agagtgctcg cgctgatact aagttaatat gaagagatta ctggcgaggt catcgatccc      1500
+     atcccgacat catcgctcca ggctccagac ctaccaagtc gcctaccatt acctacccac      1560
+     caccgaccac tactcacaca gcattatcac ttccgccgcc gtcgccgccg ccgccgacgc      1620
+     cgccgacgcc accaccttca caccgccctg ccaaaatgaa tgcgcattgt tgcgatagat      1680
+     tgaatttcct tggttgttgt tgttgttggt tttcttttga catgtttgtg tgttgttttt      1740
+     tctttctctc tctctctttc tgtggttcca acatttcaga cgcatttttt acaccatata      1800
+     taggtcacgg tgagtccgta cgaattatag atgccgagtt gggcacgctc gagcatgtcc      1860
+     acagtggagc aacgccgcgg cgacgcggcc tgacgaggcg cgagtcaaac tcgggtaagt      1920
+     acgcgattgg aagtgggggg acgtttaccc taccgtgtac tactacaacg cactttaccc      1980
+     ccacgcacac gcaccggcag acgcgaacga caacgatccg ctggtggtca acacggataa      2040
+     ggggcgcatc cgcggcatta cggtcgatgc gcccagcggc aagaaggtgg acgtgtggct      2100
+     cggcattccc tacgcccagc cgccggtcgg gccgctacgg ttccgtcatc cgcggccggc      2160
+     cgaaaagtgg accggcgtgc tgaacacgac cacaccgccc aacagctgcg tgcagatcgt      2220
+     ggacaccgtg ttcggcgact tcccgggcgc gaccatgtgg aacccgaaca cgcccctgtc      2280
+     cgaggactgt ctgtacatta acgtggtggc accgcgaccc cggcccaaga atgcggccgt      2340
+     catgctgtgg atcttcggcg gcggcttcta ctccggcacc gccaccctgg acgtgtacga      2400
+     ccaccgggcg cttgcgtcgg aggagaacgt gatcgtggtg tcgctgcagt accgcgtggc      2460
+     cagtctgggc ttcctgtttc tcggcacccc ggaagcgccg ggcaatgcgg gactgttcga      2520
+     tcagaacctt gcgctacggt aggtgtcttt gcatgtgtga atgagggtat agtattctaa      2580
+     cgaggtgctc ttcttcccat cacttcttgg gagtcagctg ggtgcgggac aacattcacc      2640
+     ggttcggtgg cgatccgtcg cgtgtgacac tgttcggcga gagtgccggt gccgtctcgg      2700
+     tgtcgctgca tctgctgtcc gccctttccc gcgatctgtt ccagcgggcc atcctgcaga      2760
+     gcggctcgcc gacggcaccg tgggcattgg tatcgcgcga ggaagccaca ctaaggtacg      2820
+     tgccagctgc tgctttcccc aaaccaccaa cccgcaacag ctcacacaac cctcttttcc      2880
+     gtcgctcttt tctcgctcca gagcactgcg gttggccgag gcggtcggct gcccgcacga      2940
+     accgagcaag ctgagcgatg cggtcgagtg cctgcgcggc aaggacccgc acgtgctggt      3000
+     caacaacgag tggggcacgc tcggcatttg cgagttcccg ttcgtgccgg tggtcgacgg      3060
+     tgcgttcctg gacgagacgc cgcagcgttc gctcgccagc gggcgcttca agaagacgga      3120
+     gatcctcacc ggcagcaaca cggaggaggg ctactacttc atcatctact acctgaccga      3180
+     gctgctgcgc aaggaggagg gcgtgaccgt gacgcgcgag gagttcctgc aggcggtgcg      3240
+     cgagctcaac ccgtacgtga acggggcggc ccggcaggcg atcgtgttcg agtacaccga      3300
+     ctggaccgag ccggacaacc cgaacagcaa ccgggacgcg ctggacaaga tggtgggcga      3360
+     ctatcacttc acctgcaacg tgaacgagtt cgcgcagcgg tacgccgagg agggcaacaa      3420
+     cgtctacatg tatctgtaca cgcaccgcag caaaggcaac ccgtggccgc gctggacggg      3480
+     cgtgatgcac ggcgacgaga tcaactacgt gttcggcgaa ccgctcaacc ccaccctcgg      3540
+     ctacaccgag gacgagaaag actttagccg gaagatcatg cgatactggt ccaactttgc      3600
+     caaaaccggg taagtgtgtg tgtcaaacag cagagtgtcg atcgctctaa caccagcgtc      3660
+     ttctctcttc tacagcaatc caaatcccaa cacggccagc agcgaattcc ccgagtggcc      3720
+     caagcacacc gcccacggac ggcactatct ggagctgggc ctcaacacgt ccttcgtcgg      3780
+     tcggggccca cggttgaggc agtgtgcctt ctggaagaag taccttcccc agctagttgc      3840
+     agctacctgt aagtctcgtg cagcacttga aaccccctcc cacatcccca tcagggtcca      3900
+     ggttgcaata ataaatttca ctttctctct ctcacgtctc ttttccccaa aacagcgaac      3960
+     ctaccagggc cagcaccgcc tagtgaaccg tgcgaaagca gcgcattttt ttaccgacct      4020
+     gatctgatcg tgctgctggt gtcgctgctt acggcgaccg tcagattcat acaataatta      4080
+     ctaccccatc catggcctag ttcgtttaag ctttaagata gtgaggaaca aatttttccc      4140
+     aaacaatttt ccccccttta gagcagaacc gagggagaga taggactaca tagcgaaaag      4200
+     ggaaaacaag tggtggcgga cgaggagaga agaagcaaat cgaataatcg aagcaacaac      4260
+     aacaacaaca aaaaaactgc aaccgggttc actaaaccca gggggcagct cagtagcaaa      4320
+     ctactactta aataactact ttcttatggc aaattatggc aagagcagtc gtgatgggtt      4380
+     cgatcagtat ccatctgacc ggagcagctg aaccgtttca tgggcagttg ctgcaataca      4440
+     ccacgacccg tacacacagt aacacacttt ttatagcttt acactaacaa ccactctccc      4500
+     cacgctcctc ttccccttcc cctccacaca gacagcagcg ccgtttgtag caggatctac      4560
+     taccgtgcgg tttggtatgg cggccaacaa cactaaacac cacacatcta ctaaaacaca      4620
+     ccggaacaat aaacaaatgt taaacttact atatgaatat acatctagac gcatatatac      4680
+     gcatgaacta ctacttccct cgtgttctga caaaacacat taccttgtcc cccctccccc      4740
+     tccggtttgc ttaccaccac tgcaccacca gtatgaattt gttccataat aacgcttcgt      4800
+     aactcgttac caggagcaca actgggtcgt tggcggagtg ctgcgcgttt cgtgctgaag      4860
+     atgtaaacta gcaccgcgca cactttcgac acgcaaccac agctacacat cacgaaagca      4920
+     acatcctggc cctatccgtt ttctcattct taaaacttct ttccttagac caaaaccaac      4980
+     gcaaactagc aaaaggtact tgagtaaccg gtccagtaca cactgtgcta caattgagcg      5040
+     tagggaggag gtataatttc tgcaaaatgt ataaaacaaa actaaaacaa actaattact      5100
+     tgcaatccat tctaaagcac gaaaactcct caaaataaaa acgggaagta aacaaaaaaa      5160
+     tcagaacgaa caaatttacc taaaaaaaag taaac                                 5195
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Bird_Ovomucoids.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Bird_Ovomucoids.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Bird_Ovomucoids.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,226 @@
+#NEXUS 
+
+
+BEGIN DATA;
+DIMENSIONS  NTAX=89 NCHAR=88;
+
+[!Data from:
+
+Laskowski, M., Jr., and W.M. Fitch.  1989.  Evolution of avian ovomucoids and of birds.  Pp. 371-387 in B.  Fernholm, K. Bremer, and H. Jšrnvall (eds.),  The Hierarchy of Life.  Elsevier Press, Amsterdam. 
+
+Thanks to Walter Fitch for supplying the data matrix,   March 1990.]
+FORMAT DATATYPE=PROTEIN  SYMBOLS = " 1 2 3 4"  MISSING=? GAP=-  MATCHCHAR=. ;
+MATRIX
+[                                        10        20        30        40        50        60        70        80       ]
+[                                        .         .         .         .         .         .         .         .        ]
+
+Struthio_camelus                VKYPNTNEEGKEVVLPKILSPIGSDGVYSNELANIEYTNVSK??????FAT--VDDYKPVPLDYMLDSKTSNKNNVVESSGTLRHFGK   [86]
+Rhea_americana                  .............L..E..N.V.T................?.D?????...--...H...S.E..........D.....N...S....   [86]
+Pterocnemia_pennata             .............L..E..N.V.A..................DHD?EV...--...H...S.E..........D.....N...S....   [86]
+Casuarius_casuarius             ........D....L.....N.........DD......A....DHDKEV...--..E....SPE.......N..DS....N...G....   [86]
+Dromaius_novaehollandiae        ........D....L.....N..........D......A..??D?????...--.......S.E.......N..D.....N...G....   [86]
+Nothoprocta_cinerascens         .....A.D.....P...TP...A.NA.FGS....V....I..DHDK?????T-..G...AT.E.F..NQ.A..A....KNV....L..   [87]
+Eudromia_elegans                .R.....D.....P...TP..V.AN....S....V....I?.?????????S-I.G...AT.EFF..NQ....A.A..KNV..N.I.E   [87]
+Pygoscelis_adeliae_f            .TF..........LVT.......T..................DHDKEVI..--.......S.E..............D.N...S....   [86]
+Pygoscelis_adeliae_y            .T...........LVT.......T..................DHDKEVI..--.......S.E..............D.N...S....   [86]
+Spheniscus_humboldti            .T.S.........LIT.......T..................D?DKEVI..--I......S.E..............D.N.I.S....   [86]
+Phalacrocorax_sulcirostris      .S.SK.......ALVT.......T..............KI..DHDKEVI..--.......S.E.............AD.N...S....   [86]
+Anhinga_novaehollandeae         .L.S.........LVT.......T................T.DHDKEVI.S--.......S.E..............D.N...S....   [86]
+Nycticorax_nycticorax           .T.S.A....R..LVT.......A..........M....I..DHDGEVIV.--.......SPEN.V.......D..AD.N...S....   [86]
+Chauna_chavaria                 .R...........L.T.T.....T..................DRDKEAV..--......AT.E....NQ....S...D.N...S....   [86]
+Anseranas_semipalmata           .R...S.......L.T.D...................A....DHDKEAV..--..E...AT.E....NQ........D.N...S....   [86]
+Dendrocygna_arcuata             .RF..........L.T.E...V.................I..D?DKEAV..--......AT.E....N..G......D.N...S....   [86]
+Dendrocygna_autumnalis          .RF..........L.T.D.....................I..DHDKEAV..--......AT.E....N..G......D.N...S....   [86]
+Dendrocygna_eytoni_d            .RF..........L.T.DVI.V............L....I..DHDKEAV..--....R.DT.E....N..G......D.N...S....   [86]
+Dendrocygna_eytoni_e            .RF..........L.T.DVI.V............L....I..DHDKEAV..--..E.R.DT.E....N..G......D.N...S....   [86]
+Dendrocygna_viduata             .RFS.........L.T.E...V.................I..D?D?EAV..--......AT.E....N..G.R....D.N...S....   [86]
+Coscoroba_coscoroba             ..F..........L.T.D.I...T...............I..DHDKEAV..--..G...ATME....N..G......D.N...S....   [86]
+Cygnus_atratus                  .RF..........L.T.D.....T...............I..DHDKEAV..--......ATME....N..G......D.N...S....   [86]
+Goose                           .RF..........L.T.D.....T...............I..DHDKEAV..--......ATVE....N..D......D.N...S....   [86]
+Anser_indicus                   .RF..........L.T.D.A...T...............I..DHDKEAV..--......ATVE....N..D......D.N...S....   [86]
+Branta_canadensis               .RF.......R..L.T.D.....T...............I..DHD???V..--......ATVE....N..G......D.N...S....   [86]
+Cereopsis_novaehollandiae       ..F..........L...DVI.T.T...............I..D?D??AV..--......ARME....N..G......D.N...S....   [86]
+Chloephaga_picta                .RF..........L.T.E.....T...............I..D??KEAV..--..G...ATME....N..G......D.N...S...E   [86]
+Duck                            .RF..........L.T.E...V.T...............I..DHDKEAV..--..G...ATME....N..G......D.N...S...E   [86]
+Anas_platyrhynchos              .RF........D.L.T.E...V.T...............I..DHDKEAV..--..G...ATME....N..G......D.N...S...E   [86]
+Megapodius_freycinet            .R...........LVTQDV?...T....?....G...??I????????LV.--......ST.EDK..NQ....S...D.N...S....   [86]
+Leipoa_ocellata                 IRH..........LVTEDS....T...............I..E?DK??VV.--..G.THAT.ELK..NQ....G..AQ.N...S....   [86]
+Ortalis_vetula                  ...........D.LA.EDPNL.......T-.......???????????..PN-...H..ALQEQKI.N..D..S...D.N...S....   [86]
+Penelope_jacquacu               ...........D.LA.EDP.........T-.........I..ERDKEA..PN-...H..ALQEQK..N..D..S...D.N...S....   [86]
+Penelope_superciliaris          ...........D.LVAEDP....................I..E?DKEA..PN-...H..ALQEQK..N..D..S...D.N...S....   [87]
+Bonasa_umbellus                 .RF........V.LV.EDPR...T.A.....M.......I..EHD???L.AS-..E...ATME.R..N..G........N.N.S...T   [87]
+Tympanuchus_cupido              .RF........D.LVTED.H...T...............I..EHD???L.AS-..E...ATME.R..N..G........N...S....   [87]
+Oreortyx_pictus                 .RF........D.LAT.E.H...T........S......I..EHDTEA..AS-..E...AT.E.R.....A........N...S....   [87]
+Callipepla_squamata_n           .RF........D.LAT.E.H...T........Y......I..EHD??A..AS-..E...DT.E.R..N..A........N...S....   [87]
+Callipepla_squamata_s           .RF........D.LAT.E.H...T........Y......I..EHD??A..AS-..E...DT.E.R..N..AS.......N...S....   [87]
+Lophortyx_californicus          .RF........D.LVT.E.Q...T........Y......I..EHD?EA..AS-..E...AT.E.R..N..A........N...S....   [87]
+Colinus_virginianus             .RF........D.LATEE.H...T....MS.MF......T..EHDTEA..AS-..E...AMSE.R..N..V........N...S....   [87]
+Cyrtonyx_montezumae_l           .RF........D.LVTEEV....T........S..A.?.I.?E?D???..AS-..E...AT.E.VI.N..G........N...S....   [87]
+Cyrtonyx_montezumae_s           .RF........D.LVTEEV....T........S..A.?.I.?E?D???..AS-..E...ATSE.VI.N..G........N...S....   [87]
+Alectoris_chukar                ARF..A.....D..VTED.R...T....T-.........I..EHDGETL.A--..E...AT.E.R.....G........N...S....   [85]
+Alectoris_rufa                  ARF..A.....D..VTED.H...T....T-.........I..EHD???L.A--..E...AT.E.R.....G........N...S....   [85]
+Francolinus_afer                .RF..A....RD..VSEN.R...TH........SM....I..EHDREAP.AS-..E...ATME.RV.NI.G......K.N...S....   [87]
+Francolinus_erckelii            .RF..A.....D.AVSEN.R...T...N-.....M....I??EHD?EAP.AS-..E...ATME.RV.NI.G......K.N...S.K..   [86]
+Francolinus_coqui_v             .RF..A....RD..VSEN.R...T.........SMN...I..E?D?EA???S-..E...GTME.RV.NI.G......K.N...S....   [87]
+Francolinus_coqui_a             .RF..A....RD.AVSEN.R...T.........SMN...I..E?D?EA???S-..E...GTME.RV.NI.G......K.N...S....   [87]
+Francolinus_francolinus_a       ARF........V.LDS.D.I...T..LHDS..S...H.KIK.EHDRE????S-..G...ETAEET..N..R........N........   [87]
+Francolinus_francolinus_v       .RF........V.LDS.D.I...T..LHDS..S...H.KIK.EHDRE????S-..G...ETAEET..N..R........N........   [87]
+Francolinus_pondicerianus       ARFS.A.....D.LVIDDPR.M.T....DS..F.M....I..EHD???LPAS-..E...DTTEER..N..G........N...S....   [87]
+Perdix_perdix                   .RF........D.LVTED.Q...T...............I..EHT???L.AS-..E...ATME.R..N..G..D.....N...N....   [87]
+Coturnix_delegorguei            .RF........DE.V.DE.RF..T....NH.MF.K....I..EQDGET???S-..E...A.K..RV.N..G........N...NR...   [87]
+Coturnix_coturnix_japonica_1    .RF........DE.V.DE.RL..T....NH.MF.K....I..EQDGETL.AS-..E...A.K..RV.N...........N...N....   [87]
+Coturnix_coturnix_japonica_2    .RF........DE.V.DE.RL..T....NH.MF.K....I..EQDGETL.AS-..E...A.K..RV.N..G........N...N....   [87]
+Arborophilia_torqueola          .RF..S.....V..VKEDPR...T.........H..T??I?.?????????S-....M.ATME.RV.N..G........N...S....   [87]
+Bambusicola_thoracica           ARF..A.....V.LDTQE.R...T.......MS......I.IK?DKE?L.AS-..E...ETAEERI.N..G........N....N...   [87]
+Tragopan_satyra                 .RF........D.LVTED.H...T...............I..GHDREAL.AS-..E...ATME.R..N..G........N...S....   [87]
+Tragopan_temmincki              .RF........D.LVTED.R...T...............I..GHD???L.AS-..E...ATME.R..N..G........N...S....   [87]
+Lophophorus_impejanus           .RF..A.....D.LVTED.R...T...............I..EHDREAL.AS-..E...ATME.R..N..G........N...S....   [87]
+Crossoptilon_auritum            .RF........D.LVAED.R...T...............I..ERDGEAL.AS-..E...ATME.R..N..G........N...S....   [87]
+Lophura_edwardsi                .RF........D.LVAED.R...T.......M.......I..ERDGEAL.AS-..E...ATME.R..N..G........N...S....   [87]
+Lophura_ignita                  .RF........D.LVGEDIR...T.......M.......N..ERDGEAL.AS-..E...ATME.R..N.SD........N...S....   [87]
+Gallus_gallus                   ARF..ADK...D.LVN.D.R...T....T.D..S..F..I..EHDKETL.AS-..E...DTAEDR..N..G........N...S....   [87]
+Grey_jungle_fowl                ARF..ADK...D.LVN.D.R...T....T.D..S..F..I..EHDKETL.AS-..E...DTVEDR..N..G........N...S....   [87]
+Phasianus_colchicus             .RF..........LVAED.R.V.T.....S.........I..EHEGEAL.AS-..E...ATME.R..N..G........N...NR..Q   [87]
+Syrmaticus_ellioti              .RF..K.....D.LVAED.H...T...............I..ER?G??L.AS-..E...ATME.R..N..G........N...S....   [87]
+Syrmaticus_reevesii             .RF..K.......LVAED.H...T.....S.........I..ERNGEAL.AS-..E...ATME.R..N..G........N...SR..E   [87]
+Chrysolophus_amherstiae         .RFL.....S.D-LVAED.H...T...............I..EHDG?AL.AS-..E...ATME.R..N..G........N...N....   [86]
+Polyplectron_bicalcaratum       .RF....K...D.LA.EEVR...T.....D.S..RD...I..EHDR?????S-..E.Q.TTTEHRVNNE.G......K.N..VS....   [87]
+Argusianus_argus_argus          .RF........D.LVSEDRH...T.....H..T......I..EHD?EAL.A--..EH..AT.EDR..N.I...D..L..N...S....   [86]
+Pavo_cristatus                  .RF..A.....D.LVSED.H...T.....H.........I..EHDREAL.AS-..E...AT.EHR..N..G........N...S....   [87]
+Afropavo_congensis              .RF........D.SAS.D.R...T.....H.........I..EHDGEAL.AS-..E...ATMEQR..N..G........N...S....   [87]
+Numida_meleagris                .RF..A.....D.LVTED.R...T......D........I.?????EAL.A--..E...ATME.R..N..D........N...S....   [86]
+Acryllium_vulturinum            .RF..A.....D.LVIED.R...T......D........I..EHD???L.A--..E...ATME.R..N..D........N...S....   [86]
+Meleagris_gallopavo             .RF........D.LVTED.R...T...H.-.........I..EHDREAL.AS-..E...AT.E.R..N..G........N...S....   [86]
+Grus_carunculatus               .T...........LVT.......T..................DHDKEAT..--......AT.E..F...........D.N...S....   [86]
+Anthropoides_virgo              .T...........LVT.......T..................DHDKEVT..--......AT.E..F...........D.N...S....   [86]
+Grus_vipio                      IT...........LVT.......T..................DHDKEAT..--......AT.E..F...........D.N...S....   [86]
+Fulica_atra                     .T...........LVT.....V.TN......S..........DYDKEVT..--..G.Q.AS.E.VF.N.....D..AD.N...S....   [86]
+Vanellus_spinosus               .T...........LVT.......T..........L.......DYDKEVI..--......AS................D.N...S..E.   [86]
+Larus_rudibundus                .T...........LAT.A...V....................DYDKEDI..--......AS................D.N...S..E.   [86]
+Turnix_sylvatica                .RF........DT.AD.D.P.........-.M.......I..EHD??T???S-..E...GMMERL..N..ND.......N...N...E   [86]
+Gallirallus_australis           .T.........V.LVT.NI..V.TN...T..S.I...S....DYD???T..??..G.QSA.Q..VF.N........AD.N...S....   [88]
+Geococcyx_californianus         .A...A......ALVTTARLH..T....G.....L.H..I..DYNKEVI.S--.N.....S.L....N..G.....AD.N...S....   [86]
+Dacelo_novaeguineae             .......D.....LVTE......T.R................EHDKEAI..-Q..EH..AT...RI.......D..MD.N...S....   [87]
+Carpococcyx_renauldi            .R...S......GLATT.R....T....G.....L....I..DYD???I..--.......T.ED...NI.H..Y..AH.N..FS....   [86]
+Podargus_strigoides             .T.......S...LVDEV.....T..........L.-..I..DRDK??I..--....Q..MG...............D.N...N....   [85]
+;
+END;
+BEGIN GENETICCode;
+StandardNUCLEAR;
+END;
+
+BEGIN CODONS;
+CODESET * UNTITLED = Universal: all ;
+END;
+
+BEGIN ASSUMPTIONS;
+	OPTIONS  DEFTYPE=unord PolyTcount=MINSTEPS ;
+END; 
+
+BEGIN TREES; 
+
+	TRANSLATE
+		1	Struthio_camelus,
+		2	Rhea_americana,
+		3	Pterocnemia_pennata,
+		4	Casuarius_casuarius,
+		5	Dromaius_novaehollandiae,
+		6	Nothoprocta_cinerascens,
+		7	Eudromia_elegans,
+		8	Pygoscelis_adeliae_f,
+		9	Pygoscelis_adeliae_y,
+		10	Spheniscus_humboldti,
+		11	Phalacrocorax_sulcirostris,
+		12	Anhinga_novaehollandeae,
+		13	Nycticorax_nycticorax,
+		14	Chauna_chavaria,
+		15	Anseranas_semipalmata,
+		16	Dendrocygna_arcuata,
+		17	Dendrocygna_autumnalis,
+		18	Dendrocygna_eytoni_d,
+		19	Dendrocygna_eytoni_e,
+		20	Dendrocygna_viduata,
+		21	Coscoroba_coscoroba,
+		22	Cygnus_atratus,
+		23	Goose,
+		24	Anser_indicus,
+		25	Branta_canadensis,
+		26	Cereopsis_novaehollandiae,
+		27	Chloephaga_picta,
+		28	Duck,
+		29	Anas_platyrhynchos,
+		30	Megapodius_freycinet,
+		31	Leipoa_ocellata,
+		32	Ortalis_vetula,
+		33	Penelope_jacquacu,
+		34	Penelope_superciliaris,
+		35	Bonasa_umbellus,
+		36	Tympanuchus_cupido,
+		37	Oreortyx_pictus,
+		38	Callipepla_squamata_n,
+		39	Callipepla_squamata_s,
+		40	Lophortyx_californicus,
+		41	Colinus_virginianus,
+		42	Cyrtonyx_montezumae_l,
+		43	Cyrtonyx_montezumae_s,
+		44	Alectoris_chukar,
+		45	Alectoris_rufa,
+		46	Francolinus_afer,
+		47	Francolinus_erckelii,
+		48	Francolinus_coqui_v,
+		49	Francolinus_coqui_a,
+		50	Francolinus_francolinus_a,
+		51	Francolinus_francolinus_v,
+		52	Francolinus_pondicerianus,
+		53	Perdix_perdix,
+		54	Coturnix_delegorguei,
+		55	Coturnix_coturnix_japonica_1,
+		56	Coturnix_coturnix_japonica_2,
+		57	Arborophilia_torqueola,
+		58	Bambusicola_thoracica,
+		59	Tragopan_satyra,
+		60	Tragopan_temmincki,
+		61	Lophophorus_impejanus,
+		62	Crossoptilon_auritum,
+		63	Lophura_edwardsi,
+		64	Lophura_ignita,
+		65	Gallus_gallus,
+		66	Grey_jungle_fowl,
+		67	Phasianus_colchicus,
+		68	Syrmaticus_ellioti,
+		69	Syrmaticus_reevesii,
+		70	Chrysolophus_amherstiae,
+		71	Polyplectron_bicalcaratum,
+		72	Argusianus_argus_argus,
+		73	Pavo_cristatus,
+		74	Afropavo_congensis,
+		75	Numida_meleagris,
+		76	Acryllium_vulturinum,
+		77	Meleagris_gallopavo,
+		78	Grus_carunculatus,
+		79	Anthropoides_virgo,
+		80	Grus_vipio,
+		81	Fulica_atra,
+		82	Vanellus_spinosus,
+		83	Larus_rudibundus,
+		84	Turnix_sylvatica,
+		85	Gallirallus_australis,
+		86	Geococcyx_californianus,
+		87	Dacelo_novaeguineae,
+		88	Carpococcyx_renauldi,
+		89	Podargus_strigoides
+	;
+	TREE  * PAUP_1 =  [&R] (1,(((2,3),(4,5)),((((((((((6,7),((30,31),(((32,33),34),(((((((35,57),((((53,67),70),(62,(63,64),(68,69))),(((54,(55,56)),84),74))),(((46,(48,49)),47),71)),((36,59),60),(61,(75,76))),(72,73),77),((44,45),((((50,51),58),52),(65,66)))),(((37,((38,39),40)),41),(42,43)))))),14),15),((((16,20),(18,19)),17),((((21,26),(27,(28,29))),22),((23,24),25)))),(78,79,80)),87),((81,85),(82,83))),(8,9,(10,((11,(86,88)),13),12))),89)));
+
+END;
+
+
+BEGIN NOTES;
+	TEXT  TAXON=26 TEXT= G_removed_from_end_of_sequence;
+END;
+
+BEGIN MACCLADE;
+Version 3.05;
+LastModified -1425155265;
+Singles 1000&/0;
+END;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/D10483.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/D10483.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/D10483.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3971 @@
+LOCUS       ECO110K               111408 bp    DNA     linear   BCT 29-MAY-2002
+DEFINITION  Escherichia coli genomic DNA. (0.0 - 2.4 min).
+ACCESSION   D10483 J01597 J01683 J01706 K01298 K01990 M10420 M10611 M12544
+            V00259 X04711 X54847 X54945 X55034 X56742
+VERSION     D10483.2  GI:21321891
+KEYWORDS    Complete and shotgun sequencing; thrA; thrA1; thrA2; thrB; thrC;
+            yaaA; yaaJ; talB; mog; chlG; dnaK; dnaJ; groP; gef; nhaA; ant;
+            rpsT; yaaC; ileS; lspA; yaaD; lytB; yaaF; dapB; carA; pyrA; carB;
+            yaaV; caiE; caiD; caiC; caiB; caiA; caiT; fixA; fixC; yaaT; yabE;
+            yabF; kefC; trkC; folA; tmrA; apaH; apaG; pdxA; surA; imp; ostA;
+            yabH; yabP; yabQ; hepA; polB; dinA; araD; araA; araB; araC; yabI;
+            yabJ; yabK; tbpA; yabN; yabM; leuD; leuC; leuB; leuA; leuLP; lueO;
+            ilvI; ilvH; brnP; shl; fruR; yabB; yabC; ftsL; pbpB; ftsI; murE;
+            murF; mra; mraY; murX; murD; ftsW; murG; murC; ddl; ddlB; ftsQ;
+            ftsA; divA; ftsZ; sfiB; sulB; lpxC; envA; asmB; yacA; secA; prlD;
+            azi; pea; mutT; yacG; htgA; htpY; yaaI; nhaR; antO; caiF; fixB;
+            yaaU; yabO.
+SOURCE      Escherichia coli
+  ORGANISM  Escherichia coli
+            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
+            Enterobacteriaceae; Escherichia.
+REFERENCE   1
+  AUTHORS   Ohtsubo,H. and Ohtsubo,E.
+  TITLE     Nucleotide sequence of an insertion element, IS1
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 75 (2), 615-619 (1978)
+  MEDLINE   78137003
+   PUBMED   273224
+REFERENCE   2
+  AUTHORS   Smith,B.R. and Schleif,R.
+  TITLE     Nucleotide sequence of the L-arabinose regulatory region of
+            Escherichia coli K12
+  JOURNAL   J. Biol. Chem. 253 (19), 6931-6933 (1978)
+  MEDLINE   79005683
+   PUBMED   357433
+REFERENCE   3
+  AUTHORS   Greenfield,L., Boone,T. and Wilcox,G.
+  TITLE     DNA sequence of the araBAD promoter in Escherichia coli B/r
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 75 (10), 4724-4728 (1978)
+  MEDLINE   79116194
+   PUBMED   368797
+REFERENCE   4
+  AUTHORS   Johnsrud,L.
+  TITLE     DNA sequence of the transposable element IS1
+  JOURNAL   Mol. Gen. Genet. 169 (2), 213-218 (1979)
+  MEDLINE   79177885
+   PUBMED   375010
+REFERENCE   5
+  AUTHORS   Smith,D.R. and Calvo,J.M.
+  TITLE     Nucleotide sequence of the E coli gene coding for dihydrofolate
+            reductase
+  JOURNAL   Nucleic Acids Res. 8 (10), 2255-2274 (1980)
+  MEDLINE   81053692
+   PUBMED   6159575
+REFERENCE   6
+  AUTHORS   Ogden,S., Haggerty,D., Stoner,C.M., Kolodrubetz,D. and Schleif,R.
+  TITLE     The Escherichia coli L-arabinose operon: binding sites of the
+            regulatory proteins and a mechanism of positive and negative
+            regulation
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 77 (6), 3346-3350 (1980)
+  MEDLINE   81013881
+   PUBMED   6251457
+REFERENCE   7
+  AUTHORS   Katinka,M., Cossart,P., Sibilli,L., Saint-Girons,I.,
+            Chalvignac,M.A., Le Bras,G., Cohen,G.N. and Yaniv,M.
+  TITLE     Nucleotide sequence of the thrA gene of Escherichia coli
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 77 (10), 5730-5733 (1980)
+  MEDLINE   81077247
+   PUBMED   7003595
+REFERENCE   8
+  AUTHORS   Miyada,C.G., Horwitz,A.H., Cass,L.G., Timko,J. and Wilcox,G.
+  TITLE     DNA sequence of the araC regulatory gene from Escherichia coli B/r
+  JOURNAL   Nucleic Acids Res. 8 (22), 5267-5274 (1980)
+  MEDLINE   81124262
+   PUBMED   7008027
+REFERENCE   9
+  AUTHORS   Cossart,P., Katinka,M. and Yaniv,M.
+  TITLE     Nucleotide sequence of the thrB gene of E. coli, and its two
+            adjacent regions; the thrAB and thrBC junctions
+  JOURNAL   Nucleic Acids Res. 9 (2), 339-347 (1981)
+  MEDLINE   81150470
+   PUBMED   6259626
+REFERENCE   10
+  AUTHORS   Lee,N.L., Gielow,W.O. and Wallace,R.G.
+  TITLE     Mechanism of araC autoregulation and the domains of two overlapping
+            promoters, Pc and PBAD, in the L-arabinose regulatory region of
+            Escherichia coli
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 78 (2), 752-756 (1981)
+  MEDLINE   81199399
+   PUBMED   6262769
+REFERENCE   11
+  AUTHORS   Mackie,G.A.
+  TITLE     Nucleotide sequence of the gene for ribosomal protein S20 and its
+            flanking regions
+  JOURNAL   J. Biol. Chem. 256 (15), 8177-8182 (1981)
+  MEDLINE   81264207
+   PUBMED   6267039
+REFERENCE   12
+  AUTHORS   Stoner,C.M. and Schleif,R.
+  TITLE     Is the amino acid but not the nucleotide sequence of the
+            Escherichia coli araC gene conserved?
+  JOURNAL   J. Mol. Biol. 154 (4), 649-652 (1982)
+  MEDLINE   82216830
+   PUBMED   6283093
+REFERENCE   13
+  AUTHORS   Gilson,E., Nikaido,H. and Hofnung,M.
+  TITLE     Sequence of the malK gene in E.coli K12
+  JOURNAL   Nucleic Acids Res. 10 (22), 7449-7458 (1982)
+  MEDLINE   83116968
+   PUBMED   6296778
+REFERENCE   14
+  AUTHORS   Parsot,C., Cossart,P., Saint-Girons,I. and Cohen,G.N.
+  TITLE     Nucleotide sequence of thrC and of the transcription termination
+            region of the threonine operon in Escherichia coli K12
+  JOURNAL   Nucleic Acids Res. 11 (21), 7331-7345 (1983)
+  MEDLINE   84069770
+   PUBMED   6316258
+REFERENCE   15
+  AUTHORS   Bardwell,J.C. and Craig,E.A.
+  TITLE     Major heat shock gene of Drosophila and the Escherichia coli
+            heat-inducible dnaK gene are homologous
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 81 (3), 848-852 (1984)
+  MEDLINE   84144800
+   PUBMED   6322174
+REFERENCE   16
+  AUTHORS   Innis,M.A., Tokunaga,M., Williams,M.E., Loranger,J.M., Chang,S.Y.,
+            Chang,S. and Wu,H.C.
+  TITLE     Nucleotide sequence of the Escherichia coli prolipoprotein signal
+            peptidase (lsp) gene
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 81 (12), 3708-3712 (1984)
+  MEDLINE   84222028
+   PUBMED   6374664
+REFERENCE   17
+  AUTHORS   Bouvier,J., Patte,J.C. and Stragier,P.
+  TITLE     Multiple regulatory signals in the control region of the
+            Escherichia coli carAB operon
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 81 (13), 4139-4143 (1984)
+  MEDLINE   84248073
+   PUBMED   6377309
+REFERENCE   18
+  AUTHORS   Bouvier,J., Richaud,C., Richaud,F., Patte,J.C. and Stragier,P.
+  TITLE     Nucleotide sequence and expression of the Escherichia coli dapB
+            gene
+  JOURNAL   J. Biol. Chem. 259 (23), 14829-14834 (1984)
+  MEDLINE   85054974
+   PUBMED   6094578
+REFERENCE   19
+  AUTHORS   Kamio,Y., Lin,C.K., Regue,M. and Wu,H.C.
+  TITLE     Characterization of the ileS-lsp operon in Escherichia coli.
+            Identification of an open reading frame upstream of the ileS gene
+            and potential promoter(s) for the ileS-lsp operon
+  JOURNAL   J. Biol. Chem. 260 (9), 5616-5620 (1985)
+  MEDLINE   85182715
+   PUBMED   2985604
+REFERENCE   20
+  AUTHORS   Friedberg,D., Rosenthal,E.R., Jones,J.W. and Calvo,J.M.
+  TITLE     Characterization of the 3' end of the leucine operon of Salmonella
+            typhimurium
+  JOURNAL   Mol. Gen. Genet. 199 (3), 486-494 (1985)
+  MEDLINE   85295470
+   PUBMED   2993799
+REFERENCE   21
+  AUTHORS   Cowing,D.W., Bardwell,J.C., Craig,E.A., Woolford,C., Hendrix,R.W.
+            and Gross,C.A.
+  TITLE     Consensus sequence for Escherichia coli heat shock gene promoters
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 82 (9), 2679-2683 (1985)
+  MEDLINE   85190560
+   PUBMED   3887408
+REFERENCE   22
+  AUTHORS   Chong,P., Hui,I., Loo,T. and Gillam,S.
+  TITLE     Structural analysis of a new GC-specific insertion element IS186
+  JOURNAL   FEBS Lett. 192 (1), 47-52 (1985)
+  MEDLINE   86030702
+   PUBMED   2996940
+REFERENCE   23
+  AUTHORS   Birnbaum,M.J., Haspel,H.C. and Rosen,O.M.
+  TITLE     Cloning and characterization of a cDNA encoding the rat brain
+            glucose-transporter protein
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 83 (16), 5784-5788 (1986)
+  MEDLINE   86287338
+   PUBMED   3016720
+REFERENCE   24
+  AUTHORS   Sekiguchi,T., Ortega-Cesena,J., Nosoh,Y., Ohashi,S., Tsuda,K. and
+            Kanaya,S.
+  TITLE     DNA and amino-acid sequences of 3-isopropylmalate dehydrogenase of
+            Bacillus coagulans. Comparison with the enzymes of Saccharomyces
+            cerevisiae and Thermus thermophilus
+  JOURNAL   Biochim. Biophys. Acta 867, 36-44 (1986)
+REFERENCE   25
+  AUTHORS   Ohki,M., Tamura,F., Nishimura,S. and Uchida,H.
+  TITLE     Nucleotide sequence of the Escherichia coli dnaJ gene and
+            purification of the gene product
+  JOURNAL   J. Biol. Chem. 261 (4), 1778-1781 (1986)
+  MEDLINE   86111849
+   PUBMED   3003084
+REFERENCE   26
+  AUTHORS   Mackie,G.A.
+  TITLE     Structure of the DNA distal to the gene for ribosomal protein S20
+            in Escherichia coli K12: presence of a strong terminator and an IS1
+            element
+  JOURNAL   Nucleic Acids Res. 14 (17), 6965-6981 (1986)
+  MEDLINE   87016337
+   PUBMED   2429258
+REFERENCE   27
+  AUTHORS   Blanchin-Roland,S., Blanquet,S., Schmitter,J.M. and Fayat,G.
+  TITLE     The gene for Escherichia coli diadenosine tetraphosphatase is
+            located immediately clockwise to folA and forms an operon with ksgA
+  JOURNAL   Mol. Gen. Genet. 205 (3), 515-522 (1986)
+  MEDLINE   87172305
+   PUBMED   3031429
+REFERENCE   28
+  AUTHORS   Lee,N., Gielow,W., Martin,R., Hamilton,E. and Fowler,A.
+  TITLE     The organization of the araBAD operon of Escherichia coli
+  JOURNAL   Gene 47 (2-3), 231-244 (1986)
+  MEDLINE   87163495
+   PUBMED   3549454
+REFERENCE   29
+  AUTHORS   Gronger,P., Manian,S.S., Reilander,H., O'Connell,M., Priefer,U.B.
+            and Puhler,A.
+  TITLE     Organization and partial sequence of a DNA region of the Rhizobium
+            leguminosarum symbiotic plasmid pRL6JI containing the genes fixABC,
+            nifA, nifB and a novel open reading frame
+  JOURNAL   Nucleic Acids Res. 15 (1), 31-49 (1987)
+  MEDLINE   87146339
+   PUBMED   3029674
+REFERENCE   30
+  AUTHORS   Nonet,M.L., Marvel,C.C. and Tolan,D.R.
+  TITLE     The hisT-purF region of the Escherichia coli K-12 chromosome.
+            Identification of additional genes of the hisT and purF operons
+  JOURNAL   J. Biol. Chem. 262 (25), 12209-12217 (1987)
+  MEDLINE   87308226
+   PUBMED   3040734
+REFERENCE   31
+  AUTHORS   Karpel,R., Olami,Y., Taglicht,D., Schuldiner,S. and Padan,E.
+  TITLE     Sequencing of the gene ant which affects the Na+/H+ antiporter
+            activity in Escherichia coli
+  JOURNAL   J. Biol. Chem. 263 (21), 10408-10414 (1988)
+  MEDLINE   88273145
+   PUBMED   2839489
+REFERENCE   32
+  AUTHORS   Jaiswal,A.K., McBride,O.W., Adesnik,M. and Nebert,D.W.
+  TITLE     Human dioxin-inducible cytosolic NAD(P)H:menadione oxidoreductase.
+            cDNA sequence and localization of gene to chromosome 16
+  JOURNAL   J. Biol. Chem. 263 (27), 13572-13578 (1988)
+  MEDLINE   88330879
+   PUBMED   2843525
+REFERENCE   33
+  AUTHORS   Lozoya,E., Hoffmann,H., Douglas,C., Schulz,W., Scheel,D. and
+            Hahlbrock,K.
+  TITLE     Primary structures and catalytic properties of isoenzymes encoded
+            by the two 4-coumarate: CoA ligase genes in parsley
+  JOURNAL   Eur. J. Biochem. 176 (3), 661-667 (1988)
+  MEDLINE   89005119
+   PUBMED   3169018
+REFERENCE   34
+  AUTHORS   Sato,S., Nakada,Y. and Shiratsuchi,A.
+  TITLE     IS421, a new insertion sequence in Escherichia coli
+  JOURNAL   FEBS Lett. 249 (1), 21-26 (1989)
+  MEDLINE   89252065
+   PUBMED   2542093
+REFERENCE   35
+  AUTHORS   Roa,B.B., Connolly,D.M. and Winkler,M.E.
+  TITLE     Overlap between pdxA and ksgA in the complex pdxA-ksgA-apaG-apaH
+            operon of Escherichia coli K-12
+  JOURNAL   J. Bacteriol. 171 (9), 4767-4777 (1989)
+  MEDLINE   89359108
+   PUBMED   2670894
+REFERENCE   36
+  AUTHORS   Matsubara,Y., Indo,Y., Naito,E., Ozasa,H., Glassberg,R.,
+            Vockley,J., Ikeda,Y., Kraus,J. and Tanaka,K.
+  TITLE     Molecular cloning and nucleotide sequence of cDNAs encoding the
+            precursors of rat long chain acyl-coenzyme A, short chain
+            acyl-coenzyme A, and isovaleryl-coenzyme A dehydrogenases. Sequence
+            homology of four enzymes of the acyl-CoA dehydrogenase family
+  JOURNAL   J. Biol. Chem. 264 (27), 16321-16331 (1989)
+  MEDLINE   89380240
+   PUBMED   2777793
+REFERENCE   37
+  AUTHORS   Minami-Ishii,N., Taketani,S., Osumi,T. and Hashimoto,T.
+  TITLE     Molecular cloning and sequence analysis of the cDNA for rat
+            mitochondrial enoyl-CoA hydratase. Structural and evolutionary
+            relationships linked to the bifunctional enzyme of the peroxisomal
+            beta-oxidation system
+  JOURNAL   Eur. J. Biochem. 185 (1), 73-78 (1989)
+  MEDLINE   90032688
+   PUBMED   2806264
+REFERENCE   38
+  AUTHORS   Chen,H., Sun,Y., Stark,T., Beattie,W. and Moses,R.E.
+  TITLE     Nucleotide sequence and deletion analysis of the polB gene of E.
+            coli
+  JOURNAL   DNA Cell Biol. 9, 613-635 (1990)
+REFERENCE   39
+  AUTHORS   Angerer,A., Gaisser,S. and Braun,V.
+  TITLE     Nucleotide sequences of the sfuA, sfuB, and sfuC genes of Serratia
+            marcescens suggest a periplasmic-binding-protein-dependent iron
+            transport mechanism
+  JOURNAL   J. Bacteriol. 172 (2), 572-578 (1990)
+  MEDLINE   90130288
+   PUBMED   2404942
+REFERENCE   40
+  AUTHORS   Ricca,E. and Calvo,J.M.
+  TITLE     The nucleotide sequence of leuA from Salmonella typhimurium
+  JOURNAL   Nucleic Acids Res. 18 (5), 1290 (1990)
+  MEDLINE   90206796
+   PUBMED   2181403
+REFERENCE   41
+  AUTHORS   Schaaff,I., Hohmann,S. and Zimmermann,F.K.
+  TITLE     Molecular analysis of the structural gene for yeast transaldolase
+  JOURNAL   Eur. J. Biochem. 188 (3), 597-603 (1990)
+  MEDLINE   90235831
+   PUBMED   2185015
+REFERENCE   42
+  AUTHORS   Rosenthal,E.R. and Calvo,J.M.
+  TITLE     The nucleotide sequence of leuC from Salmonella typhimurium
+  JOURNAL   Nucleic Acids Res. 18 (10), 3072 (1990)
+  MEDLINE   90272436
+   PUBMED   2190189
+REFERENCE   43
+  AUTHORS   Chen,H., Lawrence,C.B., Bryan,S.K. and Moses,R.E.
+  TITLE     Aphidicolin inhibits DNA polymerase II of Escherichia coli, an
+            alpha-like DNA polymerase
+  JOURNAL   Nucleic Acids Res. 18 (23), 7185-7186 (1990)
+  MEDLINE   91088346
+   PUBMED   2124684
+REFERENCE   44
+  AUTHORS   Mallonee,D.H., White,W.B. and Hylemon,P.B.
+  TITLE     Cloning and sequencing of a bile acid-inducible operon from
+            Eubacterium sp. strain VPI 12708
+  JOURNAL   J. Bacteriol. 172 (12), 7011-7019 (1990)
+  MEDLINE   91072253
+   PUBMED   2254270
+REFERENCE   45
+  AUTHORS   Bouvier,J. and Stragier,P.
+  TITLE     Nucleotide sequence of the lsp-dapB interval in Escherichia coli
+  JOURNAL   Nucleic Acids Res. 19 (1), 180 (1991)
+  MEDLINE   91187617
+   PUBMED   2011499
+REFERENCE   46
+  AUTHORS   Umeda,M. and Ohtsubo,E.
+  TITLE     Four types of IS1 with differences in nucleotide sequence reside in
+            the Escherichia coli K-12 chromosome
+  JOURNAL   Gene 98 (1), 1-5 (1991)
+  MEDLINE   91192599
+   PUBMED   1849492
+REFERENCE   47
+  AUTHORS   Arigoni,F., Kaminski,P.A., Hennecke,H. and Elmerich,C.
+  TITLE     Nucleotide sequence of the fixABC region of Azorhizobium
+            caulinodans ORS571: similarity of the fixB product with eukaryotic
+            flavoproteins, characterization of fixX, and identification of nifW
+  JOURNAL   Mol. Gen. Genet. 225 (3), 514-520 (1991)
+  MEDLINE   91203829
+   PUBMED   1850088
+REFERENCE   48
+  AUTHORS   Munro,A.W., Ritchie,G.Y., Lamb,A.J., Douglas,R.M. and Booth,I.R.
+  TITLE     The cloning and DNA sequence of the gene for the
+            glutathione-regulated potassium-efflux system KefC of Escherichia
+            coli
+  JOURNAL   Mol. Microbiol. 5 (3), 607-616 (1991)
+  MEDLINE   91260444
+   PUBMED   2046548
+REFERENCE   49
+  AUTHORS   Poulsen,L.K., Refn,A., Molin,S. and Andersson,P.
+  TITLE     Topographic analysis of the toxic Gef protein from Escherichia coli
+  JOURNAL   Mol. Microbiol. 5 (7), 1627-1637 (1991)
+  MEDLINE   92048480
+   PUBMED   1943700
+REFERENCE   50
+  AUTHORS   Poulsen,L.K., Refn,A., Molin,S. and Andersson,P.
+  TITLE     The gef gene from Escherichia coli is regulated at the level of
+            translation
+  JOURNAL   Mol. Microbiol. 5 (7), 1639-1648 (1991)
+  MEDLINE   92048481
+   PUBMED   1943701
+REFERENCE   51 (bases 1 to 111401)
+  AUTHORS   Yura,T., Mori,H., Nagai,H., Nagata,T., Ishihama,A., Fujita,N.,
+            Isono,K., Mizobuchi,K. and Nakata,A.
+  TITLE     Systematic sequencing of the Escherichia coli genome: analysis of
+            the 0-2.4 min region
+  JOURNAL   Nucleic Acids Res. 20 (13), 3305-3308 (1992)
+  MEDLINE   92334977
+   PUBMED   1630901
+REFERENCE   52
+  AUTHORS   Fujita,N., Mori,H., Yura,T. and Ishihama,A.
+  TITLE     Systematic sequencing of the Escherichia coli genome: analysis of
+            the 2.4-4.1 min (110,917-193,643 bp) region
+  JOURNAL   Nucleic Acids Res. 22 (9), 1637-1639 (1994)
+  MEDLINE   94261430
+   PUBMED   8202364
+REFERENCE   53
+  AUTHORS   Stragier,P.
+  JOURNAL   Unpublished
+REFERENCE   54
+  AUTHORS   Ishino,Y.
+  JOURNAL   Unpublished
+REFERENCE   55
+  AUTHORS   Ayala,J.A.
+  TITLE     Regulation of transcription at 2-minute region of the genetic map
+            of Escherichia coli
+  JOURNAL   Unpublished
+REFERENCE   56 (bases 1 to 111408)
+  AUTHORS   Mori,H.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (18-FEB-1992) Hirotada Mori, Institute for Virus
+            Research, Kyoto University, Genetics and Molecular Biology; 53
+            Shogoin Kawara-Machi, Sakyo-ku, Kyoto 606, Japan
+            (E-mail:e52985 at sakura.kudpc.kyoto-u.ac.jp, Tel:075-751-4042,
+            Fax:075-761-5626)
+COMMENT     On or before Jun 4, 2002 this sequence version replaced gi:145461,
+            gi:147757, gi:147977, gi:145773, gi:146668, gi:145775, gi:145709,
+            gi:145770, gi:40949, gi:40916, gi:42462, gi:41931, gi:40841,
+            gi:41874, gi:216434.
+            Submitted (18-FEB-1992) to DDBJ by:
+            Hirotada Mori
+            Institute for Virus Research
+            Kyoto University
+            Kawahara-cho Shogoin Sakyo-ku
+            Kyoto 606
+            Phone:  075-751-4042
+            Fax:    075-761-5626
+            Sequence updated (21-DEC-1992) by:
+            Hirotada Mori.
+FEATURES             Location/Qualifiers
+     source          1..111408
+                     /organism="Escherichia coli"
+                     /strain="K-12"
+                     /db_xref="taxon:562"
+                     /map="0-2.4 min"
+                     /note="Nucleotide position 1-111408 from the initiation
+                     site of ThrA (0 min.)."
+     gene            1..2463
+                     /gene="thrA"
+                     /note="synonyms: thrA1, thrA2"
+     CDS             1..2463
+                     /gene="thrA"
+                     /note="ORF_ID:o101#1
+                     similar to PIR Accession Number A00671"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="ThrA bifunctional enzyme"
+                     /protein_id="BAB96579.1"
+                     /db_xref="GI:21321892"
+                     /translation="MRVLKFGGTSLANAERFLRVADILESNARQGQVATVLSAPAKIT
+                     NHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIK
+                     HVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHY
+                     LESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACL
+                     RADCCEIWTDVNGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQF
+                     QIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAAR
+                     VFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMLEEFYLELKEGLLEPLAV
+                     AERLAIISVVGDGLRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATT
+                     GVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKA
+                     LLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVNCTSSQAVADQYAD
+                     FLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDINVGAGLPVIENLQNLLN
+                     AGDELMKFSGILSGSLSYIFGKLDEGMSFSEATRLAREMGYTEPDPRDDLSGMDVARK
+                     LLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEG
+                     KVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAG
+                     NDVTAAGVFADLLRTLSWKLGV"
+     gene            2465..3394
+                     /gene="thrB"
+     CDS             2465..3394
+                     /gene="thrB"
+                     /note="ORF_ID:o101#2
+                     similar to PIR Accession Number A00658"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Homoserine kinase (EC 2.7.1.39)"
+                     /protein_id="BAB96580.1"
+                     /db_xref="GI:21321893"
+                     /translation="MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAQTF
+                     SLNNLGRFADKLPSEPRENIVYQCWERFCQELGKQIPVAMTLEKNMPIGSGLGSSACS
+                     VVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENDI
+                     ISQQVQGLMSGCGCSRIRGLKSRRQKQGYLPAQYRRQDCIAHGRHLAGFIHACYSRQP
+                     ELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQ
+                     RVADWLGKNYLQNQEGFVHICRLDTAGARVLEN"
+     gene            3395..4681
+                     /gene="thrC"
+     CDS             3395..4681
+                     /gene="thrC"
+                     /note="ORF_ID:o101#3
+                     similar to PIR Accession Number A01157"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Threonine synthase (EC 4.2.99.2)"
+                     /protein_id="BAB96581.1"
+                     /db_xref="GI:21321894"
+                     /translation="MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEID
+                     EMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGP
+                     TLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRG
+                     KISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISR
+                     LLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVN
+                     DTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDE
+                     TTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGE
+                     TLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ"
+     CDS             4895..5191
+                     /note="ORF_ID:o101#4"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="BAB96582.1"
+                     /db_xref="GI:21321895"
+                     /translation="MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGY
+                     YWDGGHWRDHGWWKQHYEWRGNRWHLHGPPPPPRHHKKAPHDHHGGHGPGKHHR"
+     CDS             complement(4971..5402)
+                     /note="ORF_ID:o101#5"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="BAB96583.1"
+                     /db_xref="GI:21321896"
+                     /translation="MMKIPPAMANWCLNATSSVKFAVNAGCGVNALSCLQKHADSIYC
+                     RINVGLIRRASVASGAECRIVTRHLSFSGDVCQDHDRRDDHEELSYGGAAVAAVRVGA
+                     SDCLAIHNVVSTSRGRASDLHPSNSHDYHDRLSVILLTGLT"
+     gene            complement(5344..6120)
+                     /gene="yaaA"
+     CDS             complement(5344..6120)
+                     /gene="yaaA"
+                     /note="ORF_ID:o101#6
+                     similar to PIR Accession Number S40534"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96584.1"
+                     /db_xref="GI:21321897"
+                     /translation="MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPP
+                     QISTLMRISDKLAGINAARFHDWQPDFTPANARQAILAFKGDVYTGLQAETFSEDDFD
+                     FAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARGKDLYQFWGDIITNKLNEALA
+                     AQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKKARGLMSRF
+                     IIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR"
+     gene            complement(6190..7620)
+                     /gene="yaaJ"
+     CDS             complement(6190..7620)
+                     /gene="yaaJ"
+                     /note="ORF_ID:o101#7
+                     similar to SwissProt Accession Number P30143"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 51.7 kd protein in thrC-talB
+                     intergenic region (ORF8)."
+                     /protein_id="BAB96585.1"
+                     /db_xref="GI:21321898"
+                     /translation="MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFG
+                     KSLKNSIHPQPGGLTSFQSLCTSLAARVGSGNLAGVALAITAGGPGAVFWMWVAAFIG
+                     MATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMRWMGVLFAVFLLIAYGIIFSG
+                     VQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPLMAIIWVLT
+                     SLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMG
+                     STPNAAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQ
+                     LIQKAMRVLMGSWGAEFVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRIC
+                     TFATVIGGTLLSLPLMWQLADIIMACMAITNLTAILLLSPVVHTIASDYLRQRKLGVR
+                     PVFDPLRYPDIGRQLSPDAWDDVSQE"
+     gene            7899..8852
+                     /gene="talB"
+     CDS             7899..8852
+                     /gene="talB"
+                     /note="ORF_ID:o101#8
+                     similar to PIR Accession Number S40535"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96586.1"
+                     /db_xref="GI:21321899"
+                     /translation="MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQ
+                     IPEYRKLIDDAVAWAKQQSNDRAQQIVDATDKLAVNIGLEILKLVPGRISTEVDARLS
+                     YDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGIRAAEQLEKEGINCNLTLLFS
+                     FAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIYQYYKEHGY
+                     ETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARI
+                     TESEFLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL"
+     gene            9136..9552
+                     /gene="chlG"
+                     /note="synonym: mog"
+     CDS             9136..9552
+                     /gene="chlG"
+                     /note="ORF_ID:o102#1
+                     similar to PIR Accession Number B56688"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Molybdopterin biosynthesis Mog protein."
+                     /protein_id="BAB96587.1"
+                     /db_xref="GI:21321900"
+                     /translation="MCELVDEMSCHLVLTTGGTGPARRDVTPDATLAVADREMPGFGE
+                     QMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIKETLEGVKDAEGNVVVHGIF
+                     ASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE"
+     CDS             complement(9584..10150)
+                     /note="ORF_ID:o102#2
+                     similar to PIR Accession Number E56688"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="hgtA 5'-region hypothetical protein 1"
+                     /protein_id="BAB96588.1"
+                     /db_xref="GI:21321901"
+                     /translation="MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIF
+                     YGGIAQIFAGLLEYKKGNTFGLTAFTSYGSFWLTLVAILLMPKLGLTDAPNAQFLGVY
+                     LGLWGVFTVFMFFGTLKGARVLQFVFFSLTVLFALLAIGNIAGNAAIIHFAGWIGLIC
+                     GASAIYLAMGEVLNEQFGRTVLPIGESH"
+     gene            10479..10963
+                     /gene="htgA"
+                     /note="synonym: htpY"
+     misc_feature    10479..10963
+                     /gene="htgA"
+                     /product="Heat shock protein Y"
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o102#3
+                     similar to PIR Accession Number A40623"
+     gene            complement(11030..11435)
+                     /gene="yaaI"
+     misc_feature    complement(11030..11435)
+                     /gene="yaaI"
+                     /product="dnaK 5'-region hypothetical protein 1"
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o102#4
+                     similar to PIR Accession Number C56688"
+     gene            11812..13728
+                     /gene="dnaK"
+     CDS             11812..13728
+                     /gene="dnaK"
+                     /note="ORF_ID:o102#5
+                     similar to PIR Accession Number A03311"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="DnaK protein"
+                     /protein_id="BAB96589.1"
+                     /db_xref="GI:21321902"
+                     /translation="MGKIIGIDLGTTNSCVAIMDGTTPRVLENAEGDRTTPSIIAYTQ
+                     DGETLVGQPAKRQAVTNPQNTLFAIKRLIGRRFQDEEVQRDVSIMPFKIIAADNGDAW
+                     VEVKGQKMAPPQISAEVLKKMKKTAEDYLGEPVTEAVITVPAYFNDAQRQATKDAGRI
+                     AGLEVKRIINEPTAAALAYGLDKGTGNRTIAVYDLGGGTFDISIIEIDEVDGEKTFEV
+                     LATNGDTHLGGEDFDSRLINYLVEEFKKDQGIDLRNDPLAMQRLKEAAEKAKIELSSA
+                     QQTDVNLPYITADATGPKHMNIKVTRAKLESLVEDLVNRSIEPLKVALQDAGLSVSDI
+                     DDVILVGGQTRMPMVQKKVAEFFGKEPRKDVNPDEAVAIGAAVQGGVLTGDVKDVLLL
+                     DVTPLSLGIETMGGVMTTLIAKNTTIPTKHSQVFSTAEDNQSAVTIHVLQGERKRAAD
+                     NKSLGQFNLDGINPAPRGMPQIEVTFDIDADGILHVSAKDKNSGKEQKITIKASSGLN
+                     EDEIQKMVRDAEANAEADRKFEELVQTRNQGDHLLHSTRKQVEEAGDKLPADDKTAIE
+                     SALTALETALKGEDKAAIEAKMQELAQVSQKLMEIAQQQHAQQQTAGADASANNAKDD
+                     DVVDAEFEEVKDKK"
+     gene            13817..14947
+                     /gene="dnaJ"
+                     /note="synonym: groP"
+     CDS             13817..14947
+                     /gene="dnaJ"
+                     /note="ORF_ID:o102#6
+                     similar to SwissProt Accession Number P08622"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="DnaJ protein."
+                     /protein_id="BAB96590.1"
+                     /db_xref="GI:21321903"
+                     /translation="MAKQDYYEILGVSKTAEEREIRKAYKRLAMKYHPDRNQGDKEAE
+                     AKFKEIKEAYEVLTDSQKRAAYDQYGHAAFEQGGMGGGGFGGGADFSDIFGDVFGDIF
+                     GGGRGRQRAARGADLRYNMELTLEEAVRGVTKEIRIPTLEECDVCHGSGAKPGTQPQT
+                     CPTCHGSGQVQMRQGFFAVQQTCPHCQGRGTLIKDPCNKCHGHGRVERSKTLSVKIPA
+                     GVDTGDRIRLAGEGEAGEHGAPAGDLYVQVQVKQHPIFEREGNNLYCEVPINFAMAAL
+                     GGEIEVPTLDGRVKLKVPGETQTGKLFRMRGKGVKSVRGGAQGDLLCRVVVETPVGLN
+                     ERQKQLLQELQESFGGPTGEHNSPRSKSFFDGVKKFFDDLTR"
+     gene            complement(16400..16609)
+                     /gene="gef"
+     CDS             complement(16400..16609)
+                     /gene="gef"
+                     /note="ORF_ID:o102#7
+                     similar to PIR Accession Number S16473"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Gef protein"
+                     /protein_id="BAB96591.1"
+                     /db_xref="GI:21321904"
+                     /translation="MLNTCRVPLTDRKVKEKRAMKQHKAMIVALIVICITAVVAALVT
+                     RKDLCEVHIRTGQTEVAVFTAYESE"
+     gene            17138..18304
+                     /gene="ant"
+                     /note="synonym: nhaA"
+     CDS             17138..18304
+                     /gene="ant"
+                     /note="ORF_ID:o102#8
+                     similar to SwissProt Accession Number P13738"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Na(+)/H(+) antiporter 1."
+                     /protein_id="BAB96592.1"
+                     /db_xref="GI:21321905"
+                     /translation="MKHLHRFFSSDASGGIILIIAAILAMIMANSGATSGWYHDFLET
+                     PVQLRVGSLEINKNMLLWINDALMAVFFLLVGLEVKRELMQGSLASLRQAAFPVIAAI
+                     GGMIVPALLYLAFNYADPITREGWAIPAATDIAFALGVLALLGSRVPLALKIFLMALA
+                     IIDDLGAIIIIALFYTNDLSMASLGVAAVAIAVLAVLNLCGARRTGVYILVGVVLWTA
+                     VLKSGVHATLAGVIVGFFIPLKEKHGRSPAKRLEHVLHPWVAYLILPLFAFANAGVSL
+                     QGVTLDGLTSILPLGIIAGLLIGKPLGISLFCWLALRLKLAHLPEGTTYQQIMVVGIL
+                     CGIGFTMSIFIASLAFGSVDPELINWAKLGILVGSISSAVIGYSWLRVRLRPSV"
+     gene            18364..19283
+                     /gene="antO"
+                     /note="synonym: nhaR"
+     misc_feature    18364..19283
+                     /gene="antO"
+                     /product="Transcriptional activator protein NhaR."
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o103#1
+                     similar to SwissProt Accession Number P10087"
+     gene            complement(20478..20741)
+                     /gene="rpsT"
+     CDS             complement(20478..20741)
+                     /gene="rpsT"
+                     /note="ORF_ID:o103#2
+                     similar to PIR Accession Number A30425"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Ribosomal protein S20"
+                     /protein_id="BAB96593.1"
+                     /db_xref="GI:21321906"
+                     /translation="MANIKSAKKRAIQSEKARKHNASRRSMMRTFIKKVYAAIEAGDK
+                     AAAQKAFNEMQPIVDRQAAKGLIHKNKAARHKANLTAQINKLA"
+     gene            21070..22008
+                     /gene="yaaC"
+     CDS             21070..22008
+                     /gene="yaaC"
+                     /note="ORF_ID:o103#3
+                     similar to PIR Accession Number A22609"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 35k protein (ileS-lsp operon)"
+                     /protein_id="BAB96594.1"
+                     /db_xref="GI:21321907"
+                     /translation="MKLIRGIHNLSQAPQEGCVLTIGNFDGVHRGHRALLQGLQEEGR
+                     KRNLPVMVMLFEPQPLELFATDKAPARLTRLREKLRYLAECGVDYVLCVRFDRRFAAL
+                     TAQNFVSDLLVKHLRVKFLAVGDDFPLALVVKAISCYYRKLAWNTASISPVRKLFAEV
+                     ACASAARLRQALADDNLALAESLLGHPFAISGRVVHGDELGRTIGFPTANVPPRRQVS
+                     PVKGVYAVEVLGLGEKPLPGVANIGTRPTVAGIRQQLEVHLLDVAMDLYGRHIQVVLR
+                     KKIRNEQRFASLDELKAQIARDELTAREFFGLTKPA"
+     gene            22051..24861
+                     /gene="ileS"
+     CDS             22051..24861
+                     /gene="ileS"
+                     /note="ORF_ID:o103#4
+                     similar to PIR Accession Number S40549"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Isoleucine--tRNA ligase (EC 6.1.1.5)"
+                     /protein_id="BAB96595.1"
+                     /db_xref="GI:21321908"
+                     /translation="MSDYKSTLNLPETGFPMRGDLAKREPGMLARWTDDDLYGIIRAA
+                     KKGKKTFILHDGPPYANGSIHIGHSVNKILKDIIVKSKGLSGYDSPYVPGWDCHGLPI
+                     ELKVEQEYGKPGEKFTAAEFRAKCREYAATQVDGQRKDFIRLGVLGDWSHPYLTMGLQ
+                     NWKANIIRALGKIIGNGHLHKGAKPVHWCVDCRSALAKRKLSITTKLLRPSTLLSRAV
+                     DQDALKAKFAVSNVNGPISLVIWTTTPWTLPANRAISIAPDFDYALVQIDGQAVILAK
+                     DLVESVMQRIGVTDSRHGKRCGAGAAAFTHPFMGFDVPAILGDHVTLDAGTGAVHTAP
+                     GHGPDDYVIGQKYGLETANPVGPDGTYLPGTYPTLDGVNVFKANDIVAALLQEKGALL
+                     HVEKMQHSYPCCWRHKTPIIFRATPQWFVSMDQKGLRAQSLKEIKGVQWIPDWGQARI
+                     ESMVANRPDWCISRQRTWGVPMSLFVHKDTEELHPRTLELMEEVAKRVEVDGIQAWWD
+                     LDAKEILGDEADQYVKVPDTLDVWFDSGSTHSSVVDVRPEFAGHAADMYLEGSDQHRG
+                     WFMSSLMISTAMKGKAPYRQVLTHGFTVDGQGRKMSKSIGNTVSPQDVMNKLGADILR
+                     LWVASTDYTGEMAVSDEILKRAADTYRRIRNTARFLLANLNGFDPAKDMVKRREMVVL
+                     DRWAVVVRKRHRKTSSRRTKHTISTKWYKRLMRFCSVEMGSFYLDIIKDRQYYAKGHS
+                     VARRSCQTALYHIAEALVRWMAPILSFTADEVWGYLPGEREKYVFTGEWYEGLFGLAD
+                     SEAMNDAFWDELLKVRGEVNKVIEQARADKKVGGSLEAAVTLYAEPELSAKLTALGDE
+                     LRFVLLTSGATVADYNDAPADAQQSEVLKGLKVALSKAEGEKCPRCWHYTQDVGKVAE
+                     HAEICGRCVSNVAGDGEKRKFA"
+     gene            24861..25355
+                     /gene="lspA"
+     CDS             24861..25355
+                     /gene="lspA"
+                     /note="ORF_ID:o103#5
+                     similar to SwissProt Accession Number P00804"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Lipoprotein signal peptidase (EC 3.4.23.36)
+                     (Prolipoprotein signal peptidase) (Signal peptidase II)
+                     (Spase II)."
+                     /protein_id="BAB96596.1"
+                     /db_xref="GI:21321909"
+                     /translation="MSQSICSTGLRWLWLVVVVLIIDLGSKYLILQNFALGDTVPLFP
+                     SLNLHYARNYGAAFSFLADSGGWQRWFFAGIAIGISVILAVMMYRSKATQKLNNIAYA
+                     LIIGGALGNLFDRLWHGFVVDMIDFYVGDWHFATFNLADTAICVGAALIVLEGFLPSR
+                     AKKQ"
+     gene            25480..25929
+                     /gene="yaaD"
+     CDS             25480..25929
+                     /gene="yaaD"
+                     /note="ORF_ID:o103#6
+                     similar to PIR Accession Number JE0402"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 16.4K protein (lsp-dapB intergenic
+                     region)"
+                     /protein_id="BAB96597.1"
+                     /db_xref="GI:21321910"
+                     /translation="MSESVQSNSAVLVHFTLKLDDGTTAESTRNNGKPALFRLGDASL
+                     SEGLEQHLLGLKVGDKTTFSLEPDAAFGVPSPDLIQYFSRREFMDAGEPEIGAIMLFT
+                     AMDGSEMPGVIREINGDSITVDFNHPLAGQTVHFDIEVLEIDPALEA"
+     gene            25931..26881
+                     /gene="lytB"
+     CDS             25931..26881
+                     /gene="lytB"
+                     /note="ORF_ID:o103#7
+                     similar to PIR Accession Number JE0403"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 34.8k protein (lsp-dapB intergenic
+                     region)"
+                     /protein_id="BAB96598.1"
+                     /db_xref="GI:21321911"
+                     /translation="MQILLANPRGFCAGVDRAISIVENALAIYGAPIYVRHEVVHNRY
+                     VVDSLRERGAIFIEQISEVPDGAILIFSAHGVSQAVRNEAKSRDLTVFDATCPLVTKV
+                     HMEVARASRRGEESILIGHAGHPEVEGTMGQYSNPEGGMYLVESPDDVWKLTVKNEEK
+                     LSFMTQTTLSVDDTSDVIDALRKRFPKIVGPRKDDICYATTNRQEAVRALAEQAEVVL
+                     VVGSKNSSNSNRLAELAQRMGKRAFLIDDAKDIQEEWVKEVKCVGVTAGASAPDILVQ
+                     NVVARLQQLGGGEAIPLEGREENIVFEVPKELRVDIREVD"
+     gene            26947..27861
+                     /gene="yaaF"
+     CDS             26947..27861
+                     /gene="yaaF"
+                     /note="ORF_ID:o103#8
+                     similar to PIR Accession Number JE0404"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 32.6k protein (lsp-dapB intergenic
+                     region)"
+                     /protein_id="BAB96599.1"
+                     /db_xref="GI:21321912"
+                     /translation="MRLPIFLDTDPGIDDAVAIAAAIFAPELDLQLMTTVAGNVSVEK
+                     TTRNALQLLHFWNAEIPLAQGAAVPLVRAPRDAASVHGESGMAGYDFVEHNRKPLGIP
+                     AFLAIRDALMRAPEPVTLVAIGPLTNIALLLSQCPECKPYIRRLVIMGGSAGRGNCTP
+                     NAEFNIAADPEAAACVFRSGIEIVMCGLDVTNQAILTPDYLSTLPQLNRTGKMLHALF
+                     SHYRSGSMQSGLRMHDLCAIAWLVRPDLFTLKPCFVAVETQGEFTSGTTVVDIDGCLG
+                     KPANVQVALDLDVKGFQQWVAEVLALAS"
+     gene            28028..28849
+                     /gene="dapB"
+     CDS             28028..28849
+                     /gene="dapB"
+                     /note="ORF_ID:o103#9
+                     similar to SwissProt Accession Number P04036"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Dihydrodipicolinate reductase (EC 1.3.1.26)."
+                     /protein_id="BAB96600.1"
+                     /db_xref="GI:21321913"
+                     /translation="MHDANIRVAIAGAGGRMGRQLIQAALALEGVQLGAALEREGSSL
+                     LGSDAGELAGAGKTGVTVQSSLDAVKDDFDVFIDFTRPEGTLNHLAFCRQHGKGMVIG
+                     TTGFDEAGKQAIRDAAADIAIVFAANFSVGVNVMLKLLEKAAKVMGDYTDIEIIEAHH
+                     RHKVDAPSGTALAMGEAIAHALDKDLKDCAVYSREGHTGERVPGTIGFATVRAGDIVG
+                     EHTAMFADIGERLEITHKASSRMTFANGAVRSALWLSGKESGLFDMRDVLDLNNL"
+     gene            29305..30453
+                     /gene="carA"
+                     /note="synonym: pyrA"
+     CDS             29305..30453
+                     /gene="carA"
+                     /note="ORF_ID:o103#10
+                     similar to SwissProt Accession Number P00907"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Carbamoyl-phosphate synthase small chain (EC
+                     6.3.5.5) (carbamoyl- phosphate synthetase glutamine
+                     chain)."
+                     /protein_id="BAB96601.1"
+                     /db_xref="GI:21321914"
+                     /translation="MIKSALLVLEDGTQFHGRAIGATGSAVGEVVFNTSMTGYQEILT
+                     DPSYSRQIVTLTYPHIGNVGTNDADEESSQVHAQGLVIRDLPLIASNFRNTEDLSSYL
+                     KRHNIVAIADIDTRKLTRLLREKGAQNGCIIAGDNPDAALALEKARAFPGLNGMDLAK
+                     EVTTAEAYSWTQGSWTLTGGLPEAKKEDELPFHVVAYDFGAKRNILRMLVDRGCRLTI
+                     VPAQTSAEDVLKMNPDGIFLSNGPGDPAPCDYAITAIQKFLETDIPVFGICLGHQLLA
+                     LASGAKTVKMKFGHHGGNHPVKDVEKNVVMITAQNHGFAVDEATLPANLRVTHKSLFD
+                     GTLQGIHRTDKPAFSFQGHPEASPGPHDAAPLFDHFIELIEQYRKTAK"
+     gene            30471..33692
+                     /gene="carB"
+     CDS             30471..33692
+                     /gene="carB"
+                     /note="ORF_ID:o103#11
+                     similar to PIR Accession Number A01198"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Carbamoyl-phosphate synthase
+                     (glutamine-hydrolyzing) (EC 6.3.5.5) large chain"
+                     /protein_id="BAB96602.1"
+                     /db_xref="GI:21321915"
+                     /translation="MPKRTDIKSILILGAGPIVIGQACEFDYSGAQACKALREEGYRV
+                     ILVNSNPATIMTDPEMADATYIEPIHWEVVRKIIEKERPDAVLPTMGGQTALNCALEL
+                     ERQGVLEEFGVTMIGATADAIDKAEDRRRFDVAMKKIGLETARSGIAHTMEEALAVAA
+                     DVGFPCIIRPSFTMGGSGGGIAYNREEFEEICARGLDLSPTKELLIDESLIGWKEYEM
+                     EVVRDKNDNCIIVCSIENFDAMGIHTGDSITVAPAQTLTDKEYQIMRNASMAVLREIG
+                     VETGGSNVQFAVNPKNGRLIVIEMNPRVSRSSALASKATGFPIAKVAAKLAVGYTLDE
+                     LMNDITGGRTPASFEPSIDYVVTKIPRFNFEKFAGANDRLTTQMKSVGEVMAIGRTQQ
+                     ESLQKALRGLEVGATGFDPKVSLDDPEALTKIRRELKDAGADRIWYIADAFRAGLSVD
+                     GVFNLTNIDRWFLVQIEELVRLEEKVAEVGITGLNADFLRQLKRKGFADARLAKLAGV
+                     REAEIRKLRDQYDLHPVYKRVDTCAAEFATDTAYMYSTYEEECEANPSTDREKIMVLG
+                     GGPNRIGQGIEFDYCCVHASLALREDGYETIMVNCNPETVSTDYDTSDRLYFEPVTLE
+                     DVLEIVRIEKPKGVIVQYGGQTPLKLARALEAAGVPVIGTSPDAIDRAEDRERFQHAV
+                     ERLKLKQPANATVTAIEMAVEKAKEIGYPLVVRPSYVLGGRAMEIVYDEADLRRYFQT
+                     AVSVSNDAPVLLDHFLDDAVEVDVDAICDGEMVLIGGIMEHIEQAGVHSGDSACSLPA
+                     YTLSQEIQDVMRQQVQKLAFELQVRGLMNVQFAVKNNEVYLIEVNPRAARTVPFVSKA
+                     TGVPLAKVAARVMAGKSLAEQGVTKEVIPPYYSVKEVVLPFNKFPGVDPLLGPEMRST
+                     GEVMGVGRTFAEAFAKAQLGSNSTMKKHGRALLSVREGDKERVVDLAAKLLKQGFELD
+                     ATHGTAIVLGEAGINPRLVNKVHEGRPHIQDRIKNGEYTYIINTTSGRRAIEDSRVIR
+                     RSALQYKVHYDTTLNGGFATAMALNADATEKVISVQEMHAQIK"
+     gene            complement(33740..33919)
+                     /gene="yaaV"
+     CDS             complement(33740..33919)
+                     /gene="yaaV"
+                     /note="ORF_ID:o103#12
+                     similar to SwissProt Accession Number P46145"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 6.8 kd protein in carB-caiE
+                     intergenic region."
+                     /protein_id="BAB96603.1"
+                     /db_xref="GI:21321916"
+                     /translation="MTRFEAIKQGHIKIVDISIVCNFTVDKCELNPAYVIKNIDSPKD
+                     LLNGQKKTVLIREPY"
+     gene            33849..34351
+                     /gene="caiF"
+     misc_feature    33849..34351
+                     /gene="caiF"
+                     /product="Transcription activator caiF"
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o103#13
+                     similar to PIR Accession Number JC6025"
+     gene            complement(34438..35049)
+                     /gene="caiE"
+     CDS             complement(34438..35049)
+                     /gene="caiE"
+                     /note="ORF_ID:o103#14
+                     similar to SwissProt Accession Number P39206"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Carnitine operon protein caiE."
+                     /protein_id="BAB96604.1"
+                     /db_xref="GI:21321917"
+                     /translation="MERTLTTVSYYAFEGLIPVVHPTAFVHPSAVLIGDVIVGAGVYI
+                     GPLASLRGDYGRLIVQAGANIQDGCIMHGYCDTDTIVGENGHIGHGAILHGCLIGRDA
+                     LVGMNSVIMDGAVIGEESIVAAMSFVKAGFRGEKRQLLMGTPARAVRNVSDDELHWKR
+                     LNTKEYQDLVGRCHVSLHETQPLRQMEENRPRLQGTTDVTPKR"
+     gene            complement(35034..35927)
+                     /gene="caiD"
+     CDS             complement(35034..35927)
+                     /gene="caiD"
+                     /note="ORF_ID:o103#15
+                     similar to PIR Accession Number S40557"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein."
+                     /protein_id="BAB96605.1"
+                     /db_xref="GI:21321918"
+                     /translation="MKQQGTTLPANNHTLKQYAFFAGMLSSLKKQKWRKGMSESLHLT
+                     RNGSILEITLDRPKANAIDAKTSFEMGEVFLNFRDDPQLRVAIITGAGEKFFSAGWDL
+                     KAAAEGEAPDADFGPGGFAGLTEIFNLDKPVIAAVNGYAFGGAFELALAADFIVCADN
+                     ASFALPEAKLGIVPDSGGVLRLPKILPPAIVNEMVMTGRRMGAEEALRWGIVNRVVSQ
+                     AELMDNARELAQQLVNSAPLAIAALKEIYRTTSEMPVEEAYRYIRSGVLKHYPSVLHS
+                     EDAIEGPLAFAEKRDPVWKGR"
+     gene            complement(35928..37496)
+                     /gene="caiC"
+     CDS             complement(35928..37496)
+                     /gene="caiC"
+                     /note="ORF_ID:o104#1
+                     similar to PIR Accession Number S40558"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein."
+                     /protein_id="BAB96606.1"
+                     /db_xref="GI:21321919"
+                     /translation="MDRGAMDIIGGQHLRQMWDDLADVYGHKTALICESSGGVVNRYS
+                     YLELNQEINRTANLFYTLGIRKGDKVALHLDNCPEFIFCWFGLAKIGAIMVPINARLL
+                     CEESAWILQNSQACLLVTSAQFYPMYQQIQQEDATQLRHICLTDVALPADDGVSSFTQ
+                     LKNQQPATLCYAPPLSTDDTAEILFTSGTTSRPKGVVITHYNLRFAGYYSAWQCALRD
+                     DDVYLTVMPAFHIDCQCTAAMAAFSAGATFVLVEKYSARAFWGQVQKYRATVTECIPM
+                     MIRTLMVQPPSANDQQHRLREVMFYLNLSEQEKDAFCERFGVRLLTSYGMTETIVGII
+                     GDRPGDKRRWPSIGRVGFCYEAEIRDDHNRPLPAGEIGEICIKGIPGKTIFKEYFLNP
+                     QATAKVLEADGWLHTGDTGYRDEEDFFYFVDRRCNMIKRGGENVSCVELENIIAAHPK
+                     IQDIVVVGIKDSIRDEAIKAFVVLNEGETLSEEEFFRFCEQNMAKFKVPSYLEIRKDL
+                     PRNCSGKIIRKNLK"
+     gene            complement(37555..38772)
+                     /gene="caiB"
+     CDS             complement(37555..38772)
+                     /gene="caiB"
+                     /note="ORF_ID:o104#2
+                     similar to PIR Accession Number S40559"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="L-carnitine dehydratase (EC 4.-.-.-)."
+                     /protein_id="BAB96607.1"
+                     /db_xref="GI:21321920"
+                     /translation="MDHLPMPKFGPLAGLRVVFSGIEIAGPFAGQMFAEWGAEVIWIE
+                     NVAWADTIRVQPNYPQLSRRNLHALSLNIFKDEGREAFLKLMETTDIFIEASKGPAFA
+                     RRGITDEVLWQHNPKLVIAHLSGFGQYGTEEYTNLPAYNTIAQAFSGYLIQNGDVDQP
+                     MPAFPYTADYFSGLTATTAALAALHKVRETGKGESIDIAMYEVMLRMGQYFMMDYFNG
+                     GEMCPRMSKGKDPYYAGCGLYKCADGYIVMELVGITQIEECFKDIGLAHLLGTPEIPE
+                     GTQLIHRIECPYGPLVEEKLDAWLATHTIAEVKERFAELNIACAKVLTVPELESNPQY
+                     VARESITQWQTMDGRTCKGPNIMPKFKNNPGQIWRGMPSHGMDTAAILKNIGYSENDI
+                     QELVSKGLAKVED"
+     gene            complement(38901..40043)
+                     /gene="caiA"
+     CDS             complement(38901..40043)
+                     /gene="caiA"
+                     /note="ORF_ID:o105#1
+                     similar to PIR Accession Number S40560"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein."
+                     /protein_id="BAB96608.1"
+                     /db_xref="GI:21321921"
+                     /translation="MDFNLNDEQELFVAGIRELMASENWEAYFAECDRDSVYPERFVK
+                     ALADMGIDSLLIPEEHGGLDAGFVTLAAVWMELGRLGAPTYVLYQLPGGFNTFLREGT
+                     QEQIDKIMAFRGTGKQMWNSAITEPGAGSDVGSLKTTYTRKNGKIYLNGSKCFITSSA
+                     YTPYIVVMARDGASPDKPVYTGWFVDMSKPGIKVTKLEKLGLRMDSCCEITFDDVELD
+                     EKDMFGREGNGFNRVKEEFDHERFLVALTNYGTAMCAFEDAARYANQRVQFGEAIGRF
+                     QLIQEKFAHMAIKLNSMKNMLYEAAWKADNGTITSGDAAMCKYFCANAAFEVVDSAMQ
+                     VLGGVGIAGNHRISRFWRDLRVDRVSGGSDEMQILTLGRAVLKQYR"
+     gene            complement(40075..41589)
+                     /gene="caiT"
+     CDS             complement(40075..41589)
+                     /gene="caiT"
+                     /note="ORF_ID:o105#2
+                     similar to PIR Accession Number S40561"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein."
+                     /protein_id="BAB96609.1"
+                     /db_xref="GI:21321922"
+                     /translation="MKNEKRKTGIEPKVFFPPLIIVGILCWLTVRDLDAANVVINAVF
+                     SYVTNVWGWAFEWYMVVMLFGWFWLVFGPYAKKRLGNEPPEFSTASWIFMMFASCTSA
+                     AVLFWGSIEIYYYISTPPFGLEPNSTGAKELGLAYSLFHWGPLPWATYSFLSVAFAYF
+                     FFVRKMEVIRPSSTLVPLVGEKHAKGLFGTIVDNFYLVALIFTMGTSLGLATPLVTEC
+                     MQWLFGIPHTLQLDAIIITCWIILNAICVACGLQKGVRIASDVRSYLSFLMLGWVFIV
+                     SGASFIMNYFTDSVGMLLMYLPRMLFYTDPIAKGGFPQGWTVFYWAWWVIYAIQMSIF
+                     LARISRGRTVRELCFGMVLGLTASTWILWTVLGSNTLLLIDKNIINIPNLIEQYGVAR
+                     AIIETWAALPLSTATMWGFFILCFIATVTLVNACSYTLAMSTCREVRDGEEPPLLVRI
+                     GWSILVGIIGIVLLALGGLKPIQTAIIAGGCPLFFVNIMVTLSFIKDAKQNWKD"
+     gene            42025..42831
+                     /gene="fixA"
+     CDS             42025..42831
+                     /gene="fixA"
+                     /note="ORF_ID:o105#3
+                     similar to PIR Accession Number S40562"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="FixA homolog."
+                     /protein_id="BAB96610.1"
+                     /db_xref="GI:21321923"
+                     /translation="MNDVSFMISGDAMKIITCYKCVPDEQDIAVNNADGSLDFSKADA
+                     KISQYDLNAIEAACQLKQQAAEAQVTALSVGGKALTNAKGRKDVLSRDPDELIVVIDD
+                     QFEQALPQQTASALAAAAQKAGFDLILCGDGSSDLYAQQVGLLVGEILNIPAVNGVSK
+                     IISLTADTLTVERELEDETETLSIPLPAVVAVSTDINSPQIPSMKAILGAAKKPVQVW
+                     SAADIGFNAEAAWSEQQVAAPKQRERQRIVIEGDGEEQIAAFAENLRKVI"
+     gene            42810..43787
+                     /gene="fixB"
+     misc_feature    42810..43787
+                     /gene="fixB"
+                     /product="FixB protein."
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o105#4
+                     similar to SwissProt Accession Number P31574"
+     gene            43838..45124
+                     /gene="fixC"
+     CDS             43838..45124
+                     /gene="fixC"
+                     /note="ORF_ID:o105#5
+                     similar to PIR Accession Number S40564"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="FixC protein"
+                     /protein_id="BAB96611.1"
+                     /db_xref="GI:21321924"
+                     /translation="MSEDIFDAIIVGAGLAGSVAALVLAREGAQVLVIERGNSAGAKN
+                     VTGGRLYAHSLEHIIPGFADSAPVERLITHEKLAFMTEKSAMTMDYCNGDETSPSQRS
+                     YSVLRSKFDAWLMEQAEEAGAQLITGIRVDNLVQRDGKVVGVEADGDVIEAKTVILAD
+                     GVNSILAEKLGMAKRVKPTDVAVGVKELIELPKSVIEDRFQLQGNQGAACLFAGSPTD
+                     GLMGGGFLYTNENTLSLGLVCGLHHLHDAKKSVPQMLEDFKQHPAVAPLIAGGKLVEY
+                     SAHIMPEAGINMLPELVGDGVLIAGDAAGMCMNFGFTIRGMDLAIAAGEAAAKTVLSA
+                     MKSDDFSKQKLAEYRQHLESGPLRDMRMYQKLPAFLDNPRMFSGYPELAVGVARDLFT
+                     IDGSAPELMRKKILRHGKKVGFINLIKDGMKGVTVL"
+     gene            45121..45408
+                     /gene="yaaT"
+     CDS             45121..45408
+                     /gene="yaaT"
+                     /note="ORF_ID:o105#6
+                     similar to PIR Accession Number S40565"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96612.1"
+                     /db_xref="GI:21321925"
+                     /translation="MTSPVNVDVKLGVNKFNVDEEHPHIVVKADADKQVLELLVKACP
+                     AGLYKKQDDGSVRFDYAGCLECGTCRILGLGSALEQWEYPRGTFGVEFRYS"
+     gene            45465..46050
+                     /gene="yaaU"
+     misc_feature    45465..46050
+                     /gene="yaaU"
+                     /product="Hypothetical 18.4 kd protein in fixC-kefC
+                     intergenic region (orf65)."
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o105#7
+                     similar to SwissProt Accession Number P31679"
+     gene            46137..46799
+                     /gene="yabE"
+     CDS             46137..46799
+                     /gene="yabE"
+                     /note="ORF_ID:o105#8
+                     similar to PIR Accession Number S40566"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96613.1"
+                     /db_xref="GI:21321926"
+                     /translation="MKSSRSKPGFRDLFNRCHFPFVLFVAAIWTCQVIPMFAIYTFGP
+                     QIVGLLGLGVGKNAALGNVVISLFFMLGCIPPMLWLSTAGRRPLLIGSFAMMTLALAV
+                     LGLIPDMGIWLVVMAFAVYAFFSGGPGNLQWLYPNELFPTDIRASAVGVIMSLSRIGT
+                     IVSTWALPIFIDNYGISNTMLMGAGISLFGLLISVAFAPETRGMSLAQTSNMTIRGQR
+                     MG"
+     gene            46907..47437
+                     /gene="yabF"
+     CDS             46907..47437
+                     /gene="yabF"
+                     /note="ORF_ID:o105#9
+                     similar to PIR Accession Number S40567"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96614.1"
+                     /db_xref="GI:21321927"
+                     /translation="MILIIYAHPYPHHSHANKRMLEQARTLEGVEIRSLYQLYPDFNI
+                     DIAAEQEALSRADLIVWQHPMQWYSIPPLLKLWINKVFSHGWAYGHGGTALHGKHLLW
+                     AVTTGGGESHFEIGAHPGFDGLSQPLQATAIYCGLNWLPPFAMHCTFICDDETLEGQA
+                     RHYKQRLLEWQEAHHG"
+     gene            47430..49292
+                     /gene="kefC"
+                     /note="synonym: trkC"
+     CDS             47430..49292
+                     /gene="kefC"
+                     /note="ORF_ID:o105#10
+                     similar to SwissProt Accession Number P03819"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Glutathione-regulated potassium-efflux system
+                     protein KefC (K(+)/H(+) antiporter)."
+                     /protein_id="BAB96615.1"
+                     /db_xref="GI:21321928"
+                     /translation="MDSHTLIQALIYLGSAALIVPIAVRLGLGSVLGYLIAGCIIGPW
+                     GLRLVTDAESILHFAEIGVVLMLFIIGLELDPQRLWKLRAAVFGCGALQMVICGGLLG
+                     LFCMLLGLRWQVAELIGMTLALSSTAIAMQAMNERNLMVTQMGRSAFAVLLFQDIAAI
+                     PLVAMIPLLATSSASTTMGAFALSALKVAGALVLVVLLGRYVTRPALRFVARSGLREV
+                     FSAVALFLVFGFGLLLEEVGLSMAMGAFLAGVLLASSEYRHALESDIEPFKGLLLGLF
+                     FIGVGMSIDFGTLLENPLRIVILLLGFLIIKIAMLWLIARPLQVPNKQRRWFAVLLGQ
+                     GSEFAFVVFGAAQMANVLEPEWAKSLTLAVALSMAATPILLVILNRLEQSSTEEAREA
+                     DEIDEEQPRVIIAGFGRFGQITGRLLLSSGVKMVVLDHDPDHIETLRKFGMKVFYGDA
+                     TRMDLLESAGAAKAEVLINAIDDPQTNLQLTEMVKEHFPHLQIIARARDVDHYIRLRQ
+                     AGVEKPERETFEGALKTGRLALESLGLGPYEARERADVFRRFNIQMVEEMAMVENDTK
+                     ARAAVYKRTSAMLSEIITEDREHLSLIQRHGWQGTEEGKHTGNMADEPETKPSS"
+     gene            49482..49961
+                     /gene="folA"
+                     /note="synonym: tmrA"
+     CDS             49482..49961
+                     /gene="folA"
+                     /note="ORF_ID:o105#11
+                     similar to SwissProt Accession Number P00379"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Dihydrofolate reductase type I (EC 1.5.1.3)."
+                     /protein_id="BAB96616.1"
+                     /db_xref="GI:21321929"
+                     /translation="MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGR
+                     HTWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQ
+                     FLPKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR"
+     gene            complement(50039..50881)
+                     /gene="apaH"
+     CDS             complement(50039..50881)
+                     /gene="apaH"
+                     /note="ORF_ID:o105#12
+                     similar to PIR Accession Number A26221"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Bis(5'-nucleosyl)-tetraphosphatase (symmetrical)
+                     (EC 3.6.1.41)"
+                     /protein_id="BAB96617.1"
+                     /db_xref="GI:21321930"
+                     /translation="MATYLIGDVHGCYDELIALLHKVEFTPGKDTLWLTGDLVARGPG
+                     SLDVLRYVKSLGDSVRLVLGNHDLHLLAVFAGISRNKPLDRLTPLLEAPDADELLNWL
+                     RRQPLLQIDEEKKLVMAHAGITPQWDLQTAKECARDVEAVLSSDSYPFFLDAMYGDMP
+                     NNWSPELRGLGRLRFITNAFTRMRFCFPNGQLDMYSKESPEEAPAPLKPWFAIPGPVA
+                     EEYSIAFGHWASLEGKGTPEGIYALDTGCCWGGTLTCLRWEDKQYFVQPSNRHKDLAE
+                     AAAS"
+     gene            complement(50888..51265)
+                     /gene="apaG"
+     CDS             complement(50888..51265)
+                     /gene="apaG"
+                     /note="ORF_ID:o106#1
+                     similar to PIR Accession Number A30273"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="ApaG protein"
+                     /protein_id="BAB96618.1"
+                     /db_xref="GI:21321931"
+                     /translation="MINSPRVCIQVQSVYIEAQSSPDNERYVFAYTVTIRNLGRAPVQ
+                     LLGRYWLITNGNGRETEVQGEGVVGVQPLIAPGEEYQYTSGAIIETPLGTMQGHYEMI
+                     DENGVPFSIDIPVFRLAVPTLIH"
+     gene            complement(52087..53076)
+                     /gene="pdxA"
+     CDS             complement(52087..53076)
+                     /gene="pdxA"
+                     /note="ORF_ID:o106#2
+                     similar to PIR Accession Number JV0026"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="PdxA protein"
+                     /protein_id="BAB96619.1"
+                     /db_xref="GI:21321932"
+                     /translation="MVKTQRVVITPGEPAGIGPDLVVQLAQREWPVELVVCADATLLT
+                     NRAAMLGLPLTLRPYSPNSPAQPQTAGTLTLLPVALRAPVTAGQLAVENGHYVVETLA
+                     RACDGCLNGEFAALITGPVHKGVINDAGIPFTGHTEFFEERSQAKKVVMMLATEELRV
+                     ALATTHLPLRDIADAITPALLHEVIAILHHDLRTKFGIAEPRILVCGLNPHAGEGGHM
+                     GTEEIDTIIPVLNELRAQGMKLNGPLPADTLFQPKYLDNADAVLAMYHDQGLPVLKYQ
+                     GFGRGVNITLGLPFIRTSVDHGTALELAGRGKADVGSFITALNLAIKMIVNTQ"
+     CDS             complement(53076..53873)
+                     /note="ORF_ID:o106#3
+                     similar to PIR Accession Number PV0009"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein 98 (pdx 5' region)"
+                     /protein_id="BAB96620.1"
+                     /db_xref="GI:21321933"
+                     /translation="MGNQNDASTELNLSHILIPLPENPTSDQVNEAESQARAIVDQAR
+                     NGADFGKLAIAHSADQQALNGGQMGWGRIQELPGIFAQALSTAKKGDIVGPIRSGVGF
+                     HILKVNDLRGESKNISVTEVHARHILLKPSPIMTDEQARVKLEQIAADIKSGKTTFAA
+                     AAKEFSQDPGSANQGGDLGWATPDIFDPAFRDALTRLNKGQMSAPVHSSFGWHLIELL
+                     DTRNVDKTDAAQKDRAYRMLMNRKFSEEAASWMQEQRASAYVKILSN"
+     gene            complement(53840..54361)
+                     /gene="surA"
+     CDS             complement(53840..54361)
+                     /gene="surA"
+                     /note="ORF_ID:o106#4
+                     similar to PIR Accession Number S40574"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Survival protein SurA precursor (peptidyl-prolyl
+                     cis-trans isomerase SurA) (EC 5.2.1.8) (PPiase) (rotamase
+                     C)."
+                     /protein_id="BAB96621.1"
+                     /db_xref="GI:21321934"
+                     /translation="MKNWKTLLLGIAMIANTSFAAPQVVDKVAAVVNNGVVLESDVDG
+                     LMQSVKLNAAQARQQLPDDATLRHQIMERLIMDQIILQMGQKMGVKISDEQLDQAIAN
+                     IAKQNNMTLDQMRSRLAYDGLNYNTYRNQIRKEMIISEVRNNEVRRRITILPQKSNPW
+                     RSRWVTKTTPALS"
+     gene            complement(54414..56768)
+                     /gene="imp"
+                     /note="synonym: ostA"
+     CDS             complement(54414..56768)
+                     /gene="imp"
+                     /note="ORF_ID:o106#5
+                     similar to SwissProt Accession Number P31554"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Organic solvent tolerance protein precursor."
+                     /protein_id="BAB96622.1"
+                     /db_xref="GI:21321935"
+                     /translation="MKKRIPTLLATMIATALYSQQGLAADLASQCMLGVPSYDRPLVQ
+                     GDTNDLPVTINADHAKGDYPDDAVFTGSVDIMQGNSRLQADEVQLHQKEAPGQPEPVR
+                     TVDALGNVHYDDNQVILKGPKGWANLNTKDTNVWEGDYQMVGRQGRGKADLMKQRGEN
+                     RYTILDNGSFTSCLPGSDTWSVVGSEIIHDREEQVAEIWNARFKVGPVPIFYSPYLQL
+                     PVGDKRRSGFLIPNAKYTTTNYFEFYLPYYWNIAPNMDATITPHYMHRRGNIMWENEF
+                     RYLSQAGAGLMELDYLPSDKVYEDEHPNDDSSRRWLFYWNHSGVMDQVWRFNVDYTKV
+                     SDPSYFNDFDNKYGSSTDGYATQKFSVGYAVQNFNATVSTKQFQVFSEQNTSSYSAEP
+                     QLDVNYYQNDVGPFDTRIYGQAVHFVNTRDDMPEATRVHLEPTINLPLSNNWGSINTE
+                     AKLLATHYQQTNLDWYNSRNTTKLDESVNRVMPQFKVDGKMVFERDMEMLAPGYTQTL
+                     EPRAQYLYVPYRDQSDIYNYDSSLLQSDYSGLFRDRTYGGLDRIASANQVTTGVTSRI
+                     YDDAAVERFNISVGQIYYFTESRTGDDNITWENDDKTGSLVWAGDTYWRISERWGLRG
+                     GIQYDTRLDNVATSNSSIEYRRDEDRLVQLNYRYASPEYIQATLPKYYSTAEQYKNGI
+                     SQVGAVASWPIADRWSIVGAYYYDTNANKQADSMLGVQYSSCCYAIRVGYERKLNGWD
+                     NDKQHAVYDNAIGFNIELRGLSSNYGLGTQEMLRSNILPYQNTL"
+     gene            57023..57838
+                     /gene="yabH"
+     CDS             57023..57838
+                     /gene="yabH"
+                     /note="ORF_ID:o106#6
+                     similar to SwissProt Accession Number P31680"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 30.6 kd protein in folA-hepA
+                     intergenic region (orf81)."
+                     /protein_id="BAB96623.1"
+                     /db_xref="GI:21321936"
+                     /translation="MQYWGKIIGVAVALLMGGGFWGVVLGLLIGHMFDKARSRKMAWF
+                     ANQRERQALFFATTFEVMGHLTKSKGRVTEADIHIASQLMDRMNLHGASRTAAQNAFR
+                     VGKSDNYPLREKMRQFRSVCFGRFDLIRMFLEIQIQAAFADGSLHPNERAVLYVIAEE
+                     LGISRAQFDQFLRMMQGGAQFGGGYQQQTGGGNWQQAQRGPTLEDACNVLGVKPTDDA
+                     TTIKRAYRKLMSEHHPDKLVAKGLPPEMMEMAKQKAQEIQQAYELIKQQKGFK"
+     gene            58625..58783
+                     /gene="yabP"
+     CDS             58625..58783
+                     /gene="yabP"
+                     /note="ORF_ID:o106#7
+                     similar to SwissProt Accession Number P39220"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 5.9 kd protein in surA-hepA
+                     intergenic region."
+                     /protein_id="BAB96624.1"
+                     /db_xref="GI:21321937"
+                     /translation="MRDCYLGKKTMKGSNDILYERPGWNANLGVLPRTVLPRTVLTRT
+                     VLTWTVLP"
+     gene            58780..58938
+                     /gene="yabQ"
+     CDS             58780..58938
+                     /gene="yabQ"
+                     /note="ORF_ID:o106#8
+                     similar to SwissProt Accession Number P39221"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 5.7 kd protein in surA-hepA
+                     intergenic region."
+                     /protein_id="BAB96625.1"
+                     /db_xref="GI:21321938"
+                     /translation="MNGATSLYDEVIIINKIPPKKIDTKGVATEEVATKKVLLNKLLT
+                     TQLLNEPE"
+     gene            complement(59346..60009)
+                     /gene="yabO"
+     misc_feature    complement(59346..60009)
+                     /gene="yabO"
+                     /product="Hypothetical 24.9 kd protein in surA-hepA
+                     intergenic region."
+                     /note="ORF is interrupted by frame-shifting
+                     ORF_ID:o106#9
+                     similar to SwissProt Accession Number P39219"
+     CDS             59985..60518
+                     /note="ORF_ID:o106#10"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="BAB96626.1"
+                     /db_xref="GI:21321939"
+                     /translation="MDCSFPSPFSAPLLMRYNDQTQGIQTPTCLIQAFHYLLTVALNG
+                     GQFVVTNVRVHCTQSFQTRQLGRQFFVGFVTRCINQCTGRFLDLRFTQLKDSVNILLH
+                     GVNQFAAGVTVNRVKLAVKGFKLYLRRQVVAVFIQQHTYRRRWQEAVELQLLRSLGFN
+                     HINQFHQQRTYRQRFVF"
+     gene            complement(60021..62927)
+                     /gene="hepA"
+     CDS             complement(60021..62927)
+                     /gene="hepA"
+                     /note="ORF_ID:o106#11
+                     similar to SwissProt Accession Number P23852"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Probable ATP-dependent helicase HepA."
+                     /protein_id="BAB96627.1"
+                     /db_xref="GI:21321940"
+                     /translation="MPFTLGQRWISDTESELGLGTVVAVDARTVTLLFPSTGENRLYA
+                     RSDSPVTRVMFNPGDTITSHDGWQMQVEEVKEENGLLTYIGTRLDTEESGVALREVFL
+                     DSKLVFSKPQDRLFAGQIDRMDRFALRYRARKYSSEQFRMPYSGLRGQRTSLIPHQLN
+                     IAHDVGRRHAPRVLLADEVGLGKTIEAGMILHQQLLSGAAERVLIIVPETLQHQWLVE
+                     MLRRFNLRFALFDDERYAEAQHDAYNPFDTEQLVICSLDFARRSKQRLEHLCEAEWDL
+                     LVVDEAHHLVWSEDAPSREYQAIEQLAEHVPGVLLLTATPEQLGMESHFARLRLLDPN
+                     RFHDFAQFVEEQKNYRPVADAVAMLLAGNKLSNDELNMLGEMIGEQDIEPLLQAANSD
+                     SEDAQSARQELVSMLMDRHGTSRVLFRNTRNGVKGFPKRELHTIKLPLPTQYQTAIKV
+                     SGIMGARKSAEDRARDMLYPERIYQEFEGDNATWWNFDPRVEWLMGYLTSHRSQKVLV
+                     ICAKAATALQLEQVLREREGIRAAVFHEGMSIIERDRAAAWFAEEDTGAQVLLCSEIG
+                     SEGRNFQFASHMVMFDLPFNPDLLEQRIGRLDRIGQAHDIQIHVPYLEKTAQSVLVRW
+                     YHEGLDAFEHTCPTGRTIYDSVYNDLINYLASPDQTEGFDDLIKNCREQHEALKAQLE
+                     QGRDRLLEIHSNGGEKAQALAESIEEQDDDTNLIAFAMNLFDIIGINQDDRGDNMIVL
+                     TPSDHMLVPDFPGLSEDGITITFDREVALAREDAQFITWEHPLIRNGLDLILSGDTGS
+                     STISLLKNKALPVGTLLVELIYVVEAQAPKQLQLNRFLPPTPVRMLLDKNGNNLAAQV
+                     EFETFNRQLNAVNRHTGSKLVNAVQQDVHAILQLGEAQIEKSARALIDAARNEADEKL
+                     SAELSRLEALRAVNPNIRDDELTAIESNRQQVMESLDQAGWRLDALRLIVVTHQ"
+     CDS             complement(63092..64741)
+                     /note="ORF_ID:o107#1
+                     dinA; polB
+                     similar to E. coli DNA polymerase II (EC 2.7.7.7) (Pol
+                     II): PIR Accession Number S15943"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="BAB96628.1"
+                     /db_xref="GI:21321941"
+                     /translation="MPSRYRLPLRLGRDNSELEWREHGPAKTASFLPQAKGRLIIDGI
+                     EALKSAFWDFSSFSLETVAQELLGEGKSIDNPWDRMDEIDRRFAEDKPALATYNLKDC
+                     ELVTQIFHKTEIMPFLLERATVNGLPVDRHGGSVAAFGHLYFPRMHRAGYVAPNLGEV
+                     PPHASPGGYVMDSRPGLYDSVLVLDYKSLYPSIIRTFLIDPVGLVEGMAQPDPEHSTE
+                     GFLDAWFSREKHCLPEIVTNIWHGRDEAKRQGNKPLSQALKIIMNAFYGVLGTTACRF
+                     FDPRLASSITMRGHQIMRQTKALIEAQGYDVIYGDTDSTFVWLKGAHSEEEAAKIGRA
+                     LVQHVNAWWAETLQKQRLTSALELEYETHFCRFLMPTIRGADTGSKKRYAGLIQEGDK
+                     QRMVFKGLETVRTDWTPLAQQFQQELYLRIFRNEPYQEYVRETIDKLMAGELDARLVY
+                     RKRLRRPLSEYQRNVPPHVRAARLADEENQKRGRPLQYQNRGTIKYVWTTNGPEPLDY
+                     QRSPLDYEHYLTRQLQPVAEGILPFIEDNFATLMTGQLGLF"
+     CDS             complement(64698..65447)
+                     /note="ORF_ID:o107#2
+                     polB
+                     similar to E. coli DNA-directed DNA polymerase (EC
+                     2.7.7.7) II: PIR Accession Number S15943"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="BAB96629.1"
+                     /db_xref="GI:21321942"
+                     /translation="MAQAGFILTRHWRDTPQGTEVSFWLATDNGPLQVTLAPQESVAF
+                     IPADQVPRAQHILQGEQGFRLTPLALKDFHRQPVYGLYCRAHRQLMNYEKRLREGGVT
+                     VYEADVRPPERYLMERFITSPVWVEGDMHNGTIVNARLKPHPDYRPPLKWVSIDIETT
+                     RHGELYCIGLEGCGQRIVYMLGPENGDASSLDFELEYVASRPQLLEKLNAWFANYDPD
+                     VIIGWNVVQFDLRMLQKHAEPLPSSAASWAR"
+     gene            complement(65522..66217)
+                     /gene="araD"
+     CDS             complement(65522..66217)
+                     /gene="araD"
+                     /note="ORF_ID:o107#3
+                     similar to SwissProt Accession Number P08203"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="L-ribulose-5-phosphate 4-epimerase (EC
+                     5.1.3.4)."
+                     /protein_id="BAB96630.1"
+                     /db_xref="GI:21321943"
+                     /translation="MLEDLKRQVLEANLALPKHNLVTLTWGNVSAVDRERGVFVIKPS
+                     GVDYSVMTADDMVVVSIETGEVVEGTKKPSSDTPTHRLLYQAFPSIGGIVHTHSRHAT
+                     IWAQAGQSIPATGTTHADYFYGTIPCTRKMTDAEINGEYEWETGNVIVETFEKQGIDA
+                     AQMPGVLVHSHGPFAWGKNAEDAVHNAIVLEEVAYMGIFCRQLAPQLPDMQQTLLDKH
+                     YLRKHGAKAYYGQ"
+     gene            complement(66502..68004)
+                     /gene="araA"
+     CDS             complement(66502..68004)
+                     /gene="araA"
+                     /note="ORF_ID:o107#4
+                     similar to SwissProt Accession Number P08202"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="L-arabinose isomerase (EC 5.3.1.4)."
+                     /protein_id="BAB96631.1"
+                     /db_xref="GI:21321944"
+                     /translation="MTIFDNYEVWFVIGSQHLYGPETLRQVTQHAEHVVNALNTEAKL
+                     PCKLVLKPLGTTPDEITAICRDANYDDPCAGLVVWLHTFSPAKMWINGLTMLNKPLLQ
+                     FHTQFNAALPWDSIDMDFMNLNQTAHGGREFGFIGARMRQQHAVVTGHWQDKQAHERI
+                     GSWMRQAVSKQDTRHLKVCRFGDNMREVAVTDGDKVAAQIKFGFSVNTWAVGDLVQVV
+                     NSISDGDVNALVDEYESCYTMTPATQIHGEKRQNVLEAARIELGMKRFLEQGGFHAFT
+                     TTFEDLHGLKQLPGLAVQRLMQQGYGFAGEGDWKTAALLRIMKVMSTGLQGGTSFMED
+                     YTYHFEKGNDLVLGSHMLEVCPSIAVEEKPILDVQHLGIGGKDDPARLIFNTQTGPAI
+                     VASLIDLGDRYRLLVNCIDTVKTPHSLPKLPVANALWKAQPDLPTASEAWILAGGAHH
+                     TVFSHALNLNDMRQFAEMHDIEITVIDNDTRLPAFKDALRWNEVYYGFRR"
+     gene            complement(68015..69715)
+                     /gene="araB"
+     CDS             complement(68015..69715)
+                     /gene="araB"
+                     /note="ORF_ID:o108#1
+                     similar to PIR Accession Number B29022"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Ribulokinase (EC 2.7.1.16)"
+                     /protein_id="BAB96632.1"
+                     /db_xref="GI:21321945"
+                     /translation="MAIAIGLDFGSDSVRALAVDCASGEEIATSVEWYPRWQKGQFCD
+                     APNNQFRHHPRDYIESMEAALKTVLAELSVEQRAAVVGIGVDSTGSTPAPIDADGNVL
+                     ALRPEFAENPNAMFVLWKDHTAVERSEEITRLCHAPGNVDYSRYIGGIYSSEWFWAKI
+                     LHVTRQDSAVAQSAASWIELCDWVPALLSGTTRPQDIRRGRCSAGHKSLWHESWGGLP
+                     PASFFDELDPILNRHLPSPLFTDTWTADIPVGTLCPEWAQRLGLPESVVISGGAFDCH
+                     MGAVGAGAQPNALVKVIGTSTCDILIADKQSVGERAVKGICGQVDGSVVPGFIGLEAG
+                     QSAFGDIYAWFGRVLSWPLEQLAAQHPELKAQINASQKQLLPALTEAWAKNPSLDHLP
+                     VVLDWFNGRRSPNANQRLKGVITDLNLATDAPLLFGGLIAATAFGARAIMECFTDQGI
+                     AVNNVMALGGIARKNQVIMQACCDVLNRPLQIVASDQCCALGAAIFAAVAAKVHADIP
+                     SAQQKMASAVEKTLQPRSEQAQRFEQLYRRYQQWAMSAEQHYLPTSAPAQAAQAVATL
+                     "
+     gene            70054..70932
+                     /gene="araC"
+     CDS             70054..70932
+                     /gene="araC"
+                     /note="ORF_ID:o108#2
+                     similar to PIR Accession Number A91473"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Arabinose operon regulatory protein"
+                     /protein_id="BAB96633.1"
+                     /db_xref="GI:21321946"
+                     /translation="MAEAQNDPLLPGYSFNAHLVAGLTPIEANGYLDFFIDRPLGMKG
+                     YILNLTIRGQGVVKNQGREFVCRPGDILLFPPGEIHHYGRHPEAREWYHQWVYFRPRA
+                     YWHEWLNWPSIFANTGFFRPDEAHQPHFSDLFGQIINAGQGEGRYSELLAINLLEQLL
+                     LRRMEAINESLHPPMDNRVREACQYISDHLADSNFDIASVAQHVCLSPSRLSHLFRQQ
+                     LGISVLSWREDQRISQAKLLLSTTRMPIATVGRNVGFDDQLYFSRVFKKCTGASPSEF
+                     RAGCEEKVNDVAVKLS"
+     gene            71066..71779
+                     /gene="yabI"
+     CDS             71066..71779
+                     /gene="yabI"
+                     /note="ORF_ID:o108#3
+                     similar to PIR Accession Number S40581"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96634.1"
+                     /db_xref="GI:21321947"
+                     /translation="MAVVLVAFLESLALVGLILPGTVLMAGLGALIGSGELSFWHAWL
+                     AGIIGCLMGDWISFWLGWRFKKPLHRWSFLKKNKALLDKTEHALHQHSMFTILVGRFV
+                     GPTRPLVPMVAGMLDLPVAKFITPNIIGCLLWPPFYFLPGILAGAAIDIPAGMQSGEF
+                     KWLLLATAVFLWVGGWLCWRLWRSGKATDRLSHYLSRGRLLWLTPLISAIGVVALVVL
+                     IRHPLMPVYIDILRKVVGV"
+     gene            complement(71894..72592)
+                     /gene="yabJ"
+     CDS             complement(71894..72592)
+                     /gene="yabJ"
+                     /note="ORF_ID:o108#4
+                     similar to PIR Accession Number S40582"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96635.1"
+                     /db_xref="GI:21321948"
+                     /translation="MLKLTDITWLYHHLPMRFSLTVERGEQVAILGPSGAGKSTLLNL
+                     IAGFLTPASGSLTIDGVDHTTMPPSRRPVSMLFQENNLFSHLTVAQNIGLGLNPGLKL
+                     NAVQQGKMHAIARQMGIDNLMARLPGELSGGQRQRVALARCLVREQPILLLDELFSAL
+                     DPALRQEMLTLVSTSCQQQKMTLLMVSHSVEDAARIATRSVVVADGRIAWQGMTNELL
+                     SGKASASALLGITG"
+     gene            complement(72576..74186)
+                     /gene="yabK"
+     CDS             complement(72576..74186)
+                     /gene="yabK"
+                     /note="ORF_ID:o108#5
+                     similar to PIR Accession Number S40583"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein"
+                     /protein_id="BAB96636.1"
+                     /db_xref="GI:21321949"
+                     /translation="MATRRQPLIPGWLIPGVSATTLVVAVALAAFLALWWNAPQDDWV
+                     AVWQDSYLWHVVRFSFWQAFLSALLSVIPAIFLARALYRRRFPGRLALLRLCAMTLIL
+                     PVLVAVFGILSVYGRQGWLATLCQSLGLEWTFSPYGLQGILLAHVFFNLPMASRLLLQ
+                     ALENIPGEQRQLAAQLGMRSWHFFRFVEWPWLRRQIPPVAALIFMLCFASFATVLSLG
+                     GGPQATTIELAIYQALSYDYDPARAAMLALLQMVCCLGLVLLSQRLSKAIAPGTTLLQ
+                     GWRDPDDRLHSRICDTVLIVLALLLLLPPLLAVIVDGVNRQLPEVLAQPVLWQALWTS
+                     LRIALAAGVLCVVLTMMLLWSSRELRARQKMLAGQVLEMSGMLILAMPGIVLATGFFL
+                     LLNNTIGLPQSADGIVIFTNALMAIPYALKVLENPMRDITARYSMLCQSLGIEGWSRL
+                     KVVELRALKRPLAQALAFACVLSIGDFGVVALFGNDDFRTLPFYLYQQIGSYRSQDGA
+                     VTTLILLLLCFLLFTVIEKLPGRNVKTD"
+     gene            complement(74162..75133)
+                     /gene="tbpA"
+     CDS             complement(74162..75133)
+                     /gene="tbpA"
+                     /note="ORF_ID:o108#6
+                     similar to PIR Accession Number S40584"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein."
+                     /protein_id="BAB96637.1"
+                     /db_xref="GI:21321950"
+                     /translation="MSAPAVAVTAPVFAKPVLTVYTYDSFAADWGPGPVVKKAFEADC
+                     NCELKLVALEDGVSLLNRLRMEGKNSKADVVLGLDNNLLDAASKTGLFAKSGVAADAV
+                     NVPGGWNNDTFVPFDYGYFAFVYDKNKLKNPPQSLKELVESDQNWRVIYQDPRTSTPG
+                     LGLLLWMQKVYGDDAPQAWQKLAKKTVTVTKGWSEAYGLFLKGESDLVLSYTTSPAYH
+                     ILEEKKDNYAAANFSEGHYLQVEVAARTAASKQPELAQKFLQFMVSPAFQNAIPTGNW
+                     MYPVANVTLPAGFEKLTKPATTLEFTPAEVAAQRQAWISEWQRAVSR"
+     gene            complement(75308..76963)
+                     /gene="yabN"
+     CDS             complement(75308..76963)
+                     /gene="yabN"
+                     /note="ORF_ID:o108#7
+                     similar to SwissProt Accession Number P33595"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 63.9 kd protein in tbpA-leuD
+                     intergenic region (orf103)."
+                     /protein_id="BAB96638.1"
+                     /db_xref="GI:21321951"
+                     /translation="MPSARLQQQFIRLWQCCEGKSQDTTLNELAALLSCSRRHMRTLL
+                     NTMQDRGWLTWEAEVGRGKRSRLTFLYTGLALQQQRAEDLLEQDRIDQLVQLVGDKAT
+                     VRQMLVSHLGRSFRQGRHILRVLYYRPLRNLLPGSALRRSETHIARQIFSSLTRINEE
+                     NGELEADIAHHWQQISPLHWRFFLRPGVHFHHGRELEMDDVIASLKRINTLPLYSHIA
+                     DIVSPTPWTLDIHLTQPDRWLPLLLGQVPAMILPREWETLSNFASHPIGTGPYAVIRN
+                     STNQLKIQAFDDFFGYRALIDEVNVWVLPEIADEPAGGLMLKGPQGEEKEIESRLEEG
+                     CYYLLFDSRTHRGANQQVRDWASYVLSPTNLVYFAEEQYQQLWFPAYGLFPRWHHART
+                     IKSEKPAGLESLTLTFCQDHSEHRVIAGIMQQILASHQVTLKIKEIDYDQWHTGEIES
+                     DIWLNSANFTLPLDFSVFAHLCEVPLLQHCIPIDWQADAARWRNGEMNLANWCQQLVA
+                     SKAMVPLLHHWLIIQGQRSMRGLRMNTLGWFDFKSAWFAPPDP"
+     gene            77285..78463
+                     /gene="yabM"
+     CDS             77285..78463
+                     /gene="yabM"
+                     /note="ORF_ID:o108#8
+                     similar to SwissProt Accession Number P31675"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 42.7 kd protein in tbpA-leuD
+                     intergenic region (orf104)."
+                     /protein_id="BAB96639.1"
+                     /db_xref="GI:21321952"
+                     /translation="MIWIMTMARRMNGVYAAFMLVAFMMGVAGALQAPTLSLFLSREV
+                     GAQPFWIGLFYAVNAIAGIGVSLWLAKRSDSQGDRRKLIIFCCLMAIGNALLFAFNRH
+                     YLTLITCGVLLASLANTAMPQLFALAREYADNSAREVVMFSSVMRAQLSLAWVIGPPL
+                     AFMLALNYGFTVMFSIAAGIFTLSLVLIAFMLPSVARVELPSENALSMQGGWQDSNVR
+                     MLFVASTLMWTCNTMYIIDMPLWISSELGLPDKLAGFLMGTAAGLEIPAMILAGYYVK
+                     RYGKRRMMVIAVAAGVLFYTGLIFFNSRMALMTLQLFNAVFIGIVAGIGMLWFQDLMP
+                     GRAGAATTLFTNSISTGVILAGVIQGAIAQSWGHFAVYWVIAVISVVALFLTAKVKDV
+                     "
+     gene            complement(78512..79117)
+                     /gene="leuD"
+     CDS             complement(78512..79117)
+                     /gene="leuD"
+                     /note="ORF_ID:o108#9
+                     similar to PIR Accession Number S40585"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Isopropylmalate isomerase subunit"
+                     /protein_id="BAB96640.1"
+                     /db_xref="GI:21321953"
+                     /translation="MAEKFIKHTGLVVPLDAANVDTDAIIPKQFLQKVTRTGFGAHLF
+                     NDWRFLDEKGQQPNPDFVLNFPQYQGASILLARENFGCGSSREHAPWALTDYGFKVVI
+                     APSFADIFYGNSFNNQLLPVKLSDAEVDELFALVKANPGIHFDVDLEAQEVKAGEKTY
+                     RFTIDAFRRHCMMNGLDSIGLTLQHDDAIAAYEAKQPAFMN"
+     gene            complement(79128..80528)
+                     /gene="leuC"
+     CDS             complement(79128..80528)
+                     /gene="leuC"
+                     /note="ORF_ID:o109#1
+                     similar to PIR Accession Number S40586"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="3-isopropylmalate dehydratase (EC 4.2.1.33)
+                     alpha chain"
+                     /protein_id="BAB96641.1"
+                     /db_xref="GI:21321954"
+                     /translation="MAKTLYEKLFDAHVVYEAENETPLLYIDRHLVHEVTSPQAFDGL
+                     RAHGRPVRQPGKTFATMDHNVSTQTKGINACGEMARIQMQELIKNCKEFGVELYDLNH
+                     PYQGIVHVMGPEQGVTLPGMTIVCGDSHTATHGAFGALAFGIGTSEVEHVLATQTLKQ
+                     GRAKTMKIEVQGKAAPGITAKDIVLAIIGKTGSAGGTGHVVEFCGEAIRDLSMEGRMT
+                     LCNMAIEMGAKAGLVAPDETTFNYVKGRLHAPKGKDFDDAVAYWKTLQTDEGATFDTV
+                     VTLQAEEISPQVTWGTNPGQVISVNDNIPDPASFADPVERASAEKALAYMGLKPGIPL
+                     TEVAIDKVFIGSCTNSRIEDLRAAAEIVKGRKVAPGVQALVVPGSGPVKAQAEAEGLD
+                     KIFIEAGFEWRLPGCSMCLAMNNDRLNPGERCASTSNRNFEGRQGRGGRTHLVSPAMA
+                     AAAAVTGHFADIRNIK"
+     gene            complement(80531..81622)
+                     /gene="leuB"
+     CDS             complement(80531..81622)
+                     /gene="leuB"
+                     /note="ORF_ID:o109#2
+                     similar to SwissProt Accession Number P30125"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="3-isopropylmalate dehydrogenase (EC 1.1.1.85)
+                     (beta-ipm dehydrogenase) (imdH) (3-ipm-dh)."
+                     /protein_id="BAB96642.1"
+                     /db_xref="GI:21321955"
+                     /translation="MSKNYHIAVLPGDGIGPEVMTQALKVLDAVRNRFAMRITTSHYD
+                     VGGAAIDNHGQPLPPATVEGCEQADAVLFGSVGGPKWEHLPPDQQPERGALLPLRKHF
+                     KLFSNLRPAKLYQGLEAFCPLRADIAANGFDILCVRELTGGIYFGQPKGREGSGQYEK
+                     AFDTEVYHRFEIERIARIAFESARKRRHKVTSIDKANVLQSSILWREIVNEIATEYPD
+                     VELAHMYIDNATMQLIKDPSQFDVLLCSNLFGDILSDECAMITGSMGMLPSASLNEQG
+                     FGLYEPAGGSAPDIAGKNIANPIAQILSLALLLRYSLDADDAACAIERAINRALEEGI
+                     RTGDLARGAAAVSTDEMGDIIARYVAEGV"
+     gene            complement(81622..83193)
+                     /gene="leuA"
+     CDS             complement(81622..83193)
+                     /gene="leuA"
+                     /note="ORF_ID:o109#3
+                     similar to PIR Accession Number S40588"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="2-Isopropylmalate synthase"
+                     /protein_id="BAB96643.1"
+                     /db_xref="GI:21321956"
+                     /translation="MSQQVIIFDTTLRDGEQALQASLSVKEKLQIALALERMGVDVME
+                     VGFPVSSPGDFESVQTIARQVKNSRVCALARCVEKDIDVAAESLKVAEAFRIHTFIAT
+                     SPMHIATKLRSTLDEVIERAIYMVKRARNYTDDVEFSCEDAGRTPIADLARVVEAAIN
+                     AGATTINIPDTVGYTMPFEFAGIISGLYERVPSIGKAIISVHTHDDLGLAVGNSLAAV
+                     HAGARQVEGAMNGIGERAGNCSLEEVIMAIKVRKDILNVHTAINHQEIWRTSQLVSQI
+                     CNMPIPANKAIVGSGAFAHSSGIHQDGVLKNRENYEIMTPESIGLNQIQLNLTSRSGR
+                     AAVKHRMDEMGYKESEYNLDNLYDAFLKLADKKGQVFDYDLEALAFIGKQQEEPEHFR
+                     LDYFSVQSGSNDIATAAVKLACGEEVKAEAANGNGPVDAVYQAINRITEYNVELVKYS
+                     LTAKGHGKDALGQVDIVANYNGRRFHGVGLATDIVESSAKAMVHVLNNIWRAAEVEKE
+                     LQRKAQHNENNKETV"
+     gene            complement(83286..83372)
+                     /gene="leuLP"
+     CDS             complement(83286..83372)
+                     /gene="leuLP"
+                     /note="ORF_ID:o109#4
+                     similar to PIR Accession Number A30376"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="LeuABCD leader peptide."
+                     /protein_id="BAB96644.1"
+                     /db_xref="GI:21321957"
+                     /translation="MTHIVRFIGLLLLNASSLRGRRVSGIQH"
+     gene            83855..84904
+                     /gene="lueO"
+     CDS             83855..84904
+                     /gene="lueO"
+                     /note="ORF_ID:o109#5
+                     similar to PIR Accession Number S40589"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="LeuO protein."
+                     /protein_id="BAB96645.1"
+                     /db_xref="GI:21321958"
+                     /translation="MTHSTAMDSVFIRTRIFMFSEFYSFCFFLFYMHDKSYSSGLFLC
+                     IPIRERELSVTVELSMPEVQTDHPETAELSKPQLRMVDLNLLTVFDAVMQEQNITRAA
+                     HVLGMSQPAVSNAVARLKVMFNDELFVRYGRGIQPTARAFQLFGSVRQALQLVQNELP
+                     GSGFEPASSERVFHLCVCSPLDSILTSQIYNHIEQIAPNIHVMFKSSLNQNTEHQLRY
+                     QETEFVISYEDFHRPEFTSVPLFKDEMVLVASKNHPTIKGPLLKHDVYNEQHAAVSLD
+                     RFASFSQPWYDTVDKQASIAYQGMAMMSVLSVVSQTHLVAIAPRWLAEEFAESLELQV
+                     LPLPLKQNSRTCYLS"
+     gene            85294..86994
+                     /gene="ilvI"
+     CDS             85294..86994
+                     /gene="ilvI"
+                     /note="ORF_ID:o109#6
+                     similar to PIR Accession Number S14385"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Acetolactate synthase (EC 4.1.3.18) III large
+                     chain."
+                     /protein_id="BAB96646.1"
+                     /db_xref="GI:21321959"
+                     /translation="MEMLSGGEMVVRSLIDQGVKQVFGYPGGAVLDIYDALHTVGGID
+                     HVLVRHEQAAVHMADGLARATGEVGVVLVTSGPGATNAITGIATAYMDSIPLVVLSGQ
+                     VATSLIGYDAFQECDMVGISRPVVKHSFLVKQTEDIPQVLKKAFWLAASGRPGPVVVD
+                     LPKDILNPANKLPYVWPESVSMRSYNPTTTGHKGQIKRALQSVVAVKKPVVYVGGGAI
+                     TAGCHQQLKETVEALNLPVVCSLMGLGAFPATHRQVLGMLGMHGTYEANMTMHNADVI
+                     FAVGVRFDDRTTNNLAKYCPNATVLHIDIDPTSISKTVTADIPIVGDARQVLEQMLEL
+                     LSQESAHQPLDEIRDWWQQIEQWRARQCLKYDTHSEKIKPQAVIETLWRLTKGDAYVT
+                     SDVGQHQMFAALYYPFDKPRRWINSGGLGSMGFGLPAALGVKMAFPEETVVCVTGDGS
+                     IQMNIQELSTALQYELPVLVVNLNNRYLGMVKQWQDMIYSGRHSQSYMQSLPDFVRRG
+                     AYGHVGIQISHPHGWKANLARRWNRCAIIAWCLLMLPSMAASTSTRCRFAGAEWMKCG
+                     "
+     gene            87017..87508
+                     /gene="brnP"
+                     /note="synonym: ilvH"
+     CDS             87017..87508
+                     /gene="brnP"
+                     /note="ORF_ID:o109#7
+                     similar to SwissProt Accession Number P00894"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Acetolactate synthase isozyme III small subunit
+                     (EC 4.1.3.18) (ahas- III) (acetohydroxy-acid synthase III
+                     small subunit) (als-III)."
+                     /protein_id="BAB96647.1"
+                     /db_xref="GI:21321960"
+                     /translation="MRRILSVLLENESGALSRVIGLFSQRGYNIESLTVAPTDDPTLS
+                     RMTIQTVGDEKVLEQIEKQLHKLVDVLRVSELGQGAHVEREIMLVKIQASGYGRDEVK
+                     RNTEIFRGQIIDVTPSLYTVQLAGTSGKLSAFLASIRDVAKIVEVARSGVVGLSRGDK
+                     IMR"
+     gene            87688..88692
+                     /gene="fruR"
+                     /note="synonym: shl"
+     CDS             87688..88692
+                     /gene="fruR"
+                     /note="ORF_ID:o109#8
+                     similar to PIR Accession Number JU0298"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Pep-fructosephosphotransferase system
+                     repressor."
+                     /protein_id="BAB96648.1"
+                     /db_xref="GI:21321961"
+                     /translation="MKLDEIARLAGVSRTTASYVINGKAKQYRVSDKTVEKVMAVVRE
+                     HNYHPNAVAAGLRAGRTRSIGLVIPDLENTSYTRIANYLERQARQRGYQLLIACSEDQ
+                     PDNEMRCIEHLLQRQVDAIIVSTSLPPEHPFYQRWANDPFPIVALDRALDREHFTSVV
+                     GADQDDAEMLAEELRKFPAETVLYLGALPELSVSFLREQGFRTAWKDDPREVHFLYAN
+                     SYEREAAAQLFEKWLETHPMPQALFTTSFALLQGVMDVTLRRDGKLPSDLAIATFGDN
+                     ELLDFLQCPVLAVAQRHRDVAERVLEIVLASLDEPRKPKPGLTRIKRNLYRRGVLSRS
+                     "
+     gene            89294..89752
+                     /gene="yabB"
+     CDS             89294..89752
+                     /gene="yabB"
+                     /note="ORF_ID:o109#9
+                     similar to PIR Accession Number S14388"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical protein C."
+                     /protein_id="BAB96649.1"
+                     /db_xref="GI:21321962"
+                     /translation="MFRGATLVNLDSKGRLSVPTRYREQLLENAAGQMVCTIDIYHPC
+                     LLLYPLPEWEIIEQKLSRLSSMNPVERRVQRLLLGHASECQMDGAGRLLIAPVLRQHA
+                     GLTKEVMLVGQFNKFELWDETTWHQQVKEDIDAEQLATGDLSERLQDLSL"
+     gene            89754..90695
+                     /gene="yabC"
+     CDS             89754..90695
+                     /gene="yabC"
+                     /note="ORF_ID:o109#10
+                     similar to SwissProt Accession Number P18595"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 34.9 kd protein in fruR-ftsL
+                     intergenic region (orfB)."
+                     /protein_id="BAB96650.1"
+                     /db_xref="GI:21321963"
+                     /translation="MMENYKHTTVLLDEAVNGLNIRPDGIYIDGTFGRGGHSRLILSQ
+                     LGEEGRLLAIDRDPQAIAVAKTIDDPRFSIIHGPFSALGEYVAERDLIGKIDGILLDL
+                     GVSSPQLDDAERGFSFMRDGPLDMRMDPTRGQSAAEWLQTAEEADIAWVLKTYGEERF
+                     AKRIARAIVERNREQPMTRTKELAEVVAAATPVKDKFKHPATRTFQAVRIWVNSELEE
+                     IEQALKSSLNVLAPGGRLSIISFHSLEDRIVKRFMRENSRGPQVPAGLPMTEEQLKKL
+                     GGRQLRALGKLMPGEEEVAENPRARSSVLRIAERTNA"
+     gene            90692..91024
+                     /gene="ftsL"
+     CDS             90692..91024
+                     /gene="ftsL"
+                     /note="ORF_ID:o110#1
+                     similar to PIR Accession Number S14387"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Cell division protein FtsL"
+                     /protein_id="BAB96651.1"
+                     /db_xref="GI:21321964"
+                     /translation="MISRVTEALSKVKGSMGSHERHALPGVIGDDLLRFGKLPLCLFI
+                     CIILTAVTVVTTAHHTRLLTAQREQLVLERDALDIEWRNLILEENALGDHSRVERIAT
+                     EKLQMQTC"
+     gene            91074..92840
+                     /gene="ftsI"
+                     /note="synonym: pbpB"
+     CDS             91074..92840
+                     /gene="ftsI"
+                     /note="ORF_ID:o110#2
+                     similar to PIR Accession Number A93123"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Penicillin-binding protein 3 precursor."
+                     /protein_id="BAB96652.1"
+                     /db_xref="GI:21321965"
+                     /translation="MKAAAKTQKPKRQEEHANFISWRFALLCGCILLALAFLLGRVAW
+                     LQVISPDMLVKEGDMRSLRVQQVSTSRGMITDRSGRPLAVSVPVKAIWADPKEVHDAG
+                     GISVGDRWKALANALNIPLDQLSARINANPKGRFIYLARQVNPDMADYIKKLKLPGIH
+                     LREESRRYYPSGEVTAHLIGFTNVDSQGIEGVEKSFDKWLTGQPGERIVRKDRYGRVI
+                     EDISSTDSQAAHNLALSIDERLQALVYRELNNAVAFNKAESGSAVLVDVNTGEVLAMA
+                     NSPSYNPNNLSGTPKEAMRNRTITDVFEPGSTVKPMVVMTALQRGVVRENSVLNTIPY
+                     RINGHEIKDVARYSELTLTGVLQKSSNVGVSKLALAMPSSALVDTYSRFGLGKATNLG
+                     LVGERSGLYPQKQRWSDIERATFSFGYGLMVTPLQLARVYATIGSYGIYRPLSITKVD
+                     PPVPGERVFPESIVRTVVHMMESVALPGGGGVKAAIKGYRIAIKTGTAKKVGPDGRYI
+                     NKYIAYTAGVAPASQPRFALVVVINDPQAGKYYGGAVSAPVFGAIMGGVLRTMNIEPD
+                     ALTTGDKNEFVINQGEGTGGRS"
+     gene            92827..94314
+                     /gene="murE"
+     CDS             92827..94314
+                     /gene="murE"
+                     /note="ORF_ID:o110#3
+                     similar to PIR Accession Number S40595"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="UDP-N-acetylmuramoylalanyl-D-glutamate--2,
+                     6-diaminopimelate ligase (EC 6.3.2.13) murE"
+                     /protein_id="BAB96653.1"
+                     /db_xref="GI:21321966"
+                     /translation="MADRNLRDLLAPWVPDAPSRALREMTLDSRVAAAGDLFVAVVGH
+                     QADGRRYIPQAIAQGVAAIIAEAKDEATDGEIREMHGVPVIYLSQLNERLSALAGRFY
+                     HEPSDNLRLVGVTGTNGKTTTTQLLAQWSQLLGEISAVMGTVGNGLLGKVIPTENTTG
+                     SAVDVQHELAGLVDQGATFCAMEVSSHGLVQHRVAALKFAASVFTNLSRDHLDYHGDM
+                     EHYEAAKWLLYSEHHCGQAIINADDEVGRRWLAKLPDAVAVSMEDHINPNCHGRWLKA
+                     TEVNYHDSGATIRFSSSWGDGEIESHLMGAFNVSNLLLALATLLALGYPLADLLKTAA
+                     RLQPVCGRMEVFTAPGKPTVVVDYAHTPDALEKALQAARLHCAGKLWCVFGCGGDRDK
+                     GKRPLMGAIAEEFADVAVVTDDNPRTEEPRAIINDILAGMLDAGHAKVMEGRAEAVTC
+                     AVMQAKENDVVLVAGKGHEDYQIVGNQRLDYSDRVTVARLLGVIA"
+     gene            94311..95669
+                     /gene="mra"
+                     /note="synonym: murF"
+     CDS             94311..95669
+                     /gene="mra"
+                     /note="ORF_ID:o110#4
+                     similar to SwissProt Accession Number P11880"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="UDP-n-acetylmuramoylalanyl-d-glutamyl-2,
+                     6-diaminopimelate-d-alanyl -d- alanyl ligase (EC 6.3.2.15)
+                     (UDP-murnac-pentapeptide synthetase)
+                     (d-alanyl-d-alanine-adding enzyme)."
+                     /protein_id="BAB96654.1"
+                     /db_xref="GI:21321967"
+                     /translation="MISVTLSQLTDILNGELQGADITLDAVTTDTRKLTPGCLFVALK
+                     GERFDAHDFADQAKAGAAGALLVSRPLDIDLPQLIVKDTRLAFGELAAWVRQQVPARV
+                     VALTGSSGKTSVKEMTAAILSQCGNTLYTAGNLNNDIGVPMTLLRLTPEYDYAVIELG
+                     ANHQGEIAWTVSLTRPERALVNNLAAAHLEGFGSLAGVAKAKGEIFSGLPENGIAIMN
+                     ADNNDWLNWQSVIGSRKVWRFSPNAANSDFTATNIHVTSHGTEFTLQTPTGSVDVLLP
+                     LPGRHNIANALAAAALSMSVGATLDAIKAGLANLKAVPGRLFPIQLAENQLLLDDSYN
+                     ANVGSMTAAVQVLAEMPGYRVLVVGDMAELGAESEACHVQVGEAAKAAGIDRVLSVGK
+                     QSHAISTASGVGEHFADKTALITRLKLLIAEQQVITILVKGSRSAAMEEVVRALQENG
+                     TC"
+     gene            95663..96745
+                     /gene="mraY"
+                     /note="synonym: murX"
+     CDS             95663..96745
+                     /gene="mraY"
+                     /note="ORF_ID:o110#5
+                     similar to SwissProt Accession Number P15876"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Phospho-n-acetylmuramoyl-pentapeptide-transferas
+                     e (EC 2.7.8.13)."
+                     /protein_id="BAB96655.1"
+                     /db_xref="GI:21321968"
+                     /translation="MLVWLAEHLVKYYSGFNVFSYLTFRAIVSLLTALFISLWMGPRM
+                     IAHLQKLSFGQVVRNDGPESHFSKRGTPTMGGIMILTAIVISVLLWAYPSNPYVWCVL
+                     VVLVGYGVIGFVDDYRKVVRKDTKGLIARWKYFWMSVIALGVAFALYLAGKDTPATQL
+                     VVPFFKDVMPQLGLFYILLAYFVIVGTGNAVNLTDGLDGLAIMPTVFVAGGFALVAWA
+                     TGNMNFASYLHIPYLRHAGELVIVCTAIVGAGLGFLWFNTYPAQVFMGDVGSLALGGA
+                     LGIIAVLLRQEFLLVIMGGVFVVETLSVILQVGSFKLRGQRIFRMAPIHHHYELKGWP
+                     EPRVIVRFWIISLMLVLIGLATLKVR"
+     gene            96748..98064
+                     /gene="murD"
+     CDS             96748..98064
+                     /gene="murD"
+                     /note="ORF_ID:o110#6
+                     similar to PIR Accession Number S08396"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="UDP-n-acetylmuramoylalanine-d-glutamate ligase
+                     (EC 6.3.2.9)."
+                     /protein_id="BAB96656.1"
+                     /db_xref="GI:21321969"
+                     /translation="MADYQGKNVVIIGLGLTGLSCVDFFLARGVTPRVMDTRMTPPGL
+                     DKLPEAVERHTGSLNDEWLMAADLIVASPGIALAHPSLSAAADAGIEIVGDIELFCRE
+                     AQAPIVAITGSNGKSTVTTLVGEMAKAAGVNVGVGGNIGLPALMLLDDECELYVLELS
+                     SFQLETTSSLQAVAATILNVTEDHMDRYPFGLQQYRAAKLRIYENAKVCVVNADDALT
+                     MPIRGADERCVSFGVNMGDYHLNHQQGETWLRVKGEKVLNVKEMKLSGQHNYTNALAA
+                     LALADAAGLPRASSLKALTTFTGLPHRFEVVLEHNGVRWINDSKATNVGSTEAALNGL
+                     HVDGTLHLLLGGDGKSADFSPLARYLNGDNVRLYCFGRDGAQLAALRPEVAEQTETME
+                     QAMRLLAPRVQPGDMVLLSPACASLDQFKNFEQRGNEFARLAKELG"
+     gene            98064..99308
+                     /gene="ftsW"
+     CDS             98064..99308
+                     /gene="ftsW"
+                     /note="ORF_ID:o111#1
+                     similar to PIR Accession Number A32581"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Cell division protein FtsW."
+                     /protein_id="BAB96657.1"
+                     /db_xref="GI:21321970"
+                     /translation="MRLSLPRLKMPRLPGFSILVWISTALKGWVMGSREKDTDSLIMY
+                     DRTLLWLTFGLAAIGFIMVTSASMPIGQRLTNDPFFFAKRDGVYLILAFILAIITLRL
+                     PMEFWQRYSATMLLGSIILLMIVLVVGSSVKGASRWIDLGLLRIQPAELTKLSLFCYI
+                     ANYLVRKGDEVRNNLRGFLKPMGVILVLAVLLLAQPDLGTVVVLFVTTLAMLFLAGAK
+                     LWQFIAIIGMGISAVVLLILAEPYRIRRVTAFWNPWEDPFGSGYQLTQSLMAFGRGEL
+                     WGQGLGNSVQKLEYLPEAHTDFIFAIIGEELGYVGVVLALLMVFFVAFRAMSIGRKAL
+                     EIDHRFSGFLACSIGIWFSFQALVNVGAAAGMLPTKGLTLPLISYGGSSLLIMSTAIM
+                     MLLRIDYETRLEKAQAFVRGSR"
+     gene            99305..100372
+                     /gene="murG"
+     CDS             99305..100372
+                     /gene="murG"
+                     /note="ORF_ID:o111#2
+                     similar to PIR Accession Number JQ0544"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="MurG protein."
+                     /protein_id="BAB96658.1"
+                     /db_xref="GI:21321971"
+                     /translation="MSGQGKRLMVMAGGTGGHVFPGLAVAHHLMAQGWQVRWLGTADR
+                     MEADLVPKHGIEIDFIRISGLRGKGIKALIAAPLRIFNAWRQARAIMKAYKPDVVLGM
+                     GGYVSGPGGLAAWSLGIPVVLHEQNGIAGLTNKWLAKIATKVMQAFPGAFPNAEVVGN
+                     PVRTDVLALPLPQQRLAGREGPVRVLVVGGSQGARILNQTMPQVAAKLGDSVTIWHQS
+                     GKGSQQSVEQAYAEAGQPQHKVTEFIDDMAAAYAWADVVVCRSGALTVSEIAAAGLPA
+                     LFVPFQHKDRQQYWNALPLEKAGAAKIIEQPQLSVDAVANTLAGWSRETLLTMAERAR
+                     AASIPDATERVANEVSRVARA"
+     gene            100426..101901
+                     /gene="murC"
+     CDS             100426..101901
+                     /gene="murC"
+                     /note="ORF_ID:o111#3
+                     similar to PIR Accession Number JQ0545"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="UDP-n-acetylmuramate-alanine ligase (EC
+                     6.3.2.8)."
+                     /protein_id="BAB96659.1"
+                     /db_xref="GI:21321972"
+                     /translation="MNTQQLAKLRSIVPEMRRVRHIHFVGIGGAGMGGIAEVLANEGY
+                     QISGSDLAPNPVTQQLMNLGATIYFNHRPENVRDASVVVVSSAISADNPEIVAAHEAR
+                     IPVIRRAEMLAELMRFRHGIAIAGTHGKTTTTAMVSSIYAEAGLDPTFVNGGLVKAAG
+                     VHARLGHGRYLIAEADESDASFLHLQPMVAIVTNIEADHMDTYQGDFENLKQTFINFL
+                     HNLPFYGRAVMCVDDPVIRELLPRVGRQTTTYGFSEDADVRVEDYQQIGPQGHFTLLR
+                     QDKEPMRVTLNAPGRHNALNAAAAVAVATEEGIDDEAILRALESFQGTGRRFDFLGEF
+                     PLEPVNGKSGTAMLVDDYGHHPTEVDATIKAARAGWPDKNLVMLFQPHRFTRTRDLYD
+                     DFANVLTQVDTLLMLEVYPAGEAPIPGADSRSLCRTIRGRGKIDPILVPDPARVAEML
+                     APVLTGNDLILVQGAGNIGKIARSLAEIKLKPQTPEEEQHD"
+     gene            101894..102814
+                     /gene="ddl"
+                     /note="synonym: ddlB"
+     CDS             101894..102814
+                     /gene="ddl"
+                     /note="ORF_ID:o111#4
+                     similar to PIR Accession Number A30289"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="D-alanine-d-alanine ligase (EC 6.3.2.4) B."
+                     /protein_id="BAB96660.1"
+                     /db_xref="GI:21321973"
+                     /translation="MTDKIAVLLGGTSAEREVSLNSGAAVLAGLREGGIDAYPVDPKE
+                     VDVTQLKSMGFQKVFIALHGRGGEDGTLQGMLELMGLPYTGSGVMASALSMDKLRSKL
+                     LWQGAGLPVAPWVALTRAEFEKGLSDKQLAEISALGLPVIVKPSREGSSVGMSKVVAE
+                     NALQDALRLAFQHDEEVLIEKWLSGPEFTVAILGEEILPSIRIQPSGTFYDYEAKYLS
+                     DETQYFCPAGLEASQEANLQALVLKAWTTLGCKGWGRIDVMLDSDGQFYLLEANTSPG
+                     MTSHSLVPMAARQAGMSFSQLVVRILELAD"
+     gene            102816..103646
+                     /gene="ftsQ"
+     CDS             102816..103646
+                     /gene="ftsQ"
+                     /note="ORF_ID:o111#5
+                     similar to PIR Accession Number S10852"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Cell division protein FtsQ."
+                     /protein_id="BAB96661.1"
+                     /db_xref="GI:21321974"
+                     /translation="MSQAALNTRNSEEEVSSRRNNGTRLAGILFLLTVLTTVLVSGWV
+                     VLGWMEDAQRLPLSKLVLTGERHYTRNDDIRQSILALGEPGTFMTQDVNIIQTQIEQR
+                     LPWIKQVSVRKQWPDELKIHLVEYVPIARWNDQHMVDAEGNTFSVPPERTSKQVLPML
+                     YGPEGSANEVLQGYREMGQMLAKDRFTLKEAAMTARRSWQLTLNNDIKLNLGRGDTMK
+                     RLARFVELYPVLQQQAQTDGKRISYVDLRYDSGAAVGWAPLPPEESTQQQNQAQAEQQ
+                     "
+     gene            103643..104905
+                     /gene="divA"
+                     /note="synonym: ftsA"
+     CDS             103643..104905
+                     /gene="divA"
+                     /note="ORF_ID:o111#6
+                     similar to SwissProt Accession Number P06137"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Cell division protein FtsA."
+                     /protein_id="BAB96662.1"
+                     /db_xref="GI:21321975"
+                     /translation="MIKATDRKLVVGLEIGTAKVAALVGEVLPDGMVNIIGVGSCPSR
+                     GMDKGGVNDLESVVKCVQRAIDQAELMADCQISSVYLALSGKHISCQNEIGMVPISEE
+                     EVTQEDVENVVHTAKSVRVRDEHRVLHVIPQEYAIDYQEGIKNPVGLSGVRMQAKVHL
+                     ITCHNDMAKNIVKAVERCGLKVDQLIFAGLASSYSVLTEDERELGVCVVDIGGGTMDI
+                     AVYTGGALRHTKVIPYAGNVVTSDIAYAFGTPPSDAEAIKVRHGCALGSIVGKDESVE
+                     VPSVGGRPPRSLQRQTLAEVIEPRYTELLNLVNEEILQLQEKLRQQGVKHHLAAGIVL
+                     TGGAAQIEGLAACAQRVFHTQVRIGAPLNITGLTDYAQEPYYSTAVGLLHYGKESHLN
+                     GEAEVEKRVTASVGSWIKRLNSWLRKEF"
+     gene            104966..106117
+                     /gene="ftsZ"
+                     /note="synonyms: sfiB, sulB"
+     CDS             104966..106117
+                     /gene="ftsZ"
+                     /note="ORF_ID:o111#7
+                     similar to SwissProt Accession Number P06138"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Cell division protein FtsZ."
+                     /protein_id="BAB96663.1"
+                     /db_xref="GI:21321976"
+                     /translation="MFEPMELTNDAVIKVIGVGGGGGNAVEHMVRERIEGVEFFAVNT
+                     DAQALRKTAVGQTIQIGSGITKGLGAGANPEVGRNAADEDRDALRAALEGADMVFIAA
+                     GMGGGTGTGAAPVVAEVAKDLGILTVAVVTKPFNFEGKKRMAFAEQGITELSKHVNSL
+                     ITIPNDKLLKVLGRGISLLDAFGAANDVLKGAVQGIAELITRPGLMNVDFADVRTVMS
+                     EMGHAMMGSGVASGEDRAEEAAEMAISSPLLEDIDLSGARGVLVNITAGFDLRLDEFE
+                     TVGNTIRAFASDNATVVIGTSLDPDMNDELRVTVVATGIGMDKRPEITLVTNKQVQQP
+                     VMDRYQQHGMAPLTQEQKPVAKVVNDNAPQTAKEPDYLDIPAFLRKQAD"
+     gene            106218..107135
+                     /gene="asmB"
+                     /note="synonyms: envA, lpxC"
+     CDS             106218..107135
+                     /gene="asmB"
+                     /note="ORF_ID:o111#8
+                     similar to SwissProt Accession Number P07652"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Udp-3-o-[3-hydroxymyristoyl] n-acetylglucosamine
+                     deacetylase (EC 3.5.1.-) (EnvA protein)."
+                     /protein_id="BAB96664.1"
+                     /db_xref="GI:21321977"
+                     /translation="MIKQRTLKRIVQATGVGLHTGKKVTLTLRPAPANTGVIYRRTDL
+                     NPPVDFPADAKSVRDTMLCTCLVNEHDVRISTVEHLNAALAGLGIDNIVIEVNAPEIP
+                     IMDGSAAPFVYLLLDAGIDELNCAKKFVRIKETVRVEDGDKWAEFKPYNGFSLDFTID
+                     FNHPAIDSSNQRYAMNFSADAFMRQISRARTFGFMRDIEYLQSRGLCLGGSFDCAIVV
+                     DDYRVLNEDGLRFEDEFVRHKMLDAIGDLFMCGHNIIGAFTAYKSGHALNNKLLQAVL
+                     AKQEAWEYVTFQDDAELPLAFKAPSAVLA"
+     gene            107435..107878
+                     /gene="yacA"
+     CDS             107435..107878
+                     /gene="yacA"
+                     /note="ORF_ID:o111#9
+                     similar to PIR Accession Number B28381"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 16k protein (eneA-secA intergenic
+                     region)."
+                     /protein_id="BAB96665.1"
+                     /db_xref="GI:21321978"
+                     /translation="MVAASLGLPALSNAAEPNAPAKATTRNHEPSAKVNFGQLALLEA
+                     NTRRPNSNYSVDYWHQHAIRTVIRHLSFAMAPQTLPVAEESLPLQAQHLALLDTLSAL
+                     LTQEGTPSEKGYRIDYAHFTPQAKFSTPVWISQAQGIRAGPQRLT"
+     gene            107940..110645
+                     /gene="azi"
+                     /note="synonyms: pea, prlD, secA"
+     CDS             107940..110645
+                     /gene="azi"
+                     /note="ORF_ID:o111#10
+                     similar to SwissProt Accession Number P10408"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Preprotein translocase SecA subunit."
+                     /protein_id="BAB96666.1"
+                     /db_xref="GI:21321979"
+                     /translation="MLIKLLTKVFGSRNDRTLRRMRKVVNIINAMEPEMEKLSDEELK
+                     GKTAEFRARLEKGEVLENLIPEAFAVVREASKRVFGMRHFDVQLLGGMVLNERCIAEM
+                     RTGEGKTLTATLPAYLNALTGKGVHVVTVNDYLAQRDAENNRPLFEFLGLTVGINLPG
+                     MPAPAKREAYAADITYGTNNEYGFDYLRDNMAFSPEERVQRKLHYALVDEVDSILIDE
+                     ARTPLIISGPAEDSSEMYKRVNKIIPHLIRQEKEDSETFQGEGHFSVDEKSRQVNLTE
+                     RGLVLIEELLVKEGIMDEGESLYSPANIMLMHHVTAALRAHALFTRDVDYIVKDGEVI
+                     IVDEHTGRTMQGRRWSDGLHQAVEAKEGVQIQNENQTLASITFQNYFRLYEKLAGMTG
+                     TADTEAFEFSSIYKLDTVVVPTNRPMIRKDLPDLVYMTEAEKIQAIIEDIKERTAKGQ
+                     PVLVGTISIEKSELVSNELTKAGIKHNVLNAKFHANEAAIVAQAGYPAAVTIATNMAG
+                     RGTDIVLGGSWQAEVAALENPTAEQIEKIKADWQVRHDAVLEAGGLHIIGTERHESRR
+                     IDNQLRGRSGRQGDAGSSRFYLSMEDALMRIFASDRVSGMMRKLGMKPGEAIEHPWVT
+                     KAIANAQRKVESRNFDIRKQLLEYDDVANDQRRAIYSQRNELLDVSDVSETINSIRED
+                     VFKATIDAYIPPQSLEEMWDIPGLQERLKNDFDLDLPIAEWLDKEPELHEETLRDGIL
+                     AQSIEVYQRKEEVVGAEMMRHFEKGVMLQTLDSLWKEHLAAMDYLRQGIHLRGYAQKD
+                     PKQEYKRESFSMFAAMLESLKYEVISTLSKVQVRMPEEVEELEQQRRMEAERLAQMQQ
+                     LSHQDDDSAAAAALAAQTGERKVGRNDPCPCGSGKKYKQCHGRLQ"
+     gene            110705..111094
+                     /gene="mutT"
+     CDS             110705..111094
+                     /gene="mutT"
+                     /note="ORF_ID:o111#11
+                     similar to PIR Accession Number A27890"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Mutator MutT (AT-GC transversion)."
+                     /protein_id="BAB96667.1"
+                     /db_xref="GI:21321980"
+                     /translation="MKKLQIAVGIIRNENNEIFITRRAADAHMANKLEFPGGKIEMGE
+                     TPEQAVVRELQEEVGITPQHFSLFEKLEYEFPDRHITLWFWLVERWEGEPWGKEGQPG
+                     EWMSLVGLNADDFPPANEPVIAKLKRL"
+     gene            complement(111310..111408)
+                     /gene="yacG"
+     CDS             complement(111310..>111408)
+                     /gene="yacG"
+                     /note="ORF_ID:o111#12
+                     similar to SwissProt Accession Number P36681"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="Hypothetical 5.8 kd protein in mutT-guaC
+                     intergenic region."
+                     /protein_id="BAB96668.1"
+                     /db_xref="GI:21321981"
+                     /translation="LIDLGEWAAEEKRIPSSGDLSESDDWSEEPKQ"
+BASE COUNT    26083 a  28055 c  30531 g  26739 t
+ORIGIN      
+        1 atgcgagtgt tgaagttcgg cggtacatca ttggcaaatg cagaacgttt tctgcgtgtt
+       61 gccgatattc tggaaagcaa tgccaggcag gggcaggtgg ccaccgtcct ctctgcccct
+      121 gccaaaatca ccaaccacct ggtggcgatg attgaaaaaa ccattagcgg ccaggatgct
+      181 ttacccaata tcagcgatgc cgaacgtatt tttgccgaac ttttgacggg actcgccgcc
+      241 gcccagccgg ggttcccgtt ggcgcaattg aaaacattcg tcgatcagga atttgcccaa
+      301 ataaaacatg tcctgcatgg cattagtttg ttggggcagt gcccggatag catcaacgct
+      361 gcgctgattt gccgtggcga gaagatgtcg atcgccatta tggccggcgt attagaagcg
+      421 cgcggtcaca acgttactgt tatcgatccg gtcgaaaaac tgctggcagt ggggcattac
+      481 ctcgaatcca ccgtcgatat tgccgagtcc acccgccgta tagcggcaag ccgcattccg
+      541 gctgatcaca tggtgctgat ggcaggtttc accgccggta atgagaaagg cgaactggtg
+      601 gtgcttggac gcaacggttc cgactactct gctgcggtgc tggctgcctg tttacgcgcc
+      661 gattgttgcg agatttggac ggacgttaac ggggtctata cctgcgaccc gcgtcaggtg
+      721 cccgacgcga ggttgttgaa gtcgatgtcc taccaggaag cgatggagct ttcctacttc
+      781 ggcgctaaag ttcttcaccc ccgcaccatt acccccatcg cccagttcca gatcccttgc
+      841 ctgattaaaa ataccggaaa tcctcaagca ccaggtacgc tcattggtgc aagccgtgat
+      901 gaagacgaat taccggtcaa gggcatttcc aatctgaata acatggcaat gttcagcgtt
+      961 tctggtccgg ggatgaaagg gatggtcggc atggcggcgc gcgtctttgc agcgatgtca
+     1021 cgcgcccgta tttccgtggt gctgattacg caatcatctt ccgaatacag catcagtttc
+     1081 tgcgttccac aaagcgactg tgtgcgagct gaacgggcaa tgctggaaga gttctaccta
+     1141 gaactgaaag aaggcttact ggagccgctg gcagtggcgg aacggctggc cattatctcg
+     1201 gtggtaggtg atggtttgcg caccttgcgt gggatctcgg cgaaattctt tgccgcactg
+     1261 gcccgcgcca atatcaacat tgtcgccatt gctcagggat cttctgaacg ctcaatctct
+     1321 gtcgtggtaa ataacgatga tgcgaccact ggcgtgcgcg ttactcatca gatgctgttc
+     1381 aataccgatc aggttatcga agtgtttgtg attggcgtcg gtggcgttgg cggtgcgctg
+     1441 ctggagcaac tgaagcgtca gcaaagctgg ctgaagaata aacatatcga cttacgtgtc
+     1501 tgcggtgttg ccaactcgaa ggctctgctc accaatgtac atggccttaa tctggaaaac
+     1561 tggcaggagg aactggcgca agccaaagag ccgtttaatc tcgggcgctt aattcgcctc
+     1621 gtgaaagaat atcatctgct gaacccggtc attgttaact gcacttccag ccaggcagtg
+     1681 gcggatcaat atgccgactt cctgcgcgaa ggtttccacg ttgtcacgcc gaacaaaaag
+     1741 gccaacacct cgtcgatgga ttactaccat cagttgcgtt atgcggcgga aaaatcgcgg
+     1801 cgtaaattcc tctatgacat caacgttggg gctggattac cggttattga gaacctgcaa
+     1861 aatctgctca atgcaggtga tgaattgatg aaattctccg gcattctttc tggttcgctt
+     1921 tcttatatct tcggcaagtt agacgaaggc atgagtttct ccgaggcgac ccggctggcg
+     1981 cgggaaatgg gttataccga accggacccg cgagatgatc tttctggtat ggatgtggcg
+     2041 cgtaaactat tgattctcgc tcgtgaaacg ggacgtgaac tggagctggc ggatattgaa
+     2101 attgaacctg tgctgcccgc agagtttaac gccgagggtg atgttgccgc ttttatggcg
+     2161 aatctgtcac aactcgacga tctctttgcc gcgcgcgtgg cgaaggcccg tgatgaagga
+     2221 aaagttttgc gctatgttgg caatattgat gaagatggcg tctgccgcgt gaagattgcc
+     2281 gaagtggatg gtaatgatcc gctgttcaaa gtgaaaaatg gcgaaaacgc cctggccttc
+     2341 tatagccact attatcagcc gctgccgttg gtactgcgcg gatatggtgc gggcaatgac
+     2401 gttacagctg ccggtgtctt tgctgatctg ctacgtaccc tctcatggaa gttaggagtc
+     2461 tgacatggtt aaagtttatg ccccggcttc cagtgccaat atgagcgtcg ggtttgatgt
+     2521 gctcggggcg gcggtgacac ctgttgatgg tgcattgctc ggagatgtag tcacggttga
+     2581 ggcggcacag acattcagtc tcaacaacct cggacgcttt gccgataagc tgccgtcaga
+     2641 accacgggaa aatatcgttt atcagtgctg ggagcgtttt tgccaggaac tgggtaagca
+     2701 aattccagtg gcgatgaccc tggaaaagaa tatgccgatc ggttcgggct taggctccag
+     2761 tgcctgttcg gtggtcgcgg cgctgatggc gatgaatgaa cactgcggca agccgcttaa
+     2821 tgacactcgt ttgctggctt tgatgggcga gctggaaggc cgtatctccg gcagcattca
+     2881 ttacgacaac gtggcaccgt gttttctcgg tggtatgcag ttgatgatcg aagaaaacga
+     2941 catcatcagc cagcaagtgc agggtttgat gagtggctgt gggtgctcgc gtatccgggg
+     3001 attaaagtct cgacggcaga agcagggcta tttaccggcg cagtatcgcc gccaggattg
+     3061 cattgcgcac gggcgacatc tggcaggctt cattcacgcc tgctattccc gtcagcctga
+     3121 gcttgccgcg aagctgatga aagatgttat cgctgaaccc taccgtgaac ggttactgcc
+     3181 aggcttccgg caggcgcggc aggcggtcgc ggaaatcggc gcggtagcga gcggtatctc
+     3241 cggctccggc ccgaccttgt tcgctctgtg tgacaagccg gaaaccgccc agcgcgttgc
+     3301 cgactggttg ggtaagaact acctgcaaaa tcaggaaggt tttgttcata tttgccggct
+     3361 ggatacggcg ggcgcacgag tactggaaaa ctaaatgaaa ctctacaatc tgaaagatca
+     3421 caacgagcag gtcagctttg cgcaagccgt aacccagggg ttgggcaaaa atcaggggct
+     3481 gttttttccg cacgacctgc cggaattcag cctgactgaa attgatgaga tgctgaagct
+     3541 ggattttgtc acccgcagtg cgaagatcct ctcggcgttt attggtgatg aaatcccaca
+     3601 ggaaatcctg gaagagcgcg tgcgcgcggc gtttgccttc ccggctccgg tcgccaatgt
+     3661 tgaaagcgat gtcggttgtc tggaattgtt ccacgggcca acgctggcat ttaaagattt
+     3721 cggcggtcgc tttatggcac aaatgctgac ccatattgcg ggtgataagc cagtgaccat
+     3781 tctgaccgcg acctccggtg ataccggagc ggcagtggct catgctttct acggtttacc
+     3841 gaatgtgaaa gtggttatcc tctatccacg aggcaaaatc agtccactgc aagaaaaact
+     3901 gttctgtaca ttgggcggca atatcgaaac tgttgccatc gacggcgatt tcgatgcctg
+     3961 tcaggcgctg gtgaagcagg cgtttgatga tgaagaactg aaagtggcgc tagggttaaa
+     4021 ctcggctaac tcgattaaca tcagccgttt gctggcgcag atttgctact actttgaagc
+     4081 tgttgcgcag ctgccgcagg agacgcgcaa ccagctggtt gtctcggtgc caagcggaaa
+     4141 cttcggcgat ttgacggcgg gtctgctggc gaagtcactc ggtctgccgg tgaaacgttt
+     4201 tattgctgcg accaacgtga acgataccgt gccacgtttc ctgcacgacg gtcagtggtc
+     4261 acccaaagcg actcaggcga cgttatccaa cgcgatggac gtgagtcagc cgaacaactg
+     4321 gccgcgtgtg gaagagttgt tccgccgcaa aatctggcaa ctgaaagagc tgggttatgc
+     4381 agccgtggat gatgaaacca cgcaacagac aatgcgtgag ttaaaagaac tgggctacac
+     4441 ttcggagccg cacgctgccg tagcttatcg tgcgctgcgt gatcagttga atccaggcga
+     4501 atatggcttg ttcctcggca ccgcgcatcc ggcgaaattt aaagagagcg tggaagcgat
+     4561 tctcggtgaa acgttggatc tgccaaaaga gctggcagaa cgtgctgatt tacccttgct
+     4621 ttcacataat ctgcccgccg attttgctgc gttgcgtaaa ttgatgatga atcatcagta
+     4681 aaatctattc attatctcaa tcaggccggg tttgctttta tgcagcccgg cttttttatg
+     4741 aagaaattat ggagaaaaat gacagggaaa aaggagaaat tctcaataaa tgcggtaact
+     4801 tagagattag gattgcggag aataacaacc gccgttctca tcgagtaatc tccggatatc
+     4861 gacccataac gggcaatgat aaaaggagta acctgtgaaa aagatgcaat ctatcgtact
+     4921 cgcactttcc ctggttctgg tcgctcccat ggcagcacag gctgcggaaa ttacgttagt
+     4981 cccgtcagta aaattacaga taggcgatcg tgataatcgt ggctattact gggatggagg
+     5041 tcactggcgc gaccacggct ggtggaaaca acattatgaa tggcgaggca atcgctggca
+     5101 cctacacgga ccgccgccac cgccgcgcca ccataagaaa gctcctcatg atcatcacgg
+     5161 cggtcatggt cctggcaaac atcaccgcta aatgacaaat gccgggtaac aatccggcat
+     5221 tcagcgcctg atgcgacgct ggcgcgtctt atcaggccta cgttaattct gcaatatatt
+     5281 gaatctgcat gcttttgtag gcaggataag gcgttcacgc cgcatccggc attgactgca
+     5341 aacttaacgc tgctcgtagc gtttaaacac cagttcgcca ttgctggagg aatcttcatc
+     5401 aaagaagtaa ccttcgctat taaaaccagt cagttgctct ggtttggtca gccgattttc
+     5461 aataatgaaa cgactcatca gaccgcgtgc tttcttagcg tagaagctga tgatcttaaa
+     5521 tttgccgttc ttctcatcga ggaacaccgg cttgataatc tcggcattca atttcttcgg
+     5581 cttcaccgat ttaaaatact catctgacgc cagattaatc accacattat cgccttgtgc
+     5641 tgcgagcgcc tcgttcagct tgttggtgat gatatctccc cagaattgat acagatcttt
+     5701 ccctcgggca ttctcaagac ggatccccat ttccagacga taaggctgca ttaaatcgag
+     5761 cgggcggagt acgccataca agccggaaag cattcgcaaa tgctgttggg caaaatcgaa
+     5821 atcgtcttcg ctgaaggttt cggcctgcaa gccggtgtag acatcacctt taaacgccag
+     5881 aatcgcctgg cgggcattcg ccggcgtgaa atctggctgc cagtcatgaa agcgagcggc
+     5941 gttgataccc gccagtttgt cgctgatgcg catcagcgtg ctaatctgcg gaggcgtcag
+     6001 tttccgcgcc tcatggatca actgctggga attgtctaac agctccggca gcgtatagcg
+     6061 cgtggtggtc aacgggcttt ggtaatcaag cgttttcgca ggtgaaataa gaatcagcat
+     6121 atccagtcct tgcaggaaat ttatgccgac tttagcaaaa aatgagaatg agttgatcga
+     6181 tagttgtgat tactcctgcg aaacatcatc ccacgcgtcc ggagaaagct ggcgaccgat
+     6241 atccggataa cgcaatggat caaacaccgg gcgcacgccg agtttacgct ggcgtagata
+     6301 atcactggca atggtatgaa ccacaggcga gagcagtaaa atggcggtca aattggtaat
+     6361 agccatgcag gccattatga tatctgccag ttgccacatc agcggaaggc ttagcaaggt
+     6421 gccgccgatg accgttgcga aggtgcagat ccgcaaacac cagatcgctt tagggttgtt
+     6481 caggcgtaaa aagaagagat tgttttcggc ataaatgtag ttggcaacga tggagctgaa
+     6541 ggcaaacaga ataaccacaa gggtaacaaa ctcagcaccc caggaaccca ttagcacccg
+     6601 catcgccttc tggataagct gaataccttc cagcggcatg taggttgtgc cgttacccgc
+     6661 cagtaatatc agcatggcgc ttgccgtaca gatgaccagg gtgtcgataa aaatgccaat
+     6721 catctggaca atcccttgcg ctgccggatg cggaggccag gacgccgctg ccgctgccgc
+     6781 gtttggcgtc gaacccattc ccgcctcatt ggaaaacata ctgcgctgaa aaccgttagt
+     6841 aatcgcctgg cttaaggtat atcccgccgc gccgcctgcc gcttcctgcc agccaaaagc
+     6901 actctcaaaa atagaccaaa tgacgtgggg aagttgcccg atattcatta cgcaaattac
+     6961 caggctggtc agtacccaga ttatcgccat caacgggaca aagccctgca tgagccgggc
+     7021 gacgccatga agaccgcgag tgattgccag cagagtaaag acagcgagaa taatgcctgt
+     7081 caccagcggg ggaaaatcaa aagaaaaact cagggcgcgg gcaacggcgt tcgcttgaac
+     7141 tccgctgaaa attatgccat aggcgatgag caaaaagacg gcgaacagaa cgcccatcca
+     7201 gcgcatcccc agcccgcgcg ccatatacca tgccggtccg ccacgaaact gcccattgac
+     7261 gtcacgttct ttataaagtt gtgccagaga acattcggca aacgaggtcg ccatgccgat
+     7321 aaacgcggca acccacatcc aaaagacggc tccaggtcca ccggcggtaa tagccagcgc
+     7381 aacgccggcc aggttgccgc tacccacgcg cgccgcaaga ctggtacaca atgactgaaa
+     7441 tgaggttaaa ccgcctggct gtggatgaat gctattttta agacttttgc caaactggcg
+     7501 gatgtagcga aactgcacaa atccggtgcg aaaagtgaac caacaacctg cgccgaagag
+     7561 caggtaaatc attaccgatc cccaaaggac gctgttaatg aaggagaaaa aatctggcat
+     7621 gcatatccct cttattgccg gtcgcgatga ctttcctgtg taaacgttac caattgttta
+     7681 agaagtatat acgctacgag gtacttgata acttctgcgt agcatacatg aggttttgta
+     7741 taaaaatggc gggcgatatc aacgcagtgt cagaaatccg aaacagtctc gcctggcgat
+     7801 aaccgtcttg tcggcggttg cgctgacgtt gcgtcgtgat atcatcaggg cagaccggtt
+     7861 acatccccct aacaagctgt ttaaagagaa atactatcat gacggacaaa ttgacctccc
+     7921 ttcgtcagta caccaccgta gtggccgaca ctggggacat cgcggcaatg aagctgtatc
+     7981 aaccgcagga tgccacaacc aacccttctc tcattcttaa cgcagcgcag attccggaat
+     8041 accgtaagtt gattgatgat gctgtcgcct gggcgaaaca gcagagcaac gatcgcgcgc
+     8101 agcagatcgt ggacgcgacc gacaaactgg cagtaaatat tggtctggaa atcctgaaac
+     8161 tggttccggg ccgtatctca actgaagttg atgcgcgtct ttcctatgac accgaagcgt
+     8221 caattgcgaa agcaaaacgc ctgatcaaac tctacaacga tgctggtatt agcaacgatc
+     8281 gtattctgat caaactggct tctacctggc agggtatccg tgctgcagaa cagctggaaa
+     8341 aagaaggcat caactgtaac ctgaccctgc tgttctcctt cgctcaggct cgtgcttgtg
+     8401 cggaagcggg cgtgttcctg atctcgccgt ttgttggccg tattcttgac tggtacaaag
+     8461 cgaataccga taagaaagag tacgctccgg cagaagatcc gggcgtggtt tctgtatctg
+     8521 aaatctacca gtactacaaa gagcacggtt atgaaaccgt ggttatgggc gcaagcttcc
+     8581 gtaacatcgg cgaaattctg gaactggcag gctgcgaccg tctgaccatc gcaccggcac
+     8641 tgctgaaaga gctggcggag agcgaagggg ctatcgaacg taaactgtct tacaccggcg
+     8701 aagtgaaagc gcgtccggcg cgtatcactg agtccgagtt cctgtggcag cacaaccagg
+     8761 atccaatggc agtagataaa ctggcggaag gtatccgtaa gtttgctatt gaccaggaaa
+     8821 aactggaaaa aatgatcggc gatctgctgt aatcattctt agcgtgaccg ggaagtcggt
+     8881 cacgctacct cttctgaagc ctgtctgtca ctcccttcgc agtgtatcat tctgtttaac
+     8941 gagactgttt aaacggaaaa atcttgatga atactttacg tattggctta gtttccatct
+     9001 ctgatcgcgc atccagcggc gtttatcagg ataaaggcat ccctgcgctg gaagaatggc
+     9061 tgacatgcgc taaccacgcc gtttgaactg gaaacccgct taatccccga tgagcaggcg
+     9121 atcatcgagc aaacgttgtg tgagctggtg gatgaaatga gttgccatct ggtgctcacc
+     9181 acgggcggaa ctggcccggc gcgtcgtgac gtaacgcccg atgcgacgct ggcagtagcg
+     9241 gaccgcgaga tgcctggctt tggtgaacag atgcgccaga tcagcctgca ttttgtacca
+     9301 actgcgatcc tttcgcgtca ggtgggcgtg attcgcaaac aggcgctgat ccttaactta
+     9361 cccggtcagc cgaagtctat taaagagacg ctggaaggtg tgaaggacgc tgagggtaac
+     9421 gttgtggtac acggtatttt tgccagcgta ccgtactgca ttcagttgct ggaagggcca
+     9481 tacgttgaaa cggcaccgga agtggttgca gcattcagac cgaagagtgc aagacgcgac
+     9541 gttagcgaat aaaaaatacc cgagcggggg gatctcaaaa caattagtgg gattcaccaa
+     9601 tcggcagaac ggtgcgacca aactgctcgt tcagtacttc acccatcgcc agatagattg
+     9661 cgctggcacc gcagatcagc ccaatccagc cggcaaagtg gatgattgcg gcgttaccgg
+     9721 caatgttacc gatcgccagc agggcaaaca gcacggtcag gctaaagaaa acgaattgca
+     9781 gaacgcgtgc gcctttcagc gtgccgaaga acataaacac ggtaaatacg ccccacagac
+     9841 ccaggtagac accaaggaac tgtgcatttg gcgcatcggt cagacccagt ttcggcatca
+     9901 gcagaatcgc aaccagcgtc agccagaaag aaccgtaaga ggtgaatgcg gttaaaccga
+     9961 aagtgttgcc ttttttgtac tccagcagac cagcaaaaat ttgcgcgatg ccgccgtaga
+    10021 aaatgcccat ggcaagaata ataccgtcca gagcgaaata acccacgttg tgcaggttaa
+    10081 gcagaatggt ggtcatgccg aagcccatca ggcccagcgg tgccggatta gccaacttag
+    10141 tgttgcccat aattcctcaa aaatcatcat cgaatgaatg gtgaaataat ttccctgaat
+    10201 aactgtagtg ttttcagggc gcggcataat aatcagccag tggggcagtg tctacgatct
+    10261 tttgagggga aaatgaaaat tttcccggtt tccggtatca gacctgagtg gcgctaacca
+    10321 tccggcgcag gcaggcgatt tgcagtacgg ctggaatcgt cacgcgatag cgtgcgtgac
+    10381 cgctttaacc ccatttagtg ccgcacctac aggcctccca gcccgcgccg cgcagcaaac
+    10441 catgcccaag tacgctcatt gctgcgtggg tgcgtaaaat gcgggtcagt tggctggaaa
+    10501 gcaaatgcga cacacctttt gccaataatt tgtctttcat cagcagcggc agcagctctt
+    10561 ccagctcatt caccctggca tcgaccgcgt gcagaaactc ctgcttatgt tcctcgtcca
+    10621 ttttcttcca ggtattacgc agaaattgtt ccagtaactg ttgctcaatt tcaaacgtag
+    10681 acatctcttt gtcggctttc agcttcaatc gctttgaaac atcgagcaaa atggcccgat
+    10741 acaatttacc gtgtccgcgc agtttgttgc gatactatcg ccaccaaaat gctgtaattc
+    10801 tccggcaatc agctgccagt tgcggcgatg ttgctcggga tgcccttcca tcgatttaaa
+    10861 cagttcgttg cgcatcagta cgctggagag gcgagttttg cctttttcat tatgggtgag
+    10921 caatcgggcg aaatttgcca actgttcctc actacaatgc tgaagaaaat ccagatctga
+    10981 atcattcagg taattaacat tcattttttg tggcttctat attctggcgt tagtcgtcgc
+    11041 cgataatttt cagcgtggcc atatccgatg agttcaccgt atgacccgaa aaggtgattt
+    11101 ttgagacgca gcgtttattg tcgttatcgc tcttaatgtt gatccagtca gtggtttgcc
+    11161 cttcttttat ttctgaagga atattcaggc tctgacctgg cgctgacggg cggctgtgaa
+    11221 ataaaccgat gcaccgctta actgtaaatc gccatggtcg gcagagagtt gtatgcgttt
+    11281 cacaatgcga caaacaggaa gtttcagcgc cagatcgttg gtttcgttac gcggcattgc
+    11341 aatgcgccga ggagtttatg gtcgtttgcc tgcgccgtgc agcacagcat caggctaatc
+    11401 gccaggctgg cggaaatcgt aaaaacggat ttcataagga ttctcttagt gggaagaggt
+    11461 agggggatga atacccacta gtttactgct gataaagaga agattcaggc acgtaatctt
+    11521 ttctttttat tacaattttt tgatgaatgc cttggctgcg attcattctt tatatgaata
+    11581 aaattgctgt caattttacg tcttgtcctg ccatatcgcg aaatttctgc gcaaaagcac
+    11641 aaaaaatttt tgcatctccc ccttgatgac gtggtttacg accccattta gtagtcaacc
+    11701 gcagtgagtg agtctgcaaa aaaatgaaat tgggcagttg aaaccagacg tttcgcccct
+    11761 attacagact cacaaccaca tgatgaccga atatatagtg gagacgttta gatgggtaaa
+    11821 ataattggta tcgacctggg tactaccaac tcttgtgtag cgattatgga tggcaccact
+    11881 cctcgcgtgc tggagaacgc cgaaggcgat cgcaccacgc cttctatcat tgcctatacc
+    11941 caggatggtg aaactctagt tggtcagccg gctaaacgtc aggcagtgac gaacccgcaa
+    12001 aacactctgt ttgcgattaa acgcctgatt ggtcgccgct tccaggacga agaagtacag
+    12061 cgtgatgttt ccatcatgcc gttcaaaatt attgctgctg ataacggcga cgcatgggtc
+    12121 gaagttaaag gccagaaaat ggcaccgccg cagatttctg ctgaagtgct gaaaaaaatg
+    12181 aagaaaaccg ctgaagatta cctgggtgaa ccggtaactg aagctgttat caccgtaccg
+    12241 gcatacttta acgatgctca gcgtcaggca accaaagacg caggccgtat cgctggtctg
+    12301 gaagtaaaac gtatcatcaa cgaaccgacc gcagctgcgc tggcttacgg tctggacaaa
+    12361 ggcactggca accgtactat cgcggtttat gacctgggtg gtggtacttt cgatatttct
+    12421 attatcgaaa tcgacgaagt tgacggcgaa aaaaccttcg aagttctggc aaccaacggt
+    12481 gatacccacc tggggggtga agacttcgac agccgtctga tcaactatct ggttgaagaa
+    12541 ttcaagaaag atcagggcat tgacctgcgc aacgatccgc tggcaatgca gcgcctgaaa
+    12601 gaagcggcag aaaaagcgaa aatcgaactg tcttccgctc agcagaccga cgttaacctg
+    12661 ccatacatca ctgcagacgc gaccggtccg aaacacatga acatcaaagt gactcgtgcg
+    12721 aaactggaaa gcctggttga agatctggta aaccgttcca ttgagccgct gaaagttgca
+    12781 ctgcaggacg ctggcctgtc cgtatctgat atcgacgacg ttatcctcgt tggtggtcag
+    12841 actcgtatgc caatggttca gaagaaagtt gctgagttct ttggtaaaga gccgcgtaaa
+    12901 gacgttaacc cggacgaagc tgtagcaatc ggtgctgctg ttcagggtgg tgttctgact
+    12961 ggtgacgtaa aagacgtact gctgctggac gttaccccgc tgtctctggg tatcgaaacc
+    13021 atgggcggtg tgatgacgac gctgatcgcg aaaaacacca ctatcccgac caagcacagc
+    13081 caggtgttct ctaccgctga agacaaccag tctgcggtaa ccatccatgt gctgcagggt
+    13141 gaacgtaaac gtgcggctga taacaaatct ctgggtcagt tcaacctaga tggtatcaac
+    13201 ccggcaccgc gcggcatgcc gcagatcgaa gttaccttcg atatcgatgc tgacggtatc
+    13261 ctgcacgttt ccgcgaaaga taaaaacagc ggtaaagagc agaagatcac catcaaggct
+    13321 tcttctggtc tgaacgaaga tgaaatccag aaaatggtac gcgacgcaga agctaacgcc
+    13381 gaagctgacc gtaagtttga agagctggta cagactcgca accagggcga ccatctgctg
+    13441 cacagcaccc gtaagcaggt tgaagaagca ggcgacaaac tgccggctga cgacaaaact
+    13501 gctatcgagt ctgcgctgac tgcactggaa actgctctga aaggtgaaga caaagccgct
+    13561 atcgaagcga aaatgcagga actggcacag gtttcccaga aactgatgga aatcgcccag
+    13621 cagcaacatg cccagcagca gactgccggt gctgatgctt ctgcaaacaa cgcgaaagat
+    13681 gacgatgttg tcgacgctga atttgaagaa gtcaaagaca aaaaataatc gccctataaa
+    13741 cgggtaatta tactgacacg ggcgaagggg aatttcctct ccgcccgtgc attcatctag
+    13801 gggcaattta aaaaagatgg ctaagcaaga ttattacgag attttaggcg tttccaaaac
+    13861 agcggaagag cgtgaaatca gaaaggccta caaacgcctg gccatgaaat accacccgga
+    13921 ccgtaaccag ggtgacaaag aggccgaggc gaaatttaaa gagatcaagg aagcttatga
+    13981 agttctgacc gactcgcaaa aacgtgcggc atacgatcag tatggtcatg ctgcgtttga
+    14041 gcaaggtggc atgggcggcg gcggttttgg cggcggcgca gacttcagcg atatttttgg
+    14101 tgacgttttc ggcgatattt ttggcggcgg acgtggtcgt caacgtgcgg cgcgcggtgc
+    14161 tgatttacgc tataacatgg agctcaccct cgaagaagct gtacgtggcg tgaccaaaga
+    14221 gatccgcatt ccgactctgg aagagtgtga cgtttgccac ggtagcggtg caaaaccagg
+    14281 tacacagccg cagacttgtc cgacctgtca tggttctggt caggtgcaga tgcgccaggg
+    14341 attcttcgct gtacagcaga cctgtccaca ctgtcagggc cgcggtacgc tgatcaaaga
+    14401 tccgtgcaac aaatgtcatg gtcatggtcg tgttgagcgc agcaaaacgc tgtccgttaa
+    14461 aatcccggca ggggtggaca ctggagaccg catccgtctt gcgggcgaag gtgaagcggg
+    14521 cgagcatggc gcaccggcag gcgatctgta cgttcaggtt caggttaaac agcacccgat
+    14581 tttcgagcgt gaaggcaaca acctgtattg cgaagtcccg atcaacttcg ctatggcggc
+    14641 gctgggtggc gaaatcgaag taccgaccct tgatggtcgc gtcaaactga aagtgcctgg
+    14701 cgaaacccag accggtaagc tattccgtat gcgcggtaaa ggcgtcaagt ctgtccgcgg
+    14761 tggcgcacag ggtgatttgc tgtgccgcgt tgtcgtcgaa acaccggtag gcctgaacga
+    14821 aaggcagaaa cagctgctgc aagagctgca agaaagcttc ggtggcccaa ccggcgagca
+    14881 caacagcccg cgctcaaaga gcttctttga tggtgtgaag aagttttttg acgacctgac
+    14941 ccgctaacct ccccaaaagc ctgcccgtgg gcaggcctgg gtaaaaatag ggtgcgttga
+    15001 agatatgcga gcacctgtaa agtggcgggg atcactccca taagcgctaa cttaagggtt
+    15061 gtggtattac gcctgatatg atttaacgtg ccgatgaatt actctcacga taactggtca
+    15121 gcaattctgg cccatattgg taagcccgaa gaactggata cttcggcacg taatgccggg
+    15181 gctctaaccc gccgccgcga aattcgtgat gctgcaactc tgctacgtct ggggctggct
+    15241 tacggccccg gggggatgtc attacgtgaa gtcactgcat gggctcagct ccatgacgtt
+    15301 gcaacattat ctgacgtggc tctcctgaag cggctgcgga atgccgccga ctggtttggc
+    15361 atacttgccg cacaaacact tgctgtacgc gccgcagtta cgggttgtac aagcggaaag
+    15421 agattgcgtc ttgtcgatgg aacagcaatc agtgcgcccg ggggcggcag cgctgaatgg
+    15481 cgactacata tgggatatga tcctcatacc tgtcagttca ctgattttga gctaaccgac
+    15541 agcagagacg ctgaacggct ggaccgattt gcgcaaacgg cagacgagat acgcattgct
+    15601 gaccggggat tcggttcgcg tcccgaatgt atccgctcac ttgcttttgg agaagctgat
+    15661 tatatcgtcc gggttcactg gcgaggattg cgctggttaa ctgcagaagg aatgcgcttt
+    15721 gacatgatgg gttttctgcg cgggctggat tgcggtaaga acggtgaaac cactgtaatg
+    15781 ataggcaatt caggtaataa aaaagccgga gctccctttc cggcacgtct cattgccgta
+    15841 tcacttcctc ccgaaaaagc attaatcagt aaaacccgac tgctcagcga gaatcgtcga
+    15901 aaaggacgag tagttcaggc ggaaacgctg gaagcagcgg gccatgtgct attgctaaca
+    15961 tcattaccgg aagatgaata ttcagcagag caagtggctg attgttaccg tctgcgatgg
+    16021 caaattgaac tggcttttaa gcggctcaaa agtttgctgc acctggatgc tttgcgtgca
+    16081 aaggaacctg aactcgcgaa agcgtggata tttgctaatc tactcgccgc atttttaatt
+    16141 gacgacataa tccagccatc gctggatttc ccccccagaa gtgccggatc cgaaaagaag
+    16201 aactaactcg ttgtggagaa taacaaaaat ggtcatctgg agcttacagg tggccattcg
+    16261 tgggacagta tccctgacag cctacaaaac gcaattgaag aacgcgaggc atcgtcttaa
+    16321 cgaggcaccg aggcgtcgca ttcttcagat ggttcaaccc ttaagttagc gcttatggga
+    16381 tcactccccg ccgttgctct tactcggatt cgtaagccgt gaaaacagca acctccgtct
+    16441 ggccagttcg gatgtgaacc tcacagaggt cttttctcgt taccagcgcc gccactacgg
+    16501 cggtgataca gatgacgatc agggcgacaa tcatcgcctt atgctgcttc attgctctct
+    16561 tctccttgac cttacggtca gtaagaggca ctctacatgt gttcagcata taggaggcct
+    16621 cgggttgatg gtaaaatatc actcggggct tttctctatc tgccgttcag ctaatgcctg
+    16681 agacagacag cctcaagcac ccgccgctat tatatcgctc tctttaaccc attttgtttt
+    16741 atcgattcta atcctgaaga cgcctcgcat ttttgtggcg taatttttta atgatttaat
+    16801 tatttaactt taatttatct cttcatcgca attattgacg acaagctgga ttatttttga
+    16861 aatattggcc taacaagcat cgccgactga caacaaatta attattactt ttcctaatta
+    16921 atccctcagg aatcctcacc ttaagctatg attatctagg cttagggtca ctcgtgagcg
+    16981 cttacagccg tcaaaaacgc atctcaccgc tgatggcgca aattcttcaa tagctcgtaa
+    17041 aaaacgaatt attcctacac tataatctga ttttaacgat gattcgtgcg gggtaaaata
+    17101 gtaaaaacga tctattcacc tgaaagagaa ataaaaagtg aaacatctgc atcgattctt
+    17161 tagcagtgat gcctcgggag gcattattct tatcattgcc gctatcctgg cgatgattat
+    17221 ggccaacagc ggcgcaacca gtggatggta tcacgacttt ctggagacgc cggttcagct
+    17281 ccgggttggt tcactcgaaa tcaacaaaaa catgctgtta tggataaatg acgcgctgat
+    17341 ggcggtattt ttcctgttag tcggtctgga agttaaacgt gaactgatgc aaggatcgct
+    17401 agccagctta cgccaggccg catttccagt tatcgccgct attggtggga tgattgtgcc
+    17461 ggcattactc tatctggctt ttaactatgc cgatccgatt acccgcgaag ggtgggcgat
+    17521 cccggcggct actgacattg cttttgcact tggtgtactg gcgctgttgg gaagtcgtgt
+    17581 tccgttagcg ctgaagatct ttttgatggc tctggctatt atcgacgatc ttggggccat
+    17641 cattatcatc gcattgttct acactaatga cttatcgatg gcctctcttg gcgtcgcggc
+    17701 tgtagcaatt gcggtactcg cggtattgaa tctgtgtggt gcacgccgca cgggcgtcta
+    17761 tattcttgtt ggcgtggtgt tgtggactgc ggtgttgaaa tcgggggttc acgcaactct
+    17821 ggcgggggta attgtcggct tctttattcc tttgaaagag aagcatgggc gttctccagc
+    17881 gaagcgactg gagcatgtgt tgcacccgtg ggtggcgtat ctgattttgc cgctgtttgc
+    17941 atttgctaat gctggcgttt cactgcaagg cgtcacgctg gatggcttga cctccattct
+    18001 gccattgggg atcatcgctg gcttgctgat tggcaaaccg ctggggatta gtctgttctg
+    18061 ctggttggcg ctgcgtttga aactggcgca tctgcctgag ggaacgactt atcagcaaat
+    18121 tatggtggtg gggatcctgt gcggtatcgg ttttactatg tctatcttta ttgccagcct
+    18181 ggcctttggt agcgtagatc cagaactgat taactgggcg aaactcggta tcctggtcgg
+    18241 ttctatctct tcggcggtaa ttggatacag ctggttacgc gttcgtttgc gtccatcagt
+    18301 ttgacaggac ggtttaccgg ggagccataa acggctccct tttcattgtt atcagggaga
+    18361 gaaatgagca tgtctcatat caattacaac cacttgtatt acttctggca tgtctataaa
+    18421 gaaggttccg tggttggcgc agcggaggcg ctttatttaa ctccacaaac cattaccgga
+    18481 cagattcgag cgctggaaga cgccctgcaa gcgaaattat ttaaacgcaa ggggacgtgg
+    18541 tctcgaaccc agcgagctgg agaactggtc tatcgctatg ccgataaaat gttcacctta
+    18601 agccaggaaa tgctggatat tgtgaactat cgcaaagaat ccaatttatt gtttgacgtt
+    18661 ggcgtggctg atgcactttc caaacgcctg gtcagtagcg tacttaacgc cgcagtggta
+    18721 gaaggcgagc ccattcatct tcgctgcttc gaatccaccc acgaaatgct gctggagcaa
+    18781 ttaagtcagc ataaactgga gatgatcatt tctgactgtc cgatagactc tacgcagcag
+    18841 gaaggcctgt tctccgtgag aattggcgaa tgtggcgtga gtttctggtg tacaaatcca
+    18901 ccaccagaaa aaccgttccc ggcttgtctg gaagaacggc gacttttgat tcctgggcga
+    18961 cgttcaatgt tagggcgcaa attgcttaac tggtttaact cccagggatt aaacgtagaa
+    19021 atcctcggcg agtttgatga tgccgctttg atgaaagctt ggctgcaggt ccttttggtg
+    19081 gcgatgcaaa tgcaatcttc gttgccccaa cgctttatgc atatgacttt tatgccgata
+    19141 aaactgtcgt agaaattggt cgcgtcgaga atgtgatgga agagtaccat gctatttttg
+    19201 ctgagcggat gattcagcac ccggcggtac agcgaatctg caatacggat tattctgcgc
+    19261 tttttagtcc agcggtgcgt taatcggcag ctcccccaaa gttaaggtgg gggagataga
+    19321 ttagttgtac attaccacga ttttgactcg gctcattatt tgcccgcttg agacattgtt
+    19381 tccatatgta cgcgggcgaa taaatagagg aatctgatta cttccttcat ggggatgctg
+    19441 aaaagagtag taattgctgg taatgactcc aacttattga tagtgtttta tgttcagata
+    19501 atgcccgatg actttgtcat gcagctccac cgattttgag aacgacagcg acttccgtcc
+    19561 cagccgtgcc aggtgctgcc tcagattcag gttatgccgc tcaattcgct gcgtatatcg
+    19621 cttgctgatt acgtgcagct ttcccttcag gcgggattca tacagcggcc agccatccgt
+    19681 catccatatc accacgtcaa agggtgacag caggctcata agacgcccca gcgtcgccat
+    19741 agtgcgttca ccgaatacgt gcgcaacaac cgtcttccgg agactgtcat acgcgtaaaa
+    19801 cagccagcgc tggcgcgatt tagccccgac atagccccac tgttcgtcca tttccgcgca
+    19861 gacgatgacg tcactgcccg gctgtatgcg cgaggttacc gactgcggcc tgagtttttt
+    19921 aagtgacgta aaatcgtgtt gaggccaacg cccataatgc gggctgttgc ccggcatcca
+    19981 acgccattca tggccatatc aatgattttc tggtgcgtac cgggttgaga agcggtgtaa
+    20041 gtgaactgca gttgccatgt tttacggcag tgagagcaga gatagcgctg atgtccggcg
+    20101 gtgcttttgc cgttacgcac caccccgtca gtagctgaac aggagggaca gctgatagaa
+    20161 acagaagcca ctggagcacc tcaaaaacac catcatacac taaatcagta agttggcagc
+    20221 atcacctacc tcaatgtgta tcacaatatc catattcttt gtgggggagt ctggagattg
+    20281 agtagatatt cttgttcaga atgtatcagc cgatggttct acgattctta agccacgaag
+    20341 agttcagata gtacaacggc atgtctcttt tgactatctg gcaaccggca gtgtgttctc
+    20401 tcacgcatca caaaagcagc aggcataaaa aaacccgctt gcgcgggctt tttcacaaag
+    20461 cttcagcaaa ttggcgatta agccagtttg ttgatctgtg cagtcaggtt agccttatga
+    20521 cgtgcagctt tgtttttgtg gatcagacct ttagcagcct gacggtccac gatcggttgc
+    20581 atttcgttaa atgctttctg tgcagcagct ttgtcgccag cttcgatagc tgcgtatact
+    20641 ttcttgatga aagtacgcat catagagcga cggcttgcgt tgtgcttacg agccttttca
+    20701 gactgaatgg cgcgcttctt agctgatttg atattagcca aggtccaact cccaaatgtg
+    20761 ttctatatgg acaattcaaa ggccgaggaa tatgcccttt tagccttctt ttgtcaatgg
+    20821 atttgtgcaa ataagcgccg ttaatgtgcc ggcactcgtt acgtagtgat ggcgcaggat
+    20881 tctaccagct tgcggggtgt gaatacagct tttccgcgat aaaaattgca gcaggcggtc
+    20941 agtttcttcc cgtgatttgc gccatggcaa tgaaaagcca cttctttctg atttcggtac
+    21001 tcaatcgccg gttaaccttg accgctgtac aaggtctact cggacgattt tcactgtttt
+    21061 gagccagaca tgaagctgat acgcggcata cataatctca gccaggcccc gcaagaaggg
+    21121 tgtgtgctga ctattggtaa tttcgacggc gtgcatcgcg gtcatcgcgc gctgttacag
+    21181 ggcttgcagg aagaagggcg caagcgcaac ttaccggtga tggtgatgct ttttgaacct
+    21241 caaccactgg aactgtttgc taccgataaa gccccggcaa gactgacccg gctgcgggaa
+    21301 aaactgcgtt accttgcaga gtgtggcgtt gattacgtgc tgtgcgtgcg tttcgacagg
+    21361 cgtttcgcgg cgttaaccgc gcaaaatttc gtcagcgatc ttctggtgaa gcatttgcgc
+    21421 gtaaaatttc ttgccgtagg tgatgatttc cctttggcgc tggtcgtgaa ggcgatttct
+    21481 tgttattaca gaaagctggc atggaatacg gcttcgatat caccagtacg caaacttttt
+    21541 gcagaggtgg cgtgcgcatc agcagcacgg ctgcgtcagg cccttgcgga tgacaatctg
+    21601 gctctggcag agagtttact ggggcacccg tttgccatct ccgggcgtgt agtccacggt
+    21661 gatgaattag ggcgcactat aggtttcccg acggcgaatg taccgccgcg ccgtcaggtt
+    21721 tccccggtga aaggggttta tgcggtagaa gtgctgggcc tcggtgaaaa gccgttaccc
+    21781 ggcgtggcaa acatcggaac acgcccaacg gttgccggta ttcgccagca gctggaagtg
+    21841 catttgttag atgttgcaat ggacctttac ggtcgccata tacaagtagt gctgcgtaaa
+    21901 aaaatacgca atgagcagcg atttgcgtcg ctggacgaac tgaaagcgca gattgcgcgt
+    21961 gatgaattaa ccgcccgcga attttttggg ctaacaaaac cggcttaagc ctgttatgta
+    22021 atcaaaccga aatacggaac cgagaatctg atgagtgact ataaatcaac cctgaatttg
+    22081 ccggaaacag ggttcccgat gcgtggcgat ctcgccaagc gcgaacccgg aatgctggcg
+    22141 cgttggactg atgatgatct gtacggcatc atccgtgcgg ctaaaaaagg caaaaaaacc
+    22201 ttcattctgc atgatggccc tccttatgcg aatggcagca ttcatattgg tcactcggtt
+    22261 aacaagattc tgaaagacat tatcgtgaag tccaaagggc tttccggtta tgactcgccg
+    22321 tatgtgcctg gctgggactg ccacggtctg ccgatcgagc tgaaagtcga gcaagaatac
+    22381 ggtaagccgg gtgagaaatt caccgccgcc gagttccgcg ccaagtgccg cgaatacgcg
+    22441 gcgacccagg ttgacggtca acgcaaagac tttatccgtc tgggcgtgct gggcgactgg
+    22501 tcgcacccgt acctgaccat gggacttcaa aactggaagg ccaacatcat ccgcgcgctg
+    22561 ggcaaaatca tcggcaacgg tcacctgcac aaaggcgcga agccagttca ctggtgcgtt
+    22621 gactgccgtt ctgcgctggc gaagcggaag ttgagtatta cgacaaaact tctccgtcca
+    22681 tcgacgttgc tttccagggc agtcgatcag gatgcactga aagcaaaatt tgccgtaagc
+    22741 aacgttaacg gcccaatctc gctggtaatc tggaccacca cgccgtggac tctgcctgcc
+    22801 aaccgcgcaa tctctattgc accagatttc gactatgcgc tggtgcagat cgacggtcag
+    22861 gccgtgattc tggcgaaaga tctggttgaa agcgtaatgc agcgtatcgg cgtgaccgat
+    22921 tctcggcacg gtaaaaggtg cggagctgga gccgctgcgt ttacccatcc gtttatgggc
+    22981 ttcgacgttc cggcaatcct cggcgatcac gttaccctgg atgccggtac cggtgccgtt
+    23041 cacaccgcgc ctggccacgg cccggacgac tatgtgatcg gtcagaaata cggcctggaa
+    23101 accgctaacc cggttggccc ggacggcact tatctgccgg gcacttatcc gacgctggat
+    23161 ggcgtgaacg tcttcaaagc gaacgacatc gtcgctgcgc tgctgcagga aaaaggcgcg
+    23221 ctgctgcacg ttgagaaaat gcagcacagc tatccgtgct gctggcgtca caaaacgccg
+    23281 atcatcttcc gcgcgacgcc gcagtggttc gtcagcatgg atcagaaagg tctgcgtgcg
+    23341 cagtcactga aagagatcaa aggcgtgcag tggatcccgg actggggcca ggcgcgtatc
+    23401 gagtcgatgg ttgctaaccg tcctgactgg tgtatctccc gtcagcgcac ctggggtgta
+    23461 ccgatgtcac tgttcgtgca caaagacacg gaagagctgc atccgcgtac ccttgaactg
+    23521 atggaagaag tggcaaaacg cgttgaagtc gatggcatcc aggcgtggtg ggatctcgat
+    23581 gcgaaagaga tcctcggcga cgaagctgat cagtacgtga aagtgccgga cacattggat
+    23641 gtatggtttg actccggatc tacccactct tctgttgttg acgtgcgtcc ggaatttgcc
+    23701 ggtcacgcag cggacatgta tctggaaggt tctgaccaac accgcggctg gttcatgtct
+    23761 tccctaatga tctccaccgc gatgaagggt aaagcgccgt atcgtcaggt actcacccac
+    23821 ggctttaccg tggatggtca gggccgcaag atgtctaaat ccatcggcaa taccgtttcg
+    23881 ccgcaggatg tgatgaacaa actgggcgcg gatattctgc gtctgtgggt ggcatcaacc
+    23941 gactacaccg gtgaaatggc cgtttctgac gagatcctga aacgtgctgc cgatacgtat
+    24001 cgtcgtatcc gtaacaccgc gcgcttcctg ctggcaaacc tgaacggttt tgatccagca
+    24061 aaagatatgg tgaaacggag agagatggtg gtactggatc gctgggccgt agttgtgcga
+    24121 aagcggcaca ggaagacatc ctcaaggcgt acgaagcata cgatttccac gaagtggtac
+    24181 aagcgtctga tgcgcttctg ctccgttgag atgggttcct tctacctcga catcatcaaa
+    24241 gaccgtcagt actacgccaa aggacacagt gtggcgcgtc gtagctgcca gactgcgcta
+    24301 tatcacatcg cagaagcgct ggtgcgctgg atggcaccaa tcctctcctt caccgctgat
+    24361 gaagtgtggg gctacctgcc gggcgaacgt gaaaaatacg tcttcaccgg tgagtggtac
+    24421 gaaggcctgt ttggcctggc agacagtgaa gcgatgaacg atgcgttctg ggacgagctg
+    24481 ttgaaagtgc gtggcgaagt gaacaaagtc attgagcaag cgcgtgccga caagaaagtg
+    24541 ggtggctcgc tggaagcggc agtaaccttg tatgcagaac cggaactgtc ggcgaaactg
+    24601 accgcgctgg gcgatgaatt acgatttgtc ctgttgacct ccggcgctac cgttgcagac
+    24661 tataacgacg cacctgctga tgctcagcag agcgaagtac tcaaagggct gaaagtcgcg
+    24721 ttgagtaaag ccgaaggtga gaagtgccca cgctgctggc actacaccca ggatgtcggc
+    24781 aaggtggcgg aacacgcaga aatctgcggc cgctgtgtca gcaacgtcgc cggtgacggt
+    24841 gaaaaacgta agtttgcctg atgagtcaat cgatctgttc aacagggcta cgctggctgt
+    24901 ggctggtggt agtcgtgctg attatcgatc tgggcagcaa atacctgatc ctccagaact
+    24961 ttgctctggg ggatacggtc ccgctgttcc cgtcgcttaa tctgcattat gcgcgtaact
+    25021 atggcgcggc gtttagtttc cttgccgata gcggcggctg gcagcgttgg ttctttgccg
+    25081 gtattgcgat tggtattagc gtgatcctgg cagtgatgat gtatcgctcg aaggccacgc
+    25141 agaagctaaa caatatcgct tacgcgctga ttattggcgg cgcgctgggc aacctgttcg
+    25201 accgcctgtg gcacggcttc gttgtcgata tgatcgactt ctacgtcggc gactggcact
+    25261 tcgccacctt caaccttgcc gatactgcca tctgtgtcgg tgcggcactg attgtgctgg
+    25321 aaggtttttt gccttctaga gcgaaaaaac aataataaac cctgccggat gcgatgctga
+    25381 cgcatcttat ccggcctaca gattgctgcg aaatcgtagg ccggataagg cgtttacgcc
+    25441 gcatccggca aaaatcctta aatataagag caaacctgca tgtctgaatc tgtacagagc
+    25501 aatagcgccg tcctggtgca cttcacgcta aaactcgacg atggcaccac cgccgagtct
+    25561 acccgcaaca acggtaaacc ggcgctgttc cgcctgggtg atgcttctct ttctgaaggg
+    25621 ctggagcaac acctgttggg gctgaaagtg ggcgataaaa ccaccttctc gttggagcca
+    25681 gatgcggcgt ttggcgtgcc gtcaccggac ctgattcagt acttctcccg ccgtgaattt
+    25741 atggatgcag gcgagccaga aattggcgca atcatgcttt ttaccgcaat ggatggcagt
+    25801 gagatgcctg gcgtgatccg cgaaattaac ggcgactcca ttaccgttga tttcaaccat
+    25861 ccgctggccg ggcagaccgt tcattttgat attgaagtgc tggaaatcga tccggcactg
+    25921 gaggcgtaac atgcagatcc tgttggccaa cccgcgtggt ttttgtgccg gggtagaccg
+    25981 cgctatcagc attgttgaaa acgcgctggc catttacggc gcaccgatat atgtccgtca
+    26041 cgaagtggta cataaccgct atgtggtcga tagcttgcgt gagcgtgggg ctatctttat
+    26101 tgagcagatt agcgaagtac cggacggcgc gatcctgatt ttctccgcac acggtgtttc
+    26161 tcaggcggta cgtaacgaag caaaaagtcg cgatttgacg gtgtttgatg ccacctgtcc
+    26221 gctggtgacc aaagtgcata tggaagtcgc ccgcgccagt cgccgtggcg aagaatctat
+    26281 tctcatcggt cacgccgggc acccggaagt ggaagggaca atgggccagt acagtaaccc
+    26341 ggaaggggga atgtatctgg tcgaatcgcc ggacgatgtg tggaaactga cggtcaaaaa
+    26401 cgaagagaag ctctccttta tgacccagac cacgctgtcg gtggatgaca cgtctgatgt
+    26461 gatcgacgcg ctgcgtaaac gcttcccgaa aattgtcggt ccgcgcaaag atgacatctg
+    26521 ctacgccacg actaaccgtc aggaagcggt acgcgccctg gcagaacagg cggaagttgt
+    26581 gttggtggtc ggttcgaaaa actcctccaa ctccaaccgt ctggcggagc tggcccagcg
+    26641 tatgggcaaa cgcgcgtttt tgattgacga tgcgaaagac atccaggaag agtgggtgaa
+    26701 agaggttaaa tgcgtcggcg tgactgcggg cgcatcggct ccggatattc tggtgcagaa
+    26761 tgtggtggca cgtttgcagc agctgggcgg tggtgaagcc attccgctgg aaggccgtga
+    26821 agaaaacatt gttttcgaag tgccgaaaga gctgcgtgtc gatattcgtg aagtcgatta
+    26881 agtcattagc agcctaagtt atgcgaaaat gccggtcttg ttaccggcat tttttatgga
+    26941 gaaaacatgc gtttacctat cttcctcgat actgaccccg gcattgacga tgccgtcgcc
+    27001 attgccgccg cgatttttgc acccgaactc gacctgcaac tgatgaccac cgtcgcgggt
+    27061 aatgtctcgg ttgagaaaac tacccgcaat gccctgcaac tgctgcattt ctggaatgcg
+    27121 gagattccgc tcgcccaagg ggccgctgtg ccactggtac gcgcaccgcg tgatgcggca
+    27181 tctgtgcacg gcgaatcggg aatggctggc tacgactttg ttgagcacaa ccgaaagccg
+    27241 ctcgggatac cggcgtttct ggcgattcgg gatgccctga tgcgtgcacc agagcctgtt
+    27301 accctggtgg ccatcggccc gttaaccaat attgcgctgt tactttcaca atgcccggaa
+    27361 tgcaagccgt atattcgccg tctggtgatc atgggtggtt ctgccggacg cggcaactgt
+    27421 acgccaaacg ccgagtttaa tattgctgcc gatccagaag ctgctgcctg tgtcttccgc
+    27481 agtggtattg aaatcgtcat gtgcggtttg gatgtcacca atcaggcaat attaactcct
+    27541 gactatctct ctacactgcc gcagttaaac cgtaccggga aaatgcttca cgccctgttt
+    27601 agccactacc gtagcggcag tatgcaaagc ggcttgcgaa tgcacgatct ctgcgccatc
+    27661 gcctggctgg tgcgcccgga cctgttcact ctcaaaccct gttttgtggc agtggaaact
+    27721 cagggcgaat ttacctcagg cacgacggtg gttgatatcg acggttgcct gggcaagcca
+    27781 gccaatgtac aggtggcatt ggatctggat gtgaaaggct tccagcagtg ggtggctgag
+    27841 gtgctggctc tggcgtcgta acctgtcaca tgttattggc atgcagtcat tcatcgactc
+    27901 atgcctttca ctgatatccc tccctgttta tcattaattt ctaattatca gcgtttttgg
+    27961 ctggcggcgt agcgatgcgc tggttactct gaaaacggtc tatgcaaatt aacaaaagag
+    28021 aatagctatg catgatgcaa acatccgcgt tgccatcgcg ggagccgggg ggcgtatggg
+    28081 ccgccagttg attcaggcgg cgctggcatt agagggcgtg cagttgggcg ctgcgctgga
+    28141 gcgtgaagga tcttctttac tgggcagcga cgccggtgag ctggccggag ccgggaaaac
+    28201 aggcgttacc gtgcaaagca gcctcgatgc ggtaaaagat gattttgatg tgtttatcga
+    28261 ttttacccgt ccggaaggta cgctgaacca tctcgctttt tgtcgccagc atggcaaagg
+    28321 gatggtgatc ggcactacgg ggtttgacga agccggtaaa caagcaattc gtgacgccgc
+    28381 tgccgatatt gcgattgtct ttgcggccaa ttttagcgtt ggcgttaacg tcatgcttaa
+    28441 gctgctggag aaagcagcca aagtgatggg tgactacacc gatatcgaaa ttattgaagc
+    28501 acatcataga cataaagttg atgcgccgtc aggcaccgca ctggcaatgg gagaggcgat
+    28561 cgcccacgcc cttgataaag atctgaaaga ttgcgcggtc tacagtcgtg aaggccacac
+    28621 cggtgaacgt gtgcctggca ccattggttt tgccaccgtg cgtgcaggtg acatcgttgg
+    28681 tgaacatacc gcgatgtttg ccgatattgg cgagcgtctg gagatcaccc ataaggcgtc
+    28741 cagccgtatg acatttgcta acggcgcggt aagatcggct ttgtggttga gtggtaagga
+    28801 aagcggtctt tttgatatgc gagatgtact tgatctcaat aatttgtaac cacaaaatat
+    28861 ttgttatggt gcaaaaataa cacatttaat ttattgatta taaagggctt taatttttgg
+    28921 cccttttatt tttggtgtta tgtttttaaa ttgtctataa gtgccaaaaa ttacatgttt
+    28981 tgtcttctgt ttttgttgtt ttaatgtaaa ttttgaccat ttggtccact tttttctgct
+    29041 cgtttttatt tcatgcaatc ttcttgctgc gcaagcgttt tccagaacag gttagatgat
+    29101 ctttttgtcg cttaatgcct gtaaaacatg catgagccac aaaataatat aaaaaatccc
+    29161 gccattaagt tgacttttag cgcccatatc tccagaatgc cgccgtttgc cagaaattcg
+    29221 tcggtaagca gatttgcatt gatttacgtc atcattgtga attaatatgc aaataaagtg
+    29281 agtgaatatt ctctggaggg tgttttgatt aagtcagcgc tattggttct ggaagacgga
+    29341 acccagtttc acggtcgggc cataggggca acaggttcgg cggttgggga agtcgttttc
+    29401 aatacttcaa tgaccggtta tcaagaaatc ctcactgatc cttcctattc tcgtcaaatc
+    29461 gttactctta cttatcccca tattggcaat gtcggcacca atgacgccga tgaagaatct
+    29521 tctcaggtac atgcacaagg tctggtgatt cgcgacctgc cgctgattgc cagcaacttc
+    29581 cgtaataccg aagacctctc ttcttacctg aaacgccata acatcgtggc gattgccgat
+    29641 atcgataccc gtaagctgac gcgtttactg cgcgagaaag gcgcacagaa tggctgcatt
+    29701 atcgcgggcg ataacccgga tgcggcgctg gcgttagaaa aagcccgcgc gttcccaggt
+    29761 ctgaatggca tggatctggc aaaagaagtg accaccgcag aagcctatag ctggacacaa
+    29821 gggagctgga cgttgaccgg tggcctgcca gaagcgaaaa aagaagacga gctgccgttc
+    29881 cacgtcgtgg cttatgattt tggtgccaag cgcaacatcc tgcggatgct ggtggataga
+    29941 ggctgtcgcc tgaccatcgt tccggcgcaa acttctgcgg aagatgtgct gaaaatgaat
+    30001 ccagacggca tcttcctctc caacggtcct ggcgacccgg ccccgtgcga ttacgccatt
+    30061 accgccatcc agaaattcct cgaaaccgat attccggtat tcggcatctg tctcggtcat
+    30121 cagctgctgg cgctggcgag cggtgcgaag actgtcaaaa tgaaatttgg tcaccacggc
+    30181 ggcaaccatc cggttaaaga tgtggagaaa aacgtggtaa tgatcaccgc ccagaaccac
+    30241 ggttttgcgg tggacgaagc aacattacct gcaaacctgc gtgtcacgca taaatccctg
+    30301 ttcgacggta cgttacaggg cattcatcgc accgataaac cggcattcag cttccagggg
+    30361 caccctgaag ccagccctgg tccacacgac gccgcgccgt tgttcgacca ctttatcgag
+    30421 ttaattgagc agtaccgtaa aaccgctaag taatcaggag taaaagagcc atgccaaaac
+    30481 gtacagatat aaaaagtatc ctgattctgg gtgcgggccc gattgttatc ggtcaggcgt
+    30541 gtgagtttga ctactctggc gcgcaagcgt gtaaagccct gcgtgaagag ggttaccgcg
+    30601 tcattctggt gaactccaac ccggcgacca tcatgaccga cccggaaatg gctgatgcaa
+    30661 cctacatcga gccgattcac tgggaagttg tacgcaagat tattgaaaaa gagcgcccgg
+    30721 acgcggtgct gccaacgatg ggcggtcaga cggcgctgaa ctgcgcgctg gagctggaac
+    30781 gtcagggcgt gttggaagag ttcggtgtca ccatgattgg tgccactgcc gatgcgattg
+    30841 ataaagcaga agaccgccgt cgtttcgacg tagcgatgaa gaaaattggt ctggaaaccg
+    30901 cgcgttccgg tatcgcacac acgatggaag aagcgctggc ggttgccgct gacgtgggct
+    30961 tcccgtgcat tattcgccca tcctttacca tgggcggtag cggcggcggt atcgcttata
+    31021 accgtgaaga gtttgaagaa atttgcgccc gcggtctgga tctctctccg accaaagagt
+    31081 tgctgattga tgagtcgctg atcggctgga aagagtacga gatggaagtg gtgcgtgata
+    31141 aaaacgacaa ctgcatcatc gtctgctcta tcgaaaactt cgatgcgatg ggcatccaca
+    31201 ccggtgactc catcactgtc gcgccagccc aaacgctgac cgacaaagaa tatcaaatca
+    31261 tgcgtaacgc ctcgatggcg gtgctgcgtg aaatcggcgt tgaaaccggt ggttccaacg
+    31321 ttcagtttgc ggtgaacccg aaaaacggtc gtctgattgt tatcgaaatg aacccacgcg
+    31381 tgtcccgttc ttcggcgctg gcgtcgaaag cgaccggttt cccgattgct aaagtggcgg
+    31441 cgaaactggc ggtgggttac accctcgacg aactgatgaa cgacatcact ggcggacgta
+    31501 ctccggcctc cttcgagccg tccatcgact atgtggttac taaaattcct cgcttcaact
+    31561 tcgaaaaatt cgccggtgct aacgaccgtc tgaccactca gatgaaatcg gttggcgaag
+    31621 tgatggcgat tggtcgcacg cagcaggaat ccctgcaaaa agcgctgcgc ggcctggaag
+    31681 tcggtgcgac tggattcgac ccgaaagtga gcctggatga cccggaagcg ttaaccaaaa
+    31741 tccgtcgcga actgaaagac gcaggcgcag atcgtatctg gtacatcgcc gatgcgttcc
+    31801 gtgcgggcct gtctgtggac ggcgtcttca acctgaccaa cattgaccgc tggttcctgg
+    31861 tacagattga agagctggtg cgtctggaag agaaagtggc ggaagtgggc atcactggcc
+    31921 tgaacgctga cttcctgcgc cagctgaaac gcaaaggctt tgccgatgcg cgcttggcaa
+    31981 aactggcggg cgtacgcgaa gcggaaatcc gtaagctgcg tgaccagtat gacctgcacc
+    32041 cggtttataa gcgcgtggat acctgtgcgg cagagttcgc caccgacacc gcttacatgt
+    32101 actccactta tgaagaagag tgcgaagcga atccgtctac cgaccgtgaa aaaatcatgg
+    32161 tgcttggcgg cggcccgaac cgtatcggtc agggtatcga attcgactac tgttgcgtac
+    32221 acgcctcgct ggcgctgcgc gaagacggtt acgaaaccat tatggttaac tgtaacccgg
+    32281 aaaccgtctc caccgactac gacacttccg accgcctcta cttcgagccg gtaactctgg
+    32341 aagatgtgct ggaaatcgtg cgtatcgaga agccgaaagg cgttatcgtc cagtacggcg
+    32401 gtcagacccc gctgaaactg gcgcgcgcgc tggaagctgc tggcgtaccg gttatcggca
+    32461 ccagcccgga tgctatcgac cgtgcagaag accgtgaacg cttccagcat gcggttgagc
+    32521 gtctgaaact gaaacaaccg gcgaacgcca ccgttaccgc tattgaaatg gcggtagaga
+    32581 aggcgaaaga gattggctac ccgctggtgg tacgtccgtc ttacgttctc ggcggtcggg
+    32641 cgatggaaat cgtctatgac gaagctgacc tgcgtcgcta cttccagacg gcggtcagcg
+    32701 tgtctaacga tgcgccagtg ttgctggacc acttcctcga tgacgcggta gaagttgacg
+    32761 tggatgccat ctgcgacggc gaaatggtgc tgattggcgg catcatggag catattgagc
+    32821 aggcgggcgt gcactccggt gactccgcat gttctctgcc agcctacacc ttaagtcagg
+    32881 aaattcagga tgtgatgcgc cagcaggtgc agaaactggc cttcgaattg caggtgcgcg
+    32941 gcctgatgaa cgtgcagttt gcggtgaaaa acaacgaagt ctacctgatt gaagttaacc
+    33001 cgcgtgcggc gcgtaccgtt ccgttcgtct ccaaagccac cggcgtaccg ctggcaaaag
+    33061 tggcggcgcg cgtgatggct ggcaaatcgc tggctgagca gggcgtaacc aaagaagtta
+    33121 tcccgccgta ctactcggtg aaagaagtgg tgctgccgtt caataaattc ccgggcgttg
+    33181 acccgctgtt agggccagaa atgcgctcta ccggggaagt catgggcgtg ggccgcacct
+    33241 tcgctgaagc gtttgccaaa gcgcagctgg gcagcaactc caccatgaag aaacacggtc
+    33301 gtgcgctgct ttccgtgcgc gaaggcgata aagaacgcgt ggtggacctg gcggcaaaac
+    33361 tgctgaaaca gggcttcgag ctggatgcga cccacggcac ggcgattgtg ctgggcgaag
+    33421 caggtatcaa cccgcgtctg gtaaacaagg tgcatgaagg ccgtccgcac attcaggacc
+    33481 gtatcaagaa tggcgaatat acctacatca tcaacaccac ctcaggccgt cgtgcgattg
+    33541 aagactcccg cgtgattcgt cgcagtgcgc tgcaatataa agtgcattac gacaccaccc
+    33601 tgaacggcgg ctttgccacc gcgatggcgc tgaatgccga tgcgactgaa aaagtaattt
+    33661 cggtgcagga aatgcacgca cagatcaaat aatagcgtgt catggcagat atttttcatc
+    33721 cgctaatttg atcgaataac taatacggtt ctctgatgag gaccgttttt ttttgcccat
+    33781 taagtaaatc ttttggggaa tcgatatttt tgatgacata agcaggattt agctcacact
+    33841 tatcgacggt gaagttgcat actatcgata tatccacaat tttaatatgg ccttgtttaa
+    33901 ttgcttcaaa acgagtcata gccagacttt taatttgtga aactggagtt cgtatgtgtg
+    33961 aaggatatgt tgaaaaacca ctctacttgt taatcgccga atggatgatg gctgaaaatc
+    34021 ggtgggtgat agcaatagag agatctctat tcatttcgat attgaacaca gcaaggcggt
+    34081 taataccctg acttatattc tgtcggaagt cacagaaata agctgcgaag ttaagatgat
+    34141 ccctaataag ctggaagggc ggggatgcca gtgtcagcga ctggttaaag tggtcgatat
+    34201 ccatgagcaa atttacgcgc gcctgcgcaa taacagtcgg gaaaaattag tcggtgtaag
+    34261 aaagacgccg cgtattcctg ccgttccgct cacggaactt aaccgcgagc agaagtggca
+    34321 gatgatgttg tcaaagagat tgcgtcgtta attttatctc gttgataccg ggcgtcctgc
+    34381 ttgaccagat gcgatgttgt agcatcttat ccagcaacca ggtcgcatcc ggcaagatca
+    34441 ccgtttaggc gtcacatccg tcgtcccctg caaacggggg cgattttcct ccatttgcct
+    34501 cagtggctgc gtttcatgta acgatacatg acagcgcccg acaagatcct gatactcttt
+    34561 ggtattcaac cgtttccagt gtaactcgtc gtcactaaca ttgcgtacag cgcgggctgg
+    34621 cgtacccatc aacaactggc gtttctcgcc gcgaaagccc gctttgacaa agctcatggc
+    34681 ggcaacaatg ctctcttcgc caatgaccgc gccatccata atcacgctgt tcatcccgac
+    34741 caatgcatcg cgaccaatca aacaaccatg caggatcgct ccgtgcccga tatggccgtt
+    34801 ttccccaacg atagtgtcag tgtcgcagta gccatgcata atgcagccat cctgaatatt
+    34861 ggctcccgct tgcacgatca accgcccgta gtcaccacgc agtgaggcga gtgggccgat
+    34921 gtagacaccg gctcccacaa tcacatcgcc aatcaagacg gcactgggat ggacaaacgc
+    34981 cgtcgggtga accaccggaa ttaacccctc aaaggcgtaa tagctcacgg ttgttaacgt
+    35041 cctttccaca ccggatcgcg cttctcggca aacgccagcg gcccttcaat ggcatcttcc
+    35101 gaatgcagaa ccgatggata gtgtttcaac acgccgctgc gaatatagcg atacgcttct
+    35161 tctaccggca tttcgctggt ggtgcggtag atctctttca gcgccgcaat cgccagcggg
+    35221 gcgctgttaa ccagctgctg agccagttcg cgggcgttat ccatcagttc cgcctggcta
+    35281 accacgcggt tgactatccc ccaacgcagc gcctcttctg cgcccattcg tctgccggtc
+    35341 atcaccattt cattgacgat ggcaggcggc aggatcttcg gcagacgcag cacaccgccg
+    35401 ctgtcaggaa cgatgcccag tttggcttcc ggcagggcga agctggcgtt atcggcacaa
+    35461 acaataaaat ctgccgccag cgccagttca aaggcgccgc caaaggcata gccgttcaca
+    35521 gctgcgataa ccggtttgtc gagattgaaa atttcggtta atcccgcaaa accacccgga
+    35581 ccaaagtcag catccggtgc ttcgccttct gctgccgctt ttaaatccca gcccgcggaa
+    35641 aagaacttct ctccggcacc ggtaataatg gcgacacgta attgcggatc gtcacggaaa
+    35701 tttagaaata cttcgcccat ttcaaagctg gtttttgcat caatagcatt cgcttttgga
+    35761 cgatcaaggg taatttccag aattgatcca ttgcgggtca gatgtaatga ttcactcatt
+    35821 ccttttctcc atttttgctt tttcagggac gacaacatcc ctgcaaaaaa tgcatattgt
+    35881 tttagagtgt gattattagc tggcagggta gttccctgct gtttcattta tttcagattc
+    35941 tttctaatta ttttccccga gcaattacgt ggcagatctt ttctgatctc cagataagag
+    36001 ggcactttaa atttcgccat attttgttcg cagaagcgga aaaattcctc ttcgctcaat
+    36061 gtttcacctt cattcagcac cacaaatgct ttgatggctt catcgcgaat cgaatcttta
+    36121 atacccacaa ccacgatgtc ctgaattttc gggtgcgcgg cgataatatt ttccagctcc
+    36181 acgcaggaga cattctcgcc gccacgttta atcatattgc agcggcgatc gacgaaataa
+    36241 aaaaagtcct cttcgtcgcg gtatccggta tcgccggtat gcagccagcc atcggcttcc
+    36301 agcactttcg cagtggcttg tgggttgaga aagtactctt tgaagatggt tttcccaggt
+    36361 atgcctttaa tgcagatttc accgatctca ccagccggga gcgggcgatt gtgatcgtcg
+    36421 cggatctccg cttcgtagca aaaccccacc cgaccaatcg acggccagcg tcgtttatcg
+    36481 ccaggacgat caccgataat gcccacaatg gtttccgtca tcccataaga cgtcagcaag
+    36541 cgaacgccga agcgttcaca aaacgcatct ttttcctgct ccgacaagtt gagataaaac
+    36601 atcacttccc gcaggcggtg ttgctgatcg ttcgctgaag gcggctgtac catcaacgta
+    36661 cggatcatca tcggaataca ttcggtaacg gtggcgcggt acttctgtac ctgtccccag
+    36721 aaggcgcggg cgctgtattt ctcgaccagc acaaaggtgg ccccggcaga aaacgccgcc
+    36781 atcgccgcag tacactggca atcgatatga aacgcaggca ttaccgtcag gtagacgtca
+    36841 tcgtcacgca gtgcacactg ccaggcggag taatatccag cgaagcgcag gttgtaatgg
+    36901 gtaatcacca cacctttcgg tcgggaggtg gtgccggagg tgaagagaat ttccgccgta
+    36961 tcgtcagtcg atagcggcgg tgcatagcac aaggtggcag gttgttgatt tttcagttga
+    37021 gtaaacgaac tcacgccatc atcagcggga agtgccacat ctgtcaggca aatgtgccgc
+    37081 aattgagtgg catcttcctg ctgaatctgt tgatacatag gatagaattg cgcactggtc
+    37141 accagcaggc acgcctggct attttgcagg atccacgcgc tttcctcgca caacaggcgg
+    37201 gcgttaatcg gcaccataat cgcgccaatt tttgccagcc cgaaccagca aaagataaat
+    37261 tccgggcagt tgtcgagatg tagtgcaacc ttgtcgcctt tgcgaatccc cagcgtataa
+    37321 aacaggtttg ccgtgcggtt aatctcctga tttaactcaa gataactata ccggttaacg
+    37381 actccgccgc tggattcaca aatcagcgcc gttttatgac cgtaaacgtc cgcaagatcg
+    37441 tcccacattt gacgtagatg ttgtccgcca atgatatcca ttgcacctct atccattttt
+    37501 gttcgtttgt tattgggcgg gcgctagtca ggcaagccga ctgacgccac gcgtttagtc
+    37561 ctcaactttg gccagacctt tgctgaccaa ctcctgaatg tcgttttcgc tgtagccgat
+    37621 atttttcaaa atggcagccg tgtccatgcc atgtgagggc attccgcgcc agatttgtcc
+    37681 ggggttattt ttgaatttcg gcatgatgtt cggccctttg caggtgcgac catccatcgt
+    37741 ttgccactga gtgattgatt cgcgagccac atactgtgga ttgctttcca gttccggtac
+    37801 ggtcagcact ttggcgcagg cgatattcag ttcagcaaag cgttctttta cttccgcgat
+    37861 ggtatgtgtc gccagccagg catcgagttt ctcttcaacc agtgggccgt aagggcattc
+    37921 gatacggtgg ataagctgag tgccttccgg gatttctggc gtgccaagca gatgtgcgag
+    37981 gccaatatct ttaaagcact cttcaatttg ggtaatgccc accagttcca tcacgatgta
+    38041 gccgtcggca catttataca gaccgcaacc ggcgtagtag ggatctttac ctttgctcat
+    38101 gcgcgggcac atttcgccgc cgttgaagta atccatcatg aagtactggc ccatacgcag
+    38161 catcacttca tacatggcga tgtcgatact ttcgccttta ccggtttcac gcactttatg
+    38221 cagtgctgcc agcgccgccg tggtggcggt caggccagaa aagtaatcgg cggtatacgg
+    38281 gaaggcaggc attggctggt caacatcacc gttctgaatc aggtaaccac taaaggcctg
+    38341 ggcgatagtg ttataggccg gaagattggt gtactcctcg gtgccgtact gaccaaaacc
+    38401 ggacaggtga gcgataacca gtttcgggtt gtgctgccac agtacttcat cggtaatgcc
+    38461 acgacgggca aaggccggac ctttactggc ttcgatgaag atatcggtgg tttccattaa
+    38521 tttcagaaac gcttcgcggc cttcatcttt gaaaatattt aacgacagcg cgtgcaaatt
+    38581 gcggcgggag agttgcgggt agttcggttg aacgcgaatg gtgtcggccc aggcgacgtt
+    38641 ctcgatccag ataacttccg cgccccattc tgcgaacatt tgcccggcaa acggtccggc
+    38701 gatttcgata ccggagaaga caacgcgcaa tccggccaac ggcccgaatt tcggcatggg
+    38761 tagatgatcc attatttgct cctgaaaaat ttatgtagcg catgactgcc ggatgcggcg
+    38821 taaacgcttt atccggccta cattcgtgct cccgtaggcc tgataagacg catcagcgcg
+    38881 gcatcaggca gcgcacggac ttagcggtat tgcttcagca ccgcacgacc cagcgtcagg
+    38941 atctgcattt cgtcagatcc cccggagacg cggtctacac gcagatcacg ccagaagcgg
+    39001 ctgatgcggt ggttgcccgc aatcccgaca ccgcccagca cctgcattgc gctatccaca
+    39061 acttcaaatg ccgcattggc gcagaagtat ttgcacatcg ctgcatcgcc agaggtgatg
+    39121 gtgccgttgt ctgctttcca cgctgcttca tacagcatgt ttttcatgga gtttaatttg
+    39181 atcgccatgt gggcgaattt ttcctgaatc aactggaaac gaccaatagc ctcgccaaac
+    39241 tgcacgcgct gattggcgta gcgcgccgca tcttcaaagg cgcacatcgc cgtaccgtag
+    39301 ttggtgaggg ctaccaggaa acgttcatgg tcgaactctt ctttgacgcg gttaaagccg
+    39361 ttaccttccc gaccgaacat gtctttctcg tccagttcca cgtcgtcaaa ggtgatttca
+    39421 cagcagctat ccatacgcag accgagcttt tcaagtttgg tcactttgat gcccggtttg
+    39481 ctcatatcaa caaaccatcc ggtgtagaca ggtttgtccg gagaagcccc gtcgcgcgcc
+    39541 atcaccacga tgtacggggt gtaggcgctg ctggtaataa aacacttact accattaaga
+    39601 taaatcttac catttttacg ggtataagtc gttttcaggc tacccacgtc ggagcccgcg
+    39661 cccggttcgg taatcgctga gttccacatc tgcttaccgg tgccgcggaa agccataatt
+    39721 ttgtcgatct gctcttgtgt gccttcgcgc aggaaggtgt tgaacccgcc cggcaactgg
+    39781 tacagcacat aggttggtgc ccccagacgt cccagctcca tccacacggc ggcgagagta
+    39841 acaaaccccg cgtccagacc accgtgctct tcagggatca gcagactgtc gatacccata
+    39901 tccgccagtg ctttgacaaa acgttccggg tagacgctgt cacggtcgca ctcggcaaaa
+    39961 taggcctccc agttttcgct ggccatcagt tcgcggatac cggcgacaaa cagttcctgc
+    40021 tcatcattta aattaaaatc catctttcaa cctcttgata ttttgggggg ttaattaatc
+    40081 tttccagttc tgtttcgcgt ctttaataaa ggagagcgtc accataatgt tgacgaagaa
+    40141 cagcgggcat cctccggcga taatggcggt ttgaatcggt ttcaggccgc cgagcgccag
+    40201 cagaacaata ccgataatgc caaccagaat tgaccaaccg atacgcacca gcagaggtgg
+    40261 ttcttcacca tcgcgtactt cgcggcaagt ggacatcgcc agggtataag agcaggcgtt
+    40321 aaccagcgta acggtggcaa taaagcagag gatgaagaag ccccacatgg tggcggtgct
+    40381 gagtggcaga gcggcccagg tttcaatgat ggcgcgcgcc acaccgtact gttcgatcag
+    40441 atttggaatg ttgatgatgt ttttatctat caacagcaga gtgttactac cgagtacagt
+    40501 ccacaggatc caggttgacg ctgtcagccc cagcaccatg ccgaagcaca gttcacgcac
+    40561 agtacgacca cgggagatgc gggcgaggaa gatactcatc tggatagcat aaatcaccca
+    40621 ccatgcccag tagaacacgg tccagccctg cgggaagccg cctttagcga tgggatcggt
+    40681 atagaacaac atgcgcggca gatacatcag caacatcccc accgaatcgg tgaagtagtt
+    40741 catgatgaag ctggcaccgc tgacaatgaa cacccaaccc agcatcagga agctcaggta
+    40801 actacgcacg tcactggcga tacgtacccc tttttgcaga ccgcaagcga cgcaaatggc
+    40861 gttgaggata atccagcagg taatgatgat agcgtccagt tgcagggtat gcggaatgcc
+    40921 aaacaaccat tgcatacact cggtcaccag cggcgtggca aggcccagac tggtacccat
+    40981 cgtgaagatc aaggcgacga gatagaagtt gtcgacgata gtgccgaaca accctttggc
+    41041 gtgtttttca cctaccagcg gcaccagtgt cgagctgggg cgaatcactt ccattttgcg
+    41101 gacaaagaag aagtaagcga aggcgactga aaggaagctg taagtggccc acggcagagg
+    41161 tccccagtgg aacaagctgt aagccagccc caactctttc gcccctgtcg agttcggttc
+    41221 taagccaaac ggcggggtgg agatgtagta gtagatctca atcgatcccc agaacagtac
+    41281 ggcagcagac gtacaggagg cgaacatcat aaagatccaa ctggcggtgc taaattctgg
+    41341 cggttcgtta cctaaacgct ttttggcata cgggccaaac accagccaga accaaccgaa
+    41401 aagcatcacc accatatacc attcaaatgc ccatccccat acattggtga cgtaactgaa
+    41461 tacagcatta ataacgacat tcgctgcatc cagatctctg actgtaagcc aacaaagtat
+    41521 gccgacgatt attaacggcg gaaagaaaac cttcggttct attcccgttt ttctcttttc
+    41581 attcttcatg agttaattcc actgtgaaaa cgaatattta ttttgcgttc ccgtttgttt
+    41641 tatttttgtt aacatttaat ataattatta ttaacctcgt ggacgcgtta atggctaact
+    41701 cataatgggt attcaataag ctgtattctg tgattggtat cacatttttg tttcgggtga
+    41761 atagagggcg ttttttcgtt aattttgatt aataatcagt ttgttatgct ctgttgtgag
+    41821 taaaaaataa catctgactt tcaatattgg tgatccataa aacaatattg aaaatttctt
+    41881 tttgctacgc cgtgttttca atattggtga ggaacttaac aatattgaaa gttggattta
+    41941 tctgcgtgtg acattttcaa tattggtgat taaagtttta tttcaaaatt aaagggcgtg
+    42001 atatctgtaa ttaacaccac cgatatgaac gacgtttcct tcatgatttc tggagatgca
+    42061 atgaagatta ttacttgcta taagtgcgtg cctgatgaac aggatattgc ggtcaataat
+    42121 gctgatggtt cattagactt cagcaaagcc gatgccaaaa taagccaata cgatctcaac
+    42181 gctattgaag cggcttgcca gctaaagcaa caggcagcag aggcgcaggt gacagcctta
+    42241 agtgtgggcg gtaaagccct gaccaacgcc aaagggcgta aagatgtgct atcgcgcgac
+    42301 ccggatgaac tgattgtggt gattgatgac cagttcgagc aggcactgcc gcaacaaacg
+    42361 gcgagcgcac tggctgcagc cgcccagaaa gcaggctttg atctgatcct ctgtggcgat
+    42421 ggttcttccg acctttatgc ccagcaggtt ggtctgctgg tgggcgaaat cctcaatatt
+    42481 ccggcagtta acggcgtcag caaaattatc tccctgacgg cagataccct caccgttgag
+    42541 cgcgaactgg aagatgaaac cgaaacctta agcattccgc tgcctgcggt tgttgctgtt
+    42601 tccactgata tcaactcccc acaaattcct tcgatgaaag ccattctcgg cgcggcgaaa
+    42661 aagcccgtcc aggtatggtc ggcggcggat attggtttta acgcagaggc agcctggtca
+    42721 gaacaacagg ttgccgcgcc gaaacagcgc gaacgtcagc gcatcgtgat tgaaggcgac
+    42781 ggcgaagaac agatcgccgc atttgctgaa aatcttcgca aagtcattta accacagggg
+    42841 atgctacgaa cacattttct caagtctggg tattcagcga taccccttct cgtctgccgg
+    42901 aactgatgaa cggtgcgcag gctttagcta atcaaatcaa cacctttgtc ctcgatgatc
+    42961 ggcgacggtg tacaggcaat ccagctcggc gctaatcatg tctggaaatt aaacggcaaa
+    43021 ccggacgatc ggatgatcga agattacgcc ggtgtcatgg ctgacactat tcgccagcac
+    43081 ggcgcagacg gcctggtgct gctgccaaac acccgtcgcg gcaaattact ggcggcaaaa
+    43141 ctgggttatc gccttaaagc ggcggtgtct aacgatgcca gcaccgtcag cgtacaggac
+    43201 ggtaaagcga cagtgaaaca catggtttac ggtggtctgg cgattggcga agaacgcatt
+    43261 gccacgccgt atgcggtact gaccatcagc agcggcacgt tcgatgcggc tcagccagac
+    43321 gcgtcacgca ctgtgaaacg cacaccgtgg agtggcaggc tccggctgtg gcgattaccc
+    43381 gcacggcaac ccaggcgcgc cagagcaaca gcgtcgatct cgacaaagcc cgtctggtgg
+    43441 tcagcgtcgg tcgcggtatt ggcagcaaag agaacattgc gctggcagaa cagctttgca
+    43501 aggcgatagg tgcggagttg gcctgttctc gtccggtggc ggaaaacgaa aaatggatgg
+    43561 agcacgaacg ctatgtcggt atctccaacc tgatgctgaa acctgaactg tacctggcgg
+    43621 tggggatctc cgggcagatc cagcacatgg ttggcgctaa cgcgtcgcaa accattttcg
+    43681 ccatcaataa agataaaaat gcgccgatct tccagtacgc ggattacggc attgttggcg
+    43741 acgccgtgaa gatccttccg gcgctgaccg cagctttagc gcgttgatcc actctggcag
+    43801 ggctgcattt tggccctgcc gctgacaggg agctcttatg tccgaagata tctttgacgc
+    43861 catcatcgtc ggtgcagggc ttgctggttc ggttgccgca ctggtgctcg cccgcgaagg
+    43921 tgcgcaagtg ttagttatcg agcgtggcaa ttccgcaggt gccaagaacg tcaccggcgg
+    43981 gcgtctctat gcccacagtc tggaacacat tattcctggt ttcgccgact ccgcccccgt
+    44041 agaacgcctg atcacccatg aaaaactcgc gtttatgacg gaaaagtcag cgatgactat
+    44101 ggactactgc aatggtgacg aaacctcgcc atcccagcgt tcttactccg ttttgcgcag
+    44161 taaatttgat gcctggctga tggagcaggc cgaagaagcg ggcgcgcagt taattaccgg
+    44221 gatccgcgtc gataacctcg tacagcgcga tggcaaagtc gtcggtgtag aagccgatgg
+    44281 cgatgtgatt gaagcgaaaa cggtgatcct tgctgatggg gtgaactcca tccttgccga
+    44341 aaaattgggg atggcaaaac gcgtcaaacc gacggatgtg gcggttggcg tgaaggaact
+    44401 gatcgagtta ccgaagtcgg ttattgaaga ccgttttcag ttgcagggta atcagggggc
+    44461 ggcttgcctg tttgcgggat cacccaccga tggcctgatg ggcggcggct tcctttatac
+    44521 caatgaaaac accctgtcgc tggggctggt ttgtggtttg catcatctgc atgacgcgaa
+    44581 aaaatcggtg ccgcaaatgc tggaagattt caaacagcat ccggccgttg caccgctgat
+    44641 cgcgggcggc aagctggtgg aatattccgc tcacataatg ccggaagcag gcatcaacat
+    44701 gctgccggag ttggttggtg acggcgtatt gattgccggt gatgccgccg gaatgtgtat
+    44761 gaacttcggt tttaccattc gcggtatgga tctggcgatt gccgccgggg aagccgcagc
+    44821 aaaaaccgtg ctttcagcga tgaaaagcga cgatttcagt aagcaaaaac tggcggaata
+    44881 tcgtcagcat cttgagagtg gtccgctgcg cgatatgcgt atgtaccaga aactaccggc
+    44941 gttccttgat aacccacgca tgtttagcgg ctacccggag ctggcggtgg gtgtggcgcg
+    45001 tgacctgttc accattgatg gcagcgcgcc ggaactgatg cgcaagaaaa tcctccgcca
+    45061 cggcaagaaa gtgggcttca tcaatctaat caaggatggc atgaaaggag tgaccgtttt
+    45121 atgacttctc ccgtcaatgt ggacgtcaaa ctgggcgtca ataaattcaa tgtcgatgaa
+    45181 gagcatccgc acattgttgt gaaggccgat gctgataaac aggtgctgga gctgctggtg
+    45241 aaagcgtgcc ccgcaggtct gtacaagaag caggatgacg gcagtgtgcg cttcgattac
+    45301 gccggatgtc tggagtgcgg cacctgtcgc attctggggc tggggtcggc gctggaacag
+    45361 tgggaatacc cgcgcggcac ctttggtgtg gagttccgtt acagctaatg ttgctttgat
+    45421 acgtaacgcc gcactgactc tcattgcaaa aaacatgaat agctatgcaa ccgtccagaa
+    45481 accttgacga tctcaaatgc ttgtcccact accgccgcat tttgctgtgg ggaagcggtg
+    45541 gtccgtttct gtatggttat gtactggtaa tgattggcgt ggcgctggag caactgacgc
+    45601 cggcgctgaa actggacgct gactggattg gcttgctggg cgcgggaacg ctcgccgggc
+    45661 tgttcgttgg cacatcgctg tttggttata tttccgataa agtcggacgg cgcaaaatgt
+    45721 tcctcattga tatcatcgcc atcggcgtga tatcggtggc gacgatgttt gtttcatccc
+    45781 ccgtcgaact gttggtgatg cgggtattta tcggcattgt catcggtgca gattatccca
+    45841 tcgccacctc aatgatcaac gagttctcca gtacccgtca gcgggcggtt ttccatcagc
+    45901 tttattgccg cgatgtggta tgttggcgcg acctgtgccg atctggtcgg ctactggctt
+    45961 tatgatgtgg aaggcggctg gcgctggatg ctgggtagcg cggcgatccc ctgtctgttg
+    46021 attttgattg gtcagattcg aactgcctga atctccccgc tggttattat gcaaagggcg
+    46081 agtaaaagag tgcgaggaga tgatgatcaa actgtttgga gaaccggtgg ctttcgatga
+    46141 agagcagccg cagcaaaccc ggttttcgcg atctgtttaa tcgctgccat tttccttttg
+    46201 ttctgtttgt tgccgccatc tggacctgcc aggtgatccc aatgttcgcc atttacacct
+    46261 ttggcccgca aattgttggt ttgttgggat tgggggttgg caaaaacgcg gcactaggaa
+    46321 atgtggtgat tagcctgttc tttatgctcg gctgtattcc gccgatgctg tggttaagca
+    46381 ctgccggacg gcgtccattg ttgattggca gctttgccat gatgacgctg gcgctggcgg
+    46441 ttttggggct aatcccggat atggggatct ggctggtagt gatggccttt gcggtgtatg
+    46501 cctttttctc tggcgggccg ggtaatttgc agtggctcta tcctaatgaa ctcttcccga
+    46561 cagatatccg cgcctctgcc gtgggcgtga ttatgtcctt aagtcgtatt ggcaccattg
+    46621 tttcgacctg ggcactaccg atctttatcg ataattacgg tatcagtaac acgatgctaa
+    46681 tgggggcggg tatctcgctg tttggcttgt tgatttccgt agcgtttgcc ccggagactc
+    46741 gagggatgtc actggcgcag accagcaata tgacgatccg cgggcagaga atggggtaaa
+    46801 ttgttcagat ttctctcttt tctgaatcaa tattattgac tataagccgc gtgaatatat
+    46861 gactacactt tgtgggaaaa caaaggcgta atcacgcggg ctacctatga ttcttataat
+    46921 ttatgcgcat ccgtatccgc atcattccca tgcgaataaa cggatgcttg aacaggcaag
+    46981 gacgctggaa ggcgtcgaaa ttcgctctct ttatcaactc tatcctgact tcaatatcga
+    47041 tattgccgcc gagcaggagg cgctgtctcg cgccgatctg atcgtctggc agcatccgat
+    47101 gcagtggtac agcattcctc cgctcctcaa actttggatc aataaagttt tctcccacgg
+    47161 ctgggcttac ggacatggcg gcacggcgct gcatggcaaa catttgctgt gggcggtgac
+    47221 gaccggcggc ggggaaagcc attttgaaat tggtgcgcat ccgggctttg atggtctgtc
+    47281 gcagccgcta caggcgacgg caatctactg cgggctgaac tggctgccac cgtttgccat
+    47341 gcactgcacc tttatttgtg acgacgaaac cctcgaaggg caggcgcgtc actataagca
+    47401 acgtctgctg gaatggcagg aggcccatca tggatagcca tacgctgatt caggcgctga
+    47461 tttatctcgg ttcggcagcg ctgattgtac ccattgcggt acgtcttggt ctgggatcgg
+    47521 tacttggcta cctgatcgcc ggctgcatta ttggcccgtg ggggctgcga ctggtgaccg
+    47581 atgccgaatc tattctgcac tttgccgaga ttggggtggt gctgatgctg tttattatcg
+    47641 gcctcgaact cgatccacaa aggctgtgga agctgcgtgc ggcagtgttc ggctgtggcg
+    47701 cattgcagat ggtgatttgc ggcggcctgc tggggctgtt ctgcatgtta cttgggctgc
+    47761 gctggcaggt cgcggaattg atcggcatga cgctggcgct ctcctctacg gcgattgcca
+    47821 tgcaggcgat gaatgaacgc aatctgatgg tgacgcaaat gggtcgcagt gcctttgcgg
+    47881 tgctgctgtt ccaggatatc gcggcgatcc cgctggtggc gatgattccg ctactggcaa
+    47941 cgagcagtgc ctcgacgacg atgggcgcat ttgctctctc ggcgttaaaa gtggcgggtg
+    48001 cgctggtgct ggtggtattg ctggggcgct atgtcacgcg tccggcgctg cgttttgtag
+    48061 cccgctctgg cttgcgggaa gtgtttagtg ccgtggcgtt attcctcgtg tttggctttg
+    48121 gtttgctgct ggaagaggtc ggcttgtcga tggcgatggg cgcgtttctg gcgggcgtac
+    48181 tgctggcaag ctcggaatac cgtcatgcgc tggagagcga tatcgaacca tttaaaggtt
+    48241 tgctgttggg gctgtttttc atcggtgttg gcatgtcgat agactttggc acgctgcttg
+    48301 aaaacccatt gcgcattgtc attttgctgc tcggtttcct catcatcaaa atcgccatgc
+    48361 tgtggctgat tgcccgaccg ttgcaagtgc caaataaaca gcgtcgttgg tttgcggtgt
+    48421 tgttagggca gggcagtgag tttgcctttg tggtatttgg cgcggcgcag atggcgaatg
+    48481 tgctggagcc ggagtgggcg aaatcgctga ccctggcggt ggcgctgtcg atggcagcaa
+    48541 cgccgattct gctggtgatc ctcaatcgcc ttgagcaatc ttctactgag gaagcgcgtg
+    48601 aagccgatga gatcgacgaa gaacagccgc gcgtgattat cgccggattc ggtcgttttg
+    48661 ggcagattac cggacgttta ctgctctcca gcggggtgaa aatggtggta ctcgatcacg
+    48721 atccggacca tatcgaaacc ttgcgtaaat ttggtatgaa agtgttttat ggcgatgcca
+    48781 cgcggatgga tttactggaa tctgccggag cggcgaaagc ggaagtgctg attaacgcca
+    48841 tcgacgatcc gcaaaccaac ctgcaactga cagagatggt gaaagaacat ttcccgcatt
+    48901 tgcagattat tgcccgcgcc cgcgatgtcg accactacat tcgtttgcgt caggcaggcg
+    48961 ttgaaaagcc ggagcgtgaa accttcgaag gtgcgctgaa aaccgggcgt ctggcactgg
+    49021 aaagtttagg tctggggccg tatgaagcgc gagaacgtgc cgatgtgttc cgccgcttta
+    49081 atattcagat ggtggaagag atggcaatgg ttgagaacga caccaaagcc cgcgcggcgg
+    49141 tctataaacg caccagcgcg atgttaagtg agatcattac cgaggaccgc gaacatctgt
+    49201 cattaattca acgacatggc tggcagggaa ccgaagaagg taaacatacc ggcaacatgg
+    49261 cggatgaacc ggaaacgaaa ccctcatcct aataaagagt gacgtaaatc acacttacag
+    49321 ctaactgttt gttttgtttc attgtaatgc ggcgagtcca gggagagagc gtggactcgc
+    49381 cagcagaata taaaattttc ctcaacatca tcctcgcacc agtcgacgac ggtttacgct
+    49441 ttacgtatag tggcgacaat tttttttatc gggaaatctc aatgatcagt ctgattgcgg
+    49501 cgttagcggt agatcgcgtt atcggcatgg aaaacgccat gccgtggaac ctgcctgccg
+    49561 atctcgcctg gtttaaacgc aacaccttaa ataaacccgt gattatgggc cgccatacct
+    49621 gggaatcaat cggtcgtccg ttgccaggac gcaaaaatat tatcctcagc agtcaaccgg
+    49681 gtacggacga tcgcgtaacg tgggtgaagt cggtggatga agccatcgcg gcgtgtggtg
+    49741 acgtaccaga aatcatggtg attggcggcg gtcgcgttta tgaacagttc ttgccaaaag
+    49801 cgcaaaaact gtatctgacg catatcgacg cagaagtgga aggcgacacc catttcccgg
+    49861 attacgagcc ggatgactgg gaatcggtat tcagcgaatt ccacgatgct gatgcgcaga
+    49921 actctcacag ctattgcttt gagattctgg agcggcggta attttgtata gaatttacgg
+    49981 ctagcgccgg atgcgacgcc ggtcgcgtct tatccggcct tcctatatca ggctgtgttt
+    50041 aagacgccgc cgcttcggcc aaatccttat gccggttcga cggctggaca aaatactgtt
+    50101 tatcttccca gcgcaggcag gttaatgtac caccccagca gcagccggta tccagcgcgt
+    50161 atataccttc cggcgtacct ttgccctcca gcgatgccca gtgaccaaag gcgatgctgt
+    50221 attcttcagc gacagggcca ggaatcgcaa accacggttt cagtggggca ggggcctctt
+    50281 ccggcgattc tttgctgtac atatccagtt gaccgttcgg gaagcaaaaa cgcatacggg
+    50341 taaaagcgtt ggtgataaaa cgcagtcttc ccagcccccg caattccggt gaccagttat
+    50401 ttggcatatc gccgtacatg gcatcaagaa agaagggata ggagtcactc gatagcaccg
+    50461 cttctacatc gcgtgcgcac tctttggcgg tctgcagatc ccactgcggc gtgatccctg
+    50521 cgtgggccat caccagcttt ttctcttcgt cgatttgcag cagaggctgg cgccgcagcc
+    50581 agttaagcag ctcgtcggca tccggcgctt ccagcagcgg tgtcaggcga tctaacggtt
+    50641 tattgcggct gatcccggca aataccgcca gcagatgcag atcgtgattg cccagcacca
+    50701 gacgtacgct gtcgcctaag gatttcacat agcgcagaac atccagcgaa cccggcccgc
+    50761 gcgcgaccag atcgcccgtc agccagaggg tatctttccc aggggtaaat tctactttat
+    50821 gcagcaatgc gatcagttca tcgtaacaac catgaacgtc gccaataagg tatgtcgcca
+    50881 tattctttta atgaatgagt gtgggaacgg cgagtcggaa tacgggaatg tcgatgctga
+    50941 aagggacgcc attttcatcg atcatttcgt agtgaccctg catggtgccc agcggggttt
+    51001 caatgattgc accgctggtg tactggtact cttcgccagg cgcgataagt ggctggacgc
+    51061 caaccactcc ttcgccctgg acttcggttt cacggccatt gccattggtg atcagccagt
+    51121 aacgccccaa caactgcact ggcgctcgcc ccagattgcg tatggttacg gtataagcaa
+    51181 aaacgtaacg ttcattatca ggtgaagatt gagcctcaat gtagacgctt tgaacctgaa
+    51241 tacacactcg gggcgaattg atcatcgtta actctcctgc aaaggcgcgt tctccgccca
+    51301 gatagttcgc catctggcaa tattgcgcga cagagatatt ttccgctcgc atcgccgggt
+    51361 cgatccccat tcccgttaac acctcgacgc taaacaggtt gccgaggctg ttacgaatgg
+    51421 ttttacgacg ctggttaaag gcttcggtgg tgatgcggct caacacacga acatctttaa
+    51481 ccgggtgagg catcgttgca tgaggaacca ggcgcacgac ggcggaatcc actttgggtg
+    51541 gtggtgtaaa ggctgacggc ggtacttcca gtaccgggat cacattgcaa tagtattgcg
+    51601 ccatgacgct taatcgacca tacgctttgc tgttcggtcc tgcaaccaga cgattcacca
+    51661 cctctttttg caacataaag tgcatgtcgg caatggcatc agtatagcta aacagatgga
+    51721 acatcaacgg cgtggagatg ttataaggca ggttgccgaa aacacgcagc ggctgaccca
+    51781 ttttctcggc cagttcacca aagttaaagg tcatcgcatc ctgctgataa atcgtcagtt
+    51841 tcgggcctaa gaatggatgc gtttgcagac gtgccgccag atcgcggtca agttcgatga
+    51901 ccgtcagctg gtccagacgt tcgccgaccg gttcggtcaa tgccgccaga ccggggccga
+    51961 tttcgaccat cgcctggccc ttttgcgggt taatggcaga cacaatactg tcgatcacga
+    52021 actgatcgtt gagaaagttt tgcccgaagc gtttacgggc taagtggccc tggtggactc
+    52081 gattattcat tgggtgttaa caatcatttt gatggcgaga ttaagcgccg taataaaact
+    52141 gccgacatcg gctttgccac gtcccgccag ttcaagcgcg gtgccgtggt ccactgatgt
+    52201 gcgaataaag ggcaggccca gcgtaatgtt cacaccgcgc ccgaagccct ggtattttag
+    52261 cacgggaaga ccctgatcgt ggtacatcgc cagcacggcg tcggcgttat caagatattt
+    52321 cggctgaaac agggtatcgg caggcagcgg cccgttgagt ttcatcccct gcgcccgcag
+    52381 ctcattgagc accggaataa tggtgtctat ctcttccgta cccatatgac cgccttcgcc
+    52441 cgcgtgcgga ttcagcccgc agaccagaat gcgcggttcg gcaataccaa atttggtccg
+    52501 caaatcgtga tgcaaaatag caatcacttc gtgcaaaagt gcaggggtga tagcgtctgc
+    52561 gatatcgcgc agcggtaaat gcgtcgttgc cagcgccacg cgaagttctt cggtcgccag
+    52621 catcatcacc acctttttcg cctgcgaacg ctcttcgaaa aactcggtat gaccggtaaa
+    52681 aggaatgcca gcgtcgttaa taacgccttt atgcaccgga cctgtgatca gcgcggcaaa
+    52741 ttcgccgttc agacaaccat cgcacgctcg cgccagcgtt tccaccacat aatgcccatt
+    52801 ttcaaccgct aactgccccg cagtgacagg tgcacgtagc gcgacaggaa gtagcgttaa
+    52861 tgtgcccgca gtttgcggtt gtgcagggga gttgggggaa taagggcgga gggtgagcgg
+    52921 caaaccgagc atcgctgccc ggttggtaag gagagtggca tcggcacaaa caaccagttc
+    52981 gaccggccac tcacgctgtg caagctggac aactaagtcc gggccaatcc cggcgggctc
+    53041 gccgggagtg atcacaacac gttgggtttt aaccattagt tgctcaggat tttaacgtag
+    53101 gcgctggcac gttgttcctg catccagctt gctgcttctt ccgagaactt acggttcatc
+    53161 agcatgcggt atgcacgatc tttctgcgca gcgtcggttt tatcgacatt acgggtatcc
+    53221 agcagttcga ttaaatgcca gccgaatgaa gagtgaaccg gtgcactcat ttgacctttg
+    53281 ttcaggcgag tcagggcgtc acggaaggcc ggatcgaaaa tatctggtgt agcccagccg
+    53341 agatcgccgc cctggttagc agagcctgga tcctgagaga actctttcgc tgcggcagca
+    53401 aaagtcgttt taccactctt gatatcagca gcaatctgtt ccagtttcac acgggcctgt
+    53461 tcgtcagtca tgatcggcga cggtttcagc agaatatggc gagcatgaac ttcggtcacc
+    53521 gagatatttt tgctttcgcc gcgcaggtcg ttaactttca gaatatggaa gccaacgccg
+    53581 gaacgaatcg ggccaacaat gtcgcctttc ttcgcggtgc ttaatgcctg ggcgaagatc
+    53641 ccgggcaact cctgaatacg gccccagccc atctggccgc cgttcagcgc ctgctggtcg
+    53701 gcagaatgag caatcgccag cttaccgaaa tcagcgccgt tacgcgcctg atcgacaatg
+    53761 gcgcgcgcct ggctttccgc ttcgttcacc tgatcagagg tcgggttttc cggcagcggg
+    53821 atcaggatgt ggctcaggtt cagctcagtg ctggcgtcgt tttggttacc cacctgctgc
+    53881 gccagggatt cgacttctgc ggcaggatgg tgatgcgacg acgcacctcg ttgttacgca
+    53941 cttcagagat aatcatctct ttgcggatct ggttacgata ggtgttgtag ttcagtccat
+    54001 cgtaagccag acggctgcgc atctgatcca gcgtcatgtt gttctgtttc gcgatgttag
+    54061 caatcgcctg atccagctgc tcatcggaga ttttcactcc cattttctgc cccatctgca
+    54121 ggatgatttg atccatgatc aaacgttcca tgatttggtg gcgcagcgtc gcgtcatcag
+    54181 gaagttgctg ccttgcctga gcagcgttca gttttaccga ctgcattaat ccatcaacgt
+    54241 cgctttccag cacgacgccg ttattgacga cggctgcgac tttatcgact acctgggggg
+    54301 cagcgaaact ggtattcgcg atcatggcga taccgagaag cagcgttttc cagttcttca
+    54361 tactttttcc atttcaatta accgcactgc ggattacgtg gtaaatcaac aaatcacaaa
+    54421 gtgttttgat acggcagaat gttcgaacgc agcatctctt gcgtacccag accgtagttg
+    54481 gagctcaggc cgcgaagttc gatgttaaag ccgattgcgt tgtcatatac cgcatgttgt
+    54541 ttatcgttat cccaaccgtt cagcttccgc tcgtaaccga cgcgaattgc atagcagcag
+    54601 gagctgtatt gcacacctaa catagagtcg gcttgcttgt tagcattggt gtcgtagtag
+    54661 taggccccaa caatggacca acgatcggca attggccagc tggcgacagc acctacctgc
+    54721 gaaataccat tcttatattg ctcagcagtg gaatagtact taggcagcgt agcctgaata
+    54781 tattccgggc tggcgtaacg gtaattcagc tgtaccagac ggtcttcatc ccgacggtat
+    54841 tcaatgctgg agttactggt cgctacgtta tccagacgtg tatcgtactg aatcccgcca
+    54901 cgcaatcccc aacgctcgga gatacgccag taagtatcgc ctgcccacac cagtgaaccc
+    54961 gttttgtcgt cattctccca tgttatgttg tcatcgccag tgcgagactc cgtgaaatag
+    55021 tagatttgac caacggaaat attaaaacgt tcaacggcag catcatcata tatgcgagat
+    55081 gtgacaccgg tcgtcacctg gttagcggag gcaatacggt caagaccgcc gtaagtccgg
+    55141 tcccggaaca ggccagagta gtcagattgc agcagagacg agtcgtagtt atagatgtcg
+    55201 ctctgatcgc gatacggcac gtacaaatac tgcgcgcgcg gttccagcgt ttgggtataa
+    55261 cccggagcca gcatttccat atcgcgttca aagaccattt tgccgtcaac tttgaattgc
+    55321 ggcattacgc ggttaacgga ttcgtccagc ttggtcgtgt ttctggagtt ataccagtca
+    55381 agattggttt gctgataatg ggttgccagc aacttcgctt cggtattgat gctgccccag
+    55441 ttattagaga gcggcaaatt gatggtcggt tccaggtgaa cacgggttgc ttcaggcatg
+    55501 tcgtctctgg tgttaacaaa gtgcactgcc tggccgtaaa tacgcgtatc aaacggacca
+    55561 acatcattct ggtagtaatt aacgtctaac tgcggctctg ccgagtagct actggtgttc
+    55621 tgttcgctga aaacctggaa ctgcttggtt gaaacggtgg cattgaagtt ttgcaccgca
+    55681 tagccaacgc tgaatttttg cgttgcgtag ccgtcagtac tggaaccgta cttgttatcg
+    55741 aaatcattga agtagctagg atcgctgacc ttggtgtagt cgacgttgaa acgccacacc
+    55801 tgatccatga ccccggagtg gttccagtag aataaccaac gacgtgaact gtcatcgttc
+    55861 gggtgttcat cttcatagac tttatctgaa ggcagatagt ccagttccat caagccagcg
+    55921 cccgcctggg agaggtagcg gaattcgttc tcccacatga tgttgccacg acgatgcata
+    55981 taatgcggcg tgatggtggc atccatattt ggcgcgatgt tccagtaata tggcaggtag
+    56041 aactcaaagt agttggtggt ggtgtacttg gcgttcggga tcaagaaacc agagcgacgt
+    56101 ttgtcaccca ccggcaactg caaatagggg ctataaaaga tcggtaccgg acccacctta
+    56161 aagcgggcgt tccagatctc cgcaacttgt tcttcgcggt catgaataat ttcgctacct
+    56221 accacgctcc aggtgtcaga acccggcaga caggaggtaa agctaccgtt atccagaatg
+    56281 gtatagcggt tttcgccacg ttgtttcatc aggtccgctt taccgcgacc ctggcgaccc
+    56341 accatctggt aatcaccttc ccagacgttg gtatctttgg tgttcagatt cgcccagcct
+    56401 ttcggccctt tgaggatcac ctggttatcg tcgtaatgga cattaccgag cgcatcaacg
+    56461 gtacgtaccg gctccggttg tcctggtgcc tctttttgat ggagctgcac ttcgtcggcc
+    56521 tgcagacggc tgttaccctg catgatatcc acgctgccag taaacacggc gtcatccggg
+    56581 tagtcccctt tcgcgtggtc agcattgata gtcacgggta agtcattggt atcgccctgt
+    56641 accagaggac ggtcatagct tggcacgccc aacatgcact gtgaggcgag gtcggctgcc
+    56701 agtccctgtt gactataaag ggcggtggca atcatggtgg ccaggagagt ggggatacgt
+    56761 tttttcatac gttgatttta ttgttccatc atcggtaacg ttgcgcgtga caaacggtca
+    56821 gagactaacg tactcgtcat ctctacgcta gtgttaatcc tgtccgaata gcgtcagtgg
+    56881 tgttaggcac ggcattgaat gacaggtatg ataatgcaaa ttataggcga tgtcccacaa
+    56941 ttgaccgcag ccggaaaacg gtaaaagcac ctttatattg tgggagatag ccctgatatc
+    57001 cgtgtgtcga tttggggaat atatgcagta ttggggaaaa atcattggcg tggccgtggc
+    57061 cttactgatg ggcggcggct tttggggcgt agtgttaggc ctgttaattg gccatatgtt
+    57121 tgataaagcc cgtagccgta aaatggcgtg gttcgccaac cagcgtgagc gtcaggcgct
+    57181 gttttttgcc accacttttg aagtgatggg gcatttaacc aaatccaaag gtcgcgtcac
+    57241 ggaggctgat attcatatcg ccagccagtt gatggaccga atgaatcttc atggcgcttc
+    57301 ccgtactgcg gcgcaaaatg cgttccgggt gggaaaatca gacaattacc cgctgcgcga
+    57361 aaagatgcgc cagtttcgca gtgtctgctt tggtcgtttt gacttaattc gtatgtttct
+    57421 ggagatccag attcaggcgg cgtttgctga tggttcactg cacccgaatg aacgggcggt
+    57481 gctgtatgtc attgcagaag aattagggat ctcccgcgct cagtttgacc agtttttgcg
+    57541 catgatgcag ggcggtgcac agtttggcgg cggttatcag cagcaaactg gcggtggtaa
+    57601 ctggcagcaa gcgcagcgtg gcccaacgct ggaagatgcc tgtaatgtgc tgggcgtgaa
+    57661 gccgacggat gatgcgacca ccatcaaacg tgcctaccgt aagctgatga gtgaacacca
+    57721 tcccgataag ctggtggcga aaggtttgcc gcctgagatg atggagatgg cgaagcagaa
+    57781 agcgcaggaa attcagcagg catatgagct gataaagcag cagaaagggt ttaaatgacc
+    57841 ctgtaaatga tgctgagtaa ctgcccacga ttaaaggtgg ccgccctggc ggtcacttct
+    57901 ttgagaaaag gcgtttactc agaatggtgg acaggctcaa tgcacggttt acgggagggg
+    57961 ttctgtaggt tttatcgcgt tgaccctgct taaggttgag agctttacga cgagcggaat
+    58021 tatattttta cgtcttaaaa ataaaaaaca catacctgaa tgagcgattt ttgaaagtat
+    58081 atttattcag aacgcgcatc atgagttttt aactcaatgc gaggctatta ccatgaaagt
+    58141 atcagttcca ggcatgccgg ttacactttt aaatatgagc aagaacgata tttataagat
+    58201 ggtgagcggg gacaagatgg acgtgaagat gaatatcttt caacgcttgt gggagacgtt
+    58261 acgccatctg ttctggtcag ataaacagac tgaggcttat aaacttctgt tcaatttcgt
+    58321 gaataaccag actggcaaca tcaacgcctc agaatacttt actggggcta tcaacgagaa
+    58381 tgagagagaa aagtttatca atagcctgga attattcaat aaacttaaaa catgcgcaaa
+    58441 aaatccggat gagttggtcg caaagggcaa tatgcgctgg gtcgcccaga ccttcgggga
+    58501 tatcgagtta agtgtcactt ttttcattga aaagaataag atatgtactc agacgttgca
+    58561 gctgcataag ggccaaggta acttgggcgt tgatcttaga aaggcttacc ttcccggcgt
+    58621 tgacatgagg gattgttacc ttggtaaaaa aacaatgaaa ggtagcaatg atatccttta
+    58681 tgagagacct gggtggaatg ctaacctggg cgtgctaccc cggacggtgc taccccggac
+    58741 ggtgctaacc cggacggtgc taacctggac ggtgctaccg tgaacggtgc tacctcctta
+    58801 tatgatgagg taattattat taataaaatc ccccccaaaa aaattgatac taaaggagtt
+    58861 gctactgaag aagttgctac taaaaaagta ctgctgaaca aattactgac aacgcaatta
+    58921 ttgaatgagc cagaataagc taaggttgaa ggggctggaa cgccccttca accttagcag
+    58981 tagcgtggga tgatttcaca attagaaaga cctgcatgat gagctagaga agaggctagt
+    59041 gacgcaaggc gtcgtgcagg acacggatca ccgagatggg catcgccaac cagactgcta
+    59101 attagcccat gaataacaat cagaaaggac cataacagac ccgttaaaat gaaatataag
+    59161 agacggtcaa cgggtgaaga aaaagttcaa aaattcgctg tggagcagga agggaattac
+    59221 cgaatggaaa gcgtagccac acgcaacaac tgaaagcagt ttggcagaaa caaaaaatcc
+    59281 ccggactcgg ggatttatgt acaagaggca gcccttagga tgagggtata aacgtacagg
+    59341 aaaggttaaa aatccgctgg cgctttaaac gtcatactat tgccatacgc cggatgggta
+    59401 atcgtcaaca tctctgcatg tagcaacaaa cgtggtgcca tcgctctcgc ttctggtgat
+    59461 gcataaaaac gatcgccgag aatcggatga cccagcgcca gcatatgcac acgcaattga
+    59521 tgcgaacgcc cggtaatcgg ttttaacacc actcttgccg tgttatccgc cgcatactcc
+    59581 accacttcat attccgtctg cgccaggttt acccgttttc gtaacagcac tttctgtttc
+    59641 gggcggtttg gccagtcgca aatcagcggc agatccacca gaccttctgc gggggatgga
+    59701 tgcccccaga cgcgggccac atactgcttt ttcggctcgc gctcgcgaga actggcgttt
+    59761 taactcccgc tccgcggctt tggtcagcgc cactacaatc acgccgctgg tagccatatc
+    59821 cagacgatgc accgattctg cctgcggata atcacgctga atgcgcgtca tcacgctgtc
+    59881 tttgtgctct tccagacgac ccggcactga caacaaaccg ctcggcttgt tgaccaccat
+    59941 aatatggtca tcctgataca ggataaccaa ccagggttcc tgcggtggat tgtagttttc
+    60001 catccccatt ttcggctccg ttactgatgc gttacaacga tcaaacgcag ggcatccaga
+    60061 cgccaacctg cctgatccag gctttccatt acctgctgac ggttgctctc aatggcggtc
+    60121 agttcgtcgt cacgaatgtt cgggttcact gcacgcagag cttccagacg agacagctcg
+    60181 gcagacagtt tttcgtcggc ttcgttacgc gctgcatcaa tcaatgcacg ggcagatttc
+    60241 tcgatctgcg cttcacccag ttgaaggata gcgtgaacat cctgctgcac ggcgttaacc
+    60301 agtttgctgc cggtgtgacg gttaaccgcg ttaagctggc ggttaaaggt ttcaaactct
+    60361 acctgcgccg ccaggttgtt gccgttttta tccagcagca tacgtaccgg cgtcggtggc
+    60421 aggaagcggt tgagctgcaa ctgcttcgga gcctgggctt caaccacata aatcagttcc
+    60481 accaacagcg tacctaccgg caacgctttg ttttttaaca gtgaaatcgt gctgctaccg
+    60541 gtatcgccag aaaggatcag atccagaccg ttgcggatca gcggatgctc ccaggtaata
+    60601 aactgtgcat cttcacgcgc cagcgccact tcacgatcaa aggtgatggt gatgccatct
+    60661 tccgacaggc cagggaagtc cggcaccagc atatgatcgg acggcgtcag cacgatcatg
+    60721 ttgtcgccgc gatcgtcctg attgataccg ataatatcga acaggttcat ggcgaaggcg
+    60781 atcaggttgg tatcgtcatc ctgctcttca atgctttctg ccagtgcctg ggctttttcg
+    60841 ccaccgttgg agtggatttc cagcaggcgg tcacgaccct gttccagctg tgctttcagc
+    60901 gcttcatgtt gctcgcggca gtttttgatc agatcgtcaa agccttcggt ttgatccggt
+    60961 gaagccagat agttaatcag atcgttgtat acgctatcgt aaatagtgcg tccggtcggg
+    61021 caggtgtgct caaatgcatc cagaccttcg tgataccagc gcaccagcac cgactgagcg
+    61081 gttttctcca gataaggcac atggatctga atatcgtgcg cctggccgat acgatccaga
+    61141 cgaccaatac gctgctccag tagatccggg ttgaatggca ggtcaaacat caccatgtgg
+    61201 ctggcgaact ggaagttacg tccttcagaa ccgatttctg agcacagcag tacctgtgcg
+    61261 ccggtgtctt cttcggcaaa ccaggcggca gcgcggtcac gttcgataat cgacatacct
+    61321 tcgtggaaca ccgcagcgcg aataccttca cgttcgcgca gtacctgctc cagttgcagc
+    61381 gcagtggcag ctttggcgca gatcaccagc actttctgag agcgatggct ggtcaggtag
+    61441 cccatcagcc actcaacgcg cggatcgaag ttccaccagg tggcgttatc accttcaaat
+    61501 tcctgataaa tacgctccgg gtagagcata tcgcgagcac gatcttccgc acttttacgt
+    61561 gcgcccataa tgccggagac tttaatagcc gtctgatact gcgtcggtag cggcagctta
+    61621 atggtgtgca gctcgcgttt cgggaatcct ttcacaccgt tacgcgtgtt acggaacagc
+    61681 acgcggctgg tgccgtggcg atccatcagc atcgaaacca gctcctgacg ggcgctctgg
+    61741 gcatcttcgc tgtcgctgtt tgctgcctgc aacagcggct cgatatcctg ctcgccgatc
+    61801 atctcgccga gcatgttcag ttcgtcattg ctcagtttgt tacctgccag cagcatggca
+    61861 acggcgtccg caaccggacg ataatttttc tgctcttcaa cgaactgcgc aaaatcgtgg
+    61921 aaacggttcg ggtccagcag acgcagacgg gcgaagtggc tttccatccc cagctgttcc
+    61981 ggggtcgcgg tcagcagcag aacgcccggc acgtgctctg ccagttgttc aatggcctga
+    62041 tattcacggc ttggcgcatc ttcgctccac accaggtgat gcgcttcatc gaccaccagc
+    62101 aggtcccatt cggcttcaca gagatgttcc aggcgctgtt tgctacgacg ggcaaaatcc
+    62161 agcgagcaaa tcaccagctg ttcggtgtca aacgggttgt aagcatcgtg ctgagcttcg
+    62221 gcataacgct catcatcaaa tagcgcaaag cgcaggttga aacggcgcag catttctacc
+    62281 agccactgat gctgtaaggt ttccgggacg ataattagca cacgttcagc agcgccagag
+    62341 agcagttgct gatgcaggat catcccggct tcaatggttt tccctaaacc cacttcgtca
+    62401 gccagcagga cgcgcggcgc gtggcggcga ccaacatcat gagcgatgtt gagctgatgc
+    62461 gggatcaggc tggtacgctg accgcgcagg ccgctgtacg gcatacggaa ctgttcgctg
+    62521 gaatatttac gcgcgcgata acgcagcgca aagcggtcca tacggtcaat ctgcccggca
+    62581 aacagacggt cctgcggttt gctgaacacc agtttgctat caaggaaaac ttcacgcagg
+    62641 gctacgccgg actcttcagt atccaggcga gtaccgatat aggtcagcaa gccattttct
+    62701 tcttttactt cttcgacttg catctgccag ccgtcatggc tggtaatggt atcaccaggg
+    62761 ttgaacatca cgcgggtcac gggggaatca ctgcgtgcgt acagacggtt ttcaccagta
+    62821 gatgggaaaa gtaaagtgac agttcgcgca tccaccgcga caacggttcc aagtcccaat
+    62881 tcgctttctg tatcgctgat ccagcgttga ccaagtgtaa aaggcatatg tgttcggctc
+    62941 tatatcttta attgcaggca ataaccaccc gctaccgtgc ttatgaggta gtggtgttat
+    63001 tcaggtccag gaatggaaag ggcgctatgg tactggatgg caaagcattc gtcacgcatc
+    63061 aaaatggtat ctggcgaact cttttttttg ctcaaaatag cccaagttgc ccggtcataa
+    63121 gtgtagcaaa attatcctca ataaaaggga gtattccctc cgccacgggt tgtagctggc
+    63181 gggtcagata gtgttcgtaa tccagtggtg aacgttggta gtccagcggc tccgggccgt
+    63241 tggtggtcca tacgtactta atggtgccgc gattctgata ttgcaagggg cgaccacgct
+    63301 tttggttttc ttcatcggca aggcgagcgg cgcgtacatg aggcggcaca ttacgctgat
+    63361 actcgctcag cggacggcga aggcgtttac ggtaaaccag tcgcgcatcc agttcacccg
+    63421 ccatcagttt gtcgatggtt tcgcgtacat attcctgata tggctcgttg cggaagatgc
+    63481 gcaggtatag ctcctgctga aactgctggg ccagcggcgt ccagtcggtg cgcacggttt
+    63541 ccagcccttt aaacaccatc cgctgcttgt cgccctcctg aatcagtccg gcataacgct
+    63601 ttttactgcc ggtatcggct ccgcgaatgg ttggcatcag aaaacggcag aaatgggttt
+    63661 catactccag ttctaatgcg ctggtcagcc gttgtttttg cagcgtttcc gcccaccagg
+    63721 cgttaacgtg ctgcaccagt gcacgaccga ttttcgccgc ttcttcttcc gaatgtgcgc
+    63781 ctttcagcca gacaaacgtt gagtcggtat cgccgtagat aacgtcgtag ccctgtgctt
+    63841 caatcaacgc tttggtttgc cgcatgatct gatgaccacg catggtgatc gacgatgcca
+    63901 gccgcggatc gaagaagcgg caggcggtgg tgccgagcac gccataaaag gcattcatga
+    63961 tgattttcag cgcctgcgac agcggtttgt taccctggcg tttggcttca tcgcgcccgt
+    64021 gccagatgtt agtcacaatc tccggcaggc aatgtttttc tcgcgagaac caggcatcga
+    64081 gaaaaccttc ggtactgtgc tctggatcag gctgcgccat gccttccacc agcccgacgg
+    64141 gatcaatcag aaaggtgcgg atgatcgacg ggtacaggct tttatagtcc agcaccagca
+    64201 ctgaatcata aagccctggc cgtgaatcca tcacgtagcc gccagggctg gcgtgcggcg
+    64261 gcacttcgcc gagattaggc gcgacataac cagcgcgatg cattcgcgga aaatagagat
+    64321 gaccaaatgc cgccaccgaa ccgccgtgtc ggtccaccgg caggccgttc accgttgccc
+    64381 gttcgagtaa aaatggcatg atttcagttt tgtggaagat ctgcgtcacc agctcgcaat
+    64441 ctttcaggtt ataagttgcc agcgcaggtt tatcttcggc gaaacggcgg tcaatttcgt
+    64501 ccattcgatc ccacgggtta tcgatagatt ttccttcgcc taatagctcc tgagcgacag
+    64561 tttccagcga gaatgaagag aaatcccaga acgcggattt cagcgcctcg ataccgtcga
+    64621 taattagccg acctttagcc tggggcaaaa aagacgccgt ttttgcgggg ccgtgctcgc
+    64681 gccactccag ctcgctatta tcgcgcccaa gacgcagcgg aagacggtaa cggctcggca
+    64741 tgtttttgca gcattcgcag atcgaactgc accacgttcc aaccgatgat cacatcagga
+    64801 tcgtagttgg caaaccaggc gttgagtttt tccagcaact gcgggcggct ggcgacgtat
+    64861 tccagttcga aatcaagcga ggaggcgtcg ccattctccg gccccagcat ataaacgatg
+    64921 cgctgcccgc agccttccag gccgatgcag tacagctcac cgtggcgggt ggtttcaata
+    64981 tctatagaaa cccacttgag cggcggacga tagtcgggat gcggtttcag acgggcatta
+    65041 acgatagtgc cattgtgcat atcaccctcg acccacaccg gtgaggtgat aaaccgctcc
+    65101 atcagatagc gttctggcgg acgcacatcg gcctcgtaga cggtaacgcc accttcacgc
+    65161 aggcgctttt cgtaattcat caattggcga tgggcgcgac agtaaaggcc atacaccggc
+    65221 tggcggtgaa aatcctttaa cgccagcggt gtcaggcgaa agccttgttc accctgcaaa
+    65281 atatgctgag cgcggggaac ctgatcggcg ggaataaacg ccacggactc ttgcggtgca
+    65341 agcgtaacct gcaacggccc gttgtccgtc gccagccaga aggagacttc tgtcccttgc
+    65401 ggggtgtccc gccagtgtcg ggttaagata aaacctgcct gcgccacgct gaaaatccat
+    65461 caaaaaacca ggcttgagta tagcctggtt tcgtttgatt ggctgtggtt ttatacagtc
+    65521 attactgccc gtaatatgcc ttcgcgccat gcttacgcag atagtgttta tccagcagcg
+    65581 tttgctgcat atccggtaac tgcggcgcta actgacggca gaatatcccc atataagcga
+    65641 cctcttccag cacgatggcg ttatgcaccg catcttcggc atttttgccc catgcaaacg
+    65701 ggccgtggga atggaccaga acgccgggca tttgcgctgc atcgataccc tgtttttcaa
+    65761 aggtttctac gatgacgtta ccggtttccc actcatattc gccgttgatt tctgcgtcgg
+    65821 tcattttgcg ggtgcaggga atggtgccgt agaaatagtc ggcgtgggtg gtgccggttg
+    65881 ctggaatcga ctgacccgcc tgcgcccaga tggtggcgtg gcgcgagtgc gtatgcacaa
+    65941 tgccgccaat ggaggggaat gcctgataga gcagccggtg agttggcgtg tcggaggagg
+    66001 gctttttcgt accttcaacc acttcaccgg tttcgatgct aaccacgacc atatcgtcag
+    66061 cggtcatgac gctgtaatcg acgccggaag gtttgatcac aaagacgccg cgctcgcgat
+    66121 caacggcgct gacgttgccc catgtgagcg tgaccaggtt gtgttttggc agcgccaggt
+    66181 tggcttctaa tacctggcgt ttgagatctt ctaacatgtt gactccttcg tgccggatgc
+    66241 gctttgctta tccggcctac aaaatcgcag cgtgtaggcc tgataagacg cgccagcgtc
+    66301 gcatcaggcg ttgaatgccg gatgcgcttt gcttatccgg cctacaaaat cgcagcgcgt
+    66361 aggcctgata agacgcgcca gcgtcgcatc aggcgttgaa tgccggatgc gctttgctta
+    66421 tccggcctac aaaatcgcag cgtgtaggcc agataagacg cgtcagcgtc gcatcaggcg
+    66481 ttacataccg gatgcggcta cttagcgacg aaacccgtaa tacacttcgt tccagcgcag
+    66541 cgcgtcttta aacgctggca ggcgtgtgtc gttatcaatc accgtgattt caatgtcgtg
+    66601 catctcggcg aattggcgca tatcgttgag gttcagtgca tggctgaaga cggtatggtg
+    66661 cgcgccacca gcgaggatcc acgcttcgga agcagttggc agatccggtt gcgctttcca
+    66721 cagcgcattc gccaccggca gtttcggcag ggagtgcggt gttttcaccg tgtcgataca
+    66781 gttaaccagc agacggtaac gatcgccgag atcaatcagg ctggcgacaa tcgctggacc
+    66841 ggtttgggta ttgaagatca gtcgggcagg atcgtcctta ccaccaatac cgagatgctg
+    66901 aacgtcgagg atcggtttct cttctacggc aatcgacggg cagacttcca gcatatggga
+    66961 gccgagcacc aagtcattac ctttctcgaa gtgataggtg tagtcctcca taaaggaggt
+    67021 gccgccctgc agaccggttg acatcacctt catgatgcga agcagggcgg cggttttcca
+    67081 gtcgccttcg cccgcaaagc cgtaaccctg ctgcatcaga cgctgtacgg ccagacctgg
+    67141 aagctgtttc agaccgtgca aatcttcaaa ggtggtggtg aacgcgtgga agccaccttg
+    67201 ttccaggaaa cgcttcatcc ccagctcaat acgcgccgct tccagcacgt tctgtcgttt
+    67261 ttcgccgtgg atttgtgttg caggcgtcat ggtgtagcag ctttcgtact catcgaccag
+    67321 cgcgttaaca tcgccgtcgc tgatggagtt caccacctgc accagatcgc caaccgccca
+    67381 ggtattgacg gagaaaccga acttgatctg tgcggcaact ttatcaccat cggtgaccgc
+    67441 cacttcacgc atgttatcgc caaaacggca gactttcaga tgacgggtat cctgtttaga
+    67501 aaccgcctga cgcatccagg agccgatacg ctcatgggct tgtttatcct gccagtgacc
+    67561 ggtaacgacg gcatgttgct gacgcatacg cgcgccaatg aagccgaact cgcgaccgcc
+    67621 atgtgcagtc tggttcaggt tcataaagtc catatcgata ctgtcccacg gcagcgccgc
+    67681 gttgaactgg gtgtggaatt gcagcaacgg tttgttgagc atggtcaggc cgttgatcca
+    67741 cattttggcc ggggagaagg tgtgcagcca caccaccaga ccagcgcacg gatcgtcgta
+    67801 attcgcgtcg cggcaaatag cggtgatttc atccggcgtg gtgcccagcg gtttcaacac
+    67861 cagtttgcag ggcagtttcg cttccgtatt cagcgcatta acaacgtgct cggcatgttg
+    67921 ggtgacctga cgcagggttt ccgggccata cagatgctgg ctgccaatga caaaccacac
+    67981 ttcataatta tcaaaaatcg tcattatcgt gtccttatag agtcgcaacg gcctgggcag
+    68041 cctgtgccgg ggcggaagtt ggaagatagt gttgttcggc gctcatcgcc cattgctgat
+    68101 agcggcgata aagctgttca aagcgttgtg cctgttcgct gcgcggttgc agggttttct
+    68161 ctaccgcact ggccattttt tgctgggctg atgggatgtc tgcgtgcact ttcgcggcga
+    68221 cggcagcaaa aatcgccgca ccgagcgcac agcactggtc agaggcaaca atttgcagcg
+    68281 ggcgattcag cacgtcgcag caggcctgca taatgacttg gtttttccgc gcgatgccgc
+    68341 ccagcgccat cacgttattg acggcgatcc cctgatcggt aaagcactcc atgattgcgc
+    68401 gtgcgccaaa ggcggtggca gcaatcaaac cgccgaacag cagcggagcg tcggtagcga
+    68461 ggttaagatc ggtaatcacc cctttcaggc gttggttagc gtttggcgag cgacgaccgt
+    68521 taaaccagtc gagcaccacc ggcaggtgat ccagagacgg atttttggcc catgcttcgg
+    68581 tcagcgccgg aagcagttgt ttctggctgg cgttgatttg cgctttcagt tccggatgct
+    68641 gggcggcaag ctgttccagc ggccagctga gtacgcgacc gaaccaggcg tagatatcac
+    68701 caaacgccga ttggcctgct tccagaccga taaatccagg caccacgctg ccatcaacct
+    68761 gaccgcaaat acctttaact gcccgctcgc caacgctctg tttgtcggca atcagaatgt
+    68821 cgcaggtgga agtaccgata acttttacca gtgcgttagg ctgtgcgcct gcgccaactg
+    68881 cgcccatatg gcagtcaaac gcgccgccgg aaatcaccac gctttcaggc aggccgagac
+    68941 gctgcgccca ttccgggcat aaggtgccca ccggaatatc ggcagtccag gtgtcagtga
+    69001 acagcgggga aggcaaatgg cgattgagga tcgggtccag ctcatcaaag aaactggctg
+    69061 gcggcaagcc gccccagctt tcgtgccaca gagatttatg cccggcgctg caacgtccgc
+    69121 gacgaatatc ctgcgggcgg gtggtaccgg aaagcagagc tggcacccag tcgcacagct
+    69181 caatccacga tgcggcagat tgcgccacgg cgctgtcctg gcgagtcaca tgcaggattt
+    69241 ttgcccagaa ccattcgctg gaataaatac cgccaatata gcgggagtag tcaacattgc
+    69301 ccggcgcgtg gcacaaacgg gtaatctctt cgcttctttc aaccgcagtg tggtctttcc
+    69361 acaatacgaa catcgcgttc gggttttcgg caaactccgg gcgcagcgcc agcacgttac
+    69421 cgtcggcatc aatcggtgcg ggcgtcgagc cggtactgtc aacgccaatc ccgaccacag
+    69481 ctgcgcgctg ttcgacgcta agctctgcaa gcacggtttt cagtgccgct tccattgact
+    69541 caatgtagtc acgcggatga tgacggaact ggttattcgg ggcatcacaa aattgccctt
+    69601 tttgccaacg gggataccac tctacgctgg tggcgatctc ttcaccgctg gcgcagtcca
+    69661 ccgccaaagc tcgcacagaa tcactgccaa aatcgaggcc aattgcaatc gccatcgttt
+    69721 cactccatcc aaaaaaacgg gtatggagaa acagtagaga gttgcgataa aaagcgtcag
+    69781 gtaggatccg ctaatcttat ggataaaaat gctatggcat agcaaagtgt gacgccgtgc
+    69841 aaataatcaa tgtggacttt tctgccgtga ttatagacac ttttgttacg cgtttttgtc
+    69901 atggctttgg tcccgctttg ttacagaatg cttttaataa gcggggttac cggttgggtt
+    69961 agcgagaaga gccagtaaaa gacgcagtga cggcaatgtc tgatgcaata tggacaattg
+    70021 gtttcttctc tgaatggtgg gagtatgaaa agtatggctg aagcgcaaaa tgatcccctg
+    70081 ctgccgggat actcgtttaa cgcccatctg gtggcgggtt taacgccgat tgaggccaac
+    70141 ggttatctcg atttttttat cgaccgaccg ctgggaatga aaggttatat tctcaatctc
+    70201 accattcgcg gtcagggggt ggtgaaaaat cagggacgag aatttgtctg ccgaccgggt
+    70261 gatattttgc tgttcccgcc aggagagatt catcactacg gtcgtcatcc ggaggctcgc
+    70321 gaatggtatc accagtgggt ttactttcgt ccgcgcgcct actggcatga atggcttaac
+    70381 tggccgtcaa tatttgccaa tacgggtttc tttcgcccgg atgaagcgca ccagccgcat
+    70441 ttcagcgacc tgtttgggca aatcattaac gccgggcaag gggaagggcg ctattcggag
+    70501 ctgctggcga taaatctgct tgagcaattg ttactgcggc gcatggaagc gattaacgag
+    70561 tcgctccatc cgccgatgga taatcgggta cgcgaggctt gtcagtacat cagcgatcac
+    70621 ctggcagaca gcaattttga tatcgccagc gtcgcacagc atgtttgctt gtcgccgtcg
+    70681 cgtctgtcac atcttttccg ccagcagtta gggattagcg tcttaagctg gcgcgaggac
+    70741 caacgtatca gccaggcgaa gctgcttttg agcactaccc ggatgcctat cgccaccgtc
+    70801 ggtcgcaatg ttggttttga cgatcaactc tatttctcgc gagtatttaa aaaatgcacc
+    70861 ggggccagcc cgagcgagtt tcgtgccggt tgtgaagaaa aagtgaatga tgtagccgtc
+    70921 aagttgtcat aattggtaac gaatcagaca attgacggct tgacggagta gcatagggtt
+    70981 tgcagaatcc ctgcttcgtc catttgacag gccacattat gcaagcattg cggaacactt
+    71041 tattacccaa ccaccgtgtt cattgatggc ggtggtgttg gtggcctttc tggagtcgct
+    71101 ggcgctggtc ggtttgattc tacccggtac ggtgctgatg gcggggctgg gagcgctgat
+    71161 tggcagcggc gagttaagtt tctggcacgc ctggctggca gggattattg gctgcttgat
+    71221 gggcgactgg atttctttct ggctgggttg gcgttttaaa aagccgttgc atcgctggtc
+    71281 atttctgaag aaaaacaaag cactacttga taaaactgaa catgcgttgc atcaacacag
+    71341 catgttcacc attctggtcg gtcgttttgt tggcccgacg cgtccgctgg tgccaatggt
+    71401 ggcgggaatg ctggatctgc cggtggctaa atttattacg ccgaatatta tcggctgcct
+    71461 gctgtggccg ccgttttact tcctgccagg gattctggcg ggcgcggcga tcgatattcc
+    71521 tgccggaatg cagagcggtg agtttaaatg gttgctgctg gcaacagcgg tgtttttgtg
+    71581 ggttggtggc tggctgtgct ggcggttatg gcgcagcggt aaagcgactg accgtttgag
+    71641 tcattatttg tcccgcggtc gtttgttgtg gctgacgccg ttgatttctg ccatcggcgt
+    71701 ggtggcgctg gtggtgttaa ttcgccaccc gttgatgccg gtgtatatcg atattttgcg
+    71761 taaagtggtt ggggtttagg agatagtctt gtgcgggttg cctgagcgcg acgcttgccg
+    71821 cgtcttatca ggcctacaaa acgcactacc cgtaggtcgg ataaggcgtt cacgccgcat
+    71881 ccgacagtgc atactaaccc gtaatcccca atagtgccga agcactcgcc ttaccgctca
+    71941 acaactcatt ggtcataccc tgccaggcga tgcgcccgtc ggcgactact accgagcgcg
+    72001 tggcgatccg cgccgcatct tccacgctgt gcgacaccat caatagcgtc attttttgct
+    72061 gctggcagct cgtgctcacc agcgtcaaca tctcctgacg taacgccgga tcgagcgcag
+    72121 agaacagttc atcgagcaat aaaatcggct gttcgcgtac cagacaacgc gctaacgcca
+    72181 ctcgctgtcg ctgaccgccg gaaagctcgc ccggtaaccg cgccattaaa ttatcaatcc
+    72241 ccatctggcg ggcgatagcg tgcattttcc cctgctgtac cgcgttcagt ttcaatcccg
+    72301 gatttagccc cagcccgatg ttctgtgcga ccgtcaggtg gctgaacagg ttgttctcct
+    72361 gaaacagcat cgacaccgga cggcgtgacg gcggcatagt tgtgtgatct acgccatcga
+    72421 tagtcagcga accgctggct ggcgtcagaa aaccggcgat caaattcagc agggtacttt
+    72481 tacccgcgcc gcttggcccg aggatcgcca cctgctcgcc gcgttccacc gttaagctaa
+    72541 aacgcatcgg caaatggtgg taaagccagg tgatatcagt cagttttaac atttcgcccc
+    72601 ggtagttttt caatcacggt aaacagcaga aaacagagca gcagcagaat taacgtggtg
+    72661 accgcaccgt cctggctgcg ataggagcca atttgctggt agagataaaa cggcagggtg
+    72721 cggaaatcat cgttaccgaa caacgccacc acgccaaaat caccaatcga cagcacgcat
+    72781 gcaaaggcca gcgcctgcgc cagtggacgt ttcagggcgc gcagctccac cacttttaag
+    72841 cgtgaccagc cttcaatccc cagcgactga cataacatgc tgtagcgggc ggtgatatcg
+    72901 cgcatcgggt tttccagcac tttcagcgca taagggatcg ccattaacgc attggtgaaa
+    72961 atcacaatgc cgtcagcaga ttgtggcagg ccgatagtgt tgttgagcag taaaaagaag
+    73021 ccggtagcca gcacaatccc cggcatggcg aggatcaaca tgccgctcat ctccagcacc
+    73081 tgacccgcca gcattttctg ccgcgcccgc agttcgcgac tgctccatag cagcatcatg
+    73141 gtcagcacta cgcacaatac acctgccgcc agcgcaatac gcaacgaggt ccacagcgcc
+    73201 tgccacagca ccggttgtgc cagcacttcc ggcaactggc gatttacccc atcgacgatc
+    73261 accgccagta acggtggcag caacagcagc agcgccagca caattaacac cgtgtcgcaa
+    73321 atgcggctat gcagacgatc gtccgggtcg cgccagcctt gcagcagcgt ggtgccgggc
+    73381 gcaatggcct tactcaatcg ctgactcaac agcaccagcc cgaggcagca caccatctgg
+    73441 agcagcgcca gcattgccgc gcgggcagga tcgtagtcgt aactcagcgc ctgatagatt
+    73501 gccagctcga tagtggtcgc ctgcggaccg ccccccagcg atagcacggt ggcgaagctg
+    73561 gcgaaacaga gcataaagat aagcgcagca accggcggga tttgtcgccg taaccacggc
+    73621 cattcgacga agcggaaaaa atgccagcta cgcatcccaa gctgggcggc aagttgacgc
+    73681 tgttcgccgg ggatgttttc cagtgcctgg agtaataagc ggctcgccat cggcagatta
+    73741 aaaaacacat gggccagcaa aataccttgc aggccgtagg gcgaaaaggt ccactccaga
+    73801 ccgagcgatt ggcagagtgt tgccagccag ccctggcgac catagacgct aagaatgccg
+    73861 aaaacagcga ccaacaccgg gaggatcaag gtcattgcac acagacgcaa cagcgccagc
+    73921 cgacccggaa agcgcctgcg atagagcgcg cgggcgagga atatcgcggg tatgacagag
+    73981 agcagtgccg agagaaacgc ctgccagaag gagaagcgca ccacatgcca cagatagctg
+    74041 tcctgccaga ctgccaccca gtcatcctgc ggcgcgttcc accacagggc gagaaacgcc
+    74101 gccagcgcaa ccgctaccac cagcgtggtg gcgcttacac ctggaattaa ccagccggga
+    74161 attaacggct gacggcgcgt tgccattcgc taatccatgc ctgacgttgt gccgccactt
+    74221 cggctggcgt gaactccaac gtggttgcgg gtttggtcaa tttttcaaaa ccggcaggca
+    74281 gcgtgacgtt tgccaccgga tacatccagt tgccggttgg gatcgcattc tggaaagccg
+    74341 gagaaaccat aaactggagg aatttttgcg ccagctccgg ctgcttgctg gcagcggtgc
+    74401 gggcggcgac ttccacttgc agatagtgac cttcgctgaa gttcgcggcg gcgtagttat
+    74461 ctttcttctc ttcgagaatg tgataagccg gagaggtggt gtaactcagt accagatcgc
+    74521 tttcaccttt taaaaacagg ccgtaggctt cgctccagcc tttggtgacc gtgaccgttt
+    74581 tcttcgccag tttctgccag gcttgtgggg cgtcatcgcc atagactttt tgcatccata
+    74641 gcaacagacc cagccccggt gtactggtgc gcggatcctg ataaatcacc cgccagtttt
+    74701 gatcgctctc aaccagttct ttcaggcttt gtggcgggtt tttcagtttg ttcttgtcat
+    74761 aaacgaaggc gaagtagcca taatcaaacg gtacgaaagt gtcattattc cagccgccgg
+    74821 gaacgttaac ggcatccgct gccacaccgc ttttggcaaa cagtccggtt ttactggcgg
+    74881 cgtctaacag gttgttatcc agccccagca ccacatcggc tttactgttt ttgccttcca
+    74941 tccgtagacg gttgagaagc gaaacgccat cttccagcgc caccagtttc agttcgcaat
+    75001 tacagtcggc ttcaaaggct tttttaacca ccggaccagg cccccagtcg gcggcgaagg
+    75061 aatcgtaggt ataaacagtc agaacgggtt tagcgaaaac gggcgctgtc acagcaacag
+    75121 caggggcaga cattttttta acactttgca cctcaaaaaa gagtggcaaa ggacttgaga
+    75181 aggagcctca aatcccttcg ccggcgttat ccggatcagg ttcgacgggt attttctcag
+    75241 cgcacgcgta cgcgtggcac cccgttgaga acggcgttag tgtagtgatt ttgttatcaa
+    75301 ccagcaatca tggatccggt ggcgcaaacc acgctgattt aaaatcgaac cagccgaggg
+    75361 tattcatgcg caggccgcgc atactgcgtt gcccctgaat gatcagccag tggtgcaata
+    75421 atggcaccat cgctttgctg gcgaccagtt gctggcacca gttcgccaga ttcatctcgc
+    75481 cattgcgcca gcgagcagcg tcggcttgcc agtcaatggg aatgcaatgt tgtagcagtg
+    75541 gcacttcgca taaatgtgcg aaaacagaga agtccagcgg cagggtaaag ttggcgctgt
+    75601 ttagccagat atcactttcg atctctcctg tatgccactg atcgtagtcg atctctttga
+    75661 ttttcagcgt cacctggtga cttgccagaa tctgctgcat gatcccggca atcacccgat
+    75721 gctcactgtg atcctgacaa aaggttaggg tgaggctttc caggccagcc ggtttttcgc
+    75781 tctttatggt gcgggcatgg tgccaacggg ggaacagtcc ataagccggg aaccacagtt
+    75841 gctggtactg ttcctcagcg aaatagacca agttagttgg agaaagcaca tagcttgccc
+    75901 agtccctgac ttgctgattc gccccgcgat gggtgcggct gtcgaacagt aaatagtagc
+    75961 aaccttcctc caggcggctt tcaatctctt tttcctcgcc ctgtggacct tttagcatca
+    76021 gccctccggc tggctcgtcg gcaatttccg gcagaaccca gacgttaact tcgtcgatta
+    76081 atgcccggta accgaagaag tcatcgaatg cctgaatttt cagttgattg gtgctgttgc
+    76141 gaatcaccgc atacggaccg gtgccgatgg gatggctggc aaagttactg agggtttccc
+    76201 attcgcgcgg caggatcatc gccggaactt gccccagcag taacggtaac cagcggtccg
+    76261 gttgcgtgag atggatatcc agcgtccagg gcgtcggcga cacaatgtca gcaatatgcg
+    76321 aatagagcgg cagcgtattg attcgtttta aagaggcgat cacatcgtcc atttccagtt
+    76381 cacgaccatg gtgaaaatgg actcctggac gcaaaaagaa acgccagtga agcggtgaaa
+    76441 tttgctgcca gtggtgggcg atgtctgctt ccagttcccc attttcctca tttatgcgcg
+    76501 ttagcgaact gaagatttgc cgggcgatat gggtttcgga acggcgcaat gcgctgccag
+    76561 gtagcagatt acgcaacgga cgatagtaga gcacgcgcag gatgtgccgc ccctggcgga
+    76621 agctgcggcc cagatgagaa accagcattt gccgcacagt cgctttgtcg ccaaccaact
+    76681 gcaccagttg atcgatacga tcctgctcca gcaggtcttc cgcccgctgt tgctgaagcg
+    76741 ccagcccggt atagaggaat gtcagacgcg agcgtttacc gcgcccgact tccgcttccc
+    76801 acgtcagcca gccgcgatcc tgcatggtgt tgagcagggt gcgcatatga cgacgcgagc
+    76861 agctcaataa cgctgccagt tcgttgagcg ttgtgtcctg cgatttaccc tcgcagcatt
+    76921 gccacaggcg gatgaactgt tgttgcagac gagcagatgg cataaaaggg gaactcctgt
+    76981 gcaaaagaca gcaattttat tgtccctata ttaagtcaat aattcctaac gatgaagcaa
+    77041 gggggtgccc catgcgtcag ttttatcagc actattttac cgcgacagcg aagttgtgct
+    77101 ggttgcgttg gttaagcgtc ccacaacgat taaccatgct tgaaggactg atgcagtggg
+    77161 atgaccgcaa ttctgaaagt tgacttgcct gcatcatgtg tgactgagta ttggtgtaaa
+    77221 atcacccgcc agcagattat acctgctggt tttttttatt ctcgccgcgc taaaaaggga
+    77281 acgtatgatc tggataatga cgatggctcg ccgtatgaac ggtgtttacg cggcatttat
+    77341 gctggtcgct tttatgatgg gggtggccgg ggcgctacag gctcctacat tgagcttatt
+    77401 tctgagtcgt gaggttggcg cgcaaccttt ctggatcggc ctcttttatg cggtgaatgc
+    77461 tattgctggg atcggcgtaa gcctctggtt ggcaaaacgt tctgacagtc agggcgatcg
+    77521 gcgaaaactg attatatttt gctgtttgat ggctatcggc aatgcgctat tgtttgcatt
+    77581 taatcgtcat tatctgacgc ttatcacctg tggtgtgctt ctggcatctc tggccaatac
+    77641 ggcaatgcca cagttatttg ctctggcgcg ggaatatgcg gataactcgg cgcgagaagt
+    77701 ggtgatgttt agctcggtga tgcgtgcgca gctttctctg gcatgggtta tcggtccacc
+    77761 gttggccttt atgctggcgt tgaattacgg ctttacggtg atgttttcga ttgccgccgg
+    77821 gatattcaca ctcagtctgg tattgattgc atttatgctt ccgtctgtgg cgcgggtaga
+    77881 actgccgtcg gaaaatgctt tatcaatgca aggtggctgg caggatagta acgtacggat
+    77941 gttatttgtc gcctcgacgt taatgtggac ctgcaacacc atgtacatta ttgatatgcc
+    78001 gttgtggatc agtagcgagt taggattgcc agacaaactg gcgggtttcc tgatggggac
+    78061 ggcagctgga ctggaaatac cagcaatgat tctggctggc tactatgtca aacgttatgg
+    78121 taagcggcga atgatggtca tagcagtggc ggcaggagta ctgttttaca ccggattgat
+    78181 tttctttaat agccgtatgg cgttgatgac gctgcaactt tttaacgctg tatttatcgg
+    78241 cattgttgcg ggtattggga tgctatggtt tcaggattta atgcctggaa gagcgggggc
+    78301 agctaccacc ttatttacta acagtatttc taccggggta attctggctg gcgttattca
+    78361 gggagcaatt gcacaaagtt gggggcactt tgctgtctac tgggtaattg cggttatttc
+    78421 tgttgtcgca ttatttttaa ccgcaaaggt taaagacgtt tgatgacgtg gacgatagcg
+    78481 gaaagcccgg tcatttgacc gggcaagggg attaattcat aaacgcaggt tgttttgctt
+    78541 cataagcggc aatggcgtcg tcgtgctgca aggtaagccc aatactgtcc agaccgttca
+    78601 tcatgcagtg gcggcggaag gcatcgatgg taaagcgata ggttttctct cccgctttca
+    78661 cctcttgcgc ttccagatcc acgtcgaaat ggatccccgg attagctttc accagcgcaa
+    78721 acagttcgtc cacttctgca tcgcttaatt tcaccggcag cagctggttg ttaaagctat
+    78781 tgccgtagaa gatgtcagca aaactcggcg caatcaccac tttaaaaccg tagtcggtca
+    78841 atgcccaggg cgcgtgctca cgcgaagagc cacagccgaa gttttctcgt gccagcaaaa
+    78901 tggaagcgcc ctgatactgc gggaagttca gcacgaagtc cgggtttggc tgttggcctt
+    78961 tttcatccag aaaacgccag tcgttaaaca gatgcgcgcc aaaacccgta cgggtcactt
+    79021 tctgcaaaaa ctgtttcggg atgattgcat cggtatcgac attggcggca tccagcggaa
+    79081 ccaccaggcc tgtgtgtttg ataaatttct ctgccatggt gtgctcctta tttaatgttg
+    79141 cgaatgtcgg cgaaatgtcc ggtcacagca gcagcggcag ccattgccgg gctgaccaga
+    79201 tgcgtgcgcc cgccgcgccc ctggcggcct tcaaagttac ggttgctggt ggaggcacaa
+    79261 cgttcgcccg gattcagacg gtcgttgttc atcgccagac acattgagca gccaggcaag
+    79321 cgccattcaa aaccggcttc aataaagatt ttatccagac cttccgcttc cgcctgggct
+    79381 tttaccgggc cagagccggg aaccaccagt gcctgcacgc ctggcgcgac ttttcgccct
+    79441 ttgacgatct ccgctgccgc gcgtaaatct tcaatgcgcg agttggtaca ggaaccgata
+    79501 aacactttgt cgatagccac ttcggtcagc ggaatacccg gtttcagccc catataggcc
+    79561 agcgcttttt ctgccgacgc gcgttcaacc ggatcggcaa acgaagccgg atcgggaata
+    79621 ttgtcgttca cggaaatcac ctggccggga ttggtgcccc aggtgacctg cggtgaaatt
+    79681 tcttctgctt gcagagtgac aacggtatcg aaagttgcgc cttcgtcggt ttgcagggtt
+    79741 ttccagtagg caacggcgtc gtcgaaatct ttgcctttcg gcgcatgcag acggcctttg
+    79801 acatagttaa aggtggtttc gtccggtgca accagaccgg cttttgcgcc catttcgatt
+    79861 gccatattgc acagggtcat acgaccttcc atgcttaaat cacggattgc ttcgccgcaa
+    79921 aactccacca catgcccggt gccgcctgcg ctaccggttt taccgataat tgccagcacg
+    79981 atatcttttg cggtaatgcc cggcgcggct ttgccctgga cttcaatttt catggttttt
+    80041 gcgcggccct gtttcagggt ttgcgttgcc agtacgtgtt caacttcgga agtgccgata
+    80101 ccaaaggcca gtgcgccaaa cgcgccgtgg gtggcggtat gcgagtcgcc gcagacaatg
+    80161 gtcatccccg gcaaggtgac gccctgttcc ggccccatta cgtggacgat cccctgatac
+    80221 gggtgattca ggtcatacag ttcgacgcca aattctttgc agtttttgat cagttcctgc
+    80281 atctggatac gcgccatttc accgcaggca ttaatgcctt tggtctgggt agagacgttg
+    80341 tgatccatgg tagcgaaggt tttgcccggc tgacgtaccg ggcgaccgtg ggcgcgcaga
+    80401 ccatcgaacg cctgcggtga ggtcacttca tgcaccaggt ggcggtcgat atataacagt
+    80461 ggggtttcgt tttcggcttc gtacacaacg tgagcgtcga acaatttttc gtataacgtc
+    80521 ttagccatga ttacacccct tctgctacat agcgggcaat gatatcgccc atttcatcgg
+    80581 tactaacggc ggcagcgcca cgggctaaat ccccggtgcg aatgccttct tctaatgcgc
+    80641 ggttaatggc gcgttcaatg gcgcaagccg catcatcggc atccaggctg taacgcagca
+    80701 gcagtgccag cgaaaggatt tgtgcaatcg ggttggcgat gtttttgcct gcgatatctg
+    80761 gtgccgagcc gcccgccggt tcatacagtc caaaaccttg ctcgttcagg ctggcggaag
+    80821 gcaacatccc catcgagcca gtgatcattg cgcactcgtc agacagaatg tcgccaaaca
+    80881 ggttggagca cagcagaacg tcaaactgtg atggatcttt aatcagctgc atggtggcgt
+    80941 tgtcgatgta catatgcgcc agttcgacat ccgggtattc cgtggcgatc tcgttaacga
+    81001 tctcccgcca taaaatagag gattgcagca cgttggcttt atcgatcgac gtcactttgt
+    81061 ggcgacgctt gcgagcagat tcaaacgcga tgcgggcgat acgttcgatc tcaaaacggt
+    81121 gatacacctc ggtatcaaag gctttttcat attgtccgct accttcgcgg ccttttggct
+    81181 gaccgaaata gatgccgccg gtcagttcgc gcacacacag gatgtcgaag ccgtttgcgg
+    81241 caatgtctgc acgcagcgga cagaatgctt ccagcccctg atacagtttt gccgggcgca
+    81301 ggttgctgaa taatttgaag tgcttacgca gaggcagcag cgcgccgcgt tctggttgct
+    81361 ggtctggtgg taaatgttcc cacttcgggc cgcctaccga gccaaacagc acggcatcgg
+    81421 cttgctcaca accttcaacc gtcgcaggcg gcagtggttg cccgtggtta tcaatggctg
+    81481 cgccgcctac atcgtaatgg ctggtggtga tgcgcatcgc aaagcggttg cgcacggcat
+    81541 ccagcacttt cagcgcctgg gtcatcactt ccggaccaat accgtccccc ggcaatacgg
+    81601 caatatggta attcttcgac atcacacggt ttccttgttg ttttcgttgt gttgagcttt
+    81661 gcgttgcaac tctttttcga cttctgcggc acgccagata ttgttcagaa cgtgcaccat
+    81721 ggctttggca gatgactcga caatatcggt agccaggccg acgccgtgga agcggcgacc
+    81781 gttgtagtta gcgacgatat ccacctgacc cagcgcatct ttaccgtggc ctttggcggt
+    81841 caggctgtat ttcaccagtt cgacgttata ttcagtgatg cggttaattg cctgatagac
+    81901 ggcatcgacc ggaccgttac cgttggcggc ttctgctttg acttcttcgc cacaggccag
+    81961 tttgacggcg gcggtggcga tatcgttaga gccagactgc acgctgaagt aatccagacg
+    82021 gaaatgctcc ggctcttctt gctgcttacc gatgaaggcc agcgcctcca gatcgtaatc
+    82081 aaacacctga ccttttttgt ccgccagctt caggaaagca tcgtacaaat tgtctaaatt
+    82141 atattcactt tctttatacc ccatctcatc catgcgatgt ttcaccgccg cacgccccga
+    82201 acgagaggtc agattcagct ggatttggtt cagaccaata gattctggtg tcatgatttc
+    82261 gtagttttcg cggtttttca gcacgccatc ctggtgtata ccggaggagt gtgcgaatgc
+    82321 gccgctgcca acaatggctt tgtttgccgg gatcggcata ttacaaatct ggctaactaa
+    82381 ctggctggtg cgccatatct cctggtgatt aatggcggtg tggacgttga gaatatcctt
+    82441 acgaactttg atcgccatga tgacttcttc cagggaacag tttccggcac gctcgccgat
+    82501 cccgttcatt gcgccttcca cctggcgtgc accggcatgt accgccgcca gtgagtttcc
+    82561 gaccgccagg cccaaatcgt cgtgggtatg tacggagata atggctttgc cgatgctagg
+    82621 cacgcgttca tacaggccgc tgatgattcc ggcgaactca aacggcatgg tgtagcccac
+    82681 ggtgtccgga atgttgatgg tggtggcacc ggcattaatc gccgcttcga ccactcgcgc
+    82741 cagatcggca atgggtgtac gcccggcatc ttcgcaagaa aattcaacat catcggtgta
+    82801 attacgggcg cgtttcacca tatagatagc gcgttcgatc acctcgtcca gcgtgctgcg
+    82861 cagcttggtg gcgatgtgca ttggcgaagt ggcaataaag gtatgaatac ggaaggcttc
+    82921 ggcgactttc agggattcgg ccgccacgtc gatatctttt tccacgcagc gagctaacgc
+    82981 acatacgcgg ctgtttttaa cctggcgggc gatggtttgc accgattcaa aatcgcccgg
+    83041 cgaagagacg gggaaaccga cttccatcac gtcaacaccc atacgctcaa gggccagcgc
+    83101 aatttgcagt ttttctttca cactcaagct tgcctgtaac gcctgttcac cgtcgcgcaa
+    83161 tgtggtatcg aaaataatga cttgctggct catggtttgg gtccttgtct cttttagagc
+    83221 gcctcgcttc gggcataaaa aaacccgcgc aatggcgcgg gttttttgtt tgactgcgtg
+    83281 ctggcttaat gctggatgcc gctcactcgt ctaccgcgca aagaagatgc gtttagtagt
+    83341 agtagaccga taaagcgaac gatgtgagtc attaaatcag ctccagatga atgcgatatg
+    83401 cttttagagt tactggatac aaaaacggat gtcaaccctg acgcaataaa aacgtcccgc
+    83461 cagcgtgagt tctgcatccg taaaattagc taattgtgct gcggtggtta aagtaagcga
+    83521 tattaatttc tgcttaacta ccgacgcttt tcatcggttg acatatttca gcataaattt
+    83581 ttgcatctaa tcaacgagga aaaaggggac aaaatgcacg cgttgcaaaa cctatcctga
+    83641 tgatttgtat tgaattatat gttttgcgat tttttttgat attgatttgg tgaatattat
+    83701 tgatcaatta atgttaagaa ttaatgcatt aaatatataa attaattatt aaataagcac
+    83761 atttaatcca ttttgtagat gattgagtat tcgcggtagt tatgattaga ttgttttcgc
+    83821 aacaaaaaca ttatggatta ttatgctgtg gtaaatgact cattccacgg caatggattc
+    83881 tgtttttatc agaacccgta tctttatgtt ttccgaattt tactcatttt gctttttctt
+    83941 attttatatg catgataaat catattcttc aggattattt ctctgcattc caataaggga
+    84001 aagggagtta agtgtgacag tggagttaag tatgccagag gtacaaacag atcatccaga
+    84061 gacggcggag ttaagcaaac cacagctacg catggtcgat ctcaacttat taaccgtttt
+    84121 cgatgccgtg atgcaggagc aaaacattac tcgtgccgct catgttctgg gaatgtcgca
+    84181 acctgcggtc agtaacgctg ttgcacgcct gaaggtgatg tttaatgacg agctttttgt
+    84241 tcgttatggc cgtggtattc aaccgactgc tcgcgcattt caactttttg gttcagttcg
+    84301 tcaggcattg caactagtac aaaatgaatt gcctggttca ggttttgaac ccgcgagcag
+    84361 tgaacgtgta tttcatcttt gtgtttgcag cccgttagac agcattctga cctcgcagat
+    84421 ttataatcac attgagcaga ttgcgccaaa tatacatgtt atgttcaagt cttcattaaa
+    84481 tcagaacact gaacatcagc tgcgttatca ggaaacggag tttgtgatta gttatgaaga
+    84541 cttccatcgt cctgaattta ccagcgtacc attatttaaa gatgaaatgg tgctggtagc
+    84601 cagcaaaaat catccaacaa ttaagggccc gttactgaaa catgatgttt ataacgaaca
+    84661 acatgcggcg gtttcgctcg atcgtttcgc gtcatttagt caaccttggt atgacacggt
+    84721 agataagcaa gccagtatcg cgtatcaggg catggcaatg atgagcgtac ttagcgtggt
+    84781 gtcgcaaacg catttggtcg ctattgcgcc gcgttggctg gctgaagagt tcgctgaatc
+    84841 cttagaatta caggtattac cgctgccgtt aaaacaaaac agcagaacct gttatctctc
+    84901 ctagcatgaa gctgccgggc gcgataaagg ccatcagtgg atggaagagc aattagtctc
+    84961 aatttgcaaa cgctaactga ttgcagaata ggtcagacat gaatgtctgg tttattctgc
+    85021 attttttatt gaatgtagaa ttttattctg aatgtgtggg ctctctattt taggattaat
+    85081 taaaaaaata gagaaattgc tgtaagttgt gggattcagc cgatttatta tcaatttaat
+    85141 cctctgtaat ggaggatttt atcgtttctt ttcacctttc ctcctgttta ttcttattac
+    85201 cccgtgttta tgtctctggc tgccaattgc ttaagcaaga tcggacggtt aatgtgtttt
+    85261 acacattttt tccgtcaaac agtgaggcag gccatggaga tgttgtctgg aggcgagatg
+    85321 gtcgtccgat cgcttatcga tcagggcgtt aaacaagtat tcggttatcc cggaggcgca
+    85381 gtccttgata tttatgatgc attgcatacc gtgggtggta ttgatcatgt attagttcgt
+    85441 catgagcagg cggcggtgca tatggccgat ggcttggcgc gcgcgaccgg ggaagtcggc
+    85501 gtcgtgctgg taacgtcggg tccaggggcg accaatgcga ttactggcat cgccaccgct
+    85561 tatatggatt ccattccatt agttgtcctt tccgggcagg tagcgacctc gttgataggt
+    85621 tacgatgcct ttcaggagtg cgacatggtg gggatttcgc gacccgtggt taaacacagt
+    85681 tttctggtta agcaaacgga agacattccg caggtgctga aaaaggcttt ctggctggcg
+    85741 gcaagcggtc gcccaggacc agtagtcgtt gatttaccga aagatattct taatccggcg
+    85801 aacaaattac cctatgtctg gccggagtcg gtcagtatgc gttcttacaa tcccactact
+    85861 accggacata aagggcaaat taagcgtgct ctgcaaagcg tggtagcggt aaaaaaaccg
+    85921 gttgtctacg taggcggtgg ggcaatcacg gcgggctgcc atcagcagtt gaaagaaacg
+    85981 gtggaggcgt tgaatctgcc cgttgtttgc tcattgatgg ggctgggggc gtttccggca
+    86041 acgcatcgtc aggtactggg tatgctggga atgcacggta cctacgaagc caatatgacg
+    86101 atgcataacg cggatgtgat tttcgccgtc ggggtacgat ttgatgaccg aacgacgaac
+    86161 aatctggcaa agtactgccc aaatgccact gttctgcata tcgatattga tcctacttcc
+    86221 atttctaaaa ccgtgactgc ggatatcccg attgtggggg atgctcgcca agtcctcgaa
+    86281 caaatgcttg aactcttgtc gcaagaatcc gcccatcaac cactggatga gatccgcgac
+    86341 tggtggcagc aaattgaaca gtggcgcgct cgtcagtgcc tgaaatatga cactcacagt
+    86401 gaaaagatta aaccgcaggc ggtgatcgag actctttggc ggttgacgaa gggagacgct
+    86461 tacgtgacgt ccgatgtcgg gcagcaccag atgtttgctg cactttatta tccattcgac
+    86521 aaaccgcgtc gctggatcaa ttccggtggc ctcggcagca tgggttttgg tttacctgcg
+    86581 gcactgggcg tcaaaatggc gttcccagaa gaaaccgtgg tttgcgtcac tggcgacggc
+    86641 agtattcaga tgaacatcca ggaactgtct accgcgttgc aatacgagtt gcccgtactg
+    86701 gtggtgaatc tcaataaccg ctatctgggg atggtgaagc agtggcagga catgatctat
+    86761 tccggccgtc attcacaatc ttatatgcaa tcgctacccg atttcgtccg tcgcggagcc
+    86821 tatgggcatg tcgggatcca gatttctcat ccgcatggct ggaaagcaaa cttagcgagg
+    86881 cgctggaaca ggtgcgcaat aatcgcctgg tgtttgttga tgttaccgtc gatggcagcg
+    86941 agcacgtcta cccgatgcag attcgcgggg gcggaatgga tgaaatgtgg ttaagcaaaa
+    87001 cggagagaac ctgattatgc gccggatatt atcagtctta ctcgaaaatg aatcaggcgc
+    87061 gttatcccgc gtgattggcc ttttttccca gcgtggctac aacattgaaa gcctgaccgt
+    87121 tgcgccaacc gacgatccga cattatcgcg tatgaccatc cagaccgtgg gcgatgaaaa
+    87181 agtacttgag cagatcgaaa agcaattaca caaactggtc gatgtcttgc gcgtgagtga
+    87241 gttggggcag ggcgcgcatg ttgagcggga aatcatgctg gtgaaaattc aggccagcgg
+    87301 ttacgggcgt gacgaagtga aacgtaatac ggaaatattc cgtgggcaaa ttatcgatgt
+    87361 cacaccctcg ctttataccg ttcaattagc aggcaccagc ggtaagctta gtgcattttt
+    87421 agcatcgatt cgcgatgtgg cgaaaattgt ggaggttgct cgctctggtg tggtcggact
+    87481 ttcgcgcggc gataaaataa tgcgttgaga atgatctcaa tgcgcaattt acagcccaac
+    87541 atgtcacgtt gggctttttt tgcgaaatca gtgggaacct ggaataaaag cagttgccgc
+    87601 agttaatttt ctgcgcttag atgttaatga atttaaccca taccagtaca atggctatgg
+    87661 tttttacatt ttacgcaagg ggcaattgtg aaactggatg aaatcgctcg gctggcggga
+    87721 gtgtcgcgga ccactgcaag ctatgttatt aacggcaaag cgaagcaata ccgtgtgagc
+    87781 gacaaaaccg ttgaaaaagt catggctgtg gtgcgtgagc acaattacca cccgaacgcc
+    87841 gtggcagctg ggcttcgtgc tggacgcaca cgttctattg gtcttgtgat ccccgatctg
+    87901 gagaacacca gctatacccg catcgctaac tatcttgaac gccaggcgcg gcaacggggt
+    87961 tatcaactgc tgattgcctg ctcagaagat cagccagaca acgaaatgcg gtgcattgag
+    88021 caccttttac agcgtcaggt tgatgccatt attgtttcga cgtcgttgcc tcctgagcat
+    88081 cctttttatc aacgctgggc taacgacccg ttcccgattg tcgcgctgga ccgcgccctc
+    88141 gatcgtgaac acttcaccag cgtggttggt gccgatcagg atgatgccga aatgctggcg
+    88201 gaagagttac gtaagtttcc cgccgagacg gtgctttatc ttggtgcgct accggagctt
+    88261 tctgtcagct tcctgcgtga acaaggtttc cgtactgcct ggaaagatga tccgcgcgaa
+    88321 gtgcatttcc tgtatgccaa cagctatgag cgggaggcgg ctgcccagtt attcgaaaaa
+    88381 tggctggaaa cgcatccgat gccgcaggcg ctgttcacaa cgtcgtttgc gttgttgcaa
+    88441 ggagtgatgg atgtcacgct gcgtcgcgac ggcaaactgc cttctgacct ggcaattgcc
+    88501 acctttggcg ataacgaact gctcgacttc ttacagtgtc cggtgctggc agtggctcaa
+    88561 cgtcaccgcg atgtcgcaga gcgtgtgctg gagattgtcc tggcaagcct ggacgaaccg
+    88621 cgtaagccaa aacctggttt aacgcgcatt aaacgtaatc tctatcgccg cggcgtgctc
+    88681 agccgtagct aagccgcgaa caaaaatacg cgccaggtga atttccctct ggcgcgtaga
+    88741 gtacgggact ggacatcaat atgcttaaag taaataagac tattcctgac tattattgat
+    88801 aaatgctttt aaacccgccc gttaattaac tcaccagctg aaattcacaa taattaagtg
+    88861 atatcgacag cgcgtttttg cattattttg ttacatgcgg cgatgaattg ccgatttaac
+    88921 aaacactttt ctttgctttt gcgcaaaccc gctggcatca agcgccacac agacgtaaca
+    88981 aggactgtta accggggaag atatgtccta aaatgccgct cgcgtcgcaa actgacactt
+    89041 tatatttgct gtggaaaata gtgagtcatt ttaaaacggt gatgacgatg agggattttt
+    89101 tcttacagct attcataacg ttaatttgct tcgcacgttg gacgtaaaat aaacaacgct
+    89161 gatattagcc gtaaacatcg ggttttttac ctcggtatgc cttgtgactg gcttgacaag
+    89221 cttttcctca gctccgtaaa ctcctttcag tgggaaattg tggggcaaag tgggaataag
+    89281 gggtgaggct ggcatgttcc ggggagcaac gttagtcaat ctcgacagca aagggcgctt
+    89341 atcagtgcct acccgttatc gggaacagct gcttgagaac gctgccggtc aaatggtttg
+    89401 caccattgac atttatcacc cgtgcctgct gctttacccc ctgcctgaat gggaaattat
+    89461 cgagcaaaaa ttatcgcgtc tgtcgagcat gaacccggtt gagcgccgtg tgcagcgcct
+    89521 actgttaggt catgccagcg aatgtcagat ggatggcgca ggtcgattgt taatcgcgcc
+    89581 agtactgcgg caacatgccg ggctgacaaa agaagtgatg ctggttggac agttcaacaa
+    89641 gtttgagctg tgggatgaaa caacctggca tcaacaggtc aaggaagata tcgacgcaga
+    89701 gcagttggct accggagact tatcggagcg actgcaggac ttgtctctat aaaatgatgg
+    89761 aaaactataa acatactacg gtgctgctgg atgaagccgt taatggcctc aatatccgtc
+    89821 ctgatggcat ctacattgat gggacttttg gtcgcggtgg tcactcacgt ctgatcctct
+    89881 cgcagcttgg cgaagagggg cgtttgctgg cgatcgatcg cgacccgcag gctatcgccg
+    89941 ttgcgaagac tattgatgat ccgcgcttct ccatcatcca cggacctttc tccgcgctgg
+    90001 gcgaatacgt tgccgagcgc gatcttatcg gcaagatcga cggcattctc ctcgatcttg
+    90061 gcgtctcttc accgcaactt gatgatgctg aacgtggctt ttcctttatg cgcgatggtc
+    90121 cgctggacat gcgtatggac ccaacccgtg ggcagtcagc cgctgaatgg ctacaaaccg
+    90181 cagaagaagc cgatatcgcc tgggtattga aaacctatgg tgaagagcgt tttgccaaac
+    90241 gcattgcccg cgccattgtc gagcgtaacc gcgaacagcc gatgacccgc accaaagaac
+    90301 tggcggaagt cgtggctgct gcaacgccgg tgaaagataa gtttaaacat cccgcgaccc
+    90361 gtaccttcca ggcggtgcgc atttgggtaa acagtgaact ggaggagata gagcaggcgc
+    90421 taaaaagctc gctcaacgtg ctggccccgg gtgggcggct ttcgatcatc agcttccact
+    90481 cgctggaaga ccgtattgtg aaacgtttta tgcgtgaaaa cagccgcggt ccgcaagttc
+    90541 cggcagggtt accgatgact gaagagcagc tcaaaaaact gggtggccgt cagctgcgag
+    90601 cactaggcaa gttaatgccg ggcgaagaag aggtggctga gaaccctcgt gcccgtagtt
+    90661 cagttctgcg tattgcagag aggacgaatg catgatcagc agagtgacag aagctctaag
+    90721 caaagttaaa ggatcgatgg gaagccacga gcgccatgca ttgcctggtg ttatcggtga
+    90781 cgatcttttg cgatttggga agctgccact ctgcctgttc atttgcatta ttttgacggc
+    90841 ggtgactgtg gtaaccacgg cgcaccatac ccgtttactg accgctcagc gcgaacaact
+    90901 ggtgctggag cgagatgctt tagacattga atggcgcaac ctgatccttg aagagaatgc
+    90961 gctcggcgac catagccggg tggaaaggat cgccacggaa aagctgcaaa tgcagacatg
+    91021 ttgatccgtc acaagaaaat atcgtagtgc aaaaataagg ataaacgcga cgcatgaaag
+    91081 cagcggcgaa aacgcagaaa ccaaaacgtc aggaagaaca tgccaacttt atcagttggc
+    91141 gttttgcgtt gttatgcggc tgtattctcc tggcgctggc ttttctgctc ggacgcgtag
+    91201 cgtggttaca agttatctcc ccggatatgc tggtgaaaga gggcgacatg cgttctcttc
+    91261 gcgttcagca agtttccacc tcccgcggca tgattactga ccgttctggt cgcccgttag
+    91321 cggtgagcgt gccggtaaaa gcgatttggg ctgacccgaa agaagtgcat gacgctggcg
+    91381 gtatcagcgt cggtgaccgc tggaaggcgc tggctaacgc gctcaatatt ccgctggatc
+    91441 agctttcagc ccgcattaac gccaacccga aagggcgctt tatttatctg gcgcgtcagg
+    91501 tgaaccctga catggcggac tacatcaaaa aactgaaact gccggggatt catctgcgtg
+    91561 aagagtctcg ccgttactat ccgtccggcg aagtgactgc tcacctcatc ggctttacta
+    91621 acgtcgatag tcaagggatt gagggcgttg agaagagttt cgataaatgg cttaccgggc
+    91681 agccgggtga gcgcattgtg cgtaaagacc gctatggtcg cgtaattgaa gatatttctt
+    91741 ctactgacag ccaggcagcg cacaacctgg cgctgagtat tgatgaacgc ctgcaggcgc
+    91801 tggtttatcg cgaactgaac aacgcggtgg cctttaacaa ggctgaatct ggtagcgccg
+    91861 tgctggtgga tgtcaacacc ggtgaagtgc tggcgatggc taacagcccg tcatacaacc
+    91921 ctaacaatct gagcggcacg ccgaaagagg cgatgcgtaa ccgtaccatc accgacgtgt
+    91981 ttgaaccggg ctcaacggtt aaaccgatgg tggtaatgac cgcgttgcaa cgtggcgtgg
+    92041 tgcgggaaaa ctcggtactc aataccattc cttatcgaat taacggccac gaaatcaaag
+    92101 acgtggcacg ctacagcgaa ttaaccctga ccggggtatt acagaagtcg agtaacgtcg
+    92161 gtgtttccaa gctggcgtta gcgatgccgt cctcagcgtt agtagatact tactcacgtt
+    92221 ttggactggg aaaagcgacc aatttggggt tggtcggaga acgcagtggc ttatatcctc
+    92281 aaaaacaacg gtggtctgac atagagaggg ccaccttctc tttcggctac gggctaatgg
+    92341 taacaccatt acagttagcg cgagtctacg caactatcgg cagctacggc atttatcgcc
+    92401 cactgtcgat taccaaagtt gaccccccgg ttcccggtga acgtgtcttc ccggaatcca
+    92461 ttgtccgcac tgtggtgcat atgatggaaa gcgtggcgct accaggcggc ggcggcgtga
+    92521 aggcggcgat taaaggctat cgtatcgcca ttaaaaccgg taccgcgaaa aaggtcgggc
+    92581 cggacggtcg ctacatcaat aaatatattg cttataccgc aggcgttgcg cctgcgagtc
+    92641 agccgcgctt cgcgctggtt gttgttatca acgatccgca ggcgggtaaa tactacggcg
+    92701 gcgccgtttc cgcgccggtc tttggtgcca tcatgggcgg cgtattgcgt accatgaaca
+    92761 tcgagccgga tgcgctgaca acgggcgata aaaatgaatt tgtgattaat caaggcgagg
+    92821 ggacaggtgg cagatcgtaa tttgcgcgac cttcttgctc cgtgggtgcc agacgcacct
+    92881 tcgcgagcac tgcgagagat gacactcgac agccgtgtgg ctgcggcggg cgatctcttt
+    92941 gtagctgtag taggtcatca ggcggacggg cgtcgatata tcccgcaggc gatagcgcaa
+    93001 ggtgtcgctg ccattattgc agaggcgaaa gatgaggcga ccgatggtga aatccgtgaa
+    93061 atgcacggcg taccggtcat ctatctcagc cagctcaacg agcgtttatc tgcactggcg
+    93121 ggccgctttt accatgaacc ctctgacaat ttacgtctcg tgggcgtaac gggcaccaac
+    93181 ggcaaaacca cgactaccca gctgttggcg cagtggagcc aactgcttgg cgaaatcagc
+    93241 gcggtaatgg gcaccgttgg taacggcctg ctggggaaag tgatcccgac agaaaataca
+    93301 accggttcgg cagtcgatgt tcagcatgag ctggcggggc tggtggatca gggcgcgacg
+    93361 ttttgcgcaa tggaagtttc ctcccacggg ctggtacagc accgtgtggc ggcattgaaa
+    93421 tttgcggcgt cggtctttac caacttaagc cgcgatcacc ttgattatca tggtgatatg
+    93481 gaacactacg aagccgcgaa atggctgctt tattctgagc atcattgcgg tcaggcgatt
+    93541 attaacgccg acgatgaagt gggccgccgc tggctggcaa aactgccgga cgcggttgcg
+    93601 gtatcaatgg aagatcatat taatccgaac tgtcacggac gctggttgaa agcgaccgaa
+    93661 gtgaactatc acgacagcgg tgcgacgatt cgctttagct caagttgggg cgatggcgaa
+    93721 attgaaagcc atctgatggg cgcttttaac gtcagcaacc tgctgctcgc gctggcgaca
+    93781 ctgttggcac tcggctatcc actggctgat ctgctgaaaa ccgccgcgcg tctgcaaccg
+    93841 gtttgcggac gtatggaagt gttcactgcg ccaggcaaac cgacggtggt ggtggattac
+    93901 gcgcatacgc cggatgcact ggaaaaagcc ttacaggcgg cgcgtctgca ctgtgcgggc
+    93961 aagctgtggt gtgtctttgg ctgtggtggc gatcgcgata aaggtaagcg tccactgatg
+    94021 ggcgcaattg ccgaagagtt tgctgacgtg gcggtggtga cggacgataa cccgcgtacc
+    94081 gaagaaccgc gtgccatcat caacgatatt ctggcgggaa tgttagatgc cggacatgcc
+    94141 aaagtgatgg aaggccgtgc tgaagcggtg acttgcgccg ttatgcaggc taaagagaat
+    94201 gatgtggtac tggtcgcggg caaaggccat gaagattacc agattgttgg caatcagcgt
+    94261 ctggactact ccgatcgcgt cacggtggcg cgtctgctgg gggtgattgc atgattagcg
+    94321 taacccttag ccaacttacc gacattctca acggtgaact gcaaggtgca gatatcaccc
+    94381 ttgatgctgt aaccactgat acccgaaaac tgacgccggg ctgcctgttt gttgccctga
+    94441 aaggcgaacg ttttgatgcc cacgattttg ccgaccaggc gaaagctggc gcggcaggcg
+    94501 cactactggt tagccgtccg ctggacatcg acctgccgca gttaatcgtc aaggatacgc
+    94561 gtctggcgtt tggtgaactg gctgcatggg ttcgccagca agttccggcg cgcgtggttg
+    94621 ctctgacggg gtcctccggc aaaacctccg ttaaagagat gacggcggcg attttaagcc
+    94681 agtgcggcaa cacgctttat acggcaggca atctcaacaa cgacatcggt gtaccgatga
+    94741 cgctgttgcg cttaacgccg gaatacgatt acgcagttat tgaacttggc gcgaaccatc
+    94801 agggcgaaat agcctggact gtgagtctga ctcgcccgga acgtgcgctg gtcaacaacc
+    94861 tggcagcggc gcatctggaa ggttttggct cgcttgcggg tgtcgcgaaa gcgaaaggtg
+    94921 aaatctttag cggcctgccg gaaaacggta tcgccattat gaacgccgac aacaacgact
+    94981 ggctgaactg gcagagcgta attggctcac gcaaagtgtg gcgtttctca cccaatgccg
+    95041 ccaacagcga tttcaccgcc accaatatcc atgtgacctc gcacggtacg gaatttaccc
+    95101 tacaaacccc aaccggtagc gtcgatgttc tgctgccgtt gccggggcgt cacaatattg
+    95161 cgaatgcgct ggcagccgct gcgctctcca tgtccgtggg cgcaacgctt gatgctatca
+    95221 aagcggggct ggcaaatctg aaagctgttc caggccgtct gttccccatc caactggcag
+    95281 aaaaccagtt gctgctcgac gactcctaca acgccaatgt cggttcaatg actgcagcag
+    95341 tccaggtact ggctgaaatg ccgggctacc gcgtgctggt ggtgggcgat atggcggaac
+    95401 tgggcgctga aagcgaagcc tgccatgtac aggtgggcga ggcggcaaaa gctgctggta
+    95461 ttgaccgcgt gttaagcgtg ggtaaacaaa gccatgctat cagcaccgcc agcggcgttg
+    95521 gcgaacattt tgctgataaa actgcgttaa ttacgcgtct taaattactg attgctgagc
+    95581 aacaggtaat tacgatttta gttaagggtt cacgtagtgc cgccatggaa gaggtagtac
+    95641 gcgctttaca ggagaatggg acatgttagt ttggctggcc gaacatttgg tcaaatatta
+    95701 ttccggcttt aacgtctttt cctatctgac gtttcgcgcc atcgtcagcc tgctgaccgc
+    95761 gctgttcatc tcattgtgga tgggcccgcg tatgattgct catttgcaaa aactttcctt
+    95821 tggtcaggtg gtgcgtaacg acggtcctga atcacacttc agcaagcgcg gtacgccgac
+    95881 catgggcggg attatgatcc tgacggcgat tgtgatctcc gtactgctgt gggcttaccc
+    95941 gtccaatccg tacgtctggt gcgtgttggt ggtgctggta ggttacggtg ttattggctt
+    96001 tgttgatgat tatcgcaaag tggtgcgtaa agacaccaaa gggttgatcg ctcgttggaa
+    96061 gtatttctgg atgtcggtca ttgcgctggg tgtcgccttc gccctgtacc ttgccggcaa
+    96121 agacacgccc gcaacgcagc tggtggtccc attctttaaa gatgtgatgc cgcagctggg
+    96181 gctgttctac attctgctgg cttacttcgt cattgtgggt actggcaacg cggtaaacct
+    96241 gaccgatggt ctcgacggcc tggcaattat gccgaccgta tttgtcgccg gtggttttgc
+    96301 gctggtggcg tgggcgaccg gcaatatgaa ctttgccagc tacttgcata taccgtatct
+    96361 gcgacacgcc ggggaactgg ttattgtctg taccgcgata gtcggggcag gactgggctt
+    96421 cctgtggttt aacacctatc cggcgcaggt ctttatgggc gatgtaggtt cgctggcgtt
+    96481 aggtggtgcg ttaggcatta tcgccgtact gctacgtcag gaattcctgc tggtgattat
+    96541 ggggggcgtg ttcgtggtag aaacgctttc tgtcatcctg caggtcggct cctttaaact
+    96601 gcgcggacaa cgtattttcc gcatggcacc gattcatcac cactatgaac tgaaaggctg
+    96661 gccggaaccg cgcgtcattg tgcgtttctg gattatttcg ctgatgctgg ttctgattgg
+    96721 tctggcaacg ctgaaggtac gttaatcatg gctgattatc agggtaaaaa tgtcgtcatt
+    96781 atcggcctgg gcctcaccgg gctttcctgc gtggactttt tcctcgctcg cggtgtgacg
+    96841 ccgcgcgtta tggatacgcg tatgacaccg cctggcctgg ataaattacc cgaagccgta
+    96901 gaacgccaca cgggcagtct gaatgatgaa tggctgatgg cggcagatct gattgtcgcc
+    96961 agtcccggta ttgcactggc gcatccatcc ttaagcgctg ccgctgatgc cggaatcgaa
+    97021 atcgttggcg atatcgagct gttctgtcgc gaagcacaag caccgattgt ggcgattacc
+    97081 ggttctaacg gcaaaagcac ggtcaccacg ctagtgggtg aaatggcgaa agcggcgggg
+    97141 gttaacgttg gtgtgggtgg caatattggc ctgcctgcgt tgatgctact ggatgatgag
+    97201 tgtgaactgt acgtgctgga actgtcgagc ttccagctgg aaaccacctc cagcttacag
+    97261 gcggtagcag cgaccattct gaacgtgact gaagatcata tggatcgcta tccgtttggt
+    97321 ttacaacagt atcgtgcagc aaaactgcgc atttacgaaa acgcgaaagt ttgcgtggtt
+    97381 aatgctgatg atgccttaac aatgccgatt cgcggtgcgg atgaacgctg cgtcagcttt
+    97441 ggcgtcaaca tgggtgacta tcacctgaat catcagcagg gcgaaacctg gctgcgggtt
+    97501 aaaggcgaga aagtgctgaa tgtgaaagag atgaaacttt ccgggcagca taactacacc
+    97561 aatgcgctgg cggcgctggc gctggcagat gctgcagggt taccgcgtgc cagcagcctg
+    97621 aaagcgttaa ccacattcac tggtctgccg catcgctttg aagttgtgct ggagcataac
+    97681 ggcgtacgtt ggattaacga ttcgaaagcg accaacgtcg gcagtacgga agcggcgctg
+    97741 aatggcctgc acgtagacgg cacactgcat ttgttgctgg gtggcgatgg taaatcggcg
+    97801 gactttagcc cactggcgcg ttacctgaat ggcgataacg tacgtctgta ttgtttcggt
+    97861 cgtgacggcg cgcagctggc ggcgctacgc ccggaagtgg cagaacaaac cgaaactatg
+    97921 gaacaggcga tgcgcttgct ggctccgcgt gttcagccgg gcgatatggt tctgctctcc
+    97981 ccagcctgtg ccagccttga tcagttcaag aactttgaac aacgaggcaa tgagtttgcc
+    98041 cgtctggcga aggagttagg ttgatgcgtt tatctctccc tcgcctgaaa atgccgcgcc
+    98101 tgccaggatt cagtatcctg gtctggatct ccacggcgct aaagggctgg gtgatgggct
+    98161 cgcgggaaaa agataccgac agcctgatca tgtacgatcg caccttactg tggctgacct
+    98221 tcggcctcgc ggcgattggc tttatcatgg tgacctcggc gtcaatgccc atagggcaac
+    98281 gcttaaccaa cgatccgttc ttcttcgcga agcgtgatgg tgtctatctg attttggcgt
+    98341 ttattctggc gatcattacg ctgcgtctgc cgatggagtt ctggcaacgc tacagtgcca
+    98401 cgatgctgct cggatctatc atcctgctga tgatcgtcct ggtagtgggt agctcggtta
+    98461 aaggggcatc gcgttggatc gatctcggtt tgctgcgtat ccagcctgcg gagctgacaa
+    98521 aactgtcgct gttttgctat atcgccaact atctggtgcg taaaggcgac gaagtacgta
+    98581 ataacctgcg cggcttcctg aaaccgatgg gcgtgattct ggtgttggca gtgttactgc
+    98641 tggcacagcc agaccttggt acggtggtgg tgttgtttgt gactacgctg gcgatgttgt
+    98701 tcctggcggg agcgaaattg tggcagttca ttgccattat cggtatgggc atttcagcgg
+    98761 ttgtgttgct gatactcgcc gaaccgtacc gtatccgccg tgttaccgca ttctggaacc
+    98821 cgtgggaaga tccctttggc agcggctatc agttaacgca atcgctgatg gcgtttggtc
+    98881 gcggcgaact ttgggggcaa ggtttaggta actcggtaca aaaactggag tatctgccgg
+    98941 aagcgcacac tgactttatt ttcgccatta tcggcgaaga actggggtat gtcggtgtgg
+    99001 tgctggcact tttaatggta ttcttcgtcg cttttcgcgc gatgtcgatt ggccgtaaag
+    99061 cattagaaat tgaccaccgt ttttccggtt ttctcgcctg ttctattggc atctggttta
+    99121 gcttccaggc gctggttaac gtaggcgcgg cggcggggat gttaccgacc aaaggtctga
+    99181 cattgccgct gatcagttac ggtggttcga gcttactgat tatgtcgaca gccatcatga
+    99241 tgctgttgcg tattgattat gaaacgcgtc tggagaaagc gcaggcgttt gtacgaggtt
+    99301 cacgatgagt ggtcaaggaa agcgattaat ggtgatggca ggcggaaccg gtggacatgt
+    99361 attcccggga ctggcggttg cgcaccatct aatggctcag ggttggcaag ttcgctggct
+    99421 ggggactgcc gaccgtatgg aagcggactt agtgccaaaa catggcatcg aaattgattt
+    99481 cattcgtatc tctggtctgc gtggaaaagg tataaaagca ctgatagctg ccccgctgcg
+    99541 tatcttcaac gcctggcgtc aggcgcgggc gattatgaaa gcgtacaaac ctgacgtggt
+    99601 gctcggtatg ggaggctacg tgtcaggtcc aggtggtctg gccgcgtggt cgttaggcat
+    99661 tccggttgta cttcatgaac aaaacggtat tgcgggctta accaataaat ggctggcgaa
+    99721 gattgccacc aaagtgatgc aggcgtttcc aggtgctttc cctaatgcgg aagtagtggg
+    99781 taacccggtg cgtaccgatg tgttggcgct gccgttgccg cagcaacgtt tggctggacg
+    99841 tgaaggtccg gttcgtgtgc tggtagtggg tggttctcag ggcgcacgca ttcttaacca
+    99901 gacaatgccg caggttgctg cgaaactggg tgattcagtc actatctggc atcagagcgg
+    99961 caaaggttcg caacaatccg ttgaacaggc gtatgccgaa gcggggcaac cgcagcataa
+   100021 agtgacggaa tttattgatg atatggcggc ggcgtatgcg tgggcggatg tcgtcgtttg
+   100081 ccgctccggt gcgttaacgg tgagtgaaat cgccgcggca ggactaccgg cgttgtttgt
+   100141 gccgtttcaa cataaagacc gccagcaata ctggaatgcg ctaccgctgg aaaaagcggg
+   100201 cgcagccaaa attatcgagc agccacagct tagcgtggat gctgtcgcca acaccctggc
+   100261 cgggtggtcg cgagaaacct tattaaccat ggcagaacgc gcccgcgctg catccattcc
+   100321 ggatgccacc gagcgagtgg caaatgaagt gagccgggtt gcccgggcgt aattgtagcg
+   100381 atgccttttg catcgtatga atttaagaag ttaatggcgt aaagaatgaa tacacaacaa
+   100441 ttggcaaaac tgcgttccat cgtgcccgaa atgcgtcgcg ttcggcacat acattttgtc
+   100501 ggcattggtg gtgccggtat gggcggtatt gccgaagttc tggccaatga aggttatcag
+   100561 atcagtggtt ccgatttagc gccaaatccg gtcacgcagc agttaatgaa tctgggtgcg
+   100621 acgatttatt tcaaccatcg cccggaaaac gtacgtgatg ccagcgtggt cgttgtttcc
+   100681 agcgcgattt ctgccgataa cccggaaatt gtcgccgctc atgaagcgcg tattccggtg
+   100741 atccgtcgtg ccgaaatgct ggctgagtta atgcgttttc gtcatggcat cgccattgcc
+   100801 ggaacgcacg gcaaaacgac aaccaccgcg atggtttcca gcatctacgc agaagcgggg
+   100861 ctcgacccaa ccttcgttaa cggcgggctg gtaaaagcgg cgggggttca tgcgcgtttg
+   100921 gggcatggtc ggtacctgat tgccgaagca gatgagagtg atgcatcgtt cctgcatctg
+   100981 caaccgatgg tggcgattgt caccaatatc gaagccgacc acatggatac ctaccagggc
+   101041 gactttgaga atttaaaaca gacttttatt aattttctgc acaacctgcc gttttacggt
+   101101 cgtgcggtga tgtgtgttga tgatccggtg atccgcgaat tgttaccgcg agtggggcgt
+   101161 cagaccacga cttacggctt cagcgaagat gccgacgtgc gtgtagaaga ttatcagcag
+   101221 attggcccgc aggggcactt tacgctgctg cgccaggaca aagagccgat gcgcgtcacc
+   101281 ctgaatgcgc caggtcgtca taacgcgctg aacgccgcag ctgcggttgc ggttgctacg
+   101341 gaagagggca ttgacgacga ggctattttg cgggcgcttg aaagcttcca ggggactggt
+   101401 cgccgttttg atttcctcgg tgaattcccg ctggagccag tgaatggtaa aagcggtacg
+   101461 gcaatgctgg tcgatgacta cggccaccac ccgacggaag tggacgccac cattaaagcg
+   101521 gcgcgcgcag gctggccgga taaaaacctg gtaatgctgt ttcagccgca ccgttttacc
+   101581 cgtacgcgcg acctgtatga tgatttcgcc aatgtgctga cgcaggttga taccctgttg
+   101641 atgctggaag tgtatccggc tggcgaagcg ccaattccgg gagcggacag ccgttcgctg
+   101701 tgtcgcacaa ttcgtggacg tgggaaaatt gatcccattc tggtgccgga tccggcgcgg
+   101761 gtagccgaga tgctggcacc ggtattaacc ggtaacgacc tgattctcgt tcagggggct
+   101821 ggtaatattg gaaaaattgc ccgttcttta gctgaaatca aactgaagcc gcaaactccg
+   101881 gaggaagaac aacatgactg ataaaatcgc ggtcctgttg ggtgggacct ccgctgagcg
+   101941 ggaagtttct ctgaattctg gcgcagcggt gttagccgga ctgcgtgaag gcggtattga
+   102001 cgcgtatcct gtcgacccga aagaagtcga cgtgacgcaa ctgaagtcga tgggctttca
+   102061 gaaagtgttt atcgcgctac acggtcgcgg cggtgaagat ggtacgctgc aggggatgct
+   102121 cgagctgatg ggcttgcctt ataccggaag cggagtgatg gcatctgcgc tttcaatgga
+   102181 taaactacgc agcaaacttc tatggcaagg tgccggttta ccggtcgcgc cgtgggtagc
+   102241 gttaacccgc gcagagtttg aaaaaggcct gagcgataag cagttagcag aaatttctgc
+   102301 tctgggtttg ccggttatcg ttaagccgag ccgcgaaggt tccagtgtgg gaatgtcaaa
+   102361 agtagtagca gaaaatgctc tacaagatgc attaagattg gcatttcagc acgatgaaga
+   102421 agtattgatt gaaaaatggc taagtgggcc ggagttcacg gttgcgatac tcggtgaaga
+   102481 aattttaccg tcaatacgta ttcaaccgtc cggaaccttc tatgattatg aggcgaagta
+   102541 tctctctgat gagacacagt atttctgccc cgcaggtctg gaagcgtcac aagaggccaa
+   102601 tttgcaggca ttagtgctga aagcatggac gacgttaggt tgcaaaggat ggggacgtat
+   102661 tgacgttatg ctggacagcg atggacagtt ttatctgctg gaagccaata cctcaccggg
+   102721 tatgaccagc cacagcctgg tgccgatggc ggcacgtcag gcaggtatga gcttctcgca
+   102781 gttggtagta cgaattctgg aactggcgga ctaatatgtc gcaggctgct ctgaacacgc
+   102841 gaaacagcga agaagaggtt tcttctcgcc gcaataatgg aacgcgtctg gcggggatcc
+   102901 ttttcctgct gaccgtttta acgacagtgt tggtgagcgg ctgggtcgtg ttgggctgga
+   102961 tggaagatgc gcaacgcctg ccgctctcaa agctggtgtt gaccggtgaa cgccattaca
+   103021 cacgtaatga cgatatccgg cagtcgatcc tggcattggg tgagccgggt acctttatga
+   103081 cccaggatgt caacatcatc cagacgcaaa tagaacaacg cctgccgtgg attaagcagg
+   103141 tgagcgtcag aaagcagtgg cctgatgaat tgaagattca tctggttgaa tatgtgccga
+   103201 ttgcgcggtg gaatgatcaa catatggtag acgcggaagg aaataccttc agcgtgccgc
+   103261 cagaacgcac cagcaagcag gtgcttccaa tgctgtatgg cccggaaggc agcgccaatg
+   103321 aagtgttgca gggctatcgc gaaatggggc agatgctggc aaaggacaga tttactctga
+   103381 aggaagcggc gatgaccgcg cggcgttcct ggcagttgac gctgaataac gatattaagc
+   103441 tcaatcttgg ccggggcgat acgatgaaac gtttggctcg ctttgtagaa ctttatccgg
+   103501 ttttacagca gcaggcgcaa accgatggca aacggattag ctacgttgat ttgcgttatg
+   103561 actctggagc ggcagtaggc tgggcgccct tgccgccaga ggaatctact cagcaacaaa
+   103621 atcaggcaca ggcagaacaa caatgatcaa ggcgacggac agaaaactgg tagtaggact
+   103681 ggagattggt accgcgaagg ttgccgcttt agtaggggaa gttctgcccg acggtatggt
+   103741 caatatcatt ggcgtgggca gctgcccgtc gcgtggtatg gataaaggcg gggtgaacga
+   103801 cctcgaatcc gtggtcaagt gcgtacaacg cgccattgac caggcagaat tgatggcaga
+   103861 ttgtcagatc tcttcggtat atctggcgct ttctggtaag cacatcagct gccagaatga
+   103921 aattggtatg gtgcctattt ctgaagaaga agtgacgcaa gaagatgtgg aaaacgtcgt
+   103981 ccataccgcg aaatcggtgc gtgtgcgcga tgagcatcgt gtgctgcatg tgatcccgca
+   104041 agagtatgcg attgactatc aggaagggat caagaatccg gtaggacttt cgggcgtgcg
+   104101 gatgcaggca aaagtgcacc tgatcacatg tcacaacgat atggcgaaaa acatcgtcaa
+   104161 agcggttgaa cgttgtgggc tgaaagttga ccaactgata tttgccggac tggcatcaag
+   104221 ttattcggta ttgacggaag atgaacgtga actgggtgtc tgcgtcgtcg atatcggtgg
+   104281 tggtacaatg gatatcgccg tttataccgg tggggcattg cgccacacta aggtaattcc
+   104341 ttatgctggc aatgtcgtga ccagtgatat cgcttacgcc tttggcacgc cgccaagcga
+   104401 cgccgaagcg attaaagttc gccacggttg tgcgctgggt tccatcgttg gaaaagatga
+   104461 gagcgtggaa gtgccgagcg taggtggtcg tccgccacgg agtctgcaac gtcagacact
+   104521 ggcagaggtg atcgagccgc gctataccga gctgctcaac ctggtcaacg aagagatatt
+   104581 gcagttgcag gaaaagcttc gccaacaagg ggttaaacat cacctggcgg caggcattgt
+   104641 attaaccggt ggcgcagcgc agatcgaagg tcttgcagcc tgtgctcagc gcgtgtttca
+   104701 tacgcaagtg cgtatcggcg cgccgctgaa cattaccggt ttaacggatt atgctcagga
+   104761 gccgtattat tcgacggcgg tgggattgct tcactatggg aaagagtcac atcttaacgg
+   104821 tgaagctgaa gtagaaaaac gtgttacagc atcagttggc tcgtggatca agcgactcaa
+   104881 tagttggctg cgaaaagagt tttaattttt atgaggccga cgatgattac ggcctcaggc
+   104941 gacaggcaca aatcggagag aaactatgtt tgaaccaatg gaacttacca atgacgcggt
+   105001 gattaaagtc atcggcgtcg gcggcggcgg cggtaatgct gttgaacaca tggtgcgcga
+   105061 gcgcattgaa ggtgttgaat tcttcgcggt aaataccgat gcacaagcgc tgcgtaaaac
+   105121 agcggttgga cagacgattc aaatcggtag cggtatcacc aaaggactgg gcgctggcgc
+   105181 taatccagaa gttggccgca atgcggctga tgaggatcgc gatgcattgc gtgcggcgct
+   105241 ggaaggtgca gacatggtct ttattgctgc gggtatgggt ggtggtaccg gtacaggtgc
+   105301 ggcaccagtc gtcgctgaag tggcaaaaga tttgggtatc ctgaccgttg ctgtcgtcac
+   105361 taagcctttc aactttgaag gcaagaagcg tatggcattc gcggagcagg ggatcactga
+   105421 actgtccaag catgtgaact ctctgatcac tatcccgaac gacaaactgc tgaaagttct
+   105481 gggccgcggt atctccctgc tggatgcgtt tggcgcagcg aacgatgtac tgaaaggcgc
+   105541 tgtgcaaggt atcgctgaac tgattactcg tccgggtttg atgaacgtgg actttgcaga
+   105601 cgtacgcacc gtaatgtctg agatgggcca cgcaatgatg ggttctggcg tggcgagcgg
+   105661 tgaagaccgt gcggaagaag ctgctgaaat ggctatctct tctccgctgc tggaagatat
+   105721 cgacctgtct ggcgcgcgcg gcgtgctggt taacatcacg gcgggcttcg acctgcgtct
+   105781 ggatgagttc gaaacggtag gtaacaccat ccgtgcattt gcttccgaca acgcgactgt
+   105841 ggttatcggt acttctcttg acccggatat gaatgacgag ctgcgcgtaa ccgttgttgc
+   105901 gacaggtatc ggcatggaca aacgtcctga aatcactctg gtgaccaata agcaggttca
+   105961 gcagccagtg atggatcgct accagcagca tgggatggct ccgctgaccc aagagcagaa
+   106021 gccggttgct aaagtcgtga atgacaatgc gccgcaaact gcgaaagagc cggattatct
+   106081 ggatatccca gcattcctgc gtaagcaagc tgattaagaa ttgactggaa tttgggtttc
+   106141 gattctcttt gtgctaaact ggcccgccga atgtatagta cacttcggtt ggataggtaa
+   106201 tttggcgaga taatacgatg atcaaacaaa ggacacttaa acgtatcgtt caggcgacgg
+   106261 gtgtcggttt acataccggc aagaaagtca ccctgacgtt acgccctgcg ccggccaaca
+   106321 ccggggtcat ctatcgtcgc accgacttga atccaccggt agatttcccg gccgatgcca
+   106381 aatctgtgcg tgataccatg ctctgtacgt gtctggtcaa cgagcatgat gtacggattt
+   106441 caaccgtaga gcacctcaat gctgctctcg cgggcttggg catcgataac attgttatcg
+   106501 aagttaacgc gccggaaatc ccgatcatgg acggcagcgc cgctccgttt gtatacctgc
+   106561 tgcttgacgc cggtatcgac gagttgaact gcgccaaaaa atttgttcgc atcaaagaga
+   106621 ctgttcgtgt cgaagatggc gataagtggg ctgaatttaa gccgtacaat ggtttttcgc
+   106681 tggatttcac catcgatttt aaccatccgg ctattgattc cagcaaccag cgctatgcga
+   106741 tgaacttctc cgctgatgcg tttatgcgcc agatcagccg tgcgcgtacg ttcggtttca
+   106801 tgcgtgatat cgaatatctg cagtcccgtg gtttgtgcct gggcggcagc ttcgattgtg
+   106861 ccatcgttgt tgacgattat cgcgtactga acgaagacgg cctgcgtttt gaagacgaat
+   106921 ttgtgcgtca caaaatgctc gatgcgatcg gtgacttgtt catgtgtggt cacaatatta
+   106981 ttggtgcatt taccgcttat aaatccggtc atgcactgaa taacaaactg ctgcaggctg
+   107041 tcctggcgaa acaggaagcc tgggaatatg tgaccttcca ggacgacgca gaactgccgt
+   107101 tggccttcaa agcgccttca gctgtactgg cataacgaca tttatactgt cgtataaaat
+   107161 tcgactggca aatctggcac tctctccggc caggtgaacc agtcgttttt ttttgaattt
+   107221 tataagagct ataaaaaacg gtgcgaacgc tgttttctta agcacttttc cgcacaactt
+   107281 atcttcattc gtgctgtgga ctgcaggctt taatgataag atttgtgcgc taaatacgtt
+   107341 tgaatatgat cgggatggca ataacgtgag tggaatactg acgcgctggc gacagtttgg
+   107401 taaacgctac ttctggccgc atctcttatt agggatggtt gcggcgagtt taggtttgcc
+   107461 tgcgctcagc aacgccgccg aaccaaacgc gcccgcaaaa gcgacaaccc gcaaccacga
+   107521 gccttcagcc aaagttaact ttggtcaatt ggccttgctg gaagcgaaca cacgccgccc
+   107581 gaattcgaac tattccgttg attactggca tcaacatgcc attcgcacgg taatccgtca
+   107641 tctttctttc gcaatggcac cgcaaacact gcccgttgct gaagaatctt tgcctcttca
+   107701 ggcgcaacat cttgcattac tggatacgct cagcgcgctg ctgacccagg aaggcacgcc
+   107761 gtctgaaaag ggttatcgca ttgattatgc gcattttacc ccacaagcaa aattcagcac
+   107821 gcccgtctgg ataagccagg cgcaaggcat ccgtgctggc cctcaacgcc tcacctaaca
+   107881 acaataaacc tttacttcat tttattaact ccgcaacgcg gggcgtttga gattttatta
+   107941 tgctaatcaa attgttaact aaagttttcg gtagtcgtaa cgatcgcacc ctgcgccgga
+   108001 tgcgcaaagt ggtcaacatc atcaatgcca tggaaccgga gatggaaaaa ctctccgacg
+   108061 aagaactgaa agggaaaacc gcagagtttc gtgcacgtct ggaaaaaggc gaagtgctgg
+   108121 aaaatctgat cccggaagct ttcgccgtgg tacgtgaggc aagtaagcgc gtctttggta
+   108181 tgcgtcactt cgacgttcag ttactcggcg gtatggttct taacgaacgc tgcatcgccg
+   108241 aaatgcgtac cggtgaagga aaaaccctga ccgcaacgct gcctgcttac ctgaacgcac
+   108301 taaccggtaa aggcgtgcac gtagttaccg tcaacgacta cctggcgcaa cgtgacgccg
+   108361 aaaacaaccg tccgctgttt gaattccttg gcctgactgt cggtatcaac ctgccgggca
+   108421 tgccagcacc ggcaaagcgc gaagcttacg cagctgacat cacttacggt acgaacaacg
+   108481 aatacggctt tgactacctg cgcgacaaca tggcgttcag ccctgaagaa cgtgtacagc
+   108541 gtaaactgca ctatgcgctg gtggacgaag tggactccat cctgatcgat gaagcgcgta
+   108601 caccgctgat catttccggc ccggcagaag acagctcgga aatgtataaa cgcgtgaata
+   108661 aaattattcc gcacctgatc cgtcaggaaa aagaagactc cgaaaccttc cagggcgaag
+   108721 gccacttctc ggtggacgaa aaatctcgcc aggtgaacct gaccgaacgt ggtctggtgc
+   108781 tgattgaaga actgctggtg aaagagggca tcatggatga aggggagtct ctgtactctc
+   108841 cggccaacat catgctgatg caccacgtaa cggcggcgct gcgcgctcat gcgctgttta
+   108901 cccgtgacgt cgactacatc gttaaagatg gtgaagttat catcgttgac gaacacaccg
+   108961 gtcgtaccat gcagggccgt cgctggtccg atggtctgca ccaggctgtg gaagcgaaag
+   109021 aaggtgtgca gatccagaac gaaaaccaaa cgctggcttc gatcaccttc cagaactact
+   109081 tccgtctgta tgaaaaactg gcggggatga ccggtactgc tgataccgaa gctttcgaat
+   109141 ttagctcaat ctacaagctg gataccgtcg ttgttccgac caaccgtcca atgattcgta
+   109201 aagatctgcc ggacctggtc tacatgactg aagcggaaaa aattcaggcg atcattgaag
+   109261 atatcaaaga acgtactgcg aaaggccagc cggtgctggt gggtactatc tccatcgaaa
+   109321 aatcggagct ggtgtcaaac gaactgacca aagccggtat taagcacaac gtcctgaacg
+   109381 ccaaattcca cgccaacgaa gcggcgattg ttgctcaggc aggttatccg gctgcggtga
+   109441 ctatcgcgac caatatggcg ggtcgtggta cagatattgt gctcggtggt agctggcagg
+   109501 cagaagttgc cgcgctggaa aatccgaccg cagagcaaat tgaaaaaatt aaagccgact
+   109561 ggcaggtacg tcacgatgcg gtactggaag caggtggcct gcatatcatc ggtaccgagc
+   109621 gtcacgaatc ccgtcgtatc gataaccagt tgcgcggtcg ttctggtcgt cagggggatg
+   109681 ctggttcttc ccgtttctac ctgtcgatgg aagatgcgct gatgcgtatt tttgcttccg
+   109741 accgagtatc cggcatgatg cgtaaactgg gtatgaagcc aggcgaagcc attgaacacc
+   109801 cgtgggtgac taaagcgatt gccaacgccc agcgtaaagt tgaaagccgt aacttcgaca
+   109861 ttcgtaagca actgctggaa tatgatgacg tggctaacga tcagcgtcgc gccatttact
+   109921 cccagcgtaa cgaactgttg gatgtcagcg atgtgagcga aaccattaac agcattcgtg
+   109981 aagatgtgtt caaagcgacc attgatgcct acattccacc acagtcgctg gaagaaatgt
+   110041 gggatattcc ggggctgcag gaacgtctga agaacgattt cgacctcgat ttgccaattg
+   110101 ccgagtggct ggataaagaa ccagaactgc atgaagagac gctgcgtgac ggcattctgg
+   110161 cgcagtccat cgaagtgtat cagcgtaaag aagaagtggt tggtgctgag atgatgcgtc
+   110221 acttcgagaa aggcgtcatg ctgcaaacgc ttgactccct gtggaaagag cacctggcag
+   110281 cgatggacta tctgcgtcag ggtatccacc tgcgtggcta cgcacagaaa gatccgaagc
+   110341 aggaatacaa acgtgaatcg ttctccatgt ttgcagcgat gctggagtcg ttgaaatatg
+   110401 aagttatcag tacgctgagc aaagttcagg tacgtatgcc tgaagaggtt gaggagctgg
+   110461 aacaacagcg tcgtatggaa gccgagcgtt tagcgcaaat gcagcagctt agccatcagg
+   110521 atgacgactc tgcagccgca gctgcactgg cggcgcaaac cggagagcgc aaagtaggac
+   110581 gtaacgatcc ttgcccgtgc ggttctggta aaaaatacaa gcagtgccat ggccgcctgc
+   110641 aataaaagct aactgttgaa gtaaaaggcg caggattctg cgcctttttt ataggtttaa
+   110701 gacaatgaaa aagctgcaaa ttgcggtagg tattattcgc aacgagaaca atgaaatctt
+   110761 tataacgcgt cgcgcagcag atgcgcacat ggcgaataaa ctggagtttc ccggcggtaa
+   110821 aattgaaatg ggtgaaacgc cggaacaggc ggtggtgcgt gaacttcagg aagaagtcgg
+   110881 gattaccccc caacattttt cgctatttga aaaactggaa tatgaattcc cggacaggca
+   110941 tataacactg tggttttggc tggtcgaacg ctgggaaggg gagccgtggg gtaaagaagg
+   111001 gcaacccggt gagtggatgt cgctggtcgg tcttaatgcc gatgattttc cgccagccaa
+   111061 tgaaccggta attgcgaagc ttaaacgtct gtaggtcaga taaggcgttt tcgccgcatc
+   111121 cgacattcgc acacgatgcc tgatgcgacg ctggcgcgtc ttatcaggcc taaagggatt
+   111181 tctaactcat tgataaattt gtttttgtag gtcggataag gcgttcacgc cgcatccgac
+   111241 atttgcacaa gatgcctgat gcgacgctgt ccgcgtctta tcaggcctac gtgcggcatc
+   111301 agacaaatgt cactgctttg gttcttcgct ccagtcatcg ctttcggaaa gatcgccact
+   111361 gctggggatt cgtttttctt cagcagccca ttctccgagg tcgatcag
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/D12555.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/D12555.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/D12555.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,47 @@
+LOCUS       MUSBC05                  105 bp    DNA     linear   ROD 12-APR-2002
+DEFINITION  Mus spretus gene for beta-casein, 3'UTR.
+ACCESSION   D12555
+VERSION     D12555.1  GI:303649
+KEYWORDS    .
+SOURCE      Mus spretus (western wild mouse)
+  ORGANISM  Mus spretus
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Rodentia; Sciurognathi; Muridae; Murinae; Mus.
+REFERENCE   1  (bases 1 to 105)
+  AUTHORS   Takahashi,N. and Ko,M.S.
+  TITLE     The short 3'-end region of complementary DNAs as PCR-based
+            polymorphic markers for an expression map of the mouse genome
+  JOURNAL   Genomics 16 (1), 161-168 (1993)
+  MEDLINE   93252372
+   PUBMED   8486351
+COMMENT     Submitted (06-JUL-1993) to DDBJ by:
+            Minoru S.H. Ko
+            ERATO Research Development Corporation of Japan
+            (JRDC)
+            5-9-6 Tohkohdai, Tsukuba
+            Ibaraki 300-26
+            Japan
+            Phone:  0298-47-5531
+            Fax:    0298-47-5421.
+FEATURES             Location/Qualifiers
+     source          1..105
+                     /organism="Mus spretus"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:10096"
+     3'UTR           <1..>105
+                     /note="beta-casein
+                     genomic DNA sequence corresponding to a part of the 3'UTR
+                     of beta-caseine gene, MMBCASE (bases 7015 - 7121)"
+     variation       69^70
+                     /note="A in MMBCASE"
+                     /replace="a"
+     variation       103^102
+                     /note="T in MMBCASE
+                     deleted in CAST/Ei (M. m. castaneus)"
+                     /replace="t"
+BASE COUNT       37 a      7 c     17 g     44 t
+ORIGIN      
+        1 agttatatta caggaatttt ataagtgttc aatatggagt tgaaaatgca agtcaataat
+       61 gtatacaaat agtttgtgaa aaattggatt ttctattttt ttctt
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/DQ018368.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/DQ018368.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/DQ018368.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,55 @@
+LOCUS       DQ018368                 523 bp    DNA     linear   PLN 23-MAY-2005
+DEFINITION  (Populus tomentosa x P. bolleana) x P. tomentosa var. truncata
+            BS-LRR type disease resistance protein (RGA6) gene, partial cds.
+ACCESSION   DQ018368
+VERSION     DQ018368.1  GI:66271013
+KEYWORDS    .
+SOURCE      (Populus tomentosa x P. bolleana) x P. tomentosa var. truncata
+  ORGANISM  (Populus tomentosa x P. bolleana) x P. tomentosa var. truncata
+            Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
+            Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons;
+            rosids; eurosids I; Malpighiales; Salicaceae; Saliceae; Populus.
+REFERENCE   1  (bases 1 to 523)
+  AUTHORS   Zhang,Q., Lin,S.Z., Lin,Y.Z., Zhou,Y.L., Zhang,Z.Y., Zheng,H.Q.,
+            Chen,J.B., Wang,Z.L., Qiao,M.J., Wang,X. and Zhu,B.Q.
+  TITLE     Characterization and cloning of disease resistance gene from poplar
+  JOURNAL   Unpublished
+REFERENCE   2  (bases 1 to 523)
+  AUTHORS   Zhang,Q., Lin,S.Z., Lin,Y.Z., Zhou,Y.L. and Zhang,Z.Y.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (26-APR-2005) Key Laboratory for Genetics and Breeding in
+            Forest Trees and Ornamental Plants, MOE, Beijing Forestry
+            University, Box 118, Qinghuadong Road, Beijing 100083, P.R.China
+FEATURES             Location/Qualifiers
+     source          1..523
+                     /organism="(Populus tomentosa x P. bolleana) x P.
+                     tomentosa var. truncata"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:328805"
+     gene            <1..>523
+                     /gene="RGA6"
+     mRNA            <1..>523
+                     /gene="RGA6"
+                     /product="BS-LRR type disease resistance protein"
+     CDS             <1..>523
+                     /gene="RGA6"
+                     /codon_start=1
+                     /product="BS-LRR type disease resistance protein"
+                     /protein_id="AAY43785.1"
+                     /db_xref="GI:66271014"
+                     /translation="GMGGIGKTTVARVVYDRIRWQFEGSCFLANVREDLAKKGGQRRL
+                     QEQLLSEILMERANICDSSRGIEMIKRRLQRKKILVVLDDVDDRKQLESLAAESKWFG
+                     PESRIIITSRDKQVLTRNGVTRIYEAEKLNDDDALMLFSQKAFKKDQPVEDFVKLSKQ
+                     VVGYANGPSTCPQS"
+ORIGIN      
+        1 gggatggggg gtataggtaa gactactgtt gcaagggtag tatatgatag gattcgttgg
+       61 caatttgaag gtagctgttt cttagcaaat gtcagagaag atcttgctaa gaaaggtgga
+      121 caacgccgtt tacaggagca acttctttct gaaatcttaa tggaacgtgc taatatatgt
+      181 gattcttcta gaggaattga aatgataaag cggaggttac aacgtaaaaa gattcttgtt
+      241 gttcttgatg atgtagatga ccgtaaacaa ctagaatccc tggctgcgga gagtaaatgg
+      301 tttggtccag agagtagaat tatcataaca agcagagata aacaagtgtt gactagaaat
+      361 ggtgttacta gaatttatga ggctgagaaa ttgaatgatg atgatgctct tatgttgttt
+      421 agccagaaag ctttcaaaaa agaccaacct gttgaggatt ttgtgaaact atccaagcaa
+      481 gttgtgggtt atgctaatgg gccttccact tgccctcaaa gtc
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ECAPAH02.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ECAPAH02.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ECAPAH02.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3633 @@
+ID   ECAPAH02   standard; DNA; PRO; 111408 BP.
+XX
+AC   D10483; J01597; J01683; J01706; K01298; K01990; M10420; M10611; M12544;
+AC   V00259; X04711; X54847; X54945; X55034; X56742;
+XX
+SV   D10483.2
+XX
+DT   06-AUG-1992 (Rel. 33, Created)
+DT   03-JUN-2002 (Rel. 72, Last updated, Version 62)
+XX
+DE   Escherichia coli genomic DNA. (0.0 - 2.4 min).
+XX
+KW   ant; antO; apaG; apaH; araA; araB; araC; araD; asmB; azi; brnP; caiA; caiB;
+KW   caiC; caiD; caiE; caiF; caiT; carA; carB; chlG;
+KW   Complete and shotgun sequencing; dapB; ddl; ddlB; dinA; divA; dnaJ; dnaK;
+KW   envA; fixA; fixB; fixC; folA; fruR; ftsA; ftsI; ftsL; ftsQ; ftsW; ftsZ;
+KW   gef; groP; hepA; htgA; htpY; ileS; ilvH; ilvI; imp; kefC; leuA; leuB; leuC;
+KW   leuD; leuLP; lpxC; lspA; lueO; lytB; mog; mra; mraY; murC; murD; murE;
+KW   murF; murG; murX; mutT; nhaA; nhaR; ostA; pbpB; pdxA; pea; polB; prlD;
+KW   pyrA; rpsT; secA; sfiB; shl; sulB; surA; talB; tbpA; thrA; thrA1; thrA2;
+KW   thrB; thrC; tmrA; trkC; yaaA; yaaC; yaaD; yaaF; yaaI; yaaJ; yaaT; yaaU;
+KW   yaaV; yabB; yabC; yabE; yabF; yabH; yabI; yabJ; yabK; yabM; yabN; yabO;
+KW   yabP; yabQ; yacA; yacG.
+XX
+OS   Escherichia coli
+OC   Bacteria; Proteobacteria; gamma subdivision; Enterobacteriaceae;
+OC   Escherichia.
+XX
+RN   [1]
+RP   1-111408
+RA   Mori H.;
+RT   ;
+RL   Submitted (18-FEB-1992) to the EMBL/GenBank/DDBJ databases.
+RL   Hirotada Mori, Institute for Virus Research, Kyoto University, Genetics and
+RL   Molecular Biology; 53 Shogoin Kawara-Machi, Sakyo-ku, Kyoto 606, Japan
+RL   (E-mail:e52985 at sakura.kudpc.kyoto-u.ac.jp, Tel:075-751-4042,
+RL   Fax:075-761-5626)
+XX
+RN   [2]
+RX   MEDLINE; 79005683.
+RA   Smith B.R., Schleif R.;
+RT   "Nucleotide sequence of the L-arabinose regulatory region of Escherichia
+RT   coli K12";
+RL   J. Biol. Chem. 253:6931-6933(1978).
+XX
+RN   [3]
+RX   MEDLINE; 78137003.
+RA   Ohtsubo H., Ohtsubo E.;
+RT   "Nucleotide sequence of an insertion element, IS1";
+RL   Proc. Natl. Acad. Sci. U.S.A. 75:615-619(1978).
+XX
+RN   [4]
+RX   MEDLINE; 79116194.
+RA   Greenfield L., Boone T., Wilcox G.;
+RT   "DNA sequence of the araBAD promoter in Escherichia coli B/r";
+RL   Proc. Natl. Acad. Sci. U.S.A. 75:4724-4728(1978).
+XX
+RN   [5]
+RX   MEDLINE; 79177885.
+RA   Johnsrud L.;
+RT   "DNA sequence of the transposable element IS1";
+RL   Mol. Gen. Genet. 169:213-218(1979).
+XX
+RN   [6]
+RX   MEDLINE; 81053692.
+RA   Calvo J.M., Smith D.R.;
+RT   "Nucleotide sequence of the E. coli gene coding for dihydrofolate
+RT   reductase";
+RL   Nucleic Acids Res. 8:2255-2274(1980).
+XX
+RN   [7]
+RX   MEDLINE; 81124262.
+RA   Miyada C.G., Horwitz A.H., Cass L.G., Timko J., Wilcox G.;
+RT   "DNA sequence of the araC regulatory gene from Escherichia coli B/r";
+RL   Nucleic Acids Res. 8:5267-5274(1980).
+XX
+RN   [8]
+RX   MEDLINE; 81013881.
+RA   Ogden S., Haggerty D., Stoner C.M., Kolodrubetz D., Schleif R.;
+RT   "The Escherichia coli L-arabinose operon: binding sites of the regulatory
+RT   proteins and a mechanism of positive and negative regulation";
+RL   Proc. Natl. Acad. Sci. U.S.A. 77:3346-3350(1980).
+XX
+RN   [9]
+RX   MEDLINE; 81077247.
+RA   Katina M., Cossart P., Sibilli L., Saint-Girons I.;
+RT   "Nucleotide sequence of the thrA gene of Escherichia coli";
+RL   Proc. Natl. Acad. Sci. U.S.A. 77:5730-5733(1980).
+XX
+RN   [10]
+RX   MEDLINE; 81264207.
+RA   Mackie G.A.;
+RT   "Nucleotide sequence of the gene for ribosomal protein S20 and its flanking
+RT   regions";
+RL   J. Biol. Chem. 256:8177-8182(1981).
+XX
+RN   [11]
+RX   MEDLINE; 81150470.
+RA   Cossart P., Katinka M., Yaniv M.;
+RT   "Nucleotide sequence of the thrB gene of E. coli, and its two adjacent
+RT   regions; the thrAB and thrBC junctions";
+RL   Nucleic Acids Res. 9:339-347(1981).
+XX
+RN   [12]
+RX   MEDLINE; 81199399.
+RA   Lee N.L., Gielow W.O., Wallace R.G.;
+RT   "Mechanism of araC autoregulation and the domains of two overlapping
+RT   promoters, pC and pBAD, in the L-arabinose regulatory region of Escherichia
+RT   coli";
+RL   Proc. Natl. Acad. Sci. U.S.A. 78:752-756(1981).
+XX
+RN   [13]
+RX   MEDLINE; 82216830.
+RA   Stoner C.M., Schleif R.;
+RT   "Is the amino acid but not nucleotide sequence of the Escherichia coli araC
+RT   gene conserved?";
+RL   J. Mol. Biol. 154:649-652(1982).
+XX
+RN   [14]
+RX   MEDLINE; 83116968.
+RA   Gilson E., Nikaido H., Hofnung M.;
+RT   "Sequence of the malK gene in E.coli K12";
+RL   Nucleic Acids Res. 10:7449-7458(1982).
+XX
+RN   [15]
+RX   MEDLINE; 84069770.
+RA   Parsot C., Cossart P., Saint-Girons I., Cohen G.N.;
+RT   "Nucleotide sequence of thrC and of the transcription termination region of
+RT   the threonine operon in Escherichia coli K12";
+RL   Nucleic Acids Res. 11:7331-7345(1983).
+XX
+RN   [16]
+RX   MEDLINE; 85054974.
+RA   Bouvier J., Richaud C., Richaud F., Patte J., Stragier P.;
+RT   "Nucleotide sequence and expression of the Escherichia coli dapB gene";
+RL   J. Biol. Chem. 259:14829-14834(1984).
+XX
+RN   [17]
+RX   MEDLINE; 84144800.
+RA   Bardwell J.C., Craig E.A.;
+RT   "Major heat shock gene of drosophila and the Escherichia coli heatinducible
+RT   dnaK gene are homologous";
+RL   Proc. Natl. Acad. Sci. U.S.A. 81:848-852(1984).
+XX
+RN   [18]
+RX   MEDLINE; 84222028.
+RA   Innis M.A., Tokunaga M., Williams M.E., Loranger J.M., Chang S., Chang S.,
+RA   Wu H.C.;
+RT   "Nucleotide sequence of the Escherichia coli prolipoprotein signal
+RT   peptidase (lsp) gene";
+RL   Proc. Natl. Acad. Sci. U.S.A. 81:3708-3712(1984).
+XX
+RN   [19]
+RX   MEDLINE; 84248073.
+RA   Bouvier J., Patte J., Stragier P.;
+RT   "Multiple regulatory signals in the control region of the Escherichia coli
+RT   carAB operon";
+RL   Proc. Natl. Acad. Sci. U.S.A. 81:4139-4143(1984).
+XX
+RN   [20]
+RX   MEDLINE; 86030702.
+RA   Chong P., Hui I., Loo T., Gillam S.;
+RT   "Structural analysis of a new GC-specific insertion element IS186";
+RL   FEBS Lett. 192:47-52(1985).
+XX
+RN   [21]
+RX   MEDLINE; 85182715.
+RA   Kamio Y., Lin C., Regue M., Wu H.C.;
+RT   "Characterization of the ileX-lsp operon in Escherichia coli:
+RT   Identification of an open reading frame upstream of the ileX gene and
+RT   potential promoter(s) for the ileS-lsp operon";
+RL   J. Biol. Chem. 260:5616-5620(1985).
+XX
+RN   [22]
+RX   MEDLINE; 85295470.
+RA   Friedberg D., Rosenthal E.R., Jones J.W., Calvo J.M.;
+RT   "Characterization of the 3' end of the leucine operon of Salmonella
+RT   typhimurium";
+RL   Mol. Gen. Genet. 199:486-494(1985).
+XX
+RN   [23]
+RX   MEDLINE; 85190560.
+RA   Cowing D.W.;
+RT   "Consensus sequence for Escherichia coli heatshock gene promoters";
+RL   Proc. Natl. Acad. Sci. U.S.A. 82:2679-2683(1985).
+XX
+RN   [24]
+RA   Sekiguchi T., Ortega-Cesena J., Nosoh Y., Ohashi S., Tsuda K., Kanaya S.;
+RT   "DNA and amino-acid sequences of 3-isopropylmalate dehydrogenase of
+RT   Bacillus coagulans. Comparison with the enzymes of Saccharomyces cerevisiae
+RT   and Thermus thermophilus";
+RL   Biochim. Biophys. Acta 867:36-44(1986).
+XX
+RN   [25]
+RX   MEDLINE; 87163495.
+RA   Lee N., Gielow W., Martin R., Hamilton E., Fowler A.;
+RT   "The organization of the araBAD operaon of Escherichia coli";
+RL   Gene 47:231-244(1986).
+XX
+RN   [26]
+RX   MEDLINE; 86111849.
+RA   Ohki M., Tamura F., Nishimura S., Uchida H.;
+RT   "Nucleotide sequence of the Escherichia coli dnaJ gene and purification of
+RT   the gene product";
+RL   J. Biol. Chem. 261:1778-1781(1986).
+XX
+RN   [27]
+RX   MEDLINE; 87172305.
+RA   Blanchin-Roland S., Blanquet S., Schmitter J., Fayat G.;
+RT   "The gene for Escherichia coli diadenosine tetraphosphatase is located
+RT   immediately clockwise to folA and forms an operon with ksgA";
+RL   Mol. Gen. Genet. 205:515-522(1986).
+XX
+RN   [28]
+RX   MEDLINE; 87016337.
+RA   Mackie G.A.;
+RT   "Structure of the DNA distal to the gene for ribosomal protein S20 in
+RT   Escherichia coli K12: Presence of a strong terminator and an IS1 element";
+RL   Nucleic Acids Res. 14:6965-6981(1986).
+XX
+RN   [29]
+RX   MEDLINE; 86287338.
+RA   Birnbaum M.J., Haspel H.C., Rosen O.M.;
+RT   "Cloning and characterization of a cDNA encoding the rat brain
+RT   glucose-transporter protein";
+RL   Proc. Natl. Acad. Sci. U.S.A. 83:5784-5788(1986).
+XX
+RN   [30]
+RX   MEDLINE; 87308226.
+RA   Nonet M.L., Marvel C.C., Tolan D.R.;
+RT   "The hisT-purF region of the Escherichia coli K-12 chromosome.
+RT   Identification of additional genes of the hisT and purF operons";
+RL   J. Biol. Chem. 262:12209-12217(1987).
+XX
+RN   [31]
+RX   MEDLINE; 87146339.
+RA   Gronger P., Manian S.S., Reilander H., O'Connell M., Priefer U.B.,
+RA   Puhler A.;
+RT   "Organization and partial sequence of a DNA region of the Rhizobium
+RT   leguminosarum symbiotic plasmid pRL6JI containing the genes fixABC, nifA,
+RT   nifB and a novel operon reading frame";
+RL   Nucleic Acids Res. 15:31-49(1987).
+XX
+RN   [32]
+RX   MEDLINE; 89005119.
+RA   Lozoya E., Hoffmann H., Douglas C., Schulz W., Scheel D., Hahlbrock K.;
+RT   "Primary structures and catalytic properties of isoenzymes encoded by the
+RT   two 4-coumarate:CoA ligase genes in parsley";
+RL   Eur. J. Biochem. 176:661-667(1988).
+XX
+RN   [33]
+RX   MEDLINE; 88273145.
+RA   Karpel R., Olami Y., Taglicht D., Schuldiner S., Padan E.;
+RT   "Sequencing of the gene ant which affects the Na+/H+ antiporter activity in
+RT   Escherichia coli";
+RL   J. Biol. Chem. 263:10408-10414(1988).
+XX
+RN   [34]
+RX   MEDLINE; 88330879.
+RA   Jaiswal A.K., McBride O.W., Adesnik M., Nebert D.W.;
+RT   "Human dioxin-inducible cytosolic NAD(P)H:menadione oxidoreductase. cDNA
+RT   sequence and localization of gene to chromosome 16";
+RL   J. Biol. Chem. 263:13572-13578(1988).
+XX
+RN   [35]
+RX   MEDLINE; 90032688.
+RA   Minami-Ishii N., Taketani S., Osumi T., Hashimoto T.;
+RT   "Molecular cloning and sequence analysis of the cDNA for rat mitochondrial
+RT   enoyl-CoA hydratase. Structural and evolutionary relationships linked to
+RT   the bifunctional enzyme of the peroxisomal beta-oxidation system";
+RL   Eur. J. Biochem. 185:73-78(1989).
+XX
+RN   [36]
+RX   MEDLINE; 89252065.
+RA   Sato S., Nakada Y., Shiratsuchi A.;
+RT   "IS421, a new insetion sequence in Escherichia coli";
+RL   FEBS Lett. 249:21-26(1989).
+XX
+RN   [37]
+RX   MEDLINE; 89359108.
+RA   Roa B.B., Connolly D.M., Winkler M.E.;
+RT   "Overlap between pdxA and ksgA in the complex pdxA-ksgA-apaG-apaH operon of
+RT   Escherichia coli K-12";
+RL   J. Bacteriol. 171:4767-4777(1989).
+XX
+RN   [38]
+RX   MEDLINE; 89380240.
+RA   Matsubara Y., Indo Y., Naito E., Ozasa H., Glassberg R., Vockley J.,
+RA   Ikeda Y., Kraus J., Tanaka K.;
+RT   "Molecular cloning and nucleotide sequence of cDNAs encoding the precursors
+RT   of rat long chain acyl-coenzyme A, short chain acyl-coenzyme A, and
+RT   isovaleryl-coenzyme A dehydrogenases. Sequence homology of four enzymes of
+RT   the acyl-CoA dehydrogenase famil";
+RL   J. Biol. Chem. 264:16321-16331(1989).
+XX
+RN   [39]
+RA   Stragier P.;
+RT   ;
+RL   Unpublished.
+XX
+RN   [40]
+RA   Ishino Y.;
+RT   ;
+RL   Unpublished.
+XX
+RN   [41]
+RA   Chen H., Sun Y., Stark T., Beattie W., Moses R.E.;
+RT   "Nucleotide sequence and deletion analysis of the polB gene of E. coli";
+RL   DNA Cell Biol. 9:613-635(1990).
+XX
+RN   [42]
+RX   MEDLINE; 90235831.
+RA   Schaaff I., Hohmann S., Zimmermann F.K.;
+RT   "Molecular analysis of the structural gene for yeast transaldolase";
+RL   Eur. J. Biochem. 188:597-603(1990).
+XX
+RN   [43]
+RX   MEDLINE; 90130288.
+RA   Angerer A., Gaisser S., Braun V.;
+RT   "Nucleotide sequences of the sfuA, sfuB, and sfuC genes of Serratia
+RT   marcescens suggest a periplasmic-binding-protein-dependent iron transport
+RT   mechanism";
+RL   J. Bacteriol. 172:572-578(1990).
+XX
+RN   [44]
+RX   MEDLINE; 91072253.
+RA   Mallonee D.H., White W.B., Hylemon P.B.;
+RT   "Cloning and sequencing of a bile acid-inducible operon from Eubacterium
+RT   sp. strain VPI 12708";
+RL   J. Bacteriol. 172:7011-7019(1990).
+XX
+RN   [45]
+RX   MEDLINE; 90206796.
+RA   Ricca E., Calvo J.M.;
+RT   "The nucleotide sequence of leuA from Salmonella typhimurium";
+RL   Nucleic Acids Res. 18:1290-1290(1990).
+XX
+RN   [46]
+RX   MEDLINE; 90272436.
+RA   Rosenthal E.R., Calvo J.M.;
+RT   "The nucleotide sequence of leuC from Salmonella typhimurium";
+RL   Nucleic Acids Res. 18:3072-3072(1990).
+XX
+RN   [47]
+RX   MEDLINE; 91088346.
+RA   Chen H., Lawrence C.B., Bryan S.K., Moses R.E.;
+RT   "Aphidicolin inhibits DNA polymerase II of Escherichia coli, an alpha-like
+RT   DNA polymerase";
+RL   Nucleic Acids Res. 18:7185-7186(1990).
+XX
+RN   [48]
+RA   Ayala J.A.;
+RT   "Regulation of transcription at 2-minute region of the genetic map of
+RT   Escherichia coli";
+RL   Unpublished.
+XX
+RN   [49]
+RX   MEDLINE; 91192599.
+RA   Umeda M., Ohtsubo E.;
+RT   "Four types of IS1 with differences in nucleotide sequence reside in the
+RT   Escherichia coli K-12 chromosome";
+RL   Gene 98:1-5(1991).
+XX
+RN   [50]
+RX   MEDLINE; 91203829.
+RA   Arigoni F., Kaminski P.A., Hennecke H., Elmerich C.;
+RT   "Nucleotide sequence of the fixABC region of Azorhizobium caulinodans
+RT   ORS571: similarity of the fixB product with eukaryotic flavoproteins,
+RT   characterization of fixX, and identification of nifW";
+RL   Mol. Gen. Genet. 225:514-520(1991).
+XX
+RN   [51]
+RX   MEDLINE; 91260444.
+RA   Munro A.W., Ritchie G.Y., Lamb A.J., Douglas R.M., Booth I.R.;
+RT   "The cloning and DNA sequence of the gene for the glutathione-regulated
+RT   potassium-efflux system KefC of Escherichia coli";
+RL   Mol. Microbiol. 5:607-616(1991).
+XX
+RN   [52]
+RX   MEDLINE; 92048480.
+RA   Poulsen L.K., Refn A., Molin S., Andersson P.;
+RT   "Topographic analysis of the toxic Gef protein from Escherichia coli";
+RL   Mol. Microbiol. 5:1627-1637(1991).
+XX
+RN   [53]
+RX   MEDLINE; 92048481.
+RA   Poulsen L.K., Refn A., Molin S., Andersson P.;
+RT   "The gef gene from Escherichia coli is regulated at the level of
+RT   translation";
+RL   Mol. Microbiol. 5:1639-1648(1991).
+XX
+RN   [54]
+RX   MEDLINE; 91187617.
+RA   Bouvier J., Stragier P.;
+RT   "Nucleotide sequence of the lsp-dapB interval in Escherichia coli";
+RL   Nucleic Acids Res. 19:180-180(1991).
+XX
+RN   [55]
+RP   1-111401
+RX   MEDLINE; 92334977.
+RA   Yura T., Mori H., Nagai H., Nagata T., Ishihama A., Fujita N., Isono K.,
+RA   Mizobuchi K., Nakata A.;
+RT   "Systematic sequencing of the Escherichia coli genome: analysis of the
+RT   0-2.4min region";
+RL   Nucleic Acids Res. 20:3305-3308(1992).
+XX
+RN   [56]
+RX   MEDLINE; 94261430.
+RA   Fujita N., Mori H., Yura T., Ishihama A.;
+RT   "Systematic sequencing of the Escherichia coli genome: analysis of the
+RT   2.4-4.1 min (110,917-193,643 bp) region";
+RL   Nucleic Acids Res. 22:1637-1639(1994).
+XX
+DR   SWISS-PROT; P00907; CARA_ECOLI.
+DR   SWISS-PROT; P00968; CARB_ECOLI.
+DR   SWISS-PROT; P09151; LEU1_ECOLI.
+DR   SWISS-PROT; P28694; MOG_ECOLI.
+DR   SWISS-PROT; P28695; YAAH_ECOLI.
+DR   SWISS-PROT; P28696; YAAI_ECOLI.
+DR   SWISS-PROT; P28697; HTGA_ECOLI.
+DR   SWISS-PROT; P30127; LEU2_ECOLI.
+DR   SWISS-PROT; P30143; YAAJ_ECOLI.
+DR   SWISS-PROT; P31554; OSTA_ECOLI.
+DR   SWISS-PROT; P31675; SETA_ECOLI.
+DR   SWISS-PROT; P31680; DJLA_ECOLI.
+DR   SWISS-PROT; P33595; YABN_ECOLI.
+DR   SWISS-PROT; P39206; CAIE_ECOLI.
+DR   SWISS-PROT; P39219; RLUA_ECOLI.
+DR   SWISS-PROT; P39220; YABP_ECOLI.
+DR   SWISS-PROT; P39221; YABQ_ECOLI.
+DR   SWISS-PROT; P46145; YAAV_ECOLI.
+XX
+CC   Submitted (18-FEB-1992) to DDBJ by:
+CC   Hirotada Mori
+CC   Institute for Virus Research
+CC   Kyoto University
+CC   Kawahara-cho Shogoin Sakyo-ku
+CC   Kyoto 606
+CC   Phone:  075-751-4042
+CC   Fax:    075-761-5626
+CC   Sequence updated (21-DEC-1992) by:
+CC   Hirotada Mori
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..111408
+FT                   /db_xref="taxon:562"
+FT                   /note="Nucleotide position 1-111408 from the initiation
+FT                   site of ThrA (0 min.)."
+FT                   /sequenced_mol="DNA"
+FT                   /organism="Escherichia coli"
+FT                   /map="0-2.4 min"
+FT                   /strain="K-12"
+FT   CDS             1..2463
+FT                   /note="ORF_ID:o101#1"
+FT                   /note="similar to PIR Accession Number A00671"
+FT                   /transl_table=11
+FT                   /gene="thrA"
+FT                   /gene="thrA1"
+FT                   /gene="thrA2"
+FT                   /product="ThrA bifunctional enzyme"
+FT                   /protein_id="BAB96579.1"
+FT                   /translation="MRVLKFGGTSLANAERFLRVADILESNARQGQVATVLSAPAKITN
+FT                   HLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHV
+FT                   LHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLES
+FT                   TVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADC
+FT                   CEIWTDVNGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCL
+FT                   IKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMS
+FT                   RARISVVLITQSSSEYSISFCVPQSDCVRAERAMLEEFYLELKEGLLEPLAVAERLAII
+FT                   SVVGDGLRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQM
+FT                   LFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLN
+FT                   LENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVNCTSSQAVADQYADFLREGFHVVT
+FT                   PNKKANTSSMDYYHQLRYAAEKSRRKFLYDINVGAGLPVIENLQNLLNAGDELMKFSGI
+FT                   LSGSLSYIFGKLDEGMSFSEATRLAREMGYTEPDPRDDLSGMDVARKLLILARETGREL
+FT                   ELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDG
+FT                   VCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLR
+FT                   TLSWKLGV"
+FT   CDS             2465..3394
+FT                   /note="ORF_ID:o101#2"
+FT                   /note="similar to PIR Accession Number A00658"
+FT                   /transl_table=11
+FT                   /gene="thrB"
+FT                   /product="Homoserine kinase (EC  2.7.1.39)"
+FT                   /protein_id="BAB96580.1"
+FT                   /translation="MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAQTFS
+FT                   LNNLGRFADKLPSEPRENIVYQCWERFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVV
+FT                   AALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENDIISQ
+FT                   QVQGLMSGCGCSRIRGLKSRRQKQGYLPAQYRRQDCIAHGRHLAGFIHACYSRQPELAA
+FT                   KLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVADW
+FT                   LGKNYLQNQEGFVHICRLDTAGARVLEN"
+FT   CDS             3395..4681
+FT                   /note="ORF_ID:o101#3"
+FT                   /note="similar to PIR Accession Number A01157"
+FT                   /transl_table=11
+FT                   /gene="thrC"
+FT                   /product="Threonine synthase (EC  4.2.99.2)"
+FT                   /protein_id="BAB96581.1"
+FT                   /translation="MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDE
+FT                   MLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTL
+FT                   AFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKIS
+FT                   PLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQ
+FT                   ICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPR
+FT                   FLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTM
+FT                   RELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKE
+FT                   LAERADLPLLSHNLPADFAALRKLMMNHQ"
+FT   CDS             4895..5191
+FT                   /note="ORF_ID:o101#4"
+FT                   /transl_table=11
+FT                   /protein_id="BAB96582.1"
+FT                   /translation="MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYY
+FT                   WDGGHWRDHGWWKQHYEWRGNRWHLHGPPPPPRHHKKAPHDHHGGHGPGKHHR"
+FT   CDS             complement(4971..5402)
+FT                   /note="ORF_ID:o101#5"
+FT                   /transl_table=11
+FT                   /protein_id="BAB96583.1"
+FT                   /translation="MMKIPPAMANWCLNATSSVKFAVNAGCGVNALSCLQKHADSIYCR
+FT                   INVGLIRRASVASGAECRIVTRHLSFSGDVCQDHDRRDDHEELSYGGAAVAAVRVGASD
+FT                   CLAIHNVVSTSRGRASDLHPSNSHDYHDRLSVILLTGLT"
+FT   CDS             complement(5344..6120)
+FT                   /note="ORF_ID:o101#6"
+FT                   /note="similar to PIR Accession Number S40534"
+FT                   /transl_table=11
+FT                   /gene="yaaA"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96584.1"
+FT                   /translation="MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQ
+FT                   ISTLMRISDKLAGINAARFHDWQPDFTPANARQAILAFKGDVYTGLQAETFSEDDFDFA
+FT                   QQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARGKDLYQFWGDIITNKLNEALAAQG
+FT                   DNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKKARGLMSRFIIEN
+FT                   RLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR"
+FT   CDS             complement(6190..7620)
+FT                   /note="ORF_ID:o101#7"
+FT                   /note="similar to SwissProt Accession Number P30143"
+FT                   /transl_table=11
+FT                   /gene="yaaJ"
+FT                   /product="Hypothetical 51.7 kd protein in thrC-talB
+FT                   intergenic region (ORF8)."
+FT                   /protein_id="BAB96585.1"
+FT                   /translation="MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGK
+FT                   SLKNSIHPQPGGLTSFQSLCTSLAARVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMA
+FT                   TSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMRWMGVLFAVFLLIAYGIIFSGVQA
+FT                   NAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPLMAIIWVLTSLVI
+FT                   CVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPNA
+FT                   AAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAM
+FT                   RVLMGSWGAEFVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIG
+FT                   GTLLSLPLMWQLADIIMACMAITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRY
+FT                   PDIGRQLSPDAWDDVSQE"
+FT   CDS             7899..8852
+FT                   /note="ORF_ID:o101#8"
+FT                   /note="similar to PIR Accession Number S40535"
+FT                   /transl_table=11
+FT                   /gene="talB"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96586.1"
+FT                   /translation="MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQI
+FT                   PEYRKLIDDAVAWAKQQSNDRAQQIVDATDKLAVNIGLEILKLVPGRISTEVDARLSYD
+FT                   TEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGIRAAEQLEKEGINCNLTLLFSFAQ
+FT                   ARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIYQYYKEHGYETVV
+FT                   MGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESEF
+FT                   LWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL"
+FT   CDS             9136..9552
+FT                   /note="ORF_ID:o102#1"
+FT                   /note="similar to PIR Accession Number B56688"
+FT                   /transl_table=11
+FT                   /gene="chlG"
+FT                   /gene="mog"
+FT                   /product="Molybdopterin biosynthesis Mog protein."
+FT                   /protein_id="BAB96587.1"
+FT                   /translation="MCELVDEMSCHLVLTTGGTGPARRDVTPDATLAVADREMPGFGEQ
+FT                   MRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIKETLEGVKDAEGNVVVHGIFAS
+FT                   VPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE"
+FT   CDS             complement(9584..10150)
+FT                   /note="ORF_ID:o102#2"
+FT                   /note="similar to PIR Accession Number E56688"
+FT                   /transl_table=11
+FT                   /product="hgtA 5'-region hypothetical protein 1"
+FT                   /protein_id="BAB96588.1"
+FT                   /translation="MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFY
+FT                   GGIAQIFAGLLEYKKGNTFGLTAFTSYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLG
+FT                   LWGVFTVFMFFGTLKGARVLQFVFFSLTVLFALLAIGNIAGNAAIIHFAGWIGLICGAS
+FT                   AIYLAMGEVLNEQFGRTVLPIGESH"
+FT   misc_feature    10479..10963
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o102#3"
+FT                   /note="similar to PIR Accession Number A40623"
+FT                   /gene="htgA"
+FT                   /gene="htpY"
+FT                   /product="Heat shock protein Y"
+FT   misc_feature    complement(11030..11435)
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o102#4"
+FT                   /note="similar to PIR Accession Number C56688"
+FT                   /gene="yaaI"
+FT                   /product="dnaK 5'-region hypothetical protein 1"
+FT   CDS             11812..13728
+FT                   /note="ORF_ID:o102#5"
+FT                   /note="similar to PIR Accession Number A03311"
+FT                   /transl_table=11
+FT                   /gene="dnaK"
+FT                   /product="DnaK protein"
+FT                   /protein_id="BAB96589.1"
+FT                   /translation="MGKIIGIDLGTTNSCVAIMDGTTPRVLENAEGDRTTPSIIAYTQD
+FT                   GETLVGQPAKRQAVTNPQNTLFAIKRLIGRRFQDEEVQRDVSIMPFKIIAADNGDAWVE
+FT                   VKGQKMAPPQISAEVLKKMKKTAEDYLGEPVTEAVITVPAYFNDAQRQATKDAGRIAGL
+FT                   EVKRIINEPTAAALAYGLDKGTGNRTIAVYDLGGGTFDISIIEIDEVDGEKTFEVLATN
+FT                   GDTHLGGEDFDSRLINYLVEEFKKDQGIDLRNDPLAMQRLKEAAEKAKIELSSAQQTDV
+FT                   NLPYITADATGPKHMNIKVTRAKLESLVEDLVNRSIEPLKVALQDAGLSVSDIDDVILV
+FT                   GGQTRMPMVQKKVAEFFGKEPRKDVNPDEAVAIGAAVQGGVLTGDVKDVLLLDVTPLSL
+FT                   GIETMGGVMTTLIAKNTTIPTKHSQVFSTAEDNQSAVTIHVLQGERKRAADNKSLGQFN
+FT                   LDGINPAPRGMPQIEVTFDIDADGILHVSAKDKNSGKEQKITIKASSGLNEDEIQKMVR
+FT                   DAEANAEADRKFEELVQTRNQGDHLLHSTRKQVEEAGDKLPADDKTAIESALTALETAL
+FT                   KGEDKAAIEAKMQELAQVSQKLMEIAQQQHAQQQTAGADASANNAKDDDVVDAEFEEVK
+FT                   DKK"
+FT   CDS             13817..14947
+FT                   /note="ORF_ID:o102#6"
+FT                   /note="similar to SwissProt Accession Number P08622"
+FT                   /transl_table=11
+FT                   /gene="dnaJ"
+FT                   /gene="groP"
+FT                   /product="DnaJ protein."
+FT                   /protein_id="BAB96590.1"
+FT                   /translation="MAKQDYYEILGVSKTAEEREIRKAYKRLAMKYHPDRNQGDKEAEA
+FT                   KFKEIKEAYEVLTDSQKRAAYDQYGHAAFEQGGMGGGGFGGGADFSDIFGDVFGDIFGG
+FT                   GRGRQRAARGADLRYNMELTLEEAVRGVTKEIRIPTLEECDVCHGSGAKPGTQPQTCPT
+FT                   CHGSGQVQMRQGFFAVQQTCPHCQGRGTLIKDPCNKCHGHGRVERSKTLSVKIPAGVDT
+FT                   GDRIRLAGEGEAGEHGAPAGDLYVQVQVKQHPIFEREGNNLYCEVPINFAMAALGGEIE
+FT                   VPTLDGRVKLKVPGETQTGKLFRMRGKGVKSVRGGAQGDLLCRVVVETPVGLNERQKQL
+FT                   LQELQESFGGPTGEHNSPRSKSFFDGVKKFFDDLTR"
+FT   CDS             complement(16400..16609)
+FT                   /note="ORF_ID:o102#7"
+FT                   /note="similar to PIR Accession Number S16473"
+FT                   /transl_table=11
+FT                   /gene="gef"
+FT                   /product="Gef protein"
+FT                   /protein_id="BAB96591.1"
+FT                   /translation="MLNTCRVPLTDRKVKEKRAMKQHKAMIVALIVICITAVVAALVTR
+FT                   KDLCEVHIRTGQTEVAVFTAYESE"
+FT   CDS             17138..18304
+FT                   /note="ORF_ID:o102#8"
+FT                   /note="similar to SwissProt Accession Number P13738"
+FT                   /transl_table=11
+FT                   /gene="ant"
+FT                   /gene="nhaA"
+FT                   /product="Na(+)/H(+) antiporter 1."
+FT                   /protein_id="BAB96592.1"
+FT                   /translation="MKHLHRFFSSDASGGIILIIAAILAMIMANSGATSGWYHDFLETP
+FT                   VQLRVGSLEINKNMLLWINDALMAVFFLLVGLEVKRELMQGSLASLRQAAFPVIAAIGG
+FT                   MIVPALLYLAFNYADPITREGWAIPAATDIAFALGVLALLGSRVPLALKIFLMALAIID
+FT                   DLGAIIIIALFYTNDLSMASLGVAAVAIAVLAVLNLCGARRTGVYILVGVVLWTAVLKS
+FT                   GVHATLAGVIVGFFIPLKEKHGRSPAKRLEHVLHPWVAYLILPLFAFANAGVSLQGVTL
+FT                   DGLTSILPLGIIAGLLIGKPLGISLFCWLALRLKLAHLPEGTTYQQIMVVGILCGIGFT
+FT                   MSIFIASLAFGSVDPELINWAKLGILVGSISSAVIGYSWLRVRLRPSV"
+FT   misc_feature    18364..19283
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o103#1"
+FT                   /note="similar to SwissProt Accession Number P10087"
+FT                   /gene="antO"
+FT                   /gene="nhaR"
+FT                   /product="Transcriptional activator protein NhaR."
+FT   CDS             complement(20478..20741)
+FT                   /note="ORF_ID:o103#2"
+FT                   /note="similar to PIR Accession Number A30425"
+FT                   /transl_table=11
+FT                   /gene="rpsT"
+FT                   /product="Ribosomal protein S20"
+FT                   /protein_id="BAB96593.1"
+FT                   /translation="MANIKSAKKRAIQSEKARKHNASRRSMMRTFIKKVYAAIEAGDKA
+FT                   AAQKAFNEMQPIVDRQAAKGLIHKNKAARHKANLTAQINKLA"
+FT   CDS             21070..22008
+FT                   /note="ORF_ID:o103#3"
+FT                   /note="similar to PIR Accession Number A22609"
+FT                   /transl_table=11
+FT                   /gene="yaaC"
+FT                   /product="Hypothetical 35k protein (ileS-lsp operon)"
+FT                   /protein_id="BAB96594.1"
+FT                   /translation="MKLIRGIHNLSQAPQEGCVLTIGNFDGVHRGHRALLQGLQEEGRK
+FT                   RNLPVMVMLFEPQPLELFATDKAPARLTRLREKLRYLAECGVDYVLCVRFDRRFAALTA
+FT                   QNFVSDLLVKHLRVKFLAVGDDFPLALVVKAISCYYRKLAWNTASISPVRKLFAEVACA
+FT                   SAARLRQALADDNLALAESLLGHPFAISGRVVHGDELGRTIGFPTANVPPRRQVSPVKG
+FT                   VYAVEVLGLGEKPLPGVANIGTRPTVAGIRQQLEVHLLDVAMDLYGRHIQVVLRKKIRN
+FT                   EQRFASLDELKAQIARDELTAREFFGLTKPA"
+FT   CDS             22051..24861
+FT                   /note="ORF_ID:o103#4"
+FT                   /note="similar to PIR Accession Number S40549"
+FT                   /transl_table=11
+FT                   /gene="ileS"
+FT                   /product="Isoleucine--tRNA ligase (EC 6.1.1.5)"
+FT                   /protein_id="BAB96595.1"
+FT                   /translation="MSDYKSTLNLPETGFPMRGDLAKREPGMLARWTDDDLYGIIRAAK
+FT                   KGKKTFILHDGPPYANGSIHIGHSVNKILKDIIVKSKGLSGYDSPYVPGWDCHGLPIEL
+FT                   KVEQEYGKPGEKFTAAEFRAKCREYAATQVDGQRKDFIRLGVLGDWSHPYLTMGLQNWK
+FT                   ANIIRALGKIIGNGHLHKGAKPVHWCVDCRSALAKRKLSITTKLLRPSTLLSRAVDQDA
+FT                   LKAKFAVSNVNGPISLVIWTTTPWTLPANRAISIAPDFDYALVQIDGQAVILAKDLVES
+FT                   VMQRIGVTDSRHGKRCGAGAAAFTHPFMGFDVPAILGDHVTLDAGTGAVHTAPGHGPDD
+FT                   YVIGQKYGLETANPVGPDGTYLPGTYPTLDGVNVFKANDIVAALLQEKGALLHVEKMQH
+FT                   SYPCCWRHKTPIIFRATPQWFVSMDQKGLRAQSLKEIKGVQWIPDWGQARIESMVANRP
+FT                   DWCISRQRTWGVPMSLFVHKDTEELHPRTLELMEEVAKRVEVDGIQAWWDLDAKEILGD
+FT                   EADQYVKVPDTLDVWFDSGSTHSSVVDVRPEFAGHAADMYLEGSDQHRGWFMSSLMIST
+FT                   AMKGKAPYRQVLTHGFTVDGQGRKMSKSIGNTVSPQDVMNKLGADILRLWVASTDYTGE
+FT                   MAVSDEILKRAADTYRRIRNTARFLLANLNGFDPAKDMVKRREMVVLDRWAVVVRKRHR
+FT                   KTSSRRTKHTISTKWYKRLMRFCSVEMGSFYLDIIKDRQYYAKGHSVARRSCQTALYHI
+FT                   AEALVRWMAPILSFTADEVWGYLPGEREKYVFTGEWYEGLFGLADSEAMNDAFWDELLK
+FT                   VRGEVNKVIEQARADKKVGGSLEAAVTLYAEPELSAKLTALGDELRFVLLTSGATVADY
+FT                   NDAPADAQQSEVLKGLKVALSKAEGEKCPRCWHYTQDVGKVAEHAEICGRCVSNVAGDG
+FT                   EKRKFA"
+FT   CDS             24861..25355
+FT                   /note="ORF_ID:o103#5"
+FT                   /note="similar to SwissProt Accession Number P00804"
+FT                   /transl_table=11
+FT                   /gene="lspA"
+FT                   /product="Lipoprotein signal peptidase (EC 3.4.23.36)
+FT                   (Prolipoprotein signal peptidase) (Signal peptidase II)
+FT                   (Spase II)."
+FT                   /protein_id="BAB96596.1"
+FT                   /translation="MSQSICSTGLRWLWLVVVVLIIDLGSKYLILQNFALGDTVPLFPS
+FT                   LNLHYARNYGAAFSFLADSGGWQRWFFAGIAIGISVILAVMMYRSKATQKLNNIAYALI
+FT                   IGGALGNLFDRLWHGFVVDMIDFYVGDWHFATFNLADTAICVGAALIVLEGFLPSRAKK
+FT                   Q"
+FT   CDS             25480..25929
+FT                   /note="ORF_ID:o103#6"
+FT                   /note="similar to PIR Accession Number JE0402"
+FT                   /transl_table=11
+FT                   /gene="yaaD"
+FT                   /product="Hypothetical 16.4K protein (lsp-dapB intergenic
+FT                   region)"
+FT                   /protein_id="BAB96597.1"
+FT                   /translation="MSESVQSNSAVLVHFTLKLDDGTTAESTRNNGKPALFRLGDASLS
+FT                   EGLEQHLLGLKVGDKTTFSLEPDAAFGVPSPDLIQYFSRREFMDAGEPEIGAIMLFTAM
+FT                   DGSEMPGVIREINGDSITVDFNHPLAGQTVHFDIEVLEIDPALEA"
+FT   CDS             25931..26881
+FT                   /note="ORF_ID:o103#7"
+FT                   /note="similar to PIR Accession Number JE0403"
+FT                   /transl_table=11
+FT                   /gene="lytB"
+FT                   /product="Hypothetical 34.8k protein (lsp-dapB intergenic
+FT                   region)"
+FT                   /protein_id="BAB96598.1"
+FT                   /translation="MQILLANPRGFCAGVDRAISIVENALAIYGAPIYVRHEVVHNRYV
+FT                   VDSLRERGAIFIEQISEVPDGAILIFSAHGVSQAVRNEAKSRDLTVFDATCPLVTKVHM
+FT                   EVARASRRGEESILIGHAGHPEVEGTMGQYSNPEGGMYLVESPDDVWKLTVKNEEKLSF
+FT                   MTQTTLSVDDTSDVIDALRKRFPKIVGPRKDDICYATTNRQEAVRALAEQAEVVLVVGS
+FT                   KNSSNSNRLAELAQRMGKRAFLIDDAKDIQEEWVKEVKCVGVTAGASAPDILVQNVVAR
+FT                   LQQLGGGEAIPLEGREENIVFEVPKELRVDIREVD"
+FT   CDS             26947..27861
+FT                   /note="ORF_ID:o103#8"
+FT                   /note="similar to PIR Accession Number JE0404"
+FT                   /transl_table=11
+FT                   /gene="yaaF"
+FT                   /product="Hypothetical 32.6k protein (lsp-dapB intergenic
+FT                   region)"
+FT                   /protein_id="BAB96599.1"
+FT                   /translation="MRLPIFLDTDPGIDDAVAIAAAIFAPELDLQLMTTVAGNVSVEKT
+FT                   TRNALQLLHFWNAEIPLAQGAAVPLVRAPRDAASVHGESGMAGYDFVEHNRKPLGIPAF
+FT                   LAIRDALMRAPEPVTLVAIGPLTNIALLLSQCPECKPYIRRLVIMGGSAGRGNCTPNAE
+FT                   FNIAADPEAAACVFRSGIEIVMCGLDVTNQAILTPDYLSTLPQLNRTGKMLHALFSHYR
+FT                   SGSMQSGLRMHDLCAIAWLVRPDLFTLKPCFVAVETQGEFTSGTTVVDIDGCLGKPANV
+FT                   QVALDLDVKGFQQWVAEVLALAS"
+FT   CDS             28028..28849
+FT                   /note="ORF_ID:o103#9"
+FT                   /note="similar to SwissProt Accession Number P04036"
+FT                   /transl_table=11
+FT                   /gene="dapB"
+FT                   /product="Dihydrodipicolinate reductase (EC   1.3.1.26)."
+FT                   /protein_id="BAB96600.1"
+FT                   /translation="MHDANIRVAIAGAGGRMGRQLIQAALALEGVQLGAALEREGSSLL
+FT                   GSDAGELAGAGKTGVTVQSSLDAVKDDFDVFIDFTRPEGTLNHLAFCRQHGKGMVIGTT
+FT                   GFDEAGKQAIRDAAADIAIVFAANFSVGVNVMLKLLEKAAKVMGDYTDIEIIEAHHRHK
+FT                   VDAPSGTALAMGEAIAHALDKDLKDCAVYSREGHTGERVPGTIGFATVRAGDIVGEHTA
+FT                   MFADIGERLEITHKASSRMTFANGAVRSALWLSGKESGLFDMRDVLDLNNL"
+FT   CDS             29305..30453
+FT                   /db_xref="SWISS-PROT:P00907"
+FT                   /note="ORF_ID:o103#10"
+FT                   /note="similar to SwissProt Accession Number P00907"
+FT                   /transl_table=11
+FT                   /gene="carA"
+FT                   /gene="pyrA"
+FT                   /product="Carbamoyl-phosphate synthase small chain (EC
+FT                   6.3.5.5) (carbamoyl-phosphate synthetase glutamine chain)."
+FT                   /protein_id="BAB96601.1"
+FT                   /translation="MIKSALLVLEDGTQFHGRAIGATGSAVGEVVFNTSMTGYQEILTD
+FT                   PSYSRQIVTLTYPHIGNVGTNDADEESSQVHAQGLVIRDLPLIASNFRNTEDLSSYLKR
+FT                   HNIVAIADIDTRKLTRLLREKGAQNGCIIAGDNPDAALALEKARAFPGLNGMDLAKEVT
+FT                   TAEAYSWTQGSWTLTGGLPEAKKEDELPFHVVAYDFGAKRNILRMLVDRGCRLTIVPAQ
+FT                   TSAEDVLKMNPDGIFLSNGPGDPAPCDYAITAIQKFLETDIPVFGICLGHQLLALASGA
+FT                   KTVKMKFGHHGGNHPVKDVEKNVVMITAQNHGFAVDEATLPANLRVTHKSLFDGTLQGI
+FT                   HRTDKPAFSFQGHPEASPGPHDAAPLFDHFIELIEQYRKTAK"
+FT   CDS             30471..33692
+FT                   /db_xref="SWISS-PROT:P00968"
+FT                   /note="ORF_ID:o103#11"
+FT                   /note="similar to PIR Accession Number A01198"
+FT                   /transl_table=11
+FT                   /gene="carB"
+FT                   /product="Carbamoyl-phosphate synthase
+FT                   (glutamine-hydrolyzing) (EC   6.3.5.5) large chain"
+FT                   /protein_id="BAB96602.1"
+FT                   /translation="MPKRTDIKSILILGAGPIVIGQACEFDYSGAQACKALREEGYRVI
+FT                   LVNSNPATIMTDPEMADATYIEPIHWEVVRKIIEKERPDAVLPTMGGQTALNCALELER
+FT                   QGVLEEFGVTMIGATADAIDKAEDRRRFDVAMKKIGLETARSGIAHTMEEALAVAADVG
+FT                   FPCIIRPSFTMGGSGGGIAYNREEFEEICARGLDLSPTKELLIDESLIGWKEYEMEVVR
+FT                   DKNDNCIIVCSIENFDAMGIHTGDSITVAPAQTLTDKEYQIMRNASMAVLREIGVETGG
+FT                   SNVQFAVNPKNGRLIVIEMNPRVSRSSALASKATGFPIAKVAAKLAVGYTLDELMNDIT
+FT                   GGRTPASFEPSIDYVVTKIPRFNFEKFAGANDRLTTQMKSVGEVMAIGRTQQESLQKAL
+FT                   RGLEVGATGFDPKVSLDDPEALTKIRRELKDAGADRIWYIADAFRAGLSVDGVFNLTNI
+FT                   DRWFLVQIEELVRLEEKVAEVGITGLNADFLRQLKRKGFADARLAKLAGVREAEIRKLR
+FT                   DQYDLHPVYKRVDTCAAEFATDTAYMYSTYEEECEANPSTDREKIMVLGGGPNRIGQGI
+FT                   EFDYCCVHASLALREDGYETIMVNCNPETVSTDYDTSDRLYFEPVTLEDVLEIVRIEKP
+FT                   KGVIVQYGGQTPLKLARALEAAGVPVIGTSPDAIDRAEDRERFQHAVERLKLKQPANAT
+FT                   VTAIEMAVEKAKEIGYPLVVRPSYVLGGRAMEIVYDEADLRRYFQTAVSVSNDAPVLLD
+FT                   HFLDDAVEVDVDAICDGEMVLIGGIMEHIEQAGVHSGDSACSLPAYTLSQEIQDVMRQQ
+FT                   VQKLAFELQVRGLMNVQFAVKNNEVYLIEVNPRAARTVPFVSKATGVPLAKVAARVMAG
+FT                   KSLAEQGVTKEVIPPYYSVKEVVLPFNKFPGVDPLLGPEMRSTGEVMGVGRTFAEAFAK
+FT                   AQLGSNSTMKKHGRALLSVREGDKERVVDLAAKLLKQGFELDATHGTAIVLGEAGINPR
+FT                   LVNKVHEGRPHIQDRIKNGEYTYIINTTSGRRAIEDSRVIRRSALQYKVHYDTTLNGGF
+FT                   ATAMALNADATEKVISVQEMHAQIK"
+FT   CDS             complement(33740..33919)
+FT                   /note="ORF_ID:o103#12"
+FT                   /note="similar to SwissProt Accession Number P46145"
+FT                   /transl_table=11
+FT                   /gene="yaaV"
+FT                   /product="Hypothetical 6.8 kd protein in carB-caiE
+FT                   intergenic region."
+FT                   /protein_id="BAB96603.1"
+FT                   /translation="MTRFEAIKQGHIKIVDISIVCNFTVDKCELNPAYVIKNIDSPKDL
+FT                   LNGQKKTVLIREPY"
+FT   misc_feature    33849..34351
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o103#13"
+FT                   /note="similar to PIR Accession Number JC6025"
+FT                   /gene="caiF"
+FT                   /product="Transcription activator caiF"
+FT   CDS             complement(34438..35049)
+FT                   /note="ORF_ID:o103#14"
+FT                   /note="similar to SwissProt Accession Number P39206"
+FT                   /transl_table=11
+FT                   /gene="caiE"
+FT                   /product="Carnitine operon protein caiE."
+FT                   /protein_id="BAB96604.1"
+FT                   /translation="MERTLTTVSYYAFEGLIPVVHPTAFVHPSAVLIGDVIVGAGVYIG
+FT                   PLASLRGDYGRLIVQAGANIQDGCIMHGYCDTDTIVGENGHIGHGAILHGCLIGRDALV
+FT                   GMNSVIMDGAVIGEESIVAAMSFVKAGFRGEKRQLLMGTPARAVRNVSDDELHWKRLNT
+FT                   KEYQDLVGRCHVSLHETQPLRQMEENRPRLQGTTDVTPKR"
+FT   CDS             complement(35034..35927)
+FT                   /note="ORF_ID:o103#15"
+FT                   /note="similar to PIR Accession Number S40557"
+FT                   /transl_table=11
+FT                   /gene="caiD"
+FT                   /product="Hypothetical protein."
+FT                   /protein_id="BAB96605.1"
+FT                   /translation="MKQQGTTLPANNHTLKQYAFFAGMLSSLKKQKWRKGMSESLHLTR
+FT                   NGSILEITLDRPKANAIDAKTSFEMGEVFLNFRDDPQLRVAIITGAGEKFFSAGWDLKA
+FT                   AAEGEAPDADFGPGGFAGLTEIFNLDKPVIAAVNGYAFGGAFELALAADFIVCADNASF
+FT                   ALPEAKLGIVPDSGGVLRLPKILPPAIVNEMVMTGRRMGAEEALRWGIVNRVVSQAELM
+FT                   DNARELAQQLVNSAPLAIAALKEIYRTTSEMPVEEAYRYIRSGVLKHYPSVLHSEDAIE
+FT                   GPLAFAEKRDPVWKGR"
+FT   CDS             complement(35928..37496)
+FT                   /note="ORF_ID:o104#1"
+FT                   /note="similar to PIR Accession Number S40558"
+FT                   /transl_table=11
+FT                   /gene="caiC"
+FT                   /product="Hypothetical protein."
+FT                   /protein_id="BAB96606.1"
+FT                   /translation="MDRGAMDIIGGQHLRQMWDDLADVYGHKTALICESSGGVVNRYSY
+FT                   LELNQEINRTANLFYTLGIRKGDKVALHLDNCPEFIFCWFGLAKIGAIMVPINARLLCE
+FT                   ESAWILQNSQACLLVTSAQFYPMYQQIQQEDATQLRHICLTDVALPADDGVSSFTQLKN
+FT                   QQPATLCYAPPLSTDDTAEILFTSGTTSRPKGVVITHYNLRFAGYYSAWQCALRDDDVY
+FT                   LTVMPAFHIDCQCTAAMAAFSAGATFVLVEKYSARAFWGQVQKYRATVTECIPMMIRTL
+FT                   MVQPPSANDQQHRLREVMFYLNLSEQEKDAFCERFGVRLLTSYGMTETIVGIIGDRPGD
+FT                   KRRWPSIGRVGFCYEAEIRDDHNRPLPAGEIGEICIKGIPGKTIFKEYFLNPQATAKVL
+FT                   EADGWLHTGDTGYRDEEDFFYFVDRRCNMIKRGGENVSCVELENIIAAHPKIQDIVVVG
+FT                   IKDSIRDEAIKAFVVLNEGETLSEEEFFRFCEQNMAKFKVPSYLEIRKDLPRNCSGKII
+FT                   RKNLK"
+FT   CDS             complement(37555..38772)
+FT                   /note="ORF_ID:o104#2"
+FT                   /note="similar to PIR Accession Number S40559"
+FT                   /transl_table=11
+FT                   /gene="caiB"
+FT                   /product="L-carnitine dehydratase (EC   4.-.-.-)."
+FT                   /protein_id="BAB96607.1"
+FT                   /translation="MDHLPMPKFGPLAGLRVVFSGIEIAGPFAGQMFAEWGAEVIWIEN
+FT                   VAWADTIRVQPNYPQLSRRNLHALSLNIFKDEGREAFLKLMETTDIFIEASKGPAFARR
+FT                   GITDEVLWQHNPKLVIAHLSGFGQYGTEEYTNLPAYNTIAQAFSGYLIQNGDVDQPMPA
+FT                   FPYTADYFSGLTATTAALAALHKVRETGKGESIDIAMYEVMLRMGQYFMMDYFNGGEMC
+FT                   PRMSKGKDPYYAGCGLYKCADGYIVMELVGITQIEECFKDIGLAHLLGTPEIPEGTQLI
+FT                   HRIECPYGPLVEEKLDAWLATHTIAEVKERFAELNIACAKVLTVPELESNPQYVARESI
+FT                   TQWQTMDGRTCKGPNIMPKFKNNPGQIWRGMPSHGMDTAAILKNIGYSENDIQELVSKG
+FT                   LAKVED"
+FT   CDS             complement(38901..40043)
+FT                   /note="ORF_ID:o105#1"
+FT                   /note="similar to PIR Accession Number S40560"
+FT                   /transl_table=11
+FT                   /gene="caiA"
+FT                   /product="Hypothetical protein."
+FT                   /protein_id="BAB96608.1"
+FT                   /translation="MDFNLNDEQELFVAGIRELMASENWEAYFAECDRDSVYPERFVKA
+FT                   LADMGIDSLLIPEEHGGLDAGFVTLAAVWMELGRLGAPTYVLYQLPGGFNTFLREGTQE
+FT                   QIDKIMAFRGTGKQMWNSAITEPGAGSDVGSLKTTYTRKNGKIYLNGSKCFITSSAYTP
+FT                   YIVVMARDGASPDKPVYTGWFVDMSKPGIKVTKLEKLGLRMDSCCEITFDDVELDEKDM
+FT                   FGREGNGFNRVKEEFDHERFLVALTNYGTAMCAFEDAARYANQRVQFGEAIGRFQLIQE
+FT                   KFAHMAIKLNSMKNMLYEAAWKADNGTITSGDAAMCKYFCANAAFEVVDSAMQVLGGVG
+FT                   IAGNHRISRFWRDLRVDRVSGGSDEMQILTLGRAVLKQYR"
+FT   CDS             complement(40075..41589)
+FT                   /note="ORF_ID:o105#2"
+FT                   /note="similar to PIR Accession Number S40561"
+FT                   /transl_table=11
+FT                   /gene="caiT"
+FT                   /product="Hypothetical protein."
+FT                   /protein_id="BAB96609.1"
+FT                   /translation="MKNEKRKTGIEPKVFFPPLIIVGILCWLTVRDLDAANVVINAVFS
+FT                   YVTNVWGWAFEWYMVVMLFGWFWLVFGPYAKKRLGNEPPEFSTASWIFMMFASCTSAAV
+FT                   LFWGSIEIYYYISTPPFGLEPNSTGAKELGLAYSLFHWGPLPWATYSFLSVAFAYFFFV
+FT                   RKMEVIRPSSTLVPLVGEKHAKGLFGTIVDNFYLVALIFTMGTSLGLATPLVTECMQWL
+FT                   FGIPHTLQLDAIIITCWIILNAICVACGLQKGVRIASDVRSYLSFLMLGWVFIVSGASF
+FT                   IMNYFTDSVGMLLMYLPRMLFYTDPIAKGGFPQGWTVFYWAWWVIYAIQMSIFLARISR
+FT                   GRTVRELCFGMVLGLTASTWILWTVLGSNTLLLIDKNIINIPNLIEQYGVARAIIETWA
+FT                   ALPLSTATMWGFFILCFIATVTLVNACSYTLAMSTCREVRDGEEPPLLVRIGWSILVGI
+FT                   IGIVLLALGGLKPIQTAIIAGGCPLFFVNIMVTLSFIKDAKQNWKD"
+FT   CDS             42025..42831
+FT                   /note="ORF_ID:o105#3"
+FT                   /note="similar to PIR Accession Number S40562"
+FT                   /transl_table=11
+FT                   /gene="fixA"
+FT                   /product="FixA homolog."
+FT                   /protein_id="BAB96610.1"
+FT                   /translation="MNDVSFMISGDAMKIITCYKCVPDEQDIAVNNADGSLDFSKADAK
+FT                   ISQYDLNAIEAACQLKQQAAEAQVTALSVGGKALTNAKGRKDVLSRDPDELIVVIDDQF
+FT                   EQALPQQTASALAAAAQKAGFDLILCGDGSSDLYAQQVGLLVGEILNIPAVNGVSKIIS
+FT                   LTADTLTVERELEDETETLSIPLPAVVAVSTDINSPQIPSMKAILGAAKKPVQVWSAAD
+FT                   IGFNAEAAWSEQQVAAPKQRERQRIVIEGDGEEQIAAFAENLRKVI"
+FT   misc_feature    42810..43787
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o105#4"
+FT                   /note="similar to SwissProt Accession Number P31574"
+FT                   /gene="fixB"
+FT                   /product="FixB protein."
+FT   CDS             43838..45124
+FT                   /note="ORF_ID:o105#5"
+FT                   /note="similar to PIR Accession Number S40564"
+FT                   /transl_table=11
+FT                   /gene="fixC"
+FT                   /product="FixC protein"
+FT                   /protein_id="BAB96611.1"
+FT                   /translation="MSEDIFDAIIVGAGLAGSVAALVLAREGAQVLVIERGNSAGAKNV
+FT                   TGGRLYAHSLEHIIPGFADSAPVERLITHEKLAFMTEKSAMTMDYCNGDETSPSQRSYS
+FT                   VLRSKFDAWLMEQAEEAGAQLITGIRVDNLVQRDGKVVGVEADGDVIEAKTVILADGVN
+FT                   SILAEKLGMAKRVKPTDVAVGVKELIELPKSVIEDRFQLQGNQGAACLFAGSPTDGLMG
+FT                   GGFLYTNENTLSLGLVCGLHHLHDAKKSVPQMLEDFKQHPAVAPLIAGGKLVEYSAHIM
+FT                   PEAGINMLPELVGDGVLIAGDAAGMCMNFGFTIRGMDLAIAAGEAAAKTVLSAMKSDDF
+FT                   SKQKLAEYRQHLESGPLRDMRMYQKLPAFLDNPRMFSGYPELAVGVARDLFTIDGSAPE
+FT                   LMRKKILRHGKKVGFINLIKDGMKGVTVL"
+FT   CDS             45121..45408
+FT                   /note="ORF_ID:o105#6"
+FT                   /note="similar to PIR Accession Number S40565"
+FT                   /transl_table=11
+FT                   /gene="yaaT"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96612.1"
+FT                   /translation="MTSPVNVDVKLGVNKFNVDEEHPHIVVKADADKQVLELLVKACPA
+FT                   GLYKKQDDGSVRFDYAGCLECGTCRILGLGSALEQWEYPRGTFGVEFRYS"
+FT   misc_feature    45465..46050
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o105#7"
+FT                   /note="similar to SwissProt Accession Number P31679"
+FT                   /gene="yaaU"
+FT                   /product="Hypothetical 18.4 kd protein in fixC-kefC
+FT                   intergenic region (orf65)."
+FT   CDS             46137..46799
+FT                   /note="ORF_ID:o105#8"
+FT                   /note="similar to PIR Accession Number S40566"
+FT                   /transl_table=11
+FT                   /gene="yabE"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96613.1"
+FT                   /translation="MKSSRSKPGFRDLFNRCHFPFVLFVAAIWTCQVIPMFAIYTFGPQ
+FT                   IVGLLGLGVGKNAALGNVVISLFFMLGCIPPMLWLSTAGRRPLLIGSFAMMTLALAVLG
+FT                   LIPDMGIWLVVMAFAVYAFFSGGPGNLQWLYPNELFPTDIRASAVGVIMSLSRIGTIVS
+FT                   TWALPIFIDNYGISNTMLMGAGISLFGLLISVAFAPETRGMSLAQTSNMTIRGQRMG"
+FT   CDS             46907..47437
+FT                   /note="ORF_ID:o105#9"
+FT                   /note="similar to PIR Accession Number S40567"
+FT                   /transl_table=11
+FT                   /gene="yabF"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96614.1"
+FT                   /translation="MILIIYAHPYPHHSHANKRMLEQARTLEGVEIRSLYQLYPDFNID
+FT                   IAAEQEALSRADLIVWQHPMQWYSIPPLLKLWINKVFSHGWAYGHGGTALHGKHLLWAV
+FT                   TTGGGESHFEIGAHPGFDGLSQPLQATAIYCGLNWLPPFAMHCTFICDDETLEGQARHY
+FT                   KQRLLEWQEAHHG"
+FT   CDS             47430..49292
+FT                   /note="ORF_ID:o105#10"
+FT                   /note="similar to SwissProt Accession Number P03819"
+FT                   /transl_table=11
+FT                   /gene="kefC"
+FT                   /gene="trkC"
+FT                   /product="Glutathione-regulated potassium-efflux system
+FT                   protein KefC (K(+)/H(+) antiporter)."
+FT                   /protein_id="BAB96615.1"
+FT                   /translation="MDSHTLIQALIYLGSAALIVPIAVRLGLGSVLGYLIAGCIIGPWG
+FT                   LRLVTDAESILHFAEIGVVLMLFIIGLELDPQRLWKLRAAVFGCGALQMVICGGLLGLF
+FT                   CMLLGLRWQVAELIGMTLALSSTAIAMQAMNERNLMVTQMGRSAFAVLLFQDIAAIPLV
+FT                   AMIPLLATSSASTTMGAFALSALKVAGALVLVVLLGRYVTRPALRFVARSGLREVFSAV
+FT                   ALFLVFGFGLLLEEVGLSMAMGAFLAGVLLASSEYRHALESDIEPFKGLLLGLFFIGVG
+FT                   MSIDFGTLLENPLRIVILLLGFLIIKIAMLWLIARPLQVPNKQRRWFAVLLGQGSEFAF
+FT                   VVFGAAQMANVLEPEWAKSLTLAVALSMAATPILLVILNRLEQSSTEEAREADEIDEEQ
+FT                   PRVIIAGFGRFGQITGRLLLSSGVKMVVLDHDPDHIETLRKFGMKVFYGDATRMDLLES
+FT                   AGAAKAEVLINAIDDPQTNLQLTEMVKEHFPHLQIIARARDVDHYIRLRQAGVEKPERE
+FT                   TFEGALKTGRLALESLGLGPYEARERADVFRRFNIQMVEEMAMVENDTKARAAVYKRTS
+FT                   AMLSEIITEDREHLSLIQRHGWQGTEEGKHTGNMADEPETKPSS"
+FT   CDS             49482..49961
+FT                   /note="ORF_ID:o105#11"
+FT                   /note="similar to SwissProt Accession Number P00379"
+FT                   /transl_table=11
+FT                   /gene="folA"
+FT                   /gene="tmrA"
+FT                   /product="Dihydrofolate reductase type I (EC   1.5.1.3)."
+FT                   /protein_id="BAB96616.1"
+FT                   /translation="MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRH
+FT                   TWESIGRPLPGRKNIILSSQPGTDDRVTWVKSVDEAIAACGDVPEIMVIGGGRVYEQFL
+FT                   PKAQKLYLTHIDAEVEGDTHFPDYEPDDWESVFSEFHDADAQNSHSYCFEILERR"
+FT   CDS             complement(50039..50881)
+FT                   /note="ORF_ID:o105#12"
+FT                   /note="similar to PIR Accession Number A26221"
+FT                   /transl_table=11
+FT                   /gene="apaH"
+FT                   /product="Bis(5'-nucleosyl)-tetraphosphatase (symmetrical)
+FT                   (EC  3.6.1.41)"
+FT                   /protein_id="BAB96617.1"
+FT                   /translation="MATYLIGDVHGCYDELIALLHKVEFTPGKDTLWLTGDLVARGPGS
+FT                   LDVLRYVKSLGDSVRLVLGNHDLHLLAVFAGISRNKPLDRLTPLLEAPDADELLNWLRR
+FT                   QPLLQIDEEKKLVMAHAGITPQWDLQTAKECARDVEAVLSSDSYPFFLDAMYGDMPNNW
+FT                   SPELRGLGRLRFITNAFTRMRFCFPNGQLDMYSKESPEEAPAPLKPWFAIPGPVAEEYS
+FT                   IAFGHWASLEGKGTPEGIYALDTGCCWGGTLTCLRWEDKQYFVQPSNRHKDLAEAAAS"
+FT   CDS             complement(50888..51265)
+FT                   /note="ORF_ID:o106#1"
+FT                   /note="similar to PIR Accession Number A30273"
+FT                   /transl_table=11
+FT                   /gene="apaG"
+FT                   /product="ApaG protein"
+FT                   /protein_id="BAB96618.1"
+FT                   /translation="MINSPRVCIQVQSVYIEAQSSPDNERYVFAYTVTIRNLGRAPVQL
+FT                   LGRYWLITNGNGRETEVQGEGVVGVQPLIAPGEEYQYTSGAIIETPLGTMQGHYEMIDE
+FT                   NGVPFSIDIPVFRLAVPTLIH"
+FT   CDS             complement(52087..53076)
+FT                   /note="ORF_ID:o106#2"
+FT                   /note="similar to PIR Accession Number JV0026"
+FT                   /transl_table=11
+FT                   /gene="pdxA"
+FT                   /product="PdxA protein"
+FT                   /protein_id="BAB96619.1"
+FT                   /translation="MVKTQRVVITPGEPAGIGPDLVVQLAQREWPVELVVCADATLLTN
+FT                   RAAMLGLPLTLRPYSPNSPAQPQTAGTLTLLPVALRAPVTAGQLAVENGHYVVETLARA
+FT                   CDGCLNGEFAALITGPVHKGVINDAGIPFTGHTEFFEERSQAKKVVMMLATEELRVALA
+FT                   TTHLPLRDIADAITPALLHEVIAILHHDLRTKFGIAEPRILVCGLNPHAGEGGHMGTEE
+FT                   IDTIIPVLNELRAQGMKLNGPLPADTLFQPKYLDNADAVLAMYHDQGLPVLKYQGFGRG
+FT                   VNITLGLPFIRTSVDHGTALELAGRGKADVGSFITALNLAIKMIVNTQ"
+FT   CDS             complement(53076..53873)
+FT                   /note="ORF_ID:o106#3"
+FT                   /note="similar to PIR Accession Number PV0009"
+FT                   /transl_table=11
+FT                   /product="Hypothetical protein 98 (pdx 5' region)"
+FT                   /protein_id="BAB96620.1"
+FT                   /translation="MGNQNDASTELNLSHILIPLPENPTSDQVNEAESQARAIVDQARN
+FT                   GADFGKLAIAHSADQQALNGGQMGWGRIQELPGIFAQALSTAKKGDIVGPIRSGVGFHI
+FT                   LKVNDLRGESKNISVTEVHARHILLKPSPIMTDEQARVKLEQIAADIKSGKTTFAAAAK
+FT                   EFSQDPGSANQGGDLGWATPDIFDPAFRDALTRLNKGQMSAPVHSSFGWHLIELLDTRN
+FT                   VDKTDAAQKDRAYRMLMNRKFSEEAASWMQEQRASAYVKILSN"
+FT   CDS             complement(53840..54361)
+FT                   /note="ORF_ID:o106#4"
+FT                   /note="similar to PIR Accession Number S40574"
+FT                   /transl_table=11
+FT                   /gene="surA"
+FT                   /product="Survival protein SurA precursor (peptidyl-prolyl
+FT                   cis-trans isomerase SurA) (EC  5.2.1.8) (PPiase) (rotamase
+FT                   C)."
+FT                   /protein_id="BAB96621.1"
+FT                   /translation="MKNWKTLLLGIAMIANTSFAAPQVVDKVAAVVNNGVVLESDVDGL
+FT                   MQSVKLNAAQARQQLPDDATLRHQIMERLIMDQIILQMGQKMGVKISDEQLDQAIANIA
+FT                   KQNNMTLDQMRSRLAYDGLNYNTYRNQIRKEMIISEVRNNEVRRRITILPQKSNPWRSR
+FT                   WVTKTTPALS"
+FT   CDS             complement(54414..56768)
+FT                   /note="ORF_ID:o106#5"
+FT                   /note="similar to SwissProt Accession Number P31554"
+FT                   /transl_table=11
+FT                   /gene="imp"
+FT                   /gene="ostA"
+FT                   /product="Organic solvent tolerance protein precursor."
+FT                   /protein_id="BAB96622.1"
+FT                   /translation="MKKRIPTLLATMIATALYSQQGLAADLASQCMLGVPSYDRPLVQG
+FT                   DTNDLPVTINADHAKGDYPDDAVFTGSVDIMQGNSRLQADEVQLHQKEAPGQPEPVRTV
+FT                   DALGNVHYDDNQVILKGPKGWANLNTKDTNVWEGDYQMVGRQGRGKADLMKQRGENRYT
+FT                   ILDNGSFTSCLPGSDTWSVVGSEIIHDREEQVAEIWNARFKVGPVPIFYSPYLQLPVGD
+FT                   KRRSGFLIPNAKYTTTNYFEFYLPYYWNIAPNMDATITPHYMHRRGNIMWENEFRYLSQ
+FT                   AGAGLMELDYLPSDKVYEDEHPNDDSSRRWLFYWNHSGVMDQVWRFNVDYTKVSDPSYF
+FT                   NDFDNKYGSSTDGYATQKFSVGYAVQNFNATVSTKQFQVFSEQNTSSYSAEPQLDVNYY
+FT                   QNDVGPFDTRIYGQAVHFVNTRDDMPEATRVHLEPTINLPLSNNWGSINTEAKLLATHY
+FT                   QQTNLDWYNSRNTTKLDESVNRVMPQFKVDGKMVFERDMEMLAPGYTQTLEPRAQYLYV
+FT                   PYRDQSDIYNYDSSLLQSDYSGLFRDRTYGGLDRIASANQVTTGVTSRIYDDAAVERFN
+FT                   ISVGQIYYFTESRTGDDNITWENDDKTGSLVWAGDTYWRISERWGLRGGIQYDTRLDNV
+FT                   ATSNSSIEYRRDEDRLVQLNYRYASPEYIQATLPKYYSTAEQYKNGISQVGAVASWPIA
+FT                   DRWSIVGAYYYDTNANKQADSMLGVQYSSCCYAIRVGYERKLNGWDNDKQHAVYDNAIG
+FT                   FNIELRGLSSNYGLGTQEMLRSNILPYQNTL"
+FT   CDS             57023..57838
+FT                   /note="ORF_ID:o106#6"
+FT                   /note="similar to SwissProt Accession Number P31680"
+FT                   /transl_table=11
+FT                   /gene="yabH"
+FT                   /product="Hypothetical 30.6 kd protein in folA-hepA
+FT                   intergenic region (orf81)."
+FT                   /protein_id="BAB96623.1"
+FT                   /translation="MQYWGKIIGVAVALLMGGGFWGVVLGLLIGHMFDKARSRKMAWFA
+FT                   NQRERQALFFATTFEVMGHLTKSKGRVTEADIHIASQLMDRMNLHGASRTAAQNAFRVG
+FT                   KSDNYPLREKMRQFRSVCFGRFDLIRMFLEIQIQAAFADGSLHPNERAVLYVIAEELGI
+FT                   SRAQFDQFLRMMQGGAQFGGGYQQQTGGGNWQQAQRGPTLEDACNVLGVKPTDDATTIK
+FT                   RAYRKLMSEHHPDKLVAKGLPPEMMEMAKQKAQEIQQAYELIKQQKGFK"
+FT   CDS             58625..58783
+FT                   /note="ORF_ID:o106#7"
+FT                   /note="similar to SwissProt Accession Number P39220"
+FT                   /transl_table=11
+FT                   /gene="yabP"
+FT                   /product="Hypothetical 5.9 kd protein in surA-hepA
+FT                   intergenic region."
+FT                   /protein_id="BAB96624.1"
+FT                   /translation="MRDCYLGKKTMKGSNDILYERPGWNANLGVLPRTVLPRTVLTRTV
+FT                   LTWTVLP"
+FT   CDS             58780..58938
+FT                   /note="ORF_ID:o106#8"
+FT                   /note="similar to SwissProt Accession Number P39221"
+FT                   /transl_table=11
+FT                   /gene="yabQ"
+FT                   /product="Hypothetical 5.7 kd protein in surA-hepA
+FT                   intergenic region."
+FT                   /protein_id="BAB96625.1"
+FT                   /translation="MNGATSLYDEVIIINKIPPKKIDTKGVATEEVATKKVLLNKLLTT
+FT                   QLLNEPE"
+FT   misc_feature    complement(59346..60009)
+FT                   /note="ORF is interrupted by frame-shifting"
+FT                   /note="ORF_ID:o106#9"
+FT                   /note="similar to SwissProt Accession Number P39219"
+FT                   /gene="yabO"
+FT                   /product="Hypothetical 24.9 kd protein in surA-hepA
+FT                   intergenic region."
+FT   CDS             59985..60518
+FT                   /note="ORF_ID:o106#10"
+FT                   /transl_table=11
+FT                   /protein_id="BAB96626.1"
+FT                   /translation="MDCSFPSPFSAPLLMRYNDQTQGIQTPTCLIQAFHYLLTVALNGG
+FT                   QFVVTNVRVHCTQSFQTRQLGRQFFVGFVTRCINQCTGRFLDLRFTQLKDSVNILLHGV
+FT                   NQFAAGVTVNRVKLAVKGFKLYLRRQVVAVFIQQHTYRRRWQEAVELQLLRSLGFNHIN
+FT                   QFHQQRTYRQRFVF"
+FT   CDS             complement(60021..62927)
+FT                   /note="ORF_ID:o106#11"
+FT                   /note="similar to SwissProt Accession Number P23852"
+FT                   /transl_table=11
+FT                   /gene="hepA"
+FT                   /product="Probable ATP-dependent helicase HepA."
+FT                   /protein_id="BAB96627.1"
+FT                   /translation="MPFTLGQRWISDTESELGLGTVVAVDARTVTLLFPSTGENRLYAR
+FT                   SDSPVTRVMFNPGDTITSHDGWQMQVEEVKEENGLLTYIGTRLDTEESGVALREVFLDS
+FT                   KLVFSKPQDRLFAGQIDRMDRFALRYRARKYSSEQFRMPYSGLRGQRTSLIPHQLNIAH
+FT                   DVGRRHAPRVLLADEVGLGKTIEAGMILHQQLLSGAAERVLIIVPETLQHQWLVEMLRR
+FT                   FNLRFALFDDERYAEAQHDAYNPFDTEQLVICSLDFARRSKQRLEHLCEAEWDLLVVDE
+FT                   AHHLVWSEDAPSREYQAIEQLAEHVPGVLLLTATPEQLGMESHFARLRLLDPNRFHDFA
+FT                   QFVEEQKNYRPVADAVAMLLAGNKLSNDELNMLGEMIGEQDIEPLLQAANSDSEDAQSA
+FT                   RQELVSMLMDRHGTSRVLFRNTRNGVKGFPKRELHTIKLPLPTQYQTAIKVSGIMGARK
+FT                   SAEDRARDMLYPERIYQEFEGDNATWWNFDPRVEWLMGYLTSHRSQKVLVICAKAATAL
+FT                   QLEQVLREREGIRAAVFHEGMSIIERDRAAAWFAEEDTGAQVLLCSEIGSEGRNFQFAS
+FT                   HMVMFDLPFNPDLLEQRIGRLDRIGQAHDIQIHVPYLEKTAQSVLVRWYHEGLDAFEHT
+FT                   CPTGRTIYDSVYNDLINYLASPDQTEGFDDLIKNCREQHEALKAQLEQGRDRLLEIHSN
+FT                   GGEKAQALAESIEEQDDDTNLIAFAMNLFDIIGINQDDRGDNMIVLTPSDHMLVPDFPG
+FT                   LSEDGITITFDREVALAREDAQFITWEHPLIRNGLDLILSGDTGSSTISLLKNKALPVG
+FT                   TLLVELIYVVEAQAPKQLQLNRFLPPTPVRMLLDKNGNNLAAQVEFETFNRQLNAVNRH
+FT                   TGSKLVNAVQQDVHAILQLGEAQIEKSARALIDAARNEADEKLSAELSRLEALRAVNPN
+FT                   IRDDELTAIESNRQQVMESLDQAGWRLDALRLIVVTHQ"
+FT   CDS             complement(63092..64741)
+FT                   /note="ORF_ID:o107#1"
+FT                   /note="dinA; polB"
+FT                   /note="similar to E. coli DNA polymerase II (EC  2.7.7.7)
+FT                   (Pol II): PIR Accession Number S15943"
+FT                   /transl_table=11
+FT                   /protein_id="BAB96628.1"
+FT                   /translation="MPSRYRLPLRLGRDNSELEWREHGPAKTASFLPQAKGRLIIDGIE
+FT                   ALKSAFWDFSSFSLETVAQELLGEGKSIDNPWDRMDEIDRRFAEDKPALATYNLKDCEL
+FT                   VTQIFHKTEIMPFLLERATVNGLPVDRHGGSVAAFGHLYFPRMHRAGYVAPNLGEVPPH
+FT                   ASPGGYVMDSRPGLYDSVLVLDYKSLYPSIIRTFLIDPVGLVEGMAQPDPEHSTEGFLD
+FT                   AWFSREKHCLPEIVTNIWHGRDEAKRQGNKPLSQALKIIMNAFYGVLGTTACRFFDPRL
+FT                   ASSITMRGHQIMRQTKALIEAQGYDVIYGDTDSTFVWLKGAHSEEEAAKIGRALVQHVN
+FT                   AWWAETLQKQRLTSALELEYETHFCRFLMPTIRGADTGSKKRYAGLIQEGDKQRMVFKG
+FT                   LETVRTDWTPLAQQFQQELYLRIFRNEPYQEYVRETIDKLMAGELDARLVYRKRLRRPL
+FT                   SEYQRNVPPHVRAARLADEENQKRGRPLQYQNRGTIKYVWTTNGPEPLDYQRSPLDYEH
+FT                   YLTRQLQPVAEGILPFIEDNFATLMTGQLGLF"
+FT   CDS             complement(64698..65447)
+FT                   /note="ORF_ID:o107#2"
+FT                   /note="polB"
+FT                   /note="similar to E. coli DNA-directed DNA polymerase (EC
+FT                   2.7.7.7) II: PIR Accession Number S15943"
+FT                   /transl_table=11
+FT                   /protein_id="BAB96629.1"
+FT                   /translation="MAQAGFILTRHWRDTPQGTEVSFWLATDNGPLQVTLAPQESVAFI
+FT                   PADQVPRAQHILQGEQGFRLTPLALKDFHRQPVYGLYCRAHRQLMNYEKRLREGGVTVY
+FT                   EADVRPPERYLMERFITSPVWVEGDMHNGTIVNARLKPHPDYRPPLKWVSIDIETTRHG
+FT                   ELYCIGLEGCGQRIVYMLGPENGDASSLDFELEYVASRPQLLEKLNAWFANYDPDVIIG
+FT                   WNVVQFDLRMLQKHAEPLPSSAASWAR"
+FT   CDS             complement(65522..66217)
+FT                   /note="ORF_ID:o107#3"
+FT                   /note="similar to SwissProt Accession Number P08203"
+FT                   /transl_table=11
+FT                   /gene="araD"
+FT                   /product="L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4)."
+FT                   /protein_id="BAB96630.1"
+FT                   /translation="MLEDLKRQVLEANLALPKHNLVTLTWGNVSAVDRERGVFVIKPSG
+FT                   VDYSVMTADDMVVVSIETGEVVEGTKKPSSDTPTHRLLYQAFPSIGGIVHTHSRHATIW
+FT                   AQAGQSIPATGTTHADYFYGTIPCTRKMTDAEINGEYEWETGNVIVETFEKQGIDAAQM
+FT                   PGVLVHSHGPFAWGKNAEDAVHNAIVLEEVAYMGIFCRQLAPQLPDMQQTLLDKHYLRK
+FT                   HGAKAYYGQ"
+FT   CDS             complement(66502..68004)
+FT                   /note="ORF_ID:o107#4"
+FT                   /note="similar to SwissProt Accession Number P08202"
+FT                   /transl_table=11
+FT                   /gene="araA"
+FT                   /product="L-arabinose isomerase (EC  5.3.1.4)."
+FT                   /protein_id="BAB96631.1"
+FT                   /translation="MTIFDNYEVWFVIGSQHLYGPETLRQVTQHAEHVVNALNTEAKLP
+FT                   CKLVLKPLGTTPDEITAICRDANYDDPCAGLVVWLHTFSPAKMWINGLTMLNKPLLQFH
+FT                   TQFNAALPWDSIDMDFMNLNQTAHGGREFGFIGARMRQQHAVVTGHWQDKQAHERIGSW
+FT                   MRQAVSKQDTRHLKVCRFGDNMREVAVTDGDKVAAQIKFGFSVNTWAVGDLVQVVNSIS
+FT                   DGDVNALVDEYESCYTMTPATQIHGEKRQNVLEAARIELGMKRFLEQGGFHAFTTTFED
+FT                   LHGLKQLPGLAVQRLMQQGYGFAGEGDWKTAALLRIMKVMSTGLQGGTSFMEDYTYHFE
+FT                   KGNDLVLGSHMLEVCPSIAVEEKPILDVQHLGIGGKDDPARLIFNTQTGPAIVASLIDL
+FT                   GDRYRLLVNCIDTVKTPHSLPKLPVANALWKAQPDLPTASEAWILAGGAHHTVFSHALN
+FT                   LNDMRQFAEMHDIEITVIDNDTRLPAFKDALRWNEVYYGFRR"
+FT   CDS             complement(68015..69715)
+FT                   /note="ORF_ID:o108#1"
+FT                   /note="similar to PIR Accession Number B29022"
+FT                   /transl_table=11
+FT                   /gene="araB"
+FT                   /product="Ribulokinase (EC  2.7.1.16)"
+FT                   /protein_id="BAB96632.1"
+FT                   /translation="MAIAIGLDFGSDSVRALAVDCASGEEIATSVEWYPRWQKGQFCDA
+FT                   PNNQFRHHPRDYIESMEAALKTVLAELSVEQRAAVVGIGVDSTGSTPAPIDADGNVLAL
+FT                   RPEFAENPNAMFVLWKDHTAVERSEEITRLCHAPGNVDYSRYIGGIYSSEWFWAKILHV
+FT                   TRQDSAVAQSAASWIELCDWVPALLSGTTRPQDIRRGRCSAGHKSLWHESWGGLPPASF
+FT                   FDELDPILNRHLPSPLFTDTWTADIPVGTLCPEWAQRLGLPESVVISGGAFDCHMGAVG
+FT                   AGAQPNALVKVIGTSTCDILIADKQSVGERAVKGICGQVDGSVVPGFIGLEAGQSAFGD
+FT                   IYAWFGRVLSWPLEQLAAQHPELKAQINASQKQLLPALTEAWAKNPSLDHLPVVLDWFN
+FT                   GRRSPNANQRLKGVITDLNLATDAPLLFGGLIAATAFGARAIMECFTDQGIAVNNVMAL
+FT                   GGIARKNQVIMQACCDVLNRPLQIVASDQCCALGAAIFAAVAAKVHADIPSAQQKMASA
+FT                   VEKTLQPRSEQAQRFEQLYRRYQQWAMSAEQHYLPTSAPAQAAQAVATL"
+FT   CDS             70054..70932
+FT                   /note="ORF_ID:o108#2"
+FT                   /note="similar to PIR Accession Number A91473"
+FT                   /transl_table=11
+FT                   /gene="araC"
+FT                   /product="Arabinose operon regulatory protein"
+FT                   /protein_id="BAB96633.1"
+FT                   /translation="MAEAQNDPLLPGYSFNAHLVAGLTPIEANGYLDFFIDRPLGMKGY
+FT                   ILNLTIRGQGVVKNQGREFVCRPGDILLFPPGEIHHYGRHPEAREWYHQWVYFRPRAYW
+FT                   HEWLNWPSIFANTGFFRPDEAHQPHFSDLFGQIINAGQGEGRYSELLAINLLEQLLLRR
+FT                   MEAINESLHPPMDNRVREACQYISDHLADSNFDIASVAQHVCLSPSRLSHLFRQQLGIS
+FT                   VLSWREDQRISQAKLLLSTTRMPIATVGRNVGFDDQLYFSRVFKKCTGASPSEFRAGCE
+FT                   EKVNDVAVKLS"
+FT   CDS             71066..71779
+FT                   /note="ORF_ID:o108#3"
+FT                   /note="similar to PIR Accession Number S40581"
+FT                   /transl_table=11
+FT                   /gene="yabI"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96634.1"
+FT                   /translation="MAVVLVAFLESLALVGLILPGTVLMAGLGALIGSGELSFWHAWLA
+FT                   GIIGCLMGDWISFWLGWRFKKPLHRWSFLKKNKALLDKTEHALHQHSMFTILVGRFVGP
+FT                   TRPLVPMVAGMLDLPVAKFITPNIIGCLLWPPFYFLPGILAGAAIDIPAGMQSGEFKWL
+FT                   LLATAVFLWVGGWLCWRLWRSGKATDRLSHYLSRGRLLWLTPLISAIGVVALVVLIRHP
+FT                   LMPVYIDILRKVVGV"
+FT   CDS             complement(71894..72592)
+FT                   /note="ORF_ID:o108#4"
+FT                   /note="similar to PIR Accession Number S40582"
+FT                   /transl_table=11
+FT                   /gene="yabJ"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96635.1"
+FT                   /translation="MLKLTDITWLYHHLPMRFSLTVERGEQVAILGPSGAGKSTLLNLI
+FT                   AGFLTPASGSLTIDGVDHTTMPPSRRPVSMLFQENNLFSHLTVAQNIGLGLNPGLKLNA
+FT                   VQQGKMHAIARQMGIDNLMARLPGELSGGQRQRVALARCLVREQPILLLDELFSALDPA
+FT                   LRQEMLTLVSTSCQQQKMTLLMVSHSVEDAARIATRSVVVADGRIAWQGMTNELLSGKA
+FT                   SASALLGITG"
+FT   CDS             complement(72576..74186)
+FT                   /note="ORF_ID:o108#5"
+FT                   /note="similar to PIR Accession Number S40583"
+FT                   /transl_table=11
+FT                   /gene="yabK"
+FT                   /product="Hypothetical protein"
+FT                   /protein_id="BAB96636.1"
+FT                   /translation="MATRRQPLIPGWLIPGVSATTLVVAVALAAFLALWWNAPQDDWVA
+FT                   VWQDSYLWHVVRFSFWQAFLSALLSVIPAIFLARALYRRRFPGRLALLRLCAMTLILPV
+FT                   LVAVFGILSVYGRQGWLATLCQSLGLEWTFSPYGLQGILLAHVFFNLPMASRLLLQALE
+FT                   NIPGEQRQLAAQLGMRSWHFFRFVEWPWLRRQIPPVAALIFMLCFASFATVLSLGGGPQ
+FT                   ATTIELAIYQALSYDYDPARAAMLALLQMVCCLGLVLLSQRLSKAIAPGTTLLQGWRDP
+FT                   DDRLHSRICDTVLIVLALLLLLPPLLAVIVDGVNRQLPEVLAQPVLWQALWTSLRIALA
+FT                   AGVLCVVLTMMLLWSSRELRARQKMLAGQVLEMSGMLILAMPGIVLATGFFLLLNNTIG
+FT                   LPQSADGIVIFTNALMAIPYALKVLENPMRDITARYSMLCQSLGIEGWSRLKVVELRAL
+FT                   KRPLAQALAFACVLSIGDFGVVALFGNDDFRTLPFYLYQQIGSYRSQDGAVTTLILLLL
+FT                   CFLLFTVIEKLPGRNVKTD"
+FT   CDS             complement(74162..75133)
+FT                   /note="ORF_ID:o108#6"
+FT                   /note="similar to PIR Accession Number S40584"
+FT                   /transl_table=11
+FT                   /gene="tbpA"
+FT                   /product="Hypothetical protein."
+FT                   /protein_id="BAB96637.1"
+FT                   /translation="MSAPAVAVTAPVFAKPVLTVYTYDSFAADWGPGPVVKKAFEADCN
+FT                   CELKLVALEDGVSLLNRLRMEGKNSKADVVLGLDNNLLDAASKTGLFAKSGVAADAVNV
+FT                   PGGWNNDTFVPFDYGYFAFVYDKNKLKNPPQSLKELVESDQNWRVIYQDPRTSTPGLGL
+FT                   LLWMQKVYGDDAPQAWQKLAKKTVTVTKGWSEAYGLFLKGESDLVLSYTTSPAYHILEE
+FT                   KKDNYAAANFSEGHYLQVEVAARTAASKQPELAQKFLQFMVSPAFQNAIPTGNWMYPVA
+FT                   NVTLPAGFEKLTKPATTLEFTPAEVAAQRQAWISEWQRAVSR"
+FT   CDS             complement(75308..76963)
+FT                   /note="ORF_ID:o108#7"
+FT                   /note="similar to SwissProt Accession Number P33595"
+FT                   /transl_table=11
+FT                   /gene="yabN"
+FT                   /product="Hypothetical 63.9 kd protein in tbpA-leuD
+FT                   intergenic region (orf103)."
+FT                   /protein_id="BAB96638.1"
+FT                   /translation="MPSARLQQQFIRLWQCCEGKSQDTTLNELAALLSCSRRHMRTLLN
+FT                   TMQDRGWLTWEAEVGRGKRSRLTFLYTGLALQQQRAEDLLEQDRIDQLVQLVGDKATVR
+FT                   QMLVSHLGRSFRQGRHILRVLYYRPLRNLLPGSALRRSETHIARQIFSSLTRINEENGE
+FT                   LEADIAHHWQQISPLHWRFFLRPGVHFHHGRELEMDDVIASLKRINTLPLYSHIADIVS
+FT                   PTPWTLDIHLTQPDRWLPLLLGQVPAMILPREWETLSNFASHPIGTGPYAVIRNSTNQL
+FT                   KIQAFDDFFGYRALIDEVNVWVLPEIADEPAGGLMLKGPQGEEKEIESRLEEGCYYLLF
+FT                   DSRTHRGANQQVRDWASYVLSPTNLVYFAEEQYQQLWFPAYGLFPRWHHARTIKSEKPA
+FT                   GLESLTLTFCQDHSEHRVIAGIMQQILASHQVTLKIKEIDYDQWHTGEIESDIWLNSAN
+FT                   FTLPLDFSVFAHLCEVPLLQHCIPIDWQADAARWRNGEMNLANWCQQLVASKAMVPLLH
+FT                   HWLIIQGQRSMRGLRMNTLGWFDFKSAWFAPPDP"
+FT   CDS             77285..78463
+FT                   /note="ORF_ID:o108#8"
+FT                   /note="similar to SwissProt Accession Number P31675"
+FT                   /transl_table=11
+FT                   /gene="yabM"
+FT                   /product="Hypothetical 42.7 kd protein in tbpA-leuD
+FT                   intergenic region (orf104)."
+FT                   /protein_id="BAB96639.1"
+FT                   /translation="MIWIMTMARRMNGVYAAFMLVAFMMGVAGALQAPTLSLFLSREVG
+FT                   AQPFWIGLFYAVNAIAGIGVSLWLAKRSDSQGDRRKLIIFCCLMAIGNALLFAFNRHYL
+FT                   TLITCGVLLASLANTAMPQLFALAREYADNSAREVVMFSSVMRAQLSLAWVIGPPLAFM
+FT                   LALNYGFTVMFSIAAGIFTLSLVLIAFMLPSVARVELPSENALSMQGGWQDSNVRMLFV
+FT                   ASTLMWTCNTMYIIDMPLWISSELGLPDKLAGFLMGTAAGLEIPAMILAGYYVKRYGKR
+FT                   RMMVIAVAAGVLFYTGLIFFNSRMALMTLQLFNAVFIGIVAGIGMLWFQDLMPGRAGAA
+FT                   TTLFTNSISTGVILAGVIQGAIAQSWGHFAVYWVIAVISVVALFLTAKVKDV"
+FT   CDS             complement(78512..79117)
+FT                   /note="ORF_ID:o108#9"
+FT                   /note="similar to PIR Accession Number S40585"
+FT                   /transl_table=11
+FT                   /gene="leuD"
+FT                   /product="Isopropylmalate isomerase subunit"
+FT                   /protein_id="BAB96640.1"
+FT                   /translation="MAEKFIKHTGLVVPLDAANVDTDAIIPKQFLQKVTRTGFGAHLFN
+FT                   DWRFLDEKGQQPNPDFVLNFPQYQGASILLARENFGCGSSREHAPWALTDYGFKVVIAP
+FT                   SFADIFYGNSFNNQLLPVKLSDAEVDELFALVKANPGIHFDVDLEAQEVKAGEKTYRFT
+FT                   IDAFRRHCMMNGLDSIGLTLQHDDAIAAYEAKQPAFMN"
+FT   CDS             complement(79128..80528)
+FT                   /db_xref="SWISS-PROT:P30127"
+FT                   /note="ORF_ID:o109#1"
+FT                   /note="similar to PIR Accession Number S40586"
+FT                   /transl_table=11
+FT                   /gene="leuC"
+FT                   /product="3-isopropylmalate dehydratase (EC 4.2.1.33) alpha
+FT                   chain"
+FT                   /protein_id="BAB96641.1"
+FT                   /translation="MAKTLYEKLFDAHVVYEAENETPLLYIDRHLVHEVTSPQAFDGLR
+FT                   AHGRPVRQPGKTFATMDHNVSTQTKGINACGEMARIQMQELIKNCKEFGVELYDLNHPY
+FT                   QGIVHVMGPEQGVTLPGMTIVCGDSHTATHGAFGALAFGIGTSEVEHVLATQTLKQGRA
+FT                   KTMKIEVQGKAAPGITAKDIVLAIIGKTGSAGGTGHVVEFCGEAIRDLSMEGRMTLCNM
+FT                   AIEMGAKAGLVAPDETTFNYVKGRLHAPKGKDFDDAVAYWKTLQTDEGATFDTVVTLQA
+FT                   EEISPQVTWGTNPGQVISVNDNIPDPASFADPVERASAEKALAYMGLKPGIPLTEVAID
+FT                   KVFIGSCTNSRIEDLRAAAEIVKGRKVAPGVQALVVPGSGPVKAQAEAEGLDKIFIEAG
+FT                   FEWRLPGCSMCLAMNNDRLNPGERCASTSNRNFEGRQGRGGRTHLVSPAMAAAAAVTGH
+FT                   FADIRNIK"
+FT   CDS             complement(80531..81622)
+FT                   /note="ORF_ID:o109#2"
+FT                   /note="similar to SwissProt Accession Number P30125"
+FT                   /transl_table=11
+FT                   /gene="leuB"
+FT                   /product="3-isopropylmalate dehydrogenase (EC  1.1.1.85)
+FT                   (beta-ipm dehydrogenase) (imdH) (3-ipm-dh)."
+FT                   /protein_id="BAB96642.1"
+FT                   /translation="MSKNYHIAVLPGDGIGPEVMTQALKVLDAVRNRFAMRITTSHYDV
+FT                   GGAAIDNHGQPLPPATVEGCEQADAVLFGSVGGPKWEHLPPDQQPERGALLPLRKHFKL
+FT                   FSNLRPAKLYQGLEAFCPLRADIAANGFDILCVRELTGGIYFGQPKGREGSGQYEKAFD
+FT                   TEVYHRFEIERIARIAFESARKRRHKVTSIDKANVLQSSILWREIVNEIATEYPDVELA
+FT                   HMYIDNATMQLIKDPSQFDVLLCSNLFGDILSDECAMITGSMGMLPSASLNEQGFGLYE
+FT                   PAGGSAPDIAGKNIANPIAQILSLALLLRYSLDADDAACAIERAINRALEEGIRTGDLA
+FT                   RGAAAVSTDEMGDIIARYVAEGV"
+FT   CDS             complement(81622..83193)
+FT                   /db_xref="SWISS-PROT:P09151"
+FT                   /note="ORF_ID:o109#3"
+FT                   /note="similar to PIR Accession Number S40588"
+FT                   /transl_table=11
+FT                   /gene="leuA"
+FT                   /product="2-Isopropylmalate synthase"
+FT                   /protein_id="BAB96643.1"
+FT                   /translation="MSQQVIIFDTTLRDGEQALQASLSVKEKLQIALALERMGVDVMEV
+FT                   GFPVSSPGDFESVQTIARQVKNSRVCALARCVEKDIDVAAESLKVAEAFRIHTFIATSP
+FT                   MHIATKLRSTLDEVIERAIYMVKRARNYTDDVEFSCEDAGRTPIADLARVVEAAINAGA
+FT                   TTINIPDTVGYTMPFEFAGIISGLYERVPSIGKAIISVHTHDDLGLAVGNSLAAVHAGA
+FT                   RQVEGAMNGIGERAGNCSLEEVIMAIKVRKDILNVHTAINHQEIWRTSQLVSQICNMPI
+FT                   PANKAIVGSGAFAHSSGIHQDGVLKNRENYEIMTPESIGLNQIQLNLTSRSGRAAVKHR
+FT                   MDEMGYKESEYNLDNLYDAFLKLADKKGQVFDYDLEALAFIGKQQEEPEHFRLDYFSVQ
+FT                   SGSNDIATAAVKLACGEEVKAEAANGNGPVDAVYQAINRITEYNVELVKYSLTAKGHGK
+FT                   DALGQVDIVANYNGRRFHGVGLATDIVESSAKAMVHVLNNIWRAAEVEKELQRKAQHNE
+FT                   NNKETV"
+FT   CDS             complement(83286..83372)
+FT                   /note="ORF_ID:o109#4"
+FT                   /note="similar to PIR Accession Number A30376"
+FT                   /transl_table=11
+FT                   /gene="leuLP"
+FT                   /product="LeuABCD leader peptide."
+FT                   /protein_id="BAB96644.1"
+FT                   /translation="MTHIVRFIGLLLLNASSLRGRRVSGIQH"
+FT   CDS             83855..84904
+FT                   /note="ORF_ID:o109#5"
+FT                   /note="similar to PIR Accession Number S40589"
+FT                   /transl_table=11
+FT                   /gene="lueO"
+FT                   /product="LeuO protein."
+FT                   /protein_id="BAB96645.1"
+FT                   /translation="MTHSTAMDSVFIRTRIFMFSEFYSFCFFLFYMHDKSYSSGLFLCI
+FT                   PIRERELSVTVELSMPEVQTDHPETAELSKPQLRMVDLNLLTVFDAVMQEQNITRAAHV
+FT                   LGMSQPAVSNAVARLKVMFNDELFVRYGRGIQPTARAFQLFGSVRQALQLVQNELPGSG
+FT                   FEPASSERVFHLCVCSPLDSILTSQIYNHIEQIAPNIHVMFKSSLNQNTEHQLRYQETE
+FT                   FVISYEDFHRPEFTSVPLFKDEMVLVASKNHPTIKGPLLKHDVYNEQHAAVSLDRFASF
+FT                   SQPWYDTVDKQASIAYQGMAMMSVLSVVSQTHLVAIAPRWLAEEFAESLELQVLPLPLK
+FT                   QNSRTCYLS"
+FT   CDS             85294..86994
+FT                   /note="ORF_ID:o109#6"
+FT                   /note="similar to PIR Accession Number S14385"
+FT                   /transl_table=11
+FT                   /gene="ilvI"
+FT                   /product="Acetolactate synthase (EC  4.1.3.18) III large
+FT                   chain."
+FT                   /protein_id="BAB96646.1"
+FT                   /translation="MEMLSGGEMVVRSLIDQGVKQVFGYPGGAVLDIYDALHTVGGIDH
+FT                   VLVRHEQAAVHMADGLARATGEVGVVLVTSGPGATNAITGIATAYMDSIPLVVLSGQVA
+FT                   TSLIGYDAFQECDMVGISRPVVKHSFLVKQTEDIPQVLKKAFWLAASGRPGPVVVDLPK
+FT                   DILNPANKLPYVWPESVSMRSYNPTTTGHKGQIKRALQSVVAVKKPVVYVGGGAITAGC
+FT                   HQQLKETVEALNLPVVCSLMGLGAFPATHRQVLGMLGMHGTYEANMTMHNADVIFAVGV
+FT                   RFDDRTTNNLAKYCPNATVLHIDIDPTSISKTVTADIPIVGDARQVLEQMLELLSQESA
+FT                   HQPLDEIRDWWQQIEQWRARQCLKYDTHSEKIKPQAVIETLWRLTKGDAYVTSDVGQHQ
+FT                   MFAALYYPFDKPRRWINSGGLGSMGFGLPAALGVKMAFPEETVVCVTGDGSIQMNIQEL
+FT                   STALQYELPVLVVNLNNRYLGMVKQWQDMIYSGRHSQSYMQSLPDFVRRGAYGHVGIQI
+FT                   SHPHGWKANLARRWNRCAIIAWCLLMLPSMAASTSTRCRFAGAEWMKCG"
+FT   CDS             87017..87508
+FT                   /note="ORF_ID:o109#7"
+FT                   /note="similar to SwissProt Accession Number P00894"
+FT                   /transl_table=11
+FT                   /gene="ilvH"
+FT                   /gene="brnP"
+FT                   /product="Acetolactate synthase isozyme III small subunit
+FT                   (EC  4.1.3.18) (ahas-III) (acetohydroxy-acid synthase III
+FT                   small subunit) (als-III)."
+FT                   /protein_id="BAB96647.1"
+FT                   /translation="MRRILSVLLENESGALSRVIGLFSQRGYNIESLTVAPTDDPTLSR
+FT                   MTIQTVGDEKVLEQIEKQLHKLVDVLRVSELGQGAHVEREIMLVKIQASGYGRDEVKRN
+FT                   TEIFRGQIIDVTPSLYTVQLAGTSGKLSAFLASIRDVAKIVEVARSGVVGLSRGDKIMR
+FT                   "
+FT   CDS             87688..88692
+FT                   /note="ORF_ID:o109#8"
+FT                   /note="similar to PIR Accession Number JU0298"
+FT                   /transl_table=11
+FT                   /gene="shl"
+FT                   /gene="fruR"
+FT                   /product="Pep-fructosephosphotransferase system repressor."
+FT                   /protein_id="BAB96648.1"
+FT                   /translation="MKLDEIARLAGVSRTTASYVINGKAKQYRVSDKTVEKVMAVVREH
+FT                   NYHPNAVAAGLRAGRTRSIGLVIPDLENTSYTRIANYLERQARQRGYQLLIACSEDQPD
+FT                   NEMRCIEHLLQRQVDAIIVSTSLPPEHPFYQRWANDPFPIVALDRALDREHFTSVVGAD
+FT                   QDDAEMLAEELRKFPAETVLYLGALPELSVSFLREQGFRTAWKDDPREVHFLYANSYER
+FT                   EAAAQLFEKWLETHPMPQALFTTSFALLQGVMDVTLRRDGKLPSDLAIATFGDNELLDF
+FT                   LQCPVLAVAQRHRDVAERVLEIVLASLDEPRKPKPGLTRIKRNLYRRGVLSRS"
+FT   CDS             89294..89752
+FT                   /note="ORF_ID:o109#9"
+FT                   /note="similar to PIR Accession Number S14388"
+FT                   /transl_table=11
+FT                   /gene="yabB"
+FT                   /product="Hypothetical protein C."
+FT                   /protein_id="BAB96649.1"
+FT                   /translation="MFRGATLVNLDSKGRLSVPTRYREQLLENAAGQMVCTIDIYHPCL
+FT                   LLYPLPEWEIIEQKLSRLSSMNPVERRVQRLLLGHASECQMDGAGRLLIAPVLRQHAGL
+FT                   TKEVMLVGQFNKFELWDETTWHQQVKEDIDAEQLATGDLSERLQDLSL"
+FT   CDS             89754..90695
+FT                   /note="ORF_ID:o109#10"
+FT                   /note="similar to SwissProt Accession Number P18595"
+FT                   /transl_table=11
+FT                   /gene="yabC"
+FT                   /product="Hypothetical 34.9 kd protein in fruR-ftsL
+FT                   intergenic region (orfB)."
+FT                   /protein_id="BAB96650.1"
+FT                   /translation="MMENYKHTTVLLDEAVNGLNIRPDGIYIDGTFGRGGHSRLILSQL
+FT                   GEEGRLLAIDRDPQAIAVAKTIDDPRFSIIHGPFSALGEYVAERDLIGKIDGILLDLGV
+FT                   SSPQLDDAERGFSFMRDGPLDMRMDPTRGQSAAEWLQTAEEADIAWVLKTYGEERFAKR
+FT                   IARAIVERNREQPMTRTKELAEVVAAATPVKDKFKHPATRTFQAVRIWVNSELEEIEQA
+FT                   LKSSLNVLAPGGRLSIISFHSLEDRIVKRFMRENSRGPQVPAGLPMTEEQLKKLGGRQL
+FT                   RALGKLMPGEEEVAENPRARSSVLRIAERTNA"
+FT   CDS             90692..91024
+FT                   /note="ORF_ID:o110#1"
+FT                   /note="similar to PIR Accession Number S14387"
+FT                   /transl_table=11
+FT                   /gene="ftsL"
+FT                   /product="Cell division protein FtsL"
+FT                   /protein_id="BAB96651.1"
+FT                   /translation="MISRVTEALSKVKGSMGSHERHALPGVIGDDLLRFGKLPLCLFIC
+FT                   IILTAVTVVTTAHHTRLLTAQREQLVLERDALDIEWRNLILEENALGDHSRVERIATEK
+FT                   LQMQTC"
+FT   CDS             91074..92840
+FT                   /note="ORF_ID:o110#2"
+FT                   /note="similar to PIR Accession Number A93123"
+FT                   /transl_table=11
+FT                   /gene="ftsI"
+FT                   /gene="pbpB"
+FT                   /product="Penicillin-binding protein 3 precursor."
+FT                   /protein_id="BAB96652.1"
+FT                   /translation="MKAAAKTQKPKRQEEHANFISWRFALLCGCILLALAFLLGRVAWL
+FT                   QVISPDMLVKEGDMRSLRVQQVSTSRGMITDRSGRPLAVSVPVKAIWADPKEVHDAGGI
+FT                   SVGDRWKALANALNIPLDQLSARINANPKGRFIYLARQVNPDMADYIKKLKLPGIHLRE
+FT                   ESRRYYPSGEVTAHLIGFTNVDSQGIEGVEKSFDKWLTGQPGERIVRKDRYGRVIEDIS
+FT                   STDSQAAHNLALSIDERLQALVYRELNNAVAFNKAESGSAVLVDVNTGEVLAMANSPSY
+FT                   NPNNLSGTPKEAMRNRTITDVFEPGSTVKPMVVMTALQRGVVRENSVLNTIPYRINGHE
+FT                   IKDVARYSELTLTGVLQKSSNVGVSKLALAMPSSALVDTYSRFGLGKATNLGLVGERSG
+FT                   LYPQKQRWSDIERATFSFGYGLMVTPLQLARVYATIGSYGIYRPLSITKVDPPVPGERV
+FT                   FPESIVRTVVHMMESVALPGGGGVKAAIKGYRIAIKTGTAKKVGPDGRYINKYIAYTAG
+FT                   VAPASQPRFALVVVINDPQAGKYYGGAVSAPVFGAIMGGVLRTMNIEPDALTTGDKNEF
+FT                   VINQGEGTGGRS"
+FT   CDS             92827..94314
+FT                   /note="ORF_ID:o110#3"
+FT                   /note="similar to PIR Accession Number S40595"
+FT                   /transl_table=11
+FT                   /gene="murE"
+FT                   /product="UDP-N-acetylmuramoylalanyl-D-glutamate--2,
+FT                   6-diaminopimelate ligase (EC 6.3.2.13) murE"
+FT                   /protein_id="BAB96653.1"
+FT                   /translation="MADRNLRDLLAPWVPDAPSRALREMTLDSRVAAAGDLFVAVVGHQ
+FT                   ADGRRYIPQAIAQGVAAIIAEAKDEATDGEIREMHGVPVIYLSQLNERLSALAGRFYHE
+FT                   PSDNLRLVGVTGTNGKTTTTQLLAQWSQLLGEISAVMGTVGNGLLGKVIPTENTTGSAV
+FT                   DVQHELAGLVDQGATFCAMEVSSHGLVQHRVAALKFAASVFTNLSRDHLDYHGDMEHYE
+FT                   AAKWLLYSEHHCGQAIINADDEVGRRWLAKLPDAVAVSMEDHINPNCHGRWLKATEVNY
+FT                   HDSGATIRFSSSWGDGEIESHLMGAFNVSNLLLALATLLALGYPLADLLKTAARLQPVC
+FT                   GRMEVFTAPGKPTVVVDYAHTPDALEKALQAARLHCAGKLWCVFGCGGDRDKGKRPLMG
+FT                   AIAEEFADVAVVTDDNPRTEEPRAIINDILAGMLDAGHAKVMEGRAEAVTCAVMQAKEN
+FT                   DVVLVAGKGHEDYQIVGNQRLDYSDRVTVARLLGVIA"
+FT   CDS             94311..95669
+FT                   /note="ORF_ID:o110#4"
+FT                   /note="similar to SwissProt Accession Number P11880"
+FT                   /transl_table=11
+FT                   /gene="murF"
+FT                   /gene="mra"
+FT                   /product="UDP-n-acetylmuramoylalanyl-d-glutamyl-2,
+FT                   6-diaminopimelate-d-alanyl-d-alanyl ligase (EC 6.3.2.15)
+FT                   (UDP-murnac-pentapeptide synthetase)
+FT                   (d-alanyl-d-alanine-adding enzyme)."
+FT                   /protein_id="BAB96654.1"
+FT                   /translation="MISVTLSQLTDILNGELQGADITLDAVTTDTRKLTPGCLFVALKG
+FT                   ERFDAHDFADQAKAGAAGALLVSRPLDIDLPQLIVKDTRLAFGELAAWVRQQVPARVVA
+FT                   LTGSSGKTSVKEMTAAILSQCGNTLYTAGNLNNDIGVPMTLLRLTPEYDYAVIELGANH
+FT                   QGEIAWTVSLTRPERALVNNLAAAHLEGFGSLAGVAKAKGEIFSGLPENGIAIMNADNN
+FT                   DWLNWQSVIGSRKVWRFSPNAANSDFTATNIHVTSHGTEFTLQTPTGSVDVLLPLPGRH
+FT                   NIANALAAAALSMSVGATLDAIKAGLANLKAVPGRLFPIQLAENQLLLDDSYNANVGSM
+FT                   TAAVQVLAEMPGYRVLVVGDMAELGAESEACHVQVGEAAKAAGIDRVLSVGKQSHAIST
+FT                   ASGVGEHFADKTALITRLKLLIAEQQVITILVKGSRSAAMEEVVRALQENGTC"
+FT   CDS             95663..96745
+FT                   /note="ORF_ID:o110#5"
+FT                   /note="similar to SwissProt Accession Number P15876"
+FT                   /transl_table=11
+FT                   /gene="mraY"
+FT                   /gene="murX"
+FT                   /product="Phospho-n-acetylmuramoyl-pentapeptide-transferas
+FT                   e (EC  2.7.8.13)."
+FT                   /protein_id="BAB96655.1"
+FT                   /translation="MLVWLAEHLVKYYSGFNVFSYLTFRAIVSLLTALFISLWMGPRMI
+FT                   AHLQKLSFGQVVRNDGPESHFSKRGTPTMGGIMILTAIVISVLLWAYPSNPYVWCVLVV
+FT                   LVGYGVIGFVDDYRKVVRKDTKGLIARWKYFWMSVIALGVAFALYLAGKDTPATQLVVP
+FT                   FFKDVMPQLGLFYILLAYFVIVGTGNAVNLTDGLDGLAIMPTVFVAGGFALVAWATGNM
+FT                   NFASYLHIPYLRHAGELVIVCTAIVGAGLGFLWFNTYPAQVFMGDVGSLALGGALGIIA
+FT                   VLLRQEFLLVIMGGVFVVETLSVILQVGSFKLRGQRIFRMAPIHHHYELKGWPEPRVIV
+FT                   RFWIISLMLVLIGLATLKVR"
+FT   CDS             96748..98064
+FT                   /note="ORF_ID:o110#6"
+FT                   /note="similar to PIR Accession Number S08396"
+FT                   /transl_table=11
+FT                   /gene="murD"
+FT                   /product="UDP-n-acetylmuramoylalanine-d-glutamate ligase
+FT                   (EC  6.3.2.9)."
+FT                   /protein_id="BAB96656.1"
+FT                   /translation="MADYQGKNVVIIGLGLTGLSCVDFFLARGVTPRVMDTRMTPPGLD
+FT                   KLPEAVERHTGSLNDEWLMAADLIVASPGIALAHPSLSAAADAGIEIVGDIELFCREAQ
+FT                   APIVAITGSNGKSTVTTLVGEMAKAAGVNVGVGGNIGLPALMLLDDECELYVLELSSFQ
+FT                   LETTSSLQAVAATILNVTEDHMDRYPFGLQQYRAAKLRIYENAKVCVVNADDALTMPIR
+FT                   GADERCVSFGVNMGDYHLNHQQGETWLRVKGEKVLNVKEMKLSGQHNYTNALAALALAD
+FT                   AAGLPRASSLKALTTFTGLPHRFEVVLEHNGVRWINDSKATNVGSTEAALNGLHVDGTL
+FT                   HLLLGGDGKSADFSPLARYLNGDNVRLYCFGRDGAQLAALRPEVAEQTETMEQAMRLLA
+FT                   PRVQPGDMVLLSPACASLDQFKNFEQRGNEFARLAKELG"
+FT   CDS             98064..99308
+FT                   /note="ORF_ID:o111#1"
+FT                   /note="similar to PIR Accession Number A32581"
+FT                   /transl_table=11
+FT                   /gene="ftsW"
+FT                   /product="Cell division protein FtsW."
+FT                   /protein_id="BAB96657.1"
+FT                   /translation="MRLSLPRLKMPRLPGFSILVWISTALKGWVMGSREKDTDSLIMYD
+FT                   RTLLWLTFGLAAIGFIMVTSASMPIGQRLTNDPFFFAKRDGVYLILAFILAIITLRLPM
+FT                   EFWQRYSATMLLGSIILLMIVLVVGSSVKGASRWIDLGLLRIQPAELTKLSLFCYIANY
+FT                   LVRKGDEVRNNLRGFLKPMGVILVLAVLLLAQPDLGTVVVLFVTTLAMLFLAGAKLWQF
+FT                   IAIIGMGISAVVLLILAEPYRIRRVTAFWNPWEDPFGSGYQLTQSLMAFGRGELWGQGL
+FT                   GNSVQKLEYLPEAHTDFIFAIIGEELGYVGVVLALLMVFFVAFRAMSIGRKALEIDHRF
+FT                   SGFLACSIGIWFSFQALVNVGAAAGMLPTKGLTLPLISYGGSSLLIMSTAIMMLLRIDY
+FT                   ETRLEKAQAFVRGSR"
+FT   CDS             99305..100372
+FT                   /note="ORF_ID:o111#2"
+FT                   /note="similar to PIR Accession Number JQ0544"
+FT                   /transl_table=11
+FT                   /gene="murG"
+FT                   /product="MurG protein."
+FT                   /protein_id="BAB96658.1"
+FT                   /translation="MSGQGKRLMVMAGGTGGHVFPGLAVAHHLMAQGWQVRWLGTADRM
+FT                   EADLVPKHGIEIDFIRISGLRGKGIKALIAAPLRIFNAWRQARAIMKAYKPDVVLGMGG
+FT                   YVSGPGGLAAWSLGIPVVLHEQNGIAGLTNKWLAKIATKVMQAFPGAFPNAEVVGNPVR
+FT                   TDVLALPLPQQRLAGREGPVRVLVVGGSQGARILNQTMPQVAAKLGDSVTIWHQSGKGS
+FT                   QQSVEQAYAEAGQPQHKVTEFIDDMAAAYAWADVVVCRSGALTVSEIAAAGLPALFVPF
+FT                   QHKDRQQYWNALPLEKAGAAKIIEQPQLSVDAVANTLAGWSRETLLTMAERARAASIPD
+FT                   ATERVANEVSRVARA"
+FT   CDS             100426..101901
+FT                   /note="ORF_ID:o111#3"
+FT                   /note="similar to PIR Accession Number JQ0545"
+FT                   /transl_table=11
+FT                   /gene="murC"
+FT                   /product="UDP-n-acetylmuramate-alanine ligase (EC
+FT                   6.3.2.8)."
+FT                   /protein_id="BAB96659.1"
+FT                   /translation="MNTQQLAKLRSIVPEMRRVRHIHFVGIGGAGMGGIAEVLANEGYQ
+FT                   ISGSDLAPNPVTQQLMNLGATIYFNHRPENVRDASVVVVSSAISADNPEIVAAHEARIP
+FT                   VIRRAEMLAELMRFRHGIAIAGTHGKTTTTAMVSSIYAEAGLDPTFVNGGLVKAAGVHA
+FT                   RLGHGRYLIAEADESDASFLHLQPMVAIVTNIEADHMDTYQGDFENLKQTFINFLHNLP
+FT                   FYGRAVMCVDDPVIRELLPRVGRQTTTYGFSEDADVRVEDYQQIGPQGHFTLLRQDKEP
+FT                   MRVTLNAPGRHNALNAAAAVAVATEEGIDDEAILRALESFQGTGRRFDFLGEFPLEPVN
+FT                   GKSGTAMLVDDYGHHPTEVDATIKAARAGWPDKNLVMLFQPHRFTRTRDLYDDFANVLT
+FT                   QVDTLLMLEVYPAGEAPIPGADSRSLCRTIRGRGKIDPILVPDPARVAEMLAPVLTGND
+FT                   LILVQGAGNIGKIARSLAEIKLKPQTPEEEQHD"
+FT   CDS             101894..102814
+FT                   /note="ORF_ID:o111#4"
+FT                   /note="similar to PIR Accession Number A30289"
+FT                   /transl_table=11
+FT                   /gene="ddl"
+FT                   /gene="ddlB"
+FT                   /product="D-alanine-d-alanine ligase (EC  6.3.2.4) B."
+FT                   /protein_id="BAB96660.1"
+FT                   /translation="MTDKIAVLLGGTSAEREVSLNSGAAVLAGLREGGIDAYPVDPKEV
+FT                   DVTQLKSMGFQKVFIALHGRGGEDGTLQGMLELMGLPYTGSGVMASALSMDKLRSKLLW
+FT                   QGAGLPVAPWVALTRAEFEKGLSDKQLAEISALGLPVIVKPSREGSSVGMSKVVAENAL
+FT                   QDALRLAFQHDEEVLIEKWLSGPEFTVAILGEEILPSIRIQPSGTFYDYEAKYLSDETQ
+FT                   YFCPAGLEASQEANLQALVLKAWTTLGCKGWGRIDVMLDSDGQFYLLEANTSPGMTSHS
+FT                   LVPMAARQAGMSFSQLVVRILELAD"
+FT   CDS             102816..103646
+FT                   /note="ORF_ID:o111#5"
+FT                   /note="similar to PIR Accession Number S10852"
+FT                   /transl_table=11
+FT                   /gene="ftsQ"
+FT                   /product="Cell division protein FtsQ."
+FT                   /protein_id="BAB96661.1"
+FT                   /translation="MSQAALNTRNSEEEVSSRRNNGTRLAGILFLLTVLTTVLVSGWVV
+FT                   LGWMEDAQRLPLSKLVLTGERHYTRNDDIRQSILALGEPGTFMTQDVNIIQTQIEQRLP
+FT                   WIKQVSVRKQWPDELKIHLVEYVPIARWNDQHMVDAEGNTFSVPPERTSKQVLPMLYGP
+FT                   EGSANEVLQGYREMGQMLAKDRFTLKEAAMTARRSWQLTLNNDIKLNLGRGDTMKRLAR
+FT                   FVELYPVLQQQAQTDGKRISYVDLRYDSGAAVGWAPLPPEESTQQQNQAQAEQQ"
+FT   CDS             103643..104905
+FT                   /note="ORF_ID:o111#6"
+FT                   /note="similar to SwissProt Accession Number P06137"
+FT                   /transl_table=11
+FT                   /gene="ftsA"
+FT                   /gene="divA"
+FT                   /product="Cell division protein FtsA."
+FT                   /protein_id="BAB96662.1"
+FT                   /translation="MIKATDRKLVVGLEIGTAKVAALVGEVLPDGMVNIIGVGSCPSRG
+FT                   MDKGGVNDLESVVKCVQRAIDQAELMADCQISSVYLALSGKHISCQNEIGMVPISEEEV
+FT                   TQEDVENVVHTAKSVRVRDEHRVLHVIPQEYAIDYQEGIKNPVGLSGVRMQAKVHLITC
+FT                   HNDMAKNIVKAVERCGLKVDQLIFAGLASSYSVLTEDERELGVCVVDIGGGTMDIAVYT
+FT                   GGALRHTKVIPYAGNVVTSDIAYAFGTPPSDAEAIKVRHGCALGSIVGKDESVEVPSVG
+FT                   GRPPRSLQRQTLAEVIEPRYTELLNLVNEEILQLQEKLRQQGVKHHLAAGIVLTGGAAQ
+FT                   IEGLAACAQRVFHTQVRIGAPLNITGLTDYAQEPYYSTAVGLLHYGKESHLNGEAEVEK
+FT                   RVTASVGSWIKRLNSWLRKEF"
+FT   CDS             104966..106117
+FT                   /note="ORF_ID:o111#7"
+FT                   /note="similar to SwissProt Accession Number P06138"
+FT                   /transl_table=11
+FT                   /gene="ftsZ"
+FT                   /gene="sfiB"
+FT                   /gene="sulB"
+FT                   /product="Cell division protein FtsZ."
+FT                   /protein_id="BAB96663.1"
+FT                   /translation="MFEPMELTNDAVIKVIGVGGGGGNAVEHMVRERIEGVEFFAVNTD
+FT                   AQALRKTAVGQTIQIGSGITKGLGAGANPEVGRNAADEDRDALRAALEGADMVFIAAGM
+FT                   GGGTGTGAAPVVAEVAKDLGILTVAVVTKPFNFEGKKRMAFAEQGITELSKHVNSLITI
+FT                   PNDKLLKVLGRGISLLDAFGAANDVLKGAVQGIAELITRPGLMNVDFADVRTVMSEMGH
+FT                   AMMGSGVASGEDRAEEAAEMAISSPLLEDIDLSGARGVLVNITAGFDLRLDEFETVGNT
+FT                   IRAFASDNATVVIGTSLDPDMNDELRVTVVATGIGMDKRPEITLVTNKQVQQPVMDRYQ
+FT                   QHGMAPLTQEQKPVAKVVNDNAPQTAKEPDYLDIPAFLRKQAD"
+FT   CDS             106218..107135
+FT                   /note="ORF_ID:o111#8"
+FT                   /note="similar to SwissProt Accession Number P07652"
+FT                   /transl_table=11
+FT                   /gene="envA"
+FT                   /gene="asmB"
+FT                   /gene="lpxC"
+FT                   /product="Udp-3-o-[3-hydroxymyristoyl] n-acetylglucosamine
+FT                   deacetylase (EC  3.5.1.-) (EnvA protein)."
+FT                   /protein_id="BAB96664.1"
+FT                   /translation="MIKQRTLKRIVQATGVGLHTGKKVTLTLRPAPANTGVIYRRTDLN
+FT                   PPVDFPADAKSVRDTMLCTCLVNEHDVRISTVEHLNAALAGLGIDNIVIEVNAPEIPIM
+FT                   DGSAAPFVYLLLDAGIDELNCAKKFVRIKETVRVEDGDKWAEFKPYNGFSLDFTIDFNH
+FT                   PAIDSSNQRYAMNFSADAFMRQISRARTFGFMRDIEYLQSRGLCLGGSFDCAIVVDDYR
+FT                   VLNEDGLRFEDEFVRHKMLDAIGDLFMCGHNIIGAFTAYKSGHALNNKLLQAVLAKQEA
+FT                   WEYVTFQDDAELPLAFKAPSAVLA"
+FT   CDS             107435..107878
+FT                   /note="ORF_ID:o111#9"
+FT                   /note="similar to PIR Accession Number B28381"
+FT                   /transl_table=11
+FT                   /gene="yacA"
+FT                   /product="Hypothetical 16k protein (eneA-secA intergenic
+FT                   region)."
+FT                   /protein_id="BAB96665.1"
+FT                   /translation="MVAASLGLPALSNAAEPNAPAKATTRNHEPSAKVNFGQLALLEAN
+FT                   TRRPNSNYSVDYWHQHAIRTVIRHLSFAMAPQTLPVAEESLPLQAQHLALLDTLSALLT
+FT                   QEGTPSEKGYRIDYAHFTPQAKFSTPVWISQAQGIRAGPQRLT"
+FT   CDS             107940..110645
+FT                   /note="ORF_ID:o111#10"
+FT                   /note="similar to SwissProt Accession Number P10408"
+FT                   /transl_table=11
+FT                   /gene="secA"
+FT                   /gene="azi"
+FT                   /gene="pea"
+FT                   /gene="prlD"
+FT                   /product="Preprotein translocase SecA subunit."
+FT                   /protein_id="BAB96666.1"
+FT                   /translation="MLIKLLTKVFGSRNDRTLRRMRKVVNIINAMEPEMEKLSDEELKG
+FT                   KTAEFRARLEKGEVLENLIPEAFAVVREASKRVFGMRHFDVQLLGGMVLNERCIAEMRT
+FT                   GEGKTLTATLPAYLNALTGKGVHVVTVNDYLAQRDAENNRPLFEFLGLTVGINLPGMPA
+FT                   PAKREAYAADITYGTNNEYGFDYLRDNMAFSPEERVQRKLHYALVDEVDSILIDEARTP
+FT                   LIISGPAEDSSEMYKRVNKIIPHLIRQEKEDSETFQGEGHFSVDEKSRQVNLTERGLVL
+FT                   IEELLVKEGIMDEGESLYSPANIMLMHHVTAALRAHALFTRDVDYIVKDGEVIIVDEHT
+FT                   GRTMQGRRWSDGLHQAVEAKEGVQIQNENQTLASITFQNYFRLYEKLAGMTGTADTEAF
+FT                   EFSSIYKLDTVVVPTNRPMIRKDLPDLVYMTEAEKIQAIIEDIKERTAKGQPVLVGTIS
+FT                   IEKSELVSNELTKAGIKHNVLNAKFHANEAAIVAQAGYPAAVTIATNMAGRGTDIVLGG
+FT                   SWQAEVAALENPTAEQIEKIKADWQVRHDAVLEAGGLHIIGTERHESRRIDNQLRGRSG
+FT                   RQGDAGSSRFYLSMEDALMRIFASDRVSGMMRKLGMKPGEAIEHPWVTKAIANAQRKVE
+FT                   SRNFDIRKQLLEYDDVANDQRRAIYSQRNELLDVSDVSETINSIREDVFKATIDAYIPP
+FT                   QSLEEMWDIPGLQERLKNDFDLDLPIAEWLDKEPELHEETLRDGILAQSIEVYQRKEEV
+FT                   VGAEMMRHFEKGVMLQTLDSLWKEHLAAMDYLRQGIHLRGYAQKDPKQEYKRESFSMFA
+FT                   AMLESLKYEVISTLSKVQVRMPEEVEELEQQRRMEAERLAQMQQLSHQDDDSAAAAALA
+FT                   AQTGERKVGRNDPCPCGSGKKYKQCHGRLQ"
+FT   CDS             110705..111094
+FT                   /note="ORF_ID:o111#11"
+FT                   /note="similar to PIR Accession Number A27890"
+FT                   /transl_table=11
+FT                   /gene="mutT"
+FT                   /product="Mutator MutT (AT-GC transversion)."
+FT                   /protein_id="BAB96667.1"
+FT                   /translation="MKKLQIAVGIIRNENNEIFITRRAADAHMANKLEFPGGKIEMGET
+FT                   PEQAVVRELQEEVGITPQHFSLFEKLEYEFPDRHITLWFWLVERWEGEPWGKEGQPGEW
+FT                   MSLVGLNADDFPPANEPVIAKLKRL"
+FT   CDS             complement(111310..>111408)
+FT                   /note="ORF_ID:o111#12"
+FT                   /note="similar to SwissProt Accession Number P36681"
+FT                   /transl_table=11
+FT                   /gene="yacG"
+FT                   /product="Hypothetical 5.8 kd protein in mutT-guaC
+FT                   intergenic region."
+FT                   /protein_id="BAB96668.1"
+FT                   /translation="LIDLGEWAAEEKRIPSSGDLSESDDWSEEPKQ"
+XX
+SQ   Sequence 111408 BP; 26083 A; 28055 C; 30531 G; 26739 T; 0 other;
+     atgcgagtgt tgaagttcgg cggtacatca ttggcaaatg cagaacgttt tctgcgtgtt        60
+     gccgatattc tggaaagcaa tgccaggcag gggcaggtgg ccaccgtcct ctctgcccct       120
+     gccaaaatca ccaaccacct ggtggcgatg attgaaaaaa ccattagcgg ccaggatgct       180
+     ttacccaata tcagcgatgc cgaacgtatt tttgccgaac ttttgacggg actcgccgcc       240
+     gcccagccgg ggttcccgtt ggcgcaattg aaaacattcg tcgatcagga atttgcccaa       300
+     ataaaacatg tcctgcatgg cattagtttg ttggggcagt gcccggatag catcaacgct       360
+     gcgctgattt gccgtggcga gaagatgtcg atcgccatta tggccggcgt attagaagcg       420
+     cgcggtcaca acgttactgt tatcgatccg gtcgaaaaac tgctggcagt ggggcattac       480
+     ctcgaatcca ccgtcgatat tgccgagtcc acccgccgta tagcggcaag ccgcattccg       540
+     gctgatcaca tggtgctgat ggcaggtttc accgccggta atgagaaagg cgaactggtg       600
+     gtgcttggac gcaacggttc cgactactct gctgcggtgc tggctgcctg tttacgcgcc       660
+     gattgttgcg agatttggac ggacgttaac ggggtctata cctgcgaccc gcgtcaggtg       720
+     cccgacgcga ggttgttgaa gtcgatgtcc taccaggaag cgatggagct ttcctacttc       780
+     ggcgctaaag ttcttcaccc ccgcaccatt acccccatcg cccagttcca gatcccttgc       840
+     ctgattaaaa ataccggaaa tcctcaagca ccaggtacgc tcattggtgc aagccgtgat       900
+     gaagacgaat taccggtcaa gggcatttcc aatctgaata acatggcaat gttcagcgtt       960
+     tctggtccgg ggatgaaagg gatggtcggc atggcggcgc gcgtctttgc agcgatgtca      1020
+     cgcgcccgta tttccgtggt gctgattacg caatcatctt ccgaatacag catcagtttc      1080
+     tgcgttccac aaagcgactg tgtgcgagct gaacgggcaa tgctggaaga gttctaccta      1140
+     gaactgaaag aaggcttact ggagccgctg gcagtggcgg aacggctggc cattatctcg      1200
+     gtggtaggtg atggtttgcg caccttgcgt gggatctcgg cgaaattctt tgccgcactg      1260
+     gcccgcgcca atatcaacat tgtcgccatt gctcagggat cttctgaacg ctcaatctct      1320
+     gtcgtggtaa ataacgatga tgcgaccact ggcgtgcgcg ttactcatca gatgctgttc      1380
+     aataccgatc aggttatcga agtgtttgtg attggcgtcg gtggcgttgg cggtgcgctg      1440
+     ctggagcaac tgaagcgtca gcaaagctgg ctgaagaata aacatatcga cttacgtgtc      1500
+     tgcggtgttg ccaactcgaa ggctctgctc accaatgtac atggccttaa tctggaaaac      1560
+     tggcaggagg aactggcgca agccaaagag ccgtttaatc tcgggcgctt aattcgcctc      1620
+     gtgaaagaat atcatctgct gaacccggtc attgttaact gcacttccag ccaggcagtg      1680
+     gcggatcaat atgccgactt cctgcgcgaa ggtttccacg ttgtcacgcc gaacaaaaag      1740
+     gccaacacct cgtcgatgga ttactaccat cagttgcgtt atgcggcgga aaaatcgcgg      1800
+     cgtaaattcc tctatgacat caacgttggg gctggattac cggttattga gaacctgcaa      1860
+     aatctgctca atgcaggtga tgaattgatg aaattctccg gcattctttc tggttcgctt      1920
+     tcttatatct tcggcaagtt agacgaaggc atgagtttct ccgaggcgac ccggctggcg      1980
+     cgggaaatgg gttataccga accggacccg cgagatgatc tttctggtat ggatgtggcg      2040
+     cgtaaactat tgattctcgc tcgtgaaacg ggacgtgaac tggagctggc ggatattgaa      2100
+     attgaacctg tgctgcccgc agagtttaac gccgagggtg atgttgccgc ttttatggcg      2160
+     aatctgtcac aactcgacga tctctttgcc gcgcgcgtgg cgaaggcccg tgatgaagga      2220
+     aaagttttgc gctatgttgg caatattgat gaagatggcg tctgccgcgt gaagattgcc      2280
+     gaagtggatg gtaatgatcc gctgttcaaa gtgaaaaatg gcgaaaacgc cctggccttc      2340
+     tatagccact attatcagcc gctgccgttg gtactgcgcg gatatggtgc gggcaatgac      2400
+     gttacagctg ccggtgtctt tgctgatctg ctacgtaccc tctcatggaa gttaggagtc      2460
+     tgacatggtt aaagtttatg ccccggcttc cagtgccaat atgagcgtcg ggtttgatgt      2520
+     gctcggggcg gcggtgacac ctgttgatgg tgcattgctc ggagatgtag tcacggttga      2580
+     ggcggcacag acattcagtc tcaacaacct cggacgcttt gccgataagc tgccgtcaga      2640
+     accacgggaa aatatcgttt atcagtgctg ggagcgtttt tgccaggaac tgggtaagca      2700
+     aattccagtg gcgatgaccc tggaaaagaa tatgccgatc ggttcgggct taggctccag      2760
+     tgcctgttcg gtggtcgcgg cgctgatggc gatgaatgaa cactgcggca agccgcttaa      2820
+     tgacactcgt ttgctggctt tgatgggcga gctggaaggc cgtatctccg gcagcattca      2880
+     ttacgacaac gtggcaccgt gttttctcgg tggtatgcag ttgatgatcg aagaaaacga      2940
+     catcatcagc cagcaagtgc agggtttgat gagtggctgt gggtgctcgc gtatccgggg      3000
+     attaaagtct cgacggcaga agcagggcta tttaccggcg cagtatcgcc gccaggattg      3060
+     cattgcgcac gggcgacatc tggcaggctt cattcacgcc tgctattccc gtcagcctga      3120
+     gcttgccgcg aagctgatga aagatgttat cgctgaaccc taccgtgaac ggttactgcc      3180
+     aggcttccgg caggcgcggc aggcggtcgc ggaaatcggc gcggtagcga gcggtatctc      3240
+     cggctccggc ccgaccttgt tcgctctgtg tgacaagccg gaaaccgccc agcgcgttgc      3300
+     cgactggttg ggtaagaact acctgcaaaa tcaggaaggt tttgttcata tttgccggct      3360
+     ggatacggcg ggcgcacgag tactggaaaa ctaaatgaaa ctctacaatc tgaaagatca      3420
+     caacgagcag gtcagctttg cgcaagccgt aacccagggg ttgggcaaaa atcaggggct      3480
+     gttttttccg cacgacctgc cggaattcag cctgactgaa attgatgaga tgctgaagct      3540
+     ggattttgtc acccgcagtg cgaagatcct ctcggcgttt attggtgatg aaatcccaca      3600
+     ggaaatcctg gaagagcgcg tgcgcgcggc gtttgccttc ccggctccgg tcgccaatgt      3660
+     tgaaagcgat gtcggttgtc tggaattgtt ccacgggcca acgctggcat ttaaagattt      3720
+     cggcggtcgc tttatggcac aaatgctgac ccatattgcg ggtgataagc cagtgaccat      3780
+     tctgaccgcg acctccggtg ataccggagc ggcagtggct catgctttct acggtttacc      3840
+     gaatgtgaaa gtggttatcc tctatccacg aggcaaaatc agtccactgc aagaaaaact      3900
+     gttctgtaca ttgggcggca atatcgaaac tgttgccatc gacggcgatt tcgatgcctg      3960
+     tcaggcgctg gtgaagcagg cgtttgatga tgaagaactg aaagtggcgc tagggttaaa      4020
+     ctcggctaac tcgattaaca tcagccgttt gctggcgcag atttgctact actttgaagc      4080
+     tgttgcgcag ctgccgcagg agacgcgcaa ccagctggtt gtctcggtgc caagcggaaa      4140
+     cttcggcgat ttgacggcgg gtctgctggc gaagtcactc ggtctgccgg tgaaacgttt      4200
+     tattgctgcg accaacgtga acgataccgt gccacgtttc ctgcacgacg gtcagtggtc      4260
+     acccaaagcg actcaggcga cgttatccaa cgcgatggac gtgagtcagc cgaacaactg      4320
+     gccgcgtgtg gaagagttgt tccgccgcaa aatctggcaa ctgaaagagc tgggttatgc      4380
+     agccgtggat gatgaaacca cgcaacagac aatgcgtgag ttaaaagaac tgggctacac      4440
+     ttcggagccg cacgctgccg tagcttatcg tgcgctgcgt gatcagttga atccaggcga      4500
+     atatggcttg ttcctcggca ccgcgcatcc ggcgaaattt aaagagagcg tggaagcgat      4560
+     tctcggtgaa acgttggatc tgccaaaaga gctggcagaa cgtgctgatt tacccttgct      4620
+     ttcacataat ctgcccgccg attttgctgc gttgcgtaaa ttgatgatga atcatcagta      4680
+     aaatctattc attatctcaa tcaggccggg tttgctttta tgcagcccgg cttttttatg      4740
+     aagaaattat ggagaaaaat gacagggaaa aaggagaaat tctcaataaa tgcggtaact      4800
+     tagagattag gattgcggag aataacaacc gccgttctca tcgagtaatc tccggatatc      4860
+     gacccataac gggcaatgat aaaaggagta acctgtgaaa aagatgcaat ctatcgtact      4920
+     cgcactttcc ctggttctgg tcgctcccat ggcagcacag gctgcggaaa ttacgttagt      4980
+     cccgtcagta aaattacaga taggcgatcg tgataatcgt ggctattact gggatggagg      5040
+     tcactggcgc gaccacggct ggtggaaaca acattatgaa tggcgaggca atcgctggca      5100
+     cctacacgga ccgccgccac cgccgcgcca ccataagaaa gctcctcatg atcatcacgg      5160
+     cggtcatggt cctggcaaac atcaccgcta aatgacaaat gccgggtaac aatccggcat      5220
+     tcagcgcctg atgcgacgct ggcgcgtctt atcaggccta cgttaattct gcaatatatt      5280
+     gaatctgcat gcttttgtag gcaggataag gcgttcacgc cgcatccggc attgactgca      5340
+     aacttaacgc tgctcgtagc gtttaaacac cagttcgcca ttgctggagg aatcttcatc      5400
+     aaagaagtaa ccttcgctat taaaaccagt cagttgctct ggtttggtca gccgattttc      5460
+     aataatgaaa cgactcatca gaccgcgtgc tttcttagcg tagaagctga tgatcttaaa      5520
+     tttgccgttc ttctcatcga ggaacaccgg cttgataatc tcggcattca atttcttcgg      5580
+     cttcaccgat ttaaaatact catctgacgc cagattaatc accacattat cgccttgtgc      5640
+     tgcgagcgcc tcgttcagct tgttggtgat gatatctccc cagaattgat acagatcttt      5700
+     ccctcgggca ttctcaagac ggatccccat ttccagacga taaggctgca ttaaatcgag      5760
+     cgggcggagt acgccataca agccggaaag cattcgcaaa tgctgttggg caaaatcgaa      5820
+     atcgtcttcg ctgaaggttt cggcctgcaa gccggtgtag acatcacctt taaacgccag      5880
+     aatcgcctgg cgggcattcg ccggcgtgaa atctggctgc cagtcatgaa agcgagcggc      5940
+     gttgataccc gccagtttgt cgctgatgcg catcagcgtg ctaatctgcg gaggcgtcag      6000
+     tttccgcgcc tcatggatca actgctggga attgtctaac agctccggca gcgtatagcg      6060
+     cgtggtggtc aacgggcttt ggtaatcaag cgttttcgca ggtgaaataa gaatcagcat      6120
+     atccagtcct tgcaggaaat ttatgccgac tttagcaaaa aatgagaatg agttgatcga      6180
+     tagttgtgat tactcctgcg aaacatcatc ccacgcgtcc ggagaaagct ggcgaccgat      6240
+     atccggataa cgcaatggat caaacaccgg gcgcacgccg agtttacgct ggcgtagata      6300
+     atcactggca atggtatgaa ccacaggcga gagcagtaaa atggcggtca aattggtaat      6360
+     agccatgcag gccattatga tatctgccag ttgccacatc agcggaaggc ttagcaaggt      6420
+     gccgccgatg accgttgcga aggtgcagat ccgcaaacac cagatcgctt tagggttgtt      6480
+     caggcgtaaa aagaagagat tgttttcggc ataaatgtag ttggcaacga tggagctgaa      6540
+     ggcaaacaga ataaccacaa gggtaacaaa ctcagcaccc caggaaccca ttagcacccg      6600
+     catcgccttc tggataagct gaataccttc cagcggcatg taggttgtgc cgttacccgc      6660
+     cagtaatatc agcatggcgc ttgccgtaca gatgaccagg gtgtcgataa aaatgccaat      6720
+     catctggaca atcccttgcg ctgccggatg cggaggccag gacgccgctg ccgctgccgc      6780
+     gtttggcgtc gaacccattc ccgcctcatt ggaaaacata ctgcgctgaa aaccgttagt      6840
+     aatcgcctgg cttaaggtat atcccgccgc gccgcctgcc gcttcctgcc agccaaaagc      6900
+     actctcaaaa atagaccaaa tgacgtgggg aagttgcccg atattcatta cgcaaattac      6960
+     caggctggtc agtacccaga ttatcgccat caacgggaca aagccctgca tgagccgggc      7020
+     gacgccatga agaccgcgag tgattgccag cagagtaaag acagcgagaa taatgcctgt      7080
+     caccagcggg ggaaaatcaa aagaaaaact cagggcgcgg gcaacggcgt tcgcttgaac      7140
+     tccgctgaaa attatgccat aggcgatgag caaaaagacg gcgaacagaa cgcccatcca      7200
+     gcgcatcccc agcccgcgcg ccatatacca tgccggtccg ccacgaaact gcccattgac      7260
+     gtcacgttct ttataaagtt gtgccagaga acattcggca aacgaggtcg ccatgccgat      7320
+     aaacgcggca acccacatcc aaaagacggc tccaggtcca ccggcggtaa tagccagcgc      7380
+     aacgccggcc aggttgccgc tacccacgcg cgccgcaaga ctggtacaca atgactgaaa      7440
+     tgaggttaaa ccgcctggct gtggatgaat gctattttta agacttttgc caaactggcg      7500
+     gatgtagcga aactgcacaa atccggtgcg aaaagtgaac caacaacctg cgccgaagag      7560
+     caggtaaatc attaccgatc cccaaaggac gctgttaatg aaggagaaaa aatctggcat      7620
+     gcatatccct cttattgccg gtcgcgatga ctttcctgtg taaacgttac caattgttta      7680
+     agaagtatat acgctacgag gtacttgata acttctgcgt agcatacatg aggttttgta      7740
+     taaaaatggc gggcgatatc aacgcagtgt cagaaatccg aaacagtctc gcctggcgat      7800
+     aaccgtcttg tcggcggttg cgctgacgtt gcgtcgtgat atcatcaggg cagaccggtt      7860
+     acatccccct aacaagctgt ttaaagagaa atactatcat gacggacaaa ttgacctccc      7920
+     ttcgtcagta caccaccgta gtggccgaca ctggggacat cgcggcaatg aagctgtatc      7980
+     aaccgcagga tgccacaacc aacccttctc tcattcttaa cgcagcgcag attccggaat      8040
+     accgtaagtt gattgatgat gctgtcgcct gggcgaaaca gcagagcaac gatcgcgcgc      8100
+     agcagatcgt ggacgcgacc gacaaactgg cagtaaatat tggtctggaa atcctgaaac      8160
+     tggttccggg ccgtatctca actgaagttg atgcgcgtct ttcctatgac accgaagcgt      8220
+     caattgcgaa agcaaaacgc ctgatcaaac tctacaacga tgctggtatt agcaacgatc      8280
+     gtattctgat caaactggct tctacctggc agggtatccg tgctgcagaa cagctggaaa      8340
+     aagaaggcat caactgtaac ctgaccctgc tgttctcctt cgctcaggct cgtgcttgtg      8400
+     cggaagcggg cgtgttcctg atctcgccgt ttgttggccg tattcttgac tggtacaaag      8460
+     cgaataccga taagaaagag tacgctccgg cagaagatcc gggcgtggtt tctgtatctg      8520
+     aaatctacca gtactacaaa gagcacggtt atgaaaccgt ggttatgggc gcaagcttcc      8580
+     gtaacatcgg cgaaattctg gaactggcag gctgcgaccg tctgaccatc gcaccggcac      8640
+     tgctgaaaga gctggcggag agcgaagggg ctatcgaacg taaactgtct tacaccggcg      8700
+     aagtgaaagc gcgtccggcg cgtatcactg agtccgagtt cctgtggcag cacaaccagg      8760
+     atccaatggc agtagataaa ctggcggaag gtatccgtaa gtttgctatt gaccaggaaa      8820
+     aactggaaaa aatgatcggc gatctgctgt aatcattctt agcgtgaccg ggaagtcggt      8880
+     cacgctacct cttctgaagc ctgtctgtca ctcccttcgc agtgtatcat tctgtttaac      8940
+     gagactgttt aaacggaaaa atcttgatga atactttacg tattggctta gtttccatct      9000
+     ctgatcgcgc atccagcggc gtttatcagg ataaaggcat ccctgcgctg gaagaatggc      9060
+     tgacatgcgc taaccacgcc gtttgaactg gaaacccgct taatccccga tgagcaggcg      9120
+     atcatcgagc aaacgttgtg tgagctggtg gatgaaatga gttgccatct ggtgctcacc      9180
+     acgggcggaa ctggcccggc gcgtcgtgac gtaacgcccg atgcgacgct ggcagtagcg      9240
+     gaccgcgaga tgcctggctt tggtgaacag atgcgccaga tcagcctgca ttttgtacca      9300
+     actgcgatcc tttcgcgtca ggtgggcgtg attcgcaaac aggcgctgat ccttaactta      9360
+     cccggtcagc cgaagtctat taaagagacg ctggaaggtg tgaaggacgc tgagggtaac      9420
+     gttgtggtac acggtatttt tgccagcgta ccgtactgca ttcagttgct ggaagggcca      9480
+     tacgttgaaa cggcaccgga agtggttgca gcattcagac cgaagagtgc aagacgcgac      9540
+     gttagcgaat aaaaaatacc cgagcggggg gatctcaaaa caattagtgg gattcaccaa      9600
+     tcggcagaac ggtgcgacca aactgctcgt tcagtacttc acccatcgcc agatagattg      9660
+     cgctggcacc gcagatcagc ccaatccagc cggcaaagtg gatgattgcg gcgttaccgg      9720
+     caatgttacc gatcgccagc agggcaaaca gcacggtcag gctaaagaaa acgaattgca      9780
+     gaacgcgtgc gcctttcagc gtgccgaaga acataaacac ggtaaatacg ccccacagac      9840
+     ccaggtagac accaaggaac tgtgcatttg gcgcatcggt cagacccagt ttcggcatca      9900
+     gcagaatcgc aaccagcgtc agccagaaag aaccgtaaga ggtgaatgcg gttaaaccga      9960
+     aagtgttgcc ttttttgtac tccagcagac cagcaaaaat ttgcgcgatg ccgccgtaga     10020
+     aaatgcccat ggcaagaata ataccgtcca gagcgaaata acccacgttg tgcaggttaa     10080
+     gcagaatggt ggtcatgccg aagcccatca ggcccagcgg tgccggatta gccaacttag     10140
+     tgttgcccat aattcctcaa aaatcatcat cgaatgaatg gtgaaataat ttccctgaat     10200
+     aactgtagtg ttttcagggc gcggcataat aatcagccag tggggcagtg tctacgatct     10260
+     tttgagggga aaatgaaaat tttcccggtt tccggtatca gacctgagtg gcgctaacca     10320
+     tccggcgcag gcaggcgatt tgcagtacgg ctggaatcgt cacgcgatag cgtgcgtgac     10380
+     cgctttaacc ccatttagtg ccgcacctac aggcctccca gcccgcgccg cgcagcaaac     10440
+     catgcccaag tacgctcatt gctgcgtggg tgcgtaaaat gcgggtcagt tggctggaaa     10500
+     gcaaatgcga cacacctttt gccaataatt tgtctttcat cagcagcggc agcagctctt     10560
+     ccagctcatt caccctggca tcgaccgcgt gcagaaactc ctgcttatgt tcctcgtcca     10620
+     ttttcttcca ggtattacgc agaaattgtt ccagtaactg ttgctcaatt tcaaacgtag     10680
+     acatctcttt gtcggctttc agcttcaatc gctttgaaac atcgagcaaa atggcccgat     10740
+     acaatttacc gtgtccgcgc agtttgttgc gatactatcg ccaccaaaat gctgtaattc     10800
+     tccggcaatc agctgccagt tgcggcgatg ttgctcggga tgcccttcca tcgatttaaa     10860
+     cagttcgttg cgcatcagta cgctggagag gcgagttttg cctttttcat tatgggtgag     10920
+     caatcgggcg aaatttgcca actgttcctc actacaatgc tgaagaaaat ccagatctga     10980
+     atcattcagg taattaacat tcattttttg tggcttctat attctggcgt tagtcgtcgc     11040
+     cgataatttt cagcgtggcc atatccgatg agttcaccgt atgacccgaa aaggtgattt     11100
+     ttgagacgca gcgtttattg tcgttatcgc tcttaatgtt gatccagtca gtggtttgcc     11160
+     cttcttttat ttctgaagga atattcaggc tctgacctgg cgctgacggg cggctgtgaa     11220
+     ataaaccgat gcaccgctta actgtaaatc gccatggtcg gcagagagtt gtatgcgttt     11280
+     cacaatgcga caaacaggaa gtttcagcgc cagatcgttg gtttcgttac gcggcattgc     11340
+     aatgcgccga ggagtttatg gtcgtttgcc tgcgccgtgc agcacagcat caggctaatc     11400
+     gccaggctgg cggaaatcgt aaaaacggat ttcataagga ttctcttagt gggaagaggt     11460
+     agggggatga atacccacta gtttactgct gataaagaga agattcaggc acgtaatctt     11520
+     ttctttttat tacaattttt tgatgaatgc cttggctgcg attcattctt tatatgaata     11580
+     aaattgctgt caattttacg tcttgtcctg ccatatcgcg aaatttctgc gcaaaagcac     11640
+     aaaaaatttt tgcatctccc ccttgatgac gtggtttacg accccattta gtagtcaacc     11700
+     gcagtgagtg agtctgcaaa aaaatgaaat tgggcagttg aaaccagacg tttcgcccct     11760
+     attacagact cacaaccaca tgatgaccga atatatagtg gagacgttta gatgggtaaa     11820
+     ataattggta tcgacctggg tactaccaac tcttgtgtag cgattatgga tggcaccact     11880
+     cctcgcgtgc tggagaacgc cgaaggcgat cgcaccacgc cttctatcat tgcctatacc     11940
+     caggatggtg aaactctagt tggtcagccg gctaaacgtc aggcagtgac gaacccgcaa     12000
+     aacactctgt ttgcgattaa acgcctgatt ggtcgccgct tccaggacga agaagtacag     12060
+     cgtgatgttt ccatcatgcc gttcaaaatt attgctgctg ataacggcga cgcatgggtc     12120
+     gaagttaaag gccagaaaat ggcaccgccg cagatttctg ctgaagtgct gaaaaaaatg     12180
+     aagaaaaccg ctgaagatta cctgggtgaa ccggtaactg aagctgttat caccgtaccg     12240
+     gcatacttta acgatgctca gcgtcaggca accaaagacg caggccgtat cgctggtctg     12300
+     gaagtaaaac gtatcatcaa cgaaccgacc gcagctgcgc tggcttacgg tctggacaaa     12360
+     ggcactggca accgtactat cgcggtttat gacctgggtg gtggtacttt cgatatttct     12420
+     attatcgaaa tcgacgaagt tgacggcgaa aaaaccttcg aagttctggc aaccaacggt     12480
+     gatacccacc tggggggtga agacttcgac agccgtctga tcaactatct ggttgaagaa     12540
+     ttcaagaaag atcagggcat tgacctgcgc aacgatccgc tggcaatgca gcgcctgaaa     12600
+     gaagcggcag aaaaagcgaa aatcgaactg tcttccgctc agcagaccga cgttaacctg     12660
+     ccatacatca ctgcagacgc gaccggtccg aaacacatga acatcaaagt gactcgtgcg     12720
+     aaactggaaa gcctggttga agatctggta aaccgttcca ttgagccgct gaaagttgca     12780
+     ctgcaggacg ctggcctgtc cgtatctgat atcgacgacg ttatcctcgt tggtggtcag     12840
+     actcgtatgc caatggttca gaagaaagtt gctgagttct ttggtaaaga gccgcgtaaa     12900
+     gacgttaacc cggacgaagc tgtagcaatc ggtgctgctg ttcagggtgg tgttctgact     12960
+     ggtgacgtaa aagacgtact gctgctggac gttaccccgc tgtctctggg tatcgaaacc     13020
+     atgggcggtg tgatgacgac gctgatcgcg aaaaacacca ctatcccgac caagcacagc     13080
+     caggtgttct ctaccgctga agacaaccag tctgcggtaa ccatccatgt gctgcagggt     13140
+     gaacgtaaac gtgcggctga taacaaatct ctgggtcagt tcaacctaga tggtatcaac     13200
+     ccggcaccgc gcggcatgcc gcagatcgaa gttaccttcg atatcgatgc tgacggtatc     13260
+     ctgcacgttt ccgcgaaaga taaaaacagc ggtaaagagc agaagatcac catcaaggct     13320
+     tcttctggtc tgaacgaaga tgaaatccag aaaatggtac gcgacgcaga agctaacgcc     13380
+     gaagctgacc gtaagtttga agagctggta cagactcgca accagggcga ccatctgctg     13440
+     cacagcaccc gtaagcaggt tgaagaagca ggcgacaaac tgccggctga cgacaaaact     13500
+     gctatcgagt ctgcgctgac tgcactggaa actgctctga aaggtgaaga caaagccgct     13560
+     atcgaagcga aaatgcagga actggcacag gtttcccaga aactgatgga aatcgcccag     13620
+     cagcaacatg cccagcagca gactgccggt gctgatgctt ctgcaaacaa cgcgaaagat     13680
+     gacgatgttg tcgacgctga atttgaagaa gtcaaagaca aaaaataatc gccctataaa     13740
+     cgggtaatta tactgacacg ggcgaagggg aatttcctct ccgcccgtgc attcatctag     13800
+     gggcaattta aaaaagatgg ctaagcaaga ttattacgag attttaggcg tttccaaaac     13860
+     agcggaagag cgtgaaatca gaaaggccta caaacgcctg gccatgaaat accacccgga     13920
+     ccgtaaccag ggtgacaaag aggccgaggc gaaatttaaa gagatcaagg aagcttatga     13980
+     agttctgacc gactcgcaaa aacgtgcggc atacgatcag tatggtcatg ctgcgtttga     14040
+     gcaaggtggc atgggcggcg gcggttttgg cggcggcgca gacttcagcg atatttttgg     14100
+     tgacgttttc ggcgatattt ttggcggcgg acgtggtcgt caacgtgcgg cgcgcggtgc     14160
+     tgatttacgc tataacatgg agctcaccct cgaagaagct gtacgtggcg tgaccaaaga     14220
+     gatccgcatt ccgactctgg aagagtgtga cgtttgccac ggtagcggtg caaaaccagg     14280
+     tacacagccg cagacttgtc cgacctgtca tggttctggt caggtgcaga tgcgccaggg     14340
+     attcttcgct gtacagcaga cctgtccaca ctgtcagggc cgcggtacgc tgatcaaaga     14400
+     tccgtgcaac aaatgtcatg gtcatggtcg tgttgagcgc agcaaaacgc tgtccgttaa     14460
+     aatcccggca ggggtggaca ctggagaccg catccgtctt gcgggcgaag gtgaagcggg     14520
+     cgagcatggc gcaccggcag gcgatctgta cgttcaggtt caggttaaac agcacccgat     14580
+     tttcgagcgt gaaggcaaca acctgtattg cgaagtcccg atcaacttcg ctatggcggc     14640
+     gctgggtggc gaaatcgaag taccgaccct tgatggtcgc gtcaaactga aagtgcctgg     14700
+     cgaaacccag accggtaagc tattccgtat gcgcggtaaa ggcgtcaagt ctgtccgcgg     14760
+     tggcgcacag ggtgatttgc tgtgccgcgt tgtcgtcgaa acaccggtag gcctgaacga     14820
+     aaggcagaaa cagctgctgc aagagctgca agaaagcttc ggtggcccaa ccggcgagca     14880
+     caacagcccg cgctcaaaga gcttctttga tggtgtgaag aagttttttg acgacctgac     14940
+     ccgctaacct ccccaaaagc ctgcccgtgg gcaggcctgg gtaaaaatag ggtgcgttga     15000
+     agatatgcga gcacctgtaa agtggcgggg atcactccca taagcgctaa cttaagggtt     15060
+     gtggtattac gcctgatatg atttaacgtg ccgatgaatt actctcacga taactggtca     15120
+     gcaattctgg cccatattgg taagcccgaa gaactggata cttcggcacg taatgccggg     15180
+     gctctaaccc gccgccgcga aattcgtgat gctgcaactc tgctacgtct ggggctggct     15240
+     tacggccccg gggggatgtc attacgtgaa gtcactgcat gggctcagct ccatgacgtt     15300
+     gcaacattat ctgacgtggc tctcctgaag cggctgcgga atgccgccga ctggtttggc     15360
+     atacttgccg cacaaacact tgctgtacgc gccgcagtta cgggttgtac aagcggaaag     15420
+     agattgcgtc ttgtcgatgg aacagcaatc agtgcgcccg ggggcggcag cgctgaatgg     15480
+     cgactacata tgggatatga tcctcatacc tgtcagttca ctgattttga gctaaccgac     15540
+     agcagagacg ctgaacggct ggaccgattt gcgcaaacgg cagacgagat acgcattgct     15600
+     gaccggggat tcggttcgcg tcccgaatgt atccgctcac ttgcttttgg agaagctgat     15660
+     tatatcgtcc gggttcactg gcgaggattg cgctggttaa ctgcagaagg aatgcgcttt     15720
+     gacatgatgg gttttctgcg cgggctggat tgcggtaaga acggtgaaac cactgtaatg     15780
+     ataggcaatt caggtaataa aaaagccgga gctccctttc cggcacgtct cattgccgta     15840
+     tcacttcctc ccgaaaaagc attaatcagt aaaacccgac tgctcagcga gaatcgtcga     15900
+     aaaggacgag tagttcaggc ggaaacgctg gaagcagcgg gccatgtgct attgctaaca     15960
+     tcattaccgg aagatgaata ttcagcagag caagtggctg attgttaccg tctgcgatgg     16020
+     caaattgaac tggcttttaa gcggctcaaa agtttgctgc acctggatgc tttgcgtgca     16080
+     aaggaacctg aactcgcgaa agcgtggata tttgctaatc tactcgccgc atttttaatt     16140
+     gacgacataa tccagccatc gctggatttc ccccccagaa gtgccggatc cgaaaagaag     16200
+     aactaactcg ttgtggagaa taacaaaaat ggtcatctgg agcttacagg tggccattcg     16260
+     tgggacagta tccctgacag cctacaaaac gcaattgaag aacgcgaggc atcgtcttaa     16320
+     cgaggcaccg aggcgtcgca ttcttcagat ggttcaaccc ttaagttagc gcttatggga     16380
+     tcactccccg ccgttgctct tactcggatt cgtaagccgt gaaaacagca acctccgtct     16440
+     ggccagttcg gatgtgaacc tcacagaggt cttttctcgt taccagcgcc gccactacgg     16500
+     cggtgataca gatgacgatc agggcgacaa tcatcgcctt atgctgcttc attgctctct     16560
+     tctccttgac cttacggtca gtaagaggca ctctacatgt gttcagcata taggaggcct     16620
+     cgggttgatg gtaaaatatc actcggggct tttctctatc tgccgttcag ctaatgcctg     16680
+     agacagacag cctcaagcac ccgccgctat tatatcgctc tctttaaccc attttgtttt     16740
+     atcgattcta atcctgaaga cgcctcgcat ttttgtggcg taatttttta atgatttaat     16800
+     tatttaactt taatttatct cttcatcgca attattgacg acaagctgga ttatttttga     16860
+     aatattggcc taacaagcat cgccgactga caacaaatta attattactt ttcctaatta     16920
+     atccctcagg aatcctcacc ttaagctatg attatctagg cttagggtca ctcgtgagcg     16980
+     cttacagccg tcaaaaacgc atctcaccgc tgatggcgca aattcttcaa tagctcgtaa     17040
+     aaaacgaatt attcctacac tataatctga ttttaacgat gattcgtgcg gggtaaaata     17100
+     gtaaaaacga tctattcacc tgaaagagaa ataaaaagtg aaacatctgc atcgattctt     17160
+     tagcagtgat gcctcgggag gcattattct tatcattgcc gctatcctgg cgatgattat     17220
+     ggccaacagc ggcgcaacca gtggatggta tcacgacttt ctggagacgc cggttcagct     17280
+     ccgggttggt tcactcgaaa tcaacaaaaa catgctgtta tggataaatg acgcgctgat     17340
+     ggcggtattt ttcctgttag tcggtctgga agttaaacgt gaactgatgc aaggatcgct     17400
+     agccagctta cgccaggccg catttccagt tatcgccgct attggtggga tgattgtgcc     17460
+     ggcattactc tatctggctt ttaactatgc cgatccgatt acccgcgaag ggtgggcgat     17520
+     cccggcggct actgacattg cttttgcact tggtgtactg gcgctgttgg gaagtcgtgt     17580
+     tccgttagcg ctgaagatct ttttgatggc tctggctatt atcgacgatc ttggggccat     17640
+     cattatcatc gcattgttct acactaatga cttatcgatg gcctctcttg gcgtcgcggc     17700
+     tgtagcaatt gcggtactcg cggtattgaa tctgtgtggt gcacgccgca cgggcgtcta     17760
+     tattcttgtt ggcgtggtgt tgtggactgc ggtgttgaaa tcgggggttc acgcaactct     17820
+     ggcgggggta attgtcggct tctttattcc tttgaaagag aagcatgggc gttctccagc     17880
+     gaagcgactg gagcatgtgt tgcacccgtg ggtggcgtat ctgattttgc cgctgtttgc     17940
+     atttgctaat gctggcgttt cactgcaagg cgtcacgctg gatggcttga cctccattct     18000
+     gccattgggg atcatcgctg gcttgctgat tggcaaaccg ctggggatta gtctgttctg     18060
+     ctggttggcg ctgcgtttga aactggcgca tctgcctgag ggaacgactt atcagcaaat     18120
+     tatggtggtg gggatcctgt gcggtatcgg ttttactatg tctatcttta ttgccagcct     18180
+     ggcctttggt agcgtagatc cagaactgat taactgggcg aaactcggta tcctggtcgg     18240
+     ttctatctct tcggcggtaa ttggatacag ctggttacgc gttcgtttgc gtccatcagt     18300
+     ttgacaggac ggtttaccgg ggagccataa acggctccct tttcattgtt atcagggaga     18360
+     gaaatgagca tgtctcatat caattacaac cacttgtatt acttctggca tgtctataaa     18420
+     gaaggttccg tggttggcgc agcggaggcg ctttatttaa ctccacaaac cattaccgga     18480
+     cagattcgag cgctggaaga cgccctgcaa gcgaaattat ttaaacgcaa ggggacgtgg     18540
+     tctcgaaccc agcgagctgg agaactggtc tatcgctatg ccgataaaat gttcacctta     18600
+     agccaggaaa tgctggatat tgtgaactat cgcaaagaat ccaatttatt gtttgacgtt     18660
+     ggcgtggctg atgcactttc caaacgcctg gtcagtagcg tacttaacgc cgcagtggta     18720
+     gaaggcgagc ccattcatct tcgctgcttc gaatccaccc acgaaatgct gctggagcaa     18780
+     ttaagtcagc ataaactgga gatgatcatt tctgactgtc cgatagactc tacgcagcag     18840
+     gaaggcctgt tctccgtgag aattggcgaa tgtggcgtga gtttctggtg tacaaatcca     18900
+     ccaccagaaa aaccgttccc ggcttgtctg gaagaacggc gacttttgat tcctgggcga     18960
+     cgttcaatgt tagggcgcaa attgcttaac tggtttaact cccagggatt aaacgtagaa     19020
+     atcctcggcg agtttgatga tgccgctttg atgaaagctt ggctgcaggt ccttttggtg     19080
+     gcgatgcaaa tgcaatcttc gttgccccaa cgctttatgc atatgacttt tatgccgata     19140
+     aaactgtcgt agaaattggt cgcgtcgaga atgtgatgga agagtaccat gctatttttg     19200
+     ctgagcggat gattcagcac ccggcggtac agcgaatctg caatacggat tattctgcgc     19260
+     tttttagtcc agcggtgcgt taatcggcag ctcccccaaa gttaaggtgg gggagataga     19320
+     ttagttgtac attaccacga ttttgactcg gctcattatt tgcccgcttg agacattgtt     19380
+     tccatatgta cgcgggcgaa taaatagagg aatctgatta cttccttcat ggggatgctg     19440
+     aaaagagtag taattgctgg taatgactcc aacttattga tagtgtttta tgttcagata     19500
+     atgcccgatg actttgtcat gcagctccac cgattttgag aacgacagcg acttccgtcc     19560
+     cagccgtgcc aggtgctgcc tcagattcag gttatgccgc tcaattcgct gcgtatatcg     19620
+     cttgctgatt acgtgcagct ttcccttcag gcgggattca tacagcggcc agccatccgt     19680
+     catccatatc accacgtcaa agggtgacag caggctcata agacgcccca gcgtcgccat     19740
+     agtgcgttca ccgaatacgt gcgcaacaac cgtcttccgg agactgtcat acgcgtaaaa     19800
+     cagccagcgc tggcgcgatt tagccccgac atagccccac tgttcgtcca tttccgcgca     19860
+     gacgatgacg tcactgcccg gctgtatgcg cgaggttacc gactgcggcc tgagtttttt     19920
+     aagtgacgta aaatcgtgtt gaggccaacg cccataatgc gggctgttgc ccggcatcca     19980
+     acgccattca tggccatatc aatgattttc tggtgcgtac cgggttgaga agcggtgtaa     20040
+     gtgaactgca gttgccatgt tttacggcag tgagagcaga gatagcgctg atgtccggcg     20100
+     gtgcttttgc cgttacgcac caccccgtca gtagctgaac aggagggaca gctgatagaa     20160
+     acagaagcca ctggagcacc tcaaaaacac catcatacac taaatcagta agttggcagc     20220
+     atcacctacc tcaatgtgta tcacaatatc catattcttt gtgggggagt ctggagattg     20280
+     agtagatatt cttgttcaga atgtatcagc cgatggttct acgattctta agccacgaag     20340
+     agttcagata gtacaacggc atgtctcttt tgactatctg gcaaccggca gtgtgttctc     20400
+     tcacgcatca caaaagcagc aggcataaaa aaacccgctt gcgcgggctt tttcacaaag     20460
+     cttcagcaaa ttggcgatta agccagtttg ttgatctgtg cagtcaggtt agccttatga     20520
+     cgtgcagctt tgtttttgtg gatcagacct ttagcagcct gacggtccac gatcggttgc     20580
+     atttcgttaa atgctttctg tgcagcagct ttgtcgccag cttcgatagc tgcgtatact     20640
+     ttcttgatga aagtacgcat catagagcga cggcttgcgt tgtgcttacg agccttttca     20700
+     gactgaatgg cgcgcttctt agctgatttg atattagcca aggtccaact cccaaatgtg     20760
+     ttctatatgg acaattcaaa ggccgaggaa tatgcccttt tagccttctt ttgtcaatgg     20820
+     atttgtgcaa ataagcgccg ttaatgtgcc ggcactcgtt acgtagtgat ggcgcaggat     20880
+     tctaccagct tgcggggtgt gaatacagct tttccgcgat aaaaattgca gcaggcggtc     20940
+     agtttcttcc cgtgatttgc gccatggcaa tgaaaagcca cttctttctg atttcggtac     21000
+     tcaatcgccg gttaaccttg accgctgtac aaggtctact cggacgattt tcactgtttt     21060
+     gagccagaca tgaagctgat acgcggcata cataatctca gccaggcccc gcaagaaggg     21120
+     tgtgtgctga ctattggtaa tttcgacggc gtgcatcgcg gtcatcgcgc gctgttacag     21180
+     ggcttgcagg aagaagggcg caagcgcaac ttaccggtga tggtgatgct ttttgaacct     21240
+     caaccactgg aactgtttgc taccgataaa gccccggcaa gactgacccg gctgcgggaa     21300
+     aaactgcgtt accttgcaga gtgtggcgtt gattacgtgc tgtgcgtgcg tttcgacagg     21360
+     cgtttcgcgg cgttaaccgc gcaaaatttc gtcagcgatc ttctggtgaa gcatttgcgc     21420
+     gtaaaatttc ttgccgtagg tgatgatttc cctttggcgc tggtcgtgaa ggcgatttct     21480
+     tgttattaca gaaagctggc atggaatacg gcttcgatat caccagtacg caaacttttt     21540
+     gcagaggtgg cgtgcgcatc agcagcacgg ctgcgtcagg cccttgcgga tgacaatctg     21600
+     gctctggcag agagtttact ggggcacccg tttgccatct ccgggcgtgt agtccacggt     21660
+     gatgaattag ggcgcactat aggtttcccg acggcgaatg taccgccgcg ccgtcaggtt     21720
+     tccccggtga aaggggttta tgcggtagaa gtgctgggcc tcggtgaaaa gccgttaccc     21780
+     ggcgtggcaa acatcggaac acgcccaacg gttgccggta ttcgccagca gctggaagtg     21840
+     catttgttag atgttgcaat ggacctttac ggtcgccata tacaagtagt gctgcgtaaa     21900
+     aaaatacgca atgagcagcg atttgcgtcg ctggacgaac tgaaagcgca gattgcgcgt     21960
+     gatgaattaa ccgcccgcga attttttggg ctaacaaaac cggcttaagc ctgttatgta     22020
+     atcaaaccga aatacggaac cgagaatctg atgagtgact ataaatcaac cctgaatttg     22080
+     ccggaaacag ggttcccgat gcgtggcgat ctcgccaagc gcgaacccgg aatgctggcg     22140
+     cgttggactg atgatgatct gtacggcatc atccgtgcgg ctaaaaaagg caaaaaaacc     22200
+     ttcattctgc atgatggccc tccttatgcg aatggcagca ttcatattgg tcactcggtt     22260
+     aacaagattc tgaaagacat tatcgtgaag tccaaagggc tttccggtta tgactcgccg     22320
+     tatgtgcctg gctgggactg ccacggtctg ccgatcgagc tgaaagtcga gcaagaatac     22380
+     ggtaagccgg gtgagaaatt caccgccgcc gagttccgcg ccaagtgccg cgaatacgcg     22440
+     gcgacccagg ttgacggtca acgcaaagac tttatccgtc tgggcgtgct gggcgactgg     22500
+     tcgcacccgt acctgaccat gggacttcaa aactggaagg ccaacatcat ccgcgcgctg     22560
+     ggcaaaatca tcggcaacgg tcacctgcac aaaggcgcga agccagttca ctggtgcgtt     22620
+     gactgccgtt ctgcgctggc gaagcggaag ttgagtatta cgacaaaact tctccgtcca     22680
+     tcgacgttgc tttccagggc agtcgatcag gatgcactga aagcaaaatt tgccgtaagc     22740
+     aacgttaacg gcccaatctc gctggtaatc tggaccacca cgccgtggac tctgcctgcc     22800
+     aaccgcgcaa tctctattgc accagatttc gactatgcgc tggtgcagat cgacggtcag     22860
+     gccgtgattc tggcgaaaga tctggttgaa agcgtaatgc agcgtatcgg cgtgaccgat     22920
+     tctcggcacg gtaaaaggtg cggagctgga gccgctgcgt ttacccatcc gtttatgggc     22980
+     ttcgacgttc cggcaatcct cggcgatcac gttaccctgg atgccggtac cggtgccgtt     23040
+     cacaccgcgc ctggccacgg cccggacgac tatgtgatcg gtcagaaata cggcctggaa     23100
+     accgctaacc cggttggccc ggacggcact tatctgccgg gcacttatcc gacgctggat     23160
+     ggcgtgaacg tcttcaaagc gaacgacatc gtcgctgcgc tgctgcagga aaaaggcgcg     23220
+     ctgctgcacg ttgagaaaat gcagcacagc tatccgtgct gctggcgtca caaaacgccg     23280
+     atcatcttcc gcgcgacgcc gcagtggttc gtcagcatgg atcagaaagg tctgcgtgcg     23340
+     cagtcactga aagagatcaa aggcgtgcag tggatcccgg actggggcca ggcgcgtatc     23400
+     gagtcgatgg ttgctaaccg tcctgactgg tgtatctccc gtcagcgcac ctggggtgta     23460
+     ccgatgtcac tgttcgtgca caaagacacg gaagagctgc atccgcgtac ccttgaactg     23520
+     atggaagaag tggcaaaacg cgttgaagtc gatggcatcc aggcgtggtg ggatctcgat     23580
+     gcgaaagaga tcctcggcga cgaagctgat cagtacgtga aagtgccgga cacattggat     23640
+     gtatggtttg actccggatc tacccactct tctgttgttg acgtgcgtcc ggaatttgcc     23700
+     ggtcacgcag cggacatgta tctggaaggt tctgaccaac accgcggctg gttcatgtct     23760
+     tccctaatga tctccaccgc gatgaagggt aaagcgccgt atcgtcaggt actcacccac     23820
+     ggctttaccg tggatggtca gggccgcaag atgtctaaat ccatcggcaa taccgtttcg     23880
+     ccgcaggatg tgatgaacaa actgggcgcg gatattctgc gtctgtgggt ggcatcaacc     23940
+     gactacaccg gtgaaatggc cgtttctgac gagatcctga aacgtgctgc cgatacgtat     24000
+     cgtcgtatcc gtaacaccgc gcgcttcctg ctggcaaacc tgaacggttt tgatccagca     24060
+     aaagatatgg tgaaacggag agagatggtg gtactggatc gctgggccgt agttgtgcga     24120
+     aagcggcaca ggaagacatc ctcaaggcgt acgaagcata cgatttccac gaagtggtac     24180
+     aagcgtctga tgcgcttctg ctccgttgag atgggttcct tctacctcga catcatcaaa     24240
+     gaccgtcagt actacgccaa aggacacagt gtggcgcgtc gtagctgcca gactgcgcta     24300
+     tatcacatcg cagaagcgct ggtgcgctgg atggcaccaa tcctctcctt caccgctgat     24360
+     gaagtgtggg gctacctgcc gggcgaacgt gaaaaatacg tcttcaccgg tgagtggtac     24420
+     gaaggcctgt ttggcctggc agacagtgaa gcgatgaacg atgcgttctg ggacgagctg     24480
+     ttgaaagtgc gtggcgaagt gaacaaagtc attgagcaag cgcgtgccga caagaaagtg     24540
+     ggtggctcgc tggaagcggc agtaaccttg tatgcagaac cggaactgtc ggcgaaactg     24600
+     accgcgctgg gcgatgaatt acgatttgtc ctgttgacct ccggcgctac cgttgcagac     24660
+     tataacgacg cacctgctga tgctcagcag agcgaagtac tcaaagggct gaaagtcgcg     24720
+     ttgagtaaag ccgaaggtga gaagtgccca cgctgctggc actacaccca ggatgtcggc     24780
+     aaggtggcgg aacacgcaga aatctgcggc cgctgtgtca gcaacgtcgc cggtgacggt     24840
+     gaaaaacgta agtttgcctg atgagtcaat cgatctgttc aacagggcta cgctggctgt     24900
+     ggctggtggt agtcgtgctg attatcgatc tgggcagcaa atacctgatc ctccagaact     24960
+     ttgctctggg ggatacggtc ccgctgttcc cgtcgcttaa tctgcattat gcgcgtaact     25020
+     atggcgcggc gtttagtttc cttgccgata gcggcggctg gcagcgttgg ttctttgccg     25080
+     gtattgcgat tggtattagc gtgatcctgg cagtgatgat gtatcgctcg aaggccacgc     25140
+     agaagctaaa caatatcgct tacgcgctga ttattggcgg cgcgctgggc aacctgttcg     25200
+     accgcctgtg gcacggcttc gttgtcgata tgatcgactt ctacgtcggc gactggcact     25260
+     tcgccacctt caaccttgcc gatactgcca tctgtgtcgg tgcggcactg attgtgctgg     25320
+     aaggtttttt gccttctaga gcgaaaaaac aataataaac cctgccggat gcgatgctga     25380
+     cgcatcttat ccggcctaca gattgctgcg aaatcgtagg ccggataagg cgtttacgcc     25440
+     gcatccggca aaaatcctta aatataagag caaacctgca tgtctgaatc tgtacagagc     25500
+     aatagcgccg tcctggtgca cttcacgcta aaactcgacg atggcaccac cgccgagtct     25560
+     acccgcaaca acggtaaacc ggcgctgttc cgcctgggtg atgcttctct ttctgaaggg     25620
+     ctggagcaac acctgttggg gctgaaagtg ggcgataaaa ccaccttctc gttggagcca     25680
+     gatgcggcgt ttggcgtgcc gtcaccggac ctgattcagt acttctcccg ccgtgaattt     25740
+     atggatgcag gcgagccaga aattggcgca atcatgcttt ttaccgcaat ggatggcagt     25800
+     gagatgcctg gcgtgatccg cgaaattaac ggcgactcca ttaccgttga tttcaaccat     25860
+     ccgctggccg ggcagaccgt tcattttgat attgaagtgc tggaaatcga tccggcactg     25920
+     gaggcgtaac atgcagatcc tgttggccaa cccgcgtggt ttttgtgccg gggtagaccg     25980
+     cgctatcagc attgttgaaa acgcgctggc catttacggc gcaccgatat atgtccgtca     26040
+     cgaagtggta cataaccgct atgtggtcga tagcttgcgt gagcgtgggg ctatctttat     26100
+     tgagcagatt agcgaagtac cggacggcgc gatcctgatt ttctccgcac acggtgtttc     26160
+     tcaggcggta cgtaacgaag caaaaagtcg cgatttgacg gtgtttgatg ccacctgtcc     26220
+     gctggtgacc aaagtgcata tggaagtcgc ccgcgccagt cgccgtggcg aagaatctat     26280
+     tctcatcggt cacgccgggc acccggaagt ggaagggaca atgggccagt acagtaaccc     26340
+     ggaaggggga atgtatctgg tcgaatcgcc ggacgatgtg tggaaactga cggtcaaaaa     26400
+     cgaagagaag ctctccttta tgacccagac cacgctgtcg gtggatgaca cgtctgatgt     26460
+     gatcgacgcg ctgcgtaaac gcttcccgaa aattgtcggt ccgcgcaaag atgacatctg     26520
+     ctacgccacg actaaccgtc aggaagcggt acgcgccctg gcagaacagg cggaagttgt     26580
+     gttggtggtc ggttcgaaaa actcctccaa ctccaaccgt ctggcggagc tggcccagcg     26640
+     tatgggcaaa cgcgcgtttt tgattgacga tgcgaaagac atccaggaag agtgggtgaa     26700
+     agaggttaaa tgcgtcggcg tgactgcggg cgcatcggct ccggatattc tggtgcagaa     26760
+     tgtggtggca cgtttgcagc agctgggcgg tggtgaagcc attccgctgg aaggccgtga     26820
+     agaaaacatt gttttcgaag tgccgaaaga gctgcgtgtc gatattcgtg aagtcgatta     26880
+     agtcattagc agcctaagtt atgcgaaaat gccggtcttg ttaccggcat tttttatgga     26940
+     gaaaacatgc gtttacctat cttcctcgat actgaccccg gcattgacga tgccgtcgcc     27000
+     attgccgccg cgatttttgc acccgaactc gacctgcaac tgatgaccac cgtcgcgggt     27060
+     aatgtctcgg ttgagaaaac tacccgcaat gccctgcaac tgctgcattt ctggaatgcg     27120
+     gagattccgc tcgcccaagg ggccgctgtg ccactggtac gcgcaccgcg tgatgcggca     27180
+     tctgtgcacg gcgaatcggg aatggctggc tacgactttg ttgagcacaa ccgaaagccg     27240
+     ctcgggatac cggcgtttct ggcgattcgg gatgccctga tgcgtgcacc agagcctgtt     27300
+     accctggtgg ccatcggccc gttaaccaat attgcgctgt tactttcaca atgcccggaa     27360
+     tgcaagccgt atattcgccg tctggtgatc atgggtggtt ctgccggacg cggcaactgt     27420
+     acgccaaacg ccgagtttaa tattgctgcc gatccagaag ctgctgcctg tgtcttccgc     27480
+     agtggtattg aaatcgtcat gtgcggtttg gatgtcacca atcaggcaat attaactcct     27540
+     gactatctct ctacactgcc gcagttaaac cgtaccggga aaatgcttca cgccctgttt     27600
+     agccactacc gtagcggcag tatgcaaagc ggcttgcgaa tgcacgatct ctgcgccatc     27660
+     gcctggctgg tgcgcccgga cctgttcact ctcaaaccct gttttgtggc agtggaaact     27720
+     cagggcgaat ttacctcagg cacgacggtg gttgatatcg acggttgcct gggcaagcca     27780
+     gccaatgtac aggtggcatt ggatctggat gtgaaaggct tccagcagtg ggtggctgag     27840
+     gtgctggctc tggcgtcgta acctgtcaca tgttattggc atgcagtcat tcatcgactc     27900
+     atgcctttca ctgatatccc tccctgttta tcattaattt ctaattatca gcgtttttgg     27960
+     ctggcggcgt agcgatgcgc tggttactct gaaaacggtc tatgcaaatt aacaaaagag     28020
+     aatagctatg catgatgcaa acatccgcgt tgccatcgcg ggagccgggg ggcgtatggg     28080
+     ccgccagttg attcaggcgg cgctggcatt agagggcgtg cagttgggcg ctgcgctgga     28140
+     gcgtgaagga tcttctttac tgggcagcga cgccggtgag ctggccggag ccgggaaaac     28200
+     aggcgttacc gtgcaaagca gcctcgatgc ggtaaaagat gattttgatg tgtttatcga     28260
+     ttttacccgt ccggaaggta cgctgaacca tctcgctttt tgtcgccagc atggcaaagg     28320
+     gatggtgatc ggcactacgg ggtttgacga agccggtaaa caagcaattc gtgacgccgc     28380
+     tgccgatatt gcgattgtct ttgcggccaa ttttagcgtt ggcgttaacg tcatgcttaa     28440
+     gctgctggag aaagcagcca aagtgatggg tgactacacc gatatcgaaa ttattgaagc     28500
+     acatcataga cataaagttg atgcgccgtc aggcaccgca ctggcaatgg gagaggcgat     28560
+     cgcccacgcc cttgataaag atctgaaaga ttgcgcggtc tacagtcgtg aaggccacac     28620
+     cggtgaacgt gtgcctggca ccattggttt tgccaccgtg cgtgcaggtg acatcgttgg     28680
+     tgaacatacc gcgatgtttg ccgatattgg cgagcgtctg gagatcaccc ataaggcgtc     28740
+     cagccgtatg acatttgcta acggcgcggt aagatcggct ttgtggttga gtggtaagga     28800
+     aagcggtctt tttgatatgc gagatgtact tgatctcaat aatttgtaac cacaaaatat     28860
+     ttgttatggt gcaaaaataa cacatttaat ttattgatta taaagggctt taatttttgg     28920
+     cccttttatt tttggtgtta tgtttttaaa ttgtctataa gtgccaaaaa ttacatgttt     28980
+     tgtcttctgt ttttgttgtt ttaatgtaaa ttttgaccat ttggtccact tttttctgct     29040
+     cgtttttatt tcatgcaatc ttcttgctgc gcaagcgttt tccagaacag gttagatgat     29100
+     ctttttgtcg cttaatgcct gtaaaacatg catgagccac aaaataatat aaaaaatccc     29160
+     gccattaagt tgacttttag cgcccatatc tccagaatgc cgccgtttgc cagaaattcg     29220
+     tcggtaagca gatttgcatt gatttacgtc atcattgtga attaatatgc aaataaagtg     29280
+     agtgaatatt ctctggaggg tgttttgatt aagtcagcgc tattggttct ggaagacgga     29340
+     acccagtttc acggtcgggc cataggggca acaggttcgg cggttgggga agtcgttttc     29400
+     aatacttcaa tgaccggtta tcaagaaatc ctcactgatc cttcctattc tcgtcaaatc     29460
+     gttactctta cttatcccca tattggcaat gtcggcacca atgacgccga tgaagaatct     29520
+     tctcaggtac atgcacaagg tctggtgatt cgcgacctgc cgctgattgc cagcaacttc     29580
+     cgtaataccg aagacctctc ttcttacctg aaacgccata acatcgtggc gattgccgat     29640
+     atcgataccc gtaagctgac gcgtttactg cgcgagaaag gcgcacagaa tggctgcatt     29700
+     atcgcgggcg ataacccgga tgcggcgctg gcgttagaaa aagcccgcgc gttcccaggt     29760
+     ctgaatggca tggatctggc aaaagaagtg accaccgcag aagcctatag ctggacacaa     29820
+     gggagctgga cgttgaccgg tggcctgcca gaagcgaaaa aagaagacga gctgccgttc     29880
+     cacgtcgtgg cttatgattt tggtgccaag cgcaacatcc tgcggatgct ggtggataga     29940
+     ggctgtcgcc tgaccatcgt tccggcgcaa acttctgcgg aagatgtgct gaaaatgaat     30000
+     ccagacggca tcttcctctc caacggtcct ggcgacccgg ccccgtgcga ttacgccatt     30060
+     accgccatcc agaaattcct cgaaaccgat attccggtat tcggcatctg tctcggtcat     30120
+     cagctgctgg cgctggcgag cggtgcgaag actgtcaaaa tgaaatttgg tcaccacggc     30180
+     ggcaaccatc cggttaaaga tgtggagaaa aacgtggtaa tgatcaccgc ccagaaccac     30240
+     ggttttgcgg tggacgaagc aacattacct gcaaacctgc gtgtcacgca taaatccctg     30300
+     ttcgacggta cgttacaggg cattcatcgc accgataaac cggcattcag cttccagggg     30360
+     caccctgaag ccagccctgg tccacacgac gccgcgccgt tgttcgacca ctttatcgag     30420
+     ttaattgagc agtaccgtaa aaccgctaag taatcaggag taaaagagcc atgccaaaac     30480
+     gtacagatat aaaaagtatc ctgattctgg gtgcgggccc gattgttatc ggtcaggcgt     30540
+     gtgagtttga ctactctggc gcgcaagcgt gtaaagccct gcgtgaagag ggttaccgcg     30600
+     tcattctggt gaactccaac ccggcgacca tcatgaccga cccggaaatg gctgatgcaa     30660
+     cctacatcga gccgattcac tgggaagttg tacgcaagat tattgaaaaa gagcgcccgg     30720
+     acgcggtgct gccaacgatg ggcggtcaga cggcgctgaa ctgcgcgctg gagctggaac     30780
+     gtcagggcgt gttggaagag ttcggtgtca ccatgattgg tgccactgcc gatgcgattg     30840
+     ataaagcaga agaccgccgt cgtttcgacg tagcgatgaa gaaaattggt ctggaaaccg     30900
+     cgcgttccgg tatcgcacac acgatggaag aagcgctggc ggttgccgct gacgtgggct     30960
+     tcccgtgcat tattcgccca tcctttacca tgggcggtag cggcggcggt atcgcttata     31020
+     accgtgaaga gtttgaagaa atttgcgccc gcggtctgga tctctctccg accaaagagt     31080
+     tgctgattga tgagtcgctg atcggctgga aagagtacga gatggaagtg gtgcgtgata     31140
+     aaaacgacaa ctgcatcatc gtctgctcta tcgaaaactt cgatgcgatg ggcatccaca     31200
+     ccggtgactc catcactgtc gcgccagccc aaacgctgac cgacaaagaa tatcaaatca     31260
+     tgcgtaacgc ctcgatggcg gtgctgcgtg aaatcggcgt tgaaaccggt ggttccaacg     31320
+     ttcagtttgc ggtgaacccg aaaaacggtc gtctgattgt tatcgaaatg aacccacgcg     31380
+     tgtcccgttc ttcggcgctg gcgtcgaaag cgaccggttt cccgattgct aaagtggcgg     31440
+     cgaaactggc ggtgggttac accctcgacg aactgatgaa cgacatcact ggcggacgta     31500
+     ctccggcctc cttcgagccg tccatcgact atgtggttac taaaattcct cgcttcaact     31560
+     tcgaaaaatt cgccggtgct aacgaccgtc tgaccactca gatgaaatcg gttggcgaag     31620
+     tgatggcgat tggtcgcacg cagcaggaat ccctgcaaaa agcgctgcgc ggcctggaag     31680
+     tcggtgcgac tggattcgac ccgaaagtga gcctggatga cccggaagcg ttaaccaaaa     31740
+     tccgtcgcga actgaaagac gcaggcgcag atcgtatctg gtacatcgcc gatgcgttcc     31800
+     gtgcgggcct gtctgtggac ggcgtcttca acctgaccaa cattgaccgc tggttcctgg     31860
+     tacagattga agagctggtg cgtctggaag agaaagtggc ggaagtgggc atcactggcc     31920
+     tgaacgctga cttcctgcgc cagctgaaac gcaaaggctt tgccgatgcg cgcttggcaa     31980
+     aactggcggg cgtacgcgaa gcggaaatcc gtaagctgcg tgaccagtat gacctgcacc     32040
+     cggtttataa gcgcgtggat acctgtgcgg cagagttcgc caccgacacc gcttacatgt     32100
+     actccactta tgaagaagag tgcgaagcga atccgtctac cgaccgtgaa aaaatcatgg     32160
+     tgcttggcgg cggcccgaac cgtatcggtc agggtatcga attcgactac tgttgcgtac     32220
+     acgcctcgct ggcgctgcgc gaagacggtt acgaaaccat tatggttaac tgtaacccgg     32280
+     aaaccgtctc caccgactac gacacttccg accgcctcta cttcgagccg gtaactctgg     32340
+     aagatgtgct ggaaatcgtg cgtatcgaga agccgaaagg cgttatcgtc cagtacggcg     32400
+     gtcagacccc gctgaaactg gcgcgcgcgc tggaagctgc tggcgtaccg gttatcggca     32460
+     ccagcccgga tgctatcgac cgtgcagaag accgtgaacg cttccagcat gcggttgagc     32520
+     gtctgaaact gaaacaaccg gcgaacgcca ccgttaccgc tattgaaatg gcggtagaga     32580
+     aggcgaaaga gattggctac ccgctggtgg tacgtccgtc ttacgttctc ggcggtcggg     32640
+     cgatggaaat cgtctatgac gaagctgacc tgcgtcgcta cttccagacg gcggtcagcg     32700
+     tgtctaacga tgcgccagtg ttgctggacc acttcctcga tgacgcggta gaagttgacg     32760
+     tggatgccat ctgcgacggc gaaatggtgc tgattggcgg catcatggag catattgagc     32820
+     aggcgggcgt gcactccggt gactccgcat gttctctgcc agcctacacc ttaagtcagg     32880
+     aaattcagga tgtgatgcgc cagcaggtgc agaaactggc cttcgaattg caggtgcgcg     32940
+     gcctgatgaa cgtgcagttt gcggtgaaaa acaacgaagt ctacctgatt gaagttaacc     33000
+     cgcgtgcggc gcgtaccgtt ccgttcgtct ccaaagccac cggcgtaccg ctggcaaaag     33060
+     tggcggcgcg cgtgatggct ggcaaatcgc tggctgagca gggcgtaacc aaagaagtta     33120
+     tcccgccgta ctactcggtg aaagaagtgg tgctgccgtt caataaattc ccgggcgttg     33180
+     acccgctgtt agggccagaa atgcgctcta ccggggaagt catgggcgtg ggccgcacct     33240
+     tcgctgaagc gtttgccaaa gcgcagctgg gcagcaactc caccatgaag aaacacggtc     33300
+     gtgcgctgct ttccgtgcgc gaaggcgata aagaacgcgt ggtggacctg gcggcaaaac     33360
+     tgctgaaaca gggcttcgag ctggatgcga cccacggcac ggcgattgtg ctgggcgaag     33420
+     caggtatcaa cccgcgtctg gtaaacaagg tgcatgaagg ccgtccgcac attcaggacc     33480
+     gtatcaagaa tggcgaatat acctacatca tcaacaccac ctcaggccgt cgtgcgattg     33540
+     aagactcccg cgtgattcgt cgcagtgcgc tgcaatataa agtgcattac gacaccaccc     33600
+     tgaacggcgg ctttgccacc gcgatggcgc tgaatgccga tgcgactgaa aaagtaattt     33660
+     cggtgcagga aatgcacgca cagatcaaat aatagcgtgt catggcagat atttttcatc     33720
+     cgctaatttg atcgaataac taatacggtt ctctgatgag gaccgttttt ttttgcccat     33780
+     taagtaaatc ttttggggaa tcgatatttt tgatgacata agcaggattt agctcacact     33840
+     tatcgacggt gaagttgcat actatcgata tatccacaat tttaatatgg ccttgtttaa     33900
+     ttgcttcaaa acgagtcata gccagacttt taatttgtga aactggagtt cgtatgtgtg     33960
+     aaggatatgt tgaaaaacca ctctacttgt taatcgccga atggatgatg gctgaaaatc     34020
+     ggtgggtgat agcaatagag agatctctat tcatttcgat attgaacaca gcaaggcggt     34080
+     taataccctg acttatattc tgtcggaagt cacagaaata agctgcgaag ttaagatgat     34140
+     ccctaataag ctggaagggc ggggatgcca gtgtcagcga ctggttaaag tggtcgatat     34200
+     ccatgagcaa atttacgcgc gcctgcgcaa taacagtcgg gaaaaattag tcggtgtaag     34260
+     aaagacgccg cgtattcctg ccgttccgct cacggaactt aaccgcgagc agaagtggca     34320
+     gatgatgttg tcaaagagat tgcgtcgtta attttatctc gttgataccg ggcgtcctgc     34380
+     ttgaccagat gcgatgttgt agcatcttat ccagcaacca ggtcgcatcc ggcaagatca     34440
+     ccgtttaggc gtcacatccg tcgtcccctg caaacggggg cgattttcct ccatttgcct     34500
+     cagtggctgc gtttcatgta acgatacatg acagcgcccg acaagatcct gatactcttt     34560
+     ggtattcaac cgtttccagt gtaactcgtc gtcactaaca ttgcgtacag cgcgggctgg     34620
+     cgtacccatc aacaactggc gtttctcgcc gcgaaagccc gctttgacaa agctcatggc     34680
+     ggcaacaatg ctctcttcgc caatgaccgc gccatccata atcacgctgt tcatcccgac     34740
+     caatgcatcg cgaccaatca aacaaccatg caggatcgct ccgtgcccga tatggccgtt     34800
+     ttccccaacg atagtgtcag tgtcgcagta gccatgcata atgcagccat cctgaatatt     34860
+     ggctcccgct tgcacgatca accgcccgta gtcaccacgc agtgaggcga gtgggccgat     34920
+     gtagacaccg gctcccacaa tcacatcgcc aatcaagacg gcactgggat ggacaaacgc     34980
+     cgtcgggtga accaccggaa ttaacccctc aaaggcgtaa tagctcacgg ttgttaacgt     35040
+     cctttccaca ccggatcgcg cttctcggca aacgccagcg gcccttcaat ggcatcttcc     35100
+     gaatgcagaa ccgatggata gtgtttcaac acgccgctgc gaatatagcg atacgcttct     35160
+     tctaccggca tttcgctggt ggtgcggtag atctctttca gcgccgcaat cgccagcggg     35220
+     gcgctgttaa ccagctgctg agccagttcg cgggcgttat ccatcagttc cgcctggcta     35280
+     accacgcggt tgactatccc ccaacgcagc gcctcttctg cgcccattcg tctgccggtc     35340
+     atcaccattt cattgacgat ggcaggcggc aggatcttcg gcagacgcag cacaccgccg     35400
+     ctgtcaggaa cgatgcccag tttggcttcc ggcagggcga agctggcgtt atcggcacaa     35460
+     acaataaaat ctgccgccag cgccagttca aaggcgccgc caaaggcata gccgttcaca     35520
+     gctgcgataa ccggtttgtc gagattgaaa atttcggtta atcccgcaaa accacccgga     35580
+     ccaaagtcag catccggtgc ttcgccttct gctgccgctt ttaaatccca gcccgcggaa     35640
+     aagaacttct ctccggcacc ggtaataatg gcgacacgta attgcggatc gtcacggaaa     35700
+     tttagaaata cttcgcccat ttcaaagctg gtttttgcat caatagcatt cgcttttgga     35760
+     cgatcaaggg taatttccag aattgatcca ttgcgggtca gatgtaatga ttcactcatt     35820
+     ccttttctcc atttttgctt tttcagggac gacaacatcc ctgcaaaaaa tgcatattgt     35880
+     tttagagtgt gattattagc tggcagggta gttccctgct gtttcattta tttcagattc     35940
+     tttctaatta ttttccccga gcaattacgt ggcagatctt ttctgatctc cagataagag     36000
+     ggcactttaa atttcgccat attttgttcg cagaagcgga aaaattcctc ttcgctcaat     36060
+     gtttcacctt cattcagcac cacaaatgct ttgatggctt catcgcgaat cgaatcttta     36120
+     atacccacaa ccacgatgtc ctgaattttc gggtgcgcgg cgataatatt ttccagctcc     36180
+     acgcaggaga cattctcgcc gccacgttta atcatattgc agcggcgatc gacgaaataa     36240
+     aaaaagtcct cttcgtcgcg gtatccggta tcgccggtat gcagccagcc atcggcttcc     36300
+     agcactttcg cagtggcttg tgggttgaga aagtactctt tgaagatggt tttcccaggt     36360
+     atgcctttaa tgcagatttc accgatctca ccagccggga gcgggcgatt gtgatcgtcg     36420
+     cggatctccg cttcgtagca aaaccccacc cgaccaatcg acggccagcg tcgtttatcg     36480
+     ccaggacgat caccgataat gcccacaatg gtttccgtca tcccataaga cgtcagcaag     36540
+     cgaacgccga agcgttcaca aaacgcatct ttttcctgct ccgacaagtt gagataaaac     36600
+     atcacttccc gcaggcggtg ttgctgatcg ttcgctgaag gcggctgtac catcaacgta     36660
+     cggatcatca tcggaataca ttcggtaacg gtggcgcggt acttctgtac ctgtccccag     36720
+     aaggcgcggg cgctgtattt ctcgaccagc acaaaggtgg ccccggcaga aaacgccgcc     36780
+     atcgccgcag tacactggca atcgatatga aacgcaggca ttaccgtcag gtagacgtca     36840
+     tcgtcacgca gtgcacactg ccaggcggag taatatccag cgaagcgcag gttgtaatgg     36900
+     gtaatcacca cacctttcgg tcgggaggtg gtgccggagg tgaagagaat ttccgccgta     36960
+     tcgtcagtcg atagcggcgg tgcatagcac aaggtggcag gttgttgatt tttcagttga     37020
+     gtaaacgaac tcacgccatc atcagcggga agtgccacat ctgtcaggca aatgtgccgc     37080
+     aattgagtgg catcttcctg ctgaatctgt tgatacatag gatagaattg cgcactggtc     37140
+     accagcaggc acgcctggct attttgcagg atccacgcgc tttcctcgca caacaggcgg     37200
+     gcgttaatcg gcaccataat cgcgccaatt tttgccagcc cgaaccagca aaagataaat     37260
+     tccgggcagt tgtcgagatg tagtgcaacc ttgtcgcctt tgcgaatccc cagcgtataa     37320
+     aacaggtttg ccgtgcggtt aatctcctga tttaactcaa gataactata ccggttaacg     37380
+     actccgccgc tggattcaca aatcagcgcc gttttatgac cgtaaacgtc cgcaagatcg     37440
+     tcccacattt gacgtagatg ttgtccgcca atgatatcca ttgcacctct atccattttt     37500
+     gttcgtttgt tattgggcgg gcgctagtca ggcaagccga ctgacgccac gcgtttagtc     37560
+     ctcaactttg gccagacctt tgctgaccaa ctcctgaatg tcgttttcgc tgtagccgat     37620
+     atttttcaaa atggcagccg tgtccatgcc atgtgagggc attccgcgcc agatttgtcc     37680
+     ggggttattt ttgaatttcg gcatgatgtt cggccctttg caggtgcgac catccatcgt     37740
+     ttgccactga gtgattgatt cgcgagccac atactgtgga ttgctttcca gttccggtac     37800
+     ggtcagcact ttggcgcagg cgatattcag ttcagcaaag cgttctttta cttccgcgat     37860
+     ggtatgtgtc gccagccagg catcgagttt ctcttcaacc agtgggccgt aagggcattc     37920
+     gatacggtgg ataagctgag tgccttccgg gatttctggc gtgccaagca gatgtgcgag     37980
+     gccaatatct ttaaagcact cttcaatttg ggtaatgccc accagttcca tcacgatgta     38040
+     gccgtcggca catttataca gaccgcaacc ggcgtagtag ggatctttac ctttgctcat     38100
+     gcgcgggcac atttcgccgc cgttgaagta atccatcatg aagtactggc ccatacgcag     38160
+     catcacttca tacatggcga tgtcgatact ttcgccttta ccggtttcac gcactttatg     38220
+     cagtgctgcc agcgccgccg tggtggcggt caggccagaa aagtaatcgg cggtatacgg     38280
+     gaaggcaggc attggctggt caacatcacc gttctgaatc aggtaaccac taaaggcctg     38340
+     ggcgatagtg ttataggccg gaagattggt gtactcctcg gtgccgtact gaccaaaacc     38400
+     ggacaggtga gcgataacca gtttcgggtt gtgctgccac agtacttcat cggtaatgcc     38460
+     acgacgggca aaggccggac ctttactggc ttcgatgaag atatcggtgg tttccattaa     38520
+     tttcagaaac gcttcgcggc cttcatcttt gaaaatattt aacgacagcg cgtgcaaatt     38580
+     gcggcgggag agttgcgggt agttcggttg aacgcgaatg gtgtcggccc aggcgacgtt     38640
+     ctcgatccag ataacttccg cgccccattc tgcgaacatt tgcccggcaa acggtccggc     38700
+     gatttcgata ccggagaaga caacgcgcaa tccggccaac ggcccgaatt tcggcatggg     38760
+     tagatgatcc attatttgct cctgaaaaat ttatgtagcg catgactgcc ggatgcggcg     38820
+     taaacgcttt atccggccta cattcgtgct cccgtaggcc tgataagacg catcagcgcg     38880
+     gcatcaggca gcgcacggac ttagcggtat tgcttcagca ccgcacgacc cagcgtcagg     38940
+     atctgcattt cgtcagatcc cccggagacg cggtctacac gcagatcacg ccagaagcgg     39000
+     ctgatgcggt ggttgcccgc aatcccgaca ccgcccagca cctgcattgc gctatccaca     39060
+     acttcaaatg ccgcattggc gcagaagtat ttgcacatcg ctgcatcgcc agaggtgatg     39120
+     gtgccgttgt ctgctttcca cgctgcttca tacagcatgt ttttcatgga gtttaatttg     39180
+     atcgccatgt gggcgaattt ttcctgaatc aactggaaac gaccaatagc ctcgccaaac     39240
+     tgcacgcgct gattggcgta gcgcgccgca tcttcaaagg cgcacatcgc cgtaccgtag     39300
+     ttggtgaggg ctaccaggaa acgttcatgg tcgaactctt ctttgacgcg gttaaagccg     39360
+     ttaccttccc gaccgaacat gtctttctcg tccagttcca cgtcgtcaaa ggtgatttca     39420
+     cagcagctat ccatacgcag accgagcttt tcaagtttgg tcactttgat gcccggtttg     39480
+     ctcatatcaa caaaccatcc ggtgtagaca ggtttgtccg gagaagcccc gtcgcgcgcc     39540
+     atcaccacga tgtacggggt gtaggcgctg ctggtaataa aacacttact accattaaga     39600
+     taaatcttac catttttacg ggtataagtc gttttcaggc tacccacgtc ggagcccgcg     39660
+     cccggttcgg taatcgctga gttccacatc tgcttaccgg tgccgcggaa agccataatt     39720
+     ttgtcgatct gctcttgtgt gccttcgcgc aggaaggtgt tgaacccgcc cggcaactgg     39780
+     tacagcacat aggttggtgc ccccagacgt cccagctcca tccacacggc ggcgagagta     39840
+     acaaaccccg cgtccagacc accgtgctct tcagggatca gcagactgtc gatacccata     39900
+     tccgccagtg ctttgacaaa acgttccggg tagacgctgt cacggtcgca ctcggcaaaa     39960
+     taggcctccc agttttcgct ggccatcagt tcgcggatac cggcgacaaa cagttcctgc     40020
+     tcatcattta aattaaaatc catctttcaa cctcttgata ttttgggggg ttaattaatc     40080
+     tttccagttc tgtttcgcgt ctttaataaa ggagagcgtc accataatgt tgacgaagaa     40140
+     cagcgggcat cctccggcga taatggcggt ttgaatcggt ttcaggccgc cgagcgccag     40200
+     cagaacaata ccgataatgc caaccagaat tgaccaaccg atacgcacca gcagaggtgg     40260
+     ttcttcacca tcgcgtactt cgcggcaagt ggacatcgcc agggtataag agcaggcgtt     40320
+     aaccagcgta acggtggcaa taaagcagag gatgaagaag ccccacatgg tggcggtgct     40380
+     gagtggcaga gcggcccagg tttcaatgat ggcgcgcgcc acaccgtact gttcgatcag     40440
+     atttggaatg ttgatgatgt ttttatctat caacagcaga gtgttactac cgagtacagt     40500
+     ccacaggatc caggttgacg ctgtcagccc cagcaccatg ccgaagcaca gttcacgcac     40560
+     agtacgacca cgggagatgc gggcgaggaa gatactcatc tggatagcat aaatcaccca     40620
+     ccatgcccag tagaacacgg tccagccctg cgggaagccg cctttagcga tgggatcggt     40680
+     atagaacaac atgcgcggca gatacatcag caacatcccc accgaatcgg tgaagtagtt     40740
+     catgatgaag ctggcaccgc tgacaatgaa cacccaaccc agcatcagga agctcaggta     40800
+     actacgcacg tcactggcga tacgtacccc tttttgcaga ccgcaagcga cgcaaatggc     40860
+     gttgaggata atccagcagg taatgatgat agcgtccagt tgcagggtat gcggaatgcc     40920
+     aaacaaccat tgcatacact cggtcaccag cggcgtggca aggcccagac tggtacccat     40980
+     cgtgaagatc aaggcgacga gatagaagtt gtcgacgata gtgccgaaca accctttggc     41040
+     gtgtttttca cctaccagcg gcaccagtgt cgagctgggg cgaatcactt ccattttgcg     41100
+     gacaaagaag aagtaagcga aggcgactga aaggaagctg taagtggccc acggcagagg     41160
+     tccccagtgg aacaagctgt aagccagccc caactctttc gcccctgtcg agttcggttc     41220
+     taagccaaac ggcggggtgg agatgtagta gtagatctca atcgatcccc agaacagtac     41280
+     ggcagcagac gtacaggagg cgaacatcat aaagatccaa ctggcggtgc taaattctgg     41340
+     cggttcgtta cctaaacgct ttttggcata cgggccaaac accagccaga accaaccgaa     41400
+     aagcatcacc accatatacc attcaaatgc ccatccccat acattggtga cgtaactgaa     41460
+     tacagcatta ataacgacat tcgctgcatc cagatctctg actgtaagcc aacaaagtat     41520
+     gccgacgatt attaacggcg gaaagaaaac cttcggttct attcccgttt ttctcttttc     41580
+     attcttcatg agttaattcc actgtgaaaa cgaatattta ttttgcgttc ccgtttgttt     41640
+     tatttttgtt aacatttaat ataattatta ttaacctcgt ggacgcgtta atggctaact     41700
+     cataatgggt attcaataag ctgtattctg tgattggtat cacatttttg tttcgggtga     41760
+     atagagggcg ttttttcgtt aattttgatt aataatcagt ttgttatgct ctgttgtgag     41820
+     taaaaaataa catctgactt tcaatattgg tgatccataa aacaatattg aaaatttctt     41880
+     tttgctacgc cgtgttttca atattggtga ggaacttaac aatattgaaa gttggattta     41940
+     tctgcgtgtg acattttcaa tattggtgat taaagtttta tttcaaaatt aaagggcgtg     42000
+     atatctgtaa ttaacaccac cgatatgaac gacgtttcct tcatgatttc tggagatgca     42060
+     atgaagatta ttacttgcta taagtgcgtg cctgatgaac aggatattgc ggtcaataat     42120
+     gctgatggtt cattagactt cagcaaagcc gatgccaaaa taagccaata cgatctcaac     42180
+     gctattgaag cggcttgcca gctaaagcaa caggcagcag aggcgcaggt gacagcctta     42240
+     agtgtgggcg gtaaagccct gaccaacgcc aaagggcgta aagatgtgct atcgcgcgac     42300
+     ccggatgaac tgattgtggt gattgatgac cagttcgagc aggcactgcc gcaacaaacg     42360
+     gcgagcgcac tggctgcagc cgcccagaaa gcaggctttg atctgatcct ctgtggcgat     42420
+     ggttcttccg acctttatgc ccagcaggtt ggtctgctgg tgggcgaaat cctcaatatt     42480
+     ccggcagtta acggcgtcag caaaattatc tccctgacgg cagataccct caccgttgag     42540
+     cgcgaactgg aagatgaaac cgaaacctta agcattccgc tgcctgcggt tgttgctgtt     42600
+     tccactgata tcaactcccc acaaattcct tcgatgaaag ccattctcgg cgcggcgaaa     42660
+     aagcccgtcc aggtatggtc ggcggcggat attggtttta acgcagaggc agcctggtca     42720
+     gaacaacagg ttgccgcgcc gaaacagcgc gaacgtcagc gcatcgtgat tgaaggcgac     42780
+     ggcgaagaac agatcgccgc atttgctgaa aatcttcgca aagtcattta accacagggg     42840
+     atgctacgaa cacattttct caagtctggg tattcagcga taccccttct cgtctgccgg     42900
+     aactgatgaa cggtgcgcag gctttagcta atcaaatcaa cacctttgtc ctcgatgatc     42960
+     ggcgacggtg tacaggcaat ccagctcggc gctaatcatg tctggaaatt aaacggcaaa     43020
+     ccggacgatc ggatgatcga agattacgcc ggtgtcatgg ctgacactat tcgccagcac     43080
+     ggcgcagacg gcctggtgct gctgccaaac acccgtcgcg gcaaattact ggcggcaaaa     43140
+     ctgggttatc gccttaaagc ggcggtgtct aacgatgcca gcaccgtcag cgtacaggac     43200
+     ggtaaagcga cagtgaaaca catggtttac ggtggtctgg cgattggcga agaacgcatt     43260
+     gccacgccgt atgcggtact gaccatcagc agcggcacgt tcgatgcggc tcagccagac     43320
+     gcgtcacgca ctgtgaaacg cacaccgtgg agtggcaggc tccggctgtg gcgattaccc     43380
+     gcacggcaac ccaggcgcgc cagagcaaca gcgtcgatct cgacaaagcc cgtctggtgg     43440
+     tcagcgtcgg tcgcggtatt ggcagcaaag agaacattgc gctggcagaa cagctttgca     43500
+     aggcgatagg tgcggagttg gcctgttctc gtccggtggc ggaaaacgaa aaatggatgg     43560
+     agcacgaacg ctatgtcggt atctccaacc tgatgctgaa acctgaactg tacctggcgg     43620
+     tggggatctc cgggcagatc cagcacatgg ttggcgctaa cgcgtcgcaa accattttcg     43680
+     ccatcaataa agataaaaat gcgccgatct tccagtacgc ggattacggc attgttggcg     43740
+     acgccgtgaa gatccttccg gcgctgaccg cagctttagc gcgttgatcc actctggcag     43800
+     ggctgcattt tggccctgcc gctgacaggg agctcttatg tccgaagata tctttgacgc     43860
+     catcatcgtc ggtgcagggc ttgctggttc ggttgccgca ctggtgctcg cccgcgaagg     43920
+     tgcgcaagtg ttagttatcg agcgtggcaa ttccgcaggt gccaagaacg tcaccggcgg     43980
+     gcgtctctat gcccacagtc tggaacacat tattcctggt ttcgccgact ccgcccccgt     44040
+     agaacgcctg atcacccatg aaaaactcgc gtttatgacg gaaaagtcag cgatgactat     44100
+     ggactactgc aatggtgacg aaacctcgcc atcccagcgt tcttactccg ttttgcgcag     44160
+     taaatttgat gcctggctga tggagcaggc cgaagaagcg ggcgcgcagt taattaccgg     44220
+     gatccgcgtc gataacctcg tacagcgcga tggcaaagtc gtcggtgtag aagccgatgg     44280
+     cgatgtgatt gaagcgaaaa cggtgatcct tgctgatggg gtgaactcca tccttgccga     44340
+     aaaattgggg atggcaaaac gcgtcaaacc gacggatgtg gcggttggcg tgaaggaact     44400
+     gatcgagtta ccgaagtcgg ttattgaaga ccgttttcag ttgcagggta atcagggggc     44460
+     ggcttgcctg tttgcgggat cacccaccga tggcctgatg ggcggcggct tcctttatac     44520
+     caatgaaaac accctgtcgc tggggctggt ttgtggtttg catcatctgc atgacgcgaa     44580
+     aaaatcggtg ccgcaaatgc tggaagattt caaacagcat ccggccgttg caccgctgat     44640
+     cgcgggcggc aagctggtgg aatattccgc tcacataatg ccggaagcag gcatcaacat     44700
+     gctgccggag ttggttggtg acggcgtatt gattgccggt gatgccgccg gaatgtgtat     44760
+     gaacttcggt tttaccattc gcggtatgga tctggcgatt gccgccgggg aagccgcagc     44820
+     aaaaaccgtg ctttcagcga tgaaaagcga cgatttcagt aagcaaaaac tggcggaata     44880
+     tcgtcagcat cttgagagtg gtccgctgcg cgatatgcgt atgtaccaga aactaccggc     44940
+     gttccttgat aacccacgca tgtttagcgg ctacccggag ctggcggtgg gtgtggcgcg     45000
+     tgacctgttc accattgatg gcagcgcgcc ggaactgatg cgcaagaaaa tcctccgcca     45060
+     cggcaagaaa gtgggcttca tcaatctaat caaggatggc atgaaaggag tgaccgtttt     45120
+     atgacttctc ccgtcaatgt ggacgtcaaa ctgggcgtca ataaattcaa tgtcgatgaa     45180
+     gagcatccgc acattgttgt gaaggccgat gctgataaac aggtgctgga gctgctggtg     45240
+     aaagcgtgcc ccgcaggtct gtacaagaag caggatgacg gcagtgtgcg cttcgattac     45300
+     gccggatgtc tggagtgcgg cacctgtcgc attctggggc tggggtcggc gctggaacag     45360
+     tgggaatacc cgcgcggcac ctttggtgtg gagttccgtt acagctaatg ttgctttgat     45420
+     acgtaacgcc gcactgactc tcattgcaaa aaacatgaat agctatgcaa ccgtccagaa     45480
+     accttgacga tctcaaatgc ttgtcccact accgccgcat tttgctgtgg ggaagcggtg     45540
+     gtccgtttct gtatggttat gtactggtaa tgattggcgt ggcgctggag caactgacgc     45600
+     cggcgctgaa actggacgct gactggattg gcttgctggg cgcgggaacg ctcgccgggc     45660
+     tgttcgttgg cacatcgctg tttggttata tttccgataa agtcggacgg cgcaaaatgt     45720
+     tcctcattga tatcatcgcc atcggcgtga tatcggtggc gacgatgttt gtttcatccc     45780
+     ccgtcgaact gttggtgatg cgggtattta tcggcattgt catcggtgca gattatccca     45840
+     tcgccacctc aatgatcaac gagttctcca gtacccgtca gcgggcggtt ttccatcagc     45900
+     tttattgccg cgatgtggta tgttggcgcg acctgtgccg atctggtcgg ctactggctt     45960
+     tatgatgtgg aaggcggctg gcgctggatg ctgggtagcg cggcgatccc ctgtctgttg     46020
+     attttgattg gtcagattcg aactgcctga atctccccgc tggttattat gcaaagggcg     46080
+     agtaaaagag tgcgaggaga tgatgatcaa actgtttgga gaaccggtgg ctttcgatga     46140
+     agagcagccg cagcaaaccc ggttttcgcg atctgtttaa tcgctgccat tttccttttg     46200
+     ttctgtttgt tgccgccatc tggacctgcc aggtgatccc aatgttcgcc atttacacct     46260
+     ttggcccgca aattgttggt ttgttgggat tgggggttgg caaaaacgcg gcactaggaa     46320
+     atgtggtgat tagcctgttc tttatgctcg gctgtattcc gccgatgctg tggttaagca     46380
+     ctgccggacg gcgtccattg ttgattggca gctttgccat gatgacgctg gcgctggcgg     46440
+     ttttggggct aatcccggat atggggatct ggctggtagt gatggccttt gcggtgtatg     46500
+     cctttttctc tggcgggccg ggtaatttgc agtggctcta tcctaatgaa ctcttcccga     46560
+     cagatatccg cgcctctgcc gtgggcgtga ttatgtcctt aagtcgtatt ggcaccattg     46620
+     tttcgacctg ggcactaccg atctttatcg ataattacgg tatcagtaac acgatgctaa     46680
+     tgggggcggg tatctcgctg tttggcttgt tgatttccgt agcgtttgcc ccggagactc     46740
+     gagggatgtc actggcgcag accagcaata tgacgatccg cgggcagaga atggggtaaa     46800
+     ttgttcagat ttctctcttt tctgaatcaa tattattgac tataagccgc gtgaatatat     46860
+     gactacactt tgtgggaaaa caaaggcgta atcacgcggg ctacctatga ttcttataat     46920
+     ttatgcgcat ccgtatccgc atcattccca tgcgaataaa cggatgcttg aacaggcaag     46980
+     gacgctggaa ggcgtcgaaa ttcgctctct ttatcaactc tatcctgact tcaatatcga     47040
+     tattgccgcc gagcaggagg cgctgtctcg cgccgatctg atcgtctggc agcatccgat     47100
+     gcagtggtac agcattcctc cgctcctcaa actttggatc aataaagttt tctcccacgg     47160
+     ctgggcttac ggacatggcg gcacggcgct gcatggcaaa catttgctgt gggcggtgac     47220
+     gaccggcggc ggggaaagcc attttgaaat tggtgcgcat ccgggctttg atggtctgtc     47280
+     gcagccgcta caggcgacgg caatctactg cgggctgaac tggctgccac cgtttgccat     47340
+     gcactgcacc tttatttgtg acgacgaaac cctcgaaggg caggcgcgtc actataagca     47400
+     acgtctgctg gaatggcagg aggcccatca tggatagcca tacgctgatt caggcgctga     47460
+     tttatctcgg ttcggcagcg ctgattgtac ccattgcggt acgtcttggt ctgggatcgg     47520
+     tacttggcta cctgatcgcc ggctgcatta ttggcccgtg ggggctgcga ctggtgaccg     47580
+     atgccgaatc tattctgcac tttgccgaga ttggggtggt gctgatgctg tttattatcg     47640
+     gcctcgaact cgatccacaa aggctgtgga agctgcgtgc ggcagtgttc ggctgtggcg     47700
+     cattgcagat ggtgatttgc ggcggcctgc tggggctgtt ctgcatgtta cttgggctgc     47760
+     gctggcaggt cgcggaattg atcggcatga cgctggcgct ctcctctacg gcgattgcca     47820
+     tgcaggcgat gaatgaacgc aatctgatgg tgacgcaaat gggtcgcagt gcctttgcgg     47880
+     tgctgctgtt ccaggatatc gcggcgatcc cgctggtggc gatgattccg ctactggcaa     47940
+     cgagcagtgc ctcgacgacg atgggcgcat ttgctctctc ggcgttaaaa gtggcgggtg     48000
+     cgctggtgct ggtggtattg ctggggcgct atgtcacgcg tccggcgctg cgttttgtag     48060
+     cccgctctgg cttgcgggaa gtgtttagtg ccgtggcgtt attcctcgtg tttggctttg     48120
+     gtttgctgct ggaagaggtc ggcttgtcga tggcgatggg cgcgtttctg gcgggcgtac     48180
+     tgctggcaag ctcggaatac cgtcatgcgc tggagagcga tatcgaacca tttaaaggtt     48240
+     tgctgttggg gctgtttttc atcggtgttg gcatgtcgat agactttggc acgctgcttg     48300
+     aaaacccatt gcgcattgtc attttgctgc tcggtttcct catcatcaaa atcgccatgc     48360
+     tgtggctgat tgcccgaccg ttgcaagtgc caaataaaca gcgtcgttgg tttgcggtgt     48420
+     tgttagggca gggcagtgag tttgcctttg tggtatttgg cgcggcgcag atggcgaatg     48480
+     tgctggagcc ggagtgggcg aaatcgctga ccctggcggt ggcgctgtcg atggcagcaa     48540
+     cgccgattct gctggtgatc ctcaatcgcc ttgagcaatc ttctactgag gaagcgcgtg     48600
+     aagccgatga gatcgacgaa gaacagccgc gcgtgattat cgccggattc ggtcgttttg     48660
+     ggcagattac cggacgttta ctgctctcca gcggggtgaa aatggtggta ctcgatcacg     48720
+     atccggacca tatcgaaacc ttgcgtaaat ttggtatgaa agtgttttat ggcgatgcca     48780
+     cgcggatgga tttactggaa tctgccggag cggcgaaagc ggaagtgctg attaacgcca     48840
+     tcgacgatcc gcaaaccaac ctgcaactga cagagatggt gaaagaacat ttcccgcatt     48900
+     tgcagattat tgcccgcgcc cgcgatgtcg accactacat tcgtttgcgt caggcaggcg     48960
+     ttgaaaagcc ggagcgtgaa accttcgaag gtgcgctgaa aaccgggcgt ctggcactgg     49020
+     aaagtttagg tctggggccg tatgaagcgc gagaacgtgc cgatgtgttc cgccgcttta     49080
+     atattcagat ggtggaagag atggcaatgg ttgagaacga caccaaagcc cgcgcggcgg     49140
+     tctataaacg caccagcgcg atgttaagtg agatcattac cgaggaccgc gaacatctgt     49200
+     cattaattca acgacatggc tggcagggaa ccgaagaagg taaacatacc ggcaacatgg     49260
+     cggatgaacc ggaaacgaaa ccctcatcct aataaagagt gacgtaaatc acacttacag     49320
+     ctaactgttt gttttgtttc attgtaatgc ggcgagtcca gggagagagc gtggactcgc     49380
+     cagcagaata taaaattttc ctcaacatca tcctcgcacc agtcgacgac ggtttacgct     49440
+     ttacgtatag tggcgacaat tttttttatc gggaaatctc aatgatcagt ctgattgcgg     49500
+     cgttagcggt agatcgcgtt atcggcatgg aaaacgccat gccgtggaac ctgcctgccg     49560
+     atctcgcctg gtttaaacgc aacaccttaa ataaacccgt gattatgggc cgccatacct     49620
+     gggaatcaat cggtcgtccg ttgccaggac gcaaaaatat tatcctcagc agtcaaccgg     49680
+     gtacggacga tcgcgtaacg tgggtgaagt cggtggatga agccatcgcg gcgtgtggtg     49740
+     acgtaccaga aatcatggtg attggcggcg gtcgcgttta tgaacagttc ttgccaaaag     49800
+     cgcaaaaact gtatctgacg catatcgacg cagaagtgga aggcgacacc catttcccgg     49860
+     attacgagcc ggatgactgg gaatcggtat tcagcgaatt ccacgatgct gatgcgcaga     49920
+     actctcacag ctattgcttt gagattctgg agcggcggta attttgtata gaatttacgg     49980
+     ctagcgccgg atgcgacgcc ggtcgcgtct tatccggcct tcctatatca ggctgtgttt     50040
+     aagacgccgc cgcttcggcc aaatccttat gccggttcga cggctggaca aaatactgtt     50100
+     tatcttccca gcgcaggcag gttaatgtac caccccagca gcagccggta tccagcgcgt     50160
+     atataccttc cggcgtacct ttgccctcca gcgatgccca gtgaccaaag gcgatgctgt     50220
+     attcttcagc gacagggcca ggaatcgcaa accacggttt cagtggggca ggggcctctt     50280
+     ccggcgattc tttgctgtac atatccagtt gaccgttcgg gaagcaaaaa cgcatacggg     50340
+     taaaagcgtt ggtgataaaa cgcagtcttc ccagcccccg caattccggt gaccagttat     50400
+     ttggcatatc gccgtacatg gcatcaagaa agaagggata ggagtcactc gatagcaccg     50460
+     cttctacatc gcgtgcgcac tctttggcgg tctgcagatc ccactgcggc gtgatccctg     50520
+     cgtgggccat caccagcttt ttctcttcgt cgatttgcag cagaggctgg cgccgcagcc     50580
+     agttaagcag ctcgtcggca tccggcgctt ccagcagcgg tgtcaggcga tctaacggtt     50640
+     tattgcggct gatcccggca aataccgcca gcagatgcag atcgtgattg cccagcacca     50700
+     gacgtacgct gtcgcctaag gatttcacat agcgcagaac atccagcgaa cccggcccgc     50760
+     gcgcgaccag atcgcccgtc agccagaggg tatctttccc aggggtaaat tctactttat     50820
+     gcagcaatgc gatcagttca tcgtaacaac catgaacgtc gccaataagg tatgtcgcca     50880
+     tattctttta atgaatgagt gtgggaacgg cgagtcggaa tacgggaatg tcgatgctga     50940
+     aagggacgcc attttcatcg atcatttcgt agtgaccctg catggtgccc agcggggttt     51000
+     caatgattgc accgctggtg tactggtact cttcgccagg cgcgataagt ggctggacgc     51060
+     caaccactcc ttcgccctgg acttcggttt cacggccatt gccattggtg atcagccagt     51120
+     aacgccccaa caactgcact ggcgctcgcc ccagattgcg tatggttacg gtataagcaa     51180
+     aaacgtaacg ttcattatca ggtgaagatt gagcctcaat gtagacgctt tgaacctgaa     51240
+     tacacactcg gggcgaattg atcatcgtta actctcctgc aaaggcgcgt tctccgccca     51300
+     gatagttcgc catctggcaa tattgcgcga cagagatatt ttccgctcgc atcgccgggt     51360
+     cgatccccat tcccgttaac acctcgacgc taaacaggtt gccgaggctg ttacgaatgg     51420
+     ttttacgacg ctggttaaag gcttcggtgg tgatgcggct caacacacga acatctttaa     51480
+     ccgggtgagg catcgttgca tgaggaacca ggcgcacgac ggcggaatcc actttgggtg     51540
+     gtggtgtaaa ggctgacggc ggtacttcca gtaccgggat cacattgcaa tagtattgcg     51600
+     ccatgacgct taatcgacca tacgctttgc tgttcggtcc tgcaaccaga cgattcacca     51660
+     cctctttttg caacataaag tgcatgtcgg caatggcatc agtatagcta aacagatgga     51720
+     acatcaacgg cgtggagatg ttataaggca ggttgccgaa aacacgcagc ggctgaccca     51780
+     ttttctcggc cagttcacca aagttaaagg tcatcgcatc ctgctgataa atcgtcagtt     51840
+     tcgggcctaa gaatggatgc gtttgcagac gtgccgccag atcgcggtca agttcgatga     51900
+     ccgtcagctg gtccagacgt tcgccgaccg gttcggtcaa tgccgccaga ccggggccga     51960
+     tttcgaccat cgcctggccc ttttgcgggt taatggcaga cacaatactg tcgatcacga     52020
+     actgatcgtt gagaaagttt tgcccgaagc gtttacgggc taagtggccc tggtggactc     52080
+     gattattcat tgggtgttaa caatcatttt gatggcgaga ttaagcgccg taataaaact     52140
+     gccgacatcg gctttgccac gtcccgccag ttcaagcgcg gtgccgtggt ccactgatgt     52200
+     gcgaataaag ggcaggccca gcgtaatgtt cacaccgcgc ccgaagccct ggtattttag     52260
+     cacgggaaga ccctgatcgt ggtacatcgc cagcacggcg tcggcgttat caagatattt     52320
+     cggctgaaac agggtatcgg caggcagcgg cccgttgagt ttcatcccct gcgcccgcag     52380
+     ctcattgagc accggaataa tggtgtctat ctcttccgta cccatatgac cgccttcgcc     52440
+     cgcgtgcgga ttcagcccgc agaccagaat gcgcggttcg gcaataccaa atttggtccg     52500
+     caaatcgtga tgcaaaatag caatcacttc gtgcaaaagt gcaggggtga tagcgtctgc     52560
+     gatatcgcgc agcggtaaat gcgtcgttgc cagcgccacg cgaagttctt cggtcgccag     52620
+     catcatcacc acctttttcg cctgcgaacg ctcttcgaaa aactcggtat gaccggtaaa     52680
+     aggaatgcca gcgtcgttaa taacgccttt atgcaccgga cctgtgatca gcgcggcaaa     52740
+     ttcgccgttc agacaaccat cgcacgctcg cgccagcgtt tccaccacat aatgcccatt     52800
+     ttcaaccgct aactgccccg cagtgacagg tgcacgtagc gcgacaggaa gtagcgttaa     52860
+     tgtgcccgca gtttgcggtt gtgcagggga gttgggggaa taagggcgga gggtgagcgg     52920
+     caaaccgagc atcgctgccc ggttggtaag gagagtggca tcggcacaaa caaccagttc     52980
+     gaccggccac tcacgctgtg caagctggac aactaagtcc gggccaatcc cggcgggctc     53040
+     gccgggagtg atcacaacac gttgggtttt aaccattagt tgctcaggat tttaacgtag     53100
+     gcgctggcac gttgttcctg catccagctt gctgcttctt ccgagaactt acggttcatc     53160
+     agcatgcggt atgcacgatc tttctgcgca gcgtcggttt tatcgacatt acgggtatcc     53220
+     agcagttcga ttaaatgcca gccgaatgaa gagtgaaccg gtgcactcat ttgacctttg     53280
+     ttcaggcgag tcagggcgtc acggaaggcc ggatcgaaaa tatctggtgt agcccagccg     53340
+     agatcgccgc cctggttagc agagcctgga tcctgagaga actctttcgc tgcggcagca     53400
+     aaagtcgttt taccactctt gatatcagca gcaatctgtt ccagtttcac acgggcctgt     53460
+     tcgtcagtca tgatcggcga cggtttcagc agaatatggc gagcatgaac ttcggtcacc     53520
+     gagatatttt tgctttcgcc gcgcaggtcg ttaactttca gaatatggaa gccaacgccg     53580
+     gaacgaatcg ggccaacaat gtcgcctttc ttcgcggtgc ttaatgcctg ggcgaagatc     53640
+     ccgggcaact cctgaatacg gccccagccc atctggccgc cgttcagcgc ctgctggtcg     53700
+     gcagaatgag caatcgccag cttaccgaaa tcagcgccgt tacgcgcctg atcgacaatg     53760
+     gcgcgcgcct ggctttccgc ttcgttcacc tgatcagagg tcgggttttc cggcagcggg     53820
+     atcaggatgt ggctcaggtt cagctcagtg ctggcgtcgt tttggttacc cacctgctgc     53880
+     gccagggatt cgacttctgc ggcaggatgg tgatgcgacg acgcacctcg ttgttacgca     53940
+     cttcagagat aatcatctct ttgcggatct ggttacgata ggtgttgtag ttcagtccat     54000
+     cgtaagccag acggctgcgc atctgatcca gcgtcatgtt gttctgtttc gcgatgttag     54060
+     caatcgcctg atccagctgc tcatcggaga ttttcactcc cattttctgc cccatctgca     54120
+     ggatgatttg atccatgatc aaacgttcca tgatttggtg gcgcagcgtc gcgtcatcag     54180
+     gaagttgctg ccttgcctga gcagcgttca gttttaccga ctgcattaat ccatcaacgt     54240
+     cgctttccag cacgacgccg ttattgacga cggctgcgac tttatcgact acctgggggg     54300
+     cagcgaaact ggtattcgcg atcatggcga taccgagaag cagcgttttc cagttcttca     54360
+     tactttttcc atttcaatta accgcactgc ggattacgtg gtaaatcaac aaatcacaaa     54420
+     gtgttttgat acggcagaat gttcgaacgc agcatctctt gcgtacccag accgtagttg     54480
+     gagctcaggc cgcgaagttc gatgttaaag ccgattgcgt tgtcatatac cgcatgttgt     54540
+     ttatcgttat cccaaccgtt cagcttccgc tcgtaaccga cgcgaattgc atagcagcag     54600
+     gagctgtatt gcacacctaa catagagtcg gcttgcttgt tagcattggt gtcgtagtag     54660
+     taggccccaa caatggacca acgatcggca attggccagc tggcgacagc acctacctgc     54720
+     gaaataccat tcttatattg ctcagcagtg gaatagtact taggcagcgt agcctgaata     54780
+     tattccgggc tggcgtaacg gtaattcagc tgtaccagac ggtcttcatc ccgacggtat     54840
+     tcaatgctgg agttactggt cgctacgtta tccagacgtg tatcgtactg aatcccgcca     54900
+     cgcaatcccc aacgctcgga gatacgccag taagtatcgc ctgcccacac cagtgaaccc     54960
+     gttttgtcgt cattctccca tgttatgttg tcatcgccag tgcgagactc cgtgaaatag     55020
+     tagatttgac caacggaaat attaaaacgt tcaacggcag catcatcata tatgcgagat     55080
+     gtgacaccgg tcgtcacctg gttagcggag gcaatacggt caagaccgcc gtaagtccgg     55140
+     tcccggaaca ggccagagta gtcagattgc agcagagacg agtcgtagtt atagatgtcg     55200
+     ctctgatcgc gatacggcac gtacaaatac tgcgcgcgcg gttccagcgt ttgggtataa     55260
+     cccggagcca gcatttccat atcgcgttca aagaccattt tgccgtcaac tttgaattgc     55320
+     ggcattacgc ggttaacgga ttcgtccagc ttggtcgtgt ttctggagtt ataccagtca     55380
+     agattggttt gctgataatg ggttgccagc aacttcgctt cggtattgat gctgccccag     55440
+     ttattagaga gcggcaaatt gatggtcggt tccaggtgaa cacgggttgc ttcaggcatg     55500
+     tcgtctctgg tgttaacaaa gtgcactgcc tggccgtaaa tacgcgtatc aaacggacca     55560
+     acatcattct ggtagtaatt aacgtctaac tgcggctctg ccgagtagct actggtgttc     55620
+     tgttcgctga aaacctggaa ctgcttggtt gaaacggtgg cattgaagtt ttgcaccgca     55680
+     tagccaacgc tgaatttttg cgttgcgtag ccgtcagtac tggaaccgta cttgttatcg     55740
+     aaatcattga agtagctagg atcgctgacc ttggtgtagt cgacgttgaa acgccacacc     55800
+     tgatccatga ccccggagtg gttccagtag aataaccaac gacgtgaact gtcatcgttc     55860
+     gggtgttcat cttcatagac tttatctgaa ggcagatagt ccagttccat caagccagcg     55920
+     cccgcctggg agaggtagcg gaattcgttc tcccacatga tgttgccacg acgatgcata     55980
+     taatgcggcg tgatggtggc atccatattt ggcgcgatgt tccagtaata tggcaggtag     56040
+     aactcaaagt agttggtggt ggtgtacttg gcgttcggga tcaagaaacc agagcgacgt     56100
+     ttgtcaccca ccggcaactg caaatagggg ctataaaaga tcggtaccgg acccacctta     56160
+     aagcgggcgt tccagatctc cgcaacttgt tcttcgcggt catgaataat ttcgctacct     56220
+     accacgctcc aggtgtcaga acccggcaga caggaggtaa agctaccgtt atccagaatg     56280
+     gtatagcggt tttcgccacg ttgtttcatc aggtccgctt taccgcgacc ctggcgaccc     56340
+     accatctggt aatcaccttc ccagacgttg gtatctttgg tgttcagatt cgcccagcct     56400
+     ttcggccctt tgaggatcac ctggttatcg tcgtaatgga cattaccgag cgcatcaacg     56460
+     gtacgtaccg gctccggttg tcctggtgcc tctttttgat ggagctgcac ttcgtcggcc     56520
+     tgcagacggc tgttaccctg catgatatcc acgctgccag taaacacggc gtcatccggg     56580
+     tagtcccctt tcgcgtggtc agcattgata gtcacgggta agtcattggt atcgccctgt     56640
+     accagaggac ggtcatagct tggcacgccc aacatgcact gtgaggcgag gtcggctgcc     56700
+     agtccctgtt gactataaag ggcggtggca atcatggtgg ccaggagagt ggggatacgt     56760
+     tttttcatac gttgatttta ttgttccatc atcggtaacg ttgcgcgtga caaacggtca     56820
+     gagactaacg tactcgtcat ctctacgcta gtgttaatcc tgtccgaata gcgtcagtgg     56880
+     tgttaggcac ggcattgaat gacaggtatg ataatgcaaa ttataggcga tgtcccacaa     56940
+     ttgaccgcag ccggaaaacg gtaaaagcac ctttatattg tgggagatag ccctgatatc     57000
+     cgtgtgtcga tttggggaat atatgcagta ttggggaaaa atcattggcg tggccgtggc     57060
+     cttactgatg ggcggcggct tttggggcgt agtgttaggc ctgttaattg gccatatgtt     57120
+     tgataaagcc cgtagccgta aaatggcgtg gttcgccaac cagcgtgagc gtcaggcgct     57180
+     gttttttgcc accacttttg aagtgatggg gcatttaacc aaatccaaag gtcgcgtcac     57240
+     ggaggctgat attcatatcg ccagccagtt gatggaccga atgaatcttc atggcgcttc     57300
+     ccgtactgcg gcgcaaaatg cgttccgggt gggaaaatca gacaattacc cgctgcgcga     57360
+     aaagatgcgc cagtttcgca gtgtctgctt tggtcgtttt gacttaattc gtatgtttct     57420
+     ggagatccag attcaggcgg cgtttgctga tggttcactg cacccgaatg aacgggcggt     57480
+     gctgtatgtc attgcagaag aattagggat ctcccgcgct cagtttgacc agtttttgcg     57540
+     catgatgcag ggcggtgcac agtttggcgg cggttatcag cagcaaactg gcggtggtaa     57600
+     ctggcagcaa gcgcagcgtg gcccaacgct ggaagatgcc tgtaatgtgc tgggcgtgaa     57660
+     gccgacggat gatgcgacca ccatcaaacg tgcctaccgt aagctgatga gtgaacacca     57720
+     tcccgataag ctggtggcga aaggtttgcc gcctgagatg atggagatgg cgaagcagaa     57780
+     agcgcaggaa attcagcagg catatgagct gataaagcag cagaaagggt ttaaatgacc     57840
+     ctgtaaatga tgctgagtaa ctgcccacga ttaaaggtgg ccgccctggc ggtcacttct     57900
+     ttgagaaaag gcgtttactc agaatggtgg acaggctcaa tgcacggttt acgggagggg     57960
+     ttctgtaggt tttatcgcgt tgaccctgct taaggttgag agctttacga cgagcggaat     58020
+     tatattttta cgtcttaaaa ataaaaaaca catacctgaa tgagcgattt ttgaaagtat     58080
+     atttattcag aacgcgcatc atgagttttt aactcaatgc gaggctatta ccatgaaagt     58140
+     atcagttcca ggcatgccgg ttacactttt aaatatgagc aagaacgata tttataagat     58200
+     ggtgagcggg gacaagatgg acgtgaagat gaatatcttt caacgcttgt gggagacgtt     58260
+     acgccatctg ttctggtcag ataaacagac tgaggcttat aaacttctgt tcaatttcgt     58320
+     gaataaccag actggcaaca tcaacgcctc agaatacttt actggggcta tcaacgagaa     58380
+     tgagagagaa aagtttatca atagcctgga attattcaat aaacttaaaa catgcgcaaa     58440
+     aaatccggat gagttggtcg caaagggcaa tatgcgctgg gtcgcccaga ccttcgggga     58500
+     tatcgagtta agtgtcactt ttttcattga aaagaataag atatgtactc agacgttgca     58560
+     gctgcataag ggccaaggta acttgggcgt tgatcttaga aaggcttacc ttcccggcgt     58620
+     tgacatgagg gattgttacc ttggtaaaaa aacaatgaaa ggtagcaatg atatccttta     58680
+     tgagagacct gggtggaatg ctaacctggg cgtgctaccc cggacggtgc taccccggac     58740
+     ggtgctaacc cggacggtgc taacctggac ggtgctaccg tgaacggtgc tacctcctta     58800
+     tatgatgagg taattattat taataaaatc ccccccaaaa aaattgatac taaaggagtt     58860
+     gctactgaag aagttgctac taaaaaagta ctgctgaaca aattactgac aacgcaatta     58920
+     ttgaatgagc cagaataagc taaggttgaa ggggctggaa cgccccttca accttagcag     58980
+     tagcgtggga tgatttcaca attagaaaga cctgcatgat gagctagaga agaggctagt     59040
+     gacgcaaggc gtcgtgcagg acacggatca ccgagatggg catcgccaac cagactgcta     59100
+     attagcccat gaataacaat cagaaaggac cataacagac ccgttaaaat gaaatataag     59160
+     agacggtcaa cgggtgaaga aaaagttcaa aaattcgctg tggagcagga agggaattac     59220
+     cgaatggaaa gcgtagccac acgcaacaac tgaaagcagt ttggcagaaa caaaaaatcc     59280
+     ccggactcgg ggatttatgt acaagaggca gcccttagga tgagggtata aacgtacagg     59340
+     aaaggttaaa aatccgctgg cgctttaaac gtcatactat tgccatacgc cggatgggta     59400
+     atcgtcaaca tctctgcatg tagcaacaaa cgtggtgcca tcgctctcgc ttctggtgat     59460
+     gcataaaaac gatcgccgag aatcggatga cccagcgcca gcatatgcac acgcaattga     59520
+     tgcgaacgcc cggtaatcgg ttttaacacc actcttgccg tgttatccgc cgcatactcc     59580
+     accacttcat attccgtctg cgccaggttt acccgttttc gtaacagcac tttctgtttc     59640
+     gggcggtttg gccagtcgca aatcagcggc agatccacca gaccttctgc gggggatgga     59700
+     tgcccccaga cgcgggccac atactgcttt ttcggctcgc gctcgcgaga actggcgttt     59760
+     taactcccgc tccgcggctt tggtcagcgc cactacaatc acgccgctgg tagccatatc     59820
+     cagacgatgc accgattctg cctgcggata atcacgctga atgcgcgtca tcacgctgtc     59880
+     tttgtgctct tccagacgac ccggcactga caacaaaccg ctcggcttgt tgaccaccat     59940
+     aatatggtca tcctgataca ggataaccaa ccagggttcc tgcggtggat tgtagttttc     60000
+     catccccatt ttcggctccg ttactgatgc gttacaacga tcaaacgcag ggcatccaga     60060
+     cgccaacctg cctgatccag gctttccatt acctgctgac ggttgctctc aatggcggtc     60120
+     agttcgtcgt cacgaatgtt cgggttcact gcacgcagag cttccagacg agacagctcg     60180
+     gcagacagtt tttcgtcggc ttcgttacgc gctgcatcaa tcaatgcacg ggcagatttc     60240
+     tcgatctgcg cttcacccag ttgaaggata gcgtgaacat cctgctgcac ggcgttaacc     60300
+     agtttgctgc cggtgtgacg gttaaccgcg ttaagctggc ggttaaaggt ttcaaactct     60360
+     acctgcgccg ccaggttgtt gccgttttta tccagcagca tacgtaccgg cgtcggtggc     60420
+     aggaagcggt tgagctgcaa ctgcttcgga gcctgggctt caaccacata aatcagttcc     60480
+     accaacagcg tacctaccgg caacgctttg ttttttaaca gtgaaatcgt gctgctaccg     60540
+     gtatcgccag aaaggatcag atccagaccg ttgcggatca gcggatgctc ccaggtaata     60600
+     aactgtgcat cttcacgcgc cagcgccact tcacgatcaa aggtgatggt gatgccatct     60660
+     tccgacaggc cagggaagtc cggcaccagc atatgatcgg acggcgtcag cacgatcatg     60720
+     ttgtcgccgc gatcgtcctg attgataccg ataatatcga acaggttcat ggcgaaggcg     60780
+     atcaggttgg tatcgtcatc ctgctcttca atgctttctg ccagtgcctg ggctttttcg     60840
+     ccaccgttgg agtggatttc cagcaggcgg tcacgaccct gttccagctg tgctttcagc     60900
+     gcttcatgtt gctcgcggca gtttttgatc agatcgtcaa agccttcggt ttgatccggt     60960
+     gaagccagat agttaatcag atcgttgtat acgctatcgt aaatagtgcg tccggtcggg     61020
+     caggtgtgct caaatgcatc cagaccttcg tgataccagc gcaccagcac cgactgagcg     61080
+     gttttctcca gataaggcac atggatctga atatcgtgcg cctggccgat acgatccaga     61140
+     cgaccaatac gctgctccag tagatccggg ttgaatggca ggtcaaacat caccatgtgg     61200
+     ctggcgaact ggaagttacg tccttcagaa ccgatttctg agcacagcag tacctgtgcg     61260
+     ccggtgtctt cttcggcaaa ccaggcggca gcgcggtcac gttcgataat cgacatacct     61320
+     tcgtggaaca ccgcagcgcg aataccttca cgttcgcgca gtacctgctc cagttgcagc     61380
+     gcagtggcag ctttggcgca gatcaccagc actttctgag agcgatggct ggtcaggtag     61440
+     cccatcagcc actcaacgcg cggatcgaag ttccaccagg tggcgttatc accttcaaat     61500
+     tcctgataaa tacgctccgg gtagagcata tcgcgagcac gatcttccgc acttttacgt     61560
+     gcgcccataa tgccggagac tttaatagcc gtctgatact gcgtcggtag cggcagctta     61620
+     atggtgtgca gctcgcgttt cgggaatcct ttcacaccgt tacgcgtgtt acggaacagc     61680
+     acgcggctgg tgccgtggcg atccatcagc atcgaaacca gctcctgacg ggcgctctgg     61740
+     gcatcttcgc tgtcgctgtt tgctgcctgc aacagcggct cgatatcctg ctcgccgatc     61800
+     atctcgccga gcatgttcag ttcgtcattg ctcagtttgt tacctgccag cagcatggca     61860
+     acggcgtccg caaccggacg ataatttttc tgctcttcaa cgaactgcgc aaaatcgtgg     61920
+     aaacggttcg ggtccagcag acgcagacgg gcgaagtggc tttccatccc cagctgttcc     61980
+     ggggtcgcgg tcagcagcag aacgcccggc acgtgctctg ccagttgttc aatggcctga     62040
+     tattcacggc ttggcgcatc ttcgctccac accaggtgat gcgcttcatc gaccaccagc     62100
+     aggtcccatt cggcttcaca gagatgttcc aggcgctgtt tgctacgacg ggcaaaatcc     62160
+     agcgagcaaa tcaccagctg ttcggtgtca aacgggttgt aagcatcgtg ctgagcttcg     62220
+     gcataacgct catcatcaaa tagcgcaaag cgcaggttga aacggcgcag catttctacc     62280
+     agccactgat gctgtaaggt ttccgggacg ataattagca cacgttcagc agcgccagag     62340
+     agcagttgct gatgcaggat catcccggct tcaatggttt tccctaaacc cacttcgtca     62400
+     gccagcagga cgcgcggcgc gtggcggcga ccaacatcat gagcgatgtt gagctgatgc     62460
+     gggatcaggc tggtacgctg accgcgcagg ccgctgtacg gcatacggaa ctgttcgctg     62520
+     gaatatttac gcgcgcgata acgcagcgca aagcggtcca tacggtcaat ctgcccggca     62580
+     aacagacggt cctgcggttt gctgaacacc agtttgctat caaggaaaac ttcacgcagg     62640
+     gctacgccgg actcttcagt atccaggcga gtaccgatat aggtcagcaa gccattttct     62700
+     tcttttactt cttcgacttg catctgccag ccgtcatggc tggtaatggt atcaccaggg     62760
+     ttgaacatca cgcgggtcac gggggaatca ctgcgtgcgt acagacggtt ttcaccagta     62820
+     gatgggaaaa gtaaagtgac agttcgcgca tccaccgcga caacggttcc aagtcccaat     62880
+     tcgctttctg tatcgctgat ccagcgttga ccaagtgtaa aaggcatatg tgttcggctc     62940
+     tatatcttta attgcaggca ataaccaccc gctaccgtgc ttatgaggta gtggtgttat     63000
+     tcaggtccag gaatggaaag ggcgctatgg tactggatgg caaagcattc gtcacgcatc     63060
+     aaaatggtat ctggcgaact cttttttttg ctcaaaatag cccaagttgc ccggtcataa     63120
+     gtgtagcaaa attatcctca ataaaaggga gtattccctc cgccacgggt tgtagctggc     63180
+     gggtcagata gtgttcgtaa tccagtggtg aacgttggta gtccagcggc tccgggccgt     63240
+     tggtggtcca tacgtactta atggtgccgc gattctgata ttgcaagggg cgaccacgct     63300
+     tttggttttc ttcatcggca aggcgagcgg cgcgtacatg aggcggcaca ttacgctgat     63360
+     actcgctcag cggacggcga aggcgtttac ggtaaaccag tcgcgcatcc agttcacccg     63420
+     ccatcagttt gtcgatggtt tcgcgtacat attcctgata tggctcgttg cggaagatgc     63480
+     gcaggtatag ctcctgctga aactgctggg ccagcggcgt ccagtcggtg cgcacggttt     63540
+     ccagcccttt aaacaccatc cgctgcttgt cgccctcctg aatcagtccg gcataacgct     63600
+     ttttactgcc ggtatcggct ccgcgaatgg ttggcatcag aaaacggcag aaatgggttt     63660
+     catactccag ttctaatgcg ctggtcagcc gttgtttttg cagcgtttcc gcccaccagg     63720
+     cgttaacgtg ctgcaccagt gcacgaccga ttttcgccgc ttcttcttcc gaatgtgcgc     63780
+     ctttcagcca gacaaacgtt gagtcggtat cgccgtagat aacgtcgtag ccctgtgctt     63840
+     caatcaacgc tttggtttgc cgcatgatct gatgaccacg catggtgatc gacgatgcca     63900
+     gccgcggatc gaagaagcgg caggcggtgg tgccgagcac gccataaaag gcattcatga     63960
+     tgattttcag cgcctgcgac agcggtttgt taccctggcg tttggcttca tcgcgcccgt     64020
+     gccagatgtt agtcacaatc tccggcaggc aatgtttttc tcgcgagaac caggcatcga     64080
+     gaaaaccttc ggtactgtgc tctggatcag gctgcgccat gccttccacc agcccgacgg     64140
+     gatcaatcag aaaggtgcgg atgatcgacg ggtacaggct tttatagtcc agcaccagca     64200
+     ctgaatcata aagccctggc cgtgaatcca tcacgtagcc gccagggctg gcgtgcggcg     64260
+     gcacttcgcc gagattaggc gcgacataac cagcgcgatg cattcgcgga aaatagagat     64320
+     gaccaaatgc cgccaccgaa ccgccgtgtc ggtccaccgg caggccgttc accgttgccc     64380
+     gttcgagtaa aaatggcatg atttcagttt tgtggaagat ctgcgtcacc agctcgcaat     64440
+     ctttcaggtt ataagttgcc agcgcaggtt tatcttcggc gaaacggcgg tcaatttcgt     64500
+     ccattcgatc ccacgggtta tcgatagatt ttccttcgcc taatagctcc tgagcgacag     64560
+     tttccagcga gaatgaagag aaatcccaga acgcggattt cagcgcctcg ataccgtcga     64620
+     taattagccg acctttagcc tggggcaaaa aagacgccgt ttttgcgggg ccgtgctcgc     64680
+     gccactccag ctcgctatta tcgcgcccaa gacgcagcgg aagacggtaa cggctcggca     64740
+     tgtttttgca gcattcgcag atcgaactgc accacgttcc aaccgatgat cacatcagga     64800
+     tcgtagttgg caaaccaggc gttgagtttt tccagcaact gcgggcggct ggcgacgtat     64860
+     tccagttcga aatcaagcga ggaggcgtcg ccattctccg gccccagcat ataaacgatg     64920
+     cgctgcccgc agccttccag gccgatgcag tacagctcac cgtggcgggt ggtttcaata     64980
+     tctatagaaa cccacttgag cggcggacga tagtcgggat gcggtttcag acgggcatta     65040
+     acgatagtgc cattgtgcat atcaccctcg acccacaccg gtgaggtgat aaaccgctcc     65100
+     atcagatagc gttctggcgg acgcacatcg gcctcgtaga cggtaacgcc accttcacgc     65160
+     aggcgctttt cgtaattcat caattggcga tgggcgcgac agtaaaggcc atacaccggc     65220
+     tggcggtgaa aatcctttaa cgccagcggt gtcaggcgaa agccttgttc accctgcaaa     65280
+     atatgctgag cgcggggaac ctgatcggcg ggaataaacg ccacggactc ttgcggtgca     65340
+     agcgtaacct gcaacggccc gttgtccgtc gccagccaga aggagacttc tgtcccttgc     65400
+     ggggtgtccc gccagtgtcg ggttaagata aaacctgcct gcgccacgct gaaaatccat     65460
+     caaaaaacca ggcttgagta tagcctggtt tcgtttgatt ggctgtggtt ttatacagtc     65520
+     attactgccc gtaatatgcc ttcgcgccat gcttacgcag atagtgttta tccagcagcg     65580
+     tttgctgcat atccggtaac tgcggcgcta actgacggca gaatatcccc atataagcga     65640
+     cctcttccag cacgatggcg ttatgcaccg catcttcggc atttttgccc catgcaaacg     65700
+     ggccgtggga atggaccaga acgccgggca tttgcgctgc atcgataccc tgtttttcaa     65760
+     aggtttctac gatgacgtta ccggtttccc actcatattc gccgttgatt tctgcgtcgg     65820
+     tcattttgcg ggtgcaggga atggtgccgt agaaatagtc ggcgtgggtg gtgccggttg     65880
+     ctggaatcga ctgacccgcc tgcgcccaga tggtggcgtg gcgcgagtgc gtatgcacaa     65940
+     tgccgccaat ggaggggaat gcctgataga gcagccggtg agttggcgtg tcggaggagg     66000
+     gctttttcgt accttcaacc acttcaccgg tttcgatgct aaccacgacc atatcgtcag     66060
+     cggtcatgac gctgtaatcg acgccggaag gtttgatcac aaagacgccg cgctcgcgat     66120
+     caacggcgct gacgttgccc catgtgagcg tgaccaggtt gtgttttggc agcgccaggt     66180
+     tggcttctaa tacctggcgt ttgagatctt ctaacatgtt gactccttcg tgccggatgc     66240
+     gctttgctta tccggcctac aaaatcgcag cgtgtaggcc tgataagacg cgccagcgtc     66300
+     gcatcaggcg ttgaatgccg gatgcgcttt gcttatccgg cctacaaaat cgcagcgcgt     66360
+     aggcctgata agacgcgcca gcgtcgcatc aggcgttgaa tgccggatgc gctttgctta     66420
+     tccggcctac aaaatcgcag cgtgtaggcc agataagacg cgtcagcgtc gcatcaggcg     66480
+     ttacataccg gatgcggcta cttagcgacg aaacccgtaa tacacttcgt tccagcgcag     66540
+     cgcgtcttta aacgctggca ggcgtgtgtc gttatcaatc accgtgattt caatgtcgtg     66600
+     catctcggcg aattggcgca tatcgttgag gttcagtgca tggctgaaga cggtatggtg     66660
+     cgcgccacca gcgaggatcc acgcttcgga agcagttggc agatccggtt gcgctttcca     66720
+     cagcgcattc gccaccggca gtttcggcag ggagtgcggt gttttcaccg tgtcgataca     66780
+     gttaaccagc agacggtaac gatcgccgag atcaatcagg ctggcgacaa tcgctggacc     66840
+     ggtttgggta ttgaagatca gtcgggcagg atcgtcctta ccaccaatac cgagatgctg     66900
+     aacgtcgagg atcggtttct cttctacggc aatcgacggg cagacttcca gcatatggga     66960
+     gccgagcacc aagtcattac ctttctcgaa gtgataggtg tagtcctcca taaaggaggt     67020
+     gccgccctgc agaccggttg acatcacctt catgatgcga agcagggcgg cggttttcca     67080
+     gtcgccttcg cccgcaaagc cgtaaccctg ctgcatcaga cgctgtacgg ccagacctgg     67140
+     aagctgtttc agaccgtgca aatcttcaaa ggtggtggtg aacgcgtgga agccaccttg     67200
+     ttccaggaaa cgcttcatcc ccagctcaat acgcgccgct tccagcacgt tctgtcgttt     67260
+     ttcgccgtgg atttgtgttg caggcgtcat ggtgtagcag ctttcgtact catcgaccag     67320
+     cgcgttaaca tcgccgtcgc tgatggagtt caccacctgc accagatcgc caaccgccca     67380
+     ggtattgacg gagaaaccga acttgatctg tgcggcaact ttatcaccat cggtgaccgc     67440
+     cacttcacgc atgttatcgc caaaacggca gactttcaga tgacgggtat cctgtttaga     67500
+     aaccgcctga cgcatccagg agccgatacg ctcatgggct tgtttatcct gccagtgacc     67560
+     ggtaacgacg gcatgttgct gacgcatacg cgcgccaatg aagccgaact cgcgaccgcc     67620
+     atgtgcagtc tggttcaggt tcataaagtc catatcgata ctgtcccacg gcagcgccgc     67680
+     gttgaactgg gtgtggaatt gcagcaacgg tttgttgagc atggtcaggc cgttgatcca     67740
+     cattttggcc ggggagaagg tgtgcagcca caccaccaga ccagcgcacg gatcgtcgta     67800
+     attcgcgtcg cggcaaatag cggtgatttc atccggcgtg gtgcccagcg gtttcaacac     67860
+     cagtttgcag ggcagtttcg cttccgtatt cagcgcatta acaacgtgct cggcatgttg     67920
+     ggtgacctga cgcagggttt ccgggccata cagatgctgg ctgccaatga caaaccacac     67980
+     ttcataatta tcaaaaatcg tcattatcgt gtccttatag agtcgcaacg gcctgggcag     68040
+     cctgtgccgg ggcggaagtt ggaagatagt gttgttcggc gctcatcgcc cattgctgat     68100
+     agcggcgata aagctgttca aagcgttgtg cctgttcgct gcgcggttgc agggttttct     68160
+     ctaccgcact ggccattttt tgctgggctg atgggatgtc tgcgtgcact ttcgcggcga     68220
+     cggcagcaaa aatcgccgca ccgagcgcac agcactggtc agaggcaaca atttgcagcg     68280
+     ggcgattcag cacgtcgcag caggcctgca taatgacttg gtttttccgc gcgatgccgc     68340
+     ccagcgccat cacgttattg acggcgatcc cctgatcggt aaagcactcc atgattgcgc     68400
+     gtgcgccaaa ggcggtggca gcaatcaaac cgccgaacag cagcggagcg tcggtagcga     68460
+     ggttaagatc ggtaatcacc cctttcaggc gttggttagc gtttggcgag cgacgaccgt     68520
+     taaaccagtc gagcaccacc ggcaggtgat ccagagacgg atttttggcc catgcttcgg     68580
+     tcagcgccgg aagcagttgt ttctggctgg cgttgatttg cgctttcagt tccggatgct     68640
+     gggcggcaag ctgttccagc ggccagctga gtacgcgacc gaaccaggcg tagatatcac     68700
+     caaacgccga ttggcctgct tccagaccga taaatccagg caccacgctg ccatcaacct     68760
+     gaccgcaaat acctttaact gcccgctcgc caacgctctg tttgtcggca atcagaatgt     68820
+     cgcaggtgga agtaccgata acttttacca gtgcgttagg ctgtgcgcct gcgccaactg     68880
+     cgcccatatg gcagtcaaac gcgccgccgg aaatcaccac gctttcaggc aggccgagac     68940
+     gctgcgccca ttccgggcat aaggtgccca ccggaatatc ggcagtccag gtgtcagtga     69000
+     acagcgggga aggcaaatgg cgattgagga tcgggtccag ctcatcaaag aaactggctg     69060
+     gcggcaagcc gccccagctt tcgtgccaca gagatttatg cccggcgctg caacgtccgc     69120
+     gacgaatatc ctgcgggcgg gtggtaccgg aaagcagagc tggcacccag tcgcacagct     69180
+     caatccacga tgcggcagat tgcgccacgg cgctgtcctg gcgagtcaca tgcaggattt     69240
+     ttgcccagaa ccattcgctg gaataaatac cgccaatata gcgggagtag tcaacattgc     69300
+     ccggcgcgtg gcacaaacgg gtaatctctt cgcttctttc aaccgcagtg tggtctttcc     69360
+     acaatacgaa catcgcgttc gggttttcgg caaactccgg gcgcagcgcc agcacgttac     69420
+     cgtcggcatc aatcggtgcg ggcgtcgagc cggtactgtc aacgccaatc ccgaccacag     69480
+     ctgcgcgctg ttcgacgcta agctctgcaa gcacggtttt cagtgccgct tccattgact     69540
+     caatgtagtc acgcggatga tgacggaact ggttattcgg ggcatcacaa aattgccctt     69600
+     tttgccaacg gggataccac tctacgctgg tggcgatctc ttcaccgctg gcgcagtcca     69660
+     ccgccaaagc tcgcacagaa tcactgccaa aatcgaggcc aattgcaatc gccatcgttt     69720
+     cactccatcc aaaaaaacgg gtatggagaa acagtagaga gttgcgataa aaagcgtcag     69780
+     gtaggatccg ctaatcttat ggataaaaat gctatggcat agcaaagtgt gacgccgtgc     69840
+     aaataatcaa tgtggacttt tctgccgtga ttatagacac ttttgttacg cgtttttgtc     69900
+     atggctttgg tcccgctttg ttacagaatg cttttaataa gcggggttac cggttgggtt     69960
+     agcgagaaga gccagtaaaa gacgcagtga cggcaatgtc tgatgcaata tggacaattg     70020
+     gtttcttctc tgaatggtgg gagtatgaaa agtatggctg aagcgcaaaa tgatcccctg     70080
+     ctgccgggat actcgtttaa cgcccatctg gtggcgggtt taacgccgat tgaggccaac     70140
+     ggttatctcg atttttttat cgaccgaccg ctgggaatga aaggttatat tctcaatctc     70200
+     accattcgcg gtcagggggt ggtgaaaaat cagggacgag aatttgtctg ccgaccgggt     70260
+     gatattttgc tgttcccgcc aggagagatt catcactacg gtcgtcatcc ggaggctcgc     70320
+     gaatggtatc accagtgggt ttactttcgt ccgcgcgcct actggcatga atggcttaac     70380
+     tggccgtcaa tatttgccaa tacgggtttc tttcgcccgg atgaagcgca ccagccgcat     70440
+     ttcagcgacc tgtttgggca aatcattaac gccgggcaag gggaagggcg ctattcggag     70500
+     ctgctggcga taaatctgct tgagcaattg ttactgcggc gcatggaagc gattaacgag     70560
+     tcgctccatc cgccgatgga taatcgggta cgcgaggctt gtcagtacat cagcgatcac     70620
+     ctggcagaca gcaattttga tatcgccagc gtcgcacagc atgtttgctt gtcgccgtcg     70680
+     cgtctgtcac atcttttccg ccagcagtta gggattagcg tcttaagctg gcgcgaggac     70740
+     caacgtatca gccaggcgaa gctgcttttg agcactaccc ggatgcctat cgccaccgtc     70800
+     ggtcgcaatg ttggttttga cgatcaactc tatttctcgc gagtatttaa aaaatgcacc     70860
+     ggggccagcc cgagcgagtt tcgtgccggt tgtgaagaaa aagtgaatga tgtagccgtc     70920
+     aagttgtcat aattggtaac gaatcagaca attgacggct tgacggagta gcatagggtt     70980
+     tgcagaatcc ctgcttcgtc catttgacag gccacattat gcaagcattg cggaacactt     71040
+     tattacccaa ccaccgtgtt cattgatggc ggtggtgttg gtggcctttc tggagtcgct     71100
+     ggcgctggtc ggtttgattc tacccggtac ggtgctgatg gcggggctgg gagcgctgat     71160
+     tggcagcggc gagttaagtt tctggcacgc ctggctggca gggattattg gctgcttgat     71220
+     gggcgactgg atttctttct ggctgggttg gcgttttaaa aagccgttgc atcgctggtc     71280
+     atttctgaag aaaaacaaag cactacttga taaaactgaa catgcgttgc atcaacacag     71340
+     catgttcacc attctggtcg gtcgttttgt tggcccgacg cgtccgctgg tgccaatggt     71400
+     ggcgggaatg ctggatctgc cggtggctaa atttattacg ccgaatatta tcggctgcct     71460
+     gctgtggccg ccgttttact tcctgccagg gattctggcg ggcgcggcga tcgatattcc     71520
+     tgccggaatg cagagcggtg agtttaaatg gttgctgctg gcaacagcgg tgtttttgtg     71580
+     ggttggtggc tggctgtgct ggcggttatg gcgcagcggt aaagcgactg accgtttgag     71640
+     tcattatttg tcccgcggtc gtttgttgtg gctgacgccg ttgatttctg ccatcggcgt     71700
+     ggtggcgctg gtggtgttaa ttcgccaccc gttgatgccg gtgtatatcg atattttgcg     71760
+     taaagtggtt ggggtttagg agatagtctt gtgcgggttg cctgagcgcg acgcttgccg     71820
+     cgtcttatca ggcctacaaa acgcactacc cgtaggtcgg ataaggcgtt cacgccgcat     71880
+     ccgacagtgc atactaaccc gtaatcccca atagtgccga agcactcgcc ttaccgctca     71940
+     acaactcatt ggtcataccc tgccaggcga tgcgcccgtc ggcgactact accgagcgcg     72000
+     tggcgatccg cgccgcatct tccacgctgt gcgacaccat caatagcgtc attttttgct     72060
+     gctggcagct cgtgctcacc agcgtcaaca tctcctgacg taacgccgga tcgagcgcag     72120
+     agaacagttc atcgagcaat aaaatcggct gttcgcgtac cagacaacgc gctaacgcca     72180
+     ctcgctgtcg ctgaccgccg gaaagctcgc ccggtaaccg cgccattaaa ttatcaatcc     72240
+     ccatctggcg ggcgatagcg tgcattttcc cctgctgtac cgcgttcagt ttcaatcccg     72300
+     gatttagccc cagcccgatg ttctgtgcga ccgtcaggtg gctgaacagg ttgttctcct     72360
+     gaaacagcat cgacaccgga cggcgtgacg gcggcatagt tgtgtgatct acgccatcga     72420
+     tagtcagcga accgctggct ggcgtcagaa aaccggcgat caaattcagc agggtacttt     72480
+     tacccgcgcc gcttggcccg aggatcgcca cctgctcgcc gcgttccacc gttaagctaa     72540
+     aacgcatcgg caaatggtgg taaagccagg tgatatcagt cagttttaac atttcgcccc     72600
+     ggtagttttt caatcacggt aaacagcaga aaacagagca gcagcagaat taacgtggtg     72660
+     accgcaccgt cctggctgcg ataggagcca atttgctggt agagataaaa cggcagggtg     72720
+     cggaaatcat cgttaccgaa caacgccacc acgccaaaat caccaatcga cagcacgcat     72780
+     gcaaaggcca gcgcctgcgc cagtggacgt ttcagggcgc gcagctccac cacttttaag     72840
+     cgtgaccagc cttcaatccc cagcgactga cataacatgc tgtagcgggc ggtgatatcg     72900
+     cgcatcgggt tttccagcac tttcagcgca taagggatcg ccattaacgc attggtgaaa     72960
+     atcacaatgc cgtcagcaga ttgtggcagg ccgatagtgt tgttgagcag taaaaagaag     73020
+     ccggtagcca gcacaatccc cggcatggcg aggatcaaca tgccgctcat ctccagcacc     73080
+     tgacccgcca gcattttctg ccgcgcccgc agttcgcgac tgctccatag cagcatcatg     73140
+     gtcagcacta cgcacaatac acctgccgcc agcgcaatac gcaacgaggt ccacagcgcc     73200
+     tgccacagca ccggttgtgc cagcacttcc ggcaactggc gatttacccc atcgacgatc     73260
+     accgccagta acggtggcag caacagcagc agcgccagca caattaacac cgtgtcgcaa     73320
+     atgcggctat gcagacgatc gtccgggtcg cgccagcctt gcagcagcgt ggtgccgggc     73380
+     gcaatggcct tactcaatcg ctgactcaac agcaccagcc cgaggcagca caccatctgg     73440
+     agcagcgcca gcattgccgc gcgggcagga tcgtagtcgt aactcagcgc ctgatagatt     73500
+     gccagctcga tagtggtcgc ctgcggaccg ccccccagcg atagcacggt ggcgaagctg     73560
+     gcgaaacaga gcataaagat aagcgcagca accggcggga tttgtcgccg taaccacggc     73620
+     cattcgacga agcggaaaaa atgccagcta cgcatcccaa gctgggcggc aagttgacgc     73680
+     tgttcgccgg ggatgttttc cagtgcctgg agtaataagc ggctcgccat cggcagatta     73740
+     aaaaacacat gggccagcaa aataccttgc aggccgtagg gcgaaaaggt ccactccaga     73800
+     ccgagcgatt ggcagagtgt tgccagccag ccctggcgac catagacgct aagaatgccg     73860
+     aaaacagcga ccaacaccgg gaggatcaag gtcattgcac acagacgcaa cagcgccagc     73920
+     cgacccggaa agcgcctgcg atagagcgcg cgggcgagga atatcgcggg tatgacagag     73980
+     agcagtgccg agagaaacgc ctgccagaag gagaagcgca ccacatgcca cagatagctg     74040
+     tcctgccaga ctgccaccca gtcatcctgc ggcgcgttcc accacagggc gagaaacgcc     74100
+     gccagcgcaa ccgctaccac cagcgtggtg gcgcttacac ctggaattaa ccagccggga     74160
+     attaacggct gacggcgcgt tgccattcgc taatccatgc ctgacgttgt gccgccactt     74220
+     cggctggcgt gaactccaac gtggttgcgg gtttggtcaa tttttcaaaa ccggcaggca     74280
+     gcgtgacgtt tgccaccgga tacatccagt tgccggttgg gatcgcattc tggaaagccg     74340
+     gagaaaccat aaactggagg aatttttgcg ccagctccgg ctgcttgctg gcagcggtgc     74400
+     gggcggcgac ttccacttgc agatagtgac cttcgctgaa gttcgcggcg gcgtagttat     74460
+     ctttcttctc ttcgagaatg tgataagccg gagaggtggt gtaactcagt accagatcgc     74520
+     tttcaccttt taaaaacagg ccgtaggctt cgctccagcc tttggtgacc gtgaccgttt     74580
+     tcttcgccag tttctgccag gcttgtgggg cgtcatcgcc atagactttt tgcatccata     74640
+     gcaacagacc cagccccggt gtactggtgc gcggatcctg ataaatcacc cgccagtttt     74700
+     gatcgctctc aaccagttct ttcaggcttt gtggcgggtt tttcagtttg ttcttgtcat     74760
+     aaacgaaggc gaagtagcca taatcaaacg gtacgaaagt gtcattattc cagccgccgg     74820
+     gaacgttaac ggcatccgct gccacaccgc ttttggcaaa cagtccggtt ttactggcgg     74880
+     cgtctaacag gttgttatcc agccccagca ccacatcggc tttactgttt ttgccttcca     74940
+     tccgtagacg gttgagaagc gaaacgccat cttccagcgc caccagtttc agttcgcaat     75000
+     tacagtcggc ttcaaaggct tttttaacca ccggaccagg cccccagtcg gcggcgaagg     75060
+     aatcgtaggt ataaacagtc agaacgggtt tagcgaaaac gggcgctgtc acagcaacag     75120
+     caggggcaga cattttttta acactttgca cctcaaaaaa gagtggcaaa ggacttgaga     75180
+     aggagcctca aatcccttcg ccggcgttat ccggatcagg ttcgacgggt attttctcag     75240
+     cgcacgcgta cgcgtggcac cccgttgaga acggcgttag tgtagtgatt ttgttatcaa     75300
+     ccagcaatca tggatccggt ggcgcaaacc acgctgattt aaaatcgaac cagccgaggg     75360
+     tattcatgcg caggccgcgc atactgcgtt gcccctgaat gatcagccag tggtgcaata     75420
+     atggcaccat cgctttgctg gcgaccagtt gctggcacca gttcgccaga ttcatctcgc     75480
+     cattgcgcca gcgagcagcg tcggcttgcc agtcaatggg aatgcaatgt tgtagcagtg     75540
+     gcacttcgca taaatgtgcg aaaacagaga agtccagcgg cagggtaaag ttggcgctgt     75600
+     ttagccagat atcactttcg atctctcctg tatgccactg atcgtagtcg atctctttga     75660
+     ttttcagcgt cacctggtga cttgccagaa tctgctgcat gatcccggca atcacccgat     75720
+     gctcactgtg atcctgacaa aaggttaggg tgaggctttc caggccagcc ggtttttcgc     75780
+     tctttatggt gcgggcatgg tgccaacggg ggaacagtcc ataagccggg aaccacagtt     75840
+     gctggtactg ttcctcagcg aaatagacca agttagttgg agaaagcaca tagcttgccc     75900
+     agtccctgac ttgctgattc gccccgcgat gggtgcggct gtcgaacagt aaatagtagc     75960
+     aaccttcctc caggcggctt tcaatctctt tttcctcgcc ctgtggacct tttagcatca     76020
+     gccctccggc tggctcgtcg gcaatttccg gcagaaccca gacgttaact tcgtcgatta     76080
+     atgcccggta accgaagaag tcatcgaatg cctgaatttt cagttgattg gtgctgttgc     76140
+     gaatcaccgc atacggaccg gtgccgatgg gatggctggc aaagttactg agggtttccc     76200
+     attcgcgcgg caggatcatc gccggaactt gccccagcag taacggtaac cagcggtccg     76260
+     gttgcgtgag atggatatcc agcgtccagg gcgtcggcga cacaatgtca gcaatatgcg     76320
+     aatagagcgg cagcgtattg attcgtttta aagaggcgat cacatcgtcc atttccagtt     76380
+     cacgaccatg gtgaaaatgg actcctggac gcaaaaagaa acgccagtga agcggtgaaa     76440
+     tttgctgcca gtggtgggcg atgtctgctt ccagttcccc attttcctca tttatgcgcg     76500
+     ttagcgaact gaagatttgc cgggcgatat gggtttcgga acggcgcaat gcgctgccag     76560
+     gtagcagatt acgcaacgga cgatagtaga gcacgcgcag gatgtgccgc ccctggcgga     76620
+     agctgcggcc cagatgagaa accagcattt gccgcacagt cgctttgtcg ccaaccaact     76680
+     gcaccagttg atcgatacga tcctgctcca gcaggtcttc cgcccgctgt tgctgaagcg     76740
+     ccagcccggt atagaggaat gtcagacgcg agcgtttacc gcgcccgact tccgcttccc     76800
+     acgtcagcca gccgcgatcc tgcatggtgt tgagcagggt gcgcatatga cgacgcgagc     76860
+     agctcaataa cgctgccagt tcgttgagcg ttgtgtcctg cgatttaccc tcgcagcatt     76920
+     gccacaggcg gatgaactgt tgttgcagac gagcagatgg cataaaaggg gaactcctgt     76980
+     gcaaaagaca gcaattttat tgtccctata ttaagtcaat aattcctaac gatgaagcaa     77040
+     gggggtgccc catgcgtcag ttttatcagc actattttac cgcgacagcg aagttgtgct     77100
+     ggttgcgttg gttaagcgtc ccacaacgat taaccatgct tgaaggactg atgcagtggg     77160
+     atgaccgcaa ttctgaaagt tgacttgcct gcatcatgtg tgactgagta ttggtgtaaa     77220
+     atcacccgcc agcagattat acctgctggt tttttttatt ctcgccgcgc taaaaaggga     77280
+     acgtatgatc tggataatga cgatggctcg ccgtatgaac ggtgtttacg cggcatttat     77340
+     gctggtcgct tttatgatgg gggtggccgg ggcgctacag gctcctacat tgagcttatt     77400
+     tctgagtcgt gaggttggcg cgcaaccttt ctggatcggc ctcttttatg cggtgaatgc     77460
+     tattgctggg atcggcgtaa gcctctggtt ggcaaaacgt tctgacagtc agggcgatcg     77520
+     gcgaaaactg attatatttt gctgtttgat ggctatcggc aatgcgctat tgtttgcatt     77580
+     taatcgtcat tatctgacgc ttatcacctg tggtgtgctt ctggcatctc tggccaatac     77640
+     ggcaatgcca cagttatttg ctctggcgcg ggaatatgcg gataactcgg cgcgagaagt     77700
+     ggtgatgttt agctcggtga tgcgtgcgca gctttctctg gcatgggtta tcggtccacc     77760
+     gttggccttt atgctggcgt tgaattacgg ctttacggtg atgttttcga ttgccgccgg     77820
+     gatattcaca ctcagtctgg tattgattgc atttatgctt ccgtctgtgg cgcgggtaga     77880
+     actgccgtcg gaaaatgctt tatcaatgca aggtggctgg caggatagta acgtacggat     77940
+     gttatttgtc gcctcgacgt taatgtggac ctgcaacacc atgtacatta ttgatatgcc     78000
+     gttgtggatc agtagcgagt taggattgcc agacaaactg gcgggtttcc tgatggggac     78060
+     ggcagctgga ctggaaatac cagcaatgat tctggctggc tactatgtca aacgttatgg     78120
+     taagcggcga atgatggtca tagcagtggc ggcaggagta ctgttttaca ccggattgat     78180
+     tttctttaat agccgtatgg cgttgatgac gctgcaactt tttaacgctg tatttatcgg     78240
+     cattgttgcg ggtattggga tgctatggtt tcaggattta atgcctggaa gagcgggggc     78300
+     agctaccacc ttatttacta acagtatttc taccggggta attctggctg gcgttattca     78360
+     gggagcaatt gcacaaagtt gggggcactt tgctgtctac tgggtaattg cggttatttc     78420
+     tgttgtcgca ttatttttaa ccgcaaaggt taaagacgtt tgatgacgtg gacgatagcg     78480
+     gaaagcccgg tcatttgacc gggcaagggg attaattcat aaacgcaggt tgttttgctt     78540
+     cataagcggc aatggcgtcg tcgtgctgca aggtaagccc aatactgtcc agaccgttca     78600
+     tcatgcagtg gcggcggaag gcatcgatgg taaagcgata ggttttctct cccgctttca     78660
+     cctcttgcgc ttccagatcc acgtcgaaat ggatccccgg attagctttc accagcgcaa     78720
+     acagttcgtc cacttctgca tcgcttaatt tcaccggcag cagctggttg ttaaagctat     78780
+     tgccgtagaa gatgtcagca aaactcggcg caatcaccac tttaaaaccg tagtcggtca     78840
+     atgcccaggg cgcgtgctca cgcgaagagc cacagccgaa gttttctcgt gccagcaaaa     78900
+     tggaagcgcc ctgatactgc gggaagttca gcacgaagtc cgggtttggc tgttggcctt     78960
+     tttcatccag aaaacgccag tcgttaaaca gatgcgcgcc aaaacccgta cgggtcactt     79020
+     tctgcaaaaa ctgtttcggg atgattgcat cggtatcgac attggcggca tccagcggaa     79080
+     ccaccaggcc tgtgtgtttg ataaatttct ctgccatggt gtgctcctta tttaatgttg     79140
+     cgaatgtcgg cgaaatgtcc ggtcacagca gcagcggcag ccattgccgg gctgaccaga     79200
+     tgcgtgcgcc cgccgcgccc ctggcggcct tcaaagttac ggttgctggt ggaggcacaa     79260
+     cgttcgcccg gattcagacg gtcgttgttc atcgccagac acattgagca gccaggcaag     79320
+     cgccattcaa aaccggcttc aataaagatt ttatccagac cttccgcttc cgcctgggct     79380
+     tttaccgggc cagagccggg aaccaccagt gcctgcacgc ctggcgcgac ttttcgccct     79440
+     ttgacgatct ccgctgccgc gcgtaaatct tcaatgcgcg agttggtaca ggaaccgata     79500
+     aacactttgt cgatagccac ttcggtcagc ggaatacccg gtttcagccc catataggcc     79560
+     agcgcttttt ctgccgacgc gcgttcaacc ggatcggcaa acgaagccgg atcgggaata     79620
+     ttgtcgttca cggaaatcac ctggccggga ttggtgcccc aggtgacctg cggtgaaatt     79680
+     tcttctgctt gcagagtgac aacggtatcg aaagttgcgc cttcgtcggt ttgcagggtt     79740
+     ttccagtagg caacggcgtc gtcgaaatct ttgcctttcg gcgcatgcag acggcctttg     79800
+     acatagttaa aggtggtttc gtccggtgca accagaccgg cttttgcgcc catttcgatt     79860
+     gccatattgc acagggtcat acgaccttcc atgcttaaat cacggattgc ttcgccgcaa     79920
+     aactccacca catgcccggt gccgcctgcg ctaccggttt taccgataat tgccagcacg     79980
+     atatcttttg cggtaatgcc cggcgcggct ttgccctgga cttcaatttt catggttttt     80040
+     gcgcggccct gtttcagggt ttgcgttgcc agtacgtgtt caacttcgga agtgccgata     80100
+     ccaaaggcca gtgcgccaaa cgcgccgtgg gtggcggtat gcgagtcgcc gcagacaatg     80160
+     gtcatccccg gcaaggtgac gccctgttcc ggccccatta cgtggacgat cccctgatac     80220
+     gggtgattca ggtcatacag ttcgacgcca aattctttgc agtttttgat cagttcctgc     80280
+     atctggatac gcgccatttc accgcaggca ttaatgcctt tggtctgggt agagacgttg     80340
+     tgatccatgg tagcgaaggt tttgcccggc tgacgtaccg ggcgaccgtg ggcgcgcaga     80400
+     ccatcgaacg cctgcggtga ggtcacttca tgcaccaggt ggcggtcgat atataacagt     80460
+     ggggtttcgt tttcggcttc gtacacaacg tgagcgtcga acaatttttc gtataacgtc     80520
+     ttagccatga ttacacccct tctgctacat agcgggcaat gatatcgccc atttcatcgg     80580
+     tactaacggc ggcagcgcca cgggctaaat ccccggtgcg aatgccttct tctaatgcgc     80640
+     ggttaatggc gcgttcaatg gcgcaagccg catcatcggc atccaggctg taacgcagca     80700
+     gcagtgccag cgaaaggatt tgtgcaatcg ggttggcgat gtttttgcct gcgatatctg     80760
+     gtgccgagcc gcccgccggt tcatacagtc caaaaccttg ctcgttcagg ctggcggaag     80820
+     gcaacatccc catcgagcca gtgatcattg cgcactcgtc agacagaatg tcgccaaaca     80880
+     ggttggagca cagcagaacg tcaaactgtg atggatcttt aatcagctgc atggtggcgt     80940
+     tgtcgatgta catatgcgcc agttcgacat ccgggtattc cgtggcgatc tcgttaacga     81000
+     tctcccgcca taaaatagag gattgcagca cgttggcttt atcgatcgac gtcactttgt     81060
+     ggcgacgctt gcgagcagat tcaaacgcga tgcgggcgat acgttcgatc tcaaaacggt     81120
+     gatacacctc ggtatcaaag gctttttcat attgtccgct accttcgcgg ccttttggct     81180
+     gaccgaaata gatgccgccg gtcagttcgc gcacacacag gatgtcgaag ccgtttgcgg     81240
+     caatgtctgc acgcagcgga cagaatgctt ccagcccctg atacagtttt gccgggcgca     81300
+     ggttgctgaa taatttgaag tgcttacgca gaggcagcag cgcgccgcgt tctggttgct     81360
+     ggtctggtgg taaatgttcc cacttcgggc cgcctaccga gccaaacagc acggcatcgg     81420
+     cttgctcaca accttcaacc gtcgcaggcg gcagtggttg cccgtggtta tcaatggctg     81480
+     cgccgcctac atcgtaatgg ctggtggtga tgcgcatcgc aaagcggttg cgcacggcat     81540
+     ccagcacttt cagcgcctgg gtcatcactt ccggaccaat accgtccccc ggcaatacgg     81600
+     caatatggta attcttcgac atcacacggt ttccttgttg ttttcgttgt gttgagcttt     81660
+     gcgttgcaac tctttttcga cttctgcggc acgccagata ttgttcagaa cgtgcaccat     81720
+     ggctttggca gatgactcga caatatcggt agccaggccg acgccgtgga agcggcgacc     81780
+     gttgtagtta gcgacgatat ccacctgacc cagcgcatct ttaccgtggc ctttggcggt     81840
+     caggctgtat ttcaccagtt cgacgttata ttcagtgatg cggttaattg cctgatagac     81900
+     ggcatcgacc ggaccgttac cgttggcggc ttctgctttg acttcttcgc cacaggccag     81960
+     tttgacggcg gcggtggcga tatcgttaga gccagactgc acgctgaagt aatccagacg     82020
+     gaaatgctcc ggctcttctt gctgcttacc gatgaaggcc agcgcctcca gatcgtaatc     82080
+     aaacacctga ccttttttgt ccgccagctt caggaaagca tcgtacaaat tgtctaaatt     82140
+     atattcactt tctttatacc ccatctcatc catgcgatgt ttcaccgccg cacgccccga     82200
+     acgagaggtc agattcagct ggatttggtt cagaccaata gattctggtg tcatgatttc     82260
+     gtagttttcg cggtttttca gcacgccatc ctggtgtata ccggaggagt gtgcgaatgc     82320
+     gccgctgcca acaatggctt tgtttgccgg gatcggcata ttacaaatct ggctaactaa     82380
+     ctggctggtg cgccatatct cctggtgatt aatggcggtg tggacgttga gaatatcctt     82440
+     acgaactttg atcgccatga tgacttcttc cagggaacag tttccggcac gctcgccgat     82500
+     cccgttcatt gcgccttcca cctggcgtgc accggcatgt accgccgcca gtgagtttcc     82560
+     gaccgccagg cccaaatcgt cgtgggtatg tacggagata atggctttgc cgatgctagg     82620
+     cacgcgttca tacaggccgc tgatgattcc ggcgaactca aacggcatgg tgtagcccac     82680
+     ggtgtccgga atgttgatgg tggtggcacc ggcattaatc gccgcttcga ccactcgcgc     82740
+     cagatcggca atgggtgtac gcccggcatc ttcgcaagaa aattcaacat catcggtgta     82800
+     attacgggcg cgtttcacca tatagatagc gcgttcgatc acctcgtcca gcgtgctgcg     82860
+     cagcttggtg gcgatgtgca ttggcgaagt ggcaataaag gtatgaatac ggaaggcttc     82920
+     ggcgactttc agggattcgg ccgccacgtc gatatctttt tccacgcagc gagctaacgc     82980
+     acatacgcgg ctgtttttaa cctggcgggc gatggtttgc accgattcaa aatcgcccgg     83040
+     cgaagagacg gggaaaccga cttccatcac gtcaacaccc atacgctcaa gggccagcgc     83100
+     aatttgcagt ttttctttca cactcaagct tgcctgtaac gcctgttcac cgtcgcgcaa     83160
+     tgtggtatcg aaaataatga cttgctggct catggtttgg gtccttgtct cttttagagc     83220
+     gcctcgcttc gggcataaaa aaacccgcgc aatggcgcgg gttttttgtt tgactgcgtg     83280
+     ctggcttaat gctggatgcc gctcactcgt ctaccgcgca aagaagatgc gtttagtagt     83340
+     agtagaccga taaagcgaac gatgtgagtc attaaatcag ctccagatga atgcgatatg     83400
+     cttttagagt tactggatac aaaaacggat gtcaaccctg acgcaataaa aacgtcccgc     83460
+     cagcgtgagt tctgcatccg taaaattagc taattgtgct gcggtggtta aagtaagcga     83520
+     tattaatttc tgcttaacta ccgacgcttt tcatcggttg acatatttca gcataaattt     83580
+     ttgcatctaa tcaacgagga aaaaggggac aaaatgcacg cgttgcaaaa cctatcctga     83640
+     tgatttgtat tgaattatat gttttgcgat tttttttgat attgatttgg tgaatattat     83700
+     tgatcaatta atgttaagaa ttaatgcatt aaatatataa attaattatt aaataagcac     83760
+     atttaatcca ttttgtagat gattgagtat tcgcggtagt tatgattaga ttgttttcgc     83820
+     aacaaaaaca ttatggatta ttatgctgtg gtaaatgact cattccacgg caatggattc     83880
+     tgtttttatc agaacccgta tctttatgtt ttccgaattt tactcatttt gctttttctt     83940
+     attttatatg catgataaat catattcttc aggattattt ctctgcattc caataaggga     84000
+     aagggagtta agtgtgacag tggagttaag tatgccagag gtacaaacag atcatccaga     84060
+     gacggcggag ttaagcaaac cacagctacg catggtcgat ctcaacttat taaccgtttt     84120
+     cgatgccgtg atgcaggagc aaaacattac tcgtgccgct catgttctgg gaatgtcgca     84180
+     acctgcggtc agtaacgctg ttgcacgcct gaaggtgatg tttaatgacg agctttttgt     84240
+     tcgttatggc cgtggtattc aaccgactgc tcgcgcattt caactttttg gttcagttcg     84300
+     tcaggcattg caactagtac aaaatgaatt gcctggttca ggttttgaac ccgcgagcag     84360
+     tgaacgtgta tttcatcttt gtgtttgcag cccgttagac agcattctga cctcgcagat     84420
+     ttataatcac attgagcaga ttgcgccaaa tatacatgtt atgttcaagt cttcattaaa     84480
+     tcagaacact gaacatcagc tgcgttatca ggaaacggag tttgtgatta gttatgaaga     84540
+     cttccatcgt cctgaattta ccagcgtacc attatttaaa gatgaaatgg tgctggtagc     84600
+     cagcaaaaat catccaacaa ttaagggccc gttactgaaa catgatgttt ataacgaaca     84660
+     acatgcggcg gtttcgctcg atcgtttcgc gtcatttagt caaccttggt atgacacggt     84720
+     agataagcaa gccagtatcg cgtatcaggg catggcaatg atgagcgtac ttagcgtggt     84780
+     gtcgcaaacg catttggtcg ctattgcgcc gcgttggctg gctgaagagt tcgctgaatc     84840
+     cttagaatta caggtattac cgctgccgtt aaaacaaaac agcagaacct gttatctctc     84900
+     ctagcatgaa gctgccgggc gcgataaagg ccatcagtgg atggaagagc aattagtctc     84960
+     aatttgcaaa cgctaactga ttgcagaata ggtcagacat gaatgtctgg tttattctgc     85020
+     attttttatt gaatgtagaa ttttattctg aatgtgtggg ctctctattt taggattaat     85080
+     taaaaaaata gagaaattgc tgtaagttgt gggattcagc cgatttatta tcaatttaat     85140
+     cctctgtaat ggaggatttt atcgtttctt ttcacctttc ctcctgttta ttcttattac     85200
+     cccgtgttta tgtctctggc tgccaattgc ttaagcaaga tcggacggtt aatgtgtttt     85260
+     acacattttt tccgtcaaac agtgaggcag gccatggaga tgttgtctgg aggcgagatg     85320
+     gtcgtccgat cgcttatcga tcagggcgtt aaacaagtat tcggttatcc cggaggcgca     85380
+     gtccttgata tttatgatgc attgcatacc gtgggtggta ttgatcatgt attagttcgt     85440
+     catgagcagg cggcggtgca tatggccgat ggcttggcgc gcgcgaccgg ggaagtcggc     85500
+     gtcgtgctgg taacgtcggg tccaggggcg accaatgcga ttactggcat cgccaccgct     85560
+     tatatggatt ccattccatt agttgtcctt tccgggcagg tagcgacctc gttgataggt     85620
+     tacgatgcct ttcaggagtg cgacatggtg gggatttcgc gacccgtggt taaacacagt     85680
+     tttctggtta agcaaacgga agacattccg caggtgctga aaaaggcttt ctggctggcg     85740
+     gcaagcggtc gcccaggacc agtagtcgtt gatttaccga aagatattct taatccggcg     85800
+     aacaaattac cctatgtctg gccggagtcg gtcagtatgc gttcttacaa tcccactact     85860
+     accggacata aagggcaaat taagcgtgct ctgcaaagcg tggtagcggt aaaaaaaccg     85920
+     gttgtctacg taggcggtgg ggcaatcacg gcgggctgcc atcagcagtt gaaagaaacg     85980
+     gtggaggcgt tgaatctgcc cgttgtttgc tcattgatgg ggctgggggc gtttccggca     86040
+     acgcatcgtc aggtactggg tatgctggga atgcacggta cctacgaagc caatatgacg     86100
+     atgcataacg cggatgtgat tttcgccgtc ggggtacgat ttgatgaccg aacgacgaac     86160
+     aatctggcaa agtactgccc aaatgccact gttctgcata tcgatattga tcctacttcc     86220
+     atttctaaaa ccgtgactgc ggatatcccg attgtggggg atgctcgcca agtcctcgaa     86280
+     caaatgcttg aactcttgtc gcaagaatcc gcccatcaac cactggatga gatccgcgac     86340
+     tggtggcagc aaattgaaca gtggcgcgct cgtcagtgcc tgaaatatga cactcacagt     86400
+     gaaaagatta aaccgcaggc ggtgatcgag actctttggc ggttgacgaa gggagacgct     86460
+     tacgtgacgt ccgatgtcgg gcagcaccag atgtttgctg cactttatta tccattcgac     86520
+     aaaccgcgtc gctggatcaa ttccggtggc ctcggcagca tgggttttgg tttacctgcg     86580
+     gcactgggcg tcaaaatggc gttcccagaa gaaaccgtgg tttgcgtcac tggcgacggc     86640
+     agtattcaga tgaacatcca ggaactgtct accgcgttgc aatacgagtt gcccgtactg     86700
+     gtggtgaatc tcaataaccg ctatctgggg atggtgaagc agtggcagga catgatctat     86760
+     tccggccgtc attcacaatc ttatatgcaa tcgctacccg atttcgtccg tcgcggagcc     86820
+     tatgggcatg tcgggatcca gatttctcat ccgcatggct ggaaagcaaa cttagcgagg     86880
+     cgctggaaca ggtgcgcaat aatcgcctgg tgtttgttga tgttaccgtc gatggcagcg     86940
+     agcacgtcta cccgatgcag attcgcgggg gcggaatgga tgaaatgtgg ttaagcaaaa     87000
+     cggagagaac ctgattatgc gccggatatt atcagtctta ctcgaaaatg aatcaggcgc     87060
+     gttatcccgc gtgattggcc ttttttccca gcgtggctac aacattgaaa gcctgaccgt     87120
+     tgcgccaacc gacgatccga cattatcgcg tatgaccatc cagaccgtgg gcgatgaaaa     87180
+     agtacttgag cagatcgaaa agcaattaca caaactggtc gatgtcttgc gcgtgagtga     87240
+     gttggggcag ggcgcgcatg ttgagcggga aatcatgctg gtgaaaattc aggccagcgg     87300
+     ttacgggcgt gacgaagtga aacgtaatac ggaaatattc cgtgggcaaa ttatcgatgt     87360
+     cacaccctcg ctttataccg ttcaattagc aggcaccagc ggtaagctta gtgcattttt     87420
+     agcatcgatt cgcgatgtgg cgaaaattgt ggaggttgct cgctctggtg tggtcggact     87480
+     ttcgcgcggc gataaaataa tgcgttgaga atgatctcaa tgcgcaattt acagcccaac     87540
+     atgtcacgtt gggctttttt tgcgaaatca gtgggaacct ggaataaaag cagttgccgc     87600
+     agttaatttt ctgcgcttag atgttaatga atttaaccca taccagtaca atggctatgg     87660
+     tttttacatt ttacgcaagg ggcaattgtg aaactggatg aaatcgctcg gctggcggga     87720
+     gtgtcgcgga ccactgcaag ctatgttatt aacggcaaag cgaagcaata ccgtgtgagc     87780
+     gacaaaaccg ttgaaaaagt catggctgtg gtgcgtgagc acaattacca cccgaacgcc     87840
+     gtggcagctg ggcttcgtgc tggacgcaca cgttctattg gtcttgtgat ccccgatctg     87900
+     gagaacacca gctatacccg catcgctaac tatcttgaac gccaggcgcg gcaacggggt     87960
+     tatcaactgc tgattgcctg ctcagaagat cagccagaca acgaaatgcg gtgcattgag     88020
+     caccttttac agcgtcaggt tgatgccatt attgtttcga cgtcgttgcc tcctgagcat     88080
+     cctttttatc aacgctgggc taacgacccg ttcccgattg tcgcgctgga ccgcgccctc     88140
+     gatcgtgaac acttcaccag cgtggttggt gccgatcagg atgatgccga aatgctggcg     88200
+     gaagagttac gtaagtttcc cgccgagacg gtgctttatc ttggtgcgct accggagctt     88260
+     tctgtcagct tcctgcgtga acaaggtttc cgtactgcct ggaaagatga tccgcgcgaa     88320
+     gtgcatttcc tgtatgccaa cagctatgag cgggaggcgg ctgcccagtt attcgaaaaa     88380
+     tggctggaaa cgcatccgat gccgcaggcg ctgttcacaa cgtcgtttgc gttgttgcaa     88440
+     ggagtgatgg atgtcacgct gcgtcgcgac ggcaaactgc cttctgacct ggcaattgcc     88500
+     acctttggcg ataacgaact gctcgacttc ttacagtgtc cggtgctggc agtggctcaa     88560
+     cgtcaccgcg atgtcgcaga gcgtgtgctg gagattgtcc tggcaagcct ggacgaaccg     88620
+     cgtaagccaa aacctggttt aacgcgcatt aaacgtaatc tctatcgccg cggcgtgctc     88680
+     agccgtagct aagccgcgaa caaaaatacg cgccaggtga atttccctct ggcgcgtaga     88740
+     gtacgggact ggacatcaat atgcttaaag taaataagac tattcctgac tattattgat     88800
+     aaatgctttt aaacccgccc gttaattaac tcaccagctg aaattcacaa taattaagtg     88860
+     atatcgacag cgcgtttttg cattattttg ttacatgcgg cgatgaattg ccgatttaac     88920
+     aaacactttt ctttgctttt gcgcaaaccc gctggcatca agcgccacac agacgtaaca     88980
+     aggactgtta accggggaag atatgtccta aaatgccgct cgcgtcgcaa actgacactt     89040
+     tatatttgct gtggaaaata gtgagtcatt ttaaaacggt gatgacgatg agggattttt     89100
+     tcttacagct attcataacg ttaatttgct tcgcacgttg gacgtaaaat aaacaacgct     89160
+     gatattagcc gtaaacatcg ggttttttac ctcggtatgc cttgtgactg gcttgacaag     89220
+     cttttcctca gctccgtaaa ctcctttcag tgggaaattg tggggcaaag tgggaataag     89280
+     gggtgaggct ggcatgttcc ggggagcaac gttagtcaat ctcgacagca aagggcgctt     89340
+     atcagtgcct acccgttatc gggaacagct gcttgagaac gctgccggtc aaatggtttg     89400
+     caccattgac atttatcacc cgtgcctgct gctttacccc ctgcctgaat gggaaattat     89460
+     cgagcaaaaa ttatcgcgtc tgtcgagcat gaacccggtt gagcgccgtg tgcagcgcct     89520
+     actgttaggt catgccagcg aatgtcagat ggatggcgca ggtcgattgt taatcgcgcc     89580
+     agtactgcgg caacatgccg ggctgacaaa agaagtgatg ctggttggac agttcaacaa     89640
+     gtttgagctg tgggatgaaa caacctggca tcaacaggtc aaggaagata tcgacgcaga     89700
+     gcagttggct accggagact tatcggagcg actgcaggac ttgtctctat aaaatgatgg     89760
+     aaaactataa acatactacg gtgctgctgg atgaagccgt taatggcctc aatatccgtc     89820
+     ctgatggcat ctacattgat gggacttttg gtcgcggtgg tcactcacgt ctgatcctct     89880
+     cgcagcttgg cgaagagggg cgtttgctgg cgatcgatcg cgacccgcag gctatcgccg     89940
+     ttgcgaagac tattgatgat ccgcgcttct ccatcatcca cggacctttc tccgcgctgg     90000
+     gcgaatacgt tgccgagcgc gatcttatcg gcaagatcga cggcattctc ctcgatcttg     90060
+     gcgtctcttc accgcaactt gatgatgctg aacgtggctt ttcctttatg cgcgatggtc     90120
+     cgctggacat gcgtatggac ccaacccgtg ggcagtcagc cgctgaatgg ctacaaaccg     90180
+     cagaagaagc cgatatcgcc tgggtattga aaacctatgg tgaagagcgt tttgccaaac     90240
+     gcattgcccg cgccattgtc gagcgtaacc gcgaacagcc gatgacccgc accaaagaac     90300
+     tggcggaagt cgtggctgct gcaacgccgg tgaaagataa gtttaaacat cccgcgaccc     90360
+     gtaccttcca ggcggtgcgc atttgggtaa acagtgaact ggaggagata gagcaggcgc     90420
+     taaaaagctc gctcaacgtg ctggccccgg gtgggcggct ttcgatcatc agcttccact     90480
+     cgctggaaga ccgtattgtg aaacgtttta tgcgtgaaaa cagccgcggt ccgcaagttc     90540
+     cggcagggtt accgatgact gaagagcagc tcaaaaaact gggtggccgt cagctgcgag     90600
+     cactaggcaa gttaatgccg ggcgaagaag aggtggctga gaaccctcgt gcccgtagtt     90660
+     cagttctgcg tattgcagag aggacgaatg catgatcagc agagtgacag aagctctaag     90720
+     caaagttaaa ggatcgatgg gaagccacga gcgccatgca ttgcctggtg ttatcggtga     90780
+     cgatcttttg cgatttggga agctgccact ctgcctgttc atttgcatta ttttgacggc     90840
+     ggtgactgtg gtaaccacgg cgcaccatac ccgtttactg accgctcagc gcgaacaact     90900
+     ggtgctggag cgagatgctt tagacattga atggcgcaac ctgatccttg aagagaatgc     90960
+     gctcggcgac catagccggg tggaaaggat cgccacggaa aagctgcaaa tgcagacatg     91020
+     ttgatccgtc acaagaaaat atcgtagtgc aaaaataagg ataaacgcga cgcatgaaag     91080
+     cagcggcgaa aacgcagaaa ccaaaacgtc aggaagaaca tgccaacttt atcagttggc     91140
+     gttttgcgtt gttatgcggc tgtattctcc tggcgctggc ttttctgctc ggacgcgtag     91200
+     cgtggttaca agttatctcc ccggatatgc tggtgaaaga gggcgacatg cgttctcttc     91260
+     gcgttcagca agtttccacc tcccgcggca tgattactga ccgttctggt cgcccgttag     91320
+     cggtgagcgt gccggtaaaa gcgatttggg ctgacccgaa agaagtgcat gacgctggcg     91380
+     gtatcagcgt cggtgaccgc tggaaggcgc tggctaacgc gctcaatatt ccgctggatc     91440
+     agctttcagc ccgcattaac gccaacccga aagggcgctt tatttatctg gcgcgtcagg     91500
+     tgaaccctga catggcggac tacatcaaaa aactgaaact gccggggatt catctgcgtg     91560
+     aagagtctcg ccgttactat ccgtccggcg aagtgactgc tcacctcatc ggctttacta     91620
+     acgtcgatag tcaagggatt gagggcgttg agaagagttt cgataaatgg cttaccgggc     91680
+     agccgggtga gcgcattgtg cgtaaagacc gctatggtcg cgtaattgaa gatatttctt     91740
+     ctactgacag ccaggcagcg cacaacctgg cgctgagtat tgatgaacgc ctgcaggcgc     91800
+     tggtttatcg cgaactgaac aacgcggtgg cctttaacaa ggctgaatct ggtagcgccg     91860
+     tgctggtgga tgtcaacacc ggtgaagtgc tggcgatggc taacagcccg tcatacaacc     91920
+     ctaacaatct gagcggcacg ccgaaagagg cgatgcgtaa ccgtaccatc accgacgtgt     91980
+     ttgaaccggg ctcaacggtt aaaccgatgg tggtaatgac cgcgttgcaa cgtggcgtgg     92040
+     tgcgggaaaa ctcggtactc aataccattc cttatcgaat taacggccac gaaatcaaag     92100
+     acgtggcacg ctacagcgaa ttaaccctga ccggggtatt acagaagtcg agtaacgtcg     92160
+     gtgtttccaa gctggcgtta gcgatgccgt cctcagcgtt agtagatact tactcacgtt     92220
+     ttggactggg aaaagcgacc aatttggggt tggtcggaga acgcagtggc ttatatcctc     92280
+     aaaaacaacg gtggtctgac atagagaggg ccaccttctc tttcggctac gggctaatgg     92340
+     taacaccatt acagttagcg cgagtctacg caactatcgg cagctacggc atttatcgcc     92400
+     cactgtcgat taccaaagtt gaccccccgg ttcccggtga acgtgtcttc ccggaatcca     92460
+     ttgtccgcac tgtggtgcat atgatggaaa gcgtggcgct accaggcggc ggcggcgtga     92520
+     aggcggcgat taaaggctat cgtatcgcca ttaaaaccgg taccgcgaaa aaggtcgggc     92580
+     cggacggtcg ctacatcaat aaatatattg cttataccgc aggcgttgcg cctgcgagtc     92640
+     agccgcgctt cgcgctggtt gttgttatca acgatccgca ggcgggtaaa tactacggcg     92700
+     gcgccgtttc cgcgccggtc tttggtgcca tcatgggcgg cgtattgcgt accatgaaca     92760
+     tcgagccgga tgcgctgaca acgggcgata aaaatgaatt tgtgattaat caaggcgagg     92820
+     ggacaggtgg cagatcgtaa tttgcgcgac cttcttgctc cgtgggtgcc agacgcacct     92880
+     tcgcgagcac tgcgagagat gacactcgac agccgtgtgg ctgcggcggg cgatctcttt     92940
+     gtagctgtag taggtcatca ggcggacggg cgtcgatata tcccgcaggc gatagcgcaa     93000
+     ggtgtcgctg ccattattgc agaggcgaaa gatgaggcga ccgatggtga aatccgtgaa     93060
+     atgcacggcg taccggtcat ctatctcagc cagctcaacg agcgtttatc tgcactggcg     93120
+     ggccgctttt accatgaacc ctctgacaat ttacgtctcg tgggcgtaac gggcaccaac     93180
+     ggcaaaacca cgactaccca gctgttggcg cagtggagcc aactgcttgg cgaaatcagc     93240
+     gcggtaatgg gcaccgttgg taacggcctg ctggggaaag tgatcccgac agaaaataca     93300
+     accggttcgg cagtcgatgt tcagcatgag ctggcggggc tggtggatca gggcgcgacg     93360
+     ttttgcgcaa tggaagtttc ctcccacggg ctggtacagc accgtgtggc ggcattgaaa     93420
+     tttgcggcgt cggtctttac caacttaagc cgcgatcacc ttgattatca tggtgatatg     93480
+     gaacactacg aagccgcgaa atggctgctt tattctgagc atcattgcgg tcaggcgatt     93540
+     attaacgccg acgatgaagt gggccgccgc tggctggcaa aactgccgga cgcggttgcg     93600
+     gtatcaatgg aagatcatat taatccgaac tgtcacggac gctggttgaa agcgaccgaa     93660
+     gtgaactatc acgacagcgg tgcgacgatt cgctttagct caagttgggg cgatggcgaa     93720
+     attgaaagcc atctgatggg cgcttttaac gtcagcaacc tgctgctcgc gctggcgaca     93780
+     ctgttggcac tcggctatcc actggctgat ctgctgaaaa ccgccgcgcg tctgcaaccg     93840
+     gtttgcggac gtatggaagt gttcactgcg ccaggcaaac cgacggtggt ggtggattac     93900
+     gcgcatacgc cggatgcact ggaaaaagcc ttacaggcgg cgcgtctgca ctgtgcgggc     93960
+     aagctgtggt gtgtctttgg ctgtggtggc gatcgcgata aaggtaagcg tccactgatg     94020
+     ggcgcaattg ccgaagagtt tgctgacgtg gcggtggtga cggacgataa cccgcgtacc     94080
+     gaagaaccgc gtgccatcat caacgatatt ctggcgggaa tgttagatgc cggacatgcc     94140
+     aaagtgatgg aaggccgtgc tgaagcggtg acttgcgccg ttatgcaggc taaagagaat     94200
+     gatgtggtac tggtcgcggg caaaggccat gaagattacc agattgttgg caatcagcgt     94260
+     ctggactact ccgatcgcgt cacggtggcg cgtctgctgg gggtgattgc atgattagcg     94320
+     taacccttag ccaacttacc gacattctca acggtgaact gcaaggtgca gatatcaccc     94380
+     ttgatgctgt aaccactgat acccgaaaac tgacgccggg ctgcctgttt gttgccctga     94440
+     aaggcgaacg ttttgatgcc cacgattttg ccgaccaggc gaaagctggc gcggcaggcg     94500
+     cactactggt tagccgtccg ctggacatcg acctgccgca gttaatcgtc aaggatacgc     94560
+     gtctggcgtt tggtgaactg gctgcatggg ttcgccagca agttccggcg cgcgtggttg     94620
+     ctctgacggg gtcctccggc aaaacctccg ttaaagagat gacggcggcg attttaagcc     94680
+     agtgcggcaa cacgctttat acggcaggca atctcaacaa cgacatcggt gtaccgatga     94740
+     cgctgttgcg cttaacgccg gaatacgatt acgcagttat tgaacttggc gcgaaccatc     94800
+     agggcgaaat agcctggact gtgagtctga ctcgcccgga acgtgcgctg gtcaacaacc     94860
+     tggcagcggc gcatctggaa ggttttggct cgcttgcggg tgtcgcgaaa gcgaaaggtg     94920
+     aaatctttag cggcctgccg gaaaacggta tcgccattat gaacgccgac aacaacgact     94980
+     ggctgaactg gcagagcgta attggctcac gcaaagtgtg gcgtttctca cccaatgccg     95040
+     ccaacagcga tttcaccgcc accaatatcc atgtgacctc gcacggtacg gaatttaccc     95100
+     tacaaacccc aaccggtagc gtcgatgttc tgctgccgtt gccggggcgt cacaatattg     95160
+     cgaatgcgct ggcagccgct gcgctctcca tgtccgtggg cgcaacgctt gatgctatca     95220
+     aagcggggct ggcaaatctg aaagctgttc caggccgtct gttccccatc caactggcag     95280
+     aaaaccagtt gctgctcgac gactcctaca acgccaatgt cggttcaatg actgcagcag     95340
+     tccaggtact ggctgaaatg ccgggctacc gcgtgctggt ggtgggcgat atggcggaac     95400
+     tgggcgctga aagcgaagcc tgccatgtac aggtgggcga ggcggcaaaa gctgctggta     95460
+     ttgaccgcgt gttaagcgtg ggtaaacaaa gccatgctat cagcaccgcc agcggcgttg     95520
+     gcgaacattt tgctgataaa actgcgttaa ttacgcgtct taaattactg attgctgagc     95580
+     aacaggtaat tacgatttta gttaagggtt cacgtagtgc cgccatggaa gaggtagtac     95640
+     gcgctttaca ggagaatggg acatgttagt ttggctggcc gaacatttgg tcaaatatta     95700
+     ttccggcttt aacgtctttt cctatctgac gtttcgcgcc atcgtcagcc tgctgaccgc     95760
+     gctgttcatc tcattgtgga tgggcccgcg tatgattgct catttgcaaa aactttcctt     95820
+     tggtcaggtg gtgcgtaacg acggtcctga atcacacttc agcaagcgcg gtacgccgac     95880
+     catgggcggg attatgatcc tgacggcgat tgtgatctcc gtactgctgt gggcttaccc     95940
+     gtccaatccg tacgtctggt gcgtgttggt ggtgctggta ggttacggtg ttattggctt     96000
+     tgttgatgat tatcgcaaag tggtgcgtaa agacaccaaa gggttgatcg ctcgttggaa     96060
+     gtatttctgg atgtcggtca ttgcgctggg tgtcgccttc gccctgtacc ttgccggcaa     96120
+     agacacgccc gcaacgcagc tggtggtccc attctttaaa gatgtgatgc cgcagctggg     96180
+     gctgttctac attctgctgg cttacttcgt cattgtgggt actggcaacg cggtaaacct     96240
+     gaccgatggt ctcgacggcc tggcaattat gccgaccgta tttgtcgccg gtggttttgc     96300
+     gctggtggcg tgggcgaccg gcaatatgaa ctttgccagc tacttgcata taccgtatct     96360
+     gcgacacgcc ggggaactgg ttattgtctg taccgcgata gtcggggcag gactgggctt     96420
+     cctgtggttt aacacctatc cggcgcaggt ctttatgggc gatgtaggtt cgctggcgtt     96480
+     aggtggtgcg ttaggcatta tcgccgtact gctacgtcag gaattcctgc tggtgattat     96540
+     ggggggcgtg ttcgtggtag aaacgctttc tgtcatcctg caggtcggct cctttaaact     96600
+     gcgcggacaa cgtattttcc gcatggcacc gattcatcac cactatgaac tgaaaggctg     96660
+     gccggaaccg cgcgtcattg tgcgtttctg gattatttcg ctgatgctgg ttctgattgg     96720
+     tctggcaacg ctgaaggtac gttaatcatg gctgattatc agggtaaaaa tgtcgtcatt     96780
+     atcggcctgg gcctcaccgg gctttcctgc gtggactttt tcctcgctcg cggtgtgacg     96840
+     ccgcgcgtta tggatacgcg tatgacaccg cctggcctgg ataaattacc cgaagccgta     96900
+     gaacgccaca cgggcagtct gaatgatgaa tggctgatgg cggcagatct gattgtcgcc     96960
+     agtcccggta ttgcactggc gcatccatcc ttaagcgctg ccgctgatgc cggaatcgaa     97020
+     atcgttggcg atatcgagct gttctgtcgc gaagcacaag caccgattgt ggcgattacc     97080
+     ggttctaacg gcaaaagcac ggtcaccacg ctagtgggtg aaatggcgaa agcggcgggg     97140
+     gttaacgttg gtgtgggtgg caatattggc ctgcctgcgt tgatgctact ggatgatgag     97200
+     tgtgaactgt acgtgctgga actgtcgagc ttccagctgg aaaccacctc cagcttacag     97260
+     gcggtagcag cgaccattct gaacgtgact gaagatcata tggatcgcta tccgtttggt     97320
+     ttacaacagt atcgtgcagc aaaactgcgc atttacgaaa acgcgaaagt ttgcgtggtt     97380
+     aatgctgatg atgccttaac aatgccgatt cgcggtgcgg atgaacgctg cgtcagcttt     97440
+     ggcgtcaaca tgggtgacta tcacctgaat catcagcagg gcgaaacctg gctgcgggtt     97500
+     aaaggcgaga aagtgctgaa tgtgaaagag atgaaacttt ccgggcagca taactacacc     97560
+     aatgcgctgg cggcgctggc gctggcagat gctgcagggt taccgcgtgc cagcagcctg     97620
+     aaagcgttaa ccacattcac tggtctgccg catcgctttg aagttgtgct ggagcataac     97680
+     ggcgtacgtt ggattaacga ttcgaaagcg accaacgtcg gcagtacgga agcggcgctg     97740
+     aatggcctgc acgtagacgg cacactgcat ttgttgctgg gtggcgatgg taaatcggcg     97800
+     gactttagcc cactggcgcg ttacctgaat ggcgataacg tacgtctgta ttgtttcggt     97860
+     cgtgacggcg cgcagctggc ggcgctacgc ccggaagtgg cagaacaaac cgaaactatg     97920
+     gaacaggcga tgcgcttgct ggctccgcgt gttcagccgg gcgatatggt tctgctctcc     97980
+     ccagcctgtg ccagccttga tcagttcaag aactttgaac aacgaggcaa tgagtttgcc     98040
+     cgtctggcga aggagttagg ttgatgcgtt tatctctccc tcgcctgaaa atgccgcgcc     98100
+     tgccaggatt cagtatcctg gtctggatct ccacggcgct aaagggctgg gtgatgggct     98160
+     cgcgggaaaa agataccgac agcctgatca tgtacgatcg caccttactg tggctgacct     98220
+     tcggcctcgc ggcgattggc tttatcatgg tgacctcggc gtcaatgccc atagggcaac     98280
+     gcttaaccaa cgatccgttc ttcttcgcga agcgtgatgg tgtctatctg attttggcgt     98340
+     ttattctggc gatcattacg ctgcgtctgc cgatggagtt ctggcaacgc tacagtgcca     98400
+     cgatgctgct cggatctatc atcctgctga tgatcgtcct ggtagtgggt agctcggtta     98460
+     aaggggcatc gcgttggatc gatctcggtt tgctgcgtat ccagcctgcg gagctgacaa     98520
+     aactgtcgct gttttgctat atcgccaact atctggtgcg taaaggcgac gaagtacgta     98580
+     ataacctgcg cggcttcctg aaaccgatgg gcgtgattct ggtgttggca gtgttactgc     98640
+     tggcacagcc agaccttggt acggtggtgg tgttgtttgt gactacgctg gcgatgttgt     98700
+     tcctggcggg agcgaaattg tggcagttca ttgccattat cggtatgggc atttcagcgg     98760
+     ttgtgttgct gatactcgcc gaaccgtacc gtatccgccg tgttaccgca ttctggaacc     98820
+     cgtgggaaga tccctttggc agcggctatc agttaacgca atcgctgatg gcgtttggtc     98880
+     gcggcgaact ttgggggcaa ggtttaggta actcggtaca aaaactggag tatctgccgg     98940
+     aagcgcacac tgactttatt ttcgccatta tcggcgaaga actggggtat gtcggtgtgg     99000
+     tgctggcact tttaatggta ttcttcgtcg cttttcgcgc gatgtcgatt ggccgtaaag     99060
+     cattagaaat tgaccaccgt ttttccggtt ttctcgcctg ttctattggc atctggttta     99120
+     gcttccaggc gctggttaac gtaggcgcgg cggcggggat gttaccgacc aaaggtctga     99180
+     cattgccgct gatcagttac ggtggttcga gcttactgat tatgtcgaca gccatcatga     99240
+     tgctgttgcg tattgattat gaaacgcgtc tggagaaagc gcaggcgttt gtacgaggtt     99300
+     cacgatgagt ggtcaaggaa agcgattaat ggtgatggca ggcggaaccg gtggacatgt     99360
+     attcccggga ctggcggttg cgcaccatct aatggctcag ggttggcaag ttcgctggct     99420
+     ggggactgcc gaccgtatgg aagcggactt agtgccaaaa catggcatcg aaattgattt     99480
+     cattcgtatc tctggtctgc gtggaaaagg tataaaagca ctgatagctg ccccgctgcg     99540
+     tatcttcaac gcctggcgtc aggcgcgggc gattatgaaa gcgtacaaac ctgacgtggt     99600
+     gctcggtatg ggaggctacg tgtcaggtcc aggtggtctg gccgcgtggt cgttaggcat     99660
+     tccggttgta cttcatgaac aaaacggtat tgcgggctta accaataaat ggctggcgaa     99720
+     gattgccacc aaagtgatgc aggcgtttcc aggtgctttc cctaatgcgg aagtagtggg     99780
+     taacccggtg cgtaccgatg tgttggcgct gccgttgccg cagcaacgtt tggctggacg     99840
+     tgaaggtccg gttcgtgtgc tggtagtggg tggttctcag ggcgcacgca ttcttaacca     99900
+     gacaatgccg caggttgctg cgaaactggg tgattcagtc actatctggc atcagagcgg     99960
+     caaaggttcg caacaatccg ttgaacaggc gtatgccgaa gcggggcaac cgcagcataa    100020
+     agtgacggaa tttattgatg atatggcggc ggcgtatgcg tgggcggatg tcgtcgtttg    100080
+     ccgctccggt gcgttaacgg tgagtgaaat cgccgcggca ggactaccgg cgttgtttgt    100140
+     gccgtttcaa cataaagacc gccagcaata ctggaatgcg ctaccgctgg aaaaagcggg    100200
+     cgcagccaaa attatcgagc agccacagct tagcgtggat gctgtcgcca acaccctggc    100260
+     cgggtggtcg cgagaaacct tattaaccat ggcagaacgc gcccgcgctg catccattcc    100320
+     ggatgccacc gagcgagtgg caaatgaagt gagccgggtt gcccgggcgt aattgtagcg    100380
+     atgccttttg catcgtatga atttaagaag ttaatggcgt aaagaatgaa tacacaacaa    100440
+     ttggcaaaac tgcgttccat cgtgcccgaa atgcgtcgcg ttcggcacat acattttgtc    100500
+     ggcattggtg gtgccggtat gggcggtatt gccgaagttc tggccaatga aggttatcag    100560
+     atcagtggtt ccgatttagc gccaaatccg gtcacgcagc agttaatgaa tctgggtgcg    100620
+     acgatttatt tcaaccatcg cccggaaaac gtacgtgatg ccagcgtggt cgttgtttcc    100680
+     agcgcgattt ctgccgataa cccggaaatt gtcgccgctc atgaagcgcg tattccggtg    100740
+     atccgtcgtg ccgaaatgct ggctgagtta atgcgttttc gtcatggcat cgccattgcc    100800
+     ggaacgcacg gcaaaacgac aaccaccgcg atggtttcca gcatctacgc agaagcgggg    100860
+     ctcgacccaa ccttcgttaa cggcgggctg gtaaaagcgg cgggggttca tgcgcgtttg    100920
+     gggcatggtc ggtacctgat tgccgaagca gatgagagtg atgcatcgtt cctgcatctg    100980
+     caaccgatgg tggcgattgt caccaatatc gaagccgacc acatggatac ctaccagggc    101040
+     gactttgaga atttaaaaca gacttttatt aattttctgc acaacctgcc gttttacggt    101100
+     cgtgcggtga tgtgtgttga tgatccggtg atccgcgaat tgttaccgcg agtggggcgt    101160
+     cagaccacga cttacggctt cagcgaagat gccgacgtgc gtgtagaaga ttatcagcag    101220
+     attggcccgc aggggcactt tacgctgctg cgccaggaca aagagccgat gcgcgtcacc    101280
+     ctgaatgcgc caggtcgtca taacgcgctg aacgccgcag ctgcggttgc ggttgctacg    101340
+     gaagagggca ttgacgacga ggctattttg cgggcgcttg aaagcttcca ggggactggt    101400
+     cgccgttttg atttcctcgg tgaattcccg ctggagccag tgaatggtaa aagcggtacg    101460
+     gcaatgctgg tcgatgacta cggccaccac ccgacggaag tggacgccac cattaaagcg    101520
+     gcgcgcgcag gctggccgga taaaaacctg gtaatgctgt ttcagccgca ccgttttacc    101580
+     cgtacgcgcg acctgtatga tgatttcgcc aatgtgctga cgcaggttga taccctgttg    101640
+     atgctggaag tgtatccggc tggcgaagcg ccaattccgg gagcggacag ccgttcgctg    101700
+     tgtcgcacaa ttcgtggacg tgggaaaatt gatcccattc tggtgccgga tccggcgcgg    101760
+     gtagccgaga tgctggcacc ggtattaacc ggtaacgacc tgattctcgt tcagggggct    101820
+     ggtaatattg gaaaaattgc ccgttcttta gctgaaatca aactgaagcc gcaaactccg    101880
+     gaggaagaac aacatgactg ataaaatcgc ggtcctgttg ggtgggacct ccgctgagcg    101940
+     ggaagtttct ctgaattctg gcgcagcggt gttagccgga ctgcgtgaag gcggtattga    102000
+     cgcgtatcct gtcgacccga aagaagtcga cgtgacgcaa ctgaagtcga tgggctttca    102060
+     gaaagtgttt atcgcgctac acggtcgcgg cggtgaagat ggtacgctgc aggggatgct    102120
+     cgagctgatg ggcttgcctt ataccggaag cggagtgatg gcatctgcgc tttcaatgga    102180
+     taaactacgc agcaaacttc tatggcaagg tgccggttta ccggtcgcgc cgtgggtagc    102240
+     gttaacccgc gcagagtttg aaaaaggcct gagcgataag cagttagcag aaatttctgc    102300
+     tctgggtttg ccggttatcg ttaagccgag ccgcgaaggt tccagtgtgg gaatgtcaaa    102360
+     agtagtagca gaaaatgctc tacaagatgc attaagattg gcatttcagc acgatgaaga    102420
+     agtattgatt gaaaaatggc taagtgggcc ggagttcacg gttgcgatac tcggtgaaga    102480
+     aattttaccg tcaatacgta ttcaaccgtc cggaaccttc tatgattatg aggcgaagta    102540
+     tctctctgat gagacacagt atttctgccc cgcaggtctg gaagcgtcac aagaggccaa    102600
+     tttgcaggca ttagtgctga aagcatggac gacgttaggt tgcaaaggat ggggacgtat    102660
+     tgacgttatg ctggacagcg atggacagtt ttatctgctg gaagccaata cctcaccggg    102720
+     tatgaccagc cacagcctgg tgccgatggc ggcacgtcag gcaggtatga gcttctcgca    102780
+     gttggtagta cgaattctgg aactggcgga ctaatatgtc gcaggctgct ctgaacacgc    102840
+     gaaacagcga agaagaggtt tcttctcgcc gcaataatgg aacgcgtctg gcggggatcc    102900
+     ttttcctgct gaccgtttta acgacagtgt tggtgagcgg ctgggtcgtg ttgggctgga    102960
+     tggaagatgc gcaacgcctg ccgctctcaa agctggtgtt gaccggtgaa cgccattaca    103020
+     cacgtaatga cgatatccgg cagtcgatcc tggcattggg tgagccgggt acctttatga    103080
+     cccaggatgt caacatcatc cagacgcaaa tagaacaacg cctgccgtgg attaagcagg    103140
+     tgagcgtcag aaagcagtgg cctgatgaat tgaagattca tctggttgaa tatgtgccga    103200
+     ttgcgcggtg gaatgatcaa catatggtag acgcggaagg aaataccttc agcgtgccgc    103260
+     cagaacgcac cagcaagcag gtgcttccaa tgctgtatgg cccggaaggc agcgccaatg    103320
+     aagtgttgca gggctatcgc gaaatggggc agatgctggc aaaggacaga tttactctga    103380
+     aggaagcggc gatgaccgcg cggcgttcct ggcagttgac gctgaataac gatattaagc    103440
+     tcaatcttgg ccggggcgat acgatgaaac gtttggctcg ctttgtagaa ctttatccgg    103500
+     ttttacagca gcaggcgcaa accgatggca aacggattag ctacgttgat ttgcgttatg    103560
+     actctggagc ggcagtaggc tgggcgccct tgccgccaga ggaatctact cagcaacaaa    103620
+     atcaggcaca ggcagaacaa caatgatcaa ggcgacggac agaaaactgg tagtaggact    103680
+     ggagattggt accgcgaagg ttgccgcttt agtaggggaa gttctgcccg acggtatggt    103740
+     caatatcatt ggcgtgggca gctgcccgtc gcgtggtatg gataaaggcg gggtgaacga    103800
+     cctcgaatcc gtggtcaagt gcgtacaacg cgccattgac caggcagaat tgatggcaga    103860
+     ttgtcagatc tcttcggtat atctggcgct ttctggtaag cacatcagct gccagaatga    103920
+     aattggtatg gtgcctattt ctgaagaaga agtgacgcaa gaagatgtgg aaaacgtcgt    103980
+     ccataccgcg aaatcggtgc gtgtgcgcga tgagcatcgt gtgctgcatg tgatcccgca    104040
+     agagtatgcg attgactatc aggaagggat caagaatccg gtaggacttt cgggcgtgcg    104100
+     gatgcaggca aaagtgcacc tgatcacatg tcacaacgat atggcgaaaa acatcgtcaa    104160
+     agcggttgaa cgttgtgggc tgaaagttga ccaactgata tttgccggac tggcatcaag    104220
+     ttattcggta ttgacggaag atgaacgtga actgggtgtc tgcgtcgtcg atatcggtgg    104280
+     tggtacaatg gatatcgccg tttataccgg tggggcattg cgccacacta aggtaattcc    104340
+     ttatgctggc aatgtcgtga ccagtgatat cgcttacgcc tttggcacgc cgccaagcga    104400
+     cgccgaagcg attaaagttc gccacggttg tgcgctgggt tccatcgttg gaaaagatga    104460
+     gagcgtggaa gtgccgagcg taggtggtcg tccgccacgg agtctgcaac gtcagacact    104520
+     ggcagaggtg atcgagccgc gctataccga gctgctcaac ctggtcaacg aagagatatt    104580
+     gcagttgcag gaaaagcttc gccaacaagg ggttaaacat cacctggcgg caggcattgt    104640
+     attaaccggt ggcgcagcgc agatcgaagg tcttgcagcc tgtgctcagc gcgtgtttca    104700
+     tacgcaagtg cgtatcggcg cgccgctgaa cattaccggt ttaacggatt atgctcagga    104760
+     gccgtattat tcgacggcgg tgggattgct tcactatggg aaagagtcac atcttaacgg    104820
+     tgaagctgaa gtagaaaaac gtgttacagc atcagttggc tcgtggatca agcgactcaa    104880
+     tagttggctg cgaaaagagt tttaattttt atgaggccga cgatgattac ggcctcaggc    104940
+     gacaggcaca aatcggagag aaactatgtt tgaaccaatg gaacttacca atgacgcggt    105000
+     gattaaagtc atcggcgtcg gcggcggcgg cggtaatgct gttgaacaca tggtgcgcga    105060
+     gcgcattgaa ggtgttgaat tcttcgcggt aaataccgat gcacaagcgc tgcgtaaaac    105120
+     agcggttgga cagacgattc aaatcggtag cggtatcacc aaaggactgg gcgctggcgc    105180
+     taatccagaa gttggccgca atgcggctga tgaggatcgc gatgcattgc gtgcggcgct    105240
+     ggaaggtgca gacatggtct ttattgctgc gggtatgggt ggtggtaccg gtacaggtgc    105300
+     ggcaccagtc gtcgctgaag tggcaaaaga tttgggtatc ctgaccgttg ctgtcgtcac    105360
+     taagcctttc aactttgaag gcaagaagcg tatggcattc gcggagcagg ggatcactga    105420
+     actgtccaag catgtgaact ctctgatcac tatcccgaac gacaaactgc tgaaagttct    105480
+     gggccgcggt atctccctgc tggatgcgtt tggcgcagcg aacgatgtac tgaaaggcgc    105540
+     tgtgcaaggt atcgctgaac tgattactcg tccgggtttg atgaacgtgg actttgcaga    105600
+     cgtacgcacc gtaatgtctg agatgggcca cgcaatgatg ggttctggcg tggcgagcgg    105660
+     tgaagaccgt gcggaagaag ctgctgaaat ggctatctct tctccgctgc tggaagatat    105720
+     cgacctgtct ggcgcgcgcg gcgtgctggt taacatcacg gcgggcttcg acctgcgtct    105780
+     ggatgagttc gaaacggtag gtaacaccat ccgtgcattt gcttccgaca acgcgactgt    105840
+     ggttatcggt acttctcttg acccggatat gaatgacgag ctgcgcgtaa ccgttgttgc    105900
+     gacaggtatc ggcatggaca aacgtcctga aatcactctg gtgaccaata agcaggttca    105960
+     gcagccagtg atggatcgct accagcagca tgggatggct ccgctgaccc aagagcagaa    106020
+     gccggttgct aaagtcgtga atgacaatgc gccgcaaact gcgaaagagc cggattatct    106080
+     ggatatccca gcattcctgc gtaagcaagc tgattaagaa ttgactggaa tttgggtttc    106140
+     gattctcttt gtgctaaact ggcccgccga atgtatagta cacttcggtt ggataggtaa    106200
+     tttggcgaga taatacgatg atcaaacaaa ggacacttaa acgtatcgtt caggcgacgg    106260
+     gtgtcggttt acataccggc aagaaagtca ccctgacgtt acgccctgcg ccggccaaca    106320
+     ccggggtcat ctatcgtcgc accgacttga atccaccggt agatttcccg gccgatgcca    106380
+     aatctgtgcg tgataccatg ctctgtacgt gtctggtcaa cgagcatgat gtacggattt    106440
+     caaccgtaga gcacctcaat gctgctctcg cgggcttggg catcgataac attgttatcg    106500
+     aagttaacgc gccggaaatc ccgatcatgg acggcagcgc cgctccgttt gtatacctgc    106560
+     tgcttgacgc cggtatcgac gagttgaact gcgccaaaaa atttgttcgc atcaaagaga    106620
+     ctgttcgtgt cgaagatggc gataagtggg ctgaatttaa gccgtacaat ggtttttcgc    106680
+     tggatttcac catcgatttt aaccatccgg ctattgattc cagcaaccag cgctatgcga    106740
+     tgaacttctc cgctgatgcg tttatgcgcc agatcagccg tgcgcgtacg ttcggtttca    106800
+     tgcgtgatat cgaatatctg cagtcccgtg gtttgtgcct gggcggcagc ttcgattgtg    106860
+     ccatcgttgt tgacgattat cgcgtactga acgaagacgg cctgcgtttt gaagacgaat    106920
+     ttgtgcgtca caaaatgctc gatgcgatcg gtgacttgtt catgtgtggt cacaatatta    106980
+     ttggtgcatt taccgcttat aaatccggtc atgcactgaa taacaaactg ctgcaggctg    107040
+     tcctggcgaa acaggaagcc tgggaatatg tgaccttcca ggacgacgca gaactgccgt    107100
+     tggccttcaa agcgccttca gctgtactgg cataacgaca tttatactgt cgtataaaat    107160
+     tcgactggca aatctggcac tctctccggc caggtgaacc agtcgttttt ttttgaattt    107220
+     tataagagct ataaaaaacg gtgcgaacgc tgttttctta agcacttttc cgcacaactt    107280
+     atcttcattc gtgctgtgga ctgcaggctt taatgataag atttgtgcgc taaatacgtt    107340
+     tgaatatgat cgggatggca ataacgtgag tggaatactg acgcgctggc gacagtttgg    107400
+     taaacgctac ttctggccgc atctcttatt agggatggtt gcggcgagtt taggtttgcc    107460
+     tgcgctcagc aacgccgccg aaccaaacgc gcccgcaaaa gcgacaaccc gcaaccacga    107520
+     gccttcagcc aaagttaact ttggtcaatt ggccttgctg gaagcgaaca cacgccgccc    107580
+     gaattcgaac tattccgttg attactggca tcaacatgcc attcgcacgg taatccgtca    107640
+     tctttctttc gcaatggcac cgcaaacact gcccgttgct gaagaatctt tgcctcttca    107700
+     ggcgcaacat cttgcattac tggatacgct cagcgcgctg ctgacccagg aaggcacgcc    107760
+     gtctgaaaag ggttatcgca ttgattatgc gcattttacc ccacaagcaa aattcagcac    107820
+     gcccgtctgg ataagccagg cgcaaggcat ccgtgctggc cctcaacgcc tcacctaaca    107880
+     acaataaacc tttacttcat tttattaact ccgcaacgcg gggcgtttga gattttatta    107940
+     tgctaatcaa attgttaact aaagttttcg gtagtcgtaa cgatcgcacc ctgcgccgga    108000
+     tgcgcaaagt ggtcaacatc atcaatgcca tggaaccgga gatggaaaaa ctctccgacg    108060
+     aagaactgaa agggaaaacc gcagagtttc gtgcacgtct ggaaaaaggc gaagtgctgg    108120
+     aaaatctgat cccggaagct ttcgccgtgg tacgtgaggc aagtaagcgc gtctttggta    108180
+     tgcgtcactt cgacgttcag ttactcggcg gtatggttct taacgaacgc tgcatcgccg    108240
+     aaatgcgtac cggtgaagga aaaaccctga ccgcaacgct gcctgcttac ctgaacgcac    108300
+     taaccggtaa aggcgtgcac gtagttaccg tcaacgacta cctggcgcaa cgtgacgccg    108360
+     aaaacaaccg tccgctgttt gaattccttg gcctgactgt cggtatcaac ctgccgggca    108420
+     tgccagcacc ggcaaagcgc gaagcttacg cagctgacat cacttacggt acgaacaacg    108480
+     aatacggctt tgactacctg cgcgacaaca tggcgttcag ccctgaagaa cgtgtacagc    108540
+     gtaaactgca ctatgcgctg gtggacgaag tggactccat cctgatcgat gaagcgcgta    108600
+     caccgctgat catttccggc ccggcagaag acagctcgga aatgtataaa cgcgtgaata    108660
+     aaattattcc gcacctgatc cgtcaggaaa aagaagactc cgaaaccttc cagggcgaag    108720
+     gccacttctc ggtggacgaa aaatctcgcc aggtgaacct gaccgaacgt ggtctggtgc    108780
+     tgattgaaga actgctggtg aaagagggca tcatggatga aggggagtct ctgtactctc    108840
+     cggccaacat catgctgatg caccacgtaa cggcggcgct gcgcgctcat gcgctgttta    108900
+     cccgtgacgt cgactacatc gttaaagatg gtgaagttat catcgttgac gaacacaccg    108960
+     gtcgtaccat gcagggccgt cgctggtccg atggtctgca ccaggctgtg gaagcgaaag    109020
+     aaggtgtgca gatccagaac gaaaaccaaa cgctggcttc gatcaccttc cagaactact    109080
+     tccgtctgta tgaaaaactg gcggggatga ccggtactgc tgataccgaa gctttcgaat    109140
+     ttagctcaat ctacaagctg gataccgtcg ttgttccgac caaccgtcca atgattcgta    109200
+     aagatctgcc ggacctggtc tacatgactg aagcggaaaa aattcaggcg atcattgaag    109260
+     atatcaaaga acgtactgcg aaaggccagc cggtgctggt gggtactatc tccatcgaaa    109320
+     aatcggagct ggtgtcaaac gaactgacca aagccggtat taagcacaac gtcctgaacg    109380
+     ccaaattcca cgccaacgaa gcggcgattg ttgctcaggc aggttatccg gctgcggtga    109440
+     ctatcgcgac caatatggcg ggtcgtggta cagatattgt gctcggtggt agctggcagg    109500
+     cagaagttgc cgcgctggaa aatccgaccg cagagcaaat tgaaaaaatt aaagccgact    109560
+     ggcaggtacg tcacgatgcg gtactggaag caggtggcct gcatatcatc ggtaccgagc    109620
+     gtcacgaatc ccgtcgtatc gataaccagt tgcgcggtcg ttctggtcgt cagggggatg    109680
+     ctggttcttc ccgtttctac ctgtcgatgg aagatgcgct gatgcgtatt tttgcttccg    109740
+     accgagtatc cggcatgatg cgtaaactgg gtatgaagcc aggcgaagcc attgaacacc    109800
+     cgtgggtgac taaagcgatt gccaacgccc agcgtaaagt tgaaagccgt aacttcgaca    109860
+     ttcgtaagca actgctggaa tatgatgacg tggctaacga tcagcgtcgc gccatttact    109920
+     cccagcgtaa cgaactgttg gatgtcagcg atgtgagcga aaccattaac agcattcgtg    109980
+     aagatgtgtt caaagcgacc attgatgcct acattccacc acagtcgctg gaagaaatgt    110040
+     gggatattcc ggggctgcag gaacgtctga agaacgattt cgacctcgat ttgccaattg    110100
+     ccgagtggct ggataaagaa ccagaactgc atgaagagac gctgcgtgac ggcattctgg    110160
+     cgcagtccat cgaagtgtat cagcgtaaag aagaagtggt tggtgctgag atgatgcgtc    110220
+     acttcgagaa aggcgtcatg ctgcaaacgc ttgactccct gtggaaagag cacctggcag    110280
+     cgatggacta tctgcgtcag ggtatccacc tgcgtggcta cgcacagaaa gatccgaagc    110340
+     aggaatacaa acgtgaatcg ttctccatgt ttgcagcgat gctggagtcg ttgaaatatg    110400
+     aagttatcag tacgctgagc aaagttcagg tacgtatgcc tgaagaggtt gaggagctgg    110460
+     aacaacagcg tcgtatggaa gccgagcgtt tagcgcaaat gcagcagctt agccatcagg    110520
+     atgacgactc tgcagccgca gctgcactgg cggcgcaaac cggagagcgc aaagtaggac    110580
+     gtaacgatcc ttgcccgtgc ggttctggta aaaaatacaa gcagtgccat ggccgcctgc    110640
+     aataaaagct aactgttgaa gtaaaaggcg caggattctg cgcctttttt ataggtttaa    110700
+     gacaatgaaa aagctgcaaa ttgcggtagg tattattcgc aacgagaaca atgaaatctt    110760
+     tataacgcgt cgcgcagcag atgcgcacat ggcgaataaa ctggagtttc ccggcggtaa    110820
+     aattgaaatg ggtgaaacgc cggaacaggc ggtggtgcgt gaacttcagg aagaagtcgg    110880
+     gattaccccc caacattttt cgctatttga aaaactggaa tatgaattcc cggacaggca    110940
+     tataacactg tggttttggc tggtcgaacg ctgggaaggg gagccgtggg gtaaagaagg    111000
+     gcaacccggt gagtggatgt cgctggtcgg tcttaatgcc gatgattttc cgccagccaa    111060
+     tgaaccggta attgcgaagc ttaaacgtct gtaggtcaga taaggcgttt tcgccgcatc    111120
+     cgacattcgc acacgatgcc tgatgcgacg ctggcgcgtc ttatcaggcc taaagggatt    111180
+     tctaactcat tgataaattt gtttttgtag gtcggataag gcgttcacgc cgcatccgac    111240
+     atttgcacaa gatgcctgat gcgacgctgt ccgcgtctta tcaggcctac gtgcggcatc    111300
+     agacaaatgt cactgctttg gttcttcgct ccagtcatcg ctttcggaaa gatcgccact    111360
+     gctggggatt cgtttttctt cagcagccca ttctccgagg tcgatcag                 111408
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+!version: $Revision: 1.1 $
+!date:                 Fri Sep 20 14:08:27 GMT 2002
+!saved-by: gwg
+!autogenerated-by: DAG-Edit version 1.311
+!
+!Gene Ontology definitions
+!
+
+term: cellular_component
+goid: GO:0005575
+definition: The part of a cell of which a gene product is a component; for purpose of GO includes the extracellular environment of cells; a gene product may be a component of one or more parts of a cell; this term includes gene products that are parts of macromolecular complexes, by the definition that all members of a complex normally co-purify under all except extreme conditions.
+definition_reference: GO:curators
+
+term: ascus
+goid: GO:0005627
+definition: A sac-like fruiting body (ascomycete fungi); contains ascospores (typically eight in number).
+definition_reference: ISBN:0198547684
+
+term: lipid particle
+goid: GO:0005811
+definition: Any particle of coalesced lipids in the cytoplasm of a cell.
+definition_reference: GO:mah
+
+term: spore wall (sensu Fungi)
+goid: GO:0005619
+definition: The specialized cell wall of the ascospore (spore), as described in Saccharomyces.
+definition_reference: ISBN:0879693568
+
+term: membrane fraction
+goid: GO:0005624
+definition: That fraction of cells, prepared by disruptive biochemical methods, that includes the plasma and other membranes.
+definition_reference: FB:ma
+
+term: microsome
+goid: GO:0005792
+definition: Any of the small, heterogeneous, artifactual, vesicular particles, 50-150 nm in diameter, that are formed when some eukaryotic cells are homogenized and that sediment on centrifugation at 100000 g.
+definition_reference: ISBN:0198506732
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test2
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test2	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/GO.defs.test2	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,22 @@
+!version: $Revision: 1.1 $
+!date:                 Fri Sep 20 14:08:27 GMT 2002
+!saved-by: gwg
+!autogenerated-by: DAG-Edit version 1.311
+!
+!Gene Ontology definitions
+!
+
+term: cellular_component
+goid: GO:0003673
+definition: The part of a cell of which a gene product is a component; for purpose of GO includes the extracellular environment of cells; a gene product may be a component of one or more parts of a cell; this term includes gene products that are parts of macromolecular complexes, by the definition that all members of a complex normally co-purify under all except extreme conditions.
+definition_reference: GO:curators
+
+term: ascus
+goid: GO:0005575
+definition: A sac-like fruiting body (ascomycete fungi); contains ascospores (typically eight in number).
+definition_reference: ISBN:0198547684
+
+term: lipid particle
+goid: GO:0018895
+definition: Any particle of coalesced lipids in the cytoplasm of a cell.
+definition_reference: GO:mah

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Genscan.FastA
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Genscan.FastA	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Genscan.FastA	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+>description
+GTATTGCGTCACTTCCTGTCTTCTCAATCGTTCGTTGGTTGCACTGTGCATGGGGTGTTGTATTCACTTAATATTGAACACTTGGTGTGTTGTATTCACTTAATATTGAACACTTGGTGTGTTGTATTCACTTTACTTAAAATTTTGAACACTTGGGACATTCTAGTCACCTGATAACAACAGTGAGAAACAACCCATATTAAACAAATACAGGGCTCCACCGATTATCAGCGTTAGCATGGCCTCACCGTCTGTTTCACCCGGTGTCTTTGTCTGTTCAGCGTGTGAAATGTTTAGTTACTCCTCTGCCTCCTTTAGTGAAGGGAATAGGTGCAGAAAGTGTAGTTTATTTATGGCTATGGAGGCGAGACTTAGCGAGCTTGAGACGCGGTTCCGCAGCTTGGAGTTAGCTGGAGTTGCGTCAGGTAGCCAGGAGAAGCTAGCTGCTGCGGAGCCGCCTAGCGTAGCTACAGCTAGCGGTCCCCCGGCAGCAGCCGAGCAGCCGGCTAGCCAGGGCGGCTGGGTGACGGTTCGTAGGAAGCGTAGCCCAAAACAAAGGCCCACGGTGCACCACCAACCGCTTCCCGTGGCTAACCGCTTTTCCCCACTCGGCGACACACCCGCTGAGAAACCGACCCTGGTAATTGGCGACTCTGTTTTGCGCTACGTGAAGCCGACTCCAGCGACCATAGTTAAGTGCATTCCGGGGGCCAGAGCGGGCGACATAGAAGCAAATTTACGGCTGCTGGCGAGACGTAATCGTAAATTTGGTAAAGTTATTATTCACGTCGGAGCCAACGACACCCGGCTTCGTCAGTCGGAGGTCACCAAAATTAACTTGGAGTCGGTGTGTAACTACGCAAAAACGATGTCGGACTCCGTAGCATTCTCTGGTCCCCTCCCCAATCTGGCCAGCGAGGAGATGTTTAGCCGCATGTCGTCGCTTCGTCGCTGGCTGTCACGGTGGTGCCCCGAAAACCAGGTGGCCTTTATAGACAATTGGAGCACTTTTTGGGGAAAACCTGGTCTGATTAGGAGAGACGGTGTCCATCCCACACGGGATGGTGCTTCTCTCATTTCTAGTAATTTGGCTAATTTTATT 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer2.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer2.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer2.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,256 @@
+GC Proportion = 33.3%
+Minimum gene length = 90
+Minimum overlap length = 30
+Minimum overlap percent = 10.0%
+Threshold score = 90
+Use independent scores = True
+Ignore independent score on orfs longer than 481
+Use strict independent model = True
+Use first start codon = True
+
+              Orf     Gene                 Lengths     Gene    -- Frame Scores -  Indep
+  ID#  Fr    Start    Start      End      Orf  Gene    Score   F1 F2 F3 R1 R2 R3  Score
+       F3      654      681      779      126    99       0    99  _  0  _  _  _    0     0  -1.452     
+       R1      984      957      772      213   186       0    99  _  _  0  _  _    0     0  -1.383     
+       F2      917      917     1009       93    93       0    99  0  _  _  _  _    0     0  -1.335     
+    1  F1      292      292     1620     1329  1329      99    99  _  _  _  _  _    0   396  -1.219     
+       R2     1631     1607     1515      117    93       0    99  _  _  _  0  _    0     0  -1.331     
+       F2     1592     1706     1837      246   132       0     _  0  _  _  _  _   99     0  -1.346     
+       R2     1844     1829     1737      108    93       0     _  0  _  _  0  _   99     0  -1.351     
+       F3     1860     1881     2000      141   120       2     _  7  2  _  _ 21   68     2  -1.372     
+       F2     1841     1856     2095      255   240       0     _  0  _  _  _  _   99     7  -1.373     
+       R3     2152     2017     1865      288   153       0     _  8  _  _  _  0   90    21  -1.397     
+       F3     2004     2028     2174      171   147       0     _  _  0  _  _  _   99     0  -1.385     
+    2  R1     2385     2349     2233      153   117      99     _  _  _ 99  _  _    0    99  -1.208     
+       R1     3039     3033     2944       96    90      14     _  _  _ 14  _  _   85    14  -1.262     
+       R3     3253     3235     3098      156   138       0     _  _  0  _  _  0   99     0  -1.356     
+       F3     3087     3105     3254      168   150       0     _  _  0  _  _  0   99     0  -1.336     
+       R3     3463     3439     3323      141   117       1     _  _ 98  _  _  1    0     1  -1.235     
+       F1     3550     3589     3699      150   111       0     0  _ 99  _  _  _    0     0  -1.281     
+       R3     3727     3712     3611      117   102       0     _  _ 99  _  _  0    0     0  -1.247     
+       R1     4038     4026     3916      123   111       0     _  _ 99  0  _  _    0     0  -1.376     
+       R2     4097     4091     3999       99    93       0     _  _ 99  _  0  _    0     0  -1.296     
+       F1     4009     4009     4110      102   102       0     0  _ 99  _  _  _    0     0  -1.341     
+       F1     4171     4189     4311      141   123       0     0  _ 99  _  0  0    0     0  -1.416     
+    3  F3     3258     3279     4397     1140  1119      99     _  _ 99  _  _  _    0   791  -1.201     
+       R3     4405     4300     4184      222   117       0     0  _ 99  _  0  0    0     0  -1.360     
+       R2     4481     4436     4323      159   114       0     _  _  _  _  0  _   99     0  -1.359     
+       F2     4454     4460     4549       96    90       0     _  0  _  _  _  _   99     0  -1.348     
+       R1     5025     4962     4801      225   162       0     _  0  _  0  _  _   99     0  -1.394     
+       F2     4790     4811     5026      237   216       0     _  0  _  0  _  _   99     0  -1.350     
+    4  F1     5038     5062     6156     1119  1095      99    99  _  _  _  _  _    0    99  -1.184     
+       R2     6386     6383     6216      171   168       0     0  _ 99  _  0  _    0     0  -1.345     
+       F1     6217     6292     6432      216   141       0     0  _ 99  _  _  _    0     0  -1.391     
+       R3     6487     6469     6338      150   132       1     _  _ 98  _  _  1    0     1  -1.268     
+    5  F3     6105     6156     6617      513   462      99     _  _ 99  _  _  _    0   395  -1.205     
+       R2     6722     6704     6600      123   105       0     0  _  _  _  0  _   99     0  -1.380     
+       F1     6601     6628     6723      123    96       0     0  _  _  _  0  0   99     0  -1.504     
+       R3     6880     6856     6743      138   114       0     _ 99  0  _  _  0    0     0  -1.541     
+       R3     7033     6988     6884      150   105       0     _ 99  _  _  _  0    0     0  -1.423     
+       F3     7062     7062     7187      126   126       0     _ 99  0  _  0  0    0     0  -1.379     
+       R2     7196     7067     6975      222    93       0     _ 99  _  _  0  _    0     0  -1.504     
+       R3     7267     7237     7037      231   201       0     _ 99  _  _  _  0    0     0  -1.426     
+       F3     7299     7299     7415      117   117       0     _ 99  0  _  _  _    0     0  -1.388     
+       R2     7502     7481     7389      114    93       0     _ 99  _  _  0  0    0     0  -1.329     
+       F3     7419     7425     7514       96    90       0     _ 98  0  1  _  0    0     0  -1.341     
+       R3     7525     7468     7337      189   132       0     _ 99  _  _  _  0    0     0  -1.333     
+       R1     7683     7524     7423      261   102       0     _ 96  _  0  _  0    2     1  -1.288     
+       F3     7896     7899     8036      141   138       0     _ 99  0  _  0  0    0     0  -1.479     
+       R3     8050     8002     7895      156   108       0     _ 99  0  _  0  0    0     0  -1.540     
+       R2     8147     8069     7875      273   195       0     _ 99  _  _  0  _    0     0  -1.384     
+       R3     8305     8293     8123      183   171       0     _ 99  _  _  _  0    0     0  -1.399     
+    6  F2     6674     6686     8602     1929  1917      99     _ 99  _  _  _  _    0  1481  -1.218     
+       R3     8986     8935     8819      168   117       0     _ 99  _  _  _  0    0     0  -1.463     
+       F3     9003     9003     9197      195   195       0     _ 99  0  _  _  _    0     0  -1.421     
+       R1     9246     9240     9142      105    99       0     _ 99  _  0  _  _    0     0  -1.309     
+       R3    10417    10360    10199      219   162       0     _ 99  _  _  _  0    0     0  -1.428     
+       R3    10576    10576    10421      156   156       0     _ 99  _  _  _  0    0     0  -1.380     
+       R1    10851    10809    10576      276   234       0     _ 99  _  0  _  _    0     0  -1.340     
+       R1    11097    11088    10999       99    90       0     _ 99  _  0  _  _    0     0  -1.362     
+    7  F2     8606     8624    11125     2520  2502      99     _ 99  _  _  _  _    0   792  -1.206     
+       R2    11399    11345    11205      195   141       0     _  _ 99  _  0  _    0     0  -1.459     
+       R3    11584    11536    11396      189   141       0     _  _ 99  _  _  0    0     0  -1.375     
+       R1    11742    11724    11635      108    90       0     _  0 99  0  0  _    0     0  -1.396     
+    8  F3    11118    11133    12119     1002   987      99     _  _ 99  _  _  _    0   396  -1.229     
+       F1    11974    11974    12129      156   156       0     0  _  _  _  _ 81   18     0  -1.379     
+    9  R3    12142    12130    11921      222   210      94     _  _  _  _  _ 94    5   175  -1.287     
+       R2    12149    12146    12012      138   135       0     _  _  _  _  0  _   99     0  -1.381     
+       R3    12469    12355    12167      303   189       0     _  _  0  _  _  0   99     0  -1.466     
+       F3    12123    12129    12470      348   342       6     _  _  6  _  _  _   93     6  -1.391     
+       R2    12494    12494    12309      186   186       0     _  _  _  _  0  _   99     0  -1.454     
+       F2    12422    12473    12565      144    93       6     _  6  _  _  _  _   93     6  -1.318     
+       R1    12651    12636    12541      111    96       0     _  _  1  0  _  _   98     0  -1.441     
+       F3    12477    12489    12662      186   174       0     _  _  0  _  _  _   99     1  -1.389     
+       R3    13084    13069    12977      108    93       0    99  _  _  _  0  0    0     0  -1.347     
+       R2    13169    13166    12972      198   195       0    99  _  _  _  0  _    0     0  -1.282     
+       F3    13176    13218    13313      138    96       0    99  _  0  _  _  _    0     0  -1.296     
+   10  F1    12670    12715    13449      780   735      99    99  _  _  _  _  _    0   396  -1.211     
+       R1    13470    13449    13294      177   156       0    99  _  _  0  _  _    0     0  -1.444     
+       R3    13627    13624    13535       93    90       0     _  _  _  _  _  0   99     0  -1.409     
+       R2    13691    13688    13593       99    96       0     _  _  _  _  0  _   99     0  -1.364     
+   11  F3    13656    13671    13838      183   168      99     _  _ 99  _  _  _    0    99  -1.293     
+       R2    13889    13871    13740      150   132       4     _  _  _  _  4  _   95     4  -1.327     
+       F1    13762    13774    13917      156   144       0     0  _  _  _  _  _   99     0  -1.376     
+       F2    13928    13949    14074      147   126       1     0  1  _ 98  _  _    0     1  -1.301     
+       F1    13921    13963    14088      168   126       0     0  _  _ 99  _  _    0     0  -1.425     
+       R2    14222    14192    14091      132   102       0     _  _  _ 84  0  _   15     0  -1.473     
+       F1    14128    14128    14223       96    96       0     0  _  0 98  0  _    1     0  -1.481     
+       F3    14103    14133    14330      228   198       0     _  _  0 99  _  _    0     0  -1.405     
+       F2    14216    14270    14362      147    93      38     0 38  _ 50  _  _   11    38  -1.336     
+       F1    14248    14296    14397      150   102       0     0  _  _ 96  _  _    3     0  -1.454     
+       R2    14603    14594    14439      165   156       0     _  _  _ 99  0  _    0     0  -1.305     
+   12  R1    14847    14805    13807     1041   999      99     _  _  _ 99  _  _    0   822  -1.279     
+       F2    15419    15440    15544      126   105       0     _  0  0  _ 94  _    5     0  -1.496     
+       R3    15682    15682    15530      153   153       0     _  _  0  _ 99  0    0     0  -1.381     
+       F2    15584    15599    15688      105    90       0     0  0  0  _ 99  _    0     0  -1.518     
+       F3    15210    15219    15746      537   528       0     _  _  0  _ 99  _    0     0  -1.512     
+   13  R2    15791    15776    15219      573   558      99     _  _  _  _ 99  _    0   490  -1.295     
+       F1    16213    16219    16326      114   108       0     0  _ 99  _  _  _    0     0  -1.297     
+       F2    16253    16268    16369      117   102       0     _  0 99  _  _  _    0     0  -1.351     
+       R3    17203    17101    17006      198    96       0     _  _ 98  _  _  0    0     0  -1.297     
+       F2    17198    17207    17335      138   129       0     _  0 99  0  _  _    0     0  -1.393     
+       R1    17370    17349    17158      213   192       0     _  _ 99  0  _  _    0     0  -1.339     
+   14  F3    15849    15855    17459     1611  1605      99     _  _ 99  _  _  _    0   593  -1.232     
+       R3    17677    17623    17483      195   141       0     _  _ 99  _  _  0    0     0  -1.339     
+       R2    18242    18137    18045      198    93       0     _  _ 99  _  0  _    0     0  -1.320     
+   15  F3    17463    17484    18374      912   891      99     _  _ 99  _  _  _    0   297  -1.206     
+       R2    18620    18554    18438      183   117       0     0  _  _ 99  0  _    0     0  -1.344     
+       F1    18439    18535    18633      195    99       0     0  _  _ 99  _  _    0     0  -1.318     
+       F2    18677    18719    18811      135    93       4     _  4  _ 95  _  0    0     4  -1.188     
+       F1    18808    18835    18927      120    93       0     0  _  _ 99  _  _    0     0  -1.497     
+       R2    19040    18995    18885      156   111       0     _  _  _ 99  0  _    0     0  -1.367     
+       F3    18978    18981    19124      147   144       0     _  _  0 99  _  _    0     0  -1.309     
+       R2    19235    19232    19044      192   189       0     _  _  _ 99  0  _    0     0  -1.301     
+       R3    19357    19357    19211      147   147       0     _  _  0 99  _  0    0     0  -1.360     
+       F3    19188    19269    19391      204   123       0     _  _  0 99  _  _    0     0  -1.269     
+   16  R1    19449    19407    18322     1128  1086      99     _  _  _ 99  _  _    0   986  -1.187     
+       F3    19443    19473    19571      129    99       0     _  _  0  _  _  _   99     0  -1.312     
+       R2    19631    19628    19527      105   102      23     _  _  _  _ 23  _   76    23  -1.268     
+       F2    19610    19616    19720      111   105      42     _ 42  _  _  _  _   57    42  -1.247     
+       R3    19873    19843    19718      156   126       0     _  _  0  _  _  0   99     0  -1.478     
+       F3    19719    19740    19895      177   156       0     _  _  0  _  _  _   99     0  -1.482     
+       F1    20041    20068    20175      135   108       0     0  _ 70  _  _ 28    1     0  -1.444     
+       R3    20185    20110    19976      210   135       0     _  _ 99  _  _  0    0    28  -1.372     
+   17  F3    19977    19980    20192      216   213      99     _  _ 99  _  _  _    0   268  -1.320     
+       F1    20179    20266    20364      186    99       0     0  0  _  _  _ 99    0     0  -1.365     
+       R1    20445    20418    20305      141   114       0     _  0  _  0  _ 99    0     0  -1.318     
+       F2    20261    20288    20446      186   159       0     _  0  _  _  _ 99    0     0  -1.376     
+       F1    20404    20413    20601      198   189       0     0  _  _  _  _ 99    0     0  -1.361     
+       F3    20469    20583    20756      288   174       0     _  _  0  _  _ 99    0     0  -1.274     
+   18  R3    20824    20749    20189      636   561      99     _  _  _  _  _ 99    0   594  -1.215     
+       F1    20752    20773    20910      159   138       4     4  2  _  _  _  _   92     4  -1.340     
+       R1    21078    21039    20842      237   198       0     _ 99  _  0  _  _    0     0  -1.243     
+       F3    21063    21069    21212      150   144       0     _ 99  0  _  _  _    0     0  -1.350     
+       R1    21597    21585    21496      102    90       0     _ 99  _  0  0  _    0     0  -1.313     
+       F1    21508    21520    21624      117   105       0     0 99  _  _  0  _    0     0  -1.361     
+   19  F2    20765    20801    21703      939   903      99     _ 99  _  _  _  _    0   497  -1.192     
+       R3    22108    22078    21914      195   165       0     _ 99  _  _  _  0    0     0  -1.325     
+       F3    21918    21945    22109      192   165       0     _ 99  0  _  _  0    0     0  -1.374     
+       F3    22113    22116    22238      126   123       0     _ 99  0  0  0  _    0     0  -1.273     
+       R2    22277    22232    22020      258   213       0     _ 99  _  _  0  _    0     0  -1.294     
+       R1    22320    22194    22081      240   114       0     _ 99  _  0  0  _    0     0  -1.237     
+   20  F2    21845    21851    22351      507   501      99     _ 99  _  _  _  _    0   594  -1.193     
+       R3    22465    22462    22241      225   222       0     _  _  _  _  _  0   99     0  -1.358     
+       R2    22490    22487    22398       93    90       0     _  _ 99  _  0  _    0     0  -1.415     
+       F1    22402    22420    22554      153   135       0     0  _ 99  _  _  _    0     0  -1.354     
+       R2    22598    22592    22494      105    99       0     _  _ 99  _  0  _    0     0  -1.425     
+       R3    22720    22681    22547      174   135       0     _  _ 99  _  _  0    0     0  -1.269     
+       F1    22657    22663    22776      120   114       0     0  _ 99  _  0  _    0     0  -1.353     
+   21  F3    22332    22359    23099      768   741      99     _  _ 99  _  _  _    0   594  -1.221     
+       R2    23501    23471    23271      231   201       0     _  _ 99  _  0  _    0     0  -1.351     
+   22  F3    23112    23118    23723      612   606      99     _  _ 99  _  _  _    0   198  -1.186     
+       R2    24008    24008    23910       99    99       0     _  _  _  _  0  _   99     0  -1.382     
+       F1    23920    23920    24033      114   114       0     0  _  _  _  _  _   99     0  -1.364     
+       F1    24220    24241    24330      111    90       0     0  _ 16  _  _  _   83     0  -1.424     
+       F2    24320    24365    24454      135    90       0     _  0 19  _  _  _   79     0  -1.403     
+       R1    24507    24507    24412       96    96       0     _  _ 99  0  _  _    0     0  -1.387     
+       F2    24467    24536    24637      171   102       0     0  0 99  _  _  _    0     0  -1.483     
+       R3    24673    24670    24548      126   123       5     0  _ 94  _  _  5    0     5  -1.270     
+       F1    24496    24562    24723      228   162       0     0  _ 99  _  _  _    0     0  -1.385     
+       R1    25095    25089    24967      129   123       0     _  _ 99  0  _  _    0     0  -1.450     
+       R2    25343    25304    25185      159   120       0     _  _ 99  _  0  _    0     0  -1.591     
+       F1    25225    25267    25362      138    96       0     0  0 99  _  _  _    0     0  -1.484     
+       F2    25208    25331    25444      237   114       0     _  0 99  _  _  _    0     0  -1.501     
+       F1    25774    25774    25863       90    90       0     0  0 99  0  _  _    0     0  -1.385     
+   23  F3    24150    24216    25895     1746  1680      99     _  _ 99  _  _  _    0  1020  -1.261     
+       R3    26158    26125    25970      189   156       0     _  _  _  _  _  0   99     0  -1.368     
+       F3    26055    26064    26216      162   153      83     _  _ 83  _  _  _   16    83  -1.262     
+       R2    26309    26285    26133      177   153       1     _  _  _  _  1  _   98     1  -1.294     
+       R1    26370    26328    26212      159   117       0     _  _  _  0  _  _   99     0  -1.327     
+       F3    26463    26592    26684      222    93      19     _  _ 19 14  _  7   59    19  -1.301     
+       R3    26698    26647    26477      222   171      84     _  _  0  _  _ 84   14    91  -1.290     
+       R1    26703    26697    26557      147   141       0     _  _  _  0  _ 92    7    14  -1.342     
+       F2    26672    26684    26890      219   207       0     _  0  _  _  _  _   99     0  -1.325     
+       F2    26945    26954    27067      123   114       2     _  2  _ 97  _  _    0     2  -1.253     
+   24  R1    27117    27090    26731      387   360      99     _  _  _ 99  _  _    0   196  -1.219     
+       R1    27276    27231    27142      135    90       0     _  _  _  0  _  _   99     0  -1.239     
+       R1    27405    27375    27280      126    96       4     _  _  _  4  _  _   95     4  -1.307     
+       R2    27743    27743    27654       90    90       0     _  _  _  _  0  _   99     0  -1.310     
+       F2    27833    27836    27934      102    99       0     _  0  _  _  _  _   99     0  -1.427     
+       F1    27904    27952    28092      189   141       7     7  _  _  _  _  _   92     7  -1.317     
+   25  F2    28001    28025    28141      141   117      95     _ 95  _  _  _  _    4    95  -1.284     
+       R1    28272    28203    28081      192   123      70     _  _  _ 70  _  _   29    70  -1.319     
+       F1    28240    28240    28362      123   123       0     0  _ 99  _  _  0    0     0  -1.434     
+       R3    28390    28375    28154      237   222       0     _  _ 99  _  _  0    0     0  -1.331     
+       R2    28544    28544    28383      162   162       0     _  _ 99  _  0  _    0     0  -1.403     
+       F1    28387    28417    28548      162   132       0     0  _ 99  _  _  _    0     0  -1.393     
+       F1    28591    28663    28791      201   129       0     0  _ 99  _  0  _    0     0  -1.460     
+       R2    28988    28940    28812      177   129       0     _  _ 99  _  0  0    0     0  -1.442     
+       R3    29005    28885    28766      240   120       0     _  _ 99  _  _  0    0     0  -1.343     
+       R1    29019    28977    28885      135    93       0     _  _ 99  0  0  0    0     0  -1.354     
+   26  F3    28155    28185    29195     1041  1011      99     _  _ 99  _  _  _    0   891  -1.249     
+       F1    28987    28993    29244      258   252       0     0  _  _  _  _  _   99     0  -1.434     
+       R2    29579    29561    29430      150   132       0    99  _  _  _  0  _    0     0  -1.207     
+       R1    29622    29616    29521      102    96       0    98  _  _  0  _  _    1     0  -1.321     
+       R3    29866    29836    29702      165   135       0    99  _  0  _  0  0    0     0  -1.290     
+   27  F1    29263    29263    29946      684   684      99    99  _  _  _  _  _    0   395  -1.187     
+End = 29940
+
+   Original Genes =    27
+  Potential Genes =    27
+        Avg Olaps =   0.1
+Potential Changes =     2
+Potential Rejects =     2
+     Sure Rejects =     2
+
+   Original Genes =    27
+  Potential Genes =    25
+        Avg Olaps =   0.0
+Potential Changes =     0
+Potential Rejects =     0
+     Sure Rejects =     0
+
+   Original Genes =    27
+  Potential Genes =    25
+        Avg Olaps =   0.0
+Potential Changes =     0
+Potential Rejects =     0
+     Sure Rejects =     0
+
+
+Putative Genes:
+    1      292     1620  [+1 L=1329 r=-1.219]
+    2     2349     2233  [-1 L= 117 r=-1.208]
+    3     3279     4397  [+3 L=1119 r=-1.201]
+    4     5062     6156  [+1 L=1095 r=-1.184]
+    5     6156     6617  [+3 L= 462 r=-1.205]
+    6     6686     8602  [+2 L=1917 r=-1.218]
+    7     8624    11125  [+2 L=2502 r=-1.206]
+    8    11133    12119  [+3 L= 987 r=-1.229]
+   10    12715    13449  [+1 L= 735 r=-1.211]
+   12    14805    13807  [-1 L= 999 r=-1.279]
+   13    15776    15219  [-3 L= 558 r=-1.295]
+   14    15855    17459  [+3 L=1605 r=-1.232]
+   15    17484    18374  [+3 L= 891 r=-1.206]
+   16    19407    18322  [-1 L=1086 r=-1.187]
+   17    19980    20192  [+3 L= 213 r=-1.320]
+   18    20749    20189  [-2 L= 561 r=-1.215]
+   19    20801    21703  [+2 L= 903 r=-1.192]
+   20    21851    22351  [+2 L= 501 r=-1.193]
+   21    22359    23099  [+3 L= 741 r=-1.221]
+   22    23118    23723  [+3 L= 606 r=-1.186]
+   23    24216    25895  [+3 L=1680 r=-1.261]
+   24    27090    26731  [-1 L= 360 r=-1.219]
+   25    28025    28141  [+2 L= 117 r=-1.284]
+   26    28185    29195  [+3 L=1011 r=-1.249]
+   27    29263        6  [+1 L= 684 r=-1.187]

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.detail
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.detail	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.detail	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+Command:  /bio/sw/glimmer3/bin/glimmer3 -o 50 -g 110 -t 30 ../BCTDNA Glimmer3.icm Glimmer3
+
+Sequence file = ../BCTDNA
+ICM model file = Glimmer3.icm
+Excluded regions file = none
+List of orfs file = none
+Truncated orfs = false
+Circular genome = true
+Minimum gene length = 110 bp
+Maximum overlap bases = 50
+Input is NOT separate orfs
+Threshold score = 30
+Use first start codon = false
+Start codons = atg,gtg,ttg
+Start probs = 0.600,0.300,0.100
+Stop codons = taa,tag,tga
+
+
+>BCTDNA
+Sequence length = 29940
+GC percentage = 33.3%
+Ignore independent score on orfs longer than 482
+
+           ----- Start -----           --- Length ----  ------------- Scores -------------
+ ID  Frame   of Orf  of Gene     Stop   of Orf of Gene      Raw InFrm F1 F2 F3 R1 R2 R3 NC
+0001    +1    29263    29263        9      684     684     9.68    99 99  -  -  -  -  -  0 1.00
+        -1      984      894      769      213     123    -9.85     0 99  -  -  0  -  -  0 1.00
+0002    +1      292      298     1623     1329    1323     8.26    99 99  -  -  -  -  -  0 1.00
+        +2     1592     1706     1840      246     132    -5.10     0  -  0  -  -  -  - 99 1.00
+        +3     1860     1881     2003      141     120    -2.68     0  - 75  0  -  0  - 23 1.00
+0003    +2     1841     1949     2098      255     147     6.59    99  - 99  -  -  0  -  0 1.00
+        -2     2152     1993     1862      288     129    -3.37     0  - 44  0  -  0  - 55 1.00
+        +3     2004     2040     2177      171     135    -1.77     0  -  -  0  -  -  - 99 1.00
+0004    -1     2385     2349     2230      153     117     4.69    97  -  -  - 97  -  -  2 1.00
+        -2     3253     3235     3095      156     138    -9.03     0  -  -  0  -  0  - 99 1.00
+        +3     3087     3126     3257      168     129    -9.09     0  -  -  0  -  0  - 99 1.00
+        -2     3463     3439     3320      141     117     7.18     1  -  - 98  -  1  -  0 1.00
+        +1     3550     3589     3702      150     111    -0.25     0  0  - 99  -  -  -  0 1.00
+        -1     4038     4026     3913      123     111   -10.13     0  -  - 99  0  -  -  0 1.00
+        +1     4171     4192     4314      141     120    -8.48     0  0  - 99  -  0  0  0 1.00
+0005    +3     3258     3279     4400     1140    1119    10.20    99  -  - 99  -  -  -  0 1.00
+        -2     4405     4300     4181      222     117    -0.45     0  0  - 99  -  0  0  0 1.00
+        -3     4481     4436     4320      159     114   -12.02     0  -  -  -  -  -  0 99 1.00
+        -1     5025     4962     4798      225     162    -6.25     0  -  0  -  0  -  - 99 1.00
+        +2     4790     4913     5029      237     114    -7.57     0  -  0  -  0  -  - 99 1.00
+0006    +1     5038     5062     6159     1119    1095     9.92    99 99  -  -  -  -  -  0 1.00
+        -3     6386     6368     6213      171     153    -1.23     0  0  - 99  -  -  0  0 1.00
+        +1     6217     6295     6435      216     138    -7.86     0  0  - 99  -  -  -  0 1.00
+        -2     6487     6469     6335      150     132     2.90     0  -  - 99  -  0  -  0 1.00
+0007    +3     6105     6156     6620      513     462    12.86    99  -  - 99  -  -  -  0 1.00
+        -2     6880     6856     6740      138     114    -9.49     0  - 99  0  -  0  -  0 1.00
+        +3     7062     7062     7190      126     126    -3.41     0  - 99  0  -  0  0  0 1.00
+        -2     7267     7162     7034      231     126    -8.57     0  - 99  -  -  0  0  0 1.00
+        +3     7299     7299     7418      117     117    -2.08     0  - 99  0  -  -  -  0 1.00
+        -2     7525     7468     7334      189     132    -0.14     0  - 99  -  -  0  -  0 1.00
+        +3     7896     7920     8039      141     117    -7.88     0  - 99  0  -  0  0  0 1.00
+        -3     8147     8069     7872      273     195    -0.71     0  - 99  -  -  -  0  0 1.00
+        -2     8305     8293     8120      183     171   -10.93     0  - 99  -  -  0  -  0 1.00
+0008    +2     6674     6686     8605     1929    1917    12.34    99  - 99  -  -  -  -  0 1.00
+        -2     8986     8929     8816      168     111    -9.33     0  - 99  -  -  0  -  0 1.00
+        +3     9003     9084     9200      195     114   -11.31     0  - 99  0  -  -  -  0 1.00
+        -2    10417    10315    10196      219     117    -9.22     0  - 99  -  -  0  -  0 1.00
+        -2    10576    10576    10418      156     156    -7.32     0  - 99  -  -  0  -  0 1.00
+        -1    10851    10728    10573      276     153     1.00     0  - 99  -  0  -  -  0 1.00
+0009    +2     8606     8624    11128     2520    2502    12.77    99  - 99  -  -  -  -  0 1.00
+        -3    11399    11345    11202      195     141    -9.23     0  -  - 99  -  -  0  0 1.00
+        -2    11584    11536    11393      189     141    -3.32     0  -  - 99  -  0  -  0 1.00
+0010    +3    11118    11133    12122     1002     987    11.55    99  -  - 99  -  -  -  0 1.00
+        +1    11974    11974    12132      156     156    -4.16     0  0  -  -  - 54  - 45 1.00
+0011    -2    12142    12130    11918      222     210     2.27    92  -  -  -  - 92  -  7 1.00
+        -3    12149    12143    12009      138     132    -5.42     0  -  -  -  - 56  0 43 1.00
+        -2    12469    12355    12164      303     189    -7.16     0  -  - 20  -  0  - 79 1.00
+0012    +3    12123    12273    12473      348     198    10.08    99  -  - 99  -  0  -  0 1.00
+        -3    12494    12470    12306      186     162    -4.46     0  -  - 99  -  0  0  0 1.00
+        +3    12477    12507    12665      186     156    -5.68     0  -  -  0  -  -  - 99 1.00
+        -3    13169    13166    12969      198     195     2.54     0 99  -  -  -  -  0  0 1.00
+0013    +1    12670    12715    13452      780     735    11.22    99 99  -  -  -  -  -  0 1.00
+        -1    13470    13449    13291      177     156    -7.69     0 99  -  -  0  -  -  0 1.00
+0014    +3    13656    13722    13841      183     117     4.01    92  -  - 92  -  -  -  7 1.00
+        -3    13889    13871    13737      150     132    -1.62     1  -  -  -  -  -  1 98 1.00
+        +1    13762    13774    13920      156     144    -3.79     0  0  -  -  -  -  - 99 1.00
+        +2    13928    13949    14077      147     126     3.00     0  0  0  - 99  -  -  0 1.00
+        +1    13921    13963    14091      168     126   -10.74     0  0  -  - 99  -  -  0 1.00
+        +3    14103    14142    14333      228     189    -3.47     0  -  -  0 99  -  -  0 1.00
+        -3    14603    14594    14436      165     156    -3.77     0  -  -  - 99  -  0  0 1.00
+0015    -1    14847    14805    13804     1041     999     5.49    99  -  -  - 99  -  -  0 1.00
+        -2    15682    15676    15527      153     147    -1.39     0  -  -  0  -  0 99  0 1.00
+0016    +3    15210    15219    15749      537     528   -12.48     0  0  -  0  -  - 99  0 1.00
+0017    -3    15791    15776    15216      573     558    11.35    99  -  -  -  -  - 99  0 1.00
+        +2    17198    17207    17338      138     129    -3.68     0  -  0 99  0  -  -  0 1.00
+        -1    17370    17271    17155      213     114     3.30     0  -  - 99  0  -  -  0 1.00
+0018    +3    15849    15948    17462     1611    1512     9.36    99  -  - 99  -  -  -  0 1.00
+        -2    17677    17623    17480      195     141    -1.26     1  -  - 34  -  1  - 64 1.00
+0019    +3    17463    17484    18377      912     891     6.43    99  -  - 99  -  -  -  0 1.00
+        -3    18620    18554    18435      183     117    -5.09     0  0  -  - 99  -  0  0 1.00
+        -3    19040    18995    18882      156     111    -2.90     0  -  -  - 99  -  0  0 1.00
+        +3    18978    19014    19127      147     111     1.67     0  -  -  0 99  -  -  0 1.00
+        -3    19235    19211    19041      192     168     1.29     0  -  -  - 99  -  0  0 1.00
+        -2    19357    19357    19208      147     147    -3.27     0  -  -  0 99  0  -  0 1.00
+        +3    19188    19269    19394      204     123    -2.25     0  -  -  0 98  -  -  1 1.00
+0020    -1    19449    19407    18319     1128    1086     7.49    99  -  -  - 99  -  -  0 1.00
+        -2    19873    19843    19715      156     126    -8.77     0  -  -  0  -  0  - 99 1.00
+        +3    19719    19740    19898      177     156    -7.39     0  -  -  0  -  -  - 99 1.00
+        -2    20185    20110    19973      210     135    -0.77     0  -  - 98  -  0  -  1 1.00
+0021    +3    19977    19980    20195      216     213     3.39    99  -  - 99  -  -  -  0 1.00
+        -1    20445    20418    20302      141     114    -3.04     0  -  0  -  0 99  -  0 1.00
+        +2    20261    20309    20449      186     138    -4.98     0  -  0  -  0 99  -  0 1.00
+        +1    20404    20413    20604      198     189    -5.81     0  0  -  -  - 99  -  0 1.00
+        +3    20469    20583    20759      288     174     2.71     0  -  -  0  - 99  -  0 1.00
+0022    -2    20824    20749    20186      636     561     9.10    99  -  -  -  - 99  -  0 1.00
+        +1    20752    20773    20913      159     138    -3.20     0  0  0  -  -  -  - 99 1.00
+        -1    21078    21039    20839      237     198     4.53     0  - 99  -  0  -  -  0 1.00
+        +3    21063    21069    21215      150     144    -2.93     0  - 99  0  -  -  -  0 1.00
+0023    +2    20765    20810    21706      939     894    11.73    99  - 99  -  -  -  -  0 1.00
+        -2    22108    22051    21911      195     138    -4.96     0  - 99  -  -  0  -  0 1.00
+        +3    21918    21945    22112      192     165    -9.58     0  - 99  0  -  0  -  0 1.00
+        +3    22113    22116    22241      126     123    -1.18     0  - 99  0  0  -  0  0 1.00
+        -3    22277    22187    22017      258     168     3.69     0  - 99  -  -  -  0  0 1.00
+        -1    22320    22194    22078      240     114     8.28     0  - 99  -  0  -  0  0 1.00
+0024    +2    21845    21851    22354      507     501    10.64    99  - 99  -  -  -  -  0 1.00
+        -2    22465    22462    22238      225     222    -1.02     1  -  -  -  -  1  - 98 1.00
+        +1    22402    22420    22557      153     135     0.60     0  0  - 99  -  -  -  0 1.00
+        -2    22720    22681    22544      174     135     5.32     0  -  - 99  -  0  -  0 1.00
+        +1    22657    22663    22779      120     114    -5.87     0  0  - 99  -  -  0  0 1.00
+0025    +3    22332    22359    23102      768     741    14.02    99  -  - 99  -  -  -  0 1.00
+        -3    23501    23432    23268      231     162    -6.68     0  -  - 99  -  -  0  0 1.00
+0026    +3    23112    23118    23726      612     606    11.58    99  -  - 99  -  -  -  0 1.00
+        +1    23920    23920    24036      114     114    -9.23     0  0  -  -  -  -  - 99 1.00
+0027    -2    24673    24670    24545      126     123     9.42    38  0  - 61  - 38  -  0 1.00
+        +1    24496    24586    24726      228     138    -4.39     0  0  - 99  -  -  -  0 1.00
+        -1    25095    25089    24964      129     123   -11.50     0  -  - 99  0  -  -  0 1.00
+        -3    25343    25304    25182      159     120   -19.97     0  -  - 99  -  -  0  0 1.00
+        +2    25208    25331    25447      237     114   -11.98     0  -  0 99  -  -  -  0 1.00
+0028    +3    24150    24216    25898     1746    1680    11.26    99  -  - 99  -  -  -  0 1.00
+        -2    26158    26125    25967      189     156    -6.07     0  -  -  -  -  0  - 99 1.00
+0029    +3    26055    26103    26219      162     114     2.53    65  -  - 65  -  -  - 34 1.00
+        -3    26309    26249    26130      177     117    -1.20     3  -  -  -  -  -  3 96 1.00
+        -1    26370    26328    26209      159     117    -2.70     0  -  -  -  0  -  - 99 1.00
+0030    -2    26698    26593    26474      222     117     4.01    91  -  -  0  - 91  -  8 1.00
+        -1    26703    26697    26554      147     141    -3.06     0  -  -  -  0  6  - 93 1.00
+        +2    26672    26684    26893      219     207    -1.12     1  -  1  -  -  -  - 98 1.00
+        +2    26945    26954    27070      123     114     6.59     0  -  0  - 99  -  -  0 1.00
+0031    -1    27117    27090    26728      387     360    10.98    99  -  -  - 99  -  -  0 1.00
+        +1    27904    27952    28095      189     141    -0.69     4  4  -  -  -  -  - 95 1.00
+0032    +2    28001    28025    28144      141     117     1.77    53  - 53  -  -  -  - 46 1.00
+0033    -1    28272    28203    28078      192     123     1.32    35  -  -  - 35  -  - 64 1.00
+        +1    28240    28240    28365      123     123    -9.11     0  0  - 99  -  0  -  0 1.00
+0034    -2    28390    28276    28151      237     123     6.20    93  -  -  5  - 93  -  0 1.00
+        -3    28544    28502    28380      162     120    -5.88     0  -  - 99  -  -  0  0 1.00
+        +1    28387    28417    28551      162     132    -3.22     0  0  - 71  -  -  - 28 1.00
+        +1    28591    28663    28794      201     129    -6.01     0  0  - 99  -  -  0  0 1.00
+        -3    28988    28940    28809      177     129    -9.43     0  -  - 99  -  0  0  0 1.00
+        -2    29005    28885    28763      240     120     4.66     0  -  - 99  -  0  -  0 1.00
+0035    +3    28155    28188    29198     1041    1008     7.00    99  -  - 99  -  -  -  0 1.00
+        +1    28987    29119    29247      258     126    -7.99     0  0  -  -  -  -  - 99 1.00
+        -3    29579    29561    29427      150     132     3.22     0 99  -  -  -  -  0  0 1.00
+        -2    29866    29836    29699      165     135     0.42     0 99  -  0  -  0  0  0 1.00

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.predict
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.predict	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Glimmer3.predict	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+>BCTDNA
+orf00001    29263        9  +1     9.60
+orf00002      298     1623  +1     8.09
+orf00003     1856     2098  +2     3.34
+orf00004     2349     2230  -1     2.72
+orf00005     3279     4400  +3     9.99
+orf00006     5062     6159  +1     9.87
+orf00007     6156     6620  +3    12.71
+orf00008     6686     8605  +2    12.31
+orf00009     8624    11128  +2    12.75
+orf00010    11133    12122  +3    11.50
+orf00012    12273    12473  +3     8.92
+orf00013    12715    13452  +1    11.15
+orf00015    14781    13804  -1     5.51
+orf00017    15776    15216  -3    11.26
+orf00018    15948    17462  +3     9.28
+orf00020    19407    18319  -1     7.45
+orf00021    19980    20195  +3     3.15
+orf00022    20749    20186  -2     9.01
+orf00023    20810    21706  +2    11.68
+orf00024    21851    22354  +2    10.53
+orf00025    22359    23102  +3    13.70
+orf00026    23118    23726  +3    11.50
+orf00028    24216    25898  +3    11.23
+orf00029    26103    26219  +3     2.08
+orf00030    26593    26474  -2     3.57
+orf00031    27090    26728  -1    10.84
+orf00035    28185    29198  +3     6.81

Added: trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerHMM.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerHMM.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerHMM.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,257 @@
+GlimmerHMM
+Sequence name: gi|23613028|ref|NC_004326.1|
+Sequence length: 69930 bp
+
+Predicted genes/exons
+
+Gene Exon Strand  Exon            Exon Range      Exon
+   #    #         Type                           Length
+
+
+   1    1  +  Terminal       2235       2284       50
+
+   2    1  -  Terminal       2324       2340       17
+   2    2  -  Initial        2413       2434       22
+
+   3    1  +  Initial        2461       2667      207
+   3    2  +  Internal       2937       2989       53
+   3    3  +  Internal       4203       4216       14
+   3    4  +  Terminal       4587       4627       41
+
+   4    1  +  Initial        4628       4669       42
+   4    2  +  Terminal       5063       5083       21
+
+   5    1  -  Terminal       5095       5102        8
+   5    2  -  Initial        5108       5129       22
+
+   6    1  -  Single         5138       5152       15
+
+   7    1  -  Single         5158       5205       48
+
+   8    1  +  Single         5214       5252       39
+
+   9    1  +  Initial        5262       5469      208
+   9    2  +  Terminal       5538       5647      110
+
+  10    1  +  Initial        5663       5665        3
+  10    2  +  Terminal       5712       5942      231
+
+  11    1  +  Initial        5952       6159      208
+  11    2  +  Terminal       6228       6337      110
+
+  12    1  +  Initial        6353       6355        3
+  12    2  +  Internal       6402       6431       30
+  12    3  +  Internal       6463       6489       27
+  12    4  +  Internal       6563       6707      145
+  12    5  +  Internal       6911       6960       50
+  12    6  +  Internal       7252       7399      148
+  12    7  +  Internal       7672       7860      189
+  12    8  +  Internal       7915       7967       53
+  12    9  +  Internal       8030       8198      169
+  12   10  +  Terminal       8204       8322      119
+
+  13    1  +  Initial        8329       8385       57
+  13    2  +  Internal       8721       8748       28
+  13    3  +  Internal       8820       8976      157
+  13    4  +  Terminal      11310      11361       52
+
+  14    1  -  Single        11364      11375       12
+
+  15    1  -  Single        11378      11386        9
+
+  16    1  -  Terminal      11411      11413        3
+  16    2  -  Initial       11522      11533       12
+
+  17    1  +  Initial       11541      11595       55
+  17    2  +  Internal      11926      11967       42
+  17    3  +  Internal      12472      12483       12
+  17    4  +  Terminal      12641      12921      281
+
+  18    1  +  Initial       12928      13035      108
+  18    2  +  Internal      13334      13350       17
+  18    3  +  Internal      13376      13383        8
+  18    4  +  Internal      13397      13455       59
+  18    5  +  Internal      13481      13497       17
+  18    6  +  Internal      13502      13530       29
+  18    7  +  Internal      13603      13615       13
+  18    8  +  Internal      13649      13656        8
+  18    9  +  Internal      13776      13960      185
+  18   10  +  Internal      13965      13972        8
+  18   11  +  Internal      14173      14189       17
+  18   12  +  Internal      14236      14252       17
+  18   13  +  Internal      14277      14293       17
+  18   14  +  Internal      14487      14515       29
+  18   15  +  Internal      14844      14860       17
+  18   16  +  Internal      15281      15309       29
+  18   17  +  Internal      15470      15477        8
+  18   18  +  Internal      15743      15750        8
+  18   19  +  Internal      15764      15771        8
+  18   20  +  Internal      15785      15792        8
+  18   21  +  Internal      16122      16128        7
+  18   22  +  Internal      16478      16506       29
+  18   23  +  Terminal      17424      17662      239
+
+  19    1  +  Initial       17669      17741       73
+  19    2  +  Internal      17907      18091      185
+  19    3  +  Internal      18139      18146        8
+  19    4  +  Internal      18159      18237       79
+  19    5  +  Internal      18275      18292       18
+  19    6  +  Internal      18431      18532      102
+  19    7  +  Internal      18813      18839       27
+  19    8  +  Internal      18933      18943       11
+  19    9  +  Internal      19248      19269       22
+  19   10  +  Internal      19423      19486       64
+  19   11  +  Internal      19520      19551       32
+  19   12  +  Internal      19671      19744       74
+  19   13  +  Internal      19799      19944      146
+  19   14  +  Internal      20356      20421       66
+  19   15  +  Internal      20912      26376     5465
+  19   16  +  Internal      27215      28172      958
+  19   17  +  Internal      28378      28381        4
+  19   18  +  Internal      28440      28617      178
+  19   19  +  Internal      28980      29244      265
+  19   20  +  Internal      29259      29330       72
+  19   21  +  Internal      30135      30187       53
+  19   22  +  Terminal      30442      30486       45
+
+  20    1  -  Terminal      30483      30503       21
+  20    2  -  Initial       30573      30587       15
+
+  21    1  +  Single        30600      30701      102
+
+  22    1  -  Single        30707      30715        9
+
+  23    1  +  Initial       30723      30768       46
+  23    2  +  Internal      30950      30951        2
+  23    3  +  Internal      31436      31516       81
+  23    4  +  Internal      31862      31887       26
+  23    5  +  Internal      31902      31915       14
+  23    6  +  Internal      32005      32075       71
+  23    7  +  Terminal      32357      32401       45
+
+  24    1  -  Single        32408      32413        6
+
+  25    1  -  Single        32450      32470       21
+
+  26    1  +  Initial       32485      32526       42
+  26    2  +  Internal      32809      32817        9
+  26    3  +  Internal      33416      33470       55
+  26    4  +  Terminal      33601      33956      356
+
+  27    1  +  Initial       33967      34133      167
+  27    2  +  Internal      34240      34252       13
+  27    3  +  Internal      34368      34389       22
+  27    4  +  Internal      34917      34929       13
+  27    5  +  Internal      35211      35289       79
+  27    6  +  Internal      35408      35416        9
+  27    7  +  Internal      35421      35516       96
+  27    8  +  Internal      35827      35964      138
+  27    9  +  Internal      36797      36800        4
+  27   10  +  Internal      36871      36886       16
+  27   11  +  Internal      37250      37352      103
+  27   12  +  Internal      38015      38087       73
+  27   13  +  Internal      38193      38242       50
+  27   14  +  Internal      38538      38540        3
+  27   15  +  Internal      38865      38911       47
+  27   16  +  Internal      38966      38992       27
+  27   17  +  Internal      39553      39567       15
+  27   18  +  Internal      39607      39728      122
+  27   19  +  Internal      39785      39797       13
+  27   20  +  Internal      40060      40090       31
+  27   21  +  Internal      40686      40694        9
+  27   22  +  Internal      41048      41114       67
+  27   23  +  Terminal      41274      41317       44
+
+  28    1  +  Initial       41321      41383       63
+  28    2  +  Internal      41895      41924       30
+  28    3  +  Internal      42145      42158       14
+  28    4  +  Terminal      42273      42366       94
+
+  29    1  +  Initial       42378      42407       30
+  29    2  +  Terminal      43154      43186       33
+
+  30    1  -  Single        43188      43208       21
+
+  31    1  -  Terminal      43211      43217        7
+  31    2  -  Initial       43239      43255       17
+
+  32    1  -  Terminal      43260      43358       99
+  32    2  -  Initial       43372      43650      279
+
+  33    1  +  Initial       43659      43669       11
+  33    2  +  Internal      43827      44543      717
+  33    3  +  Internal      45352      45895      544
+  33    4  +  Internal      47453      47470       18
+  33    5  +  Internal      47477      47501       25
+  33    6  +  Internal      47897      47903        7
+  33    7  +  Internal      48413      48433       21
+  33    8  +  Internal      48698      48711       14
+  33    9  +  Internal      49405      49430       26
+  33   10  +  Internal      49635      49678       44
+  33   11  +  Internal      50184      50212       29
+  33   12  +  Internal      50480      50504       25
+  33   13  +  Internal      50871      50875        5
+  33   14  +  Internal      51003      51079       77
+  33   15  +  Internal      51396      51416       21
+  33   16  +  Internal      51762      51808       47
+  33   17  +  Internal      52597      52652       56
+  33   18  +  Internal      52671      52672        2
+  33   19  +  Internal      53093      53095        3
+  33   20  +  Internal      53262      53308       47
+  33   21  +  Internal      53469      53531       63
+  33   22  +  Internal      55819      56055      237
+  33   23  +  Internal      57050      57132       83
+  33   24  +  Terminal      57138      57181       44
+
+  34    1  +  Single        57193      57207       15
+
+  35    1  +  Single        57208      57216        9
+
+  36    1  +  Initial       57240      57338       99
+  36    2  +  Internal      57510      57543       34
+  36    3  +  Internal      57950      58055      106
+  36    4  +  Terminal      58384      58390        7
+
+  37    1  +  Initial       58406      58463       58
+  37    2  +  Internal      58550      58656      107
+  37    3  +  Internal      59051      59123       73
+  37    4  +  Internal      59168      59226       59
+  37    5  +  Internal      59407      59451       45
+  37    6  +  Internal      59627      59649       23
+  37    7  +  Internal      59680      59705       26
+  37    8  +  Internal      59968      59977       10
+  37    9  +  Internal      60747      60750        4
+  37   10  +  Internal      60942      60947        6
+  37   11  +  Internal      61442      61464       23
+  37   12  +  Internal      61991      62137      147
+  37   13  +  Internal      62413      62425       13
+  37   14  +  Internal      62508      62522       15
+  37   15  +  Terminal      62830      62838        9
+
+  38    1  +  Initial       62839      62850       12
+  38    2  +  Internal      63403      63410        8
+  38    3  +  Internal      63916      63931       16
+  38    4  +  Terminal      63992      64009       18
+
+  39    1  -  Single        64040      64066       27
+
+  40    1  +  Single        64123      64137       15
+
+  41    1  +  Initial       64145      64179       35
+  41    2  +  Internal      64194      64266       73
+  41    3  +  Internal      64332      65390     1059
+  41    4  +  Internal      65945      65955       11
+  41    5  +  Internal      66065      66070        6
+  41    6  +  Internal      66469      66485       17
+  41    7  +  Terminal      66697      66704        8
+
+  42    1  +  Initial       66704      66716       13
+  42    2  +  Internal      66964      67005       42
+  42    3  +  Internal      68146      68190       45
+  42    4  +  Internal      68458      68600      143
+  42    5  +  Terminal      68895      68924       30
+
+  43    1  +  Initial       68930      69670      741
+  43    2  +  Terminal      69839      69868       30
+
+  44    1  -  Terminal      69866      69929       64

Added: trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerM.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerM.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/GlimmerM.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+GlimmerM (Version 3.0)
+Sequence name: gi|23613028|ref|NC_004326.1|
+Sequence length: 69930 bp
+
+Predicted genes/exons
+
+Gene Exon Strand  Exon            Exon Range      Exon
+   #    #         Type                           Length
+
+   1    1  -  Single          461        523       63
+
+   2    1  +  Initial        1095       1103        9
+   2    2  +  Internal       1167       1208       42
+   2    3  +  Terminal       1404       1445       42
+
+   3    1  -  Single         7055       7162      108
+
+   4    1  +  Initial        7273       7356       84
+   4    2  +  Internal       7434       7598      165
+   4    3  +  Internal       7861       7898       38
+   4    4  +  Terminal       7957       7975       19
+
+   5    1  +  Initial       20929      23815     2887
+   5    2  +  Terminal      23910      23956       47
+
+   6    1  +  Initial       24046      26310     2265
+   6    2  +  Internal      27215      28399     1185
+   6    3  +  Internal      28848      28855        8
+   6    4  +  Internal      29246      29309       64
+   6    5  +  Terminal      29502      29516       15
+
+   7    2  -  Terminal      29593      29600        8
+   7    1  -  Initial       29919      30636      718
+
+   8    8  -  Terminal      30737      30744        8
+   8    7  -  Internal      31124      31150       27
+   8    6  -  Internal      31814      31883       70
+   8    5  -  Internal      32469      32515       47
+   8    4  -  Internal      33199      34126      928
+   8    3  -  Internal      34647      34688       42
+   8    2  -  Internal      35216      35334      119
+   8    1  -  Initial       36613      36853      241
+
+   9    1  +  Initial       36912      37101      190
+   9    2  +  Internal      37285      37300       16
+   9    3  +  Terminal      37999      38005        7
+
+  10    5  -  Terminal      42047      42104       58
+  10    4  -  Internal      42781      42795       15
+  10    3  -  Internal      42864      42888       25
+  10    2  -  Internal      43211      43217        7
+  10    1  -  Initial       43372      46329     2958
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.FASTA
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.FASTA	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.FASTA	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2549 @@
+ FASTA searches a protein or DNA sequence data bank
+ version 3.3t08 Jan. 17, 2001
+Please cite:
+ W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
+
+t/data/HUMBETGLOA.fasta: 3002 nt
+ >HUMBETGLOA Human haplotype C4 beta-globin gene, complete cds. 
+ vs  /data_2/jason/blastdb/dros_clones.2.5 library
+searching /data_2/jason/blastdb/dros_clones.2.5 library
+
+       opt      E()
+< 20     4     0:==
+  22     4     0:==         one = represents 2 library sequences
+  24    11     0:======
+  26     9     0:=====
+  28    37     0:===================
+  30    48     3:=*======================
+  32    43    10:====*=================
+  34    56    27:=============*==============
+  36    53    56:===========================*
+  38    67    93:==================================            *
+  40    81   130:=========================================                  *
+  42    81   159:=========================================                  *
+  44    98   175:=================================================          *
+  46    86   178:===========================================                *
+  48    77   171:=======================================                    *
+  50    94   156:===============================================            *
+  52    93   137:===============================================            *
+  54    70   117:===================================                       *
+  56   108    98:================================================*=====
+  58    84    80:=======================================*==
+  60    70    65:================================*==
+  62    79    52:=========================*==============
+  64    51    41:====================*=====
+  66    38    33:================*==
+  68    51    26:============*=============
+  70    48    20:=========*==============
+  72    41    16:=======*=============
+  74    30    12:=====*=========
+  76    45    10:====*==================
+  78    42     7:===*=================
+  80    30     6:==*============
+  82    30     4:=*=============
+  84    21     4:=*=========
+  86    15     3:=*======
+  88    14     2:*======
+  90    12     2:*=====
+  92    23     1:*===========
+  94    16     1:*=======
+  96    13     1:*======
+  98     7     1:*===
+ 100     7     0:====
+ 102     7     0:====
+ 104    11     0:======
+ 106    10     0:=====
+ 108     8     0:====
+ 110     4     0:==
+ 112     8     0:====
+ 114     2     0:=
+ 116     0     0:
+ 118     5     0:===
+>120     9     0:=====
+112936249 residues in   657 sequences
+  Expectation_n fit: rho(ln(x))= 10.8578+/-0.0125; mu= 7.5547+/- 1.409
+ mean_var=242.9513+/-133.277, 0's: 0 Z-trim: 52  B-trim: 0 in 0/23
+ Lambda= 0.0823
+ Kolmogorov-Smirnov  statistic: 0.2080 (N=29) at  54
+
+FASTA (3.36 June 2000) function [optimized, +5/-4 matrix (5:-4)] ktup: 6
+ join: 82, opt: 67, gap-pen: -16/ -4, width:  16
+ Scan time: 78.670
+The best scores are:                                       opt bits E(1899)
+BACR21I23 : AC009391, 189254 bases, from 3R:10 (73982) [r]  261   44   0.017
+BACR40P19 : AC010667, 159570 bases, from 4:101 (73982) [f]  261   44   0.017
+BACR30L17 : AC012390, 174441 bases, from 4:101 (32481) [f]  261   44   0.018
+BACR44L03 : 172190 bases, from 4:101.          (73982) [r]  256   44   0.026
+BACH57F14 : 103809 bases, from 4:101.          (32829) [r]  254   43   0.033
+BACR19J06 : AC010581, 175053 bases, from 3R:85 (73982) [r]  251   43   0.039
+BACR19J06 : AC010581, 175053 bases, from 3R:85 (73982) [f]  242   42   0.081
+BACR44L03 : 172190 bases, from 4:101.          (73982) [f]  241   42   0.088
+BACR42I20 : AC008195, 164944 bases, from 3R:93 (73982) [f]  238   41    0.11
+BACR32K23BACR03G18 : 350380 bases, from X:14.  (73982) [f]  237   41    0.12
+BACR12J05 : AC011697, 203853 bases, from X:12. (61893) [r]  237   41    0.12
+BACR30J04 : AC008338, 177277 bases, from X:19. (73982) [r]  235   41    0.14
+BACR10C18 : AC011705, 170975 bases, from X:19. (73982) [r]  235   41    0.14
+BACR32M04 : AC008234, 148847 bases, from 3R:85 (73982) [f]  229   40    0.24
+BACR43K14 : AC008356, 162593 bases, from 3R:85 (20633) [f]  229   40    0.27
+BACR39E09 : AC009255, 163990 bases, from 2R:42 (73982) [r]  227   40    0.28
+BACR13P06 : AC008339, 180296 bases, from 2R:42 (73982) [r]  227   40    0.28
+BACR18C01 : AC007808, 167195 bases, from 3R:88 (73982) [f]  226   40     0.3
+BACH59K20 : AC010840, 29516 bases, from 4:101. (29516) [r]  227   40    0.31
+BACR20B21 : AC099010, 166512 bases, from 2L:27 (73982) [r]  225   40    0.33
+BACR13A13 : AC018484, 172748 bases, from 2L:40 (73982) [f]  225   40    0.33
+BACR48D17 : AC008255, 183316 bases, from X:13. (73982) [r]  225   40    0.33
+BACR06H06 : 186935 bases, from 2L:40.          (44975) [f]  225   40    0.35
+BACR22N03 : AC092397, 167201 bases, from 2L:26 (25241) [f]  225   40    0.37
+BACR37K05 : AC011253, 197597 bases, from 3R:84 (55637) [f]  223   40     0.4
+BACR30L17 : AC012390, 174441 bases, from 4:101 (73982) [r]  221   39    0.46
+BACR01O16 : AC093045, 189904 bases, from 2L:29 (73982) [f]  221   39    0.46
+BACR14D22 : AC012164, 169384 bases, from X:17. (73982) [f]  221   39    0.46
+BACR08A11 : 170075 bases, from 2R:42.          (73982) [f]  221   39    0.46
+BACR25O02 : AC018490, 165098 bases, from X:17. (73982) [r]  221   39    0.46
+BACR02B16 : AC008331, 186938 bases, from 2L:29 (44978) [f]  221   39    0.48
+BACR13P06 : AC008339, 180296 bases, from 2R:42 (38336) [f]  221   39    0.49
+BACR11M14 : AC099308, 173101 bases, from X:19. (73982) [f]  219   39    0.54
+BACR15O02 : AC009735, 159478 bases, from 3R:82 (73982) [r]  219   39    0.54
+BACR21E16 : AC009905, 170662 bases, from 2L:36 (73982) [r]  218   39    0.58
+BACR48D17 : AC008255, 183316 bases, from X:13. (73982) [f]  218   39    0.58
+BACR15O02 : AC009735, 159478 bases, from 3R:82 (73982) [f]  217   39    0.63
+BACR30G03 : AC008355, 151500 bases, from 3R:82 (73982) [f]  217   39    0.63
+BACR13P06 : AC008339, 180296 bases, from 2R:42 (73982) [r]  216   39    0.69
+BACR22H11 : AC013431, 191558 bases, from X:13. (73982) [r]  216   39    0.69
+BACR25C18 : AC092401, 185200 bases, from X:13. (73982) [r]  216   39    0.69
+BACR25O02 : AC018490, 165098 bases, from X:17. (73982) [f]  216   39    0.69
+BACR11C03 : AC008220, 186549 bases, from 3R:10 (73982) [f]  216   39    0.69
+BACR06P08 : AC007975, 180630 bases, from 3R:10 (38670) [f]  216   39    0.74
+BACR29F06 : AC008203, 174728 bases, from 3R:95 (73982) [r]  215   39    0.75
+BACR48E23 : AC005721, 203183 bases, from 3R:88 (73982) [r]  215   39    0.75
+BACR22N03 : AC092397, 167201 bases, from 2L:26 (73982) [r]  215   39    0.75
+BACR08H11 : AC008136, 154890 bases, from 3R:88 (73982) [r]  215   39    0.75
+BACR15J11 : AC008202, 186159 bases, from 3R:95 (44199) [r]  215   39    0.79
+BACR14N05 : AC099033, 170299 bases, from 2R:55 (73982) [f]  214   39    0.81
+BACR48A20 : AC011072, 166158 bases, from 2R:55 (73982) [f]  214   39    0.81
+BACR19D23 : AC095014, 170801 bases, from 3R:84 (73982) [r]  214   39    0.81
+BACR19J06 : AC010581, 175053 bases, from 3R:85 (73982) [f]  214   39    0.81
+BACR11B14 : 174999 bases, from 2R:41.          (73982) [r]  213   38    0.88
+BACR06P07 : 187793 bases, from 2R:41.          (45833) [f]  213   38    0.93
+BACR19J06 : AC010581, 175053 bases, from 3R:85 (73982) [r]  212   38    0.96
+BACR25C18 : AC092401, 185200 bases, from X:13. (73982) [f]  210   38     1.1
+BACR22N03 : AC092397, 167201 bases, from 2L:26 (73982) [f]  210   38     1.1
+BACR03L12 : AC008357, 190672 bases, from 3R:86 (73982) [r]  210   38     1.1
+BACR10E07 : AC012675, 176818 bases, from 2L:27 (73982) [f]  210   38     1.1
+BACR22H11 : AC013431, 191558 bases, from X:13. (73982) [f]  210   38     1.1
+BACR19I14 : AC095013, 175648 bases, from 3R:83 (73982) [f]  210   38     1.1
+BACR11C03 : AC008220, 186549 bases, from 3R:10 (73982) [r]  210   38     1.1
+BACR23O04 : AC008316, 160817 bases, from 3R:85 (73982) [r]  210   38     1.1
+BACR28B01 : AC008139, 170675 bases, from 3R:85 (73982) [r]  210   38     1.1
+BACR30F01 : AC008315, 177028 bases, from 3R:85 (73982) [r]  210   38     1.1
+BACR06P08 : AC007975, 180630 bases, from 3R:10 (38670) [r]  210   38     1.2
+BACR30F01 : AC008315, 177028 bases, from 3R:85 (35068) [r]  210   38     1.2
+BACR30G03 : AC008355, 151500 bases, from 3R:82 (73982) [r]  209   38     1.2
+BACR15O02 : AC009735, 159478 bases, from 3R:82 (73982) [r]  209   38     1.2
+BACR21B19 : AC011250, 161692 bases, from 4:101 (73982) [r]  209   38     1.2
+BACR28B01 : AC008139, 170675 bases, from 3R:85 (28715) [r]  210   38     1.2
+BACR10P11 : AC007888, 164035 bases, from 2R:57 (73982) [f]  208   38     1.3
+BACR48E23 : AC005721, 203183 bases, from 3R:88 (73982) [f]  208   38     1.3
+BACR08H11 : AC008136, 154890 bases, from 3R:88 (73982) [f]  208   38     1.3
+BACR28F21 : AC010705, 163958 bases, from X:11. (21998) [f]  209   38     1.4
+BACR02C24 : 234783 bases, from X:12.           (21843) [r]  209   38     1.4
+BACR36J03 : AC091634, 159065 bases, from 2R:50 (73982) [r]  207   38     1.4
+BACR40P19 : AC010667, 159570 bases, from 4:101 (73982) [r]  207   38     1.4
+BACR14P04 : AC007329, 126140 bases, from 2R:50 (73982) [f]  207   38     1.4
+BACR30L17 : AC012390, 174441 bases, from 4:101 (32481) [r]  207   38     1.6
+BACR34M23 : AC008190, 158012 bases, from 3R:83 (73982) [f]  205   38     1.7
+BACR02C19 : AC008189, 188359 bases, from 3R:82 (73982) [r]  205   38     1.7
+BACR24O24 : AC009538, 173927 bases, from 3R:83 (73982) [f]  205   38     1.7
+BACR28F21 : AC010705, 163958 bases, from X:11. (21998) [r]  206   37     1.8
+BACR02C24 : 234783 bases, from X:12.           (21843) [f]  206   37     1.8
+BACR14D22 : AC012164, 169384 bases, from X:17. (73982) [r]  204   37     1.8
+BACR11A04 : AC010916, 165307 bases, from 4:101 (73982) [r]  204   37     1.8
+BACR01N17 : AC009911, 170184 bases, from 2L:38 (73982) [f]  204   37     1.8
+
+>>BACR21I23 : AC009391, 189254 bases, from 3R:100.        (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 261  Z-score: 134.5  bits: 44.2 E(): 0.017
+ 57.303% identity (59.302% ungapped) in 267 nt overlap (289-31:64902-65167)
+
+    320       310       300       290       280       270          
+HUMBE- GATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTAC
+                                     :::::::::::::::::: : ::::: :: 
+BACR21 GATGTCCTTGGTGGATTATGGTGTTAGGGTATATATATATATATATATATATATATATAT
+           64880     64890     64900     64910     64920     64930 
+
+    260       250       240       230       220       210          
+HUMBE- ACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGC
+       : : ::: ::::: ::::::::  ::  :: : :   : : : : :  ::    : :: :
+BACR21 ATATATATATATATATATATATATATATATATATATATATATATATATATAATATAATAC
+           64940     64950     64960     64970     64980     64990 
+
+    200           190       180          170       160       150   
+HUMBE- ATGCTA----ATAAATTATGTCTAAAAATAGAAT---AAATACAAATCAATGTGCTCTGT
+       :   ::    : ::: :::     :::::: :::   ::::: ::  :::  :    :  
+BACR21 AAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATAC
+           65000     65010     65020     65030     65040     65050 
+
+            140       130       120       110       100        90  
+HUMBE- GCATTA-GTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAAT
+         : ::   :::  : ::   : :   : : :: :   :: : : :: : :       ::
+BACR21 AAAATATAATACAAAATATAATATAAAATATAATATAAAATATAATATAAAATAAAATAT
+           65060     65070     65080     65090     65100     65110 
+
+             80        70        60        50        40        30  
+HUMBE- GCAGTCAAAAATACAAATAAATAAAAAGTCACTTACAACCCAAAGTGTGACTATCAATGG
+         : : ::: ::: ::  ::::: ::: : :  :: ::   ::: : : : ::: ::   
+BACR21 AAAATAAAATATAAAATAAAATATAAAATAAAATATAAAATAAAATAT-AATATAAAATA
+           65120     65130     65140     65150      65160     65170
+
+             20        10                                          
+HUMBE- GGTAATCAGTGGTGTCAAATAGGAGGT                                 
+                                                                   
+BACR21 TAAAATAAAATATAATATAAAATATAATATAAAATATAATATAAAATATAATATAAAATA
+            65180     65190     65200     65210     65220     65230
+
+>>BACR40P19 : AC010667, 159570 bases, from 4:101.         (73982 nt)
+ initn: 190 init1: 190 opt: 261  Z-score: 134.5  bits: 44.2 E(): 0.017
+ 58.750% identity (60.256% ungapped) in 240 nt overlap (158-392:96540-96778)
+
+       130       140       150       160       170       180       
+HUMBE- CCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATA
+                                     :::::: :::  : : :::   : : ::: 
+BACR40 ATTACTATATTTACTTTAGCAAACTATTTCTGATTT-TATACACTGTATACATTGCCATT
+   96510     96520     96530     96540      96550     96560        
+
+         190       200       210       220       230       240     
+HUMBE- A--TTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATAT
+       :  : ::: :  ::  :: :  : ::  : : ::: :   : :: :  ::  ::::::::
+BACR40 AGTTATATAAATATATATAACTATATATATATAAATATATATAACTATATATATATATAT
+    96570     96580     96590     96600     96610     96620        
+
+         250       260         270       280       290       300   
+HUMBE- GTATATGTATGTGTGTACATA--TACACATATATATATATATATATTTTTTCTTTTCTTA
+        ::::: ::: : : :: :::  :: : :::::::::::::::::: : :: :: : :  
+BACR40 ATATATATATATATATATATAACTATATATATATATATATATATATATATTTTTATATAT
+    96630     96640     96650     96660     96670     96680        
+
+           310       320       330       340       350       360   
+HUMBE- CCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCAT
+         :       : :::: :::::   ::  :  :: :::    : : :::    :: :  :
+BACR40 ATATCCTAGCTCAATCTAAATATAAAGTTGGAATACTTTTGCCAGGGGTTTCTTTCTTTT
+    96690     96700     96710     96720     96730     96740        
+
+            370       380       390       400       410       420  
+HUMBE- CCATTC-TGTCCTGTAAGTATTTTGCATATTCTGGAGACGCAGGAAGAGATCCATCTACA
+       :  : : : :  :    :::: :: :::::                              
+BACR40 CTTTCCTTATAATACTGGTATCTTTCATATGACTATATTCCGTAATCTATAAGTCTCTTG
+    96750     96760     96770     96780     96790     96800        
+
+>>BACR30L17 : AC012390, 174441 bases, from 4:101.         (32481 nt)
+ initn: 190 init1: 190 opt: 261  Z-score: 140.2  bits: 44.1 E(): 0.018
+ 58.750% identity (60.256% ungapped) in 240 nt overlap (158-392:157679-157917)
+
+       130       140       150       160       170       180       
+HUMBE- CCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATA
+                                     :::::: :::  : : :::   : : ::: 
+BACR30 ATTACTATATTTACTTTAGCAAACTATTTCTGATTT-TATACACTGTATACATTGCCATT
+   157650    157660    157670    157680     157690    157700       
+
+         190       200       210       220       230       240     
+HUMBE- A--TTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATAT
+       :  : ::: :  ::  :: :  : ::  : : ::: :   : :: :  ::  ::::::::
+BACR30 AGTTATATAAATATATATAACTATATATATATAAATATATATAACTATATATATATATAT
+    157710    157720    157730    157740    157750    157760       
+
+         250       260         270       280       290       300   
+HUMBE- GTATATGTATGTGTGTACATA--TACACATATATATATATATATATTTTTTCTTTTCTTA
+        ::::: ::: : : :: :::  :: : :::::::::::::::::: : :: :: : :  
+BACR30 ATATATATATATATATATATAACTATATATATATATATATATATATATATTTTTATATAT
+    157770    157780    157790    157800    157810    157820       
+
+           310       320       330       340       350       360   
+HUMBE- CCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCAT
+         :       : :::: :::::   ::  :  :: :::    : : :::    :: :  :
+BACR30 ATATCCTAGCTCAATCTAAATATAAAGTTGGAATACTTTTGCCAGGGGTTTCTTTCTTTT
+    157830    157840    157850    157860    157870    157880       
+
+            370       380       390       400       410       420  
+HUMBE- CCATTC-TGTCCTGTAAGTATTTTGCATATTCTGGAGACGCAGGAAGAGATCCATCTACA
+       :  : : : :  :    :::: :: :::::                              
+BACR30 CTTTCCTTATAATACTGGTATCTTTCATATGACTATATTCCGTAATCTATAAGTCTCTTG
+    157890    157900    157910    157920    157930    157940       
+
+>>BACR44L03 : 172190 bases, from 4:101.                   (73982 nt)
+rev-comp initn: 206 init1: 206 opt: 256  Z-score: 131.3  bits: 43.6 E(): 0.026
+ 63.054% identity (68.085% ungapped) in 203 nt overlap (325-132:64216-64412)
+
+          350       340       330       320       310       300    
+HUMBE- TCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAA
+                                     ::: : :: :::::  ::::  ::    ::
+BACR44 ATGATTAGCGAATGATTTGATAATGAAAAATATATTGAATAAAAAATTCTTATACTGCAA
+       64190     64200     64210     64220     64230     64240     
+
+          290       280       270       260       250       240    
+HUMBE- GAAAAAATATATATATATATATATGTGTATATGTACACACATACATATACATATATATG-
+         : : :::::::::::::::::: : :::::  ::: : ::: ::::: ::::::::: 
+BACR44 CCATATATATATATATATATATATATATATATACACATATATATATATATATATATATGT
+       64250     64260     64270     64280     64290     64300     
+
+            230       220       210       200       190            
+HUMBE- -CATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAATTATGT----C
+         :: ::: : ::   :  :     :::  :  :::  :::  :: ::: : :::     
+BACR44 ATATACATATATTAAGGAGT---AAAATAAGTGCAT--ATGAAAAAAAACTGTGTGAGCA
+       64310     64320        64330       64340     64350     64360
+
+      180        170         160       150       140       130     
+HUMBE- TAA-AAATAGAATAAATACAAAT--CAATGTGCTCTGTGCATTAGTTACTTATTAGGTTT
+       ::: ::::::   : :::  :::  : :::::    :   :: : ::: ::::       
+BACR44 TAACAAATAGCCAAGATAATAATGACGATGTGGCAAGCAGATCA-TTAATTATATTAATA
+            64370     64380     64390     64400      64410         
+
+         120       110       100        90        80        70     
+HUMBE- TGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCAGTCAAAAATACAAATAAATA
+                                                                   
+BACR44 AAAATAAAAAATTTTGAATCAACGTACATACATATAGTTTACAATACCACAAATTTGGTC
+   64420     64430     64440     64450     64460     64470         
+
+>>BACH57F14 : 103809 bases, from 4:101.                   (32829 nt)
+rev-comp initn: 170 init1: 170 opt: 254  Z-score: 135.7  bits: 43.2 E(): 0.033
+ 60.000% identity (63.559% ungapped) in 250 nt overlap (401-165:84585-84833)
+
+      430       420       410       400       390       380        
+HUMBE- TTTGGGATATGTAGATGGATCTCTTCCTGCGTCTCCAGAATATGCAAAATACTTACA---
+                                     : :: :: :::   ::: : ::  : :   
+BACH57 TTGAAATCCCATTTAAATACATGTACTTAAGCCTTCACAATTGCCAACAAACACAGATAT
+        84560     84570     84580     84590     84600     84610    
+
+         370          360         350        340        330        
+HUMBE- GGACAGAAT---GGATGAAAACTC--TACCTCAGTTCTA-AGCATATCTT-CTCCTTATT
+       :::::::::     :  :: :: :  ::  : : :: :: :  :::: :: : : :::: 
+BACH57 GGACAGAATACCATACTAACACCCCATATATTATTTATATAATATATTTTACACATTATA
+        84620     84630     84640     84650     84660     84670    
+
+      320       310        300       290       280         270     
+HUMBE- TGGATTAAAACCTTCT-GGTAAGAAAAGAAAAAATATATATATATATAT--ATGTGTATA
+       :: ::: : :  :: :   : :   :   : : ::::::::::::::::  :: : ::::
+BACH57 TGTATTTATATTTTATATATTATCTATTTATATATATATATATATATATTAATATATATA
+        84680     84690     84700     84710     84720     84730    
+
+         260       250       240       230       220       210     
+HUMBE- TGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCT
+       : :: : : ::  ::::: ::::::::  : : :: :::   : : : : :   : : : 
+BACH57 TATATATATATTAATATATATATATAT-AAATAATATGTATATATATATATATTTATACA
+        84740     84750     84760      84770     84780     84790   
+
+         200       190       180       170       160       150     
+HUMBE- CATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGC
+        ::  ::  : ::: : ::: : :: : ::: : :: :::                    
+BACH57 TATATATATTTATATAATATATTTATACATATATTATATAATATATATAATATATATATA
+         84800     84810     84820     84830     84840     84850   
+
+         140       130       120       110       100        90     
+HUMBE- ATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCA
+                                                                   
+BACH57 TATATATATATATATATAATATATTTATACATATATTTGTTTTCTAAAATTATGACTTCG
+         84860     84870     84880     84890     84900     84910   
+
+>>BACR19J06 : AC010581, 175053 bases, from 3R:85.         (73982 nt)
+rev-comp initn: 195 init1: 195 opt: 251  Z-score: 128.1  bits: 43.0 E(): 0.039
+ 66.901% identity (70.896% ungapped) in 142 nt overlap (289-154:98989-99128)
+
+    320       310       300       290       280       270          
+HUMBE- GATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTAC
+                                     :: :::: :::::::::::: :::::::: 
+BACR19 TGCACTGGTGTACACGAAGGTGTACGGACTATGTATACATATATATATGTATATATGTAT
+    98960     98970     98980     98990     99000     99010        
+
+    260       250       240       230            220       210     
+HUMBE- ACACATACATATACATATATATGCATTCATTTGTTGT-----TGTTTTTCTTAATTTGCT
+       : : ::: ::::: ::::::::  :::  :::::: :     ::: : :::  ::: : :
+BACR19 ATATATATATATATATATATATATATTTGTTTGTTTTTTAAATGTATCTCTCGATTCGTT
+    99020     99030     99040     99050     99060     99070        
+
+         200       190       180        170       160       150    
+HUMBE- CATGCATGCTAATAAATTATGTCTAA-AAATAGAATAAATACAAATCAATGTGCTCTGTG
+         :  ::   : : :  ::::::::: :  ::  :: :::     ::::: :        
+BACR19 TTTAAATAAAATTGA--TATGTCTAATAGCTATTATCAATCGTTTTCAATTTCAAAAAGT
+    99080     99090       99100     99110     99120     99130      
+
+          140       130       120       110       100        90    
+HUMBE- CATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGC
+                                                                   
+BACR19 TTTGTATTGTATTTCGAAGTAATTCTGTTGCGTTTTCTAATTATAATATTCGGTTTTCCC
+      99140     99150     99160     99170     99180     99190      
+
+>>BACR19J06 : AC010581, 175053 bases, from 3R:85.         (73982 nt)
+ initn: 180 init1: 180 opt: 242  Z-score: 122.3  bits: 41.9 E(): 0.081
+ 82.857% identity (82.857% ungapped) in 70 nt overlap (234-303:98989-99058)
+
+           210       220       230       240       250       260   
+HUMBE- GAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTAC
+                                     ::: ::: :::: ::::::::: : :::: 
+BACR19 TGCACTGGTGTACACGAAGGTGTACGGACTATGTATACATATATATATGTATATATGTAT
+    98960     98970     98980     98990     99000     99010        
+
+           270       280       290       300       310       320   
+HUMBE- ATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAA
+       ::::: : :::::::::::::::::::: ::  ::: :::                    
+BACR19 ATATATATATATATATATATATATATTTGTTTGTTTTTTAAATGTATCTCTCGATTCGTT
+    99020     99030     99040     99050     99060     99070        
+
+           330       340       350       360       370       380   
+HUMBE- TAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTAT
+                                                                   
+BACR19 TTTAAATAAAATTGATATGTCTAATAGCTATTATCAATCGTTTTCAATTTCAAAAAGTTT
+    99080     99090     99100     99110     99120     99130        
+
+>>BACR44L03 : 172190 bases, from 4:101.                   (73982 nt)
+ initn: 211 init1: 211 opt: 241  Z-score: 121.7  bits: 41.8 E(): 0.088
+ 66.418% identity (67.939% ungapped) in 134 nt overlap (185-317:64197-64328)
+
+          160       170       180       190       200       210    
+HUMBE- CATTGATTTGTATTTATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAA
+                                     :: ::::  ::  ::: :  :   : : ::
+BACR44 TTAAAACCATACATGCCGAATGATTAGCGAATGATTTGATA--ATGAAAAATATATTGAA
+      64170     64180     64190     64200       64210     64220    
+
+          220       230       240       250       260       270    
+HUMBE- GAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATA
+        :::::   :    : :: :  ::::::::: ::::: ::: : : :: :::::::::::
+BACR44 TAAAAAATTCTTATACTGCAACCATATATATATATATATATATATATATATATACACATA
+        64230     64240     64250     64260     64270     64280    
+
+          280        290       300       310       320       330   
+HUMBE- TATATATATATATAT-TTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAG
+       ::::::::::::::: : : : : : : ::    ::::  : ::                
+BACR44 TATATATATATATATATATGTATATACATATATTAAGGAGTAAAATAAGTGCATATGAAA
+        64290     64300     64310     64320     64330     64340    
+
+           340       350       360       370       380       390   
+HUMBE- ATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTATTTTGCATATT
+                                                                   
+BACR44 AAAAACTGTGTGAGCATAACAAATAGCCAAGATAATAATGACGATGTGGCAAGCAGATCA
+        64350     64360     64370     64380     64390     64400    
+
+>>BACR42I20 : AC008195, 164944 bases, from 3R:93.         (73982 nt)
+ initn: 118 init1: 118 opt: 238  Z-score: 119.7  bits: 41.5 E(): 0.11
+ 68.908% identity (70.690% ungapped) in 119 nt overlap (197-315:52925-53040)
+
+        170       180       190       200       210       220      
+HUMBE- TTTATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAA
+                                     ::  ::: :   :::: :     ::  : :
+BACR42 ATCAACCAAACAATGGCAGCGACATCCTCCCACTCATCATTCAATTTACCGCCACGCCGA
+        52900     52910     52920     52930     52940     52950    
+
+        230       240       250       260       270       280      
+HUMBE- CAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATA
+        ::::  :: :: : : :  ::::: :::::::::: ::::: : :::::::::: ::: 
+BACR42 AAAATCCATACAAAAACA--TATATATATGTGTGTATATATATATATATATATATGTATT
+        52960     52970       52980     52990     53000     53010  
+
+        290       300       310       320       330       340      
+HUMBE- TATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAAC
+       :: ::  : ::::: :: :: ::::::::                               
+BACR42 TA-TTGATATTTTCATAACAAAAGGTTTTTCGTTGTCGGTTTTTGGTGTCGTGTTTTGGT
+           53020     53030     53040     53050     53060     53070 
+
+>>BACR32K23BACR03G18 : 350380 bases, from X:14.           (73982 nt)
+ initn: 194 init1: 194 opt: 237  Z-score: 119.1  bits: 41.3 E(): 0.12
+ 64.667% identity (68.794% ungapped) in 150 nt overlap (189-333:115618-115763)
+
+      160       170       180       190       200       210        
+HUMBE- GATTTGTATTTATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAA
+                                     ::: :  : :: :::  : : : : : : :
+BACR32 TCGCCGTGGTAGGTATTCAGCTATTCTATTTTTTTCCGAATTCATATGAATAATTATATA
+    115590    115600    115610    115620    115630    115640       
+
+      220       230       240       250       260       270        
+HUMBE- AACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATA
+        ::    : : ::  :: ::::::::: ::::: ::: : :::: :: :: : :::::::
+BACR32 TAC----ATATATATATACATATATATCTATATATATATTTGTATATTTATATATATATA
+    115650        115660    115670    115680    115690    115700   
+
+      280       290        300       310           320       330   
+HUMBE- TATATATATATTT-TTTCTTTTCTTACCAGAAGGTTTTAA----TCCAAATAAGGAGAAG
+        :::::::::::: ::: : :: : :    :    :::::    :  ::: : :::::::
+BACR32 CATATATATATTTATTTATATTGTAAATTTACCAGTTTAAGTCTTTAAAAAATGGAGAAG
+        115710    115720    115730    115740    115750    115760   
+
+           340       350       360       370       380       390   
+HUMBE- ATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTATTTTGCATATT
+                                                                   
+BACR32 GAGCAGGCTCATGTGTACCTGAATGGCTGTAACTTCCGATGTCTGTTACTCGAAGTCCTG
+        115770    115780    115790    115800    115810    115820   
+
+>>BACR12J05 : AC011697, 203853 bases, from X:12.          (61893 nt)
+rev-comp initn: 173 init1: 173 opt: 237  Z-score: 120.3  bits: 41.3 E(): 0.12
+ 62.083% identity (68.664% ungapped) in 240 nt overlap (289-60:202512-202738)
+
+    320       310       300       290       280       270          
+HUMBE- GATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTAC
+                                     :::::::::::::::::: : ::::: :: 
+BACR12 TTCAGATGTATTTGCTGTTACTCATATCGTATATATATATATATATATATATATATATAT
+          202490    202500    202510    202520    202530    202540 
+
+    260       250        240        230       220       210        
+HUMBE- ACACATACATATACATA-TATA-TGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCAT
+       : : ::: ::::: ::: :::: ::::: :: :: ::::  :  : :: ::  :  :   
+BACR12 ATATATATATATATATATTATATTGCATACAATT-TTGT--TCGTCCTAAACATTGTGTA
+          202550    202560    202570       202580    202590        
+
+      200       190        180       170       160       150       
+HUMBE- GCATGCTAATAAATT-ATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCAT
+       ::::  :: : ::::  :: :   :: ::  :: ::::      : ::::::: ::: : 
+BACR12 GCATTTTATTCAATTGGTGGC---AACTATGATGAATA------ATTGTGCTCAGTGTAG
+   202600    202610       202620    202630          202640         
+
+       140       130         120        110       100        90    
+HUMBE- TAGTTACTTATTAGGTTTTGG--GAAAC-AAGAGGTAAAAAACTAGAGACCTCTTAAT-G
+       : :  ::      :   : ::  ::::: :::: ::   :::   : ::: :   : : :
+BACR12 TCG-AACAAGCCGGCAGTGGGACGAAACGAAGATGTTGCAAATGCGGGACTTGGGACTCG
+  202650     202660    202670    202680    202690    202700        
+
+               80        70        60        50        40          
+HUMBE- CAG---TCAAAAATACAAATAAATAAAAAGTCACTTACAACCCAAAGTGTGACTATCAAT
+       :::   ::  :::: ::::  : :: :: :                              
+BACR12 CAGGATTCTGAAATGCAAAGGACTAGAAGGGAGGGTAAGTGGTGGGGAGGCTAGAAGGAC
+   202710    202720    202730    202740    202750    202760        
+
+>>BACR30J04 : AC008338, 177277 bases, from X:19.          (73982 nt)
+rev-comp initn: 231 init1: 162 opt: 235  Z-score: 117.8  bits: 41.1 E(): 0.14
+ 60.311% identity (64.583% ungapped) in 257 nt overlap (293-49:8894-9145)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     : : :::::: ::::::::::: : :::::
+BACR30 TTGCGCCTCTGCCGCTGTTTATTTTCCCACACATATATATGTATATATATATATTTATAT
+          8870      8880      8890      8900      8910      8920   
+
+        260       250       240       230       220       210      
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTC
+        :: :   ::: ::::: ::::: :::  :  ::  ::::   : ::: :::::: ::  
+BACR30 ATATATGTATATATATATATATACATGTTTGTATAGGTTG-CATCTTTATTAATTGGCCA
+          8930      8940      8950      8960       8970      8980  
+
+          200       190         180        170       160           
+HUMBE- AT--GCATGCTAATAAATTATG-TCTA-AAAATA-GAATAAATACAAATCAATGTGC-TC
+       ::    :: : : : : :::::  :::  :: :: ::  ::: : :::     :: : ::
+BACR30 ATAAATATCCCAGTTATTTATGAACTATCAACTATGATCAAAAAAAAAATTGCGTACATC
+           8990      9000      9010      9020      9030      9040  
+
+    150        140       130       120       110        100        
+HUMBE- TGTGCAT-TAGTTACTTATTAGGTTTTGGGAAACAAGAGGTA-AAAAACTAGAGACCTCT
+        :  :::  :: ::  : :::  : :    ::     :  :: : ::: :  ::   :  
+BACR30 CGCACATACAGCTAGGT-TTATATATATTTAACTTTCACATATACAAATTCAAG---TAC
+           9050       9060      9070      9080      9090           
+
+       90        80        70            60        50        40    
+HUMBE- TAATGCAGTCAAAAATACAAATAAATA----AAAAGTCACTTACAACCCAAAGTGTGACT
+        ::: ::   : ::::::::::: :::     :::  : :::::: :             
+BACR30 AAATTCAAATACAAATACAAATACATATGCTTAAATACGCTTACATCGGATTTTGTCTAT
+     9100      9110      9120      9130      9140      9150        
+
+           30        20        10                                  
+HUMBE- ATCAATGGGGTAATCAGTGGTGTCAAATAGGAGGT                         
+                                                                   
+BACR30 GCCGTCGTCCATATGAGTTCACTTTATGATTTACACACATTTGACTCGAAATTTGATATA
+     9160      9170      9180      9190      9200      9210        
+
+>>BACR10C18 : AC011705, 170975 bases, from X:19.          (73982 nt)
+rev-comp initn: 162 init1: 162 opt: 235  Z-score: 117.8  bits: 41.1 E(): 0.14
+ 60.311% identity (64.583% ungapped) in 257 nt overlap (293-49:96960-97211)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     : : :::::: ::::::::::: : :::::
+BACR10 TTGCGCCTCTGCCGCTGTTTATTTTCCCACACATATATATGTATATATATATATTTATAT
+   96930     96940     96950     96960     96970     96980         
+
+        260       250       240       230       220       210      
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTC
+        :: :   ::: ::::: ::::: :::  :  ::  ::::   : ::: :::::: ::  
+BACR10 ATATATGTATATATATATATATACATGTTTGTATAGGTTG-CATCTTTATTAATTGGCCA
+   96990     97000     97010     97020      97030     97040        
+
+          200       190         180        170       160           
+HUMBE- AT--GCATGCTAATAAATTATG-TCTA-AAAATA-GAATAAATACAAATCAATGTGC-TC
+       ::    :: : : : : :::::  :::  :: :: ::  ::: : :::     :: : ::
+BACR10 ATAAATATCCCAGTTATTTATGAACTATCAACTATGATCAAAAAAAAAATTGCGTACATC
+    97050     97060     97070     97080     97090     97100        
+
+    150        140       130       120       110        100        
+HUMBE- TGTGCAT-TAGTTACTTATTAGGTTTTGGGAAACAAGAGGTA-AAAAACTAGAGACCTCT
+        :  :::  :: ::  : :::  : :    ::     :  :: : ::: :  ::   :  
+BACR10 CGCACATACAGCTAGGT-TTATATATATTTAACTTTCACATATACAAATTCAAG---TAC
+    97110     97120      97130     97140     97150     97160       
+
+       90        80        70            60        50        40    
+HUMBE- TAATGCAGTCAAAAATACAAATAAATA----AAAAGTCACTTACAACCCAAAGTGTGACT
+        ::: ::   : ::::::::::: :::     :::  : :::::: :             
+BACR10 AAATTCAAATACAAATACAAATACATATGCTTAAATACGCTTACATCGGATTTTGTCTAT
+        97170     97180     97190     97200     97210     97220    
+
+           30        20        10                                  
+HUMBE- ATCAATGGGGTAATCAGTGGTGTCAAATAGGAGGT                         
+                                                                   
+BACR10 GCCGTCGTCCATATGAGTTCACTTTATGATTTACACACATTTGACTCGAAATTTGATATA
+        97230     97240     97250     97260     97270     97280    
+
+>>BACR32M04 : AC008234, 148847 bases, from 3R:85.         (73982 nt)
+ initn: 223 init1: 223 opt: 229  Z-score: 114.0  bits: 40.4 E(): 0.24
+ 73.034% identity (73.034% ungapped) in 89 nt overlap (199-287:46334-46422)
+
+      170       180       190       200       210       220        
+HUMBE- TATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACA
+                                     :: :::::::    :::  :  : :: :: 
+BACR32 GCTCCTGCTTTCTCGCTTCACTTGCCACATTGAATGAGCACGAAAAGTTACTCTACTACC
+         46310     46320     46330     46340     46350     46360   
+
+      230       240       250       260       270       280        
+HUMBE- AATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATATA
+       :    :: : :::: :: ::::::::::: : :::::::: : :::::::::::::::: 
+BACR32 ATACTATACGTATACATATATATGTATGTATATACATATATATATATATATATATATATG
+         46370     46380     46390     46400     46410     46420   
+
+      290       300       310       320       330       340        
+HUMBE- TTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTG
+                                                                   
+BACR32 GCCGAATGCAGCCTCGCTGCCTCTGCTCCATTATCAAAATGCAATTTCATCAATGCTGGC
+         46430     46440     46450     46460     46470     46480   
+
+>>BACR43K14 : AC008356, 162593 bases, from 3R:85.         (20633 nt)
+ initn: 223 init1: 223 opt: 229  Z-score: 122.9  bits: 40.2 E(): 0.27
+ 73.034% identity (73.034% ungapped) in 89 nt overlap (199-287:161695-161783)
+
+      170       180       190       200       210       220        
+HUMBE- TATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACA
+                                     :: :::::::    :::  :  : :: :: 
+BACR43 GCTCCTGCTTTCTCGCTTCACTTGCCACATTGAATGAGCACGAAAAGTTACTCTACTACC
+       161670    161680    161690    161700    161710    161720    
+
+      230       240       250       260       270       280        
+HUMBE- AATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATATA
+       :    :: : :::: :: ::::::::::: : :::::::: : :::::::::::::::: 
+BACR43 ATACTATACGTATACATATATATGTATGTATATACATATATATATATATATATATATATG
+       161730    161740    161750    161760    161770    161780    
+
+      290       300       310       320       330       340        
+HUMBE- TTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTG
+                                                                   
+BACR43 GCCGAATGCAGCCTCGCTGCCTCTGCTCCATTATCAAAATGCAATTTCATCAATGCTGGC
+       161790    161800    161810    161820    161830    161840    
+
+>>BACR39E09 : AC009255, 163990 bases, from 2R:42.         (73982 nt)
+rev-comp initn: 142 init1: 142 opt: 227  Z-score: 112.7  bits: 40.2 E(): 0.28
+ 68.421% identity (73.984% ungapped) in 133 nt overlap (345-216:62541-62666)
+
+          370       360       350       340       330       320    
+HUMBE- AGGACAGAATGGATGAAAACTCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATT
+                                     ::: ::::: :: ::     :: :   :: 
+BACR39 TTGAAGTCAATGTCAAGTCCAATGCGGTCATTCCAAGCA-ATTTT----ATAATATAATA
+            62520     62530     62540      62550         62560     
+
+          310       300       290         280       270       260  
+HUMBE- AAAACCTTCTGGTAAGAAAAGAAAAAATATATATAT--ATATATATGTGTATATGTACAC
+         ::: :  :  ::: : :: : ::::::::: :::  :::::: ::::::::: ::   
+BACR39 TTAACATAATATTAATATAATATAAAATATATGTATGCATATATTTGTGTATATATATTT
+       62570     62580     62590     62600     62610     62620     
+
+            250        240       230       220       210       200 
+HUMBE- ACATACATATACATATAT-ATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCA
+       ::::: ::::: :::::: :::  ::::: : :::::   :::                 
+BACR39 ACATATATATAAATATATAATG--TTCATATATTGTTAAGTTTGCTGTGCTGCAAAGTTT
+       62630     62640       62650     62660     62670     62680   
+
+             190       180       170       160       150       140 
+HUMBE- TGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGT
+                                                                   
+BACR39 TGGCACTGACCACGAAAGCAGAATCGCCATGAAGGCGATGCTGGAGGCACACGTCGGGTA
+         62690     62700     62710     62720     62730     62740   
+
+>>BACR13P06 : AC008339, 180296 bases, from 2R:42.         (73982 nt)
+rev-comp initn: 142 init1: 142 opt: 227  Z-score: 112.7  bits: 40.2 E(): 0.28
+ 68.421% identity (73.984% ungapped) in 133 nt overlap (345-216:18716-18841)
+
+          370       360       350       340       330       320    
+HUMBE- AGGACAGAATGGATGAAAACTCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATT
+                                     ::: ::::: :: ::     :: :   :: 
+BACR13 TTGAAGTCAATGTCAAGTCCAATGCGGTCATTCCAAGCA-ATTTT----ATAATATAATA
+       18690     18700     18710     18720          18730     18740
+
+          310       300       290         280       270       260  
+HUMBE- AAAACCTTCTGGTAAGAAAAGAAAAAATATATATAT--ATATATATGTGTATATGTACAC
+         ::: :  :  ::: : :: : ::::::::: :::  :::::: ::::::::: ::   
+BACR13 TTAACATAATATTAATATAATATAAAATATATGTATGCATATATTTGTGTATATATATTT
+            18750     18760     18770     18780     18790     18800
+
+            250        240       230       220       210       200 
+HUMBE- ACATACATATACATATAT-ATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCA
+       ::::: ::::: :::::: :::  ::::: : :::::   :::                 
+BACR13 ACATATATATAAATATATAATG--TTCATATATTGTTAAGTTTGCTGTGCTGCAAAGTTT
+            18810     18820       18830     18840     18850        
+
+             190       180       170       160       150       140 
+HUMBE- TGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGT
+                                                                   
+BACR13 TGGCACTGACCACGAAAGCAGAATCGCCATGAAGGCGATGCTGGAGGCACACGTCGGGTA
+    18860     18870     18880     18890     18900     18910        
+
+>>BACR18C01 : AC007808, 167195 bases, from 3R:88.         (73982 nt)
+ initn: 100 init1: 100 opt: 226  Z-score: 112.0  bits: 40.0 E():  0.3
+ 65.693% identity (67.164% ungapped) in 137 nt overlap (170-304:93850-93985)
+
+     140       150       160       170        180       190        
+HUMBE- CTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTT-TAGACATAATT-TATTAGC
+                                     :::  ::::: : :   :: :: ::   : 
+BACR18 AACGGTTGTCCAAGCTCAACTACAAGGAGCATTAAATTTTATGGGTTTAGTTGTAGATGG
+   93820     93830     93840     93850     93860     93870         
+
+       200       210       220       230       240       250       
+HUMBE- ATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGT
+       :   : : ::::::: ::: ::   :  : :::  : : :: ::: :: ::::: ::: :
+BACR18 AAATAAGTGCAAATTTAGATAACTGATCA-AAACCATTTCAGATAAATATATATATATAT
+   93880     93890     93900      93910     93920     93930        
+
+       260       270       280       290       300       310       
+HUMBE- GTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAA
+        : :: ::::: : :::::::::::::::::: :::   ::  : ::             
+BACR18 ATATATATATATATATATATATATATATATATATTTAAGTTCATCACACTTTTTTTTTTT
+    93940     93950     93960     93970     93980     93990        
+
+       320       330       340       350       360       370       
+HUMBE- TCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGT
+                                                                   
+BACR18 TTGCATTTATATACCCTTTTGTGGGCCAGGGTAATCAGAAGACTTACACATTTTATTGGG
+    94000     94010     94020     94030     94040     94050        
+
+>>BACH59K20 : AC010840, 29516 bases, from 4:101.          (29516 nt)
+rev-comp initn: 179 init1: 179 opt: 227  Z-score: 119.1  bits: 40.0 E(): 0.31
+ 64.189% identity (65.517% ungapped) in 148 nt overlap (300-154:10680-10825)
+
+     330       320       310       300        290       280        
+HUMBE- CTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGA-AAAAATATATATATATATATAT
+                                     ::: : : : : ::::::::::::::::::
+BACH59 CCTCGTCAGATCGACTCGGCTATTGATCCTGAATATATATATATATATATATATATATAT
+   10650     10660     10670     10680     10690     10700         
+
+      270       260       250       240       230       220        
+HUMBE- GTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTA
+        : ::::: :: : : ::  ::::: ::::::::  ::  :: : :   : : ::: :  
+BACH59 TTATATATATATATATATTTATATATATATATATTTATATATATATATATATATTTATAT
+   10710     10720     10730     10740     10750     10760         
+
+      210       200       190       180       170       160        
+HUMBE- ATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGC
+       :::   : ::  ::  ::   : :::: : :  : ::  :: ::::: : ::  :: :  
+BACH59 ATTATAT-ATTTAT-ATATATATTTATTTATTTATATTTAACAAATATATATACATATAT
+   10770      10780      10790     10800     10810     10820       
+
+      150       140       130       120       110       100        
+HUMBE- TCTGTGCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCT
+                                                                   
+BACH59 ATAATAACGAATCCACGGTAATTTATTAGATAAAAAATATAACCTTTATTCAAAATAAAA
+     10830     10840     10850     10860     10870     10880       
+
+>>BACR20B21 : AC099010, 166512 bases, from 2L:27.         (73982 nt)
+rev-comp initn: 228 init1: 145 opt: 225  Z-score: 111.4  bits: 39.9 E(): 0.33
+ 67.361% identity (71.324% ungapped) in 144 nt overlap (303-165:133938-134078)
+
+        330       320       310       300       290       280      
+HUMBE- CTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATAT
+                                     ::: :  : :: : ::::::::::::::::
+BACR20 TAATATTACATACATTAGTTTGTTGAACATTAAAATGAAAATATATATATATATATATAT
+    133910    133920    133930    133940    133950    133960       
+
+        270         260       250       240       230       220    
+HUMBE- ATGTGTATA--TGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTT
+       :: ::: ::   ::    ::::::::: ::::: ::::: ::  ::  :::      :::
+BACR20 ATTTGTTTAGCGGTTTTTACATACATACACATA-ATATGTATAAATAAGTTAAAAAATTT
+    133970    133980    133990    134000     134010    134020      
+
+          210         200        190       180       170       160 
+HUMBE- CTTAATTTG-CT-CATGCATGCTAAT-AAATTATGTCTAAAAATAGAATAAATACAAATC
+        :::::: : ::  :: :::   ::: : :: :: ::  :::::::  :  :::      
+BACR20 ATTAATTAGACTAAATTCATTGGAATGATATAATATC--AAAATAGTTTGCATAATACAG
+     134030    134040    134050    134060      134070    134080    
+
+             150       140       130       120       110       100 
+HUMBE- AATGTGCTCTGTGCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGA
+                                                                   
+BACR20 CGATTTAGTTAAAATTTCACAACTATCTAGAAGCGAAAACTTGGAACATTTTAGTTTTGC
+       134090    134100    134110    134120    134130    134140    
+
+>>BACR13A13 : AC018484, 172748 bases, from 2L:40.         (73982 nt)
+ initn: 160 init1: 160 opt: 225  Z-score: 111.4  bits: 39.9 E(): 0.33
+ 64.493% identity (65.926% ungapped) in 138 nt overlap (167-303:102083-102218)
+
+        140       150       160       170       180       190      
+HUMBE- TAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATAATTTATTAG
+                                     ::: :: ::  :  ::: :   :::: :: 
+BACR13 CCGCAACTTTCCGCAAAAATCTTTAACAGTTTTTTTTTACCTCAAGAAACTCTTTAATAC
+         102060    102070    102080    102090    102100    102110  
+
+        200        210       220       230       240       250     
+HUMBE- CATG-CATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTAT
+        ::: ::: : :: ::  : : : :::  :::: :   :  ::::::::  :: :: :::
+BACR13 AATGACATAAACATAT--ATATATACATAAACATACATAAACATATATACATACATATAT
+         102120      102130    102140    102150    102160    102170
+
+         260       270       280       290       300       310     
+HUMBE- GTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTT
+        : : :: ::::: : ::::::::::::::: : :    : :: : ::            
+BACR13 ATATATATATATATATATATATATATATATAAACTAAAACCTTACGTAAACATAAAAAAG
+           102180    102190    102200    102210    102220    102230
+
+         320       330       340       350       360       370     
+HUMBE- AATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCT
+                                                                   
+BACR13 ATGTTCACAAAATACACAAAAAAGGTTAAATCAAATATAATTACACAAAAAATATTAAAT
+           102240    102250    102260    102270    102280    102290
+
+>>BACR48D17 : AC008255, 183316 bases, from X:13.          (73982 nt)
+rev-comp initn: 171 init1: 171 opt: 225  Z-score: 111.4  bits: 39.9 E(): 0.33
+ 67.424% identity (71.774% ungapped) in 132 nt overlap (291-160:52531-52654)
+
+      320       310       300       290       280       270        
+HUMBE- TGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGT
+                                     ::::::::::::::::::::   ::::: :
+BACR48 ATTGTATAATTTTCGTTTGCTTTTGCTATTAAATATATATATATATATATAGATATATAT
+            52510     52520     52530     52540     52550     52560
+
+      260       250       240       230       220       210        
+HUMBE- ACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCAT
+       : :::  :: :: :  ::::::::  ::  :: : :   : : : : ::  : ::   ::
+BACR48 ATACATCTATATTTTTATATATAT--ATATATATCTATATATATATATT--TATGTAAAT
+            52570     52580       52590     52600       52610      
+
+      200       190       180       170       160       150        
+HUMBE- GCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATT
+       : ::: :: :: ::::    :::::::   ::: : ::::::                  
+BACR48 GTATG-TATTATATTAAAAATAAAAAT---ATATACACAAATATTATTATATACATAAAT
+      52620      52630     52640        52650     52660     52670  
+
+      140       130       120       110       100        90        
+HUMBE- AGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCAGTC
+                                                                   
+BACR48 ATAAATATATATATGTATATATGTTTTATATGTAAAATTATGAAGTATAAGGGTATCTTG
+          52680     52690     52700     52710     52720     52730  
+
+>>BACR06H06 : 186935 bases, from 2L:40.                   (44975 nt)
+ initn: 160 init1: 160 opt: 225  Z-score: 114.9  bits: 39.8 E(): 0.35
+ 64.493% identity (65.926% ungapped) in 138 nt overlap (167-303:175937-176072)
+
+        140       150       160       170       180       190      
+HUMBE- TAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATAATTTATTAG
+                                     ::: :: ::  :  ::: :   :::: :: 
+BACR06 CCGCAACTTTCCGCAAAAATCTTTAACAGTTTTTTTTTACCTCAAGAAACTCTTTAATAC
+     175910    175920    175930    175940    175950    175960      
+
+        200        210       220       230       240       250     
+HUMBE- CATG-CATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTAT
+        ::: ::: : :: ::  : : : :::  :::: :   :  ::::::::  :: :: :::
+BACR06 AATGACATAAACATAT--ATATATACATAAACATACATAAACATATATACATACATATAT
+     175970    175980      175990    176000    176010    176020    
+
+         260       270       280       290       300       310     
+HUMBE- GTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTT
+        : : :: ::::: : ::::::::::::::: : :    : :: : ::            
+BACR06 ATATATATATATATATATATATATATATATAAACTAAAACCTTACGTAAACATAAAAAAG
+       176030    176040    176050    176060    176070    176080    
+
+         320       330       340       350       360       370     
+HUMBE- AATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCT
+                                                                   
+BACR06 ATGTTCACAAAATACACAAAAAAGGTTAAATCAAATATAATTACACAAAAAATATTAAAT
+       176090    176100    176110    176120    176130    176140    
+
+>>BACR22N03 : AC092397, 167201 bases, from 2L:26.         (25241 nt)
+ initn: 153 init1: 124 opt: 225  Z-score: 118.9  bits: 39.8 E(): 0.37
+ 67.361% identity (71.324% ungapped) in 144 nt overlap (165-303:166819-166959)
+
+          140       150       160       170       180        190   
+HUMBE- AGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATAAT-TTAT
+                                     :::  :  :::::::  :: :: :: : ::
+BACR22 AGTTGTGAAATTTTAACTAAATCGCTGTATTATGCAAACTATTTT--GATATTATATCAT
+   166790    166800    166810    166820    166830      166840      
+
+           200         210       220       230       240       250 
+HUMBE- TAGCATGCA-TGAG-CAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATAT
+       :   ::: : : :: : :::::: :::      :::  ::  :: ::::: ::::: :::
+BACR22 TCCAATGAATTTAGTCTAATTAATAAATTTTTTAACTTATTTATACATAT-TATGTGTAT
+     166850    166860    166870    166880    166890     166900     
+
+             260         270       280       290       300         
+HUMBE- GTATGTGTGTAC--ATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAA
+       ::::::    ::   :: ::: :::::::::::::::::: : :: :  : :::      
+BACR22 GTATGTAAAAACCGCTAAACAAATATATATATATATATATATATTTTCATTTTAATGTTC
+      166910    166920    166930    166940    166950    166960     
+
+     310       320       330       340       350       360         
+HUMBE- GGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTC
+                                                                   
+BACR22 AACAAACTAATGTATGTAATATTAAAACAACAATTATGAAGACAAACATTAATCTACATT
+      166970    166980    166990    167000    167010    167020     
+
+>>BACR37K05 : AC011253, 197597 bases, from 3R:84.         (55637 nt)
+ initn: 184 init1: 184 opt: 223  Z-score: 112.1  bits: 39.6 E():  0.4
+ 63.636% identity (65.468% ungapped) in 143 nt overlap (156-294:158456-158598)
+
+         130       140       150       160       170          180  
+HUMBE- AACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTAT---TTTTAG
+                                     :::  ::: :  ::::: :::     ::: 
+BACR37 CTCTACTCTCCATTATTACACGTTTTATTAATTTTTTTTTTCTTATTTTATCAAGATTAA
+      158430    158440    158450    158460    158470    158480     
+
+             190       200       210       220       230       240 
+HUMBE- ACATAAT-TTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATAT
+       :  :: : ::: ::  ::  ::    : ::  : : : : :   : : ::  :: ::: :
+BACR37 ATCTAGTATTAATATAATATATATATATATATATATATATACATATATATATATACATTT
+      158490    158500    158510    158520    158530    158540     
+
+             250       260       270       280       290       300 
+HUMBE- ATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCT
+       :::: ::::: ::: : : :: ::::: : ::::::::::: :::::: : ::       
+BACR37 ATATATATATATATATATATATATATATATATATATATATAAATATATATATTGCTGGAG
+      158550    158560    158570    158580    158590    158600     
+
+             310       320       330       340       350       360 
+HUMBE- TACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTC
+                                                                   
+BACR37 CCTTCGACTCATTACAAGATTATTTTATAACATTCTATGGTCTCTATTTCAATTTTTTTT
+      158610    158620    158630    158640    158650    158660     
+
+>>BACR30L17 : AC012390, 174441 bases, from 4:101.         (73982 nt)
+rev-comp initn: 132 init1: 132 opt: 221  Z-score: 108.8  bits: 39.4 E(): 0.46
+ 68.382% identity (75.610% ungapped) in 136 nt overlap (323-199:139648-139781)
+
+        350       340       330       320       310       300      
+HUMBE- TACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAA-AAG
+                                     ::: :: : ::   :: ::::  ::: : :
+BACR30 GTCGGAAATAATGTTTACTTTGACGATCATTTTCGAATTAATATTTGTGGTGCGAATATG
+    139620    139630    139640    139650    139660    139670       
+
+             290        280       270       260       250          
+HUMBE- AA--AAA--ATATATATAT-ATATATATGTGTATATGTACACACATACATATACATATAT
+       ::  :::  ::::::: :: ::::: ::   :::::::: : ::::::::::::::::::
+BACR30 AATCAAATCATATATAAATGATATACATACATATATGTATATACATACATATACATATAT
+    139680    139690    139700    139710    139720    139730       
+
+    240       230            220       210       200       190     
+HUMBE- ATGCATTCATTTGT-----TGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAATTAT
+       :  :: : :::: :      :: ::: ::  :: : : :: :: ::              
+BACR30 A--CAATAATTTCTGGACAGGTCGTTGTTTGTACTCTTCTAATTCAATCGATGCAAAGTG
+      139740    139750    139760    139770    139780    139790     
+
+         180       170       160       150       140       130     
+HUMBE- GTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAGGTTT
+                                                                   
+BACR30 CAAAAGCGAAACAAGGACATGCTGGTTCTCAAAATTAATGGTCCGTAGGGTATTGAAAAG
+      139800    139810    139820    139830    139840    139850     
+
+>>BACR01O16 : AC093045, 189904 bases, from 2L:29.         (73982 nt)
+ initn: 174 init1: 174 opt: 221  Z-score: 108.8  bits: 39.4 E(): 0.46
+ 71.134% identity (71.875% ungapped) in 97 nt overlap (197-293:5687-5782)
+
+        170       180       190       200       210       220      
+HUMBE- TTTATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAA
+                                     ::  ::: ::::   : :::  ::::::::
+BACR01 GGTATTGGGCGCAAATCTTTTTTCGAGTGCCAGACATTAGCAGCATTAGACCAACAACAA
+       5660      5670      5680      5690      5700      5710      
+
+        230       240       250       260       270       280      
+HUMBE- CAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATA
+       :: :: :   : : ::  ::::::: ::: : : :: ::::: : :::::::: ::::::
+BACR01 CAGAT-ATCACTTCTACTTGTATATATATATATATATATATATATATATATATGTATATA
+       5720       5730      5740      5750      5760      5770     
+
+        290       300       310       320       330       340      
+HUMBE- TATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAAC
+       ::  :::                                                     
+BACR01 TACATTTATATGGAGATATAAGAGCGAGGCATTGCACAACACCGAAAACCCACAGGCAAA
+        5780      5790      5800      5810      5820      5830     
+
+>>BACR14D22 : AC012164, 169384 bases, from X:17.          (73982 nt)
+ initn: 196 init1: 196 opt: 221  Z-score: 108.8  bits: 39.4 E(): 0.46
+ 64.029% identity (65.441% ungapped) in 139 nt overlap (158-293:2977-3115)
+
+       130       140       150       160       170        180      
+HUMBE- CCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTAT-TCTATTTTTAGACAT
+                                     :: : : :::: :: :: :: :: :    :
+BACR14 CTGGCCTGCCTAACTTTATAATACAATTTATGGTCTATATTCATCTCGATCTTGATTACT
+       2950      2960      2970      2980      2990      3000      
+
+        190         200       210       220       230       240    
+HUMBE- AATTTATTAGCAT--GCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATA
+         ::: : :   :  :   ::: : ::  : : : : :   : : ::  ::  :::::::
+BACR14 GCTTTTTGACTTTGAGGGGGAGTATATATATATATATATATATATATATATATATATATA
+       3010      3020      3030      3040      3050      3060      
+
+          250       260       270       280       290       300    
+HUMBE- TGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTAC
+       : ::::: ::: :::::: ::::::: ::::: :::::::::: : : :           
+BACR14 TATATATATATATGTGTATATATACATATATACATATATATATGTGTATGTGTAGCTTCG
+       3070      3080      3090      3100      3110      3120      
+
+          310       320       330       340       350       360    
+HUMBE- CAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATC
+                                                                   
+BACR14 ATTTAACTATGGAAAATTGTTTCTTATGCGCAGAAAGATCATATACATTGTATACATTAA
+       3130      3140      3150      3160      3170      3180      
+
+>>BACR08A11 : 170075 bases, from 2R:42.                   (73982 nt)
+ initn: 183 init1: 183 opt: 221  Z-score: 108.8  bits: 39.4 E(): 0.46
+ 74.390% identity (74.390% ungapped) in 82 nt overlap (238-319:63815-63896)
+
+       210       220       230       240       250       260       
+HUMBE- AAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATAT
+                                     :: :::::: :::: ::: : : :: ::::
+BACR08 AACACCTTATGACGATATTTCCATACACATATGTATATGAATATATATATATATATATAT
+        63790     63800     63810     63820     63830     63840    
+
+       270       280       290       300       310       320       
+HUMBE- ACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAG
+       : : :::::::::::::::::::: ::      :: :::::   :::: :::        
+BACR08 ATATATATATATATATATATATTTCTTGGCCGTTTTCCAGATTATTTTGATCGGGGATTT
+        63850     63860     63870     63880     63890     63900    
+
+       330       340       350       360       370       380       
+HUMBE- GAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTATTTTG
+                                                                   
+BACR08 TCGATCCGCGCCCCACTTTCTGTTGCTCAGGTGTGTGTTGCTTTAATTGCACGCTGTAAT
+        63910     63920     63930     63940     63950     63960    
+
+>>BACR25O02 : AC018490, 165098 bases, from X:17.          (73982 nt)
+rev-comp initn: 196 init1: 196 opt: 221  Z-score: 108.8  bits: 39.4 E(): 0.46
+ 64.029% identity (65.441% ungapped) in 139 nt overlap (293-158:33829-33967)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     : : : :::::::::: ::::: :::::::
+BACR25 CAATTTTCCATAGTTAAATCGAAGCTACACATACACATATATATATGTATATATGTATAT
+    33800     33810     33820     33830     33840     33850        
+
+        260       250       240       230       220       210      
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCT-
+        :::::: ::: ::::: ::::::::  ::  :: : :   : : : : :  :: : :: 
+BACR25 ATACACATATATATATATATATATATATATATATATATATATATATATATATATATACTC
+    33860     33870     33880     33890     33900     33910        
+
+          200       190       180        170       160       150   
+HUMBE- -CATGCATGCTAATAAATTATGTCTAAAAATAGA-ATAAATACAAATCAATGTGCTCTGT
+        :   ::   : : :::  :    : :: :: :: :: :::: : : ::           
+BACR25 CCCCTCAAAGTCAAAAAGCAGTAATCAAGATCGAGATGAATATAGACCATAAATTGTATT
+    33920     33930     33940     33950     33960     33970        
+
+           140       130       120       110       100        90   
+HUMBE- GCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATG
+                                                                   
+BACR25 ATAAAGTTAGGCAGGCCAGCCAGTGGGCACACTTTTCACAATCCATTTGCGATCAACTAT
+    33980     33990     34000     34010     34020     34030        
+
+>>BACR02B16 : AC008331, 186938 bases, from 2L:29.         (44978 nt)
+ initn: 174 init1: 174 opt: 221  Z-score: 112.3  bits: 39.4 E(): 0.48
+ 71.134% identity (71.875% ungapped) in 97 nt overlap (197-293:183604-183699)
+
+        170       180       190       200       210       220      
+HUMBE- TTTATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAA
+                                     ::  ::: ::::   : :::  ::::::::
+BACR02 GGTATTGGGCGCAAATCTTTTTTCGAGTGCCAGACATTAGCAGCATTAGACCAACAACAA
+        183580    183590    183600    183610    183620    183630   
+
+        230       240       250       260       270       280      
+HUMBE- CAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATA
+       :: :: :   : : ::  ::::::: ::: : : :: ::::: : :::::::: ::::::
+BACR02 CAGAT-ATCACTTCTACTTGTATATATATATATATATATATATATATATATATGTATATA
+         183640    183650    183660    183670    183680    183690  
+
+        290       300       310       320       330       340      
+HUMBE- TATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAAC
+       ::  :::                                                     
+BACR02 TACATTTATATGGAGATATAAGAGCGAGGCATTGCACAACACCGAAAACCCACAGGCAAA
+         183700    183710    183720    183730    183740    183750  
+
+>>BACR13P06 : AC008339, 180296 bases, from 2R:42.         (38336 nt)
+ initn: 183 init1: 183 opt: 221  Z-score: 113.4  bits: 39.3 E(): 0.49
+ 74.390% identity (74.390% ungapped) in 82 nt overlap (238-319:177941-178022)
+
+       210       220       230       240       250       260       
+HUMBE- AAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATAT
+                                     :: :::::: :::: ::: : : :: ::::
+BACR13 AACACCTTATGACGATATTTCCATACACATATGTATATGAATATATATATATATATATAT
+           177920    177930    177940    177950    177960    177970
+
+       270       280       290       300       310       320       
+HUMBE- ACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAG
+       : : :::::::::::::::::::: ::      :: :::::   :::: :::        
+BACR13 ATATATATATATATATATATATTTCTTGGCCGTTTTCCAGATTATTTTGATCGGGGATTT
+           177980    177990    178000    178010    178020    178030
+
+       330       340       350       360       370       380       
+HUMBE- GAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTATTTTG
+                                                                   
+BACR13 TCGATCCGCGCCCCACTTTCTGTTGCTCAGGTGTGTGTTGCTTTAATTGCACGCTGTAAT
+           178040    178050    178060    178070    178080    178090
+
+>>BACR11M14 : AC099308, 173101 bases, from X:19.          (73982 nt)
+ initn: 166 init1: 166 opt: 219  Z-score: 107.6  bits: 39.2 E(): 0.54
+ 59.624% identity (62.871% ungapped) in 213 nt overlap (94-301:38069-38275)
+
+            70        80        90       100       110       120   
+HUMBE- TTATTTATTTGTATTTTTGACTGCATTAAGAGGTCTCTAGTTTTTTACCTCTTGTTTCCC
+                                     :: :::: : : : :::  : :: :::   
+BACR11 TCGTGATTAATAAACTCGGAAGTGAAAACTAGCTCTCGACTGTCTTAATTTTTTTTTTTT
+    38040     38050     38060     38070     38080     38090        
+
+           130       140       150       160       170       180   
+HUMBE- AAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGA
+          :   ::::: :  :   : : :   : : :::   :  ::  : ::: : :   :  
+BACR11 GGGA---AATAATTTTCACGTACGCGTCGTAAATTTGGTGCTACGT-TTCGAGTGCCATC
+    38100        38110     38120     38130     38140      38150    
+
+           190        200           210       220       230        
+HUMBE- CATAATTTATT-AGCATGCATG-AGCA---AATTAAGAAAAACAACAACAAATGAATGCA
+       : :::   ::: : :::::: :  :::   :::  : : : :::   : : ::: :: ::
+BACR11 CCTAACACATTCAACATGCACGCTGCATACAATACATATATACATAGATATATGCATACA
+        38160     38170     38180     38190     38200     38210    
+
+      240       250       260       270       280       290        
+HUMBE- TATATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTT
+       ::::::  ::::: ::: :   :::::::: :::::::::: ::: ::::: : : : : 
+BACR11 TATATA--TATATATATATACATACATATATACATATATATTTATGTATATATGTACGTA
+        38220       38230     38240     38250     38260     38270  
+
+      300       310       320       330       340       350        
+HUMBE- TCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTT
+       : :                                                         
+BACR11 TATGTAGGATATATAGAGCTCTCCTTCTGGAATTGGGACATGGGGTTTTTGGGTTTGAGT
+          38280     38290     38300     38310     38320     38330  
+
+>>BACR15O02 : AC009735, 159478 bases, from 3R:82.         (73982 nt)
+rev-comp initn: 162 init1: 162 opt: 219  Z-score: 107.6  bits: 39.2 E(): 0.54
+ 67.480% identity (69.167% ungapped) in 123 nt overlap (326-207:43412-43534)
+
+           350       340       330       320       310       300   
+HUMBE- CTCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAA
+                                     :: : ::::   :::: : :   :::   :
+BACR15 CGTCAATTAATAAATAAATATGAAGCAAAGTTGTATGGAACTAAACATGCGTTTAACTTA
+           43390     43400     43410     43420     43430     43440 
+
+            290       280       270       260       250        240 
+HUMBE- AGA-AAAAATATATATATATATATATGTGTATATGTACACACATACATATA-CATATATA
+        :: :   :::::::::::::::::: : ::::: :: : : :::::::::  :::::::
+BACR15 GGATATGTATATATATATATATATATATATATATATATATATATACATATATTATATATA
+           43450     43460     43470     43480     43490     43500 
+
+             230        220       210       200       190          
+HUMBE- TGCATTCATTTGT-TGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAATTATGTCTA
+       :  ::  ::   : : ::  : :  ::::::::                           
+BACR15 TAAATATATAAATATATTTATATATTTAATTTGAATCGGTTCGTGGTGCGTTTCCGTGAG
+           43510     43520     43530     43540     43550     43560 
+
+    180       170       160       150       140       130          
+HUMBE- AAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAGGTTTTGGGA
+                                                                   
+BACR15 CTTAAATATTCAATTATTAAGTACAACATTTTTACCAAGAAACATTAACATTTAGATTTG
+           43570     43580     43590     43600     43610     43620 
+
+>>BACR21E16 : AC009905, 170662 bases, from 2L:36.         (73982 nt)
+rev-comp initn: 219 init1: 196 opt: 218  Z-score: 106.9  bits: 39.1 E(): 0.58
+ 69.492% identity (73.214% ungapped) in 118 nt overlap (341-230:86996-87113)
+
+      370       360       350       340       330       320        
+HUMBE- CAGAATGGATGAAAACTCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGAT-TAAA
+                                     ::: ::: ::: :   ::: :  :: ::::
+BACR21 ACTAGATCGACTCGGCTATTGATATTGATCAAGAATACCTTTTATATATATATATATAAA
+       86970     86980     86990     87000     87010     87020     
+
+       310           300       290       280       270       260   
+HUMBE- ACCT---TCT-GGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTACACA
+       :  :   : :   ::   : : : :: :::::::::::::::::: : ::::: ::::  
+BACR21 ATATATATATATATATATATATATAACATATATATATATATATATATATATATATACAAT
+       87030     87040     87050     87060     87070     87080     
+
+           250        240       230       220       210       200  
+HUMBE- CATACATATACATATA-TATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCAT
+        ::::::::::::::: :::: ::  ::                                
+BACR21 TATACATATACATATATTATGTATATATACATATATATGTACTTTAAAAAATCGGAAACG
+       87090     87100     87110     87120     87130     87140     
+
+>>BACR48D17 : AC008255, 183316 bases, from X:13.          (73982 nt)
+ initn: 177 init1: 177 opt: 218  Z-score: 106.9  bits: 39.1 E(): 0.58
+ 60.748% identity (69.149% ungapped) in 214 nt overlap (94-293:86512-86713)
+
+            70        80        90       100         110           
+HUMBE- TTATTTATTTGTATTTTTGACTGCATTAAGAGGTCTCTAGTTTTTT--ACCTCTT----G
+                                     : ::::: : :::: :  :::: ::    :
+BACR48 TAAATTAGAAATATCAGCAGAGCTTCGGGTAAGTCTCCACTTTTGTAGACCTTTTAGCGG
+           86490     86500     86510     86520     86530     86540 
+
+       120       130       140       150       160       170       
+HUMBE- TTTCCCAAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTAT-TCTAT
+       : ::   :: : :  : ::     :::: :::    :::  : :::: :::: :    ::
+BACR48 TGTCAGCAACCGTTTTCAG-----AATGTACA----ACAAGGTTTTGAATTTTTGGGCAT
+           86550     86560              86570     86580     86590  
+
+        180       190        200       210       220       230     
+HUMBE- TTTTAGACATAATTTATTAGCA-TGCATGAGCAAATTAAGAAAAACAACAACAAATGA--
+         ::: ::   : :::  :: : : :  :  ::    :: : :::   ::: : : ::  
+BACR48 GATTAAAC---ACTTAAAAGGATTTCCCGCCCATCCAAAAATAAAACTCAAGATACGAAG
+          86600        86610     86620     86630     86640         
+
+               240       250       260       270       280         
+HUMBE- ----ATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATATAT
+           :::::::::::: ::::: ::: : : :: ::::: :::::::::::: ::::: :
+BACR48 CTTCATGCATATATATATATATATATTTATATATATATATACATATATATATGTATATTT
+   86650     86660     86670     86680     86690     86700         
+
+     290       300       310       320       330       340         
+HUMBE- TTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGA
+        : :                                                        
+BACR48 CTATAGATGGGCGAATAGCACCAAAGACGAAGTTTTACGAAAAGCACCGAATTTACGTTG
+   86710     86720     86730     86740     86750     86760         
+
+>>BACR15O02 : AC009735, 159478 bases, from 3R:82.         (73982 nt)
+ initn: 179 init1: 179 opt: 217  Z-score: 106.3  bits: 39.0 E(): 0.63
+ 64.800% identity (66.393% ungapped) in 125 nt overlap (168-289:126836-126960)
+
+       140       150       160       170       180          190    
+HUMBE- AACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATA---ATTTATT
+                                     :::::  :: :: : :  ::   :: ::: 
+BACR15 TGTATATCAGAAAGTTCACGAAACCACAGATTATTAAATATTAATATCTATCTATATATA
+      126810    126820    126830    126840    126850    126860     
+
+          200       210       220       230       240       250    
+HUMBE- AGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTA
+        : ::  ::    : ::  : : : :::   :   ::  ::: :::::: : ::::::::
+BACR15 TGTATATATATATACATGTATATATACATATATGTATATATGTATATATGTATATATGTA
+      126870    126880    126890    126900    126910    126920     
+
+          260       270       280       290       300       310    
+HUMBE- TGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTT
+       : : :::: ::::  : :::: :::::::::::::                         
+BACR15 TATATGTATATATGTATATATGTATATATATATATACATATACATATATACATATGTATA
+      126930    126940    126950    126960    126970    126980     
+
+          320       330       340       350       360       370    
+HUMBE- TAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCC
+                                                                   
+BACR15 TTTATATATATGTGTGAATATGTGTAACTTAATAGGAAAATGAATTTAATAGAGAAACAA
+      126990    127000    127010    127020    127030    127040     
+
+>>BACR30G03 : AC008355, 151500 bases, from 3R:82.         (73982 nt)
+ initn: 179 init1: 179 opt: 217  Z-score: 106.3  bits: 39.0 E(): 0.63
+ 64.800% identity (66.393% ungapped) in 125 nt overlap (168-289:39251-39375)
+
+       140       150       160       170       180          190    
+HUMBE- AACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATA---ATTTATT
+                                     :::::  :: :: : :  ::   :: ::: 
+BACR30 TGTATATCAGAAAGTTCACGAAACCACAGATTATTAAATATTAATATCTATCTATATATA
+            39230     39240     39250     39260     39270     39280
+
+          200       210       220       230       240       250    
+HUMBE- AGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTA
+        : ::  ::    : ::  : : : :::   :   ::  ::: :::::: : ::::::::
+BACR30 TGTATATATATATACATGTATATATACATATATGTATATATGTATATATGTATATATGTA
+            39290     39300     39310     39320     39330     39340
+
+          260       270       280       290       300       310    
+HUMBE- TGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTT
+       : : :::: ::::  : :::: :::::::::::::                         
+BACR30 TATATGTATATATGTATATATGTATATATATATATACATATACATATATACATATGTATA
+            39350     39360     39370     39380     39390     39400
+
+          320       330       340       350       360       370    
+HUMBE- TAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCC
+                                                                   
+BACR30 TTTATATATATGTGTGAATATGTGTAACTTAATAGGAAAATGAATTTAATAGAGAAACAA
+            39410     39420     39430     39440     39450     39460
+
+>>BACR13P06 : AC008339, 180296 bases, from 2R:42.         (73982 nt)
+rev-comp initn:  90 init1:  90 opt: 216  Z-score: 105.6  bits: 38.9 E(): 0.69
+ 63.030% identity (65.409% ungapped) in 165 nt overlap (333-170:71421-71580)
+
+        360       350       340       330       320        310     
+HUMBE- ATGAAAACTCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGAT-TAAAACCTTCTG
+                                     ::: :: :   : : :: :: :  ::::: 
+BACR13 ATTTAAATATATCGTTTTTATTTTATTACACTTTTCATATATCGTATATATAGACTTCT-
+            71400     71410     71420     71430     71440          
+
+         300       290       280       270       260       250     
+HUMBE- GTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTACACACATACATATACA
+       :: :::         :::::::::::::::::: : ::::: :::: :: ::  ::::::
+BACR13 GTTAGATTTATGTTTATATATATATATATATATATATATATTTACA-ACTTAATTATACA
+   71450     71460     71470     71480     71490      71500        
+
+         240       230       220       210       200       190     
+HUMBE- TATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAATTAT
+         :::    :::  : :::   : :  :::: ::::     ::  ::  ::: :::::: 
+BACR13 ACTATGCAAATT-GTATGTGTATCTAGTTCTAAATTAATGTATTGAT--TAAGAAATTAG
+    71510     71520      71530     71540     71550       71560     
+
+         180       170       160       150       140       130     
+HUMBE- GTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAGGTTT
+          :::::::  :::                                             
+BACR13 TATTAAAAATGTAATCTTTGCGCCCTTACTTTTAGATTAAACTTTTGACAATTGGCCTGA
+       71570     71580     71590     71600     71610     71620     
+
+>>BACR22H11 : AC013431, 191558 bases, from X:13.          (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 216  Z-score: 105.6  bits: 38.9 E(): 0.69
+ 65.185% identity (67.176% ungapped) in 135 nt overlap (293-160:117009-117140)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     : : :::::::::::::::::: : :::::
+BACR22 TAATTAGTAATAATACGATTACACACACACACACATATATATATATATATATATATATAT
+   116980    116990    117000    117010    117020    117030        
+
+        260       250       240       230       220        210     
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAAT-TTGCT
+        :: : : ::: ::::: ::::::::  ::  :: : :   : :      :::: :  : 
+BACR22 ATATATATATATATATATATATATATTAATATATATATATATTTACCAAATAATATAACA
+   117040    117050    117060    117070    117080    117090        
+
+         200       190       180       170       160       150     
+HUMBE- CATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGC
+        ::  :::::  :::: ::  : ::  : :: :  ::::::  ::               
+BACR22 TATTTATGCT-CTAAAGTA--TATATGACTACAGCAAATACGCATTTTTTGATTTTTTGT
+   117100     117110      117120    117130    117140    117150     
+
+         140       130       120       110       100        90     
+HUMBE- ATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCA
+                                                                   
+BACR22 TCGTAGATAAGATAAGTTAATGTGAAAAGAATAACATGATTGCATACTAAAAAGTTCCAC
+      117160    117170    117180    117190    117200    117210     
+
+>>BACR25C18 : AC092401, 185200 bases, from X:13.          (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 216  Z-score: 105.6  bits: 38.9 E(): 0.69
+ 65.185% identity (67.176% ungapped) in 135 nt overlap (293-160:138279-138410)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     : : :::::::::::::::::: : :::::
+BACR25 TAATTAGTAATAATACGATTACACACACACACACATATATATATATATATATATATATAT
+   138250    138260    138270    138280    138290    138300        
+
+        260       250       240       230       220        210     
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAAT-TTGCT
+        :: : : ::: ::::: ::::::::  ::  :: : :   : :      :::: :  : 
+BACR25 ATATATATATATATATATATATATATTAATATATATATATATTTACCAAATAATATAACA
+   138310    138320    138330    138340    138350    138360        
+
+         200       190       180       170       160       150     
+HUMBE- CATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGC
+        ::  :::::  :::: ::  : ::  : :: :  ::::::  ::               
+BACR25 TATTTATGCT-CTAAAGTA--TATATGACTACAGCAAATACGCATTTTTTGATTTTTTGT
+   138370     138380      138390    138400    138410    138420     
+
+         140       130       120       110       100        90     
+HUMBE- ATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCA
+                                                                   
+BACR25 TCGTAGATAAGATAAGTTAATGTGAAAAGAATAACATGATTGCATACTAAAAAGTTCCAC
+      138430    138440    138450    138460    138470    138480     
+
+>>BACR25O02 : AC018490, 165098 bases, from X:17.          (73982 nt)
+ initn: 188 init1: 188 opt: 216  Z-score: 105.6  bits: 38.9 E(): 0.69
+ 62.585% identity (65.248% ungapped) in 147 nt overlap (153-293:33754-33900)
+
+            130       140       150       160            170       
+HUMBE- CAAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTA-----TTTATTCTATT
+                                     ::: :: :: : ::     : :::  : ::
+BACR25 CGGAGCTTAGATTTGGCGCAGCAGACGACACACTTTAATGTATACAATGTATATGATCTT
+         33730     33740     33750     33760     33770     33780   
+
+       180       190       200       210        220       230      
+HUMBE- TTTAGACATAATTTATTAGCATGCATGAGCAAAT-TAAGAAAAACAACAACAAATGAATG
+       : :   :::::   :  :   : :::    ::::  :::  : :::   ::: ::  :: 
+BACR25 TCTGCGCATAAGAAACAATTTTCCATAGTTAAATCGAAGCTACACATACACATATATATA
+         33790     33800     33810     33820     33830     33840   
+
+        240       250       260       270       280       290      
+HUMBE- CATATATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCT
+         ::::::::::::: ::    : :: ::::: : :::::::::::::::::: : :   
+BACR25 TGTATATATGTATATATACACATATATATATATATATATATATATATATATATATATATA
+         33850     33860     33870     33880     33890     33900   
+
+        300       310       320       330       340       350      
+HUMBE- TTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAG
+                                                                   
+BACR25 TATATATATATACTCCCCCTCAAAGTCAAAAAGCAGTAATCAAGATCGAGATGAATATAG
+         33910     33920     33930     33940     33950     33960   
+
+>>BACR11C03 : AC008220, 186549 bases, from 3R:100.        (73982 nt)
+ initn: 209 init1: 209 opt: 216  Z-score: 105.6  bits: 38.9 E(): 0.69
+ 68.571% identity (69.231% ungapped) in 105 nt overlap (188-291:32153-32257)
+
+       160       170       180       190       200        210      
+HUMBE- TGATTTGTATTTATTCTATTTTTAGACATAATTTATTAGCATGCATG-AGCAAATTAAGA
+                                     :: :::  : :  :: : :::::: :    
+BACR11 TGCGCGCTTAAGTGTTTAAAAATCGGAAAGATATATATGAACACAGGCAGCAAAGTGCAG
+          32130     32140     32150     32160     32170     32180  
+
+        220       230       240       250       260       270      
+HUMBE- AAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATA
+       :  :: : :  : ::  ::  :::::::: ::::: ::: : : :: ::::: : :::::
+BACR11 ACCACGAAAGTATATATATATATATATATATATATATATATATATATATATATATATATA
+          32190     32200     32210     32220     32230     32240  
+
+        280       290       300       310       320       330      
+HUMBE- TATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATA
+       ::::::::::::: :                                             
+BACR11 TATATATATATATATAAATATATATATATAGATAGGGGAGTAGGCGGTACAGAAGGTTAC
+          32250     32260     32270     32280     32290     32300  
+
+>>BACR06P08 : AC007975, 180630 bases, from 3R:100.        (38670 nt)
+ initn: 209 init1: 209 opt: 216  Z-score: 110.2  bits: 38.8 E(): 0.74
+ 68.571% identity (69.231% ungapped) in 105 nt overlap (188-291:154200-154304)
+
+       160       170       180       190       200        210      
+HUMBE- TGATTTGTATTTATTCTATTTTTAGACATAATTTATTAGCATGCATG-AGCAAATTAAGA
+                                     :: :::  : :  :: : :::::: :    
+BACR06 TGCGCGCTTAAGTGTTTAAAAATCGGAAAGATATATATGAACACAGGCAGCAAAGTGCAG
+  154170    154180    154190    154200    154210    154220         
+
+        220       230       240       250       260       270      
+HUMBE- AAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATA
+       :  :: : :  : ::  ::  :::::::: ::::: ::: : : :: ::::: : :::::
+BACR06 ACCACGAAAGTATATATATATATATATATATATATATATATATATATATATATATATATA
+  154230    154240    154250    154260    154270    154280         
+
+        280       290       300       310       320       330      
+HUMBE- TATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATA
+       ::::::::::::: :                                             
+BACR06 TATATATATATATATAAATATATATATATAGATAGGGGAGTAGGCGGTACAGAAGGTTAC
+  154290    154300    154310    154320    154330    154340         
+
+>>BACR29F06 : AC008203, 174728 bases, from 3R:95.         (73982 nt)
+rev-comp initn: 198 init1: 198 opt: 215  Z-score: 105.0  bits: 38.7 E(): 0.75
+ 68.807% identity (70.093% ungapped) in 109 nt overlap (299-192:13760-13867)
+
+    330       320       310       300       290       280          
+HUMBE- TCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGT
+                                     ::::  : : :::::::::::::::::: :
+BACR29 AAACAAAAAAAAAGAAATAGTATAGAATGCAAAAATATATATATATATATATATATATAT
+   13730     13740     13750     13760     13770     13780         
+
+    270       260       250       240        230       220         
+HUMBE- GTATATGTACACACATACATATACATATATATGC-ATTCATTTGTTGTTGTTTTTCTTAA
+        ::::: :: : : ::: :: ::::::::::::: ::  :  : :   : : :: : : :
+BACR29 ATATATATATATATATATATGTACATATATATGCTATATAGATTTCTATATATTCCATTA
+   13790     13800     13810     13820     13830     13840         
+
+     210       200       190       180       170       160         
+HUMBE- TTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCT
+        ::: :::  :    ::::                                         
+BACR29 -TTGATCAGTCTGTGTAATCTCTCTCGCTAAAGAAACGCCTGATGCTGGATGCTAGAGGC
+    13850     13860     13870     13880     13890     13900        
+
+>>BACR48E23 : AC005721, 203183 bases, from 3R:88.         (73982 nt)
+rev-comp initn: 189 init1: 189 opt: 215  Z-score: 105.0  bits: 38.7 E(): 0.75
+ 64.085% identity (67.407% ungapped) in 142 nt overlap (325-187:64593-64730)
+
+          350       340       330       320       310        300   
+HUMBE- TCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTA-AGAAA
+                                     ::::::  ::::    :::   :: : : :
+BACR48 TTCGAAAATTTGCTTTAATTCATATATTTATATTTGCTTTAA----TTCATATATATATA
+          64570     64580     64590     64600         64610        
+
+           290       280       270       260       250       240   
+HUMBE- AGAAAAAATATATATATATATATATGTGTATATGTACACACATACATATACATATATATG
+          : : :::::::::::::::::: : ::::: :  : : ::: ::::: :::  ::: 
+BACR48 TATATATATATATATATATATATATATATATATATGTATATATATATATATATAATTATC
+    64620     64630     64640     64650     64660     64670        
+
+           230       220         210       200       190       180 
+HUMBE- CATTCATTTGTTGTTGTTTTTC--TTAATTTGCTCATGCATGCTAATAAATTATGTCTAA
+        :     : ::: : :::::::  ::  :::    ::   ::::::::  ::        
+BACR48 AAAGTTATAGTTTTAGTTTTTCAGTTTTTTTCAATATAGTTGCTAATAGTTTTGCCGATA
+    64680     64690     64700     64710     64720     64730        
+
+             170       160       150       140       130       120 
+HUMBE- AAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAGGTTTTGGGAA
+                                                                   
+BACR48 GTGCTACCTGTAGTTTCCACTTGCCATCTCTAAAAACGTGATAAACACGCTTGTTTTGTT
+    64740     64750     64760     64770     64780     64790        
+
+>>BACR22N03 : AC092397, 167201 bases, from 2L:26.         (73982 nt)
+rev-comp initn: 206 init1: 206 opt: 215  Z-score: 105.0  bits: 38.7 E(): 0.75
+ 74.684% identity (74.684% ungapped) in 79 nt overlap (311-233:100674-100752)
+
+      340       330       320       310       300       290        
+HUMBE- AAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATAT
+                                     ::  ::::::  : : ::: : ::::::::
+BACR22 CGCGTCTTGCCGCACTCAAAACTGCCACTGCCAGCTGGTAGCACATGAATATATATATAT
+        100650    100660    100670    100680    100690    100700   
+
+      280       270       260       250       240       230        
+HUMBE- ATATATATATGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTT
+       :::::::::: : ::::: :: : : ::: :::::   ::::::  :::           
+BACR22 ATATATATATATATATATATATATATATATATATAGGGATATATATATTTTCCTGATATC
+        100710    100720    100730    100740    100750    100760   
+
+      220       210       200       190       180       170        
+HUMBE- GTTTTTCTTAATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAA
+                                                                   
+BACR22 TAGCCACAGCGCGACATGAAAATTCAGCTACGGTCTCGTTCGACGATCGTCTCCCAGCGA
+        100770    100780    100790    100800    100810    100820   
+
+>>BACR08H11 : AC008136, 154890 bases, from 3R:88.         (73982 nt)
+rev-comp initn: 189 init1: 189 opt: 215  Z-score: 105.0  bits: 38.7 E(): 0.75
+ 64.085% identity (67.407% ungapped) in 142 nt overlap (325-187:86974-87111)
+
+          350       340       330       320       310        300   
+HUMBE- TCTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTA-AGAAA
+                                     ::::::  ::::    :::   :: : : :
+BACR08 TTCGAAAATTTGCTTTAATTCATATATTTATATTTGCTTTAA----TTCATATATATATA
+         86950     86960     86970     86980         86990         
+
+           290       280       270       260       250       240   
+HUMBE- AGAAAAAATATATATATATATATATGTGTATATGTACACACATACATATACATATATATG
+          : : :::::::::::::::::: : ::::: :  : : ::: ::::: :::  ::: 
+BACR08 TATATATATATATATATATATATATATATATATATGTATATATATATATATATAATTATC
+   87000     87010     87020     87030     87040     87050         
+
+           230       220         210       200       190       180 
+HUMBE- CATTCATTTGTTGTTGTTTTTC--TTAATTTGCTCATGCATGCTAATAAATTATGTCTAA
+        :     : ::: : :::::::  ::  :::    ::   ::::::::  ::        
+BACR08 AAAGTTATAGTTTTAGTTTTTCAGTTTTTTTCAATATAGTTGCTAATAGTTTTGCCGATA
+   87060     87070     87080     87090     87100     87110         
+
+             170       160       150       140       130       120 
+HUMBE- AAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAGGTTTTGGGAA
+                                                                   
+BACR08 GTGCTACCTGTAGTTTCCACTTGCCATCTCTAAAAACGTGATAAACACGCTTGTTTTGTT
+   87120     87130     87140     87150     87160     87170         
+
+>>BACR15J11 : AC008202, 186159 bases, from 3R:95.         (44199 nt)
+rev-comp initn: 198 init1: 198 opt: 215  Z-score: 108.6  bits: 38.7 E(): 0.79
+ 68.807% identity (70.093% ungapped) in 109 nt overlap (299-192:182666-182773)
+
+    330       320       310       300       290       280          
+HUMBE- TCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGT
+                                     ::::  : : :::::::::::::::::: :
+BACR15 AAACAAAAAAAAAGAAATAGTATAGAATGCAAAAATATATATATATATATATATATATAT
+      182640    182650    182660    182670    182680    182690     
+
+    270       260       250       240        230       220         
+HUMBE- GTATATGTACACACATACATATACATATATATGC-ATTCATTTGTTGTTGTTTTTCTTAA
+        ::::: :: : : ::: :: ::::::::::::: ::  :  : :   : : :: : : :
+BACR15 ATATATATATATATATATATGTACATATATATGCTATATAGATTTCTATATATTCCATTA
+      182700    182710    182720    182730    182740    182750     
+
+     210       200       190       180       170       160         
+HUMBE- TTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCT
+        ::: :::  :    ::::                                         
+BACR15 -TTGATCAGTCTGTGTAATCTCTCTCGCTAAAGAAACGCCTGATGCTGGATGCTAGAGGC
+       182760    182770    182780    182790    182800    182810    
+
+>>BACR14N05 : AC099033, 170299 bases, from 2R:55.         (73982 nt)
+ initn: 265 init1: 193 opt: 214  Z-score: 104.3  bits: 38.6 E(): 0.81
+ 79.412% identity (79.412% ungapped) in 68 nt overlap (234-301:116424-116491)
+
+           210       220       230       240       250       260   
+HUMBE- GAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTAC
+                                     :: ::::::: : ::::::::: : : :: 
+BACR14 AGGTAAAGGGGGTACAGAACTTATATGCACATCCATATATTTATATATGTATATATATAT
+        116400    116410    116420    116430    116440    116450   
+
+           270       280       290       300       310       320   
+HUMBE- ATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAA
+       ::::: : :::::::::::::::::: : :  :::: :                      
+BACR14 ATATATATATATATATATATATATATATGTGTTTTTGTGGAAGGACTAAGGCGCGGTTCA
+        116460    116470    116480    116490    116500    116510   
+
+           330       340       350       360       370       380   
+HUMBE- TAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTAT
+                                                                   
+BACR14 CCGATTGCGATGAACTATGGCATGGAAAGCTGCTCCTCTTTCTCTATTCATAAAACCATT
+        116520    116530    116540    116550    116560    116570   
+
+>>BACR48A20 : AC011072, 166158 bases, from 2R:55.         (73982 nt)
+ initn: 265 init1: 193 opt: 214  Z-score: 104.3  bits: 38.6 E(): 0.81
+ 79.412% identity (79.412% ungapped) in 68 nt overlap (234-301:26695-26762)
+
+           210       220       230       240       250       260   
+HUMBE- GAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTAC
+                                     :: ::::::: : ::::::::: : : :: 
+BACR48 AGGTAAAGGGGGTACAGAACTTATATGCACATCCATATATTTATATATGTATATATATAT
+        26670     26680     26690     26700     26710     26720    
+
+           270       280       290       300       310       320   
+HUMBE- ATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAA
+       ::::: : :::::::::::::::::: : :  :::: :                      
+BACR48 ATATATATATATATATATATATATATATGTGTTTTTGTGGAAGGACTAAGGCGCGGTTCA
+        26730     26740     26750     26760     26770     26780    
+
+           330       340       350       360       370       380   
+HUMBE- TAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCTGTAAGTAT
+                                                                   
+BACR48 CCGATTGCGATGAACTATGGCATGGAAAGCTGCTCCTCTTTCTCTATTCATAAAACCATT
+        26790     26800     26810     26820     26830     26840    
+
+>>BACR19D23 : AC095014, 170801 bases, from 3R:84.         (73982 nt)
+rev-comp initn: 131 init1: 131 opt: 214  Z-score: 104.3  bits: 38.6 E(): 0.81
+ 64.773% identity (69.091% ungapped) in 176 nt overlap (324-154:100627-100796)
+
+         350       340       330       320       310       300     
+HUMBE- CTACCTCAGTTCTAAGCATATCTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAG
+                                     :: : :: :::::  :  : :: : ::  :
+BACR19 GGCATCCTCTGACCTTAAAATACTATAAAGATATTGA-TAAAATATAAT-GTTATAAGCG
+     100600    100610    100620    100630     100640     100650    
+
+         290       280       270       260       250       240     
+HUMBE- AAAAAATATATATATATATATATGTGTATATGTACACACATACATATACATATATATGCA
+       ::: :: :::::: :::: :::: : ::::: :: : : ::::::::: ::::::    :
+BACR19 AAATAAAATATAT-TATA-ATATATATATATATATATATATACATATATATATATTCTAA
+       100660     100670     100680    100690    100700    100710  
+
+         230         220       210       200         190           
+HUMBE- TTCATTTGT--TGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAA--ATTAT-GTCTA
+       :  : :: :  : :: :: :  :  :: :  : ::  ::  : ::::  :: ::  : ::
+BACR19 TATAATTATAATATTATTATAATATATATAATAAT-TATTATTATAATTATAATAATATA
+         100720    100730    100740     100750    100760    100770 
+
+    180       170       160       150       140       130          
+HUMBE- AAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAGGTTTTGGGA
+       : :::: ::::: :: ::: :  :::                                  
+BACR19 ATAATA-AATAATTAGAAAACCTTGTTAGAAAGAAACCATTTATTAAAAATGTTGTCGAA
+           100780    100790    100800    100810    100820    100830
+
+>>BACR19J06 : AC010581, 175053 bases, from 3R:85.         (73982 nt)
+ initn: 188 init1: 188 opt: 214  Z-score: 104.3  bits: 38.6 E(): 0.81
+ 72.093% identity (72.093% ungapped) in 86 nt overlap (208-293:63609-63694)
+
+       180       190       200       210       220       230       
+HUMBE- TTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGC
+                                     :::: :  :: :: :   : : :  :::  
+BACR19 TCGTGCTTATGAAAATAGTCGAATCTATATAAATAATTAATAATATATATATAAAAATAT
+    63580     63590     63600     63610     63620     63630        
+
+       240       250       260       270       280       290       
+HUMBE- ATATATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTT
+       :::::::: ::::: ::: : : :: ::::: : :::::::::::::::: ::: :    
+BACR19 ATATATATATATATATATATATATATATATATATATATATATATATATATGTTTATAAAC
+    63640     63650     63660     63670     63680     63690        
+
+       300       310       320       330       340       350       
+HUMBE- TTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGT
+                                                                   
+BACR19 TTCCTCAACATGTAATTCTTGGAATACTTTATCAGTTAGCGGATTCATTTTTTGATAAGT
+    63700     63710     63720     63730     63740     63750        
+
+>>BACR11B14 : 174999 bases, from 2R:41.                   (73982 nt)
+rev-comp initn: 103 init1: 103 opt: 213  Z-score: 103.7  bits: 38.5 E(): 0.88
+ 62.500% identity (65.217% ungapped) in 168 nt overlap (303-141:116881-117046)
+
+        330       320       310       300       290         280    
+HUMBE- CTTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATAT--ATATATATAT
+                                     :: : : : : :: ::::  ::::::  ::
+BACR11 GGCGGACGAGTTGACTCAGCTGTTGATACTTATGTATATATAATATATTAATATATTAAT
+           116860    116870    116880    116890    116900    116910
+
+          270       260       250       240       230       220    
+HUMBE- ATATGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTT
+       : :::: ::::: :  : ::::: :::::  :::: ::  ::   : : :: :: : : :
+BACR11 ACATGTATATATATTAATACATATATATATCTATACATAAAT--GTATATTTTTATATAT
+           116920    116930    116940    116950      116960        
+
+          210       200       190         180        170       160 
+HUMBE- CTTAATTTGCTCATGCATGCTAATAAAT-TATGTCTA-AAAATAGA-ATAAATACAAATC
+        : ::: :    :  ::   ::::: :: ::: : :: :: ::: : ::  ::: : :: 
+BACR11 TTAAATGTATATAAACAAATTAATATATATATTTATATAATATATATATGCATATATATA
+   116970    116980    116990    117000    117010    117020        
+
+             150       140       130       120       110       100 
+HUMBE- AATGTGCTCTGTGCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGA
+       :   ::   ::: :::::                                          
+BACR11 ATACTGAAATGTACATTAAATCTCAATCCCTAAGTTGCTCCCTCTGCAAGGCTAAACAAA
+   117030    117040    117050    117060    117070    117080        
+
+>>BACR06P07 : 187793 bases, from 2R:41.                   (45833 nt)
+ initn: 103 init1: 103 opt: 213  Z-score: 107.0  bits: 38.4 E(): 0.93
+ 62.500% identity (65.217% ungapped) in 168 nt overlap (141-303:178570-178735)
+
+              120       130       140       150       160       170
+HUMBE- CCTCTTGTTTCCCAAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTA
+                                     ::::: :::   ::   : :: : :::  :
+BACR06 TGCAGAGGGAGCAACTTAGGGATTGAGATTTAATGTACATTTCAGTATTATATATATGCA
+  178540    178550    178560    178570    178580    178590         
+
+                180        190       200         210       220     
+HUMBE- T-TCTAT-TTTTAGACATA-ATTTATTAGCATG--CATGAGCAAATTAAGAAAAACAACA
+       : : ::: :: :: : ::: :: :::::   ::   ::   ::  : :: : : : ::  
+BACR06 TATATATATTATATAAATATATATATTAATTTGTTTATATACATTTAAATATATAAAAAT
+  178600    178610    178620    178630    178640    178650         
+
+         230       240       250       260       270       280     
+HUMBE- ACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATAT
+       : : ::  ::: ::: :::: ::::::::  :   :: :::::::  :::  ::::::  
+BACR06 ATACATTTATGTATAGATATATATATGTA--TTAATATATATACATGTATTAATATATTA
+  178660    178670    178680      178690    178700    178710       
+
+         290       300       310       320       330       340     
+HUMBE- ATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAA
+       :::: :: : : : : ::                                          
+BACR06 ATATATTATATATACATAAGTATCAACAGCTGAGTCAACTCGTCCGCCTGTTTTTACCTA
+    178720    178730    178740    178750    178760    178770       
+
+>>BACR19J06 : AC010581, 175053 bases, from 3R:85.         (73982 nt)
+rev-comp initn: 208 init1: 208 opt: 212  Z-score: 103.1  bits: 38.4 E(): 0.96
+ 81.250% identity (81.250% ungapped) in 64 nt overlap (293-230:63631-63694)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     :::::::::::::::::::::: : :::::
+BACR19 ATCTATATAAATAATTAATAATATATATATAAAAATATATATATATATATATATATATAT
+            63610     63620     63630     63640     63650     63660
+
+        260       250       240       230       220       210      
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTC
+        :: : : ::: ::::: ::::::::   :: ::                          
+BACR19 ATATATATATATATATATATATATATATGTTTATAAACTTCCTCAACATGTAATTCTTGG
+            63670     63680     63690     63700     63710     63720
+
+        200       190       180       170       160       150      
+HUMBE- ATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCA
+                                                                   
+BACR19 AATACTTTATCAGTTAGCGGATTCATTTTTTGATAAGTGCACTTAGGACCCGAAACTGGG
+            63730     63740     63750     63760     63770     63780
+
+>>BACR25C18 : AC092401, 185200 bases, from X:13.          (73982 nt)
+ initn: 198 init1: 198 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 70.968% identity (74.157% ungapped) in 93 nt overlap (202-290:138243-138335)
+
+             180       190       200       210       220           
+HUMBE- TCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACA----AC
+                                     :: : : :::::  :: :: :  :    ::
+BACR25 GTATTCTGAATGTAAAACAGCCTAATTAAAATTATCTAATTAGTAATAATACGATTACAC
+         138220    138230    138240    138250    138260    138270  
+
+       230       240       250       260       270       280       
+HUMBE- AAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATAT
+       : :   :  ::::::::: ::::: ::: : : :: ::::: : ::::::::::::::::
+BACR25 ACACACACACATATATATATATATATATATATATATATATATATATATATATATATATAT
+         138280    138290    138300    138310    138320    138330  
+
+       290       300       310       320       330       340       
+HUMBE- ATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACT
+       :::                                                         
+BACR25 ATTAATATATATATATATTTACCAAATAATATAACATATTTATGCTCTAAAGTATATATG
+         138340    138350    138360    138370    138380    138390  
+
+>>BACR22N03 : AC092397, 167201 bases, from 2L:26.         (73982 nt)
+ initn: 183 init1: 183 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 77.333% identity (79.452% ungapped) in 75 nt overlap (225-297:100683-100757)
+
+          200       210       220       230       240       250    
+HUMBE- AGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTA
+                                     : :: ::::::  :::::::: ::::: ::
+BACR22 CCGCACTCAAAACTGCCACTGCCAGCTGGTAGCACATGAATATATATATATATATATATA
+         100660    100670    100680    100690    100700    100710  
+
+          260       270       280         290       300       310  
+HUMBE- TGTGTGTACATATACACATATATATA--TATATATATTTTTTCTTTTCTTACCAGAAGGT
+       : : : :: ::::: : :::::::::   :::::::: ::::: :               
+BACR22 TATATATATATATATATATATATATAGGGATATATATATTTTCCTGATATCTAGCCACAG
+         100720    100730    100740    100750    100760    100770  
+
+            320       330       340       350       360       370  
+HUMBE- TTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGT
+                                                                   
+BACR22 CGCGACATGAAAATTCAGCTACGGTCTCGTTCGACGATCGTCTCCCAGCGAACTGAAATC
+         100780    100790    100800    100810    100820    100830  
+
+>>BACR03L12 : AC008357, 190672 bases, from 3R:86.         (73982 nt)
+rev-comp initn: 147 init1: 147 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 65.217% identity (67.669% ungapped) in 138 nt overlap (302-165:5957-6089)
+
+       330       320       310       300       290       280       
+HUMBE- TTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATA
+                                     :: :::   : :::::::::::::::::::
+BACR03 ATATGTGTGCCAGCCCAACCAGCACACATAAATAAATATATAAATATATATATATATATA
+       5930      5940      5950      5960      5970      5980      
+
+       270       260       250       240       230       220       
+HUMBE- TGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTT
+       : : ::::::::::   :::  :: : :::::: : :::  :: : : : :  : :: : 
+BACR03 TATATATATGTACATGTATAGGTA-AAATATATGTACAT--ATGTATGGGTTATATTGTG
+       5990      6000      6010       6020        6030      6040   
+
+       210       200       190       180       170       160       
+HUMBE- AATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTG
+         ::: :    ::  ::  : ::: :::   : ::::::   :::: :            
+BACR03 GCTTTCCCGGCGC-GGCATAAAAAGTATAAAT-AAAATAATTTAAAAAGGGCCAACGATT
+          6050       6060      6070       6080      6090      6100 
+
+       150       140       130       120       110       100       
+HUMBE- CTCTGTGCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTC
+                                                                   
+BACR03 CGAAGGGGGTTAAAACTTTTCCCGAGCCCTGCTCCTCCACTTACCCTTTCTCACGATTTT
+            6110      6120      6130      6140      6150      6160 
+
+>>BACR10E07 : AC012675, 176818 bases, from 2L:27.         (73982 nt)
+ initn: 202 init1: 202 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 65.000% identity (65.546% ungapped) in 120 nt overlap (169-288:46142-46260)
+
+      140       150       160       170       180       190        
+HUMBE- ACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTTAGACATAATTTATTAGCA
+                                     :::::  :::  ::    ::::::  : : 
+BACR10 CGATGAACTTCAGTCAATGCAAATGGAACCTATTC-GTTTCAAGGTGGAATTTAAAAACT
+           46120     46130     46140      46150     46160     46170
+
+      200       210       220       230       240       250        
+HUMBE- TGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTG
+           : : :::  :  :::::::::::   :  ::::    :::: : ::: :::: :::
+BACR10 AAGGTAAACAAGATGGGAAAAACAACACAGATCGAATAATCATATTTATATCTGTAGGTG
+            46180     46190     46200     46210     46220     46230
+
+      260       270       280       290       300       310        
+HUMBE- TGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAAT
+       ::::::::::    ::: :::::::: : :                              
+BACR10 TGTACATATATGTGTATGTATATATAAACAAGCCCTGATGAAAACGTATTGACGATGCAA
+            46240     46250     46260     46270     46280     46290
+
+>>BACR22H11 : AC013431, 191558 bases, from X:13.          (73982 nt)
+ initn: 198 init1: 198 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 70.968% identity (74.157% ungapped) in 93 nt overlap (202-290:116973-117065)
+
+             180       190       200       210       220           
+HUMBE- TCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACA----AC
+                                     :: : : :::::  :: :: :  :    ::
+BACR22 GTATTCTGAATGTAAAACAGCCTAATTAAAATTATCTAATTAGTAATAATACGATTACAC
+         116950    116960    116970    116980    116990    117000  
+
+       230       240       250       260       270       280       
+HUMBE- AAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATATATATATATATAT
+       : :   :  ::::::::: ::::: ::: : : :: ::::: : ::::::::::::::::
+BACR22 ACACACACACATATATATATATATATATATATATATATATATATATATATATATATATAT
+         117010    117020    117030    117040    117050    117060  
+
+       290       300       310       320       330       340       
+HUMBE- ATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACT
+       :::                                                         
+BACR22 ATTAATATATATATATATTTACCAAATAATATAACATATTTATGCTCTAAAGTATATATG
+         117070    117080    117090    117100    117110    117120  
+
+>>BACR19I14 : AC095013, 175648 bases, from 3R:83.         (73982 nt)
+ initn: 251 init1: 188 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 71.930% identity (79.612% ungapped) in 114 nt overlap (187-293:87497-87606)
+
+        160       170       180       190       200       210      
+HUMBE- TTGATTTGTATTTATTCTATTTTTAGACATAATTTATTAGCATGCATGAGCAAATTAAGA
+                                     :: ::::  ::::   ::::    ::  : 
+BACR19 CAAGTCCCCCATGAAAATGGAGAAAAAATGAAATTATGCGCAT---TGAG-TTGTTCGGC
+      87470     87480     87490     87500        87510      87520  
+
+        220       230            240         250       260         
+HUMBE- AAAACAACAACAAA--TGAA---TGCATATATAT-GT-ATATGTATGTGTGTACATATAC
+       ::: : : ::::::  ::::    : :::::::: :: :::::::::: : :: ::::: 
+BACR19 AAACCCAAAACAAAGGTGAAGCCAGGATATATATAGTCATATGTATGTATATATATATAT
+          87530     87540     87550     87560     87570     87580  
+
+     270       280       290       300       310       320         
+HUMBE- ACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGA
+       : :::::::::::::::::: : :                                    
+BACR19 ATATATATATATATATATATATATAGAGCTGCTTGTTTGGGTCCTACCAGATTTTTTTAT
+          87590     87600     87610     87620     87630     87640  
+
+>>BACR11C03 : AC008220, 186549 bases, from 3R:100.        (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 78.261% identity (78.261% ungapped) in 69 nt overlap (298-230:32189-32257)
+
+             320       310       300       290       280       270 
+HUMBE- CCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTG
+                                     :::: : : :::::::::::::::::: : 
+BACR11 ATGAACACAGGCAGCAAAGTGCAGACCACGAAAGTATATATATATATATATATATATATA
+    32160     32170     32180     32190     32200     32210        
+
+             260       250       240       230       220       210 
+HUMBE- TATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATT
+       ::::: :: : : ::: ::::: ::::::::  ::  ::                     
+BACR11 TATATATATATATATATATATATATATATATATATATATAAATATATATATATAGATAGG
+    32220     32230     32240     32250     32260     32270        
+
+             200       190       180       170       160       150 
+HUMBE- TGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCT
+                                                                   
+BACR11 GGAGTAGGCGGTACAGAAGGTTACATAATATAACATAGGAGGGCCTTATAAGTCAGAGCG
+    32280     32290     32300     32310     32320     32330        
+
+>>BACR23O04 : AC008316, 160817 bases, from 3R:85.         (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 70.968% identity (71.739% ungapped) in 93 nt overlap (291-200:36477-36569)
+
+      320       310       300       290       280       270        
+HUMBE- TGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGT
+                                     : :::::::::::::::::: : ::::: :
+BACR23 AGTAGTTAGTATGTTGGTTCGCATATATGTATATATATATATATATATATATATATATAT
+      36450     36460     36470     36480     36490     36500      
+
+      260       250       240        230       220       210       
+HUMBE- ACACACATACATATACATATATATGCATTCAT-TTGTTGTTGTTTTTCTTAATTTGCTCA
+       : : : ::: ::::: ::::::::  ::  :: : ::: : :    :  :: :: : :::
+BACR23 ATATATATATATATATATATATATATATATATATAGTTTTAGAAAGTAGTAGTTAGTTCA
+      36510     36520     36530     36540     36550     36560      
+
+       200       190       180       170       160       150       
+HUMBE- TGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCAT
+       : :                                                         
+BACR23 TCCGGCTCGGCGGCTTAAAGTTCTTGTAAGTAGATGGGTTTGTTACGCTTGCGTTTGCCA
+      36570     36580     36590     36600     36610     36620      
+
+>>BACR28B01 : AC008139, 170675 bases, from 3R:85.         (73982 nt)
+rev-comp initn: 219 init1: 147 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 65.217% identity (67.669% ungapped) in 138 nt overlap (302-165:142757-142889)
+
+       330       320       310       300       290       280       
+HUMBE- TTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATA
+                                     :: :::   : :::::::::::::::::::
+BACR28 ATATGTGTGCCAGCCCAACCAGCACACATAAATAAATATATAAATATATATATATATATA
+     142730    142740    142750    142760    142770    142780      
+
+       270       260       250       240       230       220       
+HUMBE- TGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTT
+       : : ::::::::::   :::  :: : :::::: : :::  :: : : : :  : :: : 
+BACR28 TATATATATGTACATGTATAGGTA-AAATATATGTACAT--ATGTATGGGTTATATTGTG
+     142790    142800    142810     142820      142830    142840   
+
+       210       200       190       180       170       160       
+HUMBE- AATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTG
+         ::: :    ::  ::  : ::: :::   : ::::::   :::: :            
+BACR28 GCTTTCCCGGCGC-GGCATAAAAAGTATAAAT-AAAATAATTTAAAAAGGGCCAACGATT
+        142850     142860    142870     142880    142890    142900 
+
+       150       140       130       120       110       100       
+HUMBE- CTCTGTGCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTC
+                                                                   
+BACR28 CGAAGGGGGTTAAAACTTTTCCCGAGCCCTGCTCCTCCACTTACCCTTTCTCACGATTTT
+          142910    142920    142930    142940    142950    142960 
+
+>>BACR30F01 : AC008315, 177028 bases, from 3R:85.         (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 210  Z-score: 101.8  bits: 38.1 E():  1.1
+ 70.968% identity (71.739% ungapped) in 93 nt overlap (291-200:143630-143722)
+
+      320       310       300       290       280       270        
+HUMBE- TGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGT
+                                     : :::::::::::::::::: : ::::: :
+BACR30 AGTAGTTAGTATGTTGGTTCGCATATATGTATATATATATATATATATATATATATATAT
+  143600    143610    143620    143630    143640    143650         
+
+      260       250       240        230       220       210       
+HUMBE- ACACACATACATATACATATATATGCATTCAT-TTGTTGTTGTTTTTCTTAATTTGCTCA
+       : : : ::: ::::: ::::::::  ::  :: : ::: : :    :  :: :: : :::
+BACR30 ATATATATATATATATATATATATATATATATATAGTTTTAGAAAGTAGTAGTTAGTTCA
+  143660    143670    143680    143690    143700    143710         
+
+       200       190       180       170       160       150       
+HUMBE- TGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCAT
+       : :                                                         
+BACR30 TCCGGCTCGGCGGCTTAAAGTTCTTGTAAGTAGATGGGTTTGTTACGCTTGCGTTTGCCA
+  143720    143730    143740    143750    143760    143770         
+
+>>BACR06P08 : AC007975, 180630 bases, from 3R:100.        (38670 nt)
+rev-comp initn: 188 init1: 188 opt: 210  Z-score: 106.3  bits: 38.0 E():  1.2
+ 78.261% identity (78.261% ungapped) in 69 nt overlap (298-230:154236-154304)
+
+             320       310       300       290       280       270 
+HUMBE- CCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTG
+                                     :::: : : :::::::::::::::::: : 
+BACR06 ATGAACACAGGCAGCAAAGTGCAGACCACGAAAGTATATATATATATATATATATATATA
+      154210    154220    154230    154240    154250    154260     
+
+             260       250       240       230       220       210 
+HUMBE- TATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATT
+       ::::: :: : : ::: ::::: ::::::::  ::  ::                     
+BACR06 TATATATATATATATATATATATATATATATATATATATAAATATATATATATAGATAGG
+      154270    154280    154290    154300    154310    154320     
+
+             200       190       180       170       160       150 
+HUMBE- TGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCT
+                                                                   
+BACR06 GGAGTAGGCGGTACAGAAGGTTACATAATATAACATAGGAGGGCCTTATAAGTCAGAGCG
+      154330    154340    154350    154360    154370    154380     
+
+>>BACR30F01 : AC008315, 177028 bases, from 3R:85.         (35068 nt)
+rev-comp initn: 188 init1: 188 opt: 210  Z-score: 107.0  bits: 38.0 E():  1.2
+ 70.968% identity (71.739% ungapped) in 93 nt overlap (291-200:143630-143722)
+
+      320       310       300       290       280       270        
+HUMBE- TGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGT
+                                     : :::::::::::::::::: : ::::: :
+BACR30 AGTAGTTAGTATGTTGGTTCGCATATATGTATATATATATATATATATATATATATATAT
+  143600    143610    143620    143630    143640    143650         
+
+      260       250       240        230       220       210       
+HUMBE- ACACACATACATATACATATATATGCATTCAT-TTGTTGTTGTTTTTCTTAATTTGCTCA
+       : : : ::: ::::: ::::::::  ::  :: : ::: : :    :  :: :: : :::
+BACR30 ATATATATATATATATATATATATATATATATATAGTTTTAGAAAGTAGTAGTTAGTTCA
+  143660    143670    143680    143690    143700    143710         
+
+       200       190       180       170       160       150       
+HUMBE- TGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCAT
+       : :                                                         
+BACR30 TCCGGCTCGGCGGCTTAAAGTTCTTGTAAGTAGATGGGTTTGTTACGCTTGCGTTTGCCA
+  143720    143730    143740    143750    143760    143770         
+
+>>BACR30G03 : AC008355, 151500 bases, from 3R:82.         (73982 nt)
+rev-comp initn: 174 init1: 174 opt: 209  Z-score: 101.1  bits: 38.0 E():  1.2
+ 59.242% identity (61.275% ungapped) in 211 nt overlap (396-188:39228-39433)
+
+           420       410       400       390       380       370   
+HUMBE- GATATGTAGATGGATCTCTTCCTGCGTCTCCAGAATATGCAAAATACTTACAGGACAGAA
+                                     :::::  : ::  : ::  ::::   :  :
+BACR30 ATAGTATTGAAAATGTAATCATATGTATATCAGAAAGTTCACGAAAC-CACAGATTATTA
+     39200     39210     39220     39230     39240      39250      
+
+           360       350       340        330       320       310  
+HUMBE- TGGATGAAAACTCTACCTCAGTTCTAAGCATATCT-TCTCCTTATTTGGATTAAAACCTT
+          :: :: : :::: :: :  : :: : :::: : : : : : : :  ::  : :  : 
+BACR30 AATATTAATA-TCTATCT-ATATATATGTATATATATATACATGTATATATACATATATG
+      39260      39270      39280     39290     39300     39310    
+
+            300        290       280       270       260       250 
+HUMBE- CTGGTAAGAAAAGA-AAAAATATATATATATATATATGTGTATATGTACACACATACATA
+           :: : : : :   : :::: ::::::: ::::: ::::::: :  : : ::: :::
+BACR30 TATATATGTATATATGTATATATGTATATATGTATATATGTATATATGTATATATATATA
+        39320     39330     39340     39350     39360     39370    
+
+             240       230       220       210       200       190 
+HUMBE- TACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAA
+       :::::::: ::  :: ::: :::   : ::: : :  :: ::   ::   ::  : : ::
+BACR30 TACATATACATATATACATATGT--ATATTTATATATATGTGTGAATATGTGTAACTTAA
+        39380     39390       39400     39410     39420     39430  
+
+             180       170       160       150       140       130 
+HUMBE- TTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAG
+       :                                                           
+BACR30 TAGGAAAATGAATTTAATAGAGAAACAACAGACAAAAATTATAAATAATGTTGAAAAGAA
+          39440     39450     39460     39470     39480     39490  
+
+>>BACR15O02 : AC009735, 159478 bases, from 3R:82.         (73982 nt)
+rev-comp initn: 174 init1: 174 opt: 209  Z-score: 101.1  bits: 38.0 E():  1.2
+ 59.242% identity (61.275% ungapped) in 211 nt overlap (396-188:126813-127018)
+
+           420       410       400       390       380       370   
+HUMBE- GATATGTAGATGGATCTCTTCCTGCGTCTCCAGAATATGCAAAATACTTACAGGACAGAA
+                                     :::::  : ::  : ::  ::::   :  :
+BACR15 ATAGTATTGAAAATGTAATCATATGTATATCAGAAAGTTCACGAAAC-CACAGATTATTA
+         126790    126800    126810    126820     126830    126840 
+
+           360       350       340        330       320       310  
+HUMBE- TGGATGAAAACTCTACCTCAGTTCTAAGCATATCT-TCTCCTTATTTGGATTAAAACCTT
+          :: :: : :::: :: :  : :: : :::: : : : : : : :  ::  : :  : 
+BACR15 AATATTAATA-TCTATCT-ATATATATGTATATATATATACATGTATATATACATATATG
+          126850      126860    126870    126880    126890         
+
+            300        290       280       270       260       250 
+HUMBE- CTGGTAAGAAAAGA-AAAAATATATATATATATATATGTGTATATGTACACACATACATA
+           :: : : : :   : :::: ::::::: ::::: ::::::: :  : : ::: :::
+BACR15 TATATATGTATATATGTATATATGTATATATGTATATATGTATATATGTATATATATATA
+  126900    126910    126920    126930    126940    126950         
+
+             240       230       220       210       200       190 
+HUMBE- TACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAA
+       :::::::: ::  :: ::: :::   : ::: : :  :: ::   ::   ::  : : ::
+BACR15 TACATATACATATATACATATGT--ATATTTATATATATGTGTGAATATGTGTAACTTAA
+  126960    126970    126980      126990    127000    127010       
+
+             180       170       160       150       140       130 
+HUMBE- TTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTATTAG
+       :                                                           
+BACR15 TAGGAAAATGAATTTAATAGAGAAACAACAGACAAAAATTATAAATAATGTTGAAAAGAA
+    127020    127030    127040    127050    127060    127070       
+
+>>BACR21B19 : AC011250, 161692 bases, from 4:101.         (73982 nt)
+rev-comp initn: 172 init1: 172 opt: 209  Z-score: 101.1  bits: 38.0 E():  1.2
+ 59.641% identity (63.333% ungapped) in 223 nt overlap (393-180:53098-53316)
+
+        420       410       400       390       380       370      
+HUMBE- ATGTAGATGGATCTCTTCCTGCGTCTCCAGAATATGCAAAATACTTACAGGACAGAATGG
+                                     :::::  : ::   : :::  : ::::   
+BACR21 AGTTTAAATGCAGCCAGAAATGATCAATTAAATATAAATAAAGGTCACATAAAAGAAAAA
+     53070     53080     53090     53100     53110     53120       
+
+        360       350          340       330         320       310 
+HUMBE- ATGAAAACTCTACCT---CAGTTCTAAGCATATCTTCTCCTT--ATTTGGATTAAAACCT
+           :::::  :  :   :: :: : ::: : :    :: ::  : ::: : ::: :  :
+BACR21 TAATAAACTTCATTTATACATTTAT-AGCTTTTTAAATCTTTACAATTGCA-TAATATAT
+     53130     53140     53150      53160     53170      53180     
+
+             300        290       280       270       260          
+HUMBE- TCTGGTAAGAAAAGA-AAAAATATATATATATATATATGTGTATATGTACACACATACAT
+            ::: :: : : : : :::::::::::::::::: : ::::: :: : : ::: ::
+BACR21 ATATATAATAACATATATATATATATATATATATATATATATATATATATATATATATAT
+       53190     53200     53210     53220     53230     53240     
+
+    250       240       230       220          210       200       
+HUMBE- ATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAAT---TTGCTCATGCATGCTAA
+       ::::::: ::::  :  ::  :::   : ::: : :  ::   ::  : ::  ::  :::
+BACR21 ATACATAAATATATA-GCAGATGTATATATTTGTATACATCTATTATTTATTTAT-ATAA
+       53250     53260      53270     53280     53290      53300   
+
+       190       180       170       160       150       140       
+HUMBE- TAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCATTAGTTACTTA
+       : : : :: : ::                                               
+BACR21 TCATTCATATATATTATATGTTCACTGATACATAAGCCATTCAGATCGGAATAAAATTGT
+         53310     53320     53330     53340     53350     53360   
+
+>>BACR28B01 : AC008139, 170675 bases, from 3R:85.         (28715 nt)
+rev-comp initn: 147 init1: 147 opt: 210  Z-score: 108.4  bits: 38.0 E():  1.2
+ 65.217% identity (67.669% ungapped) in 138 nt overlap (302-165:142757-142889)
+
+       330       320       310       300       290       280       
+HUMBE- TTCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATA
+                                     :: :::   : :::::::::::::::::::
+BACR28 ATATGTGTGCCAGCCCAACCAGCACACATAAATAAATATATAAATATATATATATATATA
+     142730    142740    142750    142760    142770    142780      
+
+       270       260       250       240       230       220       
+HUMBE- TGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTT
+       : : ::::::::::   :::  :: : :::::: : :::  :: : : : :  : :: : 
+BACR28 TATATATATGTACATGTATAGGTA-AAATATATGTACAT--ATGTATGGGTTATATTGTG
+     142790    142800    142810     142820      142830    142840   
+
+       210       200       190       180       170       160       
+HUMBE- AATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTG
+         ::: :    ::  ::  : ::: :::   : ::::::   :::: :            
+BACR28 GCTTTCCCGGCGC-GGCATAAAAAGTATAAAT-AAAATAATTTAAAAAGGGCCAACGATT
+        142850     142860    142870     142880    142890    142900 
+
+       150       140       130       120       110       100       
+HUMBE- CTCTGTGCATTAGTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTC
+                                                                   
+BACR28 CGAAGGGGGTTAAAACTTTTCCCGAGCCCTGCTCCTCCACTTACCCTTTCTCACGATTTT
+          142910    142920    142930    142940    142950    142960 
+
+>>BACR10P11 : AC007888, 164035 bases, from 2R:57.         (73982 nt)
+ initn: 139 init1: 139 opt: 208  Z-score: 100.5  bits: 37.9 E():  1.3
+ 62.570% identity (67.879% ungapped) in 179 nt overlap (166-337:41478-41649)
+
+         140       150       160       170        180       190    
+HUMBE- GTAACTAATGCACAGAGCACATTGATTTGTATTTATTCT-ATTTTTAGACATAATTTAT-
+                                     :::::   : :::  :: : : ::: ::: 
+BACR10 GAAAAATTGTATCCTTACTTCATAGCAAAAATTTAAAATAATTCATATATAAAATATATA
+     41450     41460     41470     41480     41490     41500       
+
+             200       210       220       230       240       250 
+HUMBE- --TAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATAT
+         ::  ::  :  :: :: :::  : : : :   : : ::  ::  :::::::: :::::
+BACR10 TATATAAT-AAACAGAAATTTATTATATATA--TATATATATATATATATATAT-TATAT
+     41510      41520     41530       41540     41550      41560   
+
+             260       270       280       290       300       310 
+HUMBE- GTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGG
+        :::   : :: ::::: : ::::::::::::::::::: : : : :: :    : :   
+BACR10 ATAT---TATATATATATATATATATATATATATATATTATATATATTATATATATATAT
+            41570     41580     41590     41600     41610     41620
+
+             320          330       340       350       360        
+HUMBE- TTTTAATCCAAATA---AGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATT
+        :::::   :::::   : ::: : ::::                               
+BACR10 ATTTAACAGAAATAGATAAGAGTAAATATTTCATGAAATCGTTGATAGGAAATCAGGAAT
+            41630     41640     41650     41660     41670     41680
+
+>>BACR48E23 : AC005721, 203183 bases, from 3R:88.         (73982 nt)
+ initn: 179 init1: 179 opt: 208  Z-score: 100.5  bits: 37.9 E():  1.3
+ 61.364% identity (64.671% ungapped) in 176 nt overlap (130-300:64507-64678)
+
+     100       110       120       130        140       150        
+HUMBE- CTAGTTTTTTACCTCTTGTTTCCCAAAACCTAATAAGTAA-CTAATGCACAGAGCACATT
+                                     ::: ::: ::  :::::   : :  : :: 
+BACR48 GCAAGCAATTCCTAGTGTTGTATAATAGATTAACAAGGAATTTAATG--AATATGAAATA
+      64480     64490     64500     64510     64520       64530    
+
+      160        170       180          190       200       210    
+HUMBE- GATTT-GTATTTATTCTATTTTTAGACATAAT---TTATTAGCATGCATGAGCAAATTAA
+        ::::  ::::  ::: :  :: :  :::: :     ::: :: :  ::    : ::: :
+BACR48 AATTTAATATTATTTCCA--TTAAAGCATATTCGAAAATTTGCTTTAATTCATATATTTA
+        64540     64550       64560     64570     64580     64590  
+
+          220       230       240       250       260       270    
+HUMBE- GAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATA
+        :    :   ::   ::  ::  :::::::: ::::: ::: : : :: ::::: : :::
+BACR48 TATTTGCTTTAATTCATATATATATATATATATATATATATATATATATATATATATATA
+          64600     64610     64620     64630     64640     64650  
+
+          280       290       300       310       320       330    
+HUMBE- TATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGA
+       : ::::::::::::: : :  :: ::                                  
+BACR48 TGTATATATATATATATATAATTATCAAAGTTATAGTTTTAGTTTTTCAGTTTTTTTCAA
+          64660     64670     64680     64690     64700     64710  
+
+>>BACR08H11 : AC008136, 154890 bases, from 3R:88.         (73982 nt)
+ initn: 179 init1: 179 opt: 208  Z-score: 100.5  bits: 37.9 E():  1.3
+ 61.364% identity (64.671% ungapped) in 176 nt overlap (130-300:86888-87059)
+
+     100       110       120       130        140       150        
+HUMBE- CTAGTTTTTTACCTCTTGTTTCCCAAAACCTAATAAGTAA-CTAATGCACAGAGCACATT
+                                     ::: ::: ::  :::::   : :  : :: 
+BACR08 GCAAGCAATTCCTAGTGTTGTATAATAGATTAACAAGGAATTTAATG--AATATGAAATA
+     86860     86870     86880     86890     86900       86910     
+
+      160        170       180          190       200       210    
+HUMBE- GATTT-GTATTTATTCTATTTTTAGACATAAT---TTATTAGCATGCATGAGCAAATTAA
+        ::::  ::::  ::: :  :: :  :::: :     ::: :: :  ::    : ::: :
+BACR08 AATTTAATATTATTTCCA--TTAAAGCATATTCGAAAATTTGCTTTAATTCATATATTTA
+       86920     86930       86940     86950     86960     86970   
+
+          220       230       240       250       260       270    
+HUMBE- GAAAAACAACAACAAATGAATGCATATATATGTATATGTATGTGTGTACATATACACATA
+        :    :   ::   ::  ::  :::::::: ::::: ::: : : :: ::::: : :::
+BACR08 TATTTGCTTTAATTCATATATATATATATATATATATATATATATATATATATATATATA
+         86980     86990     87000     87010     87020     87030   
+
+          280       290       300       310       320       330    
+HUMBE- TATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGA
+       : ::::::::::::: : :  :: ::                                  
+BACR08 TGTATATATATATATATATAATTATCAAAGTTATAGTTTTAGTTTTTCAGTTTTTTTCAA
+         87040     87050     87060     87070     87080     87090   
+
+>>BACR28F21 : AC010705, 163958 bases, from X:11.          (21998 nt)
+ initn: 172 init1: 172 opt: 209  Z-score: 109.6  bits: 37.8 E():  1.4
+ 75.000% identity (75.000% ungapped) in 76 nt overlap (226-301:149520-149595)
+
+         200       210       220       230       240       250     
+HUMBE- GCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATATGTATATGTAT
+                                     ::: ::  ::  :::::::: ::::: :::
+BACR28 GTCGCCTCATCCCCTTTTTTGGATATATACACACATATATATATATATATATATATATAT
+  149490    149500    149510    149520    149530    149540         
+
+         260       270       280       290       300       310     
+HUMBE- GTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGTTTT
+        : : :: ::::::: ::::::::::::: :::: : : : : : :              
+BACR28 ATATTTATATATACATATATATATATATAAATATATGTACATATATGTGCCCCCTTTTCG
+  149550    149560    149570    149580    149590    149600         
+
+         320       330       340       350       360       370     
+HUMBE- AATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGTCCT
+                                                                   
+BACR28 GCACTGCCATGTTGTCCTCGGTTATGTCCGATGGACGCGATAACGGCAAAAACGACGCCG
+  149610    149620    149630    149640    149650    149660         
+
+>>BACR02C24 : 234783 bases, from X:12.                    (21843 nt)
+rev-comp initn: 172 init1: 172 opt: 209  Z-score: 109.6  bits: 37.8 E():  1.4
+ 75.000% identity (75.000% ungapped) in 76 nt overlap (301-226:219922-219997)
+
+      330       320       310       300       290       280        
+HUMBE- TCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATAT
+                                     : : : : : : :::: :::::::::::::
+BACR02 GACAACATGGCAGTGCCGAAAAGGGGGCACATATATGTACATATATTTATATATATATAT
+          219900    219910    219920    219930    219940    219950 
+
+      270       260       250       240       230       220        
+HUMBE- GTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTA
+        ::::::: :: : : ::: ::::: ::::::::  ::  :: :::              
+BACR02 ATGTATATATAAATATATATATATATATATATATATATATATGTGTGTATATATCCAAAA
+          219960    219970    219980    219990    220000    220010 
+
+      210       200       190       180       170       160        
+HUMBE- ATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGC
+                                                                   
+BACR02 AAGGGGATGAGGCGACACATGTTCTGTCCGTTTTTCGTTGCGTTTTCGGGACTTTTTCGC
+          220020    220030    220040    220050    220060    220070 
+
+>>BACR36J03 : AC091634, 159065 bases, from 2R:50.         (73982 nt)
+rev-comp initn: 168 init1: 168 opt: 207  Z-score: 99.9  bits: 37.8 E():  1.4
+ 72.043% identity (73.626% ungapped) in 93 nt overlap (301-209:2749-2839)
+
+      330       320       310       300       290       280        
+HUMBE- TCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATAT
+                                     : : : : : : : ::::::::::::::::
+BACR36 ATGCTTAATCTATATAATACTACTATATGTACATATGTATATACATATATATATATATAT
+     2720      2730      2740      2750      2760      2770        
+
+      270       260       250       240       230       220        
+HUMBE- GTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTA
+        : ::::: :::: : ::: ::::::::  :::: :  : ::::  :  :::::: ::  
+BACR36 ATATATATATACATATATATATATACATTAATAT-CTCTGATTTAAT-ATGTTTTCCTGT
+     2780      2790      2800      2810       2820       2830      
+
+      210       200       190       180       170       160        
+HUMBE- ATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGC
+       :::                                                         
+BACR36 ATTCCTGTTCTATATTTGGTATACCTTATCAACTCAACGTCCAACGGTACTTTAAACTGA
+       2840      2850      2860      2870      2880      2890      
+
+>>BACR40P19 : AC010667, 159570 bases, from 4:101.         (73982 nt)
+rev-comp initn: 179 init1: 179 opt: 207  Z-score: 99.9  bits: 37.8 E():  1.4
+ 70.000% identity (70.000% ungapped) in 90 nt overlap (301-212:96607-96696)
+
+      330       320       310       300       290       280        
+HUMBE- TCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATAT
+                                     : : ::  : : ::::::::::::::::::
+BACR40 AAATATATATAACTATATATATATAAATATATATAACTATATATATATATATATATATAT
+      96580     96590     96600     96610     96620     96630      
+
+      270       260       250       240       230       220        
+HUMBE- GTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTA
+        : ::::: :: :   ::: ::::: ::::::::  ::  :::: :   : : : :: ::
+BACR40 ATATATATATATAACTATATATATATATATATATATATATATTTTTATATATATATCCTA
+      96640     96650     96660     96670     96680     96690      
+
+      210       200       190       180       170       160        
+HUMBE- ATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGC
+                                                                   
+BACR40 GCTCAATCTAAATATAAAGTTGGAATACTTTTGCCAGGGGTTTCTTTCTTTTCTTTCCTT
+      96700     96710     96720     96730     96740     96750      
+
+>>BACR14P04 : AC007329, 126140 bases, from 2R:50.         (73982 nt)
+ initn: 168 init1: 168 opt: 207  Z-score: 99.9  bits: 37.8 E():  1.4
+ 72.043% identity (73.626% ungapped) in 93 nt overlap (209-301:68803-68893)
+
+      180       190       200       210       220       230        
+HUMBE- TTAGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCA
+                                     :::  :: ::::: : :  :::: :  : :
+BACR14 GTTGATAAGGTATACCAAATATAGAACAGGAATACAGGAAAAC-ATATTAAATCAGAG-A
+          68780     68790     68800     68810      68820      68830
+
+      240       250       260       270       280       290        
+HUMBE- TATATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTT
+       :::  :::::::: ::: : :::: ::::: : :::::::::::::::: : : : : : 
+BACR14 TATTAATGTATATATATATATGTATATATATATATATATATATATATATGTATATACATA
+            68840     68850     68860     68870     68880     68890
+
+      300       310       320       330       340       350        
+HUMBE- TCTTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTT
+       : :                                                         
+BACR14 TGTACATATAGTAGTATTATATAGATTAAGCATTTTAAGAAGCTCTCTCGTGAAATATTC
+            68900     68910     68920     68930     68940     68950
+
+>>BACR30L17 : AC012390, 174441 bases, from 4:101.         (32481 nt)
+rev-comp initn: 179 init1: 179 opt: 207  Z-score: 105.6  bits: 37.7 E():  1.6
+ 70.000% identity (70.000% ungapped) in 90 nt overlap (301-212:157746-157835)
+
+      330       320       310       300       290       280        
+HUMBE- TCTCCTTATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATAT
+                                     : : ::  : : ::::::::::::::::::
+BACR30 AAATATATATAACTATATATATATAAATATATATAACTATATATATATATATATATATAT
+      157720    157730    157740    157750    157760    157770     
+
+      270       260       250       240       230       220        
+HUMBE- GTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTA
+        : ::::: :: :   ::: ::::: ::::::::  ::  :::: :   : : : :: ::
+BACR30 ATATATATATATAACTATATATATATATATATATATATATATTTTTATATATATATCCTA
+      157780    157790    157800    157810    157820    157830     
+
+      210       200       190       180       170       160        
+HUMBE- ATTTGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGC
+                                                                   
+BACR30 GCTCAATCTAAATATAAAGTTGGAATACTTTTGCCAGGGGTTTCTTTCTTTTCTTTCCTT
+      157840    157850    157860    157870    157880    157890     
+
+>>BACR34M23 : AC008190, 158012 bases, from 3R:83.         (73982 nt)
+ initn: 195 init1: 195 opt: 205  Z-score: 98.6  bits: 37.5 E():  1.7
+ 73.494% identity (74.390% ungapped) in 83 nt overlap (223-304:108588-108670)
+
+            200       210       220       230        240       250 
+HUMBE- TTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGC-ATATATATGTATAT
+                                     :::: : ::   :::  ::::::  : :::
+BACR34 TATATTTAATTTAACACTAGTCCGTCATATACAATACATTGCTGCTGTATATACATGTAT
+    108560    108570    108580    108590    108600    108610       
+
+             260       270       280       290       300       310 
+HUMBE- GTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGG
+        ::: : : :: ::::: : :::::::::::::::::: : :  :: ::::::       
+BACR34 ATATATATATATATATATATATATATATATATATATATATATAGTTCTCTTACACATAAT
+    108620    108630    108640    108650    108660    108670       
+
+             320       330       340       350       360       370 
+HUMBE- TTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTG
+                                                                   
+BACR34 TAACAATTTAGAAATTGCTCACCTGATTAACGTTTACATTTATGTTTGCCACAGTGCTTG
+    108680    108690    108700    108710    108720    108730       
+
+>>BACR02C19 : AC008189, 188359 bases, from 3R:82.         (73982 nt)
+rev-comp initn: 198 init1: 198 opt: 205  Z-score: 98.6  bits: 37.5 E():  1.7
+ 77.941% identity (77.941% ungapped) in 68 nt overlap (293-226:131184-131251)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     ::: :::::::::::::::::: : :::::
+BACR02 TACAATAGAAGTTATACAATGTTACAGTTTAAATATATATATATATATATATATATATAT
+        131160    131170    131180    131190    131200    131210   
+
+        260       250       240       230       220       210      
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTC
+        :: : : ::: ::::: ::::::::  :  ::: : :                      
+BACR02 ATATATATATATATATATATATATATAAAAGCATATATAAGCATAGATTATGTAAAGGGG
+        131220    131230    131240    131250    131260    131270   
+
+        200       190       180       170       160       150      
+HUMBE- ATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCA
+                                                                   
+BACR02 ATGTGTACTAAACACAAACTTCGTTTTGTCCGCTTTCCCTTAATCCATTTCTCATCCTTT
+        131280    131290    131300    131310    131320    131330   
+
+>>BACR24O24 : AC009538, 173927 bases, from 3R:83.         (73982 nt)
+ initn: 195 init1: 195 opt: 205  Z-score: 98.6  bits: 37.5 E():  1.7
+ 73.494% identity (74.390% ungapped) in 83 nt overlap (223-304:24568-24650)
+
+            200       210       220       230        240       250 
+HUMBE- TTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGC-ATATATATGTATAT
+                                     :::: : ::   :::  ::::::  : :::
+BACR24 TATATTTAATTTAACACTAGTCCGTCATATACAATACATTGCTGCTGTATATACATGTAT
+     24540     24550     24560     24570     24580     24590       
+
+             260       270       280       290       300       310 
+HUMBE- GTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGG
+        ::: : : :: ::::: : :::::::::::::::::: : :  :: ::::::       
+BACR24 ATATATATATATATATATATATATATATATATATATATATATAGTTCTCTTACACATAAT
+     24600     24610     24620     24630     24640     24650       
+
+             320       330       340       350       360       370 
+HUMBE- TTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTG
+                                                                   
+BACR24 TAACAATTTAGAAATTGCTCACCTGATTAACGTTTACATTTATGTTTGCCACAGTGCTTG
+     24660     24670     24680     24690     24700     24710       
+
+>>BACR28F21 : AC010705, 163958 bases, from X:11.          (21998 nt)
+rev-comp initn: 188 init1: 188 opt: 206  Z-score: 107.7  bits: 37.5 E():  1.8
+ 73.418% identity (73.418% ungapped) in 79 nt overlap (293-215:149530-149608)
+
+        320       310       300       290       280       270      
+HUMBE- TTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATAT
+                                     : : :::::::::::::::::: : :::::
+BACR28 CCCCTTTTTTGGATATATACACACATATATATATATATATATATATATATATATTTATAT
+  149500    149510    149520    149530    149540    149550         
+
+        260       250       240       230       220       210      
+HUMBE- GTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTC
+        :::: : ::: ::::: : ::::::: :   :: :::      :::::           
+BACR28 ATACATATATATATATATAAATATATGTACATATATGTGCCCCCTTTTCGGCACTGCCAT
+  149560    149570    149580    149590    149600    149610         
+
+        200       190       180       170       160       150      
+HUMBE- ATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAATCAATGTGCTCTGTGCA
+                                                                   
+BACR28 GTTGTCCTCGGTTATGTCCGATGGACGCGATAACGGCAAAAACGACGCCGGCTGCTCTCC
+  149620    149630    149640    149650    149660    149670         
+
+>>BACR02C24 : 234783 bases, from X:12.                    (21843 nt)
+ initn: 188 init1: 188 opt: 206  Z-score: 107.7  bits: 37.5 E():  1.8
+ 73.418% identity (73.418% ungapped) in 79 nt overlap (215-293:219909-219987)
+
+          190       200       210       220       230       240    
+HUMBE- ATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATATATA
+                                     :::::      ::: ::   : ::::::: 
+BACR02 GGACATAACCGAGGACAACATGGCAGTGCCGAAAAGGGGGCACATATATGTACATATATT
+   219880    219890    219900    219910    219920    219930        
+
+          250       260       270       280       290       300    
+HUMBE- TGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTAC
+       : ::::: ::: : :::: ::::: : :::::::::::::::::: : :           
+BACR02 TATATATATATATATGTATATATAAATATATATATATATATATATATATATATATGTGTG
+   219940    219950    219960    219970    219980    219990        
+
+          310       320       330       340       350       360    
+HUMBE- CAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATC
+                                                                   
+BACR02 TATATATCCAAAAAAGGGGATGAGGCGACACATGTTCTGTCCGTTTTTCGTTGCGTTTTC
+   220000    220010    220020    220030    220040    220050        
+
+>>BACR14D22 : AC012164, 169384 bases, from X:17.          (73982 nt)
+rev-comp initn: 188 init1: 188 opt: 204  Z-score: 97.9  bits: 37.4 E():  1.8
+ 66.667% identity (67.290% ungapped) in 108 nt overlap (336-230:2982-3089)
+
+           360       350       340        330       320       310  
+HUMBE- TGGATGAAAACTCTACCTCAGTTCTAAGCATATCTTC-TCCTTATTTGGATTAAAACCTT
+                                     ::: ::: ::   :: : :::::   : ::
+BACR14 CTGCCTAACTTTATAATACAATTTATGGTCTATATTCATCTCGATCTTGATTACTGCTTT
+            2960      2970      2980      2990      3000      3010 
+
+            300       290       280       270       260       250  
+HUMBE- CTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTACACACATACATAT
+        ::      :  :  :  :::::::::::::::::: : ::::: :: : : ::: ::::
+BACR14 TTGACTTTGAGGGGGAGTATATATATATATATATATATATATATATATATATATATATAT
+            3020      3030      3040      3050      3060      3070 
+
+            240       230       220       210       200       190  
+HUMBE- ACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTAATAAAT
+       : :::::: :: ::  ::                                          
+BACR14 ATATATATGTGTATATATACATATATACATATATATATGTGTATGTGTAGCTTCGATTTA
+            3080      3090      3100      3110      3120      3130 
+
+>>BACR11A04 : AC010916, 165307 bases, from 4:101.         (73982 nt)
+rev-comp initn: 165 init1: 165 opt: 204  Z-score: 97.9  bits: 37.4 E():  1.8
+ 56.738% identity (62.016% ungapped) in 282 nt overlap (294-21:75765-76030)
+
+         320       310       300       290       280       270     
+HUMBE- ATTTGGATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATA
+                                     ::::  ::::::::::::::::  : ::::
+BACR11 AAACGGTTATGAAAATAATGTAACCATATTAAAATTTATATATATATATATAAATATATA
+        75740     75750     75760     75770     75780     75790    
+
+         260           250       240       230       220       210 
+HUMBE- TGTACAC----ACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATT
+       : : :::    : ::: ::::: ::::::::  ::: :  : :     ::: : :: :: 
+BACR11 TATCCACTACTATATATATATATATATATATATATTTAAATATATACATTTGTATTTATA
+        75800     75810     75820     75830     75840     75850    
+
+             200       190       180       170         160         
+HUMBE- TGCTCATGCATGCTAATAAATTATGTCTAAAAATAGAATAAATACAAAT--CAATGTGCT
+       :  : :: ::    :::::: ::     : : :: :  ::  ::::  :  : :: :   
+BACR11 TATTTATTCA---AAATAAACTA-----ATACATCGTTTACTTACACTTTACTATTTCTA
+        75860        75870          75880     75890     75900      
+
+     150       140         130       120       110       100       
+HUMBE- CTGTGCATTAGT--TACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTC
+       ::::    :  :  :::: :  :  ::      :::  ::  : :::: : :  : : : 
+BACR11 CTGTTATGTTATAATACTGAAAAACTT-----GAACTTGAAATGAAAATCGACTG-CTTG
+      75910     75920     75930          75940     75950      75960
+
+        90        80        70        60        50        40       
+HUMBE- TTAATGCAGTCAAAAATACAAATAAATAAAAAGTCACTTACAACCCAAAGTGTGACTATC
+       ::::       :  : ::  :::    ::: : : : :: :::  :::::  : :: : :
+BACR11 TTAAATATAAAATTATTAACAATTCCGAAAGACTAAGTTTCAATACAAAG-CTAAC-AAC
+            75970     75980     75990     76000     76010          
+
+        30        20        10                                     
+HUMBE- AATGGGGTAATCAGTGGTGTCAAATAGGAGGT                            
+       :: :  : ::::                                                
+BACR11 AAGGTAGGAATCTAGCTTTGGCAGTGATAATAATTAAATAATAATATTAAAGCAAGAATT
+    76020     76030     76040     76050     76060     76070        
+
+>>BACR01N17 : AC009911, 170184 bases, from 2L:38.         (73982 nt)
+ initn:  93 init1:  93 opt: 204  Z-score: 97.9  bits: 37.4 E():  1.8
+ 58.238% identity (63.071% ungapped) in 261 nt overlap (54-303:7216-7467)
+
+            30        40        50        60        70        80   
+HUMBE- TACCCCATTGATAGTCACACTTTGGGTTGTAAGTGACTTTTTATTTATTTGTATTTTTGA
+                                     :::: :  : ::::  :    :::::::::
+BACR01 TGTTCATCTTATAGTAAGCAAGAATTGGACAAGTCAAGTATTATGGAAACATATTTTTGA
+        7190      7200      7210      7220      7230      7240     
+
+            90          100       110       120       130       140
+HUMBE- CTGCATTA---AGAGGTCTCTAGTTTTTTACCTCTTGTTTCCCAAAACCTAATAAGTAAC
+        :   :::   :   :     :: ::  ::    :   ::::  :: : : :  : ::::
+BACR01 GTTGTTTAGGTACCAGCGAAGAGATTACTATACATCCATTCC--AATCGTGA-GATTAAC
+        7250      7260      7270      7280        7290       7300  
+
+              150             160       170       180       190    
+HUMBE- TAATGCACAGA---GC---ACATTGATTTGTATTTATTCTATTTTTAGACATAATTTATT
+        :::  : :::   ::   ::::  ::   ::: :: : : :  ::: : :  :: ::: 
+BACR01 GAATTAAAAGAATGGCTTAACATATATACATATGTAATATGTACTTATATAACATATATA
+           7310      7320      7330      7340      7350      7360  
+
+          200       210        220       230       240       250   
+HUMBE- AGCATGCATGAGCAAATTAA-GAAAAACAACAACAAATGAATGCATATATATGTATAT-G
+          :::  : ::   ::::: :::  : :: ::    : ::  ::::::::  ::: :  
+BACR01 TATATGTTTAAG---ATTAACGAATTAAAAGAATGGCTTAA--CATATATACATATGTAA
+           7370         7380      7390      7400        7410       
+
+            260       270       280       290       300       310  
+HUMBE- TATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTCTTACCAGAAGGT
+       ::: :   :: ::: ::: :::::::: ::::::::: : :: : :: :::         
+BACR01 TATATACTTATATA-ACATATATATATCTATATATATATGTTATATTATTATATCTTAAA
+      7420      7430       7440      7450      7460      7470      
+
+            320       330       340       350       360       370  
+HUMBE- TTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTTCATCCATTCTGT
+                                                                   
+BACR01 CAAGCTGTTCATTTCATTTAAATTATAAATTTAATTATGTCAGACTACGCATAAATTCAA
+       7480      7490      7500      7510      7520      7530      
+
+
+
+3002 residues in 1 query   sequences
+112936249 residues in 657 library sequences
+ Scomplib [33t08]
+ start: Sat Dec  8 11:42:13 2001 done: Sat Dec  8 11:43:35 2001
+ Scan time: 78.670 Display time:  2.420
+
+Function used was FASTA [version 3.3t08 Jan. 17, 2001]

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,52 @@
+>HUMBETGLOA Human haplotype C4 beta-globin gene, complete cds. 
+ACCTCCTATTTGACACCACTGATTACCCCATTGATAGTCACACTTTGGGTTGTAAGTGAC
+TTTTTATTTATTTGTATTTTTGACTGCATTAAGAGGTCTCTAGTTTTTTACCTCTTGTTT
+CCCAAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTT
+AGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATA
+TATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTC
+TTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTT
+CATCCATTCTGTCCTGTAAGTATTTTGCATATTCTGGAGACGCAGGAAGAGATCCATCTA
+CATATCCCAAAGCTGAATTATGGTAGACAAAACTCTTCCACTTTTAGTGCATCAACTTCT
+TATTTGTGTAATAAGAAAATTGGGAAAACGATCTTCAATATGCTTACCAAGCTGTGATTC
+CAAATATTACGTAAATACACTTGCAAAGGAGGATGTTTTTAGTAGCAATTTGTACTGATG
+GTATGGGGCCAAGAGATATATCTTAGAGGGAGGGCTGAGGGTTTGAAGTCCAACTCCTAA
+GCCAGTGCCAGAAGAGCCAAGGACAGGTACGGCTGTCATCACTTAGACCTCACCCTGTGG
+AGCCACACCCTAGGGTTGGCCAATCTACTCCCAGGAGCAGGGAGGGCAGGAGCCAGGGCT
+GGGCATAAAAGTCAGGGCAGAGCCATCTATTGCTTACATTTGCTTCTGACACAACTGTGT
+TCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGT
+TACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGTGAGGCCCTGGGCAGGTT
+GGTATCAAGGTTACAAGACAGGTTTAAGGAGACCAATAGAAACTGGGCATGTGGAGACAG
+AGAAGACTCTTGGGTTTCTGATAGGCACTGACTCTCTCTGCCTATTGGTCTATTTTCCCA
+CCCTTAGGCTGCTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATC
+TGTCCACTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGC
+TCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACCTTTGCCACAC
+TGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGGTGAGTCTAT
+GGGACCCTTGATGTTTTCTTTCCCCTTCTTTTCTATGGTTAAGTTCATGTCATAGGAAGG
+GGATAAGTAACAGGGTACAGTTTAGAATGGGAAACAGACGAATGATTGCATCAGTGTGGA
+AGTCTCAGGATCGTTTTAGTTTCTTTTATTTGCTGTTCATAACAATTGTTTTCTTTTGTT
+TAATTCTTGCTTTCTTTTTTTTTCTTCTCCGCAATTTTTACTATTATACTTAATGCCTTA
+ACATTGTGTATAACAAAAGGAAATATCTCTGAGATACATTAAGTAACTTAAAAAAAAACT
+TTACACAGTCTGCCTAGTACATTACTATTTGGAATATATGTGTGCTTATTTGCATATTCA
+TAATCTCCCTACTTTATTTTCTTTTATTTTTAATTGATACATAATCATTATACATATTTA
+TGGGTTAAAGTGTAATGTTTTAATATGTGTACACATATTGACCAAATCAGGGTAATTTTG
+CATTTGTAATTTTAAAAAATGCTTTCTTCTTTTAATATACTTTTTTGTTTATCTTATTTC
+TAATACTTTCCCTAATCTCTTTCTTTCAGGGCAATAATGATACAATGTATCATGCCTCTT
+TGCACCATTCTAAAGAATAACAGTGATAATTTCTGGGTTAAGGCAATAGCAATATCTCTG
+CATATAAATATTTCTGCATATAAATTGTAACTGATGTAAGAGGTTTCATATTGCTAATAG
+CAGCTACAATCCAGCTACCATTCTGCTTTTATTTTATGGTTGGGATAAGGCTGGATTATT
+CTGAGTCCAAGCTAGGCCCTTTTGCTAATCATGTTCATACCTCTTATCTTCCTCCCACAG
+CTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCACCCCA
+CCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAG
+TATCACTAAGCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAG
+TCCAACTACTAAACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATA
+AAAAACATTTATTTTCATTGCAATGATGTATTTAAATTATTTCTGAATATTTTACTAAAA
+AGGGAATGTGGGAGGTCAGTGCATTTAAAACATAAAGAAATGAAGAGCTAGTTCAAACCT
+TGGGAAAATACACTATATCTTAAACTCCATGAAAGAAGGTGAGGCTGCAAACAGCTAATG
+CACATTGGCAACAGCCCTGATGCATATGCCTTATTCATCCCTCAGAAAAGGATTCAAGTA
+GAGGCTTGATTTGGAGGTTAAAGTTTTGCTATGCTGTATTTTACATTACTTATTGTTTTA
+GCTGTCCTCATGAATGTCTTTTCACTACCCATTTGCTTATCCTGCATCTCTCAGCCTTGA
+CTCCACTCAGTTCTCTTGCTTAGAGATACCACCTTTCCCCTGAAGTGTTCCTTCCATGTT
+TTACGGCGAGATGGTTTCTCCTCGCCTGGCCACTCAGCCTTAGTTGTCTCTGTTGTCTTA
+TAGAGGTCTACTTGAAGAAGGAAAAACAGGGGTCATGGTTTGACTGTCCTGTGAGCCCTT
+CTTCCCTGCCTCCCCCACTCACAGTGACCCGGAATCTGCAGTGCTAGTCTCCCGGAACTA
+TC

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.gff
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.gff	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.gff	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+##gff-version 2
+##date 2000-07-31
+##sequence-region test
+test.fasta	RepeatMasker	similarity	238	289	15.4	+	.	Target "Motif:(TA)n" 2 53
+test.fasta	RepeatMasker	similarity	444	537	37.2	+	.	Target "Motif:MIR3" 93 187
+test.fasta	RepeatMasker	similarity	1454	1529	23.7	+	.	Target "Motif:T-rich" 1 75
+test.fasta	RepeatMasker	similarity	1702	1797	14.6	-	.	Target "Motif:L1MA6" 6200 6300

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grail
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grail	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grail	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,157 @@
+
+[Start grail2exons_human_1.3] [Seqlen= 3002]
+
+[Start exons]
+[Num= 3]
+
+f 1    866     941     848    1051    96.000    excellent
+f 2   1088    1314     924    1331   100.000    excellent
+f 0   2161    2279    2116    2289    94.000    excellent
+
+[End exons]
+
+[Start exon_translations]
+[Num= 3]
+
+[Start translation]
+
+[trans_start= 866]
+MVHLTPEEKSAVTALWGKVNVDEVG
+
+[End translation]
+
+[Start translation]
+
+[trans_start= 1086]
+RLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLD
+NLKGTFATLSELHCDKLHVDPENFRV
+
+[End translation]
+
+[Start translation]
+
+[trans_start= 2161]
+LLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAH
+
+[End translation]
+
+[End exon_translations]
+
+[Start clusters forward]
+[Num= 3]
+
+[Start cluster] [Num= 7]
+ 1    848    941     87
+ 1    848    957     84
+ 1    866    941     96
+ 1    866    957     88
+ 1    866    919     71
+ 1    866    934     60
+ 1    866   1051     47
+[End cluster]
+
+[Start cluster] [Num= 17]
+ 2    930   1331     63
+ 2    930   1310     61
+ 2    930   1275     42
+ 2   1088   1314    100
+ 2   1088   1310    100
+ 2   1088   1289    100
+ 2   1088   1275     96
+ 2   1088   1331     87
+ 2   1088   1263     68
+ 2   1088   1203     60
+ 2   1088   1212     59
+ 2   1088   1196     59
+ 2   1088   1175     52
+ 2   1116   1310     94
+ 2   1161   1310     75
+ 2   1161   1275     53
+ 2   1161   1331     41
+[End cluster]
+
+[Start cluster] [Num= 7]
+ 0   2131   2254     80
+ 0   2131   2289     75
+ 0   2161   2279     94
+ 0   2161   2289     93
+ 0   2161   2254     72
+ 0   2161   2247     65
+ 0   2161   2244     62
+[End cluster]
+
+[End clusters forward]
+
+[Start clusters reverse]
+[Num= 1]
+
+[Start cluster] [Num= 6]
+ 0   1707   1941     79
+ 0   1707   1891     79
+ 0   1707   1921     75
+ 0   1707   1899     74
+ 0   1707   1833     59
+ 0   1707   1855     53
+[End cluster]
+
+[End clusters reverse]
+
+[End grail2exons_human_1.3]
+
+[Start DNASequence]
+>HUMBETGLOA Human haplotype C4 beta-globin gene, complete cds. 
+ACCTCCTATTTGACACCACTGATTACCCCATTGATAGTCACACTTTGGGTTGTAAGTGAC
+TTTTTATTTATTTGTATTTTTGACTGCATTAAGAGGTCTCTAGTTTTTTACCTCTTGTTT
+CCCAAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTT
+AGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATA
+TATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTC
+TTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTT
+CATCCATTCTGTCCTGTAAGTATTTTGCATATTCTGGAGACGCAGGAAGAGATCCATCTA
+CATATCCCAAAGCTGAATTATGGTAGACAAAACTCTTCCACTTTTAGTGCATCAACTTCT
+TATTTGTGTAATAAGAAAATTGGGAAAACGATCTTCAATATGCTTACCAAGCTGTGATTC
+CAAATATTACGTAAATACACTTGCAAAGGAGGATGTTTTTAGTAGCAATTTGTACTGATG
+GTATGGGGCCAAGAGATATATCTTAGAGGGAGGGCTGAGGGTTTGAAGTCCAACTCCTAA
+GCCAGTGCCAGAAGAGCCAAGGACAGGTACGGCTGTCATCACTTAGACCTCACCCTGTGG
+AGCCACACCCTAGGGTTGGCCAATCTACTCCCAGGAGCAGGGAGGGCAGGAGCCAGGGCT
+GGGCATAAAAGTCAGGGCAGAGCCATCTATTGCTTACATTTGCTTCTGACACAACTGTGT
+TCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGT
+TACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGTGAGGCCCTGGGCAGGTT
+GGTATCAAGGTTACAAGACAGGTTTAAGGAGACCAATAGAAACTGGGCATGTGGAGACAG
+AGAAGACTCTTGGGTTTCTGATAGGCACTGACTCTCTCTGCCTATTGGTCTATTTTCCCA
+CCCTTAGGCTGCTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATC
+TGTCCACTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGC
+TCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACCTTTGCCACAC
+TGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGGTGAGTCTAT
+GGGACCCTTGATGTTTTCTTTCCCCTTCTTTTCTATGGTTAAGTTCATGTCATAGGAAGG
+GGATAAGTAACAGGGTACAGTTTAGAATGGGAAACAGACGAATGATTGCATCAGTGTGGA
+AGTCTCAGGATCGTTTTAGTTTCTTTTATTTGCTGTTCATAACAATTGTTTTCTTTTGTT
+TAATTCTTGCTTTCTTTTTTTTTCTTCTCCGCAATTTTTACTATTATACTTAATGCCTTA
+ACATTGTGTATAACAAAAGGAAATATCTCTGAGATACATTAAGTAACTTAAAAAAAAACT
+TTACACAGTCTGCCTAGTACATTACTATTTGGAATATATGTGTGCTTATTTGCATATTCA
+TAATCTCCCTACTTTATTTTCTTTTATTTTTAATTGATACATAATCATTATACATATTTA
+TGGGTTAAAGTGTAATGTTTTAATATGTGTACACATATTGACCAAATCAGGGTAATTTTG
+CATTTGTAATTTTAAAAAATGCTTTCTTCTTTTAATATACTTTTTTGTTTATCTTATTTC
+TAATACTTTCCCTAATCTCTTTCTTTCAGGGCAATAATGATACAATGTATCATGCCTCTT
+TGCACCATTCTAAAGAATAACAGTGATAATTTCTGGGTTAAGGCAATAGCAATATCTCTG
+CATATAAATATTTCTGCATATAAATTGTAACTGATGTAAGAGGTTTCATATTGCTAATAG
+CAGCTACAATCCAGCTACCATTCTGCTTTTATTTTATGGTTGGGATAAGGCTGGATTATT
+CTGAGTCCAAGCTAGGCCCTTTTGCTAATCATGTTCATACCTCTTATCTTCCTCCCACAG
+CTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCACCCCA
+CCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAG
+TATCACTAAGCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAG
+TCCAACTACTAAACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATA
+AAAAACATTTATTTTCATTGCAATGATGTATTTAAATTATTTCTGAATATTTTACTAAAA
+AGGGAATGTGGGAGGTCAGTGCATTTAAAACATAAAGAAATGAAGAGCTAGTTCAAACCT
+TGGGAAAATACACTATATCTTAAACTCCATGAAAGAAGGTGAGGCTGCAAACAGCTAATG
+CACATTGGCAACAGCCCTGATGCATATGCCTTATTCATCCCTCAGAAAAGGATTCAAGTA
+GAGGCTTGATTTGGAGGTTAAAGTTTTGCTATGCTGTATTTTACATTACTTATTGTTTTA
+GCTGTCCTCATGAATGTCTTTTCACTACCCATTTGCTTATCCTGCATCTCTCAGCCTTGA
+CTCCACTCAGTTCTCTTGCTTAGAGATACCACCTTTCCCCTGAAGTGTTCCTTCCATGTT
+TTACGGCGAGATGGTTTCTCCTCGCCTGGCCACTCAGCCTTAGTTGTCTCTGTTGTCTTA
+TAGAGGTCTACTTGAAGAAGGAAAAACAGGGGTCATGGTTTGACTGTCCTGTGAGCCCTT
+CTTCCCTGCCTCCCCCACTCACAGTGACCCGGAATCTGCAGTGCTAGTCTCCCGGAACTA
+TC
+
+[End DNASequence]
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grailexp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grailexp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.grailexp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,47 @@
+begin genes
+ 1 1 summary f 3 866 2289 866 2289 -1 -1
+ 1 1 exon 866 957 0 1 96
+ 1 1 exon 1088 1310 2 2 100
+ 1 1 exon 2161 2289 0 0 94
+ 1 1 mrna atggtgcatctgactcctgaggagaagtctgccgttactgccctgtggggcaaggtgaacgtggatgaagttggtggtgaggccctgggcaggctgctggtggtctacccttggacccagaggttctttgagtcctttggggatctgtccactcctgatgctgttatgggcaaccctaaggtgaaggctcatggcaagaaagtgctcggtgcctttagtgatggcctggctcacctggacaacctcaagggcacctttgccacactgagtgagctgcactgtgacaagctgcacgtggatcctgagaacttcaggctcctgggcaacgtgctggtctgtgtgctggcccatcactttggcaaagaattcaccccaccagtgcaggctgcctatcagaaagtggtggctggtgtggctaatgccctggcccacaagtatcactaa
+ 1 1 translation MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH*
+end genes
+begin exons
+ f 1 848 941 1 0 77 0 848 1051 0
+ f 1 848 957 1 0 93 0 848 1051 0
+ f 1 866 919 0 0 85 0 848 1051 0
+ f 1 866 934 0 0 79 0 848 1051 0
+ f 1 866 941 0 0 82 0 848 1051 0
+ f 1 866 957 0 0 96 0 848 1051 1
+ f 1 866 1051 3 0 24 0 848 1051 0
+ f 2 930 1275 0 0 28 0 924 1331 0
+ f 2 930 1310 0 0 56 0 924 1331 0
+ f 2 930 1331 3 0 33 0 924 1331 0
+ f 2 1088 1175 1 2 74 0 924 1331 0
+ f 2 1088 1196 1 2 78 0 924 1331 0
+ f 2 1088 1203 1 2 60 0 924 1331 0
+ f 2 1088 1212 1 2 60 0 924 1331 0
+ f 2 1088 1263 1 2 64 0 924 1331 0
+ f 2 1088 1275 1 2 79 0 924 1331 0
+ f 2 1088 1289 1 2 100 0 924 1331 0
+ f 2 1088 1310 1 2 100 0 924 1331 1
+ f 2 1088 1314 1 2 82 0 924 1331 0
+ f 2 1088 1331 2 2 68 0 924 1331 0
+ f 2 1116 1310 1 0 92 0 924 1331 0
+ f 2 1161 1275 0 0 33 0 924 1331 0
+ f 2 1161 1310 0 0 63 0 924 1331 0
+ f 2 1161 1331 3 0 21 0 924 1331 0
+ f 3 2131 2254 0 0 34 0 2116 2289 0
+ f 3 2131 2289 3 0 37 0 2116 2289 0
+ f 3 2161 2244 1 0 60 0 2116 2289 0
+ f 3 2161 2247 1 0 62 0 2116 2289 0
+ f 3 2161 2254 1 0 65 0 2116 2289 0
+ f 3 2161 2279 1 0 77 0 2116 2289 0
+ f 3 2161 2289 2 0 94 0 2116 2289 1
+ r 1 1062 1296 2 2 79 1 1062 1316 0
+ r 1 1082 1296 1 2 75 1 1062 1316 0
+ r 1 1104 1296 1 2 74 1 1062 1316 0
+ r 1 1112 1296 1 2 79 1 1062 1316 1
+ r 1 1148 1296 1 2 53 1 1062 1316 0
+ r 1 1170 1296 1 2 59 1 1062 1316 0
+end exons

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.mzef
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.mzef	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.mzef	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+ ENTER NAME OF THE SEQUENCE FILE (in single quotes)
+HUMBETGLOA.fasta
+ ENTER 1 FOR FORWARD, 2 FOR REVERSE
+1
+ ENTER PRIOR PROBABILITY (suggesting .04)
+.04
+ ENTER OVER LAPPING NUMBER (suggesting 0)
+0
+ Internal coding exons predicted by MZEF
+ File_Name: HUMBETGLOA  Sequence_length:   3002  G+C_content:  0.400
+  Coordinates    P    Fr1   Fr2   Fr3  Orf   3ss   Cds   5ss
+  1088 -  1310 0.986 0.510 0.468 0.651 221 0.478 0.594 0.571

Added: trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.tblastx
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.tblastx	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/HUMBETGLOA.tblastx	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,353 @@
+TBLASTX 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= HUMBETGLOA Human haplotype C4 beta-globin gene, complete cds. 
+         (3002 letters)
+
+Database: ecoli.nt
+           400 sequences; 4,662,239 total letters
+
+Searching.................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gb|AE000479.1|AE000479 Escherichia coli K-12 MG1655 section 369 ...    34  0.13
+gb|AE000302.1|AE000302 Escherichia coli K-12 MG1655 section 192 ...    31  0.61
+gb|AE000277.1|AE000277 Escherichia coli K-12 MG1655 section 167 ...    31  0.84
+gb|AE000168.1|AE000168 Escherichia coli K-12 MG1655 section 58 o...    29  2.2
+gb|AE000400.1|AE000400 Escherichia coli K-12 MG1655 section 290 ...    29  3.0
+gb|AE000408.1|AE000408 Escherichia coli K-12 MG1655 section 298 ...    29  3.0
+gb|AE000438.1|AE000438 Escherichia coli K-12 MG1655 section 328 ...    29  3.0
+gb|AE000396.1|AE000396 Escherichia coli K-12 MG1655 section 286 ...    29  3.0
+gb|AE000466.1|AE000466 Escherichia coli K-12 MG1655 section 356 ...    26  3.4
+gb|AE000482.1|AE000482 Escherichia coli K-12 MG1655 section 372 ...    29  4.1
+gb|AE000341.1|AE000341 Escherichia coli K-12 MG1655 section 231 ...    29  4.1
+gb|AE000198.1|AE000198 Escherichia coli K-12 MG1655 section 88 o...    29  4.1
+gb|AE000367.1|AE000367 Escherichia coli K-12 MG1655 section 257 ...    29  4.1
+gb|AE000136.1|AE000136 Escherichia coli K-12 MG1655 section 26 o...    29  4.1
+gb|AE000327.1|AE000327 Escherichia coli K-12 MG1655 section 217 ...    28  5.7
+gb|AE000498.1|AE000498 Escherichia coli K-12 MG1655 section 388 ...    28  7.8
+gb|AE000509.1|AE000509 Escherichia coli K-12 MG1655 section 399 ...    28  7.8
+gb|AE000306.1|AE000306 Escherichia coli K-12 MG1655 section 196 ...    28  7.8
+gb|AE000203.1|AE000203 Escherichia coli K-12 MG1655 section 93 o...    28  7.8
+gb|AE000208.1|AE000208 Escherichia coli K-12 MG1655 section 98 o...    28  7.8
+
+>gb|AE000479.1|AE000479 Escherichia coli K-12 MG1655 section 369 of 400 of the complete
+            genome
+          Length = 10934
+
+ Score = 33.6 bits (67), Expect = 0.13
+ Identities = 11/26 (42%), Positives = 16/26 (61%)
+ Frame = +1 / -2
+
+                                      
+Query: 1057 SAYWSIFPPLGCWWSTLGPRGSLSPL 1134
+            +A W++FPP+G  W  L  +   SPL
+Sbjct: 5893 AAVWALFPPVGSQWGCLASQWRTSPL 5816
+
+
+>gb|AE000302.1|AE000302 Escherichia coli K-12 MG1655 section 192 of 400 of the complete
+            genome
+          Length = 10264
+
+ Score = 31.3 bits (62), Expect = 0.61
+ Identities = 8/17 (47%), Positives = 13/17 (76%)
+ Frame = +2 / +2
+
+                             
+Query: 2177 WSVCWPITLAKNSPHQC 2227
+            +  CWP+ L ++SP+QC
+Sbjct: 1157 YPACWPLPLRRSSPYQC 1207
+
+
+>gb|AE000277.1|AE000277 Escherichia coli K-12 MG1655 section 167 of 400 of the complete
+            genome
+          Length = 11653
+
+ Score = 30.8 bits (61), Expect = 0.84
+ Identities = 9/25 (36%), Positives = 14/25 (56%)
+ Frame = +2 / -3
+
+                                     
+Query: 2174 CWSVCWPITLAKNSPHQCRLPIRKW 2248
+            CW     + L K+ P QCR+ + +W
+Sbjct: 4931 CWLTASVLRLQKSLPRQCRITVVRW 4857
+
+
+>gb|AE000168.1|AE000168 Escherichia coli K-12 MG1655 section 58 of 400 of the complete genome
+          Length = 12663
+
+ Score = 29.5 bits (58), Expect = 2.2
+ Identities = 12/41 (29%), Positives = 24/41 (58%)
+ Frame = -1 / +1
+
+                                                     
+Query: 2813 KEHFRGKVVSLSKRTEWSQG*EMQDKQMGSEKTFMRTAKTI 2691
+            K H RG+ V + ++   ++  E+ D++ G+ +   RT +TI
+Sbjct: 13   KRHLRGE*VKVGEKYITARRGELPDQEPGNGEASYRTMRTI 135
+
+
+>gb|AE000400.1|AE000400 Escherichia coli K-12 MG1655 section 290 of 400 of the complete
+            genome
+          Length = 14295
+
+ Score = 29.0 bits (57), Expect = 3.0
+ Identities = 7/18 (38%), Positives = 10/18 (54%)
+ Frame = +2 / +3
+
+                              
+Query: 2165 WATCWSVCWPITLAKNSP 2218
+            W TCW+ CW      ++P
+Sbjct: 9096 WITCWNCCWQAGWISSAP 9149
+
+
+>gb|AE000408.1|AE000408 Escherichia coli K-12 MG1655 section 298 of 400 of the complete
+            genome
+          Length = 10944
+
+ Score = 29.0 bits (57), Expect = 3.0
+ Identities = 17/39 (43%), Positives = 20/39 (50%)
+ Frame = -3 / +1
+
+                                                   
+Query: 1020 LSPHAQFLLVSLNLSCNLDTNLPRASPPTSSTFTLPHRA 904
+            L+  A FLLV + +S   DT LPR    T  TF    RA
+Sbjct: 7618 LTSTAAFLLV*VKISRACDTFLPRIRSATRRTF*AEERA 7734
+
+
+>gb|AE000438.1|AE000438 Escherichia coli K-12 MG1655 section 328 of 400 of the complete
+            genome
+          Length = 10426
+
+ Score = 29.0 bits (57), Expect = 3.0
+ Identities = 10/28 (35%), Positives = 12/28 (42%)
+ Frame = +2 / +3
+
+                                        
+Query: 2165 WATCWSVCWPITLAKNSPHQCRLPIRKW 2248
+            WA CW  C  + +A NS    R     W
+Sbjct: 750  WACCWRTCSLVVVALNSLRAVRQSSTSW 833
+
+
+>gb|AE000396.1|AE000396 Escherichia coli K-12 MG1655 section 286 of 400 of the complete
+           genome
+          Length = 10098
+
+ Score = 29.0 bits (57), Expect = 3.0
+ Identities = 9/27 (33%), Positives = 18/27 (66%)
+ Frame = -3 / -1
+
+                                      
+Query: 633 PPSKIYLLAPYHQYKLLLKTSSFASVF 553
+           PP++ YLL+P H+++++   S +   F
+Sbjct: 309 PPARFYLLSPVHEWRVIAS*SWYHQSF 229
+
+
+>gb|AE000466.1|AE000466 Escherichia coli K-12 MG1655 section 356 of 400 of the complete
+            genome
+          Length = 10208
+
+ Score = 26.3 bits (51), Expect(2) = 3.4
+ Identities = 11/26 (42%), Positives = 14/26 (53%)
+ Frame = +3 / +2
+
+                                      
+Query: 2796 SPEVFLPCFTARWFLLAWPLSLSCLC 2873
+            S  V L C T  ++L  W L+LS  C
+Sbjct: 5579 SSAVVLRCLTTVFWLPVWALTLSICC 5656
+
+
+ Score = 20.3 bits (38), Expect(2) = 3.4
+ Identities = 4/11 (36%), Positives = 7/11 (63%)
+ Frame = +3 / +1
+
+                       
+Query: 2892 LKKEKQGSWFD 2924
+            +K+  +G W D
+Sbjct: 5737 MKRPFKGDWLD 5769
+
+
+>gb|AE000482.1|AE000482 Escherichia coli K-12 MG1655 section 372 of 400 of the complete genome
+          Length = 20906
+
+ Score = 28.6 bits (56), Expect = 4.1
+ Identities = 14/48 (29%), Positives = 19/48 (39%)
+ Frame = +3 / +1
+
+                                                             
+Query: 660   SQCQKSQGQVRLSSLRPHPVEPHPRVGQSTPRSREGRSQGWA*KSGQS 803
+             S+C    G  R    RP  + P       +P    GR +GW    GQ+
+Sbjct: 20239 SRCALILGPARRWVHRPESLSPAASAHGQSPHVAAGRRRGWKRADGQN 20382
+
+
+>gb|AE000341.1|AE000341 Escherichia coli K-12 MG1655 section 231 of 400 of the complete
+            genome
+          Length = 10231
+
+ Score = 28.6 bits (56), Expect = 4.1
+ Identities = 12/20 (60%), Positives = 13/20 (65%)
+ Frame = -2 / +2
+
+                                
+Query: 2995 PGD*HCRFRVTVSGGGREEG 2936
+            PG  H  +R TVSG GRE G
+Sbjct: 7538 PGWLHAVYRETVSGSGREAG 7597
+
+
+>gb|AE000198.1|AE000198 Escherichia coli K-12 MG1655 section 88 of 400 of the complete genome
+          Length = 11639
+
+ Score = 28.6 bits (56), Expect = 4.1
+ Identities = 11/22 (50%), Positives = 15/22 (68%)
+ Frame = +1 / +3
+
+                                   
+Query: 2332  FPKSNY*TGGYYEGP*ASGFCL 2397
+             F +S  * GGY+ GP +S FC+
+Sbjct: 10947 FQRSGG*PGGYHAGPGSSPFCV 11012
+
+
+>gb|AE000367.1|AE000367 Escherichia coli K-12 MG1655 section 257 of 400 of the complete
+            genome
+          Length = 11438
+
+ Score = 28.6 bits (56), Expect = 4.1
+ Identities = 8/27 (29%), Positives = 13/27 (47%)
+ Frame = +3 / -2
+
+                                       
+Query: 1332 CFLSPSFLWLSSCHRKGISNRVQFRMG 1412
+            C  + +F+W + CH+  I     F  G
+Sbjct: 7990 CLFAAAFVWFAKCHQPVIGRNTTFSKG 7910
+
+
+>gb|AE000136.1|AE000136 Escherichia coli K-12 MG1655 section 26 of 400 of the complete genome
+          Length = 16823
+
+ Score = 28.6 bits (56), Expect = 4.1
+ Identities = 11/23 (47%), Positives = 12/23 (51%)
+ Frame = -2 / +1
+
+                                    
+Query: 2860  RLSGQARRNHLAVKHGRNTSGER 2792
+             RLSG+ RR   A  H    SG R
+Sbjct: 13873 RLSGKVRRRGSAASHFLYLSGSR 13941
+
+
+>gb|AE000327.1|AE000327 Escherichia coli K-12 MG1655 section 217 of 400 of the complete
+            genome
+          Length = 10048
+
+ Score = 28.1 bits (55), Expect = 5.7
+ Identities = 9/19 (47%), Positives = 12/19 (62%)
+ Frame = +1 / +2
+
+                               
+Query: 1231 WTTSRAPLPH*VSCTVTSC 1287
+            W  S  P+PH   C+V+SC
+Sbjct: 2426 WRQSLYPIPHCYRCSVSSC 2482
+
+
+>gb|AE000498.1|AE000498 Escherichia coli K-12 MG1655 section 388 of 400 of the complete
+            genome
+          Length = 10264
+
+ Score = 27.6 bits (54), Expect = 7.8
+ Identities = 8/18 (44%), Positives = 10/18 (55%)
+ Frame = +3 / +3
+
+                              
+Query: 2670 YAVFYITYCFSCPHECLF 2723
+            +   + T C SCPH C F
+Sbjct: 4278 FITLHFT*CVSCPHNCSF 4331
+
+
+>gb|AE000509.1|AE000509 Escherichia coli K-12 MG1655 section 399 of 400 of the complete
+           genome
+          Length = 10589
+
+ Score = 27.6 bits (54), Expect = 7.8
+ Identities = 8/17 (47%), Positives = 12/17 (70%)
+ Frame = -2 / -3
+
+                            
+Query: 682 PWLFWHWLRSWTSNPQP 632
+           P L W+W+R   S+P+P
+Sbjct: 261 PSLLWYWVRRCLSSPRP 211
+
+
+>gb|AE000306.1|AE000306 Escherichia coli K-12 MG1655 section 196 of 400 of the complete
+            genome
+          Length = 10446
+
+ Score = 27.6 bits (54), Expect = 7.8
+ Identities = 7/18 (38%), Positives = 12/18 (65%)
+ Frame = +3 / +1
+
+                              
+Query: 1341 SPSFLWLSSCHRKGISNR 1394
+            +P+  W+S CHR+ +  R
+Sbjct: 136  TPAGCWISGCHRRSVQQR 189
+
+
+>gb|AE000203.1|AE000203 Escherichia coli K-12 MG1655 section 93 of 400 of the complete genome
+          Length = 10751
+
+ Score = 27.6 bits (54), Expect = 7.8
+ Identities = 13/47 (27%), Positives = 23/47 (48%)
+ Frame = +1 / +3
+
+                                                           
+Query: 1156 LLWATLR*RLMARKCSVPLVMAWLTWTTSRAPLPH*VSCTVTSCTWI 1296
+            +L  T R    A   +  +V +   WT S  P P  + C+++S +W+
+Sbjct: 1554 ILRLTYRLPTWAEPVTWAMVSSMRVWTPSVRP*PEALICSISSGSWL 1694
+
+
+>gb|AE000208.1|AE000208 Escherichia coli K-12 MG1655 section 98 of 400 of the complete genome
+          Length = 10619
+
+ Score = 27.6 bits (54), Expect = 7.8
+ Identities = 10/25 (40%), Positives = 15/25 (60%)
+ Frame = -3 / +3
+
+                                     
+Query: 981  LSCNLDTNLPRASPPTSSTFTLPHR 907
+            ++C L      + P  ++TFTLPHR
+Sbjct: 2829 VACTLTCKYRLSLPQRANTFTLPHR 2903
+
+
+  Database: ecoli.nt
+    Posted date:  Jun 14, 2001  3:27 PM
+  Number of letters in database: 4,662,239
+  Number of sequences in database:  400
+  
+Lambda     K      H
+   0.318    0.135    0.401 
+
+
+Matrix: BLOSUM62
+Number of Hits to DB: 31907970
+Number of Sequences: 400
+Number of extensions: 491769
+Number of successful extensions: 23184
+Number of sequences better than 10.0: 20
+length of query: 1000
+length of database: 1,554,079
+effective HSP length: 47
+effective length of query: 953
+effective length of database: 1,535,279
+effective search space: 1463120887
+effective search space used: 1463120887
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 0 ( 0.0 bits)
+S1: 41 (21.7 bits)
+S2: 53 (27.2 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Kingdoms_DNA.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Kingdoms_DNA.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Kingdoms_DNA.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,871 @@
+#NEXUS 
+
+
+BEGIN DATA;
+DIMENSIONS  NTAX=32 NCHAR=2147;
+
+[!These ribosomal sequences are those used in:
+
+Lake, J.A.  1988.  Origin of the eukaryotic nucleus determined by rate-invariant analysis of rRNA sequences.  Nature, 331:184-186.
+
+Thanks to James Lake for supplying these data, March 1990. 
+
+The two trees in the file are the most parsimonious trees found.
+
+]
+FORMAT DATATYPE=DNA  MISSING=? GAP=-  INTERLEAVE ;OPTIONS  MSTAXA=UNCERTAIN  IGNORE=UNINFORM ;
+
+MATRIX
+
+[                                    10        20        30        40        50        60        70        80        90        100]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                --------TAC-----CTGGTTGATCCTGCC---AGTAG-CATATGCTTGTCTCAAAGATACCCACTCCC-----GACC-CGGGGAGGTAGTGACGAAAA   [77]
+Rattus_norvegicus           --------TAC-----CTGGTTGATCCTGCC---AGTAG-CATATGCTTGTCTCAAAGATACCCACTCCC-----GACC-CGGGGAGGTAGTGACGAAAA   [77]
+Xenopus_laevis              --------TAC-----CTGGTTGATCCTGCC---AGTAG-CATATGCTTGTCTCAAAGATACCCACTCCC-----GACG-CGGGGAGGTAGTGACGAAAA   [77]
+A._salina                   --------TAC-----CTGGTTGATCCTGCC---AGTAG-CATATGCTTGTCTCAAAGATACCCACTCCC-----AGCA-CGGGGAGGTAGTGACGAAAA   [77]
+S._cerevisiae               --------TAT-----CTGGTTGATCCTGCC---AGTAGTCATATGCTTGTCTCAAAGATACCCAATCCT-----AATT-CAGGGAGGTAGTGACAATAA   [78]
+P.MICA                      ---------AT-----CTGGTTGATCCTGCC---AGTAGTCATATGCTTGTCTCAAAGATACCCAATCCT-----GACA-CAGGGAGGTAGTGACAAGAA   [77]
+D.DISC                      --------TAA-----CTGGTTGATCCTGCC---AGTAGTCATATGCTTGTCTCAAAGATACTCAATCCC-----AATA-CGGGGAAGTAGTGACAATAA   [78]
+T.BRUC                      --------GAT-----CTGGTTGATTCTGCC---AGTAGTCATATGCTTGTTTCAAGGATGCCCAATGTCGAAAAAATA-CGATGAGGCAGCGAAAAGAA   [83]
+E.GRAC                      --------AAT-----CTGGTTGATCCTGCC---AGCAGTCATATGCTTTGTTCAAGGGTGCCCCATGCA-AAGACACT-CTGTGAGGCAGCGACGAACA   [82]
+Zea_mays                    --------TAC-----CTGGTTGATCCTGCC---AGTAGTCATATGCTTGTCTCAAAGATACCCAATCCT-----GACA-CGGGGAGGTAGTGACAATAA   [78]
+D._mobilis                  ------TAACT-----CCGGTTGATCCTGCC---GGTCCCGACCGCTATCGGGGTGGGGTCCGCAATGCG-----GGAAACCGTGACGGGGCCACCCCGA   [81]
+S._solfataricus             --------ATT-----CCGGTTGATCCTGCC---GGACCCGACCGCTATCGGGGTAGGGTCCCCAATGCG-----CGAAAGCGTGAGGGCGCTACCCCGA   [79]
+T._tenax                    --------AAA-----CCGGTTGATCCTGCC---GGACCTGACCGCTATCGGGGTGGGGTCCGCAATGCG-----GGCAACCGCGACGGGGCCACCCCGA   [79]
+M._vannielii                --------ATT-----CCGGTTGATCCCGCC---GGAGGCTACTGCTATTGGGGTTCGATCCGCAATGCA-----CGAAAGTGCGACGGGGGGACCCCAA   [79]
+MB._formicicum              --------AGT-----CCGTTTGATCCTGGC---GGAGGCCACTGCTATTGGGTTTCGATCCGCAATGCA-----CGAAAGTGCGACGGGGGAAACCCAA   [79]
+MS._hungatei                --------ATT-----CTGGTTGATCCTGCC---AGAGGCCACTGCTATCGGGGTTTGATTTACCATGCG-----GGCAACCGTGATAAGGAAACCCCGA   [79]
+H._volacanii                --------ATT-----CCGGTTGATCCTGCC---GGAGGTCATTGCTATTGGGGTCCGATTTACACTGCA-----CGCAAGTGCGATAAGGGGACCCCAA   [79]
+HC._morrhuae                --------ATT-----CCGGTTGATCCTGCC---GGAGGCTATTGCTATCGGGGTCCGATTTACACTGCA-----CGCCAGTGCGATAAGGGGACCCCGA   [79]
+H.HALO                      --------ATT-----CCGGTTGATCCTGCC---GGAGGCCATTGCTATCGGAGTCCGATTTACACTGTA-----CGAAAGTGCGATAAGGGGACTCCGA   [79]
+H._cutirubrum               --------ATT-----CCGGTTGATCCTGCC---GGAGGCCATTGCTATCGGAGTCCGATTTACACTGTA-----CGAAAGTGCGATAAGGGGACTCCGA   [79]
+AN._nidulans                --CAAAATGGA-----GAGTTTGATCCTGGCTCAGGAT--GAACGCTGGCGGCGTGC--TCCGCAATGGG-----CGCAAGCCTGACGGAGCAACGCCGC   [84]
+HE.CHL                      ----------------------GATCCTGGCTCAGGAC--GAACGCTGGCGGCATGC--TCCGCAATGGG-----CGAAAGCCTGACGGAGCAATGCCGC   [69]
+Bc._subtilis                --TTTATCGGA-----GAGTTTGATCCTGGCTCAGGAC--GAACGCTGGCGGCATGC--TCCGCAATGGA-----CGAAAGTCTGACGGAGCAACGCCGC   [84]
+MY.CAP                      ----AAAATGA-----GAGTTTGATCCTGGCTCAGGAT--AAACGCTGGCGGCTGGC--TTCACAATGGA-----CGAAAGTCTGATGAAGCAATGCCGC   [82]
+E._coli                     ---AAATTGAA-----GAGTTTGATCATGGCTCAGATT--GAACGCTGGCGGCAGGC--TGCACAATGGG-----CGCAAGCCTGATGCAGCCATGCCGC   [83]
+MYX.XA                      ---CAATTGGA-----GAGTTTGATCCTGGCTCAGAAC--GAACGCTGGCGGCGTGC--TGCGCAATGGG-----CGAAAGCCTGACGCAGCAACGCCGC   [83]
+P.TEST                      --CGAACTATA-----GAGTTTGATCCTGGCTCAGATT--GAACGCTGGCGGCATGC--TGGACAATGGG-----CGAAAGCCTGATCCAGCAATGCCGC   [84]
+AG.TUM                      -CTCAACTTGA-----GAGTTTGATCCTGGCTCAGAAC--GAACGCTGGCGGCAGGC--TGGACAATGGG-----CGCAAGCCTGATCCAGCCATGCCGC   [85]
+DV.DES                      --TGAACTGGA-----GAGTTTGATTCTGGCTCAGATT--GAACGCTGGCGGCGTGC--TGCGCAATGGG-----CGAAAGCCTGACGCAGCGACGCCGC   [84]
+BA.FRG                      -TTACAACGAA-----GAGTTTGATCCTGGCTCAGGAT--GAACGCTAGCTACAGGC--TGGTCAATGGG-----CGCTAGCCTGAACCAGCCAAGTAGC   [85]
+'Zea_mays_(chloroplast)'    --CTCAT-GGA-----GAGTTCGATCCTGGCTCAGGAT--GAACGCTGGCGGCATGC--TCCGCAATGGG-----CGAAAGCCTGACGGAGCAATGCCGC   [83]
+T.AMIT                      ATCATAGTCAAAATCTGAGTTTGATCCTGGCTCAGAAG--GAACGCTAGCTATATGC--TGGACAATGGG-----CGAAAGCCCGATCCAGCAATATCGC   [91]
+
+[                                    110       120       130       140       150       160       170       180       190       200]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                ATAACA-AT-AC-AGG-------------------ACTCTTTCGAGGCC--CTGTAATTGGAATGAGTCCACTTT-AA---ATCCTTTAACGAGGA-TCC   [148]
+Rattus_norvegicus           ATAACA-AT-AC-AGG-------------------ACTCTTTCGAGGCC--CTGTAATTGGAATGAGTCCACTTT-AA---ATCCTTTAACGAGGA-TCC   [148]
+Xenopus_laevis              ATAACA-AT-AC-AGG-------------------ACTCTTTCGAGGCC--CTGTAATTGGAATGAGTACACTTT-AA---ATCCTTTAACGAGGA-TCT   [148]
+A._salina                   ATAACG-AT-GC-AGG-------------------ACTCATCCGAGGCC--CTGTGATTGGAATGAGTACACTTT-AA---ATCCTTTAACGAGGA-TCC   [148]
+S._cerevisiae               ATAACG-AT-AC-AGG-------------------GCCCATTCG-GGTC--TTGTAATTGGAATGAGTACAATGT-AA---ATACCTTAACGAGGA-ACA   [148]
+P.MICA                      ATAACA-AT-AC-AGG-------------------GCATATCTG---TC--TTGTAATTGGAATGAGTAGAATTT-AA---ATCCCTTTACGAGTA-CCA   [145]
+D.DISC                      ATATCA-AT-AC-CTA-------------------TCCTTTTTG-GAGG----GCAATTGAAATGAACACAAATT-AA---AACTCTTAATTAACA--CA   [145]
+T.BRUC                      ATAGAG-CCGAC-CGT-------------------GCCCTAGTG-CATG--GTTGTTTTCAATGGGGGATACTCA-AACCCATCCAATATCGAGTA-ACA   [157]
+E.GRAC                      GTAGCA-AC-CC-CGTC------------------GGCCTTACG-TGCCGATGGGGCTTGGAATGGACGCTATCC-AA---AGACAGCCGTGAGTA-TCA   [155]
+Zea_mays                    ATAACA-AT-AC-CGG-------------------GCGCGTTAG-TGTC--TGGTAATTGGAATGAGTACAATCT-AA---ATCCCTTAACGAGGA-TCC   [148]
+D._mobilis                  GTGCCC-CC-TT-ACG-------------------GG------G-GCTT--TTCCC-------------CGCTGT-AG---GAAGGCGGGG---GA-ATA   [129]
+S._solfataricus             GTGCCT-CC-GC-AAG-------------------GA------G-GCTT--TTCCC-------------CGCTCT-AA---AAAGGCGGGG---GA-ATA   [127]
+T._tenax                    GTGCCG-GG-CG-AAG-------------------AGCC---CG-GCTT--TTGCC-------------CGGTGT-AA---GGAGCCGGGC---GA-ATA   [130]
+M._vannielii                GTGC-T-CA-TGCACA-------------------GCAT---GG-GCTT--TTATC-------------AAGTGT-AA---ACAGCTTGAG---GA-ATA   [130]
+MB._formicicum              GTGCCA-CT-CTTAAC-------------------GGGG---TG-GCTT--TTCTT-------------AAGTGT-AA---AAAGCTTTTG---GA-ATA   [131]
+MS._hungatei                GTGC----C-AGCACA-------------------GGCT---GG-CTGT--CCACC--------------AGTGT-AA---ATAACT-GGT---GA-AGA   [126]
+H._volacanii                GTGCGAGGG-CA-TAT-------------------AGTC---CTCGCTT--TTCTC-------------GACCGT-AA---GGCGGTCGAG---GA-ATA   [132]
+HC._morrhuae                GTGCGAGGG-CA-TAC-------------------AGTC---CTCGCTT--TTCGT-------------GACCGT-AA---GAAGGTCTCA---GA-ATA   [132]
+H.HALO                      GTGTGAAGG-CA-TAG-------------------AGCC---TTCACTT--TTGTA-------------CACCGT-AA---GGTGGTGCAC---GA-ATA   [132]
+H._cutirubrum               GTGTGAAGG-CA-TAG-------------------AGCC---TTCACTT--TTGTA-------------CACCGT-AA---GGTGGTGCAC---GA-ATA   [132]
+AN._nidulans                GTGGGG-GA-GG-AAGGTT--TT-T-GGACTGTAAACCC---CT-TTTC--TCAGG-------------GAAGAA-GA---AAG----------------   [138]
+HE.CHL                      GTGGGG-GA-TG-AAGGTC--TT-C-GGATTGTAAACCC---TT-GTCT--TCGGG-------------GAAGAA------------GTT----------   [121]
+Bc._subtilis                GTGAGT-GA-TG-AAGGTT--TT-C-GGATCGTAAAGCT---CT-GTTG--TTAGG-------------GAAGAA-CA---AGTACCGTTC---GA-ATA   [150]
+MY.CAP                      GTGAGT-GA-TG-ACGGCC--TT-C-GGGTTGTAAAGCT---CT-GTTG--TAAGG-------------GAAGAA-AA---AATAGAGTAG---GA-A-A   [147]
+E._coli                     GTGTAT-GA-AG-AAGGCC--TT-C-GGGTTGTAAAGTA---CT-TTCA--GCGGG-------------GAGGAAGGG---AGTAAAGTT-----A-ATA   [148]
+MYX.XA                      GTGTGT-GA-TG-AAGGTC--TT-T-GGATTGTAAAGCA---CT-TTCG--ACCGG-------------GAAGAA-AA---CCCGTTGGCT---AACATC   [150]
+P.TEST                      GTGCAG-GA-TG-AAGGCC--CT-C-GGGTTGTAAACTG---CT-TTTG--TACGG-------------AACGAA--A---AGCCTGGGGC---TA-ATA   [149]
+AG.TUM                      GTGAGT-GA-TG-AAGGCC--TT-A-GGGTTGTAAAGCT---CT-TTCA--CCGGA-------------GAAGAT-AA----------------------   [136]
+DV.DES                      GTGAGG-GA-TG-AAGGTT--TT-C-GGATCGTAAACCT---CT-GTCA--GAAGG-------------GAAGAA-AC---TACGTTGTGC---TA-ATC   [150]
+BA.FRG                      GTGAAG-GA-TG-AAGGCT--CTATGGGTCGTAAACTT---CT-TTTA-?-TATAA-------------GAATAA------AGTGCAGTAT---GT-ATA   [151]
+'Zea_mays_(chloroplast)'    GTGGAG-GT-GG-AAGGCC--TA-C-GGGTCGTCAACTT---CT-TTTC--TCGGA-------------GAAGAA-------------------------   [132]
+T.AMIT                      GTGAGT-GA-AG-AAGGGCAATG-C-CGCTTGTAAAGCT---CT-TTCG--TCGAG-------------TGCG---------------------------   [140]
+
+[                                    210       220       230       240       250       260       270       280       290       300]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGCTGCA   [219]
+Rattus_norvegicus           ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGCTGCA   [219]
+Xenopus_laevis              ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGCTGCA   [219]
+A._salina                   ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAACTCCAGCTCCAATAGCGTATATTAAAGTTGCTGCG   [219]
+S._cerevisiae               ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCA   [219]
+P.MICA                      ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCG   [216]
+D.DISC                      ATTG?AG-----------------------------GGCAAGTCTGGTGCCAGCRGCCGCGGTAATTCCAGCTCCAATAGCATATACTAAAGTTGTTGCA   [216]
+T.BRUC                      ATTGGAG-----------------------------GACAAGTCTGGTGCCAGCACCCGCGGTAATTCCAGCTCCAAAAGCGTATATTAATGCTGTTGCT   [228]
+E.GRAC                      ACCGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCTGCGGTAATTCCAGCTCCGAGGGCGTATACTAACATTGCTGCT   [226]
+Zea_mays                    ATTGGAG-----------------------------GGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCA   [219]
+D._mobilis                  AGCGGGG-----------------------------GGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCCCCGCGAGTGGTCGGGACGATTATTGGG   [200]
+S._solfataricus             AGCGGGG-----------------------------GGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGG   [198]
+T._tenax                    AGCGGGG-----------------------------GGTAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCCCCGCGAGTGGTCAGGGTGATTACTGGG   [201]
+M._vannielii                AGGGCTG-----------------------------GGCAAGTTCGGTGCCAGCAGCCGCGGTAATACCGACGGCCCGAGTGGTAGCCACTCTTATTGGG   [201]
+MB._formicicum              AGAGCTG-----------------------------GGCAAGACCGGTGCCAGCCGCCGCGGTAACACCGGCAGCTCAAGTGGTGGCCGTTTTTATTGGG   [202]
+MS._hungatei                AGGGC?G-----------------------------GGCAAG?????--------------GTAATACCG-------GAGTGGTGGCCGCTATTACTGGG   [176]
+H._volacanii                AGAGCTG-----------------------------GGCAAGACCGGTGCCAGCCGCCGCGGTAATACCGGCAGCTCAAGTGATGACCGATATTATTGGG   [203]
+HC._morrhuae                AGAGCTG-----------------------------GGCAAGACCGGTGCCAGCCGCCGCGGTAATACCGGCAGCTCGAGTGATAGCCACTATTATTGGG   [203]
+H.HALO                      AGGACTG-----------------------------GGCAAGACCGGTGCCAGCCGCCGCGGTAATACCGGCAGTCCGAGTGATGGCCGATCTTATTGGG   [203]
+H._cutirubrum               AGGACTG-----------------------------GGCAAGACCGGTGCCAGCCGCCGCGGTAATACCGGCAGTCCGAGTGATGGCCGATCTTATTGGG   [203]
+AN._nidulans                -----------TGACGGTACCTGAGGAATAAGCCTCGGCTAATTCCGTGCCAGCAGCCGCGGTAATACGGGAGAGGCAAGCGTTATCCGGAATTATTGGG   [227]
+HE.CHL                      ----------TTGACGGTACCCGAGGAGGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTGTCCGGAATGACTGGG   [211]
+Bc._subtilis                GGGCGGTACCTTGACGGTACCTAACCAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGG   [250]
+MY.CAP                      TGACTTTATCTTGACAGTACCTTACCAGAAAGCCACGGCTAACTATGTGCCAGCAGCCGCGGTAATACATAGGTGGCAAGCGTTATCCGGATTTATTGGG   [247]
+E._coli                     CCTTTGCTCATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGG   [248]
+MYX.XA                      CAACGG---CTTGACGGTACCGGGAGAAGAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTGTTCGGAATTATTGGG   [247]
+P.TEST                      TCCCCGGGTCATGACGGTACCGTAAGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTAATCGGAATTACTGGG   [249]
+AG.TUM                      -----------TGACGGTATCCGGAGAAGAAGCCCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGGGCTAGCGTTGTTCGGAATTACTGGG   [225]
+DV.DES                      AG-CAGCGTACTGACGGTACCTTCAAAGGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGG   [249]
+BA.FRG                      C-----TGTTTTGTATGTATTATATGAATAAGGATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGG   [246]
+'Zea_mays_(chloroplast)'    -------ACAATGACGGTATCTGAGGAATAAGCATCGGCTAACTCTGTGCCAGCAGCCGCGGTAAGACAGAGGATGCAAGCGTTATCCGGAATGATTGGG   [225]
+T.AMIT                      ----CGATCAT-GACAGGACTCGAGGAAGAAGCCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAAGACGGGGGGGGCAAGTGTTCTTCGGAATGACTGGG   [235]
+
+[                                    310       320       330       340       350       360       370       380       390       400]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                GTTAAAAAGCTCGTAGTTGGATCTTGGGAGCGGGCGGGCGGTCCGCC-----------------------------------------------------   [266]
+Rattus_norvegicus           GTTAAAAAGCTCGTAGTTGGATCTTGGGAGCGGGCGGGCGGTCCGCC-----------------------------------------------------   [266]
+Xenopus_laevis              GTTAAAAAGCTCGTAGTTGGATCTTGGGATCGAGCTGGCGGTCCGCCGCGAGGC----------------------------------------------   [273]
+A._salina                   GTTAAAAAGCTCGTAGTTGGATATGGGTCTCGGTCGGGTGGTGCCGCCTCAC------------------------------------------------   [271]
+S._cerevisiae               GTTAAAAAGCTCGTAGTTGAACTTTGGGCCCGGTTGGCCGGTCCGATTTTTT------------------------------------------------   [271]
+P.MICA                      GTTAAAAAGCTCGTAGTTGGATTTCTGCCGAGGACGACCGGTCCGCCTCT--------------------------------------------------   [266]
+D.DISC                      GTTAAAAAGCTCGTAGTTGAAGTTTAAGGTTTACCGGGTTTATGTCATTTACCACTT-------------------------------------------   [273]
+T.BRUC                      GTTAAAGGGTTCGTAGTTGAACTGTGGGCCACGTAGTTTTGTGCCGTGCCAGTC----------CCGTCCA--------CCTCGGAC-------GTGTT-   [302]
+E.GRAC                      GTTAAAACACTTGTAGTCTGCCTACGGGCTGCA-GGTCTGCTGGGTGGCCGGTTTGTTGTTTCTCTGGCCAGGGAAGGACCTCGGTTCGACCCTGTGTTG   [325]
+Zea_mays                    GTTAAAAAGCTCGTAGTTGGACCTTGGGCCGGGCCGGGTGCCGCCGCC----------------------------------------------------   [267]
+D._mobilis                  CCTAAAGCGCCCGTAGCCG--------GCCCGGCAAGTCCCCTC--------------------------------------------------------   [236]
+S._solfataricus             CCTAAAGCGCCTGTAGCCG--------GCCCACCAAGTCGCCCC--------------------------------------------------------   [234]
+T._tenax                    CTTAAAGCGCCCGTAGCCG--------GCCCGGCAAGTCGCTCC--------------------------------------------------------   [237]
+M._vannielii                CCTAAAGCGTCCGTAGCCG--------GTCCAGTAAGTCCCTGT--------------------------------------------------------   [237]
+MB._formicicum              CCTAAAGCGTTCGTAGCCG--------GCTTGATAAGTCTCTGG--------------------------------------------------------   [238]
+MS._hungatei                CTTAAAGGGTCCGTAGCTG--------GATATACAAGTCCCTTG--------------------------------------------------------   [212]
+H._volacanii                CCTAAAGCGTCCGTAGCCG--------GCCACGAAGGTTCATCG--------------------------------------------------------   [239]
+HC._morrhuae                CCTAAAGCGTCCGTAGCCG--------GCCGAACAGGTCCGTCG--------------------------------------------------------   [239]
+H.HALO                      CCTAAAGCGTCCGTAGCTG--------GCTGAACAAGTCCGTTG--------------------------------------------------------   [239]
+H._cutirubrum               CCTAAAGCGTCCGTAGCTG--------GCTGAACAAGTCCGTTG--------------------------------------------------------   [239]
+AN._nidulans                CGTAAAGCGCCTGCAGGCG--------GTTAATCAAGTCTGTTG--------------------------------------------------------   [263]
+HE.CHL                      CGTAAAGCGCGTGCAGGCG--------GACATGTAAGTCTGAGG--------------------------------------------------------   [247]
+Bc._subtilis                CGTAAAGCGCTCGCAGGCG--------GTTTCTTAAGTCTGATG--------------------------------------------------------   [286]
+MY.CAP                      CGTATAGGGTGCGTAGGCG--------GTTTTGCAAGTTTGAGG--------------------------------------------------------   [283]
+E._coli                     CGTAAAGCGCACGCAGGCG--------GTTTGTTAAGTCAGATG--------------------------------------------------------   [284]
+MYX.XA                      CGTAAAGCGCGTGTAGGCG--------GCGTGACAAGTCGGGTG--------------------------------------------------------   [283]
+P.TEST                      CGTAAAGCGTGCGCAGGCG--------GTTTTGTAAGACAGTGG--------------------------------------------------------   [285]
+AG.TUM                      CGTAAAGCGCACGTAGGCG--------GATATTTAAGTCAGGGG--------------------------------------------------------   [261]
+DV.DES                      CGTAAAGCGCACGTAGGCT--------GTAGTGTAAGTCAGGGG--------------------------------------------------------   [285]
+BA.FRG                      TTTAAAGGGAGCGTAGGTG--------GACTGGTAAGTCAGTTG--------------------------------------------------------   [282]
+'Zea_mays_(chloroplast)'    CGTAAAGCGTCTGTAGGTG--------GCTTTTCAAGTCCGCCG--------------------------------------------------------   [261]
+T.AMIT                      CGTAAAGGGCACGTAGGCG--------GTGAATCGGGTTGAAAG--------------------------------------------------------   [271]
+
+[                                    410       420       430       440       450       460       470       480       490       500]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                ---------GCGAGGCGAGCCACCGCCC-GTCCCC---GCCCCTTGCCTCTCGG-CG-CCCC--------------------------------------   [313]
+Rattus_norvegicus           ---------GCGAGGCGGCTCAGCGCCCTGTCCCC---AGCCCCTGCCTCTCGG-CG-CCCC--------------------------------------   [314]
+Xenopus_laevis              --------GGCTAC-CGCCTGTCCCAGCCCCTGCCTCTCGGCGCCTCCCCGATG-CT-CTTG--------------------------------------   [324]
+A._salina                   --------GGTGGTCACTGCCTCGATCGGACAATTCATTGGATCGTTCGGGGTG-CT-CTTA--------------------------------------   [323]
+S._cerevisiae               --------CGTGTACTGGATTTCCAACGGGGCCTT---TCCTTCTGGCTAACCT-TG-AGTC--------------------------------------   [320]
+P.MICA                      ---------GGGTGAGTATCTGCTCGCCTGGCATC---TTCTT--GGAGAACGT-AG-CTGG--------------------------------------   [312]
+D.DISC                      --------CGTGGT-TAAATCGACACCGGTATCTCT-TTCTTAATAGTTCAGCT-TG-TATT--------------------------------------   [323]
+T.BRUC                      ------------TTGACCCACGCCCTCGTGGCCCG---TGAACACACTCAGATA-CA-AG--AAACACGGGAGCGGTTCCT------------CCTCACT   [371]
+E.GRAC                      GGCTGCAACGGCTGGACTCAACCCCCAGTGGTACG---TCCCTGCGCCCACCTC-TC-AGTCGATGGTGAGATCTGCTCCTGCCAAAAGTCTGCTTCACT   [420]
+Zea_mays                    ---------GTACG-GGCAGAACCGACCGGCTCGA---CCCTTCTGCCGGCGAT-GC-GCTC--------------------------------------   [314]
+D._mobilis                  ---------------CTAAATTCCCG-------------------GGCTCAACC-CG-GG----------------------------------------   [260]
+S._solfataricus             ---------------TTAAAGTCCCC-------------------GGCTCAACC-GG-GG----------------------------------------   [258]
+T._tenax                    ---------------TGAAATCCCCA-------------------GGCTCAACC-TG-GG----------------------------------------   [261]
+M._vannielii                ---------------TTAAATTCTCT-------------------GGCTTAACCA-G-AG----------------------------------------   [261]
+MB._formicicum              ---------------TGAAATCTCAC-------------------GGCTTAACCGTG-AG----------------------------------------   [263]
+MS._hungatei                ---------------AGAAATCCGCC-------------------GGCTTAACCG-G-TG----------------------------------------   [236]
+H._volacanii                ---------------GGAAATCCGCC-------------------AGCTCAACT-GG-CG----------------------------------------   [263]
+HC._morrhuae                ---------------GGAAATCCACC-------------------CGCTCAACG-GGTGG----------------------------------------   [264]
+H.HALO                      ---------------GGAAATCTGTC-------------------CGCTTAACG-GG-CA----------------------------------------   [263]
+H._cutirubrum               ---------------GGAAATCTGTC-------------------CGCTTAACG-GG-CA----------------------------------------   [263]
+AN._nidulans                ---------------TCAAAGCGTGG-------------------GGCTCAACC-TC-AT----------------------------------------   [287]
+HE.CHL                      ---------------TGAAAGCTTGG-------------------AGCTCAACT-CC-GA----------------------------------------   [271]
+Bc._subtilis                ---------------TGAAAGCCCCC-------------------GGCTCAACC-GG-GG----------------------------------------   [310]
+MY.CAP                      ---------------TTAAAGTCCGG-------------------AGCTCAACT-CC-GG----------------------------------------   [307]
+E._coli                     ---------------TGAAATCCCCG-------------------GGCTCAACC-TG-GG----------------------------------------   [308]
+MYX.XA                      ---------------TGAAAGCCCTC-------------------AGCTCAACT-GA-GG----------------------------------------   [307]
+P.TEST                      ---------------TGAAATCCCCG-------------------GGCTCAACC-TG-GG----------------------------------------   [309]
+AG.TUM                      ---------------TGAAATCCCAG-------------------AGCTCAACT-CT-GG----------------------------------------   [285]
+DV.DES                      ---------------TGAAATCCCAC-------------------GGCTCAACC-GT-GG----------------------------------------   [309]
+BA.FRG                      ---------------TGAAAGTTTGC-------------------GGCTCAACC-GT-AA----------------------------------------   [306]
+'Zea_mays_(chloroplast)'    ---------------TCAAATCCCAG-------------------GGCTCAACC-CT-GG----------------------------------------   [285]
+T.AMIT                      ---------------TGAAAGTCGCC-------------------AAA--AAGT-GG-CG----------------------------------------   [293]
+
+[                                    510       520       530       540       550       560       570       580       590       600]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                --------------CTCGATGCTCT-TAG-----------------------------------CT-GAGTGTCCCGCGGGGCCCGAAGCGTTTACTT--   [360]
+Rattus_norvegicus           --------------CTCGATGCTCT-TAG-----------------------------------CT-GAGTGTCCCGCGGGGCCCGAAGCGTTTACTT--   [361]
+Xenopus_laevis              --------------ACTGAGTGTCC-CGG-----------------------------------GG-GC--------------CCGAAGCGTTTACTT--   [357]
+A._salina                   --------------ACCGAGTGTCC-TGG-----------------------------------GT-GG--------------CCGATACGTTTACTT--   [356]
+S._cerevisiae               --------------CTTGTGGCTCT-TGG-----------------------------------CG-AA--------------CCAGGACTTTTACTT--   [353]
+P.MICA                      --------------CACTTGACTGTGTGG-------------------------------TGCGGT-AT--------------CCAGGACTTTTACTT--   [350]
+D.DISC                      --------------ATCTTTGATAG-TGC-----------------------------------TT-GT--------------TTGGACATTTCACTG--   [356]
+T.BRUC                      --------------TTCACGCATGT-CATGCATGCGAGGGG------------------------G-CG--------------TCCGTGAATTTTACTGT   [417]
+E.GRAC                      GCAGGCCAAAGCGGTTTATGCCTCC-CGCACTGGCAACGGACACCAACAGGGGACCCAGC--CTCG-AG--------------CTGGGTAGTCTACCTCT   [502]
+Zea_mays                    --------------CTGGCCTTAAC-TGG-----------------------------------CC-GGGT------CGTGCCTCCGGGCCGTTACTT--   [355]
+D._mobilis                  -------------------GACTGG-AGG-----------------------------------GG-A--------------------------------   [272]
+S._solfataricus             -------------------AACTGG-GGG-----------------------------------CG-A--------------------------------   [270]
+T._tenax                    -------------------GGCAGG-GGG-----------------------------------CG-A--------------------------------   [273]
+M._vannielii                -------------------GACTGG-CAG-----------------------------------GG-A--------------------------------   [273]
+MB._formicicum              -------------------AATTGC-TGG-----------------------------------AG-A--------------------------------   [275]
+MS._hungatei                -------------------GCGTTC-AGG-----------------------------------GG-A--------------------------------   [248]
+H._volacanii                -------------------GGCGTC-CGG-----------------------------------TGAA--------------------------------   [276]
+HC._morrhuae                -------------------GACGTC-CGG-----------------------------------CGGA--------------------------------   [277]
+H.HALO                      -------------------GGCGTC-CAG-----------------------------------CGGA--------------------------------   [276]
+H._cutirubrum               -------------------GGCGTC-CAG-----------------------------------CGGA--------------------------------   [276]
+AN._nidulans                -------------------ACAGGC-AAT-----------------------------------GG-A--------------------------------   [299]
+HE.CHL                      -------------------AACGGC-CTT-----------------------------------GG-A--------------------------------   [283]
+Bc._subtilis                -------------------AGGGTC-ATT-----------------------------------GG-A--------------------------------   [322]
+MY.CAP                      -------------------TTCGCC--TT-----------------------------------GA-A--------------------------------   [318]
+E._coli                     -------------------AACTGC-ATC-----------------------------------TG-A--------------------------------   [320]
+MYX.XA                      -------------------AAGTGC-GCC-----------------------------------CG-A--------------------------------   [319]
+P.TEST                      -------------------AACTGC-CAT-----------------------------------TG-T--------------------------------   [321]
+AG.TUM                      -------------------AACTGC-CTT-----------------------------------TG-A--------------------------------   [297]
+DV.DES                      -------------------AACTGC-CTT-----------------------------------TG-A--------------------------------   [321]
+BA.FRG                      -------------------AATTGC-AGC-----------------------------------TG-A--------------------------------   [318]
+'Zea_mays_(chloroplast)'    -------------------ACAGGC-GGT-----------------------------------GG-A--------------------------------   [297]
+T.AMIT                      -------------------GAATGC-TCT-----------------------------------CG-A--------------------------------   [305]
+
+[                                    610       620       630       640       650       660       670       680       690       700]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                ----------------------------------TGAAAAAATTAGAGTGTTCAAAGCA----------GGCCCGAGCCGCCT-------GGATACCG-C   [408]
+Rattus_norvegicus           ----------------------------------TGAAAAAATTAGAGTGTTCAAAGCA----------GGCCCGAGCCGCCT-------GGATACCG-C   [409]
+Xenopus_laevis              ----------------------------------TGAAAAAATTAGAGTGTTCCAAGCA----------GGC-CGCGTCGCCT-------GGATACTT-C   [404]
+A._salina                   ----------------------------------TGAACAAATTAGAGTGCTTAAAGCA----------GGTGCACCGCGCCT-------GAATATCA-C   [404]
+S._cerevisiae               ----------------------------------TGAAAAAATTAGAGTGTTCAAAGCA----------GGC--GTATTGCTC-------GAATATAT-T   [399]
+P.MICA                      ----------------------------------TGAGGAAATTAGAGTGTTTCAAGCC----------AGT---TACGCCTT-------GAATACAT-T   [395]
+D.DISC                      ----------------------------------TGAGAAAATTGTGGTGTTTAAAGCA----------GGC--GTCTCGCCT-------GATCTTTTGC   [403]
+T.BRUC                      GACC--------------------------------AAAAA----AGTGCGACCAAAGCAGTCTGCCGAC-T--TGAATTACAAAGCATGGGATAACG-A   [477]
+E.GRAC                      GGTCCACCACCGGAGCCCACCGTCTTCGACACCCTGGAAAACTCAGTGTGCTCAAAGCATCCCCGCGACGGC--TGAATGTCCATCCATGGAATGTCA-A   [599]
+Zea_mays                    ----------------------------------TGAAGAAATTAGAGTGCTCAAAGCA----------AGC---CATCGCTCT------GGATACAT-T   [401]
+D._mobilis                  ---------------------------------------------------------------------------TACTGCCG-------GGCTA-----   [285]
+S._solfataricus             ---------------------------------------------------------------------------TACTGGTG-------GGCTA-----   [283]
+T._tenax                    ---------------------------------------------------------------------------TACTGCCG-------GGCTA-----   [286]
+M._vannielii                ---------------------------------------------------------------------------TACTGCTG-------GACTT-----   [286]
+MB._formicicum              ---------------------------------------------------------------------------TACTATTA-------GCCTT-----   [288]
+MS._hungatei                ---------------------------------------------------------------------------AACTGTAT-------TTCTA-----   [261]
+H._volacanii                ---------------------------------------------------------------------------AACCACGT-------GGCTT-----   [289]
+HC._morrhuae                ---------------------------------------------------------------------------AACCAGTC-------GGCTT-----   [290]
+H.HALO                      ---------------------------------------------------------------------------AACTGTTC-------AGCTT-----   [289]
+H._cutirubrum               ---------------------------------------------------------------------------AACTGTTC-------AGCTT-----   [289]
+AN._nidulans                ---------------------------------------------------------------------------AACTGATT-------GACTA-----   [312]
+HE.CHL                      ---------------------------------------------------------------------------AACTGGAT-------GTCTT-----   [296]
+Bc._subtilis                ---------------------------------------------------------------------------AACTGGGG-------AACTT-----   [335]
+MY.CAP                      ---------------------------------------------------------------------------GACTGTTT-------TACTA-----   [331]
+E._coli                     ---------------------------------------------------------------------------TACTGGCA-------AGCTT-----   [333]
+MYX.XA                      ---------------------------------------------------------------------------AACTGTTG-------TGCTT-----   [332]
+P.TEST                      ---------------------------------------------------------------------------GACTGCAA-------GGCTA-----   [334]
+AG.TUM                      ---------------------------------------------------------------------------TACTGGGT-------ATCTT-----   [310]
+DV.DES                      ---------------------------------------------------------------------------TACTGCAC-------AACTT-----   [334]
+BA.FRG                      ---------------------------------------------------------------------------TACTGTCA-------GTCTT-----   [331]
+'Zea_mays_(chloroplast)'    ---------------------------------------------------------------------------AACTACCA-------AGCTG-----   [310]
+T.AMIT                      ---------------------------------------------------------------------------AACCAATT-------CACTT-----   [318]
+
+[                                    710       720       730       740       750       760       770       780       790       800]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                AGCTAGGAATAA---TGGAATAGGA-------------C-CGCGGTTCTATTTTG-------------TTGGTTTTC------GGAACT-G---------   [462]
+Rattus_norvegicus           AGCTAGGAATAA---TGGAATAGGA-------------C-CGCGGTTCTATTTTG-------------TTGGTTTTC------GGAACT-G---------   [463]
+Xenopus_laevis              AGCTAGGAATAA---TGGAATAGGA-------------C-TCCGGTTCTATTTTG-------------TTGGTTTTC------GGAACT-G---------   [458]
+A._salina                   AGCATGGAATGA---TGGAATAGGA-------------C-CTCGGTCTTATTATG-------------TTGGTTTTC------TGGACTTG---------   [459]
+S._cerevisiae               AGCATGGAATAA---TAGAATAGGA-------------CGTTTGGTTCTATTTTG-------------TTGGTTTCT------AGGACC-A---------   [454]
+P.MICA                      AGCATGGAATAA---TAAGATAGGA-------------C-CTCGGTTCTATTTTG-------------TTGGTTTCT------AGAGCT-G---------   [449]
+D.DISC                      AGCATGGTATGA---TGAAACATGA-------------CATTTTACGCTATTGGT-------------TTGCGTTTA------AAG--------------   [454]
+T.BRUC                      AGCATCAGCCCTGGG-GCCACCGTTTCGGCTTTTGTTGGTTTTAGAAGTCCTTGGGAGATTATGGGGCCGCGTGCCTTGGGTCGGTGTT-TCGTGTCTCA   [575]
+E.GRAC                      GGCATCGACCAAGTGTGGCA----TTGGAGTTGTGCTGCCTTGGGGCCCACTCTGGACAACCTGGTGGTGTGTTCCTG----CAGGATC-AACAGGATCG   [690]
+Zea_mays                    AGCATGGGATAA---CATCATAGGA-------------T-TCCGGTCCTATTGTG-------------TTGGCCTTC------GGGATC-G---------   [455]
+D._mobilis                  ----------------------------------------------------------------------------------------------------   [285]
+S._solfataricus             ----------------------------------------------------------------------------------------------------   [283]
+T._tenax                    ----------------------------------------------------------------------------------------------------   [286]
+M._vannielii                ----------------------------------------------------------------------------------------------------   [286]
+MB._formicicum              ----------------------------------------------------------------------------------------------------   [288]
+MS._hungatei                ----------------------------------------------------------------------------------------------------   [261]
+H._volacanii                ----------------------------------------------------------------------------------------------------   [289]
+HC._morrhuae                ----------------------------------------------------------------------------------------------------   [290]
+H.HALO                      ----------------------------------------------------------------------------------------------------   [289]
+H._cutirubrum               ----------------------------------------------------------------------------------------------------   [289]
+AN._nidulans                ----------------------------------------------------------------------------------------------------   [312]
+HE.CHL                      ----------------------------------------------------------------------------------------------------   [296]
+Bc._subtilis                ----------------------------------------------------------------------------------------------------   [335]
+MY.CAP                      ----------------------------------------------------------------------------------------------------   [331]
+E._coli                     ----------------------------------------------------------------------------------------------------   [333]
+MYX.XA                      ----------------------------------------------------------------------------------------------------   [332]
+P.TEST                      ----------------------------------------------------------------------------------------------------   [334]
+AG.TUM                      ----------------------------------------------------------------------------------------------------   [310]
+DV.DES                      ----------------------------------------------------------------------------------------------------   [334]
+BA.FRG                      ----------------------------------------------------------------------------------------------------   [331]
+'Zea_mays_(chloroplast)'    ----------------------------------------------------------------------------------------------------   [310]
+T.AMIT                      ----------------------------------------------------------------------------------------------------   [318]
+
+[                                    810       820       830       840       850       860       870       880       890       900]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                -------------------------AGGCCATGATT----------------------------------------AAGAGGGACGGCCGGGGGCATTCG   [497]
+Rattus_norvegicus           -------------------------AGGCCATGATT----------------------------------------AAGAGGGACGGCCGGGGGCATTCG   [498]
+Xenopus_laevis              -------------------------GGGCCATGATT----------------------------------------AAGAGGGACGGCCGGGGGCATTCG   [493]
+A._salina                   -------------------------AGGTAATGGTT----------------------------------------AACAGAGACAGACGGGGGCATTCG   [494]
+S._cerevisiae               -------------------------TCGTAATGATT----------------------------------------AATAGGGACGGTCGGGGGCATCGG   [489]
+P.MICA                      -------------------------AGGTAATGATT----------------------------------------AATAGGGATAGTTGGGGGCATTCG   [484]
+D.DISC                      --------------------------TGTAATGATT----------------------------------------AATAGGGATGGATGGGGGTGTTCA   [488]
+T.BRUC                      TTTTTGTGGCGCGCACATTCGGCTCTTCGTGATGTTTTTTTACATTCATTGCGACGCGCGGCTTCCAGGAAT--GAAGGAGGGTAGTTCGGGGGAGAACG   [673]
+E.GRAC                      TTGCCCTGCCTGGCC--TTCGGGTCTCGTCAGGCTTCGTCCCCTGTCCCTGCAGCTTGCACCCATCGATCGTAAGTGATGGGACTGTTCGGGGTGAAAGA   [788]
+Zea_mays                    -------------------------GAGTAATGATT----------------------------------------AATAGGGACAGTCGGGGGCATTCG   [490]
+D._mobilis                  -------------------------------------------------------------------------------GGGGGTGGGAGAGGCCGAGGG   [306]
+S._solfataricus             -------------------------------------------------------------------------------GGGGGCGGGAGAGGCGGGGGG   [304]
+T._tenax                    -------------------------------------------------------------------------------GGGGGCGGGAGAGGCCGCCGG   [307]
+M._vannielii                -------------------------------------------------------------------------------GGGACCGGGAGAGGACAAGGG   [307]
+MB._formicicum              -------------------------------------------------------------------------------GAGGCCGGGAGAGGTTAGCGG   [309]
+MS._hungatei                -------------------------------------------------------------------------------GGGACCGGGAGAGGTGAGAGG   [282]
+H._volacanii                -------------------------------------------------------------------------------GGGACCGGAAGGCTCGAGGGG   [310]
+HC._morrhuae                -------------------------------------------------------------------------------GGGGCCGGGAGACCAGAGAGG   [311]
+H.HALO                      -------------------------------------------------------------------------------GGGACCGGAAGACCTGAGGGG   [310]
+H._cutirubrum               -------------------------------------------------------------------------------GGGACCGGAAGACCTGAGGGG   [310]
+AN._nidulans                -------------------------------------------------------------------------------GAGTATGGTAGGGGTAGCGGG   [333]
+HE.CHL                      -------------------------------------------------------------------------------GAGAGATGGAGAGGATAGCGG   [317]
+Bc._subtilis                -------------------------------------------------------------------------------GAGTGCAGAAGAGGAGAGTGG   [356]
+MY.CAP                      -------------------------------------------------------------------------------GAATGCAAGAGAGGTAAGCGG   [352]
+E._coli                     -------------------------------------------------------------------------------GAGTCTCGTAGAGGGGGGTAG   [354]
+MYX.XA                      -------------------------------------------------------------------------------GAGTGCCGGA?AGGGTGGCGG   [353]
+P.TEST                      -------------------------------------------------------------------------------GAGTGCGGCAGAGGGGGATGG   [355]
+AG.TUM                      -------------------------------------------------------------------------------GAGTATGGAAGAGGTAAGTGG   [331]
+DV.DES                      -------------------------------------------------------------------------------GAATCCGGGAGAGGGTGGCGG   [355]
+BA.FRG                      -------------------------------------------------------------------------------GAGTACAGTAGAGGTGGGCGG   [352]
+'Zea_mays_(chloroplast)'    -------------------------------------------------------------------------------GAGTACGGTAGGGGCAGAGGG   [331]
+T.AMIT                      -------------------------------------------------------------------------------GAGTGAGACAGAGGAGAGTGG   [339]
+
+[                                    910       920       930       940       950       960       970       980       990       1000]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                TATTGCGCCGCTA--GAGGTGAAATTCTTGGACCGGCGCAAGACGGACCAGAGCGAAAGCATTTGCCAAGAATGTTTTCATTAATCAAGAACGAAAGTCG   [595]
+Rattus_norvegicus           TATTGCGCCGCTA--GAGGTGAAATTCTTGGACCGGCGCAAGACGGACCAGAGCGAAAGCATTTGCCAAGAATGTTTTCATTAATCAAGAACGAAAGTCG   [596]
+Xenopus_laevis              TATTGTGCCGCTA--GAGGTGAAATTCTTGGACCGGCGCAAGACGAACCAAAGCGAAAGCATTTGCCAAGAATGTTTTCATTAATCAAGAACGAAAGTCG   [591]
+A._salina                   TACTGCGACGCTA--GAGGTGAAATTCTTGGACCGTCGCAAGACGAACAACTGCGAACAAGTTTGCCAAGAATGTTTTCATTAATCAAGAACGAAAGTTA   [592]
+S._cerevisiae               TATT-CAATTGTC--GAGGTGAAATTCTTGGATTTATTGAAGACTAACTACTGCGAAAGCGTTTGCCAAGGACGTTTTCGTTAATCAAGAACGAAAGTTG   [586]
+P.MICA                      TATT-TAACTGGTCAGAGGTGAAATTCTCGGATTTGTTAAAGACGGACTACTGCGAAAGCATTTGCCAAGGATGTTTTCATTGATCAAGAACGAAAGTTA   [583]
+D.DISC                      TATTGGTGGGCGA--GAGGTGAAATTCGTTGACCCTATCAAGATGAACTTCTGCGAAAGCATTCACCAAATACTTCCCCATTAATCAAGAACGAAAGTTT   [586]
+T.BRUC                      TACT-GGTGCGTCA-GAGGTGAAATTCTTAGACCGCACCAAGACGAACTACAGCGAAGGCATTCTTCAAGGATACCTTCCTCAATCAAGAACCAAAGTGT   [771]
+E.GRAC                      TACG-GGAGCGCCA-GAGGTGAAATTCTTAGATCGCTGCCAGATCCACTGCAGCGAAGGCGTTCTGCAAGTGCACGTCCGTCGATCAAGAATGAGAGTTC   [886]
+Zea_mays                    TATTTCATAGTCA--GAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTG   [588]
+D._mobilis                  TACTCCCGGGGTA--GGGGCGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGAAGGCGCTCGGCTGGAACACGCCCGACGGTGAGGGGCGAAAGCCG   [404]
+S._solfataricus             TACTCCCGGAGTA--GGGGCGAAATCCTTAGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTAGAACGCGCCCGACGGTGAGAGGCGAAAGCCG   [402]
+T._tenax                    TACTCCGGGGGTA--GGGGCGAAATCCTATAATCCCCGGAGGACCACCAGTGGCGAAAGCGGGCGGCCAGAACGCGCCCGACGGTGAGGGGCGAAAGCCG   [405]
+M._vannielii                TACTCCAGGGGTA--GCGGTGAAATGTGTTGATCCTTGGAGGACCACCTATGGCGAAGGCACTTGTCTGGAACGGGTCCGACGGTGAGGGACGAAAGCCA   [405]
+MB._formicicum              TACTCCCGGGGTA--GGGGTGAAATCCTATAATCCCGGGAGGACCACCTGTGGCGAAGGCGGCTAACTGGAACGGGCCTGACGGTGAGTAACGAAAGCCA   [407]
+MS._hungatei                TACTGCCGGGGTA--GGAGTGAAATCCTGTAATCCCGGTGGGACCACCTATGGCGAAGGCATCTCACCAGAACGGGTCCGACAGTGAGGGACGAAAGCTG   [380]
+H._volacanii                TACGTCCGGGGTA--GGAGTGAAATCCCGTAATCCTGGACGGACCACCGATGGCGAAAGCACCTCGAGAAGACGGATCCGACGGTGAGGGACGAAAGCTA   [408]
+HC._morrhuae                TACGTCCGGGGTA--GGAGTGAAATCCTGTAATCCTGGACGGACCACCGGTAGCGAAAGCGTCTCTGGAGAACGGACCCGACGGTGAGGGACGAAAGCTT   [409]
+H.HALO                      TACGTCTGGGGTA--GGAGTGAAATCCTGTAATCCTGGACGGACCGCCGGTGGCGAAAGCGCCTCAGGAGAACGGATCCGACAGTGAGGGACGAAAGCTA   [408]
+H._cutirubrum               TACGTCTGGGGTA--GGAGTGAAATCCTGTAATCCTGGACGGACCGCCGGTGGCGAAAGCGCCTCAGGAGAACGGATCCGACAGTGAGGGACGAAAGCTA   [408]
+AN._nidulans                AATTCCAGGTGTA--GCGGTGAAATGCGTAGATATCTGGAAGAACACCAGCGGCGAAAGCGCGCTACTGGGCCATAACTGACGCTCATGGACGAAAGCTA   [431]
+HE.CHL                      AATTCCCGGTGTA--GCGGTGAAATGCGTAGATATCGGGAGGAACACCCGTGGCGAAGGCGGCTATCTGGACATTATCTGACGCTGAGGCGCGAAAGCGT   [415]
+Bc._subtilis                AATTCCACGTGTA--GCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGAGCGAAAGCGT   [454]
+MY.CAP                      AATTCCATGTGTA--GCGGTGAAATGCGTAGATATATGGAAGAACACCTGTGGCGAAAGCGGCTTACTGGCTTGTTATTGACGCTGAGGCACGAAAGCGT   [450]
+E._coli                     AATTCCAGGTGTA--GCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGT   [452]
+MYX.XA                      AATTCCCCAAGTA--GAGGTGAAATTCGTAGATATGGGGAGGAACACCGGTGGCGAAGGCGGCCACCTGGACGGTAACTGACGCTGAGACGCGAAAGCGT   [451]
+P.TEST                      AATTCCGCGTGTA--GCAGTGAAATGCGTAGATATGCGGAGGAACACCGATGGCGAAGGCAATCCCCTGGGCCTGCACTGACGCTCATGCACGAAAGCGT   [453]
+AG.TUM                      AATTCCGAGTGTA--GAGGTGAAATTCGTAGATATTCGGAGGAACGCCAGTGGCGAAGGCGGCTTACTGGTCCATTACTGACGCTGAGGTGCGAAAGCGT   [429]
+DV.DES                      AATTCCAGGTGTA--GGAGTGAAATCCGTAGATATCTGGAGGAACATCAGTGGCGAAGGCGGCCACCTGGACCGGTATTGACGCTGAGGTGCGAAAGCGT   [453]
+BA.FRG                      AATTCGTGGTGTA--GCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGT   [450]
+'Zea_mays_(chloroplast)'    AATTTCCGGTGGA--GCGGTGAAATGCATTGAGATCGGAAAGAACACCAACGGCGAAAGCACTCTGCTGGGCCGACACTGACACTGAGAGACGAAAGCTA   [429]
+T.AMIT                      AATTTCGTGTGTA--GGGGTGAAATCCGTAGATCTACGAAGGAACGCCAAAAGCGAAGGCAGCTCTCTGGGTCCCTACCGACGCTGGGGTGCGAAAGCAT   [437]
+
+[                                    1010      1020      1030      1040      1050      1060      1070      1080      1090      1100]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                GAGGTTCGAAGACGATCAGATACCGTCGTAGTTCCG--ACCATAAACGATGCCGACCGGCG-------------ATGCGGCG------------------   [662]
+Rattus_norvegicus           GAGGTTCGAAGACGATCAGATACCGTCGTAGTTCCG--ACCATAAACGATGCCGACTGGCG-------------ATGCGGCG------------------   [663]
+Xenopus_laevis              GAGGTTCGAAGACGATCAGATACCGTCGTAGTTCCG--ACCATAAACGATGCCGACTAGCG-------------ATCCGGCG------------------   [658]
+A._salina                   GAGGTTCGAAGGCGATCAGATACCGCCCTAGTTCTA--ACCATAAACGATGCCAACCAGCG-------------ATCCGCGG------------------   [659]
+S._cerevisiae               AGGGATCGAAGACGATCTGATACCGTCGTAGTCTTA--ACCATAAACTATGCCGACTAG---------------ATCGGGTG------------------   [651]
+P.MICA                      GGGGATCGAAGACGATCAGATACCGTCCTAGTCTTA--ACCATAAACCATGCCGACTAG-------------AGATTGGAGG------------------   [650]
+D.DISC                      GGGGATCGAAGACGATCAGATACCGTCGTAGTCCAA--ACTATAAACTATGTCGACCAGGG-------------ATCGGTTA------------------   [653]
+T.BRUC                      GGGGATCAAAGATGATTAGAGACCATTGTAGTCCAC--ACTGCAAACCATGACACCCATGAATTGGGGAACATCATTGGGTGCCCGTGTGGCGGCCTTTT   [869]
+E.GRAC                      GGGGAGCAAAGATGATCAGACACCGTCGTAGTCCGGCCACTGTAAACGATGCCGGCCAGGCCTTGGCAGA-GCAA--GAATC------------------   [965]
+Zea_mays                    GGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCA--ACCATAAACGATGCCGACCAGGG-------------AT-CAGCG------------------   [654]
+D._mobilis                  GGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCG--GCTGTAAACGATGCGGGCTAGGT-------------GTTGGGTG------------------   [471]
+S._solfataricus             GGGCAGCAAACGGGATTAGATACCCCGGTAGTCCCG--GCTGTAAACGATGCGGGCTAGGT-------------GTCGAGTA------------------   [469]
+T._tenax                    GGGGAGCAAAGGGGATTAGATACCCCTGTAGTCCCG--GCCGTAAACGATGCGGGCTAGCT-------------GTCGGCCG------------------   [472]
+M._vannielii                GGGGCGCGAACCGGATTAGATACCCGGGTAGTCCTG--GCCGTAAACTCTGCGAACTAGGT-------------GTCACCTG------------------   [472]
+MB._formicicum              GGGGCGCGAACCGGATTAGATACCCGGGTAGTCCTG--GCCGTAAACGATGTGGACTTGGT-------------GTTGGGAT------------------   [474]
+MS._hungatei                GGGGAGCAAACCGGATTAGATACCCGGGTAGTCCCA--GCTGTAAACGATGCGCGTTAGGT-------------GTGTCAGT------------------   [447]
+H._volacanii                GGGTCTCGAACCGGATTAGATACCCGGGTAGTCCTA--GCTGTAAACGATGCTCGCTAGGT-------------GTGACACA------------------   [475]
+HC._morrhuae                GGGTCTCGAACCGGATTAGATACCCGGGTAGTCCAA--GCTGTAAACGATGCTCGCTAGGT-------------GTGGCGTT------------------   [476]
+H.HALO                      GGGTCTCGAACCGGATTAGATACCCGGGTAGTCCTA--GCTGTAAACGATGTCCGCTAGGT-------------GTGGCGCA------------------   [475]
+H._cutirubrum               GGGTCTCGAACCGGATTAGATACCCGGGTAGTCCTA--GCTGTAAACGATGTCCGCTAGGT-------------GTGGCGCA------------------   [475]
+AN._nidulans                GGGGAGCGAAAGGGATTAGATACCCCTGTAGTCCTA--GCCGTAAACGATGAACACTAGGT-------------GTTGCG-T------------------   [497]
+HE.CHL                      GGGGAGCAAACAGGATTAGATACCCTGGTAGTCCAC--GCCGTAAACGATGAGTGCTAGGT-------------GTTGGG-G------------------   [481]
+Bc._subtilis                GGGGAGCGAACAGGATTAGATACCCTGGTAGTCCAC--GCCGTAAACGATGAGTGCTAAGT-------------GTTAGG-G------------------   [520]
+MY.CAP                      GGGGAGCAAATAGGATTAGATACCCTAGTAGTCCAC--GCCGTAAACGATGAGTACTAAGT-------------GTTGGG-G------------------   [516]
+E._coli                     GGGGAGCAAACAGGATTAGATACCCTGGTAGTCCAC--GCCGTAAACGATGTCGACTTGGA-------------GGTTGT-G------------------   [518]
+MYX.XA                      GGGGAGCAAACAGGATTAGATACCCTGGTAGTCCAC--GCCGTAAACGATGAGAACTAGGT-------------GTCGTG-G------------------   [517]
+P.TEST                      GGGGAGCAAACAGGATTAGATACCCTGGTAGTCCAC--GCCCTAAACGATGTCAACTGGTT-------------GTTGGG-T------------------   [519]
+AG.TUM                      GGGGAGCAAACAGGATTAGATACCCTGGTAGTCCAC--GCCGTAAACGATGAATGTTAGCC-------------GTCGGG-C------------------   [495]
+DV.DES                      GGGGAGCAAACAGGATTAGATACCCTGGTAGTCCAC--GCTGTAAACGATGGATGCTAGAT-------------GTCGGG-G------------------   [519]
+BA.FRG                      GGGTATCAAACAGGATTAGATACCCTGGTAGTCCAC--ACAGTAAACGATGAATACTCGCT-------------GTTTGC-G------------------   [516]
+'Zea_mays_(chloroplast)'    GGGGAGCAAATGGGATTAGAGACCCCAGTAGTCCTA--GCCGTAAACGATGGATACTAGGT-------------GCTGTG-C------------------   [495]
+T.AMIT                      GGGGAGCGAACAGGATTAGATACCCTGGTAGTCCAT--GCCGTAAACGATG---------A-------------GTGTTC-G------------------   [494]
+
+[                                    1110      1120      1130      1140      1150      1160      1170      1180      1190      1200]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                -----------------------------------------------------------------------------------------------GCGTT   [667]
+Rattus_norvegicus           -----------------------------------------------------------------------------------------------GCGTT   [668]
+Xenopus_laevis              -----------------------------------------------------------------------------------------------GCGTT   [663]
+A._salina                   -----------------------------------------------------------------------------------------------ACGTT   [664]
+S._cerevisiae               -----------------------------------------------------------------------------------------------GTGTT   [656]
+P.MICA                      -----------------------------------------------------------------------------------------------TCGTT   [655]
+D.DISC                      -----------------------------------------------------------------------------------------------AAATT   [658]
+T.BRUC                      GTGCCGACCCTCGGCCCCAATTTATTTATCAATTTACGTGCCTATTCTATCACCCCCGGTTCCCTCTTTTGAGGTTCTTCCGGGGTTTTTTACGGGAATA   [969]
+E.GRAC                      -----------------------------------------------------------------------------------------------CTAGA   [970]
+Zea_mays                    -----------------------------------------------------------------------------------------------GTGTT   [659]
+D._mobilis                  -----------------------------------------------------------------------------------------------GGCTT   [476]
+S._solfataricus             -----------------------------------------------------------------------------------------------GGCTT   [474]
+T._tenax                    -----------------------------------------------------------------------------------------------GGCTT   [477]
+M._vannielii                -----------------------------------------------------------------------------------------------GGCCT   [477]
+MB._formicicum              -----------------------------------------------------------------------------------------------GGCTC   [479]
+MS._hungatei                -----------------------------------------------------------------------------------------------GACCA   [452]
+H._volacanii                -----------------------------------------------------------------------------------------------GGCTA   [480]
+HC._morrhuae                -----------------------------------------------------------------------------------------------GGCTA   [481]
+H.HALO                      -----------------------------------------------------------------------------------------------GGCTA   [480]
+H._cutirubrum               -----------------------------------------------------------------------------------------------GGCTA   [480]
+AN._nidulans                -----------------------------------------------------------------------------------------------GAATC   [502]
+HE.CHL                      -----------------------------------------------------------------------------------------------GTATC   [486]
+Bc._subtilis                -----------------------------------------------------------------------------------------------GGTTT   [525]
+MY.CAP                      -----------------------------------------------------------------------------------------------TAACT   [521]
+E._coli                     -----------------------------------------------------------------------------------------------CCCTT   [523]
+MYX.XA                      -----------------------------------------------------------------------------------------------GAGTT   [522]
+P.TEST                      -----------------------------------------------------------------------------------------------CTTAA   [524]
+AG.TUM                      -----------------------------------------------------------------------------------------------AGTAT   [500]
+DV.DES                      -----------------------------------------------------------------------------------------------AGTAT   [524]
+BA.FRG                      -----------------------------------------------------------------------------------------------ATATA   [521]
+'Zea_mays_(chloroplast)'    -----------------------------------------------------------------------------------------------GACTC   [500]
+T.AMIT                      -----------------------------------------------------------------------------------------------CCCTT   [499]
+
+[                                    1210      1220      1230      1240      1250      1260      1270      1280      1290      1300]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                ATTCCCATG--ACCC-GCCGGGCAGCTT----CCGGGAAA--CCA--AAGTCTTTGGGTTCCGGGGGGAGTATGGTTGCAAAGCTGAAACTTAAAGGAAT   [756]
+Rattus_norvegicus           ATTCCCATG--ACCC-GCCGGGCAGCTT----CCGGGAAA--CCA--AAGTCTTTGGGTTCCGGGGGGAGTATGGTTGCAAAGCTGAAACTTAAAGGAAT   [757]
+Xenopus_laevis              ATTCCCATG--ACCC-GCCGAGCAGCTT----CCGGGAAA--CCA--AAGTCTTTGGGTTCCGGGGGGAGTATGGTTGCAAAGCTGAAACTTAAAGGAAT   [752]
+A._salina                   ACTTGAATG--ACTC-CGCGGGCAGCTT----CCGGGAAA--CCA--AAGTGTTTGGGTTCCGGGGGAAGTATGGTTGCAAAGCTGAAACTTAAAGGAAT   [753]
+S._cerevisiae               TTTTTAATG--ACCC-ACTCGGTACCTT----ACGAGAAA--TCA--AAGTCTTTGGGTTCTGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAAT   [745]
+P.MICA                      ATCTATACG--ACTC-CTTCAGCACCTT----ATGAGAAA--TCA--AAGTCTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAAT   [744]
+D.DISC                      TTTTCAA-A--ATTT-AATCGGCACCTT----GTGAGAAA--TCAT-GAGTGTTTAGATTCCGGGGGGAGTATGGTCGCAAGTCTGAAACTTAAAGGAAT   [747]
+T.BRUC                      TCCTCAGCA--CGTT-TCTTACTTCTTC----ACGCGAAAGCTTG--GAGGTTACAGTCTCAGGGGGGAGTACGTTCGCAAGAGTGAAACTTAAAGAAAT   [1060]
+E.GRAC                      CTCTGTCAG--GGCC-ACTCCTCCCACA----ACGAGAAA-TCCA--CAGCCTGTGGGTTCAGGGGGGAGTACTGTCGCAAGGCTGAAACTTAAAGGAAT   [1060]
+Zea_mays                    ACTAATAGG--ACCCCGCTGGCCACCTT----ATGAGAAA--TCA--AAGTCTTTGGGTTCCGGGGGGAGTATGGTCGCAGGGCTAAAACTTAAAGGAAT   [749]
+D._mobilis                  ------AGA--GCCC-ACCCAGTGCCGC----A-GGGAAG--CCGTTAAGCCCGCCGCCT----GGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAAT   [556]
+S._solfataricus             ------AGA--GCCT-ACTCGGTGCCGC----A-GGGAAG--CCGTTAAGCCCGCCGCCT----GGGGAGTACGGTCGCAAGACTGAAACTTAAAGGAAT   [554]
+T._tenax                    ------AGG--GCCC-GGCCGGTGGCGT----A-GGGAAA--CCGTTAAGCCCGCCGCCT----GGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAAT   [557]
+M._vannielii                ------CGA--GCCC-AGGTGGTGCCGA----A-GGGAAG--CCGTTAAGTTCGCCGCCT----GGGGAGTACGGTCGCAAGACTGAAACTTAAAGGAAT   [557]
+MB._formicicum              ------CGA--GCTG-CCCCAGTGCCGA----A-GGGAAG--CTGTTAAGTCCACCGCCT----GGGAAGTACGGTCGCAAGACTGAAACTTAAAGGAAT   [559]
+MS._hungatei                ------CGT--GTCA-CTGAGGTGCCGA----A-GGGAAA--CCGTGAAACGCGCCGCCT----GGGGAGTACGGTCGCAAGGCTGAAACTTAAAGGAAT   [532]
+H._volacanii                ------CGA--GCCT-GTGTTGTGCCGT----A-GGGAAG--CCGAGAAGCGAGCCGCCT----GGGAAGTACGTCCGCAAGGATGAAACTTAAAGGAAT   [560]
+HC._morrhuae                ------CGA--GCCA-GCGCTGTGCCGT----A-GGGAAG--CCGAGAAGCGAGCCGCCT----GGGAAGTACGTCCGCAAGGATGAAACTTAAAGGAAT   [561]
+H.HALO                      ------CGA--GCCT-GCGCTGTGCCGT----A-GGGAAG--CCGAGAAGCGGACCGCCT----GGGAAGTACGTCTGCAAGGATGAAACTTAAAGGAAT   [560]
+H._cutirubrum               ------CGA--GCCT-GCGCTGTGCCGT----A-GGGAAG--CCGAGAAGCGGACCGCCT----GGGAAGTACGTCTGCAAGGATGAAACTTAAAGGAAT   [560]
+AN._nidulans                ------GAC--CCGC---GCAGTGCCGT----A-GCCAAC--GCGTTAAGTGTTCCGCCT----GGGGAGTACGCACGCAAGTTGGAAACTCAAAGGAAT   [580]
+HE.CHL                      ------GAC--CCCC---CCGGTGCCGC----A-GTTCAC--GCAATAAGCACTCCGCCT----GGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGAAT   [564]
+Bc._subtilis                ------CCG--CCCC---TTAGTGCTGC----A-GCTAAC--GCATTAAGCACTCCGCCT----GGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAAT   [603]
+MY.CAP                      ------C-------------AGCGCTGC----A-GCTAAC--GCATTAAGTACTCCGCCT----GAGTAGTATGCTCGCAAGAGTGAAACTCAAAGGAAT   [591]
+E._coli                     ------GAG--GCG----TGGCTTCCGG----A-GCTAAC--GCGTTAAGTCGACCGCCT----GGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAAT   [600]
+MYX.XA                      ------GAC--CCCC---GCGGTGCCGA----A-GCTAAC--GCATTAAGTTCTCCGCCT----GGGAAGTACGGTCGCAAGACTAAAACTCAAAGGAAT   [600]
+P.TEST                      ------CTG--ACTC-----AGTAACGA----A-GCTAAC--GCGTGAAGTTGACCGCCT----GGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGAAT   [600]
+AG.TUM                      --------A--CTGT---TCGGTGGCGC----A-GCTAAC--GCATTAAACATTCCGCCT----GGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAAT   [576]
+DV.DES                      ------TC------T---TCGGTGTCGT----A-GTTAAC--GCGTTAAGCATCCCGCCT----GGGGAGTACGGTCGCAAGGCTGAAACTCAAAGAAAT   [598]
+BA.FRG                      ------CAG---------TAAGCGGCCA----A-GCGAAA--GCATTAAGTATTCCACCT----GGGGAGTACGCCGGCAACGGTGAAACTCAAAGGAAT   [595]
+'Zea_mays_(chloroplast)'    ------GAC--CCGT---GCAGTGCTGT----A-GCTAAC--GCGTTAAGTATCCCGCCT----GGGGAGTACGTTCGCAAGAATGAAACTCAAAGGAAT   [578]
+T.AMIT                      ------GGTCTACG----CGGATCAGGGGCCCA-GCTAAC--GCGTGAAACACTCCGCCT----GGGGAGTACGGTCGCAAGACCGAAACTCAAAGGAAT   [582]
+
+[                                    1310      1320      1330      1340      1350      1360      1370      1380      1390      1400]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                TGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGAAACCTCACCCGGCCCGGACA----------------CGG-A-C   [838]
+Rattus_norvegicus           TGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGAAACCTCACCCGGCCCGGACA----------------CGG-A-C   [839]
+Xenopus_laevis              TGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGAAACCTCACCCGGCCCGGACA----------------CGG-A-A   [834]
+A._salina                   TGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGAAACCTCACCAGGCCCGGACA----------------CTG-G-A   [835]
+S._cerevisiae               TGACGGAAGGGCACCACTAGGAGTGGAGCCTGCGGCT-AATTTGACTCAACACGGGGAAACTCACCAGGTCCAGACA----------------CAA-T-A   [826]
+P.MICA                      TGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACA----------------TAG-T-A   [826]
+D.DISC                      TGACGGAAGGGCACACAATGGAGTGGAGCCTGCGGCTTAATTTGACTCAACTCGGGAAAACTTACCAAGCTAAGATA----------------TAG-T-A   [829]
+T.BRUC                      TGACGGAATGGCACCACAAGACGTGGAGCGTGCGGTTTAATTTGACTCAACACGGGGAACTTTACCAGATCCGGACA----------------GGG-T-G   [1142]
+E.GRAC                      TGACGGAATGGCACCACAAGGCGTGGAGTATGCGGCTTAATTTGACTCAACGCGGGGAATGTTACCAGGTCAGGACG----------------CAA-C-T   [1142]
+Zea_mays                    TGACGGAAGGGCACCACCAGGCGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACA----------------TAG-C-A   [831]
+D._mobilis                  TGGCGGGGGAGCACCACAAGGGGTGGAGCCTGCGGTTCAATTGGAGTCAACGCCGGGAATCTCACCGGGGGA-GACA----------------GCA-G-G   [637]
+S._solfataricus             TGGCGGGGGAGCACCACAAGGGGTGGAACCTGCGGCTCAATTGGAGTCAACGCCTGGAATCTTACCGGGGGA-GACC----------------GCA-G-T   [635]
+T._tenax                    TGGCGGGGGGGCACCACAAGGGGTGAAGCTTGCGGCTTAATTGGAGTCAACGCCGGAAACCTTACCCGGGGC-GACA----------------GCA-G-G   [638]
+M._vannielii                TGGCGGGGGAGCACCACAACGGGTGGAGCCTGCGGTTTAATTGGATTCAACGCCGGGCATCTCACCAGGAGC-GACA----------------GCA-T-G   [638]
+MB._formicicum              TGGCGGGGGAGCACCACAACGCGTGGAGCCTGCGGTTTAATTGGATTCAACGCCGGACATCTCACCAGGGGC-GACA----------------GCA-G-A   [640]
+MS._hungatei                TGGCGGGGGAGCACCACAACGGGTGGAGCCTGCGGTTTAATCGGACTCAACGCCGGAAATCTCACCGGATAA-GACA----------------GCT-G-A   [613]
+H._volacanii                TGGCGGGGGAGCACTACAACCGGAGGAGCCTGCGGTTTAATTGGACTCAACGCCGGACATCTCACCAGCTCC-GACT----------------ACA-GTG   [642]
+HC._morrhuae                TGGCGGGGGAGCACTACAACCGGAGGAGCCTGCGGTTTAATTGGACTCAACGCCGGACATCTCTCCGGCACC-GACA----------------GTGTGCA   [644]
+H.HALO                      TGGCGGGGGAGCACTACAACCGGAGGAGCCTGCGGTTTAATTGGACTCAACGCCGGACATCTCACCAGCCCC-GACA----------------GTA-GTA   [642]
+H._cutirubrum               TGGCGGGGGAGCACTACAACCGGAGGAGCCTGCGGTTTAATTGGACTCAACGCCGGACATCTCACCAGCCCC-GACA----------------GTA-GTA   [642]
+AN._nidulans                TGACGGGGGCCCGC-ACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCAGGGTTTGACAT-----CC----CCCGAAT-C-T   [668]
+HE.CHL                      TGACGGGGGCCCGC-ACAAGCGTTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCAAGGCTTGACAT-----CC----TCTGAAT-C-C   [652]
+Bc._subtilis                TGACGGGGGCCCGC-ACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACAT-----CC----TCTGACA-A-T   [691]
+MY.CAP                      TGACGGGGACCCGC-ACAAGTGGTGGAGCATGTGGTTTAATTCGAAGCAACACGAAGAACCTTACCAGGGCTTGACAT-----CC----AGTGTAA-A-G   [679]
+E._coli                     TGACGGGGGCCCGC-ACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACAT-----CC----ACGGAAG-T-T   [688]
+MYX.XA                      TGACGGGGGCCCGC-ACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGCAGAACCTTACCTGGTCTTGACAT-----CC----TCAGAAT-C-C   [688]
+P.TEST                      TGACGGGGACCCGC-ACAAGCGGTGGATGATGTGGTTTAATTCGATGCAACGCGAAAAACCTTACCCACCTTTGACAT-----GG----CAGGAAC-T-T   [688]
+AG.TUM                      TGACGGGGGCCCGC-ACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGCAGAACCTTACCAGCTCTTGACAT-----TCGGGGTTTGGGC-A-G   [668]
+DV.DES                      TGACGGGGGCCCGC-ACAAGCGGTGGAGTATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTAGGTTTGACAT-----CC---ACGGAACC-C-T   [687]
+BA.FRG                      TGACGGGGGCCCGC-ACAAGCGGAGGAACATGTGGTTTAATTCGATGATACGCGAGGAACCTTACCCGGGCTTAAATT-----GC----AGTGGAA-T-G   [683]
+'Zea_mays_(chloroplast)'    TGACGGGGGCCCGC-ACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAAGGCGAAGAACCTTACCAGGGCTTGACAT-----GC----CGCGAAT-C-C   [666]
+T.AMIT                      TGACGGGGGCCTGC-ACAAGCGGTGGAGCATGTGGTTTAATTCGATACAACGCGCAAAACCTTACCAGCCCTTGACATATGAACA----ACAAAAC-C-T   [675]
+
+[                                    1410      1420      1430      1440      1450      1460      1470      1480      1490      1500]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                AG-GATTGACA--G-ATTGA--TAGCTCTTTCTC-GATTC--CGTGGGTGGTGGTGCATGGCCGTT-CTTAGTTGGTGGAGCGATTTGTCTGGTTAATTC   [928]
+Rattus_norvegicus           AG-GATTGACA--G-GTTGA--TAGCTCTTTCTC-GATTC--CGTGGGTGGTGGTGCATGGCCGTT-CTTAGTTGGTGGAGCGATTTGTCTGGTTAATTC   [929]
+Xenopus_laevis              AG-GATTGACA--G-ATTGA--TAGCTCTTTCTC-GATTC--TGTGGGTGGTGGTGCATGGCCGTT-CTTAGTTGGTGGAGCGATTTGTCTGGTTAATTC   [924]
+A._salina                   AG-GATTGACA--G-ATTGA--GAGCTCTTTCTT-GATTC--AGTGGGTGGTGGTGCATGGCCGTT-CTTAGTTGGTGGAGCGATTTGTCTGGTTAATTC   [925]
+S._cerevisiae               AG-GATTGACA--G-ATTGA--GAGCTCTTTCTT-GATTT--TGTGGGTGGTGGTGCATGGCCGTTTCTCAGTTGGTGGAGTGATTTGTCTGCTTAATTG   [917]
+P.MICA                      AG-GATTGACA--G-ATTGA--TAGCTCTTTCTT-GATTC--TATGGGTGGTGGTGCATGGCCGTT-CTTAGTTGGTGGAGTGATTTGTCTGGTTAATTC   [916]
+D.DISC                      AG-GATTGACA--G-ACTAA--AAGATCTTTCAT-GATTC--TATAAGTGGTGGTGCATGGTCGTT-CTTAGTTGGTGGAGCGATTTGTCTGGTCAATTC   [919]
+T.BRUC                      AG-GATTGACA--G-ATGGA--GTGTTCTTTCTC-GATCC--CCTGAATGGTGGTGCATGGCCGCTTTT-GGTCGGTGGAGTGATTTGTTTGGTTGATTC   [1232]
+E.GRAC                      GG-GATTGACA--G-ATTGA--GAGCTCTTTCTT-GATCT--TGTGGACGGTGGTGCATGGCCGCTCCTGA-TTGGTGGAGTGATTTGTCTGGTTGATTC   [1232]
+Zea_mays                    AG-GATTGACA--G-ACTGA--GAGCTCTTTCTT-GATTC--TATGGGTGGTGGTGCATGGTCGTT-CTTAGTTGGTGGAGCGATTTGTCTGGTTAATTC   [921]
+D._mobilis                  AT-GACGGCCA--G-GTTAA--AG-C-CTTGCCT-GACGC--GCTGAGAGGAGGTGCATGGCCGTC-GCCAGCTCGTGCTGTGAAGTGTCCGGTTAAGTC   [725]
+S._solfataricus             AT-GACGGCCA--G-GCTAA--CGAC-CTTGCCT-GACTC--GCGGAGAGGAGGTGCATGGCCGTC-GCCAGCTCGTGTTGTGAAATGTCCGGTTAAGTC   [724]
+T._tenax                    AT-GAAGGCCA--G-GCTAA--CGAC-CTTGCCG-GACGA--GCTGAGAGGAGGTGCATGGCCGTC-GTCAGCTCGTGCCGTGAGGTGTCCGGTTAAGTC   [727]
+M._vannielii                AT-GACGGCCA--G-GTTGA--CGAC-CTTGCCT-GAAGC--GCTGAGAGGTGGTGCATGGCCATC-GTCAGCTCGTACCGCGAGGCGTCCTGTTAAGTC   [727]
+MB._formicicum              AT-GATAGCCA--G-GTTGA--TGAC-CTTGCTT-GACAA--GCTGAGAGGAGGTGCATGGCCGCC-GTCAGCTCGTACCGTGAGGCGTCCTGTTAAGTC   [729]
+MS._hungatei                AT-GATAGTCG--G-GATGA--AGAC-TCTACTT-GACTA--GCTGAGAGGAGGTGCATGGCCGTC-GTCAGTTCGTACTGTGAAGCATCCTGTTTAGTC   [702]
+H._volacanii                AT-GACGATCA--G-GTTGA--TGAC-CTTATCA-CGACGCTGTAGAGAGGAGGTGCATGGCCGCC-GTCAGCTCGTACCGTGAGGCGTCCTGTTAAGTC   [733]
+HC._morrhuae                GT-GACAGTCA--G-TCCGA--TGGG-CTTACTT-GA--GCCACTGAGAGGAGGTGCATGGCCGCC-GTCAGCTCGTACCGTGAGGCGTCCTGTTAAGTC   [733]
+H.HALO                      AT-GACGGTCA--G-GTTGA--TGAC-CTTACCC-G-AGGCTACTGAGAGGAGGTGCATGGCCGCC-GTCAGCTCGTACCGTGAGGCGTCCTGTTAAGTC   [732]
+H._cutirubrum               AT-GACGGTCA--G-GTTGA--TGAC-CTTACCC-GGAGGCTACTGAGAGGAGGTGCATGGCCGCC-GTCAGCTCGTACCGTGAGGCGTCCTGTTAAGTC   [733]
+AN._nidulans                CT-TGGAAACG--A-GAGAG--TGCC-TTCGGGA-GCGGG--G-AGACAGGTGGTGCATGGCTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [756]
+HE.CHL                      GA-TAGAGATA--GCGGAGT--GCCC-TTCGGGGAGCAGA--G-AGACAGGTGGTGCATGGTTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [742]
+Bc._subtilis                CC-TAGAGATA--G-GACGT--CCCC-TTCGGGG-GCAGA--G-TGACAGGTGGTGCATGGTTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [779]
+MY.CAP                      CTATAGAGATA--T-AGTAG--AGGT-T-----A-ACATT--G-AGACAGGTGGTGCATGGTTGTC-GTCAGTTCGTGCCGTGAGGTGTTGGGTTAAGTC   [763]
+E._coli                     TT-CAGAGATG--A-GAATG--TGCC-TTCGGGA-ACCGT--G-AGACAGGTGCTGCATGGCTGTC-GTCAGCTCGTGTTGTGAAATGTTGGGTTAAGTC   [776]
+MYX.XA                      TT-CAGAGATGAGG-GAGTG--CCCG-CAAGGGA-ACTGA--G-AGACAGGTGCTGCATGGCTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [778]
+P.TEST                      AC-CAGAGATG--G-TTTGGTGCTCG-AAAGAGA-ACCTG--C-ACACAGGTGCTGCATGGCTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [778]
+AG.TUM                      TG-GAGACATT--G-TCCTT--CAGT-TAGGCTG-GCCCC--A-GAACAGGTGCTGCATGGCTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [756]
+DV.DES                      CC-CGAAAAGG--A-GGGGT--GCCC-TTCGGGGAGCCGT--G-AGACAGGTGCTGCATGGCTGTC-GTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTC   [776]
+BA.FRG                      ATGTGGAAACA--T-GTCAG--TGAG-CAATCAC--CGCT--G-TGA-AGGTGCTGCATGGTTGTC-GTCAGCTCGTGCCGTGAGGTGTCGGCTTAAGTG   [770]
+'Zea_mays_(chloroplast)'    TC-TTGAAAGA--G-AGGGG--TGCC-CTCGGGA-ACGCG--G-ACACAGGTGGTGCATGGCTGTC-GTCAGCTCGTGCCGTAAGGTGTTGGGTTAAGTC   [754]
+T.AMIT                      GT-CCTTAACA--G-GATGG--TACT-------G-ACTTT--C-ATACAGGTGCTGCATGGCTGTC-GTCAGCTCGTGTCGTGAGATGTTTGGTCAAGTC   [757]
+
+[                                    1510      1520      1530      1540      1550      1560      1570      1580      1590      1600]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                CGATAACGAACGAGACTCTGGCATGCTAA-CTAGTT-AC--G--------------------------------------C-GACC----------CCCG   [975]
+Rattus_norvegicus           CGATAACGAACGAGACTCTGGCATGCTAA-CTAGTT-AC--G--------------------------------------C-GACC----------CC-G   [975]
+Xenopus_laevis              CGATAACGAACGAGACTCCTCCATGCTAA-CTAGTT-AC--G--------------------------------------C-GACC----------CC-C   [970]
+A._salina                   CGATAACGAACGAGACTCTAGCCTGCTAA-ATAG-A-CG--ATGGATC--------------------------------C-TAGT----------GG-T   [976]
+S._cerevisiae               CGATAACGAACGAGACCTTAACCTACTAA-ATAGTG-GT--G--------------------------------------C-TAGC----------AT-T   [963]
+P.MICA                      CGTTAACGAACGAGACCTTAACCTGCTAA-ATAGTT-AC--A--------------------------------------CGTAAC----------TC-C   [963]
+D.DISC                      CGATAACGGACGAGACCTCGACCTGCTAA-CTAGTA-GT--ATTTATTAGTCGATATAGACGATAGCTTTTCTGGGGTTTG-GAAT----------GA-T   [1003]
+T.BRUC                      CGTCAACGGACGAGATCC-AAGCTGCCCA-GTAG---GT--G--------------------------------------C-CGGG----------AT-T   [1275]
+E.GRAC                      CGATAACGAGTGAGACATCTGCCTCCCACTAGCCTG-AG--G--------------------------------------C-TCGC----------AT-T   [1279]
+Zea_mays                    CGTTAACGAACGAGACCTCAGCCTGCTAA-CTAGCT-AT--G--------------------------------------C-GGAG----------CC-A   [967]
+D._mobilis                  CGGAAACGAGCGAGACCCCCACCC-CTAG-TTGCTA-CC--C-----------------------------------------GGG----------GC-T   [768]
+S._solfataricus             CGGCAACGAGCGAGACCCCCACCC-CTAG-TTGGTA-TT--C-----------------------------------------TGG----------AC-T   [767]
+T._tenax                    CGGCAACGAGCGAGACCCCCACCC-CTAG-TTGCTA-CC--C-----------------------------------------CGC----------TC-T   [770]
+M._vannielii                AGGTAACGAGCGAGACCCGTGCCC-TATG-TTGCGA-CT--A-----------------------------------------CTT----------TC-T   [770]
+MB._formicicum              AGGCAACGAGCGAGACCCACGCCC-TTAG-TTACCA-GC--G-----------------------------------------GAT----------CC-T   [772]
+MS._hungatei                AGGCAACGAGCGAGACCCACGCGA-GCAG-TTGCCAGCT--T-----------------------------------------GAC----------CT-T   [746]
+H._volacanii                AGGCAACGAGCGAGACCCGCACTT-CTAA-TTGCCA-GCAGC-----------------------------------------CAG----------T--T   [777]
+HC._morrhuae                AGGCAACGAGCGAGACCCGCGTCC-CTAA-TTGCCA-GCAGC-----------------------------------------CAG----------CC-T   [778]
+H.HALO                      AGGCAACGAGCGAGACCCGCACTC-CTAA-TTGCCA-GCGGT-----------------------------------------TAC----------CC-T   [777]
+H._cutirubrum               AGGCAACGAGCGAGACCCGCACTC-CTAA-TTGCCA-GCAGT-----------------------------------------TAC----------CC-T   [778]
+AN._nidulans                CCGCAACGAGCGCAACCCACGTTT-TTAG-TTGCCA-TC--A-----------------------------------------TTC----------AG-T   [799]
+HE.CHL                      CCGCAACGAGCGCAACCCTTATCC-CTAG-TTGCCA-GCG-A-----------------------------------------GAG----------AG-T   [786]
+Bc._subtilis                CCGCAACGAGCGCAACCCTTGATC-TTAG-TTGCCA-GC--A-----------------------------------------TTC----------AG-T   [822]
+MY.CAP                      CCGCAACGAACGCAACCCTTGTCG-TTAG-TTACTA-AC--A-----------------------------------------TTA----------AG-T   [806]
+E._coli                     CCGCAACGAGCGCAACCCTTATCC-TTTG-TTGCCA-GC--G-G---------------------------------------TCC----------GG-C   [820]
+MYX.XA                      CCGCAACGAGCGCAACCCTCGCCT-TTAG-TTGCCA-----------------------------------------------CGC----------AA-G   [818]
+P.TEST                      CCGCAACGAGCGCAACCCTTGCCA-TTAG-TTGCTA--C--A-----------------------------------------TTC----------AG-T   [820]
+AG.TUM                      CCGCAACGAGCGCAACCCTCGCCC-TTAG-TTGCCA-GC--A-----------------------------------------TTT----------AG-T   [799]
+DV.DES                      CCGCAACGAGCGCAACCCCTATGG-ATAG-TTGCCA-GC--AAG---------------------------------------TAA----------TG-T   [821]
+BA.FRG                      CCATAACGAGCGCAACCCTTATCT-TTAG-TTACTA-AC--AGG---------------------------------------TTA----------TG-C   [815]
+'Zea_mays_(chloroplast)'    TCGCAACGAGCGCAACCCTCGTGT-TTAG-TTGCCA-CT--A------------------------------------------TG----------AG-T   [796]
+T.AMIT                      CTATAACGAGCGAAACCCTCGTTT-TGTG-TTGCTG-AG--A-C---------------------------------------ATGCGC????GTGCG-T   [811]
+
+[                                    1610      1620      1630      1640      1650      1660      1670      1680      1690      1700]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                AGCGGTCGGCGT---------------------------------------------------------------------------CCCCC--------   [992]
+Rattus_norvegicus           AGCGCTCGGCGT---------------------------------------------------------------------------CCCCC--------   [992]
+Xenopus_laevis              GGCGGTCGGCGT---------------------------------------------------------------------------CCA----------   [985]
+A._salina                   GG-----ATCGC---------------------------------------------------------------------------T------------   [984]
+S._cerevisiae               TG---CTGGTTA---------------------------------------------------------------------------TCC----------   [975]
+P.MICA                      GG---TTACGTG---------------------------------------------------------------------------GGC----------   [975]
+D.DISC                      TTCGGTCATCTC---------------------------------------------------------------------------CTGCTTCAAGGAG   [1028]
+T.BRUC                      GT---CCACACA----------GGACAGCAGTCCCTCCGGC-----GGGGATTTTTTCCCCAACG-GTGGTCGTCATCCTTCTTTTTACA----------   [1346]
+E.GRAC                      TG---GTAGGGTTCGGCTGCTCGG-TGGCAGCCCC-CTGGCAACAGGGGGAGATGTACCGGTGCATGCTCCCGAGAGCCTCCAGT--TCA----------   [1362]
+Zea_mays                    TC---CCGCTAG---------------------------------------------------------------------------TTA----------   [979]
+D._mobilis                  AC---GGCTCCG---------------------------------------------------------------------------GGG----------   [780]
+S._solfataricus             CC---CGGTCCA---------------------------------------------------------------------------GAA----------   [779]
+T._tenax                    TC---GGGGCGG---------------------------------------------------------------------------GGG----------   [782]
+M._vannielii                CC---GGAA-GG---------------------------------------------------------------------------TAA----------   [781]
+MB._formicicum              TC---GGGATGC---------------------------------------------------------------------------CGG----------   [784]
+MS._hungatei                CG---GGTT-GA---------------------------------------------------------------------------TGG----------   [757]
+H._volacanii                TC---GACTGGC---------------------------------------------------------------------------TGG----------   [789]
+HC._morrhuae                TG---TGCTGGC---------------------------------------------------------------------------TGG----------   [790]
+H.HALO                      TT---GGGTAGC---------------------------------------------------------------------------TGG----------   [789]
+H._cutirubrum               TT---GGGTAGC---------------------------------------------------------------------------TGG----------   [790]
+AN._nidulans                T---------------------------------------------------------------------------------------GG----------   [802]
+HE.CHL                      C---------------------------------------------------------------------------------------GG----------   [789]
+Bc._subtilis                T---------------------------------------------------------------------------------------GG----------   [825]
+MY.CAP                      T---------------------------------------------------------------------------------------GA----------   [809]
+E._coli                     C---------------------------------------------------------------------------------------GG----------   [823]
+MYX.XA                      T---------------------------------------------------------------------------------------GG----------   [821]
+P.TEST                      T---------------------------------------------------------------------------------------GA----------   [823]
+AG.TUM                      T---------------------------------------------------------------------------------------GG----------   [802]
+DV.DES                      T---------------------------------------------------------------------------------------GG----------   [824]
+BA.FRG                      T---------------------------------------------------------------------------------------GA----------   [818]
+'Zea_mays_(chloroplast)'    T---------------------------------------------------------------------------------------TG----------   [799]
+T.AMIT                      G---------------------------------------------------------------------------------------CC----------   [814]
+
+[                                    1710      1720      1730      1740      1750      1760      1770      1780      1790      1800]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                -------------------------------AA----CTTCTTAGAGGGACAAG---TGG--CG-TT-CAGCCA---CCCGAGATTGA-GCAATAACAGG   [1046]
+Rattus_norvegicus           -------------------------------AA----CTTCTTAGAGGGACAAG---TGG--CG-TT-CAGCCA---CC-GAGATTGA-GCAATAACAGG   [1045]
+Xenopus_laevis              --------------------------------A----CTTCTTAGAGGGACAAG---TGG--CG-TT-CAGCCA--CAC-GAGATCGA-GCAATAACAGG   [1038]
+A._salina                   -------------------------------------CTTCTTAGAGGGACAAG---TGG--CG-TC-TAGCCA--TAT-GAGAGTGA-GCAATAACAGG   [1036]
+S._cerevisiae               --------------------------------A----CTTCTTAGAGGGACTAT---CGG--TT-TC-AAGCCG-ATGG-AAGTTTGAGGCAATAACAGG   [1030]
+P.MICA                      --------------------------------A----ACTTTTAGAGGGACTTT---GCG--TG-TC-TAACGC-AAGG-AAGTTTGAGGCAATAACAGG   [1030]
+D.DISC                      TGTGTAGTCTGACTCGATAGGTACGAATTAAAA----CTTCTTAGAGGGACTAC---CTG--CC-TC-AAGCAG-GCGG-AAGTCCGAGGCAATAACAGG   [1115]
+T.BRUC                      --------------------------------GGCCCCTTCTCTGCGGGATTCC---TTGCTTT-TC-GCGCAA-GGTG-AGATTTTGGGCAACAGCAGG   [1407]
+E.GRAC                      --------------------------------G----CTTCTCTGAGGTGCTGTGTCCGC--CA-CA-AAGGGC-ATGC-ATGCTAGAGCCAACAGCAGG   [1420]
+Zea_mays                    --------------------------------G----CTTCTTAGAGGGACTAT---GGC--CG-TT-TAGGCC-G-CG-AAGTTTGAGGCAATAACAGG   [1033]
+D._mobilis                  -------------------------------------CACACTAGGGGGACTGC---CGC--CG-TTTAAGGCG-GAGG-AAGGAGGGGGCCACGGCAGG   [835]
+S._solfataricus             --------------------------------C----CACACTAGGGGGACTGC---CGG--CG-T--AAGCCG-GAGG-AAGGAGGGGGCCACGGCAGG   [833]
+T._tenax                    --------------------------------G----CACACTAGGGGGACTGC---CGG--CG-T--AAGCCG-GAGG-AAGGAGGGGGCGACGGCAGG   [836]
+M._vannielii                --------------------------------G----CACTCATAGGGGACCGC---TAG--CGCT--AAGCTA-GAGG-AAGGAGCGGGCAACGATAGG   [836]
+MB._formicicum              --------------------------------G----CACACTAAGGGGACCGC---CAG--TGAT--AAACTG-GAGG-AAGGAGTGGACGACGGTAGG   [839]
+MS._hungatei                --------------------------------G----GACACTGCTCGGACCGC---CTC--TGCT--AAAGGG-GAGG-AAGGAATGGGCAACGGTAGG   [812]
+H._volacanii                --------------------------------G----TACATTAGAAGGACTGC---CGC--TG-CT-AAAGCG-GAGG-AAGGAACGGGCAACGGTAGG   [844]
+HC._morrhuae                --------------------------------G----TACATTAGGGAGACTGC---CGT--CG-CT-AAGACG-GAGG-AAGGAACGGGCAACGGTAGG   [845]
+H.HALO                      --------------------------------G----TACATTAGGTGGACTGC---CGC--TG-CC-AAAGCG-GAGG-AAGGAACGGGCAACGGTAGG   [844]
+H._cutirubrum               --------------------------------G----TACATTAGGTGGACTGC---CGC--TG-CC-AAAGCG-GAGG-AAGGAACGGGCAACGGTAGG   [845]
+AN._nidulans                --------------------------------G----CACTCTAGAGAAACTGC---CGG--TGAC--AAACCG-GAGG-AAGGTGTGGACGACGTCAAG   [857]
+HE.CHL                      --------------------------------G----GACTCTAGGGAGACTGC---CCG--GGAC--GACCGG-GAGG-AAGGCGGGGATGACGTCAAA   [844]
+Bc._subtilis                --------------------------------G----CACTCTAAGGTGACTGC---CGG--TGAC--AAACCG-GAGG-AAGGTGGGGATGACGTCAAA   [880]
+MY.CAP                      --------------------------------G----AACTCTAACGAGACTGC---TAG--TG-T--AAGCTA-GAGG-AAGGTGGGGATGACGTCAAA   [863]
+E._coli                     --------------------------------G----AACTCAAAGGAGACTGC---CAG--TGAT--AAACTG-GAGG-AAGGTGGGGATGACGTCAAG   [878]
+MYX.XA                      --------------------------------A----T-CTCTAGAGGGACTGC---CGG--TGTT--AAACCG-GAGG-AAGGTGGGGATGACGTCAAG   [875]
+P.TEST                      --------------------------------G----CACTCTAATGGGACTGC---CGG--TGAC--AAACCG-GAGG-AAGGTGGGGATGACGTCAAG   [878]
+AG.TUM                      --------------------------------G----CACTCTAAGGGGACTGC---CGG--TGAT--AAGCCGAGAGG-AAGGTGGGGATGACGTCAAG   [858]
+DV.DES                      --------------------------------G----CACTCTATTCAGACTGC---CCG--GGTT--AACCGG-GAGG-AAGGTGGGGACGACGTCAAG   [879]
+BA.FRG                      --------------------------------G----GACTCTAGAGAGACTGC---CGT--CGTA--AGATGT-GAGG-AAGGTGGGGATGACGTCAAA   [873]
+'Zea_mays_(chloroplast)'    --------------------------------G----AACCCTGAACAGACCGC---CGG--TGTT--AAGCCG-GAGG-AAGGAGAGGATGAGGCCAAG   [854]
+T.AMIT                      --------------------------------G----CACTCACGAGGGACTGC---CAG--TGAG--ATACTG-GAGG-AAGGTGGGGATGACGTCAAG   [869]
+
+[                                    1810      1820      1830      1840      1850      1860      1870      1880      1890      1900]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                TCTGTGATGCCCTTAGA-TGTCCGGGGCTGCACGCGCGCTACACTGACTGGCTCAGCGTG-TGCCTA------------------------CCCTACGCC   [1120]
+Rattus_norvegicus           TCTGTGATGCCCTTAGA-TGTCCGGGGCTGCACGCGCGCTACACTGACTGGCTCAGCGTG-TGCCTA------------------------CCCTACGCC   [1119]
+Xenopus_laevis              TCTGTGATGCCCTTAGA-TGTCCGGGGCTGCACGCGCGCTACACTGAACGGATCAGCGTG-TGTCTA------------------------CCCTGCGCC   [1112]
+A._salina                   TCTGTGATGCCCTTAGA-TGTCCTGGGCCGCACGCGCGCTACACTGGAAGAATCAGCGCG-T--CC-------------------------TCCCTGTCC   [1107]
+S._cerevisiae               TCTGTGATGCCCTTAGAACGTTCTGGGCCGCACGCGCGCTACACTGACGGAGCCAGCGAG-T--CTA------------------------ACCTTGGCC   [1103]
+P.MICA                      TCTGTGATGCCCTTAGA-TGTTCTGGGCTGCACGCGCGCTACACTGATGCGTTCAACGAG-T--TTA----------------------TGCACTTGCCC   [1104]
+D.DISC                      TCTGTGATGCCCTTAGA-TACCTTGGGCCGCACGCGCGCTACAATGTAGGAAACAAAAAG-G--CT--------------------------CCTGGTCC   [1185]
+T.BRUC                      TCTGTGATGCTCCTCAA-TGTTCTGGGCGACACGCGCACTACAATGTCAGTGAGAACAAG-AGTCCGAGCGGCACTTCACAATGTCGCTCCCGCTTGATC   [1505]
+E.GRAC                      TCTGTGATGCTCCCAGA-TGTCCTGGGCCGCACGCGCACTACATTGTCACAGTGAAGGTG-T--CGA-------CATGCCCACTCCGGTGGGCCCTGGCC   [1509]
+Zea_mays                    TCTGTGATGCCCTTAGA-TGTTCTGGGCCGCACGCGCGCTACACTGATGTATCCAACGAG-TATATA------------------------GCCTTGGCC   [1107]
+D._mobilis                  TCAGC-ATGCCCCGAAC--CCCCCGGGCTACACGCGGGCTACAATGGCGGGGACAGCGGGAT--CCG------------------------ACC----CC   [902]
+S._solfataricus             TCAGC-ATGCCCCGAAA--CTCCCGGGCCGCACGCGGGTTACAATGGCAGGGACAACGGGAT--GCT------------------------ACC----TC   [900]
+T._tenax                    TCAGT-ATGCCCCGAAA--CCCCGGGGCTGCACGCGAGCTGCAATGGCGGGGACAGCGGGAT--CCG------------------------ACC----CC   [903]
+M._vannielii                TCCGC-ATGCCCCGAAT--CTCCTGGGCTACACGCGGGCTACAATGGCTAGGACAATGGGCT--GCT------------------------ACC----CT   [903]
+MB._formicicum              TCCGT-ATGCCCCGAAT--CCCCTGGGCTACACGCGGGCTACAATGGTTAGGACAATGGGTT--CCG------------------------ACA----CT   [906]
+MS._hungatei                TCAGC-ATGCCCCGAAT--TATCCGGGCTACACGCGGGCTACAATGGACAGGACAATGGGTT--TCG------------------------ACA----CC   [879]
+H._volacanii                TCAGT-ATGCCCCGAAT--GAGCTGGGCTACACGCGGGCTACAATGGTCGAGACAATGGGTT--GCT------------------------ATC----TC   [911]
+HC._morrhuae                TCAGT-ATGCCCCGAAT--GTGCCGGGCGACACGCGGGCTACAATGGCCGAGACAGTGGGAC--GCT------------------------ACC----CC   [912]
+H.HALO                      TCAGT-ATGCCCCGAAT--GGGCTGGGCAACACGCGGGCTACAATGGTCGAGACAATGGGAA--GCC------------------------ACT----CC   [911]
+H._cutirubrum               TCAGT-ATGCCCCGAAT--GGGCTGGGCAACACGCGGGCTACAATGGTCGAGACAATGGGAA--GCC------------------------ACT----CC   [912]
+AN._nidulans                TCATC-ATGCCCCTTAC--ATCCTGGGCTACACACGTACTACAATGCTCCGGACAGCGAGAC--GCG------------------------AAG----CC   [924]
+HE.CHL                      TCATC-ATGCCCCTTAT--GTCTTGGGCTACACACGTGCTACAATGGGCGGTACAAACCGAA--GCG------------------------AAG----CC   [911]
+Bc._subtilis                TCATC-ATGCCCCTTAT--GACCTGGGCTACACACGTGCTACAATGGACAGAACAAAGGGCA--GCG------------------------AAA----CC   [947]
+MY.CAP                      TCATC-ATGCCCCTTAT--GTCCTGGGCTACACACGTGCTACAATGGCTGGTACAAAGAGTT--GCA------------------------ATC----CT   [930]
+E._coli                     TCATC-ATGGCCCTTAC--GACCAGGGCTACACACGTGCTACAATGGCGCATACAAAGAGAA--GCG------------------------ACC----TC   [945]
+MYX.XA                      TCCTC-ATGGCCTTTAT--GACCAGGGCTACACACGTGCTACAATGGCCGGTACAGAGCGTT--GCC------------------------AAC----CC   [942]
+P.TEST                      TCCTC-ATGGCCCTTAT--AGGTGGGGCTACACACGTCATACAATGGCTGGTACAAAGGGTT--GCC------------------------AAC----CC   [945]
+AG.TUM                      TCCTC-ATGGCCCTTAC--GGGCTGGGCTACACACGTGCTACAATGGTGGTGACAGTGGGCA--GCG------------------------AGA----CA   [925]
+DV.DES                      TCATC-ATGGCCCTTAC--GCCTAGGGCTACACACGTACTACAATGGCGCGCACAAAGGGGA--GCG------------------------AGA----CC   [946]
+BA.FRG                      TCAGC-ACGGCCCTTAC--GTCCGGGGCTACACACGTGTTACAATGGGGGGTACAGAAGGCA--GCT------------------------AGC----GG   [940]
+'Zea_mays_(chloroplast)'    TCATC-ATGCCCCTTAT--GCCCTGGGCGACACACGTGCTACAATGGGCGGGACAAAGGGTC--GCG------------------------ATC----TC   [921]
+T.AMIT                      TCCGC-ATGGCCCTTAT--GGGCTGGGCCACACACGTGCTACAATGGCAATGACAATGGGAA--GCA------------------------AGG----CT   [936]
+
+[                                    1910      1920      1930      1940      1950      1960      1970      1980      1990      2000]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                -GGCAGGCGCGGG-TAACCCGTTGAA-CCCCATTCGTGATGGGGATCGGGGATTGCAATTATTCCCCAT-GAACGAGGAATTCCCAGTAAGTGCGGGTCA   [1216]
+Rattus_norvegicus           -GGCAGGCGCGGG-TAACCCGTTGAA-CCCCATTCGTGATGGGGATCGGGGATTGCAATTATTCCCCAT-GAACGAGGAATTCCCAGTAAGTGCGGGTCA   [1215]
+Xenopus_laevis              -GACAGGTGCGGG-TAACCCGCTGAA-CCCCGTTCGTGATAGGGATCGGGGATTGCAATTATTTCCCAT-GAACGAGGAATTCCCAGTAAGTGCGGGTCA   [1208]
+A._salina                   -GAGAGGACCGGG-TAACC-GCTGAA-CCTCTTCCGTGGTTGGGATTGGGGACTGCAAGGATC-CCCAT-GAACCAGGAATCCCTAGTAGGCGCAAGTCA   [1201]
+S._cerevisiae               -GAGAGGTCTTGG-TAATCTTGTGAA-ACTCCGTCGTGCTGGGGATAGAGCATTGTAATTATTGCTCTT-CAACGAGGAATTCCTAGTAAGCGCAAGTCA   [1199]
+P.MICA                      -GATAGGGTTTGGGTAATCTTTTTAA-ATCGCATCGTGATGGGGATAGATTATTGCAATTATTAATCTT-CAACGAGGAATTCCTAGTAAGCGCGAGTCA   [1201]
+D.DISC                      -GGAAGGATTGGG-TAATCATTTGAA-TTTCCTACGTAACTGGGCTTGATCTTTGTAATTATTGATCAT-AAACGAGGAATTCCTTGTAAGCGTAAGTCA   [1281]
+T.BRUC                      -AAAAGAGCGGGG-AAACCACGGAATCACGTAGACCCACTTGGGACCGAGTATTGCAATTATTGGTCGCGCAACGAGGAATGTCTCGTAGGCGCAGCTCA   [1603]
+E.GRAC                      TGAAGAGGCTGGG-AAATCCTGCAAG-CCTGTGACGTACTGGGGATAGATGGTTGCAACTGTCTGCCTT-GAACGTGGAATGCCTAGTATGCCTGGGTCA   [1606]
+Zea_mays                    -GACAGGC-CGGT-AATCTTGGGAAA-TTTCATGCGTGATGGGGATAGATCATTGCAATTGTTGGTCTT-CAACGAGGAATGCCTAGTAAGCGCGAGTCA   [1202]
+D._mobilis                  -GAAAGGGGAGGC-GAATCCCTC-AA-ACCCCGCCGTGGTTGGGATCGAGGGCTGCAACTCGCCCTCGT-GAACGAGGAATCCCTAGTAACCGCGCGTCA   [997]
+S._solfataricus             -GAAAGGGGGAGC-CAATCCTT--AA-ACCCTGCCGCAGTTGGGATCGAGGGCTGAAACCCGCCCTCGT-GAACGAGGAATCCCTAGTAACCGCGGGTCA   [994]
+T._tenax                    -GAAAGGGGGAGG-CAATCCCGT-AA-ACCCCGCCCCAGTAGGGATCGAGGGCTGCAACTCGCCCTCGT-GAACGTGGAATCCCTAGTAACCGCGTGTCA   [998]
+M._vannielii                -GAAAAGGGACGC-GAATCTCCG-AA-ACCTAGTCGTAGTTCGGATCGTGGGCTGTAACTCGCCCACGT-GAAGCTGGAATCCGTAGTAATCGCAGTTCA   [998]
+MB._formicicum              -GAAAGGTGGAGG-TAATCTCCT-AA-ACCTGGCCTTAGTTCGGATTGAGGGCTGTAACTCGCCCTCAT-GAAGCTGGAATGCGTAGTAATCGCGTGTCA   [1001]
+MS._hungatei                -GAGAGGTGAGGA-TAATCTCCT-AA-ACCTGTCCGAAGTTCGGATTGCGGGTTGTAACTCACCCGCAT-GAAGCTGGAATCCGTAGTAATCGCGTTTCA   [974]
+H._volacanii                -GAAAGAGAACGC-TAATCTCCT-AA-ACTCGATCGTAGTTCGGATTGAGGGCTGAAACTCGCCCTCAT-GAAGCTGGATTCGGTAGTAATCGCATTTCA   [1006]
+HC._morrhuae                -GAGAGGGGACGC-TAATCTCCT-AA-CCTCGGTCGTAGTTCGGATTGCGGGTTGAAACCCACCCGCAT-GAAGCTGGATTCGGTAGTAATCGCATTTCA   [1007]
+H.HALO                      -GAGAGGAGGCGC-TAATCTCCT-AA-ACTCGATCGTAGTTCGGATTGAGGGCTGAAACTCGCCCTCAT-GAAGCTGGATTCGGTAGTAATCGCGTGTCA   [1006]
+H._cutirubrum               -GAGAGGAGGCGC-TAATCTCCT-AA-ACTCGATCGTAGTTCGGATTGAGGGCTGAAACTCGCCCTCAT-GAAGCTGGATTCGGTAGTAATCGCGTGTCA   [1007]
+AN._nidulans                -GCGAGGTGAAGC-AAATCTCCC-AA-ACCGGGGCTCAGTTCAGATTGCAGGCTGCAACTCGCCTGCAT-GAAGGCGGAATCGCTAGTAATCGCAGGTCA   [1019]
+HE.CHL                      -GAGAGGTGGAGC-GAACCGGAG-AA-AGCCGTTCCCAGTTCGGATTGCTCTCTGCAACTCGAGAGCAT-GAAGGCGGAATCGCTAGTAATCGCGGGTCA   [1006]
+Bc._subtilis                -GCGAGGTTAAGC-CAATCCCAC-AA-ATCTGTTCTCAGTTCGGATCGCAGTCTGCAACTCGACTGCGT-GAAGCTGGAATCGCTAGTAATCGCGGATCA   [1042]
+MY.CAP                      -GTGAAGGGGAGC-TAATCTCAA-AA-AACCAGTCTCAGTTCGGATTGAAGTCTGCAACTCGACTTCAT-GAAGCCGGAATCACTAGTAATCGCGAATCA   [1025]
+E._coli                     -GCGAGAGCAAGC-GGACCTCAT-AA-AGTGCGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCAT-GAAGTCGGAATCGCTAGTAATCGTGGATCA   [1040]
+MYX.XA                      -GCGAGGGGGAGC-TAATCGCAT-AA-AACCGGTCTCAGTTCAGATTGGAGTCTGCAACTCGACTCCAT-GAAGGAGGAATCGCTAGTAATCGCAGATCA   [1037]
+P.TEST                      -GCGAGGGGGAGC-TAATCCCAT-AA-AGCCAGTCGTAGTCCGGATCGCAGTCTGCAACTCGACTGCGT-GAAGTCGGAATCGCTAGTAATCGTGGATCA   [1040]
+AG.TUM                      -GCGATGTCGAGC-TAATCTCCA-AA-AGCCAT-CTCAGTTCGGATTGCACTCTGCAACTCGAGTGCAT-GAAGTTGGAATCGCTAGTAATCGCAGATCA   [1019]
+DV.DES                      -GCGAGGTGGAGC-CAATCCCAA-AA-AACGCGTCCCAGTCCGGATTGCAGTCTGCAACTCGACTGCAT-GAAGTTGGAATCGCTAGTAATTCGAGATCA   [1041]
+BA.FRG                      -GTGACCGTATGC-TAATCCCA--AA-ATCCTCTCTCAGTTCGGATCGAAGTCTGCAACCCGACTTCGT-GAAGCTGGATTCGCTAGTAATCGCGCATCA   [1034]
+'Zea_mays_(chloroplast)'    -GCGAGGGTGAGC-TAACTCCAA-AA-ACCCGTCCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCAT-GAAGCAGGAATCGCTAGTAATCGCCGGTCA   [1016]
+T.AMIT                      -GTAAGGCGGAGC-GAATC-CGG-AA-AGATTGCCTCAGTTCGGATTGTTCTCTGCAACTCGGGAACAT-GAAGTTGAAATCGCTAGTAATCGCGGATCA   [1030]
+
+[                                    2010      2020      2030      2040      2050      2060      2070      2080      2090      2100]
+[                                    .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens                TA-AGCTTGCGTTGATTAAGTCCCTGCCCTTTGTACACACCGCCCGTCGCTACTACCGATTGGATGGTTTAGCTTGACTATCTAGAGGAAGTAAAAGTCG   [1315]
+Rattus_norvegicus           TA-AGCTTGCGTTGATTAAGTCCCTGCCCTTTGTACACACCGCCCGTCGCTACTACCGATTGGATGGTTTAGCTTGACTATCTAGAGGAAGTAAAAGTCG   [1314]
+Xenopus_laevis              TA-AGCTCGCGTTGATTAAGTCCCTGCCCTTTGTACACACCGCCCGTCGCTACTACCGATTGGATGGTTTAGCTTGACTATCTAGAGGAAGTAAAAGTCG   [1307]
+A._salina                   TT-AGCTTGCGTCGATTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTACTACCGATTGAATGATTTAGCTTGATCCTTTAGAGGAAGTAAAAGTCG   [1300]
+S._cerevisiae               TC-AGCTTGCGTTGATTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTAGTACCGATTGAATGGCTTAGCTTGGTCATTTGGAGGAACTAAAAGTCG   [1298]
+P.MICA                      TC-AGCTCGTGCTGATTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAGTG-----ACCTTATCACTTAGAGGAAGGAGAAGTCG   [1295]
+D.DISC                      TT-ACCTTATGCTGAATATGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATCGAATGATACGGTCTCATTGTTTAGAGGAAGGAGAAGTCG   [1380]
+T.BRUC                      TC-AAACTGTGCCGATTACGTCCCTGCCATTTGTACACACCGCCCGTCGTTGTTTCCGAT--GATGGTGCAAATTGCTTCAATAGAGGAAGCAAAAGTCG   [1700]
+E.GRAC                      TC-AGCCCAGACCGATTGTGTCCCTGCCATTTGTACACACCGCCCGTCGTTGCTACCGATGG--TGGCTGGAGTCCAGCCACTAGAGGAAGCAAAAGTCG   [1703]
+Zea_mays                    TC-AGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGGTCCGGCCTTATCATTTAGAGGAAGGAGAAGTCG   [1301]
+D._mobilis                  AC-ATCGCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCGCTCCACCCGAGGGGAGG-GGGAGCTCCCCCTCCCTGAGGGGGGAGAAGTCG   [1095]
+S._solfataricus             AC-AACCCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCGCTCCACCCGAGCGCGAA-AGGGGCTCCTTTCCCGCGAGGGGGGAGAAGTCG   [1092]
+T._tenax                    CC-AACGCGCGGTGAATACGTCCCTGCCCCTTGCACACACCGCCCGTCGCACCACCCGAGGGAGTT-CTCTGCAGAGAACTCCCGAGGGGGGTGAAGTCG   [1096]
+M._vannielii                TA-ATACTGCGGTGAATGTGTCCCTGCTCCTTGCACACACCGCCCGTCACACCACCCGAGTTGGGT-TCAGGCCTGGGCTCAGCGAGGGGGGTGAAGTCG   [1096]
+MB._formicicum              TA-ACCGCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCACGCCACCCAAAAAGGGT-TTGGATCTAGGTTCTTTGAGGAGGGCGAAGTCG   [1099]
+MS._hungatei                AC-ATAGCGCGGTGAATATGTCCCTGCTCCTTGCACACACCGCCCGTCAAACCACCCGAGTGAGGT-CTTGATCTGGGTTTTGCAAGGGGGGTTAAGTCG   [1072]
+H._volacanii                AT-AGAGTGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCAAAGCACCCGAGTGAGGT-CCGGA-------TTCGCAAGGGGGCTTAAGTCG   [1097]
+HC._morrhuae                GA-AGAGTGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCAAATCACCCGAGTGAGGT-CCGGA-------TTCGCAAGGGGGATTAAGTCG   [1098]
+H.HALO                      GC-AGCGCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCAAATCACCCGAGTGGGGT-TCGGA-------TCCGCAAGGGGGATTAAGTCG   [1097]
+H._cutirubrum               GC-AGCGCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCAAATCACCCGAGTGGGGT-TCGGA-------TCCGCAAGGGGGATTAAGTCG   [1098]
+AN._nidulans                GC-ATACTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGAAGTTGGCC-ATGCCGGTAGGGCTGATGACTGGGGTGAAGTCG   [1117]
+HE.CHL                      GC-ATACCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAAAGTCGGCA-ACACCGGTGGGGTCGATGATTGGGGTGAAGTCG   [1104]
+Bc._subtilis                GC-ATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTA-ACACCGGTGGGACAGATGATTGGGGTGAAGTCG   [1140]
+MY.CAP                      GCTATGTCGCGGTGAATACGTTCTCGGGTCTTGTACACACCGCCCGTCACACCATGAGAGTTGGTA-ATACCGGTAGGACTAGCGATTGGGGTGAAGTCG   [1124]
+E._coli                     GA-ATGCCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTT-GCAAATTTGTGATTCATGACTGGGGTGAAGTCG   [1138]
+MYX.XA                      GC-ACGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTCGATT-GCTCCGGAGTGGTCGGTAACTGGGGTGAAGTCG   [1135]
+P.TEST                      GA-ATGTCACGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCATGGGAGCGGGTC-TCGCCGGCGGGGTTCGTGACTGGGGTGAAGTCG   [1138]
+AG.TUM                      GC-ATGCTGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTTGGTT-TTACCGGTAGGGTCAGCGACTGGGGTGAAGTCG   [1117]
+DV.DES                      GC-ATGCTCGGGTGAATGCGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAAAGTCGGTT-TTACCGGTAGGGCCGATGATTGGGGTGAAGTCG   [1139]
+BA.FRG                      GCCACGGCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCAAGCCATGGGAGCCGGGG-GTACCGGTAAAACTGGTGACTGGGGCTAAGTCG   [1133]
+'Zea_mays_(chloroplast)'    GCCATACGGCGGCGAATCCGTTCCCGGGCCTTGTACACACCGCCCGTCACACTATAGGAGCTGGCC-AGGTTGGCTAGGCTTGCGACTGGAGTGAAGTCG   [1115]
+T.AMIT                      GC-ATGCCGCGGTGAATATGTACCCGGGCCCTGTACACACCGCCCGTCACACCCTGGGAATTGGTT-TCGCCGGTGGGGTCTTCGACTGGGGTGAAGTCG   [1128]
+
+[                                    2110      2120      2130      2140    ]
+[                                    .         .         .         .       ]
+
+Homo_sapiens                TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTA-------   [1355]
+Rattus_norvegicus           TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTA-------   [1354]
+Xenopus_laevis              TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTA-------   [1347]
+A._salina                   TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTA-------   [1340]
+S._cerevisiae               TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTA-------   [1338]
+P.MICA                      TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTC-------   [1335]
+D.DISC                      TAACAAGGTATCCGTAGGTGAACCTGCGGATGGATCATTTT------   [1421]
+T.BRUC                      TAACAAGGTAGCTGTAGGTGAACCTGCAGCTGGATCATTT-------   [1740]
+E.GRAC                      TAACAAGGTTGCTGTAGGTGAACCTGCAGCAGGATCATTG-------   [1743]
+Zea_mays                    TAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG-------   [1341]
+D._mobilis                  TAACAAGGTAGCCGTACCGGAAGGTGCGGCTGGATCACCTCCT----   [1138]
+S._solfataricus             TAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCA-----   [1134]
+T._tenax                    TAACAAGGTAGCCGTAGGGGAACCTGCTTCTGGATCACCTCC-----   [1138]
+M._vannielii                TAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCC-----   [1138]
+MB._formicicum              TAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCCT----   [1142]
+MS._hungatei                TAACAAGGTAGCCGTAGGGGAATCTGCGGCTGGATCACCTCCT----   [1115]
+H._volacanii                TAACAAGGTAGCCGTAGGGGAATCTGCGGCTGGATCACCTCCT----   [1140]
+HC._morrhuae                TAACAAGGTAGCCGTAGGGGAATCTGCGGCTGGATCACCTCCT----   [1141]
+H.HALO                      TAACAAGGTAGCCGTAGGGGAATCTGCGGCTGGATCACCTCCT----   [1140]
+H._cutirubrum               TAACAAGGTAGCCGTAGGGGAATCTGCGGCTGGATCACCTCCT----   [1141]
+AN._nidulans                TAACAAGGTAGCCGTACCGGAAGGTGTGGCTGGATCACCTCCTTT--   [1162]
+HE.CHL                      TAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCT   [1151]
+Bc._subtilis                TAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCT   [1187]
+MY.CAP                      TAACAAGGTATCCGTACGGGAACGTGCGGATGGATCACCTCCTTTCT   [1171]
+E._coli                     TAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTA--   [1183]
+MYX.XA                      TAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCCTTTCT   [1182]
+P.TEST                      TAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCTCCTTTCT   [1185]
+AG.TUM                      TAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCCTTTCT   [1164]
+DV.DES                      TAACAAGGTAGCCGTAGGGGAACCTGCGGCTGGATCACCTCCTTT--   [1184]
+BA.FRG                      TAACAAGGTAGCCGTACCGGAAGGTGCGGCTGGAACACCTCCTTTCT   [1180]
+'Zea_mays_(chloroplast)'    TAACAAGGTAGCCGTACTGGAAGGTGCGGCTGGATCACCTCCTTT--   [1160]
+T.AMIT                      TAACAAGGTAGCCGTAGGGGAACCTGTGGCTGGATTGAATCC-----   [1170]
+;
+END;
+
+
+BEGIN CODONS;
+CODESET * UNTITLED = Universal: all ;
+END;
+
+BEGIN ASSUMPTIONS;
+	OPTIONS  DEFTYPE=unord PolyTcount=MINSTEPS ;
+END; 
+
+BEGIN TREES; 
+
+	TRANSLATE
+		1	Homo_sapiens,
+		2	Rattus_norvegicus,
+		3	Xenopus_laevis,
+		4	A._salina,
+		5	S._cerevisiae,
+		6	P.MICA,
+		7	D.DISC,
+		8	T.BRUC,
+		9	E.GRAC,
+		10	Zea_mays,
+		11	D._mobilis,
+		12	S._solfataricus,
+		13	T._tenax,
+		14	M._vannielii,
+		15	MB._formicicum,
+		16	MS._hungatei,
+		17	H._volacanii,
+		18	HC._morrhuae,
+		19	H.HALO,
+		20	H._cutirubrum,
+		21	AN._nidulans,
+		22	HE.CHL,
+		23	Bc._subtilis,
+		24	MY.CAP,
+		25	E._coli,
+		26	MYX.XA,
+		27	P.TEST,
+		28	AG.TUM,
+		29	DV.DES,
+		30	BA.FRG,
+		31	'Zea_mays_(chloroplast)',
+		32	T.AMIT
+	;
+	TREE mp_tree_1 =  [&R] ((14,(15,(16,(18,(17,(19,20)))))),((13,(11,12)),(((8,9),(7,(5,(6,(10,(4,(3,(1,2)))))))),(30,(((21,31),(24,(22,23))),((25,27),(26,(29,(28,32)))))))));
+	TREE mp_tree_2 =  [&R] ((14,(15,(16,(18,(17,(19,20)))))),((13,(11,12)),(((8,9),(7,(5,(6,(10,(4,(3,(1,2)))))))),(30,(((21,31),(24,(22,23))),((26,(25,27)),(29,(28,32))))))));
+	TREE  * UNTITLED =  [&R] ((14,(15,(16,(18,(17,(19,20)))))),((13,(11,12)),(((8,9),(7,(5,(6,(10,(4,(3,(1,2)))))))),(30,(((21,31),(24,(22,23))),((25,27),(26,(29,(28,32)))))))));
+
+END;
+
+
+
+BEGIN MACCLADE;
+Version 3.05;
+LastModified -1425155107;
+Singles 1000&/0;
+END;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/L77119.hmmer
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/L77119.hmmer	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/L77119.hmmer	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,50 @@
+hmmpfam - search one or more sequences against HMM database
+HMMER 2.2g (August 2001)
+Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
+Freely distributed under the GNU General Public License (GPL)
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                 Pfam
+Sequence file:            L77119.faa
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+Query sequence: gi|1522636|gb|AAC37060.1|
+Accession:      [none]
+Description:    M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]
+
+Scores for sequence family classification (score includes all domains):
+Model       Description                                 Score    E-value  N 
+--------    -----------                                 -----    ------- ---
+Methylase_M Type I restriction modification system, M  -105.2     0.0022   1
+
+Parsed for domains:
+Model       Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+--------    ------- ----- -----    ----- -----      -----  -------
+Methylase_M   1/1     280   481 ..     1   279 []  -105.2   0.0022
+
+Alignments of top-scoring domains:
+Methylase_M: domain 1 of 1, from 280 to 481: score -105.2, E = 0.0022
+                   *->lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerri
+                       ++EL+++  av+   R              L+F K++ dk      
+  gi|1522636   280    NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------ 306  
+
+                   eieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsql
+                   +i+         p +   + +++y   ++   ++ ++y ++      + l
+  gi|1522636   307 GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPL 338  
+
+                   FwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdl
+                   F++++   e ++  ++++ + +    ++      + +       Glf ++
+  gi|1522636   339 FYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSN 374  
+
+                   dfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfG
+                   ++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G
+  gi|1522636   375 NV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILG 421  
+
+                   DaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDP
+                    +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++        
+  gi|1522636   422 YVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE------- 463  
+
+                   AcGSGSLllqaskflgehdgkrnaisyYGQEsn<-*
+                             +++ ++    k+n+i +    s+   
+  gi|1522636   464 ---------RFKEIIK--NWKINDINF----ST    481  
+
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/LL-sample.seq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/LL-sample.seq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/LL-sample.seq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,164 @@
+>>26
+LOCUSID: 26
+LOCUS_CONFIRMED: yes
+LOCUS_TYPE: gene with protein product, function known or inferred
+ORGANISM: Homo sapiens
+STATUS: REVIEWED
+NM: NM_001091|4501850|na
+NP: NP_001082|4501851
+CDD: Copper amine oxidase|pfam01179|1775|na|6.883370e+02
+PRODUCT: amiloride binding protein 1 precursor
+ASSEMBLY: X78212
+CONTIG: NT_007914.10|22047859|na|11083771|11092582|+|7|reference
+EVID: supported by alignment with mRNA
+XM: XM_032220|14745402|na
+XP: XP_032220|14745403|na
+ACCNUM: X78212|463242|na|na|na
+TYPE: g
+PROT: CAA55046|463243
+ACCNUM: BC014093|15559450|na|na|na
+TYPE: m
+PROT: AAH14093|15559451
+ACCNUM: M55602|387655|na|na|na
+TYPE: m
+PROT: AAA58358|177960
+ACCNUM: U11862|533535|na|na|na
+TYPE: m
+PROT: AAC50270|533536
+ACCNUM: U11863|533537|na|na|na
+TYPE: m
+PROT: AAB60381|533538
+OFFICIAL_SYMBOL: ABP1
+OFFICIAL_GENE_NAME: amiloride binding protein 1 (amine oxidase (copper-containing))
+ALIAS_SYMBOL: DAO
+ALIAS_SYMBOL: AOC1
+PREFERRED_PRODUCT: amiloride binding protein 1 precursor
+SUMMARY: Summary: This gene encodes a membrane glycoprotein that binds amiloride, a diuretic that acts by closing epithelial sodium ion channels. Experimental evidence indicates, however, that the formation of an amiloride sensitive, sodium channel requires complex formation with additional proteins. Although an association was proposed between this gene and cystic fibrosis, a disorder involving sodium and water imbalance in the lungs, genetic evidence showed that it was not involved in producing that disorder.
+CHR: 7
+STS: RH71199|7|8014|na|seq_map|epcr
+STS: ABP1|7|32801|ABP1|seq_map|epcr
+COMP: 10090|Abp1|6|6  cM|76507|7|ABP1|ncbi_mgd
+COMP: 10090|1600012D06Rik|6|6  cM|76507|7|ABP1|ucsc_mgd
+ALIAS_PROT: diamine oxidase
+ALIAS_PROT: Amiloride-binding protein-1
+UNIGENE: Hs.75741
+BUTTON: unigene.gif
+LINK: http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=75741
+OMIM: 104610
+ECNUM: 1.4.3.6
+MAP: 7q34-q36|RefSeq|C|
+MAPLINK: default_human_gene|ABP1
+BUTTON: snp.gif
+LINK: http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?locusId=26
+BUTTON: homol.gif
+LINK: http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=26[loc]&TAXID=9606
+BUTTON: gdb.gif
+LINK: http://gdbwww.gdb.org/gdb-bin/genera/accno?GDB:127105
+BUTTON: ensembl.gif
+LINK: http://www.ensembl.org/Homo_sapiens/contigview?geneid=NM_001091
+BUTTON: ucsc.gif
+LINK: http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg12&position=NM_001091
+DB_DESCR: GeneCards
+DB_LINK: http://bioinformatics.weizmann.ac.il/cards-bin/carddisp?ABP1
+DB_DESCR: KEGG pathway: Tyrosine metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00350
+DB_DESCR: KEGG pathway: Histidine metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00340
+DB_DESCR: KEGG pathway: Tryptophan metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00380
+DB_DESCR: KEGG pathway: beta-Alanine metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00410
+DB_DESCR: KEGG pathway: Alkaloid biosynthesis II
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00960
+DB_DESCR: KEGG pathway: Phenylalanine metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00360
+DB_DESCR: KEGG pathway: Arginine and proline metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00330
+DB_DESCR: KEGG pathway: Glycine, serine and threonine metabolism
+DB_LINK: http://www.genome.ad.jp/dbget-bin/get_pathway?org_name=hsa&mapno=00260
+PMID: 11603849,8595053,8182053,8144586,2217167,1356107
+GRIF: 11603849|could be detected at the feto-maternal interface of human
+SUMFUNC: Diamine oxidase (D-amino-acid oxidase histaminase, amiloride-binding protein); deaminates putrescine and histamine|Proteome
+GO: biological process|metabolism|NR|GO:0008152|Proteome|na
+GO: cellular component|peroxisome|NR|GO:0005777|Proteome|na
+GO: molecular function|amine oxidase|E|GO:0008131|Proteome|8144586
+GO: molecular function|drug binding|NR|GO:0008144|Proteome|na
+EXTANNOT: cellular role|Other metabolism|E|Proteome|1356107
+EXTANNOT: biochemical function|Oxidoreductase|E|Proteome|1356107
+EXTANNOT: biochemical function|Small molecule-binding protein|E|Proteome|8182053
+>>27
+LOCUSID: 27
+LOCUS_CONFIRMED: yes
+LOCUS_TYPE: gene with protein product, function known or inferred
+ORGANISM: Homo sapiens
+STATUS: REVIEWED
+NM: NM_005158|6382059|na
+NP: NP_005149|6382060
+PRODUCT: v-abl Abelson murine leukemia viral oncogene homolog 2 isoform a
+TRANSVAR: Transcript Variant:  Transcript variant a includes the alternate exon IA, but not exon IB and encodes a distinct N-terminus.
+ASSEMBLY: M35296
+NM: NM_007314|6382061|na
+NP: NP_009298|6382062
+CDD: SH3 domain|pfam00018|180|na|7.394420e+01
+CDD: Src homology 2 domains|SH2|229|na|9.281890e+01
+CDD: Src homology 3 domains|SH3|128|na|5.391380e+01
+CDD: Src homology domain 2|pfam00017|212|na|8.627050e+01
+CDD: Tyrosine kinase, catalytic domain|TyrKc|869|na|3.393470e+02
+CDD: Eukaryotic protein kinase domain|pfam00069|510|na|2.010600e+02
+CDD: Serine/Threonine protein kinases, catalytic domain|S_TKc|417|na|1.652360e+02
+PRODUCT: v-abl Abelson murine leukemia viral oncogene homolog 2 isoform b
+TRANSVAR: Transcript Variant:  Transcript variant a includes the alternate exon IB, but not exon IA and encodes a distinct N-terminus.
+ASSEMBLY: M35296
+CONTIG: NT_004487.12|22045208|na|4192865|4314849|-|1|reference
+EVID: supported by alignment with mRNA
+XM: NM_005158|6382059|na
+XP: NP_005149|6382060|na
+EVID: supported by alignment with both mRNA and ESTs (5)
+XM: NM_007314|6382061|na
+XP: NP_009298|6382062|na
+ACCNUM: M35296|178992|na|na|na
+TYPE: m
+PROT: AAA35553|178993
+OFFICIAL_SYMBOL: ABL2
+OFFICIAL_GENE_NAME: v-abl Abelson murine leukemia viral oncogene homolog 2 (arg, Abelson-related gene)
+ALIAS_SYMBOL: ARG
+ALIAS_SYMBOL: ABLL
+PREFERRED_PRODUCT: v-abl Abelson murine leukemia viral oncogene homolog 2 isoform a
+PREFERRED_PRODUCT: v-abl Abelson murine leukemia viral oncogene homolog 2 isoform b
+SUMMARY: Summary:  ABL2 is a cytoplasmic tyrosine kinase which is closely related to but distinct from ABL1.  The similarity of the proteins includes the tyrosine kinase domains and extends amino-terminal to include the SH2 and SH3 domains.  ABL2 is expressed in both normal and tumor cells.  The ABL2 gene product is expressed as two variants bearing different amino termini, both approximately 12-kb in length.
+CHR: 1
+STS: RH69130|1|3401|na|seq_map|epcr
+STS: RH66836|1|44261|na|seq_map|epcr
+COMP: 10090|Abl2|1|1 82.10 cM|11352|1|ABL2|ncbi_mgd
+COMP: 10090|Abl2|1|1 82.10 cM|11352|1|ABL2|ucsc_mgd
+ALIAS_PROT: arg
+ALIAS_PROT: Abelson murine leukemia viral (v-abl) oncogene homolog 2 (arg,
+UNIGENE: Hs.121521
+BUTTON: unigene.gif
+LINK: http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=121521
+OMIM: 164690
+ECNUM: 2.7.1.112
+MAP: 1q24-q25|<a href="http://gdbwww.gdb.org/gdb-bin/genera/accno?GDB:119641">HUGO</a>|C|
+MAPLINK: default_human_gene|ABL2
+PHENOTYPE: Leukemia, acute myeloid, with eosinophilia
+PHENOTYPE_ID: 164690
+BUTTON: snp.gif
+LINK: http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?locusId=27
+BUTTON: homol.gif
+LINK: http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=27[loc]&TAXID=9606
+BUTTON: gdb.gif
+LINK: http://gdbwww.gdb.org/gdb-bin/genera/accno?GDB:119641
+BUTTON: ensembl.gif
+LINK: http://www.ensembl.org/Homo_sapiens/contigview?geneid=NM_007314
+BUTTON: ucsc.gif
+LINK: http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg12&position=NM_007314
+DB_DESCR: GeneCards
+DB_LINK: http://bioinformatics.weizmann.ac.il/cards-bin/carddisp?ABL2
+PMID: 3787260,2198571
+SUMFUNC: Cytoplasmic tyrosine kinase of the Abelson subfamily; contains SH2 and SH3 domains and has similarity to ABL1|Proteome
+GO: cellular component|cytoplasm|NR|GO:0005737|Proteome|na
+GO: molecular function|protein kinase|P|GO:0004672|Proteome|2198571
+GO: biological process|signal transduction|P|GO:0007165|Proteome|2198571
+GO: biological process|protein modification|P|GO:0006464|Proteome|2198571
+EXTANNOT: biochemical function|Transferase|P|Proteome|2198571
+EXTANNOT: subcellular localization|Cytoplasmic|NR|Proteome|2198571

Added: trunk/packages/bioperl/branches/upstream/current/t/data/LOAD_Ccd1.dnd
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/LOAD_Ccd1.dnd	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/LOAD_Ccd1.dnd	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,76 @@
+(
+(
+(
+A_aeolicus:0.27435,
+S_aureus:0.30536)
+:0.01589,
+(
+M_leprae:0.08606,
+M_tubercul:0.06545)
+:0.32304)
+:0.02937,
+(
+(
+(
+(
+(
+A_fulgidus:0.26385,
+T_maritima:0.22186)
+:0.04261,
+Synechocys:0.37630)
+:0.05068,
+C_jejuni:0.34050)
+:0.00557,
+(
+M_jannasch:0.35330,
+M_thermoau:0.41813)
+:0.03001)
+:0.04323,
+(
+(
+A_tumefaci:0.19429,
+S_meliloti:0.17884)
+:0.13366,
+(
+(
+B_sp:0.23437,
+(
+(
+(
+E_coli_A:0.10440,
+Y_pestis:0.10989)
+:0.04376,
+V_cholerae:0.16338)
+:0.01631,
+(
+H_influenz:0.11186,
+P_multocid:0.10553)
+:0.07654)
+:0.02011)
+:0.09091,
+P_aerugino:0.26019)
+:0.04366)
+:0.02890)
+:0.00030,
+(
+(
+(
+B_subtilis:0.27453,
+H_sp:0.29690)
+:0.01772,
+N_meningit:0.29801)
+:0.02281,
+(
+C_crescent:0.33036,
+(
+(
+S_solfatar:0.06649,
+S_tokodaii:0.06208)
+:0.19814,
+(
+T_acidophi:0.01448,
+T_volcaniu:0.01451)
+:0.31728)
+:0.01669)
+:0.01921)
+:0.00981);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/LittleChrY.dbsnp.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/LittleChrY.dbsnp.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/LittleChrY.dbsnp.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,398 @@
+<?xml version="1.0"?>
+<ExchangeSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xmlns="http://www.ncbi.nlm.nih.gov/SNP/docsum"
+xsi:schemaLocation="http://www.ncbi.nlm.nih.gov/SNP/docsum
+http://www.ncbi.nlm.nih.gov/SNP/docsum/docsum_eutil_ver1.0a.xsd">
+
+ 
+
+
+<Rs>
+  <Rs_organism>Homo sapiens</Rs_organism>
+  <Rs_taxId>9606</Rs_taxId>
+  <Rs_rsId>12345</Rs_rsId>
+  <Rs_snpClass value="snp"/>
+  <Rs_snpType value="notwithdrawn"/>
+  <Rs_molType value="cDNA"/>
+  <Rs_het>
+    <Rs_het_type value="est"/>
+    <Rs_het_value>0</Rs_het_value>
+    <Rs_het_stdError>0</Rs_het_stdError>
+  </Rs_het>
+  <Rs_validation></Rs_validation>
+  <Rs_create>
+    <Rs_create_build>52</Rs_create_build>
+    <Rs_create_date>2000-09-19 17:02</Rs_create_date>
+  </Rs_create>
+  <Rs_update>
+    <Rs_update_build>120</Rs_update_build>
+    <Rs_update_date>2004-10-04 13:37</Rs_update_date>
+  </Rs_update>
+  <Rs_sequence>
+    <Rs_sequence_exemplarSs>14616</Rs_sequence_exemplarSs>
+   
+<Rs_sequence_seq5>GGGTGGGCACTAGTTGGCGGGGTGGAAGGCACCACATTGGTCGCCTGCATGTCGTGGATGCA
+GCGCACGGACTGTACCTGAGGGTGACGGGCCCCAAAGT</Rs_sequence_seq5>
+    <Rs_sequence_observed>C/T</Rs_sequence_observed>
+   
+<Rs_sequence_seq3>GCTGCTGTCCTTGTAGTCTCCCTTCTCCAGCAGGTACTGCAGCCCACGGTAGCCAGGGTACT
+AGTAGCCAACCCACGTGCCACTCCGCACCCACACAGAT</Rs_sequence_seq3>
+  </Rs_sequence>
+  <Rs_ss>
+    <Ss>
+      <Ss_ssId>14616</Ss_ssId>
+      <Ss_handle>CGAP-GAI</Ss_handle>
+      <Ss_batchId>607</Ss_batchId>
+      <Ss_locSnpId>59089</Ss_locSnpId>
+      <Ss_subSnpClass value="snp"/>
+      <Ss_orient value="forward"/>
+      <Ss_strand value="bottom"/>
+      <Ss_molType value="cDNA"/>
+      <Ss_buildId>52</Ss_buildId>
+      <Ss_methodClass value="computed"/>
+     
+<Ss_linkoutUrl>http://lpgws.nci.nih.gov:82/perl/gettrace.pl?type=7&amp;trace=</Ss_linkoutUrl>
+      <Ss_sequence>
+       
+<Ss_sequence_seq5>GGGTGGGCACTAGTTGGCGGGGTGGAAGGCACCACATTGGTCGCCTGCATGTCGTGGATGCA
+GCGCACGGACTGTACCTGAGGGTGACGGGCCCCAAAGT</Ss_sequence_seq5>
+        <Ss_sequence_observed>C/T</Ss_sequence_observed>
+       
+<Ss_sequence_seq3>GCTGCTGTCCTTGTAGTCTCCCTTCTCCAGCAGGTACTGCAGCCCACGGTAGCCAGGGTACT
+AGTAGCCAACCCACGTGCCACTCCGCACCCACACAGAT</Ss_sequence_seq3>
+      </Ss_sequence>
+    </Ss>
+    <Ss>
+      <Ss_ssId>1539216</Ss_ssId>
+      <Ss_handle>LEE</Ss_handle>
+      <Ss_batchId>3129</Ss_batchId>
+      <Ss_locSnpId>750160</Ss_locSnpId>
+      <Ss_subSnpClass value="snp"/>
+      <Ss_orient value="reverse"/>
+      <Ss_strand value="top"/>
+      <Ss_molType value="cDNA"/>
+      <Ss_buildId>92</Ss_buildId>
+      <Ss_methodClass value="computed"/>
+     
+<Ss_linkoutUrl>http://www.bioinformatics.ucla.edu/snp/snp/snp_report.php3?db_ver
+sion=db_april_00&amp;allele_id=</Ss_linkoutUrl>
+      <Ss_sequence>
+       
+<Ss_sequence_seq5>ACCGTGGGCTGCAGTACCTGCTGGAGAAGGGAGACTACAAGGACAGCAGC</Ss_sequence_seq5>
+        <Ss_sequence_observed>A/G</Ss_sequence_observed>
+       
+<Ss_sequence_seq3>ACTTTGGGGCCCGTCACCCTCAGGTACAGTCCGTGCGCTGCATCCACGAC</Ss_sequence_seq3>
+      </Ss_sequence>
+    </Ss>
+    <Ss>
+      <Ss_ssId>4398532</Ss_ssId>
+      <Ss_handle>LEE</Ss_handle>
+      <Ss_batchId>5288</Ss_batchId>
+      <Ss_locSnpId>ge750160</Ss_locSnpId>
+      <Ss_subSnpClass value="snp"/>
+      <Ss_orient value="reverse"/>
+      <Ss_strand value="top"/>
+      <Ss_molType value="cDNA"/>
+      <Ss_buildId>106</Ss_buildId>
+      <Ss_methodClass value="unknown"/>
+      <Ss_linkoutUrl> </Ss_linkoutUrl>
+      <Ss_sequence>
+       
+<Ss_sequence_seq5>ACCGTGGGCTGCAGTACCTGCTGGAGAAGGGAGACTACAAGGACAGCAGC</Ss_sequence_seq5>
+        <Ss_sequence_observed>A/G</Ss_sequence_observed>
+       
+<Ss_sequence_seq3>ACTTTGGGGCCCGTCACCCTCAGGTACAGTCCGTGCGCTGCATCCACGAC</Ss_sequence_seq3>
+      </Ss_sequence>
+    </Ss>
+    <Ss>
+      <Ss_ssId>4426703</Ss_ssId>
+      <Ss_handle>LEE</Ss_handle>
+      <Ss_batchId>5293</Ss_batchId>
+      <Ss_locSnpId>e750160</Ss_locSnpId>
+      <Ss_subSnpClass value="snp"/>
+      <Ss_orient value="reverse"/>
+      <Ss_strand value="top"/>
+      <Ss_molType value="cDNA"/>
+      <Ss_buildId>106</Ss_buildId>
+      <Ss_methodClass value="computed"/>
+      <Ss_linkoutUrl> </Ss_linkoutUrl>
+      <Ss_sequence>
+       
+<Ss_sequence_seq5>ACCGTGGGCTGCAGTACCTGCTGGAGAAGGGAGACTACAAGGACAGCAGC</Ss_sequence_seq5>
+        <Ss_sequence_observed>A/G</Ss_sequence_observed>
+       
+<Ss_sequence_seq3>ACTTTGGGGCCCGTCACCCTCAGGTACAGTCCGTGCGCTGCATCCACGAC</Ss_sequence_seq3>
+      </Ss_sequence>
+    </Ss>
+    <Ss>
+      <Ss_ssId>16250752</Ss_ssId>
+      <Ss_handle>CGAP-GAI</Ss_handle>
+      <Ss_batchId>8532</Ss_batchId>
+      <Ss_locSnpId>1500705</Ss_locSnpId>
+      <Ss_subSnpClass value="snp"/>
+      <Ss_orient value="forward"/>
+      <Ss_strand value="bottom"/>
+      <Ss_molType value="cDNA"/>
+      <Ss_buildId>120</Ss_buildId>
+      <Ss_methodClass value="computed"/>
+     
+<Ss_linkoutUrl>http://lpgws.nci.nih.gov/perl/gettrace.pl?type=7&amp;trace=</Ss_linkoutUrl>
+      <Ss_sequence>
+       
+<Ss_sequence_seq5>GTCGTGGATGCAGCGCACGGACTGTACCTGAGGGTGACGGGCCCCAAAGT</Ss_sequence_seq5>
+        <Ss_sequence_observed>C/T</Ss_sequence_observed>
+       
+<Ss_sequence_seq3>GCTGCTGTCCTTGTAGTCTCCCTTCTCCAGCAGGTACTGCAGCCCACGGT</Ss_sequence_seq3>
+      </Ss_sequence>
+    </Ss>
+  </Rs_ss>
+  <Rs_assembly>
+    <Assembly>
+      <Assembly_dbSnpBuild>125</Assembly_dbSnpBuild>
+      <Assembly_genomeBuild>34_3</Assembly_genomeBuild>
+      <Assembly_groupLabel>reference</Assembly_groupLabel>
+      <Assembly_component>
+        <Component>
+          <Component_componentType value="contig"/>
+          <Component_ctgId>960600311</Component_ctgId>
+          <Component_accession>NT_011520.9</Component_accession>
+          <Component_name>Hs22_11677_34</Component_name>
+          <Component_chromosome>22</Component_chromosome>
+          <Component_start>18933985</Component_start>
+          <Component_end>42112197</Component_end>
+          <Component_orientation value="fwd"/>
+          <Component_gi>29807292</Component_gi>
+          <Component_groupTerm>ref_haplotype</Component_groupTerm>
+          <Component_contigLabel>reference</Component_contigLabel>
+          <Component_mapLoc>
+            <MapLoc>
+              <MapLoc_asnFrom>5246027</MapLoc_asnFrom>
+              <MapLoc_asnTo>5246027</MapLoc_asnTo>
+              <MapLoc_locType value="exact"/>
+              <MapLoc_alnQuality>0.913983</MapLoc_alnQuality>
+              <MapLoc_orient value="reverse"/>
+              <MapLoc_physMapStr>24180013</MapLoc_physMapStr>
+              <MapLoc_physMapInt>24180012</MapLoc_physMapInt>
+              <MapLoc_leftFlankNeighborPos>99</MapLoc_leftFlankNeighborPos>
+              <MapLoc_rightFlankNeighborPos>101</MapLoc_rightFlankNeighborPos>
+             
+<MapLoc_leftContigNeighborPos>5246026</MapLoc_leftContigNeighborPos>
+             
+<MapLoc_rightContigNeighborPos>5246028</MapLoc_rightContigNeighborPos>
+              <MapLoc_numberOfMismatches>3</MapLoc_numberOfMismatches>
+              <MapLoc_numberOfDeletions>0</MapLoc_numberOfDeletions>
+              <MapLoc_numberOfInsertions>1</MapLoc_numberOfInsertions>
+            </MapLoc>
+          </Component_mapLoc>
+        </Component>
+      </Assembly_component>
+      <Assembly_snpStat>
+        <Assembly_snpStat_mapWeight value="unique-in-contig"/>
+        <Assembly_snpStat_chromCount>1</Assembly_snpStat_chromCount>
+       
+<Assembly_snpStat_placedContigCount>1</Assembly_snpStat_placedContigCount>
+       
+<Assembly_snpStat_unplacedContigCount>0</Assembly_snpStat_unplacedContigCount>
+        <Assembly_snpStat_seqlocCount>1</Assembly_snpStat_seqlocCount>
+        <Assembly_snpStat_hapCount>1</Assembly_snpStat_hapCount>
+      </Assembly_snpStat>
+    </Assembly>
+    <Assembly>
+      <Assembly_dbSnpBuild>125</Assembly_dbSnpBuild>
+      <Assembly_genomeBuild>35_1</Assembly_genomeBuild>
+      <Assembly_groupLabel>Celera</Assembly_groupLabel>
+      <Assembly_current value="true"/>
+      <Assembly_component>
+        <Component>
+          <Component_componentType value="contig"/>
+          <Component_ctgId>960600397</Component_ctgId>
+          <Component_accession>NT_086921.1</Component_accession>
+          <Component_name>Hs22_86581_35</Component_name>
+          <Component_chromosome>22</Component_chromosome>
+          <Component_start>18936537</Component_start>
+          <Component_end>48069707</Component_end>
+          <Component_orientation value="fwd"/>
+          <Component_gi>51477137</Component_gi>
+          <Component_groupTerm>alt_assembly_2</Component_groupTerm>
+          <Component_contigLabel>Celera</Component_contigLabel>
+          <Component_mapLoc>
+            <MapLoc>
+              <MapLoc_asnFrom>3888702</MapLoc_asnFrom>
+              <MapLoc_asnTo>3888702</MapLoc_asnTo>
+              <MapLoc_locType value="exact"/>
+              <MapLoc_alnQuality>0.919897</MapLoc_alnQuality>
+              <MapLoc_orient value="reverse"/>
+              <MapLoc_physMapStr>22825240</MapLoc_physMapStr>
+              <MapLoc_physMapInt>22825239</MapLoc_physMapInt>
+              <MapLoc_leftFlankNeighborPos>99</MapLoc_leftFlankNeighborPos>
+              <MapLoc_rightFlankNeighborPos>101</MapLoc_rightFlankNeighborPos>
+             
+<MapLoc_leftContigNeighborPos>3888701</MapLoc_leftContigNeighborPos>
+             
+<MapLoc_rightContigNeighborPos>3888703</MapLoc_rightContigNeighborPos>
+              <MapLoc_numberOfMismatches>3</MapLoc_numberOfMismatches>
+              <MapLoc_numberOfDeletions>0</MapLoc_numberOfDeletions>
+              <MapLoc_numberOfInsertions>1</MapLoc_numberOfInsertions>
+              <MapLoc_fxnSet>
+                <FxnSet>
+                  <FxnSet_geneId>1415</FxnSet_geneId>
+                  <FxnSet_symbol>CRYBB2</FxnSet_symbol>
+                  <FxnSet_mrnaAcc>NM_000496</FxnSet_mrnaAcc>
+                  <FxnSet_mrnaVer>1</FxnSet_mrnaVer>
+                  <FxnSet_protAcc>NP_000487</FxnSet_protAcc>
+                  <FxnSet_protVer>1</FxnSet_protVer>
+                  <FxnSet_fxnClass value="intron"/>
+                </FxnSet>
+              </MapLoc_fxnSet>
+            </MapLoc>
+          </Component_mapLoc>
+        </Component>
+      </Assembly_component>
+      <Assembly_snpStat>
+        <Assembly_snpStat_mapWeight value="unique-in-contig"/>
+        <Assembly_snpStat_chromCount>1</Assembly_snpStat_chromCount>
+       
+<Assembly_snpStat_placedContigCount>1</Assembly_snpStat_placedContigCount>
+       
+<Assembly_snpStat_unplacedContigCount>0</Assembly_snpStat_unplacedContigCount>
+        <Assembly_snpStat_seqlocCount>1</Assembly_snpStat_seqlocCount>
+        <Assembly_snpStat_hapCount>0</Assembly_snpStat_hapCount>
+      </Assembly_snpStat>
+    </Assembly>
+    <Assembly>
+      <Assembly_dbSnpBuild>125</Assembly_dbSnpBuild>
+      <Assembly_genomeBuild>35_1</Assembly_genomeBuild>
+      <Assembly_groupLabel>reference</Assembly_groupLabel>
+      <Assembly_current value="true"/>
+      <Assembly_component>
+        <Component>
+          <Component_componentType value="contig"/>
+          <Component_ctgId>960600396</Component_ctgId>
+          <Component_accession>NT_011520.10</Component_accession>
+          <Component_name>Hs22_11677_35</Component_name>
+          <Component_chromosome>22</Component_chromosome>
+          <Component_start>18933985</Component_start>
+          <Component_end>42210286</Component_end>
+          <Component_orientation value="fwd"/>
+          <Component_gi>51476066</Component_gi>
+          <Component_groupTerm>ref_haplotype</Component_groupTerm>
+          <Component_contigLabel>reference</Component_contigLabel>
+          <Component_mapLoc>
+            <MapLoc>
+              <MapLoc_asnFrom>5246027</MapLoc_asnFrom>
+              <MapLoc_asnTo>5246027</MapLoc_asnTo>
+              <MapLoc_locType value="exact"/>
+              <MapLoc_alnQuality>0.919897</MapLoc_alnQuality>
+              <MapLoc_orient value="reverse"/>
+              <MapLoc_physMapStr>24180013</MapLoc_physMapStr>
+              <MapLoc_physMapInt>24180012</MapLoc_physMapInt>
+              <MapLoc_leftFlankNeighborPos>99</MapLoc_leftFlankNeighborPos>
+              <MapLoc_rightFlankNeighborPos>101</MapLoc_rightFlankNeighborPos>
+             
+<MapLoc_leftContigNeighborPos>5246026</MapLoc_leftContigNeighborPos>
+             
+<MapLoc_rightContigNeighborPos>5246028</MapLoc_rightContigNeighborPos>
+              <MapLoc_numberOfMismatches>3</MapLoc_numberOfMismatches>
+              <MapLoc_numberOfDeletions>0</MapLoc_numberOfDeletions>
+              <MapLoc_numberOfInsertions>1</MapLoc_numberOfInsertions>
+              <MapLoc_fxnSet>
+                <FxnSet>
+                  <FxnSet_geneId>1416</FxnSet_geneId>
+                  <FxnSet_symbol>CRYBB2P1</FxnSet_symbol>
+                  <FxnSet_fxnClass value="locus-region"/>
+                </FxnSet>
+              </MapLoc_fxnSet>
+            </MapLoc>
+          </Component_mapLoc>
+        </Component>
+      </Assembly_component>
+      <Assembly_snpStat>
+        <Assembly_snpStat_mapWeight value="unique-in-contig"/>
+        <Assembly_snpStat_chromCount>1</Assembly_snpStat_chromCount>
+       
+<Assembly_snpStat_placedContigCount>1</Assembly_snpStat_placedContigCount>
+       
+<Assembly_snpStat_unplacedContigCount>0</Assembly_snpStat_unplacedContigCount>
+        <Assembly_snpStat_seqlocCount>1</Assembly_snpStat_seqlocCount>
+        <Assembly_snpStat_hapCount>1</Assembly_snpStat_hapCount>
+      </Assembly_snpStat>
+    </Assembly>
+  </Rs_assembly>
+  <Rs_primarySequence>
+    <PrimarySequence>
+      <PrimarySequence_dbSnpBuild>125</PrimarySequence_dbSnpBuild>
+      <PrimarySequence_gi>3171883</PrimarySequence_gi>
+      <PrimarySequence_source value="blastmb"/>
+      <PrimarySequence_mapLoc>
+        <MapLoc>
+          <MapLoc_asnFrom>24363</MapLoc_asnFrom>
+          <MapLoc_asnTo>24363</MapLoc_asnTo>
+          <MapLoc_locType value="exact"/>
+          <MapLoc_alnQuality>0.913983</MapLoc_alnQuality>
+          <MapLoc_orient value="reverse"/>
+          <MapLoc_leftFlankNeighborPos>99</MapLoc_leftFlankNeighborPos>
+          <MapLoc_rightFlankNeighborPos>101</MapLoc_rightFlankNeighborPos>
+          <MapLoc_leftContigNeighborPos>24362</MapLoc_leftContigNeighborPos>
+          <MapLoc_rightContigNeighborPos>24364</MapLoc_rightContigNeighborPos>
+          <MapLoc_numberOfMismatches>3</MapLoc_numberOfMismatches>
+          <MapLoc_numberOfDeletions>0</MapLoc_numberOfDeletions>
+          <MapLoc_numberOfInsertions>1</MapLoc_numberOfInsertions>
+        </MapLoc>
+      </PrimarySequence_mapLoc>
+    </PrimarySequence>
+    <PrimarySequence>
+      <PrimarySequence_dbSnpBuild>125</PrimarySequence_dbSnpBuild>
+      <PrimarySequence_gi>9863690</PrimarySequence_gi>
+      <PrimarySequence_source value="blastmb"/>
+      <PrimarySequence_mapLoc>
+        <MapLoc>
+          <MapLoc_asnFrom>52075</MapLoc_asnFrom>
+          <MapLoc_asnTo>52075</MapLoc_asnTo>
+          <MapLoc_locType value="exact"/>
+          <MapLoc_alnQuality>0.913983</MapLoc_alnQuality>
+          <MapLoc_orient value="forward"/>
+          <MapLoc_leftFlankNeighborPos>99</MapLoc_leftFlankNeighborPos>
+          <MapLoc_rightFlankNeighborPos>101</MapLoc_rightFlankNeighborPos>
+          <MapLoc_leftContigNeighborPos>52074</MapLoc_leftContigNeighborPos>
+          <MapLoc_rightContigNeighborPos>52076</MapLoc_rightContigNeighborPos>
+          <MapLoc_numberOfMismatches>3</MapLoc_numberOfMismatches>
+          <MapLoc_numberOfDeletions>0</MapLoc_numberOfDeletions>
+          <MapLoc_numberOfInsertions>1</MapLoc_numberOfInsertions>
+        </MapLoc>
+      </PrimarySequence_mapLoc>
+    </PrimarySequence>
+    <PrimarySequence>
+      <PrimarySequence_dbSnpBuild>125</PrimarySequence_dbSnpBuild>
+      <PrimarySequence_gi>51511751</PrimarySequence_gi>
+      <PrimarySequence_source value="blastmb"/>
+      <PrimarySequence_mapLoc>
+        <MapLoc>
+          <MapLoc_asnFrom>24180012</MapLoc_asnFrom>
+          <MapLoc_asnTo>24180012</MapLoc_asnTo>
+          <MapLoc_locType value="exact"/>
+          <MapLoc_alnQuality>0.913983</MapLoc_alnQuality>
+          <MapLoc_orient value="reverse"/>
+          <MapLoc_leftFlankNeighborPos>99</MapLoc_leftFlankNeighborPos>
+          <MapLoc_rightFlankNeighborPos>101</MapLoc_rightFlankNeighborPos>
+          <MapLoc_leftContigNeighborPos>24180011</MapLoc_leftContigNeighborPos>
+         
+<MapLoc_rightContigNeighborPos>24180013</MapLoc_rightContigNeighborPos>
+          <MapLoc_numberOfMismatches>3</MapLoc_numberOfMismatches>
+          <MapLoc_numberOfDeletions>0</MapLoc_numberOfDeletions>
+          <MapLoc_numberOfInsertions>1</MapLoc_numberOfInsertions>
+        </MapLoc>
+      </PrimarySequence_mapLoc>
+    </PrimarySequence>
+  </Rs_primarySequence>
+  <Rs_rsLinkout>
+    <RsLinkout>
+      <RsLinkout_resourceId>1</RsLinkout_resourceId>
+      <RsLinkout_linkValue>12345</RsLinkout_linkValue>
+    </RsLinkout>
+  </Rs_rsLinkout>
+</Rs>
+
+
+
+
+</ExchangeSet>
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/M0.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/M0.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/M0.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,159 @@
+CODONML (in paml 3.15, November 2005)    test.phy   Model: One dN/dS ratio 
+Codon frequencies: F3x4
+Site-class models: 
+ns =   3  ls =   6
+
+Codon usage in sequences
+--------------------------------------------------------------------------
+Phe TTT  1  1  1 | Ser TCT  1  1  0 | Tyr TAT  0  0  0 | Cys TGT  0  0  0
+    TTC  0  0  0 |     TCC  0  0  1 |     TAC  0  0  0 |     TGC  0  0  0
+Leu TTA  0  0  0 |     TCA  0  0  0 | *** TAA  0  0  0 | *** TGA  0  0  0
+    TTG  0  0  0 |     TCG  0  0  0 |     TAG  0  0  0 | Trp TGG  0  0  0
+--------------------------------------------------------------------------
+Leu CTT  0  0  0 | Pro CCT  0  0  0 | His CAT  1  1  1 | Arg CGT  0  0  0
+    CTC  0  0  0 |     CCC  0  0  1 |     CAC  0  0  0 |     CGC  0  0  0
+    CTA  0  0  0 |     CCA  1  1  0 | Gln CAA  0  0  0 |     CGA  0  0  0
+    CTG  0  0  0 |     CCG  0  0  0 |     CAG  0  0  0 |     CGG  0  0  0
+--------------------------------------------------------------------------
+Ile ATT  0  0  0 | Thr ACT  0  0  0 | Asn AAT  0  0  0 | Ser AGT  0  0  0
+    ATC  0  0  0 |     ACC  0  0  0 |     AAC  0  0  0 |     AGC  0  0  0
+    ATA  0  0  0 |     ACA  0  0  0 | Lys AAA  0  0  0 | Arg AGA  0  0  0
+Met ATG  2  1  1 |     ACG  0  1  1 |     AAG  0  0  0 |     AGG  0  0  0
+--------------------------------------------------------------------------
+Val GTT  0  0  0 | Ala GCT  0  0  0 | Asp GAT  0  0  0 | Gly GGT  0  0  0
+    GTC  0  0  0 |     GCC  0  0  0 |     GAC  0  0  0 |     GGC  0  0  0
+    GTA  0  0  0 |     GCA  0  0  0 | Glu GAA  0  0  0 |     GGA  0  0  0
+    GTG  0  0  0 |     GCG  0  0  0 |     GAG  0  0  0 |     GGG  0  0  0
+--------------------------------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: test0          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.50000    C:0.33333    A:0.16667    G:0.00000
+position  3:    T:0.50000    C:0.00000    A:0.16667    G:0.33333
+
+#2: test1          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.33333    C:0.50000    A:0.16667    G:0.00000
+position  3:    T:0.50000    C:0.00000    A:0.16667    G:0.33333
+
+#3: test2          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.33333    C:0.50000    A:0.16667    G:0.00000
+position  3:    T:0.33333    C:0.33333    A:0.00000    G:0.33333
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT       3 | Ser S TCT       2 | Tyr Y TAT       0 | Cys C TGT       0
+      TTC       0 |       TCC       1 |       TAC       0 |       TGC       0
+Leu L TTA       0 |       TCA       0 | *** * TAA       0 | *** * TGA       0
+      TTG       0 |       TCG       0 |       TAG       0 | Trp W TGG       0
+------------------------------------------------------------------------------
+Leu L CTT       0 | Pro P CCT       0 | His H CAT       3 | Arg R CGT       0
+      CTC       0 |       CCC       1 |       CAC       0 |       CGC       0
+      CTA       0 |       CCA       2 | Gln Q CAA       0 |       CGA       0
+      CTG       0 |       CCG       0 |       CAG       0 |       CGG       0
+------------------------------------------------------------------------------
+Ile I ATT       0 | Thr T ACT       0 | Asn N AAT       0 | Ser S AGT       0
+      ATC       0 |       ACC       0 |       AAC       0 |       AGC       0
+      ATA       0 |       ACA       0 | Lys K AAA       0 | Arg R AGA       0
+Met M ATG       4 |       ACG       2 |       AAG       0 |       AGG       0
+------------------------------------------------------------------------------
+Val V GTT       0 | Ala A GCT       0 | Asp D GAT       0 | Gly G GGT       0
+      GTC       0 |       GCC       0 |       GAC       0 |       GGC       0
+      GTA       0 |       GCA       0 | Glu E GAA       0 |       GGA       0
+      GTG       0 |       GCG       0 |       GAG       0 |       GGG       0
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.38889    C:0.44444    A:0.16667    G:0.00000
+position  3:    T:0.44444    C:0.11111    A:0.11111    G:0.33333
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+test0               
+test1               -1.0000 (0.0706 0.0000)
+test2                0.0510 (0.0706 1.3844) 0.0000 (0.0000 0.9745)
+
+
+Model 0: one-ratio
+
+
+TREE #  1:  (1, 2, 3);   MP score: 3
+lnL(ntime:  3  np:  5):    -30.819156     +0.000000
+   4..1     4..2     4..3  
+  0.25573  0.00000  0.62424  5.28487  0.09213
+
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.87997
+
+(1: 0.255727, 2: 0.000004, 3: 0.624239);
+
+(test0: 0.255727, test1: 0.000004, test2: 0.624239);
+
+Detailed output identifying parameters
+
+kappa (ts/tv) =  5.28487
+
+omega (dN/dS) =  0.09213
+
+dN & dS for each branch
+
+ branch           t        N        S    dN/dS       dN       dS   N*dN   S*dS
+
+   4..1       0.256     12.9      5.1   0.0921   0.0224   0.2429    0.3    1.2
+   4..2       0.000     12.9      5.1   0.0921   0.0000   0.0000    0.0    0.0
+   4..3       0.624     12.9      5.1   0.0921   0.0546   0.5930    0.7    3.0
+
+tree length for dN:      0.07702
+tree length for dS:      0.83594
+
+
+Time used:  0:00
+
+
+Model 1: NearlyNeutral (2 categories)
+
+
+TREE #  1:  (1, 2, 3);   MP score: 3
+lnL(ntime:  3  np:  6):    -30.819157     +0.000000
+   4..1     4..2     4..3  
+  0.25573  0.00000  0.62424  5.28488  1.00000  0.09213
+
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.87997
+
+(1: 0.255727, 2: 0.000004, 3: 0.624240);
+
+(test0: 0.255727, test1: 0.000004, test2: 0.624240);
+
+Detailed output identifying parameters
+
+kappa (ts/tv) =  5.28488
+
+
+dN/dS for site classes (K=2)
+
+p:   1.00000  0.00000
+w:   0.09213  1.00000
+
+dN & dS for each branch
+
+ branch           t        N        S    dN/dS       dN       dS   N*dN   S*dS
+
+   4..1       0.256     12.9      5.1   0.0921   0.0224   0.2429    0.3    1.2
+   4..2       0.000     12.9      5.1   0.0921   0.0000   0.0000    0.0    0.0
+   4..3       0.624     12.9      5.1   0.0921   0.0546   0.5930    0.7    3.0
+
+
+Naive Empirical Bayes (NEB) analysis
+Time used:  0:02

Added: trunk/packages/bioperl/branches/upstream/current/t/data/MSGEFTUA.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/MSGEFTUA.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/MSGEFTUA.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,103 @@
+LOCUS       MSGEFTUA                1500 bp    DNA     linear   BCT 18-JUN-1999
+DEFINITION  Mycobacterium leprae Tuf gene for elongation factor Tu, complete
+            cds.
+ACCESSION   D13869
+VERSION     D13869.1  GI:434782
+KEYWORDS    EF-Tu; elongation factor Tu.
+SOURCE      Mycobacterium leprae
+  ORGANISM  Mycobacterium leprae
+            Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales;
+            Corynebacterineae; Mycobacteriaceae; Mycobacterium.
+REFERENCE   1  (bases 1 to 1500)
+  AUTHORS   Dhandayuthapani,S., Banu,M.J. and Kashiwabara,Y.
+  TITLE     Cloning and sequence determination of the gene coding for the
+            elongation factor Tu of Mycobacterium leprae
+  JOURNAL   J. Biochem. 115 (4), 664-669 (1994)
+   PUBMED   8089081
+COMMENT     Submitted (07-DEC-1992) to DDBJ by:
+            Subranamian Dhandayuthapani
+            National Institute for Leprosy Research
+            4-2-1 Aobacho, Higashimurayama-shi
+            Tokyo 189
+            Japan
+            Phone:  0423-91-8211
+            Fax:    0423-94-9092.
+FEATURES             Location/Qualifiers
+     source          1..1500
+                     /organism="Mycobacterium leprae"
+                     /mol_type="genomic DNA"
+                     /isolate="Thai 53"
+                     /db_xref="taxon:1769"
+                     /clone_lib="lambda gt11"
+     repeat_region   20..71
+                     /rpt_type=direct
+                     /rpt_unit="20..29"
+                     /rpt_unit="62..71"
+     repeat_region   90..231
+                     /rpt_type=direct
+                     /rpt_unit="90..99"
+                     /rpt_unit="222..231"
+     RBS             205..210
+                     /standard_name="Shine-Dalgarno sequence"
+     gene            219..1409
+                     /gene="Tuf"
+     CDS             219..1409
+                     /gene="Tuf"
+                     /function="peptide chain elongtion during protein
+                     synthesis"
+                     /standard_name="EF-Tu"
+                     /note="start codon gtg"
+                     /codon_start=1
+                     /evidence=not_experimental
+                     /transl_table=11
+                     /product="elongation factor Tu"
+                     /protein_id="BAA02982.2"
+                     /db_xref="GI:5103687"
+                     /translation="MAKAKFERTKPHVNIGTIGHVDHGKTTLTAAITKVLHDKFPNLN
+                     ESRAFDQIDNAPEERQRGITINISHVEYQTEKRHYAHVDAPGHADYIKNMITGAAQMD
+                     GAILVVAATDGPMPQTREHVLLARQVGVPYILVALNKSDAVDDEELLELVEMEVRELL
+                     AAQEFDEDAPVVRVSALKALEGDAKWVESVTQLMDAVDESIPAPVRETDKPFLMPVED
+                     VFTITGRGTVVTGRVERGVVNVNEEVEIVGIRQTTTKTTVTGVEMFRKLLDQGQAGDN
+                     VGLLLRGIKREDVERGQVVIKPGTTTPHTEFEGQVYILSKDEGGRHTPFFNNYRPQFY
+                     FRTTDVTGVVTLPEGTEMVMPGDNTNISVTLIQPVAMDEGLRFAIREGGPTVGAGRVV
+                     KIIK"
+     repeat_region   465..729
+                     /rpt_type=direct
+                     /rpt_unit="465..474"
+                     /rpt_unit="720..729"
+     repeat_region   883..913
+                     /rpt_type=direct
+                     /rpt_unit="883..892"
+                     /rpt_unit="904..913"
+     repeat_region   1169..1369
+                     /rpt_type=direct
+                     /rpt_unit="1169..1180"
+                     /rpt_unit="1358..1369"
+ORIGIN      
+        1 gctcggtcca agacgcaagg ccggcgaact actccatggt gttcaactcg tactccgaag
+       61 tgccggcgaa cgtgtcgaag gagatcatcg cgaaggcgac gggcgagtag gagttgagtg
+      121 gtcggaaaag cctacgagtt atgctgacgc ggtcactacc gcggataaat tactaacatt
+      181 gctttttaca agcaccaata gtccaggagg acacagaagt ggcgaaggcg aagttcgagc
+      241 ggacgaagcc gcacgtcaac atcgggacca tcggtcacgt tgaccacggc aagaccacat
+      301 tgaccgcggc tattaccaag gtcctgcatg acaagttccc caacctgaat gagtcgcgcg
+      361 cctttgacca gattgacaac gcgcccgagg agcgtcagcg cggtatcacc atcaacattt
+      421 cccacgtgga gtatcagacc gagaagcgtc actatgctca cgtcgacgcc ccggggcacg
+      481 ccgactacat taagaacatg atcaccggtg cggcccagat ggatggtgcg attctggtgg
+      541 tcgctgctac agacggcccg atgccgcaga ctcgcgagca cgtgctgctc gctcgtcagg
+      601 tgggtgtacc ttacatcctg gtcgcactta acaagtccga cgccgtggac gacgaggaac
+      661 tactcgagct tgtcgagatg gaagtccgtg agttgctggc tgcccaggaa ttcgacgagg
+      721 acgccccggt tgtgcgtgtc tcggcattga aggcgctcga gggtgacgcc aagtgggtcg
+      781 agtctgtcac acagttgatg gacgctgtcg acgagtcgat cccggctcca gtccgcgaga
+      841 ctgacaagcc gttcctgatg cccgttgagg acgtcttcac tatcaccggt cgtggcaccg
+      901 tggtcaccgg tcgggtggag cgcggcgtgg tcaacgtgaa cgaggaagtt gagatcgtcg
+      961 gcattcgtca gacgaccacc aagaccaccg tcacgggtgt ggaaatgttt cgtaagctgc
+     1021 tcgaccaagg tcaggccggt gataacgttg gtctgttgtt gcgtggcatc aagcgcgagg
+     1081 acgtcgagcg tggtcaggtg gtcatcaaac ccggcaccac caccccgcat accgagttcg
+     1141 aaggccaagt atacatcctg tccaaggacg aaggtggtcg gcacacaccg ttcttcaaca
+     1201 actaccgtcc gcagttctac ttccgtacca ctgatgtgac cggtgtggtg actttgccgg
+     1261 agggcaccga gatggtgatg ccaggtgaca acaccaacat ctcggtgacg ttgattcagc
+     1321 ccgttgccat ggacgagggt ttgcgcttcg cgatccgcga aggtggtccg actgttggtg
+     1381 ccggccgggt cgtcaagatc atcaagtaag tcgcatcaac gggccaggtg gttacaacgg
+     1441 tccacatttg cgaaacggcg ctcatccgaa aggataggcg cgtttctttt gagtttatgc
+//
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/MSGEFTUA.gb
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Mcjanrna_rdbII.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Mcjanrna_rdbII.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Mcjanrna_rdbII.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,47 @@
+LOCUS       Mc.janrrnA   1475 bp    RNA             RNA       09-NOV-1998
+DEFINITION  Methanococcus jannaschii [gene=rrnA gene].
+REFERENCE   1
+  AUTHORS   Bult,C.J., White,O., Olsen,G.J., Zhou,L., Fleischmann,R.D.,
+            Sutton,G.G., Blake,J.A., FitzGerald,L.M., Clayton,R.A.,
+            Gocayne,J.D., Kerlavage,A.R., Dougherty,B.A., Tomb,J., Adams,M.D.,
+            Reich,C.I., Overbeek,R., Kirkness,E.F., Weinstock,K.G.,
+            Merrick,J.M., Glodek,A., Scott,J.D., Geoghagen,N.S., Weidman,J.F.,
+            Fuhrmann,J.L., Nguyen,D.T., Utterback,T., Kelley,J.M.,
+            Peterson,J.D., Sadow,P.W., Hanna,M.C., Cotton,M.D., Hurst,M.A.,
+            Roberts,K.M., Kaine,B.P., Borodovsky,M., Klenk,H.P., Fraser,C.M.,
+            Smith,H.O., Woese,C.R. and Venter,J.C.
+  TITLE     Complete genome sequence of the methanogenic archaeon,
+            Methanococcus jannaschii
+  JOURNAL   Science 273 (5278), 1058-1073 (1996)
+COMMENT     
+            Corresponding GenBank entry: L77117 (bases 157985 to 159459)
+              complement
+            operon= rrnA gene
+BASE COUNT      295 a    422 c    527 g    231 u   2746 others
+ORIGIN
+        1 AUUCCGGUUG AUCCUGCCGG AGGCCACUGC UAUCGGGGUC CGACUAAGCC AUGCGAGUCA
+       61 AGGGGCUCCC UUCGGGGAGC ACCGGCGCAC GGCUCAGUAA CACGUGGCUA ACCUACCCUC
+      121 GGGUGGGGGA UAACCUCGGG AAACUGAGGC UAAUCCCCCA UAGGGGAGGA GGUCUGGAAU
+      181 GAUCCCUCCC CGAAAGGCGU AAGCUGCCCG AGGAUGGGGC UGCGGCGGAU UAGGUAGUUG
+      241 GUGGGGUAAC GGCCCACCAA GCCUACGAUC CGUACGGGCC CUGAGAGGGG GAGCCCGGAG
+      301 AUGGACACUG AGACACGGGU CCAGGCCUAC GGGGCGCAGC AGGCGCGAAA CCUCCGCAAU
+      361 GCGCGAAAGC GCGACGGGGG GACCCCGAGU GCCCACGCCC UGCGUGGGCU UUUCCGGAGU
+      421 GUAAACAGCU CCGGGAAUAA GGGCUGGGCA AGUCCGGUGC CAGCAGCCGC GGUAAUACCG
+      481 GCGGCCCAAG UGGUGGCCAC UGUUAUUGGG CCUAAAGCGU CCGUAGCCGG CCCGGUAAGU
+      541 CUCUGCUUAA AUCUGCGGCU CAACCGCAGG GCUGGCAGAG AUACUGCCGG GCUUGGGACC
+      601 GGGAGAGGCC GGGGGUACCC CAGGGGUAGC GGUGAAAUGC GUUGAUCCCU GGGGGACCAC
+      661 CUGUGGCGAA GGCGCCCGGC UGGAACGGGU CCGACGGUGA GGGACGAAGG CCAGGGGAGC
+      721 AAACCGGAUU AGAUACCCGG GUAGUCCUGG CUGUAAACUC UGCGGACUAG GUGUCGCGUC
+      781 GGCUUCGGGC CGACGCGGUG CCGAAGGGAA GCCGUUAAGU CCGCCGCCUG GGGAGUACGG
+      841 UCGCAAGACU GAAACUUAAA GGAAUUGGCG GGGGAGCACU ACAACGGGUG GAGCCUGCGG
+      901 UUUAAUUGGA UUCAACGCCG GGCAUCUUAC CAGGGGCGAC GGCAGGAUGA AGGCCAGGUU
+      961 GACGACCUUG CCAGACGCGC CGAGAGGUGG UGCAUGGCCG UCGUCAGCUC GUACCGUGAG
+     1021 GCGUCCUGUU AAGUCAGGUA ACGAGCGAGA CCCGUGCCCC AUGUUGCUAC CUCCUCCUCC
+     1081 GGGAGGAGGG CACUCAUGGG GGACCGCCGG CGCUAAGCCG GAGGAAGGUG CGGGCAACGA
+     1141 CAGGUCCGCA UGCCCCGAAU CCCCUGGGCU ACACGCGGGC UACAAUGGCC GGGACAAUGG
+     1201 GACGCGACCC CGAAAGGGGG AGCGAAUCCC CUAAACCCGG UCGUAGUCCG GAUCGAGGGC
+     1261 UGUAACUCGC CCUCGUGAAG CCGGAAUCCG UAGUAAUCGC GCCUCACCAU GGCGCGGUGA
+     1321 AUGCGUCCCU GCUCCUUGCA CACACCGCCC GUCACGCCAC CCGAGUUGAG CCCAAGUGAG
+     1381 GCCCUGUCCG CAAGGGCAGG GUCGAACUUG GGUUCAGCGA GGGGGGCGAA GUCGUAACAA
+     1441 GGUAGCCGUA GGGGAACUGC GGCUGGAUCA CCUCC
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/MmCT
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/MmCT	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/MmCT	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,93 @@
+Codon usage table
+
+Pan troglodytes [gbpri]  325  CDS's
+
+AmAcid   Codon             Number       /1000    Fraction
+
+Pro      CCC              2351.00       22.55        0.41
+Pro      CCT              1234.00       11.84        0.22
+Pro      CCA              1158.00       11.11        0.20
+Pro      CCG               923.00        8.85        0.16
+
+Leu      CTC              2553.00       24.49        0.24
+Leu      CTT               985.00        9.45        0.09
+Leu      CTA               802.00        7.69        0.08
+Leu      CTG              4997.00       47.93        0.48
+
+His      CAC              1745.00       16.74        0.63
+His      CAT              1041.00        9.99        0.37
+Gln      CAA               925.00        8.87        0.17
+Gln      CAG              4576.00       43.90        0.83
+
+Arg      CGC              1580.00       15.16        0.24
+Arg      CGT               397.00        3.81        0.06
+Arg      CGA               592.00        5.68        0.09
+Arg      CGG              1208.00       11.59        0.19
+
+Ser      TCC              2158.00       20.70        0.27
+Ser      TCT              1651.00       15.84        0.21
+Ser      TCA               923.00        8.85        0.12
+Ser      TCG               417.00        4.00        0.05
+
+Phe      TTC              2447.00       23.47        0.70
+Phe      TTT              1025.00        9.83        0.30
+Leu      TTA               423.00        4.06        0.04
+Leu      TTG               733.00        7.03        0.07
+
+Tyr      TAC              2585.00       24.80        0.69
+Tyr      TAT              1153.00       11.06        0.31
+Ter      TAA                68.00        0.65        0.17
+Ter      TAG                69.00        0.66        0.17
+
+Cys      TGC              1448.00       13.89        0.63
+Cys      TGT               851.00        8.16        0.37
+Ter      TGA               275.00        2.64        0.67
+Trp      TGG              2178.00       20.89        1.00
+
+Thr      ACC              3341.00       32.05        0.50
+Thr      ACT              1121.00       10.75        0.17
+Thr      ACA              1452.00       13.93        0.22
+Thr      ACG               821.00        7.88        0.12
+
+Ile      ATC              2742.00       26.30        0.62
+Ile      ATT              1065.00       10.22        0.24
+Ile      ATA               643.00        6.17        0.14
+Met      ATG              2152.00       20.64        1.00
+
+Asn      AAC              1941.00       18.62        0.67
+Asn      AAT               949.00        9.10        0.33
+Lys      AAA              1276.00       12.24        0.32
+Lys      AAG              2707.00       25.97        0.68
+
+Ser      AGC              1872.00       17.96        0.24
+Ser      AGT               828.00        7.94        0.11
+Arg      AGA              1250.00       11.99        0.19
+Arg      AGG              1467.00       14.07        0.23
+
+Ala      GCC              3337.00       32.01        0.39
+Ala      GCT              2211.00       21.21        0.26
+Ala      GCA              1374.00       13.18        0.16
+Ala      GCG              1671.00       16.03        0.19
+
+Val      GTC              1610.00       15.44        0.24
+Val      GTT               774.00        7.42        0.12
+Val      GTA               397.00        3.81        0.06
+Val      GTG              3873.00       37.15        0.58
+
+Asp      GAC              3294.00       31.60        0.70
+Asp      GAT              1427.00       13.69        0.30
+Glu      GAA              1326.00       12.72        0.22
+Glu      GAG              4666.00       44.76        0.78
+
+Gly      GGC              2639.00       25.31        0.37
+Gly      GGT               782.00        7.50        0.11
+Gly      GGA              1562.00       14.98        0.22
+Gly      GGG              2206.00       21.16        0.31
+
+
+
+Coding GC 57.49%
+1st letter GC 57.76%
+2nd letter GC 45.35%
+3rd letter GC 45.35%
+Genetic code 1

Added: trunk/packages/bioperl/branches/upstream/current/t/data/NC_001284.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/NC_001284.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/NC_001284.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9175 @@
+LOCUS       NC_001284             366924 bp    DNA     circular PLN 16-OCT-2003
+DEFINITION  Arabidopsis thaliana mitochondrion, complete genome.
+ACCESSION   NC_001284
+VERSION     NC_001284.2  GI:26556996
+KEYWORDS    .
+SOURCE      mitochondrion Arabidopsis thaliana (thale cress)
+  ORGANISM  Arabidopsis thaliana
+            Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
+            Spermatophyta; Magnoliophyta; eudicotyledons; core eudicots;
+            rosids; eurosids II; Brassicales; Brassicaceae; Arabidopsis.
+REFERENCE   1
+  AUTHORS   Giege,P. and Brennicke,A.
+  TITLE     RNA editing in Arabidopsis mitochondria effects 441 C to U changes
+            in ORFs
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 96 (26), 15324-15329 (1999)
+  MEDLINE   20079652
+   PUBMED   10611383
+REFERENCE   2  (bases 1 to 366924)
+  AUTHORS   Unseld,M., Marienfeld,J.R., Brandt,P. and Brennicke,A.
+  TITLE     The mitochondrial genome of Arabidopsis thaliana contains 57 genes
+            in 366,924 nucleotides
+  JOURNAL   Nat. Genet. 15 (1), 57-61 (1997)
+  MEDLINE   97141919
+   PUBMED   8988169
+REFERENCE   3  (bases 1 to 366924)
+  AUTHORS   Marienfeld,J., Unseld,M., Brandt,P. and Brennicke,A.
+  TITLE     Genomic recombination of the mitochondrial atp6 gene in Arabidopsis
+            thaliana at the protein processing site creates two different
+            presequences
+  JOURNAL   DNA Res. 3 (5), 287-290 (1996)
+  MEDLINE   97191539
+   PUBMED   9039497
+REFERENCE   4  (bases 1 to 366924)
+  AUTHORS   Marienfeld,J.R.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (30-SEP-1996) J.R. Marienfeld, Institut fuer
+            Genbiologische Forschung GmbH, Ihnestrasse 63, 14195 Berlin, FRG
+COMMENT     PROVISIONAL REFSEQ: This record has not yet been subject to final
+            NCBI review. The reference sequence was derived from Y08501 and
+            Y08502.
+            On Dec 12, 2002 this sequence version replaced gi:13449290.
+FEATURES             Location/Qualifiers
+     source          1..366924
+                     /organism="Arabidopsis thaliana"
+                     /organelle="mitochondrion"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:3702"
+                     /sub_clone="pUC19"
+                     /sub_clone="pUC18"
+                     /sub_clone="pBluescript SK"
+                     /sub_clone="pBluescript KS"
+                     /clone_lib="Lorist X"
+     CDS             complement(join(327890..328078,329735..330306,
+                     332945..333105,79740..80132,81113..81297))
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="trans-splicing"
+                     /label=nad2_cds
+                     /product="NADH dehydrogenase subunit 2"
+                     /protein_id="NP_085584.1"
+                     /db_xref="GI:13449314"
+                     /db_xref="UniProt/TrEMBL:O05000"
+                     /translation="MKAEFVRILPHMFNLFLAVSPEIFIINATSILLIHGVVFSTSKK
+                     YDYPPLASNVGWLGLLSVLITLLLLAAGAPLLTIAHLFWNNLFRRDNFTYFCQIFLLL
+                     STAGTISMCFDSSDQERFDAFEFIVLIPLPTRGMLFMISAHDLIAMYLAIEPQSLCFY
+                     VIAASKRKSEFSTEAGSKYLILGAFSSGILLFGCSMIYGSTGATHFDQLAKILTGYEI
+                     TGARSSGIFMGILSIAVGFLFKITAVPFHMWAPDIYEGSPTPVTAFLSIAPKISISAN
+                     ILRVSIYGSYGATLQQIFFFCSIASMILGALAAMAQTKVKRPLAHSSIGHVGYIRTGF
+                     SCGTIEGIQSLLIGIFIYALMTMDAFAIVSALRQTRVKYIADLGALAKTNPISAITFS
+                     ITMFSYAGIPPLAGFCSKFYLFFAALGCGAYFLAPVGVVTSVIGRFYYIRLVKRMFFD
+                     TPRTWILYEPMDRNKSLLLAMTSFFITSSLLYPSPLFSVTHQMALSSYL"
+     CDS             complement(273..734)
+                     /note="orf153a"
+                     /codon_start=1
+                     /protein_id="NP_085474.1"
+                     /db_xref="GI:13449291"
+                     /db_xref="UniProt/TrEMBL:P93275"
+                     /translation="MSLLFQQTVPLSHLHRSLDPPLCFRTHILLILLLLSRHLPGFTG
+                     SDCESADPSIVSAIAPGTATTSERDCPVRTAGSDPVPIGDSGTFFDVGTAAPELLSPN
+                     RHHMITRAKDGIRKPNPRYNLFTQKYTPSEPKTITSASQDGDKLCKKRCRH"
+     promoter        complement(2485..2493)
+                     /note="orf153a"
+                     /evidence=not_experimental
+     gene            complement(8848..11415)
+                     /gene="rrn26"
+                     /db_xref="GeneID:814566"
+     rRNA            complement(8848..11415)
+                     /gene="rrn26"
+                     /product="26S ribosomal RNA protein"
+                     /db_xref="GeneID:814566"
+     CDS             11918..12241
+                     /note="orf107a"
+                     /codon_start=1
+                     /protein_id="NP_085475.1"
+                     /db_xref="GI:13449292"
+                     /db_xref="UniProt/TrEMBL:P93276"
+                     /translation="MFQFAKFSKSKERRLATELGYGFPIGDPWITDGISPWPFASESV
+                     LPSQCPGIHPMHSFRSCTQGTLNTTKISMKLTISDCGFEPLTEGFTVLHSTRATTCYH
+                     FLFNS"
+     CDS             complement(16844..17791)
+                     /note="orf315"
+                     /codon_start=1
+                     /protein_id="NP_085476.1"
+                     /db_xref="GI:13449293"
+                     /db_xref="UniProt/TrEMBL:P93278"
+                     /translation="MTKREYNSQPEMLEGAKSIGAGAATIASAGAAIGIGNVFSSLIH
+                     SVARNPSLATTTVLVVTLTLLGGVAAFYLHSFRLKGPLKKIIYLFLVFFIAVGISLIR
+                     IKAIHLLGLALPLLVPPLVWNAIGGGGEALPSTGPNGASSYSEWFTYTSDLEDSASSG
+                     RTSSSVNQPIQREQAGPSNALPEPAASPVAQQQDHLDQPFGEGGEREARAQEHDRISA
+                     EVETITSACENLEAAMVRKAHILLHQRGVTLGDPEDVKRALQLALHDDWEHDIDDRKR
+                     HFTVLRRDFGTARCERWNPFIDELRGLGNRQVNARHYVD"
+     CDS             16856..17251
+                     /note="orf131"
+                     /codon_start=1
+                     /protein_id="NP_085477.1"
+                     /db_xref="GI:13449294"
+                     /db_xref="UniProt/TrEMBL:P93277"
+                     /translation="MSGVYLTVPQAPELINERIPSFTASCSEVAPEHSEMPLTVIYIV
+                     LPVVMESQLESTLDILWIPESYSTLMQENMGFPYHGRLQILAGARDSLYLGRDAVVLL
+                     CPSLPFPTFTKWLIQVILLLSYWGSGGFG"
+     gene            complement(20571..333105)
+                     /gene="nad5"
+                     /db_xref="GeneID:814567"
+     mRNA            complement(join(20571..20717,21692..22086,190740..190761,
+                     140724..141939,142769..142998))
+                     /gene="nad5"
+                     /note="trans-splicing, RNA editing"
+                     /db_xref="GeneID:814567"
+     CDS             complement(join(20571..20717,21692..22086,190740..190761,
+                     140724..141939,142769..142998))
+                     /gene="nad5"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="trans-splicing, RNA editing"
+                     /product="NADH dehydrogenase subunit 5"
+                     /protein_id="NP_085478.1"
+                     /db_xref="GI:13449295"
+                     /db_xref="UniProt/Swiss-Prot:P29388"
+                     /db_xref="GeneID:814567"
+                     /translation="MYLLIVFLPLLGSSVAGFFGRFLGSEGSAIMTTTCVSFSSILSL
+                     IAFYEVAPGASACYLRIAPWISSEMFDASWGFLFDSPTVVMLIVVTSISSLVHLYSIS
+                     YMSEDPHSPRFMCYLSILTFFMPMLVTGDNSLQLFLGWEGVGLASYLLIHFWFTRLQA
+                     DKAATKAMLVNRVGDFGLALGISGRFTLFQTVDFSTIFARASAPRNSWISCNMRLNAI
+                     SLICILLLIGAVGKSAQIGSHTWSPDAMEGPTPVSASIHAATMVTAGVFMIARCSPLF
+                     EYPPTALIVITSAGATTSFLAATTGILQNDLKRVIAYSTCSQLGYMIFACGISNYSVS
+                     VFHLMNHAFFKALLFLSAGSVIHAMSDEQDMRKMGGLASSFPLTYAMMLIGSLSLIGF
+                     PFLTGFYSKDVILELAYTKYTISGNFAFWLGSVSVLFTSYYSFRLLFLTFLVPTNSFG
+                     RDISRCHDAPIPMAIPLILLALGSLFVGYLAKDMMIGLGTNFWANSPLVLPKNEILAE
+                     SEFAAPTITKLIPILFSTSGAFVAYNVNPVADQFQRAFQTSTFCNRLYSFFNKRWFFD
+                     QVLNDFLVRSFLRFGYEVSFEALDKGAIEILGPYGISYTFRRLAERISQLQSGFVYHY
+                     AFAMLLGSTLFVTFSRMWDSLSSWVDNRSSFILIVSSFYTKSSQE"
+     exon            complement(20571..20717)
+                     /gene="nad5"
+                     /number=5
+     misc_feature    20623
+                     /note="C to U RNA editing"
+     misc_feature    20652
+                     /note="C to U RNA editing"
+     misc_feature    20663
+                     /note="C to U RNA editing"
+     misc_feature    20665
+                     /note="C to U RNA editing"
+     misc_feature    20686
+                     /note="C to U RNA editing"
+     intron          complement(20718..21691)
+                     /gene="nad5"
+                     /number=4
+     exon            complement(21692..22086)
+                     /gene="nad5"
+                     /number=4
+     misc_feature    21975
+                     /note="C to U RNA editing"
+     misc_feature    22005
+                     /note="C to U RNA editing"
+     misc_feature    22065
+                     /note="C to U RNA editing"
+     intron          complement(22087..190739)
+                     /gene="nad5"
+                     /note="transpliced intron"
+                     /number=3
+     gene            complement(23663..24235)
+                     /gene="nad9"
+                     /db_xref="GeneID:814568"
+     CDS             complement(23663..24235)
+                     /gene="nad9"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="NADH dehydrogenase subunit 9"
+                     /protein_id="NP_085479.1"
+                     /db_xref="GI:13449296"
+                     /db_xref="UniProt/Swiss-Prot:Q95748"
+                     /db_xref="GeneID:814568"
+                     /translation="MDNQFIFKYSWETLPKKWVKKMERSEHGNRSDTNTDYLFQLLCF
+                     LKLHTYTRVQVSIDICGVDHPSRKRRFEVVYNLLSTRYNSRIRVQTSADEVTRISPVV
+                     SLFPSAGRWEREVWDMFGVSFINHPDLRRISTDYGFEGHPLRKDLPLSGYVQVRYDDP
+                     EKRVVSEPIEMTQEFRYFDFASPWEQRSDG"
+     misc_feature    23797
+                     /note="C to U RNA editing"
+     misc_feature    23838
+                     /note="C to U RNA editing"
+     misc_feature    23908
+                     /note="C to U RNA editing"
+     misc_feature    23938
+                     /note="C to U RNA editing"
+     misc_feature    24046
+                     /note="C to U RNA editing"
+     misc_feature    24069
+                     /note="C to U RNA editing"
+     misc_feature    24144
+                     /note="C to U RNA editing"
+     misc_feature    24685
+                     /note="C to U RNA editing"
+     misc_feature    24693
+                     /note="C to U RNA editing"
+     misc_feature    24749
+                     /note="C to U RNA editing"
+     misc_feature    24801
+                     /note="C to U RNA editing"
+     misc_feature    25004
+                     /note="C to U RNA editing"
+     misc_feature    25075
+                     /note="C to U RNA editing"
+     gene            complement(25076..25615)
+                     /gene="rpl16"
+                     /db_xref="GeneID:814569"
+     CDS             complement(25076..25615)
+                     /gene="rpl16"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ribosomal protein L16"
+                     /protein_id="NP_085480.1"
+                     /db_xref="GI:13449297"
+                     /db_xref="UniProt/Swiss-Prot:Q95747"
+                     /db_xref="GeneID:814569"
+                     /translation="MYLTIKSIMLLRKYLLVTESQVSKCGFHIVKKKGDVLYPKRTKY
+                     SKYRKGRCSRGCKPDGTKLGFGRYGTKSCKAGRLSYRAIEAARRAIIGHFHRAMSGQF
+                     RRNGKIWVRVFADLPITGKPTEVRMGRGKGNPTGWIARVSTGQIPFEMDGVSLANARQ
+                     AATLAAHKPCSSTKFVQWS"
+     misc_feature    25104
+                     /note="C to U RNA editing"
+     misc_feature    25110
+                     /note="C to U RNA editing"
+     misc_feature    25176
+                     /note="C to U RNA editing"
+     misc_feature    25387
+                     /note="C to U RNA editing"
+     misc_feature    25407
+                     /note="C to U RNA editing"
+     gene            complement(25482..28733)
+                     /gene="rps3"
+                     /db_xref="GeneID:814570"
+     mRNA            complement(join(25482..27077,28659..28733))
+                     /gene="rps3"
+                     /db_xref="GeneID:814570"
+     CDS             complement(join(25482..27077,28659..28733))
+                     /gene="rps3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ribosomal protein S3"
+                     /protein_id="NP_085481.1"
+                     /db_xref="GI:13449298"
+                     /db_xref="UniProt/TrEMBL:Q95749"
+                     /db_xref="GeneID:814570"
+                     /translation="MARKGNPISVRLGKNRSSDSSRFSEYYYGKFVYQDVNLRSYFGS
+                     IRPPTRLTFGFRLGRCIILHFPKRTFIHFFLPRRPRRLKRREKTRPGKEKGRWWTTFG
+                     KAGPIECLHSSDDTEEERNEVRGRGARKRVESIRLDDRKKQNEIRGWPKKKQRYGYHD
+                     RLPSIKKNLSKSLRISGAFKHPKYAGVVNDIAFLIENDDSFKKTKLFKLFFQNKSRSD
+                     GPTSYLRTLPAVRPSLNFLVMQYFFNTKNQINFDPVVVLNHFVAPGAAEPSTMGRANA
+                     QGRSLQKRIRSRIAFFVESSTSEKKCLAEAKNRLTHFIRLANDLRFAGTTKTTISLFP
+                     FFGATFFFLRDGVGVYNNLDAREQLLNQLRVKCWNLVGKDKIMELIEKLKNLGGIEEL
+                     IKVIDMMIEIILRKRGIPYRYNSYFYEVKKMRSFLSNRTNTKTLIESVKIKSVYQSAS
+                     PIAQDISFQLKNKRRSFHSIFAKIVKEIPKGVEGIRICFSGRLKDAAEKAQTKCYKHR
+                     KTSRNVFNHKIDYAPAEVSTRYGISGVKVWISYSQKKGRRAISETYEI"
+     exon            complement(25482..27077)
+                     /gene="rps3"
+                     /number=2
+     misc_feature    25555
+                     /note="C to U RNA editing"
+     misc_feature    25573
+                     /note="C to U RNA editing"
+     misc_feature    25582
+                     /note="C to U RNA editing"
+     misc_feature    25619
+                     /note="C to U RNA editing"
+     misc_feature    25683
+                     /note="C to U RNA editing"
+     misc_feature    25801
+                     /note="C to U RNA editing"
+     misc_feature    25809
+                     /note="C to U RNA editing"
+     misc_feature    26266
+                     /note="C to U RNA editing"
+     misc_feature    26550
+                     /note="C to U RNA editing"
+     misc_feature    26638
+                     /note="C to U RNA editing"
+     misc_feature    26966
+                     /note="C to U RNA editing"
+     misc_feature    27027
+                     /note="C to U RNA editing"
+     intron          complement(27078..28658)
+                     /gene="rps3"
+                     /number=1
+     exon            complement(28659..28733)
+                     /gene="rps3"
+                     /number=1
+     misc_feature    28670
+                     /note="C to U RNA editing"
+     gene            complement(28898..28970)
+                     /gene="tRNA-Lys"
+                     /db_xref="GeneID:814571"
+     tRNA            complement(28898..28970)
+                     /gene="tRNA-Lys"
+                     /product="tRNA-Lys"
+                     /db_xref="GeneID:814571"
+     gene            30463..31083
+                     /gene="ccb206"
+                     /db_xref="GeneID:814572"
+     CDS             30463..31083
+                     /gene="ccb206"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="cytochrome c biogenesis orf206"
+                     /protein_id="NP_085482.1"
+                     /db_xref="GI:13449299"
+                     /db_xref="UniProt/TrEMBL:P93280"
+                     /db_xref="GeneID:814572"
+                     /translation="MRRLFLELYHKLIFSSTPITSFSSFLSYIVVTPLMLGFEKDFSC
+                     HSHLGPIRIPPLFPFPPAPFPRNEKEDGTLELYYLSTYCLPKILLLQLVGHRVIQISR
+                     VFCGFPMLQLSYQFGRSGMDRLNIPLGSLVLTLLCGIHSRSALGITSSSGWNSSQNPT
+                     TSPTLLPLTVSRTSIETEWFHVLSSIGYSSLFVSLFPISVSISLQD"
+     misc_feature    30478
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30490
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30533
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30537
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30540
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30542
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30590
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30599
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30610
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30611
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30616
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30621
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30622
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30626
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30634
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30641
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30643
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30650
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30655
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30656
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30748
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30766
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30800
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30829
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30841
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30842
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30868
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30886
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30890
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30929
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30937
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30938
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30947
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30974
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    30976
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    31013
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    31016
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    31028
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     misc_feature    31031
+                     /gene="ccb206"
+                     /note="C to U RNA editing"
+     CDS             complement(32041..32472)
+                     /gene="nad5"
+                     /note="orf143"
+                     /codon_start=1
+                     /protein_id="NP_085483.1"
+                     /db_xref="GI:13449300"
+                     /db_xref="UniProt/TrEMBL:P93281"
+                     /db_xref="GeneID:814567"
+                     /translation="MAGQCLMQEIALYELFFFSLLKTGSFGLSARMEIFFSKTAWNLI
+                     RQTYPKVDYAGIVRNKFNIPNPSHSIIAWMALNSGLQQIELADFIPTNTVTLCGLCML
+                     EDESAEHLFSAAMQGGFFLSSVRNVDMISLQPTFMMFANGL"
+     CDS             34190..34555
+                     /note="orf121a"
+                     /codon_start=1
+                     /protein_id="NP_085484.1"
+                     /db_xref="GI:13449301"
+                     /db_xref="UniProt/TrEMBL:P93282"
+                     /translation="MASKIRKVTNQNMRINSSLSKSSTFSTRLRITDSYLSSPSVTEL
+                     APLTLTTGDDFTVTLSVTPTMNSLESQVICPRAYDCKERIPPNQHIVSLELTYHPASI
+                     EPTATGSPETRDPDPSAYA"
+     CDS             34599..35102
+                     /note="orf167"
+                     /codon_start=1
+                     /protein_id="NP_085485.1"
+                     /db_xref="GI:13449302"
+                     /db_xref="UniProt/TrEMBL:P93283"
+                     /translation="MNQLDQYSQPMQHLILLWFWLLDLSPPPSFHLSVKSVDLSLFSL
+                     SPLFLLLSISSLIFSRVNKFGIRRVGYAMAPKPDPTVLPDLQEKKAILGTQIEMITQA
+                     MTTLESRVTDLQQESNDHRTWVREALDKLLKRDLGDENRPKPTTNKMIATGEQHKGEV
+                     STSLFHD"
+     promoter        35638..35646
+                     /note="orf116"
+                     /evidence=not_experimental
+     CDS             35782..36132
+                     /note="orf116"
+                     /codon_start=1
+                     /protein_id="NP_085486.1"
+                     /db_xref="GI:13449303"
+                     /db_xref="UniProt/TrEMBL:P93284"
+                     /translation="MSRSSIGPELEVNSKPLKGPIICPIRTYYSKVPLELLFPTTDRR
+                     FYFLKSYVFCSANSVPLYLLLLTSALHFNSYILLFDFQLKSKLLAYKRRARCVAGLLK
+                     SMERYPESTVTAMI"
+     gene            complement(40502..42628)
+                     /gene="cox2"
+                     /db_xref="GeneID:814573"
+     mRNA            complement(join(40502..40585,41930..42628))
+                     /gene="cox2"
+                     /db_xref="GeneID:814573"
+     CDS             complement(join(40502..40585,41930..42628))
+                     /gene="cox2"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="cytochrome c oxidase subunit 2"
+                     /protein_id="NP_085487.1"
+                     /db_xref="GI:13449304"
+                     /db_xref="UniProt/TrEMBL:P93285"
+                     /db_xref="GeneID:814573"
+                     /translation="MIVLKWLFLTISPCDAAEPWQLGSQDAATPIMQGIIDLHHDIFF
+                     FLILILVFVLWILVRALWHFHYKKNAIPQRIVHGTTIEILRTIFPSIISMFIAIPSFA
+                     LLYSMDEVVVDPAITIKAIGHQWYRTYEYSDYNSSDEQSLTFDSYMIPEEDLELGQSR
+                     LLEVDNRVVVPAKTHLRIIVTSADVPHSWAVPSSGVKCDAVPGRLNQISILVQREGVY
+                     YGQCSEICGTNHAFTSIVVEAVPRKDYGSRVSNQLIPQTGEA"
+     exon            complement(40502..40585)
+                     /gene="cox2"
+                     /number=2
+     misc_feature    40543
+                     /note="C to U RNA editing"
+     misc_feature    40564
+                     /note="C to U RNA editing"
+     intron          complement(40586..41929)
+                     /gene="cox2"
+                     /number=1
+     exon            complement(41930..42628)
+                     /gene="cox2"
+                     /number=1
+     misc_feature    41931
+                     /note="C to U RNA editing"
+     misc_feature    42048
+                     /note="C to U RNA editing"
+     misc_feature    42072
+                     /note="C to U RNA editing"
+     misc_feature    42153
+                     /note="C to U RNA editing"
+     misc_feature    42250
+                     /note="C to U RNA editing"
+     misc_feature    42351
+                     /note="C to U RNA editing"
+     misc_feature    42368
+                     /note="C to U RNA editing"
+     misc_feature    42376
+                     /note="C to U RNA editing"
+     misc_feature    42491
+                     /note="C to U RNA editing"
+     misc_feature    42558
+                     /note="C to U RNA editing"
+     misc_feature    42602
+                     /note="C to U RNA editing"
+     misc_feature    42604
+                     /note="C to U RNA editing"
+     misc_feature    42605
+                     /note="C to U RNA editing"
+     repeat_unit     44698..48894
+                     /note="repeat I"
+     promoter        46909..48917
+                     /note="orf139a"
+                     /evidence=not_experimental
+     CDS             48113..48532
+                     /note="orf139a
+                     identical to orf139b"
+                     /codon_start=1
+                     /protein_id="NP_085488.1"
+                     /db_xref="GI:13449305"
+                     /db_xref="UniProt/TrEMBL:P94024"
+                     /translation="MIQRTRNQSIMLSLPSNQSANHAILTFQPIGQSRYLLTFQPTPS
+                     IPLLQQYIISVPYLDAYSSICFPVMARIRSAKYCFFFFLVLFLNGIIATRGKAMLPTL
+                     PQKGAAFFPPKMPVPPSGPSKQHNSAPRSDFVQFFYM"
+     gene            complement(51293..53611)
+                     /gene="ccb452"
+                     /db_xref="GeneID:814574"
+     mRNA            complement(join(51293..51871,52832..53611))
+                     /gene="ccb452"
+                     /db_xref="GeneID:814574"
+     CDS             complement(join(51293..51871,52832..53611))
+                     /gene="ccb452"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="cytochrome c biogenesis orf452"
+                     /protein_id="NP_085489.1"
+                     /db_xref="GI:13449306"
+                     /db_xref="UniProt/TrEMBL:P93286"
+                     /db_xref="GeneID:814574"
+                     /translation="MVQLHNFFFFIIFMVVPCGTAAPVLLKWFVSRDVPTGAPFSNGT
+                     IIPIPISSFPLLVYLHSRKIIRSMDGAKSGVLVRASRPILLPDIIGRSSSETRARKAL
+                     FFFVPVLHFRLLESKGDFSYLESFCGVLCLLFFRTFLFLARDRSAKRERARRRKGQTL
+                     RPNGNEQRRNDKMRCSGHPHLDLERRVEGFGPLAFPVPPELGGACVGGVPPEIGLEAL
+                     ALPRSRQLMAMAVGHDYYQKVPMKMNISHGGVCICMLGVLLSNTKKIQFTQRLPLGYE
+                     LHMGKERCCLRGLDHLHGPTFHSICGNLMIYKPSLTNDRLMFEHDESLHADLLLINFP
+                     ASYKNGKLEHFLHWWMKNRKHNNFWLTMFPEKRYFRERTSTAEVAIHTNLFTDLYASI
+                     GTGSSRTGGWYTTIMKLPFIFFIRIGFMLASLGGSPSLLRQLQKDKLRWNRESSVEFI
+                     IA"
+     exon            complement(51293..51871)
+                     /gene="ccb452"
+                     /number=2
+     misc_feature    51325
+                     /note="C to U RNA editing"
+     misc_feature    51372
+                     /note="C to U RNA editing"
+     misc_feature    51406
+                     /note="C to U RNA editing"
+     misc_feature    51437
+                     /note="C to U RNA editing"
+     misc_feature    51480
+                     /note="C to U RNA editing"
+     misc_feature    51727
+                     /note="C to U RNA editing"
+     intron          complement(51872..52831)
+                     /gene="ccb452"
+                     /number=1
+     exon            complement(52832..53611)
+                     /gene="ccb452"
+                     /number=1
+     misc_feature    53197
+                     /note="C to U RNA editing"
+     misc_feature    53206
+                     /note="C to U RNA editing"
+     misc_feature    53278
+                     /note="C to U RNA editing"
+     misc_feature    53437
+                     /note="C to U RNA editing"
+     misc_feature    53452
+                     /note="C to U RNA editing"
+     misc_feature    53457
+                     /note="C to U RNA editing"
+     misc_feature    53466
+                     /note="C to U RNA editing"
+     misc_feature    53490
+                     /note="C to U RNA editing"
+     misc_feature    53509
+                     /note="C to U RNA editing"
+     misc_feature    53562
+                     /note="C to U RNA editing"
+     gene            complement(53736..53809)
+                     /gene="tRNA-Gly"
+                     /db_xref="GeneID:814575"
+     tRNA            complement(53736..53809)
+                     /gene="tRNA-Gly"
+                     /product="tRNA-Gly"
+                     /db_xref="GeneID:814575"
+     CDS             complement(55767..56090)
+                     /gene="nad5"
+                     /note="orf107b"
+                     /codon_start=1
+                     /protein_id="NP_085490.1"
+                     /db_xref="GI:13449307"
+                     /db_xref="UniProt/TrEMBL:P93287"
+                     /db_xref="GeneID:814567"
+                     /translation="MLNAFAFPQTNECFPAKRGFCANERTKCLNPKMPSKSMFGGSVS
+                     ENLFLSKIRIGLSFPLPLSEIKLQNQDFRLEGQMSSFDPFVDESKALVRRLGQKVKAK
+                     SFLCR"
+     gene            57774..58331
+                     /gene="rpl5"
+                     /db_xref="GeneID:814576"
+     CDS             57774..58331
+                     /gene="rpl5"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ribosomal protein L5"
+                     /protein_id="NP_085491.1"
+                     /db_xref="GI:13449308"
+                     /db_xref="UniProt/Swiss-Prot:P42793"
+                     /db_xref="GeneID:814576"
+                     /translation="MFPLNFHYEDVSRQDPLLKPNHANVMEVPGSCEIRVVPKAPYNF
+                     IIKNGKLAMEIPRGQKFIQTQRGSTGKSFRSNPFLGSNKDKGYVSDLARQSTLRGHGM
+                     SNFSVRISTVMSLLDFPVEIRKNSIQFSMETEFCEFSPELEDHFEIFEHIRGFNVTII
+                     TSANTQDETLPLWSGFLQKDEGETQ"
+     misc_feature    57808
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    57820
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    57831
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    57832
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    57837
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    57865
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    57942
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    58090
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    58102
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    58285
+                     /gene="rpl5"
+                     /note="C to U RNA editing"
+     misc_feature    58526
+                     /note="C to U RNA editing"
+     gene            60235..61416
+                     /gene="cob"
+                     /db_xref="GeneID:814577"
+     CDS             60235..61416
+                     /gene="cob"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="apocytochrome B"
+                     /protein_id="NP_085492.1"
+                     /db_xref="GI:13449309"
+                     /db_xref="UniProt/TrEMBL:P93289"
+                     /db_xref="GeneID:814577"
+                     /translation="MTIRNQRFSLLKQPISSTLNQHLVDYPTPSNLSYWWGFGPLAGI
+                     CLVIQIVTGVFLAMHYTPHVDLAFNSVEHIMRDVEGGWLLRYMHANGASMFLIVVYLH
+                     IFRGLYHASYSSPREFVWCLGVVIFLLMIVTAFIGYVLPWGQMSFWGATVITSLASAI
+                     PVVGDTIVTWLWGGFSVDNATLNRFFSLHHLLPFILVGASLLHLAALHQYGSNNPLGV
+                     HSEMDKIAFYPYFYVKDLVGWVAFAIFFSIWIFYAPNVLGHPDNYIPANPMSTPPHIV
+                     PEWYFLPIHAILRSIPDKAGGVAAIAPVFICLLALPFFKSMYVRSSSFRPIHQGMFWL
+                     LLADCLLLGWIGCQPVEAPFVTIGQISPLVFFLFFAITPILGRVGRGIPNSYTDETDH
+                     T"
+     misc_feature    60520
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     misc_feature    60559
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     misc_feature    60802
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     misc_feature    61087
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     misc_feature    61142
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     misc_feature    61216
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     misc_feature    61318
+                     /gene="cob"
+                     /note="C to U RNA editing"
+     gene            62349..62433
+                     /gene="tRNA-Ser"
+                     /db_xref="GeneID:814578"
+     tRNA            62349..62433
+                     /gene="tRNA-Ser"
+                     /product="tRNA-Ser"
+                     /db_xref="GeneID:814578"
+     CDS             complement(68918..69253)
+                     /gene="nad5"
+                     /note="orf111a"
+                     /codon_start=1
+                     /protein_id="NP_085493.1"
+                     /db_xref="GI:13449310"
+                     /db_xref="UniProt/TrEMBL:P93290"
+                     /db_xref="GeneID:814567"
+                     /translation="MYLTITRPDLTFAVNRLSQFSSASRTAQMQAVYKVLHYVKGTVG
+                     QGLFYSATSDLQLKAFADSDWASCPDTRRSVTGFCSLVPLWFLGALRKSILSPGLLQR
+                     QNIEALHLL"
+     gene            71349..71526
+                     /gene="tRNA-fMet"
+                     /db_xref="GeneID:814579"
+     promoter        71349..71357
+                     /gene="tRNA-fMet"
+                     /evidence=not_experimental
+     tRNA            71453..71526
+                     /gene="tRNA-fMet"
+                     /product="tRNA-Met"
+                     /note="fMet"
+                     /db_xref="GeneID:814579"
+     CDS             76132..76437
+                     /note="orf101a"
+                     /codon_start=1
+                     /protein_id="NP_085494.1"
+                     /db_xref="GI:13449311"
+                     /db_xref="UniProt/TrEMBL:P93291"
+                     /translation="MSRLELGKEAVALLTLYEEGVKSLAIVDTLRSARQEQYRKHRMP
+                     WFLLQTRLQVRVIESAQLGMLPMPIPELLKEAVAVQPLNLNGIAHSSSSINSFLLFE"
+     gene            complement(76642..77259)
+                     /gene="nad6"
+                     /db_xref="GeneID:814580"
+     CDS             complement(76642..77259)
+                     /gene="nad6"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="NADH dehydrogenase subunit 6"
+                     /protein_id="NP_085495.1"
+                     /db_xref="GI:13449312"
+                     /db_xref="UniProt/Swiss-Prot:Q01825"
+                     /db_xref="GeneID:814580"
+                     /translation="MILSVLSSPALVSGLMVARAKNPVHSVLFPIPVFRDTSGLLLLL
+                     GLDFFAMIFPVVHIGAIAVSFLFVVMMFHIQIAEIHEEVLRYLPVSGIIGLIFWWEMF
+                     FILDNESIPLLPTQRNTTSLRYTVYAGKVRSWTNLETLGNLLYTYYSVWFLVPSLILL
+                     VAMIGAIVLTMHRTTKVKRQDVFRRNAIDFRRTIMRRTTDPLTIY"
+     misc_feature    76814
+                     /note="C to U RNA editing"
+     misc_feature    77069
+                     /note="C to U RNA editing"
+     misc_feature    77091
+                     /note="C to U RNA editing"
+     misc_feature    77099
+                     /note="C to U RNA editing"
+     misc_feature    77157
+                     /note="C to U RNA editing"
+     misc_feature    77165
+                     /note="C to U RNA editing"
+     misc_feature    77171
+                     /note="C to U RNA editing"
+     misc_feature    77172
+                     /note="C to U RNA editing"
+     misc_feature    77207
+                     /note="C to U RNA editing"
+     misc_feature    77234
+                     /note="C to U RNA editing"
+     misc_feature    77332
+                     /note="C to U RNA editing"
+     CDS             complement(77819..78151)
+                     /gene="nad5"
+                     /note="orf110a"
+                     /codon_start=1
+                     /protein_id="NP_085496.1"
+                     /db_xref="GI:13449313"
+                     /db_xref="UniProt/TrEMBL:P93292"
+                     /db_xref="GeneID:814567"
+                     /translation="MNNAAKRADCWFGAKNYGRAVYECLRGGLYFTKDDENVNSQPFM
+                     RWRDRFLFCAEAVYKAQAETGGIKGHYLNATAGTCEEMIKRAVFARELGVPIVMHDYL
+                     NRGIHRKY"
+     mRNA            complement(join(79740..<80132,81113..81297))
+                     /gene="nad5"
+                     /label=nad2_mrna
+                     /db_xref="GeneID:814567"
+     exon            complement(79740..80132)
+                     /gene="nad2"
+                     /label=nad2_ex2
+     misc_feature    79760
+                     /note="C to U RNA editing"
+     misc_feature    79788
+                     /note="C to U RNA editing"
+     misc_feature    79790
+                     /note="C to U RNA editing"
+     misc_feature    79857
+                     /note="C to U RNA editing"
+     misc_feature    79877
+                     /note="C to U RNA editing"
+     misc_feature    79891
+                     /note="C to U RNA editing"
+     misc_feature    79918
+                     /note="C to U RNA editing"
+     misc_feature    79924
+                     /note="C to U RNA editing"
+     misc_feature    79929
+                     /note="C to U RNA editing"
+     misc_feature    79974
+                     /note="C to U RNA editing"
+     misc_feature    79977
+                     /note="C to U RNA editing"
+     intron          complement(80133..81112)
+                     /gene="nad2"
+                     /number=1
+     exon            complement(81113..>81297)
+                     /gene="nad2"
+                     /label=ex1
+     misc_feature    81208
+                     /note="C to U RNA editing"
+     misc_feature    81209
+                     /note="C to U RNA editing"
+     misc_feature    81239
+                     /note="C to U RNA editing"
+     gene            complement(82028..83116)
+                     /gene="rps4"
+                     /db_xref="GeneID:814581"
+     CDS             complement(82028..83116)
+                     /gene="rps4"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ribosomal protein S4"
+                     /protein_id="NP_085497.1"
+                     /db_xref="GI:13449315"
+                     /db_xref="UniProt/Swiss-Prot:Q31708"
+                     /db_xref="GeneID:814581"
+                     /translation="MWLLKKLIQRDIDLSPLRFQTCRLLSGNVRNRELTIIQRRILRR
+                     LRNRKRSIKKRKIYPKKYLTSYIQLQTTRKLPLFHGDLPITEMHRGTKRTSYIPFPLN
+                     PETRFDVIPLRLHFLETIPQARQPISHRRVCVNKGMVSITHFKLSHGDIISFQENNAI
+                     IRGEEIRRSFYKEISVEKIIGKLLHQPLRMWRRSKTEWFHLLKTKRGCRLLLKSRFLQ
+                     QLRSSMQEEDLERTKKFGSEKVCLGSSFAEHKRMKRNLLKSLFLSKRRKDKNLNLPTR
+                     TISPIVYNSSLSLYSNSTYCFASPHKLTMKRRIKRIELPTHYSEVNHRTPKAVVSYGP
+                     NIGHIPHDIRLKDPNLPLRSRNGRGQNI"
+     misc_feature    82060
+                     /note="C to U RNA editing"
+     misc_feature    82065
+                     /note="C to U RNA editing"
+     misc_feature    82074
+                     /note="C to U RNA editing"
+     misc_feature    82075
+                     /note="C to U RNA editing"
+     misc_feature    82125
+                     /note="C to U RNA editing"
+     misc_feature    82150
+                     /note="C to U RNA editing"
+     misc_feature    82161
+                     /note="C to U RNA editing"
+     misc_feature    82593
+                     /note="C to U RNA editing"
+     misc_feature    82740
+                     /note="C to U RNA editing"
+     misc_feature    82774
+                     /note="C to U RNA editing"
+     misc_feature    82809
+                     /note="C to U RNA editing"
+     misc_feature    82818
+                     /note="C to U RNA editing"
+     misc_feature    82882
+                     /note="C to U RNA editing"
+     misc_feature    82888
+                     /note="C to U RNA editing"
+     misc_feature    83029
+                     /note="C to U RNA editing"
+     CDS             complement(89617..90054)
+                     /gene="nad5"
+                     /note="orf145a"
+                     /codon_start=1
+                     /protein_id="NP_085498.1"
+                     /db_xref="GI:13449316"
+                     /db_xref="UniProt/TrEMBL:P93293"
+                     /db_xref="GeneID:814567"
+                     /translation="MRKGERMLATSKEWERPRPVEASCSEGVLKVLKGCRTILKGNRH
+                     DSLYILQGSVETGESNLAETAKDETRLWHSRLAHMSQRGMELLVKKGFLDSSKVSSLK
+                     FCEDCIYGKTHRVNFSTGQHTTKNPLDYVHSDLWGAPSVPLSF"
+     CDS             complement(90883..91347)
+                     /gene="nad5"
+                     /note="orf154"
+                     /codon_start=1
+                     /protein_id="NP_085499.1"
+                     /db_xref="GI:13449317"
+                     /db_xref="UniProt/TrEMBL:P93295"
+                     /db_xref="GeneID:814567"
+                     /translation="MALPVYAMSCFRLSKLLCKKLTSAMTEFWWSSCENKRKISWVAW
+                     QKLCKSKEDDGGLGFRDLGWFNQALLAKQSFRIIHQPHTLLSRLLRSRYFPHSSMMEC
+                     SVGTRPSYAWRSIIHGRELLSRGLLRTIGDGIHTKVWLDRWIMDETPLPPLN"
+     CDS             91001..91384
+                     /note="orf127"
+                     /codon_start=1
+                     /protein_id="NP_085500.1"
+                     /db_xref="GI:13449318"
+                     /db_xref="UniProt/TrEMBL:P93294"
+                     /translation="MILLQAYEGLVPTLHSIIELCGKYRLRRSRDSSVCGWCIIRKDC
+                     FARRAWLNQPKSRKPKPPSSSLDLHNFCQATHEIFLLFSQELHQNSVIALVNFLHSSF
+                     ESRKHDIAYTGKAIAIDFSSTSFPP"
+     promoter        complement(92271..92279)
+                     /gene="nad5"
+                     /note="orf145a"
+                     /evidence=not_experimental
+     gene            98939..103914
+                     /gene="tRNA-Ser"
+                     /db_xref="GeneID:814582"
+     promoter        98939..98947
+                     /gene="tRNA-Ser"
+                     /evidence=not_experimental
+     gene            103752..104295
+                     /gene="tRNA-Tyr"
+                     /db_xref="GeneID:814583"
+     promoter        103752..103760
+                     /gene="tRNA-Tyr"
+                     /evidence=not_experimental
+     repeat_unit     103805..104337
+                     /note="repeat II"
+     tRNA            103827..103914
+                     /gene="tRNA-Ser"
+                     /product="tRNA-Ser"
+     tRNA            104221..104295
+                     /gene="tRNA-Tyr"
+                     /product="tRNA-Tyr"
+                     /db_xref="GeneID:814583"
+     gene            104457..104531
+                     /gene="tRNA-Pro"
+                     /db_xref="GeneID:814584"
+     tRNA            104457..104531
+                     /gene="tRNA-Pro"
+                     /product="tRNA-Pro"
+                     /db_xref="GeneID:814584"
+     gene            104885..104995
+                     /gene="tRNA-Cys"
+                     /db_xref="GeneID:814585"
+     tRNA            104885..104995
+                     /gene="tRNA-Cys"
+                     /product="tRNA-Cys"
+                     /db_xref="GeneID:814585"
+     CDS             complement(104954..105553)
+                     /gene="nad5"
+                     /note="orf199"
+                     /codon_start=1
+                     /protein_id="NP_085501.1"
+                     /db_xref="GI:13449319"
+                     /db_xref="UniProt/TrEMBL:P93296"
+                     /db_xref="GeneID:814567"
+                     /translation="MVFQSFILGNLVYLCMKIINSVVVVGLYYGFLTTFSIGPSYLFL
+                     LRARVMDEGEEGTEKKVSATTGFIAGQLMMFISIYYAPLHLALGRPHTITVLALPYLL
+                     FHFFFWNNHKHFFDYGSTTRNEMRNLRIQCVFPNNLIFKLFNHLILPSSMLARLVNIY
+                     MFRCNNKMLFVTSSFVVCVRMLLVEWAFPLFQLFLVMKV"
+     promoter        complement(105653..105661)
+                     /gene="nad5"
+                     /note="orf199"
+                     /evidence=not_experimental
+     gene            105887..105948
+                     /gene="tRNA-Asn"
+                     /db_xref="GeneID:814586"
+     tRNA            105887..105948
+                     /gene="tRNA-Asn"
+                     /product="tRNA-Asn"
+                     /db_xref="GeneID:814586"
+     gene            106797..106879
+                     /gene="tRNA-Tyr"
+                     /db_xref="GeneID:814587"
+     tRNA            106797..106879
+                     /gene="tRNA-Tyr"
+                     /product="tRNA-Tyr"
+                     /db_xref="GeneID:814587"
+     CDS             complement(110930..111403)
+                     /gene="nad5"
+                     /note="orf157"
+                     /codon_start=1
+                     /protein_id="NP_085502.1"
+                     /db_xref="GI:13449320"
+                     /db_xref="UniProt/TrEMBL:P93297"
+                     /db_xref="GeneID:814567"
+                     /translation="MGDLEGQDRPDPISTMVGPSGTGNLRLTSFQQVRRSILSQERRN
+                     PAPLATCTKKKLGRKEEPLLIPATINNYRRAIHLKNGARISLDVGLYFFRRARFGTLK
+                     QDMIYIIRHHRRLEIKVRFIALLIQACCRIVGYDYLFFYEVRNHLLLAALLIIIR"
+     gene            111587..112907
+                     /gene="atp6-1"
+                     /db_xref="GeneID:814588"
+     promoter        111587..111595
+                     /gene="atp6-1"
+                     /evidence=not_experimental
+     CDS             111750..112907
+                     /gene="atp6-1"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ATPase subunit 6"
+                     /protein_id="NP_085503.1"
+                     /db_xref="GI:13449321"
+                     /db_xref="UniProt/TrEMBL:P93298"
+                     /db_xref="GeneID:814588"
+                     /translation="MRRIFLFDENSLNSSSTIDTSSASTIDTSFASQCTNFSSGQASG
+                     TQDTHAGIFEDCPGLNPNDERVVELQCEIREKCEALTQDPEMGLILGEALHAESDNVP
+                     FLQSIADDLTQNGVSGEAFQEALNIVGQAAASPLDQFEIVPLIPMHIGNFYFSFTNPS
+                     LFMLLTLSFFLLLIHFVTKKGGGNLVPNAWQSLVELLYDFVLNLVKEQIGGLSGNVKQ
+                     MFFPCILVTFLFLLFCNLQGMIPYSFTVTSHFLITLALSFSIFIGITIVGFQRHGLHF
+                     FSFLLPAGVPLPLAPFLVLLELISYCFRALSLGIRLFANMMAGHSLVKILSGFAWTML
+                     CMNDIFYFIGALGPLFIVLALTGLELGVAILQAYVFTILICIYLNDAINLH"
+     repeat_unit     112147..118736
+                     /note="repeat III"
+     misc_feature    112224
+                     /gene="atp6-1"
+                     /note="C to U RNA editing"
+     gene            112953..113039
+                     /gene="tRNA-Ser"
+                     /db_xref="GeneID:814589"
+     tRNA            112953..113039
+                     /gene="tRNA-Ser"
+                     /product="tRNA-Ser"
+                     /db_xref="GeneID:814589"
+     CDS             115431..115751
+                     /note="orf106a
+                     identical to orf106g"
+                     /codon_start=1
+                     /protein_id="NP_085504.1"
+                     /db_xref="GI:13449322"
+                     /db_xref="UniProt/TrEMBL:P93299"
+                     /translation="MLHRGRSCLTGLFPCYLLSNWLNSNLCWIPLKLVIPCFQLIVES
+                     YLLEFLLLLAISTCLLGEDSLIWLTLVVAHSKSRQSSSQEPLDTRMATRALLDQLRSD
+                     RRHN"
+     CDS             116296..116754
+                     /note="orf152a
+                     identical to orf152b"
+                     /codon_start=1
+                     /protein_id="NP_085505.1"
+                     /db_xref="GI:13449323"
+                     /db_xref="UniProt/TrEMBL:P93300"
+                     /translation="MPTILSSKPAFNSLFSYHLIGLISNKLVTLAPDYTGTKKTTWGA
+                     RLHLQELRVQTKHRQIEPDIKNLPLQPIRYGSFRPVFHWIEKPCMLIGLCGLHSEVSF
+                     IANWPWGKPSEIGGCSIPCMLTGRGSELSYHIRASRRPLPGNRFHFQSFF"
+     CDS             complement(119381..119701)
+                     /gene="nad5"
+                     /note="orf106b"
+                     /codon_start=1
+                     /protein_id="NP_085506.1"
+                     /db_xref="GI:13449324"
+                     /db_xref="UniProt/TrEMBL:P93301"
+                     /db_xref="GeneID:814567"
+                     /translation="MVVTAYPKSSAGMGVTVLPEYLKQSSYEAYSRPYSAFFLSGCTK
+                     QERSPLLARRLVDAWLSFHSILMINEEVSDWEQLSDHYTRRSLFKTIAFRNLQREEEY
+                     RPGG"
+     gene            complement(127021..127094)
+                     /gene="tRNA-Ile"
+                     /db_xref="GeneID:814590"
+     tRNA            complement(127021..127094)
+                     /gene="tRNA-Ile"
+                     /product="tRNA-Ile"
+                     /db_xref="GeneID:814590"
+     CDS             complement(127475..127843)
+                     /gene="nad5"
+                     /note="orf122a"
+                     /codon_start=1
+                     /protein_id="NP_085507.1"
+                     /db_xref="GI:13449325"
+                     /db_xref="UniProt/TrEMBL:P93302"
+                     /db_xref="GeneID:814567"
+                     /translation="MILNLDTNIFNHGLSRHNILAFSQGFPIGLPCRNWIEVGLRLRL
+                     RLLLELAVGNFPQGFKIHLSGSFQAVRLALFSSFTSLRTDELLLIETRPSYLSSVQGL
+                     KYYVIFIDNYSQGSVGCSRN"
+     misc_feature    129794
+                     /note="C to U RNA editing"
+     misc_feature    129825
+                     /note="C to U RNA editing"
+     gene            129909..130385
+                     /gene="orfB"
+                     /db_xref="GeneID:814591"
+     CDS             129909..130385
+                     /gene="orfB"
+                     /codon_start=1
+                     /protein_id="NP_085508.1"
+                     /db_xref="GI:13449326"
+                     /db_xref="UniProt/TrEMBL:P93303"
+                     /db_xref="GeneID:814591"
+                     /translation="MPQLDKFTYFSQFFWLCLFFFTFYIFICNDGDGVLGISRILKLR
+                     NQLLSHRGKTIRSKDPNSLEDLLRKGFSTGVSYMYASLFEVSQWCKAVDLLGKRRKIT
+                     LISCFGEISGSRGMERNILYNISKSSPSNTGRWITCRNCRNDIMLIHVVHGQGSIK"
+     CDS             130817..131140
+                     /note="orf107c"
+                     /codon_start=1
+                     /protein_id="NP_085509.1"
+                     /db_xref="GI:13449327"
+                     /db_xref="UniProt/TrEMBL:P93304"
+                     /translation="MAVRCSKIQRTDGRPGHSLQPAKVSFVAGQPLGYYSSWPLFALS
+                     HHMVVWYAAEHVYPSSFFFQSKLPPSEVFAYPGMEVFNEYTLYHAWVDEALSGVSKME
+                     LYAKA"
+     CDS             130968..131393
+                     /note="orf141"
+                     /codon_start=1
+                     /protein_id="NP_085510.1"
+                     /db_xref="GI:13449328"
+                     /db_xref="UniProt/TrEMBL:P93305"
+                     /translation="MRQNMSILPPSSFSRNFLLVRCSPIQVWKYSMNTLCTMHGWMKL
+                     YLEYQRWNCMLRLSASIELEVFMDVPIFIRTYYRPANMDDSFRYGIIFRMYDLAVQLQ
+                     WGNVMKLLLVTKTQVEKKKALNLNVSKTRDGRGGLNQQG"
+     misc_feature    132032
+                     /note="C to U RNA editing"
+     gene            132071..138153
+                     /gene="nad7"
+                     /db_xref="GeneID:814592"
+     mRNA            join(132071..132213,133177..133245,134309..134775,
+                     135829..136072,137892..138153)
+                     /gene="nad7"
+                     /db_xref="GeneID:814592"
+     CDS             join(132071..132213,133177..133245,134309..134775,
+                     135829..136072,137892..138153)
+                     /gene="nad7"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="NADH dehydrogenase subunit 7"
+                     /protein_id="NP_085511.1"
+                     /db_xref="GI:13449329"
+                     /db_xref="UniProt/Swiss-Prot:P93306"
+                     /db_xref="GeneID:814592"
+                     /translation="MTTRKRQIKNFTSNFGPQHPAAHGVSRLVLEMNGEVVERAEPHI
+                     GSLHRGTEKLIEYKTYLQALPYSDRSDYVSMMAQEHAHSSAVEKLLNCEVPLRAQYIR
+                     VLFREITRISNHSLALTTHAMDVGALTPFLWAFEEREKLLEFYERVSGARMHASFIRP
+                     GGVAQDLPLGLCRDIDSFTQQFASRIDELEEMSTGNRIWKQRLVDIGTVTAQQAKDWG
+                     FSGVMLRGPGVCWDSRRAAPYDVHDQSDLDVPVGTRGDRYDRYCIRIEEMRQSLRIIV
+                     QCLNQMPSGMIKADDRKLCPPSRCRMKLSMESSIHHFELYTEGFSVPASSTYTAVEAP
+                     KGEFGVFLVSNGSNRPYRRKIRAPGSAHSQGLDSMSKHHMPADVVTIIGTQDIVFGEV
+                     DR"
+     exon            132071..132213
+                     /gene="nad7"
+                     /number=1
+     misc_feature    132094
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    132108
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    132147
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    132207
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     intron          132214..133176
+                     /gene="nad7"
+                     /number=1
+     exon            133177..133245
+                     /gene="nad7"
+                     /number=2
+     misc_feature    133233
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     intron          133246..134308
+                     /gene="nad7"
+                     /number=2
+     misc_feature    134300
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     exon            134309..134775
+                     /gene="nad7"
+                     /number=3
+     misc_feature    134309
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    134340
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    134347
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    134412
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    134431
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    134440
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    134674
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     intron          134776..135828
+                     /gene="nad7"
+                     /number=3
+     exon            135829..136072
+                     /gene="nad7"
+                     /number=4
+     misc_feature    135847
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    135873
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    135883
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    135888
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    135918
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    135938
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    135944
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     intron          136073..137891
+                     /gene="nad7"
+                     /number=4
+     exon            137892..138153
+                     /gene="nad7"
+                     /number=5
+     misc_feature    137931
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138018
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138025
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138047
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138056
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138071
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138092
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     misc_feature    138105
+                     /gene="nad7"
+                     /note="C to U RNA editing"
+     intron          complement(140723..190762)
+                     /gene="nad5"
+                     /note="transpliced intron"
+                     /number=2
+     exon            complement(140724..141939)
+                     /gene="nad5"
+                     /number=2
+     misc_feature    140895
+                     /note="C to U RNA editing"
+     misc_feature    141295
+                     /note="C to U RNA editing"
+     misc_feature    141335
+                     /note="C to U RNA editing"
+     misc_feature    141406
+                     /note="C to U RNA editing"
+     misc_feature    141445
+                     /note="C to U RNA editing"
+     misc_feature    141457
+                     /note="C to U RNA editing"
+     misc_feature    141494
+                     /note="C to U RNA editing"
+     misc_feature    141541
+                     /note="C to U RNA editing"
+     misc_feature    141561
+                     /note="C to U RNA editing"
+     misc_feature    141562
+                     /note="C to U RNA editing"
+     misc_feature    141572
+                     /note="C to U RNA editing"
+     misc_feature    141617
+                     /note="C to U RNA editing"
+     misc_feature    141622
+                     /note="C to U RNA editing"
+     misc_feature    141676
+                     /note="C to U RNA editing"
+     misc_feature    141772
+                     /note="C to U RNA editing"
+     misc_feature    141796
+                     /note="C to U RNA editing"
+     misc_feature    141812
+                     /note="C to U RNA editing"
+     misc_feature    141928
+                     /note="C to U RNA editing"
+     intron          complement(141940..142768)
+                     /gene="nad5"
+                     /number=1
+     misc_feature    141949
+                     /note="C to U RNA editing"
+     misc_feature    141961
+                     /note="C to U RNA editing"
+     misc_feature    142139
+                     /note="C to U RNA editing"
+     exon            complement(142769..142998)
+                     /gene="nad5"
+                     /number=1
+     misc_feature    142844
+                     /note="C to U RNA editing"
+     gene            complement(143219..>318390)
+                     /gene="nad1"
+                     /db_xref="GeneID:814625"
+     mRNA            complement(join(143219..143477,146990..147048,
+                     287917..288108,289003..289083,318004..>318390))
+                     /gene="nad1"
+                     /label=nad1_mrna
+                     /db_xref="GeneID:814625"
+     CDS             complement(join(143219..143477,146990..147048,
+                     287917..288108,289003..289083,318004..>318390))
+                     /gene="nad1"
+                     /codon_start=1
+                     /label=nad1_cds
+                     /product="NADH dehydrogenase subunit 1"
+                     /protein_id="NP_085565.2"
+                     /db_xref="GI:26557004"
+                     /db_xref="UniProt/TrEMBL:P92558"
+                     /db_xref="GeneID:814625"
+                     /translation="TYIAVPAEILGIILPLLLGVAFLVLAERKVMAFVQRRKGPDVVG
+                     SFGLLQPLADGSKLILKEPISPSSANFFLFRMAPVATFMLSLVARAVVPFDYGMVLSD
+                     PNIGLLYLFAISSLGVYGIIIAGRSSNSKYAFLGALRSAAQMVSYEVSIGLILITVLI
+                     CVGPRNSSEIVMAQKQIWSGIPLFPVLVMFLISRLAETNRAPFDLPEAEAESVAGYNV
+                     EYSSMGSALFFLGEYANMILMSGPCTLFFPGGWPPILDLPIFKKIPGSIWFSIKVLLF
+                     LFLYIWVRAAFPRYRYDQLMGLGRKVFLPLSLARVVPVSGLLVTFQWLP"
+     exon            complement(143219..143477)
+                     /gene="nad1"
+                     /usedin=Y08502:nad1_cds
+                     /usedin=Y08502:nad1_mrna
+                     /label=nad1_ex5
+     misc_feature    143260
+                     /note="C to U RNA editing"
+     misc_feature    143269
+                     /note="C to U RNA editing"
+     misc_feature    143299
+                     /note="C to U RNA editing"
+     misc_feature    143374
+                     /note="C to U RNA editing"
+     misc_feature    143400
+                     /note="C to U RNA editing"
+     misc_feature    143434
+                     /note="C to U RNA editing"
+     misc_feature    143442
+                     /note="C to U RNA editing"
+     misc_feature    143454
+                     /note="C to U RNA editing"
+     misc_feature    143472
+                     /note="C to U RNA editing"
+     intron          complement(143478..146989)
+                     /gene="nad1"
+                     /number=4
+     misc_feature    143590
+                     /note="C to U RNA editing"
+     misc_feature    144105
+                     /note="C to U RNA editing"
+     gene            complement(144294..146312)
+                     /gene="matR"
+                     /db_xref="GeneID:814593"
+     CDS             complement(144294..>146312)
+                     /gene="matR"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="maturase"
+                     /protein_id="NP_085512.1"
+                     /db_xref="GI:13449330"
+                     /db_xref="UniProt/TrEMBL:P93307"
+                     /db_xref="GeneID:814593"
+                     /translation="GLKFRPLTVVLPIEKIMKEAIRMVLESIYDPEFPDTSHFRSGQG
+                     CHSVLRRIKEEWGISRWFLEFDIRKCFHTIDRHRLIQILKEEIDDPKFFYSIQKVFSA
+                     GRLVGVERGPYSVPHSVLLSALPGNIYLHKLDQEIGRIRQKYEIPIVQRVRSVLLRTG
+                     RRIDDQENPGEEASFNAPQDNRAIIVGSVKSMQRKAAFHSLVSSWHTPPTSTLRLRGD
+                     QKRPFVFPPSSALAVFLNKPSSLLCAAFLIEAAGLTPKAEFYGGERCNNNWAMRDLLK
+                     YCKRKGLLIELGGEAILVIRSERGLARKQAPLKTHYLIRICYARYADDLLLGIVGAVE
+                     LLIEIQKRIAHFLQSGLNLWVGSAGSTTIAARSTVEFLGTVIREVPPRTTPIQFLREL
+                     EKRLRVKHRIHITACHLRSAIHSKFRNLGDSIPIKQLTKGMSKTGSLQDGVQLAETLG
+                     TAGVRSPQVSVLWGTVKHIRQGSRGISFLHSSGRSNASSDVQQVVSRSGTHARKLSLY
+                     TPPGRKAAGEGGGHWAGSISSEFPIKIEAPIKKILRRLRDRGIISRRRPWPIHVACLT
+                     NVSDEDIVNWSAGIAISPLSYYRCRDNLYQVRTIVDHQIRWSAIFTLAHKHKSSAPNI
+                     ILKYSKDSNIVNQEGGKILAEFPNSIELGKLGPGQDLNKKEHSTTSLV"
+     misc_feature    144418
+                     /note="C to U RNA editing"
+     misc_feature    144506
+                     /note="C to U RNA editing"
+     misc_feature    144542
+                     /note="C to U RNA editing"
+     misc_feature    144562
+                     /note="C to U RNA editing"
+     misc_feature    144583
+                     /note="C to U RNA editing"
+     misc_feature    145939
+                     /note="C to U RNA editing"
+     misc_feature    146029
+                     /note="C to U RNA editing"
+     misc_feature    146072
+                     /note="C to U RNA editing"
+     misc_feature    146233
+                     /note="C to U RNA editing"
+     exon            complement(146990..147048)
+                     /gene="nad1"
+                     /usedin=Y08502:nad1_cds
+                     /usedin=Y08502:nad1_mrna
+                     /label=nad1_ex4
+     CDS             complement(147282..147611)
+                     /gene="nad1"
+                     /note="orf109"
+                     /codon_start=1
+                     /protein_id="NP_085513.1"
+                     /db_xref="GI:13449331"
+                     /db_xref="UniProt/TrEMBL:P93308"
+                     /db_xref="GeneID:814625"
+                     /translation="MALFFFLLLLISHREQLLLVQGHQMRDLLPTPRNRSNGRLPSPF
+                     SRVINPSTHLSIHKKALPRGERKLQDEYALDSRIHSRPDPLWNFRNLQKHSRKGLVMI
+                     FSKITRC"
+     CDS             150783..151151
+                     /note="orf102b"
+                     /codon_start=1
+                     /protein_id="NP_085514.1"
+                     /db_xref="GI:13449332"
+                     /db_xref="UniProt/TrEMBL:P93309"
+                     /translation="MYRYEISCPFNLRSPAVPVSSKASSTSFIKTKALRISEVNRELS
+                     VPRVYREKSFTRRLNAPIFGSLFVDKESRFANPYSFTLNQGLTRGRGKQAKLAPDRRG
+                     KSVVTEVDYRTGVGENIVKD"
+     CDS             complement(151265..151747)
+                     /gene="nad1"
+                     /note="orf160"
+                     /codon_start=1
+                     /protein_id="NP_085515.1"
+                     /db_xref="GI:13449333"
+                     /db_xref="UniProt/TrEMBL:P93310"
+                     /db_xref="GeneID:814625"
+                     /translation="MDLRLLHGVAYGHSWFGKWGYRFCSGSFGVEEHHYHRAIAFLTS
+                     ISLVDDITANFRENKANLNIGDIVRCYRDMSEIQLTTLQDLLRFMLTIKSRAPPIRIP
+                     IGKIEAPSVVLPSMKAYGTRACPQVKQCPKDKEKSVKCRKFALPLPYRLAFTSLSVVV
+                     "
+     promoter        complement(153106..153114)
+                     /gene="nad1"
+                     /note="orf160"
+                     /evidence=not_experimental
+     gene            154744..157345
+                     /gene="rpl2"
+                     /db_xref="GeneID:814594"
+     mRNA            join(154744..155660,157213..157345)
+                     /gene="rpl2"
+                     /db_xref="GeneID:814594"
+     CDS             join(154744..155660,157213..157345)
+                     /gene="rpl2"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ribosomal protein L2"
+                     /protein_id="NP_085516.1"
+                     /db_xref="GI:13449334"
+                     /db_xref="UniProt/TrEMBL:P93311"
+                     /db_xref="GeneID:814594"
+                     /translation="MRPGRARALRQFTLSTGKSAGRNSSGRITVFHRGGGSKRLLRRI
+                     DLKRSTSSMGIVESIEYDPNRSSQIAPVRWIKGGCQKKMNTIEKFAPPRKILEPTTNT
+                     ISGLFSFSFLPGKVDKRKVACFSPGLMAAYVVVGLPTGMPPLSSSKSAFASKGAGSTK
+                     TLVKDVFFSAFSSPKAKRETASLAFASSFGFPRIAVAGAKPAFFAPRMRQKVRGKSTF
+                     SLCEVQKGRTHSILWAHRIKGKAGLSWQSFRRQDTLGLVGAAGHKKSKPKTDQGNLPA
+                     KPIGERAKQLKALRGLRAKDGACKVDRAPVTYIIASHQLEAGKMVMNCDWSKPSTSSF
+                     LQSAQNDHPKPLFTV"
+     exon            154744..155660
+                     /gene="rpl2"
+                     /number=1
+     misc_feature    155454
+                     /gene="rpl2"
+                     /note="C to U RNA editing"
+     intron          155661..157212
+                     /gene="rpl2"
+                     /number=1
+     exon            157213..157345
+                     /gene="rpl2"
+                     /number=2
+     gene            157491..158351
+                     /gene="orfX"
+                     /db_xref="GeneID:814595"
+     CDS             <157491..158351
+                     /gene="orfX"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /protein_id="NP_085517.1"
+                     /db_xref="GI:13449335"
+                     /db_xref="UniProt/TrEMBL:P93312"
+                     /db_xref="GeneID:814595"
+                     /translation="NPSLLALNYLYISYEFNFASETILGEVRIRSVRILIGLGLTWFT
+                     CYWFPEELISPLASPFLTLPFDSYFVCTQLTEAFSTFVATSSIACSYFVFPLISYQIW
+                     CFLIPSCYGEQRTKYNRFLHLSGSRFFLFLFLTPPRVVPNVWHFPYFVGATSTNSLMI
+                     KLQPKIYDHIMLTVRISFIPSVCSQVPVIVICLPEPRGLSLETFTNNRRFLMVFPLLT
+                     AALSTPPDIWCQIVARFLISLIIELAIFVASIVQVREEGWTSGMRESGSIEKKNKSSP
+                     PPRTWQSNYQ"
+     misc_feature    157549
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157634
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157635
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157651
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157654
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157663
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157690
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157851
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157854
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157867
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157869
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157897
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157899
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157902
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157930
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157964
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    157995
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158028
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158071
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158133
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158155
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158183
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158190
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     misc_feature    158236
+                     /gene="orfX"
+                     /note="C to U RNA editing"
+     gene            161693..169674
+                     /gene="nad4"
+                     /db_xref="GeneID:814596"
+     mRNA            join(161693..162153,163625..164139,167221..167642,
+                     169585..169674)
+                     /gene="nad4"
+                     /db_xref="GeneID:814596"
+     CDS             join(161693..162153,163625..164139,167221..167642,
+                     169585..169674)
+                     /gene="nad4"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="NADH dehydrogenase subunit 4"
+                     /protein_id="NP_085518.1"
+                     /db_xref="GI:13449336"
+                     /db_xref="UniProt/TrEMBL:P93313"
+                     /db_xref="GeneID:814596"
+                     /translation="MLEHFCECYSNLSGLILCPVLGSITLLFIPNSRIRPIRLIGLCA
+                     SLITFLYSPVPRIQFDSSTAKSQFVESLRWLPYENINFYLGIDGISLFFVILTTFLIP
+                     ICISVGWSGMRSYGKEYITAFLIREFLMIAVFRMLDLLLFYVFPESVPIPMFIIIGVW
+                     GSRQRKIKAAYQFFLYTLLGSLFMLLAILLILFQTGTTDLQISLTTEFSERRQIFLWI
+                     ASFASFAVKVPMVPVHIWLPEAHVEAPTAGSVILAGIPLKFGTHGFLRFSIPMFPEAT
+                     LCSTPFIYTLSAIAIIYTSLTTSRQIDLKKIIAYSSVAHMNLVTIGMFSPNIQGIGGS
+                     ILPMLSHGLVPSALFLCVGVLYDRHKTRLVRYYGGLVSTMPNLSTIFFSFTLANMSSP
+                     GTSSFIGEFLILVGAFQRNSLVATLAALGMILGAAYSLWLYNRVVSGNLKPDFLHKFS
+                     DSNGREVSIFIPFLVGLVRMGVHPKVFPDCMHTSVSNLVQHGKFH"
+     exon            161693..162153
+                     /gene="nad4"
+                     /number=1
+     misc_feature    161721
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161766
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161776
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161799
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161816
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161850
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161856
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161858
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    161889
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162009
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162054
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162068
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162094
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162095
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162110
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    162141
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     intron          162154..163624
+                     /gene="nad4"
+                     /number=1
+     exon            163625..164139
+                     /gene="nad4"
+                     /number=2
+     misc_feature    163771
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    163822
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    163930
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    163947
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    163999
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    164059
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     intron          164140..167220
+                     /gene="nad4"
+                     /number=2
+     exon            167221..167642
+                     /gene="nad4"
+                     /number=3
+     misc_feature    167254
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    167277
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    167345
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    167373
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    167416
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    167599
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    167617
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     intron          167643..169584
+                     /gene="nad4"
+                     /number=3
+     exon            169585..169674
+                     /gene="nad4"
+                     /number=4
+     misc_feature    169591
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    169603
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     misc_feature    169619
+                     /gene="nad4"
+                     /note="C to U RNA editing"
+     CDS             169796..170737
+                     /note="orf313"
+                     /codon_start=1
+                     /protein_id="NP_085519.1"
+                     /db_xref="GI:13449337"
+                     /db_xref="UniProt/TrEMBL:P93314"
+                     /translation="MTIRNQRFSLLKQPISSTLNQHLVDYPTPSNLSYWWGFGPLAGT
+                     MILSVLSSPALVSGLMVARAKNLVHSVLFPIPIFFSINQLFHYFCRLPIIKHLATKCQ
+                     LLLFLISHFLLLLVLTKLVLDLGGYLFMDDLSRALSQFVPGFSGGLGGGSNTPPNPSG
+                     DFFLSSYQTSDPDYHDQRRGDSYFSSAPGVQETHRHASGSSTNLHLNLNDQSQDPIFL
+                     EVERLSLKCDKVKEKTILKTQSLLLERGYHIPDERDIERAINVVMTEHETIDIDRRRK
+                     RFYYLYSCLGKTGNKFWMELLETLADYNINIKSDSDN"
+     CDS             174205..174525
+                     /note="orf106c"
+                     /codon_start=1
+                     /protein_id="NP_085520.1"
+                     /db_xref="GI:13449338"
+                     /db_xref="UniProt/TrEMBL:P93315"
+                     /translation="MTPCFINGKCAEQLTSTTAYKLLFLPPLTEHTTSLYPKQYPDKQ
+                     LVLLLDPRSGTRLTYYSQPFFIALSSPYSGSSLSAKFHFAPHLYAFLVVASITRSDIA
+                     YSVN"
+     promoter        176194..176202
+                     /evidence=not_experimental
+     CDS             complement(176542..177027)
+                     /gene="nad1"
+                     /note="orf161"
+                     /codon_start=1
+                     /protein_id="NP_085521.1"
+                     /db_xref="GI:13449339"
+                     /db_xref="UniProt/TrEMBL:P93316"
+                     /db_xref="GeneID:814625"
+                     /translation="MLYGLRLYAFQEISFLDPWQLAAIFSGSCVLFISLEKRTLTGYM
+                     LTFILYSVLALFVSVWLSSAAGKAGIPIEGMVFLLFLIGGICFICLIQKIFQLTPNTV
+                     QALIPILFSALFFFLEELPALEGLPLLKWLKGLDLLLLLVGLLLLIFNENRQGGDGEG
+                     S"
+     repeat_unit     178863..183059
+                     /note="repeat I"
+     promoter        181074..181082
+                     /note="orf139b"
+                     /evidence=not_experimental
+     CDS             182278..182697
+                     /note="orf139b
+                     identical to orf139a"
+                     /codon_start=1
+                     /protein_id="NP_085522.1"
+                     /db_xref="GI:13449340"
+                     /db_xref="UniProt/TrEMBL:P94024"
+                     /translation="MIQRTRNQSIMLSLPSNQSANHAILTFQPIGQSRYLLTFQPTPS
+                     IPLLQQYIISVPYLDAYSSICFPVMARIRSAKYCFFFFLVLFLNGIIATRGKAMLPTL
+                     PQKGAAFFPPKMPVPPSGPSKQHNSAPRSDFVQFFYM"
+     CDS             183111..183443
+                     /note="orf110b"
+                     /codon_start=1
+                     /protein_id="NP_085523.1"
+                     /db_xref="GI:13449341"
+                     /db_xref="UniProt/TrEMBL:P93317"
+                     /translation="MPSPILPMLPISHLIGTEVRNLISVRTPNITMDQLKNGCCSILT
+                     QLETLLRSQSPSEMTIFQTLCDRCCGAEVANEATVECGKTMETTNLTSGGRYWPFHNG
+                     TNLSRISL"
+     gene            complement(188084..188662)
+                     /gene="orf25"
+                     /db_xref="GeneID:814597"
+     CDS             complement(188084..188662)
+                     /gene="orf25"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /protein_id="NP_085524.1"
+                     /db_xref="GI:13449342"
+                     /db_xref="UniProt/Swiss-Prot:Q04613"
+                     /db_xref="GeneID:814597"
+                     /translation="MRLSITNMDGRKMLFAAILSICALSSKKISIYNEEMIVALCFIG
+                     FIIFSRKSLGTTFKVTLDGSLQAIQEESQQFPNPNEVVPPESNEQQRLLRISLRICGT
+                     VVESLPMARCAPKCEKTVQALLCRNLNVKSATLTNATSSRRIRFQDDLVTKFYTLVGK
+                     QFAYSCISKAERVEFIRESLVVLRMVRGGVFS"
+     misc_feature    188247
+                     /note="C to U RNA editing"
+     misc_feature    188268
+                     /note="C to U RNA editing"
+     misc_feature    188412
+                     /note="C to U RNA editing"
+     misc_feature    188413
+                     /note="C to U RNA editing"
+     misc_feature    188415
+                     /note="C to U RNA editing"
+     misc_feature    188448
+                     /note="C to U RNA editing"
+     misc_feature    188525
+                     /note="C to U RNA editing"
+     misc_feature    188574
+                     /note="C to U RNA editing"
+     gene            complement(188929..189231)
+                     /gene="nad4L"
+                     /db_xref="GeneID:814598"
+     CDS             complement(188929..189231)
+                     /gene="nad4L"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="NADH dehydrogenase subunit 4L"
+                     /protein_id="NP_085525.1"
+                     /db_xref="GI:13449343"
+                     /db_xref="UniProt/Swiss-Prot:Q04614"
+                     /db_xref="GeneID:814598"
+                     /translation="MDLIKYFTFSMIISILGIRGILLNRRNIPIMSMPIESMLLAVNS
+                     NFLVFSVSSDDMMGQVFASLVPTVAAAESAIGLAIFVITFRVRGTIAVEFINSIQG"
+     misc_feature    189035
+                     /note="C to U RNA editing"
+     misc_feature    189044
+                     /note="C to U RNA editing"
+     misc_feature    189074
+                     /note="C to U RNA editing"
+     misc_feature    189122
+                     /note="C to U RNA editing"
+     misc_feature    189132
+                     /note="C to U RNA editing"
+     misc_feature    189137
+                     /note="C to U RNA editing"
+     misc_feature    189146
+                     /note="C to U RNA editing"
+     misc_feature    189177
+                     /note="C to U RNA editing"
+     misc_feature    189191
+                     /note="C to U RNA editing"
+     CDS             complement(190104..190553)
+                     /gene="nad1"
+                     /note="orf149"
+                     /codon_start=1
+                     /protein_id="NP_085526.1"
+                     /db_xref="GI:13449344"
+                     /db_xref="UniProt/TrEMBL:P93318"
+                     /db_xref="GeneID:814625"
+                     /translation="MRPFGEAKFLSGHQTLQLMRKKALWSKGKRVRCHTPCLPKVPRG
+                     RARRSGATTREQSPHRQGDRRRPSQGTSRPTGKTGETREGNPIGSQRIHSTCSPTDFI
+                     FLILESGGKGSLFCNEKKKRSRFDSAQPNDTSNTNDLCLECVARSLF"
+     exon            complement(190740..190761)
+                     /gene="nad1"
+                     /number=3
+     CDS             complement(191055..191882)
+                     /gene="nad1"
+                     /note="orf275"
+                     /codon_start=1
+                     /protein_id="NP_085527.1"
+                     /db_xref="GI:13449345"
+                     /db_xref="UniProt/TrEMBL:P93319"
+                     /db_xref="GeneID:814625"
+                     /translation="MKKYKMVINIDMLRLFLPLLGGSVSGSLFGRFLGSEGSAIMITT
+                     CVSFCALVVFIFGLFYFRKKGPLKRILYLFLVGFVLSLIRIKVVYLLGGQALPLLDPI
+                     LMYAVGAGALLGPNGAESSATWEEDSFELDVLGESFSSSKTDMDSQVAEAPQTEEGEP
+                     SVNQVPQEAGASHRVGPYQDQGLATDRNGNPIDLNDSLPPSSLLYGEIESSASVRARD
+                     LELEKDIKRVQRLTRNFDNAEDPARRLEVAARLDPEVRELDQKWALFQEKDASGLGR"
+     intron          complement(200001..<287916)
+                     /gene="nad1"
+                     /note="transpliced intron"
+                     /number=3
+     CDS             201729..202097
+                     /note="orf122c"
+                     /codon_start=1
+                     /protein_id="NP_085528.1"
+                     /db_xref="GI:13449346"
+                     /db_xref="UniProt/TrEMBL:P92510"
+                     /translation="MKHASFCLSSRILLLAPCRYLGTLLLLLPYPCSTLRQFLFLLRS
+                     LFIRDVEWIPAGLSHHIPYFPLASPPLTVETLLIARLLLSIKQLSLPPAKTASLSASL
+                     DAKTKGRSLLSSCSYCYMPT"
+     CDS             complement(203588..204310)
+                     /gene="nad1"
+                     /note="orf240a"
+                     /codon_start=1
+                     /protein_id="NP_085529.1"
+                     /db_xref="GI:13449347"
+                     /db_xref="UniProt/TrEMBL:P92511"
+                     /db_xref="GeneID:814625"
+                     /translation="MRSSVLRSLRGRLVINLESTRKLRLSRTNIVPGRKKGQKSIKSK
+                     NMARKGNPILVRLGKNRSSDSSRFSAEALLGCLYFFIYFVAPTLGPVLFLLRLIHFVW
+                     GLRLGLGNENFHFGVGPDGGATGLDLNQPPQEQQPTLGVNRAALDLNELPPVHLLYAE
+                     VEGPQSTKAQNDVMLAHLNQVQNLTRDLQTEPNIWRRQALIDILDWEVRSLQRHFRIF
+                     RQRDRLREVQRSWLREQLNRYR"
+     misc_feature    204112
+                     /note="C to U RNA editing"
+     gene            complement(204340..204412)
+                     /gene="tRNA-Lys"
+                     /db_xref="GeneID:814600"
+     tRNA            complement(204340..204412)
+                     /gene="tRNA-Lys"
+                     /product="tRNA-Lys"
+                     /db_xref="GeneID:814600"
+     CDS             complement(207553..207915)
+                     /gene="nad1"
+                     /note="orf120"
+                     /codon_start=1
+                     /protein_id="NP_085530.1"
+                     /db_xref="GI:13449348"
+                     /db_xref="UniProt/TrEMBL:P92512"
+                     /db_xref="GeneID:814625"
+                     /translation="MNRTIIEKVRSMLCECGLPKTFRADAANTAVHIINKYPSTAINF
+                     HVPDEVWFQSVPTYSYLRRFGCVAYIHCDEGKLKPRAKKGEEKGSYLINRIVSILYTI
+                     GIGKTSSPRKASHLGIKG"
+     CDS             209499..209822
+                     /note="orf107d"
+                     /codon_start=1
+                     /protein_id="NP_085531.1"
+                     /db_xref="GI:13449349"
+                     /db_xref="UniProt/TrEMBL:P92513"
+                     /translation="MEDFGFTTALRSESVYIVFRFISSSSLSYNTLSSAFIRLDWIKT
+                     PRHRAASRALGIQSNGQICTACPHLSALFGFLSVASCFPSNKDDGLHTRIYFDLEFQP
+                     DSAVF"
+     gene            217631..219077
+                     /gene="cox3"
+                     /db_xref="GeneID:814601"
+     promoter        217631..217639
+                     /gene="cox3"
+                     /evidence=not_experimental
+     CDS             218280..219077
+                     /gene="cox3"
+                     /codon_start=1
+                     /evidence=not_experimental
+                     /exception="RNA editing"
+                     /product="cytochrome c oxidase subunit 3"
+                     /protein_id="NP_085532.2"
+                     /db_xref="GI:26556997"
+                     /db_xref="UniProt/Swiss-Prot:P92514"
+                     /db_xref="GeneID:814601"
+                     /translation="MIESQRHSYHLVDPSPWPISGSLGALATTVGGVMYMHPFQGGAR
+                     LLSLGLIFILYTMFVWWRDVLRESTLEGHHTKVVQLGPRYGSILFIVSEVMFFFAFFW
+                     ASSHSSLAPAVEIGGIWPPKGIEVLDPWEIPFLNTPILPSSGAAVTWAHHAILAGKEK
+                     RAVYALVATVLLALVFTGFQGMEYYQAPFTISDSIYGSTFFLATGFHGFHVIIGTLFL
+                     IICGIRQYLGHLTKEHHVGFEAAAWYWHFVDVVWLFLFVSIYWWGGI"
+     misc_feature    218391
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218524
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218536
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218590
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218593
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218692
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218701
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    218882
+                     /gene="cox3"
+                     /note="C to U RNA editing"
+     misc_feature    219126
+                     /note="C to U RNA editing"
+     misc_feature    219159
+                     /note="C to U RNA editing"
+     misc_feature    219207
+                     /note="C to U RNA editing"
+     misc_feature    219244
+                     /note="C to U RNA editing"
+     misc_feature    219263
+                     /note="C to U RNA editing"
+     CDS             complement(220471..220773)
+                     /gene="nad1"
+                     /note="orf100a"
+                     /codon_start=1
+                     /protein_id="NP_085533.1"
+                     /db_xref="GI:13449351"
+                     /db_xref="UniProt/TrEMBL:P92515"
+                     /db_xref="GeneID:814625"
+                     /translation="MLHISQEFLLPISMEHWRLLITSQVIKKIGIPWKVIFHAFQNHN
+                     RGILFLQIFEHIIDFLFGEAIIYRYVLLDPKSRQVIRDSILNSFLVLYNQIKLLEK"
+     CDS             220830..221189
+                     /note="orf119"
+                     /codon_start=1
+                     /protein_id="NP_085534.1"
+                     /db_xref="GI:13449352"
+                     /db_xref="UniProt/TrEMBL:P92516"
+                     /translation="MHFKRGSEGHSLPLPCMHRSMQDISQHLKQWPRFVLQAGFYWPT
+                     TFKDAHGFVSSCDACQRKGNFTKRNEMPQHFILEVEVFDVWGIYFMKKTIFSWKPIHP
+                     NGGRLCLKMGGSSCEPH"
+     CDS             complement(221700..222029)
+                     /gene="nad1"
+                     /note="orf109b"
+                     /codon_start=1
+                     /protein_id="NP_085535.1"
+                     /db_xref="GI:13449353"
+                     /db_xref="UniProt/TrEMBL:P92517"
+                     /db_xref="GeneID:814625"
+                     /translation="MLRSGKELEEVVRDDKEEEQVVVRKAKQIVNFPLLGMLSSARYG
+                     LRRWPFTVKCSPLTSQSTTAPYGFTSFMRKGKKHLDLNFPGNNLSLELRSSTWSFTLN
+                     SLGKIFL"
+     CDS             complement(222687..222989)
+                     /gene="nad1"
+                     /note="orf100b"
+                     /codon_start=1
+                     /protein_id="NP_085536.1"
+                     /db_xref="GI:13449354"
+                     /db_xref="UniProt/TrEMBL:P92518"
+                     /db_xref="GeneID:814625"
+                     /translation="MGLSTHCQLVFSWKPMNTHMVSRAMEGSLKSYPFPSNINITSCV
+                     SIKLNDRNYLLYDEGRVRLSLLNLGERCDFVPFLSVELFKPLQSSGVYEKLHESNN"
+     repeat_unit     227087..227619
+                     /note="repeat II"
+     gene            227109..227196
+                     /gene="tRNA-Ser"
+                     /db_xref="GeneID:814602"
+     tRNA            227109..227196
+                     /gene="tRNA-Ser"
+                     /product="tRNA-Ser"
+                     /db_xref="GeneID:814602"
+     gene            227502..227577
+                     /gene="tRNA-Tyr"
+                     /db_xref="GeneID:814603"
+     tRNA            227502..227577
+                     /gene="tRNA-Tyr"
+                     /product="tRNA-Tyr"
+                     /db_xref="GeneID:814603"
+     gene            227615..227686
+                     /gene="tRNA-Glu"
+                     /db_xref="GeneID:814604"
+     tRNA            227615..227686
+                     /gene="tRNA-Glu"
+                     /product="tRNA-Glu"
+                     /db_xref="GeneID:814604"
+     CDS             complement(227709..228431)
+                     /gene="nad1"
+                     /note="orf240b"
+                     /codon_start=1
+                     /protein_id="NP_085537.1"
+                     /db_xref="GI:13449355"
+                     /db_xref="UniProt/TrEMBL:P92519"
+                     /db_xref="GeneID:814625"
+                     /translation="MYLLLYVDDILLTGSSNTLLNMLIFQLSSTFSMKDLGPVHYFLG
+                     IQIKTHPSGLFLSQTKYAEQILNNAGMLDCKPMSTPLPLKLNSSVSTAKYPDPSDFRS
+                     IVGALQYLTLTRPDISYAVNIVCQRMHEPTLADFDLLKRVLRYVKGTIFHGLYIHKNS
+                     KLNVQAFCDSDWAGCTSTRRSTTGFCTFLGCNIISWSAKRQPTVSRSSTETEYRALAL
+                     TAAELTWSSASRSRDPSAMNTN"
+     CDS             complement(228573..229085)
+                     /gene="nad1"
+                     /note="orf170"
+                     /codon_start=1
+                     /protein_id="NP_085538.1"
+                     /db_xref="GI:13449356"
+                     /db_xref="UniProt/TrEMBL:P92520"
+                     /db_xref="GeneID:814625"
+                     /translation="MLTRSKAGINKLNPKYSLTITTTIKKEPKSVIFALKDPGWCQAM
+                     QEELDALSRNKTWILVPPPVNQNILGCKWVFKTKLHSDGTLDRLKARLVAKGFHQEEG
+                     IYFVETYSPVVRTATIRTILNVAQQLEVGQSINWMFKMHFSMGIFKKKFICINLLVLR
+                     ILFIHPMCVC"
+     gene            complement(231894..233042)
+                     /gene="ccb382"
+                     /db_xref="GeneID:814605"
+     CDS             complement(231894..233042)
+                     /gene="ccb382"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="cytochrome c biogenesis orf382"
+                     /protein_id="NP_085539.2"
+                     /db_xref="GI:26556998"
+                     /db_xref="UniProt/TrEMBL:Q9T6H8"
+                     /db_xref="GeneID:814605"
+                     /translation="MSISIYEFFHYSLFPGLFVAFTYNKKQPPAFGAAPAFWCILLSF
+                     LGLSFCHIPNNLSNYNVLTANAPFFYQISGTWSNHEGSILLWCRIPNFYGFFLCYRGR
+                     PQSHNVLKQGGHRESLFFFFVSNFVKNSILSLPRYEQESGLKNQLYTPFVLRTLVDSE
+                     LRSRRNRTFDGPALFYAPLYPERKIKNPLDAWRSRGSREGKRTHPLLHLARDDKERAS
+                     SIDEQRIDGALGIALFFSPFLSASSDPFVRNFFVCTEPLAESNPVPQDPISAIHPPCI
+                     YAGDVASAEGFGLCRSKMMNGIVALHSPPMRKDAAEKNGTLLRSAGCVGSRITSELFT
+                     LKFKHVGAKCYPALLLRSNRSPLMLLRRRFFAFSSFWAGARSHSTKRY"
+     misc_feature    232088
+                     /note="C to U RNA editing"
+     misc_feature    232237
+                     /note="C to U RNA editing"
+     misc_feature    232252
+                     /note="C to U RNA editing"
+     misc_feature    232264
+                     /note="C to U RNA editing"
+     misc_feature    232324
+                     /note="C to U RNA editing"
+     misc_feature    232333
+                     /note="C to U RNA editing"
+     misc_feature    232334
+                     /note="C to U RNA editing"
+     misc_feature    232462
+                     /note="C to U RNA editing"
+     misc_feature    232464
+                     /note="C to U RNA editing"
+     misc_feature    232559
+                     /note="C to U RNA editing"
+     misc_feature    232639
+                     /note="C to U RNA editing"
+     misc_feature    232665
+                     /note="C to U RNA editing"
+     misc_feature    232672
+                     /note="C to U RNA editing"
+     misc_feature    232703
+                     /note="C to U RNA editing"
+     misc_feature    232754
+                     /note="C to U RNA editing"
+     misc_feature    232774
+                     /note="C to U RNA editing"
+     misc_feature    232781
+                     /note="C to U RNA editing"
+     misc_feature    232843
+                     /note="C to U RNA editing"
+     misc_feature    232886
+                     /note="C to U RNA editing"
+     misc_feature    232900
+                     /note="C to U RNA editing"
+     misc_feature    232939
+                     /note="C to U RNA editing"
+     misc_feature    232999
+                     /note="C to U RNA editing"
+     CDS             complement(234538..234903)
+                     /gene="nad1"
+                     /note="orf121b"
+                     /codon_start=1
+                     /protein_id="NP_085540.1"
+                     /db_xref="GI:13449358"
+                     /db_xref="UniProt/TrEMBL:P92521"
+                     /db_xref="GeneID:814625"
+                     /translation="MKLLVAPESIKTMTSCPAICPLILMDLSQLLNLPSTAKTTRVIP
+                     LLHPHLILLSFSLFQLHQPHPTHLLHPQPYTLILCPYICDQDSTCHHNENKAPCSFYP
+                     DQTVSGMVPFYVLGIGRAR"
+     CDS             235338..235661
+                     /note="orf107e"
+                     /codon_start=1
+                     /protein_id="NP_085541.1"
+                     /db_xref="GI:13449359"
+                     /db_xref="UniProt/TrEMBL:P92522"
+                     /translation="MKTCLRCRKDYPQNVAGNMLSTLRKEQNQLTSVLTGIHILRRTR
+                     LKNWLGEMLEARIIQPSISPYSSPVLLVQKKDGGWPTARGLPSLLQAHGTRQVPNSRD
+                     RGTVG"
+     CDS             235916..236392
+                     /note="orf158"
+                     /codon_start=1
+                     /protein_id="NP_085542.1"
+                     /db_xref="GI:13449360"
+                     /db_xref="UniProt/TrEMBL:P92523"
+                     /translation="MNHLGMVLQIWEQHQFYANRKKCAFGQPQIAYLGHRHIISGEGV
+                     SADPAKLEAMVGWPEPKNTTELRGFLGLTGYYRRFVKNYGKIVRPLTELLKKNSLKWT
+                     EMAALAFKALKGAVTTLPVLALPDLKLPFVTRVGKWNWSCFITREQACCVSQPRVF"
+     CDS             complement(236927..237481)
+                     /gene="nad1"
+                     /note="orf184"
+                     /codon_start=1
+                     /protein_id="NP_085543.1"
+                     /db_xref="GI:13449361"
+                     /db_xref="UniProt/TrEMBL:P92525"
+                     /db_xref="GeneID:814625"
+                     /translation="MVSSSRIARSFTRYTSALQRHTIVTKIKQKFPCPRSRTQGQSRR
+                     SETHTISRRRSCRAIARSNLGRVSSVTLPWFSCPSPAVVALAKKPFRTARSSCPRSRK
+                     SSRCRKSLLSWTAHHLLDEFPKRRIWVFLSFRIGGSLHRRHAMEKNICSTRARIRLIS
+                     AISLLSKPIVEIVVGYPQSEDRRL"
+     CDS             237053..237616
+                     /note="orf187"
+                     /codon_start=1
+                     /protein_id="NP_085544.1"
+                     /db_xref="GI:13449362"
+                     /db_xref="UniProt/TrEMBL:P92524"
+                     /translation="MTTMKRSADPEAEEDPDPSLGKFIEQVVSGPTQKGFPASGAFAT
+                     PRARRSRGPKRFLGKRNYRRARARKPGKRDRAHSSKVRSSDGSTRPSARYGMRFRSPT
+                     LPLCSRPRTWEFLLDLGYDSMSLKSARVASERPCDPGRANHVWEPGGLSPEQAQWDWV
+                     GKVYSRDRVLKLMGELHELVYRESGRR"
+     CDS             complement(239636..239956)
+                     /gene="nad1"
+                     /note="orf106d"
+                     /codon_start=1
+                     /protein_id="NP_085545.1"
+                     /db_xref="GI:13449363"
+                     /db_xref="UniProt/TrEMBL:P92526"
+                     /db_xref="GeneID:814625"
+                     /translation="MHLERSVQSQLTESKEIARPYSLWGISLAQHSFKTSTRSTGKKR
+                     SKGSTSQDGKKQESLESRNDLGPTIVGLIRKILSYSSKKEFSNLTGLESGGSSPPFSL
+                     AVSK"
+     gene            complement(239988..240758)
+                     /gene="ccb256"
+                     /db_xref="GeneID:814606"
+     CDS             complement(239988..240758)
+                     /gene="ccb256"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="cytochrome c biogenesis orf256"
+                     /protein_id="NP_085546.2"
+                     /db_xref="GI:26556999"
+                     /db_xref="UniProt/TrEMBL:P92527"
+                     /db_xref="GeneID:814606"
+                     /translation="MSVSLLQPSFLMSKTRSYAQILIGSWLFLTAMAIHLSLGVAPLD
+                     LQQGGNSRILYVHVPAARMSIIVYIATAINTFLFLLTKHPLYLRSSGTGIEMGAFFTL
+                     FTLVTGGFRGRPMWGTFWVWDARLTSVFISFLIYLGALRFQKLPVEPASISIRAGPID
+                     IPIIKSSVNWWNTSHQPGSISRSGTSIHVPMPIPILSNFANFPFSTRILFVLETRLPI
+                     PSFLESPITEEIEAREGIPKPSSLALFASMAEWLKRPT"
+     misc_feature    240086
+                     /note="C to U RNA editing"
+     misc_feature    240103
+                     /note="C to U RNA editing"
+     misc_feature    240109
+                     /note="C to U RNA editing"
+     misc_feature    240135
+                     /note="C to U RNA editing"
+     misc_feature    240140
+                     /note="C to U RNA editing"
+     misc_feature    240141
+                     /note="C to U RNA editing"
+     misc_feature    240145
+                     /note="C to U RNA editing"
+     misc_feature    240151
+                     /note="C to U RNA editing"
+     misc_feature    240184
+                     /note="C to U RNA editing"
+     misc_feature    240191
+                     /note="C to U RNA editing"
+     misc_feature    240211
+                     /note="C to U RNA editing"
+     misc_feature    240238
+                     /note="C to U RNA editing"
+     misc_feature    240262
+                     /note="C to U RNA editing"
+     misc_feature    240282
+                     /note="C to U RNA editing"
+     misc_feature    240286
+                     /note="C to U RNA editing"
+     misc_feature    240292
+                     /note="C to U RNA editing"
+     misc_feature    240296
+                     /note="C to U RNA editing"
+     misc_feature    240301
+                     /note="C to U RNA editing"
+     misc_feature    240313
+                     /note="C to U RNA editing"
+     misc_feature    240338
+                     /note="C to U RNA editing"
+     misc_feature    240359
+                     /note="C to U RNA editing"
+     misc_feature    240364
+                     /note="C to U RNA editing"
+     misc_feature    240428
+                     /note="C to U RNA editing"
+     misc_feature    240497
+                     /note="C to U RNA editing"
+     misc_feature    240575
+                     /note="C to U RNA editing"
+     misc_feature    240580
+                     /note="C to U RNA editing"
+     misc_feature    240626
+                     /note="C to U RNA editing"
+     misc_feature    240656
+                     /note="C to U RNA editing"
+     CDS             complement(241306..241953)
+                     /gene="nad1"
+                     /note="orf215a"
+                     /codon_start=1
+                     /protein_id="NP_085547.1"
+                     /db_xref="GI:13449365"
+                     /db_xref="UniProt/TrEMBL:P92528"
+                     /db_xref="GeneID:814625"
+                     /translation="MPTANQLIRHGREEKRRTDRTEVLVFGLLVTRIIRFVHSVLFPI
+                     PVFCSIKVLLDYFCSLPIIDKLSKKWQLIWFYVLSVILCKSLFAVGYLWMDDLSRAIS
+                     QFYPVVSGGLGGGNTPMPPTNPSEGGLLEGYYAHENEHSHDQQRGSPFWSKEYKESGS
+                     KRLFLNLEVEDQNTDTIGEQVKAESGKCEKIKAKIIAKTHELLVSEDTKFQIKTI"
+     CDS             complement(249272..249919)
+                     /gene="nad1"
+                     /note="orf215b"
+                     /codon_start=1
+                     /protein_id="NP_085548.1"
+                     /db_xref="GI:13449366"
+                     /db_xref="UniProt/TrEMBL:P92529"
+                     /db_xref="GeneID:814625"
+                     /translation="MKMKSPLFRGPLVNSSTENPIHILTREKLRHQTSGTNSTEKVIL
+                     PNILHSYIKNLNLDFTSPYGSTGNNEVLSILRSRFKQSIFPSSGLKCLDTTGDFLIKN
+                     VLHKRYESVQKNISNALSSSINSRTAVFFCILFSITVLMEIAPGPLLKKPSLLFSDNL
+                     PNVLQYTRDVYVNHVCIIHKSLSPCECEEPLNRIIRDMFPQTTFDPLELQKPSPQ"
+     gene            complement(250080..250153)
+                     /gene="tRNA-Trp"
+                     /db_xref="GeneID:814607"
+     tRNA            complement(250080..250153)
+                     /gene="tRNA-Trp"
+                     /product="tRNA-Trp"
+                     /db_xref="GeneID:814607"
+     CDS             complement(251403..251897)
+                     /gene="nad1"
+                     /note="orf164"
+                     /codon_start=1
+                     /protein_id="NP_085549.1"
+                     /db_xref="GI:13449367"
+                     /db_xref="UniProt/TrEMBL:P92530"
+                     /db_xref="GeneID:814625"
+                     /translation="MRKSADEMFIGVRRAPISSNVGGTSFYGGDEYCSYYQSNGGVAK
+                     EDDGSAKKGFRRTGKGKLTAEAVSEAINRAAQGLPFEVVYYPTAGWSDFVVKAEDVEA
+                     SMAIFWTPGTRVKMAMETEDSSRITWFQGIVFYTYQETGPWRGSPWNSFRYKIPFTIP
+                     LIFL"
+     gene            complement(254505..254571)
+                     /gene="tRNA-Gln"
+                     /db_xref="GeneID:814608"
+     tRNA            complement(254505..254571)
+                     /gene="tRNA-Gln"
+                     /product="tRNA-Gln"
+                     /db_xref="GeneID:814608"
+     gene            complement(256865..257476)
+                     /gene="ccb203"
+                     /db_xref="GeneID:814609"
+     CDS             complement(256865..>257476)
+                     /gene="ccb203"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="cytochrome c biogenesis orf203"
+                     /protein_id="NP_085550.2"
+                     /db_xref="GI:26557000"
+                     /db_xref="UniProt/TrEMBL:Q9T6H7"
+                     /db_xref="GeneID:814609"
+                     /translation="VDTGREQAKRVVRNGKKETTTSPLCWTAGANTVVSDQDQEPIRI
+                     WILTCWWFLTVGILPGSWWAYHELGRGGWWFRDPVENASFMPRVLATARIHSVILPLL
+                     HSWTSFLNIVTFPCCVSGTFSIRSGLLAPVHSFATDDTRGIFLWWFFLLMTGISMILF
+                     YQMKQQASVRRTYKKEMVVARSTLVHLRHSARAQPRPVMLWKN"
+     misc_feature    257010
+                     /note="C to U RNA editing"
+     misc_feature    257086
+                     /note="C to U RNA editing"
+     misc_feature    257121
+                     /note="C to U RNA editing"
+     misc_feature    257133
+                     /note="C to U RNA editing"
+     misc_feature    257157
+                     /note="C to U RNA editing"
+     misc_feature    257176
+                     /note="C to U RNA editing"
+     misc_feature    257200
+                     /note="C to U RNA editing"
+     misc_feature    257218
+                     /note="C to U RNA editing"
+     misc_feature    257251
+                     /note="C to U RNA editing"
+     misc_feature    257269
+                     /note="C to U RNA editing"
+     misc_feature    257301
+                     /note="C to U RNA editing"
+     misc_feature    257412
+                     /note="C to U RNA editing"
+     CDS             complement(258045..258398)
+                     /gene="nad1"
+                     /note="orf117"
+                     /codon_start=1
+                     /protein_id="NP_085551.1"
+                     /db_xref="GI:13449369"
+                     /db_xref="UniProt/TrEMBL:P92531"
+                     /db_xref="GeneID:814625"
+                     /translation="MVASDSRPMRLRLRAELFLASFAVREESIRSKKEWTYISKYIKG
+                     ILKSRLSRREQSRWNIIDDTTSMAFFEEFASLNPVFHTFLFYGRRDGEDLSFHIVGFF
+                     RLSIRGYIFFLWESF"
+     gene            complement(260224..260601)
+                     /gene="rpsl2"
+                     /db_xref="GeneID:814610"
+     CDS             complement(260224..260601)
+                     /gene="rpsl2"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ribosomal protein L2"
+                     /protein_id="NP_085552.2"
+                     /db_xref="GI:26557001"
+                     /db_xref="UniProt/Swiss-Prot:P92532"
+                     /db_xref="GeneID:814610"
+                     /translation="MPTFNQLIRHGREEKRRTDRTRALDKCPQKTGVCPRVSTRTPKK
+                     PNSAPRKIAKVRLSNRHDIFAHIPGEGHNSQEHSTVLIRGGRVKDSPGVKSHCIRGVK
+                     DLMGIPGRRRGRSKYGAEKPKSI"
+     misc_feature    260317
+                     /note="C to U RNA editing"
+     misc_feature    260318
+                     /note="C to U RNA editing"
+     misc_feature    260333
+                     /note="C to U RNA editing"
+     misc_feature    260381
+                     /note="C to U RNA editing"
+     misc_feature    260406
+                     /note="C to U RNA editing"
+     misc_feature    260456
+                     /note="C to U RNA editing"
+     misc_feature    260498
+                     /note="C to U RNA editing"
+     misc_feature    260518
+                     /note="C to U RNA editing"
+     gene            complement(260647..261006)
+                     /gene="nad3"
+                     /db_xref="GeneID:814611"
+     CDS             complement(260647..261006)
+                     /gene="nad3"
+                     /EC_number="1.6.99.3"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="NADH dehydrogenase subunit 3"
+                     /protein_id="NP_085553.2"
+                     /db_xref="GI:26557002"
+                     /db_xref="UniProt/Swiss-Prot:P92533"
+                     /db_xref="GeneID:814611"
+                     /translation="MMSEFAPISIYLVISLLVSLILLGVPFPFASNSSTYPEKLSAYE
+                     CGFDPSGDARSRFDIRFYLVSILFLIPDLEVTFFFPWAVPPNKIDLFGFWSMMAFLFI
+                     LTIGFLYEWKRGASDRE"
+     misc_feature    260655
+                     /note="C to U RNA editing"
+     misc_feature    260660
+                     /note="C to U RNA editing"
+     misc_feature    260753
+                     /note="C to U RNA editing"
+     misc_feature    260757
+                     /note="C to U RNA editing"
+     misc_feature    260795
+                     /note="C to U RNA editing"
+     misc_feature    260796
+                     /note="C to U RNA editing"
+     misc_feature    260858
+                     /note="C to U RNA editing"
+     misc_feature    260924
+                     /note="C to U RNA editing"
+     misc_feature    260938
+                     /note="C to U RNA editing"
+     misc_feature    260943
+                     /note="C to U RNA editing"
+     gene            complement(260963..261307)
+                     /gene="orf114"
+                     /db_xref="GeneID:814623"
+     CDS             complement(260963..261307)
+                     /gene="orf114"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /protein_id="NP_085554.1"
+                     /db_xref="GI:13449372"
+                     /db_xref="UniProt/TrEMBL:P92534"
+                     /db_xref="GeneID:814623"
+                     /translation="MTQPAIGWRVGLGPSIIRGPLVGKSPWSVFMIYGRTSKKPGPSR
+                     TSFLVYKRKYSSRKAALGGTLSHKVCKPFGMGFCFFLYFSICRFFASKERENKVGCND
+                     VRICTNFYLFSD"
+     misc_feature    260981
+                     /note="C to U RNA editing"
+     misc_feature    260999
+                     /note="C to U RNA editing"
+     gene            complement(261105..261461)
+                     /gene="orf118"
+                     /db_xref="GeneID:814624"
+     CDS             complement(261105..261461)
+                     /gene="orf118"
+                     /codon_start=1
+                     /protein_id="NP_085555.1"
+                     /db_xref="GI:13449373"
+                     /db_xref="UniProt/TrEMBL:P92535"
+                     /db_xref="GeneID:814624"
+                     /translation="MIGGDSVEAIERRLLAKYPEGSPSAEIIEMARIEAEDLFEIKAQ
+                     IIQRMALYDPTGDWMARGARALDNPRTTSGEESLERLYDIWKDLQETGPLSDEFSRLQ
+                     EKVFLKKGGPGGDPIA"
+     CDS             complement(261617..262078)
+                     /gene="nad1"
+                     /note="orf153b"
+                     /codon_start=1
+                     /protein_id="NP_085556.1"
+                     /db_xref="GI:13449374"
+                     /db_xref="UniProt/TrEMBL:P92536"
+                     /db_xref="GeneID:814625"
+                     /translation="MKNMVRLLLPLLGALAGSFCARFLGSEGSAIMTTTRVSFSSILV
+                     VSFLFCFHFHSFRLKGPQKKIIYLFLVFSMGFVGSLIRIEVIHLVGGLALPVLGPLVL
+                     NAIGGQALPSTGPSGSGSSSMWEEDSFELGVLEESDSPPAGGPERKRGNPR"
+     CDS             complement(264113..264433)
+                     /gene="nad1"
+                     /note="orf106e"
+                     /codon_start=1
+                     /protein_id="NP_085557.1"
+                     /db_xref="GI:13449375"
+                     /db_xref="UniProt/TrEMBL:P92537"
+                     /db_xref="GeneID:814625"
+                     /translation="MQQVFRREGINLYYYSNKTKKFSLDSWYLPQLHLLESKGNKKSK
+                     AATDQYFIHPSRTRQERDLTDRKHRPEQQQLQRRVTRWKKEVTTRSRPKETSSTHLPY
+                     HGSY"
+     CDS             complement(270238..270561)
+                     /gene="nad1"
+                     /note="orf107f"
+                     /codon_start=1
+                     /protein_id="NP_085558.1"
+                     /db_xref="GI:13449376"
+                     /db_xref="UniProt/TrEMBL:P92538"
+                     /db_xref="GeneID:814625"
+                     /translation="MTERNASGRMNTKGRSIKETKKAMNEEVGPFTLFLVTLGADLIN
+                     ALQREGRLGLSHLGMAEHKRFENQVKIRKGREKTERKAVAPVRAREIKNKDSCFPHTH
+                     IGCEK"
+     CDS             complement(272944..273423)
+                     /gene="nad1"
+                     /note="orf159"
+                     /codon_start=1
+                     /protein_id="NP_085559.1"
+                     /db_xref="GI:13449377"
+                     /db_xref="UniProt/TrEMBL:P92539"
+                     /db_xref="GeneID:814625"
+                     /translation="MAFNLFTTFTERLRLVSWRSVFEDTRSLRRFLIRIALVATGLVS
+                     KESAIVCHVLAGKVLRMYKTSRDPCLRIIVSLPLLSIYFRPDSIAKSNRKQRPWKEKI
+                     FHPDSRKMSCHCEPLPYQTRFDRQAPLQEYSIYPKSRKRLELDPQQNSTVVVQHDKF"
+     gene            274899..275222
+                     /gene="orf107g"
+                     /db_xref="GeneID:814599"
+     CDS             274899..275222
+                     /gene="orf107g"
+                     /codon_start=1
+                     /product="ORF107g"
+                     /protein_id="NP_085560.1"
+                     /db_xref="GI:13449378"
+                     /db_xref="UniProt/TrEMBL:P92540"
+                     /db_xref="GeneID:814599"
+                     /translation="MEFFQLSRKIGCNITQSPIYLFLARKTNRCLCPAHTDFILAVVG
+                     PTTLLLSFEISYEKKVDKKNNFKTGIVVQSVRAPPCQGGSCGFEPRQSRPSHNCVLRP
+                     GLATK"
+     gene            275103..275176
+                     /gene="tRNA-Asp"
+                     /db_xref="GeneID:814612"
+     tRNA            275103..275176
+                     /gene="tRNA-Asp"
+                     /product="tRNA-Asp"
+                     /db_xref="GeneID:814612"
+     gene            278649..279152
+                     /gene="atp9"
+                     /db_xref="GeneID:814613"
+     promoter        278649..278656
+                     /gene="atp9"
+                     /evidence=not_experimental
+     CDS             278895..279152
+                     /gene="atp9"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ATPase subunit 9"
+                     /protein_id="NP_085561.2"
+                     /db_xref="GI:26557003"
+                     /db_xref="GeneID:814613"
+                     /translation="MTKREYNSQPEMLEGAKSIGAGAATIASAGAAIGIGNVFSSLIH
+                     SVARNPSLAKQSFGYAILGFALTEAIALFAPMMAFLILFVF"
+     misc_feature    278947
+                     /gene="atp9"
+                     /note="C to U RNA editing"
+     misc_feature    278977
+                     /gene="atp9"
+                     /note="C to U RNA editing"
+     misc_feature    279061
+                     /gene="atp9"
+                     /note="C to U RNA editing"
+     misc_feature    279118
+                     /gene="atp9"
+                     /note="C to U RNA editing"
+     CDS             279463..280251
+                     /note="orf262"
+                     /codon_start=1
+                     /protein_id="NP_085562.1"
+                     /db_xref="GI:13449380"
+                     /db_xref="UniProt/TrEMBL:P92541"
+                     /translation="MYLLIVFLSMLSSSVAGFFGRFLGSESVSRFNLIIFLILLVFSI
+                     CLFRSLKQYLGKRMTQWCYLALVCQISLFLVLLRSHILAGFGTFSADVFTVFMGTFSV
+                     TGSSGGIVNHQDGASSEWFTYTSDMVEDSASSGRTSSSVNQPIPEEQAWEREARAQEH
+                     DRISAEVETITSACENLEAAMVRKAQILLHQRGVTLGDPEDVKRALQLALHDDWEHAI
+                     DDRKRHFTVLRRNFGTARCERWNPFIDELRGLGNHQVNARHYVD"
+     CDS             281614..281931
+                     /note="orf105a"
+                     /codon_start=1
+                     /protein_id="NP_085563.1"
+                     /db_xref="GI:13449381"
+                     /db_xref="UniProt/TrEMBL:P92542"
+                     /translation="MQSPAMKRIKSSSHSRWDGSGSVNEMPFPSTIRLQGSFWECSTR
+                     RHMCYILRYLFRANGHRHFSYERLDCRNQTLRLPDHLYQPSRPHLLPHLSQLLLVRDS
+                     GYL"
+     CDS             complement(283034..283789)
+                     /gene="nad1"
+                     /note="orf251"
+                     /codon_start=1
+                     /protein_id="NP_085564.1"
+                     /db_xref="GI:13449382"
+                     /db_xref="UniProt/TrEMBL:P92543"
+                     /db_xref="GeneID:814625"
+                     /translation="MSVLRRIPQDGTFHQEGPIHRLAKRRPRFIASFDLSAATDRWPV
+                     PVIYELMACLFGQTMASCIVNGALALNSCSLKSVTGRHDEVVFVAGQPLGYYGSWALF
+                     ALSHHAIVWLAALRAYPHQTRPFLDYALLGDDIVIADRSVAKEYRSLLDALQVDISDA
+                     KSIVSETGCLEFAKRFWVKIMSKDLSPVSAKAVLESYFLVGTQQLAYKYKLSPKTCLR
+                     LNKAGYRVLGQMDTTLRPYPGVLVGFRRYLVSF"
+     exon            complement(287917..288108)
+                     /gene="nad1"
+                     /label=nad1_ex3
+     misc_feature    287942
+                     /note="C to U RNA editing"
+     misc_feature    287997
+                     /note="C to U RNA editing"
+     misc_feature    288006
+                     /note="C to U RNA editing"
+     misc_feature    288031
+                     /note="C to U RNA editing"
+     misc_feature    288041
+                     /note="C to U RNA editing"
+     misc_feature    288077
+                     /note="C to U RNA editing"
+     misc_feature    288084
+                     /note="C to U RNA editing"
+     misc_feature    288085
+                     /note="C to U RNA editing"
+     misc_feature    288087
+                     /note="C to U RNA editing"
+     intron          complement(288109..289004)
+                     /gene="nad1"
+                     /number=2
+     exon            complement(289003..289083)
+                     /gene="nad1"
+                     /label=nad1_ex2
+     intron          complement(289084..318003)
+                     /gene="nad1"
+                     /number=1
+     CDS             complement(289197..289517)
+                     /gene="nad1"
+                     /note="orf106f"
+                     /codon_start=1
+                     /protein_id="NP_085566.1"
+                     /db_xref="GI:13449384"
+                     /db_xref="UniProt/TrEMBL:P92544"
+                     /db_xref="GeneID:814625"
+                     /translation="MMVTALQILFSLIRYVTETIRSVSVLFSDSEDEPDDEASSSDKD
+                     VSDATLPARTTYSIVIFLAGSVRRDKREMLSISQKGTLSPAMLPRPAYRGIEREDRGG
+                     VQSK"
+     repeat_unit     290991..297580
+                     /note="repeat III"
+     CDS             complement(292973..293431)
+                     /gene="nad1"
+                     /note="orf152b
+                     identical to orf152a"
+                     /codon_start=1
+                     /protein_id="NP_085567.1"
+                     /db_xref="GI:13449385"
+                     /db_xref="UniProt/TrEMBL:P92545"
+                     /db_xref="GeneID:814625"
+                     /translation="MPTILSSKPAFNSLFSYHLIGLISNKLVTLAPDYTGTKKTTWGA
+                     RLHLQELRVQTKHRQIEPDIKNLPLQPIRYGSFRPVFHWIEKPCMLIGLCGLHSEVSF
+                     IANWPWGKPSEIGGCSIPCMLTGRGSELSYHIRASRRPLPGNRFHFQSFF"
+     CDS             complement(293976..294296)
+                     /gene="nad1"
+                     /note="orf106g"
+                     /codon_start=1
+                     /protein_id="NP_085568.1"
+                     /db_xref="GI:13449386"
+                     /db_xref="UniProt/TrEMBL:P92546"
+                     /db_xref="GeneID:814625"
+                     /translation="MLHRGRSCLTGLFPCYLLSNWLNSNLCWIPLKLVIPCFQLIVES
+                     YLLEFLLLLAISTCLLGEDSLIWLTLVVAHSKSRQSSSQEPLDTRMATRALLDQLRSD
+                     RRHN"
+     gene            complement(296688..296784)
+                     /gene="tRNA-Ser"
+                     /db_xref="GeneID:814614"
+     tRNA            complement(296688..296784)
+                     /gene="tRNA-Ser"
+                     /product="tRNA-Ser"
+                     /db_xref="GeneID:814614"
+     gene            complement(296820..298204)
+                     /gene="atp6-2"
+                     /db_xref="GeneID:814615"
+     CDS             complement(296820..297869)
+                     /gene="atp6-2"
+                     /codon_start=1
+                     /product="ATPase subunit 6"
+                     /protein_id="NP_085569.1"
+                     /db_xref="GI:13449387"
+                     /db_xref="UniProt/TrEMBL:P92547"
+                     /db_xref="GeneID:814615"
+                     /translation="MERLTRLNHFLVNMRWDFYEGVIQAGYIRNLQRELDHTPAELLG
+                     SKLDLIFFRESLNLSTYVNNWYMQNLGVPGPVNFIEKYHDACFSNYMKLMEIPSPLDQ
+                     FEIVPLIPMHIGNFYFSFTNPSLFMLLTLSFFLLLIHFVTKKGGGNLVPNAWQSLVEL
+                     LYDFVLNLVKEQIGGLSGNVKQMFFPCILVTFLFLLFCNLQGMIPYSFTVTSHFLITL
+                     ALSFSIFIGITIVGFQRHGLHFFSFLLPAGVPLPLAPFLVLLELISYCFRALSLGIRL
+                     FANMMAGHSLVKILSGFAWTMLCMNDIFYFIGALGPLFIVLALTGLELGVAILQAYVF
+                     TILICIYLNDAINLH"
+     promoter        complement(298196..298204)
+                     /gene="atp6-2"
+                     /evidence=not_experimental
+     CDS             complement(300966..301301)
+                     /gene="nad1"
+                     /note="orf111b"
+                     /codon_start=1
+                     /protein_id="NP_085570.1"
+                     /db_xref="GI:13449388"
+                     /db_xref="UniProt/TrEMBL:P92548"
+                     /db_xref="GeneID:814625"
+                     /translation="MMTLFTQEPSFSIKCLPTTPSKPHRSLLAARLLFLSISDFSCRC
+                     PKIKLSLKGYRFIYCVRVVPSPSSLAKAQSDRLNIGLIRRSLKLCCARDIRLELGQVL
+                     ISIPLLLFL"
+     gene            complement(302166..303689)
+                     /gene="atp1"
+                     /db_xref="GeneID:814616"
+     CDS             complement(302166..303689)
+                     /gene="atp1"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /product="ATPase subunit 1"
+                     /protein_id="NP_085571.2"
+                     /db_xref="GI:26557005"
+                     /db_xref="UniProt/Swiss-Prot:P92549"
+                     /db_xref="GeneID:814616"
+                     /translation="MELSPRAAELTNLFESRIRNFYANFQVDEIGRVVSVGDGIAQVY
+                     GLNEIQAGEMVLFANGVKGMALNLENENVGIVVFGGDTAIKEGDLVKRTGSIVDVPAG
+                     KAMLGRVVDAMGVPIDGKGALSDHEQRRVEVKAPGILERKSVHEPMQTGLKAVDSLVP
+                     IGRGQRELLIGGRQTGKTTIAIDTILNQKQINSRATSESETMYCVYVAIGQKRSTVGQ
+                     LIQTLEEANALEYSILVAATASDPAPLQFLAPYSGCAMGEYFRDNGMHALIIYDDLSK
+                     QAVAYRQMSLLLRRPPGREAFPGDVFYLHSRLLERAAKRSDQTGAGSLTALPVIETQA
+                     GDVSAYIPTNVISITDGQICLETELFYRGIRPAINVGLSVSRVGSAAQLKAMKQVCGS
+                     SKLELAQYREVAAFAQFGSDLDAATQALLNRGARLTEVPKQPQYAPLPIEKQILVIYA
+                     AVNGFCDRMPLDRISQYEKAIPNSVKPELLQALKGGLTNERKMEPDAFLKERALALI"
+     misc_feature    302206
+                     /note="C to U RNA editing"
+     misc_feature    302275
+                     /note="C to U RNA editing"
+     misc_feature    302398
+                     /note="C to U RNA editing"
+     misc_feature    302512
+                     /note="C to U RNA editing"
+     misc_feature    302580
+                     /note="C to U RNA editing"
+     CDS             complement(303836..304720)
+                     /gene="nad1"
+                     /note="orf294"
+                     /codon_start=1
+                     /protein_id="NP_085572.1"
+                     /db_xref="GI:13449390"
+                     /db_xref="UniProt/TrEMBL:P92550"
+                     /db_xref="GeneID:814625"
+                     /translation="MITRLFAQLVSLSIVTYWNDAIVATNFSWLFITFFVMTFTFRTF
+                     SRYFKKPIIWTLYFFLCLIAFLLLWAARIHINILFSFAFGDVYSFFMAGVFLFYGFGE
+                     LLPIGSDSDVGEASWVVNPATGASGSGGNGWTESAANDPAREVSLAPFPPQLTHPVPF
+                     PAEPGSPDPVSPPPPIASFYSRIERAESLHAGNIELAEDLQRIQEMERNLENERSPYR
+                     GRELAARIDWEVRELEGKVARNRAWDMVRDAQLDIWRQGLDQELVRQQENESRLEERR
+                     FQSHSTNSLFEADSSRDN"
+     CDS             complement(304779..305084)
+                     /gene="nad1"
+                     /note="orf101b"
+                     /codon_start=1
+                     /protein_id="NP_085573.1"
+                     /db_xref="GI:13449391"
+                     /db_xref="UniProt/TrEMBL:P92551"
+                     /db_xref="GeneID:814625"
+                     /translation="MIHQINSINMEIILTDVARDALQEKIVSQLSILLRVYRDTNTSE
+                     SVTLPLSGVNLQEVAARSFFNNESIPWFFHLFRPCQSGHEKKPLCPQALELVLSFSS"
+     CDS             complement(305221..305562)
+                     /gene="nad1"
+                     /note="orf113"
+                     /codon_start=1
+                     /protein_id="NP_085574.1"
+                     /db_xref="GI:13449392"
+                     /db_xref="UniProt/TrEMBL:P92552"
+                     /db_xref="GeneID:814625"
+                     /translation="MKRYATMLSEFTGVVPSTFLSKLFLKCEINLSIGRVQDRKEPYR
+                     QAKSAVRLHGLNNTQYCTGRIRFAARSIPQSPLVPFRLFPQFPTPSVRQNLTTLHFDT
+                     REEDRALVSSG"
+     promoter        complement(305586..305594)
+                     /gene="nad1"
+                     /note="orf113"
+                     /evidence=not_experimental
+     CDS             308932..309369
+                     /note="orf145b"
+                     /codon_start=1
+                     /protein_id="NP_085575.1"
+                     /db_xref="GI:13449393"
+                     /db_xref="UniProt/TrEMBL:P92553"
+                     /translation="MWSYEGKCGFLLLSVYKEQVLDSYSPLTKENGISSNPRYIKRKF
+                     PFDSGFPFTRKLPAKVESFLCLPLFLSFLVANLILWLSFHSARVGHQKLSYHLLEWKA
+                     FPSSFRNKESKATCDLSSWSNPYFKRKAQIPFSFSRYLLKYLF"
+     CDS             complement(309338..309640)
+                     /gene="nad1"
+                     /note="orf100c"
+                     /codon_start=1
+                     /protein_id="NP_085576.1"
+                     /db_xref="GI:13449394"
+                     /db_xref="UniProt/TrEMBL:P92554"
+                     /db_xref="GeneID:814625"
+                     /translation="MSPTLSTGNRDQRGSSRSYAFVSLASHHFTPQGKITITSSLTRK
+                     IDPGNFQGQDVVDFSPWVVLPDEDKVAFLISCRVTAKIRKSGLPQLFKTNTSRDIG"
+     CDS             310514..310882
+                     /note="orf102"
+                     /codon_start=1
+                     /protein_id="NP_085577.1"
+                     /db_xref="GI:13449395"
+                     /db_xref="UniProt/TrEMBL:P92555"
+                     /translation="MGYGVCHFYLLFIINGAPQGLVTPSRGLRQGDPLSPYLFILCTE
+                     VLSGLCRRAQEQGRLPGIRVSNNSPRINHLLFADDTSSARWIPLAAQIWPIFFLSMRL
+                     FQGNPVNHPMSNLYFLGSLP"
+     CDS             complement(312545..313162)
+                     /gene="nad1"
+                     /note="orf205"
+                     /codon_start=1
+                     /protein_id="NP_085578.1"
+                     /db_xref="GI:13449396"
+                     /db_xref="UniProt/TrEMBL:P92556"
+                     /db_xref="GeneID:814625"
+                     /translation="MQPDLTLLGKLRSTWASATVNVIHPISLCLSWFLGTIGCSSPLP
+                     LRCADLRILLLKKKEFCLLPLFYHLGIFQHLFYPIIPLLAFCFYAPRLVCPAASLEFQ
+                     RRYVVWILAVSRHIVFLENSYYIMLLHPHHLHHPHPPFLIFLFLILRKLRRNRSVKAQ
+                     RIMQRSCHRLLFAPVGNDSELSAPSAPSESVVPLRRFNRQSVSTV"
+     misc_feature    314410
+                     /note="C to U RNA editing"
+     misc_feature    314421
+                     /note="C to U RNA editing"
+     gene            314627..315073
+                     /gene="rps7"
+                     /db_xref="GeneID:814617"
+     CDS             314627..315073
+                     /gene="rps7"
+                     /codon_start=1
+                     /product="ribosomal protein S7"
+                     /protein_id="NP_085579.1"
+                     /db_xref="GI:13449397"
+                     /db_xref="UniProt/Swiss-Prot:P92557"
+                     /db_xref="GeneID:814617"
+                     /translation="MGGLDGEQKLLIKKLVNFRMKEGKRTRVRAIVYQTFHRPARTER
+                     DVIKLMVDAVENIKPICEVAKVGVAGTIYDVPGIVARDRQQTLAIRWILEAAFKRRIS
+                     YRISLEKCSFAEILDAYQKRGSARRKRENLHGLASTNRSFAHFRWW"
+     exon            complement(318004..>318390)
+                     /gene="nad1"
+                     /label=nad1_ex1
+     misc_feature    318015
+                     /note="C to U RNA editing"
+     misc_feature    318083
+                     /note="C to U RNA editing"
+     misc_feature    318084
+                     /note="C to U RNA editing"
+     misc_feature    318126
+                     /note="C to U RNA editing"
+     misc_feature    318224
+                     /note="C to U RNA editing"
+     misc_feature    318389
+                     /note="C to U RNA editing"
+     CDS             complement(318588..319463)
+                     /gene="nad5"
+                     /note="orf291"
+                     /codon_start=1
+                     /exception="RNA editing"
+                     /protein_id="NP_085580.1"
+                     /db_xref="GI:13449398"
+                     /db_xref="UniProt/TrEMBL:P92559"
+                     /db_xref="GeneID:814567"
+                     /translation="MIVLKWLFLTISPCDAAEPWQLGSQDAATPIMQGIIDLHHDIFF
+                     FLILILVFVLWILVRALWHFHYKKNAIPQRIVHGTTIEILRTIFPCFISIFIVEPSFA
+                     LALDDAAEALFPNTAPTPSNTSSSEDSFGLRVLSEPWPITRNLGLESSICNRIRLLEA
+                     ANSPFLLGKEKGQYWGEIQECLYNVSEQREYYRLLDFENRDLQIRERKHSCLEVFRGV
+                     LLRNPYLEERAAYSPQEAFFDFLNERRDALDISNPGSSPAEMDRLEILFLGEIERDLL
+                     RRGDESLYIKQLLGD"
+     CDS             324259..324594
+                     /note="orf111c"
+                     /codon_start=1
+                     /protein_id="NP_085581.1"
+                     /db_xref="GI:13449399"
+                     /db_xref="UniProt/TrEMBL:P92560"
+                     /translation="MTFVPTDFLIRTPDDPAYFKDRLASPFSFRGCSKTTSTSSSIYS
+                     KKKASTATYFRVDPVPRGSQSSRVCEPKTKLIVYQPGNYQKKVKRQVTDPLSLMDKVK
+                     KRIDKTEIL"
+     CDS             325300..325710
+                     /note="orf136a"
+                     /codon_start=1
+                     /protein_id="NP_085582.1"
+                     /db_xref="GI:13449400"
+                     /db_xref="UniProt/TrEMBL:P92561"
+                     /translation="MPRTELILNAAVILYTMIPPDAHSLGSEGRVVNGNWRDTSDVKE
+                     GSLPREVTKQVNGSLSSRTKQVNEFSKHTRFLVDISFSCCSLINRSLWESAQKDELSD
+                     SFGKALTTKPECLAVRETPRNFRRNLCLVIPSLN"
+     CDS             326099..326509
+                     /note="orf136b"
+                     /codon_start=1
+                     /protein_id="NP_085583.1"
+                     /db_xref="GI:13449401"
+                     /db_xref="UniProt/TrEMBL:P92562"
+                     /translation="MLKRKLKPKRLQLPPQDVVFEGEAAMNEYTFYRNWVESWLQHIR
+                     SYYLLFIDGDPSLSKFFEIEICAHSWKRSTFDQQVFKFGLLWECVDIARSRTVYWQCA
+                     LGTGHIQEDKVSEATSPFTDDSCTNSCLSRMTGQ"
+     gene            complement(327890..333105)
+                     /gene="nad2"
+                     /db_xref="GeneID:814618"
+     exon            complement(327890..328078)
+                     /gene="nad2"
+                     /usedin=Y08501:nad2_cds
+                     /usedin=Y08501:nad2_mrna
+                     /label=nad2_ex5
+     misc_feature    327900
+                     /note="C to U RNA editing"
+     misc_feature    327954
+                     /note="C to U RNA editing"
+     misc_feature    327957
+                     /note="C to U RNA editing"
+     intron          complement(328079..329734)
+                     /gene="nad2"
+                     /number=4
+     CDS             complement(329082..329465)
+                     /gene="nad2"
+                     /note="orf107h"
+                     /codon_start=1
+                     /protein_id="NP_085585.1"
+                     /db_xref="GI:13449402"
+                     /db_xref="UniProt/TrEMBL:P92563"
+                     /db_xref="GeneID:814618"
+                     /translation="MLPAGCWNDTSRDGPGFRKMKGPKVEIGGYKFPISLGAENESTS
+                     RCDTAFSFLVGKERRSPSEPNRPMKNKRRAKPNGEAHAEQARRKISVEEKQPSSFPSH
+                     PGPKAVQSFLAKSRIWGFLLRYLTI"
+     exon            complement(329735..330306)
+                     /gene="nad2"
+                     /usedin=Y08501:nad2_cds
+                     /usedin=Y08501:nad2_mrna
+                     /label=nad2_ex4
+     misc_feature    329737
+                     /note="C to U RNA editing"
+     misc_feature    329766
+                     /note="C to U RNA editing"
+     misc_feature    329767
+                     /note="C to U RNA editing"
+     misc_feature    329813
+                     /note="C to U RNA editing"
+     misc_feature    329886
+                     /note="C to U RNA editing"
+     misc_feature    329955
+                     /note="C to U RNA editing"
+     misc_feature    330051
+                     /note="C to U RNA editing"
+     misc_feature    330055
+                     /note="C to U RNA editing"
+     misc_feature    330085
+                     /note="C to U RNA editing"
+     misc_feature    330093
+                     /note="C to U RNA editing"
+     misc_feature    330204
+                     /note="C to U RNA editing"
+     misc_feature    330225
+                     /note="C to U RNA editing"
+     intron          complement(330307..332944)
+                     /gene="nad2"
+                     /number=3
+     exon            complement(332945..333105)
+                     /gene="nad2"
+                     /usedin=Y08501:nad2_cds
+                     /usedin=Y08501:nad2_mrna
+                     /label=nad2_ex3
+     misc_feature    332989
+                     /note="C to U RNA editing"
+     misc_feature    333042
+                     /note="C to U RNA editing"
+     misc_feature    333099
+                     /note="C to U RNA editing"
+     gene            337669..337741
+                     /gene="tRNA-met"
+                     /db_xref="GeneID:814619"
+     tRNA            337669..337741
+                     /gene="tRNA-met"
+                     /product="tRNA-Met"
+                     /db_xref="GeneID:814619"
+     CDS             346757..347194
+                     /note="orf145c"
+                     /codon_start=1
+                     /protein_id="NP_085586.1"
+                     /db_xref="GI:13449403"
+                     /db_xref="UniProt/TrEMBL:P92564"
+                     /translation="MTKREYNSQPEMKEEVLAYLLQLSASLVLPVAIWLIAAGQIFTC
+                     LRGYTISNYQEKVEEKLCSTLVDKISEKLADLFPVYGITPSRNAPFPTILEQLLATVS
+                     QEERLAYLSNMYNSLIEMGIDSPCFYPIVQTFLFLMGGGGGPA"
+     gene            complement(349830..351413)
+                     /gene="cox1"
+                     /db_xref="GeneID:814620"
+     CDS             complement(349830..351413)
+                     /gene="cox1"
+                     /codon_start=1
+                     /product="cytochrome c oxidase subunit 1"
+                     /protein_id="NP_085587.1"
+                     /db_xref="GI:13449404"
+                     /db_xref="UniProt/Swiss-Prot:Q07063"
+                     /db_xref="GeneID:814620"
+                     /translation="MKNLVRWLFSTNHKDIGTLYFIFGAIAGVMGTCFSVLIRMELAR
+                     PGDQILGGNHQLYNVLITAHAFLMIFFMVMPAMIGGFGNWFVPILIGAPDMAFPRLNN
+                     ISFWLLPPSLLLLLSSALVEVGSGTGWTVYPPLSGITSHSGGAVDLAIFSLHLSGVSS
+                     ILGSINFITTIFNMRGPGMTMHRLPLFVWSVLVTAFLLLLSLPVLAGAITMLLTDRNF
+                     NTTFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGIISHIVSTFSGKPVFGYLGMV
+                     YAMISIGVLGFLVWAHHMFTVGLDVDTRAYFTAATMIIAVPTGIKIFSWIATMWGGSI
+                     QYKTPMLFAVGFIFLFTIGGLTGIVLANSGLDIALHDTYYVVAHFHYVLSMGAVFALF
+                     AGFYYWVGKIFGRTYPETLGQIHFWITFFGVNLTFFPMHFLGLSGMPRRIPDYPDAYA
+                     GWNALSSFGSYISVVGICCFFVVVTITLSSGNNKRCAPSPWALELNSTTLEWMVQSPP
+                     AFHTFGELPAIKETKSYVK"
+     promoter        355212..355220
+                     /evidence=not_experimental
+     CDS             complement(360717..361052)
+                     /note="orf111d"
+                     /codon_start=1
+                     /protein_id="NP_085588.1"
+                     /db_xref="GI:13449405"
+                     /db_xref="UniProt/TrEMBL:P92565"
+                     /translation="MKISYFIRRGKKTSRRSHFIKMKKNIITTQLFKPDNAFIFFSGI
+                     HGSVNRATYKYKISKTFGRFLAHISCLICILSKRIFVLSFSVIGSFCHPSIVHFDCLL
+                     FFLDTTPCL"
+     gene            complement(361062..361179)
+                     /gene="rrn5"
+                     /db_xref="GeneID:814621"
+     rRNA            complement(361062..361179)
+                     /gene="rrn5"
+                     /product="5S ribosomal RNA"
+                     /db_xref="GeneID:814621"
+     gene            complement(361350..363284)
+                     /gene="rrn18"
+                     /db_xref="GeneID:814622"
+     rRNA            complement(361350..363284)
+                     /gene="rrn18"
+                     /product="18S ribosomal RNA"
+                     /db_xref="GeneID:814622"
+     CDS             363725..364042
+                     /note="orf105b"
+                     /codon_start=1
+                     /protein_id="NP_085589.1"
+                     /db_xref="GI:13449406"
+                     /db_xref="UniProt/TrEMBL:P92566"
+                     /translation="MKYHFSSMEPWWKREFSFCIPAIYIKMASISLFQNSWLKMKHLP
+                     SCLFTQTTNTLGIYRKKKPNHSRDNPRINSNLSTNYAQAKSVERSRSNSLNSGPNPLE
+                     NAT"
+     CDS             complement(366086..366700)
+                     /note="orf204"
+                     /codon_start=1
+                     /protein_id="NP_085590.1"
+                     /db_xref="GI:13449407"
+                     /db_xref="UniProt/TrEMBL:P92567"
+                     /translation="MFGGRRRRLPQDGTFNQTQPFDRLVGSRHSFSFDLKSATDRWPL
+                     VFLFEVVQYLFDRSFASSVVNSAFACNIFEVPFVKLKRRFSQVCFVAGQPLGYHGSWP
+                     TFALSHHILVWWCAKQVHPGVRFTSYAVLGDDVVIADQEVAKVYESALGGLGVKISYQ
+                     KSLIPIQVLLSLLNASGLGNLVLKREIFPRESGVLSLMSVPSLA"
+ORIGIN      
+        1 ggatccgttc gaaacaggtt agcctactat aatataagga ttggattcta ataagttcga
+       61 aacaggttag ccttagccta ctataggatt agatctttct tatcaaccta ctaacttctt
+      121 ccttgttggg atgagaaacc cttttgcaac caagcgtgct ttgagtttgt caagggaccc
+      181 atctgcattc agtttcactc tgaaaaccca tttacaaccg agaagattca tgtcaggtga
+      241 tgcgggaact aagtcccaag tgtgattctg tgttaatgcc gacatctctt cttgcatagc
+      301 ttgtctccat cctgggaggc agacgtaatg gtttttggtt cagagggagt gtatttttgt
+      361 gtaaacaggt tgtaacgagg attaggcttg cgaataccat cctttgcccg agtgatcata
+      421 tgatgtctat taggtgaaag tagctcagga gcagctgtcc caacatcaaa aaaggtaccg
+      481 ctgtcgccaa taggaacagg atctgagcct gccgtacgca caggacagtc tctttctgat
+      541 gtggtagcag ttccaggagc aatcgcagag acaattgaag gatctgcaga ttcgcaatca
+      601 gagcctgtga aaccgggaag atgtcgagat agcaggagaa gaattagcag tatgtgagtg
+      661 cggaagcaga gtggaggatc gagggacctg tgaaggtgtg ataaaggaac tgtctgttga
+      721 aacagaaggg acatatgaag gtttagaacc aagctgccaa gcacgaagca gagaacttgg
+      781 atacggtggg atgagatggt gataacagtc ctgaaaacga aagactagag ctcttgccca
+      841 actgacaggc ttcacaaaca gaaagatccc ttttattaat aactatagct ttactagtct
+      901 tgagttgttg gagaacttga ggatttggat gcccaagcct ataatgccaa gtgagctcac
+      961 tagctgcttg ttgtcttgta gagtagaaga cttatagatt aaaattctcc aacatataga
+     1021 tgtccttaca ccgttttcct ttgctcagca ggctccgtgt ttgcttgtcc tttatgcata
+     1081 cttcgttagc atcaaactca aagaagcatg gataaatcat cacaaagttt ggacacagag
+     1141 agaagagact tagttatgaa aggacaaaca agaacttcat ttaatggtaa actacctgat
+     1201 gagcttggta gattggtaga tccaacatga gtgatgggca agaaggctcc atccccaacc
+     1261 atcacactgt cggatcccac ataaggttgt gactgttgga ggtgatgagc agaattggta
+     1321 atgtgagagg aggcaccaga atctggaaac cattctgtac cagaggtatc cgtgatctga
+     1381 agagcagcaa gagcttgcgg aatatcactg ttctggaagg agttgtcata ttggttccag
+     1441 cattttaggg ctgagtgacc agtttagcca cagatttggc aaacgggtcg agaagagaac
+     1501 tgatcagagg agtgggattg atgcaaacta gttgagggct tgatagcgtt gatgaaaccc
+     1561 tcttcctctt gtagagaatg agccgcgacc acggtagtta ccaaaccgag aataggactg
+     1621 tcatacgagt ataccagagg ctgaaggtat tgagtgatga tccttcctca accctaagct
+     1681 aggcgaatca ataacaaagc aagactactt tccatcagta cggggtacgc cataaacgaa
+     1741 gtcagaaggt tgtcaatcaa atatggtacg cgatgcttgt ccaaataaaa aacgatccct
+     1801 taaaagtgag ttagaaaggt aagagtcttg tacctagtag aacttctaat aactaataaa
+     1861 agctattttc tcagaagcga gagcgcacct ctcgcaacaa gtgctcgagt cactccgcac
+     1921 ctcgaaaaac aagtctcagg aagtgagtga gttatagtat tacaggagct aatagcttca
+     1981 gcagagaaat aagcacagga aatactggtg ggaatctcac ctccaagcct ttcatgggcc
+     2041 cgcgttgaaa tacccaacaa aaatccaaag atcattgata gcagaatgaa gaaactgaag
+     2101 atcctgctat aaagatctcc tcataatgta atgaaattat tataagaaat aaaatgaaag
+     2161 cgcagctaac attatccagg aacaaggaac aggtgatgga ttgagcagta ataaaggtaa
+     2221 tggttggaag ctgatggatg gccgagtaaa gaacccagat attagaaggg agaccgttac
+     2281 agtggttcaa ggtagctaaa ttcatgtttc gcgaatcata gaaaagaaag aagaagggat
+     2341 attggaaaaa agaagtcatg gatttagcaa tacaaagagg ttcagctgtt tccaaatacg
+     2401 gcggatctgg tgataaaggc caagggcgtg ctcttctatt aaaggaagaa cagaaacgaa
+     2461 aaagctctct ttgcgtacta tggatctctt acgtgcgata ttccttgctt tgactttgac
+     2521 gagcagcatc cagtacattc accccggctg ggcagtagcg ccaagaagaa cttcaagcta
+     2581 agagcgaaga gaaggtaatg atttcgctca gtagaaggtc acctacatgg atattaaggc
+     2641 tataagccgc aggtaagata tagttcatcg aaggggaatc ttgaatcaaa tgtcgattca
+     2701 tcttaattgt acgtcaattc taattcaatt tgttctaaac ttcctcgggg ctaaagcctc
+     2761 gtgattcaat tatggctctt tgctgctatt aaggtgatag tatctgctta gcccatagta
+     2821 ataagatggt agtgaccgct tatcctaagt cttccgctgg catgggtgta actgtccttc
+     2881 cagagtacct caaacagtcc tcttacgaag cctattcccg gccttactct gcattctttc
+     2941 tttctggttg tactaagcag gagcgttccc ccttactggc taggcggcta gtagatgctt
+     3001 ggctttcatt ccattccatt ctgatgataa atgaagaaag aggtgtcgga ctgggcatat
+     3061 aagcagctat cagaaatttc actcatgata ccaggcgcga ggagcctgtt taagaccata
+     3121 gcttttcgaa acctgcaaag agaagaagag tatcgaccgg gaggttgaga catgactctt
+     3181 ttcggttaag tgaccctgta gaaaagcgtt gttgacatcc aattgacgaa gagtccagcc
+     3241 atggtataga gtaagactct cagacgaata gtgataggct taaccacagg gcttaatcta
+     3301 tcaaaagact ctctcccagg ccgctgattc ctgtctaaaa cacgagagaa ccttgccttg
+     3361 tgggtgtccg atgccgaccc cgttgtgctg tgcgtcttgt ctttcccaac gaaagtgatg
+     3421 ccgattcagc gtgtgtggcc tgatgcggtt cagcctacca attagtgggg caaggagata
+     3481 tggtttcctt aggcataagc atgtcatgta tatgtacaca aggggaatag caactaaagc
+     3541 tcaatagcac cttagtgaac tacaagggct ctaccgctcg ggtccctcaa ccgagagccc
+     3601 ctcctcgcta cgatccaccc tttctctagc cctctactca accgctttag ctggcccgct
+     3661 tctgacagaa gtagcagatt caatctgtga atcccgtcta gagctcctca cactgtccgc
+     3721 atcatcatca acggcaagcg gatcagattt taagagcttc aaaatacata caatacccta
+     3781 aggagtgagg gatgtacctt ttggaaaggg actgactgaa cggaatcaat gacttacatt
+     3841 aacattgctc ttaaaagaag tcgaattccc ttcattcaga gcataattaa tctagtcact
+     3901 gatttgagtt atatagttat tattcatgtt tgttgttcgt tggacctttt ttgaaaggct
+     3961 ttgttaatgc ttatcgagtt atgatataag aaagggggac gactgaggaa atgccatgct
+     4021 aaggcctatt tccctgctgt ttcaacttct ttctatagcg attggacaca agtttttctc
+     4081 aagttaatgc aactccgttt caattccata atttgactca aactacatgt caatccccca
+     4141 gtccactata ggggagagtt cactccagtt agggggtatt accagcggct tcgaccacat
+     4201 cctatcctct ctagctactg gcttctgtgg ggagggcaaa cttctatatt tccctgctag
+     4261 tgcactactt ccttttcttt cagaacagag acggagccct gctttaaatt gacagaggag
+     4321 ttattggacg ccgggtatat ccagccatcc aaagcacctt atgaagcccc agtgttgttc
+     4381 cagaagaagc gggagcctac gtctatgcat cgactaacct atatctaaac aaggttaccc
+     4441 tcaagaaaat aagtacctga ttctgcgcgt agaagaccta ttcgatccgt tgggagatgc
+     4501 aagcttcttc accaagttgg atctaaagcc ggggtactac caggtgcgca tcgccgaggg
+     4561 taatgaacca aagaggagta tactttatga atgggaaagc agtcatcaag ccagatgcgg
+     4621 atgaaattct aactgctgcg cttacgcctt ttgattagac aactcctagt gccaagctaa
+     4681 ctaactagga ttcgttcaaa ggattcgtga aaacagaaag aagagctagc gactctgacg
+     4741 aatctaatgt tgcccgcgtt gaggcgaaaa gaaaggcgga ttccctatat aagtgaattt
+     4801 gttcttcaat cgattcatct cggccttaga ctgctacggt tagctcttgc cccgagcacc
+     4861 tggactggct atctcgaaag aaatgctttc atatcatagc tgctgacttg gctttgaaac
+     4921 tataggagct gaaggaacgg ctactgattt ggacctaaat catttcatcc ggaagtgact
+     4981 gcactcgcct aagccaaaac caagctactt tcataaccac caggaaagcc tagctacttc
+     5041 ttcgttcctc accctgtccc ttacctctta ccgtaggaaa agccgttgat attcgtagat
+     5101 cgttcgttga ttcccttgct tttgggcgat gattcgattc ttctgggctt gttgccacta
+     5161 aagaaagagt tctgccttct agccaagcct ttgagaccaa gtcaagcttc ccagcagtca
+     5221 accaagaaag agtgtttctc ccctactgaa gaaatggaag tcagtctgct ttccttctta
+     5281 gctttgttca catctgctct ttccaggact ggctatctct caatgtattc acgcatctcg
+     5341 aaatagtgaa aagaagtagt ccctcctccc agagctgaaa tagctggaaa aaaacttcaa
+     5401 cagaagtaat gccgactctt ctacttcctg gtacttttga ataaggcaag gcctttacct
+     5461 attccctatc acaaccactt tcagtagagg aacttcctct ccttttagtc gagttatagc
+     5521 tttcgcttct tcagatcctg atccttcatc agaggagatc gctttacttt tttataagag
+     5581 ccattggcag gaccaagaaa ctcccattca aggcgccaat cattcataaa taattcttat
+     5641 atatgatatg caattcgatg attcaatttc ttgtggaaaa gactagcgca atggcttaat
+     5701 gcatttcatt ggcagtttgg tcctaacaaa ttatgcttct tgggagagga gaacttccac
+     5761 caagcaatat atttgagttg gctaacccaa tgattgagct agcttctaat ccatttcaat
+     5821 ctcatcttac acagcagaag ttgtggattc atacctggaa gcaggcaatg ccgccacatc
+     5881 tcccctgtga aagaaggtta cgcagctgat tcggttaatc acatgtcaga ggcatctatc
+     5941 taaatcgcac atgtcagaac aagacctagg gatccggtgg caccaggagc gggtagccga
+     6001 gttgagatca ttccgttaag acacaaccac ttttacagta aacatgcatc cgcccagcgc
+     6061 cccctcctta cgaatggtta cgggactaga gcgagtctct ttttctttgc aagaataggt
+     6121 ctgagcctga tgacattcct gctttccttg ccatgtccaa ctaaagtgaa agttaatcaa
+     6181 ctgcaaggag gaatcgacct ttttctacta ttcatttgcg ccggaaaacc ccttcatcat
+     6241 ttgccctgaa ctggtgaaag gaataggagg acttttccct cggtggaagt agggatttaa
+     6301 gcacagttgt gattccgctt tcatgtcttg gattgagctt tctcactctt tctatgcctc
+     6361 ttccttgtcg gcgtcctttg ttcgctactg ctttaaagaa tgggatgagg ctacctgatc
+     6421 gtcgagtcga cttccatcaa tcaattggat tggatcttat tatcccatag tcataaatct
+     6481 gattcttcac ccagtgggaa cagtcttcat ggtggtacca gtgcgtatat atgtagatga
+     6541 gttttggttc ccaccctcga ttttccatcc aatcttccta tcttcttttc cctgctttgg
+     6601 aagcattcga tctctcgtaa agcaaaccct cgtcagagaa agggggcggg aagcctgcgt
+     6661 gcgattcctg ggttggatat ccctggctct cttctctctg agtccccgct agctttcaag
+     6721 cgttcattca atctccctcc caacgcaagg aaaccgatcg atgcacttgg ctttaggttt
+     6781 cgatcgataa taatttatat tatatataaa aacgcttctc gctggagact cgggtggggc
+     6841 gcttgctctc cactggcggt tgaatgaaag acgtacccat aaagttgacc taagttggta
+     6901 tgcctcccat cgaactaaag caagaaatct tccttcgatg cgagtctatc aaatgtttag
+     6961 ccacatctgc tgcgccccgg tcagtggagt caatagcctt atcagctgtc cctcccttct
+     7021 tttaaaatat tacccaggat cttatcccag gccgctgatg aattggcaac gaggcgagcc
+     7081 ttgtaccgct caacactgcc atcaggtttc cgcttaaccc gataagaaga cccacaacaa
+     7141 tcttttgatg aggagatgat ggtactagtt cccgggtgcc atggagaatt aatgcatcaa
+     7201 attcagcaga catggatggt ggcactagga tctgcatacg aggcttttgg tgtgcggacg
+     7261 tatgtgacca atgcacttat gctgccttca ctcaaaggaa agcactttca aagaagcaag
+     7321 ggatgaaggg gaggtaagcc tgtcttaaaa tcgattaact aagctagctt aattcttgaa
+     7381 ctacttgaac tagaggagcg gtacgagcgg cttactacaa aagaaaggct atttgagatg
+     7441 ctttttccgg gccaaagtac tctgacccat gataccgatt caagctcact gagaacggat
+     7501 ttccctagtt ttaaattcaa ctacggacca acaaagattg attgttgtcc cgttgacagg
+     7561 aagatggagg gttgttctcg tactcgcttg gcggggtggg cattcttatt tgtttaagac
+     7621 ttctcacagc ttttaggcgc taggagggct tgatagcttt gatgtaaccg ttttggtcgt
+     7681 tgttggaatt gatgagccaa acaagccagt tctcagcctt ctatagtatc cgtttgaata
+     7741 cgagacaaag gaatctatag gcctatctct cactttctaa tactactatt gctagtgcta
+     7801 aggagctaga gtatactagt ctagaaaaga atcccacata gttggctgtc ttactctaat
+     7861 accaatgcga agaaactcat taattcattg atgaggaggg cgaagaaact ttcagcttta
+     7921 gagaatccag ttccagcgga agactaagta cggtagcaag tcaaatcaat ctcctccaac
+     7981 agaaggctct gacctgacca cagtcaatca gtctattgtt ctggttctag agagtctaac
+     8041 cgagaagaaa tcgtctcatc gctttgagcc tggtccaggt gtgggggaat gagtcctcca
+     8101 tctagtagcc cagcccttcc gacaagagca gttacgcctt ttcctaaagc tcttactaaa
+     8161 ccaccgccta gcgaccacgg aaagcatagt cagaagagct gaaacaatag ccatgtcata
+     8221 ttcctcggga attgcctttc gaactgaact accacgactc tcgttaactg catcggattc
+     8281 gtgaatcaat tgctgcagat tatctcattc aattgcagcg gctttagcag acatctcttc
+     8341 aatttgcgta gatagaagat ccggctagaa actccttttc attgccttct actttaagtt
+     8401 ttaaattaat ttctctatat ctaattccaa gtgagatgtc caaaatcaaa ggatggaggg
+     8461 taagaatcga cgaggaatct ataagataaa atcgttcatt caaaaggaag aagaagaaga
+     8521 atcggacgat tgaagactag aatcgctaat agtcagtcgg agtggattga gaaagaattg
+     8581 taggactaga tagctctttc cttttttgat gtgctttctt gcgggaaaga aaaggtattt
+     8641 agttagctag gctttgacgc ttgttctcgg attcctttgc ttgtatattg tccgaaatct
+     8701 ctatggcaag tagtaggacc ctcacactct gttcgttctc ctgtccctat ctctctctgt
+     8761 cacatcctat cccgttcaga atctttggga tcaagaactc tcttccattt ctctgctcgg
+     8821 aaagaaggtt ggcaagagat atttttaaat tagggtgcgg caactaaaga ggtatcgaga
+     8881 ctgaccgcaa ttgcaggttg acttttcttt tcattttgtt tatggcaagt gaaagtcgtt
+     8941 tcgtttagta cgagatggct tcacaacctc gcggtgcttc cacctctcgc ctatcgaagt
+     9001 tctgttctaa aacctgttcc gagaacttgt atagagaagg atttcccgct aagcagcagt
+     9061 tcttccatac caacttagct gcccggcgct gctattggca taacaaccgg tacaccatag
+     9121 gttggcccaa cccagtcctc tcgtactagg gttggctcct cgcagttctc cctttaacac
+     9181 caacggtaga taggaaccga actgtctcac gacgttctaa acccaactca cgtaccactt
+     9241 gaatcggcga acaaccgaac ccttgggacc ttcttcaacc ccaggatgtg atgagtcgac
+     9301 atcgaggtgc caaacgactc cgtcgataag agctcttggg agtcatcagc ctgttatccc
+     9361 cggcgtacct ttgatccgtt gagcgagagc ccttccacac gggactcccg gatcactatg
+     9421 gccgactttc gtctctgttc gaccagtcgg tctcacagtc aggcaggctt ataccattac
+     9481 gctcacgagc agaatcttag cttgagccta ccttcgcaca cctccgttac tctttaggag
+     9541 gcatccgccc cagataaact acccacctcg cagtgtcccg cctccccccg aattctcggt
+     9601 gcggcggtta ggcaccctta gacgaaagag tggtctttca ggattggtcc ttctatgtca
+     9661 cgacctccca cctatcctac acattcgatc aaggttgtca ctgcgaagct atagtgaagg
+     9721 tgcacggggt cttaccgtct agccgttggt actccgcatc ttcacggaga attcaatttc
+     9781 accgggtcca tgtcggagac agcggggcag tcgttacacc attcgtgcag gtcgctactt
+     9841 atgcgacaag gaatttcgct accttaggac agttagagtt actgccgccg tttaccgggg
+     9901 cttccattca aagcttataa cacttctcct tttgactttc cagcaccggg caggtgtcag
+     9961 actctataca tcgtgttacc acttagcaga gtcctgtgtt tttaataaac agtcgctacc
+    10021 ccctggtatg tgccgctttc ctaatcaaaa gataggagag caccccttct cccgaagtta
+    10081 cggggtcatt ttgccgagtt ccttcgacat ggttctctca agcgccctag tatactctac
+    10141 ttgttcacct gtgtcggttt ggggtacggt cagttcaccg ggaggatcgc cctcccaatt
+    10201 cgaagttttt tcctggaagt ttcaaccttc ttgactatga caagagtcgc gactataaac
+    10261 agactcgcga ctatggcagg gcggtacgct ctgctctctc gcgaccccta ctctaatcaa
+    10321 aagactaaag gcccctactg aaggtcgcca aactacgacg agactttcgc cttttgaagc
+    10381 gccagtagcg tagggcgacc gggccaggcc gagtcagaaa ggctttgatg actcaaggtt
+    10441 catattaggg aaaggagagt gaggggaaga gggctacgct gccctcggcc cgatcatcca
+    10501 attcgctcca agagagaggc atggttttgt agtcaaagca acttcgtcac tttcgtgtac
+    10561 ccatcggacg gcagcccttt cgggggttcc ttagggaccg attcactctg cgtagattga
+    10621 ctgaacgcag aaagccttcc actggcaggc gatcgtgttt ttcacaggat ttatcgttac
+    10681 tcatgtcagc attctcactt ctgatatctc caggtcttgt caccaaaaac cttccccgat
+    10741 tgacagaacg ttccgctact gacacttgaa aaagcagctt tcaaggtctc gtcgcttcgg
+    10801 tgaatcactt gagccctgat acattttcgg tgccatggag ctagaccagt gagctattac
+    10861 gctttcttca aaggatggct gcttccaagc ccacctcctg gttgtcatcg ctcgatcact
+    10921 tccttttcca ctaagtgatt gcttagggac cttagcgtac gatctgggct gtttccctct
+    10981 cgactttgga tcttagcacc ccaaaagtct gtctgtacaa acgagaacgg cctgtattcg
+    11041 gagtttccct ggggttggta aggcgaaatg gggccaccct agcccattga gtgctctacc
+    11101 tcgggccatc gacatcatac gctctactga aatagatttc gcggaaaacc agctatatcc
+    11161 gatcttggtt ggcctttcac ccctagccac aagtcatccc cgtattttgc cacatacgtg
+    11221 ggttcggtcc tccaaggcct gttagagctc tcttcaacct gctcatggct agatcgatcg
+    11281 gtttcgggtc aaataggaag aactagaaga ttccaccttt ggaaagcgcc tacacctaat
+    11341 ggcttaagcc gctcttccca tttcctcgct gacccatcat gcaaaaggta cgccgttaga
+    11401 gtgagtgcgc ttgactactc cttcgactga ttgttcgcat cggatctcag gttctctatt
+    11461 gcactcccgt catagggttc ttttcacctt tccctcacgg tacttgtacg ctatcggtca
+    11521 ttgaggaata cttaggctta gagggtggtc cccctttctc gcgtaaaagc gatcagaatt
+    11581 cgaacacgcc gcgttttact gggaaggatc gaaccatagg aacgaatcta cagggctatc
+    11641 accttctttg gccagatctt ccaacctttt cacaattaca gttcacagcg ccctttagga
+    11701 atcttcagtc aaggtacgaa gtaaactcga ctgaaagaag aggggctttc tggtttttcc
+    11761 atcatccaat ccacaataaa tcgaatgaaa cctggcgaaa aagaagtgaa cactttggaa
+    11821 cgaagcttcg tctttctttt tcttaaaatc ccaaatccgc tctcgctcgc cgctactaac
+    11881 ggggtctcgg ttgatttccc ttcctttagc tacttagatg tttcagttcg ctaagttttc
+    11941 aaagtccaaa gagcgcagac tagccacgga gcttggatac ggtttcccga tcggagatcc
+    12001 atggatcaca gacggtatct ccccatggcc tttcgcctct gaaagcgtcc ttccttctca
+    12061 atgcccgggc atccatccaa tgcattcttt tcgatcttgt actcagggta cactgaacac
+    12121 cacaaaaata tcgatgaaac taactataag tgattgcgga ttcgaaccgc tcacagaagg
+    12181 atttacagtc ctgcactcta ccagagctac tacctgttac cactttcttt tcaactcgta
+    12241 aaggcaaaga aaaaagggat ccgcctcgaa tcaaaacgtt ctttcttttc taaaaacgat
+    12301 ctttcttctc ttatgaaatt gatagtttgt gagagggatg caataactcg actgtgtaag
+    12361 gtccaccagg tccgtaggag agtcagtctt tctctaagca agctgttttc tggcctatac
+    12421 ggaaagagtt ttcaaggtct ttccctttcc ccgaagggaa gttaaagctt gcctgtaaag
+    12481 ggaagggaag cacagcaata acgccttttt ccttttttct cccgattaga cgaaagacgt
+    12541 gttggggctg tatccgaaac atttttgaac tgccggcccg acaaccaagc ctataaccgg
+    12601 caataagagg tcaagttgag acatacaaag gcgccagtga agagcttaaa atcagtacaa
+    12661 gggctgctat tgaagggatt aaatctccca atcaaggact gcttaggtcc cttctagtcc
+    12721 gtcctgtatt ttagtatgat cgacgtattg tattttacct atacgggcat atgttcgact
+    12781 tcctgatcaa gggcatccgt ccacaaatta ttttatgtaa atatagatat tccctacttt
+    12841 tcataccatg ggaaatccac tcggacgaag acatagatga taggattatt tgagagaaga
+    12901 gccgtgccct acctattcta caaaacctta tcctagtttt tactttattt tttttatata
+    12961 taagcattca tcaacggatt ctgcttccac tgccttagtg ctaattacaa actaattatc
+    13021 ctcatcccaa aaccccttac aaagaaaggt aggcgtgcaa taagagaaag cttaccttgc
+    13081 cccgagagag accataggat aaagtaagcg tgcaagagct agagaatccc ggggtatagt
+    13141 gagagacctg gtgcaagctt ttagagtatg gctatcagtc ccaagtttaa gctaatcact
+    13201 cgtaaggtta tgtagtccgc ctatagccca gcctttaagc atcaacagaa gatccagcgg
+    13261 tatagtcgtt ctctgtatcc aaatgctaga gtaggagagg agtaagctaa ccttatccaa
+    13321 cctttgtatc cgtaaatccc tatatagcct accagcgttg tacccttttg cgagcaagtc
+    13381 agcctctatt tattaaaccc tttctagagt caccagctaa tcctaaaatc cctggtgcgt
+    13441 aagcacaagc taccctttag tagtcagcaa ccctcctaag ccttgttttt aggagtaacc
+    13501 tgggatcttt ttaagctcgt tataggaaga cgcgctatat gagtaatcta tcctagtaga
+    13561 cattatacta ggaagctcct gagtcaacct ctaatctata agccagtcag cgggatatat
+    13621 gaatatgaac ttctctaaga gaacagttca acgggtttaa ttgaattccg ttatctgatc
+    13681 agattagata ttatctaaga ctgtataata gagacctaac taactggatt gctagcagtg
+    13741 atcatacccg atgatgtgaa cccgattgaa tcagctcttg tgatcatatc ccatgctatc
+    13801 aatgggagtg aagtccgcct atccatcaac ataaaagtga aaggagcccg ctaactactg
+    13861 cacttcctaa tgtgatattg agttttgcct aaacttgccc gcccatactc ttaacttttc
+    13921 gcttaagctc tcagactaga gctattctca ctaaaccgaa aattataata tacccctctc
+    13981 tcctctctcc tcaatggcta aataccaaag ctaaccttaa atgggttaaa gaccgagcca
+    14041 gaacaggagc ttagcctcaa tcttagatag gagattaccc ataacaactc aactacgtcg
+    14101 agccttgtct tgaacaagcc aatcaacctc aatcctagga aatgatttca ggtgaagctc
+    14161 acccctcttg ttctcatctt aagggaaggc ccaggcgcag aaggaagact ctttgatcgg
+    14221 gtagttcctg ttctcagtca tcttaggtgc ggttggccaa ttaattactt aaatttatag
+    14281 cgcgagcccc actcatacaa gtaggggaaa actctagttc aactgttttt ttattcagat
+    14341 tcagaattgg ctgggcctta accgtagctt tagggccaaa agcctatcta tggccttttg
+    14401 gctgggggta gctatggcaa gggtagtttc ctcggaaaaa ggaggggaag ccccgacttt
+    14461 ccaataaggc ccgggggaat gaaaacactc aaccgatggg gggaggaatg aacagcagct
+    14521 ctaggggata aatgtggata tccagtggca actgagcagg agcagctcta cccgcacata
+    14581 gctaaatatc ccccttccac gccacccagt tggtgagctc cgcccttaag ggagcccatc
+    14641 cgtcggtcgg ccaatattgg atagtaaact agcgcacagg gtaactcgaa tatcaaatat
+    14701 aagaatagca gccccggtct caaaaacaag catggaaaca gcagtcaatc agaaagataa
+    14761 gttgaagtta gctttcagct gttcaaccac taaagactaa agtgggtgaa gcatagtttc
+    14821 gtccgtccct ctcgctttca gcgcgggata gagttatcag tacgtgcgac accaactctt
+    14881 gcgttttcac ttttttttta tacatatata gataaattcc taccatattt cgttacttta
+    14941 tcccgaaaaa ggggagctgc tgggctcctt cctgttttat tagcatcaaa cccactcaac
+    15001 aggatattcc ataagcactc cggaaggtat agtgtgaagg aaagtaggtc ccttacatga
+    15061 gagtttgcga acacgggggg cttatgctcc tgctgctcat atggaacagg ttggccccca
+    15121 aaagccccaa gggaacgagc gaaagggctg cttgccggaa cagcagatcg agcagcctat
+    15181 tatttattac atttctgccc caactattcc aaaaagggta aaaaatgggt ctaattctga
+    15241 gacaaaagta atttaacatc cctgattcgg aaactgctgc agtgggaact ctacttgcca
+    15301 ttcctccttc cgaagtgaaa gttttcattc tccccttggc ccagttgaaa agcctaaccc
+    15361 caggtgaagt accagattct taagtgaaag taaggaattg cccctgtctg attcgtaaag
+    15421 ctttcggaga aggaggggag atccgcggag gtagaattct gaatacgggt acgaagggtc
+    15481 atccagaagc gctggaaggg ctggaggaag gggacgaacc gcatcgattg agttatatcc
+    15541 ggcaggctaa cacccacgta cctagaaagg aaggggaaag tcttaccttt tggaaacata
+    15601 aggcgcatca gaaaggaatc cggatatatt tagatagcca gattcacgag ctgaggacta
+    15661 agtcgaagcg aaagtctgtg ccattgttag tgccaagacc ctagaatagt gaacaccaag
+    15721 tgaagagcca cctccgggaa cgtcgagaag ggggacgaac agctttgatt ggatccgatc
+    15781 tggtagacag agacgaagac aaggatgaat aatctgaaga aggctatgcc gaatccgagg
+    15841 gaagaagaag actacgaaga aggaagccga tgcctaggcc gaggctggtg cctaattcta
+    15901 tgcttgaagc ctaagctaga gctggagctg aagcctaatt ataagtttca tctaaagctg
+    15961 aagttagggc tggagcctga gttggtgttg atgtcctaga gtagcgaaac tacaagtgaa
+    16021 ggggcgctat ccggaaccct taaggtcggg gacaatctgc ttcgattggg ccttgcaggc
+    16081 agacaggcta acttgtggag acgaaatgga atcctataag aatgtcgagt cccgagggtc
+    16141 tgcatgcaag aggctgagct aacaagctgg ctgaatatat catggggatg cccgatttgc
+    16201 gattgagcta gccgataagt ataatctctc cggtctggca tgagccactc cattcctact
+    16261 tccactcaac aaaaaagata ggattcttgg gctctacctc cagagtgcgt acggctattt
+    16321 cagaccaaag ggggccaagg gggatcccat aaataataaa gtaagagaag taacccgggc
+    16381 tatgagctat ttcgtcgttt tcttgccgga gttcgctagc cgttcaatcc gatcttccct
+    16441 tcgcatggca gaagagcgcg gctcgagact actctttcaa ttaaaaatga aaggcaagca
+    16501 ggaatttaac ccactaatgg gccagctgca taatcacgta tgtaagcaag tgatcctctt
+    16561 ctttcgcagg aagcattact catttcttca gagttgaaga aaaggctacc gatttcccac
+    16621 tgactattat gtaagggctt gaccttggat gataaattca tggctaagtt agtccgtagt
+    16681 acgaagcttt caattcaagc accttttttc tatactaaag gccaaaggga agttgaagcc
+    16741 caacatcccg tggtcgggat ggacaccggt ctggcccaca cctacatgga cttcaaattg
+    16801 acttctgatc gtaactactt taaaaacctg tggttcctca cgcctagtcg acataatgtc
+    16861 tggcgtttac ctgacggttc cccaagcccc tgagctcatc aatgaacgga ttccatcttt
+    16921 cacagcgagc tgttccgaag tcgcgcctga gcacagtgaa atgcctctta cggtcatcta
+    16981 tatcgtgctc ccagtcgtca tggagagcca actggagagc acgcttgaca tcctctggat
+    17041 ccccgagagt tactccacgt tgatgcaaga gaatatgggc tttccgtacc atggccgcct
+    17101 ccaaattctc gcaggcgctc gtgatagtct ctacctcggc agagatgcgg tcgtgctctt
+    17161 gtgcccgagc ctccctttcc ccaccttcac caaatggttg atccaggtga tcctgctgtt
+    17221 gagctactgg ggaagcggcg ggttcgggta aggcattgga tggcccagcc tgttccctct
+    17281 gaatcggttg attcaccgac gaggaggtac gcccggaact ggccgaatcc tccaaatcag
+    17341 acgtgtaggt aaaccactct gaataggaag atgccccgtt agggcccgta gaaggaagtg
+    17401 cttcccctcc ccctcctatg gcattccaga ccaaaggagg taccaacagg ggcaaagcca
+    17461 gacccagtag gtggatggct ttgatccgta tcaaagatat tcccaccgcg atgaaaaaga
+    17521 ccaaaaagag atagataatc ttcttcagtg gccctttcaa ccggaaggaa tgcaaataaa
+    17581 aggcggcgac cccgccgagg agtgtaagtg taaccactaa aacagtggtg gtcgccaatg
+    17641 atggatttcg agccacagaa tggatcaaag aactgaatac gtttccaata ccgatagcag
+    17701 ctcccgctga agcaattgta gcagctccgg cacctattga ttttgcacct tctaacatct
+    17761 cgggttgaga attatactca cgctttgtca ttcacttatc ttatatctta ttgattcctc
+    17821 gtcgattctt accctcgttc ctttgatctt ggacatctca cttggaatgc attctttgcg
+    17881 atcttatacc cacggagtgg tagactgaag accaactgaa tctcgacaaa gagaagtaca
+    17941 agcaagtaga tcattcgacg tcagaggggc tgatgtgggt cgggctatct atcttgctat
+    18001 cttttttttt atacataccg tattattcta tcaatcggga agttggcctc actcaatagc
+    18061 gggaagttgg cctcactcaa tcaatagtcg gatacaatcg tggacaccgt agacctgctt
+    18121 cgcctagagt ggatcaggtc cttatacgta aggcgatccc gcgaagccgg tcaccggagg
+    18181 tgaagtcaac catcatttct tttcttacta aattcaacta tgtattctga ttaaagagct
+    18241 tattctcttg attcatgtcc accttgcccc tggtaactta aggatgagtt tcgaaattag
+    18301 gaaggattgg actgcagatt tagaaagaga atggggagcc ctcatcaatg aattaatgag
+    18361 tttcttcgcc ctctcttggt gatagtagct cttcatcagc ttatagtagg agctgagctc
+    18421 ttccgctagc ttactagctc tggtagtcag gctcttcggt caatagttct actggaaaca
+    18481 gcttcctgta tcactgcttt ttaatttttt tttaatgctt atattttctg taaagatctc
+    18541 gatgaccgct taattcatct ttcctttccc gctcctgaat gaaaagtggt tttttttatt
+    18601 cctacggtct tggcgtgatc tctgaaactg aaattcgact gaaaagttac caatagaata
+    18661 ggctttagag tagctctttc caaataggtc gagtagccat ttctagaaat agaatgaatt
+    18721 aaaaggagcc aaatagttgg gattcacacg cacagcctta tcctatgagt ctgcctatgc
+    18781 tacgcgcctg cctttgagag cctttaccgc tggttccctt catcggcgtc attgaacctc
+    18841 gttagcattt cgaaaagaat tggaggctca aaacgaatgg tcaaaggaat tgatgattcg
+    18901 tgtttagtct ccgatcttcg cctagtctta gaacctgcac tgtagagagg cgctgcgctg
+    18961 cctctttctg ccgctctcct cccagatgta ggtcttcccg gaagtgagaa cactgctgcc
+    19021 caacttataa ttcggtcact ccccgtcttc atcctagacg ccactggtac tattcatcta
+    19081 tactatatat ggatgactcc tgctttctcc ccaatggaat gatcaggacc ctaaagaagg
+    19141 atcgtctcac tccaactaga ataaagatga gctagttagc gctggagttt tcttgcttct
+    19201 tcgcctctgg atagtagttt gaatgaatct ttgatactgg gattaactct tgaaggaaag
+    19261 atcttccact cctatgcttt caagcgattt ggattccaag acgatagata ggccggttat
+    19321 gaaaaagagg gatcgagcag gacgaagtca acccggaaag aaagtcggat aatatgaaag
+    19381 gtcatccgtc tcattctaag cgtaaaatta atttttttca gagaaagaag agcccatcag
+    19441 ttctcactct tccttaccag tcagttcccg ggtctataac aaagcagtca cagtgtgtca
+    19501 tcttcttgcg tggaagtgag tcctccatta atggtatcaa tgcctgtccc cctctcatcc
+    19561 ctgtctattg ggcttcacgc tagcccccat ttcgatcaag aagaaacttc ccagtcccaa
+    19621 gtgagaaagt gagaatagtc agatgatccc atctcataga taattggctg tctcggaaag
+    19681 agaggaagtg atgaatatag aagatctagc tctcattcct acagaataga cgacatcaga
+    19741 aggaaagatc agtcgataga atagaatccg aggatttgct tggatttgtc cgcttcatct
+    19801 cccccggttt acgcttccag gttctcacat ttggttcttg gaaaagaaca attgaattcg
+    19861 gatgtgattt gacttcttcc ctgatgatct tgtcggccct atctatcctg aaaaagaagc
+    19921 attccagaat caaaaagaag aaaggtctcg acgagcatta actagttgat gaggtctcaa
+    19981 cgagcgactg aaaggagagg agaaaaaagc tgctagaaag aaagtgagct acgatcggat
+    20041 aaggtatgcg ccttctatta ttgatgttgt cactgcctca acttaacctc agaataagat
+    20101 tgaacgtcaa cgagaagcgt cagtccaagc taaagaggaa tgatggagag caagaaaagt
+    20161 cctgtgctga ctctgtgttc taagctgcca tttcgatcaa aagtatactt atttttcctc
+    20221 ctactcgaga atgatggatc cggggaggca aggaagtagt agaaatgttg aagaaggaga
+    20281 cttaaaggca actcgagtaa aacgagaggt ccattaggga ctcgacttaa aggagaggta
+    20341 cggagtgact cgagcaaagc aagaggttta aataactcga gtgtaaacga gaggagacaa
+    20401 cccgctcttg aacttgaaac tatgcgcgtc cgtcttcgat ctagcaatct tggacaattg
+    20461 ttttgatgag ctgtcactaa actaaaaaaa gagagaagag aaagaactgg gaatgggcgc
+    20521 ctggttgctt gcttaccaag ccatcctggc aagctcctcc agttcgatta ttattcttga
+    20581 cttgactttg tataaaaact actcactatc aaaatgaaag acgatcgatt atctacccaa
+    20641 gaagatagag agtcccacat acgagaaaag gtcacaaata gagttgaacc aagtaacatt
+    20701 gcaaaggcat aatgatagta gggtcgggat atccccgccc tccgaaccgg acgtgaaggt
+    20761 ctcccctcat ccggctctcc gcaggggaat ctccactcac tgcttcccct aatatcctcc
+    20821 tttaccacat catgggggtt tacaggagat cccagaggca cgcacggaaa ggctgctata
+    20881 ccataccttt gactcaactc tactctagta gtcatagact cactagaata gtccgtccgg
+    20941 ctgctcttgc tgaaatacat tactcttcat tctcccgtgc tctagcaatt gcgctcccta
+    21001 gaccactacc atgtagttag gtagggacat ccgtaatgac gggaatctag gaatgaatgg
+    21061 ggatccctat caataagaat atgaagaata tctaattagt tacactacct ttctctcact
+    21121 caatagatct atctggtctg gatacggtac agtacaatac gagacgatgg aatgctatgg
+    21181 gatggatggt agagggatgc cagcgcccaa aagcgatgat tcacttgtcc ccttgtccat
+    21241 agggacctcg tggcatacaa ccgaaacgac tcccgctaga tagccgcccc tatctctctt
+    21301 tttacagcct cgtggacgga cgaaagaagg caagttacag aacggtgcag tgaaggctcg
+    21361 cgaagtagac agcaagcaac tgcttttcag ccccttctct attatattag taaagggaag
+    21421 gggactctat caggatctta cgaatctaaa gatctccaaa ttgaagaacg gactcttttg
+    21481 ctgcccccgg agcgtaagca cttcactcgc taggggatgg gattcattca cttgcattcc
+    21541 tgctagcact acaaaaagct cggtcttaac gcccttacta ctgctgtgca gcctttcctc
+    21601 gggttcgtag agtcgggttt cccgtttacc cacaacggag gagggccccc caccaggcag
+    21661 gcggccacgg gtcataacgc actcttcgca caacaaatcc actttgaagt tgacttattc
+    21721 gctcggccaa tcgtcggaat gtgtacgaga taccataagg gcccaatatc tcaatagcac
+    21781 ctttgtctaa agcttcgaat gagacttcat atccgaaacg caggaacgat ctgactagaa
+    21841 agtcattcaa aacttgatcg aagaaccagc gtttattgaa gaagctatag agtcgattac
+    21901 aaaaagtact agtttgaaag gctcgttgga attgatccgc tacgggattt acattatacg
+    21961 caacaaaagc acctgaagta ctaaacagaa taggtattag tttggtaatg gttggagcag
+    22021 caaactcgga ttcggcaaga atctcatttt ttggtagtac gaggggggaa ttggcccaaa
+    22081 aattggatag gggtcaagac gcagtcgggc gccggcggct tactcaagtg ccccccgaac
+    22141 cgcgcgaaat ggtcgcctat tacacggctc actaactctg cctggggtgg gggtacgtac
+    22201 ctatttatta ttcgtcgccg gtccggctta atggagaagg aaattcattc ttaatctaga
+    22261 gagttccctt ttgttaagcg catcgagcca aaagactttt cagtcagaaa attaggaagc
+    22321 atggactcac tctactcctc gcggcggtat tgccgcataa tggaatagat agcatctttt
+    22381 tccttttgag tttgaaaggg aaaaggaata tggttcccgc cattataata atggcgagca
+    22441 aggtatcgac tcgcgaaagg agctcacgat aaaaacccac cggattttct ttttccaata
+    22501 ttaaattgat cgtatggaaa gacgcctaca cggaacctat acgaaggagc aaacaagcaa
+    22561 cggtaactac tctactttac tttgatttga ctaggcaatt gattccccat ctcggtcaat
+    22621 caatcggcct ctaggactgt tgtttaaaga atctgacctt gacaaccgaa acgtgagcgg
+    22681 ggaagctgac tacaccttac cccttgactt cgatttccgc agtgctctat gagacgactg
+    22741 cgagcacata cctatggatc tatgtcaacc aaaagcaacc actccagcaa gtttaccaac
+    22801 cagcacagat cctcccaagc aacttccact gcgccaacta gaaaacttat gcgcttgtca
+    22861 attctttctt atcttactat atttctaatt acctaaataa aataataaat actgtagtta
+    22921 tagctaaagc tcgacttctt tcttcttttg gatctgcttt gccttgcttt gctaccgggc
+    22981 ttctagctgc ccttcctcca ccgttagctc agtagagcat tttctcctcc actcatgtca
+    23041 cggattcagt tcaaaaatga caacagtctc tagtttacaa tggctcagaa gtatggttgg
+    23101 agatggtgct tatcttccta tttctcatgt cggttcaaca actcttgcta ccacagaagg
+    23161 tagaatacct gttagtgatg tttcttcttg ttcctactaa aaaaagaatc cttgctctca
+    23221 gtctctcaat ttaccccttg gtcctaaacc atgctcagtc cttgattgtc agctgctaga
+    23281 acaagcacca gacacctttc aagaaagagc aagagcggcc gggaaagata aatcggatta
+    23341 gcctttcatc gatccccgaa gagaaacatt ttgttgttga atctgtgaga aaagcataag
+    23401 gataggtgag gatagattct ctagattaac tttatcgaac gaatgagaag cttagcaatt
+    23461 catctcggtt aaaggattct acggcgaggg tttaagcgca taaactgctt ggagaaggac
+    23521 ttctgacaag gcggctatct tgatagaagc aattctcttt tgaaacactt tcatagaagt
+    23581 cttcttacaa agcaaatagc atttcctata gcatttccta ttgatttgtc ccctggactg
+    23641 gtggacctat tctgattctg aattatccgt cgctacgctg ttcccaagga ctagcaaaat
+    23701 cgaaatagcg aaattcttgg gtcatctcaa tgggttcaga aaccacacgt ttctctggat
+    23761 catcatagcg tacttggaca tatccactca gaggaaggtc ttttcgtaat ggatgaccct
+    23821 cgaaaccata atcagttgat atacggcgta gatccggatg attgatgaaa gaaacaccaa
+    23881 acatatccca aacttctcgc tcccaccggc cggctgatgg aaatagactt actaccggag
+    23941 atattcgtgt tacttcgtct gcactggttt gtacacgaat gcgtgagtta taccgagtac
+    24001 tcagtaaatt atagaccact tcaaatcttc gttttcgaga gggatgatca actccgcaaa
+    24061 tatcgatcga aacttgaacc cttgtatagg tatgcaattt gagaaagcac aacaattgaa
+    24121 ataggtagtc cgtattggta tcagatctat tcccatgttc cgatctttcc atttttttga
+    24181 cccatttctt gggtaaagtc tcccaactat atttgaaaat gaattggtta tccataaaga
+    24241 taaagaaagc tttcttgtag ttccgcttct tgctcttcaa attctgaaag actcgtcgga
+    24301 aattgccggt gccggttggt ccaaccaaga aaggcatggg agtctttggg cattacttgg
+    24361 gtcgagttaa gtaaagactg gttacctata aagatccctt actttttttc gcaccggaca
+    24421 atctaatttg attgaaagcg aatgatcaaa ttcgtggatc gcctgccata ttggactttt
+    24481 tgtttttcca atacaggcga gcacgctttt gagatgccct agacgactcg catttgaaag
+    24541 gaagctgtgc agatcgctta agtaggctta aatacctatg cgaatatcgc tctcctcgat
+    24601 atttagatct cggagatccc tttcttttcc ttcaattaaa cttttgagag ttccttctcc
+    24661 atacatatcg agggctttat caatgattct ttggatcttt tgttcgcaaa agtggaaatg
+    24721 gcgctctgtg aatcgccact ccttagatga aatcagatct ttccacactt tatcttcatt
+    24781 tagttccaca gttctaaata gatcatctgg ctttctaaca acataacgga tgaagtcccc
+    24841 cccacgggta aagtctgtgg aacttgcagc atagggacaa ttccttgtcc cacctcctgg
+    24901 attactccaa gaaaaggttc gaccattggc aacagttcgg attcgaacat tgtctatctc
+    24961 gtaatcattt gattccgtct gactcctcct actcctcctt tccgaaaaga ttatccttgg
+    25021 tccttacttt tgacataaga ttctcggccc gctttatccc cattaaccaa ttacgttacg
+    25081 accactgaac aaacttggtt gacgaacatg gtttatgagc cgctaatgta gcggcttgtc
+    25141 gagcatttgc caaactcaca ccatccattt caaatgggat ttgtcccgtg gacacacgag
+    25201 caatccaacc cgtaggattt ccttttcctc ttcccattct tacttctgtg ggtttcccgg
+    25261 taatagggag atccgcgaaa actcttaccc atatcttacc atttcttcgg aattgtccgc
+    25321 tcatagcacg atgaaagtgt ccgattatag cccgacgcgc tgcttcaatg gctcgatatg
+    25381 aaagacgacc agctttacaa cttttagtgc catatcttcc aaaacccagt tttgtaccat
+    25441 ccggtttgca acccctacta catctgcctt tacgatattt actatatttc gtacgtttcg
+    25501 gatatagcac gtctcccttt tttttgacta tatgaaatcc acactttgac acctgagatt
+    25561 ccgtaacgag tagatacttc cgcaggagca taatcgattt tatggttaaa tacattacga
+    25621 gaagtttttc tatgcttata gcatttagtt tgagcttttt ctgctgcgtc ttttaatcga
+    25681 ccggaaaaac atatacggat tccctccacc ccttttggaa tctccttcac tattttagca
+    25741 aaaatggaat gaaatgatct tcttttgttc ttcagttgaa aagagatgtc ttgagcaatc
+    25801 ggagaagcgc tttgataaac agattttatt ttgactgact caattaaggt cttagtgttt
+    25861 gttctattag acaagaaaga tcgcattttt ttgacttcgt aaaaataaga gttgtacctg
+    25921 tacggaattc ctctctttct cagtatgatc tctatcatca tatctattac ctttatcaat
+    25981 tcttctatcc cacctaggtt ttttaatttc tctatcaatt ccattatctt atccttaccc
+    26041 acgaggttcc aacatttgac ccttaattga ttgaggagtt gttcccgagc atcaaggtta
+    26101 ttatacaccc caaccccatc tcttagaaag aaaaaggtag caccgaagaa tggaaagagc
+    26161 gagatggtgg ttttagttgt tcccgcgaag cgaagatcat tcgccaagcg aatgaagtgg
+    26221 gtcaacctat ttttggcttc ggccaaacac tttttctcgc tggtcgagct ttctacaaaa
+    26281 aaagcgatac gagaacgtat tctcttctgt aagcttcttc cctgtgcatt cgctctcccc
+    26341 atcgtagatg gttcagcggc gcccggtgcc acgaaatgat tgagaactac gacggggtcg
+    26401 aaattaattt ggttctttgt attaaaaaag tattgcatga ccaaaaaatt caaggagggg
+    26461 cgcactgcag ggagggtccg tagataactc gtcgggccgt cggagcggga cttattttgg
+    26521 aaaaataact tgaataactt cgtttttttg aaggagtcgt cattttctat caggaacgct
+    26581 atgtcattta caaccccggc gtatttcgga tgcttgaagg ccccgctgat ccgaagtgat
+    26641 ttagaaagat tcttctttat cgagggtaat cggtcatggt atccatagcg ttgttttttt
+    26701 ttcggccaac cccggatttc gttttgcttt tttcggtcgt cgagcctgat cgactcgact
+    26761 cttttccttg ccccccggcc tctcacttcg tttcgttctt cttctgtatc gtcgcttgaa
+    26821 tgaagacact cgatcggccc ggctttcccg aatgtcgtcc accaccggcc cttctccttt
+    26881 ccgggtctag ttttttcacg tcgtttcagt cgtcgtggtc gacggggaag aaagaaatga
+    26941 atgaatgttc ttttaggaaa atgtagaata atacacctac cgagacgaaa gccgaaggtg
+    27001 agtctcgtag gtggacgtat cgaaccgaaa taagatctca gattgacatc ttgatacaca
+    27061 aatttaccat aataataaat agagtcaata gtgaccccgc cgtagaaaga gccgcttatt
+    27121 ttttgcttga taggggggct tccattcacc aacaaagact aaaagtgctc tccgaaccgt
+    27181 gctggatagt cacccatcac acggctctca aacccaacct gtggtggatc ccgggagaca
+    27241 aagtaaaagc gcttgatcct ttgccccata ctttgagatg ctcctccccg cgcagcacgc
+    27301 tgctcgtgag aggcttagct ttaagtttaa gctgctatcg ttcggttcgg ataagtcaag
+    27361 tccctttgat cggttcgctc aggtggtctt atcttcccta acgttcagag tcgttttggt
+    27421 ttgagaaagg agcaccggcc gagcgccctg aatgaataag aaatggacag gagagagaat
+    27481 caatgattct tattcaaccg agttgaagct agcaacatgt tgattacaca ccggaggtta
+    27541 gtaagaactg agggcaatgc cctcctaacc taagtaggct acccgcgctg cgaagcaact
+    27601 ggtacttgat tggggcgggg ggcggtttgg tttcgtgcaa ggccctcgca acaggaagag
+    27661 gcagttgtca ttaacctttc gcctttattg atataaaaca agcttactta ccggcaacaa
+    27721 gcaccttttt ttctcaaagt aaagtttctc gcttgtttct ttagaaagat tgcagcctta
+    27781 gcgaaacaac ttatcctttt ctacgaatct tccctttatt gagcaaaatt ctatctatat
+    27841 ttattcccgg aatattttct atttcaattc tatcatttgt ttgacagctt aaactaggct
+    27901 tcattttaga taggttttcg gtctcttaat caaaatcggt caggggtgcc gtcggaacgg
+    27961 tcggtggctg cttagtctca tccgagagtc taatctcttt gtactgcttt ttttcaaaga
+    28021 aaaaaaaaga agaaaggggt cgatttaggc tccttccctt ccactgcata gctacgctaa
+    28081 caggtactac gagccctctg tcccacacat ctaaccagct cgcgtggttc accggttcca
+    28141 ccgaaaactc tcatttgtta aaacggagca tagtgcgctt taggcgccga gcgagaaacc
+    28201 tctcttcttt cgtttcggtt tattcactat ctgaaactta gcacttgttt gatttttctt
+    28261 attgggcggc ggcgttcttt tttgaagtct atccgaaccg aattagcact tctcagatca
+    28321 gactaggcct cccccgcaga gctgggcgcc ggggacctgg atgcgctagc gatcggcgca
+    28381 tagggggggt tagttgcccg ggtttctcgg cctggtatcc tacacctcgc gttaatgata
+    28441 tctacattca actgtgcccc ggagacacgg tcgaagcacg cccatccaat gtgcttcttt
+    28501 cacgacatgc tctggttccg ggtgttttcc agtggtcttc tagcgttaga agaagtcgtg
+    28561 tccgtcccgg acatctgcaa cgtcctccca cgcttttttg aaaatatagg ataaactgaa
+    28621 ggaaaagact tacccattcg gtgactttcg cggtcgccct cactgaaccg acttgaatct
+    28681 gaactacgat tttttccaag tcttaccgaa atcggatttc cttttcgtgc catatttttt
+    28741 gactttatgg atttctgtcc cttttttctt cccggtacaa tatttgttct cgaaagtctg
+    28801 agtttccgtg tactctccaa atttatgacc aaccttcccc tcagtgatct tagaacgcta
+    28861 ctacgcatct ttactatata gtgtggtaag gtaggtttgg gtatagcagg acttgaacct
+    28921 gcgaccatta ggttaaaagc ccaatgctct accaactgag ctatacaccc aaataaagat
+    28981 gtagtagtca attaagattg gtgcggaaaa gagaagaggt ctcaacaagt attaactagt
+    29041 tgatgctgat cgaaccctca gttcgaagct cttcgccaac atgttacgag gctccagtct
+    29101 taggcgggtc accattactt gacttagctt agaaaggcga acatctgggc aagggaaagc
+    29161 gcagtgcgcc tggtcctttc gaaccagtct ttataaacct ggtgcaaatg gctttctttt
+    29221 tttatgatat accaatcaga atggaaggcc ctacgtacat gattgataga agcactacgt
+    29281 atggggggtc cacttgatgc gggagaggca tgagtgcagt tcgatcttgt ggtgtattcg
+    29341 tttgtagtga gtagggcctc tttctcgttg atcagttcgc ctccgctcta ttgaatggtc
+    29401 cccattccta cggcggtttt gtcaacgaat gcgtctcggg ccggattgac taacctaacc
+    29461 cttaaggtaa gggggccttg ccatagttgg gtgctctgct ttagtgtgat tgacgaaggc
+    29521 gctaggctag gtttggtaat accaaaaaca aattaaaaga gattggagtc gatagttggc
+    29581 aaggaacttt tttcccctcc cccccccaaa aaaaaaaagg agctgcggtc gaactctaat
+    29641 tctgaaaata agtcggggcc cttttaccaa gtctactaga tagaaatgat agagggccaa
+    29701 ccatggttgc gttgcacaag acggtgccgc ctcacctcga gttcctttct tcgtagtcaa
+    29761 atcagatgat aggcttcggc gatgttacct accaaataaa aaaaaggcct gctaggtgat
+    29821 cgaaatcgct caggtcagtg aggactcact cactcatcta cgtattacta ataatttttt
+    29881 ctatctactg aattaaagac ccgccacgcc gggctataag atacagctgt tgtactactt
+    29941 gactggtaag aaaaagctaa cgtaagaact ggaagagtct tgtatgtaca gtacccctct
+    30001 cttccgctat tggtggactg ttgaacagaa aggccgacag aagcaacctt tcttagcgcg
+    30061 cttttcaagc gcttagcttt tgctagcgac ggggcggcaa ggctataaag taagtaaagt
+    30121 tggaagccta agccaagaag gtacgaacga agtgacgggc ccctttagag ataggatagg
+    30181 gggctcttgt ggtagtaatt gcgaacatct ctctttttct cgtagtgtgc acagtttgct
+    30241 tgcatgccgc cttaggcaca tctctctttc ttagtttcat gcttcctaat gaacgaaagt
+    30301 cagtctttta gtaagcgtat ataagcagca gtttagtgta taagcaattc tttagtggct
+    30361 gtgcaccgaa aggtacgtac ggtggaggtt ggttgaagat gatgttacgc ggcgttcaga
+    30421 accagccttg aagtgaatga attagaaaga agaagtaagg aaatgagacg actttttctt
+    30481 gaactatatc ataaactgat cttctcctcc acaccaatca cgagtttttc ttcattcctc
+    30541 tcgtatatcg tcgtaacgcc cttaatgcta ggttttgaaa aagacttttc atgtcattcc
+    30601 catttaggtc cgattcggat ccctccgttg tttccttttc ctcctgcacc ttttcctcga
+    30661 aatgagaaag aagatggtac actcgaattg tattatttaa gtacttattg cttgccaaag
+    30721 atcctacttc tacaattggt gggtcaccgg gttattcaaa taagtcgtgt tttctgtggt
+    30781 tttcccatgt tacaactttc gtaccaattc ggtcgatccg gaatggatcg gttaaacatt
+    30841 ccattaggga gcctggtctt gactcttctg tgtggtattc attctcgttc ggctcttgga
+    30901 atcacatcca gcagtggttg gaacagctcg caaaatccaa ccacttcacc tactttattg
+    30961 cccctaaccg tttctcgtac ctctattgaa acagaatggt ttcatgttct ttcatcgatt
+    31021 ggttattcct ctctgttcgt atctcttttt ccaatttcgg tctcgattag tttacaagat
+    31081 taaatggcca attctcttct ggaccctcga ttctttagtt ttccgagaat gttgggccgg
+    31141 gtatgtaagc catgtatcta ggaggaatta gaaagaaggg cttttgtttt tttgcatcct
+    31201 gtgttgtatt ggtcttgcag ctatatatta gaataagaaa tattattctt gtcttatatt
+    31261 attatataat acataataat attagaaaga tggtacataa tatctcatac acgtacataa
+    31321 agaagttcct gcatatcctc tctaataatg taattaaggg ctaagggagg tttcttgggt
+    31381 acattaagac ataactcgac atcccgaaat caagtcggcg gctctattgc tctccctcca
+    31441 acaatgtgtg actctaaaaa acaaactgct ctggcatttt cagtgaagct cagcagcata
+    31501 tttcgcgctt atggggcttg gagttaatca tcttgactgc atataatgag tcagactcta
+    31561 cccaaaggtt acgaatgcct attcgcgaag catctccccc gtatggcgga ttgcataaag
+    31621 ttccaccttc aaaatatctc tccactcaac cgtcttccgg aaagcccaaa gaggccgata
+    31681 aagtctcttg ctatcacaag ccgcaccgca gcagcctcta ttgctactga accgtctgtg
+    31741 ttcacttttg tttacggagg gttcgaagaa gagtaagatt cagctgagca gggtaaccat
+    31801 gcattcccaa aaacggagtt gagtgcaaaa agagtccata actctcagtt taatctgaga
+    31861 ggaaaccaaa tgcagtctca gttttacttt tcccttacaa ttctccaaaa ctacgttctg
+    31921 cttacagctt ggattccctt catggcgcct ggaatttctc tcacgcccat agtatatgac
+    31981 tgcagtaaag cagagcttta ggatcaggcc agtaaaggtc taattcttag tttttctatt
+    32041 ttataatcca ttggcaaaca tcatgaaggt tggttgcagg ctgatcatat ctacatttcg
+    32101 aactgagctg agaaaaaacc caccctgcat agctgcactg aagagatgtt cggcactttc
+    32161 atcttcaagc atgcacaggc cacaaagagt cacggtgttg gttgggataa aatctgctaa
+    32221 ctcgatctgc tgaagcccag agttgagggc catccaggcg ataatactgt ggcttgggtt
+    32281 ggggatatta aatttattcc gaacgatgcc tgcatagtcg accttaggat aggtctgcct
+    32341 aatgaggttc caggctgttt tggagaaaaa aatttccatc cgggctgaga gaccaaacga
+    32401 tcctgtcttc aacagggaga aaaaaaagag ttcatacagg gcaatctctt gcattagaca
+    32461 ctgtccagcc attctgcggc taggaggaag ggtccaatgc aatgcccatt ttttattaaa
+    32521 tcgcttacta gagcatgggt ccgggagacc tctttccctt ataacagtga cctcgtacct
+    32581 gtagctgatc gggccatttc taagccacgg acaatgccac agcttcgtgc tttcgccatt
+    32641 aaatctgaca ctttatagta ttatattagc cacatcttcc cttaccgaca ctcctccaag
+    32701 actaagatca gtcggatgga agcttagcca ccaaaataga atgtctccag agatatctca
+    32761 aattaaccca ttgagtccaa atggacccat tatcattaac tactctccag aaaagcttta
+    32821 acatggcagt cctattccag ccctcaattg ttcttaaacc caatccacct tcatccttcg
+    32881 gtctacattg gaccagctga tcatatggat cttataggtg ggttcggggc ctgaccaaaa
+    32941 gaaatcccga atgctcaatg aaatggattt ctatatcttc tccggaagcc tgaaagcgga
+    33001 cgaccaatag atgtgaaggc ttgcaagcac tgaggtctgc aagcctttca tgacagcagc
+    33061 cttcttttct tttccaattt aaaatttggc gattgaactt gtccaagagg gggagacagt
+    33121 cagagtgtct caatctggta gatactagga aaaaaaccag tatttgagag ggagctgcac
+    33181 ctcacgtaag ccaagagtac gaatgatttt tagttttact tttttttaac tggtggccga
+    33241 gaatgatagg gaacatttgg ctttattaat ccaaagaccc gcctagctgt gaaactaatt
+    33301 cagcacttgg ttgatgacca acgcattacg cacggagctc tccgcgaaaa tgagcacatc
+    33361 agcatatatt atatgagaaa gatagagttt atgaaatctt cttgaatggg ttatccgctt
+    33421 gttgtttact tccctgttaa aaagctggct gaatattcag ggtaaaaaga taggctctaa
+    33481 gaaacttgtc tcaatcaaac cgcggccccc cctgaagaag ctagatggag atccgttaat
+    33541 cataactgca aatgacacta tggagataca ttatctaatc tatcttacct aaattgaagg
+    33601 aaacttcaag gattcaaggg tattgaggac agcatcccaa ggtactgaca tgactcgact
+    33661 gaaaggagag gtgctttagc aactcgactt aaaaggagag gtgctttagc aactcgactt
+    33721 aaaaggagag gataagaaaa agccaacttg aagttcagtt acttgccttc cccattcttt
+    33781 cgaagttgct agcattctct ctcctttgct ccaagggaat ctgtaatagt aaaccacgca
+    33841 cagatgctag gaatgaagcc agccaatgcc ccttacatta caagaagatc gctagtagaa
+    33901 gaagataccc acacccacgg ccagagaata ggcgaatagg ggaaaaacat gaatagggct
+    33961 ttttgaaata aatagggttt gcgcaattcc atttctaata gaatgccgtc tttagcaaaa
+    34021 gcaaaggagg aatgatctac gaatgccggg catgatgtat tagtattacc cgagctaagt
+    34081 gcgctgtttt cagaacaaat gcacagtgaa tctacgaagt cttcccctta atgttaatgt
+    34141 cctaaagcag gagagagtta atcaacgggt agccctctgt caatctctga tggcatcaaa
+    34201 aatccgcaaa gtgacaaatc aaaatatgag aatcaacagc agtctcagta agagtagcac
+    34261 gttttcgacc aggcttagga taaccgattc ttatttatca agcccttctg taacagaact
+    34321 agcaccgctt actttgacaa caggggatga tttcacagtg actttatctg taacaccgac
+    34381 aatgaactca ctagaaagtc aagtcatctg tcccagagcc tatgattgca aagaacgcat
+    34441 tcctcctaac caacatatcg tttctctcga actaacatat catccagctt ctatcgaacc
+    34501 aacggctacg ggttctcccg aaacaagaga tcccgatcct tctgcatacg cataagccgt
+    34561 ttcattcgcc gaagcatcag tctcaagatc cgaatcctat gaatcaactg gatcaatatt
+    34621 cgcaaccgat gcaacatctg attcttttat ggttttggtt gctggacctt tctccgccac
+    34681 cgtcttttca tctatcagtt aaatcagttg acctttctct cttctctctc tctcctcttt
+    34741 ttctcctcct ttctatttct tctcttatct tttctagggt taacaaattt ggtatcagac
+    34801 gcgtaggtta cgcaatggca ccgaaaccag atcccactgt tcttcctgat cttcaggaga
+    34861 aaaaggcgat cctgggaacc cagatcgaaa tgataaccca agccatgaca acactggagt
+    34921 ctcgtgtgac agatctgcag caagaaagca acgaccatag gacttgggtc agggaagctc
+    34981 tagacaagtt gttgaagagg gatttgggag atgaaaacag gccgaaaccc actaccaaca
+    35041 agatgatagc aacaggcgag cagcacaagg gagaggtctc gacgagcctc ttccacgatt
+    35101 aacggttcct ccgagacaag ctcccaacgg aaagagctca cttaggcttg atagctttga
+    35161 tgtgagcgtt tgtccaattc aaagattcta agctctcaca gcactgattt ccgatggaaa
+    35221 ctacagacct tcatcacaga gaaacttcca ggaagatgga gggttctcgt actcgcttgg
+    35281 cgggggatga agtaaaccct ctttctgagc agctatcgag cctattagct tcttgcccgc
+    35341 atcccaaagt acgattcccc cggacaacca caatggttac tcaactaaga agcaagtcct
+    35401 tcaaagagac agctttactt accttttctc cctagatgga acaactcaac tccttactta
+    35461 gatttcaggg ggttaaggta gttgactttc tggctgataa atcaggtaac gaagcctaag
+    35521 atttagatga ctgattagat tactagtttg tttaactgag attccccgcc taaatcccat
+    35581 atcttaaagt aaagactata actcaacagc aagctgcctt tacttggctt caagtcccat
+    35641 aagagaagga agaatagcta gaagctagag agtagctgcc tagctacaaa ctcaaaagcc
+    35701 tggtgaaagg aaggcctttt ccttacttag cttagagtcc caaaaggtgt tataagctgg
+    35761 aagctaagaa gctagtcgtt tatgagtagg agttccatcg gtccagaact agaagtcaac
+    35821 tcaaaaccgc taaaaggacc tattatttgc cctattagga cctattattc taaagtacca
+    35881 ttggagctcc ttttcccgac aacagatcgg agattttact ttctcaagtc atacgtcttt
+    35941 tgttcagcca acagtgttcc gctctactta ttattactta ctagcgccct tcacttcaac
+    36001 tcatacatac tactttttga ctttcagctc aaatcaaagc tacttgctta taagagaaga
+    36061 gcaaggtgcg tagctggctt gttaaaaagc atggaaaggt atccagaaag cacagtaaca
+    36121 gctatgatat aggcaccctc tcgcctaata cccactacta aggattgaag tagcacaatc
+    36181 ggctatagaa ctccagatct atgaatagcc agagcctagc ttgattactg gaactaaaca
+    36241 gcaagctgct cctaccttac ttatcgataa ggagtacagg gactgagtcg acttctttta
+    36301 gttctctttc ccctagcagc ctttccttct tgcttagagc ttctattcct tttgcattgc
+    36361 ccatcattct actcaaaaga ggacgaaggc tactttcggg acattggttg cagcgatttg
+    36421 tagaccaata gcaaaagcag caacgattcg atgctctctt cctccaagag cgcattcaag
+    36481 gtctaatgaa attcccggat cgagcattct tgggctaagt attcactgat tcaagatttc
+    36541 cggtccacgg taaccgatac ccctattaag cagcatacac attgagtcaa ctctgttcac
+    36601 tccgaggcca ttcaatcaag gagataagcg ctgatattcc gatctgccta attcgtcgag
+    36661 tcttccgtcg gcgaagtttt tcatttcaaa ataaagtgga aactgaggac tgggctaagt
+    36721 ctttaatttc ctctatgtga ggaattacat tcgccctata gatcaaggag agtttcagca
+    36781 tagatgaagt agtgccctgg tagctcttcc gagggctggt gctttccttc cctcccgggc
+    36841 gagaagggta ggtttttaat cctggctctc ttcttttatc tttttggggg ttctccagtt
+    36901 tcataattag aaatcttgag cagagcttgt gcagcacatt agttcactct ttggcaatcc
+    36961 tttgaatctt ctactagtga agcagcccta gcgtagcaat tttagttttc cttcctccac
+    37021 acactttcgg ttgataagtc aagcacataa aaggatactt cttttcggag ggatttatga
+    37081 tggatcacgg atgttccaac agatggatct atcgggaatg aagaaatatc gaacaggcct
+    37141 acgtctatcc aaatgctaag ccttgtcact agccatctat ggtatcctcg gtatgacgat
+    37201 gaatgtctct aacctagcta gagccctgtt gagatcctat tctctcgtat agtcagctac
+    37261 tatagaccct attccgccct ttcttttcta cattccttta ttgagccgaa tcaccatcat
+    37321 tatattatat attcattgtt ggtttggctg ctggtctagc gaagcaacct ccttctatag
+    37381 tttatttctg gctggcctgt cggagtccta ttattgtaga tgacaaggag ctatataatc
+    37441 cctcttcttt tccaagaaca acgacatgct cttagatctg gaaggctaga agagagtagc
+    37501 tcatgcccta gaagtccgat cgtataatat ccaaatgttt cccccggagt ggttcaagct
+    37561 tatgtgacca aagctggaaa gaaagattct tctaaacttt acccaaaccc aatgcctgac
+    37621 catcgatggt agagcggtca gtatgatgac ggagaggtcc tgtcttcaaa tcctaggcgg
+    37681 ggttccttag aatgatcata ctcacgaaga tggcatcgaa aagctaaggt aaaggcaaag
+    37741 ctactccacc ttcaattgta actagccggg catcagaaag agcacgaggg taatgcctgt
+    37801 aagacagaag cacttccatt ttatgggctc tccaactggc tcaaaataag gactgggagg
+    37861 caaggaaagg agaaaaaatt tctcactccc ggaagttttg gtaaggacgt caaaaatata
+    37921 cgtccgaggg ttcaattcag tgatcggatg cccccggact aaagtcaaga tttcatacat
+    37981 atcgaaaaca atagctagat actctgcggg acctctttga gaagctctca ctgttcatac
+    38041 attctttcta attaatcgtt ttaccgggaa tactatagtg taagattcat tcctttgttc
+    38101 acaacaattg cccgcaagat gtttctctct acgttggatg atcggagatc catggatcac
+    38161 tgacggtatc ttcccatggc taagcgtcta aaatacttta ctatatcgaa gatttctctc
+    38221 ctaaagtgtt ttccctttgt cagaaacaac ctcaagcttg gaatggatct tacactattg
+    38281 gtttaccggc ctacgcggat cgtacgcaat gtacctgacc tagattgcga ggcactactt
+    38341 gggccctagc ttctcgatgc caattccgta atgaggctct tcttcgatta gattgagatc
+    38401 tgattttctt ctcttacgat attatcgatt tctagattga ataggaatgg gtcagcacgg
+    38461 tgcagcggca acttaaatat atctaaacgg attgcgtggt acccccggac cgaaggaagg
+    38521 gcttcaaaaa tgaattgaat caatgaaatg ttgcttggtc ttcactgaga acttgagtaa
+    38581 ggagtagatt cttttggggt aactcgaaaa aggctttttt ctaggcaatc gaccactgca
+    38641 tctgcatttc acaaaaagat ggttccgctc gccacaagca agcccttccc tttacaggga
+    38701 agctttcacc cttccccatc gagtcgagta gcctcgaata actctcttct ccctctttac
+    38761 tttcgcgtac tcctctgtct ctttcttctg tcctttgtcc ttctcttccg agcagtagca
+    38821 gcagcggtag agtcggtaag tcaattgaaa gagatgctca tctgctctgg ttagctcggt
+    38881 agagtggcag gcgaaatccg tctctcttta tagatataga ataggcggct agtcacttct
+    38941 tactcgtgta gtgagaaaat ccttcttttt ttcatgataa tagtttttga ctctttattt
+    39001 tgaagcaagg tggatctata cgggaaccgg taagtagata gttagaatcg tattcttctt
+    39061 tcttcttgat tgtttgcacg tttgtatgag aataaagctc atctccttcg gtaagcacat
+    39121 atagggtatc gagccctcgg cgcttgagag gaccccgggg agctcttctg tgataggctc
+    39181 cgagttcctg actctactag gcaactaaga cattagatga gggagttgct tgtctgttta
+    39241 gcgctctgta gttcatagat tggatattga attctccgaa gatgagtctc cctcgagcct
+    39301 tacttccaag cactgcttct atagtcaaca aagaagtagg gtttcagact tcgtcatgag
+    39361 tttgaccttc tgttctaaga aagaatgcct gagcttctgt agcgcgaaga caagaactag
+    39421 tcaatttatt atctccaatc tggaataacc ttcttcctcc atccatacac tgacatcagt
+    39481 tactgaaaac cgagatatgg gcataaagaa aataaaactc tttgctttga atagctatcc
+    39541 cgtcgtactc aagggatgaa ggaagtgaag ttgagtttcc tgcctctatc ttctagttat
+    39601 gtaccttata gcccgcctag tccccctttt gggaggagta gtgccagtaa gggtaagact
+    39661 ctcatatgat cattcctagt ttccagccat cgattgagtt gcagtccttt gctcaaccag
+    39721 ttacacgcct gttgaattgg acttttttca aattgaccct tcaagcagtc ttacagccga
+    39781 gtctttcttt gtttagttca aagaggagta gagggtttgc tgggaattcc tggtggaaaa
+    39841 agatcgtgaa atggcaatgg cttttattag tcagatccgc ccttctcggt ggtcgttagc
+    39901 agctgctatt tgaaatccat tcccgctgct gtcgtcaacg gtagaactct tcgggcatcc
+    39961 gtccgttctt cttctgtctg aggcaaaggc gaatccctta tactgtatac ggtcgagctg
+    40021 gcttggctgg tacatcaagc atatcggcat attgcttgtt cggtgtggta cacatatctg
+    40081 tcaatgtcaa tcaagtaata gaattcattc ccactgtctg aagaaaacgt gaataattgc
+    40141 ctgtttatga gcttggaaga cccgttgaag tccccgaata aaggggaaaa ggctataagt
+    40201 aggccgtttg ctattgctag aagggctgct cgcctttata cggcttggct tcgctatcgc
+    40261 tcctatgtag tgatcggcct caaaagtggt attcctgcca tacaagccta ttttttctag
+    40321 tgctagaagc ttcgccagaa gcaagcaaga cgagggagcg gagcttcgta gacaaggctc
+    40381 gttacgtact tgactgacga gctgtctact aaacgagcgt tagagcgagc gagttaagcc
+    40441 tctaaagttt gatagctctt ccccctcccc cccctccctc accttactct taaatttccg
+    40501 cttaagcttc cccggtttgg gggattaatt gattggatac ccgagaacca taatctttcc
+    40561 taggaacagc ttctacgacg atagataggg gtcagctttc tttggcatct atgccccctg
+    40621 ccctccaaac agtatgggag cctttcagct cgtactgctc acactcctag atcttcacgg
+    40681 cacctcctcc accatagtgt gagctgctcc cagcggagaa aagcaaggcc tacttcctaa
+    40741 ttagctttca acaacgtcaa caacaccacg aaaaaagtaa acaatggttg cccactaatc
+    40801 tgatcatagg tgaaatccaa tcccttcgct tcgcgccagg ctttgaaacc gtaagtcagg
+    40861 ctcctttcgc ctctcctttc agtcgagtca cgtcagtacc tctcagaagc gagaaagcga
+    40921 gcagcaagct gaaaaaagga gaaagtggtt ttataaagca aaataagcta agggggctgg
+    40981 ctaggaatcg caagaattga gaagggtggg aaagacaggt tcggtaatgg ccggattagc
+    41041 aggaggaagg tcttgaagag cctgaaacaa agaaaggtgt acataaaaaa agaggctggt
+    41101 tatggccttt acttgatagg actcctttcc catcccggga agaggggata aaaaaaacct
+    41161 tgcggaagcc ctgcccaccc ttccggatcc ctcatattta tgattccagg cttcccggac
+    41221 tcgtaataga cggctaagaa caagaagagg gtcagtagtc tctgccgttg caggtccttc
+    41281 tccttccgct gagacggccc tctttttttg tttgttcacc gcggcacgaa atcgaaatca
+    41341 tgaagaagct ggaataactc agaaagagag tggcgcctag ccgttgagag cgtctattat
+    41401 ctttgtagag gaacagtacg atcttggact ggcccccttc gcatgaccta gaaagatcaa
+    41461 gaagtccatg ctactataag gcctctaaac tcctccctca ggacactatt gcccatgggg
+    41521 acggggtagc cccgacttcc ataggtcctt ggttcgacct cctaatgaga attgaggtcc
+    41581 ttgcgcgggc gtctcatccc taagacttgc ttgctctgta tggagtgccc tgtggttcct
+    41641 cgagtgccag ccgcagagag gaatgccatc aactagggcg ctattggcca ctaaccactc
+    41701 gctcgccagc cgctcgggct ccgcgtttca agttcgttat cctaaccgtc ccctctgctc
+    41761 caccgggtgc ctggcccttt tcttctatct tatctactac cttgctcctc ggctccctac
+    41821 agctccagcc gctcactgta atagcttgct tctcgggtgg ctcgcacccc gggtggtgcg
+    41881 gctgagccag agtggggctc aacagtcggc ctatgtttcc gggcgcacgc gtaaaggcat
+    41941 gattagttcc acaaatctca ctgcactgac catagtaaac tccttctcgt tgtaccaaaa
+    42001 tagagatttg atttaaacga ccaggtacag catcacattt gacacctgag gaaggtacag
+    42061 cccaactatg aggtacatca gcagatgtta caataatacg tagatgagtt ttggctggta
+    42121 caaccactct attgtccact tctaataaac gtgattgacc caattctaga tcttcttctg
+    42181 gaatcatata actgtcaaaa gtgagtgact gctcatcgga actgttatag tcagaatact
+    42241 cataagtccg ataccattga tgtccaatag ctttgatagt aatggctgga tctactacta
+    42301 cctcgtccat tgagtataag agagcaaatg atggtatagc aatgaacatc gagatgatac
+    42361 taggaaagat ggtccgaaga atctcgatag tagttccatg aacaatcctt tgcgggattg
+    42421 catttttttt atagtggaaa tgccataaag cgcgaaccaa gatccataat acgaaaacca
+    42481 aaatcagaat gaggaagaaa aagatatcgt gatgtaagtc tattattcct tgcattatag
+    42541 gtgtagctgc gtcttgagat cctaattgcc atggttccgc tgcatcacaa ggagaaattg
+    42601 tgaggaataa ccattttaga acaatcattt tcaaagcaaa ggttccttca ttttctgctc
+    42661 cccccaaaca aagagagact gattctgact ctcccaatta aggaagacgg aaatggctgg
+    42721 tgccggttgg tccaaccaag aaaaaagaga tgggaatttg gggcgtaaga ttcttcttct
+    42781 tcttacaata ttttgagtta gatgaacaga tcactctcct aaaagcagca gtcttcttat
+    42841 atacgaaacc aacatcctta taatactact aggccccacc acactgaatt atgaactttg
+    42901 cgcctccagg agggtcaagg cagaattcca ttcctatctc cttcgtctgg tcgagaaggg
+    42961 actctgactc ttctattact acagtacaga ggattagtcc cattttttcg tcacgatcga
+    43021 tccacgtccg ggcttagaaa gctagcttac taaggcgaag gaccgctttt tattgattgc
+    43081 acgagctaag aacagatcca caatctatta tctaatgaat attcattaga tagctaatta
+    43141 tcctttgcct agctgcccat tgctagaact tccagcgcta aggtggttgg tgtggtaagg
+    43201 caaggcaact cctctttcct acgctaagcg caaaaggcac tcgaaggagt actgggacca
+    43261 accatcacta ccatagggtt atagtggtaa atcctgccac ctcagattct tattttacct
+    43321 tacgtgtcaa caagcaagtt gggatgctct cccttaacgt ggtagggctc tgtttcaggt
+    43381 cttgacgttg gccttattaa aaaaggcatc ctcgtcgcag caaagcccgt gtacaatgct
+    43441 caaaaaaaac tgagtcaaaa acgagacttt cacaggaata acacatcttt ctaaacaaca
+    43501 acgggttcta ataaattaag ccttgtcatg gctggttgag gttagaattt cataaagata
+    43561 aaggtgggta gagaaggccg cagtagattc ttccgaccga gtcccagtgg cagagtcttg
+    43621 aggcacgaat ccaacggcag gggaatcagc ggctgatcgc gattcatcag tcgcaattct
+    43681 cccagcagct atccatttta gttcaccagt ccattactga gctgtgattg cataggtaat
+    43741 agagcaagct ggagcggagg cagcagagat gaaggtgaga acaggggggg gtaggagtag
+    43801 ggcacctagt tagttacagt ttgcccgaag catcgttaga ggcataggca gtcagaaata
+    43861 aaaatcgtgg tcgatctttt tcaggctatc aaatcagcaa gggtacttga atagtcaagg
+    43921 ctactcgata aaggatggct ggcattcatc agtacgtgtg gatccgccgt aggggttggt
+    43981 tcagccactc gacgaccggc acttgccatt gaaaaaccgc ttaatcgtgc tttctattcc
+    44041 tatgaataaa agaagaagat aaggaagggt agtgatacca atattcactc aacgccaaag
+    44101 aaagcttcta aaacaaggtc aaaacttcac ggatttcata aggattaata gtcataggaa
+    44161 aggtacactc aaacgaagaa aagatctttt ttgcaaggtc accatagttc tatttgaaat
+    44221 tcgactgttt actacattac ttcctgaaag tgtgcccgtg ccctcagagg ggaaggggtt
+    44281 ttggaatagg ggtaggaagg acctttatgg gtataatcat gttcggtcct ccactcaaca
+    44341 agccactgga ttagcttccg gcggaatccg cttttgaagg ccttggcctt ccattgagac
+    44401 aagagatgat tcacgctgaa ctaggatgct tggaaattgc gtactttttt tctgacacct
+    44461 ggcacaaaga actcgagtgc cgcctgctac tacggtgtat tagagttcga gcaacagatc
+    44521 aaggttcgag ctaaagctaa ttccgagttt tcgcgggaag aagtacggtt tcggacagct
+    44581 catgatgaac tattgaatcc ttctccatcc gaatccgtct cagttccagg tattgaactg
+    44641 aaagatgact cgaccggagg aaaaagccac tcggggatca attttccaca tcagtctaag
+    44701 tggctttttg gttggtagca ggacggtaat tggcaacgaa gcagagattt attttaccca
+    44761 ttaaaaagta tatttcacca ttcctctcgt cgtccagaat gggtgactca aagaaaacgt
+    44821 ttcattcacg atgaggcaga attgacataa tataaccaag attggatggt gacatataat
+    44881 atagtaaagt aaggggttcc tccgcccgtc ttcgatacaa tcattgcgat gtgtcactca
+    44941 aggatagcta gccattcttc tctacccatg ccatgattta cagatgagct cgagaccatt
+    45001 attgaatata ttatgaacac aatttgccaa gagttggttg tgacaaaagt gattgggatg
+    45061 cccatctttg gtccagaagg ggttctctat gaaaagagtt tccacctttc tctactcgaa
+    45121 aagccctatc taaaagggct tgtctggatg aatgcagtgt cggaagccgt gatcacatag
+    45181 taacttccgc ccacagtgct attacgacgg cgggtcaccg ggagtgaagt aaactcggct
+    45241 cctgatgtag cattcattcg gaccattcga cgtttgattc tttttatcag ggataccgat
+    45301 gactctgtga gaggtgttct tcggccaagt ttcccatgac gggttacccg gttcaaggct
+    45361 tttctatata atgagaaata ctactttctt tctagcttaa gtgttcacgt aggtaaaata
+    45421 gcttctatag ctccatccaa tagtaatcaa cggagataga gtccagcggt tcaaccaacg
+    45481 cttctaagga gagcggggca agcaagaaag caggcaaagt cattgagcct attctattcc
+    45541 gaaagttcaa ctactggata aacaacgaaa gccgtcggca ttcttctcct actgtagctg
+    45601 ctacaattgc tttagcgcga gcagcaagga ggaggcagct cttactaaaa aagcaaaaag
+    45661 ggaagggcat gacagaaggg agatagacct cctataagca ttactctctt ttgaggtaac
+    45721 ttacttactt actctgatat gatgagttcc gtgggctagt aagataacta ttgaggtgag
+    45781 ggtccgaagg agatctttca ctatttatgc ttgtacagtt ttatgatagt tttgtaaggg
+    45841 aaagcaaatc ctgagaatgg aatgtaagtt agggaacagt agtcagacgc ggaccagcgc
+    45901 aggtgggcgc cacagctgtc ttcctccatg tgatatctga tatcacgcaa caggagattc
+    45961 ttcaccctta tttattgaca cgatttacaa aagaagaacc aacccggtcg tgctatatgt
+    46021 atttgggatc aaaaaggctc actgctgtat gatataaatc tttctgaatg agagttcatg
+    46081 agctacagga agagatcgag tctagaataa accttagaag tggcaaaaac gacttacttt
+    46141 gttgcaacgg gaactactcg ccccgggcac tggtgaaata gaatcttatg tgttgcagct
+    46201 acttatgctt tcccaactag aaatcaaacg gaacaaaacc gagtcttgct atttagagcc
+    46261 agggttcctc cgggggtcac aaccagagac ctaccagccc catgtaaacc actttcggaa
+    46321 gggacaaagc agttgaactc tttagaggaa gagtctaacc acaagtcaaa ctgtgaacat
+    46381 cagaaggatc gatatattgg taatagctct acacgagaga attgaaactc cacctgcaat
+    46441 tccgagtata ggagagtcca gtcaataata aagtcctggt tgcagcatcg ggtaaatggg
+    46501 atccaggtaa atggcttact gttcggtcaa catttcatga ttcgtaatta gtcaagctcc
+    46561 ggccggttcc tatgtggtga atagggatat atttagtatt aaagacatgc gagtgctccg
+    46621 ttcgtcagta agcgaaagag actgaaacct gggagaattc cgctctgtta agagagagaa
+    46681 ctaacgaaaa ttggagactg acggaaggaa aggggttcct cccttctcct ttaggggaga
+    46741 agcttggttc ctccttctcc gctgagaggg gagaagagct gggttcctct ttgaaaactc
+    46801 tttccgtata ggccagaaaa cagcttgctt agagaaagac tgactctcct acggacctgg
+    46861 tggaccttac agtcgagtta ttgcatcgat ctcacaaact atcaatttca taagagaaga
+    46921 aagatcgttt ttagatcatc aagtgaggac aggtagtagc tctggtagag cgagggactg
+    46981 aaaatccttc tgtctgcggt tcgaatccgg actcacttct agctctggca caagttcctc
+    47041 atccttagtt gtttgatgag cctgcgtagt attaggtagt tggttaggta aaggctgacc
+    47101 aagccgatga tgcttagccg gttagagcaa aggacttgaa atccttagag caaagccagg
+    47161 gacttgaaat cctttttgtg tcagctcttt ggaagtgcct tttcctttac ttcagtaagg
+    47221 agtctcaaaa taggaaatac ctagctagca aagtaagcaa taacggaagg taacgaattc
+    47281 tccgtttcga tattaacaat ccggatctag ggaaagtagg ttcccgcatc acgtaaattt
+    47341 catttctttt gggaaaataa tacataaacc ttttgagtca cagccacctc agcacaagtt
+    47401 actcgagtag cacagaagcc atcatcaata gcttgcttgc ctgggagctc aaccacgaat
+    47461 tctctgtctt ctaacctttc tatcacttga ggattatcta cgcaattcgt aaatgatcaa
+    47521 actagatagt cccttatctt actggccagt agtagaaggg gatcttgaat caaagagttc
+    47581 ctgtccaaca agcaaggaat gtcagtcata aataatgaaa tccatataac tctactccgg
+    47641 gttgtgagaa catcgtgcct gttgcgattg tgtggctttc cgcatggttg gagaccccct
+    47701 atgaacaaac aaaacagtag ggtggtgtag gagaaccccg actccctaat gcaagataga
+    47761 gctcttaggg tgcgtttcgt ctttgtcagg aagaaaaaag tggcaagagg agtgtatcca
+    47821 tacccgtacc gaagagatct tgggaaggca tctctataga tttagtagtt ggactaccaa
+    47881 agacccagac agcttcaagg ttaggtcttc agtgttaggt cagttcgagg tcagttgttc
+    47941 cctccgtact gttgctagag agctgctttc catctatctt cctacatgaa aggatctaag
+    48001 tctatccaaa taaaatagcc cagaaaatga cagccatcaa aaggcgcgca cccatatagc
+    48061 ctcggcctgt ccaaatgatg ttcagcggtc tctacccaag tagctgtggc ccatgatcca
+    48121 aaggacccgc aaccagtcaa tcatgctatc cttaccttcc aaccaatcgg ccaatcacgc
+    48181 tatccttacc ttccaaccaa tcggccaatc acgctatctc cttacctttc aaccaacccc
+    48241 ttcgattccg cttctgcagc agtatataat ctcggtccct taccttgatg cctacagctc
+    48301 aatttgtttt ccagtgatgg caagaatccg atccgcgaaa tactgctttt tcttttttct
+    48361 tgtgttgttt ctgaatggca tcatagctac acgagggaaa gcgatgctgc ccactctgcc
+    48421 gcaaaagggg gccgctttct tcccccccaa aatgccagtt ccaccatcag ggcccagcaa
+    48481 gcagcataat tctgctcctc gatcggactt cgtgcaattc ttttatatgt aaataaggga
+    48541 ggggtctcga ccacattttt gagtaatagt aataggctaa ggcggattcg tctgcttcgc
+    48601 ttacgtagtc aaaacattct cgctggacac ctggtaagga atatttgtat atgcgggaaa
+    48661 gatactctca cctacagctc ccgttactag tagttccggt tagcccactt gcccgagcac
+    48721 actctcaact tgtagatgat ccaacagaaa aagcaagaga ccacgaaagc acgcatgaaa
+    48781 acagcccttt tgaaagcata cccaaggcac ccatccaatc tgaacttata gacatcaaga
+    48841 agaagatcta ttaacacgca tggctaccta tataacaagt tacctctgac ctcttctgaa
+    48901 agcatttgag acttctcgtg ctgaaaaaga taatttcatt gacctacggc acggaaggaa
+    48961 agcaccgctg ctagctcgcc ttgtccacgg aacttatcct gttgagaatc ctatcttgct
+    49021 tgactgcgct gggaagtctt tccttaaagc attgatttta cttggagtat ccgcttaagt
+    49081 gagaaaagga acttgattta gcttgaacct tgagccgtag aagaggacct ttcagaagga
+    49141 cggctggctt tagcccttga gcatggacga gagtgaacct atttaaaggt ttactgtatc
+    49201 gtttactggt gatgggaata tgtaatattg ttgtagaagt ttccagcctc acaggcttta
+    49261 gattgaacta tggaaccgag cgaagacctt tgagattgag attccccagt ccacttatga
+    49321 aatggatccg gattaaccgc tttcgactga gatgtaaagt caccttcgct tttgcttttc
+    49381 gtattctatg cggctttctc caactctaat ctcaggtttt tccttctcta tgaactctat
+    49441 agattgatcc acaaagagat tgtacctata cctacttcag tataagttta tcctgaaccc
+    49501 tatggcactt cgttgcttgc tccagtgagc tacctattta atgaaaggga tatttttatc
+    49561 ctttttcagt gtgtgtacac gcttgaaagc ggaaaagatg aactcagact aataaaagga
+    49621 aacggaactt agaaaacaaa acaactaagg atgggctctc cggtggatga aacgacccct
+    49681 aggagtccct tcgattgggt gagggcgaag caaatctaca atccttatgg gaggatggga
+    49741 ggcctaccac aaatatgggg attgaaaaca ggcaaacggt ctcatacgtc gtaaataagc
+    49801 attcttaatg tcaagttggt agataggcca tagctgtcgc caaccaagtc tattagaacc
+    49861 atcacagttt aaaatttagt aagcgaaggt ctctttatca tccttcccaa tcacttgcgt
+    49921 ctacccttta gctactaatg acgctttcta tctctcaata gaaccatcag gatgagactg
+    49981 gatcttatac attagagcct ataagccttt gtattaggtg gtaatgtccg aagtcttctt
+    50041 ggatttgagt gcaccaattt atttttgcat ggcctctttg gtctcattct ttttctgaga
+    50101 agaaggttca gagacagcaa gaagattgtt caaagaagct agatagtctg ctgggaatgt
+    50161 gtgtctatta tgcagaggaa ttgaaagaga atgtgaacag agaatcaaca gaaagtttcc
+    50221 cttcttcttt ggtaccgtag aagcttggcg atgtgaaaag atagaattct tgaacgagcg
+    50281 aagaagcaat cggggcagcg tatagcctta gtcacgcgca tccctttcct gcataaagat
+    50341 gagctagata gcacggcttt gaaagaaaga tatctgaggc aattttccat tcaccttttc
+    50401 ttgctggtca agatagtaag ccgagacttc tgtagtagct ttctgttcct tggataagga
+    50461 gtcagttgaa agtggtgaat aagacccgca aatccccctc taaatggttt tttcattttc
+    50521 aatctgtaaa ttcatgaagt caagaagtgt agccgaatca aatagttggc tctagtctaa
+    50581 ttgactactt ggaaagaaaa aagtgattta gtgaagtctc tttgatgcgc tacgaaggtc
+    50641 aagcaggaaa cagggaagcc ctttcgacct aagccaggct aataggttga gtgaccagga
+    50701 gagtagacca gagaaaggtt atgtaataga ggcaatagtg agccccgaag agtgcttttc
+    50761 tattcttccc cttattcgct acgtattcct ttcactggga tttttcatcg ctcctaatac
+    50821 tatgacaagg ctcgcttatg gctcgctccc gtggctctag tagtagtcca ttttattcag
+    50881 aattattctt tctttttgtt gatggtgcgt tggaagggac gaaaggagct agtgagcaat
+    50941 gacatccgcg cggggaatag gcttgcttct tcctagcgat cgggcggtcc tccctcttcc
+    51001 tgttcaccgc cggttcgccc actgctgctt tcatagatgt cgtgcgaagg gacagagcta
+    51061 atggatgcct attccgttct cctgctcctg ctccagctca agagttcaaa cggacagaag
+    51121 agagtcctgc taccgaaaaa aaagtgcata atattggatt caaacaaagg ggatttattc
+    51181 acgaatacga tttctattga ttgaaagctt tcctattact gatggcagac gggcgagacg
+    51241 gagaccgtcg aagaagtgcc ttgacgcgcc gcagccacta ctttgactcc ttttatgcaa
+    51301 ttatgaactc cacggaactt tctcgattcc aacgcaactt atccttttgg agctgacgta
+    51361 acaaactagg cgagcctccc aacgaagcca acataaatcc tatccgaata aaaaaaataa
+    51421 aaggcagttt cattatggtg gtataccaac ctcctgttct ggaacttcca gttccaatcg
+    51481 aagcatatag atccgtaaat agatttgtat gtatagccac ttcagccgtg ctcgtccttt
+    51541 ctcgaaagta tcttttttct gggaacatgg ttaaccaaaa attattatgt ttgcgattct
+    51601 tcatccacca atgcagaaaa tgctcaagtt ttccattctt atatgaggcc ggaaagttta
+    51661 ttagcaagag gtcggcatga agtgattcat catgctcaaa catgagccga tcgttcgtta
+    51721 gggacggttt atagatcatc aaattcccac aaatggaatg aaaagtgggt ccatgtaaat
+    51781 gatcgagacc tcgcaaacaa caacgttcct tccccatatg gagttcgtaa cccaaaggca
+    51841 atcgttgagt gaactgtatc ttctttgtgt tagttgacct aagccgcacc attactgctg
+    51901 gggtggggct cggcctccga accgtacgtg ggacgagttt ctgcctcata cagctcgggc
+    51961 cgaagaccgg gggaagttta ggagagatgg ggagacccag cagctgccgg tcggggcggg
+    52021 ggtaagcttg tgaagaagcg agcttatccc ccccaaaaaa tcctatagcg ctagcgcttc
+    52081 gcgttctttc tattttatct atctcatttc attccgggat aggcggctaa tactaatcta
+    52141 ataaagtgaa gtagtcgtcg tctgaccaat tgactcggac accagaccgc tcgtgcccgc
+    52201 ccattctgtc tcgccctaaa tggaatggct ctcttagtta cgctgcaccc cgacccgagt
+    52261 ccccacgtcc gctcttctcc gtccgcaacc taagaagttg gctttgccaa cacaacatta
+    52321 gggccgtccc cttcattcta tgctgacccc gcccggcccg gggctggctt tttgggaagc
+    52381 ccgttcccac cgcgctcatg gcccggctgg cctgccagcg gtagtgggaa ttatcccgtt
+    52441 ccctggtcaa agacttggtt ggatgcggga tctactccac gaggagcggt acggacgtag
+    52501 atgatatcat cacgacctct cttttcgtac cgctagggat gcttaacgcc acttcgccaa
+    52561 ctagcgttac ccgcgctttc gtgtctctca gtgtggtcag cactgggtgt ttccgagcag
+    52621 cgaagcttac acccattcgc attagttcat ccaaagttcc ttacccttat gcacgaattt
+    52681 tggaataagc catcttccta tcaaggttaa ggagtcaact gagcatctca gcggcgggat
+    52741 tgaatacccg gatcgaatca gagttcacgc cgcccgccct gaacaaatag gaggcgtggg
+    52801 ccacaggtcg cacataagcc gccgggtcgc acgacagaag aacacccaac atacagatgc
+    52861 acactcctcc atgtgaaata ttcatcttca ttggaacctt ttggtagtag tcgtgaccaa
+    52921 cagccatagc catcagctgt cggctccttg gtaaggcgag agcttcaagc ccgatttctg
+    52981 gtggcacgcc ccccacacaa gcaccaccca actcaggggg gacggggaaa gctagaggcc
+    53041 caaaaccttc gacccttctt tctaaatcta aatgggggtg cccggagcac ctcatcttgt
+    53101 catttcgtcg ttgctcattc ccgttcggcc gaagtgtttg gcctttcctt ctccgcgccc
+    53161 gctcacgctt cgctgaccta tcgcgtgcta aaaagaggaa agtacgaaag aatagtaaac
+    53221 agagcacacc gcagaaagat tctaaatatg agaagtcccc cttggattcg agaagacgga
+    53281 aatgaagaac gggaacgaaa aaaaataagg cctttctagc tctagtttca gatgagcttc
+    53341 tcccaattat gtctggtaat agaatagggc gacttgctct gaccaagact ccactttttg
+    53401 ctccgtccat ggagcgtatg attttccttg aatgtagata gaccaaaaga gggaatgaag
+    53461 agataggaat aggaattata gtaccattgg aaaaaggggc acccgtggga acatctctac
+    53521 tgacgaacca tttcaatagt acgggtgctg ccgtgccaca aggcacgacc ataaaaataa
+    53581 taaaaaagaa aaagttatgt agttggacca tctgctctat ccgttcgagt ttggcttctc
+    53641 taaagaagag aaggcgctaa aaatgaaaat gttcgcaagg cataccgaaa ggataccaat
+    53701 gaagccgacc attaatgact agttcgaaca ccaggagcgg aaggagggac ttgaaccctc
+    53761 aaccttagcc ttggcaaggc tatgctctac cattaagcta tttccgccag ctacggtagt
+    53821 ggcgaagcac tactgagcaa ttcacgtatt acttgatacg gacgacttcc gtttttccgc
+    53881 cggaccacgg ctcttgacct ccgatcgagc tacgagcacg agtaggtagg cggcatctgt
+    53941 cttttttttt ttgaagttga gtcatttcct ttcctaagac ggctcctctt attctacgat
+    54001 aatccaagca gcagctgccc gagtccgctc ggaaccagtc gattcctgag cctggactca
+    54061 aggtctctcc tgcctgcgag tttgctgcga atgctgcgag taaatacatg aacgagacga
+    54121 gtgagttagt ctccgagcga atccggcggg tctttcaaac atccaagttg ccacttttca
+    54181 gctacttgga atgcatgaga cccctgcatt taggaatctt aggttacctc cagccttctg
+    54241 aagcgagttg accttctgtt gaccttatat ctgtctacct tatataataa tgaatatgaa
+    54301 aggaaagcac taactcctga gcttgggcaa agttggctaa tcgatttaga ctattataat
+    54361 agaataataa atccgcgact tgggaaaccc tttcttcatt cgtttggcgg gaaaggagaa
+    54421 accatttctt cattcattgg cgagaaagca tttcgtgcct tcgtcctagg ctcggctact
+    54481 tctttctcaa ataagacaaa gattgcacta gaccttgagc ccgggaagac cgctactagc
+    54541 cctttactct attggattga tttggtcgag acccggtttc tccttcctag aaacctgaaa
+    54601 ctgcaatccg cgaacgaccg ctttcttcct tattagccta atatcttact ctcggactac
+    54661 atttgtctaa gagcagttga ggaatctgca actgaaagaa aggaatcatg gaatctgaac
+    54721 ccaaggttaa aaaaagggga aagaactacg gaaaaggttc tatcacgtta atcgtcaccc
+    54781 acttacgctc ccaagccatc tctggaattg aacttcctcg gaacagctac aatgctttga
+    54841 attcctctgt gctttcacga acaagccgaa taaccttcta tttcggaaga ggcattctga
+    54901 taccattcac ccatagatga tattgagaat tctcaggttg aaagcccatc aacttaacgc
+    54961 tatctggggc cattcagaaa ttataatcca cgaaaggagc catcaccgga ctcgagccgg
+    55021 gagaaccttt gaacaatact ggctgccttt gctcgttgac gactagcctt tgttctttga
+    55081 ccgagatgtc gacctgatca cttttctcaa atttcagcaa caacaataag aaatagtaga
+    55141 gccttcaacg ggacattcga acgcaacaaa gggtttcttg accgcctttg aacgacaact
+    55201 cgataataga atagaagact ttcttcgtcg acttactcgc tcgcatgaca gcatttagtg
+    55261 gtctcttcac tggcttaggc tatattgggg aacgctcggc tacaagattg aaaagaccga
+    55321 tatgatagct ccacggccgg agccgatact tctctctcaa accattacct ttccccgcca
+    55381 cattcttttt tgacggagag acgattcaag aggatccagc taggctgaaa gcggaggatt
+    55441 ctttctttac ctgtgccccg gacccaagct cggctgaagg caaggaattc tttctagact
+    55501 gacacgcaag ctaggctgaa agcaacccgg gttacagcaa tggggattct ctcttctatt
+    55561 agaaatgata ttatattggt gactactccc ttcagattga agatttgaaa gataacacgc
+    55621 ccaccgccgg atactcttca cgtgctgata cctatattgg attggtgaac aaccctggag
+    55681 tttaggacat ttgaaactat agaaagacct tagccgcttt tatgatatag tcctgaagac
+    55741 ctaactctta gaaatgacct actaactcat cgacaaagaa aggacttcgc cttgaccttc
+    55801 tgacctagac gtcgaaccag tgccttggat tcatcaacaa agggatcaaa ggaactcatc
+    55861 tgtccctcca aacggaaatc ttggttttgt aatttgatct ctgacaaagg gagagggaaa
+    55921 gaaagaccaa tccgtatctt tgaaagaaaa agattctcag aaacgctacc accgaacatc
+    55981 gacttactcg gcatcttcgg attcaagcat ttcgttcgtt cgttggcgca aaagcctcgt
+    56041 ttggcgggaa agcattcatt cgtttgcggg aaagcaaaag cgttcagcat tgacttccta
+    56101 aagcgacttc ttagcgtgct tcaacagcgc ccagataatg ccttgaccgg agccggaggg
+    56161 agacctaaac agaagaacga cctcgcagtg gcaacatagc ccatcctcaa ctcatacctc
+    56221 agattttgaa ccatactttg agtcctcaaa ctttgagatg gactggacca acactcttct
+    56281 ttgagatgga attcccacca ctcttatttg acatgaactg taacgctact ctttcaaaga
+    56341 tacctaacaa ccttagctgg cattgatcac tcttccttcc caatccctcc atttagaatt
+    56401 gcaaacatcc caacttccag agctacctat gagccaaggg cggagggaga tatattagac
+    56461 tgatcaactt caaacaaaac gaccttcctg gactcgcttt catctccttc tttgaaacga
+    56521 ctcaaataat tgagtgggca ttgggagcct tatttaatgg tggattcctg agctatgaat
+    56581 gtctctgaaa tcgtgatgtc aaaaaggaat gatatgcgat tattaacttg cttccttccc
+    56641 tctaattttc atgcagtagt tgaatttttg cactcttacc tggatgcgat tttccgcctt
+    56701 gaatagatgg aggtgttttc caccttgaat tgagaatttt tgcatgctcg atgggatttc
+    56761 tgccttgaat tatgatacat ttattatgaa tttttgcatg ctcgatctga tttccttcgc
+    56821 tgtatggagt gcataggcct attctagcat ggggtttctc ggggtaaggc ttgtgctttt
+    56881 ccttccatat aagactagct atgtgctccc tcggtacgcc tgtattcctt tcgtggattg
+    56941 gaacctacgt actgttgtat tgtagcgtga tgttttccca ctgctttctg gcttgactca
+    57001 cataaagtac gtgcgtaact ggcactatca tgtaggtgcg aaatctgtct tctcctatgt
+    57061 cttgttctcc cagtttgatt cctatgtttc gaagtccagc taattagtga aggaaagtgg
+    57121 tatggatgtt tggagctcaa tccatcgaat tcactcgctt ctttgcgctg cgttgtagct
+    57181 cttagattat tgggaggagg tcatttcgaa tgttttgtaa tttggaatcg ttttttcttc
+    57241 aaggcacggc ttcttgaatc gtctttgtct tgtagaatgg ttgtcatctg gaatctaact
+    57301 aaatagttag ttaagaagac gcgtgctttg tttggttttg atctggaatt tcactataag
+    57361 ataagataag agtgaagtgc tttgtttggg caatctgcct ccctaaatct agtggaggtc
+    57421 ccggtcccgc cgctcctgtt tgtaatgggg ccgctttgtg ccaaatgtgt aagttgtttt
+    57481 ttcaatccgt tctaactgta ataagagaag agggaagtcc gtctaaagaa attagtgaag
+    57541 gaaagttata tggatgttca ttagttcagt aaagtgttag taatgttgct gggagtaacc
+    57601 ctttgatctg atctgtcttt gtacgtgtta tcttgcgagg ctgtcaggtt ggcttgtgag
+    57661 gatggggaga aagcgggccg agaatcttat gtcaaaagga ccaaggatga tcttttcgga
+    57721 aaggaggagt aggaggagtc agcttattct atagatactg taaaagccaa ctcatgtttc
+    57781 cactcaattt tcattacgaa gatgtatcac gtcaagatcc gttgctcaaa ccgaatcacg
+    57841 ccaacgttat ggaagttcct ggatcgtgtg aaataagagt agtaccaaag gcaccctata
+    57901 atttcataat aaaaaatgga aaattggcta tggagattcc gcgcggtcag aaattcatac
+    57961 agacacaaag gggttcgaca ggaaagtcct ttcgatctaa tccattcttg gggtcaaata
+    58021 aagacaaagg atatgtaagt gacctagcac gacaaagcac tctccgaggg catggaatgt
+    58081 ctaatttttc ggtcagaatc tcgacagtaa tgtctctatt agattttccg gtcgaaatac
+    58141 ggaaaaactc cattcaattc tcgatggaaa cggagttttg cgaattctcc ccggaactgg
+    58201 aagatcattt cgagatcttc gaacatattc gggggttcaa tgtgactatt atcacttcgg
+    58261 ccaacacaca agatgagact ttaccactgt ggagcggttt tttgcaaaaa gatgaggggg
+    58321 aaactcagta agatgtcgta gaagcaaaat agtagagatc acaaacgtag attgctcgcg
+    58381 gctaaatttg aattgagacg aaagctttat aaagcctttt gtaaagatcc cgatcttcct
+    58441 agtgatatgc gggacaaaca ttgttataag ttgtccaagt tgccaagaaa tagttccttt
+    58501 gcacgagtca gaaaccgatg tatttccacg gtcgccctcg ttctgtatct gagttctttc
+    58561 gaatttctcg tatcgttttt cgtggattag catctcgagg ttctttgatg ggcataaata
+    58621 aatcgtcttg gtagcaacca ccaaaccaat agaacaaaga aaggttagct ccgcagctgg
+    58681 tccacaagca aggtaagtaa gcccattacc agccggctcc ggaccgaaaa gtaacgtatt
+    58741 taaaaccctt atcttggatc ggagatgcga acggggcggg aatcgaagtg ggggacctct
+    58801 ctaccgcttg tgtctatttc ctgtcaagta tgctccccat acatagacta cgtacaggta
+    58861 gtactcttgg aaagaaagat ataatgcatg aacataacat taagttacga atgtaactcc
+    58921 cgaccactct tctaaatata ctaaggcgga gaactcttgt tcattggagc gccgtagtgc
+    58981 ggaggttctt cccatcatgg aagtccgagt tgggactgag ccttccgaat gataatgctt
+    59041 tgtttcgttg gaaaaaccaa cgcaaatctc atattgactt tctatcgccc tacttctaag
+    59101 gatagataga gagagttact ttatgaaatt atctcccttc taaagcagcg caagtcggcc
+    59161 cccccagaac aaagccccta ctcccgagag gtataaatga ctcgactaaa aggagaggta
+    59221 tttattactc gagcacttgt tgcgagaggt ccaacgtaat ttattactct tataaaagag
+    59281 ggaactcgac tgaaaggaga ggttgtttac atactcgact aaaaggagag gtccaaatgg
+    59341 acttctgtga attacagtga tccagtctca cggatatgga gcttcgccgg agatggcagg
+    59401 gcaaaacctg atggaccttt tttttttttt tctcaagagg tgatttcgag aatcaaccaa
+    59461 ccgacgagac taattcgagg atgtgttaaa agagagtcta accgccaagg caagtcccat
+    59521 ggataagccc cagcctccct ctcgtttcac tctcgttcct ctccttacag tcgagctcct
+    59581 ttgttccttc ggacctctcg cccaaatgaa atgggatgaa tccaatcaat aagcttattg
+    59641 attgattcag agcgcagcga agccaaattc aatcaaggca aaggggggct tacttttcct
+    59701 gacgctgagt catcctattc aaatttagct atgctaatgt aacaggaaaa gttttcacag
+    59761 atgatatgga tcccaagaga tgagcgagaa cctccaattg cttaaggatc gcactccgct
+    59821 atcccgcttg gtggacgaga tcttctctcg ggtcatccat cctgggttac tgaagggttg
+    59881 tccgactgct cggtgaccga atcagagaag ttttgaccgc tttctcttct ctccagcact
+    59941 ctcggactga tcatccaatc catcttgctg cgacaaagca agcttaggaa tgaatctaag
+    60001 aaatttaggt ctctgcccgc ttgaaagatt cttctttcct tttcggtgaa agagggcaaa
+    60061 agtgtgtagg agaaagaatt ctaaaaacgt cgacgcttaa ttcgccccct ccatccttca
+    60121 aaagtaaaag aaaggctcaa atatcaatat tatatatata tttgaggata ttttagggcc
+    60181 ctagaacgca aaaaaaaggt gggtgaacaa gagttgtcac gataggaaag agaaatgact
+    60241 ataaggaacc aacgattctc tcttcttaaa caacctatat cctccacact taatcagcat
+    60301 ttagtagatt atccaacccc gagcaatctt agttattggt gggggttcgg tccgttagct
+    60361 ggtatttgtt tagtcattca gatagtgact ggcgtttttt tagctatgca ttacacacct
+    60421 catgtggatt tagctttcaa cagcgtagaa cacattatga gagatgttga agggggctgg
+    60481 ttgctccgtt atatgcatgc taatggggca agtatgtttc ttattgtggt ttaccttcat
+    60541 atttttcgtg gtctatatca tgcgagttat agcagtccta gggaatttgt ttggtgtctt
+    60601 ggagttgtaa tcttcctatt aatgattgtg acagctttta taggatatgt actaccttgg
+    60661 ggtcagatga gcttttgggg agctacagta attacaagct tagctagcgc catacctgta
+    60721 gtaggagata ccatagtgac ttggctttgg ggtggtttct ccgtggacaa tgccacctta
+    60781 aatcgttttt ttagtcttca tcatttactc ccctttattt tagtaggcgc cagtcttctt
+    60841 catctggccg cattgcatca atatggatca aataatccat tgggtgtaca ttctgagatg
+    60901 gataaaatag ctttttaccc ttatttttat gtcaaggatc tagttggttg ggtagctttt
+    60961 gctatctttt tttctatttg gattttttat gctcctaatg ttttgggaca tcccgacaat
+    61021 tatatacctg ctaatccgat gtccaccccg cctcatattg tgccggaatg gtatttccta
+    61081 ccgatccatg ccattcttcg tagtatacct gacaaagcgg gaggtgtagc cgcaatagca
+    61141 ccagttttta tatgtctctt ggctttacct ttttttaaaa gtatgtatgt gcgtagttca
+    61201 agttttcgac cgattcacca aggaatgttt tggttgcttt tggcggattg cttactacta
+    61261 ggttggatcg gatgtcaacc tgtggaggct ccatttgtta ctattggaca aatttctcct
+    61321 ttggttttct tcttgttctt tgccataacg cccattctgg gacgagttgg aagaggaatt
+    61381 cctaattctt acacggatga gactgatcac acctgatcag tgaaaaattc tgacaccaat
+    61441 catttacata ttacaccaag aattgacaag cagataagtt ttctagtttg ctatgttgat
+    61501 atagcttaga tagggaaaag ataactccac tatagagtag ggctgtactt caaaaatcaa
+    61561 aaagggtccc tctccccctt tttttattaa aaaatcaaaa aagaggcccc gccccccaag
+    61621 gcctaaggaa aaaacctctg ataaagaaag aaaaaatata gaactaacct ttacactata
+    61681 accattaagt caagttatta acaatcctat actaagagga gaattcggat cagctcgggc
+    61741 ggaagaggag aggcgaagaa gaagacaata agcactcaga ttggaccaac ctttcattta
+    61801 aggttcgtca ttatcttttc aatcctgact agaatcaggc ttctcttgaa aaaaggtaag
+    61861 gagttattcg atataatatg gtattaaccc tagcgcctaa gtcacccccg caaggtagga
+    61921 agaagataaa ggagaaagtg aacggaggaa aaaaaggcga taagcgaacc ggcgggcggt
+    61981 aaaaaccaca aagaatttat atagaaaaat aggtactacc aatatgaaat taagaaagac
+    62041 atatatacct gaaacaaaat tctgtaatga ctatgtgacc atgaagggag tagttgattc
+    62101 gcgttccaat tcattggcaa aatccggtat aaatataata atataacggg atcgtcgtct
+    62161 tgacaaagat gaatagaaag agtgcttttt ttttaatggt aacaattctt attgtttttt
+    62221 tttcttcgaa ggaagatctt tctttgacta aaaaggtttc catttttaat agattgtttg
+    62281 tggttgtacc tttactgcaa gaatataaat gactcgctat tcactcgagg tttctgggtc
+    62341 ataatgtagg agagatggcc gagtggttta aggcgtagca ttggaactgc tatgtaggct
+    62401 tttgtttacc gagggttcga atccctctct ttccgtacct ttacctaatt caccaacgtt
+    62461 accgaccgcg caataccaat cgagacctct aagacgaact aaagggatgt ctctaagcag
+    62521 ccaaggccaa gagcaagcag gagtggtcct atccgcccat tcatttttaa gtgacttata
+    62581 agacgtgaga gatactctaa agtcataacg gggaaggcca gagacttcgt tcaaatgggg
+    62641 ggaggttttc tcttcaataa aatgaaaggc aggtattttt atacgaaaat tgagaattca
+    62701 ataataaatg ttcacttgag attaggttcg cgaagaagaa gatcacaaat gagatcttga
+    62761 gcttgaagct tactctccag aatcgaaaga tctcttatcc ccgaggcgag gatggcttca
+    62821 taaacttttt ttgccgaagg aaagatagat aaggggtttc atccatattt ttctcccatt
+    62881 tacgtatcac gttcagagat tttaattagt tgtccgctct ttccgtacct ttacttaccg
+    62941 accaaatacc aagactactt acaacaggtg gtgaggatca gcacactgcg ggaatctaag
+    63001 gaattgattg ctgtgcatcg accaatctca gaaagggaac tataaaagaa ggtgaagatg
+    63061 caccgaggga tttatgaaga cgcatggtat gctgtggaga aaggggtgaa aggagtagaa
+    63121 gcggcagggc aagaaaagga tagtgataaa tggacagacg ctgagaagaa gaagtcgaaa
+    63181 gtataacagc aaagcactgt cagcaatcgc aagtgacaca ttcaagggtg aatcatccaa
+    63241 agatgcttgg gatatcctac agaagggttg acaagtgtaa agagatctcg tattgatatg
+    63301 ctgctggctt cacggaggaa agagagacca ttgaacaatt catctgtgct ttgccaatgc
+    63361 tgctcaaaat gaaagacaaa gacaaaagtc aaaaagcttc tgttctgtag cacactcccg
+    63421 gcgaagagcc cttcgcaagc catgaatgac acagataccc ttgcgtttga tgttgttgta
+    63481 ggtcacctac aagcgtatga aatggagact gagaagtccg agaaaaagca ttgctgcagc
+    63541 acacactaag acaacaagag agatattgaa gattcaatcg gcagcacact gcttgctaga
+    63601 aactttggca aagtagtaaa gaagaatgga caaagtagga aaaacccttt tacacagaac
+    63661 acaacaacaa atcgtgatcg tgtgtcaaag aagggagaac aacagtgtag ggatatggca
+    63721 aagctgagca ctgccttttt ttagagaaga gacttcaaat gtttttaatg caaaggttgc
+    63781 ttcgctaggg gcacacaaaa gctgaatgtg tgaatactcc cttattggtt tgcttggagc
+    63841 gagagcgaaa cagacgagga tgatgatgca gaggaagtct ctggattcgt agcatttgtg
+    63901 ataaagatga gacgcttcat tcggacaatt gctttgctag tgaaacaaat ttgctgcttt
+    63961 actcaagccc acaccctcaa ggggttactt cgtcacctcc gtcaaacccg cgtcaaagcc
+    64021 cctcgtcaaa gacctacgtc aaagtaggtt gcttcgccac ctccgtcaaa cctccgtcaa
+    64081 aggaggctgc gggaggctac ggggctgcgg gaggctgcgg gaggctacgg ggctgcggga
+    64141 ggctgcgggg gctacttcgc tgggagtgtg cgcttatgct gaagaaagaa agcaggtcca
+    64201 ggttcaaagc gtaaaacggt gaaaccaaag gcttcttcaa ctcgaagaaa aagcaggaac
+    64261 ggaaagaact ttacttcctg gtgaagcaat tttcaggata gaattctgcc agcttaggaa
+    64321 tctgaacata agcagtagag cgagggcata gattactctg gtcatgccac cccagcaatt
+    64381 ccaagaatta atgcaagcaa agcataggag tgtgatttgt cgtttacctt cttttgctct
+    64441 taggaataaa ggtaaaaaga aaagcagtgt gctggacagc acctttccta cgtttgaaaa
+    64501 cgaattgccg gtactccact tagacttctg cacctgacct agtggctgaa gaaggatctc
+    64561 cctcaagtga tttgctgctt cctccccgac gtaggtttga cggaggtatc agtctcgttg
+    64621 ctaaagcacc ttcggacctc ttttttcgtc tattctttgg ttaaagctaa aaatatttct
+    64681 tttcattttg gatcattcag tagctcagac tggaacttga gagctataac cttttttcct
+    64741 taatttcttc cgttgccgta tcttgcatat ataagatcga gatatctcta gtattcataa
+    64801 tccaggattt cgccatactc ttacacctta cccagatctt ataacttata ataagatcat
+    64861 ctaggctagg acgaggcaga ctaccatcaa taaaagagcg cttcagtcaa catatacaaa
+    64921 gtccagcata gttagatcca ttaagagttg agccagaccc ggatgatcac cgaagataat
+    64981 aagggctaga aacattatct ggtgaatctg agacaacgaa gctgcgatcg gcaacacaaa
+    65041 ccattggagt gtgctgggga ggatctgagc tgtagaaacc cttgatccac taggtaaggt
+    65101 tacttccaga gcggagtatg ctcaggtgaa actggtggtg atgccgatct cttctttctc
+    65161 gatttctagt tccgtttcgc cattgttgag gaaaaaaagt gatccgagac tcaacaaatc
+    65221 gaagagaaaa ggaaaacttt gttgattcag gaactgtgaa acggaaagtc tcaccagaaa
+    65281 gtcattgcac tctctttgaa gagggtcttc cctttgtaag caaaggtaat aaatgctcaa
+    65341 tgaggaaaga ttatgaattt attctttcat ttcggcttaa catctcttaa taattgtttc
+    65401 cctgttttta atacaaaatt tgaggtgaag ccttctctaa tgctacgtgt cttcatctta
+    65461 gcagatctct aaagcttgga gcgtatggta acggttagaa gtgggatttg agccgctgta
+    65521 ctgtggcaga gaatgacgtc gctgtacgtt catggcttca gctctggttt tgttttaagt
+    65581 gataaataag acagaggata ctttgggctt cggtttgtag acaaaactcg tcttgggctt
+    65641 atgtcttgta gtcataggag agctgagcct tgtgttggcc cagttttggt tcatttagtg
+    65701 aaacagtgtc gttttgagtt gtaaccggaa atatctcctt cttcctttga ctgtcgatct
+    65761 cggtgtgggt ggcttaacga gatgttaggg ctaatgttag tatgtagtga tgtgctgttg
+    65821 aaggtagata cactttatac cttcttcctg aagcagacga agcgagctgg aacgtacaag
+    65881 gaaagagaag atgcctccta atctacttat ggatgtcaga gttgatggtc gagcataggt
+    65941 ccattagcag atggaaagaa gattctgctt agcgacactg cagagtcaga tcggccccta
+    66001 ctaatgagaa ggccttgaag gaagtgaaga ttgaggcttt cagggtttag ttaagacttt
+    66061 gccggtgact cttttcctct cgcttcgtaa aaagaaaact cttccatact aagattaatg
+    66121 agattttccg gtatacttcc cagtcgacta tgtgaacagc acaaagcaaa gttttaccct
+    66181 ttagtgtcca atgatatcaa tggtatcttt ctgtttgtta ccaaacagga gggaagcttt
+    66241 atggtttcgt aggttttctt ctactttttc ttctacttta gggtttgata gaatcgtccg
+    66301 ctattacaga ctttgattat ctgtggggca gtgcgttaga cgaaggaagt agcaaggaag
+    66361 atgaagaccg gatattaaaa ggaatatttc acaaagccga agcattacaa gcatacataa
+    66421 aagaggaagc ctaccgtaga cccccacaca agacaaagtt cactaataac atagtacgtg
+    66481 aacacaaaaa gacatagaaa tagaacaaaa ggagaacaaa gaaaagccat gcagtacttt
+    66541 gggtcgaccg gactccttat ttaaatatta gaagagagtg gttctagtct ccctggcgct
+    66601 ggagagcgtg ttcgatagta cacacaatta ggtagttccg ctcctgcagg agaccagaaa
+    66661 tcctttgttg caggccgcga atgcggtccc gtaagacttg catccttgca cccacaaatt
+    66721 ccgggtccag agcaggcggg tgctcccaga acaagcccag catttcctcc cagtggagct
+    66781 ttgcgttttc cgcggcttcg atttcttgtg gaatggtcgc cagcctgtta tagatctcca
+    66841 tatcatccat ctctcttaac tttcgacttc taagtgttct caaatataga cggaaagctg
+    66901 cttctattta taggcaggct agtcttctta tcttaactta atgagtcttc ttacgtaaca
+    66961 cgtttaatat ggaacaccct ttagtccgct gaagaattaa ttaagtacgc tgaagacttt
+    67021 aaaggcccca tccatcaaag agttcttttt ggcgggtatt gccacgcggc ttaataccga
+    67081 tcactccctc agaaagaaga ggcaataata gaacgcacag tagagagtac tcccccgcgt
+    67141 ttctcgcaaa gcccagagca ctccttactc gacagctcct gaccagagtc ctcgttggcc
+    67201 ctacttgcat tagctctgcc tgctcagagc tcgcttttct ttgtctattt ttgattggct
+    67261 gattcccagg tacataacag ccctcggcca atccttactc gacagctccg tcctttctta
+    67321 ggtgagctaa gaccgaagct agctctcttg aatttccccg gaaagacgga acctttccag
+    67381 ctcactctgt cagtccacta agacgcgtat agagtaaatg agcactagca ataacttttt
+    67441 cagtacaata agtccccttc tccacgacca cttttgtttt tgggctttct tggattcgtt
+    67501 tccccagtaa ttccattttg gactcggagg cttttccatt gggctcagta tcataattat
+    67561 cccaaaagcc ccctctcctc tagcttgctt ctttaaaaaa caaagcacac tgcttgttaa
+    67621 tgggcttata tggatcaaag cattagatag aatgctaagt ccaattcctc aatattcttc
+    67681 ttcttcagtg gcccaagcaa gatcatctat cagagaggat agggtccaag gtctgactct
+    67741 tagatatatc ccctactacg agttactccg tacctctgcc tattactgaa caattaccta
+    67801 atgagtgcgg gtagcataag acaaggtgag caaagagagg tctcaaaaag taaaaagacg
+    67861 ggttttactt tttgacacag ttttgccctc gtatctagaa aaggataatt tacggaaata
+    67921 gtaaaagata ggacctccta actttcctgc tacgagtgat taaagacggg gtttactcta
+    67981 aaaaaacgtc aaaatattgt acgggattcc attccccggt tggtttgttt aatcaaacaa
+    68041 gagaaagcac ctcttttcca aaggcagact cggctgtaaa cagaacagaa ttattactaa
+    68101 aaggatgtta atgacttatg gctcctgtct tacttttctg atagggaaac ggaaatccat
+    68161 aactagggat cctcctacgt ggacataaag ggattggaga cagagaaagt gattgtgcta
+    68221 atactataat ggttcgctag ttgctctccc ttgttctcta ggttcggtaa cctgggtaaa
+    68281 ggctgccttg cttcccgttg tggaatcttt tctaatcctt ttggccgctt ctttccactc
+    68341 ggaccatagg gaagtcagaa cggaaaccaa caaaggacaa cacaaacagt ctatctcggc
+    68401 aggagcagag cggctacgca aacccgtctg gctattcata ttccgacctg aggagtaaga
+    68461 ctctcgcagc tactcagggt attgagctac tactagggag gctccggagc tttaaccaaa
+    68521 ggagagaaaa gtacctacac cggtttatac aacaccggtt cacagactca actatacaat
+    68581 tcatcacaat cacagatgca actatacaat tcatcacatt acatcacaaa attccccggg
+    68641 atcaaataga gatcaaaccc atcttactta ccaagttgtg gaaaggagtt ggatgtaatg
+    68701 cctttgtgag aacatctgca atttgtagat cagttcggag agaagcttga atagacctgc
+    68761 aaccaacttc taacacacag tcgatctcta tatgtttagt acgctgaaaa caggatagca
+    68821 agatgtctgg cagcagtgtt atcacaaaag ggaggagcag aagaactcat ttgaatgcca
+    68881 agttcctgca agaaatttgt gagccaaacc atttcactta cagcagatgc aaggcttcga
+    68941 tattctgcct ctgcagaaga cctggagaca gtatgctctt tcttagagcg ccaagaaacc
+    69001 agagaggaac caatgaacag aagccagtta cagagcgccg tgtgtcggga caagaagccc
+    69061 agtcagaatc agcaaaagcc ttgagctgaa gatcagatgt agcagaataa aaaagaccct
+    69121 gcccaactgt ccctttcaca tagtgtaaaa ccttgtaaac tgcctgcatc tgggctgtac
+    69181 gagatgctga tgagaactgg cttaacctgt tgacagcaaa agtgagatct ggacgagtaa
+    69241 tagtcagata catcaacctg ccaataagcc tacgatacaa ctctgcatca accagatcac
+    69301 caccagaatc cttagacaga gattaggttc catagggata gaagaggtct cagcgagcca
+    69361 acaaacctgt ctagatccag agcatatttg cgttgacaaa ctgaaatacc ctctgtagac
+    69421 cgagcaattt caagcccaag gaagtaagag gaccgaggta tgaagttgca actgatcttc
+    69481 gagagtgtat cagctggaaa aatagcttct ctgcgccaag gaaatccgga tctctctaca
+    69541 tactatacga aaagcctctt tcttcacaaa gctggacctt cgttcgggct attggaaagt
+    69601 cagaattaca gaaggaaacg agcctaaaac cacttacttc tgttaccaga tatggcttta
+    69661 cgagttcctt gtcatgctga taagaggcca gtcgatgagg ctcttgaaca agttcccggc
+    69721 ggaagttagc cctataagct aaccgcctgc catgtctacc ccgaaggtat ctttggtgct
+    69781 gttgttccta aaagtctctt ctttaacttt gtttccaaca acttcccctg ctttgttgtt
+    69841 tcagaattga gaggcacgaa atagccccca aaaaaaggcg aaatccacta tttcatcaat
+    69901 caaattttcg ttcgagggtt gaaagcgcca gaacttttga agagttttcg ccccaggcct
+    69961 gtcatccggg ggatctcttt atgctcgtcc actcggggat gacccctact gatctctctc
+    70021 atcggatcga atcgaatcca ctcctgaact ccagaagcaa gaacggacgg atgcccgaag
+    70081 agttctaccg ttgacgacag cagcgggaat ggatttcaaa tagcagctgc taacgaccac
+    70141 cgagaagggc gagtcaagac ttccagtttt atgaaataga agatccagat cttggagcac
+    70201 ctaatcaacc agttgaggaa gaggaacgaa ggtactcagc ctcctggggc aagtgggagc
+    70261 gacacactga atgaaccaac tccaatggag caggatcaag aagaagtgaa gctacgtaaa
+    70321 agtgagcgtg gtagaatctc tcgtcgtcga tttgagattg agggagatcc ggggaatctt
+    70381 attctgtcac ccaaaacgac cactccttct gtcggaaaaa ggacttgccc tatattgaaa
+    70441 tgaaatcgaa acgaatggaa ccgcgacaga gcactcccta tcatcaggta gtgcgcgcca
+    70501 ttcagaacta tgatatcgtc ttctagtcta ttcggcaggt ccaagttgtt ttggtcttat
+    70561 aagctcaggt tcttgagttt tgggaatttc cttccattgg ctggttaaaa ttcaatacgg
+    70621 atggtgcttc caagaggaac cttttcttgc atggtgctgc aacgggacat tctggtaagt
+    70681 cgggtgctgg tggccttctc cgagactgtt caggaacatg gatctatggg tatacctgca
+    70741 aaatagcttt ctctacgagc ctacaagctt cactttggtt tagcttccaa ctaaggctaa
+    70801 gggagttgtt tccacgggat tgagtgaacg agctcatagc cctccacccg taccacaaaa
+    70861 gcgctaattt atataaaaaa gaccgggaaa acgtgaatct atcccgaaca tttcctaaaa
+    70921 tactggataa actgactcag atcggtctcc ctcaatcgag aggctcgctt gcttcacttc
+    70981 aacttcatag aagagggaag gagctatgca taggggtgat tccggtggag atggaatgaa
+    71041 tgcattgaca gttcggtctg atccgtctta ttcgtatatc ttattatggg atatttgcct
+    71101 taacggaatc attagaaggc tttgggcggg gaaacccaac tgcaagacca gaggctactg
+    71161 aaaggaagat actatctctt cttgagcttc cgattgacat accgagattg attcaatgaa
+    71221 agtccctagc gcaggtgaga cctaattcta tgtttacttt tcattcaaat cagcttcctg
+    71281 atgactaaag aagactttta tcctggcggt tgttggtcca acgactctac ttctttcttt
+    71341 tgaaatatcg taagagaaga aggttgacaa gaagaataat ttgtctcctg tgattgtagt
+    71401 tcaatcggtc gtttgtctct agaagaaaga agcctacttg tagaagagaa gaagcggggt
+    71461 agaggaattg gtcaactcat caggctcatg acctgaagat tacaggttcg aatcctgtcc
+    71521 ccgcataaag aacaaacaaa caatagaaaa aataaaaaat gggatttgaa gagtcaaagt
+    71581 caaataaatg atcattcggt ggggactgct ccgccgacat tggaacgagg ccaaccgctg
+    71641 cttatcacaa cctctggtgc cggtggaggt ttgacctcgg cgccgatgag gattaactat
+    71701 gtgaggatta aggtgccgga cgtcatccgc aacgtaaaga tcgtggtgca cagtgtgtac
+    71761 ttgccatttc cacatattaa tcctgtggct gcggcttacg aagtatccta ggaggctcag
+    71821 aagtctcagg aggagatcag acgatggaag gaaggatttc gatcttcgat cttcgttacc
+    71881 attccaaagt taattattta gctttcgatt aagtgagtgt ttgtttgctc agactcagac
+    71941 taaaatgcga ctctatgcct tcccggtaga aaacgacctc tgtggcggga tttaccaggt
+    72001 caaattcaac gggaccctag cctgcagtcg tcaaccggtc cgcttccttg aatgagagta
+    72061 ggtcttcccg atccacgaat actgtctgtt gtcctttact tgtttagtgg catctggaat
+    72121 tgtctgtttc ggtattcact cttgtaaagg gccgcgaact aagctaagcg cttggcttgt
+    72181 cttcttatag ggtcccaacc gaccagtaca ttcaatatcg aaatccgtta ctgttacaga
+    72241 agagaattaa gataacgagg cttggagtat ctaaaactaa gtccagcgaa ctggaagtct
+    72301 cttcagactc cctcattgct tggctatcga agtggacttg tacctttttc gcgtgcgtcg
+    72361 acacgacaat gcttaccttt ctagctttta cccttcttct ccggaaggaa tagaaagaag
+    72421 acaactagaa taaggggaac tactctatat attacagcag gagaaattcc aacatgaaag
+    72481 aaagcaggcg gtgcacttaa atctaaatag ggtgagaaag cttggcttgg gattgggatt
+    72541 gttgactcat catgataggg catgaattgc gtatagataa ggtccagttc attaagttga
+    72601 gacagctgta ggctcaaacc aaactgacgt gtaaagagcc ctagttgaaa taaaccctgg
+    72661 tgaatctttt ggtgtcttag ggtaggagta agtggaggtt aggtaaccca gccagaggat
+    72721 tccgttagcg ggttgattcc gagctggtaa tcgaacaggt caccaatcgt cctcgaaagg
+    72781 gcggttcaag cgcatgagtt cagaggtcta tagatcgtga tctgaatata aagtggattc
+    72841 caggggcagc tgcttctata aacacaagaa aaggtcttgt ggacttacgg attgataact
+    72901 tgccagtaca cgctgagcct cgtcgatgag atgaggtacc tcatttagga tgactattgg
+    72961 aaacgcgcgg tgccgcgtta ggttagggag cggggctcat ttcatcgatc atttctgcac
+    73021 gagcaaagct caatagtaca tatagattca gatagcataa taaactaagt gaacatggct
+    73081 aaaattaaaa ttgataaaca agagcataag attgcctacg tcaccataaa actcgacaca
+    73141 gaaattcata agtaaaagcg gcgtaagtta ctaacatgcc acatgcgagg gacagaactg
+    73201 ccattgacag cctccagggc aggtagctcc cagctcggct tgcttctact accttatagg
+    73261 gtccccccca gttaggatct aacttgcgct ttcacttaag taaggaaagt cctattctaa
+    73321 aagaaaagta gatttctccc tagatgcttg gaaattttaa ttactccacc acttcgcccg
+    73381 gtgaacttct aaccatcctt ttctagagaa gaatagctat ctttcgtcag aattgatttc
+    73441 ttttataccc gagtgaattg aagcattcct ttatgtggga acgtccatct ctagcgtgga
+    73501 taagcctgaa ggtcctacta agcggtctaa ctcgactccc ctttctaggg agatgtggac
+    73561 gaaggctact tgagctcatt ctcacatttc ggggtaagag gctaggctag gaatagggta
+    73621 cagaggcgta gggggaatga aagaatttgc ttagtcataa tagtgctttc aaagtgaaag
+    73681 gatataaagt aattaattcc tctgcttgca agctacctat gattttcttc gtcaccaacc
+    73741 atttccgctg actcaagcat ggcatctttc ctcaagcccc tggcatttcc aggatcgata
+    73801 ggtgcatccc ctcctcaacc ctattttctt atacagccta tcagttattg aataagaaat
+    73861 aggcacctcg ctaaagcctg actatgccac cggtcgttct agcatagtca ctgagaccca
+    73921 acttgcgtac tcggattact tcaggcggcc cattggaaaa tatacttcta ccaggcctac
+    73981 tcaatcctat cttaggaagc ccgcatctct gtgtcaaacc tttttgaaaa ggctctactt
+    74041 gaattgaata atatttcttt attccgactt cgccaatgaa agagggtagt tgaatagacc
+    74101 gatatatcta gaaagtgtgg agtgagtgaa tggtggaaaa aaagtaactg gcaccgcccc
+    74161 ttgctggata gaatatcgag atgcacatgg gattggggtg ctctctaagt gaggggccgt
+    74221 ggaactatag cttagataga aaggtttttt aggccggaca aaggtctagg ttaagtccgg
+    74281 agagaaggct tgatgaacac cacctcctgg ttagtgattg ggcacatgct tcctttacgg
+    74341 cgtagaatcg gcttcttttt ttgacgtatc agaaagggcc tctcttctac cgcccgaagt
+    74401 ttctatttat gcaagtatct tcacttcaat accccagcct atacatttaa tgataaccga
+    74461 acccaagcaa ggagcttaaa tcttgtctat ttccttgaat ggtcgggttg gaaattcttt
+    74521 tatgtgccaa cgcaggccta tcacctaatc tccactgcga aatcctttgt tggaaggctt
+    74581 gctgatttac acctctctcc gccgcttctt aacagttgac tctgcgctgc gagattagca
+    74641 ctttcttagt tactaacaga tatatctttt ccgcgcgagc tagttcaaca gttcattctt
+    74701 gtaagccccc ttgtcagcca cactaagact tcgggtcatc attacctctg ttgaaggaat
+    74761 aagcccggct atttcagctc tgggaggaag gactactttt ttcaccgcga attgcctttg
+    74821 atgcaaaacc atataccgat cctgttaagt cttgaattga cttctgatgt cctggctgcc
+    74881 ccttaagcct ggaagaagag tgactcggga agaaatctcc tgctggagag gtatatccgg
+    74941 cccttctacg aagcctcaac tgcaactgag actgataata ggagcagtat ttttaaattg
+    75001 aattgctggt gccggttggt taaaggcatg gggactgctt tcaattggac agctaactaa
+    75061 caatcgattc cccataaaca aatagttatg aaacattgga ttggatgcct tttaaacaag
+    75121 gaaagtaaag acagctaagg taagtattcc ccacattgaa agatatctat ttctttcttc
+    75181 tgttcccttt gtctaataag atgggactgc aggaaagtaa gtggaaagta agaacacagt
+    75241 tgttagctta tagcggggaa gaaagaacta gctcgactga aaggagaggt ccaaggtaat
+    75301 ttattactct tataaaagag ggaactcgag catttattgc gagagggaaa gcttagagtt
+    75361 ccactccagg agaagtagct tatggtaggg attataggta ggctggctaa ggagttgctc
+    75421 ctggaaaggt agcagcttgg tcccttgatt gcggggtttc aaacctcata ccaagagaaa
+    75481 gattccaaac cagcatacgc aaagcaaaga gcggaaaggt gctttgtacc tcatttcgct
+    75541 aagcagcgag aattgtactg aagctctctt tcaaacgccc gccgatcgta ctacttcatt
+    75601 cttagtgtgc tgaccagata cccccaccct gagctaagag ccatagcaag agatatagcg
+    75661 ttggctgtag gcttagtgag ctcataaact ggcgaatcaa ttcatttgaa agagaaagtc
+    75721 gggtctagct gattcccgag ttgaccaatg atagataggt aggaatggca ggacctaaac
+    75781 gagctaggct tcgttagcca gttctccttt tagaagctat tcttaatgcc gctcgggctc
+    75841 ccttttttat gggattccat cgctcttgga acagatgata taagatctta ctaaaataaa
+    75901 aaaagtatga aaattaaata tacagatata ctctataccg gtatacagca ataacaatgt
+    75961 cagccaatta gcaaagatca agcgaactca ctcaagcgta ctacagcttt tccgtctttc
+    76021 ttccctaagc gcaatagccc cttgaataga taggatagaa ttagtgcgct tgaaagagat
+    76081 ttggcttagt gaaaattcct agtcttactc atctcagatg cgggattacc aatgtcacga
+    76141 ttggaattag gaaaggaagc tgtggcactt cttactttat atgaggaagg ggttaaatca
+    76201 ctagcaatag tagacacgct acgatctgct agacaggagc agtacaggaa gcatcgaatg
+    76261 ccatggttct tgttacagac taggctgcaa gtgagagtga tagaatcagc tcaactggga
+    76321 atgttgccga tgccgatacc ggagctgcta aaggaagcag tagcagtaca accattaaat
+    76381 ttgaatggaa ttgcacattc atcttcctct atcaattcat ttcttttgtt tgaataagct
+    76441 attaaggtgc ctaggacact agtagacacc cagaaggagt tgtaagggag aacggaaatt
+    76501 taagatcgga cgattttcaa cagacaagat cagatcgtag aatagagaag gatttgctgc
+    76561 cgctactgaa tcagagtcca cggtgcaaca atttctcatc tgctcacgaa ttggattcga
+    76621 accaatcaag ctacttgccc tttagtagat cgtgagtggg tcagtcgtcc tcctcattat
+    76681 agtcctccta aaatcaatag catttcgtcg gaatacatcc tgtcttttca ccttagtagt
+    76741 cctatgcata gtcagtacta tagccccaat catggctact aataaaataa gactaggaac
+    76801 caaaaaccag acagaatagt aggtataaag taaattgccc aatgtttcca aattagtcca
+    76861 acttcgtacc tttccggcat aaaccgtata tctaagagag gtcgtatttc tttgggttgg
+    76921 tagtaatgga atgctttcat tatctaaaat gaaaaacatt tcccaccaaa agatcagtcc
+    76981 aataatccca ctcactggta aatagcgcaa tacttcttcg tgaatctccg ctatttgaat
+    77041 atggaacatc ataacaacga aaaggaatga aacggctata gctcctatat gaactactgg
+    77101 gaagatcata gcgaagaagt cgagacctaa caaaagaagt aaacctgaag tgtcgcgaaa
+    77161 gactgggatg ggaaacaaaa cggaatgtac cggattttta gcacgtgcaa ccatcaaacc
+    77221 agagaccaaa gcagggctcg acaaaacaga aagtatcatg gtaccgtcgt ccttccctga
+    77281 aatggaactt tcatgctagt tcttgcttca gcatgaaagt cgatctatta cgaccagcgc
+    77341 ggttgttcct atttcatttt cttcttccaa gcccttcttt cttagaaagc actcactgag
+    77401 ttacttacgg aatctcaatg catccattta atgcattctt ttcgatcttg tacccacgga
+    77461 gcggtagact gaacaccaca aaaatatcga ttcaaaaaaa ggtacaggca actaaacctg
+    77521 tgaactcaga tagccttgtg gtatggtagc gagatccaat cttgagtgaa aaagataccg
+    77581 cggcttcgat ctttttcaac ataatcatcg cgcagtaaat caacaaagcc caaagttgac
+    77641 tctctgtctc cttcaagttt acctactact gtacccgcgt gaatatgatc tccaccagat
+    77701 agacgtaaag ctttagctag tacacggaag tgcaaaccat gattcttctg tctatcaata
+    77761 acagcatgca ttgcacggtg gatgtgaaga agtaggccat tatctcgaat gagccaaact
+    77821 agtatttgcg gtgaatcccc ctgtttaagt agtcatgcat tacgatagga actcccaatt
+    77881 ctctggcaaa tacagctctt ttgatcattt cttcgcatgt acccgcagta gcattcaaat
+    77941 aatgcccttt gattccacct gtttcagcct gtgctttata aacagcttcg gcacaaaata
+    78001 agaaacggtc tctccaacgc ataaatggtt gggagttcac attctcatca tctttggtaa
+    78061 aataaagtcc accacgtaga cattcataaa ctgctctacc atagttcttc gcgccaaacc
+    78121 aacaatctgc acgcttagcc gcattgttca tcttgcctta accctacccg aaggaaggct
+    78181 atgaattgag ctaacgccct ctgtaacagc ttaaatgata taggaggggg ttgatcctcg
+    78241 atttgattct cgtgttgcga gtgtcgtgtc tcgcgtttcg aattcatttc gattgggaaa
+    78301 gtgagccgat ccagacctaa ttgatctggt ttctgagagc actgacaagg ctaaactcgc
+    78361 cggttgaaga gagcactgaa tcattcgaaa ggggaagcct cgactggaat ccctgctata
+    78421 aagggcagcg gaggaatacc tacagattgc tcgccttaac cttaagattg ctagaaaata
+    78481 tctaatgcag gagaataact aacaatttat gtggaatact atttcatttc gggggtaacg
+    78541 actttaaaac ttgcctatag gatcgcttga cctagaacct gtttacactg gtcctgctgt
+    78601 gaggaacgga ctttatcgag caaaaaaagc gattctatgc gcagacgtga aagctctatt
+    78661 gataggaaac attctagcca attttagagg aacgattccc ttgtctgaga accgccattc
+    78721 ccgcactatt cctccccaca ttgagaatct cgataactcc aaattaagaa gtgagcgaat
+    78781 cccaaagctc tctctttccc ccttttcacc tttaattggt gaaagtctaa aatctgatgt
+    78841 gaatagaaga aaaagaaaga agaaagatag gcggacgact ttactatagt ataggtcgaa
+    78901 aaaaccgtga gcagtaagca gcggatctcc ccttcttttg ggaaagcagg tggccgcgtg
+    78961 tgaattcttt caaggcaagg ggcggcctga tttcgacatt gttgtttcac tctgatccgg
+    79021 tcgaggtggt tttcgtttac cagcgcgtag gtggtctaag ttcctatgac cgagtctttc
+    79081 tggcccctgt gaacatcttt tcttaatgta ttaaagaggg cctctctaac cggtgaaaag
+    79141 tagtgggtgt ccactaacgg ccgttcccgg ctcggctccg ataacgaaat tccgggagga
+    79201 gtcggtagtt gggcactgga tcccttcgga cctggagaac gtgtgacgct gggtaggggt
+    79261 ttggtgaacc aactggtcgc tcctctagtt gaagtatcgg gccccttttc gttgcctagg
+    79321 ttacaccttc ggaatacccc agaagggaat ttatctctac ttccctaaat cttcacttga
+    79381 tgccacgccg actctccttt cgtcataagg cgtggaatta gaccaagggg aatctccata
+    79441 gaaacaggtc cctctgattt cgcgcgtagg agatcgagac acagcccaag ggctatggag
+    79501 aaagatgtag atcgatcgcc ctagatagac aactacatag agggtcttta caagtctata
+    79561 tattctttga tttcgttccc cccccccgta agatcaatat cacaactaat gaggtcttaa
+    79621 agtttcacat ctaagtaata cccgatccga tagtttacaa tataaataga tatttatcta
+    79681 tttaacaaca acattctaaa aaaagatatt aattgatatc ggtagttgtc cggtcgtacc
+    79741 caaacaataa tattccagag gaaaatgcac ctaagatcaa atatttcgag ccggcttccg
+    79801 tggaaaattc agactttctt tttgatgctg cgattacata aaaacataaa ctttgaggct
+    79861 caatagctaa atacatggca attaaatcat gagccgagat cataaagagc ataccgcgag
+    79921 taggaagtgg aattaataca atgaattcaa aagcatcaaa cctctcttgg tcggaagaat
+    79981 cgaaacacat cgaaatggta ccagccgtac ttaataatag aaagatttgg cagaaatatg
+    80041 taaaattgtc cctcctaaaa agattattcc agaataaatg ggcaatagtt aggagaggtg
+    80101 cgccagcggc gagcagaagc aaggttatta gatacctcag gaaagcccgg ccagaaccac
+    80161 acgtgcaagt ttccctgcat gtggctcgtc cgtgataact tcttcggatt tgcgtgaact
+    80221 agcagaccga tcactaagtg gaaaaaaccc tccagcataa gcgaaccttt tctccactgg
+    80281 ttaggaatgg gctaagagag gaagccaaac aaaccgacct attaagcaca gctaagctaa
+    80341 tatgcgccgg ggaaagccag gtgccggagg taagctttat tcggcccgga gcattgattc
+    80401 cccataacga ataggctagc tctatctttc aattgcctat cctgtgctcg agaaggtctc
+    80461 ccagttcctc ggcagcacct cggggtgcat ttagttgtct tacatgagac tcgggatatt
+    80521 cctcacccca tggcaccttt tgctcatcca ttccgcccgc ccgtacagac gcggcgccct
+    80581 ttttcattat catctaccgc cctttctttc ctcccggcta ttcacgcatg aagatcgtcc
+    80641 tatgtatgct ttacttcggt tgccggtgct ataggtagga gtacattatg gcaggatcag
+    80701 tcacccgggc aaaccaaccc ccctccaaga ggaattgtgc tatgcccccc cgatccctat
+    80761 agagcgaaag agctgcctgg tgatccctag gttgcagcac gtccggtcgt taactcctcg
+    80821 cgccgttgcc gccgtttcta ggaccgaccg acgcctcacc tgcactggta cctacgtagt
+    80881 gtcggtcgca cattcaacat agcgttcgga ctcatgtgcc ttccagaacc agggggtctt
+    80941 cgagcctgct gcgtacgatt agccagcctt gcggcggcaa aaggattaga cgtccccccc
+    81001 cccatgctac ggttcctgcg gtccgaatcc ggcgccgcat taggttaggg tagggggctg
+    81061 ggcgataata gttcctctgc atccccaagc gcgctgccct cctaggcgcg caacactaag
+    81121 taatccaagc caacccacat tactggctaa cggcggataa tcatatttct tagaggtact
+    81181 aaatacaact ccatgaatga gcaaaatgga ggttgcatta atgataaaga tctctgggga
+    81241 aaccgctaaa aaaagattga acatgtgtgg gaggatccga acgaattctg ctttcatttc
+    81301 ccccgtatgg agcactgagt cacttacgtt acggtcttca gcgggcaggc tgcactctag
+    81361 gcgacggcta ggaaggatcg ctagaccagc agccaaacca acaatgaata tataatataa
+    81421 tgatagtgat tcggctcaat aaaaaaatgt agaaaagaaa gggcggaata gcataattgt
+    81481 ccgatttatt cctcgatctt ctttgaagac ttactcctcc tttccgaaaa gatcgtcctt
+    81541 ggtccttttg acataagatt ctcggccgct caagagactc tctctcgtat atatatttat
+    81601 acgcaatatc ttgactccta ctacccctcc tttccgaaaa attgattgag tctttgtcta
+    81661 agcgagtttt ctaggccggg tgacctggcc acttctattg cttatctttt cattacatct
+    81721 tttgttctct agcggacttt ccgacttcaa ctggaccacc cacccggact aggaacccga
+    81781 atttatccgc ccatccccgt cgaagaaggg gcgacggcct tgattatgcc gggcatgctc
+    81841 cccgaacccc acccaattaa taagactaag taaaaaaaaa tggaaatagt gaatcaagat
+    81901 ctagatggat ccctatcttt ctctctatcc acgggatacc tatctagatg aaaaaatcca
+    81961 tctatgaatc gatctagact atcctctatc cggatagaga tgtccctatg agtgactacg
+    82021 ccgatcttta tatgttttgg ccacgtccgt ttctgctccg aagaggaagg tttggatctt
+    82081 ttaatcttat gtcgtgaggg atatgaccta tgttaggtcc ataagatacc acagcttttg
+    82141 gtgttctatg attaacctcc gaataatgag taggtagttc gatccttttg attcttctct
+    82201 tcatagtcaa cttatggggg gatgcgaagc aataggtcga attactatat aaagataaag
+    82261 aagagttgta aactatagga ctgattgttc gagtaggaag atttaggttt ttatccttcc
+    82321 ttctcttcga taagaatagg gattttaaca aattcctctt cattctcttg tgctcagcga
+    82381 aggaacttcc taagcatact ttttcggatc caaacttctt tgttctttct aagtcttctt
+    82441 cttgcataga agaacgcaac tgttgcaaaa accgggattt tagtagtagg cggcatcccc
+    82501 tcttagtttt gagtaggtgg aaccattcag ttttgcttct tctccacatt cttagcggtt
+    82561 gatgcagtaa tttgcctatg attttttcaa ctgaaatttc tttatagaaa gatctcctta
+    82621 tttcttcacc gcgtattatc gcgttatttt cttgaaaaga tattatatca ccgtgggaaa
+    82681 gtttaaaatg agtaatgctt accattcctt tattcacaca aacccttcga tgacttatcg
+    82741 gctgccttgc ttgaggaata gtttcaagaa aatggagacg aagcggaata acgtcaaatc
+    82801 ttgtttctgg attgagtgga aaagggatat atgaagttcg ttttgttcct ctgtgcatct
+    82861 ctgtgatggg taaatcccca tgaaaaaggg gcaactttcg tgtagtttgt aattgtatat
+    82921 aactggtaag atattttttc ggataaatct ttctcttctt aatagatctc ttcctgttcc
+    82981 tcaatcttcg gagaatgcgg cgttgtatta ttgtcagttc tctgttccga acatttcctg
+    83041 aaagtagacg acaagtttga aatcttaatg gggacaaatc tatatctcgt tgaatcagtt
+    83101 ttttaagcag ccacattgcg tccctgggac tcgaacccaa ttctgcaaca acccctgttc
+    83161 ctgtcctttc tttgttttgt tgatctattc ctcttcacgt tgtcttttta aaaaaagact
+    83221 gaaagtatgg gctattgaca ccatggtcag aaagatattt taggactttt ctaagtcccg
+    83281 cttctacgat aaagtctttc cggttttaat aaagcgagcg ccactctgaa tccacgatcc
+    83341 gtaaggatct agctccgcca cattttgagt agcacttctt tattttttga aaaggccaca
+    83401 aaactctttt caaaaaataa atctacagtt tttggtagtt catttttttg tgaaagaaag
+    83461 aggcggaaag ctatgtcctg tacagtttct cctcccacct gagggcgaag tcatgactcg
+    83521 acctttagtt ttgtgactcg agtacttgtt gcgagaggta ctttagtaac tcgactaaaa
+    83581 ggagaggttg tttacatact tgagctttct gcgagaggga tttattactc gactaaaagg
+    83641 agagagagca aggtgaactc ttcccattgc taacattcca gggattccag aaccaactaa
+    83701 tctaatttcc taacctggga ttccttaacc aactaccaat ttaaagagta aaacacttcc
+    83761 ttgggataaa ctgattcaaa cagataggca gattgaaatg gaactttcta caaacaaaaa
+    83821 ctccttacct tacttggctt acctctccca ttcctctaat ccctaaccaa aacaaaccta
+    83881 atctcatcac ctcattgcta atattgctgg gattccctcc taaccaacta atctccttac
+    83941 ctcctattcc tgggattact gggattactg gttctcccat tcctaacatt cctaggaatt
+    84001 cgcaagacac cataggcaac atgggaatcc ttgatctact tgtttctcag attccctttg
+    84061 aggttaggag agtgagtcga ttcgacgagg ctagcttgta ggttagcctt tagtttccgt
+    84121 caaagaattg gattttcata cttccttccc ctttggtcga ttcaagtgga ctttatccta
+    84181 gatttgtcga tttatccctg gttgaggacc tttggatgcc agttttctag atgccagttt
+    84241 tctataagaa gggtcagcct taaaggaaaa aaaccaatcc caaggtaagg tctaaactga
+    84301 tctaatcggt aaagcccgta aaaagagctg agtttaacgg cgagaagctc ctttcaaagg
+    84361 aaggagtgac agaaaaggtc gggacagtgg ctgctaccgc attgacttcg taagatttgg
+    84421 atgtgttaag gctaacctcc caaaactcac taaatttacg ttgcttcaca aggtaattcc
+    84481 tatttgctta cttgttagag taaggaaagg agaagagctt caaggtagaa cctatgaacg
+    84541 tagatcgcat ataatgtgtc gatctcatca ccttaccaac cagggcctat gtatacgaaa
+    84601 ctggtgcacc atctgcaatc gctatatata cctgacctat cttctcgtgt atcacccttt
+    84661 ctttcttcct ttcctaacta tcactgaccg cgggggaaat gctgacagaa tgccaatact
+    84721 tgttcgatcg aaattctaac agttgttccg ggcgtttaat gtttgttctg gttcaagcta
+    84781 gaaaacggtc aagtgtgttt ctaatcaggg ttatctaaat tggaataagt actttttgat
+    84841 aactttgtct catgcggagc aaagcccttc ttagtcaagc ttagtccctt tggagtatat
+    84901 ttaaagggaa gtgcggatct ggagtgtttt gtttagacga gctatgcagc aagctgagtt
+    84961 ctatcgaaac aacgttttcc gggcatacga gaaaagagtc cagcccagca tatttatttg
+    85021 cgactttctt gatcccggaa atcttgtact cagagtcacg gataaagata aagtggacta
+    85081 ccctgtatga cctacagttc tttttagtgg ctttttcttg tcctggacca ttcagccctt
+    85141 tctcaaccaa gtataggtag aagccctctt ttccatacag ataaaggttc gacttccaag
+    85201 agaaaagata aggaagtcgg tattgatcgt ggaaatatct tagaaagtct tcaaaatcgc
+    85261 ttctcttcct tcattctcag taggctaccg tcctcggaac caatgcctaa gatttgcctt
+    85321 gcggtctgcc cttagtctaa agacttctat ccactacgat ggaatctgtt tagtggagag
+    85381 aaggcctagt taatctagca ctatggtgga atctaagtgt aagatattgt tccgcggttg
+    85441 gaaaactagc tcctatgcgc ctgtaggtag gacgttagaa aaagaatccg aattaggctt
+    85501 gcagacctac tattaggctt tcagtgtagt aacctagatg ctgaatatag actcccatct
+    85561 atagtaaaag agaagaaagt caggcggaca ctggcagctg caagtcagtc ggttggcttc
+    85621 cgtcacttct ataaaacgct gcccaacaga ctgaaatgac tatcttcggg atatgatgcc
+    85681 agagttggtg cttttgcggc atctagttca gtatcagagg gctgcgctgc ccttggctaa
+    85741 gaaggtttac ttctctgtca cttccgtact tctgtaactt ctctttagct tattaatgga
+    85801 attggttaga aagtctttcc ggctagaaga gagggattag tacacctcca acactgactt
+    85861 cttagtcgag tagttctctt tctcctgggc tgagacgtga cctcttctcc tcgttttcat
+    85921 tgggagggaa gatatagata caaaggtaag cacgaacgga agtattcgaa agagctattt
+    85981 agctgaaaac tctatagcta aattactcta tcatagaagc cattactgac tgaatctggg
+    86041 agggcaaaag gcttagtaat agtaagcttc tcaaccctgg taagggttta cttcgtccca
+    86101 ggccaggtcg ttgctcgttg cacggtaggc ggctttgagg aaaggtatga agtcactcgc
+    86161 cctttagctt tagaaagcag gagaggtgct ttagcaactc gactgaaagg agaggttgtg
+    86221 aacacaaact cgactgaaag gagaggttgt gaacacaaac tcgactgaaa ggagaggttg
+    86281 tgaacacaaa ctcgactgaa aggagaggtc caaggtaatt tattactctt ataaaagagg
+    86341 gaactcgact gaaaggagag ggagagggat ttgacttttt ctgtcaggcc aatttctttc
+    86401 tagtggtaag tgctttcagc cagttctagt ccttttctta tacgcttatt ggcgttagca
+    86461 cattcctctt tcttattgga tggagttgtt ttgccacctg gagttttaat ggaagtttga
+    86521 gtgcgtccta aaagccaatg cttctgcctt ttggaagtgt ctgcctatat ctgtcctccc
+    86581 catttaatca gtctctgtcc ttaccttcga cggtaacccc tttatagtac agcagtcccg
+    86641 tttccggcag tgccttcccc ccccctagtc tagaccgagt atcaatagtt gtagttcctg
+    86701 tccgttatgg acccttcgta tctttcaaat ccaatcaatg tgtggtgtgt atccaagggc
+    86761 aaatgcaatc tctgaatctg ttacagccat tgtagcacta ggtgcctacc cataagcagt
+    86821 agttgccctt gaatgtgcat caaatcctaa gacaagccta tctaatctca cttcttagtc
+    86881 taaagcactc aaatcccttc taaggagcca gccctttttg aatccctctc ttttcttcat
+    86941 gctgtgaaga aattttccct atccccctga ctggcgaggc ttcttctaaa cgagtggagt
+    87001 tcagttagac tgccccttct ttctcgtgct ctccgtccaa gcgagctaac tcctgatctc
+    87061 ctgttgtaaa gatctccaga ccaaggctat tcttaccagt tgacatccat cccgttgcat
+    87121 tcccagcggt tttagttcta ttggatgggg ttctatacga gcgttccata ccgtctcttg
+    87181 ggatcaaact gaattaagtt ggactccctt caagagatag atttctttcc acgataatat
+    87241 ctatattttc ttaagaaaga agactagaat atcattggga gtgcacttaa gccaattgga
+    87301 gtactaggcc ttctgttcca gtgacagtta tagaaaaaag cgaagccctc cactcataag
+    87361 taataagtaa tttcatccct taccagtccg tcctaccagc tcttctgtct tcctttttaa
+    87421 aaggagctaa cgatacgatg ttcaagcttt gccagaagcc ttttgcagag agaaagttcg
+    87481 aagatcagga tcaggattgg attgaaaata aatctcctca gcattcaatt caagtgggtc
+    87541 tatcgataca agtggtaaga ccgataattc agcatccaag acctggccga tgtctacttc
+    87601 aaattcaagt gccacttatc ttaacacaac tcggggctaa tcgactccca gcctatcccg
+    87661 acctggtcct ttcgaaccag tctttataaa cctggtgaaa gcaatcaagc aggacttacc
+    87721 tgctgggagc cccttccttc tattccgaaa cctagaagtt ttttttccag ctcttgagct
+    87781 gattgaatga ttttgttcac tgctaagcta atccttctgt cctactttac tctattatga
+    87841 gacctcaccc taggctaggg agctggttta agccatgccc ttatttgact cccaagaccg
+    87901 gatacatagc tcttctgagc gggtcaaatt caaatacaat gagtgcttag ggaaagtaag
+    87961 aaagaaagac ataaagcgga atccggtaat cctactgcta ctaagactac tccgtcttag
+    88021 ggctaggagt tcaagcgcgg cgtaacgggg gttgtcagcg taacagctgg tgctctaatc
+    88081 tgagctgaga aataggaact gcacttcgac ttgatttatt tatttactat attcccccct
+    88141 ctctaaccca agtcctagaa gtgaatgagg aatgaaagaa cttctgcttc cgcaatgaga
+    88201 cgctaaagtc cttaaatcaa ggaaatcaca atgcttctct tattcagtag ttcaatctta
+    88261 ctttcttacc ccgccgggga agcgttgaat gaatgtgtaa tgatgtccat caggggagat
+    88321 cttgaccgat ggccgcgcga tgtccgagtt ccttcgaatg ctttttccaa gccccttctt
+    88381 gcgctttacg aaattcgttg ttaaggccag tccttctctc accttcacag aagaaaagag
+    88441 attcttctga atggttgata tattcattcc agaggagatg ccggtgaaga ctaccagcca
+    88501 atgatggacc tgggcttact accgccaagt agcctattaa gtctcaaatt gagtactaaa
+    88561 gtcaaactac ttgtcctccc caaaagagca gctgaaagaa acccatggca aagcaacata
+    88621 cccgtttaag catagtcgct cgtctaacgt ttcgggacaa gaggagagga acgcctttga
+    88681 gatcaggatg ttctaccgct tttcctaggg aaggtatata ctactgatag tattccgctt
+    88741 tgtgaaagtg ccataaaagc tctcttcgcg tcaacgacat ccatctaacc atctaacgag
+    88801 atatgcttcg ggtaagattc cctgatctga aatttcgtta gtgagctggt tcggtctatc
+    88861 ccttcactta acctaaagcg tacattcgat agggaacttt gattatcaaa tgggttggtt
+    88921 tattagcgca ctcacgccaa gtgctaccgg ctcatcctgg tgaggcatcc caagcccctg
+    88981 taatatttcg cttttttcgt cctctcgctt tgtccattcc ttgatctacg aaaggtattc
+    89041 agtattagga tcagaggcct atgggatttc agtagccttt aatcaattac aaactcatca
+    89101 ctcttcttga aggttaactt cacatcttgt gtgtgttccc ttgatgtatc ttaagaccca
+    89161 tttcacagcc tgccaatgac tcctcaatgg cttactcata tatctgctaa tcactcctac
+    89221 tggataaggg taatctggtc ttgtaccaat cattgcgtac atcacacttc caacagcact
+    89281 tgaatatggg agtgatttca tatagtccat ttgttcctgc agttcttctt ctgtagctgt
+    89341 ttggaatctg aagtgaactc ccaaaggagg taggaaaggt ttggcacaac gctcatagga
+    89401 ctatcggtgt gcggagttca tctcttggtt agcaaggaaa gattcttcaa ccagacatct
+    89461 cccatttaat cggaacagaa gtaagaaatc taatatcagt acggaccagg ggaaaggtgt
+    89521 tacaattgta gcatcaagtg cactcaatcc ggttgttagt atttgttagg tcttccgtgt
+    89581 taggtcatct ataagactta gaaaatactg acactttcaa aaagataatg ggactgaagg
+    89641 tgctccccat aaatctgaat gcacataatc cagaggattc ttagtagtgt gctggcccgt
+    89701 agagaaattc actctatgtg tctttccata gatgcaatct tcacaaaatt tgagacttga
+    89761 aaccttgctg ctatctagga aacctttctt gaccagtaac tccattcctc tttggctcat
+    89821 atgtgcaagc ctactgtgcc aaagccttgt ctcatcctta gctgtttctg caaggttaga
+    89881 ctctcctgtt tcaacagagc cttgaagaat gtaaagtgaa tcatgtctgt tccctttcag
+    89941 tattgttctg catcctttaa gaaccttcag aactccttca ctacaacttg cttcaacggg
+    90001 ccggggcctt tcccattctt tcgaagttgc tagcattctc tctcctttgc gcatgtgtgg
+    90061 gaataaattg actccatctg tctctgagaa tgggggaagg ccacactttg ttctcttctc
+    90121 tctctctagt ccaatgtcca attaaaccta accgatcaac cctttcagtg gtccaatacg
+    90181 cgcgctgtag ggcgctgaca cttgggtcaa cgatgattgt acctaaaatg agaagagagg
+    90241 agataaagaa agagagcgta ttgaaactct taggtgagga atttggaagt actttaaggc
+    90301 ttgacggaga agagagagat gagagggaag gatagcagtt accgagtagg agtagttgat
+    90361 cctcacatca aaatatgttt ataagaaagt ttcctcattc aagtgagaaa taggataaac
+    90421 caccgcccaa agttgcttag ccaagagtag tcctatatgg gctcgcgaag ccggtctccg
+    90481 gtgggaaaga tcctatctaa ctatttggga gaacttctct tcttttcttt gattgtgctt
+    90541 tgcttaacac aatcgaagtg ggatcatcgt ggagaaaaat atcttgcggt cagtctttcc
+    90601 atatcttctt ctttagctca ttcgcaacag tttcctccat tcctgcgatt tcctcaggat
+    90661 tcctcgcttg ttgtgatagc aatctataac cgctctgtac cgtgtacatg ccactcttag
+    90721 tgtaagccca cacattatca tccataagat ttgcccctgg tatcatcgag atgataaatg
+    90781 ccacattctg tggaggaaaa atattatgca acatatccag tttccacgac tgcctattca
+    90841 tgtcgtacaa gtcagacgaa gattaaggtc aatagatagg tttcaattca gtggaggtag
+    90901 aggtgtctca tccattatcc atctgtctaa ccaaaccttc gtgtgtatac cgtctccaat
+    90961 cgtccttagg agcccgcgag agaggagttc tctgccatga atgatacttc tccaagcata
+    91021 tgaaggtctt gtcccaacac tacactccat cattgagcta tgtgggaaat atcggctacg
+    91081 gagaagtcga gatagtagcg tatgtggttg gtgtataatc cggaaagatt gttttgcaag
+    91141 aagagcttgg ttaaaccaac ctaaatcccg aaaacccaaa ccaccatcat cttctttaga
+    91201 tttgcataat ttctgccaag ctacccatga gatcttcctt ttgttttcgc aagaactcca
+    91261 ccaaaattcc gtcatcgcac ttgttaactt cttgcatagc agctttgaaa gccggaagca
+    91321 cgacatcgca tacaccggta aggccattgc tatagatttc agcagcacct cctttccccc
+    91381 ctgagatcaa aattttgcaa accaaccatt aaggcgggat tgaagcttgt cccgaatgaa
+    91441 ggctaacaac ttcttatttt atctacaaca gcagaatcgc agcaataatc aacagataga
+    91501 aagaaataga ttagaggaga gggtaagggt ggatagggta tggacagccg tagaactcgc
+    91561 gaagcatgca ttagaagcag atcctactag ggtttgttaa cctactcggc ggggtaggag
+    91621 atagatatgt tcctcgtagg gatgacctag tcgaaggaaa gaagatgttc ccaaagagat
+    91681 gttgaggaag gatagcagta tacataatct taatgcaatc aaacggttat cctccttttt
+    91741 ataggtatcc aaactcggaa gatgttatga gactgagccg gctaccgtac ttcgtgctgg
+    91801 aacaggggtg actcctaaca atggatccac cacagtaggg gttatatcac aacttattac
+    91861 catccggaca aaggaaaagg gaatagtagt gaaatgacag gaacgacagc tttcactgct
+    91921 acggctattg gctttagcta tactatggca ttccctgtag ttcttggagt cactcaaact
+    91981 cgaactcctg ttacagctcg gctatagact acgcctctat taatcctttg ttcgtgatca
+    92041 ctcccagcaa tcacttcact ggctaacagt tagttagaaa gtgttccggc taaggcctat
+    92101 agaactcatt tcactccggt tatagctgaa agtggctttg ttcttatcgt tcagtggtca
+    92161 ctctttatag gtaagtaaag aagggattct attgtattcc gatactgcta cgagctgtct
+    92221 taactccact tacttctagg ccattagctc agtaagctca gtaactccat tctcttatgc
+    92281 cagccttacc tcaattccct tcttagcctc aacagctaaa gcatcccttt ctacagctcc
+    92341 tgaaacagcc gaagaaccac aggcctaagt aagtgttgtg ttcaaccttt ctataagtaa
+    92401 tatgggttgg ctggttggaa ggacagcttt atgtcgtaag taagtaaata gtcgtcaact
+    92461 atcgagaacc tgtctgaagt gttattccag cgcttcctag tctttacata gtctttttcc
+    92521 ctataatcaa gaaacgtctc tcaagtcgtg ttccaaccag atgactccat ctctctcttg
+    92581 gactgtttcc ttcatcctca gaataggtgt ctgagggtct tcatctccgc tggaatcttt
+    92641 acttgcaatt tgatcaagtc agctatccaa aatgagtttt cattcaagaa ccactttact
+    92701 ttgagctcgt agcggaaaga gcagattaac tttcagtttc tcatatggag tcccatatgc
+    92761 ttagccatca gctgagaaag attgatacca agtcggagca tatcccgttc gagggtccac
+    92821 ctattcggga gccagtacga gggtcaattc cagtttcgga tacctgcgcc tattgcatat
+    92881 ttagtatttg cagttcaccc cgctgttaga taggcagagt aagcataggc tgtggtataa
+    92941 atcttttgag tttgaagata ttttatcgac cggagcaaga tccattcata agggtttgga
+    93001 gatttctcaa tctaaaaaat caaatacgat tcaacagcgg gagagatcag cattgatagg
+    93061 acatggagat gtgggctcag aagggagtgg aatagagtca tttgatagct tagatgccgc
+    93121 cactagagag aggttcactt gcttaccttg ccgggtcacc aggagaggtt gtgcagaaca
+    93181 agaccaataa cccgatctct gctaaggagc aatacggatc ggataagaga ggatccatct
+    93241 tctcggggga gtctaaaaga agaccaccac ttccatacgg aaaagtgtag acgatgatcc
+    93301 gggacttttc gatctttttt ctgttaccta actcttcaat cactctttct tagggggcga
+    93361 caggattcga acctgcagtc ttcaggtcat gagcctgatg agttttcttc ctctacccag
+    93421 cggtcttccc cttctctttc tttcacaatt cccacctagg tcccccgctt tgttagcttg
+    93481 gccgggatag aagctgcgct tagtaagcgc cccttgtctt gtataggttg gagctatgaa
+    93541 gcttagcctt cttattagaa aaaggtatga aatgactcga ctaaaagagg aactttcctt
+    93601 tgtgaacatt tacttgcctt tgcctttcct tcccgcctta agaaaaagaa agcccggatc
+    93661 cggataagca gctatcggct tcatgccatc ttcattcatt tgtccaattc cgggcactgt
+    93721 aagaacctat tctctgattc aatcaaatat cccaaaccaa aggcctatga aagcccgctc
+    93781 aatagcaaga aggctttttc aacgattttc atgcttcctt cctcgattga tgaggccgtc
+    93841 ggggtgcctt ttgtgctttg cttgcctgtt agttttgcgt atcagttact agctttcctt
+    93901 ccctagcttg aagttcagtt cttcctttcc ttgggtcatt agtggcatta gtttgaaaat
+    93961 gaatatcaga tcatctgtcc aaattgatgg aggaagattc tttcacagcg cattcaattc
+    94021 ctaactattg tgacgtagcc cgatgtagtg tgactcgacc tttcttcttc ttttttctgg
+    94081 agagcttcaa tttagctgtc atccacggag aagcttcttg ggctctagcc tcatgtctca
+    94141 tattctttca tattctttct cgttggatat ggtatcaact tcaagactgc agacatgaac
+    94201 tcaagatccc accaatccct tcttcttgtt atatgcgacg acctaccgaa gacaagagat
+    94261 gattagctcc tttcttattg gattggagaa gatggctcgc tcacttcctt tgtgcacctc
+    94321 ccgctatcga tctcatttct tcccttctaa aagaaactga agaaagaagt agagtcgcct
+    94381 aggaagtaaa gaaagttgtg ttggttcttt ccctggttcc ttgttcccct gttgccctaa
+    94441 gagaaaggct gttgtgattc tacggtttag agcatttcca agataggcat tgttagcctt
+    94501 gcgcttgaac ctacaaggag gaagagatag caaagtctta tcttataggg ggcctcgaaa
+    94561 tcagaataag ttgaggggga gggaattcaa tgaatctgaa gcatcagata gcaacaccac
+    94621 aggataatcc attcctactt tagaaaaagg gaaggcgtga gggtaaggta aagtagaata
+    94681 ctactcaaaa agctggtaaa agaccttggt tgatcgggta tcgaaggaat ccctttagtt
+    94741 tgaaactaat acccccttgg aaattgattt gatctggcta cgtatctcac ttcctcagca
+    94801 accggtaaaa agtagattca tcaagatagg aaggaaagac gcagtgctcc tgtggtgaaa
+    94861 ctgcaggata aagataggaa aagtggtgct tgtcttgaag gtgaggaagg gaagaggaag
+    94921 tgaatttgct ttgggtgtgg gttaagctct tccttatctt aggacgaatc agcagagaag
+    94981 ggccttcgct tcgaactcgg atgtttagga agaaagctgt ccacttgaag ctgaaacctt
+    95041 agacttcctc tcgggctatc tatgaattag aaagctaacc cttagatccg ggaagaggga
+    95101 actggcagta agggtattac acttcaagaa aagtggaact cactcgaata gagagagtct
+    95161 aggctcactc acagacccag ctgtctttgc tttgttcaca acgtcagccc gcaagaactc
+    95221 accctcgtgt gaaagatagc aagatgtggt tgatggaagt aggtatgtgt cataagttca
+    95281 ggcacagtgt cttctaggaa gaaactttgt gtaaggatct tttacagtaa agcaaggtat
+    95341 tgatcaaagt gacagtggtg agggtaacct tgtgaaaaaa aaaatggtta ggtgtgccct
+    95401 ttcctatggt gtctcttctg tgttagctag tataagtctt agtgcttttg ttttagtaag
+    95461 gtcaatagga gttagctggt agggcaatct ctcgtgcaat caggaataat ccttgaacct
+    95521 tccaccccgg agtttcttct ttcttcaaag ctttggattg aaaacctttg cttagcctta
+    95581 taaagtagcc aattttccaa ctataattag aatgaatgtg aaaagaagat gaaagaatcg
+    95641 tgtgatacta tagctagcta cgatccccta caagaggaat cactctactt accctttggc
+    95701 gggcttcgag gaccctactg actaattctt tatgcatgga tgcaggaaga gcctacgagt
+    95761 tcatttgtaa caatatgaaa gaagaacctt gcgtgcccga tcatgatata ggtacagctt
+    95821 ccagccagaa aggtgattgt gctagaaaga tggcagtgaa gatcatggat aaagagctag
+    95881 tcaagcagca tcctgatgat atccacgtac caactactgg cactgctagt gcttttgtgg
+    95941 attaacctac ttcccagggc cggcggtcag agagaaaaga tctctatgaa aagcaggaga
+    96001 cgggaaaata ggctcgctcg aattcgaatg ggttttaaga gttcccggta atcaataatg
+    96061 gcaatgcttt ccttagaaat ccaataagac aaaagggcgt aacggtaccc tacgaaatcg
+    96121 gcagatgatt gctttgattg gcctgctgta tcagtttatt cgactacttc ctccataccg
+    96181 tagtggactt ccttcctaga tagatgtagg agcagcggaa agggctgctt tagaagcttc
+    96241 aatagttccc tcatcaacgg atggtttagc cgccaggtga tagggatgtc aaatagaaaa
+    96301 ggcaagcagg cgtagaaggc cacgttcgat agctgaaaag tcaagctgcg gtagagggtg
+    96361 taaactaaat agttacagct gggcttggca attatctagc cgaagtggaa ggttaaacgg
+    96421 gataaaatcc tggtcattct atagttgcag gatcggtaaa tgaagccgga aagagtatct
+    96481 agagataaaa gtccaggcgt cgaagccaat taaacacttg gacaagatag tgaagggtct
+    96541 ggggagaagt aaattctaga gttcattact cggatgggga ctacagtaaa gaaatctatt
+    96601 ctagaggact aaatgtcaac aaactttagc gagagtacca agatcggact caacttccta
+    96661 ttccaaaggc aactttacta gcgtggttat gaatcaactt acaattcttg tacaggagga
+    96721 actgttaatg aaagacaact atagtagtaa tagctgtgaa ttccacttct gagttagagt
+    96781 tcaatccttt ccatctgaaa gtacgtactt cttatagctt gagaagagga aggatgcatt
+    96841 cgtcagcata ggaaggccca atcaaatacc ccatcatagt ataagttttc cctcttgatt
+    96901 gacttgaaca aaaggaaaga aagaaagatg cacaaacgaa acaggaaact acgaaggtct
+    96961 caggagcaat atcaactacc tagaccgacc tgctataaga attccataaa gaccttgcga
+    97021 agatacctgt actgaatgac tggatcgtac tagatagata gtcttagata gacttgaata
+    97081 cgcaattcct gccaatctcg cacttgacac gcaataaaca gcagtctatc ttcacagaga
+    97141 aggatggcaa ggggtgaaag gcttagacct tatctgctct aggctagttt cattctattc
+    97201 ttgttcagca tatttagttt caaaagatag aggtccggag gcattaatta gttgatgagg
+    97261 agaaaggcca gccacgaaac actcacccaa tcaaccttca aaggcaagta aggactctct
+    97321 caaactgaaa attgctgttc acattgcaag atctgtctca agatgttttg aaaatgagaa
+    97381 acagcagcag aataaatagt atactttatc gagaaaaggg aggaagtatc ttctttctct
+    97441 gacgtagacg ggcattccta tcgagattac ttccttagga ccgggaggtc gaagaatcta
+    97501 tatgctacct tactttcagg atggctttac gaaagattcc atttagctga agtaggacga
+    97561 agcggtatga gataagaggt ctcaacgagc cttcagcatc tgaaacggga gctttaacct
+    97621 gcatatcata cgggagtctt agtggagagc gttctctggt tatcatccct agttaaattt
+    97681 gaagtgacgg aggaacccct attcgataaa gaagggaacg gtgcaaataa agctatttcg
+    97741 agtcctctat caacaaccaa ccaaaccacc cgcttgtcgg gcttaaagcg aaactgagct
+    97801 tctcactaac aaaatcaatc gtaagcttca tgtccgagac tcggaaagag tggcgttaaa
+    97861 gagggcgtca gagcctctcg aagtctccga tctcacagat ggttaggctg caattccttc
+    97921 gcttgtgaag aagcttggat agttctcctt actcggacat tctattcatt cgaccaccgc
+    97981 ccatgcttaa agattgaagt tttgttttga cccggtaaaa aaaaagaacg ttgtaacgtc
+    98041 ggttaacgca gcctcacagg cagcctgaga agtttttgac ccctttacct atcttgggtg
+    98101 agctacatcc atacatagtc taagcctctg attgacagta ctagatacgg aatcttagtc
+    98161 ccaagagttc tgcggaagcg ggctaaaagg caatagttcg ctagggtggt tatcccaatt
+    98221 ctttgataag atcgctaccg aacagcccca aaagcctagt ctttagaagg aacgaacgga
+    98281 tttctttgtt tgttcgtgca atgaagaatc ttatctgaag aaggtaatac ggaaagaaaa
+    98341 aaactcaacc tgatgctact ggaatagaga agcaagcaaa gtgggtcggg ctggccaatt
+    98401 gaaaaaagcg gtatttcttc cgcgaagact aactaacggg gcgcactcaa tttctacctt
+    98461 ccccgagctt accaccaccc gtggtacaat agaatgaaaa cagagtaatg accaagggaa
+    98521 ttttctatta accaacggga gatcctcctt ttctcgctaa tctaggtctc caggttcccc
+    98581 agcacagact tcttgttagt ttaaactagt aaaatgggag tgaagctatg ctgttaaaca
+    98641 agacaaacca atcgcagatg tactatcatg cgaggaaggc cccccattat tgttgaaaaa
+    98701 ggactattcc caacacgtct ttcgtctaat cgggagaaaa aaggaaaaag gcgttattgc
+    98761 tgtgcttccc ttccctttac aggcaagctt taacttcctt cggggaaagg gaaagacctt
+    98821 gaaaactctt tccgtatagg ccagaaaaca gcttgcttag agaaagactg actctcctac
+    98881 ggacctggtg gaccttacac agtcgagtta ttgcatccct ctcacaaact atcaatttca
+    98941 taagagaaga aagatcgttt ttagaaaaga aagaacgttt tgattcgagg cggatccctt
+    99001 ttttctttgc ctttacgagt tgaaaagaaa gtggtaacag gtagtagctc tggtagagtg
+    99061 caggactgta aatccttctg tgagcggttc gaatccgcaa tcacttatag ttagtttcat
+    99121 cgatattttt gtggtgttca gtgtaccctg agtacaagat cgaagaggat cctcattccc
+    99181 tctttagtca atcccaatcg aaaaatcagc ggagccaatg gcttgatcat aatgctgcgg
+    99241 cggggtcaga gagccggcaa accgtttctg gttctagtag aaagggattt actgattcgg
+    99301 gttgaggatg acgagcaaaa gagtggagga aaagctgtta tatagttagt ctatttctat
+    99361 cttcctatct ttgtcatgtc agtcttattc ctatcttggt atctttgtca tgaggtatgc
+    99421 agcacactca accaaaatat atagaattgg atcatcaacg ggtgtgctaa agaaacgcct
+    99481 ctacgtaaac ggatgaacgg gatcatcata tagtgagctt cgtttgggat acttaaaccc
+    99541 acaaaaatta gctctttagc tgatatacga ccaccaacta tctggctatc gagaagatct
+    99601 aatatgcagg gaagtaattc aatagaataa cgcaaggttc gataaatcaa ttgatgcaaa
+    99661 agcagggaat tagaccactc catcctttat ttcacctagg agccatctta gcagcagggc
+    99721 tttcgactct cgaatgtcaa agaaggtcct ttttatcatc tgagctttct cacttgtgcc
+    99781 ctcagccctc aagacttcgc cagaccttga ttttcgattg aagataatcg tttcttcgct
+    99841 atcccgatac caaaatggga agatctccct tctgggttgg tagatgaatt gttctgggta
+    99901 acaggcggtg gacatcttcg ttgacagatc tgagttatcg acttggtcat atgccctaaa
+    99961 gttaccctaa gccctcgggg ctccatccga acctgtgact caaccttctc tttagcggag
+   100021 gagaatagtt tcgagtgagt gtgctcccga accaacgatt tgtatactct ttgccccagg
+   100081 taccaatacc catgtcttcc tgcttctttt gattctctta ctcttcaaaa ataaaagcaa
+   100141 agcacaccgc taaacaaaaa gctggtatta ccaaccttct cagaggttag gcgattcatt
+   100201 gagtcgatca gtcatctttt tcccaagaca actttattgt gtgtaggcgc acaaaggaag
+   100261 atcgtaaaat caggattact cgtccacttt catcgccttt tgttgatccg tgtcatcctt
+   100321 gctgcttcat ccaattattc tttattcctc ctctcacttg ggcgaaacac tagaaacggc
+   100381 cattaattta cggatcggtt atacaaggag catacctact aatacttcgg cgtgtgctgc
+   100441 attcgagtgg ttaacagcat acatctattc ccaaatcccc aacggagata gatagtatcc
+   100501 ttctgtgcta tcccctatat cctcgtagta gggttgaggg cagcgaagtc tcccgtttat
+   100561 cctgtctcct tcctcttggt tagtgcgtac tacttccaac gtatcttata tagagacatt
+   100621 ggaggagcct gatcttccat ctacgcagaa gagatgcttg agttcttcca tccgcgaagg
+   100681 cggtgtgctc aaataagtta gtttgttacg agactatcag gtccaccagg ataattcgag
+   100741 gtggatagcc ctgactgaca ggggactaag attaggaagc atccacaaga gcgaaagagc
+   100801 ccttttatgt ttgtttcgct aatcttatag attggaagtt tgcccagcac agagtagtag
+   100861 ggttatgctt tcgaaagtct tttcggagta taagggaggt ccatctttag agatctgaat
+   100921 gaaagttctg agcttaatgg attcccggaa ccccaggact ttagagagtc ccaaccctaa
+   100981 gaaggcttgc tcgggttgaa agatgtgatg cagcacagac agaggtgcga tagtaagtca
+   101041 attgatcagg agaggtggta gcttctggtc tctcaagctg acaatggtag tctagataga
+   101101 tataaagttc tatgctgttt gcttgctcaa ggctagaagc aggagtctgt tattgatagc
+   101161 tcccgttgcc attggctgtt ccgcccgtgg tctttcagat agacgttcaa aataaaattc
+   101221 ttttcatcct ctcctcctgg ctaccagtat atggagatgg tttgattctg agagttgagg
+   101281 aacaccaagc aagatttgat tgacatcatc tacggctttg cgccaatatg aattatgaat
+   101341 ataggttaag tgaaatctga caaccagcac actgcttgat ctcattagca gctccggcga
+   101401 agcagcacag accaaggtaa gcacaggata ggcagtatct ttatgaagtc tcgtaaggct
+   101461 cagatcttta atagaagtaa acttcaggtc tttgttctaa taaatagcca tcttcgtcct
+   101521 caatggcaag aaagaaaggc tgctaggagt aagtgggaag tgccagggta aagcccctga
+   101581 tggatcgatc aacggattct gcaatttcat ctctttcata agcgattaat tcttgtttag
+   101641 ccttagttgg gatttggact cagaagccga acacagttct tagatcgatt aaaagcctgc
+   101701 cgcttatcga cactcttttt ctaaggatct taggcagcgg ttgaagttac cagggatgga
+   101761 accataggtg ctttagcaac agcaactcga ctattagaac atgcgtacgt gaggagcaac
+   101821 tcgagtgaag aagcaagtta tggtgtattg tccaatgtgc cccgcttttc tccatgctga
+   101881 ggactattcg acttgtgagt cagattccga tttagtggag gaacttcctt tgaaggcacc
+   101941 cgagagccta tataagatta ccaatttcct cagctagatg atcaaagtct tggatgcagg
+   102001 atggctccaa tctgacagat gtggcaccaa aacctaaaga ttcctttact agggtttggt
+   102061 ggaatgatag cccaaagaag atttttggtg tagaaagtct caaattcagg atcttccgct
+   102121 gtacggattt cctgggaaac gaagtcatag gcacgtaggt tcagagccag accgatcact
+   102181 cacaatatcc ctgggcaagc caaagatttt gaccacatgc ttaaggaaca actccgcagc
+   102241 atcatggacg gtgcatgcat gttgcgctgc tatgaacaca gcatacttag aaaagcgatc
+   102301 caccactacc atcacggacc taatcttttt aggaatggaa cccacaaaca aggtcttgaa
+   102361 gagccttgtg ttacctaact aaagagctcc caaggccacc tgattcggag tagagcactc
+   102421 ctatttacac ttcgattccg cccactgatt cagatccagc taagccccgc aaccaaagag
+   102481 gagacacttg caccggggta atgctaccac acaggttttg aggcgaggcg ctgccctctt
+   102541 tcgggcaatc aactctttct tgtacctggc actagtcaaa ttgtatctcg gttcatggca
+   102601 tgccacaacg actctatcgg cttatagtca ttcctggcac tcggcgcatg tctttgatca
+   102661 accccggttc cgtcactcgt atctggttga accagtactt cctttccgcc gttcctaggg
+   102721 accacccact cccgttccat gagtgatccg agacagtacg cggagattaa tcggtctctt
+   102781 ttggttctca ttccaaaagt tgagcagcct gttagcataa aaatgcaatt acctccttct
+   102841 tatcgttctg ggtacgggcc tagtcctgag tgcgctgtct gtcttctcgg ctggcccaaa
+   102901 caacgggatt ggtcctgtgg gattagcacc aaaattcctc ggcatctcat cctcctgttg
+   102961 ctggtcattt cctaaaggat cgaaggttca gccgacgcct caaagtggat ttgttgtgcg
+   103021 aagggtgcga ccgggattta gagttagaat agcggggaag acttgacctc aggcagcgtg
+   103081 aaaccagttt atttcattcg atgtagtggt ccgattgcct agtggaagcc ctgaggcact
+   103141 tgattcttgc gtacgctcgg ctcctctacc aatcaccaca gcgcgagatg cgcaagagtg
+   103201 acccatctct ccacaccaac tggaccatac aatacactct ttataagaaa aaaagatttg
+   103261 gaaccgaaag ataggaaata aatttacttc cagagcggaa gggaatactt aaccacgcct
+   103321 cgagtcccag tctcctagtg gtgagatggt aaagggcaat gtccggttct ggccaagaga
+   103381 tgtctttgct ccgtccccct tcttgttggt tggcacaagc ccttgtagta gtaagtcttt
+   103441 cctccggccg gatcggatgt tagagtcaat tccattgtag gtttgaccat taagtgaatg
+   103501 caaatatagg tagaatttga atcaacgacc taagagtctt cttttgccta tcgtagttac
+   103561 cttgatgtga taaatggtcc aactttcttg aaagatatca gatctttaaa gtcctgctca
+   103621 tatacatact aatatgatcg ctaccaggtc atcccactac acaccatcaa gcaaccacta
+   103681 ctgaagctat caactcaatt ctctcgttcg aatggggaga tagataaaag aaacgaaact
+   103741 ctataaaata tcataagaga agaagatttt aattccagct taaataagta agacttgact
+   103801 ctttaaaaaa ttccgatcaa caacttggag ggatggctga gtggcttaag gcattggttt
+   103861 gctaaatcga catacaagaa gattgtatca tgggttcgaa tcccatttcc tccggcgcgg
+   103921 aagtgaaacg ggcgggcgaa atgagaagag cactacttag tgactaggag cggggagccc
+   103981 gttgcgcgtt tttttgtttg accggcctat cttcataagt aagctcccta tggccgtcca
+   104041 gtccctgggc ggctctcggt tcttgagcat gttgggagat tagtcgtcaa ttgaaagagc
+   104101 tgctctaaag cttgacgaag aagttttccc tattaattag attagtaaag ggcttttccc
+   104161 ttactagtca agtggtaagg tagggcgctc ttcgatgaag aaaagaagag acttttggaa
+   104221 aagtggttca gctcagctgg ttagagcaaa ggactgtaaa tccttgtgtc agtggttcga
+   104281 atccacaacc acttctattc tcggagctga ggtatatgaa gaatggcctt ttggtccttt
+   104341 ggacacgtag ccgagagcga gccggatttt taaaattcaa gtgaaagaag gaagaaggca
+   104401 aaaagccgta tagtgcagga ggcagatgaa aaaaaaaaag caattacagt gagacgcgag
+   104461 gtgtagcgca gtctggtcag cgcatctgtt ttgggtacag agggccatag gttcgaatcc
+   104521 tgtcaccttg attaatgtct ttttttttgg aacctttgtt ttggagtttt cccaaaccta
+   104581 taccttactc atcttactaa gaaaatgggg cttatgtttt tcagccgcat cgcactgctg
+   104641 cataaacaat agatccgctg ggctggatga gcaaccttct atctggcctc tgtactagta
+   104701 gtagagttgc ttgctacttt caatcataaa acgaaaattg agcaaggcaa gggagaaaga
+   104761 agttgtcccc tcctctctgg taacccgccg ccggtcatat agagcgtccg cccgccccag
+   104821 catatgtata aaaaagagga agaaagaaca accgttttac tttggcacat gaggtggcgg
+   104881 gtttggctag gtaacataat ggaaatgtat cggactgcaa atcctgtaat gacggttcga
+   104941 ctccgtcctt ggcctacacc ttcatgacca gaaataactg gaacaaaggg aaagcccact
+   105001 caactagtaa catacggaca caaacaacaa aactacttgt tacaaataac atcttgttgt
+   105061 tgcatcgaaa catataaatg tttactaatc tggctaacat tgaacttggt aaaattaaat
+   105121 ggttgaataa tttaaaaatt agattattcg ggaatacaca ttgaatgcga agattacgca
+   105181 tttcatttct ggtagtagat ccataatcaa aaaagtgttt gtgattgttc cagaagaaga
+   105241 aatgaaacaa aagatacggt agagctagga cagttattgt atgaggtcta cccaatgcta
+   105301 aatgcagagg cgcataatag atcgatatga acatcatgag ctgtcccgca ataaaacctg
+   105361 ttgttgctga tactttcttc tcggttcctt cttctccttc gtccataacc cgagctcgga
+   105421 gaaggaagag ataagagggc cctatggaga atgtggtcag aaatccataa tagagtccga
+   105481 ccacaacgac cgaattgatt atcttcatgc ataagtatac tagattacct agtataaaag
+   105541 attgaaaaac catcacaaac ctcccctttt tcttttctat ttctggatta ttatatgatg
+   105601 attttgcaac tttccatata tagaaataga aagagataga ctagaaacga catctcttat
+   105661 gtcaatgaca ccaaagggat attaaatgaa tggaattggg atatggatag aatataatga
+   105721 aataaataga gccgctttga ggttccctat gaaatgaggc atggaacgga gccactacga
+   105781 agaagttccg ggggttacga aggaaacttc gagttcatat tggtcatgtg ggttgagaac
+   105841 gggaattgaa ctctataaga tctaatctcc cgttgttcct cagtagctca gtggtagagc
+   105901 ggtcggctgt taactgattg gtcgtaggtt caaatcctac ttggggagat ttgttagtta
+   105961 tcgcttttct gacctaacgg cccctgtcct tctcctttgt ttctaaacta gctgaatcgt
+   106021 gtcaaaagtg gaaagttgat tgttggtttt gattcctcac tctcgtatag gtgaacttgg
+   106081 ttcgttaaat tcttaggaag aaaagaagga tcaactggga atgaatggga agactggagt
+   106141 agtatctctt cattagtcgg aaggaatttg tctccactag cctcattcga aaaagaacag
+   106201 aatccgaaca aacaagaaaa gggtttaggt aggatagtgc gatgtagtcc aatggctaaa
+   106261 gctctgccag cttcttgtag actgaactct ctttaggctc ctagtggttt tcgggtggga
+   106321 tggttgaaat tttattcgcc actagtggca acgaagttct tggtaatttt aaagggggaa
+   106381 ggcttgcaga ccactcattt cattcattca gtgcctgcgg tgaggcgcga cccacaacaa
+   106441 aaacaaaggg ggaaagcttg cttactgtag ccctctttta gtagactagg cttggtaagc
+   106501 gtaactattt taagtaggct cgagaagggt agggctcgca gcttcgccgg aagcgacgaa
+   106561 ggggggctgg ggaaggccga cgactacatg agggggaagc tgtcgtagaa gctttgtccc
+   106621 ttgctttaca gagattgtta tgaattgact aaatgactag gattctcccg gaacgctagc
+   106681 taagctaaga aatagtatta gttcccttca atcaaatcaa taagtttatt ttttttattg
+   106741 attcagggaa aaaacctcct tcttagaacc cattgagggg gcctcggccc gggaagggga
+   106801 gagtggccga gtggtcaaaa gcggcagact gtaaatctgt tgaagttttt ctacgtaggt
+   106861 tcgaatcctg cctctcccac ttgttgtagt tttagcttgt atatgtgaaa aaacgagatt
+   106921 tctttcatag aataactctt tcacataaga aagtggaggc aggcttgggg gtacgactaa
+   106981 aatgattcca catgaaaagg gaccggcaat ccccctcttt tttaatgaag aagcgggcta
+   107041 gtccccggga atgcccgtta atcaagcaag ttggggaaaa aatcttacct attagacgga
+   107101 acacgaagag gtgaactgcc ggaacaggag gacttaaagc tgaggaaaca ggaagggcag
+   107161 ctaagggaac ttgcttcggt aggagtgtgc tgaagttggc ttcaaacaaa gagaaagaaa
+   107221 gtagtgtgct gaaggaaaga agaaagagag aaggttaaga agaggatact ggcttcgtaa
+   107281 gtaaagtatc tgaagatgag atagatcgga gaggtgagtt accagcgaaa ctattcactc
+   107341 tgatgtccat ttcctgcctt ttctttgtgc cgatagagga tttaatatgt agattccttt
+   107401 ccttattccc tttctggtaa ccataacacg tccctgcttt tttctgtcgt tgactgctag
+   107461 agcaagaggg gacgaccttt cctaccttat gatagactga actgctacgg cggctttagc
+   107521 tctactatgg cagaaggaat cgccaacttt catttcatat agctgtaaaa gttagcaacc
+   107581 ccatttctaa gggaaagagc aatctttttt tgaaaagttt catctacagt caaaagaaag
+   107641 caaacctcgg cagcacagaa gagagctact actattattg catgcgtgct cacgtgtaag
+   107701 agtgtgctaa ataaggaaac ttcggtctaa tcctcgggtg tgctatcaca aggaactcgt
+   107761 ccggtgtagg attgtagatt acctgtccta ttcagcacac tcctccatta cagcacagag
+   107821 gattagctcg ggttaggaaa acaagtagct ggtcaaagct atcggaaaag cagaggagtg
+   107881 tgctgacttc aagcagtagc tagtcaacca acttctgctc aaccctagga acaaggaaag
+   107941 tatcagccga taactctgtt cacttggtgt gggattccta aaagtgtact attatcgcac
+   108001 gccaaagagt gtgctgaaaa gggaatagtt ggttaactac cgaagctaga aacaaatacc
+   108061 tgctaaaacc aagggaagca agagtaggaa ctacccaccc aggaatcagc attctaaact
+   108121 gcagcacaag cacaccagcg tgtatcaggg gtgtgctata aaagatcaca gctaaggttg
+   108181 agtccttacc ttcttattcc ttccgtagtt agcagaacat actagcctgg cttatccatt
+   108241 cctcgtattt gaggaaagta gggtttgaca tcccattctc ggatgcacga attatgcgtt
+   108301 taatcccacg caacaaactc ctctgtgctg ggttgtgatc ttaaaaagga gctttactgt
+   108361 atagtgttac agtaggaata cctactagat cgaggattcg gaacagtact atcctgttta
+   108421 tattgacatt cttctgttaa gatttcccgt ttgagaaacc ctttaagatc tgagtatctc
+   108481 ggaattgaaa ataggacttt atttgcatcc cttccatttc ataggaatac agctaaggat
+   108541 accatcagct cgcatactat ttacgggaaa cagaagtagg gcaaagagga aaagagcaac
+   108601 tccctctatt agaacaatat tctcgaatcg ttgccctttt attcatgaag aaagaaaaca
+   108661 catcggtcta tctaaagcaa caaaattcgt tctatctttc tttctctcct attaataaaa
+   108721 ctggacttct cctctgctcc tatcttagcg cgacggagaa gccagcactg agagaagacc
+   108781 tccctttcaa agtccatcgc ctgcacttga accggatacc tactatttat tttattgaac
+   108841 tagacgatca gcggactttt atatcatatc accaacaacg ggtttcgccc acagtacaga
+   108901 gatgatctca atcgcatcgg cataaaggtc cggattccaa ctttcaaaaa ggatttacgc
+   108961 gcgaattcaa ctatagacaa aggtccggat tccacttcaa aaggatttac gcgcgaattc
+   109021 aactatagac atatctcttt ggcgacccag ggacggggat tcatcaatcc cggggttcga
+   109081 gggagacttt agaaccccta tatatgagtc tcgaaaccat tatttataca tttctcgatg
+   109141 acagagatca tctttaagga gaataaacat tggtccttta tttaataacc ccgatattat
+   109201 caaaaccatg atttccttat ttctcgatgg cataggttcc agggaaggaa aacagtccgg
+   109261 tcctttttca ccccgaaatt cgagagtagc cgtataaacc aataccggca gaaccaccca
+   109321 aactcggctc aaagcaggaa caaagacatc tagaaagccc acatcacgtg gggacttggc
+   109381 cttggtggga cattcaaccg acatacacaa ccaaataata ataggtttta gccatgtatt
+   109441 ccgctacacc gccctattga tctacgcttt cgaagcaacc accggtacca ttttcacaca
+   109501 acagagccga cttacttatt acatttatag gacagggaac ccgtctcggt aggacttcca
+   109561 aatcctacct caagcaaccg acatctcttt acttacttga ccgctagcca ggggcttgaa
+   109621 accccttaca accttccttg cttgccgggc ttacatcgct atcatcgcta gccagggact
+   109681 ggaaatccct tacaatcaca tgcctgcgct cgtcgtgcct ttaatactta tttattaacc
+   109741 aaaccctcct tggccccaat tggtctaaga ctcaccaaca tcttacgatg agacatcggg
+   109801 ctacctggca agagtcaata aattggtgaa cctatgagag acgcttctaa ttcttttctt
+   109861 tctcctgaca ctcactatcc tacccgatcc tgctaacagg aaatttgatg aaataaggaa
+   109921 acttaggctg ttgttgaatc aacttagaat acaaccggta cccacgtcgc aactaaagca
+   109981 ctcgtagcat acgttccatc cacatccggc cagttgaagt gaagatcgag aaaatgttct
+   110041 tcaaagcttg ctaataacca aaaaagaatt ggactagtgt aagggacttc cttttcaagc
+   110101 tagcatttta gaccaatgca atcgagtgaa agtgcttcct atcatttgtc tgcttgaaac
+   110161 cttgcattga gtgtgcacct tgggaatact tgagtttgga ggtcttcaag cctttctatg
+   110221 ggaaaggttt ccaatacgta ttcttgccgt tgttcgagct taaaagcgga tgatgatcct
+   110281 aatcaagagt ttggtctagc ttttcgattc atttgagtct gtcctcgcaa agatatgaag
+   110341 ggcgctttca ggacggatac ggatacaggt gacccctttt tcccctttgc tttctttatt
+   110401 acttctcgta cgtgcccagt ctcaacgtca cttccaaaac cgcctttccc agatctatga
+   110461 aaggtgaaag gttgaaggtc cacaaaaggt aacataaatg aacaagttta ttttgtattc
+   110521 gagctgctga gggccggagc tcgtatggat acattacttc attgttctat gaggccttgc
+   110581 acaaagccca tactcggata aggatcgcga agtgcaaaga tccggtcttt gacaacccaa
+   110641 ccgtcgtaag gacaacaaaa cctatactta tgtgtgttcg acactatagg cgagaccagt
+   110701 tgtcagatga tcgccgagtg gcgttctgct tgattaggcg aatgcgagtg aggtatctaa
+   110761 tcccgttcgc taggagggaa actcaggaag agttcgggca gaaatagatg gtgaaaagcc
+   110821 ctttatatta tataataaag cgcgctcctt gcatgactcc atatcattca ttattaaagc
+   110881 gaaagcacaa cgtgtcaccc taaccttcaa atcgataaac gggaatgcgt tacctaataa
+   110941 taatgagcaa tgctgctaag agcaaatggt tcctaacctc ataaaaaaag agatagtcat
+   111001 atccgactat ccggcaacac gcttggataa gtaaggcgat gaaccgtacc tttatctcta
+   111061 ggcgcctatg atgtcttatt atgtagatca tgtcttgctt gagagttcca aacctcgccc
+   111121 ttctaaaaaa gtacaaaccc acgtctaggg atatacgagc gccattcttc agatggatcg
+   111181 ctcttcgata gttatttatt gtagccggga taagcagggg ttcctccttc cgtcctagtt
+   111241 tttttttagt acaagtagct aagggggctg ggttgcgtct ttcttgtgaa agaatacttc
+   111301 tgcgcacctg ctgaaaggaa gtcaaacgga gattaccggt tccggaggga ccaaccattg
+   111361 tacttatagg gtcaggtcta tcttgccctt ccaaatcacc catgtgtagc tccaccgcac
+   111421 gagtcagacc ttgtacgggc ttttccacag gcttgaggca gactctttcc ttattgggct
+   111481 tcaatccgtt tgtctttgcc tcctctactg agtcagtgac agaagtgcag cagccaataa
+   111541 tacgtatata agaagaggac tgcttacggg atcaaactat caatctcata agagaagaaa
+   111601 tctctatgcc ccctttttct tggttttctc ccatgctttt gttggtcaac aaccaaccac
+   111661 aactttctat agttcttcac tactcctaga ggcttgacgg agtgaagctg tctggaggga
+   111721 atcattttgt tgaaatcaat taatccaata tgcgacgaat ctttttgttt gatgaaaata
+   111781 gtcttaattc aagttccacc attgatacat cttctgcttc caccattgat acatcttttg
+   111841 cgagtcaatg cactaacttt tctagtggtc aagcgtccgg tactcaggat actcatgctg
+   111901 gtatttttga ggattgtccg ggccttaatc ctaacgatga gcgtgtagta gagctgcaat
+   111961 gtgagatacg cgagaagtgt gaggcattaa cgcaagatcc cgaaatgggc ttgattttgg
+   112021 gcgaagcttt acatgcggaa agcgacaatg tccctttttt gcagtccatt gctgatgatt
+   112081 taacccaaaa cggagtatcc ggggaagcct ttcaagaagc tctgaatata gtgggacagg
+   112141 cggcggcctc cccactggac caatttgaga ttgtcccatt gattcctatg catatcggaa
+   112201 acttctattt ctcattcaca aatccatctt tgttcatgct gctaactctg agttttttcc
+   112261 tacttctgat tcattttgtt actaaaaagg gaggaggaaa cttagtccca aatgcttggc
+   112321 aatccttggt agagcttctt tatgatttcg tgctgaacct ggtaaaggaa caaataggtg
+   112381 gtctttccgg gaatgtgaaa caaatgtttt tcccttgcat cttggtcact tttctttttt
+   112441 tgttattttg taatcttcag ggtatgatac cttatagctt cacagtgaca agtcattttc
+   112501 tcattacttt ggctctctca ttttcgattt ttattggcat tactatagtg ggatttcaaa
+   112561 gacatgggct tcattttttc agctttttat tacccgcagg agtcccactg ccgttagcac
+   112621 cttttttagt actccttgag ctaatttctt attgttttcg cgcattaagc ttaggaatac
+   112681 gtttatttgc taatatgatg gccggtcata gtttagtaaa gattttaagt gggttcgctt
+   112741 ggactatgct atgtatgaat gatattttct attttatagg ggctcttggt cctttattta
+   112801 tagttcttgc attaaccggt ctggaattag gtgtagctat attacaagct tatgttttta
+   112861 cgatcttaat ctgtatttac ttgaatgatg ctataaatct ccattaaagt tcttctttct
+   112921 tttatttata tttataattg aacaaaagcg agggatggat gtctgagcgg ttgaaagagt
+   112981 cggtcttgaa aaccgaagta tttctaggaa taccgggggt tcgaatccct ctccatccgc
+   113041 gaagtcataa gttctctctt gccgcctgat aagaacgaat cggatcgact cgactgatat
+   113101 gatagatgga atgggtacct tgtgttatga ttttgttagg aactttgtct ccctttcgtt
+   113161 atcttctctt tttttcttgg tcggcaaggg ttagggcctt tctcgctggg cgagcgcatc
+   113221 cgattagtcc tttcctaagc cacttcccgt tcagttgctg aaaaaaaaat ggggataagc
+   113281 tatgcgacga cagagaaatc tgaatctact ttttgccagt tccacgaatt ctgcagataa
+   113341 cgacgttctg ccaggagtgc cagttctgga catgtttcct aatatatcag tagacaccat
+   113401 aattaaccac cccttctact gttcgggtac agtagctctc gcagaagaat tggtggatcc
+   113461 tattattatg gagaggtgga tagataggac gactagttgc tcgatcagga ccttagcttt
+   113521 attgcgagcc tagaagtctc tcttttttcg gaacagcctt taagtaaaat ccaacctaat
+   113581 atcattcaga gtaaaaggac tacggaacca aagagaacca cacttgcttc gctaggaatg
+   113641 aagcagcaca ccaatgggca aagggtatgg gcgacctatc tgctgcgcta gagaaagaaa
+   113701 gaaagtgctt cgggctgctt tgctgcgcta gaggaaggag actgataagg agaggagtga
+   113761 tccaagctca actcgatact ccactcatct ttctcgattc tattgacata atttggtttc
+   113821 ttctatatta gtgaaatcat tacatgacag tggatcttgc ttggtacttg tccgagggga
+   113881 gatccatact caactcatct ttctcaattc tactgacatg attacttgga attgattgtc
+   113941 gtgaactcaa tctctcatat aggcatagtt gaatatcgcg aagcaaaaac ccaaacttga
+   114001 atatcgcgaa gcaaaaggct acatcagacg atgtacaagt ttttcaaaaa tgatttgatc
+   114061 gattggtgtg tttcgatctt gttgattcac taggtaaatc acagcctcat ccttctatat
+   114121 tacctgttga tcgatcaaaa aaagcacctt acctcggatc aagaaaaaag aagaaaccga
+   114181 atctcggaaa acgacaaatc acacagttga acaaggtggg agtagctcgt gtgatctagt
+   114241 cgaaggtcca taggaaactg ataaggagag gggtcttcca tctcactatt ctagattcta
+   114301 ccgaattagt tggaattgaa ttcgtccaag attaaatgcc agttgaatgt cgactagtgt
+   114361 tgaaggtcca agctgtactc gactcataat tttgcttttc tataccttac tcgtgaaatc
+   114421 gatgatgatt acatgacagt ttaatgtcgc cttattgctt gatccttatc ttgtatccaa
+   114481 accatactca actcagaatc tactgacgat cgaccaccaa ctcatgatta caaaaaccct
+   114541 ctcttcgaaa acccaataat cataaaccaa ctcgtcgact ttggttgatc cttcacttgg
+   114601 ccaaaggtcc tagccgtact tccctccact cataatcacc aactctatca aatttttaga
+   114661 attgaaatgt ttgactatta caaaagtaca ttttccatag agtgcaggaa ccccatcccc
+   114721 tcacccctca tcatctagca cacccttggg gttcacatac ggttttaaaa agcgaagcaa
+   114781 ctcttcatac tcatggcgaa gcacgcaaca agccgaaccc tcaatcccag tgaggactac
+   114841 tccgttgggg gttacttcgc aaggacgccg catgtgcagt tcctcagtct tccttctcct
+   114901 ccatatgaag atttatccca tacggtgcta gcgaagcaag tcttccgagt tcatactgca
+   114961 tctcgaacga aggtcactca cagttccgaa ggtcactacg gtttccgagt cttattgttc
+   115021 ttcttcttcc acgtatgaca gataccctca accctcaggc ccacgggggg ttgctacgcc
+   115081 cagggctact ccgttggccc tgaagggtta cttcgttagg gctacttgga ggagttgacc
+   115141 aacccgaaca tggtgccttg atcttcagct ttccaataag aagatcattg atgcagcaca
+   115201 ctgagctaaa ttcttaaact atctttaccg gtgtgctgcc tcgcagagat tctcaaccta
+   115261 gtccgcgcat actctttccg tttctgtctt acagtctttc ttgttcttct tttccctata
+   115321 gttagaagat gagcccctgg ggtgccctgc ccttctttta cggctgatgg acattcaata
+   115381 gatttatctg ctgttgcctg atctacgacc ccctcaatct gtcgaagcaa atgctgcaca
+   115441 gagggagaag ttgcctgacc gggctattcc cttgttatct gttatccaac tggttgaata
+   115501 gtaatctgtg ctggatccct ctgaagctgg tcattccttg tttccagtta atagtggagt
+   115561 cttacttact tgaattccta ttactgttag ctattagcac ttgcttgtta ggtgaggact
+   115621 ctcttatctg gttaacctta gtggtagcac actccaagtc tagacaatct tcctcccaag
+   115681 aacctctcga taccagaatg gctaccaggg cgctactaga ccaactcaga tcagatcgtc
+   115741 ggcataatta attcttttta cttgcccagt agtggtctct gaagtttact actagtggta
+   115801 gtgcagaaga cctctttacc tacccggcta ctgacaggaa aggatgattt tagattcttt
+   115861 gcttgcccat acgccgatcc tgttagctga ctggtactac gaagatctat gccattcaaa
+   115921 ggagagatat gaatacacca actacagaca agaaagagac tctaagtcca acccatgtcg
+   115981 tgggcaggga gagacaggca ggcagctaac acagatatca gatattagac ttggcaacag
+   116041 acgctttgat atgtgttaca cagacgccct gtagacattg aatcggctca ctccgacttc
+   116101 gattgctaaa atgttgacgt gaatagtgaa tacgcttgct ggttgtgata tgttccctac
+   116161 tatactacta ttgaagacct tgacttttga tcttccttct aggagaattc atcaaacctt
+   116221 gcttatcctt ctgatgagca ttcctctaac taagttagac tgcctcttac gagcaggtaa
+   116281 cctaccttct aaaccatgcc cactatactt tcttctaagc cagcttttaa ctctttgttt
+   116341 tcgtatcatc tcattggact tatttcaaac aaactcgtta cactcgcacc tgattacacg
+   116401 ggaacgaaga aaactacatg gggagcacgc ttacacctgc aagaactaag ggtccagacc
+   116461 aagcacagac agatagaacc agacatcaaa aatcttcctt tgcagcctat cagatatgga
+   116521 agttttcgac ctgtctttca ctggattgaa aagccttgta tgttgatagg actatgtggg
+   116581 ctgcattctg aagtttcttt catcgcaaac tggccatggg gtaaaccctc tgagataggg
+   116641 ggctgctcaa tcccatgtat gttgacaggg agagggagcg aactttctta tcatatcaga
+   116701 gcaagccgaa gacctcttcc gggaaatcgg ttccacttcc aatctttttt ttgaagaaag
+   116761 aaaagtagca agagaatggt tcaagttcgg atcgagtaac ggtgattgaa gggatgagat
+   116821 cggaagctcc tgattcaaat agttaactgg gctgtggttg tcgagtctgc tttccctctc
+   116881 tatcaaagcc cttgcacctg ccttctcagt tcctcgtgct tagcgggact ccttttctaa
+   116941 ttagaaggag acagattagg tcaagttgct ccaggtatga aatcaatccc atgttggtcg
+   117001 taagagtggc aagccatgag gtaggtctta tggaaataaa taagcatctt cctcaagcaa
+   117061 ctccttcatc tgtggcggct gctcctccaa gacaactgcc cagcataact aaactcttat
+   117121 ctttgacttg actctctttt gttctatgcc ccttaggtag gagtcagatc tgttttgtag
+   117181 cttgtttcac ccttctttgt agcttgccgt ttggtgtgtt catccccgcc ctccctgaac
+   117241 ggagctgttg ctaaagcata actaatatct tagctgtttg gaacgtatag ctacccggtt
+   117301 cttcctttct ttagcggagg cgtaggtaac atatctatct atgagttgct gagaacgtct
+   117361 ccaaagcata aggagttgtt cttcaatttc aaccaagaag cacaggttcc cgcaaccctg
+   117421 acgccagaag agttgcaaga cgtggtctgg tcgaacagat tacgaccgct gacctttaca
+   117481 gatcaattat tttccatctt gaaagaacag ccggaaggta tgaagctcta gtccttcttt
+   117541 ctagccgtaa atcaagaact cgaaaggttt attactttgt ttaatcgctt gactagcttc
+   117601 gaaagagcaa tctcctcttc tagtcttacc acaaatcaat gctttcaaga aagtgctcag
+   117661 ttcctgacaa catggttccc ggtcgcgcat ccccatggag gtggatattt cacacattgg
+   117721 ggtcatcagc cgtcgaacac agtcaagtta gcaaagattc gtacgaagga aaagaagaaa
+   117781 gatagatggt ttctaatggg aaggtagcag ctgtcatcat tatgatgatt caatccatat
+   117841 taatccctat ttcagaacta tgtattagtt agacgacgtc aacggaacaa gaactccttc
+   117901 ctttacaggc aagcggggta gctgctgtct cgtttaggaa cttgagttag tcgactaaga
+   117961 ggtagaaagt ggaaagaagc ggcctaaagg attagcgacg acccctctcg gcttaaagta
+   118021 aaggaagcgg cttgtcccat ccaatgccct acttttaagt actcggtatg ttcactcaat
+   118081 cagaagaagc tttcggaagt tcattggcct ctgcggagca tgaaaaggcc tgttttgaag
+   118141 ctccggggac ttattagccc gtcagcctta cttcttagga agaagagaaa tgtgcttgac
+   118201 ttcttcatct tataagatga cggcaacatc ctagataaga tctacgattc ctgcttatga
+   118261 atcatggcaa accggtggtg aaaacagtat tacagagtag gttggttaaa ttaaagcggg
+   118321 ttactttgat cttatcgtct agccgggcat gagagcccaa taaaaaagga ccgacgacga
+   118381 tccgagaagg acgaatcaat aaatcgactt cctttagagc agacagacta cgcgctaaca
+   118441 gctttagaaa ctttgctata ctaacaaagc aagcatagca aagtcaaagt ttgagttaga
+   118501 ccgggtatga ggcaaagcca aaggcagaag tggtttatcc aggtcggttg aatccccgaa
+   118561 ataagcacca attgctaccg tccaatcgct atcattatca tggttggagc atagatagca
+   118621 aaggtatggt tagaggtcaa atacacatat ctaccaggta ggtgaaacga attccatacc
+   118681 catatcgggt aatgggcctc caattacttc agcctaaaac gaagtctgag gtcgggcctt
+   118741 cctggacctg ccttaaaccc tccatcttcc tgtcaacggg actcctctct ctctgaggag
+   118801 gtccgtagag tctcactagg gatctccgtt ctcagtgagc gagaatcggt atggtcagag
+   118861 tactgaggcc tggtcaaagc atctcagcct ttcttttgta gtaagccgct cgtatcgctc
+   118921 ctctagttta gttaattaag ctagcttagt ttctcgattg aaagacaggc ttacctcccc
+   118981 ttcatccctt gcttctttga aagtgctttc ctgagagtgt cggcagagtg cattggtcac
+   119041 agacgtccgc acaccaaaag cctcgtatgc agatcctagt gccaccatct ctgctgaaga
+   119101 tgatgccttc tccatggcac ctgggaacta gtaccatcat ctcctcatca tcagagagtt
+   119161 gtgggtctta tcgggttaag gggatccctg atggcagtgt tgagcggtac aaggctcgcc
+   119221 tcgttgccca tcagcggcct gggagagagt cttttgatag attaagccct gtggttaagc
+   119281 ctatcactat tcgtctgaga gtcttactct ataccatggc tggactcttc gtcaattgga
+   119341 tgtctttcta cagggtcact taaccgaaaa gagtcatgtc tcaacctccc ggtcgatact
+   119401 cttcttctct ttgcaggttt cgaaaagcta tggtcttaaa caggctcctc ctggtatagt
+   119461 gatctgatag ctgctcccag tccgacacct cttcatttat catcagaatg gaatggaatg
+   119521 aaagccaagc atctactagc cgcctagcca gtaaggggga acgctcctgc ttagtacaac
+   119581 cagaaagaaa gaatgcagag taaggccggg aataggcttc gtaagaggac tgtttgaggt
+   119641 actctggaag gacagttaca cccatgccag cggaagactt aggataagcg gtcactacca
+   119701 tcttattact atgggctaag cagatactat caccttaata gcagcaaaga gcaatagtcc
+   119761 ttggactcga ggaaggatag aacagaagtc gtacgtacaa ttaagatgaa tcgatgatta
+   119821 ttccccttcg atgaactata tcttacctgc ggcttatagc ctatctaggt gaccgactac
+   119881 tgagctatct accttctctt tgctcttaga agttcttctt gttataacat taatggagaa
+   119941 gacaagttat tgtacgttag ttgaaatata gatagaaata caaacctttt cctgttctgt
+   120001 cctaggactt tctaacattt gaagcttacc gggatttgct cttccctacc tatgataaag
+   120061 gctgagaggg gacttcagca gtataatttc attccgcttt ggacacgcaa caaaaagtga
+   120121 gggaggggca aatatttcac cgaaaggtgg gagggggggc caatatcctg cagcccagct
+   120181 ggatcttccc atcattgatc gactaaagag aaagtcggcg gatatattgg ttcgaatcca
+   120241 attaatgatt acaatttaga aggacttcat tcaatcgatg tttaggatgg aacagactag
+   120301 tcgttcagta gtcttacaga gcggttactc aatggtacag tacagacgtc tttgatcctt
+   120361 cttggtacgg gagttgaggt tgaataagtc gttcagatag ccatacagac agacagaatc
+   120421 taatcttttg tgaaagaaaa aaggggatag ggagctggca atgtcggaga tcctacctca
+   120481 gaatctttct atttagaagt tagggataga aggtgagtac caactcgaaa agtatactca
+   120541 catccccatg tctttagcca accgtcttga ttgaatagat ttcctgatgt ctagttttcc
+   120601 tactgtggaa atcctattta gagtagactc acttcaatga aagctattac ttagtgggtc
+   120661 aaaaagattt actttcaaac gtcagcatct tatcctatta gggaaatagg ctaaactatt
+   120721 ggtgtgtaac ggaagaaggg gaagaaaata aatgatccta gctgtcgaat gcagcttgat
+   120781 cttaatcaga ggaatgaagg tctttgaatc gcttatctct cagattgaac tgaccgggga
+   120841 gtacctgagc tctttctctg tttaaatctg tattccttag ttctgttatt agctgttagg
+   120901 tgggggaagt ataattccct aaggtaagtt cttctgatct catatgtatg tatactcagg
+   120961 ttctactggg attctcgaaa gagctccttt tgttcttctc tttggtttgg agtgagagga
+   121021 agtatcggaa ggtgccttgt tcagcacact cctctgcttt gctttgcttt tccgagacca
+   121081 gctatttgcg atcgaagttg aatagttcct atgaagatgc gtaatcccat tccgttgtcg
+   121141 tccttcttct tcttgcattt attttaattg taaactgagc ttgtcgatca aactgtggct
+   121201 taggtcggac cgttccctaa attattcact tcctcacaac cgagcccttc gagtctttgt
+   121261 cctgaaacag ttccttcttc gcatctctca agtgagaagg atttggtctc atcttcttcc
+   121321 ttcttccttt tagtcgagac tcttcctctc cttttagtcg agactcttcc tctcctttta
+   121381 gtcgagactc ttcctggaaa gcctaaaccc tcactcttcc aagcggacat caatcccgga
+   121441 atagtgtaag taagaagaag accggttagg atcacactag tcctggctgg cctataactt
+   121501 ctcagaaaca aaaagaggct tgcaggtcta gtcgcatacc cctatcgtct tattgttcta
+   121561 ttcgttgcag tggtaaactc attgaatact caaatatggt ttagtcacac ccttagtgta
+   121621 cattctgatt caatagcaaa cagagcattc ctttgcttcc tgattccatc cctgaaacaa
+   121681 ttctatcggc gaatcgagga ctctcccaca gctttgctta ttcgtgcaag atccagcatt
+   121741 ccttcactta tcatatagat aacgatcgac agtcagagtg tgcacaacga gagatgaagt
+   121801 acgctgcaga ttcccgatag cagcatctct ctatatgtga tgagaagtca gttaagtcag
+   121861 tctacgttcc ggaccagaaa gcattgaata cggctgtcag gctagcaact gattctccgg
+   121921 taagatagat cagtcagtcc gcccgcattc aataataaaa taaatggcat tgagctaagt
+   121981 caagcttttt ccaagtgttg taaagaacgg gctacaacga aagtaaaaaa gatgaaaact
+   122041 tccgtcactg cgcaccactt tgggctctgg tccactttcg ctccgtttac gcgaattggt
+   122101 tgtatctgcc cgctaggaac gctggcatct gttgacaaca gtcccgctaa aggccgctgt
+   122161 ctcttctcgg caaggtaagt tagttaggaa tcatagcagt cggtgaagct ttttctttgt
+   122221 tatgcacaga tggttgggcg cagctgctaa ggaaaaagtg ctaaagcttt agctgctttg
+   122281 attgaccgaa gagcctgact accagagcta gtaagctagc ggaagagctc agctcctact
+   122341 ataagctgat aaagagctac tatcaccaag agagggcgaa gaaactcatt aattcattga
+   122401 tgagggctcc ccattctctt tctaaatctg cagtccaatc cttcctaatt tcgaaactca
+   122461 tccttaagtt accaggggca aggtggacat gaatcaagag aataagctct ttaatcagaa
+   122521 tacatagttg aatttagtaa gaaaagaaat gatggttgac tcattcaagt gataaatcgg
+   122581 atcaagaaaa caccgcccaa ataagcttcg ccatgagtgg atcaggtcct ttaaggcgat
+   122641 cccgcgaagc cggtccccgg tgtctaagat cctatctgac tattgaggag agctgatctt
+   122701 cacgacgaac ttttgttgat agtgctttgc ttaacactaa gaagttcgat catccaacgt
+   122761 agagagaaac atcttgcggg cagttgttgt gaacaaagga atgaatcttt gggtttcctc
+   122821 tcaacaacag aattttgttc aggagtttca gggcaagaat gataagaaag aatgcctttt
+   122881 tatttaaaaa gagtgtgaaa tttgagttca tgagcattat cggctctaac agctttggct
+   122941 acccttttgt attgagtttc tatgagcttt agaaaccctg ggaagacagt gagaacctca
+   123001 ctttgacttt tgagaagata gacccaagta gcccttgaat gatcatccac tatggttcaa
+   123061 aaatatcgaa aaccttctgt tctgaggttt caactgaaaa agggcccaaa atgtcaatgt
+   123121 gaatcagatc aaaaggttca tcacaaatgt tattatgaga tttaaaaggt aagtgttttt
+   123181 gtttagccaa aggacagatt ttacaaatgg tctgattgac ctttttattc ttatgagtag
+   123241 ggagtcaatc tgaaaggata gcagttttag aagaagaagg atggcctagc ctgttatgcc
+   123301 aagtgtgaga atctgcaata acagaggcac taaaaaaagt agtacctgaa agagatggat
+   123361 cagaatccaa aacatagagg tttgatactt cacataaatt ttcattttta cgaattcgtc
+   123421 tttctaaatt caactattag catagcatta agtagtaaac attttacact aggagctgcg
+   123481 cccataccac acacatacat gcaaacataa caaatcaaac aaagacaaag aaaagctatt
+   123541 tctagtatac ttcttcactt cagataatcc aaccgagatg tgattgcgat gatgctacca
+   123601 gaaggaggtg atgtcttcgc gctacgatcg aaagatcaag aatcaaataa tgacttttcg
+   123661 aggaatcatg ttaacgaaac gaatccaaca ggcttgaccg catacctact cctgaaagga
+   123721 gtcaagctta ttacttttag ttactagtta tagttatccc gggtaagcat acttttgagt
+   123781 agtttcagtg tattcaactc ttatctatag aaagcgagcg gagtccagta gtgaaattac
+   123841 tttatttagt aaatgaatac agtgtctact aagaaaagtt agaggtaagg cgggaacagc
+   123901 ttccatcttt atctttatta agctggaaaa tagaaagccg gggctccagt ccagagattg
+   123961 gatcatctct acgttcattt ttaggaagta gggtacgtct aaggtaaggt ctgaagccga
+   124021 tgttcattgg ccttccgcgc ctatgatttc ttgaattcct cgcaggtcaa gcggctgaac
+   124081 tctttttatc tttttttttg tgtctatccg gtcttccact gttcttgcct tcttatctta
+   124141 agcagtcgct aggctaatgg gtatggtcca atcaatggaa cattcttctg aactgaagac
+   124201 ggaagcaagc gcatttagta ataaatttct tatacaggat ttctttcact attagattca
+   124261 gtattagaaa ggtctgggtg atcgcagtat attcggacct gtagttagct cagcttgggc
+   124321 agcaccccat ttttaggtac catcaacagc aagaccatac aaatgtcatc gcttattctt
+   124381 tcaacatttg cggagcgtct cactgaactg ggtcaaagaa ctaccttcct ggagctacct
+   124441 ccttacttag atcgttcgtt ccccaattca aactcgggca aagttgtcca ggtgggaaaa
+   124501 tatctaccac ctgaggtaaa acataaagct gtagacctgc ttcgtgagta caaagacgtc
+   124561 taatccttcg cctgtgagac atctacttag ttatcggtac tcggacaccc aatcccatcc
+   124621 ccgaaagaca gaaggaaaga caaagattac cattaggcga aagcaggtcc acaaggtgaa
+   124681 gactctctat cgtcagatcg ttggcagtct agttctgctt tctttggaat cgaagtttaa
+   124741 ggcattcaga catgggattt tttaagtgca ttcagcatca gtatctgctt tctttcctga
+   124801 tgtaattgca gagaagatgt agacaaaact tacttaattt aattcaaacc ctgtgcgaag
+   124861 gaactcagga atcacttccc cttcattgaa aacaacataa gaaggagtgt tctaaggcca
+   124921 atagacagaa agtgagtgct tgatgtagca tatctgctgt tcactatcaa ccataggcag
+   124981 actaagcaag gctcgatgga attgaggttc gatgtaattg acctgcctta tgttggactt
+   125041 cccaacttac ctagcggaag aaaagggcta ggacctataa atataaatat gtataggtca
+   125101 cttccaccaa caactcaagc tatttcttac tgggattctc cacaagtact agaaagtatg
+   125161 ggatctctac aacttgttcc acaaactcaa actatcggtc aaggaagcat acgcctctaa
+   125221 ctcggttcat tccctaggta cccattcttg ggttcttaca aggtaggctc gtgcccgggt
+   125281 tcactcgtga aagaaaggtt atagcggtaa agtaaagaaa atcctccctt cttgcttcct
+   125341 tagggtacgt cccctatcgc ctttcattcg gaaaagctcg agtcattatc cttcctgtac
+   125401 ttcaagtgag agctagagcg cttctttctc aattacatcg acccttcgca ccttggaata
+   125461 ttgaatagac acttttgaat cccccctttc gattagtagg ggattcctta gctgagcatc
+   125521 aatcccattc tttgcggata aagtacacta cccgcttgcc tgtaaccttc tgcttgcttg
+   125581 actacagtca ctcctattag gtcacgaaca tccttaactt aagagagcag gaagagttta
+   125641 ctatcatagc agcacataca gagtgccgga actacaaatg gactaatgct ttctctgctt
+   125701 tccctcttgg tttcgttcat ggaagatgcg gcatagatag aaagagagca ttccagacta
+   125761 ttctctagca tccgagtctg ggcatgaatg gaagactcct agctccctcc taccacactt
+   125821 aagtatctct tctttagcgg gcgcttcata atcaaaagat tgttccatac catatgaccc
+   125881 aggctttgtc acgagctgga ttcgaaccag cacttccgga tgagttacat ccgacgaatt
+   125941 cccacaacta tagtgatttc ccggttcctc attccattca tttcagccgg attgtacggc
+   126001 gaaactcgct aatagataga gaaattttct cagggaagaa gaacatctat ctccggtctt
+   126061 gtggaacttc tgtcgtcccg ttcattgtgc ttcctcggcc ctgctagcca tttgcttgct
+   126121 cgaacaactg gaagtccccc caccccattc tttgcctcat cctggggagc ggtttgttgc
+   126181 cattgaaatc ccttgttgaa ggggctggct taccaaacca acttaccaaa tcgactctcc
+   126241 ctctactctc tacctttctt ccctacaaac tcttaacctc aatccagggc taaggggact
+   126301 aacaactggc ttcccatcac taaccagcta acctccttag aaaaactcta tctcctgttt
+   126361 caagtaacct taatctaggg ctaggggaac tagggacaga tctaactccc attcctccaa
+   126421 tccttaacca actaccaatt aaaagattca aagctcactg agaacagatt tctatagtta
+   126481 taatttcaat tactgacctt tctcaccctc gctcaattca ttgataattt tggctcaatt
+   126541 cttccattag ttaaatggga aagaaaggct aagtaaggaa gtccgtagag tagcggagct
+   126601 aatttcaggg agaaataaat cactgaaaca cagtattcgc accttacttg attcattcct
+   126661 tccacccgaa gcccctgctg cctgcctggg atgagcgtcc tctgatctaa ataagagctc
+   126721 ctgtttgctt tctggcaatg agctagctta accctgattg cgacaggctt aatacagtca
+   126781 tgttggatct cctattcgtt ctgctaccag gaagagaaag aaagactgat tgcgacagct
+   126841 caaccggatg agacatctct tatttacaga actgtgccaa ccgtgccttc ctaccaactc
+   126901 cgccaacccc agacggggat attgatttag ccgcacctga acccgcaact gctgcctcta
+   126961 ctccgcctac ttcttgtgcc gacttcgact gccttatttc gggtgaagag taaggggtgg
+   127021 taggcttagt agggctcgaa cctacaatat aaccgttatg agcggtacgt ttcaaccaat
+   127081 taaactataa gcccctacgg atctctacat gcaatttgct cacttcggga agagcggacg
+   127141 caagcaaagg ggaggccttt gctctatctg cttcttcctc ttcccaggaa tgcccaatta
+   127201 gataaaataa tgattttaat attgtttata gatagataat cttatatatc tattatttat
+   127261 aataataaga aaataaagca agcggtcctt tcgcgactaa aactgccggt acgctttccg
+   127321 gctcgcaacg actcaaaacg ggcggggggc tctctatttg cttgcaatgc cggctgttcg
+   127381 cgtttagtta aaagtagaag tagagggcgc ccttttcccc acacttaaaa aaagtataaa
+   127441 agtctaaagt acataaaaag taagaaagaa aaacttagtt acgggaacat ccaacagaac
+   127501 cttgtgaata attgtcgatg aaaataacat agtacttcaa accttgaaca gaggagaggt
+   127561 aactggggcg ggtctcaata agtaagagtt catccgttct caaggaagta aagcttgaga
+   127621 acagagcgag ccttacagct tgaaaggaac ctgaaagatg gatcttgaaa ccttggggaa
+   127681 agtttcctac ggcgagctcg agcagaaggc gcagccgaag gcgcagccct acttctatcc
+   127741 aattccggca aggtaatcca attggaaaac cctgactgaa cgcgaggatg ttatgtcgac
+   127801 ttaatccgtg gttgaatatg ttagtatcta gattgagtat cataccattt tccaaagaaa
+   127861 gatagtatgc taaaatatta tgctaatatg gtcatttggt atgctcttat agggatagag
+   127921 cactatgtaa gaggtctcga cgagcctcca ccatactaag ctaaggtcta ctagctatag
+   127981 tagctagatg ggaatgtagc ctagcagcta gtagctctct tcttatagct cggtagttgg
+   128041 atctttcata attcctttat ggttagagca cactgggata gaagaataga tgagtttgta
+   128101 tctttcttcc tacttttttt ttatcggaaa acagagctag aacaaggaac aagcaacact
+   128161 agcacttaaa ataaggtagg cggctaaaac aattgggcat ttccttgatg tccggaaatg
+   128221 gtatatctga aaacagaaga aagactttct tttgaaccaa aactaaagat gatccgatcc
+   128281 gtgcaccgct tttcttctct accctttaga atccaaggaa aaggcaaaga gaaagaggtt
+   128341 gcagagggca cctaaccttc caaagacaca gcagctctag cctaacttcg gattcgaaac
+   128401 catacttccc tgacttcctc tcaacaagaa gttaaggctg aaagtcctaa tcttcttatc
+   128461 ccgatgcctg gagtagcggg tcttagttcg gattagtacc ttttgagcaa ggtaagttaa
+   128521 gtttcggcag tgaaagaaag taaccaggat ccgcttaacc agttatggat gaaggtaggt
+   128581 ctactccaat ccctcagcca ataaacaata aagtaggcac atttccctcg gcaaagaaag
+   128641 tagtccctcg ctaaagagta gacttgagta tcttcgaagt caaagccgag gaataagact
+   128701 cagttcgggt agagcaacca tttatatttt cattattctt acccgtttac ataaatgaaa
+   128761 aataaaagtg aattaccacg gaaaatatcc acctaaaaaa atttctgggg ttacttggca
+   128821 tagtcctcta aagaccctat ttagatactc tttcttccga gttcgagata aggaggaaga
+   128881 atggggcgac agataaaatt aaataaagag caaaaatgct tgactttcat tttattaaat
+   128941 ataatggtag gtagggcttc ttttaggtca taaagggaat agtactttta gctcgttcat
+   129001 ctaattctgc tggtctatag gagtggctcc cagggctatg tgatagcacc caaaatacga
+   129061 cggcctgatc aagagagcca gaagctgggt ccgcccgccg gagcttcttt caatttccgg
+   129121 ggggcttagc ttgaaccact cccaagtgcc aagaaccggc gaatgaaggc tccgtggcgg
+   129181 catcggagct gccaataagc taatccgttc ccagtgagct attacgcgct ctttcaaggg
+   129241 tggctgcttc taggcaaacc tccttggctg tctctgcacc cctacctcct ttataactga
+   129301 gcggccattt gggggcctta gctggtgatc cgggctgttt ccctctccta gagaaagaag
+   129361 ttctctagtc actaaagtgc gtttcactct cgttcctctc gtttcactcg ggttgctaaa
+   129421 gcacctctcc tctgccaagc tctctgtctt ggttaatggt tatccaaaag gatatttctc
+   129481 atgttaggcg gtgtgagaca gggggataaa ggatcccccg ttgttatttt gtattgcgga
+   129541 agatgtctta tcacggggtc ttaccgggct taatcacctg ggcattcttt cttcaattac
+   129601 cgagacctgt acatacaaag atctaggcag ctcatcttct tctaacgaag tcagatcttt
+   129661 ttccatacca taacatatat agaatcgatt ttcttttctg atcgctagcc tgccgggccg
+   129721 cccccgcgat caaactatca atctcataag agaagaaatc tctatgcccc ctttttcttg
+   129781 gttttctccc atgcttttgt tggtcaacaa ccaaccacaa ctttctatag ttcttcacta
+   129841 ctcctagaga cttgacggag tgaagctgtc tggagggaat cattttgttg aaatcaatta
+   129901 atctaatcat gcctcaactg gataaattca cttatttttc acaattcttc tggttatgcc
+   129961 ttttcttctt tactttctat attttcatat gcaatgatgg agatggagta cttgggatca
+   130021 gcagaattct aaaactacgg aaccaactgc tttcacaccg ggggaagacc atccggagca
+   130081 aggaccccaa cagtttggaa gatctcttga gaaagggttt tagcactggt gtatcctata
+   130141 tgtacgctag tttattcgaa gtatcccaat ggtgtaaggc cgtcgactta ttgggaaaaa
+   130201 ggaggaaaat cactttgatc tcttgtttcg gagaaataag tggctcacga ggaatggaaa
+   130261 gaaacatatt atataatata tcgaagtcct ctccttcaaa tactggaagg tggatcactt
+   130321 gtaggaattg taggaatgac ataatgctaa tccatgttgt acatggccaa ggaagcataa
+   130381 aatgattctt tcattctata gatacctctg gtaggtaaag cactcgactg tgctttattg
+   130441 aaagttccca tcgcgggggc gaggatactt gccttcgcgg ttcgactttc ttttcaggct
+   130501 tgactcattc gctagcttct cctccccttt agagctcttt atgatgccca ctgagtaaga
+   130561 ttcggggact tccggcgcag aagctcattc tgaaccgcgg gaaccttcgt ctcaatcttc
+   130621 ataatcaatt tatgccatct taaaaaaatt ggaatcttgt taggtacccg tacgacagct
+   130681 ctaataatga gctaacgtac tcatctttag agtggtttgt gcgcaggctc gagaagttgc
+   130741 tgcccgcttc agaagacctt agggtgcctt cgattcctgg acgactttgt tcgtcgtgta
+   130801 caggggacgg gaagagatgg ctgttcgctg tagcaaaatt caacgaaccg acgggcgtcc
+   130861 cggacatagc cttcaacccg caaaggttag ctttgttgct ggacaaccac tagggtacta
+   130921 ttcttcctgg cccctcttcg ccctatcaca tcacatggtg gtgtggtatg cggcagaaca
+   130981 tgtctatcct tcctccttct tctttcagtc gaaacttcct cctagtgagg tgttcgccta
+   131041 tccaggtatg gaagtattca atgaatacac tctgtaccat gcatgggtgg atgaagcttt
+   131101 atctggagta tcaaagatgg aattgtatgc taaggcttag tgccagtata gagcttgaag
+   131161 tctttatgga tgtgccgatc tttatccgta catattaccg acctgcgaac atggatgatt
+   131221 cgttcaggta cgggataata tttaggatgt acgatttggc tgtccagtta cagtggggca
+   131281 atgtcatgaa attgctacta gtaactaaga ctcaagtgga gaaaaaaaaa gcactcaatt
+   131341 tgaatgtaag taaaacacgg gatggaagag gaggcctgaa ccaacaggga tgaaacagta
+   131401 tagattcccc tgggcgaagc aagtcaccga ttagtaacct aagaaagagt tgtcaacggg
+   131461 cgagtgtcct gctataaggt aaactcctac tatttcagct tcttttcccg tggtcgaaag
+   131521 ctaaatatct caagatgata ttttccaaac tctcgacaag cagcaaatag aaaattaaaa
+   131581 gaagattttg aaggatctga tttcatctaa ggagtggcgg ttaacagaag gccatttcaa
+   131641 cttatgctct aaaaagatag aaagcattgt tgataaagcc ctctctttat ataaagaggg
+   131701 ctttgcgcct tcggataaag gaagattccg atattcgcat aggtattgaa gcggggatat
+   131761 gaacagctag cttcccctca aatgggagcc gtttaagtaa tcttaaaaga gtcctcgact
+   131821 gtattggaaa aacaaaaagt tcaatatggc gggagatcca taattttatt gagagctttc
+   131881 aatcaaatta gattgtaaag tagtagtcct gtgtaaaaaa aaaagctggt ggggcggggt
+   131941 ccaagcaagc gtaataaggg gagggggact aggggtggaa gggtcgtcga aggagatgca
+   132001 tttctggtac aagtggtatt ggacaagatc tcagggaatc atctctttca gatttctgcc
+   132061 tttctttccc atgacgacta ggaaaaggca aatcaaaaat tttacttcga attttggacc
+   132121 tcaacatcct gctgctcatg gtgtttcacg attagtattg gaaatgaacg gagaagtggt
+   132181 ggaacgtgcg gaaccacata ttggatcact ccagtgcggc acgaagccgc tgacgccgag
+   132241 tcggctccta tgccgctagc tatgccctgc ttggtccccc ggcacggtgg aggttccgta
+   132301 gcgggtcatg agcaccgggc taaggggcga agtcactcga ctgaaaagag aggggcggtt
+   132361 gagcaactca agcgaaccgc cctaccttac tacaacatag ggacagaggg ggagaaggtt
+   132421 gtgaaggtgg cctcgttatc cacacctccg gtcggatgaa tggaggaccg cccgacccgg
+   132481 gtttcatgag cgttggcggg tcctggagtg cctgtcaagg gcgctagcgc ataccccggg
+   132541 gtgatcatca tcacctgcac ctcacatctc ggcgtagtgg aacgtgtaac ccgcctgctg
+   132601 tctcattcaa ctacatttgt tactgtaatc tatagcctaa cagaaggcag cgtcgagggg
+   132661 ctttaggaac tcgactgaaa ggagaggaga taaattccca tacagccagc ggggaggatg
+   132721 gcactacagg caaagaccgt ctggcgaaaa cgccgcaggc gcgaagcgtg gtaggcctgc
+   132781 gccgggtgag catagggggg gaaagggatc ccggacggtg taagagccag gggaggccgg
+   132841 gtcatttgac ggaaatggaa ggcttttccc ctcttataga aagccctatg aagttaaggg
+   132901 aagtgaatga attcttggaa aaagaaggag cgagcctata tataaaatgt aagaaagtca
+   132961 attattcaat gaatagatga taaagtcaac cgtacgacag acagcgctgc ctacacgcga
+   133021 attagcttcc aaggtcgagc agtctcaatt tcactacagg atttgcgaat gaatgctggg
+   133081 ctgggccacc tcgaatggcg tgagcgcatg cggggagacc cgcacgtacg gtttttaggg
+   133141 ggatctggcc gaaagaccgg ccggcgccca cccgactaga gggactgaga aattaataga
+   133201 gtacaaaact tatcttcaag ctttacctta ttctgatcgt tcagagggcg atcgcggagt
+   133261 cactgaatga agtcctccgt ttctttcgga ggtgctgacc cgcagcgagg cagagatgac
+   133321 taagtgacat atggaatatg acgacaacaa cagcatgtcg tagaaggaga gaacaggtgg
+   133381 agctaacgac ccacgttgac taacgtatct acaactacat ccccgagcgg cagtcaaacg
+   133441 gaggcgtgaa tgcaagatgc cagcggaatg atcggccgga cagaggctag ggctgcttcc
+   133501 ttcccaccgc gtccttcctt gtgtatcgga gatataaagc gagtgcaccg gaaaagaacg
+   133561 ggaactgggt cgatctattg cgaagcatcc gaagcataac tgcacactca cacgatcttt
+   133621 gccgagagat aggagcattc ggtggaaccg gtgaactaca cttgcttctg gatagatgtg
+   133681 tgggacagag ggctcgtggt accttctgcc cacccttcct cctctgcttt gagaactgtg
+   133741 tgaacggaga gtgggcagaa gggaaggagg tcctcataca gagaaaatca tggaatgggt
+   133801 cgagatagat gacagcgcct ttttcctctc ttccttgccg ggagggcaat cttctcttat
+   133861 ggtcttcacc tcccgcccgg cctggaaatt gaatccagcc cccttctttc tgatccattc
+   133921 atttctgcaa gcccagagcg ttgcctccct tctattgcat aacctaaaaa gctataagca
+   133981 aagtaacaaa agcgcgctcc gcccggtgac taagaaagag gtttgcgcaa caattaaagt
+   134041 gataaggtcg agggaagtag ggctcctatt gaaaggcttt ccctccctca aaagaagact
+   134101 agctttcaat actagttctt acgttacgct gccatttttc caatattatt gaatagcatg
+   134161 gcctggggct aaagtaactc aagtgggaga gccgtgttat gggtgacctt attgcacggt
+   134221 tcagagagca cttgtgtatg tgatgcaagt gaacgtgtac gaaaaagctg tcgtaaagtt
+   134281 tcgtttttcg ttccgttttc gaccctatct atgtttctat gatggcccaa gaacacgctc
+   134341 attcttcagc tgtagagaaa cttttgaatt gcgaggtacc attacgagct caatatatac
+   134401 gagtgttatt ccgtgaaata actcgaattt caaatcattc acttgcttta actactcatg
+   134461 ctatggatgt gggagcatta actccgttcc tgtgggcttt tgaggagcgg gagaaattgt
+   134521 tggaattcta tgaaagagtc tcgggagcca ggatgcatgc cagtttcata cgaccaggtg
+   134581 gagtggcaca agatctgcct cttggcttat gtcgagatat tgattccttc acacaacaat
+   134641 ttgcttctcg tatcgatgaa ttagaagaga tgtcaaccgg caaccgtatc tggaaacaac
+   134701 gattagtgga tattggtact gtcactgcac agcaagcaaa ggattgggga ttcagtggtg
+   134761 taatgttaag aggtcgtgcg acatgaagac attgatagca atatggggga agttcccatc
+   134821 aggcaacaat ggttccgcct gactctactt aagcatgcat attatgtaag tgaagacttg
+   134881 gtgtgaagcc ttggagctta cgttagaaga gcaaaaggcc cggggctagg gtgagctggg
+   134941 gggggacagc gtaagtgagc gaatgtgtgt aagcccagtc aaagatgact gttctaagcg
+   135001 gggggagcca cccacctttg aatggtgttg gtcctacgga ccgtgaacgg atttcgcctc
+   135061 tggcctctgg gcacgtcgga accgcgtgag ttcaccgggg tggagcacgg tccgccaaaa
+   135121 ccggcataga ttaggtgcta ttgatggaac atggtaagcc tatctttctc catatggaag
+   135181 tgctgcgagc acttagagat gcgggtagag gaagcctcaa aaagcgaagg ccgagctgta
+   135241 ggtcacgtga cctgcaccga gttggtggct gactgggctt tttccttgat caaagcagat
+   135301 caactcgcct tctttcttgt tacccaaaac taaagttggt cgaatggttt ttttcctgcc
+   135361 ccggaacgtc gaatgaaata gggggccggg ttctctttct acaacccttt tgatatgata
+   135421 ggcctggcta cctttcccct atcccttatg attagggggc tgttaagccc aagaacaacc
+   135481 agtcttgtgg tggtacggaa ggaacggact ccgcgaacgt cccgcgcccc ggaaagaaag
+   135541 tctcaaccag aaccacattc cttttgcgtg cggatgtagc taagtgtctg actctattgg
+   135601 tcatagtttc ctgctgttgc ggctggtgct cgtttgcgcg cgcgtgaacc aactcaacaa
+   135661 agaaggaaag gatgcccggg gaggcatctg agaatgattc gagccgtatg aagggaaact
+   135721 ctcacgtaca gtttgttttt tttggggggg gcaggagccc gacagggtcc cccactgact
+   135781 tggcccgggc ctaagttaaa gtgaagtggt gggcctaccc atcccaacca ggggtatgct
+   135841 gggattcgcg aagagcagca ccttacgatg ttcatgacca atcggatctt gacgtaccag
+   135901 taggtaccag aggagatcgc tatgatcgtt actgtatccg tatcgaagag atgcgacaaa
+   135961 gtcttcggat cattgtgcaa tgtcttaatc aaatgcctag cggcatgatc aaagccgatg
+   136021 atcgtaagct atgtcctcca tcacgatgtc gaatgaaact atccatggaa tcgtgcgtcg
+   136081 tgtgaaacgt agatcatcgc cgttcttaac caagactcag gttaagctcc gtctcggaac
+   136141 cttgtgggtt aggagtaaag catcccgggg ttggcgcatc tcattgggcg tagagaagca
+   136201 ttgggaaccc caatttattt cttcggagcc gtttcttttc ccgtcccccc caccccggca
+   136261 tagcgcttcg cttccggttc ttcggaagaa tcaacttact tctaccttct tcattgatct
+   136321 gggggaaaag gaaccgtcta ccagttggga agctagacat caagtaagtg gcttgatgag
+   136381 gataactaag ctgacacgcc ggagttggct gctggcacaa cagggtggtg ccttaccgca
+   136441 ccgcaggcga acgcgcggta gcgttcgtgg tggtgcttca ggattccaat gtactgcgtc
+   136501 caagatcaga acgagcttgc cggcggacca ctgccgtccc attcttgagt gagctggagc
+   136561 gcagccatct tatccactga actagctaga agctatcgct tcgggtcgaa gcactaaaag
+   136621 aaaagcaccg ggaaacgcgg cggcatagga accacgggac ccccacccta ctagtaaagg
+   136681 gaaaacggaa gtgcgctcct gcgcaccagc tgaaaaaagc cctttcccct ttctctgata
+   136741 ataaggaaag cttcttagct ccaacctata caaggggttt ttatgtcctt tttataggtt
+   136801 gggttgttgg atacgggatc ctcgtagtag gctggaccaa catccagccg agagagggca
+   136861 gcctctagaa gcaacaggtt gggaaaccaa gagaacgctt cgccttttct tatcttcttt
+   136921 ctgccctagg agtagaagta gcacaaaaag agggattcgc attattgacc caatgataaa
+   136981 ccactaacac cttcctcgtt ggggctccgc gcactgggaa aacgcttgac gcgattggga
+   137041 aaccggccac tagttacaaa gctccaataa ggtatcgaga gggctatcac agtcaggtgc
+   137101 gaagaattac ccctatttgg aaagtaccct tcttcctatt tagggggtta aggcgagaaa
+   137161 tggcttgatg aatcgttccg ttcgccatgc accggcccca ttcacttgct tatcgtagag
+   137221 gctgtaagta cacagtgccc cacaactatc aatagtatag tggggttgaa agacgagagt
+   137281 gcccgccctt tcttttcttt caagtgggcc acttttttcc cgaacgcagt ccgggatcac
+   137341 cgtggccgtg tatatatata tatatatctt cgatgctgtc atttcgaaat gtccgcttca
+   137401 atcgctcttt cacctccccc caaaaaaagc aaagttggct taacgagcgc agatgtgagg
+   137461 aagcgggagc aataaaacaa aaaaatatct ttcttgtcct tctacttaag gggcaaagag
+   137521 aagcgctttt gctactgaga aagcgaacgg tcagcgcgaa ggttcaagac ttttctgagc
+   137581 gttagcgaag ctagattctc atagcgaggc gcttcgagtt agcgaagcct gtagtagcgc
+   137641 cgaagcccta tgtgctataa tgctgagcca aggacactcc gccttattct tataagaagc
+   137701 agtcaactga gttctgaacg aattagatcc ttggtaaatg gctcaatcta tagatagaaa
+   137761 gccttatgat gggaaactac cacgttaggt ttggagagag atgggaccgg ttatataata
+   137821 gagggagcag atgcaagctt ttttctttca atagccggcc aaatgactac aggatcatcg
+   137881 gtctactcta cctcaattca ccatttcgaa ctttatacag aaggtttttc cgtaccagct
+   137941 tcttctacct ataccgcagt tgaagcacct aaaggagaat ttggtgtctt tctggtcagt
+   138001 aatggaagca atcgtcccta ccgtcgtaaa ataagagcac ccggctctgc ccattcacaa
+   138061 ggactcgatt ctatgtccaa acatcacatg ccagcagatg tggtcaccat cataggtact
+   138121 caagatattg tgtttggaga ggtggataga taggacgact agttgctcga tcaggacctt
+   138181 agctttattg cgagcccaga agtctctctt tttttcggcc ttcaggaaca gcctttaagt
+   138241 aaaatccaac ctaatataat gaatatcctt ctacatagaa gaaagacact ctaagatcct
+   138301 ttttcaaacc tgctcccatt tcgagtcaag agatagataa atagacacgt cccattgcac
+   138361 tgatcggggg cgttcgttgt atgttgaagc agagatgaat agggtggctg tgaagagagt
+   138421 gatggttgat cctgactcca cagtcaatct catacctatg tccactaggc attccaaaag
+   138481 aaaagattgg aggagcactt ctcgaggtat ataatttgag ttcgaattgt caaatcaatt
+   138541 ttctaggaat ggtatggtca gagtcaattg caacgttgga ccttttcaaa gtccgataga
+   138601 gttccaggtt gtttttgcac cgacgacaca ttatgctctt ctgggcagac tctagattca
+   138661 taaagaccaa gcggtcccat cgacatacca tcaatgtata aaaggtataa tcaaaggaaa
+   138721 ataagtactc gtaccagcgg tgagcactcc cttcgaaaga tctgaaatcc atttttgctg
+   138781 acgctataca ttattcggag tttgccgagg atggagagct ttcagggtcc agggagctgt
+   138841 tggcttactc gttgggagga cttattaatt aaggaagatt cagttcgtat ctatttatag
+   138901 aaaagagacc accggtatgc aaaagttgag aaaagccgtc tttagcctgt cactaagttg
+   138961 ccaatcgaca tgatctattt gctcacattc caggcgaagg tcataattcg ctgtttccta
+   139021 caggtactgg gacggaaaca tcatattccc gaaaaaaatg cgggacttcg gtagcgagga
+   139081 gggccctcac agcaaagccc attgccaagg ccagaccccc agccgatctt gatgatcaag
+   139141 aaagaaagat ggctcgactc cctactaggt ataccccaac cttagatagt agggccataa
+   139201 aaggtttccc taggaaaaaa gaaagagaat caaatatagc gagaacacat aaaagaatcg
+   139261 agattctgat gagaatgcaa ccactgcgat tagaaagggt tccgcgattg taattcacag
+   139321 tcatttcttc aagatgtttc tgccattccc aacaaatcat ctttagaaag ctgcagattt
+   139381 tccattcttt ttaatgcctt aaatacggat tttttataat catctgtctc agtactcatc
+   139441 tttttactta tttacttatc cttcctgaga cttttttcag tttatcgtag aaattagtcc
+   139501 acaaaactga taagaacgac acgagtttaa aaatttcgat tacgctgggc catgcctggc
+   139561 actttcattt tattgagtca taagattaat ttaatcaatt gcctcttatt ttaggcaaaa
+   139621 aaaacaatcc catttgttct ccgcagtcct tctgtgtgta gaacattggc cagctccatc
+   139681 taccaatggg tgcacctgac tgtctccagt tcagttagaa cagaaggccg ccagcttaca
+   139741 tagtatataa ttgtagcggg gggagctgac tgttgcctcc ctagttgaca aatgagccaa
+   139801 atataattta atcctatgtg ctctacatat tttctattta tttcttatct atcccattta
+   139861 gcgataagat actcgggtta tgtttggtga actggggccg tgttttatat cctccttccc
+   139921 cacgggaaaa cgtttgcaga tgcgatcatg aatcgaacca gatcgaaaca ttcagctgtc
+   139981 gacggacaaa ccttgccgaa acgtcgaccg caaacgaagg atggccaggc cccggttccc
+   140041 agagttatag tattccctat tcctcgccta ctcaaataag aactagttga ttctctttcg
+   140101 cctatcggcc ggccggcttg ttgcaacctt cgcatttcct gctgagatcc taagtctcca
+   140161 agtgggccct cttggccacc cgcgaccttg gctttttaaa gaatcccgct cctgagtcgt
+   140221 ggaacttgta gctcggcagt ccaccgggtg ggtttgttta tccttccagt ttcgagtgtc
+   140281 ttcttggata gttatagcgg cccataggcg cgagatgtac cttgtggggg ggggggcggc
+   140341 ggtcccctgg acatagtcgt ttcaggcagt ggccgtttag tccatggtcc attagatggg
+   140401 aggtgcaagg ccagaaaatt gaacacattg attccgctcg ttcccgtcct tcgcttcagg
+   140461 gcctgcccct cggtgtggtc agtactccat actgtcgggc agcgaagctt acacttgttc
+   140521 actaattatg acggttcgcc agggcctctt tcctcctccc ttttctgctc actcgtaggg
+   140581 gtccggaccc ccacaaaggg ggagggagtc gactgaacat ctcagccatt ggcggaaatt
+   140641 tcgcccgcat ccgatgtacg atcgtgtcgg gtgagcaaca gccgcttcgt cacagtactt
+   140701 acttatgggc taacgggtca cactttggcc aagtatccta caaagagact cccgagagcc
+   140761 agaagtatta aaggaatggc cataggaatg ggcgcatcat gacatcgtga gatgtctcgc
+   140821 ccgaacgaat tagttggtac tagaaatgtt agaaaaagta aacgaaaaga gtaataagaa
+   140881 gtgaaaagga cagagacact tcccaaccag aaagcaaagt tcccactgat ggtatactta
+   140941 gtgtaagcga gctctaagat cacatctttg gaataaaatc cagttagaaa aggaaatcca
+   141001 attagagata agctgcctat gagcatcatg gcataggtca aagggaagga ggaggcaagc
+   141061 ccccccatct tccgcatatc ttgctcatcc gacatggcat gaatcaccga accagcactc
+   141121 aggaatagta atgctttgaa aaaggcgtga ttcattaagt gaaagacgct aaccgaatag
+   141181 ttagagatgc cgcaagcaaa gatcatatag cctaattgac tacaagttga ataagcgatg
+   141241 accctcttta gatcgttctg taatattcca gtggttgccg caaggaatga cgtcgtagct
+   141301 cctgcagaag taataacaat caaagccgta ggtgggtatt caaataaagg ggagcacctt
+   141361 gctatcatga aaacgccagc tgttaccata gtagctgcat gaatcgaagc ggatactgga
+   141421 gtgggaccct ccatagcatc gggtgaccaa gtatgcgatc ctatctgcgc ggatttccca
+   141481 acagcaccaa taagaagtaa aatacaaata agacttatgg cattcaatct catattgcaa
+   141541 gaaatccaag aatttctggg ggcactagca cgagcaaaaa tggttgaaaa gtctactgtt
+   141601 tgaaagagag taaaacgacc cgaaatccca agagctaatc caaaatcacc tactcgattg
+   141661 acaagcatag cttttgtagc tgctttatct gcctgaagtc gtgtaaacca aaaatgaatt
+   141721 aacaaatatg aagcaagacc tactccctcc catcccagga ataattgaag agagttatct
+   141781 ccagtcacca acattggcat aaaaaaagta agaatggata aataacacat aaatcgaggg
+   141841 ctatgcggat cctcggacat atatgaaatg gaataaagat ggaccaagct acttatggat
+   141901 gtaaccacaa ttaacatcac tacggtcggg ctatcgaaca cagagtcaga agtgaattac
+   141961 gagtcggacc tatttgcgaa tcgagcgagc tccccttgca tgcaatgatg tggtggtgaa
+   142021 cctctcattc taattcagtg ctctccgaac cgtgcgggaa ggtttcccat cacacggctc
+   142081 accaacttga tcttccggcg ggaaccgtat gtccaaacag gcctagaaaa acaggtacga
+   142141 ttgccactaa agtgtacgaa aaaatccgtg cttaggcccc ttcttccctt ccaagagtca
+   142201 ccaccgcctt agtagtctca caaatagggc gtgcaggcct ccctattttt tgagtaggta
+   142261 attcactacc gaagcgcgaa gaaaaggctg gatcaagaaa agggggtact acgagccctc
+   142321 tgccccacgc atctaaccag ctcgcgtggt tcaccggttc caccgactag accaaaaaga
+   142381 gttattcagt cgatacagag gtgcgcttga agcggggggt gtgctgtccc tattggctgg
+   142441 gcccttcccc ataaggcccc accgtcgggg cataagcgcc ctcttgctac ccatatgcag
+   142501 ggcgccgtct tagccttccc tgaccaggat cgctcccaca cctgtagcgt tcgtgatcgg
+   142561 cctactcaac tgtgtatcga ttgaaaggca ggaatttccc acaaccaaag ggagtggtta
+   142621 cgtccagtat gtcccccctt attcccgaca tgctatggtg ccccggggtg ggtaggagcg
+   142681 ggtcgagtcc gtatcgccgc ggagcaacag ccgcgtccgg atcagatcta tctactcggc
+   142741 aattcatccg gtgacttcac ggtcgccaaa gaagccccaa gaagcatcaa acatttccga
+   142801 tgagatccat ggagcaattc ttagatagca agcactagct cccggtgcga cttcataaaa
+   142861 agcaatcaaa gataagatcg aagagaatga aacgcacgta gtggtcatta tagcgcttcc
+   142921 ttctgatcct agaaaacgtc cgaaaaaacc tgctacggaa ctaccgagca ggggcaaaaa
+   142981 tacgataagt agatacataa tttaatttcg agtgtgatca aaaacctaaa atcaagacaa
+   143041 tgacagagcg gccagtgatt gagtgataga tttctcgacg tccggagaac gctcgaccga
+   143101 agaaatgagt aactaacaag gaagattttg ttccccaact tgcttgatta acgggcattt
+   143161 tcggggacta gcccgcttcc cattactcca agagggcaat tcctcgcaca taataaaatt
+   143221 aaggaagcca ttgaaaggtg actaaaagac cagaaacagg gactacccga gctaatgata
+   143281 gaggcaagaa cactttccgg ccaagtccca ttaattgatc ataacgatat cgtggaaatg
+   143341 ctgcacggac ccatatatat aggaacagaa agagaagaac cttgatacta aaccagatcg
+   143401 agcccgggat cttcttgaaa atgggaagat ctaggatagg cggccaacct cctggaaaga
+   143461 acaatgtgca tggaccaggt gagtagggat gcagctccgt ggaccgctcg tcgggcctga
+   143521 taggtggtgg tatcacaccc ttctcaaaga aaccgtacgt gacactctcg cgtcatacgg
+   143581 ctccgccccg gaatcatttt tatacctctc cttttagtcg agtttgtttc acaacctctc
+   143641 ccaacaagtg gtcgagttcc tttgtacctt tcctttgacc aaggggtcct cgaaccaacc
+   143701 tgtccttcct ttctattcct agtcaggcgt tttcattcat tcattgattg attcaaggta
+   143761 cgaagtgaca agtccaagcg atagcgtaaa agctagtcgc cggaagcgaa cttccgggcc
+   143821 gggaaggggc caaaaaacgt gagcgccccc cgcaatcttt ctaaagaaac aagctagaaa
+   143881 ctttactttg agaaatacct tattatatat atataattag taaaggcgtg ttagcctatc
+   143941 tagtagtaag gggccttttc ttgatcgtta gcgctttact aataacaagg acttttctcg
+   144001 cttgtttagt attgctttgg cttcgctgtc cgtatcttgc tggcgcggaa gctaccgcaa
+   144061 ctaaaagaaa atgaatgaag gaagaaggca ttagaaagac taccgaggca ttccgggccg
+   144121 actacaatac aagtcatgag cgatagcgaa gccaagccta tccggctttt ttatgtcaaa
+   144181 agccctaccc caaaactagc tctatcttat agcagacaac tcaggcaagt ctactcaact
+   144241 aatctcataa gtaaacgcct gttcgcatcg caactaatag aaaaaaacga ctactagact
+   144301 agactagtag ttgagtgttc cttcttgttc aggtcttgac cgggtccgag cttcccaagc
+   144361 tctatgctgt tggggaactc tgcaaggatc ttgccacctt cttgatttac aatatttgag
+   144421 tctttggagt acttgaggat tatattcggc gccgaggatt tgtgcttgtg ggctagggtg
+   144481 aatattgcag accagcgaat ctggtggtcg acaatcgttc ggacttgata aaggttgtcg
+   144541 cggcacctgt agtaggacag aggacttatc gcgatgcccg cggaccaatt tacgatgtct
+   144601 tcgtcgctga cgttcgtcaa acaggccacg tggattggcc agggtcttct tcggctaatg
+   144661 atacctcgat cccgaagcct tcggagtatc ttttttatag gtgcctctat ctttatgggg
+   144721 aattcgctgc tgatagatcc cgcccagtgt cctcctccct cccccgccgc cttccgaccc
+   144781 gggggagtat acaatgacaa cttacgggca tgagtgcccg atcgtgagac tacctgttga
+   144841 acgtccgatg acgcgttgct ccgacctgag ctatgcaaga acgagatccc ccttgatcct
+   144901 tgccggatgt gcttgacggt cccccataat acgctaactt ggggacttct gactccagct
+   144961 gttccaagag tctccgctag ttgaaccccg tcctgtagac tccctgtttt gctcatcccc
+   145021 ttcgtcagct gtttgatcgg gatactatca cctaggttcc taaactttga atggatggcg
+   145081 gagcgtaggt ggcaagcagt tatatggata cggtgcttta cccgtagacg cttttccagc
+   145141 tctcgcaaaa attgtatggg agtcgtcctc ggagggactt cccgaatgac cgtaccaagg
+   145201 aattctaccg tactccgtgc agctattgtt gttgatcctg cggagcctac ccaaaggttc
+   145261 aggccagatt gtaggaaatg ggcgatacgt ttttgtattt ctatgagaag ctctacggca
+   145321 cccacgattc ccagtagtaa gtcgtcggca tatcgcgcgt aacaaatcct tattaagtaa
+   145381 tgggttttta agggggcctg cttacgggcc aggcctctct ctgacctgat aactagtatc
+   145441 gcctccccgc ccagctctat cagcaggccc tttcttttgc aatacttaag aaggtctctc
+   145501 atggcccaat tattattaca gcgttctcca ccatagaatt cagccttcgg ggtcaacccg
+   145561 gcggcttcta tgaggaaggc ggcgcaaaga aggctcgagg gcttgttaag gaagacggca
+   145621 agggccgacg aaggggggaa aacgaaaggc cttttctggt cccccctgag ccggagggtg
+   145681 cttgtggggg gggtgtgcca cgacgaaaca agggaatgaa aggccgcttt gcgttgcatg
+   145741 ctcttaacgc tccccacaat gatggctctg ttgtcttggg gagcgttgaa gcttgcttct
+   145801 tctccagggt tttcttggtc atcaatacga cgacctgtcc ttaatagaac cgatctgact
+   145861 ctctgaacaa tcggaatttc gtacttctgt cggatcctcc ctatctcctg atcgagcttg
+   145921 tgtaggtaga tgttgcctgg tagggccgat agtagtacac tgtgtgggac ggagtaaggg
+   145981 cccctctcaa ctcctacgag tcgtccggcg gaaaatactt tctgaatgga gtaaaagaac
+   146041 ttgggatcgt cgatctcttc cttcaaaatt tggatgagtc gatgtcggtc gatggtgtga
+   146101 aaacacttcc tgatgtcgaa ttctaaaaac cagcgagaga ttccccactc ttctttgatc
+   146161 cgtcttagga ccgagtggca gccttgaccc gagcggaaat gcgatgtgtc tggaaactcg
+   146221 ggatcgtaaa tggattcgag taccattctg atcgcctctt tcatgatctt ttctataggt
+   146281 agaactactg tgagcggtct aaacttcaac ccttctttct ttcttcatat tgtaaagggg
+   146341 ggaaaagctc gttttttgct cccctttttt ataatcaaag gccccctcct gccgtcgttt
+   146401 cagtgactca tagggcttcc ctcagttcag tctttttggt tcttgaaaat ggtcgccacc
+   146461 tctcctcttc tttcagtcga tggggttcct ctccttttag tcgagtaatc aatacctctc
+   146521 ccaggaccgg aatgattatc cctgcccgat gggtctacat ccatccctga atgtcgtcgg
+   146581 gtactgttca cttccccgca ttcttgtaac cgtcacctgt aatccgcgcg ggtgtgtccg
+   146641 cacccccctg agtggacgag aaagaaagga tttctcggag caacccccca ggttccagac
+   146701 ccaggagtca actttcccgt atgagcattc ggtacatgta tcagtccgtg ggaagaggtg
+   146761 aaagggtcac cactactgag gatctccccc ccccctaatc ttagataggt cgtctgaggg
+   146821 ttcgccgcgg ttcattgctg tgcttacaca caaggctacc cttctccgaa agctacgcgg
+   146881 gaccacctac cactagtctt cggccggagg ggtttattgc acaaaaacgg ccgggacgca
+   146941 ggctcccgaa gagggaagcc caacgaatgt cagatgcaaa gttccgcacc tcattaagat
+   147001 catattggca tactctccca aaaaaaaaag agcagacccc attgaagacg agagtaaggt
+   147061 acaacaaggc catttctgtc caccgccctt ctcacggaac cgtacgtgga cgttaccgct
+   147121 catacagctc ccagccagca agcacagtta gccttcctct acaagaaatg gaaatgtgga
+   147181 tgaatcgaat gaaatcgagg aattcggttt ttcttttcag acatgataag taagagcatc
+   147241 cctttcacaa aaactacgga tccccctcct atcctgccac ttcagcacct tgtgatcttt
+   147301 gagaagatca ttacgagccc tttcctagaa tgtttttgta gattccgaaa attccagagt
+   147361 ggatcaggac gagaatgaat ccgggaatcc agggcatact catcctggag cttccgctct
+   147421 cctctgggaa gggctttttt atggatagag agatgagtcg agggattgat tactcgacta
+   147481 aaaggagagg gtagcctccc gtttgaccga tttctcggag tcggaagaag atctctcatc
+   147541 tgatgaccct gaacaagaag gagctgctct cgatgtgaga tcagcagcaa aagaaagaag
+   147601 aaaagggcca tgatgatgat cctatgttcg ttttcgccct gcccctatga tgcgctccta
+   147661 gctcgcagag cataagtcag cgcaaaggag agatacatcc ttttaagact gtgaagagaa
+   147721 tccttggttt gacctacgga ctacgtggga aatacgagat ctaggcaagc cgctacatgt
+   147781 tctccccttg cccttgaacg atagaagaac tacttatctt ctcttagata gcagtcgatc
+   147841 ggttctatga tctatggtca atcaataggc ccacggatct attcttctat acgataaatt
+   147901 gccatctcgt agcaccacag cctgggggta agactagaag aggagatttg cttggtcttc
+   147961 tgtgcggaat ctgcttccta atcagtgtag ctcttgggtc agctcacagc taggacgtca
+   148021 tcagttctaa aggattcctc actagggaga ctctctcctt atgggcaagg gaggcagatt
+   148081 ttgacgggga gaaggattcg gaatatagtt acataagtcc ccttccctct cctcaatata
+   148141 gtttcactcg agtcacgtca gtacctctcc ttttagtcgc tcgtcgagac ctcatcaact
+   148201 agttaatacc tccggacctc tcctttatca gtcgagtttg tgttcacaac ctctcgagaa
+   148261 acaagttcga gagttacgta ctttcaatgc tctctctatt ccttgctaat caaggcctgc
+   148321 aagggaactg gttacgacta taggcgggac gacatgcttt tagcttccct gaaataggcg
+   148381 acttgctcgc tttggcagga agtatttttt ttgaagaatt gaacaaaaaa aactgaaatt
+   148441 tctgtaaatc attaaaacgg gactttcttt aaagaaaaat ggtctctcag gctggttcaa
+   148501 agccctattt ttagataata ggaaaaatcc catttcattc gggttttcta ttctgtcctt
+   148561 atccccactg agggaagtag ctaacctaac ttcatattat ggattagtaa ggatccactg
+   148621 ggtgggagga atcaaactca acgttgcgag ctcaaaaagt gatcctggaa ggcccacggg
+   148681 taagcaaaga ttctaattcg agattggcag agaaacggcc cgtgcgagca cgaaccaaaa
+   148741 gaaggtgcca agccgaagtg tacaaatctc ataggtctat atctgctcag tctctgttag
+   148801 cagcttcagt aggattctgg tcttttcttt ctttcctgcg agtgttgaag tggggaagtc
+   148861 cggatcaatc cgtcgaaaga gacgccatct ctcagtctaa atggaagtag accaagtagt
+   148921 ttcataggaa gaaatgtgaa agtaggggct gctaagcgcc cagacccatg ggaggtgagg
+   148981 tgagatagtc aatgaatggg gtggtagatt ctggcgcagt gtcaattgag aaagcgattg
+   149041 tgactctgac cttcttatcg gctgctggtc ttgaagtctc gctaactact tggatacctt
+   149101 ctctcattcc tcgggaacga aaaagcgaga gtacattctc ctttttagtg catttcagct
+   149161 gcgtcagcag tttattcttt cttattccct ttcccgtgtt cattttaaag tggtggatag
+   149221 gatgaactta ctggccgttg gtccttctct tctctttctt attagtaatc ttgttcaagt
+   149281 gagagggact gacatcttct tttcatttct ttcatatctc ccttgattct gtttcagttt
+   149341 gaagtaatag ggaggcttgc agaccttaca attgtctttg ttaaatactt ttatgaccga
+   149401 ctgacttgaa tatgcttgcc attggtcctt atcttatctt taagataatg gtaagagtgg
+   149461 taatgtaggg ctaggggtat agaatggaga gcctttgtag gccgataggt tgattgagtc
+   149521 agtacacttg acgtatgctt ctagaaagaa aaagaatgag attcagctct gatcaccacc
+   149581 accggatgca tttttcttga tcttctcgga ctccagtaaa gtttatgcgt tgggcctaga
+   149641 aagcggtaaa cgtcagcaat gagtgcttcg attgtattaa attcttcttc tcgattgggg
+   149701 aaagtccgtt acatgctaag acgcaccaga tctatgatct ctattttcta cagaatcgtc
+   149761 tagcgagaaa actctttcac taatttttct tcttggcact ttcggtcttc tgcagaatag
+   149821 tagcattgca tcgtcaattc acaattgatg tggggagagc ttgttcaaag aactcgagtc
+   149881 aacacgccac tctgcacctt gtgtgagcta gacgaagaaa gcacaaagta cttaccctac
+   149941 atatgcagga aagaaagtta actatggata ccatagctac tcgggcactt ggcgtcattc
+   150001 tcacccctaa tgcttctttc tattcggatt ctatttgaag gtttttctcc ggattatatt
+   150061 ttttttataa aatgaaaaaa gggggctttg ttccctcgta ctaatagctg aacagttgta
+   150121 gaattaatgt ggtcagcata acacaagtac tcttcgcccg ttgtggaatc atgcaaaaaa
+   150181 tagacgaaaa aagacgggtt ttgtcaactt tttgacactg tttagacatc tatctaacag
+   150241 ccagaaagat aaagcagctt tgataactaa gaaagcagtc tatgggatcc tttttagcaa
+   150301 gaatagcaat tggattagga tatagatggc atgtctcatt agccggcact ggctttcgat
+   150361 tcggggaaaa actagactaa tccgtatagg agaagtttaa gtggaagcag tggaggcaga
+   150421 agggcgaaaa tccccaccac tctccctgaa agtagctaat tcaaagcaga acgagaacag
+   150481 atggcttgca cgctctacct tttcatctct tgcaacatct catctgcaac tatcacttat
+   150541 cgatttcttg cacctatatc ttatgttggg tacagaaagg gatcaagaat agggtggatt
+   150601 caccaaaggg tattgacacc ccagggcaaa tcacacctcg acctgccttg ggcaaaaaag
+   150661 tctccgtgcg gatcacctag gggcaactcg taccggcttc taacaagtcg aagaagcact
+   150721 ctttccttca gtcaacccat ctccttcggt ttagtggaaa gaaaactctc ctagctgagt
+   150781 tgatgtaccg atatgaaatc tcctgcccct tcaacctaag gagtccggca gtacccgtct
+   150841 catcaaaagc cagcagtacc tctttcatca aaacaaaggc cctcagaata agcgaagtaa
+   150901 accgtgagtt aagcgtccct cgcgtttacc gtgagaagag tttcacgcgg cggctaaacg
+   150961 cacctatttt tggcagccta ttcgtagaca aagaaagccg attcgctaat ccttattcct
+   151021 ttactttgaa tcaaggtctt acccgaggaa gagggaagca agcaaagctt gccccggatc
+   151081 gtaggggaaa gagtgttgta accgaagtag actaccgaac gggtgtgggg gagaatatcg
+   151141 tcaaagattg agctaatccg aatccctctc acaacaagag ctaaccgtag cagtctaagg
+   151201 ccgagatgaa tcaattcgaa agctgttctt cacgtttatg atctgaaaga cccggacaga
+   151261 agagtcatac tactacggaa agggaagtga aagcaagcct gtaagggagg gggagggcaa
+   151321 acttccgaca tttcactgat ttctccttat ctttaggaca ttgctttact tgagggcatg
+   151381 ctcgtgtacc atacgctttc atcgatggaa gcacaacaga aggtgcttca atctttccta
+   151441 tgggtattct gatgggcggg gctcgggatt tgatagtaag catgaatcta aggagatctt
+   151501 gcagagtagt cagctggatc tcgctcatgt ctctatagca cctgactatg tcgcctatat
+   151561 ttagattggc tttattctct ctgaaattag cagttatatc atccaccaaa gatatggagg
+   151621 ttagaaacgc aattgccctg tgataatggt gttcttccac accaaagctc ccactgcaaa
+   151681 agcggtaacc ccatttacca aaccatgaat gaccataagc aactccatgc aatagcctaa
+   151741 gatccattga cctcttttgt gattcatcct ccagagtgat tttcctacaa aagcaacatt
+   151801 aaaatcgctc atcataagag ataaaccgaa cattaatcaa gttaacccaa acaaaccgac
+   151861 aagttatcaa atatcgtcag atttcaagcc aacccaaatt ggtttctcgc aagaaggata
+   151921 aacaggtcta tccgatttgt ttattttcca tcgaattact aacctagcat cgtttttttg
+   151981 gggtaaaacc taacttcgag ttttagcaat tttttagggc aaaacttacc gagtatgcag
+   152041 gttaacacaa aaacgatccc aaagatccat aatctctctc ccatgaaggt gcttggaccc
+   152101 accttcaatc cccttcgagc aagtgaccga acccgttcaa atggaccatt ccatgcaggt
+   152161 ccgggtcagt ggatctgtgc cttacccgcg ggggctttct cattgagtca tttccgtagt
+   152221 cagggaggga tcgatatctt ggtccatggg cccattctca attagtcttc ctggactatg
+   152281 tttcttaccc acccgaacca tccctgctag cataaaaaca agttcgggca gtgagtcaca
+   152341 tgttggaatg tcaggatgtg gaattttttt attgagatta gattagtgat taattagtca
+   152401 cacctaccgt aacctatatt ggtaaggtta agcagctgac ttagcccccc gtcttcttaa
+   152461 ggaggtcttt ctcacaggac ggagataggt tatgcaatta tgacttattc aagaggtcct
+   152521 cccttcccta ctggagcgct aactcctatt tttttgtaga gaatccttgt gtagtgtatt
+   152581 ctactggtat agaatctttg tgcgctgact cctacgaata taccgaacta gtccatccat
+   152641 tttgattgat tcactgttcc tcgtccaatc cctctcgtat caatccttcg gatctgcctt
+   152701 tccctgggtt actttttgta gctgttgttt catcccccta tctaacgagg gaattgagct
+   152761 ggtgaatcac agcttatttc tcgagtgatc cctttgtaag tgtagctaaa gcaggggcga
+   152821 tagccccttt ttaaatatct ttcctatctt tcttacttcg tttaccagct tggaatatca
+   152881 ctgataaccc ttcatatgtc attcgttgtt atctgttgat ttccctgtgt ttgccgatca
+   152941 ctgatgtaat ggctgttccg tcctttcttt cctctgagtt ggcttgcaga ccaaggagga
+   153001 tcaacgtctc attgtattcc agttagtttc acagctaccc cttcccctac atcttctttc
+   153061 ttcttctctg tatctttcgc tgttggccag cttgaatcgt agttttctct tatgggaagg
+   153121 gcggacgaag ccactttcac tttggcaaac aggaaccagc caagtaaaaa agaaaggtga
+   153181 tgggaaattt ccaattagat cgagattctt ctttcttctt atggatagag gttaggttat
+   153241 ctttgccagc tcgatgccac aaaggtttaa aatggggatt ccttggtgcc gttcgcttga
+   153301 cttgaggtca gtcctttcaa ctagcctaga cccatcatat ggggagcttt aaatggtgcc
+   153361 tagcctttcg aaaggaagcc tgtctgtctg ttagtatgga ctatagaata gaattctttt
+   153421 atcactgcta agcaataagt ctcagctagg gcacaggaga actgaggtcg gggggggggt
+   153481 gaaataatat agattagcaa actcgttcga ttccagattg gcgagtcagg gggatcttat
+   153541 taccaaggat cgatcaggga taaagaaaaa aaaagggttc cggccgggct aaaggtttca
+   153601 aatcaaggtg taggtgggca tatccatgtt tgggcatttc acatgatgag tagagcgatg
+   153661 ggtcttcctc ttgcaatcga gagaggcaaa taggcggaaa tcggtacact tttagtacgg
+   153721 tggtatgcga gcagcaaaac agtatatgca atacagaaat acgagcaagc tcactaatca
+   153781 tagtacggta aataagcaca agcagcagca aggaaagaaa gccagctagc ccttatagtg
+   153841 cccgaagcga gatcggagta ggaagaccat gagcggaaag gaagaaagct tcaatgccaa
+   153901 gagcagatag gataagagct gctacggtgg gggaacctca cccagtcaat cctaggagta
+   153961 agccctttta ctaagaagta gaagttcaca tgcccatctt ttctcttgca tgcgccttcg
+   154021 ttcctcatta attcaaatca aagtaagcga catcgcatcg ggaagggata gggatagcgc
+   154081 atttgcttgg ttggcggctg gctagcttag ctctcgtcct gtagctgctg cttatcgacc
+   154141 ggcagaccgg ctaccagcag caagaattga atcttttgag ccttaagttc ttcttgcccc
+   154201 agtttcacta tctctttcac ttcttcgatt ccccttgaag tagggggttt cagtgggaac
+   154261 gagatcaatc aatgaagggt caagagaaag aagaaaagaa gactaatctg gatattcccg
+   154321 aagatgcttg acttcgagtt cttggagtga cagctttgta aacaagggtc ccttactttt
+   154381 gagtttcgaa aatgcatctt tctttctccc atgcttttca ttggtcaaca accaaaccaa
+   154441 ccactaattc ttccttcact actaatccag gaagaagtct tgtcttcttc tgttcggaat
+   154501 acattttttt caaaagaaag actcaaaaat aatataaata ataatatata gaatatagaa
+   154561 agaattgctt aaataactca gcgatctaaa atcatagtta cgatctacta aaagtaaagt
+   154621 tgagcacccg gaccagacag aggtggccga gaatcttatg tcaaaagtaa ggaccaagga
+   154681 tgatcttttc ggaaaggagg agtaggagga gtcagacgga atcaaatgat tacgagatag
+   154741 acaatgagac cagggagagc aagagcactt agacaattca ctttgagtac aggaaagtct
+   154801 gctggtagga attcctcagg gcgtattacg gtttttcacc gagggggtgg ctcgaagcga
+   154861 ttgctgcgaa gaattgatct gaaacgaagc acttcctcta tgggcattgt agagagtata
+   154921 gaatatgacc ctaatcgttc ttctcagatc gctccagtac gatggatcaa agggggctgc
+   154981 cagaaaaaaa tgaacacgat cgagaagttc gctccgccgc gcaagatcct cgaacctacc
+   155041 acgaacacca tcagcggcct cttttcgttc tctttcctgc ccgggaaggt ggataaaaga
+   155101 aaggtagctt gcttctctcc tggactgatg gccgcttatg tagtggtcgg ccttcctacc
+   155161 ggaatgcctc ctttgtcttc gtctaagagc gcctttgcta gtaagggcgc aggaagcaca
+   155221 aaaactttag tgaaggacgt cttcttctct gccttctcct ctccaaaggc caagagagag
+   155281 actgcatccc ttgccttcgc tagctctttt ggtttcccaa ggatagcggt agctggggca
+   155341 aagcccgctt tcttcgctcc gcgaatgaga cagaaagtga gaggaaaaag cacgttctct
+   155401 ctttgcgagg tccaaaaggg gagaacgcat agcattctct gggcacatag gatcaaaggt
+   155461 aaagcagggc tttcttggca gagttttagg cggcaagata ctttagggct tgttggagct
+   155521 gctgggcata aaaaatcgaa gccgaagacg gatcaaggta acttgcctgc caagccaata
+   155581 ggcgaaaggg cgaagcaact caaagctctc cggggtttga gggcgaagga tggagcgtgc
+   155641 aaagtcgatc gtgcacctgt cgtgtgaccc gttggtccta agcaatgtct tgcgcgaagc
+   155701 gacccaccta gaaagagctc tcctttatct gggggcacta aaatgaaact tcgatcagat
+   155761 gcgggtataa aatcccgccg ctgagatgtc cagcggattc ctgagccttg acgaaaggtc
+   155821 ggccaccttt tttttacgga gagcaaaagg cccggggcat agcaggatga accaatgtga
+   155881 atgagtgtaa gcttcgttgc ccgaacacga ttggtgctga ccacactagg tgctaccgcg
+   155941 gtagcaagag aggccaggca atgacaattg agaggttgtc actgaacatt tctagtcaca
+   156001 cgggaagaga ggtccaatgg caaggccata cgcccgtttg gctcctcgcg gagtatagct
+   156061 cacatccaaa tatcatatct gattggggaa cggggcaaca cccatgaagc tccgacggaa
+   156121 agggaaggcc tgccaggccg tatgcccatg ggtgcaggat tcttcgaaaa agcgcgggct
+   156181 gactcggaga cctgggacct tggcttagca acgaatgaat atttctcctc gagctttctc
+   156241 cgccagcggc ttatgtagtg atcggccagc tcgctaagct ttccttcttg tagtcggccc
+   156301 gtaatgcctc ccttcatttg cttgcctcct tccttacttt tcagagaagc attttacgac
+   156361 tataaggggc gctgttcacc tttggaaact tagctacacc ggtcacgata tcttgttgat
+   156421 attgattgag gactttcgct gactaaatcc ataaacctag aaagtcaccg tcactggtac
+   156481 ttttttgact cgataggtag gtattggtgg agcttgcgta atgtagttgt agttaaggtt
+   156541 gcattgaagt ctttcttttt tttgaagatc tactgaacaa aggcgaacgg ggttcccaag
+   156601 gcgggacgtc tggcagaatg cttggcctcc cgcgctggaa gcgagacccg aagggtgagc
+   156661 ttctggcggt tagcttctag aacttataat aggcattagg cattctgagc tggaaggagg
+   156721 caagcaaaag gccgaccact ctatcatttc ccatttctga tgggaaggcc gaccactaca
+   156781 cggactctat aacaagtcat gagcgatatc gaaaccaagc caaagcctat aggctttggg
+   156841 atgaaagcca gcccgacgaa ggcctatgat agagtaagaa aaaaagtacg ttaaagtgac
+   156901 gaagtaactt agccgtctac aaagggaaag gcgtcggtac ggagtcaccg tcagctgtgg
+   156961 atatagacta tactataagg aacggagtct taaactatgg accgagacta cactaaggaa
+   157021 caaggaagct tgactgagca aagaagtcaa ggaacgaagc tgcttctcta atagccccgt
+   157081 tgaataggag ggcgaaggct ttaaaaaaaa gtttgattta gggagagggg gcttcaagtt
+   157141 cttaggaaga gccgtacgag gcagctcacg tacggttcgg gagccgagcc cctgcacagg
+   157201 ggcttaggtc aacacttata taatagccag tcatcaatta gaagcaggca aaatggtgat
+   157261 gaattgcgat tggtccaaac cttcgaccag ctccttcttg caatccgccc agaatgacca
+   157321 tcctaagccc ttattcactg tgtgaagaac ggggcagtcc gctggagtgt aggcttcttg
+   157381 gccacgcccc ctgcttatag atacgaaata cttgagataa attatcaaat aggaaattgc
+   157441 ataccattag ccgatatacg tttaggaaca tgggtacatg aaccctataa aatccttcac
+   157501 ttttagcttt gaattactta tatatatcct atgaatttaa tttcgcatcg gaaacgattc
+   157561 taggagaagt tcgaatccgt tccgttcgga tattgatcgg tcttggtttg acatggttta
+   157621 cgtgttactg gttcccggaa gagttaatat ctccattagc gtcacccttt cttaccctgc
+   157681 cttttgactc gtattttgtt tgtacacaat taacggaggc cttttcgaca tttgttgcaa
+   157741 cgtcttcaat agcatgctct tatttcgtct ttcctttaat aagttatcaa atttggtgct
+   157801 ttttgatccc cagttgctat ggagaacaaa ggacgaaata caatcgattc ctccatttaa
+   157861 gtggttctcg cttcttcttg ttcctgttcc taactcctcc ccgggtcgtt cccaatgttt
+   157921 ggcactttcc atacttcgtg ggtgcaacat caacaaattc gctcatgatc aagttacaac
+   157981 ctaagatcta tgaccatatt atgttaactg ttcgtatttc gttcattcca tcggtatgct
+   158041 cccaggtacc tgtaattgtg atctgtttgc cagaaccaag gggtctttct ttggaaacct
+   158101 tcacgaacaa tcgtcgtttt ttgatggttt ttccgcttct cacagctgct ctttccacac
+   158161 ctccggatat ctggtgccaa atcgtcgccc gtttccttat ttctttgata atagagttgg
+   158221 ctatttttgt ggcatcgatt gtacaagttc gtgaagaggg ctggacgagt ggaatgaggg
+   158281 agagcggctc gatcgagaaa aaaaataaga gtagcccccc ccctagaacc tggcaaagta
+   158341 actatcaatg aattccaata aagattataa cacacagagg actccttacc agcgggatga
+   158401 gtgatacatt cggtgagcgc cgtagttaag ttttccttat tcgttagcta tgctaaagga
+   158461 atggattgtt ctctttaggg agtgcagaat caactagggt taacctctct ttagcctatc
+   158521 tgtcctcatg ctagtcaatc tcaggtaatt ggtctatggc aaaggggttc ctctttcctt
+   158581 cacctgatgg caaatacaag aagagttcgg caacgcaaaa gcgggcttaa taatcaaata
+   158641 ataagatcga gttaagagaa ctcgtcccct aaactatcta gttaacggcg gcgtgatctt
+   158701 gctgtagggt cagagctgca ccgaagaaca gttattatcg actcgatggc tctgatccgc
+   158761 gtgaatcaga cggtgagtta ctggttcttg aatgggaatc ccgagcacca aagagcagag
+   158821 tgatcctctg ctcgagatag gaccgggtaa agggaaaaag acagaaaaga ccatagaata
+   158881 gcaataggaa tcgctgctat ccgacaactt tctccgtgga gcccgcagct aaaggttaac
+   158941 aaaagattgt agaacccatg gacttactat ctaaacgata cgagacgggt cagtcaaggg
+   159001 ccaccagagg cacaaagctg gcgaatatcc gtgagagcgg aagatataaa aggtataggt
+   159061 aagtaagctc ttttaagatc aagggcaata agacgtcgaa ggaagaatcc gcaatgcttt
+   159121 gacattcctt ttcaaatgag ttgtgcccag cccagttagt ccactactac agatagtaac
+   159181 tagcacagaa aaacaagatg gaatcaaata cggtgttaat taatttactg aagtgaagca
+   159241 aggaacgtag taactcgact ggttggagag aaactttgtt attagggttg aatttccagt
+   159301 cttcgggcct caacaccgga gcttatcctg aggcctaggt aggtaggtag tgacgtctct
+   159361 tgctgggtct ttcataagga atggtcctgt tagtcttaat ccagcgactg gccttgctcc
+   159421 gagccgatat ccgaagtatg gttcgctcgc tagttatatg cttaacacat gcatctgagg
+   159481 tcagaggtgc cgctaaggtg aacgctcagc ttcaactctg actatatatt atatataagt
+   159541 gcactgaagg ttaactatgt caatctacaa ctcaatgtga ttggcttcgc ccgggctcag
+   159601 tctctttcgg ccggtatgta gaatcgtcgg agcgagcaaa gcagcggagc gaagtgggct
+   159661 gtgtaatcat tttatttttt gacttttttt taatatataa gggtaagtaa ggagctgaaa
+   159721 acgagtcctt cggagggcga agaaactcat taattcattt tgatgagggc ccctcattca
+   159781 aatgttatgc ttagtgcttc cctcacattt tgagttgatt atttccaatt tcgagagtga
+   159841 gattgatccg accaagtagt agtgcggaag ccagtacata catggctagt ttccaggcaa
+   159901 gccatccata ttggcataac cttcttctat gccatctagc ttcatagccg atagtttccg
+   159961 ttgatcatta tcttcatcca tcggatcagc tccttctttt ctgatttttt tcaatgactg
+   160021 cttcttatgc ttgccctcgt tcttctctct tctcctagac ccacccccct tactgtctgt
+   160081 ttaggccccc tgccaggtac tccagtctca ttgcgtaccg tcgtctgcat cttcttgccc
+   160141 tgttttcata gttgtttgct tttcttatat ggattaggct tggttaaaag cgtaccgtca
+   160201 agcaagaaaa ggaaccgtaa actagcttta ccgagttggc ttcttcgtat gagccctagc
+   160261 agcatctatt ccttactctg ttttctgctt tcaagcccct aagtaggatc gtctaacgaa
+   160321 gtcaggtacg agggaatatg ttaatgcaaa agaggcgagg aggctggtga ccagagaagg
+   160381 tctgtgatgg aagcaattcc tggctttctg gtctgtgata aaagcaatct ctctccggtc
+   160441 ggttcgactg ttaatggaaa aatgaataga tccttaggaa gaaaaaggct cttttgctct
+   160501 tgtgtagaat cagttgttac agagaaggag cggttttcgt ttcgcaatgg aatcagaatt
+   160561 agctacgatc aattaaaata tcgtagtata gtaagagcca acgaagtagc tgtaacgtga
+   160621 acagctaaag ctccttacct tatatgggct gcaccgcgct gaatgatcaa atcccattta
+   160681 ttcttatttt tttacattac aactcccatg cctttccgtt ggtcaacaac caaccgtcga
+   160741 tttacctttt ccttcatttt gagaacaagt ctctcttggg gggagcagag catgcaaaat
+   160801 cgagcaatag atggatctta gaagaattcc actttgaacg gcacgactct ttcgattttt
+   160861 gcgctggcat ttgagttgtc tcccctcctc tttcaatcga cacactcgac gcagatagct
+   160921 ccggtggccc cggcttctgc ctctatcagg cttaggctgc ccccaccccc acagagccac
+   160981 agcatggagt agctgctccc gcgctacaac caatcaaaat tttatacgga tcgatatatg
+   161041 atgatgctaa accgagatag agaaagaggg cgaagaaact cattaattca tcgatgaggg
+   161101 ttgccgcgcc tccctttgtt ggctcttcga gacaaaaaca ctcataggaa tggtgctaat
+   161161 tcccatgttc cttctagtct cgtttggttt cgaaagcctc cccctccctg tctcttactc
+   161221 tttgaaagct gtcatcctgg atttattttt gaatggtttc tttgttctct tatttggatt
+   161281 gaaagaaaga caaaacttca ttttcttcga cgaatttcgg ctatgaccaa tgcaccaata
+   161341 gctgaatagg cgtaacaaag ctacctgaaa aggcaaggtg caccttggga ttgaaatcga
+   161401 cgaattgcgt tttgcccgag agatttattt agaaagatta tccatcggat aaagaatgca
+   161461 aaaatagaaa gaatataaat aaaggcgtat acacgggaag ggggcaccac tacttccagg
+   161521 gggagactag cctcattact tcttagtggg cgagaggtgc gcctaaccca cctacccact
+   161581 cataaatcac ggtgttcagc tgacttgaac tgatagaccc ttattgtatt ggaatttagc
+   161641 gcccatcttt tgactgttgt caacaaatct cttcaatgtt cgattctact ctatgttaga
+   161701 acatttctgt gaatgctatt ctaatctaag tggtcttatt ctgtgtcctg tgctaggaag
+   161761 cattactctt cttttcattc caaattcaag aatacgaccg atacgattaa ttggtctgtg
+   161821 tgcctctctt attacttttt tgtattctcc tgttcctcgg atacaattcg actcttctac
+   161881 ggccaaatct caatttgtgg aaagccttcg atggcttcct tatgaaaaca tcaattttta
+   161941 tttgggtata gacggtatct ctttattctt cgtgatattg accacatttc tgatccctat
+   162001 ttgcatttca gtgggttggt ctggtatgag aagttatggg aaagagtata ttacagcatt
+   162061 tttaattcgt gaatttctaa tgatcgccgt gttccgcatg ctagatcttc tactattcta
+   162121 tgtttttccc gaaagcgtgc caatccctat gttgtgcgga gcggagtatc ttatattcgc
+   162181 tgggagaaag cttttcctct gcaggggcct tgtgcagtaa acccctacgg gcggtcgtcc
+   162241 gtcgtcgtaa agtagtcccc gcgaagcttt cgggaagagg ggtagtcttg tgtgtaagca
+   162301 tagcatttct ggtcgaaccc gcccaatcca actaagaaga accgaacctg acaaacacat
+   162361 ctttttcctt ttgggagggt actccgagta tagtgggtac ctcgtaggac ctcgacccgc
+   162421 ctactcgggt cttgtatgga tatgcaggaa ggggtgctcc taggtgtgtg taggggttgt
+   162481 gtttgttcgc gagaatggat tcctcgtcaa gtctgtttgg ggggtgtgga cacacttgcg
+   162541 cgaattcagg taacggctac aagggagaaa taaaaaggaa actgtacccg accagggatg
+   162601 gacgtaaact cgtaagctac cgaggttagg gataatcgtc caggtcttat tgtgaaacaa
+   162661 aaaagccgcc ccgccacagc aagcgggttg gttcctctgt cgtcgccgga tagctcttgg
+   162721 cgaggtactt ttgtcactcg actgaaagga gaggagacct taggataagt tgctaaaaca
+   162781 aaggggagta gaggatcgac ccgttcagta gaattccgaa gaaagactgt tgacagctgg
+   162841 tggagacatt tctttggccc ccggcaaatc aaaaggaaat gcgggcaggt ttaagctcgg
+   162901 cagagggttc aagaataggg tcctgccctt cagattctca gaataaaaaa atagttccaa
+   162961 acctttatgc atgcacctcc gtataagtgc tgcgtacaag ttccggccag gataattggg
+   163021 aaagatcaaa cccgaaagaa ccgctcacat cacagtagta gtagcgtaaa ggccgtaagt
+   163081 cgggtagcgg ccataccata aggtaagggg ctattacttt cacatctctc cttctgtcgt
+   163141 actataagaa agagagatcc gctgcgtgag caacccgact gtgcgttaca tgtgctctac
+   163201 aggccgaact ccatctttct tcttaacaag cccatttatc tttagatttt gaagacgggc
+   163261 gttgcgttcg gttcgaaagg tatggttttc agtatgtctc cagatagggc gccccactag
+   163321 tccggctagc tagtgagcgg ttctttcggg cgagaagcag gccgggccct acgggcgggg
+   163381 gcatctcccg caacgaaagc tgcatagttc gccaccaccc gaaaagtaaa agattagaga
+   163441 gtccagacta aaaatacatg catagatagt gatctaatga caagggccga cgacggaagc
+   163501 tcgggacgga gccgtatgat gcggaagtct cacgtacggt tccctgagaa gggagtggct
+   163561 acctactgga gcttcgacca agcacccccg gtcaattccg ctttggggcc accccttact
+   163621 ctaccattat tataggagta tggggttcga gacaaagaaa gatcaaggca gcatatcagt
+   163681 ttttccttta tactttactt ggatctcttt ttatgctatt agctattctg ttgattcttt
+   163741 tccaaacagg aaccaccgat ttacaaatat cattaaccac agaatttagt gagcggcgcc
+   163801 aaatctttct atggattgct tctttcgcct ctttcgccgt caaagtgcct atggtaccag
+   163861 ttcatatttg gttacctgaa gctcatgtag aggcacctac ggcaggatcc gtcatcttgg
+   163921 caggaattcc tttaaaattt ggaacccacg ggtttttaag attttcaata cccatgtttc
+   163981 ccgaagcgac actttgttct actcctttca tttatacttt aagcgcgatt gctataatat
+   164041 atacttcctt gaccacttca agacagatcg atctaaagaa gatcattgct tactcctcag
+   164101 tagcccatat gaatctggtg actattggta tgtttagtcg ggcggcggcc gttaggtcac
+   164161 ctattttgag ttatggacac acaaggccaa aacatgtgtg tcgggcgtgc gacccatcaa
+   164221 cctactagca atgggggaga aaacatagca tgtcgcaaca aaagcttgat tcgaggcgtc
+   164281 agcaaaacac tgccgtctgt tcccttcagt cccttagcgc cccgggacgg gagtggggga
+   164341 cggctctacg cgcaggcaac agcagcaccg gctccacgaa gtctgaatcg aatctttctg
+   164401 ttggctttcc caaattcatt cgtaaatcaa aatcaaaggg ctagaagcga gcgcttctag
+   164461 ctgcttcgcc tgcttcttct tattatggcg gccatgttgg cgtggcgaaa atgaacgaaa
+   164521 agcgagatga acgtgctatt ttcaaatcgg attgatagat tgatctgttc tgatagatct
+   164581 aaagagtaga aatagataga gaatagagag ggaatcaata ataaggtctt tgagcctatt
+   164641 tctatctatt gatgagacaa ctatctattc ttgatccatc agaaagaaat tatttatcca
+   164701 tcagaaagaa atcgatatgt atctgatgga gtcttctaca tcgtacgtag agcgcccaag
+   164761 cgctttttgg gccagctcag ttctcttatc catcggtcca atgcactggg ctcatctcat
+   164821 ggagggaaaa gccaaaatgt agttgtcttg ttgttgttcg ccgcctcgac gcattccctt
+   164881 ctctccccgg catcgtccca cacagaaaga aagagcgcgg agccccggcc cgagccgtag
+   164941 gtccgctaac gtaaagcgag gagttgagcc tgaactggcg aaccgaagtc actttcggaa
+   165001 ccatacttcc tacagctgct aacatgtgcc cagtcctgcg gaaaggcgca aacgaacgtg
+   165061 agctgctata ccggaatccc ccgctggcca tcggggaccg agtggtaagg ccatgatctg
+   165121 caggggaacg gatcactcat tcttccattg gggacaggtg cacgaacgac aactccaaac
+   165181 gtcacacatc cgccgcctac ttaccgttta ggtggcacca gcgagatcca gctaaggaaa
+   165241 aagagtgtcg cggctgctcc actccgccgc ggtctcatga acttcacttc gttgccttcc
+   165301 cgcgcgcaaa gcgaatgggc gctgtgctgt gggtcagttt cggggcgggg ggcgcagaga
+   165361 taccagaata aatgattcat ttgtttggat cgacgagctt tttcagcccc aaaactcaga
+   165421 atcaatggaa tgtctgtcca taaacatcta ttctatctgt atatataggg gatctctcta
+   165481 tacatatcaa agtcttttat ggcattgata tgatcgccta gctgtagccg catatcacag
+   165541 ctgcactcaa tatgcggatt tctgttggat cagatctttt ctttcgcttt gacggaagct
+   165601 ttttgaccta gcgaaaagca ctttcgcgca agcaaagtag aagctttgcc agaagcaaga
+   165661 cgaggaagtg gagctgtcgt ataagcggta gcttcccccg accgactaaa atacaagagt
+   165721 cgcggcctac tttgattgcg aaggggtttg gcaacaagca aacggctttc tatcatagtt
+   165781 gcaagggttc aaaaccttag ttcgctgctt ttcccagtac cagagaaggg cttatactgc
+   165841 tcgcctttgt ttggtttgat atattcattc agtcaaaata caaactacaa caaagtggaa
+   165901 gtggaagggc cgctatagaa gctagaactt gctttataag tcggcctaac caaagcgtca
+   165961 cgacaacaaa aaattatcct tatgaatgga atcttaagta gggggggctt tgaccgcccg
+   166021 cctaccaatc aaagaggcag agagtaaacg taagctcacc cgtaagctcg aagagagctt
+   166081 cccttcattc gcttcgcggg agccgcacaa gcacatagct ggaagtcaga gggcccatac
+   166141 tacctgccta accctttgtt ccgagggacc gtagatcgga aagcacccca tttatccaaa
+   166201 agagaaggga aggggcctat gtatttgcat gacccctgcg gatttgaccc tatcccggag
+   166261 ccaatcccct attggtcctg ccaccacgcc gcagaacgag agctcgtgtg gaaccttttc
+   166321 tttctggcgt aacagccggt ggaacgtaac aaaagattac ggtcgcctaa cattaacata
+   166381 agggcggggg gtacggtaaa ctcggccaaa atatgacacc cgaagggccc gaacgcacaa
+   166441 tcctatccca tccgagtccg agtttacccc ttgcacttcg gacagccgac cgtagcatca
+   166501 taaggaggac cccctttcga ggtaaaaaaa aaaaaaggta cggtacatat aggaggttgg
+   166561 actttctcaa cgtggtgtat agcacgaaaa acttttcgat acaagaaagg gccgttctca
+   166621 catgaaagaa gagaagaaat cctttcttct cttctttctc tttctcgaga aggaaagaaa
+   166681 gaggatgggg ggaggggggg aatggggccg gtgcccttct tttacggccg tcactcttat
+   166741 ttgtcagccg tgaggaacta ccggctcggt ctcggtggga aaggaaaggc ttgggcctac
+   166801 ctatcccgat aagacctcat aaaggaacgg cgggagtgat aggttccata ttgccgagct
+   166861 gaagggcaag acttttgtac gtgatcgtag tatgtgacgt cgtctcgtcc acgctgcatt
+   166921 gaagagtacc tacgcactaa gttccggttc actgataagg aagatagagt tgggcggggg
+   166981 tctacgatgt gatactcaag tatatgaccc ggggagatac atgctaacta tgggtaggaa
+   167041 gcaggaaccc ttatgtaaat aatttcgggg gggggttaca gatctcttat actaccctcg
+   167101 atcgacagag cggaacgacc agaaaaataa gtgatgttag aaagccgtat gataggtggt
+   167161 aactatcttg tacggttcgg ggggtaatcg gcgtactccg ggagaaatct ttcgctctat
+   167221 cgaacataca gggaattgga ggtagcattc taccgatgtt aagtcatgga ctggttcctt
+   167281 cagccctttt tctatgtgtt ggtgttttat atgaccgaca taagactcga cttgttagat
+   167341 attacggagg tttagtgagc accatgccga atctctctac cattttcttt tcttttactt
+   167401 tggccaatat gagttcacct ggtactagca gctttatcgg ggaatttctc atcttagtag
+   167461 gagctttcca aagaaatagc ttagtagcca cattagcagc gcttgggatg attttaggtg
+   167521 cggcctattc cctttggcta tataatcgtg tggtttctgg aaatttaaaa cctgatttcc
+   167581 tccataaatt ctccgattca aatggcagag aagtttccat atttatacct tttcttgttg
+   167641 gaggggcgac cgtccgttga actaccaaag aaaaagggta aacctatgtg atcatgacat
+   167701 tgtaggtgct tgcgatggga cggatgcgac ttccctcagt tggtttgggt ggcatagccc
+   167761 gttgcataag tccccctttt tttgattcat tttttgagtc tttagggagc caaagcttta
+   167821 ctttactaat aaaggctcgc gcaggggcgc tcactttttt ttgctaagcc gtctctttct
+   167881 gggtgggacc gagagaaata aaggacagag ggcaaccatg catggtactt ctcgaccctg
+   167941 tctccgaggg acagttgaac gagcgactca tgaatgctgc cgggtcggac gagccaataa
+   168001 ctcgaacgcg ttcggtctgt tttttgagca agaatcacag cgttacctta ccttcaccat
+   168061 gatacggact ccaagttctt atggcagagc acgaggagat ttatcatcaa tattctaatg
+   168121 ggaatggaaa ccagaagcac gaccgaggtc ttcgtagtct aaaataataa aaccatcaat
+   168181 aacagtaatg taagcatgag actttttggt agtaccggtg aaccagatgg ccgcggcgat
+   168241 ggaatctggg acggaggact cgtagtatct ctctagatcc ggcaaaagcg aaagaccccc
+   168301 tagttccatt cgaatgaagg ctgactgctg agcccactct ggccccggcg ggcccccgtg
+   168361 gttgcgagcc ggagctgcca tagcttatgg ctagagcaat gggaggggcc tcagcagaga
+   168421 gaacagaaca gaaccaccgt aaggataacg agtgctccgc ccgtcaagcg ggcggcagga
+   168481 gcagcaggca agtacttggt aggccaacag tccagtgggc actcgacgaa agggggcaca
+   168541 cggagcaagt acgagaaatt ggccccgctc cgctttataa aaagcaagga cccactacgg
+   168601 gaggtcaaaa acccaggacc tatggaagtc ggggctcgtc cccggtcaat attggatcaa
+   168661 acaaaacaag caataggggc ccgtagcact gacctctttt tttattgatt caatataata
+   168721 ggggaaaaga tcgtacagtt ccctaccgag acaagagaca ctcttaacag atcctccgcg
+   168781 cgctgggcat acctcttccg tgcgtctttc tcgtggcagg aacagaacaa cagggaaaga
+   168841 aagacccggg gccgacctgc ccagggctcg agggcgagct ttatttaaga gagaatgggg
+   168901 agcgaatcga aaggcttccg ttttagttct tggtttggtt cgggctcctc tcgatctttt
+   168961 tcgtagtagg gaagggggaa ggagtctaaa tctatggaca ttaatgaacc atcattgatg
+   169021 gacgttgcac atgacacgat caattcgact cagggtccgg cgctaataga ggttgcttac
+   169081 tttcctagta gcgaaggaaa agggcagggc ttttttcgtg gtaatagtgg gcgggtctcc
+   169141 tttcgaagta aaggccttcg cattcctaat ccgccccacc aaccccggac ggcttagttt
+   169201 gtcccagctt ggtgaatcgc atccccgcgc aacgacatag tttgtgcgcc ctttaccgtt
+   169261 ctcgctcagt gtttgcaacg gctggggagg cagtcgtaga agcgaagtct atcgccacgc
+   169321 caaccatcaa atacgagatt gggccccttc tcaaagattt gatggaatgg cccacccaat
+   169381 agcgcttatg tcatatggga actcatggct ggaaacaatc cttatggttt tgatatccgg
+   169441 taggaataat aagaatcaaa gtccaggtag gttggtgagc ctagtgatag gagactatct
+   169501 agcttggttc ggagagcact tgttgggtta aaaacttttt ttgttgctaa atgttacagc
+   169561 ctaaatgctg aactattgac tctactcgtt cggatgggtg ttcaccccaa agtgttcccg
+   169621 gactgcatgc atacatccgt aagtaactta gtgcaacatg gcaaatttca ttgagaggaa
+   169681 tcagcaaaga aaagaaaaac gggtcaacat cttaatgtgt atttgaggat attttagggc
+   169741 cctagaacgc aaaaaaaagg tgggtgaaca agagttgtca cgataggaaa gagaaatgac
+   169801 tataaggaac caacggttct ctcttcttaa acaacctata tcctccacac ttaatcagca
+   169861 tttagtagat tatccaaccc cgagcaatct tagttattgg tgggggttcg gtccgttagc
+   169921 tggtaccatg atactttctg ttttgtcgag ccctgctttg gtctctggtt tgatggttgc
+   169981 acgtgctaaa aatctggtac attccgtttt gtttcccatc cctatctttt tttctatcaa
+   170041 tcaacttttc cattactttt gtaggcttcc tattataaag cacctggcaa caaaatgcca
+   170101 actccttctt tttttgattt ctcactttct gcttttgctg gttcttacca aattggtact
+   170161 tgatctgggc ggctatctct ttatggacga cctgagtcgt gccctttctc aattcgtccc
+   170221 cggtttttcc gggggattgg gagggggttc taatacgccg cccaacccct caggggattt
+   170281 ttttctctct tcttatcaaa cttcagaccc ggactatcac gatcagcgga ggggtgattc
+   170341 ctacttttcc tccgcgcccg gagtacagga aacccaccgg cacgcgtcgg gaagttccac
+   170401 aaaccttcac ttgaacctaa acgaccaaag ccaagatccc atttttttag aggttgagcg
+   170461 cctaagcttg aaatgcgata aagtgaaaga aaaaacgata ttaaagaccc agtctttatt
+   170521 gctagaaaga ggttaccata ttccggatga acgggatata gaacgagcaa taaatgttgt
+   170581 gatgactgaa catgaaacga tcgatataga tcggcgtagg aaacggttct actaccttta
+   170641 ctcgtgttta ggaaaaaccg gaaacaagtt ttggatggag ctactggaaa cgctggctga
+   170701 ctacaatata aatataaaaa gcgattccga caactaacct gccatttcag gttttttgtt
+   170761 aggtctcgac tttcttttca ggcttgactc attattttgc taggtatagg tcctctcctt
+   170821 caaatactgg aaggtggatc acttgtagga attgtaggaa tgacataatg ctaatccatg
+   170881 ttgtacatgg ccaaggaagc ataaaatgat tctttcattc tatagatacc tctggtaggt
+   170941 aaagcactcg actgtgcttt attgaaagtt cccatcgcgg gggcgaggat acttgccttc
+   171001 gcggttcgac tttcttttca ggcttgactc attcgctagc ttctcctccc ctttagagct
+   171061 ctttatgatg cccactgagt aagattcggg gacttccggc gcagaagctc attctgaacc
+   171121 gcgggaacct tcgtctcaat cttcataatc aatttatgca cttttttctt atgtgatttc
+   171181 atagttcgag gcgcttaatg aagaagcagc tatagttcct ctcgggctag aatgactgac
+   171241 ttggctttcg actgatatag tgcatgggtc tgtacttcag gctgaggttc ttggattgaa
+   171301 atcttatgat gttctgactg accttttccc tcctcgactt gagtagatag tcagggtttc
+   171361 agaatatgtt tgttggatgg gaatatggtg cttgatggtg ttgttgttca gacctcggct
+   171421 ttctcccttc ctttcgtata tgccatgcca gctgacaggg agatcttcgt tgttcagctt
+   171481 atattagagt cgttctacct tctatgggtc caagggaaat ggggcagttg ttccatcttc
+   171541 ccatgcctgg actaccgata gttccctctt cttcctctta ttgtttccct ctttcgtacg
+   171601 tacacacgtc gtcaagggat tgatgctaaa caaccaaagc tgccttggct gctgtgactg
+   171661 cctctccttg cctcctatcc tttaagtcga gtacttcgtt ccttttcagt cgagttatcg
+   171721 ctttcgcttc ttttgatcct gcagaggcag gacaaactcc cggcgagcaa ggtcaagatg
+   171781 ccgatgctgt cggtctgcag cctcctttag tcaacgaagc gaactggctg aactctatgt
+   171841 cttttggctt cgtcttgctg aaagaaaaag agtgaggaaa acaaagcaac aaaagagagt
+   171901 ggcaacaaga agaatggaag gcttcccgtt actgaagtga acttctcggt aagtccggca
+   171961 ctctgcccca tataccttat tcgtagtatc gctcctatct cgatgaagct tctagcctac
+   172021 accggggttc ctctcgatac atcttgcaag ggatttctag ttgactgttg cctctccttc
+   172081 tccaccaccg aatccttacc ctcctcttct ctccggctag acatctgctt tggcttcacc
+   172141 ggctgactcg gatgtaagcg atgaactagt ctctcctaca gttaatcatt tctatggtgt
+   172201 gggtagcgag gttcttcttc tccattgaag aaatgagcag ccttgagacc caggagaata
+   172261 gcttcaacat agccttcacc atctgacccg gagcccttgc cgttcacttc actactacgt
+   172321 acgccaggat ctagccctta gctgcagaga ttcggataaa gctctgtgct ctctccaaca
+   172381 gcgtattatc ccgtagctac cgctctttcc tgccacttca aagtggaatt ctttctctgt
+   172441 gttgctttgc gaattgggat tgagtctcag ccaagaaaga taaggtgatc gaaaggtcaa
+   172501 atcagagatc tcctcgctta cccctgaagg agctgatcga ggaccaatgt cagaacactc
+   172561 gagttggcct tccttcccta cggatgagga gatgaaacag attatggttt agaagccgaa
+   172621 caaagagcat tggtgtgacc gttagcagcc cagtcgtgcc ctcccctgaa cactcggacg
+   172681 agagggtgag ggtgggtgcc tctctatgat tcgtctatct atgttcctat ctatcggaat
+   172741 gactcccacc gctgcttcaa tctatatctc ccggtaaagt gaaatcatca atctgaatca
+   172801 attgaatcct tcgatatcag tgaaatcctc catctatgat tgctccttca tttcctcaat
+   172861 cggaagagtc ttcttctctt ctgtcagttg atcagcatcc cttttccaag aacacctcga
+   172921 attcggccta ctctacttcc cggaatgaga cgggggtcga gtcataggag ctgcttctct
+   172981 ctgctattgg acttgcaaca gcggtctctc tataggaaag agatagctcg ttcactccgt
+   173041 ctctctctgc tttgccgacc agaaactttc cgtatcgtac ttaagagccc gaaagcacac
+   173101 tcctctgctt tgattccagg ttagggataa gaactcttca ctgttgaata gagagattca
+   173161 attgctagta agttattcaa tcgatacttc tgggcctgcc tacgctgatg aatccctcct
+   173221 ttcttcaggt agcatgagaa gatggaatct cagacttgca aagcagacct acgactcagg
+   173281 tctcgaagag ccttgccata gcgtacagag aaagcaagag gactggtagc tctagagtcc
+   173341 agggggtctt cacctttact tatagtcgag tgggcagcgt tgacggaagg agagtcaggt
+   173401 attcacttag ttgacgctat cgtccttcta atccatcttt ccgtttagct cggcattgct
+   173461 attgatctct atctctgctg aagcatgtta ttgatcggga atcgttcctt gaccatattg
+   173521 gtcttatcca tcctcctcta acccctacca acttcgatag cgatcactaa ggcaacctac
+   173581 atcctctatc ccttagcctc tctcaatgta tgcaagagtc tccctgcgag gagaaggaag
+   173641 gcaaacaaca gaaaggagag ggcaccagca agtgaaacac acatggaagt cagtctagca
+   173701 atctatccca gccaagaagc cgttaaaaag aagtcaggat acccgaggta agccaaccaa
+   173761 gccaatgatc caaccaagcc aatgatgtgg tttgatgcat cgatcttctt ccgaacaagg
+   173821 tagccaacca agaaagtatc tgatcaccca tccgatacga agacctttga tagtcaacct
+   173881 ctccagcttg atagtccctc tccagtcaat aaagaacctt cccttgcgaa ataaccttcc
+   173941 cttgccaggc agttggcttt gttccttggt aaagaagaga aagctaacgg gtaagcggat
+   174001 ggcaaagaag agaaagctag cctttgcttg ggtgaagatg tgaaagagga gaaatccaaa
+   174061 gaaagaagag aaagctgcta tagtttaagc taactcctcc gacagcgaaa agaaagagga
+   174121 gaaagctgcc agtactcact cagccgagga tccttacttc ctttcataga cgaaacgggg
+   174181 cttgaatcgg tatattggtt gagaatgact ccctgcttca ttaatgggaa atgcgcggag
+   174241 caactaacta gtaccactgc atacaagctt ctcttccttc ctccgttaac agaacatacg
+   174301 acttctctgt atccaaagca gtaccccgat aaacaactag tacttttact agaccccagg
+   174361 agcggaactc gattaacata ctacagccag cccttcttca ttgccctctc ttccccatat
+   174421 tctgggtcct ctttgtctgc taagttccac tttgctcctc atctctatgc attcttggtg
+   174481 gttgctagca taaccagatc tgacatagct tactcagtga actgactttc tgatcgaaga
+   174541 gagatcagaa ggtttaccag tttatattcc ttcctagaaa acgctttctc tgttgagata
+   174601 tcttctctcg acctctctcc ttgcgtagta gctgagtcat cagtttatct tgtagtgagg
+   174661 gattcacctc tggcttattc ttcgtgtcag gcttgttaga ccacttcttg tcttcctagt
+   174721 attcttgctg cttttactta ttctgagtga gagctcagtc ttatcttctt agccttctat
+   174781 ccgcatactt ccaacacaaa gagaaagaag gtaaatggcc tctggagagg taagatttgt
+   174841 agggaaaggt aagccaatac atcaaatttc tcactcacaa ctggcatgct accagcaaga
+   174901 atagaatctt ccattttcgg tgaatgctct ctcttcaaca acaggagttc taaataaagt
+   174961 aacgaaggag agctgcggaa ccgctagcag cacttgccaa cacttgtcag cagccaaggg
+   175021 gtcctcttct tgccagccaa tgactaacag atagatgttt gggattcaga cttgtatgca
+   175081 tcgggggaaa catggctccg cctaaggttc gaggaattcc tttgacctag tttgctgcgt
+   175141 ttccgttctt gcttgctcga cttatgactc acaactggtg gagctttgcc cttggcttgc
+   175201 tgtaaagagc ttggccaaat aggatggttc ctcactcttt aggggaagga acggtgcttc
+   175261 tcaaagtagg attggagaag tctcccccta acaggaaatg tacacaacgc tgtttgtggg
+   175321 acgggaactt cctggtgatt tgactttgtc gaagtctctc tagtgcttcc gatgtgataa
+   175381 tgttaagcat atagctagta acgcctttga agcaaggttg ccaccgaagt tcatcaaaaa
+   175441 aagacggggt attgcaactt ttgaaggctt ttcggctctc taaaataaga tcagatacta
+   175501 tacgaaatta gccttttcga ggacccaaaa atgggtcttt ctttactgat tcattaaaaa
+   175561 accacggctc tttaaaaaaa gaaagagttg aaaaagctga ttgcatagcc gttttattca
+   175621 cgtcgggcat tcactttagc agctttagcg agtaaaggca tatagctagt atagcagttg
+   175681 agtcggtgaa ctcggataag taagcttaca gtatccgtta tcttattcct tagggcaggc
+   175741 ctctatcttg ttccaatccc agtagcaaaa cgcgggctag gagtggcttg ttcctcactc
+   175801 gggggactgc ttgatccgct cctcccgtct atatttctta ccttgtcgct tcccgttctc
+   175861 catcgtactg atatgactgc tatggtagga gaatctcaag tgcttgcgat aaaggtcggc
+   175921 cccttccaag tcggaccttg gcttgtagcc tggacttctc atttgatgag gaggaaaagg
+   175981 ggcatgtaag tgtaaggatg aaacctcggg taggtgagat tggagcctat tattgtatgc
+   176041 gtgatggaat tccgatcttc cctctcttcc tctggagccc ttacctcgca ggggcctgac
+   176101 tttggggtgt ggttcttggc ctgtattatt tgtccgatcg cattatcttt tattccggtt
+   176161 catcatagtc tcgtatctct caaccatcta tatcataaga gaagccggtt ccaagactag
+   176221 tcgagatgcc ctattcagaa aggtagatga gagcttgcgg agaaggaaag ttttcatggt
+   176281 tggctgatcc atcgaatact gtcctttccc ctttttacca ttttttcgtc gagttgctga
+   176341 tacgttcttc tccagcaaca gcctacgtag caatcttcct cttttcaagg acaaggaaga
+   176401 aagcatcaga cacgtccgta gttctaggtc tttttgccca gttcctggag cactcttgtg
+   176461 gtttagtgtg ctaagtaatc tgggagccca ctgtcatttt attcgggctc agctcaatcg
+   176521 gcatcgaaac tcttaattct attatgaccc ttcgccatct cctccctgtc gattctcatt
+   176581 aaaaattaag agcaggagac ccacaagtag taaaagcaag tctagacctt ttagccattt
+   176641 caagagaggt agaccctcca gcgcaggaag ttcttctaaa aagaagaaga gtgcactgaa
+   176701 gaggatagga atcagggcct gtactgtatt gggagtgagt tgaaatatct tttgtataag
+   176761 acatataaaa caaatcccac cgatcagaaa taaaaggaag accatccctt cgatcggtat
+   176821 tcccgctttg cccgccgcgg acgagagcca aacggataca aaaagggcga gaaccgaata
+   176881 aagtataaag gtcagcatat atcctgttag cgtgcgtttc tccagggaaa taaaaagaac
+   176941 gcacgatccc gagaatatag cggctagttg ccaggggtct aagaaagaga tctcctgaaa
+   177001 agcgtataac cttagtccat ataacatgag acaaagcaaa atgagaggta gaattccccc
+   177061 tcgtagagct acaggggatt ctcttcttcc tagacagaat gaaagcgcgg agtatcctaa
+   177121 aagctgcgaa gctaatagga acccccactc cgcgagttct ctatctagtc caaagaacag
+   177181 taaaaaaata aaaggcgaca agggagggga accccaagac cagaacaccg gagaaaatgc
+   177241 caagcaaccc tacttgggcg aaagccaacc ctcttaaaaa aagggtggaa agttggcata
+   177301 gggcgaatag tcctaggaga tatggcctta gtctcggatc tatagaaagt gtgcagtgag
+   177361 agatctttat aggtagagtt aagtaaagcc agtctttact taactcggcc caaacaattt
+   177421 tgattattca atataaaact gtcccatgct ttccgttggt caacaaccaa ccaaaccaca
+   177481 tattttcgtc tttccaaatt aggagagcaa ataagcaagt cctttccaac tagagctctg
+   177541 gcggtctttt aagctgttca actagttagg gcattagcct cagaggcagg ggtttgaagt
+   177601 caggaatttc ccactcccac tggacgacat gcggatagag aaatgcgact aagctagatt
+   177661 caggaacagc ttctattagt acacaatctt tttgaatata gatcagcagc ggatgttgcg
+   177721 acagtaactt caacatcgac aacagccttc agtcgtcgaa gtaaggaatt actaggaact
+   177781 gctttcattc ccacgatatg cgccaacagg gcttttagga agagtagtaa cagcaccagt
+   177841 aaccgctttc tttattccgg actttcagta aggaatgata gaaccgctga tacaaagtat
+   177901 agtgagaaga gctgatatgc ttaagcgtcg aaaggactgt ttgcttacgt gccggattgc
+   177961 ttttctaact agccagtata gttacagctg atatttctat atgaaagaaa gctttaataa
+   178021 tagatgcgat tgtgagcagg cttacttatc agtgctagct tgtatgcgct ttctattaca
+   178081 gcaacagctt ttatgcgaaa aacaagaaaa aggaaaactg cagctctgcg acataaccag
+   178141 tcttgcagaa ctggaagaac gaatttactc gacaaggaaa ccaagtcaaa aggttgcgcc
+   178201 tgacagccaa gcgccagttt ttgtaatctc tttccttcca aggagtcctt gtcttcttgc
+   178261 atcacggcct cgatctacgc aatgaaaacg tatttcctct aactgcccgt aaagtcagaa
+   178321 ttcaacgaag agcactcatt tatgatgtaa ctattatgta cctagaccgg ccttacccga
+   178381 tacattactt gcttcccata ggaagaaggt ctgcaagcct ccatatgatt gattctcgca
+   178441 atgaagtctt cccgttgatt ccacttcttt ctcctctgac tcagtctagc aaccccacct
+   178501 tggtctcgta gcccacttgt agccatcact accgcgggtc tttgctcata gtcacgaata
+   178561 agcttctcct aagtcctctc agcaattcaa agatgagact gactgacttc tattaagtaa
+   178621 tcggataaga ctgatgcact cacttttaag acttccccga tctcctaccc ggctaaaaac
+   178681 tagaagtcag cattctattc taatctccgg ccttcatgcc aggggttcct cctccgtacc
+   178741 ttttccttct gtttggcttt tccccagccc gtgagaaatt gataagccaa tagagtcaat
+   178801 cgtcaagtcg ggatatccaa taggtttttt tccaggtaag gaactagaat agattcatac
+   178861 gaaagtggct ttttggttgg tagcaggacg gtaattggca acgaagcaga gatttatttt
+   178921 acccattaaa aagtatattt caccattcct ctcgtcgtcc agaatgggtg actcaaagaa
+   178981 aacgtttcat tcacgatgag gcagaattga cataatataa ccaagattgg atggtgacat
+   179041 ataatatagt aaagtaaggg gttcctccgc ccgtcttcga tacaatcatt gcgatgtgtc
+   179101 actcaaggat agctagccat tcttctctac ccatgccatg atttacagat gagctcgaga
+   179161 ccattattga atatattatg aacacaattt gccaagagtt ggttgtgaca aaagtgattg
+   179221 ggatgcccat ctttggtcca gaaggggttc tctatgaaaa gagtttccac ctttctctac
+   179281 tcgaaaagcc ctatctaaaa gggcttgtct ggatgaatgc agtgtcggaa gccgtgatca
+   179341 catagtaact tccgcccaca gtgctattac gacggcgggt caccgggagt gaagtaaact
+   179401 cggctcctga tgtagcattc attcggacca ttcgacgttt gattcttttt atcagggata
+   179461 ccgatgactc tgtgagaggt gttcttcggc caagtttccc atgacgggtt acccggttca
+   179521 aggcttttct atataatgag aaatactact ttctttctag cttaagtgtt cacgtaggta
+   179581 aaatagcttc tatagctcca tccaatagta atcaacggag atagagtcca gcggttcaac
+   179641 caacgcttct aaggagagcg gggcaagcaa gaaagcaggc aaagtcattg agcctattct
+   179701 attccgaaag ttcaactact ggataaacaa cgaaagccgt cggcattctt ctcctactgt
+   179761 agctgctaca attgctttag cgcgagcagc aaggaggagg cagctcttac taaaaaagca
+   179821 aaaagggaag ggcatgacag aagggagata gacctcctat aagcattact ctcttttgag
+   179881 gtaacttact tacttactct gatatgatga gttccgtggg ctagtaagat aactattgag
+   179941 gtgagggtcc gaaggagatc tttcactatt tatgcttgta cagttttatg atagttttgt
+   180001 aagggaaagc aaatcctgag aatggaatgt aagttaggga acagtagtca gacgcggacc
+   180061 agcgcaggtg ggcgccacag ctgtcttcct ccatgtgata tctgatatca cgcaacagga
+   180121 gattcttcac ccttatttat tgacacgatt tacaaaagaa gaaccaaccc ggtcgtgcta
+   180181 tatgtatttg ggatcaaaaa ggctcactgc tgtatgatat aaatctttct gaatgagagt
+   180241 tcatgagcta caggaagaga tcgagtctag aataaacctt agaagtggca aaaacgactt
+   180301 actttgttgc aacgggaact actcgccccg ggcactggtg aaatagaatc ttatgtgttg
+   180361 cagctactta tgctttccca actagaaatc aaacggaaca aaaccgagtc ttgctattta
+   180421 gagccagggt tcctccgggg gtcacaacca gagacctacc agccccatgt aaaccacttt
+   180481 cggaagggac aaagcagttg aactctttag aggaagagtc taaccacaag tcaaactgtg
+   180541 aacatcagaa ggatcgatat attggtaata gctctacacg agagaattga aactccacct
+   180601 gcaattccga gtataggaga gtccagtcaa taataaagtc ctggttgcag catcgggtaa
+   180661 atgggatcca ggtaaatggc ttactgttcg gtcaacattt catgattcgt aattagtcaa
+   180721 gctccggccg gttcctatgt ggtgaatagg gatatattta gtattaaaga catgcgagtg
+   180781 ctccgttcgt cagtaagcga aagagactga aacctgggag aattccgctc tgttaagaga
+   180841 gagaactaac gaaaattgga gactgacgga aggaaagggg ttcctccctt ctcctttagg
+   180901 ggagaagctt ggttcctcct tctccgctga gaggggagaa gagctgggtt cctctttgaa
+   180961 aactctttcc gtataggcca gaaaacagct tgcttagaga aagactgact ctcctacgga
+   181021 cctggtggac cttacagtcg agttattgca tcgatctcac aaactatcaa tttcataaga
+   181081 gaagaaagat cgtttttaga tcatcaagtg aggacaggta gtagctctgg tagagcgagg
+   181141 gactgaaaat ccttctgtct gcggttcgaa tccggactca cttctagctc tggcacaagt
+   181201 tcctcatcct tagttgtttg atgagcctgc gtagtattag gtagttggtt aggtaaaggc
+   181261 tgaccaagcc gatgatgctt agccggttag agcaaaggac ttgaaatcct tagagcaaag
+   181321 ccagggactt gaaatccttt ttgtgtcagc tctttggaag tgccttttcc tttacttcag
+   181381 taaggagtct caaaatagga aatacctagc tagcaaagta agcaataacg gaaggtaacg
+   181441 aattctccgt ttcgatatta acaatccgga tctagggaaa gtaggttccc gcatcacgta
+   181501 aatttcattt cttttgggaa aataatacat aaaccttttg agtcacagcc acctcagcac
+   181561 aagttactcg agtagcacag aagccatcat caatagcttg cttgcctggg agctcaacca
+   181621 cgaattctct gtcttctaac ctttctatca cttgaggatt atctacgcaa ttcgtaaatg
+   181681 atcaaactag atagtccctt atcttactgg ccagtagtag aaggggatct tgaatcaaag
+   181741 agttcctgtc caacaagcaa ggaatgtcag tcataaataa tgaaatccat ataactctac
+   181801 tccgggttgt gagaacatcg tgcctgttgc gattgtgtgg ctttccgcat ggttggagac
+   181861 cccctatgaa caaacaaaac agtagggtgg tgtaggagaa ccccgactcc ctaatgcaag
+   181921 atagagctct tagggtgcgt ttcgtctttg tcaggaagaa aaaagtggca agaggagtgt
+   181981 atccataccc gtaccgaaga gatcttggga aggcatctct atagatttag tagttggact
+   182041 accaaagacc cagacagctt caaggttagg tcttcagtgt taggtcagtt cgaggtcagt
+   182101 tgttccctcc gtactgttgc tagagagctg ctttccatct atcttcctac atgaaaggat
+   182161 ctaagtctat ccaaataaaa tagcccagaa aatgacagcc atcaaaaggc gcgcacccat
+   182221 atagcctcgg cctgtccaaa tgatgttcag cggtctctac ccaagtagct gtggcccatg
+   182281 atccaaagga cccgcaacca gtcaatcatg ctatccttac cttccaacca atcggccaat
+   182341 cacgctatcc ttaccttcca accaatcggc caatcacgct atctccttac ctttcaacca
+   182401 accccttcga ttccgcttct gcagcagtat ataatctcgg tcccttacct tgatgcctac
+   182461 agctcaattt gttttccagt gatggcaaga atccgatccg cgaaatactg ctttttcttt
+   182521 tttcttgtgt tgtttctgaa tggcatcata gctacacgag ggaaagcgat gctgcccact
+   182581 ctgccgcaaa agggggccgc tttcttcccc cccaaaatgc cagttccacc atcagggccc
+   182641 agcaagcagc ataattctgc tcctcgatcg gacttcgtgc aattctttta tatgtaaata
+   182701 agggaggggt ctcgaccaca tttttgagta atagtaatag gctaaggcgg attcgtctgc
+   182761 ttcgcttacg tagtcaaaac attctcgctg gacacctggt aaggaatatt tgtatatgcg
+   182821 ggaaagatac tctcacctac agctcccgtt actagtagtt ccggttagcc cacttgcccg
+   182881 agcacactct caacttgtag atgatccaac agaaaaagca agagaccacg aaagcacgca
+   182941 tgaaaacagc ccttttgaaa gcatacccaa ggcacccatc caatctgaac ttatagacat
+   183001 caagaagaag atctattaac acgcatggct acctatataa caagttacct ctgacctctg
+   183061 tctcacgacc gcaaatataa tctgtagctt catgccctga cctgaacctg atgccaagtc
+   183121 ccatcttgcc tatgttgccc atctcccatt taatcggaac ggaagtaaga aatctaatat
+   183181 ctgtacggac cccaaacatc acaatggatc aactgaaaaa tggctgttgt tctattctca
+   183241 ctcaactgga aactctcctc cgcagccaga gtccaagtga aatgactatc tttcaaacac
+   183301 tctgtgatag gtgctgtggt gctgaagttg cgaatgaagc gacggtagaa tgtggcaaga
+   183361 ctatggaaac tacgaacctt actagtggag gtaggtactg gccattccac aatggcacta
+   183421 acctttcccg gatcagcctt tgactccttc agctgaaatg caaaatctca agaacaaaaa
+   183481 aaggatcccc cggaggcttc ttcaaattga tatagagttt ctcttgttgc aacactatca
+   183541 ttaattggcg aagatgttcc atatgttcaa cggcattttt acgaaagacc aatatctcat
+   183601 caaaagatac caaaacaaac cgaccatggg cgcaagacct gcgtcattaa ttgcataaac
+   183661 gtacttggag cgttgcttcg accaaacggc atcatcatcc actgacccct tgctcccaag
+   183721 ccaagtccag gttttttttc catctctata tgagccacgt aatgatttgc ataggatcaa
+   183781 aatccagaga cacactttct atagcattaa caaaccggaa cccggactca agtcattcgg
+   183841 tcatagtaat gatttttaac aaagaccaat tgttaggatt ctttgcctcc ctatcacatg
+   183901 aacatcttaa tgagatagaa atgaatagcc atcaagaaag ggactctcga agattcccac
+   183961 tggaaacgga ataagaagga aaagcagaag gtccgaaggg cttgatagat ctcattattg
+   184021 gatagattat cctagtggtt ctcacttatt ttatggtact attatcctag tggttctcac
+   184081 ttattttatg gtacttttga accatttttt ttgaatacat agtagaaaaa aagtcatgca
+   184141 aaagagcgta gtgtaagaga aagagcctgt caagtaggtc tactcaatct tctatcaggc
+   184201 agtttcaaag tcatagatct ttcaaagtca tagatttcca agtcatagat tctgctggca
+   184261 gtttcaaagt catagattct tctcgaagaa aataagtcat agttcgcttg ataatcagag
+   184321 ttcacttcat agttatagat cacactttct tctaaacaga cttctatcaa tcctttggag
+   184381 aatcctaatc cacccgtctt ccccagtccc tgagcccagc ttgtagcaag ccaactgttt
+   184441 gactttgcta agagaagaga gagaagggaa agacagacgg cagaaagcct tagttgctat
+   184501 ggagtttgat tgcttcccct gggcctctgg cgtaaaagga ttcgcacctt tgctagcccg
+   184561 gtcccaatga accgagcggg gcgcacactg cagcttcctg cattcgtact tctgactagc
+   184621 tcccatccta ttcttttagt cagtctagct agtatcggat tcggaaaatg aaagtgagat
+   184681 ccttgtactg tggtactttg cctattcact catctccttt agtcgcaagg aggcaaccca
+   184741 aaacagcaga gccaaaccaa accctaagtc aagcacagtc acatcagcac agcactcgag
+   184801 accagtctag tacaggcgca gacattagag ttatagatcc acagcaaaag cctactttga
+   184861 aaccttggag tacttattcc aggttattta aaaggccaaa gtcgggtctt tctttcacct
+   184921 tgaataacta tcgataaata aagggctttg agaattaacc tcaccctttt tctaaggcga
+   184981 agtgggaaat gtaagataag gattccggaa gcattggaaa agcagcgaga cgggcctaca
+   185041 gtaaaaaaag gtaaactgaa gctacatcaa ctatcgaagc tgttgagtcg gcagaagaag
+   185101 tgaaatcatc tgcagtagcc aattaaatta ggaaccatca gactgatcaa tccctagtta
+   185161 aagctatggt tctttgtagg ctcttccccg gctagtcctc aagccatgcc agtcataata
+   185221 gaatctcttc cagccaaggc agttaaaaaa ggactagagt aaagggaggg gcacacaagg
+   185281 ctcgttccgt acttgactta cgagctcgcc tctatctcta acaaaggaaa aggattcaat
+   185341 ccagtcccaa gcgtacccgg ggctaacctg ttttaccgta tccttagagg tctgcccgtc
+   185401 ggtacctcat ttaattaagg cggcccttca accgcttttg gggtcttttt cgacttgtaa
+   185461 ttgcaaaagc tcatcgggag tacgtatcct ataaaagctg ttacaatcat taaaaagaag
+   185521 gtaaaccaca atgagaaacc tacttgtact agtaagttga ttaccatttg aagcaaaaag
+   185581 tgcattaaga gtcacgatta tgctcagggc gactgctcca gtcagtactg gtatggtctt
+   185641 agtggctctt gactatttgc atctttctat gagttggcta ctagagtaat caaagagtga
+   185701 cggattcagg aattacttgc tagagattgg acaagtgagg attatcggga gggtcgcaga
+   185761 tttgctcgcg aacaactatt cggatgggga acttcggagt gaaaattctt ctttacccac
+   185821 cccaactgcg atgcaggatc ttcttgtcgc ggacaaatgt ggaatagcat ggcctagatt
+   185881 gtgtgtgtat taggtagttc tctctctttt ataagaaaga actattatac aacatcccca
+   185941 ggcttgagtt ttcttctcaa ctctttctat ccaatagttt gctgctgctt tctttgattt
+   186001 accccacaag gaagaatcag gcttatccta ttatctttat gccaactgta tttaaatcta
+   186061 tcatcacatt atggttacac cacaccattc gaaagacgaa catcaagatc acatttcagg
+   186121 gactacaaga ttttgagatt ggcagcatta agtagaaagc attattgtag aagggccccc
+   186181 catccaatag atcaggggtc ttgccctata acctagtgaa tagggaagaa cagatcaaat
+   186241 atgtctagtt aaaatgtctt tcctttgcgg cggagtgaac taagcaccaa tcggaatgcc
+   186301 tagcgtgctt gcccctcttc cctctgcttc gacttaaaat gtgatctata cgctcttcga
+   186361 tatttttaag gagatctttt ggtcgtatcg tagaaaaaga aagatcaagg cggagtgaga
+   186421 tgtccctgtc ctgtaactat caatcacttg aataatcgaa gagagatggg atcctagggc
+   186481 agatgaaaga gatgcccgtt tcatgatcta ggtgccgttg attgattcta cttcttttcc
+   186541 ttctttctcg agatgttgtt ggtcttcaat ctactgatca taggtagaag agagagaaaa
+   186601 ttcatcttat ataaggataa gttcgttctg ttgcacttct tcatgtcttg gttcactgtc
+   186661 catctgaaga taggaagtga ggggcgcacg gaaggaggta gcttcttttt cggtcttgtt
+   186721 tttcaacaag cggaataacc caatttgtat ctcggaaatg aatcccctat tgaattgaga
+   186781 tctttagtac agaaatcatt ttccattctt tttgtccttg gccgcattag caaaacattc
+   186841 ttatctgact ttggatccag accttcgaac cctggaacga tggaagctat cgaaaaacta
+   186901 acttcactcc atcttcccac ccattctgtc gatcaaatgg ttataatgac ccaaccctgg
+   186961 aatgaattag atgcataaag tggtagagat gtttgactga gatggatgtg tcacttgaag
+   187021 cttagagttc catgtgtcag tcaagcgaga gggctagatc aaggtggcaa gcggaaggaa
+   187081 gagggcatag agtcaccgac taaagcaagc caggaaaggt aattgcttac agacagacca
+   187141 gatttttgaa tagcagctta ctctcaaaca ccgtattccg ccaaaaccat ttactacgca
+   187201 aacaagaccg gcaactggtt gagctgatag gaccacagct gagattgact caagagcgtc
+   187261 tgtggctaaa agactagcag catattcttt cttcaattgc gacaggagct cattctttcg
+   187321 cagcttattc gtcgtttggg caaatggatg tgggatggag cttgaactaa gggacccggg
+   187381 gcgtagcctt gattcaaagt gtctaggtac caagagtaaa ggaaggaggg ctaactaata
+   187441 taataggggt aggggggcgc taacgagcca gaaagggccc ctttattagt aaggttgctt
+   187501 gcttgtcaca caggtcgtct ttggctcgtc tccttccgcg atacgcacct ggtagtatcc
+   187561 cgcccggcca tagatccaac ttcgagaagt atctcgcgcg ggcgaagaag tctgcaataa
+   187621 aagtggatac ttgtttttga tagttaggtt accttgttga gcgcccgata atcaatgcac
+   187681 aaggctcccg ctttttctgg aaaggggctc cccaaccttt tcccagcagg caatcaatat
+   187741 agggaaaaac tgccttggag gctggaatca aaataaggcc tccatgagtt cctgaaattt
+   187801 attcctgagt tcaaccaatc cccttaacta atagatgcta gttggggggc catccgctta
+   187861 acccatagcc ccagtctaaa tagttggggg tttggctcca gctccaactc gatcttgtgg
+   187921 tagactgccc tactagggcg acttggcaac taattcgtgg ggcatgatat cccaaaattc
+   187981 ctcaagtact tgctgcactt cctgagaaag aagtagtctt ggtattggat ccgctcttct
+   188041 gagagcatga acatcaatta gattcaattc ctcttcttta ttcttaagag aaaacccccc
+   188101 cccgaaccat tcttaagacc accaagctct ctcgaatgaa ttctactctt tctgctttcg
+   188161 agatacaaga gtaggcaaat tgcttcccca ctaaggtgta aaactttgtg actagatcgt
+   188221 cctgaaaacg gatgcgacgg gaagaagtgg catttgtaag tgttgctgac ttaacattta
+   188281 ggtttcggca taacaaagct tgcactgtct tttcgcactt aggcgcacag cgtgccattg
+   188341 gtaatgattc tactacggtg ccacaaattc gcaagctgat cctaagtaat cgttgttgtt
+   188401 cattggattc cggaggaact acttcgttag gattggggaa ttgctgcgat tcttcctgaa
+   188461 tagcctggag gctcccgtcg agagtcactt tgaaagtcgt acctaaactc ttacgactga
+   188521 atatgataaa gcctataaaa caaagagcta ctatcatttc ttcattatag attgagatct
+   188581 tcttcgaact taatgcacaa atagatagaa tagcagcaaa taacatcttt ctaccatcca
+   188641 tattcgtaat actcaatctc atttagaaag cttatcccca tttttttttc agcaactgaa
+   188701 cgggaagtgg cttaggaaag gactaatcgg atgcgctcgc ccagcgagaa aggccctgac
+   188761 cctgccgacc aagaaaaaaa gagaacgaaa ggagaagaga acttcgtatt ttccttattt
+   188821 gagaagaggt acaaagtgac tcgactgaaa gaagaggtcc gaaggaacaa aggagctcga
+   188881 ctgtaaggtc caccaggtct cgacgagcga ctgaaaggag aggaatggtt aaccttgaat
+   188941 gctattaata aattctacag caatagtacc tcggactcgg aaagttataa cgaaaatggc
+   189001 taacccaata gcggattccg cagctgccac cgttggaacc aatgaagcaa atacttgacc
+   189061 catcatatca tccgaagaaa cggaaaatac caaaaagttc gaattcacag ctaataacat
+   189121 tgattcaatt ggcattgaca taataggaat atttcgtcta ttaaggagga ttccccgaat
+   189181 acctaaaata gaaataatca tagaaaatgt gaaatatttg ataagatcca tttcgggaac
+   189241 gtggaatcta agataaattc aaatgttatt agatgacaag ctcgaccgaa cacctgaagt
+   189301 ccttgatatt taggttaggg agacacgcgc gcattcctga caacaggcac gggcgataat
+   189361 ccaggcaagc ttccccgcaa gcctcacaac agacacgtcc cagctgtccg accgtaggcc
+   189421 ccgctgctgc ggcaaggcgc ggcagccctc tgaccctgcg aaaaagagct tcccgtcggt
+   189481 tcaggcacca tttttttgaa taggtgggac ttcggtgata ggttgttttt gatcccatga
+   189541 gggatctagt gatttccccc gttcccccca agaggagatt ggttaggaac gggatgggca
+   189601 acttcagccg gtgctggcgc ctgcgtaagc ggctcaagag ccgctaaaga gaaccaatag
+   189661 acacgacagc tgggccagca ccagaagtgg atttactaaa tcccccttat ctatttctgg
+   189721 gtaaataagt tggtgactaa caacaaagac aaaaaaactt gctaccaaga agacgatgga
+   189781 taagagaaaa attagggtat tttctccttc cggtttgact ttagaaaaaa aaaaggaata
+   189841 atttacaccg actaagatca aagaaactag cagactaatc actaaaaagg cctttggttc
+   189901 aaattgttac atctatctta acttactcaa ttatctcgtg aactatctgc ttcaaaaaga
+   189961 gagttggttg aatgagactg aaaccttctt tcttcgagga gggcaccact tcgactgatg
+   190021 cgggcgcggt tccctcgtcg tgaaataagt aatgaaaggg aagacccgcc cctggtcaac
+   190081 tttattgttg cctgtatagc tttttagaac agagatctag caacgcattc taggcacaga
+   190141 tcattggtat tggatgtatc gttaggttgt gccgagtcaa atctgctccg tttttttttt
+   190201 tcgttgcaga aaagagatcc cttccctccg ctttctaaaa tgaggaatat gaagtccgtg
+   190261 gggctgcagg tactatggat cctctgactc ccaatcgggt tgccttccct ggtctcgccg
+   190321 gtcttacctg taggtcgtga tgtgccttga gatggccgtc tcctgtcccc ctgccggtgg
+   190381 ggactctgct cccgggttgt cgctccgctg cgtctggccc gtcctctagg cacctttgga
+   190441 aggcagggag tgtgacaacg tacacgcttc cctttactcc atagggcctt ctttctcatc
+   190501 agttgcaggg tttggtggcc cgaaagaaac ttggcttcgc caaaaggcct catctctgga
+   190561 agcccggctc gcgaggcgtc cctcccgctg tagggttcca aaactcgcct cgctcttggg
+   190621 acatgctatc tttcccctct ttattctcaa gtaaagggtg agtcccatgc gtcagtttgt
+   190681 ggatcgcggt ctcacgcact aatccctaca ggcgggtgcc cgtagtaggc cggccgccct
+   190741 acctaaacca atcatcatat cggtccctaa gccccattgc tggaaaggct cggcttcaaa
+   190801 accgtacgtg gggcttccgc ctcatacggc tcctctaagg atggaggtag gcccagccca
+   190861 ggcttgcgcg gttaaggttg ttgtacacct gccaatcaag taaaaaaaaa gaagagaacg
+   190921 aaagctttag ttttgggggg gttggttatt ccgtgtatgg atgatcacct tgagccacga
+   190981 atttagtaat gcaaggcggg cttctttctc ccttcccgcc gctaaatcct ccgaaaacgt
+   191041 ctccctccat tcggctatct gcctagcccg gaggcatcct tttcttgaaa gagtgcccac
+   191101 ttctgatcta gctcgcgcac ctccggatcc aggcgcgcgg ctacttccag acgacgggcg
+   191161 ggatcctccg cgttatcgaa gttccgcgtc aacctctgga cccgcttaat atctttctcc
+   191221 aattcgagat cccgcgctcg cacagaggcg gaactctcta tttctccgta taacaacgaa
+   191281 gaggggggga gcgaatcatt aagatctatc gggttaccat tcctatcagt agctaggccc
+   191341 tggtcctgat aggggcccac tctatgagat gccccagctt cctggggcac ttgatttacc
+   191401 gagggttccc cctcttccgt ttggggcgcc tccgccacct gggagtccat gtccgttttg
+   191461 gaggaggaga atgattctcc gagaacgtcc agttcaaagg aatcctcctc ccacgtggcg
+   191521 gagctctcag ccccgttagg gcccagaagt gctcccgctc cgactgcata cataagaatg
+   191581 ggatctaaca ggggcaaagc ctgaccaccc agtagataga cgactttgat ccggatcaaa
+   191641 gatagcacga accccactaa gaagagatag agaatcctct tcagcggtcc tttcttacga
+   191701 aagtaaaaaa gtccaaatat aaataccact agcgcacaga atgaaacgca cgtagtgatc
+   191761 attatagcgc ttccttctga tcctagaaaa cgtccgaata aagaacctga tacggaacca
+   191821 ccgaggaggg gcaaaaatag tcgaagcata tcaatattaa taaccatttt atattttttc
+   191881 atttttgata ttatttgaga gtatttttga ataaataatt tacggcgagc tcttccgagg
+   191941 aatgcttaca atagaagtgg ttgtggattc gaaccactgc aagggtttac agtcctttgc
+   192001 tctaaccaac tgagctgaac cactttgttc tcaactgaat ctctttctct tatttcattc
+   192061 cactttgttt cgtttagtga tagttagaga gaaagatcac tccacaaagc agccttctga
+   192121 ttatatacgt attattctat caatcggaaa gtagagtctg gaatgagatg aatcttctta
+   192181 tatacgtatt tggataagca atcgataagc ctggtttgtc tgtctcttct tttggaacga
+   192241 gatttaactc tttcttcttt gggaactaaa agttgtttag ctttcattct tccttacggt
+   192301 accctttcag tccaactgag caaattagtc aaactttgta ctcgaaatgc tacttccctg
+   192361 agttttgttt caaatacttt cttacttcac tgggaatatg ctgttattga actgtgttct
+   192421 ttccttattc ccctggtttc accggttggc ctgtgcccgt actactcgta agctaaccac
+   192481 taacctaatc cgcatctatc atccggaaca tcttctgttg ttttctgaaa cttttttgta
+   192541 ttagcttgtt tcgttatcaa agggttcgtt tgtgtaaggc gtagttcacc ccctatagaa
+   192601 gtgagctggt taatcacagc ttacttcttg agtgatcccg gaggcagggc taaggcagag
+   192661 ggaaggtgta gttcgcttac ttctacctgg gagagaaagt cccctccttg agaagcggaa
+   192721 gaaggggaag gaagactaat gggatggtta aggcatagcc gaggccatta gtctgagtga
+   192781 gcggctcctg ttgctttgca gctgcggtat aactcctatc cctgtttgtt cgatgggaat
+   192841 aggtattact aagccagcta gtttcctacg gcgagctcga gcagaaggtc tgcaagccta
+   192901 acaatttacg tacctcggga gggaaatcag actcagtgga atctatttat ctaatcttac
+   192961 ttgaattgga gtagcaagcc cttcaactgg cacaacacaa gcaaaaggaa agaggcaagt
+   193021 ctgctatctt cgcctcgttg tctgccttga cttagtcttt ctttccgaac agaaatccgg
+   193081 gtttcatcgg aaagatcaca aaaattaatc cactgaggga cttttacgag aaggcttttt
+   193141 ttcttagcgt ttaatggcat atttatgagc cggttgtccc aactactata cgaacagtcc
+   193201 tctccattgc tctaggccaa ggctggcaaa tcaaacaact ttatgttaat aacaccttta
+   193261 acttatgaca ttttaatgaa tgaggtgcgg gcaacaggcg ggaataacaa gactctatgg
+   193321 cttaattcct tccccaccaa aacccttact cgggcctcga cagggtgaga gtagttggaa
+   193381 atgagcgtct cctttgtttg cgaggaggtt gcgttgcttt cgcgtgaaga ggacctgcag
+   193441 acggcataat ttgcttatgt taataaggat atgaggtggt gggatgccga atggtagtct
+   193501 tattctgtgg agtcgatcta agtcgagcta gccctagtat ggacggtttt gagtgccggg
+   193561 acctttacca attctgatct acgatagggg tattccagtg gtatgttaat ttgattttcc
+   193621 tccttcacac aggccttcca atgtagattc ttccaaagat atactgacta aaaggtattt
+   193681 atggattcgg attggaggtc ttgggaatga aagtcttata gaggagactg gcttcgtaag
+   193741 taaagtaaat taattttaga tagatagctc ggagaagctg gagaggcacg gagtcgagaa
+   193801 caacatctgt tctgtatact agtcgaccag agagaagccg acttatccga caaggctaaa
+   193861 aaagaatggt ttattgacca gtttccacgg actttcgata accggcttgt ggaggtgccc
+   193921 attatgtcgt tcgttatgat gcttcttgtg attgaggtgt ccggtgggtg gacgttccct
+   193981 tatgttcacc tctcgccctt cgcttaaaaa caaggatcga atggacgtgc tatttttgaa
+   194041 ctcttccagt gaaaaaaagc gatctatgca tttatcatag gcaaaaaaaa taaggtattg
+   194101 aataagctca gttctactca tatctatgtc aatttgatga aagcaattcc tacctttggt
+   194161 agctctcctt aagctcggaa gtcacggaac tctcttccta aactgaaact caaataggaa
+   194221 agtacactga ctgagttcag tcgtgccggt ctttcttatt ctgattttca gagagtgaag
+   194281 tagttctggg aaagagaaat agacttactt tccagtaaga taagagccaa ggaagagaag
+   194341 gtaacgagag ctagagcttg aatcaaacag gtcctaggag tcaagtcatg agcgtacgag
+   194401 tctagagtga atgtcttatt tgtttgaatg gtcctgttat tgctaggaag gtatggaaca
+   194461 cgggtaagct agccaacaca gggcaataca atagtacaag agagggagta agggatgggt
+   194521 caagtaagta aagcacgcat aggaataaag agagcttccc ctgccttcct cggcacacgc
+   194581 atataagata taaaggtacc ccgtgggcac acacacgcga gagttggaaa tgcatgattg
+   194641 gccgattcga taatatccgc ataatagagg gaactcaaga ttggatactc tatgtctttc
+   194701 tttccttctg agtatttgga gatatctgaa ctggatactc tatgtggaat ggacactcta
+   194761 tgcctttctt gactaatagg aatttctacg tgttaaacac tcaactggtt tcttccctgg
+   194821 attgttagct ggaatggctc aattcgaata tttacgtgtt aaacactcaa ctgtaatggc
+   194881 tactctatgt cttcctccta cttaactcag tggttagagt attgctttca tagggcacta
+   194941 gtgattggtt cgactccaat ccaatagtag gtaactgctt tccttctgtt gcttgtctga
+   195001 gactttccgc gtatatgata gaacgataga ttgatactct atgtcccttt ttgatcccta
+   195061 actaggaata gatgggtgac tgttaaccac gagactgagc tctttcacct tgttattcat
+   195121 agataggttt gttctgcttg cctggcttgc cttgattcct ttccttgctt gtattttatt
+   195181 gattggcttg gaataattgt ttgataactc ctccttctat cactggctta tatctaggga
+   195241 acaccggtag gcaaatacag gtgagtgacc acactccctg gcttaatgga cctgcttgta
+   195301 gtaaagagag aagtcttggg tcaaggaagt aaagaaagaa caggccaagc aagtaaggaa
+   195361 ccgggaaagc gagccaagca agccctcgag tattggagtt gggatgggcg cgtaagtgca
+   195421 catcaaatga gaggttgaat tccttccacg cgagagagct tttgaatcaa gctaagcaaa
+   195481 tcaagctaag caaatcaaag aaggggttcc ggtcaagcta attaagggag gcccaggttt
+   195541 tggttctcaa aagtcatgca cttaatagct tgcaaaagga aagcgagcct atatataagc
+   195601 aggtgatcct ttcttagaga gctcaagttc ctaaagctaa agagaaagcc aagcaagcca
+   195661 agccagatga tacgcgcatt tgaaccattg accgaaagga aatgcactcc caagcaatga
+   195721 gagaaggatt cgaactagac ttctcatata aagagaaaga tctacagcta caactccaaa
+   195781 tagactctta taggaaagga atcccagtag ctacttatct gataggctgg ctcatccaca
+   195841 gcccattcct actacttatc tattgatcgg aaactccttc acatgaggct tcttaggagc
+   195901 ccttctcttt cttccgttac tgtgctgcac gtattcgcag cagcggatgg gagatgaacc
+   195961 gagcaactaa ctagtaccgg ggaactccca ctccaagcct tgactatacc ccactcaaag
+   196021 ccttaactac aatcttatct tggaattcct tcctccgtta gcagaacata cgacttctct
+   196081 gtatccaaag caggggatta ccccgatcaa ctagtaccga ccccaggaac tcgattagca
+   196141 gaacatacgg cttctctatt agcagaacat acggcttctc tatatccaaa gcagcggatt
+   196201 gggaaaggag taaaccctag aaaccgaagc caatagccaa gacatgaata cacttttgtc
+   196261 gatccatctc acagacaaag aaagtaggct tatgtacggg acaccaagat cggtatgatg
+   196321 ccctggccct tacccattca cctggaccct accattcata cctaagcgga ataactagaa
+   196381 agactgccga ggctaactcc cggaatcgcc aacccgaatt cagctaacag ctgctaaccc
+   196441 gccaacctac catcaaccta ccaataagaa tgaagctaac tgctaaagcc atgcccttct
+   196501 ctaacagcta aagctaagtc ctcaagaaag gggaatccta tggttaccta ggcatccggc
+   196561 tacctatcag gacgagaagt tacgtagaaa gggaagagtt ctagagattc ctccgatacc
+   196621 ttttagctgc ttcacgcaga gactccaagg cgttctaacg gaagaggaaa gagttgttta
+   196681 tgccttccct tcgcccggaa agagtttatg ccctctgtct tagggttcat tcaacagccc
+   196741 catctgatcg ttcatatgat tcagcacact cttccgtcta acatagtttc cttgttgctc
+   196801 ctcctcgaga tgcccggatg gatagaagac ttatagattg aataactaga ttccgcggat
+   196861 gtaatgtagc tgatagtagg agtcaacgta ggaaggggaa gatcctcacc caagcacacg
+   196921 ctagccttta tacgttagca acatccgcta gcctttctac tggagattcg catctcaaca
+   196981 tccgaataca ctgaactcct ctatcaggca tctcattcac atcattcctc tatcctggca
+   197041 gctggatatt ccatcactcc tctggcatct caacatccga atccccgagt tgacttccct
+   197101 tctatcagcg gaatagcatc ccttcggaac agcagacctt ctgccatttt ctttagtcct
+   197161 ccattccttc tttagtttgg taggattagc agagccttct atcagtttgg taggaacagg
+   197221 atcccatcta tcaggagcat ctcttctctc agttgaacag catcccattg atcttccttg
+   197281 ccatagcgag agatgtatcg acggacacta tagagtgaac tcggcgagag gggtccctaa
+   197341 ctccaggaga agagttgtca atgggcttag attaggaaag tgttgtcaat ggcttagatg
+   197401 aatcaaagca ttagcttcct cttaccaagc aagatcatat agggagggaa tacccactat
+   197461 ctatgggcgt aggcaataag tagctccctg tttgtgttca ggtaaagccg accctgcttc
+   197521 ctctccgttc tcaaagcagc aatacactca agcactttac tcgattgaac aaggaaattg
+   197581 tattaggtga agaggactca gattccggaa caaggaaacg gcttatgtct tccctatctg
+   197641 cagaagcagc aggaatgggg ctctgactca ccgaatacct tcatcatcca caagcaccca
+   197701 actgagggaa ctgttcttca acatacgaca atcttccttc ccaacaatca agctagtcct
+   197761 ttcttccgta cttttgttgc aagattcgat tggccgcttc tccttaccct ctcatattct
+   197821 gtgtttcata tgcctctctc ctccgctttg cttccttagg gcctcgctgc tttgaagtgg
+   197881 aagatgaata gaccgaggct ccggctactt taccaggacc aggagtctct gttaaccttg
+   197941 cagacctttc ccacctatag ttcgttgact gaggaacttt cctacctata gacttcacag
+   198001 ctacgacttg cttgctgata ctgatgagga gcaagaggac caatccaaac ctatagactt
+   198061 cactgctacg gctgctttag ctctactatg ggtcgggagt cctgctgatc tgtccttcct
+   198121 cgtcttctcc atgtcaagac atactggact tctcaatcag taagagagaa agggctaacc
+   198181 actaagtagg agaattgttc gggagatccc gctgtctctg aacttgctta cctagttgac
+   198241 aaaggagggc agttgctatt gctaaaccaa aggagaacac taagccttac cactaagagg
+   198301 agcaggtata aggggggaag gctaagatag cagggttgaa ggatcgttga gacatctgtt
+   198361 tattcttttt ttacattgaa cagggatttg accgactcgc aggcagcgct cccgaagcga
+   198421 gaaaggggat tggctcaccg gccggcgctg cgtcaacaag gcccttgggc gacattccag
+   198481 gtctctcgaa tgcctattca gagggatacg ggacagagca actcgaagtg aagtaaagtg
+   198541 ttctagttac accagttgga ttattaaagt caaaaacact gattccagta aggagttcaa
+   198601 atttctattc tcaggcccgg ctatgtgatc agattttctt tggctattgg ggcagaggac
+   198661 cggatgtcgc cttctcttac ttgctcaata gagcgaacag gggcatgcta cttttttttt
+   198721 ggtcttgtct catttcagtc ctcaaaaggg gagaagaggc gcgtctcacg ccgctttcct
+   198781 cacttctgtt tgttagtaaa ggttcgatct aattgaaata ttctatgacg ggatccccgt
+   198841 ctcctctttc cttccgatga atggattgac agcttacagt gccttgtaga aaagcattgc
+   198901 ctcattgttc attcactgat ctacgactga tatagggcct acctggctca tagagactcc
+   198961 gcctggaaaa tagagaatag ggccacatac acctttcttt tccacaaaaa tctctcctta
+   199021 agcgttgagg ggacaattga ttcgtaggac cattaactga tattttcact tcttgtttag
+   199081 caaatggctg gtagatcttc attgttctta ccggaggaat cataaatagt cgcccagtca
+   199141 acttcctgtc aaatcaagac cgaaaagagt tctctatatg gaacgtaaac tcatcttacc
+   199201 gggcgaaagg gcacataact gactcttcct tctttagctg aagcgaaagc attggacagc
+   199261 aggtcttgcc tgccccaatc aatacagaga tctcgtccca cactacttca tctttccttt
+   199321 actcctttta ggggtcaggt ccgcttaaca tcactggtgt agaccgaagc ctatactcac
+   199381 tcctccccat cagaacgact ctcagcttca gccattcgag tcagaaggga gagagggggt
+   199441 cgaagcgcga tgcttctgat ctgccgattc cgaggtcagg gatgatgatg gctgtggtga
+   199501 ctaggcctag tggcagctaa tccaactgta attgatacaa accttgactc tcgctatgac
+   199561 aaggaagctt tcaatcaaca gaagcaacaa ctatgcctta aacggagaat ataactattg
+   199621 ctcacttaag accgatttca agcaatcatc caagcagaaa tctcttcctt tcgataagcc
+   199681 ttgcctggcc ctgctttacc tgccccctca aaccaaaaag gcagctaagc cataccccaa
+   199741 aaccgactat cagatatatg ggtagagtca gagtacgagc tcaatatgga cactaccctt
+   199801 aaagaaatag gcttctccgt cggagacaga atggctcaga cctggagaca aacaggcgaa
+   199861 acttttctga taatgggtgg gaatgaagga gcaggcgtgg taaactgaga tcaggctcgt
+   199921 tctcacaacc agaatgagaa ctcctctcga agttattact tctcaacaca gaagctctga
+   199981 tccgagcgag actcttcttt catttcgata caactgtaaa caagagtagt gccggtactc
+   200041 attgatgttt ttgtaattct ttcagcgggg gttgataatg ctaataccag aggagagtag
+   200101 ttcccatagg tgatatcagt gaccaaaccg gaccgggagg agtacgggcg ggcttccttt
+   200161 cgagcggggt ggcgagagtg gtcaataaat ctcactttct agactacggt tacggtctgg
+   200221 atagtaagaa aggaaaagaa aggtagtaga tcgaagttta ccagagagat cttccggaga
+   200281 cttggtagta gtagcgagcc ttcccagatc gagaagcgag aaatcttccc ccattacaga
+   200341 ggcgcagctt gcctctgtct ttcccccata taccttattt taagtatcag actctcgaat
+   200401 gcgactgcac ctatcccttc ttcggtgcct cttactccga acgagtcacc cttgaatatt
+   200461 tcaattggtg tgctagtcaa ccagtcgtca tcccggaatc ccactgagtt ggtaagcaag
+   200521 cctgggcgtg gtaatatgcc cattgagggc caagcagctg catctgattc cgactcttct
+   200581 ttctgagact caaccatcag gagctcgtgt ttcatgggaa gcagtcaaac tctaacgttt
+   200641 cgcctagcgg agccgatagt acatctttct tcctatcccg ataatcgtac atctttccgg
+   200701 taggagcaaa gcagaggaag gtgctttatc gagtagttct attgggagga aacgaaagga
+   200761 aaaggtaaga gacccgggaa aggcggggac atccattggt gcatactctg aacgagagac
+   200821 ggtaggttca tcagaggaag aaagatcggc acttaggttc tctgaccaag catctatata
+   200881 tctctcggtt tcggcatctc cctcctactc atcagtctcc agagtggtag aggaatcagc
+   200941 ctcatcagtg ttagagtcat ccccctcccg agttgtagtt gaacagatgg aatcaggaac
+   201001 atctgtgtgt ggagggccca agtgatacgg agctcctttc tagcgctagt ctttgaatcc
+   201061 cgtccttcgg ctaaggttga tacagaacag agaagaggaa ccaatacgaa atcaaagtag
+   201121 tgagccggag agtccctttt ccgcccccaa ctctcgattg cccgacctgt tcacataagt
+   201181 cattctcaag ggtaatggga aagagagata gaatacccaa taggcatagg gtagctcctc
+   201241 tcgtacaagt gttcaggatg tagcgataga agactaaaag gataggatat cgaggagatc
+   201301 atagaagagt gatcctcgca tcaaaataca gggcagaagg aaaagaagag acatccagaa
+   201361 actagggaca gaaacctgac atccatttcc attgaatagg agacagaagt gacacacatt
+   201421 caatccattg agaagccaaa gaaagtcctt caccctttac ccacccatct ctctcagcac
+   201481 agtgctgctt cttcccttgt tgcggaatac cacctctctc tccgaatctt tcatctccta
+   201541 cacttccgat acatcagaga cattcgcgcc tcgctcttac cgccactcaa ctcaggctta
+   201601 ttctcctact caaggtaaac ggcatctcca cccaactgcc aagccgatct ataagattcg
+   201661 cgtatagcct ctaccttccc cgagtgtacg cccgccaagc agcgcaaacg atccaattcg
+   201721 aataggccat gaaacatgcc tctttctgcc tctctagccg gattctgctc ttagccccat
+   201781 gtagatacct ggggacacta ctcttgcttc ttccctatcc gtgctccacc ctacgccagt
+   201841 tcctattcct actccggagc ctattcatac gcgatgtaga atggatccct gcaggtctga
+   201901 gtcatcatat cccttatttt ccgctagctt ctcctcccct gacagttgag actcttctga
+   201961 tagcaagact tcttctctcg ataaagcaac tttcattgcc tccggccaag actgcatccc
+   202021 tctcggcttc gctagatgct aaaaccaagg gaaggagttt gttatcttcc tgcagctact
+   202081 gttatatgcc tacgtaaaga tatgtgtccg gccccgcagg gggcttccca taggtgtgag
+   202141 cgcagggcgt taatgcagct actccgtcaa ccataggaaa cagtactctt atcccctatg
+   202201 tcttgtacct ttcagagctt cggctggtta ggtaaaagat gaataaaggt tagcttgctt
+   202261 gcttccgccc ctcactccgc ctcgtggcca cgttcatgac ttcatccctc cagctcaaac
+   202321 tacgacgata tggtgacctc tcccaccacg ggtctcttta tagcacctat tccgcacttc
+   202381 cggcgcagaa agaaagctcg aacccttcct tgcagaggag tattatcaag ctactgttac
+   202441 gtttgtgacg caaagacctg attcaactag accgttgaac aaagaatccc agctcgtgta
+   202501 aatatggcac aaagaaaaga cgacacctgc cccttcctcg ttctagcctt cctcgttctc
+   202561 agtcaacgaa tcggaatcgg gatgttgcca gtgcccaagc gaactttatt taagaaggaa
+   202621 atgagcgtag attgatctct gatgctagaa atgatttatt ctttcaagct agtctctgat
+   202681 cgtaaagaga tgaaattcca cctgctaatg ggatgttagg ataggtctat ctctttccct
+   202741 tctatcagtt tggtgagaat agcatccctt cttattaagc acacaaagca gggatgcaca
+   202801 tatatagtaa gttgatcacc ctgagtagtt tccttgccat agcgagagcg ttggactagc
+   202861 tgaacaggta tccgaactta aataaaaccc tgactctcca ccttcttcgt atgggaccga
+   202921 gtggtagatc aaattggaag tagaaatgtg aacgaaggtg aagattaagg cttggttgct
+   202981 attgggatgg gaactaggtg gaaccggccg atcgatggca atcactggtt cagtagcctg
+   203041 cagtcagtcg agcttacagg tgtcacatat agcagacttc tttctccgga ttggggcaag
+   203101 tgtagagggt cacggcctag ttactaattc ctttcctctt ccaacgctct ctttcttctg
+   203161 gacgaggtta ccctatctcg acttacttat gctatcggga atggtgttgc acccatagta
+   203221 gttgttctct tctcatcggt acctctcatc catctatttc ataagagaat aaaagttgag
+   203281 ccagttgaaa gactagtcga gaagccctat tcagaaaggt agatgagagc ttgcggagaa
+   203341 ggcaaagttc tcatttgtcc aatccaccga atactatcct ttccttcttt taccaatttt
+   203401 atcgtcgagt tgctgatacg tttgttgaac agcaacagcc tacatagccc gtctccctct
+   203461 tttcaaggaa gaaagcatca gacaaatctg tagttctagg tccttttgtc cagttcctga
+   203521 aagcaccatc gtggtttctt cactcaggtt tgctaagtag tctgggagcc cgactgtcat
+   203581 tttaaattta tcgataacga tttagctgct cgcgcaacca ggacctttgg acctccctga
+   203641 gtcgatctcg ctggcgaaag atacgaaaat ggcgctggag gcttctgacc tcccagtcca
+   203701 gtatatctat caacgcctga cgacgccaga tattaggctc agtctggagg tccctagtta
+   203761 agttttgtac ttgatttaga tgagctaaca ttacgtcatt ttgcgccttc gtagactggg
+   203821 ggccctcgac ctctgcgtac aataaatgta ctggaggaag ctcgttgaga tcgagagcag
+   203881 cacgattgac tcctagggtg ggctgctgct cttggggcgg ctggttgaga tcgagaccag
+   203941 tcgcgccgcc atcaggccca acgccaaaat gaaagttctc atttcccaat cccaacctca
+   204001 gcccccaaac aaaatgtatt aatcgcagta gaaacagaac aggccccaat gttggggcta
+   204061 cgaaatatat aaaaaagtat aaacacccta acagtgcctc ggcactgaac cgacttgaat
+   204121 ctgaactacg attttttcca agtcttacca aaatcggatt tccttttcgt gccatatttt
+   204181 ttgactttat ggatttctgt cccttttttc ttcccggtac aatatttgtt ctcgaaagtc
+   204241 tgagtttccg tgtactctcc aaatttatga ccaaccttcc cctcagtgat cttagaacgc
+   204301 tactacgcat ctttactata tagtgtggta aggtaggttt gggtatagca ggacttgaac
+   204361 ctgcgaccat taggttaaaa gcccaatgct ctaccaactg agctatacac ccaaataaag
+   204421 atgtagtagt caattaagat tggtgcggaa aagagaagag gtctcaacaa gtattaacta
+   204481 gttgatgctg atcgaaccct cagttcgaag ctcttcgcca acatgttacg aggctccagt
+   204541 cttaggcggg tcaccattac ttgacttagc ttagaaaggc gaacatctgg gcaagggaaa
+   204601 gcgcagtgcg cctggtcctt tcgaaccagt ctttataaac ctggtgcaaa tggctttcga
+   204661 tagttgcatc gtaaacgggc atccaataga ttctactcat aaagagatag atcagtagtt
+   204721 taggaatgta tgcttgcttt atttttttct ttctttcctt tcatcggaaa atcaacaacc
+   204781 taattcaact cacttctttt accgggcgat ggaagactag ggatatatca tttaatcagg
+   204841 aagggcatca ccagctacag tagttgcatt ctcaccagcc ttcatagcaa gatttcataa
+   204901 agagtattta cttaggaggg ctatagataa gattctaagg tagggttcta tataatctat
+   204961 ttatttcctg ttgaggatag tttaataatc tcatagcgca tcgacgtaga tcgcccagtt
+   205021 ggttctaggt ggggtctcga gtaagggtag ccgcactctc cttcttttat aacaaaaagc
+   205081 ggaggacctt ccctcttagc tcttaaggaa ttctgaaacc tatacacgtt gttaacgagc
+   205141 taacctaact aatatagata gaaaaacctg agcgaagtag ctaagggact aggtcggtta
+   205201 tttcagttcc gaggaaagag aggactagag ctaattctta taagctaatg tgcgatacgc
+   205261 gttacacgtg ttaagcatat acgcgtgtaa catgttcata tgtatgttca ttcacatgtt
+   205321 cacatcagtg gtagcttgaa caaacaacaa tttatccctt ctcacatagc atgtctcatt
+   205381 catattcaaa gcagactgtc ttcatgctca atcagtagca agtttacttc atccccgggg
+   205441 accgtccagt agaggagcgg aaacgactct gttatcttac attcctcatt cgcattagag
+   205501 cattagagta tgaatataag caagctttcc ttattccgaa atctaaaaaa aaaagagctc
+   205561 tagaagttca gtcggtgttc tctcatctct taattaagat atttcttaat cgagaatcgt
+   205621 gagactcccc aacctagtaa aggggcttga atcggtattt tgtttaggga gaaaccctac
+   205681 taaggaaagg atgagatata gcattaactc cacttgttgg tattcgttgg gacgcatgct
+   205741 tggaacggcc tctaaggggt tgttgtataa caagtgcata attgtccact agaacgtccg
+   205801 tgtattttat catcaacctg atgaatgaat ttcaagatat agggctttcc tattatcaca
+   205861 ggctgttcaa aaggatctcc tgttcttcca tcaaaaaatc tgctttttcc tggatactcg
+   205921 ggttcaaata cccatggatt cgctgtttgc ttactggctt catataatta agaaaacact
+   205981 agttttctcg aagcctcttg ttcatatctc tcatcaaaag gggctattcg ataatgtcta
+   206041 tctagcagac ttcccgctaa cccaagcgag cattcaaata tctgtcctac attcattcgt
+   206101 gagggtactc ctaatgggta tgggttgaag accatatcca cgggtcttcc cgcttaggaa
+   206161 gaattgcgag ccggagtttt gttttagtcc atacaatgcc tttctgagtc tactaactag
+   206221 atgtgaagaa actgatcacc ctggcgaagg tgtaaactaa acccgactct aaatctagaa
+   206281 tcgttttcca ttcataaaag cattccccac atctcactgg aactgagtat caagggctac
+   206341 agtaagaaag acaagagaaa ggtaagatgc cgtggtgatc taagccgtca acgagcaatt
+   206401 tgtctcagtt gaatccaaag aaagagggcg aagaaactca ttaattcatt gatgagggct
+   206461 aggccgagag aaaataaata tgaaagggaa ggagatattc aggtgggtcg aatccataag
+   206521 gtcatgatcg agttaagctt gcgcttacgt tttacagcat tgagaacttc gaatcatcct
+   206581 ccacaaccct aacctcacaa taaaatcttt taaaattatc tttttagctg tgatttgaat
+   206641 gtctaaagtt tgtcttcaac ctccctggtt tcgatatggg ttcaaaactc ccggttaaga
+   206701 tcaaaagtag gagctaattc gtctatcaca ggtagtagct cacccgcctc tctttctcaa
+   206761 gtgagacaaa ggcattcaag atggactgct aaaagacata tcttaccttt actttacatc
+   206821 gcatttattc tttcagaacc ttactatcta ttgagcttga actactaaat ctctattatt
+   206881 acataccagg gtttactaca gtttcactgc cctgaaggga gctagccctg cttcttcttc
+   206941 agtgtttgct aactgcttct gtgcttaatc ccgcatgata agaggaggtt tctgaaacgg
+   207001 acacggactc aaaactccct gagtctatac ctggtgaact ccctggggct gagaagaaag
+   207061 cttataattc gattggaaca tttacttcga gggcactcat aactaaagta ctggggagcg
+   207121 gtttgttgcc cttgtaggta gtccttgttg gagagtcata gcttatctca attgagagtc
+   207181 ctgcgctgcg tcttatctag accgctgcca gccggattga aattccagtt gttggaagag
+   207241 ctactgcaac taaagcaaca gatactagaa ctattgaatt cacagctact tctgcaacta
+   207301 actactgaga ctcttacttg aatcatttat ggaactctcg tcggagcctg ttctttcttc
+   207361 tacgaagcct cctgtaaagg catcatttcc atccgtctca gtaaaggctt cagttgttga
+   207421 attacctgga gtttcctcaa cacccactac tagagaaagt gaagatggtt catttacaga
+   207481 ccctgtcact ctttcttatc ctgtagtttc atatggaatg gtttcagctt cctctactcc
+   207541 tactaggtct acttagcctt ttattcctag gtggcttgct ttcctggggc tgctagtctt
+   207601 tcctatccct atggtgtaaa gaatgcttac tatcctattt attaggtaac tccccttctc
+   207661 ctctcctttc ttagctctag gcttcagctt tccttcatca cagtgaatgt aagcaacaca
+   207721 cccaaatctc ctgagatatg aataagtagg gactgattga aaccatacct catctggaac
+   207781 atggaaattg attgctgtgg aagggtattt gttgatgatg tgcacagctg tgtttgcagc
+   207841 atctgccctg aaagtcttgg gaagtccaca ctcacaaagc atacttctga ccttttcaat
+   207901 gattgtgcgg ttcatccttt cagctacccc gttttgttga ggggtgtatg cacatgtctt
+   207961 acaatcccat ttctccgatt tacagatctt ccattaaggg atctaacatt aagggatcta
+   208021 acattgattt accattcctc ctatgaagtg aacaactaac caaaccgctc cgccccttgg
+   208081 cttacttggc ttcccttcgt ttcctaccat tcccaactaa cctactttcc tagttctttg
+   208141 attctgatat tcctattcct gggattacta gttctcacat tcctcttttc tttctggttt
+   208201 cccggatttc tcttcttcct actttctttg ctctcctggc tttctggatg tgagggttag
+   208261 cttaaaaaga actatctcta gctctcccag ctaagatgta cgatgtctga taccgattca
+   208321 agctcactca gagatatatg atttatgtgt tctatgggtt tgaaaacagg gatggtaggg
+   208381 gtttcttcca ttgaaaagaa agaaagctta tttaggacct ctgagaccgt ttagctctta
+   208441 ttcctctttg gaagttactt tgtgatggga ttgggtcttc tgtgagggat cattctatta
+   208501 aacgaataaa gtgaacgttt ccggacgttt tctggttggg agagcgtaca aagcaaggac
+   208561 tcggaggtcg gtgtagttag gatagtaata taacctatta cctattggta acctattgct
+   208621 gtacgtatcc cctataagat cttgtccagc atctttgtct agctaatctt agggactgta
+   208681 agcaaaggat gtcactatag ctaaccttcc caagtaacag tacctaattt gaccaatttt
+   208741 tattttataa gaaaacaagg aaagtcaact accctattcc caaaccatcg gtacaggaaa
+   208801 acaaggaaag tcaactaccc tattcccaat tcccaagcca accaaaccag ggaagctagg
+   208861 tccaaccatt acccttttcg agcagactca gaaaaagaac cattagttgg tcgctggtct
+   208921 tatggtaatg gtgactatct gcttcgtttc caatgcgtag tagttctcaa gtgtaggagg
+   208981 tgcttttagg ttttaccgtg ttttgagctt gccggtgttt tcactctttg aaatcataac
+   209041 aaaaagtgaa gagttgcgtt ttaccgggaa ttttggttgt aaactcaatt catacttaga
+   209101 aagccgatca aaaagagaca agcgacgcaa accccggtat tttggctatc gatgaatttg
+   209161 actcttagga atcatggaaa aatgatcaaa tttgctaggt tttggccatg gtggaacgca
+   209221 acatttctct ttccgatggg tccccaaaga gtgaaacagg gtttctaggt tttcccctag
+   209281 ggggagatgg aagattctac tttttcatgt catccaaaga gtcaaagtgc cttgctaggt
+   209341 tttgtccatg gtggagtgta aactttttgt tttctcatgt ggtcttacga agtcaaaagg
+   209401 gtttgcatgt ttttgccact atatgagatg gaagttcttc attccaacag aggtcttacg
+   209461 aagtaaaaca aggtttcatc aattcgtcgc tatatgagat ggaagatttt ggtttcacca
+   209521 cggccctacg aagtgaatct gtctatattg tcttccgctt catctcttca tcttccttat
+   209581 cttataatac actgtctagc gcctttattc gattagactg gattaagact cctcgtcacc
+   209641 gggcagcttc ccgagccctc ggaatccagt ccaatggtca gatctgcaca gcctgtccac
+   209701 atctctctgc tctcttcgga ttcctttctg ttgcgtcttg ctttccctct aacaaagatg
+   209761 acggactaca caccaggatc tattttgact tagaatttca gccggatagc gccgtgtttt
+   209821 agggttacga caagctgacc tctgccctaa tcaactgctt tatagccggt catagttata
+   209881 ccgggaaaga aactccaact ggaaatcaaa atggggggac ttacacttca actagatagg
+   209941 catctctttt aggaaagaaa atatctgata ataacctgcc tcaaaggata tgtcgtactc
+   210001 tccaatcccc tcactcagga aactatagcc acagataggc ttcattgcca tatcccgagg
+   210061 tagccttcca acaagaattg agctagatgc gcttggaaaa cagtgaacta ccactccaac
+   210121 tggcttgtcg ggccaggata aggtattata ttcaggcagg gcgtcttaca aaagagatat
+   210181 ataaaaaagg gctggccgta cttctgtttc gacgaatgga atcttagtta agagaacaag
+   210241 accgtagaca aagccttcca acataagtta ttcaaacctt tttcgagttg accatcaaga
+   210301 gcaaagcgtg gctttttgcc ccggggcact agggcctgat catctttatg gctataaatc
+   210361 tgccttcaca gtgatcagct ttttgatccg agtggaaaga ggagtcttcc cagatgtgat
+   210421 tagagaataa cccaaaagag gcatgtcggc atcagctgag gaaagggttg gctaggggtt
+   210481 agcaatcaag aaagcaatca aagacgatga aagaatcaat tagtactgcg ccctgatgca
+   210541 agaacggaga gtttctggtc tctaaaagag tcagacgacg actaataaga agccgataga
+   210601 agagctacgc cccttcttct ctgttctaga tagaatagga atcccagagc ctggcgaaga
+   210661 tgaataagag gaattttact agtagtgtta tggaagtttc agagcgagag aaggttccat
+   210721 atctgttttt tttctctaag ggatgagaga gtgagggagg tcagatctat ctatcaaagc
+   210781 ggaaagtcca gagccggctg tacagcactc ttccgccgag taccggataa aggtaaaggg
+   210841 ttagctttct agcaagagag tcaaagccct atccacagcc aagaagcaag ggctgtctaa
+   210901 agaccggatt ccctctctga atcacagggt attcactgga ctttttatca aactcgtctg
+   210961 agtaataagc caaggcaatc gggaaaagaa ctcaatgaaa aagggcctat tctcttaagg
+   211021 ctaacggatt cttagtcgag tctcgtccct ctcctttaaa gtagagtcag cacacttccc
+   211081 cccccccttc aaagaagctt acctgagttt agtctcgggg gactgatgac cttctattga
+   211141 aagagtagaa ggtagcaaat ggagattgag attgaaggtg gcccgacgag gaagtcttgc
+   211201 ccattaaagt cgcttccggc aaccgcttat ccgtgggaaa gagaggatgg aacttgagaa
+   211261 ggcttcatgc cgattcggca actgattgac tattattcag gtacggatga ggctgggaag
+   211321 ggctgataag catcggctag tggaaccact cctgtccttc tttcgtatcc gaggtgggcg
+   211381 ctttacacaa tatggaaatc tagtacggtg aataaggtag gggctgatcg attaggcgaa
+   211441 ccagtccaac tagtcaaaaa aagttaaccc cttttgcttt agcaaagctt aggtccccgg
+   211501 gtccaatcgg cagaggctcg tcgagacctc gatttgctgc acagaggtga gagcccctct
+   211561 gatgcgagcc cttcccccct actatccacg taacaccaag ctcagaaagt gtacctgacg
+   211621 agggcttcct cccctccaag cctgtctact ctggaaaaaa aaataactcc catctctcag
+   211681 tgagtcccat caaatcaaca gccccgagtt gttggcctga cctgttgatg gaaggtccga
+   211741 gctgctttgt catccagagc ctgggggtcc cctcggttta ttaataggcc ccaatagtct
+   211801 tagaggaagt caaaggtcct cccaaaatac acagttgaat aaggtgagtg aagtgaaaga
+   211861 agagaaaaaa gcccgaggtt cgaaggctcg gactctcatc cggtcaccca agagggctct
+   211921 tctagaagca ctactatcta ctgttccttc tttagtgagt agacggtttt gaacggagaa
+   211981 tgcagaatag aatcatcaga ataatggata ttccatctag ctggggttgg ctgactctca
+   212041 ccaggtcagg gggagaccac agcagatgaa tgagggattc agtagcacgg acaccattcc
+   212101 caaggaatgg aaagaaggaa ccatcaacta gaagaacagg caaggaagag ccctctcagg
+   212161 tcgactcagg agaaggagct tacgatccac cccgtccacc ggaagaccga acccagagat
+   212221 ttattacctg tgctttgctc ggtacggaga taggagctcc gaacctgcag agacgagatc
+   212281 tctactccta gaagtttggg gcaccgtagc tcacctttca ctaaaaagaa gtgtagccag
+   212341 gttgctaaca tcggctaccc ctgaccttct ccttccgcat ctagatttat tcagagcgca
+   212401 caatcggacg ctaacctgga actgaccctt caactgacta ggcgcttggg aattctgaaa
+   212461 cccccccctg aaatgagact tcctccttgc ttgaaccttt gcgacgacta tggcaccttt
+   212521 tggtactcct ctgagacctt tggaaggggc acggactcca ctttagagtc tgattggaat
+   212581 tcgcattctc gataagagcc gctacgagat ctcaaaacag aaagtacctt atcgtagaag
+   212641 acgacttctt tctattctat tttgatcttg cagaatggaa ccctactctg agaggcaggg
+   212701 aagaccctct ccagagaatc cagagaggaa agaggaaggt tatttcaagc tgctttcagg
+   212761 cgtgcgccca cgtcgtcctg ccatcagaaa gagcctacca gtggtcccta ctggcacagt
+   212821 actggcccct cggtcaactt ttcatctgaa agaaggtagc ccttctcccc ttctgaacag
+   212881 aacatttcta agcttgaagc tctttcggat tagatctttt ttcactggat tcacctgagt
+   212941 agctcagtct gatttccaaa aacggatcga attcttggct cggcaaccct tattcattcg
+   213001 atttgtcccc tcagatattc taattgaatg aaattaggcc caacggcaaa gagtaggcga
+   213061 ggaaggaggg aggggttctt tttttctttt tcccctaaat agctttttag aagtgccagt
+   213121 ccgttcccgg gtaatagtca gaacgaagag ctagaatagg agttctagta gagtaagcgc
+   213181 ggacgtcttc ctactttcat ttgtcacaat aggttgaatc tctaaataaa aagcgagccg
+   213241 gcacatgggt tctttgccaa ccctagctct atccatagtg taggatgaga agagatcagc
+   213301 gacctgaatt caccatcctt ttctccttca ccacaaaata caaactgctt gtagaaggag
+   213361 gtgccttctc cctttctgag acttctttcc acttattagt gctggcagat cctctttatt
+   213421 gattgcccac cgaaagttcc ctctacctct aaatatcggg ttgtcatcgt ccttttccca
+   213481 gtttgcgcca ggagcctccg acagcctggc agactgcaaa atcattgcac ttccttctca
+   213541 cgatggatgg aaccaggaga gctaggcacg ggaagggaag ctaggctcac tcccgacaat
+   213601 tatatacctg cattccagac aatggtccaa ccctttgata ttcctaaggt aacagagtac
+   213661 ataggtgaga aaggagcgct tttctaagct cagcaagacc gaatggaatt caagtatatg
+   213721 gcactctaac tacgaaagga atgccaacaa gaaagctaat agggatagga ctactagctg
+   213781 ctagtataga aaccgaaaag aactcctaat agcattggtt gccgctctat ctattctgtt
+   213841 agttccatag ccgcgcttcc tactagtcgg ataggaacaa ctaactgcct agctacaaga
+   213901 agagagctac gaggaaagaa gagctagcag ctaggatagg aactctaaac tagctactag
+   213961 aggaagtcaa gctactaaca gagctcaaag cggataagaa atcggaagag acagacgcaa
+   214021 ggaagatcag aggcgttcct cggtgctatt gccctcttgt cctagcagcc gatcactcac
+   214081 taatagctac taccaggagg agaggaattt ttttcgaaca ggatcctttc cggtggattg
+   214141 gaatttgact cagatttgct taatccccaa gaagctcaat cctcggttaa tgacatagat
+   214201 gcgacccatt agtctttgct cagtcatgta taagatcata tcaaagatca tggtggccag
+   214261 actcaaaccg attctgcctt cccttgtgtc tcctacacag tctgcatttg tttcggaaag
+   214321 gcttatatct gacaatattt taatagccca tgagttggtt tacaatctac gtactcaccc
+   214381 ctcatttcat cagctactct cccctcattt cattggtcac tggcttagtt gcggaggaac
+   214441 ctagcttctt cagcgaagct ggaggaaccc ctatggctag cattgttctt gcttcactca
+   214501 cttacattcc cttcattcaa acttcttcta attcacattc acttacccaa cttctctggc
+   214561 ttagggtcag ggttactcac attcctagtt ataagattcc tagttctggg tctgggtaag
+   214621 ggaagggtct aggctcctaa cattccttcc caactcacca cttatctccc tagttcttga
+   214681 attcaaagtt ctgggattct ttgattctta gttctgagat tgttagttct gggactgaga
+   214741 gtgccaggct gcgacccatc tttgcaaagc aagagcgagg ccaagaagca gcagcactcg
+   214801 tacactagaa gtgagtatag ccttgccgga aagagattta gctttgactt tgcttagcag
+   214861 gctggctggt ttgtttggct tggctgtctt cgtagttgag cacactgatg taccagtttc
+   214921 aagggttaaa gttggaagct gcttcccttt tgtgcagcac acaggttcat ctcatatgta
+   214981 gctctttcta ccaatgtgct gcattaatcc ctagctatca agctgaaact tgaacagaaa
+   215041 ttgctctaaa cctgttcgct acaactatag gagtccaatc tactgaggtt cggcttctgt
+   215101 tgaatctcca gttcacttgc gatggaaagg cttcactcct agcgcagcaa atgaagcgca
+   215161 gcaattttaa gtagcttctc ctctagcaca gtgctgcatt tagtcgtcag cacacaccta
+   215221 gaacatctcc tttagacgca agcaaagacc tgtcttcgtc taaacgtcgt ccagcccttg
+   215281 aaaggatcct tgaagaaaac ccaattcagc acactcctcg tgaagcagcg ggtaatcatt
+   215341 caaatgctaa agataaaggg ccagctttag agcgacttga gctccctgtc gatgaggctg
+   215401 gtacctctaa atctaaagaa aagcggcctg ctttggaaag aatcgagacc caaccagtct
+   215461 tccagcacac tcctcctacg gtcttcgata ggcttggaga tcctgatctg ttaactttct
+   215521 ttcgtctatg agccgtctta tcttccatag cagtgtgctg gctttccact cctgcttcct
+   215581 tttagatcaa agttcgacaa gtgcacctct attgattaaa ggcgtgattc gtacatcttt
+   215641 gaatagtatg ccaccgatta tccaatcaat catttccatc ttttgctttt attgatcaca
+   215701 tatgtttatc tagtttaaat cagctggtaa tgcaggacaa taagcagaac tgtaaacatc
+   215761 cccttgtcat ctgagtaatc taagtcttcc tctttttaaa gcagcaattt aagttgagtt
+   215821 cctttggacc tcttctgaaa ccggttcttc ttcttttggt aatcagagga tttctctttt
+   215881 cccttttctt gatcttatgc ttttcctctc ttttcgcagg gatggacccc gggtaatatg
+   215941 agaaatcaga attctttttt tctgatgctt gcccttacgc atccctagcg catcaaagga
+   216001 aattcttgac tttatcttac acgtgcgtgc tgtcacgcat ctgccaatag taggagctag
+   216061 cacaaagctg ctaatcccgg gacctatccc ttgaaaggta cccttcgttt ttattggctc
+   216121 ttcagtttta ggggtatcag cacctgaaaa gttatgaaac gtgcgattgc cagtaagcaa
+   216181 cttgaaaggt ttttacaagg acctacccta aatctgatac tttattgggt cggatgccca
+   216241 gctttcttgg atgggataga tacaacaggt gattgatctc ccatttctgt catccttaat
+   216301 ttcccaattc ctagcgcagc aactttgact tttcttgatc ttttgctttt ccttttcttt
+   216361 tcgcagggat ttatactttg acttgataga ggtaagggct cctgcccaat atgtcaacat
+   216421 gccaacacat gccacaccaa tatgagttga gttggccctg ataagtggtg ttcaatcttc
+   216481 ctacccttat atttcagagg tcctatcacc ttcatacccc tcgaaacgtg ggttttcgac
+   216541 tatagacatt tcgatgagca acctaaaagt agaagtttat tgtgggagat agatgaatct
+   216601 ccaattgata tcatctcgaa ttacgaagat tggttctttc ttttcattcc gaattagcag
+   216661 ggatatctcc caccttctac caacaagtcg attatcaact gacgagaaat ttgattgaag
+   216721 ctttcctctt tcctttcctc tgctgcttct tgccagggat ctaccaccaa gtcgatatga
+   216781 aacatgaaaa gtgtgatgtt ggttttcgtc gatcgatagt aatgtatata aaagcgatgg
+   216841 atttgagttg acgatcgaag gtgtaaggta gataatagcg attgttgagt tgagtacggc
+   216901 ttggatcaat cctcccctcg acaaggatca accaagaaat aatcgtcgat gattacaatt
+   216961 tcaacatatc atgtattgat aggattggtt gatttgattg atttgaagtg tgattcccct
+   217021 cgtatgaagc ggagaatcaa tgagtcccaa acccgacaat tgatcaaccc ttctcgtctt
+   217081 ttttcattga ttgattaatc gttttgaatc gattgtgttt ctggttttaa aatatagtca
+   217141 agtgttacaa catttgaatg ttgatgttga ggagaaagca ggagtgtgct gaggagctca
+   217201 gggttgcttc gctcacttaa ctggctcata ctacttgttc acagacggac aaagagatag
+   217261 ttgtacgttt gctgctcaga cgattaacaa agtgtttagt gcaaggaaaa aaagcatagt
+   217321 ttgatgagtg ccaagatgat tcggagagtg ttcagtgcag cacaaaggaa gaagatctct
+   217381 cttggcaggg ccggaaacag gtgcgcgcag ttgggaggat gccggggtgt gctagtaagc
+   217441 aaatgggaag ttgatccgat cttaagtagc ccaggatcca tcccagggga agatctatcg
+   217501 agtaaccagg aaagagatgg gtaggtagaa cagccagaaa gatagaaaga gctgttgctg
+   217561 gaaatcagag aaatagtcga gttcttccct ctcgtttttc ggacctctaa ctatcactaa
+   217621 aaaagaatct cataagagaa gaaatcaagt tgatagatca gttagttgag gcggatcctt
+   217681 tctttacctt tacgagcacg gaatcgccta ttgtcggagt cttgggaacg ggtcgagggg
+   217741 acggaacggg ccctccagaa cgagggggat ccaggccggc gtcgggaatt aaccgcgcgc
+   217801 ctggatcagg aaatacgaac cctccaacga caaatccatc tcggtcgaag agctgattcc
+   217861 attcgggatc ggcaaatcgc cgagtggagg ggaaggttta atacggaatt agcagggata
+   217921 gaggaggaaa gcgcacgccg tgcgttcctt aattggtgtc tccgtgtgct catccatgca
+   217981 cacgaggacc aacctcccca gaactaaatc cgttcgttcg ttctcttctt tcttttcata
+   218041 gatttaagtc tgtttgtgca tctttctttc tcccatgctt tccgtcggtc aaaccaacga
+   218101 ttctcttctc aaagtaatag agagatcctt ttctagttag aacttctatc aatgcaatta
+   218161 aagaaccatc ccttcctatt agtttgtcct gtcagataga aagaaaatta ggccccaaag
+   218221 ataaagagcc cggtgggggt gaagggtggg gtttacatat aaccgagaca aagtggttta
+   218281 tgattgaatc tcagaggcat tcttatcatt tggtagatcc aagtccatgg cctatttcgg
+   218341 gttcactcgg agctttggca accaccgtag gaggtgtgat gtacatgcac ccatttcaag
+   218401 ggggtgcaag acttctaagt ttgggcctca tatttatcct atataccatg ttcgtatggt
+   218461 ggcgcgatgt tctacgtgaa tccacgttgg aaggacatca taccaaagtc gtacaattag
+   218521 gacctcgata tggttctatt ctgttcatcg tatcggaggt tatgttcttt tttgcttttt
+   218581 tttgggcttc ttctcattct tctttggcac ctgcggtaga gatcggaggt atttggcccc
+   218641 caaaagggat tgaggtttta gatccttggg aaatcccttt tcttaatacc cctattctcc
+   218701 cttcatccgg agctgccgta acttgggctc atcatgctat actcgcgggg aaagaaaaac
+   218761 gagcagttta tgctttagta gctaccgttt tattggctct agtatttact ggctttcaag
+   218821 gaatggaata ttatcaagca cccttcacta tttcggatag tatttatggt tctacctttt
+   218881 tcttagcaac aggctttcat ggttttcatg tgattatagg tactcttttc ttgattatat
+   218941 gtggtattcg gcaatatctt ggtcatctga cgaaggagca tcacgttggc tttgaagcag
+   219001 ctgcatggta ctggcatttt gtagacgtag tttggttatt cctatttgtc tctatctatt
+   219061 ggtggggagg tatatgaagg aacgaaagag tggattacaa aatgaaagct cgaagacaaa
+   219121 gagaaccggg cttttccaaa gaattactgc agctttccca ctccctttga ttatcatata
+   219181 caataaagtc tcttccactt tcctaccaaa tctatcttta ttctggcaca taaatgaagg
+   219241 aatcgaagag attatggcag atcatgttca ccaagaaatg acccgaaatt agatcttggt
+   219301 ctatttgaga ttgttccttt taatcgtaat caaagatgtt ttcttgtttc ttgtttcttt
+   219361 tctgaacaaa ttgaagaacc taatggatcg aactcatcct acgaaagata tcggaacacc
+   219421 aaaaccagaa agaaagaaag tagagctaat tgcgagagtt cctcgttaaa agtgcaagct
+   219481 gtaattatac gtttcagaat cccacttaat aatttcctca aaaaattaaa tctgcttttc
+   219541 gtaagacgtg tctttagaat actgttcaaa acaagtaaaa aattcgttga ccaagcgaag
+   219601 attgaggtga tgacaagttt gcaaccacca aacttggtaa agggtattcg aagcttctta
+   219661 ggacattagg gttttataga agatttctca aggatttatc gaagattgct aggccactca
+   219721 cacaactctt atgcaaagat gtggcttttg tgtttgatga gaaatgcctt gaggcattct
+   219781 tgatgcttaa agaggctcta gtgaccgcgc ccatagtcca actggaattt accatttgag
+   219841 gtcatgtgcg atgcaagtga ctacgcggtt ggagccgttt tggggcaaag gaaggacagg
+   219901 aagttgagcg ccatctactt agtgccatag ggcgaagcgg gacatgttcg acctcacgtc
+   219961 taaccagcgt caggtcgaac gagcccccta gctctttctt gtttgaagac cggaacatta
+   220021 gcaaagatgg attaggaaca ttagcaagat ggattaggat ggctctgggg gtaagccccg
+   220081 taggaatact agcaaatcca atgttacatg tattttaggg caaggaaggg ctatctttac
+   220141 tctttagttt gagagggaga aggcttcctt ttccgcgtag gcagattagc aatccatagt
+   220201 cttactaaaa gagtcttgga cttagatttc ctcctcctcg aggatgatgg atccggggag
+   220261 ggggaaaaga tgaggctagt agaaccttgg acgacgcgca agtcaactat gccaccaccg
+   220321 cgcttacgct cctcgccata gttttcgcct ttgagaagtt taggtcggtc ttacttggtg
+   220381 ggctccaaag taatagtgca cacggaccac gcggctttga gatacttatt ggcaaagaag
+   220441 gacgcgaaac cgaggctact tagatggatt ctacttctcc aagagtttga tttggttata
+   220501 aaggacaaga aaggaattga gaatggagtc gcgaatcact tgtctagact tcgggtcgag
+   220561 gaggacatac cgatagatga tagcctcccc gaagagaaag tctattatgt gctcgaatat
+   220621 ttgaaggaag agtatcccgc ggttatgatt ttggaaagca tggaagatga ccttccatgg
+   220681 tatgccgatt tttttaatta cttggcttgt gatcaagagc ctccaatgtt ccatggatat
+   220741 aggaagaaga aattcttgcg agatgtgcaa cattattttt tggatgagcc atttttgtat
+   220801 aagagatgct ccgatggtct ttttaggaga tgcatttcaa aagaggaagt gaagggcatt
+   220861 ctctaccatt gccatgcatg catcggagta tgcaggacat ttcgcaacat ttaaaacagt
+   220921 ggccaaggtt tgttttacaa gcaggattct attggccaac cacgttcaag gatgctcatg
+   220981 ggtttgtttc ttcttgcgac gcttgccaaa ggaagggcaa ctttacaaag aggaatgaga
+   221041 tgccacaaca ctttatcttg gaagtagaag tatttgatgt gtggggcatc tatttcatga
+   221101 aaaaaaccat cttctcatgg aaaccaatac atcctaatgg cggtagatta tgtttaaaaa
+   221161 tgggtggaag ctcttgcgag ccccactaat gacgccaagg ttgtgctaaa aatgtttaag
+   221221 aaagtgattt tcccaagatt cggtatacca cgagtggtta ttagtgatgg gggatctcac
+   221281 ttcatcaaca aagtttttga gaaccttttg aagaaaacat ggtgtgaagc ataaggttgc
+   221341 cactccttac catcctcaaa caagtggcca agtggaaatc tccaaccgat aaatcaaagc
+   221401 gagaagacgg ttgggaaaac aagaaaggat tggtccacca agcttgacga tgcgctatgg
+   221461 gcatatagga cggcatacaa gacacccata agaaccacac ctttcaatct tgtctatggg
+   221521 aagtcttgcc atttgccggt tgagattgag cacaaagctt tttgggctac aaaattgttg
+   221581 aactatgaca taaaaaccgc ttcggaaagg cggttggtcc aattgaacca cctcgacgaa
+   221641 ataaggcaag aggcctacga gaacacaaag atctacaagg agaggactaa ggcttggcat
+   221701 tataagaaga tcttaccaag ggagttcaag gtaaatgacc aagtgctgct cctcaactct
+   221761 agactcaagt tatttccagg aaagttgaga tctagatgtt ttttaccctt tcgcataaag
+   221821 gaagtgaaac cgtatggagc ggtagtgctt tgggatgtca atggtgaaca cttcacggtg
+   221881 aatggccagc gccttaagcc ataccttgct gatgagagca tgccaagcaa gggaaagttg
+   221941 acgatctgtt tagcctttct cacaacaact tgctcctctt ccttatcatc tctcacaact
+   222001 tcctcaagct cttttccact tctaagcatt atgtaaaatg ttcccttggg tttgcttccg
+   222061 gtttaccggg tagtgttccc atgggtctct ttgatgaaga ggcggtttaa gcaacttggt
+   222121 tgtcaagaga tttgacatga gaggccaaag tttaaaactt cccattaaga tcattgtaca
+   222181 tgttgtccat ctttgtgttc atctcaatgg tacttttctt ttgtccttca aggaattgtt
+   222241 gcaacattag cttcacctca ctatctggtg cagttggagc agaaaaagaa ctcccttgag
+   222301 cttggtaagg ggcttggtaa ggtgcttggt agggcgcttg gtaaggtggt ctttgttgga
+   222361 gcagtagccc ctctagcttg gaacccttgg ttgaaaggcc ttggttggaa tccttgacct
+   222421 ccaacatagt tcaactcttc ttgtggctcc tcataccctt caacaccaaa gtcttggtac
+   222481 ccttgatatc caccatactc ttcacaagag tttaccgtca tcttttcttt tggtcacgct
+   222541 tcaatatctt atccattttt gcattcagct cttggatggc atgatgagat tcatcattgt
+   222601 ttttccttgt agtccgatca taatccgagc catgactccc attgctttat gcgagattct
+   222661 ctaccaaagt ataagtatcg gcttcctcaa ttgtttgact catgaagctt ctcgtagaca
+   222721 ccagagctct ggagaggttt aaataactcg actgaaagga aaggaacgaa gtcacatctc
+   222781 tcgcccaagt tcagcagact caatcgcacc cttccttcgt cgtataaaag atagttccga
+   222841 tcgttgagtt taatggagac acaactagta atattgatat tggaaggaaa aggataagac
+   222901 ttaagagaac cctccatggc tcttgatacc atgtgagtgt tcatgggctt ccaactaaaa
+   222961 accaattggc aatgagtgga gaggcccatg tcctttatat actagtgtag gtcccttctc
+   223021 actttcaatt agggattcct aacaaaggga agtctggata cttgacacag gttgttcttt
+   223081 cggaagaaga gaatggtttg taagtttatc agatgctggt agtgaaacaa gtaaagatgg
+   223141 caaatgacac catgtcaaca gtaaagggaa taggaagtgt gagactgaga aatgaagatg
+   223201 gttctactgt tttgctgaca aaggtgagat atgtccccag ggtgagagca ttactaatcg
+   223261 gggaagtttt aactagctgg agagtaagga tctcttctcg aaagctatta tcaacgttca
+   223321 gcgattgaat taccttcagt aaatctaacc agctaagcta ccctttataa ttaagttaag
+   223381 aaagggcagt tctgtcgatt cccaatctcg aaactgagag tgtgcgaagt cacaagctga
+   223441 tgggctatat gtgaagttaa aaaacccagt taagaaagga aagatatagc tatcaagctg
+   223501 aaacttgaac agaaatttcc ctagctttta acctttctcg ttagtacact atctttcacc
+   223561 cttccacatc ttctctttat tattggatgg tttatggtca aaaagcttag gctgtgtatc
+   223621 ctcagctgtt gtctgttcag tattctcact ggcatctccg tctgtgataa catctactta
+   223681 tctcctttca ctctctcctt ctcctctcct tcttccaatc ccgtagagag gcctggaaaa
+   223741 atttattgaa aataacaaga cgattgacat cttaaataaa gacatcatgc cctagatgcg
+   223801 aaggttagta agagacccaa gtgaattctg cgaagataga agagcggact tctgaggaga
+   223861 ctggaagcct ataaataata ggaatggcga actggaacct catccttcaa agggttggtg
+   223921 tatatttgtc ctattcgtaa agaccccgac cttgcgtcag gaaaagtgga aaaaccccta
+   223981 agactctacg ggctagccgg gcctaaagaa gcttataaaa ttccacctat gtaatgactc
+   224041 gcattcaaca actaagagtc ttccttgctt tcgtcacaaa tcccatcgct cgaaagaaag
+   224101 agggtttgga gttagttaca agctggacta agactgaaat gaaggaacca gatggaggct
+   224161 gatgcaacgg ataggagtta tgctttttgg aaatgaggat tagcaatcat tcgacagttg
+   224221 ggatgccgct ttcactagta aaagatatcc acgcaaggaa agaaaatgcc ccgccggaaa
+   224281 gaaaagtaat taagttaaag agtgtcggtt tggggtgagg cttactcctc gaccaaaaga
+   224341 attgaaaaaa agcaagacta acaagtagag gtggtttttt tcctaagcca aagaagggtt
+   224401 tgattccacc tcaactgaaa gagagtcagt agcagaactt aactaagcag ctgcagagtt
+   224461 tcgctcagtc taatctaagt agctaggcta agagtcagac agtaggcttc cggtagggac
+   224521 agggatcggg tggcctgaat ggctcaacta attagtagag gaggaacttg agttactcga
+   224581 ccactcggaa gaggaacgaa ggtactcgac caaaagaaaa ggttagaacc tgctcggccg
+   224641 actaaagcta aagggatagg ggaaagaaaa ctgcttgaat aactagtggg ttactactcg
+   224701 atcgactaaa acagaagtca gggaaaaaaa ccaccatcat actgtctttt cccttaaaga
+   224761 aagtaaggca tcccaaggca gggaaggaat ctgaaccaag gaaggattct gctggagctg
+   224821 cagtctcgga gtttaaaact gacctttagg cactgacgtg actcttccac ttgttgatga
+   224881 ggagagggat tgattactcg actaaaagga gagggtagca gaataccatt aataaaaata
+   224941 tcccattagt gctgctcttt cctaagtaaa atacctaaca acgagaatca ggagggagct
+   225001 tctgggtagc atcaggagac ttacttacta gaaactaata aaatagtgaa ggttgtagag
+   225061 aagggttctt ataagataaa ggtacagtag ctcgtatcga agaatattcc cgagatgtga
+   225121 atctcactaa agcatttcaa ataaagagag gggccaagct taagaattcg ctttccagga
+   225181 cgtactcagg gcagggagac agggcgcaac aattgtgact ggccattttg atcgatgagg
+   225241 gaaggcaagg ccttaaccaa gagtcatcgc attcgtcgca ccaatcatag atagaacccg
+   225301 atccttggat agaatccata atcagaagtt atcggtaccc ttagcatctt ccctttgatt
+   225361 agaatgtgaa ggtccttggt tcgggcaagc agcgtattta gaagcctgct accgaaatgg
+   225421 aattcctcgg tcgataactt tcatgataaa gcagaattcc gttggaccaa ttaagtacct
+   225481 tctcgtttca gtcgagtcac tgagcgccca ccgggttgat tgaaagtgcc cctacggact
+   225541 gggacacctt cttccagagc attctcacca tctgtactag ccttgcatga aaaaaggatg
+   225601 aaccatgaag aattctccac tctatgaact catagcgctc ttgggatggg catgtgtctt
+   225661 gcagtaggta aaagttaaag aataaataag cacgagggag atgctaggct gatcaaaaga
+   225721 agaaggatta gcgagaagta tatctaactt tattctccaa ttgcaattga ctgattatag
+   225781 attttctata ttctttttct attttcctag cgaactagca gctcccccgc cctacacagg
+   225841 attggatact gaacctgatc agatagaccg atcggtgcgt aagatgatag tgacgcacta
+   225901 ggtttgattt gatctcgcta cctactatgt atactgttat agctcttctt ctacctctat
+   225961 ctactgagga tattggagca ctaacccgca actttcatct acgctaacag ctctcttctc
+   226021 ttaatccgcc ttcgattatt cattagcgct ccgcgggtcc tcctgctact caaacagctt
+   226081 tctttaggaa atgccgacat ctactactaa tggcttgaca ttccagaaca ggaagtcatt
+   226141 tgaatgcccg aagtctgcat cttctattac atctccgaaa gagtgaaaag tccccttctt
+   226201 gcctgatgag ttcttctttc ttttcttgct cctcacagtc aagatcccgc aaattctgac
+   226261 tgacttgaaa ttaatagtcg ttttatctga tgatatgaga tgacgagcgt cagacagaga
+   226321 aagatcaaaa agtctagtca agaagcttaa cggtgaagat ggagaatgaa ttgaatcttt
+   226381 cacccacctc agtccaacta gcactcttgt gcctcatcga tctgccttct cactccagga
+   226441 gagccatttc gccattcaaa aagaactgta cacgcgcgta tggatcatcc atcagaatca
+   226501 atcagtcttt cagagtagtc atcaagcaag ctatagaagt acgtgtgagg aaagagaaag
+   226561 aagtgagttg agtgacttgt gaccatcctt ataatttcga attgcataaa aggagtcagt
+   226621 cactgacgcc gctcccagaa tcttaggttg atttcttttc gttttaggta aaataaccaa
+   226681 ggttctaaaa agctatgggg ggccctcata aatccaccac ttatcagact gcgaatgcta
+   226741 acagtatcga agcgcaccaa gagattcccg gtcgaacaaa tttcgtatag aaaaaaaaga
+   226801 tgtgttttgt cgagtttgct tcgtcctctt tttttttctt attttccttc ctttaggcgc
+   226861 ttgctctcaa aaaaaaggga aaggggtcaa ataaaaagct accttttatc aaggctaatt
+   226921 ccaacggact tcgcccagat cttcattcaa tgggacgacg tcagtgcatc tttctggatg
+   226981 atgaacgcct gtctcgctca tagatagagg aagagtacgc gcgctagcgc gctacggctt
+   227041 acgaagttga tcgggtcaga tagccccttc gggcaaccaa ccgcacaaaa aattccgatc
+   227101 aacaacttgg agggatggct gagtggctta aggcattggt ttgctaaatc gacatacaag
+   227161 aagattgtat catgggttcg aatcccattt cctccggcgc ggaagtgaaa cgggcgggcg
+   227221 aaatgagaag agcactactt agtgactagg agcggggagc ccgttgcgcg tttttttgtt
+   227281 tgaccggcct atcttcataa gtaagctccc tatggccgtc cagtccctgg gcggctctcg
+   227341 gttcttgagc atgttgggag attagtcgtc aattgaaaga gctgctctaa agcttgacga
+   227401 agaagttttc cctattaatt agattagtaa agggcttttc ccttactagt caagtggtaa
+   227461 ggtagggcgc tcttcgatga agaaaagaag agacttttgg aaaagtggtt cagctcagct
+   227521 ggttagagca aaggactgta aatccttgtg tcagtggttc gaatccacaa ccacttctat
+   227581 tctcggagct gaggtatatg aagaatggcc ttttggtccc tttcgtccag tggttaggac
+   227641 atcgtctttt catgtcgaag acacgggttc gattcccgta agggataggt actcattctc
+   227701 ggccgctttc agttagtgtt cattgctgag ggatcccgag atctcgaagc agatgaccaa
+   227761 gtgagctccg cagccgttag tgcaagggct cgatattccg tttccgtaga agaacgagac
+   227821 actgtaggct ggcgttttgc ggaccaggag ataatattgc atccgagaaa cgtacagaat
+   227881 ccagttgtag acctcctcgt ggatgtacat cctgcccagt cactatcaca gaaggcctga
+   227941 acattgagct tgctgttttt gtgaatatac agcccatgaa agatggtgcc tttaacatag
+   228001 cgaaggacac gcttcaggag atcaaaatca gcgagtgtag gctcatgcat tcgctgacag
+   228061 acaatgttca cggcataact tatgtctggt cgagtgagag ttaagtattg gagagcacct
+   228121 acaatgctgc gaaagtctga aggatccgga tacttagcag tggaaacaga ggagttgagc
+   228181 ttaaggggca gaggagtaga cataggtttg caatctaaca tccctgcatt gttcagaatc
+   228241 tgttcagcat acttggtttg ggatagaaac agacccgaag gatgagtttt gatctgaatt
+   228301 cccagaaaat agtggactgg tccaaggtct ttcattgaga aggtggagct gagctgaaag
+   228361 atcaacatat tgagcagagt gttggaactg cccgtgagaa gaatatcatc cacgtagaga
+   228421 agcagataca tgacatcaga attgttgtga tacgtgaaca gagaaggatc agctgagctg
+   228481 cagacaaacc caaactccag aagaaaatcg aaaatcgcta aacttgtcga accaagcacg
+   228541 aggcgcctgt ttcaatccgt agagagcctt tttcaacaga cacacatggg atggatgaat
+   228601 aggattcgta aaaccaggag attgatgcat ataaactttt tcttgaagat acccatggag
+   228661 aaatgcattt tgaacatcca attgattgat tggccaacct ctaactgttg tgcaacattc
+   228721 aagatagtac gaatagtggc tgtcctcaca accggactgt aggtctcaac aaagtaaata
+   228781 ccttcttctt gatgaaaccc tttagcaacc aagcgagcct ttaggcgatc aagagtacca
+   228841 tcagaatgta atttggtttt gaagacccat ttacaaccaa gaatattctg attaactgga
+   228901 ggtggaacaa gtatccaagt cttgttccga gagagagcat caagttcttc ctgcattgct
+   228961 tgacaccatc ctggatcctt aagggcaaaa ataacagatt ttggttcttt tttaatagtg
+   229021 gtagtaatag tgagactata cttgggattt agtttgttta tgccagcctt tgatctggta
+   229081 agcatggggt gagaggagtg gttttgaggt agtggaagaa ctgtagatga agaaagcggt
+   229141 atttgagagg gaagtagaag aacatggaga ggaaggtggg tctggttctc tgtttagtaa
+   229201 agaagaaggt gggttagggg aaggtggatt aggtcagttg gttagggatg tgaggaaagg
+   229261 aagggaaggt tgggccaggc aagctaggga agttgagaag gttgggctag ggaagttggg
+   229321 agttggtgtt tcccttgttc ccagttgcta gtgcctgtgt agcttgttcc cttgttccct
+   229381 tgttccctgt agctagtccc ctgttgttag gaatgggact aaagagctct aacagaagag
+   229441 cggcaagagc ttctctgaaa agacctgttc tcttatcgct ttgaacattc tcctgggttc
+   229501 ctagcccaaa gaaaaagaca aggctgagtt gccccttcta ccatctgagg tggaatacgg
+   229561 atcaagattt gctgagtttg ttctatggaa taggccaggc cctctcactc tatctcatgt
+   229621 cggagaagat tatgtatgaa tttgaatacc cacgaacgtc taagaggtca gctacttagt
+   229681 tgattttatt taaggtatct cttgatcgaa gggttcaagc cgctaaagcg gaaagatata
+   229741 tctcttcgcc tgaagcgttg tggatttagg cgtgtcacgc caaccttctg taagttcctg
+   229801 gataatcgtt cccggataat gcctgccgca gggactaacc ctatcttcgc gcatacttcc
+   229861 ttgtttgtct gggagttgaa tcagaagcat cgaatgaaag cttctttaca ggacctcttc
+   229921 tatttccctc cattcttctt cctagtgacc taggagttgg gttcgtgaca ttttttcttt
+   229981 ctgcttattc aagtagcccg gtctggtaag taaggacttt gctctcagta gaagtcaaaa
+   230041 agccttcaac aagaccacat ccgctattga atcagctgct atagacttag ctaaagaatg
+   230101 tcttgcacga gattcaatgt tatgtgcttg attgcgatca catttttctc gtagttgatg
+   230161 agctgccaac ctccggaatt gagggttgta ggtagagttt cctgcgttca acttggagtt
+   230221 ggaagagttg gatttggagt ttataaaagc tgttattagt tccgactccc cggtctaggc
+   230281 ctggtacttg ttagttctaa ggtatcagtg tcgctacttt ctgtaaaaaa tttcctgcga
+   230341 agggaagcgg gaaagacaag gtttcgaatc ttggtctaag aacttacact acactacgga
+   230401 ggcacgggcc aagagatcat aacagtcaaa tcctcaacct cacaagcaga caagctggtg
+   230461 gatcatctga tcttgaaata ggtcatccca gctatcccga tcttttctct ctttatttcg
+   230521 tcaggtatct gaatcatacg attcctcgga tgtagctttt cacattggac caaaacaagg
+   230581 cccggaaacc ccgctagctt tgttgataga aagttagctt cccgtgcttg actagtaggg
+   230641 ctaatgaacg acccttaact atggaatgtt acgcctttcc tcggttactc acaacgtaat
+   230701 caactccatt ggttattttc cccgggggaa cacctatagc cccaatagac tgatttagta
+   230761 ggtatgatgt attactagga gtctctaagt tcctgctttg gttgtccctg tccgggctaa
+   230821 gcggagttta gctttggccg ttaataagag cttccctcca ttccttcagt ccagtcacgc
+   230881 ttttcttttc ggcttttgct tcagtctctt cttttcgtcc tttcaggtgg catttcgctt
+   230941 agtcttgtca ttccttccgt catgctactg tcatgtgcta taaattgatc ctcccgtcta
+   231001 gtgtttcctc tctcatgtta tacagaccgc tcctttaatc caatgaatag gcggcgaggg
+   231061 tgacaaaggc ttagaagcag gggcgggcag aagcagcatt aattctttcg cccgcgcgta
+   231121 catattctat tgagcgtata cgatcattga gctgatcacc ggtggcttcc tcacttgtga
+   231181 tgtgaagctg ctttccattt ctcatttttg aggatatcga tattctctta agagaaaggg
+   231241 tagccttttg tcacgagctg gactcgaacc agcgcgtctg gcaaaaagac catccagatt
+   231301 tcaaccttag actgatcgtg actttggtta ctcggttccc cacgcgaacc ccacacgcca
+   231361 tgggtacgtc cgaggtagat taactacgaa cttttttcaa aaaaagtgtg atatattttc
+   231421 ctcacttact agcaaagaac tggaccagga ttcgtccttg gctgcctacg tacccctttg
+   231481 ggaatcaagt aaatcgtagt tctcttaact tattgtcttc tttctaagtc ttgctttctt
+   231541 ctctctttac tattgcttct ttttcttttc tttctataaa atgttcctca atagcactga
+   231601 ttcttcatat atttttgtca aagcaattgg gttcacaaag cattattttc tcggtccatt
+   231661 ttccacctct ttcttttgtc gaaccatgct aagcaaagat gcacggttcc agtcttcttt
+   231721 ccctccccgt gtccactagc gctcctatcc tctccttttc agtcgagtca ctagggttcc
+   231781 tctcctttta gtcgagtcat tagggttcct ctccttttag tcgagtcatt agggttcctc
+   231841 tccttttagt cgagtcatta gggttcctct ccttttttta gttgagagtt acgtcagtac
+   231901 cttttagtcg agtggcttct cgctcctgcc cagaacgaag agaaggcgaa aaagcgccgc
+   231961 cgaagcagca tgagcgggct tctattgcta cgtaacaata gagcaggata gcattttgcg
+   232021 cccacatgtt tgaattttag ggtaaaaagc tcgcttgtta tacgggatcc gacgcatcca
+   232081 gcagagcgaa gcagcgttcc attcttttcg gcggcatcct tccgcattgg cggcgagtgg
+   232141 agtgccacaa tcccattcat catttttgat ctacataagc caaagccctc cgcactggcg
+   232201 acgtccccgg cataaatgca aggaggatgt atagctgata taggatcttg tggaacagga
+   232261 tttgattctg caagcggttc ggtacaaaca aagaaatttc gaacaaaagg atcggaactc
+   232321 gctgatagga aaggagagaa aaacaaagca atgccaagag ctccgtcaat tcgctgttca
+   232381 tcgatagacg aagctctctc tttatcatct cgtgccagat gcaacaaagg atgagtcctt
+   232441 tttccttctc gcgaaccacg ggagcgccaa gcgtccagag gatttttaat tttcctttca
+   232501 ggataaagcg gcgcataaaa aagggctggt ccgtcaaaag tccggttcct tcgcgaacga
+   232561 agttcagaat caacaagggt tcgtagaacg aagggagtgt acaactggtt tttcaaacca
+   232621 ctttcttgtt cgtaacgagg gagagataaa attgagttct tcacgaagtt cgagacaaaa
+   232681 aaaaaaaaaa gactttccct atggcctcct tgttttaaga cattatggct ctggggtcga
+   232741 ccccggtaac aaagaaaaaa tccataaaaa tttgggatcc gacaccataa taaaatacta
+   232801 ccctcatgat tagaccatgt tcctgagatt tgataaaaga aaggtgcatt agcggttaat
+   232861 acgttgtaat tggataagtt attaggaata tgacagaacg aaagaccaag gaaagaaaga
+   232921 agaatgcacc aaaatgcagg tgctgcacca aacgctggtg gttgtttctt gttgtaagtg
+   232981 aatgcaacga aaagacccgg aaataacgaa taatgaaaga attcatatat tgatattgac
+   233041 atttcgtgct catttcaaga tttctgcttc gttattccca tcatccggta accacaggat
+   233101 gatccacaag aaaggtggca ggattcgaac ctatggccgg cccaaccctg acctgttggg
+   233161 ttgggtggcc tgcttcgccc tcgtcgcctc tgtacccgaa acagatgcgc tgcgctaccc
+   233221 agcgcgtaac cttgtccccc tatccctctt ctgcttatgc cattaccaat cgcgggtaac
+   233281 ccccggaccg gccgccccta acctaataag aacgattatc cttatgacca aacaaggacc
+   233341 agcttacttt tcgagcgaga gtttcacgat cccgaccagc aacttgttgt gggagtaagg
+   233401 gcatccaagc ttgcccaacc tagtaaaggg gcttggagat agagggtttt ctggggggat
+   233461 ttggcttctt tggaaagctg agttggaggt gaagattgtt catcgccagt atgttcattt
+   233521 tagtgtttat cggaataatg cttttctgtc ctgggtaacg gcagtctatg ctagtcctaa
+   233581 tgctactttg cggcgtgctc tctgggagca gttgagtagc ttggcatcta tgatatctga
+   233641 tccttggcta ataggtgggg ggactttcat tccattctgt gtagttcaga tagccggggt
+   233701 ggctcagcta gtggtaatcg gccgtgtcgt ttgagttggt tccattcctc tagtctctat
+   233761 gacatgaagt tcgttggtcc gaagttgact tgggctcgca agggtttgct gaagcgttta
+   233821 gatagagcta cacttgagag tctttctatt acttccttgc ttacctgggc tcacttcgca
+   233881 acaaagagaa gttgtttatt tgcttgctta gctagaagat gccaacttcg ttcacttacg
+   233941 tctctcgttt acttgttagc tagccctatt cctcaccttt gctttgaggg tatctaaata
+   234001 aaaaaaatat acagtggtgt gctcgttaga ggatgctttt ttagttagct aagcgactac
+   234061 cttagttgaa ggtgggagga gagcagcaaa gtctcttatc tttccaaaga aggcggaaat
+   234121 cgagatgtga gaaaaagaat agctattcac tctccaaaga agccctcaaa attctttcta
+   234181 gcaaatgtga agagaggatt gttgacaatt caataaccac tgcaataccc tcttacggaa
+   234241 tgatctcggt gttctcgcta tgtcgtcaat ctcagtatac ggaaatggtc tgaccaagag
+   234301 gcatcgatct ggagtgggtc ttgcattgcc ggatgacagg agccctttac ctccactctt
+   234361 caattatatc cataccaggt gaacacagat gatcgacctt cagtgcctgg tcaaccatta
+   234421 aagaagagaa gaggagcgga acaatgagaa ggagttcgta gccctccaac cagaactaat
+   234481 caacccgcct cttcaacatc tccatttccc attccatctc tgggatccaa tgcttcttca
+   234541 ccgggcccgc ccgatcccca gaacgtaaaa tggaaccata ccggaaaccg tttgatcagg
+   234601 atagaaagag caaggggctt tgttttcgtt gtggtgacaa gtggaatcct ggtcacagat
+   234661 gtaagggcag agaattaaag tatatggctg tggatgaagg agatgagttg gatgaggctg
+   234721 atgaagctga aacagggaaa aagatagaag aatcagatga ggatgaagta aaggaattac
+   234781 tctggttgtc tttgcggtcg atggcaggtt taacaactga gagagatcca tgaggatgag
+   234841 aggacaaatt gcaggacaag aagtcattgt tttaattgat tcaggggcta caagcaattt
+   234901 catagcagag agtgtagcac atcggtgtgg tctgcaaatc acagaaactc gtggatttgg
+   234961 ggtttctatt ggaaatggtc aagtcgttcc aagtgccggg aaatgcagcg gagtagagct
+   235021 aacggtccaa gatgtgcaga tacgtgcgga cttcttcctg taaccatgga tatagtgctg
+   235081 ggttatgcgt ggttggctac tctgggtgac accagaatga attggggaag acacaccttg
+   235141 agtttccgta gggaagatca gtgggtcacc ttagcaggcg atccgtcact ggtgcgagct
+   235201 caaatctctc tcaattcggt agtcaaagag ggaaggaagg cttattatct cctagagctc
+   235261 accgcactgt tttgcaggag gaattaagac tcaaaatttt cctgaggatg ctcaagtgaa
+   235321 acagttgttg gaatggcatg aaaacgtgtt tgagatgccg caaggattac ccccaaaatg
+   235381 tggcagggaa catgctatca accttaagga aggagcagaa ccagttaaca tccgtcctta
+   235441 ccggtataca tatactcaga aggacgagat tgaaaaattg gttaggggag atgttagaag
+   235501 cacggatcat ccagccgagc atcagtccat attcaagtcc agttttgctc gtccaaaaga
+   235561 aggacggagg atggcctacg gcgcgtggat taccgagcct tttacaagct cacggtacca
+   235621 gacaggtacc caattcccgt gatcgaggaa ctgttggatg agttacaagg agcttctgta
+   235681 ttctccaaac tcgatctcaa atccgggtac caccaaatac gtgtacgagc cacagacgtg
+   235741 aagaaaactg cctttcgaac tcacgaaggg cactatgagt tcctggtcat gccatttggg
+   235801 cttaccaacg caccagcaac attccaatcc gtgatgaacg atattttccg cagggaagtt
+   235861 tgtgttggta ttttttgatg acatactagt ctatagcaag ggcacggagg aacacatgaa
+   235921 tcacttggga atggtgctcc agatttggga acaacaccag ttttatgcaa acagaaagaa
+   235981 gtgtgctttt ggacaaccac agattgcata tttgggacat agacatatta tatcgggcga
+   236041 aggggtgtca gctgatccag ctaagttaga agcaatggtg ggttggccag aaccaaagaa
+   236101 cacaacagaa ttgcgaggct tcttgggctt gacggggtac tacaggagat tcgttaagaa
+   236161 ttatgggaaa attgtgagac cgttgaccga gttactcaaa aagaacagtt taaaatggac
+   236221 agaaatggca gcactagctt tcaaggctct caaaggagca gtgacgacgc ttcctgttct
+   236281 agccttgcca gatttgaagc taccttttgt gacccgcgtc gggaaatgga attggagctg
+   236341 ttttattaca agggaacagg cctgttgcgt atctcagcca agggttttct gaaaaagggg
+   236401 agcttgaaat cagtgtacga acgagagctc ttagcaatag tcttggcagt ccaacgttgg
+   236461 agacatcagg gcaacgtttc acgatacgca cagaccagaa gagtctaaag cacttgctgg
+   236521 aacaaaggtt tgtcaccaag gaacaacaga ggatgggcta ccaaactgtt aggcttggat
+   236581 tttctcattg agtataagcc gggaactgaa aataaggctg ctgatgcgtt gtcaagacga
+   236641 gtgcaacctg agcgtctgtt ggaacttgta ttcgcacctc ctccctcttt cgatgcagca
+   236701 gaattactat ctcaagttga agctgatccg gagttaagaa ctgtgttaca acaagcccga
+   236761 tagggaaaga acctggaatc tggttatacg gaaaaaagag gggctgctat gtaaagatgg
+   236821 aagagtggcg tctcgaatca gccaagcttt ctcccaattt cttctatatt ttcatctctt
+   236881 tctctattct accgagaaga tccatctcgg gtcttatcag atggtatcag agccttcggt
+   236941 cttctgactg tgggtatcca acgacgattt cgacaatggg tttggatagt aacgatatag
+   237001 cgctgatcag gcggattctc gctcgggtgg agcagatgtt tttttccatc gcatgacgac
+   237061 gatgaagaga tccgccgatc ctgaagctga ggaagaccca gatccgtcgc ttgggaaatt
+   237121 catcgagcag gtggtgagcg gtccaactca gaagggattt cctgcatctg gagcttttgc
+   237181 gactcctcgg gcaagacgat ctcgcggtcc gaaaaggttt cttggcaagc gcaactaccg
+   237241 cagggctcgg gcaagaaaac cagggaagcg tgacagagct cactcgtcca aggttagatc
+   237301 tagcgatggc tctacacgac cttctgcgag atatggtatg cgtttccgat ctcctacttt
+   237361 gcccttgtgt tctagacctc ggacatggga atttctgctt gatcttggtt acgatagtat
+   237421 gtcgctgaag agcgctcgtg tagcgagtga acgaccttgc gatcctggac gagctaacca
+   237481 tgtgtgggag ccgggtggat tatcgcctga acaagctcag tgggactggg tagggaaggt
+   237541 ttacagtcgt gatagagttc tgaagctaat gggagagctt catgaattgg tttacaggga
+   237601 atcaggaaga cggtaaggat cttagattgt caaggatctt cctcataaac acgtttccgc
+   237661 ttgcagctga aacaagcgga gaagatggga ccaacgagat cagccaccag gaagttaata
+   237721 agatctgatt tttggcatcc tttgatcctg gaagaacttg tgctgaaaag tgagttcgtc
+   237781 aaagagctct ggcgaggcat tagagctgtg aagggaaaga aggttaagaa gaggaagaaa
+   237841 cgggaaccta gcctgccagc tttggaaccc cagaattttc cctgtgttgg ggggttggag
+   237901 gtatgattag taggcttaat tataaagttc tcgttcaacc agaactagat aaagctacaa
+   237961 gggcagtctc tagaattact tacggattgg ttgctgttgg taaagctgat gggtatagtt
+   238021 ttgaggttgt ggattgatca agttaactcc actgacgaag ataagcttgt gagtttcatt
+   238081 ggatctggtg ggtgtcaaac acactttaag aagcagctag tagcaaggaa gttagcattg
+   238141 tttaatcccg aaaaattcat gaacctaaga ctacctcatg acaagggaat tctagtcttt
+   238201 gctggtcata gaattgctca aaccacattg tgagattgat gccattttga tagaaatcgt
+   238261 cggtatataa agagaaagtt tgcgatgtaa gatgtgctct tacgaaggaa aataggattg
+   238321 ctaacttttc catctatata aaatgaaaag caagtttttg tatcaggaaa gaggcaagga
+   238381 cagatccgcc gaagaaggat cggggttaga actagtctga agcttaaggc ttttgctatt
+   238441 caagaaggct ctccctgctg gctccatgta tgctatagct cctgaaagtg tgagtttggc
+   238501 ccctggatag agagaaaggt aaaataaaga ggagagtgat gtttcgtttc aagttcgaga
+   238561 tctcgtgtag gtatacatga gaacagagaa gtgtcctacg gattcagaac cggaaggtca
+   238621 gaaaaaagtg ggaatgacag tccggctacc ggaggagcta ttcaactatt atcaactatt
+   238681 gagtctctga gtggattttc caatagatgg gagcaaggat attctgtctg aagaaggacg
+   238741 cagaagttag tgcgtacgct tgatacatca aagtatgcct cccgcttaca catcagcagt
+   238801 cgttctaatt tttcaatcaa agtagtagct gtcgtattcc agaaggaagg gggattgata
+   238861 taagaagagt cagacttctt tttaagcagc aatcttttac agaaaagaga gtagttgtcg
+   238921 tcgatgtggg atagtccctg aagaatagta cccatgtcaa cggattgcag gcatagttca
+   238981 actcatagcc ccaatccacg ggtgacgata ttgtcattgg ggatctgtat gctttcaaaa
+   239041 gagtcgttag ctgatctagg agtcagtggt caataagtag gcaaaagact ctggggaagt
+   239101 aagcgaagct attgattggt tgagcccctc aacaaagcat gtccagcact tatcttaagt
+   239161 tcctttaagc agatctgtga gcaatcctag gcctctcttt cagtaatttc caaagagtat
+   239221 aggctcaatt aaaattattt atggtaggca gagcaatcgt agacccttac ctagctagtg
+   239281 acctggctct cttcttcata cttactgcac gcaggataag atctacttgt cctcctccct
+   239341 tccctttacc tttaaatgaa agctttcacc ctctccctca tctgcatcca aaccggagcg
+   239401 actttgttca atcgcttttg cacttttttc tctgcctggc aggcttttca tagttcacgg
+   239461 cccctgttga gtagggtcgg tataggtaag gcaatcgatt caaaagtaga ctgagagtgg
+   239521 aggtttagaa taacctaagg cgtactgtag aaggaaagaa aggtaagcaa tcccagaccc
+   239581 aatagggcta cctttcttcg actgagacct atctgcttca ttcccgagtt tcacttcact
+   239641 tggacacagc caaagaaaaa ggggggcttg atccaccgct ttcaagtcca gttaaattag
+   239701 agaactcctt tttagaagaa taggatagaa tcttccgaat cagtcctact atggttggac
+   239761 caaggtcatt tcgactctct agactttctt gcttctttcc atcctgagaa gtagatcctt
+   239821 tggacctttt ctttcctgtt gaacgagtgg atgttttgaa cgagtgttga gcgagtgaaa
+   239881 tgccccataa gctataaggg cgagctatct ctttagattc tgtgagctgc gattgaactg
+   239941 aacgttccaa gtgcatccag caggaatcga acctacttgc ctctttatta ggttgggcgc
+   240001 tttaaccatt cagccatgga tgcaaagaga gcgagtgaac taggttttgg tattccttct
+   240061 cgagcttcaa tttcttccgt tataggagat tcgagaaaag atggaatagg aagacgtgtt
+   240121 tccagaacaa acaagatacg ggttgagaag gggaagttag caaagttaga caagattgga
+   240181 atgggcatag gaacatgtat tgatgtacca gatcggctaa tgctcccagg ttgatgcgat
+   240241 gtattccacc agttgactga agactttatt attggtatat cgatcggtcc agcacgaatt
+   240301 gaaatagaag ccggttcgac aggaagcttt tgaaaacgca gtgcacccag gtaaataaga
+   240361 aacgagatga atacagaggt caaacgagca tcccacaccc aaaaggtccc ccacattggt
+   240421 cttccccgaa accccccagt aactaaggta aacaacgtaa aaaaagcacc catttctata
+   240481 ccggttccgg aagagcgaag ataaagggga tgttttgtta ataggaacaa gaaagtgttt
+   240541 atagccgtgg cgatataaac aataatactc atccgagccg caggaacatg tacatagaga
+   240601 atacgagaat ttccaccttg ttgaagatct agtggtgcta ccccaagact taaatgaata
+   240661 gccatcgctg ttaagaacaa ccaagaccca atgagaattt gcgcgtagct tctggtcttt
+   240721 gacatcaaaa aagaaggttg taataacgaa acggacatgt gcaaattttg tcctagctag
+   240781 tggaacaaga aagacatgga tctataatac gcaatcaaag gatttccccc aacgaataat
+   240841 tccccctccc ctgctttgtt tgttttcgct ctgcttgtgc gtggcactcc ttgctggcgg
+   240901 agcggcgcat agcgaaaaaa agaaaaaagg aaactcacca ccaaaagatc agtccaataa
+   240961 tcccactcac tggtaaatag cgcaatactt cttcgtgaat ctccgctatt tgaatatgga
+   241021 acatcataac aacgaaaagg aatgaaacgg ctatagctcc tatatgaact accgggaaga
+   241081 tcatagcgaa gaagtcgaga cctaacaaaa gacgtaaacc tgaaatggcg gtttagtcgt
+   241141 cggaatcgct tttatttatg tcgtagtcag acagcaattc cagtagctcc atccaaaact
+   241201 tgtctccgtt ttttcctaac cgggaataaa gatagtagaa tcgtttctta cgccgatcga
+   241261 tatcgatcaa ctcatcgtca aacatcgcta catcgataat tctttttata tcgtttttat
+   241321 ctggaatttt gtatcttcgg atactaagag ttcatgtgtt tttgctatta tttttgcctt
+   241381 gattttttcg catttaccac tctctgcttt tacttgctcc ccgatggtat ctgtgttttg
+   241441 atcctctacc tcgaggttca agaaaaggcg ttttgagcca ctttccttgt actctttcga
+   241501 ccagaaaggt gaacccctct gctgatcgtg agagtgctca ttctcatgag cataatatcc
+   241561 ttctaaaaga ccaccctcag atggatttgt cgggggcatt ggcgtatttc ctcctcctag
+   241621 tcctccggat accaccgggt agaactgaga aatggcacga cttaagtcgt ccatccacaa
+   241681 gtatcctaca gcaaataaag atttacagag aataactgaa agaacataaa accaaatgag
+   241741 ttgccatttt ttggaaagtt tgtctataat aggaagcgaa caaaagtaat ctaaaaggac
+   241801 ttttatgctg caaaagactg ggatgggaaa caaaacagaa tgtacaaaac ggataatacg
+   241861 tgtaaccagc aaaccaaaga ccaaaacttc agtacggtcc gtgcgccgtt tttcttctct
+   241921 accatgacga atcaattgat tagctgtagg cattattctc tttcctttct tttccccccc
+   241981 attttgtgcc ctatcacttt actcccgatc cgaagcaccc cttttccatt catatagaaa
+   242041 tccaatcgtc aaaataaata aaaagagtcc atcatggacc aaaatctgcc cgagacaaag
+   242101 ctgagctgtc aaataagacc aacaagggat aagataggga ggctcctcta aagctggaac
+   242161 ctcttccaac cccagcacgg ctcttccccg tcgactcctc gtacgcgtat cacaggggca
+   242221 aacacgcctt caaaacatct ggatcataca aacttctcat tcagtggaaa aaaccttacc
+   242281 aagactagtt gcctcaacga attgatatgc ttttgctttt tgtgacaccg gcgcgaatgc
+   242341 tgaatacgga ggaatacaat gaaatcagaa caaagcaggt agaccaagct agcttacaga
+   242401 attcgaggaa ttaaaactca gaataagctc ttttattagc acgtttggta gccagaaagg
+   242461 aaagaggttc tcattcacag tgccttctcc ttctgtgatc actgatcaaa aggacattca
+   242521 ttcagtaagg gattgatcct caaagtggtt cttttagcac aatgaattac ttatcttata
+   242581 atgaattact tatcttatat agtagtgtaa ccttctcttc gtacaaaaaa gcctattctg
+   242641 aaagccgtct ttcaatgcct gtaccctagg gcgggctagg ttcaagggta cgcacctagt
+   242701 agctgcgagc ctaagattaa aggcaagttc ccttggttcg gcacgctttc actcagtatg
+   242761 ataggctacg gtcagattcg ctcggtctag gtatcacctt gatgcgcgta gttattgcct
+   242821 tgatcccttt cctttgctcg ggtcattcca ttgcctgaaa aagtgagcct ggatcttcgg
+   242881 agaaaggatt ttagagctct gtggagaaag tcttgaaata ttggatgctt tctttcgtgg
+   242941 ttaggcgggc atggcatcaa ctctgtatag gtgggttggc ccggccggcc aaagctagat
+   243001 atagatcggt tggcactgga tctgctggat agaggtagcg gcacagacag ctacggatgg
+   243061 atagaattac gtcagcttgt cgggtcagta gatatgccca gcatccattt tgactagaaa
+   243121 tgaaatggta cagagaagcg cagtgagcgg tagcaaccat tgactactcg ggccgggcct
+   243181 tgcagccaga aaatgggact cgatcagctc agcccattga ctcttttatg gggaactatt
+   243241 agagacctag gaaagtcaag atgccaccgg ggcgtagaaa gaaaaaggct tcttcggcat
+   243301 gatagctctg ctctgtcgga taatatgagc cccttatgga acagctcact ccggggtgat
+   243361 tctggtttag tcatagaata aagtaaatcc atgcgtttgt cttcgccttt tcggaagacc
+   243421 aagacagcta gacgatagat taaccaaagc cataaagaga agatcgatat cgcacagaaa
+   243481 tcctggatct atcaaaaaag cccgccaaag gtgtcctcac atgaaatcat gaaaaggatc
+   243541 cgagcccatc aaagactcta ctacatgaaa ggatctgaat ctatccaaat agctgtggcc
+   243601 catgatccaa gggacccgca accagtcaat catgctatcc ttaccttcca accaatcgga
+   243661 aaaaattacg ttcaattcag gatcgaatac atctttggct tatgagttat gcgttttctc
+   243721 ttcttggcac tttatgtctg ctagtgcata atcatgctta tggaaatagt ggaggttgag
+   243781 cagtttccct gatgaagaga actctccttc tctaaacaag cttgcctgtg ggcgtcgagt
+   243841 aagagcatct gggaattgat cttttacgag acagcccgga gttggtctcg tggctaggtc
+   243901 tttaaccgaa ccggttcgga atagaaagtc tcggtcaatg aagtggagtg ggctgaaaca
+   243961 gaaagttggg tattgtattt ttatgctcgc ttggccggaa tcccctatag tgcttttcgg
+   244021 gcgaagtgac ggattacttt ttgagtgact accctcagtc tttcagagcc tcgggataag
+   244081 ctgttcttgt ttcataagca aatgtggcaa cctggtggta tagtgagctc tctctctgga
+   244141 ttaagcttcg gtcattgata gagaggaatc acctagccta gcattagact cgactagcta
+   244201 gccttccttg ggcggattgc tttgaatagc gtagttttct tagctccggg gactgcctta
+   244261 tctacactat gagaaatgtt agatgcaacc attatagtat agatatattc ttagatgatt
+   244321 actatatgtg atttctttca tacagtctta acaaatatag agatcttcac cataggagac
+   244381 tagcttgaac taaatcctcc aaccttaaag cagtatgaga gaggacggat accagaaaga
+   244441 atcacctcgg agccgctatg ccactatgag aaacgttata ctatagatag acatgagatg
+   244501 atattctata aggaatacaa tgtttgaaaa gcaagaatag atagagttga aagaaaggcg
+   244561 ctcgtataga gccctcgact atgtatcgga catatggcag gttgggaaag cgcgcttgga
+   244621 aaagaaagac tatcctcgag gtaaatgtaa atcaatctat ctcgcattgc ccaatctgaa
+   244681 tgctaaaggc tcgtaacccc ctaaaaggaa actcaccacc aaaagaaaaa ggtaggttaa
+   244741 ctaacgaaac tcaaaactac attttaattc ttgctcaccg ataatccttt agaaaaaata
+   244801 ttatacagct gtatacagga ttattggtat agctgtccac tagccgaaaa tcggacacca
+   244861 aattattgta gactgaatta agtaattgca tagctgccgg ggatgccgga ttgcttgagt
+   244921 tccttcctat agaaaaagaa ttaattcagg agttccaggc aggaaaagta tacaaccata
+   244981 cttaaaaata ctgggcgcca ttcggaatag aaagaataga gatgttgggg gtggaaagat
+   245041 tccatggatg tcttatagag ccatatggga aagataagtc tgtgttcggt ctctgggcat
+   245101 cggtatcgtc ttatgggctt gtcgactcta gaacccgaaa agggaatacc aaaaatgaag
+   245161 ttaggggatt ccggcgcgga tgatctgtct tagagtctct tgctaagcca ctcggattcg
+   245221 ggtgttctca aacggtgaag actttaaggt accaggtctg cgagcctatt cagttcttgt
+   245281 tcttccgaca tcggatattg cccgactaaa acgacgtgtc ttgcaatcct aaccaacatc
+   245341 gcttcggtag ctcagtcagc atcaaaggac acatgggggt ctcgaaacga acgttttcga
+   245401 ttagtaacat ggggtcggtc gccccattcc ttcttaggta ccagcataat aggcagagac
+   245461 agagctacga ttccccaggg cgagggaaaa agactgacta gacccggctc gaggagattc
+   245521 aaagggcaga ccctcttcac aatcagcttt ttaagaagca agaaatgcca cttttggccc
+   245581 tcctgggtca tacctatggg gttggctctc acgtttattc gtattcaagg aaaggtgtca
+   245641 atctagcgta agccgcaata ctagatcgat acgagatagc agtcaaaagc agcacccatt
+   245701 gaggaatgaa aaaaatccac cccctaacta gactatggaa gatcttctcg ttttggctaa
+   245761 acggatcttc cctgtactgt aacagggtca acaccgcctt ttgaggttgg tgtccgagct
+   245821 ttccgcccag ctttcttcga tcgtatgcca cgaactatgt cttagatacg taagggcccc
+   245881 tgtccaagtt tggctttctt agtacattgc ctttgtactg gaagctggat catacgaacc
+   245941 agcaatgacc tgaagctttc ttgcagcaag atgatcagtc cgagagtgct gggcattcat
+   246001 ctctagcttt tacaatggtt atttcattcg ccggatattc accaaaagaa agtgctcgct
+   246061 agccgtaagc ggaatgactt tcttgatatg gaccgatcga agcccttaag gatagcttac
+   246121 tatggtaaat agctagaccc cgaacctatt cccagtttcc gagtaactat cattggtcaa
+   246181 tttcctgttt cccatggtct atcattggtc cggtccttca gaggcaggag aagtacaaga
+   246241 tgcaagctca taaacgagac ggtaggatgg cgctaatgcc aggcgatcat tagacttcgg
+   246301 ttaatggaaa gtggaactta gaaatagctt cttaagaaag agatatatag tagtgcttcc
+   246361 ccctccttag aatggtaatg gttcgttctt cgccaggatc atctccaggt agtgctgctc
+   246421 cgtcttcgcc ttgaccttgt gctggccgct tttgtacagc atgaaggggg gaaggttccg
+   246481 tctctgtttt atcttttgct gcgaactttt ttttcctctg ttctctgtac agcacactgc
+   246541 tgctttttct cggtactcct ctgctttctc tagcacagtg ctgggttact ccttcctaac
+   246601 tctcctaact gttatcctcc tctgtgctct acctgaagca cactcctctc tctgtgctgg
+   246661 tatactccta actcttaata gtacgcacac acgtaactct taatagtacg cacacacgaa
+   246721 agtgaaagta aagggattgt ctggccctgc tacctagcca acatgagcac acccagctgt
+   246781 atttcctaga tttaggtttg attacttttc tcggtatagc agcgctacta cgcttcgctc
+   246841 acactctctc gaccaatgct gccttctcta acaactctca gggagactaa agaactgaag
+   246901 gcttcggagc ctagcctcta actcaggtga caggccagag aagaacttcc tagctagagt
+   246961 caactcgctt caggtgcctg ccaaatatac ctgcgggctt agttgaacct cctgatgtag
+   247021 accttaacag agtagctaaa ctgatccaat catcatccca gtgattaggc taactacatt
+   247081 cctttttttc gaaagagaaa gaagtcaaag gcgttcctct cctattagtc gagtagtagg
+   247141 cgttcctctc ctattagtcg agtagtaggc gttcctctcc gagtaagaga ctgagccggc
+   247201 taccttactt cgtctggaac aggaacagct aacaacagta cgaagagtat gaccgctgac
+   247261 cttctgtgga aagaaagaaa agatattgat tctacgggtt cctcctatga gataggagaa
+   247321 gctgggttcc tcctacctac ttgaacagat aactaactac acctactgga ctggctactc
+   247381 tgcttctaag ggcgcgtagc cagatacaag ggttaacaac agatacaata gttcccatcc
+   247441 gaacaaagag aggaggagat tattaactga cagcagaacg cagatcgtac ttagctacac
+   247501 atgataactc agccagggac tacggacaac agatgaactc ttcatgctac ttattcccag
+   247561 agaacaaaga aaggaagtgg aagctgcaca gcgggaagca gacacaaccc agaaacaata
+   247621 cctgcaagac ggagaggggg aacttagtga tataaagtgg tagacccctt ctcctatccc
+   247681 acgtatcaac gggaggactg cttcgcctac ttcacgccta ctcatacaca ttccgcccag
+   247741 aagaaggaag actcttttac cgcagcagca agggatcccg ggccccgcca tgcatgaaca
+   247801 ggggttcctc cactggaagc aaccaactgg ttattcatgg ctagtccgaa cgaagagaag
+   247861 ggagattctt tcacaaatat gccatgtcgc ttacgctcct cgcccagaat atacagctgc
+   247921 aagtgctggt tcagctacaa cggtaactag aggaagaagc cctagtatgc acgcacctta
+   247981 ggaacactct agctacattc acattattcc ctggaccgga acattcataa gatattgact
+   248041 aaagacagct gagcaacaag caacccttaa ccgctcacac ggcttacccg tcgtcgtcga
+   248101 caagctcgct tcccgggtaa tcctaacatg agctacactt catgacagaa ctcactggaa
+   248161 caaagaagag cagactagta agggaccaac agggaacttc tagacaatac ttacatattc
+   248221 cagcctatac taacacatat tctaggaaat gacaggactg gcaacaaccg aagtgccaac
+   248281 acccgataca ccgctagaga gaagtatcca ctagcagaga accacaggga agacatctag
+   248341 ctacttcttc cacattacag acgatactaa gatattgact cattccagta tgacaaccgg
+   248401 aggcaacaac tgaactaaga tattgactcc agtatgccag ctacaccttc tacactggaa
+   248461 gcaaggaagt aggaactagt caagacccta gggaaacatc tagcatcttc taccccttcc
+   248521 cggaaacttt cttaataagg tcaaatctgt gcagtgtaag cttgatctct ttgaattggg
+   248581 tattccccca acctgtcaaa caaagaaagc tagtataggc atgaaaacag cttgcttaga
+   248641 gagcttgcct tccccttaca ggcaagcttt aacttccctt cctaacgagc caacaagtta
+   248701 gagatgtaaa gacggttcct taccaggagt caatcccaca agaatcctag cttgattggt
+   248761 ttgatgtaag cgaatgcgag tacagggaaa cgagacagct cacaacaaag cttaggctca
+   248821 cttctgaccg aaagtccaat tcaatcattt aaagctcact gagcacttct ttccttagtt
+   248881 tgaggacgtt aatgagtttc aaaggataat agctgatcta gaaaacctga aagtctctat
+   248941 atcagacgag gatcaatact gatgtcctta ccaaaacata attcaattca attcaacagc
+   249001 aaaagaagga tagcttgatc ctcaatgcct ttgatcatct ttttctaggt agggaattgc
+   249061 ttatctcggt aagctaagaa tgttagaaac aaagaactag gacagaacgg gaaatggaga
+   249121 aggaggttag aatcaaagaa cagtaaatgg agaaggaggt taatgtgtat ttcattctat
+   249181 ctacatttta actaattgag tgtatccagt cttatccatt aatgtaatta caagaagaat
+   249241 agtaccaagc atgtaggtta tagttttcac tttactgggg tgaaggtttc tgtagttcaa
+   249301 gtgggtcaaa agtggtttgc ggaaacatat ctctaataat tcgattgaga ggctcctcgc
+   249361 actcacatgg acttaaactt ttgtgtatta tacaaacatg attcacatac acatctcgtg
+   249421 tatattgcaa tacatttggt aaattatctg aaaataataa tgaaggtttc ttcaaaagag
+   249481 gtccaggagc tatttccatt aacactgtta tactgaacag tatacaaaag aagactgcag
+   249541 tgcgagaatt tatggaggat gataatgcat ttgagatatt cttctgaaca ctttcatatc
+   249601 ttttatgtaa aacatttttg atgagaaaat caccagtagt atccaaacac tttaatccag
+   249661 atgatgggaa aatgctttgt ttaaacctac tacgaagtat gcttaatact tcattattac
+   249721 cagttgatcc atatggtgat gtgaaatcta agtttaggtt tttgatataa ctatgaagta
+   249781 tattaggtag tattactttc tctgttgagt ttgtaccact ggtctgatgt ctcaatttct
+   249841 ctcgcgttag gatatgaatt ggattttctg tggaactgtt gaccagtggt cctctaaata
+   249901 aaggagattt cattttcata tctaaattca ccaatatgta tagagcacca ctggtcagaa
+   249961 gtaagaaaga taatatatat gtctattggt taaatcaatt ctggcgagag atcaaattca
+   250021 gattactgtt caattatttg agttcagtct aattttcgac ctaacaagag caacggaatc
+   250081 acgctctgta ggatttgaac ctacgacatt gggttttgga gacccacgtt ctaccgaact
+   250141 gaactaagag cgctttctta tcacaattga taagactgta aagacgagga ttcttttttt
+   250201 ttttataacc ccaataaatt ttccacgcct atattatcat atataatatg agaaattgaa
+   250261 agattatcta tgtccaattt gaatcgatta ccaagccatg tcccctcatg ctatatgaga
+   250321 ctgaactctc agtttgtata tgtggaaaga gacctaagag aactgaaact gacttctaac
+   250381 ctaggctctt actctcttat tttgacttct ttccttcttt tcccgtagat ctcccaagaa
+   250441 aaagccaata tcctgagagt gatctataaa ttcccacgaa ggccaatcag catcactaaa
+   250501 acaagagcaa gttgagagga aggtgtcaag tgcagacaat tcagagaaga tatctcaaca
+   250561 atcttttgaa agcgtttaat aggaaggaat agaaacttta atctaaacct tcttttctat
+   250621 ctaaggtctc atcagaaatt gtgaaagttc actgagtaag ctatgtcagg tctggttatg
+   250681 ctagcaacca ccaagaatgc tctagctacg atagagatga ggattagcag caaagtggaa
+   250741 cttacaacag cagagcagac aaagaggacc cagaagtgac tatgggagaa gggagaggtt
+   250801 ctttccatgc cagccttgac caatagatca gtaatgtttt gagaaagata gaagcagaag
+   250861 ttctactcac tttttgacca agattgagcc agatacttta tagaaaagat ggcatttttg
+   250921 atgcatacta tgtcatcaat atgaagagga ttggaccatg tgattctaag atcatcaaca
+   250981 tatactagaa tgtacagagc atgatcaccg ctcagaaaga gagagagaga aagagaccgt
+   251041 aatgtctgag agaaaggaga gattgaaaac ggaagcaagc ggagggatag gagaagagag
+   251101 aagagagatt gtagcgattt actgtatctg agtgcaacac aaacaatact aagaccaaga
+   251161 tcttccgact tctttttcca atgtatcttt gttgaagatg aatctcaaac aacaaaacta
+   251221 agggatatgg cattcatttg aaaatgaact aactaatact gcttccaatc aacatatttc
+   251281 gatttcaaac cacgaatccc cccaacacag caatccaaac ttgacacaac tggaacctca
+   251341 cattccaaac ccaaattagc attcgaataa ccagatttat atattttccc atcagctcaa
+   251401 catcagagga atattaaggg aatagtgaag gggatcttat acctgaagct gttccaagga
+   251461 gatccacgcc atggaccagt ttcctgatat gtatagaaga caatcccctg aaaccaagta
+   251521 atccgcgaag agtcctcagt ctccatggcc atcttcactc tagtaccagg agtccagaat
+   251581 atagccatgg aagcctcaac atcttctgcc ttcaccacaa agtccgacca tccagccgta
+   251641 ggataataaa ccacctcaaa cggcaaaccc tgagctgctc tattaatcgc ctcagagaca
+   251701 gcttcagccg tcaatttccc tttcccagtc ctcctaaacc ccttcttcgc acttccgtcg
+   251761 tcttccttag caacgcctcc attgctctga taataactac agtactcatc tccgccgtag
+   251821 aagcttgttc ctccgacgtt actagagatt ggtgctcgcc ttacaccaat aaacatctca
+   251881 tcagccgatt tcctcataaa tacaacagaa tcaccagcga ttagcttctt attgttcacg
+   251941 aacttgctcc atccggtggt taacaagtgc ctcctcggtg tggttcgact tgacgaagag
+   252001 cctttatgcc aacaacaact gctttcgaag cggagctttt tgagtgtttt ttgggctgct
+   252061 gaaagtttaa gcaccaacag gtgagagtgg aattttaaac tgactgtaga gaggtggcgg
+   252121 aggcaatcaa ccacaggtgt ggcctttgca tcggaacctc ctaagtcaaa ttgataatta
+   252181 tcctgccatt gaaggttggt ctgttaccgt cattgaaaaa gaagcgaaca gagcggcaac
+   252241 attgctcgga gtgtcgcaag agaccgaagt accagtccta tatatctagt gaggaacctc
+   252301 ggtggttggc tgctgttcta tttcataaag agaggaaacc cttcgaggtt ggccctagag
+   252361 tgaagttgtg aagttctcga acagaccctt ccaaaggtcc gcccggttca acttgagtac
+   252421 tccattcggg gtgatatata aaggctaaaa tcgtcagcag aatagcgagt tttgaagccg
+   252481 gttgaagggg cctctgagac aaggaagccc ttgcttgaaa gagtcaacgg agcaatggat
+   252541 tattttccgt tacttatttt ataataaagt ctgatgacca caacagggct tgttgaatgg
+   252601 caggcttgac tggcaatgga aaggagatgg ctcatccatc tcagatgatc ctcaatcaaa
+   252661 gactccttct agtggttcgc ttaaaagatc tgtactaaaa acatccctcc ctaccgggaa
+   252721 atcacaaaaa aagagtacgc gaaaaaagca cctttaggtg ccaggagtgc ggcgtccagc
+   252781 catgcttcga atcgagtacc ggcaccgatg tcctatttcg tctgattcaa caagcctcct
+   252841 atcccggacc aaaagaagtg gctggataac ctttcttccc ttctactttt agtagtcgat
+   252901 gttataccgg tactaaagag aagtaattga tagtgttcaa catccctatc aatgacttct
+   252961 aacggtcagt ctatagcgct tctagccagt ctgacttaat aaaagatctt ccagccaagc
+   253021 ctgtttcagt tgaaatagaa gtcagccttt ctctggtttt atatatccct gtgactggtc
+   253081 ttactaagag cagctactgg gtttgcttac tacagaccct tcattaccac tttatcgata
+   253141 aagggattga ctgctctagc tacaacggtc tcgctcctac tacttttacc tcttattaca
+   253201 taggataaac cgagaagact accgacaaac agcaggaatg aatgtgaccc acggctacaa
+   253261 acgaatgaat agaggggttt ggaactagaa ccgtatccca actcaaggcc ctagaaaggt
+   253321 agggcgatgc agtaggaaca gattacttgc tgcttgatac tgaaggaatt tagtgtactc
+   253381 atcctcacat acagatagag ttttatgctc agtagtggta gaaggattag gagggggaac
+   253441 atagtcagaa gtagctacat tcgcaaggtc taccaaatct caacaagaat tcactaggct
+   253501 taacgccctc ttcaccgaat caccatagtg agaacacttc atcctcatcc acttctccac
+   253561 tgccataaaa agagccttaa tctccttctc gggcaggcca taacaccttc tttccctcag
+   253621 ctaaacacca tataaagagt cactcctcga ggtaggtcag cgactggaag agtcgtttga
+   253681 aggcgatcag cagtcggtct gataaagaag tctgtatgac catctctgtc tgttctcctc
+   253741 atccccctat cataactcgg caaagcgtta gtgcctttcc agctagtcgt tccaacagga
+   253801 atgcttaaga taacgaacct tgaccaactt gttcaaaaac agaggtcggc ctcttcatcc
+   253861 ttcaagcttt gagtggaccg ggtagcgtca atcagagtgt gaatgaaagg tcaagggaac
+   253921 aagcaacgga ttgaacaacg ttagcgaaag ccgttgcgct aacgcgcatc cgttttcttg
+   253981 ctcgtcttcc ttgatctttt aatatggaag gaaggctaga atcttctttc cgctttcaaa
+   254041 tcttccgatt tcgatgagaa taaaaaagat caaagcctgc agggtcagaa taatggaatt
+   254101 agactagtta agttcagtta gcaattcaat cagccttagc taataaagcg aatatccttc
+   254161 cgcttcaacc caatcttact cgaagaacta gttatgtttg ccggaagtac gatcagtgtg
+   254221 aggggtagta taagtgatca aggggtggta actcatatta ctggaggaaa ctttgcacag
+   254281 agttccatta ctattaatgg gtggaattgc cacaacatgg gcgttcttct tagcaagaat
+   254341 tattgcagta ggataatggc taggaggatt ttaaaggcat tatggcatta agatttccaa
+   254401 ggcttagctc agggactgct gtcagtcgct tcccctcatc aactaattaa tgagagcttc
+   254461 cgtcgtcagc tcgttcttga cagatccgat cgggtgttca taatctggag taaaaggatt
+   254521 cgaacctttg catgccggta ccaaaaaccg gtgccttacc acttggctat actccatacg
+   254581 gcctctgagt tttggacggt aagggggggg agggggaagg gagaagcagg gcggggttgc
+   254641 gcgcgagccg tgcaagaacg cgggaatata gcaagtaagc agcaaggttc tttaggaccg
+   254701 actacaacaa gctcgcgact ctcatagaaa gaaagggtaa gctctctgct ctctatttgc
+   254761 tcgcaatgct atacctatca aagttggcga acgcttctgt aaccttagac tcgttacgct
+   254821 cttcgactga actcgtttgg ctccgcgtcc accgaaagtc ggtaaccttc gtcaccgtca
+   254881 gcatttggta ctctctcgat agcttcaata gttcactgaa agcctttggt gtaatgaacc
+   254941 gcaagccctt ctgaaagaaa gaataagaga aaggcttggt tgcgggaacc gacggtgacg
+   255001 aagcgggccg atgcggccgt ttccctttgt aaggtaggcc ttttgataac taattagagt
+   255061 tgacatgaaa tggatcacgg gaaaagacgt atccgatgaa tgaatgattc aatcctccca
+   255121 ctcacacggg ttcttctatt gaagggcttc ccccttcttc tatgtgaggt ggggaaaggt
+   255181 cagagcggaa cctagataga aacgcggagc gccgacccct tagccgatac agttaccatg
+   255241 cttaccagct cttaccattg ccgcttatag atgggaggta ggcgaagggt agcttgcttc
+   255301 tcttctccta ggctacctcc actccacatg gttattcgcg aagaaaaggc agcgagcggt
+   255361 tcttcgctta gtagagctac cggccggcgt acgacgaccg cttcttgttc tcgcccagcc
+   255421 gcttcttttg atttgattat tatttgaaat cctagactaa agaagaagct ccagaatccg
+   255481 cgcagggcaa aatctgcagc aggagaagaa agagggtcca ggtcaatttg ataatgaagc
+   255541 gaaggtcccc cttactccct attccctctt aaagctttat aaagctgggc ggttggttaa
+   255601 gaactactga ctgtaaacta tagatagcag ttacagtcac tcaattgaaa tgttcgcctt
+   255661 tggaatgaag atagatgagc aggggagttt tcactcctac gtggtattct cttccaacaa
+   255721 gggttacggc taaaacaccg cttacttttc aaagacaaac tgctcctaaa ccaagcctga
+   255781 acacgtttcc agttgttgat cgacgcgtgt cgtgatcacc agcatagtcg gcgtcacaat
+   255841 agccaactat cttacactgt tctcctttct tatacagaag accatagtca agtcttcctt
+   255901 tcatgtacct caatattcgt cgaaagtgag gtttctttgg attttgcatg aatcgactaa
+   255961 ccactccaac tgcatatgcg atgtagggcc ttgtcagggt tagcgcttgt gctaagcgat
+   256021 aagaattcgt tctggaatgc cggcgcggac tgaaggatga tgtttattag ctagacaaat
+   256081 ccttcattcg aaagacgaca atcaaagcat aacgactcgt ggaacattca ttcctaaaag
+   256141 gtgtaaacct gtgagatcgg acaacccgta aaaggaagcc gctaggcatc aagccctatt
+   256201 atcttacacc tagggagggt ggccgttgtt gggtttttct caattagagt cgcaaggagt
+   256261 ttgctgctcg ttggtagtgg aatgctgact aaccgtctcc gtccccctat gatgagaaaa
+   256321 agctttccga gtggacgagg accgatcttc tctttcttta gtcttgctcg tcttatggca
+   256381 ttagcctttg cggattcctt aatccgcggg acagctgtct cggcacatgt caatccttat
+   256441 gtccccagcg agcaggtcaa ggtactctat gttgctgctt gcctttatct tccagtctgc
+   256501 caagcaagct tgtttattct atgttattga taaatcgcta tttcccccaa accgctagtt
+   256561 ttcatacgac tcagaactta taacgttgtt ctgtctccaa acagagagct tgtaggcctt
+   256621 attctttgga taaaagtagc gacgagccga ctactacgac cacatgcgca tctagcgcag
+   256681 tggcttgtca cttcgtacct tgaccatctt ccgaagttct aaataatcta ctgatcaaac
+   256741 gctgtagggg cggactgctc tacattcagc cacgccacag tgaccccccg aagcgatctg
+   256801 cctcattgca ggacgaaatc cggcagccaa ttgctggctc tgaataacca gcccagcaag
+   256861 aagttcaatt cttccataac ataacggggc ggggttgcgc gcgagccgag tgacgtagat
+   256921 gcacaagagt acttcgcgcc acaaccatct cttttttata cgttctacgg accgatgcct
+   256981 gctgcttcat ctggtagaaa agaatcatag atatgccggt cattagaagg aagaaccacc
+   257041 ataaaaagat tcctcgtgta tcatctgtag caaaactatg aacgggagct agcaatccgg
+   257101 accgtattga aaaggttcct gagacacagc atggaaaagt cacaatatta agaaacgagg
+   257161 tccaagaatg aagaaggggt aaaattacag aatgaatacg agctgtggct aatacccgag
+   257221 gcataaaaga agcattttct acgggatccc gaaaccacca gccaccccga cctaattcat
+   257281 gataagccca ccaacttcct ggcaagatgc ctacggttaa aaaccaccaa catgtcaaga
+   257341 tccaaattcg aattggttcc tggtcctgat cagagaccac tgtgttcgcg ccggcggtcc
+   257401 aacaaagagg cgaagtagtg gtctctttct ttccattacg aacgacacgc ttcgcctgct
+   257461 ccctccccgt gtccacgtcg tcgaagaaag ggtcttttgc cccgatttac cgtcgtaggg
+   257521 agcactcttc tgagccccac atgattcgcc gaaggatgag atcttgccta tgcagtagga
+   257581 ttaaaaggaa agttatccga acagtgagac ccgggaagac tactacttct gtatttttga
+   257641 taaatcaaag ggctatttcc tgattcattc cacatttcac attccccttg ggattagcaa
+   257701 aatcaaatac ctttttctcg ttatagctat cctttgcttg tgtaatgact cgaactttct
+   257761 catttatgaa gacttgaaat cgcatgcata agtgttggtc catcagagct tacagttgga
+   257821 gatgccgcaa agcaaagatc tggacgaaat gagagtcctt ccgcgttgga gctagggaac
+   257881 ctagggccag attgatcggt aactgaatca gttaaccaaa agatgctgac acgatcatta
+   257941 cttgcacaaa gataaaaaaa ttgagtactc tgcccttcca gtgctacctt tcttcgatgt
+   258001 gatttgattc tagaagatga gcttgctcgg atccttttcg gtagctaaaa gctttcccat
+   258061 aaaaagaata tgtaaccccg tatcgataga cgaaaaaacc caacaatgtg aaaagaaaga
+   258121 tcttctccgt ctctcctccc gtaaaataag aaagtatgaa aaactggatt aagagacgca
+   258181 aattcttcaa agaaagccat agaagttgta tcatctatta tattccagcg tgactgctca
+   258241 cgtcgggata atcttgattt tagaatacct ttaatatact ttgaaatata agtccactcc
+   258301 ttcttactgc gtatgctctc ttctctcact gcgaatgatg ccaaaaagag ttcagctcgc
+   258361 aagcggagtc tcattggccg tgaatcagaa gctaccatgt tctaacttgc acttagtcaa
+   258421 ttgggaagat taaccacaac aaatcgtaaa catcagatcg gacctttatt ttattagcgt
+   258481 tctttctttt agtttgaaca acaaagtccg ggccaacaat atcccaagtt aatctgtaaa
+   258541 agtcagctgg aaaaccatcc gagccagggg atttgtcgag cggcatgctg aaaagacaat
+   258601 ttggaatctc ctccgtagtt aacctgttgc agaagattcg cttgcatcta gttggagcat
+   258661 ctgatgaagt cgataagaga gcgaagggat gagatagagg ctcgagtgtg gttcaccggt
+   258721 gaaaactgaa acaggttgga gaagtaacga atggcttctt cttggatatc acttgttcga
+   258781 aatagatcca caccttgatc tgatgtaatt ctgcggattg cattagcaga ttttctggct
+   258841 cgcactctat gaagatcgcc gatgaaaaaa aggcatatcc actcgtagat cgggttggtt
+   258901 atatttatat aagggctatt ttgcatattt ttaatgttga taggcagtag ggtaaggaaa
+   258961 ggatcatcaa tctatgataa tttccgagtt gcaaaatcca aatacaacct gacttgaatc
+   259021 cttactggac tacttcatat gccttccgca cacaggacag aggaattcac tccacacacg
+   259081 agccaactcc cctaaaaggc ctttctttca tgcaattggc gacgccatcg ccttatatct
+   259141 ttcttgttaa gatcttaagc ttcctggacg tctacaaatc tgttaggcag gaaatgttta
+   259201 catcggggga agagttcatg ctatcatgtc gctaaaaagt gggctttttg cctctacctt
+   259261 tgaaagtagt gttaaaaagc cttacttcat tggttcgatc agtcttttcg tataaatagc
+   259321 attgggcgtt tagcaccggc cttctattca ttcccaccct gaagcagtta ttcggactaa
+   259381 gatcgtcatc gtaccctcaa ggcaagctcc tgaagtagaa gtgtgaggca agacgggctg
+   259441 ataaactacg aaaatcgttt tttgagggtg gaataaggca cgcggaagac ttaggagtcg
+   259501 gagatcatca aaaaataaat ccaatgcttg tcgtgggcaa tcctatagcc cctttcctcg
+   259561 ggatcggctt aggctggcta gccttttcta tctgcaaatc tgctaaccga ccgacggata
+   259621 ccactaactg actaagctag agaacaggag accaaaggtc ttcctgctta atcgagcttc
+   259681 ttttagaacg ggtaaaataa ggagctttcc ctccttccac tgtcactttt ctcattcttt
+   259741 tccctaaaaa aaattcataa gctaaagaag gaaataactt acaatgctac aaagaaggga
+   259801 acttcaaact attctcaacg aaaaagcctt tcaattagtc ggtctaacct tcctaacttg
+   259861 gcacacatat gacataaagc catcaaccaa gtcagccaac taccagcagt cgatgagagg
+   259921 gtaaggagga agcgatagcc tgctctcatt gatgatttcg aaccttctta ccctctcttg
+   259981 gatgaagcaa ccatataact gacgatatgg attgagtagc gagatcagtt acactagccc
+   260041 gggcatgcct taggcttcct ctgccacctc cacgggcgaa gtgagtcaaa tgcactcttt
+   260101 cttggctagt gtagtagact ccattatggt cattcgtaac ggctccatat agttttattt
+   260161 tggggcgtaa cgttgtgatt gttccaccga ctgaccgata ctagttccag aggcatcttc
+   260221 cattcatatc gatttgggtt tttctgcacc atattttgat ctgcctcttc ttcgacccgg
+   260281 aattcccatc aaatccttta ctcctcgaat acaatgggat tttacacctg gcgaatcttt
+   260341 cactctacct cctcttatta acaccgtaga atgttcctgc gaattatgac cttcgcccgg
+   260401 gatgtgagca aatatatcat gtcgattgct caaccgtact ttggctatct tacgtggagc
+   260461 ggaattaggt tttttcggtg ttctcgttga aacacgcggg catactcctg ttttctgggg
+   260521 acatttatcc aaagctcgag tacggtccgt gcgccgtttt tcttctctac catgacgaat
+   260581 caattgatta aacgtgggca ttattctctt tcctttcttt ttccccccat tttgtgccct
+   260641 atcactttac tcccgatccg aagcacccct tttccattca tatagaaatc caatcgtcaa
+   260701 aataaataaa aaggccatca tggaccaaaa tccaaacaga tcaatcttgt tgggaggtac
+   260761 tgcccaagga aagaaaaagg ttacttccag atcagggatt aaaaataaaa ttgaaacaag
+   260821 ataaaatcgt atatcgaaac gacttctggc atcaccggaa ggatcgaaac cacattcgta
+   260881 ggccgacaat ttttctgggt aggtagaact attggaagca aatggaaaag gaacaccgag
+   260941 taggatcaaa gaaactagca gactaatcac taaatagata gaaattggtg caaattctga
+   261001 catcattaca gcccactttg ttttctcgct ccttgctggc gaagaagcgg catatcgaaa
+   261061 aataaagaaa gaagcaaaag cccatcccga aaggcttgca gaccttatgc gatagggtcc
+   261121 cccccagggc cgcctttctt gaggaatact ttctcttgta aacgagaaaa ctcgtccgag
+   261181 aggggcccgg tttcttggag gtccttccat atatcataaa gacgctccaa ggactcttcc
+   261241 ccactagtgg tcctcggatt atcgagggcc cgagccccac gcgccatcca atcgccggtt
+   261301 gggtcatata gagccatccg ttggatgatt tgggctttga tctcgaatag atcttcggcc
+   261361 tctattcggg ccatctctat gatctccgca gagggagagc cttcggggta tttcgccaga
+   261421 aggcggcgtt ctatcgcctc tacgctatcc cccccaatca tttcatccct cctgtatgga
+   261481 taggggacta ctggcccagc ttcctccccc ccgggaggga ctggattagc tggaagcgca
+   261541 ggcccagctt cctggggcac ttgatttacc gagggttccc cctcttccgt ttgggacggg
+   261601 aggggactgc cctgatttac cgagggttcc ccctcttccg ttcgggaccc cctgccgggg
+   261661 gggagtcgga ttcctcgaga actcccagtt caaaggaatc ttcctcccac atggaagaac
+   261721 tgcccgaccc ggaagggccg gtagaaggaa gtgcttgccc ccctattgca ttcaagacca
+   261781 aaggtcccaa cacgggcaaa gccagacccc ccactaggtg gatgacttcg atccggatca
+   261841 aagatcccac gaaccccatg gaaaagacca aaaagagata gataatcttt ttctgcggcc
+   261901 ctttcaaacg aaaggaatga aaatgaaagc aaaataggaa gcttacgact aagatcgaag
+   261961 agaatgaaac gcgcgtagtg gtcattatag cgcttccttc tgatcctaga aaacgggcgc
+   262021 aaaaactacc tgccaaagct ccgagcaggg gcaacaacag tcgaaccata tttttcataa
+   262081 atttgagatt ctgtagtttg cttcgttatt agcgtccgag gaacgcatag aagtggttgt
+   262141 ggaatcgaac cactaacaca aggatttaca gtcctttgct ctaaccatct gagctacctg
+   262201 tgaaccactt tgttctcaac ttatccaacg aatctctttc tcttattcaa ttccactttg
+   262261 tttagtttag tgatagttag aggttagaga gctagatcac tcctctaagc agccttctga
+   262321 ttatatacgt attattctat ctatctgtcc ctaccttata ctatccttcc tatctgccat
+   262381 tacttgtatt ctatcgatag gaatggttag ttagtgtgca tagctgcctt gtttctgtgt
+   262441 atgcacagca gttgtttctg tgtgaagcat gtatggaggt agtgaagaac ggcctgcgaa
+   262501 ctaccgtagc taagcggcac tagttcctgt tgcttgcagc ttcagtagaa ttcatatccc
+   262561 ttgttggtat gtgagtaagt tgttgcatga atggtcaggt aggtaagcgt agcagtattc
+   262621 ctgttcatgc atgtatccct gcaggttgtc ttccctatcg aatgagggaa gtgagctggt
+   262681 taatcgcagc ttacttctgg agtggtccct aggcgggtgt agctattgtt gcttgtagct
+   262741 gctgttaagt atctatcttt gtttgccgat cactgatgta atggctgttc cgtgctgtgc
+   262801 agcgcgttcc gaggtctgct ttcctttctt tgtcatttga ctatctctct acgttcgatg
+   262861 ggaacagttg tttcacgact aagtaagaaa ggttagcgaa gccgggttcc tatgtatgca
+   262921 tgatatttgt tccgaggtgg ttgttttctc ttcttttgga aattgcccgg ggtttaaata
+   262981 gggcaagaac atcaggaagg tcttttcgtt cggatacaga catggcaatc tacgtatatg
+   263041 aagtctcgat ttaagctgcc taagcataga tccaacgcgc tactcaagta ggcgggaaag
+   263101 aaatgccctt ctagcattgc cacttgcttc aagctgctct ttctataagc tatattccag
+   263161 tagtgattcg agaaggaatt catgagtgac tgaccatcca gcctttcctt cgtatgagac
+   263221 cggacatgcc agcgaatcga atagcgtagg aagtcaacaa gtcttaatgg tggaaagcta
+   263281 gaaagggaaa aagcattagt ttcaggttgt ttacatactc gactaaaagg aggaacccct
+   263341 cgggcgaata aagtatgtaa ctggcttaag aacaagaaag caagaaaaag gtatgaagtt
+   263401 ctaattgtac aatgctatta cttcatccgc tgtgaatagg ctgaaggaat tggatcttgt
+   263461 ctctttgaat ctgcttttaa ggcaggaagt cagaactata aactttctct ttagatatcc
+   263521 ccatacgttc ccttgtgtga aagagctgta gaataagtcc tagctatctg ataatagcag
+   263581 tcttgttgct atgggcttag cactagtctg caatactcaa ttcataccag tagtacggtt
+   263641 gctaacttgt ctttcaaagt tgccctggtg tatatcccgg agtacttcga ggaacgccag
+   263701 taggcctatt agtgtataga tccttcccgg gttgtttgct gtgtttcact gtttcttcct
+   263761 tgggaactgt ttgttttttc tcttctgtta tagtaagagt tctcactcct tcacctcatc
+   263821 ggtcaagtgg aagagtcacg tcagtcccga ggaacgcctt tccgctcatg gtcttcctac
+   263881 tccgatctcg cttcgggcac tataagggct agctggcttg ttgctagagc acagttaaga
+   263941 gtcactcctc tcctggttct acgtagcttg atgagataga ttgctggcgt agctgtgatt
+   264001 cctaagtaaa caagttgtag ggaaggagtg ccttccgggt tgtgcgtatg catctactga
+   264061 ggtaaggcac gattgagcgg ctagctgctc tgtcattact agagagtctt tcttagtatg
+   264121 agccgtgata gggaagatgt gtgctcgaag tctccttagg tcttgatcta gttgtgactt
+   264181 cttttttcca gcgagttacc cgtcgttgta gttgttgctg ttccgggcgg tgcttcctgt
+   264241 ctgtaagatc cctttcttgt ctcgtccggg aaggatgtat gaagtattgg tccgtagctg
+   264301 ccttgctttt cttgtttcct ttgctttcga gtaggtgcag ctgaggcagg taccaagagt
+   264361 caagagagaa cttctttgtt ttgtttgaat aatagtatag atttatacct tctctacgga
+   264421 atacctgttg catcctgtct ctcctgccaa gcgaatggat ttgtcatgca tttgttgcta
+   264481 actgccgaca caagctggct tgttaggaaa aaaacccttt tcttcttctt atgaatgttg
+   264541 cttactctgt ccactgcgat acaactgttg taagctctgt tttacagctt gtaatgggat
+   264601 ttctgctctc ttttcaatat atcccctctc tttttcctag gaaagcctct gttgaatcta
+   264661 tctctctccg ttcggaatgg gaactccaag ctgtttagct ttcattcttc cttccggtac
+   264721 cttttcacag tgcaactgag caaattagtc aaactttgta ctcggaatgt tactttcctt
+   264781 agttttgttt caaatatctg ccgtttcatc tgcttcagga aaggcagttg acttggtcga
+   264841 atcccgatta cgatcaaaag gaaaatcttg acttgcagtg gaataacggc tttgcggagg
+   264901 aagatcatca aaggctgttg ctgggagagg tccattagtc cggggtcgat tgcttccatc
+   264961 ctcaattgct tcttttgaag gttacatgcc tgctgttgaa agcctgggac gagacagagt
+   265021 tagtaggcgc ctccggccct gcggttgaag gtgatatacc aacaacatga gctagcatgg
+   265081 tttcgagtgc gataacatgg gatatggata tgttttcttc gatgcctagg caggctggga
+   265141 ttggaactgc tcgtgattag accttctata tgactgaact tccccgcaac aagatagaaa
+   265201 gaagagtata attcggagaa agatgaagat cgtccaatag tgatctacgg atagctaccg
+   265261 acgtgagagg cctagagtga agctctggca gagtggctcc cttgttaagt gtagggcccc
+   265321 ttggctttgg acaactcacc aagccataga gactaaagaa agagctaaat aagtccttgc
+   265381 tgcaactgaa ggaacgaaag aaatctactt ccgaactgag cgctaaaggt acatacggag
+   265441 agacctatga tttccattct gtttaggcta tgaaaacctc cccggatgaa ggaatgtaat
+   265501 acattgaaga cttcgaactg cttacgtaat ggaaaaaggg gctagatggc ttacggggta
+   265561 tagggttttg aggaggaaaa tgactcactt tttgacagaa tcaaggtcct atcatttctt
+   265621 ctttctgagt gaaatcaaag ctagaccata aagggaagaa ggttggttac ctgctatact
+   265681 aagagtaagc gagctaacga ggcctaatcc tgggttcacg gcattccatt cctaccaatt
+   265741 ccatttcgag gggtagcagt agattagatc ctgttgattc aaaaccttgt tcgaactctc
+   265801 tttggcatct cacaaatgtt tcaaaatata tagtacagat tggatcctga tcaagctcaa
+   265861 gctaaaactt acctacaaga gaatccatac tgaatctatt atcatagatc ttgctttcgt
+   265921 cggatacgtc tctttctccc caatttggat ggatcctctc attctaattc atcgctacta
+   265981 actagaagaa gttcctaaaa atcgaaagct cccctctttt tctccatcca taaccaccag
+   266041 ttgtagcgga ggaagtagca gtcagtggct tctcccgggg cccttaccct tgctcttttc
+   266101 tttctcgggc gcagtaaaaa gatatacgag gttatagaga aatagacaaa gaagttcagc
+   266161 tgcaagagtg gatcctatcc cgccgtattt accagcggga ttcatccgtc taggcattcc
+   266221 tcggaggaac tcgtccaagc acatgatgtc tcctttatct gttttcagcc agcctaagcg
+   266281 ccctacttta cctacttgaa atgtcaattt tcctttcatc aaagccttcg tgttcatcga
+   266341 atactcgctt cactatgtgt agctcgctcc cctctcttga aagccgaacc ggcgccttct
+   266401 tttgataact gggttagggt taccgaaggc aatctggcag gcaaagtttc cccttttatt
+   266461 cggcagccac ctttgatgta actaaactag atagagtttg ggtcacgaat tcggatttac
+   266521 cgatccgagg gaaccaaatg aagcatcttt ttctccttcc gaaagaaaag aggaagactt
+   266581 attcccaccg aagtgcacta atagaaaacc cctaccagtc gaagtggaat gatgagctaa
+   266641 aagtcccttg tacgatgaag ctgagtcaca gaagcgggag aggtgtcttc tttgaagatc
+   266701 gtttcctatc gagtggataa aatcccaagc cttaagagcc cattaaccca tcttctgttc
+   266761 cgctagccag ctaagctaaa gcttgccatc gaccggttcg ttcggaagta acgcatgcgc
+   266821 tcacctatcc atcaaccaag acagaaggga atgaccaacc aagcgagcag agcccaacta
+   266881 ccaatcctat gttctcccaa ttctggaact ggtggcaatc taacgaattc gggaaagcaa
+   266941 gatgatcgac actggaaaag acgtcttggc gagaggtgct ttagcaactc gactgaaaag
+   267001 gagagggcga agtcatgact caagcacttg tctagagagg agagggagct tacagcattc
+   267061 tcaggcgcgg atgccctaga cgagagagtt attgagagag aaaagttagg taggtcgtca
+   267121 gtaagttagc gagatagagc gagactgttt gttaggtagc ttagcgagat agggagcagc
+   267181 tatggaacaa ggggtctgtt aaaacagaac taattaccct tttttttgtt tgttcaaggg
+   267241 gttcctcgga agagctcgca gtgaatgccc agtgcaatcc aataaaacaa cctaacagaa
+   267301 ccgctccgcc cctccatgta cagaaaagta aacccaaaga agttgttctg gacgagagag
+   267361 tgagattaga ctgcttaaac cgtagatagg cgttcctctc cctttagtcg actaactggc
+   267421 gttcctcggg aagtacacct taggtttaca aacctaacac agaataacga gacgaagtcg
+   267481 actttagaac ctattgatta gagcttggct gctcaagagc tacaaatgcg aggggatagc
+   267541 tgcaataaac ccatgtaagc acagggagaa gccggctaaa gcctacatac ctgttccctt
+   267601 aatgcaacct attcccaacg gaggagaaga ggaatactaa tgaaatgaca gaagctacag
+   267661 cgcggacttc gtgcttgcag gaacagcagc actactatgc aattaattag ctgccccatt
+   267721 catgcgccct ctaaacaacg aaagctacaa catcaacgaa gggaatgggt tcctccggca
+   267781 tctaggtaga accttccaga aaaacgggga gtagacccaa ggagcgaagc tacagccaag
+   267841 gaagaaatcc cgggcaggga catgaacaag atagatattc tacgcagcca agccccacag
+   267901 cacaggagag gtgtaggagt aggaagattc gctttttata gatggtcaaa gcgctcaaac
+   267961 ctgcttacag gatgaattag aaaaggagat agtaccaagt caagatacga aaagatgtgc
+   268021 tgcacgactc agttgaccac aaggggcata gtcaggaggg tgtaaaggac tggagattga
+   268081 tcacattatc cttcttccat tagctcctat tacatccttt gagataaagt catgacgggc
+   268141 cactgcctga attagggatt ttttctataa tgcccttatc acgatctagc cccttgtcta
+   268201 tatcatggtc tgttttcata taaccttact actgtatagc atgctaaata aagcgctgtg
+   268261 ggatcaatca ttgcattttt ccattcataa ccatcatata cgctctttca agaaaaggga
+   268321 tagaatgaca gtctatcagt cttaccagca tgcatacaaa agggagtgta gttgcctatc
+   268381 agctggttaa acagataaga ttctgatata ttatataagg aataaggatc ccatatcgat
+   268441 cgaatgaaga tggatgggtt gagataggtt cgttccaact taaggtaagg ccggctgtta
+   268501 gcagggatcg ttagctgtag aagtccattc agtagaggta gggcccggga ctcctttctt
+   268561 tattagacga gaaaagggtt ggcacaaggg gcgtagcgct gaatgaaaat agaaaagcat
+   268621 tccaaggggt tcttccaagt actcgactag acacaaaata aatgaatcct tacttcatca
+   268681 agaaaggaga cggagagcca cgaacctact ttcctgtgcc ggaggagaac cagaaagaag
+   268741 tcttgatcca tggccgggtg caggggaggt agggacggag aaaacgtgct tcatatacgc
+   268801 cagcggtcga agattggcga aagttggtct agtcctatac tatatataaa gcgagtggtg
+   268861 acatcgcaag ataaaagcat ttttttgaag tattattctc aaagcaagag attcaagtaa
+   268921 gaaccagcag aaccatcatc ggtcaactga ggttaggact actctgacac tcgcttattc
+   268981 ctctctaata ctttcttctg tctattcctt tctttcccgc tttcaagctg aaggaagaaa
+   269041 gtcaatcttt cgtagtcccg tccggatcaa tagattgggt cagatcaatt ccttttcatc
+   269101 cccgaggaac gccaattcct cgactaatag gtctgcaagc cttatctttt ctttttctgt
+   269161 tgtaaaccgg cctgttagga tcttctcgcc gtaactagta ataggcttat ctgccctttc
+   269221 attcgcatta tcgtgagcgg gtctggtgaa ctaccctttc actggttcta gctactattg
+   269281 gatttgaacc ctagcttagc actccctctt gttattgtta gggcgctagc gtcccgtcga
+   269341 atcttgattc tccttgacag accagcagac cttacatacg caatttatcg attaattaac
+   269401 gtgcaagaga aagcgatcca caagagggtt cgttcatcta acacttaccc actatcggct
+   269461 tcaagcaact atcgatgtag tctgataacc tattgtataa ggtgtttgac tatgtgtgag
+   269521 aatttattga caattttttt tcacatactc gatttaccta ggaagtcctg agagagcaac
+   269581 agcagctgga attggaccta agggctgaaa gcttcagact gtctcaatcc aggtaactct
+   269641 tactccatta cttacaaacg gattaaggta gctcagatct agctaggtta gacggtaaag
+   269701 aagtccctca agcattgaag aaaggaagaa agcctgtaaa ctaccttact aaactgcttt
+   269761 ctgactctct ctagccagta aactacctct ccttccagtg ttttttcctt tgtacctcca
+   269821 cctctctgaa gagtcgatcc cgacttggga agcaagggtc tcggagttca ggaagcagtt
+   269881 cagcaacaca caaagaaggg ttggagtgcc cgaactacta aaaaccaagc aaaggtataa
+   269941 tgaacctgtc atggaattca ctgcaaggtg gcgagctttt acttttgcct gtccccataa
+   270001 gtttacttag caagagctcc tcaagatgtg catgaacaac ttcaggcacg acttgtcgtc
+   270061 gatactcctg ccccaaacct ttcctttaag ggattcgacg acttgtgcac taaagactca
+   270121 cgatgtggaa gcacatctta gcaaacggcg gcgtcctacg aaggacttga agttcgttcc
+   270181 gttaccttat taagaaagta gacgaatcac tcactttctc tattaaaaaa agtagactta
+   270241 tttttcacag cctatatgcg tatgcggaaa acaagagtcc ttatttttta tttctcttgc
+   270301 cctgacgggg gctaccgctt tacgttcagt tttctctcta ccttttctta ttttgacttg
+   270361 attttcaaat cttttatgct cggccatacc aagatgggaa agacctagcc ttccctccct
+   270421 ttgaagtgca tttatcagat cagctcccag agtaactaga aagagggtga aaggcccaac
+   270481 ttcttcattc atggcctttt ttgtttcttt gattgatctc cctttcgtat tcattcgacc
+   270541 tgaggcgttc ctctcagtca tacaaaaaag aaaggttatt tcatgctctg gttccgggtg
+   270601 ttttccagat gaaatgaaaa caaaaaggaa aagggtcaaa ccatatctta cttaataatc
+   270661 tggcttaaag gacggaacca ccgaggaata ccggttagca gctctagtag gatgtatact
+   270721 gtgctttgtt ccctgtctct ttctttcatc ctccgagtag ttctccaagc ctaataggaa
+   270781 catcgctagc tcggtcagtt tgtgtttctc ctgctttctt tccttctgct tccgaagaac
+   270841 tgttggtctc tttgtatcca ctgaacactc tcctaatctg tcttccttct cccgtactct
+   270901 tcattccgac taggtttact agcctaggag gataaccgtt tgaatttggt atataagagc
+   270961 tctactgcta tcctttcccc tatcatactc tctatcttcc ttctctcaat cgttcagatc
+   271021 tttgtttaca tcggtatatg taagggacat cttatgggtt cttctgtagt aatctttcga
+   271081 cagttccggt taaacttcta ctttcactcg gaacatcccg ggtgttggct tccctgtaat
+   271141 tccgtgtaag gaaggttagt aagtaagcta agccctgtca ggaagtaatc ccaggaatgt
+   271201 ttgttagttg ttagtaaggt aagccaggta agccatgtca gttggtaaga aagtaatccc
+   271261 ttccagagtt gtaaggttag taagcttttc ccgtttatag gaatgggttt ctaagcctag
+   271321 gaagtcaagg aagttgtgtt ggtctttccc ctggttcctt gttcccctgt tgcctttctc
+   271381 ttcatttcat tgacgggttg gagaaatgtt cacgacgtaa gtcagtggaa gcaaacgggc
+   271441 aatcggctgc ctaaaggacc ccatttgtgg ttgaaggcta agttcattct tcgatatata
+   271501 tcactccgct atgctagaag actgagcagc ttcagtttat taccaacctc gagttcaaca
+   271561 actattgggt aaagcctaaa ccttactcct ttccttctat cggaactggc aacagatctc
+   271621 tagatccgat gcaactagaa gtcagtcgat tccgtctgaa aatggtttca gtccaaacct
+   271681 tttttgtcga gtgaaactcc gcttcttaag agcttctttg gacagactca ctcccttact
+   271741 cgctgggttg gcgtatccaa ggtagcgcca gcagcaagcg taggggttac tacgtaagtg
+   271801 cgttcgttag cgcgttacgt tttctcgctt gctagacacg acaactaagg atagacgcgc
+   271861 tgggatagca agcaagtttg ttgaaagagg ggcagtagat tctagatctg ctgaaaacca
+   271921 ggttataggg ctatatcttt attccatggt agggctaggg gcagggtggt ttaaaataac
+   271981 tccttttact ttccctccta caattacaat gggaagggac gagacaaagc cttcttttaa
+   272041 actaaataaa gtaggattcc gctttcactg ttctcaaggt atctcctgac tcgaagatta
+   272101 gggcttcttc ccagccggag gcatccgaaa ctggagaatc atcaaccctc gcataaagaa
+   272161 acaactaaac tatacgcggc aggttggtca aaaagggaaa gaaaaaggtg acatacattc
+   272221 tattcgggga cataatagtg attcctatcg agcacactct gttatatcga tcccaatcaa
+   272281 tagtttttgg gggaagtcga tggtcccccc catgctttat gacatctttt ctcttctccg
+   272341 tagtgatcag tgtatgcacg tagcatcaca gtcaatcttt tatgcttggt tccggtcgcg
+   272401 ctcttcttct aactgcagga gcccgacagg gctgtcttac tctttttgtt gctaatgaat
+   272461 aacctttttc ttggtgattc atttccaagg gaataaaaga aagacctgtg actcattgtc
+   272521 tcaatcataa cttcaaacag aacctcactg tagagaggga gacccccgaa agcgtaagaa
+   272581 cgacgatttt tgagcctgag ggtgaggttc tccatcgacg agtgagaaaa gggcctggtc
+   272641 tcggaagaga tgcagctaac gtgagtaata gctgaacagt tgcctaatgc atgtggtcag
+   272701 catcagacaa ggttataaaa tacatcttta catcccggtc aaagtaacga aaaaagacga
+   272761 gttttagcaa ctttttcata cagttttgcc ctcttttata taaaaaacta acttacgcaa
+   272821 atagtaaatg agaggacctt agcacttttc gttatggagt gattcatcta aaattcccgg
+   272881 ggttttgtga atgaattcaa ctagaaaatc tcaattgcac gggattcgcg attcggtgta
+   272941 agttcaaaat ttatcgtgct ggacgaccac ggttgagttt tgttgtggat ccaactccaa
+   273001 gcgtttgcgg gattttggat agatggaata ctcttgcaac ggagcttgtc tgtcaaatct
+   273061 ggtctggtat ggtagcggtt cacaatggca gctcattttc ctggaatccg ggtgaaagat
+   273121 cttttctttc caaggtcttt gctttctatt cgacttagcg atggagtcgg gtcggaaata
+   273181 aatactaagt aaaggcagac ttacaataat acgcaaacaa gggtcccgcg atgtcttgta
+   273241 cattctaagt actttgcctg ctaagacgtg acaaacaatc gctgattctt tactaacgag
+   273301 cccagtagcc actaaggcga tccgaatgag gaaacgccgg agtgaccgag tgtcttcaaa
+   273361 aacactgcgc caagagacca agcggagcct ctcggtgaat gtggtaaaca aattgaaagc
+   273421 catatcaatc ggtataaaat gacaaacctt ttggtatgta ccacaaaaca ctaagaaaga
+   273481 ccccgccggt tctactctcc ttgacccgtg gacaaacgcc acgtgatgga cggcgcccgg
+   273541 ctctgacgaa gaaccagtaa gattaggtat tgcctaatcc gtaaggttgt cataccctta
+   273601 tcagctttct gaccaaacct ctgtgttgcc actttggtaa ggaaagtcca ccaaccgggc
+   273661 atagtacctc gttagcagga ccactattgc tccttttaaa gctttgtgct caagtgatct
+   273721 gggagggtgt caaacctccc ccagcccctt attcagggga tggtgccagg gagtttgaac
+   273781 tgttaacaaa agattttctc gatacaaaga agagccccac tcctagaata gttagccact
+   273841 cagtccacag attcggctta gtgaaaatag ccctagaaag atcaaggtgc ggaaggataa
+   273901 tttcctccgt taatgatagc tataccatag agttttccat caaggaaaga caatgccttc
+   273961 tcttctccat cctccaggta ccagggaaga actctatatt ccgggtccaa ggatgatggt
+   274021 tgcaacgccc cgcgcttcca ccaataagag gtgcggggga tggtgatatc ccttgaaaga
+   274081 cataactaac ttacaagact taaaatgaaa agtaaactaa cggaacaagg cgccggagga
+   274141 acccccttct aaaagtctaa gagctagaag gcggggtaca gcggttgctt cctgtaagat
+   274201 ttgaatagtc atggcctcat ggtcaagcca gaaaggaaag aaaaagtcta tatcttctgt
+   274261 cctctctatt actcgctcca ggtattacac tcgttgggct caaggtagca ttactaaacc
+   274321 tataaagagc gggatgctat tctaagagga atttactgta aaatagatat ctctttgctc
+   274381 ttcagccagt aagtctgaaa gcaagtcagt tgtaggccag acctgtcatc agtagttcta
+   274441 cttttgtaga ttgaagcttt tataaggcag gaatgggagc tgcaaaggca gagataaagc
+   274501 tgcgattgct cttgctcgac tcttcatggc acggttggcc gggaatgaga acaaaaggag
+   274561 taagaagaaa ctggctaaga ttcaattgac ctatatagca accagttcaa ctaaaggggc
+   274621 gtagcgggct taaggctgct agggactctg gggcgagaag aagcttttcc cgcattcctg
+   274681 ttgatgcaga gatcagacga gaaggcccat ctctttacta gaatccgatg tgatagaagg
+   274741 agcagttcta gcttaagcct aaggcctaag ttgatcctta cttaatagaa tagccataga
+   274801 gctcatcccc ggtgaaatag ttgatgattt cttgatggtt gactgctata tcttaattag
+   274861 agaatccgac tgggaacgaa tactttgagc tcttcttcat ggaatttttc cagctctcaa
+   274921 ggaagattgg atgtaatatt acacaatctc caatttatct ctttctcgct cggaaaacaa
+   274981 acaggtgtct ttgccccgct cacacagatt ttatcctggc ggttgttggt ccaacgactc
+   275041 tacttctttc ttttgaaatt tcgtatgaga agaaggttga caagaagaat aattttaaaa
+   275101 ctgggattgt agttcaatcg gtcagagcac cgccctgtca aggcggaagc tgcgggttcg
+   275161 agccccgtca gtcccgacct agtcataatt gcgttttaag acctggtcta gcaacaaagt
+   275221 agaatcataa gcacactccc ttgtcacaac taaagagggg agcggcaggc ccttgataag
+   275281 acgcctttgc tttagcgcac ccaaccgtcc tggttgaaag gtcgacccac tctatcttta
+   275341 atggaatgtc gtctttggct ttacggaatg agactgaact gatctcatat cttcatctga
+   275401 ttcaattagc tcatccgaaa ggaagaacaa ttaacccacc tatatccatg tgattaggag
+   275461 acttaaaggc aactcgagtc aaacgagagg agacgtcccc agtgatcacg attcaatttg
+   275521 ttaatgaacc agattttttt gggaagaaaa gaaactggtc aaagcagtcc gggctgatct
+   275581 atgcacagtg gattcatcta ggggtctata tgcacgtgtg tgtgtggagg tagatttcaa
+   275641 attcaaaaag agttcccagt tttgtcttaa tgatgacatc cagagggtcg aatacgaagg
+   275701 cttgcatctt atatgttttt gagtgtgggg aatatggcca ccggatagag ctttgtccga
+   275761 agcaaaacca gactgatgta gaaaactcac cggcggcgaa ggaggttctc ccaaacccgg
+   275821 atccgcaagt cgggtcggaa caaaggcctt tcggaccttg gatgctacct cccaacagac
+   275881 gtcgcaagag gaccacgtgg tcagcctaag aatcagggac cacaagatag caaataccca
+   275941 aaaggagaag gaaggatgtt aacaaaccca gaaaggtcgt atcggtcagg gagccagaag
+   276001 cgagatgcgt gcgcacgggt gagaaatcaa ccgtatttca gcctagtgga aagaagacac
+   276061 tcagtgctag caggagtact ttctcggata cgatcccttt ttataggttt gctgccctgc
+   276121 gacacgtggc agaagaggac tcaaggacac atgccagatc tttgggtcgg tatttcttcc
+   276181 ttaagatctc aaccaaccac cctttggtgg ttttccacga ccgaaccata ggtcatgagg
+   276241 cattcaaaat ttgacatgca tcgaggtcaa tctttgcacc ttagatgtga tgagactatt
+   276301 tattttcaaa gcgacccgtt gaacccaaag cccagagcga acgaccttga caaaggtccc
+   276361 ataaagaccc ttttttgggg gtcttgtttc aaggtcttat tccttccata ggaggacacc
+   276421 ttggttccaa gatctctatg tctttaatgc cccttgggac gttgaaagga gtgagtactt
+   276481 ggaggacttt ggggtcggcc ggcctctgta ctaaatcata tttcggatta cggggaactc
+   276541 cctatctcct acgcctctca gtcaaaagag tgcgggttcc catcccgaat ctccttagct
+   276601 tagttgaaag tgaaaaacct ctctctatcg cccttccttc ttttagcagt tcatctcgag
+   276661 aatcttgaat gtttagcgaa tcgatcgtaa ttggtctgac cgctcaaggc ttatctcttc
+   276721 tattcccctt tctggtctcc ggtcattggt agaaagcctg taaatggaat ttatctcctc
+   276781 tcctttcagt cgagttccct cttttataag agtaataaat tacgttggac ctctccgatg
+   276841 gccattgctg gggcattcta tcaagttaga gctagggctt tctatcaagt tttcctgatc
+   276901 tctgagtccc taaagatgct acccataaag tactagcttt tgactgtaga catatcacag
+   276961 taagtcgata gtgtccttaa cagtaaactg ctgggtgccc cccaaaaaag gggcagaagc
+   277021 tgtgaacgat ggatactatt ggcgaactgg aacatatgtc ggctacagag aatatttcat
+   277081 tgtaatcgat gcgagaatat ccctttccca ccaatcttgc cttaaaccgt ggagaatatg
+   277141 ggagcaagcc tacatcgagc aagttgacat tgcgaaattg tgtgaaataa atcaaagatg
+   277201 gatctttttc acgcccgatt cgagatgctt ccttaagcgc cttagattct cacgaggagt
+   277261 attggtcatt gtaagtgatc catctagtgc atgaagggaa aggcttgatc tgctcattcc
+   277321 ccctctttct ttgaagcaga aaggcaaaga gctgcttctt caggacagga agagaccccc
+   277381 aatcctatga tacgatgaac gggaaagaaa ccgatcctcc tttatttgag tccctcgtgg
+   277441 aagaagaatt agctgatgta gaaggtttct ttcttatcct atccaccaca tcttttacat
+   277501 actcagacag actccagcgc ctatttcaag ctaaagagat cacaatagtc aattcgctac
+   277561 gcccctactc tttagactta gactcaccaa cttaaccaat ggcttagagg gcgaaggaca
+   277621 agaaagtcaa ttggcagaaa agattcgcta cagatcgatg atcaaccctc atttctcatt
+   277681 agactgcaac ccccactgaa acgaaaggag ctgtggttag cctgaacttc attctcaccc
+   277741 gttcccttca gattattagg ggaaagcaaa agtcgatccg ctcaacgcct tgctctcgag
+   277801 actctttcgc taacttctcc aatcaagttg agtgcttgga tctttcttca aaggaattct
+   277861 tccggagagt cgctccaact tagaatgggc ccaacgctcc tcaaccatag gagccgtgag
+   277921 agccctctaa taagagaagg agttgtacgc aacgcaggaa agcaagaaaa gaagaaaaag
+   277981 caattcatgg gggaagtcac agcatcaaaa ccaggagttc aagtagaaag gggtcggacg
+   278041 ggatcgcatg agtacgagag aaagcaattc atgggggaaa gaaaggacga tgccattcaa
+   278101 ttatgggact gattctttga cttgctttag ctttccattc tttggataat ggtctagccg
+   278161 agtaacttga tcttccttta ccatctatat ctgaatggac tttaccatcc atatctgaat
+   278221 tgaaaagaag attgaagtaa ggagcaggtt caagtcatag ttctcaccaa ctatagaaga
+   278281 atagggatcg actcgtgcta aagaagattg atagccaact aacatcttat ctgttcggaa
+   278341 cgaactccta tgtcttattg gtatgtgatt gaaatctatc ggatcctcct atgtcttatt
+   278401 ggtatgtgat acaagcatat taaaggaaag aatgcggaag gagattggaa aagctttcca
+   278461 taggcttggc cctagaggtt gtcattgagt tgaactagct cgccttcttt cctcgatcag
+   278521 aatacgaata agatctttac ttttgggata agtgaaatcg tatgtatcca tccatggtgt
+   278581 atctggtgct ctcgtatata agagaagggc agcatttatg agtaatcgat ctcacaaact
+   278641 atcaatttca taagagaaga cgaagacgga tcaaattgaa taatcgaaga gagatgggac
+   278701 cctagctacg agtcattccc tctgacgtcg aatgatctac ttgcttgtac ttctctttgt
+   278761 cgagattcag ttggtcttca gtctaccact ccgtgggtat aagatcgcaa agaatgcatt
+   278821 ccaagtgaga tgtccaagat caaaggaacg agggtaagaa tcgacgagga atcaataaga
+   278881 tataagataa gtgaatgaca aagcgtgagt ataattctca acccgagatg ttagaaggtg
+   278941 caaaatcaat aggtgccgga gctgctacaa ttgcttcagc gggagctgct atcggtattg
+   279001 gaaacgtatt cagttctttg attcattctg tggcgcgaaa tccatcattg gctaaacaat
+   279061 catttggtta tgccattttg ggctttgctc taaccgaagc tattgcattg tttgccccaa
+   279121 tgatggcctt tttgatctta ttcgtattct gatcgaagaa agaaggtttc cattcagtct
+   279181 cataaagcaa gcacctcttt cacataagaa agtggaggca ggcttgggga tacgatctaa
+   279241 aatgattcca aggaccgggc aatcgccctc ttttttaatg aagaagcggg ctagtccccg
+   279301 aaaatgcccg ttaatcaagc aagttgggga acaaaatctt ccttgttagt tactcatttc
+   279361 ttcggtcgag cgttctccgg acgtcgagaa atctatcact caatcactgg ccgctctgta
+   279421 attgtctgat tttaggtttt tgatcacact cgaaattaaa ttatgtatct acttatcgta
+   279481 tttttgtcca tgctcagtag ttccgtagca ggttttttcg gacgttttct aggatcagaa
+   279541 agcgtttccc gtttcaatct tataatcttc ttgattctat tggttttttc aatttgccta
+   279601 tttagatcct taaagcagta tttaggaaag aggatgacac aatggtgcta tctagccctt
+   279661 gtttgtcaaa tctccctctt tctcgttctt ctacgtagcc atatcttggc gggttttggt
+   279721 acattctccg cggatgtatt tactgtcttt atgggcacat tttcagttac cggttcatcg
+   279781 ggggggatag tgaatcacca agacggggcc tcgtctgagt ggttcacgta tacatccgat
+   279841 atggtcgaag attcggccag ttccgggcgt acctcctcgt cggtcaatca accgattcct
+   279901 gaggagcagg cttgggaaag ggaggctcgg gcacaagagc acgaccgcat ctctgccgag
+   279961 gtagagacta tcacgagcgc ctgcgagaat ttggaggcgg ccatggtacg gaaagcccaa
+   280021 attctcttgc atcaacgtgg agtaactctc ggggatccag aggatgtcaa gcgtgctctc
+   280081 cagttggctc tacatgacga ctgggagcac gctatagatg accgtaagag gcatttcact
+   280141 gtgctcaggc gcaacttcgg aacagctcgc tgtgaaaggt ggaatccgtt cattgatgag
+   280201 ctcaggggct tggggaacca tcaggtgaat gcccggcatt acgtcgactg aggcataccg
+   280261 tgctgggatc ttcgactggg gtgaagtcgt aacaaggtag ccgtagggga acctgcggct
+   280321 ggattgaatc cttctataga aaagttgtta agcaaagact ggagaggccc ccggtcgaga
+   280381 tgtagtaagt aggtctccaa tactgggaga ctgaggagaa tgggagtttg tgggttgagg
+   280441 gtggcatgct ccccagggcc ctcttttttt tagataggta gggtaatgca tttcagtatg
+   280501 aatttgatct agcagtgtaa acctgagatt ttcaagagtt ggcttccgct tattttgatt
+   280561 attaagcgta acgagactaa aagagtcgtg aagcgagtcg gaaagaaaga ggcgaatcta
+   280621 caagagtctg attttttata tgttaacggg cggagattaa gaggtgggca agttggtagg
+   280681 ctccggagaa tagaatgcaa tggaggacct aacgcctagt aagacgggga aggagtgtta
+   280741 tgaaagggag gaagaggaaa agctactctt gagcctcctt gctcaaacct gccttcttcg
+   280801 ggaactaatg gatgtctatt ccgttctcca gctccatcag tccactagcc agtaccactt
+   280861 tcaagtaagt cttcagtact agtgtcagcg ggcaggtttg aagatgtctt tcctggtcta
+   280921 tctccagact tggccagaga gtaaaatatg cgttttcgca cagtaaggca ctttagactt
+   280981 tgatttccgc taaatcttca agataaaatg tctttgcctc agcttcacat atggcttacc
+   281041 atcctcttct agtaaggaag tccttcccgc ccttcctaac cttacccgta accacgactc
+   281101 cctttgatta tcataagtta atattcaaaa tgtgggtctt tctcgtcagg tctatgttct
+   281161 tgctttcaat tcaatctttc caatcgggtc tagaatgaca atttatccca gacaaacaag
+   281221 tctcctctag cttaaggagt agtccatagc taataatcta tggtactagc ttattccagt
+   281281 cattgagcac aacttccttt tctgtttaca gagaatagga taaacctcta ttccatccgg
+   281341 gcaaataaga tgctcgctca ctccactacg ccacttgctg tgcagtctcc aatacgccac
+   281401 ttgcagttcc acagagtgtt tttctaatct tcttgagtat aaagaggaat ctaagggaag
+   281461 acacttcatt gcgggaagca ccacacgcct gcttcctcct ctaagcaagc aatagccaga
+   281521 gctctgaagc tgaagtgagc caagctcaac aatcttgaat ggcttactca ctcaagcagt
+   281581 aaggcgactc cgcgggctca atcagaggtt cctatgcaaa gcccagctat gaagcgaata
+   281641 aagagcagtt cgcactctcg gtgggatggg tcgggttcgg tcaatgaaat gccttttcca
+   281701 agcacgatca ggcttcaggg tagcttctgg gagtgctcta cgagaaggca tatgtgttac
+   281761 atcttgagat atttgtttag ggcaaacggt caccgccatt tctcgtatga aaggctggac
+   281821 tgtcgcaatc aaactctccg cctgccggat catctctatc aaccgtctcg gccgcacctt
+   281881 cttccgcatc tgtctcaact gctccttgtg cgggactctg gatacctata aagctcctct
+   281941 gggcaatcgt attggttcca atcagcattc ctaacctccg tcttctcctg agcgtctgga
+   282001 ctgcactgag tgcctggact gaaaggattc tctttcgcca tttagaactt ccccgtggta
+   282061 gcatagcata gtagtgcaag tgaaagtaaa taatcaaaga cgttgaaaaa aaaaccttgt
+   282121 gatcgatgtg ggaccttaga taacaaagta gcgatagggc gcctaattct ggtcatttag
+   282181 agcatcttct cttgattcac tcagaaatta ccctttctct gaaaagcttc aactcaatca
+   282241 cttggacaga gggatttggc ctaccaactg caggctctgg tgggggaacg gtatccattg
+   282301 cctgctgccc catcccaata aaacgtgttt ctcaaccacc cctgtcgact tcgcacaagc
+   282361 caaccttgca ctaaccctaa aagaaaaatc cagtataaaa tcccgagcgg ggaaccacca
+   282421 agtgcagcta aagcagggcc gctaggtccg aagagtaact gcccttctga cttccctgtc
+   282481 cgggatctct tcactgaatg tggaggcgcg tagcgctgcg aaaaagaaag ggttgccttc
+   282541 catgccaagc cagcacatgg accggattgt tactcgctac gcgcctctgc atggtgagat
+   282601 gggaacaagt tgtcatgatg tatgcaggta agtgagcgat gaacgcctgt ctctgttgct
+   282661 gcatgtgctg tttgtttgtt tccgaggtgg tgtggtctgc ccccgaggaa tgtctaggag
+   282721 aagggttttc tgctgtgatt gcaccaagtg cagctgcggg ttagtatctc ttcctaggtg
+   282781 ggaaggggca cgacgaggaa cgaagtcaaa ccataccacg tactcacacc attgttcata
+   282841 caatgtctat aagcggtgtt ttctgcgata tgtgcttctg gctctccaca aaggaacatc
+   282901 tccaagggag ggggcgtgag ctgcttaagc ttcatcccat cccgaatcct tgcaattaac
+   282961 acaccacgaa gatagggact gagaggtaat cctcttccaa tccaccactc gagtggtaac
+   283021 cgatcaggac cgactaaaag cttactagat aacgcctgaa tccgactaaa acgccgggat
+   283081 agggccttag tgtagtgtcc atttgtccta gcacgcggta tccagcttta tttaacctta
+   283141 aacaagtctt aggacttaac ttgtacttat aggccaactg ctgagtacca acgaggaagt
+   283201 agctctcaag cacagctttc gcagatactg gggataagtc cttcgacatt atcttaaccc
+   283261 agaaccgctt ggcgaactca agacaacccg tctcagatac aatagacttg gcatcagata
+   283321 tatctacttg caaagcatca agtagagaac ggtactcctt agccacacta cgatcggcaa
+   283381 tgacgatgtc atcacctaac aaagcgtagt cgagaaatgg tctcgtctga tgaggatatg
+   283441 ctcgtaatgc tgccaaccac acaatagcat ggtgggacaa cgcgaataga gcccaggagc
+   283501 cgtaataacc caaaggttga cctgcaacaa acacaacttc gtcatgccgt ccagtaacgg
+   283561 acttcaaaga gcatgagttg agtgctaagg caccattgac aatgcacgat gccatcgttt
+   283621 gaccgaaaag gcacgccata agctcatata tgactggaac gggccaccga tcggtggcag
+   283681 cgctcaagtc aaaacttgct ataaatcttg ggcgtctctt tgccagacga tggataggac
+   283741 cttcttgatg gaaggtaccg tcttgtggaa tacgcctcag aacagacata ccccacacat
+   283801 gcacggggta taacagacgt tgtttaaacc agttaccgat tacaaacaaa cgccgctttc
+   283861 cggccaccct ctaaggactg agccaaacgt ccggaataaa taggttgaga agaacggtcg
+   283921 aagcccatgg cgagggaatc aaacaccaag ccaggccaag cctcataaaa cctcaaccct
+   283981 tcattagctg cccaggtact aaattgcgta tcaaacgggt acaaagtata cccaggttga
+   284041 aagagaatac ctggactgaa aataccatcc ggttctgaat gaatgattcg cagctggcgg
+   284101 gcaaaagccg cgatttccaa gaacatcgca gagaagaaac aagtctcctt ccgttcttta
+   284161 ggagcaggct gctcatctac ctgtaacctt tttagaacac atgtgcaact gaaggattcg
+   284221 gtttcggcgg acgcagtttt ttgagcttac gctcttcacg aagttgcctc cgtgcttcac
+   284281 ggcgcgcttc ctcctcttca gggctccgtt ggatcttatg ttgaacaagg ttacttaaat
+   284341 cattaggggt ggacttccag gacgggacaa actgaaatcc cttttctaag gggattgagt
+   284401 tcacccaggg gagatagcgt ctgaacaaat tttctacatt ttgtttaagt tcagaacaca
+   284461 cctctttaac ccgacccata tctttaatgg gagtcacaat ggtacggaat gtgctagcct
+   284521 ttataggctt cgccaattta ataatccgac acactgagaa ccaagaaaga tacaatcgga
+   284581 ccaactgatc tgcccgatca tcccgcactt tgatcaattg gcggtgatgg gtagggattc
+   284641 tagttggtat cccaagcaag ccaggaaggc agctgcgcgc tacggtcctt tgctgtgtta
+   284701 agcatgtgtt gctgagctaa ccatgtcaac agtatggaac cccgctcgta gagaccttaa
+   284761 aaagaataca agcaagtgta gctcactctc ccgaagggag cttagcaaca gtagttagtt
+   284821 agaaacagca gccgcggatc agtcggagaa gtcgtttcta ccatttcaat cggacaataa
+   284881 attccgagac ctttcaaaca gcgtattctc ggcacgaacc aggcccgctg ggagagtcaa
+   284941 ctttctgatt tccgaattga ttggaaaaaa ctcctttttg ggccagagac aggtacggaa
+   285001 aattctcagg ggccgaagcg ccaagtagga gaggtcagag tcaatatctc gtccgatact
+   285061 cgtcctatcc tttctttaaa gtggaaacgg cgttaccgtc attgaaaaat aagcgaacag
+   285121 ggaacggcgg gagaaagact cggcattcag gccatgctag tacagtttgt tttcttagct
+   285181 acacttgctt ccatcaagct tccatcaaga gctcccaagg ccacctgatt cggagtagcc
+   285241 cgttcgtggc agagatttta attccagttg gacttggcca aatgccaatc ctcctagttg
+   285301 tggaagtggt tggaacggtt tcctattcct atcgagcagc tgagccagtg aaaagcagga
+   285361 cagggtcaca atcttccaaa ttttaaaagc aaataaaaaa aagtcggagc tggtaacgcc
+   285421 ttggtgagca aatcggcagg ttgttcttta gtagagatgt gttcggttgt aatgagtttg
+   285481 tcttgaaccg catcacgcac ctgatgacaa tcagattcaa tgtgcttcgt gcgctccatg
+   285541 aaacacagga ttggcagcaa tattcatagc agacttgcta tcacaaaatc acttcattgg
+   285601 ttcgtcatgt ttgataccaa acgagagaag taactccttg aaccatttaa gctcgcacaa
+   285661 agcaaacgcc atagatctat actcggcttc ggcggaagac cttgacactg ttggttgttt
+   285721 cctagttttc caagagatcg gagaactacc aagatagaac cgttcaagca ggacttgctt
+   285781 gcttcgcctg ttgattacct ctctcctttc agtcgagtta tttcaacctc tcctgaagcg
+   285841 tcttattcag atatagatat tacagattcg catacgaaag attggaatga ccggataact
+   285901 gcccaaagca ctactaaatg ggtgtcaaag aggaatgaaa tgggatgact gattgatgga
+   285961 tacgtggtaa ttttccactc ctgaacgact tggtactccc tctttctcta tggatcgagc
+   286021 atatgaggtg ccactctacc gcaccaatgc aggttatgaa attatagatg ctccccccgg
+   286081 tgtcttgcca gaagctcgtt ttctaaccaa aaaatggaag attttaaaac cccatccccg
+   286141 ggaaaaggac ctaaactcgt gatagaagcc gtctttctct agtaggaaat caaagacatc
+   286201 gggaaggctc gcgtattcct attcttgtta tgggagactg tccataccct atctcgactc
+   286261 cgttcacaag accataaagt aggggaaggc gggtttgatc ctatcatttg ttattttagt
+   286321 ttgaaatggc aacactaaag aagtgtggaa acagagattt agaaagcaga gttagagacc
+   286381 ccttacccag ggatgtgatt gggaaaagag ccattggaag gtgactaaaa gaccagaaac
+   286441 aggggctacc cgagctaatg atagaggcaa gaacactttc cggccaggcc tgactataac
+   286501 caaccttaca gatcccactc aaccttttac accgatgtat cgtactctct cgaccaggtc
+   286561 atcataagaa aatactgcta aatctttcca aagttataga aggagggaag gcgcgctaca
+   286621 actaaataac agccagctga aaaatgctag ctagttaggc tagcgcgcaa tggctttctc
+   286681 tgctctgata ggggcttgct tcttcaagct ctgcccaacc tatacaaggt gctgctcgct
+   286741 ttctctcagc cactagtggc ttatgtggtg gtcgacattc ctacgcgctc ctccttgccc
+   286801 cctacttcag aatccaaagg taaactttgg atgttgtcgt gacgctttgg ttacgaaggt
+   286861 taccggggtc taggtttatg gatggagtca gtcagcgaaa gtcctcaatc aatatcaaca
+   286921 agatgtcgtg accgcttaga cttggtggat tttgtcagaa aataaagttg gtttcggggg
+   286981 ttcattgatt agtacacctc tggtgctggt aaggtcggaa ccgtggtcgt ccgtctccgg
+   287041 tttgccggcc gataagatct ctgagattga tcagccagct gggttggttt ggctattcct
+   287101 ttctattgaa aaggagtcag ctgtctgcgc gagtcacctt cttttaggct ccgctgaacg
+   287161 acacggcatt ctcgttcaaa tataggccgg ccgtacttgt gatggttccc aaggatccaa
+   287221 tatgaccact taggtctacg ttgccgcgat atttttctat ggaagcgggc gggctgttag
+   287281 aagttcggcc cggttttttt ggtgtcgtag gggagggatt gacctttcta acgcatattc
+   287341 agtcgtaccg gcatggcccc actgatttgt tttcgatcgg agcatcaagc agcgcaaccc
+   287401 ggttctactt gactaagccc cgtgctcgtc caaggaggga gtttgcttta cccaacaact
+   287461 tcctttttga taacaaggca gaaacctctg acccgacatt caaaagtttc gaatgaagaa
+   287521 tgaagagtgc cctgccccag caagcaccta ctcaattcta aataaaaaag cgtgacttta
+   287581 ctaaacaagc aagaaaagcc ctttcgcact tcttagtaaa gcctaagcgc ccttgttgct
+   287641 aaaggtaagg ccggctttct tcgcatgctt gagcgcatta cgcattaata taatacatat
+   287701 atatcaagct ttccttgagt tttcaataag gggcatctat agtaaggggc cttttcaata
+   287761 agactcgcgc taggcgatca cgttttttgt cttcccaaaa atcgaatatt tttgagttgg
+   287821 taaagaccca cccctagttt cagtcagaat gagtccccgg gaccccggga cattggcttc
+   287881 cgccaacagt ggactattaa ggatcgcatc ccgcgcatat tctacattat agcctgcaac
+   287941 tgattcagct tccgcttctg ggagatcaaa cggagctcga ttagtttctg ctagacgaga
+   288001 aataaggaac ataaccaata cagggaacag gggaatacca gaccatatct gcttttgcgc
+   288061 catgacaatc tcactcgaat tacggggacc tacacatatt agtacagtat accgggggtg
+   288121 tccccggcca gaaccacacg tgcaagtttc cctgcaatgt ggctcgtccg tgctttccga
+   288181 ggcgctgcct gtgactcagc atgagagaag ggggcggaac tgcacactct cgtgttcaga
+   288241 gcattgtccg agtgagtagg cagcgcctac caagcaaagc tttcttgaag aggctgggct
+   288301 gcttcctttt cggcgcccgc ctttattcat ctatactaaa gccacacacg acccaatagg
+   288361 aacgatttca gcgcccctct ctagataaga agcaaaacgc gccatcacct acagcccttt
+   288421 cctctgccgg ggacttccat gaaatgaaaa cgggcggcat cgttgtatgc tcgacatttg
+   288481 ttgccctggt ttatatcccg gagtactcct atggccgatc tgtcacccaa cctacttaaa
+   288541 caacctaaag gcttggccct gtcccgcttt tagaattacc cttatggcac agcttagtca
+   288601 gttattctcg cagttcagat aagattcgga ccgccacttc tctgaaagca tggtgcttgc
+   288661 ctcctccctc cttggagctc gtccattcgt tgggtgatcc cttgctctca cgttcgagtg
+   288721 ttttctcgtc gtttaggccg gtgagtgaga tttctgctca ttgcagtcac ctccggggtt
+   288781 cttcgcacct ggatagtacc aggacccgtg tgccccggcc cttgatcaga taaagctgcc
+   288841 cgcccgaagc ccgacctatg acccacaact caaaatgagc cttgcgacga gacgcggaca
+   288901 ttacccatgc tgcggttccc tccggtccgt tcccgggttg ggttagggga acatccgagc
+   288961 gatgattgcc ttcgcggata caaacgcgct cacaaggcgc acaataagaa taagaccaat
+   289021 agatacttca taagagacca tttgagctgc agatcgtaat gctcctagaa aggcatattt
+   289081 cgaatataga ggagttcaag ttaccaacaa tcaacgagtt gatcataccc ttctatgaac
+   289141 cgtacgagca cgtcctctct cacaccccac cgcgcttacg gctctatacc ccaaccctac
+   289201 ttgctctgga cccccccccg gtcctccctt tctattcctc ggtaagcggg ccgtgggagc
+   289261 atggcgggag acaaagtccc cttttgactg atagaaagca tctctctttt gtctctcctg
+   289321 accgaacccg ccaaaaaaat cacaatagaa taagttgttc ttgccgggag agtagcgtct
+   289381 gagacatctt tatctgagga ggaggcttcg tcgtccggct catcctcgga atccgaaaat
+   289441 aaaactgaga cagaacggat tgtttcagtg acatatctaa tcagactgaa taggatttga
+   289501 agagctgtca ccatcattca attcacatca atttaagcag gcaggaaggg cgcggaagtt
+   289561 accgtttata gaatgcaagc aaggtagctt gcctgccaag ccgataggcg aaagggcgaa
+   289621 ctgatcgact tgcatgtgtt aagcatatag ctagcgttcc ttagtctcaa tcacagacct
+   289681 ttttccattt taggtgaacg agggttaccg gctctacgac cttgcaaggc actacagtag
+   289741 agctcttttg gcctgccact ttctcaatcg aaaatgtcaa ctgtgaagat tctcgcgttt
+   289801 ctttatgtaa tttcaggatt tcttttcgtt gatcggatcg agcacatagg aatatttccc
+   289861 taagtcaatt cattctcttt ggctgaagtc aatcttcatc aagacagctg accgagtcga
+   289921 aacctactgc tcaagtctga tgaatgcccc gttcatgagc tggtaaagtc gagatcaatc
+   289981 aactgggatc ggagactgga cgtctatttc actatcccgc ttttgtctaa gaaagtaaga
+   290041 tacctatgcg ggatcattat gaacaggtta cgaggcctga agcggaagcg ctattgaact
+   290101 gtcgggggga cttggatcaa tcaaagacac tagtcggtaa acccgatctc ttacaggcac
+   290161 tggaattgga cgatatagtg gcggctatgc atcccgagga aaaatttggt actggcatcc
+   290221 ttgcttctgt ccccgttgcc tctgatagct atgcccccgc ctctcttttt gtctctgtga
+   290281 ctcgagaagc gagtctcgat ctcaaaaggg tacttgtatt aaaaaaatcg agatctttca
+   290341 ggtgctccca gatctggagc aggcgatggc tcttgaaatg ggctagggat gcctagaaaa
+   290401 ggtgaataat taggctattc tgctgctggt catggaaaaa aaatggatga aactgaaggg
+   290461 tctgcttcga cgctcccgac cttcaactcc gaattatgct atgcctttcc tttcggggag
+   290521 cttccgccgg ctcagatgct ttctttgcca cggatgcctt cggtttgagg tcagggactt
+   290581 tttttttccg gctggtggct gtgatgctag gctaaatgct gttagtgaga ctcggtaaac
+   290641 ccggtccatt gtaggtgcta cttcttgctc ttttgccccc atttctctgt gattggcttt
+   290701 agaaccggga aaaaagagtc aacaacaatc aatcagttat gcctctcctc ttcctgcctc
+   290761 tctagctggc ttgatcttag actccgctac cagagaagag cccttgtgtc cctagctaat
+   290821 ggatgcctct tccattagtg ctccttcctc tgatgtctcc gctcggtcaa ccgtctctgg
+   290881 aatttcttac tttttcggtt ctaaagccac tccgcccgcc atccagtgga tcttcacctg
+   290941 gctaggattc tgactctcga acgggggaag gcaacctatt agattgttga cccgacctca
+   291001 gacttcgttt taggctgaag taattggagg cccattaccc gatatgggta tggaattcgt
+   291061 ttcacctacc tggtagatat gtgtatttga cctctaacca tacctttgct atctatgctc
+   291121 caaccatgat aatgatagcg attggacggt agcaattggt gcttatttcg gggattcaac
+   291181 cgacctggat aaaccacttc tgcctttggc tttgcctcat acccggtcta actcaaactt
+   291241 tgactttgct atgcttgctt tgttagtata gcaaagtttc taaagctgtt agcgcgtagt
+   291301 ctgtctgctc taaaggaagt cgatttattg attcgtcctt ctcggatcgt cgtcggtcct
+   291361 tttttattgg gctctcatgc ccggctagac gataagatca aagtaacccg ctttaattta
+   291421 accaacctac tctgtaatac tgttttcacc accggtttgc catgattcat aagcaggaat
+   291481 cgtagatctt atctaggatg ttgccgtcat cttataagat gaagaagtca agcacatttc
+   291541 tcttcttcct aagaagtaag gctgacgggc taataagtcc ccggagcttc aaaacaggcc
+   291601 ttttcatgct ccgcagaggc caatgaactt ccgaaagctt cttctgattg agtgaacata
+   291661 ccgagtactt aaaagtaggg cattggatgg gacaagccgc ttcctttact ttaagccgag
+   291721 aggggtcgtc gctaatcctt taggccgctt ctttccactt tctacctctt agtcgactaa
+   291781 ctcaagttcc taaacgagac agcagctacc ccgcttgcct gtaaaggaag gagttcttgt
+   291841 tccgttgacg tcgtctaact aatacatagt tctgaaatag ggattaatat ggattgaatc
+   291901 atcataatga tgacagctgc taccttccca ttagaaacca tctatctttc ttcttttcct
+   291961 tcgtacgaat ctttgctaac ttgactgtgt tcgacggctg atgaccccaa tgtgtgaaat
+   292021 atccacctcc atggggatgc gcgaccggga accatgttgt caggaactga gcactttctt
+   292081 gaaagcattg atttgtggta agactagaag aggagattgc tctttcgaag ctagtcaagc
+   292141 gattaaacaa agtaataaac ctttcgagtt cttgatttac ggctagaaag aaggactaga
+   292201 gcttcatacc ttccggctgt tctttcaaga tggaaaataa ttgatctgta aaggtcagcg
+   292261 gtcgtaatct gttcgaccag accacgtctt gcaactcttc tggcgtcagg gttgcgggaa
+   292321 cctgtgcttc ttggttgaaa ttgaagaaca actccttatg ctttggagac gttctcagca
+   292381 actcatagat agatatgtta cctacgcctc cgctaaagaa aggaagaacc gggtagctat
+   292441 acgttccaaa cagctaagat attagttatg ctttagcaac agctccgttc agggagggcg
+   292501 gggatgaaca caccaaacgg caagctacaa agaagggtga aacaagctac aaaacagatc
+   292561 tgactcctac ctaaggggca tagaacaaaa gagagtcaag tcaaagataa gagtttagtt
+   292621 atgctgggca gttgtcttgg aggagcagcc gccacagatg aaggagttgc ttgaggaaga
+   292681 tgcttattta tttccataag acctacctca tggcttgcca ctcttacgac caacatggga
+   292741 ttgatttcat acctggagca acttgaccta atctgtctcc ttctaattag aaaaggagtc
+   292801 ccgctaagca cgaggaactg agaaggcagg tgcaagggct ttgatagaga gggaaagcag
+   292861 actcgacaac cacagcccag ttaactattt gaatcaggag cttccgatct catcccttca
+   292921 atcaccgtta ctcgatccga acttgaacca ttctcttgct acttttcttt cttcaaaaaa
+   292981 aagattggaa gtggaaccga tttcccggaa gaggtcttcg gcttgctctg atatgataag
+   293041 aaagttcgct ccctctccct gtcaacatac atgggattga gcagccccct atctcagagg
+   293101 gtttacccca tggccagttt gcgatgaaag aaacttcaga atgcagccca catagtccta
+   293161 tcaacataca aggcttttca atccagtgaa agacaggtcg aaaacttcca tatctgatag
+   293221 gctgcaaagg aagatttttg atgtctggtt ctatctgtct gtgcttggtc tggaccctta
+   293281 gttcttgcag gtgtaagcgt gctccccatg tagttttctt cgttcccgtg taatcaggtg
+   293341 cgagtgtaac gagtttgttt gaaataagtc caatgagatg atacgaaaac aaagagttaa
+   293401 aagctggctt agaagaaagt atagtgggca tggtttagaa ggtaggttac ctgctcgtaa
+   293461 gaggcagtct aacttagtta gaggaatgct catcagaagg ataagcaagg tttgatgaat
+   293521 tctcctagaa ggaagatcaa aagtcaaggt cttcaatagt agtatagtag ggaacatatc
+   293581 acaaccagca agcgtattca ctattcacgt caacatttta gcaatcgaag tcggagtgag
+   293641 ccgattcaat gtctacaggg cgtctgtgta acacatatca aagcgtctgt tgccaagtct
+   293701 aatatctgat atctgtgtta gctgcctgcc tgtctctccc tgcccacgac atgggttgga
+   293761 cttagagtct ctttcttgtc tgtagttggt gtattcatat ctctcctttg aatggcatag
+   293821 atcttcgtag taccagtcag ctaacaggat cggcgtatgg gcaagcaaag aatctaaaat
+   293881 catcctttcc tgtcagtagc cgggtaggta aagaggtctt ctgcactacc actagtagta
+   293941 aacttcagag accactactg ggcaagtaaa aagaattaat tatgccgacg atctgatctg
+   294001 agttggtcta gtagcgccct ggtagccatt ctggtatcga gaggttcttg ggaggaagat
+   294061 tgtctagact tggagtgtgc taccactaag gttaaccaga taagagagtc ctcacctaac
+   294121 aagcaagtgc taatagctaa cagtaatagg aattcaagta agtaagactc cactattaac
+   294181 tggaaacaag gaatgaccag cttcagaggg atccagcaca gattactatt caaccagttg
+   294241 gataacagat aacaagggaa tagcccggtc aggcaacttc tccctctgtg cagcatttgc
+   294301 ttcgacagat tgagggggtc gtagatcagg caacagcaga taaatctatt gaatgtccat
+   294361 cagccgtaaa agaagggcag ggcaccccag gggctcatct tctaactata gggaaaagaa
+   294421 gaacaagaaa gactgtaaga cagaaacgga aagagtatgc gcggactagg ttgagaatct
+   294481 ctgcgaggca gcacaccggt aaagatagtt taagaattta gctcagtgtg ctgcatcaat
+   294541 gatcttctta ttggaaagct gaagatcaag gcaccatgtt cgggttggtc aactcctcca
+   294601 agtagcccta acgaagtaac ccttcagggc caacggagta gccctgggcg tagcaacccc
+   294661 ccgtgggcct gagggttgag ggtatctgtc atacgtggaa gaagaagaac aataagactc
+   294721 ggaaaccgta gtgaccttcg gaactgtgag tgaccttcgt tcgagatgca gtatgaactc
+   294781 ggaagacttg cttcgctagc accgtatggg ataaatcttc atatggagga gaaggaagac
+   294841 tgaggaactg cacatgcggc gtccttgcga agtaaccccc aacggagtag tcctcactgg
+   294901 gattgagggt tcggcttgtt gcgtgcttcg ccatgagtat gaagagttgc ttcgcttttt
+   294961 aaaaccgtat gtgaacccca agggtgtgct agatgatgag gggtgagggg atggggttcc
+   295021 tgcactctat ggaaaatgta cttttgtaat agtcaaacat ttcaattcta aaaatttgat
+   295081 agagttggtg attatgagtg gagggaagta cggctaggac ctttggccaa gtgaaggatc
+   295141 aaccaaagtc gacgagttgg tttatgatta ttgggttttc gaagagaggg tttttgtaat
+   295201 catgagttgg tggtcgatcg tcagtagatt ctgagttgag tatggtttgg atacaagata
+   295261 aggatcaagc aataaggcga cattaaactg tcatgtaatc atcatcgatt tcacgagtaa
+   295321 ggtatagaaa agcaaaatta tgagtcgagt acagcttgga ccttcaacac tagtcgacat
+   295381 tcaactggca tttaatcttg gacgaattca attccaacta attcggtaga atctagaata
+   295441 gtgagatgga agacccctct ccttatcagt ttcctatgga ccttcgacta gatcacacga
+   295501 gctactccca ccttgttcaa ctgtgtgatt tgtcgttttc cgagattcgg tttcttcttt
+   295561 tttcttgatc cgaggtaagg tgcttttttt gatcgatcaa caggtaatat agaaggatga
+   295621 ggctgtgatt tacctagtga atcaacaaga tcgaaacaca ccaatcgatc aaatcatttt
+   295681 tgaaaaactt gtacatcgtc tgatgtagcc ttttgcttcg cgatattcaa gtttgggttt
+   295741 ttgcttcgcg atattcaact atgcctatat gagagattga gttcacgaca atcaattcca
+   295801 agtaatcatg tcagtagaat tgagaaagat gagttgagta tggatctccc ctcggacaag
+   295861 taccaagcaa gatccactgt catgtaatga tttcactaat atagaagaaa ccaaattatg
+   295921 tcaatagaat cgagaaagat gagtggagta tcgagttgag cttggatcac tcctctcctt
+   295981 atcagtctcc ttcctctagc gcagcaaagc agcccgaagc actttctttc tttctctagc
+   296041 gcagcagata ggtcgcccat accctttgcc cattggtgtg ctgcttcatt cctagcgaag
+   296101 caagtgtggt tctctttggt tccgtagtcc ttttactctg aatgatatta ggttggattt
+   296161 tacttaaagg ctgttccgaa aaaagagaga cttctaggct cgcaataaag ctaaggtcct
+   296221 gatcgagcaa ctagtcgtcc tatctatcca cctctccata ataataggat ccaccaattc
+   296281 ttctgcgaga gctactgtac ccgaacagta gaaggggtgg ttaattatgg tgtctactga
+   296341 tatattagga aacatgtcca gaactggcac tcctggcaga acgtcgttat ctgcagaatt
+   296401 cgtggaactg gcaaaaagta gattcagatt tctctgtcgt cgcatagctt atccccattt
+   296461 ttttttcagc aactgaacgg gaagtggctt aggaaaggac taatcggatg cgctcgccca
+   296521 gcgagaaagg ccctaaccct tgccgaccaa gaaaaaaaga gaagataacg aaagggagac
+   296581 aaagttccta acaaaatcat aacacaaggt acccattcca tctatcatat cagtcgagtc
+   296641 gatccgattc gttcttatca ggcggcaaga gagaacttat gacttcgcgg atggagaggg
+   296701 attcgaaccc ccggtattcc tagaaatact tcggttttca agaccgactc tttcaaccgc
+   296761 tcagacatcc atccctcgct tttgttcaat tataaatata aataaaagaa agaagaactt
+   296821 taatggagat ttatagcatc attcaagtaa atacagatta agatcgtaaa aacataagct
+   296881 tgtaatatag ctacacctaa ttccagaccg gttaatgcaa gaactataaa taaaggacca
+   296941 agagccccta taaaatagaa aatatcattc atacatagca tagtccaagc gaacccactt
+   297001 aaaatcttta ctaaactatg accggccatc atattagcaa ataaacgtat tcctaagctt
+   297061 aatgcgcgaa aacaataaga aattagctca aggagtacta aaaaaggtgc taacggcagt
+   297121 gggactcctg cgggtaataa aaagctgaaa aaatgaagcc catgtctttg aaatcccact
+   297181 atagtaatgc caataaaaat cgaaaatgag agagccaaag taatgagaaa atgacttgtc
+   297241 actgtgaagc tataaggtat cataccctga agattacaaa ataacaaaaa aagaaaagtg
+   297301 accaagatgc aagggaaaaa catttgtttc acattcccgg aaagaccacc tatttgttcc
+   297361 tttaccaggt tcagcacgaa atcataaaga agctctacca aggattgcca agcatttggg
+   297421 actaagtttc ctcctccctt tttagtaaca aaatgaatca gaagtaggaa aaaactcaga
+   297481 gttagcagca tgaacaaaga tggatttgtg aatgagaaat agaagtttcc gatatgcata
+   297541 ggaatcaatg ggacaatctc aaattggtcc agtggggagg ggatttccat aagtttcata
+   297601 tagttagaaa aacaggcgtc gtgatatttt tcaataaaat tgacagggcc aggaacgccc
+   297661 aaattctgca tataccagtt attaacgtac gtagataaat taagcgattc cctaaaaaaa
+   297721 ataaggtcca gtttgctccc gaggagttcc gcgggagtat gatccaattc tcgctggagg
+   297781 ttcctaatat atccggcctg aatcactccc tcgtaaaaat cccagcgcat attaactaag
+   297841 aagtgattca accgggttaa cctctccata ataataggat ccaccaattc ttctgcgaga
+   297901 gctactgtac ccgaacacta gaaggggtgg ttaattatgg tgtctactta tatattagga
+   297961 aacatgtcca gaataggcac tcctggcaga acgtcgttag ttccggaatt cgtggaactg
+   298021 caaaaaagta gatttggctt ttgaggcatg attagattaa ttgatttcaa caaaatgatt
+   298081 ccctccagac agcttacgtc aagcctctag gagtaacaac tataaaaagt tgtggttggt
+   298141 tgttgaccaa caaaaacatg gtagaaaacc aagaacaggg ggcatagaga tttcttctct
+   298201 tatgagattg atagtttgat ggggggcccg gcaggctagc gatcagaaac ctaaaatcag
+   298261 acaatgatag agcgcccgta ccctacctga agtagcatcc cgccgatctt tcgaataaac
+   298321 aaaaggatca actaatacaa ggattgagaa atcttggcaa ctcaacagaa acgggattga
+   298381 ggaaacttga ctttattgaa attcgagcag cgaacccgat aaacacctca ataggatcgc
+   298441 taatactacc acttctagac ttggcacaga caacacttct tttctatata cttaattcaa
+   298501 gatgaaagca atgaatgatt gatccaacac gccaagccat acttttcatt cttattacta
+   298561 tacttaattt atctaggtga tcgacggaac acggagaggt gctttagtca ggacgagggg
+   298621 atttagacgg aactgagagg agaaccgaat gaatgcaagt gcaagagtgg aagattgatc
+   298681 ggactctcta cgaaagctgt ccttcttctc gcacaaacat atccgactcg tacgaaacaa
+   298741 aagattgaac cttcacccaa aaccgtacga agctgagtag caaagatgac agcacgccaa
+   298801 gcagccgtcg aaagaaagtt gacagatcag gagctgaaag actctactga tgattcccaa
+   298861 gccagctgta caaaacatag attacagatt aatcaggatg gaacacgaaa gaaagaatat
+   298921 actgaaatga tgattagcac aaaggcagtg ctcgtaccga aacaaatcta cagcactttg
+   298981 accaagcatt actcctccac tttctactga tgattagcac aaaggagcag tttggcatac
+   299041 aaaactgatt agcagcacaa aggatcagga gcagaagcag gatggagtct caatattgaa
+   299101 atgattagca cattgatcac gggatggagt caagtacgaa actccacact tcgcacacgc
+   299161 tctatggaaa caaaagccct cacacaactg atcagtagca catcactcaa gtacaaaact
+   299221 catactttca cattcttctt tattttagtt atcacattat cgcaaaagca cgaacgactg
+   299281 atcagcacaa tcgtacacag aagcactaat acacttatgc cccgaagcag gtcacaggtc
+   299341 cgaagcagga ctctccctcc cttccccgac tgataaaatg gacttaatta cgaaagaaag
+   299401 aaaaagaaat aagattccct acacaggtgc agtaaccatt ggagcattat cactatctgt
+   299461 actgaaccat aatccactga ccaaggaaag aaagattaca gcaccaagcc gtacgagaag
+   299521 ccaagacgct ttacgtcgat tagtcaaggt caaggtgccg atgaataaac aacacgctca
+   299581 tactccgccc taaaacccct gtaagcatac cgaaacccca acctcgagga aaaggtgcaa
+   299641 aggtgcttta tcaacaggac gaggaacatc tgaatgacat ggatcagcag caggcgcaga
+   299701 gtggaagatt gatcaggagc caggagctga agagtctact gatgaacaca aagattgata
+   299761 aatagcatta ctcgtatgaa agaaacccaa gccaaaacaa aggaaataat aagattgatg
+   299821 gaaccaatca ttactcctcc actttcccgt ttactgaaat ggattagcac aaagatagca
+   299881 gtatgcaaag aatatactat actgattagc agcacaaaga tagcacggtg gagtaaacaa
+   299941 cgccaagcct acgcacaaca agaaggaaca gtgaagctgc caagcatcag cattaccata
+   300001 aatgtgagtt aattactgca aggaaagaga aggaataaaa gacaggtctt caaaggaaca
+   300061 atgatcggac gaaggtctcg aaatggcaca gtaacaaagg aacaactgtg ctggactcag
+   300121 cacctaccat aggtcaacgt ctttcgagaa taagtcctgg ggaagatgtc taggcataag
+   300181 cagcacccgg tcataagtaa acaaatgttg cttaaggagc caatacaaca caatcagagg
+   300241 actactttac gcttgagagg gccgatcagc tcataccaac aaaacaaaga agatggcaat
+   300301 gctgtggagt aaaccggcag taagtaaata cctgggaaag gctcggtagc acaccggagg
+   300361 caacaaagaa agataggtgc tttagcaact cgaccaagag gagaggaaga gacggacgat
+   300421 cggacaaagg aaaagtgctg gactcagaag aagactcgac taatcaaaac acttggaaac
+   300481 agctcgtaca aaacaaagag gcacttggga aggggaccac cggtctatgc ctctccttat
+   300541 agagtttcta cgaaccttgg ttatacggtc atagctattg ttcattggta attggatagc
+   300601 tggtaaagag ctgataaagg gaacggggaa cagcaacaaa agaccaaggg tcactgggcc
+   300661 acggagtagt cccttgaggg taatggttca aagaaaagaa taaaggaagc ggctgtactt
+   300721 accgagtcga gcaaacaaag atgtgctggg tagtcaactc ttccatatca gaaggaaccg
+   300781 atcaaagcat cggaagaata ggattctaga tgacctgtcc ttctactgct ctgctcgttt
+   300841 ggctgcttat cttgcggatt ctgattcaat tagatcaagg attgccaagc aaagcagcac
+   300901 caacctagct ctactctagc taatcaatca tggcctactt ctaactgtaa atgtgatcac
+   300961 cagcatcaca aaaacaacag aagcggtatc gaaataagca cctgtcccaa ctcaagcctg
+   301021 atatccctgg cacaacagag tttgagactt cgccggataa gcccgatatt gagcctatct
+   301081 gactgagcct tagccagaga tgaagggctt ggcaccaccc taacgcagta aatgaagcga
+   301141 tatcccttga gcgaaagctt tatttttggg catctacagg agaagtcgct tatggaaaga
+   301201 aacagaagcc tagctgctaa caaagaccta tgcggcttgc ttggggtagt gggcaggcat
+   301261 ttaatcgaga aagaaggctc ttgagtaaac agagtcatca tatgagactt ctgtcggagt
+   301321 gctgtggaaa gactttcgtc cgcataagca aagatgattg ctgctcaaga ctgggaggca
+   301381 agtaagggag aaagagagaa ggttgtgcgc taggagagtg ttgaagctat agatttattt
+   301441 agtagagact aggagtctct aacttactgt gctttggccg agggtgaaag tcgaaaagat
+   301501 atatcagagg gctgcccttt cttcttagag accgagtcaa ataaagctcc tccaaactga
+   301561 cctgatctat tgtttgagtc taaccgagaa gtctcatcgc tttgagcctg gtttaggcgc
+   301621 atacgcccat agagtagccc agcccgacca atcatcagtg tacgtggtcc aaggaaatca
+   301681 aggctttatg gccaatgaag aggtgggcta tgaaggaatg gaaaagcagg gttgtgcttg
+   301741 tgctgcataa cttacttcta cattcctcag attacccagg agtgtgctgg aatgagaaag
+   301801 tgaagtattc cttgggcgaa aaatagtcaa ctcttacttt gcttgggtag ggctttgcaa
+   301861 cacaccgata tccctttctt tctagggtgt atggttccta gacctgtaca catgtaatta
+   301921 taaggccaac tcttgaaaat ctcaggttta cactgctaga tcaaattcat actgaaatgc
+   301981 attaccctac ctatctaaaa aaaagagggc cctggggagc atgccaccct caacccacaa
+   302041 actcccattc tcctcagtct cccagtattg gagacctact tactacatct cgaccggggg
+   302101 cctctccagt ctttgcttaa caacttttct ttcttactga ttgaggattc ctaatgtgat
+   302161 gaaagctaaa ttaaagctaa agctctttct tttaagaaag catctggttc catttttctt
+   302221 tcgttagtta atccaccttt aagggcttgt agtaattcag gtttgacact atttggaatg
+   302281 gctttctcat attgagagat tctgtctagt ggcattcgat cacagaatcc attgacagct
+   302341 gcataaatga ctagtatttg tttttcaatt ggaagtggtg catattgtgg ttgtttcggt
+   302401 acttctgtca gccttgcacc tctattgagt aatgcctgag tcgcagcatc aaggtctgag
+   302461 ccaaattgag caaaggcggc cacttcgcga tattgtgcca attccagttt tgaactaccg
+   302521 catacttgtt tcatagcttt caactgagcg gcagacccga cgcgactgac agataagccg
+   302581 acgttaatag caggtctaat tccgcgataa aagagctctg tttccaaaca gatttgtcca
+   302641 tcagtaatgg agatcacatt ggtgggaata taggccgata cgtctccagc ttgtgtttca
+   302701 atgacgggta aggcggtcaa gctacctgca cctgtctggt ccgatcgttt agccgctctt
+   302761 tctaagagac gggaatgtaa atagaaaaca tcacctggga aagcctcacg gcctggtggt
+   302821 cggcgtaaca ataatgacat ttgtcgatat gccaccgcct gtttactaag atcatcatag
+   302881 attattaatg cgtgcattcc attatcgcgg aaatattccc ccatggcaca cccggaatat
+   302941 ggggccaaaa attgcagagg agcaggatcc gaagcggtgg ctgctacaag aatggaatat
+   303001 tccaaagcat tcgcttcttc aagagtttga attaattgtc ccacagtcga gcgtttctgt
+   303061 ccaatcgcta catagacaca atacattgtc tcactctcag aggtggccct tgagtttatt
+   303121 tgcttttggt ttaatatggt atcgatagca atagtcgttt ttccagtttg tcggccaccg
+   303181 attagaagtt ctcgttgacc acggcctata ggaaccaggc tatctaccgc ttttaaccct
+   303241 gtttgcatag gctcgtgcac tgatttacgt tcaagaatcc caggggcttt cacttcgaca
+   303301 cgtctttgct cgtgatcgct tagagcccct tttccatcaa taggtactcc catcgcgtcg
+   303361 accacacgcc ctagcatagc ctttcccgcg ggaacatcca caatagatcc agtgcgcttg
+   303421 acaagatctc cttcttttat agcggtatca ccaccaaaga caacaatccc gacattctca
+   303481 ttctcaagat tcaaggccat tcctttcaca ccgttggcaa aaagaaccat ttccccagct
+   303541 tgaatctcgt tcaatccata aacttgtgca atcccatctc caactgagac cactcgaccg
+   303601 atctcatcca cttgaaaatt cgcgtaaaag ttcctaattc gactttcgaa tagattcgtt
+   303661 agttccgcag ctctaggaga taattccata attccatttt ttaattaaga ggcaaggagg
+   303721 aataccgctt tagaaaaaaa aaaggaataa tttacaccga ctaagatcaa agaaactagc
+   303781 agactaatca ctaaaaaggc ctttggttca aattgttaca tctatcttaa cttactcaat
+   303841 tatctcgtga actatctgct tcaaaaagag agttggttga atgagactga aaccttcttt
+   303901 cttcgaggcg ggactcattc tcctgttgcc ttactaattc ctgatcaagc ccctgcctcc
+   303961 atatgtcgag ttgagcatct cgtaccatat cccaggcgcg attgcgcgcc acttttccct
+   304021 ccagctcgcg cacctcccaa tcgatgcggg cggctagttc ccgcccacgg tagggagacc
+   304081 tctcgttctc caggttacgt tccatttcct ggatccgttg tagatcctcc gccaattcta
+   304141 tatttccggc gtgtaaggac tcagcccttt ctattcggga gtagaacgag gctatggggg
+   304201 gagggggaga aaccgggtcc ggagagccag gttcggcagg aaagggcacg ggatgtgtaa
+   304261 gttgcggagg aaagggggcg agggaaactt cccttgccgg atcatttgca gcagactcag
+   304321 tccagccgtt ccccccacta ccgctcgccc ctgtagcagg gtttacgacc caacttgcct
+   304381 caccaacatc ggaatctgat ccgatgggca gcaattcccc gaaaccataa aacaagaaca
+   304441 cccctgccat gaagaagcta taaacatctc caaaagcaaa tgagaaaagg atatttatat
+   304501 ggattcgggc tgcccaaaga aggagaaagg ctattaaaca taaaaaaaaa tataaagtcc
+   304561 aaatgatcgg ttttttgaag tagcgcgaga aggtacgaaa ggtgaaagtc ataacaaaga
+   304621 aagttatgaa gagccaggaa aagtttgtcg caacaatagc gtcattccaa taagttacaa
+   304681 ttgataacga taccagttgg gcgaacagcc gagtaatcat attagatttg aggattagag
+   304741 gcaaggagga ataccgcttt agaaaaaaaa agtctacctt atgaactgaa cgaaagcact
+   304801 aactccagag cttgtgggca aagtggtttt ttttcgtgcc ctgattgaca aggtctaaat
+   304861 agatgaaaaa accatggaat gctctcgttg ttaaaaaaac tacgagcggc gacttcttgg
+   304921 agatttactc cggataaagg taaggtcaca ctctcactag tattagtatc cctatatact
+   304981 ctcaacagaa ttgagagttg ggacacgatt ttttcttgaa gcgcatctcg cgctacatca
+   305041 gtcaggatta tttccatatt aatactattt atttgatgga tcatttggcc agacctaacg
+   305101 gctacaagaa taaaagtaca ggcagagcca aacccatcct agaaacgaaa tcagcaataa
+   305161 gttgatccat gagataagga gtatttgatt tgagttttct taaaagaaga ccgataactc
+   305221 ctatcccgaa gatacaagcg cccggtcctc ttctcttgtg tcgaagtgta gtgtggtgag
+   305281 gttttgcctc acagaaggag tgggaaattg gggaaaaagc cgaaacggaa ctaacggaga
+   305341 ttgaggtata ctccgtgctg cgaaacggat tcggccagta caatactgag tattattgag
+   305401 gccatgaaga cggacagcgc ttttagcctg cctatagggt tcttttcgat cttgtactct
+   305461 accaattgat aggtttattt cgcactttag aaaaagtttg gaaagaaaag tagacggaac
+   305521 gacacctgtg aactcggata gcattgtggc ataccttttc atctgaacta ggctactttc
+   305581 tttcttctct tatgaaattg atagtttcag atcattctta aaagaagctc tctcttatat
+   305641 acgataccac cagatgtgcc ccttccaccc tcgtccaatc ttcctaagtg ccagcctagc
+   305701 ttcataggaa gctgaacaat acgtccagct ccttaaggaa tgaatacttg ggcttggacc
+   305761 atcaacgggc gaagcaggtt tcctgtttgc atcacctttt ctccggtatt ttctttttga
+   305821 aactcaatct ttaaatcatt caattataaa tagcttatat aacgaaaaat tggaattcga
+   305881 ttgaaaactc tctcaactca attggaagat aacatagtaa gtagctttcg cccaaaaaag
+   305941 cttcgcctag agtggatcag gtccttatac gtaaggcgat cccgcgaagc cggtcaccgg
+   306001 agtgaagtca acttcctttt ctttttgagc agatgttgct tgcaaataaa aagataataa
+   306061 gtgttttctt attacccgtt tttttaagag aaaagattta attagctctt agatatccca
+   306121 agtaagcttt taggtcaaaa tagatatgat gatatcagac agcagtgaac ccccttatcc
+   306181 ccatctgcat ggacttggct agtagtgaag cacaaccact cttctttcca tagagagtgc
+   306241 taccctagag taagatgagc gaccccagtg gcaacgtggg ccccaaggaa aaacccttca
+   306301 ctagtcaacg aagtgcatca acggatgtag gcacgttctt gcttgatcat ttgctctcac
+   306361 tagaccgacc tatcataaac gacggcattt gaccaggtaa acataatggc taacggtgaa
+   306421 gagtcgttct caggcttgtt tccgagtaac attctgtgct tccggcttat gcaacgtggt
+   306481 tctttaagcc atcaccactc ctttctcttc ctctcttcca aggggttgga agaaaatctc
+   306541 ccttatcgat gtcggccgac tgagtttcag tgcccagcag ttgcaagcta gcaaacaagt
+   306601 gagatagaag catttgaagg aaaaagctat acgattgggc gaactctttc tttcttttca
+   306661 ttgctcgaat taccattccc aatttcttca ttcttttatt gagctacttc ggtcacctcc
+   306721 tgcattggct ctatcctatc tcaatggttg ttgatcagcg tgggggggaa caaaaaggtc
+   306781 cttacggaag aagagatagc tagaaagaaa aagggataga gtcgatagct agaaaggacg
+   306841 caaaagggat cgatagcgag aggtagagga ctgaccgctc tcccgcccac atcctccctt
+   306901 gcagcaccag cttcgcactt aggtgcacta aagggcctga acgagcacaa ttctaagagt
+   306961 tctcgactcg tggcaccact ttcgcagcag tctgcttcat ccgataggtc agccgatttc
+   307021 cttcagaatc catctttccc tcaaacttct tcaacacgac tggggaagag cgcgtcgaca
+   307081 ctctcctgac atcagtacct ctgctaaaaa agaagctctc atcttctgag accagacaat
+   307141 ttccttgtga tagcactctc tggtatggct gggttgcgca gaggagtttg aggaaggtag
+   307201 cttcttctac gcgggtacgc ctatgagaat aagttagcac actcctagcg aagcaagcta
+   307261 tctactcttc tgtgctgtcc tgtcctagcg aagaagcttt gagtgctcct taatagtaat
+   307321 agtaggagct ttcctctcca aaggtaatcc caggatctct ggcacaccta actctttcct
+   307381 ttagaatcct tattcttggc agttacatga ttagctggta ttctcgctcg tacagttaaa
+   307441 caaaggtgta ctgtgagcag cttttgttct aattcttggg tagacagttc cactagctcc
+   307501 tcctcagctt tagtcccatg ctttagtctt ccttttcctt ttcaggtttg gaagtataaa
+   307561 gtatcatagt ttcatagatg gctagggagt ttggtaaacc ttagttctta tagggaaggt
+   307621 gaaaccaact ctcccttttc ctttcctagc gaagcaagtt ctctctactt ctcggtattc
+   307681 ctctgctttc tctagcgcag tgctgggtta ttccttccta actgttatct gtatactcct
+   307741 agcgaagaag cccgataaac tagtattgca cacattccag cacacacccg tggatgaaga
+   307801 ttggtcccta tctcgtgcta aaatcaattg ggtgccacta tagtataaac gcgggcgaaa
+   307861 gaccctaaga gtcggtttac cccttcccag gagtgtgctc aaccaaccaa cttagtagat
+   307921 tagctgccca tcttcaccta acctagttga cttacttatc tgtgttctat cctctgtgct
+   307981 gggatcagat caaaggaact ctcctgaaaa ggatagccaa tttagcaact tcacaagggt
+   308041 gtgttgtaaa caagcgtagc cctgtgttgg caaaagggat gaactacgga aggtaagaga
+   308101 tcagattcgg aactagcgct gcaatcttct gataaaagaa agtataaatc cctgcttact
+   308161 tagcagactt agcttagttc ctatccatcc atggaactag aaacccatca gcattagcaa
+   308221 ctgcccaaaa gagtcaccct tcgcgctacc tatgaaatat aaggaacgtc agttagtcga
+   308281 ctaaagggat ccttagaccc ggactcccac gactattaag aaagcactgc tctttctttc
+   308341 ttcttttgca tttcctactg aggaatccat tgcaagagct cttcttcccc caaacttctt
+   308401 ttcttagaga ggcggccctt caccttaagg cccggttcca gtaaacatag atagcggggc
+   308461 atgtcatgga aagatatcga cgcaatacaa agctgctcga gctcgccgta ggaaactaac
+   308521 tggcttagtc atgcctattc ccacaaaaca aacagaggag agtctttctt catgcaagca
+   308581 aaggaacaag gcagataagc taactacctt ttctcatcat gcacattccc aacgaaccaa
+   308641 gaaagagact ctttaatcaa agctagctac aggggaatga tccggcaggc ggagagtttg
+   308701 attgcgacag tccagccttt catacgagaa atggcggtga ccgtttgccc taaacaaata
+   308761 tctcaagatg taacacatat gccttctcgt agagcacacc cagctcgtat ttcctagatt
+   308821 tgcttacttt tcaccatgct gagaaggcat ttttccaaaa aagatttgtc ttacgaaggg
+   308881 aaatgtggtt tcttactttt caccatgtgg agaaggcttt tttgattcca aatgtggtct
+   308941 tacgaaggga aatgtggttt cttacttttg tcggtatata aagagcaagt tttggattcc
+   309001 tacagtcctc ttacgaagga aaatgggatt tcatcaaatc ctcgctatat aaagaggaag
+   309061 tttccctttg actctggttt cccttttacg aggaagctac cagctaaagt ggaatccttt
+   309121 ctctgtctcc ctctctttct ttcctttcta gtagcaaact tgattctgtg gctttccttc
+   309181 cattccgcga gagtagggca ccaaaagctg tcctatcatt tattggagtg gaaggccttc
+   309241 ccatcctcat tccgaaacaa agagagtaag gccacttgtg acctatcttc ttggtccaac
+   309301 ccctacttta agaggaaagc acaaatcccc ttttcgttta gccgatatct cttgaagtat
+   309361 ttgttttgaa aagctgcggt aatccactct tccttatttt tgctgtaact cgacaagata
+   309421 tgagaaaggc gaccttgtct tcgtctggca acactaccca aggcgagaaa tcaactacgt
+   309481 cttggccttg gaagttcccg gggtctatct ttctagtaag gctggacgta atcgtaatct
+   309541 tcccttgtgg agtgaagtga tgggatgcta aagatacaaa agcgtaggat ctggaggaac
+   309601 ccctttggtc tcgattccct gtagagagcg taggactcat gggatggtgt agtcgttaga
+   309661 cggccatgag tccgagtgat cggctagctg ggctgctgct tctgtcatta tataatatct
+   309721 cttcctctac tttaagtcga gttccgttgg tggggacagg agcaccggat acttttacgc
+   309781 agtaacaaga ggaagaagaa cgataaacag attcactgtg ttggaggatg aaaacggagt
+   309841 tgaagtcaag aagctcaaat agcgaacgtc atctcctcat actttaatga catcttcacg
+   309901 gcgagagctt cgggtaattt ccaactcgtc cgtgaagctc tttctccttg catttctgat
+   309961 gaaatgaact cagccctcac ggcttctcct agtgatgaag agattaaggc agcagtcctc
+   310021 tcaattcaaa taaggctcca ggccccgatg ggttttccgc caagttctac cataaatact
+   310081 gggatatcat tggaccagaa atttcaaaga agatcaaaga attcttcacc tcggcctcct
+   310141 ttcctcctcg ggttaacgaa actcatattt gtttaatccc aaaagtgaga gctccgaaga
+   310201 aagtctcaga ttatcgacct attgctctat gcaatgttta ctacaagatt gtagcaaaga
+   310261 tgttaacgaa gcgcctccaa ccgtatctgc ctttgattat ttcagagaat cagtctgctt
+   310321 ttgttccggg tagagtgatc tctgataatg tactgatcac ccatgagaca ctccattact
+   310381 taagaatatc gaaagctact gttagaggtt ccatggctgt gaaaacagat atgagcaaag
+   310441 catacgatcg catcgaatgg ggctttctaa gataagtttt gacttgcttt ggcttccata
+   310501 atcaatggat tgaatgggtt atggagtgtg tcacttctat ctcctattca ttatcaatgg
+   310561 agcccctcaa ggtttagtca ccccatctcg aggtcttcgt caaggagatc cactttcccc
+   310621 atacctcttt atcctatgta cggaggtgct ctcaggatta tgtcgaagag ctcaggaaca
+   310681 aggacgtttg cctggtatta gagtatcaaa caacagcccc cgtatcaacc atctcctctt
+   310741 tgcagatgac acttcttctg caagatggat cccactagct gctcaaatct ggccgatatt
+   310801 ctttctaagt atgaggctgt ttcagggcaa cccagtcaac cacccgatgt cgaaccttta
+   310861 ctttctcggc tcacttccat gatagtgcag aatcctaaaa gaagatcaat ctattgctac
+   310921 tgaaggtgag aaagaaagag tgatttgctt cttgcaaacc ctagtgagtt atctaccgcg
+   310981 tctgatatgc ttgctaagct cttgatgaac gaaactggat agaaagattc ctcttgatgt
+   311041 acgaaaacct tggattcctt actagttggt acttgatcca atgctacatc ggaaccggga
+   311101 tatcgatttc ccaagcctta ctcagagcga tgctcgatac gaggaggaaa gcaggtcttc
+   311161 tcactctctc tcatcttgaa atgcccctat ctcaataagg cagtgaaatt acattacatc
+   311221 aaagtctggg tctcgttatc cacccttttc cttttatcct ggcactgtgc ccctcagcac
+   311281 ttggacgatt gagtgatcat agggaaagta catatctaga tgcttctcat agggttgaag
+   311341 aaggtataac gaagctatgg tacatcagtt ccctacagcg gccgtcttag ccgaacccgg
+   311401 aaagaattaa aaaagacgag tgctcattat tctatttcat agccttccgg gagagaattc
+   311461 tttctcgatt cgtaccaaat tcctagggtg tgaggcgata gttttccacc catggcattg
+   311521 gcctacttac ttctccaatc tagatgtgcg tcttagtctg ctgtcaacgc gttatgggaa
+   311581 atcttctttg atcttcccag tagttctgtt accgctctgt gaggggaagg gcaggctgac
+   311641 gagtgactat tcatcatccg aaacaaggat ctttgaaata gtctcgtttc aagccaactc
+   311701 cagagctcta ttccataacc aggtgctgtg gcgagtgtgg tttccgatac ttgattgagt
+   311761 tcgattgctt gcccggaaag aagaattgtg ccatcgacgg ttgggagtca aataagagga
+   311821 agacaagttc tactgaaaga cttctggtca gacacaagaa aaggaatata ataataatag
+   311881 ttggtacggc caagagcact tgtaattgtt actagttgtc ttctctatat cttctccggc
+   311941 aaagaaacat ggaatagtcg tgcgaactct ggcggaatct agcactaaaa agagtggttt
+   312001 cattattggt agcaaatggt tatacttaat caagctgaaa tccgatggta gtatagatag
+   312061 gtataaagct cgtctgcttg ctcaaggcta taagcaggag tatggcatcg attatgaaga
+   312121 aagatttgct cccgttgcca aaatgactac tgttcgtacc ccgttggctg ttcgtacccc
+   312181 gttggctgtt ccgcccattc actataggtc tttacatcag atggacgttc aaaatgaatt
+   312241 tcttcatggt catcttaagg agacagttta tatgctatgc aacctcctcc tggctaccag
+   312301 gaagcggaaa aaactctcgt ttgtcgtctt cgtaaatctc tatatggatt gtgaaacaag
+   312361 tgcatggttt gataaatttc aaactatctg agagttgagg aacaccaagt atgatttgat
+   312421 tctagggcct ggagctcatc atctatggct ttgcgccaac atgcgagttc tgctgcctgc
+   312481 ttataggtag aagggacatg aacagaatga aaagtagtca aaaaagaaga tgataatacc
+   312541 catatcagac agtagataca gattgacgat tgaaacgtcg aagaggaact acagattcag
+   312601 atggtgcaga aggagcagag agttctgagt cattacctac tggggcaaaa agaaggcggt
+   312661 ggcacgaacg ttgcattatt cgctgggctt tgacagaccg attccgtcga agttttctca
+   312721 agatcaaaaa aaggaagata agaaatggag gatgtgggtg gtgcaagtgg tgtgggtgaa
+   312781 gaagcataat ataataagaa ttttccaaga agacaatgtg tcgggaaaca gctaggatcc
+   312841 agacgacata cctcctctgg aattccaatg aagcggccgg acaaacaaga cgaggggcat
+   312901 agaaacagaa agccaaaagt ggaatgattg gataaaagag atgctggaag atgccgagat
+   312961 ggtaaaacag tgggaggagg cagaattcct ttttctttag aagaagtatc ctcagatcag
+   313021 cacaccttag aggtagaggc gatgagcagc ctatggtgcc cagaaaccag ctcaaacata
+   313081 atgagatggg gtgaatcaca ttcactgtcg cggacgccca tgtgcttcgt aatttgccta
+   313141 aaagagtcaa gtctggctgc atacgataaa gtagccaaga aaacgatgtt gaattaatgg
+   313201 atcacatact tccgaacaaa agtaacagga gatccactta tccccccgat aggtttatat
+   313261 gccgcctggc ttagcaagtt catcttccct ttgagccccc tactacctaa tagggagaga
+   313321 cttcgctcca gtttcagttt ggttagtgct cagtcagtag taactagatc tcacggcagg
+   313381 tgccgcttaa tcagtctacc ggattccttc tgtctttatc acagattttt ttcttctcca
+   313441 gaagtacaga tcagtgctcc tacactacag ctcaaaacat actttgcatc gatcgattgg
+   313501 cttagcttat aagtaggaac ggaaccttct tttctcaact caaatgcttc gagtttaagt
+   313561 tagtgttcat tgcttactag taaattgata ttataagttt atcctaagtg gggagaagaa
+   313621 gataattgcg gatttcacac aaaataaatt ctaccaaagc attctccttt gaattagatc
+   313681 gctcgcgacc tatgtcaatc aaaggaatga atggttgaca ggcgatggat aaaaataagt
+   313741 ggaagaaaat gtgcgattac cacttcgatt ggactttatt ctttctttag ccataataga
+   313801 agctctgttc aagcggagga ataaaaatgc gttagtgact tcgatttgaa ggaaggtata
+   313861 cctaacggcc caagaagggg cagttcacca agaaggagcc aggggctagc gaagaagaag
+   313921 aatcaggggc ccctgattct ccgacttttg cagaagcaga attgaaagac tttttatttg
+   313981 ttgaaactcc tacaattgag gaggacctcg agttcgatag attgttcgag gagatctacg
+   314041 acagaatcct ttccattttt gaaagggaaa atcttattct cccccccccc ccaaatccac
+   314101 tttcttctat gatggatata gtcaatttcg tcctcgtgga cgattcagat atgtctgaac
+   314161 tcatatatat atatatatga tgacttatat ctaaatggaa tccagtctat atatttagaa
+   314221 gagtttgact ctacctacgt ccaggaaatg ctactttttt ttagcgtggg agggtaattc
+   314281 ttggagattg agggaggaca ggttggttcg aggacccctt ggtcaaagga aaggtacaaa
+   314341 ggaacttgac cacttgttgg gagaggttgt gaaacaaact cgactaaaag aagaggtata
+   314401 aaatgattcc ggggcggagc catatgacgc gagagtgtag actctggaac tcagggagca
+   314461 agaccctaaa gaaagttcaa gaagctatga agagtggtaa actctaaaag gaaagataga
+   314521 aactggggag ttggctgata aagatggaca gtaacgattg cgtaatataa atttatcggc
+   314581 ctcgtcatcg aaagcggctt ccaattgctc ggaaattttc agctatatgg ggggcttgga
+   314641 tggtgagcaa aaactattga tcaagaagtt ggtcaatttt cgcatgaaag aaggtaaaag
+   314701 aacgagagtt cgtgctattg tttatcaaac ttttcatcgc ccagctcgaa ctgaacgcga
+   314761 tgtaatcaaa cttatggttg acgccgtaga gaatataaag cccatatgcg aagtagcaaa
+   314821 agtaggagta gcgggtacta tttatgatgt ccctgggatt gtagccaggg atcgtcaaca
+   314881 aaccttagct attcgttgga tccttgaagc agctttcaaa cgacgtataa gctataggat
+   314941 aagcttagag aaatgttcat ttgctgagat actggatgct taccaaaaga ggggaagtgc
+   315001 acgtaggaaa agggagaatc ttcatggact ggcttccacc aatcgaagtt tcgcgcattt
+   315061 cagatggtgg taaagtgaga ccacataaag agctcttcgt cattcagtca gattattaag
+   315121 taagatatgg tttgaccctt ttcctttttg ttttcatttt catccagaaa gccggccttc
+   315181 ctcatactcc tcccttcatt cattgagtta gaggaatcca taggaggccc acccgttatg
+   315241 cattgcatga aagaaccttt cttttttgta tgactgagag gaacgcctca ggtcgaatga
+   315301 atacgaaagg gagatcaatc caaagaaaca aaaaaggcca tgaatgaaga agttgggcct
+   315361 ttcaccctct ttctagttac tctgggagct gatctgataa atgcacttca aagggaggga
+   315421 aggctaggaa tctcgttttt gctcctcgta cacatcttcc tcagtccggg cattccgacc
+   315481 ggcggaatag gcattcatat aatccattta gccatctgag gcacctctat tggatgagtt
+   315541 gacctcacct ctttcattca gatctgcaga gacaactgga acagaagctt cactggaaga
+   315601 ggtagctaag cctgccgaat cttgggaatt caaatctgtt accgacctac gttgaaaata
+   315661 aaccagcagc taactgctga gtaagaaggg cagggtcggt actgtgtagg tggaacaaag
+   315721 taaggtcgag agtcgcgagt caagaaagaa atcttcttag caccaaagga ttataagtta
+   315781 gagttagacg atggttcaga tgttatgaag cacaagggtg caggtaaaga ctacgtcact
+   315841 gaagagtggt ttgaagaaca atacaacgat ccctgctcgg acagtaactg gtcgaaaagc
+   315901 ctctgctaaa gatcgtcttc ctagcaaaat gacattctcc taactgagag agtcaagggc
+   315961 agctcttacc ctgccaagcc aatctagatg tcttacgcat aacacaagct aagccgcttc
+   316021 cagtgaaagc agtagtgagg tagctcaata aacctagcaa acaccggcta gataagtggg
+   316081 gcaggctgct aactcagggg ggattctttg tcgatagaag gcattcgcga aagtcgccga
+   316141 tgaagagaag ctttactaaa agagggcttc tcattatgtg gatacaaagg cttcaagtgg
+   316201 tgcggttccc acaggaattg caaacacctg aagagagcgg gtccaacccg atacgatatc
+   316261 cttctttccc tagaaagaag tattcacata gtccaatcca ttacagcccg gcagtctaac
+   316321 aggcgtagat cacactggaa ctggagtagt ggaactagca cgacgaacaa tgctcggcac
+   316381 tctggtcaag tgagcccctc tcattctcta taatccctat agttgccctg tataagcatt
+   316441 cttaatatgc cctctcaggc taatgaaagc aaaaaaaagc agaaggccct tactacacta
+   316501 agcgctacga gagcccccta tacggaagga gactctattt ggtcaaagtc actttccaga
+   316561 agagatcaag tcgtttactt cttatttgtg actcttgcca acaattagtt ctgtaactgg
+   316621 cacttgactc aaaccgcttg tctaaatact ggaaaagagg gaattgattc aagattccct
+   316681 tgtgccctac aaccctcgag gcttttaagt ccatagcgcc cgattcgatc actctatcga
+   316741 atagaagtcc agggatgctt caatcgattc ttaacacctt gaaaactccc ttctttattt
+   316801 ttgatgagaa ggcaaatttt tagctttctc ggcttaacga ctctttcttc gaaccttttg
+   316861 gtatgtattg ccccataact atagatcaga tccaccagac gagaaactca attccgcact
+   316921 gctgctgagt cgtcggactt atccaccaag ggattcgtgg aatttctgtg aattgcgttg
+   316981 ggggaaatct accaaagcta ggcagataga tagactcctt tcccgctgct aagggagagc
+   317041 aagaaataaa ataaaattct tgttttttaa ttagcgcctc cttttgagta tgccgatact
+   317101 aagagtcctc tcactaccaa ccagcagaca tcatctggct gggtcttgcc ggtatcaaca
+   317161 acgagaaaaa aacaaccaaa tggaaacagc aactaactat gactcttggc ccagacaacg
+   317221 tcctaggcgt agggtagatc ggagcaagag ggaacgccta gacgacgttt ttattcctgg
+   317281 gctgctgtaa gtagatcgag aggtcgttcc gccccttgtc cccgaagatg gggtaatatg
+   317341 ttctcaaaaa atgttgcttc aatctgacct agctggcgag cgatcccagt tcgcggcaga
+   317401 gaacaagaca gcaagcgagc gtacctttgt tcgcttcttc tccaccaagc acggaagttt
+   317461 aaggataact gtaggtcggt ggctacctaa ggaaactgcg attcgatccg ccccccgaaa
+   317521 aggaggcatt tacctcatcc cgatcacaag gagaggttca ccatgatggg ggtacttata
+   317581 attttccttt ctagaaagaa tgaaatgcat aggggaccat ccttttgttg cggcatgctc
+   317641 ctcaatttac ggattcgcag ggggcggaac gacctactta gttgactgac aatgacaaag
+   317701 gcttgcgaaa ctaagtatgg cctttcaaat tgaatcttta gtagtctatc agtggtgcga
+   317761 agagaaacat atctttttat gacttctact tattgatccc ggcccggaaa agtgagcgac
+   317821 caggtttata aagactggtt cgaaaggcgc gtctagccct cttgatgaat gaaagaaagg
+   317881 gtttctgagc cctagccctg taagcccaca cctgtgtaga gtagatcgtt caacacctgc
+   317941 ggcacaaacc aatttttgac ctattggtcc tagtctcata ggcgaccgaa cggccgcccc
+   318001 ctaattacta gaccgacccg ctataataat tccataaaca cctagcgaag atatggcaaa
+   318061 caaataaagt agccctatgt tcggatctga caataccata ccataatcaa aaggtacaac
+   318121 ggcccgagcg accagactta acataaatgt agccactgga gccattctaa aaaggaaaaa
+   318181 attagcacta cttggtgaaa taggttcttt tagaatcaat ttcgaaccat ctgctagagg
+   318241 ttgtaacaat ccgaacgatc ccactacatc aggacccttt ctacgttgca caaaagccat
+   318301 tactttacgt tcagctagca ctaaaaaggc tactcctagt agaagtggta gaattattcc
+   318361 aagtatttca gctggaacag ctatgtacgt tttcgatttt ttatttactc atgatctggc
+   318421 ctggtcgacc caatcatgat attgaaggat gggacctttt ctcgaaaaac tcccgatacc
+   318481 gagaagagtt gaagatggtg caatattcca tctattctca atggaacgaa ccccctatca
+   318541 ctttactccc gaaataaagc accccttccc cttctccgcg aactatctca atccccaagt
+   318601 aattgtttaa tataaagaga ttcgtcgccc cgtctgagaa ggtcacgctc tatttcgcct
+   318661 agaaataaga tctctaggcg gtccatttcg gccgggctag atccggggtt cgaaatgtcc
+   318721 agcgcgtccc gcctctcgtt caagaagtcg aaaaaagcct cttgaggaga gtaggcggcc
+   318781 ctttcttcca aataaggatt cctcaaaagc actccccgaa agacttctaa acaggagtgc
+   318841 ttccgttccc ttatttggag atcccggttc tcgaaatcca agagtctata atactccctt
+   318901 tgttcggaaa cattataaag acattcttga atttcccccc aatactgccc tttctctttt
+   318961 ccaagcagaa aaggggaatt cgccgcctcg agcaggcgaa ttcgattaca aatcgaagac
+   319021 tccaacccga gatttcgggt aatgggccag ggttccgaaa gaaccctcag cccaaaagaa
+   319081 tcctccgacg aggaggtgtt ggaaggggtc ggggctgtgt tggggaagag agcctccgcc
+   319141 gcgtcgtcaa gggccaaagc aaaggatggt tcaacaatga atatcgaaat gaaacaagga
+   319201 aaaatggtcc gaagaatctc gatagtagtt ccatgaacaa ttctttgcgg gattgcattt
+   319261 tttttatagt ggaaatgcca taaagcgcga accaagatcc ataatacgaa aaccaaaatc
+   319321 agaatgagga agaaaaagat atcgtgatgt aagtctatta ttccttgcat tataggtgta
+   319381 gctgcgtctt gagatcctaa ttgccatggt tccgctgcat cacaaggaga aattgtgagg
+   319441 aataaccatt ttagaacaat cattttcaaa gcaaaggttc cttcattttc tgctcccccc
+   319501 aaacaaagag agactgattc tgactctccc aattaaggaa gacggaaatg gctggtgccg
+   319561 gttggtccaa ccaagaaaaa agagatggga atttggggcg taagattcta tcaatcggga
+   319621 agttggcctc actcaatagt cggatacaat cgtggacacc gtagacctgc ttcgcgggat
+   319681 cgccttaaaa ggacttgatc cgcacatggc gaacggtatt ttggcagtgt tttcttaatt
+   319741 gatatctcta actggaatga taatctgatt atatacgtat tattccaagg aggatcaaca
+   319801 tcccattcca ttacaaatgg ttaaacagct atcctttacc atacatctct tcccgctacg
+   319861 cgcctcacag ccggtattgg cccttgctgt gctccccctg gaactgtagt tcacttcctt
+   319921 cctccgggag ctgcagctat cttctcaaca atcaactatc caatcttgtt gctaatgtgc
+   319981 aaaaagttga agcaggtgtt gcgaagctgc cctgctgctt tgattgcgta gctacctccc
+   320041 gcagggaaat gcagctaact ccttaaaaag agtaaacagt ttgttagtag gtatgagttg
+   320101 caaggaagtg ttgcgtagct gccttgctcc ttagtaaagt aagcacggga accctcccgc
+   320161 agggagctgt agctagagct ccttattaag aactcccttg cttgttttgc ctaggcatga
+   320221 gtagttcagt tcgaaaatgg atcctcaata aaaaaaatgg aatatctgtg cggatatgtt
+   320281 cagctttggg tggaaaggta aggaatttta ctccgactaa gattattgtc catggtgtaa
+   320341 cggacacagc caaaataact ctcgggtaca acatgctaca gatgcctgtg gatcgctggg
+   320401 aggcgtacat acaatgaatg ttattgatgc tactccttct taccattttt gactggggcg
+   320461 accccccatt gatcttggct tgcttctttg ttcttggcta ctgcttggtt aactattgcg
+   320521 aattatttaa tggtataaaa aatacttatt ccttctcgca tcgcgcccta gaggggccgc
+   320581 tcggacgaag aaagtcaggg atagagttcg agtgagacat caaccataga tttcggaact
+   320641 ttcgagatgc tttcttgcga aagccaacgg aatctgtaac tcaaccttct cacccatggg
+   320701 ggattcaatc atattcgatg aaaaatttca atcgagggca ccctcatttc cagagataaa
+   320761 atcaggctgc acagaagggg aaaagccctc ctactgagca agatcattaa tgaataagga
+   320821 agcacttaac ttaggacagc cggtctcact tcacgcagca tagcgccttg ctatttgcat
+   320881 tcatccttgt aacttaccga agtgaggaaa gaaagccaca ttggggatcg cgagtcggga
+   320941 gggtggcgaa gatattgtgt gtgtcagcgg agtctttccg gggttggttg aaagcatgag
+   321001 cgatgaactt ctcgacttcg atctttttct tttaagttat gggttgattg tgacctagta
+   321061 taggactgat ccaggtcaat tcaaaggcta agcccaagaa ttccaatagt tcgcttttta
+   321121 gttttagcgc tacttggata cttcaagaaa accatgaatg accattgtgc cttcataaag
+   321181 aggtacgtta gcggcgactt tctcatactt ttgctatatg tcgatgattg attgtgggcc
+   321241 atgaccgcac caagataacc gctctaaaga aagatttgaa caagtgcttt gccatgaaag
+   321301 tggggccagc aaaacattgg gatgaagatc actcgtgata gatcgagaaa cctgttatcc
+   321361 taagaacgat ctgttgggtg gagagattca atatacacaa ggcaaagcct acttgctgga
+   321421 catttaaaat taagctcgga acaaagtcca acaaatgaga aggaggaaat gaagactacc
+   321481 ccatatgcat ctgcagtagg cagtcttctg tatgccgtgg tgtgtacaag gccagatatc
+   321541 gcctatgcag ttgtgagtcg ctttctagcc aatccagact gcaaatggat cctccactat
+   321601 ttacgtaaaa gttgtctatg ctttggtaat gagaagcttg agctgatggg ctataccgat
+   321661 gtggtgtgct gccgacgaca aagattctag gaaataaact tcagggtata taactacctt
+   321721 tgcaggagga gctgtttcct ggcaatcaaa actacagctt tgttccacga ctgaagcaga
+   321781 attgctgcca cagaagcttg caaggagatg ttgtggatga agaacttctt gcttgagttg
+   321841 ggagtaaagc aagataaata tatctatgtg ataaccagag cgctattcac ttagcaaaga
+   321901 attcgacctt gcactcccgc tcaatcgata tacgacatcg ttggatccga gaagttctga
+   321961 aagatctcaa caaagtggag gagaactggt cagacgacct aggtgttgtc gattcccttt
+   322021 cctaatataa aaagagtttt cttgcattcc tattgtctaa ttcccatctt catcacacga
+   322081 aagaatattg ggtaaatttc ccaacaatgt ctggaacagc cactatcaaa cagggaatac
+   322141 tctcatctta gcacactcct gtcacatgca tttgagttct ccttcatctc gaaaaatcac
+   322201 tcgcgcactc agacttcact acagcttgag ttctgtaatc aggttttctg catctctgtt
+   322261 gtatctccgt ctgtaggtgt ttagcacact gctgtgagcc ccatatagat ctgtcctttt
+   322321 gatccatgct tgattacgtc aaccgtagag cgcatccaat ctgtcaaaat agtcgtagca
+   322381 gtaagaatca gtaattacag cgtggccttc ttttctttct gatacgtatg cttgctcctt
+   322441 ctgctttcct ttgacgaata cattccgtat ttgagacgcc ttgatatcct agaccttact
+   322501 ttctcactct tcttatttga ctgttcagaa tcttgtctag ttcttttgtt cctgagttca
+   322561 gcatccttat cagcttgagc tgcgtgtcta ggtctgtctg aagcttagct gatttcgatg
+   322621 tctgtcttta gcttctctat aagacttcgt agctctgtgc tgtgttctca ttttccagaa
+   322681 taaaattctc tttagggttt cacacatagc cctgtcctcc ttttcaacat ctttgccttc
+   322741 gacatcgacc tgatgaatca gactctgtgt cactttcttc tgttatccct gaccatgcaa
+   322801 cgaatcatgc ctaacgaatc ctcagattca tcttcatctg cattcactgt catcagcaca
+   322861 cagaagagag cgagtattcc caatccctca gcacaatgca cacacctaga acaacacctt
+   322921 gtccctttct cggttatccg gcacaacatc gtcactccat aacgattcgg agccaagctg
+   322981 gcatccgcaa acccaagttg atactctctc ttctagcctt ccagatctaa gagcatgtcg
+   323041 ttgttcttgg aaaagaagag ggattatata gctccttgtc atctacaata ataggactcc
+   323101 gacaggccag ccagaaataa actatagaag gaggttgctt cgctagagct gaagaagttt
+   323161 cgggctgaaa agctgcctca ccaggaagac ctgcttctta ctttctggtc gtatgcccga
+   323221 taggatttcc gtagtataaa ttgaaactcg ggtaccctta tcatttcact aatcaaaaga
+   323281 ctcaaggtct gtgctcttca cctgttcacc tgttctcttc acttgcttag cagcctcact
+   323341 tcgaaggtgt gctgcatact ccttcctgct tgctcttgag cacactcctt ttggagattc
+   323401 catacactac taagtaaaac gtcgtcaccc aagtggaaaa gaatcacttg aatccagaag
+   323461 atctagagtt ttgtttggag ctggcttgtg cagcacactc ccacccaact catcttcttt
+   323521 gggaactaaa agttgtttag ctgtcattct tccttccggt accctttcag tccaactgag
+   323581 caaattagtc aaactttgta ctcacaatga gagaggaacc tgtaggagct ccccataggt
+   323641 ctgaatgaac ataatccagc ttttcttcct ttgtatcatg ttgagctaaa tcaaaactaa
+   323701 ctctcttagc tctgccgtat atgcaatctt caaagatgct gaaggttgac actttctttc
+   323761 tgtcaaggaa tcctcttttt actaagagat ccatgttctt ctggctcatg tgcccaagcc
+   323821 tttgatgcca caagactgtg ttatcaactc tcttctccac tgatagagag tgtccagttt
+   323881 aaggtgtccc acgcaatatg tataaagtat ctagcctctt cctcaaagcc gatgcttctg
+   323941 actcaattgc gaaaaccatt ctaagactaa ctgccgtaat cccactaatg attgaatcga
+   324001 gagtcatcct tcttgctaaa ctcctccctt ctgtgaaact cactttaagt aagcagatga
+   324061 gcctcttacc cgtgcaacat cagaacatgc attctcgcga tctacagagt ccttatgggc
+   324121 ttgctcttga aaaagaagaa tgaaccttct tgttacagaa catacattcg aattcgttcc
+   324181 ttccccagca ctgactcttt cgatgctaca gtcggctcta ctgattagtc aactagtcaa
+   324241 atcgctttca catctcagat gacatttgta ccaacagact tcttgataag aactccagat
+   324301 gatccagctt attttaaaga taggttagcc agccccttct ccttcagggg atgcagcaag
+   324361 actacgtcca cttcttcctc aatctactct aagaaaaagg caagtacagc tacttacttt
+   324421 agagtcgatc cagttcctcg ggggtctcaa tcaagtaggg tatgtgagcc taaaactaaa
+   324481 ctaatagtct atcaacctgg gaactatcaa aaaaaagtaa agagacaagt aaccgatcct
+   324541 ctttctttaa tggataaagt aaaaaaacga attgataaaa cagaaatact ctgaggggtc
+   324601 ttggattgta gagctgatca cagcagccag acagaagaca ataataatcc ccgaaactat
+   324661 aatccgccgg gcgaaaaaag tgcaagtgtg tgcaggtcaa gctcagggga agttgaatct
+   324721 agcggagggg tctttgcttt tgcttttcat gactcagcta ctattgaatc cgatcctagg
+   324781 gcattaaaag aagagtacga gttagcaggg ctgtctcacc ttgggcttgt tggccggcgg
+   324841 aataaccgca gtaacctcct tcacttcatg cctttgatag aacggagatg gtaattaggc
+   324901 tgctggtagt acaactaggc tctttctttg caagattctc agcatctacg gggagaatgt
+   324961 cccaaagccc cttttctcca attgcaattc cagagggata tccttttaga cgctctcccg
+   325021 gcattagcgg aataagagca gtgggacttg agctagtgag atagattcac tttcactatt
+   325081 tgaatgcccc tcttcgattt cgatgagcag aggattttcg gacattagct ttagcagacc
+   325141 atttgcggca tatactacta tttccatgaa gactataact gaagacttac tttcttatta
+   325201 ataaacttac actaggaata gccccgagaa cccatagttg cccgctacgc cccttctgct
+   325261 actttctttc taggtccaat aggctctttg tcggtcttga tgccgagaac tgagttaatc
+   325321 ctaaatgcag ccgtgatctt atatacgatg ataccaccag acgcgcattc cttgggatca
+   325381 gaaggaagag ttgtcaacgg gaactggcga gacacttctg acgttaaaga aggaagtctt
+   325441 ccccgggagg taactaaaca agttaatgga agtctgtctt cccgaactaa gcaagttaat
+   325501 gagttcagca agcacacaag gtttcttgta gacataagtt tcagttgttg ttccctaata
+   325561 aatagatctc tttgggaatc agcacagaaa gatgagctta gtgattcctt cgggaaagca
+   325621 cttacaacaa agccagaatg tttagctgtg agagaaaccc ctaggaactt tagaagaaat
+   325681 ctgtgcttag tgattccttc tttgaattga aatgcttact atagctaaga aatcatacgg
+   325741 gattacttgg accttaccac gttccctccc ctctcttagt atgggtcctg ccccgtacga
+   325801 gcaaatcgct tcaaaaggat ttgtctccaa tctctttaaa ggcactatta acttttcggt
+   325861 cttcacttgg aatctgtgca ctatgagtgc gttacaattg ttctttgaat actgtggcgc
+   325921 gcctttcagg tgctggtttc cgggtgaggg caaaccttca atcaaatgtg tccaagaaat
+   325981 ggactagatt gaggacaata tattctaaac ccccgtcatc gtctcaacta cctttagagt
+   326041 ggtggattgc tggagacaaa gtcccctctc tccatacata aagggaattc tagttgagat
+   326101 gctgaagaga aaattgaagc ctaaacgact tcaattgcct cctcaggatg tcgtctttga
+   326161 aggggaggca gctatgaatg agtatacttt ctatcgtaac tgggtggaat cctggttaca
+   326221 gcacatccgt tcatactacc ttcttttcat tgatggagat ccttctcttt caaagttctt
+   326281 tgagattgag atatgtgccc actcttggaa gcgttcgact ttcgaccaac aggtctttaa
+   326341 gtttggcctt ctatgggagt gtgtggatat tgctcgctct agaacagttt attggcaatg
+   326401 tgctttaggc acaggtcata tacaggaaga taaggtatcg gaggcaacaa gcccctttac
+   326461 cgatgacagt tgtaccaaca gctgtttatc aaggatgact ggacaataaa gaatgggaac
+   326521 tgggaaagtc actgctattg gcggatcttt gcctagaacc gaaactgatc tttagctagc
+   326581 cgaccacgtg ccagaagttt atactggcat tgcaataggt caagcaagtc tgctggcaac
+   326641 cctgtagcct attggcttct tcctatatcc ctattcgggc aaacttttga ttccattacc
+   326701 gagccaaaga agattcttcc cctagttcta cttatcacct tttcatactg ggcagcatct
+   326761 attgagagtg gattgacggt gttgagagat ttccagcata agcagtagtt gaaccagcat
+   326821 aatgaaaaag ctagaaaaag gggcaagtag gtcggcttca gccagaagag cttaaccgaa
+   326881 tttttccaat acaacttaag ctccaggaag aaagcaaagc cttgagggag tgagcttagt
+   326941 tacttttttt cggtcggttt gctaaagcac ctctcgggca ctaaacaaaa agtaatcaaa
+   327001 aagtaatact taataggtcc ctatccacct ttctaagaca acagatcgaa ctcctatact
+   327061 cccttcgttc gaagaccact cttgaagagc ttttccttac ctatgcccct tagccaatca
+   327121 agccggactt tccctttttt tcttgcctcg gtctttcttt ggtctatttc tgcgggtcac
+   327181 taaactaacc ttctctggtc cgctttggct attgaactaa tattcttccc ccccaggccc
+   327241 caagccagtc agtttgacca ggaatgcctg cgaacggtat aatcatgaat aagaagagca
+   327301 agcggtaaac gagtgcgaag ggagcgaagc gagtttttct ttttaaggaa gtctagtctc
+   327361 cttcaggcga gttgaacgaa cggtctactc aagctcctga gagcgagcgg aatagcctgg
+   327421 tctttctaat caaataagta gttaacccgc gattgacctt tatcttgtct acctctgtct
+   327481 cgcatgagaa atcgaattcg gggtgatcaa aactagccaa cgaactcagc tggcgagtgt
+   327541 gaggctatag atatagtcta taaaaaaaaa gaattgagct gacaaaaggg aagaaagagt
+   327601 tgttacgccg ccttcgacaa tcgtgaaacc aaccaacagg tgctttcttt tgttgtgtcg
+   327661 ctaaataagc aggcttccgt caagcgaggt cagccacaaa gggaaagaag aaagagtgag
+   327721 cgagaaagcg gtgagcttac tctttaagaa ttgattaccg ttgatggcgg ggggcggaaa
+   327781 cctttccctt tctagttgtg aattccgtat ttttgaaaaa aaaaaagtgc ataatattgg
+   327841 attcaaaccg acggcctacc ctttctttga accttgtcaa tgatcgaact taaagatatg
+   327901 aactgagtgc catttgatga gtaactgaga acaaaggaga ggggtatagc aaggatgaag
+   327961 taatgaaaaa ggaagtcatt gctagtagta acgacttatt acgatccatt ggttcatata
+   328021 gaatccatgt cctaggtgta tcaaaaaaca ttcttttcac taagcgtata taataaaata
+   328081 gagtgaaaaa gggtccaccc cgaaaccaag gggatactaa ctaacagctg agtcctctcc
+   328141 aaaccgcagg agatagttgc ccatcatacg gctcaccaac ttggcctcta tgggaggctc
+   328201 actccgggca ggttcggatc acttataata caaagctcgg agaaggaagg ggttaggtta
+   328261 ggaacgcagt aactcgaccc ctcatcaact aattaatgag accttatcct tgggagagcc
+   328321 ggaaaggcac tcgactaaaa ggttaaaaga tctcgactga aaaggagagg gcgaagtcat
+   328381 gactcgagca cttgttgcga gaggtccata ggaacaaagg agctcgactg taaggaaagg
+   328441 aacgagagtg aaacgagagg agaggtgagg ttttcaatat gattcttttt tcgtatttgt
+   328501 tcgatgagaa atgcgagtcc aggttttgtc cattttttct atccccctct atcaaacaaa
+   328561 atgatcaaaa aggaagttta ctggcttctt attctcgtct ttgatctctt ccatctctgc
+   328621 ctcgctcgtt gtcacctata ttgaagaaag aaaccctgta gagaatgaag aggggcctag
+   328681 gatcttcttc tcaacagtgc ttctcgaggc tccccccctg aataagtaag gccccgttag
+   328741 cctgggcgaa gatggggata aggaataagg attgaagccc ccttagctct gccaggcact
+   328801 gacaggggtt agctcggtaa atgtgtagag ccaagtgtag tatggtgtag tagtagtagg
+   328861 cacttctagg ccccttcccg gctactggat cactccagtg ctttgggtac tacggaccct
+   328921 ctgccatcca ttgcagcaga gccgtttcat gagcgggggg gctaggcgca gttctttgaa
+   328981 tcaaacgttg aatgaaatcg attctttttt agatatgaaa atggaaatcg gataggatag
+   329041 atggatggat ctatctttcc atttatatat tactaaagga tttatatagt taagtatcgt
+   329101 agcaagaagc cccaaatcct tgatttggcc aggaaagact gcactgcttt gggcccagga
+   329161 tgcgaaggga atgagctcgg ctgcttctcc tccacactga tttttctccg tgcctgctcc
+   329221 gcatgcgctt ccccattggg ctttgctctc ctcttattct tcattggacg gttcggttcg
+   329281 gatggacttc gccgttcttt cccaactaaa aaagaaaagg ctgtatcaca tcgagatgtc
+   329341 gattcgtttt ccgcccccaa tgagatgggg aatttgtaac cccctatttc tactttgggg
+   329401 cccttcatct ttctgaatcc aggcccgtcc cggctcgtgt cgttccaaca accggcgggg
+   329461 agcatctcag tatacgatcg cgcgcagtaa ctgggagtcc tattacacct aaggcgaact
+   329521 tcaattcacc aaaccaaggt tcatctcgtg tagtgattgt ggactctact aaggatattg
+   329581 agtagacggt tgatgtatca gactcgaccc tatctttcgt agcatgcatt cccatcggtg
+   329641 tcgcaactga ttcggtaagc tacgtgtccg gtgcacggag aactgccttc ggtcctccaa
+   329701 acttacttat tcgtggcaac cttccggccg cccaacgacc tataacgcta gtcactactc
+   329761 ccactggggc tagaaagtaa gccccacaac ccaaagcggc gaagaacaaa tagaatttgc
+   329821 tacaaaagcc ggctaacggg ggtattcctg cgtatgagaa catagtaatg gagaaggtaa
+   329881 tagccgaaat aggattcgtt ttggctagag cgcccaaatc cgctatatat ttgacacggg
+   329941 tttgccgtaa tgctgaaact atggcgaatg catccatcgt cattaatgca taaataaaga
+   330001 taccaattag tagtgattga attccttcta tggttccaca tgagaaacca gtacgaatat
+   330061 aacctacatg tccaattgaa ctatgagcta gaggtctttt gactttcgtt tgggccatgg
+   330121 cggccagtgc tcctaagatc atagaagcaa tgctgcagaa aaagaagatt tgttgcaatg
+   330181 tagctccata ggaaccataa atagaaacac gtaaaatatt agcagaaata gagattttag
+   330241 gcgcaataga aaggaatgct gtaaccgggg tgggtgaacc ctcatagata tctggtgccc
+   330301 acatatatag agtcaaaggg aatatggagt ccgaggggga gggggttttc ttcggggctc
+   330361 gagagattga atagacccac aagtttgaaa ttcgccactg gggaattcac acgaaaggga
+   330421 acgaggaatt ctcaacgaaa aaagtgctct ctgaaccgaa cgtgaaagtt tttttccatc
+   330481 agacggctgt tcctgtaact ccactctcga cttggattat atatccaaaa aggtccgctc
+   330541 tcggccattc cacacgctgc ctagccgagg tgtggtttct gaagtggatt ctctcttttc
+   330601 tagtagatgc cgaacctgct gccccattga ctaagaagag gggcgggcgc agcagctaca
+   330661 tggacccctt cctttctgtt gttgccagcg ctttcccggg ccgagccgga atttttgatt
+   330721 attattctaa tgggcaggca aagccgcagg ctgctatccc aataggccag cgggcggttc
+   330781 gaggagaggc tccggcaatc atagcaccgc ggtcgaaaaa gcgtgctccc ttcagataac
+   330841 acgcaccgta gctatcctcg gccttcttcg ttttcgatgc aaaagaaagg aaatctctcg
+   330901 atctccgaaa gccttttcct taccccgccg catctccctc ccttcgtcga gcctttccgg
+   330961 ggttgttcct tccttatcag aacttggcgg gcattctttc cagccggggg atgggttttg
+   331021 tttgaacata ggctcaaaca taattttaag gtcctagcta cgccatgcgt tcaataccaa
+   331081 atctggaaag ctgcagaaat gacaaaaaga gggccgcttt cctatgttgc actgaaaaaa
+   331141 gaaggaccta agagcgtctt ctcttaggtt ttttcctccc gcgcccgccg ccgcctggcc
+   331201 cccgttagag ggaaggggaa gattagcaaa gcgaaaaaag acagaggaag ggggagcagc
+   331261 atcttattct cttcgcgaga acttccggat tgaagaagca ttcggaacgg gctccagcgt
+   331321 tcatttcatt ggcgggaaac gatccaatcc attcggggct tcggccaaaa acgaattcga
+   331381 cttgattcat agatagaatc aatgataaat aaacaaaaga tagatgttcg agatatcttt
+   331441 tctttctata aaaaaagagg aatagaatag acatcccttt ctttagatgt ctatctatcc
+   331501 ttccgatttt atatcgttat atctgctctc tcaatttttt tttagagaga gcagatagat
+   331561 cctatcccct atatcgaaca ctaattccta tctattgata ggaagatctt cgtctaatag
+   331621 cggactttgc ctttttttag gaatttctca tatccaaggc agcttaccac aagaacccca
+   331681 ccccatacaa ctagttgggg gggctgttcg ccttttgaat caaacaaaga ttgaagtagg
+   331741 taggggcttc atagctactt tcattctaaa ggaaagcgaa gaaccaatct ttagtcaata
+   331801 ggagccctac ttccctcgac ctcatcactt caattgttgc gcaaacctct ttcttagtca
+   331861 ccgggggagc gcacctttgt cacagtattt cgagctgttt gatagtgact tcttgcgttt
+   331921 ggtagggcga cgaaagggct acttccttca atctaggaaa cagccggaaa ctcgagaagg
+   331981 tcgcctttgg aacggttcgc cagtaaccaa agcctcactc cttccttttg attatgtcgt
+   332041 gacgctgact gaatccaatc tataaaaaaa cccggaaact caacaaagtg cgttcccttc
+   332101 gtcaagtagg taaagtaggc atacgaacca cttttgcttt gcggaataaa acaatgaatg
+   332161 aggcggaatg gagaaaggaa aggtctgcag gtatttatta ctcttataaa agaggaactc
+   332221 gagcttattg cgagaggtgc tttacgaact cgactaaaag gagaggggcg aagtcactcg
+   332281 agcacttgtc tagagaggtt gtgaacacaa actcgactga aaggagaggg acaagggcgg
+   332341 tcttgcttgg cgcgaaggct gctggtttgg gggtacggta ctaaaggtcc tcggacttcc
+   332401 aggcggtttt tattttgggc agctgttcac cgttggatct cgccaataca gccccctatg
+   332461 gtttttgtta ccgagatatc tttttttttc attgttccca gggatttttt gggtaatctg
+   332521 ctcccatgct gcaaacagtc aaatctgaac tcaaccttgt cgctcttctt tctttcctcg
+   332581 cgggctggaa gcacaccagc agcgtgcgtt gcgtgattct actgtgtttt ttgcacttga
+   332641 ctgggtggac agttgaccgg aagagaactt cgattcgccc ggccagcagg cgggcgacgt
+   332701 gcttatcttg tgtgacaaca ctacagagcg agtgactctt ctaggcgggc agctgtgtga
+   332761 caacactaca agggtttttt tcctcgtgta acatgctatg gtctcaatgc ccttacgagg
+   332821 tagtgatgag tttcactcgc tctaagcccg gcccgcgcgc ggttaggaag attggccgca
+   332881 atctgagcgg taccacccac cctaccctac cacctatagg cggccgtccg tcctacagcc
+   332941 gcccgaaaag gaactgcagt gatcttgaat aggaatccta cagcgataga cagaatcccc
+   333001 ataaaaatac cactagatcg agcaccagtg atttcgtatc cggtcaaaat cttggctaat
+   333061 tgatcgaagt gggtagctcc agtagaccca tagatcatgg aacaaatagg gtgggtaggc
+   333121 ccaaccacca cactacacgt atagacgcga accccccctc gttaccgtac gtgcgactct
+   333181 caccgcatac ggctcgcaca aagactccta aatccatccc gagccttttc ttcccacttc
+   333241 tcccttcagt cgagttacta aagtacctct ccaatcctcg atcaaacttg cccaggcgct
+   333301 attaaataaa tgggggtctt tccttcgcct atcgtatatt gtatgtatcg gcttggcttc
+   333361 gtccaaaact aaaacaagga ggcattccgg cgggctctag cgtgtaggaa ggccgaccat
+   333421 tacataagct aaaagactag gactataagc cagccggaag cgactcgctt ctattccccg
+   333481 ttgggattgg atatagatgg ggaatctatt gatcgtagta gttcgccaac ctctctaact
+   333541 aacatacgat caaattttcg gcacggccca aaaaaaagaa agagcttcgc tcggtgggcc
+   333601 ttcctacgct gacgaatgcc tcctttctct tctcttcagt ctacaacaag tgggagaggc
+   333661 aggattcgaa cctacgtaga aaaacttcaa cagatttaca gtctgccgct tttgaccact
+   333721 ctcccctttc ccttttctct taccttctgc cttctcccct tcccctagtt ccaatcgttt
+   333781 taggaaagtc ggcattcttt gtggaaagga cttataccat gaacggactc tttgccttgg
+   333841 gtcaatcagt tccttggctt actaatgacc acttcgagaa gaacctattt gaaaacagtc
+   333901 taatgccaca tctgactcaa cagctccttc ttccttttct tcttcataca agcccccttc
+   333961 aactatatgc aaccacttct tgaacaacct attcctattc atgcgatctt cagctcattc
+   334021 tatgccatct tacttatact attattaaac caagcttcaa ttgcacaagc cattctaagg
+   334081 aagattagac tggcatctgc ttagccctgt ctccctctct agctaattga ataccagctt
+   334141 tctcttatat tagtattata tgtcacttcc ttgtcctatt cttttagttt tagtatgcat
+   334201 gggggtaagg agatattgat tacacagtag ttaggagctc atagtttcgt gcgcttacgg
+   334261 cgtgctcttt cttttatttt tatgacaaat ggccgccagt agacgacaat aggtgaggtg
+   334321 tccggtacgt gcttctaatg attattccag gctgagcact tcattcttat gcacccattt
+   334381 cacatgttag gtgtagctgg tgtattcggc ggctccctat ttagtgctat acatgggttc
+   334441 cttggtaact tctagtttga tcagggaaac catagaaaat gaatctgcta atcaaggtta
+   334501 cagattcggt caagaagaat aaacttacaa cattgtagct gctcatggtt attttggccg
+   334561 attgatcttc cgtttcaaca attctcgttc tttacatttc ttcttagcgg cttggccggt
+   334621 agtaggtatt tggtttactg ctttaggtat tagcactatg gctttcaacc taaatggttt
+   334681 aaatttcaac caatcagtag ttgatagtca agggcgtgtt attaatactt gggctgatat
+   334741 tattaaccgt gctaactttg gtatagaagt tatgcatgaa cgtaatgctc acaacttccc
+   334801 tctagaccta gctgctgttg aggctccatc taagagggaa cactgtaaca acaactagat
+   334861 cgggtctatt cccattcctt caccgaagtg gttgacgagc tcaaaagttc cttttttctt
+   334921 acttttcgtg gctgattaga tcactgaact tggttcactg acgctaagag aaggaaccat
+   334981 tggattattt atatatccct ccctttccac taggcttaaa tcggccagta aattcctggt
+   335041 ggaccaaata cagcaacctc acctttagcc atgcagaaga ggagattccc agactatgat
+   335101 gcctggcacc gatcaactct gaaagtcctg gattttttgt actctctctg aagaaactat
+   335161 tggatatgta tttagacttg caacagctca ggaggtctct ctttggctga tcatttcaat
+   335221 cattgttcta tcttctatct cccgtgactt ttctttgcta ataagctagc tctctgcaaa
+   335281 tgataagaaa ggtagaacta gggagataag agataagaga tttctttcca attccctttt
+   335341 ttggaggcca agcgagctaa atggtataaa aataactcgc cagtcaatgg gagtttgctt
+   335401 aggaaaataa gtgcaggtca ggttcggttc gttcctttca gcagtatcga ataagccaaa
+   335461 tgaaagtgag aaagcaggcc atctatccgt tgacttgccc cggagtgagc tgcatcgaca
+   335521 gaagagagaa gagagagtgg aagcggtaag tgaacttgac ttgaaagaga attcccactt
+   335581 gacgcccgaa cagcattttc agagggaaga tatcgaccat acatagggag agagctcaag
+   335641 ttagacggcg ccttagcgaa actttctcac tctacattca ttccctaatc acacatgact
+   335701 ttgaacgttg atttgacttc acatagcttt tcgtctcctt gatagctgga agagaacgtt
+   335761 gagaagctga tctgtctttc cgcgcagaat cctctttatc taaagtaaag agcagagttg
+   335821 cgatagagaa cctatttgct ttcttcgact cccttgcccc tgtccttgca gcaaattcaa
+   335881 agagttgtgt ggcattgact aactcactat tcctgcctga aggaaatgca tgagcgaatc
+   335941 tttcagttgc aattcccctt tccaaatctg aatcaaagaa cgggattaag gggaaataaa
+   336001 agtcggcgat gggactgatg ctctttcatt gcttcgagga tcacatttct tttcaggtca
+   336061 gaatctgatt gatgcgttgt aacatccact gactcaaaga actacccttt agtaaataaa
+   336121 tcccagggaa agtccattta tgttaaaatc tctagtgtag tggctggttc ctatacaact
+   336181 acatcataag aagaagccag gtctgaggcc agcgagctca agatcaggag ccaaagcgga
+   336241 ccgaagaagg cttcatctgc tccaagcccc taaagaagtg aagggacttg ctttcctaaa
+   336301 aaaaaatgaa atggaaagag ttcgtttctt cctcaaattg taaacatggg ttgtccaact
+   336361 ggaaatgata acagaatgca taggtcaaac gaaggagtcc caataagcgg atgaatatag
+   336421 tatacctatt tcctctatga gggagaaagc aaattgaaga acccgcggat atgtgcaaaa
+   336481 cttcaattct tcttagccaa ggctttccaa ctcttatagt agataatatc taaagcgata
+   336541 gagctggtgg atggaaatta aaatgtcctc cagaatgaaa gtgataaaat agataaagaa
+   336601 agggcgcaaa acgcagtcct ccaacggaat caatggcatt cctttcttac tagtccttac
+   336661 gaactctaca ttgcttacaa ggttgaatag cagaaagcct tagctttagc tacttcaatc
+   336721 tctcctttcc tcatattcga gcaaggaaag ctgtacaaag gaaaaggtat tctaaagact
+   336781 tgtttgcaaa aaaatgcaaa atctcttact tagaaaaagt acctgcagat tcctgcccaa
+   336841 aatcaactgc tattgaatct aagccaattg aatggaatca gatcatccgt ttctggtatg
+   336901 ttgggtgtca tcctatccag tactggatgc agtagaaggt gctctcgggc gaattcccgg
+   336961 ctaaaaagta cactgagtta ggcgagaatt gtctcgagat tgacagattg acttagggga
+   337021 attaggagtt gtttggaggt gggtaccacg gaagaggtag gttatccctg aaaaagggac
+   337081 tcggatagga atagagacta gaagctgaca gcaagctctt agtcttggct tcttcttcct
+   337141 ttattttatg gagacagggt tactaccata cttggagcac gaattcatat gtaagatgcc
+   337201 cagttggatc actaattcgt aaattgacaa cctctagaaa gggccttttt cccaaaagat
+   337261 tcctaattta atttcgtcga tattgaatac cttattgact ttgagactac taataaatag
+   337321 caacgaagct gactcgacca gtttacaact tcgaaagaag gagagcggag atctttcttt
+   337381 caatcgaagg gtcatgggtt ttttcatagg caccggactg aggtatgaaa tgaaggacga
+   337441 gtttcactcg cttcgggctc aagtttagat aaaagagtcc aatgagccca aatggaaatt
+   337501 cgatagaaaa ctaaagctga gaaagtcaca atccattgaa taagatcggc taaaggtgag
+   337561 agctcaaaaa agaaagaaaa gaagtcgctc acccgcctac gattgattgg agacttattc
+   337621 aataggtcct cctttagggg tctattttct ctgacttgac tcagcttgac ctacttgact
+   337681 cagcggttag agtatcgctt tcatacggcg agagtcattg gttcaaatcc aatagtaggt
+   337741 aaacccggcc gaaaccccag caagaaaacc tcctcaaaat atttccctgg caacctttta
+   337801 agggaacccg cccagaagct tcgatttgtc aaagctgcgc taaaagcctt gaattcaaaa
+   337861 gttgaaggtc gataaaagaa taagaaaggc catagaagaa ctagacgcca ttcaatccca
+   337921 gctatccttt ttccagttca atgaaaggct cgcagagcag aagagtatat tatcaaattg
+   337981 gatgagttga acggatacag aaaaggattg ggaccacaat atcaaagact tccacgctac
+   338041 actcacagaa agaagggcaa ggaagactat caatatcagt atcaagagaa taaaggatga
+   338101 gagtggaacc tgtggaagcc tacatcaaaa gaaaagacag gcggaactct actttattaa
+   338161 tctagtgggc acagctagca gagtggaggg cctctataat ataataggtt aggcttcttt
+   338221 cttctgggaa ttggcagttg ctttagcagc tgtgcatatc ttaactagtg ggcttagtcc
+   338281 gaaaagaata ttgcctgctc ttcgtagagc aactatgtca cttctgtcta ttagtgaaag
+   338341 gctaaggctc catccatccg aactttcgaa tgattcctct atcaaagtta tgaaataagt
+   338401 ttgttagaga aagctaggtt tccctaaaaa agggttttct cgctcagtgt catcgttccg
+   338461 gtgatcacct tcaggtcagg atactaaagt ggaatggatt gcatttcaat cttgcctata
+   338521 atgttgatta ttttccccag cgtatgtggt cttgaagagc ctttcagcgg cttaataaag
+   338581 gctggaattt acggaccgga ctactacgtg tcggttcctt ttcaatctaa aagagcaaaa
+   338641 gcctattctc ccacctacca gcagcctaat tccccctctt tccaatccta gctctacggt
+   338701 tgattcgaga aagaggaaga ggaccgaagg aacaacctaa gagaccaatc caaatccaaa
+   338761 cctcacctct tcgcttcttc aacaagaaag aagaagactc ccacctgtac gccgacgcag
+   338821 cactcgcggt tacttagcgg aattcacgga gcagtcacgg ataccaccat ctgggcagac
+   338881 tagaaattcg ctatcgcttt tctgtttttt aagattaggc taaaaaagtg gttagtagca
+   338941 ccttgacgtg ccatgatatt tactttgctt ctgtgaggcg gtaggctcga ttgttttcca
+   339001 tgtgatgata gaccctttcg cccataccgt gaccacgtga agcaggcgaa gagctttgtg
+   339061 tcaccatgtc aacttgtatg tgttcagcat atcatatatt cttttggata gttggacgga
+   339121 agtcctatca acctacagga tctgacgctc aaaagtaccc acgcgtttcc tacgagtact
+   339181 tatcatggcg gtgtagcgcc cttggcaagc cctgcaaacg acaccactgg taccctaact
+   339241 tagtctaatc atagatctgt atgaagcact catttctgat tctaccaatt cccaagacgg
+   339301 aaagtacttt tgatcataca ttcgcacgtg ttctatggat gcagtggtcg atccatatgt
+   339361 cgtgcagagg caagggtatt caaatgtcat agttgagcca gctcatcagc atcaataaaa
+   339421 gaaccttttg tatatcaatt gcgtcatcac gactagcacg gttatcatag cttcaagatc
+   339481 tcatgcttcc atagctcagc tgctttctcc tttctctgca ctgtgccctt gcccagctta
+   339541 ctctggtgcc tctagggaag aaaagaggct ttaaaggttt tgctactata actataggag
+   339601 atgaactttt gcaattccaa tataggtctt acgaaaggaa atcgcgttgg taactttctc
+   339661 gctatgggag atggaatgga tttctcgata agcccactac tgcatgggat cggtatagat
+   339721 cgtgacgctc atctcaagat ctaaggagat gtaaatgtgt caaggaaatg gcatctgact
+   339781 ttaattccca ctattctata atagcactca atatggtgac gaaacaatgg tgaccaactc
+   339841 ccacaattga ggagcaagcc gggatccaag cccaagcagc caatgaaaaa gccacttgtc
+   339901 tctgtcaact tcatttcatt ttcgctctag gattgacatg caaagtaagg acttagatta
+   339961 gccgaggagc tgtggctgct ggactcgatt tgacttttgt taacgatcga ctaattaacc
+   340021 ctttatcaga tggaagcatg ggatagactt gtctgaaatg agctggtcat aataggaatt
+   340081 agtgtctgca ctgtctgctt tagcgtcgtg gtcccaccat ttggtggcgg aaagagagca
+   340141 tattcacgct tctatataag gacgagagcc ctggaccggg ttttgcatga cagcccatct
+   340201 cgcgacccta actaactttt gttctaaacg gggactccag tgcattctga agcaagtacg
+   340261 tatgctttta agccaaagga acgaaaactc ccgagcaaca ctatcggtag tcaaaggaag
+   340321 agagaaggtc ctacgatcat tcctaaacag aggagaggga gtagaaattg ctaaggattt
+   340381 tacagaaaaa tcacagatca aacttaacac agagaaaaat gatgtgactg tttatcggtt
+   340441 gagtatagag tcatctcgcg tagagtcatt tctaaaagta atgtagtcat atggtatcag
+   340501 ccagtagcac taaggagcat tctatagcat ggtgcatcgg ttgactatca agagtggtca
+   340561 agtccattga gaactatttc cgggtatagg agcatttcag ggtcaagatc caatcgaaga
+   340621 aagggcacag ggcatgccag tattccagtt cttagcttct tggcttgggc taatcacgag
+   340681 cctgggtcta gctaattggc tgcaggcacc ggtgatgaga ttgggcacct ttcgcgtttc
+   340741 aaattgaaag gattccagga ccagaatgaa ggtcgtcagt ccctcccgat cactgctctc
+   340801 gacgactcga cgaggtaagg tctcgagatg cgtgtccctc ttttcgtatt gattagcgcc
+   340861 cggacgatag tagtttgccc ttgctgtttc ttcgactgtt gctttgtcgc attgatgctt
+   340921 tccatcgctg ctttactccg cttgtcatct tgctcttgct gctccccatg tttgttcttt
+   340981 tgcattggta gctctttgta gtattccatg agttgggcta aaacgctacc ctccgttact
+   341041 ttgatgctag ataggtggga ttcgattgat tatgtgatgt tgccgctgct gctcctcgtc
+   341101 gcgatgaaca gaccaagtta ggctaacgca atcctagtca gaagacaaaa acttctcttt
+   341161 ttattcctat acttggacaa ataaggcgag tagaagaata gactgagatg cttcttcaaa
+   341221 acaaagaagt cacatcaata atgggactaa ccataaggga gttggtagta tcccattggg
+   341281 gggaagcagt agaatggcaa tcgcaaggtt tgacctcttt gtagctagcc tcccttctat
+   341341 gtgttagtga gcgcccattt agtggaaaag cgagggatca ggtggagtta tcccttgtag
+   341401 tcaaaacaca gtccgaaatc cgcctcattg ttttcgtcat cagcattgtc ttcggcatag
+   341461 ctttagccgt tagccctcca tctcgattcc ggaatttggt gcagacttgg agtgacttcg
+   341521 cagagggaag ggttgtggct ttgacagata aataggttgt cattagacaa atcggggagt
+   341581 taggcctggc acagctgaat ctagctaggg aactaagtag cagcgggttg cggcacaaaa
+   341641 gaagttgaca cttcagccat cggcacaatc caataatctc aagtcaactc agggtggcat
+   341701 agagtattag ccccttagct tttagtcgaa aagtttaggc ctgatgggaa tacagactac
+   341761 aaagtttgct cagcagcgga agcacttttt tttagttatc aaattagaga gtagtgccag
+   341821 gcaatgaagg cccggtttat gcgagcaaga tattcatacc tttcatacca tccgaaggct
+   341881 gaactaagag tgcatatgta ctatcttcta ttctttctat tctattggat tattggattt
+   341941 ctatcttcta tttcatttca acattccttc taattattgt tattggattt cagcaatcta
+   342001 agcagaaaag catgaagtta aggaaagcac ggaagaagaa aaggccaatg cgaagtagtt
+   342061 tgccaacctt tgactagtcc cattctatcg gttaaggagg ccaggaaaga tcctcaggta
+   342121 gaactcagtt tgggaacaaa aacttttccg agcccagcaa aataatcgca aggagcccag
+   342181 cttcaaacta atagcccagc tccataacag gcgagcagcc aacaaactat gacccgaggc
+   342241 aaggaacatt ccccatccag ctcagcccgg caagtgaaac ataaaacaca atttgcactc
+   342301 ttccagccga cttctcagca tagcaggacc aaccaatcag cacatttggc acttacgcca
+   342361 gcaaagcata tatgcaagtc aggctcaaga ttcacaccca aactaggctc agcagcataa
+   342421 agatgcccgt ggtatagact actcgactta gactaagaag gaaccgagaa agtaagccaa
+   342481 gcggcgtaga ccaataatca atgtagactt tgacacctaa caattgcact agattcaagg
+   342541 aaacctacca aggaacaatc acatcgggat tagcaagtca ggctccgcat tcgtactacc
+   342601 aagctaagca gaaagattac cggtaaatgg attcctttac ttagaagtcc actcacccga
+   342661 gcaaggaaga aaagcaatgc aaggaccaat ccaactaagg acaatcaaac aataccagca
+   342721 tgggagaaaa aggagactca tcacattcca tcagtcactc gagtcaaagc ataaagcaga
+   342781 gcaatcgtac tccactcaag aataatccca tcaaacaaaa atgggcgatt cacatttccc
+   342841 ttccctcgac ctggctctct tcaacttgtt tcgaccggct tctatattct gaatgtttca
+   342901 accaatttaa ctttcaccct tatcgttcta aatgatcttt ctctttgact tcgagtttca
+   342961 gtattcgtgt gacttatgct ccacttgatt agcgtagctg ccttagcttg ccccgacttg
+   343021 actgattggc tgggttcgct tcacctcgct cggcttagtt ggaggactac agaaagagaa
+   343081 gcatgacaag catgacgttc cttaattata tctatatata gttaactatc cgtatatcat
+   343141 agtaaggaac cgaaaagtag ttacccattc acgtacatcg gttgtgtcga atatttgtaa
+   343201 tcaatcggtc aaccagacat cacgtaacta ttcagatcca acttctagaa ataggtttca
+   343261 gtcagctcga tgaaatagac taatattgac catttatgtc agatccaata gcttgcctcg
+   343321 aatgccagca tgaaacctaa ttacgatagc ggtggcgaag acttgaagta atcacttttt
+   343381 cctggacaat cggccagcta aaggtcagac ccaataatag cataatttca cccatcagtg
+   343441 gttggtacca gtccgtaaaa gtccagcact taggtagcag ccccatcaac tgtacaatac
+   343501 cgacctaaca ggaccagtat ccaccataaa aaccgtagca taacaagcta ggttcccata
+   343561 gcatataaag gtagggccta gtctaataga ctgaagcact aatcacaagg aatcacaaat
+   343621 agcatcccat ggacaagtat ttcaccaaca tacagaggag cggaccaatt tcaacgagta
+   343681 gacaaatcca tttgaaaaga caactaaaga tttattaaat ttccggttaa cttatgatcc
+   343741 cggcctagct cgtcttttct tctataatag cttttggtca ccgctaatca tcaggtgacg
+   343801 ctggtcaaga tcagtactct tattaacttt ctgattggca ttagtataga atgggcgact
+   343861 aatataatag aagcatcaat ctaaaatcaa caatcacaag tatctacatg tttactcaat
+   343921 gcctggacca ccttaactac tatgtctagt cttcaaaatg cctggctacg ctataataaa
+   343981 ggcttgcaat gattgtgatc tcttattgta tttgatccta ttaaagtatc atgagaaagc
+   344041 tacgaatgtt actatgcacg cataaaaaga agtgagtcga atatcttgag tgagaaatat
+   344101 ggtagcttta cgtgctagaa tggaccaata cattataaaa aaggatctct ctttgaaacc
+   344161 tttatagacc ccaggcttca aaccggacct tagaaccctc caagctcaag agtacttatc
+   344221 tagacatcgc aagtcaggct caacagtcgt actacaagct cagcagcaat atgaccatgg
+   344281 aatggagtca ttgagaagaa agggagatcc tacctccaat acgagaggga atcgtacgaa
+   344341 aggaacaaac tcaacttcaa gctcttagcc cagcagcaac aaactatggg attggtccga
+   344401 gacaaagaag acagaaacag cccggcccag taccaatatc atctaacgaa ttgtacgaga
+   344461 aagctaccaa tccagcccgg ctcagcaagg cgtggaaaat gaagcacatc gggtccacat
+   344521 gggacagcgc aagttagact caagattcgt accatccacg actgagtgaa gtaggacgaa
+   344581 agcaaccaat ctaactaaac tagatagaat aatgtttcaa ctaattaaac tataaacctt
+   344641 gttcgttctg aactatctcc tttcttcgac gttcactggg ttggcgtgac tggcttaaac
+   344701 tctctgaaac ctgctattat atcaaccgat gaactaacta tctcttacat atctctatat
+   344761 agaatagatt atctataatg tttgattctt agtctactgc ccgtttaatt cccttgactt
+   344821 ggccttactt cttttccttt cttcgcttgg gcgttcctat caaaagaatc tgtcatgcca
+   344881 aagacccaat ccatgaggta gtccaccgat ccagtaaacg atggaaatgc acaagtcctg
+   344941 aaagcataag accgaagtat gagaccgagt gacgagcaaa ttcagtattt ttgtttccca
+   345001 ggaaaaaagg aaaagggggg cgagagaagg aaaggggaca cagtggtgac aaggatgact
+   345061 caggtcagtc ttccacccat aagcaccatc aggaaaaaat aaaggaaggt agggacaagc
+   345121 ataagccacg tgagcttcat tctttcctat gcaatggacc tcactgggaa ggtaataaaa
+   345181 aataaaaaaa aaaagccatc cactctctca cagacaagct aagtgaggag gagccagatg
+   345241 aggagttggg atctatacag ctgatcaatg ccatggccaa atcaaaaggg ctcatccacg
+   345301 ttgaagtcgg aatcaatggg aagaacactc gggccatcat aaagaggtct caacgagcca
+   345361 ctactttctc tcagtagagg aggcaaagac aaacggattg aagcccaata aggaaagagt
+   345421 ctgcctcaag cctgtggaaa agcccgtaca aggtctgact cgtgcggtgg agctacacat
+   345481 gggtgattgg aagggcaaga tagacctgac cctataagta caatggttgg tccctccgga
+   345541 accggtaatc tccgtttgac ttcctttcag caggtgcgca gaagtattct ttcacaagtt
+   345601 tgacgcaagt cgtacctccc agccccctta gctacttgta ctaaaaaaaa ctaggacgga
+   345661 agttgctaat cagaggtggg cagggttcct ctccacactc tcaaccttaa ttatctttct
+   345721 cctctgtggc tatagcctcg ggttcgtatg gatgaatcaa ccgactgtgg aattttcttt
+   345781 gcctttgcat ctccgtcttt cgcgtgccat ggaaatttct ttgctttctt caacccttac
+   345841 ggacctttac tttattccta tctgtgtggt tactcctagt acgcactgtt taagtatccg
+   345901 gtttccttaa ggcctagatt caatcgtatt tttgatactt gggagatatc cgacgagtac
+   345961 ctgtttggca aaccaaggaa gctgtatact caacctctgt cccgaaagta gctttaaggt
+   346021 ctaatgaccc ggatcgagta cactgattca atctttccgg tcttaacacc tctactaagt
+   346081 aagcagctcc attcaatgac gatctgtcta aggagtcttc cgtcgtcgaa gtgattcatt
+   346141 tattctgtca gtgggctcat agatcaagga gtgtgcagca gcacattgtg ctaaaagccg
+   346201 gttatcgtgt ctgcctcttt cctctctagc cgcccttcag gctggtctat ccaaacaaaa
+   346261 agcagaaaaa gaaactctag ctagctaaca aacctttgct ttctctaaga ctccgggtga
+   346321 ttgtactggc taaccttttc agaggagaaa cgccgagggc atcaagagca gaataaaaaa
+   346381 caaggcatgt tcaatgagaa ggtatgggat aaagacccaa gggatattct aacgacagat
+   346441 acgttctttc gatacggaga aggggctcca aggttcgttg agaccaaacc aatgaaacga
+   346501 ttcggtagtt aacaatcttt cgattgagct gctatcttat tgaacgcaga gagtggctac
+   346561 tgcttcaaag ctttgaaaag gagctgccct tctctatccc gtactcctat tcttcttttc
+   346621 ctctaataat acaaagcatc tgtttgtagg taaacaggtc aagcactatc ttcagaacag
+   346681 tgagaagatg cctaaagcta agaataagaa cgagggtaag aatcgacgag gaatcaataa
+   346741 gatataagat aagtgaatga caaagcgtga gtataattct caacccgaga tgaaggagga
+   346801 ggtgttagcc tacttgctgc agctgtctgc gtcgctagtt ctgccggtag ccatttggct
+   346861 gattgctgcc ggacagattt ttacctgttt acgcggatat actatctcca attaccaaga
+   346921 aaaagtagag gagaagctct gctcgacttt agtagataag atttcagaga aactggccga
+   346981 tttatttccc gtatatggta tcacgccgtc aaggaacgca ccctttccta ctattctcga
+   347041 gcaattgctc gccacagttt cgcaagagga gcgcttggcc tacctctcca atatgtataa
+   347101 cagtttgata gaaatgggta tcgacagccc ctgtttttat cccatagttc aaacatttct
+   347161 ttttctaatg ggcggcggcg gcgggcccgc ttgacctcaa ttttccatac ccctagcccc
+   347221 cggatgtagt ccttgcttag tcttctagcg gcgaaccggg taaacaacgc gatccgaata
+   347281 aaaggaagtt cgctgggatt gtatttattc attttccaga ggtactggtt cgactccagt
+   347341 tcgcgactac cgtgatttca ttttatacga gcatcaaatc aaacctgtcg gctatttctg
+   347401 tttcatctct ttctaatctt cgttgcgtgc aaattcatat ttctcgcgga attctccatt
+   347461 ttatagatct cagcaaacaa atgtacaaag tggatcgcct agcaaacgtt gctgcctatg
+   347521 atatgccaac aggttcgaaa gaagctttct gggaagccat cgggaccagg ggatattctt
+   347581 ctcgaagacc gtgcttttga tctcgtcctc atctggcata gcagttagga aagcattgtc
+   347641 atcctcggat accactggag gaatcgattc caggagttcc ggatgggctt tcctggggct
+   347701 ggaggtgtac acaacttatc aaagaattgg aatatgtact aataaatcat gttggtcatc
+   347761 ttggatttcg tataaagagt ttttcgatct atgtatttga cccgagattt ttgcttccac
+   347821 atcctatatt gaagtcacaa cgcattatgg aggtcccttt tcagctgtaa tttatccctc
+   347881 tcccgatctc tttcttctcc tgctgttcgc ttcgcttctt tctttcgtta gcttcctttc
+   347941 ttttctcctg agaaaaagat tgtttgttgt ggcatgcatg actaagccac ctttcgatag
+   348001 cctcggactc tacattaatc tggttgggag taagacaggg ttagaagcag acaagaagga
+   348061 gcagaagagg atctttgtta tctgcttact ttatgtttga gtttgatctc tgacttgtct
+   348121 aactccatct gtactttagc attcactaat ccatcacttc cctatcttat ctatcttaaa
+   348181 tccaacttgg cagtttccct gaccacccat aagaaaaaaa aagatgatta aataggcata
+   348241 atctaaagga aataagttta ttagaataag aaagaaaaac aaaactaaag aaaggatttc
+   348301 ccaatctatc ccttgcacaa aagactactt agaacgctca taaaaaacac agaagaataa
+   348361 gtcatcgaag aagtaataat ggggtaggaa gaggtaaact aactttattg ctagtctttg
+   348421 tctttctagt gtcctcaatt gtccgcctct gaatggcttt gtcagagctt tctgtcgttc
+   348481 cgttccctca ggtcttgggg gagtagagct gcttctttca actcggagga acacacctca
+   348541 ttggagaact tgttgtagtt tcaggtatga ctaggaaatg caatatacgc taggtcaatc
+   348601 cattcctata aggccggtga gccgccgtta ggccgttaag gctaattaca tgaagggatc
+   348661 caagagaagg taggtatgga attggcttta gtgctctatt aggtaaaggg gtgctttagt
+   348721 agcctcccct cactctcctt gacccgtgga caaacgccac gtgatggacg gcgcccggct
+   348781 ctgacgaaga accagtaaga ttaggtattg cctaatccgt aaggttgtca tacccttatc
+   348841 agctttctga ccaatcctcc tatcctatgg gaatgctttc ctaaactctg attgctcttg
+   348901 ccgactcgga acgaaaagat agcgagggat tgatagattc ctcgttggat tgggagacca
+   348961 actcggaaag aaagaaagag gcttggatgc tagggatagc ttgaagaacg agtaccggga
+   349021 gaaaggacct tcttttgagg aatgaggatt aggaagctta ctcgtagaaa attaagttgg
+   349081 cacctactaa accaagtgcc tgaaaagggg tagtgaaaaa ggagaaggca atctcggatt
+   349141 gttcagacac tgcctttggt tcattacccg ttgagaaggc agtagaagag gtagcattcg
+   349201 cagaccaaag caaagacggc tggggattct tcatagcata gcaagaagga agggcgtgct
+   349261 taatatataa ggactagtag aggcagtaga ggtccttcac catgaagatt gtatgctttc
+   349321 cttttttcac tcaacaatga aagctaaacg aaaccgattc caatctcttt tccattcaga
+   349381 aagagaatcg attatgtagc tcacagaagg ggtgatcaaa gaagttgact aagtgaatgg
+   349441 gaatccgata gctctaccag ccttggaaga gtagtcagag gcaattcgct aatatttaac
+   349501 cttttctatc atctttcttt cctcaagcat gaaacggagt tgagcggtag gcattccttt
+   349561 ccattacagt gggaaagcgt atgcgagtca tagagttctt ccagtaaggt ttgaggtcac
+   349621 aggtaccgat gcagtggaaa aggaaaacta ggatataaac aagacccatt tgtttcaagt
+   349681 ttagggcccc ctagtgttac aagctactgc ttgcgaagga aagaaagagt tgctaaagct
+   349741 ccatgaaaat agtctcctta agataaccat gcataaattt ctaaaagttc tgttaggttc
+   349801 ttagtagcag tcggcgacct tttcttcttt tacttcacat agcttttcgt ctccttgata
+   349861 gctggaagtt ctccaaaagt atgaaaagca ggaggacttt gtaccatcca ttccagtgta
+   349921 gttgaattca gttcaagagc ccaaggactc ggagcacatc ttttgttatt tccactgctt
+   349981 aaagtgattg ttacgaccac gaagaaacaa caaatcccaa ctacggatat ataagagcca
+   350041 aaactggaaa gggcattcca tccagcgtaa gcatccggat aatctggaat acgacgtggc
+   350101 atacctgaaa gccctaagaa atgcatagga aagaaggtca aattaacccc gaaaaaagtg
+   350161 atccaaaaat ggatttgacc taaagtttca gggtatgtcc gaccaaagat tttacccacc
+   350221 caatagtaaa atcctgcaaa taaagcaaaa acggctccca tagaaagtac ataatggaaa
+   350281 tgtgcaacca cataataagt atcatgtaga gcaatgtcta gccctgaatt tgccaggact
+   350341 attccagtga gtcctcctat ggtgaacaaa aagatgaatc ctacagcaaa taacatgggt
+   350401 gttttgtatt gtatcgaacc cccccacatg gtagcgatcc aactaaagat tttgattcca
+   350461 gtggggacag ctatgatcat ggtagctgcg gtgaagtagg cacgggtatc tacgtctaag
+   350521 cccacagtaa acatatgatg agcccaaaca agaaatccta agacaccaat actgatcatg
+   350581 gcataaacca tgcctagata cccgaagacc ggttttcccg aaaaagtcga aacgatatga
+   350641 cttatgatac cgaatccagg cagaatgaga atatacacct ctggatgacc gaagaaccaa
+   350701 aagagatgct ggtataaaat tgggtctccc cctccagcgg gatcaaaaaa ggttgtatta
+   350761 aagtttcgat cggttaataa catggtaatt gcccctgcca gtaccgggag tgataataaa
+   350821 agtaggaatg ctgtcactag aacggaccac acaaataggg gtaatctatg catagtcatt
+   350881 ccaggtccac gcatgttgaa gatagttgtt ataaaattga tagaacctaa aatggatgaa
+   350941 acaccagata gatgaagact aaaaattgct aaatcaactg ctcctccaga atgactggta
+   351001 ataccactta agggcggata gaccgtccac ccagtgccgc tacctacttc tactaaggct
+   351061 gagcttaata ggagcaagag acttggtggc aacaaccaga atgaaatatt atttaatcgt
+   351121 ggaaatgcca tgtcaggtgc acctatcaga atcggaacaa accaattacc aaatccacct
+   351181 atcatcgccg gcataaccat aaaaaagatc attaaaaaag catgagctgt tattaaaaca
+   351241 ttataaagtt gatgattccc accaagaatt tgatcgccgg gtcgtgctaa ttccatacga
+   351301 atcagtactg agaagcatgt gcccatcact ccagcaatgg caccgaaaat gaaatagaga
+   351361 gtccctatat ccttgtggtt tgtggagaac agccatcgaa ccagattttt cataaatttg
+   351421 agattctttc gtttattacc ttatcagaga ggggttaggt atttagtaac tctcgcgctt
+   351481 tcctggaacg ggttccatag tctacgagtt cttgagagtc ccgtggaacg gtcaaatttt
+   351541 tgctagtaaa ctagctcaag caggaaattc caacctagct gatggttttg ctaggaacgc
+   351601 tagctcatgg caggaaatca taccttgcct gattctcctg cgtaggagag cccaggccta
+   351661 ccattggatt cgaaccaatc agcatcgtcc tacaaaagat aatgctctca ccaattagag
+   351721 aagtaggaaa atacaacaaa cacatttcga cagaacataa ttattattat ataattgaat
+   351781 aaattctcag gaaaagagaa acagtggtgc aaactctact acaatagggt ttcacagatc
+   351841 aaaactttct tcctcgaatt gaagttttgg gagctacatt ctatattaag acaacccaga
+   351901 agccctagga ctatactcca acaagatcac ggaacggacg ttacgaccac tcaacaaagt
+   351961 ctcgacaata cacccaaata atcacaactt ttttcttcgc tacctttagt attcggataa
+   352021 acactcggac agtattgaca aatcgcgtgt gttaagcaaa tcacaagact tctcttttct
+   352081 ttgcctcatt tgaaacttct ttattaaacc tgacttcagt ctatcatttt aagaaaagga
+   352141 agactcttat taaacctgag ggaaagtata gagttatgac agaggccaca tttcatacac
+   352201 tttaggcagg tgaaacactt caaacaggaa acacttacag aggaacagga aacagctcac
+   352261 cttacagagt tacgcacaac agagaggcag cgcggggcag tcaaggaaca cattcggctt
+   352321 tcacatacaa ttactttact taccttaccc tacagtctta gcgaacctta cagatccttt
+   352381 tacaaacctg gaagcggggg aatacctatc aacgggggga cccatccgac tcttctatat
+   352441 atatgcccgg gggacacaaa gaacttcttt cgggatccca gtgcgagtca catcaatcag
+   352501 ctttctaaca accactcgta ctattttcac acaaaagatc cgccattgat cacttttcgg
+   352561 tcacaataca gacatttagc aacaggggag attagagact atcgaagtaa gcccgaccta
+   352621 ggctgaaacc agcagctaca accaagtcag ctgaaagcag tagctatatc taagctcagc
+   352681 tgaaaacagt gacacttcaa aagctagcgt tcccagttcc gatacaagac aaccaaggag
+   352741 gttcagaatt tagactatcg aaaggacctt tagccgctca tacgtaacgc ctcctcggtt
+   352801 acgccttccc tcgcacgggg cgccactact gagctacaaa caagctcggc agaagcagga
+   352861 ctaagtacgg ctgaaagcag gtataagact ttagacggag gagcccaagc taggctgaag
+   352921 gcagaccaag cggaggctga aggcatacat tcactcagag ctatacgcgt tacggcagct
+   352981 gctaccttcc ttcaaaggag ataccaaata gaggctgaaa acaaccaagt aggctgaaca
+   353041 gaacaatcta ggcagaagca gcagaccggg agtagatcct tagctctttg cccgggggcc
+   353101 ccatagaact tcttgagcgg aaactcgaaa cggggcggat ctacagaccc tctttcacgg
+   353161 tgacagtcca agtaccatca gacgggggtg caagtactca cattatcacg cggggcagag
+   353221 ggaccatctc atccagtcaa tacgggtcca ccacttcatc ctccccagcc atcgggtcag
+   353281 ccatgccaaa cagctaggac atctcaggac cggtcaacca gaggagatcc atcgccaaca
+   353341 ggggctccag tacaaacagt agggaggcgg atagaccatt tgtcagttcc aatagccaat
+   353401 cagtcaggct atttagcagt cgggacatcc ggaacattct actaaaaacc atacgataca
+   353461 gatttttatc tatttaaaac attggcatca tccgggagtg ggagtggtat acctcagact
+   353521 ttgaggacct gaaagcagca gaactagatt tggtggacct tcgtagcgga agaataagac
+   353581 tttcttgacc ttcgaagcaa cagttttacc tgacatccca gtggcagcat tctttgactt
+   353641 ggaggcccta agcgatcgtc ctacctcgaa aggagtggca gcagcggcag gaatacccaa
+   353701 gtcatagtct ttttcgggag agggctcggc atagtcttta tgcgacagta ccagcatgcc
+   353761 atagtcttta tgcgacagta ccagcatgcc atagtctttt atgcgacagt aacagcgtca
+   353821 tagtctttat tatgcggctt cagcagcacc cggttaggac ttcaaccaca gaagacccgt
+   353881 ctttgccttc acgtcagaaa caacaacgca ttgaacttcg accgcttgac ttccaacaga
+   353941 agtggatgat tctttcgcct ttcttcgttg acctgacacg acattcaaaa tggagagtcc
+   354001 caatagtcca gaggtagcag cacgatcagg gaaccaaaat ggctgtctcg cgcttcgaag
+   354061 ccacagcatt tagagggccc ttggcacatc ttgcaatgac aacacgtctc accacccaaa
+   354121 cttggttttc gacggcgaat cagcaatgtg tgtcaagcaa ccaattctgc tcctaggcca
+   354181 gttctgatct taggaattcc ctgatccaga atatctgagc cggaaggtgg cgttcaaaga
+   354241 cttaaacgag tagtgggcca aagggctcta taatgatagt tcgccctcag cttttgtcca
+   354301 ggattggcat tctcaaagga gaaagatctg atcagggaag gattgatcga tcaagatcag
+   354361 gaagaggaga acgaggaaca attaacgaaa gaaggggaaa gggctaaagc atctgcttcc
+   354421 acttccgctg cagaagaagt tgggggaggt tacaaagccc ttatttcaac agactgttaa
+   354481 ccactttaat gcctttatcc ggctttgcaa caaagtgatc ttcgatcgca ctaaggcgca
+   354541 taagacagta cacttcaccg agattcacca agtaacccta cggaattgat gttgtgacac
+   354601 ttgaaacgca tgaaaggaat ctaggaacaa aacaaagagt tgcacccctt gcatttgaat
+   354661 gccggtatac tgctccgcca ctcagggcac actaccaaag gactccaatc ctcatcattg
+   354721 ggcaattcaa gacgcaagtg gtgtgtatca tctctagcct cctcggtgat gtttcatatg
+   354781 ggtcactaga atctccatct ggggccgaag aacgtacgat caaagacttc tgaagcgtcg
+   354841 tctaaaggct ccgatctaat aagttacgtg cgtgatgtcg atgagatgtc ggcaggagat
+   354901 gtcggcaaag aaagaggaag gcgcgcagaa ggtaggtaat agatctcata ccgaacgtga
+   354961 ccgagtccac ccaaccgtac acgatccttg agtcaagtat gcgaatgaaa aagaaggagt
+   355021 aattcgagca ggaagaagaa aggctgacgc tgcagcttca acttccgctg caaaagacag
+   355081 gggatggggg agtttcaaag ccattcttat aacggattac tgtaccttat atacctttat
+   355141 tggctttttc aactaaaagt gctccgctcc actaccactc ataaggttcc gccactacaa
+   355201 tagggcacag tcataagaga atcgagagct ttatcctggc tcaggcaaac ggaactacct
+   355261 taggtctcgc tgggaaccca gctcaggaac ctaaaccaca gtgctttctt aaggagttgc
+   355321 cgagtcctac ctcgctatca aagatactaa ctgaggaact cacctatgac ccaagggaag
+   355381 agcctagaag aaggtttctc acgttgatcg tcacccctgg gtctaagaga cccccaagct
+   355441 gtctcaccgg gatcgaatcg ggctcacctc ggatcaggaa cagctacaat atacgccgct
+   355501 taaacaagca gattaaagga aagagaccca tctatctaag gctaaaagaa gggaaagaag
+   355561 ccgtagctag tcgtagaagt cagtgaaggg gaacaagcaa gccgaaccgt aggcgagcaa
+   355621 agctttcatt caacagcaga ggaagacgta gaaagaccag caaaaagaat gaacacgtgc
+   355681 caagaaccca ttccattcac ggacgcgggg aaggaggaaa gtcaaggcac aagtttctct
+   355741 attaagaacc aaggccctac tgagctacat ttactcttta caccaagaac agctcagata
+   355801 tgcgaccccg gggactgggc tttggtcttc gaccgacggc tcctttttcg gcatatgaag
+   355861 caacaataaa taaaagagaa ctacagatga acgaagggaa gaggtagatc atacagttca
+   355921 aactcaaaca acaaagacat tctaacagtt gcctcgcttt catccttata agtggacaga
+   355981 ctcatccgca ttctccgctt tgaacagagt ttgcttcttc gttcgccttc cctcacacta
+   356041 ctgagctaca accaagctcg gcagaagcag gacagagatt taagaaccgt gaagtgagcc
+   356101 caaacaaggc agaaacagca gctgcaacca aataggctga aaacagtgcg gcagcactag
+   356161 ccgtccccag taacagcagc agtacgagca ttccttagga gggtcaaacg acggactacg
+   356221 tcctatcctc aggtgaatac ttttacttca gttggatact ttgatagttc actactacga
+   356281 taggggaata ctaggctagg gaatgggaac tacttgggaa gtcatctctt tacttaacga
+   356341 actaaccact cagggattca aatccacgat actcctacca tctgcttgat ccagttgaaa
+   356401 actttactcc gacttcacat cagcccttag ggtcaacaca gctcctctta tcctttactc
+   356461 tttctatgcc tggattccta tgcctgtttt acctcgagtt ggaattcagt gggagaaatt
+   356521 ccaacaaaat tcagtgggag aaattccaac aaaattcagt gggagaaatt ctccagaaaa
+   356581 cgaaaacggc gattccaact ctacaatcaa ggcagaacgc agggaggaga agagaataga
+   356641 ttatagacgg cggaccggac ccaagctagg ctgaaggcag gactctatta agtacggcta
+   356701 aaggcagact ctattaagta cgactgaagg caaaccaagt aagtgcggtt tcaaacaaag
+   356761 gtgaaggaaa ggtacgatcg gtggacaagg aatcggttga gatcgaaacc atattcgtag
+   356821 gacgcgtacc caatagcgtt tccaggggcc tcatcccggg ggcaagccgg ggcatcagta
+   356881 cgcgctaggc ctatcactcg gagcccgggc gatctactcc tactctttat ttagaggcgg
+   356941 gaaatcttac tcttcttgag cgggggatag accaatcttc ttcagcggga aatccaatcc
+   357001 ataccttggg ggaaatacat agcttggtag acaatccata gcttggtata caatcactct
+   357061 acttcaagtg ggctaactta ctcatctttt gggattggtc tttcttcaat gtataagtgg
+   357121 agtaaggtct ttttccccac cgaaagaaaa gaaggtttcc attcccattc cagtagtaag
+   357181 aaaagatcag aggcaccgtt gcctacttcg cgggatcgcc ttacgtagaa ggacttgtga
+   357241 tccactctcg gctaagtttc tatggtcggt cctctctcga ttaatagcta actggaaaga
+   357301 gaatcctctt ttatacttaa ttatcgatca agagggaagt tcctaacgtg ttcttaggaa
+   357361 cgaaggtctg tcgaaggcgg gcgaacgtgt ctttctatgg tacgctcggt acgccttgtt
+   357421 tctattgttc tccgatagct ctatcagtga ttatttaaag tagtgctttt tgtctgttac
+   357481 cggatatgta tagaagtttc cttatggtat tcctctccgg tgttgctgaa tacacgtcac
+   357541 cggttcgact cctatttata tttagtggtc catttattga ttgatgggcg aatggagctc
+   357601 gggatgctga aaatgtaaag gccaaggcct ctctcggttg ttgctttacc tgtcctatta
+   357661 gactagctag gccctccttc ggtatgtctt gttccttggc ttctccttgt tggaattcat
+   357721 ggttgattgt aacctaaggg ttgttatatc actgtccttc tgtgggggag aaagaggggg
+   357781 tgttgctagc tcggtaggca cacaggcgcg ggttccggtt gaagagtcaa attctccttc
+   357841 ttgtttttat tgtgagaatc tttctctata ctctatataa gatgccatct gttttcttta
+   357901 ctcgtttcct ccttagcttt gaatctatct gtgactgttc tccttcttct ctaaggtctg
+   357961 ttcctttgtt ccttctttgt gccttcattt gctttctttt gtttgactgt taacttaaag
+   358021 tatctacctt cttcctttgg tttcttgtag ttctatcagt tagttcttca atggttagct
+   358081 ctgcctgagc tggagttgtc cctccagcga gagttgcttt cttccttgct tgatcggtag
+   358141 gagttgccga gatagcaggt attagctgtt agccgtcgga ctcatttaaa ggagtcctat
+   358201 cgggctacac tagtctaagg gcagttgagt aaccgattcc tacgcttcct ccgcttacct
+   358261 ccgtcgctta actattaata cgtcgcttac ccgcctccgc ttacccgcct ccgtcgcttg
+   358321 ttatctgctt cccttagcta aaacgttagg cgatagtgaa agccttcact ctcttatatt
+   358381 tacgtacacg tgggaagaag acctagtaat tgagtgacag cattcctgga ccaattctat
+   358441 aaacagccaa tcaggcatat catggtacag gtgaaagaaa cttatccaat acagatctta
+   358501 tcacctgccc atcaacgctc aaccgtctgc tcctctcgtg cgtgtaggaa taatcaaaag
+   358561 aggagacagc atgcctatta ttaagaaccc aattctacta aaaacgataa caacgattcg
+   358621 aggttctcaa tcaaggcaga aagcaagaaa cccatccagc taggaggaaa gcaaatcaag
+   358681 gagaccaatc agacatctag attctatagt acgcgtatgt gtacgtaccc aggctaaaac
+   358741 agcaaatcaa acttggctat cgtttgagag caacctatcg ctaaacacaa accaatcctt
+   358801 ttacacagtc cattgaaagg aagaaaacaa gggtggaaat tcatgttcct ttttacacag
+   358861 acctccaaaa ctcgaatagg ccaatcttac ttttcctcat tctagaggtt gcaagggaag
+   358921 aaaacaagat tggttcttta tgaaacctgg ttcaaagcaa cgaactccgc tctctcccgt
+   358981 gggcacattc ctattatagt aaacggggac ttctctcttt attcctatgc gtgctttact
+   359041 tgactcctct gaattcagaa caatgacttt taggctattt ttccccaccg gcaagaatga
+   359101 acttctatgt gttaagcaaa gaagaacaga agatctattc cagtagttag gtacaatctt
+   359161 ggacggccgg ggacctactt cgcgggaccg cctaaaagga cttggtccac acgcggcgaa
+   359221 gtttctttgg gcggcgtttt atcgattgat atctaaagtg actggaattc gaatcttctt
+   359281 atataaggaa ttatcgatta taaacttctt atataaggaa ttatcgatta taaacgagga
+   359341 tgaaagaagt taggaataaa aacccgatgt tgtgttctca tgcccggtcg aagtccaagg
+   359401 ccaacccggt ctgttgccaa tgagttgtga ggaagtccac cgtcaagtgc gggatgtgag
+   359461 gccacgtcaa ctgtgtatgt tgctgcgaag gtcaaccaag tcgaggaatg cgctgcggaa
+   359521 aaaataaaag tcactgagga tttgccttcg cggataccaa ttgcttggat ggatgcgttt
+   359581 gaaagcctcg agatgacttg cagaaaaaat gctttctagg tttgtcttgt gtctccgtaa
+   359641 caaatggtcc atttattgat taatgggcga acgacgggaa ttgaacccgc gcatggtgga
+   359701 ttcacaatcc actgccttga tccacttggc tacatccgcc cctaccctcc ccgcccttaa
+   359761 ttttgtttca ctgtcataaa acctattaaa caactcattt acatttgtat cttagcacca
+   359821 aactaaaatg aaattattct ctgcctaatc attaataatc attaatgaag gggcccgcct
+   359881 caccactcca cttccccttt ttatggaagc aaccaaactc accatgacaa gggccagcgc
+   359941 tcttacagcg aaaggctcct tttctctagc caaaaacatt atgccatcaa gccaagccag
+   360001 ctcccgaatg tctttaatcg agagaggtac agaatagtcg aagaaagtaa gcaactgcga
+   360061 caagaaagga cttataagat aacatggaaa atgagatttc acttctagtt atccgccaag
+   360121 gaaatgccgg aggaattcta tatatagctg agcgatgacc gtttcgttgt gcatcttgga
+   360181 tacagtaccg aaggtctgct ctactaaaac tttgtcataa tgagagcggg gatcatgaag
+   360241 attgaaacta ttcatgttgt tttctataaa ctccaaggct tctggtctta tggcccaggg
+   360301 tgattgagat caaattttga ttaggcgcgc atccggttcg tttaccaata aagtaaaaag
+   360361 tttattttgt aaatgtgctt tccgctccat tattataatg tcgaatagct cgttgtttag
+   360421 ggcactttga tagtgatgaa tgttgactgc gtggtccaaa taatctctga aaaaggtttc
+   360481 gtatttaccg aagtggaggc aatccatgaa ctaatttttg aggtttcgta tgcgcgaaaa
+   360541 gagtttcgct ctcaacttcg acgttctctg ccctatattg tccagccagg gatgtaaatc
+   360601 aagttggctg gttaaaaaca ttggtccagc ttaagcttga cgttgaatgg gaatttagag
+   360661 ttccaatcca gctagaagaa gcgttgttag aagcggcaaa agctatgaga ttcgaatcat
+   360721 aggcaaggag ttgtgtcaag aaaaaacaaa agacaatcga aatggacaat cgacggatgg
+   360781 caaaaagatc caataactga gaaagacaaa acaaagattc ttttcgataa aatacaaata
+   360841 agacaagata tatgagcaag aaatctgcca aaggtcttcg agattttgta tttatatgtc
+   360901 gctctattaa cagagccgtg tataccggaa aaaaaaataa aagcgttatc gggtttgaac
+   360961 aactgagttg ttataatgtt ctttttcatt ttaataaagt gactccgtcg cgatgttttt
+   361021 tttcctcttc ttatgaaata tgaaattttc attttatttc ttcaccgggc ttggaccatg
+   361081 tctcccgaac aatctcagta catatggcgc aagacgattc cacatatcga ggtcggaatg
+   361141 ggatcgggtg ttttcacgtc tcaccgtagt gcccggtttg tcttgatttc cgattgatga
+   361201 acaagaagga aaatggaact attggattta gttgtgactc gcccccggct acctgtccaa
+   361261 aggaccaaaa gcccgcccgc cgccagtggg tgggaaggaa gctagccccc tatcttggtt
+   361321 gggggaagag gaacgaagtc catcgcgaag gattcaatcc agccacaggt tcccctacgg
+   361381 ctaccttgtt acgacttcac cccagtcgaa gaccccaccg tggtatgcgc caataagacc
+   361441 accaaaagcc tttgtggcac tagtggtaca cagaagtcat gggtgatcat tggtccgatg
+   361501 cttcgggcga aaccaattcc cagggtgtga cgggcggtgt gtacagggcc cgggtacata
+   361561 ttcaccgcgg catgctgatc cgcgattact agcgattcca acttcatgtt cccgagttgc
+   361621 agagaacaat ccgaactgag gcaatctttc cggattcgct ccgccttaca gccttgcttc
+   361681 ccattgtaat tgccattgta gcacgtgtgt ggcccagccc ataagggcca tgcggacttg
+   361741 acgtcatccc caccttcctc cagtatatca ctggcagtcc ctcgtgagtg cggcacgcac
+   361801 ctttttgttt gtttcggagc cgttttggca gggcgtacta aacccactta cttcgtccca
+   361861 caccaccggg cggctcgcct gaatgccgag tctttctccg ccgccaactc gacgtcgtcg
+   361921 tcacctgggt caaaaacttg actttactaa acaagcgaga aaagcccttt cgcacttctt
+   361981 agtaaagcgc ggcgctagct gcaatcaaac tcaagcgcag actagaaagg gcttggaaag
+   362041 gcgccgtctc ccttcttact gagagcagag ctagttgctg tcactcaatt actaggtctg
+   362101 gcacgtcact cggctccttg gctcacttcg gttttcaagc ctttctcctt aggcgcatgt
+   362161 ctgagcaaca caagacgagg gtttcgctcg ttataggact tgaccaaaca tctcacgaca
+   362221 cgagctgacg acagccatgc agcacctgta tgaaagtgag taccatcccg ttaaggatag
+   362281 gttttgttgt tcatatgtca agggctggta aggttttgcg cgttgtatcg aattaaacca
+   362341 catgctccac cgcttgtgca ggcccccgtc aattcctttg agttttggtc ttgcgaccgt
+   362401 actccccagg cggagtgttc acgcgttagc tgagcccctg atctgcgtag accaagggcg
+   362461 aacactcatc gtttacggca tggactacca gggtatctaa tcccgttcgc tcccccatgc
+   362521 tttcgcaccc ccagcgtcgg tagggaccca gagagctgcc ttcgcttttg gcgttccttc
+   362581 gtagatctgt agatttcacc cctccacacg aaattccact ctcctctgtc tcactcaagt
+   362641 gaattggttt cgaaagcatt ccgccacttt ttggcgactt tcactttcaa cccgattcac
+   362701 cgcctacgtg ccctttacgc ccagtcattc cgaagaacac ttgccccccc cgttttaccg
+   362761 cggctgctgg cacggagtta gccggggctt cttcctcgag tcctgtcatg atcgcgcact
+   362821 cgacgaaaga gctttacaag cggcattgcc tttcttcact cacgcgatat tgctggatcg
+   362881 gctttcgccc attgtccaag attccccact gctgcccccc gtgggagtcc gggccgtgtc
+   362941 tcagtcccag tgtggctgat catccgaaaa gaccagctaa gcatcattgg cttggtcagc
+   363001 ctttacctaa ccaactacct aatactacgc aggctcatca aacagcgctt tttagctttc
+   363061 ttcaggattt ggcccgaact gttcggcaga ttcccacgcc ttacgcaccc gttcgccact
+   363121 ttgttctcaa ctcttcccgc ctcctgggcg agacaagcta ccttgagcta ggagcctctt
+   363181 ttccttctgc ctagctcccc gagaacaacg ttcgacttgc atgtgttaag catatagcta
+   363241 gcgttccttc tgagccagga tcaaactctt cttttgacta tgattgggcc ctgcagtggt
+   363301 agaacctcgt gaaccgggcg tactacttcc caaccttctg tggacctttc ttctcttatt
+   363361 caattccact ttgtttagtt tagtgatagt tagaggttag agaagagagc tagatcactc
+   363421 ctctaagcag ccttctgatt atatacgtat tattctatca atcgataagc aagggtaggg
+   363481 ttcctggatc ggtctcgacc agaagaccga gaggtacttt tgtcacgagc tggcttaacc
+   363541 cattccctta acaccaagcc tggataagca actgaaaagt caagtagaat ccctggcttg
+   363601 tgtaggcgag ctaaacacaa atctttcttt tcattttata tagatggaaa agtaagaaat
+   363661 cctattttcc ttcgtaagag cacatcttac atcgcaaact ttctctttat ataccgagga
+   363721 tttgatgaaa taccatttta gctctatgga gccgtggtgg aaaagagaat tttcattttg
+   363781 tataccggcg atttatatca aaatggcatc aatctcactt tttcaaaata gttggctgaa
+   363841 gatgaaacat ctccctagtt gcctattcac ccagacaacc aacactttag gcatttacag
+   363901 gaaaaaaaaa cctaaccata gtcgtgacaa ccctagaatc aactcaaacc tttcgaccaa
+   363961 ctatgcccaa gctaagtcag ttgagagatc aagatccaat agcctcaact caggcccaaa
+   364021 cccattagag aatgccacgt agagaaccat cgggcacata aacttctata ttccacccga
+   364081 tgagactaat gctgtcactc gaaaccaagc tgctcaaact aatgctagct aggctgtgaa
+   364141 ctttgaaggg gtagatagca atccaagtct tatacccgaa atgctgcaaa taacttgaga
+   364201 agtgcctaga gagttcctac gaaaagatac tacatcatca tcgataagct gagctccacc
+   364261 cgcataagca tttccatgtt agagctgctt aaaactttgc ttgtgcgtag ggatgctttc
+   364321 gtgagagaag tgctactctc ggtttggatc aaaagaatat actgctgaac ctattgactg
+   364381 actactttat gcaactactc gctataaagc cttaaagcta gctctcacta acaaagccag
+   364441 tgaaatctct attagcccat agggtatact ctcaaccatt acttgccctt tactagattg
+   364501 tctgaaactt gtcttattgc tgctttgtct taggataacc accctgcaaa ggcatgggga
+   364561 aagctatagg catcagcaga aaggcaagca aaggcgtagg tagccaaatg gtctagtccc
+   364621 tgccagagaa caactgcagc tgagacgaga gctgacaagt atgtaacacc tacttccacc
+   364681 ccatcgggga gtgaaataga acatgaaacc gtaagctccc aagcagtggg aggagccctg
+   364741 ggctctgacc gcgtgcctgt tgaagaatga gccggcgact cataggcagt ggcttggtta
+   364801 agggaaccca ccggagccgt agcgaaaggc gagtcttcat agggcaattg tcactgctta
+   364861 tggacccgaa cctgggtgat ctatctatga ccaggatgaa gcttgggtga aactaagtgg
+   364921 aggtccgaac cgactgatgt tgaaaaatca gcggatgagt tgtggttagg ggtgaaatgc
+   364981 cactcgaacc cagagctagc tggttctccc cgaaatgcgt tgaggcgcag cagttgactg
+   365041 gacatctagg ggtaaagcac tgtttcggtg cgggccgcga gagcggtacc aaatcgaggc
+   365101 aaactctgaa tactagatat gacctcaaaa taacaggggt caaggtcggc cagtgagacg
+   365161 gtgggggata agcttcatcg tcgagaggtt gtttacatac tcgactaaaa ggagaggtcc
+   365221 aaaatcgact gataaaggag aggtccggag gtattaacta gttgatgagg tctcgacgag
+   365281 cgactgaaag gttcacctag tgctcgagag agaattgggt cgcgcacctt ctgaacccga
+   365341 ccaccaccta taagacgacg tcaagaaagc caactctcag gccaagagaa catacctcgc
+   365401 ctaatagact ggaggaggtg tcccgtaccc ttagaatcgt ctaataaaaa aaatgtgctt
+   365461 accttcatga gagctgacat tacttctttt gatatcgatg ccttagtcag gctaactata
+   365521 tgcgatgcgt caaacataaa atgaaattca tcctaccgaa accagaccag agaaaagagg
+   365581 atctccggag ctcaaccatc gaatcgggaa agcactcttt ccagcttaaa acagagcaca
+   365641 gaaaggacac tcaaagcagg tccgtctgta tttcattata gtagtgatcc ccttctcttg
+   365701 atcaattaga caagaacttt actttgttaa agacataaaa ggaggtccca ctatgactat
+   365761 tagcggttcc tagcaggcat gggagaagag caggaagaga aagaaaaaag agacaaagat
+   365821 agactagcca cttccttatt atacgaaaag tttttgattg atgagttctc accttctctc
+   365881 atggagtagg tagatgagac agaagggaaa aggagtgacc cctactatga ctcatctcag
+   365941 ttactcttca tcacaggaca agatacgagc agaatcagag gaaaatcgaa caactacaag
+   366001 ctaagatgaa aaatctaagt gaaagactaa ctgataggcc ctggctggac cgatggaatg
+   366061 tttcaagtaa agggctaact cttaactagg ctagcgaagg aactgacatt agggaaagaa
+   366121 ctcctgactc tcggggaaag atctctctct ttaatacgag attccctaac cctgaagcat
+   366181 ttagcaaact cagcaaaacc tgaatgggga tcaaggactt ttgataactt attttgaccc
+   366241 ccaatccacc caaagcagat tcatagactt tggcgacttc ctgatcggca atgacaacat
+   366301 catcaccgag taccgcgtac gatgtaaagc gtacaccagg atgcacctgt ttcgcacacc
+   366361 accacactaa tatatggtgt gatagcgcga aagtaggcca agaaccgtga tatcccaatg
+   366421 gctgccctgc cacaaagcat acttgagaga accttcgttt aagtttaaca aaaggcacct
+   366481 caaagatatt gcatgcaaat gcagaattaa ccacacttga ggcaaaggag cggtcaaata
+   366541 ggtactgcac cacctcaaac agaaagacta aaggccaacg atcagtggcc gactttaagt
+   366601 caaaggagaa agagtgccta ctgccaacca gtcgatcaaa aggctgtgtt tggttaaaag
+   366661 tcccatcttg ggggagacgt cttctccgcc ccccaaacat ggaaggggtg taacaaccta
+   366721 ttattaacgt agttcccaat ggcgaataga aggcttttgc cacctccctc aacaacctga
+   366781 cctagtcggc ccatcctccg aggtttttgt tctgctgata tgatagctct tttaaagaag
+   366841 aggtctcaac gagcctcctg aatttaattt catccataca ggagaatgca tcattcttat
+   366901 cgcagaatgg aaacaaaccg gatt
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/NC_006346.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/NC_006346.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/NC_006346.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,57 @@
+LOCUS       NC_006346              21657 bp    DNA     circular VRT 12-JAN-2005
+DEFINITION  Bolitoglossa n. sp. RLM-2004 mitochondrion, complete genome.
+ACCESSION   NC_006346
+VERSION     NC_006346.1  GI:53686544
+KEYWORDS    .
+SOURCE      mitochondrion Bolitoglossa n. sp. RLM-2004 (mushroomtongue
+            salamander)
+  ORGANISM  Bolitoglossa n. sp. RLM-2004
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Amphibia; Batrachia; Caudata; Salamandroidea; Plethodontidae;
+            Plethodontinae; Bolitoglossini; Bolitoglossa.
+REFERENCE   1  (bases 1 to 21657)
+  AUTHORS   Mueller,R.L., Macey,J.R., Jaekel,M., Wake,D.B. and Boore,J.L.
+  TITLE     Morphological homoplasy, life history evolution, and historical
+            biogeography of plethodontid salamanders inferred from complete
+            mitochondrial genomes
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 101 (38), 13820-13825 (2004)
+   PUBMED   15365171
+REFERENCE   2  (bases 1 to 21657)
+  AUTHORS   .
+  CONSRTM   NCBI Genome Project
+  TITLE     Direct Submission
+  JOURNAL   Submitted (01-OCT-2004) National Center for Biotechnology
+            Information, NIH, Bethesda, MD 20894, USA
+REFERENCE   3  (bases 1 to 21657)
+  AUTHORS   Mueller,R.L., Macey,J.R., Jaekel,M., Wake,D.B. and Boore,J.L.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (18-AUG-2004) Museum of Vertebrate Zoology and
+            Evolutionary Genomics, University of California at Berkeley and DOE
+            Joint Genome Institute, 3101 Valley Life Sciences Bldg., Berkeley,
+            CA 94720-3160, USA
+COMMENT     REVIEWED REFSEQ: This record has been curated by NCBI staff. The
+            reference sequence was derived from AY728235.
+FEATURES             Location/Qualifiers
+     source          1..21657
+                     /organism="Bolitoglossa n. sp. RLM-2004"
+                     /organelle="mitochondrion"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:291262"
+                     /common="mushroomtongue salamander"
+     tRNA            1..64
+                     /product="tRNA-Phe"
+     rRNA            65..96
+                     /product="s-rRNA"
+     D-loop          152..216
+                     /note="putative control region"
+ORIGIN      
+        1 gcaagtgtag tttataaaaa catagcactg aaaatgctaa aataaatatt atatttcact
+       61 agcaatagat ttggtcctaa tctttttatt aattataact ataattatac atgcaagttt
+      121 caccacccca gtgagtaagc cccacatccc cgcttagtga tgaaggagct ggtatcaggc
+      181 atacacaccc aaaacaccac gcttagccac acctacacag gaactcagca gtaattaaca
+      241 ttaaaccata agtgaaaact tgatttagta atagtattta aagttggtaa atctcgtgcc
+      301 agccaccgcg gttacacgaa agacttaaat taatatatgc ggcccaaagg acagtttaag
+      361 gttttatagg aacttaagag cctttaaggc cgttatacgt ttaaaaggtc aaaaacacaa
+      421 atcaccccac caacttgaat ctgttaaagc cgggggacac actgggatta gataccccac
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/NC_006511-short.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/NC_006511-short.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/NC_006511-short.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+LOCUS       NC_006511            4585229 bp    DNA     circular BCT 03-DEC-2005
+DEFINITION  Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC
+            9150, complete genome.
+ACCESSION   NC_006511
+VERSION     NC_006511.1  GI:56412276
+KEYWORDS    .
+SOURCE      Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC
+            9150
+  ORGANISM  Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC
+            9150
+            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
+            Enterobacteriaceae; Salmonella.
+REFERENCE   1  (bases 1 to 4585229)
+  AUTHORS   McClelland,M., Sanderson,K.E., Clifton,S.W., Latreille,P.,
+            Porwollik,S., Sabo,A., Meyer,R., Bieri,T., Ozersky,P., McLellan,M.,
+            Harkins,C.R., Wang,C., Nguyen,C., Berghoff,A., Elliott,G.,
+            Kohlberg,S., Strong,C., Du,F., Carter,J., Kremizki,C., Layman,D.,
+            Leonard,S., Sun,H., Fulton,L., Nash,W., Miner,T., Minx,P.,
+            Delehaunty,K., Fronick,C., Magrini,V., Nhan,M., Warren,W.,
+            Florea,L., Spieth,J. and Wilson,R.K.
+  TITLE     Comparison of genome degradation in Paratyphi A and Typhi,
+            human-restricted serovars of Salmonella enterica that cause typhoid
+  JOURNAL   Nat. Genet. 36 (12), 1268-1274 (2004)
+   PUBMED   15531882
+REFERENCE   2  (bases 1 to 4585229)
+  AUTHORS   .
+  CONSRTM   NCBI Genome Project
+  TITLE     Direct Submission
+  JOURNAL   Submitted (08-DEC-2004) National Center for Biotechnology
+            Information, NIH, Bethesda, MD 20894, USA
+REFERENCE   3  (bases 1 to 4585229)
+  AUTHORS   McClelland,M., Sanderson,K.E., Clifton,S.W., Latreille,P.,
+            Porwollik,S., Sabo,A., Meyer,R., Bieri,T., Ozersky,P., McLellan,M.,
+            Harkins,R., Wang,C., Nguyen,C., Burghoff,A., Elliott,G.,
+            Kohlberg,S., Strong,C., Ali,J., Dante,M., Du,F., Layman,D.,
+            Leonard,S., Sun,H., Fulton,L., Nash,W., Miner,T., Delehaunty,K.,
+            Fronick,C., Florea,L., Spieth,J., Nhan,M. and Wilson,R.K.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (01-OCT-2004) Genome Sequencing Center, Washington
+            University, 4444 Forest Park Ave., St. Louis, MO 63108, USA
+COMMENT     PROVISIONAL REFSEQ: This record has not yet been subject to final
+            NCBI review. The reference sequence was derived from CP000026.
+            COMPLETENESS: full length.
+FEATURES             Location/Qualifiers
+     source          1..4585229
+                     /organism="Salmonella enterica subsp. enterica serovar
+                     Paratyphi A str. ATCC 9150"
+                     /mol_type="genomic DNA"
+                     /strain="ATCC 9150"
+                     /db_xref="ATCC:9150"
+                     /db_xref="taxon:295319"
+     gene            190..255
+                     /gene="thrL"
+                     /locus_tag="SPA0001"
+                     /db_xref="GeneID:3175747"
+     CDS             190..255
+                     /gene="thrL"
+                     /locus_tag="SPA0001"
+                     /note="similar to Salmonella typhi CT18 thr operon leader
+                     peptide"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="thr operon leader peptide"
+                     /protein_id="YP_149352.1"
+                     /db_xref="GI:56412277"
+                     /db_xref="GeneID:3175747"
+                     /translation="MNRISTTTITTITITTGNGAG"
+                     /transl_table=11
+                     /product="putative major fimbrial subunit"
+                     /protein_id="YP_153436.1"
+                     /db_xref="GI:56416361"
+                     /db_xref="GeneID:3177061"
+                     /translation="MRRLYLALILLFAYSSHGYASCRRSGNEGAITITPPSQLVVDSH
+                     AYTAGEVLWQSGWVSTSEVTMDGCSRDYKVGFLYEPGSAQSNTSATINANDGNNTPVF
+                     STGISGVDIAIKTQTNAGPYDNVMPIDNTYHNGDGNKTHHAMAPAYNVELVALGGPIT
+                     SDTATFQSPLARVSFRDSATEDSGGDILTHLYLGNTQLIMKAMGCRVETPAITVDLGS
+                     VNLGSFANSQTAGTGEQDILLTCEQGTAISASLSAQPASGNNPDNSVIQLSNASAPTS
+                     ATGVGVQLGIQAPDAGFFTDSLPINQKIDLFTHTITTNADGSQTVNGGTMNMSTTLKI
+                     SARYYKTAATVTAGQANATATLNLTYN"
+     gene            4584519..4585205
+                     /gene="lasT"
+                     /locus_tag="SPA4410"
+                     /db_xref="GeneID:3177689"
+     CDS             4584519..4585205
+                     /gene="lasT"
+                     /locus_tag="SPA4410"
+                     /note="similar to Salmonella typhi CT18 putative RNA
+                     methyltransferase"
+                     /codon_start=1
+                     /transl_table=11
+                     /product="putative RNA methyltransferase"
+                     /protein_id="YP_153444.1"
+                     /db_xref="GI:56416369"
+                     /db_xref="GeneID:3177689"
+                     /translation="MRVTIVLVAPARAENIGAAARAMKTMGFTDLRIVDSQAHLEPAT
+                     RWVAHGSGDIIDNIEVFHTLADALHDVDFTVATTARSRAKFHYYASPAELVPLLQEKS
+                     RWMRHAALVFGREDSGLTNDELALADVLTGVPMAADYPSLNLGQAVMVYCYQLAGLMQ
+                     QATESVDIADESQLQALRARLLRLLTTLEAADDHKLTDWLQQRIGLLGQRDTVMLHRL
+                     VHDIEKKLTK"
+ORIGIN      
+        1 agagattacg tctggtttca agagatcata acagagggaa ttggttgaaa ataaatatat
+       61 cgccagcagc acatgaacaa atttcggaat gtgatcaatt taaaaattta ttgacttagg
+      121 cgggcagata ctttaaccaa tataggaata caagacagac aaataaaaat gacagagtac
+      181 acaacatcca tgaaccgcat cagcaccacc accattacca ccatcaccat taccacaggt
+      241 aacggtgcgg gctgacgcgt acaggaaaca cagaaaaaag cccgcacctg aacagtgcgg
+      301 gctttttttt cgaccagaga tcacgaggta acaaccatgc gagtgttgaa gttcggcggt
+      361 acatcagtgg caaatgcaga acgttttctg cgtgttgccg atattctgga aagcaatgcc
+      421 aggcaagggc aggtagcgac cgtactttcc gcccccgcga aaattaccaa ccatctggtg
+      481 gcgatgattg aaaaaactat cggcggtcag gatgctttgc cgaatatcag cgatgccgaa
+      541 cgtatttttt ctgacctgct cgcaggactt gccagcgcgc agccgggatt cccgcttgca
+      601 cggttgaaaa tggttgtcga acaagaattc gctcagatca aacatgtttt gcatggtatc
+      661 agcctgctgg gtcagtgccc ggacagcatc aacgccgcgc tgatttgccg tggcgaaaaa
+      721 atgtcgatcg cgattatggc gggacttctg gaggcgcgtg ggcatcgcgt cacggtgatc
+      781 gatccggtag aaaaactgct ggcggtgggc cattaccttg agtctaccgt cgatatcgcg
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/NM_002253.tseq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/NM_002253.tseq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/NM_002253.tseq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<!DOCTYPE TSeqSet PUBLIC "-//NCBI//NCBI TSeq/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd">
+<TSeqSet>
+<TSeq>
+  <TSeq_seqtype value="nucleotide"/>
+  <TSeq_gi>11321596</TSeq_gi>
+  <TSeq_sid>ref|NM_002253.1|</TSeq_sid>
+  <TSeq_taxid>9606</TSeq_taxid>
+  <TSeq_orgname>Homo sapiens</TSeq_orgname>
+  <TSeq_defline>Homo sapiens kinase insert domain receptor (a type III receptor tyrosine kinase) (KDR), mRNA</TSeq_defline>
+  <TSeq_length>5830</TSeq_length>
+  <TSeq_sequence>ACTGAGTCCCGGGACCCCGGGAGAGCGGTCAGTGTGTGGTCGCTGCGTTTCCTCTGCCTGCGCCGGGCATCACTTGCGCGCCGCAGAAAGTCCGTCTGGCAGCCTGGATATCCTCTCCTACCGGCACCCGCAGACGCCCCTGCAGCCGCCGGTCGGCGCCCGGGCTCCCTAGCCCTGTGCGCTCAACTGTCCTGCGCTGCGGGGTGCCGCGAGTTCCACCTCCGCGCCTCCTTCTCTAGACAGGCGCTGGGAGAAAGAACCGGCTCCCGAGTTCTGGGCATTTCGCCCGGCTCGAGGTGCAGGATGCAGAGCAAGGTGCTGCTGGCCGTCGCCCTGTGGCTCTGCGTGGAGACCCGGGCCGCCTCTGTGGGTTTGCCTAGTGTTTCTCTTGATCTGCCCAGGCTCAGCATACAAAAAGACATACTTACAATTAAGGCTAATACAACTCTTCAAATTACTTGCAGGGGACAGAGGGACTTGGACTGGCTTTGGCCCAATAATCAGAGTGGCAGTGAGCAAAGGGTGGAGGTGACTGAGTGCAGCGATGGCCTCTTCTGTAAGACACTCACAATTCCAAAAGTGATCGGAAATGACACTGGAGCCTACAAGTGCTTCTACCGGGAAACTGACTTGGCCTCGGTCATTTATGTCTATGTTCAAGATTACAGATCTCCATTTATTGCTTCTGTTAGTGACCAACATGGAGTCGTGTACATTACTGAGAACAAAAACAAAACTGTGGTGATTCCATGTCTCGGGTCCATTTCAAATCTCAACGTGTCACTTTGTGCAAGATACCCAGAAAAGAGATTTGTTCCTGATGGTAACAGAATTTCCTGGGACAGCAAGAAGGGCTTTACTATTCCCAGCTACATGATCAGCTATGCTGGCATGGTCTTCTGTGAAGCAAAAATTAATGATGAAAGTTACCAGTCTATTATGTACATAGTTGTCGTTGTAGGGTATAGGATTTATGATGTGGTTCTGAGTCCGTCTCATGGAATTGAACTATCTGTTGGAGAAAAGCTTGTCTTAAATTGTACAGCAAGAACTGAACTAAATGTGGGGATTGACTTCAACTGGGAATACCCTTCTTCGAAGCATCAGCATAAGAAACTTGTAAACCGAGACCTAAAAACCCAGTCTGGGAGTGAGATGAAGAAATTTTTGAGCACCTTAACTATAGATGGTGTAACCCGGAGTGACCAAGGATTGTACACCTGTGCAGCATCCAGTGGGCTGATGACCAAGAAGAACAGCACATTTGTCAGGGTCCATGAAAAACCTTTTGTTGCTTTTGGAAGTGGCATGGAATCTCTGGTGGAAGCCACGGTGGGGGAGCGTGTCAGAATCCCTGCGAAGTACCTTGGTTACCCACCCCCAGAAATAAAATGGTATAAAAATGGAATACCCCTTGAGTCCAATCACACAATTAAAGCGGGGCATGTACTGACGATTATGGAAGTGAGTGAAAGAGACACAGGAAATTACACTGTCATCCTTACCAATCCCATTTCAAAGGAGAAGCAGAGCCATGTGGTCTCTCTGGTTGTGTATGTCCCACCCCAGATTGGTGAGAAATCTCTAATCTCTCCTGTGGATTCCTACCAGTACGGCACCACTCAAACGCTGACATGTACGGTCTATGCCATTCCTCCCCCGCATCACATCCACTGGTATTGGCAGTTGGAGGAAGAGTGCGCCAACGAGCCCAGCCAAGCTGTCTCAGTGACAAACCCATACCCTTGTGAAGAATGGAGAAGTGTGGAGGACTTCCAGGGAGGAAATAAAATTGAAGTTAATAAAAATCAATTTGCTCTAATTGAAGGAAAAAACAAAACTGTAAGTACCCTTGTTATCCAAGCGGCAAATGTGTCAGCTTTGTACAAATGTGAAGCGGTCAACAAAGTCGGGAGAGGAGAGAGGGTGATCTCCTTCCACGTGACCAGGGGTCCTGAAATTACTTTGCAACCTGACATGCA
GCCCACTGAGCAGGAGAGCGTGTCTTTGTGGTGCACTGCAGACAGATCTACGTTTGAGAACCTCACATGGTACAAGCTTGGCCCACAGCCTCTGCCAATCCATGTGGGAGAGTTGCCCACACCTGTTTGCAAGAACTTGGATACTCTTTGGAAATTGAATGCCACCATGTTCTCTAATAGCACAAATGACATTTTGATCATGGAGCTTAAGAATGCATCCTTGCAGGACCAAGGAGACTATGTCTGCCTTGCTCAAGACAGGAAGACCAAGAAAAGACATTGCGTGGTCAGGCAGCTCACAGTCCTAGAGCGTGTGGCACCCACGATCACAGGAAACCTGGAGAATCAGACGACAAGTATTGGGGAAAGCATCGAAGTCTCATGCACGGCATCTGGGAATCCCCCTCCACAGATCATGTGGTTTAAAGATAATGAGACCCTTGTAGAAGACTCAGGCATTGTATTGAAGGATGGGAACCGGAACCTCACTATCCGCAGAGTGAGGAAGGAGGACGAAGGCCTCTACACCTGCCAGGCATGCAGTGTTCTTGGCTGTGCAAAAGTGGAGGCATTTTTCATAATAGAAGGTGCCCAGGAAAAGACGAACTTGGAAATCATTATTCTAGTAGGCACGGCGGTGATTGCCATGTTCTTCTGGCTACTTCTTGTCATCATCCTACGGACCGTTAAGCGGGCCAATGGAGGGGAACTGAAGACAGGCTACTTGTCCATCGTCATGGATCCAGATGAACTCCCATTGGATGAACATTGTGAACGACTGCCTTATGATGCCAGCAAATGGGAATTCCCCAGAGACCGGCTGAAGCTAGGTAAGCCTCTTGGCCGTGGTGCCTTTGGCCAAGTGATTGAAGCAGATGCCTTTGGAATTGACAAGACAGCAACTTGCAGGACAGTAGCAGTCAAAATGTTGAAAGAAGGAGCAACACACAGTGAGCATCGAGCTCTCATGTCTGAACTCAAGATCCTCATTCATATTGGTCACCATCTCAATGTGGTCAACCTTCTAGGTGCCTGTACCAAGCCAGGAGGGCCACTCATGGTGATTGTGGAATTCTGCAAATTTGGAAACCTGTCCACTTACCTGAGGAGCAAGAGAAATGAATTTGTCCCCTACAAGACCAAAGGGGCACGATTCCGTCAAGGGAAAGACTACGTTGGAGCAATCCCTGTGGATCTGAAACGGCGCTTGGACAGCATCACCAGTAGCCAGAGCTCAGCCAGCTCTGGATTTGTGGAGGAGAAGTCCCTCAGTGATGTAGAAGAAGAGGAAGCTCCTGAAGATCTGTATAAGGACTTCCTGACCTTGGAGCATCTCATCTGTTACAGCTTCCAAGTGGCTAAGGGCATGGAGTTCTTGGCATCGCGAAAGTGTATCCACAGGGACCTGGCGGCACGAAATATCCTCTTATCGGAGAAGAACGTGGTTAAAATCTGTGACTTTGGCTTGGCCCGGGATATTTATAAAGATCCAGATTATGTCAGAAAAGGAGATGCTCGCCTCCCTTTGAAATGGATGGCCCCAGAAACAATTTTTGACAGAGTGTACACAATCCAGAGTGACGTCTGGTCTTTTGGTGTTTTGCTGTGGGAAATATTTTCCTTAGGTGCTTCTCCATATCCTGGGGTAAAGATTGATGAAGAATTTTGTAGGCGATTGAAAGAAGGAACTAGAATGAGGGCCCCTGATTATACTACACCAGAAATGTACCAGACCATGCTGGACTGCTGGCACGGGGAGCCCAGTCAGAGACCCACGTTTTCAGAGTTGGTGGAACATTTGGGAAATCTCTTGCAAGCTAATGCTCAGCAGGATGGCAAAGACTACATTGTTCTTCCGATATCAGAGACTTTGAGCATGGAAGAGGATTCTGGACTCTCTCTGCCTACCTCACCTGTTTCCTGTATGGAGGAGGAGGAAGTATGTGACCCCAAATTCCATTATGACAACACAGCAGGAATCAGTCAGTATCTGCAGAACA
GTAAGCGAAAGAGCCGGCCTGTGAGTGTAAAAACATTTGAAGATATCCCGTTAGAAGAACCAGAAGTAAAAGTAATCCCAGATGACAACCAGACGGACAGTGGTATGGTTCTTGCCTCAGAAGAGCTGAAAACTTTGGAAGACAGAACCAAATTATCTCCATCTTTTGGTGGAATGGTGCCCAGCAAAAGCAGGGAGTCTGTGGCATCTGAAGGCTCAAACCAGACAAGCGGCTACCAGTCCGGATATCACTCCGATGACACAGACACCACCGTGTACTCCAGTGAGGAAGCAGAACTTTTAAAGCTGATAGAGATTGGAGTGCAAACCGGTAGCACAGCCCAGATTCTCCAGCCTGACTCGGGGACCACACTGAGCTCTCCTCCTGTTTAAAAGGAAGCATCCACACCCCAACTCCCGGACATCACATGAGAGGTCTGCTCAGATTTTGAAGTGTTGTTCTTTCCACCAGCAGGAAGTAGCCGCATTTGATTTTCATTTCGACAACAGAAAAAGGACCTCGGACTGCAGGGAGCCAGTCTTCTAGGCATATCCTGGAAGAGGCTTGTGACCCAAGAATGTGTCTGTGTCTTCTCCCAGTGTTGACCTGATCCTCTTTTTTCATTCATTTAAAAAGCATTATCATGCCCCTGCTGCGGGTCTCACCATGGGTTTAGAACAAAGAGCTTCAAGCAATGGCCCCATCCTCAAAGAAGTAGCAGTACCTGGGGAGCTGACACTTCTGTAAAACTAGAAGATAAACCAGGCAACGTAAGTGTTCGAGGTGTTGAAGATGGGAAGGATTTGCAGGGCTGAGTCTATCCAAGAGGCTTTGTTTAGGACGTGGGTCCCAAGCCAAGCCTTAAGTGTGGAATTCGGATTGATAGAAAGGAAGACTAACGTTACCTTGCTTTGGAGAGTACTGGAGCCTGCAAATGCATTGTGTTTGCTCTGGTGGAGGTGGGCATGGGGTCTGTTCTGAAATGTAAAGGGTTCAGACGGGGTTTCTGGTTTTAGAAGGTTGCGTGTTCTTCGAGTTGGGCTAAAGTAGAGTTCGTTGTGCTGTTTCTGACTCCTAATGAGAGTTCCTTCCAGACCGTTAGCTGTCTCCTTGCCAAGCCCCAGGAAGAAAATGATGCAGCTCTGGCTCCTTGTCTCCCAGGCTGATCCTTTATTCAGAATACCACAAAGAAAGGACATTCAGCTCAAGGCTCCCTGCCGTGTTGAAGAGTTCTGACTGCACAAACCAGCTTCTGGTTTCTTCTGGAATGAATACCCTCATATCTGTCCTGATGTGATATGTCTGAGACTGAATGCGGGAGGTTCAATGTGAAGCTGTGTGTGGTGTCAAAGTTTCAGGAAGGATTTTACCCTTTTGTTCTTCCCCCTGTCCCCAACCCACTCTCACCCCGCAACCCATCAGTATTTTAGTTATTTGGCCTCTACTCCAGTAAACCTGATTGGGTTTGTTCACTCTCTGAATGATTATTAGCCAGACTTCAAAATTATTTTATAGCCCAAATTATAACATCTATTGTATTATTTAGACTTTTAACATATAGAGCTATTTCTACTGATTTTTGCCCTTGTTCTGTCCTTTTTTTCAAAAAAGAAAATGTGTTTTTTGTTTGGTACCATAGTGTGAAATGCTGGGAACAATGACTATAAGACATGCTATGGCACATATATTTATAGTCTGTTTATGTAGAAACAAATGTAATATATTAAAGCCTTATATATAATGAACTTTGTACTATTCACATTTTGTATCAGTATTATGTAGCATAACAAAGGTCATAATGCTTTCAGCAATTGATGTCATTTTATTAAAGAACATTGAAAAACTTGA</TSeq_sequence>
+</TSeq>
+</TSeqSet>
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/NM_002254.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/NM_002254.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/NM_002254.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,165 @@
+LOCUS       KIF3C                   4913 bp    mRNA    linear   PRI 06-APR-2003
+DEFINITION  Homo sapiens kinesin family member 3C (KIF3C), mRNA.
+ACCESSION   NM_002254
+VERSION     NM_002254.2  GI:19923139
+KEYWORDS    .
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 4913)
+  AUTHORS   Sardella,M., Navone,F., Rocchi,M., Rubartelli,A., Viggiano,L.,
+            Vignali,G., Consalez,G.G., Sitia,R. and Cabibbo,A.
+  TITLE     KIF3C, a novel member of the kinesin superfamily: sequence,
+            expression, and mapping to human chromosome 2 at 2p23
+  JOURNAL   Genomics 47 (3), 405-408 (1998)
+  MEDLINE   98149989
+   PUBMED   9480755
+REFERENCE   2  (bases 1 to 4913)
+  AUTHORS   Telford,E.A., Wightman,P., Leek,J., Markham,A.F., Lench,N.J. and
+            Bonthron,D.T.
+  TITLE     cDNA cloning, genomic organization, and chromosomal localization of
+            a novel human gene that encodes a kinesin-related protein highly
+            similar to mouse Kif3C
+  JOURNAL   Biochem. Biophys. Res. Commun. 242 (2), 407-412 (1998)
+  MEDLINE   98113366
+   PUBMED   9446808
+REFERENCE   3  (bases 1 to 4913)
+  AUTHORS   Telford,E.A.R., Wightman,P., Leek,J., Lench,N.J., Markham,A.F. and
+            Bonthron,D.T.
+  TITLE     Homo sapiens kinesin family member 3C (KIF3C), mRNA
+  JOURNAL   Unpublished (1997)
+COMMENT     PROVISIONAL REFSEQ: This record has not yet been subject to final
+            NCBI review. The reference sequence was derived from AF035621.1.
+            On Apr 4, 2002 this sequence version replaced gi:4504868.
+FEATURES             Location/Qualifiers
+     source          1..4913
+                     /organism="Homo sapiens"
+                     /mol_type="mRNA"
+                     /db_xref="taxon:9606"
+                     /chromosome="2"
+                     /map="2p23"
+     gene            1..4913
+                     /gene="KIF3C"
+                     /db_xref="LocusID:3797"
+                     /db_xref="MIM:602845"
+     CDS             154..2535
+                     /gene="KIF3C"
+                     /codon_start=1
+                     /product="kinesin family member 3C"
+                     /protein_id="NP_002245.2"
+                     /db_xref="GI:19923140"
+                     /db_xref="LocusID:3797"
+                     /db_xref="MIM:602845"
+                     /translation="MASKTKASEALKVVARCRPLSRKEEAAGHEQILTMDVKLGQVTL
+                     RNPRAAPGELPKTFTFDAVYDASSKQADLYDETVRPLIDSVLQGFNGTVFAYGQTGTG
+                     KTYTMQGTWVEPELRGVIPNAFEHIFTHISRSQNQQYLVRASYLEIYQEEIRDLLSKE
+                     PGKRLELKENPETGVYIKDLSSFVTKNVKEIEHVMNLGNQTRAVGSTHMNEVSSRSHA
+                     IFIITVECSERGSDGQDHIRVGKLNLVDLAGSERQNKAGPNTAGGAATPSSGGGGGGG
+                     GSGGGAGGERPKEASKINLSLSALGNVIAALAGNRSTHIPYRDSKLTRLLQDSLGGNA
+                     KTIMVATLGPASHSYDESLSTLRFANRAKNIKNKPRVNEDPKDTLLREFQEEIARLKA
+                     QLEKRGMLGKRPRRKSSRRKKAVSAPPGYPEGPVIEAWVAEEEDDNNNNHRPPQPILE
+                     SALEKNMENYLQEQKERLEEEKAAIQDDRSLVSEEKQKLLEEKEKMLEDLRREQQATE
+                     LLAAKYKAMESKLLIGGRNIMDHTNEQQKMLELKRQEIAEQKRREREMQQEMMLRDEE
+                     TMELRGTYTSLQQEVEVKTKKLKKLYAKLQAVKAEIQDQHDEYIRVRQDLEEAQNEQT
+                     RELKLKYLIIENFIPPEEKNKIMNRLFLDCEEEQWKFQPLVPAGVSSSQMKKRPTSAV
+                     GYKRPISQYARVAMAMGSHPRYRAENIMFLELDVSPPAVFEMEFSHDQEQDPRALHME
+                     RLMRLDSFLERPSTSKVRKSRSWCQSPQRPPPSTTHASLASASLRPATVADHE"
+     misc_feature    199..1251
+                     /gene="KIF3C"
+                     /note="kinesin; Region: Kinesin motor domain"
+                     /db_xref="CDD:pfam00225"
+     misc_feature    442..465
+                     /gene="KIF3C"
+                     /note="encodes ATP/GTP-binding site motif"
+     misc_feature    871..906
+                     /gene="KIF3C"
+                     /note="encodes kinesin motor domain signature"
+     polyA_signal    4889..4894
+                     /gene="KIF3C"
+BASE COUNT     1150 a   1342 c   1355 g   1066 t
+ORIGIN      
+        1 cctcccagcg tccccaccct aggaggctgc atgcggattg aagacgtgcg cctgggggct
+       61 gggccggccc cgctgatccc gacctagcga gcaggatagc aggaccgccc aggctgcgga
+      121 ggggctcggg ggcaggaagg tcagagcagc aagatggcca gtaagaccaa ggccagcgag
+      181 gccctcaagg tggtggcccg gtgccgcccc ctcagcagga aggaggaggc tgctggtcac
+      241 gagcagatcc tgaccatgga cgtgaaactg ggccaggtga ccctgcggaa cccccgcgcc
+      301 gccccggggg agctgcccaa gaccttcacc tttgacgccg tgtatgatgc cagctccaag
+      361 caggccgacc tgtatgacga aaccgtgagg cccctgatag actccgtgct ccagggtttc
+      421 aatggcacgg tgtttgccta tggccagacg ggcactggca agacctatac catgcagggg
+      481 acctgggtgg agcccgagct gcgcggggtc atcccgaatg cctttgagca catcttcacc
+      541 cacatctccc gctcccagaa ccaacagtac ctggtccggg cctcctattt ggagatctac
+      601 caggaagaga ttcgagacct gctctccaag gagccgggca agaggctaga gctgaaagag
+      661 aaccccgaga ctggcgtcta catcaaggac ctctcctcct tcgtcaccaa gaatgtcaag
+      721 gagattgagc atgtgatgaa cctggggaac cagacccggg ctgtgggcag cacccacatg
+      781 aatgaggtca gctcccgctc ccatgccatc ttcatcatca ctgtggagtg cagcgaacgt
+      841 ggctctgatg gccaggacca catccgagtg ggcaagctca acctcgtgga cctggctggc
+      901 agcgagaggc agaacaaggc aggccccaac acagcgggag gggcagccac accatcctcg
+      961 ggtggcggtg gtggcggtgg aggcagtggt ggtggtgctg gtggagagag gcctaaggaa
+     1021 gcctccaaaa tcaacctctc attatctgcc ctgggcaacg tgattgctgc cctggcgggc
+     1081 aacaggagca cccacattcc ctaccgggac tccaagctga cccggctgct ccaggactcc
+     1141 ctggggggga atgcgaagac catcatggta gccacactgg ggccagcttc tcacagctac
+     1201 gatgagagcc tctccacctt gcgctttgcc aaccgagcca agaacatcaa gaacaagccc
+     1261 cgggtgaacg aggaccccaa ggacacactg ctgcgggaat tccaagagga gattgcccgc
+     1321 ctgaaggccc agctggagaa gagggggatg ctggggaagc ggccccggag gaagagcagc
+     1381 cgcaggaaga aggccgtgtc cgccccgcct gggtaccctg agggcccagt gattgaggct
+     1441 tgggtggcag aagaggagga tgacaacaac aacaaccacc gcccgcccca gcccatcctg
+     1501 gagtcagcct tggagaagaa catggagaat tacctgcagg aacagaagga gcggctggag
+     1561 gaggagaagg cagccatcca ggatgaccgc agcctggtga gcgaggagaa gcagaagctg
+     1621 ctggaggaga aggagaagat gctggaggac ctgcggcggg aacagcaggc cacagagctg
+     1681 cttgcggcca agtacaaggc catggagagc aagctcctca tcgggggcag gaacatcatg
+     1741 gatcacacca acgaacagca gaagatgttg gaactgaaga ggcaggagat tgccgagcag
+     1801 aaacgtcgtg agcgggagat gcagcaggag atgatgctcc gggacgagga gactatggag
+     1861 ctccggggca cctacacatc cctgcagcag gaggtggagg tcaaaaccaa gaaactcaag
+     1921 aagctctacg ccaagctgca ggcggtgaag gcggagatcc aggaccagca tgatgagtat
+     1981 atccgcgtgc ggcaggacct ggaggaggcg cagaacgagc agacccgcga actcaagctc
+     2041 aagtacctaa tcatcgagaa cttcatcccg ccggaggaga agaacaagat catgaaccgg
+     2101 cttttcctgg actgtgagga ggagcagtgg aagttccagc cactggtgcc agccggcgtc
+     2161 agtagcagcc agatgaagaa gcggccaaca tctgcagtgg gctacaagag gcctatcagc
+     2221 cagtatgctc gggttgccat ggcaatgggg tcccacccca ggtacagggc tgaaaacata
+     2281 atgtttctgg agttggatgt gtcccctcca gctgtctttg agatggaatt ctctcacgac
+     2341 caagaacaag accctcgtgc gctacacatg gagaggctca tgcgattgga cagctttctg
+     2401 gaaagacctt ccacgtctaa agtccgaaag tccagatcct ggtgccagag tcctcagcgg
+     2461 cctccacctt ccaccacaca tgcctccctg gcctctgctt ctctgcgccc tgcaacagtg
+     2521 gcggaccatg agtgacaacc atcacgtcag gctgcccatc caatagactc ctgggatggg
+     2581 gcagccaacc ctggctcatc tcatctgccg cttggtgcgt gtgcgtgtgc gtgcatgtgc
+     2641 gtgtgcgtgt gtgcaggggt gagaatctgg cagatggtgc ctctgcctgc tcttcttcgc
+     2701 ctcctttatt taattcatgt tatttattcg cggacgtctg ttcgtgttgg ggagatgccc
+     2761 tcgcctgagc cgtctgggcc taccgtggtc actgcgtacg ctctttttct tctgacttga
+     2821 gagctccccc agtcagatct caggcttgtc cccctgtcag ctgcctccag aagggaaggt
+     2881 agccagtgcc tgagaagaca gtcccttttc tacccaccgc actccataac ctccatcttc
+     2941 tcccacactg atggcgagca gcccctgagc actttctggg actgggagac tgcttggtgt
+     3001 tccctgagga caagagacat cctgacagtg ttgggcatct gctccccgtg gacacagccc
+     3061 cactctccac tttctgagcc tcagacaacc tcattcagcc tcttgggctc cttttcaagg
+     3121 acattaataa cctcaccaac atagctcatg cccttcagct ttgacaagaa ctcacggctt
+     3181 cccaaactct gctttctgcc caccttggat gggaactgtg gaccaagcaa ttaccatcgc
+     3241 cttggaacct gcaggaaatg gaacagcaat tgagacaact tgaacagtca tcaacggaag
+     3301 tccctccact ggattccttt gtttctgtcc cctccgagga gtcattttgg tcgacaggct
+     3361 ctcaaggcaa ctccccattt tcaagaggct gctcctgcct gcttcgatca tttctccctg
+     3421 cagctgccta gaccccgttc acagtgggag gagtcaatgt cattctaccc ctcgctaaac
+     3481 gaagatatta acatctattg ctttttccct tcatctgtca caggaaacag aagcccaggc
+     3541 acaatctttt ccagctttgc ctgttacccc tgtttctgaa ttgcatcttt aaggtattat
+     3601 tttgttgaca atagatcctt tattcactag ttacgcaaat tggttcctag ggggatactc
+     3661 cttaccttcc tttgtgatgg cccaaaatgt ctctaggtat ctcaagtgat aagtaaattt
+     3721 ctacaaaaaa aaaatggtta atgttcattg actggctttt taagtgtata ttttggagga
+     3781 cgggtgaaga ggtcataacg aaagcaagcg agtgaattag gatttcaaag tgccctaata
+     3841 gtgtgagtct ccagttccta gaatatgaag agtgctgtcg ttggggtgaa accatgagac
+     3901 tgacagatct gcctgaaatg gggggtgtgg gaggtggtgg cgggggttat tctctttcct
+     3961 tcaggaaatg aacccttctt acatcattca agttctgctc tgaggatcaa gcttgggtct
+     4021 gatttaactc agcgacactg tcatttctgc ttcattactg gactagaggg ttgagccacc
+     4081 cacttgccat ttgctcctgt ccttccagga aatcacaatt ttcatcagag cccaagagat
+     4141 tatttgagac tcaggattca gatcagaggt tcgactgtgg ctgggacagg agttgtgtgt
+     4201 agaaattcac caggtggcct gagcgcaggg ggacctccag gctgcgttga gcagcctctc
+     4261 ccactgacct ctttctcgtt tgtggacaaa gcagcacgta tcacctcatt catcacttgg
+     4321 acacatcgcc tttgcattgt cttgtcacac ctccctcaca gtcttatagc acaatatacc
+     4381 caaatcagcc cccccagtcc gaggctgggc ccaaggtatg gtcggaggag gagctcctgc
+     4441 ctgcggtttt gtgtatgtgt gtatgtgtgt gcgtgtttgt gtgcgtgttt acctccacag
+     4501 gggacactct acactcagtg taagatctgc tgggaacagg gccaccagga gtgcgtggat
+     4561 ctcagtctct ctgtctctct ttctctcctt ttaattttgg tgtatcaaat atttgattga
+     4621 caaagtaagg gccttgatta ggaccaaatt ctcgtgtgtt gctatggtct ttatttagga
+     4681 caacaattaa caatgcagtg gcccattctt gtcactctac acatatgact atacgggaca
+     4741 tatgtaatat ataaatatat atataaaaca ttcccctctg tccccttggc ttcggatgga
+     4801 ggaatttctg ttgagctgaa atgcacctgc agctgggtgc tgccagcagc ttgcaggccc
+     4861 cagccctgtt ccaatcaatg cagttgacaa taaaggaatg agtatcgtca cgg
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/NT_021877.gbk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/NT_021877.gbk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/NT_021877.gbk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,241 @@
+LOCUS       NT_021877              10001 bp    DNA     linear   CON 17-OCT-2003
+DEFINITION  Homo sapiens chromosome 1 genomic contig.
+ACCESSION   NT_021877 REGION: 13920000..13930000
+VERSION     NT_021877.16  GI:37539616
+KEYWORDS    .
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 10001)
+  AUTHORS   International Human Genome Sequencing Consortium.
+  TITLE     The DNA sequence of Homo sapiens
+  JOURNAL   Unpublished (2003)
+COMMENT     GENOME ANNOTATION REFSEQ:  Features on this sequence have been
+            produced for build 34 of the NCBI's genome annotation [see
+            documentation].
+            On Oct 7, 2003 this sequence version replaced gi:29789880.
+            The DNA sequence is part of the second release of the finished
+            human reference genome. It was assembled from individual clone
+            sequences by the Human Genome Sequencing Consortium in consultation
+            with NCBI staff.
+            COMPLETENESS: not full length.
+FEATURES             Location/Qualifiers
+     source          1..10001
+                     /organism="Homo sapiens"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:9606"
+                     /chromosome="1"
+     source          <1..>10001
+                     /organism="Homo sapiens"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:9606"
+                     /clone="RP11-302I18"
+                     /note="Accession AL451081 sequenced by The Sanger Centre"
+     gene            complement(3024..6641)
+                     /gene="LOC127086"
+                     /note="Derived by automated computational analysis using
+                     gene prediction method: GNOMON."
+                     /db_xref="GeneID:127086"
+                     /db_xref="InterimID:127086"
+     mRNA            complement(join(3024..4108,4110..4258,4357..4533,
+                     5985..6225,6324..6641))
+                     /gene="LOC127086"
+                     /product="similar to ATP-dependent DNA helicase II, 70 kDa
+                     subunit (Lupus Ku autoantigen protein p70) (Ku70) (70 kDa
+                     subunit of Ku antigen) (Thyroid-lupus autoantigen) (TLAA)
+                     (CTC box binding factor 75 kDa subunit) (CTCBF) (CTC75)"
+                     /note="Derived by automated computational analysis using
+                     gene prediction method: GNOMON."
+                     /transcript_id="XM_060320.3"
+                     /db_xref="GI:37539614"
+                     /db_xref="GeneID:127086"
+                     /db_xref="InterimID:127086"
+     CDS             complement(join(3024..4108,4110..4258,4357..4533,
+                     5985..6225,6324..6641))
+                     /gene="LOC127086"
+                     /note="overriding stop codons"
+                     /codon_start=1
+                     /transl_except=(pos:complement(6444..6446),aa:OTHER)
+                     /transl_except=(pos:complement(4224..4226),
+		     aa:OTHER)
+                     /transl_except=(pos:complement(4067..4069),aa:OTHER)
+                     /transl_except=(pos:complement(4049..4051),aa:OTHER)
+                     /transl_except=(pos:complement(4046..4048),aa:OTHER)
+                     /transl_except=(pos:complement(3791..3793),aa:OTHER)
+                     /transl_except=(pos:complement(3678..3680),aa:OTHER)
+                     /transl_except=(pos:complement(3036..3038),aa:OTHER)
+                     /protein_id="XP_060320.3"
+                     /db_xref="GI:37539615"
+                     /db_xref="GeneID:127086"
+                     /db_xref="InterimID:127086"
+ORIGIN      
+        1 atctctatgt gtcttttgta tccaatgtta actctagtcg ctgggaagtt ggatgtccct
+       61 ccttaatcag atttgatata acagcaagag atttccaaga aaataccaaa aaatctaatt
+      121 tcagcaaaaa gaaaaaattt ggcctccagt tttccagatg taggtagatt ctgtccaaaa
+      181 cagatatcat ttaaaattta attttaaatc ctttgagttc ttcaaaagaa gatatttaac
+      241 aaaatgtggg agctgatgac aattttaatt atttacaaag taattttctc agtaaaatga
+      301 gccatgcttc aaaagggaaa ataccaacaa tttgccaggc attgtacctt ctttaatttt
+      361 cttctttctt tctttctttc tttctttctt tctttctttc tttctttctt tctttctttc
+      421 tttctttctt tcttttcttt ctttctttct ttctttcttt cttgagacgt tgtctcgctc
+      481 tgtcacccag actagagtgc agtggcacca tctttgctca ctgcaagctc cgcctcctgg
+      541 gttcacgcca ttctcctgcc acagcctcct gtgaagctgg gactacaggt gcctgccacc
+      601 acgcccagct aattttttgt atttttggta gacacggggt ttcaccgtgt tagccaggat
+      661 ggtctcgatc tcctgacctc gtgatctgcc tgccttggcc tcccaaagtg ctgggattat
+      721 aggtgtgagc cactgtgccc ggccgccgcc tttaattttt ataaccactg tgaggagtgg
+      781 tattattatt tgtgctcaaa cgggctcaca gaagtaggta acttggttaa aatcacttag
+      841 ccattaagtg gtgaagttgg aattggaact cagatgtgtg tgaccctaat tttattgcta
+      901 ttcctttaca cttggaattt tggtattttc attgctgtag aagaatgaag ttagccacca
+      961 ctcattttgt tgacagattg accatatggc cttcctagaa gtgatcattg aactgatggt
+     1021 attagctatt tctcctcaga cttactgatt gattgccttt gtgtgcaaaa agctctgtgt
+     1081 tatgacagaa acaccaaaga atagcaaaga tctttacttt ccaggatatt caaatctaac
+     1141 tggaggaaga gaacatgtaa ataaaaacta tatatcacca ggtagtaatt aattagtcag
+     1201 agacaaccta atgtctcttc ttttttttat tctaaagata ataactacat attgcaggaa
+     1261 aatctcctct aatctcacca ttatttcctc aactcttttt ttcaatgcac acatatattt
+     1321 agatatttaa ttggatgata tttataaaca cttttatatc ctatttttta ttgaatattg
+     1381 tattatgagc actctttcat gacatattat tccaatgtat aatttttaat ggcagaataa
+     1441 tattcatagt attttaattt ataatatctt ctggacattt agattgtttc taatattttt
+     1501 aattttttat aaattatatt gcattgaaca tcctagacca caaatctttt ctcatgtctc
+     1561 tacttattga cttaggataa atttattcag gagaaatcga gaataaattt ctggagattc
+     1621 ctgatatatg ttgagaagac ataagagaga atggagatcc ctaaacagat ttcgggtgca
+     1681 ccacttcagt caccaccatt gtgggcacca ggccctagag gctaaagtac cacattatat
+     1741 ctccaattcg tttgccaggt ggtctgtatt ctttgcttct ttgatttctc tgaatagagc
+     1801 tttccattca tttgaagggg cactttctgg tggtgcctga tggcattttg cattacattt
+     1861 tatttactgc aaaataaatt ttatttaatg caaaatgcca ccagggtgcc atcagaaagt
+     1921 gccccttcaa atgagtagaa agtctccttt tatgattgca gtatagtttg acagagttag
+     1981 aattctttgc tgttttcata ttagctactt aggatctata taaatctcat tgcaacggaa
+     2041 ataccagagt gtcttgaggc tggctgactg gggccttatt tctagcattc taagaaggtg
+     2101 caagagaaag aaactagcac tagatgagac tcatgagcat aaaccccttc aaattgggag
+     2161 agatgtctgc atttgcagcc aaacttctat catcagcctt gattcctgac ataatgagtt
+     2221 gtgagttgtt ataatcatgg aagaaaaatg acttgagagg ataacatgga tgatgtgatt
+     2281 tgaagatgag ttcagtataa actttggtga aaaatcattc tgataaactg atcttaagac
+     2341 atcatgccat gtttgatggc tcatgcctgt aatcccagaa ctttgcagca gtttgggagg
+     2401 ctgaggcagg caggttgctt gagcccagga gtttgacacc agcctgggca acatggcgaa
+     2461 accctatctc ttaaaaaaaa aatacaaaaa gtagccagtc atggtggctc atgcctggag
+     2521 acccagctac ctgggaggct gacaggggag gttcacctga accgggggaa gatctgagcc
+     2581 atgattgtgc cactacactc cagcctagga ctctctctgt ctcttaaaaa acaaaaaaaa
+     2641 caaaaaaaaa aaaacaaaaa aaaacaaagg catcacattg tttactccct tgtgatgccg
+     2701 gctggagtga cacttggcat caggagggca gtatagtctc tcttttttct tacagtacaa
+     2761 agttagggct cctcattcag gcagtagagt aaagaacagc aaagtgggag ggctacacca
+     2821 ttgccatggc aacagaaagc ctcctgaaga taaagtccct ctgcttctgt caggcagact
+     2881 cttcctagat caggagacac ctgttttcac tggctgagaa caaggccagg tagcctggtt
+     2941 acagtgtgga agggcagctg gacacatggc ctctggtcag ttctggaagt gattggtgaa
+     3001 ggcttccagt ggctcctgct tcttcagcct gctcttcagc ccatacccca gcaggtctct
+     3061 ttcagcatgg gcacggcaaa cttgcccagc atgctgttgc ttatgtgggt cttcagctcc
+     3121 tcttctgaat actccacctt gggccttttg cttctgaaac cttcattatt gtgttttatg
+     3181 ttgctaactt tcccttcagg attgtaatct ggtgggtaga gaagttcctt aaacttatcc
+     3241 accagtgggg agcccagtct tttattcatt gcttcaacct caggcaatgt caggtccact
+     3301 gcttgttcag gctccatcaa atccaaggcc aaggcctcca ggttcctgga acactgctgc
+     3361 agcacagagt tctcaaagct gtcacttctg tatttgaatc ggagcttctg aacaatagcc
+     3421 ttcaccttgt ccccctgctc tggggttgcc atgacttttt tagcaaaggg caccttccct
+     3481 ttattatcag cataacataa aaagaccagc tggaagcctg caggaggcac ctgaattttc
+     3541 tgatcatcca actcctcttc ctgtggcacc aaagccacaa aataaggggg gatgttcctg
+     3601 cagggtgtgt atctgcacaa tgctgtgacc tccttctcca gacacttgat gagtagagca
+     3661 ctgaacaggg ttgagctcta attcatcagt gactcttcag agtacacaaa caaggagggc
+     3721 ctcaggtaac ggtgcttctt cagctttacc aagggcttaa acccatgaga atcaaacctg
+     3781 gttcatcaaa ctattttagc tcttctgttt cctctttctc cagtataatc tgacgccttc
+     3841 catagatctg agacctcttg gtatctctag acagaagcca actgcctgta tttacattaa
+     3901 atatccaggt cttggttttc actgattcat ctatttctcg atagggcttt attggaggag
+     3961 gcttgagagc cttctggatc ggattataaa tgcccacaga gagcactata tctttattga
+     4021 gcttcagctt taacctgctg agtgttcact aactgatctc cttggctcaa acctgcctca
+     4081 acaggtcttc tagctttctg gattcctcag tgagccctgg ggtcctcatc ctctgctatg
+     4141 ctggtgatat ctctgtagaa gaaaggtata tcaaagccct cagttttctt caggtgcatc
+     4201 aagtcaagga agatgcctgt atctcagtga ttattggctt tggtcctggc ccagctggct
+     4261 ttggcatgtc attgccatgg gggttatcct cattggtgaa caacatgact cctcttgtac
+     4321 agtacattgc taaagaggtt gttggcacag actcacagca cttcactgag tgagtagtca
+     4381 gatccatgcc catcaggtct tggaaacgtt tttgtccctg ctgctcctta aaccggtcaa
+     4441 gctctagaat tcgttttgcc cctggattat ccaactcctg gatgctcatg tcaaaaggag
+     4501 tcaactcatc ttcactctga gattcaaaca tagctctgga ggcatcaacc aaaaaaatca
+     4561 aactatttct tcctgaatat ttatagactc cacttgcttc acatgattct catgggtttc
+     4621 aagcccttgg taatgctgaa gtgctgtgga tctgtgccaa cctctttagc gatgtccagt
+     4681 tcaaaatgtt cttcttctgc ttcttcatca cccctggttt tgtaataaga ctttcaccct
+     4741 gacatgttgc ttactgctca ctttggtgca ggccagagca gtatgatttc ttaaaattct
+     4801 ctctgccagt ctggcctctg tcccactaca ttatttcaga gacccaagag tgttccaaag
+     4861 gaatgttctg atattgataa caatacatat catcaaattg ctctctagaa atgtataaat
+     4921 gtacaactcc atcgtaagag tatgtgatag tgctccttac agtgtatcct caccaacatg
+     4981 gaatattatc ttttaaatat ctttgatgta ttattaattg tttttgtgga gtgctgtgac
+     5041 tcaggctttt agaagaagcc taggataaca caaacataat tctggttaaa atcgatcagc
+     5101 acagatgagt ggtgctggca atgactcgtc ttggctgaca ctggagaaaa taaaaggttt
+     5161 agccaatggg aagagaaaat gataagtttg gtttgagata agataaattt ggattatctg
+     5221 tgggacatcc aagaggtgat atgcagtagg tcattgagta tctctgtgtg ggggtgtgag
+     5281 atgtaaatag aaaatgaaag tattaatata tggtaagtag gtaaaatcat aaatgtaggt
+     5341 actatggttc aaggagaatt cagaatggga aaggagttga atatgaaatc ttggggaata
+     5401 tcaacattta aagggtcgca aaagacacat aagctaaagg tactgaattt tttttttttt
+     5461 tttttttttt agagatgagg tcttgcccta ttgcacaggc tggagtgcag cggtgcaatc
+     5521 atagctcact gaagccttga attcctgggc tgaagcaatt ctcccacatc agcctctgga
+     5581 gtagctaaga ctacaggcac aagccagcat gcccagagga aggcactttt ttttttcttt
+     5641 ttaaagaagt ctttatttcc ttgttctgca aataaagctg gctgagttgg ttgctttttg
+     5701 gtgattagtc agggaccaaa tcccatatcc ttgtccaatt cctccgactc ttccttggct
+     5761 tcaaccttag ttggggtagc agcagcagca ggagcagtca tggcagcagt gtccacaggg
+     5821 gcagcagcca caaaggcaga tggattaacc aagaaggcct tgaccttttc agcaagtggg
+     5881 aaggtgtaat caatctccac agacaaagcc aggactcgtt tgtacccatt gatgatagaa
+     5941 tggggtactg atgcaacagt tgggtagcca atctgcaaac agaccctgga aacatcgcag
+     6001 acactctcca gaaagtgaga atgcagtttc ctctgtgatg tcaagccctt cagggttgta
+     6061 gatgctgcca ttgtcgaaca cctgctgaat gaccagccca aagaagggag agatgttcag
+     6121 cagtgtggct tcgctggctc ccactttgtc tccagtcttg atcagctgca catcactcag
+     6181 gatttcagtg gcgcccctgg agattttagt ggtgatgcct aaaacctgga gaaacgaggt
+     6241 cttcttgggc cctagaccag tgttctggac tggcacagtg acttcacagg ggcaatggaa
+     6301 ccagcatgag ccacagctgg caccttgttg gccagcagca ggtccctgat ctcagtgagg
+     6361 tcctccttga tgaacacaaa gcccacattc ccctgaatat gaggcaacag tttctctaga
+     6421 gctgggttgt tttccaggtg ccttcagatg gccttgacca tcgtggtgtt cttgcctagc
+     6481 agcaccatgg ccttcctgca gagggatatg cggatctgct gcatctgctt agagcccata
+     6541 ttgtctgctc ccatgatgaa acatttcgga taatcatcca aaagttggat gatcttaagg
+     6601 aagtagttgg gcttccaggt caccctgtct tccctgggca tcacagcggt gtgtcaggga
+     6661 tttaaagaca caagggttta aagatgatgt cacttaaacg acgacacctg gtgagaggag
+     6721 gcacttttta tgaagagcca cagaggtaga aagaaaactc agagagtatt gtcattgaaa
+     6781 ataaagactt tcagaaagga gagaatagca tgtcacagag aaagaaaatt gagtaaatat
+     6841 tagtgattgt aaaatggtga tttagtctgt tttgtgctgc tattacaaaa tacctgagac
+     6901 tggataattt ataaagaaaa gaaatttatt ttctcacagt tctgagtgct agaaagtcta
+     6961 agattaaggt gacagcaggt tgcattttct ggtgagttct gcatcatcca gagggaagga
+     7021 acgctgtgtc ctcacatggt ggaaggctaa agagcaagcc agctgagtga ggcaagaaga
+     7081 ttctttaata aaggccttaa ttccattcat gaggggacca gctctcatga ccttatcatc
+     7141 tctagaaaac tccacctctt catactatca cattgcacta agtttcaaca tctgaatttt
+     7201 ggagcggaca cattcagact atagcagatg aagttttaat agaaataatt aatgtttagc
+     7261 aaccaagaag taccctcata cttccctgct tgttaatttc tacctttcag gttccccaag
+     7321 gggtggtaca cagaggaata tcaaataata accaaattgc aattttatta aggcccaagt
+     7381 caatgtcatt aacactcaat gcagctttgg cagggatagg tggacctctg gtttaggctc
+     7441 aaggcataaa tgagattttc taccctgtgt ttccatcccc gttccccact atgggtcaaa
+     7501 tctaaagctt aggatgttac ctacctgtgg tttgctgcat gatgaaagga tgtcaacgta
+     7561 agatctgaca gagcctcctg gggaaggcct gaatccactt atttatcacg ccacatggag
+     7621 taggaaacag agctcagaga aagactgtgt gcctgtgtga ccccaagaat cctatctttt
+     7681 ggccatggaa gcaactctgt ctcctgagca ggtggggagg ttcttgaaaa actaggaaca
+     7741 aaggctgtct gcttcatctt gtccctttag taggagaaag cagatccctt ctgcactcta
+     7801 ggaacttcac atacatacag ctcggacaat agcacttgaa aaattaccat cttctaagaa
+     7861 tagacccctt cactcatgag gatgtcttct agagcagtgc cattcaatag aattttctgt
+     7921 gatgatgaag atgctattta aatctgtcct gtccagtact gtagttacta tgtgaggtca
+     7981 ctgaagttat tattagcatt taaatttaaa tagccccctg tggatagtgg tgatcctatt
+     8041 atacagtgaa gctctaggga tttcagggaa ttgtgaaatc tgctgatact ttcatgatag
+     8101 tcttgttttc tcttttcagc ctctccatct ttatttgatt gattattcct ggttattcta
+     8161 ggtctgctct taaggtatgg attaatattc atgtatagct gatacaactt atggagcctc
+     8221 aggttctcga tctatgaaat gggaatgcta ataacaagtg ttccgttgtg gagattaaat
+     8281 gacataatta aaccataagt tccaaatatg caaatggctt tcttacttaa ttgcaaattg
+     8341 tcagggttta cccaatgcct gtaccaccat tgtatcttag aggtaaataa cttgtttttc
+     8401 atcttacagg ctcatagctg aaaggaactt accttgagtc tctaatgaga ctttggactt
+     8461 tagacttttg agatgatgct ggaatgagtt aagacttttg ggactattgg gaaaggatga
+     8521 ttgtattttg caatgtgaga agaacatgag tttgctaggg ggggcaggag tggaatgcta
+     8581 tggtttggat ataatttgtt tgtccccacc aaatcacatg ttgaaattta atccccagtg
+     8641 tagcgatgtt gggtggtggg gcctagtggg aagtatttgg gtcacaggag tggatcgttc
+     8701 atgaatatat caacgccctt ccactctgga gtggtgagtt ctcactatgt tagttcccat
+     8761 gtgagctggt tgttaaaaca atcctcgctt ctccctgccc tgccctcctc tcttagcgtg
+     8821 cagcctctgc acacaccagc tcctcttcgc catccaccac gagtggaagc agcctgaagg
+     8881 ttttatcaga tgcagatgcc caatcttgaa ctttccagcc atccagaatc ataaaacaaa
+     8941 taaaccttct ttttctttat aaattaccca acctccagta ttcctttata gcaaaacaaa
+     9001 acagactaag acagtcagtt ttctgcattg atttttgtga ttcagggttc cagccctctg
+     9061 gtgccatcaa taaatcacta ttcccaagtt aaagcaaatc cccatgagca ctggggatgt
+     9121 gcagatagtg agtggtggtg atggtgaggt gcaaccacga tctttttttt tttttttgag
+     9181 atgagtttta ctccatcacc aaggctgggg ctcagtggca tgatctcaac tcactgcaac
+     9241 ctcggcctcc tgggttcaag caattcttgt gcctcagcct cttaaatagc tgggactagg
+     9301 ggtgtgccac catgcctggc taatttttgc acttttatta gagatgaggc tttgccatgt
+     9361 tggtcagact ggtctctaac tcctgacctc aagtgatcca cctgcctggg cctcccaaag
+     9421 tgctgggatt acaggcatga gccaccgcac ccggcaccat gttcgattga agggagagat
+     9481 ggggagggaa cctcattact gtattcacat gataaagctc ccatagaaac tcagcatcaa
+     9541 ctaagtcaat tctagttttc attctaccaa caggcctccg ggctctggct tcctgaaggg
+     9601 taggagtgat atatacaaag acgtcaagtg ggtgaaaaaa aactccacct gtcccctcag
+     9661 aacgtggaga acctctttaa agttatgggg ctgaagactg atggcatcca cttatcagag
+     9721 ctcccccagg tatgaaagga gtagaagatt tttcagggtg gggcattaat gaattttcag
+     9781 aaataaagaa ataattgatt agtaatggtt ctactcaaca ggcactgttt actaacacca
+     9841 caatcctaca tattcctaaa gctgaggatg aacaaaagtt cattactttg agactctaat
+     9901 ttagacaaaa cttgaagcaa atcagtgatg aaagaaatcc tgctagctca gcaatcttcc
+     9961 tgtggtactt gtgtgtgtgt gtgtttgtgt gtgtatgcgt g
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/O_sat.wgs
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/O_sat.wgs	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/O_sat.wgs	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+LOCUS       AAAA02000000           50231 rc    DNA     linear   PLN 11-MAR-2005
+DEFINITION  Oryza sativa (indica cultivar-group) whole genome shotgun
+            sequencing project.
+ACCESSION   AAAA00000000
+VERSION     AAAA00000000.2  GI:54362548
+KEYWORDS    WGS.
+SOURCE      Oryza sativa (indica cultivar-group)
+  ORGANISM  Oryza sativa (indica cultivar-group)
+            Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
+            Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; BEP
+            clade; Ehrhartoideae; Oryzeae; Oryza.
+REFERENCE   1  (bases 1 to 50231)
+  AUTHORS   Yu,J., Wang,J., Lin,W., Li,S., Li,H., Zhou,J., Ni,P., Dong,W.,
+            Hu,S., Zeng,C., Zhang,J., Zhang,Y., Li,R., Xu,Z., Li,S., Li,X.,
+            Zheng,H., Cong,L., Lin,L., Yin,J., Geng,J., Li,G., Shi,J., Liu,J.,
+            Lv,H., Li,J., Wang,J., Deng,Y., Ran,L., Shi,X., Wang,X., Wu,Q.,
+            Li,C., Ren,X., Wang,J., Wang,X., Li,D., Liu,D., Zhang,X., Ji,Z.,
+            Zhao,W., Sun,Y., Zhang,Z., Bao,J., Han,Y., Dong,L., Ji,J., Chen,P.,
+            Wu,S., Liu,J., Xiao,Y., Bu,D., Tan,J., Yang,L., Ye,C., Zhang,J.,
+            Xu,J., Zhou,Y., Yu,Y., Zhang,B., Zhuang,S., Wei,H., Liu,B., Lei,M.,
+            Yu,H., Li,Y., Xu,H., Wei,S., He,X., Fang,L., Zhang,Z., Zhang,Y.,
+            Huang,X., Su,Z., Tong,W., Li,J., Tong,Z., Li,S., Ye,J., Wang,L.,
+            Fang,L., Lei,T., Chen,C., Chen,H., Xu,Z., Li,H., Huang,H.,
+            Zhang,F., Xu,H., Li,N., Zhao,C., Li,S., Dong,L., Huang,Y., Li,L.,
+            Xi,Y., Qi,Q., Li,W., Zhang,B., Hu,W., Zhang,Y., Tian,X., Jiao,Y.,
+            Liang,X., Jin,J., Gao,L., Zheng,W., Hao,B., Liu,S., Wang,W.,
+            Yuan,L., Cao,M., McDermott,J., Samudrala,R., Wang,J., Wong,G.K. and
+            Yang,H.
+  TITLE     The Genomes of Oryza sativa: A History of Duplications
+  JOURNAL   PLoS Biol. 3 (2), E38 (2005)
+   PUBMED   15685292
+REFERENCE   2  (bases 1 to 50231)
+  AUTHORS   Yu,J., Hu,S., Wang,J., Li,S., Wong,K.-S.G., Liu,B., Deng,Y.,
+            Dai,L., Zhou,Y., Zhang,X., Cao,M., Liu,J., Sun,J., Tang,J.,
+            Chen,Y., Huang,X., Lin,W., Ye,C., Tong,W., Cong,L., Geng,J.,
+            Han,Y., Li,L., Li,W., Hu,G., Huang,X., Li,W., Li,J., Liu,Z., Li,L.,
+            Liu,J., Qi,Q., Liu,J., Li,L., Wang,X., Lu,H., Wu,T., Zhu,M., Ni,P.,
+            Han,H., Dong,W., Ren,X., Feng,X., Cui,P., Li,X., Wang,H., Xu,X.,
+            Zhai,W., Xu,Z., Zhang,J., He,S., Zhang,J., Xu,J., Zhang,K.,
+            Zheng,X., Dong,J., Zeng,W., Tao,L., Chen,X., He,J., Liu,D.,
+            Tian,W., Tian,C., Xia,H., Li,G., Gao,H., Li,P., Chen,W., Wang,X.,
+            Zhang,Y., Hu,J., Wang,J., Liu,S., Yang,J., Zhang,G., Bao,Q.,
+            Xiong,Y., Li,Z., Mao,L., Zhou,C., Chen,R., Zhu,Z., Hao,B.,
+            Zheng,W., Chen,S., Guo,W., Li,G., Liu,S., Huang,G., Tao,M.,
+            Wang,J., Zhu,L., Yuan,L. and Yang,H.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (04-JAN-2002) Beijing Genomics Institute/Center of
+            Genomics & Bioinformatics, Institute of Genomics, Chinese Academy
+            of Sciences, Beijing Airport Industrial Zone B6, Beijing, Beijing
+            101300, P.R.China
+REFERENCE   3  (bases 1 to 50231)
+  AUTHORS   Yu,J., Wang,J., Lin,W., Li,S., Li,H., Zhou,J., Ni,P., Dong,W.,
+            Hu,S., Zeng,C., Zhang,J., Zhang,Y., Li,R., Xu,Z., Li,S., Li,X.,
+            Zheng,H., Cong,L., Lin,L., Yin,J., Geng,J., Li,G., Shi,J., Liu,J.,
+            Lv,H., Li,J., Wang,J., Deng,Y., Ran,L., Shi,X., Wang,X., Wu,Q.,
+            Li,C., Ren,X., Wang,J., Wang,X., Li,D., Liu,D., Zhang,X., Ji,Z.,
+            Zhao,W., Sun,Y., Zhang,Z., Bao,J., Han,Y., Dong,L., Ji,J., Chen,P.,
+            Wu,S., Liu,J., Xiao,Y., Bu,D., Tan,J., Yang,L., Ye,C., Zhang,J.,
+            Xu,J., Zhou,Y., Yu,Y., Zhang,B., Zhuang,S., Wei,H., Liu,B., Lei,M.,
+            Yu,H., Li,Y., Xu,H., Wei,S., He,X., Fang,L., Zhang,Z., Zhang,Y.,
+            Huang,X., Su,Z., Tong,W., Li,J., Tong,Z., Li,S., Ye,J., Wang,L.,
+            Fang,L., Lei,T., Chen,C., Chen,H., Xu,Z., Li,H., Huang,H.,
+            Zhang,F., Xu,H., Li,N., Zhao,C., Li,S., Dong,L., Huang,Y., Li,L.,
+            Xi,Y., Qi,Q., Li,W., Zhang,B., Hu,W., Zhang,Y., Zheng,W., Hao,B.,
+            Liu,S., Wang,W., Yuan,L., Cao,M.L., McDermott,J., Samudrala,R.,
+            Wang,J., Wong,G.K.-S. and Yang,H.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (12-SEP-2003) Beijing Institute of Genomics, Chinese
+            Academy of Sciences, Beijing Airport Industrial Zone B6, Beijing,
+            Beijing 101300, P.R.China
+COMMENT     On Oct 21, 2004 this sequence version replaced gi:19924305.
+            The Oryza sativa (indica cultivar-group) whole genome shotgun (WGS)
+            project has the project accession AAAA00000000.  This version of
+            the project (02) has the accession number AAAA02000000, and
+            consists of sequences AAAA02000001-AAAA02050231.
+            The improved whole-genome shotgun (WGS) sequences for the genomes
+            of indica and japonica rice, AAAA02000000 and AACV01000000,
+            respectively, have multi-megabase contiguity and are nearly
+            1000-fold improved over the drafts of 2002. Tested against a
+            non-redundant collection of 19,079 full-length cDNAs, 98.1% of the
+            genes are aligned without fragmentation to the mapped
+            super-scaffolds of one or the other genome. Despite having only a
+            small variation in gene content, there is an enormous variation in
+            intergenic regions. At least a quarter of the two sequences could
+            not be aligned, and in the aligned region
+            single-nucleotide-polymorphism (SNP) rates varied from as little as
+            3.0 SNP/kb in the coding regions, to 27.6 SNP/kb in the
+            transposable elements.
+FEATURES             Location/Qualifiers
+     source          1..50231
+                     /organism="Oryza sativa (indica cultivar-group)"
+                     /mol_type="genomic DNA"
+                     /cultivar="93-11"
+                     /db_xref="taxon:39946"
+WGS         AAAA02000001-AAAA02050231
+WGS_SCAFLD  CM000126-CM000137
+WGS_SCAFLD  CH398081-CH401163
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/P33897
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/P33897	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/P33897	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,562 @@
+ID   ALD_HUMAN      STANDARD;      PRT;   745 AA.
+AC   P33897;
+DT   01-FEB-1994 (Rel. 28, Created)
+DT   01-FEB-1994 (Rel. 28, Last sequence update)
+DT   15-JUN-2004 (Rel. 44, Last annotation update)
+DE   Adrenoleukodystrophy protein (ALDP).
+GN   ABCD1 OR ALD.
+OS   Homo sapiens (Human).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RX   MEDLINE=93180910; PubMed=8441467;
+RA   Mosser J., Douar A.-M., Sarde C.-O., Kioschis P., Feil R., Moser H.,
+RA   Poustka A.-M., Mandel J.-L., Aubourg P.;
+RT   "Putative X-linked adrenoleukodystrophy gene shares unexpected
+RT   homology with ABC transporters.";
+RL   Nature 361:726-730(1993).
+RN   [2]
+RP   SEQUENCE FROM N.A.
+RA   Platzer M., Bauer D., Brenner V., Drescher B., Nyakatura G.,
+RA   Reichwald K., Sandoval N., Coy J., Kioschis P., Korn B.,
+RA   Poustka A.-M., Rosenthal A.;
+RL   Submitted (MAY-1996) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   SUBUNITS, AND CHARACTERIZATION OF VARIANTS X-ALD HIS-389; GLN-401;
+RP   ARG-484 AND GLN-591.
+RX   MEDLINE=20020240; PubMed=10551832;
+RA   Liu L.X., Janvier K., Berteaux-Lecellier V., Cartier N., Benarous R.,
+RA   Aubourg P.;
+RT   "Homo- and heterodimerization of peroxisomal ATP-binding cassette
+RT   half-transporters.";
+RL   J. Biol. Chem. 274:32738-32743(1999).
+RN   [4]
+RP   FUNCTION, AND CHARACTERIZATION OF VARIANTS X-ALD SER-512 AND LEU-606.
+RX   MEDLINE=21145507; PubMed=11248239;
+RA   Roerig P., Mayerhofer P., Holzinger A., Gaertner J.;
+RT   "Characterization and functional analysis of the nucleotide binding
+RT   fold in human peroxisomal ATP binding cassette transporters.";
+RL   FEBS Lett. 492:66-72(2001).
+RN   [5]
+RP   REVIEW.
+RX   MEDLINE=93283453; PubMed=8507690;
+RA   Aubourg P., Mosser J., Douar A.-M., Sarde C.-O., Lopez J.,
+RA   Mandel J.-L.;
+RT   "Adrenoleukodystrophy gene: unexpected homology to a protein involved
+RT   in peroxisome biogenesis.";
+RL   Biochimie 75:293-302(1993).
+RN   [6]
+RP   REVIEW ON VARIANTS.
+RX   MEDLINE=97338663; PubMed=9195223;
+RA   Dodd A., Rowland S.A., Hawkes S.L.J., Kennedy M.A., Love D.R.;
+RT   "Mutations in the adrenoleukodystrophy gene.";
+RL   Hum. Mutat. 9:500-511(1997).
+RN   [7]
+RP   REVIEW ON VARIANTS.
+RX   MEDLINE=21614879; PubMed=11748843;
+RA   Kemp S., Pujol A., Waterham H.R., van Geel B.M., Boehm C.D.,
+RA   Raymond G.V., Cutting G.R., Wanders R.J.A., Moser H.W.;
+RT   "ABCD1 mutations and the X-linked adrenoleukodystrophy mutation
+RT   database: role in diagnosis and clinical correlations.";
+RL   Hum. Mutat. 18:499-515(2001).
+RN   [8]
+RP   VARIANT X-ALD LYS-291.
+RX   MEDLINE=94108454; PubMed=7904210;
+RA   Cartier N., Sarde C.-O., Douar A.-M., Mosser J., Mandel J.-L.,
+RA   Aubourg P.;
+RT   "Abnormal messenger RNA expression and a missense mutation in
+RT   patients with X-linked adrenoleukodystrophy.";
+RL   Hum. Mol. Genet. 2:1949-1951(1993).
+RN   [9]
+RP   VARIANTS X-ALD SER-148; ASP-174; ARG-266; GLN-401; TRP-418 AND
+RP   PHE-515.
+RX   MEDLINE=95152524; PubMed=7849723;
+RA   Fuchs S., Sarde C.-O., Wedemann H., Schwinger E., Mandel J.-L.,
+RA   Gal A.;
+RT   "Missense mutations are frequent in the gene for X-chromosomal
+RT   adrenoleukodystrophy (ALD).";
+RL   Hum. Mol. Genet. 3:1903-1905(1994).
+RN   [10]
+RP   VARIANTS X-ALD TRP-518; LEU-606; CYS-617 AND HIS-617.
+RX   MEDLINE=94314951; PubMed=8040304;
+RA   Fanen P., Guidoux S., Sarde C.-O., Mandel J.-L., Goossens M.,
+RA   Aubourg P.;
+RT   "Identification of mutations in the putative ATP-binding domain of
+RT   the adrenoleukodystrophy gene.";
+RL   J. Clin. Invest. 94:516-520(1994).
+RN   [11]
+RP   VARIANTS X-ALD.
+RX   MEDLINE=95126139; PubMed=7825602;
+RA   Ligtenberg M.J.L., Kemp S., Sarde C.-O., van Geel B.M., Kleijer W.J.,
+RA   Barth P.G., Mandel J.-L., van Oost B.A., Bolhuis P.A.;
+RT   "Spectrum of mutations in the gene encoding the adrenoleukodystrophy
+RT   protein.";
+RL   Am. J. Hum. Genet. 56:44-50(1995).
+RN   [12]
+RP   VARIANTS X-ALD HIS-104; GLU-178; LEU-560 AND GLY-528 DEL.
+RX   MEDLINE=95233433; PubMed=7717396;
+RA   Braun A., Ambach H., Kammerer S., Rolinski B., Stoeckler S., Rabl W.,
+RA   Gaertner J., Zierz S., Roscher A.A.;
+RT   "Mutations in the gene for X-linked adrenoleukodystrophy in patients
+RT   with different clinical phenotypes.";
+RL   Am. J. Hum. Genet. 56:854-861(1995).
+RN   [13]
+RP   VARIANTS X-ALD.
+RX   MEDLINE=96047143; PubMed=7581394;
+RA   Kok F., Neumann S., Sarde C.-O., Zheng S., Wu K.-H., Wei H.-M.,
+RA   Bergin J., Watkins P.A., Gould S., Sack G., Moser H., Mandel J.-L.,
+RA   Smith K.D.;
+RT   "Mutational analysis of patients with X-linked adrenoleukodystrophy.";
+RL   Hum. Mutat. 6:104-115(1995).
+RN   [14]
+RP   VARIANTS X-ALD.
+RX   MEDLINE=96213748; PubMed=8651290;
+RA   Feigenbaum V., Lombard-Platet G., Guidoux S., Sarde C.-O.,
+RA   Mandel J.-L., Aubourg P.;
+RT   "Mutational and protein analysis of patients and heterozygous women
+RT   with X-linked adrenoleukodystrophy.";
+RL   Am. J. Hum. Genet. 58:1135-1144(1996).
+RN   [15]
+RP   VARIANTS X-ALD.
+RX   MEDLINE=96163493; PubMed=8566952;
+RA   Krasemann E.W., Meier V., Korenke G.C., Hunneman D.H., Hanefeld F.;
+RT   "Identification of mutations in the ALD-gene of 20 families with
+RT   adrenoleukodystrophy/adrenomyeloneuropathy.";
+RL   Hum. Genet. 97:194-197(1996).
+RN   [16]
+RP   VARIANT X-ALD ARG-679.
+RX   MEDLINE=98112466; PubMed=9452087;
+RA   Korenke G.C., Krasemann E., Meier V., Beuche W., Hunneman D.H.,
+RA   Hanefeld F.;
+RT   "First missense mutation (W679R) in exon 10 of the
+RT   adrenoleukodystrophy gene in siblings with adrenomyeloneuropathy.";
+RL   Hum. Mutat. Suppl. 1:S204-S206(1998).
+RN   [17]
+RP   VARIANTS X-ALD.
+RX   MEDLINE=99408241; PubMed=10480364;
+RA   Wichers M., Kohler W., Brennemann W., Boese V., Sokolowski P.,
+RA   Bidlingmaier F., Ludwig M.;
+RT   "X-linked adrenomyeloneuropathy associated with 14 novel ALD-gene
+RT   mutations: no correlation between type of mutation and age of onset.";
+RL   Hum. Genet. 105:116-119(1999).
+RN   [18]
+RP   VARIANTS X-ALD LEU-108 AND SER-143.
+RX   MEDLINE=99299442; PubMed=10369742;
+RA   Perusi C., Gomez-Lira M., Mottes M., Pignatti P.F., Bertini E.,
+RA   Cappa M., Vigliani M.C., Schiffer D., Rizzuto N., Salviati A.;
+RT   "Two novel missense mutations causing adrenoleukodystrophy in Italian
+RT   patients.";
+RL   Mol. Cell. Probes 13:179-182(1999).
+RN   [19]
+RP   VARIANTS X-ALD.
+RX   MEDLINE=20202141; PubMed=10737980;
+RA   Lachtermacher M.B., Seuanez H.N., Moser A.B., Moser H.W., Smith K.D.;
+RT   "Determination of 30 X-linked adrenoleukodystrophy mutations,
+RT   including 15 not previously described.";
+RL   Hum. Mutat. 15:348-353(2000).
+RN   [20]
+RP   VARIANTS X-ALD GLN-401; TRP-418; LEU-543 AND ARG-556.
+RX   MEDLINE=20438355; PubMed=10980539;
+RA   Lira M.G., Mottes M., Pignatti P.F., Medica I., Uziel G., Cappa M.,
+RA   Bertini E., Rizzuto N., Salviati A.;
+RT   "Detection of mutations in the ALD gene (ABCD1) in seven Italian
+RT   families: description of four novel mutations.";
+RL   Hum. Mutat. 16:271-271(2000).
+RN   [21]
+RP   VARIANTS X-ALD LEU-98; ASP-99; GLU-217; GLN-518; ASP-608; ILE-633 AND
+RP   PRO-660, AND VARIANT THR-13.
+RX   MEDLINE=21331689; PubMed=11438993;
+RA   Dvorakova L., Storkanova G., Unterrainer G., Hujova J., Kmoch S.,
+RA   Zeman J., Hrebicek M., Berger J.;
+RT   "Eight novel ABCD1 gene mutations and three polymorphisms in patients
+RT   with X-linked adrenoleukodystrophy: the first polymorphism causing an
+RT   amino acid exchange.";
+RL   Hum. Mutat. 18:52-60(2001).
+RN   [22]
+RP   VARIANT X-ALD VAL-GLY-GLN-300 INS.
+RX   MEDLINE=21668186; PubMed=11810273; DOI=10.1007/s00439-001-0632-z;
+RA   Guimaraes C.P., Lemos M., Menezes I., Coelho T., Sa-Miranda C.,
+RA   Azevedo J.E.;
+RT   "Characterisation of two mutations in the ABCD1 gene leading to low
+RT   levels of normal ALDP.";
+RL   Hum. Genet. 109:616-622(2001).
+RN   [23]
+RP   INVOLVEMENT IN CONTIGUOUS ABCD1/DXS1375E DELETION SYNDROME.
+RX   PubMed=11992258;
+RA   Corzo D., Gibson W., Johnson K., Mitchell G., LePage G., Cox G.F.,
+RA   Casey R., Zeiss C., Tyson H., Cutting G.R., Raymond G.V., Smith K.D.,
+RA   Watkins P.A., Moser A.B., Moser H.W., Steinberg S.J.;
+RT   "Contiguous deletion of the X-linked adrenoleukodystrophy gene (ABCD1)
+RT   and DXS1357E: a novel neonatal phenotype similar to peroxisomal
+RT   biogenesis disorders.";
+RL   Am. J. Hum. Genet. 70:1520-1531(2002).
+CC   -!- FUNCTION: Probable transporter. The nucleotide-binding fold acts
+CC       as an ATP-binding subunit with ATPase activity.
+CC   -!- SUBUNIT: Can form homo- and heterodimers with ABCD2/ALDR and
+CC       ABCD3/PMP70. Dimerization is necessary to form an active
+CC       transporter.
+CC   -!- SUBCELLULAR LOCATION: Integral membrane protein. Peroxisomal.
+CC   -!- DISEASE: Defects in ABCD1 are the cause of recessive X-linked
+CC       adrenoleukodystrophy (X-ALD) [MIM:300100]. X-ALD is a rare
+CC       peroxisomal metabolic disorder that occurs in boys and is
+CC       characterized by progressive multifocal demyelination of the
+CC       central nervous system and by adrenocortical insufficiency. It
+CC       produces mental deterioration, corticospinal tract dysfunction,
+CC       and cortical blindness. There is laboratory evidence of adrenal
+CC       cortical dysfunction. Different clinical manifestations exist
+CC       like: cerebral childhood ALD (CALD), adult cerebral ALD (ACALD),
+CC       adrenomyeloneuropathy (AMN) and "Addison disease only" (ADO)
+CC       phenotype.
+CC   -!- DISEASE: Microdeletions in ABCD1 are involved in the contiguous
+CC       ABCD1/DXS1375E deletion syndrome (CADDS) [MIM:300475]. Patients
+CC       manifest profound neonatal hypotonia, subsequent failure to
+CC       thrive, and cholestatic liver disease.
+CC   -!- SIMILARITY: Belongs to the ABC transporter family. ALD subfamily.
+CC   -!- DATABASE: NAME=X-ALD gene mutation database;
+CC       WWW="http://www.x-ald.nl/".
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; Z21876; CAA79922.1; -.
+DR   EMBL; Z31348; CAA83230.1; -.
+DR   EMBL; Z31006; CAA83230.1; JOINED.
+DR   EMBL; Z31007; CAA83230.1; JOINED.
+DR   EMBL; Z31008; CAA83230.1; JOINED.
+DR   EMBL; Z31009; CAA83230.1; JOINED.
+DR   EMBL; Z31010; CAA83230.1; JOINED.
+DR   EMBL; U52111; -; NOT_ANNOTATED_CDS.
+DR   PIR; G02500; G02500.
+DR   Genew; HGNC:61; ABCD1.
+DR   MIM; 300371; -.
+DR   MIM; 300100; -.
+DR   MIM; 300475; -.
+DR   GO; GO:0005779; C:integral to peroxisomal membrane; NAS.
+DR   GO; GO:0004009; F:ATP-binding cassette (ABC) transporter acti...; NAS.
+DR   GO; GO:0005215; F:transporter activity; NAS.
+DR   GO; GO:0015919; P:peroxisomal membrane transport; NAS.
+DR   GO; GO:0007031; P:peroxisome organization and biogenesis; NAS.
+DR   InterPro; IPR003593; AAA_ATPase.
+DR   InterPro; IPR003439; ABC_transporter.
+DR   InterPro; IPR005283; FA_transporter.
+DR   Pfam; PF00005; ABC_tran; 1.
+DR   Pfam; PF06472; Ald_N; 1.
+DR   ProDom; PD000006; ABC_transporter; 1.
+DR   TIGRFAMs; TIGR00954; 3a01203; 1.
+DR   PROSITE; PS50929; ABC_TM1F; 1.
+DR   PROSITE; PS00211; ABC_TRANSPORTER_1; 1.
+DR   PROSITE; PS50893; ABC_TRANSPORTER_2; 1.
+KW   ATP-binding; Glycoprotein; Transmembrane; Transport; Peroxisome;
+KW   Disease mutation; Polymorphism.
+FT   TRANSMEM     92    112       Potential.
+FT   TRANSMEM    131    151       Potential.
+FT   TRANSMEM    238    258       Potential.
+FT   TRANSMEM    333    353       Potential.
+FT   TRANSMEM    473    493       Potential.
+FT   NP_BIND     507    514       ATP (By similarity).
+FT   VARIANT      13     13       N -> T (very rare polymorphism; does not
+FT                                affect ALDP function).
+FT                                /FTId=VAR_013340.
+FT   VARIANT      90     90       E -> K (in X-ALD).
+FT                                /FTId=VAR_009349.
+FT   VARIANT      98     98       S -> L (in X-ALD; CALD type).
+FT                                /FTId=VAR_000024.
+FT   VARIANT      99     99       A -> D (in X-ALD; AMN-type).
+FT                                /FTId=VAR_013341.
+FT   VARIANT     103    103       S -> R (in X-ALD).
+FT                                /FTId=VAR_009350.
+FT   VARIANT     104    104       R -> C (in X-ALD).
+FT                                /FTId=VAR_000025.
+FT   VARIANT     104    104       R -> H (in X-ALD; ADO-type).
+FT                                /FTId=VAR_000026.
+FT   VARIANT     105    105       T -> I (in X-ALD; ADO-type).
+FT                                /FTId=VAR_000027.
+FT   VARIANT     105    105       T -> P (in X-ALD).
+FT                                /FTId=VAR_009351.
+FT   VARIANT     107    107       L -> P (in X-ALD; ALD/AMN/ADO-types and
+FT                                asymptomatic).
+FT                                /FTId=VAR_000028.
+FT   VARIANT     108    108       S -> L (in X-ALD).
+FT                                /FTId=VAR_009352.
+FT   VARIANT     108    108       S -> W (in X-ALD; CALD and AMN-types).
+FT                                /FTId=VAR_000029.
+FT   VARIANT     113    113       R -> C (in X-ALD).
+FT                                /FTId=VAR_009353.
+FT   VARIANT     113    113       R -> P (in X-ALD).
+FT                                /FTId=VAR_013342.
+FT   VARIANT     116    116       G -> R (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000030.
+FT   VARIANT     123    123       A -> V.
+FT                                /FTId=VAR_000031.
+FT   VARIANT     138    141       Missing (in X-ALD; ALD-type).
+FT                                /FTId=VAR_000032.
+FT   VARIANT     141    141       A -> T (in X-ALD).
+FT                                /FTId=VAR_000033.
+FT   VARIANT     143    143       P -> S (in X-ALD).
+FT                                /FTId=VAR_009354.
+FT   VARIANT     148    148       N -> S (in X-ALD; ADO-type).
+FT                                /FTId=VAR_000034.
+FT   VARIANT     149    149       S -> N (in X-ALD).
+FT                                /FTId=VAR_000035.
+FT   VARIANT     152    152       R -> C (in X-ALD; ADO-type).
+FT                                /FTId=VAR_000036.
+FT   VARIANT     152    152       R -> L (in X-ALD).
+FT                                /FTId=VAR_009355.
+FT   VARIANT     152    152       R -> P (in X-ALD).
+FT                                /FTId=VAR_000037.
+FT   VARIANT     152    152       R -> S (in X-ALD).
+FT                                /FTId=VAR_009356.
+FT   VARIANT     161    161       S -> P (in X-ALD).
+FT                                /FTId=VAR_009357.
+FT   VARIANT     163    163       R -> H (in X-ALD).
+FT                                /FTId=VAR_000038.
+FT   VARIANT     163    163       R -> P (in X-ALD).
+FT                                /FTId=VAR_009358.
+FT   VARIANT     174    174       Y -> C (in X-ALD).
+FT                                /FTId=VAR_009359.
+FT   VARIANT     174    174       Y -> D (in X-ALD; ALD-type).
+FT                                /FTId=VAR_000039.
+FT   VARIANT     174    174       Y -> S (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000040.
+FT   VARIANT     178    178       Q -> E (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000041.
+FT   VARIANT     181    181       Y -> C (in X-ALD; ALMD-type).
+FT                                /FTId=VAR_000042.
+FT   VARIANT     182    182       R -> P (in X-ALD).
+FT                                /FTId=VAR_000043.
+FT   VARIANT     189    189       R -> W (in X-ALD).
+FT                                /FTId=VAR_009360.
+FT   VARIANT     190    190       L -> P (in X-ALD).
+FT                                /FTId=VAR_009361.
+FT   VARIANT     194    194       D -> H (in X-ALD).
+FT                                /FTId=VAR_000044.
+FT   VARIANT     198    198       T -> K (in X-ALD).
+FT                                /FTId=VAR_009362.
+FT   VARIANT     200    200       D -> N (in X-ALD).
+FT                                /FTId=VAR_009363.
+FT   VARIANT     200    200       D -> V (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000045.
+FT   VARIANT     207    207       S -> SAAS (in X-ALD).
+FT                                /FTId=VAR_013343.
+FT   VARIANT     211    211       L -> P (in X-ALD).
+FT                                /FTId=VAR_000046.
+FT   VARIANT     213    213       S -> C (in X-ALD).
+FT                                /FTId=VAR_009364.
+FT   VARIANT     214    214       N -> D (in X-ALD).
+FT                                /FTId=VAR_009365.
+FT   VARIANT     217    217       K -> E (in X-ALD).
+FT                                /FTId=VAR_013344.
+FT   VARIANT     218    218       P -> T (in X-ALD).
+FT                                /FTId=VAR_009366.
+FT   VARIANT     220    220       L -> P (in X-ALD).
+FT                                /FTId=VAR_000047.
+FT   VARIANT     221    221       D -> G (in X-ALD; CALD and AMN-types).
+FT                                /FTId=VAR_000048.
+FT   VARIANT     224    224       V -> E (in X-ALD).
+FT                                /FTId=VAR_013345.
+FT   VARIANT     229    229       L -> P (in X-ALD).
+FT                                /FTId=VAR_009367.
+FT   VARIANT     254    254       T -> M (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000049.
+FT   VARIANT     254    254       T -> P (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000050.
+FT   VARIANT     263    263       P -> L (in X-ALD; CALD, AMN and AD-
+FT                                typeS).
+FT                                /FTId=VAR_000051.
+FT   VARIANT     266    266       G -> R (in X-ALD).
+FT                                /FTId=VAR_000052.
+FT   VARIANT     271    271       E -> K (in X-ALD).
+FT                                /FTId=VAR_009368.
+FT   VARIANT     274    274       R -> W (in X-ALD).
+FT                                /FTId=VAR_013346.
+FT   VARIANT     276    276       K -> E (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000053.
+FT   VARIANT     277    277       G -> R (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000054.
+FT   VARIANT     277    277       G -> GN (in X-ALD; ADO-type).
+FT                                /FTId=VAR_000055.
+FT   VARIANT     277    277       G -> W (in X-ALD).
+FT                                /FTId=VAR_000056.
+FT   VARIANT     280    280       R -> C (in X-ALD).
+FT                                /FTId=VAR_013347.
+FT   VARIANT     285    285       R -> P (in X-ALD).
+FT                                /FTId=VAR_009369.
+FT   VARIANT     291    291       E -> D (in X-ALD; ACALD and CALD-types).
+FT                                /FTId=VAR_000057.
+FT   VARIANT     291    291       E -> K (in X-ALD).
+FT                                /FTId=VAR_000058.
+FT   VARIANT     291    291       Missing (in X-ALD; ALD-type).
+FT                                /FTId=VAR_000059.
+FT   VARIANT     294    294       A -> T (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000060.
+FT   VARIANT     296    296       Y -> C (in X-ALD).
+FT                                /FTId=VAR_009370.
+FT   VARIANT     298    298       G -> D (in X-ALD).
+FT                                /FTId=VAR_009371.
+FT   VARIANT     300    300       E -> EVGQ (in X-ALD).
+FT                                /FTId=VAR_013348.
+FT   VARIANT     302    302       E -> K (in X-ALD).
+FT                                /FTId=VAR_009372.
+FT   VARIANT     322    322       L -> P (in X-ALD).
+FT                                /FTId=VAR_009373.
+FT   VARIANT     336    336       K -> M (in X-ALD).
+FT                                /FTId=VAR_009374.
+FT   VARIANT     339    339       W -> R (in X-ALD).
+FT                                /FTId=VAR_013349.
+FT   VARIANT     342    342       S -> P (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000061.
+FT   VARIANT     343    343       G -> D (in X-ALD).
+FT                                /FTId=VAR_013350.
+FT   VARIANT     389    389       R -> G (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000062.
+FT   VARIANT     389    389       R -> H (in X-ALD; does not affect protein
+FT                                stability, homo- and heterodimerization
+FT                                with ALDR and PMP70).
+FT                                /FTId=VAR_000063.
+FT   VARIANT     401    401       R -> Q (in X-ALD; ALD and AMN-types; does
+FT                                not affect protein stability, homo- and
+FT                                heterodimerization with ALDR and PMP70).
+FT                                /FTId=VAR_000064.
+FT   VARIANT     401    401       R -> W (in X-ALD).
+FT                                /FTId=VAR_009375.
+FT   VARIANT     418    418       R -> W (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000065.
+FT   VARIANT     427    427       Missing (in X-ALD).
+FT                                /FTId=VAR_013351.
+FT   VARIANT     484    484       P -> R (in X-ALD; CALD, AMN and ADO-
+FT                                types; significantly decreases
+FT                                homodimerization and abolishes
+FT                                heterodimerization with ALDR and PMP70).
+FT                                /FTId=VAR_000066.
+FT   VARIANT     507    507       G -> V (in X-ALD; CALD-types).
+FT                                /FTId=VAR_000067.
+FT   VARIANT     512    512       G -> S (in X-ALD; CALD and AS-types;
+FT                                reduced ATPase activity).
+FT                                /FTId=VAR_000068.
+FT   VARIANT     515    515       S -> F (in X-ALD).
+FT                                /FTId=VAR_000069.
+FT   VARIANT     518    518       R -> Q (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000070.
+FT   VARIANT     518    518       R -> W (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000071.
+FT   VARIANT     522    522       G -> W (in X-ALD; AD-type).
+FT                                /FTId=VAR_000072.
+FT   VARIANT     528    528       Missing (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000073.
+FT   VARIANT     529    529       G -> S (in X-ALD).
+FT                                /FTId=VAR_009376.
+FT   VARIANT     534    534       P -> L (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000074.
+FT   VARIANT     540    540       F -> S (in X-ALD).
+FT                                /FTId=VAR_009377.
+FT   VARIANT     543    543       P -> L (in X-ALD).
+FT                                /FTId=VAR_009378.
+FT   VARIANT     544    544       Q -> R (in X-ALD).
+FT                                /FTId=VAR_009379.
+FT   VARIANT     552    552       S -> P (in X-ALD).
+FT                                /FTId=VAR_009380.
+FT   VARIANT     554    554       R -> H (in X-ALD).
+FT                                /FTId=VAR_009381.
+FT   VARIANT     556    556       Q -> R (in X-ALD; ACALD type).
+FT                                /FTId=VAR_013352.
+FT   VARIANT     560    560       P -> L (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000075.
+FT   VARIANT     560    560       P -> R (in X-ALD; AMN and ALMD-types).
+FT                                /FTId=VAR_000076.
+FT   VARIANT     560    560       P -> S (in X-ALD).
+FT                                /FTId=VAR_013353.
+FT   VARIANT     566    566       M -> K (in X-ALD).
+FT                                /FTId=VAR_000077.
+FT   VARIANT     591    591       R -> P (in X-ALD).
+FT                                /FTId=VAR_013354.
+FT   VARIANT     591    591       R -> Q (in X-ALD; AMN-type; significantly
+FT                                decreases homodimerization and abolishes
+FT                                heterodimerization with ALDR and PMP70).
+FT                                /FTId=VAR_000078.
+FT   VARIANT     591    591       R -> W (in X-ALD).
+FT                                /FTId=VAR_009382.
+FT   VARIANT     606    606       S -> L (in X-ALD; decreased ATP-binding
+FT                                affinity).
+FT                                /FTId=VAR_000079.
+FT   VARIANT     606    606       S -> P (in X-ALD; CALD, AMN and ALMD-
+FT                                types).
+FT                                /FTId=VAR_000080.
+FT   VARIANT     608    608       G -> D (in X-ALD; CALD-type).
+FT                                /FTId=VAR_013355.
+FT   VARIANT     609    609       E -> G (in X-ALD).
+FT                                /FTId=VAR_000081.
+FT   VARIANT     609    609       E -> K (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000082.
+FT   VARIANT     616    616       A -> V (in X-ALD).
+FT                                /FTId=VAR_009383.
+FT   VARIANT     617    617       R -> C (in X-ALD; ALD-type and
+FT                                asymptomatic).
+FT                                /FTId=VAR_000083.
+FT   VARIANT     617    617       R -> G (in X-ALD; ADO and AMN-types with
+FT                                cerebral involvement).
+FT                                /FTId=VAR_000084.
+FT   VARIANT     617    617       R -> H (in X-ALD).
+FT                                /FTId=VAR_000085.
+FT   VARIANT     626    626       A -> D (in X-ALD).
+FT                                /FTId=VAR_013356.
+FT   VARIANT     626    626       A -> T (in X-ALD; CALD and AMN-types).
+FT                                /FTId=VAR_000086.
+FT   VARIANT     629    629       D -> H (in X-ALD).
+FT                                /FTId=VAR_000087.
+FT   VARIANT     630    630       E -> G (in X-ALD).
+FT                                /FTId=VAR_009384.
+FT   VARIANT     631    631       C -> Y (in X-ALD).
+FT                                /FTId=VAR_009385.
+FT   VARIANT     632    632       T -> I (in X-ALD).
+FT                                /FTId=VAR_013357.
+FT   VARIANT     633    633       S -> I (in X-ALD; asymptomatic).
+FT                                /FTId=VAR_013358.
+FT   VARIANT     633    633       S -> R (in X-ALD).
+FT                                /FTId=VAR_009386.
+FT   VARIANT     635    635       V -> M (in X-ALD).
+FT                                /FTId=VAR_013359.
+FT   VARIANT     636    636       S -> I (in X-ALD).
+FT                                /FTId=VAR_009387.
+FT   VARIANT     638    638       D -> Y (in X-ALD).
+FT                                /FTId=VAR_009388.
+FT   VARIANT     646    646       A -> P (in X-ALD).
+FT                                /FTId=VAR_009389.
+FT   VARIANT     654    654       L -> P (in X-ALD).
+FT                                /FTId=VAR_009390.
+FT   VARIANT     657    657       Missing (in X-ALD; CALD-type).
+FT                                /FTId=VAR_000088.
+FT   VARIANT     660    660       R -> P (in X-ALD; CALD-type).
+FT                                /FTId=VAR_013360.
+FT   VARIANT     660    660       R -> W (in X-ALD; CALD, ALMD and AS-
+FT                                types).
+FT                                /FTId=VAR_000089.
+FT   VARIANT     667    667       H -> D (in X-ALD).
+FT                                /FTId=VAR_009391.
+FT   VARIANT     668    668       T -> I (in X-ALD).
+FT                                /FTId=VAR_009392.
+FT   VARIANT     679    679       W -> R (in X-ALD; AMN-type).
+FT                                /FTId=VAR_000090.
+FT   VARIANT     693    693       T -> M (in X-ALD).
+FT                                /FTId=VAR_009393.
+SQ   SEQUENCE   745 AA;  82908 MW;  13A8EFDE9EB1E7FA CRC64;
+     MPVLSRPRPW RGNTLKRTAV LLALAAYGAH KVYPLVRQCL APARGLQAPA GEPTQEASGV
+     AAAKAGMNRV FLQRLLWLLR LLFPRVLCRE TGLLALHSAA LVSRTFLSVY VARLDGRLAR
+     CIARKDPRAF GWQLLQWLLI ALPATFVNSA IRYLEGQLAL SFRSRLVAHA YRLYFSQQTY
+     YRVSNMDGRL RNPDQSLTED VVAFAASVAH LYSNLTKPLL DVAVTSYTLL RAARSRGAGT
+     AWPSAIAGLV VFLTANVLRA FSPKFGELVA EEARRKGELR YMHSRVVANS EEIAFYGGHE
+     VELALLQRSY QDLASQINLI LLERLWYVML EQFLMKYVWS ASGLLMVAVP IITATGYSES
+     DAEAVKKAAL EKKEEELVSE RTEAFTIARN LLTAAADAIE RIMSSYKEVT ELAGYTARVH
+     EMFQVFEDVQ RCHFKRPREL EDAQAGSGTI GRSGVRVEGP LKIRGQVVDV EQGIICENIP
+     IVTPSGEVVV ASLNIRVEEG MHLLITGPNG CGKSSLFRIL GGLWPTYGGV LYKPPPQRMF
+     YIPQRPYMSV GSLRDQVIYP DSVEDMQRKG YSEQDLEAIL DVVHLHHILQ REGGWEAMCD
+     WKDVLSGGEK QRIGMARMFY HRPKYALLDE CTSAVSIDVE GKIFQAAKDA GIALLSITHR
+     PSLWKYHTHL LQFDGEGGWK FEKLDSAARL SLTEEKQRLE QQLAGIPKMQ RRLQELCQIL
+     GEAVAPAHVP APSPQGPGGL QGAST
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/P35527.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/P35527.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/P35527.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,288 @@
+LOCUS       P35527                   623 aa            linear   PRI 19-SEP-2006
+DEFINITION  Keratin, type I cytoskeletal 9 (Cytokeratin-9) (CK-9) (Keratin-9)
+            (K9).
+ACCESSION   P35527
+VERSION     P35527  GI:81175178
+DBSOURCE    swissprot: locus K1C9_HUMAN, accession P35527;
+            class: standard.
+            extra accessions:O00109,Q14665
+            created: Jun 1, 1994.
+            sequence updated: Nov 8, 2005.
+            annotation updated: Sep 19, 2006.
+            xrefs: Z29074.1, CAA82315.1, S69510.1, AAC60619.1, X75015.1,
+            CAA52924.1, AB001594.1, BAA19418.1, I37984
+            xrefs (non-sequence databases): HSSP:P08670, IntAct:P35527,
+            Ensembl:ENSG00000171403, KEGG:hsa:3857, HGNC:6447, MIM:144200,
+            MIM:607606, ArrayExpress:P35527, GO:0005200, GO:0008544,
+            InterPro:IPR011000, InterPro:IPR001664, InterPro:IPR002957,
+            Pfam:PF00038, PRINTS:PR01248, PROSITE:PS00226
+KEYWORDS    Coiled coil; Direct protein sequencing; Disease mutation;
+            Intermediate filament; Keratin.
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (residues 1 to 623)
+  AUTHORS   Langbein,L., Heid,H.W., Moll,I. and Franke,W.W.
+  TITLE     Molecular characterization of the body site-specific human
+            epidermal cytokeratin 9: cDNA cloning, amino acid sequence, and
+            tissue specificity of gene expression
+  JOURNAL   Differentiation 55 (1), 57-71 (1993)
+   PUBMED   7507869
+  REMARK    NUCLEOTIDE SEQUENCE [MRNA], AND PARTIAL PROTEIN SEQUENCE.
+            TISSUE=Foot sole tissue
+            Erratum:[Differentiation. 1994 Jan;55(2):164. PMID: 7511549]
+REFERENCE   2  (residues 1 to 623)
+  AUTHORS   Reis,A., Hennies,H.-C., Langbein,L., Digweed,M., Mischke,D.,
+            Dreschler,M., Schroek,E., Royer-Pokora,B., Franke,W.W., Sperling,K.
+            and Kuester,W.
+  TITLE     Keratin 9 gene mutations in epidermolytic palmoplantar keratoderma
+            (EPPK)
+  JOURNAL   Nat. Genet. 6 (2), 174-179 (1994)
+   PUBMED   7512862
+  REMARK    NUCLEOTIDE SEQUENCE [GENOMIC DNA], AND VARIANTS EPPK LYS-161;
+            GLN-163 AND TRP-163.
+REFERENCE   3  (residues 1 to 623)
+  AUTHORS   Bienvenut,W.V.
+  JOURNAL   Unpublished
+  REMARK    PROTEIN SEQUENCE OF 14-29, AND MASS SPECTROMETRY.
+            TISSUE=Cervix carcinoma
+REFERENCE   4  (residues 1 to 623)
+  AUTHORS   Kobayashi,S., Tanaka,T., Matsuyoshi,N. and Imamura,S.
+  TITLE     Keratin 9 point mutation in the pedigree of epidermolytic
+            hereditary palmoplantar keratoderma perturbs keratin intermediate
+            filament network formation
+  JOURNAL   FEBS Lett. 386 (2-3), 149-155 (1996)
+   PUBMED   8647270
+  REMARK    NUCLEOTIDE SEQUENCE [MRNA] OF 147-372, AND VARIANT EPPK GLN-163.
+REFERENCE   5  (residues 1 to 623)
+  AUTHORS   Rosen,E.M., Meromsky,L., Romero,R., Setter,E. and Goldberg,I.
+  TITLE     Human placenta contains an epithelial scatter protein
+  JOURNAL   Biochem. Biophys. Res. Commun. 168 (3), 1082-1088 (1990)
+   PUBMED   2140676
+  REMARK    PROTEIN SEQUENCE OF 450-466.
+REFERENCE   6  (residues 1 to 623)
+  AUTHORS   Hennies,H.C., Zehender,D., Kunze,J., Kuster,W. and Reis,A.
+  TITLE     Keratin 9 gene mutational heterogeneity in patients with
+            epidermolytic palmoplantar keratoderma
+  JOURNAL   Hum. Genet. 93 (6), 649-654 (1994)
+   PUBMED   7516304
+  REMARK    VARIANTS EPPK VAL-157 AND PRO-172.
+REFERENCE   7  (residues 1 to 623)
+  AUTHORS   Bonifas,J.M., Matsumura,K., Chen,M.A., Berth-Jones,J.,
+            Hutchison,P.E., Zloczower,M., Fritsch,P.O. and Epstein,E.H. Jr.
+  TITLE     Mutations of keratin 9 in two families with palmoplantar
+            epidermolytic hyperkeratosis
+  JOURNAL   J. Invest. Dermatol. 103 (4), 474-477 (1994)
+   PUBMED   7523529
+  REMARK    VARIANT EPPK SER-161.
+REFERENCE   8  (residues 1 to 623)
+  AUTHORS   Torchard,D., Blanchet-Bardon,C., Serova,O., Langbein,L., Narod,S.,
+            Janin,N., Goguel,A.F., Bernheim,A., Franke,W.W., Lenoir,G.M. and
+            Feunteun,J.
+  TITLE     Epidermolytic palmoplantar keratoderma cosegregates with a keratin
+            9 mutation in a pedigree with breast and ovarian cancer
+  JOURNAL   Nat. Genet. 6 (1), 106-110 (1994)
+   PUBMED   7511021
+  REMARK    VARIANT EPPK TYR-161.
+REFERENCE   9  (residues 1 to 623)
+  AUTHORS   Rothnagel,J.A., Wojcik,S., Liefer,K.M., Dominey,A.M., Huber,M.,
+            Hohl,D. and Roop,D.R.
+  TITLE     Mutations in the 1A domain of keratin 9 in patients with
+            epidermolytic palmoplantar keratoderma
+  JOURNAL   J. Invest. Dermatol. 104 (3), 430-433 (1995)
+   PUBMED   7532199
+  REMARK    VARIANTS EPPK TRP-163 AND SER-168.
+REFERENCE   10 (residues 1 to 623)
+  AUTHORS   Endo,H., Hatamochi,A. and Shinkai,H.
+  TITLE     A novel mutation of a leucine residue in coil 1A of keratin 9 in
+            epidermolytic palmoplantar keratoderma
+  JOURNAL   J. Invest. Dermatol. 109 (1), 113-115 (1997)
+   PUBMED   9204965
+  REMARK    VARIANT EPPK VAL-160.
+REFERENCE   11 (residues 1 to 623)
+  AUTHORS   Covello,S.P., Irvine,A.D., McKenna,K.E., Munro,C.S., Nevin,N.C.,
+            Smith,F.J., Uitto,J. and McLean,W.H.
+  TITLE     Mutations in keratin K9 in kindreds with epidermolytic palmoplantar
+            keratoderma and epidemiology in Northern Ireland
+  JOURNAL   J. Invest. Dermatol. 111 (6), 1207-1209 (1998)
+   PUBMED   9856842
+  REMARK    VARIANTS EPPK THR-157; VAL-157 AND GLN-163.
+COMMENT     On Nov 8, 2005 this sequence version replaced gi:547748.
+            [FUNCTION] May serve an important special function either in the
+            mature palmar and plantar skin tissue or in the morphogenic program
+            of the formation of these tissues.
+            [SUBUNIT] Heterotetramer of two type I and two type II keratins.
+            [TISSUE SPECIFICITY] Expressed in the terminally differentiated
+            epidermis of palms and soles.
+            [DISEASE] Defects in KRT9 are a cause of epidermolytic palmoplantar
+            keratoderma (EPPK) [MIM:144200]; also abbreviated EHPPK. EPPK is an
+            autosomal dominant disease characterized by diffuse thickening of
+            the epidermis on the entire surface of palms and soles sharply
+            bordered with erythematous margins.
+            [MISCELLANEOUS] There are two types of cytoskeletal and
+            microfibrillar keratin, I (acidic) and II (neutral to basic) (40-55
+            and 56-70 kDa, respectively).
+            [SIMILARITY] Belongs to the intermediate filament family.
+            [CAUTION] Was originally (Ref.4) thought to be a 60 kDa chain of
+            placental scatter protein.
+            [WEB RESOURCE] NAME=Human Intermediate Filament Mutation Database;
+            URL='http://www.interfil.org'.
+            [WEB RESOURCE] NAME=GeneReviews;
+            URL='http://www.genetests.org/query?gene=KRT9'.
+FEATURES             Location/Qualifiers
+     source          1..623
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+     gene            1..623
+                     /gene="KRT9"
+     Protein         1..623
+                     /gene="KRT9"
+                     /product="Keratin, type I cytoskeletal 9"
+     Region          1..623
+                     /gene="KRT9"
+                     /region_name="Mature chain"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Keratin, type I cytoskeletal 9.
+                     /FTId=PRO_0000063640."
+     Region          1..152
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Head."
+     Region          12..13
+                     /gene="KRT9"
+                     /region_name="Conflict"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="SR -> T (in Ref. 1; AAC60619 and 2)."
+     Region          15..26
+                     /gene="KRT9"
+                     /region_name="Compositionally biased region"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Poly-Gly."
+     Region          152..463
+                     /gene="KRT9"
+                     /region_name="Filament"
+                     /note="Intermediate filament protein; pfam00038"
+                     /db_xref="CDD:40140"
+     Region          153..461
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Rod."
+     Region          153..188
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Coil 1A."
+     Region          157
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="M -> T (in EPPK). /FTId=VAR_010499."
+     Region          157
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="M -> V (in EPPK). /FTId=VAR_010500."
+     Region          160
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="L -> V (in EPPK). /FTId=VAR_010501."
+     Region          161
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="N -> K (in EPPK). /FTId=VAR_003822."
+     Region          161
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="N -> S (in EPPK). /FTId=VAR_010502."
+     Region          161
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="N -> Y (in EPPK). /FTId=VAR_010503."
+     Region          163
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="R -> Q (in EPPK). /FTId=VAR_003823."
+     Region          163
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="R -> W (in EPPK). /FTId=VAR_003824."
+     Region          168
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="L -> S (in EPPK). /FTId=VAR_003825."
+     Region          172
+                     /gene="KRT9"
+                     /region_name="Variant"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Q -> P (in EPPK). /FTId=VAR_010504."
+     Region          189..207
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Linker 1."
+     Region          208..299
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Coil 1B."
+     Region          300..322
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Linker 12."
+     Region          323..461
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Coil 2."
+     Region          462..623
+                     /gene="KRT9"
+                     /region_name="Region of interest in the sequence"
+                     /experiment="experimental evidence, no additional details
+                     recorded"
+                     /note="Tail."
+ORIGIN      
+        1 mscrqfsssy lsrsgggggg glgsggsirs sysrfsssgg rggggrfsss sgygggssrv
+       61 cgrggggsfg ysygggsggg fsasslgggf gggsrgfgga sgggysssgg fgggfgggsg
+      121 ggfgggygsg fgglggfggg agggdggilt anekstmqel nsrlasyldk vqaleeannd
+      181 lenkiqdwyd kkgpaaiqkn yspyyntidd lkdqivdltv gnnktlldid ntrmtlddfr
+      241 ikfemeqnlr qgvdadingl rqvldnltme ksdlemqyet lqeelmalkk nhkeemsqlt
+      301 gqnsgdvnve invapgkdlt ktlndmrqey eqliaknrkd ienqyetqit qiehevsssg
+      361 qevqssakev tqlrhgvqel eielqsqlsk kaaleksled tknrycgqlq miqeqisnle
+      421 aqitdvrqei ecqnqeysll lsikmrleke ietyhnlleg gqedfessga gkiglggrgg
+      481 sggsygrgsr ggsggsyggg gsgggygggs gsrggsggsy gggsgsgggs gggygggsgg
+      541 ghsggsgggh sggsggnygg gsgsgggsgg gygggsgsrg gsggshgggs gfggesggsy
+      601 gggeeasgsg ggygggsgks shs
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/PAM250
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/PAM250	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/PAM250	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+#
+# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
+#
+# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
+#
+# Expected score = -0.844, Entropy = 0.354 bits
+#
+# Lowest score = -8, Highest score = 17
+#
+   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
+A  2 -2  0  0 -2  0  0  1 -1 -1 -2 -1 -1 -3  1  1  1 -6 -3  0  0  0  0 -8
+R -2  6  0 -1 -4  1 -1 -3  2 -2 -3  3  0 -4  0  0 -1  2 -4 -2 -1  0 -1 -8
+N  0  0  2  2 -4  1  1  0  2 -2 -3  1 -2 -3  0  1  0 -4 -2 -2  2  1  0 -8
+D  0 -1  2  4 -5  2  3  1  1 -2 -4  0 -3 -6 -1  0  0 -7 -4 -2  3  3 -1 -8
+C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3  0 -2 -8  0 -2 -4 -5 -3 -8
+Q  0  1  1  2 -5  4  2 -1  3 -2 -2  1 -1 -5  0 -1 -1 -5 -4 -2  1  3 -1 -8
+E  0 -1  1  3 -5  2  4  0  1 -2 -3  0 -2 -5 -1  0  0 -7 -4 -2  3  3 -1 -8
+G  1 -3  0  1 -3 -1  0  5 -2 -3 -4 -2 -3 -5  0  1  0 -7 -5 -1  0  0 -1 -8
+H -1  2  2  1 -3  3  1 -2  6 -2 -2  0 -2 -2  0 -1 -1 -3  0 -2  1  2 -1 -8
+I -1 -2 -2 -2 -2 -2 -2 -3 -2  5  2 -2  2  1 -2 -1  0 -5 -1  4 -2 -2 -1 -8
+L -2 -3 -3 -4 -6 -2 -3 -4 -2  2  6 -3  4  2 -3 -3 -2 -2 -1  2 -3 -3 -1 -8
+K -1  3  1  0 -5  1  0 -2  0 -2 -3  5  0 -5 -1  0  0 -3 -4 -2  1  0 -1 -8
+M -1  0 -2 -3 -5 -1 -2 -3 -2  2  4  0  6  0 -2 -2 -1 -4 -2  2 -2 -2 -1 -8
+F -3 -4 -3 -6 -4 -5 -5 -5 -2  1  2 -5  0  9 -5 -3 -3  0  7 -1 -4 -5 -2 -8
+P  1  0  0 -1 -3  0 -1  0  0 -2 -3 -1 -2 -5  6  1  0 -6 -5 -1 -1  0 -1 -8
+S  1  0  1  0  0 -1  0  1 -1 -1 -3  0 -2 -3  1  2  1 -2 -3 -1  0  0  0 -8
+T  1 -1  0  0 -2 -1  0  0 -1  0 -2  0 -1 -3  0  1  3 -5 -3  0  0 -1  0 -8
+W -6  2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4  0 -6 -2 -5 17  0 -6 -5 -6 -4 -8
+Y -3 -4 -2 -4  0 -4 -4 -5  0 -1 -1 -4 -2  7 -5 -3 -3  0 10 -2 -3 -4 -2 -8
+V  0 -2 -2 -2 -2 -2 -2 -1 -2  4  2 -2  2 -1 -1 -1  0 -6 -2  4 -2 -2 -1 -8
+B  0 -1  2  3 -4  1  3  0  1 -2 -3  1 -2 -4 -1  0  0 -5 -3 -2  3  2 -1 -8
+Z  0  0  1  3 -5  3  3  0  2 -2 -3  0 -2 -5  0  0 -1 -6 -4 -2  2  3 -1 -8
+X  0 -1  0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1  0  0 -4 -2 -1 -1 -1 -1 -8
+* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8  1

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Primate_mtDNA.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Primate_mtDNA.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Primate_mtDNA.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,433 @@
+#NEXUS 
+
+
+BEGIN DATA;
+DIMENSIONS  NTAX=12 NCHAR=898;
+
+[!A portion of primate mitochondrial DNA.   Site names correspond to Anderson (1981) numbering scheme.
+
+This portion includes parts of two protein-coding genes, as well as 3 tRNA's:
+
+tRNA His:  459-528
+tRNA Ser (AGY):  529-588
+tRNA Leu (CUN):  589-659
+
+Data from:
+
+Hayasaka, K., T. Gojobori, and S. Horai. 1988. Molecular phylogeny and evolution of primate mitochondrial DNA. Mol. Biol. Evol., 5:626-644.
+
+See pp. 335-339 of the MacClade book.]
+FORMAT DATATYPE=DNA  MISSING=? GAP=-  MATCHCHAR=.  INTERLEAVE ;OPTIONS  IGNORE=UNINFORM ;
+
+CHARLABELS
+	[1]	A_11680	[2]	A_11681	[3]	A_11682	[4]	A_11683	[5]	A_11684
+	[6]	A_11685	[7]	A_11686	[8]	A_11687	[9]	A_11688	[10]	A_11689
+	[11]	A_11690	[12]	A_11691	[13]	A_11692	[14]	A_11693	[15]	A_11694
+	[16]	A_11695	[17]	A_11696	[18]	A_11697	[19]	A_11698	[20]	A_11699
+	[21]	A_11700	[22]	A_11701	[23]	A_11702	[24]	A_11703	[25]	A_11704
+	[26]	A_11705	[27]	A_11706	[28]	A_11707	[29]	A_11708	[30]	A_11709
+	[31]	A_11710	[32]	A_11711	[33]	A_11712	[34]	A_11713	[35]	A_11714
+	[36]	A_11715	[37]	A_11716	[38]	A_11717	[39]	A_11718	[40]	A_11719
+	[41]	A_11720	[42]	A_11721	[43]	A_11722	[44]	A_11723	[45]	A_11724
+	[46]	A_11725	[47]	A_11726	[48]	A_11727	[49]	A_11728	[50]	A_11729
+	[51]	A_11730	[52]	A_11731	[53]	A_11732	[54]	A_11733	[55]	A_11734
+	[56]	A_11735	[57]	A_11736	[58]	A_11737	[59]	A_11738	[60]	A_11739
+	[61]	A_11740	[62]	A_11741	[63]	A_11742	[64]	A_11743	[65]	A_11744
+	[66]	A_11745	[67]	A_11746	[68]	A_11747	[69]	A_11748	[70]	A_11749
+	[71]	A_11750	[72]	A_11751	[73]	A_11752	[74]	A_11753	[75]	A_11754
+	[76]	A_11755	[77]	A_11756	[78]	A_11757	[79]	A_11758	[80]	A_11759
+	[81]	A_11760	[82]	A_11761	[83]	A_11762	[84]	A_11763	[85]	A_11764
+	[86]	A_11765	[87]	A_11766	[88]	A_11767	[89]	A_11768	[90]	A_11769
+	[91]	A_11770	[92]	A_11771	[93]	A_11772	[94]	A_11773	[95]	A_11774
+	[96]	A_11775	[97]	A_11776	[98]	A_11777	[99]	A_11778	[100]	A_11779
+	[101]	A_11780	[102]	A_11781	[103]	A_11782	[104]	A_11783	[105]	A_11784
+	[106]	A_11785	[107]	A_11786	[108]	A_11787	[109]	A_11788	[110]	A_11789
+	[111]	A_11790	[112]	A_11791	[113]	A_11792	[114]	A_11793	[115]	A_11794
+	[116]	A_11795	[117]	A_11796	[118]	A_11797	[119]	A_11798	[120]	A_11799
+	[121]	A_11800	[122]	A_11801	[123]	A_11802	[124]	A_11803	[125]	A_11804
+	[126]	A_11805	[127]	A_11806	[128]	A_11807	[129]	A_11808	[130]	A_11809
+	[131]	A_11810	[132]	A_11811	[133]	A_11812	[134]	A_11813	[135]	A_11814
+	[136]	A_11815	[137]	A_11816	[138]	A_11817	[139]	A_11818	[140]	A_11819
+	[141]	A_11820	[142]	A_11821	[143]	A_11822	[144]	A_11823	[145]	A_11824
+	[146]	A_11825	[147]	A_11826	[148]	A_11827	[149]	A_11828	[150]	A_11829
+	[151]	A_11830	[152]	A_11831	[153]	A_11832	[154]	A_11833	[155]	A_11834
+	[156]	A_11835	[157]	A_11836	[158]	A_11837	[159]	A_11838	[160]	A_11839
+	[161]	A_11840	[162]	A_11841	[163]	A_11842	[164]	A_11843	[165]	A_11844
+	[166]	A_11845	[167]	A_11846	[168]	A_11847	[169]	A_11848	[170]	A_11849
+	[171]	A_11850	[172]	A_11851	[173]	A_11852	[174]	A_11853	[175]	A_11854
+	[176]	A_11855	[177]	A_11856	[178]	A_11857	[179]	A_11858	[180]	A_11859
+	[181]	A_11860	[182]	A_11861	[183]	A_11862	[184]	A_11863	[185]	A_11864
+	[186]	A_11865	[187]	A_11866	[188]	A_11867	[189]	A_11868	[190]	A_11869
+	[191]	A_11870	[192]	A_11871	[193]	A_11872	[194]	A_11873	[195]	A_11874
+	[196]	A_11875	[197]	A_11876	[198]	A_11877	[199]	A_11878	[200]	A_11879
+	[201]	A_11880	[202]	A_11881	[203]	A_11882	[204]	A_11883	[205]	A_11884
+	[206]	A_11885	[207]	A_11886	[208]	A_11887	[209]	A_11888	[210]	A_11889
+	[211]	A_11890	[212]	A_11891	[213]	A_11892	[214]	A_11893	[215]	A_11894
+	[216]	A_11895	[217]	A_11896	[218]	A_11897	[219]	A_11898	[220]	A_11899
+	[221]	A_11900	[222]	A_11901	[223]	A_11902	[224]	A_11903	[225]	A_11904
+	[226]	A_11905	[227]	A_11906	[228]	A_11907	[229]	A_11908	[230]	A_11909
+	[231]	A_11910	[232]	A_11911	[233]	A_11912	[234]	A_11913	[235]	A_11914
+	[236]	A_11915	[237]	A_11916	[238]	A_11917	[239]	A_11918	[240]	A_11919
+	[241]	A_11920	[242]	A_11921	[243]	A_11922	[244]	A_11923	[245]	A_11924
+	[246]	A_11925	[247]	A_11926	[248]	A_11927	[249]	A_11928	[250]	A_11929
+	[251]	A_11930	[252]	A_11931	[253]	A_11932	[254]	A_11933	[255]	A_11934
+	[256]	A_11935	[257]	A_11936	[258]	A_11937	[259]	A_11938	[260]	A_11939
+	[261]	A_11940	[262]	A_11941	[263]	A_11942	[264]	A_11943	[265]	A_11944
+	[266]	A_11945	[267]	A_11946	[268]	A_11947	[269]	A_11948	[270]	A_11949
+	[271]	A_11950	[272]	A_11951	[273]	A_11952	[274]	A_11953	[275]	A_11954
+	[276]	A_11955	[277]	A_11956	[278]	A_11957	[279]	A_11958	[280]	A_11959
+	[281]	A_11960	[282]	A_11961	[283]	A_11962	[284]	A_11963	[285]	A_11964
+	[286]	A_11965	[287]	A_11966	[288]	A_11967	[289]	A_11968	[290]	A_11969
+	[291]	A_11970	[292]	A_11971	[293]	A_11972	[294]	A_11973	[295]	A_11974
+	[296]	A_11975	[297]	A_11976	[298]	A_11977	[299]	A_11978	[300]	A_11979
+	[301]	A_11980	[302]	A_11981	[303]	A_11982	[304]	A_11983	[305]	A_11984
+	[306]	A_11985	[307]	A_11986	[308]	A_11987	[309]	A_11988	[310]	A_11989
+	[311]	A_11990	[312]	A_11991	[313]	A_11992	[314]	A_11993	[315]	A_11994
+	[316]	A_11995	[317]	A_11996	[318]	A_11997	[319]	A_11998	[320]	A_11999
+	[321]	A_12000	[322]	A_12001	[323]	A_12002	[324]	A_12003	[325]	A_12004
+	[326]	A_12005	[327]	A_12006	[328]	A_12007	[329]	A_12008	[330]	A_12009
+	[331]	A_12010	[332]	A_12011	[333]	A_12012	[334]	A_12013	[335]	A_12014
+	[336]	A_12015	[337]	A_12016	[338]	A_12017	[339]	A_12018	[340]	A_12019
+	[341]	A_12020	[342]	A_12021	[343]	A_12022	[344]	A_12023	[345]	A_12024
+	[346]	A_12025	[347]	A_12026	[348]	A_12027	[349]	A_12028	[350]	A_12029
+	[351]	A_12030	[352]	A_12031	[353]	A_12032	[354]	A_12033	[355]	A_12034
+	[356]	A_12035	[357]	A_12036	[358]	A_12037	[359]	A_12038	[360]	A_12039
+	[361]	A_12040	[362]	A_12041	[363]	A_12042	[364]	A_12043	[365]	A_12044
+	[366]	A_12045	[367]	A_12046	[368]	A_12047	[369]	A_12048	[370]	A_12049
+	[371]	A_12050	[372]	A_12051	[373]	A_12052	[374]	A_12053	[375]	A_12054
+	[376]	A_12055	[377]	A_12056	[378]	A_12057	[379]	A_12058	[380]	A_12059
+	[381]	A_12060	[382]	A_12061	[383]	A_12062	[384]	A_12063	[385]	A_12064
+	[386]	A_12065	[387]	A_12066	[388]	A_12067	[389]	A_12068	[390]	A_12069
+	[391]	A_12070	[392]	A_12071	[393]	A_12072	[394]	A_12073	[395]	A_12074
+	[396]	A_12075	[397]	A_12076	[398]	A_12077	[399]	A_12078	[400]	A_12079
+	[401]	A_12080	[402]	A_12081	[403]	A_12082	[404]	A_12083	[405]	A_12084
+	[406]	A_12085	[407]	A_12086	[408]	A_12087	[409]	A_12088	[410]	A_12089
+	[411]	A_12090	[412]	A_12091	[413]	A_12092	[414]	A_12093	[415]	A_12094
+	[416]	A_12095	[417]	A_12096	[418]	A_12097	[419]	A_12098	[420]	A_12099
+	[421]	A_12100	[422]	A_12101	[423]	A_12102	[424]	A_12103	[425]	A_12104
+	[426]	A_12105	[427]	A_12106	[428]	A_12107	[429]	A_12108	[430]	A_12109
+	[431]	A_12110	[432]	A_12111	[433]	A_12112	[434]	A_12113	[435]	A_12114
+	[436]	A_12115	[437]	A_12116	[438]	A_12117	[439]	A_12118	[440]	A_12119
+	[441]	A_12120	[442]	A_12121	[443]	A_12122	[444]	A_12123	[445]	A_12124
+	[446]	A_12125	[447]	A_12126	[448]	A_12127	[449]	A_12128	[450]	A_12129
+	[451]	A_12130	[452]	A_12131	[453]	A_12132	[454]	A_12133	[455]	A_12134
+	[456]	A_12135	[457]	A_12136	[458]	A_12137	[459]	A_12138	[460]	A_12139
+	[461]	A_12140	[462]	A_12141	[463]	A_12142	[464]	A_12143	[465]	A_12144
+	[466]	A_12145	[467]	A_12146	[468]	A_12147	[469]	A_12148	[470]	A_12149
+	[471]	A_12150	[472]	A_12151	[473]	A_12152	[474]	A_12153	[475]	A_12154
+	[476]	A_12155	[477]	A_12156	[478]	A_12157	[479]	A_12158	[480]	A_12159
+	[481]	A_12160	[482]	A_12161	[483]	A_12162	[484]	A_12163	[485]	A_12164
+	[486]	A_12165	[487]	A_12166	[488]	A_12167	[489]	A_12168	[490]	A_12169
+	[491]	A_12170	[492]	A_12171	[493]	A_12172	[494]	A_12173	[495]	A_12174
+	[496]	A_12175	[497]	A_12176	[498]	A_12177	[499]	A_12178	[500]	A_12179
+	[501]	A_12180	[502]	A_12181	[503]	A_12182	[504]	A_12183	[505]	A_12184
+	[506]	A_12185	[507]	A_12186	[508]	A_12187	[509]	A_12188	[510]	A_12189
+	[511]	A_12190	[512]	A_12190a	[513]	A_12191	[514]	A_12192	[515]	A_12193
+	[516]	A_12194	[517]	A_12195	[518]	A_12196	[519]	A_12197	[520]	A_12198
+	[521]	A_12199	[522]	A_12200	[523]	A_12201	[524]	A_12202	[525]	A_12203
+	[526]	A_12204	[527]	A_12205	[528]	A_12206	[529]	A_12207	[530]	A_12208
+	[531]	A_12209	[532]	A_12210	[533]	A_12211	[534]	A_12212	[535]	A_12213
+	[536]	A_12214	[537]	A_12215	[538]	A_12215a	[539]	A_12216	[540]	A_12217
+	[541]	A_12218	[542]	A_12219	[543]	A_12220	[544]	A_12221	[545]	A_12222
+	[546]	A_12223	[547]	A_12224	[548]	A_12225	[549]	A_12226	[550]	A_12227
+	[551]	A_12228	[552]	A_12229	[553]	A_12230	[554]	A_12231	[555]	A_12232
+	[556]	A_12233	[557]	A_12234	[558]	A_12235	[559]	A_12236	[560]	A_12237
+	[561]	A_12238	[562]	A_12239	[563]	A_12240	[564]	A_12241	[565]	A_12242
+	[566]	A_12243	[567]	A_12244	[568]	A_12245	[569]	A_12246	[570]	A_12247
+	[571]	A_12248	[572]	A_12249	[573]	A_12250	[574]	A_12251	[575]	A_12252
+	[576]	A_12253	[577]	A_12254	[578]	A_12255	[579]	A_12256	[580]	A_12257
+	[581]	A_12258	[582]	A_12259	[583]	A_12260	[584]	A_12261	[585]	A_12262
+	[586]	A_12263	[587]	A_12264	[588]	A_12265	[589]	A_12266	[590]	A_12267
+	[591]	A_12268	[592]	A_12269	[593]	A_12270	[594]	A_12271	[595]	A_12272
+	[596]	A_12273	[597]	A_12274	[598]	A_12275	[599]	A_12276	[600]	A_12277
+	[601]	A_12278	[602]	A_12279	[603]	A_12280	[604]	A_12281	[605]	A_12282
+	[606]	A_12283	[607]	A_12284	[608]	A_12285	[609]	A_12286	[610]	A_12287
+	[611]	A_12288	[612]	A_12289	[613]	A_12290	[614]	A_12291	[615]	A_12292
+	[616]	A_12293	[617]	A_12294	[618]	A_12295	[619]	A_12296	[620]	A_12297
+	[621]	A_12298	[622]	A_12299	[623]	A_12300	[624]	A_12301	[625]	A_12302
+	[626]	A_12303	[627]	A_12304	[628]	A_12305	[629]	A_12306	[630]	A_12307
+	[631]	A_12308	[632]	A_12309	[633]	A_12310	[634]	A_12311	[635]	A_12312
+	[636]	A_12313	[637]	A_12314	[638]	A_12315	[639]	A_12316	[640]	A_12317
+	[641]	A_12318	[642]	A_12319	[643]	A_12320	[644]	A_12321	[645]	A_12322
+	[646]	A_12323	[647]	A_12324	[648]	A_12325	[649]	A_12326	[650]	A_12327
+	[651]	A_12328	[652]	A_12329	[653]	A_12330	[654]	A_12331	[655]	A_12332
+	[656]	A_12333	[657]	A_12334	[658]	A_12335	[659]	A_12336	[660]	A_12337
+	[661]	A_12338	[662]	A_12339	[663]	A_12340	[664]	A_12341	[665]	A_12342
+	[666]	A_12343	[667]	A_12344	[668]	A_12345	[669]	A_12346	[670]	A_12347
+	[671]	A_12348	[672]	A_12349	[673]	A_12350	[674]	A_12351	[675]	A_12352
+	[676]	A_12353	[677]	A_12354	[678]	A_12355	[679]	A_12356	[680]	A_12357
+	[681]	A_12358	[682]	A_12359	[683]	A_12360	[684]	A_12361	[685]	A_12362
+	[686]	A_12363	[687]	A_12364	[688]	A_12365	[689]	A_12366	[690]	A_12367
+	[691]	A_12368	[692]	A_12369	[693]	A_12370	[694]	A_12371	[695]	A_12372
+	[696]	A_12373	[697]	A_12374	[698]	A_12375	[699]	A_12376	[700]	A_12377
+	[701]	A_12378	[702]	A_12379	[703]	A_12380	[704]	A_12381	[705]	A_12382
+	[706]	A_12383	[707]	A_12384	[708]	A_12385	[709]	A_12386	[710]	A_12387
+	[711]	A_12388	[712]	A_12389	[713]	A_12390	[714]	A_12391	[715]	A_12392
+	[716]	A_12393	[717]	A_12394	[718]	A_12395	[719]	A_12396	[720]	A_12397
+	[721]	A_12398	[722]	A_12399	[723]	A_12400	[724]	A_12401	[725]	A_12402
+	[726]	A_12403	[727]	A_12404	[728]	A_12405	[729]	A_12406	[730]	A_12407
+	[731]	A_12408	[732]	A_12409	[733]	A_12410	[734]	A_12411	[735]	A_12412
+	[736]	A_12413	[737]	A_12414	[738]	A_12415	[739]	A_12416	[740]	A_12417
+	[741]	A_12418	[742]	A_12419	[743]	A_12420	[744]	A_12421	[745]	A_12422
+	[746]	A_12423	[747]	A_12424	[748]	A_12425	[749]	A_12426	[750]	A_12427
+	[751]	A_12428	[752]	A_12429	[753]	A_12430	[754]	A_12431	[755]	A_12432
+	[756]	A_12433	[757]	A_12434	[758]	A_12435	[759]	A_12436	[760]	A_12437
+	[761]	A_12438	[762]	A_12439	[763]	A_12440	[764]	A_12441	[765]	A_12442
+	[766]	A_12443	[767]	A_12444	[768]	A_12445	[769]	A_12446	[770]	A_12447
+	[771]	A_12448	[772]	A_12449	[773]	A_12450	[774]	A_12451	[775]	A_12452
+	[776]	A_12453	[777]	A_12454	[778]	A_12455	[779]	A_12456	[780]	A_12457
+	[781]	A_12458	[782]	A_12459	[783]	A_12460	[784]	A_12461	[785]	A_12462
+	[786]	A_12463	[787]	A_12464	[788]	A_12465	[789]	A_12466	[790]	A_12467
+	[791]	A_12468	[792]	A_12469	[793]	A_12470	[794]	A_12471	[795]	A_12472
+	[796]	A_12473	[797]	A_12474	[798]	A_12475	[799]	A_12476	[800]	A_12477
+	[801]	A_12478	[802]	A_12479	[803]	A_12480	[804]	A_12481	[805]	A_12482
+	[806]	A_12483	[807]	A_12484	[808]	A_12485	[809]	A_12486	[810]	A_12487
+	[811]	A_12488	[812]	A_12489	[813]	A_12490	[814]	A_12491	[815]	A_12492
+	[816]	A_12493	[817]	A_12494	[818]	A_12495	[819]	A_12496	[820]	A_12497
+	[821]	A_12498	[822]	A_12499	[823]	A_12500	[824]	A_12501	[825]	A_12502
+	[826]	A_12503	[827]	A_12504	[828]	A_12505	[829]	A_12506	[830]	A_12507
+	[831]	A_12508	[832]	A_12509	[833]	A_12510	[834]	A_12511	[835]	A_12512
+	[836]	A_12513	[837]	A_12514	[838]	A_12515	[839]	A_12516	[840]	A_12517
+	[841]	A_12518	[842]	A_12519	[843]	A_12520	[844]	A_12521	[845]	A_12522
+	[846]	A_12523	[847]	A_12524	[848]	A_12525	[849]	A_12526	[850]	A_12527
+	[851]	A_12528	[852]	A_12529	[853]	A_12530	[854]	A_12531	[855]	A_12532
+	[856]	A_12533	[857]	A_12534	[858]	A_12535	[859]	A_12536	[860]	A_12537
+	[861]	A_12538	[862]	A_12539	[863]	A_12540	[864]	A_12541	[865]	A_12542
+	[866]	A_12543	[867]	A_12544	[868]	A_12545	[869]	A_12546	[870]	A_12547
+	[871]	A_12548	[872]	A_12549	[873]	A_12550	[874]	A_12551	[875]	A_12552
+	[876]	A_12553	[877]	A_12554	[878]	A_12555	[879]	A_12556	[880]	A_12557
+	[881]	A_12558	[882]	A_12559	[883]	A_12560	[884]	A_12561	[885]	A_12562
+	[886]	A_12563	[887]	A_12564	[888]	A_12565	[889]	A_12566	[890]	A_12567
+	[891]	A_12568	[892]	A_12569	[893]	A_12570	[894]	A_12571	[895]	A_12572
+	[896]	A_12573	[897]	A_12574	[898]	A_12575
+	;
+
+MATRIX
+
+[                            10        20        30        40        50        60        70        80        90        100]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        AAGCTTCACCGGCGCAGTCATTCTCATAATCGCCCACGGGCTTACATCCTCATTACTATTCTGCCTAGCAAACTCAAACTACGAACGCACTCACAGTCGC   [100]
+Pan                 ................A.T..C.................A...............T......................T..T........C.........   [100]
+Gorilla             ..................TG....T.....T........A........A......T...............................A..C.....C...   [100]
+Pongo               ................AC..CC.....G..T.....T..A..C........CC....G.............................A..C.....C...   [100]
+Hylobates           ......T..A..T...AC.G.C.................A..A..C..T..CC.G...........T....................A........C...   [100]
+Macaca_fuscata      ......TT........AC...C..T..G.....T.....A..C..C..T..CA..TAT...........C..T........T.............AC..T   [100]
+M._mulatta          ......TT.T......AC...C.....G..T..T.....A..C..C..T..CA..TAT...........C..T........T.............AC..T   [100]
+M._fascicularis     .......T........AC..CC..T.................C..C..T..CA.GTAT......T.G..C..T........T..G........T.AC..T   [100]
+M._sylvanus         .......T....T...ACT..C..T...G.T.....T..A..C..C..T..CA..TAC......T.G..C....................C.....C...   [100]
+Saimiri_sciureus    ................A.G..C..A........T......T....T..G..TA.G.......................T........A.T......C..A   [100]
+Tarsius_syrichta    ...T....TT..A..CAC..C...T.....T.....T..C..C..C.....CC..T....T...........TA.............AGTC........A   [100]
+Lemur_catta         ........TA..A...AC......A........A..T..C........A..CA..T.......T.....C.....T...........A.TC..T..C..T   [100]
+
+[                            110       120       130       140       150       160       170       180       190       200]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        ATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTA   [200]
+Pan                 ........T.....C................................C...........C........................C.......T..C....   [200]
+Gorilla             ........T..............C.....C.................CC.............G...........C....................C....   [200]
+Pongo               ....................C.................C........CC.C.....................A.......T...C....A.....C..C.   [200]
+Hylobates           ...........A....G...G..C...G.CT..........G.....C..C........CGC......................C...............   [200]
+Macaca_fuscata      .C....C.A..G..C.G...........TC.....T.......C...C.........T.AAC.........TA.......T...C.............C.   [200]
+M._mulatta          .C....C.A..G..C.GG..........TC.....T.......C......C......T.AAC.........TA.......T...C.............C.   [200]
+M._fascicularis     .C....C.A..A..C.G...........T......T...T.G.C...C..C........CAC.........TA.......T...C...............   [200]
+M._sylvanus         ......C.A..A..C.G......C....TC.............C...C..C......T.CAC.........TA....T..T..TC.....T.........   [200]
+Saimiri_sciureus    .CA....CAT.TA...G...G..C.....A...T....G..T....GCC.C........C.......AT...............C.....A.AG......   [200]
+Tarsius_syrichta    .CA...GCA..AG.C.GT..C........C...T.A..T..TGC...AACA........C..C..C...T.AA.C.....G...C.T.....A..A....   [200]
+Lemur_catta         .CA...C.A..AG.A.G...GA.C.....CA.T.....T..T.....CACC........A..C..C.....AA.......A...C.....A..T....C.   [200]
+
+[                            210       220       230       240       250       260       270       280       290       300]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        ACCTACTGGGAGAACTCTCTGTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGGACTCAACATACTAGTCACAGCCCTATACTC   [300]
+Pan                 .T..C..A..G........C............T.A................C............C......T...........A..........G.....   [300]
+Gorilla             .......A.....G.....C..A...........A..............C.C...C..TT...........TCT.........A.T........G.....   [300]
+Pongo               ....T..A...........C..A...A..G...TA.....T.....T..C.....CA.......A..................A.....A..........   [300]
+Hylobates           ....C..A..T.......TC..A...A.GG..T.C........GG....C.CT...A.TAC...C..C..G......G.....A....G...........   [300]
+Macaca_fuscata      .T.....A..T.......T...AA.C.C....T.A...........CC.......CA.TA.G..A........T......T..A.T..G.....C.....   [300]
+M._mulatta          .......A..T.......T...AA.C.CG...T.A........G..CC.......CA.TA..T.A......T.T.........A.T..G.....C.....   [300]
+M._fascicularis     .T.....A..C.......T...AA.CAC...TT.A..T........CC.......CA.TG.GT.A..G..C..T..T......A..........C.....   [300]
+M._sylvanus         .T........C.......TC..AA.C.C....T.A..T........CC.C.....CA..A....A........G.........A.T........C.....   [300]
+Saimiri_sciureus    .T...G.A......T.ACTCACAA.C...T.TT.C.....T.....C..CT.T...A.TA..T.C........T..T......A.T.....A..C.....   [300]
+Tarsius_syrichta    .TT..A.C..T.....G..C..AA..A..G.AG.A..T..A..G...C.CC.A...A.TA.CT.AGT...C..T....CC..TA....C........T..   [300]
+Lemur_catta         .TT..A.T..C.....A.TC..CACTA..G.AT.C.....A........C..T..AA.TA.CT.A.T...CT.A..T..G..CA....C..T..C..T..   [300]
+
+[                            310       320       330       340       350       360       370       380       390       400]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        CCTCTACATATTTACCACAACACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTCACACGAGAAAACACCCTCATGTTCATACAC   [400]
+Pan                 .........G.................A.......................T........G....................T..T.....A..TT.....   [400]
+Gorilla             ...T..T....................A...C.......A........C.C..................T.............T......A.....G...   [400]
+Pongo               T.....T.....C............C.A..TA...C...A........C..............T..T........C.....T.........C........   [400]
+Hylobates           ...T..........T..T.......C.A...A....T..A...........A...............C...............TAT.A..AC.T..G...   [400]
+Macaca_fuscata      T...C.......C..T.........C.A..AA.......A..T.....A.T.........G...C.C.................AT.A..A.........   [400]
+M._mulatta          ....C.......C............C.A..AG.......A..T.....A.T.............C.C................TAT.A..A.........   [400]
+M._fascicularis     T...C....G..C.TT...GT....C.A..AA.......A........A.T...T.........C.C................TAT.A..A.........   [400]
+M._sylvanus         T..TC.......C............C.A..AG.G.....A........A.TT...........AC.T................TAT.A..AC........   [400]
+Saimiri_sciureus    A..TC.T..G.A.G..T.T.....GC.A..TC....T..AT..AG..CC.G...T........AAT...T...........T..G..A..A..T.....T   [400]
+Tarsius_syrichta    ...A..T...C.A.T..T...T...C.A..AAA.TA...AT.T..T..C.....T..C.TG...C.T.....C........T..AT.A..AA........   [400]
+Lemur_catta         ......T.....A..T..T......C.A..AAA......AT.T..TTCGC.....C....C..A..C..T.................T..A.C.......   [400]
+
+[                            410       420       430       440       450       460       470       480       490       500]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        CTATCCCCCATTCTCCTCCTATCCCTCAACCCCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACA   [500]
+Pan                 ...........C.....T...........T..T..T.....C..T..A..CA....C...........................................   [500]
+Gorilla             ...........C.......................T..T..C........CA....C.........................................T.   [500]
+Pongo               ...........C......T..............AG......CG.T.....CG...AC.........................T.............A.T.   [500]
+Hylobates           ..C.T....C.C.........A..........TA..........T..C...A.TC.C......C.........T........T.............A...   [500]
+Macaca_fuscata      ..CG.T..A...A....T...............A.......CCTG......A....C....G............T......CT.............A..C   [500]
+M._mulatta          ..CG.T..A..CA..........T.........A.......CCTG......A.T..C....G............T.......T.............A..C   [500]
+M._fascicularis     ..CG.T..A...A....T.....T.........A.......CCTG......A....C.................T.......T.............A..T   [500]
+M._sylvanus         ..CG.T..A...A.T..T.....T..T......A........CTA..A...A.T..C................TT.......T...C.........A..T   [500]
+Saimiri_sciureus    A..A.A..A..C........TA..T.G.G....A.GG.A....TA..ACCC..AC.......T.........G.T.......T.............A.T.   [500]
+Tarsius_syrichta    ....TT...T.AA....A.....TAC.......A.AG.A....TA..AACCATG.AC................A........T.........G...A.T.   [500]
+Lemur_catta         A..CT....C.......AT.TA..T.A......A.A..T...CTA..ACCCA.G.AC................A-......CT............CAGA.   [499]
+
+[                            510       520       530       540       550       560       570       580       590       600]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        ACAGAGGCTTA-CGACCCCTTATTTACCGAGAAAGCT-CACAAGAACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGA   [598]
+Pan                 .........C.-.........................-T.T............T....AT.......C..G.............................   [598]
+Gorilla             .........C.-.A.......................-.GT....G............A.....G..CT.G.............................   [598]
+Pongo               .T..G.C.CC.-.A.......................-..................TCA.T-......G.G.................G...........   [597]
+Hylobates           .T.......CG-AA...T...GC.............C-...................CTAT.......A.G.............................   [598]
+Macaca_fuscata      .T....A..C.-.C...T................A..-.G....G.........C....TA...G.AC....A.TT.C..T...................   [598]
+M._mulatta          .T....A....-.C...T................A..-.G.G..G.........C....TAT..G.AC....A.TT.C..T...................   [598]
+M._fascicularis     .T......C..-.C..TT................A..-.G....G........TC......T..G.ACT...A.CT.C..T..C................   [598]
+M._sylvanus         .T...A.....-.C..TT................A..-TG....G..C.....TC..CA..T..G.ACT...A.CT.C..T...................   [598]
+Saimiri_sciureus    .T...A.AA..-TA..TT....A............TG-.G.............T......T....A.A........T......C................   [598]
+Tarsius_syrichta    .T...A..CC.AA..TTT..........A......TA-TG.....................T....A.A......TG.........T-............   [598]
+Lemur_catta         .T...A...C.AAC-.TT.................TAATGT.T.............TGCA.T..G.A.A...A..T.C....A.................   [598]
+
+[                            610       620       630       640       650       660       670       680       690       700]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        TAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTACTATAACCACCCTAACCCTGACTTC   [698]
+Pan                 .......C....G.......................................................T.T.....C.........T.......A...C.   [698]
+Gorilla             ........................A.......................................T...T..G....C.........T..G....A.....   [698]
+Pongo               ............C...........AT..................................C.G.....TTT..C..C.....TG....C...T.A.....   [697]
+Hylobates           ........................A.....................................G.A...T....C..C...G...TT.....G..A..C..   [698]
+Macaca_fuscata      ................AC......AGT......CA............................T.........CC.C..C.TT.TAAC......T.TC..   [698]
+M._mulatta          ................AC......AGT.......A............................T.........CC....C.TA.TAAC......T.TC..   [698]
+M._fascicularis     ................AC......AGT......CA............................T.........CC.C..C.TA.TAAC......C.TC..   [698]
+M._sylvanus         .................C......AGT.......A............................T....T.T..CC.C..C.TA.TAAC...T..C.TC..   [698]
+Saimiri_sciureus    ..GT..T.................AG.......CA...........................---..A...TTCT.C..C..TCTAA....A..A.T.AG   [695]
+Tarsius_syrichta    ..GA..TA......C.........AA..G....-A............................ATT.ATTTT.AT.CTCC.TTTTA...T.A..T..ACT   [697]
+Lemur_catta         ..GA..TA.........C......AG.......-A............................ATC.ATTAT.CT..T.C...CTTG.C..A....T.AT   [697]
+
+[                            710       720       730       740       750       760       770       780       790       800]
+[                            .         .         .         .         .         .         .         .         .         .]
+
+Homo_sapiens        CCTAATTCCCCCCATCCTTACCACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGT   [798]
+Pan                 .T......T.........C.........A.........................T...........G.........A....G...........C..T..C   [798]
+Gorilla             .T..........T............T..A.C..T.............G...............C........T..C.................C.....C   [798]
+Pongo               ......C........TACCG.T......A.......C............C..........C.........A.GGCCA........G......C......C   [797]
+Hylobates           ...............TACAG.......TA.......C..T.....G....T......G..C..C......ATG.CCA.T..C..T........A.....C   [798]
+Macaca_fuscata      .....C..T...A..TT..G........A.C......T........CGTC.......AG....C......A.A.CC..AAT..ATG.T..C..C.....C   [798]
+M._mulatta          .....C..T...A..TT..G........A.C......T........CGTC.......AG....C......A.A.CC..AAT..ATG.T..C..C.....C   [798]
+M._fascicularis     ...G.CC.TT..A..TT..G........ACC.....CT.T......CGT........AG.C..C......A.A.CC..AAT..ATG.T.......C....   [798]
+M._sylvanus         .....C..TT..A..TT.CG.T.....TA.C.....C.........C..CT...T..AA.C..C......A.AGCC..AAT..ATG.T..C....C...C   [798]
+Saimiri_sciureus    ....C.AG.G..A.....AG.T......A...............GC.CACT......GT.C..C......CTAGCCA..ATC.A.G..C.C....C....   [795]
+Tarsius_syrichta    .T.....A....AT.TA...TT..AACAAC...AAAAT.TG...C.C.TG.......TT.C..C......AA.TC.A....C.G.G.A.....A.CA..C   [797]
+Lemur_catta         .....C.TTA..T...A...TA.A.G.TACA...ATAT.......CT..C.C..TG.A.CA..C........TTC.A.T....GTG....C..C.CT..C   [797]
+
+[                            810       820       830       840       850       860       870       880       890      ]
+[                            .         .         .         .         .         .         .         .         .        ]
+
+Homo_sapiens        CTCTTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTT   [896]
+Pan                 ..T....................A................C.........A.....G........A................................   [896]
+Gorilla             ....................TC.A................C.........A.G............A.............TT..A..............   [896]
+Pongo               ..TA....A...........T..C.......GA......ACC..CG..A.A......TG....A.A..C.....G...CTA..A.....A........   [895]
+Hylobates           ..A..T.......T.............AC..........ACC.....T..A............A.TG..........GCTAG.A..............   [896]
+Macaca_fuscata      ...CC.T.A.....TT.......C.T.TC.A........ACA.CC..T.G..G......T...ATA.TG.........CTAG.C..AA.G........   [896]
+M._mulatta          ...CC.T.A.....TT.......C.T.TC.A........ACA.CC..T.GA.G......T...ATA.T..........CTAG.C..AA.A........   [896]
+M._fascicularis     ...CC.T.A.....CC.......CCT.TC.A........ACA.CC..T.G..GT.....T...ATA............TTAG.C..AA.A........   [896]
+M._sylvanus         ....CTT.A.....TT...AT..A.T.T..A........ACA..C....GA.G...G......ATA.T..........CTAAGC..AA.AT.......   [896]
+Saimiri_sciureus    AC...AT.T.T..T.T.C..T..CCTTAC..G.......TCA..A..T..A............ATA..T.T......C.T.A.A..A...........   [893]
+Tarsius_syrichta    ..AG....A.TGCTC.....TC.A.A.AC.A.T......A.A..C..T..C........T...ATA..G.TT..T..T.T.A.AT.A.G.........   [895]
+Lemur_catta         ...A....A..T.T.T....T..C.C.TC..GA......ACA..C..T..C........T...ATA....T......CCTAA.A..A..TA.T.....   [895]
+;
+END;
+BEGIN GENETICCode;
+StandardNUCLEAR;
+END;
+
+BEGIN CODONS;
+CODONPOSSET * UNTITLED = 
+		N: 1 458-659 897 898, 
+		1: 2-455\3 660-894\3, 
+		2: 3-456\3 661-895\3, 
+		3: 4-457\3 662-896\3;
+CODESET * UNTITLED = mtDNA.mam: all ;
+END;
+
+BEGIN ASSUMPTIONS;
+	USERTYPE ttbias STEPMATRIX= 4
+	     A C G T
+	 [A] . 6 1 6
+	 [C] 6 . 6 1
+	 [G] 1 6 . 6
+	 [T] 6 1 6 .
+	;
+
+	OPTIONS  DEFTYPE=unord PolyTcount=MINSTEPS ;
+	CHARSET  tRNA_His  =  459-528;
+	CHARSET  'tRNA_Ser_(AGY)'  =  529-588;
+	CHARSET  'tRNA_Leu_(CUN)'  =  589-659;
+	CHARSET  noncoding = 1 458-659 897 898; 
+	CHARSET  position1 =  2-455\3 660-894\3; 
+	CHARSET  position2 =  3-456\3 661-895\3; 
+	CHARSET  position3 =  4-457\3 662-896\3;
+	EXSET  protein_only  =  noncoding;
+	EXSET  non_protein  =  position1 position2 position3;
+	EXSET  only_3rds = position1 position2 noncoding;
+	EXSET  only_2nds = position1 position3 noncoding;
+	EXSET  only_1sts = position2 position3 noncoding;
+END; 
+
+BEGIN TREES; 
+
+[!
+Heuristic search settings:
+
+   Addition sequence: random
+      Number of replications = 10
+      Starting seed = 1
+   Global branch-swapping performed
+   MULPARS option in effect
+   Steepest descent option not in effect
+   Initial MAXTREES setting = 100
+   Branches having maximum length zero collapsed to yield polytomies
+   Trees are unrooted
+   Length of shortest tree found = 998
+   Number of trees retained = 2
+   Time used = 50.0 sec
+]
+	TRANSLATE
+		1	Homo_sapiens,
+		2	Pan,
+		3	Gorilla,
+		4	Pongo,
+		5	Hylobates,
+		6	Macaca_fuscata,
+		7	M._mulatta,
+		8	M._fascicularis,
+		9	M._sylvanus,
+		10	Saimiri_sciureus,
+		11	Tarsius_syrichta,
+		12	Lemur_catta
+	;
+	TREE  * 'Homo-Pan' =  [&R] (12,(((((((1,2),3),4),5),(((6,7),8),9)),10),11));
+	TREE 'Pan-Gorilla' =  [&R] (12,((((((1,(2,3)),4),5),(((6,7),8),9)),10),11));
+	TREE 'Homo-Gorilla' =  [&R] (12,(((((((1,3),2),4),5),(((6,7),8),9)),10),11));
+
+END;
+
+
+
+BEGIN MACCLADE;
+Version 3.05;
+LastModified -1425155069;
+Singles 0000&/0;
+END;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Rab1.chaos-xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Rab1.chaos-xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Rab1.chaos-xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,485 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE chaos PUBLIC  "-//Chaos//Custom XML//EN" "http://www.fruitfly.org/chaos-xml/dtd/chaos.dtd">
+<chaos>
+  <chaos_metadata>
+    <chaos_version>1</chaos_version>
+    <chaos_flavour>chaos</chaos_flavour>
+    <focus_feature_id>gene:EMBL/GenBank/SwissProt:AE003734:52204:55287</focus_feature_id>
+    <feature_unique_key>feature_id</feature_unique_key>
+    <equiv_chado_release>chado_1_01</equiv_chado_release>
+    <export_unixtime>1114399206</export_unixtime>
+    <export_localtime>Sun Apr 24 20:20:06 2005</export_localtime>
+    <export_host></export_host>
+    <export_user>cjm</export_user>
+    <export_perl5lib>:/Users/cjm/cvs/DBIx-DBStag:/Users/cjm/stag:/Users/cjm/cvs/sqlfairy/lib:/Users/cjm/chaos-xml/lib::/Users/cjm/cabal::/Users/cjm/biostag/lib::/Users/cjm/amigo/perl::/Users/cjm/go-db-perl::/Users/cjm/go-perl::/Users/cjm/cvs/bioperl-live::/users/cjm/cvs/gmod/XML-XORT/lib:/sw/lib/perl5:/sw/lib/perl5/darwin</export_perl5lib>
+    <export_program>/Users/cjm/chaos-xml/bin/cx-genbank2chaos.pl</export_program>
+  </chaos_metadata>
+  <feature>
+    <feature_id>AE003734.2</feature_id>
+    <dbxrefstr>SEQDB:AE003734.2</dbxrefstr>
+    <name>AE003734</name>
+    <uniquename>Drosophila_melanogaster:AE003734.2</uniquename>
+    <type>databank_entry</type>
+    <featureprop>
+      <type>comment</type>
+      <value>On Sep 18, 2002 this sequence version replaced gi:7300718.</value>
+    </featureprop>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+  </feature>
+  <feature>
+    <feature_id>contig:AE003734.2:55787:51703</feature_id>
+    <name>contig-Rab1-500-500</name>
+    <uniquename>contig-Drosophila_melanogaster:Rab1-500-500</uniquename>
+    <type>contig</type>
+    <featureloc>
+      <nbeg>55787</nbeg>
+      <nend>51703</nend>
+      <strand>-1</strand>
+      <srcfeature_id>AE003734.2</srcfeature_id>
+    </featureloc>
+    <residues>AACCTGTGCTATCTGGAGGCCATTCACAACTTCGTGGAGGTGCTCAACGAATACTTCCATAATGTGTGCGAGCTGGATCTGGTCTTCAACTTCTACAAGGTGTACAGTGTGGTGGACGAGATGTTCCTGGCGGGCGAGATCCGGGAGACCTCGCAGACGAAGGTGCTCAAGCAGCTGCTCACGCTAAATTCGCTGGAGTAGCGGCCCAATTCATACATATGTATATCGGATCTGCCAGATGCATTCCCCCAGGCGCTACACCCGCCTATCGAATCTTCTTTCATATACTATTACGCGTATTATTATTATTATCTTTAATGTGTTCTCTATGCAAACACTCAAATGTTAAAGGCCGTGTTTAGAAACAAATGATGTACCATAAGAGATTGTATGCAAAAGTTAAGTATTTTTGTTTTTATATGGGGTGTGTTAGGGGTCATTTGGCGATAATCCATATATTTTCTTCTCTGGTTCCGCCCACTCACCTCCGTAGACAAGATAATGTGTAGCCCTGGTTGGAAGAGGGCCCCGCGCATTTTCCAACACTGCCGCTAATGCCACTTCGTCATTTTTTCAGTCGCCATTGTTCTCGTTCTCGGCCGTCGAATTTGTGGAAATAAACGTATTTCCCATTTAAATTCGCTGCAATAATTACAATAGCGAACTGCAAAACACAACACAAGCCGAGTGCGTGTCGATTTCCAGTATTTCGGAACGCGCAAATCAAATAAAAACACTCTGAACAGCAGCCAGTCCAGGGCCAGAAATCGAATATTTGTTCGTGGCAAATATATAATCTTGTGAAAAAGCCAGAAAATGTCATCTGTGAATCCGGAATAGTAAGTCGTCACACGTAATTATGCGCAGTTATCTGGAGTTAAAACAGTGACTGGGCTTCCACTTCCTTTTCTGGCCTTCGAAAATAGTTTCTTTTATTTTTTCTGCGACGTCATTTCTGCTACGTTTCATTCCCATTCATTATCGAGCAGCCCCACTCCACTCCCTTTTGCCCTCGTTGGCAACTCCAAAAAAACAGACCATTTGCTAAAAACAAAATACACACTCGGACCTAACAACCGACCCAATCGAGAAACCATAACCAAAACTGTGTGTTTCAGCTGGATTTCAATGTAGTGCCCACGGGCGAGGCGATGAAAAGCGCGTTCGAAACTCTCATCGGAATCTGCATATGTATAAAGTCCACACATATATACAGCGAGATCCCAGTTGACGGGCCAAATCGCTAGTCGGGCGGATGAGTCACCTGCGTGGAGAGCCTTTCCCACCCCCAGCGCAGTGCTACGTAATCAAAGGCCATTCCAGTGCTACTTAATTGCCGCTCTCACTCAAGGCAATTAAATCTGAACCGATCTTGGTTCGCACTCTCCAGAAAGCCTCCTTCAATAGCCAGCATACATGTGTGCAGAGGATTGGAACACTATCGCATATCATTCAGCAATTGGGGGTTGATGTTGGACATAGCATGATAAGATAAAGCCTAAACCAAACCGGTTTTATGTATATATTAGTATCTGCCCGACTAAACTATGCCCGCAAAACACCTCTCTAAATGTTTTGCTTGAATCAATGATTTCTAACTGATTGTCATCCAACTTATTTCCCTTTAAATGTATACTAAACCTTTTACACTCTGATTTTAAACATTTCGTCTTGTAATATCACTATCGCAAATATGCATCATCAGTTTTATTTAGATTTCTCTGAAGTTTTATATGTTTCAGCATAGGGTCTGAACTATTCCCACATCGCAGCACAAATTTATATCATTAACTATTAAATTAACTATAATTTATTAACTATATAACTATTTTATAACATTAACTATATTAACTATCTCCAAATCCACAGCGATTATCTCTTTAAGTTGCTGCTTATTGGAGACTCGGGCGTTGGAAAGTCCTGTCTTCTGTTGCGATTTGCCGATGACACATATACAGAAAGTTATATCAGCACAATCGGAGTGG
ATTTCGTAAGCCAACCGGAAATTTGTGAAAATTCTTTATGTCTGACTCATAAACCTTTATCTTGCAGAAAATCAGGACTATAGAACTCGACGGAAAGACCATTAAACTGCAAATCGTAAGCATAAAAACGCCAGAGGCAATCTAGTCCAGAGCTAATTAGTCTGTTTACAGTGGGATACTGCTGGCCAGGAGCGCTTCCGCACCATCACGTCTTCATATTATAGGGGCGCCCATGGCATCATTGTCGTATACGATTGCACGGACCAGGAGTCTTTCAACAATGTGAAGCAGTGGCTGGAGGAGATCGAGCGGTATGCCTGTGAGAATGTCAACAAGTTGCTGGTTGGCAACAAGAGCGACTTGACCACCAAGAAAGTAGTCGACCACACCACAGCTGCGGTGCGTATATTGCTGGTTTAGTCCTGGGAATCATAGATGCACGTATATACTAATCTGGGTCTTTGTTTCCAAAATTCCAGGAGTACGCCGCCCAGTTAGGCATTCCATTCCTTGAAACTTCGGCCAAGAGCGCCACCAACGTTGAGCAGGCCTTCATGACGATGGCGGCGGAGATCAAGAATCGCGTCGGGCCGCCGTCCAGCGCCACTGACAACGCTAGCAAAGTGAAAATCGATCAAGGACGTCCAGTGGAAAACACCAAATCCGGTTGCTGCTGAATAACTCTGATTGTGAATCATTATTTTATTATACAATTAAACAAAATTTAAATATAATAAATTAAAACGGAAACCACACAATAATAATAAAACAAGCGAAACCGATGGCGGATCCACGAAATCGTAATTATAATAACTAATTCTGTAATAACAATAATGGTGAGGAATGACGAGTAGTGCGTTTGTATGATCCTAACCGTGATCCAATTCGATAAACAATTTAAGCAAATATGTATTAGGTACGTAGCCAACCAGCTCCAGATCCAGATCCCGATCCAGACTTCAGACATCCCCAACTAAACTCGAGCAACAAGGACATCGATCCTAATACTAACCGTCACTCTGATTTCCGCAGTTCTGTTCATAAGTAGTGCAAAGCAAGCAAATTCGGTGTAGCAATTAACCTAATATTACTTTAACCTACAATAAGAAGTTACCTATCACAGCAGCGCGAAACCATACCGAAACTAATATGTACTATTAACTACACATTAATTGAGCGCAGGCGACGGCAGGAGCAATTGTACTTGTCCTGTGTCAGCTACTACTTACCACTATGTATACATATGTATGTGTTTAAATTACAAATACAATTTTCTAAACATATTATCTCTAGCGAACGTGATCGTCACTTGAAATTTTATGTAAATCGAACCCAAAACCGAACGAACATTTTTAGATTTAACGTCGAGCGATTGTGTGTTTGAATTATCGTGTGACACAAGGGATTTGTTTAGGCACATAATTAGTGTAATAATGACGACATAGCGATGAAGTTTAATAAAAATTATTGCGTGTGCGTATTTAAATTGCAAGGTTTGGGATCAAGGACACACTTTAGTTTCGGTTTTTACTGTCGCCCGAATTTGAATCTTAATTTGACCTGAGTCTAATAAAATCGATCTGTACCGATGCCTACAAACGACTTGTTTTATTTCTTGACTTTGGAAAGATTGCAAAACTTAAGTGAAGGCCGGAACTGATCCGCACGCACCCAAATACTTGGCAGAAACTTTAAGGGCGAGGCGATTTGTGTAGAATCTTCCTGTTTTTGGAGCAAGTTTTTGGCTGATTTAAAACTTGGATCATGCCAATGTATTTAGAATATATTCAAAGCACTCTATACGAGTGCTTAGATGTTGAAGAATCAGATGTATAAATATATTTCTATTTAAAAATGTTTTATATCTGCATTATTTCACAAATCAGATTAGTTAGTGCACTGCCGTTAAGTTAACTGCACATAGGTAATCAATTTTTGTAAACCTGATACGTTGGCAAATCTTCTAGGATACTTACGAAAAATGTATGTATCCACTTATT
TTATAAATTATATACATATATTCGGAAAGATATTTTTCTTTTTAGGACATACTCTACCTTTATGTATTGCATTGTATACATACAGGACTTATTATTTTT</residues>
+  </feature>
+  <feature>
+    <feature_id>gene:EMBL/GenBank/SwissProt:AE003734:52204:55287</feature_id>
+    <name>Rab1</name>
+    <uniquename>Drosophila_melanogaster:Rab1</uniquename>
+    <type>gene</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>500</nbeg>
+      <nend>3584</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <feature_dbxref>
+      <dbxrefstr>FLYBASE:FBgn0016700</dbxrefstr>
+    </feature_dbxref>
+    <featureprop>
+      <type>map</type>
+      <value>93D2-93D2</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>note</type>
+      <value>last curated on Mon Apr 01 10:37:17 PST 2002</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>db_xref</type>
+      <value>FLYBASE:FBgn0016700</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature>
+    <feature_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</feature_id>
+    <name>Rab1-mRNA-2</name>
+    <uniquename>Drosophila_melanogaster:Rab1-mRNA-2</uniquename>
+    <type>mRNA</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>500</nbeg>
+      <nend>3584</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <feature_dbxref>
+      <dbxrefstr>FLYBASE:FBgn0016700</dbxrefstr>
+    </feature_dbxref>
+    <featureprop>
+      <type>product</type>
+      <value>CG3320-RB</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>db_xref</type>
+      <value>FLYBASE:FBgn0016700</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</subject_id>
+    <object_id>gene:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature_relationship>
+    <subject_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</subject_id>
+    <object_id>gene:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>AAF55873.1</feature_id>
+    <name>Rab1-CDS-1</name>
+    <uniquename>Drosophila_melanogaster:Rab1-CDS-1</uniquename>
+    <type>polypeptide</type>
+    <residues>MSSVNPEYDYLFKLLLIGDSGVGKSCLLLRFADDTYTESYISTIGVDFKIRTIELDGKTIKLQIWDTAGQERFRTITSSYYRGAHGIIVVYDCTDQESFNNVKQWLEEIERYACENVNKLLVGNKSDLTTKKVVDHTTAAEYAAQLGIPFLETSAKSATNVEQAFMTMAAEIKNRVGPPSSATDNASKVKIDQGRPVENTKSGCC</residues>
+    <seqlen>205</seqlen>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>816</nbeg>
+      <nend>2662</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <feature_dbxref>
+      <dbxrefstr>GI:7300727</dbxrefstr>
+    </feature_dbxref>
+    <feature_dbxref>
+      <dbxrefstr>FLYBASE:FBgn0016700</dbxrefstr>
+    </feature_dbxref>
+    <feature_dbxref>
+      <dbxrefstr>protein:AAF55873.1</dbxrefstr>
+    </feature_dbxref>
+    <featureprop>
+      <type>product</type>
+      <value>CG3320-PA</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>note</type>
+      <value>Rab1 gene product from transcript CG3320-RA</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>db_xref</type>
+      <value>GI:7300727</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>db_xref</type>
+      <value>FLYBASE:FBgn0016700</value>
+      <rank>1</rank>
+    </featureprop>
+    <featureprop>
+      <type>protein_id</type>
+      <value>AAF55873.1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>translation</type>
+      <value>MSSVNPEYDYLFKLLLIGDSGVGKSCLLLRFADDTYTESYISTIGVDFKIRTIELDGKTIKLQIWDTAGQERFRTITSSYYRGAHGIIVVYDCTDQESFNNVKQWLEEIERYACENVNKLLVGNKSDLTTKKVVDHTTAAEYAAQLGIPFLETSAKSATNVEQAFMTMAAEIKNRVGPPSSATDNASKVKIDQGRPVENTKSGCC</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>codon_start</type>
+      <value>1</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>AAF55873.1</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>derives_from</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>AAN13857.1</feature_id>
+    <name>Rab1-CDS-2</name>
+    <uniquename>Drosophila_melanogaster:Rab1-CDS-2</uniquename>
+    <type>polypeptide</type>
+    <residues>MSSVNPEYDYLFKLLLIGDSGVGKSCLLLRFADDTYTESYISTIGVDFKIRTIELDGKTIKLQIWDTAGQERFRTITSSYYRGAHGIIVVYDCTDQESFNNVKQWLEEIERYACENVNKLLVGNKSDLTTKKVVDHTTAAVRILLV</residues>
+    <seqlen>146</seqlen>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>816</nbeg>
+      <nend>2405</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <feature_dbxref>
+      <dbxrefstr>GI:23171868</dbxrefstr>
+    </feature_dbxref>
+    <feature_dbxref>
+      <dbxrefstr>FLYBASE:FBgn0016700</dbxrefstr>
+    </feature_dbxref>
+    <feature_dbxref>
+      <dbxrefstr>protein:AAN13857.1</dbxrefstr>
+    </feature_dbxref>
+    <featureprop>
+      <type>product</type>
+      <value>CG3320-PB</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>note</type>
+      <value>Rab1 gene product from transcript CG3320-RB</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>db_xref</type>
+      <value>GI:23171868</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>db_xref</type>
+      <value>FLYBASE:FBgn0016700</value>
+      <rank>1</rank>
+    </featureprop>
+    <featureprop>
+      <type>protein_id</type>
+      <value>AAN13857.1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>translation</type>
+      <value>MSSVNPEYDYLFKLLLIGDSGVGKSCLLLRFADDTYTESYISTIGVDFKIRTIELDGKTIKLQIWDTAGQERFRTITSSYYRGAHGIIVVYDCTDQESFNNVKQWLEEIERYACENVNKLLVGNKSDLTTKKVVDHTTAAVRILLV</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>codon_start</type>
+      <value>1</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>AAN13857.1</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>derives_from</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>exon:EMBL/GenBank/SwissProt:AE003734:52204:53323</feature_id>
+    <name>Rab1-exon-1</name>
+    <uniquename>Drosophila_melanogaster:Rab1-exon-1</uniquename>
+    <type>exon</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>2464</nbeg>
+      <nend>3584</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:52204:53323</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>exon:EMBL/GenBank/SwissProt:AE003734:52204:53631</feature_id>
+    <name>Rab1-exon-6</name>
+    <uniquename>Drosophila_melanogaster:Rab1-exon-6</uniquename>
+    <type>exon</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>2156</nbeg>
+      <nend>3584</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:52204:53631</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>exon:EMBL/GenBank/SwissProt:AE003734:53404:53631</feature_id>
+    <name>Rab1-exon-2</name>
+    <uniquename>Drosophila_melanogaster:Rab1-exon-2</uniquename>
+    <type>exon</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>2156</nbeg>
+      <nend>2384</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:53404:53631</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>exon:EMBL/GenBank/SwissProt:AE003734:53688:53735</feature_id>
+    <name>Rab1-exon-3</name>
+    <uniquename>Drosophila_melanogaster:Rab1-exon-3</uniquename>
+    <type>exon</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>2052</nbeg>
+      <nend>2100</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <featureprop>
+      <type>id</type>
+      <value>exon:EMBL/GenBank/SwissProt:AE003734:53688:53735</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:53688:53735</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:53688:53735</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>exon:EMBL/GenBank/SwissProt:AE003734:53798:53918</feature_id>
+    <name>Rab1-exon-4</name>
+    <uniquename>Drosophila_melanogaster:Rab1-exon-4</uniquename>
+    <type>exon</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>1869</nbeg>
+      <nend>1990</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <featureprop>
+      <type>id</type>
+      <value>exon:EMBL/GenBank/SwissProt:AE003734:53798:53918</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:53798:53918</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:53798:53918</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature>
+    <feature_id>exon:EMBL/GenBank/SwissProt:AE003734:54949:55287</feature_id>
+    <name>Rab1-exon-5</name>
+    <uniquename>Drosophila_melanogaster:Rab1-exon-5</uniquename>
+    <type>exon</type>
+    <organismstr>Drosophila melanogaster (fruit fly)</organismstr>
+    <featureloc>
+      <srcfeature_id>contig:AE003734.2:55787:51703</srcfeature_id>
+      <nbeg>500</nbeg>
+      <nend>839</nend>
+      <strand>-1</strand>
+      <rank>0</rank>
+    </featureloc>
+    <featureprop>
+      <type>id</type>
+      <value>exon:EMBL/GenBank/SwissProt:AE003734:54949:55287</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>gene</type>
+      <value>Rab1</value>
+      <rank>0</rank>
+    </featureprop>
+    <featureprop>
+      <type>locus_tag</type>
+      <value>CG3320</value>
+      <rank>0</rank>
+    </featureprop>
+  </feature>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:54949:55287</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+  <feature_relationship>
+    <subject_id>exon:EMBL/GenBank/SwissProt:AE003734:54949:55287</subject_id>
+    <object_id>mRNA:EMBL/GenBank/SwissProt:AE003734:52204:55287</object_id>
+    <type>part_of</type>
+  </feature_relationship>
+</chaos>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family4nl.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family4nl.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family4nl.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,45 @@
+#NEXUS
+begin taxa;
+	dimensions ntax=27;
+	taxlabels
+	Chlamydomonas_reinhardtii_AAB04944.1	Trypanosoma_cruzi_AAB39949.1	Caenorhabditis_elegans_AAB53822.1	Mus_musculus_AAB60902.1	Saccharomyces_cerevisiae_AAB68939.1	Babesia_bovis_AAB69755.1	Candida_albicans_AAB86583.1	Pneumocystis_carinii_AAC24764.1	Penicillium_chrysogenum_AAC36585.1	Tritrichomonas_foetus_AAC47734.1	Tritrichomonas_foetus_AAC47735.1	Trichomonas_vaginalis_AAC48291.1	Toxoplasma_gondii_AAC63943.1	Neurospora_crassa_AAD28503.1	Arabidopsis_thaliana_AAF01529.1	Schizosaccharomyces_pombe_AAF19051.1	Drosophila_melanogaster_AAF57955.1	Oryza_sativa_BAA92737.1	Arabidopsis_thaliana_BAA97372.1	Arabidopsis_thaliana_BAB11186.1	Escherichia_coli_CAA27580.1	Rattus_norvegicus_CAA39937.1	Entamoeba_histolytica_CAA50204.1	Onchocerca_volvulus_CAA57658.1	Candida_sp_CAA72335.1	Caenorhabditis_elegans_CAB02913.1	Arabidopsis_thaliana_CAB87434.1;
+end;
+
+begin characters;
+	dimensions nchar=1035;
+	format datatype=dna missing=? gap=-;
+	matrix
+Chlamydomonas_reinhardtii_AAB04944.1	???atggccctcgcgatgaaggctcaggcttccagcctggtggctggc---------------------------cagcgccgcgctgtgcgccccgcttcgggccgtcgcgctgtgatcactcgc------------------------gctgctctggagctgaagtctcccccctacgctctggatgctctggagccccac---atgagcaagcagaccctggagttccactggggcaagcaccaccgcgcctacgtggataacatgaacaagcaggtcgctggcactccc---ctggac---------------------------ggcaagtcgctggaggagatcgtcctggccagctggaacaatggccag------cccaccccggtgttcaacaacgccgctcaggtctggaaccacactttcttctgggagagcatgaagccc---------aacggtggcggtgcccccaccggc---------gcgctggctgaggccatcacccgcgacttcggcagcctggacaagttcaaggaggagttcaagcaggctggcatgacccagttcggctctggctgggcctggctgaacgccgacaag------------------------------------------------acc------ggcaagctgtcgatcagcaagtcgcccaacgccgtgaaccccgtggtggag------------------ggcaagacccccatcctgactgtcgatgtgtgggagcacgcctactacattgacgtgcagaaccgccgccccgactacatcaccaccttcatggagaagctgatcaactgggacgccgttgctcagcgctacgcccgtgccaccaag??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Trypanosoma_cruzi_AAB39949.1	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtcttcagcattcctccgctcccatggggctacgatgggcttgcggcaaaaggcctctcaaagcagcaggtgacgctccactacgacaaacaccatcaggggtatgtgacgaaactcaacgccgcggcgcagacaaactccgcgcttgca---------------------------acgaaaagcatcgaggaaatcatcaggacggag---aaaggc------------------cccatattcaaccttgcggcgcagatttttaaccacacgttctactgggagagcatgtgtcct---------aatggcggtggcgagccgacggga---------aaagttgccgacgagatcaacgcttcatttggcagttttgcgaagttcaaggaggagtttacaaacgtggctgtgggccactttggctcgggtttggcgtggcttgtgaaggacacc---------------------------------------------aattcc------ggcaaactgaaagtctaccagacgcatgacgcgggatgtccactgacagagccc---------------aacttgaagcctctccttacatgcgatgtatgggagcatgcgtactacgtggactacaagaacgaccgtgcggcatatgtgcagaccttttggaac---gttgtcaactgg------------aagaacgtggaacggcaactt?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Caenorhabditis_elegans_AAB53822.1	???????????????????????????????????????????????????????????????????????????atgctgcaatctactgctcgcactgcttcaaagcttgttcaaccggttgcgggagttctcgccgtccgc------tccaagcacactctcccagatctcccattcgactatgcagatttggaacctgta---atcagccatgaaatcatgcagcttcatcatcaaaagcatcatgccacctacgtgaacaatctcaatcagatcgaggagaaacttcacgaggctgtttcg---------------------aaagggaatctaaaagaagcaattgctctccaacca---------------------------gcgctgaaattcaatggtggtggacacatcaatcattctatcttctggaccaacttggct------------aaggatggtggagaaccttcaaag---------gagctgatggacactattaagcgcgacttcggttccctggataacttgcaaaaacgtctttctgacatcactattgcggttcaaggctctggctggggatggttgggatattgcaag---------------------------------------------aaagac------aaaatcttgaagatcgccacctgtgcaaaccaggat---cctttggaagga---------------------atggtcccactttttggaattgacgtttgggagcacgcctactacttgcagtacaaaaatgtccgcccagactatgtccatgctatttggaag---attgccaactggaagaatatcagcgagagatttgccaatgctcgacaa??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Mus_musculus_AAB60902.1	???????????????????????????????????????????????????????????????????????????atgttgtgtcgggcggcgtgcagcacgggcaggaggctgggccctgtggccggtgccgcgggctcccgg------cacaagcacagcctcccagacctgccttacgactatggcgcgctggagccacac---attaacgcgcagatcatgcagctgcaccacagcaagcaccatgcggcctacgtgaacaacctcaacgccaccgaggagaagtaccacgaggctctggcc---------------------aagggagatgttacaactcaggtcgctcttcagcct---------------------------gcactgaagttcaatggtgggggacatattaatcacaccattttctggacaaacctgagccct---------aagggtggtggagaacctaaagga---------gagttgctggaggctatcaagcgtgactttgggtcttttgagaagtttaaggagaagctgacagccatgtctgtgggagtccaaggttcaggctggggctggcttggcttcaataag---------------------------------------------gagcaa------ggtcgcttacagattgctgcctgctctaatcaggac---ccattgcaaggaacaacaggc------------cttattccgctgctggggattgacgtgtgggagcacgcttactaccttcagtataaaaacgtcagacctgactatctgaaagctatttggaat---gtaatcaactgggagaatgttactgaaagatacacagcttgcaagaag??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Saccharomyces_cerevisiae_AAB68939.1	?????????????????????????????????????????????atgttc---------------------gcgaaaacagcagctgctaatttaaccaagaagggtggtttgtcattgctctccacc---acagcaaggaga------accaaagtcaccttgccagacttgaagtgggacttcggtgcattggaaccttat---atctccggtcaaatcaacgaattgcattacaccaagcaccatcaaacttatgtgaacggattcaacactgctgttgaccaattccaagaactctcagatcttctggccaaggagccctctcccgcaaacgcaagaaaaatgattgctatccaacaa---------------------------aacatcaagttccatggcggtggtttcacaaaccactgtctattctgggaaaacctggctccagagtcg---cagggcggtggtgaaccacccaccggc------gctttggcaaaggcaatcgacgagcagtttggcagtctggacgagctgattaagttgaccaacacaaagctagcaggcgtgcagggctccggatgggccttcattgtgaaaaacctc---------------------------------------------tctaat---ggaggcaagctggacgttgttcaaacctacaaccaggat---accgtcacaggccca------------------ctagttcctctagttgccattgacgcctgggaacacgcctactacttgcagtaccaaaacaagaaagccgactacttcaaagccatttggaat---gtggtcaactggaaagaagcatccagaagattcgatgctggcaagatc??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Babesia_bovis_AAB69755.1	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggccttcaaactaccagcgctcccttacggcatgagggaactcatccctcac---atcagcgaggaaaccttgagcttccactacggcaaacaccatgcaggctacgtcaacaaactaaacagtctcatcaagggc---acacctatggaa---------------------------tcatgcactatcgaagagttgatactcggacaa---accggt------------------gccgtattcaacaacgcagcacaaatatggaaccacaccttctactggaactccatgggacct---------aactgtggaggcgagcccaccggt---------cccatccgcaagaagatcgaggaaaagttcggttctttcagcgcattcaagaccgatttctctaaccttctcgcagggcatttcggatccggttggggatggttggtgctaaaggat---------------------------------------------gatggc---------acagctgatattgtccaaacacatgatgcaggatcaccattaaaagagaat---------------ctagggcgcccactactttgctgtgatgtctgggagcatgcttactacattgactacaagaatgaccgtttgagctacattaacagctggtggaat---cttgtcaactgggacttcgccaataagaacctcgaagctcccttcaagtggtct????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Candida_albicans_AAB86583.1	?????????????????????????????????????????????atgttttctatcagatcatcatctcgtgttttattaaaggcttcttccgcaaccacccgtgctactttgaacgccgctgcttccaagactttcactaga------tctaaatatagtttaccagaattggactatgaattctccgctactgaaccatac---atttctggtcaaataaacgaaattcactacactaaacatcaccaaacttatgttaacaaccttaatgcttcaattgaacaagccgttgaagccaaatct---------------------aaaggtgaagttaaaaaattggttgccttagaaaaa---------------------------gccatcaatttcaacggtggtggttacctcaatcattgtttgtggtggaaaaacttggctcctgtctct---caaggtggtggtcaaccaccaagtgaagattccaaattaggtaaacaaatcgtcaaacaatttggttctttggataaattgattgaaatcaccaatggcaaattggctggtattcaaggttctggatgggcttttattgttaaaaacaaa---------------------------------------------gccaat---ggtgatactattgatgtcatcaccactgctaaccaagat---actgttactgatccaaac---------------ttggttccattgattgctattgatgcttgggaacatgcttattatttgcaataccaaaatgttaaagctgattacttcaagaacctttggcat---gttatcaactggaaggaagctgaaagaagatttgaattt???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Pneumocystis_carinii_AAC24764.1	????????????????????????????????????????????????????????????????????????atgttaaagtcatttagagatttggttttcaaaaaaactattaaaaactctcgtttt------ttttctcat------tcaaaacatgttttaccaagtcttccttatgattatcaggcattagaaccatat---ctttcagcagatttaattgagcttcattataatcaacatcatcgggcatatgttacaaatttaaataagacgatagagaaatattatgagggaaatgaa---------------tcatcattagattcgtttattaaccggttaaatcttttaaca---------------------------tctataaaattttttgcaggtggtcatattaatcattcattatattgggaaaatcttcttcctaataaa---caaggaggcggggaaataattaatgga------cctttagtagaagctattaagaaagaatggaaaagtgtcgatgaatttattcgtatatttaatatgcaattggcaggaattcaaggaagtggatgggcatggcttgtaaaatcg------------------------------------------------cctttt---agtcaacgtctaagtattcaaataacaatgaatcaagat---gttgtgacacagggt---------------------aaagttattcttggaatcgatgcatgggaacattcatattatgttcaatatttaaataataaaacaaaatattttgaaaatatatggaat---gtgatcaattggaaagttatgaatcaacgatttgagcaa???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Penicillium_chrysogenum_AAC36585.1	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgacttct------------------------caaacacacacactgcctcctctcccctatgcctacgatgcgttggagcccgtc---atctccaagcagataatggagctgcatcatcagaagcaccaccaaacttacatcaacaatctgaacgcagcgctctccgcccaagcttctgcaacagca---------------------tcaaatgatgtccccaccttgatttcattgcaacaa---------------------------aagcttcgctttaacggtggaggccatatcaatcactccctgttctggaagaacctgactccccccggt---acacctgcgaatgacatcgctggt---gctcccgctctgcgcgaagccatcgtctcccgctggggctcgcatgaagcgtttgtcaaggcttttggcgccgagctgctcggtcttcaggggagtggctggggatggctggtgagcaagggt---------------------------------------------ggtgcc---aagggacgacttgagatcgttacgactaaggaccaggac---cctgttaatgcacctgat------------------gtgcctgtcttcggtgtggatatgtgggagcatgcctactacctccagtacctgaacaacaaggctggctatgttgaggggatttggaag---atcattcactgggctgaagctgagaagcgttacactgctggcgttgagaacccg---------ctgaagctg??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Tritrichomonas_foetus_AAC47734.1	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgttcagcatcgctccaatcccatacatggaaacaggaatcagtggattc---cttaccaagcacgccgtcgagatccacgtcacaaagcaccaccaagcctacgtcgacttcgctaacaagaatgttccaggaacagaa---ttcgaa---------------------------ggcaagccaatcgaagaaatcatccaaaaggctacagga---------------------ccactcttcaacaacgttgcccaacatttcaaccacgccttcttctggaactgcctcacagcc---------aagaagcaagaagttcca---gct---------ggcgtcgcatctttcctcgccaagcactttgaaagcgttgacaacttcaaggcccaattcgtccaaaaggcttcaacagtcttcggctccggatggtgctacctcgcccaaaacaag------------------------------------------------gac------aagacaatctcaatcaatcaatactcaaacgccctcaacccagtcaaggac------------------ggaggtgttccactcctttgcgttgacacctgggagcacgcctggtacatcgactacgaaaaccgcaaggccgaatacttcaacaaattctgggat---gcctgcaactgggaattccttgagaagaaccttaaggccgccggtctcatt???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Tritrichomonas_foetus_AAC47735.1	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgttcagcatcgaaccagtttcattcttagagtcaggacttccaaacttc---ctcacaccacacgctgtccagatccacgttacaaagcaccaccaaggctacgtcgacatggccaacaaaattgtcccagaaagcgaa---ttcaag---------------------------ggcaaatccgttgaagaaatcattcaaaatgcttcaggt---------------------ccagtcttcaataacgtcgcccagcacttcaatcacagctttttctggaaatgtttgacagca---------acaacacaagaagttcca---gcc---------gccgttgcctcgttcctttcaaagcatttcgaaagcgtcgataacttcaaggcccagttcgtccaaaaggcatcaacagttttcggaagcggatggtgctatcttgccgtcaacaag------------------------------------------------gat------ggctcagtttcaatcaaccaatactcaaacgctctcaatccagtaaaggat------------------ggcggtgttccactcctttgcgttgacacatgggagcacgcctggtatatcgattatgaaaaccgcaaggccgaatacttcaacaaattctggggt---gttgtcaactggaactttgttgaagaaaaccttaagaaagccaaattaatc???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Trichomonas_vaginalis_AAC48291.1	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgttcacaatggagcatcctgcctacttgaagactggtcttccaggcttc---ctcacacagcacgctgtcgaggtccatgttacaaagcaccatcagtcctacattgatacagctaacaagcttatcgttggctctggc---ttcgaa---------------------------ggcaagccaattgaagaaatcatccaaaaggctcagggc---------------------ccactcttcaacaacgttgcccagcacttcaaccactccttcttctggaagtccctctccgct---------gagaaggttgctgttcca---gct---------catgttgctgagctcctcaagaagaacttcggctctgtcgagaagttccaggaaacattcacagctaaggcttcaacagtcttcggctctggctgggcttacctttacaagacaaag------------------------------------------------gac------ggcaagcttgagatcggccagtactccaacgctgctaacccagtcaaggat------------------ggccttacaccaattctcacagtcgatacatgggaacatgcttggtacatcgactacgagaacagaaaggctgagtacttcaagaactactggaac---cacgtcaactggaactttgttgagcagaacttaaaggctgctggtttg??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Toxoplasma_gondii_AAC63943.1	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtattcactttgcccccgctcccctacgcgcatgacgcgcttgctccccat---atcagctcggagacacttcagttccatcatggcaaacaccatgcaggctacgtggctaagctgaatggcttcattgaagga---accgctttcgcg---------------------------ggaaagactctagaagaggtgatccgtacgtct---actgga------------------gccattttcaataatgcggctcaagtgtggaatcatacattctacttcagcagcatgaagccaccaatgtctggtggtggtggagaacctactggg---------agacttctcgacgaaatcaagaaggaattcacatctgttgagaactttaaggacgagttttcgaaggttgccgctggccactttggctcaggttgggcatggcttgtttgggacaaa---------------------------------------------caaggc------aagaaggtcggcattgagcaaactcatgatgcgggcaccccgataacggagccg---------------atgaaggttccgctcctttgctgtgatgtatgggagcatgcctactaccttgacagaaagaacgacaggccagcgtacatcaaagcatggtggaat---gttgtaaactgggacttcgccagcaagaacttggagaatgccttgaaa??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Neurospora_crassa_AAD28503.1	?????????????????????????????????????????????????????????atggtcaatctaggaagcatttggcaaaaccttctcgcctcgcaagctcctctccagtccatgacaggcaacgcaaccaccatggccggcctcgcgacttattcccttccgcagctaccatatgcctacaatgctctggagccctac---atctcagcccagatcatggagcttcaccacagcaagcaccatcagacttacgtgaccaatctcaacaacgccttgaaagttcacgtcgccgccatcgcc---------------------tctagcgatatccccgcccagatcgcccagcagccc---------------------------gccatcaagttcaacggcggcggccacatcaatcactcgctcttctggaagaacctggcccccgctgag---accccagagaccaactactcgaaggcggctccttcgctggcggccgaaattgagaagacatggggtagttttgacgagttcaagaaggccttctctgctgcgctcttgggcatccagggcagtggctggggttggctcgttaaggagtct---------------------------------------------actgccgagaagggaaggctgcgtatcattactaccaaggatcaggat---cccgtcgtgggcggtgag------------------gtccctgtgttcggggtggacatgtgggagcatgcgtactacctccagtacctaaacggaaaggctgcgtatgttgagaatatctggaag---gtgatcaactggaagacagccgaggagcgcttccagggtagtcgtgaggacgcttttgcggacctcaaggctttgcta????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Arabidopsis_thaliana_AAF01529.1	??????????????????????????????????????????????????????????????????atggcgattcgttgtgtagcgagtagaaaaaccctagccggcttgaaggagacatcatcgaggctattgaggatcagagggattcagacttttacgcttcctgatcttccttacgattatggcgcattggaaccggcc---attagtggagagatcatgcagattcatcaccagaagcatcaccaggcttatgttactaattacaataatgctcttgagcagcttgatcaagctgtgaac---------------------aagggagatgcttccactgttgttaagttgcagagc---------------------------gccatcaaattcaacggcggaggtcatgtcaaccattcgattttctggaagaaccttgctccttccagt---gaaggtggtggagagccaccaaaa------ggatctcttggtagtgccattgacgctcactttggctcccttgaaggtctggtgaaaaagatgagtgctgagggtgctgcagtgcaaggctcaggatgggtgtggctcggactagacaaa---------------------------------------------gaactg------aagaagctagttgttgacacaactgccaatcaggat---ccattagtgacaaaaggaggaagc---------ttggtacctctggtgggtatagatgtttgggagcacgcctactacttgcagtacaaaaatgtgaggcctgagtatctgaagaatgtatggaaa---gtgatcaactggaaatatgcaagcgaggtttatgagaaggaaaacaac??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Schizosaccharomyces_pombe_AAF19051.1	?????????????????????????????????????????????????????????????????????atgcttcgctttttgtct------aagaactctgtagccgctattaggaacgtctcaattgccaggggagttcat------actaaggctactcttccccctttaccttatgcttacaatgcacttgaacctgct---ttgtcggaaacgattatgaagttacatcatgacaagcatcaccaaacatatgttaacaacttgaatgccgctcaggagaagctggccgatcccaac---------------------------ctcgatttggagggagaggttgcccttcaagct---------------------------gctattaaattcaatggcggtggtcacatcaatcattctctcttttggaagattttagcacctcaaaag---gaaggtggtggcaaacccgtcacc---tctggatctttacataaggctataacctctaaatggggttctttggaggatttccagaaggaaatgaatgccgccttagctagcatccaaggtagtggttgggcatggctaatcgtggataaa---------------------------------------------gacggt------agt---ctccgtattactactactgctaaccaagac---acgattgtcaagtccaag---------------------cccattattggaattgatgcttgggaacatgcctactatcctcaatacgagaatcgtaaggccgaatactttaaagctatttggaat---gtgattaattggaaagaggccgagtctcgttattccaaccgt????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Drosophila_melanogaster_AAF57955.1	??????????????????????????????????????????????????????????????????????????????????????????atgttcgtggcccgtaaaatttcgcaaactgcaagc------ctggcggtgcgt------ggcaagcacaccctaccgaagctgccctacgactatgccgccctggagcctatc---atctgccgggagatcatggagctgcatcaccagaagcaccaccagacctacgtcaacaatctaaatgccgccgaggagcagctggaggaggccaagtcg---------------------aagagcgacaccaccaagctgattcagctggctcct---------------------------gccctgcgtttcaatggcggtggccacatcaaccacaccatcttctggcagaacctctcg------------cccaacaagacccagcccagcgat---------gatctgaagaaggccatcgagtcgcagtggaagagcctcgaggagttcaaaaaggagctgaccacgctgaccgtggcggtccagggctccggctggggctggctgggcttcaacaag---------------------------------------------aagtcg------ggcaaactgcaactggccgccctgcccaaccaggat---cccctggaggcctccaccggc------------ctgatcccgctcttcggcatcgatgtctgggagcacgcctactatctgcagtacaagaacgtgcgtccctcctacgtggaggccatctgggac---atcgccaactgggatgacatctcgtgccgcttccaggaggccaagaagctcggttgc?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Oryza_sativa_BAA92737.1	???atggatgaggatgctgaagcaaatggt------------------------------------------------------gatgagtcgtcaggcactgatgaagatgcttctgtctcc------------------------------tggatagagcagcagcctcttccttatccttcagatgccctagagccatac---atcagcaaggagacggtggaacagcactggggagttcatcagaacattcacgtcgagaggctcaatggcatgattggtggcagtgag---tgggag---------------------------gggatgtcgctggggcagatgatgctatcctccttcaatgagggcagggaggcaccccatccccccttcttccatgctgcacagatatggaaccatgatttctattggcgatctatgcaacct---------ggtggcgggggaaaacccccagaa---------cgccttttgaaatttatcaacagggactttggatcctatgatggcatgattcgacaatttatggatgctgcatcaactcaatttggttctggatgggtttggctttgttacaaaacaagcaagttgcctcatgtgaaatcaagaagcccaatcccatctgataattat------ggtagactggtcatctcaaagtctccgaatgccatcaatcctcttgtctgg------------------ggtcactct---------------------------catgcatactacctggattatgaggatcggagatctgactatgtctccacatttctagagaagcttgtgtcatgggaaactgttgagtccaggcttaagaaggccgtacaacgggcagtagaaagagatgaatatgttagcacaaagcatataaggaagcaacttttagctcgggcaaagagccaaatcagagctatgcctcagcaggtcaatggggatgcaagagagcagaccagcggtcaagagaagtccctaggggtg
+Arabidopsis_thaliana_BAA97372.1	atgatgaatgttgcagtgacagccactccctcgtctctcttgtactctcctctgcttcttccttctcaagggccaaaccggcgaatgcaatggaaaagaaacggaaagagacggttagggacaaaggtggctgtttccggtgttatcacagctggatttgagctgaagccacctccatatcctcttgatgctctggaaccgcat---atgagccgggaaaccttggattatcactggggcaaacatcacaaaacttatgtagagaacctgaacaagcaaatcttaggcacggat---ctagat---------------------------gcattatccttggaagaagttgtgcttctttcatacaacaaaggcaat------atgcttcctgctttcaacaacgctgcacaggcttggaaccacgagttcttctgggagtctatccaacct---------ggaggtggaggaaagccaactgga---------gagctcctcagattaatagaaagagattttgggtctttcgaagagtttttggaaaggttcaagtcggctgcagcttcgaattttggttcgggttggacatggcttgcatataaggcgaatagacttgacgttgcaaatgccgttaatcctctcccaaaggaggaagac------aagaaacttgttatagtgaagacgcccaatgcagtaaatccgctcgtatgg------------------gattattctccacttctcaccattgatacctgggagcacgcttactatctggattttgagaaccgaagagctgaatacataaatacattcatggaaaagcttgtgtcatgggaaactgtaagcacaaggttggaatccgcaattgctcgagcagtgcaaagagaacaagaaggaacagagacagaagat------------------------gaagagaatccagatgatgaagtaccagaggtctatttagatagtgacatcgatgtatctgaggttgac???????????????
+Arabidopsis_thaliana_BAB11186.1	???atgagttcttgtgttgtgacgacaagctgtttctatacaatttcagattctagtatacgtttgaaatcccccaagctcctcaatctgagtaaccagcagagaagacgctctcttaggtctcgaggtggtttaaag------gttgaagcttactacggtctaaagacacctccttatccacttgatgctttggagccgtat---atgagtagaagaacactagaagtgcattggggaaaacaccatcgaggttatgtagataatctgaataaacagttagggaaagatgatagactctat---------------------------ggatacaccatggaagagcttatcaaggctacatacaacaacgggaat------cctttacccgagttcaacaacgctgcacaggtctataaccatgatttcttctgggagtcgatgcaacct---------ggtggtggagacacgcctcaaaag---------ggtgttcttgagcagattgataaggattttggttctttcacaaattttagagaaaagttcactaatgcagctcttactcagtttggttctggatgggtctggcttgtcttaaagagg---------------------------------------------gaagag------agaaggcttgaggtggtcaaaacctcaaacgccattaacccactcgtgtgg------------------gacgatattccaatcatctgcgtggatgtgtgggagcactcttattatctggactacaagaacgacagggctaagtatataaacacatttctgaaccacttggtgtcgtggaacgctgccatgagtcggatggcccgtgcagaagcgtttgtgaatcttggtgaa------------------------------------------------------------------------------------------cccaacatcccaatcgct????????????????????????
+Escherichia_coli_CAA27580.1	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgagctataccctgccatccctgccgtatgcttacgatgccctggaaccgcac---ttcgataagcagaccatggaaatccaccacaccaaacaccatcagacctacgtaaacaacgccaacgcggcgctggaaagcctgccagaatttgccaacctgccggttgaagagctgattaccaaactggaccagctgccagcagacaagaaaacc---------------------------gtactgcgcaacaacgctggcggtcacgctaaccacagcctgttctggaaaggtctgaaa---------------aaaggcaccaccctgcagggt---------gacctgaaagcggctatcgaacgtgacttcggctccgttgataacttcaaagcagaatttgaaaaagcggcagcttcccgctttggttccggctgggcatggctggtgctgaaaggc---------------------------------------------gataaa------------ctggcggtggtttctactgctaaccaggattctccgctgatgggtgaagctatttctggcgcttccggcttcccgattatgggcctggatgtgtgggaacatgcttactacctgaaattccagaaccgccgtccggactacattaaagagttctggaac---gtggtgaactgggacgaagcagcggcacgttttgcggcgaaaaaa?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Rattus_norvegicus_CAA39937.1	???????????????????????????????????????????????????????????????????????????atgttgtgtcgggcggcgtgcagcgcgggcagaagactgggccccgcggccagtaccgcgggctcccgg------cacaagcacagcctccctgacctgccttacgactatggcgcgctggagccgcac---attaacgcgcagatcatgcagctgcaccacagcaagcaccacgcgacctacgtgaacaatctgaacgtcaccgaggagaagtaccacgaggcgctggcc---------------------aagggagatgttacaactcaggttgctcttcagcct---------------------------gcactgaagttcaatggcgggggccatatcaatcacagcattttctggacaaacctgagccct---------aagggtggtggagaacccaaagga---------gagttgctggaggctatcaagcgtgactttgggtcttttgagaagtttaaggagaaactgacagctgtgtctgtgggagtccaaggttcaggctggggctggcttggcttcaataag---------------------------------------------gagcaa------ggtcgcttacagattgccgcctgctctaatcaggac---ccactgcaaggaaccacaggc------------cttattccactgctggggattgatgtgtgggagcacgcttactatcttcagtataaaaacgtcagacctgactatctgaaagccatttggaat---gtaatcaactgggagaatgttagccaaagatacatagtttgcaagaag??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Entamoeba_histolytica_CAA50204.1	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtctttccaattaccacaattaccttatgcttataatgctcttgagcctcat---attagtaaagagactcttgaattccatcatgataagcatcacgctacttatgttaataagttaaatggtcttgtaaaagga---actgaacaagaa---------------------------cataaaactcttgaagaattaattaaacaaaagccaactcaa------------------gcaatttataataatgcagcccaagcatggaatcatgcattctattggaaatgtatgtgtgga------------tgtggagttaaaccatctgaa---------caattaattgctaaattaactgctgcttttggaggattagaagaatttaagaagaagtttactgagaaagctgttggacattttggaagtggatggtgttggttggttgaacatgat---------------------------------------------ggt------------aagttagagattattgatactcatgatgctgttaatccaatgaccaat------------------ggaatgaaaccattattaacttgtgatgtttgggaacatgcttattacattgacactagaaacaacagagctgcttacttagaacattggtggaat---gtagtcaactgg------------aagttcgttgaagaacaactc?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Onchocerca_volvulus_CAA57658.1	???????????????????????????????????????????????????????????????????????????atgaatctgattattggcgtcgcaggtcgcttattggttggcaagaactattgtctgaatacacaacga------ctaaaacatgttcttcctgatttaccatacgattatggagccttggagccgata---ttgtcggccgaaattatgcaagttcatcacggtaaacatcatgctgcatatgtaaatgctcttaatcaagccgaagagaaagtaaaagaagcgctagca---------------------aaaggagatacacaggctgctgttgctggcacaaaa---------------------------ttaatgaatttcaacactggcggacacattaatcatacactgttctgggaaggattaactgccgta------aagaatagcggagaaccgaattct---------gaattaatgacggctataaagaaggatttcgggtctttggaaacgatgatagataagttaaatgctaaaacaattgcgatccaaggttccggatggggatggcttgcttatgacaag---------------------------------------------gaaatg------aaacgtttgcaacttgcttgttgtcccaaccaagat---cttcttgagccaacaacaggt------------ctaattccactattttgcatcgatgtctgggaacatgcttattatctgcagtataaaaatttacggccagatttcgtgaaagctatttggaaa---attgcgaattggaaaataatcagtgatcgctacattaaagctagagga??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Candida_sp_CAA72335.1	?????????????????????????????????????????????atgctt------------tcttctgctatcaagagatccgccgtcggtattgctagacgttccgtcgtgtcttcttccgttgga------gccgtcaga------accaaagtatcgttaccagacttggattgggacttcggagccttggagcctcac---atttcgggacagatcaacgaaatccactacaccaagcaccaccaaacttacgttaacggttacaaccaagccattgagcaagcagccaggccaagg------------------------caaggggaggtcaagaagaccattgaattgcaaaag---------------------------gccatcaacttccacggtggtggatacaccaaccactgtttgttctggaagaacttggctccagagaag---caaggaggtggtgagccacctgctgaagactctgagtttgccaagagaatcgttgagcaatacggctccctcgacaacttgaaggctatcaccaacggtaagttggccggtatccaaggttctggttgggctttcattgtcaagaacaag---------------------------------------------gaaaac---ggtggtgctttagatgttatcactactgcaaaccaagat---actgttcttggacca------------------tttgttccattggttgctattgatgcttgggaacatgcctactacttgcaataccagaacgtcaaggctgactacttcaaggccatctggaac---gttatcaactggaaggaagccgagaagagatacttgatcaac????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Caenorhabditis_elegans_CAB02913.1	???????????????????????????????????????????????????????????????????????????atgcttcaaaacaccgttcgctgtgtctcaaagcttgttcaaccgatcacaggagtcgctgctgttcgc------tcgaagcactcgctgccagatttaccatacgactatgctgatttggagcctgta---atcagtcacgagattatgcaacttcatcatcaaaagcatcatgccacttatgtgaacaatctcaaccaaattgaggaaaagcttcacgaggcggtctcc---------------------aaaggaaacgtcaaagaagctatcgctcttcagcca---------------------------gctctcaagttcaatggaggaggacatatcaaccactccatcttctggactaatttggca------------aaggacggaggagaaccatcggcg---------gagttgctcaccgcaattaagagcgacttcggatctctggataatcttcaaaaacagctttcggcatcaactgtcgctgttcaaggatcaggatggggatggttgggatactgtcca---------------------------------------------aaggga------aagatcttgaaggttgccacatgtgccaatcaggat---ccacttgaggcaacaactgga------------cttgttccactgttcggaattgacgtctgggagcacgcttactacttgcagtacaagaatgttcgaccagattatgtcaatgctatttggaag---atcgccaactggaagaacgtcagcgagcgttttgcaaaggcacagcaa??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+Arabidopsis_thaliana_CAB87434.1	???????????????????????????????????????????????????atgacgaccaccgttattatcattatcttcgttgccatcttcgctacgactcttcacgacgccagaggagcaaccatggaaccgtgtctcgaatcaatgaagactgcttcgcttcctgatcttccgtatgcttatgatgcgctagagccagca---atcagtgaggagatcatgcggctacaccatcagaaacaccaccagacttatgtcactcagtacaacaaagccctcaacagccttcgttctgccatggct---------------------gacggtgatcactcctccgtcgtcaaactccaaagc---------------------------ctcatcaagttcaacggcggagggcacgttaaccatgcaatcttctggaaaaacctagccccggttcat---gaaggaggtggcaaaccaccgcat------gatcctttggcttcggcgatcgatgctcatttcggatcactagaaggattgatccaaaaaatgaacgcggaaggcgctgctgtacagggatctggatgggtgtggtttggattagacaga---------------------------------------------gagctt------aagagacttgtcgttgaaaccacagccaaccaggat---ccattggtgactaagggatcacac---------ctagttcctctaattgggattgacgtgtgggagcatgcctactatccacagtacaagaatgcaagagcggagtacttgaagaacatatggact---gtaatcaactggaaatatgcagcggacgtcttcgagaagcacactcgtgatctt------gatattaac?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+;
+end;
+
+begin trees;
+	tree Mn_Fe_superoxide_dismutase = (((((Penicillium_chrysogenum_AAC36585.1:0.34051,Neurospora_crassa_AAD28503.1:0.27339)7:0.26409[1],(Schizosaccharomyces_pombe_AAF19051.1:0.36196,Pneumocystis_carinii_AAC24764.1:0.71151)8:0.12214[0.943])6:0.06412[0.513],(Candida_albicans_AAB86583.1:0.24934,(Candida_sp_CAA72335.1:0.18954,Saccharomyces_cerevisiae_AAB68939.1:0.32136)5:0.07268[0.61])4:0.25434[1])3:0.08564[0.791],((Arabidopsis_thaliana_CAB87434.1:0.31765,Arabidopsis_thaliana_AAF01529.1:0.21348)10:0.27524[1],(Drosophila_melanogaster_AAF57955.1:0.35616,((Onchocerca_volvulus_CAA57658.1:0.50902,(Caenorhabditis_elegans_CAB02913.1:0.15628,Caenorhabditis_elegans_AAB53822.1:0.11313)15:0.15295[1])14:0.11584[0.994],(Rattus_norvegicus_CAA39937.1:0.03387,Mus_musculus_AAB60902.1:0.02684)13:0.23219[1])12:0.08725[0.873])11:0.14599[1])9:0.10103[0.724])2:0.21049[1],(((Toxoplasma_gondii_AAC63943.1:0.21401,(Babesia_bovis_AAB69755.1:0.3377,Entamoeba_histolytica_CAA50204.1:0.47354)19:0.08578[0.743])18:0.09059[0.924],Trypanosoma_cruzi_AAB39949.1:0.35308)17:0.14755[1],((Trichomonas_vaginalis_AAC48291.1:0.12536,(Tritrichomonas_foetus_AAC47735.1:0.1604,Tritrichomonas_foetus_AAC47734.1:0.09764)22:0.20185[1])21:0.38885[1],(Chlamydomonas_reinhardtii_AAB04944.1:0.28584,((Arabidopsis_thaliana_BAA97372.1:0.29163,Oryza_sativa_BAA92737.1:0.56464)25:0.13153[0.967],Arabidopsis_thaliana_BAB11186.1:0.43387)24:0.21097[1])23:0.12537[0.998])20:0.09524[0.633])16:0.25459[1],Escherichia_coli_CAA27580.1:0.39688)1;
+end;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family7n.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family7n.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family7n.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+#NEXUS
+begin taxa;
+	dimensions ntax=71;
+	taxlabels
+	otu_1	otu_2	otu_3	otu_4	otu_5	otu_6	otu_7	otu_8	otu_9	otu_10	otu_11	otu_12	otu_13	otu_14	otu_15	otu_16	otu_17	otu_18	otu_19	otu_20	otu_21	otu_22	otu_23	otu_24	otu_25	otu_26	otu_27	otu_28	otu_29	otu_30	otu_31	otu_32	otu_33	otu_34	otu_35	otu_36	otu_37	otu_38	otu_39	otu_40	otu_41	otu_42	otu_43	otu_44	otu_45	otu_46	otu_47	otu_48	otu_49	otu_50	otu_51	otu_52	otu_53	otu_54	otu_55	otu_56	otu_57	otu_58	otu_59	otu_60	otu_61	otu_62	otu_63	otu_64	otu_65	otu_66	otu_67	otu_68	otu_69	otu_70	otu_71;
+end;
+
+begin characters;
+	dimensions nchar=1557;
+	format datatype=dna missing=? gap=-;
+	matrix
+otu_1	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtcgaag------atcggaatcaac---------------------------------------ggatttggccgcatcggccgcttggtgctccgcgccgccatcgataag---------ggc------gcctccgtggtggccgtcaacgatcccttcatcgatgtcaactacatggtttacctgtttaaattcgactcgactcacggtcgtttcaag------------------------ggc------accgttgcggctgagggcggattcctggtggtgaac---------------ggccagaagatcaccgtgttcagcgag---cgcgacccggccaacatcaactgggccagtgctggagccgagtatgtggtggagtccaccggagtgttc---------------------------------accaccatcgacaaggcgtccacccacttgaagggcggtgccaagaaggtcatcatctcggccccatcc---gcc---gatgcgcccatgttcgtgtgcggcgttaacctggacgcctacagcccc---gacatgaaggtggtctcc---------------------------aacgcctcgtgcaccaccaactgcctggctcccctggccaaggtcatcaat---gacaacttcgagatcgtcgagggtctgatgaccaccgtgcacgccaccactgccacccagaagaccgtcgacggtccctctggcaaactgtggcgcgatggacgtggcgccgcccagaacatcatcccggccgccaccggagccgccaaggctgtgggcaaggtcatccccgccctgaac---ggcaagctgaccggcatggctttccgcgtgcccacgcccaatgtctccgttgtggatcttaccgtccgcctgggcaaggga---gccacctatgacgaaatcaaggctaaggtcgaggaggcctccaaggga------------cccctgaagggaatcctgggctacaccgacgaggaggtggtctccaccgacttcctcagcgacacccattcgtctgtgttcgacgccaaggctggcatttcgctgaac------------gataagttcgtcaagctaatctcgtggtacgacaacgagttcggttactccaaccgcgtcatcgacctgatcaagtatatgcagagcaaggac??????????????????????????????????????????????????????????????????????????????????????????
+otu_2	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttaatgttggaatcaat---------------------------------------gggtttgggaggatcggacgtctcgtcctcagaaatgcattacaaatg---------caa---attctcactgttgtagctgttaatgatcctttccttgacgttgaatacatggcgtatctgttcaagtatgattccgttcatggacgatatcaa------------------------gga------aaagtcgaaaccaaggacgggaaattgatcattgat---------------ggacataaaatcgcggctttcgcagaa---cgtgaaccggcaaatattaaatgggccgattgcggcgctgagtacatcgttgaatctaccggcgtgttc---------------------------------aaaacagaagaattagcgaaggagcatttgaagggtggggccaaaaaagttgtcatcaccgctcctggg---agt---ggcgtacccacatacgtcgttggtgtcaatctggataaatacgatcct---aaagaagttgtgatttca---------------------------aatgcttcgtgcactaccaattgcctagcagtcctggcgaaggtcatcaat---gacaaatttggaattgtggaaggcttgatgacgacagtgcatgccaccacagccacgcagaagactgtcgatgctcctgcaaagaaggattggcgttctggaaggagtgttacaaataacatcattccagcatctacgggtgccgctaaagctgttacaaaggcgattcctgatttggag---ggaaaactcactggactggcattccgagtcccgacactcgacgtatcggttgttgacctcgtcgttcgcctcgaaaaggaa---accagttacgatgacgtcaaaaaagccatgagggacgcagccgacggtaaacacccgggcatcgagaaaggcattgtcgactatacggaagaagacgttgtttccaccgatttcgttgggagcaactattcgatgatctttgacgcaaaagccgggatcgcgttgaac------------tcgcgttttatgaagttggttgcatggtatgataatgagtggggatatgcgcgtagagtctgcgatgaggttgtgtatgtagcgaagaagaat??????????????????????????????????????????????????????????????????????????????????????????
+otu_3	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttaaagttggaatcaac---------------------------------------ggtttcggtcgtatcggccgcattgtcctccgtaatgctctccaattc---------cag---gacatcgaagttgtcgccgtgaacgacccgttcattgacctcgaatacatggcatacatgttcaagtacgactccgtccacggtcgcttcaag------------------------ggt------accgttgaggtcaagaacggcagctttgtcgttgac---------------ggcaggcctatgaaagtctttgctgaa---cgcgatcccgctgccatcccttggggttcagtcggcgcggactacgtcgtggaatccacaggtgtattc---------------------------------actactatcgacaaggcttcggctcacttgaaggggggcgccaaaaaagtcgttatctccgctccttcg---gcc---gatgcgccgatgtatgtctgcggtgttaaccttgacaagtacaatccc---aaggacacaattatctcg---------------------------aacgcttcttgcacaaccaattgcttggctactcttgctaaagtcattcac---gataactttggtatcgttgagggtctgatgaccactgttcacgccaccaccgctactcaaaagactgtggatggtccttctcacaaggactggcgtggtggccgtggtgtcggcaataacatcattccttcctctactggcgccgccaaggccgtcggaaaggttatcccttcactcaac---ggcaagctcactggtctctcgatgcgtgttcccactcaggacgtttccgttgtcgatcttgttgttcgtcttgagaagccc---gcttcctatgaacagatcaaggaggtcatgcgcaaggccgctgaaggc------------gaatacaagggaattatcgcatacaccgacgaggacgtggtttccactgacttcattagtgataacaattcttgtgtcttcgatgcgaaggccggaattcagcttagc------------ccgaactttgtcaagctgattgcttggtacgataacgaatggggatactcgcgccgtgtttgcaacctcctccaatacgttgcaaaggaggacgccaaggctggcatt???????????????????????????????????????????????????????????????????????????
+otu_4	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctgacaagaagattaggatcggaatcaac---------------------------------------ggattcggaagaattggtcgtttggttgctagagttgttctccagagg---------gac---gatgttgagctcgtcgctgtcaacgaccccttcatcactactgagtacatgacctacatgttcaagtacgacagtgttcacggtcaatggaaa------------------------cacaatgaactcaagatcaaggatgagaagacccttctcttcggt---------------gagaagccagtcactgttttcggcatc---aggaaccctgaggatatcccatgggccgaggctggagctgactacgttgttgagtctactggtgtcttc---------------------------------actgacaaagacaaggctgcagctcacttgaagggtggtgccaagaaggttgttatctctgaacccagc---aaa---gacgctccaatgtttgttgttggtgtcaacgagcacgaatacaagtcc---gaccttgacattgtctcc---------------------------aacgctagctgcaccactaactgccttgctccccttgccaaggttatcaat---gacagatttggaattgttgagggtcttatgactacagtccactcaatcactgctactcagaagactgttgatgggccttcaatgaaggactggagaggtggaagagctgcttcattcaacattattcccagcagcactggagctgccaaggctgtcggaaaggtgcttccagctcttaac---ggaaagttgactggaatgtctttccgtgtcccaaccgttgatgtctcagttgttgaccttactgtcagactcgagaaagct---gctacctacgaagaaatcaaaaaggctatcaaggaggaatccgaaggc------------aaactcaagggaatccttggatacaccgaggatgatgttgtctcaactgacttcgttggcgacaacaggtcgagcatttttgacgccaaggctggaattgcattgagc------------gacaagtttgtgaaattggtgtcatggtacgacaacgaatggggttacagttcccgtgtggtcgacttgatcgtccacatgtcaaaggcc?????????????????????????????????????????????????????????????????????????????????????????????
+otu_5	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctcccaaggtcggaatcaac---------------------------------------ggcttcggtcgtattggacgcatcgttttccgtaacgccatcgaggcg---------ggt---accgtcgatgttgttgccgtcaacgaccctttcatcgagacccactacgctgcctacatgctcaagtatgactcacagcacggtcagttcaag------------------------ggc------accattgagacctacgacgagggtcttattgtcaac---------------ggcaagaagatccgcttccacaccgag---cgtgaccccgccaacatcccctggggccaggacggtgctgaatacattgtcgagtccaccggtgtcttc---------------------------------actacccaggagaaggctagcgctcacctgaagggtggtgccaagaaggttgtcatctctgccccatct---gct---gatgcccctatgttcgtcatgggtgtcaacaacgagacctacaagaag---gacattcaggtcctctcc---------------------------aacgcttcttgcaccaccaactgccttgcccctctcgccaaggtcatcaac---gacaacttcggtatcatcgagggtctgatgaccaccgtccactcctacactgctacccagaaggtcgtcgacggcccctcggccaaggactggcgtggtggccgtaccgctgctaccaacatcatcccctcctccactggtgctgccaaggctgtcggcaaggtcattccttcgctcaat---ggcaagctcaccggcatggcgatgcgtgttcccacctccaacgtctccgttgttgacctgaccgtccgcaccgagaaggct---gttacctacgaccagatcaaggatgccgtcaagaaggcttctgagaac------------gagctcaagggcatccttggctacaccgaggacgacatcgtctctaccgacctcaacggtgacacccgctcttccatcttcgatgctaaggcgggtattgccctcaac------------tccaacttcatcaagctcgtttcctggtacgacaacgagtggggttactcccgccgtgttgttgacctcatcacctacatctccaaggttgatgcccaa????????????????????????????????????????????????????????????????????????????????????
+otu_6	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgccggtcaaagcaggaatcaac---------------------------------------ggtttcggtcgtattggccgtatcgtcctccgtaatgctctccttcac---------gga---gacattgacgtcgtcgcggtcaacgaccccttcattgacctcgagtacatggtctacatgttcaagtacgactccgttcacggtcgcttcaag------------------------ggt------tccgtcgaggccaaggacggcaagctctatgtcgag---------------ggcaagcctatccacgtcttcgctgag---aaggacccggccaacatcccttggggctctgtcggcgctgagtacatcgtcgagtcgaccggtgtgttc---------------------------------accaccaccgagaaggcctctgcccacttgaagggcgtgtgcaagaaggtcatcatctcggccccctct---gct---gatgcccccatgtttgtctgcggtgtcaacctcgacgcgtacgactcc---aagtacaaggtcatctcg---------------------------aacgcgtcctgcaccaccaactgcttggcgccccttgccaaggtcatccac---gacaagttcggcattgtgcagggtctcatgacctccgtccacgctaccaccgctacccagaagaccgtcgatggcccctcgaacaaggactggctgggtggccgttccgtcggcaacaacatcatcccctcgtcgactggtgccgccaaggccgtcggcaaggtcatcccttcgctcaac---ggcaagctgaacggtctcgccttccgtgtccccaccgtcgatgtctccgtcgtcgacctcgtcgtccgtctcgagaagccc---gcttcttacgacgagatcaagcaggccatcaaggaggcgtcggagacc------------acccacaagggcatcctcggctacaccgaggagaaggtcgtctccaccgacttcaccggcaacgacaactcgtcgatcttcgatcgtgacgcgggtatcgcgctcaac------------aagacattcgtcaagctcatctcctggtacgacaacgagtggggctactcccgccgttgctgcgacctcctcggttacgccgcgaaggtcgacggtgccctc?????????????????????????????????????????????????????????????????????????????????
+otu_7	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggccgtgaaggttggaattaac---------------------------------------gggttcggtcgcattggccgtatcgttctccgtaatgcccttcagctg---------ggc---aacatcgaggtcgtcgccatcaacgacccgttcatcgcccttgactacatggtctacatgttcaagtacgacaccgtccacggccgctacaag------------------------ggc------accgtcgaggtgaaggacggcaagctcgtcgtcgac---------------ggccatgccatcaccgtcttcgctgag---aagaaccccgccgacatcaagtggggctctgctggcgccgactacatcgtcgagtccaccggtgtcttc---------------------------------accaccgtcgagaaggcgtcgctgcacttgcagggcggcgcgaagaaggtcgtcatctcggcgccctcc---gcc---gacgccccgatgttcgtcgtcggcgtcaacctcgacaagtacgactcg---aagtaccaggtcatctcg---------------------------aacgcgtcctgcacgaccaactgcctggcgccgctcgcgaaggtcatccac---gacaagtacggcatcgcggagggtctcatgaccaccgtgcacgccacgaccgcgacccagaagaccgtcgacggcccgtcgcacaaggactggcgcggtggccgctcggtcaacaacaacatcatcccctcgtcgactggtgcggctaaggccgtcggcaaggtcatcccctcgctcaac---gggcgcctcaccggcctcgccttccgtgtcccgaccctcgacgtctccgtcgtcgacctcgtcgtccgcctcgagaaggag---gcctcgtacgacgagatcgtcgcgaccgtcaaggaggcgtccgagggc------------ccgctcaagggcatcctcggcttcaccgacgagtccgtcgtctcgaccgacttcaccggcgcgaacgagtcgtccatcttcgactccaaggccggcatcgcgatcagc------------aagagcttcgtgaagctcattgcctggtacgacaacgagtggggctactcccgccgtgtctgcgacctgctcgtctacgcggcgaagcaggacggcgcgctc?????????????????????????????????????????????????????????????????????????????????
+otu_8	atggcaactcatgctgctttagcttctacaagaatcccaacaaacacaaggtttccatctaagacctct---cactctttcccatctcaatgtgcctcaaagagacttgaggtaggtgaattctctggactcaaatcaacttcatgtatttcctatgttcatagtgctagagattcttctttttatgatgttgtagctgctcaactcacttccaaggcaaatgga---tcaactgctgtgaaggga---------gtgactgtggctaagttgaaggtagcaatcaat---------------------------------------ggttttggacgcatcggtagaaatttccttcgatgctggcacggtcgaaaggactcg------ccacttgaagtcattgttgtcaatgac---agtggaggtgtcaagaatgcttcacatttgttgaaatatgattctatgcttggaacttttaaa------------------------gca------gaagtgaagatactaaacaatgagactattacagtt------------gatggtaaacccatcaaggttgtctctagcagagat---cctcttaagcttccttgggccgaacttggaattgacattgttattgagggaacaggagtgttt---------------------------------gtggacggccctggcgcgggcaaacacatccaagcaggtgccaagaaagttatcatcactgctcctgcaaagggtgctgatattccgacttacgttattggagtgaacgaacaagactacggccatgaagtagccgacatcataagc---------------------------aatgcttcttgcaccacaaactgtcttgctccctttgctaaggtcctggat---gaagagttcggaatcgttaagggaaccatgacaaccacacattcctacaccggagaccagaggcttttggat---gcttcacatagggacttgagaagagctagagctgcagcactgaacattgttccgaccagcacaggagcagccaaggctgtatctctagtgttgccacagctcaag---ggaaagctcaacggaatcgccctccgtgtgcctacgcctaatgtttcagttgttgaccttgtggtcaatgttgcgaagaagggtatatcagctgaagatgtcaatgcagcattcagaaaggcagctgaggga------------ccactgaaaggtatattggatgtctgtgatgttccgctcgtgtctgttgacttccgctgctccgatgtttctacaactattgactcttccttgactatggtcatgggagatgat------------atggttaaggtggttgcttggtatgacaatgaatggggttacagccaaagagtggtggatttggcacatctagtagcaaacaaatggccaggaacccctaaagtagggagtggagatccattggaggacttctgcgagacgaatccggccgacgaggaatgcaaagtttatgaa???
+otu_9	??????????????????atggccgccatgatgcag---------------------------------------------------------------------------aagagcgccttcaccggcagcgccgtgtcctccaag---------------------------------------------------tctggcgtccgcgccaaggctgcccgcgccgtcgtcgacgtgcgc---------------gcggagaagaagatccgcgtggccatcaac---------------------------------------ggcttcggtcgcattggccgcaacttcctgcgctgctggcacggtcgccagaacacc------ctgctggacgtggttgccatcaacgac---agcggcggtgtcaagcaggccagccacctgctgaagtacgactccaccctgggcacgttcgcc------------------------gcc------gatgttaagatcgtcgacgacagccacatctcggtg------------gacggcaagcagatcaagattgtgtccagccgcgac---cccctgcagctgccctggaaggagatgaacatcgacctggtcattgagggcactggtgtcttc---------------------------------attgacaaggttggcgctggcaagcacatccaggccggtgcctccaaggtgctgatcaccgcccccgccaaggacaaggacatccccaccttcgtggtcggtgtgaacgagggcgactacaagcacgagtacccc---atcatctcc---------------------------aacgcctcgtgcaccaccaactgcctggcccccttcgtcaaggtgctggag---cagaagttcggcattgtcaagggcacgatgaccaccacccactcctacaccggtgaccagcgcctgctggac---gcgtcccaccgcgacctgcgccgcgcccgcgccgccgccctgaacattgtgcccaccaccaccggtgccgccaaggccgtgtcgctggtgctgcccagcctgaag---ggcaagctgaacggcattgccctgcgcgtgcccacccccaccgtgtcggtcgtcgacctggtcgtccaggttgagaagaag---accttcgccgaggaggtgaacgccgccttccgcgaggccgccaacggc------------cccatgaagggcgtgctgcacgtcgaggacgcccccctggtgtccattgacttcaagtgcaccgaccagtcgacctccatcgacgcctccctgaccatggtcatgggcgacgac------------atggtcaaggtcgtggcctggtacgacaacgagtggggctactcccagcgcgtggtcgacctggctgaggtcaccgccaagaagtgggtggcg????????????????????????????????????????????????????????????????????????????????????
+otu_10	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgggcaagatcaagatcggaatcaac---------------------------------------ggtttcggaaggatcggcaggctcgtggccagggtcgccctgcagagc---------gag---gatgtcgagctcgtcgccgtcaacgaccccttcatcaccacggattacatgacgtacatgttcaagtacgacaccgtgcacggccaatggaag------------------------cacagcgacatcgccctcaaggactccaagacgcttctcttcggc---------------gagaagccggtcaccgtctttggcatc---aggaaccccgaggagatcccgtggggtgaggctggtgctgagtatgtcgtggagtccaccggtgtcttc---------------------------------actgacaaggacaaggctgctgcacatctgaagggtggtgccaagaaggttgttatctctgccccaagc---aaa---gatgcgcccatgtttgttgttggtgtcaatgaggacaagtacacctcc---gatgttaacattgtttcc---------------------------aatgctagctgcaccacaaactgccttgctccccttgctaaggtcattcat---gacaactttggcattattgagggcttgatgacaactgttcatgccatcactgccacccagaagactgttgatggaccctcagccaaggactggagaggtggcagggctgccagctttaacatcattcccagcagcaccggtgctgccaaggctgtcggtaaggttcttcctgaattgaat---ggcaagctcactggcatgtccttccgggttcccaccgtggatgtgtcagttgttgacctcactgtcagaatcgagaagggg---gcctcttacgaggaaatcaagaaagctattaaggctgcttctgagggc------------ccactcaagggtattatgggctacgtggaggaggatctggtttccaccgacttcaccggtgacagcaggtcgagcatcttcgacgccaaggccgggattgccctgaac------------gaccacttcatcaagctcgtctcttggtacgacaacgagtggggctacagcaaccgcgtcgtcgacctgatccgccacatgttcaagacccag??????????????????????????????????????????????????????????????????????????????????????????
+otu_11	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttgtcaggattgcaatcaac---------------------------------------ggtttcggcagaattggacgtcttgtcttccgcgcagttcgcgatctctatccaaaa------gaatgccaaattgtcgctgttcacgat---ctttgcgatatcgccactaacgttcatttattgaactacgattccgctcatcaaaggttccca------------------------gaa------caacttacagttacagctgaagataccttcgaagttggcacaggctccgacaagtgggttgtcaaaaatcttacaggcagattaggtccatcccaactcccatggaaggaattagatgtcgatgtcgttttagaatccactggactcttccgcacccactgtgttaagggagaagacggaagcgtcacaaaagatggatatgacggacatttattagccggtgccaagaaagtcgttttatcagtaccttcagccgacgaaatcgaatgcacacttgttttaggggtcaacgacgaagacctcaagccagatacaaac---tgcatttca---------------------------aacgcctcatgcacaacaaattgcctcgcaccagtcatcaaagtgttgaac---gacacattcggcatcagaaacggatacatgacaactgttcacgcctacacaaacgatcaagttgtttcagat---atcatgcataaggatctccgccgtgcccgtgcagctgcaatgaacatcattccaacatcaacaggagctgccatcgccctcacacgtgttgttaagaacctcacacgcggatgcatggatggcctcgctttacgcgttccagccatcacaggcagcctcgtcgacatcaccgtcaacacacgtgaaaag---gtaaccaaggatcaagttaatcaagccctcaaggctgcttcagaatccgaa---------gccctccgtggaatccttggctacactgacgagccaatcgtcagctcagatataattggtgacagacattcatccatcgttgactcactctctacaatggttctcgacaacgaaaagggtggaagcctcgtaaaggtcctctcatggtacgataacgaatggatgtactcatgccgctgcgctgatattttccaccatcttgaacagtacacaaag???????????????????????????????????????????????????????????????????????????????????????
+otu_12	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctcccatcaaggttggcatcaac---------------------------------------ggtttcggccgtatcggtcgtatcgtcttccgcaacgctgtcgagcac---------ccc---gacgtcgagatcgttgccgtcaacgaccccttcattgagaccaagtacgctgcctacatgctcaagtacgactccacccacggcattttcaac------------------------ggc------gagattgcgcaggatggcaacgaccttgtcatcaac---------------ggcaagaaggtcaagttctacactgag---cgcgaccccgctgtcatcccctggaaggagaccggcgccgactacgtcgtcgagtccactggtgtcttc---------------------------------accaccatcgacaaggccaaggcacatcttcagggcggtgccaagaaggtcatcatctctgctccctcc---gcc---gacgcccccatgtacgtgatgggtgtcaacgagaagtcctacgacggc---agc---gcagtcatctcc---------------------------caagcttcttgcaccaccaactgcctggctcccctcgccaaggtcatcaac---gacaagtacaccatcattgagggtctcatgaccaccgtccactcctacaccgctacccagaagaccgttgacggtccctccgccaaggactggcgtggtggccgcactgctgctcagaacatcattcccagcagcactggcgcgcccaaggctgtcggcaaggtcattcctgagctcaac---ggcaagctcaccggcatgtccatgcgtgtccctaccgccaacgtctccgtcgttgacctgactgtccgcatcgagaagggc---gccacgtacgacgagatcaagcaggccatcaaggaggcggctgagggt------------cccctcaagggcgtcttggcctacaccgaggacgacttcgtctccaccgacatgatcggcaaccccaactcctccatcttcgacgccaaggccggtatctccctgaac------------aacaacttcgttaagctggtctcctggtacgacaacgagtggggttactcccgccgtgtcctcgacctcttggcccacgtcgccaaggtcgatgcctccaag?????????????????????????????????????????????????????????????????????????????????
+otu_13	?????????????????????????????????????????????atgtcgaagcgcgacatcgtcctcaccaatgtcaccgttgtccagttgctgcgacagccgtgcccggtgaccagagcaccgcccccacctgagcctaaggctgaagtagagccccagccacaaccagagcccacaccagtcagggaggaaataaagccaccaccgccaccactgcctcctcaccccgctactcctcctcctaagatggtgtctgtggcccgggagctgactgtgggcatcaat---------------------------------------ggatttggacgcatcggtcgcctggtcctgcgcgcctgcatggagaag---------ggt------gttaaggtggtggctgtgaatgatccattcattgacccggaatacatggtgtacatgtttaagtatgactccacccacggccgatacaag------------------------gga------agtgtggaattcaggaatggacaactggtcgtggac---------------aaccatgagatctctgtctaccagtgc---aaagagcccaaacagatcccctggagggctgtcgggagcccctacgtggtggagtccacaggcgtgtac---------------------------------ctctccatacaggcagcttcggaccacatctctgcaggtgctcaacgtgtggtcatctccgcgccctca---ccg---gatgcaccaatgttcgtcatgggtgtcaatgaaaatgactataaccctggctccatgaacattgtgagtgtccgtgcacaccttggctgtttcagcaacgcgtcctgcaccaccaactgtttggctcccctcgccaaagtcatccac---gagcgatttgggatcgtggaagggttgatgaccacagtccattcctacacggccacccagaagacagtggacgggccatcaaggaaggcctggcgagatgggcggggtgcccaccagaacatcatcccagcctccactggggctgcgaaagctgtgaccaaagtcatcccagagctcaaa---gggaagctgacagggatggcgttccgggtaccaaccccggatgtgtctgtcgtggacctgacctgccgcctcgcccagcct---gccccctactcagccatcaaggaggctgtaaaagcagcagccaagggg------------cccatggctggcatccttgcctacaccgaggatgaggtcgtctctacggacttcctcggtgatacccactcgtccatcttcgatgctaaggccggcattgcgctcaat------------gacaatttcgtgaagctcatttcatggtacgacaacgaatatggctacagtcaccgggtggtcgacctcctccgctacatgttcagccgagacaag???????????????????????????????????????????????????????????????????????????????????????
+otu_14	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtcgtcaaggtcggcatcaac---------------------------------------ggtttcggccgtatcggtcgcattgtcttccgcaatgccattgagcac---------gat---gacatccacatcgtcgctgtcaacgaccccttcattgagcccaagtacgctgcttacatgctccgctacgacaccacccacggcaacttcaag------------------------ggc------accatcgaggttgacggtgctgacctcgtcgtcaac---------------ggcaagaaggtcaagttctacactgag---cgcgaccccgctgccatcccctggtccgagaccggtgccgactacattgtcgagtccactggtgtcttc---------------------------------accaccaccgagaaggcctccgcccacttgaagggtggtgccaagaaggtcatcatctctgccccctct---gct---gatgcccccatgtacgttatgggtgtcaacaacgagacctacgatggc---tccgccgacgtcatctcc---------------------------aacgcctcttgcaccaccaactgcttggctcccctcgccaaggtcatccac---gacaacttcaccatcgtcgagggtctcatgaccaccgtccactcctacaccgccacccagaagaccgtcgatggtccttccgccaaggactggcgcggtggccgcactgctgctcagaacatcattcccagcagcactggttccgccaaggccgtcggcaaggtcatccccgacctcaac---ggcaagctcactggtatggccatgcgtgtccccaccgccaacgtctccgttgtcgatcttactgcccgcatcgagaagggt---gctacctacgatgagatcaaggaggtcatcaagaaggcctctgagggt------------cccctcgctggcatccttgcctacaccgaggatgaggttgtctcttccgacatgaacggcaaccccgcctcctccatcttcgatgccaaggctggtatctccctcaac------------aagaacttcgtcaagcttgtctcctggtacgacaacgagtggggctactctcgccgtgtcctcgacctcatctcctacatctccaaggtcgatgccaagaaggct??????????????????????????????????????????????????????????????????????????????
+otu_15	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgaccgcaaacgttggaattaat---------------------------------------ggatttggaagaattggtagactggtgttgagaattgccttgagcaga---------gac---gacatcaacgtcattgctatcaatgatccattcattgctcctgattacgccgcttacatgttcaagtacgactctacccacggaaagttcaag------------------------gga------actgtcacccacgagggcaagtacttggtcattaac---------------ggcaagaagattgaggtcttccaagag---agagacccagcaaacatcccatggggtaaggagggcgtcgactacgttcttgactccactggagttttc---------------------------------accaccatcgagggtgctcaaaagcacattgatgctggtgccaagaaggtcatcatcactgctccatct---aag---gacgctccaatgttcgtcgtcggtgtgaaccacgaggagtacactcca---gacatcaagatcctgtct---------------------------aacgcttcttgtaccaccaactgtctggctccactggccaaggttatcaac---gacatcttcggaatcgaggaaggtttgatgaccaccgtccactccatcaccgctactcaaaagactgtcgacggtccatcccacaaggactggagaggtggtagaactgcttctggtaacatcatcccatcctccaccggtgctgccaaggctgtcggaaaggtccttccagcattggcc---ggtaagctcactggtatgtccatgagagtcccaaccactgatgtttctgttgttgacttgaccgtcaaccttaagaagcca---accacctacgaggacatttgtgccaccatgaagaaggctgctgagggc------------ccattggctggaattcttggatacaccgacgaggctgttgtttcgtctgacttcttgaccgacagcagatcctctgtctttgacgccaaggccggtatcttgttgacc------------ccaaccttcgtcaagctcgtttcctggtacgacaatgagtacggttactctaccagagttgttgacttgcttcagcacgttgctaaggtttccgcc???????????????????????????????????????????????????????????????????????????????????????
+otu_16	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctatcactgtcggtattaac---------------------------------------ggtttcggacgtattggacgtctcgttctgagagtcgctctttccaga---------gct---gacatcaaggttgttgctatcaacgacccattcattgctccagaatacgctgcttacatgttcaagtacgactctacccacaaggcttacaag------------------------ggt------gaggtttctgccagcggcaacaagatcaacattgac---------------ggtaaagaaatcaccgttttccaagag---agagaccctgtcaacatcccatggggtaaggctggtgtcgactacgtcattgagtccaccggtgttttc---------------------------------accactttggagggtgcccaaaagcacatcgacgccggtgccaagaaggtggtcatcactgctccatcc---aag---gatgctccaatgttcgttgtcggtgtcaacgaggagaaatacacttct---gacttgaacattgtctcc---------------------------aatgcttcttgtactaccaactgtttggctccattggccaaggttgtcaac---gacactttcggaattgagtccggtttgatgaccaccgtccactccatgaccgccactcaaaagaccgttgacggtccatcccacaaggactggagaggtggtagaacggcttctggtaacatcattccatcttccactggtgctgctaaggccgtcggtaaggttattccagaattgaac---ggtaagctgaccggtttggctttccgtgtcccaaccgtcgatgtctccgttgttgacttgaccgtcaacttgaagaaggag---actacctacgaggagatcaagtctgttatcaaggctgcttccgagggt------------aagctcaagggtgttttgggttacactgaagatgccgttgtctcttctgacttcttgggtgacgagagatcctccatcttcgacgcttctgccggtattcaattgact------------ccatctttcgtcaagctgatctcttggtacgacaacgagtacggttactccaccagagtcgtcgacttgttgcaacacgttgctaaggct?????????????????????????????????????????????????????????????????????????????????????????????
+otu_17	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctattaaaattggtattaac---------------------------------------ggtttcggtagaatcggtagattagtcttaagagttgctttgggcaga---------aaa---gacattgaagttgtcgccgtcaacgatccattcattgctccagactatgctgcttacatgttcaaatacgattctactcacggtagatacaag------------------------ggt------gaagtcactgcttctggtgacgacttggtcattgat---------------ggtcacaagattaaagttttccaagaa---agagacccagctaacattccatggggtaaatctggtgttgactacgttattgaatccaccggtgttttc---------------------------------accaaagtcgaaggtgctcaaaagcacattgatgctggtgccaaaaaagttatcatcactgctccatct---gct---gatgccccaatgtttgttgtcggtgttaacgaagacaaatacactcca---gacttgaagattatctcc---------------------------aatgcttcttgtaccaccaactgtttggctccattagctaaagtcgtcaac---gatactttcggtattgaagaaggtttgatgaccactgtccactccatcactgctacccaaaagaccgttgacggtccatcccacaaggactggagaggtggtagaactgcttctggtaacattatcccatcttccactggtgctgctaaagccgttggtaaggttattccagaattgaac---ggtaaattgactggtatgtctttgagactcccaaccaccgatgtttccgttgttgacttgactgtcagattgaagaaagct---gcttcttacgaagaaattgctccagctatcaagaaagcttctgaaggt------------ccattgaaaggtgttttgggctacactgaagatgctgttgtctccaccgatttcttgggttcaagctactcatctatctttgatgaaaaagccggtatcttgttgtcc------------ccaactttcgtcaaattgatttcctggtacgataacgaatacggttactccaccaaagttgttgacttgttggaacacgttgcc???????????????????????????????????????????????????????????????????????????????????????????????????
+otu_18	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctccgatcaaggttggcatcaac---------------------------------------ggctttggccgcatcggccgcatggtgttccagtccatgtgcgaggacaacgtgctcggcaccgagctggacgttgtcgccgtggtggacatgagcacggacgccgagtacttcgcgtaccagatgaagttcgacacggtgcacggccgcccgaag---tacacggtggaggtggcgaagagctcgccgtcggcgaag---aagccggacgtgctggtggtgaac---------------ggccaccgcatcctgtgcgtgaaggcggaccgcaacccggcggacctgccgtggggcaagctcggcgtggactacgtgatcgagtccaccggcctgttc---------------------------------acggacaaggcgaaggcggagggccacgtgaagggcggcgcgaagaaggtggtgatcagcgcgccggcgtctggc---ggcgccaagacgatcgtgatgggcgtgaaccagcacgagtacaacccggccacgcaccacgtcgtgtcg---------------------------aacgcgtcctgcacgaccaactgcctggcgccgatcgtgcacgtgctgaccaaggagaacttcggcatcgagaccggcctgatgaccaccatccactcctacacggcgacgcagaagacggtggacggcgtgtcgatcaaggactggcgcggcggccgcgcggctgcggtgaacatcatccccagcacgaccggcgctgccaaggccgtgggcatggtgatcccgtccaccaag---ggcaagctgaccggcatgtcgttccgcgtgccgacgccggatgtgtcggtggtcgacctgaccttccgcgccacccgcgac---acgtccatccaggagatcgacgcggccctcaagaaggcgtcgcagacc------------tacatgaagggcatcctcggcttcacggacgaggagctggtgagcagcgacttcatcaacgacgcgcgcagctccatctacgactccaaggcgacgctgcagaacaacctgcccggcgagaagcgcttcttcaaggtggtgtcgtggtacgacaacgagtggggctactcgcaccgcgtggtggaccttgtgcgcttcatgggcgccaaggac---cgctccagctccaagctg?????????????????????????????????????????????????????????????????????
+otu_19	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctcccgtcaaggttggcatcaac---------------------------------------ggtttcggccgcattggccgcatggtcttccaggccatgtgcgagcagggcgtgctcggcaaggacttcgacgttgtcgcggtcgtggatatgtcgacggacgctgagtactttgcgtaccagatgaagtacgactctgtgcacggcaagcccaag---tacacggtggaggtcgcgaagagctctccgtctgtgaag---aagccggatgtgcttgtggtgaac---------------ggccaccgcatccagtgcgtgaaggcgcagcgcaaccccgctgacctgccctggagcaagctcggcgtggagtacgtgatcgagtcgactggtctgttc---------------------------------acgaacaaggccaaggctgagggccacctcaagggcggcgcgaagaaggtgatcatcagcgcgcctgcctctggc---ggcgccaagacgatcgtgatgggcgtgaacaaccaggagtacaaccccagcagccacagcgttgtgtcg---------------------------aacgcgtcctgcacgaccaactgccttgccccgctggtgcacgtgctgctgaaggagggcttcggcgtggagactggcctgatgaccaccatccactcgtacaccgccacgcagaagaccgtcgatggcgtctccctgaaggactggcgcggcggccgtgctgctgccatgaacatcatcccgtcgacgacgggtgctgccaaggccgttggtgaggtgctgcccgtgaccaag---ggcaagctgaccggcatgtccttccgtgtgccgacgccggatgtgtccgtggtggacctgaccttcaccgctgccaaggac---acgtcgatcaaggagattgacgaggccttcaagcgcgcgtccaagacg------------tacatgaagggcatcctcagctacaccgacgaggagcttgtgtcgtcggacttctacaacaacaacaacagctccatctacgactccaaggcgacgctgcagaacaacctgcccggcgagaagcgcttcttcaaggttgtgtcctggtacgacaacgagtggggctactctcaccgtgtcgtgaacctgctgtccttcatgatcaagaaggac---cgcgccggctccaagatc?????????????????????????????????????????????????????????????????????
+otu_20	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctccgctcaaggttggcatcaac---------------------------------------ggcttcggtcgcattggccgcatggtgttccagtcgatgtgcgaggacaacgtcctcggcaacgagatcgatgtcgtggcggtggtggacatgagcacggacgccgagtacttcgcgtaccagatgaagtacgacacggtgcacggccgcccgaag---tacacggtggaggtggcgaagagctctgccgcggtgaag---aagccggatgtgcttgtggtgaac---------------ggccaccgcatcctgtgcgtgaaggcgcagcgcaacccggcggacctgccgtggggcaagcttggcgtggactacgtgatcgagtcgaccggcctgttc---------------------------------acggacaaggtgaaggctgagggccacgtgaagggcggcgcgaagaaggtggtgatcagcgcgccggcgtccggc---ggtgcgaagacgatcgtgatgggcgtgaaccagaacgagtacgactccgccaagcaccacgtcgtgtcg---------------------------aacgcgtcctgcacgaccaactgcctcgccccgatcgtgcacgtgctgacgaaggagggcttcggcgtcgagaccggcctgatgacgaccatccactcctacactgccacgcagaagacggtggacggcgtgtcgatcaaggactggcgcggtggccgcgctgctgcggtgaacatcatcccgagcacgaccggtgcggccaaggccgtgggcatggtgatcccgtcgacgaag---ggcaagctgaccggcatgtccttccgcgtgccgacgccggatgtgtccgtggtggacctgacgttccgcgcgacccgcgac---acgtcgatccaggagatcgacgcggcgctgaagaaggcgtccaagacg------------tacatgaagggcatcctcggcttcacggacgaggagctggtgagcgccgacttcatcaacgacgcccgcagctcgatctacgactccaaggcgacgctgcagaacaacctgcccggcgagaagcgcttcttcaaggtcgtgtcgtggtacgacaacgagtggggctactcccaccgcgtggtggacctggtgcgcttcatgggcgccaaggac---cgcgcgagctcgaagatg?????????????????????????????????????????????????????????????????????
+otu_21	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctcccatcaaggttggaattaac---------------------------------------ggcttcggtcgtattggccgcaatgtcttccaggcgatctgcgagggaaaccatctcggcactgacatcgacgtggttgcggttgcggatatgagcacagacgccgagtacttctcctaccagatcgtggacgactccgtgcacggcaagccgcgctcctatggcgtcgaggtggcgaagagcaacccgtctgtggag---aagccggatgtgctcgtggtgaac---------------ggccaccgcatccagtgtgtgaaggcagcccgcaaccccgctgatctgccgtggggtaagcttgatatcgagtatgtgatcgagtccaccgggctcttc---------------------------------acgaagaagacccaggctgagggtcacatcaagggaggggccaagaaggtggtcatcagcgctcccgcatctggc---ggtgtcaagacgatcgtgatgggcgtgaacgagaaggagtatgacccgtcgtctcaccacatcgtgtcg---------------------------aacgcgtcgtgcacgaccaactgcctcgcgccccttgtgcacgttctgacgaaggagggcttcgggctggagactggtctgatgaccacgatccactcgtacacggcgacccagaagactgtggacggcgtgtcgatcaaggactggcgtggtggccgcgctgccgcgatcaacataatccccagcacgactggtgcggccaaggctgtgggcgaggtgatccccacgaccaag---ggcaagttgacgggcatggccttccgcgtcccgacacccgacgtgtcggtggtggacttgactttcacggccacccgcgac---acatcgatcaaggagatcgacgccgcgctgaagaaggcg---gtgacg------------tacataaagggcatcctcagctttaccgacgaggagttggtgagcacagactttatcaacgacaaccacagctccatctacgactccaaggcgacgctccaaaacaacctgcccaacgagaagcgcttcttcaagctcgtgtcctggtacgataacgaatggggctactcccaccgtgttgtggatctaattatctacatggctaagagggac---gctgcctccgccaagcta?????????????????????????????????????????????????????????????????????
+otu_22	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctaccaaggtcggcatcaac---------------------------------------ggtttcggtcgcattggccgcatcgtgctccgcaacgccctcgagcac---------ggc---gacattgacgtcgttgccatcaacgaccccttcattgacctcgactacatggtctacatgttcaagtacgactcgacccacggccgcttcaag------------------------ggc------aaggttgagaccaaggacggcaagctcgtcattgag---------------ggcaagcccatctcggtcttcggcgag---cgtgaccccgctgccatcccttggggcaaggccggcgccgactacgtcgtcgagtcgaccggtgtcttc---------------------------------accaccattgagaaggcctcgctccacctcaagggtggtgccaagaaggtcatcatctcggccccttcg---gct---gacgcccccatgttcgtttgcggtgtcaacctcgaggcttacaagccc---gagtacgaggtcatctcg---------------------------aacgcctcgtgcaccaccaactgccttgcgccccttgccaaggtcatccac---gacaacttcggcatcgtcgagggtctcatgaccaccgtccacgccaccaccgccacccagaagaccgttgacggcccttcgcacaaggactggcgtggcggccgtggcgctgccgccaacatcattccctcgtcgaccggtgccgccaaggccgtcggcaaggtcatcccctcgctcaac---ggcaagctcaccggcatgtcgttccgtatccccacctcggacgtttcggtcgttgaccttgtcgtccgtcttgagaagggc---gcctcgtacgaccagatcaaggaggtcatcaagaaggcctcggagggc------------gagctcaagggcatcctcggctacactgaggacgaggttgtctcgaccgacttcctcggctcgaccgagtcgtcggtctttgacgccaaggcgggtatccccctcaac------------gacaagttcgtcaagctcatctcgtggtacgacaacgagtacggctactcgcgccgcgtctgcgacctcatcgcctacgttgccaagaaggacaagtcggcg?????????????????????????????????????????????????????????????????????????????????
+otu_23	???????????????????????????????????????????????????atggccttctcttctctcctcagatctgccgcctcctacacggttgccgctcctcgccctgactttttctcgtcgccggcgtctgatcattctaaggtgttgtcaagtcttggatttagtcgcaacctgaagccatcaagattttcttctgggatatcttcatctctacaaaatggcaatgcaagaagtgtgcaacccatcaaggccacggctacagaagtgccatctgcagttcgaaggtcaagtagcagtggaaagacaaaggttgggatcaacggttttggtcggattggaaggttggtcctccgcattgcaacatcaagg---------gat---gatattgaggttgtagcagtgaatgacccattcattgatgccaagtacatggcttacatgttgaagtatgattctactcatggaaatttcaag------------------------gga---agcatcaatgtcattgatgattctactttggagatcaat---------------gggaagaaggtcaatgttgtcagcaag---agagatccatctgagatcccatgggctgatcttggagctgattatgttgttgagtcttccggtgtattc---------------------------------accaccctgtcaaaggctgcatcccatttgaagggcggtgccaagaaagttataatttctgccccttct---gct---gacgcacctatgtttgttgttggagtaaacgagcacacataccaacca---aacatggatatagtctcc---------------------------aatgcaagttgtaccaccaattgtcttgcccctcttgccaaggtggtgcat---gaggaatttggtattcttgaaggcttgatgacaactgtccacgcaactacagctactcagaaaactgttgatgggccatcaatgaaggactggagaggaggtcggggcgctagtcaaaacatcattcctagctcaaccggcgccgcgaaggctgtaggtaaagttcttccagaactgaat---gggaaacttacgggaatggccttccgtgtaccaacatcgaatgtttctgtggtggatttaacttgtcgacttgagaagggt---gcctcttacgaagatgttaaggcagccattaagcatgcctcagaagga------------cctcttaaaggcattctcgggtacacagatgaagatgtcgtctccaatgatttcgtcggtgattcaaggtccagtatctttgacgccaatgctggtattggattgagc------------aagtcctttgtgaaacttgtctcttggtacgacaacgaatggggttacagcaaccgagttcttgaccttatagagcacatggctttggtagctgccagccac?????????????????????????????????????????????????????????????????????????????????
+otu_24	???????????????????????????????????????????????????atggccttatcttctctcctcagatctgccgccact---tccgccgcagctcctcgtgtcgagctttatccatcgtcatcgtacaatcattctcaggtcacgtcaagtcttggattcagtcatagcctgacctcgtctagattttct---ggtgctgcagtttcaaccggaaaatacaatgcgaagagggttcaacccatcaaggccacagctacggaagcacctcctgctgttcat------------agt---------------------------tttggtcgaattggaagattggtcctccgcatcgcaacctttagg---------gat---gatatcgaggttgtagcagtcaacgatccattcatagatgccaagtacatggcttacatgttcaagtatgattctactcatggaaattacaaa------------------------gga---actatcaatgtaattgatgattcaactttggagatcaat---------------ggaaaacaagtcaaagttgtcagcaag---agagacccagctgaaatcccgtgggctgatcttggagctgagtatgttgttgagtcttcaggggtattc---------------------------------accaccgttggacaagcttcatcacatttgaagggtggtgctaagaaggtcatcatttctgcaccttca---gcg---gacgcgcccatgtttgttgttggagtaaatgaaaagacatacctgcct---aacatggatatagtctcc---------------------------aatgcaagttgtaccaccaattgtcttgcacctcttgccaaggtggtgcat---gaggaatttggtattcttgaaggcttgatgacaacagtccacgcgaccacagctactcagaaaactgtggacggcccatcaatgaaggactggagaggaggccgaggcgcaagtcaaaacatcattcctagctcaacaggcgctgcaaaggctgttggtaaggtccttccggaactcaat---ggaaaactaacaggaatggccttccgtgtcccaacaccaaatgtctctgttgtggatttaacttgtcgacttgagaaggat---gcatcgtatgaagacgtcaaggcagccataaagtttgcatcagaagga------------ccacttaggggcattcttggatatacagaagaagatgtcgtctctaatgattttcttggagattcaaggtcaagtatctttgatgctaatgctgggattggattaagc------------aagtccttcatgaaacttgtctcctggtatgacaacgaatggggttacagcaaccgagtccttgacctgatagaacacatggcgttagttgcagccagccgc?????????????????????????????????????????????????????????????????????????????????
+otu_25	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctgtcaaggttggaatcaac---------------------------------------ggtttcggacgaatcggacgaatcgtccttcgaaacgctatcatccac---------ggt---gatatcgatgtcgtcgccatcaacgaccctttcatcgatcttgagtacatggtctacatgttcaagtacgactccacccacggtgtcttcaag------------------------gga------tccgtcgagatcaaggacggcaagctcgtgatcgag---------------ggcaagcccatcgtcgtctacggtgag---cgagaccccgccaacatccagtggggagctgccggtgccgactacgtcgtcgagtccaccggtgtcttc---------------------------------accacccaggagaaggccgagctccacctcaagggaggagccaagaaggtcgtcatctctgccccttcg---gcc---gatgcccccatgttcgtctgcggtgttaacctcgacaagtacgacccc---aagtacaccgtcgtctcc---------------------------aacgcttcgtgcaccaccaactgcttggctcccctcgccaaggtcatccac---gacaactacaccattgtcgagggtctcatgaccaccgtccacgccaccaccgccacccagaagaccgtcgacggtccttccaacaaggactggcgaggaggtcgaggagctggtgccaacatcatcccctcctccaccggagccgccaaggccgtcggtaaggttatcccctccctcaac---ggaaagctcaccggaatggccttccgagtgcccacccccgatgtctccgtcgtcgatcttgtcgtccgaatcgagaaggcc---gcctcttacgaggagatcaaggagaccatcaagaaggcctcccagacccct---------gagctcaagggtatcctgaactacaccgacgaccaggtcgtctccaccgatttcaccggtgactctgcctcctccaccttcgacgcccaggccggtatctcccttaac------------ggaaacttcgtcaagcttgtctcctggtacgacaacgagtggggatactctgcccgagtctgcgaccttgtttcttacatcgccgcccaggacgccaaggcc?????????????????????????????????????????????????????????????????????????????????
+otu_26	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctatcacagttggtattaac---------------------------------------ggtttcggtcgtattggtcgtttagtcctaagaattgctctttcaaga---------aaa---gatattcaaattgttgcaattaatgatccattcattgcaccagaatatgcttcatatatgtttaaatatgattctactcatggtcgttattca------------------------ggt------gaagtttctcatgaaggtgaaaacattgttattgat---------------ggtaaaaaaatcagagtttatcaagaa---cgtgatccagttaatatcccatggggtaaagatggtgttgattatgttattgattcaactggtgttttt---------------------------------aaagaattagattctgctcaaaaacatattgatgccggtgctaaaaaagttgttattactgctccatca---tca---actgctccaatgtttgttgttggtgttaatgaagataaatatactcca---gatttaaacattatttca---------------------------aatgcttcatgtacaacaaattgtttagctccattagctaaaattattaac---aataaatttggtattgaagaaggtttaatgactactgttcattcaattactgctactcaaaaaactgttgatggtccatctcataaagattggagaagtggtcctactgcttcaggtaatattattccatcatcaactggtgctgctaaagctgttggtaaagttattccagaattggct---ggtaaattaactggtatgtctttaagagttccaactgttgatgtttcagttgttgatttaactgttaaattattaaaagat---gccacttatgatgaaattaaagctgctgttaaagaagctgctgaaggt------------ccattaaaaggtgttgttggttatactgaagatcaagttgtttcttcagatttcttaactgataacagatcatcaatttttgatgctgaagctggtatttggttatca------------ccaagatttgttaaattaattgcttggtatgataatgaatatggttactctaccagagttgttgatttattagaatacgttgcttcaaagaac??????????????????????????????????????????????????????????????????????????????????????????
+otu_27	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctgacaagaagattaggatcggaatcaac---------------------------------------ggattcggaagaattggtcgtttggttgctagagttgttctccagagg---------gac---gatgttgagctcgtcgctgtcaacgaccccttcatcactactgagtacatgacctacatgttcaagtacgacagtgttcacggtcaatggaaa------------------------cacaatgaactcaagatcaaggatgagaagacccttctcttcggt---------------gagaagccagtcactgttttcggcatc---aggaaccctgaggatatcccatgggccgaggctggagctgactacgttgttgagtctactggtgtcttc---------------------------------actgacaaagacaaggctgcagctcacttgaagggtggtgccaagaaggttgttatctctgcccccagc---aaa---gacgctccaatgtttgttgttggtgtcaacgagcacgaatacaagtcc---gaccttgacattgtctcc---------------------------aacgctagctgcaccactaactgccttgctccccttgccaaggttatcaat---gacagatttggaattgttgagggtcttatgactacagtccactcaatcactgctactcagaagactgttgatgggccttcaatgaaggactggagaggtggaagagctgcttcattcaacattattcccagcagcactggagctgccaaggctgtcggaaaggtgcttccagctcttaac---ggaaagttgactggaatgtctttccgtgtcccaaccgttgatgtctcagttgttgaccttactgtcagactcgagaaagct---gctacctacgatgaaatcaaaaaggctatcaaggaggaatccgaaggc------------aaactcaagggaatccttggatacaccgaggatgatgttgtctcaactgacttcgttggcgacaacaggtcgagcatttttgacgccaaggctggaattgcattgagc------------gacaagtttgtgaaattggtgtcatggtacgacaacgaatggggttacagttcccgtgtggtcgacttgatcgtccacatgtcaaaggcc?????????????????????????????????????????????????????????????????????????????????????????????
+otu_28	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgaagttctctgccgccacttttgctgcccttgtaggatctgccgctgcctactccagttcttcctttaccggatcggccctcaagagctcggcgtccaatgatgcctccatgtcgatggctaccggtatgggagtcaac---------------------------------------ggattcggacgtatcggacgtctcgtcacccgcatcatgatggaagac---------gac---gaatgcgatttggtcggaatcaacgccggttccgccactccggactacatggcctaccagtacaagtacgataccatccacggcaaggccaag------------------------cag------acggtcgaaatcgatggcgacttcctcgtcttggac---------------ggcaagaagatcatcacttcgcgctgc---cgtgaccccaaggaagtgggctggggcgcactcggagccgactacgtctgcgaatccaccggagtcttc---------------------------------ctcaccaaggaatccgcacagtccatcattgacggaggcgccaagaaggtcatctactcggcacccgccaaggac---gactcactcaccattgtcatgggagtcaaccaggaagcctacgatggt---tcggaagatttcatctcc---------------------------tgcgcttcttgcaccaccaacggacttgcccctatggttaaggccattcac---gacgaattcgtcattgaggaagccctcatgaccaccgtccacgccatgaccgccacccaggccgttgtcgactcctcatcccgcaaggactggcgcggaggacgtgcggcctcgggaaatatcatcccatcctccaccggagccgccaaagccgtcaccaaggtcattccttccctcgtt---ggaaagatcaccggcatggccttccgtgtccccaccattgacgtctccgtcgtcgacttgaccgcaaaactcgaaaagtcc---accacttacgaagaaatctgtgccgtcatcaaggccaagtccgagggt------------gaaatgaagggattcctcggatactccgacgaaccgttggtctccaccgactttgaaggtgacttgcgctcctccatctttgatgccgatgccggtatcatgctcaac------------cccaactttgtcaagctcatcgcctggtacgacaacgaatacggttactccggccgtgtcgtcgacctcatgaagcacgtcgcggccgtcgacgccaagatcaaggcc???????????????????????????????????????????????????????????????????????????
+otu_29	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtcgaag------attggtatcaat---------------------------------------ggatttggtcgcatcggccgcttggttctccgcgccgccattgataag---------ggc------gccaacgttgtggccgtcaacgatcccttcatcgatgtgaactacatggtctacctgttcaagttcgattcgacccacggacgttttaag------------------------ggc------accgttgccgccgagggcggtttcctggtggtcaac---------------ggccagaagatcaccgtcttcagcgaa---cgcgacccggccaacatcaactgggccagcgctggtgccgaatacatcgtggagtccactggcgtgttc---------------------------------accaccatcgacaaggcatccactcacttgaagggcggtgccaagaaggttatcatctcggccccatcc---gcc---gatgctcccatgttcgtttgcggcgtcaacttggatgcctacaagccc---gacatgaaggtggtctcc---------------------------aacgcatcgtgcaccaccaactgcttggctcctctggccaaggtgatcaac---gacaacttcgagatcgtcgagggtctgatgaccaccgttcatgccaccaccgctacccagaagaccgtcgatggaccttccggcaagttgtggcgtgatggacgtggcgctgcccagaacatcattccagcttccactggagctgccaaggccgtgggcaaggttatccccgccctcaac---ggtaagctcaccggaatggcattccgtgtgcccactcccaacgtttccgtggtcgatttgaccgtgcgcttgggcaagggt---gcgtcctatgatgaaattaaggccaaggttcaggaggccgccaacgga------------cccctgaagggtatcctgggatacaccgatgaggaggtcgtttctaccgatttcctcagcgacacccactcgtcggtgttcgatgccaaggctggcatttcgctaaac------------gacaagttcgtgaagctgatctcttggtacgacaacgagtttggctactccaaccgcgtcatcgacctgatcaagtacatgcagagcaaggat??????????????????????????????????????????????????????????????????????????????????????????
+otu_30	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtccggtattggcatcaat---------------------------------------ggctttggccgaatcggtcgcatgtttgcccgccaggctctagttcgc---------aaa---gatgtcaagatcgtggcaatcaacgatccctcactggatcccaagtatctggcctacatgctgcgatacgactccactcatggacagttcaat------------------------cag------aagatctctgtcgacgggaacaatcttgttgttaat---------------ggcaagaagatccagctgcttaaggag---tcggacgtcaagaagattaaatggtgcgacctgggcgtgcatacggtggtggagtgctccggtaggttt---------------------------------accaccctgaaagcctgtcaaggtcacttggatagtggggccaaaaaggtggtcatatcggcaccatct---gcc---gatgctccgatgtttgtgtgcggagtgaatcttgaggcatacaagccg---ggcacagcaatcatctcg---------------------------aatgcctcatgcacaaccaattgcctggcgccgctggccaaggtggtgcac---gataactttgaaatatgtgagggccttatgaccaccgttcatgcggctacagctacccaaaagatcatcgacggacccagcagcaaactctggcgggatggacgcagtggcatgaccaacatcatacccgcatccacgggagccgctaaagccgtcggcaaggtgattccggatctaaac---gggaagctaacgggtatggctttccgtgtaccagttcccaatgtctcggtggtggacctcacctgtaggctttctaagccc---gccaaaatggacgatatcaagaagtgcataaaggctgcatccaaatgc------------gaaatgaagggaatcctggggtatgtggaggaggaggtggtgtccaccgattttaacggctcacgatttgcatccgtctttgatgccaaggcctgtattgccctaaac------------gataacttcgtcaagttgatcagttggtacgataacgagaccggatattcctgccgtctccttgacctagttctctatgcccagttggtcgaccaatgcgatgcgaaggaaaagggtgcttgc????????????????????????????????????????????????????????????
+otu_31	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtcaatcactgtcggcatcaac---------------------------------------ggcttcggccctgtcgggaagtccgccttattcgcggccctagccgatccg------------ctgttcaccgtcacggcagtcgttgacgtctccgtgtgtgcggcctacattgcgtatgtgattgagcaagagtatccgcatcgcaacccaacaggg------------------cctccgattcgggtgacggacaagcagaaggatcagattgtactgaac---------------aacacccacgtcattcacgtgtcggccgcgcaagatccgcagtcatccatgtggaagaagtacggtgcgcagtacgtgctagagtgcacaggcctctac---------------------------------accacacgtagccgcagctggggtcatgtgacaggtggtgcggtgggcgtcttcatcgccgccgccagcgctgatacgaacacagtcatggcatcaagcggcttggaaagactcgcggcatcgttgcctgtgtgcgccgcaggagcgccc------------------------atcggggccgtcgtagctccagtgctggacgcgctggcgaaggtgttggagatcgagcaggtgagttacacggcc---ctctatgggcctcagccacagcacccgatcggcgctaagtcggacgactcgcgcgactggcggcaagtacgactacagccatttgccagctgtgcgatggcgtccagtcgcgacaacggcgctgaaacagttggcgcgctcctgccgcatcttgtt---ggtcgcgtgagtgctagtgctttccaggttcctgtggcgcaagggtgtgcgatcgacctcgttgtctacacgaaggaggcc---gcgtcggcggatgtggtggcgagcgccttcgcgcccgccgcggcggactcg---------gagccgcttgtaaaggtctgcatcgccaacggacccatgatcagcgttgactgcattggcagctca---agtgtcatactcgacgccacttcgttgagcagcagcaccgaaggc---------aaggtgcatcgcatggtgctgtgggtggacgtagcgtgctactacgctgccctgttgttgtcattagcgaagcaggtgcacagcattcacgcaccgccgtcgtcg???????????????????????????????????????????????????????????????????????????
+otu_32	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggcagcccgc---gtcggcatcaac---------------------------------------ggcttcggccgcatcggtcgcctcgtcctccgcgtcctcctctcccgc---------ggcatgcccgccaacgtcgtcctgatcaacgacccgttcatcacgactgacgccatggcgtacatcttcaagtacgacaccgtccacggccgcttcccc------------------------ggc------accgtcgtcggcaaggagggcaagctcgagatcaccctcaacggc---cacgtctacgacatcgccgtctcggcctgc---aagaaccccgccgagatcccgtggggcgcgagcaacgtcgaggtcgtcatcgagtcctcgggcgccttc---------------------------------accaccatggagaaggcccacctccaccagcagggtggtgcgaagaaggtcctgatcaccgccccgtcg---gcc---gacgcccccatgttcgtctacggtgtcaaccaccagtcgctcaaggcc---gaccagacggtcttctcg---------------------------aacgcctcgtgcacgaccaactgcctggcgcccctcaccaaggtcatcaac---gacaagttcggcatcgtccgcggcctgatgaccaccgtccactcgacgacggccacccagaagaccgtcgacggcccctcgggcaaggcgtggcgcgacggccgtgccgccgccggcaacatcatcccctcgagcactggcgccgccaaggccgtcactgccgtcatccccgagctcaag---ggccgcctgactggcatgtcgttccgcgtccccacgctcaacgtctcggtcgtcgacctgactgccgagctcaagaccccc---gccacgtacgaggagatctgcaacgccgtccgtgaggccgccaacggc------------gagctcaagggcgtcctcggctacaccgacgagcaggtcgtctcgtcggacttcgtcggcgagcccatgtcgtcgatcttcgacgccaaggcgggaatcgccctggac------------aagaccttcgtcaagctggtctcgtggtacgacaacgagtggggctactcgaaccgcgtcgtcgacctgctcctccactcgctctccctccac??????????????????????????????????????????????????????????????????????????????????????????
+otu_33	??????????????????atggcttcggctactttctctgtggccaaaccatctcttcagggt------------------------------------------------ttttctgagttctcaggacttcgaaactcctctgct------cttccctttgccaagagatcttcttccgatgagtttgtttccttcgtcagtttccaaacttctgcaatgagaagcaatggtggatacaggaaaggg------gtgaccgaggccaagataaaggtagccatcaat---------------------------------------gggttcggtaggattggtaggaacttcttgaggtgttggcatggtcgtaaggactct------cctcttgatgtcgttgtcattaacgac---actggtggtgttaaacaagcatcacatctcctcaaatacgactcaactcttggaatctttgac------------------------gct------gatgtcaaaccttcaggagactcagctctctctgtt------------gatggaaagatcatcaagattgtatctgatcgtaac---ccatctaatctcccctggggggaactaggcattgacttagttatcgaaggaaccggagtgttt---------------------------------gttgacagagacggtgctgggaagcaccttcaggctggagccaagaaggttcttatcactgcacctggaaaaggt---gacatcccaacttatgtcgttggtgtcaatgctgaactttacagccatgaagatacg---atcatcagc---------------------------aatgcgtcttgtactactaactgtctcgctccattcgtcaaggttcttgac---cagaaatttgggatcataaagggtacaatgacaactactcactcatacactggtgaccaaaggttgttagat---gcgagccaccgtgatctaaggagagcaagagcagcagctttgaacattgttccaacatctacaggagcagccaaagctgtggctcttgtgcttcctaacctcaaa---ggaaaacttaacggaattgcattgcgtgtgccaactccaaacgtttcagtggttgacttagtcgtgcaagtctccaagaaa---acttttgctgaagaagtcaatgctgctttcagagatgcagctgagaaa------------gagcttaaaggtatccttgacgtctgtgatgagcctcttgtctctgttgacttcaggtgctctgatgtatcctccactattgattcttccctcacaatggttatgggagatgat------------atggttaaagtgattgcttggtatgacaatgaatggggttactcacagagagtcgttgatttggctgacattgttgccaataactggaag???????????????????????????????????????????????????????????????????????????????????????
+otu_34	?????????????????????????????????????????????atgtcgaagcgcgacatcgtcctcaccaatgtcaccgttgtccagttgctgcgacagccgtgcccggtgaccagagcaccgcccccacctgagcctaaggctgaagtagagccccagccacaaccagagcccacaccagtcagggaggaaataaagccaccaccgccaccactgcctcctcaccccgctactcctcctcctaagatggtgtctgtggcccgggagctgactgtgggcatcaat---------------------------------------ggatttggacgcatcggtcgcctggtcctgcgcgcctgcatggagaag---------ggt------gttaaggtggtggctgtgaatgatccattcattgacccggaatacatggtgtacatgtttaagtatgactccacccacggccgatacaag------------------------gga------agtgtggaattcaggaatggacaactggtcgtggac---------------aaccatgagatctctgtctaccagtgc---aaagagcccaaacagatcccctggagggctgtcgggagcccctacgtggtggagtccacaggcgtgtac---------------------------------ctctccatacaggcagcttcggaccacatctctgcaggtgctcaacgtgtggtcatctccgcgccctca---ccg---gatgcaccaatgttcgtcatgggtgtcaatgaaaatgactataaccctggctccatgaacattgtgagc---------------------------aacgcgtcctgcaccaccaactgtttggctcccctcgccaaagtcatccac---gagcgatttgggatcgtggaagggttgatgaccacagtccattcctacacggccacccagaagacagtggacgggccatcaaggaaggcctggcgagatgggcggggtgcccaccagaacatcatcccagcctccactggggctgcgaaagctgtgaccaaagtcatcccagagctcaaa---gggaagctgacagggatggcgttccgggtaccaaccccggatgtgtctgtcgtggacctgacctgccgcctcgcccagcct---gccccctactcagccatcaaggaggctgtaaaagcagcagccaagggg------------cccatgcgtggcatccttgcctacaccgaggatgaggtcgtctctacggacttcctcggtgatacccactcgtccatcttcgatgctaaggccggcattgcgctcaat------------gacaatttcgtgaagctcatttcatggtacgacaacgaatatggctacagtcaccgggtggtcgacctcctccgctacatgttcagccgagacaag???????????????????????????????????????????????????????????????????????????????????????
+otu_35	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctgacaagaagatcagaatcggaatcaac---------------------------------------ggtttcggaagaatcggtcgtttggttgctagagttgttcttcagagg---------gat---gatgttgagctcgtcgctgttaacgatcctttcatcaccaccgagtacatgacatacatgtttaagtatgacagtgttcacggtcagtggaag------------------------caccatgagcttaaggtgaaggatgacaaaactcttctcttcggt---------------gagaagccagtcactgttttcggcatc---aggaaccctgaggacatcccatggggtgaggctggagctgactttgttgttgagtctactggtgtcttc---------------------------------actgacaaagacaaggctgctgctcacttgaagggtggtgctaaaaaggttgtcatctctgccccaagc---aaa---gatgcgcccatgttcgttgttggtgtcaacgagcacgagtacaagtct---gaccttgacattgtttcc---------------------------aacgctagttgcaccactaactgccttgctcctcttgccaaggttattaat---gacaggtttggcattgttgagggactcatgaccactgtccactctatcactgctactcagaagacagttgatggtccatcaatgaaggactggagaggtggaagagctgcttccttcaacattattcctagcagcactggtgccgccaaggctgttgggaaagtgttgccatccctcaat---ggaaaattgaccggaatgtctttccgtgttccaaccgttgatgtctcagttgttgatctcaccgttagacttgagaaagct---gcaacatacgacgaaatcaagaaggccatcaaggaggaatctgaaggc------------aaaatgaagggaattttgggatacactgaggatgatgttgtgtctaccgactttgttggtgacaacaggtcaagcattttcgatgccaaggctgggattgcattgagc------------gacaagtttgtgaagttggtgtcatggtacgacaacgaatggggttacagttctcgtgtcgttgaccttatcgttcacatgtcaaaggcc?????????????????????????????????????????????????????????????????????????????????????????????
+otu_36	atggccacacatgcagctctcgccgtctcaagaatcccggtcacacagcgactgcagtctaagagtgccattcactctttccctgctcaatgctcctccaagaggctagaagtcgctgaattctccggtctgcgtatgagtagtatcggt------------------ggggaagcatctttcttcgatgctgtagctgcacaaatcatccctaaggctgtgacaacatcaactcctgttagagga---------gagacagtggcgaaactgaaagttgcgattaac---------------------------------------ggttttggaaggattggtaggaactttcttaggtgttggcatggtcgtaaagactct------cctctcgaagttgttgtacttaacgac---agtggtggtgtcaagaatgcatcccacttgcttaagtatgactccatgcttggaaccttcaag------------------------gct------gaagtgaaaattgtggacaatgaaactattagtgtt------------gatggtaagctcatcaaagttgtctccaacagagac---cctcttaagcttccatgggctgagctcggcattgacattgttatcgagggaacaggagtgttt---------------------------------gttgatgggccaggagcagggaagcatatccaagccggagcctcgaaagttatcatcactgcaccagccaaaggtgctgatatccctacctatgttatgggagtcaatgagcaagactatggtcacgatgtcgctaacattattagc---------------------------aatgcatcttgcaccaccaactgtttggcaccttttgctaaagtcttggat---gaagaatttggaattgtcaaggggacaatgacaaccacacactcctacaccggagaccaaaggcttctagat---gcatcacacagggacctaaggcgtgcaagagccgcagcactgaacatagtgcctaccagcacaggagcagccaaggcggtgtcattagtgttgccgcagctgaag---ggtaaacttaacggcattgcactccgtgtgccaacaccaaacgtctcagtggttgaccttgttataaacgttgagaagaaaggtttgacagcagaggatgtgaacgaggcctttagaaaagccgctaatgga------------ccgatgaaaggcattttagacgtttgcgatgcgcctcttgtctctgttgacttcaggtgctctgatgtctctaccaccattgactcgtccctcactatggttatgggtgatgat------------atggtcaaggtggttgcttggtatgataacgagtggggttacagccaaagagtggtggatttggctcacctagtggctagcaagtggccgggagcggaagctgttggaagtggagatcctttggaggatttctgcaagacaaacccggctgatgaggaatgcaaagtctatgac???
+otu_37	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttaacgtctcagtcaac---------------------------------------ggtttcggtagaattggtagattagtcaccagaattgctatcagcaga---------aag---gacatcaacttggtcgctatcaacgacccattcatctccactgactacgctgcttacatgttcaagtacgactccacccacggtcgtttcgac------------------------ggt------gaagtctcccacgacaaagaccacattatcttgaac---------------ggtaagaaggtcgctgtcttcaacgaa---aaggacccagccgctttgccatggggtaagttgggtgttgatgtcgccatcgactccactggtatcttc---------------------------------aaggaaatggactctgctaacaagcacattgaagctggtgccaagaaggttgtcatcaccgctccatct---ggt---tccgccccaatgtacgtcatgggtgtcaacgaagaaacctacactcca---gaccaaaagatcgtctcc---------------------------aacgcttcctgtaccaccaactgtttggccccattggctaaggtcatccac---aacgaattcggtatcaaggaaggtttgatgaccactgtccactctatgaccgccacccaaaagactgttgacggtccatcccacaaggactggagaggtggtagaaccgcttctggtaacatcatcccatcttccactggtgctgccaaggctgtcggtaaggtcttgccatccttgcaa---ggtaagttgaccggtatggctttcagagttccaaccgtcgatgtctccgttgttgacttgactgttaacttggccaaggaa---acctcttacgacgaaatcaaggctgctttgaagaaggcttccgaaggt------------tccatgaagggtatcttgggttacactgaagacgacgttgtctcttctgacttcttgggtgatgctcactcttccatcgtcgatgctgctgccggtatccaattgacc------------ccaactttcgtcaagttggtctcctggtacgacaacgaattcggttactccaccagagttgtcgacttggttgaacacgtcgctaagtccgct??????????????????????????????????????????????????????????????????????????????????????????
+otu_38	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttaacgttggtatcaac---------------------------------------ggattcggtcgtatcggccgtatcgtcttccgcaatgccctcctcaac---------ccc---aaaatccaggtcgtagccatcaacgacccattcatcaaccttgaatacatggtctacatgttcaagtacgactccgttcacggtcgcttcaag------------------------gga------accgtcgaggcaaaagatggcaagctttggattcag---------------ggcaagcccgtcatcgtctacggcgag---aagaatccctctgacatcaaatggggcgccgccggccgtgactacgttgtcgaatccacgggtgttttc---------------------------------acaacagtcgaaaaggctgaaggacacttgaagggcggtgccaagaaagtcatcatctctgctccttcg---gcc---gacgcgccaatgttcgttatgggttgcaatctcgaccagtacgatccc---aagtacaccgtcatttcg---------------------------aacgcttcatgcacgaccaactgccttgcgcccctcaccaaggtcatccac---gacaagtatggcatcattgagggcttgatgagcaccattcatgccaccaccgccacccaaaagaccgtggacggtccctccaacaaggactggcgcggtggacgcgccgtcgttaacaacatcatcccttcgtccaccggtgccgctaaggctgttggaaaggtcatcccttcgctcaac---ggcaagctcactggcctctccttccgtgtgcccaccatcgacgtctccgtgatcgaccttgtcgtccgcctcgagaagccc---gcaagctacgaggatatcaagaaaactgtcaaggaggcttcagagggt------------gcctacaaaggcatcatcgagtacaccgaggaacaggtcgtctccgccgacttcatcggccaccacgcctcctcgatcttcgacgcacaagctggcatccagctcaac------------cccaacttcgtcaagctcatcgtttggtacgacaacgagtggggctactccgcccgcgtgtgcgacctcctcgtcttcgccgccgagcaggacgccaagcaacag??????????????????????????????????????????????????????????????????????????????
+otu_39	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggccgtcaaagtcggtatcaac---------------------------------------ggtttcggtcgtatcggccgtatcgtactcagaaatgctttgttgaac---------cct---gaagtcaacgttgttgcagtcaacgaccctttcattgctcttgagtacatggtctacatgttcaagtatgactccgtccacggacgtttcaaa------------------------ggc------actgtcgagactaagggcggcaagctcatcgtcgac---------------ggaaaggaaatctccgtcttcggtgag---aaggatgctggtgctattccttggagctctgtcggtgcagagtacatagtcgagtctaccggtgtcttc---------------------------------accacgattgaaaaggcctctgctcacttgaagggtggtgccaagaaagtcatcatctctgctccttcc---gct---gatgcacctatgtacgtttgcggtgtcaatctcgactcttatgactcg---caacatgctgttatctcc---------------------------aatgcttcttgcacgacgaactgcctcgcacctctcgccaaggttatccac---gacaaattcggtatcgttgaggctctgatgactaccgtccatgccaccaccgctacccagaagactgttgatggtccttcgaacaaagattggcgtggaggtcgttctgtcaacggaaacatcatcccttcttcaactggtgctgccaaggctgtcggaaaggttattccttcgctcaac---ggaaagttgaccggtcttgccttccgtgttcctaccctcgacgtttcggtagtcgaccttgtttgccgtaccgaaaagagt---gcaacctacgacgagatcaaagctgctgtcaaagaagcctccaagggt------------cctctcaagggtatcttaggctacactgaggaccatgttgtttccaccgacttcattggggacaaccactcttcgatcttcgatgctaccgccggaatccagctcaac------------aagaactttgtcaagttgattgcttggtacgacaacgagtggggttactctggaagagttgttgatctcttggtgtttgctgccaagaaggatggtgctctc?????????????????????????????????????????????????????????????????????????????????
+otu_40	atggccacacacgcagcgctcgcggcgtcccgcattccggccaccgcccggctgcacagcaaggcggcg------------------tccaag------cagagggtggacttcgccgacttctccggactgaggccgggatcgtgctcc---atcagccacgccgcgagggaggcgtccttctccgatgtccttggctcgcagctcgtcgccagggctaccgga---gagaacgccgtgagggcg---------ccggctgaggcgaagctcaaggttgccatcaac---------------------------------------ggcttcggccgcattggccgcaacttcctccggtgctggcacgaacgcgagaactcc------ccgctcgaggtcgtcgtcgtcaacgac---agcggaggcgtcaggaacgcatcacaccttctcaagtacgactcgatgctcggcaccttcaag------------------------gcc------gacgtcaagatcgtcgacgaccagaccatcagcgtc------------gacggcaagctgatcaaggtcgtctccaacagggac---cccctcaagctgccatgggctgagctcggcatcgacattgtcatcgagggtaccggagtgttc---------------------------------gtcgacggccccggcgccgggaagcacatccaggccggcgcgaagaaggtcatcatcactgctccggcgaagggtgctgacatccctacctacgtcctcggtgtcaacgagggagactactcccacgaagtggccaacattatcagc---------------------------aatgcttcctgcacaaccaactgcctcgctccgttcgtcaagatcttggac---gaagagttcggaatcgtaaagggaaccatgaccacaactcactcctacaccggcgaccagaggttgctggac---gcgtcgcaccgtgacctgaggagggcccgggcggcggcgctgaacatcgtgccgacgagcaccggcgccgcgaaggccgtggcgctggtgctcccgcagctgaag---gggaagctcaacggcatcgcgctgcgcgtgccgaccccgaacgtgtccgtggtggacctggtgatcaacaccgtgaagaccggcatcaccgccgacgacgtgaacgccgcgttccgcaaggccgcggcgggg------------ccactcagcggcatcctcgacgtctgcgacgtgccgctggtgtccgtcgacttccgctgctccgacgtctcctccaccatcgacgcctcgctcaccatggtcatgggcgacgac------------atggtcaaggtggtcgcctggtacgacaacgagtggggctacagccaacgcgtggtcgatctggcgcatctggtggcgagcaagtggcccggcgcggcggtgcagggcagcggcgacccactggaggacttctgcaaggacaacccggagaccgacgagtgcaaagtgtacgaaaac
+otu_41	??????????????????atggcttcggttactttctctgtccccaag---------ggt---------------------------------------------------ttcactgaattctcaggattgcgaagctcctctgcttct---cttcccttcggcaagaaactttcttccgatgagttcgtttccatcgtctccttccagacttctgcaatgggaagcagtggtggatacaggaaaggt------gtgactgaggccaagcttaaggtggccattaat---------------------------------------ggattcggtaggatcgggaggaacttcctgagatgttggcatggtcgcaaggactct------cctcttgatatcattgccattaatgac---actggtggcgtcaagcaggcttcgcatttacttaaatacgactctactctcggaatctttgat------------------------gct------gatgtcaaaccttctggagagactgcaatctctgtt------------gatggaaagatcatccaagttgtctctaaccgaaac---ccgtctcttctcccttggaaggagctaggaattgacattgtcatcgaaggaaccggagtgttt---------------------------------gtggatagagaaggtgcagggaaacacattgaagctggtgccaagaaggttatcattactgctccaggcaaagga---gatattccaacttatgtcgttggtgtcaatgcagatgcttacagtcatgatgaacct---atcatcagc---------------------------aatgcatcttgcactaccaactgtcttgctccctttgtcaaagttcttgac---cagaaattcggtatcataaagggtacaatgacgactactcactcttacaccggtgaccagaggttgctagac---gcgagtcaccgtgatctaaggagagcaagagcagctgctttgaacattgttcctacttctacaggagcagctaaagctgtggctcttgtgctccctaacctcaaa---ggaaaactcaacgggatcgctctccgtgtaccaacaccaaacgtatcagtggttgatctcgttgtgcaggtctcaaagaag---acatttgctgaggaagtcaacgctgctttcagagattctgcagagaaa------------gagcttaaaggtatactcgatgtctgcgatgagccactagtgtccgttgatttcagatgctcagatttttcaacgaccattgattcatcactcactatggttatgggagatgat------------atggttaaggtgattgcttggtatgataatgaatggggttactcacagagagttgttgacttggctgacattgttgccaacaactggaag???????????????????????????????????????????????????????????????????????????????????????
+otu_42	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggcaattcctaaagttggtatcaac---------------------------------------ggtttcggtcgtattggccgtatcgtcctccgtaacgccatcctcact---------ggc---aagatccaagttgtcgctgtcaacgatcctttcatcgatcttgactacatggcatatatgttcaagtacgactctacccacggtcgctttgag------------------------ggc------tccgttgagaccaagggtggcaagcttgtcatcgac---------------ggtcactccatcgatgtccacaacgag---cgtgaccccgccaacatcaagtggtctgcctctggtgctgagtacgttattgagtccactggtgtcttc---------------------------------actaccaaggagactgcctctgctcacttgaagggtggtgccaagcgtgtcatcatctctgctccttcc---aag---gatgctcccatgttcgtcgtcggtgttaacttggagaagttcaacccc---tccgagaaggttatctcc---------------------------aacgcctcttgcaccaccaactgtttggctccccttgccaaggtcatcaac---gacaccttcggtattgaagagggtctcatgaccactgttcacgctaccactgccacccaaaagaccgttgacggtccctctaagaaggactggcgtggtggtcgtggcgccagcgccaacatcatcccctcctccactggtgccgccaaggccgtcggtaaggttattcccgctcttaac---ggtaagcttaccggtatggctttccgtgtccctacccccgatgtttccgttgttgacttgaccgtcaagttggccaagcct---accaactacgaggacatcaaggctgctatcaaggctgcctctgag------------ggtcccatgaagggtgtgttgggttacaccgaggactctgttgtctccaccgacttctgtggtgacaaccactcctccatcttcgatgcctctgccggtatccaactttct------------cctcaattcgtcaagctcgtctcttggtacgataacgaatggggttactcccaccgtgttgttgacttggttgcctacactgcttccaaggat??????????????????????????????????????????????????????????????????????????????????????????
+otu_43	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggcaattcctaaggttggtattaac---------------------------------------ggtttcggtcgtattggacgtattgtcctccgtaacgctttggtcgct---------aag---accatccaagtcgttgctatcaacgatccttttatcgatcttgaatacatggcctacatgttcaagtacgactctacccacggtcgcttcgat------------------------ggc------tccgtcgagatcaaggatggtaagctcgtcattgat---------------ggcaatgccatcgatgtccacaacgag---cgtgaccccgccgacatcaagtggtctacctctggtgctgactacgttatcgagtccaccggtgtcttc---------------------------------accactcaagagactgcctctgctcacttgaagggtggtgccaagcgtgtcatcatctctgctccttcc---aag---gacgcccccatgtacgttgtcggtgtcaacgaggagaagttcaacccc---tctgagaaggttatctcc---------------------------aacgcttcttgcaccaccaactgtttggctccccttgccaaggtcatcaac---gacaccttcggtatcgaggagggtctcatgaccactgtccacgctaccactgccacccaaaagaccgttgacggtccctccaagaaggactggcgtggtggtcgtggcgccagcgccaacatcatcccctcctccactggtgccgccaaggccgtcggtaaggttattcccgctcttaac---ggtaagcttaccggtatggctttccgtgtccctacccccgatgtttccgttgttgacttgaccgtcaagttggccaagcct---accaactacgaggacatcaaggccgccatcaaggctgcctctgag------------ggccccatgaagggtgtgttgggttacaccgaggacgctgttgtctccactgacttctgcggtgacaaccactcctccatcttcgatgcctctgccggtatccaactttct------------cctcaattcgtcaagctcgtctcttggtacgataacgaatggggttactcccgccgtgtcgttgacttggttgcctacactgccgccaaggacaac???????????????????????????????????????????????????????????????????????????????????????
+otu_44	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgactatcaaagtaggtatcaac---------------------------------------ggttttggccgtatcggtcgcattgttttccgtgctgctcagaaacgt---------tct---gacatcgagatcgttgcaatcaacgac---ctgttagacgctgattacatggcatacatgctgaaatatgactccactcacggccgtttcgac------------------------ggt------accgttgaagtgaaagacggtcatctgatcgttaac---------------ggtaaaaaaatccgtgttaccgctgaa---cgtgatccggctaacctgaaatgggacgaagttggtgttgacgttgtcgctgaagcaactggtctgttc---------------------------------ctgactgacgaaactgctcgtaaacacatcaccgctggtgcgaagaaagtggttatgactggtccgtctaaagac---aacactccgatgttcgttaaaggcgctaacttcgacaaatatgct------ggccaggacatcgtttcc---------------------------aacgcttcctgcaccaccaactgcctggctccgctggctaaagttatcaac---gataacttcggcatcatcgaaggtctgatgaccaccgttcacgctactaccgctactcagaaaaccgttgatggcccgtctcacaaagactggcgcggcggccgcggcgcttcccagaacatcatcccgtcctctaccggtgctgctaaagctgtaggtaaagtactgccagaactgaat---ggcaaactgactggtatggcgttccgcgttccgaccccgaacgtatctgtagttgacctgaccgttcgtctggaaaaagct---gcaacttacgagcagatcaaagctgccgttaaagctgctgctgaaggc------------gaaatgaaaggcgttctgggctacaccgaagatgacgtagtatctaccgatttcaacggcgaagtttgcacttccgtgttcgatgctaaagctggtatcgctctgaac------------gacaacttcgtgaaactggtatcctggtacgacaacgaaaccggttactccaacaaagttctggacctgatcgctcacatctccaaa????????????????????????????????????????????????????????????????????????????????????????????????
+otu_45	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtctcaggtcaacatcggtatcaac---------------------------------------ggcttcggtcgtatcggacgtatcgtcttccgtaactcggtcgtccac---------aac---acggccaacgtcgttgccatcaacgaccccttcattgacctagaatacatggtgtacatgctcaagtacgactccacccacggtgtcttcaac------------------------ggt------gacatctccaccaaggatggcaagctcattgttaac---------------ggcaagtcgatcgctgtcttcgccgag---aaggacccctccaacatcccctggggtcaggctggtgcccactacgttgtcgagtccaccggtgtcttc---------------------------------accaccattgacaaggcctcggcccacatcaagggtggtgccaagaaggtcgtcatctcggctccctcg---gct---gacgcccctatgtacgtctgtggtgtcaaccttgacgcctacgacccc---aaggcccaggtcgtctcg---------------------------aacgcctcatgcaccaccaactgccttgctcccctcgccaaggtcatccac---gacaagttcggtatcgttgagggtctcatgaccactgtccatgccaccaccgccacccagaagaccgtcgacggcccctcggccaaggactggcgtggaggccgtgccgccgctgccaacatcatcccctcgtccactggtgccgccaagcgcgtcggcaaggtcatcccttcgctcaac---ggcaagctcaccggtatggctttccgtgtgcccaccaccaacgtctcggttgttgacctgaccgcccgcctcgagaagggc---gccagctacgacgagatcaaggccgaggtcaagcgcgcttccgagaac------------gagctgaagggtatcctcggctacactgaggacgccgttgtgtctcaggacttcattggcaactcgcactcgtcgatcttcgatgctgctgccggtatctctctcaac------------aacaactttgtcaagcttgtctcgtggtacgacaacgagtggggttactcgaaccgatgcctcgacctcctcgtcttcatggctcagaaggacagcgcg????????????????????????????????????????????????????????????????????????????????????
+otu_46	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgccaaagccaaatgtcggaatcaac---------------------------------------ggattcggaagaatcggacgtcttgtcctccgcgccgctgtcgagaag---------gacagt---gtcaacgttgatgccgtcaacgatccattcatctccatcgactacatggtctacttgttccagtacgactccacccacggacgcttcaag------------------------gga------accgttgctcacgagggagactaccttcttgtcgccaaggaagggaagtcgcagcacaagatcaaggtctacaactca---agagacccagctgaaatccaatggggagcctctggagccgactatgtcgttgagtccaccggagtcttc---------------------------------accaccatcgagaaggccaacgctcacttgaagggaggagccaagaaggtcatcatctctgctccatct---gct---gatgctccaatgttcgtcgtcggagtcaaccacgagaagtacgatcatgccaacgaccacatcatctcc---------------------------aatgcttcctgcaccactaactgccttgctccacttgccaaggtcatcaat---gacaacttcggaattattgagggacttatgaccactgtccacgccgtcaccgccacccaaaagactgttgacggaccatcaggaaagctctggagagacggacgtggagctggacagaacatcatcccagcctctactggagccgctaaggctgttggaaaggttatcccagagctcaat---ggaaagctcaccggaatggctttccgtgtcccaaccccagatgtgtctgttgttgatctcactgctcgtcttgagaagcca---gcttccctcgatgacatcaagaaagttatcaaggctgccgctgacgga------------ccaatgaagggaattctcgcttacactgaggatcaagttgtctccaccgactttgtctccgataccaactcttccatcttcgatgccggagcatccatctcactcaac------------ccacacttcgtcaagctcgtctcgtggtacgataacgagttcggatactccaacagagttgttgatctcatctcgtacatcgccaccaaggcc??????????????????????????????????????????????????????????????????????????????????????????
+otu_47	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgaccaagccaagtgtcggaatcaac---------------------------------------ggattcggaagaatcggacgtcttgtcctccgcgccgctgtcgagaag---------gacagt---gtcaatgttgttgccgtcaacgatccattcatctccatcgactacatggtctacttgttccagtacgattccactcacggacgcttcaag------------------------gga------accgttgcccacgagggagactaccttcttgtcgccaaggaaggaaagtcccagcacaagatcaaggtctacaactca---agagacccagctgagatccaatggggagcctctggagccgactatgtcgttgagtccaccggagtcttc---------------------------------accaccatcgagaaggccaatgctcacttgaagggaggagccaagaaggtcatcatctctgctccatct---gct---gatgctccaatgttcgtcgtcggagtcaaccacgagaagtacgatcatgccaacgaccacatcatctcc---------------------------aatgcttcctgcaccactaactgccttgctccacttgccaaggtcatcaat---gacaacttcggaattattgagggacttatgaccactgtccacgccgtcaccgccacccaaaagactgttgacggaccatcaggaaagctctggagagacggacgtggagctggacaaaacatcatcccagcctctactggagccgctaaggctgtcggcaaggttatcccagagctcaat---ggaaagctcaccggaatggctttccgtgtcccaaccccagatgtctctgttgttgatctcactgctcgtcttgagaagcca---gcttccctcgatgacattaagaaggttatcaaggctgccgctgacgga------------ccaatgaagggaattctcgcttacaccgaggatcaagttgtctccactgactttgtctccgataccaactcttccatcttcgatgccggagcatccatctcactcaac------------ccacactttgtcaagctcgtctcatggtacgataacgagttcggatactccaacagagtcgtcgaccttatctcctacattgctaccaaggcc??????????????????????????????????????????????????????????????????????????????????????????
+otu_48	??????????????????atggcgtcgtccatgctctccgctaccaccgtgccactccagcaggggggc---------------------------------------ggcctgtccgagttctccgggctcaggagctccgcgtcg------ctgcccatgcgccggaatgccacctccgacgacttcatgtccgccgtctccttcaggacccacgcggtcggcacgagcggcgggccgcggcgggcgccg------acggaggcgaagctgaaggtggccatcaat---------------------------------------gggttcggccgcatcgggcgcaacttcctgcggtgctggcacgggcgcggcgacgcctcg---cccctcgacgtcatcgccatcaacgac---accggaggcgtcaagcaggcgtcccacctgctcaagtacgactccacgctcggcatcttcgat------------------------gcc------gacgtcaagcccgtcggcgacaacgccatctccgtc------------gacggcaaggtcatcaaggtcgtgtccgaccgcaac---cccagcaacctgccgtggggcgagctcggcatcgacctcgtcatcgagggcaccggcgtcttc---------------------------------gtcgaccgcgagggcgcggggaagcacatccaggcgggggccaagaaggtgctcatcacggcgcccggcaagggc---gacatccccacctacgtcgtcggcgttaatgccgaccagtacaaccccgacgagccc---atcatcagc---------------------------aacgcctcctgcaccaccaactgcctcgcgccattcgtcaaggtcctcgac---caaaagttcggcatcatcaagggcaccatgaccaccacccactcctacaccggcgaccagaggctgctggac---gcgagccaccgcgacctgcgccgcgcccgcgccgccgcgctcaacatcgtgcccacgtccaccggtgccgccaaggccgtctcgctggtgctccccaaccttaag---ggcaagctcaacgggatcgcgctccgggtgcccaccccgaacgtctccgtcgtcgacctcgtcgtgcaggtctccaagaag---accctcgccgaggaggtgaaccaggcgttccgcgacgccgcggccaac------------gagctcacgggcatcctcgaggtctgcgacgtgccgctcgtgtccgtcgacttcaggtgctccgacgtctcctccaccatcgacgcctccctcaccatggtcatgggcgacgac------------atggtcaaggtcatctcctggtacgacaacgagtggggctactcgcagagggtcgtcgacctcgctgacatctgcgccaaccagtggaag???????????????????????????????????????????????????????????????????????????????????????
+otu_49	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgggcaagattaagatcggaatcaac---------------------------------------ggcttcggaaggatcggcaggctcgtggccagggtcgccctccagagc---------gag---gatgtcgagctcgtcgccgtcaacgaccccttcatcaccacggactacatgacctacatgttcaagtacgacaccgtccacggccactggaag------------------------cacagcgacatcacactcaaggactccaagacccttctcttcggt---------------gacaagccggtcaccgtctttggcatc---aggaaccctgaggaaatcccgtggggtgaggctggcgctgagtacgtcgtggagtccaccggcgtcttc---------------------------------actgacaaggacaaggctgctgcacatctcaagggtggtgccaagaaggttgttatctctgccccaagc---aaa---gacgcacccatgtttgttgttggtgtcaatgaggacaagtacacctcg---gatgttaacattgtttcc---------------------------aatgctagctgcaccacaaactgccttgctccccttgctaaggtcattcat---gacaacttcggcattgttgagggtctgatgacaactgttcatgccatcactgccacccagaaaactgttgacggaccctcagccaaggactggagaggtggcagggctgccagctttaacatcattccgagcagcaccggtgctgccaaggctgttggtaaagttctccctgatctgaat---ggcaagctcactggtatgtccttccgcgttcctactgtggatgtctcggttgttgacctcaccgtcagaatcgagaagggg---gcctcctatgaggatatcaagaaagctattaaggctgcttccgagggt------------ccactcaagggtatcatgggttatgtggaggaggatctggtttctaccgacttccttggtgacagcaggtcgagcatcttcgacgccaaggccgggatcgctctgaac------------gaccacttcgtcaagcttgtctcgtggtacgacaacgagtggggctacagcaaccgcgtcgtcgacctgatccgccacatgttcaagacccag??????????????????????????????????????????????????????????????????????????????????????????
+otu_50	??????????????????atggcttcggctactttctctgtagccaaaccagctattaaggcaaatggg------------------------------------aaaggcttctctgaattctctggtctccgcaactcttcaagacat---cttcccttttctagaaaa---tcttcagatgattttcattctcttgttaccttccaaaccaatgcagttggaagtagtggaggacacaagaaaagtcttgtagtggaagcaaaacaactgaaggtagccataaat---------------------------------------ggatttggaagaattggaaggaacttcttgagatgttggcatggtcgcaaggactcg------cctcttgatgtcattgcaatcaatgac---accggaggtgtaaagcaagcttctcaccttctcaagtatgattccacacttggaatctttgat------------------------gct------gatgttaagcctgttggtactgatggcatctcagtt------------gatggaaaggttatcaaagttgtctccgaccgcaac---cctgccaaccttccttggaaggagttggggatagacttggtgattgaaggaactggagtgttt---------------------------------gtggacagagaaggtgcagggaggcacattacagcaggggctaagaaggttctcatcactgcccctggaaaagga---gacatccctacttatgtggttggtgtcaatgctgatgcttacacccacgccgacgac---attatcagc---------------------------aatgcttcttgcaccactaactgccttgctccctttgtcaaggtccttgat---cagaaattcggtatcatcaagggtaccatgactactactcactcctacaccggtgaccaacggcttcttgac---gcgagccaccgtgacctaaggcgtgcgagagcagcagccctcaacatagtcccaacatcaacaggagcagctaaagcagtggcccttgtcctcccaacactcaaa---ggcaagctcaacggtattgcgcttcgtgtgccaacaccaaacgtttcggtggtggacctcgtcgttcaagtctcaaagaag---acatttgctgaagaagtgaatgaggcttttagagagagtgcagccaag------------gagctgactggtattctctcggtctgtgacgagccactcgtgtctgtagattttaggtgcaccgatgtgtcgtctaccgttgattcgtcattgacaatggtcatgggcgatgac------------ctggttaaggtgattgcttggtatgataatgagtggggttactcacaaagggttgttgatttggctgacattgttgccaataactggaag???????????????????????????????????????????????????????????????????????????????????????
+otu_51	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttaaggttgctattaac---------------------------------------gggtttggtagaatcggtagattggttttgagaattgctttgcaaaga---------aag---gctctagaagttgttgccgttaacgatccattcatttctgttgattatgccgcttacatgttcaagtacgattccacccatggtagatacaag------------------------ggt------gaagttactaccagcggtaacgacttggtcattgac---------------ggtcacaagattgctgttttccaagaa---aaggatccagctaacttgccatggggtaagctaggtgtcgatatcgttatcgactctactggtgtcttc---------------------------------aaggaattggactccgctcaaaagcatctagacgctggtgccaagaaggtcgtcatcactgctccttcc---aag---actgctccaatgtttgtcgttggtgttaacgaagacaagtacaac------ggtgaaaccattgtttct---------------------------aacgcttcttgtactaccaactgtttggctccaattgctaagattatcaac---gatgaattcggtattgacgaagctttgatgactaccgttcattccatcactgctactcaaaagactgttgatggtccatcccacaaggactggagaggtggtagaactgcttccggtaacattatcccatcctctactggtgctgctaaggctgtcggtaaagtcttgccagaattgcaa---ggtaaattgaccggtatggctttcagagtcccaaccgtcgatgtttctgtcgttgatttgaccgtcaagttggctaaggaa---gccacttacgatgaaatcaaggccgctgttaagaaggcttctcaaggt------------aagctaaagaatgttgttggttacactgaagactctgttgtttccagcgatttccttggtgacactcactccaccatctttgacgcctctgctggtattcaattgtct------------ccaaagttcgtcaaggttgttgcttggtacgataacgaatacggttactctgaaagagttgtcgatttggttgagcacgttgct???????????????????????????????????????????????????????????????????????????????????????????????????
+otu_52	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtaataagagtcggcatcaat---------------------------------------gggtttggccgtattggccgcgttgtgttccgagctgcgcagcgccga---------aac---gacattgaaattgttgggatcaacgat---ttgctcgatgcggattacatggcttacatgttgaagtatgattccactcacggccgcttcgaa------------------------ggt------gcggtagaggttcaaggtggtgcacttgtggtaaat---------------ggcaagaagattcgcgtcacgtccgag---cgggatcccgcgaacttgaagtggaatgaaattaatgtggacgttgttgtggagtctactggacttttt---------------------------------ctctccgacgatacggcacgaaagcacatccaggctggtgccaagaaggtcgtcatcacaggtccttcgaaggac---gacacaccaatgttcgttatgggtgtaaaccacacgacttacaag------ggggaagccatcgtctca---------------------------aacgcatcatgcacgacgaactgccttgccccccttgcaaaggtgctcaac---gacaagtttggcatcgtggaaggactcatgacaaccgttcacgcaacaacggcgacacagaaaactgtcgatggcccctcgcagaaggactggcgtggcggcaggggtgcggcgcagaatattattccttcatccaccggtgcggcgaaagcggtgggcaaaatcatcccgtcgctcaat---ggcaaactcactggtatggccttccgtgtaccaactcccaacgtttccgtggtggatctcacagtgcggctagagcgtccc---gcgacctacaagcaaatttgcgacgccattaaggctgcatcagagggt------------gaactgaaggggattttgggttatgtggacgaagagattgtttccagtgacatcaacggcattccgctcacatccgtcttcgatgccagggcgggtatttcgctgaac------------gacaacttcgttaagctggtctcgtggtatgacaacgaaaccgggtactcaaacaaggtccatgacctcattgcgcacatcacaaag????????????????????????????????????????????????????????????????????????????????????????????????
+otu_53	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtcgtcaaggtcggcatcaac---------------------------------------ggatttggccgtattggccgtatcgtcttccgcaacgctcacgagcac---------tcc---gacgtcgagatcgttgccgtcaacgaccccttcattgagccccactacgctgcctacatgctcaagtatgactctcaacacggcaacttcaag------------------------ggc------gacgtcaccgtcgagggcagcgacctggtcgttggt---------------ggcaagaaggtccgcttctacaccgag---cgtgaccccgctgccatcccctggagcgagactggcgccgactacattgtcgagtccactggtgtcttc---------------------------------accaccaccgagaaggccaaggctcatctgaagggtggtgccaagaaagtcatcatctctgccccctcg---gcc---gatgcccccatgtacgtgatgggcgtcaacgagaagacctacgacggc---tccggcatggtcatctcc---------------------------aacgcctcctgcaccaccaactgcctggctcccctcgccaaggtcatcaac---gacgagttcaagatcattgagggtctcatgaccaccgtccactcctacaccgccacccagaagaccgtcgacggtccctccgccaaggactggcgtggtggacgtaccgccgcccagaacatcatcccctcctccaccggtgccgccaaggctgtcggcaaggtcatccccgagctcaac---ggcaagctcaccggcatgtccatgcgtgtgcccacatccaacgtctccgttgttgacttgactgtccgcatcgagaagggc---gccacctacgagcagatcaagaccgccgtcaagaaggccgccgatggt------------cccctcaagggtgttctggcctacactgaggacgacgtcgtctccactgacatgaacggcaaccccaactcctccatctttgacgccaaggctggtatctccctgaac------------gaccactttgtcaagctggtctcctggtacgacaacgagtggggctactcccgccgtgtccttgacctgatctcccacgtcgccaaggtcgatggcaacgcc?????????????????????????????????????????????????????????????????????????????????
+otu_54	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgactgtcaaggttggcatcaac---------------------------------------ggtttcggccgcattggccgtatcgtcttccgcaatgcggttgagcac---------ccc---gacgtcgagatcgttgccgttaacgaccccttcattgagcccaagtacgctgagtacatgctcaagtacgactctacccacggcgtcttcaag------------------------ggc------accattcaggtctccggcagcgatctgatcgtcaac---------------ggcaagaccgtcaagttctacactgag---cgcgacccctctgccatcccctggaaggacaccggcgccgagtacatcgtcgagtccaccggtgtcttc---------------------------------accaccactgagaaggctagcgcccacttgaagggtggtgccaagcgcgtcatcatctctgccccctcg---gcc---gatgcccccatgtacgtgatgggtgtcaacgagaagacctacgacggc---aaggccgctgtcatctcc---------------------------aacgcctcttgcaccaccaactgcctggctcccctcgccaaggttgtcaac---gacaagttcggcatcgttgagggtctcatgaccaccgtccactcctacactgccacccagaagaccgtcgatggtccctcggccaaggactggcgcggtggccgtggcgctgctcagaacatcatccccagcagcaccggtgccgccaaggccgtcggcaaggttatccccgagctcaac---ggcaagcttaccggcatggccttccgtgtccccacctccaacgtctccgttgtcgacctcacctgccgtctcgagaagccc---gccagctacgagaccatcaaggccgccctcaaggaggcttccgagggt------------gagctcaagggcattctcggctacaccgaggacgagattgtctcctctgacctcaacggcaatgccaactcttccatcttcgacgccaaggctggtatctccctgaac------------gacaactttgtcaagcttgtctcctggtacgacaacgagtggggctacagcagacgtgtgctcgacctcctctcctatgtcgccaagtatgatgcttctcat?????????????????????????????????????????????????????????????????????????????????
+otu_55	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtcgtcaaggttggtatcaac---------------------------------------ggcttcggtcgcattggccgcatcgtcttccgcaatgccatcgagcac---------aac---gacgtcgacattgtcgccgtaaacgaccctttcatcgagccccactacgctgcatacatgctcaagtatgacagcacacacggccagttcaag------------------------ggt------gacatcaaggttgacggcaacaacctgactgtcaac---------------ggcaagaccatccgtttccacatggag---aaggaccccgccaacattccatggagcgagactggcgcttactacgtcgtcgagtctaccggtgtcttc---------------------------------accaccaccgagaaggccaaggcccacttgaagggcggagccaagaaggttgtcatctctgctccctcg---cct---gacgcccccatgttcgtcatgggtgtcaaccacgagacctacaagccc---gacatcgaggcactctcc---------------------------aacgcctcttgcacaaccaactgcttggctcctctcgccaaggtcatccac---gacaagtacaccatcattgagggtctcatgaccaccatccactcatacactgccacccagaaggtcgttgacggcccctccgcaaaggactggcgtggtggacgtactgctgctcaaaacatcattcccagcagcactggtgccgccaaggctgtcggcaaggttatccccgagctcaac---ggtaagctcactggtatggccatgcgtgtccctaccgccaatgtctcggttgtcgacttgactgttcgcatcgagaagggt---gcttcctacgacgagatcaagcaggccgtcaaggaggcctctgagggc------------tccctcaacggtatccttggttacactgaggatgacattgtttccactgacttgaacggcgacaaccgctcctccatcttcgacgccaaggccggtatctcccttaac------------aagaactttgtcaagctcgtctcctggtacgacaacgagtggggttactcccgccgtgtcctcgacctcctggtctacattgccaagattgatggcaacgct?????????????????????????????????????????????????????????????????????????????????
+otu_56	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtcaaggtaggcatcaac---------------------------------------ggtttcggacgcatcggccgtgtcgtctttcgagcggcacagatgcgc---------ccc---gacatcgagattgtcggcattaacgat---ctgctcgatgccgagtacatggcctacagcctcaagtatgactccacgcatggccgctttgat------------------------ggg------acggtggaggtgataaaaggggcacttgttgtgaac---------------ggcaagagcatccgcgtcacgagcgag---cgtgacccggcaaacctcaagtgggacgaaatcggtgtggaggtggtggtggagtctacgggcttattc---------------------------------ctcacgcaggagacagctcacaagcacatcgaggcaggggcaaggcgcgtcgtcatgacggggccgccgaaagat---gacacaccgatgtttgtgatgggcgtgaaccacacaacgtacaag------gggcaaccgattatatct---------------------------aacgcgtcgtgtacgacgaactgcctcgccccactggcaaaggtggtgaac---gagaagtacggtattgttgaaggcctcatgactactgtgcacgcgactacggcaacgcagaagacggtggatggtccctctctgaaagactggcgaggtggtcgcggcgcgtcgcagaacatcatcccctcctccaccggcgctcctaaggccgtcggcaaggtgtacccggctctggat---ggaaagctcactggtatggcttttcgcgttccgaccccgaacgtgtcagtggtcgacctcaccgtgcgtctagagaagcca---gcaacctataaggacatctgcgccgcaatcaaggctgcagcggagggc------------gagatgaaaggcatccttggatacaccgacgatgaggtcgtgtcttcggacttcaacggtgtggcgctgacatctgtttttgacgtgaaggccggtatctcactgaac------------gatcactttgtcaagctcgtgtcttggtatgacaacgaaacaggctactcgcacaaggtactcgatctcatcctacatacgtccgcgagg?????????????????????????????????????????????????????????????????????????????????????????????
+otu_57	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgacagcaccgaaggtcggcatcaac---------------------------------------ggcttcggccgcatcggccgcctcgtcctgcgcgccgccatcgagaag---------gga---acgtgccaggtcgtcgccatcaacgacccgttcatcgacctcgactacatggcctacatgctcaagtacgactcgacgcacggccgctacgcc------------------------ggt------gacgtctccatcaaggacggcaagctgcaggtcgac---------------ggcaactccatcaccgtctttgcccac---cgcgaccccgccgagatcccgtgggccacggccgccgccgactacatcgtcgaggccaccggcgtcttc---------------------------------acgctcaaggacaaggccgccgcccacttcaagggcggcgccaagaaggtcgtcatctcggccccctcc---aag---gacgcccccatgtttgtgtgtggcgtcaacgaggccaagtacaccccg---gacttggacatcatctcc---------------------------aacgcctcgtgcaccactaattgcctcgcccccctcgtcaaggtcatccat---gagaagtacggcatcgaggagggcctcatgaccaccgttcacgccaccaccgccacgcagaagaccgtcgacggtcccagcaacaaggactggcgcggcggccgcggccgtggccgtaacatcatcccgtccagcaccggcgccgcaaaggccgtcggcaaggtcatgcccgagctcaac---ggtaagctcaccggcatggccttccgcgtccccacccccgatgtctccgtcgtcgatctcaccgtccgcctcacgtccgag---actacctacgaggacatcaaggccaccatgaaggccgccgccgac------------gactccatgaagggcatcatgaagtacaccgaagacgccgttgtcagtaccgactttatccatgacgacgcgtcgtgcatatttgacgccagcgcgggcatcatgcttaac------------agcaagttttgcaagctcgtcgcatggtacgacaatgagtggggatattctaaccgcgtcgttgacctcatcgcacacatctccaaggtccag??????????????????????????????????????????????????????????????????????????????????????????
+otu_58	??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgggtgccaagatcaagatcggaatcaac---------------------------------------ggattcggaagaatcggacgtttagttgctagagtagctttgaagaga---------gat---gatgttgaactcgttgcagttaacgatcctttcatcaccactgattacatgacgtatatgtttaagtacgacagtgttcacggacagtggaag------------------------aacgacgaactcaccgtcaaggactctaacactcttctcttcggt---------------cagaagccagttactgtctttgcacac---aggaacccagaagagatcccatgggccagcactggtgctgatatcattgttgagtctactggtgttttt---------------------------------actgataaggacaaggctgctgctcatttgaagggtggtgccaagaaggtcatcatttctgctcccagt---aaa---gatgctcccatgtttgttgttggtgttaatgagaatgaatacaagcca---gagtttgacattatttcc---------------------------aatgctagctgcaccaccaactgccttgcaccacttgcaaaggttattaat---gacaggtttggcattgttgagggtctcatgaccactgtccattccatcaccgccacccagaagactgttgatggaccatcaagcaaggactggagaggtggaagagctgcttcatttaacatcattcccagcagtaccggagctgctaaggctgtcggcaaagtgcttcctgctttgaat---ggaaagttgaccggtatgtcattccgtgtcccaactgtggatgtctccgttgttgaccttacagtgaggcttgagaaggcc---gccacctatgatgaaatcaaagctgctatcaaggaagagtctgagggc------------aagttgaaaggaatccttggttacactgaagacgatgtggtctccactgactttattggtgacaccaggtcaagtatctttgatgccaaggcaggaattgccttaaat------------gacaagtttgttaagcttgtctcatggtatgacaacgagttgggttacagtacccgtgtggttgacctcattgttcacattgctaaacaactt??????????????????????????????????????????????????????????????????????????????????????????
+otu_59	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctgtcaaggttggcatcaac---------------------------------------ggcttcggtcgcattggccgtatcgtcttccgcaacgccgtcgagcac---------ccc---gagattgaggtcgttgccgtcaacgaccctttcatcgaccccgaatatgctgcctacatgctcaagtatgactcttcccacggcgtcttcaag------------------------ggc------gagatcaagaaggacgcggacggcttgattgtcaat---------------ggcaagaaggtcaagttccacactgag---cgcgacccttctgctattccctggaaggcgtctggagccgagtacattgtcgagtccaccggtgtcttc---------------------------------accaccaccgagaaggccaaggcccatttgactggtggcgccaagaaggtcatcatctcggctccttct---gcc---gatgctcccatgtacgttatgggagtcaacgagaagacctatgacggc---aaggccgatgtcatttcc---------------------------aacgcttcttgcaccaccaactgcctggctcccctggccaaggttatccac---gacaagtacaccattgttgagggtctgatgaccactgtccactcgtacactgccacccagaagaccgtcgacggtccctcaggcaaggactggcgaggtggccgtggtgctgcccagaacatcattcccagcagcaccggtgccgccaaggccgtcggcaaggtcattcccgacctcaac---ggcaagctcactggcatgtccatgcgagtgcctactcccaacgtctccgtcgttgacttgactgtccgcatcgagaagggt---gctacttacgacgagatcaaggcgactgtcaaggaggccgccaacggt------------tcccttgccggcattctcggctacaccgaggacgacattgtgtccagcgacatgaacggcaacaccaactcctccatcttcgatgccaaggccggtatctccctcaac------------aagaactttgtcaagctcattgcttggtacgacaacgagtggggctactcccgccgtgttctcgacctccttgcctacgttgccaaggctgacgctagcaag?????????????????????????????????????????????????????????????????????????????????
+otu_60	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctccaatcaaagtaggaattaac---------------------------------------ggatttggccgcatcggtcgtatggtcttgcaagccatatgcgaccagggccttctcggtactgagattgatgttgtcgctgtagttgtcagatcccctgacgctgattacttatcttatcggttgcgctatgactctgttcatggccgcttcaag---cataaggtagatgttgccgcaagtcccgaatgtgagcccggaaagcatgatacccttgtggtaaat---------------ggccataaagtaaagtgcgtgaagggaggccctgatccttctacactcccatggggaaaactgggtgttgactatgtcattgaatccacaggatttttc---------------------------------acagataagtccaaggccgagggtcacatcaaagctggtgctaagaaggttatcatttcagccccagctaaggga---ggcgccaagaccattgtgatgggtgtaaaccagcatgagtacaaccctaatgagcattctgttgtatcc---------------------------aatgcatcttgtactactaatggtctcgcacctattgtacacgtgttgaataaagaaggctttggcataaaggtgggcctgatcactacattgcacgcttataccgctactcagaagacagttgacggcgtctcccagaaggactggcgcggcggacgtgccgcatctgtgaacatcatccccagcagcactggtgccgccaaagctgtaggtgaggtactgcccgagaccaaa---ggtaaactgaccggtatggccttccgtgtgcccacacctgacgtgtccgtagtcgaccttacatttaccactactaaagat---acaagcattaaagagatcgatgccgctctgaagcgtgctgctgcatcg------------tacctccgtggtatcctggacatctccaaagaagagctcgtcagcactgactttatccataatccgaacagctccatctatgactcactggccacactacagaacaacctgcctacagagaagcgcttcttcaaggttgtttcgtggtatgataacgagtggggatactccaatcgcgtggtagacctcgtacgctttatgaattccaaagactccaaatgccatgcaaagttg?????????????????????????????????????????????????????????????????????
+otu_61	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctccaatcaaagtaggaattaac---------------------------------------ggatttgggcgcatcggtcgtatggtcttacaagccatatgcgatcagggccttctcggtactgagattgatgttgtggctgtagttgacatgtccactgacgctgattacttttcttatcaaatccgctatgactctgttcatggccgcttcaag---cataaggtagatgttgccgcaagtcccgaatgtgagcccggaaagcatgatacccttgtgataaat---------------ggccataaagtaaagtgcgtgaaggcaacccgtaacccttctgatcttccatggggaaaactgggtgttgactatgtcattgaatccacagggctattc---------------------------------acagataagtccaaggccgagggtcacatcaaagctggtgctaagaaggttatcatttcagccccagctaaggga---ggcgccaagaccattgtgatgggtgtaaaccagcatgagtacaaccctaatgagcattctgttgtatcc---------------------------aacgcatcttgtactaccaattgcctcgcacctattgtacacgtgttgaataaagaaggctttggcataaaggttggcctgatgactaccatccacgcttataccgctactcagaagacagttgacggcgtctcccagaaggactggcgcggcggacgtgccgcatctgtgaacatcatccccagcagcactggtgccgccaaagctgtaggtgaggtactgcccgagaccaaa---ggtaaactgaccggtatggccttccgtgtgcccacacctgacgtgtccgtagtcgaccttacatttaccactactaaagat---acaagcattaaagagatcgatgccgctctgaagcgtgctgctgcatcg------------tacctccgtggtatcctggacatctccaaagaagagctcgtcagcactgactttatccataatccgaacagctccatctatgactcactggccacactacagaataacctgcctacagagaagcgcttcttcaaggttgtttcgtggtatgataacgagtggggatactccaatcgcgtggtagacctcgtacgctttatgaattccaaagactccaaaagccatgcaaagttg?????????????????????????????????????????????????????????????????????
+otu_62	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttagagttgctattaac---------------------------------------ggtttcggtagaatcggtagattggtcatgagaattgctttgtctaga---------cca---aacgtcgaagttgttgctttgaacgacccattcatcaccaacgactacgctgcttacatgttcaagtacgactccactcacggtagatacgct------------------------ggt------gaagtttcccacgatgacaagcacatcattgtcgat---------------ggtaagaagattgctacttaccaagaa---agagacccagctaacttgccatggggttcttccaacgttgacatcgccattgactccactggtgttttc---------------------------------aaggaattagacactgctcaaaagcacattgacgctggtgccaagaaggttgttatcactgctccatct---tcc---accgccccaatgttcgtcatgggtgttaacgaagaaaaatacacttct---gacttgaagattgtttcc---------------------------aacgcttcttgtaccaccaactgtttggctccattggccaaggttatcaac---gatgctttcggtattgaagaaggtttgatgaccactgtccactctttgactgctactcaaaagactgttgacggtccatcccacaaggactggagaggtggtagaaccgcttccggtaacatcatcccatcctccaccggtgctgctaaggctgtcggtaaggtcttgccagaattgcaa---ggtaagttgaccggtatggctttcagagtcccaaccgtcgatgtctccgttgttgacttgactgtcaagttgaacaaggaa---accacctacgatgaaatcaagaaggttgttaaggctgccgctgaaggt------------aagttgaagggtgttttgggttacaccgaagacgctgttgtctcctctgacttcttgggtgactctcactcttccatcttcgatgcttccgctggtatccaattgtct------------ccaaagttcgtcaagttggtctcctggtacgacaacgaatacggttactctaccagagttgtcgacttggttgaacacgttgccaaggct?????????????????????????????????????????????????????????????????????????????????????????????
+otu_63	attggcccagttgcaaaagtattaaat---gatgaatttggtatcgtgaacggtctgatgactacagtacacgcaattactaatgaccaaaagaatattgat---aatggccataaagatttgcgtagagcacgttcatgtaatgaaagtattattccgacatctacaggcgcacgtaaagcattaaaagaagtgctacctgaagtagaa---ggtaaattacatggaatggcattaagagtaccaactaaaaacgtttctcttgtagacttagtagtagatttagaacaaaat---gtaacagctgatcaaattaatgacgctttcaaaaatgcaaat---------------------ttagatggtgtattggatgtagaaagtgaaccacttgtttcagtagattttaatacaaatcctaattctgcagtaattgatgcacaatcaacaatggttatgggcgataat------------aaagttaaagtaatcgct???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
+otu_64	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggctcccaaggtcggtatcaac---------------------------------------ggcttcggtcgtatcggacgtatcgtcttccgtaacgccatcaaccac---------ggc---gaggttgacgtcgttgctgtcaacgaccccttcatcgagacccactatgctgcctacatgctcaagtacgacagcacccacggccagttcaag------------------------ggc------accatcgagacctacgaggagggtctgattgtcaac---------------ggcaagaagatccgcttcttcgctgag---cgtgaccccgctgccatcccctggggcaccaccggcgctgactacatcgtcgagtccactggtgtcttc---------------------------------accacccaggagaaggccgccgctcacttgaagggtggtgccaagaaggtcgtcatctctgctccttcc---gct---gatgcccccatgttcgtcatgggtgtcaacaacacctcctacaccaag---gacatcaacgtcctctcc---------------------------aacgcttcttgcaccaccaactgccttgctcccctcgccaaggtcatcaac---gacaagttcggcatcgttgagggtctcatgaccaccgtccactcctacaccgctacccagaaggtcgtcgatgccccctccagcaaggactggcgtggtggccgtactgcggcccagaacatcatcccctcttccaccggtgctgccaaggctgtcggcaaggtcatccctacccttaac---ggcaagctcaccggtatggccatgcgtgtccccacctccaacgtctccgttgtcgacttgacctgccgcctcgagaaggcc---accagctacgacgagatcaagaaggccctcaaggacgcttccgagaac------------gagctcaagggcatcctcggctacactgaggacgacatcgtctcctccgacctgaacggtgacgaccactcctccatcttcgatgccaaggccggtatcgcccttaac------------tccaacttcgtcaagctcgtctcctggtacgacaacgagtggggttactcccgccgtgtcgtcgacctcattgcctacatctccaaggttgatgcccag????????????????????????????????????????????????????????????????????????????????????
+otu_65	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtcgaag------attggtattaat---------------------------------------ggatttggccgcattggtcgtctggtgctccgtgccgccgtcgacaaa---------ggt------gccagcgtcgtggccgttaacgatcccttcatcgatgtgaactacatggtctacttgttcaagttcgattctacccacggccgtttcaag------------------------ggc------accgtttccgctgagggcggcttcctggtcgtcaat---------------ggccagaagatcaccgtgttcagcgaa---cgcgaccccgctaacatcaactgggccagcgctggcgctgagtacgtggttgagtctaccggtgtcttc---------------------------------accaccaccgagaaggcctccactcacttgaagggcggtgccaagaaggttgtcatttcggcaccatcc---gct---gatgcgcccatgttcgtgtgcggcgtgaacttggatgcgtacagcccc---gacatgaaggtcgtgtcg---------------------------aacgcttcttgcaccaccaactgcttggccccattggctaaggttatcaac---gataacttcgagattgtggaggtcttgatgaccactgtgcatgccaccacagccacccagaagaccgtcgatggcccctccggcaagttgtggcgtgatggccgtggtgcttgccaaaacatcattccagcatccactggtgccgccaaggccgtcggcaaagtcattccagctctgaat---ggaaaactgactggcatggctttccgtgtgcccacaccaaatgtgtccgttgtcgatttgactgtccgcttgggcaagggc---gccagctacgatgagattaaggccaaggtacaagaggctgccaacggt------------ccattgaagggcattctgggctacaccgatgaggaggttgtctccactgacttccttagcgatactcactcgtctgtgttcgagcccaaggccggcatctcgctcaac------------gacaagttcgtgaagctgatttcctggtacgacaacgagttcggctactccaaccgtgtcattgacttgatcaagtacatgcagagcaaggat??????????????????????????????????????????????????????????????????????????????????????????
+otu_66	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtcgaaagccaacgtcggaatcaac---------------------------------------ggtttcggaagaatcggacgtctcgtgctccgcgcggctgtcgaaaag---------gacacg---gttcaagtggtcgccgtcaacgatccgtttatcacaatcgactacatggtctaccttttcaagtatgactcgacccacggtcaattcaaa------------------------gga------actgttacctacgatggagactttctgattgtccagaaagatggcaaatcatcgcacaagatcaaggtcttcaacagc---aaggatccagctgccatcgcatggggatcagtcaaagccgatttcgttgtcgagtctactggagttttc---------------------------------acgacaaaggaaaaggcctctgcccatcttcaaggaggagccaagaaggtcatcatctctgctccatct---gct---gatgctccaatgtacgtggttggagtaaaccatgagaagtacgacgcttcgaatgatcacgttatttct---------------------------aacgcatcgtgcaccaccaactgtctggcaccactggcgaaggttatcaat---gataacttcggtatcatcgaaggactcatgacgacagtgcacgctgtaaccgcaacccagaagacagtcgatggaccatccggaaagctgtggagagatggacgcggtgctggacaaaacatcattccagcttccactggagccgccaaggctgtgggaaaagtcattccggagctgaac---ggaaagctcactggaatggctttccgtgtccctacaccagatgtttccgtcgttgatctgaccgttcgcctcgagaaacca---gcttcgatggatgacatcaagaaggtagtcaaggccgctgccgatgga------------ccaatgaagggaatcctcgcctacaccgaagatcaagttgtgtcgactgacttcgtgtccgaccctcactcgtccattttcgatactggagcatgcatttcgctcaac------------ccgaacttcgtcaagctcgtctcttggtacgacaatgaatatggatactcgaaccgtgttgtcgacctcatcggatacatcgccacccgtgga??????????????????????????????????????????????????????????????????????????????????????????
+otu_67	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgtcgaaggccaacgtcggaatcaac---------------------------------------ggtttcggaagaatcggacgtctcgtgctccgcgcggctgtcgaaaag---------gacacg---gttcaagtggtcgccgtcaacgatccgtttatcacaatcgactacatggtctaccttttcaagtatgactcgacccacggtcaattcaaa------------------------gga------actgttacctacgatggagactttctgattgtccagaaagatggcaaatcatcgcacaagatcaaggtcttcaacagc---aaggatccagctgccatcgcatggggatcagtcaaagccgatttcgttgtcgagtctactggagttttc---------------------------------acgacaaaggaaaaggcttctgctcatcttcaaggaggagccaagaaggtcatcatctctgctccatct---gct---gatgctccaatgtacgtggttggagtaaaccatgagaagtacgacgcttcgaatgatcacgttgtttct---------------------------aacgcatcgtgcaccaccaactgtctggcaccactggcgaaggttatcaat---gataacttcggtatcatcgaaggactcatgacgacagtgcacgctgtaaccgcaacccagaagacagtcgatggaccatccggaaagctgtggagagatggacgcggtgctggacaaaacatcattccagcttccactggagccgccaaggctgtgggaaaagtcattccggagctgaac---ggaaaactcactggaatggctttccgtgtccctacaccagatgtttccgtcgttgatctgaccgttcgcctcgagaagcca---gcttcgatggacgacatcaagaaggtagtcaaggccgctgccgatgga------------ccaatgaagggaatcctcgcctacactgaagatcaagttgtgtcgactgatttcgtgtccgaccctcactcgtccattttcgatgctggagcatgcatttcgctcaac------------ccgaacttcgtcaagctcgtctcttggtacgacaatgaatatggatactcgaaccgtgttgtcgacctcatcggatacatcgccacccgtgga??????????????????????????????????????????????????????????????????????????????????????????
+otu_68	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atgatcagaattgctattaac---------------------------------------ggtttcggtagaatcggtagattggtcttgagattggctttgcaaaga---------aaa---gacattgaggttgttgctgtcaacgatccatttatctctaacgattatgctgcttacatggtcaagtacgattctactcatggtagatacaag------------------------ggt------actgtttcccatgacgacaagcacatcatcattgat---------------ggtgtcaagatcgctacctaccaagaa---agagacccagctaacttgccatggggttctctaaagatcgatgtcgctgttgactccactggtgttttc---------------------------------aaggaattggacaccgctcaaaagcacattgacgctggtgccaagaaggttgtcatcactgctccatct---tct---tctgctccaatgtttgttgttggtgttaaccacactaaatacactcca---gacaagaagattgtctcc---------------------------aacgcttcttgtaccaccaactgtttggctccattggccaaggttatcaac---gatgctttcggtattgaagaaggtttgatgaccactgttcactccatgaccgccactcaaaagactgttgatggtccatcccacaaggactggagaggtggtagaaccgcttccggtaacattatcccatcctctaccggtgctgctaaggctgtcggtaaggtcttgccagaattgcaa---ggtaagttgaccggtatggctttcagagtcccaaccgtcgatgtttccgttgttgacttgactgtcaagttggaaaaggaa---gctacttacgaccaaatcaagaaggctgttaaggctgccgctgaaggt------------ccaatgaagggtgttttgggttacaccgaagatgccgttgtctcctctgatttcttgggtgacactcacgcttccatcttcgatgcctccgctggtatccaattgtct------------ccaaagttcgtcaagttgatttcctggtacgataacgaatacggttactccgccagagttgttgacttgatcgaatatgttgccaaggct?????????????????????????????????????????????????????????????????????????????????????????????
+otu_69	???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttagagttgctattaac---------------------------------------ggtttcggtagaatcggtagattggttatgagaattgctttgcaaaga---------aag---aacgtcgaagttgttgctttgaacgatcctttcatctctaacgactactccgcttacatgttcaagtacgactctactcacggtagatacgct------------------------ggt------gaagtttcccacgatgacaagcacatcatcgttgat---------------ggtcacaagatcgccactttccaagaa---agagacccagctaacttgccatgggcttctctaaacattgacatcgccattgactccactggtgttttc---------------------------------aaggaattggacactgctcaaaagcacattgacgctggtgccaagaaggttgtcatcactgctccatct---tcc---accgccccaatgttcgtcatgggtgttaacgaagaaaaatacacttct---gacttgaagattgtttcc---------------------------aacgcttcttgtaccaccaactgtttggctccattggccaaggttatcaac---gatgctttcggtattgaagaaggtttgatgaccactgttcactccatgaccgccacccaaaagactgttgacggtccatcccacaaggactggagaggtggtagaaccgcttccggtaacatcatcccatcctctaccggtgctgctaaggctgtcggtaaggtcttgccagaattgcaa---ggtaagttgaccggtatggctttcagagtcccaaccgtcgatgtttccgttgttgacttgactgtcaagttgaacaaggaa---accacctacgatgaaatcaagaaggttgtcaaggctgccgctgaaggt------------aagttgaagggtgtcttgggttacactgaagacgctgttgtctcctctgacttcttgggtgactctaactcttccatcttcgatgctgccgctggtatccaattgtct------------ccaaagttcgtcaagttggtttcctggtacgacaacgaatacggttactctaccagagttgtcgacttggttgaacacgttgccaaggct?????????????????????????????????????????????????????????????????????????????????????????????
+otu_70	?????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggttgtccccaaggttggaatcaac---------------------------------------ggcttcggtcgtatcggccgtattgtcttccgtaacgctatcgagcac---------gag---ggtgttgacatcgttgccgtcaacgaccccttcattgag------------gcctacatgctcaagtatgacagcacccacggccgcttcaac------------------------gga------gccgtcgagttcgacggcaacacgctcatcgtcaac---------------ggcaagaagatcaagttctacgcagag---agggaccccgctcagatcccctggagcgagactggc---cagtacgtcgttgagtccactggtgtcttc---------------------------------accaagcaggagaaggcctcccttcacctgagagggtgtgccaagaaggtcatcatctccgctccctct---tcc---gactcccccatgtttgtcatgggtgtcaacaacgaccaatacaccaag---gacatcaccgtcctttcc---------------------------aacgcctcttgcaccaccaactgcttggctccccttgccaaggtcatcaat---gacaagttcggcatcgtcgagggtctgatgaccacagtccactcctacactgctacccagaaggtcgtcgatggcccctccaacaaggactggagaggtggccgtaccgctgcccagaacatcatccccagctccaccggtgtgcctaaggcagtcggcaaggtcattccttccttgaac---ggcaagctcactggcatgtctatgcgtgtgcctacttccaacgcctccgttgtcgaccttactgcccgtctcgagaaggcc---gccacctacgacgagatcaagcaggccgtcaagaaggcctctgagcgc------------cctctgaagggcatcctcggctacactgaggatgacgttgtctcctccgatctcaacggagacccccactcctccatcttcgatgccaaggctggtatcgccctcaac------------tcgaacttcgtcaagctgttttcctggtacgacaacgagtggggttactcccgccgtgttatcgacctcattgcctatgcc---caggtcgatgcccag????????????????????????????????????????????????????????????????????????????????????
+otu_71	????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????atggtcgtcaaggttggtatcaac---------------------------------------ggcttcggtcgcattggccgtatcgtcttccgcaatgccatcgagcac---------aac---gacgtcgagatcgtcgccgtcaacgaccctttcatcgagccacactacgctgcctacatgctcaagtatgacagccagcacggccagttcaag------------------------ggc------gacatcaaggtcgagggcaacgacctgaccatcaac---------------ggcaagaccatccgcttctacactgag---aaggaccccgccaacatcccatggagcgagaccggcgcatactacgtcgttgagtccaccggtgtcttc---------------------------------accaccaccgacaaggccaaggcccacttgaagggtggtgctaagaaggtcgtcatctccgctccctcg---gct---gatgctcccatgttcgtcatgggtgtcaacaacgagacctacaccaag---gacattgaggtgctctcc---------------------------aacgcctcctgcacaaccaactgcttggctcccctcgccaaggtcatccac---gacaagttcaccatcattgagggtttgatgaccaccgtccactcctacaccgctacccagaaagtcgttgacggcccttccgccaaggactggcgtggtggccgcactgcggcccagaacatcattcccagcagcactggtgccgccaaggctgtcggcaaggtcattcctgacctcaac---ggcaagctcaccggaatgtccatgcgtgttcccacctccaacgtttctgtggttgacttgactgtccgcctcgagaagggt---gctacctacgatgagatcaaggaggccgtcaaggccgccgctgatggt------------cctcttaacggcattctcggatacactgaggacgagatcgtctccaccgacttgaacggtgacacccgctcttccatcttcgacgccaaggccggtatctccctgaac------------aagaacttcgtcaagctcgtctcctggtacgacaacgagtggggttactcccgccgtgtcctcgacctcttggtctacattgccaaggtcgatggtaacgcc?????????????????????????????????????????????????????????????????????????????????
+;
+end;
+
+begin trees;
+	tree glyceraldehyde-3-phosphate_dehydrogenase = ((otu_30:0.49051,((otu_28:0.49077,(((otu_31:2.01423,((otu_19:0.06892,((otu_20:0.03391,otu_18:0.04913)67:0.06131[1],otu_21:0.17073)66:0.05144[1])65:0.15044[1],(otu_60:0.03246,otu_61:0.0051)68:0.24938[1])64:0.25626[1])63:0.09208[0.586],(((((otu_40:0.12574,(otu_36:0.13995,otu_8:0.17533)59:0.19561[1])58:0.17743[1],(otu_48:0.09594,((otu_41:0.08423,otu_33:0.09024)62:0.06736[1],otu_50:0.14704)61:0.26418[1])60:0.11592[1])57:0.0924[1],otu_9:0.15489)56:0.38086[1],otu_63:0.6026)55:0.0988[0.746],otu_11:0.70361)54:0.18437[0.988])53:0.12048[0.994],(((otu_32:0.3374,(otu_45:0.17225,((((otu_38:0.17276,(otu_2:0.45585,otu_3:0.1545)50:0.06724[0.971])49:0.03913[1],otu_39:0.21582)48:0.03959[0.907],(otu_7:0.12835,otu_6:0.09868)51:0.0189[0.894])47:0.04402[1],(otu_22:0.10484,otu_25:0.16061)52:0.02817[0.999])46:0.02715[1])45:0.02351[0.999])44:0.02505[0.935],(((otu_1:0.06459,(otu_65:0.12251,otu_29:0.05151)38:0.03182[0.848])37:0.14809[1],(((otu_47:0.01586,otu_46:0.01469)43:0.08936[1],(otu_67:0.0074,otu_66:0.00478)42:0.16879[1])41:0.12119[1],(otu_34:0.00116,otu_13:0.00225)40:0.45393[1])39:0.05934[0.962])36:0.08529[1],((otu_43:0.03399,otu_42:0.04004)35:0.17074[1],otu_57:0.24799)34:0.04464[1])33:0.03287[0.998])32:0.03751[0.995],((otu_64:0.05167,otu_5:0.09417)24:0.0371[1],(otu_70:0.13406,((otu_14:0.08084,((otu_59:0.1093,otu_54:0.10356)29:0.02172[0.999],(otu_12:0.0721,otu_53:0.08486)30:0.01661[0.856])28:0.02685[1])27:0.04636[1],(otu_71:0.05705,otu_55:0.07532)31:0.05053[1])26:0.03818[1])25:0.02449[0.973])23:0.06116[1])22:0.04736[0.998])21:0.038[0.307])20:0.03735[0.319],((((otu_10:0.02615,otu_49:0.03557)8:0.12455[1],(otu_58:0.13279,((otu_27:0.00127,otu_4:0.00359)11:0.04842[1],otu_35:0.06224)10:0.06368[1])9:0.06089[1])7:0.1334[1],(otu_23:0.0696,otu_24:0.11442)6:0.32447[1])5:0.1036[1],(((((otu_37:0.15673,((otu_69:0.01483,otu_62:0.02881)18:0.03515[1],otu_68:0.06013)17:0.03773[1])16:0.0128[0.45],otu_51:0.13459)15:0.07285[1],(otu_26:0.21596,otu_17:0.07616)
19:0.04614[0.858])14:0.04159[0.8],otu_16:0.13043)13:0.05611[1],otu_15:0.13292)12:0.16149[1])4:0.09891[0.998])3:0.10067[1])2:0.0983[1],(otu_56:0.21641,otu_52:0.19337)69:0.09771[1],otu_44:0.17553)1;
+end;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family8a.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family8a.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/SPAN_Family8a.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,50 @@
+#NEXUS
+begin taxa;
+	dimensions ntax=32;
+	taxlabels
+	otu_1	otu_2	otu_3	otu_4	otu_5	otu_6	otu_7	otu_8	otu_9	otu_10	otu_11	otu_12	otu_13	otu_14	otu_15	otu_16	otu_17	otu_18	otu_19	otu_20	otu_21	otu_22	otu_23	otu_24	otu_25	otu_26	otu_27	otu_28	otu_29	otu_30	otu_31	otu_32;
+end;
+
+begin characters;
+	dimensions nchar=280;
+	format datatype=protein missing=? gap=-;
+	matrix
+otu_1	?????????????????????????????????MTVYSYLVILFILLDNYCSAYGYG-----YSYYHRRHFDPAIASFTK-------EPYIGAVWFTQHGDY----MYVNGSVAGLPPGKLLGTHVHRYG--------GLGNMCLEAGPHFNPFNQ-RHGPRHG--YPRHAGDLGNIRVGRGGVAKFDFYVTIKGLGPFDGFIGRALVIHANRDDLGRN---RDEGSRTTGNSGPRLACATIGFRAP?????????????????????????????????
+otu_2	?????????????????????????????????????????????????????????????????????MAMKAVCVLKG------DGPVQGTIHFEQKASGE--PVVLSGQITGLTE-GQHGFHVHQYG--------DNTQGCTSAGPHFNPHSK-KHGGPAD--EERHVGDLGNVTAGKDGVANVSIEHRVISLSGEHSIIGRTMVVHEKQDDLGKG-G--NEESTKTGNAGSRLACGVIGIAQ??????????????????????????????????
+otu_3	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DV--KGTVHFEQQDAKS--PVLVTGEVNGLAK-GLHGFHVHEFG--------DNTNGCTSAGPHFNPYGN-SHGAPSD--LNRHLGDLGNIEASGDGATKVEISDKLITLFGENSIVGRTIVVHADPDDLGKG---GHELSKTTGNAGARLGCGVIGICKI?????????????????????????????????
+otu_4	??????????????????????????????????????????????????????????????????????MVKAVAVVRG------DSNVKGTVIFEQESESA--PTTITYDISGNDPNAKRGFHIHTFG--------DNTNGCTSAGPHFNPHGT-THGDRTA--EVRHVGDLGNIETDAQGNAKGTVTDNLVKLIGPESVIGRTVVVHAGTDDLGKG---GNEESLKTGNAGPRPACGVIGISQ??????????????????????????????????
+otu_5	????????????????????????????????????MDILSDIANAVLPQDVVSKVE-------------SKRAVAVLRG------TA-VFGTVWLTQKAEGE--ETEFEGEIKGLSP-GLHGFHIHQYG--------DSTDGCTSAGPHFNPCKM-NHGGRDS--VVRHVGDLGNVEAGADGVAKIKFSDKVVSLFGANTVIGRSMVVHVDRDDLGQGIDDKAEESLKTGNAGARAACGVIALAAPA????????????????????????????????
+otu_6	????????????????????????????????????MKRFSLAILALVVATGAQAAS-----------EKVEMNLVTSQG------VGQSIGSVTITETDKG----LEFSPDLKALPP-GEHGFHIHAKGSCQPATKDGKASAAESAGGHLDPQNTGKHEGPEG---AGHLGDLPALVVNNDGKATDAVIAP--RLKSLDEIKDKALMVHVGGDNMSDQ-------PKPLGGGGERYACGVIK?????????????????????????????????????
+otu_7	??????????????????????????????????????????????????????????????????????MVKAVAVLGS------SDGVKGTIFFTQEGDG---PTAVTGSVSGLKP-GLHGFHVHALG--------DTTNGCMSTGHDYNPASK-EHGAPED--ENRHAGDLGNVTAGADGVANINVTDSQIPLTGPNSIIGRAVVVHADPDDLGKG---GHELSKSTGNAGGRVACGIIGLQG??????????????????????????????????
+otu_8	????????????????????????????????????????????????????????M-------------STNAIAVLRG------DT-VSGIIRFKQDKEGL--PTTVTGEVKGLTP-GLHGFHIHQYG--------DTTNGCISAGPHFNPYNK-THGDRTD--EIRHVGDLGNIEAGADGTAHISISDQHIQLLGPNSIIGRSIVVHADQDDLGKGVGAKKDESLKTGNAGARVACGIVAIGAAS????????????????????????????????
+otu_9	???????????????????????????MINSFIVIFLSFLIFINYANLVYVEATHVYGRRSHSNGMHGNGARRAVAVLRG------DAGVSGIIYFQQDSGGS--ITTISGSVSGLTP-GLHGFHVHQYG--------DQTNGCTSAGGHYNPYGK-THGDPND--RIKHIGDLGNIVAGANGVAEVYINSYHIKLRGPLSVIGRSLVVHENPDDLGQGTGNMREESLKTGNAGSRLACAVIGIAAVS????????????????????????????????
+otu_10	??????????????????????????????????????????????????????????????????????MVKAVAVVRG------DSKVQGTVHFEQESESA--PTTISWEIEGNDPNALRGFHIHQFG--------DNTNGCTSAGPHFNPFGK-QHGAPED--DERHVGDLGNISTDGNGVAKGTKQDLLIKLIGKDSILGRTIVVHAGTDDYGKG---GFEDSKTTGHAGARPACGVIGLTQ??????????????????????????????????
+otu_11	??????????????????????????????????????????????????????????????????????MVKAVVVLGS------SEIVKGTIHFVQEGDG---PTTVTGSVSGLKP-GLHGFHIHALG--------DTTNGCMSTGPHYNPAGK-EHGAPED--ETRHAGDLGNVTAGEDGVANIHVVDSQIPLTGPNSIIGRAVVVHADPDDLGKG---GHELSKTTGNAGGRVACGIIGLQG??????????????????????????????????
+otu_12	??????????????????????????????????????????????????????????????????????MVKAVAVLAS------SEGVKGTIFFSQEGDG---PTSVTGSVSGLKP-GLHGFHVHALG--------DTTNGCMSTGPHFNPTGK-EHGAPQD--ENRHAGDLGNITAGADGVANVNVSDSQIPLTGAHSIIGRAVVVHADPDDLGKG---GHELSKTTGNAGGRVACGIIGLQG??????????????????????????????????
+otu_13	???????????????????????????????????????????????????????????????????????MKAVCVMTG------TAGVKGVVKFTQETDNG--PVHVHAEFSGLKA-GKHGFHVHEFG--------DTTNGCTSAGAHFNPTKQ-EHGAPED--SIRHVGDLGNVVAGADGNAVYNATDKLISLNGSHSIIGRTMVIHENEDDLGRG---GHELSKVTGNAGGRLACGVVGLAAE?????????????????????????????????
+otu_14	??????????????????????????????????????????????????????????????????????MVKAVAVLNS------SEGVSGTIFFTQEGDG---PTTVTGNLSGLKP-GLHGFHVHALG--------DTTNGCMSTGPHFNPVGK-EHGAPED--ENRHAGDLGNVTVGDDGTAAFTIIDFQIPLTGPHSIIGRAVVVHGDPDDLGKG---GHELSKTTGNAGGRVACGIIGLQG??????????????????????????????????
+otu_15	???????MAATNTILAFSSPSRLLIPPSSNPSTLRSSFRGVSLNNNNLHRLQSVSFAVKAPSKALTVVSAAKKAVAVLKG------TSDVEGVVTLTQDDSG---PTTVNVRITGLTP-GPHGFHLHEFG--------DTTNGCISTGPHFNPNNM-THGAPED--ECRHAGDLGNINANADGVAETTIVDNQIPLTGPNSVVGRAFVVHELKDDLGKG---GHELSLTTGNAGGRLACGMFKLSITMT???????????????????????????????
+otu_16	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSET--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVSITDSRITLFGADSIIGRTVVVHADADDLGKG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_17	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSET--PVKVSGEVCGLAK-CLHGFHVHEFG--------DNTNGCMSSGPHFNPHGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVSITDSKITLFGADSIIGRTVVVHADADDLGKG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_18	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSGT--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVNITDSKITLFGADSIIGRTVVVHADADDLGQG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_19	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSGT--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVNITDSKITLFGADSIIGRTVVVHADADDLGQG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_20	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSET--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVSITDSKITLFGADSIIGRTVVVHADADDLGKG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_21	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSET--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVSITDSKITLFGADSIIGRTVVVHADADDLGQG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_22	??????????????????????????????????MFMNLLTQVSNAIFPQVEAAQKM-------------SNRAVAVLRG------ET-VTGTIWITQKSEND--QAVIEGEIKGLTP-GLHGFHVHQYG--------DSTNGCISAGPHFNPFGK-THGGPKS--EIRHVGDLGNVEAGADGVAKIKLTDTLVTLYGPNTVVGRSMVVHAGQDDLGEGVGDKAEESKKTGNAGARAACGVIALAAPQ????????????????????????????????
+otu_23	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSGT--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVNITDSKITLFGADSIIGRTVVVHADADDLGQG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_24	MSSIKIEFAVQMRRGDESYAGALRSALDGVGQVEIDTQEGRVIIQTQRPWSEIQDKIEATGVRAVLSGFGGQSAVALINTTGSVVDKTPIQGVVRFTTITADKKPGVVVDGVVDGLSP-GLHGLHIHESG--------DTSAGCSSVGEHYNPRQS-PHGSPAAGAEERHAGDLGNIRADENGRATFRFVDPVLEVWD---IIGRAVVLTANADDLGRG---GNDQSLIDGNSGERIACGIIARSAGILENFKRICACDGVTLWDERNKPLAGKERSQKL
+otu_25	??????????????????????????????????????????????????????????????????????MAKGVAVLNS------SEGVTGTIFFTQEGDG---VTTVSGTVSGLKP-GLHGFHVHALG--------DTTNGCMSTGPHFNPDGK-THGAPED--ANRHAGDLGNITVGDDGTATFTITDCQIPLTGPNSIVGRAVVVHADPDDLGKG---GHELSLATGNAGGRVACGK???????????????????????????????????????
+otu_26	???????????????????????????????????MMQYLVVSLALCATICSAAQTRN-----------MPIQAIAYLIGPVQ-SDNTQVKGNVTFTQNDCGQ--NVHVRVQLEGLKE-GKHGFHIHEKG--------DLTNGCISMGAHYNPDKV-DHGGPDH--EVRHVGDLGNLEANSTGIIDVTYTDQVITLTGKLGIIGRGVVVHELEDDLGLG---NHTDSKKTGNAGGRIACGVIGIK???????????????????????????????????
+otu_27	????????????????????????????????????????????????????????????????MEAPRGNLRAVALIAG------DNNVRGCLQFVQDISG---TTHVTGKISGLSP-GFHGFHIHSFG--------DTTNGCISTGPHFNPLNR-VHGPPNE--EERHAGDLGNILAGSNGVAEILIKDKHIPLSGQYSILGRAVVVHADPDDLGKG---GHKLSKSTGNAGSRVGCGIIGLQSSADAKL????????????????????????????
+otu_28	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQEGEGC--PVKVTGEVTGLAK-GQHGFHVHEFG--------DNTNGCMSSGPHFNPYQK-EHGAPTD--ENRHLGDLGNIIANGDGPTPVNICDCKITLLGANSIIGRTVVVHADPDDLGKG---GHELSKTTGNAGARIGCGVIGIAKI?????????????????????????????????
+otu_29	?????????????????????????????????????????????????????????????????????MVVKAVCVING------DA--KGTVFFEQESSGT--PVKVSGEVCGLAK-GLHGFHVHEFG--------DNTNGCMSSGPHFNPYGK-EHGAPVD--ENRHLGDLGNIEATGDCPTKVNITDSKITLFGADSIIGRTVVVHADADDLGQG---GHELSKSTGNAGARIGCGVIGIAKV?????????????????????????????????
+otu_30	??????????????????????????????????????????????????????????????????????MVQAVAVLKG------DAGVSGVVKFEQASESE--PTTVSYEIAGNSPNAERGFHIHEFG--------DATNGCVSAGPHFNPFKK-THGAPTD--EVRHVGDMGNVKTDENGVAKGSFKDSLIKLIGPTSVVGRSVVIHAGQDDLGKG---DTEESLKTGNAGPRPACGVIGLTN??????????????????????????????????
+otu_31	??????????????????????????????????????????????????????????????????????MVRAVAVLRG------DSKVSGVVTFEQVDQNS--QVSVIVDLVGNDANAKRGFHIHQFG--------DNTNGCTSAGPHFNPEGK-THGDRTA--AVRHVGDLGNLESDAQGNIKTTFSDSVISLFGANSIIGRTIVIHAGEDDLGKG---TSEESLKTGNAGARNACGVIGIAV??????????????????????????????????
+otu_32	????????????????????????????????????MKTRVVLILALSVCIEAAS--------------EVIRARAYIFKAEAGKIPTELIGTIDFDQ--SGS--FLKLNGSVSGLAA-GKHGFHIHEKG--------DTGNGCLSAGGHYNPHKL-SHGAPDD--SNRHIGDLGNIESPASGDTLISVSDSLASLSGQYSIIGRSVVIHEKTDDLGRG---TSDQSKTTGNAGSRLACGTIGTV???????????????????????????????????
+;
+end;
+
+begin trees;
+	tree Cu_Zn_superoxide_dismutase = ((otu_24:0.9929,(((otu_26:0.68297,otu_32:0.55754)30:0.14493[0.82],otu_13:0.29491)29:0.06243[0.44],(((((otu_9:0.31741,otu_8:0.13498)27:0.20268[1],(otu_22:0.14579,otu_5:0.27311)28:0.2533[1])26:0.12655[0.98],((otu_30:0.28255,otu_10:0.25631)25:0.08358[0.91],(otu_31:0.3159,otu_4:0.1635)24:0.11954[0.97])23:0.17514[1])22:0.08988[0.77],(otu_2:0.49149,(otu_3:0.18945,(otu_28:0.11453,(((otu_17:0.00661,otu_16:0.00769)17:0.00497[0.92],(otu_21:0.004,otu_20:0.01012)18:0.0073[0.87])16:0.01271[0.88],(((otu_23:0.00836,otu_19:0.00552)21:0.00203[0.28],otu_18:0.01103)20:0.00398[0.7],otu_29:0.00595)19:0.00739[0.75])15:0.11795[1])14:0.11754[1])13:0.12932[1])12:0.10326[1])11:0.0712[0.9],(((((otu_7:0.05142,otu_11:0.09031)10:0.02799[0.98],otu_12:0.06915)9:0.05245[0.99],(otu_25:0.17064,otu_14:0.1075)8:0.08023[1])7:0.08596[1],otu_27:0.46052)6:0.06401[0.75],otu_15:0.42442)5:0.14252[0.94])4:0.15394[0.91])3:0.16508[0.85])2:0.16387[0.45],otu_1:1.03491,otu_6:1.06056)1;
+end;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/SwissProt.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/SwissProt.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/SwissProt.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,43 @@
+ID   1433_CANAL     STANDARD;      PRT;   264 AA.
+AC   O42766;
+DT   15-JUL-1998 (Rel. 36, Created)
+DT   01-MAR-2002 (Rel. 41, Last sequence update)
+DT   01-MAR-2002 (Rel. 41, Last annotation update)
+DE   14-3-3 protein homolog.
+GN   BMH1 OR BMH.
+OS   Candida albicans (Yeast).
+OC   Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
+OC   Saccharomycetales; mitosporic Saccharomycetales; Candida.
+OX   NCBI_TaxID=5476;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   STRAIN=SC5314;
+RA   Cognetti D., Devine D., Sturtevant J.;
+RT   "The Candida 14-3-3 gene (BMH1) is essential for growth.";
+RL   Submitted (JUN-2001) to the EMBL/GenBank/DDBJ databases.
+CC   -!- SIMILARITY: BELONGS TO THE 14-3-3 FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; AF038154; AAB96910.2; -.
+DR   HSSP; P29312; 1A38.
+DR   InterPro; IPR000308; 14-3-3.
+DR   Pfam; PF00244; 14-3-3; 1.
+DR   PRINTS; PR00305; 1433ZETA.
+DR   ProDom; PD000600; 14-3-3; 1.
+DR   SMART; SM00101; 14_3_3; 1.
+DR   PROSITE; PS00796; 1433_1; 1.
+DR   PROSITE; PS00797; 1433_2; 1.
+SQ   SEQUENCE   264 AA;  29480 MW;  192EC2FFEFD52BB6 CRC64;
+     MPASREDSVY LAKLAEQAER YEEMVENMKA VASSGQELSV EERNLLSVAY KNVIGARRAS
+     WRIVSSIEQK EEAKGNESQV ALIRDYRAKI EAELSKICED ILSVLSDHLI TSAQTGESKV
+     FYYKMKGDYH RYLAEFAIAE KRKEAADLSL EAYKAASDVA VTELPPTHPI RLGLALNFSV
+     FYYEILNSPD RACHLAKQAF DDAVADLETL SEDSYKDSTL IMQLLRDNLT LWTDLSEAPA
+     ATEEQQQSSQ APAAQPTEGK ADQE
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/T7.aln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/T7.aln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/T7.aln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,310 @@
+CLUSTAL W (1.82) multiple sequence alignment
+
+
+AF419503        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419511        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419509        ATGGCTAACGTAATTAAAACCGTTTTGACTTATCAGTTAGATGGCTCCAATCGTGATTTT
+AF419507        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419508        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419504        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419506        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419510        ATGGCTAACGTAATTAAAACCGTTTTGACTTACCAGTTAGATGGCTCCAATCGTGATTTT
+AF419505        ATGGCTAACGTAATTAAAACCGTTTTGACTTATCAGTTAGATGGCTCCAATCGTGATTTT
+                ******************************** ***************************
+
+AF419503        AATATCCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGGTAACTCTTATTGGTGTAGAC
+AF419511        AATATTCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGATAACTCTTATTGGTGTAGAC
+AF419509        AATATTCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGGTAACTCTTATTGGTGTAGAC
+AF419507        AATATCCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGGTAACTCTTATTGGTGTAGAC
+AF419508        AATATTCCATTTGAGTATCTAGCCCGTAAGTTCATAGTGGTAACTCTTATTGGTGTAGAC
+AF419504        AATATTCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGGTAACTCTTATTGGTGTAGAC
+AF419506        AATATTCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGGTAACTCTTATTGGTGTAGAC
+AF419510        AATATTCCGTTTGAATATCTAGCCCGTAAGTTCGTAGTGGTAACTCTTATTGGTGTAGAT
+AF419505        AATATCCCGTTTGAATATCTAGCCCGTAAGTTCATAGTGGTAACTCTTATTGGTGTAGAC
+                ***** ** ***** ****************** ***** ******************* 
+
+AF419503        CGAAAGATCCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+AF419511        CGAAAGATCCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+AF419509        CGAAAGATTCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+AF419507        CGAAAGATCCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATCTCTCTG
+AF419508        CGAAAGATTCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+AF419504        CGAAAGATCCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+AF419506        CGAAAGATTCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+AF419510        CGAAAGATTCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTATTATCTCTCTG
+AF419505        CGAAAGATTCTTACGATTAATACAGACTATCGTTTTGCTACACGTACTACTATTTCTCTG
+                ******** **************************************** *** ******
+
+AF419503        ACAAAGGCTTGGGGTCCAGCCGATGGCTACATGACTATCGAGTTACGTCGAGTAACCTCC
+AF419511        ACAAAGGCTTGGGGTCCAGCCGATGGCTACACAACCATCGAGTTACGTCGAGTAACCTCC
+AF419509        ACAAAGGCTTGGGGTCCAGCCGATGGCTACATGACCATCGAGTTACGTCGAGTAACTTCC
+AF419507        ACAAAAGCTTGGGGTCCAGCCGATGGCTACACGACCATCGAGTTACGTCGAGTAACTTCC
+AF419508        ACAAAGGCTTGGGGTCCAACCGATGGCTACATGACTATCGAGTTACGTCGAGTAACCTCC
+AF419504        ACAAAGGCTTGGGGTCCAACCGATGGCTACATGACTATCGAGTTACGTCGAGTAACCTCC
+AF419506        ACAAAGGCTTGGGGTCCAGCCGATGGCTACATGACTATCGAGTTACGTCGAGTAACCTCC
+AF419510        ACAAAGGCTTGGGGTCCAACCGATGGTTATACGACCATCGAGTTACGTCGAGTAACCTCC
+AF419505        ACAAAGGCTTGGGGTCCAGCCGATGGCTACACGACCATCGAGTTACGTCGAATAACTTCC
+                ***** ************ ******* ** *  ** *************** **** ***
+
+AF419503        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGCGCATATGACCTTAAC
+AF419511        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATTCTCCGCGCATATGACCTTAAC
+AF419509        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGCGCATATGATCTTAAC
+AF419507        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATTCTCCGCGCGTATGATCTTAAC
+AF419508        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGTGCATATGATCTTAAC
+AF419504        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGTGCGTATGATCTTAAC
+AF419506        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGCGCGTATGATCTTAAC
+AF419510        ACTACCGACCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGCGCGTATGATCTTAAC
+AF419505        ACTACCGATCGATTGGTTGACTTTACGGATGGTTCAATCCTCCGCGCGTATGATCTTAAC
+                ******** ***************************** ***** ** ***** ******
+
+AF419503        GTCGCTCAAATTCAAACGATGCACGTAGCGGAAGAGGCCCGTAATCTCACTACGGACACT
+AF419511        GTCGCTCAGATTCAAACGATGCATGTAGCAGAAGAGGCCCGTAATCTCACTACGGATACT
+AF419509        GTTGCTCAAATTCAAACGATGCACGTAGCGGAAGAGGCCCGTAATCTCACTACGGATACT
+AF419507        GTTGCTCAGATTCAAACGATGCACGTAGCAGAAGAGGCTCGTAATCTCACTACGGACACT
+AF419508        GTCGCTCAGATTCAAACGATGCACGTAGCGGAAGAGGCCCGTGATCTCACTACAGATACT
+AF419504        GTCGCTCAGATTCAAACGATGCACGTAGCGGAAGAGGCCCGTGACCTCACTACGGATACT
+AF419506        GTCGCTCAGATTCAAACGATGCACGTAGCGGAAGAGGCCCGTGACCTCACTACGGATACT
+AF419510        GTCGCTCAGATTCAAACGATGCACGTAGCGAAAGAGGCCCGTGACCTCACTACGGATACT
+AF419505        GTCGCTCAGATTCAAACGATGCACGTAGCAGAAGAGGCTCGTAATCTCACTACGGATACT
+                ** ***** ************** *****  ******* *** * ******** ** ***
+
+AF419503        ATCGGTGTCAATAACGATGGTCACTTGGATGCTCGTGGTCGTCGAATTGTGAATCTAGCG
+AF419511        ATCGGTGTCAATAACGATGGTCACTTGGATGCTCGTGGTCGTCGAATTGTGAACCTAGCG
+AF419509        ATCGGTGTCAATAACGATGGTCATTTGGATGCTCGTGGTCGTCGAATTGTGAACCTAGCG
+AF419507        ATCGGTGTCAATAACGATGGTCACTTGGATGCTCGTGGTCGTCGAATTGTGAACTTAGCG
+AF419508        ATCGGTGTCAATAACGATGGTCACTTGGATGCTCGTGGTCGTCGAATTGTGAACTTAGCG
+AF419504        ATCGGTGTCAATAACGATGGTCACTTGGATGCTCGTGGTCGTCGAATTATGAACCTAGCG
+AF419506        ATCGGTGTCAATAACGATGGTCACTTAGATGCTCGTGGTCGTCGAATTGTGAACTTAGCG
+AF419510        ATCGGTGTCAATAACGATGGTCATTTGGATGCTCGTGGTCGTCGAATTGTGAACCTAGCG
+AF419505        ATCGGTGTCAATAACGATGGTCACTTGGATGCTCGTGGTCGTCGAATTGTGAACCTAGCG
+                *********************** ** ********************* ****  *****
+
+AF419503        AACGCCGTGGATGATCGCGATGTTGTTCCGTTTGGTCAACTAAAGACCATGAACCAGAAC
+AF419511        AACGCCGTGGACGACCGCGATGCTGTTTCGTTTGGTCAACTAAAGATCATGAACCAGAAT
+AF419509        AACGCCGTGGATGATCGTGATGCTGTTTCGTTTGGTCAACTAAAGACCATGAACCAGAAC
+AF419507        AACGCCGTGGATGATCGCGATGTTGTTCCGTTTGGTCAACTAAAGACCATGAACCAGAAC
+AF419508        AACGCCGTGGATGATCGCGATGTTGTTCCGTTTGGTCAACTAAAGACCATGAACCAGAAC
+AF419504        AACGCCGTGGACGATCGTGATGCTGTTTCGTTTGGTCAACTAAAGATCATGAACCAAAAC
+AF419506        AACGCCGTGGATGATCGCGATGCTGTTTCATTTGGTCAATTAAAGACCATGAACCAGAAC
+AF419510        AATGCCGTGGATGATCGCGATGTTGTTCCGTTTGGTCAACTAAAGACCATGAACCAGAAC
+AF419505        AACGCCGTGGATGATCGCGATGTTGTTCCATTTGGTCAACTAAAGACCATGAACCAGAAC
+                ** ******** ** ** **** **** * ********* ****** ********* ** 
+
+AF419503        TCATGGCAAGCACGTAATGAAACCTTACAGTTCCGTAATGAGGCTGAGACTTTCAGAAAC
+AF419511        TCATGGCAAGCACGTAATGAAACCTTACAGTTCCGTAATGAGGCTGAGACTTTCAAAAAC
+AF419509        TCATGGCAAGCACGTAATGAAACCTTACAGTTTCGTAATGAGGCTGAGACTTTCAGAAAC
+AF419507        TCATGGCAAGCACGTAATGAAACCTTACAGTTTCGTAATGAGGCTGAGACTTTCAGAAAC
+AF419508        TCATGGCAAGCACGTAATGAAACCTTACAGTTTCGTAATGAGGCTGAGACTTTCAGAAAC
+AF419504        TCATGGCAAGCACGTAATGAAACCTTACAGTTCCGTAATGAGACTGAGACTTTCAAAAAC
+AF419506        TCATGGCAAGCACGTAATGAAACCTTACAGTTTCGTAATGAGGCTGAGACTTTCAGAAAC
+AF419510        TCATGGCAAGCACGTAATGAAACCTTACAGTTCCGTAATGAGGCTGAGACTTTCAGAAAC
+AF419505        TCATGGCAAGCACGTAATGAAACCTTACAGTTCCGTAATGAGGCTGAGACTTTCAGAAAC
+                ******************************** ********* ************ ****
+
+AF419503        CAAGCGGAGGACTTTAAGGACGAGTCCAGTATCAACGCTACGAACACAAAACAGTGGCGC
+AF419511        CAAGCGGAGGACTTTAAGAACGAGTCCAGTATCAACGCTACGAATACAAAGCAATGGTAT
+AF419509        CAAGCGGAGGGCTTTAAGGACGAGTCCAGTATCAACGCTACGAACACAAAGCAGTGGCGC
+AF419507        CAAGCGGAGAGCTTTAAGGACGAGTCCAGTACCAACGTTACGAACACAAAGCAGTGGCGC
+AF419508        CAAGCGGAGGGCTTTAAGGACGAGTCCAGTACCAACGCTACGAACACAAAGCAGTGGCGC
+AF419504        CAAGTGGAGGACTTTAAGAACGAGTCCAGTATCAACGCTACGAACACAAAACAGTGGCGC
+AF419506        CAAGCGGAGAGCTTTAAGGACGAGTCCAGTACCAACGCTACGAACACAAAGCAGTGGCGC
+AF419510        CAAGCGGAGGGCTTTAAGGACGAGTCCAGTACCAACGCTACGAACACAAAGCAGTGGCGC
+AF419505        CAAGCGGAGGGCTTTAAGGACGAGTCCAGTACCAACGCTACGAACACAAAGCAATGGCGC
+                **** ****  ******* ************ ***** ****** ***** ** ***   
+
+AF419503        GATGAAATCAAGGATTTTCGAGACGAAGCCAAGCGGTTCAAGAATACGGCTGATCAATAC
+AF419511        GATGAGATCAAGGATTTTCGAGACGAAGCCAAACGGTTCAAGAATACGGCTGATCAATAC
+AF419509        GATGAGATCAAGGATTTTCGAGACGAAGCCAAGCGGTTCAAGAATACGGCTGATCAATAC
+AF419507        GATGAAATCAAGAATTTCCGAGACGAAGCCAAGCGATTCAAGAATATGGCTGATCAATAC
+AF419508        GATGAAATCAAGGATTTCCGAGACGAAGCCAAGCGATTCAAGAATACGGCTGATCAATAC
+AF419504        GATGAAATCAAGGATTTCCGAGACGAAGCCAAGCGGTTCAAGAATACGGCTGATCAATAC
+AF419506        GATGAAATCAAGGATTTCCGAGACGAAGCCAAGCGATTCAAGAATACGGCTGATCAATAC
+AF419510        GATGAGATCAAAGGTTTTCGAGACGAAGCCAAGCGGTTCAAGAATACGGCTGATCAATAC
+AF419505        GATGAGATCAAGGATTTTCGAGACGAAGCCAAGCGGTTTAAAAATACGGCTGATCAATAC
+                ***** *****   *** ************** ** ** ** **** *************
+
+AF419503        GCTACATCTGCTGGGAACTCTGCTTCCGCTGCGTATCAATCTGAGGTAAACGCTGAGAAC
+AF419511        GCTACATCTGCTGGGAACTCTGCTTCCGCTGCGCATCAATCTGAGGTAAACGCTGAGAAC
+AF419509        GCTACATCTGCTGGGAACTTTGCTTCCGCTGCGCATCAATCTGAGGTAAACGCTGAGAAT
+AF419507        GCTACATCTGCTGAGAACTCTGCTTCCACTGCGCATCAATCTGAGGTAAACGCTGAGAAT
+AF419508        GCTACATCTGCTGGAAACTCTGCTTCCGCTGCGCATCAATCTGAGGTAAACGCTGAGAAT
+AF419504        GCTACATCTGCTGGGAACTCTGCTTCCGCTGCGCATCAATCTGAGATAAACGCTGAGAAT
+AF419506        GCTACATCTGCTGGGAACTCTGCTTCCGCTGCGCATCAATCTGAGGTAAACGCTGAGAAT
+AF419510        GCTACATCTGCTGGGAATTCTGCTTCCGCTGCGCATCAATCTGAGGTAAACGCTGAGAAC
+AF419505        GCTACATCTGCTGAGAACTCTGCTTCCGCTGCGCATCAATCTGAGGTAAACGCTGAGAAC
+                *************  ** * ******* ***** *********** ************* 
+
+AF419503        TCTGCCATAGTATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGACCGTGCGGAA
+AF419511        TCTGCCACAGCATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGACCGTGCGAAA
+AF419509        TCTGCCACAGCATCCGCTAATTCTGCTCATTTGGCAGAACAGCAAGCAGACCGTGCGAAA
+AF419507        TCTGCCACAGCATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGACCGTGCGGAA
+AF419508        TCTGCCACAGCATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGACCGTGCGGAA
+AF419504        TTTGCCACAACATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGATCGTGCGGAA
+AF419506        TCTGCCACAGCATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGATCGTGCGGAA
+AF419510        TCTGCCACAACATCCACTAATTCTGCTCATTTGGCAAAACAGCAAGCAGACCGTGCGGAA
+AF419505        TCTGCCACAACATCCGCTAACTCTGCTCATTTGGCAGAACAGCAAGCAGATCGTGCGGAA
+                * ***** *  **** **** *************** ************* ****** **
+
+AF419503        CGTGAGGCAGACAAGTTGAAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419511        CGTGAGACAGACAAATTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419509        CGTGAGACAGACAAGCTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419507        CGTGAGGCAGATAAGCTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419508        CGTGAGGCAGACAAGCTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419504        CGTGAGGCAGACAAATTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419506        CGTGAGGCAGACAAATTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAGATAGAT
+AF419510        CGTGAGGCAGACAAGCTGAAAAATTACAATGGATTGGCTGGTGCAATTGATAAAATAGAT
+AF419505        CGTGAGGCAGACAAATTGGAAAATTACAATGGATTGGCTAGTGCAATTGATAAAATAGAT
+                ****** **** **  ** ******************** ************* ******
+
+AF419503        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAACGGGCGTCTTTACATGACCACA
+AF419511        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAACGGGCGTCTTTATATGACCACA
+AF419509        GGAACTAATGTGTACTGGAAAGGAAATATTCACGCTAACGGACGTCTTTACATGACCACA
+AF419507        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAACGGACGTTTTTACATGACCACA
+AF419508        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAACGGGCGTCTTTACATGACCACA
+AF419504        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAATGGGCGTCTTTACATGACCACA
+AF419506        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAACGGGCGTCTTTACATGACCACA
+AF419510        GGAACCAATGTGTATTGGAAAGGAAATATTCACGCTAACGGGCGTCTTTACATGACCACA
+AF419505        GGAACCAATGTGTACTGGAAAGGAAATATTCACGCTAACGGGCGTCTTTACATGACCACA
+                ***** ******** *********************** ** *** **** *********
+
+AF419503        AACGATTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTACTCT
+AF419511        AACGGTTTTGATTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTACTCT
+AF419509        AATGATTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTATTCT
+AF419507        AACGGTTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTACTCT
+AF419508        AACGGTTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTTACTAATCGTTACTCT
+AF419504        AACGATTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTACTCT
+AF419506        AACGATTTTGACTGTGGTCAGTATCAACAATTCTTTGGTGATGTTACTAATCGTTACTCT
+AF419510        AACGATTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTACTCT
+AF419505        AACGATTTTGACTGTGGCCAGTATCAACAGTTCTTTGGTGGTGTCACTAATCGTTACTCT
+                ** * ****** ***** *********** ********** *** *********** ***
+
+AF419503        GTCATGGAGTGGGGAGATGAGAACGGATGGTTGATGTATGTTCAACGTAGAGAGTGGACA
+AF419511        GTCATGGAATGGGGAGATGAGAATGGATGGCTGATGTATGTTCAACGTAGAGAATGGACA
+AF419509        GTCATGGAGTGGGGAGATGAGAACGGATGGTTGATGTATGTTCAACGTAGAGAGTGGACA
+AF419507        GTCATGGAGTGGGGAGATGAGAATGGATGGCTGATGTATGTTCAACGTAGAGAATGGACA
+AF419508        GTCATGGAGTGGGGAGATGAGAACGGATGGCTGATGTATGTTCAACGTAGAGAGTGGACA
+AF419504        GTCATGGAGTGGGGAGATGAGAACGGATGGTTGATGTATGTTCAACGTAGAGAGTGGACA
+AF419506        GTCATGGAATGGGGAGATAAGAACGGATGGTTGATGTATGTTCAACGTAGAGAATGGACA
+AF419510        GTCATGGAGTGGGGAGATGAGAACGGATGGTTGATGTATGTTCAACGTAGAGAATGGACA
+AF419505        GTCATGGAGTGGGGAGATGAGAACGGATGGTTGATGTATGTTCAACGTAGAGAGTGGACA
+                ******** ********* **** ****** ********************** ******
+
+AF419503        ACAGCGATAGGCGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACCCAAGGTGGA
+AF419511        ACAGCGATAGGTGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACCCAAGGTGGA
+AF419509        ACAGCGATAGGTGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACTCAAGGTGGA
+AF419507        ACAGCGATAGGTGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACTCAAGGTGGA
+AF419508        ACAGCAATAGGCGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACCCAAGGTGGA
+AF419504        ACAGCGATAGGCGATAACATTCAGTTAGTAGTAAATGGACAGATCATCACCCAAGGTGGA
+AF419506        ACAGCAATAGGCGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACCCAAGGTGGA
+AF419510        ACAGCGATAGGCAATAACATTCAGTTAGTAGTAAACGGACAGATTATCACCCAAGGTGGA
+AF419505        ACAGCGATAGGCGATAACATTCAGTTAGTAGTAAACGGACAGATCATCACCCAAGGTGGA
+                ***** *****  ********************** ******** ***** *********
+
+AF419503        GCCATGACTGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATTC
+AF419511        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATTC
+AF419509        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATCC
+AF419507        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATCC
+AF419508        GCCATGACCGGTCAGCTAAAATTGCAGAATGGACATGTTCTTCAATTAGAATCCGCATCC
+AF419504        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATCC
+AF419506        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATCC
+AF419510        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAATCCGCATCC
+AF419505        GCCATGACCGGTCAGCTAAAATTGCAGAATGGGCATGTTCTTCAATTAGAGTCCGCATCC
+                ******** *********************** ***************** ******* *
+
+AF419503        GACAAGGCGCACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419511        GACAAGGCGCACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419509        GACAAAGCGCACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419507        GACAAGGCACACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419508        GACAAGGCGCACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419504        GACAAGGCGCACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419506        GACAAGGCGCACTATATTCTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419510        GACAAGGCGCACTATATTTTATCTAAAGATGGTAACAGGAATAACTGGTACATTGGTAGA
+AF419505        GACAAGGCACACTATATTCTATCTAAAGATGGTAACAGGAATAATTGGTACATTGGTAGA
+                ***** ** ********* ************************* ***************
+
+AF419503        GGGTCAGATAACAACAATGACTGTACTTTCCATTCTTATGTACATGGTACGACTTTAACA
+AF419511        GGGTCAGATAACAACAATGACTGTACTTTCCATTCTTATGTACATGGTACGACTTTAACA
+AF419509        AGGTCAGATAACAACAATGACTGTATCTTCCATTCTTATGTACATGGTACGACTTTAACA
+AF419507        GGGTCAGATAACAACAATGACTGTACCTTCCATTCTTATGTACATGGTACGACCTTAACA
+AF419508        GGGTCAGATAACAACAATGACTGTACTTTCCATTCTTATGTACATGGTACGACTTTAACA
+AF419504        GGGTCAGATAACAACAATGACTGTACTTTCCATTCTTATGTACATGGTACGACTTTAACA
+AF419506        GGATCAGATAACAACAATGACTGTACTTTCCATTCTTATGTACATGGTACGACTTTAACA
+AF419510        GGGTCAAATAACAACAATGACTGTACCTTTCATTCTTATGTACATGGTACGACTTTAACA
+AF419505        GGGTCAGATAACAACAATGATTGTACCTTTCATTCTTATGTACATGATACAACTTTAACA
+                 * *** ************* ****  ** **************** *** ** ******
+
+AF419503        CTCAAGCAGGACTATGCAGTAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419511        CTCAAGCAGGACTATGCAATAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419509        CTCAAGCAGGACTATGCAGTAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419507        CTCAAGCAGGATTATGCAGTAGTTAACAAACATTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419508        CTCAAGCAGGACTATGCAATAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419504        CTCAAGCAGGACTATGCAGTAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419506        CTCAAGCAGGACTATGCAGTAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419510        CTCAAACAGGACTATGCAGTAGTTAACAAACACTTCCACGTAGGTCAGGCCGTTGTGGCC
+AF419505        CTCAAGCAGGACTATGCAGTAGTTAACAAACACTTTCACGTAGGTCAGGCTGTTGTGGCC
+                ***** ***** ****** ************* ** ************** *********
+
+AF419503        ACTGATGGTAATATTCAAGGTACTAAGTGGGGAGGTAAATGGCTGGATGCTTACCTACGT
+AF419511        ACTGATGGTAATATTCAAGGTACTAAATGGGGAGGTAAATGGCTGGATGCTTACTTACGT
+AF419509        ACTGATGGTAATATTCAAGGTACTAAGTGGGGAGGTAAATGGCTGGATGCTTACCTACGT
+AF419507        ACTGATGGTAATATTCAAGGTACTAAGTGGGAAGGTAAATGGTTGGATGTTTACCTACGT
+AF419508        ACTGATGGTAATATTCAAGGTACTAAGTGGGAAGGTAAATGGCTGGATGTTTACCTACGT
+AF419504        ACTGATGGTAATATTCAAGGTACTAAGTGGGGAGGTAAATGGCTGGATGCTTATCTACGT
+AF419506        ACTGATGGTAATATTCAAGGTACTAAGTGGGGAGGTAAATGGCTGGATGCTTACCTACGT
+AF419510        ACTGATGGTAATATTCAAGGTACTAAGTGGGGAGATAAATGGCTGGATGCTTACCTACGT
+AF419505        ACTGATGGTAATATTCAAGGTACTAAGTGGGAAGGTAAATGGTTGGATGCTTATCTACGT
+                ************************** **** ** ******* ****** ***  *****
+
+AF419503        GACAGCTTCGTTGCAAAGTCCAAAACGTGGACTCAGGTGTGGTCTGGTAGAGCTGGCGGT
+AF419511        GACAGCTTCGTTGCAAAGTCCAAAACGTGGACTCAGGTGTGGTCTGGTAGAGCTGGCGGT
+AF419509        GACAGCTTCGTTGCAAAGTCCAAAACGTGGACTCAGGTGTGGTCTGGTAGAGCTGGCGGT
+AF419507        GATAGCTTCATTGCAAAGTCCAAGACATGGACTCAGGTGTGGTCTGGTAGGGCTGGCGGT
+AF419508        GACAACTTCGTTGCAAAGTCCAAAACGTGGACTCAGGTGTGGTCTGGTAGGGCTGGCGGT
+AF419504        GACAGCTTCGTTGCAAAGTCCAAAACGTGGACTCAGGTGTGGTCTGGTAGGGCTGGCGGT
+AF419506        GACAGCTTCGTTGCAAAGTCCAAGACGTGGACTCAGGTGTGGTCTGGTAGGGCTGGCGGT
+AF419510        GACAGCTTCGTTGCAAAGTCCAAAACGTGGACTCAGGTGTGGTCTGGTAGAGCTGGCGGT
+AF419505        GACAATTTCGTTGCGAAGTCCAAGGCGTGGACTCAAGTGTGGTCTGGTAGGGCTGGCGGT
+                ** *  *** **** ********  * ******** ************** *********
+
+AF419503        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGTGCC
+AF419511        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGTGCC
+AF419509        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGTGCC
+AF419507        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGTTTCCGCAATATCTGGATTAAGTGTGCC
+AF419508        GGGATAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGTGCC
+AF419504        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGTTTCCGCAATATCTGGATTAAGTGTGCC
+AF419506        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGCTTTCGCAATATCTGGATTAAGTGTGCC
+AF419510        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGTGCC
+AF419505        GGGGTAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGTGCC
+                *** **************************** ** ************************
+
+AF419503        AACAATTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATTTATTTCATAGCCTCTGAT
+AF419511        AACAACTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATTTACTTCATAGCCTCTGAT
+AF419509        AACAATTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATTTACTTCATAGCCTCTGAT
+AF419507        AACAACTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATTTACTTCATAGCCTCTGAT
+AF419508        AACAACTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATTTACTTCATAGCCTCTGAT
+AF419504        AACAACTCTTGGAACTTCTTCCGTACTGGTCCCGATGGAATCTACTTCATAGCCTCTGAT
+AF419506        AACAACTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATCTACTTCATAGCTTCTGAT
+AF419510        AACAATTCTTGGAACTTCTTTCGTACTGGCCCCGATGGAATTTACTTCATAGCCTCTGAT
+AF419505        AACAATTCTTGGAACTTCTTCCGTACTGGCCCCGATGGAATCTACTTCATAGCCTCTGAT
+                ***** ************** ******** *********** ** ******** ******
+
+AF419503        GGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419511        GGTGGATGGTTACGATTCCAAATACATTCCAACGGTCTCGGATTTAAGAATATTGCAGAC
+AF419509        GGTGGATGGTTACGATTTCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419507        GGTGGATGGTTACGATTTCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419508        GGTGGATGGTTACGATTTCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419504        GGTGGATGGTTACGATTCCAAATACATTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419506        GGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419510        GGTGGATGGTTACGATTTCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCAGAC
+AF419505        GGTGGATGGTTACGATTCCAAATACACTCCAACGGTTTTGGATTTAAGAATATTGCAGAC
+                ***************** ******** ********* * ***** ***************
+
+AF419503        AGTCATTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAA
+AF419511        AGGCATTCAGTACCTAATGCAATCATGGTGGAAAACGAGTAA
+AF419509        AGTCATTCAGTACCTAATGCAATCATGGTGGAAAACGAGTAA
+AF419507        AGTCATTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAA
+AF419508        AGTCATTCAGTACCTAATGCAATCATGGTGGAAAACGAGTAA
+AF419504        AGTCATTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAA
+AF419506        AGTCATTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAA
+AF419510        AGTCATTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAA
+AF419505        AGTCATTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAA
+                ** ***************************** *********

Added: trunk/packages/bioperl/branches/upstream/current/t/data/Treebase-chlamy-dna.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/Treebase-chlamy-dna.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/Treebase-chlamy-dna.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,167 @@
+#NEXUS
+[File created by TreeBASE:  12/13/00  11:58:39]
+[Matrix accession#: M809]
+BEGIN DATA;
+DIMENSIONS  NTAX=14 NCHAR=629;
+
+[!This data set was downloaded from TreeBASE, a prototype relational database of phylogenetic knowledge. TreeBASE has been supported by the NSF, Harvard University, and UC Davis. Please do not remove this acknowledgment from the Nexus file. 
+TreeBASE © 1994-2000.
+
+Study reference: 
+Coleman, A. W., L. Jaenicke, and R. C. Starr. 2001. Genetics and sexual behavior of the pheromone producer, Chlamydomonas allensworthii (Chlorophyceae). J. Phycology 37, in press.
+
+Study accession number = S549
+Matrix accession number = M809
+]
+FORMAT DATATYPE = DNA MISSING = - GAP = # INTERLEAVE  ;
+MATRIX
+
+'Chlamydomonas_reinhardtii_Crein'         AATCTATCACAATCCACACCGCGAACTAACACTGTTGGCCTCCGTCTGTG-TAAAAGCA-----AACGGGCCAGGTCTGGGCGCAATGTAAAAGTTACGC
+'Chlamydomonas_allensworthii_Cat'         AATCTATCAATACCAACACCGAGAACCGATTCGTCTGACC-CCGTC-GTGGCGCAAGCG---ACGACGG-CGCTTGCCGATCCG----TAAATT-CGGAT
+'Chlamydomonas_allensworthii_21A'         AATCTATCAATACCAACACCGAGAACCAATTCGTCTGACC-CCGTC-GTGGCGCAAGCG---ACGACGG-CGCTTGCCGATCCG----TTTATT-CTGAT
+'Chlamydomonas_allensworthii_Neb'         AATCTATCAATACCAACACCGAGAACCGATTCGTCTGACC-CCGTC-GTGGCGCAAGCG---ACGACGG-CGCTTGCCGTTCCG----TTTATT-CGGAT
+'Chlamydomonas_allensworthii_Hon2'        AATCTATCAATACCAACACCGAGAACCGATTCGTCTGACC-CCGTC-GTGGCGCAAGCG---ACGACGG-CGCTTGCCGATCCG----TTTATT-CGGAT
+'Chlamydomonas_allensworthii_Hon9'        AATCTATCAATACCAACACCGAGAACCAATTCGTCTGACC-CTGTT-GATGCGCAAGCA---GCAACGG-CGCTTGCCGATGCG----TTAACT-CGGGT
+'Chlamydomonas_allensworthii_Chile'       AATCTATCAATAACAACACCGAGAACCATTTCGTCTGACC-CT-TTATAAGCCTTTGGGTTTATAACGG-CGCCTGTTGTTTGG----TTAATT-CCAAG
+'Chlamydomonas_allensworthii_Flam'        AATCTATCAATAACAACACCGAGAACCATTTCGTCTGACC-CT-TTATAAGCCTTTGGGTTTATAACGG-CGCCTGTTGTTTGG----TTAATT-CCAAG
+'Chlamydomonas_allensworthii_88.10'       AATCTATCAATAACAACACCGAGAACCATTTCGTCTGACC-CTTTTATAAGCCTTTGGGTTTATAACGG-CGCCTGTTGTTTGG----TTAATT-CCAAG
+'Chlamydomonas_allensworthii_266'         AATCTATCAATAACAACACCGAGAACCATTTCGTCTGACC-CTTTTATAAGCCTTTGGGTTTATAACGG-CGCCTGTTGTTTGG----TTAATT-CCAAG
+'Chlamydomonas_allensworthii_Krueger'     AATCTATCAATAACAACACCGAGAACCATTTCGTCTGACC-CTTTTATAAAC-TTCGGGTTTATAACGG-CGCCTGTTGTTTGG----TTAATT-CTAAG
+'Chlamydomonas_allensworthii_LCA'         AATCTATCAATACCAACACCGAGAACCATTTCGTCTGACC-CTCCTAT--GCTTTCGGG--TATAGAAG-CGCTCGGAGCCTAG----TTAATT-CTAAG
+'Chlamydomonas_allensworthii_LCH'         AATCTATCAATACCAACACCGAGAACCATTTCGTCTGACC-CTTCTAT--GCTTTCGGG--TATAGAGG-CGCTCGTTGCCTAG----TTAATT-CTAAG
+'Chlamydomonas_allensworthii_LCN'         AATCTATCAATACCAACACCGAGAACCATTTCGTCTGACC-CTTCTAT--GCTTTCGGG--TATAGAGG-CGCTTGGAGCCTAG----TTAATT-CTAAG
+
+
+'Chlamydomonas_reinhardtii_Crein'         CTGGCCTGGGT------TGCCGCAA------GGCATCGGTCTCTTATACTAACCAACCAACACCAAACCAAAACTAAATTAAAACCGAGTATCTAGCTTA
+'Chlamydomonas_allensworthii_Cat'         CGGCGATCGGGCTCCGGGGCCTCAAAACCCTAGGGTCGGTCTCT----GTCCAACAACAACACCAAACCAATACCTCTAACTAAAC-TGCATTCGGCTC-
+'Chlamydomonas_allensworthii_21A'         CGGCGATCGGGCTCCGGGGCCTCAAAACCCTAGGGTCGGTCTCT----GTCCAACAACAACACCAAACCAATACCTCTAACTAAAC-TGCATTCGGCTC-
+'Chlamydomonas_allensworthii_Neb'         CGGCGATCGGGCTCCGGGGCCTCAAAACCCTAGGGTCGGTCTAT----GTCCAACAACAACACCAAACCAATACCTCTAACTAAAC-TGCATTCGGCTC-
+'Chlamydomonas_allensworthii_Hon2'        CGGCGATCGGGCTCCGGGGCCTCAAAACCCTAGGGTCGGTCTAT----GTCCAACAACAACACCAAACCAATACCTCTAACTAAAC-TGCATTCGGCTC-
+'Chlamydomonas_allensworthii_Hon9'        CGGTGATCGGGCTCTAGGGCTTAACAACCCTAGGGTCGGTCCCT----GTCCAACAACAACACCAAACCAATACCTCTAACTAAAC-TGCATTCGGCTC-
+'Chlamydomonas_allensworthii_Chile'       CACCAGGTGGGCTC---GGCTT-------GCCGAGTTGGTCTCT----GTCCAACAACAACACCAAACCAATAATTCTAATTATCC-TGCATTCGGCAT-
+'Chlamydomonas_allensworthii_Flam'        CACCAGGTGGGCTC---GGCTT-------GCCGAGTTGGTCTCT----GTCCAACAACAACACCAAACCAATAATTCTAATTATCC-TGCATTCGGCAT-
+'Chlamydomonas_allensworthii_88.10'       CACCAGGTGGGCTC---GGCTT-------GCCGAGTTGGTCTCT----GTCCAACAACAACACCAAACCAATAATTCTAATTATCC-TGCATTCGGCAT-
+'Chlamydomonas_allensworthii_266'         CACCAGGTGGGCTC---GGCTT-------GCCGAGTTGGTCTCT----GTCCAACAACAACACCAAACCAATAATTCTAATTATCC-TGCATTCGGCAT-
+'Chlamydomonas_allensworthii_Krueger'     CACCAGGTGGGCTC---GGCTT-------GCCGAGTTGGTCTCT----GTCCAACAACAACACCAAACCAATAATTCTAATTATCC-TGCATTCGGCAT-
+'Chlamydomonas_allensworthii_LCA'         CTCCGGGTGGGCTC---GGCTTC-----GGCTGGGTTGGTTCCT----GTCCAACAACAACACCAAACAAATAATTCTAAATAATC-TGCATTCGGCA--
+'Chlamydomonas_allensworthii_LCH'         CTCCGGGTGGGCTC---AGCTTC-----GGCTGGGTTGGTTCTT----GTCCAACAACAACACCAAACAAATAATTCTAAATAATC-TGCATTCGGCA--
+'Chlamydomonas_allensworthii_LCN'         CTCCGGGTGGGCTC---GGCTTC-----GGCTGGGTTGGTCCTT----GTCCAACAACAACACCAAACAAATAATTCTAAATAATC-TGCATTCGGCA--
+
+
+'Chlamydomonas_reinhardtii_Crein'         GAGCTAGTGCTCACTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGGATCGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Cat'         -AGCCGGTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_21A'         -AGCCGGTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Neb'         -AGCCGGTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Hon2'        -AGCCGGTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Hon9'        -AGCCGGTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Chile'       -AGCCGATGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Flam'        -AGCCGATGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_88.10'       -AGCCGATGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_266'         -AGCCGATGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_Krueger'     -AGCCGATGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_LCA'         -ATCGGCTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_LCH'         -ATCGGCTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+'Chlamydomonas_allensworthii_LCN'         -ATCGGCTGC-ATCTAACCAAGA-CAACTCTCAACAACGGATATCTTGGCTCTCGCAACGATGAAGAACGCAGCGAAATGCGATACGTAGTGTGAATTGC
+
+
+'Chlamydomonas_reinhardtii_Crein'         AGAAATACGTGAATCATCGAATCTTTGAACGCATATTGCGCTCGAGGCTTCGGCCAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCCCTA-CT
+'Chlamydomonas_allensworthii_Cat'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_21A'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_Neb'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_Hon2'        AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_Hon9'        AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_Chile'       AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_Flam'        AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_88.10'       AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_266'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_Krueger'     AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_LCA'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATA
+'Chlamydomonas_allensworthii_LCH'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATT
+'Chlamydomonas_allensworthii_LCN'         AGAAAACCGTGAACCATCGAATCTTTGAACGCATATTGCGCTCGACCCTTCGGGGAAGAGCATGTCTGCCTCAGCGTCGGGTT-AATACTCGCTCCAATA
+
+
+'Chlamydomonas_reinhardtii_Crein'         CCAACATACACTTGTGTGTTTGGAGCAAGAGCGGACCTGGCTGTCTCGGTGTTTGATTTTCGGATCAGACGCCGGGTCAGCTGAAGTACAGAGGTTGATG
+'Chlamydomonas_allensworthii_Cat'         CCACA--------------------TTGGAACGGATCTGGCAGTCTCAGTCTGC------------AAAGACTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_21A'         CCACA--------------------TTGGAACGGATCTGGCAGTCTCAGTTTGC------------AAAGACTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_Neb'         CCACA--------------------TTGGAACGGATCTGGCAGTCTCAGTTTGC------------AAAGACTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_Hon2'        CCACA--------------------TTGGAACGGATCTGGCAGTCTCAGTTTGC------------AAAAACTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_Hon9'        CCACA--------------------TTGGAACGGATCTGGCAGTCTCGGTTTGC------------AAAAACCGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_Chile'       CCCCAA-------------------TTGGAACGGATCTGGCAGTCTCAGCTTTTTC--------AATAGAGCTGGGCCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_Flam'        CCCCAA-------------------TTGGAACGGATCTGGCAGTCTCAGCTTTTTC--------AATAGAGCTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_88.10'       CCCCAA-------------------TTGGATCGGATCTGGCAGTCTCAGCTTTTTC--------AATAGAGCTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_266'         CCCCAA-------------------TTGGAACGGATCTGGCAGTCTCAGCTTTTTC--------AATAGAGCTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_Krueger'     CCCCAA-------------------TTGGAACGGATCTGGCAGTCTCAGCTTTTTC--------AATAGAGCTGGGTCTGCTGAAGTGCAGAGGTTGATG
+'Chlamydomonas_allensworthii_LCA'         CCACA--------------------TTGGAACGGACCTGGCAGTCTCAGTTCTTTG--------ATTAGAGCTGGGTCTGCTGAAGTGCAGAGATTGATG
+'Chlamydomonas_allensworthii_LCH'         CCACA--------------------TTGGAACGGACCTGGCAGTCTCAGTTCTTTT--------AATAGAGCTGGGTCTGCTGAAGTGCAGAGATTGATG
+'Chlamydomonas_allensworthii_LCN'         CCACA--------------------TTGGAACGGACCTGGCAGTCTCAGTTCTTTG--------ATTAGAGCTGGGTCTGCTGAAGTGCAGAGATTGATG
+
+
+'Chlamydomonas_reinhardtii_Crein'         CATGGACCCGCTTATGGGCCTCTACTGGGTAGGCAACTCGTTGCTAATGCTTTAGTAGATGGCTTGGAGCTGTGCTTGTCGACCCAAACCAGGAACTTTG
+'Chlamydomonas_allensworthii_Cat'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACT-
+'Chlamydomonas_allensworthii_21A'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACC-
+'Chlamydomonas_allensworthii_Neb'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACT-
+'Chlamydomonas_allensworthii_Hon2'        TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACT-
+'Chlamydomonas_allensworthii_Hon9'        TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACA-
+'Chlamydomonas_allensworthii_Chile'       TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACC-
+'Chlamydomonas_allensworthii_Flam'        TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACC-
+'Chlamydomonas_allensworthii_88.10'       TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACC-
+'Chlamydomonas_allensworthii_266'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACC-
+'Chlamydomonas_allensworthii_Krueger'     TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGACCCTCAACAGGAAAACC-
+'Chlamydomonas_allensworthii_LCA'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGATCCTCAACAGGAAAAC--
+'Chlamydomonas_allensworthii_LCH'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGATCCTCAACAGGAAAAC--
+'Chlamydomonas_allensworthii_LCN'         TA-GGACCCGC-TATGGGCCGCAACTGGGTAGGCA-CTCGT-GCTAATTCTT-AGTTGTTGGCTTGGGACT-TGCT-GTCGATCCTCAACAGGAAAAC--
+
+
+'Chlamydomonas_reinhardtii_Crein'         GCCCTGTGCCGAAGCAAACCCCTATTTTC
+'Chlamydomonas_allensworthii_Cat'         ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_21A'         ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_Neb'         ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_Hon2'        ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_Hon9'        ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_Chile'       ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_Flam'        ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_88.10'       ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_266'         ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_Krueger'     ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_LCA'         ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_LCH'         ---------------ATTAACCTATTTTC
+'Chlamydomonas_allensworthii_LCN'         ---------------ATTAACCTATTTTC
+;
+END;
+
+BEGIN CODONS;
+CODONPOSSET * CodonPositions = 
+ N: 1 - 629 ; 
+ CODESET * UNTITLED = Universal: all ; 
+ END;
+BEGIN ASSUMPTIONS;
+OPTIONS DEFTYPE = unord PolyTcount = MINSTEPS ; 
+ END;
+BEGIN TREEBASE;
+END;
+
+BEGIN TREES;
+
+[! 1 trees. TreeBASE accession#: Tree1641 ]
+	TRANSLATE
+		1	'Chlamydomonas_allensworthii_Krueger',
+		2	'Chlamydomonas_allensworthii_88.10',
+		3	'Chlamydomonas_allensworthii_Chile',
+		4	'Chlamydomonas_allensworthii_Flam',
+		5	'Chlamydomonas_allensworthii_Hon9',
+		6	'Chlamydomonas_allensworthii_Hon2',
+		7	'Chlamydomonas_allensworthii_LCN',
+		8	'Chlamydomonas_allensworthii_LCH',
+		9	'Chlamydomonas_allensworthii_LCA',
+		10	'Chlamydomonas_allensworthii_266',
+		11	'Chlamydomonas_allensworthii_Neb',
+		12	'Chlamydomonas_allensworthii_21A',
+		13	'Chlamydomonas_allensworthii_Cat',
+		14	'Chlamydomonas_reinhardtii_Crein',
+		15	'Chlamydomonas_allensworthii',
+		16	'Chlamydomonas',
+	;
+	TREE 'Fig._2' =  [&R] (((((7,9),8),(1,(10,2,4,3))),(5,((6,11),12,13))),14);
+
+
+END;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/U58726.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/U58726.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/U58726.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,785 @@
+LOCUS       U58726                 31931 bp    DNA     linear   INV 
+DEFINITION  Caenorhabditis elegans cosmid T01C8, complete sequence.
+ACCESSION   U58726
+KEYWORDS    HTG.
+SOURCE      Caenorhabditis elegans.
+  ORGANISM  Caenorhabditis elegans
+            Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
+            Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
+REFERENCE   1  (bases 1 to 31931)
+  AUTHORS   Waterston,R.
+  TITLE     Genome sequence of the nematode C. elegans: a platform for
+            investigating biology. The C. elegans Sequencing Consortium
+  JOURNAL   Science 282 (5396), 2012-2018 (1998)
+  MEDLINE   99069613
+   PUBMED   9851916
+REFERENCE   2  (bases 1 to 31931)
+  AUTHORS   Wohldmann,P. and Hawkins,J.
+  TITLE     The sequence of C. elegans cosmid T01C8
+  JOURNAL   Unpublished (2001)
+REFERENCE   3  (bases 1 to 31931)
+  AUTHORS   Waterston,R.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (19-MAY-1996) Robert Waterston
+REFERENCE   4  (bases 1 to 31931)
+  AUTHORS   Waterston,R.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (25-JUL-2001) Department of Genetics, Washington
+            University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+            Louis, MO 63110, USA
+REFERENCE   5  (bases 1 to 31931)
+  AUTHORS   Waterston,R.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (23-MAY-2002) Department of Genetics, Washington
+            University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+            Louis, MO 63110, USA
+REFERENCE   6  (bases 1 to 31931)
+  AUTHORS   Waterston,R.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (29-JUN-2002) Department of Genetics, Washington
+            University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+            Louis, MO 63110, USA
+COMMENT     Submitted by: Genome Sequencing Center Department of Genetics,
+            Washington University St. Louis , MO 63110, USA, and Sanger
+            Centre, Hinxton Hall Cambridge CB10 IRQ, England email:
+            rw at nematode.wustl.edu and jes at sanger.ac.uk NOTICE: This sequence
+            may not be the entire insert of this clone. It may be shorter
+            because we only sequence overlapping sections once, or longer
+            because we provide a small overlap between neighboring
+            submissions. This sequence was finished as follows unless
+            otherwise noted: all regions were double stranded, sequenced with
+            an alternate chemistry or covered by high quality data (i.e.,
+            phred quality >= 30); an attempt was made to resolve all
+            sequencing problems, such as compressions and repeats; all regions
+            were covered by sequence from more than one m13 subclone. For a
+            graphical representation of this cosmid sequence and its analysis
+            see:
+            http://www.wormbase.org/db/seq/sequence?name=T01C8;class=Sequence
+            NEIGHBORING COSMID INFORMATION The 5' cosmid is EGAP8, 200 bp
+            overlap; the 3' cosmid is F41G4, 2000 bp overlap. Actual start of
+            this cosmid is at base position 1 of T01C8; actual end is at 10488
+            of F41G4. NOTES: Coding seqences below are the result of
+            integration and manual review of the following data : computer
+            analysis using the program Genefinder (P. Green and L. Hillier,
+            personal communication), the large scale EST projects of Yuji
+            Kohara (http://www.ddbj.nig.ac.jp/c-elegans/html/CE_INDEX.html)
+            and The C. elegans ORFeome cloning project
+            (http://worfdb.dfci.harvard.edu/), similarity to other proteins
+            from BlastX analyses (http://blast.wustl.edu/), sequence
+            conservation with C. briggsae using Jim Kent's WABA alignment
+            program (Genome Research 10:1115-1125, 2000), individual C.
+            elegans GenBank submissions, and personal communications with C.
+            elegans researchers. tRNAs are predicted using the program
+            tRNAscan-SE (Lowe, T.M. and Eddy, S.R., 1997, Nucl. Acids. Res.,
+            25, 955-964).
+FEATURES             Location/Qualifiers
+     source          1..31931
+                     /chromosome="X"
+                     /clone="T01C8"
+                     /strain="Bristol N2"
+                     /organism="Caenorhabditis elegans"
+                     /db_xref="taxon:6239"
+     gene            complement(5778..6881)
+                     /gene="T01C8.3"
+                     /note="for a graphical representation of this gene see:
+                     ttp://www.wormbase.org/db/seq/sequence?name=T01C8.3;class
+                     =Sequence"
+     CDS             join(complement(5778..5951),complement(5996..6076),
+                     complement(6123..6219),complement(6268..6317),
+                     complement(6359..6509),complement(6555..6631),
+                     complement(6783..6881))
+                     /product="Hypothetical protein T01C8.3"
+                     /gene="T01C8.3"
+                     /protein_id="AAB00575.1"
+                     /codon_start=1
+                     /translation="MLNHSLPEVWKKTAVSEKNGLQFQKIIVFCPTHGCFTSPTDLPL
+                     GCSTNSRGSIFCICNSTDYCNEMTNVKEEKNITYLICEYAKDSMFRGADCVQPWCVKT
+                     ASSYMDEMVECGEGTYEMEMYDIGFVYSGMLLPINSCYAVADDSRYDKSQICTYKVNK
+                     TTPYKLKVPGSTKCFAPGEVMTRMKNSTCIGQFCYSASAVFGCISQFNREGAILKVTI
+                     FHFEILNKNNNICLTIRNFCKKKL"
+                     /db_xref="GI:1326251"
+     gene            11238..12262
+                     /gene="T01C8.2"
+                     /note="for a graphical representation of this gene see:
+                     ttp://www.wormbase.org/db/seq/sequence?name=T01C8.2;class
+                     =Sequence"
+     CDS             join(11238..11384,12167..12262)
+                     /product="Hypothetical protein T01C8.2"
+                     /gene="T01C8.2"
+                     /protein_id="AAB00576.1"
+                     /codon_start=1
+                     /translation="MPKQEFNPLDYTGPLIVGAIFCVFLFVISFFVINFFCITKYDDI
+                     TKFELMGGKYGWRLGPHPLIVVKKGGFVAEEEVDDA"
+                     /db_xref="GI:1326252"
+                     /note="coded for by the following C. elegans cDNAs:
+                     yk725a4.3"
+     gene            complement(12416..14009)
+                     /gene="T01C8.4"
+                     /note="for a graphical representation of this gene see:
+                     ttp://www.wormbase.org/db/seq/sequence?name=T01C8.4;class
+                     =Sequence"
+     CDS             join(complement(12416..12555),complement(12785..13093),
+                     complement(13138..13358),complement(13606..14009))
+                     /product="Hypothetical protein T01C8.4"
+                     /gene="T01C8.4"
+                     /protein_id="AAB00577.1"
+                     /codon_start=1
+                     /translation="MRLSFFDGIHVASPIKELHTSELFQKEICPVKINLAIEAYRTED
+                     GEPWVLPVVREIELKFPHEPHHNHEYLPILGHDGFCKSATALLLGNDSLAIKEGRSFS
+                     VQCISGTGAICVGAEFLAQVLSMKTIYVSNPCCLCYNPTGMDPTREQWIQMAQVIKQK
+                     NLFTFFHIADQGLASGDADADAWAVRFFVEQGLEMIVSQSFSKNFGLYNDRVGSLTVI
+                     VNKPSHIANLKSQLTLVNVSNFSNPPAYGARIVHEILKSPKYREQWQNSIKMMAFRIK
+                     KTRQELIRELNMLQTSGKWDRITQQSGLFSYTGLTPCQVDHLIAHHKIYLLSDGRINI
+                     CGLNMSNLDYVARAIDDTVRTIH"
+                     /db_xref="GI:1326253"
+                     /note="strong similarity to cytoplasmic aspartate
+                     aminotransferase; coded for by the following C. elegans
+                     cDNAs: cm20g8"
+     gene            complement(16309..17896)
+                     /gene="T01C8.5"
+                     /note="for a graphical representation of this gene see:
+                     ttp://www.wormbase.org/db/seq/sequence?name=T01C8.5;class
+                     =Sequence"
+     CDS             join(complement(16309..16457),complement(16507..16815),
+                     complement(16863..17233),complement(17499..17896))
+                     /product="Hypothetical protein T01C8.5"
+                     /gene="T01C8.5"
+                     /protein_id="AAB00578.1"
+                     /codon_start=1
+                     /translation="MSFFDGIPVAPPIEVFHKNKMYLDETAPVKVNLTIGAYRTEEGQ
+                     PWVLPVVHETEVEIANDTSLNHEYLPVLGHEGFRKAATELVLGAESPAIKEERSFGVQ
+                     CLSGTGALRAGAEFLASVCNMKTVYVSNPTWGNHKLVFKKAGFTTVADYTFWDYDNKR
+                     VHIEKFLSDLESAPEKSVIILHGCAHNPTGMDPTQEQWKLVAEVIKRKNLFTFFDIAY
+                     QGFASGDPAADAWAIRYFVDQGMEMVVSQSFAKNFGLYNERVGNLTVVVNNPAVIAGF
+                     QSQMSLVIRANWSNPPAHGARIVHKVLTTPARREQWNQSIQAMSSRIKQMRAALLRHL
+                     MDLGTPGTWDHIIQQIGMFSYTGLTSAQVDHLIANHKVFLLRDGRINICGLNTKNVEY
+                     VAKAIDETVRAVKSNI"
+                     /db_xref="GI:1326254"
+                     /note="strong similarity to cytoplasmic aspartate
+                     aminotransferase; coded for by the following C. elegans
+                     cDNAs: yk204c10.3, yk327b7.3, yk503d6.3, cm20g8,
+                     yk204c10.5, yk327b7.5, yk468h3.3, yk468h3.5"
+     gene            20873..27634
+                     /gene="T01C8.1"
+                     /note="for a graphical representation of this gene see:
+                     ttp://www.wormbase.org/db/seq/sequence?name=T01C8.1a;clas
+                     s=Sequence"
+     CDS             join(20873..21015,24491..24654,24830..25298,25473..25591,
+                     25817..25922,26012..26117,26167..26374,26476..26787,
+                     27198..27349,27533..27634)
+                     /product="Hypothetical protein T01C8.1b"
+                     /gene="T01C8.1"
+                     /protein_id="AAM69096.1"
+                     /codon_start=1
+                     /translation="MFSHQDRDRDRKEDGGGDGTEMKSKSRSQPSGLNRVKNLSRKLS
+                     AKSRKERKDRDSTDNSSKMSSPGGETSTKQQQELKAQIKIGHYILKETLGVGTFGKVK
+                     VGIHETTQYKVAVKILNRQKIKSLDVVGKIRREIQNLSLFRHPHIIRLYQVISTPSDI
+                     FMIMEHVSGGELFDYIVKHGRLKTAEARRFFQQIISGVDYCHRHMVVHRDLKPENLLL
+                     DEQNNVKIADFGLSNIMTDGDFLRTSCGSPNYAAPEVISGKLYAGPEVDVWSCGVILY
+                     ALLCGTLPFDDEHVPSLFRKIKSGVFPTPDFLERPIVNLLHHMLCVDPMKRATIKDVI
+                     AHEWFQKDLPNYLFPPINESEASIVDIEAVREVTEFQRYHVAEEEVTSALLGDDPHHH
+                     LSIAYNLIVDNKRIADETAKLSIEEFYQVTPNKGPGPVHRHPERIAASVSSKITPTLD
+                     NTEASGANRNKRAKWHLGIRSQSRPEDIMFEVFRAMKQLDMEWKVLNPYHVIVRRKPD
+                     APAADPPKMSLQLYQVDQRSYLLDFKSLADEESGSASASSSRHASMSMPQKPAGIRGT
+                     RTSSMPQAMSMEASIEKMEVHDFSDMSCDVTPPPSPGGAKLSQTMQFFEICAALIGTL
+                     AR"
+                     /db_xref="GI:21629470"
+                     /note="coded for by the following C. elegans cDNAs:
+                     yk237e11.5"
+     CDS             join(20873..21015,24491..24654,24830..25298,25473..25591,
+                     25817..25922,26012..26117,26173..26374,26476..26787,
+                     27198..27349,27533..27634)
+                     /product="Hypothetical protein T01C8.1a"
+                     /gene="T01C8.1"
+                     /protein_id="AAM69095.1"
+                     /codon_start=1
+                     /translation="MFSHQDRDRDRKEDGGGDGTEMKSKSRSQPSGLNRVKNLSRKLS
+                     AKSRKERKDRDSTDNSSKMSSPGGETSTKQQQELKAQIKIGHYILKETLGVGTFGKVK
+                     VGIHETTQYKVAVKILNRQKIKSLDVVGKIRREIQNLSLFRHPHIIRLYQVISTPSDI
+                     FMIMEHVSGGELFDYIVKHGRLKTAEARRFFQQIISGVDYCHRHMVVHRDLKPENLLL
+                     DEQNNVKIADFGLSNIMTDGDFLRTSCGSPNYAAPEVISGKLYAGPEVDVWSCGVILY
+                     ALLCGTLPFDDEHVPSLFRKIKSGVFPTPDFLERPIVNLLHHMLCVDPMKRATIKDVI
+                     AHEWFQKDLPNYLFPPINESEASIVDIEAVREVTERYHVAEEEVTSALLGDDPHHHLS
+                     IAYNLIVDNKRIADETAKLSIEEFYQVTPNKGPGPVHRHPERIAASVSSKITPTLDNT
+                     EASGANRNKRAKWHLGIRSQSRPEDIMFEVFRAMKQLDMEWKVLNPYHVIVRRKPDAP
+                     AADPPKMSLQLYQVDQRSYLLDFKSLADEESGSASASSSRHASMSMPQKPAGIRGTRT
+                     SSMPQAMSMEASIEKMEVHDFSDMSCDVTPPPSPGGAKLSQTMQFFEICAALIGTLAR
+                     "
+                     /db_xref="GI:21629469"
+                     /note="strong similarity to 5'-AMP-activated protein
+                     kinase; coded for by the following C. elegans cDNAs:
+                     yk443g12.5, yk298g7.5, yk273e10.5, yk390b1.5, yk114d1.3,
+                     yk114d1.5, yk133b5.3, yk133b5.5, yk222d7.3, yk222d7.5,
+                     yk273e10.3, yk390b1.3, yk402f12.3, yk402f12.5"
+     gene            complement(28176..31889)
+                     /gene="mec-4"
+                     /note="for a graphical representation of this gene see:
+                     ttp://www.wormbase.org/db/seq/sequence?name=T01C8.7;class
+                     =Sequence"
+     CDS             join(complement(28176..28361),complement(28501..28576),
+                     complement(28625..28753),complement(28811..28871),
+                     complement(28916..29030),complement(29154..29249),
+                     complement(29301..29473),complement(29567..29655),
+                     complement(29698..29855),complement(29903..30051),
+                     complement(30149..30309),complement(30526..30637),
+                     complement(30690..31052),complement(31100..31355),
+                     complement(31716..31889))
+                     /product="C. elegans MEC-4 protein (corresponding
+                     sequence T01C8.7)"
+                     /gene="mec-4"
+                     /protein_id="AAB00580.2"
+                     /codon_start=1
+                     /translation="MQNLKNYQHLRDPSEYMSQVYGDPLAYLQETTKFVTEREYYEDF
+                     GYGECFNSTESEVQCELITGEFDPKLLPYDKRLAWHFKEFCYKTSAHGIPMIGEAPNV
+                     YYRAVWVVLFLGCMIMLYLNAQSVLDKYNRNEKIVDIQLKFDTAPFPAITLCNLNPYK
+                     ASLATSVDLVKRTLSAFDGAMGKAGGNKDHEEEREVVTEPPTTPAPTTKPARRRGKRD
+                     LSGAFFEPGFARCLCGSQGSSEQEDKDEEKEEELLETTTKKVFNINDADEEWDGMEEY
+                     DNEHYENYDVEATTGMNMMEECQSERTKFDEPTGFDDRCICAFDRSTHDAWPCFLNGT
+                     WETTECDTCNEHAFCTKDNKTAKGHRSPCICAPSRFCVAYNGKTPPIEIWTYLQGGTP
+                     TEDPNFLEAMGFQGMTDEVAIVTKAKENIMFAMATLSMQDRERLSTTKRELVHKCSFN
+                     GKACDIEADFLTHIDPAFGSCFTFNHNRTVNLTSIRAGPMYGLRMLVYVNASDYMPTT
+                     EATGVRLTIHDKEDFPFPDTFGYSAPTGYVSSFGLRLRKMSRLPAPYGDCVPDGKTSD
+                     YIYSNYEYSVEGCYRSCFQQLVLKECRCGDPRFPVPENARHCDAADPIARKCLDARMN
+                     DLGGLHGSFRCRCQQPCRQSIYSVTYSPAKWPSLSLQIQLGSCNGTAVECNKHYKENG
+                     AMVEVFYEQLNFEMLTESEAYGFVNLLADFGGQLGLWCGISFLTCCEFVFLFLETAYM
+                     SAEHNYSLYKKKKAEKAKKIASGSF"
+                     /db_xref="GI:15011790"
+                     /note="C. elegans mechanosensory protein 4 (PIR:S13645);
+                     coded for by the following C. elegans cDNAs: yk411c2.3,
+                     yk411c2.5"
+BASE COUNT    10115 a   5759 c   5657 g  10400 t
+ORIGIN      
+        1 gatcaaaacc caaaaaaaaa tttcattcaa aaatttggtt ctttcttaca agcaaaatgc
+       61 gaggaatagg tgagaaggaa ctgagactga aactaaaaat ttgtgaatag aaactaaaaa
+      121 aaaactttat caaggtgttg caattttggt ttctgcaaaa ctgacttgac agtgataagc
+      181 tgatagcagc cagaaaccga tagcggtttc aatgacaatt cggggcaagt tggagatcaa
+      241 ggactcgttc cacgtggagc gatgtaaagg tctgtttgtt ttgtattgaa tcaaaatcag
+      301 aaaaaaaaag aatcagaaac agcaatagat gaaacagaag aaagaaaaat atgataaaat
+      361 tgataatgaa attgaaaacc acgccgacgc tgttcgttga ttgattgagg tgggaaacgt
+      421 gagcgagaaa aaagtcaaaa gatgataagt ctatgtgaac aagtcacgtt gcttgaatgg
+      481 acccctcgag gcgtgagtgt gtcctccgtt atcaagtgat catagtgggt aggtgggtgt
+      541 atgatttgaa attgtgaaca ccgacgaata tacataactt acgccgaaca gttgttcgct
+      601 cgcaagacaa ggtacttacc aaactctcaa cgttttccat ttctcactct gccgaccccc
+      661 catcacttta ccgaagtgtt taggtgtgtt ggtgtgttgg tactcaaaac catagttgag
+      721 ttgacatttc atgagtcagc gtagaacgat atacaaagat cataagtgaa atgtatgtag
+      781 gttagtggaa aaaagatttg ttagtatgta agtacattaa attttgaaaa atcagacttt
+      841 taatatatat ggctgtgaat tcaaaattta aaaaaatata ggttaaaaat gacctaattt
+      901 tagttgaaat cttcaatacg acaggatgga agaagcttaa aggtggtgta gtcgaatttg
+      961 agactatgct taaaaacatt ctctatgctc gtacaagtcg atatgataaa gcgaaacatc
+     1021 tcaaaaattt ctctaacgga agttatgagc cttcaaagtg ccgaaaaaaa tctctttctt
+     1081 cactgtcaaa attttttttg ttgaaaattt agcaatttac caaaacttcc actacaactt
+     1141 ttgaaaggaa ggatcaatgc aacaaagttc aggcgtagta ggcaataaat gcatttttcg
+     1201 aataatttta gctattttcg aattagcatt taagttgcac ttttgaaaat gatatttttc
+     1261 agtttcaaga tgttttatct tttcagtaat attccaaact aggaaagtca tatggaaaaa
+     1321 atgacaagga acatgatttc gattcaaagt tacactatat gttatattaa aaagggagca
+     1381 atactttttc agagtagcgc ggcaagcgca atattactac ggaggctatg ataatagaag
+     1441 gagtacggta aaactagagt tcaaatagtt caaggagaac aagtcttaga catatgacaa
+     1501 tgatcacaat tctacgagaa tcttgctcat tttaatgcta tttaaggaaa ggaatcactt
+     1561 gtcataaatt gtaaagcaaa taaacaaagt tcaagtaagc gggggatgac cgttcttcct
+     1621 aattgtttat cttttcttat cctcttttca caatcgagca gaaggatgca tcacggaatg
+     1681 agaatacgaa tacacagaaa aacatctgca caccattatt gtttacaagc cttgcatatt
+     1741 tcaaggtctt caaggaactg atcaaaaagt gacggacaac acattgtaca ggtcagttga
+     1801 tgaacgagag aaagagtttc aaaaaagtgt tatatattga gtatcgggtt acaaggaagg
+     1861 aaacggcaga gttaaaacgt gtcgggttac aaaacaacac aattgttgat tggattaatc
+     1921 cagaggaatt gaacaaagaa tggaaagaga aaaacaaaag cgaagtcaac attcagaaac
+     1981 taatgcgctt gggaactttc gatcaaatgg agaaaaaatt attgcattgg agggctagag
+     2041 caaagagttg ctatactttt tcagtttggc acaatattgt actctgttaa tttctccaat
+     2101 tagttactta agttgattaa ttaattggtt tatttttcag ttgaggtaaa aaaaaacaaa
+     2161 aaataccgcg tcttgttatt ttattatcgc atgcaaaatt atcgcatgtt attttattat
+     2221 cgcatgttca tatttagtgc aatactgaaa ttggagacaa ttttcacgtt tttttggtca
+     2281 cataatttga gttttacaaa aaaggcattt tttacgtcaa tttgaagcag cttatcaatt
+     2341 gtttttggtg actataatag agcaatagaa aaaaattcgc tactctagca ggaaatcaat
+     2401 aaacttaggt atgcagtgct ttaaggaacg ctcggggata ctaatagata aatttcaatt
+     2461 ttatagtact tcataaatat caatcaccaa tgttcagagc gttttgaaac tgagaaattg
+     2521 tttacaaatg atgcgatcgt ttgtaatgtg caattaaacg tttgaaaaca tttcaaaaat
+     2581 cttccaggaa caatgtgagt ttctgaccga tgattgtatc atttcacatt tggatatctt
+     2641 tatcattttg aaaactttac atacatttaa tttttacaac actaatacaa gtacaataga
+     2701 cgacaatttc taaatttagc tttacatgac ataactcatt gaaccatgtt ttaattgttc
+     2761 cctaacctca ttaactcaga ttttacactt gatagaaaaa ccgaaattca tagaccaaat
+     2821 acgtgaaaat aaaataaaat aggaaaaagc tggttgggtt gtttttgttt tccttattgt
+     2881 atactactca catttctcaa caaaaacact tgaagaatta gtttttttat cgcatttcca
+     2941 gatcttttgc agcagaaatg tagcaaaaaa aattgaaaaa aaaaagaaaa aagcttgaag
+     3001 aaaaaagaaa gatgaactac aaaggaagtt gagggaaaca ctaaacagaa gctagaacat
+     3061 tttagatctg caaaaaacat ttaaaaaagt attggctatg ttggggttga agaaacatta
+     3121 ttggaaactt tgaaacaact tgtaaatact caacctttaa aaatttgtta tattttcttg
+     3181 tgacaacaat ttattttctt aatctcatat tttttttgta tgccgtttca tttttcatgc
+     3241 tcaacaactt aaaaacaata atcattctaa aacagcaaag tcaatacaga caagaacaca
+     3301 aaaattcaat tcttaacatt ccaaaatgtt tcttaaaagc tttaagcttg acgctttcta
+     3361 aattgaaatt taatgtatgc aaactagtgg ggagaaaagc agaccaacga ggtggtcatt
+     3421 cgtagaaaga gcggcagacc cccccccccc ctttgtgttg ttgctcatac tcctccaaat
+     3481 gtgatgcaat ctctctgtct gcctcacacc gcttcaaaag cctcccgcgc tccgtgtccg
+     3541 tcagaatatt cgaatgcatt tcattttcat ctgccgttta tgagatgtga gggctgctga
+     3601 aatagaaatt gaaaaatgtc tggaaagcat tgtcgtcgaa tccaccctaa aaaggctaca
+     3661 atcgacgacc ttgcagcgct ataacatagt tctcattgga atatcccact attcttaaac
+     3721 tatgttaaca ctgccagatt ctcctaatta ctctcttgtt aattcctttt atttcattca
+     3781 aaaccgttaa gtataagccg agatacactc aacttacata atcaccaaac gagctagttg
+     3841 cattcaaact acgcgcccga tcgccaccgt acctctcttc ccctcgagac ccacgcggca
+     3901 ccactacttg gcaccgcgcc aagaaagggg gagaggtggc ggcgcgcgct atgtttgttc
+     3961 gtacacgaag atatgcgtgc cacccaacgg ttaattctac tcgaaaatac cttttaatcg
+     4021 agcaagaata ttattatttg ttagaattaa ttaaatatta aagtttgaac caaattgata
+     4081 gtctaaagat caaaatcgta ttccatttcg ccatgaggat ttcggaccac aagaacagtt
+     4141 ttttataaca cctacttttc tgttctttaa tgtattaccc actttttact tgttattttc
+     4201 ttatttattg atggcctgcg cgaccctagg gcgtggttgt acaataaata agtttttgag
+     4261 ttatcttcgt ttgtgttatt cgtttctttc tctctattgc aggttttagg tgtattccgt
+     4321 cgtgaagggg gtgtttggga gtgggaaggg ttgtccacga tgccgactac gtacacttta
+     4381 ttggatttcg ccacaagcat gttggattca tatatttcaa tttttaaccg gttcaattca
+     4441 ttaacttggt ggtttcattt atataaggtt taatgcaatt atgattctct cagaagcacc
+     4501 gagaaaattg cggtttccac tgaacacaat ataaagaaat ttgatttaaa taggcgcaat
+     4561 aaatttaaaa aaacaactta ttgtaatttc cttcccgatg actaagtatg taaatttgga
+     4621 atatcactga atttttaatt tttcaagctt gaaacggctt caaaaaaatg tatttccttc
+     4681 gctcagttct ttttaaaaac aaaattttta cctataatta tttttctaca tacatggata
+     4741 gagccaacta gacacatttt tattttgtag atattcagga acagaaaaaa aaacaaattt
+     4801 ttccaaaaaa taaaaacaaa gtgtttagaa aatttaaaaa ttctagcttt tcttatcatt
+     4861 aataaatttt tcttatcact tgtctgtgca aaaaacgaaa tcaaataacg gatgacatct
+     4921 ttattggaaa atccaaatat tttagagaaa caagcaaaag aaatatggga aagaatgaga
+     4981 aaatgatctt tgcaacgacg gcggccaaga aaagggcatg ttctgaaagt gcttttcctt
+     5041 gaaggcaaac tctctgagca actgagcaac tgggaattag acatacacca accacaccaa
+     5101 ttgtatcaaa atgagcacta cgaagcattg cttctttgca gaactccaca cataagttgt
+     5161 cgggtaagaa tatcttcctt cgacatcatc ttccaactga aatagttgtt cttcgaaccg
+     5221 gggtgaaagg tgtgaatgtc aatgataaga aacagacact agtacatacc gtttgattgt
+     5281 tgcaaaagtc gctatcacag atttggaagg gtgttaaaaa cggtgtaaga tggtataagc
+     5341 cggcctgaac acgtcttgat tgcacttttt cgtttgctcg gtttgggggt acttttgttc
+     5401 ttgctcgaaa tatttggact tgagaatatg aaagaggggt caagtgaata cgaaactgtc
+     5461 aacaacaaaa aaattccaga ccttgtcttc gtgacccaaa ttaaggtttt cttcgtgacc
+     5521 caaattaagg cttgtaaaat aatattaaaa aaacaaaaat cttaaaatat ttttcttcgg
+     5581 caagtttagg tatggtatag tttcatcaaa cttgaatttg atagaaactt tcagaaagtc
+     5641 ttttcagatt tcaacatgac tttcgcggta actcaaagta tgaaatgaag aaatacctga
+     5701 atttagtcgt atgtatttct acaatttgtt aattttccca gaaaaaatgt gacgtcactc
+     5761 gtgtttttaa taacgtttta cagttttttt ttgcagaaat ttctgatggt caaacaaatg
+     5821 ttgttgtttt tgtttaaaat ttcaaaatga aaaatggtta ctttcaatat ggctccttcc
+     5881 ctattgaatt gagaaatgca tccaaaaaca gcgctggctg agtaacagaa ctgcccaata
+     5941 cacgttgaat tctaaatttt cattttctcg ttgttttgtt ttttttcatt ctcactttca
+     6001 tccttgtcat gacttctcct ggagcaaagc atttggtact tccgggcact ttcagtttgt
+     6061 atggagttgt tttgttctga actttaacta tgtaatattt gcggtcttca atcataactt
+     6121 accactttat atgtacaaat ttgacttttg tcataacgtg agtcatctgc aacggcatag
+     6181 catgagttga taggtagtaa cataccactg tagacgaatc tataaaaata ttctttatta
+     6241 gtttcaagtc atacagtttt cctttaccca atatcataca tctccatttc atatgtccct
+     6301 tctccgcatt caaccatcta aattattaac atagttatat atataatcac agccttacct
+     6361 catccatata actagatgct gtcttcacac accacggttg cacacaatcg gctcctctga
+     6421 acatcgaatc tttagcgtat tcacaaatta aatacgtgat atttttctcc tcttttacgt
+     6481 tggtcatctc attacagtaa tccgtactgc tggaaaaaaa aatgtagagt tttaactttc
+     6541 tataggaatt ttacttgcaa atacagaaaa tgcttcctcg gctgtttgtg gaacatccca
+     6601 atggcaagtc ggtaggagat gtgaagcacc cctaaaattt ttttaaaaat tgtctttttg
+     6661 aaattttgcg gtagtttaga attttaccct cttttcgttt aatttgaaat taaggaatac
+     6721 tatttattgc cggaacggta taaaatttta aagaataatg ctactaaata tttattttcc
+     6781 acgtgtgttg ggcagaaaac tataattttc tgaaactgca gtccattttt ttctgaaact
+     6841 gctgtctttt tccaaacttc tggcaaactg tggttgagca ttttttttta tggaagtttt
+     6901 tgaatgtcta ttttcaaact acaaaaattg aaggaacttg ttcgaggatt aagaaaaaaa
+     6961 aatttcaaat gttttcaaaa aaggttttct ttaaaaatat taaaaaagaa ccgaaaactt
+     7021 cgaaaacatt tacatagttt tttagttttg gttataaaaa aggtaaacga aaaatttccg
+     7081 agtacaaaaa atttggcaaa tcggcaaact gccctttctc agattttttc ctgatatcat
+     7141 acccgatacc tgaatcaaga ttcaaagaga ctgaaattta ataatggtgt tatacgcgtg
+     7201 gtgtcagagt gtctcatttc ggcttgatct acgtagatct acaaaaaaat gcgagaaaag
+     7261 agacgcagag ttctgaactg atttcgtatg agagaacgtg ctgacgtcac atatttttcg
+     7321 gcacaacatt tccgcaattt ttgtagatca aaacgtaatg ggacagcctg gcaccacgtg
+     7381 ttttaatata ttcaaattag ctattgcaaa aagtcaaaac attttatgag agaatcaaaa
+     7441 ttcataactt ttcatttcgt tattcaaaaa aatcagcttt cagttctcta ttaattcgtt
+     7501 ttgggaagtt ttttacattt tcagaatcaa cattagcatt tatacattga gtaaaacaaa
+     7561 cctgataata agactgctcg tttttcagag acgggttcag gtgtttataa tttacggcgt
+     7621 aacagtaatc tccggtgcag tttgtacatc tgaagttaca gttttcaagc agttgcattt
+     7681 acaggaaaaa ttacgtttta tctaccgaaa taacccttcc atcacatttg acacattcaa
+     7741 gttcgcaatc acaaaaagtc gataaaaata gtaaaaacca taaaatccga agcaacattt
+     7801 gactgaaagc tacttgacag cgccggctta cttgctccgt ttcaaattac gttgttctta
+     7861 tcaatttctt ggttttcatt ttcagctgtc gcgcccccaa atcaccattt taggggactc
+     7921 gtttctatag caatttgtat catgtgtaat tggaaaaaaa accaattttc gtgtgataaa
+     7981 tgacagaaag aaaaacgaaa aagaaggaag agattttccc tttgattaaa atgatagaca
+     8041 gttggtcagt gcgggaagaa gaagagattt tggaaaaaaa gtgcagaggg ataccggaag
+     8101 aattgggaaa agtgtgattt tgaaacaatc agagctcatt catttgtact tggttttttg
+     8161 ctcgggttca ttgcatcccg agaatctgtc tgtgttgggc aactgtattg gcaaaatttt
+     8221 aaacatcctg aatgaaattt cagattcact tttttttgtt cgaagttcta ttcagaagct
+     8281 atttttgtga caatattttt ataatgtgtt ctatcataca acacagattt tgtttggaat
+     8341 ttcgagattt ccgggttttc cgaagttttt ctttattaac attaattacg actaaatatt
+     8401 aaataacttg aaaaaataaa aattattgaa tttctcaatc aaaacgtttt tatcttgttc
+     8461 cttgatggct taaattgtat tctcagaggt tcccttccat attctcacat ttcaagtgtc
+     8521 atttagaatt cttgtcagat gagtaatatt aggaaggcgt ctagactaaa taataaaagt
+     8581 aatacatagt acggaatttt gacagcgctt ttactaattt actaaatttt tgtacttcac
+     8641 atttaataag aaaacaattc atttccgttt ttccgatttg cgatcgaatc aattgaaaat
+     8701 aaaacaggtt ttggtttgtg tccgttttta tttataatat ttttgatatt caaaattaaa
+     8761 caaaaatttt gcataatatt caaaatcata aaaaatattg gttcttttat tggctcaatg
+     8821 aatcttcttc acaattcctc ccaactaacc ttttttcaaa gttcccctat gaatcaatct
+     8881 cccgccccag cccatgtttc cttccactaa tttttctgtt tttcatttta attcttctct
+     8941 attgccacgc atcgattcca ccccagcaaa aattgaaaaa gtaacgaagg tcaccgagag
+     9001 atcgaccatt ttcatctacg tcgattcttt ttaaaagatg cgcgcacctc cccccgaaaa
+     9061 tagatcaatc tgtctcagtt ttgctggttt ttattagtga gtttttatct gacaattgtg
+     9121 agaaggtgat actagagata atcaaagaat tcatttgatt caatgatcca gaagtctttt
+     9181 tttttatatt ttcaaaaaag tgaaactagc aatattaaaa gaatcccaat cctacaaaat
+     9241 tttcgaagta aattcaatta aaattaaaat taaaattaga aaaaaaactt taatttatca
+     9301 agattgatgc gatatcatcg tcaacaattg ctactcaatt attagattta ttttatttat
+     9361 ttcatttaga cataaatatg tacgtgagaa acaattgttt tctgttacac tgaataaact
+     9421 aaactcattt tatttttttt gagcttacga taattgctat gaattattca taacattttc
+     9481 taatgtcgag tgcacttttg cagatatgac tcactttttt ttgttttgtg agttgagcag
+     9541 cctaggactt tgtatgaatt aatttgcaaa aaactaggta aatttaatca tttacgtttt
+     9601 ttttcaaatc aattgcaaat ataaattctt gtctgaaatg tatattaccg tatttctttt
+     9661 attagtatat atgtaactac attttttaac aagtttttca aaagtctccc aataacgcaa
+     9721 aatcataaaa aattctacac ataggtacat attatttaga gttttctcaa gacctatatc
+     9781 acctcatttt caatttttcc ttatcaatca atctcgtttt cttcaaaaat tgaatcatct
+     9841 tttgatacaa cagacgtatt ctattcatag cctcatattg gcattggcta tcaatcaaga
+     9901 tgccaatatt ttacaatgca tttttcattt ccggtgaaaa tctagaatct agaattgaga
+     9961 atcgagaatc tagaatccgg cccgcttcaa ctattttata tatatacaaa tatataaatt
+    10021 tctgcccact ctttcttgat ttatttgcga tttggttgca tgacgcaggc gtgttgctct
+    10081 tactacccaa acaaggaata aagatcttta aatgcggatt tcatatatta atttttttct
+    10141 tctttcaatt ttattttgtt tgccaagaaa aaaaaaacgt gtcctttact acccttcaat
+    10201 atttttgttt caactgaaaa tataggtttt ttaaattaaa ggattaaaaa tgaattaaaa
+    10261 ctttttttta aagtaagaaa gtctgtttta gatttgtttt tattttaaac ttgaaaccat
+    10321 acctacaaaa ataccattaa aaattttaac aaaatctttg taacatttga agtcaacttt
+    10381 aaatacttca aataagcatt tgattatgag aagtgtgcat atgtatattt attactatta
+    10441 ttttttgtag aattttttaa aacgtatgta aaatttacat cttatgaaac atgtatgggg
+    10501 aaattttcag atgaatggta agagggttag taagtttatt aggtttatct attcaatcac
+    10561 aatgtaattt tttgactgaa atgcgcctgg ttttaaaatt tgagagcatg tacctttaat
+    10621 ttttgtccta catttttact ttatgggctg tttgttataa attttgtata tttacagaaa
+    10681 gatttcaact cttcattttt tatgaatctt ttttattgta aagagcatta gtcaatgctc
+    10741 accaaatgat ttctcaatta aaaatcaccg atctgagagc tcaaaatgct ccgttctcgt
+    10801 aacgccaaca tttttatctg tttgtgtctt cacattagtc taccaccacc aacactcaaa
+    10861 atatcaaatt tttctcaatt ttcattcacc gatcattgtc tccactcttc gcctcaatct
+    10921 ccgtcgacct gtaactaacc agttgtcgaa tcgcattgga tcgtcgcgta tcgctctctc
+    10981 atttcctcat ggtcttcccc ctcagggcac tactcctctt cttttcccaa aatgcccatg
+    11041 attttcactt ttcattctgt gttgttgttg acacgacgac tgtctcgttt cgtcctttcc
+    11101 tcttgtttca tccctcccac ggttacctct tcaagtcttt tctcattgat ttttgaaagt
+    11161 ttttcaattc ttttgtaccg taatgatagt gatattgata agttgaacta ttttagtaca
+    11221 gtttcaggtg actagagatg ccaaagcaag agttcaaccc acttgattac actggaccac
+    11281 tgatagttgg agcaattttc tgcgtctttt tgtttgtgat ttcgttcttt gttatcaact
+    11341 tcttctgtat taccaaatac gatgatatta caaagtttga attggtgagt ttagtacttt
+    11401 gtagttttat ctttaaatta tttacaaata gaaggaaacc aatagcattg tacgttctaa
+    11461 aggttgggtt tgctagagag cctgctttat gttaattaaa aagttgagta tctatacaac
+    11521 atacacattt gatttttggc tttgctgcgt accattatta ctcaagaatt ggtatgtatt
+    11581 caaataatgc aataacgttg tgaaattttt tctctagaaa tcgttcataa tttctaagta
+    11641 gaaaagtcaa ttatttctga caatttctaa tttttttata aataaaaatg cacattaaga
+    11701 acttttgata atgttgcaag aaagttggga aagttatttg tctggtcact tactttcgaa
+    11761 atgtattaat cgatccttga acttttttgt tccgcagagg ctggcggagt ttacaagcgt
+    11821 acgacgtggt tcaattttca tttaaagctt taaaaatgga acatagatga acatttcgaa
+    11881 tgctaaatgc aaaatcaaat aatttaattt tcacgactct ataaagtttg ccagcttcct
+    11941 ataactgaca caaataaaaa ggaatccttt aatgagaacg agtataattt ctggaaaccc
+    12001 ccgcccctgc ataatactca aattctcaaa aaatttccaa ttgtctttac tgcagacgtt
+    12061 caatgaactt ttcccccacc atatcaatga taagtgtttt actacctaaa catgatttat
+    12121 agtttgaacc aatcagtctc tttcctttat atgcattttt tttcagatgg gaggtaaata
+    12181 tggatggcgc ctaggaccac atccattgat cgttgtcaag aagggtggat ttgtcgccga
+    12241 ggaggaggtt gatgatgctt aaatacttag ttccattcca acaacaaatc tatattttgc
+    12301 ttttaatggt tttgtttttg aaatatgcag tacgtttatt gcatttttaa taattccgca
+    12361 actttttgct gcaaactttt attcgaataa aattttaatt tgaagtgaat actttttaat
+    12421 gaattgttcg tacagtgtca tcaattgctc tagccacata atccaagtta ctcatgttaa
+    12481 gcccgcagat attaatacga ccgtcactga gaaggtagat cttgtgatgg gctatgagat
+    12541 ggtcgacttg acaagctgaa aaccatttca tttatttttt aggtttttca gaaaatgtct
+    12601 aatctaaaat tgctttttat taattttatt ccccacgacc atacggactt aaagctggat
+    12661 gtaacttttg tgcaaaatgg cacaattccg tccggagctt tggggtgttt tacgagaata
+    12721 ggtaaaataa cctaatgctt tttcactatg ctaaatagtt ttttgaaaaa tattgcttac
+    12781 ttacgcgtca atccggtata actgaacaac cccgattgtt gagtaatacg atcccatttt
+    12841 ccagaagttt gaagcatatt caactctcga atcagttctt gtcttgtttt cttaatacga
+    12901 aaagccatca ttttaatgga attttgccat tgttctctgt acttcggcga cttgagaatc
+    12961 tcatgcacaa tacgtgctcc gtaagctgga gggttagaaa aattagaaac attaacaagt
+    13021 gtcagttggg attttaggtt cgcaatatga cttggtttgt tcacgatcac agtaaggctt
+    13081 ccaacacgat catctgaaaa ttttaatcct aactaataat attcaattca aacttactgt
+    13141 aaagcccaaa attctttgag aaagattgag agacaatcat ttccaaaccc tgttcgacaa
+    13201 agaaccgaac agcccaagca tctgcatcag cgtcgccaga agctaatccc tgatcagcaa
+    13261 tatgaaaaaa agtgaaaaga ttcttctgtt tgatgacttg cgccatttgt atccattgct
+    13321 ctcgagttgg gtccattcct gttggattat agcacaggct tgaagaagaa cagctgatct
+    13381 ttccggagct tggcggaggt cgtcaagtaa ctgtgtgatg ttaatttctc tgttatgcaa
+    13441 gtcccagtac gtatattccc taattgaatt gaaaccactc ttttgaaatg tacgctcgta
+    13501 gtttcccctg gaaaaagaaa catcacacaa ttggtcgttt taataatcgg ttgaaaaaat
+    13561 tttgattttc tcattaaaaa atacccccaa aaagcattta cttaccagca tggattgctg
+    13621 acataaatcg tcttcatact aagaacttga gcaagaaact ctgctccaac acatatggca
+    13681 cctgtaccag agatgcactg aacactaaat gatcttccct ctttgatagc caaagaatca
+    13741 tttccgagta gcaaagcggt ggctgatttg cagaatccat catgtccaag aatcggtagg
+    13801 tactcatgat tgtgatgagg ctcgtgaggg aatttaagtt cgatttctcg aaccactggc
+    13861 aaaacccacg gctctccatc ttccgtgcga taagcttcta ttgcaagatt tattttcaca
+    13921 gggcaaattt ctttctggaa gagttccgat gtatggagct ctttgattgg cgaagcaaca
+    13981 tgaattccat caaagaaaga tagccgcata ttttaactga aatttatttg ctgtttaagc
+    14041 ttagataaca ttaaccctta gcctaagatg gacaaaaaag tcacgtaaaa ctatgtataa
+    14101 gaatggccac catttctggg tcagttgaac tacacaagta ccaaaagttg aatagttcca
+    14161 tgcataacac gaaatagaaa aatgggagtt ttttggagca ggtggcgata agaaaacgaa
+    14221 attttaattt atagctgacg ttgaaaaaaa ctcttgaact cagaaagtgc aagaaacaaa
+    14281 atttctggga ctgcaaattc agtacatttt atactttgac tataaaacat ttttgtagct
+    14341 cactgtcttc tgaaaaaaag aactgggagt actttttttc cttagcgcaa tcctgtttat
+    14401 atacggttgc aaaaatcgtt atagcgttcg caacatggta attttgttca agaatcaaaa
+    14461 aactgcggta ctaagttttt ttttttttta gtttattttt ttttagtttt tttatttttt
+    14521 taaatttatt tagtttaact atgtaaagtt tttctgttcg tttgaaaatg aatttgtgtt
+    14581 cctgattact tcctggcacg tattcacatt ttcatggtga atcgagtctg gtgaagatgg
+    14641 gcgtacccac ggttcgacat gttgctggaa ttctcgtgct ttttgaaaaa tgaatatgcc
+    14701 gttctattaa ttttgtaacc ccgtaacaaa tgaaaaccgt tttgtaacgg ggtttcttag
+    14761 taataatatt gggtataagt acaacaataa caaccataat aacaatataa taataataat
+    14821 aataacaata acaataataa gtacaaccgc ttgataattg agtttgccat ttttagagca
+    14881 atggaaaata ttacattgtc attttctatt tattaattgt tatgatcaaa taaaaccttt
+    14941 ttcatccttc ccatattcat tggtaatgat tactctattt ttttccataa aacaactact
+    15001 tgatgaacga tgttacggcg aaacacgaag gttcgaaaaa aactaagaca gcttaaagtg
+    15061 cattgcaata aaaaatgatt tatcacttga ttgacgttga taagaaaata gaagcctacc
+    15121 tcaactttga atactccggg aaatgaagtt cgtgtagatg agaatgcgaa agtttgacgt
+    15181 ggtcagttat agaattaaat cagtttttca aactgtagta acaaatatta acagctgctc
+    15241 gaaaaaaagt ttttaaacat ggtgccaggt ggtcagtaat acactgtatt ttgaagcata
+    15301 tgacagaagt tgaaacactt ttttttcaat taactacaag aaaattacaa aaataaaata
+    15361 aagcctagtg atgattagta tttgcaccat atatcataat tgcttgaaca gaaaacattg
+    15421 ttcataaatc cccgcaattt tttaactgtc tttcacactc cgggtcatcg aagtttgcaa
+    15481 gacgtaagct tttcaggtaa tggaacactg atgtatcctt acctgtacac gtgatttcag
+    15541 aaagttggag agccgttttt gaattggtgc tccgttcaaa ttgactctaa aagttattca
+    15601 cgaaatccac atatagttga atgttttttt agtattgtgg aataatttta aaacaattat
+    15661 tctaaatact agagaaccat ttttagtaca aaactgatta ttttatccca tcccgttgta
+    15721 caacgtaaca gaaaagcata atttgaaagt ttctcggagc acactttcaa tccaatgaac
+    15781 tgtaaagaaa attgttgtaa caagattttt ctcaaatttt gccacatgac ttcgaggaga
+    15841 ctgaaaaatt ttaaccaaat cgacaggtgt ctaaacaata gatagatggc aaatattctt
+    15901 aatctaaact tggggtgacc agaattacaa actttgtaac aaaatgtcgt agacttttcg
+    15961 cacaacagtt ttagttttat ttttgttccg gcaattctga tgttcctctt gcttccatat
+    16021 gccaaaaatc cgctgcttca tgcattatgg cagtgtatcc ataattgtcg cggttaggag
+    16081 cgaatatttt atgaaatagt tccccaagta aacgtccaca tcacacggat aataatagca
+    16141 atgattcatt ttccattcaa gaaaaaaaag taatcgggtt aaaataaata aaagtacaca
+    16201 cgggaaatta aaactaagtg catagaaata ctcaatctac acttaatgct caaacttgaa
+    16261 gagcacaaaa actcaatttt tcacataaaa ttcataaatt tcaaagcttt agatgttgct
+    16321 cttcacggca cgcacagtct catcaatcgc cttggcgaca tattccacgt ttttcgtgtt
+    16381 aagaccacaa atgttgatgc ggccgtctct gagcaggaat actttgtgat tggcaatcag
+    16441 gtgatccact tgggcagctg aaaattaaga ttgtcctcat cagagtgtta aaaaaagcaa
+    16501 actcacaagt aagtccggtg tagctgaaca ttccaatttg ttgaatgatg tgatcccaag
+    16561 ttccaggagt gccaagatcc atcaaatggc gcaataaggc ggctctcatc tgcttgatac
+    16621 gagaagacat cgcttgaata gactggttcc attgttcacg gcgagctggg gtggtgagca
+    16681 ccttgtgcac aattcttgct ccatgtgctg gtggattgga ccagttggca cggataacca
+    16741 acgacatttg agactggaat ccggcaatga cagctggatt gttgacgacg acagtaaggt
+    16801 ttccgacacg ctcgtctgaa attttgaagt attttacttt tcatcttcta ttcaaaactt
+    16861 actgtaaagt ccaaagttct tggcaaatga ttgggacaca accatctcca ttccctggtc
+    16921 tacaaagtag cggatcgccc aggcgtccgc agctggatca ccagatgcga atccttggta
+    16981 agcaatgtca aagaaggtga agaggttctt tctcttgatc acctcggcaa ccaacttcca
+    17041 ttgctcctga gttgggtcca ttccagttgg gttgtgagca catccgtgaa gaatgatgac
+    17101 agacttctct ggggctgact caagatcgga gaggaacttc tcaatgtgca cacgtttgtt
+    17161 atcatagtcc cagaaggtgt agtcggcgac ggtggtgaaa ccagccttct tgaagacaag
+    17221 cttgtgattt cccctgaaaa tgataaaaat tcatgatttg atgaatttcg cccagcatta
+    17281 tcagaggttc aggaaaagta ccttacacct ctttgaaaga gccttgctga aactcatttt
+    17341 ttacctagat tacctaggtt tatggtaata ctgccaagaa ttgggcggct attttaagaa
+    17401 gttgaagttc tggttgactg ctactcactc tggccaatac aagacttgtt atattttatc
+    17461 agataagtat gtttggctcc gatttttcga ccacttacca tgttgggttg ctcacgtaga
+    17521 cggtcttcat gttacacact gaagcaagaa actcggctcc agcacgaaga gctccggttc
+    17581 cagagagaca ctgtactcca aacgatcttt cctccttgat agctggcgat tcggctccga
+    17641 ggacaagctc ggtggcagcc ttgcggaaac cctcgtgacc gagaactgga agatactcgt
+    17701 gattgagcga ggtgtcgttg gcgatttcga cttcagtctc atgaacgact ggaagcaccc
+    17761 atggctgtcc ctcctctgtg cggtaggctc caattgtcag gttaactttg acgggagccg
+    17821 tctcatcgag gtacatcttg ttcttgtgga acacctcgat tggtggagca actgggattc
+    17881 cgtcaaagaa ggacatgttg agtgttgttt ctggaattaa aaaaaatcaa taagtttagt
+    17941 tatagaaatt gaagagatga aaagagtggc ggtatgccgc aaaacgcgat aatgaggtgt
+    18001 atttatatag cgaaagagat caaaatcgaa aaatgaccac gttaaaagga ggtcacctca
+    18061 tcgacattcg agaggattta cttggaaaca aaaaacgttt taggtgtttc caataaaaat
+    18121 cgatttgatg ataaaaatta ttatacgata agaaacgtaa aactttctac tttcttattt
+    18181 tagcactcaa gaaatcaaca gaaacgtgtg ttgataagac tacgggccca gtttcaagac
+    18241 tgaactagat ggttactttc tattgcacca aagtcgacag ttcacaacaa aagttacagg
+    18301 aaattttgcc tagttttcgg acaaattgag gaaagggggt ctcaaaaacg gactgcctct
+    18361 tctcgacaac tctttctcgt cggcccgccc tccaataagc acacactatt tccgagaaaa
+    18421 caacacatgc gggtagaaaa cgagaggcag cgttgttgtt tgccagagac gtttttcgag
+    18481 acgtgaccat tttgaaaatt tttatttcga ggaaaacgtg atggcaaggt ggagagaacg
+    18541 ggcaaaacgc agcgcaatga gatcgaaatg tgtcatttgt gagggtttca aagtgttctt
+    18601 tgataaggca aattactttt ttgaaaattg aaaaaaaaga ttggatattt atctaggtta
+    18661 atgtaataag atcctaggaa taaatgaaat tatatcaaaa ttccctcacc aaaaatgaaa
+    18721 aactattttt ttttttgaaa ttttctagtg gaagttttct tctgtttttg cctatagaaa
+    18781 aaccttgaaa gagctgattt tttcaaaatc cctttacaaa aggaatcaag gttaacctat
+    18841 acaactacct cgtgaacttc aaagattcaa aaaccaaacg accacttcaa aaattattgg
+    18901 caaaatgacc ttttgaaatg agtgatggtg cgaaaccgcc tagataaata aatataacaa
+    18961 tctcacgtgg ccaggcggaa aaacatgttt atgtaaattt tcgaaacgaa aaataatcgg
+    19021 ttttcttttt ggggtgatga tggcgggaca tgatgaaaag aaatggaaca gtatacacga
+    19081 gaaaatccaa gttgctgggt atttagaaat atggatgaga actaaacaac attccgcccg
+    19141 gtaaacgcct ttgaatgaat gagaagtaaa aatgaaaaaa aatcacagaa aaatattcac
+    19201 cgtaatgaga atttgcatac aagatttccg gtagaaagaa gggtgagaga agaggtaaag
+    19261 aaaaaggcga taagaggcgc ctccgggccg ccttaaagcc tcaattctac gtggcgaagt
+    19321 tctcaacaaa gttttgtaaa atttacgaca actaaaaatt ttttcagcac attctgattt
+    19381 attattttag tcaccgactt taatgggtgg cctttaatga ttttgaagtt aatttgaaaa
+    19441 tttttaaaat caataaagat gaacatggac atggattttt cacaaaacta acaattaaga
+    19501 ggcaacaaaa acaaacataa tgatagttgt tagaaaaatg gaaattgaac aaaaatttca
+    19561 aaaacaaaga aagcaagtca acaagtgtct aacgaagaag taaacttttc tcatcacttg
+    19621 gacgccgagt gtgctcaaaa ccattgtgct ccgaaatatg acacttttca actcgtccca
+    19681 tcgttcaaag ttattattcc ctctcatttt ttttcccgcg ctctcgttgg ctctcgtatc
+    19741 atgagctctc gttgacaagt aacttttaag tttcaaaatt tatctatagt tgttaaagta
+    19801 taatgttttc ctagaagctg taagcttggc aggtgacttt ccctgcggga attgcgaaaa
+    19861 aaatagagac tgaataaaac gcatatactg tttcatattg tctggcacgt gccttgtggc
+    19921 aggaggtata ccaatacact gccccgttct gatccgatta ctttgacaaa agtcgccgag
+    19981 tctgatactc acaaacagta gcgtcaagtg aggagggctc actttaccag aatttattat
+    20041 gaatgtttct ggtatcatca cattgtttca actttccacg cgctgttaca ttgcgttttg
+    20101 aatcgatttt ttggtattct gccaaatata gcttcagtgc gttgcactcg agcatctgaa
+    20161 catatttgct ttttgtttgg cgagaggaag gaaaaagcac tgaaaagtgc ttaaaaatcg
+    20221 ttttgaaata aggattagta actggattag agctagagca ttaagtgata ttaggcaatt
+    20281 tttgttattt ctaaaaatat ggaactatgc gggtggaatt ttttgttcac atgaacgtgt
+    20341 caaaaaaatt gttaatgttc ataccctcat ttacatttcc acttttatgg cattttggaa
+    20401 aaagatggct atattcgccc tgttagtttt agaaaacctt ttccactaga tattgaaatt
+    20461 tcgaagtgaa gttttcgctt tcatcaaaca ttcttcgaga gctagcttca attgttctcc
+    20521 ttcatttttt ctattgcttt atgttgcatt gaatagctaa aaaaaaattc aaaaagctat
+    20581 cccttcaaaa aaagaaaatt cgctgcttct cgacgtgaaa aacaatgaaa aaagtgttcg
+    20641 gtgagtactt gttctattta cacagcaaac ctttttagca gccgagctgt tagaaaaacc
+    20701 aaaaaacgcc accaaaatat gcttattcac attttccgtc atcaccatcg ccgccgattt
+    20761 ctgcgcatga caaagtggct tttctctctt tttttggatt tcaaggtcta acattgccgt
+    20821 tctaatgccc gttttttagg ttacatttca aaataaccga ctattttcag aaatgttttc
+    20881 tcatcaagat cgagaccgcg ataggaagga ggacggtggt ggagacggta ctgagatgaa
+    20941 atccaagagt cggagtcagc caagtggact taatcgcgtg aaaaatcttt caagaaagct
+    21001 atcggcaaag tcaaggttag tttttgaggg gttttcgggg ggtcaaggga agtcaaaaac
+    21061 cgcaattcta aaattttgcg atgggtagcc acagaaggga tgttcttttc atttgaaaat
+    21121 gtgaaaaaga ttaatttttt ttaaaaccac actgatgagt ttagtgaaaa aattaaacat
+    21181 gcttctagcg cctcagctaa tcaactcaga ttttccgtac aaattacttt agtattacga
+    21241 aatattgaaa aattcatcgg ccctcagcaa attagattcc aattcgggaa aatactgaat
+    21301 tttccctgaa ttccgtgttt tgtctgtgtg aattaaagat tgttcacatt taaaataacg
+    21361 taccaaggtt tttttttgaa aactttctag atcactagtt tttgaaagac taaatattaa
+    21421 ttttcagcaa ctttttgtaa acgcccatta cgattagcaa aacagaaaaa gataacagca
+    21481 aaaatgttca atgcaaaaac aaaagaaaag agactggagc cttaatgcaa aacgaacaat
+    21541 gtgggggccc ttcccgaata agtgagccgc gcggaacgct gccgccctcc actaaacaac
+    21601 gcgcggtacg atactccgtt catcggacca acttcccccc tctttaccta gttggcaaca
+    21661 ttttcttcgt attctcacgg attctcctca ttcattccgc tttcaaaccc aatttccgtg
+    21721 catttagcat tggaaaaaca cgtgccgcaa cgtatccgaa aatagtgtga gacgcagacg
+    21781 gctggcgccc cgcgtgaggc gaccaaaagg cttttcttct ttttgtcctc cgttgaaaat
+    21841 catggtaaat tgagtgatct cgtttgcgta tttgtgtgta tgagagagtg tgagtgtgtg
+    21901 tgtgtgtgtg tgtggacacg ccaaagcttg atggagtcac atgtatatca tgaaaaacac
+    21961 aaacagatac cacaccaacg aatcttgaac cttgagttct tccgccagtg ttcattttta
+    22021 acatctcttc atcgttctgc aaggagacct gtcaaaattt ccaacaaaaa aaagagtttc
+    22081 ccataaatta ggaaaattct gataggttcg gaagttttcc gtgaagtaac tgaatatatt
+    22141 ttcaatgatc acgagaaggc aattgaataa atgttagttg gaaaattttt attgagcttc
+    22201 tagattttgt tgtctaatat ttgagcctca aacttttaat ataggttatt attttttatt
+    22261 gcagccgaca attttttgtg gcaatttata tacaaaagca taaatatctg agatgcttag
+    22321 tttcatagaa attcagttca agaggcttca ccgtataata taatatattc ttgctgaaac
+    22381 tacaggaaaa tttagtggga acttgtgaaa tttgaaagct cccagattcg attttgtcat
+    22441 tcttttatgc aatgcaaaaa ttatttttaa aaaactgttt tcgacatttt ctcatgattt
+    22501 gtgaaaaagt ttttttctca aatagaagtt aagctcattt ttctaaatag tttttctaaa
+    22561 ccctggattg tctcaaatcc gctgtagaaa agttgaagcg actgacatct tatgagctaa
+    22621 atttttcctg agtttttgaa atagactaca tcttgataat tgaacattac agtttgacaa
+    22681 aatttataaa aatattgatc agttggattt tattattttt ctgttgtttt gttctagctc
+    22741 cagtagggtg gatatttttc aaaaatctca aatttttgtg caacttctat tagtacaaat
+    22801 ataaaaatat ttcaaacgtg cactttttga acatggacat acgttaaatt tccaatataa
+    22861 agggcaaact ttcagctgag ttaggagaat atgagcagct gttaatcaca taaatctaac
+    22921 aaaaaaacca catgttccaa cttaaaattg atttcaaaat tttgtggcca aaatgaaacc
+    22981 ctacgatgtc agccgagttt actttttcaa agtgcagtta gaaaaattgt ctcaattttt
+    23041 tatcagtgga aacgtttttg gtagtctcac taattttaaa gtttaactcg ttttagtaga
+    23101 agtcatttaa aaaattcaaa aattgcggtg taacagttct tttaatactt tttcaaatgc
+    23161 cataaatcgt ctgtaattaa ttttctacca acttgacaat ttcaattagt aaaatcacag
+    23221 ttttaatgca taactttttg atgctagtcg tatcaaattg tgattattgt tgttatcttc
+    23281 accacttata ttacgagtgt atcaaatttc gatgacacaa tacggctatt attcattgag
+    23341 cgcttgtgtt ttctctatcg tctccactgt ctgaccttcg atgatgtttt tctacatgtt
+    23401 tttttgttct ctcaaaatag atgtttcagt tcctccaacc ttttgaaaat ctgtaattgg
+    23461 tgttgatttt ggaacctttc ctattgtctc ttctaccatt catatccggc acatttatgt
+    23521 caaaacactg actgcacctg ctgtttttga aatttagtgt ctggtgtctg ggcggcagtc
+    23581 ataagacctg atgggcatac ggttttccta gagactccta ctgcaatggt cattctgtga
+    23641 taagaagttg cttttttgtt tatccattca aaattaccat ccaaaaacca caccagttat
+    23701 caatttttaa gtctttgaga cccgtaaaat caatcaatca tgtccacatt gcacttttgc
+    23761 ctgtagaaaa acgcgatttt cggttttcaa tcttatctca tttctggttt tcgttcccat
+    23821 ttgactactt gttgtcaatt ttttaggtta ggtctaaata aatattaaag attttttaaa
+    23881 aatttaaatg aaagttcaat ttcttctttc tatttttttg ttcctaccta ccaacgctgt
+    23941 ttgcgccgga ggacgataga cataaggaaa cgaacacacg ggatgccttt cccttctatt
+    24001 tttctcctct ttttctctat ctctcgccat tgctttgtgc tgtatggtca actctcgttg
+    24061 tcttcgctgt gcgtgtgtgt ttgtctgttg ttagaaaacg aggaaaattg tgagaaagag
+    24121 tgagcgagga agatcgagag agaagagtgt gcgtgtgaga ggtgtagggg tcgcgagagt
+    24181 gtgcgggagg ttggcggacc ttttaccaca ttttcggcca aaggaaaaaa ggcatcgatc
+    24241 tttgttggtt gtagttctag acattttttg tttctgctat gttctcaaaa caccaaaact
+    24301 tgttcttaga tttctgatct ttgaaaaata catgctcttt atacacttgt ttttctcgtt
+    24361 gttttgctat ttgcaggagg ttcctcgtga ctgtatttat tgatttttta actgaacaat
+    24421 tattctgagg aacttgtgaa ttcgagattt tacttacgat catttaccta ccaacaacgt
+    24481 acgttttcag aaaggaacgc aaggaccgcg atagcacaga caacagttcg aaaatgtcgt
+    24541 ctccaggagg agaaacgtcg acgaagcagc aacaagagct caaagctcaa atcaaaatcg
+    24601 gacattacat tctcaaggag acactcggag ttggtacttt tggtaaagta aaaggtaagc
+    24661 tacaacacct taaaattcaa ccaaaccgga aagttagaaa ctttttcatt tgctgcaact
+    24721 tcctggcaac accataagct gccaaattgt tgcaaaaatt ttgtcccaat ctgccaaata
+    24781 ctgacatatt tatctttctt ttctgttcat ctagttttca cattttcagt tggaatccat
+    24841 gagacaactc aatacaaagt ggctgtcaag attctgaacc gtcagaagat caagtcactg
+    24901 gatgtcgttg gaaagattcg ccgcgaaatc caaaacctct cgctcttccg ccatccgcat
+    24961 atcatccgcc tctaccaagt catcagtaca ccttctgaca ttttcatgat tatggagcac
+    25021 gtttccggcg gggagctctt tgactacatt gttaagcacg gacggctgaa gaccgcagaa
+    25081 gctcgtcgct tctttcaaca aatcatttcc ggcgttgact actgtcatcg tcatatggtt
+    25141 gtccatagag atttgaagcc agagaatttg ttgctcgatg agcagaacaa tgtgaagatt
+    25201 gcggactttg gactttcaaa tattatgacg gatggtgact tcttacgcac cagctgcgga
+    25261 tcgccaaatt atgctgcccc tgaggttatt agcggaaagt gagtgattat ggtggtctcc
+    25321 tgatcatggt tcaatgtaaa ataaaaacca aattagaact gaataacaaa acatgaaaca
+    25381 tgtgttcgca aacaccaaga cgacattttg cactagtaga aatccaatta aagtaattga
+    25441 acaagattag acacaacaac tttgttttgc aggttgtacg caggtcccga agttgatgta
+    25501 tggtcgtgtg gagtcatttt gtatgcactt ctttgtggaa ccctgccatt tgatgatgag
+    25561 cacgtgccaa gtcttttcag aaaaattaaa tgtacgttag tgtgacatga gaatcaaaac
+    25621 acaaaaatgg caaaggatta cgacaacata ccaatttcat ttagacatat gaaagcgaaa
+    25681 tgggatttga aaattgaaat atccctatta caaaaggcac tattactatg ttttggcgca
+    25741 ctgcccgggt gataaatgtt tgaaatttaa attggaatca ttaaaaatta taacgttcac
+    25801 gaaatatgtt ttttagctgg cgtattccca actccagact ttctggagcg cccaattgta
+    25861 aatctgcttc accatatgct ctgcgtagac ccgatgaaga gggccaccat caaggacgtc
+    25921 atgtaagtgg tcttctaaaa agaaaaaaaa aacaaacatc gttgctatga gacgccaagg
+    25981 aaacaagaca ataactgctt tatattttca gtgctcacga gtggttccag aaggatttgc
+    26041 cgaactactt gttcccacca atcaacgaga gtgaggcttc cattgttgat attgaggctg
+    26101 tccgagaggt cactgaggta ttaatcgtgt tcgataggag cagaagtttc taatcacaat
+    26161 gaaaagtttc agcgctatca tgtcgccgaa gaggaagtca cctcagcatt gctgggagat
+    26221 gatccacatc accatttgtc gattgcatat aacttgattg ttgataacaa gagaattgcc
+    26281 gatgagactg ccaagttgtc aattgaggag ttttatcaag tgacgccgaa taagggacct
+    26341 ggaccagttc atcgccatcc agagcgcatt gcaggtagga gagggtgcat agaaaaactc
+    26401 ttggtggcaa agaataataa aacttttgca tcgaacatga aattcgtaaa acaatatcta
+    26461 aatcttcaat ttcagcgtca gtcagcagca agatcacacc aactctcgac aacacggaag
+    26521 ccagtggcgc gaaccgcaac aaacgtgcca agtggcatct gggtatccga tcccaaagtc
+    26581 gtccggaaga catcatgttc gaggtgttcc gtgcgatgaa gcagctggac atggagtgga
+    26641 aggtgctcaa cccatatcat gtgattgttc gtcgcaagcc tgatgcaccc gctgccgacc
+    26701 cgccaaagat gtcgttgcag ttgtaccaag tggatcagag aagttacttg ttggatttca
+    26761 agagtttggc cgacgaggag tctggatgta agggttttct ttgtttagat tgaacttttt
+    26821 gaaagtggaa cctgtttaaa atatttagtt aatcgttcta gaaaatacta gaaaatacaa
+    26881 gttagtgcat ttttttaaac taaaatcaca atttgaattt gaaaagccgt agaataatat
+    26941 tttgcaatgg tctctgaaca tgcccctcta taacctaagt ttctgcatgt agtctagctt
+    27001 aacgtgtcac aataattttg ttcagaaatg tttaaaacga agtttgcttt gcagttggaa
+    27061 aaataaaaag gtcaatggaa attctttttc aatagcgtca agtcttctca agacgatttt
+    27121 ttttgtatgc attttttcct aatgggtcaa atgaatatat aatttccacc tcaaccaata
+    27181 ttcattaaaa atttcagccg ccagtgcatc ttcatcccga cacgcatcaa tgtctatgcc
+    27241 gcagaagccg gccggaattc gtggaactag aacgtcaagc atgccacagg cgatgagtat
+    27301 ggaggcgagt attgagaaaa tggaagtgca tgatttttcg gacatgtcgg tgagaattaa
+    27361 tatttttctt ttatttcaat atttttattg tgaagtatta caaataactg gaattgtatt
+    27421 tggagtggtt gatatagtta taggtttttt aaaaatcttc gagtgacaag aagttgatgc
+    27481 cactaccttt aaataattaa cgcactaatt ttcaatgacc gagtattttc agtgcgatgt
+    27541 gacaccacca ccatctcctg gaggagctaa gctttcccag actatgcaat tctttgagat
+    27601 ttgcgccgca ttgattggaa cactggctcg ttaagcactc cttgatgcca acaatgctct
+    27661 gtgtaaaatt caacttttct gtctcaaaat ttatttcttc cattccttgt attaagcttg
+    27721 aaacccccgt ttcttatatt ggttcacttt gttgcagctt acagtatctt tgtatttttt
+    27781 ttcttgactc tgtttggttt tgcccctccc cccttcacct ccgtaaatgc cacgtatttt
+    27841 aattgatgct attctagtgc cgctttcatt ttcagccaat tttgacaagt aaccatacaa
+    27901 ataattattt aaagttatga cttgacttgt cacccccttt gtttctgtga atagattttc
+    27961 cttcagcgag tgattgtttc tgttttttat atgtttctct tcagtagttt ttttttctgt
+    28021 cttttaaact ccccttctcc tacttctttg tgtctattgt aagtaaaata ttaattgaat
+    28081 aatttcctac ccaaaattat tcaattattc tcatgtaaat ttttatttta agacacaaca
+    28141 ttgcaatggt aactttaaaa caagaaaaaa caaattcaga aagatccaga cgcaattttc
+    28201 tttgccttct cagccttctt ctttttgtac agagagtagt tatgttcggc actcatgtag
+    28261 gcagtttcca agaaaaggaa cacaaattcg caacaggtaa ggaaggatat tccgcaccaa
+    28321 agaccgagtt gtccaccaaa atcggctagc aagttgacaa actagaaata gttgagataa
+    28381 attttttgga ataaaaataa attttttttt gcctgctcgc tagtagtaat tcggcatttt
+    28441 ttcgaatttt tttgatacat ttctttgata cacgttcttt gcctaaattt ttcaactcac
+    28501 cccataagcc tctgattcag tgagcatttc aaaattcaac tgctcgtaga acacttccac
+    28561 cattgctccg ttctctctca aataggccca attttaaagt gtgttttttc ttgaattaac
+    28621 ttacttataa tgcttattac actctaccgc tgtaccatta cacgatccta gttgaatttg
+    28681 caaagataac gacggccact ttgccggcga gtatgtaacg gagtagattg actggcggca
+    28741 tggttgttgg catcttaaaa ttgaaattgc agttttttat ttaaaaagta tataacaatt
+    28801 caaaagttac ctgcaacgga aagatccgtg taggcctccc aagtcattca ttctggcgtc
+    28861 aagacatttt cctgcagaaa aaaatattta tggagatgtc aaatttgaaa ctaacttgca
+    28921 atagggtctg ctgcatcgca atgccgtgca ttttcaggga ctgggaaacg tggatctcca
+    28981 catctgcact ctttcagcac gagttgttgg aagcaagaac ggtagcagcc ctggaatagt
+    29041 tccctctgtt atcatattaa gttctatatt taattgctat tttaacaaga atcacaaagt
+    29101 ttctatgacc catttttcct actaaacttg cttaaattct gtttactatt cacctctacc
+    29161 gaatattcat aattgctgta aatatagtcc gatgttttgc catctggcac acaatctcca
+    29221 taaggtgctg gcaaacgtga catctttcgc tgaaaacaac aacaccatta ctctataaaa
+    29281 agagtttgaa tttctaatac caatcgtaat ccaaatgagg atacatatcc agttggagca
+    29341 gaataaccga acgtatcagg aaatgggaaa tcttctttgt catgaatagt caaacgaacg
+    29401 cctgtggctt ccgtggttgg catatagtca gacgcgttta cataaaccag catacgtaat
+    29461 ccgtacatgg gacctggaat cggaacacgt tttttggttt atacataacg gacttcaaaa
+    29521 attattttag aaaaaaatat gtggaaaaaa ttttgaggaa actaacctgc tcgaatacta
+    29581 gtcaagttta ctgttcgatt atgattgaag gtaaagcacg aaccaaacgc agggtcaata
+    29641 tgagtcagaa aatctctgaa taattgaatt tatatcatat ctgaaagcct aactaacgct
+    29701 tcgatatcac acgcttttcc gttaaacgag cacttgtgga caagttccct ttttgtagta
+    29761 cttagccgtt ccctatcttg cattgacaag gtagccattg caaacatgat gttttccttg
+    29821 gctttagtga caattgcaac ttcatctgtc attccctgaa acttttcagt ttccacttga
+    29881 tttatatatt tgtgattgtt acctgaaatc ccatagcttc aaggaagttt ggatcttcag
+    29941 ttggagttcc tccttgaaga tatgtccaaa tttcaattgg tggcgtcttt ccgttgtatg
+    30001 ctacacagaa tctagatgga gcacaaatac atggggatct atggcccttc gctgaaaaat
+    30061 tccgatttag aaaaaaacaa caataaaaat taagaagagt aaaatcaaac aagtgaagct
+    30121 agtttggcaa agtgattgaa aaacctacca gttttgttat ctttggtgca gaaagcatgt
+    30181 tcattgcaag tatcacattc ggtggtttcc caggttccgt tcagaaaaca gggccacgca
+    30241 tcatgagttg atctatcgaa agcgcaaata caccgatcgt caaatcccgt cggctcgtcg
+    30301 aattttgttc tgaacgagaa aaaagatttg tggagaacat ttcgaaacgt ctcattatga
+    30361 acttctgtat tttggaacca tatgaatttc aaatttatgg aaggcaaaat tgagccttct
+    30421 caaaaattgg acttatcaaa atggacaaac tttaccgtac ctcaaaattg taagactaga
+    30481 ctttttcaaa cacttttttg tcaaagttgt tagactagac tttacctctc tgattgacat
+    30541 tcttccatca tattcattcc agtagttgct tccacatcgt aattctcata atgctcattg
+    30601 tcatattctt ccattccatc ccattcctca tcggcatctg tgttcaaaag aatgttttac
+    30661 gcgatgctca tttgacatgc aacactcacc attaatatta aatacctttt tggtagttgt
+    30721 ttcaagtaac tcttcctcct tctcctcatc cttatcctct tgctcactag acccttggct
+    30781 tccacaaagg catcttgcaa atcctggctc aaaaaatgct ccagataaat cacgttttcc
+    30841 tcgacgacgt gccggttttg tggtgggtgc aggggtggtg ggtggttcgg tgacaacttc
+    30901 gcgttcctct tcgtgatctt tgtttcctcc ggcttttccc attgctccat caaatgctga
+    30961 caacgttcgc tttactaaat ccacgcttgt tgctaaactt gctttgtaag gattcaaatt
+    31021 acaaagcgta attgctggaa aaggtgcagt gtctggaaat aaaaaagctc agtaaggagc
+    31081 acatgatttt tcaaactacc aaatttcaat tgaatatcga caattttctc attcctgttg
+    31141 tatttgtcaa gcacagattg agcattaaga tagagcatta tcatacatcc aagaaaaagt
+    31201 acgacccaaa ctgctcgata atatacgttt ggagcttcac caatcatggg aattccgtga
+    31261 gcagatgttt tgtagcaaaa ctctttgaaa tgccaggcta gacgtttgtc atagggtagc
+    31321 aattttggat cgaattctcc cgtaataagt tcacactgaa attcggaaat gttagattac
+    31381 tcagctgcga ctttttgcaa gttttatttt ttttaccgtt tattgaacat tttaagcgtt
+    31441 ttgattatat tgttccaaaa caaaaaatct tggtttttaa ttttggtaca gttttgcaga
+    31501 taagctatca aagtaaaata aacaagttga ttcagcaaaa acttagtagt tataaagaca
+    31561 aaatacttat cgaagaatta agaaaatacg ggccttcaaa tttacagttg aaaaacttca
+    31621 atgtatataa cgaatatttc aaaaactggg aacaaaatac aattgcatac aaaaataaaa
+    31681 tttaccgttg caatttgaaa attacgtaaa ctcacttgta cttctgattc tgtagagttg
+    31741 aaacattcgc cataaccaaa atcttcataa tattctcttt ctgtcacaaa tttagtcgtc
+    31801 tcttgtaagt acgctaacgg gtctccataa acctgggaca tgtactcgga tgggtcccga
+    31861 aggtgttggt agtttttcag gttttgcatc catgacattc tataacttga tagcgataaa
+    31921 aaaaatagca t 
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/U71225.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/U71225.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/U71225.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,58 @@
+LOCUS       U71225                  1164 bp    DNA     linear   VRT 27-NOV-2001
+DEFINITION  Desmognathus quadramaculatus 12S ribosomal RNA gene, partial
+            sequence; tRNA-Val gene, complete sequence; and 16S ribosomal RNA
+            gene, partial sequence, mitochondrial genes for mitochondrial RNAs.
+ACCESSION   U71225
+VERSION     U71225.1  GI:2804359
+KEYWORDS    .
+SOURCE      mitochondrion Desmognathus quadramaculatus (black-bellied
+            salamander)
+  ORGANISM  Desmognathus quadramaculatus
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Amphibia; Batrachia; Caudata; Salamandroidea; Plethodontidae;
+            Desmognathinae; Desmognathus.
+REFERENCE   1  (bases 1 to 1164)
+  AUTHORS   Titus,T.A. and Larson,A.
+  TITLE     Molecular phylogenetics of Desmognathine salamanders (Caudata:
+            Plethodontidae): A reevaluation of evolution in ecology, life
+            history, and morphology
+  JOURNAL   Syst. Biol. 45, 451-472 (1996)
+REFERENCE   2  (bases 1 to 1164)
+  AUTHORS   Titus,T.A.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (19-SEP-1996) Biology, University of Oregon, Eugene, OR
+            97403, USA
+FEATURES             Location/Qualifiers
+     source          1..1164
+                     /organism="Desmognathus quadramaculatus"
+                     /organelle="mitochondrion"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:52105"
+     rRNA            <1..638
+                     /product="12S ribosomal RNA"
+     tRNA            639..706
+                     /product="tRNA-Val"
+     rRNA            707..>1164
+                     /product="16S ribosomal RNA"
+ORIGIN      
+        1 ggcccaaagg gtagttttag gtgaaataaa atagaattta aaatttatct agtagttata
+       61 tataaacata aaatgtaaaa tcaaaaacga aagtcatact atataacctt gaatctacta
+      121 cagctgagaa acaaactagg attagatacc ctactatgct caactttaaa atggaccttc
+      181 ccgccagagc actacgagcc acagcttaaa actcaaagga cttggcggtg ctctacaccc
+      241 acctagagga gcctgttcta taatcgacac tccccgataa acctcaccac ctcttgctaa
+      301 tacagcctat ataccaccgc cctcagttca cccttcaaaa gaataatagt gaacaaaata
+      361 atttaaaata aaaaagtcag gtcaaggtgc agcaaatgaa gtggaaagaa atgggctaca
+      421 ttttttatag taaaaaatac ggaatattct atgaaataaa atataaagga ggatttagaa
+      481 gtaaaaagaa aaaagagtgt tctttttaaa ttggcaatag agcacgcaca caccgcccgt
+      541 caccctcttc aaaattaaat aaactaaata aatatataaa tttataagaa aaggtaagtc
+      601 gtaacatggt aagtctaccg gaaggtggcc ttggatatcg aagtatagct taaataaagc
+      661 attttgctta caccaaaaaa atatttgtta acccaaatta ccttaaattt taaatctatg
+      721 ctaaatataa aatactactt cctaatacac aaaacattat tatatgatag tacgggcgac
+      781 agaaaactta ttagcgcaat agaaaaagta ctgtaaagga aagatgaaat aaaattgaaa
+      841 taaaataaaa atataaaaga gcaaagatta taacttttac ctttagcata atggtctagc
+      901 cagtctatat taacataaag aattttagtt atataccccg aaaccaggcg agctacccta
+      961 aaacagcaat atatgagcga actcttctct gtggcaaaag agtgagaaga atttttggta
+     1021 gaggcgaaaa accaaacgag cccggatata gctggttact tgagaatgaa ttttagttca
+     1081 attaaaagca taaatattat aaaaacataa cgcttttatt ataattaatt gaggtacagc
+     1141 ccaattaata aaggaaacaa ccta
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/U83300.bsml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/U83300.bsml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/U83300.bsml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?format DECIMAL="."?>
+<!DOCTYPE Bsml PUBLIC "-//EBI//Labbook, Inc. BSML DTD//EN" "http://www.ebi.ac.uk/xembl/dtd/BSML2_2.DTD">
+<!-- The BSML specification was created by Joseph H. Spitzner, Ph.D., LabBook, Inc. http://www.labbook.com -->
+
+<Bsml>
+ <Definitions>
+  <Sequences>
+   <Sequence id="MIVN83300" ic-acckey="U83300" title="MIVN83300" comment="Veniliornis nigriceps strain LSU1305 cytochrome b gene, mitochondrial gene encoding mitochondrial protein, partial cds. " length="946" topology="linear" molecule="dna">
+    <Attribute name="version" content="U83300.1" />
+    <Attribute name="organism-species" content="Veniliornis nigriceps (bar-bellied woodpecker)" />
+    <Attribute name="organism-classification" content="Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Archosauria; Aves; Neognathae; Piciformes; Picidae; Veniliornis" />
+    <Attribute name="source" content="Veniliornis nigriceps LSU1305" />
+    <Attribute name="date-created" content="14-MAY-1997" />
+    <Attribute name="date-last-updated" content="4-MAR-2000" />
+    <Attribute name="database-xref" content="UniProt/TrEMBL:O03345" />
+    <Attribute name="database-xref" content="GOA:O03345" />
+    <Feature-tables>
+     <Feature-table>
+      <Reference>
+       <RefAuthors>Moore W.S., DeFilippis V.R.</RefAuthors>
+       <RefTitle>The window of taxonomic resolution for phylogenies based on mitochondrial cytochrome b</RefTitle>
+       <RefJournal>(in) Mindell D.R. (eds.). AVIAN MOLECULAR EVOLUTION AND SYSTEMATICS:81-116. Academic Press, Inc., San Diego, CA, USA (1997)</RefJournal>
+      </Reference>
+      <Reference>
+       <RefAuthors>Moore W.S., DeFilippis V.R.</RefAuthors>
+       <RefJournal>Submitted (27-DEC-1996) to the EMBL/GenBank/DDBJ databases. Biological Sciences, Wayne State University, Biological Sciences Building, Detroit, MI 48202, USA</RefJournal>
+      </Reference>
+      <Feature id="FTR_U83300.1_0" class="SOURCE" value-type="source" title="source" display-auto="1">
+       <Qualifier value-type="strain" value="LSU1305" />
+       <Qualifier value-type="organelle" value="mitochondrion" />
+       <Qualifier value-type="organism" value="Veniliornis nigriceps" />
+       <Qualifier value-type="db_xref" value="TAXONOMY:56076" />
+       <Interval-loc startpos="1" endpos="946" />
+      </Feature>
+      <Feature id="FTR_U83300.1_1" class="CDS" value-type="cds" title="CDS" display-auto="1">
+       <Qualifier value-type="product" value="cytochrome b" />
+       <Qualifier value-type="codon_start" value="1" />
+       <Qualifier value-type="translation" value="XFGSLLGICLMTQIVTGLLLATHYTADTTLAFSSVAHTCRNVQYGWLIRNLHANGASFFFICIYLHIGRGFYYGSYLFKETWNTGVILLLTLMATAFVGYVLPWGQMSSWGATVITNLFSALPYVGQTIVEWAWGGFSVDNPTLTRFFXLHFLLPFLIXGLTLIHFTFLHESGSNNPLGIVSDXDKIPFXPYFSXKDILGFMFMLLPLVXLALFSPNLLGDXENXTPANPLVTPPHIKPEWYFLFAYAILRSIPNKLGGVLALAASVLILFLAPLLHTSKQRTMAFRPFSQLLFWMLVANLLILTWIGXQPXEHP" />
+       <Qualifier value-type="db_xref" value="GOA:O03345" />
+       <Qualifier value-type="db_xref" value="UniProt/TrEMBL:O03345" />
+       <Qualifier value-type="db_xref" value="PID:AAB53618.1" />
+       <Interval-loc startpos="1" endpos="946" startopen="1" endopen="1" />
+      </Feature>
+     </Feature-table>
+    </Feature-tables>
+    <Seq-data>aantttggatctctcctaggcatttgcctaataacacaaattgtcacagg
+cctcctgcttgccacccactacactgccgacacaaccctagccttttctt
+ccgtcgcccatacatgccgcaacgttcaatacggctgactaatccgtaac
+ctccatgccaacggggcctcattctttttcatctgcatctacttacacat
+cggacgtggattctactacggatcctacttatttaaagaaacttgaaaca
+cgggagtcatccttctcctcaccctcatagccaccgccttcgtcggctac
+gtcctcccctgaggacaaatatcatcctgaggagcaaccgttattacaaa
+tttattctcagccctcccctacgtaggacaaactatcgtcgaatgagcct
+gaggaggattctctgtagacaaccccactctcacccgattcttcgnccta
+cactttctcctcccattcttaattgnaggactcaccctaattcacttcac
+tttcctccacgaatccggntcgaacaatcccctcggaatcgtatccgaca
+gngataaaatcccctttcanccctacttctccntaaaagatatcctagga
+ttcatattcatactcctccccctcgtnnccctagcnctattctcacctaa
+cctcctaggagaccnggaaaatttnacgcccgcaaaccccctagtnacac
+ccccccacatcaaaccagaatggtacttcctatttgcatatgctatccta
+cgctcaatccccaataaactaggaggagtcctagccctagctgcctcagt
+cctaattctattcctagcccccctccttcatacatccaaacaacgcacga
+tagccttccgacccttttcccaactcctattctgaatactagtcgccaac
+ctcctcatcctcacctgaatcggnagncaaccagnagaacatccct
+</Seq-data>
+   </Sequence>
+  </Sequences>
+ </Definitions>
+</Bsml>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/UnaSmithHIV-both.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/UnaSmithHIV-both.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/UnaSmithHIV-both.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1883 @@
+#NEXUS
+[Complete genomes of HIV-1, all "non-recombinant".]
+
+begin data;
+  dimensions ntax=10 nchar=9281;
+  format datatype=dna gap=-;
+  matrix
+'A1.BY.97.97BL006'
+AYRGGTGCGAGAGCGTCAGTATTAAGCGGGGG---AAAATTAGATGCATA
+GGAAAAAATTCGGTTAAGGCCAGRRGGAAAGAAAAAATATAGAATAAAAC
+ACCTAGTATGGGCAAGCAGGGAGCTGGAAAGATTCGCGCTTAACCCTGGC
+CTTTTAGAAACATCAGAAGGATGTCAACAAATACTGGAACAGTTACAACC
+AACTCTCAAGACAGGATCAGAAGAACTTAAATCATTATATAATACAGTAG
+CAACTCTCTATTGTGTACATCAACGGATAGAGATAAAAGACACCAAGGAA
+GCTTTAG---AT--AAA-ATAGAGGAAATACA------------AAATNA
+NAGCAAG------------CAAAAGACC---------CA---ACAG----
+--------------GCA---GCA---------------------ACT---
+------------------------GGCACAGGA-----------------
+-------AGCAGCAGC------------AAG------GTCAGTCAAAATT
+ACCCCATAGTRCAAAATGCACAAGGGCAAATGACACACCAGTCCATGTCA
+CCTAGGACTTTGAATGCATRGGTGAAAGTAATAGAAGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTTTCAGCATTGTCAGAGGGAGCCACCCCAC
+AAGATTTAAACATGATGCTGAATATAGTGGGGGGACACCA---GGCAGCT
+ATGCAAATGTTGAAAGATACCATCAATGAGGAAGCTGCTRAATRGGATAG
+GTTACATCCAGCACAGGCAGGGCCTTTTCCACCAGGGCAGATGAGAGAAC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCTTCAAGAACAAATA
+GGATRGATGACAAGCAACCCACCTATCCCAGTRGGAGACATCTATAAAAG
+ATRGATAATCCTAGGATTAAATAAGATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTAGACATAAGACAAGGGCCAAAAGAACCCTTCAGAGATTATGTA
+GATAGGTTCTTTAAAACTCTTARAGCTGAGCAAGCTACACAGGATGTAAA
+GAACTGGATGACAGAAACCCTGCTGGTCCAAAATGCAAATCCAGATTGTA
+AAACCATTTTAAGAGCATTAGGATCAGAGGCTACATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAAGCAARGGTTTTRGC
+CGAGGCAATGAGTCAAGT---AC---AAA---A---TG--C---A-----
+-----------------AACATA---ATGATGCAGAAAAGTAATTTTAGG
+GGCCCAAAA---AGAATTAAGTGTYTCAACTGTGGCAAAGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAARGGCTGTTGGAAATGTGGAA
+AGGAAGGCCATCAAATGAAAGACTGCACT------GAGAGACAGGCTAAT
+TTTTTAGGAAGAATTTGGCCTTC---CAGCAAAGGG---AGGCCAGGAAA
+TTTTCCTCAGAGCAGA---------------CCAGAGCCATCAGCCCCA-
+-----------------------------------CCA---GCAGAAAAC
+TTTAGGATGGGGGAAGAGATA---------------ACCCCCTCCCTG--
+----AAACAGGAACAG------------AAAGA---CAGGGAACAGTAT-
+-----CCT------CCTTCAATTTCCCTCAAATCACTCTTTGGCAACGAC
+CCCTTGTCACAGTAAGAATAGGAGGACAGCTAAAAGAAGCTCTATTAGAT
+ACAGGAGCAGATRATACAGTATTAGAAGACATAAATTTGCCAGGAAAATG
+GAAACCAAAAATGATAGGRGGAATTRGAGGTTTTATCAAGGTAAGACAGT
+ATGATCAGATACTTGTAGAAATTTGTGRAAAAAAGGCTATARGTACGGTA
+TTAGTAGGACCTACCCCTGYCAACA---TAATTRGAAGAAATATGTTGAC
+TCAGCTTGGTTGTACTTTAAATTTTCCAATAAGTCCTATTGAAACTGTAC
+CAGTAACATTAAAGCCAGGAATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTAACAGAAGAGAAAATAAAAGCATTAMCAGACATTTGTAAG-----
+-GAGATRGAAAAGGAAGGAAAAATTTCAAAAATTRGGCCTGAAAATCCAT
+ACAATACTCCAGTATTTGCTATAAAGAAAAAGGACAGCACTAAGTGGAGG
+AAATTAGTASATTTCASRGAGCTCAATAAAAGAACTCAGGACTTTTSRGA
+AGTTCAATTAGGAATACCCCATCC---AGCGGGTTTAAAAAAGAAAAAAT
+CAGTAA---CAGTACTAGATGTGGGGGATGCATATTTTTCAGTACCTTTA
+GATGAAAGCTTCAGAAAGTNTACNGCATTCACTATACCAAGTGTAAACAA
+TGAGACACCARGGATCAGATATCAGTACAATGTACTTCCACAGGGATGGA
+AAGGATCACCATCAATATTCCAGAGTAG---CATGACAAAAATCTTAGAG
+CCCTTTAGATTAAAAAATCCAGAAATAGTTATCTATCAATACATRGATGA
+CTTGTATGTAGGCTCTGATTTAGAAACAGGGCAACATAGAACAAAAATAG
+AGGAGTTAAGAGCTCATCTATTGAGCTGGGGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATWRGATATGAANTCCA
+TCCTGACAAATGGACAGTCCAGCCTATAATGCTGCCAGATAAAGACAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTRGGAAAACTAAATTGGGCA
+AGTCAGATTTATCCAGAGATTAAAGTAAGGCAATTGTGTAAACTCCTTAR
+RGGAGCCAAAGCACTGACAGATATAGTGACACTGACTGAGGAAGCAGAAT
+TAGAATTGGCAGAGAACA---GAGAGATTCTAAAARAACCTGTGCATGRA
+GTATATTATGACCCATCAAAAGATTTAGTAGCAGAAATACAGAAACAAGG
+ACAAGACCAATGGACATATCAAATTTATCAGGAGCCATTTAAGAATCTAA
+AAACAGGAAAATATGCAAAAAARRGGTCTGCTCACACTAATRATGTAAAA
+CAATTAACAGCAGTGGTGCAAAAAGTGGCCACAGAAAGCWTAATACTATR
+GGGAAAGA---CTCCTAAATTTAGACTAC--CCCATACAAAARGAAACAT
+RGGAAGCMT--TGGTRGATGGARTWCTGGCAGGCTACCTRGATTCCTGAA
+TAGGAGTTTGTCAATACCCCTCCTCTAGTAAAACTATGGTACCAGTTGGA
+GAAAGAACCCATAGTAAGAGCAGAGACTTTCTATGTAGATAGGGCAGCTA
+ACAGGGAGACTAAGATAGGAAAGGCAGGGTATGTTACTGACTGAAGAAGA
+CAAAAGGTTGTTCCTCTAACTGAGAC--CAACGAATCAAAAGACTGAATT
+ACATGCAATCCATCTAGTTTTGCAGGATTCAGGATCAGAAGTAGATATAG
+TA--AACAGACTCACAATATGCATTAGGAATTAT--TTCAGGCACAACCA
+GACAGGAGTGAATCAGAGATAGTCAATAAAATAATAGAGAAACTAATAAA
+AAAAGAAAGWGTCTACCTGT--TCWTAGGTACCAGCGCACAAGAGGATTR
+GAGRAAATGAACAAGTAGATAAATTAGTCAGTAATRGAATCAGGARGGTG
+TTATTTTTAGAARRGATAGATAAGGCTCAAG---AARAACATGAAAAATA
+T--TCACAGCAATTGAAAAGCAATGGCTAGTGATTTTAATCTGCCACCTA
+TAGTAGCAAARGAAATAGTAGCCAGCTNTGATAAAT---GTCA---ACTA
+AAARGGGAAGNTATGCATRGACAGGTAGACTGTAGTCCARRGATATGGCA
+ATTAGATTGCACACATCTARAARGAAAAGTAATCATAGTRGCAGTCCAYG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAG
+GAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGATAATGGCCCCAATTTCACCAGCAGTGCAGTTAAGGCTG
+CCTGTTGGTAGGCAAATATCCAACAGGAATTTARGATTCCCTACAATYCC
+CAAAGTCAAGGAGTAGTGGAGTCTATGAATAARGAATTAAAGAAAATCAT
+AAGGCAGGTAAGAGAGCAAGCTGAATACCTTAAGACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGNGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAA
+AGAATTACAAAAACARATTACAAAAATTCAAAATTTTYSGGTTTATTACA
+GAGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTRGAAA
+GGTGAAGGGGC---AGTGGTAATACAGGACAATNACGATATAAAAGTAGT
+ACCAAGAAGAAAAGCAAAGATCATTNGGGATTNTRGAAAACAGATGGCAG
+GTGNTGATTGTGTGGCAAGTAGACAGGATGAGGATTAGA---ACATGGAA
+CAGTTTAGTAAAACATCATATGTATGTCTCAAAGAAAGCTAGAGAGTAGG
+TTTATAGACATCACTATGAAAGCAGGCAGCCAAGAGTAAGTTCAGAAGTA
+CACATCCCACTAGGGG---ATG---CTAGGCTAGTAGTAAAAACATATTR
+RGGTCTGCATGCAGRAGAAAAAGACTGGCAATTAGGTCATRGGGTCTCCA
+TAGAATRGAGACAGGAAARGTATAGCACACAA------ATAGACCCTGAC
+CTGGCGGACCAACTAATTCATCTGTATTATTTTGACTGTTTTTCAGAATC
+---TGCCATAAGGAAAGCCATAGTAGGACACATAGTTAGCCCTAGGTGTA
+ACTATCCAGCAGGACACA---ACAAGGTAGGATCTCTACAATACTTGGCA
+CTGAAAGCATTAGTAACACC---AACAAGGGAAAGGCCACCTTTGCCTAG
+TGTTAGGAANCTAACAGAGGATAGATRGAACAAGCCCCAGAAGACCAGGG
+GCCGCAGARGGAACCACACAATGAATRGATGTTAGAACTGTTAGAAGATA
+TTAAGCATGAAGCTGTCAGACATTTTCCAAGGCCRTGGCTCCATGGATTA
+GGACAACATATCTATAACACCTATRGGGATACTTRGGAAGARGTTGAAG-
+--CTATAATAAGAACTTTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTARGTGCC---AACATAGCAGA--AATAGGCATT---GTGCAAARG
+AGAAGAGTCA----------GGAATRGATCCAGTAGATCCTAACCTAGAG
+CCCTRGAATCATCCRRGAAGTCAGCCTAAAACTGRTTGTAG--GCAATTG
+TTACTGTAAAAAGTGTTGCTGGCATTGCCAA---------------ATTT
+GCTTTCTAAAA--AAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGGC------A---CAGACGCGGA------A---CTTCTCACAGCAGTAA
+GGATCATCANATTCCTATATCAAAGCAGTAAGTAC--TA-----------
+-AATAAATGTA---ATG---------------------------------
+---------ACACCTTTAR---------------AAATTTATGCAATAGT
+A---GCATTAGTAGTAGT---GT--TTGTTA-TAGCNNTAGTTGTGTGGA
+CTATAGTAGGTATARAATAT---ANNN------------NATTGCTAAAA
+CAA---AGAAAAATAGACAGGTTARTTGAGAGAANNNNAGANAGAGCAGA
+AGACAGTGGCAATGAAAGCGAGGGGGATGCARAGGAATTATCAACACTT-
+--ATGGAGGTG------------------------RGGAACTATG---CT
+CTTTTGG---ATGATAATAAT------------GTGT------AAGGCTG
+C---------AGA---A------------G---A------CTTG------
+TAGGTCACRGTATACTATARGGTACCTGTGTRGARAGATGCAG---CG--
+ACCACCC-TATTTTGTGCATCAGATGCTAAAGCAYATGATAAAGAAGTAC
+ACAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCTGACCCACAA
+GAAATAATTTTAGGAAATGTGACAGAAAAATTTGACATGTRGAAAAATAA
+CATRGTAGAACAAATGCAAACAGATATAATCAGTCTCTAGGACCAAAGCC
+TAAAGCCATGTGTAAAGTTAACCCCTCTCTGCGTTACTTTAAATTGTGCT
+G---AAC---CC--AAC---------------------------------
+-----------AG--CACT---AG---ATC-TA-ACA---AC--------
+-------A-GTAGCGTT---AA--C-------------------------
+--------------------------AGC---AA--C---AG--C-----
+AGC---GATAGCTTGTTT-RAA-RA-AATGA-AGAACTGCTCTTTCAACA
+TGACCACAGAACTAAGAGATAAAAGGAAAACTGTACATTCAC---TTTTT
+TATAAACTTGATATAGTATCAACTAGTAAT--------------------
+------------------AA--TGATAGT---RGG---------------
+---------------------C---AG--TATAGA-C-TAATAAATTGTA
+-ATACATCAGCCATGACACAGGCCTGTCCTAARGTAACCTTTGAGCCAAT
+TCCTATATATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAGTGCAARG
+ATACAAATTTTACTAGAACARGGCCATGCAAGAATGTCAGCACAGTACAA
+TGCACACATRGAATCAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGG
+CAGTCTAGCAGAG---AAAGA---RGTAATGATTAGATCTGAAAATATCA
+CAGACAATGTCAAAATCATAATAGTACAGCTTACTGAGCCTGTAAACATC
+ACTTGTATCAGACC------T------GGCAACA---AT---ACAAGAAC
+AAGTATACGTATA---------GGACCAGGACAAACCTTCTATGCA---A
+CAGGTGAT------GTAATARRGGACATAAGAAAAGCATATTGTAATGTC
+AGCAGAGCAGCATRGAATAGCACTTTACAAAAGATAAGT---AC--ACAA
+TTAAGAAAA------TACTTT---A----AT--AAC------AAAACA--
+----ATAATCTTTAAG----AGCTC---CACAGGARAGGATTTAGAAGTT
+ACAACAC------ATAGTTTCAATTGTGGAG---GAGAATTTTTCTATTG
+CAATACAACAGACCTGTTCAAT---AGCACT---TR---G----------
+------GAT-----------------------------------GGCACT
+G---TCACA-----------------AAT---AG-CAC---A---A----
+--AG--GC--CAA---T---------------GGAACTATAACT------
+---CTACCATG-CA-GAATAAAGCAAATTATAAATATGTGGCA---GAGA
+GTAGGACAAGCAATGTATGCCCNTCCTATCAAARGAAGTATAAGGTGTGA
+ATCAAACATTACAGGACTACTACTAACAAGAGATGG---TRGAGG---TR
+GA------ACT---------------AAT--NGCA---GC--AATGAGAC
+CTTCAGACCTATAR-GAGGAG-ATWTGAGGAACAATTGGAGAAGTGAACT
+ATATAAGNATAAAGTAGTAAAAATTGAACCAATARRAGTAGCACCTACCA
+GGGCAAAGAGAAGAGTRGTRGA------GAGAGAAAAAAGAGCAA---TT
+GGA-------CTARGAGC---TG--C---CTTCCTTARGT---TCTTAGG
+----AG-CAGCAGRAAG-TACTATRGGCGCGGCGTCAATGACGCTGACGG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAGCAATTTG
+CTGARGGCTATAWARGCTCAGCAGCAYCTGCTGAAACTCACGGTCTRGGG
+CATTAAACAGCTCCAGGCAAGARTCCTGGCTGTGGAAAGRTACCTAAAGG
+ATCAGCAGYTCCTAAGAATTTGRGGTTGCTCTARAAAACTCATCTGCACC
+ACTAATGTGCCCTRGAAYTCTAGTTRGAG---T---AAT-----------
+-AAATYTCAGAG---TGAGATAT---ARG---ATAACATGACCTAGATGC
+AATAGGACAARGAAGTTATCAATWACACAGACATAATATATGATCTAATT
+RAAAAATCGCAAAACCAGCAGGAAAAGAATGARCAAGATTTATTGGCATT
+AGATAAGTAGGCAGGTCTGTRRAGTTRGTTWGACATATCAAATTGGTTAT
+RGTATATARAAATATTTATAATAATAGTAGGAGGCTTAATARGATTAAGA
+ATAATTTTTGCTGTGCTTTCTATAATAAATAGAGCCAGGCARRGATACTC
+ACCCTTGTC---ATTGCAGACC---CTTACCC---CACACCCAGAAAGA-
+-----CCAGACAGGCCCRGAAGAATCAAAGAAGAAGGTRGAGAGCAAGGC
+AGAGACAGATCAATTCGATTAGTAAGCGGATTTTTAGCACTTGCCTRGGA
+CGATCTACRGAGCCTGTGTCTCTTCAGCTACCACCGATTGAGAGACTTCA
+TCTCGATTGCAGCGAGGACTGTRGAACTTCTGAAA---CGCAGCAGTCTC
+AARGGACTGAGACTGRGGTARGARGGCCTCAAATATCT---RRGGAATCT
+T---CTRGGATATTRRGGTCAGGAACTAAAGAGTAGTGCTATTAATCTGA
+TAGATA---CCATAGCAATAGCAGTAGCTRGGTRGACAGATARGGTTATA
+GAAATAGGACAAAGATTTTGTAGAGCTATTCGTAACATACCTAGGAGAAT
+CAGACARGGCGCAGAAAAAGCTTTGCAATAA
+'A1.KE.00.KER2008'
+------GCGAGAGCGTCAGTATTAAGTGGGGG---AAAATTAGATGCATG
+GGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGACTGAAAC
+ACTTAGTATGGGCAAGCAGGGAGCTGGAAAAATTCGCACTTAACCCTGGC
+CTTTTAGAAACTTCAGAAGGATGTCAGCAAATAATGAACCAAATACAACC
+AGCTCTTCAGACAGGAACAGAAGAACTTAGATCATTATTTAATGCAGTAG
+CAACCCTCTATTGTGTACATCAGCGGATAGAGGTAAAAGACACCAAGGAA
+GCTTTAG---AT--AAA-GTAGAGGAAATACA------------AAACAA
+GAGCAAA------------CAAAAGACA---------CA---ACAG----
+--------------GCA---GCA---------------------GCT---
+------------------------GATACAGGA-----------------
+-------AACAACAGC------------AAG------GTCAGCCATAATT
+ACCCTATAGTGCAAAATGCACAAGGGCAAATGATACATCAGTCCTTATCA
+CCAAGGACTTTGAATGCATGGGTAAAGGTAATAGAAGAAAGGGGTTTTAG
+CCCAGAAGTAATACCCATGTTCTCAGCATTATCAGAAGGAGCCACCCCAC
+AAGATTTAAATATGATGCTGAACATAGTGGGGGGACACCA---GGCAGCT
+ATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGACAG
+ATTACATCCAGCACAGGCAGGGCCTATTCCACCAGGCCAGATAAGAGACC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGCACCCCTCAAGAACAAATA
+ACATGGATGACAAACAACCCACCTATCCCAGTGGGAGACATCTATAAAAG
+ATGGATAATCCTAGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTCTGGATATAAAACAAGGGCCAAAAGAACCCTTCAGAGACTATGTA
+GATAGGTTCTTTAAAGTTCTCAGAGCCGAACAAGCTACACAGGAAGTAAA
+AGGTTGGATGACAGAGACCCTGCTGGTTCAAAATGCAAATCCAGATTGTA
+AATCCATCCTAAGAGCATTAGGAACAGGGGCTACATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGGGTTTTAGC
+TGAGGCAATGAGTCAAGC---AC---AAC---A---GG--C---A-----
+-----------------AATGTA---ATGATGCAGAGGGGCAATTTTAAG
+GGCCAGAAA---AGAATTAAGTGTTTCAACTGTGGCAAAGAGGGACACCT
+AGCCAGAAATTGCAGAGCCCCTAGGAAAAAAGGCTGTTGGAAGTGTGGGA
+AAGAAGGACACCAAATGAAAGATTGCATT------GAGAGACAGGCTAAT
+TTTTTAGGGAAAATTTGGCCTTC---CAGCAAAGGG---AGGCCAGGGAA
+TTTTCCTCAGAGCAGA---------------CCGGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGATC
+TTTGGGATGGGGGAAGAGATA---------------ACCTCCCCTCCG--
+----AAGCAGGAGCAG------------AAAGA---GAGGGAACAAACC-
+-----CCA------CCCTTTGTTTCCCTCAAATCACTCTTTGGCAACGAC
+CCGTTGTCACAGTAAAAGTAGGAGGAGAGATGAGAGAAGCTCTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGATATAAATTTGCCAGGAAAATG
+GAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAGGTAAAACAAT
+ATGATCAGGTATCTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGAAGAAATATGTTGAC
+TCAAATTGGTTGTACTTTAAATTTTCCAATTAGTCCTATTGAAACTGTAC
+CAGTAACATTAAAGCCAGGAATGG---ATGGCCCAAGGGTTAAACAATGG
+CCATTGACAGAAGAAAAAATAAAAGCATTGACAGAAATTTGTCAA-----
+-GAGATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCAT
+ACAATACTCCAATATTTGCAATAAAGAAAAAAGATAGCACTAAATGGAGG
+AAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGA
+AGTTCAATTAGGGATACCGCACCC---AGCGGGACTAAAAAGGAAAAAAT
+CAGTAA---CAGTACTAGATGTGGGGGACGCATATTTCTCAGTTCCCCTA
+GATAAAAACTTTAGAAAGTATACTGCATTTACCATACCTAGTTTAAATAA
+TGAAACACCAGGAATTAGGTATCAATACAATGTGCTTCCACAAGGATGGA
+AAGGATCACCAGCAATATTCCAGTGCAG---TATGACAAAAATCTTAGAG
+CCCTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATATATGGATGA
+CCTGTATGTAGGATCAGATTTAGAAATAGGGCAGCATAGAGCAAAAATAG
+AAGAATTAAGAGCTCATCTACTGAGCTGGGGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTCCA
+TCCTGACAAGTGGACAGTCCAGCCTATAGAGCTGCCAGAAAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTGGGAAAACTAAATTGGGCC
+AGTCAAATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTTCTTAG
+GGGAGCCAAAGCACTAACAGATATAGTAACACTGACTGAGGAAGCAGAAT
+TAGAATTAGCAGAGAACA---GGGAGATTCTAAAAGACCCTGTACATGGA
+GTATATTATGACCCATCAAAAGACTTAACAGCAGAAATACAGAAGCAAGG
+GCAAGACCAATGGACATACCAAATTTATCAGGAGCCATTTAAAAATTTAA
+AGACAGGAAAATATGCAAGAAAAAGGTCTGCCCACACTAATGATGTAAGA
+CAATTAGCAGAAGTGGTGCAGAAAGTGGTCATGGAAAGCATAGTAATATG
+GGGAAAGA---CTCCTAAATTTAAACTA---CCCATACAAAAAGAGACAT
+GGGAGACA---TGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAA
+TGGGAGTTTGTCAATACCCCTCCCCTAGTAAAATTATGGTACCAGTTGGA
+GAAAGACCCCATAGCAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCCC
+ATAGGGAGACTAAGCTAGGAAAGGCAGGATATGTCACTGACAGGGGAAGA
+CAAAAGGTTGTTTCCCTGACTGAGA---CAACAAATCAAAAAACTGAACT
+ACATGCAATTTATCTAGCCTTGCAGGATTCAGGATCAGAAGTAAATATAG
+T---AACAGACTCACAGTATGCATTAGGAATCA---TTCAGGCACAACCA
+GACAGAAGTGAATCAGAGATAGTTAATCAAATAATAGAGAAGCTAATAGG
+AAAGGACAAAGTCTACCTG---TCATGGGTACCAGCACACAAAGGGATTG
+GAGGAAATGAACAAGTAGATAAATTAGTCAGCTCTGGAATCAGGAGGGTA
+CTATTTCTAGATGGGATAGATAAGGCTCAAG---AAGAACATGAAAGATA
+---TCACAACAATTGGAGAGCAATGGCTAGTGATTTTAATATCCCACCTA
+TAATAGCAAAGGAAATAGTAGCCAGCTGTGATAAAT---GTCA---ACTA
+AAAGGGGAAGCCATGCATGGACAAGTAGACTGTAGTCCAGGGATGTGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAG
+GAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGACAATGGCAGCAACTTCACCAGCGCTGCATTTAAAGCAG
+CCTGTTGGTGGGCAAATGTCCAACAAGAATATGGAATTCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTAAAGAAAATCAT
+AGGGCAGGTAAGGGACCAAGCTGAACACCTCAAGACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCATCAGACATACAAACCAA
+AGAACTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACA
+GGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTACTCTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGACAATAGTGATATAAAGGTAGT
+ACCAAGGAGAAAAGCAAAGATTATCAGGGACTATGGAAAACAGATGGCAG
+GTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGA---ACATGGAA
+CAGTTTAGTAAAACATCATATGTATATTTCAAAGAAAGCTAAAGGTTGGT
+TTTATAGACATCACTATGAAAGCAGGCATCCAAAAGTAAGTTCAGAAGTA
+CACATCCCACTAGGGG---ATG---CTAAATTAGTAGTAAGGACATATTG
+GGGTCTGCATCCAGGAGAAAAAGATTGGCACTTGGGTCATGGGGTCTCCA
+TAGAATGGAGGCTAAAAAGATATAGCACACAA------GTAGATCCTGAA
+CTGGCAGACCAACTAATTCATCTGCATTATTTTAACTGTTTTTCAGAATC
+---TGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGCCCTAGTTGTG
+AATATCAAGCAGGACATA---ACAAGGTAGGATCTCTACAATATTTGGCA
+CTGAAAGCATTAGTAACACC---AAAGAAGACAAAGCCACCTTTGCCTAG
+TGTTAAGAAACTAGCAGAGGATAGATGGAACAAGCCCCAGAAGATCAGGG
+GCCACAGAGAGAGCCATACAATGAATGGACATTAGAATTGTTAGAAGAGC
+TTAAAAATGAAGCTGTTAGACATTTTCCTGGGCCATGGCTTCATGGATTA
+GGACAATATATCTATCACACCTATGGGGATACTTGGGAAGGAGTTGAAG-
+--CTATGCTAAGAATTTTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGCA---CACATAGCAG---AATAGGCATT---ATTCCCAGG
+AGAAGAGCCA----------GGGATGGATCCGGTAGATCCTAACCTAGCG
+CCCTGGCAACACCCGGGAAGTCAGCCTACAACTCCTTGTA---ACAAGTG
+TTTCTGTAAAAAGTGTTGCTATCATTGTCCA---------------GTTT
+GCTTGCTGAA---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAGGA------A---CTCCTCAAAGCAATAA
+GGATCATCAAAATCCTATACCAAAGCAGTAAGTAC--TTAGTAACT----
+-AATATATGTA---ATGTTG------------------------------
+---------TCTGCTTTAG---------------AAATCTGTGCAATTGC
+A---GGACTGGTAATAGC---TT--TAATCA-TAGCAATAGTTGTGTGGA
+CTATAGTAGGTATAGAATAT---AGGA------------GATTGTTAAAA
+CAA---AGAAAAATAGACAGGTTAATTGAGAGAATAAGAGAAAGAGCAGA
+AGACAGTGGCAATGAGAGTGATGGGGACACAGAGGAATTGGCAGCCCTT-
+--ATTGAGATG------------------------GGGAACTATG---AT
+CTTGGGG---ATGCTAATGAT------------CTGT------AGTGCTG
+C---------AGA---C------------A---A------CTTG------
+TGGGTTACTGTCTATTATGGGGTACCTGTGTGGAAAGATGCAG---AG--
+ACCACCT-TATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAAAAAC
+ATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCAGCCCACAA
+GAAATACCTTTGAAAAATGTGACAGAAAAGTTTAACATGTGGAAAAATAA
+CATGGTAGAACAGATGCATACAGATATAATCAGTCTATGGGACCAAAGCC
+TAAAGCCATGTGTACAGTTAACCCCTCTCTGCGTCACTCTAAATTGTAGT
+G---ACG---TC--ACCACCAATAACACCAAG------------------
+-----------GT--CAAT---AC---CAC-CA-GTG---CC--------
+-------C-CTACCACC---GC--T-------------------------
+--------------------------GCC---AC--T---GT--C-----
+AAC---AATGA------C-AGT-GA-CATGA-AAAACTGTTCTTACAATA
+TGACCACAGAACTAAGGGATAAGAAACAGAAAATGTATTCAC---TCTTT
+TATAGACTTGATGTAGAACTAATAAATAAG--------------------
+-------AATA---AT--AG--TAATAGC---ACT---------------
+---------------------G---AG--TATGTA-T-TAATAAATTGTA
+-AAACCTCAACCCTTACACAGGCTTGTCCAAAGGTATCCTTTGAGCCAAT
+TCCCATACATTATTGTGCTCCAGCTGGTTTTGCGATTCTAATGTGTAAGG
+ATAAGGAGTTCAATGGAACAGGGCTATGCAAGAATGTCAGCACAGTACAA
+TGCACACATGGAATCAAGCCAGTAGTGTCTACTCAACTACTATTAAATGG
+CAGTTTAGCGGAA---AAAAG---GGTAATGATTAGATCTGCAAATATCA
+CAGACAATACCAAAAACATAATAGTACAACTTAAGGATCCTGTAGAAATT
+AATTGTACCAGACC------T------AACAACA---AT---ACAAGAAG
+AGGTGTACATATA---------GGGCTAGGGAGAAGATTCTATACA---A
+CA---CAG------GTAGTAGGGGATATAAGACAAGCATATTGTAATGTC
+AGTAAATCAAAATGGGATGATACTTTGAAAAAAGTAGTT---TA--CCAA
+TTAAGAAAA------TACTTT---A----AC-----------AAAACA--
+----ATAATCTTTAAC----TCCTC---CTCAGGAGGGGATGTAGAAATT
+ACAACAC------ATATGTTTACTTGTGGAG---GAGAGTTTTTCTATTG
+TAATACATCAGGCCTGTTCAAT---AGCACT---TG---G----------
+------CCT-----------------------------------GTCAAT
+C---AGGAG---T-C-------A---AAT---AG-CAC---A---A----
+--AG--TC--AAA---T---------------GGCATTATAACT------
+---CTCCGATG-CA-GAATAAAGCAAATCATAAATATGTGGCA---GAGA
+ACAGGACAAGCAATGTATGCCCCTCCCATTCAAGGAATAATAAAGTGTGT
+ATCAAACATTACAGGACTACTGTTAACAAGAGATGG---TGGGAA---TA
+GT-----------------------------ACTG---TC--AATGAAAC
+TTTCAGACCTGGAG-GAGGAG-ATATGAGGGACAATTGGAGAAGTGAATT
+ATATAAGTATAAAGTAGTAAAAATTGAACCACTAGGAGTAGCACCCACCA
+AGGCAAGGAGAAGAGTGGTGGG------AAGAGAAAAAAGAGCAG---TT
+GGA-------ATAGGAGC---TG--T---TTTCCTTGGGT---TCTTAGG
+----AG-CAGCTGGCAG-CACTATGGGCGCGGCGTCAATAACGCTGACGG
+TACAGGCCAGACAATTACTGTCTGGCATAGTGCAACAGCAAAACAATTTG
+CTGAGGGCTATAGAGGCTCAACAACAACTGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTAATGTGCCTTGGAACTCCAGTTGGAG---T---AAT-----------
+-AAGTCTCAGAG---TGAGATAT---GGG---ACAACATGACCTGGCTGC
+AATGGGATAGAGAAATTAACAATTATACAGACATAATATATAGACTACTT
+GAAGATTCGCAAAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATT
+GGACAAGTGGGCAAGCCTGTGGACTTGGTTTGACCTATCAAACTGGCTGT
+GGTACATAAAAATATTTATAATGATAGTAGGAGGTTTAATAGGATTAAGA
+ATAGTTTTTGCTGTGCTTGCTGTAATAAATAGAGTTAGGCAGGGATACTC
+ACCTTTGTC---GTTCCAGACC---CTTACCC---CAAACCCAAGGGAA-
+-----CTCGACAGGCCCGGAAGAATCGAAGAAGAAGGTGGAGAGCAAGAC
+AGAGGCAGATCGATTCGCTTAGTGAGCGGATTCTTAGCACTTGCCTGGGA
+CGACCTGCGGAGCCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTCA
+TCTTGATTGCAGCGAGGATTGTGGGACTTCTGGGA---CGCAGG------
+---------------GGGTGGGAAGGCCTCAAGTATCT---GTGGAATCT
+C---CTGATATATTGGGGTCAGGAACTAAAAACTAGTGCTGTTAGCTTGC
+TTGATA---CCACAGCAATAGTAGTAGCTGGCTGGACAGATAGGGTTCTA
+GAAATAGGACAAAGAATTGGTAGGGCTTTTATCAACATACCTAGAAGAAT
+TAGACAGGGTTTTGAAAGGGCTTTGCTGTAA
+'A1.KE.00.KER2009'
+------GCGAGAGCGTCAGTATTAAGCGGGGG---AAAATTAGATGCATG
+GGAGAGGATTCGGCTAAGGCCAGGGGGAAAGAAAAAATATAGACTGAAAC
+ATTTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCTGGC
+CTTTTAGAAACAGCAGAAGGATGTCAGCAAATAATAGAACAGTTACAACC
+AGCTCTCAAGACAGGAACAGAAGAACTTAAATCATTATTCAATGCAGTAG
+CAGTCCTCTATTGTGTACATCAAAGAATAGATGTAAAAGACACCAAGGAA
+GCTCTAG---AT--AAA-ATAGAGGAAATACA------------AAATAA
+GAGCAAG------------CAAAAGACA---------CA---ACAG----
+--------------ACA---GCA---------------------GCT---
+------------------------GACACAGGA-----------------
+-------AACAGCAGC------------AAG------GTCAGCCAAAATT
+ACCCTATAGTGCAAAATGCACAGGGGCAAATGATACACCAGACCGTGTCA
+CCTAGGACTTTGAATGCATGGGTAAAAGTAATAGAAGAGAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTCTCAGCATTAGCAGAAGGAGCCACCCCTG
+GAGATTTAAATACGATGCTGAATATAGTGGGGGGACACCA---GGCAGCC
+ATGCAAATGTTAAAAGATACCATCAATGAGGAGGCTGCAGAATGGGACAG
+ACAACATCCAGTACATGCTGGACCTATTCCACCAGGCCAGATGAGAGAAC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGTACCACTCAAGAGCAAATA
+GGATGGATGACAAGTAACCCACCTATCCCAGTGGGAGAAATCTATAAAAG
+ATGGATAATCCTGGGATTAAATAAGATAGTAAGAATGTATAGCCCTGTTA
+GCATTCTGGATATAAGACAAGGGCCAAAAGAACCCTTCAAAGATTATGTA
+GATAGGTTCTTTAAAACTCTCAGAGCTGAGCAAGCTACGCAGGAGGTAAA
+AAATTGGATGACAGAAACATTACTGGTCCAAAATGCAAATCCAGATTGTA
+AAACCATTCTAAGAGCATTAGGACCAGGGGCTACATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAAGATTTTGGC
+TGAGGCAATGAGTCAGGC---AC---AAC---A---TA--C---A-----
+-----------------AACATA---ATGATGCAGAGAGGTAATTTTAGG
+GGTCAAAAA---AGGATTAAGTGTTTCAACTGTGGCAAGGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGGA
+AGGAGGGACACCAAATGAAAGACTGCACG------GAAAGACAGGCTAAT
+TTTTTAGGGAAAATCTGGCCTTC---CAGCAAGGGG---AGGCCAGGAAA
+TTTTCCACAGAAAAGA---------------CTGGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGATC
+TATGGGATGGGGGAAGAGATA---------------GCCTCTCCTCCG--
+----AAGCAGGAGCAG------------AAAGA---CAGGGAACAGGGT-
+-----CCA------CCTGCAATTTCCCTCAAATCACTCTTTGGCAACGAC
+CTATTGTCACAGTAAAAATAGGGGGACAGCTAAGAGAAGCTTTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAGGAAAATG
+GAAACCAAGAATGATAGGGGGAATTGGAGGTTTCATCAAAGTAAAACAGT
+ATGATCAGATACTTATAGAAATTTGTGGAAAAAGGGCTATAGGTACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGAAGAAACATGTTGAC
+CCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAGACTGTAC
+CAGTAAAATTAAAGCCAGGAATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTAACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACA-----
+-GAGATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCAT
+ACAATACTCCAATATTTGCAATAAAGAAAAAAGATAGTACTAAATGGAGA
+AAATTAGTAGATTTCAGAGAGCTTAATAAAAGAACACAAGACTTTTGGGA
+AGTTCAATTAGGAATACCGCATCC---AGCGGGCTTAAAAAAGAAAAAAT
+CAGTAA---CAGTACTGGATGTGGGGGACGCATATTTCTCAGTTCCTTTA
+GATGAAAGCTTTAGAAAATATACTGCGTTCACCATACCTAGCACAAACAA
+TGCGACACCAGGAATCAGATATCAGTACAATGTGCTTCCACAGGGATGGA
+AAGGATCACCAGCAATATTCCAGTGTAG---CATGACAAAAATCTTAGAG
+CCCTTTAGATCAAAAAATCCAGACATAATTATCTATCAATACATGGATGA
+CTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAACAAAAATAG
+AAGAATTAAGAGCTCATCTATTGAGCTGGGGATTAACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCCCCATTCCTTTGGATGGGATATGAGCTCCA
+TCCTGACAAGTGGACTGTCCAGCCTATAGTGCTGCCAGAAAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTAGGGAAACTAAATTGGGCA
+AGTCAAATTTATCCAGGGATTAAAGTAAAGCAATTGTGTAAACTTCTCAG
+GGGAGCCAAAGCACTAACAGATATAGTAACATTGACTGAGGAAGCAGAAT
+TAGAATTGGCAGAGAACA---GGGAGATCCTAAAAGACCCTGTGCATGGA
+GTATACTATGACCCATCAAAAGAATTAATAGCAGAAATACAAAAACAAGG
+GCAAGACCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAA
+AAACAGGAAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAGA
+CAATTAACAGAGGTGGTGCAAAAGGTGGTCCTGGAAAGCATAGTAATATG
+GGGAAAGA---CCCCTAAATTTAAACTG---CCCATACAGAAAGAGACAT
+GGGAAACA---TGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAA
+TGGGAGTTTGTTAATACCCCTCCTCTAGTAAAATTGTGGTACCAGTTAGA
+GAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCGGCCA
+ATAAAGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGGGGAAGA
+CAAAAGGTTGTTTCCCTAACTGAGA---CAACAAATCAAAAGACTGAACT
+ACATGCAATCTATCTAGCCTTGCAGGATTCAGGATCAGAAGTAAACATAG
+T---AACAGACTCACAGTATGCATTAGGAATCA---TTCAGGCACAACCA
+GACAGGAGTGAATCAGAAATAGTCAATCAAATAATAGAGAAGCTAATAGA
+AAAGGACAAAATCTACCTG---TCATGGGTACCGGCACACAAAGGGATTG
+GAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAGAAAGGTA
+CTGTTCTTAGATGGGATAGATAAAGCTCAAG---AGGAACATGAAAGATA
+---TCACAGCAATTGGAGAACAATGGCTAGTGATTTTAATCTGCCACCTG
+TAGTAGCAAAGGAAATAGTAGCCAGCTGTGATAAAT---GTCA---GCTA
+AAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAATAATTCTGGTAGCAGTTCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAA
+GAGACAGCATACTTTTTACTGAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAG
+CCTGTTGGTGGGCAGGTATCCAACAGGAATTTGGAATTCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTAAAGAAAATCAT
+AGGACAGGTAAGAGAACAAGCTGAACACCTTAAAACAGCGGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAA
+AGAACTACAAAAACAAATTACAAAAGTTCAAAATTTTCGGGTTTATTTCA
+GGGACAGCAGAGATCCACTTTGGAAAGGACCAGCAAAACTACTCTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGATAATAGTGAGATAAAAGTAGT
+ACCAAGGAGAAAAGCAAAGATCATCAGGGACTATGGAAAACAGATGGCAG
+GTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGA---ACATGGAA
+CAGTCTAGTGAAACATCATATGTATGTCTCAAAGAAAGCTAAAAGGTGGT
+TTTATAGACATCACTATGAAAGCAGGCATCCAAAAGTAAGTTCAGAAGTA
+CACATCCCAGTAGGGG---AGG---CTATGTTAGTAATAACAACATATTG
+GGGTCTGCATACAGGAGAAAAAGACTGGCAGTTGGGTCATGGGGTTTCCA
+TAGTATGGAGGCTAAGAAGATATAGCACACAA------ATAGATCCTGAC
+CAGGCAGACCAACTAATTCATCTGCATTATTTTGACTGTTTTTCAGGCTC
+---TGCCATAAGGAAGGCCATATTAGGACAAATAGTTAGGCCTAGTTGTG
+AATATCCTACAGGACATA---ACAAGGTAGGATCTCTACAATATTTAGCA
+CTGAAAGCATTAGTAACACC---AACAAGGAGAAAGCCACCTTTGCCTAG
+TGTTAGGAAATTAACAGAGGATAGATGGAACAAGCCCCAGAAGACCAGGG
+GCCGCAGAGAGAGCCATACAATGAATGGATGTTAGAGTTGTTAGAAGATC
+TTAAGCATGAAGCTGTTAGACATTTTCCTAGGCCATGGCTTCATGGATTA
+GGACAACATATCTATAACACCTATGGGGATACTTGGGAGGGAGTTGAAG-
+--CTATAATAAGAACTCTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGCC---AACATAGCAG---AATAGGCATT---ACTCAGACG
+AGAAGAGGCA----------GGAATGGACCCGGTAGATCCTAACTTAGAG
+CCCTGGAACCATCCGGGAAGTCAGCCTACAACACCTTGTA---GCAAATG
+TTACTGTAAAAAGTGTTGCTATCATTGCCCG---------------CTTT
+GCTTCATAAA---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAGGA------A---CTCCTCAGAGCTATAA
+GGATCATCAAAATCTTGTACCAAAGCAGTAAGTAT--TAGTAACT-----
+-AGTATATGTA---ATGACTCCTG--------------------------
+---------AAACCTGTG----------------------CAGCAATAGT
+A---GGACTGATAGTAGC---GC--TAATCC-TAGCAATAGTTGTGTGGA
+CTATAGTAGGTATAGAATAT---AAGA------------AAATATTAAAA
+CAA---AGGAAAATAGACAGGGTAATTGAAAGAATAAGAGAAAGAGCAGA
+AGATAGTGGCAATGAGAGTGATGGGGATACAGAGGAATTGTCAGCACTT-
+--GTTGGGATG------------------------GGGAACTATG---AT
+CTTGGGA---ATATTAATAAT------------GTGT------AGTGCTG
+C---------AGA---A------------C---A------CTTG------
+TGGGTTACTGTCTACTATGGGGTACCTGTGTGGAAAGAAGCAG---AA--
+ACCACCT-TATTTTGTGCATCAGAAGCTAAAGCATATGATCCAGAACAGC
+ATAACGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATAAATTTGATAAATGTGACAGAAAAGTTTAACATGTGGAAAAATAC
+CATGGTAGAGCAAATGCATACAGATATAATCAGTCTATGGGACGAAAGTC
+TAAAGCCATGTGTAAAGTTAACCCCTCTCTGTGTTACTTTACGTTGTAAC
+A---ATA---CC--AAT---------------------------------
+-----------GT--CAAT---GT---CAA-TA-ACA---CG--------
+-------A-TTGACAAC---AG--C-------------------------
+--------------------------ACG---GA--C---AG--G-----
+-------------------GGG-GA-AATGA-AAAACTGCTCTTTCAAAG
+TGACCACAGAGCTAAGGGATAAGGAACAGAAGGTACATTCAC---TTTTT
+TATAGACTTGACTTAGTACAATTGGATAATACAACGCCA-----------
+-------GAAA---AT--AG--TAGTAAC---AAG---------------
+---------------------G---AG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCCATCACACAGGCTTGCCCAAAGGTATCTTTTGAGCCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTATGCGATCCTAAAGTGTAATG
+ATAAGAAGTTCAATGGAACAGGGCCATGCACGAATGTCAGCACAGTACAA
+TGCACACATGGAATCAGGCCAGTAGTATCAACTCAACTGCTGTTAAATGG
+AAGTCTAGCAGAA---GAAGA---GGTAATAATTAGATCTGAAAACATCA
+CAGACAATGCCAAAACTATAATAGTACAACTTACTGAGCCTGTAAAAATT
+AATTGTACCAGACC------T------AACAACA---AT---ACAAGAAG
+GAGTATAACTATG---------GGACCAGGAAAAGCATTCTATACA---A
+AT---GAC------ATCATAGGAAATATAAGACAAGCATATTGTACTGTC
+AATAGATCAGAATGGAATAACACTTTACAAAAGGTAGCT---AC--ACAA
+TTAAGAGAA------CACTTT---G----AA--AAC------AAAACA--
+----ATAATATTTACT----CACTC---CTCAGGAGGGGATCTAGAAGTC
+ACAACAC------ATATGTTTAATTGTGGAG---GAGAATTCTTCTATTG
+TAATACATCAGGCCTGTTTAAC---AGCACC---TG---GC--ACTATAA
+CAGC--AATGG--TACTTGG------------------------CACAAT
+A---ACAGC---A-C-------A---AAT---AG-CAT---G---G----
+--AC--TC--AAA---T---------------GAAACTATAACT------
+---CTCACATG-CA-GAATAAAGCAAATTATAAATATGTGGCA---GAGA
+GTAGGAAAAGCAATGTATGCTAATCCCATCCAAGGAATAATAAAGTGTGA
+ATCAAACATTACAGGACTACTCTTAACAAGAGATGG---TGGGAA---TA
+CT------AGC--------------------AGTA---CA--AATGAAAC
+CTTCAGACCTGGAG-GAGGAG-ATATGAGGGACAATTGGAGAAGTGAATT
+ATACAAATATAAAGTAGTAAAGATTGAGCCACTAGGAGTAGCACCCACCA
+GGGCACAGAGAAGAGTGGTGGG------GAGAGAAAAAAGAGCAA---TT
+GGA-------ATAGGAGC---TG--T---CTTCCTTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCGGCGTCAATGACGCTGACGG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAGCAATTTG
+CTGAGGGCTATAGAGGCTCAACAACAACTGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCACGAGTCCTGGCTGTGGAAAGATACCTACAGG
+ATCAACAGCTCCTAGGAATTTGGGGATGCTCTGGAAAACTCATCTGCCCC
+ACTAATGTGCCCTGGAACTCTACTTGGAG---T---AAT-----------
+-AAATCTTATAG---TGAGATAT---GGG---ATAACATGACCTGGCTGC
+AATGGGATGAAGAAGTTAGCAATTACACACAAATAATATATGACCTAATT
+GAAAAATCGCAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATT
+GGACAAGTGGGCAAGTCTGTGGAGTTGGTTTAGCATATCAAACTGGCTGT
+GGTATATAAGAATATTTATAATGGTAGTAGGAGGTTTAATAGGATTAAGA
+ATAGTTTTTGCTGTGTTTTCTGTAATAAATAGAGTTAGGCAGGGATACTC
+ACCTCTGTC---ATTTCAGACC---CATTCCC---CAGCCCCAGGGGGT-
+-----CTCGACAGGCCCGGAAGAATCGAAGAAGAAGGTGGAGAGCAAGGC
+AGAGACAGATCGATTCGATTAGTGAGCGGATTCTTAACACTTGCCTGGGA
+CGACCTACGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAAACTTCA
+TCTTGATTGTAGCGAGGACTGTGGAACTTCTGGGA---CACAGCAGTCTC
+AAGGGGTTGAGACTGGGATGGGAAGGCCTCAAGTATCT---GTGGAATCT
+C---CTGATATATTGGGGTCGGGAACTAAAAATTAGTGCTACTAATTTGA
+TAGATA---CTATAGCAATAGTAGTAGCTGGCTGGACAGATAGGGTGATA
+GAAATAGGACAGAGCATTGGTAGAGCTATCCTCCACATACCTAGAAGAAT
+CAGACAGGGCTTTGAAAGAGCTTTGCTATAA
+'A1.KE.00.KER2012'
+------GCGAGAGCGTCAGTATTAAGTGGGGG---AAAATTAGATGCATG
+GGAGAGAATTCGGTTAAGGCCAGGGGGAAAGAAACAATATAGACTGAAAC
+ATCTAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTAGC
+CTTTTAGAAACAACAGAAGGATGTCAACAAATAATAACACAGTTACAACC
+AGCTATCGGAACAGGAACAGAAGAACTTAAATCATTATATAATACAGTAG
+CAACCCTCTATTGTGTACATCAGGGGATAAAGGTAAAAGACACCAAGGAA
+GCTCTAG---AT--AAA-ATAGAGGAAATACA------------AAAGAA
+AAGCAAA------------CAACAGACA----------------------
+--------------------GCA---------------------GCT---
+------------------------GACACAGGA-----------------
+----------AGCAGC------------------------AGCCAAAATT
+ACCCTATAGTGCAAAATGCACAAGGGCAAATGATACACCAGTCCTTGTCA
+CCTAGGACTTTGAATGCATGGGTGAAAGTAATAGAAGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACCCCAC
+AAGATTTAAATATGATGCTGAACATAGTGGGGGGACACCA---GGCAGCT
+ATGCAAATGTTAAAAGACACCATCAATGAGGAAGCTGCAGAATGGGACAG
+GCTACATCCAGTACATGCAGGGCCTCCTGCACCAGGCCAGCTGAGAGAAC
+CAAGGGGAAGTGATATAGCAGGAACTACTAGCACCCCTCAAGAACAGATA
+GGATGGATGACAGGCAACCCACCTATCCCAGTGGGAGACATCTATAAAAG
+ATGGATAATTCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTCTGGATATAAAACAAGGGCCAAAAGAACCCTTCAGAGATTATGTA
+GATAGGTTCTTTAAAGTTCTCAGAGCTGAGCAAGCTACACAGGAGGTAAA
+AGGTTGGATGACAGAAACATTATTGATCCAAAATGCAAATCCAGATTGTA
+AGTCCATTTTAAGAGCATTAGGAGCAGGGGCTACATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGGGTCTTGGC
+AGAGGCAATGAGCCAAGT---AC---AAC---A---AA--C---A-----
+-----------------AACATA---ATGATGCAGAGAGGCAATTTTAGG
+GGCCAAAAA---AGGATTAAGTGTTTCAACTGTGGCAAAGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGCTGGAAATGTGGGA
+AGGAGGGACACCAGATGAAAGACTGCACT------GAGAGACAGGCCAAT
+TTTTTAGGGAAAATTTGGCCTTC---CAGCAAGGGG---AGGCCAGGGAA
+TTTTCCTCAGAGCAGG---------------CCAGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGTTC
+TTTGGAACGAGGGAAGAGATG---------------ACCCCCCCTCCG--
+----AAGCAGGAGCAG------------AAGGA---CAGGGAACAGAAC-
+-----TCA------CCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGAC
+CTATTGTCACAGTAAAAATAGGGGGACAGCTAAAAGAAGCTCTACTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTACCAGGAAAATG
+GAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATTAAGGTAAAACAGT
+ATGATCAGATAGTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTA
+TTGGTAGGACCTACACCTGTCAACA---TAATTGGAAGAAATATGTTGAC
+CCAGATTGGCTGTACTTTAAATTTCCCAATTAGTCCTATTAGTACTGTAC
+CGGTAAAACTAAAGCCAGGAATGG---ATGGCCCAAGGGTTAAACAATGG
+CCATTGACAGAAGAAAAAATAAAAGCATTAACAGAGATTTGTAAA-----
+-GAGATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCAT
+ACAATACCCCAATATTTGCTATAAAGAAAAAAGACAGCACTAAATGGAGA
+AAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACTCAAGACTTTTGGGA
+AGTTCAATTAGGAATACCGCATCC---AGCGGGCTTAAAAAAGAAAAAAT
+CAGTAA---CAGTACTAGATGTGGGGGACGCATATTTTTCAGTTCCCTTA
+GATGAAAGCTTTAGAAAGTATACTGCATTCACCATACCTAGTACAAACAA
+TGAGACACCAGGAATCAGGTATCAGTACAATGTGCTTCCACAGGGATGGA
+AAGGATCACCGGCAATATTTCAGAGTAG---CATGACAACAATTTTAGAG
+CCCTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATACATGGATGA
+CTTATATGTAGGATCTGATTTAGAAATAGGGCAACATAGAGCAAAAATAG
+AGGAGTTGAGAGCTCATCTATTGAGCTGGGGACTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCCCCATTTCTTTGGATGGGATATGAACTCCA
+TCCTGACAAGTGGACAGTCCAACCTATACAGCTGCCAGAGAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTGGGAAAACTAAATTGGGCA
+AGCCAAATTTACCCAGGGATTAAAGTAAAACAATTGTGTAGACTCCTCAG
+GGGAGCCAAAGCACTGACAGATGTAGTAACATTGACTGAGGAAGCAGAAT
+TAGAACTGGCAGAAAACA---GGGAGATTCTAAAAGACCCTGTGCATGGA
+GTATATTATGACCCATCAAAAGACTTGATAGCAGAAATACAGAAACAAGG
+GCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATTTAA
+AAACAGGAAAATATGCAAGAAAGAAATCTGCTCATACTAATGATGTAAAA
+CAATTAGCAGAAGTGGTGCAAAAGGTGGTCATGGAAAGCATAGTAATATG
+GGGAAAGA---CTCCTAAATTTAAACTA---CCCATACAAAAGGAAACAT
+GGGAAACA---TGGTGGATGGATTATTGGCAGGCTACCTGGATTCCTGAA
+TGGGAATTTGTCAATACCCCTCCTCTGGTAAAATTATGGTACCAGTTAGA
+GAAAGACCCCATAGCAGGAGCAGAAACTTTCTATGTAGATGGGGCAGCCA
+ATAGGGAGACTAAACTAGGGAAGGCAGGGTATGTCACTGACAGAGGAAGA
+CAAAAGGTTGTTCCCCTAACTGAGA---CAACAAATCAAAAGACTGAATT
+ATATGCAATAGATTTAGCCTTGCAGGATTCAGGATCAGAAGTAAACATAG
+T---AACAGATTCACAGTATGCATTAGGAATCA---TCCAGGCACAACCA
+GACAGGAGTGAATCAGAGATAGTCAATCAAATAATAGAGAAGCTAATAGG
+AAAGGACAAGGTCTACCTG---TCATGGGTACCAGCACACAAAGGAATTG
+GAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGGAAGGTG
+CTATTTTTAGATGGAATAGATAAAGCTCAAG---AAGAACATGAAAGATA
+---TCACAGTAATTGGAGAGCAATGGCTAGTGATTTTAATCTGCCACCTA
+TAGTAGCAAAGGAAATAGTAGCCAGCTGTGATAAAT---GCCA---GCTA
+AAAGGGGAAGCCATACATGGACAAGTAGACTGCAGTCCAGGGATGTGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTCCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTAATCCCAGCAGAAACAGGACAG
+GAGACAGCCTACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGATAATGGCAGCAATTTTACCAGTGCTGCATTTAAAGCAG
+CCTGTTGGTGGGCAGGTGTCCAACAGGAATTTGGGATTCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTATGAATAGAGAATTAAAGAAAATCAT
+AGGACAAGTAAGGGAGCAGGCTGAACACCTTAAGACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAA
+AGAATTACAAAAACAAATTACAAAAATTCAAAAATTTCGGGTTTATTACA
+GGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAA
+GGTGAAGGGGC---AGTGGTGATACAGGACAATAGTGATATAAAGGTAGT
+ACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGGTGGCAG
+GTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGA---ACATGGAA
+CAGTTTAGTAAAATACCATATGTATGTCTCAAAGGAAGCTAAAGGTTGGT
+TTTATAGACATCACTATGAAAGTAGTCATCCAAAAGTAAGTTCAGAAGTA
+CACATCCCACTAGGGG---ATG---CTAAAATAGTAGTAAGAACATATTG
+GGGTCTGCATACAGGAGAAAAAGATTGGCACTTGGGTCATGGGGTCTCCA
+TAGAATGGAGGCTAAAAAGATATAGCACACAA------ATAGATCCTGAC
+GTGGCAGACCAACTAATTCATCAACATTATTTTGCCTGTTTTTCAGACTC
+---TGCCATAAGGAGAGCCATATTAGGGCATGTAGTTAGCCCTAGGTGTG
+AATATCAAACAGGACATA---ACAAGGTAGGATCTCTACAATATTTAGCA
+CTGAAAGCATTAGTAAAACC---AACAAGGAGAAAGCCACCTTTGCCTAG
+TGTTAGGAAATTAACAGAGGATAGATGGAACAAGCCCCGGAAGACCAGGG
+GCCACAGAGGGAGCCATACAATGAATGGATGTTAGAGCTGTTAGAAGATC
+TTAAGCATGAAGCTGTAAGGCATTTTCCTAGGCCATGGCTTCATGGATTA
+GGACAACATATCTATAACACCTATGGGGACACTTGGGAAGGAGTTGAGG-
+--CTATAATAAGAATCTTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGCC---AACACAGCAG---AATAGGCATT---ATTCGAGGG
+AGAAGAGCCA----------GGGATGGATCCGGTAGATCATAAACTAGAG
+CCCTGGAACCATCCGGGAAGTCAGCCTACAACTCCTTGTA---GCAAATG
+CTACTGTAAAAAATGTTGCTATCATTGCATA---------------GTTT
+GCTTTCAGAC---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAGGA------G---CTTCTCAAAGCAGTAA
+GGATCATCAAAATCCTGTACCAAAGCAGTAAGTAG--TAGTAATT-----
+-AACATATGTA---ATG---------------------------------
+---------TCTCCTTTGG---------------AAGTTTGGGCAATAGT
+A---GGACTCGTAGTAGC---GC--TAATAC-TAGCAATAGTTGTGTGGA
+CTATAGTAGGTATAGAATAT---AGAA------------AAGTGTTAAAG
+CAA---AGAAAAATAGACAGGTTAATTGAGAGAATAACAGAAAGAGCAGA
+AGACAGTGGCAATGAGAGTGATGGGGATACAGAGGAATTGTCAGCACTT-
+--GTTGAGATG------------------------GGGAATTATA---AT
+CTTGGGT---ATGATAATGAT------------CTGT------AGTATGG
+C---------ACA---A------------A---A------CTTG------
+TGGGTTACTGTCTATTATGGGGTACCTGTGTGGAGAGATGCAG---AC--
+ACCACCC-TATTTTGTGCATCAGATGCTAAAGCATATAAGACAGAAGTGC
+ATAATGTCTGGGCTACACATGCCTGTGTGCCCACAGACCCCAACCCACAA
+GAAATATATTTGACAAATGTGACAGAAAATTTTAACATGTGGAAAAATAA
+AATGGTAGAGCAGATGCATGAAGACATAATTAGTCTATGGGACCAAAGCC
+TAAAGCCATGTGTACAGTTAACCCCTCTCTGCGTTACGTTAAACTGTAGC
+G---ATG---TC--ACGAATAACACCCTC---------------------
+-----------AG--GAAT---GC---CAC-TG-TGAATGCC--------
+-------A-ATGCCAAT---GC--C-------------------------
+--------------------------ACT---GT--C---AC--TGG--G
+GAC---ATGGA------A-GGA-GA-AATGA-AAAACTGCTCTTACAATA
+TGACCACAGCAGTAAGGGATAAGCAAAAGAAAGTATACTCAC---TTTTT
+TATAGGCTAGATGTAGTGCAAATTAGCAAT--------------------
+-------AATA---GT--AG--CAGTAGT---AGT---------------
+---------------------G---AG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCCATTACACAAGCTTGTCCAAAAGTAACCTTTGAGCCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATCCTAAAGTGTAATG
+AAGAAGGGTTCAATGGAACAGGACCATGCAAAAATGTCAGCACAGTACAA
+TGCACACATGGAATCAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGG
+CAGTCTAGCAGAA---AAAGG---GGTAATAATCAGATCTGAAAACATCT
+CAAACAATGCTAAAACCATAATAGTACAACTTGCCGAGCCTGTAACAATT
+AATTGTACCAGACC------T------AACAACA---AT---ACAAGAAA
+AGGTATACATATA---------GGACCAGGACGAGCATTCTATGCA---A
+CAGGTGAC------ATAATAGGGGATATAAGAAAAGCATATTGTAATGTC
+AGTAGAACACAATGGAATAAAACTTTGGCACAGGTAGCT---GC--ACAA
+TTAACAAAA------TATTGG---A----AC-----------AAAACA--
+----ATAAACTTTACT----AGCTC---CTCAGGAGGGGATGTAGAAATT
+ACAACAC------ATAGTTTTAATTGTGGGG---GAGAATTTTTCTATTG
+TAATACAACAAACCTGTTTAAT---AGCACC---TG---G----------
+------AGAGG--AAAT---------------------------AAAACT
+G---TCAGC-----------------AAT---AG-CAC---A---G----
+--AG--TT--AGA---TGAAAAT---------GGCACTATAACT------
+---CTCCCATG-CA-GAATAAAGCAAATTATAAATATGTGGCA---GAGA
+ACAGGACAAGCAATGTATGCCCCTCCCATCCAAGGAGTAATAAAGTGTGT
+ATCAAACATTACAGGACTACTATTAACAAGAGATGG---TGGAAG---TG
+AT------AAC---------------AAT--ATAA---GT--AGTGAAAC
+CTTCAGACCAGGAG-GAGGAG-ATATGAGGGACAATTGGAGAAGTGAATT
+ATATAAGTATAAAGTAGTGAAACTTGAACCACTAGGAGTAGCACCCAACA
+AGGCAAGGAGAAGAGTGGTGGA------GAGAGAAAAAAGAGCAG---TT
+GGA-------CTGGGAGC---TG--T---ATTCATTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCGGCGTCAATAACGCTGACGG
+CACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAGCAATTTG
+CTGAAGGCTATAGAGGCTCAACAGCATCTGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCAAGAGTCCTGGCTCTGGAAAGATACCTAAAGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTAGTGTGCCCTGGAACTCTAGTTGGAG---T---AAT-----------
+-AAATCTTTTGA---GCAAATAT---GGA---ATAACATGACCTGGTTGG
+AATGGGATAAAGAAGTTAGCAATTACACACAAATAATATATGAGCTACTT
+GAAGTATCGCAGAACCAGCAAGAAAAGAATGAACAAGACTTATTGTCATT
+GGACAAATGGGCAAGTCTGTGGAATTGGTTTGACATATCAAAATGGCTGT
+GGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGCTTAAGA
+ATAGTTTTTGCTGTGCTTTCTATAATAAATAGAGTTAGGCAGGGATACTC
+ACCTTTGTC---ATTTCAGACC---CTTACCC---CAAACCCAGGGGGA-
+-----CTCGACAGGCCCGGAAGAATCGAAGAAGAAGGTGGAGAGCAAGGC
+AGAGACAGATCGATTCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGA
+CGATCTGAGGAACCTGTGCCTTTTCAGCTACCACCGCTTGAGAGACTTCA
+TCTTGATTGCCGCGAGGACTGTGGAACTTCTGGGA---CACAGCAGTCTC
+AAGGGGTTGAGACTGGGGTGGGAAGGGATCAAGTATCT---GGGGAATCT
+C---CTGTTGTATTGGGGTCGGGAACTGAAAAATAGTGCTATTAATTTGT
+TAGATA---CTATAGCAATAGCAGTAGCTGGCTGGACAGATAGGGTTATA
+GAAATAGGACAAAGAATTGGTAGAGCTATTCTCAACATACCTAGAAGAAT
+CAGACAGGGCGCTGAAAGAATCTTAGTATAA
+'A1.KE.00.KER2018'
+------GCGAGAGCGTCAGTATTAAGTAGGGG---AAAATTAGATGCATA
+AGAAAAAATTCGGTTAAGGCCAGGAAGAAAGAAAAAATATAAATTAAAAC
+ATCTAGTATAGGCAAGCAAAGAGCTGGACAGATTTGCACTTAACCCTAGC
+CTTTTAGAAACAACAGAAGGATGTCAACAAATAATAGAACAGTTACAACC
+AAGTCTCAAGACAGAAACAGATGAACTTAGATCATTATTTAATACAGTAG
+CAACTCTCTATTGTGTACATCAACAGATAGATGTAAAAGACACCAAAGAA
+GCTCTAG---AT--AAA-ATAGAAGAAATACA------------AAATAA
+AAGCAAG------------CAAAAGGCA---------CA---ACAG----
+--------------GCA---GCA---------------------GCT---
+------------------------GACACAAGA-----------------
+-------AGCAGCAGC------------AAG------GTCAGCCAAAATT
+ACCCTATAGTGCAAAATGCACAAAGGCAAATGATACACCAGTCCTTGTCA
+CCTAAGACTTTGAATGCATAAGTAAAAGTAATAGAAGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTCTCAGCATTATCAGAAGGAGCCACCCCAC
+AAGATTTAAACATGATGCTGAACATAGTAAAGAGGCACCA---GGCAGCT
+ATGCAAATGTTAAAAGATACCATCAATGAGGAAGCTGCAAAGTAAGACAG
+GCTACATCCAGCACATGCAAGACCTATTGCACCAGGCCAGATAAAAGAAC
+CAAAAAGAAGTGATATAGCAAGAACTACTAGTACCACCCAAGAACAAATA
+GCATAGATAACAGGCAACCCACCTATCCCAGTAGGAGACATCTATAAAAG
+ATAGATAATCCTAAGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTAGATGTAAAACAAAGGCCAAAAGAACCCTTCAGAGACTATGTA
+GATAGGTTCTTTAAAATTCTCAAAGCTGAGCAAGCTTCACAAGATGTAAA
+AGGTTAGATGACAGAAACATTACTAGTTCAAAATGCAAATCCAGATTGTA
+AGTCTATTTTAAGAGCATTAGGAGCAGGGGCTACATTAGAAGAAATGATG
+ACAGCATGCCAAGGAGTAGGAGGACCCGGCCATAAAGCAAAGGTTTTGGC
+TGAGGCAATGAGTCAAGT---AC---AAA---A---TA--C---A-----
+-----------------AACATA---ATGATGCAGAGAAGTAATTTTAAA
+GGCCAGAAA---AAGATTAAGTGTTTCAACTGTGGCAAAGAAAGACACCT
+AGCCAAAAATTGCAGGGCCCCTAAGAAAAAAGGCTGTTAGAAATGTAAGA
+AAAAAAGACACCAAATGAAAGACTGCACT------GAGAGACAGGCTAAT
+TTTTTAAAGAGAATTTAGCCTTC---CAGCAAGAAG---AGGCCAGGAAA
+TTTTCCTCAGAGCAGA---------------CCAGAACCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGCTC
+CTTAAGATGAAAGACAAGATA---------------GCCTTCCCTCCG--
+----AAGCAGGAGCAG------------AACGA---CAAAAGTCAGAAT-
+-----TCA------CCTTCAGTTTCCCTCAAATCACTCTTTAGCAACGAC
+CTCTTGTCACAGTAAGAATAGAAGGACAGCTAAAAGAAGCTCTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGACATAAATTTGCCAAGAAAATA
+GAAACCAAAAATGATAGGGGGAATTAGAAGTTTTATCAAAGTAAAACAGT
+ATGATCAGATACTTATAGAAATTTGTAGAAAAAAGGCTATAAGTACAGTA
+TTGATAAGACCTACACCTGTCAACA---TAATTAGAAGAAATATGTTGAC
+TCAGATTAGTTGTACGTTAAATTTCCCAATTAGCCCTATTAAAACTGTAC
+CAGTACAATTAAAGCCAAAAATAG---ACAGCCCAAAAGTTAAACAATAG
+CCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACA-----
+-GAAATAGAAAAAGAAAGAAAAATTTCAAAAATTAAGCCTGAAAATCCAT
+ACAATACTCCAATATTTGCTATAAAGAAAAAAGACAGCACTAAATAGAGA
+AAATTAGTAGATTTCAGAAAGCTCAATAAAAGAACTCAAAACTTTTAAGA
+AGTTCAATTAAGAATACCGCATCC---AGCAGGCTTAAAAAAGAAAAAAT
+CAGTAA---CAGTACTAGATGTAGAAGACGCATATTTTTCAGTTCCCTTA
+GATGAAAGCTTTAGAAAGTATACAGCATTCACCATACCTAGTACAAACAA
+TAAGACACCAAGAATCAGGTATCAGTACAATGTGCTTCCACAAAAGTAAA
+AAAGATCACCAGCAATATTCCAGAGTAG---CATGACAAAAATCTTAGAG
+CCCTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATACATAGATGA
+CTTGTATGTAAGATCTGATTTAGAGATAAAGCAGCATAGAACGAAAGTAG
+AAGAGTTAAAAGATCATTTATTAAGCTAAAGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCCTTTCTTTAGATAAGATATGAACTCCA
+TCCTGATAAGTAGACTGTCCAACCTATAGAGCTGCCAGAAAAAGAAAGCT
+AGACTGTCAATGATAT---ACAGAAATTAGTAAAGAAACTAAATTAGGCA
+AGTCAAATTTATGCAAGAATTAAAGTAAAACAATTGTGTAGGCTCCTCAG
+AGGAGCCAAAGCCCTAACAGATGTAGTAACATTAACTGAAGAAGCAGAAT
+TAGAATTAGCAGAGAACA---AAGAGATCCTAAAGGACCCTGTGCATAAA
+GTATATTATGACCCATCAAAAGACTTAATAGCAGAGATACAGAAGCAAAG
+ACAAGACCAATAGACATATCAGATTTATCAAGAACCATTTAAAAATCTGA
+AAACAAGGAAATATGCAAGAAAAAAGTCTGCTCACACTAATGATGTAAAA
+CAATTAGCAGAAGTGGTGCAAAAAGTAGTCATAGAAAGCATAGTAATATG
+AAGAAAGA---CTCCTAAATTTAAACTA---CCCATACAAAAAGAAACAT
+AAGAAACA---TAGTAGATAGACTATTAGCAGGCTACCTAGATTCCTGAA
+TAAGAGTTTGTCAATACCCCTCCTCTAGTAAAATTATAGTATCAGTTAAA
+GAAAGATCCTATAGTAAGAGCAGAGACTTTCTATGTAAATGAGGCATCCA
+ATAAGGAGACTAAGCTAAGAAAAGCAAAGTATGTCACTGACAGAAGAAGA
+CAAAAAGTTGTTTCCCTAACTAAGA---CAACAAATCAAAAGACTAAATT
+ACATGCAATCCATCTAGCCTTGCAAGATTCAAGATCAGAAGTAAACATAG
+T---AACAGACTCACAGTATGCATTAAGAATCA---TTCAAGCTAAGCCA
+GACAGGAGTGAAGCAGAGTTAGTCAATCAAATAATAGAAGAGCTAATAAG
+AAAAGACAAAGTCTACCTA---TCATAAGTACCAGCACACAAAAGAATTA
+GAAGAAATGAACAAGTAGATAAATTAGTTAGTTCTAGAATCAAGAAAGTG
+CTATTTTTAGATAAGATAGATAAAGCTCAAG---AAGAACATGAAAGATA
+---TCACAGCAATTAAAGAACAATAGCTAATGATTTTAATCTGCCACCTA
+TAGTAGCAAAAGAAATAGTAGCCAGCTGTAATAAAT---GTCA---GCTA
+AAAAAAGAAGCCATGCACAGACAAGTAGACTGTAGTCCAGAAATATGGCA
+ATTAGATTGCACACATCTAGAAAGAAAAATAATTCTAGTAGCAGTCCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAAGACAA
+GAGGCAGCATACTTTCTGCTAAAGTTAGCAAAAAGATGGCCAGTAAAAGT
+AGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCATTTAAAGCAG
+CCTGTTAGTAGGCAAATGTCAGCCAAAAATTTAAAATCCCCTACAATCCC
+CAAAGTCAAAGAGTAGTAGAATCTATGAATAAAGAATTAAAAAAAATCAT
+AAGACAGGTAAAAGAGCAAGCTGAACACCTTAGAACAGCAGTACAAATAG
+CAGTTTTCATTCACAATTTTAAAAGAAAA---GGGGGGATTAGAAGGTAC
+AGTGCAGAGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAG
+AGAATTACAAAAACAAATTACAAATATTCAAAAATTTCAAGTTTATTACA
+AAGACAGCAAAGATCCAATTTAGAAAAGACCAGCAAAACTACTCTAGAAA
+AGTGAAAAGGC---AGTAGTAATACAGGATAATAGTGATATAAAAGTAGT
+ACCAAGAAGAAAAGCAAAGATCATTAAGGATTATAAAAAACAGATGGCAA
+GTGATGATTGTGTGGCAGGTAGACAGGATGAAGATTAGA---ACATAGAA
+CAGTTTAGTAAAACATCATATGTATGTCTCAAAGAAAGCTAAAAGTTAAG
+TTTATAGACATCACTTTGAAAGCAGGCATCCAAAAAGAAGTTCAGAAGTA
+CACATCCCACTAGAAG---ATG---CTAGAATAGTAATAAGAACATATTA
+AAGTCTGCATACAGGAGAAAAAGACTGGCAGTTAGGTCATAAAGTCTCCA
+TAGAATGGAGGCTAAAAGAATATAGCACACAA------ATAGATCCTGAC
+CTGGCAGACCAACTAATTCATCTGCATTATTTTAACTGTTTTTCAGACTC
+---TGCCATAAAGAAAGCCATATTAAGGCAAGTAGTTAGCCCTAGTTGTG
+AATATCAAACAGGACACA---ACAAGGTAAGATCTCTACAATATTTAGCA
+CTAAAAGCAGTAGTAACACC---AACAAAGGCAAAGCCACCTTTGCCTAG
+TGTTAAGAAACTAACAGAAGATAGATGGAGCAAGCCCCAAAAGACCAGGG
+GCCCCAGAAAGAGCCATACAATGAATAGATGTTAGATCTGTTAGAAGAGC
+TTAAGCATAAAACTGTTAGACATTTTCCTAGGCCATGGCTCCATAGATTA
+AGACAGCATATCTATGTCACTTATGAAGATACTTAAGAAAAAGTTGAAG-
+--CTATAGTAAGAACTTTGCAG---CAACTACTGTTTGTTCAT---TTCA
+GAATCAGGTGCC---AACACAGCAG---AATAGGCATT---ATTCAAAAA
+AGAAAAGTCA----------GAGATAGATCCAGTAGATCCTAACCTAGAG
+CCCTAGAACCACCCAGGAAGTCAGCCTACAACTCCTTGTA---GCAAGTG
+TTACTGTAAAGCTTGTTGCTATCATTGCATA---------------GTTT
+GCTTTCAGAA---AAAAAGCTTAGGCATCTCCTATGGCAAGAAGAAGCAG
+AAAC------A---GCGACGAAGA------A---CTCCTCAAGGCAATAA
+GGATCATCAAGATCCTATACCAAAGCAGTAAGTAC--TAGTAATT-----
+-AATATATGTA---ATG---------------------------------
+---------CTTCCTTTAG---------------AAATCTGTGCAATAGT
+A---AGACTGATAGTAGC---GC--TAATTC-TAGCAATAGTTGTGTAGA
+CTATAGTAGGTATAGAATAT---AAGA------------AATTGCTAAAG
+CAA---AGAAAAATAGACAGGTTAATTAAAAGAATAAAAGAAAGAGCAGA
+GGACAGTGGCAATAAGAGTGATAAAGATACAGAAGAATTGTCAGCACTT-
+--ATTAAGATG------------------------AAGAACTATG---AT
+CTTGAAG---ATGATAATAAT------------CTGT------AGTACTG
+C---------AGA---A------------G---A------CTTG------
+TAAGTTACTGTCTACTATAAAGTACCTGTGTAGAAAAACGCAA---AG--
+ACCACCC-TATTTTGTGCATCAGATGCTAAGGCATATAAAACAGAAGTGC
+ATAATGTCTAAGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATACATTTAGAAAATGTGACAGAAGAGTTTAATGTGTAGAAAAATAA
+CATAGTAAAGCAGATGCATACAGACATAATCAGTTTATAAGACCAAAGCC
+TAAAGCCATGTGTAAAGTTAACCCCTCTCTGCGTTACTTTAAACTATACC
+A---ATG---TC--------------------------------------
+----------------------------GA-AG-TTA---CC--------
+-------A-ACAGCAGC---A---T-------------------------
+--------------------------GAT---AG--A---AA--G-----
+---------------------A-GA-AATAA-AAAACTGCTCTTACAATA
+TGACCACAGAACTAAAAGATAAGAAAAAGAAAATATTTTCGC---TTTTT
+TATAAACTTGATGTAGTACCAATTAATAAA--------------------
+-----------------------AGTAAT---AGT---------------
+---------------------A---AG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCCATTACACAGGCTTGTCCAAAAGTAACCTTTGAGCCAAT
+TCCCATACATTATTGTGCCCCAGCTAGTTTTGCAATTCTAAAGTGTAAAG
+ATGAAGAGTTCAATAGAACAAAGTTATGCAAGAATGTCAGCACAGTACAA
+TGCACACATAGAATCAAGCCAGTAGTGTCAACTCAACTACTGTTAAATGG
+CAGTCTAGCAGAA---AAAGA---GATAAGAATTAAATCTAAAAACATCT
+TAGACAATGCTAAAACCATAATTGTACAACTTACCAAGCCTGTAATAATC
+AATTGTACCAGACC------T------AACAACA---AT---ACAAGAAA
+AAATGTACATATA---------AGACCAAGACAAGCATTCTATGCA---A
+CAGATGAT------ATAATAAAAGATATAAGACAAGCATATTGTGAAGTC
+AATAGAGCACAATAGAATGACACTTTAAGACAAGTAGCG---AT--ACAA
+TTAAGAAAA------CACTAG---A----AC-----------ACAACA--
+----ATAATCTTTAAT----AAACC---CTCAAGAGAAGATTTAGAAATT
+ACAACAC------ATAGTTTTAATTGTAAAG---AAGAATTTTTTTATTG
+TAATACATCAGGCCTATTTAAT---AGTACT---TA---G----------
+------AAAAT--TGAA---------------------------GACACT
+G---CCAGC---A-T-------A---AAT---GG-CAC---A---G----
+--AG--TT--AAA---T---------------GACACTATAACT------
+---CTCCCATG-CA-GAATAAAGCAAATTATAAATATGTGGCA---GAGA
+ATAAGACAAGCAATGTATGCCCCTCCCATCCAAAGAGTAATAAAGTGTCA
+ATCAAACATTACAAGAATACTATTAACAAGAGATGG---TAAGAA---TA
+CT------AAGAATAAT---------AGC--AGTA---CA--AATGAAAC
+CTTTAGACCTAGAA-GAGAAG-ATATGAAAGACAATTAAAGAAGTAAATT
+ATATAAGTATAAAGTAATAAAAATTAAACCACTAAGAGTAGCACCCACCA
+AGGCAAAAAGAAAAGTAGTAGA------AAAAGAAAAAAGAGCAG---TT
+AGA-------CTAAGAGC---TG--T---ATTCATTAAGT---TCTTAAG
+----AG-CAGCCAGAAG-CACTATAGGCGCGGCGTCAATAACGCTGACAG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAACAATTTG
+CTGAAGGCTATAGAAGCTCAACAGCATCTGTTGAAACTCACAGTCTAAGG
+CATTAAACAGCTCCAGGCAAAAGTCCTGGCTGTAGAAAGATACCTAAAAG
+ATCAACAGCTACTAAGAATTTAAGGCTGCTCTAAAAAACTCATCTGCACC
+ACTAATGTGCCTTAGAACTCTAGTTGAAG---T---AAT-----------
+-AAATCTTACAA---TGAGATAT---AAG---AAAACATAACCTAGCTGC
+AATAAGATAAAGAAATTAACAATTACACAAAACTAATATATAGCCTAATT
+AAAGAATCGCAGAACCAGCAAAAAAAGAATGAACAAGACTTATTGGCATT
+AGACAAGTAAGCAAGTCTGTAGAATTAGTTTGACATATCCAACTAGCTGT
+AGTATATAAAAATATTTATAATGATAGTAAGAGGCTTAATAAGATTAAAA
+ATAGTTGTTGCTGTGCTTTCTGTAATAAATAGAGTTAGGCAAAGATACTC
+ACCACTATC---ATTTCAGACC---CATACCC---CAAACCCAAAAGGA-
+-----CTCGACAGGCCCGAAAGAATCGAAGAAGAAAGTGAAAAGCAAGAC
+AGAACCAGATCGACTCGATTAGTCAGCAGATTCTTAGCACTTGCTTAAGA
+CGACCTGCAGAGCCTGAGCCTTTTCCTCTACCACCGATTAAAAGACTTCA
+TCTTGATTGCCGCGAAGACTGTAGAACTTCTAAGA---CACAGCAGTCTC
+AAAAAGTTAAGACTAAAGTAAAAAAGACTCAAGTATCT---GTAAAATCT
+C---CTGTTATATTAAGGTCAAGAACTAAAAATTAGTGCTATTAACTTGC
+TCAATA---CCATAGCAATAGCAGTAGCTAGCTAGACAGATAAAGTTATA
+GAAATAAGACAAAGAATTAGTAAAGCTATTCTCCACATACCTAGAAGAAT
+CAGGCAAAGCTTCGAAAAGGCTTTATTATAA
+'A1.KE.00.KNH1144'
+------GCGAGAGCGTCAGTATTAAGTGGGGG---AAAATTAGATAGATG
+GGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGAATGAAAC
+ATCTAGTATGGGCAAGCAGGGAGCTGGAAAGATTCGCACTTAACCCTAGC
+CTTTTAGAAACAGCAGAAGGATGTCAGCAAATAATAGAACAGATACAACC
+AGCTCTCAAGACAGGAACAGAAGAACTTAGATCATTATTTAATACAATAG
+CAACCCTCTATTGCGTACATCAAAAGATAGATGTGAAAGACACCAAGGAA
+GCTCTAG---AT--AAA-ATAGAGGAACTGCA------------AAATAA
+GAGCAAG------------CAAAAGACA---------CA---ACAA----
+--------------GCA---GCA---------------------GCT---
+------------------------GATACAGGA-----------------
+-------AGTGGCAGC------------AAGGTCAGCGTCAGCCAAAATT
+ACCCTATAGTGCAAAATGCACAAGGGCAAATGATACACCAGACTTTGTCA
+CCTAGGACCTTGAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTTTCAGCATTATCAGAAGGAGCCACTCCAC
+AAGATTTAAATATGATGCTGAACATAGTGGGGGGACACCA---GGCAGCC
+ATGCAAATGTTAAAGGATACCATCAATGAGGAAGCTGCAGAATGGGACAG
+GACACATCCAGTACATGCAGGGCCTATTCCACCAGGCCAGATGAGAGAAC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGCACCCCTCAAGAACAAATA
+GGATGGATGACAAGCAACCCACCTATCCCAGTGGGAGACATCTATAAAAG
+ATGGATAATCTTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTGGATATAAAACAAGGGCCAAAAGAACCCTTCAGAGATTATGTA
+GATAGGTTCTTTAAAACTCTCAGAGCTGAGCAAGCTACACAGGAGGTAAA
+AGGTTGGATGACAGAAACATTACTGGTCCAAAATGCAAATCCAGATTGTA
+AGTCCATTTTAAGAGCATTAGGAACAGGAGCTACATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGGGTTTTGGC
+TGAGGCAATGAGTCAAGT---AC---AAC---A---TA--C---A-----
+-----------------AACATA---ATGATGCAAAGAGGCAACTTTAGG
+GGCCAGAAA---AGAATTAAGTGTTTCAACTGTGGCAAAGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGGA
+AGGATGGACATCAAATGAAAGACTGCACT------GAAAGACAGGCTAAT
+TTTTTAGGGAAAATTTGGCCTTC---CAGCAAGGGG---AGGCCAGGAAA
+TTTTCCTCAGAGCAGA---------------CCGGAACCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGCTC
+TTTGGGATGGGGGAAGAGATA---------------GCCTCCCCTCTA--
+----AAGCAGGAGCAG------------AAAGG---CAAGGAACAGACT-
+-----CCA------CCCTCAATCTCCCTCAAATCACTCTTTGGCAACGAC
+CCCTTGTCACAGTAAAAATAGCGGGACAGCTAAGAGAAGCTCTATTAGAT
+ACAGGGGCAGATGATACAGTATTAGAAGACATAGATTTGCCAGGTAAATG
+GAAGCCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAACAGT
+ATGAGCAGGTACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGAAGAAACATGTTGAC
+CCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAGACTGTAC
+CAGTAAAATTAAAGCCAGGAATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGCACA-----
+-GAGATGGAAAAGGAAGGAAAAATTTCACAAATTGGACCTGAAAATCCAT
+ACAATACTCCAATATTTGCAATAAAGAAAAAAGATAGCACTAAATGGAGG
+AAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGA
+AGTTCAATTAGGGATACCGCATCC---AGCGGGCCTAAAAAAGAAAAAAT
+CAGTAA---CAGTGCTAGATGTGGGGGACGCATATTTCTCAGTCCCTTTA
+CATGAAGACTTTAGAAAGTATACTGCATTCACCATACCTAGTACAAACAA
+TGAGACACCAGGAAAAAGGTATCAGTACAATGTGCTTCCACAGGGATGGA
+AAGGATCACCAGCAATATTCCAGAGTAG---TATGACAAAAATCTTAGAG
+CCCTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATACATGGATGA
+CTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAGCAAAAATAG
+AAGAATTAAGAGCTCATCTATTGAGCTGGGGATTTACTACACCAGAC---
+AAGAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTCCA
+TCCTGACAAGTGGACAGTCCAGCCTATAGAGCTGCCAAACAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTGGGGAAACTAAATTGGGCT
+AGTCAAATTTATGCAGGAATTAAAGTAAAGCACTTGTGTAGACTTCTCAG
+GGGAGCCAAAGCACTAACAGATGTAGTAACATTGACTGAGGAAGCAGAAT
+TAGAGTTGGCAGAAAACA---AGGAGATTCTAAAAGAACCTGTGCATGGA
+GTATATTATGACCCATCAAAAGAACTAATAGCAGAAGTACAGAAACAAGG
+GCAAGACCAATGGACATACCAAATCTATCAAGAGCCATTTAAAAATCTAA
+AAACAGGGAAATATGCAAGACAAAGGTCTGCTCACACTAATGATGTAAAA
+CAATTAGCAGAAGTGGTACAAAAAATAGCCATGGAAGGCATAGTAATATG
+GGGAAAGA---CTCCTAAATTTAGACTA---CCCATACAAAAAGAGACAT
+GGGAAACA---TGGTGGAGGGACTATTGGCAGGCTACCTGGATTCCCGAA
+TGGGAATTTGTCAATACCCCTCCTTTAGTAAAATTGTGGTACCAATTAGA
+GAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGAGCAGCCA
+ATAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGGGGAAGA
+CAAAAGGTTGTTTCCCTGACTGAGA---CAACAAATCAAAAAACTGAACT
+ACATGCAATCTATCTAGCCTTGCAAGATTCAGGATCAGAAGTAAACATAG
+T---AACAGACTCACAGTATGCATTAGGAATCA---TTCAGGCACAACCA
+GACAAGAGTGAATCAGAGTTAGTCAATCAAATAATAGAGAAGCTAATAGG
+AAAGGACAAAGTCTACCTG---TCATGGGTACCAGCACACAAGGGAATCG
+GAGGAAATGAAAAAGTAGATAAATTAGTCAGTAACGGAATCAGGAAGGTG
+CTGTTTTTAGATGGGATAGATAAAGCTCAAG---AAGAACATGAAAGATA
+---TCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAATCTACCACCTA
+TAGTAGCAAAGGAAATAGTAGCCTGCTGTGATAAAT---GTCA---GCTA
+AAAGGAGAAGCCATACATGGACAAGTAGACTGCAGTCCAGGGATATGGCA
+ATTAGATTGTACACATCTAGAAGGAAAAATAATTTTGGTAGCAGTCCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAA
+GAGACAGCATACTTTATACTAAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AATACACACAGACAATGGCAGCAATTTCACCAGCGCTGCAGTTAAAGCAG
+CCTGTTGGTGGGCAGATATCCAGCAGGAATTTGGAATTCCCTACAACCCC
+CAAAGTCAAGGAGTAGTAGAATCTATGAATAAGGAATTAAAGAAAATCAT
+AGGACAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGATATAATAGCAACAGACATACAAACTAA
+AGAATTACAAAAACACATTACAAAAATTCAAAATTTTCGGGTTTATTACA
+GGGACAGCAGAGATCCAGTTTGGAAAGGACCAGCAAAACTACTTTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGACAATAGTGATATAAAGGTAGT
+ACCAAGAAGAAAAGCAAAAATCATCAGGGATTATGGAAAACAAATGGCAG
+GTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGA---ACATGGAA
+CAGTTTAGTAAAACATCATATGTATGTATCAAAGAAAGCAAAAAATTGGG
+TCTATAGGCATCACTTTGAAAGCAGAAATCCAAGAGTAAGTTCAGAAGTA
+CACATCCCACTAGGGG---ATG---CTAAACTAGTAGTAAGAACATATTG
+GGGTCTGCATCCAGGAGAAAATGACTGGCATTTGGGTCATGGGGTCTCCA
+TAGAATGGAGGCTAAAAAGTTACAGCACACAA------GTAGATCCTGAC
+CTGGCAGACCAACTAATTCATCTGTATTATTTTGACTGTTTTTCAGGCTC
+---TGCCATAAGGAAAGCCATATTAGGAGAATTAGTTAGCCCTAGATGTG
+AATATCAAACAGGACACA---GCAAGGTAGGATCTCTACAATATTTAGCA
+CTGAAAGCATTAGTAACACC---ACAAAAGACAAAGCCACCTTTGCCTAG
+TGTTAGGATATTAGCAGAGGATAGATGGAACAAGCACCAGAAGACCAGGG
+GCCCCAGAGAGAGCCATACAATGAATGGATGTTAGAGCTGTTAGAAGATC
+TTAAGCATGAAGCTGTTAGACACTTTCCTAGGCCATGGCTTCATGGATTA
+GGACAACACATCTATGACACCTATGGGGATACTTGGGAAGGAGTTGAAG-
+--CTATAATAAGAACTCTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGTC---AACATAGCAG---AATAGGCATT---ATTCGGGGA
+AGAAGAGGCA----------GGAATGGATCCAGTAGATCCTAACCTAGAC
+CCCTGGAACCACCCGGGAAGTCAGCCTACAACTCCTTGTA---ACAAGTG
+TTACTGTAAAAAGTGTTGCTATCATTGCCAG---------------TATT
+GCTTTTTGAA---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAGGA------A---CTCCTCACAGCAGTAA
+GGATCATCAAAATCCTCTACCAAAGCAGTAAGTAT--TAGTAATT-----
+-AGTATATGTA---ATGTTG------------------------------
+---------CCTCCTTTAC---------------ATATCTGTGCAATAGT
+A---GGACTGATAGTAGC---GG--CAATCC-TAGCAATAATTGTGTGGA
+CTATAGTAGGTATAGAATAT---AGGA------------AACTGTTAAAA
+CAA---AGGAAAATAGACAGGTTAATTGAGAGAATAAGAGAAAGAGCAGA
+GGACAGTGGCAATGAGAGTGATGGGGACACAGATGAATTGTCAGCACTT-
+--ATTGAGATG------------------------GGGAACTATG---AT
+CTTGGGA---TTGATAATAAT------------CTGT------AATGCTG
+T---------AAACAGC------------A---A------CTTG------
+TGGGTTACTGTCTATTATGGGGTACCTGTGTGGAAAGATGCAG---AG--
+ACCACCT-TATTTTGTGCATCAGATGCTAAAGCATATAAAACAGAAAAGC
+ATAATGTCTGGGCTACACATGCCTGTGTGCCCACAGACCCCAACCCACAA
+GAAATACCTTTGGAAAATGTGACAGAAGAGTTTAACATGTGGAAAAATAA
+AATGGTAGAACAAATGCATACAGATATAATCAGTCTATGGGACCAAAGCC
+TACAGCCATGTGTAAAGTTAACCCCTCTCTGCGTTACTTTAAATTGTACA
+G---ATG---TT--ACTAATGTT---------------------------
+-----------AC--AGAT---GT---TAG-TG-GTA---CG--------
+-------A-GGGGCAAC---AT--C-------------------------
+--------------------------ACC---AT--C---AT--GAA--A
+GAG---ATGGA------G-GGA-GA-AATAA-AAAACTGTTCTTTCAATA
+TGACCACAGAAATAAGGGATAAGAAACAGAAAGTATATTCAC---TCTTT
+TATAGACTTGATGTAGTACCAATAAATCAGGGTAATAGT-----------
+-------AGTA---GT--AA--AAACAGT---AGT---------------
+---------------------G---AG--TATAGA-T-TAATAAGTTGTA
+-ATACCTCAGCCATTACACAAGCTTGCCCAAAGGTAAGCTTTGAGCCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATCCTGAAGTGTAGGG
+ATAAGGAGTTCAATGGAACAGGGGAATGCAAGAATGTCAGCACAGTCCAA
+TGCACACATGGAATCAAGCCAGTAGTATCAACTCAACTACTGTTAAATGG
+CAGTCTAGCAGAA---GAAAA---GGTAAAAATCAGAACTGAAAATATCA
+CAAACAATGCCAAAACTATAATAGTACAACTTGTCGAGCCTGTGAGAATT
+AATTGTACTAGACC------T------AATAACA---AT---ACAAGAGA
+GAGTGTGCGTATA---------GGGCCAGGACAAGCATTCTTTGCA---A
+CAGGTGAC------ATAATAGGGGATATAAGACAAGCACATTGTAATGTC
+AGTAGATCACAATGGAATAAGACTTTACAACAGGTAGCT---GA--ACAA
+TTAAGAGAA------CACTTT---A----AA--AAC------AAAACA--
+----ATAATATTTAAC----AGTTC---CTCAGGAGGGGATCTAGAAATC
+ACAACAC------ATAGTTTCAATTGTGGAG---GAGAATTTTTCTATTG
+TAATACATCAGGTCTGTTCAAT---AGCACC---TG---G----------
+------AAT-----ACC---------------------------AGCATG
+T---CAGGG---T-C-------A---AGT---AA-CAC---G---G----
+--AG--AC--AAA---T---------------GACACTATAACT------
+---CTCCAATG-CA-GAATAAAGCAAATTATAAATATGTGGCA---GAGA
+ACAGGACAAGCAATATATGCCCCTCCCATCCAGGGAGTGATAAGGTGTGA
+ATCAAACATCACAGGACTACTGTTAACAAGAGATGG---TGGGGA---GG
+AG------AAG---------------AAC--AGTA---CA--AATGAAAT
+CTTCAGACCTGGAG-GAGGAG-ATATGAGGGACAACTGGAGAAGTGAATT
+ATATAAGTATAAAGTAGTAAAAATTGAACCACTAGGAGTAGCACCCACCA
+GGGCAAGGAGAAGAGTGGTGGG------AAGAGAAAAAAGAGCAG---TT
+GGA-------ATAGGAGC---TG--T---TTTCCTTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCGGCGTCAATAACGCTGACGG
+TACAGGCCAGGCAATTATTGTCTGGCATAGTGCAACAGCAGAGCAATTTG
+CTGAGGGCTATAGAGGCTCAACAACATATGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCAAGAGTCCTTGCTGTGGAAAGATACCTAAGGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTAATGTGCCCTGGAACTCTAGTTGGAG---T---AAT-----------
+-AAATCTCAGGA---TGAAATAT---GGA---ACAACATGACCTGGCTGC
+AATGGGATAAAGAAATTAGCAATTACACAAACCTAATATATAGTCTAATT
+GAAGAATCGCAAAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATT
+GGGCAAGTGGGCAAATCTGTGGACTTGGTTTGACATATCAAATTGGCTGT
+GGTATATAAGAATATTTATAATGATAGTAGGAGGCTTAATAGGATTAAGA
+ATAGTTTTTGCTGTGCTTGCTGTAATAAAGAGAGTTAGGCAGGGATACTC
+ACCTGTGTC---ATTTCAGATC---CATGCCC---CAAACCCAGGGGGT-
+-----CTCGACAGGCCCGGAAGAATCGAAGAAGAAGGTGGAGAGCCAGGC
+AGAGGCAGATCGATTCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGA
+CGATCTGAGGAACCTGTGCCTCTTCAGCTACCATCGCTTGAGGGACTTCG
+CCTTGATTGTTGCGAGGACTGTGGAACTTCTGGGA---CACAGCAGTCTC
+AAGGGGTTGAGGCTGGGGTGGGAGGGCCTCAAGTATCT---GTGGAATCT
+C---CTGGTATACTGGAGTCAGGAACTAAAAACTAGTGCTATTAGTTTGG
+TTGATA---CTATAGCAATAGCAGTAGCTGGCTGGACAGATAGAGCTATA
+GAAATAGGACAAGGAATTGGTAGAGCTTTTCTCCACATACCTAGAAGAAT
+CAGACAGGGCTTAGAAAGGGCTTTGCTGTAA
+'A1.KE.00.KNH1199'
+------GCGAGAGCGTCAGTATTAAGCGGGGR---AAAATTAGCTGCATG
+GGAGAAGATTCGGTTAAGGCCAGGGGGAAGGAAAAGATATCGCATAAAAC
+ATTTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTCAACCCTAGC
+CTTTTAGAAACAGCAGAAGGATGTCAACAAATAATGGAACAGTTACAACC
+AGCTCTCAAGACAGGAACAGAAGAACTTAGATCATTATTTAATACAGTAG
+CAACCATCTATTGCGTGCATCAACGAATAGATGTGAAAGACACCAAGGAA
+GCTCTAG---AT--AAA-ATAGAGGAAATGCA------------AAAGAA
+GAGCAAG------------CAAAAGACA---------CA---ACAG----
+--------------GCA---GCA---------------------GCT---
+------------------------GACACAGGA-----------------
+-------CACAGCAGC------------CAG------GTCAGCCAAAATT
+ACCCTATAGTACAAAATGCACAAGGGCAAATGGTACACCAGGCAGTATCA
+CCTAGGACTTTGAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTCTCAGCATTATCAGAAGGAGCCACCCCAC
+AAGATTTAAATATGATGCTGAACATAGTGGGGGGACATCA---GGCAGCT
+ATGCAAATGTTAAAAGATACCATCAATGAGGAAGCTGCAGAATGGGACAG
+GTTACATCCAGTACATGCAGGGCCTATTCCACCAGGCCAGATGAGAGAAC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCCTCAAGAACAAATA
+GGGTGGATGACAGGCAACCCACCCATCCCAGTGGGAGACATCTATAAAAG
+ATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTAGATATAAAACAAGGGCCAAAAGAACCCTTCAGAGATTATGTG
+GATAGGTTCTTTAAAACTCTCAGAGCTGAGCAAGCTACACAGGAGGTAAA
+AAATTGGATGACAGAGACATTACTGATCCAAAATTCAAATCCAGATTGTA
+AGTCCATCTTAAGAGCATTAGGACCAGGGGCTACATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGGGTTTTGGC
+TGAGGCAATGAGTCAGGT---AC---AAC---A---GC--C---A-----
+-----------------AACATAATGATGATGCAAAGAGGTAATTTTAGG
+GGCCAGAAA---AAGATTAAGTGTTTCAACTGTGGCAAAGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAA
+AGGAGGGACACCAAATGAAAGACTGCACT------GAAAGACAGGCTAAT
+TTTTTAGGGAGAATTTGGCCTTC---CAGCAAAGGG---AGGCCAGGGAA
+TTTTCCTCAGAACAGA---------------TTGGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGATC
+TGTGGGATGGGGGAAGAGATA---------------GCCTCCCCTCCG--
+----AAGCAGGAGCAG------------AGAGA---CAAGGAACAGGCC-
+-----TCA------CCTTCAGTTTCCCTCAAATCACTCTTTGGCAACGAC
+CTATTGTCACAGTAAGAATAGGGGGACAGCTAAAAGAAGCTCTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGATATAGATCTGCCAGGAAAATG
+GAAGCCAAAAATGATAGGGGGAATTGGAGGATTCATCAAAGTAAGACAGT
+ATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGAAGGAACATGTTGAC
+CCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACTGTAC
+CAGTAAAATTAAAGCCAGGTATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACA-----
+-GATATGGAAAAGGAAGGAAAAATTTCAAGAATTGGGCCTGAAAATCCAT
+ACAATACTCCAATATTTGCAATAAAGAAAAAGGATAGCACTAAATGGAGG
+AAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTGTGGGA
+AGTTCAATTAGGAATACCACATCC---ATCGGGCCTAAAAAAGAAAAAAT
+CAGTAA---CAGTACTAGATGTGGGGGACGCATATTTTTCAGTTCCTTTA
+GATGAGAACTTTAGAAAATATACTGCATTCACCATACCTAGTACAAACAA
+TGAGACACCAGGAATCAGATATCAGTACAATGTGCTTCCACAGGGATGGA
+AAGGATCACCAGCAATATTCCAGAGTAG---CATGACAAAAATCTTAGAG
+CCCTTTAGATTAAAAAATCCAGAAATAATTATTTATCAATACATGGATGA
+CTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAGCAAAAATAG
+AAGAGTTAAGAGCTCATCTATTGAGCTGGGGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTCCA
+TCCTGACAAGTGGACAGTCCAGCCTATAGTGCTGCCAGAAAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTGGGAAAACTAAATTGGGCA
+AGTCAAATTTATGCAGGGATTAAAGTAAGGCAGTTGTGTAAACTCCTCAG
+GGGAGCCAAAGCACTAACAGATGTAGTAACATTGACTGAGGAAGCAGAAT
+TAGAATTGGCAGAGAACA---GGGAGATACTAAGAGACCCTGTGCATGGA
+GTATATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGG
+GCAAGACCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAATCTAA
+AAACAGGAAAATATGCAAGAAAAAAGTCTGCTCACACTAATGATGTAAGA
+CAATTAGCAGAGGTGGTGCAAAAGGTAGTCATGGAAAGCATAGTCATATG
+GGGAAAGA---CTCCTAAATTTAGACTA---CCCATACAAAAAGAGACAT
+GGGAAACA---TGGTGGATGGACTATTGGCAGGCTACCTGGATTCCTGAA
+TGGGAGTTTGTCAATACCCCTCCTCTAGTAAAATTGTGGTACCAATTAGA
+AAAAGACCCTATAGCAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCCA
+ATAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGGGGAAGA
+CAAAAGATTGTTTCCCTAACTGAAA---CAACAAATCAAAGGACTGAACT
+ACATGCAATCTATCTAGCTTTGCAGGATTCAGGATCAGAAGTAAATATAG
+T---AACAGACTCACAATATGCATTAGGAATCA---TTCATGCACAACCA
+GACAGAAGTGACTCAGAGTTAGTCAATCAGATAATAGAGAAGCTAATAGG
+AAAGGACAAAGTATACCTG---TCATGGGTTCCAGCACACAAAGGAATTG
+GAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAGAAAGGTG
+CTGTTTTTAGATGGGATAGATAAAGCTCAAG---AAGAACATGAAAGATA
+---TCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAATTTGCCACCTA
+TAGTAGCAAAGGAGATAGTAGCCAGCTGTGATAAAT---GTCA---GCTA
+AAAGGGGAAGCCATGCATGGACAAGTAGACTGCAGTCCAGGGATATGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAGTAATACTGGTAGCAGTTCATG
+TAGCCAGTGGCTATATGGAAGCAGAAGTTATCCCAGCAGAAACAGGACAA
+GAGACAGCATACTTTATACTAAAACTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGACAATGGCAGCAATTTCACCAGTGCTGCAGTTAAAGCAG
+CCTGTTGGTGGGCAAATATCAAACAGGAATTTGGGATTCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTCTAAATAAGGAATTAAAGAAAATTAT
+AGGACAGGTAAGAGAGCAAGCTGAACATCTTAAAACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---AGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAC
+AGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACA
+GGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGACAATGGTGATATAAAGGTAGT
+ACCAAGAAGAAAAGTAAAAATCATTAGGGACTATGGAAAACAGATGGCAG
+GTGATGATTGTGTGGCAGGTGGACAGAATGAGGATTAGA---ACATGGCA
+CAGTTTAGTAAAACATCATATGTATATCTCAAAGCAAGCTAAAAATTGGT
+CTTATATACATCACTTTCAAAGCAGGCATCCAAAAGTAAGTTCAGAAGTA
+CACATCCCACTAAAGG---ATG---CTAGATTAGTAGTAAAAACATATTG
+GGGTCTGCATACAGGAGAAAAAGACTGGCACTTGGGTCATGGGGTCTCCA
+TAGAATGGAGGTTAAAAGGATATAGCACACAA------GTAGATCCTGAC
+CAGGCAGACCAACTAATTCATCTGCATTATTTTGACTGTTTTTCAGACTC
+---TGCCATAAGGAAAGCCATATTAGGAGAAGTAGTTAGCCCTAGGTGTG
+CATATCAAACAGGACATA---ACAAGGTAGGATCTCTGCAATATTTAGCG
+CTGAAAGCATTAGTAACACC---AACAAAGACAAAGCCACCTTTGCCTAG
+TGTTAGGATATTAGCAGAGGATAGATGGAACAAGCCCCGGAAGACCAAGG
+GCCCCAGAGAGAGCCATACAATGAATGGATGTTAGAGCTGTTAGAAGATC
+TTAAGAATGAAGCCGTTAGACATTTTCCTAGAATGTGGCTTCATGGATTA
+GGACAACACATCTATGAAACCTATGGGGATACTTGGGAAGGAGTTGAAG-
+--CTATAGTAAGAACTCTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGCC---AACATAGCAG---AATAGGCATT---ATTCGAGGG
+AGAAGAGGCA----------GGAATGGATCCGGTAGATCCTAACCTAGAG
+CCCTGGAACCACCCGGGAAGTCAGCCTACAACACCTTGTA---GCAAGTG
+TTACTGTAAAAAGTGTTGCTATCATTGTCCA---------------GTTT
+GCTTTTTGAA---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAAGA------A---CTCCTCCGTGCAATAA
+GGATCATCAAAATCCTGTACCAAAGCAGTAAGTAA--TAGTAATT-----
+-AGTATATGTA---ATGTGG------------------------------
+---------CTTCCTTTGC---------------AAATTTGTGCAGTAGT
+A---GGATTGATAGTAGC---AA--TAATCA-TAGCAATAGTTGTGTGGA
+CTATAGTAGGTATAGAATAT---AAGA------------GATTGCTAAAG
+CAA---AGAAAAATAGACAGGTTAATTGAAAGAATAAGAGAAAGAGAAGA
+AGACAGTGGCAATGAGAGTGATGGAGACACAGAGGAATTGTCAACACTT-
+--ATTAAGATG------------------------GGGAATTATG---AT
+CTTGGGG---ATGATAATAAT------------TTGT------AGTACTA
+C---------AGA---A------------A---A------CTTG------
+TGGGTTACTGTCTACTATGGGGTACCTGTGTGGAAAGACACAG---AG--
+ACCACCT-TATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAAAAAC
+ATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATATATTTGGAAAATGTGACAGAAGAGTTTAACATGTGGAAAAATAA
+CATGGTAGAGCAAATGCATACAGATATAATCAGTCTATGGGACCAAAGCC
+TAAAGCCATGTGTAAAGTTAACCCCTCTCTGCGTCACTTTAAATTGTAGC
+A---ATG---TC--AGGGTCAATGCCTCGAGTATAGAGACCAATGCC---
+-----------TC--GAGA---AG---CAA-TG-TCA---GC--------
+-------A-GTATCAAT---GG--C-------------------------
+--------------------------TSG---AG--T---TT--A-----
+-------------------GAA-GA-AATAA-GAAACTGTTCTTTCAATA
+TGACCACAGAGCTAAGGGATAAGAAAAAGCAGGTATATGCAC---TTTTT
+TATAAACTTGATGTAGTACAAATTAATGAA--------------------
+-------AATG---AA--AG--TAATAGT---AGT---------------
+---------------------A---AG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCCATTACACAGGCTTGCCCAAAGGTAACCTTTGAACCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATCCTAAAGTGTAGGG
+ATGAAGAGTTCAATGGAACAGGGCCATGCAAGAATGTCAGCACAGTACAA
+TGTACACATGGAATCAAGCCAGTAGTATCAACTCAACTGCTGTTAAATGG
+CAGTCTAGCAAAA---AATAC---AGTAAAAATTAGATCTGAAAATATCA
+CAAACAATGTCAAAACTATAATAGCACAACTTGTCAATCCTGTAACAATT
+AATTGTACCAGACC------T------AGCAACA---AT---ACAAGGAC
+AAGTATACGTATA---------GGACCAGGACAAGCATTCTATGCA---A
+CAGGAGAC------ATAATAGGGGACATAAGAAAAGCATATTGTAATGTC
+AGTGAATCAGAATGGGAGAAAGCTTTAAAACAGGTAGCT---GG--ACAA
+TTAGGAAAG------CACTTT---A----GC--AAC------AAAACA--
+----ATAAAATTTACT----AACTC---CTCAGGAGGGGATCTAGAAATC
+ACAACAC------ATAGTTTTAACTGTGGAG---GAGAATTCTTCTATTG
+TAATACATCAGGCCTGTTTAAT---AGCTTT---TG---GA--GTTATAG
+C-----AATGG--GACTTACAGCACAAATAGCACA---------AAGTCA
+A---ATGGC---A-C-------A---AAT---AG-CAC---A---A----
+--AG--TC--AAA---T---------------GGCACTATACAG------
+---CTCCCATG-CA-GAATAAAGCAAATTATAAATATGTGGCA---GAGA
+GCAGGACAAGCAATGTRTGCTCCTCCCATCCGAGGAATAATAAGGTGWGA
+ATCAAACATTACAGGACTAATATTAACAAGGGATGG---TGGGAG---TA
+ATGGGAATAAT---------------AGC--AATA---CA--ACAGAGAT
+CTTCAGACCTGGAG-GAGGAG-ATATGAGGGACAATTGGAGAAGTGAATT
+ATATAAGTATAAAGTAGTAAAAATTGAACCACTAGGAGTAGCACCCACCA
+GGGCAAGGAGAAGAGTGGTGTG------GAGAGAAAAAAGAGCAG---TT
+GGA-------ATAGGAGC---TG--T---ATTCCTTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCGGCGTCAATAACGCTGACGG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCGACAGCAAAGCAATTTG
+CTGAGGGCTATAGAGGCTCAACAACATCTGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTAATGTGCCCTGGAACTCTAGTTGGAG---T---AAT-----------
+-AAATCTCAGAC---TGAGATAT---GGA---ATAACATGACCTGGCTGC
+AATGGGATAAAGAAATTACCAATTACACACAAGAAATATATAAACTAATT
+GAAGAGTCGCAGAACCAGCAGGAAAAGAATGAGCAAGACTTATTGGCCTT
+GGACAAGTGGGCAAGTCTGTGGAATTGGTTTGAAATATCAAATTGGCTGT
+GGTACATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGATTAAGG
+ATAATTTTTGCTGTGCTTTCTGTAATAAATAGAGTTAGGCAGGGCTACTC
+ACCTTTGTC---ATTTCAGACC---CATACCC---CAAACCCAGGGGGT-
+-----CTCGACAGGCCAGGAAGAATCGAAGAAGAAGGTGGAGAGCAAGGC
+AGAGACAGATCGATTCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGA
+CGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTCA
+TCTTGATTGCAGCGAGGACTGTGGAACTTCTGGGA---CACAGCAGTCTC
+AAGGGGTTGAGACTGGGGTGGGAAGGCCTCAAGTATCT---GTGGAATCT
+C---CTGTTATACTGGGCTCGGGAACTAAAAATTAGTGCTATTAATTTGT
+TTGATA---CCATAGCAATAGTAGTAGCTGGCTGGACAGATAGGGTTATA
+GAACTAGGACAAAGAATTGGTAGAGCTATTCTA-----------------
+------AGGCTTTGAAAAGGCTTTGCTATAA
+'A1.KE.00.KNH1207'
+------GCGAGAGCGTCAGTATTAAGTGGGGG---AAAATTAGATGCATG
+GGAGAAAATTAATTTAAGGCCAGGGGGAAAGAAAAAATATAAAATGAAGC
+ATTTAGTATGGGCAAGCAGGGAGCTGGAAAGATTCGCACTTAACCCTAGT
+CTTTTAGAAACAGCAGAAGGATGTCAACAAATAATAGAACAATTACAACC
+AGCTCTCAGGACAGGAACAGAAGAATTTAGATCATTATTTAATACAGTAG
+CAACCCTCTATTGCGTGCATCAAAAGATTGATGTAAAAGACACCAAGGAA
+GCTTTAG---AT--AAA-ATAGAAGAAATACA------------AAATAA
+GAACAAG------------CAAAAGACA---------CA---GCAG----
+--------------GCA---GCG---------------------GCT---
+------------------------GATACAGGA-----------------
+-------AACAGCAGC------------AAG------GTCAGCCAAAATT
+ACCCTATAGTGCAAAATGCACAAGGGCAAATGATACACCAGCCCTTATCA
+CCCAGGACTTTGAATGCCTGGGTAAAAGTAATAGAAGAAAAGGGTTTCAG
+CCCAGAAGTAATACCTATGTTCTCAGCATTATCAGAAGGAGCCATCCCAC
+AAGATTTAAATATGATGCTGAACATAGTGGGGGGACACCA---GGCAGCT
+ATGCAAATGTTAAAAGATACCATCAATGAGGAAGCTGCAGAATGGGATAG
+ATTACATCCAGTACATGCAGGGCCCATTCCACCAGGCCAAATGAGAGAAC
+CAAGGGGAAGTGATATAGCAGGAACTACTAGTACCCCTCAAGAACAAGTA
+GGATGGATGACAGGCAATCCACCTATCCCAGTGGGAGACATTTATAAAAG
+ATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTGGACATAAAACAAGGGCCAAAAGAACCCTTCAGAGACTATGTA
+GATAGGTTCTTTAAAGTTCTCAGAGCTGAACAAGGTTCACAGGATGTAAA
+AAATTGGATGACAGAAACCTTGCTGGTCCAAAATGCAAATCCAGATTGTA
+AGTCTATTCTGAGAGCATTAGGACAAGGGGCTTCATTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGGGTTTTGGC
+TGAGGCAATGAGTCAGGT---AC---AAC---A---TA--C---A-----
+-----------------AATATA---ATGATGCAAAGAGGCAATTTTAGG
+GGCCAGAAA---AGAATTAAGTGTTTCAACTGTGGCAAAGAAGGACATCT
+AGCCAGAAATTGCAAGGCCCCTAGGAAAAAAGGCTGTTGGAAATGTGGGA
+GAGAGGGACACCAAATGAAAGACTGCACT------GAGAGACAGGCTAAT
+TTTTTAGGGAAAATTTGGCCTTC---CAGCAAGGGG---AGGCCAGGAAA
+TTTTCCTCAGAGCAGA---------------CCGGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGATC
+TTTGGGATGGGGGAAGAGATA---------------ACCTCTCCTCCG--
+----AAGCAGGAGCGG------------AAACA---GGACCCA-------
+--------------CCCTTAGTTTCCCTCAAATCACTCTTTGGCAACGAC
+CCCTTGTCACAGTAAAAATAGGAGGACAGCTAAAAGAAGCTCTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGATATAAATTTGCCAGGGAAATG
+GAAACCAAAAATGATAGGAGGAATTGGAGGTTTTATCAAGGTAAAACAGT
+ATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGAAGAAATATGCTGAC
+CCAAATTGGTTGTACTTTAAATTTTCCCATAAGTCCTATTGAGACTGTAC
+CAGTAAAATTGAAACCAGGAATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTAACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACA-----
+-GATATGGAAAAGGAAGGAAAACTTTCAAGAATTGGGCCTGAAAATCCAT
+ACAATACTCCAGTGTTTGCTATAAAGAAAAAAGATAGCACTAAATGGAGG
+AAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGA
+AGTCCAATTAGGAATACCGCATCC---AGCGGGCCTAAAAAAGAAAAAAT
+CAGTGA---CAGTACTGGATGTGGGGGACGCATATTTTTCAGTTCCTTTA
+GATGAAGGCTTTAGAAAGTATACTGCTTTCACCATACCTAGTATAAACAA
+TGAGACACCAGGAATCAGGTATCAGTACAATGTGCTCCCACAGGGATGGA
+AAGGATCACCGGCAATATTCCAGAGTAG---CATGACAAAAATCTTAGAG
+CCCTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATACATGGATGA
+CTTGTATGTAGGATCTGATTTAGAAATAGGGCAACATAGAACAAAAATAG
+AAGAGTTAAGAGCTCATTTATTGAGCTGGGGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCATTTCTTTGGATGGGATATGAACTACA
+TCCTGACAAATGGACAGTCCAGCCTATACAGCTGCCAGAAAAGGACAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTGGGAAAATTAAATTGGGCA
+AGTCAGATTTATGCAGGGATTAAGGTAAAGCAACTGTGTAGACTCCTTAG
+GGGAACCAAAGCACTAACAGATGTAGTACCAATGACTGAGGAAGCAGAAT
+TAGAATTGGCAGAAAATA---GGGAGATTTTAAAAGACCCTGTACATGGA
+GTATATTATGACCCATCAAAAGACCTAATAGCAGAAATACAGAAGCAAGG
+GCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAA
+AAACAGGGAAATATGCAAAAAGGAGGTCTGCTCACACTAATGATGTAAAA
+CAATTAACAGAAGTAGTGCAAAAGGTGGTCACGGAAAGCATAGTAATATG
+GGGAAAGA---CTCCTAAATTTAAACTA---CCCATACAAAAAGAAACAT
+GGGAAGCA---TGGTGGATGGACTATTGGCAGGCTACCTGGATCCCTGAA
+TGGGAGTTTGTCAATACCCCACCTCTAGTAAAACTATGGTACCAGTTAGA
+GAAAGACCCTATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCCA
+ATAGGGAAACTAAGTTAGGAAAAGCAGGGTATGTCATTGACAGAGGAAGA
+CAAAAGGTTGTTTCCCTAACTGAGA---CAACAAACCAAAAGACTGAATT
+ACATGCTATCCATCTAGCATTGCAGGATTCAGGATCAGAAGTAAATATAG
+T---AACAGACTCACAGTATGCATTAGGAATTA---TTCAGGCACAACCA
+GACAGGAGTGAATCGGAGTTAGTCAATCTAATAATAGAACAGCTAATAGG
+AAAGGACAAGGTCTACCTG---TCATGGGTACCAGCACACAAGGGGATTG
+GAGGAAATGAACAGGTAGATAAATTAGTCAGCTCTGGAATCAGGAAGGTA
+CTATTTTTAGATGGGATAGATAAAGCTCAAG---AAGAACATGAAAGATA
+---TCATAGTAATTGGAGAACAATGGCTAGTGATTTTAATCTGCCACCTA
+TAGTAGCAAAGGAAATAGTAGCCAGCTGTGACAAAT---GTCA---ACTA
+AAAGGGGAAGCCATGCATGGACAAGTAGATTGTAGTCCAGGGATATGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAGTAATCCTGGTAGCAGTCCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAG
+GAAACAGCATACTTTCTGTTAAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGACAATGGCAGCAATTTCACCAGCGCTGCATTTAAAGCAG
+CCTGTTGGTGGGCAAATATCCAACAGGAATTTGGAATTCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTATGAATAAAGAATTAAAGAAAATCAT
+AGGGCAGGTAAGAGAGCAAGCTGAACACCTTAGGACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCAMCAGATATACAAACTAA
+AGAATTACAAAAACACATTTCAAAAATTCAAAATTTTCGGGTCTATTACA
+GGGACAGCAGAGATCCCATTTGGAAAGGACCAGCAAAACTACTCTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGACAATAGTGATATAAAGGTAGT
+ACCAAGAAGAAAAGCAAAGATCATTAGGGATTATGGAAAACAGATGGCAG
+GTGATGATTGTATGGCAAGTAGACAGGATGAGGATTAGA---ACATGGAA
+CAGTTTAGTAAAACATCATATGTATCTTTCAAAGAAAGCTAAAGGGTGGT
+TTTATAGACATCACTATGAAAGCAGGCATCCAAAAGTAAGTTCAGAAGTA
+CACATTCCACTAGGGG---ATG---CTAGATTAGTAGTAAGAACATATTG
+GGGGCTGCATACAGGAGAAAAAGACTGGCACTTGGGTCATGGGGTCTCCA
+TAGAATGGAGGCTAAAAAGATATAGCACACAA------ATAGATCCTGAC
+CTGGCAGACCAACTAATTCATATGCATTATTTTGAATGTTTTTCAGACTC
+---TGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGCCCTAGGTGTG
+AATACCAAACAGGACATA---ACAAGGTAGGATCTCTACAATATTTGGCA
+CTGAAAGCATTAGTAAAACC---AAAAAAGATAAAGCCACCCTTACCAAG
+TGTTAGGAAATTAACAGAGGATAGATGGAACGAGCCCCAGAAGACCAGGG
+GCCCCAGAGGGAGCCATACAATGAATGGATGTTAGAACTATTAGAAGACC
+TTAAACATGAAGCTGTTAGACATTTTCCTAGGCCATGGCTTCATGGACTA
+GGACAACATATCTATAACACCTATGGGGATACGTGGGAGGGAGTTGAAG-
+--CTATAATAAGAACTTTGCAA---CAATTGCTGTTTGTTCAT---TTCA
+GAATTGGGTGCC---AGCATAGCAG---AATAGGCATT---ATTCAAGGG
+AGAAGAGGCA----------GGAATGGATCCGGTAGATCCTAACCTAGAG
+CCCTGGAACCATCCGGGAAGTCAGCCTGCAACTCCTTGTA---GCAAGTG
+TTACTGTAAAAAGTGCTGCTATCATTGTCCA---------------GTTT
+GCTTTCTGAA---AAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------C---GCGACGAAGA------G---CTCCTCAAAGCAGTAA
+GGATCATCAGAATCCTATACCAAAGCAGTAAGTAT--TAGCAATT-----
+-AGTATATGTA---ATG---------------------------------
+---------AGTCCTTTGG---------------AAGTCTGTGCAATAGT
+A---GGACTGATAGTGGC---AC--TAATCA-TAGCAATAGTTGTGTGGA
+CTATAGTAGGTATAGAATAT---AGGA------------AATTGCGAAAG
+CAA---AAAAGAATAGACAGGTTAATTAAGAGAATAAGTGAAAGAGCAGA
+AGACAGTGGCAATGAGAGTGATGGGGATACAGATGAATTGTCAAAGCTT-
+--GTGGAGATG------------------------GGGAACTATG---AT
+CTTGGGA---ATGTTAATGAT------------TTGT------AGTGTTG
+C---------AGG---A------------A---A------CTTG------
+TGGGTTACTGTCTACTATGGGGTACCTGTGTGGAAAGAGGCAG---AC--
+ACCACCT-TATTTTGTGCATCAAATGCTAGAGCATATGATACAGAAGTGC
+ATAATGTCTGGGCTACACATGCCTGTGTACCTACAGACCCCAACCCACAA
+GAAATAGATTTGGAGAATGTGACAGAAGAGTTTAACATGTGGAAAAATAA
+CATGGTAGAGCAAATGCATACAGATATAATTAGTCTATGGGACCAAAGCC
+TAAAACCATGTGTAAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTGGC
+T---ATA---AT--------------------------------------
+----------------------GT---AAC-CA-ACT---TG--------
+-------A-ATTTCACC---AG--T-------------------------
+--------------------------------------------------
+AAC---ATGAA------A-GGA-GA-CATAA-CAAACTGCTCTTACAATA
+TGACCACAGAAATAAGGGATAGGAAACAGAAAGTGTATTCAC---TTTTT
+TATAGGCTTGATATAGTACCAATTAATGAAGAAAAGAAT-----------
+-------AATA---GC--AG--GGAGACT---AGT---------------
+---------------------C---CG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCCATTACACAGGCTTGTCCTAAGGTATCTTTTGAACCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATTCTAAAATGTAAGG
+ATGCAGAGTTCAATGGAACAGGGCCATGCAAGAATGTCAGCACAGTACAA
+TGTACACATGGAATCAGGCCAGTAATATCAACTCAACTGCTGTTAAATGG
+CAGTTTAGCAGAG---AATGG---GACAAAGATTAGATCTGAAAATATCA
+CAAACAATGCCAAAACCATAATAGTACAACTTAACGAGACTGTACAAATT
+AATTGTACCAGACC------T------AGCAACA---AT---ACAAGAAA
+AAGTGTACGTATA---------GGACCAGGACAAGCATTCTATACA---A
+CAGGTGAT------ATAACAGGGGATATAAGACAAGCATATTGTAATGTC
+AGTAGACAAGAATGGGAACAAGCATTAAAAGGGGTAGTT---AT--ACAA
+TTAAGAAAA------CACTTT---A----AC-----------AAAACA--
+----ATAATCTTTAAC----AGTTC---CTCAGGAGGGGATTTAGAAATT
+ACAACAC------ATAGTTTTAATTGTGGAG---GAGAATTTTTCTATTG
+TGATACATCAGGCCTGTTTAAT---AGCACC---TG---G----------
+------AAC-----ACG---------------------------AACACC
+A---CCGAG---C-C-------A---AAC---AA-CAC---A---A----
+--CG--TC--AAA---T---------------GGCACTATCATT------
+---CTCCAATG-CA-GAATAAAGCAAATTATAAATCTGTGGCA---GAGA
+ACAGGACAAGCAATGTATGCCCCTCCCATCCAAGGGGTAATAAGGTGTGA
+TTCCAACATTACAGGACTACTATTAACAAGAGATGG---TGGAGT---AG
+TT------GATAGT------------ATA--AATG---AA--ACCGAAAT
+CTTCAGACCTGGAG-GAGGAG-ATATGAGGGACAATTGGAGAAGTGAATT
+ATATAAGTATAAAGTAGTAAAAATTGAACCACTAGGAGTAGCACCCACCG
+GGGCAAAGAGAAGAGTGGTGGA------GAGAGAAAAAAGAGCAG---TT
+GGC-------ATAGGAGC---TG--T---ATTCATTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCGGCGTCAATAACGCTGACGG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAACAATTTG
+CTGAGGGCTATAGAGGCTCAACAGCATATGTTGAGACTCACGGTCTGGGG
+CATTAAGCAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAGGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTAATGTGCCCTGGAACTCTAGTTGGAG---T---AAT-----------
+-AAATCTCAGGA---GGAAATAT---GGG---GTAACATGACCTGGCTGC
+AATGGGATAAAGAAATTAGCAATTACACACAAACAATATATAACCTACTT
+GAAGAATCGCAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATT
+GGACAAGTGGGCAAATTTGTGGACTTGGTTTGACATAACAAATTGGCTGT
+GGTATATAAAAATATTTATAATGATAGTAGGAGGCTTAATAGGATTAAGA
+ATAGTTTTTGCTGTGCTTTCTGTAATAAATAGAGTTAGGCAGGGATACTC
+ACCTCTGTC---GTTTCAGACC---CATATCC---CGAGCCCAAGGGGT-
+-----CTCGACAGGCCCGGAAGAATCGAAGAAGAAGGTGGAGAGCGAGAC
+AGAGACAGATCGATTCGATTGGTGAGCGGATTCTTAGCACTTGCCTGGGA
+CGATCTACGGAACCTGTGCCTCTTCAGCTACCACCGATTGAGAGACTTCA
+TCTTGATTGCAGCGAGGACTGTGGAACTTCTGGGA---CACAGCAGTCTC
+AAGGGGTTGAGACTGGGGTGGGAAGGCCTCAAGTATCT---GTGGAATCT
+T---CTGATATATTGGGGTCGGGAACTAAAACTTAGTGCTATCAATTTGA
+TTGATA---CCATAGCAATAGCAGTAGCTGGCTGGACAGATAGAATTATA
+GAAGTAGGACAAAGACTTTGTAGAGCTATACTTAACATACCTAGAAGAAT
+CAGACAGGGTGCCGAAAGGGCTTGGCTATAA
+'A1.KE.00.KNH1209'
+------GCGAGAGCGTCAGTATTAAGCGGGGG---AAAATTAGATGCATG
+GGAGAAGATTCGGTTAAGGCCAGGGGGAAAGAAAAAATACAGAATGAAAC
+ATTTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCGGGC
+CTTTTAGAAACAGCAGAAGGATGTCAACAAATATTGGAACAATTACAATC
+AGCTCTCGGGACAGGAACAGAGGAACTTAAATCATTGTATAATACAGTAG
+CCACCCTCTATTGCGTACATCAAAGAATAGAGGTAAAAGACACCAAGGAA
+GCTCTAG---AT--AAA-ATAGAGGAACTACA------------AAATAA
+GAGCAAG------------CAAAAGATA---------CA---ACAG----
+--------------GCA---GCA---------------------GCA---
+------------------------GCTACAGGA-----------------
+-------AGCAGCAGC------------AAG------GTTAGCCAAAATT
+ACCCTATAGTGCAAAATGCACAAGGGCAAATGATACACCAGTCCTTGTCA
+CCTAGGACTTTGAATGCATGGGTAAAAGTAATAGAAGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTCTCAGCATTATCAGAGGGAGCCACCCCAC
+AAGATTTAAATATGATGCTGAATATAGTGGGGGGACACCA---GGCAGCG
+ATGCAAATGTTAAAAGATACCATCAATGAGGAAGCTGCAGAATGGGACAG
+GTTACATCCAGTACATGCAGGGCCTATTCCACCAGGCCAGATGAGAGAAC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCCTCAAGAACAGATA
+GGGTGGATGACAAGCAACCCACCTATTCCAGTGGGAGACATCTATAAAAG
+ATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTAGATATAAAACAAGGACCAAAAGAACCCTTCAGAGATTATGTA
+GATAGGTTCTTTAAAACTCTCAGAGCTGAGCAAGCTACACAGGAGGTAAA
+AAATTGGATGACAGACACATTGCTGGTCCAAAATGCAAATCCAGATTGCA
+AGTCCATTTTAAGAGCATTAGGACCAGGAGCTACATTAGAGGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCAGCCATAAAGCAAGGGTTTTGGC
+TGAGGCAATGAGTCAGGC---AC---AAA---A---TA--C---A-----
+-----------------AACATA---ATGATGCAGAGAGGCAATTTTAGG
+GGCCAGAAA---AGAATTAAGTGCTTCAATTGTGGCAAAGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGGA
+AAGAGGGACATCAAATGAAAGACTGCACT------GAAAGACAGGCTAAT
+TTTTTAGGGAGAATCTGGCCTTC---CAGCAAGGGG---AGGCCAGGGAA
+TTTTCCTCAGAACAGA---------------CTAGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGATC
+TGTGGGATAGGGGAAGAGATA---------------GCCTCCCCTCTG--
+----AAGCAGGAGCAG------------AGAGA---CAGGGAACAGACT-
+-----CCA------ACTTTAATTTCCCTCAAATCACTCTTTGGCAACGAC
+CCATTGTCACAGTAAGAATAGGGGGACAACTAAAAGAAGCTCTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAGGAAAATG
+GAAACCAAAAATGATAGGGGGAATTGGAGGTTTCATCAAAGTAAAACAGT
+ATGATCAGATACTTATAGAAATTTGTGGAAAAAGGGCTATAGGCACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGCAGAAACATGTTGAC
+CCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAGACTGTGC
+CAGTAGAATTAAAGCCAGGAATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACA-----
+-GAAATGGAAAAGGAAGGAAAAATTTCAAGAATAGGGCCTGAAAATCCAT
+ACAATACTCCAATATTTGCAATAAAGAAAAAAGACAGCACTAAATGGAGG
+AAATTAGTGGATTTCAGAGAGCTCAATAAAAGAACACAAGACTTTTGGGA
+AGTTCAATTAGGAATACCGCATCC---AGCGGGTCTAAAAAAGAAGAAAT
+CAGTAA---CAGTACTAGATGTGGGGGATGCATACTTTTCAGTTCCTTTA
+CATAAAAGTTTTAGAAAATATACTGCATTCACCATACCTAGTACAAACAA
+TGAGACACCAGGAATCAGATATCAATACAATGTGCTTCCACAGGGATGGA
+AAGGATCACCAGCAATATTCCAGAGTAG---CATGATAAAAATCTTAGAG
+CCTTTTAGATCAAAAAATCCAGAAATAATTATCTATCAATACATGGATGA
+CTTGTATGTAGGATCTGATTTGGAAATAGGACAGCATAGAGCAAAAATAG
+AAGAGTTGAGAGCTCATCTATTGAGCTGGGGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTCCA
+TCCTGACAAGTGGACAGTCCAGCCTATACAGTTGCCAGAAAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTGGGGAAACTAAATTGGGCA
+AGTCAAATTTATGCAGGGATTAGAATAAAGCAATTGTGTAAACTCCTCAG
+GGGAGCCAAAGCTTTAACAGATATAGTAACATTGACTGAGGAAGCAGAAC
+TAGAATTGGCAGAAAACA---GGGAAATTCTAAAAAACCCTGTGCATGGG
+GTATATTATGACCCATCAAAAGACTTAGTAGCAGAAATACAGAAACAAGG
+GCAAGAACAATGGACATACCAAATTTATCAAGAGCCATTTAAAAATTTAA
+AAACAGGAAAATATGCAAGAAAAAGATCTACTCACACTAATGATGTAAAG
+CAATTAGCAGAAGTGGTGCAAAAGGTAGCCATGGAAAGCATAGTAATATG
+GGGAAAGA---CTCCTAAATTTAAACTA---CCCATACAAAAGGAGACAT
+GGGAGACA---TGGTGGATGGACCACTGGCAGGCTACCTGGATTCCTGAA
+TGGGAATTTGTCAATACCCCTCCTCTAGTAAAATTGTGGTACCAATTAGA
+GAAAGACCCCATAATGGGAGCAGAGACTTTCTATGTAGATGGGGCAGCCA
+ATAGACAGACCAAACTAGGAAAAGCAGGGTATGTCACTGACAGGGGAAGA
+CAAAAGGTTGTTTCCCTACCTGAGA---CAACAAATCAAAAGACTGAACT
+ATATGCCATCCATCTAGCCTTGCAGGATTCAGGATCAGAAGTAAACATAG
+T---AACAGACTCACAGTATGCATTAGGAATCA---TTCAGGCACAACCA
+GACAGAAGTGAATCAGAAATAGTTAATCAAATAATAGAGCAACTAATAGA
+AAAAGACAGAGTCTACCTA---TCATGGGTACCGGCACACAAAGGAATTG
+GAGGAAATGAACAAGTAGATAAATTAGTCAGTTCTGGAATCAGAAAGGTG
+CTGTTTATAGATGGGATAGATAAAGCTCAAG---AGGATCATGAAAGATA
+---TCACAGCAATTGGAGAACAATGGCTAGTGATTTTAATCTGCCACCTA
+TAGTAGCAAAAGAAATAGTAGCCAGCTGTGATAAAT---GTCA---GCTA
+AAAGGAGAAGCCATGCATGGACAAGTAGACTGCAGCCCAGGGATATGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAGTAATTTTGGTAGCAGTTCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAA
+GAGACAGCATACTTTCTACTAAAATTAGCAGGAAGATGGCCAGTAAAAGT
+AGTACACACAGACAATGGCAGCAATTTTACCAGCGCTGCAGTCAAAGCAG
+CCTGTTGGTGGGCAGGTATCCAACAGGAATTTGGGATCCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTAAAGAAAATCAT
+AGGACAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCTGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAA
+AGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACA
+GGGACAGCAGAGATCCAGTTTGGAAAGGACCAGCAAAACTGCTCTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGACAATAGTGATATAAAGGTAGT
+ACCAAGAAGAAAAGCAAAAATCATCAGGGACTATGGAAAACAGATGGCAG
+GTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGA---ACATGGCA
+CAGTTTGGTAAAATATCATAAGTATATATCAAAGAAAGCTAAAAATTGGC
+ATTATAGACATCACTATGAAAGTAGACATCCAAAAACATGTTCAGAAGTA
+CATATCCCACTAGGGG---ATG---CTAGATTAGTAGTAAGGACATATTG
+GGGTCTGCATACAGGAGAAAAAGACTGGCACTTGGGTCATGGGGTCTCCA
+TAGAATGGAGGCTAAGAAGATATAGCACACAA------GTAGATCCTGAT
+CAGGCAGACCAACTAATTCATCTGCATTATTTTAACTGTTTTTCAGACTC
+---TGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGCCCTAGGTGTG
+ACTATCAAACAGGACATA---ACAAGGTAGGATCTCTACAATATTTAGCA
+CTGAAAGCATTAGTAAAACC---AGAAAAGACAAAGCCACCTTTGCCTAG
+TGTTAGGATATTAGCAGAGGATAGATGGAACAAGCCCCAGAAGACCAAGG
+GCCCCAGAGAGAGCCATACAATGAATGGATGTTAGACCTGTTAGAAGAGC
+TTAAGCATGAAGCCGTTAGGCATTTTCCCAGGCCATGGCTTCATGGATTA
+GGACAACATATCTATAACACCTATGGGGATACTTGGGAAGGAGTTGAAG-
+--CTATAATAAGAATTCTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGCC---AACATAGCAG---AATAGGCATT---ATTCGAGGG
+AGAAGAGTTA----------GGAATGGACCCAGTAGATCCTAACCTAGAG
+CCATGGAACCACCCGGGAAGTCAGCCTGCAACCCCTTGTA---ACAAATG
+TTATTGTAAAAAGTGTTGCTATCATTGCCAA---------------GCTT
+GCTTTTTGAA---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAAGA------T---CTCCTCAGAGCAGTAA
+GGATCATCAACATTCTACACAAGAGCAGTAAGTAT--TAGTAGTT-----
+-GATATATGTA---ATG---------------------------------
+---------CTTCCTTTGC---------------AAATCTTGGCAATAAT
+A---GGACTGATAGTAGC---CC--TAATCC-TAGCAATAGTTGTATGGA
+CTATAGTAGGTATAGAATGT---AAGA------------AATTTCTAAAA
+CAA---AGAAAAATAGACAGGTTAATTGAGAGAATAAGAGAAAGAGCAGA
+AGACAGTGGCAATGAGAGTGATGGGGATACAGAGGAATTGTCAGAACTT-
+--ATTGACATG------------------------GGGAACTATG---AT
+CTTGGGG---ATAATAATCTT------------CTGT------AGTGCTA
+C---------AGA---C------------A---A------CTTG------
+TGGGTTACTGTCTATTATGGGGTACCAGTGTGGAAAGACGCAG---AG--
+ACCACCT-TATTTTGTGCATCAGATGCTAAAGCATATGCGACAGAAAAGC
+ATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATACATTTGGAAAATGTGACAGAAGAATTTAACATGTGGAAAAATAA
+CATGGTAGAACAGATGCATACAGATATAATCAGTCTATGGGACCAAAGCC
+TAAAACCATGTGTAAAGTTAACCCCTCTCTGCGTTACTTTAAATTGTAGC
+A---ATG---CC--AATGTAGGTTAT------------------------
+-----------AG--CAAT---GC---CAC-TG-TTA---AC--------
+-------A-ACACCATC---AA--G-------------------------
+--------------------------------------------------
+-------------------GAT-GA-AATAA-AAAACTGCTCTTTCAATA
+CAACCACAGCACTAAGGGATAAGAGACAGAAAGTATATTCAC---TTTTT
+TATAGACTTGATATAGTACAAATTGATAATAGTAGT--------------
+-------AGTG---AT--AG--TAGTAGT---AGT---------------
+---------------------G---AG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCCATTACACAAGCTTGTCCAAAGGTAACCTTTGAGCCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTTTGCAATCCTAAAGTGTAAAG
+ATGAGGAGTTCAATGGAACAGGGCCATGCAAGAATGTCAGCACAGTACAA
+TGCACACATGGAATCAAGCCAGTAGTATCAACTCAACTGCTGCTAAATGG
+CAGTCTAGCAAAA---AGAGA---GGTAAAAATTAGATCTGAAAATATCA
+CAAACAATGCCAAAAATATAATAGTACAATTTGTTGATCCTGTGGAAATT
+AATTGTACCAGACC------T------AACAACA---AT---ACAAGAAA
+AAGTATACATATA---------GGACCAGGACAAGCATTCTATGCA---A
+CAGGTGAC------ATAATAGGGGATATAAGACAAGCACATTGTAATGTC
+AGTAGATCATCCTGGAATAGGACTTTACAACAGGTAGCT---AA--ACAA
+TTAGGAACA------TACTTT---A----AG--AAC------AAAACA--
+----ATAGTATTTAAT----ACATC---CTCAGGAGGGGATCCTGAAATC
+ACAACAC------ATAGTTTTAACTGTGCAG---GAGAATTTTTCTATTG
+TGATACATCAGGCCTGTTTAAT---AGCAGT---TG---G----------
+------AATGA--TACC---------------------------ACTTGG
+A---ATGAG---T-C-------A---AAT---AG-CAC---G---G----
+--GG--TC--AAA---T---------------GACACTATAACT------
+---CTCCTATG-CA-GAATAAAACAAATTATAAATATGTGGCA---GAGA
+ACAGGACAAGCAATGTATGCCCCTCCCATCCCAGGGTTAATAAGCTGTAA
+ATCAAACATTACAGGTATAATATTGACAAGAGATGG---TGGGAA---TG
+GT------AAC--------------------AATA---CA--AATGAAAC
+CTTCAGACCGGGAG-GAGGAG-ATATGAGGGACAATTGGAGAAGTGAACT
+GTATAGGTATAAAGTAGTACAAATTGAACCACTAGGAGTAGCACCCACCA
+GGGCAAGGAGAAGAGTGGTGCA------GAGAGAAAAAAGAGCAG---TA
+GGA-------ATAGGAGC---TG--T---CTTCCTTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCAGCGTCAATAACGCTGACGG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAGCAATTTG
+CTGAGGGCTATAGAGGCTCAACAACATCTGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAGAGATACCTAAGGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTAATGTACCCTGGAACTCTAGTTGGAG---T---AAT-----------
+-AAATCTTATAA---TGACATAT---GGG---ATAACATGACCTGGCTGC
+AATGGGATAAAGAAATTCACAATTACACACAATTAATATATAACCTAATT
+GAAGAATCGCAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATT
+GGACAAGTGGGCAAATCTGTGGAATTGGTTTAACATAACAAATTGGCTGT
+GGTATATAAAAATATTTATAATGGTAGTAGGAGGCTTAATAGGATTAAGA
+ATAGTTTTTGCTGTGCTTTCGATAATAAATAGAGTTAGGCAGGGATACTC
+ACCTTTGTC---GTTTCAGACC---CACCTCC---CAAACCCAAGGGAT-
+-----CTCGACAGGCCCGAAAGAATCGAAGAAGAAGGTGGAGAGCAAGGC
+AGAGACAGATCGATTCGCTTAGTGAGCGGATTCTTAGCGCTTGCCTGGGA
+CGATCTGCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTCA
+TCTTGATTGCCGCGAGGACTGTGGAACTTCTGGGA---CAGAGCAGTCTC
+AAGGGGTTGAGACTGGGGTGGGAAAGCCTCAAGTATCT---GTGGAATCT
+C---CTAAGGTATTGGGTTCGGGAACTAAAAATTAGTGCTGTTAATTTAG
+TTGATA---CCATAGCAATAGCAGTAGCTGGCTGGACAGATAGGGTTATA
+GAAATAGGACAAGAAATTGGTAGAGCTATTCGCCACATACCTAGAAGAAT
+CAGACAGGGTTTAGAAAGAGCTTTGCTATAA
+'A1.KE.00.KNH1211'
+------GCGAGAGCGTCAGTATTAAGCGGGGG---AAAATTAGATGCATG
+GGAGAAGATTTATTTAAGGCCAGGGGGAAAGAAAAAATATAAACTAAAAC
+ATTTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCTAGC
+CTTTTAGAAACAACAGAAGGATGTCAACAAATAATGGAACAGTTACAACC
+ATCTCTCAAGACAGGATCAGAAGAACTTAGATCTTTATTTAATACAGTAG
+CAACCCTCTATTGTGTACATCAAAGAATAAATGTAAAAGACACCAAGGAA
+GCTCTAG---AT--AAA-ATAGAGGAAATACA------------AAATAA
+GAACAAG------------CAAAAGACA---------CA---ACAG----
+--------------GCA---GCA---------------------GCT---
+------------------------GATACAGGA-----------------
+-------AGCAGCAGC------------AAG------GTCAGCCAAAATT
+ACCCTATAGTGCAAAATGCACAGGGGCAAATGATACATCAGTCCTTATCA
+CCTAGGACTTTGAATGCATGGGTAAAAGTAATAGAGGAAAAGGCTTTCAG
+CCCAGAAGTAATACCCATGTTCTCAGCATTATCAGACGGAGCCACCCCAC
+AAGATTTAAATATGATGTTGAATATAGTAGGGGGACATCA---GGCAGCT
+ATGCAAATGTTAAAAGATACCATCAATGAGGAAGCTGCAGAATGGGACAG
+GTTACATCCAGTACATGCAGGGCCTATTCCACCAGGCCAGATGAGAGAAC
+CAAGGGGAAGTGACATAGCAGGAACTACTAGTACCCCTCAAGAACAAATA
+GGATGGATGACAGGCAACCCACCTATCCCAGTGGGAGAAATCTATAAAAG
+ATGGATAATCCTGGGATTAAATAAAATAGTAAGAATGTATAGCCCTGTTA
+GCATTTTGGATATAAAGCAAGGACCAAAAGAACCCTTTAGAGATTATGTA
+GATAGGTTCTTTAAAACTCTTAGAGCTGAGCAAGCCACACAGGAGGTAAA
+AGGTTGGATGACAGAAACATTACTGGTCCAAAATGCAAATCCAGATTGTA
+AGTCCATTTTAAGAGCATTAGGAACAGGGGCTACGTTAGAAGAAATGATG
+ACAGCATGCCAGGGAGTGGGAGGACCCGGCCATAAAGCAAGGGTTTTGGC
+TGAGGCAATGAGTCAGGT---AC---AGC---A---TA--C---A-----
+-----------------AACATA---ATGATGCAGAGAGGCAATTTTAGG
+GGCCAGAAA---AGGATTAAGTGTTTCAACTGTGGCAAAGAAGGACACCT
+AGCCAGAAATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGGA
+AGGAGGGACATCAAATGAAAGACTGCACT------GAAAGGCAGGCTAAT
+TTTTTAGGGAAAATTTGGCCTCC---CAACAAGGGG---AGGCCAGGAAA
+TTTTCCTCAGAGCAGA---------------TTAGAGCCAACAGCCCCA-
+-----------------------------------CCA---GCAGAGATC
+TGTGGGATGAGGGAAGAGATA---------------GCCTCCCCTCCG--
+----AAGCAGGAGCAG------------AAAGA---CAGGGAACAAACT-
+-----CCA------CCTTCAATTTCCCTCAAATCACTCTTTGGCAACGAC
+CCCTTGTCACAGTAAAAATAGGGGGACAGCTAAAAGAAGCCCTATTAGAT
+ACAGGAGCAGATGATACAGTATTAGAAGACATAGATTTGCCAGGGAAATG
+GAAACCAAGAATGATAGGGGGAATTGGAGGTTTCATCAAGGTAAAACAGT
+ATGATCAGATACTTATAGAAATTTGTGGAAAAAAGGCTATAGGTACAGTA
+TTAGTAGGACCTACACCTGTCAACA---TAATTGGAAGAAACATGTTGAC
+CCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAGACTGTAC
+CAGTAACATTAAAGCCAGGAATGG---ATGGCCCAAAGGTTAAACAATGG
+CCATTGACAGAAGAAAAAATAAAAGCATTAACAGAAATTTGTACA-----
+-GAAATGGAAAAGGAAGGAAAAATTTCAAAAATTGGGCCTGAAAATCCAT
+ACAATACTCCAATATTTGCGATAAAGAAAAAAGATAGCACTAAATGGAGA
+AAATTAGTAGATTTCAGAGAGCTCAATAAAAGAACACAAGATTTTTGGGA
+AGTTCAATTAGGAATACCACATCC---AGCGGGCCTAAAAAAGAAAAAAT
+CAGTAA---CAGTACTGGATGTGGGGGACGCATATTTTTCAGTTCCTTTA
+CATGAAAGCTTTAGAAAATATACTGCATTCACCATACCTAGTACAAACAA
+TGAGACACCAGGAATCAGGTACCAGTACAATGTGCTTCCACAGGGATGGA
+AAGGATCACCGGCAATATTCCAGAGTAG---CATGACAAAAATCTTAGAG
+CCCTTTAGATCAAAAAATCCAGACATAATTATCTATCAATACATGGATGA
+CTTGTATGTAGGATCTGATTTAGAAATAGAGCAGCATAGAACAAAAATAG
+AAGAGTTAAGAGCTCATCTATTGAGCTGGGGATTTACTACACCAGAC---
+AAAAAGCATCAGAAAGAACCTCCATTCCTTTGGATGGGATATGAGCTCCA
+TCCTGACAAGTGGACAGTCCAGCCTATAATGCTGCCAGAAAAAGAAAGCT
+GGACTGTCAATGATAT---ACAGAAATTAGTAGGGAAACTAAATTGGGCA
+AGTCAAATTTACCCAGGGATTAGAGTAAAACAATTGTGTAAACTCCTCAG
+GGGAGCCAAAGCACTAACAGAGATAGTAACATTGACTGAGGAAGCAGAAT
+TAGAATTGGCAGAGAACA---GGGAGATTCTAAAGGACCCTGTGCATGGG
+GTATATTATGACCCCTCAAAAGACTTAATAGCAGAAATACAGAAACAAGG
+GCAAGACCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTAA
+AAACAGGGAAATATGCAAGAAAAAGGTCTGCTCACACTAATGATGTAAAA
+CAATTAGCAGAAGTGGTGCAAAAGGTGGTCATGGAAAGCATAGTAATATG
+GGGAAAGG---CTCCTAAATTTAAACTA---CCCATACAAAAGGACACAT
+GGGAAACA---TGGTGGATGGACTATTGGCAGGCTACCTGGATCCCTGAA
+TGGGAATTTGTCAATACCCCTCCTCTAGTAAAATTATGGTACCAATTAGA
+GAAAGACCCCATAACAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCCA
+ATAGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGGGGAAGA
+CAAAGGGTTGTTTCCCTAACTGAGA---CAACAAATCAAAAGACTGAGCT
+ACATGCAATCCATCTAGCCTTGCAGGATTCAGGATCAGAAGTAAACATAG
+T---AACAGACTCACAGTATGCATTAGGAATCA---TTCATGCACAACCA
+GACAGTAGTGAATCAGAGTTAGTTAATCAAATAATAGAGAAGCTAATAGA
+AAAGGACAAAATCTACCTA---TCATGGGTACCGGCACACAAAGGAATTG
+GAGGAAATGAACAAGTAGATAAATTAGTCAGTTCCGGAATCAGGAAGGTG
+CTGTTTCTAGATGGGGTAGATAAAGCTCAAG---AAGAACATGAAAGATA
+---TCACAGCAATTGGAGAGCAATGGCTAGTGATTTTAATCTGCCACCTG
+TAGTAGCAAAGGAAATAGTAGCCAGCTGTGATAAAT---GTCA---GCTA
+AAAGGGGAAGCCATGCATGGACAAGTAGATTGTAGTCCAGGGATATGGCA
+ATTAGATTGCACACATCTAGAAGGAAAAGTAATTCTGGTAGCAGTTCATG
+TAGCCAGTGGCTATATAGAAGCAGAAGTTATCCCAGCAGAAACAGGACAA
+GAGACAGCATACTTTCTACTAAAATTGGCAGGAAGATGGCCAGTAAAAGT
+AGTCCACACAGACAATGGCCCCAACTTTATCAGCGCTGCAGTTAAAGCAG
+CCTGTTGGTGGGCAGGTATCCAACAGGAATTTGGAATTCCCTACAATCCC
+CAAAGTCAAGGAGTAGTGGAATCTATGAATAAGGAATTAAAGAAAATCAT
+AGGACAGGTAAGAGAGCAAGCTGAACACCTTAAAACAGCAGTACAAATGG
+CAGTATTCATTCACAATTTTAAAAGAAAA---GGGGGGATTGGGGGGTAC
+AGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACAAACTAA
+AGAATTACAAAAACAAATTATAAAAATTCAAAATTTTCGGGTTTATTACA
+GGGACAGCAGAGATCCAATTTGGAAAGGACCAGCAAAACTACTCTGGAAA
+GGTGAAGGGGC---AGTAGTAATACAGGACAATAGTGATATAAAGGTAGT
+ACCAAGAAGAAAAGCAAAAATCATCAGGGACTATGGAAAACAGATGGCAG
+GTGATGATTGTGTGGCAGGTAGACAGGATGAGGATTAGA---ACATGGCA
+CAGTTTAGTAAAACATCATATGTATGTCTCAAAGAAGGCTAACAAATGGT
+GGTATAGACATCACTATGAAAGCAGACATCCAAGAGTAAGTTCAGAAGTA
+CACATCCCACTAGAGG---ATG---CTAGATTAGTAGTAAGAACATATTG
+GGGTCTGCATACAGGAGAAAAAGACTGGCAATTGGGGCATGGAGTCTCCA
+TAGAATGGAGGCTAAGAAAATATAGCACACAA------ATAGATCCTGAA
+CAGGCAGACCGACTAATTCATCTACATTATTTTGACTGTTTTTCAGACTC
+---TGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGACCTAGATGTG
+AATATCAAACAGGACATA---ACAAGGTAGGATCTCTACAATATTTAGCA
+CTGAAAGCATTAGTAGGACC---AGTAAAGACAAAGCCACCTTTGCCTAG
+TGTCAGGAAATTAACAGAGGATAGATGGAACAAACCCCAGAAGACCAGGG
+GCCACAGAGAGAGCCATACAATGAATGGATGTTAGATCTGTTAGAAGATC
+TTAAGCATGAAGCCGTCAGACATTTTCCTAGGCCATGGCTTCATGGATTA
+GGACAACATATCTATAACACCTATGGGGATACCTGGGAAGGAGTTGAAG-
+--CTATAACAAGAACTCTGCAA---CAACTACTGTTTGTTCAT---TTCA
+GAATTGGGTGCC---AACATAGCAG---AATAGGCATT---ATTCCRGRG
+AGAAGAGGCA----------GGAATGGATCCGGTAGATCCTAACCTAGAG
+CCCTGGAACCACCCGGGAAGTCAGCCTGCAACACCTTGTA---GCAAGTG
+TTACTGTAAAAAATGTTGCTATCATTGTCCA---------------GCTT
+GCTTTTTGAA---CAAAGGCTTAGGCATCTCCTATGGCAGGAAGAAGCGG
+AGAC------A---GCGACGAGGA------A---CTTCTCAGAGCAATAA
+GGATCATCAAAATCCTGTACCAAAGCAGTAAGTAT--TAGTAATT-----
+-AGTATAAGTA---ATGTTG------------------------------
+---------TCTCTTTTGC---------------ACATCTTTGCAATAGT
+A---AGTCTGATAGTATC---GC--TAATCA-TAGCAATAATTGTGTGGA
+CTATAGTAGGTATAGAATAT---AAGA------------GATTGTTAAAA
+CAA---AAGAGAATAGACAGATTAATTGAGAGAATAAGAGAAAGAGCAGA
+AGACAGTGGCAATGAGAGTGATGGGGATACAGAGGAATTGTCAACACTT-
+--GTTAACATG------------------------GGGGACTACG---AT
+CTTGGGG---ATGATATTAAT------------CTGTTAT---AGTGTTA
+C---------AGA---A------------G---A------CTTG------
+TGGGTTACTGTCTACTATGGGGTACCTGTGTGGAAAGATGCAG---AG--
+ACCACCC-TATTTTGTGCATCAGATGCTAAAGCATATGAGACAGAAAAGC
+ATAATGTATGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATACATTTGGAAAATGTAACAGAAGAGTTTAACATGTGGAAAAATAA
+CATGGTAGAGCAGATGCATACAGATATAATCAGTTTATGGGACCAAAGCT
+TAAAGCCATGTGTAAAGTTAACCCCTCTCTGTGTTACTCTAAATTGTACC
+A---ATG---CC--------------------------------------
+--------------------------------A-ATG---CC--------
+-------A-AGAATGGC---AC--C-------------------------
+--------------------------AAC---GA--T---TT--G-----
+AAG---GACCA------G-GAA-GA-AATAA-AAAACTGCTCTTTCAACA
+TAACCACAGAGCTAAGGGATAAGAGGAGGAAGGTATATTCAC---TTTTT
+TATAAACTTGATATAGTACAGATTAATGAAAATCAAGGT-----------
+-------AATA---GA--AG--TAACAAT---AGT---------------
+---------------------G---AG--TATAGA-T-TAATAAATTGTA
+-ATACCTCAGCAATTACACAGGCTTGCCCAAAGGTAACTTTTGAGCCAAT
+TCCCATACATTATTGTGCCCCAGCTGGTTTTGCGATCCTAAAGTGTAAGG
+ATGAGGAGTTCAATGGAACAGGGCCATGCAAGAATGTCAGCTCAGTCCAA
+TGCACACATGGAATCAGGCCAGTAGTATCAACTCAACTGTTGTTAAATGG
+TAGTCTAGCAAAA---GGAAA---GGTAAAAATTAGATCTGAAAATATCG
+CAGACAATGTCAAAACTATAATAGTACAACTTACTGAGCCTGTAAACATT
+ACTTGTATCAGGCC------T------AATAACA---AT---ACAAGAAC
+AAGTGTACGTATA---------GGACCAGGACAAACATTCTATGCA---A
+CAGGTGAC------ATAATAGGGAATATAAGACAAGCACAGTGTACTGTC
+AATAGAACAAAATGGAATAGCGCTTTACACCAGGTAGTT---AC--ACAA
+TTAGGACAC------TTTGTC---A----CC-----------AAAACT--
+----ATAAAATTTAAT----GAATC---CTCAGGAGGGGATTTAGAAATC
+ATAACAC------ATAGTTTTAATTGTGGAG---GAGAATTTTTCTATTG
+TAATACATCAGGCCTGTTTAAT---AGCACT---TG---G----------
+------AATAG--CACTGGGAATGACACTTGGAGT---------GGCAAT
+G---CCAGC-----------------------AC-GCA---G---G----
+--AG--TC--AAA---T---------------GACACTATAACT------
+---CTCCCATG-CA-GAATAAAGCAAATTGTAAATATGTGGCA---GAGA
+ATAGGACAAGCAATGTATGCCCCTCCCATCCAAGGAGTAATAAGGTGTGA
+ATCAAACATTACAGGACTACTATTAACAAGAGATGG------TGG---GA
+AC-----------------------------AATG---AA--AGTGAAAC
+CTTCAGACCTGGAG-GAGGAA-ATATGAGGGACAATTGGAGAAGTGAATT
+ATATAGGTACAAAGTAGTGAAAATTGAACCACTAGGAGTAGCACCCACCA
+GGGCAAAAAGAAGAGTGGTGGG------GAGAGAAAAAAGAGCAG---TT
+GGA-------ATAGGAGC---TG--T---GTTCCTTGGGT---TCTTAGG
+----AG-CAGCAGGAAG-CACTATGGGCGCGGCGTCAGTAACGCTGACGG
+TACAGGCCAGACAATTATTGTCTGGCATAGTGCAACAGCAAAGCAACTTG
+CTGAGGGCTATAGAGGCTCAACAACATCTGTTGAAACTCACGGTCTGGGG
+CATTAAACAGCTCCAGGCAAGAGTCCTTGCTGTGGAAAGATACCTAAGGG
+ATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACC
+ACTACTGTGCCCTGGAACTCTAGTTGGAG---T---AAT-----------
+-AAATCTCTGGG---GGAGATAT---GGG---AAAACATGACCTGGCTGC
+AATGGGAAAAAGAAATTAGCAATTATACAGACATAATATATAGCCTAATT
+GAAGAATCGCAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATT
+GGATAAGTGGGCAAGTCTGTGGAATTGGTTTGACATATCAAAGTGGCTGT
+GGTATATAAGAATATTTATAATAATAGTAGGAGGCTTAATAGGATTAAGA
+ATAGTTTTTGCTGTGCTTTCTATAATAAATAGAGTTAGGCAGGGGTACTC
+ACCTTTGTC---GTTTCAGACC---CATACCC---CAAACCCAGGGGAT-
+-----CCCGACAGGCCCGGAAGAATCGAAGAAGAAGATGGAGAGCAAGGC
+AAAAACAGATCGATTCGATTAGTGAGCGGGTTCTTAGCGCTTGCCTGGGA
+CGATCTTCGGAGCCTGTGCCTCTTCAGCTACCACCGCTTGAGAGACTTAA
+TCTTGATTGCCGCGAGGACTGTGGAACTTCTGGGA---CACAGCAGTCTC
+AAGGGGTTGAGACTGGGGTGGGAAGGCCTCAAGTATCT---GTGGAATCT
+C---TTACGATATTGGGTTCGGGAACTCAAAATTAGTGCTATTAGTTTGG
+TTGATA---CCATAGCAATAGTAGTAGCTGGCTGGACAGACAGGGTACTA
+GAAA------------TTGGTAGAGCTATCCTTCACATACCTAGAAGAAT
+TAGACAGGGTCTTGAAAGAGCTTTGCTATAA
+;
+end;
+
+begin assumptions;
+  options gapmode=missing;
+end;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/X98338_Adh-mRNA.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/X98338_Adh-mRNA.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/X98338_Adh-mRNA.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,101 @@
+LOCUS       DMALCDEHP               2071 bp    mRNA    linear   INV 06-JAN-1997
+DEFINITION  D.melanogaster mRNA for alcohol dehydrogenase related protein.
+ACCESSION   X98338
+VERSION     X98338.1  GI:1752657
+KEYWORDS    ADH protein; ADHR protein.
+SOURCE      Drosophila melanogaster (fruit fly)
+  ORGANISM  Drosophila melanogaster
+            Eukaryota; Metazoa; Arthropoda; Hexapoda; Insecta; Pterygota;
+            Neoptera; Endopterygota; Diptera; Brachycera; Muscomorpha;
+            Ephydroidea; Drosophilidae; Drosophila.
+REFERENCE   1
+  AUTHORS   Brogna,S. and Ashburner,M.
+  TITLE     The Adh-related gene of Drosophila melangaster is expressed via a
+            functional dicistronic messenger: multigenic transcription in
+            higher organisms
+  JOURNAL   Unpublished
+REFERENCE   2  (bases 1 to 2071)
+  AUTHORS   Brogna,S.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (05-JUN-1996) S. Brogna, University of Cambridge,
+            Genetics, Downing Street, Cambridge, CB2 3EH, UK
+FEATURES             Location/Qualifiers
+     source          1..2071
+                     /organism="Drosophila melanogaster"
+                     /mol_type="mRNA"
+                     /strain="Canton-S"
+                     /db_xref="taxon:7227"
+                     /dev_stage="adult"
+     gene            125..895
+                     /gene="ADH"
+     CDS             125..895
+                     /gene="ADH"
+                     /codon_start=1
+                     /product="alcohol dehydrogenase protein"
+                     /protein_id="CAA66981.1"
+                     /db_xref="GI:1752658"
+                     /db_xref="FLYBASE:FBgn0000055"
+                     /db_xref="SWISS-PROT:P00334"
+                     /translation="MSFTLTNKNVIFVAGLGGIGLDTSKELLKRDLKNLVILDRIENP
+                     AAIAELKAINPKVTVTFYPYDVTVPIAETTKLLKTIFAQLKTVDVLINGAGILDDHQI
+                     ERTIAVNYTGLVNTTTAILDFWDKRKGGPGGIICNIGSVTGFNAIYQVPVYSGTKAAV
+                     VNFTSSLAKLAPITGVTAYTVNPGITRTTLVHKFNSWLDVEPQVAEKLLAHPTQPSLA
+                     CAENFVKAIELNQNGAIWKLDLGTLEAIQWTKHWDSGI"
+     polyA_signal    1017..1022
+     polyA_site      1066
+     gene            1194..2012
+                     /gene="ADHR"
+     CDS             1194..2012
+                     /gene="ADHR"
+                     /codon_start=1
+                     /product="alcohol dehydrogenase related protein"
+                     /protein_id="CAA66982.1"
+                     /db_xref="GI:1752659"
+                     /db_xref="FLYBASE:FBgn0000056"
+                     /db_xref="SWISS-PROT:P91615"
+                     /translation="MFDLTGKHVCYVADCGGIALETSKVLMTKNIAKLAILQSTENPQ
+                     AIAQLQSIKPSTQIFFWTYDVTMAREDMKKYFDEVMVQMDYIDVLINGATLCDENNID
+                     ATINTNLTGMMNTVATVLPYMDRKMGGTGGLIVNVTSVIGLDPSPVFCAYSASKFGVI
+                     GFTRSLADPLYYSQNGVAVMAVCCGPTRVFVDRELKAFLEYGQSFADRLRRAPCQSTS
+                     VCGQNIVNAIERSENGQIWIADKGGLELVKLHWYWHMADQFVHYMQSNDEEDQD"
+     polyA_signal    2023..2028
+     polyA_site      2071
+BASE COUNT      581 a    504 c    511 g    475 t
+ORIGIN      
+        1 attattgtct cagtgcagtt gtcagttgca gttcagcaga cgggctaacg agtacttgca
+       61 tctcttcaaa tttacttaat tgatcaatat cgaaagagcc tgctaaagca aaaaagaagt
+      121 caccatgtcg tttactttga ccaacaagaa cgtgattttc gttgccggtc tgggaggcat
+      181 tggtctggac accagcaagg agctgctcaa gcgcgatctg aagaacctgg tgatcctcga
+      241 ccgcattgag aacccggctg ccattgccga gctgaaggca atcaatccaa aggtgaccgt
+      301 caccttctac ccctatgatg tgaccgtgcc cattgccgag accaccaagc tgctgaagac
+      361 catcttcgcc cagctgaaga ccgtcgatgt cctgatcaac ggagctggta tcctggacga
+      421 tcaccagatc gagcgcacca ttgccgtcaa ctacactggc ctggtcaaca ccacgacggc
+      481 cattctggac ttctgggaca agcgcaaggg cggtcccggt ggtatcatct gcaacattgg
+      541 atccgtcact ggattcaatg ccatctacca ggtgcccgtc tactccggca ccaaggccgc
+      601 cgtggtcaac ttcaccagct ccctggcgaa actggccccc attaccggcg tgaccgctta
+      661 caccgtgaac cccggcatca cccgcaccac cctggtgcac aagttcaact cctggttgga
+      721 tgttgagccc caggttgctg agaagctcct ggctcatccc acccagccat cgttggcctg
+      781 cgccgagaac ttcgtcaagg ctatcgaact gaaccagaac ggagccatct ggaaactgga
+      841 cttgggcacc ctggaggcca tccagtggac caagcactgg gactccggca tctaagaagt
+      901 gataatccca aaaaaaaaac ataacattag ttcatagggt tctgcgaacc acaagatatt
+      961 cacgcaaggc aataaggctg attcgatgca cactcacatt cttctcctaa tacgataata
+     1021 aaactttcca tgaaaaatat ggaaaaatat atgaaaattg agaaatccaa aaaactgata
+     1081 aacgctctac ttaattaaaa tagataaatg ggagcggcag gaatggcgga gcatggccaa
+     1141 gttcctccgc caatcagtcg taaaacagaa gtcgtggaaa gcggatagaa agaatgttcg
+     1201 atttgacggg caagcatgtc tgctatgtgg cggattgcgg aggaattgca ctggagacca
+     1261 gcaaggttct catgaccaag aatatagcga aactggccat tttacagagt acggaaaatc
+     1321 cccaggccat cgctcagttg cagtcgataa agccgagtac ccaaatattt ttctggacct
+     1381 acgacgtgac catggcaagg gaagatatga agaagtactt cgatgaggtg atggtccaaa
+     1441 tggactacat cgatgtcctg atcaatggtg ctacgctgtg cgatgaaaat aacattgatg
+     1501 ccaccatcaa tacaaatcta acgggaatga tgaacactgt ggccacagtg ttaccctata
+     1561 tggacagaaa aatgggagga actggtgggc taattgtgaa cgtcacttcg gtcattggat
+     1621 tggacccttc gccggttttc tgcgcatata gtgcatccaa attcggtgta attggattca
+     1681 ccagaagtct agcggacccc ctttactatt cccaaaacgg ggtagctgtg atggcggttt
+     1741 gttgtggtcc tacaagggtc tttgtggacc gggaactgaa agcgtttttg gaatacggac
+     1801 aatcctttgc cgatcgcctg cggcgagcgc cctgccaatc gacatcggtt tgtggtcaga
+     1861 atattgtcaa tgccatcgag agatcggaga atggtcagat atggattgcg gataagggtg
+     1921 gactggagtt ggtcaaattg cattggtact ggcacatggc cgaccagttc gtgcactata
+     1981 tgcagagcaa tgatgaagag gatcaagatt gaattcgaat caaataaaat aatgctttac
+     2041 gcaaaaagta ggcaattcat tttcctatga t
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/a_thaliana.blastn
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/a_thaliana.blastn	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/a_thaliana.blastn	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,505 @@
+BLASTN 2.2.1 [Apr-13-2001]
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+RID: 1012577175-3730-28291
+Query= 
+         (60 letters)
+
+Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,
+or phase 0, 1 or 2 HTGS sequences) 
+           1,083,200 sequences; 4,677,375,331 total letters
+
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+gb|AY052359.1| Arabidopsis thaliana At2g17400 mRNA, complete cds       96   3e-18
+gb|AC002329.2|AC002329 Arabidopsis thaliana chromosome II sectio...    96   3e-18
+gb|AF132318.1|AF132318 Buchnera aphidicola phosphoribosyl anthra...    42   0.040
+gb|AC024791.1| Caenorhabditis elegans cosmid Y47G6A, complete se...    36   2.5  
+gb|AC017078.8| Homo sapiens BAC clone RP11-457N9 from 2, complet...    36   2.5  
+gb|AC005046.3|AC005046 Homo sapiens BAC clone CTB-13F3 from 7q22...    36   2.5  
+gb|AC006017.2|AC006017 Homo sapiens PAC clone RP5-981O7 from 7q3...    36   2.5  
+dbj|AP001519.1|AP001519 Bacillus halodurans genomic DNA, section...    36   2.5  
+gb|AC095064.3| Homo sapiens chromosome 4 clone RP11-620C21, comp...    34   9.7  
+gb|AC003029.3| Homo sapiens Chromosome 12q24 PAC RP3-462E2 (Rosw...    34   9.7  
+gb|AC079248.5| Homo sapiens BAC clone RP11-24J11 from 2, complet...    34   9.7  
+gb|AC093865.2| Homo sapiens chromosome 2 clone RP11-560C24, comp...    34   9.7  
+gb|AC010202.6|AC010202 Homo sapiens 12q BAC RP11-210L7 (Roswell ...    34   9.7  
+gb|AC017118.3|AC017118 Genomic sequence for Arabidopsis thaliana...    34   9.7  
+emb|AL355520.8|AL355520 Human DNA sequence from clone RP4-595C2 ...    34   9.7  
+emb|AL137879.15|AL137879 Human DNA sequence from clone RP11-153O...    34   9.7  
+gb|AC006395.1|AC006395 Homo sapiens PAC clone RP3-394H4 from Xq2...    34   9.7  
+gb|AE000650.1|AE000650 Helicobacter pylori 26695 section 128 of ...    34   9.7  
+gb|AC006924.3|AC006924 Homo sapiens, clone hRPK.32_A_1, complete...    34   9.7  
+gb|AC006266.1|AC006266 Arabidopsis thaliana BAC F1K3 from chromo...    34   9.7  
+gb|U32797.1|U32797 Haemophilus influenzae Rd section 112 of 163 ...    34   9.7  
+emb|AJ286341.1|HIM286341 Human immunodeficiency virus type 1 pro...    34   9.7  
+emb|AL161561.2|ATCHRIV61 Arabidopsis thaliana DNA chromosome 4, ...    34   9.7  
+emb|AL161508.2|ATCHRIV20 Arabidopsis thaliana DNA chromosome 4, ...    34   9.7  
+emb|AL035356.1|ATF22K18 Arabidopsis thaliana DNA chromosome 4, B...    34   9.7  
+emb|AJ132676.1|MMU132676 Mus musculus IgVk gh33r pseudogene            34   9.7  
+emb|AJ132673.1|MMU132673 Mus musculus IgVk gd33r pseudogene            34   9.7  
+emb|AJ132672.1|MMU132672 Mus musculus IgVk gc33r pseudogene            34   9.7  
+emb|Z74955.1|SCYOR047C S.cerevisiae chromosome XV reading frame ...    34   9.7  
+emb|Z74954.1|SCYOR046C S.cerevisiae chromosome XV reading frame ...    34   9.7  
+gb|U28135.1|SCU28135 Saccharomyces cerevisiae DEAD-Box Protein 5...    34   9.7  
+
+ALIGNMENTS
+>gb|AY052359.1| Arabidopsis thaliana At2g17400 mRNA, complete cds
+          Length = 2826
+
+ Score = 95.6 bits (48), Expect = 3e-18
+ Identities = 58/60 (96%), Gaps = 1/60 (1%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 1   aggaatgctgtttaattggaatcgtacaatggagaatttgacggaaatagaatcaacgat 60
+           |||||||||||||||||||||||  |||||||||||||||||||||||||||||||||||
+Sbjct: 154 aggaatgctgtttaattggaatca-acaatggagaatttgacggaaatagaatcaacgat 212
+
+
+>gb|AC002329.2|AC002329 Arabidopsis thaliana chromosome II section 100 of 255 of the complete
+             sequence. Sequence from clones T23A1, F5J6, MJB20
+          Length = 76170
+
+ Score = 95.6 bits (48), Expect = 3e-18
+ Identities = 58/60 (96%), Gaps = 1/60 (1%)
+ Strand = Plus / Plus
+
+                                                                         
+Query: 1     aggaatgctgtttaattggaatcgtacaatggagaatttgacggaaatagaatcaacgat 60
+             |||||||||||||||||||||||  |||||||||||||||||||||||||||||||||||
+Sbjct: 60659 aggaatgctgtttaattggaatca-acaatggagaatttgacggaaatagaatcaacgat 60717
+
+
+>gb|AF132318.1|AF132318 Buchnera aphidicola phosphoribosyl anthranilate transferase (trpD),
+           phosphoribosyl anthranilate isomerase/indoleglycerol
+           phosphate synthetase fusion (trpC/F), beta subunit of
+           tryptophan synthetase (trpB), and alpha subunit of
+           tryptophan synthetase (trp>
+          Length = 5383
+
+ Score = 42.1 bits (21), Expect = 0.040
+ Identities = 21/21 (100%)
+ Strand = Plus / Plus
+
+                                
+Query: 35  aatttgacggaaatagaatca 55
+           |||||||||||||||||||||
+Sbjct: 536 aatttgacggaaatagaatca 556
+
+
+>gb|AC024791.1| Caenorhabditis elegans cosmid Y47G6A, complete sequence
+          Length = 194322
+
+ Score = 36.2 bits (18), Expect = 2.5
+ Identities = 18/18 (100%)
+ Strand = Plus / Minus
+
+                                
+Query: 34     gaatttgacggaaataga 51
+              ||||||||||||||||||
+Sbjct: 193876 gaatttgacggaaataga 193859
+
+
+>gb|AC017078.8| Homo sapiens BAC clone RP11-457N9 from 2, complete sequence
+          Length = 193979
+
+ Score = 36.2 bits (18), Expect = 2.5
+ Identities = 24/26 (92%)
+ Strand = Plus / Plus
+
+                                        
+Query: 3      gaatgctgtttaattggaatcgtaca 28
+              ||||| ||||||| ||||||||||||
+Sbjct: 142900 gaatgttgtttaaatggaatcgtaca 142925
+
+
+>gb|AC005046.3|AC005046 Homo sapiens BAC clone CTB-13F3 from 7q22, complete sequence
+          Length = 219436
+
+ Score = 36.2 bits (18), Expect = 2.5
+ Identities = 18/18 (100%)
+ Strand = Plus / Plus
+
+                               
+Query: 13    taattggaatcgtacaat 30
+             ||||||||||||||||||
+Sbjct: 46649 taattggaatcgtacaat 46666
+
+
+>gb|AC006017.2|AC006017 Homo sapiens PAC clone RP5-981O7 from 7q34-q36, complete sequence
+          Length = 162556
+
+ Score = 36.2 bits (18), Expect = 2.5
+ Identities = 18/18 (100%)
+ Strand = Plus / Minus
+
+                                
+Query: 13     taattggaatcgtacaat 30
+              ||||||||||||||||||
+Sbjct: 114940 taattggaatcgtacaat 114923
+
+
+>dbj|AP001519.1|AP001519 Bacillus halodurans genomic DNA, section 13/14
+          Length = 303650
+
+ Score = 36.2 bits (18), Expect = 2.5
+ Identities = 18/18 (100%)
+ Strand = Plus / Minus
+
+                               
+Query: 30    tggagaatttgacggaaa 47
+             ||||||||||||||||||
+Sbjct: 28875 tggagaatttgacggaaa 28858
+
+
+>gb|AC095064.3| Homo sapiens chromosome 4 clone RP11-620C21, complete sequence
+          Length = 87943
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 12    ttaattggaatcgtaca 28
+             |||||||||||||||||
+Sbjct: 42346 ttaattggaatcgtaca 42362
+
+
+>gb|AC003029.3| Homo sapiens Chromosome 12q24 PAC RP3-462E2 (Roswell Park Cancer
+             Institute Human PAC library) complete sequence
+          Length = 137830
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 24    gtacaatggagaatttg 40
+             |||||||||||||||||
+Sbjct: 71491 gtacaatggagaatttg 71507
+
+
+>gb|AC079248.5| Homo sapiens BAC clone RP11-24J11 from 2, complete sequence
+          Length = 128535
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                               
+Query: 6      tgctgtttaattggaat 22
+              |||||||||||||||||
+Sbjct: 111999 tgctgtttaattggaat 111983
+
+
+>gb|AC093865.2| Homo sapiens chromosome 2 clone RP11-560C24, complete sequence
+          Length = 186218
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                               
+Query: 5      atgctgtttaattggaa 21
+              |||||||||||||||||
+Sbjct: 113514 atgctgtttaattggaa 113530
+
+
+>gb|AC010202.6|AC010202 Homo sapiens 12q BAC RP11-210L7 (Roswell Park Cancer Institute Human
+             BAC Library) complete sequence
+          Length = 170004
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 4     aatgctgtttaattgga 20
+             |||||||||||||||||
+Sbjct: 95294 aatgctgtttaattgga 95310
+
+
+>gb|AC017118.3|AC017118 Genomic sequence for Arabidopsis thaliana BAC F6N18 from chromosome I,
+             complete sequence
+          Length = 92219
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 14    aattggaatcgtacaat 30
+             |||||||||||||||||
+Sbjct: 84400 aattggaatcgtacaat 84416
+
+
+>emb|AL355520.8|AL355520 Human DNA sequence from clone RP4-595C2 on chromosome 1q24.1-25.3
+             Contains ESTs, STSs and GSSs. Contains the 3' part of the
+             gene for two isoforms of the KIAA0351 protein and the gene
+             for angiopoietin Y1, complete sequence [Homo sapiens]
+          Length = 157575
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 5     atgctgtttaattggaa 21
+             |||||||||||||||||
+Sbjct: 80465 atgctgtttaattggaa 80481
+
+
+>emb|AL137879.15|AL137879 Human DNA sequence from clone RP11-153O23 on chromosome 13, complete
+             sequence [Homo sapiens]
+          Length = 85149
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 25    tacaatggagaatttga 41
+             |||||||||||||||||
+Sbjct: 41783 tacaatggagaatttga 41799
+
+
+>gb|AC006395.1|AC006395 Homo sapiens PAC clone RP3-394H4 from Xq23, complete sequence
+          Length = 72291
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                              
+Query: 1     aggaatgctgtttaatt 17
+             |||||||||||||||||
+Sbjct: 40047 aggaatgctgtttaatt 40063
+
+
+>gb|AE000650.1|AE000650 Helicobacter pylori 26695 section 128 of 134 of the complete genome
+          Length = 11043
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                             
+Query: 36   atttgacggaaatagaa 52
+            |||||||||||||||||
+Sbjct: 1772 atttgacggaaatagaa 1756
+
+
+>gb|AC006924.3|AC006924 Homo sapiens, clone hRPK.32_A_1, complete sequence
+          Length = 165633
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                              
+Query: 4     aatgctgtttaattgga 20
+             |||||||||||||||||
+Sbjct: 11360 aatgctgtttaattgga 11344
+
+
+>gb|AC006266.1|AC006266 Arabidopsis thaliana BAC F1K3 from chromosome IV near 21 cM, complete
+             sequence
+          Length = 105680
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 20/21 (95%)
+ Strand = Plus / Minus
+
+                                  
+Query: 20    aatcgtacaatggagaatttg 40
+             ||||||||||||||| |||||
+Sbjct: 52077 aatcgtacaatggagtatttg 52057
+
+
+>gb|U32797.1|U32797 Haemophilus influenzae Rd section 112 of 163 of the complete genome
+          Length = 10274
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                             
+Query: 26   acaatggagaatttgac 42
+            |||||||||||||||||
+Sbjct: 2447 acaatggagaatttgac 2431
+
+
+>emb|AJ286341.1|HIM286341 Human immunodeficiency virus type 1 proviral env gene for gp160,
+            genomic RNA, isolate M2424/4, clone 1
+          Length = 2586
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 20/21 (95%)
+ Strand = Plus / Plus
+
+                                 
+Query: 7    gctgtttaattggaatcgtac 27
+            |||||||||||||||| ||||
+Sbjct: 1176 gctgtttaattggaatagtac 1196
+
+
+>emb|AL161561.2|ATCHRIV61 Arabidopsis thaliana DNA chromosome 4, contig fragment No. 61
+          Length = 198402
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                               
+Query: 44     gaaatagaatcaacgat 60
+              |||||||||||||||||
+Sbjct: 146009 gaaatagaatcaacgat 146025
+
+
+>emb|AL161508.2|ATCHRIV20 Arabidopsis thaliana DNA chromosome 4, contig fragment No. 20
+          Length = 196517
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 20/21 (95%)
+ Strand = Plus / Minus
+
+                                   
+Query: 20     aatcgtacaatggagaatttg 40
+              ||||||||||||||| |||||
+Sbjct: 180658 aatcgtacaatggagtatttg 180638
+
+
+>emb|AL035356.1|ATF22K18 Arabidopsis thaliana DNA chromosome 4, BAC clone  F22K18 (ESSAII project)
+          Length = 125803
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                               
+Query: 44     gaaatagaatcaacgat 60
+              |||||||||||||||||
+Sbjct: 102901 gaaatagaatcaacgat 102885
+
+
+>emb|AJ132676.1|MMU132676 Mus musculus IgVk gh33r pseudogene
+          Length = 737
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                            
+Query: 13  taattggaatcgtacaa 29
+           |||||||||||||||||
+Sbjct: 317 taattggaatcgtacaa 301
+
+
+>emb|AJ132673.1|MMU132673 Mus musculus IgVk gd33r pseudogene
+          Length = 813
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                            
+Query: 13  taattggaatcgtacaa 29
+           |||||||||||||||||
+Sbjct: 318 taattggaatcgtacaa 302
+
+
+>emb|AJ132672.1|MMU132672 Mus musculus IgVk gc33r pseudogene
+          Length = 736
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                            
+Query: 13  taattggaatcgtacaa 29
+           |||||||||||||||||
+Sbjct: 318 taattggaatcgtacaa 302
+
+
+>emb|Z74955.1|SCYOR047C S.cerevisiae chromosome XV reading frame ORF YOR047c
+          Length = 3461
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                            
+Query: 24  gtacaatggagaatttg 40
+           |||||||||||||||||
+Sbjct: 473 gtacaatggagaatttg 489
+
+
+>emb|Z74954.1|SCYOR046C S.cerevisiae chromosome XV reading frame ORF YOR046c
+          Length = 2310
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Plus
+
+                             
+Query: 24   gtacaatggagaatttg 40
+            |||||||||||||||||
+Sbjct: 1605 gtacaatggagaatttg 1621
+
+
+>gb|U28135.1|SCU28135 Saccharomyces cerevisiae DEAD-Box Protein 5 (DBP5) gene, complete cds
+          Length = 3696
+
+ Score = 34.2 bits (17), Expect = 9.7
+ Identities = 17/17 (100%)
+ Strand = Plus / Minus
+
+                             
+Query: 24   gtacaatggagaatttg 40
+            |||||||||||||||||
+Sbjct: 1212 gtacaatggagaatttg 1196
+
+
+  Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,
+  or phase 0, 1 or 2 HTGS sequences)
+    Posted date:  Jan 31, 2002 11:56 PM
+  Number of letters in database: 382,408,035
+  Number of sequences in database:  1,083,200
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 117,267
+Number of Sequences: 1083200
+Number of extensions: 117267
+Number of successful extensions: 7699
+Number of sequences better than 10.0: 31
+length of query: 60
+length of database: 4,677,375,331
+effective HSP length: 19
+effective length of query: 41
+effective length of database: 4,656,794,531
+effective search space: 190928575771
+effective search space used: 190928575771
+T: 0
+A: 30
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 17 (34.2 bits)
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/aaml.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/aaml.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/aaml.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,54 @@
+AAML (in paml 3.12 February 2002)    stewart.aa   Model: Empirical (wag.dat) 
+ns = 6  	ls = 130
+# site patterns = 98
+    4    1    1    1    1    8    2    1    6    1    1    3    1    1    1
+    1    1    4    2    1    1    3    1    4    1    1    1    1    1    1
+    3    1    1    1    1    1    1    1    1    1    1    2    2    2    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1
+
+Langur                KIFERCELAR TLKLGLDGYK VSNWVLAKWG NTETYNPGDE TDYIFQSRYN NGTPGAVDAH ISSALQNNIA DAVARVVSDP QIRVRNHQNK VSQVKGGV
+Baboon                .......... ..R......R I........D ..Q......Q .......H.. D......N.. ..N...D..T .......... .........R ....Q...
+Human                 .V........ ..R..M...R I...M..... ..R...A..R .......... D......N.. L.....D... .......R.. ......R..R .R..Q...
+Rat                   .TY....F.. ..RN.MS..Y ..D....QHN ..QR.D...Q .......... D...R.KN.G .P....DD.T Q.IQ...R.. ....QR.K.R L.GIRN..
+Cow                   .V........ .......... ....L.T..S ..K....SS. .......KW. D...N...G. V..EME.D.. K...KI..E- ..T.KS.RDH ..S.E.TL
+Horse                 .V.SK....H K.AQEM..FG Y....M.EYN ..RFGKNANG S..L..NKWK DN-RSSSN.N .M.K.DE..D .DIS...R.. KMSKVK.KD. L.ELASNL
+
+
+
+
+Frequencies..
+                                    A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V
+Langur                         0.1000 0.0462 0.0846 0.0538 0.0615 0.0385 0.0385 0.0846 0.0154 0.0462 0.0615 0.0692 0.0000 0.0154 0.0231 0.0615 0.0385 0.0385 0.0462 0.0769
+Baboon                         0.0923 0.0615 0.0846 0.0692 0.0615 0.0615 0.0231 0.0769 0.0231 0.0538 0.0615 0.0385 0.0000 0.0154 0.0231 0.0538 0.0462 0.0385 0.0462 0.0692
+Human                          0.1077 0.1077 0.0769 0.0615 0.0615 0.0462 0.0231 0.0846 0.0077 0.0385 0.0615 0.0385 0.0154 0.0154 0.0154 0.0462 0.0385 0.0385 0.0462 0.0692
+Rat                            0.0846 0.0923 0.0692 0.0692 0.0615 0.0692 0.0231 0.0769 0.0154 0.0538 0.0462 0.0462 0.0077 0.0154 0.0308 0.0538 0.0462 0.0308 0.0615 0.0462
+Cow                            0.0775 0.0233 0.0620 0.0543 0.0620 0.0155 0.0620 0.0620 0.0233 0.0388 0.0698 0.0930 0.0078 0.0155 0.0155 0.1008 0.0620 0.0465 0.0388 0.0698
+Horse                          0.0853 0.0310 0.1008 0.0775 0.0620 0.0155 0.0465 0.0543 0.0155 0.0233 0.0775 0.1163 0.0310 0.0388 0.0078 0.1008 0.0078 0.0388 0.0310 0.0388
+
+Average                        0.0913 0.0604 0.0797 0.0643 0.0617 0.0411 0.0360 0.0733 0.0167 0.0424 0.0630 0.0668 0.0103 0.0193 0.0193 0.0694 0.0398 0.0386 0.0450 0.0617
+(Ambiguity characters are used to calculate freqs.)
+
+
+# constant sites:     46 (35.38%)
+AA distances (raw proportions of different sites)
+
+Langur         
+Baboon           0.1077
+Human            0.1385  0.1077
+Rat              0.2923  0.2538  0.2846
+Cow              0.2462  0.3000  0.3154  0.4231
+Horse            0.5000  0.5000  0.4923  0.4923  0.5462
+
+TREE #  1:  (((1, 2), 3), 4, (5, 6));   MP score: -1
+lnL(ntime:  9  np:  9):  -1042.768973     +0.000000
+   7..8     8..9     9..1     9..2     8..3     7..4     7..10   10..5    10..6  
+  0.00950  0.02220  0.08009  0.03337  0.06233  0.27133  0.09393  0.24105  0.58792
+
+tree length =   1.40172
+
+(((1:0.080088, 2:0.033370):0.022202, 3:0.062325):0.009497, 4:0.271333, (5:0.241055, 6:0.587920):0.093926);
+
+(((Langur:0.080088, Baboon:0.033370):0.022202, Human:0.062325):0.009497, Rat:0.271333, (Cow:0.241055, Horse:0.587920):0.093926);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/aaml_pairwise.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/aaml_pairwise.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/aaml_pairwise.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+AAML (in paml 3.13, August 2002)    abglobin.aa   Model: Empirical_F (wag.dat) 
+ns = 5  	ls = 285
+# site patterns = 126
+   16   21   10    1    1   10   17    1    1    2    1    1    3    1   13
+    1    2    1    1    5    2    1    5   15    1    1    5    1   10    1
+    1    7   10    1   14    1    1    1    1    1    3    1    2    1    1
+    1    1    1    1    1    1    1    1    1    1    1    4    1    4    1
+    1    1    1    1    2    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1
+
+human                 VLSPADKTNV AAWGGAHAEY GAEALERMFL SPTPHFLSHA QVKGKADTNV AVDMNALANL CLALPAEFAV LSSTPESAVT ALGNVDEVQF ETPDVMGKLG ASDLANFTCR VLCVAHFEPV AAYVNK
+goat-cow              ...A...S.. .....GN.A. .......... .......... ....E.A.K. GL.LGT.D.. S.C..ND... .N..A.A... .F.K...... ..A...N..D S.NMKD.A.K ..V..R..VL .DF..R
+rabbit                .........I T..E.S.G.. ....V..... G.....FT.E .I.A.SE.K. GL.LG..T.. ..NH.S.... .N.SS..... .....E.... .SAN..N..A ..E.S..K.. ..I.S...Q. ......
+rat                   ...AD....I NC...G.G.. .E...Q...A A..S.IV.P. ...A...AKA D.ELG..T.F ..CH.GD..M ....DAA..N ....P.D..Y DSASI...IN .N..K..H.. MII.G.L.CA ..F.S.
+marsupial             ...D....H. .I...G..A. A....A.T.. ........P. .IQ....SQ. .L.LGTMK.. .I..SKDLE. F.A.S.NCI. TISQ..QTT. GS.G..SA.T SGEVK.YK.K IIIC.E.DEC V.WLH.
+
+
+
+
+Frequencies..
+                                    A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V
+human                          0.1263 0.0211 0.0351 0.0526 0.0105 0.0140 0.0421 0.0702 0.0632 0.0000 0.1263 0.0772 0.0105 0.0526 0.0491 0.0561 0.0561 0.0105 0.0211 0.1053
+goat-cow                       0.1193 0.0246 0.0421 0.0632 0.0070 0.0140 0.0386 0.0737 0.0526 0.0000 0.1298 0.0842 0.0105 0.0596 0.0351 0.0596 0.0526 0.0105 0.0175 0.1053
+rabbit                         0.0982 0.0211 0.0421 0.0386 0.0070 0.0175 0.0596 0.0702 0.0667 0.0140 0.1228 0.0842 0.0070 0.0561 0.0386 0.0737 0.0561 0.0105 0.0211 0.0947
+rat                            0.1193 0.0211 0.0386 0.0667 0.0175 0.0175 0.0316 0.0807 0.0667 0.0246 0.1123 0.0842 0.0140 0.0491 0.0386 0.0596 0.0456 0.0105 0.0211 0.0807
+marsupial                      0.1088 0.0175 0.0211 0.0561 0.0175 0.0281 0.0351 0.0702 0.0632 0.0281 0.1088 0.0842 0.0175 0.0491 0.0351 0.0737 0.0667 0.0140 0.0211 0.0842
+
+Average                        0.1144 0.0211 0.0358 0.0554 0.0119 0.0182 0.0414 0.0730 0.0625 0.0133 0.1200 0.0828 0.0119 0.0533 0.0393 0.0646 0.0554 0.0112 0.0204 0.0940
+
+# constant sites:    170 (59.65%)
+ln Lmax (unconstrained) = -1189.106658
+
+AA distances (raw proportions of different sites)
+
+human          
+goat-cow         0.1439
+rabbit           0.1368  0.1825
+rat              0.2000  0.2351  0.2035
+marsupial        0.2456  0.2561  0.2877  0.3123
+
+ML distances of aa seqs.
+human          
+goat-cow         0.1551
+rabbit           0.1474  0.2020
+rat              0.2267  0.2694  0.2306
+marsupial        0.2870  0.3024  0.3392  0.3861

Added: trunk/packages/bioperl/branches/upstream/current/t/data/acefile.ace.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/acefile.ace.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/acefile.ace.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7661 @@
+AS 53 114
+
+CO Contig1 796 1 1 U
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccggcaacaagcacaccacctgttgaaGCAGAAGAGAGTAAGAATAA
+GAAGCCACGATTGGCATAGAGTCAGCTGAGTGTCGCACCCACCACCGCAT
+TTACTTTTCTTTTTAAAATCTTTGTTTTCATTTGCTTTCTGTCTTTTTGT
+TTCCAACTTCGTATTTTAGACTCCATGTGGTGGTGCATGTTTTACTTATG
+AATCCAAGTTCTTCTCTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAaggGGggxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxgggggacgggaaaaaccct
+ggggttccccaacttaatcgccttgcagaaaatccccttttccccagttg
+gggtaaaaccaaaaaggcccccaccgatcgcccttcccaacagttgccca
+ccctgaatggcaaaggggaccccccctgtaccggcccattaagcgcgggg
+ggtgtgggggttccccccagggggaccgttacatttgccagggccctagc
+gcccgctcctttggtttttttcccttcttttttcgccacgttcgccggtt
+ttccccgtcaagctttaaatggggggccccctatagggttccgatt
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 42 42 39 39 37 42 42 43 37 40 35 39 39 39 35 35 35 35 35
+ 35 37 37 35 39 35 35 35 35 40 40 39 39 39 35 35 35 39 39 40 39 35 35 35 35 35 39 39 40 51 51 40 35 35 35 35 35 35 40 40 40 40 40 40 35 35 38 38 38 39
+ 39 35 35 35 35 39 40 56 56 56 56 46 46 46 46 40 40 51 56 56 56 51 51 51 51 51 51 51 51 51 51 43 35 35 35 35 35 43 51 51 41 51 51 51 51 51 56 56 56 43
+ 43 43 43 43 43 43 43 43 43 43 43 43 45 56 51 51 51 51 43 43 43 43 43 43 45 51 51 51 56 56 56 56 43 43 43 43 43 43 56 56 51 51 43 43 43 43 43 43 51 51
+ 56 56 56 56 56 56 56 51 45 45 45 45 45 51 51 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 46 33 13 13 12 31 29 14 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2261r U 1
+BS 1 796 LL2261r
+
+RD LL2261r 796 0 6
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccggcaacaagcacaccacctgttgaaGCAGAAGAGAGTAAGAATAA
+GAAGCCACGATTGGCATAGAGTCAGCTGAGTGTCGCACCCACCACCGCAT
+TTACTTTTCTTTTTAAAATCTTTGTTTTCATTTGCTTTCTGTCTTTTTGT
+TTCCAACTTCGTATTTTAGACTCCATGTGGTGGTGCATGTTTTACTTATG
+AATCCAAGTTCTTCTCTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAaggGGggxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxgggggacgggaaaaaccct
+ggggttccccaacttaatcgccttgcagaaaatccccttttccccagttg
+gggtaaaaccaaaaaggcccccaccgatcgcccttcccaacagttgccca
+ccctgaatggcaaaggggaccccccctgtaccggcccattaagcgcgggg
+ggtgtgggggttccccccagggggaccgttacatttgccagggccctagc
+gcccgctcctttggtttttttcccttcttttttcgccacgttcgccggtt
+ttccccgtcaagctttaaatggggggccccctatagggttccgatt
+
+QA 81 571 46 796
+DS CHROMAT_FILE: LL2261r PHD_FILE: LL2261r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:32 2000
+
+RT{
+LL2261r matchElsewhereLowQual phrap 710 758 000919:094547
+}
+
+RT{
+LL2261r matchElsewhereLowQual phrap 664 695 000919:094547
+}
+
+RT{
+LL2261r matchElsewhereLowQual phrap 599 614 000919:094547
+}
+
+RT{
+LL2261r matchElsewhereLowQual phrap 572 589 000919:094547
+}
+
+RT{
+LL2261r matchElsewhereLowQual phrap 529 553 000919:094547
+}
+
+RT{
+LL2261r matchElsewhereLowQual phrap 492 525 000919:094547
+}
+
+CO Contig2 787 1 1 U
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacgcg
+tccgcccacgcgtccgcaacaaccaataaacggtttATTTCGCTCAGCAC
+TCAACCGCAATGGCCGCCTCAACAATGGCTCTCTCCTCCCCTGCCTTCGC
+CGGAAAGGCCGTCAAGCTTTCCCCAGCAGCATCAGAAGTCCTTGGAAGCG
+GCCGTGTGACAATGAGGAAGACCGTAGCCAAGCCAAAGGGCCCATCAGGC
+AGCCCATGGTACGGATCCGAGAGAGTCAAGTACTTGGGCCCATTCTCCGG
+CGAGCCACCGAGCTACCTTACCGGAGAGTTCCCCGGAGACTACGGATGGG
+ACACCGCAGGTCTCTCAGCCGATCCCGAGACGTTCGCGAGGAACCGTGAG
+CTAGAAGTTATCCACTGCAGGTGGGCCATGCTCGGAGCCCTAGGCTGCGT
+CTTCCCGGAGCTGTTGGCCAGGAACGGAGTCAAGTTCGGAGAGGCGGTTT
+GGTTCAAGGCCGGTTCGCAGATCTTCAGCGAAGGAGGACTTGATTACTTG
+GGAAACCCTAGCTTGGTTCACGCTCAGAGCATCTTGgCTATTTGGGCTAC
+TCAAGTGATCTTGATGGGAGCCGTTGAAGGTTACAGAGTCGCAGGAGATG
+GGCCGTTGGGAGAGGCCGAGGACTTGCttTACCCAGGTggCAGCTTTGAC
+ccgTTGGGTTTGgctACTGACccagaggccTTCGCGGAGTTGAaggTGAA
+GgAGATCaagaaacggaGattggctatgttCtctAtg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 25 29 32 34 34 34 34 34 40 46 39 39 39
+ 39 39 40 45 35 35 35 35 35 40 46 40 40 34 34 34 34 34 35 40 40 51 40 40 40 40 39 39 39 39 39 39 40 40 40 40 40 40 40 40 39 39 39 39 39 35 35 35 35 35
+ 35 35 51 51 56 46 40 39 39 39 35 40 40 51 39 39 39 39 39 39 51 51 51 51 51 51 40 40 40 40 40 40 45 45 45 51 40 40 40 40 40 40 56 51 56 51 39 39 35 35
+ 35 35 35 35 45 45 51 51 51 40 40 40 45 45 51 56 56 56 39 38 35 35 35 35 43 43 45 45 45 45 51 51 51 51 51 51 56 46 45 41 41 41 45 46 56 56 56 56 56 51
+ 45 45 45 45 45 56 56 56 56 45 45 45 45 45 45 45 45 43 43 43 43 43 43 43 45 51 56 51 51 43 43 43 43 43 43 43 51 51 51 51 51 51 51 45 45 45 45 45 43 43
+ 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 45 51 51 45 45 51 51 43 43 51 43 43 36 36 36 36 36 36 43 43 43 51 56 56 43 43
+ 43 43 43 43 43 43 43 43 45 51 51 51 51 51 51 56 51 51 51 51 51 51 51 45 45 45 45 43 43 43 43 43 43 43 43 43 43 43 43 43 56 56 45 45 45 45 45 45 51 45
+ 45 45 45 45 45 51 51 51 51 51 51 51 45 45 45 45 40 40 43 46 46 51 51 51 45 45 45 45 40 40 45 45 45 40 45 45 45 45 45 51 51 56 45 40 40 40 40 40 40 45
+ 45 51 51 51 51 45 45 45 45 45 45 45 51 51 51 45 45 45 45 45 45 51 45 45 45 45 45 51 45 45 45 45 45 45 45 45 45 45 45 51 51 46 46 43 42 42 46 48 56 56
+ 56 56 51 51 51 51 51 45 56 56 42 46 46 51 37 37 37 37 37 40 46 51 51 56 56 56 45 45 45 37 37 37 40 38 40 40 40 40 40 40 45 42 37 37 37 40 40 40 51 56
+ 51 56 46 46 46 40 40 40 44 40 40 40 40 34 32 25 29 32 32 32 34 34 34 33 37 35 48 40 40 40 48 48 48 48 34 26 19 25 27 27 29 39 48 46 39 31 35 35 35 35
+ 35 37 40 40 40 51 42 51 56 56 56 56 56 56 56 56 56 48 46 44 32 32 26 26 25 29 29 29 29 21 21 21 29 32 40 48 40 40 29 29 27 27 25 25 34 37 40 40 40 32
+ 34 32 32 32 25 25 25 29 29 48 40 40 27 25 22 25 27 29 25 22 22 25 25 22 24 29 29 19 19 23 27 27 27 32 25 22 22 25 19 18 24 25 27 29 25 32 20 25 21 21
+ 12 15 19 21 21 29 29 37 34 34 22 22 15 19 18 20 20 29 27 27 25 18 19 14 13 8 8 8 11 15 24 27 27 25 25 20 22 29 29 29 25 28 25 19 19 15 24 24 20 20
+ 22 19 25 26 26 29 24 19 9 10 10 9 9 9 10 18 19 22 19 15 19 18 12 17 14 13 13 16 11 16 23 17 18 14 21 18 0
+
+AF LL2330r U 1
+BS 1 787 LL2330r
+
+RD LL2330r 787 0 0
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacgcg
+tccgcccacgcgtccgcaacaaccaataaacggtttATTTCGCTCAGCAC
+TCAACCGCAATGGCCGCCTCAACAATGGCTCTCTCCTCCCCTGCCTTCGC
+CGGAAAGGCCGTCAAGCTTTCCCCAGCAGCATCAGAAGTCCTTGGAAGCG
+GCCGTGTGACAATGAGGAAGACCGTAGCCAAGCCAAAGGGCCCATCAGGC
+AGCCCATGGTACGGATCCGAGAGAGTCAAGTACTTGGGCCCATTCTCCGG
+CGAGCCACCGAGCTACCTTACCGGAGAGTTCCCCGGAGACTACGGATGGG
+ACACCGCAGGTCTCTCAGCCGATCCCGAGACGTTCGCGAGGAACCGTGAG
+CTAGAAGTTATCCACTGCAGGTGGGCCATGCTCGGAGCCCTAGGCTGCGT
+CTTCCCGGAGCTGTTGGCCAGGAACGGAGTCAAGTTCGGAGAGGCGGTTT
+GGTTCAAGGCCGGTTCGCAGATCTTCAGCGAAGGAGGACTTGATTACTTG
+GGAAACCCTAGCTTGGTTCACGCTCAGAGCATCTTGgCTATTTGGGCTAC
+TCAAGTGATCTTGATGGGAGCCGTTGAAGGTTACAGAGTCGCAGGAGATG
+GGCCGTTGGGAGAGGCCGAGGACTTGCttTACCCAGGTggCAGCTTTGAC
+ccgTTGGGTTTGgctACTGACccagaggccTTCGCGGAGTTGAaggTGAA
+GgAGATCaagaaacggaGattggctatgttCtctAtg
+
+QA 81 786 44 787
+DS CHROMAT_FILE: LL2330r PHD_FILE: LL2330r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:21 2000
+
+CO Contig3 770 1 1 U
+atttcgagctcggtacccggggatcctctagagtcgacctgcaggcxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxGCCTGGGGTGCCTAATGAGTGAG
+CTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAA
+ACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGGC
+GGTTTGCGTATTGGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCG
+CTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAA
+TACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCA
+AAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTT
+TTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAA
+GTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCC
+CCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGG
+ATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGgCGCtTTCTCATAGCT
+CACGCTGTAggTATCTCAGttCGGTGTAggTCGTtCGCTCCAAGCTGGGC
+TGTgTGCACGAACCCCCCGt
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 41 45 45 45 51 51 56 36 36 36 36 36 36 41 41 46 56 56 42 46 42 38 36
+ 43 36 36 36 43 43 36 36 40 40 43 41 41 41 43 43 43 43 43 43 43 43 43 43 43 43 36 36 36 36 39 43 36 36 36 36 36 36 43 46 42 42 42 46 46 46 46 42 42 41
+ 41 41 45 45 45 45 45 43 43 39 40 40 36 38 43 43 43 43 40 43 41 41 41 45 43 43 41 45 39 43 36 39 38 36 36 43 43 43 43 43 43 36 40 41 41 41 38 38 43 39
+ 43 43 46 43 43 43 43 43 43 43 56 56 51 43 43 43 43 43 43 43 43 43 36 36 36 36 36 36 43 43 43 43 43 36 36 36 36 36 36 36 43 43 36 39 43 43 43 43 43 43
+ 43 43 43 43 43 45 51 51 51 51 51 51 51 45 45 45 45 43 40 40 38 38 41 45 43 43 43 51 45 45 45 45 45 40 38 38 40 40 38 41 43 46 45 45 45 45 45 45 56 56
+ 40 43 38 40 38 35 42 43 42 46 41 41 41 41 41 42 41 41 41 41 41 45 50 56 56 56 41 40 40 40 40 35 41 41 42 42 42 44 44 56 42 43 43 50 50 44 47 47 50 50
+ 56 56 50 42 45 45 40 35 35 42 50 44 44 42 40 40 40 37 37 37 35 42 40 40 40 40 40 40 50 50 43 43 44 46 40 39 35 35 35 39 39 35 35 35 39 40 45 45 45 45
+ 56 56 51 51 46 46 40 37 37 40 45 45 40 40 51 51 51 51 56 56 56 40 39 35 35 35 39 40 51 56 56 44 47 56 56 56 56 56 42 42 40 40 40 40 40 37 40 42 42 44
+ 44 42 42 45 37 37 40 45 45 40 35 35 35 35 35 37 37 40 40 45 40 40 40 40 40 37 37 35 35 35 35 42 51 45 40 35 35 35 35 35 37 40 40 40 36 34 40 40 36 40
+ 48 40 34 32 32 29 40 30 32 32 32 33 46 48 56 40 46 33 40 34 32 32 32 29 29 29 34 40 40 37 29 22 25 27 27 29 39 39 33 28 28 29 29 32 29 27 27 25 26 32
+ 34 32 32 36 34 40 40 40 33 39 29 29 29 35 46 40 40 40 40 40 32 48 31 29 25 29 31 34 29 29 40 31 29 22 19 25 25 25 19 22 29 40 40 40 40 44 40 40 40 40
+ 33 31 29 29 29 29 27 27 25 18 18 25 29 27 29 29 27 25 21 17 19 25 20 22 27 28 29 24 19 19 25 29 29 25 18 24 27 27 25 22 22 22 20 25 27 34 29 29 29 29
+ 32 24 24 18 25 20 29 25 27 24 24 25 29 40 40 40 29 29 24 16
+
+AF pgemr U 1
+BS 1 770 pgemr
+
+RD pgemr 770 0 1
+atttcgagctcggtacccggggatcctctagagtcgacctgcaggcxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxGCCTGGGGTGCCTAATGAGTGAG
+CTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAA
+ACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGGC
+GGTTTGCGTATTGGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCG
+CTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAA
+TACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCA
+AAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTT
+TTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAA
+GTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCC
+CCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGG
+ATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGgCGCtTTCTCATAGCT
+CACGCTGTAggTATCTCAGttCGGTGTAggTCGTtCGCTCCAAGCTGGGC
+TGTgTGCACGAACCCCCCGt
+
+QA 178 770 178 770
+DS CHROMAT_FILE: pgemr PHD_FILE: pgemr.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:41 2000
+
+RT{
+pgemr matchElsewhereHighQual phrap 178 318 000919:094547
+}
+
+CO Contig4 637 2 79 U
+tTtttCGCATAAA*TGAAAAATA*GTTTAAAACGgc*GAAAAATAAACAA
+TAATCAGGACTTCAACAATATTTATAAGAAAAAAAAAACCAAAAAAAATG
+AAGCAGCGACCAAAACATAATATAATATTTTTTGCTTAAGGACAAAAAAA
+CAGCATCACAAGAATCCTAAAGAGAAATAAAAGACACAGATAGAACTAAA
+GTACACAGAGACTCTTGACACATCCAGAAGGTTCCAAAGAACCACATAAC
+AACATATAGTTCCAATTCCCATATCAGCTAGGGAAAGAGACTTGTAAACA
+GACTCACCACACCACGAAGAAGGGAGCAGAGCCCTTAGAAGCACTTCCTG
+TGAACAATGGATGGACCTGACTCATCGTACTCTCCCTTTGAAATCCACAT
+CTGTTGGAAAGTGCTGAGGGATGCAAGGATAGATCCTCCAATCCAGACAC
+TGTATTTCCTCTCAGGAGGAGCCACCACCTTGATCTTCATGCTGCTAGGC
+GCAAGCGCCGTGATCTCCTTGCTCATACGATCAGCGATTCCTGGGAACAT
+GGTTGAACCACCACTGAGGACGATGTTTCCATACAGATCCTTCCTGATAT
+CGACATCACACTTCCCCACGCGTCCGCGGACGCGTGG
+
+BQ
+ 0 27 18 18 19 24 25 27 27 25 22 22 25 32 32 42 47 44 42 31 29 27 29 27 41 47 43 44 44 44 36 31 13 18 25 25 55 55 55 61 55 55 69 62 67 65 65 74 66 69
+ 69 69 75 72 72 71 69 55 50 38 35 33 36 28 43 47 55 80 86 88 82 82 82 77 77 84 84 78 82 90 90 88 75 65 62 60 64 83 87 88 90 90 80 71 71 77 77 69 69 66
+ 64 66 61 64 70 70 90 90 90 90 90 90 82 81 82 74 74 72 72 74 74 74 74 77 88 83 90 90 90 90 90 81 79 77 75 75 71 88 88 87 86 90 90 90 90 90 81 75 75 75
+ 72 72 82 88 83 83 80 90 87 82 79 77 79 82 84 85 90 90 90 90 90 90 90 90 88 88 90 90 90 90 90 88 88 88 90 90 90 90 90 88 86 86 86 86 80 77 82 82 82 82
+ 90 90 90 90 90 90 90 85 85 82 78 82 86 86 90 82 82 82 87 86 90 90 90 90 90 90 90 90 90 84 84 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 85 85 85 85 77 77 83 90 90 90 90 90 90 90 90 90 90 90 90 90 87 81 80 76 77 86 86 90 90 90 90 90 90 90 90 88 88 71 71 67 76 76 87 88 89 86 86 80 80
+ 77 80 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 84 84 84 84 84 80 85 84 84 86 74 74 74 74 73 85 78 78 71 70 82 85 90 71 71 62 69 69 77
+ 82 82 82 77 78 72 88 81 88 90 90 90 90 90 90 90 90 85 82 82 78 85 85 78 71 70 76 75 77 81 83 88 88 88 90 90 90 90 90 90 90 85 86 85 82 82 78 80 80 80
+ 88 88 90 90 87 73 73 57 57 62 72 74 82 85 90 90 90 83 83 79 79 78 78 90 87 87 87 87 87 82 88 90 90 90 90 90 90 90 90 89 87 80 80 80 74 79 67 69 57 73
+ 68 79 84 85 87 78 78 78 85 88 90 90 90 84 77 76 80 75 84 84 90 90 90 90 90 90 84 84 84 88 88 84 86 90 88 79 80 80 70 70 66 60 60 64 67 68 68 64 64 64
+ 67 64 67 62 59 59 50 50 50 67 68 83 90 90 90 82 75 71 71 71 71 75 74 80 82 82 82 81 76 72 67 64 69 70 72 77 72 72 73 79 82 82 79 79 77 72 68 70 64 62
+ 69 81 75 70 77 83 83 90 90 76 71 71 72 78 79 73 78 74 74 68 67 66 85 61 61 70 75 80 90 90 90 85 85 85 80 90 82 82 84 79 79 79 77 77 73 64 61 65 65 69
+ 71 82 74 76 59 68 63 67 61 67 73 72 66 60 55 59 59 57 54 64 58 58 57 57 56 63 75 71 49 49 49 34 34 35
+
+AF LL2260r C -108
+AF LL2260f U 0
+BS 1 9 LL2260r
+BS 10 10 LL2260f
+BS 11 16 LL2260r
+BS 17 21 LL2260f
+BS 22 26 LL2260r
+BS 27 35 LL2260f
+BS 36 54 LL2260r
+BS 55 55 LL2260f
+BS 56 61 LL2260r
+BS 62 68 LL2260f
+BS 69 69 LL2260r
+BS 70 71 LL2260f
+BS 72 73 LL2260r
+BS 74 76 LL2260f
+BS 77 81 LL2260r
+BS 82 90 LL2260f
+BS 91 93 LL2260r
+BS 94 98 LL2260f
+BS 99 100 LL2260r
+BS 101 109 LL2260f
+BS 110 115 LL2260r
+BS 116 126 LL2260f
+BS 127 129 LL2260r
+BS 130 131 LL2260f
+BS 132 143 LL2260r
+BS 144 144 LL2260f
+BS 145 146 LL2260r
+BS 147 149 LL2260f
+BS 150 160 LL2260r
+BS 161 164 LL2260f
+BS 165 165 LL2260r
+BS 166 168 LL2260f
+BS 169 172 LL2260r
+BS 173 175 LL2260f
+BS 176 218 LL2260r
+BS 219 221 LL2260f
+BS 222 223 LL2260r
+BS 224 224 LL2260f
+BS 225 228 LL2260r
+BS 229 234 LL2260f
+BS 235 242 LL2260r
+BS 243 248 LL2260f
+BS 249 258 LL2260r
+BS 259 262 LL2260f
+BS 263 273 LL2260r
+BS 274 274 LL2260f
+BS 275 282 LL2260r
+BS 283 284 LL2260f
+BS 285 322 LL2260r
+BS 323 323 LL2260f
+BS 324 338 LL2260r
+BS 339 339 LL2260f
+BS 340 352 LL2260r
+BS 353 353 LL2260f
+BS 354 390 LL2260r
+BS 391 394 LL2260f
+BS 395 407 LL2260r
+BS 408 410 LL2260f
+BS 411 455 LL2260r
+BS 456 456 LL2260f
+BS 457 461 LL2260r
+BS 462 463 LL2260f
+BS 464 468 LL2260r
+BS 469 470 LL2260f
+BS 471 471 LL2260r
+BS 472 473 LL2260f
+BS 474 479 LL2260r
+BS 480 482 LL2260f
+BS 483 484 LL2260r
+BS 485 486 LL2260f
+BS 487 526 LL2260r
+BS 527 537 LL2260f
+BS 538 542 LL2260r
+BS 543 548 LL2260f
+BS 549 554 LL2260r
+BS 555 557 LL2260f
+BS 558 591 LL2260r
+BS 592 596 LL2260f
+BS 597 637 LL2260r
+
+RD LL2260r 791 0 0
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxctttttttttttttttttttttttttttttttttnn
+nntttnntntTtttCGCATAAA*TGAAAAATA*GTTTAAAAcggc*GAAA
+AATAAACAATAATCAGGACTTcagcaATATTTATAAGAAAAAAAAAACCA
+AAAAAAATGAAGCAGCGACCAAAACATAATATAATATTTTTTGCTTAAGG
+ACAAAAAAACAGCATCACAAGAATCCTAAAGAGAAATAAAAGACACAGAT
+AGAACTAAAGTACACAGAGACTCTTGACACATCCAGAAGGTTCCAAAGAA
+CCACATAACAACATATAGTTCCAATTCCCATATCAGCTAGGGAAAGAGAC
+TTGTAAACAGACTCACCACACCACGAAGAAGGGAGCAGAGCCCTTAGAAG
+CACTTCCTGTGAACAATGGATGGACCTGACTCATCGTACTCTCCCTTTGA
+AATCCACATCTGTTGGAAAGTGCTGAGGGATGCAAGGATAGATCCTCCAA
+TCCAGACACTGTATTTCCTCTCAGGAGGAGCCACCACCTTGATCTTCATG
+CTGCTAGGCGCAAGCGCCGTGATCTCCTTGCTCATACGATCAGCGATTCC
+TGGGAACATGGTTGAACCACCACTGAGGACGATGTTTCCATACAGATCCT
+TCCTGATATCGACATCACACTTCCCCACGCGTCCGCGGACGCGTGGxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxagt
+
+QA 111 746 110 746
+DS CHROMAT_FILE: LL2260r PHD_FILE: LL2260r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:27 2000
+
+RD LL2260f 792 0 0
+nctttccgcATaaactgAAAAAtaggtTTAAAACGgcggaAAAATAAACA
+ATAATCAGGACTTCAACAATATTTATAAGAAAAAAAAAACCAAAAAAAAT
+GAAGCAGCGACCAAAACATAATATAATATTTTTTGCTTAAGGACAAAAAA
+ACAGCATCACAAGAATCCTAAAGAGAAATAAAAGACACAGATAGAACTAA
+AGTACACAGAGACTCTTGACACATCCAGAAGGTTCCAAAGAACCACATAA
+CAACATATAGTTCCAATTCCCATATCAGCTAGGGAAAGAGACTTGTAAAC
+AGACTCACCACACCACGAAGAAGGGAGCAGAGCCCTTAGAAGCACTTCCT
+GTGAACAATGGATGGACCTGACTCATCGTACTCTCCCTTTGAAATCCACA
+TCTGTTGGAAAGTGCTGAGGGATGCAAGGATAGATCCTCCAATCCAGACA
+CTGTATTTCCTCTCAGGAGGAGCCACCACCTTGATCTTCATGCTGCTAGG
+CGCAAGCGCCGTGATCTCCTTGCTCATACGATCAGCGATTCCTGGGAACA
+TGGTTGAACCACCACTGAGGACGATGTTTCCATACAGATCCTTCCTGATA
+TCGACATCACACTTCCCCACGCGTCCGCGGACGCGTgGxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+QA 40 638 3 638
+DS CHROMAT_FILE: LL2260f PHD_FILE: LL2260f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:50 2000
+
+CO Contig5 605 2 63 U
+CTtTTTTTTTTTTTTTTTTtttTtttttTTTTTTTTTTTTTTTTTTtttt
+ttTTTTTTTTTTTTTTTTTTTTTTTGAGGTTAACTTGATAGATTCAAATG
+GATATGAGAACTTTGATGAATCAAGACAGTGAGAGAGATGGGTGATAGAT
+ACATTGTTGAGTTTATACAAGAATGGACAGAAAAAAAAAAAGAAACATGT
+TCACCATTTCCAGAAAACATTGGAACCACACTTGAACTTGTCTTTGCCTT
+CGCATTCCAACGCCAAGTCTTCTGAGATAAATGGGACTTTCTTTTGCTTG
+GCAAGATCTTGGCAACCAGTGAAATTCTCAGGGAACTTGCAGCTTCCGAA
+TTGGACAGTAAACGCTCTCGCAAAGTTTGCTCCACTTGTTGCCAATCTCT
+TCTTATCATTAAGCTCTTTGTTGGCTTTGCTCTTCTCGAGGTATTCATCG
+AAGACGCCAGCATTAGCAGAAGCAGAAACAGCAGCAGAGGAGAAGAGTGT
+AGCGGCTAAGACAACCATTGCGGATCTTCGTCCATTAACATCATCACCAC
+CAACTCTTTGAGCTGCTTTGATCACCGGAAGCGTTTGTTTCTTCCGGACG
+CGTGG
+
+BQ
+ 22 21 16 25 27 27 56 56 47 42 33 44 42 30 30 30 33 33 31 18 18 18 25 19 18 17 17 17 25 25 26 26 26 42 48 42 42 42 42 42 43 44 44 30 30 20 19 14 14 14
+ 14 18 28 29 33 42 44 47 48 48 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 45 40 40 39 39 39 44 50 55 62 57 56 55 66 67 69 66 71 76 78 76 80 80 71 70
+ 74 71 71 72 81 81 75 71 71 71 71 77 81 80 85 85 80 80 80 80 80 80 81 81 74 71 55 61 62 68 69 80 85 85 85 86 90 89 90 90 86 90 90 90 90 87 87 80 80 86
+ 90 90 90 90 90 90 90 80 82 82 82 82 82 90 90 90 90 90 90 90 90 90 90 88 81 83 81 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 82 85 85 90 90 90 90 90 90 90 90 86 86 86 90 86 86 86 78 78 78 83 83 83 89 90 90 90 90 80 80
+ 80 80 80 80 90 90 90 90 81 78 78 78 78 78 80 86 84 83 81 85 86 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 86
+ 90 86 86 85 90 90 90 90 90 84 84 89 89 88 88 90 90 90 90 90 87 90 90 90 90 90 90 87 90 90 90 90 89 90 89 83 79 86 79 79 79 79 83 83 83 88 90 90 90 90
+ 90 90 90 90 86 86 83 81 86 79 86 81 86 86 86 86 86 86 86 86 86 86 86 84 90 90 81 86 90 90 90 90 90 90 90 87 87 90 90 89 90 89 89 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 84 88 83 83 86 90 90 90 90 90 88 82 80 80 80 82 88 90 90 90 90 90 90 90 90 90 90 90 90 90 85 80 80 80 85 90 90 86 85 85 86
+ 87 83 70 70 70 70 73 74 80 90 86 86 86 81 90 85 85 77 77 71 73 71 84 71 82 90 90 85 75 75 75 75 75 75 77 77 76 88 90 90 85 85 85 70 70 62 59 56 66 63
+ 52 52 52 57 56 64 58 70 58 61 69 80 86 86 90 88 88 72 77 79 90 90 90 90 90 90 83 83 77 69 65 65 75 57 57 61 64 64 67 73 80 85 90 90 90 85 85 90 90 90
+ 90 84 84 86 86 80 80 80 82 84 82 82 80 79 90 90 79 79 82 85 80 80 76 85 83 80 79 70 57 56 62 58 57 57 53 60 60 62 62 62 59 60 64 68 69 76 76 80 83 49
+ 49 49 34 33 32
+
+AF LL2263r C -139
+AF LL2263f U 71
+BS 1 81 LL2263r
+BS 82 83 LL2263f
+BS 84 144 LL2263r
+BS 145 145 LL2263f
+BS 146 182 LL2263r
+BS 183 188 LL2263f
+BS 189 265 LL2263r
+BS 266 268 LL2263f
+BS 269 281 LL2263r
+BS 282 283 LL2263f
+BS 284 290 LL2263r
+BS 291 293 LL2263f
+BS 294 298 LL2263r
+BS 299 303 LL2263f
+BS 304 304 LL2263r
+BS 305 305 LL2263f
+BS 306 311 LL2263r
+BS 312 321 LL2263f
+BS 322 322 LL2263r
+BS 323 324 LL2263f
+BS 325 328 LL2263r
+BS 329 333 LL2263f
+BS 334 337 LL2263r
+BS 338 342 LL2263f
+BS 343 346 LL2263r
+BS 347 350 LL2263f
+BS 351 354 LL2263r
+BS 355 356 LL2263f
+BS 357 358 LL2263r
+BS 359 361 LL2263f
+BS 362 362 LL2263r
+BS 363 373 LL2263f
+BS 374 374 LL2263r
+BS 375 376 LL2263f
+BS 377 377 LL2263r
+BS 378 378 LL2263f
+BS 379 410 LL2263r
+BS 411 411 LL2263f
+BS 412 414 LL2263r
+BS 415 415 LL2263f
+BS 416 436 LL2263r
+BS 437 439 LL2263f
+BS 440 446 LL2263r
+BS 447 447 LL2263f
+BS 448 449 LL2263r
+BS 450 451 LL2263f
+BS 452 452 LL2263r
+BS 453 475 LL2263f
+BS 476 478 LL2263r
+BS 479 486 LL2263f
+BS 487 507 LL2263r
+BS 508 508 LL2263f
+BS 509 517 LL2263r
+BS 518 529 LL2263f
+BS 530 540 LL2263r
+BS 541 542 LL2263f
+BS 543 545 LL2263r
+BS 546 547 LL2263f
+BS 548 558 LL2263r
+BS 559 562 LL2263f
+BS 563 570 LL2263r
+BS 571 572 LL2263f
+BS 573 605 LL2263r
+
+RD LL2263r 791 0 0
+ttaagttgggtaacgccagggttttxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxCTtTTTTTTT
+TTTTTTTTTtttTtttttTTTTTTTTTTTTTTTTTTttttttTTTTTTTT
+TTTTTTTTTTTTTTTGAGGTTAACTTGATAGATTCAAATGGATATGAGAA
+CTTTGATGAATCAAGACAGTGAGAGAGATGGGTGATAGATACATTGTTGA
+GTTTATACAAGAATGGACAGAAAAAAAAAAAGAAACATGTTCACCATTTC
+CAGAAAACATTGGAACCACACTTGAACTTGTCTTTGCCTTCGCATTCCAA
+CGCCAAGTCTTCTGAGATAAATGGGACTTTCTTTTGCTTGGCAAGATCTT
+GGCAACCAGTGAAATTCTCAGGGAACTTGCAGCTTCCGAATTGGACAGTA
+AACGCTCTCGCAAAGTTTGCTCCACTTGTTGCCAATCTCTTCTTATCATT
+AAGCTCTTTGTTGGCTTTGCTCTTCTCGAGGTATTCATCGAAGACGCCAG
+CATTAGCAGAAGCAGAAACAGCAGCAGAGGAGAAGAGTGTAGCGGCTAAG
+ACAACCATTGCGGATCTTCGTCCATTAACATCATCACCACCAACTCTTTG
+AGCTGCTTTGATCACCGGAAGCGTTTGTTTCTTCCGGACGCGTGGxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxatt
+
+QA 141 745 141 745
+DS CHROMAT_FILE: LL2263r PHD_FILE: LL2263r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:41 2000
+
+RD LL2263f 788 0 3
+nctaaaagGTTAACTTGATAGATTCAAATGGATATGAGAACTTTGATGAA
+TCAAGACAGTGAGAGAGATGGGTGATAGATACATTGTTGAGTTTATACAA
+GAATGGACAGAAAAAAAAAAAGAAACATGTTCACCATTTCCAGAAAACAT
+TGGAACCACACTTGAACTTGTCTTTGCCTTCGCATTCCAACGCCAAGTCT
+TCTGAGATAAATGGGACTTTCTTTTGCTTGGCAAGATCTTGGCAACCAGT
+GAAATTCTCAGGGAACTTGCAGCTTCCGAATTGGACAGTAAACGCTCTCG
+CAAAGTTTGCTCCACTTGTTGCCAATCTCTTCTTATCATTAAGCTCTTTG
+TTGGCTTTGCTCTTCTCGAGGTATTCATCGAAGACGCCAGCATTAGCAGA
+AGCAGAAACAGCAGCAGAGGAGAAGAGTGTAGCGGCTAAGACAACCATTG
+CGGATCTTCGTCCATTAACATCATCACCACCAACTCTTTGAGCTGCTTTG
+ATCACCGGAAGCGTTTGTTTCTTCCGGACGCGtgGxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxcgcatagc
+ctcgggtgcctaatgagcgagccacctcacattattgcgctgcgctcact
+gcccgtttccagccgggaaacccggcgtgccacctgca
+
+QA 14 535 7 535
+DS CHROMAT_FILE: LL2263f PHD_FILE: LL2263f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:04 2000
+
+RT{
+LL2263f chimera phrap 698 788 000919:094547
+}
+
+RT{
+LL2263f matchElsewhereLowQual phrap 735 754 000919:094547
+}
+
+RT{
+LL2263f matchElsewhereLowQual phrap 704 722 000919:094547
+}
+
+CO Contig6 1092 2 38 U
+cttttatatttcaattgcataaccatatatttatgcacaccacgtacttg
+cgtatgtgcacaattgcataaaagacattaCAATCCAATTTTTCTCGCAA
+TTATATAAACGTACAAGAAGAAACATGATCATTGTACGTACGTCTTGTAC
+ATGGTATTATTTCAAGCATGACACTCGATTGGTGGGACAGGATACCTTGA
+CTTGTCGGTACAATAATCATATACCATATGGTTTACTCTAACCCAACGGT
+AACGTCGAGCTTCCACGGCGTTAAGTGACTGATAAGCGTAGCCTTCCCAC
+CAATTGTGAGGGTTCGATGGACAATTGGTGGGTCCAGGAACAGGACAGCC
+TTCGATGTCGAAATCTTTGTAATAAGCATAAAAAGGAGCTTTGCTCCAAT
+CAATTTTCTCTAATCCTCCACGTGTCGCCCAATCATCGGCTTCCCATAGT
+GTTGAGTAAACTCCCATGGGTTGTGATGTTGGGTAGGCTATGTTCTTGGC
+TTGGTTGTTTTTGTATTCTCTTATTGGTACATCGTCCACGTAAAAAACAA
+TGTGTTTGTGTGACCAAAGGATACTGTAAGTGTGGAAATCCAAAGATGGG
+TCGAACCAAAGATTAACCCTTTGTTCTCTATCTCCTTTGCCATGAGCAAA
+TATGTTTGTTTGCACCGAGTAAGGTTGGCCACTTCGGTTTCCCAAGAACT
+CAAAATCTAGTTCATCCCGTACGGTGTCTGTATCCGAGTTCATGTAGAAG
+GCGGTGACCGTACCGGCAGAGTCGCCGGGAATGAGTTTGATCTTCATGCT
+CACTTTTCCGAATAGATACTTTCTTTTGGAAGAGAATCCACATCCAGTGC
+TCTGGTCAAGGACAAGTTGGATAGCTTTTCCACCGTCGACTTGACGGATG
+TGAGATCCCGACCATGCGGCTTTGAAATCCTCGGCGAATGTCGTAGGCCG
+TGCTGAAATCTTGATGAACATTAGCGTACAAAGAGTGACAATGCAAAGAA
+AATGAGTTTTGGCCATCATGGTCGGCCTTTATAACGATGAAAAAAATATG
+TTCGAGTgactacttgtgacccacgcgtccgcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 39 39 51 40 40 40 46 46 46 51 51 51 51 51 39 39 39 39 35 35
+ 35 35 35 35 35 39 39 35 35 35 35 35 35 40 40 45 45 40 40 51 51 51 51 51 40 40 40 40 40 40 40 40 40 45 40 40 39 39 38 38 38 38 39 39 51 39 39 40 38 38
+ 43 45 43 43 40 35 43 43 43 43 43 45 51 51 51 51 51 51 51 51 51 43 43 43 43 43 43 43 45 56 56 56 56 56 43 43 43 43 43 43 51 43 43 43 43 43 43 36 36 36
+ 36 36 36 43 43 43 43 43 43 43 43 43 43 51 51 45 45 45 45 45 45 45 45 45 45 45 45 45 51 51 56 43 43 43 43 43 43 43 43 45 45 45 45 45 43 43 43 43 43 45
+ 43 43 43 43 43 43 51 45 45 43 43 43 43 43 43 43 43 43 43 43 43 43 51 51 51 51 43 43 43 43 43 43 56 56 43 43 43 43 43 43 45 45 45 45 43 43 43 43 43 43
+ 43 43 56 56 56 56 56 56 56 56 56 51 43 43 43 38 38 43 43 43 43 43 43 43 51 51 56 56 56 56 56 56 56 56 56 56 56 43 43 43 43 43 43 43 43 43 43 43 43 43
+ 43 43 43 45 45 51 45 63 70 66 61 43 51 56 56 56 56 56 56 56 51 51 70 69 69 45 51 51 56 56 56 56 56 56 56 56 45 45 45 45 55 54 43 48 48 46 47 65 68 85
+ 85 80 85 76 78 80 74 74 77 82 82 88 88 85 81 84 88 83 60 63 64 61 69 80 74 74 77 74 70 70 83 88 88 88 83 83 80 76 76 74 77 77 77 85 85 81 78 83 88 88
+ 85 85 81 81 72 78 76 73 60 64 69 77 71 74 88 90 90 88 88 88 84 84 76 82 84 86 86 76 85 81 83 73 78 80 80 66 75 75 75 75 83 78 77 77 77 75 80 64 64 66
+ 61 56 62 66 70 74 80 86 86 84 88 80 80 77 74 74 74 88 88 80 79 76 64 61 63 65 65 85 88 90 80 77 69 67 65 60 56 62 61 61 66 73 88 81 89 89 88 73 73 71
+ 90 73 90 90 90 90 90 90 90 84 77 75 74 86 84 82 81 80 76 77 80 78 88 78 75 68 62 59 69 69 80 80 90 82 74 70 73 77 80 80 76 72 72 81 85 90 90 74 70 70
+ 69 74 69 70 69 76 74 67 70 75 83 67 69 69 62 65 65 78 76 80 66 70 60 65 64 53 56 58 42 47 43 43 51 51 51 51 51 51 51 56 72 76 66 67 61 64 61 59 64 76
+ 85 80 88 79 74 71 66 71 68 64 63 62 62 61 59 68 69 65 69 72 63 59 59 58 58 58 61 59 62 62 76 75 69 61 62 55 55 55 69 70 76 73 74 74 68 63 62 64 64 61
+ 61 57 57 57 59 54 53 51 50 50 50 45 44 44 42 42 46 46 51 45 45 45 45 43 43 45 45 45 45 62 63 62 63 66 55 56 56 53 51 51 57 51 45 45 51 51 51 51 51 51
+ 51 51 51 51 51 51 45 45 45 45 45 45 51 45 45 45 45 43 43 43 43 43 43 43 45 45 51 56 56 51 51 43 43 43 43 43 43 51 56 56 56 56 56 56 56 51 51 51 51 51
+ 56 56 56 56 56 56 45 45 45 45 45 41 41 41 45 45 45 56 56 51 51 51 51 51 51 56 51 51 45 45 45 45 45 45 45 41 41 43 56 56 46 43 42 38 43 43 43 43 43 43
+ 43 43 43 43 43 43 43 43 46 43 46 46 43 43 43 43 43 43 51 51 40 43 43 43 43 45 56 56 56 56 56 51 43 43 43 43 43 43 43 45 51 51 51 51 51 46 46 41 43 43
+ 43 43 43 43 40 41 41 45 45 45 45 46 56 38 36 36 36 36 36 45 45 51 51 51 51 51 56 51 51 43 43 36 36 36 36 35 35 43 43 43 43 43 43 43 43 43 43 43 43 43
+ 43 43 43 43 43 43 43 43 56 56 51 46 46 43 43 43 46 46 51 40 38 35 35 35 35 35 35 38 46 45 45 39 39 38 38 38 38 40 40 46 37 37 37 40 40 40 51 51 51 51
+ 51 51 45 45 45 45 45 40 40 40 40 40 40 40 40 56 40 40 40 39 35 35 35 35 35 39 40 40 40 39 39 39 35 35 35 35 35 34 35 35 35 56 56 56 51 51 51 46 51 51
+ 40 40 35 35 35 35 35 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2264f U 1
+AF LL2264r C 354
+BS 1 357 LL2264f
+BS 358 361 LL2264r
+BS 362 372 LL2264f
+BS 373 375 LL2264r
+BS 376 390 LL2264f
+BS 391 399 LL2264r
+BS 400 461 LL2264f
+BS 462 462 LL2264r
+BS 463 486 LL2264f
+BS 487 491 LL2264r
+BS 492 497 LL2264f
+BS 498 500 LL2264r
+BS 501 501 LL2264f
+BS 502 504 LL2264r
+BS 505 506 LL2264f
+BS 507 510 LL2264r
+BS 511 511 LL2264f
+BS 512 515 LL2264r
+BS 516 517 LL2264f
+BS 518 523 LL2264r
+BS 524 525 LL2264f
+BS 526 527 LL2264r
+BS 528 530 LL2264f
+BS 531 531 LL2264r
+BS 532 532 LL2264f
+BS 533 541 LL2264r
+BS 542 544 LL2264f
+BS 545 550 LL2264r
+BS 551 551 LL2264f
+BS 552 622 LL2264r
+BS 623 628 LL2264f
+BS 629 629 LL2264r
+BS 630 630 LL2264f
+BS 631 640 LL2264r
+BS 641 711 LL2264f
+BS 712 729 LL2264r
+BS 730 742 LL2264f
+BS 743 1092 LL2264r
+
+RD LL2264f 762 0 0
+cttttatatttcaattgcataaccatatatttatgcacaccacgtacttg
+cgtatgtgcacaattgcataaaagacattaCAATCCAATTTTTCTCGCAA
+TTATATAAACGTACAAGAAGAAACATGATCATTGTACGTACGTCTTGTAC
+ATGGTATTATTTCAAGCATGACACTCGATTGGTGGGACAGGATACCTTGA
+CTTGTCGGTACAATAATCATATACCATATGGTTTACTCTAACCCAACGGT
+AACGTCGAGCTTCCACGGCGTTAAGTGACTGATAAGCGTAGCCTTCCCAC
+CAATTGTGAGGGTTCGATGGACAATTGGTGGGTCCAGGAACAGGACAGCC
+TTCGATGTCGAAATCTTTGTAATAAGCATAAAAAGGAGCTTTGCTCCAAT
+CAATTTTCTCTAATCCTCCACGTGTCGCCCAATCATCGGCTTCCCATAGT
+GTTGAGTAAACTCCCATGGGTTGTGATGTTGGGTAGGCTATGTTCTTGGC
+TTGGTTGTTTTTGTATTCTCTTATTGGTACATCGTCCACGTAAAAAACAA
+TGTGTTTGTGTGACCAAAGGATACTGTAAGTGTGGAAATCCAAAGATGGG
+TCGAACCAAAGATTAACCCTTTGTTCTCTATCtnctTTGCCATGAGCAAA
+TATGTTTGTTTGCACCGAGTAAGGTTGGCCACTTCGGTTTCCCAAGAACT
+CAAAATCTAGTTCattccgtacggngtCTGTATCCGAGTTCATGtaaaag
+gccgtgACCGta
+
+QA 81 622 1 762
+DS CHROMAT_FILE: LL2264f PHD_FILE: LL2264f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:31 2000
+
+RD LL2264r 784 0 0
+gATGTCGAAATctntgTAATAAGCATanaaagGAGCTTTGCTCCAATCAA
+TTTTCTCTAATCCTCCACGTGTCGCCCAATCATCGGCTTCCCATAGTGTT
+GAGTAAACTCCCATGGGTTGTGATGTTGGGTAGGCTATGTTCTTGGCTTG
+GTTGTTTTTGTATTCTCTTATTGGTACATCGTCCACGTAAAAAACAATGT
+GTTTGTGTGACCAAAGGATACTGTAAGTGTGGAAATCCAAAGATGGGTCG
+AACCAAAGATTAACCCTTTGTTCTCTATCTCCTTTGCCATGAGCAAATAT
+GTTTGTTTGCACCGAGTAAGGTTGGCCACTTCGGTTTCCCAAGAACTCAA
+AATCTAGTTCATCCCGTACGGTGTCTGTATCCGAGTTCATGTAGAAGGCG
+GTGACCGTACCGGCAGAGTCGCCGGGAATGAGTTTGATCTTCATGCTCAC
+TTTTCCGAATAGATACTTTCTTTTGGAAGAGAATCCACATCCAGTGCTCT
+GGTCAAGGACAAGTTGGATAGCTTTTCCACCGTCGACTTGACGGATGTGA
+GATCCCGACCATGCGGCTTTGAAATCCTCGGCGAATGTCGTAGGCCGTGC
+TGAAATCTTGATGAACATTAGCGTACAAAGAGTGACAATGCAAAGAAAAT
+GAGTTTTGGCCATCATGGTCGGCCTTTATAACGATGAAAAAAATATGTTC
+GAGTgactacttgtgacccacgcgtccgcggacgcgtggxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxagt
+
+QA 47 704 1 739
+DS CHROMAT_FILE: LL2264r PHD_FILE: LL2264r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:09 2000
+
+CO Contig7 796 2 49 U
+ctttagggaaccatagagactaggatgttaaatagagaagcattcattat
+gaagagaccacatattatagagaagcccaaatcaaaccaaagcaaagtat
+tcaatggcagtttttcatacagacgataacatgctctttacttataaatc
+ttattcatatgtttctaaaagcttattacggtcctgaatctgccaccttt
+ttcaaaacttctattttgtttttaagcacgccctttataaaatgcgttta
+tgataataggaaagtggtgctcaaaatttggcgctttggaggcctatcct
+cagcttcagagactctTATTTGTCTGCCgtcCAAAtcagcgccATTCAAG
+gttctGaTGGCAtttctggaCcttcttgAgcagagttgtacgtcacaAAC
+CcaaaacCcttggaACGgccactgtcCCTGTCgtagaTGACTCTAGCCTC
+GACAACCTTTCCTTgcTCACCGAACAAACTCTCCAGAGCCATGTCATCGA
+CtccCCATGAGagactgcCGACATAAACACGgcttccTGAACCAGCAGCA
+GCAC*CACCACCATATCCAGAACCAGAGCTTCCAAAGCTGcttCTGGgaC
+ctctgGaGAAAGagtcttccctcttcgggggaggaggaccagcattcact
+ctnaagggtctaccatccaactcatagccaccgaattgctgcgcagctgc
+cctaa*cttcagagaccgacgaccatagcc*cgaacccgaaccccctgcc
+tcgc*cctgtccctttgtcatagatcacccca*ccatcccgaaatt
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 25 27 34 44 30 28 26 24 24 26 22 20 19 19 19 22 23 22 25 14 14 12 12 11 13 17 16 22 22 25 25 21 21 21
+ 19 19 11 11 13 23 19 23 23 22 22 22 13 13 13 9 10 10 18 17 22 12 12 12 12 13 16 18 22 19 19 15 15 15 14 14 12 11 11 11 14 14 15 15 15 17 19 22 22 25
+ 25 17 17 18 18 18 18 20 13 8 8 12 13 13 23 24 24 18 18 17 19 16 16 11 11 15 21 21 27 24 24 27 19 18 13 12 16 25 25 30 33 26 24 24 33 24 24 27 24 27
+ 24 24 30 31 31 29 31 31 42 33 37 33 33 27 14 14 22 23 23 28 32 33 35 39 35 42 37 37 42 30 30 24 25 22 27 22 24 27 31 30 27 27 33 24 24 24 27 33 33 24
+ 27 15 15 15 29 25 33 28 28 23 26 17 14 8 6 6 8 13 21 25 25 27 30 29 29 25 25 22 24 20 24 14 11 11 13 8 15 23 29 29 40 31 31 28 23 30 29 32 29 29
+ 29 31 31 32 29 29 33 33 29 33 30 27 22 32 27 32 28 30 25 21 25 25 31 32 29 31 38 32 32 35 35 41 48 45 38 41 27 24 20 12 11 11 24 25 25 22 19 19 23 12
+ 11 11 14 12 23 18 21 20 22 25 23 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2266f U 1
+AF LL2266r C 438
+BS 1 568 LL2266f
+BS 569 569 LL2266r
+BS 570 575 LL2266f
+BS 576 587 LL2266r
+BS 588 588 LL2266f
+BS 589 589 LL2266r
+BS 590 613 LL2266f
+BS 614 616 LL2266r
+BS 617 626 LL2266f
+BS 627 631 LL2266r
+BS 632 635 LL2266f
+BS 636 636 LL2266r
+BS 637 638 LL2266f
+BS 639 644 LL2266r
+BS 645 646 LL2266f
+BS 647 647 LL2266r
+BS 648 654 LL2266f
+BS 655 661 LL2266r
+BS 662 663 LL2266f
+BS 664 664 LL2266r
+BS 665 665 LL2266f
+BS 666 668 LL2266r
+BS 669 676 LL2266f
+BS 677 679 LL2266r
+BS 680 692 LL2266f
+BS 693 693 LL2266r
+BS 694 694 LL2266f
+BS 695 695 LL2266r
+BS 696 720 LL2266f
+BS 721 722 LL2266r
+BS 723 724 LL2266f
+BS 725 727 LL2266r
+BS 728 732 LL2266f
+BS 733 733 LL2266r
+BS 734 738 LL2266f
+BS 739 740 LL2266r
+BS 741 745 LL2266f
+BS 746 748 LL2266r
+BS 749 750 LL2266f
+BS 751 751 LL2266r
+BS 752 754 LL2266f
+BS 756 762 LL2266r
+BS 763 764 LL2266f
+BS 765 773 LL2266r
+BS 774 775 LL2266f
+BS 776 776 LL2266r
+BS 777 784 LL2266f
+BS 785 788 LL2266r
+BS 789 796 LL2266f
+
+RD LL2266f 796 0 0
+ctttagggaaccatagagactaggatgttaaatagagaagcattcattat
+gaagagaccacatattatagagaagcccaaatcaaaccaaagcaaagtat
+tcaatggcagtttttcatacagacgataacatgctctttacttataaatc
+ttattcatatgtttctaaaagcttattacggtcctgaatctgccaccttt
+ttcaaaacttctattttgtttttaagcacgccctttataaaatgcgttta
+tgataataggaaagtggtgctcaaaatttggcgctttggaggcctatcct
+cagcttcagagactctTATTTGTCTGCCgtcCAAAtcagcgccATTCAAG
+gttctGaTGGCAtttctggaCcttcttgAgcagagttgtacgtcacaAAC
+CcaaaacCcttggaACGgccactgtcCCTGTCgtagaTGACTCTAGCCTC
+GACAACCTTTCCTTgcTCACCGAACAAACTCTCCAGAGCCATGTCATCGA
+CtccCCATGAGagactgcCGACATAAACACGgcttccTGAACCAGCAGCA
+GCAC*CACCACCATATCCAGAACCAGAGCTTCCAAAGCTGcttCTGGgaC
+ctctgGaGAAAGagtcttccctcttcggnggaggaggaccagcattcact
+ctnaagggtctaccatccaactcatagccaccgaattgctgcgcagctgc
+cctaa*cttcagagaccgacgaccatagcc*cgaacccgaaccccctgcc
+tcgc*cctgtccctttgtcatagatcacccca*ccatcccgaaatt
+
+QA 316 613 1 796
+DS CHROMAT_FILE: LL2266f PHD_FILE: LL2266f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:40 2000
+
+RD LL2266r 816 0 0
+ttttttttgcttcgacaacctttcgttggtcgcgggacaggatcccgaga
+gcgggggctggaactccccatgggagggaccgggcagaagcccggcttcg
+ggagccagcaggagcagtcaccgccagatCCAgacccaGAGCTTCCAAAG
+CTgcgtggaggacctctgtagagagagtctacgctgttcgggggaggagg
+gccagcattcactagcaagggtctagcatccaggtgaacgccattgagnt
+gatgagcaga*gccctcagcatcggaggcagaagac*atagtcgcgaaac
+cgaaacctctgcttcgtgcctgtcccgttgtcatagagcacctcaaccat
+ctacacatctccggcgctctcgaagagctgagcgagctgagcactgtacg
+cgttaaaggcaaggtgaccgactcagagtctgaggtcaccatcgaagtgt
+tgctcccgcatgcggaggcggctcagcgtcatcggagataatgctgtcat
+cttcttcttcttattcgctgaagtcatctgtacccgcggccgatgcgcat
+aaccagaaacccggcggaggaagggaagatggagagggtaatggactggg
+agttgagtttgaagtagacgaaagaatggaggaggagagtgcaacctggg
+ttcggagatggagaaagggatgcatttacggcttgaaggtgcagagagcg
+agggcggaagctggagcagccatgtaggccgagaagagagcggacgcgtg
+ggtcgacccgggaattcccggaacggtaccgcaagttgacccgagggncg
+cccaacagcttatcct
+
+QA -1 -1 93 351
+DS CHROMAT_FILE: LL2266r PHD_FILE: LL2266r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:18 2000
+
+CO Contig8 875 2 40 U
+cttttgggatacagtacaaatgatctcatgacataaactgctggcaaaac
+atatgtagtctcaggaggaacaaaaacaaaAGACAAGTCAAAAACTCTTT
+ATGGACTTGTAGCAAGCTTGTTGAAGGCGGCATGAACCCTCTCTTCAGTG
+AACATTCTGTGATTCTCATAGAAATCTAATATCTCCATTGCGATATTCTT
+CACTATGTTCATGTCCGCAGAGAGCTCTTCAAACCATGTCTTTATGTCTT
+TCTCTTTGTACACAGAAGCAACGTAAATGCAAGCGATCGTGATGAGATGA
+GGCGGGTGTGTAAGGATCAGGTCCATTTTGTAAGTGTCGTTGACAAGACC
+CCAAGTTAAATGGGTCATGCTTGTGTCATTGAGTCCAGAGTCCTGCAAGT
+ACTCAGGGAGAGAACGGTATGGGTGGAAGACAACAAGATAGAAGTTCAAA
+GCTTCCAAGACCTTCATCTCCATCTCCAGAATATCCTTAATCTCATACCT
+AAACTTCTCATCAGCATACAATTTCTTGATGTAGAAGACAAGAATCTTGG
+CATGGACCACACTCTCCTCTGCTTTGCAAGCCAGATACAAGCAGGTGGGA
+GCAACAAGACGAGGCTCATACTCTGTCAAACTCTTCCTTGTGTAAACACG
+GCGCATATAGGTTACAGCAGTTGCAACAACTCTTTGCCTAATCTTAATGT
+GTTGTGCCAGCTTCGATATGTAGTTAGACATATGAAGCTTAATGAGTTTG
+AAATCTTCGAGAGAGATTCCTCTCTGAGCATCAAGAGGATGAACAACATT
+AATCTCTTCTGGGTCCTTAAGCTCTTTGTAGTGTGTTGATgtccagaaat
+tggaagccattttccggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 40 40 40 39 39 39 39 39 39 40 40 51 51 51 51 51 40 46 40
+ 40 40 40 40 40 40 40 40 45 51 51 51 51 51 40 40 40 40 40 45 40 40 40 40 40 35 35 35 35 35 35 39 39 40 40 40 40 66 60 60 60 63 63 55 58 58 65 69 61 59
+ 55 55 55 60 65 67 67 69 74 71 74 69 69 70 70 69 78 76 80 82 85 85 80 70 67 61 61 74 74 75 77 71 75 72 77 77 88 65 60 66 68 70 70 66 74 69 69 69 67 68
+ 68 75 69 74 72 72 80 80 80 74 74 85 80 73 61 58 61 61 68 83 85 85 83 83 83 74 72 65 67 69 78 76 90 81 85 85 90 90 90 90 85 85 85 80 90 90 90 90 85 83
+ 83 83 83 84 84 83 77 84 90 90 85 85 85 85 90 85 85 80 80 78 78 74 74 72 82 90 83 83 83 80 83 68 72 75 75 75 79 85 85 85 85 90 90 90 90 90 90 90 90 90
+ 76 78 78 82 76 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 86 86 90 90 90 90 90 90 90 90 90 90 90 90 90 83 83 88 83 83 83 90 83 83 90 90 90 90 90 90
+ 90 90 90 90 86 86 90 90 90 86 90 90 90 87 89 87 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88
+ 88 88 85 90 90 90 90 90 90 90 90 90 90 90 90 90 86 90 90 90 90 90 90 90 90 90 90 90 90 89 90 90 90 90 90 90 90 90 90 85 87 90 87 87 85 85 90 89 89 85
+ 90 85 85 90 90 90 90 90 90 90 90 86 86 90 90 90 90 90 88 88 88 90 90 90 90 90 87 90 90 86 86 87 90 90 90 90 90 90 90 90 90 90 90 88 90 90 90 90 90 90
+ 90 90 90 90 89 84 81 85 85 85 80 84 90 89 73 75 80 90 84 83 78 77 77 77 78 85 86 90 90 90 82 80 72 66 75 72 71 72 75 71 71 80 70 76 82 82 79 76 66 66
+ 69 78 72 76 71 66 66 76 85 66 66 66 75 70 62 60 60 60 63 64 67 75 64 66 64 76 76 77 77 77 76 76 79 75 74 75 80 61 64 58 58 58 60 58 59 64 61 63 66 58
+ 58 43 43 43 51 51 51 51 43 43 43 43 43 45 51 51 68 73 70 80 74 51 43 43 43 43 43 43 56 51 51 51 45 45 51 51 51 43 45 43 43 43 43 43 43 65 62 65 48 42
+ 42 43 43 43 55 58 51 45 45 45 45 45 45 43 43 43 43 43 43 43 43 43 43 43 43 43 56 56 56 56 56 51 51 51 51 51 45 45 45 45 45 45 45 45 45 45 45 45 45 51
+ 51 51 51 51 51 51 43 43 43 45 45 43 43 43 43 43 43 45 43 43 43 43 43 43 43 43 43 43 45 56 56 51 51 43 43 43 43 43 43 51 51 51 43 43 43 43 43 43 51 51
+ 51 51 51 56 51 40 40 40 40 40 40 45 45 45 45 45 45 51 51 51 51 51 40 40 39 35 35 35 35 35 39 51 51 51 51 51 51 39 39 39 39 39 39 40 40 40 56 39 39 39
+ 39 40 40 51 51 51 51 51 51 51 40 40 40 40 40 40 40 35 35 35 35 45 45 51 51 51 51 45 40 40 40 40 40 46 46 51 51 51 40 35 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2268f U 1
+AF LL2268r C 136
+BS 1 264 LL2268f
+BS 265 265 LL2268r
+BS 266 291 LL2268f
+BS 292 298 LL2268r
+BS 299 305 LL2268f
+BS 306 306 LL2268r
+BS 307 315 LL2268f
+BS 316 317 LL2268r
+BS 318 324 LL2268f
+BS 325 325 LL2268r
+BS 326 326 LL2268f
+BS 327 327 LL2268r
+BS 328 337 LL2268f
+BS 338 338 LL2268r
+BS 339 341 LL2268f
+BS 342 342 LL2268r
+BS 343 344 LL2268f
+BS 345 352 LL2268r
+BS 353 354 LL2268f
+BS 355 360 LL2268r
+BS 361 363 LL2268f
+BS 364 379 LL2268r
+BS 380 386 LL2268f
+BS 387 387 LL2268r
+BS 388 393 LL2268f
+BS 394 403 LL2268r
+BS 404 404 LL2268f
+BS 405 440 LL2268r
+BS 441 444 LL2268f
+BS 445 450 LL2268r
+BS 451 451 LL2268f
+BS 452 464 LL2268r
+BS 465 465 LL2268f
+BS 466 512 LL2268r
+BS 513 513 LL2268f
+BS 514 616 LL2268r
+BS 617 621 LL2268f
+BS 622 645 LL2268r
+BS 646 656 LL2268f
+BS 657 875 LL2268r
+
+RD LL2268f 764 0 0
+cttttgggatacagtacaaatgatctcatgacataaactgctggcaaaac
+atatgtagtctcaggaggaacaaaaacaaaAGACAAGTCAAAAACTCTTT
+ATGGACTTGTAGCAAGCTTGTTGAAGGCGGCATGAACCCTCTCTTCAGTG
+AACATTCTGTGATTCTCATAGAAATCTAATATCTCCATTGCGATATTCTT
+CACTATGTTCATGTCCGCAGAGAGCTCTTCAAACCATGTCTTTATGTCTT
+TCTCTTTGTACACAGAAGCAACGTAAATGCAAGCGATCGTGATGAGATGA
+GGCGGGTGTGTAAGGATCAGGTCCATTTTGTAAGTGTCGTTGACAAGACC
+CCAAGTTAAATGGGTCATGCTTGTGTCATTGAGTCCAGAGTCCTGCAAGT
+ACTCAGGGAGAGAACGGTATGGGTGGAAGACAACAAGATAGAAGTTCAAA
+GCTTCCAAGACCTTCATCTCCATCTCCAGAATATCCTTAATCTCATACCT
+AAACTTCTCATCAGCATACAATTTCTTGATGTAGAAGACAAGAATCTTGG
+CATGGACCACACTCTCCTCTGCTTTGCAAGCCAGATACAAGCAGGTGGGA
+GcaccacgaccagGCTCATACTCTgCcaacctCTTCCTtgcgtAAACACG
+GCGCATATaggctaCAGcacgctgcccacctttcctccctatctcactgc
+gctgcgcccacccccccccgctccctcactccccatccccaccgcccccc
+ccatcccccatacg
+
+QA 81 601 1 669
+DS CHROMAT_FILE: LL2268f PHD_FILE: LL2268f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:50 2000
+
+RD LL2268r 785 0 0
+acCCTCTCTTCAGTGAACATTCTGTGATTCTCATAGAAATCTAATATCTC
+CATTGCGATATTCTTCACTATGTTCATGTCCGCAGAGAGCTCTTCAAACC
+ATGTCTTTATGTCTTTCTCTTTGTACACAGAAGCAACGTAAATGCAAGCG
+ATCGTGATGAGATGAGGCGGGTGTGTAAGGATCAGGTCCATTTTGTAAGT
+GTCGTTGACAAGACCCCAAGTTAAATGGGTCATGCTTGTGTCATTGAGTC
+CAGAGTCCTGCAAGTACTCAGGGAGAGAACGGTATGGGTGGAAGACAACA
+AGATAGAAGTTCAAAGCTTCCAAGACCTTCATCTCCATCTCCAGAATATC
+CTTAATCTCATACCTAAACTTCTCATCAGCATACAATTTCTTGATGTAGA
+AGACAAGAATCTTGGCATGGACCACACTCTCCTCTGCTTTGCAAGCCAGA
+TACAAGCAGGTGGGAGCAACAAGACGAGGCTCATACTCTGTCAAACTCTT
+CCTTGTGTAAACACGGCGCATATAGGTTACAGCAGTTGCAACAACTCTTT
+GCCTAATCTTAATGTGTTGTGCCAGCTTCGATATGTAGTTAGACATATGA
+AGCTTAATGAGTTTGAAATCTTCGAGAGAGATTCCTCTCTGAGCATCAAG
+AGGATGAACAACATTAATCTCTTCTGGGTCCTTAAGCTCTTTGTAGTGTG
+TTGATgtccagaaattggaagccattttccggacgcgtggxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxatt
+
+QA 3 705 1 740
+DS CHROMAT_FILE: LL2268r PHD_FILE: LL2268r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:27 2000
+
+CO Contig9 1201 2 34 U
+ccacgcgtccgtacgataaaaaaaaaaaaagaacTTACCAACTAATAACT
+ACAGCATTAATAAATAACCTAAGCAACTCATTGCTTCGCATTATCTGGAT
+CCAAAAAATTAGTCAAGATATGATAGGCTGATCATATCATTGGAGCAACT
+GACTAAAAAAAAAGAATAAAGAAAGATGATTATTAAGTTATGAAAGGTAA
+TCAAAAAGTATGCGGATAAATGGAAGGATGAAAGAAAGAGCGAAAAAATT
+GAATATTAATGATATATGATTCCAATTTGGAAAATCTATGAGGTCACTGT
+AAGAAAAGCAATGTAATAAAGCATCAATACAGATTCATTCATAATAATTA
+GATAAATAATAATAATTAGATAAATCTGTTCCGAAAACAAAAAATTAAGA
+GCCTTGAGACAATCAAAACTGAGAAAATTGCCTCAAAAAAAAAAAGAAAT
+TCAAAATTTCATGTAAAAGCTCCATTGTAGAATTCAGGCCTAATGATTAA
+TCAAGAAGCGATGGGAACGACGGAACCCATGAATATATAGGATTCTAGTG
+AACAAGAAATCTTAGTAATTCATTGGACAGGATGGCGGAATAAACCAGAA
+ACTTTATTATCTATTCTGATTTTGATTCTGAGACCTCGGGGGATAAACAG
+CAAACTTAAATAGATATTGAAAGAGTAAATATTCGCCGGCGAAAAATTGG
+TTTTTTTTTTTCAAATAAAAACAGTAATAAAAGATGAAAAAAACAATGAA
+AAAAAAATAAGGATTTGTTATAATATTCTAACTCTAATAAAAATTACATT
+TGTAATGATGATATTACGTTATTTTTAAATAAATCGAAATAAAATTGATC
+TTTGATTCTATTTCAAAAAAGACATACACAAATTTAGAAGAGATAAGATG
+AAATAAAAAAAAAATACCATGATTAATAGGATTAATCATTAACTACATCT
+ATATCTTAATTAATCCTTTTATTCGCGAGGAGCTGGATGAGaaGAAACTC
+TCACGTCCAGTtCTGCAGTAGAGATGGAATTTCTCATTTAGAAAAAACCC
+ATCAACTATAACCCAAAAAGAACCAAATTTCGTAAACAACATCGAGGAaG
+aCTAaAAGGAATATcCTCTCgtgggAATCGTATttGttttGGCAGaTATG
+CTcttCaaACACttGAACCCGCTTGGAttAcAtctagacaaatagaagca
+g
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 40 40 40 40 37 37 37 40 40 40 46 40 39 39 39
+ 39 39 40 56 56 46 46 46 46 46 46 46 40 40 40 39 39 39 40 40 40 40 46 51 40 40 40 40 40 40 40 39 35 39 39 35 35 35 35 35 39 39 35 35 35 51 51 45 51 46
+ 46 46 46 46 46 56 56 35 35 35 35 35 35 40 51 51 51 51 45 45 45 45 45 45 45 43 43 43 43 43 43 51 51 51 51 51 51 51 56 56 56 56 51 51 51 43 35 35 35 35
+ 35 35 35 35 35 43 43 56 56 56 56 56 56 56 51 51 51 51 51 51 56 56 56 56 56 56 56 51 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 45 51 51 51 51 51 51
+ 51 51 51 51 51 56 51 51 45 43 36 36 36 36 36 43 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 43 43 43 43 43 43 51 56 56 56 56 56
+ 56 56 56 51 45 45 45 45 45 45 51 51 51 56 51 51 51 51 45 45 45 45 45 51 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 56 56 56 56 43 36 36 36 36 36 45
+ 56 51 51 51 51 45 51 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 45 45 45 45
+ 45 45 51 56 56 56 56 56 56 56 56 56 56 51 45 45 45 45 45 45 51 51 51 51 56 51 51 51 51 43 45 45 45 45 45 56 56 56 56 56 56 56 56 45 45 45 45 45 43 56
+ 56 56 56 56 56 56 43 43 42 46 43 51 56 56 56 56 45 45 45 45 45 41 43 51 51 51 56 51 45 43 43 43 43 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 46 56 56 56 50 53 53 56 52 61 56 56 56 66 66 66 61 61 61 61 55 55 55 55 56 61 66 50 50 50 50 50
+ 50 53 53 52 52 66 55 55 55 50 50 50 66 66 66 50 49 49 45 45 45 55 55 50 61 61 61 61 66 66 66 56 56 56 52 52 52 52 52 52 52 66 66 52 52 52 52 52 52 52
+ 54 56 54 52 50 52 52 56 56 55 50 50 50 50 50 50 50 50 50 61 61 61 61 61 50 53 53 53 53 53 66 66 53 45 45 45 45 45 53 61 61 53 53 53 53 53 53 55 53 45
+ 45 45 45 50 53 53 53 53 53 53 53 53 53 53 55 56 55 66 55 55 66 66 66 66 61 61 61 61 61 66 53 53 53 53 53 53 61 61 61 66 66 61 61 61 61 61 55 55 55 55
+ 55 55 53 53 53 53 53 53 61 55 55 53 55 55 61 61 61 61 61 61 61 61 61 61 61 61 66 66 66 66 66 53 53 53 53 53 53 53 55 55 55 55 55 66 66 66 66 66 66 66
+ 66 66 66 66 66 66 66 66 56 56 42 42 42 44 43 51 51 61 53 50 50 50 53 48 56 42 42 56 56 56 56 56 66 56 56 56 56 56 56 56 43 38 38 38 38 38 42 42 50 56
+ 56 56 56 56 56 56 56 51 51 43 43 56 50 50 44 44 42 46 44 56 51 51 51 51 44 43 42 42 43 41 41 41 41 41 51 56 56 56 56 56 56 56 43 43 42 41 41 41 50 50
+ 42 42 46 46 42 42 50 42 56 56 56 56 56 56 43 43 43 43 43 35 40 40 40 40 40 51 51 51 51 51 51 46 40 43 42 42 42 46 56 56 56 56 56 50 42 42 42 42 43 43
+ 56 56 51 43 44 43 42 42 43 46 43 42 42 42 56 56 56 56 56 40 40 40 40 40 42 44 44 44 44 48 48 56 56 56 56 56 56 56 56 47 56 56 56 42 46 46 46 42 42 56
+ 56 44 44 44 56 56 56 56 56 56 56 56 56 56 42 42 42 46 42 42 42 40 40 45 45 45 45 51 51 51 40 40 40 40 40 40 51 45 40 40 40 40 40 40 40 35 40 40 40 46
+ 46 44 48 48 48 40 37 34 32 34 40 40 40 40 40 48 48 40 40 37 37 40 35 29 31 31 34 34 37 40 40 40 40 40 34 34 40 37 37 28 25 18 16 21 26 29 29 32 29 29
+ 32 32 32 32 29 29 24 25 29 29 25 19 24 25 29 32 33 32 37 27 27 24 33 33 35 35 40 40 40 40 48 33 33 33 26 26 29 22 22 24 24 24 48 48 32 25 25 25 25 24
+ 37 40 48 29 20 24 24 24 25 24 25 29 32 32 32 26 29 29 29 25 20 22 25 27 27 27 27 29 29 25 29 32 28 24 24 29 29 29 29 37 40 40 40 27 27 25 25 22 18 21
+ 17 22 22 22 19 24 24 24 25 22 24 25 26 25 19 25 24 23 24 21 10 10 10 17 18 25 25 29 29 29 24 24 24 19 19 21 16 16 19 19 29 25 29 21 21 18 22 22 23 29
+ 27 23 18 12 17 20 17 18 23 25 24 21 19 18 22 22 22 22 22 24 25 25 21 22 22 25 21 17 14 21 18 20 18 18 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0
+
+AF LL2271r U -45
+AF LL2276r U 409
+BS 1 468 LL2271r
+BS 469 473 LL2276r
+BS 474 477 LL2271r
+BS 478 481 LL2276r
+BS 482 492 LL2271r
+BS 493 493 LL2276r
+BS 494 495 LL2271r
+BS 496 501 LL2276r
+BS 502 515 LL2271r
+BS 516 516 LL2276r
+BS 517 518 LL2271r
+BS 519 521 LL2276r
+BS 522 531 LL2271r
+BS 532 534 LL2276r
+BS 535 551 LL2271r
+BS 552 552 LL2276r
+BS 553 554 LL2271r
+BS 555 555 LL2276r
+BS 556 559 LL2271r
+BS 560 603 LL2276r
+BS 604 604 LL2271r
+BS 605 615 LL2276r
+BS 616 616 LL2271r
+BS 617 617 LL2276r
+BS 618 618 LL2271r
+BS 619 620 LL2276r
+BS 621 621 LL2271r
+BS 622 623 LL2276r
+BS 624 624 LL2271r
+BS 625 717 LL2276r
+BS 718 725 LL2271r
+BS 726 732 LL2276r
+BS 733 733 LL2271r
+BS 734 1201 LL2276r
+
+RD LL2271r 785 0 0
+actxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccac
+gcgtccgtacgataaaaaaaaaaaaagaacTTACCAACTAATAACTACAG
+CATTAATAAATAACCTAAGCAACTCATTGCTTCGCATTATCTGGATCCAA
+AAAATTAGTCAAGATATGATAGGCTGATCATATCATTGGAGCAACTGACT
+AAAAAAAAAGAATAAAGAAAGATGATTATTAAGTTATGAAAGGTAATCAA
+AAAGTATGCGGATAAATGGAAGGATGAAAGAAAGAGCGAAAAAATTGAAT
+ATTAATGATATATGATTCCAATTTGGAAAATCTATGAGGTCACTGTAAGA
+AAAGCAATGTAATAAAGCATCAATACAGATTCATTCATAATAATTAGATA
+AATAATAATAATTAGATAAATCTGTTCCGAAAACAAAAAATTAAGAGCCT
+TGAGACAATCAAAACTGAGAAAATTGCCTCAAAAAAAAAAAGAAATTCAA
+AATTTCATGTAAAAGCTCCATTGTAGAATTCAGGCCTAATGATTAATCAA
+GAAGCGATGGGAACGACGGAACCCATGAATATATAGGATTCTAGTGAACA
+AGAAATCTTAGTAATTCATTGGACAGGATGGCGGAATAAACCAGAAACTT
+TATTATCTATTCTGATTTTGATTCTGAGACCTCGGGGGATAAACAGCAAA
+CTTAAATAGATATTGAAAGAGTAAATATTCGCCGGCGAAAAATTGGTTTT
+TTTTTTTCAAATAAAAACAGTAATAAAAGATGAAA
+
+QA 81 756 47 785
+DS CHROMAT_FILE: LL2271r PHD_FILE: LL2271r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:42 2000
+
+RD LL2276r 793 0 0
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccgAAAAGCTCCATTGTAGAATTCAGGCCTAATGATTAATCAAGAAG
+CGATGGGAACGACGGAACCCATGAATATATAGGATTCTAGTGAACAAGAA
+ATCTTAGTAATTCATTGGACAGGATGGCGGAATAAACCAGAAACTTTATT
+ATCTATTCTGATTTTGATTCTGAGACCTCGGGGGATAAACAGCAAACTTA
+AATAGATATTGAAAGAGTAAATATTCGCCGGCGAAAAATTGGTTTTTTTT
+TTTCAAATAAAAACAGTAATAAAAGATGAAAAAAACAATGAAAAAAAAAT
+AAGGATTTGTTATAATATTCTAACTCTAATAAAAATTACATTTGTAATGA
+TGATATTACGTTATTTTTAAATAAATCGAAATAAAATTGATCTTTGATTC
+TATTTCAAAAAAGACATACACAAATTTAGAAGAGATAAGATGAAATAAAA
+AAAAAATACCATGATTAATAGGATTAATCATTAACTACATCTATATCTTA
+ATTAATCCTTTTATTCGCGAGGAGCTGGATGAGaaGAAACTCTCACGTCC
+AGTtCTGCAGTAGAGATGGAATTTCTCATTTAGAAAAAACCCATCAACTA
+TAACCCAAAAAGAACCAAATTTCGTAAACAACATCGAGGAaGaCTAaAAG
+GAATATcCTCTCgtgggAATCGTATttGttttGGCAGaTATGCTcttCaa
+ACACttGAACCCGCTTGGAttAcAtctagacaaatagaagcag
+
+QA 57 777 57 793
+DS CHROMAT_FILE: LL2276r PHD_FILE: LL2276r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:28 2000
+
+CO Contig10 1053 2 32 U
+cattgttttcttcaagaatcaagatcctttacatgataagttccaagaag
+agggaacttgagcgccccccaccccccactCAAGAACCACAAAGTTACAA
+ACCACTAGAGCCTTACATATTAGTCTGTCTCTTTCCACACCATTAAACAA
+GCAAACAAGCAGTGTAGGAAAGATAAAGAGAAGTGGTTTTAGAAAGAAAA
+ATAAGTTGTTTACTTGCGGTGTGTGTATTGTTATAGTGGGGCAAAAGGCT
+CAGGGAGCAGCTCGAAACCTCCATTATCAAACATGAGTCCATGTTCTGAC
+GACCTATCATCAATGCTATGCAACCAGTTTGTGTTCTTCACGTCTTCTTT
+CAACAGAAACGCGTCATACCTTGAGCTCTCATACATATTCATATCGCTCG
+ACTTCCCCATCAGTCTTCTTCCCTCGTTTGTTTCGTTCCCGTTCTCTGCG
+CTGTTGCTGCTGATAGAATCCGAAAGGCTATTCACTGAACGGATTGGCTG
+TATCTTGGACGAGGCTTGTCTGATGCTCTGAGGGAACAACGTCGTGCTTG
+ATAGTGCGCAACCGTTACTCTTCCCGTTTCTTATGTCCATGTGCTTAATG
+GCCATATCAAGTGAACTTTTTGATAATGACCTCCCAAGCCCGTTGTTATT
+GTCCATCACAGTTGAAGAAGTCCTCACGGTTCTCCGTGAAGTTACCTCTG
+AAACATTTGAAATCCTTCTCGGCTCTGGTGCATATATGTTATGTTGCTGT
+CCATTACCACTCAAACGGCCTTTTCCTTGGCTCTCTATGAGTCTTCCTCT
+CGTCACAACAGGAGatgnnagtTCCTTCTCGTCATGGGACCTTTGGGTTC
+TGGACTTGCCTTTGCTGCAACAGGCCTAGACCTGCCAGCTGATATCGGTC
+TTCCCGGGAGACTTGTTCTGAGGTTAGGTGGTGCATCAAGAGAGAAGTCT
+GGTAGCACAATTGGCTGTTGTGTGTTGGTGGTGGTGGTGGTTCGGACTCT
+AGGTTGTCCAGGAGAGCTcggcctagacaatgaaggtgcgggcggacgcg
+tgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 35 37 37 37 35 35 37 37 40 40 42 42 37 37 37 37 37 39 43
+ 37 37 35 35 32 32 32 32 35 42 42 35 35 35 35 35 35 37 35 28 35 35 28 28 33 37 35 36 36 35 35 35 46 35 40 40 40 38 35 42 42 37 37 40 40 40 40 40 40 40
+ 40 43 45 43 56 43 45 41 41 41 41 41 35 35 40 40 37 37 43 56 56 37 37 38 43 43 43 56 56 56 50 50 44 44 44 50 50 51 46 46 42 42 42 56 56 56 56 56 56 43
+ 43 41 41 45 45 42 42 44 46 42 38 38 38 38 38 35 35 35 32 35 35 50 42 42 42 41 41 41 41 35 38 38 40 40 40 35 35 44 40 40 40 40 40 43 56 42 38 31 31 31
+ 35 35 46 56 56 56 43 40 36 32 32 32 36 36 36 36 36 32 36 31 36 35 35 42 50 50 44 44 42 43 42 42 42 42 50 44 50 50 50 44 44 44 50 42 42 42 35 36 36 36
+ 36 36 38 41 41 42 42 44 44 44 50 42 42 32 35 38 38 38 38 41 41 41 43 42 43 43 50 50 44 44 44 44 44 46 42 42 42 35 35 35 36 36 36 38 42 42 42 42 46 42
+ 42 42 42 42 41 41 44 45 40 40 40 40 38 38 35 44 42 42 42 42 42 42 42 41 41 41 41 48 48 53 52 57 56 56 56 48 48 50 49 51 49 57 41 32 35 33 37 35 37 41
+ 42 42 42 46 46 50 50 50 50 44 57 50 56 56 56 56 50 50 50 50 50 44 44 44 50 50 50 52 50 50 53 56 51 43 43 42 42 40 40 40 40 40 36 52 52 45 39 46 44 46
+ 46 48 52 47 47 51 51 51 51 51 43 48 50 62 54 57 53 60 50 48 45 45 45 48 52 45 40 45 38 42 42 51 41 41 45 49 50 43 45 45 45 45 52 55 51 43 47 53 54 54
+ 50 56 45 44 44 58 49 52 62 61 65 60 60 57 62 62 53 48 54 56 58 70 67 67 54 54 46 48 47 66 57 66 66 52 49 49 52 48 53 62 83 65 71 59 57 57 69 59 62 65
+ 65 67 69 69 58 50 50 45 44 30 34 41 49 54 57 56 59 59 61 65 53 80 61 57 61 72 64 57 53 58 48 61 58 67 67 65 64 65 69 77 74 74 74 66 57 54 57 56 49 38
+ 39 48 59 67 69 80 58 57 54 57 57 61 68 57 57 61 80 88 77 72 72 64 63 69 65 65 69 77 80 80 80 67 62 65 71 71 67 65 65 61 66 69 70 76 75 71 71 71 60 55
+ 55 55 55 55 76 75 82 64 64 66 64 58 60 69 64 64 78 59 55 57 60 47 48 47 45 46 51 61 65 64 59 64 68 61 64 61 61 61 66 64 65 58 54 50 48 48 52 54 62 71
+ 71 74 73 79 76 69 64 67 66 63 52 55 54 52 47 50 48 40 40 40 45 40 40 40 40 40 46 51 59 59 68 77 82 85 89 84 90 75 68 66 68 68 62 59 59 54 49 51 55 55
+ 54 53 66 66 70 75 66 61 65 65 65 65 62 40 40 39 39 39 39 45 51 56 56 56 56 56 56 51 51 51 51 45 45 45 45 45 45 40 34 34 34 34 34 35 51 51 51 51 40 39
+ 39 39 39 39 40 46 46 46 46 40 39 39 36 33 19 15 4 0 0 4 19 19 33 36 44 44 46 39 34 34 34 34 34 40 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51
+ 56 56 51 51 51 45 45 45 45 45 45 45 45 45 40 40 40 40 40 40 51 45 45 45 45 45 45 45 45 45 51 56 56 56 40 39 39 39 39 39 45 45 51 40 45 35 35 35 35 35
+ 35 51 45 45 45 40 40 40 45 45 45 40 39 39 35 35 35 35 45 45 40 40 40 40 40 40 45 45 40 40 39 39 39 39 39 39 45 45 45 45 45 45 45 45 45 45 45 40 40 40
+ 35 35 35 35 35 35 39 40 56 40 40 40 40 40 40 45 51 51 51 51 51 51 51 51 51 51 40 40 40 40 40 40 40 40 40 40 40 40 40 46 40 40 40 40 40 45 51 51 51 51
+ 46 40 40 35 35 40 40 40 40 51 51 51 51 42 42 37 40 40 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0
+
+AF LL2272f U 1
+AF LL2272r C 368
+BS 1 377 LL2272f
+BS 378 392 LL2272r
+BS 393 410 LL2272f
+BS 411 411 LL2272r
+BS 412 427 LL2272f
+BS 428 432 LL2272r
+BS 433 443 LL2272f
+BS 444 539 LL2272r
+BS 540 554 LL2272f
+BS 555 557 LL2272r
+BS 558 563 LL2272f
+BS 564 564 LL2272r
+BS 565 572 LL2272f
+BS 573 573 LL2272r
+BS 574 574 LL2272f
+BS 575 575 LL2272r
+BS 576 576 LL2272f
+BS 577 577 LL2272r
+BS 578 589 LL2272f
+BS 590 594 LL2272r
+BS 595 599 LL2272f
+BS 600 612 LL2272r
+BS 613 616 LL2272f
+BS 617 618 LL2272r
+BS 619 621 LL2272f
+BS 622 627 LL2272r
+BS 628 628 LL2272f
+BS 629 671 LL2272r
+BS 672 717 LL2272f
+BS 718 728 LL2272r
+BS 729 763 LL2272f
+BS 764 1053 LL2272r
+
+RD LL2272f 779 0 0
+cattgttttcttcaagaatcaagatcctttacatgataagttccaagaag
+agggaacttgagcgccccccaccccccactCAAGAACCACAAAGTTACAA
+ACCACTAGAGCCTTACATATTAGTCTGTCTCTTTCCACACCATTAAACAA
+GCAAACAAGCAGTGTAGGAAAGATAAAGAGAAGTGGTTTTAGAAAGAAAA
+ATAAGTTGTTTACTTGCGGTGTGTGTATTGTTATAGTGGGGCAAAAGGCT
+CAGGGAGCAGCTCGAAACCTCCATTATCAAACATGAGTCCATGTTCTGAC
+GACCTATCATCAATGCTATGCAACCAGTTTGTGTTCTTCACGTCTTCTTT
+CAACAGAAACGCGTCATACCTTGAGCTCTCATACATATTCATATCGCTCG
+ACTTCCCCATCAGTCTTCTTCCCTCGTTTGTTTCGTTCCCGTTCTCTGCG
+CTGTTGCTGCTGATAGAATCCGAAAGGCTATTCACTGAACGGATTGGCTG
+TATCTTGGACGAGGCTTGTCTGATGCTCTGAGGGAACAACGTCGTGCTTG
+ATAGTGCGCAACCGTTACTCTTCCCGTTTCTTATGTCCATGTGCTTAATG
+GCCATATCAAGTGAACTTTTTGATAATGACCTCCCAAGCCCGTTGTTATT
+GTCCATCACAGTTGAAGAAGTCCTCACGGTTCTCCGTGAAGTTACCTCTG
+AAACATTTGAAATCCTTCTcgcttcTGGTGCATATATGTTATGTTGCTGT
+CCATTACCACTCAAAcggncttTttcttg
+
+QA 81 671 1 779
+DS CHROMAT_FILE: LL2272f PHD_FILE: LL2272f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:31 2000
+
+RD LL2272r 731 0 0
+accttaagCTCTCATACATATTCATATcggtggact*cCCCATCAGTCTT
+ctcccctCGTTTGTTTCgtccccgTTCTCTGCGCTGTTGCTGCTGATAGA
+ATCCGAAAGGCTATTCACTGAACGGATTGGCTGTATCTTGGACGAGGCTT
+GTCTGATGCTCTGAGGGAACAACGTCGTGCTTGATAGTGCGCAACCGTTA
+CTCTTCCCGTTTCTTATGTCCATGTGCTTAATGGCCATATCAAGTGAACT
+TTTTGATAATGACCTCCCAAGCCCGTTGTTATTGTCCATCACAGTTGAAG
+AAGTCCTCACGGTTCTCCGTGAAGTTACCTCTGAAACATTTGAAATCCTT
+CTCGGCTCTGGTGCATATATGTTATGTTGCTGTCCATTACCACTCAAACG
+GCCTTTTCCTTGGCTCTCTATGAGTCTTCCTCTCGTCACAACAGGAGatg
+nnagtTCCTTCTCGTCATGGGACCTTTGGGTTCTGGACTTGCCTTTGCTG
+CAACAGGCCTAGACCTGCCAGCTGATATCGGTCTTCCCGGGAGACTTGTT
+CTGAGGTTAGGTGGTGCATCAAGAGAGAAGTCTGGTAGCACAATTGGCTG
+TTGTGTGTTGGTGGTGGTGGTGGTTCGGACTCTAGGTTGTCCAGGAGAGC
+Tcggcctagacaatgaaggtgcgggcggacgcgtggxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxagt
+
+QA 173 651 1 686
+DS CHROMAT_FILE: LL2272r PHD_FILE: LL2272r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:09 2000
+
+CO Contig11 735 2 105 U
+ccacgcgtccgcttttttttttttttttttttttTTTTTCTCATAAAACC
+TCTGTTTATatctTGAACAAAGTTACATTATAAGATGAGATGATGAGTGA
+TGAGAAAGGAGAAAAAGATTTCTAAGAGGAGGAGAAGGTATCGATGATGG
+TGGTGTGAAGGGGATCACTGAGGTGAGTAGCCCAATTGTTGAGTGGGCCT
+TTACCAGTAGCAGCGGCTTGAACAGCGAAGCCCAAGAAGGCGACCATGGC
+AAGACGAGCGTGCTTGATCTCAGCTAACTGAAGCTGAGCCATCTTCTCAG
+GGTCAGACGCTAAACCAAGCGGGTCGAAGAATTTGCCGCCAGGGTACAAA
+CGCTTCTCAGAGTCAAGCTCAGCGTTGCGCTGGAACTCGATGTAGCCGAT
+CACTAACACCTCGATCCATATCAACGTCGAGATGGAGAAGGGTAGTGGCT
+GCCCCAAGTAGGACGATCCATCCACTAGCTCCACCTTTCCGGCGTCTTGC
+CAAGTGACGCCGGTGAGCCAtTcGACGGagaGAGCGCCGAGAGTGGCGAG
+CATGGCCCACCGTCCGTGAATCAGCTCGCATTCTCTGAACCtCTGGATCC
+CGAACACCtCGCTGTACGGCTGAAACGGAGTcgaTtTGGGGGCCgCCTCt
+tcGGTACGGGttcCGATCACGtctccggctaagttcttcGccaGaTtCtg
+gtcTagtgaatctatatCgaaCTggagataCTcgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 56 56 56 56 47 35 35 35 35 35 34 46 35 40 40 37
+ 48 50 50 50 44 42 42 34 25 4 4 4 13 20 26 42 42 42 44 43 44 44 47 44 45 45 45 45 45 45 50 50 44 45 45 45 45 45 45 45 45 45 47 50 50 47 47 47 47 47
+ 47 50 47 47 47 47 47 52 53 50 50 47 52 52 52 52 47 47 52 47 47 47 47 47 47 47 52 54 53 61 66 66 50 50 50 52 50 56 50 45 45 45 45 45 45 52 53 52 56 66
+ 54 54 52 52 45 50 50 50 50 50 58 56 53 50 50 35 35 35 35 37 47 53 45 45 45 47 45 47 45 45 45 54 54 52 52 52 66 60 52 52 53 53 61 61 50 50 50 50 50 50
+ 50 50 52 53 53 50 54 53 53 53 53 45 45 45 45 45 45 45 45 53 50 50 50 45 45 45 45 45 45 52 52 52 52 54 54 60 60 48 46 46 46 46 46 46 48 50 66 66 66 66
+ 66 48 46 48 46 46 46 55 55 48 48 48 53 50 50 55 61 61 61 61 61 53 53 53 53 46 46 46 51 51 55 55 55 51 51 51 55 53 53 55 55 51 51 51 55 55 55 53 61 61
+ 61 54 54 52 48 46 45 46 46 46 45 45 47 60 60 60 60 48 48 48 48 56 52 51 51 51 51 45 47 47 52 52 51 51 47 46 46 46 45 46 48 56 56 52 51 51 52 52 51 51
+ 46 53 46 46 45 45 45 46 45 45 45 45 52 54 60 51 48 48 42 42 41 45 51 51 51 51 47 46 46 42 42 42 47 46 45 46 43 43 45 52 52 47 54 45 45 45 60 60 52 56
+ 51 51 51 55 53 53 51 52 52 52 45 47 52 47 56 60 66 66 56 52 47 60 52 56 54 56 56 52 52 44 47 47 52 46 56 32 32 32 47 58 66 52 52 47 47 47 45 45 42 42
+ 45 45 55 50 53 52 56 56 56 56 50 45 45 45 45 45 45 52 52 54 54 52 49 50 49 49 41 43 45 48 45 45 48 47 49 48 56 52 52 53 53 52 52 42 34 34 34 42 42 56
+ 56 42 52 44 43 33 39 37 34 32 30 36 42 42 42 37 40 40 33 22 19 25 16 24 34 41 34 34 15 15 15 35 30 32 34 39 39 32 35 32 32 32 43 39 35 42 34 39 39 39
+ 40 50 50 57 57 47 43 42 39 34 33 23 28 20 20 24 26 26 31 32 31 33 21 21 21 29 25 29 29 29 30 38 38 36 36 54 43 43 50 41 31 18 20 32 27 25 34 40 29 29
+ 21 24 33 35 38 39 32 29 18 25 25 30 30 33 34 34 35 35 29 29 28 32 29 35 35 33 29 29 33 29 21 18 15 15 20 18 25 29 40 48 40 32 29 22 19 25 23 23 21 15
+ 15 17 24 27 26 29 25 24 26 22 18 15 18 23 25 25 24 23 25 24 20 17 18 18 15 15 18 11 15 10 9 8 19 15 9 12 9 11 15 21 15 18 18 21 16 23 14 20 14 15
+ 14 17 15 21 14 13 14 14 17 17 16 16 13 18 11 12 11 21 19 18 19 24 25 13 15 12 12 11 16 18 23 20 18 18 18
+
+AF LL2273r U -45
+AF LL2273f U 36
+BS 1 50 LL2273r
+BS 51 63 LL2273f
+BS 64 64 LL2273r
+BS 65 65 LL2273f
+BS 66 68 LL2273r
+BS 69 69 LL2273f
+BS 70 70 LL2273r
+BS 71 72 LL2273f
+BS 73 73 LL2273r
+BS 74 83 LL2273f
+BS 84 93 LL2273r
+BS 94 107 LL2273f
+BS 108 108 LL2273r
+BS 109 112 LL2273f
+BS 113 116 LL2273r
+BS 117 118 LL2273f
+BS 119 119 LL2273r
+BS 120 135 LL2273f
+BS 136 136 LL2273r
+BS 137 137 LL2273f
+BS 138 138 LL2273r
+BS 139 145 LL2273f
+BS 146 146 LL2273r
+BS 147 155 LL2273f
+BS 156 161 LL2273r
+BS 162 175 LL2273f
+BS 176 176 LL2273r
+BS 177 177 LL2273f
+BS 178 178 LL2273r
+BS 179 181 LL2273f
+BS 182 184 LL2273r
+BS 185 202 LL2273f
+BS 203 203 LL2273r
+BS 204 206 LL2273f
+BS 207 207 LL2273r
+BS 208 312 LL2273f
+BS 313 317 LL2273r
+BS 318 321 LL2273f
+BS 322 323 LL2273r
+BS 324 332 LL2273f
+BS 333 335 LL2273r
+BS 336 339 LL2273f
+BS 340 341 LL2273r
+BS 342 343 LL2273f
+BS 344 344 LL2273r
+BS 345 346 LL2273f
+BS 347 350 LL2273r
+BS 351 354 LL2273f
+BS 355 356 LL2273r
+BS 357 365 LL2273f
+BS 366 366 LL2273r
+BS 367 370 LL2273f
+BS 371 377 LL2273r
+BS 378 382 LL2273f
+BS 383 383 LL2273r
+BS 384 384 LL2273f
+BS 385 385 LL2273r
+BS 386 386 LL2273f
+BS 387 391 LL2273r
+BS 392 392 LL2273f
+BS 393 393 LL2273r
+BS 394 396 LL2273f
+BS 397 399 LL2273r
+BS 400 407 LL2273f
+BS 408 414 LL2273r
+BS 415 421 LL2273f
+BS 422 432 LL2273r
+BS 433 434 LL2273f
+BS 435 435 LL2273r
+BS 436 439 LL2273f
+BS 440 441 LL2273r
+BS 442 450 LL2273f
+BS 451 451 LL2273r
+BS 452 452 LL2273f
+BS 453 453 LL2273r
+BS 454 454 LL2273f
+BS 455 466 LL2273r
+BS 467 468 LL2273f
+BS 469 471 LL2273r
+BS 472 476 LL2273f
+BS 477 483 LL2273r
+BS 484 485 LL2273f
+BS 486 487 LL2273r
+BS 488 489 LL2273f
+BS 490 507 LL2273r
+BS 508 510 LL2273f
+BS 511 517 LL2273r
+BS 518 520 LL2273f
+BS 521 528 LL2273r
+BS 529 533 LL2273f
+BS 534 557 LL2273r
+BS 558 560 LL2273f
+BS 561 581 LL2273r
+BS 582 590 LL2273f
+BS 591 593 LL2273r
+BS 594 596 LL2273f
+BS 597 602 LL2273r
+BS 603 607 LL2273f
+BS 608 645 LL2273r
+BS 646 651 LL2273f
+BS 652 663 LL2273r
+BS 664 666 LL2273f
+BS 667 669 LL2273r
+BS 670 670 LL2273f
+BS 671 735 LL2273r
+
+RD LL2273r 781 0 0
+aatxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccac
+gcgtccgcttttttttttttttttttttttTTTTTCTCATAAAACCTCTG
+TTTatat*tTGAACAAAGTTACATTATAAGATGAGATGATGAGTGATGAG
+AAAGGAGAAAAAGATTTCTAAGAGGAGGAGAAGGTATCGATGATGGTGGT
+GTGAAGGGGATcattGAGGTGAGTAGCCCAATTGTTGAGTGGGCCTTTAC
+CAGTAGCAGCGGCTTGAACAGCGAAGCCCAAGAAGGCGACCATGGCAAGA
+CGAGCGTGCTTGATCTCAGCTAACTGAAGCTGAGCCATCTTCTCAGGGTC
+AGACGCTAAACCAAGCGGGTCGAAGAATTTGCCGCCAGGGTACAAACGCT
+TCTCAGAGTCAAGCTCAGCGTTGCGCTGGAACTCGATGTAGCCGATCACT
+AACACCTCGATCCATATCAACGTCGAGATGGAGAAGGGTAGTGGCTGCCC
+CAAGTAGGACGATCCATCCACTAGCTCCACCTTTCCGGCGTCTTGCCAAG
+TGACGCCGGTGAGCCAtTcGACGGagaGAGCGCCGAGAGTGGCGAGCATG
+GCCCACCGTCCGTGAATCAGCTCGCATTCTCTGAACCtCtGGATCCCGAA
+cACCtCGCTGTACGGCTGAAACGGAGTcgaTtTGGGGGCCgcctcttcGG
+TACGGGttccGATCAcGtctccggctaagttcttcGccaGaTtCtggtcT
+agtgaatctatatCgaaCTggagataCTcgg
+
+QA 81 781 47 781
+DS CHROMAT_FILE: LL2273r PHD_FILE: LL2273r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:14 2000
+
+RD LL2273f 767 0 0
+cttaaccggaaccccTCTGTTTATatcttGAACAAAGTTACATTATAAGA
+TGAGATGATGAGTGATGAGAAAGGAGAAAAAGATTTCTAAGAGGAGGAGA
+AGGTATCGATGATGGTGGTGTGAAGGGGATCACTGAGGTGAGTAGCCCAA
+TTGTTGAGTGGGCCTTTACCAGTAGCAGCGGCTTGAACAGCGAAGCCCAA
+GAAGGCGACCATGGCAAGACGAGCGTGCTTGATCTCAGCTAACTGAAGCT
+GAGCCATCTTCTCAGGGTCAGACGCTAAACCAAGCGGGTCGAAGAATTTG
+CCGCCAGGGTACAAACGCTTCTCAGAGTCAAGCTCAGCGTTGCGCTGGAA
+CTCGATGTAGCCGATCACTAACACCTCGATCCATATCAACGTCGAGATGG
+AGAAGGGTAGTGGCTGCCCCAAGTAGGACGATCCATCCACTAGCTCCACC
+TTTCCGGCgtttTgtcaAGTGACGCcggcGAGCCAtccgACGGagaGAGC
+GCCGAGAGTGGCGAGCATGGCCCACCgtncgcgaATCacctCGCATTCTC
+TGAACCtCTGGattccgAACACCtccctgcacGGCTGAAACGGAgttcat
+tcgcgggtcgCCTCttcGGTAcggcttcCGATCACgttctcggctccctc
+ctttcccacactctgccccactcactcacatcccattcgttcattttgcc
+cgcttcacctactttataggctcaatctgtctttttcccttatcgccctc
+tcccctctcttgttacc
+
+QA 28 526 14 637
+DS CHROMAT_FILE: LL2273f PHD_FILE: LL2273f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:36 2000
+
+CO Contig12 771 2 7 U
+agcgcacacccaccatatgatctttaagggaatgacttaacaactgttgg
+taatagttttgttgcttctcatataaCCCAATAGAAAAAAAAATTAATCA
+TCCCGCTTATGGAAAATTAACATGCTTTCAGTTTTATTATAAAAAGTTTC
+TAAGCATTAGCTCCAATGGGAACATTCTTGAGTCTTGATGTTCCGTAAAG
+GGTTTCTTCAAAGCGGATGATCTCGTTCTTTGAGCCATAAGCAACTTGAG
+TTCTATCGTCGAGGTTGACGATGGTCTTGTCGAGCACAGACTTGTATCCA
+TCACTGCTGAATCCACCAGCATTCTCAATGAGTAGGCCTAAAGGAGCCAC
+TTCGAACAAGAGTCTCAACTTTGCCTTAGCCGTAGGAGAAGTCACGTTTG
+TGAAGATTCCTTTCTCCTTCACAATAATCTGGTTAACGTCAGGAACCATT
+CCTCCTGTATATCTCAATGTGTACTTCTCTTTCACGTAGTAATCAATCAG
+CTTGCTGTATTCAGAATTGTCAAACGTGGCTCTCAAGTTTCCTGGTGAGA
+ACATTTTCCCTTCATTAATCTctgTGGTCTCCTTAACATGCTGCCATTTA
+CCTTCATCAAGA*AGCAAGAAC*TCATGAG*TTCC*TGGAAACCCTTTAA
+CAGCCAAAACATAAGTGgttcTTGGACCATagATTccCATGgCTGCAGcc
+ACttGATCTcctcccgTGACTccggTtaaCTTATCACCAGGCCAAACCGg
+ACgcgtGggCggacgcgtggc
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 40 39 37 35 35 35 37 37 40 46 51 56 56 56 56 40 40 37 40 40 35 35 35
+ 35 35 35 35 35 35 35 35 35 35 39 51 51 51 51 35 35 35 35 35 35 37 35 39 35 35 35 35 40 37 37 37 35 35 35 38 38 38 38 38 39 40 40 56 56 50 42 42 40 40
+ 40 40 40 40 42 37 37 40 35 35 35 35 35 43 40 51 51 56 56 51 51 51 51 51 43 41 45 45 45 51 51 45 45 45 45 45 43 43 43 42 42 38 43 43 40 41 45 56 56 56
+ 56 56 40 38 43 43 36 43 43 51 51 45 45 45 45 45 45 56 56 51 51 45 43 43 38 38 38 41 41 51 56 56 56 46 46 43 43 42 46 56 56 56 43 43 43 43 43 43 56 51
+ 45 43 43 43 43 43 43 45 45 45 51 51 51 56 56 43 43 43 43 43 43 43 51 56 56 56 51 51 45 45 45 45 45 43 43 43 43 43 43 43 43 43 43 45 51 51 51 51 51 51
+ 51 43 43 43 43 43 43 43 43 43 56 56 46 43 43 43 43 43 51 51 51 51 45 45 45 45 45 51 56 56 56 56 51 45 45 45 45 45 45 51 51 56 56 56 51 51 43 43 43 43
+ 43 43 43 51 51 51 56 51 51 51 51 51 51 51 51 51 45 45 45 45 45 45 56 56 56 50 50 46 46 51 51 51 51 51 51 46 46 50 50 50 56 56 45 45 45 45 45 51 56 56
+ 56 56 56 56 51 45 45 45 45 41 41 56 46 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 56 56 56 45 45 51 43 43 43 51 51 51 56 56 56 51 45 45 45 45 40 40
+ 51 51 51 51 51 56 56 56 56 56 45 45 45 45 45 45 56 56 50 50 50 45 45 40 45 40 42 56 56 56 56 56 51 51 51 51 51 51 56 56 56 56 48 56 44 44 42 43 45 45
+ 45 37 37 37 37 37 37 40 56 56 56 56 56 56 42 46 46 46 40 40 42 46 45 40 51 42 42 35 35 35 35 35 37 43 46 56 56 42 42 40 45 45 45 48 42 42 42 44 42 42
+ 42 42 42 42 42 56 56 42 46 42 40 38 35 35 38 40 40 38 42 37 48 16 16 11 21 21 40 40 40 44 40 40 40 34 32 29 34 48 32 32 29 29 25 25 27 29 33 33 34 34
+ 34 32 29 31 33 32 40 40 29 29 29 29 29 29 40 40 40 34 22 24 25 27 32 32 48 40 37 29 25 28 25 29 27 34 34 27 32 32 32 32 32 32 32 29 25 24 29 32 38 41
+ 38 38 35 42 39 37 27 26 25 20 25 24 20 15 10 10 17 44 46 50 48 70 45 42 40 24 18 18 22 24 23 18 18 27 40 29 22 19 23 25 25 25 25 24 19 15 21 23 18 18
+ 25 32 40 26 25 17 15 19 15 10 15 17 26 22 24 21 25 18 13 15 12 20 16 19 19 32 32 24 25 29 27 29 27 27 25 22 22 25 24 20 22 25 27 25 26 18 24 21 18 18
+ 18 18 22 13 13 21 18 18 17 17 13 11 19 15 0 0 0
+
+AF LL2274f U -3
+AF LL2274r C 4
+BS 1 652 LL2274f
+BS 653 660 LL2274r
+BS 661 661 LL2274f
+BS 662 663 LL2274r
+BS 664 671 LL2274f
+BS 672 679 LL2274r
+BS 680 771 LL2274f
+
+RD LL2274f 776 0 0
+ctgnagcgcacacccaccatatgatctttaagggaatgacttaacaactg
+ttggtaatagttttgttgcttctcatataaCCCAATAGAAAAAAAAATTA
+ATCATCCCGCTTATGGAAAATTAACATGCTTTCAGTTTTATTATAAAAAG
+TTTCTAAGCATTAGCTCCAATGGGAACATTCTTGAGTCTTGATGTTCCGT
+AAAGGGTTTCTTCAAAGCGGATGATCTCGTTCTTTGAGCCATAAGCAACT
+TGAGTTCTATCGTCGAGGTTGACGATGGTCTTGTCGAGCACAGACTTGTA
+TCCATCACTGCTGAATCCACCAGCATTCTCAATGAGTAGGCCTAAAGGAG
+CCACTTCGAACAAGAGTCTCAACTTTGCCTTAGCCGTAGGAGAAGTCACG
+TTTGTGAAGATTCCTTTCTCCTTCACAATAATCTGGTTAACGTCAGGAAC
+CATTCCTCCTGTATATCTCAATGTGTACTTCTCTTTCACGTAGTAATCAA
+TCAGCTTGCTGTATTCAGAATTGTCAAACGTGGCTCTCAAGTTTCCTGGT
+GAGAACATTTTCCCTTCATTAATCTctgTGGTCTCCTTAACATGCTGCCA
+TTTACCTTCATCAAGA*AGCAAGAAC*TCATGAG*TTCC*TGGAAACCCT
+TTAACAGCCAAAACATAAGTGgttcTTgGACCATagATTccCATGgCTGC
+AGccACttGATCTcctcccgTGACTccggTtaaCTTATCACCAGGCCAAA
+CCGgACgcgtGggCggacgcgtggcn
+
+QA 81 772 5 775
+DS CHROMAT_FILE: LL2274f PHD_FILE: LL2274f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:41 2000
+
+RD LL2274r 866 0 0
+atataactcttagaaacaacaattcagcagtacggacatggagaatcagc
+atgcgctcagtcggaccatacacagagtccaatcataggctgcaatgaga
+acagtatttagttgtgaagtgccgaacaagggattctcaaaacggacgat
+ctcgttcttgaagccataagcagcttgagctcgatcgtcgagtgtaacga
+cgctctcatcgagcgcataggtagtatccgacaccgaataagccaccacc
+actcgcaaaggaagccttaaaatagccgctgatacaaagctctccactat
+gcttcaccaggtagcatgaagcgcgcgctttgatgacagtcatcacgttc
+ctaccttgcgagcagagtatactactagagtgtatactgtcccagcgaga
+gcctatntccatacgctgtcactatgcacctaagcgtcgttacagtgctc
+aatttcccaggtagctaactcaatcacagcantgtagtaatcagatgatg
+tcggtcagtggctagctaaggttgtctgagagatgagcaattatctcctc
+atacatctagttgtattgtggtcgccatcaacattggagacatcatacat
+tcatcagaatagcaagaacgtcatgaggttccgtgtaatccgtttaACAG
+CCAAAACATAagaga*tcTTGGACCATacaatgccatgggggcaagcact
+*gctctcctccatgttgcaccggttaacaaaactaccagcccaatccatc
+cgtgtagccggaagcgtgctcgaacgcggagacgaacgcgcgggcggacg
+cgtgggtcgacccgggaatcccggaacggtaccgcaagtacccngagggn
+gnnnaaaaggttatac
+
+QA -1 -1 596 711
+DS CHROMAT_FILE: LL2274r PHD_FILE: LL2274r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:19 2000
+
+CO Contig13 319 2 1 U
+cttTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTAAATTGTAAACCAAAAAATGATATTACATTACAT
+GTGTTAACATATTTCTACTAGAAATGACATCACTAGAACTCGAATAGACT
+TAAAATTAAAGAAAAGATGAATGACTGTTATTTACCATACAACGCAAGCG
+CAAGAGAATTGTTTTCCATGTATTCATACACGAGTAGCAATTGATTCTTC
+TCGACACAACATCCATAAAGCTTGACAAGGTTTGGATGGTTCAACCCTGA
+GATCATTCCGGACGCGTGG
+
+BQ
+ 15 15 15 36 46 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 45 45 45 45 45 45 45 56 71 71 71 71 71 71 71 71 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 83 83 82 83 78 83 90 85 90 90 90 90 85 84 84 84 84 84 85 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 83 83 83 83 83 83 90 85 90 90 90 90
+ 90 84 90 90 85 85 80 80 80 80 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 85 80 80 86 86 86 86 90 84 84 84 85 85 85 90 82 70 70 70 70 70 70 69 69 69
+ 69 72 75 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 78 78 78 82 82 82 83 83 83 90 83 83 83 83 78 78 78 78 78 78 78 82 83 83 83 89 89 83 90 90 89 82
+ 82 82 82 82 83 90 90 90 90 90 90 90 90 90 90 90 90 86 83 88 78 78 76 76 83 83 89 83 85 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 85 80 80 78 78 83 88 90 90 90 49 49 49 34 33 32
+
+AF LL2275r C -406
+AF LL2275f U 63
+BS 1 319 LL2275r
+
+RD LL2275r 771 0 5
+aaagggagcccccgatttagagcttgacggggaaagccggcgaacgtggc
+gagaaaggaagggaagaaagcgaaaggagcgggcgctagggcgctggcaa
+gtgtagcggtcacgctgcgggtaaccaccacacccgccgcgcttaatgcg
+ccgctacagggcgcgtcccattcgccattcagggtgcgcaactgttggga
+agggcgatcggtgcgggccttttcgctattacgccagctggcgaaagggg
+gatgtgctgcaaggcgattaagttgggtaacgccagggttttxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxcttTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTTTTTAAATTGTAAACCAAAAAATGATATTAC
+ATTACATGTGTTAACATATTTCTACTAGAAATGACATCACTAGAACTCGA
+ATAGACTTAAAATTAAAGAAAAGATGAATGACTGTTATTTACCATACAAC
+GCAAGCGCAAGAGAATTGTTTTCCATGTATTCATACACGAGTAGCAATTG
+ATTCTTCTCGACACAACATCCATAAAGCTTGACAAGGTTTGGATGGTTCA
+ACCCTGAGATCATTCCGGACGCGTGGxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxatt
+
+QA 1 726 408 726
+DS CHROMAT_FILE: LL2275r PHD_FILE: LL2275r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:23 2000
+
+RT{
+LL2275r chimera phrap 1 292 000919:094547
+}
+
+RT{
+LL2275r matchElsewhereHighQual phrap 1 119 000919:094547
+}
+
+RT{
+LL2275r matchElsewhereHighQual phrap 121 183 000919:094547
+}
+
+RT{
+LL2275r matchElsewhereHighQual phrap 185 236 000919:094547
+}
+
+RT{
+LL2275r matchElsewhereHighQual phrap 222 292 000919:094547
+}
+
+RD LL2275f 767 0 2
+ctttaaggtgacaaCCAAAAAATGATATTACATTACATGTGTTAACATAT
+TTCTACTAGAAATGACATCACTAGAACTCGAATAGACTTAAAATTAAAGA
+AAAGATGAATGACTGTTATTTACCATACAACGCAAGCGCAAGAGAATTGT
+TTTCCATGTATTCATACACGAGTAGCAATTGATTCTTCTCGACACAACAT
+CCATAAAGCTTGACAAGGTTTGGATGGTTCAACCCTGAGATCATTCCGGA
+CGCGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxGCCTGGGGTGCCTAATGAGTGAGCTAACTC
+ACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTC
+GTGCCAGCTGCATTAATGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGC
+GTATTGGGCGCCAGGGTGGTTTTTCTTTTCACCAGTGAGACGGGCAACAG
+CTGATTGCCCTTCACCGCCTGGCCCTGAGAGAGTTGCAGCAAGCGGTCcA
+CGCTGGTTTGCCCCAGCAGGCGAAAATCCTGTTTGATGGTGgttaACGGC
+GGGATATAACATGAGCTGTCTTCGGTATCGTCGTATCCCACTACCGAGAT
+ATCCGCACCAACGCGCA
+
+QA 15 767 13 257
+DS CHROMAT_FILE: LL2275f PHD_FILE: LL2275f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:46 2000
+
+RT{
+LL2275f chimera phrap 421 767 000919:094547
+}
+
+RT{
+LL2275f matchElsewhereHighQual phrap 421 767 000919:094547
+}
+
+CO Contig14 1056 2 32 U
+ccttttcttagaagcatatgataaaaccattgtagtatatcaagaagatg
+aagatggaagtttgttttcacagtgtcaatAGAAATTGTCATCAAACAAA
+GTCTACAAAGCTCTTTAATAAACTAGAAATGGCTTAAAGTGTAGGAGCTC
+TCTCAGCGGTTCCAGCTATGACGGTAAGCAAATTGTTTCCAAAAGGATCA
+CTGAGATGCTTTGAAAGGTTCTCAACAGGACCTTCTCCAGTAACATAAGC
+TTGGATGAAGAAACCGAGCATCGCAAACATGGCTAATCTCCCGTTCTTGA
+TCTCTTTAACCTTGAGGAGAGCTCCTTGTTCGGGGTCCTTAGCAAGTCCT
+AAAGGATCAAATGGACCTCCGGGGTGTAGCTTGTCCTCAAAATCCAATCC
+GTTGGTGATTCTGTAGTACTCGGCTCCACCGAGGAGAACAACCTCAGCAA
+CTACGGCGAGAACAAGGTTGATTGGGATGTTCTTGCCAAAGTAGTTCAAT
+GTGTTTCCATCAAGAAGCAGAGCACCAGTCTTGAACCAGACGGCTTCAGG
+ACCACAGTTAGCTCCGTATTTGTTTAAAGCTTCAGGGATGATGAAACCAG
+CTGCTCCCAACATAGCCCATCTCGCATGGATCAGCTCAAAGGCTTGGTAT
+TTAGCAAAGTTCTCAGGCTTCTTTCCAAGTCCAAATGGGTCATAGCCATA
+GTCTCCAGCAACTTCACCGTTTAAGTACTCTGGGATCTCTGATCTGTCCA
+AAAGACCATCCGGCAGGAAAACTCTCCTGTCAGGACCATACCACTTGGCG
+AGTTCATCGTTAGCAACAGAGACGGTTTTGGCCTTGGCAGGAGCTGGCTT
+TTTCTTGGAGAACAGAGCAACTGTCTTAAACGTAGCCGGGCTCGATGCCG
+ATGGAGCAGACGATCTTGACACTGCCTTGAAGTTAAGTGGCTTACCAAGC
+ATTTCCGAAACACCCATAGACGCCATCTTTGTACCCCAAAGTTTCAAGCT
+TTAACTCTGTTTCAAAACAAtgaaatgctaagtgaaaggaaggaacggac
+gcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 40 40 51 51 46 46 46 46 46 40 40 40 46 51 51 51 51 51 51
+ 51 39 39 39 39 39 40 51 40 39 39 39 39 35 39 39 35 35 35 39 38 35 35 35 35 35 35 40 51 39 35 35 35 35 35 35 39 39 51 43 43 43 43 43 43 43 35 35 35 35
+ 35 35 43 43 43 43 43 43 35 35 37 43 43 43 35 35 35 35 35 35 43 43 43 43 43 43 43 43 43 43 43 56 56 56 56 38 38 43 43 46 46 56 56 56 56 51 51 43 36 36
+ 36 36 36 43 51 43 35 35 35 35 35 43 56 56 56 56 56 51 45 45 45 45 45 45 51 51 51 43 43 43 43 43 43 45 45 45 45 45 51 56 56 56 51 45 45 45 45 43 43 45
+ 45 45 51 56 51 51 51 51 51 51 51 45 43 43 43 43 45 45 56 43 36 36 36 36 36 36 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 45 45 51 51 51 51 51
+ 51 51 51 51 45 45 45 45 45 45 45 45 51 51 56 56 56 56 71 58 58 58 58 67 65 67 78 75 73 80 80 80 77 67 69 69 60 60 60 66 69 76 74 76 69 69 69 60 60 63
+ 69 70 85 85 85 85 90 88 88 88 85 81 71 64 69 64 64 69 73 77 83 82 81 83 83 81 81 70 72 72 69 74 72 80 75 75 80 80 88 88 88 88 85 81 81 88 88 77 81 89
+ 79 74 74 85 88 83 88 85 85 85 90 90 78 80 85 88 78 76 85 81 85 90 85 79 77 90 90 90 79 79 90 85 80 80 80 80 75 75 76 76 76 80 80 80 83 83 83 77 63 63
+ 70 85 77 74 72 72 72 75 72 89 90 90 90 90 84 84 81 79 79 82 88 88 90 90 90 90 90 90 76 74 74 76 77 67 71 75 78 78 86 80 75 75 75 79 72 84 90 90 90 90
+ 90 90 90 90 85 80 90 90 90 90 90 90 90 90 90 90 90 90 90 90 83 83 78 83 77 77 76 83 88 88 90 90 77 72 72 72 72 88 88 88 90 84 84 86 88 88 90 90 90 90
+ 90 90 90 86 80 84 79 79 79 75 79 76 88 82 85 90 90 90 90 90 90 88 90 90 90 86 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 85 90 86 74 79 80
+ 74 72 83 83 81 78 78 90 85 85 74 75 75 75 75 79 86 90 90 90 80 69 69 69 67 70 83 85 90 90 90 83 77 77 72 72 81 81 85 90 83 83 69 65 64 66 74 69 85 83
+ 72 72 66 61 56 60 55 59 56 64 65 74 90 83 80 75 76 80 71 75 80 67 71 75 73 67 61 61 66 68 68 74 90 90 90 85 80 80 75 80 75 83 83 70 68 65 65 68 70 69
+ 76 83 72 80 70 67 72 68 65 67 69 78 74 74 77 81 77 76 72 66 66 62 60 60 64 68 68 72 68 68 64 69 74 73 74 90 90 90 90 90 90 90 90 76 77 67 71 66 71 78
+ 78 82 66 71 65 59 64 73 66 69 62 69 60 66 66 52 53 53 59 51 51 51 51 51 51 56 43 43 43 43 43 43 45 45 45 51 51 56 56 56 56 56 56 56 56 43 43 43 43 43
+ 43 43 43 43 43 51 45 45 43 43 43 43 43 43 43 43 51 51 51 51 45 45 43 43 43 43 43 43 51 51 51 51 51 51 43 43 43 43 43 43 45 45 45 45 45 56 56 56 56 56
+ 56 56 56 56 56 51 51 51 51 51 51 51 51 45 43 43 43 43 43 43 51 51 51 51 51 51 51 51 43 43 43 43 43 43 43 43 43 43 45 45 43 43 43 43 43 43 43 43 43 43
+ 43 43 43 35 35 35 35 35 35 43 43 43 43 43 51 51 51 51 51 51 51 51 51 51 51 51 51 51 43 43 43 43 43 40 40 40 40 38 39 40 40 40 40 40 46 39 39 39 39 39
+ 39 46 51 40 39 39 39 39 39 40 51 51 51 56 39 35 35 35 35 35 35 35 35 35 40 56 51 51 51 39 39 40 40 40 40 56 56 56 51 51 51 40 40 46 40 40 40 45 45 35
+ 40 40 40 40 35 51 40 40 40 40 46 46 46 46 46 51 51 51 40 40 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0
+
+AF LL2277f U 1
+AF LL2277r C 314
+BS 1 425 LL2277f
+BS 426 428 LL2277r
+BS 429 430 LL2277f
+BS 431 431 LL2277r
+BS 432 459 LL2277f
+BS 460 460 LL2277r
+BS 461 482 LL2277f
+BS 483 483 LL2277r
+BS 484 484 LL2277f
+BS 485 485 LL2277r
+BS 486 486 LL2277f
+BS 487 490 LL2277r
+BS 491 497 LL2277f
+BS 498 498 LL2277r
+BS 499 503 LL2277f
+BS 504 509 LL2277r
+BS 510 510 LL2277f
+BS 511 511 LL2277r
+BS 512 537 LL2277f
+BS 538 555 LL2277r
+BS 556 561 LL2277f
+BS 562 566 LL2277r
+BS 567 569 LL2277f
+BS 570 578 LL2277r
+BS 579 579 LL2277f
+BS 580 587 LL2277r
+BS 588 593 LL2277f
+BS 594 611 LL2277r
+BS 612 617 LL2277f
+BS 618 752 LL2277r
+BS 753 769 LL2277f
+BS 770 1056 LL2277r
+
+RD LL2277f 776 0 0
+ccttttcttagaagcatatgataaaaccattgtagtatatcaagaagatg
+aagatggaagtttgttttcacagtgtcaatAGAAATTGTCATCAAACAAA
+GTCTACAAAGCTCTTTAATAAACTAGAAATGGCTTAAAGTGTAGGAGCTC
+TCTCAGCGGTTCCAGCTATGACGGTAAGCAAATTGTTTCCAAAAGGATCA
+CTGAGATGCTTTGAAAGGTTCTCAACAGGACCTTCTCCAGTAACATAAGC
+TTGGATGAAGAAACCGAGCATCGCAAACATGGCTAATCTCCCGTTCTTGA
+TCTCTTTAACCTTGAGGAGAGCTCCTTGTTCGGGGTCCTTAGCAAGTCCT
+AAAGGATCAAATGGACCTCCGGGGTGTAGCTTGTCCTCAAAATCCAATCC
+GTTGGTGATTCTGTAGTACTCGGCTCCACCGAGGAGAACAACCTCAGCAA
+CTACGGCGAGAACAAGGTTGATTGGGATGTTCTTGCCAAAGTAGTTCAAT
+GTGTTTCCATCAAGAAGCAGAGCACCAGTCTTGAACCAGACGGCTTCAGG
+ACCACAGTTAGCTCCGTATTTGTTTAAAGCTTCAGGGATGATGAAACCAG
+CTGCTCCCAACATAGCCCATCTCGCATGGATCAGCTCAAAGGCTTGGTAT
+TTAGCAAAGTTCTCAGGCTTCTTTCCAAGTCCAAATGGGTCATAGCCATA
+GTCTCCAGCAACTTCACCGTTTAAGTACTCTGGGATCTCTGATCTGTCCA
+AAAGACCATCCGGCAGGAAAACTCTC
+
+QA 81 752 1 776
+DS CHROMAT_FILE: LL2277f PHD_FILE: LL2277f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:56 2000
+
+RD LL2277r 787 0 0
+nagGAGAGCTCCTTGTTCGGGGTCCTTAGCAAGTCCTAAAGGATCAAATG
+GACCTCCGGGGTGTAGCTTGTCCTCAAAATCCAATCCGTTGGTGATTCTG
+TAGTACTCGGCTCCACCGAGGAGAACAACCTCAGCAACTACGGCGAGAAC
+AAGGTTGATTGGGATGTTCTTGCCAAAGTAGTTCAATGTGTTTCCATCAA
+GAAGCAGAGCACCAGTCTTGAACCAGACGGCTTCAGGACCACAGTTAGCT
+CCGTATTTGTTTAAAGCTTCAGGGATGATGAAACCAGCTGCTCCCAACAT
+AGCCCATCTCGCATGGATCAGCTCAAAGGCTTGGTATTTAGCAAAGTTCT
+CAGGCTTCTTTCCAAGTCCAAATGGGTCATAGCCATAGTCTCCAGCAACT
+TCACCGTTTAAGTACTCTGGGATCTCTGATCTGTCCAAAAGACCATCCGG
+CAGGAAAACTCTCCTGTCAGGACCATACCACTTGGCGAGTTCATCGTTAG
+CAACAGAGACGGTTTTGGCCTTGGCAGGAGCTGGCTTTTTCTTGGAGAAC
+AGAGCAACTGTCTTAAACGTAGCCGGGCTCGATGCCGATGGAGCAGACGA
+TCTTGACACTGCCTTGAAGTTAAGTGGCTTACCAAGCATTTCCGAAACAC
+CCATAGACGCCATCTTTGTACCCCAAAGTTTCAAGCTTTAACTCTGTTTC
+AAAACAAtgaaatgctaagtgaaaggaaggaacggacgcgtggxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 6 707 2 743
+DS CHROMAT_FILE: LL2277r PHD_FILE: LL2277r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:33 2000
+
+CO Contig15 728 2 1 U
+taaccccgccccaaaggaagttaagaaagggaaaggggcggggctaggtc
+cctctcaagtgtgcggtttttttgacggaacaaggggggccctttgttaa
+ttccccctacaggaaaagggaattcgctcaaaggggtttacatttgttag
+caacgggtttttcccagggccttttggttaaaacgccggccgtgaaaaag
+agtatgtgggcacacaaaaaaaagtttggtaacccacgggccttgccagt
+aaattgggtgctattaaaaggcccgccccatttatttaggttggctttat
+aaaagatttatttttttgcattgtttcgcgtTTTttatTTtgtTtTTTTt
+ttatttttTttttTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTCAGACTCTACCAAACGTAAAATAAAACCTGCT
+TGTAGCACAACTCCAGGCAACAACAACAAGCAAATTACAATCTCTAATAT
+ATTTGCCTAGTGACTGACAATAGGCAAATAGGGATAGAGACCCCTTGAAG
+ATGTAGTAGTAGCTAAGACTAAGATTTATGTGCTCTGTGATCAGGCCTTA
+ACCTCCTCTCCACCACCACCAGATCCTCCTCCAGCTTTCTCTCTCTCTTT
+CTCAGCATCCTCTTCTTCTCCTTCCTCAGCATCGGCTTCATCTGCTAGCT
+TTGTGAGCTCATCTTGGCGGACGCGTGG
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 16 25 23 24 16 12 12 12 22 33 12 11 11 20 19 29 29 33 44 19
+ 16 11 11 11 19 12 17 17 24 19 15 15 15 21 28 48 48 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 56 56 56 56 71 75 82 75 72 72 77 75 75 77 70 70 70 74 76 70 71 71 74 78 90 90 77
+ 80 72 74 77 77 77 83 83 80 80 90 90 89 89 82 82 82 76 83 83 85 85 85 90 90 90 90 90 90 90 90 89 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 82 82 83 83 89 83 85 85 85 85 85 85 82 82 82 83 83 83 88 90 82 80 80 80 80 80 82 82 82 85 82 82 82 85 90 88 88 90 90 90 90 90 90 90
+ 90 85 80 80 78 78 78 78 78 78 78 78 78 78 86 86 86 86 86 80 80 80 80 89 82 88 90 78 78 78 73 73 78 75 73 78 78 80 86 89 88 88 81 81 86 83 76 76 75 75
+ 75 75 90 90 90 90 90 90 90 86 86 86 90 90 90 90 90 90 90 88 88 85 85 85 85 85 85 90 90 90 90 90 84 84 80 80 87 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 77 76 76 76 76 76 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 82 72 76 76 71 71 71 72 72 78 82 86 83 82 82 76 75 75 75 77 77 78
+ 78 78 82 81 81 79 74 89 67 66 66 69 75 75 75 75 75 75 70 70 76 75 44 44 44 29 29 34
+
+AF LL2279r C 1
+AF LL2279f U 412
+BS 1 728 LL2279r
+
+RD LL2279r 772 0 0
+taaccccgccccaaaggaagttaagaaagggaaaggggcggggctaggtc
+cctctcaagtgtgcggtttttttgacggaacaaggggggccctttgttaa
+ttccccctacaggaaaagggaattcgctcaaaggggtttacatttgttag
+caacgggtttttcccagggccttttggttaaaacgccggccgtgaaaaag
+agtatgtgggcacacaaaaaaaagtttggtaacccacgggccttgccagt
+aaattgggtgctattaaaaggcccgccccatttatttaggttggctttat
+aaaagatttatttttttgcattgtttcgcgtTTTttatTTtgtTtTTTTt
+ttatttttTttttTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTCAGACTCTACCAAACGTAAAATAAAACCTGCT
+TGTAGCACAACTCCAGGCAACAACAACAAGCAAATTACAATCTCTAATAT
+ATTTGCCTAGTGACTGACAATAGGCAAATAGGGATAGAGACCCCTTGAAG
+ATGTAGTAGTAGCTAAGACTAAGATTTATGTGCTCTGTGATCAGGCCTTA
+ACCTCCTCTCCACCACCACCAGATCCTCCTCCAGCTTTCTCTCTCTCTTT
+CTCAGCATCCTCTTCTTCTCCTTCCTCAGCATCGGCTTCATCTGCTAGCT
+TTGTGAGCTCATCTTGGCGGACGCGTGGxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxat
+
+QA 330 728 1 728
+DS CHROMAT_FILE: LL2279r PHD_FILE: LL2279r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:42 2000
+
+RD LL2279f 779 0 5
+ncttttaagacaactACCAAACGTAAAATAAAACCTGCTTGTAGCACAAC
+TCCAGGCAACAACAACAAGCAAATTACAATCTCTAATATATTTGCCTAGT
+GACTGACAATAGGCAAATAGGGATAGAGACCCCTTGAAGATGTAGTAGTA
+GCTAAGACTAAGATTTATGTGCTCTGTGATCAGGCCTTAACCTCCTCTCC
+ACCACCACCAGATCCTCCTCCAGCTTTCTCTCTCTCTTTCTCAGCATCCT
+CTTCTTCTCCTTCCTCAGCATCGGCTTCATCTGCTAGCTTTGTGAGCTCA
+TCTTGGCGGACGCGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxcgtcaaagcctggggtgcctaatgag
+cgagctaactcacattaattgcgctgcgctcactgcccgctttccagtcg
+ggacgcctgccgtgccacctgcattaacgcactccccatcgcgcggggag
+aggcggcttgcgcactgtccgccacggcggctctctttctcaccactgag
+acggtcgacacccgcccgccctcttccgtctcgtcccgtcacagccgctc
+cgtgccgcctcatcctcgatcgtcccggcgggctcatcgttctgcccgct
+cgcgcgctcgtcccccttttccgctcccc
+
+QA 17 545 14 317
+DS CHROMAT_FILE: LL2279f PHD_FILE: LL2279f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:05 2000
+
+RT{
+LL2279f chimera phrap 481 654 000919:094547
+}
+
+RT{
+LL2279f matchElsewhereHighQual phrap 590 606 000919:094547
+}
+
+RT{
+LL2279f matchElsewhereHighQual phrap 525 553 000919:094547
+}
+
+RT{
+LL2279f matchElsewhereHighQual phrap 502 523 000919:094547
+}
+
+RT{
+LL2279f matchElsewhereHighQual phrap 482 500 000919:094547
+}
+
+CO Contig16 440 2 50 U
+CCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTAAAGTAAAACCAACGAAAGACTCA*ACAGCTTG
+AAAACCCCATAAAAAAAAACAGAGTTTAGAGTTTCCAAAGAAGAAAACCA
+AATCATGGTCTTCTCTTTGCTTCTACAAACCTCAAATCATAATGTTTTAA
+GAAACTAAAGCCAAATCCATACCATACATACTTAGAAATGTCTTGCTCGC
+TCCCTTCCTCTGATCACTTCAACAACTACGCTTGTTTCCCTTCTTCAGGC
+TCGTCTTTTGACGCCTCCTTGATCTCATCACCACCCACCTCATCGTTGAG
+GTCAGAAGTCCAGAGGGTCAGATTGTCACGGAGAAGTTGCATTATCAGTG
+TACTGTCTTTGTATGATTCTTCTCCTAATCGGACGCGTGG
+
+BQ
+ 27 42 47 50 50 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 41 45 43 43 43 43 43 43 56 56 46 43 51 43 46 47 82 75 75 65 56 56 56 56 51 51 66 66 58 77 79 84 77
+ 79 90 90 90 90 90 79 79 79 79 85 90 90 90 90 90 90 90 90 86 80 80 80 80 82 90 90 90 90 90 90 90 90 90 90 90 86 82 90 90 90 90 90 86 82 90 90 90 90 90
+ 90 90 90 90 80 80 87 80 85 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 82 82 85 90 90 90 90 90 90 86 86 86 86
+ 90 90 86 86 86 78 71 75 76 76 76 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 90 88 88 88 88 90 90 90 90 90 88 85 88 78 90 86 78 78 78 78 78 78
+ 86 90 90 90 90 90 90 88 88 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 86 86 86 86 79 86 88 90 90 90 90 90 90 90 90 90 90 90 90 87 86 86 88 79
+ 77 77 79 79 86 90 90 90 86 78 78 78 78 78 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 83 83 83 90 84 84 84 84 84 85 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 85 85 85 90 86 86 82 90 90 90 90 90 82 82 90 90 90 90 85 85 86 84 77 74 76 76 76 76 90 90 90 90 90 90 78 76 81
+ 85 83 82 77 79 80 80 81 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 76 76 90 80 90 90 90 58 58 58 43 51 56
+
+AF LL2280r C -296
+AF LL2280f U 66
+BS 1 82 LL2280r
+BS 83 87 LL2280f
+BS 88 139 LL2280r
+BS 140 144 LL2280f
+BS 145 157 LL2280r
+BS 158 159 LL2280f
+BS 160 160 LL2280r
+BS 161 161 LL2280f
+BS 162 162 LL2280r
+BS 163 166 LL2280f
+BS 167 184 LL2280r
+BS 185 185 LL2280f
+BS 186 191 LL2280r
+BS 192 195 LL2280f
+BS 196 208 LL2280r
+BS 209 212 LL2280f
+BS 213 215 LL2280r
+BS 216 217 LL2280f
+BS 218 220 LL2280r
+BS 221 228 LL2280f
+BS 229 229 LL2280r
+BS 230 230 LL2280f
+BS 231 234 LL2280r
+BS 235 242 LL2280f
+BS 243 257 LL2280r
+BS 258 263 LL2280f
+BS 264 276 LL2280r
+BS 277 287 LL2280f
+BS 288 288 LL2280r
+BS 289 295 LL2280f
+BS 296 299 LL2280r
+BS 300 316 LL2280f
+BS 317 319 LL2280r
+BS 320 328 LL2280f
+BS 329 337 LL2280r
+BS 338 361 LL2280f
+BS 362 364 LL2280r
+BS 365 371 LL2280f
+BS 372 372 LL2280r
+BS 373 378 LL2280f
+BS 379 382 LL2280r
+BS 383 386 LL2280f
+BS 387 394 LL2280r
+BS 395 404 LL2280f
+BS 405 405 LL2280r
+BS 406 409 LL2280f
+BS 410 410 LL2280r
+BS 411 431 LL2280f
+BS 432 434 LL2280r
+BS 435 440 LL2280f
+
+RD LL2280r 782 0 5
+cacgctgcgcgtaaccaccacaccccccgcgcttaatgcgccgctacagg
+gcgcgtcccattcgccattcaggctgcgcaactgttgggaagggcgatcg
+gtgcgggccttttcgctattacgccaactggcgaaagggggatgtgctgc
+aaggcgattaagttgggtaacgccagggttttxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxCCT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTAAAGTAAAACCAACGAAAGACTCA*ACAGCTTGAAA
+ACCCCATAAAAAAAAACAGAGTTTAGAGTTTCCAAAGAAGAAAACCAAAT
+CATGGTCTTCTCTTTGCTTCTACAAACCTCAAATCATAATGTTTTAAGAA
+ACTAAAGCCAAATCCATACCATACATACTTAGAAATGTCTTGCTCGCTCC
+CTTCCTCTGATCACTTCAACAACTACGCTTGTTTCCCTTCTTCAGGCTCG
+TCTTTTGACGCCTCCTTGATCTCATCACCACCCACCTCATCGTTGAGGTC
+AGAAGTCCAGAGGGTCAGATTGTCACGGAGAAGTTGCATTATCAGTGTAC
+TGTCTTTGTATGATTCTTCTCCTAATCGGACGCGTGGxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxatt
+
+QA 94 737 298 737
+DS CHROMAT_FILE: LL2280r PHD_FILE: LL2280r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:10 2000
+
+RT{
+LL2280r chimera phrap 1 182 000919:094547
+}
+
+RT{
+LL2280r matchElsewhereHighQual phrap 1 25 000919:094547
+}
+
+RT{
+LL2280r matchElsewhereHighQual phrap 27 110 000919:094547
+}
+
+RT{
+LL2280r matchElsewhereHighQual phrap 75 126 000919:094547
+}
+
+RT{
+LL2280r matchElsewhereHighQual phrap 128 182 000919:094547
+}
+
+RD LL2280f 777 0 3
+ctttaagtaacccaACGAAAGACTcanacAGCTTGAAAACCCCATAAAAA
+AAAACAGAGTTTAGAGTTTCCAAAGAAGAAAACCAAATCATGGTCTTCTC
+TTTGCTTCTACAAACCTCAAATCATAATGTTTTAAGAAACTAAAGCCAAA
+TCCATACCATACATACTTAGAAATGTCTTGCTCGCTCCCTTCCTCTGATC
+ACTTCAACAACTACGCTTGTTTCCCTTCTTCAGGCTCGTCTTTTGACGCC
+TCCTTGATCTCATCACCACCCACCTCATCGTTGAGGTCAGAAGTCCAGAG
+GGTCAGATTGTCACGGAGAAGTTGCATTATCAGTGTACTGTCTTTGTATG
+ATTCTTCTCCTAATCGGACGCGTGGxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxgcctggggtgcc
+taatgagtgagctaactcacattaattgcgttgcgctcactgcccgcttt
+ccagtcgggaaacctgtcgtgccagctgcattaatgaatcggccaacgcg
+cggggagaggcggcttgcgtattgggcgccagggtggtttttcttttcac
+cagtgagacgggcaacagctgattgcccttcaccgcctggccctgagaga
+gttgcagcaagcggtccacgctggttt
+
+QA 29 755 12 375
+DS CHROMAT_FILE: LL2280f PHD_FILE: LL2280f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:32 2000
+
+RT{
+LL2280f chimera phrap 539 777 000919:094547
+}
+
+RT{
+LL2280f matchElsewhereHighQual phrap 665 777 000919:094547
+}
+
+RT{
+LL2280f matchElsewhereHighQual phrap 539 663 000919:094547
+}
+
+CO Contig17 969 2 46 U
+cttttagaaacgaggttcggtttctgataaaatcatacaaagatacgaca
+aaggaagtttcacaaatatacaaacaaactCTCTTTCACATGATAATTAA
+CCTTTTAACTCCTTCACTTTCCGGGGACGAAGTTGGTGGCAAAGGCCCAT
+GCATTGTTGTTGACTGGATCAGCCAAATGGTCAGCAAGATTCTCCAACGG
+TCCCTTTCCGGTGACGATAGCCTGAACGAAGAATCCAAACATAGAGAACA
+TAGCCAACCTTCCGTTCTTGATCTCCTTCACCTTCAACTCCGCGAAAGCT
+TCTGGGTCGGTAGCAAGACCCAACGGGTCAAAGCTGCCTCCTGGGTAGAG
+CAAGTCCTCTGCTTCCCCCAATGGTCCTTCTCCGGCGACTCTGTAACCTT
+CAACAGCTCCCATGAGGATCACCTGAGTAGCCCAAATGGCTAAGATGCTC
+TGTGCGTGGACCAAGCTCGGGTTTCCCAAGTAGTCGAGCCCTCCTTCGCT
+GAAGATCTGTGAACCAGCCTTGAACCAAACAGCTTCTCCGAACTTCACTC
+CGTTCCTAGCCAAAAGCTCAGGGAAAACGCAGCCTAGGGCTCCAAGCATG
+GCCCATCTGCAGTGGATAACTTCTAGCTCACGGTTCCTGGCGAAGGTCTC
+GGGGTCAGCGGAAAGACCGGCGGTGTCCCATCCGTAGTCACCGGGGAACT
+CTCCTGTGAGGTAGCTTGGGGGCTCTCCGGAGAATGGACCCAAGTACTTG
+ACCCTGTCGGATCCGTACCATGGGCTGCCAGATGGACCGGTGGGTTTGAC
+GGTTTTTCTCATGGTGACACGGCCGGTTCCAAAGACCTCTGAGGCTCCCG
+GTGAGAGCTTCACGGCCTTTCCGGTAAAGGCAGGGGAGGACAGAGCCATG
+GTTGAGGCAGCCATTAGAAATTTTCTTGATCTTttttagtttttgtgttt
+tttttttcggacggcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 56 56 56 40 40 40 40 39 39 40 51 51 51 51 35 35 35 35 35
+ 35 45 45 40 46 46 40 40 40 45 45 45 46 46 40 40 40 40 45 45 45 45 45 51 40 39 35 35 35 35 35 35 56 56 56 56 40 40 40 40 40 40 40 40 40 40 40 40 43 43
+ 43 43 43 43 56 56 51 51 51 43 43 43 43 43 43 43 45 51 45 43 40 40 43 37 40 41 51 56 56 56 56 37 38 38 38 38 38 42 43 41 41 41 41 41 38 36 36 36 38 36
+ 40 42 42 42 41 45 40 38 38 38 41 38 38 36 36 36 36 36 40 43 43 43 51 40 36 36 36 36 51 50 71 73 79 79 79 75 85 77 74 68 73 69 74 79 83 82 70 63 60 56
+ 56 61 64 60 65 62 62 62 62 62 67 67 69 70 67 67 70 67 74 80 90 90 82 80 75 76 75 68 72 65 60 60 60 70 77 72 72 72 75 61 61 66 67 67 63 69 70 77 70 65
+ 63 66 64 60 66 62 62 74 72 68 72 83 85 85 85 83 83 80 77 69 65 60 63 67 69 61 68 78 80 78 88 73 72 75 72 72 67 69 68 83 83 72 90 74 80 82 82 78 79 85
+ 85 85 85 90 90 86 90 90 85 83 90 90 90 90 90 90 90 88 88 88 83 86 86 85 83 81 81 85 85 85 90 89 84 74 74 74 79 79 85 89 85 90 90 85 80 80 85 90 90 90
+ 90 90 89 89 85 85 85 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 80 80 85 80 88 88 90 90 90 90 90 90 83 83 80 81 90 90 90 90 90 90 90 85 90
+ 90 90 83 83 83 83 83 83 90 90 90 90 90 90 90 88 88 88 83 90 90 90 90 90 90 90 90 90 90 90 90 88 85 85 84 84 84 84 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 80 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 83 83 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 83 83 83 83 83 88 90 90 90 90 90 88 86 86 86 90 86 90 90 90 90 90 88 83
+ 80 80 82 90 90 90 90 90 87 87 87 87 90 87 90 90 90 90 90 90 90 90 87 90 87 85 85 85 85 78 79 79 85 79 83 83 83 79 71 70 71 72 74 68 67 67 72 83 83 83
+ 85 90 90 89 87 76 68 68 68 64 68 75 90 72 80 68 70 63 65 62 72 72 72 70 75 68 67 68 76 90 74 74 72 72 83 83 74 72 68 70 72 70 64 68 77 90 80 74 74 80
+ 80 76 77 64 72 74 72 63 61 65 62 68 66 70 66 60 71 71 66 60 60 70 72 68 66 66 64 64 71 69 68 66 66 66 71 74 76 79 81 76 69 63 72 72 75 75 74 70 67 70
+ 70 67 73 64 76 71 69 68 65 65 68 70 65 63 67 66 64 69 70 81 81 70 74 74 62 62 59 53 53 58 51 51 51 56 56 51 51 43 43 43 43 43 43 51 51 51 51 45 45 35
+ 35 35 35 43 43 56 56 56 56 56 56 43 43 43 43 43 43 43 43 43 43 43 43 35 35 35 35 39 40 56 56 45 45 45 45 45 51 56 46 40 40 40 40 40 40 40 39 45 35 35
+ 35 35 35 35 39 39 39 39 40 51 40 40 39 39 39 39 40 46 51 45 45 40 35 35 35 39 39 39 39 39 39 39 39 40 40 40 40 40 40 51 40 39 39 39 39 39 35 39 39 39
+ 39 39 40 40 35 35 32 32 33 33 39 39 46 46 46 46 46 46 51 51 51 51 51 51 40 40 37 40 40 40 46 46 51 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2281f U 1
+AF LL2281r C 227
+BS 1 317 LL2281f
+BS 318 318 LL2281r
+BS 319 360 LL2281f
+BS 361 365 LL2281r
+BS 366 408 LL2281f
+BS 409 424 LL2281r
+BS 425 441 LL2281f
+BS 442 448 LL2281r
+BS 449 449 LL2281f
+BS 450 450 LL2281r
+BS 451 458 LL2281f
+BS 459 468 LL2281r
+BS 469 471 LL2281f
+BS 472 475 LL2281r
+BS 476 476 LL2281f
+BS 477 477 LL2281r
+BS 478 500 LL2281f
+BS 501 536 LL2281r
+BS 537 540 LL2281f
+BS 541 549 LL2281r
+BS 550 554 LL2281f
+BS 555 555 LL2281r
+BS 556 558 LL2281f
+BS 559 560 LL2281r
+BS 561 561 LL2281f
+BS 562 564 LL2281r
+BS 565 568 LL2281f
+BS 569 572 LL2281r
+BS 573 573 LL2281f
+BS 574 581 LL2281r
+BS 582 582 LL2281f
+BS 583 593 LL2281r
+BS 594 599 LL2281f
+BS 600 612 LL2281r
+BS 613 613 LL2281f
+BS 614 623 LL2281r
+BS 624 624 LL2281f
+BS 625 651 LL2281r
+BS 652 653 LL2281f
+BS 654 654 LL2281r
+BS 655 656 LL2281f
+BS 657 662 LL2281r
+BS 663 663 LL2281f
+BS 664 776 LL2281r
+BS 777 780 LL2281f
+BS 781 969 LL2281r
+
+RD LL2281f 785 0 0
+cttttagaaacgaggttcggtttctgataaaatcatacaaagatacgaca
+aaggaagtttcacaaatatacaaacaaactCTCTTTCACATGATAATTAA
+CCTTTTAACTCCTTCACTTTCCGGGGACGAAGTTGGTGGCAAAGGCCCAT
+GCATTGTTGTTGACTGGATCAGCCAAATGGTCAGCAAGATTCTCCAACGG
+TCCCTTTCCGGTGACGATAGCCTGAACGAAGAATCCAAACATAGAGAACA
+TAGCCAACCTTCCGTTCTTGATCTCCTTCACCTTCAACTCCGCGAAAGCT
+TCTGGGTCGGTAGCAAGACCCAACGGGTCAAAGCTGCCTCCTGGGTAGAG
+CAAGTCCTCTGCTTCCCCCAATGGTCCTTCTCCGGCGACTCTGTAACCTT
+CAACAGCTCCCATGAGGATCACCTGAGTAGCCCAAATGGCTAAGATGCTC
+TGTGCGTGGACCAAGCTCGGGTTTCCCAAGTAGTCGAGCCCTCCTTCGCT
+GAAGATCTGTGAACCAGCCTTGAACCAAACAGCTTCTCCGAACTTCACTC
+CGTTCCTAGCCAAAAGCTCAGGGAAAACGCAGCCTAGGGCTCCAAGCATG
+GCCCATCTGCAGTGGATAACTTCTAGCTCACGGTTCCTGGCGAAGGTCTC
+GGGGTCAGCGGAAAGACCGGCGGTGTCCCATCCGTAGTCACCGGGGAACT
+CTCCTGTGAGGTAGCTTGGGGGCTCTCCGGAGAATGGACCCAAGTACTTG
+ACCCTGTCGGATCCGTACCATGGGCTGCCAGATgg
+
+QA 81 776 1 785
+DS CHROMAT_FILE: LL2281f PHD_FILE: LL2281f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:36 2000
+
+RD LL2281r 787 0 0
+CGAAGAATCCAAACATAGAGAACATAGCCAACCTTCCGTTCTTGATCTCC
+TTCACCTTCAACTCCGCGAAAGCTTCTGGGTCGGTAGCAAGACCCAACGG
+GTCAAAGCTGCCTCCTGGGTAGAGCAAGTCCTCTGCTTCCCCCAATGGTC
+CTTCTCCGGCGACTCTGTAACCTTCAACAGCTCCCATGAGGATCACCTGA
+GTAGCCCAAATGGCTAAGATGCTCTGTGCGTGGACCAAGCTCGGGTTTCC
+CAAGTAGTCGAGCCCTCCTTCGCTGAAGATCTGTGAACCAGCCTTGAACC
+AAACAGCTTCTCCGAACTTCACTCCGTTCCTAGCCAAAAGCTCAGGGAAA
+ACGCAGCCTAGGGCTCCAAGCATGGCCCATCTGCAGTGGATAACTTCTAG
+CTCACGGTTCCTGGCGAAGGTCTCGGGGTCAGCGGAAAGACCGGCGGTGT
+CCCATCCGTAGTCACCGGGGAACTCTCCTGTGAGGTAGCTTGGGGGCTCT
+CCGGAGAATGGACCCAAGTACTTGACCCTGTCGGATCCGTACCATGGGCT
+GCCAGATGGACCGGTGGGTTTGACGGTTTTTCTCATGGTGACACGGCCGG
+TTCCAAAGACCTCTGAGGCTCCCGGTGAGAGCTTCACGGCCTTTCCGGTA
+AAGGCAGGGGAGGACAGAGCCATGGTTGAGGCAGCCATTAGAAATTTTCT
+TGATCTTttttagtttttgtgttttttttttcggacggcgtggxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 1 707 1 743
+DS CHROMAT_FILE: LL2281r PHD_FILE: LL2281r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:15 2000
+
+CO Contig18 491 2 1 U
+GCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTTGCGCTC
+ACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAA
+TCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGGTGGT
+TTTTCTTTTCACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACCGCCT
+GGCCCTGAGAGAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAGCAGG
+CGAAAATCCTGTTTGATGGTGGTTAACGGCGGGATATAACATGAGCTGTC
+TTCGGTATCGTCGTATCCCACTACCGAGATATCCGCACCAACGCGCAGCC
+CGGACTCGGTAATGGCGCGCATTGCGCCCAGCGCCATCTGATCGTTGGCA
+ACCAGCATCGCAGTGGGAACGATGCCCTCATTCAGCATTTGCATGgtttg
+ttgaAAACCGGACATGGCActccaGTcgccttCccgttccg
+
+BQ
+ 43 43 43 43 51 51 56 43 43 43 43 43 43 45 45 56 56 56 56 56 51 43 43 43 43 43 43 43 43 43 43 45 45 45 51 51 51 43 36 36 36 36 36 36 36 36 36 36 36 36
+ 36 36 36 36 43 45 43 43 43 43 43 45 56 51 51 51 51 56 56 51 51 51 45 45 45 45 45 56 51 51 45 45 45 45 45 43 45 43 43 43 43 45 45 51 51 51 56 56 56 51
+ 51 45 45 45 45 51 51 51 45 45 45 45 45 45 51 56 56 56 56 51 45 45 45 45 45 51 51 43 43 43 43 43 43 56 56 56 43 43 43 43 43 43 40 43 43 46 56 56 56 56
+ 56 56 56 56 56 56 56 51 51 45 45 45 45 51 51 56 56 51 45 45 45 45 45 51 56 56 56 56 45 45 51 45 45 45 45 45 45 45 45 45 51 45 45 45 40 39 35 35 35 39
+ 40 45 45 45 45 51 51 51 51 51 51 51 56 40 40 40 40 37 40 56 56 45 45 45 45 45 45 51 40 40 39 39 39 40 40 40 51 39 39 37 37 37 39 46 56 56 51 51 45 45
+ 45 45 45 45 56 51 51 51 51 51 51 56 56 56 56 51 51 51 51 51 51 35 35 35 35 35 39 46 42 42 42 51 46 56 46 46 51 51 51 51 56 56 56 51 51 45 45 35 35 35
+ 40 40 40 40 46 56 40 40 40 40 40 36 40 40 40 34 29 28 32 32 32 36 36 40 40 40 40 40 40 40 40 40 33 32 29 29 27 27 25 25 28 32 32 32 32 32 32 32 29 32
+ 32 26 25 25 27 27 29 32 44 40 40 40 34 26 25 25 27 32 32 34 40 39 37 29 29 29 29 29 29 42 35 35 29 32 29 32 40 40 40 40 40 32 32 29 24 24 29 29 29 25
+ 25 29 29 26 26 32 32 29 29 29 29 27 27 25 25 29 25 25 25 29 32 30 30 29 29 24 25 25 25 26 32 32 34 32 40 40 40 34 22 22 24 24 25 24 22 19 19 10 17 9
+ 9 9 19 16 25 24 22 25 25 26 20 23 25 27 25 25 21 25 21 19 9 10 10 19 21 22 19 15 10 11 14 17 21 12 15 0 0 0 0 0 0
+
+AF LL2283r C 1
+AF LL2283f U -303
+BS 1 491 LL2283f
+
+RD LL2283r 800 1 5
+aagggcgaaaaaccgtttatcagggcgatgggcccctacgtgaaccatcc
+ccctaatcaagttttttggggtcgaggtgccgtaaagcccttaattggaa
+ccctaaagggagccccccatttaaaactttacggggaaaaccggcgaacg
+tggcgaaaaaagaagggaaaaaaaccaaaagagcgggcgctaaggccctg
+gcaaatgtaacggtccccctgggggtaaccccccccccccccgcgcttta
+tgggccggttcagggggggttccctttgccatttagggtgggcaaatttt
+tggaaaggcgattggttcgggccttttttttttttccccaactggggaaa
+aggggatttttttcaaagcgatttaatttggtaaccccagggtttttxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxCCCCCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTGGTGCAACAAACACAAAAGCATAATCTCTCA
+GATTAATAAATAATAACGAGCTTTAACTCCAATCTGCAGAGAACATTTCA
+AGCACATGGGAGAAGCTCTGCAGAACAAGAAAGATCATATCTTCCCGGAC
+gctggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxagt
+
+QA 265 752 -1 -1
+DS CHROMAT_FILE: LL2283r PHD_FILE: LL2283r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:24 2000
+
+WR{
+LL2283r unaligned phrap 000919:094547
+}
+
+RT{
+LL2283r chimera phrap 1 397 000919:094547
+}
+
+RT{
+LL2283r matchElsewhereLowQual phrap 131 179 000919:094547
+}
+
+RT{
+LL2283r matchElsewhereLowQual phrap 194 225 000919:094547
+}
+
+RT{
+LL2283r matchElsewhereHighQual phrap 336 360 000919:094547
+}
+
+RT{
+LL2283r matchElsewhereHighQual phrap 383 397 000919:094547
+}
+
+RD LL2283f 796 0 2
+ngtgnngggaacAAACACAAAAGCATAATCTCTCAGATTAATAAATAATA
+ACGAGCTTTAACTCCAATCTGCAGAGAACATTTCAAGCACATGGGAGAAG
+CTCTGCAGAACAAGAAAGATCATATCTTCCCGGACGCGTGGxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTTGC
+GCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAA
+TGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCCAGGG
+TGGTTTTTCTTTTCACCAGTGAGACGGGCAACAGCTGATTGCCCTTCACC
+GCCTGGCCCTGAGAGAGTTGCAGCAAGCGGTCCACGCTGGTTTGCCCCAG
+CAGGCGAAAATCCTGTTTGATGGTGGTTAACGGCGGGATATAACATGAGC
+TGTCTTCGGTATCGTCGTATCCCACTACCGAGATATCCGCACCAACGCGC
+AGCCCGGACTCGGTAATGGCGCGCATTGCGCCCAGCGCCATCTGATCGTT
+GGCAACCAGCATCGCAGTGGGAACGATGCCCTCATTCAGCATTTGCATGg
+tttgttgaAAACCGGACATGGCActccaGTcgccttCccgttccgn
+
+QA 11 789 305 795
+DS CHROMAT_FILE: LL2283f PHD_FILE: LL2283f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:46 2000
+
+RT{
+LL2283f chimera phrap 305 651 000919:094547
+}
+
+RT{
+LL2283f matchElsewhereHighQual phrap 305 512 000919:094547
+}
+
+CO Contig19 911 2 62 U
+ctttaagggaacaagagttgaatataaagtcacaatatagcagacagaaa
+tcatgatcaaataaacaaatatttttttaaTACAGAAAACAATACAAAAA
+GACAGCTGGGCCAAGCTAATCGAGCATGTGTGTCTCTTCGCCATCATTAC
+CTGTCTCCGGCACAGCCGAGCTGGAGCCAACGGCGGAGATAATGGCATTC
+AAAGCACGAATCTGCATCTCCATCGCAACAATGTAATCAGTAGTTTCTTC
+TAAAACCACCGGTAATGGTTGTTTCCGGCAACCGGGGATTAACCGGCTCA
+GCAATTTCACTTTCCTCTGTACAGCTGTAAACCCCTTCCCCTTCAACCTC
+AAAACCGTTGCTCTCTTTTTCTTTAACCGGCTATTCCCGGTTATCGAAAC
+CACCGTGGGAGGAGGATTCGAAGTTCTTAACCGGTTATGTTTCCTAAACT
+TGAGCTTCACTTTAACGGCTTTAGATAGTATAGCTCGGCTCCACAGTGTT
+CTGCCTCTCGCCGTAACCGCCAGAGACCTGTAAGCGGCGTCACGTACGGC
+TATACCTCCGCCGCGAGGAGAGAGCTTACCAGATGCATCGGTGGCGGCGC
+TAATACGTAGTTCCCTTAGAGATTGAATGATGTTGGTCGAGTAGATTTGC
+TGCTGCTTCTCGGATCTCCATTTCTGCACCAAAGATTGAGACGAAGAATG
+CGACGGTGATGGTGACGTTGGTTTCTTCTTACGCTTACGCCGCGACTGAT
+CATAACTGGAGCCAGCCGTTGGATGATTTGTCAGTGGAGAAATATACTCC
+ATGTCAATTCCTCTGCTTCTCCGGTGACAGTACCTGAAGACAGAAACAGA
+GAAAGGAGATTAGCAAGGTGAGACACgtatgtggaggaagaggaaaatcc
+cggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 45 40 40 40 51 51 39 40 45 40 40 40 51 51 51 51 51 51 51 46
+ 46 40 40 39 39 40 40 40 40 39 39 39 39 35 35 35 35 35 35 35 39 39 39 39 46 46 46 56 56 51 51 45 45 45 45 40 40 38 38 38 38 38 40 51 51 39 39 39 39 39
+ 39 51 45 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 51 51 51 51 43 43 58 58 58 62 58 58 68 83 80 90 83 80 72 72 70 68 70 68 77 80
+ 77 68 73 72 60 65 65 68 68 76 71 76 80 80 80 83 79 85 85 89 81 75 65 68 64 62 58 58 66 74 74 80 80 80 80 82 82 90 79 72 72 70 83 76 77 74 78 75 69 72
+ 72 72 69 67 74 74 85 80 72 69 74 77 77 81 80 78 78 83 80 76 85 83 83 75 72 69 66 63 63 58 58 72 83 90 90 79 85 70 70 70 66 67 61 69 72 83 83 75 72 72
+ 74 76 79 81 90 90 83 85 89 89 90 89 83 90 90 90 90 90 83 85 80 80 88 83 83 83 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 80 80 80 88 89 90
+ 90 90 90 85 88 88 88 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 85 90 90 89 90 90 90 90 90 90 90 90 90 90 87 87 89 85 85 85 90 88 88 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 88 90 90 90 90 90 90 90 90 88 88 88 90 90 86 86 90 90 90 90 90 90 90 90 88 83 88 81 83 83 85 85 85 85 85 90 90 90 86 83 83 83 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 83 82 82 78 78 78 90 90 90 90 90 90 90 90 90 90 90 88 88 83 83 80 80 80 83 90 88 88 83 83 83 88 90 88 88
+ 80 90 90 90 90 90 90 90 90 90 83 83 88 88 90 90 90 90 90 90 90 90 85 86 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 78 78 78 78 76 76 74 78
+ 78 86 86 82 80 79 77 82 75 83 83 72 74 80 80 88 83 74 74 77 85 85 90 90 90 90 90 90 90 90 90 90 90 90 90 83 80 80 83 83 83 88 83 85 90 90 80 84 80 80
+ 80 78 78 78 78 76 76 82 83 83 83 83 77 83 83 83 76 78 78 83 90 90 90 83 83 75 75 75 80 90 90 90 90 90 90 82 80 79 74 72 70 70 70 72 72 81 85 76 72 72
+ 72 72 72 74 74 68 78 90 90 90 90 77 77 72 72 72 68 65 65 79 74 74 74 80 85 90 90 80 73 61 68 66 66 68 65 61 64 64 64 65 65 65 64 65 65 74 80 80 83 90
+ 90 90 80 80 80 80 80 72 67 68 70 70 64 61 60 58 58 62 58 61 62 78 71 71 51 45 45 43 43 43 43 43 43 43 43 43 43 45 45 45 45 45 40 40 40 40 40 40 56 56
+ 51 40 40 40 40 40 40 51 51 51 51 51 51 51 51 51 51 51 51 40 45 40 39 39 39 40 39 39 39 39 39 39 46 46 46 40 40 40 51 51 51 51 51 51 51 51 51 51 51 51
+ 51 51 51 51 51 51 51 51 40 39 39 39 39 39 51 39 39 39 39 35 40 37 51 45 40 40 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2286f U 1
+AF LL2286r C 176
+BS 1 283 LL2286f
+BS 284 284 LL2286r
+BS 285 308 LL2286f
+BS 309 312 LL2286r
+BS 313 322 LL2286f
+BS 323 323 LL2286r
+BS 324 331 LL2286f
+BS 332 342 LL2286r
+BS 343 343 LL2286f
+BS 344 344 LL2286r
+BS 345 347 LL2286f
+BS 348 353 LL2286r
+BS 354 354 LL2286f
+BS 355 371 LL2286r
+BS 372 376 LL2286f
+BS 377 392 LL2286r
+BS 393 395 LL2286f
+BS 396 402 LL2286r
+BS 403 414 LL2286f
+BS 415 422 LL2286r
+BS 423 425 LL2286f
+BS 426 426 LL2286r
+BS 427 431 LL2286f
+BS 432 442 LL2286r
+BS 443 446 LL2286f
+BS 447 451 LL2286r
+BS 452 454 LL2286f
+BS 455 463 LL2286r
+BS 464 466 LL2286f
+BS 467 474 LL2286r
+BS 475 476 LL2286f
+BS 477 478 LL2286r
+BS 479 479 LL2286f
+BS 480 480 LL2286r
+BS 481 481 LL2286f
+BS 482 496 LL2286r
+BS 497 498 LL2286f
+BS 499 506 LL2286r
+BS 507 515 LL2286f
+BS 516 530 LL2286r
+BS 531 534 LL2286f
+BS 535 541 LL2286r
+BS 542 543 LL2286f
+BS 544 546 LL2286r
+BS 547 547 LL2286f
+BS 548 556 LL2286r
+BS 557 557 LL2286f
+BS 558 562 LL2286r
+BS 563 565 LL2286f
+BS 566 569 LL2286r
+BS 570 572 LL2286f
+BS 573 586 LL2286r
+BS 587 591 LL2286f
+BS 592 622 LL2286r
+BS 623 628 LL2286f
+BS 629 629 LL2286r
+BS 630 635 LL2286f
+BS 636 641 LL2286r
+BS 642 642 LL2286f
+BS 643 709 LL2286r
+BS 710 711 LL2286f
+BS 712 911 LL2286r
+
+RD LL2286f 776 0 0
+ctttaagggaacaagagttgaatataaagtcacaatatagcagacagaaa
+tcatgatcaaataaacaaatatttttttaaTACAGAAAACAATACAAAAA
+GACAGCTGGGCCAAGCTAATCGAGCATGTGTGTCTCTTCGCCATCATTAC
+CTGTCTCCGGCACAGCCGAGCTGGAGCCAACGGCGGAGATAATGGCATTC
+AAAGCACGAATCTGCATCTCCATCGCAACAATGTAATCAGTAGTTTCTTC
+TAAAACCACCGGTAATGGTTGTTTCCGGCAACCGGGGATTAACCGGCTCA
+GCAATTTCACTTTCCTCTGTACAGCTGTAAACCCCTTCCCCTTCAACCTC
+AAAACCGTTGCTCTCTTTTTCTTTAACCGGCTATTCCCGGTTATCGAAAC
+CACCGTGGGAGGAGGATTCGAAGTTCTTAACCGGTTATGTTTCCTAAACT
+TGAGCTTCACTTTAACGGCTTTAGATAGTATAGCTCGGCTCCACAGTGTT
+CTGCCTCTCGCCGTAACCGCCAGAGACCTGTAAGCGGCGTCACGTACGGC
+TATACCTCCGCCGCGAGGAGAGAGCTTACCAGATGCATCGGTGGCGGCGC
+TAATACGTAGTTCCCTTAGAGATTGAATGATGTTGGTCGAGTAGATTTGC
+TGCTGCTTCTCGGATCTCCATTTCTGCACCAAAGATTGAGACGAAGAATG
+CGACGGTGATGGTGACGTTGGTTTCTTCTTACGCTTACGCCGCGACTGAT
+CATAACTGGAGCCAGCCGTTGGATga
+
+QA 81 774 1 776
+DS CHROMAT_FILE: LL2286f PHD_FILE: LL2286f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:01 2000
+
+RD LL2286r 781 0 0
+nccaACGGCGGAGATAATGGCATTCAAAGCACGAATCTGCATCTCCATCG
+CAACAATGTAATCAGTAGTTTCTTCTAAAACCACCGGTAATGGTTGTTTC
+CGGCAACCGGGGATTAACCGGCTCAGCAATTTCACTTTCCTCTGTACAGC
+TGTAAACCCCTTCCCCTTCAACCTCAAAACCGTTGCTCTCTTTTTCTTTA
+ACCGGCTATTCCCGGTTATCGAAACCACCGTGGGAGGAGGATTCGAAGTT
+CTTAACCGGTTATGTTTCCTAAACTTGAGCTTCACTTTAACGGCTTTAGA
+TAGTATAGCTCGGCTCCACAGTGTTCTGCCTCTCGCCGTAACCGCCAGAG
+ACCTGTAAGCGGCGTCACGTACGGCTATACCTCCGCCGCGAGGAGAGAGC
+TTACCAGATGCATCGGTGGCGGCGCTAATACGTAGTTCCCTTAGAGATTG
+AATGATGTTGGTCGAGTAGATTTGCTGCTGCTTCTCGGATCTCCATTTCT
+GCACCAAAGATTGAGACGAAGAATGCGACGGTGATGGTGACGTTGGTTTC
+TTCTTACGCTTACGCCGCGACTGATCATAACTGGAGCCAGCCGTTGGATG
+ATTTGTCAGTGGAGAAATATACTCCATGTCAATTCCTCTGCTTCTCCGGT
+GACAGTACCTGAAGACAGAAACAGAGAAAGGAGATTAGCAAGGTGAGACA
+Cgtatgtggaggaagaggaaaatcccggacgcgtggxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 6 701 2 736
+DS CHROMAT_FILE: LL2286r PHD_FILE: LL2286r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:38 2000
+
+CO Contig20 547 2 7 U
+cCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAT
+TTGAAGAAACCTTTATAAAAACGATTAAGGGGCTGAATGCGAGACAAAAA
+TATATTTCTGATTTGAATCAATGAAAAATATATAAATGGAAGATCTAAAA
+CATTTCTCACCAATGGCGCTATAACACAACAGAGAAGATTGAATTTAAAG
+GCATGACTGCTCTTGTCACTTCCAGCCTTTCTTTTATTATTGGTTTAAAT
+AC*GTGAAGGTGGATGAGCTCCATCACCTTGTCAACTTTATAGCTTTGCG
+ATCGCTGAAGCAGGCAAGTGTTTCCTTGGTGGAAGCTTAAGATCCTCCCA
+TCCCTCCTCTTCTTCCACTCTCTTGAAGTACTTATCTACCATGTTATCTT
+TCATATCCTCCAGTCTCCTTGGCTCCCACTTTGGGTTCCTATCTTTGTTT
+ATCAATATGGCTCTACCCCCCTCCACAAAATCTTTGCTTATATCTCCCTT
+CATCACATGTGACACCATTCTATTCTCACGGATAAGCGGACGCGTGG
+
+BQ
+ 19 22 37 48 48 48 48 48 48 48 48 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51
+ 51 51 51 51 51 51 56 56 56 56 56 56 56 51 51 51 51 51 51 51 51 51 51 51 56 56 56 56 51 60 60 60 60 60 66 71 60 60 45 45 45 45 56 71 71 71 73 77 78 84
+ 90 90 89 87 72 68 68 70 76 80 80 85 87 80 81 81 82 83 78 73 80 80 86 84 84 90 85 85 90 90 89 89 66 66 66 70 78 72 74 72 72 72 79 76 81 74 74 74 74 80
+ 86 89 84 71 71 71 79 75 84 81 74 66 74 61 62 60 62 62 62 58 60 60 64 68 78 89 88 88 71 71 71 56 56 56 56 56 56 60 60 60 77 83 82 77 75 73 73 70 66 75
+ 77 71 67 73 85 85 80 80 75 74 74 75 75 78 81 85 87 84 82 79 78 80 75 70 73 73 78 90 90 83 74 73 80 71 71 71 75 75 81 72 71 66 66 66 66 66 66 66 66 61
+ 43 43 43 43 58 58 58 71 72 68 66 67 73 60 60 58 58 58 68 74 83 86 84 84 84 66 66 58 43 43 43 43 43 71 71 71 71 76 76 76 71 69 62 62 51 51 51 57 58 65
+ 72 71 67 67 67 65 70 78 76 90 73 70 70 66 58 58 58 43 43 43 43 43 45 60 60 51 51 51 51 51 55 62 58 58 60 60 51 51 51 51 56 56 71 71 77 77 77 89 89 90
+ 90 84 84 78 78 78 79 79 79 89 84 90 74 74 71 79 74 78 75 74 77 77 82 74 67 58 58 62 70 74 74 84 77 74 74 81 81 84 90 78 80 66 70 74 82 82 84 87 84 78
+ 78 79 90 87 86 74 80 72 58 50 35 35 35 35 43 71 66 60 58 58 58 58 66 66 86 86 86 86 86 86 89 73 59 59 55 55 55 84 84 90 86 86 84 90 75 70 64 65 65 67
+ 73 73 86 83 72 72 69 63 55 55 55 64 67 75 81 81 90 90 89 89 72 66 69 76 78 80 86 86 86 86 86 82 82 65 61 61 76 71 80 88 87 79 79 74 79 79 88 88 74 74
+ 74 74 74 77 84 76 64 54 55 54 63 67 84 84 73 69 69 68 77 70 68 64 63 63 66 66 69 67 54 54 54 39 39 39 39 39 35 35 45 45 49 49 49 34 33 32
+
+AF LL2289r C -194
+AF LL2289f U 57
+BS 1 310 LL2289r
+BS 311 311 LL2289f
+BS 312 465 LL2289r
+BS 466 466 LL2289f
+BS 467 489 LL2289r
+BS 490 490 LL2289f
+BS 491 547 LL2289r
+
+RD LL2289r 788 0 2
+gcgggcctcttcgctattacgccagctggcgaaagggggatgtgctgcaa
+ggcgattaagttgggtaacgccagggttttxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxcCTTT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTGAA
+GAAACCTTTATAAAAACGATTAAGGGGCTGAATGCGAGACAAAAATATAT
+TTCTGATTTGAATCAATGAAAAATATATAAATGGAAGATCTAAAACATTT
+CTCACCAATGGCGCTATAACACAACAGAGAAGATTGAATTTAAAGGCATG
+ACTGCTCTTGTCACTTCCAGCCTTTCTTTTATTATTGGTTTAAATAC*GT
+GAAGGTGGATGAGCTCCATCACCTTGTCAACTTTATAGCTTTGCGATCGC
+TGAAGCAGGCAAGTGTTTCCTTGGTGGAAGCTTAAGATCCTCCCATCCCT
+CCTCTTCTTCCACTCTCTTGAAGTACTTATCTACCATGTTATCTTTCATA
+TCCTCCAGTCTCCTTGGCTCCCACTTTGGGTTCCTATCTTTGTTTATCAA
+TATGGCTCTACCCCCCTCCACAAAATCTTTGCTTATATCTCCCTTCATCA
+CATGTGACACCATTCTATTCTCACGGATAAGCGGACGCGTGGxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaatt
+
+QA 19 742 196 742
+DS CHROMAT_FILE: LL2289r PHD_FILE: LL2289r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:15 2000
+
+RT{
+LL2289r chimera phrap 1 80 000919:094547
+}
+
+RT{
+LL2289r matchElsewhereHighQual phrap 1 80 000919:094547
+}
+
+RD LL2289f 769 0 3
+caacCTTTat*aaAaCgatgaagGGGCTGAATGccagACAAAAATATATT
+TCTGATTTGAATCAATGAAAAATATATAAATGGAAGATCTAAAACATTTC
+TCACCAATGGCGCTATAACACAACAGaaaAGATTGAATTTAAAGGCATGA
+CTGCTCTTGTCACTTCCAGCCTTTCTTTTATTATTGGTTTAAATacagtG
+AAGGTGGATGAGCTCCATCACCTTgccaACTTTATAGCTTTGCGATCGCT
+GAAGCAGGCAAGTgcttcCTTGGTGGAAGCTtaacaTCCTCCCATCCCTC
+CTCTTCTTCCACTCTCTTGAAGTACTTATCTACCATGTTATCTTTCATAT
+CCTCCagcctCCTTGGCTCCCACTTTGGGTTCCTATCTTTGTTTATCAAT
+ATGGCTCTACCCCCCTCCACAAAATCTTTGCTTATATCTCCCTTCATCAC
+ATGTGACACCATTCTATTCTCACGGATAaacgcaCGCGTGgxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxgcctggcgtgcctaatgagcgagctaactcacattaattgcgttgc
+gctcactgcccgctttccagtcggcaaacctgtccagccagctgcattaa
+tgaatcggccaacgcgcgg
+
+QA 23 720 2 491
+DS CHROMAT_FILE: LL2289f PHD_FILE: LL2289f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:37 2000
+
+RT{
+LL2289f chimera phrap 655 769 000919:094547
+}
+
+RT{
+LL2289f matchElsewhereHighQual phrap 737 769 000919:094547
+}
+
+RT{
+LL2289f matchElsewhereHighQual phrap 675 724 000919:094547
+}
+
+CO Contig21 766 2 29 U
+ccacgcgtccgtttttgatgtaatacAGTTTTTTCTGAATAAAATTTAAC
+ATTTATTCAAAAAAAAAAAAATAATTAGATTACAAGCTTAACAAATTTAT
+TATAAATCATCATACCTTGTTCATTTATAAATGAAAATCTGAAGAAAACA
+AAAAATGTTTGGTAAACACCAACATGAACTGTTGTCAGGTTGGTATAACT
+AATTGTGAAGCAGGAGAGGACTGGAGGAGCAGTAACCGAACTATGGAGAC
+TAGCAAGATCAACTGTATCAAGAAATAGAAACTGGACTTGCATGTTTTAC
+CTAACATGGCTCCACGGTTCGGTTCAGGATCTTCAGCTACAGAATGTACC
+TTCTTCGGATGTTCTTTGAGTAGCGTTGAGTTCAGAGAACTTCTGAGCAT
+CAAATGCTGGATTTCAGCTTCGATCTTTCGCTGGAAAACGCCTTCAAGTT
+CAGCTTCAATCTTCGCGTTTTCGAGTTCTTGGATCCTGGCTTCCTTTGCG
+TCGA*GATCAGCTCTCGCTTCCTCGAGTTTAACTTGGAGAAGTTTCACCT
+TCTGCTTCAAG*ATCAGTACCTGTGATCCTGAATTGTCTTCATCAATATT
+CGAATGTAGTGGTGGTATAGATTCCTTACTCagCTcCTGGAAACTtGTaa
+cCTCtTTccaaAGAGCttCcTGCaaAGCTgcaagaCAtCTGATAGaATCt
+gcaacATGATCCTtgtctgcctgataatggntgntcttcttcttatggct
+ctcatctccaccatct
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 51 51 61 66 66 66 66 66 50 50 50 45 45 47 47 50 50 61 56 56 56 56 56 50
+ 61 50 50 50 50 49 45 49 49 50 50 61 61 66 66 66 66 66 66 57 52 52 48 47 45 45 45 45 45 45 50 52 48 48 55 55 47 47 47 47 52 52 66 61 61 50 56 50 47 47
+ 50 50 50 50 56 56 50 50 50 50 48 52 55 55 55 55 50 66 52 56 56 55 55 50 50 50 50 50 50 55 55 55 55 55 61 61 61 61 61 61 61 61 61 66 66 54 66 66 53 53
+ 53 66 66 66 66 66 66 66 66 66 61 61 61 66 66 66 66 61 61 55 55 55 55 55 55 61 55 55 55 55 53 53 61 51 55 55 55 55 55 66 66 55 55 55 55 55 53 53 53 53
+ 53 53 53 61 61 61 61 61 61 66 66 66 66 66 61 61 61 53 53 53 53 53 53 53 61 61 61 61 61 61 61 61 55 55 53 53 53 53 53 53 53 53 53 55 61 66 53 53 53 53
+ 53 53 53 55 61 61 61 61 61 55 53 53 53 53 53 53 66 66 66 61 61 61 61 61 61 61 61 61 55 53 53 53 53 53 53 53 53 53 53 53 53 53 53 66 66 61 61 55 53 53
+ 53 53 53 53 61 61 61 56 53 53 53 53 53 53 43 43 43 51 56 56 66 61 61 61 61 61 61 61 66 66 66 61 61 61 55 53 53 53 53 53 53 51 56 56 56 51 55 53 53 53
+ 50 61 61 66 66 66 66 66 66 66 66 66 66 66 66 66 56 56 56 51 51 40 40 40 40 40 40 51 56 51 51 46 46 42 42 42 46 45 45 45 45 55 55 61 61 66 66 66 66 56
+ 66 56 56 56 55 55 55 55 55 51 56 56 56 56 56 45 45 45 45 45 40 51 51 56 56 56 56 40 35 35 35 45 45 46 56 56 61 51 51 51 51 51 51 51 51 56 51 51 51 51
+ 51 45 45 40 40 40 40 45 51 56 56 51 40 40 37 37 37 40 40 40 40 40 40 40 46 56 56 56 48 48 48 48 48 48 48 48 48 29 32 32 32 32 36 36 40 39 34 29 29 29
+ 29 35 42 42 42 42 44 44 37 40 40 37 34 29 32 32 27 27 27 25 25 32 48 48 40 48 40 46 44 34 32 32 32 32 25 29 27 27 25 25 37 40 40 40 34 34 40 40 40 40
+ 33 32 32 32 32 32 29 25 25 29 29 29 29 28 28 33 33 40 33 40 36 36 36 48 48 34 34 48 48 29 29 29 40 40 40 29 34 29 29 27 32 37 29 29 29 40 29 29 29 32
+ 40 40 40 40 32 26 29 25 25 21 27 21 25 29 29 40 32 30 28 33 33 40 34 35 33 33 33 25 25 18 18 20 23 19 20 29 24 25 29 26 29 29 25 18 23 24 19 19 18 25
+ 25 25 18 22 21 18 15 19 19 23 25 25 25 21 18 18 20 16 24 21 21 14 18 29 40 27 21 15 13 8 10 11 14 20 24 19 25 32 32 29 29 26 25 19 29 29 21 19 10 10
+ 10 15 19 25 40 40 28 27 25 25 20 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2291r U -43
+AF LL2291f U 14
+BS 1 65 LL2291r
+BS 66 77 LL2291f
+BS 78 79 LL2291r
+BS 80 80 LL2291f
+BS 81 81 LL2291r
+BS 82 84 LL2291f
+BS 85 86 LL2291r
+BS 87 93 LL2291f
+BS 94 95 LL2291r
+BS 96 107 LL2291f
+BS 108 110 LL2291r
+BS 111 112 LL2291f
+BS 113 145 LL2291r
+BS 146 148 LL2291f
+BS 149 150 LL2291r
+BS 151 151 LL2291f
+BS 152 166 LL2291r
+BS 167 167 LL2291f
+BS 168 391 LL2291r
+BS 392 399 LL2291f
+BS 400 400 LL2291r
+BS 401 401 LL2291f
+BS 402 404 LL2291r
+BS 405 409 LL2291f
+BS 410 431 LL2291r
+BS 432 433 LL2291f
+BS 434 436 LL2291r
+BS 437 437 LL2291f
+BS 438 766 LL2291r
+
+RD LL2291r 810 0 0
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacgc
+gtccgtttttgatgtaatacAGTTTTTTCTGAATAAAATTTAACATTTAT
+TCAAAAAAAAAAAAATAATTAGATTACAAGCTTAACAAATTTATTATAAA
+TCATCATACCTTGTTCATTTATAAATGAAAATCTGAAGAAAACAAAAAAT
+GTTTGGTAAACACCAACATGAACTGTTGTCAGGTTGGTATAACTAATTGT
+GAAGCAGGAGAGGACTGGAGGAGCAGTAACCGAACTATGGAGACTAGCAA
+GATCAACTGTATCAAGAAATAGAAACTGGACTTGCATGTTTTACCTAACA
+TGGCTCCACGGTTCGGTTCAGGATCTTCAGCTACAGAATGTACCTTCTTC
+GGATGTTCTTTGAGTAGCGTTGAGTTCAGAGAACTTCTGAGCATCAAATG
+CTGGATTTCAGCTTCGATCTTTCGCTGGAAAACGCCTTCAAGTTCAGCTT
+CAATCTTCGCGTTTTCGAGTTCTTGGATCCTGGCTTCCTTTGCGTCGA*G
+ATCAGCTCTCGCTTCCTCGAGTTTAACTTGGAGAAGTTTCACCTTCTGCT
+TCAAG*ATCAGTACCTGTGATCCTGAATTGTCTTCATCAATATTCGAATG
+TAGTGGTGGTATAGATTCCTTACTCagCTcCTGGAAACTtGTaacCTCtT
+TccaaAGAGCttCcTGCaaAGCTgcaagaCAtCTGATAGaATCtgcaacA
+TGATCCTtgtctgcctgataatggntgntcttcttcttatggctctcatc
+tccaccatct
+
+QA 71 758 45 810
+DS CHROMAT_FILE: LL2291r PHD_FILE: LL2291r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:24 2000
+
+RD LL2291f 810 0 0
+cctttatgtatctagTTTTTTCTGAATAAAATTTAACATTTATTCAAAAA
+AAAAAAAATAATTAGATTACAAGCTTAACAAATTTATTATAAATCATCAT
+ACCTTGTTCATTTATAAATGAAAATCTGAAGAAAACAAAAAATGTTTGGT
+AAACACCAACATGAACTGTTGTCAGGTTGGTATAACTAATTGTGAAGCAG
+GAGAGGACTGGAGGAGCAGTAACCGAACTATGGAGACTAGCAAGATCAAC
+TGTATCAAGAAATAGAAACTGGACTTGCATGTTTTACCTAACATGGCTCC
+ACggctcGGTTCAGGATCTTCAGCTACAgactgTACCTTCTTCGGATGTT
+CTTtgcgtactgccgagtttagAGAACTTCTGAGCATCAAATGCTGGATT
+tccgttcctatttTtcGCTGGAAAACGCCTTCAagctcAGCTtccttctt
+tccgtctccgagttcctggaccttggctttcttt*cgccctcgatcagtt
+ctcgctttctcgaccctaacttgcccaagccccaccttccgcctcaagca
+tcagcccctgcgattccgaattgtctccttccctccccgacagtcccgct
+cgtcctctctctctctcttctcctgcacttcctctcccccgtttcccgtc
+cccgcgcttccccgctcccgcctgtcgtcttcgcccctccctcctcgctc
+ttcccttcccttcccttcccttcctcctttgccgcctcccttctcgtgtc
+ttatccgccgatgcgctcccttctccattctccgcccggctcgcgccaat
+gcgcctaccc
+
+QA 15 354 14 576
+DS CHROMAT_FILE: LL2291f PHD_FILE: LL2291f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:47 2000
+
+CO Contig22 1048 2 38 U
+ttagtttctcattcttgatttggtacgcttgatgaagtgatattgtcata
+caaatcccatgtcccAAAACACTGAAAGATCTACAAACAGCTCTCTAACC
+TCATCAAACGTTAACTGTTCTTCGGTTTTCTCTCAAGTCTTTATTTGACT
+GGCCCTGTCCTTGTTCTTGTTCATCATCTCCATCTTCTTCCCCTTCCCCT
+TCTTCATCTTCATACACCACCATAACAACATTACCATCCTCATCTACTTC
+ATACTCTACATCATCCCCTTCTTCCTCTTCTTCAAACTCCTCTTCCTCTT
+CCTCATCATCATCAAAGTCGTCATCCTCGTCCATACTTGCGCGAACCGAG
+TGAAACTCGGGAGGGGGAGGGCAGTCTTCTGTAATAGCCTCATCACGTTT
+CATCACAAGATGCCTAATAACCCTCTCATCACCATCCAACATCCCTTTGA
+ACTCGTTCAAGTGTTTCGCTTCCATCTCAAAGTTCATCAGTATATAATGT
+GCATTCTCCGCCTTCTGTATCTTATACGCCAGTCTACGCATTCCCCAGTC
+ACTAAACCTCCACACTTTCCCCTTCTTCTCCTTCAGAAACTCTTGAACTT
+TCTCATTGACGCTTTTGACCTCCTCTGCATGTTTCTCATGAATCAAGTAA
+ACCACCTCATAGTGTCTCATTCTTTCTTCATTCAAGTCGCTCTGAAGCTG
+AAGATCAAGAAACTCGTATAGCTCCTTTTCTTCATCATCAGCGAACTCAG
+GGAGAAGATCACCTTCCTCATCAACCTTCTTCTCTTTGAGAAGAATGGAT
+TCGGGAAACGGACCGGTGGCTTCGTCAGGTCTAGCGGAGAAGCTGTGATT
+GTCATCTTTCTTGCTTTTCTTCTTCTTCGCGGCGGCGACCAAAGGAAGGG
+ACTTTGATCCTCTGCGGCCGAGACTAGGTATGAAGCACACGCGGGGAGAG
+TTGATGAACGAATCTCGGCCGTCTATTCTGGGACCGAGAGATACTAGAGA
+CGAAGACGAGTGcaggattgactccattggaggaatgcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 51 51 51 51 40 40 40 51 51 51 51 40 46 46 46 46 46 46 46 46 46 51 40 39 39 39 39 39 39 40 40 40 40 40 40
+ 40 40 40 46 46 46 40 40 45 35 35 35 35 35 35 40 40 40 46 42 42 43 46 46 42 44 42 46 46 51 51 40 40 40 40 40 40 46 46 37 37 37 40 40 37 37 43 43 43 43
+ 38 43 43 43 43 45 41 41 41 41 41 41 41 46 51 51 51 51 51 51 51 51 51 51 51 43 43 43 43 43 43 45 51 51 46 46 38 38 38 43 43 43 45 45 45 45 51 51 56 56
+ 56 56 51 51 51 51 51 51 43 43 43 51 51 51 56 56 51 45 45 45 45 45 51 56 56 56 56 56 51 45 45 45 45 45 45 45 45 45 51 51 51 51 45 45 43 43 43 43 43 43
+ 51 40 40 40 38 38 43 45 45 51 51 56 51 51 51 51 51 45 45 45 45 45 45 51 51 51 51 51 51 51 56 56 56 51 43 43 43 43 43 45 51 60 60 66 66 66 66 66 66 58
+ 60 60 58 66 66 71 88 88 88 88 88 88 90 81 76 75 63 67 60 58 58 58 86 75 76 65 68 66 64 64 58 76 80 72 64 55 56 53 58 62 62 60 62 58 62 62 62 66 66 74
+ 81 75 81 66 62 66 69 65 64 60 64 71 71 68 68 74 74 74 80 71 65 51 51 35 33 57 51 58 71 67 62 70 68 59 65 64 64 61 61 71 68 79 81 71 66 61 61 55 56 69
+ 73 76 79 77 80 80 64 63 78 81 81 70 63 58 61 62 72 69 57 61 61 71 71 71 77 80 80 80 82 66 66 65 69 74 79 85 85 85 87 90 83 71 74 75 68 68 64 78 81 64
+ 64 62 71 68 57 57 59 61 54 65 77 88 80 80 52 51 48 58 58 56 53 57 74 71 82 70 63 71 74 69 75 84 80 80 76 79 68 80 85 85 73 73 84 76 82 75 68 67 62 65
+ 67 67 75 81 80 75 67 67 74 70 66 66 69 76 73 88 84 84 90 88 81 76 76 74 62 57 67 67 60 49 56 49 55 55 51 58 56 60 62 76 64 83 83 87 78 78 75 69 69 64
+ 73 73 67 65 58 60 60 66 62 65 70 72 67 73 73 78 68 75 90 90 90 90 90 90 90 90 90 90 90 90 83 83 77 80 79 77 73 74 81 83 83 80 83 90 80 80 80 85 85 86
+ 88 88 88 90 90 90 90 78 75 77 77 72 74 77 71 71 66 67 66 69 76 80 83 81 78 66 83 80 81 81 81 76 79 84 81 83 83 80 80 75 75 71 75 69 56 54 54 54 54 66
+ 66 76 67 65 64 73 65 61 55 57 59 50 49 49 54 55 69 74 79 81 79 79 79 73 69 67 66 65 65 71 66 56 45 45 45 45 45 45 45 45 45 56 75 66 65 59 66 67 72 75
+ 71 71 73 73 56 56 56 56 56 56 56 56 43 43 43 54 56 59 69 67 60 55 55 66 66 71 70 70 66 66 64 64 67 69 71 74 74 57 47 46 46 50 49 36 43 45 45 45 45 45
+ 45 45 45 45 45 51 51 51 51 56 56 56 56 56 56 56 56 51 51 51 51 63 56 56 56 56 56 56 51 51 51 51 51 51 56 56 51 51 51 51 51 51 51 51 51 51 51 56 56 45
+ 45 45 43 43 43 45 43 43 45 45 45 45 45 43 43 43 43 43 43 56 45 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 43 43 43 43 45 45 56 51 51 51 51 45
+ 45 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 45 43 43 43 35 34 34 34 34 34 34 35 35 43 51 56 56 43 43 43 43 43 43 43 43
+ 43 43 43 51 51 51 51 51 51 51 51 35 35 35 35 35 35 43 43 43 43 43 43 45 45 45 45 45 45 40 46 39 35 35 35 35 35 35 34 34 34 34 34 35 40 40 35 35 35 39
+ 39 39 40 40 40 40 39 39 39 39 39 39 40 35 35 35 35 35 35 35 35 35 35 39 39 39 40 46 46 46 46 51 45 45 45 45 45 45 51 40 40 40 40 40 40 40 40 40 51 51
+ 51 51 51 51 46 46 42 35 35 35 35 35 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2292f U -14
+AF LL2292r C 289
+BS 1 319 LL2292f
+BS 320 322 LL2292r
+BS 323 373 LL2292f
+BS 374 375 LL2292r
+BS 376 424 LL2292f
+BS 425 428 LL2292r
+BS 429 452 LL2292f
+BS 453 458 LL2292r
+BS 459 464 LL2292f
+BS 465 471 LL2292r
+BS 472 475 LL2292f
+BS 476 481 LL2292r
+BS 482 484 LL2292f
+BS 485 486 LL2292r
+BS 487 490 LL2292f
+BS 491 494 LL2292r
+BS 495 495 LL2292f
+BS 496 506 LL2292r
+BS 507 518 LL2292f
+BS 519 519 LL2292r
+BS 520 529 LL2292f
+BS 530 536 LL2292r
+BS 537 539 LL2292f
+BS 540 544 LL2292r
+BS 545 550 LL2292f
+BS 551 552 LL2292r
+BS 553 558 LL2292f
+BS 559 568 LL2292r
+BS 569 569 LL2292f
+BS 570 652 LL2292r
+BS 653 681 LL2292f
+BS 682 692 LL2292r
+BS 693 704 LL2292f
+BS 705 715 LL2292r
+BS 716 743 LL2292f
+BS 744 771 LL2292r
+BS 772 772 LL2292f
+BS 773 1048 LL2292r
+
+RD LL2292f 802 0 0
+cttaagggaacctgnttagtttctcattcttgatttggtacgcttgatga
+agtgatattgtcatacaaatcccatgtcccAAAACACTGAAAGATCTACA
+AACAGCTCTCTAACCTCATCAAACGTTAACTGTTCTTCGGTTTTCTCTCA
+AGTCTTTATTTGACTGGCCCTGTCCTTGTTCTTGTTCATCATCTCCATCT
+TCTTCCCCTTCCCCTTCTTCATCTTCATACACCACCATAACAACATTACC
+ATCCTCATCTACTTCATACTCTACATCATCCCCTTCTTCCTCTTCTTCAA
+ACTCCTCTTCCTCTTCCTCATCATCATCAAAGTCGTCATCCTCGTCCATA
+CTTGCGCGAACCGAGTGAAACTCGGGAGGGGGAGGGCAGTCTTCTGTAAT
+AGCCTCATCACGTTTCATCACAAGATGCCTAATAACCCTCTCATCACCAT
+CCAACATCCCTTTGAACTCGTTCAAGTGTTTCGCTTCCATCTCAAAGTTC
+ATCAGTATATAATGTGCATTCTCCGCCTTCTGTATCTTATACGCCAGTCT
+ACGCATTCCCCAGTCACTAAACCTCCACACTTTCCCCTTCTTCTCCTTCA
+GAAACTCTTGAACTTTCTCATTGACGCTTTTGACCTCCTCTGCATGTTTC
+TCATGAATCAAGTAAACCACCTCATAGTGTCTCATTCTTTCTTCATTCAa
+gccGCTCTGAAGCTGAAGATCAAgacacTCGTATAGCTCCTTTTCTTCAT
+CATCAGCGAActtagGGAGAAGATCACctttctCATCAACCTTCTTCTct
+tg
+
+QA 81 667 16 801
+DS CHROMAT_FILE: LL2292f PHD_FILE: LL2292f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:52 2000
+
+RD LL2292r 804 0 0
+cctCTTCCTCTTCCTCATCATCATCAAAGTCGTCATCCTCGTCCATACTT
+GCGCGAACCGAGTGAAACTCGGGAGGGGGAGGGCAGTCTTCTGTAATAGC
+CTCATCACGTTTCATCACAAGATGCCTAATAACCCTCTCATCACCATCCA
+ACATCCCTTTGAACTCGTTCAAGTGTTTCGCTTCCATCTCAAAGTTCATC
+AGTATATAATGTGCATTCTCCGCCTTCTGTATCTTATACGCCAGTCTACG
+CATTCCCCAGTCACTAAACCTCCACACTTTCCCCTTCTTCTCCTTCAGAA
+ACTCTTGAACTTTCTCATTGACGCTTTTGACCTCCTCTGCATGTTTCTCA
+TGAATCAAGTAAACCACCTCATAGTGTCTCATTCTTTCTTCATTCAAGTC
+GCTCTGAAGCTGAAGATCAAGAAACTCGTATAGCTCCTTTTCTTCATCAT
+CAGCGAACTCAGGGAGAAGATCACCTTCCTCATCAACCTTCTTCTCTTTG
+AGAAGAATGGATTCGGGAAACGGACCGGTGGCTTCGTCAGGTCTAGCGGA
+GAAGCTGTGATTGTCATCTTTCTTGCTTTTCTTCTTCTTCGCGGCGGCGA
+CCAAAGGAAGGGACTTTGATCCTCTGCGGCCGAGACTAGGTATGAAGCAC
+ACGCGGGGAGAGTTGATGAACGAATCTCGGCCGTCTATTCTGGGACCGAG
+AGATACTAGAGACGAAGACGAGTGcaggattgactccattggaggaatgc
+ggacgcgtggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxat
+
+QA 3 724 1 760
+DS CHROMAT_FILE: LL2292r PHD_FILE: LL2292r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:29 2000
+
+CO Contig23 1431 2 12 U
+cttttaagaacaaacactaataaaatattgaatcttctttttaacataac
+attccaatccaaagtatctcccttatgagaGCTCCAAGTGGAAAATAAAA
+AAATAAAACTGAATAAGGACTCTCATGTAGCTCATATTATCAAAATACAG
+CAACAATACAAATATATATATATATAGTAGATGATTAAGATCATTTTTTT
+AACTGGAAGTTAAAGACAAAGAAaccttcagaagATAacatgtaaCCACC
+AGATGATGAAGATACTATTTGTCTCCCTggatgagCTTaaactggaGAAT
+gtACCaGACAGATTCTTAAGCGTTGCAAGCGGAGACCCTGCATTTGTAAA
+GTATGACGCGAGGTTGATCtCCGAGTTGTTGTCGTATTTCTTCACGAAAG
+GATGTTCCATTAGTTCCCTAGCAGAGCTTCGACTGTTTGGGTCCTTTTGC
+AAACAAGTGGAGATGAATGAAGATAACTCAGGGGAGAAGCttTCTGAAGG
+AAGagttggTGGTGGTTGGTCCACGATTGCTTCCATCAACTCGAAAACAC
+TGCTCCATGTTTCCTCTTCATCTGGTGGCAAATATGGGAACTTCCCTGTT
+GCACATTCAAGCACTACTAATCCCAAGCTCCATATATCGCTTTTGTTACC
+GTACTTGTTccCAACGATCCTCTCTGGAGACATATAGTTGTAAGTCCCAA
+CAAATGTGtttgCTAAACCCGCGGTGTTAGTCATAACGGTACTCACACCG
+AAGTCAGTGATCTTGACCTCTCCTCTGTGGTTGACCAACAGATTCGATGG
+TTTCAAGTCACGGTGGATGATGTGCTTGTCATGATGAAGATAGATCAGTC
+CTTGAAGCACTTGCTTAAAGATGGTAGAAAGATAGGACTCAGGGATGGTT
+TTGACTGATTTGAGAAAATCTTCTAGAGATCCACCGTCCATGTACTCCAA
+AATCAGCGAGATTGCGCCATTATCGTAAAACGATTGGTACGAGGTAACAA
+GGTACGGACATTGTGACGACTGGTTTATTTTAAGCTCTTGTGCAATTGAC
+TTGCGAATCGCTTCATCGACGTTTAGTTGAATGACCTTTAAGGCGAAGAA
+TTGACCAGTCCATTTGTGTTGAACAAGCTGCACAACACCGCTGCTTCCTT
+TACCAATGACTTTAACCATGTCCAAATCGGATAAGCTCAACTCATCATCC
+GCTGGCTTAATCGGAGACAGAGCTTCAGGCTCAGACTGAGAGACGATTCG
+AACTCCGTCTTTGTTGACTCGCAGATCTCCATCCTTGAACGTACCGCTCT
+GCGTCAAGAATTTGGTGATGGATTGCTCGCCAGCAGGAGGGATTGAGAGC
+TTGAGATTATTGCTGAATCCACCCTTCTTCATTTCGTCTTCTTCTtcctt
+cttctcctcgtcgctttccccggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 35 35 32 29 32 33 26 30 29 36 36 42 32 29 29 32 32 32 47
+ 47 39 39 35 35 35 35 42 35 35 35 35 35 32 35 35 35 35 35 35 35 35 32 35 35 35 32 32 32 29 29 32 29 29 35 35 35 35 35 35 29 35 35 30 35 35 35 26 26 30
+ 30 30 30 29 29 29 29 31 31 32 35 35 35 35 37 37 37 37 37 37 37 37 37 35 32 32 31 31 29 27 27 35 42 42 42 42 42 35 35 33 42 37 37 42 48 48 56 40 39 37
+ 31 31 31 31 31 35 30 30 20 20 29 25 25 29 33 37 30 28 28 28 28 28 27 12 11 7 7 7 9 11 14 14 19 18 25 25 25 16 16 9 9 10 13 14 18 21 33 27 27 30
+ 26 26 28 33 33 42 42 33 42 30 28 28 29 27 29 29 29 29 39 33 33 30 28 28 28 28 28 28 16 13 6 6 6 13 13 20 22 22 19 18 13 10 10 10 16 13 21 21 23 23
+ 19 19 20 24 24 19 20 20 29 29 35 33 33 28 28 28 36 33 29 27 28 29 24 24 28 28 28 22 28 29 29 29 26 29 27 29 29 29 26 23 23 23 26 35 28 29 29 29 32 42
+ 44 37 37 35 32 29 29 32 29 29 29 32 22 22 22 24 24 24 24 19 23 23 32 35 35 32 32 28 29 30 26 29 29 29 29 35 35 39 29 29 29 29 28 28 33 33 35 35 44 47
+ 44 42 42 35 35 35 35 30 35 36 36 30 30 35 35 35 35 33 35 33 37 44 44 44 35 35 35 36 30 27 27 29 29 32 35 38 56 47 47 47 47 47 47 47 47 48 45 35 35 35
+ 35 35 42 42 39 39 37 37 42 47 42 42 42 42 42 42 42 42 42 42 37 37 37 42 35 33 42 30 30 30 33 37 40 40 40 40 40 40 31 27 16 18 21 27 27 29 40 40 40 40
+ 47 47 23 16 9 9 9 16 15 22 23 25 30 42 42 42 42 42 42 42 42 35 35 35 35 35 35 35 35 35 35 35 35 42 43 48 47 37 36 36 35 35 35 40 40 46 51 37 37 37
+ 35 32 32 32 35 35 40 45 40 37 37 37 37 37 39 45 45 45 45 37 37 37 40 45 40 42 42 42 38 38 39 39 40 40 40 40 56 51 37 40 37 35 35 35 42 44 48 42 42 42
+ 37 37 37 46 46 51 56 56 56 56 46 37 33 33 34 32 32 32 32 32 29 29 29 34 40 32 29 34 29 29 31 37 46 46 46 40 32 32 25 25 25 32 30 46 30 30 30 39 30 39
+ 33 34 34 40 32 32 24 24 21 19 19 29 25 25 29 32 32 32 40 40 40 40 40 40 37 34 34 29 29 24 28 25 32 33 37 34 33 33 41 43 43 41 42 40 44 44 37 37 30 30
+ 20 22 22 37 40 32 29 23 17 12 19 19 32 40 37 37 40 45 37 30 39 35 42 44 45 47 40 40 47 55 53 72 72 54 44 49 47 38 34 48 59 60 60 61 53 53 52 49 47 47
+ 41 44 47 52 52 63 49 49 44 61 61 63 58 48 50 54 55 63 58 61 44 37 37 40 42 48 47 44 47 44 34 37 40 42 34 41 33 33 40 46 47 48 32 29 34 32 32 29 27 30
+ 25 25 25 34 32 25 29 32 32 32 29 27 27 25 22 22 34 40 48 40 44 44 46 46 46 56 56 56 56 56 56 56 51 51 51 51 51 46 46 42 44 43 51 42 42 40 40 40 40 40
+ 40 40 46 40 40 34 40 40 40 48 34 34 48 32 32 32 32 39 39 39 31 35 35 35 35 34 40 40 44 44 40 48 40 32 32 32 32 32 32 36 44 48 48 46 40 39 39 39 35 39
+ 40 40 37 40 40 51 51 51 51 51 51 51 51 51 56 51 51 51 51 45 45 45 40 40 40 45 45 56 56 56 56 56 56 45 45 45 45 45 45 51 45 45 45 45 45 45 51 56 56 56
+ 56 56 45 39 39 39 39 39 40 56 56 45 39 39 39 39 39 39 40 45 51 40 39 39 39 39 39 45 45 51 51 51 45 45 45 45 45 45 45 45 56 51 45 45 45 45 51 51 56 45
+ 45 45 45 45 45 51 51 51 51 56 56 56 56 56 51 51 51 56 51 51 51 51 51 51 51 56 56 56 56 56 45 45 45 45 51 51 56 56 56 56 56 51 51 51 51 51 51 51 51 51
+ 43 43 43 43 43 43 45 45 45 45 45 45 56 56 56 56 56 51 45 45 45 43 43 43 43 43 43 45 56 56 56 56 56 56 56 56 56 56 56 51 45 45 45 45 45 45 56 56 56 51
+ 51 51 56 56 45 45 45 45 45 45 56 56 56 56 56 56 56 51 51 51 51 51 51 46 43 43 43 43 43 43 43 43 43 51 56 56 46 43 43 43 43 43 45 45 45 51 56 56 56 56
+ 56 56 51 51 56 51 51 51 56 56 51 45 45 45 45 45 56 43 43 43 43 43 43 56 56 45 45 45 45 45 45 43 43 43 43 43 43 56 56 56 56 56 56 56 56 56 56 43 43 43
+ 43 43 43 45 45 45 45 45 45 43 43 43 43 43 43 45 45 45 43 43 43 43 43 45 56 51 51 51 51 51 51 51 51 51 51 45 45 45 45 45 45 45 45 45 45 43 43 43 43 43
+ 43 43 51 43 43 43 43 43 43 43 51 51 51 43 43 43 43 36 31 31 31 31 31 35 43 43 51 56 56 56 56 56 56 56 51 51 51 43 43 43 43 43 43 43 43 43 43 43 43 36
+ 35 35 35 35 43 43 45 45 45 45 51 56 51 51 51 51 51 51 51 45 45 45 45 45 43 35 35 35 35 35 35 35 35 35 35 39 51 51 51 51 51 51 40 40 40 40 40 40 45 40
+ 40 40 40 40 40 39 35 35 35 35 35 40 40 40 40 40 46 40 51 51 51 51 51 51 51 51 46 46 46 46 46 40 35 35 35 35 35 35 45 44 40 40 40 40 46 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2293f U 1
+AF LL2293r C 682
+BS 1 685 LL2293f
+BS 686 706 LL2293r
+BS 707 711 LL2293f
+BS 712 717 LL2293r
+BS 718 718 LL2293f
+BS 719 720 LL2293r
+BS 721 721 LL2293f
+BS 722 728 LL2293r
+BS 729 737 LL2293f
+BS 738 747 LL2293r
+BS 748 749 LL2293f
+BS 750 1431 LL2293r
+
+RD LL2293f 794 0 0
+cttttaagaacaaacactaataaaatattgaatcttctttttaacataac
+attccaatccaaagtatctcccttatgagaGCTCCAAGTGGAAAATAAAA
+AAATAAAACTGAATAAGGACTCTCATGTAGCTCATATTATCAAAATACAG
+CAACAATACAAATATATATATATATAGTAGATGATTAAGATCATTTTTTT
+AACTGGAAGTTAAAGACAAAGAAaccttcagaagATAacatgtaaCCACC
+AGATGATGAAGATACTATTTGTCTCCCTggatgagCTTaaactggaGAAT
+gtACCaGACAGATTCTTAAGCGTTGCAAGCGGAGACCCTGCATTTGTAAA
+GTATGACGCGAGGTTGATCtCCGAGTTGTTGTCGTATTTCTTCACGAAAG
+GATGTTCCATTAGTTCCCTAGCAGAGCTTCGACTGTTTGGGTCCTTTTGC
+AAACAAGTGGAGATGAATGAAGATAACTCAGGGGAGAAGCttTCTGAAGG
+AAGagttggTGGTGGTTGGTCCACGATTGCTTCCATCAACTCGAAAACAC
+TGCTCCATGTTTCCTCTTCATCTGGTGGCAAATATGGGAACTTCCCTGTT
+GCACATTCAAGCACTACTAATCCCAAGCTCCATATATCGCTTTTGTTACC
+GTACTTGTTccCAACGATCCTCTCTGGAGACATATAGTTGTaAGTcCCAa
+canaTGTGtttgCTAAACCCGCGGTGTTAGTCATAACGGTACTCACACCG
+AAGTCAGTGATCTTGACCTCTCCTCTGTGGTTGACCAACAGATT
+
+QA 81 794 1 794
+DS CHROMAT_FILE: LL2293f PHD_FILE: LL2293f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:56 2000
+
+RD LL2293r 794 0 0
+atATAGTTGTAAGTCCCAACAAATGTGtntgCTAAACCCGCGGTGTTAGT
+CATAACGGTACTCACACCGAAGTCAGTGATCTTGACCTCTCCTCTGTGGT
+TGACCAACAGATTCGATGGTTTCAAGTCACGGTGGATGATGTGCTTGTCA
+TGATGAAGATAGATCAGTCCTTGAAGCACTTGCTTAAAGATGGTAGAAAG
+ATAGGACTCAGGGATGGTTTTGACTGATTTGAGAAAATCTTCTAGAGATC
+CACCGTCCATGTACTCCAAAATCAGCGAGATTGCGCCATTATCGTAAAAC
+GATTGGTACGAGGTAACAAGGTACGGACATTGTGACGACTGGTTTATTTT
+AAGCTCTTGTGCAATTGACTTGCGAATCGCTTCATCGACGTTTAGTTGAA
+TGACCTTTAAGGCGAAGAATTGACCAGTCCATTTGTGTTGAACAAGCTGC
+ACAACACCGCTGCTTCCTTTACCAATGACTTTAACCATGTCCAAATCGGA
+TAAGCTCAACTCATCATCCGCTGGCTTAATCGGAGACAGAGCTTCAGGCT
+CAGACTGAGAGACGATTCGAACTCCGTCTTTGTTGACTCGCAGATCTCCA
+TCCTTGAACGTACCGCTCTGCGTCAAGAATTTGGTGATGGATTGCTCGCC
+AGCAGGAGGGATTGAGAGCTTGAGATTATTGCTGAATCCACCCTTCTTCA
+TTTCGTCTTCTTCTtccttcttctcctcgtcgctttccccggacgcgtgg
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaat
+
+QA 31 714 1 750
+DS CHROMAT_FILE: LL2293r PHD_FILE: LL2293r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:33 2000
+
+CO Contig24 1039 2 38 U
+agttactattacagatgtatgagacggccactaaaatgaagcagcttatT
+TTGTCAAATGTAAAAAAGAAAGTAGTCATGTGGAATGATATTTTTCGATT
+TGTTTTTTTTTTCTTTCTTATAAATGGCATTTTCAGATTCAGAAACTGGA
+GTCGTCAGTAGATGGACGCCTCAAGAAGTACCTCAGACTATTGGCAAGAA
+CCTGCTGAAGAGGCTTGATTAGGGCTTTGTTTCTGTAACTTACGTGGAAC
+TTAACCTTAGCCAACAAACCCTCAGTGTCGAATTCTCCAGATTCGACAGC
+GACGCTGATATCTGTAATGTTCTTCACTAGCTCAACTAATAGTCCAGGCC
+TATCCGCTGTTTCTATGTATAGTAAACTGCGGTCTGGTCCATCATCTTCA
+ATTTTTATATGCGTTGCTATGTCCACATCAACCTGTTCAGTTGGTGGAAG
+AACACCAAAAGCTGCTCCCATTGCCAACTGAGAACTTGATTCAGGATGAA
+ACTCGAGCAAATTGTTAATGACAGTGAGACGGATGGCCTCGAGCAACTCA
+GGGTCTTCTACTTTTCTTCCACTATCCGCTTTAGTAATGGCAAATTTGTT
+GTGCTTGCCAGAAGAATCGAGGTAGACATTAGCCTTGACAACATTCAGAC
+CCAAATTTTTGAGCGCATTCATAGTGTCAAGTAGAGCTCCTAGACGATCT
+CCAAATGTTACTTCAACAACGGTTGCATCAGGGTCAGAATCTTGGTCGAT
+TATGACCACTGGAGTTGGGACTTCGTCAGAATCCTGCTTACCATTCTCCA
+CAGCAGTAGCGTTTGCTGCTCGAGGCTTCAACCCCGATAATCTGAGTCGC
+TGAAGATTTATAATAGGCTTATCGACAAATCCAAACGTGATTGTGCTAGG
+AGGAAGACATAAAGCTCCAAGATCACGTCTGATTGCGGGAGAAGTAGAAG
+CATCAGTGAAGCAAAGAGCAGAACCAGAAGCAGAGGCCATAGCCACCATA
+GCtgacttgaacccgaagcaaaaaaaaacggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 51
+ 40 40 39 35 35 37 37 37 51 51 51 51 51 51 51 51 51 51 51 51 40 35 35 35 35 35 37 37 37 40 56 56 56 56 46 40 40 40 40 40 40 40 51 40 35 35 35 35 35 40
+ 51 51 51 51 51 56 56 56 56 56 56 56 56 51 51 51 46 46 46 46 46 40 40 37 37 37 40 40 40 40 56 51 51 51 51 51 51 51 45 45 40 40 40 35 35 35 35 35 40 46
+ 43 43 43 43 43 43 51 51 51 51 51 43 56 43 43 43 43 43 43 43 43 43 43 43 51 51 45 45 51 40 43 43 45 45 43 43 36 43 43 43 40 41 41 45 45 45 41 41 45 45
+ 45 43 43 43 43 43 43 43 56 56 56 45 45 45 41 41 41 43 43 43 43 43 43 43 43 43 43 43 51 51 51 51 51 51 43 45 43 43 43 43 43 43 43 36 36 40 45 43 43 43
+ 43 43 43 45 45 45 45 51 51 51 51 45 45 45 45 45 45 45 43 43 43 43 45 45 56 56 38 43 43 43 43 43 51 51 51 51 51 51 51 51 51 45 45 43 43 43 43 43 43 43
+ 43 43 43 43 43 43 43 43 56 41 41 44 46 43 46 56 56 56 50 56 56 56 56 51 43 43 43 43 43 51 45 45 45 43 43 43 43 43 43 45 56 56 51 51 51 51 51 51 51 51
+ 51 51 45 43 43 43 38 38 38 46 56 56 56 51 51 51 51 51 51 51 51 51 51 56 46 46 43 43 41 49 41 45 46 56 56 56 56 51 56 56 66 71 56 56 56 56 45 45 45 51
+ 51 51 56 56 56 56 56 51 51 45 45 51 59 48 46 46 46 47 49 47 53 59 59 59 59 59 56 56 56 45 45 45 45 45 40 43 43 51 51 51 43 56 56 56 56 56 56 56 56 56
+ 56 56 45 45 53 55 54 53 64 52 49 57 61 58 52 52 52 53 58 63 46 35 35 35 35 39 40 45 45 40 40 42 40 45 45 45 45 40 40 59 58 60 64 61 61 60 63 64 64 53
+ 55 57 63 60 58 51 51 51 51 51 51 46 46 38 38 37 40 40 66 66 64 54 63 59 57 55 54 49 45 46 48 49 52 50 48 46 43 42 46 48 65 60 58 44 37 36 45 50 50 55
+ 49 47 47 58 50 58 57 49 52 56 53 45 48 48 50 46 52 55 55 46 56 59 61 59 58 59 59 54 52 47 56 52 54 56 56 49 46 46 32 27 33 33 28 29 31 29 32 27 40 40
+ 40 39 36 36 33 37 40 40 45 42 47 51 49 54 58 59 66 67 64 50 38 33 36 38 45 45 51 53 44 44 42 45 47 50 45 51 50 48 44 37 37 49 73 57 66 69 77 69 72 76
+ 56 59 59 63 82 63 68 85 63 58 56 56 48 56 56 54 55 69 70 73 77 74 69 63 61 63 68 62 71 67 70 73 60 65 60 55 53 51 40 40 48 50 52 54 55 57 61 58 57 53
+ 47 47 49 55 47 51 42 48 48 60 59 56 59 59 62 50 45 40 40 39 44 39 35 35 56 48 51 57 64 63 54 52 51 55 57 54 49 52 52 49 56 54 54 54 44 48 45 49 58 58
+ 68 70 71 76 71 69 64 74 74 70 54 54 59 57 56 56 70 73 51 45 45 40 35 35 35 35 35 39 45 45 45 51 51 51 51 51 51 51 51 51 56 56 56 51 51 51 51 51 51 46
+ 39 35 35 35 39 39 39 39 39 39 39 39 39 40 45 45 40 45 40 40 40 40 45 45 51 51 51 51 51 51 51 56 46 46 46 39 39 39 45 45 45 51 40 40 39 39 39 39 39 39
+ 40 51 51 51 56 56 51 56 51 45 45 45 45 45 51 51 51 51 51 51 43 43 43 43 43 51 56 56 56 56 56 51 51 43 43 43 43 43 43 45 43 43 43 43 43 43 51 45 45 45
+ 45 51 45 45 45 45 51 45 51 51 43 43 43 43 43 43 51 56 56 56 51 51 51 43 35 35 35 35 35 43 45 45 43 43 43 43 43 43 51 56 51 45 45 45 45 45 45 45 51 51
+ 51 51 46 40 39 39 39 39 40 40 45 45 45 56 45 39 39 39 39 39 40 51 51 51 45 51 45 40 40 40 40 40 40 51 51 51 51 39 35 35 35 35 35 40 40 56 40 40 37 37
+ 35 35 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2300f U -30
+AF LL2300r C 344
+BS 1 379 LL2300f
+BS 380 380 LL2300r
+BS 381 390 LL2300f
+BS 391 392 LL2300r
+BS 393 412 LL2300f
+BS 413 426 LL2300r
+BS 427 454 LL2300f
+BS 455 471 LL2300r
+BS 472 489 LL2300f
+BS 490 505 LL2300r
+BS 506 518 LL2300f
+BS 519 593 LL2300r
+BS 594 600 LL2300f
+BS 601 606 LL2300r
+BS 607 607 LL2300f
+BS 608 611 LL2300r
+BS 612 621 LL2300f
+BS 622 626 LL2300r
+BS 627 630 LL2300f
+BS 631 632 LL2300r
+BS 633 633 LL2300f
+BS 634 634 LL2300r
+BS 635 635 LL2300f
+BS 636 637 LL2300r
+BS 638 638 LL2300f
+BS 639 639 LL2300r
+BS 640 641 LL2300f
+BS 642 643 LL2300r
+BS 644 644 LL2300f
+BS 645 670 LL2300r
+BS 671 671 LL2300f
+BS 672 679 LL2300r
+BS 680 682 LL2300f
+BS 683 683 LL2300r
+BS 684 684 LL2300f
+BS 685 706 LL2300r
+BS 707 768 LL2300f
+BS 769 1039 LL2300r
+
+RD LL2300f 804 0 0
+ncttttatgnatagcgttggaagtagagaanagttactattacagatgta
+tgagacggccactaaaatgaagcagcttatTTTGTCAAATGTAAAAAAGA
+AAGTAGTCATGTGGAATGATATTTTTCGATTTGTTTTTTTTTTCTTTCTT
+ATAAATGGCATTTTCAGATTCAGAAACTGGAGTCGTCAGTAGATGGACGC
+CTCAAGAAGTACCTCAGACTATTGGCAAGAACCTGCTGAAGAGGCTTGAT
+TAGGGCTTTGTTTCTGTAACTTACGTGGAACTTAACCTTAGCCAACAAAC
+CCTCAGTGTCGAATTCTCCAGATTCGACAGCGACGCTGATATCTGTAATG
+TTCTTCACTAGCTCAACTAATAGTCCAGGCCTATCCGCTGTTTCTATGTA
+TAGTAAACTGCGGTCTGGTCCATCATCTTCAATTTTTATATGCGTTGCTA
+TGTCCACATCAACCTGTTCAGTTGGTGGAAGAACACCAAAAGCTGCTCCC
+ATTGCCAACTGAGAACTTGATTCAGGATGAAACTCGAGCAAATTGTTAAT
+GACAGTGAGACGGATGGCCTCGAGCAACTCAGGGTCTTCTACTTTTCTTC
+CACTATCCGCTTTAGTAATGGCaAATTTGTTGTGCTTGCCAGAAGAATCG
+AGGTAGACATTAGCCTTGACAACATTCAGACCCAAATTTTTGAGCGCATT
+CATAGTGTCAAGTAGAGCTCCTAGACGATCTCCAAATGTTACTTCAACAA
+CGGTTGCATCAGGGTCAGAATCTTGGTCGATTATGACCACTGGAGTTGGG
+ACTT
+
+QA 81 737 32 804
+DS CHROMAT_FILE: LL2300f PHD_FILE: LL2300f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:52 2000
+
+RD LL2300r 739 0 1
+ccaggcc*atc*gctggttcaagg*atagt*aacTGCGGTctg*tCCATC
+ATctccaatttt*aTATGCGTTGCTATGTCCACATcancctgtncagttg
+*tgaaagAACACCAAAAGCTGCTCCCATTGCcanctgaganctTGATTCA
+GGATGAAACTCGAGcanantgTTAATGACAGTGAGACGGATGGCCTCGAG
+CAACTCAGGGTCTTCTACTTTTCTTCCACTATCCGCTTTAGTAATGGCAA
+atntGTTGTGCTTGCCAGAAGAATCGAGGTAGACATTAGCCTTGACAACA
+TTCAGACCCAAATTTTTGAGCGCATTCATAGTGTCAAGTAGAGCTCCTAG
+ACGATCTCCAAATGTTACTTCAACAACGGTTGCATCAGGGTCAGAATCTT
+GGTCGATTATGACCACTGGAGTTGGGACTTCGTCAGAATCCTGCTTACCA
+TTCTCCACAGCAGTAGCGTTTGCTGCTCGAGGCTTCAACCCCGATAATCT
+GAGTCGCTGAAGATTTATAATAGGCTTATCGACAAATCCAAACGTGATTG
+TGCTAGGAGGAAGACATAAAGCTCCAAGATCACGTCTGATTGCGGGAGAA
+GTAGAAGCATCAGTGAAGCAAAGAGCAGAACCAGAAGCAGAGGCCATAGC
+CACCATAGCtgacttgaacccgaagcaaaaaaaaacggacgcgtggxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 255 659 1 696
+DS CHROMAT_FILE: LL2300r PHD_FILE: LL2300r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:29 2000
+
+RT{
+LL2300r compression phrap 100 102 000919:094547
+}
+
+CO Contig25 1296 2 20 U
+gatggaagataacttcaATATATTACAAACTTATTAAAAGGAGTAGCTAA
+TGCAAATTCAGCAGAGAAAATAAGTGATTTGACATGATAATAACAACCAT
+ACTCGAAAACAAATGTAATTAAAACACTTGCTTCATCTCCTTTCACAGCA
+TCTTCACCTCTTCAACATTGTTGTCTTCAGTAGGTGTAGCCTTTGACGAA
+CATACCCTCCTTAGCCTCTTCAGACTCACCTTCTCCGGTGTATTTTCCGA
+GCTGAGCCAACGAATTGGCCTTGGCTCGAGTCAAGAGAGTGGTCTGAGCT
+GCGTTCACGTTCTCAGCTCTGCCTCCCCATGTTTTCAGGCAAGTGTTCTG
+AAGAGCACGTGCGTAGGAGAAGGACACGTGCCACGGGTTTGGTGCCTGGT
+TCATCGCGTTCAGGTTCAACGTTGCCTCCAACTCAGACTGTCCTCCGGAC
+AAGAACATGATTCCAGGGACGGCAGGAGGGATTCTGTTGCGGAGGAGCTT
+AAGGGTGTAGGAGGCAACTTGTTCAGGAGTAGCTCTGTCTTTAGACTCGG
+CTCCTGGAGTCACCATGCTCGGCTTCAGGAGGATACCTTCAAACATGACA
+TTGTTCTGAGCTAGGTAAAAGAAAACCTCAGCCCACACCTTCTCTGCTAC
+TTCGTAAGTCCTGTCAATGTCGTGTTCTCCATCCAACAAGATCTCTGGCT
+CCACTATGGGAACCAATCCACTGTCTTGTGAAATGGCAGCGTATCGAGCA
+AGGCCCCAAGCAGCTTCTTTCACAGCGAGAGCAGACGGACCGTTGGGAAT
+GCTCACCACAGTACGCCATTTGGCGAAACGAGCACCCTGTTGGTAGTAAG
+CAGCGGTCCGAGAGGATAGACCGTCAAGTCCTTGGCACCATGACTCATTG
+TTAGATCCAACAAGTGGCACCAAACCCTTGTCGACTTTGATACCGGGGAC
+GATGTTCTGCTCGACTAGGACGTCGACCATTTTCTTGCCTTCGGTGGTAG
+ACTGATAGAGAGTCTCCTCGAACAGGATTGCACCGGAGATGTACTGTCCG
+AGACCTGGTGCAGAGACAAGCAGTGTCCGGTATGCTTGACGGTTTGCCTC
+AGTGTTCTCTAGCCCTATCGAGTCCAAACGCTTCCCGCATGTGGCGTTCG
+ACTCGTCCATCGCCAAGATTCCTCGTCCAGGAGACGCAATTGTTTTCGCT
+GTCTTAACGAGCTCATCGGCGTAGGAAGAAGCGGCGCGGACGGTAAGGTA
+GGTGGCGCGGTTGCGTAGGACGACGGCGGCGGAAGcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 36 37 40 39 34 34 34 34 34 35 35 35 35 35 35 35 39 39 39 39 40 51 46 46 46 46 46 40 34 34 34 34 34
+ 35 40 40 40 46 51 46 46 46 46 46 40 40 40 40 40 40 46 46 56 37 37 40 40 40 40 46 40 35 39 39 39 39 39 40 40 40 40 39 39 39 39 39 45 51 40 40 40 40 40
+ 40 45 40 40 40 40 40 51 51 46 46 46 51 46 46 40 40 35 35 35 35 35 35 51 40 40 40 35 35 35 35 35 35 39 40 40 40 40 40 40 40 40 40 46 40 40 40 45 45 45
+ 45 45 43 43 43 43 43 43 45 43 43 43 43 46 43 51 51 51 56 56 56 51 43 45 45 45 45 45 43 43 43 43 43 40 51 56 43 43 43 43 43 43 56 43 36 36 36 36 36 43
+ 43 43 43 43 43 43 43 43 43 43 45 45 45 45 45 45 45 45 51 51 51 43 43 43 43 43 43 43 45 45 45 51 56 51 51 51 51 56 41 51 51 51 51 51 51 43 43 43 43 43
+ 43 43 43 43 43 45 45 45 43 43 43 45 45 45 56 43 43 43 43 43 43 45 43 43 43 43 43 43 45 51 56 56 51 51 51 51 51 51 51 51 56 45 45 45 45 45 45 45 36 36
+ 36 36 36 36 43 43 43 43 43 43 43 45 45 45 43 43 43 43 43 43 43 43 43 43 45 45 51 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 56 56 51 43 43 43 43 43
+ 45 56 51 45 43 43 43 43 43 43 43 43 43 43 45 51 51 56 51 56 56 51 51 51 43 43 43 43 43 43 45 43 43 43 43 43 51 56 56 56 56 51 45 45 45 45 45 56 51 56
+ 56 56 45 43 43 43 43 43 43 45 56 56 56 56 56 56 51 43 40 40 40 40 40 45 45 45 45 45 51 40 45 45 45 45 40 45 45 45 45 40 43 43 51 51 51 51 51 51 51 51
+ 51 51 51 51 51 51 51 51 51 51 51 51 51 51 56 51 45 40 40 40 40 40 45 56 56 56 56 56 56 51 51 51 51 51 51 51 45 40 40 40 40 40 45 56 56 45 45 37 37 37
+ 40 45 45 51 51 45 51 51 51 51 51 46 46 42 42 37 40 40 40 40 40 56 56 56 56 56 56 51 51 51 40 40 40 45 45 45 56 56 56 46 46 46 37 48 51 59 59 65 63 51
+ 54 54 49 42 42 44 47 44 47 52 50 49 55 59 58 71 68 68 66 65 69 57 60 60 62 60 67 55 50 52 52 48 50 54 51 51 51 46 46 50 44 46 53 72 75 75 71 61 59 58
+ 58 64 55 55 55 66 66 64 64 54 50 43 48 41 42 40 40 40 42 47 25 25 26 23 25 25 44 44 47 61 57 51 46 43 55 55 52 43 41 38 43 48 65 69 69 55 55 50 59 51
+ 44 44 52 59 54 47 54 59 65 65 62 67 62 56 54 60 57 62 47 44 47 43 43 40 44 44 58 58 63 59 76 72 44 41 37 37 50 53 57 69 67 80 80 80 69 54 51 45 51 48
+ 40 49 67 63 63 71 69 47 41 48 44 37 37 47 47 55 59 60 58 61 56 55 45 49 47 47 54 49 55 47 49 56 61 54 46 47 53 55 56 67 72 72 72 80 80 80 69 63 62 59
+ 54 47 32 32 32 32 26 29 29 30 39 39 57 52 47 51 61 59 63 58 57 61 53 53 56 56 54 62 65 75 69 63 57 40 51 51 51 51 56 45 45 45 45 45 51 40 40 40 40 40
+ 45 56 56 56 56 56 56 42 37 40 40 40 40 37 40 40 45 56 51 45 40 37 37 37 40 40 51 51 45 45 40 45 40 45 56 56 56 56 51 45 45 45 45 45 45 45 40 40 37 37
+ 37 37 37 37 40 45 45 51 45 51 51 51 51 51 45 45 45 45 45 45 51 51 51 51 51 45 45 45 45 45 45 51 56 56 56 56 56 56 51 51 51 51 51 51 56 56 56 56 56 51
+ 45 45 45 45 45 56 56 56 56 56 51 45 45 45 43 43 45 45 45 51 56 56 56 56 56 56 56 45 51 45 45 45 45 51 56 51 51 56 56 56 56 51 56 56 56 51 51 51 51 51
+ 51 45 45 51 51 51 51 56 56 51 51 51 51 51 51 51 51 51 45 45 45 45 45 45 51 51 56 56 56 56 56 56 56 51 51 51 45 45 45 43 43 43 43 43 43 43 43 43 43 43
+ 51 51 56 56 51 51 51 51 45 43 43 43 43 43 43 56 45 45 45 45 45 45 45 45 45 45 45 51 51 51 51 45 45 45 43 43 43 43 43 43 51 51 51 43 43 43 43 43 43 43
+ 43 43 45 51 56 43 43 43 43 43 43 45 45 51 51 51 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 51 51 56 56 56 45 45 43 43 43 43 51 45 45 45 45 43 43
+ 43 43 43 43 43 43 43 43 43 43 43 43 43 43 51 51 43 43 43 43 43 43 43 43 43 51 43 36 36 36 36 36 43 43 43 43 43 43 36 43 43 36 34 34 34 34 34 43 43 43
+ 43 43 36 36 36 36 36 43 43 43 43 43 43 43 43 45 45 45 45 51 43 35 35 35 35 35 35 43 43 43 43 43 35 35 35 35 35 35 43 45 45 45 51 43 35 35 35 35 35 38
+ 40 40 39 39 39 35 35 35 39 39 39 40 40 40 40 40 40 40 40 40 40 40 40 46 46 46 46 39 35 35 35 35 35 35 35 35 35 35 35 40 40 40 40 40 40 39 39 39 39 39
+ 39 39 39 35 35 29 29 29 35 30 35 39 39 35 35 39 39 45 46 46 46 46 46 40 40 39 35 35 35 35 35 35 35 35 35 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2302f U 1
+AF LL2302r C 539
+BS 1 590 LL2302f
+BS 591 591 LL2302r
+BS 592 612 LL2302f
+BS 613 616 LL2302r
+BS 617 632 LL2302f
+BS 633 636 LL2302r
+BS 637 643 LL2302f
+BS 644 650 LL2302r
+BS 651 656 LL2302f
+BS 657 684 LL2302r
+BS 685 687 LL2302f
+BS 688 694 LL2302r
+BS 695 695 LL2302f
+BS 696 708 LL2302r
+BS 709 711 LL2302f
+BS 712 725 LL2302r
+BS 726 726 LL2302f
+BS 727 760 LL2302r
+BS 761 783 LL2302f
+BS 784 1296 LL2302r
+
+RD LL2302f 788 0 0
+gatggaagataacttcaATATATTACAAACTTATTAAAAGGAGTAGCTAA
+TGCAAATTCAGCAGAGAAAATAAGTGATTTGACATGATAATAACAACCAT
+ACTCGAAAACAAATGTAATTAAAACACTTGCTTCATCTCCTTTCACAGCA
+TCTTCACCTCTTCAACATTGTTGTCTTCAGTAGGTGTAGCCTTTGACGAA
+CATACCCTCCTTAGCCTCTTCAGACTCACCTTCTCCGGTGTATTTTCCGA
+GCTGAGCCAACGAATTGGCCTTGGCTCGAGTCAAGAGAGTGGTCTGAGCT
+GCGTTCACGTTCTCAGCTCTGCCTCCCCATGTTTTCAGGCAAGTGTTCTG
+AAGAGCACGTGCGTAGGAGAAGGACACGTGCCACGGGTTTGGTGCCTGGT
+TCATCGCGTTCAGGTTCAACGTTGCCTCCAACTCAGACTGTCCTCCGGAC
+AAGAACATGATTCCAGGGACGGCAGGAGGGATTCTGTTGCGGAGGAGCTT
+AAGGGTGTAGGAGGCAACTTGTTCAGGAGTAGCTCTGTCTTTAGACTCGG
+CTCCTGGAGTCACCATGCTCGGCTTCAGGAGGATACCTTCAAACATGACA
+TTGTTCTGAGCTAGGTAAAAGAAAACCTCAGCCCACACCTTCTCTGCTAC
+TTCGTAAGTCCTGTCAATGTCGTGTTCTCCATCCAACAAGATCTCTGGCT
+CCACTATGGGAACCAATCCACTGTCTTGTGAAATGGCAGCGTATCGAGCA
+AGgcccncagCAGCTTCTTTCACAGCGAGAGCAGAcgg
+
+QA 18 752 1 788
+DS CHROMAT_FILE: LL2302f PHD_FILE: LL2302f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:02 2000
+
+RD LL2302r 801 0 0
+ctTTAGACTCGGCTCCTGGAGTCACCATGCTCGGCTTCAGGAGGATACCT
+TCAAACATGACATTGTTCTGAGCTAGGTAAAAGanaacCTCAGCCCACAC
+CTTCTCTGCTACTTCGTAAGTCCTGTCAATGTCGTGTTCTCCATCCAACA
+AGATCTCTGGCTCCACTATGGGAACCAATCCACTGTCTTGTGAAATGGCA
+GCGTATCGAGCAAGGCCCCAAGCAGCTTCTTTCACAGCGAGAGCAGACGG
+ACCGTTGGGAATGCTCACCACAGTACGCCATTTGGCGAAACGAGCACCCT
+GTTGGTAGTAAGCAGCGGTCCGAGAGGATAGACCGTCAAGTCCTTGGCAC
+CATGACTCATTGTTAGATCCAACAAGTGGCACCAAACCCTTGTCGACTTT
+GATACCGGGGACGATGTTCTGCTCGACTAGGACGTCGACCATTTTCTTGC
+CTTCGGTGGTAGACTGATAGAGAGTCTCCTCGAACAGGATTGCACCGGAG
+ATGTACTGTCCGAGACCTGGTGCAGAGACAAGCAGTGTCCGGTATGCTTG
+ACGGTTTGCCTCAGTGTTCTCTAGCCCTATCGAGTCCAAACGCTTCCCGC
+ATGTGGCGTTCGACTCGTCCATCGCCAAGATTCCTCGTCCAGGAGACGCA
+ATTGTTTTCGCTGTCTTAACGAGCTCATCGGCGTAGGAAGAAGCGGCGCG
+GACGGTAAGGTAGGTGGCGCGGTTGCGTAGGACGACGGCGGCGGAAGcgg
+acgcgtggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxa
+t
+
+QA 6 747 1 758
+DS CHROMAT_FILE: LL2302r PHD_FILE: LL2302r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:39 2000
+
+CO Contig26 816 2 80 U
+cacaatagacttcattaataacaccacaacaaaataaacaatatcgagct
+ccaacctcacttttacACACAAGACAATCACAACAACAAGTACATAAACA
+AACTCAACAACAACTAAGGGCAGGGATAAAAGAACATGATATGGTGATTT
+TATTTAAAAAAAACGCTAGTCTCAGCTGCATTTTCAGTCTTGATACTCCA
+CTTCTTCCTCCTCCTCTTCTTCATACTCTCCTTCTTCATCTGCGGTTGCG
+TCTTGGTATTGCTGATACTCTGAGACCAGATCGTTCATGTTGCTCTCGGC
+TTCTGTAAACTCCATCTCGTCCATCCCTTCACCTGTGTACCAATGCAAGA
+AAGCTTTCCTCCTGAACATGGCTGTGAACTGCTCACTCACCCTCCTAAAC
+ATCTCTTGGATCGATGTGGAGTTCCCAATAAAGGTTGAGGCCATCGAGAG
+GCCTCGAGGCGCTATGTCACAGACGCTTGACTTCACGTTGTTCGGTATCC
+ATTCCACAAAGTAGGATGAGTTCTTGTTCTGCACGTTTATCATCTGTTCG
+TCCACTTCTTTTGTGCTCATTTTGCCACGGAACATTGCTGAGGCAGTCAA
+GTATCGTCCGTGACGCGGGTCTGCGGCACACATCATGTTCTTTGAATCCC
+ACATCTGTTGGGTGAGCTCAGGGACGGTGAGTGCGCGGTACTGCTGGGAG
+CCACGAGAGGTGAGAGGAGCGAAACCGACCATGAAGAAGTGGAGACGAGG
+GAAGGGGATGAGGTTCACTGCAAGCTTCCTcagatcagagttgagctgac
+cagggcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 51 51 51 46 51 40 40 46 48 50 40 40 40 40 51 51 56 56 56 56 51 51 40 40 40 65 67 73 68 63 59 59 59 70
+ 60 70 40 40 40 40 51 51 56 56 56 40 35 50 50 50 57 61 55 55 61 64 62 85 65 75 69 65 66 66 76 90 70 70 76 83 83 79 83 83 77 83 77 67 67 67 77 72 69 65
+ 65 66 61 61 61 66 73 73 85 82 80 61 53 56 62 58 60 64 72 68 76 76 83 89 89 75 70 78 78 89 89 90 90 90 88 88 82 88 90 90 90 85 86 83 83 83 77 70 69 83
+ 83 69 69 84 77 77 90 83 75 77 77 77 77 85 90 90 88 90 90 90 90 90 90 87 87 85 83 90 75 77 77 83 88 90 90 90 90 89 89 87 75 75 70 75 75 69 63 70 70 70
+ 72 82 79 85 85 90 90 90 89 90 90 90 90 89 89 89 83 83 83 83 79 75 68 75 75 72 75 90 85 75 79 79 82 84 84 90 90 90 90 89 90 83 83 83 83 83 90 90 90 90
+ 90 90 90 85 85 90 90 80 80 90 90 90 90 90 88 83 83 83 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 88 88 90 90 87 87 90 90 90 90 90
+ 90 88 88 88 88 89 89 90 90 90 90 90 90 90 90 90 90 90 90 83 83 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 86 86 86 86 86 89 90 90 90 90 90 90 90 90 86 83 83 83 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 84 84 82 78 78 82 83 83 83 90 90 90 90 90 90 90 90 90 90 87 81 81
+ 81 76 83 90 90 90 90 90 90 90 90 90 87 83 83 90 90 90 90 90 90 90 90 88 90 82 82 85 90 90 90 90 90 90 90 90 86 86 86 86 78 83 83 83 83 89 89 89 89 89
+ 89 83 72 75 75 70 75 68 68 72 70 70 72 72 72 72 74 82 89 83 75 68 68 69 69 72 78 78 78 86 86 90 90 90 88 90 83 90 90 90 90 90 90 90 90 90 85 90 90 90
+ 89 89 85 85 75 75 75 75 72 75 77 90 90 83 75 75 75 72 74 69 73 73 65 70 60 54 54 60 75 83 83 70 58 51 51 49 53 49 67 67 70 72 78 78 76 68 63 70 65 61
+ 61 54 56 60 68 68 74 65 73 67 69 72 80 90 69 64 56 59 61 61 61 54 54 54 63 70 64 64 60 55 57 58 57 61 62 66 58 59 65 65 74 65 65 57 56 66 66 68 73 73
+ 76 76 76 79 79 83 80 85 90 90 69 62 55 55 55 63 71 60 62 56 52 52 49 52 52 37 40 42 46 46 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2304f U 1
+AF LL2304r C 67
+BS 1 74 LL2304f
+BS 75 76 LL2304r
+BS 77 91 LL2304f
+BS 92 102 LL2304r
+BS 103 137 LL2304f
+BS 138 138 LL2304r
+BS 139 173 LL2304f
+BS 174 176 LL2304r
+BS 177 177 LL2304f
+BS 178 182 LL2304r
+BS 183 199 LL2304f
+BS 200 201 LL2304r
+BS 202 203 LL2304f
+BS 204 204 LL2304r
+BS 205 206 LL2304f
+BS 207 207 LL2304r
+BS 208 223 LL2304f
+BS 224 225 LL2304r
+BS 226 227 LL2304f
+BS 228 228 LL2304r
+BS 229 239 LL2304f
+BS 240 240 LL2304r
+BS 241 258 LL2304f
+BS 259 266 LL2304r
+BS 267 277 LL2304f
+BS 278 278 LL2304r
+BS 279 289 LL2304f
+BS 290 291 LL2304r
+BS 292 296 LL2304f
+BS 297 303 LL2304r
+BS 304 309 LL2304f
+BS 310 315 LL2304r
+BS 316 320 LL2304f
+BS 321 323 LL2304r
+BS 324 324 LL2304f
+BS 325 334 LL2304r
+BS 335 335 LL2304f
+BS 336 343 LL2304r
+BS 344 345 LL2304f
+BS 346 349 LL2304r
+BS 350 350 LL2304f
+BS 351 357 LL2304r
+BS 358 363 LL2304f
+BS 364 366 LL2304r
+BS 367 373 LL2304f
+BS 374 375 LL2304r
+BS 376 376 LL2304f
+BS 377 393 LL2304r
+BS 394 396 LL2304f
+BS 397 402 LL2304r
+BS 403 407 LL2304f
+BS 408 417 LL2304r
+BS 418 423 LL2304f
+BS 424 426 LL2304r
+BS 427 439 LL2304f
+BS 440 487 LL2304r
+BS 488 489 LL2304f
+BS 490 490 LL2304r
+BS 491 496 LL2304f
+BS 497 508 LL2304r
+BS 509 513 LL2304f
+BS 514 516 LL2304r
+BS 517 528 LL2304f
+BS 529 544 LL2304r
+BS 545 552 LL2304f
+BS 553 558 LL2304r
+BS 559 559 LL2304f
+BS 560 560 LL2304r
+BS 561 562 LL2304f
+BS 563 595 LL2304r
+BS 596 601 LL2304f
+BS 602 618 LL2304r
+BS 619 619 LL2304f
+BS 620 642 LL2304r
+BS 643 646 LL2304f
+BS 647 661 LL2304r
+BS 662 663 LL2304f
+BS 664 678 LL2304r
+BS 679 680 LL2304f
+BS 681 816 LL2304r
+
+RD LL2304f 779 0 0
+cacaatagacttcattaataacaccacaacaaaataaacaatatcgagct
+ccaacctcacttttacACACAAGACAATCACAACAACAAGTACATAAACA
+AACTCAACAACAACTAAGGGCAGGGATAAAAGAACATGATATGGTGATTT
+TATTTAAAAAAAACGCTAGTCTCAGCTGCATTTTCAGTCTTGATACTCCA
+CTTCTTCCTCCTCCTCTTCTTCATACTCTCCTTCTTCATCTGCGGTTGCG
+TCTTGGTATTGCTGATACTCTGAGACCAGATCGTTCATGTTGCTCTCGGC
+TTCTGTAAACTCCATCTCGTCCATCCCTTCACCTGTGTACCAATGCAAGA
+AAGCTTTCCTCCTGAACATGGCTGTGAACTGCTCACTCACCCTCCTAAAC
+ATCTCTTGGATCGATGTGGAGTTCCCAATAAAGGTTGAGGCCATCGAGAG
+GCCTCGAGGCGCTATGTCACAGACGCTTGACTTCACGTTGTTCGGTATCC
+ATTCCACAAAGTAGGATGAGTTCTTGTTCTGCACGTTTATCATCTGTTCG
+TCCACTTCTTTTGTGCTCATTTTGCCACGGAACATTGCTGAGGCAGTCAA
+GTATCGTCCGTGACGCGGGTCTGCGGCACACATCATGTTCTTTGAATCCC
+ACATCTGTTGGGTGAGCTCAGGGACGGTGAGTGCGCGGTACTGCTGGGAG
+CCACGAGAGGTGAGAGGAGCGAAACCGACCATGAAGAAGTGGAGACGAGG
+GAAGGGGATGAGGTTCACTGCAAGCTtcn
+
+QA 67 775 1 778
+DS CHROMAT_FILE: LL2304f PHD_FILE: LL2304f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:33 2000
+
+RD LL2304r 794 0 0
+acACAAGACAATCACAacancaAGTACATAAACAAACTcaccaACAACTA
+AGGGCAGGGATAAAAGAACATGATATGGTGATTTTATTTAAAAAAAACGC
+TAGTCTCAGCTGCATTTTCAGTCTTGATACTCCACTTCTTCCTCCTCCTC
+TTCTTCATACTCTCCTTCTTCATCTGCGGTTGCGTCTTGGTATTGCTGAT
+ACTCTGAGACCAGATCGTTCATGTTGCTCTCGGCTTCTGTAAACTCCATC
+TCGTCCATCCCTTCACCTGTGTACCAATGCAAGAAAGCTTTCCTCCTGAA
+CATGGCTGTGAACTGCTCACTCACCCTCCTAAACATCTCTTGGATCGATG
+TGGAGTTCCCAATAAAGGTTGAGGCCATCGAGAGGCCTCGAGGCGCTATG
+TCACAGACGCTTGACTTCACGTTGTTCGGTATCCATTCCACAAAGTAGGA
+TGAGTTCTTGTTCTGCACGTTTATCATCTGTTCGTCCACTTCTTTTGTGC
+TCATTTTGCCACGGAACATTGCTGAGGCAGTCAAGTATCGTCCGTGACGC
+GGGTCTGCGGCACACATCATGTTCTTTGAATCCCACATCTGTTGGGTGAG
+CTCAGGGACGGTGAGTGCGCGGTACTGCTGGGAGCCACGAGAGGTGAGAG
+GAGCGAAACCGACCATGAAGAAGTGGAGACGAGGGAAGGGGATGAGGTTC
+ACTGCAAGCTTCCTcagatcagagttgagctgaccagggcggacgcgtgg
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 48 714 1 750
+DS CHROMAT_FILE: LL2304r PHD_FILE: LL2304r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:11 2000
+
+CO Contig27 547 2 10 U
+CTTTTTTTTTTTTTTTTAAACGAAGAAGCAAAAACTCAAGTTAATCCAAA
+AAATATTCAGAATTATTACACGATAAAAGTTTTTTAGACAAAGATAGATT
+TTGTTCTATATGAAGTACAATCGATGAAGAATTTACTTTTAAGTGATCAC
+TGATCTAAGTACTTCACGGGAACAAGTAGAAGCTCACTTTTCTTGCTCAC
+AGTAAGCCCAGGAGCTTCCTCCATGTCGACATCTTCCACCACCATGCCTT
+CAGGTAACTCCCAGTCAAAATGATACAATAGACTCGCCAGACCAAACTCC
+ACCGAAGTCGTCCCCATGTATATTCCAGGACAGATTCTCCTACCACTCCC
+AAACGACAACAGCTCAAAG**TTTTGTCCCTTTGCATCAATGTTATTATC
+CATAAACCTCTCGGGGAAAAACATCTCTGGATCCTTCCAGGTATCCGGAT
+CACGCCCAATAGCCCAGACATTCACATGAAGACGTGTCTTGACGGGAATC
+GTGTAGCCGTCGATCTCAAACTCCGAGGTTACTTCGGACGCGTGGga
+
+BQ
+ 46 56 56 56 56 56 56 56 56 56 56 56 56 56 46 40 40 40 40 40 45 45 45 45 39 39 35 35 35 39 40 56 56 56 56 56 51 45 45 39 39 39 40 40 40 56 56 56 56 56
+ 56 51 51 51 51 60 60 60 60 70 78 74 73 74 74 80 83 86 75 77 79 79 74 80 90 90 82 80 72 72 72 78 75 86 83 85 85 90 85 85 85 88 83 76 71 66 60 60 71 71
+ 71 56 56 56 56 56 56 56 56 56 56 45 45 45 45 45 45 51 66 66 66 66 75 69 73 86 86 71 71 71 56 51 45 45 45 45 60 66 79 71 70 74 67 70 60 72 72 77 73 68
+ 74 74 74 76 83 83 83 80 80 77 76 76 72 90 80 85 85 80 66 66 66 66 51 51 45 45 45 60 58 59 70 70 74 77 85 85 85 81 80 79 90 90 90 90 90 90 85 85 66 58
+ 43 43 43 43 43 43 60 60 83 81 80 72 68 72 62 62 66 73 75 86 90 90 87 81 67 63 62 62 65 69 80 85 85 85 85 90 90 90 90 90 90 90 90 82 80 77 77 67 64 60
+ 60 62 68 68 76 69 71 80 87 87 86 85 87 80 73 73 78 89 90 90 90 88 90 90 84 78 78 68 68 66 78 78 75 69 69 69 69 72 74 80 86 86 90 90 87 80 80 80 80 80
+ 88 83 80 74 70 62 58 58 58 58 43 43 45 56 56 45 60 60 60 60 66 56 56 56 56 56 56 71 66 76 80 80 80 90 90 89 90 81 75 60 62 65 74 76 79 79 85 85 90 90
+ 90 78 78 80 77 78 74 86 60 60 45 45 45 45 56 45 43 43 43 43 43 51 51 51 66 66 66 80 67 58 58 58 58 59 58 69 83 75 66 66 66 67 75 70 70 70 69 69 72 81
+ 84 86 80 83 74 74 74 72 72 67 72 67 72 78 88 82 90 90 71 71 56 56 56 51 51 66 66 66 78 66 66 75 77 71 66 66 45 40 40 40 40 39 54 60 61 58 55 61 50 55
+ 55 58 54 58 68 68 68 68 57 57 52 62 59 63 65 69 75 73 70 70 70 67 55 55 55 55 67 59 66 50 50 50 35 35 40 40 40 46 40 40 40 40 40 45 51 40 39 39 39 35
+ 32 32 29 29 29 32 32 35 35 35 35 40 46 46 68 68 59 62 56 56 49 54 65 64 64 57 61 39 41 40 39 39 39 51 51 51 51 39 39 34 34 33 32 0 0
+
+AF LL2305r C -196
+AF LL2305f U 36
+BS 1 262 LL2305r
+BS 263 263 LL2305f
+BS 264 516 LL2305r
+BS 517 529 LL2305f
+BS 530 530 LL2305r
+BS 531 531 LL2305f
+BS 532 539 LL2305r
+BS 540 541 LL2305f
+BS 542 545 LL2305r
+BS 546 547 LL2305f
+
+RD LL2305r 787 0 2
+gtgcgggcctcttcgctattacgccagctggcgaaagggggatgtgctgc
+aaggcgattaagttgggtaacgccagggttttxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxCTT
+TTTTTTTTTTTTTTAAACGAAGAAGCAAAAACTCAAGTTAATCCAAAAAA
+TATTCAGAATTATTACACGATAAAAGTTTTTTAGACAAAGATAGATTTTG
+TTCTATATGAAGTACAATCGATGAAGAATTTACTTTTAAGTGATCACTGA
+TCTAAGTACTTCACGGGAACAAGTAGAAGCTCACTTTTCTTGCTCACAGT
+AAGCCCAGGAGCTTCCTCCATGTCGACATCTTCCACCACCATGCCTTCAG
+GTAACTCCCAGTCAAAATGATACAATAGACTCGCCAGACCAAACTCCACC
+GAAGTCGTCCCCATGTATATTCCAGGACAGATTCTCCTACCACTCCCAAA
+CGACAACAGCTCAAAG**TTTTGTCCCTTTGCATCAATGTTATTATCCAT
+AAACCTCTCGGGGAAAAACATCTCTGGATCCTTCCAGGTATCCGGATCAC
+GCCCAATAGCCCAGACATTCACATGAAGACGTGTCTTGACGGGAATCGTG
+TAGCCGTCGATCTCAAACTCCGAGGTTACTTCGGACGCGTGGxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaatt
+
+QA 13 742 198 742
+DS CHROMAT_FILE: LL2305r PHD_FILE: LL2305r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:16 2000
+
+RT{
+LL2305r chimera phrap 1 82 000919:094547
+}
+
+RT{
+LL2305r matchElsewhereHighQual phrap 1 82 000919:094547
+}
+
+RD LL2305f 768 0 2
+tcatgttattccAAaAactaTTCAGAATTATTACACGATAAAAGTTTTTT
+AGACAAAGATAGATTTtgctctAtctGAagcaCAATCGATGAAGAATtca
+cTTTTAAGTGATCACTGATCTAAGTACTTCACGGGAACacgTAGAAGCTC
+ACTTTTCTTGCTCACAgcaaGCCCAGGAGCTTCCTCCATGTCGACATCTT
+CCACCACCATGCCTTCAGGTAACTCCCAGTCAAAATGATACAATAGACTC
+GCCAGACCAAACTCCACCGAAGTCGtcctcaTGTATAtttcaGGACAGAT
+TCTCCTACCACTCCCAAACGACAACagttcaaacatttttgTCCCTTTGC
+ATCAATGTTATTATCCATAAACCTCTCGGGGAAAAACatatcTGGATCCT
+TCCAggcATCCGGATCACGCCCAATAGCCCAGACATTCACATGAAGACGt
+gcctTGAcggcaatcgtgcaccCgttcatCTCAAACTCCGAGGTTACTta
+ggaCGCGtgGgaxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxtgcgaaatcgtcatcggctcacaatcgcacacaccatac
+tcacccgcacgtccaatactgtacagtctggggcgcctaatgagcgagct
+aactcacattaattcgcgctgcgtccactgcccgattttcacgccggaaa
+cctgttttgccacttgcg
+
+QA 20 449 1 512
+DS CHROMAT_FILE: LL2305f PHD_FILE: LL2305f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:37 2000
+
+RT{
+LL2305f chimera phrap 678 762 000919:094547
+}
+
+RT{
+LL2305f matchElsewhereLowQual phrap 685 714 000919:094547
+}
+
+CO Contig28 692 2 131 U
+ctttttagcgacTATAATATGTTTCAAACGAAAGGAAGACCCACAGAGGC
+AGAAACAAGCAGAAGAAACAAAGATTGGTGACTAGATTGGGTAACTTGCC
+ATACAAGTCAAACATATACATAAACGAGAGAGATATAGTTTGAATGAATC
+AGCCAAAGAAGCTTGGATCGTAGCCATTGCTAGAAGTGGCCAAAACATAG
+TAAGCAACGATGTGACCAATAGAACCCCAAGCAAGAACATCCACGATGTT
+GAATCCAACAGGGTCGTTCGATTTCAAGAGGCTTACGTACTCCTTGGCAC
+GATCATCTCCCGCTTCGAAGTGGGTCTTCCCGTTCTGCTCCGGCACCTGT
+TTAGCCACATTCTCTCTCTGGAAGTTGAAGAAGACGAACCGGCCTAGGAA
+GAGGGAGAGACCTGTGCTGAGGCTAATAACGACGGATGGGCTGAGCTCGG
+CTCTCACGACGGCGGAGGAGGAAGATCTTCTTCCGGAGGTGGTGGATAGC
+TTGGGTAAGGCAGATGAGGAGCCACCGAGGCGGAGAGGACGGAGGCCGTG
+GAATGAGATGGAGTTCGGGTGTTTCTGGGAGATTGTAGAGGAGAAAGTTG
+TGGGTGTGAGCAAAGCAGATGCGCTTGTCGCCATGTTATCTTCTTCTTCT
+GTGGAGAGTTTCAGTGCAACCCACGCGTCCGCGGACGCGTGG
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 27 27 54 55 55 52 55 55 55 55 53 53 53 55 52 56 59 61 54 55 51 49 49 48 53 54 77 52 52 58 59 54 57 52 54 48 48 58
+ 71 67 64 51 51 63 71 66 90 86 90 84 80 80 72 64 54 54 65 69 69 88 88 79 74 74 75 71 71 70 75 75 75 75 75 69 71 74 75 71 69 65 55 57 60 75 64 67 67 75
+ 75 68 80 74 80 80 80 80 75 86 85 90 79 88 90 88 88 88 74 77 76 74 72 72 77 81 81 86 90 90 90 90 90 79 75 75 75 78 75 75 75 75 75 75 80 88 90 85 75 88
+ 75 75 75 88 90 90 90 90 88 80 80 80 80 86 88 90 75 80 70 70 70 70 72 72 80 90 90 90 87 87 87 87 76 80 90 83 77 71 83 83 83 83 90 90 90 78 77 79 81 81
+ 78 82 78 78 86 78 80 89 89 89 85 90 90 90 90 90 90 83 81 86 90 82 82 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89 90 89 89 89 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 86 86 83 83 81 81 88 90 90 90 90 90 90 90 90 90 90 89 89 83 81 79 79 79 79 79 81 90 90 90 90 90 86 86 88 83 86 86
+ 88 88 88 90 90 90 90 90 83 81 81 80 77 81 90 90 87 85 89 90 90 90 90 90 90 90 90 90 90 86 83 84 84 84 90 90 90 90 88 88 86 86 83 83 90 87 90 90 90 81
+ 85 85 84 84 84 84 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 81 79 79 79 81 87 83 88 88 83 83 83 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 83 86 83 85 83 86 84 81 81 86 88 88 86 86 85 88 88 88 88 90 84 84 88 90 90 88 88 87 88 90 90 86 86 86 88 88 83 83 83 88 86 80 80
+ 80 88 88 90 88 88 85 85 85 83 83 83 83 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 87 88 90 90 90 90 90 90 90 90 90 86 83 78 78 78
+ 80 90 89 90 90 90 88 88 88 88 88 83 86 90 90 90 87 85 85 85 85 85 80 80 80 80 82 76 76 83 88 88 88 88 90 87 89 80 83 80 80 83 80 83 83 83 88 88 85 85
+ 90 90 90 90 90 90 90 90 90 90 90 90 77 75 75 80 88 88 90 90 90 82 82 82 90 90 90 90 90 90 85 85 90 90 86 87 87 79 75 90 90 80 80 65 69 88 77 73 73 83
+ 80 80 88 88 88 80 80 75 90 75 72 75 75 75 73 77 79 79 65 70 68 72 72 72 75 75 74 63 63 63 63 64 68 90 65 62 73 78 78 78 86 86 86 86 86 86 86 90 77 74
+ 71 74 66 80 83 90 90 90 80 80 74 69 67 64 71 60 62 63 63 85 86 72 67 67 62 62 62 59 45 45 53 53 55 63 69 69 49 49 49 34 33 32
+
+AF LL2308r C -58
+AF LL2308f U 1
+BS 1 12 LL2308f
+BS 13 14 LL2308r
+BS 15 38 LL2308f
+BS 39 39 LL2308r
+BS 40 44 LL2308f
+BS 45 47 LL2308r
+BS 48 61 LL2308f
+BS 62 62 LL2308r
+BS 63 71 LL2308f
+BS 72 73 LL2308r
+BS 74 80 LL2308f
+BS 81 85 LL2308r
+BS 86 95 LL2308f
+BS 96 96 LL2308r
+BS 97 99 LL2308f
+BS 100 101 LL2308r
+BS 102 109 LL2308f
+BS 110 110 LL2308r
+BS 111 111 LL2308f
+BS 112 118 LL2308r
+BS 119 119 LL2308f
+BS 120 120 LL2308r
+BS 121 122 LL2308f
+BS 123 147 LL2308r
+BS 148 148 LL2308f
+BS 149 157 LL2308r
+BS 158 158 LL2308f
+BS 159 168 LL2308r
+BS 169 175 LL2308f
+BS 176 183 LL2308r
+BS 184 186 LL2308f
+BS 187 187 LL2308r
+BS 188 192 LL2308f
+BS 193 197 LL2308r
+BS 198 207 LL2308f
+BS 208 211 LL2308r
+BS 212 212 LL2308f
+BS 213 215 LL2308r
+BS 216 216 LL2308f
+BS 217 217 LL2308r
+BS 218 223 LL2308f
+BS 224 227 LL2308r
+BS 228 237 LL2308f
+BS 238 254 LL2308r
+BS 255 260 LL2308f
+BS 261 261 LL2308r
+BS 262 264 LL2308f
+BS 265 270 LL2308r
+BS 271 271 LL2308f
+BS 272 274 LL2308r
+BS 275 275 LL2308f
+BS 276 298 LL2308r
+BS 299 300 LL2308f
+BS 301 303 LL2308r
+BS 304 304 LL2308f
+BS 305 318 LL2308r
+BS 319 323 LL2308f
+BS 324 324 LL2308r
+BS 325 325 LL2308f
+BS 326 329 LL2308r
+BS 330 330 LL2308f
+BS 331 340 LL2308r
+BS 341 342 LL2308f
+BS 343 360 LL2308r
+BS 361 366 LL2308f
+BS 367 369 LL2308r
+BS 370 381 LL2308f
+BS 382 395 LL2308r
+BS 396 407 LL2308f
+BS 408 408 LL2308r
+BS 409 409 LL2308f
+BS 410 412 LL2308r
+BS 413 413 LL2308f
+BS 414 416 LL2308r
+BS 417 417 LL2308f
+BS 418 426 LL2308r
+BS 427 427 LL2308f
+BS 428 429 LL2308r
+BS 430 432 LL2308f
+BS 433 436 LL2308r
+BS 437 438 LL2308f
+BS 439 441 LL2308r
+BS 442 443 LL2308f
+BS 444 446 LL2308r
+BS 447 448 LL2308f
+BS 449 466 LL2308r
+BS 467 467 LL2308f
+BS 468 468 LL2308r
+BS 469 474 LL2308f
+BS 475 475 LL2308r
+BS 476 476 LL2308f
+BS 477 483 LL2308r
+BS 484 484 LL2308f
+BS 485 486 LL2308r
+BS 487 496 LL2308f
+BS 497 502 LL2308r
+BS 503 511 LL2308f
+BS 512 512 LL2308r
+BS 513 516 LL2308f
+BS 517 527 LL2308r
+BS 528 529 LL2308f
+BS 530 530 LL2308r
+BS 531 536 LL2308f
+BS 537 546 LL2308r
+BS 547 548 LL2308f
+BS 549 550 LL2308r
+BS 551 574 LL2308f
+BS 575 576 LL2308r
+BS 577 580 LL2308f
+BS 581 582 LL2308r
+BS 583 588 LL2308f
+BS 589 595 LL2308r
+BS 596 596 LL2308f
+BS 597 600 LL2308r
+BS 601 618 LL2308f
+BS 619 619 LL2308r
+BS 620 620 LL2308f
+BS 621 621 LL2308r
+BS 622 624 LL2308f
+BS 625 625 LL2308r
+BS 626 627 LL2308f
+BS 628 649 LL2308r
+BS 650 650 LL2308f
+BS 651 652 LL2308r
+BS 653 658 LL2308f
+BS 659 663 LL2308r
+BS 664 665 LL2308f
+BS 666 674 LL2308r
+BS 675 677 LL2308f
+BS 678 691 LL2308r
+BS 692 692 LL2308f
+
+RD LL2308r 795 0 0
+tnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
+nnnntntnntnnaccacGAAATATAATATGTTTCAAACGAAAGGAAGACC
+CACAGAGGCAGAAACAAGCAGAAGAAACAAAGATTGGTGACTAGATTGGG
+TAACTTGCCATACAAGTCAAACATATACATAAACGAGAGAGATATAGTTT
+GAATGAATCAGCCAAAGAAGCTTGGATCGTAGCCATTGCTAGAAGTGGCC
+AAAACATAGTAAGCAACGATGTGACCAATAGAACCCCAAGCAAGAACATC
+CACGATGTTGAATCCAACAGGGTCGTTCGATTTCAAGAGGCTTACGTACT
+CCTTGGCACGATCATCTCCCGCTTCGAAGTGGGTCTTCCCGTTCTGCTCC
+GGCACCTGTTTAGCCACATTCTCTCTCTGGAAGTTGAAGAAGACGAACCG
+GCCTAGGAAGAGGGAGAGACCTGTGCTGAGGCTAATAACGACGGATGGGC
+TGAGCTCGGCTCTCACGACGGCGGAGGAGGAAGATCTTCTTCCGGAGGTG
+GTGGATAGCTTGGGTAAGGCAGATGAGGAGCCACCGAGGCGGAGAGGACG
+GAGGCCGTGGAATGAGATGGAGTTCGGGTGTTTCTGGGAGATTGTAGAGG
+AGAAAGTTGTGGGTGTGAGCAAAGCAGATGCGCTTGTCGCCATGTTATCT
+TCTTCTTCTGTGGAGAGTTTCAGTGCAACCCACGCGTCCGCGGACGCGTG
+Gxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 68 751 72 751
+DS CHROMAT_FILE: LL2308r PHD_FILE: LL2308r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:29 2000
+
+RD LL2308f 791 0 0
+ctttttagcgactATAATATGTTTCAAACGAAAGGAAGACCCACAGAGGC
+AGAAACAAGCAGAAGAAACAAAGATTGGTGACTAGATTGGGTAACTTGCC
+ATACAAGTCAAACATATACATAAACGAGAGAGATATAGTTTGAATGAATC
+AGCCAAAGAAGCTTGGATCGTAGCCATTGCTAGAAGTGGCCAAAACATAG
+TAAGCAACGATGTGACCAATAGAACCCCAAGCAAGAACATCCACGATGTT
+GAATCCAACAGGGTCGTTCGATTTCAAGAGGCTTACGTACTCCTTGGCAC
+GATCATCTCCCGCTTCGAAGTGGGTCTTCCCGTTCTGCTCCGGCACCTGT
+TTAGCCACATTCTCTCTCTGGAAGTTGAAGAAGACGAACCGGCCTAGGAA
+GAGGGAGAGACCTGTGCTGAGGCTAATAACGACGGATGGGCTGAGCTCGG
+CTCTCACGACGGCGGAGGAGGAAGATCTTCTTCCGGAGGTGGTGGATAGC
+TTGGGTAAGGCAGATGAGGAGCCACCGAGGCGGAGAGGACGGAGGCCGTG
+GAATGAGATGGAGTTCGGGTGTTTCTGGGAGATTGTAGAGGAGAAAGTTG
+TGGGTGTGAGCAAAGCAGATGCGCTTGTCGCCATGTTATCTTCTTCTTCT
+GTGGAGAGTTTCAGTGCAACCCACGCGTCCGCGGACGCGTGGxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+QA 13 692 1 692
+DS CHROMAT_FILE: LL2308f PHD_FILE: LL2308f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:52 2000
+
+CO Contig29 904 2 54 U
+ctggaaatttagaaaatatgaccttaaatcataatcattcgttcgggaaa
+aaagaaatacattttactcaacaaaataaaTATAAATACATTTTACCCGA
+GCATAGTAAACATTTCAAAGATTGAGAAATCTTTGAAGACTCATCACTCA
+TATGATTCTACACCGTCAAGTACTTTAGAACTGAAGCACGCACCTTCTCC
+ATGTAATCTCCTGCTTCTTTCTGGTTTCCTCTGTATGCATTTGCTATGTT
+TTCATCAAGAAGTGTCAACAAAGACCTGTTGATCTCGTTTTTCTCCACCA
+TATCCAACAATGTTGCTTTTATGTCGGTTGAGGTTAATATCTTGGTTAAG
+CTATTTGTAGCGGTCATAAGCTCCTTTTGCATTTTGTCATAAGCCTCTAT
+TCCTTCTTCTAAGGCTTTTTGAAGTGATTCAAGCTCAATCAATCTGTCTT
+CATCTTCCGCGGTTTTTGTAACCGCAAACCGGATTTGCCCGATTTCAAGT
+CGAATTTGCGCAAAAAACTCGTCATTTAACCTTCCACGAAGCCTAGCGAT
+TTCAAACTCAATCTCCTGAGCTTCCGTATCAAGAAAGAACTCAATGAGCT
+CAGCGGAAGTGTCTGGTACAACTCTAGACTCTCTAAGAGCTTGACGACGC
+TCTCTCTCTTCACGGAGTTGTTTCTGAAAAGCCTCTTTAGCCTCGGAGTC
+TCTCTCTAGCCTGCGTTTGAACTCAAGGCGAGCGATATGACCAGTTTGAG
+CAGGACCTAAGATACCTTTCGGATCCCATTCGCAGGAGATTTTGCCCCTT
+CTTGTATTAAATCTCGGGATTGTACGGACGGTGTTAGCAAAGGCGATTCC
+TATTGAGAGCGAAGCCATtgttatttttatttttttgtgggcgcggacgc
+gtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 37 32 32 29 29 29 35 35 35 35 26 26 26 35 35 35 32 32 32
+ 32 35 35 35 35 29 23 23 25 25 42 35 39 35 35 35 35 35 35 37 37 29 29 29 29 32 32 36 36 37 37 37 37 35 35 35 35 32 35 35 35 39 35 35 32 32 35 32 35 35
+ 37 37 40 40 40 35 35 35 35 35 37 35 29 30 29 29 23 31 29 35 47 47 56 49 39 41 35 40 33 45 48 60 60 54 57 54 53 53 49 59 55 55 50 59 59 67 66 65 58 58
+ 64 77 77 79 79 71 61 68 57 59 58 50 55 51 54 47 35 35 37 35 37 48 51 52 59 52 58 67 75 75 73 90 73 75 75 82 82 86 82 71 64 64 66 72 72 61 67 74 90 90
+ 76 81 85 89 85 81 83 90 84 58 61 54 65 65 73 71 70 66 79 82 81 79 79 79 80 75 75 79 81 81 90 88 87 78 78 78 78 83 86 90 90 90 86 86 89 90 89 79 80 83
+ 90 90 90 90 90 90 82 77 86 84 87 87 90 83 79 79 83 80 83 90 86 90 80 86 73 73 67 67 67 72 73 77 66 62 68 73 79 82 90 90 86 75 73 73 67 67 73 73 70 70
+ 75 80 86 89 86 90 77 75 72 72 73 75 80 86 86 88 90 90 86 86 90 90 90 90 90 90 88 85 90 90 90 90 90 90 90 90 90 75 77 80 75 82 82 81 81 90 90 90 90 90
+ 89 90 90 90 90 90 90 86 86 86 88 86 87 90 90 90 90 90 90 90 90 87 80 78 80 85 90 90 90 89 89 86 84 86 86 90 90 90 90 90 90 88 88 80 85 85 85 87 90 90
+ 90 90 90 90 90 88 80 78 78 79 79 79 90 85 88 89 79 72 69 69 76 76 83 78 78 78 78 86 90 90 88 90 88 90 90 90 90 86 86 88 90 90 90 90 90 89 89 90 82 75
+ 75 86 84 87 89 86 72 72 72 74 75 80 89 90 90 90 85 83 83 78 78 76 78 86 86 83 83 83 86 86 90 90 88 90 90 80 83 79 76 76 72 73 72 76 65 69 69 74 74 80
+ 90 90 90 90 86 88 90 89 90 89 90 90 90 88 88 88 87 87 80 80 66 70 62 67 70 76 85 89 90 90 90 90 90 88 90 84 85 80 81 85 85 85 90 78 71 73 71 73 80 80
+ 78 75 72 72 72 78 78 85 70 70 65 65 65 76 68 61 61 70 75 85 90 90 80 80 80 79 74 69 69 63 65 65 69 70 74 65 68 68 69 69 76 88 78 85 84 82 72 68 66 68
+ 58 60 66 66 66 69 69 84 84 81 80 81 83 77 77 84 89 80 80 81 70 64 60 66 66 76 74 68 72 83 83 70 74 60 60 58 69 69 68 72 72 85 81 75 77 89 76 76 65 65
+ 61 74 68 73 65 70 73 76 73 73 72 72 72 72 65 62 66 72 72 69 70 76 83 83 76 68 61 62 64 66 66 71 71 77 72 74 88 88 85 85 72 65 63 59 58 58 58 64 71 74
+ 74 68 58 60 60 66 66 60 58 58 58 58 58 66 66 51 46 43 43 43 43 43 43 51 51 56 56 51 43 35 35 35 35 35 38 45 45 45 45 45 51 40 40 40 40 40 40 56 45 45
+ 45 45 35 35 35 35 35 35 35 39 39 40 39 39 39 39 39 40 45 40 40 40 40 40 40 45 45 45 45 45 45 45 39 35 35 35 35 35 35 35 34 31 31 31 31 31 40 51 40 40
+ 40 40 40 46 46 39 34 34 34 33 32 33 40 40 40 45 46 51 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0
+
+AF LL2309f U 1
+AF LL2309r C 168
+BS 1 176 LL2309f
+BS 177 179 LL2309r
+BS 180 180 LL2309f
+BS 181 181 LL2309r
+BS 182 186 LL2309f
+BS 187 189 LL2309r
+BS 190 196 LL2309f
+BS 197 200 LL2309r
+BS 201 210 LL2309f
+BS 211 211 LL2309r
+BS 212 212 LL2309f
+BS 213 213 LL2309r
+BS 214 224 LL2309f
+BS 225 225 LL2309r
+BS 226 227 LL2309f
+BS 228 231 LL2309r
+BS 232 232 LL2309f
+BS 233 235 LL2309r
+BS 236 240 LL2309f
+BS 241 246 LL2309r
+BS 247 248 LL2309f
+BS 249 250 LL2309r
+BS 251 254 LL2309f
+BS 255 255 LL2309r
+BS 256 261 LL2309f
+BS 262 262 LL2309r
+BS 263 271 LL2309f
+BS 272 280 LL2309r
+BS 281 281 LL2309f
+BS 282 300 LL2309r
+BS 301 301 LL2309f
+BS 302 303 LL2309r
+BS 304 304 LL2309f
+BS 305 337 LL2309r
+BS 338 338 LL2309f
+BS 339 366 LL2309r
+BS 367 368 LL2309f
+BS 369 391 LL2309r
+BS 392 393 LL2309f
+BS 394 401 LL2309r
+BS 402 405 LL2309f
+BS 406 420 LL2309r
+BS 421 421 LL2309f
+BS 422 447 LL2309r
+BS 448 449 LL2309f
+BS 450 462 LL2309r
+BS 463 463 LL2309f
+BS 464 497 LL2309r
+BS 498 498 LL2309f
+BS 499 512 LL2309r
+BS 513 513 LL2309f
+BS 514 695 LL2309r
+BS 696 696 LL2309f
+BS 697 904 LL2309r
+
+RD LL2309f 768 0 0
+ctggaaatttagaaaatatgaccttaaatcataatcattcgttcgggaaa
+aaagaaatacattttactcaacaaaataaaTATAAATACATTTTACCCGA
+GCATAGTAAACATTTCAAAGATTGAGAAATCTTTGAAGACTCATCACTCA
+TATGATTCTACACCGTCAAGTACTTTAGAACTGAAGCACGCACCTTCTCC
+ATGTAATCTCCTGCTTCTTTCTGGTTTCCTCTGTATGCATTTGCTATGTT
+TTCATCAAGAAGTGTCAACAAAGACCTGTTGATCTCGTTTTTCTCCACCA
+TATCCAACAATGTTGCTTTTATGTCGGTTGAGGTTAATATCTTGGTTAAG
+CTATTTGTAGCGGTCATAAGCTCCTTTTGCATTTTGTCATAAGCCTCTAT
+TCCTTCTTCTAAGGCTTTTTGAAGTGATTCAAGCTCAATCAATCTGTCTT
+CATCTTCCGCGGTTTTTGTAACCGCAAACCGGATTTGCCCGATTTCAAGT
+CGAATTTGCGCAAAAAACTCGTCATTTAACCTTCCACGAAGCCTAGCGAT
+TTCAAACTCAATCTCCTGAGCTTCCGTATCAAGAAAGAACTCAATGAGCT
+CAGCGGAAGTGTCTGGTACAACTCTAGACTCTCTAAGAGCTTGACGACGC
+TCTCTCTCTTCACGGAGTTGTTTCTGAAAAGCCTCTTTAGCCTCGGAGTC
+TCTCTCTAGCCTGCGTTTGAACTCAAGGCGAGCGATATGACCAGTTTGAG
+CAGGACCTAAGATACcTt
+
+QA 81 768 1 768
+DS CHROMAT_FILE: LL2309f PHD_FILE: LL2309f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:57 2000
+
+RD LL2309r 781 0 0
+nagTACTTTAGAACTGAAGCACGCACCTTCTCCATGTAATCTCCTGCTTC
+tntcTGGTTTCCTCTGTATGCATTTGCTATGTTTTCATCAAGAAGTGTCA
+ACAAAGACCTGTTGATCTCGTTTTTCTCCACCATATCCAACAATGTTGCT
+TTTATGTCGGTTGAGGTTAATATCTTGGTTAAGCTATTTGTAGCGGTCAT
+AAGCTCCTTTTGCATTTTGTCATAAGCCTCTATTCCTTCTTCTAAGGCTT
+TTTGAAGTGATTCAAGCTCAATCAATCTGTCTTCATCTTCCGCGGTTTTT
+GTAACCGCAAACCGGATTTGCCCGATTTCAAGTCGAATTTGCGCAAAAAA
+CTCGTCATTTAACCTTCCACGAAGCCTAGCGATTTCAAACTCAATCTCCT
+GAGCTTCCGTATCAAGAAAGAACTCAATGAGCTCAGCGGAAGTGTCTGGT
+ACAACTCTAGACTCTCTAAGAGCTTGACGACGCTCTCTCTCTTCACGGAG
+TTGTTTCTGAAAAGCCTCTTTAGCCTCGGAGTCTCTCTCTAGCCTGCGTT
+TGAACTCAAGGCGAGCGATATGACCAGTTTGAGCAGGACCTAAGATACCT
+TTCGGATCCCATTCGCAGGAGATTTTGCCCCTTCTTGTATTAAATCTCGG
+GATTGTACGGACGGTGTTAGCAAAGGCGATTCCTATTGAGAGCGAAGCCA
+Ttgttatttttatttttttgtgggcgcggacgcgtggxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 3 701 2 737
+DS CHROMAT_FILE: LL2309r PHD_FILE: LL2309r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:34 2000
+
+CO Contig30 1421 2 24 U
+ttcatatattaaaggctTATTAAAAAGGAGTAGCTAATGCAAATTTAGTA
+GAGAAAGTAAATGATTTGACATGATAATAACAACCAAACTCAAACAAATG
+CAACCAAGACACTTGCTTCGTCTCCTTTCACAGCATCATGATCGCTTCTT
+CAACATTGTTGTCTTTAATAGGTGTAGCCTTTGACGAACATACCCTCCTT
+AGCCTCTTCAGACTCGCCTTCTCCGCTGTATTTTCCAAGCTGAGCCAACG
+AGTTGGCCTTGGCACGAGCCAAGAGAGTGGTCTGAGCAGCGTTCACGTTC
+TCAGCTCTGCCTCCCCATGTTTTCAGACAAGTGTTCTGGAGAGCACGTGC
+GTAGGAGAAGGACACGTGCCATGGGTTTGGTGCCTGGTTCATCGCGTTGA
+GGTTCAACGTTGCCTCCAACTCAGACTGTCCTCCAGACAAGAACATGATT
+CCGGGGACGGCAGGAGGGATTCTGTTGCGGAGGAGCTTAAGGGTGTAGGC
+GGCAACTTGTTCAGGAGTAGCTCTGTCTTTAGACTCGGCTCCAGGAGTCA
+CCATGCTCGGCTTCAGGAGGATACCTTCAAACATGACATTGTTCTGAGCA
+AGGTAAAAGAAAACCTCAGCCCAAACCTTCTCAGCTACTTCGTAAGTCCT
+GTCAATGTCGTGTTCTccGTCCAACAAGATCTCTGGCTCCACAATTGGGA
+CCAATCCACTGTCCTGTGAAATGGCAGCGTAGCGAGCAAGGCCCCAAGCA
+GCTTCTTTCACAGCGAGCGCAGACGGGCCGTTGGGAATGCTCACGACAGT
+ACGCCATttaGCGAAACGAGCACCCTGTTGGTAGTAAGCAGCAGTGCGAG
+AGGATAGACCATCGAGTCCTTGACACCATGACTCATTGTTAGAGCCAACA
+AGTGGCACCAAACCCTTGTCAACTTTGATACCGGGGACGATGTTCTGCTC
+GACGAGGACGTCGACCATTTTCTTTCCTTCGGTGGTAGACTGATAGAGAG
+TCTCCTCGAACAAGATTGCCCCGGATATGTACTGTCCGAGACCTGGTGCA
+GAGACAAGCAATGTCCTGTACGCTTGACGGTTTGCCTCAGTGTTCTCTAG
+CCCTATCGAGTCCAAACGCTTCCCGCAAGTCGCGTTCGACTCGTCCATCG
+CCAAGATTCCACGTCCCGGAGATGCAATTGTTTTCGCTGTCTTGACAAGC
+TCATCGGCGTAGGAAGAAGCGGCACGGACGGCGAGGGAGGTGGCGCGGTT
+GGGGAGGACGACGGATGCGGAAGAAGGCTGACGGAAGAGAACGCTTTGGC
+CCTTGACCCACTCAGATTTGTCCAACACTGGAGATGCCTTGAGGAGTGAG
+GTAGATGCCATTTTTTTTTGTTAATGTTATCTcTTtctcttctctctgcc
+tttggtttgtcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 15 25 25 29 29 27 27 31 46 40 40 40 40 37 37 34 29 29 29 29 29 35 46 40 40 40 40 40 40 40 38 38 40 40
+ 40 40 46 40 40 37 37 37 37 37 37 46 40 46 40 40 40 35 35 35 37 40 46 46 40 35 35 35 35 35 40 51 35 35 35 35 35 35 37 40 37 37 40 40 40 40 40 40 40 40
+ 40 37 37 37 37 37 37 40 46 35 35 35 39 35 35 40 40 37 40 37 37 40 40 37 37 46 46 46 46 40 40 40 40 40 40 40 40 40 40 40 38 34 34 34 34 34 35 35 37 40
+ 40 51 40 43 51 46 46 44 43 40 38 35 35 35 35 35 43 43 43 43 43 43 43 43 43 43 43 43 43 43 51 43 35 35 35 35 35 36 38 43 43 43 43 43 43 43 41 41 43 45
+ 41 41 41 45 41 45 45 45 45 40 36 36 43 43 43 43 43 40 43 42 56 56 43 43 43 36 36 36 43 46 51 51 51 51 51 51 51 43 43 45 45 45 45 56 56 51 43 43 43 43
+ 43 45 56 51 45 45 45 45 45 45 41 43 38 43 38 38 38 40 43 44 42 41 41 41 41 41 41 42 56 41 41 41 43 45 45 56 51 43 43 43 43 43 43 36 36 36 36 36 43 45
+ 45 45 43 43 43 43 43 43 43 43 43 43 43 43 43 56 56 56 56 56 51 51 51 51 43 43 43 43 43 43 51 51 45 45 45 45 45 45 46 46 41 41 43 36 36 36 36 36 43 43
+ 43 38 38 56 56 56 56 56 56 51 51 51 43 43 43 43 43 43 45 45 45 51 56 56 56 56 56 56 56 51 51 41 45 45 45 51 51 45 45 45 43 43 43 43 43 43 43 45 56 56
+ 56 56 51 51 51 43 43 43 45 43 43 45 45 41 41 43 51 45 45 45 45 45 41 41 41 45 45 43 56 56 56 56 56 51 43 43 43 43 43 43 43 43 43 43 43 43 56 56 56 51
+ 51 51 51 51 45 45 45 45 45 45 51 56 56 56 56 56 56 56 56 56 56 56 56 50 40 38 39 35 35 35 40 51 51 40 45 40 40 40 40 51 51 56 56 51 51 51 51 40 40 40
+ 40 40 40 37 39 37 37 37 40 43 51 45 40 40 40 45 45 51 40 40 40 45 45 45 56 56 56 56 56 56 42 40 40 45 45 45 51 40 40 40 40 40 51 56 56 56 51 51 45 45
+ 45 45 45 45 45 51 51 46 46 42 40 38 38 38 46 46 56 56 56 51 51 40 40 45 40 40 40 43 46 51 51 56 56 56 56 56 56 56 42 40 35 30 30 35 40 43 40 40 40 47
+ 37 46 28 28 28 32 32 48 48 34 32 34 40 40 40 40 40 40 40 40 40 40 36 40 37 37 36 39 35 42 42 46 37 35 35 33 35 35 29 39 40 44 44 48 40 40 34 29 29 29
+ 29 40 40 40 40 46 34 33 33 33 31 40 31 23 27 21 17 18 21 25 24 25 25 25 29 25 24 29 40 40 40 40 40 29 47 34 45 42 34 30 45 49 45 45 61 61 57 51 49 50
+ 49 51 50 52 62 71 53 56 55 53 54 50 42 40 48 48 48 48 39 46 53 53 39 37 48 54 51 58 50 57 52 58 57 58 54 50 50 42 41 41 37 40 42 39 37 37 35 52 63 71
+ 67 58 50 50 58 49 54 54 58 60 56 58 44 43 43 28 29 29 29 32 32 28 40 29 26 24 22 20 20 25 25 25 25 25 25 29 32 32 40 48 44 33 33 33 28 28 34 39 39 33
+ 33 32 32 32 32 32 27 15 15 17 22 22 29 39 32 33 29 33 29 35 29 35 42 56 56 56 40 37 37 37 37 37 37 37 35 36 35 29 35 28 28 26 32 32 37 37 37 40 40 51
+ 51 46 40 37 40 37 37 37 51 56 51 51 51 45 45 45 40 40 51 51 51 42 40 40 40 47 46 46 46 44 44 40 48 48 48 32 32 34 40 40 36 39 32 34 34 39 37 56 40 40
+ 40 40 39 35 36 33 36 37 56 56 56 56 56 56 56 45 51 51 45 45 45 51 56 45 51 51 51 51 51 51 51 51 51 45 45 45 45 45 45 45 45 51 51 51 51 56 56 56 56 56
+ 56 56 56 51 51 51 51 45 45 40 40 40 37 40 51 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 45 51 51 51 51 51 56 56 51 45 45 45 45 40 40 40 40
+ 40 40 51 51 45 45 45 45 40 51 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 51 51 51 45 45 45 43 43 43 43 43 43 51 51 51 51 56 56 51 51 51 51 45 45 45
+ 45 51 51 56 51 45 45 45 45 45 45 45 45 45 45 45 45 45 43 43 43 45 45 45 56 45 45 43 43 43 43 45 45 51 51 51 45 45 45 43 43 43 51 45 45 45 43 43 43 43
+ 43 45 56 56 51 51 43 43 43 43 43 43 43 56 43 43 43 43 43 43 51 46 43 43 43 43 36 36 36 36 36 36 43 43 43 43 43 45 45 43 36 36 36 36 36 43 43 43 43 43
+ 43 43 43 43 43 43 43 45 51 43 43 43 43 43 43 43 45 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 51 56 56 45 45 45 45 45
+ 45 56 45 45 43 43 43 43 43 43 43 51 45 45 45 43 36 36 36 36 36 36 36 40 40 51 51 51 43 43 43 43 43 43 51 51 51 43 43 43 43 43 43 43 43 43 43 43 43 51
+ 51 43 43 43 43 43 43 43 43 43 43 43 43 43 34 35 35 35 35 35 43 51 45 45 43 43 43 43 43 43 45 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 43 43 40 40
+ 40 40 51 51 51 51 51 51 56 40 40 40 40 40 40 45 45 45 45 51 51 51 51 51 56 56 51 51 40 40 40 40 40 40 45 51 51 51 51 51 51 51 45 40 40 40 40 40 40 40
+ 40 40 40 40 40 40 46 46 46 46 51 51 51 56 51 51 51 51 51 40 39 39 39 39 39 39 40 37 29 30 24 30 19 30 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2311f U 1
+AF LL2311r C 683
+BS 1 685 LL2311f
+BS 686 690 LL2311r
+BS 691 696 LL2311f
+BS 697 697 LL2311r
+BS 698 699 LL2311f
+BS 700 700 LL2311r
+BS 701 710 LL2311f
+BS 711 718 LL2311r
+BS 719 719 LL2311f
+BS 720 720 LL2311r
+BS 721 725 LL2311f
+BS 726 726 LL2311r
+BS 727 727 LL2311f
+BS 728 728 LL2311r
+BS 729 731 LL2311f
+BS 732 732 LL2311r
+BS 733 733 LL2311f
+BS 734 747 LL2311r
+BS 748 751 LL2311f
+BS 752 760 LL2311r
+BS 761 761 LL2311f
+BS 762 763 LL2311r
+BS 764 764 LL2311f
+BS 765 1421 LL2311r
+
+RD LL2311f 767 0 0
+ttcatatattaaaggctTATTAAAAAGGAGTAGCTAATGCAAATTTAGTA
+GAGAAAGTAAATGATTTGACATGATAATAACAACCAAACTCAAACAAATG
+CAACCAAGACACTTGCTTCGTCTCCTTTCACAGCATCATGATCGCTTCTT
+CAACATTGTTGTCTTTAATAGGTGTAGCCTTTGACGAACATACCCTCCTT
+AGCCTCTTCAGACTCGCCTTCTCCGCTGTATTTTCCAAGCTGAGCCAACG
+AGTTGGCCTTGGCACGAGCCAAGAGAGTGGTCTGAGCAGCGTTCACGTTC
+TCAGCTCTGCCTCCCCATGTTTTCAGACAAGTGTTCTGGAGAGCACGTGC
+GTAGGAGAAGGACACGTGCCATGGGTTTGGTGCCTGGTTCATCGCGTTGA
+GGTTCAACGTTGCCTCCAACTCAGACTGTCCTCCAGACAAGAACATGATT
+CCGGGGACGGCAGGAGGGATTCTGTTGCGGAGGAGCTTAAGGGTGTAGGC
+GGCAACTTGTTCAGGAGTAGCTCTGTCTTTAGACTCGGCTCCAGGAGTCA
+CCATGCTCGGCTTCAGGAGGATACCTTCAAACATGACATTGTTCTGAGCA
+AGGTAAAAGAAAACCTCAGCCCAAACCTTCTCAGCTACTTCGTAAGTCCT
+GTCAATGTCGTGTTCTccGTCCAACAAGATCTCTGGCTCCACAATTGGGA
+CCAATCCACTGTCCTGTGAAATGGCAGCGTAGCGAGCAAGGCCCCAAGCA
+GCTTCTTTCACAGCGAG
+
+QA 17 767 1 767
+DS CHROMAT_FILE: LL2311f PHD_FILE: LL2311f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:07 2000
+
+RD LL2311r 783 0 0
+cTGGCTCCACAATTGGGACCAATCCACTGTCCTGTGAAATGGCAGCGTAG
+CGAGCAAGGCCCCAAGCAGCTTCTTTCACAGCGAGCGCAGACGGGCCGTT
+GGGAATGCTCACGACAGTACGCCATttaGCGAAACGAGCACCCTGTTGGT
+AGTAAGCAGCAGTGCGAGAGGATAGACCATCGAGTCCTTGACACCATGAC
+TCATTGTTAGAGCCAACAAGTGGCACCAAACCCTTGTCAACTTTGATACC
+GGGGACGATGTTCTGCTCGACGAGGACGTCGACCATTTTCTTTCCTTCGG
+TGGTAGACTGATAGAGAGTCTCCTCGAACAAGATTGCCCCGGATATGTAC
+TGTCCGAGACCTGGTGCAGAGACAAGCAATGTCCTGTACGCTTGACGGTT
+TGCCTCAGTGTTCTCTAGCCCTATCGAGTCCAAACGCTTCCCGCAAGTCG
+CGTTCGACTCGTCCATCGCCAAGATTCCACGTCCCGGAGATGCAATTGTT
+TTCGCTGTCTTGACAAGCTCATCGGCGTAGGAAGAAGCGGCACGGACGGC
+GAGGGAGGTGGCGCGGTTGGGGAGGACGACGGATGCGGAAGAAGGCTGAC
+GGAAGAGAACGCTTTGGCCCTTGACCCACTCAGATTTGTCCAACACTGGA
+GATGCCTTGAGGAGTGAGGTAGATGCCATTTTTTTTTGTTAATGTTATCT
+cTTtctcttctctctgcctttggtttgtcggacgcgtggxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 1 703 1 739
+DS CHROMAT_FILE: LL2311r PHD_FILE: LL2311r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:44 2000
+
+CO Contig31 788 2 46 U
+ccattctttctacttttacacattgaaaatacaacaatttcaataccata
+tacaatacacaaaaatataaaaatttcgttTGTATAAGTTCTACAGGGAC
+GAGCCCTAAATAAAAATCATACACTTATTGTACGTAAACACTTCTCGAAT
+GAAGCAATGTGACTTGAGAACAAAGGCAAATACGGAGAGAGACTTACGAA
+GGGGAAGTAGAAGGTGCAGTTGCAGGGTTAAATGCCTTCTGCTCAGCGCA
+GCTGAGCTCCTCGTTTGGCCCCGCGTCTTCCCTTCCAATACCCAAGAGGT
+*CGAGGAAGTACAAGTAGTGCGAGACTATGTTGCTCATTGGCTCAATGTC
+ACCTTGACCACATGTGTACTCACCGTACAAGACGTTCATCGTGGTACCAA
+AAGTCGGGCCACGTTTGGACAAAGTATCATTCTTTGTCGGCTTCCAGTTC
+CCAACAAAGATGTCGTGAGCTGACGGCTGAGCTTTCTTGATCGGTGTCAT
+CCATCTCCAGATTGCAGCTTGGAAGGCAAGCGTCGCGTTCTGCTCGATGT
+ACTCGGGGTGGTTCAAGAGATCGGCTTTTAGAGCATCCCCAGCTGCACCG
+TAGTTGAAGTTCCAGTAGATGGGTAAAGCACCACGTCCGTAGTACTCAGC
+TCCAGGGCTGCAAGGGTACTTGTATTTCCATGTCTCGTCACAGTAAGATT
+GCATTGGACTCATCTCCCTGTTATAGCACAGACCCCAAGCTAAAGGCCCT
+CCTGTCGCAACTCCGTATCCACaggagcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 37 37 35 35 35 35 35 35 45 35 37 37 35 35 35 51 40 35 35 35
+ 35 35 35 35 40 42 42 44 35 37 37 40 40 40 56 37 35 35 35 35 35 35 35 35 35 35 35 37 37 35 35 35 35 35 35 35 40 40 43 37 37 37 40 40 40 43 40 40 37 37
+ 40 40 40 40 40 40 37 44 42 42 37 40 40 37 40 37 42 42 42 42 42 46 43 43 43 43 43 43 40 40 36 36 36 36 36 40 42 42 44 36 34 32 32 32 28 36 36 36 36 43
+ 46 56 56 56 56 43 42 42 42 42 42 50 50 38 38 38 40 43 43 40 40 43 43 43 43 56 43 43 43 43 43 38 38 38 38 46 47 49 45 43 46 44 44 36 41 36 36 35 36 35
+ 35 38 35 47 38 38 38 38 38 38 41 41 41 45 50 38 38 43 40 43 43 43 43 38 38 36 38 41 45 45 35 41 41 45 41 42 43 43 41 45 45 45 45 46 42 42 42 46 40 43
+ 43 43 43 43 56 56 56 43 38 38 46 56 52 63 71 72 56 48 49 47 48 49 47 45 44 45 60 58 58 53 54 51 54 53 54 53 57 60 53 57 56 56 58 56 68 74 63 60 56 55
+ 58 58 55 61 50 58 58 58 58 58 69 79 76 72 71 61 66 57 57 55 55 58 58 59 58 55 58 59 59 52 55 61 59 59 65 69 66 63 66 61 61 53 69 71 81 78 71 71 71 71
+ 71 71 71 75 74 71 55 55 55 60 62 64 70 64 70 62 64 67 71 72 74 74 69 80 76 80 67 69 65 60 77 77 71 71 65 83 69 69 69 69 62 66 66 71 76 90 85 85 90 73
+ 68 61 61 65 72 77 90 90 86 86 78 76 78 73 68 68 69 76 83 69 77 66 72 57 57 55 58 61 63 70 70 72 78 74 81 82 90 90 78 76 76 82 68 68 69 79 82 82 90 88
+ 86 82 82 82 76 74 72 68 66 66 75 75 69 73 71 71 72 72 75 64 74 58 60 50 54 56 72 66 74 69 69 64 71 67 67 67 72 70 73 84 86 75 75 73 75 81 83 90 87 87
+ 85 85 85 90 87 83 69 63 63 67 67 64 60 75 73 80 79 79 78 85 77 90 81 81 84 80 63 60 54 54 54 66 66 68 76 78 78 65 68 67 75 64 64 73 68 69 69 68 64 67
+ 63 83 55 59 65 65 69 69 69 69 69 69 69 72 72 66 71 77 90 88 72 69 69 64 64 69 69 70 72 83 78 64 62 62 60 55 60 64 78 74 79 71 69 73 73 72 69 69 67 64
+ 64 67 85 72 67 67 67 67 58 58 59 61 63 68 64 69 72 69 65 63 68 76 76 76 73 74 72 72 63 66 66 67 68 67 75 56 54 54 55 61 60 63 62 56 55 58 80 58 59 58
+ 58 58 59 62 65 65 65 69 66 63 62 55 71 71 71 71 71 68 61 56 55 53 53 52 53 57 59 61 66 66 71 75 71 78 75 73 73 68 68 63 63 61 60 59 56 47 60 35 37 30
+ 45 44 50 50 50 50 50 50 55 58 63 59 55 50 50 35 35 35 35 40 45 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2314f U 1
+AF LL2314r C 62
+BS 1 235 LL2314f
+BS 236 243 LL2314r
+BS 244 253 LL2314f
+BS 254 254 LL2314r
+BS 255 311 LL2314f
+BS 312 327 LL2314r
+BS 328 424 LL2314f
+BS 425 425 LL2314r
+BS 426 426 LL2314f
+BS 427 427 LL2314r
+BS 428 457 LL2314f
+BS 458 460 LL2314r
+BS 461 471 LL2314f
+BS 472 472 LL2314r
+BS 473 476 LL2314f
+BS 477 480 LL2314r
+BS 481 493 LL2314f
+BS 494 496 LL2314r
+BS 497 499 LL2314f
+BS 500 502 LL2314r
+BS 503 508 LL2314f
+BS 509 509 LL2314r
+BS 510 515 LL2314f
+BS 516 523 LL2314r
+BS 524 534 LL2314f
+BS 535 537 LL2314r
+BS 538 538 LL2314f
+BS 539 546 LL2314r
+BS 547 549 LL2314f
+BS 550 554 LL2314r
+BS 555 557 LL2314f
+BS 558 572 LL2314r
+BS 573 573 LL2314f
+BS 574 591 LL2314r
+BS 592 592 LL2314f
+BS 593 602 LL2314r
+BS 603 603 LL2314f
+BS 604 620 LL2314r
+BS 621 621 LL2314f
+BS 622 639 LL2314r
+BS 640 640 LL2314f
+BS 641 641 LL2314r
+BS 642 642 LL2314f
+BS 643 748 LL2314r
+BS 749 751 LL2314f
+BS 752 788 LL2314r
+
+RD LL2314f 773 0 0
+ccattctttctacttttacacattgaaaatacaacaatttcaataccata
+tacaatacacaaaaatataaaaatttcgttTGTATAAGTTCTACAGGGAC
+GAGCCCTAAATAAAAATCATACACTTATTGTACGTAAACACTTCTCGAAT
+GAAGCAATGTGACTTGAGAACAAAGGCAAATACGGAGAGAGACTTACGAA
+GGGGAAGTAGAAGGTGCAGTTGCAGGGTTAAATGCCTTCTGCTCAGCGCA
+GCTGAGCTCCTCGTTTGGCCCCGCGTCTTCCCTTCCAATACCCAAGAGGT
+*CGAGGAAGTACAAGTAGTGCGAGACTATGTTGCTCATTGGCTCAATGTC
+ACCTTGACCACATGTGTACTCACCGTACAAGACGTTCATCGTGGTACCAA
+AAGTCGGGCCACGTTTGGACAAAGTATCATTCTTTGTCGGCTTCCAGTTC
+CCAACAAAGATGTCGTGAGCTGACGGCTGAGCTTTCTTGATCGGTGTCAT
+CCATCTCCAGATTGCAGCTTGGAAGGCAAGCGTCGCGTTCTGCTCGATGT
+ACTCGGGGTGGTTCAAGAGATCGGCTTTTAGAGCATCCCCAGCTGCACCG
+TAGTTGAAGTTCCAGTAGATGGGTAAAGCACCACGTCCGTAGTACTCAGC
+TCCAGGGCTGCAAGGGTACTTGTATTTCCATGTCTCGTCACAGTAAGATT
+GCATTGGACTCATCTCCCTGTTATAGCACAGACCCCAAGCTAAAGGCCCT
+CCTGTCGCAACTCCGTattcacn
+
+QA 81 766 1 772
+DS CHROMAT_FILE: LL2314f PHD_FILE: LL2314f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:43 2000
+
+RD LL2314r 772 0 2
+aaaat*taaaaatt*cgtttgt*taagtt*tccaggc*cgagcc*taa*t
+aaaa*tcat*cact*attg*acgtaacc*ct*ctcgaatg*agcaatgt*
+acttaa*aacaaa*gcaaatccg*agaaagncttcng*a*gggaAGTaga
+agtt*cagt*gcagggt*aAATGCCTTCTGCTCAGcg*agCTGAGctc*t
+cgtttg*cCCCGCGTCTtcctttcca*tacc**agaggtncgagnaagTA
+CAAGTAGTGCGAGACTATGTTGCTCATTGGCTCAATGTCACCTTGACCAC
+ATGTGTACTCACCGTACAAGACGTTCATCGTGGTACCAAAAGTCGGGCCA
+CGTTTGGACAAAGTATCATTCTTTGTCGGCTTCCAGTTCCCAACAAAGAT
+GTCGTGAGCTGACGGCTGAGCTTTCTTGATCGGTGTCATCCATCTCCAGA
+TTGCAGCTTGGAAGGCAAGCGTCGCGTTCTGCTCGATGTACTCGGGGTGG
+TTCAAGAGATCGGCTTTTAGAGCATCCCCAGCTGCACCGTAGTTGAAGTT
+CCAGTAGATGGGTAAAGCACCACGTCCGTAGTACTCAGCTCCAGGGCTGC
+AAGGGTACTTGTATTTCCATGTCTCGTCACAGTAAGATTGCATTGGACTC
+ATCTCCCTGTTATAGCACAGACCCCAAGCTAAAGGCCCTCCTGTCGCAAC
+TCCGTATCCACaggagcggacgcgtggxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxatt
+
+QA 267 711 1 727
+DS CHROMAT_FILE: LL2314r PHD_FILE: LL2314r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:21 2000
+
+RT{
+LL2314r compression phrap 206 208 000919:094547
+}
+
+RT{
+LL2314r compression phrap 154 156 000919:094547
+}
+
+CO Contig32 1387 2 16 U
+ccatccccagctttgttaataatgcatttacatatacagaagattctaga
+gcagagtaacaatgtgagattggagatcatTCTTCATTTACTTTCTCCTA
+GAGAAATCATCCCATGGGTCTTCTTGACCAAAGATCTCGTCGATGATGTT
+GGAAGTTGGAAGAAAATCAATGTTCTTACTCCAACAATGATCAAATGAAT
+AAAAATGATCCTCCACAAGATCATTGCAATGGCTCCAGTGATTCCCAACA
+TCGGATTCCTCTCTGTTTACATTGGACTGGAAGTCATCGGACATGCACGA
+ATCTTCTTCTACAATCACCTCTTCTTCTTCATCATCTCTTGAGAGGCTTT
+TTCCGGTTTTATCAACTGGAGTTTCAGGACCATAAAACGTGTTGTCATAC
+TCTACTTGCCACTCATCATCGTCACTATCGATGTCCAGGCTTTTAGAACG
+ATTCTGAACAGCTCTTCTTCGCAGGATGAAGACGTTGAAATAATAGCTCA
+CAATCTCCTTCATGGTTCGTGAAGGAAACGCAGACTTTAGGTGTTTCCAG
+AAATCACGGTTCAGTGAAACAGGGTTGGAGTAAACAACCTCGTGGAATAG
+ATCTTCCTCGTCCTCGGTTAGTTTACCAGCGACTTCCTCTCCCATTTCAC
+CGAGACCTATGTTCAAACATCTTTCGTATCCGATAGTCTCAAACAAGCCT
+TCTCGGTTTTCAATGATGTGCTGCTGCACGCATCTGATGGAACCTTTATC
+CACGCAAATGCATTCTTTTCTTCCTTTACCAATTTCGAAGAACTCGGTTT
+CATGGTCAGGCATTGGAATCACGCACTTGCCGATCAGTTGTTCTTCGTTG
+TGATCAACAACATCACGGACTTCTTCCTTGACACAATCAGGAATATTAGC
+CTGGTAATCAGACCCGATTCGAACTTGTTTTCTAGGAGAACTCTGACACC
+AATCTGACTCGGGAGATTCGTGAGAAAGGGTAGACTGAGTCGTAGCATCC
+TCCTCAGATGAGATCCAGGAGAAAGGTGCCTTGGTCTCAGACCCCTTCTC
+AACACTATCCTCACATTCAAGCCCGTAGATATCACTCAGATCCTCTCCTA
+TGACAAGACCAGACTTCTCCAGTGAAACATGATATGGAACTCCTTCGTCT
+AGCTTGGCCAGCTTATTGCAATAACTAATCTCTCTTGCATGCTTCATGTT
+GAGCTCTTGCACATCCTCAGCTTCAAAAGTGCGCTTAACACCCATATTTT
+TTTGTACAATTGATTCGAAGATAGATCTGGAAGAAGGAAGAGATCTGAAC
+ACGAATTCAAGAAAAAATAAATTTGGGGGAAATTTCGTGCTTTTCgatct
+ggggaagacccggcccggattaaattcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 39 56 40 46 40 40 40 40 40 40 40 40 45 45 51 51 46 46 40 40
+ 40 40 51 51 51 46 40 40 40 40 40 51 51 51 51 51 56 51 51 45 45 45 45 51 40 40 39 39 39 39 39 40 56 51 45 40 40 40 43 43 43 43 43 43 43 43 43 43 56 56
+ 56 56 56 56 56 56 51 51 51 51 51 51 51 51 51 51 51 45 45 45 45 45 45 45 45 43 43 43 43 43 43 43 43 43 43 43 43 51 56 56 56 56 56 56 56 56 56 51 51 51
+ 51 51 51 56 56 56 56 56 56 51 51 51 45 45 51 51 45 45 51 51 51 51 51 45 45 45 43 41 42 56 40 43 43 43 43 43 51 56 56 56 51 51 51 51 51 51 51 51 51 43
+ 43 46 46 46 43 56 51 51 51 51 51 51 51 51 51 51 43 43 43 43 43 43 56 43 43 43 43 43 43 43 51 51 51 51 51 45 45 45 36 36 36 43 43 43 45 43 43 43 43 43
+ 43 51 51 51 51 56 56 56 43 43 43 43 43 43 45 45 43 43 43 43 45 45 51 51 51 51 51 51 51 51 56 56 56 51 51 51 51 51 51 56 56 42 42 41 43 43 43 43 43 43
+ 56 51 51 51 51 56 56 56 56 56 56 56 56 43 43 43 46 46 46 56 56 56 51 45 45 45 45 45 45 45 45 43 51 42 56 51 51 51 51 51 51 56 56 56 56 56 56 45 45 45
+ 45 45 45 51 51 51 45 43 43 43 43 43 43 43 43 56 56 51 51 51 51 51 45 45 45 45 45 45 45 51 51 51 56 56 56 56 56 45 45 51 46 45 41 44 42 42 41 45 45 45
+ 45 45 51 56 56 51 51 51 51 43 43 43 43 43 40 56 50 50 40 40 45 45 45 45 51 51 56 56 56 56 45 45 45 40 40 40 46 56 51 51 40 40 45 45 40 40 40 39 39 35
+ 46 43 45 46 51 51 51 51 51 51 56 56 56 51 56 56 43 51 43 43 42 51 51 56 56 51 40 40 37 37 40 37 45 40 45 40 40 46 42 42 42 43 44 40 40 45 45 45 40 40
+ 42 56 56 56 45 45 45 45 38 38 42 46 46 56 56 44 56 56 56 56 56 56 56 47 47 47 56 56 40 37 37 37 37 40 56 51 51 51 51 43 43 42 42 42 56 42 42 42 37 37
+ 34 40 40 40 40 40 40 40 40 40 29 29 34 37 40 40 25 20 20 25 27 27 29 29 25 29 29 29 29 29 29 29 27 27 25 22 22 25 32 29 29 29 29 32 32 32 34 34 44 49
+ 52 58 56 57 50 51 48 56 63 67 69 47 47 50 42 40 41 64 66 66 69 60 65 64 65 65 56 56 50 44 40 50 48 53 65 69 54 57 56 57 52 55 55 44 41 35 50 42 42 44
+ 47 50 53 56 52 51 49 50 50 58 61 69 69 69 69 69 78 72 70 73 69 64 62 62 62 62 52 59 57 62 62 69 75 88 88 80 80 58 42 42 37 37 40 44 46 47 47 60 61 55
+ 44 49 43 46 38 44 47 51 51 53 51 47 47 49 63 63 63 49 49 63 25 25 29 32 29 32 24 29 24 32 34 40 32 32 32 34 34 31 40 40 40 46 37 40 37 37 37 37 37 37
+ 40 37 34 34 29 29 32 34 32 32 32 32 40 25 22 22 25 32 32 32 32 29 34 31 36 40 39 31 31 31 39 39 40 32 32 32 32 25 25 34 40 32 32 32 34 34 39 40 40 46
+ 46 46 51 40 46 42 42 42 42 42 42 46 56 51 51 40 45 45 45 51 51 56 56 56 56 56 56 56 56 44 44 44 42 42 46 43 51 51 56 51 51 51 51 46 40 35 35 35 39 39
+ 45 51 40 40 40 40 37 37 51 51 51 56 56 56 56 56 45 40 40 40 40 40 40 56 56 56 56 56 56 56 56 56 44 56 56 56 56 56 56 56 42 46 46 51 43 43 42 43 43 51
+ 56 56 56 56 51 51 51 51 51 51 51 51 51 56 56 56 45 40 40 40 40 40 51 56 56 56 56 45 45 45 45 45 45 45 51 45 45 45 45 45 43 43 43 43 43 43 41 51 51 51
+ 51 51 56 51 51 51 45 45 45 51 51 51 56 56 56 56 56 56 56 56 56 56 56 45 45 45 45 45 51 51 45 43 43 43 43 43 43 51 51 51 56 56 56 56 51 51 51 51 51 51
+ 51 51 51 51 51 51 51 51 51 51 51 51 56 56 56 56 56 51 51 51 51 51 45 51 45 45 45 45 45 45 56 51 51 51 51 45 45 45 45 45 45 56 56 56 56 56 56 56 51 51
+ 56 56 56 45 45 45 45 45 45 51 51 51 51 51 51 51 51 51 43 36 36 36 36 36 43 51 51 56 56 56 56 56 56 56 51 51 51 51 51 51 56 56 56 45 43 43 43 43 43 43
+ 43 43 43 43 45 45 45 43 43 43 45 43 43 51 51 43 43 43 43 43 43 43 43 43 43 45 45 45 45 51 51 51 51 51 45 45 45 46 43 43 56 56 56 56 56 51 51 51 51 43
+ 43 43 43 43 43 45 43 43 43 43 40 40 56 56 56 56 45 45 45 45 45 45 56 56 56 40 38 38 35 35 35 35 35 35 35 40 40 40 40 56 56 56 51 51 51 51 51 51 56 56
+ 56 51 51 51 51 51 51 51 51 51 51 51 51 45 45 45 45 45 45 40 40 40 40 40 40 39 39 39 39 39 40 51 51 51 40 40 40 40 40 40 45 45 45 35 35 35 35 35 39 45
+ 45 45 45 45 45 51 51 51 51 51 51 51 51 51 56 39 39 39 39 40 40 51 51 56 56 51 51 51 51 51 51 56 56 40 39 39 39 39 39 46 40 40 29 24 23 13 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2319f U 1
+AF LL2319r C 645
+BS 1 654 LL2319f
+BS 655 655 LL2319r
+BS 656 663 LL2319f
+BS 664 664 LL2319r
+BS 665 676 LL2319f
+BS 677 682 LL2319r
+BS 683 696 LL2319f
+BS 697 697 LL2319r
+BS 698 704 LL2319f
+BS 705 728 LL2319r
+BS 729 735 LL2319f
+BS 736 738 LL2319r
+BS 739 744 LL2319f
+BS 745 759 LL2319r
+BS 760 763 LL2319f
+BS 764 1387 LL2319r
+
+RD LL2319f 774 0 0
+ccatccccagctttgttaataatgcatttacatatacagaagattctaga
+gcagagtaacaatgtgagattggagatcatTCTTCATTTACTTTCTCCTA
+GAGAAATCATCCCATGGGTCTTCTTGACCAAAGATCTCGTCGATGATGTT
+GGAAGTTGGAAGAAAATCAATGTTCTTACTCCAACAATGATCAAATGAAT
+AAAAATGATCCTCCACAAGATCATTGCAATGGCTCCAGTGATTCCCAACA
+TCGGATTCCTCTCTGTTTACATTGGACTGGAAGTCATCGGACATGCACGA
+ATCTTCTTCTACAATCACCTCTTCTTCTTCATCATCTCTTGAGAGGCTTT
+TTCCGGTTTTATCAACTGGAGTTTCAGGACCATAAAACGTGTTGTCATAC
+TCTACTTGCCACTCATCATCGTCACTATCGATGTCCAGGCTTTTAGAACG
+ATTCTGAACAGCTCTTCTTCGCAGGATGAAGACGTTGAAATAATAGCTCA
+CAATCTCCTTCATGGTTCGTGAAGGAAACGCAGACTTTAGGTGTTTCCAG
+AAATCACGGTTCAGTGAAACAGGGTTGGAGTAAACAACCTCGTGGAATAG
+ATCTTCCTCGTCCTCGGTTAGTTTACCAGCGACTTCCTCTCCCATTTCAC
+CGAGACCTATGTTCAAACATCTTTCGTATCCGATAGTCTCAAACAAGCCT
+TCTCGGTTTTCAATGATGTGCTGCTGCACGCATCTGATGGAACCTTTATC
+CACGCAAATGCATTCTTTTCTtcc
+
+QA 81 770 1 774
+DS CHROMAT_FILE: LL2319f PHD_FILE: LL2319f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:07 2000
+
+RD LL2319r 787 0 0
+tttcACCGAGACCTATGTTCAAACATCTTTCGTATCCGATAGTCTCAAAC
+AAGCCTTCTCGGTTTTCAATGATGTGCTGCTGCACGCATCTGATGGAACC
+TTTATCCACGCAAATGCATTCTTTTCTTCCTTTACCAATTTCGAAGAACT
+CGGTTTCATGGTCAGGCATTGGAATCACGCACTTGCCGATCAGTTGTTCT
+TCGTTGTGATCAACAACATCACGGACTTCTTCCTTGACACAATCAGGAAT
+ATTAGCCTGGTAATCAGACCCGATTCGAACTTGTTTTCTAGGAGAACTCT
+GACACCAATCTGACTCGGGAGATTCGTGAGAAAGGGTAGACTGAGTCGTA
+GCATCCTCCTCAGATGAGATCCAGGAGAAAGGTGCCTTGGTCTCAGACCC
+CTTCTCAACACTATCCTCACATTCAAGCCCGTAGATATCACTCAGATCCT
+CTCCTATGACAAGACCAGACTTCTCCAGTGAAACATGATATGGAACTCCT
+TCGTCTAGCTTGGCCAGCTTATTGCAATAACTAATCTCTCTTGCATGCTT
+CATGTTGAGCTCTTGCACATCCTCAGCTTCAAAAGTGCGCTTAACACCCA
+TATTTTTTTGTACAATTGATTCGAAGATAGATCTGGAAGAAGGAAGAGAT
+CTGAACACGAATTCAAGAAAAAATAAATTTGGGGGAAATTTCGTGCTTTT
+Cgatctggggaagacccggcccggattaaattcggacgcgtggxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 1 707 1 743
+DS CHROMAT_FILE: LL2319r PHD_FILE: LL2319r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:44 2000
+
+CO Contig33 558 2 92 U
+CTTTTTTTTTTTTTTTTTGAGAAAAGAATAAATACATTATTATGCTTTTC
+ATCTGCTCAGATAAGTGCAATGCAGAAGAAATTGCAACAATGTTGTCCAC
+TGGAAGAAAAGACGCAGAATAAAAAAATAATATATAAGCTTTAAAAAGAA
+AAACAACGAGAAGAGAAACTCAAAAACAATGTTTGATTGTACGATACAAA
+ATACTATTGCATTTTCGGTTATTTTATTATATAGATATTCATCGGAGGAA
+TTGTTTACGCAACCGTCGACCGCCGTCGGCCGTAGCTCTCTACGCCGTCT
+TACCTTGCCGTTTCTCTTCTTCGATCTTGGCCTTAATGACAGAGTAGAGT
+GCAACGCCAGCAATGGCTATTCCAGTTCCGATACCTGTCTGCGTCGATAT
+CTTGTTTCCGAAGATAACGATGGAGAATCCGATCACGAAGACACGTTTCA
+GAACGTTTCCAACAGCGTGAGTCAACGGTGCTACCCTCTCCAACGTATTA
+GTAGCCAACTGATTGTAGAGATGGTAAAACATTCCAACCCAGAAGAGCGG
+ACGCGTGG
+
+BQ
+ 42 56 56 56 56 56 56 48 48 48 48 48 40 40 40 34 29 29 34 40 40 48 48 48 40 40 40 40 40 55 55 55 57 54 54 55 79 79 79 79 75 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 89 73 72 72 72 75 86 86 86 86 90 85 83 83 77 80 80 90 90 90 90 90 90 85 76 76 77 76 82 90 90 90 90 90 90 90 71 71 66 51 45 45
+ 45 45 66 66 66 90 90 90 70 70 49 47 46 57 62 74 86 77 77 77 77 77 77 90 90 90 90 86 86 86 86 88 88 88 80 80 80 80 90 80 86 86 88 90 90 90 90 90 90 90
+ 90 90 90 90 86 85 85 85 85 86 90 90 90 90 90 90 90 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 90 90 90 90 90 90 78 78 78 78 78 78 90 90 90 90 90
+ 90 90 90 90 90 90 86 90 90 88 88 90 90 78 78 82 81 78 78 90 90 90 90 90 90 90 88 90 90 90 85 85 90 85 85 90 90 90 90 90 88 88 88 88 88 86 90 90 90 90
+ 90 90 90 90 90 79 78 78 78 78 78 79 88 88 88 88 86 86 83 86 86 86 79 79 79 88 86 86 86 86 86 86 88 86 86 86 86 90 90 90 82 79 79 79 79 86 79 81 87 86
+ 90 90 90 90 88 90 86 86 86 86 86 86 86 90 90 90 90 90 90 90 90 90 90 90 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 90 87 89 85 85 85 83 83
+ 83 86 86 86 86 86 86 86 86 86 83 81 81 83 83 84 83 83 83 83 90 90 90 90 90 90 85 85 89 89 88 84 90 90 88 88 90 90 88 86 83 83 73 73 78 90 90 90 90 90
+ 90 90 90 90 90 90 83 85 80 83 78 80 85 90 90 83 83 88 85 85 83 90 89 90 90 90 90 83 90 90 90 90 90 90 90 90 90 85 82 87 87 75 74 74 76 74 74 80 80 80
+ 80 73 78 78 79 74 74 90 86 86 86 75 76 84 84 84 76 72 76 81 81 81 82 79 80 80 80 71 71 70 72 72 75 72 80 90 90 90 90 90 88 78 78 72 72 72 72 77 77 72
+ 70 70 77 74 79 73 76 76 76 84 84 84 86 82 82 87 69 66 63 74 74 79 83 79 82 82 87 90 90 90 90 90 88 90 90 90 90 90 90 90 90 88 85 85 82 66 66 66 68 66
+ 80 77 49 50 54 35 46 56
+
+AF LL2321r C -190
+AF LL2321f U 15
+BS 1 33 LL2321r
+BS 34 35 LL2321f
+BS 36 46 LL2321r
+BS 47 47 LL2321f
+BS 48 64 LL2321r
+BS 65 75 LL2321f
+BS 76 108 LL2321r
+BS 109 114 LL2321f
+BS 115 153 LL2321r
+BS 154 154 LL2321f
+BS 155 172 LL2321r
+BS 173 173 LL2321f
+BS 174 179 LL2321r
+BS 180 182 LL2321f
+BS 183 188 LL2321r
+BS 189 189 LL2321f
+BS 190 196 LL2321r
+BS 197 198 LL2321f
+BS 199 212 LL2321r
+BS 213 213 LL2321f
+BS 214 219 LL2321r
+BS 220 220 LL2321f
+BS 221 222 LL2321r
+BS 223 226 LL2321f
+BS 227 227 LL2321r
+BS 228 230 LL2321f
+BS 231 232 LL2321r
+BS 233 233 LL2321f
+BS 234 245 LL2321r
+BS 246 268 LL2321f
+BS 269 269 LL2321r
+BS 270 272 LL2321f
+BS 273 275 LL2321r
+BS 276 287 LL2321f
+BS 288 295 LL2321r
+BS 296 300 LL2321f
+BS 301 305 LL2321r
+BS 306 313 LL2321f
+BS 314 319 LL2321r
+BS 320 327 LL2321f
+BS 328 331 LL2321r
+BS 332 332 LL2321f
+BS 333 333 LL2321r
+BS 334 337 LL2321f
+BS 338 340 LL2321r
+BS 341 345 LL2321f
+BS 346 351 LL2321r
+BS 352 360 LL2321f
+BS 361 371 LL2321r
+BS 372 376 LL2321f
+BS 377 378 LL2321r
+BS 379 381 LL2321f
+BS 382 384 LL2321r
+BS 385 390 LL2321f
+BS 391 392 LL2321r
+BS 393 406 LL2321f
+BS 407 413 LL2321r
+BS 414 415 LL2321f
+BS 416 417 LL2321r
+BS 418 418 LL2321f
+BS 419 421 LL2321r
+BS 422 427 LL2321f
+BS 428 428 LL2321r
+BS 429 433 LL2321f
+BS 434 434 LL2321r
+BS 435 437 LL2321f
+BS 438 447 LL2321r
+BS 448 451 LL2321f
+BS 452 452 LL2321r
+BS 453 455 LL2321f
+BS 456 463 LL2321r
+BS 464 466 LL2321f
+BS 467 467 LL2321r
+BS 468 468 LL2321f
+BS 469 469 LL2321r
+BS 470 477 LL2321f
+BS 478 479 LL2321r
+BS 480 485 LL2321f
+BS 486 491 LL2321r
+BS 492 506 LL2321f
+BS 507 509 LL2321r
+BS 510 515 LL2321f
+BS 516 517 LL2321r
+BS 518 526 LL2321f
+BS 527 527 LL2321r
+BS 528 529 LL2321f
+BS 530 544 LL2321r
+BS 545 545 LL2321f
+BS 546 548 LL2321r
+BS 549 549 LL2321f
+BS 550 552 LL2321r
+BS 553 558 LL2321f
+
+RD LL2321r 793 0 2
+ccctcttcgctattacgccagctggcgaaagggggatgtgctgcaaggcg
+attaagttgggtaacgccagggttttxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxCTTTTTTTT
+TTTTTTTTTGAGAAAAGAATAAATACATTATTATGCTTTTCATCTGCTCA
+GATAAGTGCAATGCAGAAGAAATTGCAACAATGTTGTCCACTGGAAGAAA
+AGACGCAGAATAAAAAAATAATATATAAGCTTTAAAAAGAAAAACAACGA
+GAAGAGAAACTCAAAAACAATGTTTGATTGTACGATACAAAATACTATTG
+CATTTTCGGTTATTTTATTATATAGATATTCATCGGAGGAATTGTTTACG
+CAACCGTCGACCGCCGTCGGCCGTAGCTCTCTACGCCGTCTTACCTTGCC
+GTTTCTCTTCTTCGATCTTGGCCTTAATGACAGAGTAGAGTGCAACGCCA
+GCAATGGCTATTCCAGTTCCGATACCTGTCTGCGTCGATATCTTGTTTCC
+GAAGATAACGATGGAGAATCCGATCACGAAGACACGTTTCAGAACGTTTC
+CAACAGCGTGAGTCAACGGTGCTACCCTCTCCAACGTATTAGTAGCCAAC
+TGATTGTAGAGATGGTAAAACATTCCAACCCAGAAGAGCGGACGCGTGGx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 7 749 192 749
+DS CHROMAT_FILE: LL2321r PHD_FILE: LL2321r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:17 2000
+
+RT{
+LL2321r chimera phrap 2 76 000919:094547
+}
+
+RT{
+LL2321r matchElsewhereHighQual phrap 2 76 000919:094547
+}
+
+RD LL2321f 794 0 3
+cttttanggaaacatAAATACATTATTATGCTTTTCATCTGCTCAGATAA
+GTGCAATGCAGAAGAAATTGCAACAATGTTGTCcagtGGAAGAAAAGACG
+CAGAATAAAAAAATAATATATAAGCTTTAAAAAGAAAAACAACGAGAAGA
+GAAACTCAAAAACAATGTTTGATTGTACGATACAAAATACTATTGCATTT
+TCGGTTATTTTATTATATAGATATTCATCGGAGGAATTGTTTACGCAACC
+GTCGACCGCCGTCGGCCGTAGCTCTCTACGCCGTCTTACCTTGCCGTTTC
+TCTTCTTCGATCTTGGCCTTAATGACAGAGTAGAGTGCAACGCCAGCAAT
+GGCTATTCCAGTTCCGATACCTGTCTGCGTCGATATCTTGTTTCCGAAGA
+TAACGATGGAGAATCCGATCACGAAGACACGTTTCAGAACGTTTCCAACA
+GCGTGAGTCAACGGTGCTACCCTCTCCAACGTATTAGTAGCCAACTGATT
+GTAGAGATGGTAAAACATTCCAACCCAGAAGAGCGGACGCGTGGxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxgcctgtggtgcctactgagtgagctaactcacattaattgcgt
+tgcgctcactgccccgctttccagtcggaaacctgtcgtgccag
+
+QA 15 548 14 544
+DS CHROMAT_FILE: LL2321f PHD_FILE: LL2321f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:38 2000
+
+RT{
+LL2321f chimera phrap 708 794 000919:094547
+}
+
+RT{
+LL2321f matchElsewhereHighQual phrap 779 794 000919:094547
+}
+
+RT{
+LL2321f matchElsewhereHighQual phrap 723 764 000919:094547
+}
+
+CO Contig34 956 2 50 U
+cttggtaataagaatcacaacatgaacaaactcaggagagtacatgaaca
+tgaacaagtgagtacaggtaagaaggtacaACAAGCACAGACGCAAAGAA
+AAAACAATCTGAAACCAGTTTGGTAGTAACCACCTTTTTGAAACTTAAAA
+ATGGAACTTTTAAGATTCAGGGACGGCTTCTACAGCAGCAGCAGCAGCAG
+TAGCCGCAGCAGCGGAGCGTTTGATGGAAGCAGATTTGATGAGGTGATTG
+TAACTTCCTTTCTCCTTGAAAAGCTGAGCAAAGTGGTGTTTGCAGTACAA
+GATCCCTTCAAGAGCTGCGTAGTTGGATGGAGAGATAGGACAGCCTCCGT
+GCGAACACTTGAAGCAAGACTTGTGGTAACACTGGCTCTCCACAGTTACC
+TTCTCGATAGGGTACACTGTTTTACTGCAAGTAGCGCATTTATCTTGCGT
+TCCAGAGAACATTCCAGCAACTCGGCTAGGTGTTCTTGTCAGCTCAGGTG
+TTGGTTTGTCTGTTAATGGCTTTGCAGGTGATTGAAAGTTCTTGTTGAAG
+CTACCAGTCTCCTTGAAGAGCTGCTCAAAGTGAGGCTTACAGTACAACAC
+ACCTTCCATTGATGAATAGTTGCTCAGTTGAAGGGTGGATTTGCAGTGAG
+AGCACTTGAAGCAAGACTTGTGGAAAGGGACACCATCAGCTGAGAGAAGC
+TCCATAGGGTACACTGTCTTCTCACACGCCCTGCACTTCTGCTGTGTTCC
+TGTAAACGACATCGTTTTAGTCTTCCTTCTTCACGATCTGTCCTTCCTCT
+CCTCGCTCACTCTCCGTGGATCTGAACAGAAAATGCGGAATCAAAAAAGA
+AGAAGTTAGATGTGTAGAAGACGAAGTTAAAGATGAGATGTAATGATTAT
+TGAGAGCACGTTCgacacacttatgagacagatagtattctaaagcggac
+gcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 44 46 42 42 37 37 39 37 35 35 35 35 35 35 35 35 35 39 36 35
+ 35 42 42 42 37 37 35 42 35 35 37 37 37 40 40 35 35 35 35 42 47 35 35 32 32 32 32 32 35 35 40 40 37 37 42 42 50 40 37 37 37 35 35 35 32 35 35 35 35 42
+ 42 40 40 40 37 37 37 37 35 35 40 40 40 37 42 42 41 41 41 41 37 35 35 35 29 29 27 24 26 26 32 35 35 38 38 41 41 41 41 41 41 40 38 35 32 32 32 35 35 30
+ 23 21 21 21 23 21 26 28 29 32 27 27 32 32 51 53 55 65 65 62 64 74 73 74 63 65 65 53 55 53 56 62 66 90 75 69 69 69 82 82 79 79 73 68 68 64 63 63 63 60
+ 66 56 58 62 58 53 62 73 71 76 71 75 75 67 71 71 81 85 75 74 76 70 70 74 71 76 73 84 86 90 89 82 75 71 71 71 68 68 65 55 53 53 54 62 60 63 70 74 78 79
+ 84 79 79 68 68 67 67 67 68 73 70 81 78 78 76 69 76 68 52 50 48 46 34 49 43 54 59 60 68 68 68 71 72 90 76 88 75 58 53 56 56 64 59 60 60 65 63 61 64 60
+ 56 61 58 58 60 59 65 61 61 57 65 79 82 76 78 82 88 75 80 75 90 75 72 73 76 76 76 71 74 75 87 76 80 81 81 81 81 85 89 90 70 70 62 73 67 67 64 75 66 70
+ 70 73 76 82 82 82 84 90 90 90 90 90 81 87 71 83 83 90 90 89 84 84 81 77 82 82 80 73 78 82 78 78 70 71 65 67 67 75 82 86 82 82 82 72 73 64 67 70 70 67
+ 75 73 79 73 81 81 81 79 79 57 63 63 56 43 39 33 29 29 47 50 77 86 78 81 78 78 70 78 78 82 90 90 88 88 90 90 90 83 74 79 72 72 72 69 72 77 90 90 90 87
+ 82 82 86 86 80 82 90 90 90 90 86 86 82 82 82 80 82 75 75 75 80 81 77 84 82 87 87 90 90 90 90 90 90 90 90 90 90 89 89 89 90 90 90 78 73 73 73 73 77 72
+ 75 68 73 68 68 72 81 77 77 74 80 90 90 90 90 90 90 87 84 84 82 80 90 90 90 90 80 76 85 83 83 83 84 90 88 78 78 90 75 65 71 70 72 72 90 90 90 90 90 87
+ 87 84 90 90 90 90 90 90 90 90 90 90 90 90 90 79 79 78 73 73 75 90 89 72 74 62 62 62 67 89 89 90 78 78 67 67 67 84 89 90 76 75 75 72 75 69 73 58 62 62
+ 71 75 68 67 70 69 70 68 67 72 76 65 67 65 68 70 70 72 72 69 69 69 68 63 59 67 59 58 55 65 67 67 81 81 85 68 70 77 80 73 75 77 71 71 66 61 61 61 58 63
+ 60 69 74 74 72 68 60 61 61 66 67 72 85 75 76 67 67 67 76 76 76 69 69 74 70 66 68 66 62 68 68 70 66 64 64 60 71 71 71 71 71 71 73 66 66 66 66 66 68 69
+ 82 72 72 68 58 65 68 62 62 58 52 52 37 40 40 43 43 43 43 43 43 43 46 56 56 56 56 56 56 56 56 45 45 45 45 45 45 40 40 40 40 40 46 56 56 56 56 56 56 56
+ 51 35 35 35 35 35 35 51 45 45 45 45 39 35 35 35 35 35 46 56 51 51 51 51 51 51 51 51 51 51 51 51 35 34 34 34 34 34 35 37 37 40 40 40 56 56 51 51 51 51
+ 51 40 40 40 39 39 39 40 40 40 45 40 40 46 46 46 46 46 51 40 40 45 45 45 45 40 40 40 40 40 40 40 40 40 40 40 40 51 39 40 39 39 39 39 39 39 39 40 40 46
+ 46 46 40 40 40 40 40 35 34 29 29 29 29 14 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0
+
+AF LL2322f U 1
+AF LL2322r C 213
+BS 1 280 LL2322f
+BS 281 281 LL2322r
+BS 282 312 LL2322f
+BS 313 315 LL2322r
+BS 316 316 LL2322f
+BS 317 317 LL2322r
+BS 318 318 LL2322f
+BS 319 323 LL2322r
+BS 324 352 LL2322f
+BS 353 353 LL2322r
+BS 354 366 LL2322f
+BS 367 374 LL2322r
+BS 375 380 LL2322f
+BS 381 381 LL2322r
+BS 382 389 LL2322f
+BS 390 390 LL2322r
+BS 391 411 LL2322f
+BS 412 412 LL2322r
+BS 413 415 LL2322f
+BS 416 427 LL2322r
+BS 428 432 LL2322f
+BS 433 433 LL2322r
+BS 434 434 LL2322f
+BS 435 435 LL2322r
+BS 436 437 LL2322f
+BS 438 440 LL2322r
+BS 441 450 LL2322f
+BS 451 457 LL2322r
+BS 458 460 LL2322f
+BS 461 479 LL2322r
+BS 480 482 LL2322f
+BS 483 485 LL2322r
+BS 486 487 LL2322f
+BS 488 489 LL2322r
+BS 490 490 LL2322f
+BS 491 524 LL2322r
+BS 525 525 LL2322f
+BS 526 534 LL2322r
+BS 535 537 LL2322f
+BS 538 540 LL2322r
+BS 541 541 LL2322f
+BS 542 556 LL2322r
+BS 557 559 LL2322f
+BS 560 566 LL2322r
+BS 567 571 LL2322f
+BS 572 578 LL2322r
+BS 579 583 LL2322f
+BS 584 660 LL2322r
+BS 661 661 LL2322f
+BS 662 956 LL2322r
+
+RD LL2322f 780 0 0
+cttggtaataagaatcacaacatgaacaaactcaggagagtacatgaaca
+tgaacaagtgagtacaggtaagaaggtacaACAAGCACAGACGCAAAGAA
+AAAACAATCTGAAACCAGTTTGGTAGTAACCACCTTTTTGAAACTTAAAA
+ATGGAACTTTTAAGATTCAGGGACGGCTTCTACAGCAGCAGCAGCAGCAG
+TAGCCGCAGCAGCGGAGCGTTTGATGGAAGCAGATTTGATGAGGTGATTG
+TAACTTCCTTTCTCCTTGAAAAGCTGAGCAAAGTGGTGTTTGCAGTACAA
+GATCCCTTCAAGAGCTGCGTAGTTGGATGGAGAGATAGGACAGCCTCCGT
+GCGAACACTTGAAGCAAGACTTGTGGTAACACTGGCTCTCCACAGTTACC
+TTCTCGATAGGGTACACTGTTTTACTGCAAGTAGCGCATTTATCTTGCGT
+TCCAGAGAACATTCccgCAACTCGGCTAGGTGTTCTTGTCAGCTCAGGTG
+TTGGTTTGTCTGTTAATGGCTTTGCAGGTGATTGAAAGTTCTTGTTGAAG
+CTACCAGTCTCCTTGAAGAGCTGCTCAAAGTGAGGCTTACAGTACAACAC
+ACCTTCCATTGATGAATAGTTGCTCAGTTGAAGGGTGGATTTGCAGTGAG
+AGCACTTGAAGCAAGACTTGTGGAAAGGGACACCATCAGCTGAGAGAAGC
+TCCATAGGGTACACTGTCTTCTCACACGCCCTGCACTTCTGCTGTGTTCC
+TGTAAACGACATcgntctaGTCTTCCTTct
+
+QA 81 763 1 780
+DS CHROMAT_FILE: LL2322f PHD_FILE: LL2322f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:43 2000
+
+RD LL2322r 789 0 0
+cgGAGCGTTTGATGGAAGCAGATTTGATGAGGTGATTGTAACTTCCTTTC
+TCCTTGAAAAGCTGAGCAAAGTGGTGTTTGCAGTACAAGATCCCTTCAAG
+AGCTGCGTAGTTGGATGGAGAGATAGGACAGCCTCCGTGCGAACACTTGA
+AGCAAGACTTGTGGTAACACTGGCTCTCCACAGTTACCTTCTCGATAGGG
+TACACTGTTTTACTGCAAGTAGCGCATTTATCTTGCGTTCCAGAGAACAT
+TCCAGCAACTCGGCTAGGTGTTCTTGTCAGCTCAGGTGTTGGTTTGTCTG
+TTAATGGCTTTGCAGGTGATTGAAAGTTCTTGTTGAAGCTACCAGTCTCC
+TTGAAGAGCTGCTCAAAGTGAGGCTTACAGTACAACACACCTTCCATTGA
+TGAATAGTTGCTCAGTTGAAGGGTGGATTTGCAGTGAGAGCACTTGAAGC
+AAGACTTGTGGAAAGGGACACCATCAGCTGAGAGAAGCTCCATAGGGTAC
+ACTGTCTTCTCACACGCCCTGCACTTCTGCTGTGTTCCTGTAAACGACAT
+CGTTTTAGTCTTCCTTCTTCACGATCTGTCCTTCCTCTCCTCGCTCACTC
+TCCGTGGATCTGAACAGAAAATGCGGAATCAAAAAAGAAGAAGTTAGATG
+TGTAGAAGACGAAGTTAAAGATGAGATGTAATGATTATTGAGAGCACGTT
+Cgacacacttatgagacagatagtattctaaagcggacgcgtggxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxag
+
+QA 1 709 1 744
+DS CHROMAT_FILE: LL2322r PHD_FILE: LL2322r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:21 2000
+
+CO Contig35 645 2 99 U
+acggccgGAGAAGATTTCATTTCTTTAAATAGCAAAGAGAAAACAca*cG
+ACAAGAGTTCCAAAAGCAAACTAGATAAGCCAAGACTGCGTGCTGTTGTT
+GCTAAACAAAGCTCTTATAACAAACGGCAGACAATATCTGTGACTTTCTT
+CTTCCCATAAAACCCCATTAATTTTGTCATAATTAACCACTACGTCCAGG
+TAATCGCCACGGGACTACCCACCACGTGTTTCCCATCTGACCATTCAATG
+TTCCCGAAGCTAGATCCAGACGCCTTAGAAGAGTCTACGGTAAACGTCAC
+CGTATACGATTTCTTCTCATTAACTTCTTTGAAATTCAAAACCGCCGGTG
+CAACCGAAATCTTGACTGCTGTACTCTCCGAAATGACTTTAACCTTGTAG
+GACCCAGCTCCTCCAACGCTCGTGACAGTACGCGTGTACTTATACGCTCC
+GGATCCGTCAACGTTAACGGCGAAGGACGGATAGTTTAAATCCGCGACTG
+AGTGCGTTTTGGAGGGATCGCAAGTGAAATTGCGTCTCGAAACGCTTCTG
+ATCTGCGACGACGTGTAGTTCAACGCGCAGAGGAAACCTAAGTAATCCAC
+CGTCGttAGATCGTAAACGAGTCCTGGATTGGTGCGGACGCGTGG
+
+BQ
+ 0 0 0 0 0 15 18 25 29 29 25 34 37 48 34 34 33 30 35 35 41 54 61 51 51 45 48 51 52 55 50 47 49 59 57 69 71 63 63 49 44 44 49 41 23 16 19 19 39 39
+ 50 60 53 64 64 64 60 65 75 80 85 73 71 71 66 61 61 55 56 60 64 64 69 69 66 41 41 25 22 26 29 48 40 46 50 50 64 64 69 64 67 75 83 83 74 80 80 69 64 60
+ 62 62 59 50 55 72 80 80 85 85 85 85 74 69 69 65 76 66 66 58 75 58 55 54 55 51 50 60 52 57 57 88 88 90 90 88 86 90 84 84 78 79 68 67 69 72 85 80 85 90
+ 90 90 86 84 79 74 80 72 78 72 88 88 90 90 90 83 80 80 80 83 83 90 90 90 88 90 85 89 89 89 82 72 72 68 68 68 89 90 90 83 75 75 75 75 75 83 83 83 75 75
+ 73 72 63 67 65 67 65 67 77 83 76 75 75 75 75 75 76 89 90 90 90 76 76 83 80 80 83 90 90 90 90 90 90 90 90 88 87 83 85 85 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 85 85 85 83 88 88 90 90 90 90 90 90 90 79 79 78 79 79 88 88 90 90 90 90 90 90 90 88 83 88 78 78 78 80 73 78 87 80 80 80 80 80 80 80 83 88
+ 81 83 81 76 76 76 81 81 88 90 90 90 90 90 88 88 90 85 90 90 83 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 83 78 78 71 71 79 79 70 70 73 75
+ 72 75 84 88 88 88 88 90 90 90 90 90 90 90 88 80 80 80 84 78 75 75 81 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 90 86 78 78 80 85
+ 86 88 90 90 88 88 88 88 90 90 90 90 90 83 83 83 80 80 80 80 83 80 80 89 89 90 90 88 83 82 78 78 78 78 78 82 82 85 85 85 85 83 83 83 83 85 83 83 88 90
+ 90 90 83 88 88 83 81 81 88 88 83 80 80 80 85 85 82 83 75 75 80 84 90 90 78 80 80 80 79 89 90 90 90 75 75 77 77 80 77 88 75 75 72 72 72 75 77 79 75 77
+ 70 69 64 64 64 64 72 90 90 90 90 87 87 87 83 69 64 64 56 56 50 57 77 80 80 90 90 90 68 64 53 52 52 57 64 75 75 73 75 75 75 75 75 75 79 85 88 90 89 89
+ 90 72 65 60 60 60 65 73 76 79 79 79 71 64 72 72 73 73 79 75 59 60 67 62 62 64 67 72 72 77 80 70 64 64 67 64 69 65 64 56 59 61 66 66 54 62 56 52 53 56
+ 40 38 23 21 15 15 24 29 47 47 44 58 58 53 56 58 68 63 63 63 63 63 59 72 65 69 69 80 67 61 51 48 50 59 63 63 69 65 45 45 45 32 33 32
+
+AF LL2323r C -108
+AF LL2323f U 13
+BS 1 14 LL2323r
+BS 15 19 LL2323f
+BS 20 21 LL2323r
+BS 22 23 LL2323f
+BS 24 30 LL2323r
+BS 31 36 LL2323f
+BS 37 39 LL2323r
+BS 40 78 LL2323f
+BS 79 85 LL2323r
+BS 86 92 LL2323f
+BS 93 95 LL2323r
+BS 96 132 LL2323f
+BS 133 143 LL2323r
+BS 144 154 LL2323f
+BS 155 155 LL2323r
+BS 156 176 LL2323f
+BS 177 180 LL2323r
+BS 181 187 LL2323f
+BS 188 190 LL2323r
+BS 191 205 LL2323f
+BS 206 208 LL2323r
+BS 209 211 LL2323f
+BS 212 224 LL2323r
+BS 225 231 LL2323f
+BS 232 239 LL2323r
+BS 240 241 LL2323f
+BS 242 247 LL2323r
+BS 248 250 LL2323f
+BS 251 255 LL2323r
+BS 256 259 LL2323f
+BS 260 264 LL2323r
+BS 265 273 LL2323f
+BS 274 280 LL2323r
+BS 281 282 LL2323f
+BS 283 283 LL2323r
+BS 284 284 LL2323f
+BS 285 285 LL2323r
+BS 286 289 LL2323f
+BS 290 292 LL2323r
+BS 293 300 LL2323f
+BS 301 311 LL2323r
+BS 312 314 LL2323f
+BS 315 315 LL2323r
+BS 316 317 LL2323f
+BS 318 318 LL2323r
+BS 319 324 LL2323f
+BS 325 327 LL2323r
+BS 328 338 LL2323f
+BS 339 340 LL2323r
+BS 341 345 LL2323f
+BS 346 347 LL2323r
+BS 348 354 LL2323f
+BS 355 373 LL2323r
+BS 374 374 LL2323f
+BS 375 376 LL2323r
+BS 377 384 LL2323f
+BS 385 387 LL2323r
+BS 388 400 LL2323f
+BS 401 401 LL2323r
+BS 402 402 LL2323f
+BS 403 403 LL2323r
+BS 404 409 LL2323f
+BS 410 424 LL2323r
+BS 425 429 LL2323f
+BS 430 449 LL2323r
+BS 450 453 LL2323f
+BS 454 454 LL2323r
+BS 455 456 LL2323f
+BS 457 459 LL2323r
+BS 460 461 LL2323f
+BS 462 468 LL2323r
+BS 469 469 LL2323f
+BS 470 471 LL2323r
+BS 472 473 LL2323f
+BS 474 476 LL2323r
+BS 477 484 LL2323f
+BS 485 488 LL2323r
+BS 489 502 LL2323f
+BS 503 507 LL2323r
+BS 508 516 LL2323f
+BS 517 524 LL2323r
+BS 525 526 LL2323f
+BS 527 536 LL2323r
+BS 537 546 LL2323f
+BS 547 548 LL2323r
+BS 549 549 LL2323f
+BS 550 551 LL2323r
+BS 552 553 LL2323f
+BS 554 560 LL2323r
+BS 561 570 LL2323f
+BS 571 599 LL2323r
+BS 600 600 LL2323f
+BS 601 601 LL2323r
+BS 602 612 LL2323f
+BS 613 624 LL2323r
+BS 625 625 LL2323f
+BS 626 628 LL2323r
+BS 629 630 LL2323f
+BS 631 645 LL2323r
+
+RD LL2323r 799 0 0
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxctttttnnnnnnnnnnnnnnnn
+ntntntnnnacggccgGAGAAGATtTCATTTCTTTAAATAGCAAAGAGAA
+AACacaacGACAAGAGTTCCAAAAGCAAACTAGATAAGCCAAGACTGCGT
+GCTGTTGTTGCTAAACAAAGCTCTTATAACAAACGGCAGACAATATCTGT
+GACTTTCTTCTTCCCATAAAACCCCATTAATTTTGTCATAATTAACCACT
+ACGTCCAGGTAATCGCCACGGGACTACCCACCACGTGTTTCCCATCTGAC
+CATTCAATGTTCCCGAAGCTAGATCCAGACGCCTTAGAAGAGTCTACGGT
+AAACGTCACCGTATACGATTTCTTCTCATTAACTTCTTTGAAATTCAAAA
+CCGCCGGTGCAACCGAAATCTTGACTGCTGTACTCTCCGAAATGACTTTA
+ACCTTGTAGGACCCAGCTCCTCCAACGCTCGTGACAGTACGCGTGTACTT
+ATACGCTCCGGATCCGTCAACGTTAACGGCGAAGGACGGATAGTTTAAAT
+CCGCGACTGAGTGCGTTTTGGAGGGATCGCAAGTGAAATTGCGTCTCGAA
+ACGCTTCTGATCTGCGACGACGTGTAGTTCAACGCGCAGAGGAAACCTAA
+GTAATCCACCGTcgc*agATCGTAAACGAGTCCTGGATTGGTGCGGACGC
+GTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxagt
+
+QA 115 754 110 754
+DS CHROMAT_FILE: LL2323r PHD_FILE: LL2323r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:25 2000
+
+RD LL2323f 784 0 0
+gaTTTCATTTCTTTAAATAGCAAAGAGAAAACAca*cGACAAGAGTTCCA
+AAAGCAAACTAGATAAgcgaagACTGCGTGCTGTTGTTGCTAAACAAAGC
+TCTTATAACAAACGGCAGACAATATCTGTGACTTTCTTCTTCCCATAAAA
+CCCCATTAATTTTGTCATAATTAACCACTACGTCCAGGTAATCGCCACGG
+GACTACCCACCACGTGTTTCCCATCTGACCATTCAATGTTCCCGAAGCTA
+GATCCAGACGCCTTAGAAGAGTCTACGGTAAACGTCACCGTATACGATTT
+CTTCTCATTAACTTCTTTGAAATTCAAAACCGCCGGTGCAACCGAAATCT
+TGACTGCTGTACTCTCCGAAATGACTTTAACCTTGTAGGACCCAGCTCCT
+CCAACGCTCGTGACAGTACGCGTGTACTTATACGCTCCGGATCCGTCAAC
+GTTAACGGCGAAGGACGGATAGTTTAAATCCGCGACTGAGTGCGTTTTGG
+AGGGATCGCAAGTGAAATTGCGTCTCGAAACGCTTCTGATCTGCGACGAC
+GTGTAGTTCAACGCGCAGAGGAAACCTAAGTAATCCACCGTCGttAGATC
+GTAAACGAGTCCTGGATTGGTGCGGACGCGTGGxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+QA 8 633 1 633
+DS CHROMAT_FILE: LL2323f PHD_FILE: LL2323f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:49 2000
+
+CO Contig36 742 2 113 U
+TTTTTTTttTTTTTTTTTTTTATCTATGAAATCGATaaacTATAACCAGA
+TACTTTGATCATCACTGTTTCATTGGATTTCCAATGAAGATCTCAACAGT
+GGAGTATACTTATTTAAGAAATAACAGTACAAGAAAAAAAGAGAAGGATA
+GTTTCATTTCCAAGCCGGAGATGCAAGAAACATGATGAGGATCAGTATCC
+CCAGTAGTCGTTACGGATTTCGTCATCAAACTTCTTTTTGAGTTCTGGGT
+GCTTCTCAAAGTACTCATCTGCAGTCATCGTGCTGAGCTTTTTGCTTATC
+TCCTGGACATCAACAATTTCTTTCTCCAGCCGTTCAGACTCCTTGAGCGA
+CTTCTGTTCTGCTTCTTTCAGCTCCACCAACAAAGCATCAAACTTAGGCT
+TGTATTCAGGGGTAACATTGTCCACGTACTTGGGAATCTCAACGCTGTCA
+TAAGCTTCCTTGTACAAGTCAACAATGCCGGATCCAATACCCTTTCTGTA
+GAAATCCCAATCAATAGGTTCAGGCTCCTGGCTGAACTTGGTCTGGAGCT
+GCGTGTTGACCTCGTCGAAAGCGCGACGGAGGTTAGAGAACTCTCTGCGA
+GCCTCATCGGTGACGAGGACCTTAGCCATCCCTTCCCAATCTATAGTCCT
+CGACGCTTTGAATGCCACATCCGCCACTTTCTTGCCTGCTCCGCTCATTT
+TTCTCGATTTCGCCCAAAACACACACTGATGCGGACGCGTGG
+
+BQ
+ 32 32 32 32 32 22 23 17 15 23 27 32 26 26 22 22 22 22 22 20 20 21 27 24 24 25 29 25 25 25 25 27 27 27 27 24 19 17 19 16 23 21 32 34 34 34 37 42 47 54
+ 54 62 58 62 55 59 63 62 62 75 66 66 62 66 61 67 60 66 62 65 74 64 55 55 53 53 59 64 61 69 65 77 66 69 66 69 71 62 66 77 77 79 72 78 66 55 58 70 59 66
+ 61 66 64 67 71 71 69 71 69 69 69 69 64 64 64 60 60 60 85 67 64 64 55 50 50 61 61 54 52 52 66 88 88 88 88 90 90 90 90 82 82 90 90 90 90 76 66 62 69 69
+ 69 69 67 75 75 75 69 67 66 66 72 69 69 69 80 74 74 75 76 88 69 77 77 75 81 81 88 88 80 77 80 85 88 90 90 90 88 80 86 86 80 73 71 76 71 76 73 69 72 77
+ 75 88 69 69 69 66 69 69 68 68 69 66 72 75 82 82 89 80 78 71 71 71 71 75 84 90 90 87 78 90 90 90 88 88 90 90 90 90 90 90 90 82 82 81 90 90 81 86 78 76
+ 76 79 90 90 90 90 90 90 82 90 82 78 78 90 90 90 90 90 84 80 77 77 78 77 89 90 86 81 85 71 71 71 86 86 87 89 90 90 90 90 90 90 87 87 81 81 88 88 90 87
+ 90 90 90 90 90 90 90 90 90 83 87 82 83 86 90 90 90 90 90 90 90 90 90 90 90 81 80 79 76 76 77 83 83 83 90 90 88 83 87 87 87 89 90 88 87 83 81 79 79 86
+ 81 90 87 83 87 88 88 90 87 90 90 90 90 90 90 90 90 90 90 85 85 80 83 88 90 90 90 90 90 90 90 86 86 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 88 88 86 90 90 90 90 90 90 87 89 85 90 87 80 80 80 90 90 90 90 90 90 90 90 90 90 90 90 86 83 81 83 83 83 85 86 89 90
+ 90 90 86 88 81 81 83 90 90 85 90 87 87 87 90 89 90 90 90 84 88 90 90 90 86 87 85 85 86 85 86 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89
+ 90 89 90 90 90 90 90 90 90 88 90 90 90 90 90 86 86 86 83 83 89 90 86 86 86 84 80 80 90 82 82 82 88 90 90 90 90 90 88 90 90 90 90 90 87 82 78 78 78 78
+ 78 78 78 78 83 90 90 90 90 90 83 88 85 85 83 83 83 85 88 83 75 75 71 71 71 78 80 82 82 81 81 83 83 83 85 85 80 85 85 83 83 90 90 85 72 71 71 71 71 71
+ 85 83 83 78 86 78 83 85 85 85 73 70 73 72 77 77 72 76 82 88 76 72 79 67 64 64 65 73 76 67 67 70 74 85 85 90 85 85 83 69 79 74 71 71 71 71 71 79 80 80
+ 74 71 67 67 70 65 64 68 69 77 74 63 64 64 66 72 72 77 83 68 63 62 56 53 53 63 83 90 90 90 90 65 69 60 68 67 75 74 76 63 58 58 64 68 63 72 72 75 75 78
+ 84 71 62 59 59 66 66 64 60 54 56 56 54 58 62 75 72 72 64 69 72 72 83 83 78 79 80 79 74 74 63 61 56 56 76 73 47 44 44 32 33 34
+
+AF LL2325r C 1
+AF LL2325f U 35
+BS 1 38 LL2325r
+BS 39 39 LL2325f
+BS 40 41 LL2325r
+BS 42 42 LL2325f
+BS 43 43 LL2325r
+BS 44 46 LL2325f
+BS 47 51 LL2325r
+BS 52 59 LL2325f
+BS 60 60 LL2325r
+BS 61 89 LL2325f
+BS 90 92 LL2325r
+BS 93 118 LL2325f
+BS 119 119 LL2325r
+BS 120 131 LL2325f
+BS 132 135 LL2325r
+BS 136 137 LL2325f
+BS 138 139 LL2325r
+BS 140 141 LL2325f
+BS 142 145 LL2325r
+BS 146 153 LL2325f
+BS 154 156 LL2325r
+BS 157 171 LL2325f
+BS 172 178 LL2325r
+BS 179 179 LL2325f
+BS 180 180 LL2325r
+BS 181 181 LL2325f
+BS 182 187 LL2325r
+BS 188 188 LL2325f
+BS 189 190 LL2325r
+BS 191 193 LL2325f
+BS 194 194 LL2325r
+BS 195 208 LL2325f
+BS 209 217 LL2325r
+BS 218 227 LL2325f
+BS 228 237 LL2325r
+BS 238 244 LL2325f
+BS 245 246 LL2325r
+BS 247 248 LL2325f
+BS 249 258 LL2325r
+BS 259 261 LL2325f
+BS 262 270 LL2325r
+BS 271 272 LL2325f
+BS 273 275 LL2325r
+BS 276 276 LL2325f
+BS 277 277 LL2325r
+BS 278 279 LL2325f
+BS 280 311 LL2325r
+BS 312 315 LL2325f
+BS 316 325 LL2325r
+BS 326 330 LL2325f
+BS 331 344 LL2325r
+BS 345 345 LL2325f
+BS 346 349 LL2325r
+BS 350 350 LL2325f
+BS 351 367 LL2325r
+BS 368 369 LL2325f
+BS 370 383 LL2325r
+BS 384 388 LL2325f
+BS 389 409 LL2325r
+BS 410 411 LL2325f
+BS 412 413 LL2325r
+BS 414 415 LL2325f
+BS 416 423 LL2325r
+BS 424 424 LL2325f
+BS 425 449 LL2325r
+BS 450 454 LL2325f
+BS 455 460 LL2325r
+BS 461 469 LL2325f
+BS 470 470 LL2325r
+BS 471 471 LL2325f
+BS 472 478 LL2325r
+BS 479 479 LL2325f
+BS 480 480 LL2325r
+BS 481 481 LL2325f
+BS 482 492 LL2325r
+BS 493 494 LL2325f
+BS 495 495 LL2325r
+BS 496 499 LL2325f
+BS 500 500 LL2325r
+BS 501 501 LL2325f
+BS 502 502 LL2325r
+BS 503 504 LL2325f
+BS 505 506 LL2325r
+BS 507 509 LL2325f
+BS 510 510 LL2325r
+BS 511 513 LL2325f
+BS 514 515 LL2325r
+BS 516 518 LL2325f
+BS 519 520 LL2325r
+BS 521 521 LL2325f
+BS 522 528 LL2325r
+BS 529 529 LL2325f
+BS 530 537 LL2325r
+BS 538 548 LL2325f
+BS 549 561 LL2325r
+BS 562 562 LL2325f
+BS 563 568 LL2325r
+BS 569 569 LL2325f
+BS 570 570 LL2325r
+BS 571 572 LL2325f
+BS 573 591 LL2325r
+BS 592 593 LL2325f
+BS 594 594 LL2325r
+BS 595 600 LL2325f
+BS 601 621 LL2325r
+BS 622 623 LL2325f
+BS 624 640 LL2325r
+BS 641 650 LL2325f
+BS 651 700 LL2325r
+BS 701 707 LL2325f
+BS 708 726 LL2325r
+BS 727 730 LL2325f
+BS 731 742 LL2325r
+
+RD LL2325r 786 0 0
+TTTTTTTttTTTTTTTTTTTTATCTATGAAATCGATaaacTATAACCAGA
+TACTTTGATCATCACTGTTTCATTGGATTTCCAATGAAGATCTCAACAGT
+GGAGTATACTTATTTAAGAAATAACAGTACAAGAAAAAAAGAGAAGGATA
+GTTTCATTTCCAAGCCGGAGATGCAAGAAACATGATGAGGATCAGTATCC
+CCAGTAGTCGTTACGGATTTCGTCATCAAACTTCTTTTTGAGTTCTGGGT
+GCTTCTCAAAGTACTCATCTGCAGTCATCGTGCTGAGCTTTTTGCTTATC
+TCCTGGACATCAACAATTTCTTTCTCCAGCCGTTCAGACTCCTTGAGCGA
+CTTCTGTTCTGCTTCTTTCAGCTCCACCAACAAAGCATCAAACTTAGGCT
+TGTATTCAGGGGTAACATTGTCCACGTACTTGGGAATCTCAACGCTGTCA
+TAAGCTTCCTTGTACAAGTCAACAATGCCGGATCCAATACCCTTTCTGTA
+GAAATCCCAATCAATAGGTTCAGGCTCCTGGCTGAACTTGGTCTGGAGCT
+GCGTGTTGACCTCGTCGAAAGCGCGACGGAGGTTAGAGAACTCTCTGCGA
+GCCTCATCGGTGACGAGGACCTTAGCCATCCCTTCCCAATCTATAGTCCT
+CGACGCTTTGAATGCCACATCCGCCACTTTCTTGCCTGCTCCGCTCATTT
+TTCTCGATTTCGCCCAAAACACACACTGATGCGGACGCGTGGxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 1 742 1 742
+DS CHROMAT_FILE: LL2325r PHD_FILE: LL2325r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:35 2000
+
+RD LL2325f 766 0 0
+agtaacTATAACCAGATACTTTGATCATCACTGTTTCATTGGATTTCCAA
+TGAAGATCTCAACAGTGGAGTATACTTATTTAAGAAATAACAGTACAAGA
+AAAAAAGAGAAGGATAGTTTCATTTCCAAGCCGGAGATGCAAGAAACATG
+ATGAGGATCAGTATCCCCAGTAGTCGTTACGGATTTCGTCATCAAACTTC
+TTTTTGAGTTCTGGGTGCTTCTCAAAGTACTCATCTGCAGTCATCGTGCT
+GAGCTTTTTGCTTATCTCCTGGACATCAACAATTTCTTTCTCCAGCCGTT
+CAGACTCCTTGAGCGACTTCTGTTCTGCTTCTTTCAGCTCCACCAACAAA
+GCATCAAACTTAGGCTTGTATTCAGGGGTAACATTGTCCACGTACTTGGG
+AATCTCAACGCTGTCATAAGCTTCCTTGTACAAGTCAACAATGCCGGATC
+CAATACCCTTTCTGTAGAAATCCCAATCAATAGGTTCAGGCTCCTGGCTG
+AACTTGGTCTGGAGCTGCGTGTTGACCTCGTCGAAAGCGCGACGGAGGTT
+AGAGAACTCTCTGCGAGCCTCATCGGTGACGAGGACCTTAGCCATCCCTT
+CCCAATCTATAGTCCTCGACGCTTTGAATGCCACATCCGCCACTTTCTTG
+CCTGCTCCGCTCATTTTTCTCGATTTCGCCCAAAACACACACTGATGCGG
+ACGCGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxx
+
+QA 9 708 4 708
+DS CHROMAT_FILE: LL2325f PHD_FILE: LL2325f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:58 2000
+
+CO Contig37 1093 2 34 U
+tcgaaattctttacaaaataacaaaaacctaaaccttaaaaccaacaatg
+gacattctctcgagctcgctTCACCTACATTACAAAAACATAAATATTTA
+CATAAAAAAATAAATCAAACCGGTTTACGCTAAACCTAAAAGAAACAAAA
+AAAAACCTAATCCTCCGCCTCCACCTCCTCTGTTCGCTTCCTCCGTCAAC
+CCCTCTGCCTCCTCCTCCATCAAGATGTAGCAATGTCGTCATGGCTACCA
+AGTCTATCAATTGTTCCATCTTTTAAATCGTACCACGGATTCTAACACCT
+CGTTTCTTGCTTGGTAGAATGCTCTCATCCTCACGATCCTCAAACGTCCT
+CCGTAGTTAATCTCCAACAATGAGTCAATAATAGCTTCTTGCTCCATTCT
+TTCTTGACAAGAGAAGACCAAATCTCGAACCATTGCTAGACCTTTGCTAG
+AATCCACCAATGAGAAGCTTCAACACTCAAAAAATGGTTCAGGAAAAACA
+CAAACTGATGAATCAAGTTCTCTTTTCACTGCTGAAGACCAGCTCGAGCA
+GAAGATCTGAATAGAGCTTAGAAGCAATGGACGCCATATGGGAGAGAAGC
+TCGGGATCTGAGCCGTCGTCCTCTTCCTCTGTCTTCTTCGTTTCCGCTAA
+TGGGATAGAAGCTCAGGATTCGCTGCCACCGCCTTTGACGCTGCTCCGCC
+TTGCCACCGGAGAAGAGAGAGAGAACGAGGTGTGACGGAGATGAGGAGAT
+GCGACGAACCCCTAACAAGAAGTCTCGAATGGAAACAAGTAAATctGAAC
+CCTATAATCAGTAGAAGAAGAAACAAATTCTCAGAAATCGACCTGTGGAT
+CAAGGCCTGTTCGAGACCCGTAACGATGACGACGAACTCAGACCGGTGGT
+GATGGAGACGAACTCATACCGGTGACGATGGCACAAACTCAGACCGGTCT
+CTTCTTCCTCATGCAATCTCTCATCTTTCTCTGTCTCATCTCTCTTCTCT
+CTGTCGTGTTTGAGAAGAGAAATGGAGAGGACAAAACACAATCTCCCTCC
+TTcccctaatgcaggaatcaagattaactttccggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 20 29 32 34 34 39 40 40 46 46 46 46 46 46 51 46 46 46 46 46 46 51 46 46 46 46 40 40 40 39
+ 39 39 40 40 40 46 51 51 40 40 40 39 39 39 40 40 40 40 39 40 37 37 37 35 35 29 29 31 31 31 31 34 35 39 40 40 40 40 40 40 51 56 40 40 46 40 40 40 51 51
+ 56 56 46 40 40 40 40 40 45 45 45 45 45 45 35 35 35 35 35 35 35 35 35 35 35 35 39 39 40 45 45 40 37 35 34 34 34 34 34 38 40 40 45 45 40 40 40 45 45 45
+ 45 45 51 51 45 45 51 51 51 45 45 45 45 45 45 51 51 51 51 51 51 56 56 56 56 56 50 42 42 42 45 45 45 46 41 41 37 37 43 43 45 45 43 36 36 36 36 36 43 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 42 42 42 43 46 45 51 51 51 51 51 56 56 56 45 45 45 36 36 36 43 43 43 43 43 43 43 43 45 45 45 45 45 45 45 45 45 45
+ 51 51 45 45 45 45 43 43 43 43 43 43 43 43 43 42 43 43 43 43 43 43 43 43 43 45 45 45 45 45 43 43 43 43 43 43 45 51 51 51 51 43 43 43 43 36 36 38 46 46
+ 51 51 56 56 79 60 52 52 56 56 60 76 70 58 60 59 59 59 75 79 83 72 71 65 66 68 74 75 64 60 60 62 63 70 67 72 72 72 74 70 72 69 68 67 80 85 85 85 85 81
+ 81 81 88 76 71 75 75 75 73 71 85 85 85 81 72 72 72 68 72 69 81 81 83 72 72 72 67 58 59 67 73 90 85 80 66 65 62 70 65 69 63 63 68 61 61 64 64 69 77 77
+ 83 85 85 77 75 72 73 77 80 88 88 88 90 90 85 74 74 70 65 61 66 72 72 67 72 66 70 70 82 90 90 90 90 86 77 71 60 51 51 66 72 65 67 81 87 82 77 79 79 81
+ 79 90 82 88 83 86 82 82 90 90 90 83 90 90 86 86 76 85 71 79 90 90 90 90 85 90 77 88 87 90 90 83 90 90 86 88 88 88 90 90 84 80 80 80 80 72 74 80 85 88
+ 90 90 90 90 90 90 90 90 90 90 88 90 88 90 85 90 80 74 80 71 72 68 77 80 90 90 90 90 90 72 67 67 67 67 78 86 90 90 90 90 90 90 90 90 90 90 87 90 90 90
+ 90 90 90 90 90 90 90 90 83 75 75 75 81 71 67 68 66 69 66 66 78 90 88 88 88 82 81 85 77 88 68 66 69 88 90 90 79 78 78 63 60 57 64 62 71 63 65 67 80 80
+ 90 90 90 90 85 85 85 74 74 73 77 75 75 83 80 81 80 85 74 77 74 74 67 64 60 57 57 56 60 58 56 56 60 65 82 83 72 72 67 72 72 72 68 74 68 60 56 59 60 65
+ 84 80 77 73 71 76 81 79 71 82 81 81 78 73 78 78 81 81 88 88 90 77 71 59 60 65 73 69 67 67 82 85 75 68 68 63 61 62 71 82 82 88 88 81 73 73 78 82 83 72
+ 72 72 65 63 67 61 53 56 63 79 64 63 64 60 56 41 45 46 56 45 45 45 45 45 45 51 51 51 51 45 45 46 46 42 42 41 43 43 43 43 43 51 46 46 15 15 24 33 33 50
+ 46 46 45 45 45 42 42 38 36 43 43 43 43 43 56 56 51 45 45 45 45 41 42 50 56 56 51 51 51 51 51 45 45 45 45 45 43 45 43 41 41 41 43 43 43 43 43 43 56 56
+ 51 51 51 51 51 51 43 43 43 43 43 43 43 43 40 40 40 43 43 43 43 38 38 41 45 45 45 51 51 45 43 43 43 43 43 43 51 45 45 45 41 50 50 41 41 41 41 41 41 41
+ 45 46 51 45 45 45 43 43 43 43 43 43 51 51 51 51 43 40 40 39 39 39 39 38 38 39 39 39 35 38 38 38 38 38 45 51 56 56 51 45 45 40 37 37 35 35 35 35 35 35
+ 51 51 51 51 45 51 45 45 45 45 35 35 35 35 35 35 56 56 40 40 40 40 40 40 51 51 51 51 45 45 40 40 40 35 35 35 35 35 39 56 51 51 51 51 51 51 51 51 56 56
+ 51 40 35 35 35 35 35 39 51 51 51 51 51 46 51 51 51 51 51 51 39 39 39 39 40 40 51 51 46 40 40 40 40 40 51 51 51 51 51 51 46 46 40 40 48 48 40 40 40 40
+ 24 24 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2326f U -19
+AF LL2326r C 350
+BS 1 354 LL2326f
+BS 355 368 LL2326r
+BS 369 520 LL2326f
+BS 521 524 LL2326r
+BS 525 527 LL2326f
+BS 528 536 LL2326r
+BS 537 540 LL2326f
+BS 541 545 LL2326r
+BS 546 551 LL2326f
+BS 552 555 LL2326r
+BS 556 560 LL2326f
+BS 561 561 LL2326r
+BS 562 562 LL2326f
+BS 563 563 LL2326r
+BS 564 564 LL2326f
+BS 565 565 LL2326r
+BS 566 566 LL2326f
+BS 567 571 LL2326r
+BS 572 584 LL2326f
+BS 585 586 LL2326r
+BS 587 594 LL2326f
+BS 595 595 LL2326r
+BS 596 596 LL2326f
+BS 597 603 LL2326r
+BS 604 605 LL2326f
+BS 606 607 LL2326r
+BS 608 613 LL2326f
+BS 614 629 LL2326r
+BS 630 630 LL2326f
+BS 631 633 LL2326r
+BS 634 634 LL2326f
+BS 635 636 LL2326r
+BS 637 639 LL2326f
+BS 640 1093 LL2326r
+
+RD LL2326f 790 0 0
+nctttaggggtacgatatantcgaaattctttacaaaataacaaaaacct
+aaaccttaaaaccaacaatggacattctctcgagctcgctTCACCTACAT
+TACAAAAACATAAATATTTACATAAAAAAATAAATCAAACCGGTTTACGC
+TAAACCTAAAAGAAACAAAAAAAAACCTAATCCTCCGCCTCCACCTCCTC
+TGTTCGCTTCCTCCGTCAACCCCTCTGCCTCCTCCTCCATCAAGATGTAG
+CAATGTCGTCATGGCTACCAAGTCTATCAATTGTTCCATCTTTTAAATCG
+TACCACGGATTCTAACACCTCGTTTCTTGCTTGGTAGAATGCTCTCATCC
+TCACGATCCTCAAACGTCCTCCGTAGTTAATCTCCAACAATGAGTCAATA
+ATAGCTTCTTGCTCCATTCTTTCTTGACAAGAGAAGACCAAATCTCGAAC
+CATTGCTAGACCTTTGCTAGAATCCACCAATGAGAAGCTTCAACACTCAA
+AAAATGGTTCAGGAAAAACACAAACTGATGAATCAAGTTCTCTTTTCACT
+GCTGAAGACCAGCTCGAGCAGAAGATCTGAATAGAGCTTAGAAGCAATGG
+ACGCCATATGGGAGAGAAGCTCGGGATCTGAGCCGTCGTCCTCTTCCTCT
+GTCTTCTTCGTTTCCGCTAATGGGATAGAAGCTCAGGATTCGCTGCCACC
+GCCTTTGACGCTGCTCCGCCTTGCCACCGGAGAAGAGAGAGAGAACGAGG
+TGTGACGGAGATGAGGAGATGCGACGAACCCCTAACAAga
+
+QA 81 785 21 790
+DS CHROMAT_FILE: LL2326f PHD_FILE: LL2326f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:03 2000
+
+RD LL2326r 789 0 0
+tcCGTAGTTAATCTCCAACAATGAGTCAATAATAGCTTCTTGCTCCATTC
+TTTCTTGACAAGAGAAGACCAAATCTCGAACCATTGCTAGACCTTTGCTA
+GAATCCACCAATGAGAAGCTTCAACACTCAAAAAATGGTTCAGGAAAAAC
+ACAAACTGATGAATCAAGTTCTCTTTTCACTGCTGAAGACCAGCTCGAGC
+AGAAGATCTGAATAGAGCTTAGAAGCAATGGACGCCATATGGGAGAGAAG
+CTCGGGATCTGAGCCGTCGTCCTCTTCCTCTGTCTTCTTCGTTTCCGCTA
+ATGGGATAGAAGCTCAGGATTCGCTGCCACCGCCTTTGACGCTGCTCCGC
+CTTGCCACCGGAGAAGAGAGAGAGAACGAGGTGTGACGGAGATGAGGAGA
+TGCGACGAACCCCTAACAAGAAGTCTCGAATGGAAACAAGTAAATctGAA
+CCCTATAATCAGTAGAAGAAGAAACAAATTCTCAGAAATCGACCTGTGGA
+TCAAGGCCTGTTCGAGACCCGTAACGATGACGACGAACTCAGACCGGTGG
+TGATGGAGACGAACTCATACCGGTGACGATGGCACAAACTCAGACCGGTC
+TCTTCTTCCTCATGCAATCTCTCATCTTTCTCTGTCTCATCTCTCTTCTC
+TCTGTCGTGTTTGAGAAGAGAAATGGAGAGGACAAAACACAATCTCCCTC
+CTTcccctaatgcaggaatcaagattaactttccggacgcgtggxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaagt
+
+QA 20 709 1 744
+DS CHROMAT_FILE: LL2326r PHD_FILE: LL2326r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:40 2000
+
+CO Contig38 894 2 66 U
+cttttgacaaaccaaacataactctacaattatatgtactaaactgagta
+caacattagaaatgaatgtctcatgctataGATtTGGAATCAAGTTTGTT
+TATGGTTTATAAAAGAGAAAATAGCCTTTAAAGTGGCGAAACACAAACAA
+ACAATGTTTCGAGAAATGAGACGATTAATTTATGAAAAGTGGACAGCTGC
+AGAGTCTCTGCGACACAACCTATGTAGTGAGTTCCATGATGGCGGAAACA
+ATATCTCCATCATTGGCTTTGAGAGCCTTAGTGGCTTTGGCCTTCGAAAC
+ACCAGCCTGAGTCATCACGAGCTCAACATCCTTGGCTTCAACACCAGTCT
+CATCCACGTCATCATCTTCGTACTCCTCTTGTGCAACCGCGGCTGCTTCA
+GAAGCATCGGAGTTAGGGATCATAGAGGCAACGTCCGGCATCTTGAACCT
+CTGAGCAGCTTGAGCTTGTAGCTGAGAGCTCATATCATCAATCTTGGCCT
+CACCGAATATGACATAGGTCTCAGAGTTGGGACTCTTGAAGACATCCGGC
+TTCGAGATGACAAACAAAACATTCTTTGATCTCTTGATAGTCACTCTGCT
+AACATCAGTGACAGGTTTCATTCCAAGTTTCAGCATAGCTTTGCGGCTTT
+TCTTTTCGCTTCTGCTTTGTTTAGAGCTCTCATTGTCACCAGCTCCATCG
+ACGTTGTCATCGTCATCGTCATCGACGTCTTCGTCTCCGTCTTTCACATC
+CTCAACGACAACATCATCTTCCTTCTCGAGCTTCATTTGCTCTTTGATGG
+CTTCCTCAATCTTAACTTCTTCAGCAACAGGGCCTGGCATCTTCTTCTTC
+Aaccccctatcttcttccaacaaacccttctagcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 33 24 24 16 22 26 39 40 40 37 37 40 37 37 56 40 37 37 35 35
+ 35 35 35 35 40 35 35 35 35 35 35 46 46 51 56 40 40 40 40 37 32 35 35 35 35 35 35 35 40 35 32 32 44 32 30 35 35 35 35 35 37 40 40 40 39 42 42 42 42 42
+ 42 37 50 50 50 52 56 55 52 56 58 64 60 71 63 64 66 65 62 53 53 53 59 53 49 57 57 57 57 65 61 64 64 63 56 59 63 61 62 60 50 53 52 56 58 53 48 44 46 50
+ 60 59 69 73 61 59 52 52 52 56 56 51 51 51 53 55 51 50 50 50 62 69 71 78 73 65 71 71 71 71 71 66 68 69 66 90 73 71 62 57 56 56 56 56 56 53 57 57 57 61
+ 62 85 74 72 66 65 59 69 70 78 78 71 60 60 57 57 63 65 64 60 67 69 74 59 58 56 56 57 62 62 79 71 64 61 61 64 58 58 59 54 50 48 50 50 56 62 54 51 56 67
+ 67 60 69 64 64 53 54 66 71 73 76 78 76 84 82 78 73 71 73 73 75 75 72 82 78 81 81 81 64 59 59 65 67 66 70 61 68 76 70 72 70 70 72 68 69 70 88 72 75 82
+ 86 88 90 90 78 79 70 70 67 70 88 90 87 90 90 90 90 82 82 81 72 72 82 80 78 90 86 87 82 82 76 79 72 73 72 69 69 69 62 59 65 56 66 69 70 82 76 90 90 90
+ 89 74 85 72 81 79 88 90 90 82 82 86 81 82 83 88 87 90 90 90 90 90 90 90 88 78 78 78 78 75 75 81 86 86 86 86 81 81 81 87 90 90 90 87 87 82 83 81 88 84
+ 90 87 89 79 81 81 80 80 86 86 90 90 90 90 90 90 81 75 85 70 74 74 85 90 90 90 90 90 86 86 86 86 90 90 90 90 90 90 90 90 88 90 90 90 85 80 78 81 85 85
+ 82 82 82 90 90 90 90 90 90 90 90 90 90 90 90 85 85 85 86 84 83 90 89 85 85 80 80 80 90 90 90 88 88 88 82 85 87 87 87 80 90 79 84 90 81 88 90 86 86 83
+ 86 86 88 90 90 90 83 88 90 90 90 90 90 86 86 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 85 85 85 85 80 87 90 90 77 76 74 74 74
+ 70 72 78 90 82 82 87 85 85 71 80 83 85 85 90 85 90 90 90 90 90 90 90 90 85 85 83 83 83 83 83 76 76 76 83 90 83 83 72 68 58 73 63 61 61 64 71 82 90 90
+ 90 90 87 85 68 65 70 70 68 65 65 75 80 77 81 75 63 68 67 59 59 67 67 69 68 75 75 74 80 74 77 77 72 87 65 71 71 76 72 74 70 64 60 56 65 65 58 62 64 67
+ 65 62 60 58 58 66 66 72 72 72 72 73 68 64 64 72 75 63 59 60 68 80 74 63 67 67 63 60 63 60 57 60 63 59 52 50 50 50 54 56 60 59 78 71 77 79 76 76 72 66
+ 66 69 69 54 54 58 62 54 54 60 66 60 64 65 65 71 78 81 85 77 72 72 76 70 66 63 60 57 59 61 54 50 55 66 66 70 55 55 40 46 46 46 51 51 51 40 40 40 40 40
+ 40 40 40 40 40 40 40 40 40 45 34 34 32 32 35 35 42 46 46 46 39 33 33 33 33 33 34 51 51 40 40 40 40 40 39 39 39 39 39 40 51 56 56 56 44 46 33 33 30 24
+ 24 14 19 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2327f U 1
+AF LL2327r C 144
+BS 1 194 LL2327f
+BS 195 197 LL2327r
+BS 198 235 LL2327f
+BS 236 236 LL2327r
+BS 237 257 LL2327f
+BS 258 258 LL2327r
+BS 259 282 LL2327f
+BS 283 283 LL2327r
+BS 284 315 LL2327f
+BS 316 316 LL2327r
+BS 317 323 LL2327f
+BS 324 325 LL2327r
+BS 326 337 LL2327f
+BS 338 338 LL2327r
+BS 339 350 LL2327f
+BS 351 355 LL2327r
+BS 356 356 LL2327f
+BS 357 364 LL2327r
+BS 365 365 LL2327f
+BS 366 366 LL2327r
+BS 367 370 LL2327f
+BS 371 376 LL2327r
+BS 377 377 LL2327f
+BS 378 378 LL2327r
+BS 379 390 LL2327f
+BS 391 402 LL2327r
+BS 403 406 LL2327f
+BS 407 409 LL2327r
+BS 410 411 LL2327f
+BS 412 412 LL2327r
+BS 413 415 LL2327f
+BS 416 425 LL2327r
+BS 426 432 LL2327f
+BS 433 436 LL2327r
+BS 437 439 LL2327f
+BS 440 445 LL2327r
+BS 446 448 LL2327f
+BS 449 450 LL2327r
+BS 451 451 LL2327f
+BS 452 452 LL2327r
+BS 453 458 LL2327f
+BS 459 466 LL2327r
+BS 467 469 LL2327f
+BS 470 507 LL2327r
+BS 508 509 LL2327f
+BS 510 519 LL2327r
+BS 520 521 LL2327f
+BS 522 562 LL2327r
+BS 563 565 LL2327f
+BS 566 566 LL2327r
+BS 567 570 LL2327f
+BS 571 577 LL2327r
+BS 578 582 LL2327f
+BS 583 604 LL2327r
+BS 605 607 LL2327f
+BS 608 610 LL2327r
+BS 611 612 LL2327f
+BS 613 614 LL2327r
+BS 615 615 LL2327f
+BS 616 635 LL2327r
+BS 636 636 LL2327f
+BS 637 641 LL2327r
+BS 642 642 LL2327f
+BS 643 683 LL2327r
+BS 684 684 LL2327f
+BS 685 894 LL2327r
+
+RD LL2327f 792 0 0
+cttttgacaaaccaaacataactctacaattatatgtactaaactgagta
+caacattagaaatgaatgtctcatgctataGATtTGGAATCAAGTTTGTT
+TATGGTTTATAAAAGAGAAAATAGCCTTTAAAGTGGCGAAACACAAACAA
+ACAATGTTTCGAGAAATGAGACGATTAATTTATGAAAAGTGGACAGCTGC
+AGAGTCTCTGCGACACAACCTATGTAGTGAGTTCCATGATGGCGGAAACA
+ATATCTCCATCATTGGCTTTGAGAGCCTTAGTGGCTTTGGCCTTCGAAAC
+ACCAGCCTGAGTCATCACGAGCTCAACATCCTTGGCTTCAACACCAGTCT
+CATCCACGTCATCATCTTCGTACTCCTCTTGTGCAACCGCGGCTGCTTCA
+GAAGCATCGGAGTTAGGGATCATAGAGGCAACGTCCGGCATCTTGAACCT
+CTGAGCAGCTTGAGCTTGTAGCTGAGAGCTCATATCATCAATCTTGGCCT
+CACCGAATATGACATAGGTCTCAGAGTTGGGACTCTTGAAGACATCCGGC
+TTCGAGATGACAAACAAAACATTCTTTGATCTCTTGATAGTCACTCTGCT
+AACATCAGTGACAGGTTTCATTCCAAGTTTCAGCATAGCTTTGCGGCTTT
+TCTTTTCGCTTCTGCTTTGTTTAGAGCTCTCATTGTCACCAGCTCCATCG
+ACGTTGTCATCGTCATCGTCATCGACGTCTTCGTCTCCGTCTTTCACATC
+CTCAACGACAACATCATCTTCCTTCTCGAGCTTCATTTGctt
+
+QA 81 788 1 791
+DS CHROMAT_FILE: LL2327f PHD_FILE: LL2327f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:08 2000
+
+RD LL2327r 795 0 0
+caAACAAACAATGTTTCGAGAAATGAGACGATTAATTTATGAAAAGTGGA
+CAGCTGCAGAGTCTCTGCGACACAACCTATGTAGTGAGTTCCATGATGGC
+GGAAACAATATCTCCATCATTGGCTTTGAGAGCCTTAGTGGCTTTGGCCT
+TCGAAACACCAGCCTGAGTCATCACGAGCTCAACATCCTTGGCTTCAACA
+CCAGTCTCATCCACGTCATCATCTTCGTACTCCTCTTGTGCAACCGCGGC
+TGCTTCAGAAGCATCGGAGTTAGGGATCATAGAGGCAACGTCCGGCATCT
+TGAACCTCTGAGCAGCTTGAGCTTGTAGCTGAGAGCTCATATCATCAATC
+TTGGCCTCACCGAATATGACATAGGTCTCAGAGTTGGGACTCTTGAAGAC
+ATCCGGCTTCGAGATGACAAACAAAACATTCTTTGATCTCTTGATAGTCA
+CTCTGCTAACATCAGTGACAGGTTTCATTCCAAGTTTCAGCATAGCTTTG
+CGGCTTTTCTTTTCGCTTCTGCTTTGTTTAGAGCTCTCATTGTCACCAGC
+TCCATCGACGTTGTCATCGTCATCGTCATCGACGTCTTCGTCTCCGTCTT
+TCACATCCTCAACGACAACATCATCTTCCTTCTCGAGCTTCATTTGCTCT
+TTGATGGCTTCCTCAATCTTAACTTCTTCAGCAACAGGGCCTGGCATCTT
+CTTCTTCAaccccctatcttcttccaacaaacccttctagcggacgcgtg
+gxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaag
+
+QA 10 715 1 751
+DS CHROMAT_FILE: LL2327r PHD_FILE: LL2327r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:44 2000
+
+CO Contig39 566 2 76 U
+tTTTTTTTTtttaacCataacAGTTTCCTTGATTACATTAACTTCATGAG
+GTTTAGTATTACACAC*ACTGGTCTGAAACAAAGCTATCTCAGCCTTATT
+TATTTCATATAATTTAGCAGAAACATTTAGATTATTACCCTTTAAACAAG
+TGAAATGTTCAAGAAGAAGATTCTTTAAACTCTTGTAACTGCTTGAAGTT
+CATCCATGGCTTCACCCAAACTTTGGCTTCGAAGTTCTTAGCCTGGCCAC
+CTTCGTTTGCTTCAAGAGTTAAGTAGTACATGGTTCCAGCAACCACCTGT
+TCTCTTGCCTTTACAATCTTCTTGAACTCAAGAACCTTGTTCTCTCGGTT
+GTTATGTTCTTGAATAGCGAATCGAGCGAGACTCTCGATCTCTCCACTGT
+TTTGGTTTCCTCGGAGATCATGAACGCCTCCTAAAATCATCGTCTTCTCC
+GTGGTTTTCTCTATTGATCTGCAACTTCTTGGTTGAATTGTTCGACAGAG
+AAGGAGGGTTACGATCAAGAACGTAACAGATACGAAAGCTTTTGATTCCA
+TTTTCCGGACGCGTGG
+
+BQ
+ 0 27 27 48 40 40 40 40 25 17 8 9 9 11 11 22 17 19 10 10 10 24 24 29 25 25 25 22 22 22 25 27 22 22 25 24 27 27 24 21 28 43 46 57 70 87 79 79 75 80
+ 66 66 64 65 65 67 64 58 49 49 51 48 42 40 40 40 40 42 47 48 72 66 68 70 78 69 65 71 67 69 66 74 72 72 72 80 80 80 86 86 65 60 57 59 54 56 60 60 71 72
+ 79 85 79 90 90 90 77 77 80 77 75 74 68 68 66 68 68 80 82 88 82 88 79 88 86 82 84 79 85 80 80 80 80 86 80 84 80 80 90 72 72 75 81 81 86 82 86 85 85 85
+ 85 90 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 86 86 90 86 80 90 90 90 90 90 90 90 77 77 76 90 90 89 89 89 84 90 90 90 90 90 90
+ 90 90 90 90 90 88 80 80 80 80 86 90 90 90 90 90 90 90 80 80 66 61 64 74 70 88 90 90 90 90 88 90 88 88 83 90 86 86 88 88 88 90 90 90 90 90 90 90 90 90
+ 88 90 90 90 90 86 86 90 90 90 90 90 90 90 90 90 86 86 86 86 86 86 86 86 86 86 88 90 90 90 90 90 90 90 90 90 90 90 90 87 90 90 90 90 90 86 86 88 88 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 90 88 88 88 88 88 88 88 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 88 81 75 75 75 75 75 85 90 85 82 82 82 82 82 78 78 80 86 80 78 82 83 83 83 88 90 90 90 90 90 90 90 83 83 83 90 90
+ 90 90 90 90 83 83 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 78 78 78 78 78 83 88 88 90 90 90 90 90 90 90 86 86 86 90 90 90 90 90 90 90 85 84 80
+ 85 85 90 90 90 90 90 90 90 90 90 89 84 90 82 82 90 81 81 75 75 68 69 83 87 79 73 88 88 72 69 69 69 72 72 80 90 90 79 75 69 75 75 75 85 90 90 90 90 90
+ 90 90 90 90 90 90 86 86 75 72 71 74 74 74 72 68 67 67 64 67 67 67 67 61 71 70 77 77 80 77 74 77 77 77 87 86 86 86 86 80 84 84 72 72 67 71 61 61 46 40
+ 39 39 39 39 39 51 66 66 61 47 47 47 32 33 46
+
+AF LL2329r C -169
+AF LL2329f U 29
+BS 1 37 LL2329r
+BS 38 41 LL2329f
+BS 42 43 LL2329r
+BS 44 44 LL2329f
+BS 45 48 LL2329r
+BS 49 63 LL2329f
+BS 64 69 LL2329r
+BS 70 96 LL2329f
+BS 97 99 LL2329r
+BS 100 100 LL2329f
+BS 101 107 LL2329r
+BS 108 119 LL2329f
+BS 120 122 LL2329r
+BS 123 123 LL2329f
+BS 124 139 LL2329r
+BS 140 146 LL2329f
+BS 147 152 LL2329r
+BS 153 153 LL2329f
+BS 154 155 LL2329r
+BS 156 156 LL2329f
+BS 157 183 LL2329r
+BS 184 189 LL2329f
+BS 190 195 LL2329r
+BS 196 197 LL2329f
+BS 198 203 LL2329r
+BS 204 206 LL2329f
+BS 207 232 LL2329r
+BS 233 233 LL2329f
+BS 234 235 LL2329r
+BS 236 236 LL2329f
+BS 237 237 LL2329r
+BS 238 248 LL2329f
+BS 249 251 LL2329r
+BS 252 258 LL2329f
+BS 259 263 LL2329r
+BS 264 278 LL2329f
+BS 279 280 LL2329r
+BS 281 292 LL2329f
+BS 293 296 LL2329r
+BS 297 302 LL2329f
+BS 303 303 LL2329r
+BS 304 304 LL2329f
+BS 305 316 LL2329r
+BS 317 324 LL2329f
+BS 325 330 LL2329r
+BS 331 331 LL2329f
+BS 332 336 LL2329r
+BS 337 350 LL2329f
+BS 351 356 LL2329r
+BS 357 370 LL2329f
+BS 371 388 LL2329r
+BS 389 389 LL2329f
+BS 390 396 LL2329r
+BS 397 405 LL2329f
+BS 406 407 LL2329r
+BS 408 408 LL2329f
+BS 409 409 LL2329r
+BS 410 422 LL2329f
+BS 423 429 LL2329r
+BS 430 472 LL2329f
+BS 473 478 LL2329r
+BS 479 480 LL2329f
+BS 481 486 LL2329r
+BS 487 487 LL2329f
+BS 488 489 LL2329r
+BS 490 491 LL2329f
+BS 492 492 LL2329r
+BS 493 495 LL2329f
+BS 496 496 LL2329r
+BS 497 507 LL2329f
+BS 508 524 LL2329r
+BS 525 532 LL2329f
+BS 533 540 LL2329r
+BS 541 543 LL2329f
+BS 544 564 LL2329r
+BS 565 566 LL2329f
+
+RD LL2329r 780 0 0
+ttggtancgccagggttttxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxcttttttttttttttt
+ttttnttttttttnnttnnntTTTTTTTTtttaacCataacAGTTTCCTT
+GATTACAttaaCTTCATGAGGTTTAGTATTACACAC*ACTGGTCTGAAAC
+AAAGCTATCTCAGCCTTATTTATTTCATATAATTTAGCAGAAACATTTAG
+ATTATTACCCTTTAAACAAGTGAAATGTTCAAGAAGAAGATTCTTTAAAC
+TCTTGTAACTGCTTGAAGTTCATCCATGGCTTCACCCAAACTTTGGCTTC
+GAAGTTCTTAGCCTGGCCACCTTCGTTTGCTTCAAGAGTTAAGTAGTACA
+TGGTTCCAGCAACCACCTGTTCTCTTGCCTTTACAATCTTCTTGAACTCA
+AGAACCTTGTTCTCTCGGTTGTTATGTTCTTGAATAGCGAATCGAGCGAG
+ACTCTCGATCTCTCCACTGTTTTGGTTTCCTCGGAGATCATGAACGCCTC
+CTAAAATCATCGTCTTCTCCGTGGTTTTCTCTATTGATCTGCAACTTCTT
+GGTTGAATTGTTCGACAGAGAAGGAGGGTTACGATCAAGAACGTAACAGA
+TACGAAAGCTTTTGATTCCATTTTCCGGACGCGTGGxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 172 736 171 736
+DS CHROMAT_FILE: LL2329r PHD_FILE: LL2329r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:17 2000
+
+RD LL2329f 759 0 2
+ttgat*aCATTAACTTCATGAGGTTTAGTATTACAcaccaCTGGTCTGAA
+ACAAAGCTATCTCAGCCTTATTTATTTCATATAATTTAGCAGAAACATTT
+AGATTATTACCCTTTAAACAAGTGAAATGTTCAAGAAGAAGATTCTTTAA
+ACTCTTGTAACTGCTTGAAGTTCATCCATGGCTTCACCCAAACTTTGGCT
+TCGAAGTTCTTAGCCTGGCCACCTTCGTTTGCTTCAAGAGTTAAGTAGTA
+CATGGTTCCAGCAACCACCTGTTCTCTTGCCTTTACAATCTTCTTGAACT
+CAAGAACCTTGTTCTCTCGGTTGTTATGTTCTTGAATAGCGAATCGAGCG
+AGACTCTCGATCTCTCCACTGTTTTGGTTTCCTCGGAGATCATGAACGCC
+TCCTAAAATCATCGTCTTCTCCGTGGTTTTCTCTATTGATCTGCAACTTC
+TTGGTTGAATTGTTCGACAGAGAAGGAGGGTTACGATCAAGAACGTAACA
+GATACGAAAGCTTTTGATTCCATttctcgGACGCGTGGxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xgcctggngtgcctaatgagtgagctaactcacattaattgcgttgcgct
+cactgcccg
+
+QA 14 538 1 538
+DS CHROMAT_FILE: LL2329f PHD_FILE: LL2329f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:39 2000
+
+RT{
+LL2329f chimera phrap 702 759 000919:094547
+}
+
+RT{
+LL2329f matchElsewhereHighQual phrap 709 759 000919:094547
+}
+
+CO Contig40 338 2 1 U
+CCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGAAA
+AAGAGGTTCGATTTCTGATGAATCATACAAAGATACGACAAAGGAAATTG
+CACAAATTTACATAGGAAAGACCACAAACAAACTTCTCTTTCTCTGGCTC
+TCACTCACATAAGCTTCACTTTCCGGGGACGAAGTTGGTGGCGAAGGCCC
+ATGCGTTGTTGTTGACTGGATCAGCCAAATGGTCGGCGAGATTCTCCAAC
+GGTCCCTTTCCAGTGACAATAGCCTGGACAAAGAATCCAAACATAGagcc
+caTAGCCAACCTTCCGTTCTTGATCTCCGGACGCGTGG
+
+BQ
+ 20 27 27 50 50 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51
+ 51 51 51 43 43 58 58 58 77 79 79 79 90 86 90 77 77 82 82 82 85 85 85 86 86 86 86 90 86 80 80 80 74 72 72 72 78 78 80 90 90 90 90 90 90 90 90 90 90 90
+ 88 90 90 90 90 90 90 90 90 90 90 90 86 85 85 90 90 90 60 66 66 66 51 51 51 51 56 56 56 71 71 71 71 71 71 90 90 90 90 90 90 90 90 90 90 85 85 85 85 90
+ 90 90 90 90 90 90 90 85 85 83 83 80 80 90 85 90 90 90 90 90 90 85 85 85 85 80 79 75 71 71 69 68 68 74 87 79 90 90 72 70 67 67 67 67 71 75 82 82 85 82
+ 77 70 70 70 70 70 80 90 83 86 86 85 85 75 86 86 86 86 77 82 76 74 74 74 74 74 77 90 90 90 90 75 75 69 69 69 69 69 69 74 74 84 90 90 88 90 88 75 75 74
+ 74 74 74 80 86 90 90 90 90 82 82 81 83 77 77 79 77 75 73 73 70 70 73 74 79 76 76 77 88 88 88 90 90 90 90 88 88 88 90 90 88 88 55 49 47 40 19 11 9 9
+ 14 16 31 44 71 79 72 77 84 86 89 89 90 84 84 80 80 80 80 80 85 90 85 79 79 79 79 79 83 76 76 76 49 49 49 35 35 35
+
+AF LL2331r C -410
+AF LL2331f U 42
+BS 1 338 LL2331r
+
+RD LL2331r 793 0 2
+ccctaaagggagcccccgatttagagcttgacggggaaagccggcgaacg
+tggcgagaaaggaagggaagaaagcgaaaggagcgggcgctagggcgctg
+gcaagtgtagcggtcacgctgcgcgtaaccaccacacccgccgcgcttaa
+tgcgccgctacagggcgcgtcccattcgccattcaggctgcgcaactgtt
+gggaagggcgatcggtgcgggcctcttcgctattacgccagctggcgaaa
+gggggatgtgctgcaaggcgattaagttgggtaacgccagggttttxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxCCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTGAGAAAAAGAGGTTCGATTTCTGATGAATCATACAAAGATACGAC
+AAAGGAAATTGCACAAATTTACATAGGAAAGACCACAAACAAACTTCTCT
+TTCTCTGGCTCTCACTCACATAAGCTTCACTTTCCGGGGACGAAGTTGGT
+GGCGAAGGCCCATGCGTTGTTGTTGACTGGATCAGCCAAATGGTCGGCGA
+GATTCTCCAACGGTCCCTTTCCAGTGACAATAGCCTGGACAAAGAATCCA
+AACATAGagcccaTAGCCAACCTTCCGTTCTTGATCTCCGGACGCGTGGx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccagaat
+
+QA 142 749 412 749
+DS CHROMAT_FILE: LL2331r PHD_FILE: LL2331r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:26 2000
+
+RT{
+LL2331r chimera phrap 1 296 000919:094547
+}
+
+RT{
+LL2331r matchElsewhereHighQual phrap 1 296 000919:094547
+}
+
+RD LL2331f 788 0 3
+cttttagaaaacagGTTCGATTTCTGATGAATCATACAAAGATACGACAA
+AGGAAATTGCACAAATTTACATAGGAAAGACcaggaaCAAACTTCTCTTT
+CTCTGGCTCTCACTCACATAAGCTTCACTTTCCGGGGACGAAGTTGGTGG
+CGAAGGCCCATGCGTTGTTGTTGACTGGATCAGCCAAATGGTCGGCGAGA
+TTCTCCAACGGTCCCTTTCCAGTGACAATAGCCTGGACAAAGAATCCAAA
+CATAGAGAACATAGCCAACCTTCCGTTCTTGATCTCCGGACGCGTGGxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxgcctggggtgcctaatgagtgagctaactcacattaattg
+cgttgcgctcactgcccgctttccagtcgggaaacctgtcgtgccagctg
+cattaatgaatcggccaacgcgcggggagaggcggtttgcgtattgggcg
+ccagggtggtttttcttttcaccagtgagacgggcaacagctgattgccc
+ttcaccgcctggccctgagagagttgcagcaagcggtccacgctggtttg
+ccccgcaggcgaaaatcctgtttgatggtggttaacggcgggatataaca
+tgagctgtcttcggtatcgtcgtatcccactaccgaga
+
+QA 14 785 8 297
+DS CHROMAT_FILE: LL2331f PHD_FILE: LL2331f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:49 2000
+
+RT{
+LL2331f chimera phrap 461 788 000919:094547
+}
+
+RT{
+LL2331f matchElsewhereHighQual phrap 705 788 000919:094547
+}
+
+RT{
+LL2331f matchElsewhereHighQual phrap 461 703 000919:094547
+}
+
+CO Contig41 830 2 64 U
+ccacgcgtccgcggacgcgtgggttttttttttttTTTTTTTTTTTTATT
+TTTATTTTTTAGAAATTGAATTATTAAATTTAGTTTTGCTATAATCTTTA
+TATTTTCTGGATAATTAAATTCATCAATAAACTCTCATTTTTTAGTTTTG
+CTATAATCTTGATTATGAAAATAGCTCATACACACGACTCATGATCACAC
+AACTCACTCAGCTCTGGATCAGTCTGCCTGAACCGTGATTGGCTTTCCTC
+TTCCCATCGAGAATCCTCTGGTCCCATCAGGCATCCTTGGTCCCGGTGGT
+TGCTGTTTTGGCTACTCCATGCTGCTCATTGGATGGTTTGAAGGTGGTGT
+TCCAACTTGATGTTGATGGTGATGATTATGATGATTCCCACGCTACGCCC
+TTTTCTCTGCCCACCCGCTTTCTCATTCCCTTCATCCCTTGCTCCTCTCC
+TACTATCACTGCCTGCTCAGGCACATTTGTATCCCATTCTCCAGAGAACT
+CCTCAGCTTGTTTCTCAATTGGCTGTTGCTCAGATGTCGTGGCATCATCT
+TCTTCATGTTCTACTTCTACATCATGTCCTTTTCTTCCTCGCCCTTGAAC
+CTGTTTTGGTTCCTTTGTCTTGGTGCCTTGATCGAAACCTTGCAGAGGCA
+AGTGTAATTATCTCTTTTCTGAAGATTTTCAGGGAAACCACCAAAGATCC
+TCTCAATACAAGTGTGAATGTCTCTTTTCATAGACTTATTCCCAAATTGG
+AAGAaACCTAAATTcCCAAATCagaaaatCAAAATCCCTAAATTGAGATA
+ATACCCACGaAaTCaaACATGATGAagaat
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 37 30 30 30 30 33 44 48 56 40 40 37 37 40 40
+ 40 40 40 40 40 40 56 40 39 39 29 29 29 46 46 40 40 40 40 35 35 35 35 35 35 35 35 45 49 45 45 45 45 45 53 45 45 45 45 45 45 45 45 50 50 52 50 50 49 55
+ 50 50 52 52 66 56 61 61 53 53 53 50 55 50 50 50 50 50 50 61 61 61 61 61 66 50 40 40 40 35 35 37 47 47 50 56 55 61 66 66 61 56 56 52 52 52 54 45 43 44
+ 44 44 44 45 53 50 50 66 50 50 50 50 50 53 53 53 53 66 66 61 55 55 53 53 47 51 51 50 56 56 56 61 49 44 44 50 50 53 51 51 51 66 66 66 66 60 52 56 56 61
+ 49 48 53 53 53 48 49 49 49 48 50 50 48 48 48 66 66 66 61 56 53 52 52 51 53 51 51 51 53 53 50 53 49 49 45 50 50 61 66 53 48 48 48 48 53 53 51 51 51 51
+ 51 55 55 61 61 55 53 51 51 51 51 52 56 56 56 54 53 53 53 53 53 66 66 66 66 66 66 66 66 61 61 61 61 61 61 61 61 66 66 66 66 55 55 55 56 51 51 66 66 53
+ 46 46 46 46 46 53 66 66 61 53 46 46 46 46 46 46 53 55 66 61 61 53 53 50 53 50 53 53 66 66 66 66 66 66 66 66 66 61 61 61 61 61 61 66 66 66 66 60 52 52
+ 52 66 66 53 53 53 53 60 66 61 66 66 66 66 66 66 61 61 61 61 61 61 61 66 61 61 61 61 61 61 66 66 66 61 61 61 61 55 50 50 48 48 48 48 48 48 53 53 54 56
+ 66 66 66 61 61 61 53 53 53 53 53 53 53 51 53 53 46 46 46 53 55 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 61 55 55 55 55 55 55 61 66 66 66 53
+ 53 53 53 53 53 55 53 53 53 53 53 53 53 55 53 53 53 55 53 61 61 61 61 61 61 61 66 66 66 66 66 66 66 66 66 66 66 66 66 61 56 56 52 52 52 53 50 53 53 53
+ 53 53 61 66 55 55 55 55 55 55 61 61 61 61 61 61 61 61 61 61 55 55 55 55 55 53 48 53 53 53 53 47 52 66 56 55 55 55 50 50 50 56 52 61 53 61 61 61 66 66
+ 66 66 61 61 61 61 61 61 66 50 52 55 50 50 50 55 50 52 47 52 52 56 61 61 61 61 61 61 61 61 66 66 66 61 61 61 61 61 49 45 45 45 45 49 66 66 50 50 50 50
+ 50 50 54 61 61 61 61 61 47 47 47 50 50 50 66 66 66 66 66 66 66 66 50 45 45 45 45 50 66 66 61 50 50 50 50 50 50 61 61 61 61 56 53 52 52 45 47 45 47 45
+ 56 57 53 52 52 52 50 47 47 47 47 47 50 50 50 50 50 50 50 50 44 39 39 44 50 50 50 54 54 56 56 56 56 56 56 50 50 50 47 47 52 56 56 56 56 56 50 50 50 50
+ 50 50 50 44 39 44 58 58 50 50 50 50 50 37 39 39 45 42 43 47 46 46 50 50 50 44 35 35 39 39 42 42 42 42 39 41 41 45 45 44 47 36 40 44 40 40 40 34 29 29
+ 34 40 37 25 18 23 27 27 23 22 22 27 27 23 19 20 25 34 34 34 32 32 15 9 9 9 9 9 16 25 25 32 32 31 29 25 29 29 29 34 29 34 29 29 26 28 26 25 29 24
+ 24 29 32 29 32 20 20 20 22 19 25 19 25 21 17 17 25 29 32 32 40 36 28 27 25 17 0 0 0 0
+
+AF LL2336r U -44
+AF LL2336f U 67
+BS 1 92 LL2336r
+BS 93 95 LL2336f
+BS 96 96 LL2336r
+BS 97 98 LL2336f
+BS 99 99 LL2336r
+BS 100 105 LL2336f
+BS 106 106 LL2336r
+BS 107 108 LL2336f
+BS 109 135 LL2336r
+BS 136 136 LL2336f
+BS 137 147 LL2336r
+BS 148 153 LL2336f
+BS 154 155 LL2336r
+BS 156 161 LL2336f
+BS 162 178 LL2336r
+BS 179 185 LL2336f
+BS 186 197 LL2336r
+BS 198 201 LL2336f
+BS 202 206 LL2336r
+BS 207 209 LL2336f
+BS 210 230 LL2336r
+BS 231 231 LL2336f
+BS 232 232 LL2336r
+BS 233 239 LL2336f
+BS 240 266 LL2336r
+BS 267 271 LL2336f
+BS 272 279 LL2336r
+BS 280 294 LL2336f
+BS 295 295 LL2336r
+BS 296 308 LL2336f
+BS 309 309 LL2336r
+BS 310 317 LL2336f
+BS 318 327 LL2336r
+BS 328 328 LL2336f
+BS 329 329 LL2336r
+BS 330 351 LL2336f
+BS 352 353 LL2336r
+BS 354 357 LL2336f
+BS 358 359 LL2336r
+BS 360 360 LL2336f
+BS 361 366 LL2336r
+BS 367 373 LL2336f
+BS 374 374 LL2336r
+BS 375 387 LL2336f
+BS 388 400 LL2336r
+BS 401 401 LL2336f
+BS 402 403 LL2336r
+BS 404 413 LL2336f
+BS 414 416 LL2336r
+BS 417 419 LL2336f
+BS 420 436 LL2336r
+BS 437 463 LL2336f
+BS 464 464 LL2336r
+BS 465 467 LL2336f
+BS 468 468 LL2336r
+BS 469 543 LL2336f
+BS 544 544 LL2336r
+BS 545 560 LL2336f
+BS 561 562 LL2336r
+BS 563 567 LL2336f
+BS 568 568 LL2336r
+BS 569 569 LL2336f
+BS 570 571 LL2336r
+BS 572 830 LL2336f
+
+RD LL2336r 790 0 0
+actxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccgcggacgcgtgggttttttttttttTTTTTTTTTTTTATTTTTAT
+TTTTTAGAAATTGAATTATTAAATTTAGTTTTGCTATAATCTTTATATTT
+TCTGGATAATTAAATTCATCAATAAACTCTCATTTTTTAGTTTTGCTATA
+ATCTTGATTATGAAAATAGCTCATACACACGACTCATGATCACACAACTC
+ACTCAGCTCTGGATCAGTCTGCCTGAACCGTGATTGGCTTTCCTCTTCCC
+ATCGAGAATCCTCTGGTCCCATCAGGCATCCTTGGTCCCGGTGGTTGCTG
+TTTTGGCTACTCCATGCTGCTCATTGGATGGTTTGAAGGTGGTGTTCCAA
+CTTGATGTTGATGGTGATGATTATGATGATTCCCACGCTACGCCCTTTTC
+TCTGCCCACCCGCTTTCTCATTCCCTTCATCCCTTGCTCCTCTCCTACTA
+TCACTGCCTGCTCAGGCACATTTGTATCCCATTCTCCAGAGAACTCCTCA
+GCTTGTTTCTCAATTGGCTGTTGCTCAGATGTCGTGGCATCATCTTCTTC
+ATGTTCTACTTCTACATCATGTCCTTTTCTTCCTCGCCCTTGAACCTGTT
+TTGGTTCCTTTGTCTTGGTGCCTTGATCGAAACCTTGCAGAGGCAAGTGT
+AATTATCTCTTTTCTGAAGATTTTCAGGGAAACCACCAAAGATCCTCTCA
+ATACAAGTGTGAATGTCTCTTTTCATAGACTTATTCCcan
+
+QA 81 786 46 789
+DS CHROMAT_FILE: LL2336r PHD_FILE: LL2336r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:13 2000
+
+RD LL2336f 765 0 0
+cttgatactaAATTTAGTTTTGCTATAATCTTTATATTTTCTGGATAATT
+AAATTCATCAatgaaCTCTCATTTTTTAGTTTTGCTATAATCTTGATTAT
+GAAAATAGCTCATACACACGACTCATGATCACACAACTCACTCAGCTCTG
+GATCAGTCTGCCTGAACCGTGATTGGCTTTCCTCTTCCCATCGAGAATCC
+TCTGGTCCCATCAGGCATCCTTGGTCCCGGTGGTTGCTGTTTTGGCTACT
+CCATGCTGCTCATTGGATGGTTTGAAGGTGGTGTTCCAACTTGATGTTGA
+TGGTGATGATTATGATGATTCCCACGCTACGCCCTTTTCTCTGCCCACCC
+GCTTTCTCATTCCCTTCATCCCTTGCTCCTCTCCTACTATCACTGCCTGC
+TCAGGCACATTTGTATCCCATTCTCCAGAGAACTCCTCAGCTTGTTTCTC
+AATTGGCTGTTGCTCAGATGTCGTGGCATCATCTTCTTCATGTTCTACTT
+CTACATCATGTCCTTTTCTTCCTCGCCCTTGAACCTGTTTTGGTTCCTTT
+GTCTTGGTGCCTTGATCGAAACCTTGCAGAGGCAAGTGTAATTATCTCTT
+TTCTGAAGATTTTCAGGGAAACCACCAAAGATCCTCTCAATACAAGTGTG
+AATGTCTCTTTTCATAGACTTATTCCCAAATTGGAAGAaACCTAAATTcC
+CAAATCagaaaatCAAAATCCCTAAATTGAGATAATACCCACGaAaTCaa
+ACATGATGAagaatn
+
+QA 12 760 9 764
+DS CHROMAT_FILE: LL2336f PHD_FILE: LL2336f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:34 2000
+
+CO Contig42 1294 2 18 U
+ctttaagggaactgcttttagttaagtttattcgagtttgtaataccaaa
+gataaagcaaataccatgttcaagaggtaTATGAACAAACTCAAACACAA
+ACATCTAACATAGATTCGGACAAACAAACAAAAACGAAGTGTGTGTGTAA
+TAAGCTCTCTCAGACATAGCTTACTCAACTTATAGACGAAACTAAAAGAT
+ATGACATTTGACTCCACGCATCATTCTCCTTAATGCAGATAGGTTTCTAG
+GAATGTCTACTACACATAGCTTACATATAATATCTTTTAGCCTAGATCAG
+GGACAATGAAATGCTCTTCTTCATCATCCTCATCATCAACTCTGATCTCT
+AACCTCGGCTTCTTGTATGACACGCTGGACTTCATTGGTCTGTACGAATG
+AACATGACCTAAATGCGCAACAGAAAGCTCCGGAACTTGAGCTGCAGGAA
+GAGTCTCCTGATTAATCTGATCACTGAAGAAACCCATGTCTGAAAACCAC
+TCCAACTCCCCAAGATCAAGCTGTCCTTTCTGCTATACATTCAAACAAAA
+AAAAAAACAAGTCAATACAAAAAAAAAACAAAGCATTAAGGATCAAACCA
+ACACTTCAATCTCTACCTTATCGGTGAACTCAGGATCAGAGAAGTGGAAG
+AAATCATCAACAGCCCAAGGAAGAGGAGAAGCAGAAGAAGGCTGTTGCTG
+GCTTGGGATCTCCTTAGCTTTCTGTTGGTTGTTGGAAGGCTCGGACTGAT
+GATTCTTGTCTGTTTCTTTACTGCAACTACTTGAGCTCAGAGCCACTTTG
+ATCCCAGTGGCTAAGAACCTCTGGTGATTTGCAGATCTAGAGTTAGCCAC
+GTGGATAGATTCATCACAGTCCCTGCAAAGCAGAGCTCTATCCTCCACAC
+AGAAAATGAAAGCTGCCTTCTCTTGACAGATGTCGCAACGAGGGAACTTG
+GTGGAGAGAGAGTTGAGATGGAGGCGTTGGTGCTTGCTGGCGAGTTTGTT
+AGCTGCGTGAATCTCCACGTCGCATTTAGGACACAGAGCTGCTTCGTCGG
+CGCAACATATCACCGTCGCAGGAGCATTCTCACACACATCACACTGTATC
+TTCATCACAGTTCTTGATTCTTGGTTTAATAGATCAAGAACACTTGAATC
+TACCTTCTGAGAGTAGCCTAGCAAAAATATAGATGAAGAGAAAGTATTTT
+GAGACAGAAATCGAGAGATGTAAACAACAAAAGAGGAACAAGTAACAACT
+GTGTGGTGtggaggggaagagaggagagaagatcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 37 35 35 35 27 27 29 32 35 35 40 40 37 37 43 51 46 46 46 40 40
+ 42 42 42 37 37 39 37 37 37 40 40 37 40 40 40 40 40 35 35 35 40 40 40 51 51 51 51 51 51 51 51 51 40 39 39 39 39 38 37 56 50 50 44 44 46 40 40 40 38 38
+ 35 35 35 35 43 43 43 43 45 45 45 43 46 46 41 40 40 38 38 38 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 36 36 36 43 43 36 36 38 56 42 42 41
+ 41 41 43 45 45 41 41 42 38 36 36 36 35 35 36 36 43 43 36 38 42 42 42 42 46 43 43 43 41 43 43 42 42 42 46 46 42 42 42 46 46 46 42 41 38 38 38 38 40 43
+ 51 56 50 50 41 38 38 39 43 43 43 43 36 40 43 46 46 43 42 41 35 35 35 38 38 38 56 56 56 56 56 42 42 42 42 41 41 50 42 42 42 46 40 35 35 35 36 40 50 41
+ 36 36 36 36 35 35 47 50 50 56 51 43 43 43 43 43 40 42 42 50 50 50 44 44 44 50 46 56 50 50 44 44 47 50 50 50 43 43 43 43 43 38 38 43 42 56 45 45 45 38
+ 38 38 36 36 35 41 45 45 45 41 41 41 44 50 50 37 37 42 42 43 43 43 43 43 38 38 38 38 43 38 38 38 41 42 43 43 56 50 50 41 42 35 32 32 38 38 38 43 56 56
+ 56 56 56 56 56 45 41 41 41 41 42 50 50 41 38 38 38 38 35 37 42 42 42 42 41 41 41 41 41 41 41 42 41 35 35 38 38 38 41 38 40 38 40 35 38 50 50 50 50 42
+ 42 42 42 41 41 35 33 40 35 35 35 41 42 46 51 45 51 46 46 35 35 31 35 35 35 30 35 35 37 35 35 35 42 46 42 56 56 56 56 50 56 56 56 56 56 50 50 35 35 35
+ 40 40 40 40 40 37 40 40 40 56 56 56 56 56 51 51 51 51 45 45 56 46 42 42 56 56 56 56 56 56 56 42 42 42 42 42 46 56 56 56 50 56 46 46 46 42 42 42 47 56
+ 56 56 56 47 47 47 48 44 37 52 57 57 63 64 64 57 61 55 69 71 82 82 74 88 78 78 71 62 59 61 50 51 58 57 62 62 65 60 60 62 73 73 84 69 73 62 74 65 64 53
+ 53 59 70 62 54 43 42 42 56 54 63 65 80 69 69 67 67 69 71 71 82 73 67 47 42 40 46 50 60 75 69 63 66 69 74 80 72 72 74 74 71 53 48 54 50 40 43 58 53 52
+ 59 60 71 73 73 65 54 49 34 34 40 54 48 58 61 61 67 50 41 44 54 50 50 54 46 49 59 64 56 55 55 40 48 48 48 46 46 55 55 55 55 55 55 71 71 71 66 55 55 60
+ 60 68 63 68 70 70 76 90 83 69 61 57 50 50 47 47 51 51 61 63 65 61 61 58 55 55 58 59 58 62 59 51 48 41 41 40 47 47 55 59 64 63 61 61 61 64 71 67 69 70
+ 69 58 53 55 72 52 54 59 61 62 61 61 67 61 61 61 61 71 68 71 54 52 52 52 52 53 66 68 69 66 61 60 68 60 60 60 45 56 40 40 45 45 45 45 56 56 51 51 51 51
+ 51 51 56 51 45 40 40 40 40 45 40 40 51 45 45 51 40 40 45 40 40 40 40 40 40 40 45 45 40 37 37 37 40 40 51 51 56 56 45 45 45 40 40 40 40 40 40 45 40 40
+ 40 40 40 51 51 51 51 51 51 56 56 56 56 56 56 51 40 40 40 40 40 45 51 56 56 56 45 40 40 40 40 40 40 45 45 45 45 46 46 43 43 56 56 56 56 56 56 51 51 51
+ 51 51 56 56 56 56 56 56 43 43 43 43 43 43 56 56 51 51 51 51 51 45 45 45 51 45 45 56 56 51 51 45 43 43 43 43 43 45 51 51 51 51 51 51 51 51 51 51 45 45
+ 45 45 45 51 56 56 56 56 51 45 45 45 45 45 56 56 56 56 56 51 51 45 43 43 43 43 43 43 45 45 45 45 45 51 51 51 51 43 43 43 43 43 43 43 43 43 45 45 43 43
+ 43 43 43 43 36 36 36 36 36 43 45 45 51 51 51 45 43 43 43 43 43 43 43 43 51 51 51 51 51 51 51 56 51 45 43 43 43 43 43 43 45 45 43 43 43 43 43 36 35 35
+ 35 35 35 36 43 51 51 51 51 56 56 51 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 45 45 45 45 51 51 56
+ 56 56 56 56 56 56 51 43 43 43 43 43 51 51 51 51 51 51 56 56 56 56 56 56 43 43 43 43 43 43 45 45 45 51 45 43 43 43 43 43 45 51 51 45 45 51 51 51 51 56
+ 56 56 56 51 51 51 45 45 45 43 43 35 35 35 35 35 35 43 35 35 35 35 35 38 56 45 45 40 40 40 40 40 40 45 51 51 51 51 51 51 40 40 39 39 39 39 46 46 46 40
+ 40 40 40 51 40 40 40 40 40 40 51 51 51 51 51 51 51 40 39 39 39 39 39 39 40 40 40 37 40 46 56 56 56 51 51 51 51 51 45 40 40 40 40 40 40 37 51 51 40 37
+ 46 40 36 35 35 35 35 31 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2337f U 0
+AF LL2337r C 553
+BS 1 582 LL2337f
+BS 583 583 LL2337r
+BS 584 606 LL2337f
+BS 607 608 LL2337r
+BS 609 612 LL2337f
+BS 613 613 LL2337r
+BS 614 621 LL2337f
+BS 622 626 LL2337r
+BS 627 627 LL2337f
+BS 628 628 LL2337r
+BS 629 629 LL2337f
+BS 630 632 LL2337r
+BS 633 635 LL2337f
+BS 636 644 LL2337r
+BS 645 648 LL2337f
+BS 649 658 LL2337r
+BS 659 661 LL2337f
+BS 662 1294 LL2337r
+
+RD LL2337f 789 0 0
+nctttaagggaactgcttttagttaagtttattcgagtttgtaataccaa
+agataaagcaaataccatgttcaagaggtaTATGAACAAACTCAAACACA
+AACATCTAACATAGATTCGGACAAACAAACAAAAACGAAGTGTGTGTGTA
+ATAAGCTCTCTCAGACATAGCTTACTCAACTTATAGACGAAACTAAAAGA
+TATGACATTTGACTCCACGCATCATTCTCCTTAATGCAGATAGGTTTCTA
+GGAATGTCTACTACACATAGCTTACATATAATATCTTTTAGCCTAGATCA
+GGGACAATGAAATGCTCTTCTTCATCATCCTCATCATCAACTCTGATCTC
+TAACCTCGGCTTCTTGTATGACACGCTGGACTTCATTGGTCTGTACGAAT
+GAACATGACCTAAATGCGCAACAGAAAGCTCCGGAACTTGAGCTGCAGGA
+AGAGTCTCCTGATTAATCTGATCACTGAAGAAACCCATGTCTGAAAACCA
+CTCCAACTCCCCAAGATCAAGCTGTCCTTTCTGCTATACATTCAAACAAA
+AAAAAAAACAAGTCAATACAAAAAAAAAACAAAGCATTAAGGATCAAACC
+AACACTTCAATCTCTACCTTATCGGTGAACTCAGGATCAGAGAAGTGGAA
+GAAATCATCAACAGCCCAAGGAAGAGGAGAAGcacaagAAGGCTGTTGCT
+GGCTTGGGATCTCCTTAGCTTTCTGTTGGTTGTTGGAAGGCTCGGACTGA
+TGATTCTTGTCTGTTTCTTTACTGCAACTACTTGAGCtc
+
+QA 81 787 2 789
+DS CHROMAT_FILE: LL2337f PHD_FILE: LL2337f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:39 2000
+
+RD LL2337r 786 0 0
+aaaancaAGTCAATACAAAAAAAAAACAAAGCATTAAGGATCAAACCAAC
+ACTTCAATCTCTACCTTATCGGTGAACTCAGGATCAGAGAAGTGGAAGAA
+ATCATCAACAGCCCAAGGAAGAGGAGAAGCAGAAGAAGGCTGTTGCTGGC
+TTGGGATCTCCTTAGCTTTCTGTTGGTTGTTGGAAGGCTCGGACTGATGA
+TTCTTGTCTGTTTCTTTACTGCAACTACTTGAGCTCAGAGCCACTTTGAT
+CCCAGTGGCTAAGAACCTCTGGTGATTTGCAGATCTAGAGTTAGCCACGT
+GGATAGATTCATCACAGTCCCTGCAAAGCAGAGCTCTATCCTCCACACAG
+AAAATGAAAGCTGCCTTCTCTTGACAGATGTCGCAACGAGGGAACTTGGT
+GGAGAGAGAGTTGAGATGGAGGCGTTGGTGCTTGCTGGCGAGTTTGTTAG
+CTGCGTGAATCTCCACGTCGCATTTAGGACACAGAGCTGCTTCGTCGGCG
+CAACATATCACCGTCGCAGGAGCATTCTCACACACATCACACTGTATCTT
+CATCACAGTTCTTGATTCTTGGTTTAATAGATCAAGAACACTTGAATCTA
+CCTTCTGAGAGTAGCCTAGCAAAAATATAGATGAAGAGAAAGTATTTTGA
+GACAGAAATCGAGAGATGTAAACAACAAAAGAGGAACAAGTAACAACTGT
+GTGGTGtggaggggaagagaggagagaagatcggacgcgtggxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxat
+
+QA 7 706 1 742
+DS CHROMAT_FILE: LL2337r PHD_FILE: LL2337r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:17 2000
+
+CO Contig43 512 2 76 U
+tgAGCGAATTGTGTCTATGTCATTCACTTTCAAAAAATTATAGAACAACT
+CTTTCAAGAAGAAGAGAAAAAAATCATCTATTAACAAGGTACAAGTCTAT
+CTCTAATTAGATGAACAATATGATGGATAATCATCAATGAAGCCGAGAAG
+TAACATTGATGAAAGTATCTTCAGGACAAGGGATAGTTAAGCCACCCATT
+GGATGATCAAACCCAAACTCTTCTTCAGATCTGCTGAGGAGAGCTTGAAA
+TAAGGGCTGGCTCAAGTATGAGATTGGCACCACATATCTCTTCTTCTGGC
+TCTCTCCAACGTACACCGCAAGAAACCCTTTTGGTGGTGCCGCCATGGTT
+GCTCTTTTGCTTGTAGAAGCTGTTGCTGTTACGGAACGACCAAGAATCTT
+TTTTGCACCCAATAAACTTCTAACCAAAGCCATTTGTCTGAAATTTTAAA
+CTCTTTGGAAGAGATGTATGAAAAGATTCAAGTTATTGGATTGCTGCTTG
+CCGGACGCGTGG
+
+BQ
+ 0 17 20 27 29 34 34 39 40 40 46 55 55 55 55 55 86 83 85 85 85 85 85 88 88 85 85 85 88 90 90 90 90 90 86 85 85 85 79 85 90 80 85 78 82 82 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 86 82 82 79 90 90 85 82 77 80 80 80 90 90 79 79 90 90 90
+ 90 84 84 84 84 80 80 86 86 86 86 86 86 82 82 82 81 81 86 89 90 90 85 85 85 84 84 83 89 89 89 90 90 90 90 90 90 90 90 85 85 80 88 88 90 90 88 88 78 78
+ 78 75 86 86 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 90 90 90 83 83 86 86 83 83 83 86 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 88 88 88 88 88 90 90 86 84 86 88 88 86 90 90 90 90 90
+ 90 90 90 88 88 88 88 88 88 90 90 90 86 86 88 88 88 88 88 90 90 90 90 90 88 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 90
+ 90 90 90 90 90 90 90 89 86 86 86 86 86 90 86 86 86 81 81 81 79 86 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 75 72 72 70 72 72 86 82 90 90 76 75
+ 75 75 75 75 90 90 90 90 90 90 85 82 86 90 90 85 89 80 80 76 80 85 83 83 83 83 83 83 88 82 82 78 78 78 78 78 78 78 78 78 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 86 86 86 90 90 90 90 90 90 84 84 90 90 74 74 90 85 79 80 90 90 84 90 90 90 90 90 90 88 90 90 90 79 84 85 85 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 66 51 51 51 56 56 51 51 60 60 89 89 90 86 86 86 90 86 90 90 90 90 85 85 81 81 81 86 81 81 85 81
+ 83 83 86 90 90 83 52 55 55 40 51 56
+
+AF LL2340r C -231
+AF LL2340f U -2
+BS 1 15 LL2340r
+BS 16 16 LL2340f
+BS 17 41 LL2340r
+BS 42 42 LL2340f
+BS 43 43 LL2340r
+BS 44 44 LL2340f
+BS 45 46 LL2340r
+BS 47 47 LL2340f
+BS 48 71 LL2340r
+BS 72 74 LL2340f
+BS 75 76 LL2340r
+BS 77 80 LL2340f
+BS 81 86 LL2340r
+BS 87 87 LL2340f
+BS 88 90 LL2340r
+BS 91 95 LL2340f
+BS 96 138 LL2340r
+BS 139 139 LL2340f
+BS 140 154 LL2340r
+BS 155 163 LL2340f
+BS 164 181 LL2340r
+BS 182 185 LL2340f
+BS 186 187 LL2340r
+BS 188 189 LL2340f
+BS 190 192 LL2340r
+BS 193 193 LL2340f
+BS 194 197 LL2340r
+BS 198 200 LL2340f
+BS 201 207 LL2340r
+BS 208 208 LL2340f
+BS 209 221 LL2340r
+BS 222 230 LL2340f
+BS 231 237 LL2340r
+BS 238 240 LL2340f
+BS 241 241 LL2340r
+BS 242 242 LL2340f
+BS 243 244 LL2340r
+BS 245 245 LL2340f
+BS 246 247 LL2340r
+BS 248 248 LL2340f
+BS 249 262 LL2340r
+BS 263 269 LL2340f
+BS 270 274 LL2340r
+BS 275 280 LL2340f
+BS 281 283 LL2340r
+BS 284 292 LL2340f
+BS 293 293 LL2340r
+BS 294 297 LL2340f
+BS 298 308 LL2340r
+BS 309 313 LL2340f
+BS 314 314 LL2340r
+BS 315 317 LL2340f
+BS 318 331 LL2340r
+BS 332 336 LL2340f
+BS 337 345 LL2340r
+BS 346 348 LL2340f
+BS 349 356 LL2340r
+BS 357 378 LL2340f
+BS 379 379 LL2340r
+BS 380 390 LL2340f
+BS 391 394 LL2340r
+BS 395 408 LL2340f
+BS 409 410 LL2340r
+BS 411 416 LL2340f
+BS 417 417 LL2340r
+BS 418 424 LL2340f
+BS 425 425 LL2340r
+BS 426 441 LL2340f
+BS 442 442 LL2340r
+BS 443 450 LL2340f
+BS 451 451 LL2340r
+BS 452 480 LL2340f
+BS 481 487 LL2340r
+BS 488 488 LL2340f
+BS 489 506 LL2340r
+BS 507 512 LL2340f
+
+RD LL2340r 789 0 2
+cgatcggtgcgggcctcttcgctattacgccagctggcgaaagggggatg
+tgctgcaaggcgattaagttgggtaacgccagggttttxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxctttttttttttttttttttttttttttntgAGCGAATTGTGTCTAT
+GTCATTCACTTTCAAAAAATTATAGAACAACTCTTTCAAGAAGAAGAGAA
+AAAAATCATCTATTAACAAGGTACAAGTCTATCTCTAATTAGATGAACAA
+TATGATGGATAATCATCAATGAAGCCGAGAAGTAACATTGATGAAAGTAT
+CTTCAGGACAAGGGATAGTTAAGCCACCCATTGGATGATCAAACCCAAAC
+TCTTCTTCAGATCTGCTGAGGAGAGCTTGAAATAAGGGCTGGCTCAAGTA
+TGAGATTGGCACCACATATCTCTTCTTCTGGCTCTCTCCAACGTACACCG
+CAAGAAACCCTTTTGGTGGTGCCGCCATGGTTGCTCTTTTGCTTGTAGAA
+GCTGTTGCTGTTACGGAACGACCAAGAATCTTTTTTGCACCCAATAAACT
+TCTAACCAAAGCCATTTGTCTGAAATTTTAAACTCTTTGGAAGAGATGTA
+TGAaacgATTCAAGTTATTGGATTGCTGCTTGCCGGACGCGTGGxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaag
+
+QA 28 744 233 744
+DS CHROMAT_FILE: LL2340r PHD_FILE: LL2340r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:31 2000
+
+RT{
+LL2340r chimera phrap 1 88 000919:094547
+}
+
+RT{
+LL2340r matchElsewhereHighQual phrap 1 88 000919:094547
+}
+
+RD LL2340f 787 0 3
+cttttagcgaactGTGTCTATGTCATTCACTTTCAAAAAATTATAGAACA
+ACTCTTTCAAGAAGAAGAGAAAAAAATCATCTATTAACAAGGTACAAGTC
+TATCTCTAATTAGATGAACAATATGATGGATAATCATCAATGAAGCCGAG
+AAGTAACATTGATGAAAGTATCTTCAGGACAAGGGATAGTTAAGCCACCC
+ATTGGATGATCAAACCCAAACTCTTCTTCAGATCTGCTGAGGAGAGCTTG
+AAATAAGGGCTGGCTCAAGTATGAGATTGGCACCACATATCTCTTCTTCT
+GGCTCTCTCCAACGTACACCGCAAGAAACCCTTTTGGTGGTGCCGCCATG
+GTTGCTCTTTTGCTTGTAGAAGCTGTTGCTGTTACGGAACGACCAAGAAT
+CTTTTTTGCACCCAATAAACTTCTAACCAAAGCCATTTGTCTGAAATTTT
+AAACTCTTTGGAAGAGATGTATGAAAAGATTCAAGTTATTGGATTGCTGC
+TTGCCGGACGCGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxgcctggggtgcctaatgagtga
+gctaactcacattaattgcgttgcgctcactggcccgctttcagtcggga
+aacctgtcgtgccagctgcattaatgaatcggccacg
+
+QA 14 781 6 515
+DS CHROMAT_FILE: LL2340f PHD_FILE: LL2340f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:54 2000
+
+RT{
+LL2340f chimera phrap 679 785 000919:094547
+}
+
+RT{
+LL2340f matchElsewhereHighQual phrap 743 785 000919:094547
+}
+
+RT{
+LL2340f matchElsewhereHighQual phrap 679 732 000919:094547
+}
+
+CO Contig44 1230 2 30 U
+ctttatgggaaacaaaaaaatatttatatatcatagacctagtgactatt
+cccctgtcttctcgtctccgaatctctaataactaacatatcttcgtctc
+tctttgtactttgtaatatgagactctgacttgaacatccaatctaacac
+acaacatttaaaagccaatattgatttgccagtcttgtcttctctttctc
+atatatttggcatactatcattatagtcttgttcaactgtgtcattagcc
+tggtgagttggttgccgaacccttttctctttctctctcagctgctgctt
+catcctcagttttcttggcaaaccggctctcactatctccttgcccTTTt
+caTTTgcatgcttgtgtctTGACTCATGAagaTATGgctTtctGTTGATA
+AGTTTTCTTTctagctcggccttgtgcgcgtgctTTttctccgccttata
+attccgtggtacttgtTTcgcATTCACaTAAACAGGCTCTTGTGTCATGT
+CAAGCGGTAGTGCTGTTCTTTCACCAGGCATTCCAGGATATGGACGAAAC
+CCTTGCAATTGCTGATGTCCATATGCTCCCACCATTCCCCCATAATATGG
+ATCCTGATATGGATTTGGGACACAAGCAATGTAGTGTCCAACAAGCTCTG
+GTGGTTGTACAAGTTGCTGCTGATCATGCACGTTACCCACAGAAGCAGCA
+GGATCATTCCCTTCCACTCCATGGTTATCTGAAGGGGACGAAGTAGCAGC
+TTGTGTATCCTTCCAAGCGCCATCGTCTCCAGACGCTGAGTGAACATCGT
+TTGATTCTAAAGAGGAGGACTTTGAAAGAACCACAGAAGGTATCTCTTGA
+GGAACGACACCAAAAGAGTTATTTTTTTTCCACCATGGCTCCTCTGCATA
+CATCATTGGCTGCTCACCAGAATGGTGATTGACTTCCACTTCATTCCCAC
+TTCCCTTGGATTGCATCTCTTCAAAACAACACCAGAGGATATTCACGAGG
+GAGTTTTTTGCAATCGTCTTCGTGTTCCTATGATGATGACTTAAAGACCC
+CTGAAGTTATGTAATCGATGGAGCTCAGAGTGAGACAAAACAAAAAGAAT
+CAGAATTTGAACAGAGCCAGAGAAGAATTTGGAGAAGAGAGACGAATACG
+ATTGGGCAGAAGAAACCCTAATTTTTGGGGATTTTTGTttctcAAaattg
+ttttttctttgtatttttgcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 15 15 15 14 18 21 22 20 17
+ 17 15 29 27 21 15 12 11 11 12 13 15 17 14 15 15 17 17 18 23 23 25 25 22 22 24 24 20 25 14 14 14 25 22 22 22 19 19 16 27 17 15 15 21 21 26 30 30 28 30
+ 22 22 22 33 36 24 30 30 42 24 19 19 15 15 18 18 16 16 14 12 11 14 10 8 8 8 11 11 17 11 10 10 10 11 23 23 18 15 15 15 19 17 15 18 17 17 17 11 11 11
+ 11 10 15 12 14 11 13 14 14 13 15 10 8 10 13 18 24 21 18 18 18 24 24 25 25 23 22 19 21 34 37 38 49 45 50 42 48 49 52 39 46 47 51 54 53 61 54 56 50 53
+ 50 45 39 47 48 51 48 48 46 46 46 50 49 54 50 40 40 50 50 53 44 52 51 44 44 36 40 43 43 42 44 47 47 44 53 46 52 55 57 60 58 50 48 54 55 53 47 49 49 53
+ 54 49 48 49 54 54 61 57 55 54 47 59 43 49 44 47 47 49 50 65 64 63 63 59 61 55 55 55 63 44 52 52 55 58 68 68 68 72 77 79 68 63 62 62 59 60 60 55 49 51
+ 45 52 46 51 49 55 47 55 56 52 47 47 50 49 40 40 40 40 39 52 52 56 65 76 55 55 40 46 40 40 37 37 52 50 50 50 55 52 54 49 49 49 61 60 55 57 57 67 63 68
+ 60 67 62 58 52 50 57 57 60 68 75 78 73 62 60 67 66 60 61 65 64 80 78 80 85 83 70 73 54 52 52 59 59 50 55 59 64 73 73 65 75 63 63 55 60 58 58 58 62 62
+ 67 75 80 72 72 75 71 74 71 71 68 66 66 66 66 70 71 71 71 71 75 75 74 66 67 66 66 67 74 73 71 71 71 71 67 66 70 73 70 56 56 55 55 55 55 62 64 65 59 67
+ 54 64 64 51 51 51 45 45 45 51 51 51 56 45 43 43 68 70 68 63 63 68 64 66 70 62 60 51 58 63 66 62 59 60 61 45 45 43 43 43 43 43 51 56 56 45 45 45 45 43
+ 43 51 51 51 51 51 51 51 45 45 45 51 45 45 45 45 45 45 51 51 45 45 51 51 51 51 56 56 56 56 56 56 56 56 56 51 56 45 45 45 45 45 45 51 51 51 51 51 56 56
+ 51 45 43 43 43 40 40 41 56 56 56 56 56 56 51 45 45 45 45 45 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 45 45 45 45 45 45 51 51 45 51 51 51 51 51 56
+ 56 56 56 56 51 43 43 43 43 43 43 51 56 56 56 56 56 51 51 51 51 51 45 45 45 45 45 51 51 51 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 51 51 51 45 43 43 43 43 43 43 45 45 51 56 56 56 56 56 56 56 56 56 56 56 56 45 43 43 43 43 43 43 45 56 56 56 56 51 51 51 51 51 51 45
+ 43 43 43 43 43 46 56 51 51 45 45 45 43 43 43 43 43 43 43 43 43 43 43 43 51 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 51 51 45 45 45 51 51 51 56 56
+ 56 56 45 45 45 43 43 43 51 43 43 43 43 43 43 43 43 43 43 43 43 43 43 43 45 45 43 43 43 43 43 43 43 45 45 45 51 51 51 51 51 51 51 56 51 51 51 43 43 43
+ 43 43 43 45 51 51 43 40 40 40 40 40 39 39 39 39 39 39 40 51 51 51 51 51 51 51 51 40 40 46 46 46 46 51 51 51 51 51 51 51 40 40 40 40 40 39 39 39 39 39
+ 39 39 46 40 39 39 39 39 39 51 51 51 51 51 51 51 35 35 35 35 35 40 51 56 56 56 56 56 56 56 56 51 51 42 40 36 39 24 16 13 11 10 12 24 24 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2343f U 0
+AF LL2343r C 478
+BS 1 477 LL2343f
+BS 478 479 LL2343r
+BS 480 484 LL2343f
+BS 485 485 LL2343r
+BS 486 486 LL2343f
+BS 487 489 LL2343r
+BS 490 491 LL2343f
+BS 492 500 LL2343r
+BS 501 505 LL2343f
+BS 506 512 LL2343r
+BS 513 514 LL2343f
+BS 515 515 LL2343r
+BS 516 516 LL2343f
+BS 517 519 LL2343r
+BS 520 531 LL2343f
+BS 532 535 LL2343r
+BS 536 556 LL2343f
+BS 557 557 LL2343r
+BS 558 558 LL2343f
+BS 559 581 LL2343r
+BS 582 582 LL2343f
+BS 583 591 LL2343r
+BS 592 600 LL2343f
+BS 601 619 LL2343r
+BS 620 622 LL2343f
+BS 623 750 LL2343r
+BS 751 753 LL2343f
+BS 754 766 LL2343r
+BS 767 785 LL2343f
+BS 786 1230 LL2343r
+
+RD LL2343f 809 0 0
+nctttatgggaaacaaaaaaatatttatatatcatagacctagtgactat
+tcccctgtcttctcgtctccgaatctctaataactaacatatcttcgtct
+ctctttgtactttgtaatatgagactctgacttgaacatccaatctaaca
+cacaacatttaaaagccaatattgatttgccagtcttgtcttctctttct
+catatatttggcatactatcattatagtcttgttcaactgtgtcattagc
+ctggtgagttggttgccgaacccttttctctttctctctcagctgctgct
+tcatcctcagttttcttggcaaaccggctctcactatctccttgcccTTT
+tcaTTTgcatgcttgtgtctTGACTCATGAagaTATGgctTtctGTTGAT
+AAGTTTTCTTTctagctcggccttgtgcgcgtgctTTttctccgccttat
+aattccgtggtacttgtTTcgcATTCACatAAACAGGCTCTTGTGTCATG
+TCAAGCGGTAGTGCTGTTCTTTCACCAGGCATTCCAGGATATGGACGAAA
+CCCTTGCAATTGCTGATGTCCATATGCTCCCACCATTCCCCCATAATATG
+GATCCTGATATGGATTTGGGACACAAGCAacgtAGTGTCCAACAAGCTCT
+GGTGGTTGTACAAGTTGCTGCTGATCATGCACGTTACCCACAGAAGCAGC
+AGGATCATTCCCTTCCACTCCATGGTTATCTGAAGGGGACGAAGTAGCAG
+CTTGTGTattctTCCAAGCGCCATCGTCTCCAGACGCTGAgcgAACATCg
+cttgATTct
+
+QA 343 751 2 809
+DS CHROMAT_FILE: LL2343f PHD_FILE: LL2343f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:08 2000
+
+RD LL2343r 798 0 0
+aTAAACAGGCTCTTGTGTCATGTCAAGCGGTAGTGCTGTTCTTTCACCAG
+GCATTCCAGGATATGGACGAAACCCTTGCAATTGCTGATGTCCATATGCT
+CCCACCATTCCCCCATAATATGGATCCTGATATGGATTTGGGACACAAGC
+AATGTAGTGTCCAACAAGCTCTGGTGGTTGTACAAGTTGCTGCTGATCAT
+GCACGTTACCCACAGAAGCAGCAGGATCATTCCCTTCCACTCCATGGTTA
+TCTGAAGGGGACGAAGTAGCAGCTTGTGTATCCTTCCAAGCGCCATCGTC
+TCCAGACGCTGAGTGAACATCGTTTGATTCTAAAGAGGAGGACTTTGAAA
+GAACCACAGAAGGTATCTCTTGAGGAACGACACCAAAAGAGTTATTTTTT
+TTCCACCATGGCTCCTCTGCATACATCATTGGCTGCTCACCAGAATGGTG
+ATTGACTTCCACTTCATTCCCACTTCCCTTGGATTGCATCTCTTCAAAAC
+AACACCAGAGGATATTCACGAGGGAGTTTTTTGCAATCGTCTTCGTGTTC
+CTATGATGATGACTTAAAGACCCCTGAAGTTATGTAATCGATGGAGCTCA
+GAGTGAGACAAAACAAAAAGAATCAGAATTTGAACAGAGCCAGAGAAGAA
+TTTGGAGAAGAGAGACGAATACGATTGGGCAGAAGAAACCCTAATTTTTG
+GGGATTTTTGTttctcAAaattgttttttctttgtatttttgcggacgcg
+tggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxtgtccagaatt
+
+QA 1 718 1 753
+DS CHROMAT_FILE: LL2343r PHD_FILE: LL2343r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:45 2000
+
+CO Contig45 867 2 66 U
+ctgtaaggcgtccatctacttcaacagacaaatgtattacaaaatagctt
+caagaaaatatttataacattttgaagattCCAAAATATTTTTTTTAAAT
+AGCAAAAAAGACAAATGCAGCAAAAGATTTAACAAGAAAAACTTCACAAA
+CTTTGATCCTGGCCAATCTCTTCCCCCCTTTCTCTATGTTCCTTGGGGAT
+GCAGCAGTAGCAGTCTTTGACTATTGGAAAGGTTGTTAGTTGGCGAAAAC
+GTTGAGCCTAACGCGAGCAGTGACATCAGGGTGAAGTTTGAGTTCGGCTA
+TGTATTCTCCCGTTTCACGGATCTCCGGAAGAGAGACAAGGCGCTTGTCT
+ATATCCCTTTGGAGCTGCGCCTTGATGATGTCAACAAGGTCTTGTGCTGT
+GACAGATCCAAATATTTGTTTGCCCTTACCGCCCTTGCGTTTAACCTTGA
+AAGCCCCAACGGTTTCGAATACGGTAGCCAATTGTAGCGCCTCTTCTTTC
+ACCCTTTGCTTTTCTGCCTCTATCCTTTCGTTTTCCATCTTCATTTCCTT
+GAGCAGAAGCGGAGTCATGAGCTGAGCCTTTCCAGTGGGCAAGAGGAAGT
+TTCTGAAGAAACCGGCTTTCACGTCCAGTAATTGCCCTTGCTTGCCCAAG
+TCCAACACATCCTCTTTCAAGATTACCTTGCGGAGTTTCTTGGCTTTCTT
+CTGGGCAACAACCTCCAAAGTAGACCTTCGCTGGGATACTTTTAGTGTTT
+CATTTCCGACGACATTGAAGGTGTGAGACGAACAAAGCGAGGAGCTCCAC
+GAGAGTGACAAAGACGAAGAAGCCATTggcGAAgcttattcagtcccacg
+cgtccgcggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 34 39 39 40 37 37 37 37 40 37 56 56 56 35 35 35 35 35 34 35
+ 35 35 35 40 46 51 51 51 40 40 40 40 35 35 35 35 35 35 35 35 40 40 55 61 61 71 71 54 55 55 55 60 51 65 65 80 83 80 81 67 67 72 62 62 65 72 71 72 65 79
+ 76 74 58 63 65 67 64 62 62 58 55 55 55 55 60 72 67 72 74 84 84 88 90 90 90 80 80 79 77 70 67 72 77 85 85 77 77 72 65 65 65 65 65 76 83 88 85 80 64 66
+ 66 68 66 66 73 79 69 74 80 80 80 86 78 80 79 77 77 77 79 79 75 90 72 68 68 78 78 83 83 85 88 82 68 67 64 61 61 69 74 72 77 72 73 72 77 77 77 75 75 75
+ 75 75 68 81 81 83 77 77 85 83 77 79 80 78 78 78 80 82 85 85 83 83 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 84 84 83 83 83 89 87 87
+ 83 90 90 79 77 77 90 90 85 81 81 90 85 85 89 89 87 87 89 89 87 90 90 90 90 70 68 70 70 77 77 90 90 90 90 90 90 89 83 82 71 71 75 75 76 76 83 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 80 80 78 78 80 83 85 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 87 90 83 88 90 90 90 90 90 90 85 85 85 85 85 85 90 90 80 80 80 83 78 78 88 82 82 85 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 85 88 88 90 90 90 90 90 90 90 90 85 85 85 87 88 88 90 90 90 90 90 90 90 88 82 82 79 79 79 84 87 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 83 83 83 83 76 78 90 90 90 90 89 90 83 83 81 83 83 83 87 87 86 83 90 90 86 84 89 85 85 90 90 90 90 90 90 90 90 90 89 82 90 90 83
+ 76 76 85 79 77 85 83 75 69 66 73 76 74 76 85 90 84 90 90 75 72 63 63 61 58 65 77 83 83 75 80 70 80 76 72 78 80 89 89 85 85 80 84 72 70 70 82 75 83 83
+ 68 65 73 76 78 83 83 75 85 85 85 85 90 90 90 89 88 83 76 76 85 73 73 74 75 80 75 78 78 76 72 70 70 83 83 72 71 76 80 77 72 63 63 68 63 63 70 74 75 83
+ 90 83 68 68 65 65 61 61 79 72 64 67 72 66 63 68 68 57 67 67 67 59 53 52 59 64 72 69 83 83 83 90 85 80 72 72 70 75 75 78 79 77 75 75 68 61 55 55 55 55
+ 60 72 74 66 62 62 58 60 61 61 62 65 69 83 90 83 68 63 60 55 55 52 52 52 54 40 40 40 40 40 40 45 43 40 35 35 35 35 35 35 35 35 35 39 39 39 46 45 45 45
+ 45 45 45 35 35 35 35 37 37 46 46 42 42 42 46 46 46 46 40 40 40 40 40 40 40 29 21 15 15 15 21 21 46 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2345f U 1
+AF LL2345r C 121
+BS 1 169 LL2345f
+BS 170 173 LL2345r
+BS 174 221 LL2345f
+BS 222 222 LL2345r
+BS 223 272 LL2345f
+BS 273 276 LL2345r
+BS 277 297 LL2345f
+BS 298 300 LL2345r
+BS 301 306 LL2345f
+BS 307 308 LL2345r
+BS 309 314 LL2345f
+BS 315 325 LL2345r
+BS 326 331 LL2345f
+BS 332 338 LL2345r
+BS 339 342 LL2345f
+BS 343 346 LL2345r
+BS 347 347 LL2345f
+BS 348 359 LL2345r
+BS 360 366 LL2345f
+BS 367 367 LL2345r
+BS 368 370 LL2345f
+BS 371 377 LL2345r
+BS 378 391 LL2345f
+BS 392 400 LL2345r
+BS 401 406 LL2345f
+BS 407 416 LL2345r
+BS 417 417 LL2345f
+BS 418 418 LL2345r
+BS 419 419 LL2345f
+BS 420 427 LL2345r
+BS 428 445 LL2345f
+BS 446 448 LL2345r
+BS 449 457 LL2345f
+BS 458 475 LL2345r
+BS 476 478 LL2345f
+BS 479 482 LL2345r
+BS 483 486 LL2345f
+BS 487 493 LL2345r
+BS 494 496 LL2345f
+BS 497 519 LL2345r
+BS 520 521 LL2345f
+BS 522 525 LL2345r
+BS 526 532 LL2345f
+BS 533 549 LL2345r
+BS 550 557 LL2345f
+BS 558 561 LL2345r
+BS 562 566 LL2345f
+BS 567 567 LL2345r
+BS 568 569 LL2345f
+BS 570 575 LL2345r
+BS 576 577 LL2345f
+BS 578 581 LL2345r
+BS 582 584 LL2345f
+BS 585 586 LL2345r
+BS 587 592 LL2345f
+BS 593 594 LL2345r
+BS 595 595 LL2345f
+BS 596 596 LL2345r
+BS 597 599 LL2345f
+BS 600 637 LL2345r
+BS 638 639 LL2345f
+BS 640 677 LL2345r
+BS 678 679 LL2345f
+BS 680 731 LL2345r
+BS 732 732 LL2345f
+BS 733 867 LL2345r
+
+RD LL2345f 777 0 0
+ctgtaaggcgtccatctacttcaacagacaaatgtattacaaaatagctt
+caagaaaatatttataacattttgaagattCCAAAATATTTTTTTTAAAT
+AGCAAAAAAGACAAATGCAGCAAAAGATTTAACAAGAAAAACTTCACAAA
+CTTTGATCCTGGCCAATCTCTTCCCCCCTTTCTCTATGTTCCTTGGGGAT
+GCAGCAGTAGCAGTCTTTGACTATTGGAAAGGTTGTTAGTTGGCGAAAAC
+GTTGAGCCTAACGCGAGCAGTGACATCAGGGTGAAGTTTGAGTTCGGCTA
+TGTATTCTCCCGTTTCACGGATCTCCGGAAGAGAGACAAGGCGCTTGTCT
+ATATCCCTTTGGAGCTGCGCCTTGATGATGTCAACAAGGTCTTGTGCTGT
+GACAGATCCAAATATTTGTTTGCCCTTACCGCCCTTGCGTTTAACCTTGA
+AAGCCCCAACGGTTTCGAATACGGTAGCCAATTGTAGCGCCTCTTCTTTC
+ACCCTTTGCTTTTCTGCCTCTATCCTTTCGTTTTCCATCTTCATTTCCTT
+GAGCAGAAGCGGAGTCATGAGCTGAGCCTTTCCAGTGGGCAAGAGGAAGT
+TTCTGAAGAAACCGGCTTTCACGTCCAGTAATTGCCCTTGCTTGCCCAAG
+TCCAACACATCCTCTTTCAAGATTACCTTGCGGAGTTTCTTGGCTTTCTT
+CTGGGCAACAACCTCCAAAGTAGACCTTCGCTGGGATACTTTTAGTGTTT
+CATTTCCGACGACATTGAAGGTGTGAg
+
+QA 81 777 1 777
+DS CHROMAT_FILE: LL2345f PHD_FILE: LL2345f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:39 2000
+
+RD LL2345r 793 0 0
+CAAAAGATTTAACAAGAAAAACTTCACAAACTTTGATCCTGGCCAATCTC
+TTCCCCCCTTTCTCTATGTTCCTTGGGGATGCAGCAGTAGCAGTCTTTGA
+CTATTGGAAAGGTTGTTAGTTGGCGAAAACGTTGAGCCTAACGCGAGCAG
+TGACATCAGGGTGAAGTTTGAGTTCGGCTATGTATTCTCCCGTTTCACGG
+ATCTCCGGAAGAGAGACAAGGCGCTTGTCTATATCCCTTTGGAGCTGCGC
+CTTGATGATGTCAACAAGGTCTTGTGCTGTGACAGATCCAAATATTTGTT
+TGCCCTTACCGCCCTTGCGTTTAACCTTGAAAGCCCCAACGGTTTCGAAT
+ACGGTAGCCAATTGTAGCGCCTCTTCTTTCACCCTTTGCTTTTCTGCCTC
+TATCCTTTCGTTTTCCATCTTCATTTCCTTGAGCAGAAGCGGAGTCATGA
+GCTGAGCCTTTCCAGTGGGCAAGAGGAAGTTTCTGAAGAAACCGGCTTTC
+ACGTCCAGTAATTGCCCTTGCTTGCCCAAGTCCAACACATCCTCTTTCAA
+GATTACCTTGCGGAGTTTCTTGGCTTTCTTCTGGGCAACAACCTCCAAAG
+TAGACCTTCGCTGGGATACTTTTAGTGTTTCATTTCCGACGACATTGAAG
+GTGTGAGACGAACAAAGCGAGGAGCTCCACGAGAGTGACAAAGACGAAGA
+AGCCATTggcGAAgcttattcagtcccacgcgtccgcggacgcgtggxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxatt
+
+QA 1 713 1 747
+DS CHROMAT_FILE: LL2345r PHD_FILE: LL2345r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:18 2000
+
+CO Contig46 1226 2 44 U
+cttttggtaacgaagaggctacagatagatgaagcttgccaagaaaaagc
+ttatacaatagataaacgtgtggactgacTCTGAATTTGCTGAGTTCTAC
+AGAGCGATTGACGCGGCAAGTGCTTTCATACATTaGcTgCTTTGAGGGAA
+AGAAAATCagaAATCTTGCTGgtgagaAATCTGGTTGTATCTTGTTCTCA
+GGGCTTTCCAACATCGAGATTGAAGTAACACCCGAGAGGACAAGCAGAGT
+TTTACAACCACCATTCTGCCCAAACAAAATATCAGTGTCCAATCTATCAC
+CCACCATGCATATCTGTGACTTTTCGATTCCAAATTTGTCTGCCAAATAG
+TCCATCATAAAAGTTGAGGGTTTTCCAACTACAAGAggttcACgttGACA
+GGATCCAACAAGAGCACCAACCATAGAGCCACCACCTGCCCATTCTTGAG
+CATCGGTAAGGTGGGTGACAGCATCTCGgttTgTAGCAATGAACAGACAG
+CCCGGGTTTTCGCGGATACAGAGAGTTCCATACTGAATTTTGTAGTAGTT
+GAAATAGCGGTCAAATCCAACCACCACAGCTCCCACATCATTATCATGCT
+CCATTAGAAACCCTGGCTTCAGTTCAATCTGTTTTTTACCATCATCCGGA
+CCTCCAAGATACTGGAAACCAGCAAGGTCGAGCTCCTTCAAGATACCCTC
+CTCACCAATCACATAGACCTTCTTATCTTTGGGGAAATTAATAGACTGCA
+AGTATGCAGCTGCAGCAAAAGAAGAAGCAAATATTTCCTCCTCGTTAACA
+TTCAGGCCAAGAGTCTCAAACTTTTTGCCATATTGTTTCCTAGATTTTGT
+TGAGTTGTTTGTCACAAAAACCAACCTCTTTCCCTTGGCACGGAGCATAT
+CAAGAGTCTCTGGAACTCCCTCAATCAATTTATCTCCCTTCCAAATGACT
+CCGTCACAGTCGAAGATAAAAGTTTCAACGGAATCGATGAGCTGATCGGC
+GTTCTCGAGCTGCTGTGCCGCCATAGCTCTCGGTGTCATATTGGGAGTCA
+TCACGCGGAGAGGCTTATGATCGATTCTGCGAATGATGCCACCGGAGAAG
+GAGGAAGAACGAAATCCTGAGAAAGCCTTGACGGAGAAGAAAGGTTTGGA
+GTTTGGTAAGAGAGAAACTGAGGAAGAAGAAACAagcgctgttctgttca
+gcatcttttcttctccggacgcgtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 35 35 39 37 33 42 32 29 29 35 35 35 35 37 37 35 35 35 35 32
+ 32 35 35 32 32 32 32 35 37 35 32 35 32 32 32 29 29 29 29 29 29 29 35 32 35 32 32 35 35 35 35 35 24 29 17 21 15 28 19 27 27 28 39 35 35 35 35 35 35 35
+ 32 37 37 37 37 44 27 27 15 13 13 20 24 35 35 32 35 35 32 25 21 10 10 9 9 9 10 31 23 32 32 33 33 35 37 33 35 35 36 36 36 36 36 32 35 35 38 38 38 32
+ 35 32 32 32 36 36 35 41 41 35 42 42 35 35 30 30 29 35 37 39 39 42 35 29 26 26 26 26 26 37 35 41 31 33 27 27 27 33 29 35 32 30 30 31 28 21 21 21 37 37
+ 46 46 42 42 39 35 35 35 35 35 35 36 42 42 42 35 35 35 35 37 37 35 32 32 32 32 32 35 37 42 42 35 33 28 28 26 26 29 32 33 33 37 42 42 42 43 40 40 40 40
+ 37 37 37 37 37 37 40 40 40 40 40 35 32 32 32 32 29 35 35 35 35 35 42 36 35 33 30 36 42 44 44 44 44 44 44 35 38 36 35 35 35 35 35 35 32 37 30 33 26 28
+ 33 42 42 42 47 47 47 47 47 47 42 42 37 37 42 42 34 34 34 34 34 35 35 34 34 34 33 33 33 33 33 29 32 33 33 23 19 15 10 8 17 20 23 18 19 19 23 25 37 40
+ 40 37 37 37 37 42 37 33 42 30 30 30 30 30 42 42 37 42 32 29 29 35 35 35 33 35 35 42 35 38 38 38 38 38 42 42 42 42 42 44 42 44 44 44 44 44 48 44 47 35
+ 33 35 33 33 29 26 26 26 33 31 23 22 22 23 36 33 40 40 40 35 35 29 29 24 34 34 39 27 12 9 9 21 14 29 35 40 52 64 60 50 54 41 41 41 58 46 50 50 66 61
+ 62 62 66 66 63 69 69 67 59 55 56 56 55 50 50 58 58 61 62 66 73 75 67 60 59 55 58 62 62 66 75 80 68 55 59 59 44 44 44 37 42 33 30 29 35 42 44 49 44 58
+ 58 58 59 52 52 43 52 52 61 55 54 46 51 58 71 71 72 74 69 69 69 67 63 77 72 72 66 71 73 80 78 86 83 88 85 77 77 79 80 88 73 73 62 65 64 79 72 72 80 63
+ 54 58 56 51 51 46 52 45 47 54 55 58 52 50 44 47 52 48 51 46 50 50 48 48 56 55 61 64 63 63 58 55 55 58 53 54 54 54 61 61 69 60 60 68 70 58 73 43 40 46
+ 46 46 44 40 41 54 65 68 73 67 67 55 40 40 37 40 37 45 48 49 43 47 58 53 49 44 44 44 54 57 63 59 48 47 40 40 44 47 47 58 59 58 58 66 70 75 73 71 69 73
+ 72 68 74 83 75 80 65 64 70 78 75 80 71 70 52 50 47 55 47 47 47 50 49 48 57 70 70 72 75 75 71 72 77 77 74 71 55 56 56 58 53 53 56 61 56 60 42 40 40 45
+ 45 45 45 45 42 56 53 51 48 48 52 53 57 37 37 40 45 51 56 56 56 56 56 45 40 40 40 37 40 51 51 51 51 51 45 51 51 51 51 45 40 40 40 40 40 40 51 51 51 56
+ 56 51 51 51 51 51 51 56 56 45 40 40 40 40 40 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 56 56 51 51 51 51
+ 40 40 45 45 45 45 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 45 45 45 45 45 45 56 56 56
+ 56 56 45 43 43 43 43 43 43 51 51 51 56 56 56 56 56 56 51 51 51 51 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 41 43 41 45 45 43
+ 43 43 43 43 43 43 43 43 43 43 43 43 51 51 45 45 45 51 43 43 43 43 43 43 43 56 45 45 43 43 43 43 43 43 45 45 43 43 43 43 43 43 43 51 45 45 43 43 43 43
+ 43 43 43 43 51 43 43 43 43 43 43 45 45 45 43 43 43 43 43 43 43 43 43 43 43 43 43 45 45 45 45 45 45 45 56 56 51 51 51 51 51 51 56 45 43 43 43 43 43 51
+ 56 45 39 39 39 39 39 40 45 45 45 45 45 45 51 51 51 51 51 39 39 39 39 39 39 45 35 34 34 34 34 34 39 51 51 40 40 40 40 40 40 51 40 40 40 40 40 40 40 40
+ 40 40 40 40 51 51 51 40 40 40 40 40 40 51 56 51 40 40 40 40 40 39 40 40 40 40 39 46 46 40 40 40 45 45 45 51 51 51 51 51 51 51 51 51 51 51 56 45 45 45
+ 45 35 35 39 39 39 39 39 40 46 51 51 51 51 51 51 51 51 51 51 56 56 51 51 46 46 46 46 46 40 40 32 27 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+AF LL2347f U 0
+AF LL2347r C 473
+BS 1 474 LL2347f
+BS 475 479 LL2347r
+BS 480 482 LL2347f
+BS 483 485 LL2347r
+BS 486 521 LL2347f
+BS 522 522 LL2347r
+BS 523 523 LL2347f
+BS 524 524 LL2347r
+BS 525 530 LL2347f
+BS 531 532 LL2347r
+BS 533 544 LL2347f
+BS 545 549 LL2347r
+BS 550 555 LL2347f
+BS 556 556 LL2347r
+BS 557 561 LL2347f
+BS 562 562 LL2347r
+BS 563 563 LL2347f
+BS 564 564 LL2347r
+BS 565 572 LL2347f
+BS 573 576 LL2347r
+BS 577 579 LL2347f
+BS 580 589 LL2347r
+BS 590 594 LL2347f
+BS 595 602 LL2347r
+BS 603 606 LL2347f
+BS 607 610 LL2347r
+BS 611 611 LL2347f
+BS 612 612 LL2347r
+BS 613 613 LL2347f
+BS 614 635 LL2347r
+BS 636 639 LL2347f
+BS 640 646 LL2347r
+BS 647 647 LL2347f
+BS 648 650 LL2347r
+BS 651 652 LL2347f
+BS 653 657 LL2347r
+BS 658 659 LL2347f
+BS 660 667 LL2347r
+BS 668 668 LL2347f
+BS 669 740 LL2347r
+BS 741 746 LL2347f
+BS 747 755 LL2347r
+BS 756 763 LL2347f
+BS 764 1226 LL2347r
+
+RD LL2347f 795 0 0
+ncttttggtaacgaagaggctacagatagatgaagcttgccaagaaaaag
+cttatacaatagataaacgtgtggactgacTCTGAATTTGCTGAGTTCTA
+CAGAGCGATTGACGCGGCAAGTGCTTTCATACATTaGcTgCTTTGAGGGA
+AAGAAAATCagaAATCTTGCTGgtgagaAATCTGGTTGTATCTTGTTCTC
+AGGGCTTTCCAACATCGAGATTGAAGTAACACCCGAGAGGACAAGCAGAG
+TTTTACAACCACCATTCTGCCCAAACAAAATATCAGTGTCCAATCTATCA
+CCCACCATGCATATCTGTGACTTTTCGATTCCAAATTTGTCTGCCAAATA
+GTCCATCATAAAAGTTGAGGGTTTTCCAACTACAAGAggttcACgttGAC
+AGGATCCAACAAGAGCACCAACCATAGAGCCACCACCTGCCCATTCTTGA
+GCATCGGTAAGGTGGGTGACAGCATCTcGgttTgTAGCAATGAACAGACA
+GCCCGGGTTTTCGCGGATACAGAGAGTTCCATACTGAATTTTGTAGTAGT
+TGAAATAGCGGTCAAATCCAACCACCACAGCTCCCACATCATTATCATGC
+TCCATTAGAAACCCTGGCTTCAGTTCAATCTGTTTTTTACCATCATCCGG
+ACCTCCAAGATACTGGAAACCAGCAAGGTCGAGCTCCTTCAAGATACCCT
+CCTCACCAATCACATAGACCTTCTTATCTTTGGGGAAATTAATAGACTGc
+angtATGCAGCTGCAGcacacgacgaagcacatattccctcctcg
+
+QA 81 741 2 795
+DS CHROMAT_FILE: LL2347f PHD_FILE: LL2347f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:50 2000
+
+RD LL2347r 799 0 0
+atCTCGgtntgTAGCAATGAACAGACAGCCCGGGTTTTCGCGGATACAGA
+GAGTTCCATACTGAatttgntaGTAGTTGAAATAGCGGTCAAATCCAACC
+ACCACAGCTCCCACATCATTATCATGCTCCATTAGAAACCCTGGCTTCAG
+TTCAATCTGTTTTTTACCATCATCCGGACCTCCAAGATACTGGAAACCAG
+CAAGGTCGAGCTCCTTCAAGATACCCTCCTCACCAATCACATAGACCTTC
+TTATCTTTGGGGAAATTAATAGACTGCAAGTATGCAGCTGCAGCAAAAGA
+AGAAGCAAATATTTCCTCCTCGTTAACATTCAGGCCAAGAGTCTCAAACT
+TTTTGCCATATTGTTTCCTAGATTTTGTTGAGTTGTTTGTCACAAAAACC
+AACCTCTTTCCCTTGGCACGGAGCATATCAAGAGTCTCTGGAACTCCCTC
+AATCAATTTATCTCCCTTCCAAATGACTCCGTCACAGTCGAAGATAAAAG
+TTTCAACGGAATCGATGAGCTGATCGGCGTTCTCGAGCTGCTGTGCCGCC
+ATAGCTCTCGGTGTCATATTGGGAGTCATCACGCGGAGAGGCTTATGATC
+GATTCTGCGAATGATGCCACCGGAGAAGGAGGAAGAACGAAATCCTGAGA
+AAGCCTTGACGGAGAAGAAAGGTTTGGAGTTTGGTAAGAGAGAAACTGAG
+GAAGAAGAAACAagcgctgttctgttcagcatcttttcttctccggacgc
+gtggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxagt
+
+QA 11 719 1 754
+DS CHROMAT_FILE: LL2347r PHD_FILE: LL2347r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:27 2000
+
+CO Contig47 757 2 91 U
+tgaatttaaATCTCAAGTTGATGTCATGAAGTCACCAATATACATTCTCA
+GTTTACAATCACAAAGCTTAGTTTATGACTTTTTGGTATACAAAAAAACT
+ATATAAAAATGTCACTGAAGACCATACACAAGCAGAGCTCCAACAGCTCC
+AAGACCGGCCAATAAGCCAACAATAACACCGATCGGAGGCAGTCCTCCTC
+CTATGGCCTTCCCTGTGTTCACATCATACCTCGCAAACCTTTTCTTTGGT
+GCTATACATTGTGGACATACGTATGTATCCGGCTGTTCATCAAAAGGCTT
+TGGTAAAGTGTATATGAATCCACAATCAAGACAAATGTGAGTAGCTCTTG
+CCTTCTGAGTCTCGGTTAACTTTCTTCCAAACCGAGGAGGAGCCGGACGC
+TTATTTAGCTTCTTCACATCAACATCCGCACCACCTCTGCTGACGACAAA
+GTAGACAGAATCAGGTTTTGCCTGAATAGCTGTTTTAGTGAACCCCAACT
+TATCAGCAGGCCAAAGCTCATCTCCAAAGAAGCTGCTTGTGTACAGAACT
+TGATCTCCAGATTTAAGCCCAGCTTTTGCTGCGTTGCCACCTCCTTCCAC
+ACCAGTGATGACAACTCCACCACCTTGCTTCTGTCCAAGAGTCAGACCCA
+AAGGCTTGTCCACTTCAACCTCTATGGTCTTAGAAGCTGCTGTTCTCGCT
+TTGAC*TTCGAATCTTctcgaCAGGTTTAACCCACCGATCTTTCTGCGGA
+CGCGtgg
+
+BQ
+ 0 0 0 0 0 0 0 0 14 20 26 31 31 29 24 29 25 39 39 45 46 44 47 57 59 60 62 58 47 51 52 52 44 52 47 49 53 75 72 71 63 63 54 52 52 55 55 52 60 60
+ 58 55 55 52 52 57 64 52 55 55 55 40 38 38 42 46 55 52 52 54 64 61 64 57 61 67 66 77 66 64 71 70 69 64 65 67 67 71 70 71 64 68 71 76 76 71 71 69 72 67
+ 67 75 75 66 69 69 66 71 73 61 64 68 63 66 75 75 75 72 74 67 65 65 67 67 65 69 66 69 67 62 61 69 71 77 71 64 59 59 57 54 54 57 53 59 64 61 61 58 60 58
+ 64 75 71 68 67 64 57 53 50 47 54 67 67 65 65 60 60 60 60 60 71 66 64 62 57 57 61 60 63 72 72 83 83 75 72 69 64 69 69 68 71 61 72 64 67 64 61 58 56 50
+ 54 60 70 64 66 66 66 62 62 79 86 81 90 84 86 86 86 84 84 84 90 90 90 90 72 74 75 75 71 71 66 64 64 64 70 72 77 80 80 80 90 90 90 84 84 90 88 80 69 69
+ 69 71 71 71 71 74 57 57 57 42 42 42 42 42 44 59 63 71 66 66 67 67 72 79 79 79 84 69 74 79 70 72 70 70 76 76 87 87 90 90 90 90 90 90 90 90 90 85 74 73
+ 78 73 75 75 81 72 75 75 78 78 90 90 90 85 82 82 82 82 89 88 69 65 65 77 77 80 85 83 79 76 79 82 82 85 90 90 90 81 79 79 79 72 63 61 55 60 67 67 83 75
+ 72 66 68 61 53 52 69 69 79 76 50 54 42 42 37 35 35 35 50 50 57 87 87 90 90 90 90 90 90 90 84 84 84 79 79 85 90 82 80 88 75 73 71 74 74 76 73 76 70 77
+ 77 87 87 78 81 83 71 74 73 81 84 90 90 90 90 90 86 88 84 90 90 90 90 90 87 80 75 72 73 70 75 75 70 72 79 69 77 75 77 77 79 72 72 74 67 71 73 75 71 69
+ 63 55 50 50 56 74 60 65 65 65 56 58 58 53 53 71 71 78 85 85 77 83 81 81 77 68 66 71 53 53 53 43 43 43 38 38 38 53 53 57 78 84 86 90 90 88 88 75 75 72
+ 68 81 61 56 54 54 68 70 74 78 75 75 71 73 73 62 63 54 73 63 72 68 65 65 65 83 72 74 64 56 54 56 54 55 62 61 56 56 56 58 61 69 73 74 68 75 78 78 82 82
+ 86 86 86 90 90 75 60 57 60 60 67 69 78 68 68 60 59 62 60 59 61 62 61 62 61 64 73 69 70 64 51 51 52 54 56 64 75 73 75 90 90 90 90 83 83 84 84 78 79 79
+ 85 66 67 69 70 78 90 87 87 84 85 83 73 71 85 87 79 79 79 78 66 66 59 62 57 63 65 90 70 67 67 72 73 72 83 66 74 56 60 50 50 54 59 55 55 57 59 57 65 77
+ 85 68 65 64 57 44 43 44 48 48 48 55 49 47 46 43 42 48 63 65 63 61 60 61 54 48 44 37 37 35 39 32 44 44 45 45 50 55 61 53 51 47 45 45 45 51 35 35 35 35
+ 39 37 35 35 29 29 24 24 24 24 29 35 40 40 22 19 9 9 11 16 31 30 53 57 49 51 52 45 50 50 69 66 75 63 54 53 54 52 50 43 43 50 41 40 47 44 41 34 40 40
+ 34 34 34 0 0 0
+
+AF LL2349f U -1
+AF LL2349r C 0
+BS 1 17 LL2349r
+BS 18 19 LL2349f
+BS 20 25 LL2349r
+BS 26 37 LL2349f
+BS 38 38 LL2349r
+BS 39 77 LL2349f
+BS 78 78 LL2349r
+BS 79 98 LL2349f
+BS 99 99 LL2349r
+BS 100 101 LL2349f
+BS 102 103 LL2349r
+BS 104 114 LL2349f
+BS 115 119 LL2349r
+BS 120 147 LL2349f
+BS 148 148 LL2349r
+BS 149 149 LL2349f
+BS 150 150 LL2349r
+BS 151 151 LL2349f
+BS 152 154 LL2349r
+BS 155 179 LL2349f
+BS 180 185 LL2349r
+BS 186 190 LL2349f
+BS 191 191 LL2349r
+BS 192 192 LL2349f
+BS 193 194 LL2349r
+BS 195 210 LL2349f
+BS 211 211 LL2349r
+BS 212 217 LL2349f
+BS 218 224 LL2349r
+BS 225 225 LL2349f
+BS 226 226 LL2349r
+BS 227 234 LL2349f
+BS 235 241 LL2349r
+BS 242 247 LL2349f
+BS 248 286 LL2349r
+BS 287 288 LL2349f
+BS 289 304 LL2349r
+BS 305 305 LL2349f
+BS 306 314 LL2349r
+BS 315 318 LL2349f
+BS 319 320 LL2349r
+BS 321 323 LL2349f
+BS 324 337 LL2349r
+BS 338 342 LL2349f
+BS 343 352 LL2349r
+BS 353 353 LL2349f
+BS 354 354 LL2349r
+BS 355 362 LL2349f
+BS 363 369 LL2349r
+BS 370 370 LL2349f
+BS 371 388 LL2349r
+BS 389 390 LL2349f
+BS 391 399 LL2349r
+BS 400 401 LL2349f
+BS 402 403 LL2349r
+BS 404 405 LL2349f
+BS 406 408 LL2349r
+BS 409 409 LL2349f
+BS 410 412 LL2349r
+BS 413 413 LL2349f
+BS 414 414 LL2349r
+BS 415 415 LL2349f
+BS 416 416 LL2349r
+BS 417 417 LL2349f
+BS 418 419 LL2349r
+BS 420 423 LL2349f
+BS 424 427 LL2349r
+BS 428 432 LL2349f
+BS 433 441 LL2349r
+BS 442 446 LL2349f
+BS 447 467 LL2349r
+BS 468 468 LL2349f
+BS 469 492 LL2349r
+BS 493 493 LL2349f
+BS 494 553 LL2349r
+BS 554 554 LL2349f
+BS 555 562 LL2349r
+BS 563 563 LL2349f
+BS 564 606 LL2349r
+BS 607 609 LL2349f
+BS 610 611 LL2349r
+BS 612 612 LL2349f
+BS 613 655 LL2349r
+BS 656 696 LL2349f
+BS 697 715 LL2349r
+BS 716 716 LL2349f
+BS 717 719 LL2349r
+BS 720 722 LL2349f
+BS 723 723 LL2349r
+BS 724 748 LL2349f
+BS 749 757 LL2349r
+
+RD LL2349f 797 0 0
+cttttnagcaacccngcagTTGATGTCATGAAGTCACCAATATACATTCT
+CAGTTTACAATCACAAAGCTTAGTTTATGACTTTTTGGTATACAAAAAAA
+CTATATAAAAATGTCACTGAAGACCATACACAAGCAGAGCTCCAACAGCT
+CCAAGACCGGCCAATAAGCCAACAATAACACCGATCGGAGGCAGTCCTCC
+TCCTATGGCCTTCCCTGTGTTCACATCATACCTCGCAAACCTTTTCTTTG
+GTGCTATACATtgcggaCATACGTATGTATCCGGCTGTTCATCAAAAGGC
+TTTGGTAAAGTGTATATGAATCCACAATCAAGACAAATGTGAGTAGCTCT
+TGCCTTCTGAGTCTCGgctAACTTTCTTCCAAACCGAGGAGGAGCCGGAC
+GCTTATTTAGCTTCTTCACATCAACATCCGCACCACCTCTGCTGACGACA
+AAGTAGACAGAATCAGGTTTTGCCTGAATAGCTgttctaGTGAACCCCAA
+CTTATCAGCAGGCCAAAGCTCATCTCCAAAGAAGCTGCTTGTGTACAGAA
+CTTGATCTCCAGATTTAAGCCCAGCTTTTGCTGCGTTGCCACCTCCTTCC
+ACACCAGTGATGACAACTCCACCACCTTGCTTCTGTCCAAGAGTCAGACC
+CAAAGGCTTGTCCACTTCAACCTCTATGGTCTTAGAAGCTGCTGTTCTCG
+ctctgaccttccaatCTTcttgaCAGGTTTAACCCACCGATCTTTCTGCG
+GACGcgcggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxt
+
+QA 20 657 18 756
+DS CHROMAT_FILE: LL2349f PHD_FILE: LL2349f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:59 2000
+
+RD LL2349r 803 0 0
+ntgaatttaaATCTCAAGTTGATGTCATGAAGTCACCAATATACATTCTC
+AGTTTACAATCACanaGCTTAGTTTATGACTTTTTGGTATACAAAAAAAC
+TATATAAAAATGTCACTGAAGACCATACACAAGCAGAGCTCCAACAGCTC
+CAAGACCGGCCAATAAGCCAACAATAACACCGATCGGAGGCAGTCCTCCT
+CCTATGGCCTTCCCTGTGTTCACATCATACCTCGCAAACCTTTTCTTTGG
+TGCTATACATTGTGGACATACGTATGTATCCGGCTGTTCATCAAAAGGCT
+TTGGTAAAGTGTATATGAATCCACAATCAAGACAAATGTGAGTAGCTCTT
+GCCTTCTGAGTCTCGGTTAACTTTCTTCCAAACCGAGGAGGAGCCGGACG
+CTTATTTAGCTTCTTCACATCAACATCCGCACCACCTCTGCTGACGACAA
+AGTAGACAGAATCAGGTTTTGCCTGAATAGCTGTTTTAGTGAACCCCAAC
+TTATCAGCAGGCCAAAGCTCATCTCCAAAGAAGCTGCTTGTGTACAGAAC
+TTGATCTCCAGATTTAAGCCCAGCTTTTGCTGCGTTGCCACCTCCTTCCA
+CACCAGTGATGACAACTCCACCACCTTGCTTCTGTCCAAGAGTCAGACCC
+AAAGGCTTGTCCACTTCAACCTCTATGGTCTTAGAAGCTGCTGTTCTCGC
+TTTGAC*TTCGAATCTTctcgaCAGGTTTAACCCACCGATCTTTCTGCGG
+ACGCGtggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+aag
+
+QA 10 755 2 758
+DS CHROMAT_FILE: LL2349r PHD_FILE: LL2349r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:36 2000
+
+CO Contig48 549 2 72 U
+CTTTTTTTTTTTTTTTTTAGAGAATCATTCACGTTCATTCCATAGAAAGA
+AGTACAAACTGACTCATCCGTCACACATGGCAGAGACAACTAGATATGAC
+GTCAACACAAACGAAAACCATTATCGGCTAAAAGACAAAAGCCTGCAGCC
+GTTATTACACCGTAAACAAAACCTAGAACATCATTAGGGTTACTTAATAT
+AATTAAACTCTGATTATCTCTGCCTCCTTAGAAAGGTTCCATTATTATTC
+ATTTACAGGTGCAAGGATCACACTTGCAGTCAGATCCACACTTGCATGCA
+TCGTTCTCAGCGACACCCTCTCCAGAAGCCTCGTACTGGTTCTTCATCGC
+CGGTGCAACGCCGAAGACAAAAGTCTCGGTCGTGGTCGACTCGCCGGAAA
+AGCCCAAGTCCGGGTACATTTTGCAACCTCCGCAACCGTTGCCGCACTTG
+CAGCCAGATCCACAACCACAGTTTCCACCACAGCAAGACATTTTCTCGAG
+AAAGACCGAAGAAATAGCAGAGAATGTTTATCAAATTGCGGACGCGTGG
+
+BQ
+ 29 37 48 48 48 48 48 48 48 48 56 56 56 56 56 46 42 42 36 46 46 48 46 42 42 42 46 46 56 40 52 50 62 54 59 60 66 70 82 85 80 72 71 65 74 75 85 85 88 73
+ 77 74 80 85 79 90 90 90 90 90 90 90 90 90 90 90 84 79 76 77 75 75 81 90 90 90 90 81 90 90 90 86 86 90 90 86 90 90 86 86 86 90 90 90 90 80 83 62 61 53
+ 55 63 63 85 86 90 90 90 90 77 77 77 80 80 80 89 90 90 90 88 88 88 80 80 80 86 75 75 75 75 75 75 90 90 90 90 90 80 82 82 82 83 78 78 75 75 75 75 75 80
+ 72 72 65 77 77 84 86 90 78 82 82 81 81 78 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 90 90 90 88 77 77 77 86 86 86 86 86 89 89 90 90
+ 90 89 89 89 90 87 88 90 88 88 88 90 88 88 83 85 83 88 90 88 84 79 79 81 81 87 86 89 90 90 90 90 90 90 90 85 85 85 88 90 86 86 86 85 85 85 90 90 90 90
+ 90 90 90 90 90 90 84 84 84 84 85 89 89 90 90 90 90 90 90 90 90 90 90 79 79 86 83 86 86 90 90 88 90 90 90 90 90 90 90 90 90 81 81 89 90 90 90 90 90 86
+ 79 81 81 81 79 88 88 81 79 79 74 74 79 86 90 90 85 85 83 85 85 90 90 90 90 90 90 90 90 90 90 90 83 86 86 88 86 81 88 89 90 90 90 90 90 86 78 79 82 86
+ 79 83 84 84 85 90 86 86 86 86 86 86 81 90 80 86 86 87 87 79 78 86 80 73 78 82 89 85 84 59 59 59 36 36 36 36 36 36 58 58 79 79 81 84 84 84 85 85 90 90
+ 85 86 86 90 86 77 76 76 76 76 77 85 85 85 90 90 90 90 90 90 88 78 78 75 75 75 90 90 90 79 79 85 85 85 74 82 82 81 75 70 72 72 75 75 75 75 79 72 72 72
+ 77 77 72 89 89 90 90 90 90 90 90 90 90 90 90 90 90 84 82 82 81 82 85 80 90 90 90 90 90 80 75 75 75 79 82 81 90 90 90 90 90 83 78 81 75 77 86 86 88 88
+ 88 88 82 80 82 55 60 40 42 30 30 30 42 33 40 50 54 54 66 67 84 90 84 82 82 82 75 75 75 75 75 77 90 90 90 78 71 71 71 71 71 75 75 50 50 49 34 42 42
+
+AF LL2350r C -206
+AF LL2350f U 17
+BS 1 52 LL2350r
+BS 53 54 LL2350f
+BS 55 68 LL2350r
+BS 69 70 LL2350f
+BS 71 78 LL2350r
+BS 79 81 LL2350f
+BS 82 104 LL2350r
+BS 105 105 LL2350f
+BS 106 108 LL2350r
+BS 109 116 LL2350f
+BS 117 164 LL2350r
+BS 165 166 LL2350f
+BS 167 198 LL2350r
+BS 199 199 LL2350f
+BS 200 211 LL2350r
+BS 212 212 LL2350f
+BS 213 218 LL2350r
+BS 219 219 LL2350f
+BS 220 220 LL2350r
+BS 221 221 LL2350f
+BS 222 232 LL2350r
+BS 233 235 LL2350f
+BS 236 261 LL2350r
+BS 262 267 LL2350f
+BS 268 275 LL2350r
+BS 276 276 LL2350f
+BS 277 277 LL2350r
+BS 278 285 LL2350f
+BS 286 299 LL2350r
+BS 300 314 LL2350f
+BS 315 321 LL2350r
+BS 322 332 LL2350f
+BS 333 333 LL2350r
+BS 334 335 LL2350f
+BS 336 336 LL2350r
+BS 337 337 LL2350f
+BS 338 338 LL2350r
+BS 339 342 LL2350f
+BS 343 349 LL2350r
+BS 350 350 LL2350f
+BS 351 355 LL2350r
+BS 356 362 LL2350f
+BS 363 370 LL2350r
+BS 371 377 LL2350f
+BS 378 379 LL2350r
+BS 380 382 LL2350f
+BS 383 398 LL2350r
+BS 399 400 LL2350f
+BS 401 401 LL2350r
+BS 402 411 LL2350f
+BS 412 414 LL2350r
+BS 415 418 LL2350f
+BS 419 419 LL2350r
+BS 420 426 LL2350f
+BS 427 428 LL2350r
+BS 429 456 LL2350f
+BS 457 467 LL2350r
+BS 468 474 LL2350f
+BS 475 475 LL2350r
+BS 476 476 LL2350f
+BS 477 479 LL2350r
+BS 480 486 LL2350f
+BS 487 505 LL2350r
+BS 506 515 LL2350f
+BS 516 523 LL2350r
+BS 524 526 LL2350f
+BS 527 532 LL2350r
+BS 533 541 LL2350f
+BS 542 543 LL2350r
+BS 544 545 LL2350f
+BS 546 547 LL2350r
+BS 548 549 LL2350f
+
+RD LL2350r 801 0 2
+agggcgatcggtgcgggcctcttcgctattacgccagctggcgaaagggg
+gatgtgctgcaaggcgattaagttgggtaacgccagggttttxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxCTTTTTTTTTTTTTTTTTAGAGAATCATTCACGTTCATTCCAT
+AGAAAGAAGTACAAACTGACTCATCCGTCACACATGGCAGAGACAACTAG
+ATATGACGTCAACACAAACGAAAACCATTATCGGCTAAAAGACAAAAGCC
+TGCAGCCGTTATTACACCGTAAACAAAACCTAGAACATCATTAGGGTTAC
+TTAATATAATTAAACTCTGATTATCTCTGCCTCCTTAGAAAGGTTCCATT
+ATTATTCATTTACAGGTGCAAGGATCACACTTGCAGTCAGATCCACACTT
+GCATGCATCGTTCTCAGCGACACCCTCTCCAGAAGCCTCGTACTGGTTCT
+TCATCGCCGGTGCAACGCCGAAGACAAAAGTCTCGGTCGTGGTCGACTCG
+CCGGAAAAGCCCAAGTCCGGGTACATTTTGCAACCTCCGCAACCGTTGCC
+GCACTTGCAGCCAGATCCACAACCACAGTTTCCACCACAGCAAGACATTT
+TCTCGAGAAAGACCgaacaaATAGCAGAGAATGTTTATCAAATTGCGGAC
+GCGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxa
+t
+
+QA 45 756 208 756
+DS CHROMAT_FILE: LL2350r PHD_FILE: LL2350r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:41 2000
+
+RT{
+LL2350r chimera phrap 1 92 000919:094547
+}
+
+RT{
+LL2350r matchElsewhereHighQual phrap 1 92 000919:094547
+}
+
+RD LL2350f 798 0 2
+ctttgagaaacttcACGTTCATTCCATAGAAAGAAGTACAAACTGACTCA
+TCCGTCACACATGGCAGAGACAACTAGATATGACGTCAACACAAACGAAA
+ACCATTATCGGCTAAAAGACAAAAGCCTGCAGCCGTTATTACACCGTAAA
+CAAAACCTAGAACATCATTAGGGTTACTTAATATAATTAAACTCTGATTA
+TCTCTGCCTCCTTAGAAAGGTTCCATTATTATTCATTTACAGGTGCAAGG
+ATCACACTTGCAGTCAGATCCACACTTGCATGCATCGTTCTCAGCGACAC
+CCTCTCCAGAAGCCTCGTACTGGTTCTTCATCGCCGGTGCAACGCCGAAG
+ACAAAAGTCTCGGTCGTggccgACTCGCCGGAAAAGCCCAAGTCCGGGTA
+CATTTTGCAACCTCCGCAACCGTTGCCGCACTTGCAGCCAGATCCACAAC
+CACAGTTTCCACCACAGCAAGACATTTTCTCGAGAAAGACCGAAGAAATA
+GCAGAGAATGTTTATCAAATTGCGGACGCGTGGxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxgcct
+gcggtgcctaatgagtgagctaactcacattaattgcgttgcgctcactg
+cccgctttccagtcgggaaacctgtcgtgccagctgcattactgaatn
+
+QA 14 751 12 533
+DS CHROMAT_FILE: LL2350f PHD_FILE: LL2350f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:04 2000
+
+RT{
+LL2350f chimera phrap 697 797 000919:094547
+}
+
+RT{
+LL2350f matchElsewhereHighQual phrap 703 791 000919:094547
+}
+
+CO Contig49 516 2 56 U
+CCCCccttTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTAAGAAAGAAATTTTGGACATTTGTGGACATCGATT
+CAGTTTGGTTCTTTGGTTGGGATGGGGTTTTGGGTTAAAATGCCCATTCA
+CCAAGCGGAAGAACTTGCACAGATGCAATACACGTTGATAGACATCGAAT
+TCAAAAATAAACGCACTTCAAGAACAGCAACAGTCCGATTGAATTACACT
+CAAGTTTGCTCAGAGAGACAAGTAAACAACCATTCTTTACCCTATGGCGA
+CGGCTGCTGAGACTCCATCGGAATCCCAAGATCATTCCTCTTGAGAACAG
+GAAGCGCGACGAAAGATTTGGTCTTACTGATGGGTCTGCTCTTTTCAAGC
+CTCACAACGTCACCAAGCGGAAGAACTTGCACAGATGCAATACACGTTGA
+TAGACCGGACGCGTGG
+
+BQ
+ 22 22 27 26 12 10 10 19 33 48 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+ 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 71 71 71 71 71 90 89 89 77 75 71 71 71 72 75 75 75 70 73 66 66 51 56 51
+ 45 45 60 60 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 80 76 79 90 90 90 71 71 56 56 56 56 56 56 71 71 71
+ 90 82 75 76 76 82 76 76 83 90 90 85 85 85 83 83 83 89 83 83 85 85 82 82 82 82 82 82 90 90 80 78 82 78 78 78 90 85 85 85 85 85 90 84 82 82 82 82 82 90
+ 90 90 90 90 90 90 90 90 90 78 78 78 78 78 78 78 90 90 90 90 90 90 90 90 90 90 90 90 90 88 87 81 81 75 75 75 84 80 80 90 90 90 90 90 90 90 90 90 90 90
+ 84 83 90 90 90 78 86 90 90 90 90 90 90 90 90 89 89 89 83 81 81 83 83 83 89 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 83 83 83 82 82
+ 74 74 74 74 75 75 75 82 82 77 79 79 82 80 90 90 84 90 86 80 80 80 90 90 90 90 90 90 90 83 83 83 83 85 90 90 90 90 90 90 81 83 83 83 85 85 90 90 85 85
+ 90 79 77 70 70 70 70 70 78 88 88 88 85 90 90 90 82 83 82 90 82 86 81 80 83 83 83 83 90 90 90 90 90 84 84 82 75 76 76 82 82 90 90 89 90 90 90 85 85 85
+ 85 89 89 89 89 83 82 82 82 82 82 90 90 90 76 76 76 76 76 76 76 76 74 56 56 41 40 40 40 41 56 56 85 85 81 81 81 89 89 90 90 90 83 83 83 83 83 86 90 90
+ 90 85 81 90 90 90 86 80 80 83 55 55 56 42 56 56
+
+AF LL2351r C -249
+AF LL2351f U 112
+BS 1 154 LL2351r
+BS 155 161 LL2351f
+BS 162 162 LL2351r
+BS 163 169 LL2351f
+BS 170 175 LL2351r
+BS 176 176 LL2351f
+BS 177 182 LL2351r
+BS 183 183 LL2351f
+BS 184 202 LL2351r
+BS 203 208 LL2351f
+BS 209 209 LL2351r
+BS 210 211 LL2351f
+BS 212 217 LL2351r
+BS 218 218 LL2351f
+BS 219 269 LL2351r
+BS 270 286 LL2351f
+BS 287 290 LL2351r
+BS 291 292 LL2351f
+BS 293 302 LL2351r
+BS 303 305 LL2351f
+BS 306 324 LL2351r
+BS 325 327 LL2351f
+BS 328 334 LL2351r
+BS 335 335 LL2351f
+BS 336 336 LL2351r
+BS 337 337 LL2351f
+BS 338 344 LL2351r
+BS 345 350 LL2351f
+BS 351 357 LL2351r
+BS 358 359 LL2351f
+BS 360 360 LL2351r
+BS 361 364 LL2351f
+BS 365 366 LL2351r
+BS 367 377 LL2351f
+BS 378 379 LL2351r
+BS 380 385 LL2351f
+BS 386 390 LL2351r
+BS 391 396 LL2351f
+BS 397 398 LL2351r
+BS 399 409 LL2351f
+BS 410 416 LL2351r
+BS 417 428 LL2351f
+BS 429 430 LL2351r
+BS 431 436 LL2351f
+BS 437 442 LL2351r
+BS 443 443 LL2351f
+BS 444 444 LL2351r
+BS 445 451 LL2351f
+BS 452 455 LL2351r
+BS 456 463 LL2351f
+BS 464 464 LL2351r
+BS 465 482 LL2351f
+BS 483 484 LL2351r
+BS 485 503 LL2351f
+BS 504 507 LL2351r
+BS 508 516 LL2351f
+
+RD LL2351r 812 0 4
+gtacagggggcgtcccatttgccattcagggtgcgcaactgtgggaaggg
+cgatcggtgcgggccttttcggtattaccccaactggcgaaaaggggatg
+ttcttcaaggcgattaagttggggaaccccagggtttttxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+CCCCccttTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+TTTTTTTTTTTTTTTAAGAAAGAAATTTTGGACATTTGTGGACATCGATT
+CAGTTTGGTTCTTTGGTTGGGATGGGGTTTTGGGTTAAAATGCCCATTCA
+CCAAGCGGAAGAACTTGCACAGATGCAATACACGTTGATAGACATCGAAT
+TCAAAAATAAACGCACTTCAAGAACAGCAACAGTCCGATTGAATTACACT
+CAAGTTTGCTCAGAGAGACAAGTAAACAACCATTCTTTACCCTATGGCGA
+CGGCTGCTGAGACTCCATCGGAATCCCAAGATCATTCCTCTTGAGAACAG
+GAAGCGCGACGAAAGATTTGGTCTTACTGATGGGTCTGCTCTTTTCAAGC
+CTCACAACGTCACCAAGCGGAAGAActcgcACAGATGCAATACACGTTGA
+TAGACCGGACGCGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxaagt
+
+QA 251 766 251 766
+DS CHROMAT_FILE: LL2351r PHD_FILE: LL2351r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:46 2000
+
+RT{
+LL2351r chimera phrap 1 139 000919:094547
+}
+
+RT{
+LL2351r matchElsewhereLowQual phrap 18 33 000919:094547
+}
+
+RT{
+LL2351r matchElsewhereLowQual phrap 43 59 000919:094547
+}
+
+RT{
+LL2351r matchElsewhereHighQual phrap 106 139 000919:094547
+}
+
+RD LL2351f 805 0 6
+cctttatgaagacatTTTGGACATTTGTGGACATCGatccaGTTTGGTTC
+TTTGGTTGGGATGGGGTTTTGGGTTAAAATGcggatTCACCAAGCGGAAG
+AACTTGCACAGATGCAATACACGTTGATAGACATCGAATTCAAAAATAAA
+CGCACTTCAAGAACAGCAACAGTCCGATTGAATTACACTCAAGTTTGCTC
+AGAGAGACAAGTAAACAACCATTCTTTACCCTATGGCGACGGCTGCTGAG
+ACTCCATCGGAATCCCAAGATCATTCCTCTTGAGAACAGGAAGCGCGACG
+AAAGATTTGGTCTTACTGATGGGTCTGCTCTTTTCAAGCCTCACAACGTC
+ACCAAGCGGAAGAACTTGCACAGATGCAATACACGTTGATAGACCGGACG
+CGTGGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxgcctggggtgcctaatgagtgagctaactcac
+attaattgcgttgcgctcactgcccgctttccagtcgggaaacctgtcgt
+gccagctgcattaatgaatcggccaacgcgcggngagaggcggtttgcgt
+attgcgcgccagggtggtttttcttttcccagtgagacgggcaacagctg
+attgcccttcaccgnctgcgcctgacagagctgcaccaagcggtccacgc
+tgcct
+
+QA 15 682 9 405
+DS CHROMAT_FILE: LL2351f PHD_FILE: LL2351f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:09 2000
+
+RT{
+LL2351f chimera phrap 569 802 000919:094547
+}
+
+RT{
+LL2351f matchElsewhereHighQual phrap 787 802 000919:094547
+}
+
+RT{
+LL2351f matchElsewhereHighQual phrap 729 764 000919:094547
+}
+
+RT{
+LL2351f matchElsewhereHighQual phrap 706 727 000919:094547
+}
+
+RT{
+LL2351f matchElsewhereHighQual phrap 685 704 000919:094547
+}
+
+RT{
+LL2351f matchElsewhereHighQual phrap 569 683 000919:094547
+}
+
+CO Contig50 984 3 112 U
+ccacgcgtccgattcattgcaacgaaatatttaagacatgtttTTTGAGT
+TAATTAACAGCTTCTATTTTTTTTGTTCTTGTTCTTTATGGACCCTAAAA
+TTAAAATAGAAGATTGGGGGTGAATCATAAATCCAAAGGAGGTTTCATGG
+CCAAAGGTAAAGATGTTCGAGTAACAATTATTTTGGAATGTACCAGTTGT
+GTTCGAAATGATATTAAGAAAGAATCGGCTGGAATTTCCAGATATATTAC
+TCAAAAGAATCGGCATAACACTCCTAGTCGATTGGAATTGAGAAAATTCT
+GTCCCTATTGTTATAAACATACAATTCATGGGGAAATTAAGAAATAGATA
+AAATTGAGTGCTTGTATGTCAAATTTTATTTTAAGAACAGGAATAATGAG
+AGTATCTACGTATTATTACATATATATAAATATAAACAAATAAAATAATA
+GAAAGAAATCAAATCCTATATTCTTAATTCTATATAGAAACTCTATCCTA
+TATAGAAATAGCAATCGTTTTTATTTTGATCCGATCAAAAATAGGATTTT
+ATAGGTAAGGAATAAAAAATTATGAATAAATCTAAGCGACCTTTTACTAA
+ATCCAAGCGATCTTTTCGTCGGCGTTTGCCCCCGATCCAATCGGGGGATC
+GAATTGATTATAGAAACATGAGTTTAATTAGTCGATTTATTAGTGAACAA
+GGAAAAATATTATCTAGACGGGTGAATAGAGTAACTTTAAAACAACAACG
+ATTAATTACTATTGCTATAAAACAAGCTCGTATTTTATCTTTGTTACCTT
+TTCTTAATAATCAGAAACAATTTGAAAGAAGTGAGTCGACCCCTAGAACT
+ACTAGCCTTAGAACCAGAAAAAAATAGACTTATTCTTCAATTGAATAACT
+AATCTGAAGGAATTAAAAAAGAGGTTAATATTTTGTTCGAca*aatCCA*
+ATCA*AGAATCA*AAATTTGATTGTtacGTctgt
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 27 29 32 34 33 34 30
+ 35 34 34 34 34 34 34 34 40 40 40 40 37 40 37 40 40 40 56 56 56 56 40 40 37 37 37 40 51 51 42 42 42 51 37 40 35 35 35 39 51 51 40 40 40 40 40 40 46 35
+ 35 35 35 35 35 35 35 35 35 35 37 40 40 51 56 56 56 56 56 42 42 42 46 46 51 43 43 51 56 51 43 43 43 51 51 51 51 56 56 56 56 56 37 37 37 37 37 37 40 40
+ 40 40 45 51 56 51 51 51 51 51 51 56 56 56 50 40 37 40 40 40 40 42 42 42 40 42 40 40 40 40 40 45 56 56 56 56 56 56 56 51 51 51 45 45 45 42 42 42 42 42
+ 46 45 43 43 43 43 43 74 81 81 82 81 76 67 62 64 60 60 67 61 61 63 66 58 58 56 51 42 42 44 51 58 73 78 65 66 67 71 66 71 66 67 73 70 73 65 60 58 62 62
+ 67 78 78 88 86 86 86 80 74 80 80 80 82 77 75 71 70 79 71 78 80 90 71 71 80 83 77 79 80 80 84 77 81 79 83 87 89 84 85 90 90 90 90 90 90 90 90 86 86 88
+ 89 82 82 81 90 90 90 90 90 77 77 77 81 81 86 88 90 90 89 87 87 90 86 90 89 90 90 90 90 90 90 90 90 90 90 87 86 80 84 86 85 90 90 90 90 90 88 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 82 82 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 86 81 76 90 80 90 86 88 88 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 81 81 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 88 87 85 85 88 82 81 86 90 90 90 90 90 88 86 86 88 88
+ 90 90 90 90 86 84 84 86 86 90 90 90 90 90 90 90 90 90 90 87 88 89 89 89 88 88 88 90 90 90 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 85 90 90 90 88 90 90 90 90 90 90 90 90 90 90 90 90 90 86 86 90 87 90 90 90 90 90
+ 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 90 90 90 90 90 90 90 90 90 90 90 90 82 73 73 65 55 63 72 75 83 84 86 86 88 84 72 72 77 66 66 72 78
+ 64 70 78 76 69 69 75 75 75 76 81 81 81 75 74 68 78 78 82 86 83 85 83 65 63 55 63 58 61 64 58 66 80 76 82 86 86 82 82 90 85 81 69 62 59 65 74 71 77 81
+ 85 90 64 67 62 60 63 70 75 75 75 82 77 71 69 71 68 68 58 58 64 86 76 77 75 75 83 83 69 69 61 75 75 70 62 69 63 66 63 64 64 74 67 67 54 54 48 54 56 62
+ 62 64 57 56 59 68 56 61 61 55 54 51 52 44 48 48 37 37 42 42 56 56 40 40 40 55 55 55 69 69 69 69 66 69 67 64 64 64 64 64 61 75 72 64 61 58 45 40 48 51
+ 52 52 66 70 75 60 52 57 50 50 48 51 58 64 66 69 76 90 90 90 90 67 64 48 48 48 50 50 58 86 69 72 62 64 58 44 40 40 42 36 13 13 13 16 19 20 22 25 24 29
+ 29 25 25 27 29 42 33 29 25 25 26 25 27 44 41 41 39 44 43 22 20 16 17 17 21 23 18 19 9 9
+
+AF LL2342r U -44
+AF LL2317r U 194
+AF LL2342f C 204
+BS 1 207 LL2342r
+BS 208 231 LL2342f
+BS 232 267 LL2342r
+BS 268 268 LL2317r
+BS 269 271 LL2342r
+BS 272 274 LL2317r
+BS 275 283 LL2342r
+BS 284 284 LL2317r
+BS 285 296 LL2342r
+BS 297 297 LL2317r
+BS 298 315 LL2342r
+BS 316 316 LL2317r
+BS 317 340 LL2342r
+BS 341 341 LL2342f
+BS 342 342 LL2317r
+BS 343 352 LL2342r
+BS 353 353 LL2317r
+BS 354 362 LL2342r
+BS 363 363 LL2342f
+BS 364 364 LL2342r
+BS 365 366 LL2317r
+BS 367 371 LL2342r
+BS 372 377 LL2317r
+BS 378 379 LL2342r
+BS 380 384 LL2317r
+BS 385 390 LL2342r
+BS 391 400 LL2317r
+BS 401 410 LL2342r
+BS 411 412 LL2317r
+BS 413 413 LL2342r
+BS 414 414 LL2342f
+BS 415 424 LL2342r
+BS 425 438 LL2317r
+BS 439 442 LL2342r
+BS 443 463 LL2317r
+BS 464 468 LL2342r
+BS 469 470 LL2317r
+BS 471 472 LL2342r
+BS 473 485 LL2317r
+BS 486 487 LL2342r
+BS 488 494 LL2342f
+BS 495 497 LL2317r
+BS 498 501 LL2342r
+BS 502 513 LL2317r
+BS 514 516 LL2342r
+BS 517 520 LL2317r
+BS 521 526 LL2342f
+BS 527 549 LL2317r
+BS 550 551 LL2342f
+BS 552 590 LL2317r
+BS 591 592 LL2342r
+BS 593 627 LL2317r
+BS 628 628 LL2342f
+BS 629 674 LL2317r
+BS 675 677 LL2342f
+BS 678 685 LL2317r
+BS 686 691 LL2342f
+BS 692 706 LL2317r
+BS 707 710 LL2342f
+BS 711 715 LL2317r
+BS 716 716 LL2342f
+BS 717 751 LL2317r
+BS 752 752 LL2342f
+BS 753 754 LL2317r
+BS 755 766 LL2342f
+BS 767 775 LL2317r
+BS 776 776 LL2342f
+BS 777 777 LL2317r
+BS 778 778 LL2342f
+BS 779 779 LL2317r
+BS 780 780 LL2342f
+BS 781 784 LL2317r
+BS 785 789 LL2342f
+BS 790 792 LL2317r
+BS 793 795 LL2342f
+BS 796 796 LL2317r
+BS 797 798 LL2342f
+BS 799 802 LL2317r
+BS 803 808 LL2342f
+BS 809 811 LL2317r
+BS 812 812 LL2342f
+BS 813 813 LL2317r
+BS 814 822 LL2342f
+BS 823 828 LL2317r
+BS 829 831 LL2342f
+BS 832 838 LL2317r
+BS 839 841 LL2342f
+BS 842 842 LL2317r
+BS 843 852 LL2342f
+BS 853 853 LL2317r
+BS 854 854 LL2342f
+BS 855 859 LL2317r
+BS 860 872 LL2342f
+BS 873 878 LL2317r
+BS 879 891 LL2342f
+BS 892 893 LL2317r
+BS 894 894 LL2342f
+BS 895 895 LL2317r
+BS 896 904 LL2342f
+BS 905 905 LL2317r
+BS 906 917 LL2342f
+BS 918 919 LL2317r
+BS 920 938 LL2342f
+BS 939 944 LL2317r
+BS 945 947 LL2342f
+BS 948 958 LL2317r
+BS 959 960 LL2342f
+BS 961 966 LL2317r
+BS 967 976 LL2342f
+BS 977 977 LL2317r
+BS 978 978 LL2342f
+BS 979 984 LL2317r
+
+RD LL2342r 792 0 0
+actaxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccgattcattgcaacgaaatatttaagacatgtttTTTGAGTTAATT
+AACAGCTTCTATTTTTTTTGTTCTTGTTCTTTATGGACCCTAAAATTAAA
+ATAGAAGATTGGGGGTGAATCATAAATCCAAAGGAGGTTTCATGGCCAAA
+GGTAAAGATGTTCGAGTAACAATTATTTTGGAATGTACCAGTTGTGTTCG
+AAATGATATTAAGAAAGAATCGGCTGGAATTTCCAGATATATTACTCAAA
+AGAATCGGCATAACACTCCTAGTCGATTGGAATTGAGAAAATTCTGTCCC
+TATTGTTATAAACATACAATTCATGGGGAAATTAAGAAATAGATAAAATT
+GAGTGCTTGTATGTCAAATTTTATTTTAAGAACAGGAATAATGAGAGTAT
+CTACGTATTATTACATATATATAAATATAAACAAATAAAATAATAGAAAG
+AAATCAAATCCTATATTCTTAATTCTATATAGAAACTCTATCCTATATAG
+AAATAGCAATCGTTTTTATTTTGATCCGATCAAAAATAGGATTTTATAGG
+TAAGGAATAAAAAATTATGAATAAATCTAAGCGACCTTTTACTAAATCCA
+AGCGATCTTTTCGTCGGCGTTTGCCCCCGATCCAATCGGGGGATCGAATT
+GATTATAGAAACATGAGTTTAATTAGTCGATTTATTAGTGAACAAGGAAA
+AATATTATCTAGACGGGTGAATAGAGTAActnt*aaacaaca
+
+QA 81 778 46 792
+DS CHROMAT_FILE: LL2342r PHD_FILE: LL2342r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:40 2000
+
+RD LL2317r 791 0 0
+aatxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccgCTCAAAAGAATCGGCATAACACTCCTAGTCGATTGGAATTGAGA
+AAATTCTGTCCCTATTGTTATAAACATACAATTCATGGGGAAATTAAGAA
+ATAGATAAAATTGAGTGCTTGTATGTCAAATTTTATTTTAAGAACAGGAA
+TAATGAGAGTATCTACGTATTATTACATATATATAAATATAAACAAATAA
+AATAATAGAAAGAAATCAAATCCTATATTCTTAATTCTATATAGAAACTC
+TATCCTATATAGAAATAGCAATCGTTTTTATTTTGATCCGATCAAAAATA
+GGATTTTATAGGTAAGGAATAAAAAATTATGAATAAATCTAAGCGACCTT
+TTACTAAATCCAAGCGATCTTTTCGTCGGCGTTTGCCCCCGATCCAATCG
+GGGGATCGAATTGATTATAGAAACATGAGTTTAATTAGTCGATTTATTAG
+TGAACAAGGAAAAATATTATCTAGACGGGTGAATAGAGTAACTTTAAAAC
+AACAACGATTAATTACTATTGCTATAAAACAAGCTCGTATTTTATCTTTG
+TTACCTTTTCTTAATAATCAGAAACAATTTGAAAGAAGTGAGTCGACCCC
+TAGAACTACTAGCCTTAGAACCAganaAAAATAGACTTATTCTTCAATTG
+AATAACTAATCTGAAGGAATTAAAAAAGAGGTTAATATTTTGTTCGAca*
+aatCCA*ATCA*AGAATCA*AAATTTGATtgttacGTctgt
+
+QA 57 780 57 791
+DS CHROMAT_FILE: LL2317r PHD_FILE: LL2317r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:35 2000
+
+RD LL2342f 782 0 0
+cgAAATGATATTAAGAAAGAATCGGCTGGAATTTCCAGATATATTACTCA
+AAAGAATCGGCATAACACTCCTAGTCGATTGGAATTGAGAAAATTCTGTC
+CCTATTGTTATAAACATACAATTCATGGGGAAATTAAGAAATAGATAAAA
+TTGAGTGCTTGTATGTCAAATTTTATTTTAAGAACAGGAATAATGAGAGT
+ATCTACGTATTATTACATATATATAAATATAAACAAATAAAATAATAGAA
+AGAAATCAAATCCTATATTCTTAATTCTATATAGAAACTCTATCCTATAT
+AGAAATAGCAATCGTTTTTATTTTGATCCGATCAAAAATAGGATTTTATA
+GGTAAGGAATAAAAAATTATGAATAAATCTAAGCGACCTTTTACTAAATC
+CAAGCGATCTTTTCGTCGGCGTTTGCCCCCGATCCAATCGGGGGATCGAA
+TTGATTATAGAAACATGAGTTTAATTAGTCGATTTATTAGTGAACAAGGA
+AAAATATTATCTAGACGGGTGAATAGAGTAACTTTAAAACAACAACGATT
+AATTACTATTGCTATAAAACAAGCTCGTATTTTATCTTTGTTACCTTTTC
+TTAATAATCAGAAACAATTTGAAAGAAGTGAGTCGACCCCTAGAACTACT
+AGCCTTAGAACCAGAAAAAAATAGACTTATTCTTCAATTGAATAACTAAT
+CTGAAGGAATTAAAAAAGAGGTTAATATTTTGTTCGAcacaatCcacatc
+acagAATcacaaaTTTGATTGTtacgtctgtg
+
+QA 29 737 1 781
+DS CHROMAT_FILE: LL2342f PHD_FILE: LL2342f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:04 2000
+
+CO Contig51 797 4 178 U
+cgttTGGGATCCGTGTGGAAG*ACGATTACAACAGGTGTTGTCCTCTGAG
+GACATAAAATACACA*CCGAGATTCATCAACTCAT*TGCTGGAGTTAGCA
+TATCTACAATTGGGTGAAATGGGGAGCGATTTGCAGGCATTTGCTCGGCA
+TGCCGGTAGAGGTGTGGTCAATAAGAGCGACCTCATGCTATACCTGAGAA
+AGCAACCTGACCTACAGGAAAGAGTTACTCAAGAATAAGAATTTTCGTTT
+TAAAACCTAAGAGTCACTTTAAAATTTGTATACACTTATTTTTTTTATAA
+CTTATTTAATAATAAAAATCATAAATCATAAGAAATTCGCTTATTTAGAA
+GTGTCAACAACGTATCTACCAACGATTTGACCCTTTTCCATCTTTTCGTA
+AATTTCTGGCAAGGTAGACAAGCCGACAACCTTGATTGGAGACTTGACCA
+AACCTCTGGCGAAGAAGTCCAAAGCTCCACCGCGGTGGCGGCCGTTACTT
+ACTTAGAGCTCGACGTCTTACTTACTTAGCGGCCGCACTAGTAGATCTGA
+ATTCCCGGGGTCGACCCACCCTCTTTTTTTGGGTTTGGTGGGGTATCTTC
+ATCATCGAATAGATAGTTATATACATCATCCATTGTAGTGGTATTAAACA
+TCCCTGTAGTGATTCCAAACGCGTTATACGCAGTTTGGTCCGTCCAACCA
+GGTGACAGTGGTTTTGAATTATTACCATCATCAATTTtACTAGCCGTGAT
+TTCATTATTCATGAAGTTATCATGAACGTTAGAGGAGGCAatTggtt
+
+BQ
+ 0 8 8 12 25 33 37 36 42 42 42 44 44 44 44 44 49 49 49 44 44 44 39 39 43 43 44 44 44 45 44 49 49 49 50 50 61 50 50 50 50 50 56 56 56 45 45 45 45 45
+ 45 45 45 45 56 56 56 56 56 66 61 61 50 50 50 50 50 45 45 45 45 45 50 56 56 56 50 50 50 55 55 55 50 50 47 50 50 50 55 61 50 49 49 49 49 49 50 56 56 56
+ 56 56 56 50 50 61 61 61 66 66 50 56 56 56 56 56 66 66 66 66 56 49 45 45 45 45 45 45 45 45 45 45 45 50 50 50 50 49 45 45 45 45 45 48 53 53 53 53 49 49
+ 49 49 53 53 61 61 61 66 66 66 66 66 61 61 61 61 61 61 55 55 55 53 53 53 53 53 53 53 53 53 55 53 53 53 53 53 53 53 53 53 53 53 61 61 66 66 55 55 61 53
+ 53 53 55 55 53 53 53 53 53 53 53 53 53 53 66 66 66 66 66 66 61 61 53 53 53 53 53 53 66 66 61 61 61 61 61 61 66 66 66 66 61 61 61 61 61 55 55 55 61 61
+ 55 61 61 61 61 61 61 61 66 66 66 53 53 53 53 53 53 66 66 66 66 66 61 61 61 61 55 61 61 53 53 53 53 53 53 66 66 66 66 66 66 66 66 66 66 53 53 53 53 53
+ 53 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 55 53 53 53 53 53 53 53 61 61 61 61 66 66 66 66 66 66
+ 66 66 66 66 66 61 61 61 61 61 61 66 61 61 61 61 61 61 61 61 61 61 66 66 61 55 55 55 55 55 66 66 66 61 61 61 55 55 55 66 66 66 66 61 61 61 61 66 66 66
+ 66 66 66 66 66 66 66 66 66 61 61 61 55 55 55 55 55 55 66 66 61 61 61 61 61 61 66 66 66 61 61 66 66 66 61 66 61 55 55 55 55 55 61 55 55 55 55 61 61 66
+ 66 66 66 66 61 55 55 55 55 55 66 66 66 66 66 66 66 66 66 61 61 61 61 53 53 55 55 55 55 61 61 61 61 55 55 53 53 53 53 61 61 53 53 53 53 53 55 55 55 55
+ 55 55 66 55 55 55 55 50 50 50 50 55 55 61 56 55 55 55 55 55 61 61 61 61 55 55 50 50 47 47 47 47 45 45 49 50 50 55 66 66 66 66 66 66 66 66 66 61 61 61
+ 61 61 61 66 66 61 61 61 55 55 50 55 52 55 50 50 50 55 56 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 56 61 55 55 56 52 52 52 61 61 61 61
+ 61 61 61 61 61 61 66 66 66 66 66 61 50 50 47 50 50 50 66 66 66 66 66 66 56 56 56 43 50 50 50 50 50 50 58 58 42 39 39 35 39 39 39 44 44 58 58 47 50 50
+ 58 58 50 50 50 58 58 46 39 39 39 34 34 39 42 42 42 42 50 39 41 41 42 39 39 42 42 39 40 44 44 44 58 42 44 39 35 42 39 39 35 39 39 42 42 42 42 42 50 50
+ 50 50 50 50 42 47 35 35 35 35 39 39 39 39 30 33 37 37 39 39 44 44 44 44 39 42 50 50 44 39 32 34 27 22 17 24 37 37 36 38 42 40 34 34 34 42 39 35 35 37
+ 37 42 42 42 42 39 42 50 50 39 39 35 37 29 25 35 31 37 39 50 39 39 39 39 35 28 28 31 34 34 25 27 33 28 25 25 25 18 18 23 16 17 0 0
+
+AF LL2269r U 1
+AF LL2335r U 1
+AF LL2312r U 1
+AF LL2320r U 2
+BS 1 1 LL2312r
+BS 2 4 LL2320r
+BS 5 5 LL2312r
+BS 6 12 LL2320r
+BS 13 15 LL2269r
+BS 16 25 LL2320r
+BS 26 26 LL2269r
+BS 27 27 LL2320r
+BS 28 29 LL2269r
+BS 30 31 LL2335r
+BS 32 35 LL2320r
+BS 36 37 LL2269r
+BS 38 38 LL2320r
+BS 39 45 LL2269r
+BS 46 60 LL2320r
+BS 61 61 LL2269r
+BS 62 75 LL2320r
+BS 76 78 LL2269r
+BS 79 81 LL2320r
+BS 82 90 LL2269r
+BS 91 93 LL2320r
+BS 94 99 LL2269r
+BS 100 100 LL2320r
+BS 101 101 LL2269r
+BS 102 102 LL2312r
+BS 103 108 LL2269r
+BS 109 124 LL2320r
+BS 125 125 LL2335r
+BS 126 146 LL2320r
+BS 147 147 LL2269r
+BS 148 151 LL2320r
+BS 152 155 LL2269r
+BS 156 157 LL2320r
+BS 158 160 LL2312r
+BS 161 196 LL2320r
+BS 197 199 LL2269r
+BS 200 205 LL2320r
+BS 206 210 LL2312r
+BS 211 223 LL2320r
+BS 224 225 LL2269r
+BS 226 231 LL2320r
+BS 232 233 LL2269r
+BS 234 239 LL2320r
+BS 240 241 LL2269r
+BS 242 243 LL2320r
+BS 244 249 LL2312r
+BS 250 261 LL2320r
+BS 262 264 LL2312r
+BS 265 280 LL2320r
+BS 281 282 LL2312r
+BS 283 288 LL2320r
+BS 289 289 LL2335r
+BS 290 293 LL2320r
+BS 294 298 LL2312r
+BS 299 304 LL2320r
+BS 305 308 LL2312r
+BS 309 309 LL2320r
+BS 310 315 LL2312r
+BS 316 330 LL2320r
+BS 331 335 LL2269r
+BS 336 358 LL2320r
+BS 359 360 LL2312r
+BS 361 364 LL2320r
+BS 365 365 LL2312r
+BS 366 384 LL2320r
+BS 385 386 LL2312r
+BS 387 395 LL2320r
+BS 396 396 LL2312r
+BS 397 411 LL2320r
+BS 412 412 LL2312r
+BS 413 422 LL2320r
+BS 423 423 LL2312r
+BS 424 429 LL2320r
+BS 430 431 LL2269r
+BS 432 434 LL2320r
+BS 435 437 LL2335r
+BS 438 440 LL2320r
+BS 441 446 LL2312r
+BS 447 450 LL2320r
+BS 451 455 LL2269r
+BS 456 457 LL2320r
+BS 458 458 LL2269r
+BS 459 472 LL2320r
+BS 473 476 LL2269r
+BS 477 478 LL2320r
+BS 479 480 LL2335r
+BS 481 485 LL2269r
+BS 486 490 LL2320r
+BS 491 492 LL2269r
+BS 493 494 LL2312r
+BS 495 499 LL2320r
+BS 500 503 LL2269r
+BS 504 516 LL2312r
+BS 517 517 LL2269r
+BS 518 518 LL2320r
+BS 519 529 LL2269r
+BS 530 538 LL2320r
+BS 539 545 LL2312r
+BS 546 557 LL2320r
+BS 558 558 LL2269r
+BS 559 561 LL2320r
+BS 562 565 LL2312r
+BS 566 566 LL2335r
+BS 567 568 LL2269r
+BS 569 569 LL2335r
+BS 570 571 LL2269r
+BS 572 574 LL2312r
+BS 575 589 LL2320r
+BS 590 593 LL2269r
+BS 594 595 LL2320r
+BS 596 596 LL2312r
+BS 597 601 LL2269r
+BS 602 602 LL2312r
+BS 603 604 LL2320r
+BS 605 613 LL2269r
+BS 614 617 LL2312r
+BS 618 621 LL2320r
+BS 622 627 LL2269r
+BS 628 637 LL2320r
+BS 638 639 LL2312r
+BS 640 640 LL2320r
+BS 641 641 LL2312r
+BS 642 646 LL2320r
+BS 647 648 LL2335r
+BS 649 651 LL2320r
+BS 652 653 LL2335r
+BS 654 654 LL2312r
+BS 655 661 LL2320r
+BS 662 669 LL2269r
+BS 670 673 LL2320r
+BS 674 676 LL2269r
+BS 677 678 LL2320r
+BS 679 680 LL2312r
+BS 681 681 LL2335r
+BS 682 682 LL2312r
+BS 683 686 LL2269r
+BS 687 687 LL2320r
+BS 688 688 LL2312r
+BS 689 690 LL2320r
+BS 691 691 LL2312r
+BS 692 692 LL2320r
+BS 693 702 LL2312r
+BS 703 703 LL2320r
+BS 704 704 LL2312r
+BS 705 705 LL2320r
+BS 706 707 LL2312r
+BS 708 709 LL2269r
+BS 710 716 LL2320r
+BS 717 718 LL2335r
+BS 719 728 LL2312r
+BS 729 731 LL2320r
+BS 732 732 LL2312r
+BS 733 741 LL2320r
+BS 742 742 LL2312r
+BS 743 744 LL2320r
+BS 745 745 LL2312r
+BS 746 747 LL2320r
+BS 748 753 LL2312r
+BS 754 754 LL2320r
+BS 755 758 LL2335r
+BS 759 759 LL2320r
+BS 760 763 LL2312r
+BS 764 764 LL2269r
+BS 765 765 LL2312r
+BS 766 766 LL2335r
+BS 767 767 LL2312r
+BS 768 768 LL2335r
+BS 769 769 LL2312r
+BS 770 770 LL2335r
+BS 771 771 LL2320r
+BS 772 774 LL2269r
+BS 775 777 LL2320r
+BS 778 778 LL2335r
+BS 779 784 LL2320r
+BS 785 787 LL2269r
+BS 788 790 LL2335r
+BS 791 792 LL2320r
+BS 793 797 LL2335r
+
+RD LL2269r 792 0 0
+natatgGGATCCGTGTGGAAGAACGATTACAACAGGTGTTGTCCTCTGAG
+GACATAAAATACACA*CCGAGATTCATCAACTCAT*TGCTGGAGTTAGCA
+TATCTACAATTGGGTGAAATGGGGAGCGATTTGCAGGCATTTGCTCGGCA
+TGCCGGTAGAGGTGTGGTCAATAAGAGCGACCTCATGCTATACCTGAGAA
+AGCAACCTGACCTACAGGAAAGAGTTACTCAAGAATAAGAATTTTCGTTT
+TAAAACCTAAGAGTCACTTTAAAATTTGTATACACTTATTTTTTTTATAA
+CTTATTTAATAATAAAAATCATAAATCATAAGAAATTCGCTTATTTAGAA
+GTGTCAACAACGTATCTACCAACGATTTGACCCTTTTCCATCTTTTCGTA
+AATTTCTGGCAAGGTAGACAAGCCGACAACCTTGATTGGAGACTTGACCA
+AACCTCTGGCGAAGAAGTCCAAAGCTCCACCGCGGTGGCGGCCGTTACTT
+ACTTAGAGCTCGACGTCTTACTTACTTAGCGGCCGCACTAGTAGATCTGA
+ATTCCCGGGGTCGACCCACCCTCTTTTTTTGGGTTTGGTGGGGTATCTTC
+ATCATCGAATAGATAGTTATATACATCATCCATTGTAGTGGTATTAAACA
+TCCCTGTAGTGATTCCAAACGCGTTATACGCAGTTTGGTCCGTCCAACCA
+GGTGACAGTGGTTTTGAATTATTACCATCATCAaTtntaCTAGCCGTGAT
+TTCATTATTCATGAAGTTATCATGAACGTTAGAGGAGGCAat
+
+QA 6 792 5 792
+DS CHROMAT_FILE: LL2269r PHD_FILE: LL2269r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:32 2000
+
+RD LL2335r 797 0 0
+catttGGGATCCGTGTGGAAG*ACGATTACAACAGGTGTTGTCCTCTGAG
+GACATAAAATACACA*CCGAGATTCATCAACTCatgtgCTGGAGTTAGCA
+TATCTACAATTGGGTGAAATGGGGAGCGATTTGCAGGCATTTGCTCGGCA
+TGCCGGTAGAGGTGTGGTCAATAAGAGCGACCTCATGCTATACCTGAGAA
+AGCAACCTGACCTACAGGAAAGAGTTACTCAAGAATAAGAATTTTCGTTT
+TAAAACCTAAGAGTCACTTTAAAATTTGTATACACTTATTTTTTTTATAA
+CTTATTTAATAATAAAAATCATAAATCATAAGAAATTCGCTTATTTAGAA
+GTGTCAACAACGTATCTACCAACGATTTGACCCTTTTCCATCTTTTCGTA
+AATTTCTGGCAAGGTAGACAAGCCGACAACCTTGATTGGAGACTTGACCA
+AACCTCTGGCGAAGAAGTCCAAAGCTCCACCGCGGTGGCGGCCGTTACTT
+ACTTAGAGCTCGACGTCTTACTTACTTAGCGGCCGCACTAGTAGATCTGA
+ATTCCCGGGGTCGACCCACCCTCTTTTTTTGGGTTTGGTGGGGTATCTTC
+ATCATCGAATAGATAGTTATATACATCATCCATTGTAGTGGTATTAAACA
+TCCCTGTAGTGATTCCAAACGCGTTATACGCAGTTTGGTCCGTCCAACCA
+GGTGACAGTGGTTTTGAATTATTACCATCATCAattctaCTAGCCGTGAT
+TTCATTATTCATGAAGTTATCATGAACGTTAGAGGAGGCAatTggtt
+
+QA 6 795 3 797
+DS CHROMAT_FILE: LL2335r PHD_FILE: LL2335r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:45 2000
+
+RD LL2312r 787 0 0
+cgttTGGGATCCGTGTGGAAG*ACGATTACAACAGGTGTTGTCCTCTGAG
+GACATAAAATACAcanccgAGATTCATCAACTCAT*TGCTGGAGTTAGCA
+TATCTACAATTGGGTGAAATGGGGAGCGATTTGCAGGCATTTGCTCGGCA
+TGCCGGTAGAGGTGTGGTCAATAAGAGCGACCTCATGCTATACCTGAGAA
+AGCAACCTGACCTACAGGAAAGAGTTACTCAAGAATAAGAATTTTCGTTT
+TAAAACCTAAGAGTCACTTTAAAATTTGTATACACTTATTTTTTTTATAA
+CTTATTTAATAATAAAAATCATAAATCATAAGAAATTCGCTTATTTAGAA
+GTGTCAACAACGTATCTACCAACGATTTGACCCTTTTCCATCTTTTCGTA
+AATTTCTGGCAAGGTAGACAAGCCGACAACCTTGATTGGAGACTTGACCA
+AACCTCTGGCGAAGAAGTCCAAAGCTCCACCGCGGTGGCGGCCGTTACTT
+ACTTAGAGCTCGACGTCTTACTTACTTAGCGGCCGCACTAGTAGATCTGA
+ATTCCCGGGGTCGACCCACCCTCTTTTTTTGGGTTTGGTGGGGTATCTTC
+ATCATCGAATAGATAGTTATATACATCATCCATTGTAGTGGTATTAAACA
+TCCCTGTAGTGATTCCAAACGCGTTATACGCAGTTTGGTCCGTCCAACCA
+GGTGACAGTGGTTTTGAATTATTACCATCATCAaTtntaCTAGCCGTGAT
+TTCATTATTCATGAAGTTATCATGAACGTTAGAGGag
+
+QA 6 785 1 787
+DS CHROMAT_FILE: LL2312r PHD_FILE: LL2312r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:11 2000
+
+RD LL2320r 791 0 0
+gtttGGGATCCGTGTGGAAG*ACGATTACAACAGGTGTTGTCCTCTGAGG
+ACATAAAATACACA*CCGAGATTCATCAACTCAT*TGCTGGAGTTAGCAT
+ATCTACAATTGGGTGAAATGGGGAGCGATTTGCAGGCATTTGCTCGGCAT
+GCCGGTAGAGGTGTGGTCAATAAGAGCGACCTCATGCTATACCTGAGAAA
+GCAACCTGACCTACAGGAAAGAGTTACTCAAGAATAAGAATTTTCGTTTT
+AAAACCTAAGAGTCACTTTAAAATTTGTATACACTTATTTTTTTTATAAC
+TTATTTAATAATAAAAATCATAAATCATAAGAAATTCGCTTATTTAGAAG
+TGTCAACAACGTATCTACCAACGATTTGACCCTTTTCCATCTTTTCGTAA
+ATTTCTGGCAAGGTAGACAAGCCGACAACCTTGATTGGAGACTTGACCAA
+ACCTCTGGCGAAGAAGTCCAAAGCTCCACCGCGGTGGCGGCCGTTACTTA
+CTTAGAGCTCGACGTCTTACTTACTTAGCGGCCGCACTAGTAGATCTGAA
+TTCCCGGGGTCGACCCACCCTCTTTTTTTGGGTTTGGTGGGGTATCTTCA
+TCATCGAATAGATAGTTATATACATCATCCATTGTAGTGGTATTAAACAT
+CCCTGTAGTGATTCCAAACGCGTTATACGCAGTTTGGTCCGTCCAACCAG
+GTGACAGTGGTTTTGAATTATTACCATCATCAATTTtACTAGCCGTGATT
+TCATTATTCATGAAGTTATCATGAACGTTAGAGGAGGCAat
+
+QA 4 791 1 791
+DS CHROMAT_FILE: LL2320r PHD_FILE: LL2320r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:12 2000
+
+CO Contig52 657 5 44 U
+tgaggaacggtagcttggaacagtacagcggggttaatgaagggtttcca
+cccTTCcTGtggtgcaggagccACATGAgaaggatgttgGggaAaGAAGA
+TaatgcAaATGAGAATGAaacAGAGAGAAAGAGATGAGAGTTTGGATGAC
+TAGTTCTAGATCGCGAGCGG*CCGCCCTTTTTTTTTTTTTTTTTTTTTAA
+CTTTGAAAAGCTTCATTTTGAAGGAAATAATGAACAAACTCACCATATTG
+GTCAGTTATTGACCTTATTACACGCAATTAGCAACATAAAAAGTTTTGGA
+TCTTATTGACGATCTTCTCTTCTTTGTTTTATGCCCTTTGCTCCACGTAT
+GACCCCGAGAGTCCTGAGGTTCCAAGAACGGTAACCTTTCCCCACTCTGA
+ACCGAGCCCGCTGCATCTCATCTTCAAGTGGATCATCTTTGTCCCCAACT
+CCATCTCAACCTTCTTCTTCAAGTAAACTTCTTCA*GGCCCAAACGCATC
+TAGGTAGGTAGTATACCTATGATAAGCAAGATGCAGCGCTTTGGTTATAT
+CTTGTGGTTGCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAggggcg
+ggccggt
+
+BQ
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 18 18 20 20 22 18 23 20 18 11 14 15 19 16 17 10 10 10 17 12 16 24 21 27 21 27 21 18 14 14 8 10 16 16 10 13 18 18 22 15 15 15 22 19 29 25 25 25 25
+ 25 18 19 17 17 12 20 15 29 29 40 40 40 32 29 25 21 21 10 12 12 31 31 48 40 40 40 27 25 25 37 40 40 40 40 40 34 32 32 32 32 25 25 25 29 29 40 40 40 40
+ 29 43 50 56 71 71 65 81 81 83 80 80 80 76 74 73 80 75 76 68 69 70 70 77 85 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 77 76 75 75 75
+ 81 78 74 77 77 90 85 81 81 75 75 87 81 84 84 77 81 70 70 70 71 72 90 88 78 81 76 76 76 71 72 75 81 75 86 81 84 77 77 70 70 75 74 88 87 90 90 84 90 76
+ 73 75 80 76 76 76 79 72 72 73 61 61 73 73 82 79 80 73 73 61 56 56 61 61 70 73 78 76 76 79 79 75 78 78 90 90 90 90 90 90 90 88 88 85 88 90 90 90 90 90
+ 80 78 80 83 82 82 72 75 75 75 78 81 80 90 90 90 90 90 83 89 89 90 90 90 90 90 75 75 75 84 84 81 82 76 80 90 76 75 75 75 81 84 78 78 73 73 75 76 78 80
+ 76 78 70 71 72 70 61 61 59 67 67 65 65 67 74 74 86 86 86 82 88 90 86 77 76 70 75 76 79 74 74 73 75 74 80 80 89 89 90 90 84 80 80 65 51 51 57 56 55 66
+ 60 66 58 55 55 60 58 60 60 51 51 53 59 59 50 57 57 70 70 64 61 61 67 58 64 64 62 62 66 77 82 75 57 56 56 41 41 41 41 50 65 65 71 79 63 51 58 58 53 55
+ 61 66 57 57 57 57 57 61 61 65 65 59 59 62 63 78 90 81 78 57 58 42 43 43 51 51 46 46 56 59 59 50 50 56 43 42 43 43 51 45 43 43 40 55 55 56 55 57 57 66
+ 66 59 59 60 69 73 59 59 55 60 68 72 62 61 60 55 59 56 73 73 73 67 66 68 71 75 66 71 60 63 56 61 54 54 53 54 52 53 55 58 65 52 52 62 56 52 37 42 43 36
+ 36 21 21 21 37 29 35 35 28 29 32 48 48 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 44 44 56 47 47 56 44 56 44 44 44 44 47 47 47 56
+ 56 56 56 56 56 56 56 56 56 47 47 47 44 48 56 42 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 47 56 47 48 48 48 44 40 40 28 26 19 18 15 0 0 0 0 0
+ 0 0 0 0 0
+
+AF LL2297f C -232
+AF LL2296r C 1
+AF LL2332r U 93
+AF LL2297r U 93
+AF LL2259r U 95
+BS 1 150 LL2296r
+BS 151 153 LL2332r
+BS 154 160 LL2297r
+BS 161 163 LL2297f
+BS 164 164 LL2332r
+BS 165 175 LL2297r
+BS 176 182 LL2297f
+BS 183 195 LL2297r
+BS 196 203 LL2297f
+BS 204 208 LL2297r
+BS 209 210 LL2297f
+BS 211 212 LL2297r
+BS 213 216 LL2297f
+BS 217 217 LL2297r
+BS 218 218 LL2297f
+BS 219 221 LL2297r
+BS 222 222 LL2297f
+BS 223 225 LL2297r
+BS 226 231 LL2297f
+BS 232 236 LL2297r
+BS 237 240 LL2297f
+BS 241 245 LL2297r
+BS 246 248 LL2297f
+BS 249 250 LL2297r
+BS 251 253 LL2297f
+BS 254 254 LL2297r
+BS 255 260 LL2297f
+BS 261 265 LL2297r
+BS 266 268 LL2297f
+BS 269 276 LL2297r
+BS 277 277 LL2297f
+BS 278 285 LL2297r
+BS 286 287 LL2297f
+BS 288 309 LL2297r
+BS 310 313 LL2297f
+BS 314 332 LL2297r
+BS 333 333 LL2297f
+BS 334 341 LL2297r
+BS 342 342 LL2297f
+BS 343 391 LL2297r
+BS 392 394 LL2297f
+BS 395 467 LL2297r
+BS 468 468 LL2297f
+BS 469 657 LL2297r
+
+RD LL2297f 797 0 2
+tcagggccaggcggtgaagggcaatcagctgttgcccgtctcactggtga
+aaagaaaaaccaccctggcgcccaatacgcaaaccgcctctccccgcgcg
+ttggccgattcattaatgcagctggcacgacaggtttcccgactggaaag
+cgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggca
+ccccaggcxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxCCACGCGTCCGCTAGTTTTAGATCGCGAG
+CGG*CCGCCCTTTTTTTTTTTTTTTTTTTTTAACTTTGAAAAGCTTCATT
+TTGAAGGAAATAATGAACAAACTCACCATATTGGTCAGTTATTGACCTTA
+TTACACGCAATTAGCAACATAAAAAGTTTTGGATCTTATTGACGATCTTC
+TCTTCTTTGTTTTATGCCCTTTGCTCCACGTATGACCCCGAGAGTCCTGA
+GGTTCCAAGAACGGTAACCTTTCCCCACTCTGAACCGAGCCCGCTGCATC
+TCATCTTCAAGTGGATCATctatGTCCCCAACTCCATCTCAACCTTCTTC
+TTCAAGtgagcTTCTtcgcGgcccataCGCATCTAGGTAGGTAGTATACC
+TATGATAAGCAAGATGCAGCGCTTTGGTTATATctggtggtgtaaan
+
+QA 15 781 383 791
+DS CHROMAT_FILE: LL2297f PHD_FILE: LL2297f.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:44:37 2000
+
+RT{
+LL2297f chimera phrap 1 208 000919:094547
+}
+
+RT{
+LL2297f matchElsewhereHighQual phrap 1 208 000919:094547
+}
+
+RD LL2296r 799 0 0
+tgaggaacggtagcttggaacagtacagcggggttaatgaagggtttcca
+cccTTCcTGtggtgcaggagccACATGAgaaggatgttgGggaAaGAAGA
+TaatgcAaATGAGAATGAaacAGAGAGAAAGAGATGAGAGTTTGGATGAC
+TCGtTCTAGATCGCGAGcgggcCGCCCTTTTTTTTTTTTTTTtAaaCAAA
+ACCCAAGGcaaCTAGGAAAATGCAATCAAATAGTACATCGTATCTCATTG
+GTTTATTATTCCATCTCGGGAATTTTATTACAAAAACAAAAGAAAAACAA
+GATTACCTTAACTTAAAATCATCGTCAAGGCAATAAAAACCGAACAATAT
+TACAGAATCGAAGAAACAACAACATAAACACACAAAACATATTTGCTTAT
+AAATATTATTGATTACGAAGAAATTATACGACATGTCTCCCGCAGAGGCT
+TTTCTCACCTTTAGGCGTCGGACGTGGAAGGAGCT*GTTGCATCGTCAGT
+CGTGGGAGGAGCTGTTGCATCGTCGGACGCAGGAGGAGCTGTGTCAGCGT
+CGGCTGTGGCATCCTCGGAGGCTGATTTATCCTTTGATGAGTTAAGTTTG
+TTGAACAATGGCTTAAACACGAACTCTACCGCCAACCATGCGACAGCTAA
+AGCTCCGACAACAACTGTCGTTGTTTGTTTCTTGCCTGAAGATTTCTCCA
+GTTTTGGAAAATGAATTTCtggtactttatccattgcttcttccggacgc
+gtggxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaat
+
+QA 52 719 1 193
+DS CHROMAT_FILE: LL2296r PHD_FILE: LL2296r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:11 2000
+
+RD LL2332r 790 0 0
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxCCACGC
+GTCCGGACTAGTTCTAGATCGCGAGCGG*CCgcgcTTTTTTTTTTTTTTT
+CGAATCTTAGCGACAAAGGGCTGAATCTCAGTGGATCGTGGCAGCAAGGC
+CACTCTGCCACTTACAATACCCCGTCGCGTATTTAAGTCGTCTGCAAAGG
+ATTCTACCCGCCACTCGGTGGTAATTATAATTCAAGGCGGTCCGAACGGC
+GCTTCCACCGAACGGACTTAGCCAACGACACGTGCCTTTGGGAGCCGAAG
+CTCCTACTGAGGGTCGGCAATCGGGCGGCGGGCGCATGCGTCGCTTCTAG
+CCCGGATTCTGACTTAGAGGCGTTCAGTCATAATCCAGCGCAC*GGTAGC
+TTCGCGCCACTGGCTTTTCAACCAAGCGCGATGACCAATTGTGCGAATCA
+ACGGTTCCTCTCGTACTAGGTTGAATTACTATTGCGACGCGGGCATCAGT
+AGGGTAAAACTAACCTGTCTCACGACGGTCTAAACCCAGCTCACGTTCCC
+TATTGgtGGGTGAACAATCCAACACTTGgTGAATTCTGCTTCACAATGAT
+AGGAAGAGCCGACATCGAAGGATCAAAAAGCAACGTCGCtaTGAACGCtt
+GGCTGCCACaaGCCAgttatcCCTgtggtAACTTTTCTGACAcCtCTAGC
+TTCAAATTCCGaaggTCTaAAGGATCGATAggccacgctttCAcggttCG
+TattcgtacTGaaaatcagaatcaaacGAGCtttTACcct
+
+QA 45 790 56 100
+DS CHROMAT_FILE: LL2332r PHD_FILE: LL2332r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:31 2000
+
+RD LL2297r 800 0 4
+aatxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxcCAC
+GCGTCCGCTAGTTCTAGATCGCGAGCGG*CCGCCCTTTTTTTTTTTTTTT
+TTTTTTAACTTTGAAAAGCTTCATTTTGAAGGAAATAATGAACAAACTCA
+CCATATTGGTCAGTTATTGACCTTATTACACGCAATTAGCAACATAAAAA
+GTTTTGGATCTTATTGACGATCTTCTCTTCTTTGTTTTATGCCCTTTGCT
+CCACGTATGACCCCGAGAGTCCTGAGGTTCCAAGAACGGTAACCTTTCCC
+CACTCTGAACCGAGCCCGCTGCATCTCATCTTCAAGTGGATCATCTTTGT
+CCCCAACTCCATCTCAACCTTCTTCTTCAAGTAAACTTCTTCA*GGCCCA
+AACGCATCTAGGTAGGTAGTATACCTATGATAAGCAAGATGCAGCGCTTT
+GGTTATATCTTGTGGTTGCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+AAggggcgggccggtxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxaaaaaccctggcgttacccaacttaatcgc
+cttgaagaacatccccctttcgccagttggcgtaatagagaagaggcccg
+acccgatcgcccttcccaacagttggcgcaactgaatgggaaatggcccc
+
+QA 47 555 58 565
+DS CHROMAT_FILE: LL2297r PHD_FILE: LL2297r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:15 2000
+
+RT{
+LL2297r chimera phrap 671 796 000919:094547
+}
+
+RT{
+LL2297r matchElsewhereHighQual phrap 753 775 000919:094547
+}
+
+RT{
+LL2297r matchElsewhereHighQual phrap 709 738 000919:094547
+}
+
+RT{
+LL2297r matchElsewhereHighQual phrap 672 704 000919:094547
+}
+
+RD LL2259r 799 0 0
+natcttxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxagctcgacccatg
+cGTCCGTAGTTCTAGATCGCGAGCGG*CCGCCcTTTTTTTTTTTTTtttg
+atcCAACAAATCAACTTGATCAGATCATTATAACAGTACAACCCTTAAAC
+AAGTGAGTTGCTACAACTGTTGGTAATAAGAATTACAGTTTCTTGTCTAA
+GAAAATGGCAATGTAAGGGAAGTAAGGACACTCGTATTCCCATCCGGTTC
+CAGAGTAACGAACGCAATGCAACCAAAGAGCCTCATCATGCTCCTCGTct
+TGTgggAACCagtgACAGGGAGAACTCATCAAATAAgagcgcatctAcaa
+TATACAGGGcatcCCATagCGAATGACCCgctTtatctaAC*TCAACACG
+gggatGATCTTgagaccaagattgtcagaagaatcgatgcacgtgagaga
+aagctcgccttcccagacgactatctgcattccagcgaagaccngagcat
+cgatccaatgcccacaacactgccctaagtctgctccccccagcggtctg
+acggaccatgacgaggccctatcggccgtattgtgtttcgtccgccagca
+ccccggatggcgcctccgcggcgcccccacctcccccgacaccgccatcc
+gccgcagccacggcaaccggcatcctcccaacagccgcgggcgtgagcga
+cgcaccccggagatcgggctccccncgccacccagatccgaccaagtttg
+gccccctcgtctgtcacgcattccatttgtgtctcgcgttctcttttct
+
+QA 50 412 57 99
+DS CHROMAT_FILE: LL2259r PHD_FILE: LL2259r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:22 2000
+
+CO Contig53 615 7 124 U
+AAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTC
+GCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACA
+GTTGCGCAGCCTGAATGGCGAATGGGACGCGCCCTGTAGCGGCGCATTAA
+GCGCGGCGGGTGTGGTGGTTACGCGCAGCGTGACCGCTACACTTGCCAGC
+GCCCTAGCGCCCGCTCCTTTCGCTTTCTTCCCTTCCTTTCTCGCCACGTT
+CGCCGGCTTTCCCCGTCAAGCTCTAAATCGGGGGCTCCCTTTAGGGTTCC
+GATTTAGTGCTTTACGGCACCTCGACCCCAAAAAACTTGATTAGGGTGAT
+GGTTCACGTAGTGGGCCATCGCCCTGATAGACGGTTTTTCGCCCTTTGAC
+GTTGGAGTCCACGTTCTTAATAGTGGACTCTTGTTCCAAACTGGAACAAC
+ACTCAACCCTATCTCGGTCTATTCTTTTGATTTATAAGGGATTTTGCCGA
+TTTCGGCCTATTGgttaaAAAATGAGCTGATTTAACAAAAATTTAACGCG
+AATTTTAACaaAaTATTAACGCTTACAATTTaggtGGCACTTTTCgggga
+AATGTGcGCGgaacc
+
+BQ
+ 56 56 51 56 51 61 61 55 53 53 53 53 45 45 55 55 55 55 55 55 53 53 53 53 56 55 55 51 51 51 55 55 60 60 53 56 61 61 61 61 66 66 66 66 66 66 66 61 55 53
+ 55 53 53 50 53 55 55 55 53 53 53 53 53 61 61 53 53 53 53 53 53 56 61 61 61 55 46 49 46 46 46 53 53 53 55 53 53 53 53 53 53 58 55 55 61 66 66 66 61 61
+ 61 55 53 53 53 53 53 53 53 53 53 53 66 66 66 61 53 53 53 53 53 53 66 66 61 53 53 53 53 53 53 53 53 53 66 61 61 53 53 53 53 53 53 53 53 53 55 61 61 53
+ 53 53 53 53 53 53 53 53 53 56 66 66 66 66 66 66 66 61 61 53 53 46 46 46 46 46 47 53 53 53 53 53 55 55 53 53 53 53 53 53 53 53 53 53 53 55 55 55 55 55
+ 55 55 55 55 61 55 53 53 53 53 53 53 53 53 53 53 55 66 66 61 53 53 53 53 53 61 66 66 66 66 66 66 66 66 66 66 66 66 66 66 61 55 55 55 55 55 55 55 55 53
+ 53 53 53 53 53 55 55 53 53 53 61 61 61 66 66 66 66 66 55 55 66 61 61 61 66 66 61 66 66 66 66 61 55 55 55 55 55 55 61 61 55 55 55 55 55 55 55 55 55 55
+ 55 61 55 61 61 61 61 50 50 55 55 50 50 55 55 55 55 55 55 55 55 55 50 50 50 50 50 50 61 66 66 66 61 55 55 55 55 55 61 55 55 55 55 55 55 61 61 66 66 66
+ 61 61 61 61 55 55 55 55 55 61 66 66 61 55 55 55 50 50 47 49 47 47 47 50 66 66 66 66 61 55 50 50 50 50 52 54 58 61 50 50 47 47 47 50 66 66 56 50 50 50
+ 43 43 50 54 58 50 50 44 36 34 35 37 37 42 42 42 42 42 40 38 37 42 42 42 44 44 44 47 46 46 44 58 50 39 35 32 32 42 42 42 42 42 42 39 42 42 44 39 42 44
+ 50 43 42 42 35 35 42 42 43 58 58 58 58 58 58 66 66 50 50 46 46 47 47 61 56 66 66 56 50 50 44 39 39 39 50 37 39 35 35 38 42 42 44 39 38 29 31 31 36 39
+ 39 35 42 37 32 29 31 35 37 39 50 39 32 15 15 15 15 15 37 58 58 50 58 50 44 43 42 42 40 42 39 39 35 35 35 35 39 39 42 42 58 35 35 35 32 32 35 39 39 39
+ 35 35 44 44 39 39 39 28 26 15 15 25 15 33 37 37 33 28 28 33 42 39 34 32 35 36 27 28 37 25 24 18 15 15 17 25 30 34 39 39 35 36 32 29 20 10 10 10 17 10
+ 25 24 25 25 24 25 17 25 25 24 18 18 18 18 18
+
+AF LL2257r U -747
+AF LL2348r U -680
+AF LL2306r U -518
+AF LL2303r U -433
+AF LL2318r U -175
+AF LL2265r U -174
+AF LL2316r U -174
+BS 1 1 LL2316r
+BS 2 2 LL2306r
+BS 3 3 LL2316r
+BS 4 4 LL2303r
+BS 5 8 LL2306r
+BS 9 12 LL2265r
+BS 13 14 LL2316r
+BS 15 20 LL2306r
+BS 21 24 LL2316r
+BS 25 26 LL2303r
+BS 27 27 LL2306r
+BS 28 30 LL2303r
+BS 31 32 LL2306r
+BS 33 34 LL2303r
+BS 35 35 LL2316r
+BS 36 36 LL2306r
+BS 37 40 LL2316r
+BS 41 41 LL2303r
+BS 42 43 LL2316r
+BS 44 44 LL2265r
+BS 45 46 LL2316r
+BS 47 47 LL2265r
+BS 48 48 LL2316r
+BS 49 49 LL2303r
+BS 50 50 LL2316r
+BS 51 51 LL2303r
+BS 52 53 LL2316r
+BS 54 54 LL2303r
+BS 55 55 LL2316r
+BS 56 58 LL2306r
+BS 59 63 LL2316r
+BS 64 65 LL2306r
+BS 66 71 LL2316r
+BS 72 72 LL2306r
+BS 73 75 LL2265r
+BS 76 76 LL2303r
+BS 77 77 LL2316r
+BS 78 78 LL2306r
+BS 79 81 LL2316r
+BS 82 82 LL2265r
+BS 83 91 LL2316r
+BS 92 92 LL2306r
+BS 93 95 LL2316r
+BS 96 98 LL2303r
+BS 99 101 LL2265r
+BS 102 112 LL2316r
+BS 113 113 LL2303r
+BS 114 122 LL2316r
+BS 123 123 LL2265r
+BS 124 124 LL2316r
+BS 125 125 LL2306r
+BS 126 133 LL2265r
+BS 134 134 LL2316r
+BS 135 137 LL2265r
+BS 138 167 LL2316r
+BS 168 169 LL2265r
+BS 170 176 LL2316r
+BS 177 177 LL2303r
+BS 178 197 LL2316r
+BS 198 203 LL2265r
+BS 204 239 LL2316r
+BS 240 249 LL2265r
+BS 250 264 LL2316r
+BS 265 268 LL2265r
+BS 269 277 LL2316r
+BS 278 278 LL2265r
+BS 279 308 LL2316r
+BS 309 311 LL2265r
+BS 312 351 LL2316r
+BS 352 360 LL2265r
+BS 361 363 LL2316r
+BS 364 369 LL2265r
+BS 370 370 LL2316r
+BS 371 373 LL2265r
+BS 374 385 LL2316r
+BS 386 394 LL2265r
+BS 395 404 LL2316r
+BS 405 405 LL2265r
+BS 406 406 LL2316r
+BS 407 409 LL2265r
+BS 410 418 LL2316r
+BS 419 420 LL2318r
+BS 421 443 LL2316r
+BS 444 444 LL2318r
+BS 445 467 LL2316r
+BS 468 469 LL2265r
+BS 470 484 LL2316r
+BS 485 485 LL2265r
+BS 486 493 LL2316r
+BS 494 495 LL2265r
+BS 496 502 LL2316r
+BS 503 506 LL2265r
+BS 507 508 LL2316r
+BS 509 509 LL2265r
+BS 510 520 LL2316r
+BS 521 522 LL2265r
+BS 523 524 LL2316r
+BS 525 526 LL2265r
+BS 527 528 LL2316r
+BS 529 532 LL2265r
+BS 533 557 LL2316r
+BS 558 558 LL2265r
+BS 559 559 LL2316r
+BS 560 564 LL2265r
+BS 565 571 LL2316r
+BS 572 573 LL2265r
+BS 574 575 LL2316r
+BS 576 576 LL2265r
+BS 577 579 LL2316r
+BS 580 580 LL2265r
+BS 581 582 LL2316r
+BS 583 584 LL2318r
+BS 585 585 LL2316r
+BS 586 592 LL2265r
+BS 593 595 LL2316r
+BS 596 596 LL2265r
+BS 597 600 LL2316r
+BS 601 602 LL2318r
+BS 603 603 LL2265r
+BS 604 604 LL2318r
+BS 605 606 LL2265r
+BS 607 607 LL2316r
+BS 608 610 LL2318r
+BS 611 615 LL2316r
+
+RD LL2257r 788 0 1
+aatxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccggtggtgcctacatcgttaggcaagCGGCTAAGAGTATTGTTGCC
+AGTGGGCTAGCGAGGCGTTGCATTGTGCAGGTCTCGTACGCTATTGGTGT
+CCCTGAGCCGTTGTCTGTGTTTGTGGACAGCTACGGAACTGGGAAGATAC
+CAGACAAGGAGATTCTTGAGATTGTGAAGGAGAGTTTTGACTTCAGGCCG
+GGGATGATTTCGATTAACTTGGATTTGAAGAGGGGTGGTAATGGTAGGTT
+CTTGAAGACTGCTGCTTATGGTCATTTCGGAAGGGACGATGCTGACTTCA
+CCTGGGAGGTTGTGAAGCCACTCAAGTCTAACAAGGTCCAAGCTTGAAGA
+AAACTTGAACTAGTTTAGTTCTCTCAGGTCTTGCCTCTGTTTCAACTAAA
+GAGATGGGTCTATTCTTTTTCCTTTTTCAATATAAGCAGTTGAGAATTTA
+TATTATTATTATTTATTATCATATGTTATTGTTGTGGTAAGAAAGTTAAA
+AGTCATATTTTGTATTTCTTGTTTCTTTTCAGAAGAGAATTTATATAAAA
+GATTATATTCAATATTAAAAAAAAAAAAAAAAGxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaa
+AACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCAca
+
+QA 81 775 749 788
+DS CHROMAT_FILE: LL2257r PHD_FILE: LL2257r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:13 2000
+
+RT{
+LL2257r matchElsewhereHighQual phrap 749 788 000919:094547
+}
+
+RD LL2348r 793 0 1
+aatxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccgcaactgtcttggtgactcagaacaAAAATCTGAACAGACCTTCT
+GAGTATCTTCTTGATGGGAAAGTAACAGACAAGAATGATGTCTACTCTTT
+TGGGGTGATTCTCCTAGAACTTCTCCTGGGGAAGAGATCAGTGGAGAAAC
+CATCAACTGAACCAGAATCCGTTGTCACTTGGGCTGTACCAAAGCTGAGT
+GACAGAGCTAATCTGCCAAACATATTGGATCCTGCAATCAAAGGAACCAT
+GGATTTGAAGCATCTTTATCAGGTAGCAGCGGTTGCGGTGTTGTGTGTGC
+AGCCAGAGCCAAGTTACAGACCACTTATAACCGATGTTTTGCACTCACTC
+ATCCCTCTTCTACCACTAGAACTCGGCGGATCATTGCGAATTTTATAGAA
+ACACATTTTCATTTCCTTTATTGTAATCTTTTCATTTTGAATTGAATGTT
+GTTGGTGATCTTGTGCAAAATTCCACTTTTTATCTGTTTTTTTTTGGTAA
+AAAAAAAAAAAAAAAGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxAAAACCCTGGCGTTACCCA
+ACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCG
+AAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCct
+
+QA 81 793 682 793
+DS CHROMAT_FILE: LL2348r PHD_FILE: LL2348r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:31 2000
+
+RT{
+LL2348r matchElsewhereHighQual phrap 682 793 000919:094547
+}
+
+RD LL2306r 814 0 2
+agggtaggttttggacngcctttcgtgactgccgcaccgtttccgggttc
+cacggtcgacccacgcgtccgctcactctcACTACCTTCTCTTCACCCgc
+aaATGGCGACCGCTCCTCTCTCCGGCTTCTTCCTCACCTCTCTTTCTCCT
+TCTCAGCCTTCTCTCCAAAAACAGACTCTTCGTTCTTCTCCCACCGTGGC
+TTGCCTTCCCTCATCCTCCTCTTCCTCCTCCTCCTCCTCCTCCTCTCGTT
+CCGTTCCAACACTTATCCGTAACGAGCCCGTTTTTGCCGCTCCTGCTCCT
+ATCATCACCCCTTACTGGAGCGAAGAGATGGGTAGCGAAGCATACGAAGA
+GGCCATTGAAGCTCTCAAGAAGCTTATTATCGAGAAGGAGGAGCTAAAGA
+CTGTTxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxAAAACCCTGGCGTTACCCAACTTAATCGCCT
+TGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCA
+CCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGGACGCG
+CCCTGTAGCGGCGCATTAAGCGCGGCGGGTGTGGTGGTTACGCGCAGCGT
+GACCGCTACACTTGCCAGCGCCCTAGCGCCCGCTCCTTTCGctntCTTCC
+CTTCCTTTCTCGCCACGTTCGCCGGCTTTCCCCGTCAAGCTCTAAATCgg
+nggCTCCCTTTagg
+
+QA 81 742 520 814
+DS CHROMAT_FILE: LL2306r PHD_FILE: LL2306r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:20 2000
+
+RT{
+LL2306r matchElsewhereHighQual phrap 745 800 000919:094547
+}
+
+RT{
+LL2306r matchElsewhereHighQual phrap 520 743 000919:094547
+}
+
+RD LL2303r 793 0 1
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxccacg
+cgtccgtagatcgcgaccggccgcccttttttttttttttaatatttgct
+cttttctctcatcttcttttagttatctcttcgtttacctcttcctcctt
+tatttaatgtaactttgctttcaatatcttatattaatgttctcagtgac
+ataaaaaatatattgtatgcatatgattatgcatgtacatgattgaaaaa
+acttaatggcttaagtgtttaaattgtaataactaaaaatatattttaat
+caaaaaaaaaaaaaaaaagxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxAAAACCCTGGCGTTAC
+CCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATA
+GCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAAT
+GGCGAATGGGACGCGCCCTGTAGCGGCGCATTAAGCGCGGCGGGTGTGGT
+GGTTACGCGCAGCGTGACCGCTACACTTGCCAGCGCCCTAGCGCCCGCTC
+CTTTCGCTTTCTTCCCTTCCTTTCTCGCCACGTTCGCCGGCTTTCCCCGT
+CAAGCTCTAAATCGGGGGCTCCCTTTAGGGTTCCGATTTAGTGCTTTACG
+GCACCTCGACCCCAAAAAACTTGATTAGGGTGATGGTTCACgt
+
+QA 81 777 435 793
+DS CHROMAT_FILE: LL2303r PHD_FILE: LL2303r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:43 2000
+
+RT{
+LL2303r matchElsewhereHighQual phrap 435 730 000919:094547
+}
+
+RD LL2318r 788 0 4
+aatxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxAAAACCctgtcgTTACCCAACTTA
+ATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAG
+GCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATG
+GGACGCGCCCTGTAGCGGCGCATTAAGCGCGGCGGGTGTGGtggctACGC
+GCAGCGTGACCGCTACACTTGCCAGCGCCCTAGCGCCCGCTCCTTTCGCT
+TTCTTCCCTTCCTTTCTCGCCAcggtCGCCGGCTTTCCCCgccAAGCTCT
+AAATCGGGGGCTCCCTTTAGGGTTCCGATTTAGTGCTTTACGggaCCTCG
+ACCCCAAAAAACTTGATTAGGGTGATGGTTCACGTAGtggtccATCGCCC
+TGATAGACGGTTTTTCGCCCTTTGACGTTGGAGTCCACGTTCTTAATAGT
+GGACTCTTGTTCCAAACTGGAACAACACTCAACCCTATCTcgtgctATTC
+TTTTGATTTATAAGGGATTTTGCCGatctcggactATTGgtataaAAAtG
+AGCTGATTTAAcagatatTTAACGCGAattgtaacaacatATTAACGCTT
+ACAATTTaggtggCActgttcgtggaAATGtgcGCGga
+
+QA 177 671 177 788
+DS CHROMAT_FILE: LL2318r PHD_FILE: LL2318r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:39 2000
+
+RT{
+LL2318r matchElsewhereLowQual phrap 443 472 000919:094547
+}
+
+RT{
+LL2318r matchElsewhereLowQual phrap 426 441 000919:094547
+}
+
+RT{
+LL2318r matchElsewhereHighQual phrap 350 424 000919:094547
+}
+
+RT{
+LL2318r matchElsewhereHighQual phrap 187 344 000919:094547
+}
+
+RD LL2265r 783 0 1
+atxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxAAAACCCTGGCGTTACCCAACTTAA
+TCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGG
+CCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGG
+GACGCGCCCTGTAGCGGCGCATTAAGCGCGGCGGGTGTGGTGGTTACGCG
+CAGCGTGACCGCTACACTTGCCAGCGCCCTAGCGCCCGCTCCTTTCGCTT
+TCTTCCCTTCCTTTCTCGCCACGTTCGCCGGCTTTCCCCGTCAAGCTCTA
+AATCGGGGGCTCCCTTTAGGGTTCCGATTTAGTGCTTTACGGCACCTCGA
+CCCCAAAAAACTTGATTAGGGTGATGGTTCACGTAGTGGGCCATCGCCCT
+GATAGACGGTTTTTCGCCCTTTGACGTTGGAGTCCACGTTCTTAATAGTG
+GACTCTTGTTCCAAACTGGAACAACACTCAACCCTATCTCGGTCTATTCT
+TTTGATTTATAAGGGATTTTGCCGATTTCGGCCTATTGgttaaAAAATGA
+GCTGATTTAACAAAAATTTAACGCGAATTTTAAcaaAaTATTAACGCTTA
+CAATTtangtGGCACTTTTcggngaAATGTGcg
+
+QA 176 754 176 783
+DS CHROMAT_FILE: LL2265r PHD_FILE: LL2265r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:14 2000
+
+RT{
+LL2265r matchElsewhereHighQual phrap 176 471 000919:094547
+}
+
+RD LL2316r 790 0 1
+attxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxxxxxxxxxxxxxxxxxxxxxAAAACCCTGGCGTTACCCAACTTAA
+TCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGG
+CCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGG
+GACGCGCCCTGTAGCGGCGCATTAAGCGCGGCGGGTGTGGTGGTTACGCG
+CAGCGTGACCGCTACACTTGCCAGCGCCCTAGCGCCCGCTCCTTTCGCTT
+TCTTCCCTTCCTTTCTCGCCACGTTCGCCGGCTTTCCCCGTCAAGCTCTA
+AATCGGGGGCTCCCTTTAGGGTTCCGATTTAGTGCTTTACGGCACCTCGA
+CCCCAAAAAACTTGATTAGGGTGATGGTTCACGTAGTGGGCCATCGCCCT
+GATAGACGGTTTTTCGCCCTTTGACGTTGGAGTCCACGTTCTTAATAGTG
+GACTCTTGTTCCAAACTGGAACAACACTCAACCCTATCTCGGTCTATTCT
+TTTGATTTATAAGGGATTTTGCCGATTTCGGCCTATTGgttaaAAAATGA
+GCTGATTTAACAAAAATTTAACGCGAATTTTAaCaaaaTATTAACGCTTA
+CAATTTaggtggcACTTTTCggggaaaTGtgcgcggaacc
+
+QA 176 790 176 790
+DS CHROMAT_FILE: LL2316r PHD_FILE: LL2316r.phd.1 CHEM: unknown DYE: unknown TIME: Tue Sep 19 09:45:30 2000
+
+RT{
+LL2316r matchElsewhereHighQual phrap 176 471 000919:094547
+}
+
+
+WA{
+phrap_params phrap 000919:094547
+/usr/local/genome/bin/phrap Run_SRC3700_2000-07-06_44+45.fasta.screen -new_ace -view 
+phrap version 0.990329
+}
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/acefile.ace.1
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/acefile.singlets
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/acefile.singlets	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/acefile.singlets	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1291 @@
+>RL767F  CHROMAT_FILE: RL767F PHD_FILE: RL767F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:27 2001
+CCTANTGAGATACTTAACCTCAATGTGTCTTCTTTCCTACACATGGCATC
+CTTATTAATGAAAAACAGAGAAAACATCAAATCAAATAGTAGCTTACACT
+TGGTGAAAACTGAGAAAGCCCATGCTCGGAGAGATCACTTTAGAAAAGGT
+GTCATGGTTCAGCAAAAGCATGAATAAGCCAAGTGGCTCTTCTTTGAGAT
+GTAAGGAGAGATGAAATCTGTACCATGGGAAGAAGTCTCAGAGATCTCTG
+CTGGAACTAGCACTTGAGGCTTACCCAAAGCTATCCTTTTCCCAACTGTG
+TTTGCCTAGAACAAACCAGATATGGCTGGAACAAACACATCGCTTTTTGC
+ACTTATGTACAAGTCAATAACATTTTCATACTCAGAACTCTCTGATTCAA
+GGTACTCTGATCCCTTACTTGCTGGCATTACTGCCTCCTTTGTAAACGCT
+CCTGGCAACACATCCTTAAGGATATTGAGGCTACTGTCCCATCTACGCTG
+AGCCCCACACATGGCTGTGTCCCTCACCCCCATCCCGCTTCCTCCACAAC
+ACACCATCCCCTTGTGCCTTGCAACAAGCTCTGTCCCCTCCCACACCCGT
+TCGAACGCCCCCATCTTTTCTCCAGTGCTTCGATCCTACCGTCCCCAGCT
+ATCCAACGGACACCCGCACACCCGTCCCGGCCCCTACCCCTCTTCGCGGT
+CTCCACCACCCGCCCCATCTCACTCTCCACACCCAGGCACCCCCACCCGC
+CCCCCGCCCCTGGTC
+>RL767R  CHROMAT_FILE: RL767R PHD_FILE: RL767R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:52 2001
+ACTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXATACCTTGTCTCTATCTGCCCCCACACCACACTCTCTTTGTTGG
+ACTTTCCTTTTCTTTTTCTTTTCCTTTTCCTTCCTTCCTTCTCTCTGTTT
+CAACTCTGTTTTCACAGATCCAGTTTTGTAAGAAAGTACTAGAAATGGGT
+GTGGATTTGAGGCAAGTGGTTGCTGGTATTCTCACCATCACCATGTTTGT
+GATGCTCGGACAGATGCTTCATCGAGATTACTATGATTCTCTTCAGGAGA
+AAGCTCAGGGAGATGCACATGATATCGAATTCGAAGGATCAAGAGTATCT
+GTGAAAGATAGTCTTGTTGGAGCGTTAGAAGGAAATAAAGGACCTTGGAT
+GGATGATAACAATGACCTTAATCCTTGCTGGCCAACATTACTATCCGATG
+AAGCGGTATCATCAAAAGGGTATGTTACATTCTCTCTAACGAATGGTCCT
+GAGTACCATATCTCTCAGATCACTGATGCTGTAATGGTGGCAAAGCATCT
+TGGAGCAACACTAGTGCTTCCTGATATAAGAGGAAGCAAACCTGGTGATG
+AAAGGAACTTTGAAGACATTTATGATGCTGATAAACTAATCAAAAGCTTG
+GACAATGTCATCAAAGTTGTCAAACAATTGCCTGAAGAAGTATCTCTAAG
+AGACATCGCCATTGTTAAAGTCCCTACAAGAGTTACAGAAGACTACATCA
+TAGAACACATTGGACCCATCTTCAAGTCCAAAGGAAACATTCGAGTAGCT
+ACATACTTCCCTTCTGTCAACCTGAGGAAATCCTCACACGACGGCGAATC
+CGATCCTGTGGCTTGTTTGGCATGTTTGGTCCTTGGAGTGCACCT
+>RL769F  CHROMAT_FILE: RL769F PHD_FILE: RL769F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:33 2001
+GCGGGGCGGGGCCCAGTAAGGNAGGCGGAGGGAAAAACAAAGAAAAAAGT
+AAAAGGGGGGGGCTGGNGGCAGACTNGGAGGACGGGCCTAGAGGGGGGAG
+GACAGACAGAGGCGCGGTGCGGCAAGAGAAGAACAAAGAAAAAACAAACA
+GCAGCAACGGAGGGCAAAGCCCGCGCGCCGGCGAGCCCATAAAGACGCGA
+TGAAGGACGACCGCCGTAGGAGTGGGGACCGAGGGAACCAAGAGAAAAAA
+CCGAGACGAGGCGCCTGACGCCGGGCGGCGCGCCGAAACCGCCCCGCCGC
+ACCCGACGAGACACAGCAGCCCNACATGCGGCAAGCGCGGAGCGCCAGCG
+CCCACCAAAGGAGGACCGCGAGACCGGCGGGGGCGGAGGGAGGAGCAACG
+AAGCACAAAAACAGACCAAAGAAATAAGTCGCCACGCGCGGGGCGTGGCG
+TAGTGAGGAAAGCAGGGGACGAGAGCAAGGAGCAGACCGAGCGAGCGCGC
+ACAGCGCGCACAGACCGCNCAAGCAGAACACATAGAACGACCCGTGCCNC
+GCGCACGACCCGCGAGCCCAGGACACGCCCAGACGCAACCACACGCCGGA
+ACGGAGCGGAACCCTGCGCCCACGGACCGGGGGCAACACGAGAGGGAAGG
+AACGCGGAAAAAACATGCACCAACAAGACCTCGACACCGACGAGCGTGCA
+AGAGGATCGGCGCGCGCGCGAGACCCCGAAGAAGAGAAACGCGCACACGT
+AGACCCGGACGACAACGAGCGAGACNCAGAGCGAACAGCGACAACGACGA
+CACACCAACGCAAAACAACGGATGACGGACCGCGCAACACGCGAACGAGA
+GCGGAAAGCGAACGCGAGCGAGAGACGANGAGACCAGTAACCGGAAAGAA
+GGACGCCATACGGCGGAACGCAGCGAACGCGCACGCCGCGACACGAAATC
+GACACATTCGCCGCGACCGAAGGAACCGTACGCCCGCAGAACGACGACAG
+CNAAGACACCAGCACGCACAGCNGCCCGAGCNGTAAGCAAGTCGACGGCA
+GGAGCAGTGCGAGCGCGGCGACGGCGAGAAGACG
+>RL769R  CHROMAT_FILE: RL769R PHD_FILE: RL769R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:59 2001
+CGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCTTCTGGTATAATTCAAGGGCTTAAGTTCAACTCCAACAAGAAGT
+CTTCTGATGTTATTGGATACAATGATGGTACTCCATTTACCCTCCAAGTT
+CAAGACAAGAAGATAATTGGCTTTCATGGCTTCGCCGGAGACAATCTTAA
+TTCTCTTGGAGCTTACTTTTCTCCATTAATAGCTGCCCCTCCTTCAGTTC
+CCCCAAAGAAGCTTGAAGCTAAGGGTGGTGTGTCTGGAGCTGAGTGGGAC
+GATGGTGCTCACGACAATGTTAAAAAGGTATCTGTAGGACAGGGCGAAGA
+TGGTGTAGCAGCTGTCAAGTTTGAATACACAAATGGTTCGCAAGTGGTTA
+TTGGAGCTGAACGTGGGACACCAACATTGCTTGGATACGAAGAGTTTGAG
+CTTGAATCAGATGAATACATAACCATCGTGGAAGGCACCTACGACAAAAT
+CTTAGGGAGTGATGGCCTGACGATGCTCACTTTTAAGACTAACAAGAGCA
+GAACATATGGGCCGTATGGTCTCGAAGGTAGCACACACTTTGATCTCAAG
+GAGGAAGGTCACAAGATTACAGGGTTCCATGGACGAGCTGGCGCGACTAT
+TAGTGCTATTGGAGTTTACTTAGCTCCAGTAGGCACCATCCCCTTGACTC
+CTGCACAACCAACCAAGAAGCTAGAAGCTAAGGGTGGTGAGGGAGGAACG
+TCATGGGATGATGGTGCTTTCGACGGTGTACAAAAAGTGTCTGTAGGACA
+AGCCCAAGATGGTATATCAGCGGTTAAGTTTGTGTACAACANAGGTTCTT
+CTNGAATCATAGGAGATGAACATGGGAAGAGTACTCTACTTGGATTCGN
+>RL770F  CHROMAT_FILE: RL770F PHD_FILE: RL770F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:36 2001
+ACGGTACACATTCTTCTATCAGATACATTAATCCACATTAATCCAGATCT
+CGCAACACAAACCATGACTATATTTTTTTTGTCTGCATCCTGTCTAGGAC
+CATACAAGATAGGCACCAATGCCTTAAGAGACATAAACATATGGCCCTCG
+GTGAACCCCTCTGTTCAACACCAAAATTGAAAGCTATCCTTTTGCATGGG
+TTCCGAAATTTAAAGCGTGTATGCTTGTGGACTGGGACAAACATCAGATT
+AAGCTTGCCTAGTTCTTTGCTCTATGAACTTGAGTCAATGGGAACAGATT
+TTGCCGGAGAATAACCAAAGCTGTTGCTGCTACTGCTTTCTCCATTGGTT
+GTAACATCGGATTGAGCTGAAGCTGAAGAGTTCATAAAACTCTGGTTGGC
+ATTTGGGGATGGGGCTTGACCTTCTTTACGGAGTTTCTTAAGGAGGTTCA
+TGACCGCTGGAACGAACTTGGTTGACAAGGCAAGAAGCACACCTCCCTGC
+TCCATGTCGCGCATTCTCCTGAACTATTTAACTGCACTCACCCGGCCTTC
+GAAAACATCTGCTCTCTCTCACCACACTACCTCCGTCCCACTGGCTTCAC
+TCCCTCCTCCCCTCTACCGCCCTCACCTCACCCTCTCTATATGTCCTGGC
+CTTGTCTCCGCCTCGCGCACACTCTGCTTATCATACCCCGTCTTTTATCT
+TTCTTCCTACCCACTCATCTCTCCCTCACCCTCGCTCCTTTTACCTTCAT
+CTGCCATTGCACTCGCATCTCTTCTT
+>RL770R  CHROMAT_FILE: RL770R PHD_FILE: RL770R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:02 2001
+ACTCTTGGTACNGNACATGCAGGTACCNNNGGTCCGGNAATNNTCCCGGG
+NTNNCGACCCACNGCCGNNTCCGGNAGACTATCTCAAACGATGCAGGCTT
+GACTATATTTGACGACAAACAGGCCTCATTAACACCTACTGTCTGCACTC
+TTGGCAGGCACTTTGCCTAGCGTGAGTAGTACTGATGGATGTTGGTCTCC
+GTGACCACATAGCGGTTGTGCTTTCTTATGCTGCATTAACATGATGCTGG
+CTAAGTGATCAATAGACAATCATAATACTTTGTGACTCTTCCAGTCTCAC
+TATGGCTGATGGTACACAACTAGTGCGATGCGACGAAATTGTCAATGAAA
+ATGTTGCTGGTTCTTATTGCCAATGACAACTTGACGTGAATTGGATAAAG
+CCTGTTCTTTCTGTACCTGAGCGTGAATTAGGAAATAATGTGAGCTAACT
+CCTCCACCATTGCTTATTACAGTATGACACAGAGGCAACACTACTTTGAA
+GAAGGCGTCATATCTTCCAAGAGACAACAGCTGAACTAAAAATTATTGCT
+ACTTGTTCAACCAACTTTACTATACCTGCTTGGACATTTGAGATCTGGAC
+CACTTGAAAACTTCAAGGATGCTTTTGATAAGGCGTTAAATGCCGGCGAA
+ACCTCTTCACAATCAGAGGGTGTTCGCGCTCAGCCTTGCGTGTCTACATT
+GGATAAACGATGTGACTATGCTGATACTGAACAACCGAACTGTGATACAT
+GCTAAACTCGGAAACTGCTCCAGAGTGACATTGAAGCCCATATCTCTTCT
+GCGCGTACTGCACAGCTGTCTGAACTGACTACTCTGTATGATTCAAAACT
+AAGTGCAGCATCATCTGTACAGTAGGAGCTCTTCTGTATGGAGCACTGAC
+GAACGATGGGCAGCATATTAAGCTACTAGACGAGATGGAGAATTGCTGCC
+ATGATCTCTCGACGA
+>RL773F  CHROMAT_FILE: RL773F PHD_FILE: RL773F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:45 2001
+CATTCAAGTTAAAGATGGTTTAATGTGTCAACAGTGTGAGCTTTGAGTTC
+AGTATGTTTACACATTGTGATAACAGCAACGGTATTCTTGATATCATCAT
+CTCAATTCACCTTTTAAGACAAGTGATGCATGTCTTATAGCTCTAGACAC
+TCTCCATATCTCGTGTTGAACGTGTTCCCAAGAAGCTTTGGTCTTTACTC
+TCTTTGCATTGTCTATTGCATCATCCACTCCAGGAAATGATTTTGATCCG
+TAGTCATCGTACTTGGATGGTCCATATACCAAATGCTTGTACCATGTTCT
+TCCAGAGAGTCCATCTCTGTCTGTTAAAGCTCGCTCCGCCATCATTAATC
+TGTCATTGACCTCTCTCACTCTCAAAGCTCCCTTTACTCCCTCCTCTTTC
+TCTATATTAATTTCTTGTGCTGCTGTAGATAGATCTTGGATTGATTTGAT
+TAACGGAGAGATATCTATGCTATGTGCTAACATCTNCTCCTCCAAATCTT
+CAGCACTTTTCTTGAGTTCTGATGCATAGGAGGCATAGCTGAAAGGTAAC
+AACTCATCATCTGCTAACCGAAGAGCCCCAAACCTACAACACTACCTATT
+GCCACATGTCTCTGAAACATTGGGTCCCCAACTTCTCCATCCAAGTGAAC
+CCATCCTACATACAGCGACTCACTGCCTATCCTCCTCCGAAAAGCCTGCC
+ATCTGCCGGAATGCCTATCTGCTGCCCACATGCTCTCTCACACTACGC
+>RL773R  CHROMAT_FILE: RL773R PHD_FILE: RL773R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:13 2001
+AGTCTGGXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXAAACCAACAGCAGTTCTCGTCTTCCTAATCGCTGCTTTCTCCTAC
+TTCCTCTTCTCTCCTTCCCCAAAACCTCACTACTACCACACACTCTTCCT
+CTCCTCTCCCTTCTCCGACAACGCTTCCATCGCCTCAAACCTCCGCACCC
+TCACGCGCCGCCCCCACGTCGCCGGCTCATTAGCCAACGCAGAAGCTGCA
+TCCCACGTCCTCTCCTCCTTCGCCTCCTCCTCCCTCAAACCCCGCGTCGC
+CGCTTACAAAGTATCTCTAACGTACCCAGTTCATCGCTCACTCTCACTAC
+TAACACCAAAAGCATCATCATCAAAGTCCATCATCGCTTTCTCTCTGGAG
+CAAGAACACCTCGGAGACAACCCTTACGCGGACGAAGTCACGCCTACGTT
+CCACGCCTACGCCAAGTCAGGCGACGTCTCCGGTCCCGCGGCGTACGCGA
+ACTACGGACGAGTCGAAGACTTCGTGGGCCTAAACGTCTCCGGCGCCGTC
+GTGGTCGCGAGGTACGGGAAGATCTACAGAGGGGATATAGTGAAGAACGC
+GTACGAGGTGGGAGCTGTAGGCGTTGTGATATATACTGACGAGAGAGATT
+ACGGCGGAGAGGAGTGTTTTCCGGAGAGTAGGTGGATGCCGCCTAGCGGG
+GTTCAGGTGGGTACTGTTTACAATGGGTTGGGTGATCCGACGACTCCTGG
+GTGGGCTAGTGTTGATGGGTGTGAGAGGTTGTCGGAGGAGAGTGTGGAGC
+TGAGGGGAGATTCTNCGGGTATACCTTCTCTGCCTATCTCTGCGGCTGAT
+GGNGAAGTGATNCTGAAGACGGTTGTTGGAGGTGTTGGGCCTG
+>RL777F  CHROMAT_FILE: RL777F PHD_FILE: RL777F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:33 2001
+CTTCGTAGGTTACTAGGCATGCATGAAAGGACAAAACACCTATTAACATT
+GCAATGAATACAATTGACAGCTCACAGATTTAAAAATAACAATTCGACTC
+TTTAAGTACATTTTTATTCAGATCACACTCAAGAGGTTTCTTGAAAAGGG
+AGAAATATAGAACAGATGTTGGCACTACAAGGTAACTTCCAGATGACGAG
+GTTTCGTTCCAGCCCATAAAACATGCCCTATGATCTTGCCACCGACATCG
+GGGAATGTCTTGCATCCTGGAAGAGAACTTACTTTGCCTGTTCTATCGAC
+TTGAACTGGATACTTTACAAGATGACTTGCCATCACTGTGACTTCCTCTG
+CTGCATACTGCCTTCAGCTGAGCTCACTCAAATGCCTAACTCGCCCCACG
+CATTCCATGCCACCTGGCTCCTCGAAACCCTGCTCCATAAACCCTACCAT
+GCTCTGCCCACAGTGACCCCCCGCCTCCATACATCTTGCCCCTATGCCTC
+CAGCTCTTCTTCACTCATGCCTCCCGCCGCCGATACCTCCCCATGGCACC
+TCCAATGCCTCTACCCCTCTCACTCCTACATTGCCGCACCCTCCACACCC
+CTCCACTATCCTTTCTCCTTCCCACTCTACCGACCGCATGCCCCACCTCA
+CCCCACCCTCCCCTCCCTTCCCAGCCCCACTTCACCCCTGCTCCCCACCA
+CCCTTTCCCCGCCCACCCCCCCATATATTCACCCCCCCCCCTCATCTTTT
+T
+>RL777R  CHROMAT_FILE: RL777R PHD_FILE: RL777R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:59 2001
+ATTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXATAATTTGAATTCACTGTATCGTTCATACGATTTAAGTAATGTT
+TTACTGAGTCTGGACCACTGTTGAAGTCTTTCATTGGCTAAAGTTTGAAA
+GGTCAACAACGTACTGCTCACTGCTTTGATTCGTCTACTTATACCTCATA
+GATTTATTCTGGATCTGTATCTCAAAGGTCCTCATAGATTTATTCTGGAT
+CTGTATCTCAAAGGTCTCAGTGAACTTTTTCTTTGTTTTGTGGGTTTGAC
+TCGAGATTTGATCCTTTTCTTATTTATTTTTTCCTCCGAATTTTTGGTGC
+ATCATTCAGTTTATACCGAGACCAATGTCAATGGAAGGGTCAAGTAACGC
+TTCTTTGAGGGTAGTGTTGTTACATGGTAACTTAGAAATTTGGGTGAAGG
+AAGCTAAAAATCTTCCTAACATGGATCGTTTCCGGAGGTACAAGAAGAAC
+AGTACAAGTGATCCTTACGTGACTGTCTCTATCGCAGATGCAAAGATTGG
+CACAACTTTTGTGATCGACAATGATGAGAATCCTGTGTGGATGCAGCATT
+TCTATGTACCGGTGGCTCACCATGCTACGGTGGTTAAGTTTGTGTTGAAA
+GACAGTGACCGTTTTGGATCAAGGTCCATAGGAGATGTTAGAATCCCAAC
+CGAGGAGTTGTGTTCAGGGAATAGGATCGAAGGGTTGTTTCCGATACTGA
+ACACTAGTGGGAAGCCATGTANAAAGGGTGCTGTGTTGAGTTTGGCTATT
+CATACACTCCAGTGGAAATGATGAAAATTTACCCAATGGGTGTTGGTAAT
+GAGTGCGAAGGAGTNTCCGGTACGTACTTCCCTTTGAGGAA
+>RL778F  CHROMAT_FILE: RL778F PHD_FILE: RL778F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:36 2001
+GCTAAATGACACTTTTTATTCTGATTATTAANCTTGAAAAATCATACAGA
+ATAACAACAAAAAAGACCAAGAAACCATATAATAGAAGGGAAATCAAAGT
+GTATTTAAAAAGCAAATAGAAAATGAAAAGATATGCTTATTGATGTGCTA
+TTAAGCCTAGATCACAAGACAAAGCCATATCAAAGAAAACAAACTGGTAC
+AAAACAAGAACAGGTTCTCCATCACATGGGTTTATAGAAGAAAAGCTGGT
+CCCTCCTTATTGCGGACCTCCTTTATAAGTATCTATTAGCTCCTTGAGTT
+TCTCGAATCCCATGCGGGTTGTGAATTCTCCGAATGATTCTTTAGTTTCT
+ATGTCCACTTTCCAGTGATAAAACAACGGCTCGAAGACTTTCTCCAAGTC
+GTGAACCTTCACCTTATCCATGAAGCTGCTTGCTATCTGTGTCTGGTTCG
+GTGTTCCTCCTAGCCAAACCTGATAGCTGCTGGGACCATTACCGACTAGA
+CCAAGCTCAGTCATGTACGGTTTTGCACATCCGTTAGGACAACCGGTTAC
+TGTTATCACAACAGACTTTTCGTACTGCAGACCAATCTTGTCAAACATCG
+GTCTTACTCGCTCCACAATGCTGGGGATTTCGCGCTCTGCCTCCGTTATC
+GTTTGAGGGTTTTTAGGATAACCTGGTCACTTCTTGTTGCCTGCTCTACT
+GGTCCACCAACCTCCGCCGTCTCCTGCCCTCTTCACCTCTCCTGTGCCTA
+TGGGCTCCTTCCTCTGCTA
+>RL778R  CHROMAT_FILE: RL778R PHD_FILE: RL778R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:03 2001
+CGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXAAAAGCCTTCGTCACACACACTCCCTCTCTCTCAATCAGTGAAGT
+GATGTCGTCGTCGTCGTCTTCGTTTCGATCTCCGGCGGGTGCCGCCGCCA
+CTCTGTTCTCGTCTGATCAGAAGATCAGACTCGGGAGGCTCGACGTTCTG
+AGATCTTCTCATTCGGCTTTCTTAGGGAGGAGAAATCCACGTGGCGTCGT
+CTCGGTTCATCCATCCTCTTCCTCGTCGAGTCCTTCGCCTATCCAAGCCG
+TCTCCACGCCGTCGAAGCCTGAGGCTGCGACCAAGCGGAGTAAAGTTGAA
+ATAATCAAGGAGAAGAGCAACTTCATAAGGTATCCACTCAACGAGGAGCT
+TTTAACCGAAGCTCCCAACGTCAACGAGTCAGCCGTGCAGCTCATCAAGT
+TCCACGGTAGCTACCAGCAGTACAACAGAGAAGAGCGCGGTGGAAGATCT
+TACTCCTTCATGCTCCGCACCAAGAACCCTTCCGGAAAGGTCCCTAACCA
+GCTCTATTTAACTATGGACGACTTAGCAGATGAGTTCGGCATCGGTACGC
+TTCGTTTGACCACTAGGCAGACGTTTCAGCTCCACGGTGTTCTCAAGCAG
+AATCTCAAGACCGTGATGAGCTCGATTATTAGAAACATGGGAAGCACTTT
+GGGGGCTTGTGGTGATCTGAACAGAAACGTTCTTGCTCCGGCGGCGCCTT
+ATGTGAAGAAAGATTATCTCTCTGCGCCAGAGACGGCGGATAATATTGCG
+GCTCTGCTNTCTCCTCAGTCGGGGTTNTATTACGATATGTGGGTTGATGG
+NGAGCAGTTTATGACTGCTGAGCCTCCTGAGGTGGTGAAAGCTCGAANTG
+ATAC
+>RL781F  CHROMAT_FILE: RL781F PHD_FILE: RL781F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:46 2001
+CTTTAAGTAAACATAACACTAAAATCTGACATAACACAATACCCTCAAAG
+AGTTGATAAAACAACTAACATGAAAATTTAGCATAGCTGCGAGTTATAAA
+GTTTGCAAACATTTATTCACACACCCAAGGCTTGTAGGCACCACAACACA
+AAGGAAGATGATGACTCAAGTGAAAGTGCCAATAAAGCAACAAGGATTTT
+ATTAAAACACCAAACACATTAGCAAATAGAAGAAGAAGAAGGATAATCAA
+GCGGCAGCAACTTGAGAGTGCTTCAAAAGCTTGGCTTCGATTTCCTCCTT
+CGCAGCACCAACAACCTTGTCGAGCTTCTCTTCGCCTTTCATGTAGACAA
+AGGTCGGCATGGCCTGAACATCGAACTCCTTTGCAACAGTGGCCAATTCA
+TCGACGTCGACCTTGAAGAAGACAACATCAAGGTGCTTCTTAGCGAGCTC
+GACAAAGATGGGTGCAATGAAACGGCAAGGTGGGCACCACACTGCTGTAA
+AGTCTATCACAATCAGTTTGTTGGATTCTTTGGCTGCCTTGAGCTTGTTG
+TTCCAGTCCTCGACGGTGTGGCAAGCGATCACTTCTCCTGCTGGGATCAA
+TTCTGCTGTTGCGGCCATTGTTCTCTCTTCGCTTCTAGATCTACTCGAAA
+CCATGGCCACTATCACCGTCGTTAAGGCTAGGCAGATCTTCGACAGTCGT
+GGCAACCCCACCGTCGAGGTTGATGTACACACATCAAATGGTGTCAAGGT
+CACTGCTGCTGTTCCGAG
+>RL781R  CHROMAT_FILE: RL781R PHD_FILE: RL781R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:13 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXTTTTTTTTTTAAATTAAAAGAAGCCGAAATTAGATTATAAAAGAA
+GAGCTGCTGTCCTCTTTAAAGGCAGAGCTATCTCAGGTTTTTTTCTTACA
+AAAATAAGCAACTTAATCCCAACAAGAGTGGTTTGGAAAATAAACCAACA
+AAGGGAGAAAAAAAAAGCAAACGTAATGTTTTATTTAAAGCAAAATGACC
+AAGCACAACTCAGATCTTCTTTTTCTCGTAACAAAGGAGAGCACTTCGCT
+TCTACGAGCAGCACTTGTCTAGTACGGCTCCACAGGCTTGCGGAAGTTAG
+CTCCAGCGTAAACTGCCTCTGATCCCAACTCCTCTTCAATACGCAAAAGC
+TGGTTGTACTTGGCTAGACGCTCGGACCTGCATGGAGCTCCAGTCTTGAT
+TTGTCCAGTAGACAAGCCAACAGATAGGTCAGCAATGAAGGTGTCCTCGG
+TTTCACCACTTCTGTGGCTGGCCATCACTCCCCATCCTGCTCTTTTCGAC
+ATCTTAACTGCCTCGATACTCTCGGTTACTGACCCGATTTGGTTAACCTT
+CAAGAGAAGAGCATTGCAAGACTTCTCGGCGATTGCCTTTGCAACCCTCT
+TGGGGTTGGTGACCAACAAGTCATCACCAACAATCTGAACACTGTCTCCA
+CACTCGGTGGTCATCTTGGCATAGTGTTCCCAGTCATCTTGGTCAAATGG
+GTCCTCAATGGACACGATTGGGTACTCAGCAACGAAGGACTTGTATAGGT
+CCTTGAGTGCATCTNCAGAAATCTTCTGAGAACCATTGNTGNTCTCTTCT
+TTGAAGNTCAAGTCATAGGTCTTGTCTGATGAGTAGAACTCGGAAGCGGG
+CACN
+>RL782F  CHROMAT_FILE: RL782F PHD_FILE: RL782F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:49 2001
+CCCTTTAAGTAGACAATAAAAAGTATGTGAGATGCCAAACCGATCCTTCA
+GTAGAGAAATCAAACAAGTTGGAGAACTTAAATATCTAGATAGTAAACAT
+ATAANCGGGTTTAAATAACATAAGCTCCATAATGCACTTTCCTTTTAACC
+CTTCCATAGCTTCAAAACTTTATCTTTACCACCAGAGACTACTTTCTCAC
+CATCTGGACTCCAATCCACAGCAAAAACCTCATCTGCATGACCAGGAAGA
+TCTTGTTTTAACTTTTTCGTCCTAATTTCCCAAATCTTGAGAGTAGAGTC
+TTTACTGCCACTCAAGAGCAATCTACTGCCTGCAGACCAACTGACTTGAT
+AAACAGGCCCAACATGGCCACGGAAAGCTGCAACGAATTGAACTGTGACA
+CCGCCCCATACCCTAACTGATCTATCGAATGAAGGACTTGCGGTCCACTT
+CCCATCAGGCGAGAAACACACACGATTCACCAGCTGATGCCGACCGGACA
+CGCGCCCCTCTATGCTGCCCGCTCCCGCACGGTTGCCACCCGCCTCTCCC
+TGGCCCCGCCCCACCTCGTCTCTCTCCTCCCCGCGGCCACCTCCACCCTC
+CCTCGCCGCTCGTCTCCTCCCCCTTCCTTCCAACCCACCCAATGGACCCT
+GTTGCCTCCCCCGCTCCCGGCCCGTCGACCCCGCCGCTCTTCTCCACCCC
+TGCCCCGCTCCCTCCTCCACCCCTACCCCGTGTCTGGATCCCGCGCTTCC
+CCCACACTCCCT
+>RL782R  CHROMAT_FILE: RL782R PHD_FILE: RL782R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:17 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCTCGTCGCGATGAACATGGAGACGAGCCAAGCAGGGATGGGGAAC
+AACACGGTGATGTGTCTGTTAACAGACCCAGAAGGAACGAACTTAGGTTC
+CGCCATGTATATTCCTCAGACCGCTGGTCCTTTGCAGCTTACTCAGCTCG
+TCAACAGGTTCCTCAACAACGAGGAGATGTTGCCTTACAGTTTCTATGTA
+TCAGACGAAGAGCTTCTTGTTCCTGTTGGAACTTACTTGGAGACAAACAA
+CGTGTCTGTGGAGACGGTGTTGACCATAGTTTATCAACAACAAGCTGTGT
+TTCGAATTCGTCCAGTTAACCGCTGCTCGCAAACTATTGCTGGTCACGCG
+GAAGCTGTTCTTTGTGTTTCGTTTAGTCCTGACGGTAAGCAGCTAGCAAG
+TGGCTCAGGTGATACAACTGTCCGGCTTTGGGATCTCTACACTGAGACTC
+CATTGTTTACTTGCAAAGGACACAGGAACTGGGTACTCTCAATTGCTTGG
+TCTCCAGATGGCAAGCATCTTGTGAGTGGTAGTAAATCAGGTGAAATCTG
+TTGCTGGAATCCAAAGAAGGGAGAGTTAGACGGTACTCCACTTACAGGTC
+ATAAGAAATGGATTACTGGTATCTCGTGGGAGCCAGTCCACCTTAGTTCA
+CCATGCCGTCGGTTTGTAACTTCTAGCANAGATGGGGATGCAAGGATTTG
+GGATGTTACTCTTAAAAAAACTTTGATTTGTCTCAGTGGACACACACTTG
+CTGTGACTTGTGTCAAATGGGGCGGAGACGGGATTATCTATACAGGTTCC
+CAAGATTGTACGATAAAGATGTGGGAGACTACTCAGGNGAN
+>RL783F  CHROMAT_FILE: RL783F PHD_FILE: RL783F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:27 2001
+TTCATCTGGAGAACTTATATATATCACTCTTTTGATCAACCTAACACAAA
+GATCAAGTTGGTGCAAAATGTGCAGAGAGACCTTATTACAAAAGCATAAT
+TTTAACTAAATACTAGGAGAAGCTTACACTAACCTCCAAAACAAGTTACA
+ACTTTCTCAGTGAGTCTACGGTTTTACTCGTGGAGCCAATCTAAACTGAA
+ACATTCGTCAGCAGCTTCAATCTTTAACCCTGGAACGGCCCTCTCAACTT
+CTTCCCATATAAACGCGACGTCCTCATTGCACATCACATGGCGCTATGGA
+CTCCACCGAAACACCTGCTCCTCACTCTACTATCGATGCCCCTTTCTCTC
+CTGCCACTTTTTTCCAGCGCTCCCCCCCCCCCTATACTCCGCGGCTCACT
+CCCCTCCCTCCCCCACCCGCCCACGTCTCTGCCCCCAAACTGCCGTTTCC
+CCCCCTCTCCCCGCCCCCCCCGCCCTCCCCCCCTCCCCCCCCCCCCTCCC
+TCTCCCGCCCTTNTACTTCGCCCCCGCCCCCCCCCCCGCCCCCCCTCTCC
+TCTCCCGCCCTCTCCTCCTCCCTTTCTCTCTCCCCCCGTCCTTCGCCCCC
+CTTCCTCTCCGCTCCTTCTCCTCCCGCTCCGCTCCCTACCGACCCTTTCT
+CCCGGTCCCTGCCCGCTCCTTCGCCTCTCCTTTCCCTCCTCCCCCATCTC
+CCCTTCCTCCCCTCCCTCCCGCTCCCCCCTCCTCTTCCTGTTGCTCGCCC
+TCCACCACCTCCCCATCACATTACATA
+>RL783R  CHROMAT_FILE: RL783R PHD_FILE: RL783R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:52 2001
+ATXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXCTATCTCTCTCTTTTGTCTCTGTGCGTTTGGTCAGAAATCTCTT
+CCTCCTGGTTTCGACAAAGACCTTGTTAAGAAGGTTGCTGGTGAGTGTAA
+AGGTCTACCTTTGGCTCTCAAAGTTACGGGCGCTTCACTAAAAGACCGCC
+CTGAAATGTATTGGCAGGGCGCATTGCAGAGGTTATCAAAAGGTGAACCT
+GCTGATGAAACTCACGAGACTAGATTGCTTCATCACATGGAAGCTAGTCT
+AGCAGATCTGGACGAGACAGCCAGAGAGTGTTTCTTGGATCTTGGCGCAT
+TCCCTGAAGACAGGAAGATTCCTGTTGATATTCTCATCAACATGTGGATT
+GAAGTACATGATCTAGACGAGGCAGTTGCTTTTGCCACTCTTGTTGATTT
+GTCACACAAGAATCTCCTTACTCTTGGGAAAGATCCACGGCTTGGCTCTA
+CGTATGCAAGCTACTATGATGTATTTGTGACACAGCATGATGTTCTGCGT
+GACTTGGCACTTCATTTATCCAACAAAGGGAAAGTAAACAGAAGAAGGCG
+ATTGCTGATGCCAAAAAGAGAGGAAGAGCTTCCGAAAACATGGGGAAAGA
+ACTGTGATGAGGAATACAATGCAGAGATAGTCTCTATTCATACATGGGAA
+ATGGATGACATGGATTGGTCTGACTTTGACATGGACTTCCCTAAGGCAGA
+GATTCTAATATTGAATTTTTCCTCGGACAAGTATGTTTTGCCTCCTTTCC
+TCAGCAAGATGACCCGGCTTAAGGTCCTCGTGATCATCAACAACGGCATG
+TCCCCTGCGATTCTCCGTGACTTTTCAATGTTGCCAN
+>RL785F  CHROMAT_FILE: RL785F PHD_FILE: RL785F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:33 2001
+GCGCCACCCCACCCCCAACCGCGCACGAGCGCAAGGAGCACGACGNACGG
+CGCGCCTGAGCGACCGCGNGCGGCGCACAGCGCGCCCAAATACACACACA
+AGACACCCAGCCCTGANATCNGGNACTAAACGTGTCACTACATANACATC
+TCAAGTAAGAAACACTGCTGGCGACTATCGCACAGAGCGAACATTCTAGA
+CAAAGCCAACACGAAACCACAGCCACAGGGATATAGATCAGCGCGCACGC
+NAAGTCAGCGGCAAGCGAGGAACCAACGGCCGCCCCACCACGTGGAGCAC
+ATGTCGCGCCGCGTCGCCCCATGAGACGACACGAGAGCACCCCCCACTCC
+ACACACAGAGGGCGGGCGAGAGGCACCGCCGCTGAGGCCAGTAGAGACCC
+ACGATCTATAGAGAGTGTTCAGAGCTGGCGCGCCCGGCCGCGCCGCGGTA
+CGCGCGCGGTCGCGGACGCGCGCCGTCCCCCGGCGCGCGCACCGCGCCGC
+GCGCACCGGCACGGAGCGCGAATACCNCACCCCCGCCCCCGACTCACCTG
+GCATACCACCCCCACCAGCCAATGCCCAAAACCCTCCGCCGCCGCCCATC
+CCCCGTCGCCCCACACCCGACCACCCGGCCCCTGGGACAGACGGACCCAC
+CGGCAGCGCCCACGACAGGGACAGCGGACACATAGGTACAGGACACCCAA
+CGGCCTATGCGATGAGCGGAGGCGAGCAGCCCGACTCGTCATCGGGACGA
+GAGGACTGATGCTAGCCGGCTCGCGAAGGANCGGCTAGGATTAGATATAA
+CCAAAGCGCCCACGTCTCGACCGACCACCGGGACCGGCAGCCGCACGCAG
+CGCGCGCCTCCAGCTGCGCGCACGCCGCGCGCCGCGCCGCACCGCAAGAC
+TCGACCAGAACCAGCCGATCCGGCCGGCAACTAGCGGCTAGCCAGTCACG
+GCGGGCGCGTCGCGGTTGGACGCAAGATAACGAGCGCCCGACGTGACAGC
+GACNCCACAAANCGACAGCAAGACCTGGCGGCGACAGCCCACGCCTCGGA
+CGCATCCGCCGCGAACGTAGCCCACGGCACGCAACTGATACGCACAGCAN
+AGCGATACGTAACGGCTCACACTACGCCGTATCACCACACACACGCACTC
+CCGCCCCACCGCGACACACGCACCCATGTCTATACCACAGCTATCGGGAG
+GAACCGCCGCCACGAGCACCAGCGCACCGCGTCACGACGCACGCGCCTCC
+ACGNTGAGAGTATAACTAGCACGCGCACACCACGCATAGAGATCGCG
+>RL787F  CHROMAT_FILE: RL787F PHD_FILE: RL787F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:40 2001
+CCAAGCAAGAGGCGGGGCGGGNGACGCGAAAACAAGGTGAAAAAAAAACA
+AAAAAAAGTTAAAAAAAGGNGGCCCTTTTNGGGGNCCCNCAAAGAAGAGA
+AGTTTAGGGGCGGGGGCCCCCGGNGCCACGGCCNCCCCAGCGCGGGCCCG
+ACCGAAGGGNGGGCCGGGGNGGGCGCGGCGGCGAGGGGGGAGAGAGACGC
+GCGAGCGCGCCGCGGGGCACCCGGCACCGCCGGGAGAAGCAGGCACCCGC
+CGCCCAGCCCGGAGAGGGAGAGACGGGCGCGCCAGACGCCGCGNCGNGGG
+GACGCGCGCGCCGGGGCGGCCCGGCGCGGCGAGCGGGGAGAGCGCCAGAG
+GCCAGGGGGAGAGACAGGGGCGGAAACGCCCCGCGCGCGGACGGGCGCGG
+CCCCCAGCGCGCGCAAAGCAGCAGGCGGGGAAAGGGACCACGCCCCGCGC
+GCGCCCCGGCCGGCGGCGCGGCCCACCAGCCGCGCNGGCGGGAAAGGGCG
+CACGGACAGGGCGCGCCGGCAAGCGAGACCGCGCGAGCGGAGGCCAAACC
+GCNAGAGAGGCCAAAGCGGGGGGAGAAGGACGACCGCGGAAAGAGCGCAC
+CGCCCGCACACCGCGCGCCACACCCAGAAAGCCGNGGCGCAGGCACCAAC
+AGCAGCGCAGGCGGGGAGATGGCCGCGCGCACGCACGCCGGCGAAAGAGG
+GCCCCGCGGTCGCGGNCGCGAGCACCGGCGGCACAGAGCAACCAACAGCG
+CAGCACGCGCCAATAGACGAACGCAACAGAGAAACACAACGACCGACATC
+ACAAACGACCACCGCAGGGACAACAACAACCACACATCCGCGCGGACCGA
+CGCGCGAGACCCACGGAAACAAGGCCAACCCAACACGAAGCACGAACGCG
+ACGCGACACGCACAGCGCGAAAAAACGCGCACCCGACACACACAACAGCG
+CGAGCGCACCCGCACAGCCACGAACCCCAACAACCGAGACCGCGACCACA
+AACCCAACAACAACTGAAAAGCAACGGGACCGGCCACACAACACGAGAAC
+AT
+>RL787R  CHROMAT_FILE: RL787R PHD_FILE: RL787R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:06 2001
+ATTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCCCACGCGTCCGATTCCTCCATAAGCTTTTTATAAAATTATCATC
+CTCTGCTTTTGAACCCTAAGGAAGGAGACGAAATGGCCACGGACGCGTAC
+GAAGCCGCCATCAAAGGACTTAAGGATCTTCTCAGTAAGAAGACAGATCT
+AGGGAACGTGGCCGCGGAGAAGATCAAAGAGTTGACTCAGGAGCTTAAGG
+AGCTTGATTCCAGCAACTCTGACGCCGTTGAACGTATCAAATCCGGTTTC
+ACCCATTTCAAAACTCAGAAATACTTGAAGGACAGTGCTCGGTTCAATGA
+TCTTGCCAAGGGTCAGAGCCCAAAGTTTCTGGTATTCGCTTGTTCTGATT
+CTCGAGTGTGTCCATCTCACATCTTGAATTTCCAACCTGGCGAGGCCTTT
+GTTGTCAGAAACATTGCAAACATGGTTCCACCTTTCGACCAGAAGAGACA
+TTCTGGCGTTGGTGCCGCCGTTGAATACGCAGTTGTACATCTCAAGGTGG
+AGAACATTGTGGTGATAGGCCATAGCTGTTGCGGTGGGATTAAGGGTCTC
+ATGTCCATTGAAGATGATGCTGCACCAACTCAGAGTGACTTCATAGAAAA
+TTGGGTGAAGATTGGTGCATCAGCAAGGAACAAGATCAAGGAGGAACATC
+AAGACCTAGACTACGATGAACAATGTAACAAGTGTGAGAAGGAAGCTGTG
+AATGTGTCGCTTGGAAACTTGCTTTCATACCCCTTTGTGCGAGCTGCAGT
+GGTGAAGACACACTTGCATAAGAGGAGCTCACTACAAATTCGTCAAGGCA
+CATTTGATCTCTGGGAGCTCGATTTCAAGACCACCCCTGCTTATGCCTTC
+TCTTGACATN
+>RL789F  CHROMAT_FILE: RL789F PHD_FILE: RL789F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:46 2001
+NCTTGATGGTACCTGCTGTTTGACAAGGTTCAAATCATATATCTTTATTG
+GTTTGTAAGTAACCAGTGAGATTCCTAATCAGCTGCTTTGAAGCACAAGA
+AGAAAATGAAAAATGGTATATCATTGATGCTCATGTATAAGTGAATTAGA
+GATCCTACAAATTTGTATGGATCTTCCAAGAAACCAGTCTTCTTCTTTTT
+CCTATATTGTACAAGAAGGCCTACAAAAGTGTTGAACCAATGTCTTGTCT
+TCGTTCTTCGTTCTTCGTTCTTCTATCTTGAACTCTCCTTCCATCTATCT
+GACTCTCTTCTTACACTTGACTTCCAAGGCTGCTCAGGCTGAGACTTGTT
+ACAGATGCAATCTGGTTGCGGGTTCGCATAAAAGTTCTCCTCTAGCCGAG
+GGAACTCACCAGGAAGTCTAGCATAAGGAGCTCCAATCCTGGCTCCATGT
+AATCTCTTCACCTCTGAAGAAAACTCATCCCAAGACATCACACCTTCGCC
+TAAATGATCTATCAGCCTAACAAAATCTAACCTGTCTGGATTGATAGCTT
+TATTGAACCCTTCAAACCTCCTGTGCCCTTGTACAGCCTTACCCATATTG
+CTCATCATATGCACGCACCAAAACATCGCTCTATAACGCCTCGCTGTCCT
+CTCCTGTAGTCTACCGGCCTCGGTTTGCTTTACTGGGCCCCTTCCTTCTC
+TCACCCGTCCCAGCCGCCTGGACTAACCCTCTTGCTCTATCTTCCCTCAC
+CCCCCACTCCCTCTTTTCTT
+>RL789R  CHROMAT_FILE: RL789R PHD_FILE: RL789R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:13 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXAGAAAGCTTCGACGCCTCTCAGCTTCACCGAACATCACGGTGGC
+TAATACAAAAAAAAAGGCATAACATGATCAACCACTAACCCCTCAAACAC
+ACATTTTACCAAAAAAAAATAATAAAAAAAAAACCAACTCTTTTCTCCCT
+CCCTCTCTAAACATGTCAGCGGCCGGCGCTAGCCCATTGGCCGTTGCACC
+GATCACACGACGTCGTATCGGAGACTCCCTCGAAACCACGACAACCTCCG
+AACGAGCCTCCGTTTCCTCCGAGTATTGCAACATAGTCAACATCTCCTCC
+CTCTCTCCGGACTTAGACGACGTCGAGGGGACGAACGGCGCGTGTTCCTC
+ACCTTCCTCGATGGGCTCCACGTCTAGCGGATCGCATTACCACCACGATC
+ATCACTACCACCCGAAGATTCGTTACCTTATCCCACGTAAACTGAAATGG
+CCGTTTCTATGCGACGGTGGATGGACGGCTGTGATTGGTCAAGGATTGGG
+GAGAAACGTGGGACGTCGGATCTTGGGTTTCCTCATGGTTCTCGTCGTCG
+TTTCTCTATTTGTCAGAGTTTCTGTTATGAGTGGGCGCGTTGCTGATCAC
+GCGCACAGGAGAGATTTGAACGAGCTTGTTGTCGTTAGGGCGTTACATGA
+AGATTGGTCGATGGCTCAAAACGCAATGTCGGAGAACGTTGCTGTCGAGA
+AGCTTCCCATTCCAGAGATATGGCAGAAACCTGAATCCAGCAATTATCGC
+CAGTGTGCGTCACGTCCCAAGAGTAGTCATTCAAGGGCACGTAGAAAAAC
+CAACGTTACCTTCTAGTACACGCCAATGGCGGNTTAAATCAGATGAGACT
+
+>RL790F  CHROMAT_FILE: RL790F PHD_FILE: RL790F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:49 2001
+GCGCACCGCACAACAGACACCGCNGCGGGCGCGACGCGNNNNNACNCGAC
+AACAACACNAANACAAGGAGTGAAACACAAGAAAANCAATGAGACNACAG
+ACACANGNTTTACACAGCAGAAGANACAGAACNGNNNNCCCCTCNNNNTN
+TNNNGGNNNNNNNNNNACTGNNGACNGNAGCANAGCAGNANANNAGNNGG
+GTTAGNANACGANAGANNNGAGANANCAAAAACCCANNNNNGNGNNNGNA
+GAGGGAGNNANGCGNAGNNNNAGNGANGGCAGCAGACAGAGGNGNCGCGC
+GCGCCCGNGCAGCCGCGCCGGCGNGNCNGNGNGGCCGNGGCGGCGCNACG
+CGGNGNNCNNCGCGCGCGGACCGAGCGCGGNCGCGAGCACGAACGAGCGA
+GGCNNCNNNNNNGCGGGGCNAGAGAGAGGAGNGCGNCCNNNCNGANCGNA
+GGNGCGCGCCGCGAAGCNCGAGGNGNCGCGCGGCGCGCGCGCCGCCNCGC
+GCNCGGNCGNGCAGGACAGAGNAGGCGNCGCGCCGCNAGCACGCGCGAGC
+ANGCACCCAGGGNCGNGCACGCGCGNGCGNCGNGNGNNAGAGCGCNGCNN
+CACGACGAGGCGCGCGCCNCCCAACGCACGACGCGNACCACGCACGCGCC
+ACAAGGACNCGCGCNCCNNCGCGCCGCCGGCCGCGCGGCCCGACGCANCG
+NCGCGCGACGACGCACGCGNNAGAACAGCACACACNGCGCGCAGCCCCGC
+GAGCNCCGGCGGNGCACCGCGCGGCNCNCNCGAANACACCACGAAGCGCG
+NGCGNCNCAGGACGACGAACGCGACGCGGCGCGANCCGGCGCAGAGGACC
+GCAGGCGCGCGACGCGCGGCGCACGTCGCACGAAGACGGCCCCACNGCNG
+CGCCGNCGCAGACNGCCNCNGGACGCGCACGAGANACGCGCNGACAGNGN
+GNCAACCNNAGCACCNNGGCGGCGGCAGGCNGNGAACGNCGCGAGCAGNC
+NNNGAGGGCGAGGNCGCGGCGCCAACGCNGCGCCAACCCAGAGCGGAACG
+NGACACGCGACGACACANGAACGAACAGNACGGATTTAGTAAGGATATGA
+GAGACGNACTCGCTGACCTCCGTCGCCGGGCGTTCTGGGGACGCGGCGTG
+CGCGACGCAGCGAGCCGCACGAGCGAGGACACGAGNCGGCGTATAACGCG
+TTACTACGTCCACTCGCAGTCGTCGNGCGGACTGTGTTGCCACACTGCTG
+AGGTCTGCTCCGCGNTCACGNGCACGCCCTCATCGTTCATCCAAACGTGC
+CTGCGTCCGAGTGNTCTCCAGATCTTGTCACGCAGCGACGACACTTACAT
+CCGGCTACTCCATACGCCGCTACAGNCACACTGTCATNTGATATGTGTTT
+CAGATACTACGAANCCAAGTTACATTCATACTACACATCCATACGAGCAC
+GACACNTCGCGATTGCCGTACCCCACCACACGCAATCGATAAACATCGCC
+ATACGTCCATGCGAAGACGCGACACGAGAATCCGAGTCACATGACACGCG
+ACCGCAAGTACAGCGCCATACTATTCACAACTTCAACATAGTCGACACGA
+CACACCACGGCACGGCTCTGAAGAGATACGCTGGACAACCACATCAGGCT
+CGTACACCACGAGACCGAACTTCCGAGTTATAAATAGTGTACCACCACAC
+AGCAGTACCACACCATAGAGAAACTGAA
+>RL790R  CHROMAT_FILE: RL790R PHD_FILE: RL790R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:17 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXAGGAAAGAAGGTTTATTACTCAGCAAATCTAGTTGTTACGTTAA
+GTTTCTGTGAGGTTATTACTTTGAGAGACGAGACAATGTTTGGTTTGAGG
+AAATCTCCAGCAAAGCTTCCCAAGCACAACTCTGCTGACCCTGGCTTTCA
+CAAATCTTCCAAGCCCAATCCTTTCGATTCAGATGATGAGCTGGACAACA
+ACAGCAACAAACACACACTTAACCCCTCCAAGAGGACTTCCTCTGAGCCT
+TCCTTGGCTGACATGACCAACCCTTTTGACGGCGAGGATAAGGTTGAGAA
+AGGGTTTACTTCGTCGTCCAAGCAGCCCTTGACTTCTAACTCCAGATACC
+AGTACAAGAACAACTTCCGTGATTCCGGAGGTGTTGAGAACCAGTCTGTT
+CAGGAGCTTGAGGGTTATGCTGTCTACAAGGCTGAAGAGACTACCAAATC
+TGTACAAGGTTGTCTCAAGGTGGCTGAAGATATAAGGTCTGATGCTACCA
+GGACTTTGGTCATGTTGCACGAGCAGGGTGAGCAGATCACCAGGACGCAC
+CATAAAGCTGTTGAAATTGATCATGATCTCAGTAGAGGTGAGAAGCTTCT
+TGGTAGCCTTGGAGGTATGTTTTCAAAGACTTGGAAGCCGAAGAAGACTC
+GCCCTATTAATGGCCCTGTCATTACTAGAGATCACTCGCCAACGAGAAGA
+ATTAACCACTTGGAGAAAAGGGAGAAACTGNGACTNGACCCAGCNACCAA
+AGCACAATCAAGAAGCAGAGAACCGCTTCCTGAATCAGCTGATGCGTATC
+AGAGAGTGGAGATGGAAAAGCNNCAGCAGACGATGGGCTTTCAGATTTGA
+GTGATCTACT
+>RL791R  CHROMAT_FILE: RL791R PHD_FILE: RL791R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:53 2001
+AATXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXCCGCCCTTTTTTTTTTTTTTTACGTTTAATCTTTTGATGTGGT
+CTTGAATTGCTTACGAGAATTGAAAAGGTGAATGCAAAATATAGAGACAA
+GTAACAAAGAAAAACACTGAAAGGAGTATAAACAGTAAGTCGCATTGTAT
+GATAAAACAATGTTTTTATGATCACAAATCTTTTTGCTTAAACCCAGAGT
+TTTACTTACTGCTGTTTCGAACCATGGGTCTCGCTATTCACAGGTGTGGA
+TGAGCCACTACGCTTACTAGACTGACCTGTAATTGTTCTACCCATGAATC
+TCCCTGCTTTGCTCAATCCACTACCCATTGCTCCAAAACCGGTACCAACC
+ATCCCGACGCCCGAGGTCACCCCGGTCCCAACCAGGCCTACTCCTGCACC
+AATCCCTGTCCCAACCAATCCTACACCAGTACCGAGCCCACTTCCAACTG
+CATCCATTGTGCTACCTATCATTCCTGCTTCCTTCAGTCTCTTCCGCTCT
+TCCATTATTTTCTTCTCTTCTTCTAACGCAGCCATTTGCTCCTCTTTGTT
+GAACTCATGATAATGTACCTTTAGGGTTATACTTCCTCTATCTTTCTTAT
+CTTTTACTTTCAGAGTATCAAGTGAAGACAACAGGTTTAGCTCCATCTCC
+TTTGTAACCCCGACCTCCAAACTGCTTAAAGGAAGTTTCACAAGTCCTAG
+ACGCTCGTCTTGGCCTACGTCTTTATCAAATACCTCTATAGTGAGCGACT
+GAGTTTCTTTGTCCTCTGCAATCAATTCAAAGGTTTGGTCCCAACAGGAC
+TCAGGTTGTTCTCGATTGTCTTTGTTATATACTTG
+>RL793F  CHROMAT_FILE: RL793F PHD_FILE: RL793F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:34 2001
+GGCTTTTTCTGTACACAGTGAACAAAAACTGGTAGAAATCCCAAAGACAA
+CATCCATAAAATAAATCAACAATATTATATAACCTAAATGCAAAATGAAA
+GAGAAATTATGCTACGGAACATCTTGTTGTTATTTGATTGACAGAGTCAA
+GAGAATGAAGAAAAAAGAACAAAATCCATTCAACGATCACTTAAGCAGCT
+GCCTGAATCGGTGGTCCACTTGATCTGTTGAATCCACCACGGCCTCCTCC
+CCTTCCACGACCTCCCCTACCACGCCCACCTCCACGGCCCTGATGAGGAG
+CACCGTATCCGTAGTCTCCCCCATCATGCTCAGCTTCATAGTATTGTGGA
+GGACCATTGAAACCTCCTCTTCCACGACCACCGCGAGTACCTCTTCTCCC
+TCCCCCTCTTCCTCTACCAGATGGCCGCTCAGGCTCCATGCCTCCATCAT
+CGTACTCATGATCACTGTGGNNNNNNNCCCCTCTCCCTCCCCCCCCCCCC
+CCCCCCCCCCNCCTCCCCCNTCCCCCCGCCCCCGCCTCCTCCCCTTCCCT
+CCCCCCGCTCCCCCCTTCCTCCTTCTCTCCCCTCCCACCTCCTCCTTCCC
+CCCCCTCTCCGTCCCCTCTCTCACTCTTCTCCCNCGCGCCCGTCCCTTAC
+CCCTCCGTTTTTCCTCCTTCTTTCCTCCCCCCTCTCTCCTCTCCATTTCT
+CACCACCCGCTCTCTCTCTCTTTCTCTCTCTCCCTTCCCTCNTCCCCGCC
+CCTCGTCCCCTCCCCTTTATTCCACCCGCCACCTCACAATATAATAA
+>RL793R  CHROMAT_FILE: RL793R PHD_FILE: RL793R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:00 2001
+AGTTTGTACGACCTGCAGTACCNNGTCTCGGATTCCCCGCTCGACCCACG
+CGTCCGAGAAAGAGTACTCGCCTCCATCATCGTACTCATGATCCTGTGGA
+GNACCCNCANTGNTATCATAGTGNNNTTGANGGGGTAGACTAATGTTATG
+GTGTTGAATATAAAGAATGTGGAGGAGGATTTGTTAATGGAGGTTATAGT
+TATGACATCAGGCAGTGCCGTCCCTTTNTGATAACTGAGCCTCCCGGTTC
+TCTGCATCCTCCACCCCTCACCCCCGTCCGTCTTACCGCCCCCCCCCCAT
+CAATTCCCTAGTCCCCGGTCTCCTCGCTCCTTCGTTCCATCCTTCCTCTC
+ACCTCATCCCCCCATCCCTTCACCATCCTCCAACCCACTTCTCCCTACTC
+CTTCAATCTTCACCACTATCCTTCCTCCCCCCCACCTCGCTTTCATCCTC
+TCCCTCTCCCCCCCCCCTATTTCTTTTATCCAACCCCCCCCACATCAACT
+ATCGTCTCTTATTCTTTTATCCCCCCATTGTATCGTCTGCTCTATTCCCC
+CCCCCCCCCCCCCCCCACCAGTACCAACCTGCCATTCTTATTAACCCTCC
+CCCCCCCCACCCCCCAGCATCCCTCTTCCCCCCTCCACTTCCTCATTCTC
+CTCACCCCCTCCACCTATTTTCTTCTCCCCATATCCGCTATACAATTTAC
+TTCACCCCTACTCCCTAACTCACCTCACACACCTACCCATCCTACATTAT
+CTTTTCTTCCCCTATTCTTTTCACCATCTCCACCTCTTTCCCCTCTTCAC
+CTCCTGTCAACTACTCCCACACCCATATTACACGTCTCCTCCTCTTCTCT
+TCACTCACTCGTTCCTTCATCTTTCTTATCCATTTATCCTATCAACCTCA
+TCATTATCCCTTTGTT
+>RL795F  CHROMAT_FILE: RL795F PHD_FILE: RL795F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:40 2001
+CCTTGGNGGGAGACAACCTTCTCGTGTTCGAAACTCAACGACAGCAACAT
+CAACAGCAAAAGTTACACATCGCAAAGGAATTAAAAAAAAAAAAAGAGAA
+CATGAAATGTGGAATGGTTGAAAGGTGTTAAACAAAGTTTCATGGCACGC
+TTAATAAAAAGAGTTTTTGCTTTTTAAGCGAACTGTCGACCACCATCTGA
+AGACGAACAAATGGTACCAACCACATGCGGTACTGACGCCATCACCCCTC
+ACCGCCGCCCTCACCACCCCTAGTGAACACACTGCCTCCAATACTCGCCC
+CCCCATGCCCCACCATCCCCCCCGCACCCCCCGCCTCCCCGCCCTTGCTT
+ATATCCCACCTCCCCCCCCCCCCTCCCCGCACACCCCCCCCGCAAACCAA
+CCACTCCCCCCTCCCCGACACCTGCCCCCCCGCCCCCTCCCCCCCACCCT
+TCCCACCCAACCTCCCACACCCTCCCTACACCACCTCCCTCATGTTCTAA
+CCACCTACTCCTCCTCCCGTCCTCTCTCTCCTCCCCCCCCCGACCCTTAA
+CCAACCCCGCCCTCGCATCGATCCCCCTCCCTCCACACCACACACCCCCA
+ACACACCCCCCACCTTCCCACCCCCACGACACACCCCACGTATCGAACCA
+ACCCACTCCTTACACCCACTTCTCCCCCACACCACCCCGCACATTGCACA
+CCCCCTCACCTCCCTACCCACCCCCTCGCACCCTCACCCAACCCCTCCCA
+AAAACTATCCACCCCTCACCCCCGCACTTCATT
+>RL795R  CHROMAT_FILE: RL795R PHD_FILE: RL795R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:07 2001
+CTTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXTGGCTCGGAAGGTCTGTTTGCTCAAAAGACTCTGGAGATCACT
+GATGATATACTCCAGACTTACAAAAACCAAGGTGGCTATGATTTGCTGGG
+AAGAACCAAGGATCAGATCAGAACCACTGAGCAGGTTAACGCTGCTCTCA
+AAGCTTGCACAGATTTGAAGCTAGATGGCCTTGTTATCATTGGAGGTGTA
+ACATCAAACACAGATGCCGCTCATCTTGCTGAATTTTTCGCTGAAGCAAA
+ATGCTCAACAAAGGTAGTTGGTGTTCCAGTCACTACAAATGGAGATCTCA
+AGAATCAGTTTGTGGAGGCAAACGTTGGTTTTGACACCATATGCAAGGTG
+AATTCTCAGCTCATTAGCAATGCCTGCACCGATGCTCTATCTGCAGAGAA
+GTACTATTATTTTATCCGTCTCATGGGTCGGAAGCACTCTCATGTTGCCC
+TTGAGTGTACCCTCCAGTCTCATCCAAACATGGTGATACTGGGAGAGGAG
+GTCGCAGCATCTAAGCTCACCATTTTTGACATTTCTAAGCAGATCTGTGA
+TGCAGTTCAAGCAAGAGCAGGACAAGACAAGAATCATGGAGTCATCCTCA
+TTCCCGAAGGGATCATAGAGAGTATTCCTGAAGTTTACGCTCTGTTGAAG
+GAAATTCATGGGCTACTTAGGGAGGGTGTAGCTGCTGATAAGATTTCTAC
+TCAGCTCTCACCTTGGTCATCTGCTTTGTTTGAATTCCTTCCTCCATTCA
+TTAAGAAACAGCTACTCCTCCATCCTGAGTCTGATGATTCTGCTCAGCTA
+TCCCAAATTGAGACTGAGAAGCTTCTCGCGTATCTGGTGGAGACTGA
+>RL797F  CHROMAT_FILE: RL797F PHD_FILE: RL797F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:46 2001
+GGCCTTTCTATGTGACGACAAACAAAGTGTCCAATGTAAGTAACGTAGAA
+ATTCAGGAATCAAATAAATAAGGGAAATCCAATTCAGACTTAGATTCAGA
+TCCTTCAAGTTTCATTCAGCAAAAACAAAAAGATAAGAAAAAAAACATGA
+AAAAGGAAGATTGTTTTCACCAGGTGATTCTTGTTGTATGCCTATTAATC
+CTCCTCAGTGTTCTTGAGATTCATAAACAGATGACCAAGTGCAGTTTCAC
+AAGCTGTTTGGCCATCTTCTCCCAAACCCTTGAGGAGAGACTTGGTGAGT
+TTCAACGAAGAGACCATTCTAAACACCCTGCCACAGAACTTGCGCACCTT
+AGCCCGCCTTGCTTGCTTCTCCGGCATATTTGTAACAAGCTCTTTAATAA
+AATGACTCAGTTTCACCAGATGAAACTTCATTTTCTTCTTCGACTCTTCT
+TGCGTCTCTTTGCCTATAGGAGCCAACGACCTTAAAGCTTCAGAGATCAA
+TTCAAGAGCTTCCACTCTACGATACTCAACCTTCGGATTCACAATTTTCT
+CCAGAAGAAATCCAAACAGCTGGTGTGCAATCCACGGTCTCCTTCTAAAC
+ACTTCCTCAAGAAATTCTACCTTTATTTGAGACTTCTTGCTGCTATCAAA
+GTACCCGACAAGGACACTCCTAAACACATCCAAGATCTTCTCCAGTTCGG
+TCTCTGAGAACTTTCTCGAGTCGATTATCCTAAGACCCAGTAGGTTGAGT
+TCTGGGCAGGTTAGTGATCATCTTGTGTCG
+>RL797R  CHROMAT_FILE: RL797R PHD_FILE: RL797R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:14 2001
+ACTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXCGGACGCGTGGGCGGACGCGTGGGCGGACGCGTGGGCAAAATCAAA
+ATCGCGGCTTCTGAGTTTCATCACCAATGGGTAGTAAGAAGAGAAGCAGC
+GCCGATTCATCGGAAGATGTCGAGAAGAATCACACCGATTCCTTCATGAA
+GAAGAAGAAGAAATCCAAGCGTGATAAGATGAACGCTGACGATTCCGATG
+CCGAGGCGGCGGCGCCTCCTCCTCCCGGCGTCGCGAGTACCGGTAAAGAC
+ATGGAGAAGAAGAAGAAGAGGAGAGCTTCCGACAAGGAGAGAAAACGCGC
+CGCTCTCGATAACGACGGCGGTGAGCCTCCCCGCCGCCCTAAACCCCCTG
+CGGCGGTGGTGGTTTCTGATTCGAACTCCGACGGAGCCGCGGCGGCTTCT
+TCGTCCTCTTCTTTGCCGGAGTTGCCTCTTAGCTATTTTAGGGATTTGGC
+TTCTCCTGAGGGTTCAGTTAGAGAAGCAGCAGCTACCTCATTGGTGACGA
+GGCTGCAAGAGATTCAGAAGCAGTACGAGATGTTGCCTGATAAGGGATCC
+GTCGACGGAGGATTGATGCTTGAAGCTGAGAAGAACGATGGGTTGGATAG
+CTGTGCGCCGCATCTCAGATACGCTCTACGGAGGCTTATTCGTGGTGTCT
+CTTCTTCAAGAGAGTGTGNCAGACAAGGATTTGCTTTGGGGCTGACGTTA
+CCTGTTAGCTTAATCTCTAGCATTAATGTGGAGTCGCTACTCAAGCTCAT
+TTCTGATTCTTTGTCCGTTTCTTCGTCCATGAAGGGACAGGATGTNGAAG
+AATGCTTATTGGGTCGGNTGTNTGCCTACGGTGCGCTAGCACGATCTGGG
+AGACTTGGTGN
+>RL803F  CHROMAT_FILE: RL803F PHD_FILE: RL803F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:40 2001
+AGAATAACTTGGGNNCNGGCTGACTGTACTACAGCTAAAGATCAAAGAAG
+GACGTAAGCTCACTTCAAAGTTAAAAGCCTCATCAACAGAAGAGAAAACA
+AGGAAAGCGAAATGTGTAAAAGTATTTTAAAAGGGAAGAGTCTCTACTCA
+GTGTGTAAGTGAGGCATATGTTTTAGGGAGTCTCTTCCTCCACCGTACGC
+TTTGTACGGATCCCCAACTTCGCCGCCCATGGTTCCCTTGTCCTTTGTTA
+TCGTACCTGTTTCCGCCCCCGCGCCGTCTTCCCCCTCCGCCTGCCGCCTG
+GCCCCCCCGCCCCTCTCCCCCCCCTCCTCCTTCTCCCCCTCCCCGCCCCG
+CCCTCCCGCGCTGCCCTCCTCCCCTCCCCCCCTCGTCCCTTCTTGCTTCC
+CCTTCCCCCCCCCCCCCGNCCCCGCCCCCCTCTCCCCGTGCCCCCCTCTC
+CCCCCCCCTCCTTCCCCTTTTCTTTCCCCTCTTTGTGTTTTCTTCCGTCC
+TTTCTCTGTGCGCTTCGCGCCTCCCCTTCTCTTCCTCCTCCCCCCCTTCC
+CCNTTCTTCCCTTCCTCTTCCTGTCTTCCCCGCTCCTCCTCCTCTTCCTT
+CCCCCTCTCTTCCCCCCCCCCTCCCTCCCTCCCTTCCTCTCGCTTCTCTT
+CCCTCTCCCTCCCTCCCCCCTCCCTCTTCTCCCCCTCCTCTCCCCTTCCT
+CTCCGTCCCGTTCGTCCCCTCCTCCCTCCCCCCTCCCCCTCTTCTCCCCC
+CTCCTCCCCTCTCTTCGCCCTTCTCCTCCTCTCGTCCCCTTCCGCTCCCC
+CCTTCCCCTTCCCCTCCTCTCTCCCCTACCACACCATAACAAA
+>RL806F  CHROMAT_FILE: RL806F PHD_FILE: RL806F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:50 2001
+CTTTGGCCTATGATATGCTGGCATATACCGCAGTAAGAACAAAACATGCA
+ACACCTACAAAAGAGCCAACGTCGCCCAACTTATCAGATCATTGTGCTCC
+TCACGAAGACGAATGTAGTATACAAAAGAAGTGAGTCCATGTCCTTCCCC
+TTCCACAAATGCATATTTACATTACCACTCATAGGTAAGTTTAAAATACA
+AAGATAAACCCAACAAGACGAGTCTAGGCAACAGCAGCATGTCCTGAACA
+AATCTTCGAACAAGAGACTTCTTCGTTTTAAAGGCCACCCTATTTATGTC
+TTAATTACCTATTTACATTTGTAACTTTAGTGCGGAATAAGATCCGTTTA
+CAATCCCTTTTTCAGTCTCAAGTGGCGAGTAGGAGAGATCCACAGGCATC
+AGAAGCATGTTTGCCTTCTCCAAACACCAAGTTTCCTCTTACAAATGTTG
+AAACTACTTTACCTGATAACTTTCTTCCCAAATAAGCTGAGATGCCAGGG
+TGTTTGAAGTGAATAGGATGGTCTTCATCAAGATCAAACTCCACTTCAGG
+TTCCCACACAACAATATCCGCATGTTTCCCAACCGCAATAGCTCCCTTAG
+AGTGTAGTCCAGCGAGTTTGGAAGGCCTATCGCTCCACCAAGAAGCTACC
+TGCTCGAGAGTCACTCCATACTTTCTTCCGTATGACCATGTTACTGGTAG
+AACAAACTGTAAAGACGATATCCCACCCCATGCCTTCAAGAAATTACCAT
+CACTCAAGAGTTTGAGTTCAGGCTTTGTTGGTGAAT
+>RL806R  CHROMAT_FILE: RL806R PHD_FILE: RL806R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:18 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXAGAGAGATGGAGAGGTCGCTGCTTCAATGGAGATTACTGCCTCT
+TCTCGCTCTCATCGCTTCTCTTCTCTCAGTCTTCTTCGCTTCTCGCTCTT
+ACGAGGAGACGCAGTGCAGTCTTCTTCCTCATGATCACTATTGGATCTCG
+AGCAAACGCATCCTCACACCAGATGGTCTCATCTCCGGCTCTGTGGAAGT
+GAACGGAGGCGTTATTGTGTCGGTGGTGAAAGAGGAAGATTGGTATAAGA
+AGCAGAGGAGCCGAGTGAAAGTGATTGACTATGGAGAAGCTGTCATCATG
+CCTGGTCTTGTTGATGTGCATGCACATCTTGATGATCCTGGACGAAGTGA
+ATGGGAAGGGTTTCCTTCTGGAACAAAGGCTGCTGCTGCTGGGGGTATCA
+CCACGTTGATTGACATGCCCTTAAACAGTGACCCTTCAACTGTATCTCCT
+GAAACTTTGAAACTCAAGATTGAAGCTGCTAAAAAGAGAGTATTTGTTGA
+TGTTGGTTTCTGGGGAGGGTTGGTGCCTGATAATGCACTCAATTCAACTG
+CCCTTGCCTCTCTCTTAGATGCTGGAGTTCTTGGTCTCAAGTCCTTCATG
+TGTCCTTCAGGGATCAATGATTTCCCCATGACANACATCACTCACATAAA
+GGAAGGTCTTTCTGTATTAGCTAAATACAAACGACCGTTGCTTGTACACG
+CAGAGGTCGAAATGGATTCAGAAACTTGTTATCGGTTACCGAGAACACNA
+AGGATTGGTGGTTCTGCAGAAGGAGCACATGTTCATATTGTTCATCTGTC
+TGACGCCTCTTCTTCCTTGGAGATGANTAAGGAAGCGAAGGGCAAGGAGA
+CAGTGNTACCGTTGAACATGCCCGN
+>RL807F  CHROMAT_FILE: RL807F PHD_FILE: RL807F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:28 2001
+NCCTGNGGGATACTTATTTAAAAATTGTTACAATATGTATCGTTTTTTAT
+TTTTATTTTTTTTTATGTATATTTTTTTTTTTTTTTTGTTTGTTTTTTTT
+TTTTTTTTTTTGTTTTTTAAATATTTTATTATTTTTTGTTTTTTGTTTTT
+TTTTTTTTTTTTTTTTTTTGTTTTTTTTTTTGTTTTGGTTTTAATGAAAT
+ATATTTAATTTTATTGTTTTCTTTTATTTTTTTTTTTATTTTTTTGGTTT
+TTTTTGTCTTTTCTATTGATCCGGTTTTTTTTATGTACTCTTTCATGGAG
+AAAGAGTGAATTTATCCGATAATTCAAACTACTCGCCCGCAAAGCTGCTT
+CCTTTTTGCCCACCTGTTTTTTCCTGCCGGTATCCGTCTCCTCCGATGGA
+AGACACCCAATCGGCCGGTGCTGCACATCGACACCATTTCTATAGGGAGG
+CACTCACAGGCCCGCCCCGCACCCCACCGCCCCACCCCCCAACCGGACCG
+CCCACAGCCCGCGCGCCCGGCGCGCAACGCGCCCACCGCCGGCGCCGCCC
+CGGGGCGGACCGCGCACGCCGGGCACCCGGTAAGGCGGCCACAGACAACA
+CCCACGCCCCCCGTGCCGCGCCCCCGCACGACCGCGCGCCCGGCCGACCG
+ACCGCACCGCAGACCCGCACGCCCACGCGCGGAGCCACGCCAGGCGCCGC
+CCACGCACCCCCGCACCCGGGCCACGACGCCGCCCGCCCGCCGCCCACAG
+GCGGGCCGAGCCCGCACAGAACCGAAAA
+>RL807R  CHROMAT_FILE: RL807R PHD_FILE: RL807R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:53 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCTTCCATCGCTGCTGACCTTCCTTCATCGAAGAATCTCTCTATCT
+ATCTTTCTCCCTTCGCCGGAGCTCGTAGTGTTGGTGGTAGACGGCGGAGA
+GATGTCTATCTTCGGTGCTATTGCTCCCTCGATCTCAAATTGGGTATCAG
+CTGTGACCAAATCAGTTAACGGTTTGGTTGGGTATGAGGGAGTTGAAGTT
+ATTAACCCTGAAGGAAGTAATGAAGATGCAGCTGAGGAAGCTAACAAAGG
+GAGATGGAATCAAGAGGATCGAGATGGTTATTGGAAGATGATGCAGAAGT
+ACATAGGATCTGATGTTACATCAATGGTGACCCTCCCTGTTATTATTTTC
+GAACCGATGACTATGCTCCAGAAAATGGCTGAGTTGATGGAATACTCGCA
+TCTATTGGACATGGCTGACAAAACGGATGACCCTTATATGCGCATGGTGT
+ATGCTTCATCATGGGCTATATCTGTGTACTATGCTTTCCAACGTACCTGG
+AAACCGTTTAACCCAATCCTCGGAGAGACTTATGAGATGGCTAATTACAA
+CGGTGTTAACTTCATATCCGAACAGGTCAGCCATCACCCTCCAATGAGTG
+CTGGTCATGCTGAAAATGAGCACTTCACTTATGATTGTACTTCAAAGCTG
+AAAACAAAGTTTCTCGGCAATTCTATTGACGTTTACCCAGTAGGAAGGAC
+ACGGGTGACACTTAAAAGAGATGGAGTAGTTCTTGACTTGGTACCTNCTC
+TAAACCAAGTTCACAACCTAATCTTTGGACGAACTTGNGTCGATTCTCCT
+GGGGAAATGATCATGACTAACCTCACCACTGGTGACANAGTGGGTGCTTA
+CTTTCAACCGTGTGGNTGGNTCNGATCTGGT
+>RL808F  CHROMAT_FILE: RL808F PHD_FILE: RL808F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:31 2001
+GGTGNGGGGAACTCGGAGCCTGACCCCAACATACTTAACAGTTAATAAAA
+TTACAAGAAGCTTGAAAATTAAACGCGAGGAGGAGTACAGAACGAGGGAA
+AGGAGATCCAGGGTCAACTCTTTGGAGCTGAATGGCCCTGTGAGACTCAT
+CATCATTATCATAGTACATATAAGCAAAACCACCGTGACGTGTCATATCC
+ATACCGGCCATCTCATCCTTCTCCGATATCCTCAGCAAACCGAGCCTTTT
+GAGCAATGAAGAAGAGTGTTCCCATGGTAGCACTAACCCATCCTGCAACC
+ACAAGTATTTGAACTAATTGTGCTCCCAACAGCTTCCCTCCTCCACCCAT
+TAACAGTCCATATGGCCTTCCAGGTGTCTCGCCGTAAACCTCGTTTAGAT
+ACTTCTCCTTTGCAAAAAGCCCTACGAATATCAACCCCCACGCACCACAT
+CCTCCGTGTAGTTGCGCCGCCTCAAGGGGATCATCATACTGTACGATCTC
+CGCGAGCTTGTTGCAGCCGATGAGAACGAGGGCAGCCATGAATCCACATA
+CGATCGCAGCCCAGGGCTCTACCACAGAGCAACCCCCTGTTATGGCGGCG
+AACCCACCGAGTAGCCCGTTACATACGTCTGTCACGGCCCAGCGGCCAGA
+GAGAAGACGTTTACCGAAGAGTGTGGCTAGAGCTGCGGTGGATCCAGCAA
+GCGTGGTTGTAACCGCTGTCCGTCCTATTCCGTCCATTGGCCGTACTAGA
+ACAGGTCCATACGGAACGAGGATCTG
+>RL810F  CHROMAT_FILE: RL810F PHD_FILE: RL810F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:37 2001
+NGTGANGGAAAATCAATTTATACAACCAATAGGGGGAAAATAATCCTGTA
+ACTTTCAAATAATTGATACAACTATTTTAAAGCAAGACTCTTTTCTGTTC
+AGATGTTTACTCTGTCAAGTGTCAACACTTTGAACGAGTATGGATAAGCA
+AACAACCTTGTAAACAAAGACATGACTCCCAGTTCCTATTAACAGAACCA
+GAAACTAATGAGAAAAGAGTCAGTCCAGAGGGTTACTCTGTCAACGCTTA
+TATGACAAAGGCTTCTTCCTAGTTTTGGAAAACGCGCCTTTAGCTTTTGG
+AGCCTGAGGGTGGCCATGTCCTCTATTTCACTAACAGATAGATCTCTACT
+GAAAAGTTCTTCCGAGTACAACCCCACTGAATATAATAGCACCAAACCAC
+TTGTTCGACACAAACTTTTTACTGCAGTCAGTGCGAGATGACAAGTCAGC
+TGTCCCTATTTGCCATCCTAAGTGTCCTGATGCAGCGACCAGTGATGCGT
+AATATTGCCACCCGAGATCTGCACTGAGTCCAGAAAGTGTAAGCAAACCC
+ATGGATGCTGTGCCAAATCCAGTTAGCCAAAGCTTTGTATTATCACCGAA
+TCTAAGAGCTGTTGACTTCACACCAACCTTCACATCATCTTCTTTGTCCT
+GATGTGCATAGATAGTATCATACACAAGGGTCCAGCAGACTCCTGAGAGA
+TAAAGAGGGAGAACAACAGCTGGTTCTAAGCTTCCTTTAACAGCAGCCCA
+TCCTAACAATGCTCCCCAGTTTATGGTCAAACCT
+>RL810R  CHROMAT_FILE: RL810R PHD_FILE: RL810R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:04 2001
+ATTAATGTNNNCATGCTGTGACGCTGCAGTACCGGTCCGGGATCCCAGGT
+CGACCACGCGTCCGCCCATGCGTCCGNNNATGTGTNNAGTTAGGGGTAAG
+GTTATTNTTTTTATTAATTGTTGTTTGGTTGTGTGTGTTGTATGTTGTGT
+AATGATTTGGTTTATTTTTTATGGTAATTGTNGTTGTTTGTGTTTTTATT
+ATTTGGTTTTTTATTGTTTTGTTTTTTATATTTTTTAATTTTATAATAAA
+TATAGTGGGGTTTTTTTTTATTTATTTTTTTATGTGTTTTTTATTATGTT
+ATGATTTGATTTTAGTTTTATTTTTTTTTTTGTAATTTTTTTATTTATTG
+TGTTTTTTTGATAAGATTTATTTATATTTGTTGTATAATATATTAATAAT
+TATTTATATATTATTATTATTTTTATTATTATAATATATATTTATATTTA
+TTTTTTTTTATTTTAGATGATATATAGTTTTATTTATATTTATTTGTATT
+TGTTTTTGGATATTTATTGTTTTGTTTTTTTTTATTAATTTAATTTATAA
+TTAAGATTTTTTATTTTTTTATTAAGTAATAAGATTTATGGNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>RL811F  CHROMAT_FILE: RL811F PHD_FILE: RL811F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:40 2001
+CGCCCCGCGCGACCCCNGGCGCGNNGCGCGCNNNCGGCGACGAGACAGCG
+AGGAGCGAGAGACAATAGCATACATAGAACAGAACTTTGNGGCCCGCCGC
+GCCGCGNNCCCTTTCNTCNGGGNNNNAACCTTGNACNCANCACAAAAAGA
+NTTTNNNNAAGAAAGNAGANTGAACCCGGNCGNCCAGGAGCACGCGCGGA
+GACCNAGCACAAGACGAGCGGGCGCGCCGCCCCGCGGGCCGCGACGCNGG
+GCCGCGCAGGAGAGNAGAGACAGAGAACAGAGCACAGACACAAACAAGCA
+AGCNCNGGCGCGAGACGACGTGGGCNGCGCCGAAANGCGCGGCAGCCNAG
+CCGCCGGCGACGCGAGCGCCACCACCCGCGCGCAGNACGGCTGTGGTATG
+CTACCTGCGTCGGTCTCTCCTCTTGCGCACGGGACTCGTCGCGCCGCGTC
+AGCGCGCGCGACGCGCCCATGTCCCGGCGCTGACCAGGTGCGCTGTGGAG
+GATCGGATCACGCGGGCGTGCTGCGTGATGGGCGAGGTGCACCGGTGCCC
+GCCGTCGACATCGAGACAGGANGACACGNGCAGGAGACTAGTTGTCAGTC
+GATGTGACTGATGCGCGGGCGCGTGTNGTGTCGTCGTATGATCATACTTA
+ATGGAGATGCGTGTGAAGAATTGCCGGCGCACACTGCTACGACACAGTAA
+GGCCATGATGCGGTGCCGTGCCGTGTCCAATCACCACAGCGCAGCGCGCG
+TGGACAAGCNACAGACGGATGACTCGGTGGNGACACACGGCAGNACCGAG
+AGGNAGNCGAAGCACAGGTGCTNATTGTGTTCTATCGTGCTCGGCCNCGC
+ATCTCGAATATACATCGATTGANAGTCGCAGTTGGATAGAAAGAATAACG
+GAACTCCGGGTGGCACGTTCGCACCATCGCACACTCTGCTGTGCGCGACG
+CCCGTGTAANACCTACGGCGAATAGCGCACTATCACTAGCCAGAGCGTGC
+GCCCCGTGCACGCGGCGTACTCGAACTCTGTCACACAAACGTAGCGTCTT
+ACACGCCCGTTAAACATACAGAGGAGCAAGTCGAGATCACAAATCGAGCA
+CCAGCGGTCCCGCACCTGTGGTCAATAACGTATCGTCCGACTACATCAAG
+GCGAGATTGTATAGTAGGCGTATAACATCGCGCACTACATGCTAGAACGC
+AGCATCGAACGTCTACCGAGGGCGTATTCGTAGGCGTCCTCACGCAACCG
+AATAACTAAGTGGCTACGACGATTGAGACGACAGACACGACGACGTATGA
+GCATAGCGATGGACACGACAACACCATGACATAGGCTCCGATCACACATA
+CACGCAACATATCACGNGTTCGACTACGCTACAGCTGAGTGCACGCTCGT
+GTACTTNACCTAGACATCACAACCTAGGACTAACGACGTCGAGCGACGCG
+ACGAGGCGTGCTGCGTNGATAATANTGTTACCTCATCTCACACACATACA
+CCAATCGCCAACATAATATCAATCT
+>RL811R  CHROMAT_FILE: RL811R PHD_FILE: RL811R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:07 2001
+NAAACAAAAAANNNNTTCNNTTNTTTTAAAAAATAAAAAAAATAAAAAAA
+NAANCTACGGGGCGACTGGGAGGCACGGTCCGGGGATTCCGGGGCGGACC
+CAGGCGNCCGGGGCGCGCGCGCGCGAGACGCGACGGAGGGAGCAGCCGGC
+AGAAGTGAGCGTAAGAGAAGACAGGGAGGCAGGCTGCGGACCACACGCAC
+ACGGAGCTCGCTGGCTAGAGTGGCAAGNGGACCGCTCAAAGAGAGACGGC
+AGCCAGAAGAGGAAACCCACACCCCCCTAGCAGCAAGGCTAGCAGCCACG
+GTAGCCAAACAAAGTGGAATAGAGCGGAAGACATCATCGACCCGCGAGAG
+AGTGGGGAACCACGCCGCCGCTGAGGAGCGCGCTACGGAGACCAGAACAA
+CAACCATAGCACCGGGGCAAAGGAGGACGATGACGTAGCCCCAACGCAGC
+GCAGACCAGCCACCACACCCAACCCCGCACGAGCATAGCACACAGCCCGA
+GAGTCCCCCGNCACGATGCAGAGTATCGCCCGTCCGTCGACCGCCGCGGA
+ACACACCGAGATAGGGGGGAAGGGCCCCACGAAGACTCAAGCGCGGGCAG
+ACGCAACAAAAGGACACACCAACAGCCCACCGCTCTCACGAGCAGACTAC
+ACCAAGAAGGACGACGAGACACAGCCCGCGCGCGAGGCGGCTTAAACCAC
+ACCCCACCGGAGCACGCAGAGAGAAAATTCAATAGCCACATCACTGAACC
+CTAGTGGCCCGCGCGCCCCGCCCACGCATACATTCGAAACACAACGGAGC
+ACACACGAGGATGGCCAGCCAACCCCCCGCACGCGCACGCACAGACACGC
+AACCACCAAACCACCCCACCCCCAGTACATGCACCAGCGCAAATGCAGAT
+GCAGTGGTTCGGAGACCAACAGCCACGACACAGCGCCAAACCACACGCAC
+ACCCCCACAACACAGCACACCAAAGCGCCCAGCAACCACAAGAGACACGC
+CACAAACGCAACGGAAACACTACACGCNGCAATACCAAATCAACACACAA
+CCACAACACAAAAACA
+>RL812F  CHROMAT_FILE: RL812F PHD_FILE: RL812F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:44 2001
+TCTTAGTTGGAACATCAAATAACATAAGGTCATATTTCATATCCACCACA
+TTTGGAAGTTCTTTTGCTCGAAAGTAACTACTTCATATAAAATAACTCGG
+ACACAGAGATACAACACACAAGTCACATTTTATTCTTGATAATATCTTCT
+TCTTGTTTGTTTGGACAACATGATAGGAGTTTTCTCTTAAAACATAGGAC
+CAGCAGCGAGAATCTCCTCCGTGAGCTTACCATCCACACCAATCTTGTGG
+CTAGCGAACGTCTCAAAGTTCTTCTTGAACAAACCTCCCAGCTTCAACAG
+AGTCTCCTTGTGTGCGTTCTTATCAGACCACGAGTTGATTGGATCCAAGA
+TCTCTGAAGGTATCCCTTCGATCTCAGTTGGGATTTCAAACCCAAAGATC
+TCCGTCTTCTTGTAGTTCGCCTTCAACAAGCTACCTGAATGGATTGCATC
+GATGATCTTCCTAGTATACGCCAGCTTGATCCTGTTTCCAACACCGTAGC
+TGCCACCAGACCAGCCAGTGTTAACGAGCCATCCAGTAGCACCTTGCGTC
+TTCATCTTCTCGGCTAACATAGCTGCGTACTTGGTAGGATGCAGCATTAT
+GAAAGCTGCACCAAAACAAGCTGAGAATGTTGCTGTTGGCTCCTTGATAC
+CGTCCTCTGTTCCAGCAACCAGAGCAGTGTAACCACTAATGAAGTGGTAC
+ATGGTCTGCGCCAGGTTCAGCTTGCTCACAGGCGGGAGAACACCAAAGGC
+ATCACAAGCCAGAAGTATCACGTTNTTAGG
+>RL812R  CHROMAT_FILE: RL812R PHD_FILE: RL812R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:11 2001
+ATXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCCCACGCGTCCGCTCTTCCATACACTTCAAATGATCTTCTCCTCT
+AATCATTAATCTCTTTGATCTCTCTCAAGTCTTCTTGTTCGTGATCTCTC
+TTCCTTCCCCCGGTAACGGAAAAAGATGTCGGCAGGTAACGGAAATCCTA
+ACGGTGACGGAGGCTTCAGCTTCCCGAAAGGACCAGCGATGCCGAAGATA
+ACGACGGGAGCTAAGAGGGGTAGCGAAATATGCCACGATGACAGTGGTCC
+GACGGTGAAGGCGACGACGATCGATGAGCTCCACTCGTTACAGAAGAAGC
+GTTCTGCTCCCACCACACCTATTAACCAGAGCGGCGCCGCCGCTTTCTCC
+GCCGTCTCGGAGGAGGAGCGCCAGAAGATTCAACTCCAGTCTATCAGTGC
+ATCGTTAGCGTCGTTGACGAGAGAGTCTGGACCCAAAGTGGTGAGAGGAG
+ATCCGGCGGAGAAGAAGGCCGACGGTTCCACTACTCCGGCCTACGCACAC
+GGCCAGCATCACTCCATATTCTCTCCGGATCTAGGAGCCGTGAGTGACAG
+CTCATTGAAATTCACCCACGTCCTCTACAACCTCTCCCCTGCAGAGCTCT
+ACGAGCAAGCTATCAAGTACGAGAAAGGCTCGTTCATCACTTCTAATGGA
+GCTTTGGCGACGCTTTCGGGGGCCAAGACTGGTCGTGCTCCGAGGGATAA
+GCGTGTCGTTAGAGATGCCACGACAGAGGGCGAGCTNTGGTGGGGAAAGG
+GTTCGCCTAATATCGAGATGGATGAACACACCTTCATGGTGAACAGAGAA
+AGAGCCGTTGATTACTTNGACTCCTTTGGAAAGGTCTTTGTCAACGATCA
+GTTCTTGAACTGGGATCCAG
+>RL817F  CHROMAT_FILE: RL817F PHD_FILE: RL817F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:34 2001
+CCAGAAAACGGCCCGTCNCGCCTTCTAATTTGTTTCATGTATAATATTAA
+TTACAAAACACCGCCCTTTTTNGNNNNACTGNAACGAAANAGTTGAGAGA
+GGGGCCGCCGCNCGGGCGGCCGGCCGCCGCCGGAGCGGCCCCGACGCCAA
+AAAGCGCGGGAAGACGGCACAGCANCGGGCGCCCGCGGGCGCCGGCGCCC
+GCACGCGCAGACGGCACAAGCCCCCGCCCCCGGCGCGGAGCGCCACACCA
+AACCCNGAGGGACGCAGCGCGAAACCCCCGCCGACACACCGAGAGCACCG
+GGCGAAAACGCAGAAAGGCCCCAGAGGGTGCCCGCGGGGGGGAGAGCCAG
+AGCGAGCCGACGGGGACAGGAGACCCCGCGGGACGAGCGTCAGTGCGCGG
+CGAGGGCGGGAGGCTGATTTTTTTTTTTCTTCTTTTTTTCTCTTTTCTTC
+TTCTTTTTTCTTGTTCTGTTTTTTGTTATTGTTGGTCTTGTCAGCTGTTC
+GCATTGTGTAGTTATACTTTTTTCATCTCTCTTTTTATATTTTATATTTT
+ATTAGTATTTATTTCTTTCGTTCTATTTTTTGTTATCCATTTTTTTTTTC
+TTATTTTTTTTATCCTCTTTCTTTCCTGATATTATTTTTTCTTTTTTTTA
+TTAGTATGTTTTTGATTCTTCTCTTTTCGTTATTCTTATTTTTCTCTTCT
+TTTTTTTTATTTTTCTTCTTTAATTTTCTTTCTTTTAATTTTTGTCGTTT
+TTTTATATTAGAATTATGCCGTTCTTCTCATTTCTCTACTTGGTTATCTT
+TCTATGCTGTGGTACTATTTTATCGATTTTATTTTCACTATCACCTTACA
+TTCTATATCACCCTTCTATTTTCGTTTATCTTATATCGTTTTTTTGTCTT
+CTCATCTTTGTTCATTTTATTATTTTCTTCACTTTTTTCATTATATTTTA
+TTN
+>RL819F  CHROMAT_FILE: RL819F PHD_FILE: RL819F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:41 2001
+GGCAGCACGAGACGGCGCNCGNNGGAGNNNGGTCGCGGGCCGCCGAGGGG
+AAGCAAAGCGTTCGGCGGGTGGGGCCTACTGGGGGAACGCGCGCCNNNCG
+AGTTGCGAGGGAGCCGTCGCNCAACGCGCAGGGCGANNCAAAGAGAGCGC
+CCGCGCACGGAGGAACGGGCAGAAGAGNGGGGGGGGAGAGGTACGGCGCG
+TGCACGGGATACCTACGCACCCCGATCGCGCCTGCCGCCCGCCCCGCGCG
+GGATGAGCGACGGCGACCAGCCNGTGGTCCGTCGTGCGGAGCCGGCGACC
+CACACGCCGACAGACCGTAACGCGCGGTACGCGCGCCACGGCGATCTGCG
+TGCGAGCCCTTACCCCTTCCGATACACGACAATAACCTAGTAGGATACAG
+ACACGCCTGGCCACAGGTATATAACTATATATATACAANGAAGCCAAATC
+CAATGAAACACTGATAACACGAGAAGCTAAATAGTTGAAGACAAGCCGGC
+GAGAAGGGGGANNGAGACCGGGTGAGTGAGACAGGTACGCCGAGCCGAGA
+GGGCGGTCAGAGATGGGAGCTCGCCACGGGACACGGTAAGCATCGACACG
+GTGAGGAGAGGCACGCAGAAACAGAACGGGCAGAGACTAACCTATCAGAC
+ATGGGAAGGGTAGCGGAATGCCACCCTGGAGACCCGCGACCGCANACGAA
+ACAATCGCCAGACAACACAGAAAAGTANCGTCTCAAAACGGCAGCATCCA
+GCGCCGGGGGACACGGAAGCGGACGACGCCCGGACGTGTTATAAACGAAC
+GCGACCGCTAAGATGGCATCTAAGGCGGAGACAACATAACGACATAAGAG
+ACAACGCGCGACGACGCGAACGTACGCGATGGATCCGAGGAACGCGCAAC
+GAANACGATCGTCTATGCAAACACACAACGAGAAACACGATACGGTTATA
+GGACGAAGATGGAAGTAACGTGGCGAACACATCACCAAATACTATGACGA
+TACANACACGGAGAAGGAGCAGAGTGTACGCGCGGTCCGCTCATCGCGCC
+ACGGCGACCCAGCGAACATGATGGAAGTGTATCGGCGGCGCGGACAACTC
+GAGGAATAAAG
+>RL822F  CHROMAT_FILE: RL822F PHD_FILE: RL822F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:50 2001
+CCCCCCAACGGCNGGGCCGCGGCNGNTGATTTGTCGGGTGTAAGAAAAAA
+TGAAATATAAAACTANAAANNAAGNNGGGCCTTCTTCGNNNNACTTGCCA
+CACNCGGGNCGTTTCCGCGGGAGNCNGACCCNNCAGNGCCGGCAGCGCCN
+CCCGCGCGCCGGCCCCCCCCGCCCGGGCGGCNCNNCCCGCGGGGGGGCGN
+NCGGGCGCGCGNACNCCACAACCCCCCNGCAACACGNCGGCGGGCCCCGG
+CGACGCCGCGGGCGGGGGCGGGCCGCGCGCCCGCGCGCGNGCCGCGGGCG
+CGCGCCGGCACCGGGCCCGGCGGGCCCGCNCCGCGGCGCCCCCCGCCCCC
+GCCCGCCCGCCGGGGGCGCGGGCGCGCGGCCGGGCGCGCCGCGCGCGCNG
+CGCCGCGCGGCGNNCGGGCGNNGNGNGGNNNGGGCCGCGGCAGCGGCGCG
+NAGGCGCGCGCGGGCGCCGACCAGAGACGCGCCCCCCGCGCNCCGCGCGC
+GGCNGAGAGACGCGCGNACGCGACAACACAAAACGAGCGCGCGGCNNGCC
+NGGGGGGGCGGCGGCGACGCGCCGCGCACGCCGCCGGGCCAGGCGCGNGG
+ACGGCGCGCGCCNACGGANGCNCGNNNGCCNNGCGCCGCCGCGCCGGGGN
+GCGCCGCGAACCCGCACCGCCGAGCGGCGCGGGCGGGNGGGGGGAGGGGG
+CGGGGGAGAGCGANACGAGGGCGCGCCGCGGAAAGGGCGGAGGCGGCGAG
+GACGCGCGGCGCCGGCGGCGACGCGGCNGGCGGCCCGGGCNCCAGAGCNC
+AGCGAAACACGAGGGCGCANACGCGNCCAACCGNCACNCAACGCGCCNCG
+AACAGAAGAAGCGCACACGNGCGCGCACGGCGCGNCGCGGCAGGCGAGCG
+ANACCNAGACGAGGAGCAGGCGACGGACGGNGGAGNAGAGNGGNNGGGCC
+CGAGNCGCCGGCGACGCAGACAGAGAGAAGCCGGCCGNGGNCGGCACCGA
+CACGCCGACGCGCGACCGCGGNCGGGCCCGCGCGAGCGCACGCAACACGA
+CGNCGGCGCGCCGNCGCGGNNCNCGCACGCGCGGACGCGAAGCGGCAAGA
+GCGNGACGCGCGAGCGCAGGNGGGNGCNCAGCCGCGCGAGAGGCCGGAAG
+ACGNCGAGCGNGGCNGACGACGCNGACGACGAAGACGNCGCGCGCGCACG
+CGACAACACACACGACGNCGCGCCGCAGCCNGCACAACACCAACACGCAC
+AGCGCACAGACGCCGGCACGAGANCNAGANCACGACGNACCNAGCAGACG
+ACGCGTCNTNGCNTCTCTATGATCTATGCAGTCGTTCTCGAAAGNGTATA
+TAANATGTAGTAGCTAGCTNGNACACAGANTNGNATCCTTCAGACTCATC
+AGTNTGATCNNTTAT
+>RL829F  CHROMAT_FILE: RL829F PHD_FILE: RL829F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:47 2001
+CTTANGAGTAACTATATTTTAAAATTCTCCAAGTTGAAGAAAGTTGTTTT
+ATTCTCAAGGGGACAATTTTGTATTTTCCCACTTTAGTTACAAAGAGGCA
+GTGATCTTCTCAGTATTCCCCCAAATTCAAATACTTAAATCCAAAAAGCT
+TAATTAAGAAAAAGAAACTGCCTAACAAACTAAACCTTGTTGTCTCCATT
+CTCATAATTTCTTCATTATTATTGGGGACGCATATCTACATCAAGCTCTT
+TCCCCCGGTTTTAGCCGAAATCAACGAGCTCAACATCGGACTGGAACACT
+GCATGTCTTGCCCGATCAAAGCTTTGCCTGGGAATACAAGAAGGTTCCGA
+GGATAGCAATGGCAGCTCCAAGAGCGTTAACGGGCTGGACAGGGGTGCGG
+AAGATAATGATGGAGGAGACAATGACTGAGATACGCTTCATGGTGTTACC
+GACACTAAACGTCAAGGGAGAGATTTGGTCTAAAGACATGTAAGACACTT
+GGTTGTAGAGATGATAGAACACACTCTGCGCAGCCACCCACCCGACGAAT
+TGAGGTCCGACGTGGGAGAGAGCCTTTTGCCAGCCATCGATCCACATCTG
+AGGACCTTCAACCGCAATTGCAAAGGGGGTGAGAATCAAGAGTGATAGCA
+TGGAGAGACAAGCATAGTAGTTCATTCCGCTCACAGACTTTCCCTTCATT
+CCCTTCTTTGAGAAGATGTTACGGAACACAAAAGCCAAGTTCGATATCAT
+CGCTCCCATGAAACCAGTCATG
+>RL829R  CHROMAT_FILE: RL829R PHD_FILE: RL829R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:15 2001
+CGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCTCTTACTTAATAACGAACGGCCTTCTCCGTGATCAAAGTTTTAA
+TATTTTATTTTAATTCGACGAAAAAGTCTTTTCCTTTTCTCTTTCTCTCT
+CTTTTATTCCGACGATGGTTTTAGCGGCGAAGCAGGCTCTCTCCGCTAAG
+ATCGGGTTCTCCAACCCTCTTTCGCGACGGAACCCATCTTCCCCGCTCCA
+ACGATCACCTCTCGCCGCCTCGTTTCCATCGACGGACCTTCGGAAACGCA
+CCGTTTTAGCCGTCTCCAAGCCTCTGCACCTCTCTCCCATGAGAGCGAAG
+CCTCCGGCGAGACGCGAGGCCTACGAAGCCGATAAGTCGGAGCCTCAGCC
+GATCGATGATGATGCGGCGGAAACGAAGTCGGAGGCGGCGAAGAAGCTGA
+AGATCGGAATCTACTTCGCGACTTGGTGGGCGTTGAACGTTGTGTTCAAC
+ATCTACAACAAGAAGGTGTTGAACGCTTACCCTTATCCTTGGCTTACCTC
+CACGCTCTCTCTCGCGGCTGGTTCGTTGATGATGCTCATCTCTTGGGCTG
+TTGGGATCGTTGAGACTCCGAAAACTGATTTCGATTTCTGGAAAACTCTT
+TTTCCGGTTGCTGTGGCACATACGATTGGTCATGTGGCTGCAACGGTGAG
+TATGTCAAAGGTTGCGGTTTCCTTCACTCACATCATCAAGAGTGGTGAAC
+CGGCGTTTAGCGTTCTTGTCTCGAGGTTCCTTTTGGGTGAAACCTTCCCT
+ACTTCGGTTTACTTGTCCCTTATTCCGATCATTGGTGGCTGTGCTCTCTC
+TGCTCTTACTGAGNCTAACTTCACATGACTGGNTCATGGGAGCGATGATA
+TCGN
+>RL835F  CHROMAT_FILE: RL835F PHD_FILE: RL835F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:41 2001
+GCCGCCGCAGGCCGCCCCCCCCGCCCGGGCGGCGACCCACCCCACATCAA
+AAGAGAAGCCTTGTGGGGACTGGTAGTAGGGTTTCAGGCAGAAGCACATA
+GTGGGGGGCGGTGCGGAGGGGGCCAGAGTGATAAGGGGGGGGAGGAGGGA
+GAAAACGCGCCACCGGCGCCGATCCCGGATGCCGTCGGAGTGGTTGCGCC
+GAAGTAGACGTTAGAAATTAGTGGGTGGAGTGTATATAAATGAGCCTCTA
+TTTGAACTCCCTTCTATCTCTCAGTCGGTGTCCGTGTGCTCCACACGCTT
+TAACTATATCCATCCGCTACGCCACTACTCCGCGAGGCCGCGACAACGGA
+GGTGAAACAGAGGGAGGGCGCGCGTCGCGAGAGCGGAGGGTAGGGGCACG
+CGCGCCGGCGCGGCTGGTGCTGGGTTGCGGAAGATGGGGTCGCGTGCGAG
+CAGAGCGCATCTGGCGATGCAAGGTTAAAGATTATGGTTAGTTTTAGAGG
+TAGGGAACGAAGAGAGAGTGTAGATCTAGAGCTTGTGATACCGCCTATAT
+ACTAAAGCGATTCATCCCCTCTGAGCGTGCCGACGCGCCCGCGCGTGGCA
+CGGCAAAAGTGGTAGCGCATCACGGCGTACACGCGCGCATTCATGAAAGC
+GGCGCGGCACGGAAGGTAAATACTATCGCGCTACGAGACCGAACCGAGCG
+AGAGCATACGAAGTGTNGACAGNCGCGACCGACGACACGACGGCGCGAAC
+GAGGGAGACGCACACAGAAAGACGCGNCGAGATGCGATCGCGAACCGAAC
+GCGTATATGATCCGACAATATACGGCCACCGCACACAGAGAGCACGGTGA
+CGGTGCACACGAGAAAGACATNATATCAGTCACATGCACAGTTCCGAGTA
+GCGGCGTCCAGTTGATATGGGAGGGGAGCCGACGCAAGCGCGGATATAG
+>RL835R  CHROMAT_FILE: RL835R PHD_FILE: RL835R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:08 2001
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNCTCNGGGGACAACAGCNAAGAACCGGTTCCGG
+GAATTNCCGGGGNGGAACCAAACAAGCCGGGCCGGACACCAGCAACACGC
+GCCCAACAAGGGGGGGAGCAGGGAGCAGAAAAAAACAGGACAACAACCGG
+CCCAGCCAAGAGAGAGAACGAAAGCAGCGCGCCCAAAGAAGAGCACACAT
+CGACGCGCAGACCCAAGAGAGCAAANGAGGGAGCACAGCGGAGTACCACA
+CGAGACCACACAAGCCCCCCCGAACGCAAACAGCCACCAGAGCGAAACGG
+CCGCGAAAAGACACAAACGCCACGCAGGACAGAAGGAGACCAAGAACCAA
+CGCCCGCAAGCCCCGGCGAAAACTGAAGACCCACTAGCCCACACCCCGCN
+CACCAGACACACAGAAAAGGACAGTCTACGTCCGCGGCCGCCAAAGCTCA
+TAACACCCACACCAACGCCAAGCACAAAGCAAAGCGGCACAAAAAGGACC
+CGTAGAGAAGCCACACTCCGCAACGCCCCACCCACACCGACCCGACCAAA
+TAGCCGAGGAGACAAAGAACGCCCACCGACACAGAACCCCCAGACCGAAC
+AGCAGAGAAAAGCACAAAAACCACGAACAGAACGAACACACAAAACAGAG
+AAAGGATCAGAAACGCCGCAACAACAGGCCACCCAACACATAGAGCGAAG
+AGTCACGAAGGAAAGCGAGCAACAGAATCCCACACACAACGAAGAGCACA
+GAGCAAAAAGAGAACGCGGGCGACCACACCGCTCCCCCCCTCAGGCCGGC
+AGAAAAAACACGCCACACCCACCACAAAACCCAGGAGTACAACAACAAAA
+NACCAGAAAACAACAAAGCCAACGCAAGGCGCACCACAGACCACCGCAGC
+ACCAAACACGCAAAGCAACATCAGAGAACGCCCGACAGCGGCAAGAAATG
+AGGCACCACGCCGGACNGCACAGGAGGAAGAAAACAGCGAGCGAAGCCAA
+CAGAGACACGGAGAGGAGCCAACCGCACAAACAGAACATGAGGACAAAAA
+CACCAGACAACACCCCACCAACCAGCAGCCTCAAAACACAACAGCGACGC
+GCCACACACACACGACCAGCTCAGAGCCACAGCGAGCACAGAGACCGACG
+CGCAANCCCAACATAACACAGCCGACGACGAACGAAAAACACAGCGACAA
+AAGGGACCCAGAAGAGAAAGCAAGACCATAGAAAGTAT
+>RL844F  CHROMAT_FILE: RL844F PHD_FILE: RL844F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:45 2001
+NTTCGACAAAGCCGAAGAGACAAAGAGAGAGAGGGATTGTATCATTTGCA
+GGCCATAAGCATGGAGTTTATCACGTGCATTGTAAGTCTTGTGGCAGAGA
+CGTCGCTGCCTGGTGCGTTGCGGTGGTGGTGCAGAATTTGATACACTTGC
+TCAAAGATTGGGCGACCATGGACAGCTACCATCGGGTCTGATGGGTTAAT
+TGCTGCAACCACATCGGTCATCCATGCAAGCTTACGGGATGAGTCCTTGC
+TGATGTCGCAGGCTAGCTGTTGCAGCAGTGACAGAAGAACGCCTTGGCTC
+AGAGGAAGCGGGTTCATCGCCAGTAGTCCACGCAGATCCACCTGTGAGCA
+AAGCCATGATACTATGGAGACATCGCTTCTCTGTAGAGCTGAAGTAAATG
+ATTCTTCATATTTGCGTTCCGATATCAACCTTGATAATTCTGTTATTGGG
+TCTGCTTCAACCTTTTCAAGAAGAGCACCCAAAGGTCCACCACTTCGTTG
+AGAAACCAATGTGTTTGATCCACTAGAATTTGCTCCAGCGGCTGCGAGAG
+CTAAGCGATTCCTTTGGCTCTCCGCTAATTCACGGCTCAATGCTTGAGTA
+ACTGACGACGCAGAAGTAATGGTTTCCTTTAAAGTATGAGCAAGTTGGGA
+GTGTCCAGCGTCAAACCGTTGTTGGGCTGCGTTTGTGTGCTCGGCCATCC
+CTTTCTGGAAGGCTGCGTCTACTTGCTCAAACATGGTCTTGCATGACCTC
+TCAAGGAAGGTATGAGCGAGGC
+>RL844R  CHROMAT_FILE: RL844R PHD_FILE: RL844R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:12 2001
+ATTTTGGAAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXAAGCTCAAGAAGCAAGGACAATGATGTTAGGCCTGATGATGATGT
+GTCTGGGATGCGAACCGCACAAGCTTTCTTCAAGCATCCCACGCATCTTG
+TAACTCCTTCGGAGTTTTTGATGCGCGTTCCGTCTACTGAAGCTTCCATT
+ACCACTGAAGACAAAAGGGATAGAGATGCAAATATCCAGGACGTCAATAT
+TGATTCAAGAGACACAGAAGTTGAGGTAAAAGAAGTAGGGGAATCAAGTT
+CGACGCAGAATGGTGAAATCAATTACAGTGATGAAACTGAGCATCGCACT
+TCAGTAAATACAGAAAAAACCTTTTACTCACAGACTACGAACCTGAGCAC
+TGAGATGGCAAGAGACTGTTATCCTGGTACAGAGTTAGAGGAATCTAAGG
+CTTATGAACAGTCTGTACAAGCTGGGGATAATCTTGACTCGAGAGATGTG
+TCTGGAAAGCTTCCTGAGTCGGTTTCATCAATTGGGCTTTCACAGTCAGC
+CGCTACAACCAAAGGGAAGAAGCAGAAGTCGAAAAGCTCACAGGGTCCCG
+GATTGTCATCTACATCCTCAAATGTTGCCAATTTGGCTGATACCTACAAT
+GAGCAAACTCAGAGTTCAAGCCAACCTATCTTAGCTTTGCAAGAAACAAT
+GAATCAGATGATGGTTTCACAGAAGGAGATGCAGAGACAACTGTCCAATG
+CTGTCAATGGCCCTGTTACTAAAGAAGGTAAAAGACTAGAAGTGGCTTTA
+GGGAGAATGATTGAGAGATCCAGCNNAGTCAATGCTGATGCTCTNTGNGC
+CCGCTTTACAGAGGAGATCGTTAAGAATGAAAGGGCATGCGTGACATTCA
+CAGCAATTGT
+>RL845F  CHROMAT_FILE: RL845F PHD_FILE: RL845F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:48 2001
+CCTTTTGGAGACANTAAAGATCCAAACATAACAGTACGAAACACACAGCA
+CACATGCTTGTGAAAGGTGAGAGTACCTCAACTTCACAAGCCATTATTCA
+TTACGAAACAAAAGAAAGATTGCATAAAAACAGAAAGCTCATAAATAGTT
+CTGCTAGATTCCCTCGTCCTTCTCGTCCTCGCTGCTGCAGCTATTGTTGG
+CTGCTCCACTCACACTTCAAGGTAGATCTAGGCGATCCACAGTACTTGCC
+AAGCTTCTCAAACGTCTGATACATAACCGTCCCACTCTCTTCCCCTAGAG
+ACTCAAATGTCCTCTCTCTGCACCCATAGTCTTCCAAGTCCACACTGATC
+TCTGGCTTGTACGCGTTCGATCCAACGCTCGAGTGCACGGCCACAGAGAA
+TTGCTTTGGCTCAAAGCAAGACAGAACCCTTGTCACAAGCTGGCTCAGGT
+CGAGGGTGTTGAAGTCGTAACCCACGGCTTCGAAGCTAGCGTAGCTAAAC
+CCATCCTCAGGGGTCACATGGATAGTGGAGATCGCATCTCCTTCAATGGA
+GTTCATAGAGTAGCCACAGGGCTCGAACTCAAAGTCGCAGATCTGGGACT
+TGGGGAGGATCTTTCTGATTCCAGAGTTGTCGGTCATCGATCCATTCTCC
+CCACCAGTTTCATTCTTGTAGAAGACAGAGGCTTTCTCCTTGTCAAGACC
+AGTCATACACATCTCGAGCGTGTAGACATTGTTGTTGCAGTCGCTGGAGG
+TCTGAGCAGAGGAGGCAG
+>RL845R  CHROMAT_FILE: RL845R PHD_FILE: RL845R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:15 2001
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCTCATTGGCTCATCATTACCAAATCATCATAATAAATCGTCATTT
+CTCTCAAAATTAGGGTTTTCTCTTCCGGTTATCTGTATCTGGGGTTTTGA
+AGGTTTCGTTTCGTATCTGGGAAGAAAGGATTCGTTAATTCGTGGTCTAG
+ATCTAAAATTCATATATATAAGGCGTGAATGAGATTATGATGGAATCGAA
+AGGTGGTAATAAAAAGTCGTCGTCTAGTAGTTCCTTATTTTACGAAGCTC
+CCCTCGGTTACAGCATTGAAGACGTTCGCCCCAACGGTGGAATCAAGAAA
+TTCAAATCTTCTGTCTACTCCAACTGCGCTAAGAGGCCATCCTGAGTTGT
+AGCGTGCACCGAGTCCTCCTGCTATAGTTATAGCTTTTTATAATTTCCAG
+TTTATAATTTACTTTTTTCTTTCTAAGCTCCGTTTCTGCTTGGTTCCCCA
+ATCCCATCGTTCTCTCCTCCTACTACAATGGCCTTATCTGCAATCGGTTT
+CGAAGGCTACGAGAAGCGCCTCGAGGTCTCTTTCTTCGAGCCTAGCTTCT
+TCCAAGACTCCAAGGGACTTGGTCTCCGTGCTCTGACCAGGTCCCAGCTT
+GACGAGATTCTCACTCCTGCTGCCTGCGAGATCGTTTCCTCTCTCTCCAA
+CGATCACTTGGACTCTTACGTCCTCTCTGAGTCCAGCTTCTTTGTCTACC
+CTTACCAAGTCATCATCAAGACCTGTGGCACCACCAAGCTCCTCCTCTCC
+ATCCCGCCGCTTCTTAAGCTGGCCGGTGAGCTNTCCCTGAGTGTTAAGTC
+TGTTAAATACACTCGCGGCTCCTTCCTCTGCCCTGGAGGCCAGCCTTTTT
+CTN
+>RL847F  CHROMAT_FILE: RL847F PHD_FILE: RL847F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:29 2001
+CTTGGGGATACGTTCAAAGATGCATAACAAACAAAAAGAAGGTAAATAAA
+TAAAGATCACAAAGCTAACATTCTCACCACGTGGTTGCACGAATTAAAAC
+CAAAGGGGTGAAAATGCAACATACCACACCAACGACCAACTTATTAGTTC
+TAAAAGTAGACACAGACACTCAGTTTATTCAAAGACAGAGAAGTTCATTG
+TTAAGATATACCCAAGTGAAACCTAGGTTGTAAGGATTGGAGGCAAGTAG
+TCAGATTTGGTTCCAAGGATACGAGCCTTTGTGTCAGGGAAGAACTCAAA
+TCCGGGAAACTCAGTGACGTCACCTTCACAGTCTACATCAACAGGGTAGC
+GTAGCAAGTGACCGGGAAGGTCATGTTCAAGTGACTCGCTCGAGTATAAG
+TCCCAATACTTGTCAGAAATGCGGTTAACTTTCTCAATGCATTCCACGCT
+TGATGGATCGAGGAAGGTCTCATCGAGCATTCCTAGGTGTTCGTACCAGA
+GTGACATGCGGAACCCATGGATCTGTCCACGAGCTGGTTGTCTATGTGAC
+AAGTGATGTGGTTGGTAGCCTCCCATTGCAATCTCAGAGTCCCTCGCACC
+ATCCATCGACCTCTGGTTGATGTTAGCAGATCCAATGATGATATATTCAT
+CGTCAACGATCATCATTTTGGTGTGGACGTAGATCATGAAGCGACGTGCT
+TCTTGTGCCTTCATATAGCTCGAGTCAGCGTCTGGTCTCTCTGCTGGCTC
+ATACTCTCCTTCTTTCTTT
+>RL847R  CHROMAT_FILE: RL847R PHD_FILE: RL847R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:55 2001
+CTTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXTTGTGGACAGCGAGATGCCGAGCCGAGGGGGTTCACAGATGAGGA
+GGATCGTGAGTTTTGTTGGTGGGATCGATCTCTGTGATGGACGTTACGAC
+ACTCCTTTCCACTCCTTGTTCAGGACGTTGGACACTGTCCACCACGACGA
+CTTCCACCAGCCTAACTTCACCGGCGCCGCCATCACCAAAGGCGGGCCGA
+GGGAGCCTTGGCACGACATCCACTCTCGCCTCGAAGGTCCCATCGCTTGG
+GATGTTTTGTACAACTTCGAGCAGAGGTGGAGCAAGCAAGGTGGTAAAGA
+CATTCTCGTTAAGCTGAGGGAGCTTAGTGATATCATCATCACACCTTCTC
+CCGTTATGTTCCAAGAGGATCACGACGTGTGGAATGTGCAGCTGTTTAGA
+TCCATCGACGGTGGAGCTGCTGCTGGGTTCCCCGAGTCGCCTGAAGCTGC
+TGCTGAAGCTGGTCTTGTGAGTGGTAAGGATAACATCATTGATAGAAGCA
+TCCAAGATGCTTACATTCACGCTATCCGTCGCGCGAAAGACTTCATCTAC
+ATTGAGAATCAGTACTTCCTTGGAAGCTCTTTTGCTTGGGCAGCTGATGG
+TATCACTCCTGAGGACATCAACGCTCTGCACTTGATCCCAAAAGAGCTGT
+CCTTAAAGATCGTTAGCAAGATTGAGAAGGGAGAGAAGTTCAGGGTTTAT
+GTTGTGGTGCCGATGTGGCCGGAAGGTCTTCCGGAGAGTGCATCAGTGCA
+AGCTATATTGGATTGGCAGAGGAGGACCATGCAGATGATGTACAAGGATA
+TTGTTCAAGCTCTTAGGGCTCANGGCTTAGAGGAAGATCCTAGAACTATC
+TG
+>RL849F  CHROMAT_FILE: RL849F PHD_FILE: RL849F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:36 2001
+GCCCGTTCTCGGCCACTCATAGGCGTTCCACAATTTCATACAGTGAGTTG
+GTGGGCCCTTTATTGAAGCCCATTATCTTATGAGAAAAAAAAAAAACATA
+AACATATGTTCGACACTTCTCACAACAAAGCTTCGTTTTCGACTTTGATT
+AAACCATTACTGTGTTCACTTCTAATCACCATTACAATCTTGGAACTTGA
+GATCGTTTTCCCTCTGAGTTTCATCAAATGCGTAAGAACCTCATTCATAT
+CGGGCCTATCCTCAGGTCTAGAGCTTGTACAGTACAAACAAAGCTTAAAC
+AAATCTCCAACAGCCTTTTCCTGCTTCCTAGTAACAATAGATAACCCAAT
+CTCCGAATCAAGAACCCTGATCATCCCTTCCACTCCATCTCCAACCGCTT
+CCTCCACCAATTGACGCAGACTCACCCCTTGTGATTCTTCATCGATCACC
+GAAGTCGGTCTCCGTTTCGTCATCAGCTCCATCATCACAATCCCGAAGCT
+GAACACGTCCGCTTTCGTCGTCACTTTCCTCATATAAGCAAACTCTGGAG
+CTAAGAACCCAATCGTCCCCTCGAAGGCTAACGTAGAAGCTGTAACGCTT
+CCATCTTCTCGTAACCCTAGGATCCTCGCAGTCCCAAAATCGCTGACGTG
+AGCAACTCCATCTCCGTCGAGGAGTATATTCGCCGGCTTCAGATCGCAAT
+GCACGATCGGGAATCCAAATCCAGAGTGAAGGTAATCGATTCCGCTAGCG
+ATATCAACGCACAGATCGATTT
+>RL849R  CHROMAT_FILE: RL849R PHD_FILE: RL849R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:02 2001
+AGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXCGAATCTGGGGATTCTTACAAACCTTGGGAACCTTTCTGCACA
+CGACAATCTTCTTACCGGGTCAATACCTTCTAGCATAAGTAACTGCACCA
+GTCTTAAAGTCTTGGACCTGTCTTATAACCAGATGACTGGCAAGATTCCT
+AGGGGTTTAGGACGGATGAATCTTACATTACTTTCTCTTGGGCCGAATCG
+GTTTACTGGCGAAATTCCGGATGATATCTTTAACTGTTCAGACTTGGGAA
+TTCTTAATCTGGCACAGAACAACTTCACAGGAGCAATCAAGCCATTCATT
+GGGAAGCTTCAGAAGCTACGGATATTGCAGCTATCTTCTAACTCTCTCAC
+TGGATCAATTCCTCGAGAGATAGGGAATCTCAGAGAGCTGAGTCTCTTGC
+AACTTCACACTAATCATTTCACGGGAAGAATCCCGAGGGAGATATCAAGT
+CTGACTCTACTGCAGGGGCTTGAGCTGGGTAGAAACTATCTCCAAGGTCC
+AATTCCTGAAGAGATATTTGGTATGAAGCAACTCTCAGAGCTCTATCTGT
+CCAACAACAACTTCTCAGGTCCAATTCCTGTCTTGTTCTCGAAGCTTGAA
+TCTCTTACCTACTTGGGTCTTCGTGGAAACAAGTTCAACGGGTCTATCCC
+TGCAAGCCTCAAGTCACTTTCGCATCTCAACACATTGGATATCTCCGACA
+ATCTTCTCACCGGAACCATCCCTAGCGAGCTGATATCTTCGATGAGAAAC
+TTGCAGCTTACCCTCAACTTCTCAAACATTTATTGTCAGGAACCATCCNN
+CATGAGCTTTTGGAAGCTAGAATGGTTCAAGAGATCGACTTTTCAN
+>RL855F  CHROMAT_FILE: RL855F PHD_FILE: RL855F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:30 2001
+GCTTACTGCTAACCACATGACCATGTCAAGAAGAATATCAAAAAGACCGG
+CAAACATTCGCTGAGCTGCTTTATTCAAACAAGGGTAATAAAACTGAAGA
+AAGCTGGGAAAAAAGTGAAGAGGGTTTTCTTGGTGATTGTACACATCATC
+ATAATATTTTCTAGCGAGCGTCCTTGGACAAAAGGCCATGCGATTCAACT
+TTCCTTGCGTTGCGAAGCTCATCAACTCCATCTCCTCTCTCAAACACAGC
+CGCTGCACCCATCCCCGTCCCAATGCACATTGAAACTACCCCAAATCGAC
+AGTCTTTTCCGCGGCGTTTCATCTCATGCAACAAAGTAGCAACGCAACGT
+GCTCCTGTAGCACCCAAAGGATGTCCTATGGCCATTGCGCCTCCGTTGAC
+ATTGATTTTCTCTGCGTCGAGTCCCAACTTGTTACGGCAATAAACAAACT
+GAGATGCAAATGCCTCGTTGATCTCAAACAAGTCAATGTCATCGAGTTCT
+AAACCAGCAGCCTTAACTGCAGCAGGAATGGCAACTGCTGGACCAACTCC
+CATGATTGCTGGGTCAACACCAACTGCAGCACATGTCCTGAATACACCAA
+GAACGGGAAGCCCTTTCTGCGCAGCACCACTCCTTCTTATGAGAAGAACC
+GCTCCAGCACCATCACTAACTTGGCTGGAGCTAACAGCTGCTGCGGCCTC
+ATCCTTTTTAAACACTGGCTTCATCTCCCAACAGCACCAGGGTTGTGGCT
+GGCTCGATTCCATCACTCACAAAAN
+>RL855R  CHROMAT_FILE: RL855R PHD_FILE: RL855R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:55 2001
+ATTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXACACAAATCCTTTGGTCAGGATGGAGGGGCAATCGAGAGACAAAG
+AGTTCTTCTTGAACATCTCCGTCCTTCTTCTTCTTCTTCCTCTCACAGCT
+TTGAAGGCTCTCTCTCTGCTTCAGCTTGCTTGGCTGGGGATAGTGCTGCG
+TATCAGAGGACCTCTCTCTATGGAGATGATGTAGTCATTGTTGCGGCACA
+TAGGACGGCACTATGCAAGTCGAAACGTGGCAATTTCAAGGATACTTATC
+CGGATGATCTTCTTGCACCTGTTTTGAGGGCATTGATAGAGAAGACGAAT
+CTGAACCCAAGTGATGTTGGTGACATTGTTGTCGGTACTGTTTTGGCACC
+GGGATCTCAGAGGGCCAGCGAGTGCAGGATGTCTGCTTTCTACGCCGGTT
+TCCCTGAAACCGTTCCCGTTAGAACCGTGAATAGACAATGCTCTTCTGGG
+CTTCAGGCTGTTGCTGATGTAGCCGCCGCCATCAAAGCTGGATTTTATGA
+TATTGGTATTGGGGCTGGATTGGAGTCCATGACCACTAATCCAATGGCAT
+GGGAAGGGTCAGTCAACCCAGCGGTGAAGAAGTTTGAGCAAGCACAGAAC
+TGTCTTCTCCCTATGGGTGTTACTTCAGAAAATGTGGCACAACGATTTGG
+TGTCTCAAGGCAGGAGCAAGATCAAGCTGCTGTTGACTCGCACAGAAAGG
+CAGCTGCTGCTACTGCTGCTGGTAAGTTCAAGGATGAGATCATTCCGGTT
+AAAACCAAGCTTGNTGACCCAAAGACAGGAGATGAGAAACCCCATTACAG
+TTCTGTGGATGATGGGATCCGACCACCACACCCTTGCTACTCTTGGAAGC
+TGAGCCAGT
+>RL856F  CHROMAT_FILE: RL856F PHD_FILE: RL856F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:33 2001
+CCCAAAACCGCGNGGGGGNGGGCGNGGCCTGCGGGAGAAAGAAAAATGGC
+CATCNGCTTCGGGNNNCNNNNCTTGNTCGTNTACTGCCACCAAANNCTNN
+NNNGGGGNNAAACCNNGNGGNGNGGGGGNNGGGGGGGGGGGGGCGCCCCG
+GCCNGNCGCGCCCGCGGGGCGCGNGCGCGGGGGGCGGGCGCGGCGCGGCG
+CNCGCGCGCGGCCGGCGCCCGGCGGGGGCGGGCCCGCCCCCGCCGCGCGC
+GGGCGGCGGCCCGGCCCGCCCGCGGCGCCGCCCGCCCCCGCGCGCGGCCG
+CCGCGCGCGCGGCGGCCCCGGGCGGNGGGCCGCGGGGGGCGGGGCGCGGG
+CGCGGGCGGGGNNGGCGGCCCGCGGGCGCCGGGCGCCCCGCCCCGCCCGC
+CGCGCGGCGCCGCCCGGGGGCGCCGGGGGGGGCGGGGCGGGGGCCCGGGC
+GGGCGCCCGGCCCCGCGGCGGCCGGCGCCGCCGCCGCCCCGCCCGCCGGC
+GCGCGGCGCCCCCCGGCCCCGCGCCCCGGGGCCGGCCCGCGCGGGCCCGG
+NGCGCGCCCCGGGGCCGGCGCNGCGCGGCGGNNCGCGGGCGGGGGGGCCG
+GCCNCCGCGCCGCGGGCCGCCCGCCCCCCGGCGCGCGCCGCGCCGCGCCG
+CGGCGCCGGCCCGCGCGCCGCGCGCGCCCGCGGGCCGNGNCGCCNCGCCC
+CGGCGCCGGCCGCGGCCGCGCGGGCGCGCGCCCCGCGGGCCCCGGCGNCG
+CGCGNGCCGGCGCGGCCCGGGCNCGGCGCCCGGCGGCCCNCCGCGCCGCG
+CCGGCGCCGCNGCGGCGCGGCGGCGGNGCGCGAGCCGGCCCCCGCGGCGC
+CGCCGCGCGCGCCGCGTGNGGCGNCGGCGCNCGCGGCCGNGCGCGTCCCG
+GCGCGGGNGGCGGNCGGCGTCGCCCGCGTCGTCGCGCGTCGGCCCCGTGC
+GNCCGCCGCTCGGCGTCCGTCGCCGCTCTCCGCGTTTCCGCGTGTTGCGC
+GCGCGACGCGGCGNCGCGCGCGCTGGCCCCGTCGCGCGTCCCCGCGTCGT
+CTGCCGTGTCCGCCGCCGCGTCGCGCGNGTCGCGGTCGTGCGCCGTCNTC
+NCGCGCGGGCTCGCCGGTGCGCGCNCGTGCCGCCGCGGCGCGCTGCGTGC
+GCCGCGGTCCGCTNTCCTCGCGCGCGACACACAACACGAACAATCNCCGC
+AATTCCNTTA
+>RL856R  CHROMAT_FILE: RL856R PHD_FILE: RL856R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:58 2001
+NAACAAAATGCNGGGGCNGNTTGTTTGATGGAAAAAAAAAAAAAAAACTA
+AAAANNNNNCCTTTGCGACGACTGCAGGAACCGGTCCGGAATCCAGCGTG
+GACCCACGCGGCCGATGGCGGCGCCTGCTTCCCGCGCCGGCGGGGGCCCG
+GCCCCGCTAGGCCGGTCCGCCCCAGACACCGAACGCCCGGGCCCCGCGGA
+GGCGCCGAGCGCGCGGAGCGGGGTGACCCCACCGTGCGCCCGAGCGCGCG
+CCCCCCCCCCCCCCCGCGCCCGCCGGCCCCCGCGCCCCCCCGGCCCCCCC
+GCGGAGGGGGGCGCACGAGCGAGGCCCCCCCCCCCCTCCAGCCGGCGCCC
+GACCCCCCCCAGGCGCCCCCACCGGCCCGCCGCGCGGCGGTGGGACCCCG
+GCCCGCCGTGCGCGCCCCCGCCGCCCCCCCGCGACCCGAAAACGCACAAC
+ACCAACCCGCCCTCCGCCCCCCGCGCAAGGGGCGCGGCCGCACTTCGCGT
+GCCCGCCGCCGCGCCGGGCCGGCGCAAGACAAACCCGCGACGCCCCGCGG
+GGAGTGAGGCACTCGGTGGGAGCCCCCCCCGGAGCCGCAGACGCAGGCAC
+GAGAGACAATCCGCGCGCCAGAGCCGACCGCCGCGCAACAGCCAAACCGC
+CCCGCCCGCCCCCGCCAGCGGCGAACCCGCACGACAGGGACACAGGCCAC
+CCGGCAGCCAGCACAGCCGACGAGAGGGCCACCCGCCAGACCAGCCCCGC
+ATGGCGCCCCCGAGCCCGACACAGAGTCGCAACAAGACACGCCGTGAGCC
+CCGCCCGGAGCCAAAGAAGACCGACAAGGACGGAGCGAGCCGCACGCCCG
+CCGCCACGAGCCCCGCCGCGCGGCGGCCGCCACAGAGAGAAACCACACCC
+GCAGCCACGCCCCACCGTCCCCCCACTACACACGCCCCCCACACCACACA
+CCCCCCAACCCCCGCCGCCCCGACGCGCCGCAACCCTCCCCCCCCACCAC
+GAACGAAAAACAAAGAAAACAAACACACACG
+>RL859F  CHROMAT_FILE: RL859F PHD_FILE: RL859F.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:44:42 2001
+GCTTCTGAGATACATCACGAAATCCAACATTTTGGTCTTAGGAATGAAAC
+AGACAACATGAGGATATCAAACTGATTTTGTGTTTAAACAGACAACATGA
+AGGGAGGAAACAAGTGAGAGCGTTTGAGATGGGATAAAGACAACCAGTAC
+ATCAAGTTGTTTGACTAGAGAGGAAGAGAGCTTTTCATTTTTTTCTCCAT
+CATTCCTCTGCAGCACTAGCTGCGTTAAGGTCCACACTTAAATCCAGTTC
+CTTGCCGGTGATAAGTTTATACATGTTCAGCACATCAACAACAGTAGTAT
+CAAAGTGTGTTGTCGCATCGTAACTCGTTGAAATGAAGCAGTTGTCCAAA
+GTGGGTTCGTTGACAGGCTCGTATCTATCATAGATGTCGAAGAACAGCTC
+ATCAACTTGGCCAAGAAGGTCGATTCCAGGCTTCAGCTCGGTCTGTGGGT
+TGTCAGTCTCTGCGTCTGTCGACACAAATGCTATGAATTTTCCCTTGGGA
+GCAACGTTGTGGGAATACGAACAGCAGAAGACATACATGTCTGATTTGCG
+GCCCAATTGCTTTTGAGGTAGGATGACTTGCACCGAGTGAGAATCATTGG
+TGTTTGGAATAGGGTGGCTCATAATGGCAATCGCCCTAGCAACCCTCCCA
+ATCTTCCTCACCTTGTTGGGCAGGTAAGAAGGATCGCAGACAACTTTCTT
+GCATTTGGCAGTCTCTCCCTCAGATGTAACACCCACAACCTTACCTTCCT
+CATCGAACTCCACCTTGCACTCAG
+>RL859R  CHROMAT_FILE: RL859R PHD_FILE: RL859R.phd.1 CHEM: term DYE: big TIME: Thu May  3 10:45:09 2001
+ATTCTGGXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXCTCCTACCAAAAAATAAAAAGTCCCTCTCTTTCTTCAGCGATCAGC
+CAATCATGGATGAAGAGTACGAAGTTATCGTTCTCGGTACTGGTCTCAAG
+GAGTGCATCCTCAGTGGCCTCCTCTCCGTCGATGGCATCAAGGTACTTCA
+CATGGACAGGAATGACTACTACGGTGGAGAGTCAACTTCACTTAATCTCA
+ATCAGCTGTGGAAGAAGTTTAGGGGAGAAGACAAAGCTCCTGCTCATCTA
+GGTTCTAGCAGAGACTACAATGTTGACATGATGCCCAAGTTTATGATGGC
+GAATGGGAAGCTTGTTCGTGTCCTTATCCACACAGATGTGACCAAGTACT
+TGTCTTTTAAAGCTGTGGATGGGAGTTACGTCTTCGTCAAAGGCAAGGTT
+CAGAAGGTGCCAGCAACTCCAGTGGAGGCCCTCAAATCTTCTCTTATGGG
+TATTTTTGAGAAACGTCGTGCTGGAAAGTTCTTCAGCTACGTTCAGGAAT
+ACGATGAGAAGGATCCGAAAACACATGATGGAGTTGATTTGAAGAGAGTT
+ACAACAAAGGAGTTGATTGCGAAATTTGGTCTGGATGAAAACACTATTGA
+CTTTATTGGTCATGCGGTGGCGCTTCACAGTAATGATAGCCATCTCCATC
+AACCTGCTTATGATACTGTGATGAGAATGAAGCTCTACGCAGAGTCCCTT
+GCTCGTTTCCAAGGAGGTTCACCGTATATCTATCCTCTCTATGGGTTGNG
+AGAATTGCCTCAGGCGTTTGCACGACTTAGTGCTGTCTATGGTGGGACTA
+CATGTTGAATAACCTGAGTGCAGGTGGAGTTCGATGAGGAAGGTAAGTTG
+TGGG


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/acefile.singlets
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/adh.mb_tree.nexus
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/adh.mb_tree.nexus	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/adh.mb_tree.nexus	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,70 @@
+#NEXUS
+begin trees;
+   translate
+     17 d.tsacasi,
+     22 d.texana,
+     54 z.tuberculatus,
+     14 d.orena,
+     18 d.crassifemur,
+     48 d.planitibia,
+     19 d.montana,
+     43 d.affinidisjuncta,
+     49 d.heteroneura,
+     25 d.borealis,
+     37 d.mulleri_2,
+     53 d.hawaiiensis,
+      7 d.madeirensis,
+      9 d.lebanonensis,
+     12 d.mauritiana,
+     36 d.mulleri_1,
+     32 d.hydei_1,
+     21 d.buzzatii_2,
+     52 d.mayaguana,
+     38 d.navojoa,
+     24 d.lacicola,
+     39 d.virilis_1,
+     28 sc.albovittata,
+     23 d.lummei,
+     10 d.melanogaster,
+     16 d.yakuba,
+     42 d.grimshawi,
+     31 d.arizonae_2,
+     44 d.nigra,
+     45 d.mimica,
+      8 d.willistoni,
+     33 d.hydei_2,
+     35 d.mojavensis_2,
+      4 d.miranda,
+      5 d.ambigua,
+     13 d.teissieri,
+      2 d.pseudoobscura1,
+     47 d.picticornis,
+     41 d.silvestris,
+      6 d.subobscura,
+     34 d.mojavensis_1,
+     50 d.differens,
+     46 d.adiastola,
+     51 d.wheeleri,
+     15 d.sechellia,
+     11 d.simulans,
+     26 d.flavomontana,
+     40 d.virilis_2,
+     27 d.americana,
+     20 d.buzzatii_1,
+     29 d.immigrans,
+      3 d.pseudoobscura2,
+     30 d.mettleri,
+      1 d.persimilis;
+   tree rep.1 = (((22:0.157963,54:0.125585):0.018579,14:0.027001):0.134996,(17:0.023317,((18:0.172811,(48:0.153690,(19:0.120099,((43:0.049425,49:0.080196):0.145571,(((25:0.014888,37:0.157056):0.020382,53:0.082720):0.110316,((((7:0.039223,9:0.129119):0.180479,12:0.167484):0.107546,(36:0.143350,32:0.037119):0.126282):0.083525,((21:0.150729,(52:0.112407,(38:0.005115,(24:0.127020,(39:0.083231,(28:0.121717,23:0.086451):0.112299):0.008499):0.120916):0.119008):0.115839):0.082094,(10:0.150963,(16:0.040991,(42:0.138176,((31:0.034408,44:0.128276):0.043900,(45:0.175700,(((8:0.047586,33:0.158794):0.065235,35:0.048337):0.029596,(4:0.000947,(5:0.050439,(13:0.174097,2:0.081204):0.149235):0.166073):0.094284):0.158802):0.012646):0.018672):0.133240):0.177054):0.014718):0.017657):0.032351):0.009559):0.070867):0.105545):0.050691):0.031521,(47:0.091158,(41:0.111627,(6:0.011247,(34:0.012016,(50:0.172260,(46:0.000766,((51:0.109803,(15:0.160931,(11:0.071819,(((26:0.075067,40:0.157040):0.113187,27:0.173541):0.018777,(20:0.074769,(29:0.074750,3:0.122801):0.079648):0.059964):0.053481):0.089055):0.139545):0.064332,30:0.012602):0.001813):0.031837):0.116060):0.160691):0.045201):0.001897):0.164978):0.077134):0.031565,1:0.014800);
+   tree rep.100 = (((22:0.157963,54:0.125585):0.065002,(17:0.038858,((18:0.172811,(48:0.153690,(19:0.076261,((43:0.049425,49:0.080196):0.145571,(((25:0.014888,37:0.157056):0.104016,53:0.082720):0.027222,((36:0.139635,32:0.037119):0.204371,((21:0.150729,((24:0.127020,(39:0.083231,(28:0.121717,23:0.075231):0.112007):0.007485):0.120916,(38:0.117588,52:0.116324):0.019183):0.004655):0.030382,(10:0.150963,(16:0.050516,(42:0.138176,((31:0.034408,44:0.128276):0.043900,(45:0.175700,(((8:0.045541,(5:0.050439,(4:0.000947,(13:0.174097,2:0.081204):0.096513):0.041065):0.337782):0.068564,33:0.158794):0.069085,35:0.046885):0.062082):0.012646):0.019251):0.133240):0.129116):0.098729):0.063630):0.031512):0.008868):0.069030):0.102165):0.049377):0.030704,(47:0.091158,(41:0.111627,((6:0.011247,(34:0.047703,(46:0.000766,(50:0.081921,((51:0.042867,30:0.071396):0.059062,(15:0.160931,(11:0.071819,(((26:0.062302,27:0.173541):0.008927,40:0.157040):0.125217,(20:0.074769,(29:0.074750,3:0.174294):0.044187):0.002981):0.111490):0.086250):0.036449):0.181165):0.074995):0.116060):0.117958):0.013739,((7:0.222720,12:0.057596):0.112188,9:0.129119):0.107546):0.028495):0.001848):0.160703):0.070932):0.017780):0.030528,14:0.027001,1:0.076733);
+   tree rep.200 = (4:0.140083,(10:0.046455,(((44:0.303242,3:0.046149):0.007085,(12:0.264171,(39:0.010329,(28:0.010442,((((13:0.164415,32:0.085013):0.012181,36:0.088070):0.239645,((47:0.045731,42:0.055141):0.028604,(43:0.037115,(53:0.094489,(45:0.170989,17:0.084207):0.037785):0.081498):0.028893):0.075241):0.031188,(((11:0.053767,15:0.007988):0.365837,41:0.009924):0.106798,(((14:0.020334,(16:0.003141,(52:0.009450,(31:0.065281,9:0.081911):0.025622):0.205320):0.130518):0.130135,((((20:0.069017,21:0.001805):0.095042,((22:0.000992,27:0.055347):0.028541,((33:0.048426,(37:0.055803,((((5:0.197727,35:0.126381):0.024362,7:0.110282):0.037632,48:0.012853):0.004593,49:0.069144):0.048461):0.043626):0.044665,40:0.137528):0.041607):0.174554):0.131272,25:0.119919):0.026458,(19:0.043106,(24:0.036835,26:0.141457):0.001981):0.023511):0.045316):0.004905,(30:0.164116,23:0.139063):0.051306):0.045154):0.027607):0.184355):0.092816):0.111078):0.017810):0.011860,(51:0.124000,(((((((6:0.036025,(8:0.044154,18:0.131440):0.010261):0.051278,50:0.092479):0.015989,(54:0.138965,29:0.038900):0.117072):0.019978,2:0.066155):0.055828,34:0.084369):0.007028,38:0.132118):0.116475,46:0.095168):0.081231):0.099059):0.057599):0.092299,1:0.155488);
+   tree rep.300 = ((((41:0.074469,(47:0.070075,(18:0.170195,(48:0.216005,(19:0.155284,((43:0.049425,49:0.080196):0.153125,(((25:0.078322,37:0.160449):0.035496,53:0.112645):0.008125,((36:0.139635,32:0.052845):0.168416,(((24:0.127020,(28:0.121717,(39:0.083231,23:0.078065):0.115864):0.008696):0.052982,((42:0.138176,((31:0.034408,44:0.124188):0.013267,(45:0.117313,(((8:0.045541,(4:0.000947,(5:0.086055,(13:0.209025,2:0.081204):0.024175):0.011399):0.337782):0.105831,33:0.158794):0.069085,35:0.069308):0.031964):0.003124):0.025722):0.259725,(10:0.042425,16:0.110297):0.039735):0.191014):0.068713,(21:0.135116,(38:0.117588,52:0.111942):0.022356):0.011332):0.059385):0.037720):0.000909):0.014457):0.003132):0.007537):0.005212):0.020328):0.031939,(9:0.157964,(6:0.071972,((7:0.222720,12:0.057596):0.020190,(34:0.039949,(((50:0.016105,46:0.110602):0.081029,(((26:0.056793,(27:0.064620,40:0.125765):0.033330):0.043797,(20:0.074769,(29:0.074750,3:0.181634):0.046047):0.017717):0.027723,(11:0.106754,15:0.040884):0.191869):0.021053):0.006087,(51:0.042867,30:0.071396):0.058507):0.170142):0.250851):0.044118):0.046820):0.067374):0.068167,(22:0.158868,54:0.125585):0.065002):0.043176,(17:0.044466,14:0.016232):0.030029,1:0.254144);
+   tree rep.400 = (((3:0.054881,((12:0.149557,(((((36:0.011361,32:0.091895):0.227251,13:0.164415):0.072047,((47:0.045731,42:0.045457):0.028604,(45:0.170989,(43:0.037115,(53:0.094489,17:0.151875):0.010276):0.036399):0.002994):0.069442):0.023635,(((11:0.053767,15:0.007291):0.341002,41:0.082941):0.018099,(((9:0.107703,(52:0.022211,31:0.065281):0.129209):0.052897,(14:0.052534,16:0.002995):0.119894):0.152800,(((37:0.055803,(33:0.048426,((49:0.069144,48:0.018638):0.011130,((5:0.203993,7:0.107996):0.003241,35:0.116375):0.033522):0.046802):0.027947):0.066914,(40:0.005572,(22:0.000992,27:0.055347):0.074961):0.262467):0.011952,(((26:0.044133,(19:0.021968,24:0.034463):0.004692):0.090414,25:0.110170):0.081060,((20:0.069017,21:0.001768):0.142084,(30:0.164116,23:0.139063):0.039907):0.003682):0.029602):0.039057):0.033620):0.024068):0.165826,(28:0.013535,39:0.149488):0.042895):0.097397):0.002867,44:0.296540):0.111579):0.011860,(51:0.124000,(((((2:0.086296,(54:0.138965,29:0.046449):0.131077):0.002123,((6:0.036862,(8:0.044154,18:0.134018):0.044121):0.029918,50:0.100654):0.002909):0.054425,34:0.064165):0.004946,38:0.012104):0.078140,46:0.095168):0.024181):0.112252):0.007356,(10:0.095486,4:0.148723):0.005218,1:0.069766);
+   tree rep.500 = (((3:0.033638,(46:0.095168,(((((2:0.086296,(54:0.138965,29:0.046449):0.131077):0.002123,((6:0.036862,(8:0.044154,18:0.134018):0.075578):0.033011,50:0.104237):0.001729):0.055197,34:0.064165):0.068011,38:0.012104):0.033603,51:0.068707):0.046721):0.129746):0.016252,((44:0.300154,(28:0.013535,((((((11:0.053767,15:0.007291):0.341002,41:0.102068):0.016061,(((9:0.074116,(14:0.052534,16:0.002995):0.119929):0.086773,(52:0.022211,31:0.026627):0.084519):0.050469,(((37:0.080049,(33:0.086746,(35:0.116375,((5:0.203993,7:0.107996):0.038960,(49:0.069144,48:0.018638):0.011130):0.030953):0.018647):0.012069):0.016175,(40:0.005572,(22:0.003777,27:0.055347):0.065961):0.240988):0.011172,((20:0.069017,21:0.001768):0.142084,(23:0.139063,(30:0.145755,((26:0.044133,(19:0.021968,24:0.034463):0.004692):0.090414,25:0.110170):0.120926):0.016964):0.000632):0.034642):0.047233):0.001381):0.018795,((47:0.046981,(42:0.008165,((53:0.060089,43:0.027814):0.025023,17:0.151875):0.025364):0.041768):0.031422,45:0.085737):0.049032):0.030224,((36:0.010390,32:0.091895):0.227251,13:0.348171):0.010029):0.007993,39:0.116947):0.096577):0.017004):0.154726,12:0.149557):0.028553):0.018993,(10:0.095486,4:0.003046):0.005218,1:0.009243);
+   tree rep.600 = (((10:0.095486,4:0.003046):0.005343,3:0.021494):0.009169,(((((((2:0.086296,(54:0.138965,29:0.046449):0.131077):0.002123,((6:0.089047,(8:0.044154,18:0.128126):0.075578):0.018450,50:0.104237):0.001729):0.055197,34:0.111462):0.032442,38:0.011793):0.036585,51:0.040738):0.061847,(46:0.095789,((((36:0.010390,32:0.091895):0.227251,(39:0.103153,13:0.335504):0.007758):0.003709,((((47:0.046981,(42:0.008165,((53:0.057021,43:0.027814):0.029252,17:0.151875):0.018562):0.041768):0.029268,45:0.085737):0.078836,(((((37:0.080049,(35:0.116375,(33:0.086746,((5:0.203993,7:0.107996):0.036763,(49:0.069144,48:0.010749):0.027480):0.053627):0.010414):0.002045):0.014092,(40:0.005572,(22:0.003777,27:0.055347):0.065961):0.240988):0.013061,((20:0.069017,21:0.001768):0.139885,(30:0.145755,(23:0.074823,((26:0.038522,(19:0.020894,24:0.034463):0.004692):0.007915,25:0.110170):0.103839):0.021174):0.058565):0.059773):0.038037,(52:0.022211,31:0.026627):0.037033):0.068284,(9:0.137424,(14:0.052534,16:0.002594):0.077089):0.080195):0.023779):0.009356,((11:0.053767,15:0.007291):0.341002,41:0.102068):0.016061):0.015006):0.096982,(28:0.029431,44:0.300154):0.111298):0.065358):0.013070):0.103558,12:0.149557):0.021459,1:0.010304);
+   tree rep.700 = (((10:0.095486,4:0.003046):0.001620,(((((((2:0.086296,(54:0.138965,29:0.046449):0.131077):0.002123,((6:0.089047,(8:0.044154,18:0.128126):0.075578):0.018450,50:0.104237):0.001729):0.055197,34:0.111462):0.032442,38:0.011793):0.035039,51:0.040738):0.062315,((((36:0.009831,32:0.091895):0.144482,(39:0.099656,13:0.335504):0.007758):0.070347,((((47:0.046981,(42:0.008165,((17:0.151875,43:0.004931):0.021655,53:0.041322):0.054598):0.039002):0.036883,45:0.085737):0.077772,((((((33:0.086746,((5:0.101655,7:0.107996):0.036763,(49:0.069144,48:0.010749):0.029499):0.156790):0.010414,(35:0.057180,37:0.085725):0.060494):0.011283,(40:0.005572,(22:0.003777,27:0.055347):0.065961):0.240988):0.013043,((20:0.069017,21:0.001768):0.139885,((23:0.023018,(24:0.034463,(19:0.016869,(26:0.033856,25:0.110170):0.001843):0.002865):0.102246):0.103048,30:0.145755):0.029617):0.059773):0.034692,(52:0.027299,31:0.026627):0.037033):0.026735,(9:0.137424,(14:0.052534,16:0.002594):0.077089):0.080195):0.023779):0.009923,((11:0.053767,15:0.007291):0.341002,41:0.102068):0.015241):0.016285):0.002109,(46:0.104690,(28:0.117042,44:0.300154):0.090580):0.034915):0.013070):0.123724,12:0.149557):0.028839):0.003483,3:0.007190,1:0.016949);
+   tree rep.800 = (((10:0.095486,4:0.003046):0.001620,(((((((2:0.086296,(54:0.206909,29:0.046449):0.053397):0.025375,((6:0.089047,(8:0.044154,18:0.128126):0.075578):0.018450,50:0.104237):0.001729):0.053655,34:0.111170):0.039514,38:0.011793):0.027790,51:0.040738):0.063963,((((32:0.121633,36:0.009831):0.107936,(39:0.099656,13:0.335504):0.007758):0.055974,((((47:0.046981,(42:0.008165,((17:0.151875,43:0.004931):0.028254,53:0.041322):0.018579):0.064401):0.027425,45:0.075355):0.031943,((((20:0.069017,21:0.001768):0.089403,(52:0.027299,31:0.024214):0.081996):0.018165,((((33:0.086746,((5:0.104794,7:0.107996):0.044540,(49:0.069144,48:0.010749):0.029499):0.154989):0.010414,(35:0.057180,37:0.085725):0.060494):0.011980,(40:0.074830,(22:0.003777,27:0.007992):0.025837):0.286798):0.013043,((23:0.022780,(24:0.001597,(19:0.016600,(26:0.033856,25:0.086776):0.001843):0.023453):0.102246):0.133695,30:0.145755):0.056725):0.005719):0.060348,(9:0.137424,(14:0.052534,16:0.002594):0.077089):0.080195):0.023779):0.009923,((11:0.053767,15:0.007291):0.341002,41:0.102068):0.015241):0.016285):0.001986,(46:0.104690,(28:0.117042,44:0.300154):0.090580):0.034915):0.013070):0.123724,12:0.161895):0.118611):0.012373,3:0.007190,1:0.008078);
+   tree rep.900 = ((((((51:0.040738,38:0.039412):0.030687,((50:0.104237,(2:0.086296,((6:0.089047,(8:0.044154,18:0.128126):0.075578):0.021833,(54:0.206909,29:0.046449):0.046705):0.008497):0.028494):0.081836,34:0.111170):0.007082):0.033001,((((32:0.118685,36:0.082241):0.038003,(39:0.099656,13:0.335504):0.007758):0.056416,((((47:0.046981,(42:0.007482,((17:0.151875,43:0.004931):0.028254,53:0.045698):0.018579):0.020312):0.016615,45:0.075355):0.030191,((((((20:0.069017,21:0.021536):0.068942,(52:0.027299,31:0.024214):0.081996):0.022691,(30:0.154294,(23:0.022780,(24:0.001597,(19:0.016600,(26:0.033856,25:0.086776):0.001843):0.023453):0.102246):0.133695):0.034372):0.022434,(40:0.043656,(27:0.016136,22:0.003553):0.018409):0.041255):0.006726,((33:0.051346,(35:0.057180,37:0.085725):0.060494):0.024317,((5:0.104794,7:0.105982):0.044540,(49:0.069144,48:0.010749):0.086428):0.060317):0.012113):0.068974,(9:0.137424,(14:0.052534,16:0.002594):0.077089):0.080195):0.023779):0.009923,((11:0.053767,15:0.007291):0.341002,41:0.102068):0.008310):0.015474):0.001986,(46:0.104690,(28:0.117042,44:0.300154):0.084854):0.038572):0.006848):0.204873,(10:0.027456,12:0.056298):0.052795):0.211349,4:0.003046):0.013823,3:0.007190,1:0.008078);
+   tree rep.1000 = ((((((51:0.040738,38:0.039412):0.030687,((50:0.104237,(2:0.086296,(((54:0.206909,29:0.046449):0.096613,(8:0.044154,18:0.128126):0.053092):0.028063,6:0.089047):0.000197):0.028494):0.081836,34:0.111170):0.007082):0.033001,((((32:0.118685,36:0.082241):0.038003,(39:0.099656,13:0.335504):0.007758):0.056416,((11:0.053767,15:0.007291):0.341002,((((47:0.025066,(((53:0.008946,42:0.007482):0.033860,43:0.031909):0.000497,17:0.151875):0.035837):0.016615,45:0.071141):0.026863,41:0.049708):0.006356,(((((20:0.071823,21:0.021536):0.013429,(52:0.027299,31:0.024214):0.081996):0.100193,(40:0.043656,(27:0.016136,(22:0.002140,(30:0.154294,(23:0.022780,(24:0.001467,(19:0.005589,(26:0.033856,25:0.085683):0.001772):0.012551):0.118877):0.089879):0.034372):0.003064):0.010310):0.054562):0.006438,((33:0.051346,(35:0.055085,37:0.085725):0.026366):0.047866,((5:0.087195,7:0.105982):0.058704,(49:0.069144,48:0.010749):0.086428):0.058929):0.010462):0.049444,(9:0.137424,(14:0.052534,16:0.003555):0.079087):0.083174):0.037953):0.032490):0.009204):0.006528,(46:0.073950,(28:0.117042,44:0.300154):0.084854):0.038033):0.044531):0.163318,(10:0.009874,12:0.056298):0.052795):0.068925,4:0.003046):0.013823,3:0.007190,1:0.008078);
+   tree rep.1100 = ((((((50:0.107844,(2:0.086296,(((54:0.206909,29:0.046449):0.096613,(8:0.044154,18:0.128126):0.053092):0.028063,6:0.089047):0.000197):0.028494):0.044237,((51:0.040738,38:0.036842):0.035562,34:0.048777):0.067401):0.031800,((((32:0.100759,36:0.082241):0.038003,(39:0.099656,13:0.335504):0.007151):0.043657,((11:0.053767,15:0.007291):0.341002,((((47:0.025835,(((53:0.008946,42:0.007482):0.034313,43:0.031909):0.017045,17:0.151875):0.019774):0.012526,45:0.084441):0.022574,41:0.015479):0.063603,(((33:0.051346,((((5:0.087195,7:0.105982):0.058704,(49:0.069144,48:0.010749):0.086428):0.092891,(35:0.055085,37:0.085725):0.020590):0.014053,((20:0.071823,21:0.021536):0.012302,(52:0.027299,31:0.024214):0.081996):0.008775):0.057688):0.066101,(40:0.043656,((30:0.154294,(23:0.066909,(24:0.001467,(19:0.005343,(26:0.033856,25:0.059398):0.001772):0.024194):0.118877):0.045948):0.005920,(22:0.005639,27:0.009875):0.006250):0.003561):0.054562):0.009186,(9:0.137424,(14:0.052534,16:0.003555):0.079087):0.077207):0.037953):0.000861):0.009204):0.006528,(46:0.073950,(28:0.117042,44:0.300154):0.084854):0.038033):0.044531):0.163318,(10:0.009874,12:0.056298):0.052795):0.068925,4:0.003046):0.013823,3:0.007190,1:0.008078);
+   tree rep.1200 = ((((((50:0.107844,(2:0.086296,(((54:0.206909,29:0.046449):0.096613,(8:0.044154,18:0.128126):0.051987):0.028063,6:0.102805):0.011867):0.028494):0.033927,((((32:0.131601,36:0.082241):0.038003,(39:0.099656,13:0.335504):0.007547):0.007449,((((47:0.059698,45:0.049145):0.013379,(((53:0.008435,42:0.006022):0.041278,43:0.028299):0.021851,17:0.151875):0.000340):0.015594,41:0.015479):0.063603,(((33:0.041632,((20:0.071823,21:0.019709):0.034885,((((5:0.087534,7:0.105982):0.111835,(49:0.069144,48:0.010749):0.086428):0.040349,(35:0.055085,37:0.082988):0.029187):0.013419,(52:0.027299,31:0.067965):0.004678):0.029692):0.029170):0.053192,(40:0.043656,((23:0.066909,(30:0.190145,(24:0.001467,(19:0.005343,(26:0.033856,25:0.059398):0.001772):0.024194):0.076242):0.036640):0.005920,(22:0.005639,27:0.009875):0.006250):0.003561):0.054562):0.018943,((11:0.053767,15:0.007291):0.392912,(9:0.137424,(14:0.052534,16:0.003555):0.079087):0.017883):0.066686):0.015733):0.007235):0.009433,(46:0.073950,(28:0.172143,44:0.300154):0.016105):0.035520):0.036868):0.012822,((51:0.040738,38:0.036244):0.035562,34:0.037424):0.045547):0.206198,(10:0.009874,12:0.056298):0.052795):0.068925,4:0.003046):0.013823,3:0.007190,1:0.008078);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/alnfile.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/alnfile.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/alnfile.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+>BS1-fragment 7fab light chain variable region
+TISCTGSSSNIGAGNHVKWYQQLPG
+>BS2-fragment 2fb4 light chain variable region
+VTISCTGTSSNIGSITVNWYQQLPG
+>BS3-fragment 2fb4 heavy chain variable region
+LRLSCSSSGFIFSSYAMYWVRQAPG
+>BS4-fragment 7fab heavy chain variable region 
+LSLTCTVSGTSFDDYYSTWVRQPPG
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/amino.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/amino.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/amino.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+>CYS1_DICDI fragment
+SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE
+
+>ALEU_HORVU
+MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
+SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
+AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
+GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
+QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
+IATCASYPVVAA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ar.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ar.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ar.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,126 @@
+ID   HSANDREC   standard; RNA; HUM; 3569 BP.
+XX
+AC   M20132; J03180;
+XX
+SV   M20132.1
+XX
+DT   23-NOV-1989 (Rel. 21, Created)
+DT   02-JUL-1999 (Rel. 60, Last updated, Version 4)
+XX
+DE   Human androgen receptor (AR) mRNA, complete cds.
+XX
+KW   androgen receptor.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; Eutheria;
+OC   Primates; Catarrhini; Hominidae; Homo.
+XX
+RN   [1]
+RP   1-3569
+RX   MEDLINE; 89112208.
+RA   Lubahn D.B., Joseph D.R., Sar M., Tan J., Higgs H.N., Larson R.E.,
+RA   French F.S., Wilson E.M.;
+RT   "The human androgen receptor: complementary deoxyribonucleic acid cloning,
+RT   sequence analysis and gene expression in prostate";
+RL   Mol. Endocrinol. 2(12):1265-1275(1988).
+XX
+DR   GDB; 120556; AR.
+DR   SWISS-PROT; P10275; ANDR_HUMAN.
+DR   TRANSFAC; T00040; T00040.
+XX
+CC   Draft entry and computer readable sequence [1] kindly submitted by
+CC   E.M.Wilson, 18-AUG-1988.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..3569
+FT                   /db_xref="taxon:9606"
+FT                   /organism="Homo sapiens"
+FT                   /map="Xq11.2-q12"
+FT   gene            363..3122
+FT                   /gene="AR"
+FT   CDS             363..3122
+FT                   /codon_start=1
+FT                   /db_xref="SWISS-PROT:P10275"
+FT                   /note="androgen receptor"
+FT                   /gene="AR"
+FT                   /protein_id="AAA51729.1"
+FT                   /translation="MEVQLGLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAA
+FT                   SAAPPGASLLLLQQQQQQQQQQQQQQQQQQQQQETSPRQQQQQQGEDGSPQAHRRGPTG
+FT                   YLVLDEEQQPSQPQSALECHPERGCVPEPGAAVAASKGLPQQLPAPPDEDDSAAPSTLS
+FT                   LLGPTFPGLSSCSADLKDILSEASTMQLLQQQQQEAVSEGSSSGRAREASGAPTSSKDN
+FT                   YLGGTSTISDNAKELCKAVSVSMGLGVEALEHLSPGEQLRGDCMYAPLLGVPPAVRPTP
+FT                   CAPLAECKGSLLDDSAGKSTEDTAEYSPFKGGYTKGLEGESLGCSGSAAAGSSGTLELP
+FT                   STLSLYKSGALDEAAAYQSRDYYNFPLALAGPPPPPPPPHPHARIKLENPLDYGSAWAA
+FT                   AAAQCRYGDLASLHGAGAAGPGSGSPSAAASSSWHTLFTAEEGQLYGPCGGGGGGGGGG
+FT                   GGGGGGGGGGGGGGEAGAVAPYGYTRPPQGLAGQESDFTAPDVWYPGGMVSRVPYPSPT
+FT                   CVKSEMGPWMDSYSGPYGDMRLETARDHVLPIDYYFPPQKTCLICGDEASGCHYGALTC
+FT                   GSCKVFFKRAAEGKQKYLCASRNDCTIDKFRRKNCPSCRLRKCYEAGMTLGARKLKKLG
+FT                   NLKLQEEGEASSTTSPTEETTQKLTVSHIEGYECQPIFLNVLEAIEPGVVCAGHDNNQP
+FT                   DSFAALLSSLNELGERQLVHVVKWAKALPGFRNLHVDDQMAVIQYSWMGLMVFAMGWRS
+FT                   FTNVNSRMLYFAPDLVFNEYRMHKSRMYSQCVRMRHLSQEFGWLQITPQEFLCMKALLL
+FT                   FSIIPVDGLKNQKFFDELRMNYIKELDRIIACKRKNPTSCSRRFYQLTKLLDSVQPIAR
+FT                   ELHQFTFDLLIKSHMVSVDFPEMMAEIISVQVPKILSGKVKPIYFHTQ"
+XX
+SQ   Sequence 3569 BP; 796 A; 1009 C; 974 G; 790 T; 0 other;
+     taataactca gttcttattt gcacctactt cagtggacac tgaatttgga aggtggagga        60
+     ttttgttttt ttcttttaag atctgggcat cttttgaatc tacccttcaa gtattaagag       120
+     acagactgtg agcctagcag ggcagatctt gtccaccgtg tgtcttcttc tgcacgagac       180
+     tttgaggctg tcagagcgct ttttgcgtgg ttgctcccgc aagtttcctt ctctggagct       240
+     tcccgcaggt gggcagctag ctgcagcgac taccgcatca tcacagcctg ttgaactctt       300
+     ctgagcaaga gaaggggagg cggggtaagg gaagtaggtg gaagattcag ccaagctcaa       360
+     ggatggaagt gcagttaggg ctgggaaggg tctaccctcg gccgccgtcc aagacctacc       420
+     gaggagcttt ccagaatctg ttccagagcg tgcgcgaagt gatccagaac ccgggcccca       480
+     ggcacccaga ggccgcgagc gcagcacctc ccggcgccag tttgctgctg ctgcagcagc       540
+     agcagcagca gcagcagcag cagcagcagc agcagcagca gcagcagcag cagcaagaga       600
+     ctagccccag gcagcagcag cagcagcagg gtgaggatgg ttctccccaa gcccatcgta       660
+     gaggccccac aggctacctg gtcctggatg aggaacagca accttcacag ccgcagtcgg       720
+     ccctggagtg ccaccccgag agaggttgcg tcccagagcc tggagccgcc gtggccgcca       780
+     gcaaggggct gccgcagcag ctgccagcac ctccggacga ggatgactca gctgccccat       840
+     ccacgttgtc cctgctgggc cccactttcc ccggcttaag cagctgctcc gctgacctta       900
+     aagacatcct gagcgaggcc agcaccatgc aactccttca gcaacagcag caggaagcag       960
+     tatccgaagg cagcagcagc gggagagcga gggaggcctc gggggctccc acttcctcca      1020
+     aggacaatta cttagggggc acttcgacca tttctgacaa cgccaaggag ttgtgtaagg      1080
+     cagtgtcggt gtccatgggc ctgggtgtgg aggcgttgga gcatctgagt ccaggggaac      1140
+     agcttcgggg ggattgcatg tacgccccac ttttgggagt tccacccgct gtgcgtccca      1200
+     ctccttgtgc cccattggcc gaatgcaaag gttctctgct agacgacagc gcaggcaaga      1260
+     gcactgaaga tactgctgag tattcccctt tcaagggagg ttacaccaaa gggctagaag      1320
+     gcgagagcct aggctgctct ggcagcgctg cagcagggag ctccgggaca cttgaactgc      1380
+     cgtctaccct gtctctctac aagtccggag cactggacga ggcagctgcg taccagagtc      1440
+     gcgactacta caactttcca ctggctctgg ccggaccgcc gccccctccg ccgcctcccc      1500
+     atccccacgc tcgcatcaag ctggagaacc cgctggacta cggcagcgcc tgggcggctg      1560
+     cggcggcgca gtgccgctat ggggacctgg cgagcctgca tggcgcgggt gcagcgggac      1620
+     ccggttctgg gtcaccctca gccgccgctt cctcatcctg gcacactctc ttcacagccg      1680
+     aagaaggcca gttgtatgga ccgtgtggtg gtggtggggg tggtggcggc ggcggcggcg      1740
+     gcggcggcgg cggcggcggc ggcggcggcg gcggcggcga ggcgggagct gtagccccct      1800
+     acggctacac tcggccccct caggggctgg cgggccagga aagcgacttc accgcacctg      1860
+     atgtgtggta ccctggcggc atggtgagca gagtgcccta tcccagtccc acttgtgtca      1920
+     aaagcgaaat gggcccctgg atggatagct actccggacc ttacggggac atgcgtttgg      1980
+     agactgccag ggaccatgtt ttgcccattg actattactt tccaccccag aagacctgcc      2040
+     tgatctgtgg agatgaagct tctgggtgtc actatggagc tctcacatgt ggaagctgca      2100
+     aggtcttctt caaaagagcc gctgaaggga aacagaagta cctgtgcgcc agcagaaatg      2160
+     attgcactat tgataaattc cgaaggaaaa attgtccatc ttgtcgtctt cggaaatgtt      2220
+     atgaagcagg gatgactctg ggagcccgga agctgaagaa acttggtaat ctgaaactac      2280
+     aggaggaagg agaggcttcc agcaccacca gccccactga ggagacaacc cagaagctga      2340
+     cagtgtcaca cattgaaggc tatgaatgtc agcccatctt tctgaatgtc ctggaagcca      2400
+     ttgagccagg tgtagtgtgt gctggacacg acaacaacca gcccgactcc tttgcagcct      2460
+     tgctctctag cctcaatgaa ctgggagaga gacagcttgt acacgtggtc aagtgggcca      2520
+     aggccttgcc tggcttccgc aacttacacg tggacgacca gatggctgtc attcagtact      2580
+     cctggatggg gctcatggtg tttgccatgg gctggcgatc cttcaccaat gtcaactcca      2640
+     ggatgctcta cttcgcccct gatctggttt tcaatgagta ccgcatgcac aagtcccgga      2700
+     tgtacagcca gtgtgtccga atgaggcacc tctctcaaga gtttggatgg ctccaaatca      2760
+     ccccccagga attcctgtgc atgaaagcac tgctactctt cagcattatt ccagtggatg      2820
+     ggctgaaaaa tcaaaaattc tttgatgaac ttcgaatgaa ctacatcaag gaactcgatc      2880
+     gtatcattgc atgcaaaaga aaaaatccca catcctgctc aagacgcttc taccagctca      2940
+     ccaagctcct ggactccgtg cagcctattg cgagagagct gcatcagttc acttttgacc      3000
+     tgctaatcaa gtcacacatg gtgagcgtgg actttccgga aatgatggca gagatcatct      3060
+     ctgtgcaagt gcccaagatc ctttctggga aagtcaagcc catctatttc cacacccagt      3120
+     gaagcattgg aaaccctatt tccccacccc agctcatgcc ccctttcaga tgtcttctgc      3180
+     ctgttataac tctgcactac tcctctgcag tgccttgggg aatttcctct attgatgtac      3240
+     agtctgtcat gaacatgttc ctgaattcta tttgctgggc tttttttttc tctttctctc      3300
+     ctttcttttt cttcttccct ccctatctaa ccctcccatg gcaccttcag actttgcttc      3360
+     ccattgtggc tcctatctgt gttttgaatg gtgttgtatg cctttaaatc tgtgatgatc      3420
+     ctcatatggc ccagtgtcaa gttgtgcttg tttacagcac tactctgtgc cagccacaca      3480
+     aacgtttact tatcttatgc cacgggaagt ttagagagct aagattatct ggggaaatca      3540
+     aaacaaaaaa caagcaaaca aaaaaaaaa                                        3569
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/atp1.matrix
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/atp1.matrix	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/atp1.matrix	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,516 @@
+
+Last position-specific scoring matrix computed, weighted observed percentages rounded down, information per position, and relative weight of gapless real matches to pseudocounts
+           A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V   A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V
+    1 M   -2 -2 -3 -4 -2 -1 -3 -4 -2  0  1 -2  8 -1 -3 -2 -1 -2 -2  0    0   0   0   0   0   0   0   0   0   0   0   0 100   0   0   0   0   0   0   0  1.01 0.28
+    2 E    1 -1 -1 -1 -3  0  4 -2 -2 -3 -3  0 -3 -4 -2  3  1 -4 -3 -2   10   0   0   0   0   0  41   0   0   0   0   5   0   0   0  33   8   0   0   3  0.55 0.40
+    3 L   -2 -3 -2 -3 -3 -2 -3 -4 -3  0  1 -3  8  0 -4 -3  0 -3 -3  1    1   0   2   1   0   0   1   0   0   3   8   0  66   2   0   0   7   0   0   9  1.03 0.64
+    4 S    0  0  3  1 -4  3  2 -2 -2 -4 -4  2 -3 -5 -3  2  1 -5 -4 -4    9   3  13   7   0  15  12   1   0   0   0  13   0   0   0  16  10   0   0   0  0.43 0.68
+    5 P   -2 -4 -5 -3 -3 -4 -4 -5 -5  6  3 -3  1  0 -1 -3 -2 -4 -3  1    2   0   0   2   0   0   0   0   0  59  25   1   2   3   3   1   1   0   0   2  0.82 0.68
+    6 R   -2  4  5 -1 -4  1 -2 -3 -2 -4 -4  2 -3 -5 -3  1 -1 -5 -4 -4    0  27  36   2   0   4   0   0   0   1   0  15   0   0   1  12   2   0   0   0  0.78 0.70
+    7 A    2 -4 -2 -3 -3 -3 -3 -1 -2 -2 -2 -3 -3 -4  6  1  0 -5 -4 -1   22   0   1   0   0   0   0   4   1   2   4   0   0   0  45  11   7   0   0   3  0.95 0.72
+    8 A    2 -3  0  4 -3  0  2 -2 -3 -2 -4 -1 -3 -4 -3  3  1 -5 -4 -3   16   0   4  22   0   3  15   1   0   3   0   2   0   0   0  25   8   0   0   1  0.49 0.72
+    9 E   -3 -2 -1  1 -5  1  7 -4 -2 -3 -4 -1 -4 -5 -3 -2 -3 -5 -4 -4    0   0   1   3   0   2  90   0   0   1   1   1   0   0   0   1   0   0   0   0  1.41 0.72
+   10 L   -3 -5 -5 -5 -3 -4 -5 -5 -5  6  1 -4  0 -1 -5 -4 -2 -4 -1  3    1   0   0   0   0   1   0   0   0  68   8   0   0   1   0   0   0   0   2  19  1.06 0.72
+   11 T    0  0 -1 -2 -1 -1 -2 -1 -2 -3 -4 -2 -3 -3 -2  5  2 -5 -3 -2    4   4   0   0   1   2   0   2   1   1   1   0   0   0   1  69  15   0   0   1  0.84 0.72
+   12 N    2 -1  0  1 -4  1  2 -2 -2 -4 -4  3 -3 -4 -3  3  0 -5 -3 -3   15   1   4   8   0   4  13   2   0   0   1  19   0   0   0  30   3   0   1   0  0.46 0.73
+   13 L   -1 -4 -5 -5 -3 -4 -5 -5 -5  6  3 -4  0 -2 -4 -3 -2 -4 -3  2    6   1   0   0   0   0   0   0   0  55  26   0   1   0   0   1   0   0   0   9  0.87 0.72
+   14 F   -3 -4 -4 -5 -3 -4 -5 -6 -5  5  4 -4  0 -1 -5 -4 -2 -4 -3  0    0   0   1   0   0   0   0   0   0  50  46   0   0   1   0   0   1   0   0   0  1.01 0.74
+   15 E   -2  3 -1 -2 -5  1  3 -4  0 -5 -4  6 -3 -5 -3 -2 -3 -5 -4 -4    1  17   1   1   0   4  19   0   2   0   0  55   0   0   0   1   0   0   0   0  0.96 0.74
+   16 S   -1 -1  1  1 -5  5  4 -3 -2 -5 -4  2 -3 -5 -3  1 -1 -5 -3 -4    5   1   7   8   0  27  28   1   0   0   0  12   0   0   0   8   2   0   1   0  0.67 0.75
+   17 R   -2  4 -2 -2 -5  6  1 -3 -2 -4 -3  2 -3 -2 -3 -2 -2 -4 -1 -2    2  26   1   0   0  44   6   1   0   0   1  11   0   1   0   0   2   0   2   4  0.84 0.76
+   18 I   -3 -2 -5 -5 -3 -4 -5 -6 -5  7  1 -4  1 -1 -5 -4 -1 -4 -3  2    0   2   0   0   0   0   0   0   0  74   8   0   3   1   0   0   3   0   0   9  1.10 0.78
+   19 R    1  0  1  2 -4  1  3 -2 -2 -4 -1  3 -3 -5 -3  0 -1 -5 -4 -4   12   4   6  10   0   4  24   1   0   0   6  23   0   0   0   7   3   0   0   0  0.44 0.78
+   20 N   -2 -1  4  0 -4  2  0  2 -2 -5 -5  3 -4 -5 -3  2 -2 -5 -4 -4    1   2  28   4   0  10   5  15   0   0   0  20   0   0   0  14   1   0   0   0  0.61 0.79
+   21 F   -1 -4 -4 -5 -4 -2 -4 -5 -1 -1  0 -2 -2  5 -5 -2 -1  0  6  0    4   0   0   0   0   1   0   0   1   3  10   2   0  30   0   2   4   1  36   5  0.91 0.78
+   22 Y   -2 -2  2  3 -4  1  3  2 -2 -4 -2  0 -3 -4 -1 -1  0 -4  0 -3    2   1  10  21   0   7  18  14   0   0   4   6   0   0   3   4   7   0   4   1  0.39 0.78
+   23 A    0 -1  0  1 -3  1  0 -1 -3 -1 -1  1 -2 -1  1  1  2 -4 -3  0    7   1   4   7   0   8   6   3   0   4   7   9   0   2   8   9  16   0   0   6  0.15 0.78
+   24 N   -1 -1  1  1 -4  1  2  0  2 -3 -4  3 -3 -3 -2  1  1 -5 -3 -2    3   1   7   7   0   6  17   8   5   1   1  21   0   1   1  12   8   0   0   2  0.36 0.78
+   25 F    3 -4 -2 -4 -1 -3 -2 -3 -4  1  1 -2  0  0 -1  1  0 -4 -3  3   25   0   1   0   1   0   2   0   0   9  13   2   1   5   3  11   6   0   0  22  0.32 0.78
+   26 Q   -2 -1  2  3 -5  2  4 -2 -2 -3 -3  1 -3 -3 -2  1  0 -5 -4 -2    1   2   8  16   0  10  30   2   0   1   1   8   0   1   1   9   6   0   0   2  0.49 0.77
+   27 V    0  0 -2 -3 -3  0 -2 -4 -4  1  1 -2  2 -1  1 -1  0 -4 -2  4    8   5   2   1   0   4   2   0   0   7  14   1   4   2   7   5   4   0   1  35  0.32 0.77
+   28 D    0  1  0  1 -4  1  3 -2  0 -3 -3  0 -2 -1 -3  2  1 -4 -2  0    6   7   3   9   0   5  19   2   2   1   0   6   0   3   0  21   8   0   1   8  0.28 0.77
+   29 E   -2 -2  4  2 -5  0  5 -3 -1 -5 -3 -1 -4 -5 -3 -1  0 -5 -4 -4    1   0  23   7   0   1  56   0   1   0   2   1   0   0   0   3   5   0   0   0  0.93 0.76
+   30 I   -2 -4 -2 -3 -3 -1  1 -5 -1  3 -1 -3 -1 -3 -4 -1  2 -4 -1  4    0   0   2   0   0   2  11   0   2  18   4   1   0   0   0   2  15   0   2  40  0.52 0.77
+   31 G   -2 -4 -2 -3 -4 -4 -4  7 -4 -6 -6 -3 -5 -5 -4 -2 -3 -4 -5 -5    0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0  1.92 0.77
+   32 R   -2  3 -1 -3 -4  2  0 -4 -2 -1 -3  2 -3 -3 -3 -1  4 -4  0 -1    0  21   1   0   0   9   5   0   0   4   0  12   0   1   0   2  38   0   3   3  0.59 0.77
+   33 V   -2 -5 -5 -5 -3 -4 -5 -5 -5  4 -1 -4 -1 -3 -4 -4 -2 -5 -3  6    0   0   0   0   0   0   0   0   0  19   0   0   0   0   0   0   0   0   0  81  1.24 0.77
+   34 V   -2 -4 -3 -4 -2 -3  1 -5 -4  3  4 -1  0 -1 -4 -3  0 -4 -3  2    2   0   1   0   0   0  10   0   0  15  44   3   1   1   0   1   5   0   0  17  0.54 0.78
+   35 S   -1 -1 -1 -1 -4  3  1 -2 -1 -4 -4  1 -3 -1 -3  5  1 -4  0 -3    2   2   2   2   0  16   7   1   1   0   0   7   0   3   0  49   6   0   4   0  0.60 0.79
+   36 V   -1 -5 -5 -5 -3 -4 -5 -5 -5  4  0 -4 -1 -2 -5 -3 -2 -5 -2  6    3   0   0   0   0   0   0   0   0  29   4   0   0   1   0   1   1   0   1  60  1.04 0.85
+   37 G    1 -3 -2 -3 -2  0 -2  6 -3 -5 -5  0 -3 -5 -4  2 -1 -4 -2 -4    9   0   1   0   1   4   1  57   0   0   0   6   0   0   0  16   4   0   2   1  0.90 0.86
+   38 D   -4 -2 -1  8 -6 -2  0 -3 -2 -5 -6 -3 -5 -6 -4 -2 -1 -6 -5 -5    0   1   0  96   0   0   0   0   0   0   0   0   0   0   0   0   2   0   0   0  1.91 0.86
+   39 G   -2 -4 -1 -2 -5 -4 -4  7 -4 -6 -6 -4 -5 -5 -4 -1 -3 -5 -5 -5    0   0   2   1   0   0   0  95   0   0   0   0   0   0   0   2   0   0   0   0  1.84 0.86
+   40 I   -3 -5 -5 -5 -3 -5 -5 -6 -5  7  0 -5 -1 -2 -5 -4 -2 -5 -3  4    0   0   0   0   0   0   0   0   0  75   1   0   0   0   0   0   1   0   0  23  1.34 0.87
+   41 A    6 -4 -4 -4  0 -3 -3 -2 -4  2 -2 -3 -2 -4 -3 -1 -2 -5 -4  1   72   0   0   0   2   0   0   0   0  14   0   0   0   0   0   0   1   0   0  11  0.96 0.87
+   42 Q   -3  7 -1 -4 -5  1 -1 -4  0 -2 -2  1 -3  2 -4 -3 -3 -4 -2 -4    0  65   2   0   0   5   2   0   2   3   4   6   0   9   0   0   0   0   1   0  1.08 0.88
+   43 V    0 -5 -5 -5  1 -4 -5 -5 -5  4 -1 -4 -1 -3 -4 -2 -2 -5 -3  5    9   0   0   0   3   0   0   0   0  30   1   0   0   0   0   3   0   0   0  54  0.96 0.88
+   44 Y   -3 -2 -2 -3 -3 -3 -1 -3  5 -3 -3 -3 -3  1 -4  2  0 -1  7 -4    0   1   1   0   0   0   3   1  17   0   1   0   0   4   0  16   5   0  48   0  1.08 0.88
+   45 G   -2 -2 -2 -3 -3 -4 -4  7 -4 -6 -6 -3 -5 -5 -4 -2 -4 -5 -5 -5    0   2   0   0   1   0   0  97   0   0   0   0   0   0   0   0   0   0   0   0  1.91 0.88
+   46 L   -3 -4 -5 -6 -3 -4 -5 -6 -5  1  6 -5  2 -2 -5 -4 -3 -4 -3 -1    0   0   0   0   0   0   0   0   0   3  92   0   3   0   0   1   0   0   0   1  1.33 0.88
+   47 N    0 -1  2  3 -5 -1  3  0 -3 -5 -4  2 -4 -5  1  1 -1 -5 -4 -4    6   3  12  20   0   1  20   6   0   0   2  12   0   0   8   9   3   0   0   0  0.48 0.88
+   48 E   -3 -2  6  2 -5  1  2  0 -1 -5 -5  1 -4 -5 -4 -1 -2 -6 -4 -5    1   1  51  12   0   6  11   5   1   0   0   8   0   0   0   3   1   0   0   0  0.94 0.88
+   49 I    2 -4 -5 -4  2 -4 -4 -4 -5  3 -1 -4 -1 -3 -4 -3 -2 -5 -3  5   22   0   0   1   4   0   0   0   0  20   1   0   0   0   0   0   0   0   0  52  0.88 0.88
+   50 Q    0  1 -3 -4 -4  5 -1 -3 -3 -2 -1  1  7 -1 -4 -2 -3 -4 -3 -2    7   5   0   0   0  32   1   2   0   1   3   8  38   2   0   0   0   0   0   1  0.86 0.88
+   51 A    4 -3 -2 -4 -3  1 -3 -3 -3 -1  0 -3 -1  0 -3  1 -2 -3  2  0   48   0   2   0   0   6   0   0   0   3  12   0   1   4   0  11   1   0   8   4  0.51 0.88
+   52 G   -2 -4  1  0 -5 -2  1  6  2 -6 -5 -3 -2 -5 -4  0 -3 -3 -2 -5    0   0   6   3   0   1  11  63   6   0   0   0   1   0   0   8   0   0   1   0  1.05 0.87
+   53 E   -3 -2 -2  0 -6  2  7 -4 -2 -5 -5 -1 -4 -5 -3 -2 -3 -5 -4 -5    0   0   0   0   0   6  94   0   0   0   0   0   0   0   0   0   0   0   0   0  1.68 0.87
+   54 M   -1 -4 -5 -5 -3 -3 -4 -5 -4  0  4 -1  7 -1 -5 -4 -3 -4 -3  1    5   0   0   0   0   0   0   0   0   2  40   4  41   1   0   0   0   0   0   6  1.03 0.88
+   55 V   -3 -5 -5 -5 -1 -4 -5 -6 -5  3  3 -5 -1 -2 -5 -4 -2 -5 -3  6    0   0   0   0   1   0   0   0   0  14  24   0   0   0   0   0   0   0   0  61  1.05 0.88
+   56 L   -1  1 -2 -1 -3  0  6 -4 -2 -3 -1  1 -3 -3 -3 -1 -1 -5 -4 -4    3   6   0   1   1   2  67   0   0   1   6   6   0   1   0   3   3   0   0   0  0.97 0.90
+   57 F   -4 -5 -5 -6 -4 -5 -5 -5 -4  0  1 -5 -2  8 -6 -4 -4 -1  1 -2    0   0   0   0   0   0   0   0   0   6  10   0   0  84   0   1   0   0   0   0  1.74 0.90
+   58 A    1 -3 -1  3 -4 -1  3 -2 -1 -4 -5 -1 -4 -5  4  1 -2 -5 -4 -3   14   0   3  16   0   2  23   1   1   0   0   1   0   0  24  13   0   0   0   1  0.69 0.90
+   59 N   -2 -3  4  3 -4 -2  1  3  0 -5 -5 -1 -4 -5 -4  3 -1 -5 -3 -5    0   0  20  15   0   1   9  20   2   0   0   3   0   0   0  28   2   0   1   0  0.68 0.90
+   60 G   -2  0  3  1 -5 -3 -1  6 -3 -6 -6 -1 -5 -5 -4  0 -3 -5 -5 -5    0   6  13   7   0   0   3  62   0   0   0   2   0   0   0   5   0   0   0   0  1.09 0.90
+   61 V   -1 -2 -3 -2 -3 -3 -3 -4  2  3  1 -3 -1 -3 -4  1  3 -5 -3  3    3   3   0   2   0   0   1   0   5  19  13   1   0   0   0  10  17   0   0  26  0.44 0.90
+   62 K   -3  1 -3 -4 -5  0  0 -4 -2  0  0  4  0  3 -4 -2 -2 -3  4 -1    0   6   0   0   0   4   4   0   0   5   7  33   3  16   0   2   2   0  14   4  0.52 0.90
+   63 G    0 -4 -2 -3 -5 -4 -4  7 -4 -6 -6 -4 -5 -5 -4 -2 -4 -5 -5 -5    5   0   0   0   0   0   0  95   0   0   0   0   0   0   0   0   0   0   0   0  1.88 0.89
+   64 M   -3 -4 -5 -5 -3 -1 -4 -5 -4  4  2 -4  8  0 -5 -4 -3 -4 -2  1    0   0   0   0   0   3   0   0   0  26  16   0  45   2   0   0   0   0   1   8  1.08 0.91
+   65 A    6 -4 -4 -4  0 -3 -3 -3 -4  0 -3 -3 -2 -4 -3 -1 -1 -5 -4  3   70   0   0   0   1   0   0   0   0   5   0   0   0   0   0   1   2   0   0  20  0.96 0.91
+   66 L   -3 -4 -5 -5 -4  0 -4 -5 -2 -1  5 -4  4  2 -5 -3 -3 -3 -2  0    1   0   0   0   0   4   0   0   1   0  67   0  12   8   0   1   1   0   0   5  0.96 0.92
+   67 N   -3 -3  8  0 -5 -2  0 -1 -2 -5 -6 -2 -4 -5 -4 -1 -1 -6 -4 -5    0   0  88   1   0   0   4   2   0   0   0   0   0   0   0   2   2   0   0   0  1.71 0.92
+   68 L   -4 -4 -6 -6 -3 -4 -5 -6 -5  1  6 -5  0 -1 -3 -5 -3 -4 -3  0    0   0   0   0   0   0   0   0   0   4  92   0   0   1   2   0   0   0   0   2  1.32 0.92
+   69 E   -3 -1  1  3 -6  0  6 -3 -2 -5 -5 -2 -4 -5 -3 -2 -3 -5 -1 -5    0   2   7  16   0   1  72   1   0   0   0   0   0   0   0   0   0   0   3   0  1.30 0.93
+   70 N    0  1  2  0 -4  1  3 -4 -1 -3 -4  1 -2 -5  0  1 -1 -5 -2  0    7   9  10   4   0   7  23   0   1   1   0   8   1   0   5  12   2   0   1  10  0.35 0.93
+   71 E   -3 -2  1  7 -5 -2  3 -3 -2 -5 -6  0 -5 -6 -4  0 -1 -6 -5 -5    0   1   7  62   0   0  17   1   1   0   0   4   0   0   0   5   3   0   0   0  1.22 0.93
+   72 N   -2 -2  6 -1 -4  0  0 -1  0 -4 -3 -2 -4  0 -4  2  0 -5 -1 -2    2   0  49   2   0   5   5   3   2   1   1   0   0   5   0  16   5   0   2   2  0.78 0.93
+   73 V   -2 -5 -5 -5 -2 -4 -5 -5 -5  3 -1 -4  0 -3 -5 -4 -1 -5 -3  6    1   0   0   0   1   0   0   0   0  13   0   0   1   0   0   0   2   0   0  83  1.30 0.93
+   74 G   -2 -2 -1 -2 -5 -3 -4  7 -4 -6 -6 -3 -5 -5 -4 -1 -4 -5 -5 -5    1   3   2   1   0   1   0  91   0   0   0   0   0   0   0   3   0   0   0   0  1.75 0.95
+   75 I    3 -5 -5 -5  2 -4 -4 -1 -5  4 -1 -4 -2 -3 -4 -3 -2 -5 -4  4   26   0   0   0   5   0   0   5   0  28   1   0   0   0   0   0   1   0   0  35  0.75 0.95
+   76 V   -1 -5 -5 -5 -3 -4 -5 -2 -5  3  0 -4 -1 -3 -5 -3 -2 -5 -3  6    3   0   0   0   0   0   0   4   0  12   7   0   0   0   0   1   0   0   0  73  1.10 0.97
+   77 V   -3 -5 -5 -6 -3 -5 -5 -4 -5  4  4 -5  1 -2 -5 -4 -3 -4 -3  4    0   0   0   0   0   0   0   1   0  27  39   0   3   0   0   0   0   0   0  29  0.95 0.97
+   78 F   -3 -4 -4 -4 -3 -3 -4 -4 -3  1  4 -4  5  5 -4 -3 -3 -2  0 -1    1   1   0   1   0   0   1   1   0   7  37   1  17  29   1   1   1   0   1   1  0.87 0.89
+   79 G   -2 -3  2  1 -4 -3 -3  6 -3 -5 -5 -3 -4 -4 -3 -2 -3 -4 -4 -4    1   1   9  10   0   0   1  71   0   1   1   1   0   0   1   1   1   0   0   1  1.26 0.90
+   80 G   -2 -2  3  5 -4  0  2 -2 -2 -4 -4  0 -4 -3  3  1 -2 -4 -2 -4    2   1  12  32   0   3  11   2   0   1   1   5   0   1  13  11   1   0   2   1  0.62 0.90
+   81 D   -2 -3 -2  4 -4 -1  1  1 -2 -2 -4  0 -3  0 -2  0  0 -3  4 -3    3   0   0  27   0   3   8  12   0   3   1   6   0   4   2   6   5   0  18   1  0.46 0.89
+   82 T   -1  3  1  1 -3 -1  0 -3 -3 -2  0  1 -2 -2 -3  1  3 -4 -2  0    3  16   8   7   0   1   5   1   0   2  10   7   0   1   1  10  19   0   1   7  0.24 0.88
+   83 A    1 -1  1  2 -4  0  1  2  3 -3 -1  0 -1 -2 -3  0 -1 -4 -3 -3   13   4   7  11   0   3   8  15   9   1   8   4   1   2   1   6   4   0   0   1  0.22 0.87
+   84 I   -3 -4 -4 -4 -3 -4 -4 -4 -4  6  2 -4 -1 -2 -4 -3  0 -4 -3  3    1   1   0   1   0   0   1   1   0  55  14   1   0   0   1   1   6   0   0  17  0.89 0.84
+   85 K   -1  1 -1 -2 -4  2  0 -3  0 -2 -1  5 -2 -3 -3  1  0 -4 -1 -2    4   5   1   1   0  11   4   1   2   2   8  37   0   0   1  13   4   0   2   3  0.45 0.86
+   86 E    0 -2 -2 -1 -4  2  6 -3 -2 -4 -4 -1 -3 -4 -2  0 -2 -4 -3 -2    9   0   0   1   0   6  68   1   0   0   1   1   0   0   1   6   1   0   0   2  1.04 0.86
+   87 G   -2 -4 -1 -2 -4 -3 -3  7 -3 -5 -5 -3 -4 -4 -3 -2 -3 -4 -4 -4    1   0   3   1   0   0   1  86   0   0   1   1   0   0   1   2   1   0   0   1  1.58 0.86
+   88 D   -1 -3 -1  5 -4  2  0 -2 -2 -3 -3 -2  2 -3 -3  2  1 -4 -3 -2    5   0   1  38   0   9   4   2   1   0   1   1   7   1   1  14   9   0   0   3  0.57 0.86
+   89 L   -2  0 -1 -2 -3 -1  0 -3  0  1  1  2  0 -2 -1  1  2 -4 -2  0    1   4   3   1   0   3   7   1   2  10  17  13   1   1   3  10  16   0   1   6  0.19 0.86
+   90 V    0 -4 -4 -4  1 -3 -4 -4 -4  2 -1 -4 -1 -3 -4 -3 -2 -4 -3  6    7   0   0   1   3   0   1   1   0   9   1   1   0   0   1   1   1   0   0  74  1.01 0.82
+   91 K   -2  2  0 -2 -4 -1  0 -2 -1 -3 -3  5  0 -3 -3  1 -1 -2  1 -2    1  11   3   0   0   1   4   3   1   1   2  49   3   0   0   9   3   0   5   2  0.63 0.83
+   92 R    1  5 -2 -3  4 -1 -2  1 -3 -3 -3 -1 -3 -4 -1  0 -1 -4 -3 -1   12  44   0   1  10   1   1  12   0   0   1   1   0   0   2   5   4   0   0   5  0.66 0.81
+   93 T   -2 -2 -2 -3  1 -2 -3 -3 -3 -2 -2 -2  1 -3 -3  1  7 -4 -3 -2    1   2   0   1   3   0   1   1   0   0   3   1   5   0   0   6  75   0   0   1  1.07 0.81
+   94 G   -2 -2  1 -1 -4 -2 -1  6  0 -4 -4  0 -4 -2 -3 -1 -3 -3  2 -4    1   1   8   3   0   1   3  58   2   0   1   5   0   1   0   4   1   0   9   1  0.88 0.81
+   95 S    0  4  0 -1 -4  2  0 -3  1 -3 -4  4 -3 -3 -2  2 -1 -4 -3 -3    7  21   3   3   0   8   4   1   3   1   1  24   0   1   1  18   4   0   0   1  0.49 0.81
+   96 I   -3 -4 -2 -2 -3 -4 -4 -4 -4  6  2 -4 -1  1 -4 -3 -2 -4 -2  2    1   0   2   3   0   0   1   1   0  53  17   1   0   5   0   1   1   0   0  13  0.76 0.78
+   97 V    3 -3 -4 -4 -3 -3 -3 -3 -4  1  2 -3  2  0 -3 -2 -2 -3 -2  4   26   0   0   0   0   0   1   1   0   7  20   1   6   5   0   1   1   0   0  30  0.51 0.78
+   98 D   -1 -1 -1  4 -4  2  3 -2  0 -4 -4  1 -3 -3 -3  2 -2 -4 -1 -4    4   1   1  21   0  10  23   2   2   0   1   8   0   1   0  21   1   0   2   1  0.54 0.79
+   99 V   -1 -4 -4 -4 -3 -3 -4 -4 -4  4  1 -3 -1  0 -4 -3  1 -4 -3  5    2   0   0   0   0   0   1   1   0  20   9   1   0   3   0   1   9   0   0  51  0.74 0.77
+  100 P   -2 -2  0 -1 -4 -1 -2 -2 -3 -4 -2  0 -3 -3  7 -2 -3 -4  2 -3    1   1   5   3   0   3   1   2   0   0   6   5   0   0  61   1   1   0   8   1  1.33 0.78
+  101 A    1 -4 -4 -4 -2 -3 -4 -4 -4  1  2 -3 -1 -2 -3 -2 -1 -4 -3  5   10   0   0   0   0   0   1   1   0   2  18   1   0   0   0   2   2   0   0  60  0.76 0.76
+  102 G   -1 -4 -2 -2 -4 -3 -3  7 -3 -4 -4 -3 -4  0 -3 -1 -3 -4 -4 -4    2   0   0   2   0   0   1  81   0   0   1   0   0   5   0   4   0   0   0   1  1.37 0.77
+  103 K   -1  2  0  3 -4 -1  3 -3 -1 -3 -4  3 -2 -4  0 -2 -2 -4  0 -3    5  10   4  18   0   1  24   1   1   2   1  21   1   0   4   1   0   0   4   1  0.51 0.77
+  104 A    3 -3 -1 -1 -2  0  3  2 -1 -4 -3 -2 -3  0 -3 -1 -2 -3  2 -2   25   0   2   3   1   4  21  19   1   0   1   1   0   4   0   3   2   0   9   2  0.39 0.77
+  105 M    0 -3 -4 -4 -3 -3 -3 -4 -3  0  4 -3  5  0 -4  0 -1 -3  0  1    6   0   0   0   0   0   0   1   0   3  43   0  16   4   0   9   2   0   3  10  0.52 0.72
+  106 L   -1 -3 -4 -4 -1 -2 -3 -4 -4  2  5 -2  1 -1 -3  0 -2 -3 -3  0    5   0   0   0   1   1   1   1   0  13  58   2   2   1   1   9   1   0   0   3  0.59 0.71
+  107 G   -1 -2 -2 -3 -4 -3 -3  6 -3 -4 -4 -3 -4 -4 -3  1 -3 -4 -4 -4    1   2   0   0   0   0   0  78   0   0   1   0   0   0   0  10   0   0   0   0  1.27 0.71
+  108 R   -3  7 -2 -3  4 -1 -2 -4 -2 -4 -3  1 -3 -4 -3 -1 -2 -4 -3 -4    1  76   0   0   9   0   0   0   0   0   1   5   0   0   0   5   0   0   0   0  1.26 0.69
+  109 V   -2 -4 -4 -4 -3 -4 -4 -5 -4  3 -1 -4 -1  2 -4 -3 -1 -4 -2  6    0   0   0   0   0   0   0   0   0  13   1   0   0   8   0   0   2   0   0  72  0.98 0.72
+  110 V   -2 -4 -4 -4 -3 -4 -4 -4 -4  3  0 -4  3 -2 -4 -3 -1 -4 -3  6    2   0   0   0   0   0   0   0   0  19   3   0   9   0   0   0   2   0   0  62  0.90 0.72
+  111 D   -3 -3  5  6 -4 -2  1 -3 -2 -5 -5 -2 -4 -5 -3  0 -1 -5 -4 -4    0   0  34  44   0   0   7   0   0   0   0   0   0   0   0   6   2   0   0   0  1.02 0.79
+  112 A    3 -3 -3 -3 -3 -3 -3  1 -4 -4 -4 -3 -3 -4  6  1  1 -5 -4 -2   29   0   0   0   0   0   0   8   0   0   0   0   0   0  37   9   9   0   0   2  0.86 0.79
+  113 M   -3 -1 -4 -5 -3 -4 -4 -5 -4  1  5 -4  1 -2 -4 -3  1 -4 -3 -1    0   4   0   0   0   0   0   0   0   5  71   0   2   0   0   0  12   0   0   2  0.86 0.81
+  114 G    0 -4 -3 -3 -4 -4 -4  7 -4 -5 -5 -3  0 -5 -4 -1 -3 -5 -5 -2    7   0   0   0   0   0   0  83   0   0   0   0   3   0   0   4   0   0   0   3  1.50 0.87
+  115 V   -2  1  4 -1 -5  2  2  0  3 -3 -3  2 -2 -2 -4 -2 -1 -5 -2 -2    1   6  20   1   0  11  16   8   9   1   2  12   1   1   0   1   2   0   1   3  0.43 0.96
+  116 P   -1 -4 -4  1 -1 -1  0 -4 -4 -3  0 -3  3  1  6 -2 -1 -5 -4 -2    4   0   0   7   2   2   8   0   0   0   8   0   8   6  47   2   4   0   0   2  0.95 0.96
+  117 I   -3 -5 -5 -6 -3 -5 -5 -6 -5  6  3 -5  2  2 -5 -5 -3 -4 -3  2    0   0   0   0   0   0   0   0   0  53  26   0   5   7   0   0   0   0   0   9  1.04 0.96
+  118 D   -3 -3 -1  5 -4 -2 -1 -4 -2 -2  0 -3 -2 -2 -3 -2  2 -3  3  1    0   0   0  49   0   0   0   0   0   0  11   0   0   0   0   0  13   0  15  13  0.69 0.61
+  119 G    1  2  2  2 -4 -2  2  1 -3 -4  0 -2 -3 -4 -4  1 -2 -5 -4 -3   13  13  13  13   0   0  13  12   0   0  13   0   0   0   0  13   0   0   0   0  0.33 0.78
+  120 K   -2  1 -1 -1 -5  0  0 -2 -3 -4  0  6 -3 -4 -2 -1 -2 -5 -4 -3    0   6   2   2   0   1   3   2   0   0  10  68   0   0   1   3   0   0   0   2  0.88 0.77
+  121 G   -2 -1 -2 -2 -4 -3 -1  7 -4 -6 -5 -2 -4 -5 -2  0 -3 -5 -5 -4    0   3   0   1   0   0   3  83   0   0   0   1   0   0   2   6   0   0   0   0  1.42 0.77
+  122 A    2 -1  0  1 -4 -1  3 -3 -3 -4 -4 -2 -4 -4  5  0 -2 -5 -1 -3   23   3   3   9   0   1  19   0   0   0   1   1   0   0  30   6   1   0   2   1  0.68 0.77
+  123 L   -3 -4 -5 -5 -3 -4 -2 -6 -5  6  3 -4  0 -1 -3 -4 -3 -4 -3  2    0   0   0   0   0   0   3   0   0  51  31   0   0   1   1   0   0   0   0  12  0.90 0.76
+  124 S   -1 -2  2  2 -4  2  1 -1 -1 -3 -2  2 -3 -1 -1  1 -1 -5 -3 -1    4   0  12  12   0   9  10   5   1   1   4  17   0   2   3   9   4   0   0   6  0.22 0.76
+  125 D    2 -3  0  1  0 -2 -1  0 -1 -3 -4  0 -3 -2  0  1  3 -4 -2 -1   17   0   4  10   2   0   3   7   2   1   0   6   0   1   4  12  23   0   1   3  0.27 0.76
+  126 H   -1 -2  1  3 -4 -1  2 -1  2 -3 -3  2 -3 -4 -3  2  2 -5 -3 -2    2   0   5  16   0   2  14   5   5   1   1  15   0   0   0  16  13   0   0   4  0.33 0.76
+  127 E   -1 -1 -1  1 -3  1  4 -1  1 -2 -2  2 -1 -3 -2 -1  0 -4  1 -3    6   1   2   6   0   4  31   4   3   2   3  17   1   0   1   4   5   0   6   0  0.34 0.73
+  128 Q   -2  5 -1 -1 -4  0  0 -3 -1 -3 -2  1 -2  1 -4  1  2  0  1 -3    1  34   2   3   0   2   4   0   1   0   3   5   1   8   0  11  18   1   4   0  0.44 0.72
+  129 R    0  5 -2 -1 -4 -1 -1 -2 -3 -3  0  0  3 -3 -4  2 -1  0 -3 -2    6  38   0   3   0   1   1   2   0   0   8   5   8   1   0  16   3   2   0   3  0.45 0.72
+  130 R   -1  4 -3 -3 -4 -1 -1 -4 -2 -2 -1  0 -1 -3  6 -1 -3 -5 -4 -2    3  31   0   1   0   2   2   0   1   1   8   3   1   1  39   3   0   0   0   2  0.87 0.70
+  131 V   -1 -4 -5 -5 -1 -4 -4 -5 -5  4  1 -4  0 -2 -4 -4 -2 -4 -3  5    3   0   0   0   1   0   0   0   0  32  12   0   0   0   0   0   0   0   0  49  0.86 0.69
+  132 E   -3 -2 -2  2 -5  1  7 -2 -2 -5 -4 -1 -4 -1 -3 -2 -3 -4 -3 -4    0   0   0   7   0   1  83   3   0   0   0   0   0   3   0   0   0   0   0   0  1.24 0.69
+  133 V    0  3 -3 -4  1 -2 -2 -4 -3  1  0  1 -1 -3 -4  0  0 -4 -3  3    8  17   0   0   3   0   1   0   0   8   8  10   1   1   0   9   6   0   0  28  0.30 0.69
+  134 K   -2 -1 -2 -2 -4  1 -2 -2 -3  1 -2  5 -1 -4  3 -2 -2 -5 -3  1    1   0   1   2   0   5   0   2   0   9   1  49   2   0  15   1   0   0   0  12  0.61 0.69
+  135 A    6 -3 -3 -3 -2 -2 -3 -2 -3 -3 -3 -2 -3 -4 -3  0  0 -4 -4 -2   96   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   3   0   0   0  1.19 0.69
+  136 P   -1 -4 -4 -3 -4 -3 -3 -4 -3 -4 -3 -3 -4 -4  8 -2 -2 -5 -3 -4    3   0   0   0   0   0   0   0   0   0   2   0   0   0  91   0   2   0   1   0  2.33 0.67
+  137 G    0 -4  0 -2 -4 -3 -4  7 -4 -5 -5 -3 -4 -5 -2  0 -2 -4 -5 -5    5   0   3   0   0   0   0  86   0   0   0   0   0   0   1   4   1   0   0   0  1.47 0.67
+  138 I   -2 -4 -5 -5 -3 -4 -5 -5 -5  6  0 -4 -1 -2 -4 -4 -2 -4 -3  4    0   0   0   0   0   0   0   0   0  62   0   0   0   0   0   0   0   0   0  35  1.07 0.66
+  139 L   -2 -4 -5 -5 -2 -4 -5 -5 -5  6  1 -4  5 -2 -4 -4 -2 -4 -3  2    1   0   0   0   0   0   0   0   0  65   3   0  18   0   0   0   0   0   0  11  1.01 0.68
+  140 E    1 -2 -2  2 -3  1  3 -3 -2 -4 -4 -1 -3 -1  4  1  0  0 -1 -2   12   0   0  11   0   5  21   0   1   0   0   2   0   4  22  10   6   1   3   2  0.47 0.68
+  141 R   -3  8 -2 -3 -5 -1 -1 -4 -2 -2 -4  1 -3 -4 -4 -2 -3 -4 -3 -4    0  96   0   0   0   0   1   0   0   3   0   0   0   0   0   0   0   0   0   0  1.63 0.66
+  142 K   -1  1 -2  0 -4  4  1 -3 -1 -2 -3  5 -2 -4 -3 -1 -2 -4 -3 -3    3   6   0   6   0  26   6   0   1   3   1  41   0   0   0   4   1   0   0   1  0.67 0.67
+  143 S   -1 -3 -1 -2 -3 -2 -2 -1 -3 -4 -3 -2 -2 -3  4  6  0 -5 -4 -3    0   0   0   0   0   0   0   2   0   0   2   1   1   1  19  73   1   0   0   0  0.98 0.67
+  144 V   -2 -4 -5 -5 -2 -4 -4 -5 -5  2 -1 -4 -1 -2 -4 -3 -2 -5 -3  7    0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0   0   0   0  98  1.26 0.66
+  145 H   -2  1  3  2  1 -2 -1 -2  6 -4 -4 -1 -3 -2 -3  1  1 -4  0 -4    2   7  18  13   4   0   1   1  26   0   0   3   0   1   0  13   9   0   3   0  0.59 0.66
+  146 E   -2  0 -2 -1 -5  4  6 -4 -2 -4 -4  0 -3 -4 -3 -2 -2 -4 -1 -1    0   3   0   0   0  18  70   0   0   0   0   2   0   0   0   0   1   0   2   5  1.09 0.66
+  147 P   -2 -4 -2 -3 -5 -3 -3 -4 -4 -5 -5 -3 -4 -5  8 -2 -3 -5 -5 -4    0   0   2   0   0   0   0   0   0   0   0   0   0   0  96   2   0   0   0   0  2.49 0.65
+  148 M   -3 -4 -5 -5 -3 -3 -4 -5 -4  1  4 -4  7  0 -3 -4 -2 -3 -2  2    0   0   0   0   0   0   0   0   0   3  38   0  37   3   1   0   0   0   0  18  0.89 0.66
+  149 Q   -1 -1 -1 -2 -4  7  1 -3 -1 -3 -1 -1 -1 -4 -2 -1 -2 -4 -2 -3    4   0   2   0   0  74   6   0   0   1   8   0   0   0   1   2   0   0   1   0  1.07 0.66
+  150 T   -2 -3 -2 -3 -3 -2 -3 -3 -3 -1 -2 -2 -2 -2 -3  0  7 -4 -3 -2    0   0   0   0   0   0   0   0   0   3   1   0   0   2   0   2  92   0   0   0  1.24 0.66
+  151 G   -1 -3 -2 -3 -4 -3 -4  7 -4 -6 -5 -3 -4 -5 -4 -2 -3 -4 -5 -5    0   1   0   0   0   0   0  99   0   0   0   0   0   0   0   0   0   0   0   0  1.81 0.66
+  152 L   -3 -4 -5 -5 -3 -4 -5 -5 -4  5  4 -4  0  0 -5 -3 -2 -3  1  1    0   0   0   0   0   0   0   0   0  40  45   0   0   2   0   1   1   0   6   5  0.82 0.66
+  153 K   -2  0  0 -3 -4 -1 -1 -3 -3 -1 -1  6 -2 -4 -3 -2  0 -5 -3 -1    2   0   4   0   0   0   0   0   0   3   7  72   0   0   0   0   4   0   0   5  0.82 0.66
+  154 A    5 -3 -3 -3  1 -3 -3 -2 -3  0 -2 -2 -2 -4 -2  3 -1 -4 -3  1   61   0   0   0   3   0   0   0   0   4   1   0   0   0   1  21   1   0   0   9  0.71 0.66
+  155 V   -2 -5 -5 -5 -3 -4 -5 -5 -5  6  0 -4 -1 -2 -4 -4 -2 -4 -3  5    0   0   0   0   0   0   0   0   0  57   3   0   0   0   0   0   0   0   0  40  1.08 0.66
+  156 D   -3 -3  0  8 -5 -2  0 -3 -3 -5 -5 -2 -5 -5 -3 -2 -3 -6 -5 -5    0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.86 0.66
+  157 S    4 -3 -2 -3  2 -2 -2 -1 -3 -3 -3 -2 -2 -4 -3  5  0 -4 -3 -2   37   0   0   0   4   0   0   1   0   0   0   0   0   0   0  53   3   0   0   1  0.80 0.66
+  158 L   -3 -4 -5 -5 -3 -3 -4 -5 -4  0  4 -4  7 -1 -2 -4 -3 -3 -3 -1    0   0   0   0   0   0   0   0   0   2  49   0  47   0   2   0   0   0   0   0  1.12 0.67
+  159 V   -2 -4 -5 -5 -3 -4 -4 -5 -5  5  0 -4 -1  1 -4 -3  0 -4 -2  5    0   0   0   0   0   0   0   0   0  40   1   0   0   5   0   0   6   0   0  47  0.90 0.67
+  160 P   -2 -4 -4 -3 -4 -3 -3 -4 -4 -4 -4 -3 -4 -5  8 -2 -3 -5 -5 -4    2   0   0   0   0   0   0   0   0   0   1   0   0   0  96   1   0   0   0   0  2.50 0.67
+  161 I   -3 -5 -5 -5 -3 -4 -5 -6 -5  7  0 -4 -1 -2 -5 -4 -2 -4 -3  3    0   0   0   0   0   0   0   0   0  80   0   0   0   0   0   0   0   0   0  20  1.24 0.69
+  162 G   -1 -4 -2 -3 -2 -4 -4  7 -4 -6 -6 -3 -5 -5 -3 -2 -3 -4 -5 -5    0   0   0   0   1   0   0  99   0   0   0   0   0   0   1   0   0   0   0   0  1.82 0.70
+  163 R   -3  7 -2 -3  1 -1 -2 -4 -2 -5 -3  1 -3 -5 -4 -2 -3 -5 -4 -4    0  92   0   0   3   0   0   0   0   0   2   4   0   0   0   0   0   0   0   0  1.58 0.71
+  164 G   -2 -4 -2 -3 -4 -4 -4  7 -4 -6 -6 -3 -5 -5 -4 -2 -3 -4 -5 -5    0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0  1.88 0.71
+  165 Q   -3 -1 -2 -2 -5  8  0 -4 -1 -5 -4  0 -2 -5 -3 -2 -2 -4 -3 -4    0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.74 0.71
+  166 R   -3  8 -2 -3 -5 -1 -2 -4 -2 -5 -4  1 -3 -5 -4 -3 -3 -5 -4 -4    0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.79 0.71
+  167 E   -3 -2 -2  0 -6  2  7 -4 -2 -5 -5 -1 -4 -5 -3 -2 -3 -5 -4 -4    0   0   0   0   0   6  94   0   0   0   0   0   0   0   0   0   0   0   0   0  1.55 0.71
+  168 L   -3 -4 -5 -5 -3 -4 -5 -6 -5  0  6 -4  0 -1 -2 -4 -3 -4 -3 -1    0   0   0   0   0   0   0   0   0   0  96   0   0   0   2   0   0   0   0   0  1.26 0.76
+  169 L   -3 -5 -5 -5 -3 -5 -5 -6 -5  7  0 -5 -1 -2 -5 -4 -3 -5 -3  2    0   0   0   0   0   0   0   0   0  90   2   0   0   0   0   0   0   0   0   7  1.41 0.77
+  170 I   -3 -5 -5 -5 -1 -5 -5 -6 -5  7  1 -5  0 -2 -5 -4 -3 -5 -3  1    0   0   0   0   1   0   0   0   0  92   4   0   0   0   0   0   0   0   0   2  1.41 0.78
+  171 G   -2 -4 -2 -3 -5 -4 -4  7 -4 -6 -6 -3 -5 -5 -4 -2 -4 -4 -5 -5    0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0  1.94 0.79
+  172 G   -4 -4  0  8 -6 -2  0 -3 -3 -5 -6 -3 -5 -6 -3 -2 -3 -6 -5 -5    0   0   2  96   0   0   1   1   0   0   0   0   0   0   0   0   0   0   0   0  1.90 0.80
+  173 R   -3  8 -2 -4 -5  1 -2 -4 -2 -5 -4  1 -3 -5 -4 -3 -3 -5 -4 -5    0  96   0   0   0   3   0   0   0   0   0   1   0   0   0   0   0   0   0   0  1.79 0.81
+  174 Q   -1 -1  0 -2 -5  8  0 -3 -1 -5 -4  1 -2 -5 -3 -2 -2 -4 -3 -4    3   0   3   0   0  87   0   0   0   0   0   4   0   0   0   1   1   0   0   0  1.52 0.81
+  175 T   -2 -3 -2 -3 -1 -3 -3 -4 -4 -1 -3 -3 -2 -4 -3  0  7 -4 -4 -1    0   0   0   0   1   0   0   0   0   2   0   0   0   0   0   2  94   0   0   2  1.43 0.81
+  176 G   -2 -4 -2 -3 -5 -4 -4  7 -4 -6 -6 -3 -5 -5 -4 -2 -4 -5 -5 -5    0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0  1.96 0.81
+  177 K   -2  0 -2 -2 -5  0 -1 -3 -2 -4 -4  7 -3 -5 -3 -2 -2 -4 -3 -4    0   0   0   0   0   0   0   0   0   0   0  95   0   0   0   0   0   0   0   0  1.47 0.77
+  178 T   -2 -3 -2 -3 -3 -2 -3 -3 -3 -3 -3 -2 -2 -4 -3  0  7 -4 -3 -2    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   3  93   0   0   0  1.45 0.82
+  179 T    5 -2 -3 -3 -2 -1 -2 -2 -3 -3 -3 -2 -3 -4 -3  2  1 -4 -3 -2   68   1   0   0   0   2   0   0   0   0   0   0   0   0   0  18   8   0   0   0  0.92 0.82
+  180 I   -3 -4 -5 -5 -3 -4 -5 -5 -5  6  1 -4  0 -2 -4 -4 -2 -4 -3  4    0   0   0   0   0   0   0   0   0  50  11   0   1   0   0   0   0   0   0  34  1.04 0.85
+  181 A    6 -3 -3 -4  4 -2 -3 -1 -4 -2 -3 -3 -3 -4 -3 -1 -2 -4 -4 -2   84   0   0   0   8   1   0   3   0   2   1   0   0   0   0   0   0   0   0   0  1.15 0.85
+  182 I   -3 -4 -5 -5 -3 -4 -5 -5 -5  6  2 -4  0 -2 -4 -4 -1 -4 -3  3    0   0   0   0   0   0   0   0   0  58  14   0   1   1   0   0   3   0   0  19  0.99 0.85
+  183 D   -3 -3 -1  8 -5 -2  0 -3 -3 -5 -5 -3 -5 -5 -3  0 -3 -6 -5 -5    0   0   0  92   0   0   0   0   0   0   0   0   0   0   0   5   0   0   0   0  1.84 0.88
+  184 T    3 -3 -3 -3  0 -3 -3 -3 -4  0 -3 -3 -1 -4 -3 -1  6 -4 -4 -2   26   0   0   0   2   0   0   0   0   7   0   0   2   0   0   1  61   0   0   0  0.96 0.88
+  185 I   -3 -5 -5 -5 -3 -5 -5 -5 -5  7  0 -4  0 -1 -5 -4 -3 -4 -3  1    0   0   0   0   0   0   0   0   0  88   1   0   1   2   0   0   0   0   0   4  1.40 0.88
+  186 L   -3 -4 -5 -5 -3 -4 -5 -5 -5  6  4 -4  0 -2 -4 -2 -3 -4 -3  1    0   0   0   0   0   0   0   0   0  50  38   0   0   0   1   4   0   0   0   5  0.98 0.88
+  187 N   -3 -2  8 -1 -5 -2 -2 -3  1 -5 -5 -2 -4 -5 -4 -1 -2 -6 -4 -5    2   1  93   0   0   0   0   0   2   0   0   1   0   0   0   1   0   0   0   0  1.86 0.93
+  188 Q   -3 -1 -2 -2 -5  8  0 -4 -1 -5 -3 -1 -2 -5 -3 -2 -3 -4 -3 -4    0   0   0   0   0  97   0   0   1   0   2   0   0   0   0   0   0   0   0   0  1.87 0.93
+  189 K   -2  2 -1 -3 -5  0 -1 -3 -3 -4 -4  7 -3 -5 -3 -2 -3 -5 -4 -2    2   9   1   0   0   1   0   0   0   0   1  81   0   0   0   0   0   0   0   2  1.31 0.89
+  190 Q   -1  3  1  0 -3  2  1  0  1 -2 -1  1 -2 -1 -1  0 -1 -2  2 -2    5  17   6   5   1   8   8   8   3   2   6   9   1   3   3   5   2   0   7   2  0.17 0.69
+  191 I   -1 -2 -2 -2 -2 -1 -1 -2 -2  2  0  0  2  2 -1  0  2  2  0  1    4   2   2   2   1   2   3   3   1  16   8   5   5   9   2   9  14   2   3   8  0.16 0.61
+  192 N   -1 -1  6 -1 -2 -1 -1 -1  4 -2 -2 -1 -2 -2 -2 -1 -1 -2  0 -2    4   2  48   2   1   2   3   3  10   2   4   2   1   2   2   3   2   1   4   3  0.70 0.56
+  193 S    0 -1  1  2 -1  0  0 -1 -1 -1  0  0 -1 -1 -1  1 -1  3 -1 -1    7   3   7  14   1   5   6   5   1   3   8   4   1   2   3  13   4   4   2   5  0.10 0.35
+  194 R    0  2  0  0 -1  1  2 -1 -1 -1 -1  2 -1 -1 -1  0  0 -1 -1 -1    6  11   5   3   1   6  16   4   1   3   6  13   1   2   3   5   5   1   2   4  0.13 0.40
+  195 A    0 -1  0 -1 -1 -1 -1  2 -1  1 -1 -1 -1 -1  0  2  0  0 -1 -1    6   3   3   3   1   2   4  17   1   8   7   4   1   2   5  19   5   1   2   5  0.12 0.42
+  196 T    0  0  1  1 -1  0 -1  1 -1 -1 -1  0 -1 -1  0  0  2  0 -1 -1    8   4   6   9   1   3   4  12   1   3   7   6   1   3   4   8  13   1   2   4  0.08 0.40
+  197 S   -1  0  2  4 -2 -1  0  0 -1 -2 -1 -1 -1 -2 -1  1 -1 -2 -1 -1    5   6   9  25   1   2   6   7   1   3   5   4   1   2   3   9   4   1   2   4  0.22 0.40
+  198 E    0 -1 -1  0 -2  0  4 -1  2 -2 -1  0 -1 -2  1  0  1 -2 -1 -1    7   3   3   5   1   3  27   4   6   3   6   5   1   2   6   6   9   1   2   4  0.22 0.44
+  199 S   -1 -1  1  2 -2 -1 -1  3 -2 -3 -3  3 -3 -3 -2  2 -1 -3 -3 -3    4   1   6  12   1   1   2  22   0   1   2  19   0   1   1  21   3   0   1   1  0.39 0.66
+  200 E   -2  0  2  1 -3  4  2 -1 -2 -3 -3  3 -3 -4 -2  1 -2 -3 -3 -3    2   2   9   8   0  20  16   3   0   2   2  21   0   1   1  11   1   0   1   1  0.45 0.71
+  201 T   -1 -1  1  4 -4  0 -1  2 -3 -4 -4  3 -3 -1  0  0  0 -4 -3 -3    4   1   5  23   0   3   2  15   0   0   1  25   0   4   5   5   4   0   0   1  0.44 0.75
+  202 M   -3 -4 -5 -5 -1 -4 -5 -5 -5  3  3 -4  3 -2 -5 -4 -2 -4 -3  5    0   0   0   0   1   0   0   0   0  17  30   0   6   0   0   0   1   0   0  45  0.86 0.79
+  203 Y   -3 -1 -3 -4 -4 -2 -3 -5  1  3 -1  1 -2  2 -3 -3  0 -2  6  0    0   3   1   0   0   1   0   0   3  24   3  11   0   6   1   0   5   0  37   6  0.71 0.80
+  204 C   -2  0 -4 -5 11 -4 -5 -3 -5 -3 -3 -4 -3 -4 -5 -1 -3 -4 -4 -2    0   6   0   0  87   0   0   2   0   0   0   0   0   0   0   4   0   0   0   1  2.31 0.78
+  205 V   -3 -5 -5 -5 -3 -5 -5 -6 -5  6 -1 -5 -1  1 -5 -4 -2 -4 -3  5    0   0   0   0   0   0   0   0   0  54   0   0   0   5   0   0   0   0   0  42  1.13 0.79
+  206 Y   -4 -4 -4 -5 -5 -3 -4 -5  2 -3 -3 -4 -3  1 -5 -4 -4  0  9 -3    0   0   0   0   0   0   0   0   4   0   0   0   0   0   0   0   0   0  96   0  2.18 0.83
+  207 V   -2 -5 -5 -5  1 -4 -5 -3 -5  1 -1 -4 -1 -3 -4 -4 -2 -5 -3  7    1   0   0   0   3   0   0   2   0   1   2   0   0   0   0   0   1   0   0  90  1.27 0.87
+  208 A    6 -4 -3 -4  0 -3 -3 -1 -4 -3 -4 -3 -3 -3 -1  0  0 -5 -4 -2   82   0   0   0   2   0   0   3   0   0   0   0   0   1   3   4   4   0   0   0  1.09 0.88
+  209 I   -3 -5 -5 -5  0 -5 -5 -6 -5  7 -1 -5  1 -2 -5 -4 -3 -5 -3  3    0   0   0   0   2   0   0   0   0  78   0   0   2   0   0   0   0   0   0  18  1.36 0.90
+  210 G   -2 -5 -3 -4 -5 -4 -4  7 -4 -5 -6 -4 -5 -5 -4 -2 -4 -5 -5 -2    0   0   0   0   0   0   0  96   0   0   0   0   0   0   0   0   0   0   0   4  1.91 0.93
+  211 Q   -3 -1 -2 -2 -5  8  0 -4  0 -5 -3 -1 -2 -5 -3 -2 -1 -4 -3 -4    0   0   0   0   0  92   0   0   2   0   3   0   0   0   0   0   4   0   0   0  1.72 0.93
+  212 K   -3  4 -2 -3 -6 -1 -2 -4 -3 -5 -5  7 -4 -5 -3 -3 -3 -5 -4 -5    0  17   0   0   0   0   0   0   0   0   0  83   0   0   0   0   0   0   0   0  1.59 1.04
+  213 R    3  5 -1 -1  0  1 -1 -1 -3 -2 -3  0 -2 -3 -4  0 -2 -5 -4 -4   27  36   2   3   2   7   4   3   0   2   1   3   1   1   0   6   2   0   0   0  0.60 1.04
+  214 S    1 -3 -2 -2 -3 -2 -2 -1 -3 -4 -4 -2 -3 -4 -3  6  1 -4 -4 -3    8   0   0   0   0   0   1   4   0   0   1   0   0   0   0  76   6   0   0   1  1.11 0.99
+  215 T    0 -3  0 -3 -3 -1 -1 -2 -3 -3 -3 -1 -3 -4 -3  3  6 -4 -2 -3    8   0   3   0   0   1   3   2   0   0   1   3   0   0   0  22  52   0   2   1  0.82 0.99
+  216 V   -2 -4 -5 -5 -3 -4 -4 -3 -5  3 -1 -4  0 -3 -4 -4 -2 -4 -3  6    2   0   0   0   0   0   0   3   0  16   3   0   2   0   0   0   0   0   0  70  1.15 1.08
+  217 G    5  2 -2 -4 -3 -1 -2 -2 -3 -2 -1 -2 -1 -4 -3 -1 -2 -4 -4 -1   63  12   2   0   0   3   2   1   0   2   5   2   1   0   0   2   1   0   0   3  0.80 1.15
+  218 Q    0  1  2 -1 -4  6  0 -1  1 -4 -3  0 -3 -5 -4  0 -1 -4 -4 -3    8   8   9   2   0  45   4   4   3   0   2   3   0   0   0   6   4   0   0   2  0.75 1.17
+  219 L   -1 -4 -2 -5 -3 -1 -4 -5 -5  4  2 -4  0 -1 -4 -2 -1 -4 -3  4    6   0   2   0   0   3   0   0   0  24  22   0   2   2   0   3   3   0   0  31  0.65 1.16
+  220 I   -2 -2 -4 -4  0 -1 -1 -5 -1  2 -1 -2 -2 -3 -4 -2 -1 -4  1  6    3   2   0   0   2   3   3   0   2   7   4   2   0   0   0   2   2   0   6  56  0.70 1.19
+  221 Q   -1  1  2  0 -5  4  2 -2  0 -4 -2  3 -3 -5 -4 -1  1 -5 -4 -4    5   7   8   4   0  19  15   3   2   0   3  19   0   0   0   3   7   0   0   0  0.44 1.12
+  222 T    0  0 -1 -3 -4  1 -1 -4 -4  3 -1  1  0 -4 -4  0  3 -5 -4  0    8   6   3   0   0   6   4   0   0  22   4   9   3   0   0   5  23   0   0   5  0.37 1.12
+  223 L   -3 -5 -5 -5 -4 -4 -5 -6 -5  2  4 -5 -1  3  2 -4 -3 -4 -2  1    0   0   0   0   0   0   0   0   0  11  53   0   0  13  12   0   0   0   0  10  0.86 0.94
+  224 E    0  3  0  0 -4  1  2 -4  2 -1 -4  0 -3 -5  2  1  0 -5 -4 -1    7  16   4   4   0   7  16   0   5   4   0   4   0   0  10  13   4   0   0   4  0.32 0.94
+  225 E   -1  0  1  2 -5  2  5 -2  0 -5 -5  2 -4 -5 -4 -1 -1 -5 -4 -2    4   5   5  13   0   7  40   2   2   0   0  11   0   0   0   4   2   0   0   4  0.66 1.00
+  226 A    2  0  3 -3 -4 -1  0 -2  3 -4 -2  1 -1  1 -4 -1  1 -4  2 -2   20   6  15   0   0   2   7   2   9   0   4   8   2   5   0   2   8   0   8   2  0.31 1.00
+  227 N   -1 -4  2  3 -5 -1 -1  6  0 -6 -6 -1 -5 -5 -4 -1 -3 -5 -5 -5    3   0   8  16   0   3   3  57   3   0   0   3   0   0   0   4   0   0   0   0  1.04 1.00
+  228 A    6 -4 -4 -4  3 -3 -3 -2 -4 -3 -4 -3 -3 -4 -3  1 -2 -5 -4  0   76   0   0   0   7   0   0   0   0   0   0   0   0   0   0  10   0   0   0   7  1.13 1.00
+  229 L    0 -4 -5 -5 -4 -4 -5 -5 -4  1  3 -4  6  1  0  0 -3 -4  2  0    9   0   0   0   0   0   0   0   0   6  33   0  21   6   6   7   0   0   6   7  0.65 1.17
+  230 E    0  0  0  1 -5  2  4 -1  3 -5 -2  0 -4  2  0 -1 -1 -5 -3 -4    7   4   4   8   0   9  29   4   6   0   4   5   0   9   5   4   4   0   0   0  0.41 1.17
+  231 Y    1  1  0 -4 -4 -2 -3 -4  3 -3 -3  1 -3  2 -4 -2 -3 -1  7 -3   12  10   6   0   0   0   0   0   7   0   0  10   0   5   0   0   0   0  51   0  0.91 0.67
+  232 S   -2 -3 -3 -3  3 -3 -3 -4 -4  1  1 -3 -2 -3  1  1  5 -4 -3  1    0   0   0   0   7   0   0   0   0   7  16   0   0   0   7  10  45   0   0   7  0.58 0.67
+  233 I    1 -4 -4 -4  3 -3 -4 -4 -4  4  0 -3 -1 -3 -4  1  1 -4 -3  3   11   0   0   0   7   0   0   0   0  31   7   0   0   0   0  11  10   0   0  23  0.51 0.67
+  234 L   -3 -4 -4 -4 -3 -3 -4 -4 -4  3  2 -4  4  3 -4 -3 -2 -3 -1  2    0   0   0   0   0   0   0   0   0  17  17   0  17  17   0   0   0   0   0  17  0.40 0.56
+  235 V   -2 -4  2 -4 -3 -3 -4 -5 -4  3 -1 -4  4 -3 -4 -3 -2 -5 -3  5    0   0  13   0   0   0   0   0   0  15   0   0  13   0   0   0   0   0   0  60  0.87 0.72
+  236 A    4 -3  0 -3  2 -3 -1 -3 -3  0 -1 -3  1  0 -3  1  0 -4 -3  1   44   0   6   0   5   0   5   0   0   5   5   0   5   5   0   9   5   0   0   8  0.40 0.72
+  237 A    5 -1 -3 -3 -3 -2  0  1 -3 -3 -4 -2 -3 -4 -3  1  1 -4 -4 -2   66   3   0   0   0   0   7   7   0   0   0   0   0   0   0   9   7   0   0   0  0.76 0.70
+  238 T    1 -3  1 -2 -3 -2  0  0  0 -2 -3 -2  0 -4  1  1  4 -4 -3 -2   15   0   8   0   0   0   6   8   3   2   2   0   2   0   6  10  39   0   0   0  0.47 0.70
+  239 A    4 -3 -2  1 -2 -2 -2  1 -3 -3 -3 -2 -3 -4  2  1 -2 -4 -3 -2   56   0   0   9   0   0   0  10   0   0   0   0   0   0   9   9   0   0   0   0  0.54 0.56
+  240 S    2 -3  3  3 -3 -2 -2  2 -2 -4 -4 -2 -3 -4 -3  2  2 -5 -4 -3   17   0  17  17   0   0   0  17   0   0   0   0   0   0   0  17  17   0   0   0  0.50 0.56
+  241 D    1 -3  2  2 -4  2  1 -3 -2 -4 -4 -2 -3  2 -4  1  1 -3  3 -4   11   0  11  11   0  11  11   0   0   0   0   0   0  11   0  11  11   0  11   0  0.41 0.89
+  242 P    2 -3 -3 -3  2 -2  0 -3 -3 -3 -1 -2  1 -4  5  3 -2 -5 -4 -3   16   0   0   0   5   0   5   0   0   0   7   0   5   0  32  30   0   0   0   0  0.74 0.67
+  243 A    5 -3 -3  0 -2 -3 -2 -2 -3 -3 -1 -3 -2 -4  2  1 -2 -4 -4  0   64   0   0   6   0   0   0   0   0   0   6   0   0   0  11   8   0   0   0   6  0.71 0.67
+  244 P    2 -3 -3 -3 -3 -3 -3  0 -4 -1 -4 -3 -3 -4  6  1  2 -5 -4 -1   16   0   0   0   0   0   0   8   0   4   0   0   0   0  43  10  15   0   0   5  0.92 0.67
+  245 L    1 -4 -4 -4  4 -3  1 -4 -3  2  1 -4  4  3  2 -3 -3 -3  3 -2   11   0   0   0  11   0  11   0   0  11  11   0  11  11  11   0   0   0  11   0  0.48 0.89
+  246 Q   -2  0 -1 -2 -4  7  0 -4  2 -1 -2  1 -2 -4 -2 -1 -1 -4 -3 -2    0   2   2   0   0  74   0   0   4   4   3   5   0   0   2   2   2   0   0   2  1.06 0.63
+  247 F   -3 -4 -1 -5 -4 -3 -4 -5 -1 -2 -2 -4  1  5 -5 -3 -3  5  7 -1    0   0   4   0   0   0   0   0   0   0   0   0   4  28   0   0   0   7  52   6  1.26 0.63
+  248 L   -3 -4 -5 -5 -3 -4 -5 -5 -4  3  4 -4  1  1 -2 -4 -3 -3  2  0    0   0   0   0   0   0   0   0   0  23  59   0   3   4   2   0   0   0   7   3  0.78 0.63
+  249 A    3 -2 -1 -2 -2 -1 -1 -1 -2 -1 -2 -1 -2 -3 -2  4  2 -4 -3  1   31   0   0   0   0   0   0   0   0   0   0   0   0   0   0  44  13   0   0  13  0.44 0.28
+  250 P    3 -3 -3 -3 -2 -2 -2 -2 -3  0 -1 -2 -1 -3  5 -1 -1 -4 -3  3   33   0   0   0   0   0   0   0   0   0   0   0   0   0  33   0   0   0   0  33  0.65 0.22
+  251 Y   -3 -3 -3 -4 -3 -2 -3 -4  0 -1  1 -3  3  4 -4 -3 -2  1  7 -1    0   0   0   0   0   0   0   0   0   0  13   0  13  14   0   0   0   0  61   0  0.90 0.28
+  252 S    1 -4 -3 -3  5 -3 -3  2 -4 -2 -3 -3 -3 -4  3  2  2 -4 -4  1   14   0   0   0  14   0   0  14   0   0   0   0   0   0  14  14  14   0   0  14  0.51 0.67
+  253 G    1 -3 -2 -3 -3 -3 -3  6 -3 -4 -4 -3 -4 -4 -3  1 -2 -4 -4 -4   14   0   0   0   0   0   0  69   0   0   0   0   0   0   0   8   0   0   0   0  1.05 0.59
+  254 C    1 -4 -4 -4  8 -3 -4 -4 -4  0 -2 -4  1  0 -4  0  1 -4 -3  2   12   0   0   0  42   0   0   0   0   4   0   0   4   4   0   5   7   0   0  20  0.89 0.59
+  255 A    4 -3 -2 -3  2 -2 -3 -2 -3 -2  0 -2 -2 -3 -3  2  3 -4 -3  0   41   0   0   0   5   0   0   0   0   0  11   0   0   0   0  20  20   0   0   4  0.51 0.58
+  256 M   -1 -3 -4 -4 -2 -3 -4 -4 -3  4  2 -3  6  1 -4 -3 -2 -3 -2  1    6   0   0   0   0   0   0   0   0  38  11   0  34   6   0   0   0   0   0   6  0.61 0.37
+  257 G    2 -3 -2 -3  3 -3 -3  5 -3 -4 -4 -2 -3 -4 -3  1 -2 -4 -4 -3   23   0   0   0   8   0   0  60   0   0   0   0   0   0   0   8   0   0   0   0  0.88 0.37
+  258 E   -2 -1 -1  0 -5  3  6 -3 -1 -4 -4  2 -3 -4 -2 -1 -2 -4 -3 -4    0   0   0   0   0  12  77   0   0   0   0  11   0   0   0   0   0   0   0   0  1.01 0.37
+  259 Y    0 -3  2 -3 -4  2  1 -4  4 -4  0 -3 -3  2 -4  1 -3  6  3 -4   10   0  10   0   0  10  10   0  10   0  10   0   0  10   0  10   0  10  10   0  0.50 1.00
+  260 F   -3 -4 -4 -5 -4 -4 -4 -4 -3  0  0 -4 -1  6  0 -1 -3  6  3  0    0   0   0   0   0   0   0   0   0   6   6   0   0  49   6   6   0  13   7   6  0.90 0.56
+  261 R    2  3 -3 -3 -2 -1 -2 -3 -2 -1  2 -1  5 -2 -3 -2 -2 -3 -2 -1   25  25   0   0   0   0   0   0   0   0  25   0  25   0   0   0   0   0   0   0  0.41 0.33
+  262 D   -2 -1  0  5  0 -3  0 -4  1 -4 -4  1  0  0 -4 -2 -3  3  5 -2    3   3   3  34   2   0   5   0   3   0   0   8   2   3   0   3   0   3  25   2  0.74 1.04
+  263 N   -1  1  4  0 -5  3 -1 -1  1 -2 -3  2 -3 -5 -4  1  0 -5 -4 -4    6   8  24   4   0  15   3   3   3   3   3  14   0   0   0  12   4   0   0   0  0.47 1.04
+  264 G   -1 -1 -3 -3 -5 -3 -1  6  0 -6 -6 -1 -5 -5  0 -1 -4 -5 -5 -5    3   4   0   0   0   0   5  73   3   0   0   3   0   0   5   3   0   0   0   0  1.30 1.04
+  265 M    0  1  1  1 -5  2  1  0 -3 -4 -1  1  3 -4 -4  0  1 -5  2 -4    8   8   8   8   0   8   8   8   0   0   8   8   8   0   0   8   8   0   8   0  0.23 1.33
+  266 H    1  2  2  2 -4 -2 -2 -3  4 -5 -5  2 -4 -5  3  1 -2 -5 -3 -4   13  13  13  13   0   0   0   0  13   0   0  13   0   0  13  13   0   0   0   0  0.52 0.78
+  267 A    1  2 -2  2  5 -3 -3  1 -3 -3 -3 -2 -3 -4 -4  1  2 -5 -4  1   13  13   0  13  12   0   0  13   0   0   0   0   0   0   0  13  13   0   0  12  0.39 0.78
+  268 L   -2 -4 -4 -4  5 -3 -4 -4 -4  2  1 -4  4  3 -4  1 -2 -3 -2  2    0   0   0   0  14   0   0   0   0  14  14   0  14  14   0  14   0   0   0  14  0.55 0.67
+  269 I   -1 -4 -4 -4  1 -4 -4 -5 -4  5  1 -4  1 -2 -4 -3  2 -4 -1  2    3   0   0   0   3   0   0   0   0  53  12   0   3   0   0   0  16   0   3   9  0.69 0.62
+  270 I   -1 -4 -4 -5  3 -4 -4 -3 -4  5 -1 -4  2  1 -4 -2 -2 -4 -2  4    3   0   0   0   6   0   0   2   0  38   0   0   5   6   0   2   0   0   0  36  0.66 0.62
+  271 Y   -3 -2 -2 -3 -3 -2 -2 -4  6  0  0 -3 -2  2 -4 -2  0 -1  5 -2    0   0   0   0   0   0   0   0  32   7   7   0   0   9   0   0   7   0  30   0  0.66 0.39
+  272 D   -3 -2  0  6 -5  4  1 -3 -2 -4 -4 -1 -3 -5 -3 -1 -2 -5 -4 -4    0   0   0  68   0  32   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.12 0.39
+  273 D   -1 -2  0  5 -4 -1  0  4 -2 -4 -4 -1 -3 -4 -2 -1 -2 -4 -4 -4    0   0   0  50   0   0   0  50   0   0   0   0   0   0   0   0   0   0   0   0  0.79 0.11
+  274 L   -2  3 -3 -4 -3 -2 -3 -4 -3  0  4 -2  0 -2  2 -3 -2 -3 -3  1    0  23   0   0   0   0   0   0   0   0  52   0   0   0  13   0   0   0   0  13  0.52 0.39
+  275 S    1 -2 -1 -2 -2 -2 -2 -2 -2 -3 -3 -2 -3 -4  4  4  2 -4 -3 -2   10   0   0   0   0   0   0   0   0   0   0   0   0   0  23  46  12   0   0   0  0.58 0.39
+  276 K   -2  3  3 -1 -4  3 -1 -2 -1 -4 -4  3 -3 -4 -3  2 -1 -4 -3 -4    0  20  20   0   0  20   0   0   0   0   0  20   0   0   0  20   0   0   0   0  0.60 0.44
+  277 Q   -3  0 -1 -2 -5  6  0 -3  6 -5 -4  3 -2 -4 -3 -2 -2 -4 -2 -4    0   0   0   0   0  54   0   0  22   0   0  25   0   0   0   0   0   0   0   0  1.12 0.63
+  278 A    5 -3 -2 -3 -2 -2 -2 -2  5 -3 -3 -2 -3 -4 -3  2 -2 -4 -2 -2   67   0   0   0   0   0   0   0  19   0   0   0   0   0   0  14   0   0   0   0  0.90 0.63
+  279 V   -1 -1 -3 -1  1  0 -1  2 -4  0 -2 -3  0 -3 -4 -1 -1  3 -1  4    4   3   0   3   3   5   3  19   0   4   0   0   2   0   0   3   3   5   3  41  0.37 0.63
+  280 A    5 -3 -3 -4  6  0 -3 -1 -3 -3 -3 -3 -3 -4 -3  0  0 -4 -4 -1   59   0   0   0  20   4   0   4   0   0   0   0   0   0   0   6   4   0   0   4  0.82 0.64
+  281 Y   -3 -3 -3 -5 -4 -3 -4 -5  4  1  2 -3 -1  1 -4 -3 -3  0  7 -2    0   0   0   0   0   0   0   0  11   8  21   0   0   0   0   0   0   0  59   0  1.11 0.58
+  282 R    2  4 -2 -2 -3 -1 -1 -2 -2 -3 -3  0 -2 -4  5 -1 -1 -4 -3 -2   33  33   0   0   0   0   0   0   0   0   0   0   0   0  33   0   0   0   0   0  0.74 0.22
+  283 Q    0 -2 -2 -2 -3  3  1 -3  2  3 -2  0 -2 -3 -3  2  1 -4 -3 -1    7   0   0   0   0  20   9   0   6  23   0   6   0   0   0  23   7   0   0   0  0.34 0.50
+  284 M   -2 -4 -4 -5  4 -3 -4 -5 -4  4  2 -4  5 -2 -4 -3  1 -4 -3  2    0   0   0   0  11   0   0   0   0  28  18   0  25   0   0   0   8   0   0   9  0.67 0.56
+  285 S    1 -3 -2 -3  3 -2 -2 -2 -2 -2 -3 -2 -2 -3 -3  4  3 -3  2  0    9   0   0   0   8   0   0   0   0   0   0   0   0   0   0  49  17   0   8   9  0.58 0.56
+  286 L   -3 -4 -5 -5 -3 -4 -4 -5 -4  2  5 -4  1  3 -4 -4 -3 -3 -2  0    0   0   0   0   0   0   0   0   0  10  78   0   0  11   0   0   0   0   0   0  0.92 0.52
+  287 L   -3 -3 -3  1 -3 -3 -3 -4 -4  1  4 -3  0  1 -4 -2  1 -3 -2  0    0   0   0  11   0   0   0   0   0   3  64   0   0   6   0   2  10   0   0   3  0.59 0.52
+  288 L   -2 -4 -4 -4 -3 -3 -4 -4 -4  0  4 -4  1  3  2 -3  0 -3 -1  0    3   0   0   0   0   0   0   0   0   3  52   0   2  18  11   0   8   0   0   3  0.59 0.52
+  289 R   -3  6  1 -2 -5  0 -1 -3  2 -4 -4  3 -3 -4 -3 -2 -2 -4 -3 -4    0  66   8   0   0   0   0   0   5   0   0  22   0   0   0   0   0   0   0   0  1.09 0.52
+  290 R   -3  7 -2 -2 -5  2  1 -4 -2 -4 -4  3 -3 -4 -3 -2 -2 -4 -3 -4    0  72   0   0   0   6  10   0   0   0   0  12   0   0   0   0   0   0   0   0  1.12 0.49
+  291 P   -2 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -4 -5  8 -2 -2 -5 -4 -4    0   0   0   0   0   0   0   0   0   0   0   0   0   0 100   0   0   0   0   0  2.48 0.40
+  292 P   -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -3 -2 -3 -5  8 -1  0 -5 -4 -3    2   0   0   0   0   0   0   0   0   0   2   0   0   0  89   2   5   0   0   0  2.07 0.38
+  293 G    0 -3 -2 -2 -4 -3 -3  6 -3 -5 -5 -3 -4 -4 -3 -1 -3  2 -4 -4    2   0   0   0   0   0   0  91   0   0   0   0   0   0   0   2   0   4   0   0  1.39 0.38
+  294 R   -2  7 -1 -3 -4  0 -1 -3 -1 -4 -3  2 -2 -4 -3 -2  1 -4 -3 -3    0  88   0   0   0   0   0   0   0   0   0   3   0   0   0   0   9   0   0   0  1.17 0.38
+  295 E   -2  1 -1  0  3  1  6 -3 -1 -4 -4  0 -3 -4 -2 -1 -2 -4 -3 -3    0   6   0   0   8   0  84   0   0   0   0   2   0   0   0   0   0   0   0   0  1.00 0.36
+  296 A    5 -3 -2 -3 -2 -2 -2  2 -3 -3 -3 -2 -2 -4  2  0 -1 -4 -3 -2   72   0   0   0   0   0   0  16   0   0   0   0   0   0   9   0   2   0   0   0  0.70 0.36
+  297 F   -3 -3 -3 -4 -3 -3 -3 -4  0  1 -1 -3 -1  4 -4 -2 -2  1  7 -1    0   0   0   0   0   0   0   0   0  11   0   0   0  20   0   2   0   0  68   0  1.06 0.30
+  298 P   -2 -3 -3 -2 -4 -2 -2 -3 -3 -3 -3 -2  2 -4  8 -1 -2 -4 -4 -3    0   0   0   0   0   0   0   0   0   0   0   0  11   0  86   3   0   0   0   0  1.90 0.30
+  299 G   -1 -3 -1 -2 -4 -3 -3  7 -3 -5 -5 -2 -4 -4 -3 -1 -3 -3 -4 -4    0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0  1.47 0.30
+  300 D   -3 -2  1  7 -5 -1  1 -2 -2 -4 -5 -1 -4 -5 -2 -1 -2 -5 -4 -4    0   0   0  95   0   0   5   0   0   0   0   0   0   0   0   0   0   0   0   0  1.36 0.30
+  301 V   -1 -3 -4 -4 -2 -3 -3 -4 -4  3  0 -3  0 -2 -3 -1 -1 -4 -2  5    0   0   0   0   0   0   0   0   0  22   0   0   0   0   0   5   0   0   0  73  0.67 0.30
+  302 F   -2 -3 -3 -3 -3 -3 -3 -3 -2 -1  0 -3 -1  7 -3 -3 -2  0  2 -1    1   1   1   1   0   1   1   1   0   1   4   1   0  85   1   1   1   0   0   1  0.99 0.24
+  303 Y   -3 -3 -3 -4 -3 -2 -3 -4  2 -2 -2 -3 -2  3 -4 -3 -3  1  8 -2    0   0   0   0   0   0   0   0   2   0   0   0   0   6   0   0   0   0  93   0  1.40 0.30
+  304 L   -2 -3 -4 -4 -1 -3 -4 -4 -4  2  5 -3  1 -1 -3 -2 -2 -3 -2  1    2   0   0   0   1   0   0   0   0   8  78   0   0   0   1   2   0   0   0   8  0.65 0.30
+  305 H   -3 -1  0 -2 -4  0 -1 -3  9 -4 -4 -2 -2 -2 -3 -2 -3 -3  1 -4    0   0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0  1.57 0.28
+  306 S    1 -2  0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2  5  1 -4 -3 -2    8   0   0   0   0   0   0   0   0   0   0   0   0   0   0  92   0   0   0   0  0.78 0.28
+  307 R   -2  7 -1 -2 -4  0 -1 -2 -1 -4 -3  2 -2 -4 -3 -1 -2 -4 -3 -3    0  92   0   0   0   1   0   2   0   0   0   1   0   0   0   4   0   0   0   0  1.10 0.28
+  308 L   -2 -3 -4 -4 -2 -3 -4 -5 -4  1  5 -3  1  0 -1 -2 -2 -3 -2  0    0   0   0   0   0   0   0   0   0   0  96   0   0   0   2   2   0   0   0   0  0.77 0.28
+  309 L   -2 -3 -3 -4 -2 -3 -3 -4 -3  1  5 -3  1 -1 -3  1 -1 -3 -2  0    0   0   0   0   0   0   0   0   0   0  84   0   0   0   0  16   0   0   0   0  0.61 0.27
+  310 E   -2 -1 -1  1 -5  1  6  1 -1 -4 -4  0 -3 -4 -2 -1 -2 -4 -3 -4    0   0   0   0   0   0  88  11   0   0   0   1   0   0   0   0   0   0   0   0  0.97 0.29
+  311 R   -2  7 -1 -3 -4  0 -1 -3 -1 -4 -3  1 -2 -4 -3 -2 -2 -4 -3 -4    0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.26 0.29
+  312 A    5 -2 -2 -2  0 -1  1 -1 -2 -3 -3 -1 -2 -3 -1  2 -1 -4 -3 -1   72   0   0   0   1   0  11   1   0   0   0   0   0   0   2  13   0   0   0   1  0.57 0.29
+  313 A    5  1 -2 -3  1 -1 -2 -1 -2 -2 -2 -1 -2 -3 -2  1 -1 -4 -3  0   74  11   0   0   3   0   0   1   0   0   1   0   0   0   0   6   1   0   0   4  0.55 0.29
+  314 K   -2  3 -1  1 -4  1  0 -3  0 -4 -3  5 -2 -4 -2 -1 -2 -4 -3 -3    0  13   0  11   0   4   0   0   2   0   0  69   1   0   0   0   0   0   0   0  0.75 0.31
+  315 R   -2  1 -4 -4 -2 -2 -3 -4 -2  1  4 -2  3  0 -4 -3 -2 -2  2  2    0  11   0   0   0   0   0   0   0   3  49   0  12   0   0   0   0   0  12  13  0.45 0.31
+  316 S    0 -2  4  2 -2 -1 -1 -1 -1 -2 -3 -1 -3 -3 -2  4  0 -4 -3 -3    2   1  27  11   1   0   1   1   1   2   1   1   0   0   0  49   2   0   0   0  0.54 0.33
+  317 D    0 -1  0  5 -4  0  2 -2  3 -4 -4  0 -3 -4  0  0  0 -4 -2 -3    8   1   3  41   0   3  13   1  10   0   0   6   0   0   4   2   6   0   0   0  0.53 0.36
+  318 Q    0  2  1  2 -4  3  4 -2 -1 -3 -3  1 -3 -4 -2  0 -1 -4 -3 -2    6  12   6  13   0  13  30   1   1   0   1   9   0   0   0   4   1   0   0   3  0.44 0.40
+  319 T   -2 -1  3 -2 -2 -1 -2  0  4 -2  2  1  0  0 -3 -1  1 -3  0 -2    1   1  20   1   1   1   0   7  12   1  22  12   2   3   0   1  13   0   2   1  0.25 0.47
+  320 G    0 -3 -2 -3 -4 -2 -3  6 -3 -5 -5  1 -4 -4 -3 -1 -3 -4 -4 -3    4   0   0   0   0   1   0  84   0   0   0   9   0   0   0   1   0   0   0   1  1.27 0.47
+  321 A    2 -3 -1 -2 -2 -2 -1  5 -3 -4 -4 -2 -3 -4 -3  0  1 -4 -4 -3   19   0   1   0   0   1   3  62   0   0   0   1   0   0   0   4  10   0   0   0  0.89 0.47
+  322 G   -1 -4 -2 -3 -4 -3 -3  7 -3 -5 -5 -3 -4 -5 -3 -1 -3 -4 -4 -5    0   0   0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0  1.65 0.46
+  323 S    0 -2  0 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2  6  0 -4 -3 -3    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 100   0   0   0   0  1.04 0.44
+  324 L   -3 -3 -4 -5 -2 -3 -4 -5 -4  3  5 -4  3 -1 -4 -3 -1 -3 -2  0    0   0   0   0   0   0   0   0   0  21  64   0   9   0   0   1   3   0   0   1  0.74 0.44
+  325 T   -1 -2 -1 -2 -2 -2 -2 -3 -3 -2 -2 -2 -2 -3 -2  0  7 -4 -3 -1    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 100   0   0   0  1.16 0.44
+  326 A    6 -3 -3 -3  2 -2 -2  0 -3 -3 -3 -2 -2 -4 -2  0 -1 -4 -3 -1   93   0   0   0   4   0   0   2   0   0   0   0   0   0   0   1   0   0   0   0  0.91 0.43
+  327 L   -3 -4 -5 -5 -3 -4 -4 -5 -4  1  5 -4  1  1 -4 -4 -2 -3 -2  0    0   0   0   0   0   0   0   0   0   4  91   0   0   6   0   0   0   0   0   0  0.94 0.43
+  328 P   -2 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -4 -5  8 -2 -2 -5 -4 -4    0   0   0   0   0   0   0   0   0   0   0   0   0   0 100   0   0   0   0   0  2.49 0.42
+  329 V   -2 -4 -4 -4 -2 -3 -4 -4 -4  5  0 -3  1  0 -4  0 -1 -4 -2  3    0   0   0   0   0   0   0   0   0  58   0   0   4   3   0   8   0   0   0  27  0.70 0.43
+  330 I   -1 -4 -4 -4 -2 -3 -4 -5  2  6  0 -4  0 -1 -4 -3 -2 -4 -2  2    5   0   0   0   0   0   0   0   8  77   0   0   1   0   0   0   0   0   0   9  0.81 0.43
+  331 E   -2  1 -1  0 -5  1  6 -3 -1 -5 -4  0 -3 -5 -2 -1 -2 -4 -3 -4    0   9   0   0   0   2  88   0   0   0   0   1   0   0   0   0   0   0   0   0  1.16 0.42
+  332 T   -1 -2  2 -2 -2 -2 -2 -3 -3 -1 -2 -2 -2 -3 -2  0  6 -4 -3 -1    0   0  10   0   0   0   0   0   0   2   0   0   0   0   0   0  88   0   0   0  0.96 0.41
+  333 Q   -2  0 -1 -1 -4  7  1 -3 -1 -3  0  0 -1 -4 -3 -1 -2 -3 -3 -3    0   0   0   2   0  75   6   0   0   0  14   3   1   0   0   0   0   0   0   0  0.92 0.42
+  334 A    5 -3 -2 -2 -2 -1 -1  1 -3 -2 -3 -2 -2 -3 -2  1 -1 -4 -3  1   69   0   1   1   0   1   4   9   0   0   0   0   0   0   0   6   0   0   0   9  0.61 0.42
+  335 G   -1 -3  3  0 -4 -1 -3  6 -3 -5 -5 -2 -4 -4 -3 -1 -2 -4 -4 -4    0   0  15   4   0   2   0  77   0   0   0   0   0   0   0   1   0   0   0   0  1.18 0.43
+  336 D   -3 -2  2  7 -5 -1  1 -2 -2 -4 -5  1 -4 -5 -3 -1 -2 -5 -4 -4    0   0   7  82   0   0   2   0   0   0   0   9   0   0   0   0   0   0   0   0  1.26 0.42
+  337 V   -1 -4 -4 -5 -2 -4 -4 -5 -4  3  1 -4  0 -2 -4 -3 -1 -4 -2  6    0   0   0   0   0   0   0   0   0   8   8   0   0   0   0   0   0   0   0  84  0.87 0.42
+  338 S    0 -2 -1 -2 -1 -1 -1 -2 -2 -3 -4  1 -3 -4 -2  5  1 -4 -3 -3    3   0   0   0   0   0   0   0   0   0   0   9   0   0   0  81   7   0   0   0  0.80 0.42
+  339 A    6 -3 -3 -3 -2 -2 -2  0 -3 -3 -3 -2 -2 -3 -2  0 -1 -4 -3 -1   97   0   0   0   0   0   0   2   0   0   0   0   0   0   0   2   0   0   0   0  0.94 0.42
+  340 Y   -3 -3 -4 -4 -4 -3 -3 -4  0 -2 -2 -3 -2  5 -4 -3 -3  1  8 -2    0   0   0   0   0   0   0   0   0   0   0   0   0  21   0   0   0   0  79   0  1.47 0.41
+  341 I   -2 -4 -4 -4 -2 -4 -4 -5 -5  6  0 -4  0 -1 -4 -3 -1 -4 -2  3    0   0   0   0   0   0   0   0   0  88   0   0   0   0   0   0   1   0   0  11  0.97 0.41
+  342 P   -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -4 -5  8 -1 -2 -5 -4 -3    2   0   0   0   0   0   0   0   0   0   0   0   0   0  93   3   0   0   0   1  2.23 0.41
+  343 T   -1 -2 -1 -2 -2 -2 -2 -3 -3 -2 -2 -2 -2 -3 -2  0  7 -4 -3 -1    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 100   0   0   0  1.10 0.40
+  344 N   -3 -1  8  0 -4 -1 -1 -2  0 -4 -5 -1 -3 -4 -3  0 -1 -5 -3 -4    0   0  99   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0  1.39 0.39
+  345 V   -1 -4 -4 -4 -2 -3 -4 -4 -4  2  1 -3  0 -2 -4 -3 -1 -4 -2  6    1   0   0   0   0   0   0   0   0   2   8   0   1   0   0   0   0   0   0  89  0.85 0.39
+  346 I   -2 -4 -5 -4 -2 -4 -4 -5 -5  7  1 -4  0 -1 -4 -4 -2 -4 -2  2    0   0   0   0   0   0   0   0   0  98   2   0   0   0   0   0   0   0   0   0  1.03 0.39
+  347 S    0 -2  0 -1 -2  0 -1 -1 -2 -4 -4 -1 -3 -4  0  6  0 -4 -3 -3    0   0   0   0   0   2   0   0   0   0   0   0   0   0   4  94   0   0   0   0  0.90 0.39
+  348 I   -2 -4 -4 -4 -2 -4 -4 -5 -4  6  1 -4  0 -1 -4 -3 -1 -4 -2  2    0   0   1   0   0   0   0   0   0  96   2   0   0   0   0   0   1   0   0   0  0.99 0.39
+  349 T   -1 -2 -1 -2 -2 -2 -2 -3 -3 -2 -1 -2 -2 -3 -2  0  6 -4 -3 -1    2   0   0   0   0   0   0   0   0   0   4   0   0   0   0   0  93   0   0   0  0.96 0.38
+  350 D   -3 -3  0  7 -5 -1  0 -1 -2 -4 -4 -2 -1 -4 -3 -1 -2 -5 -4 -4    0   0   0  93   0   0   0   3   0   0   0   0   4   0   0   0   0   0   0   0  1.36 0.38
+  351 G    0 -3 -1 -2 -4 -3 -3  7 -3 -5 -5 -3 -4 -4 -3 -1 -3 -4 -4 -4    4   0   0   0   0   0   1  95   0   0   0   0   0   0   0   0   0   0   0   0  1.44 0.38
+  352 Q   -2  0 -1 -1 -4  7  1 -3  3 -4 -3  1 -2 -4 -2 -1 -2 -3 -2 -3    0   0   0   0   0  85   0   0   8   0   0   4   0   0   0   0   0   0   0   0  1.13 0.40
+  353 I   -2  0 -4 -4 -1 -3 -4 -5 -4  6  1 -3  0 -1 -4 -3 -1 -3  0  1    0   8   0   0   1   0   0   0   0  79   5   0   0   0   0   0   1   0   4   1  0.74 0.40
+  354 C   -3 -3 -4 -4  3 -4 -4 -4 -2 -1 -1 -4 -1  7 -4 -3 -3  0  5 -1    0   2   0   0   7   0   0   0   0   0   0   0   0  67   0   0   0   0  19   4  1.09 0.40
+  355 L   -2 -3 -4 -4 -2 -3 -4 -4 -4  0  5 -3  1  2 -2 -2 -1 -2 -2  0    0   0   0   0   0   0   0   0   0   0  81   0   2   8   1   3   3   0   0   1  0.73 0.40
+  356 E   -1 -1  1  1 -4  1  5 -2 -1 -4 -4  0 -2 -4 -2  2 -1 -4 -3 -3    0   0   8   6   0   5  58   1   0   0   1   4   0   0   0  15   1   0   0   1  0.72 0.40
+  357 T    1  1 -1 -2 -2 -1 -1 -2 -2 -2 -3 -1 -2 -3  0  3  5 -4 -3 -2   11   8   0   1   0   0   1   1   0   0   0   2   0   0   4  25  45   0   0   0  0.52 0.42
+  358 E    0 -2  2  4 -4  0  4 -1  0 -4 -4 -1 -3 -4 -2  0 -1 -4 -3 -3   11   0  12  34   0   1  31   2   1   0   0   1   0   0   0   5   1   0   0   0  0.65 0.42
+  359 L   -2 -3 -4 -5 -2 -3 -4 -5 -4  1  5 -4  1 -1 -4 -3 -2 -3 -2  1    0   0   0   0   0   0   0   0   0   1  87   0   1   0   0   0   0   0   1   8  0.87 0.43
+  360 F   -2 -4 -4 -5 -3 -4 -4 -4 -3 -1  1 -4 -1  8 -4 -3 -3  0  2 -2    2   0   0   0   0   0   0   0   0   0  13   0   0  82   0   0   0   0   1   0  1.23 0.43
+  361 Y   -1 -1  5 -1 -3 -2 -1 -2  0 -3 -2 -2 -2  1 -3  1 -1 -2  4 -3    6   3  45   0   0   0   2   0   1   0   4   0   0   6   0   9   2   0  22   0  0.59 0.43
+  362 R    2  2 -1 -2 -3  3 -1 -1 -2 -3 -3  1 -2 -4 -2  3 -1 -4 -3 -3   20  14   0   0   0  14   0   3   0   0   2   9   0   0   0  35   1   0   0   0  0.43 0.43
+  363 G   -1 -3  1 -2 -4 -2 -3  6 -3 -5 -5 -2 -4 -4 -3 -1 -3 -4 -4 -4    0   1   6   0   0   1   0  89   0   0   0   0   0   0   0   0   0   0   0   1  1.36 0.44
+  364 I   -2 -3 -3 -3 -3  2 -2 -4 -3  5  1 -3  0 -1 -3 -3 -1 -3 -2  3    0   0   1   0   0  16   0   0   0  51   5   0   0   2   0   0   1   0   0  22  0.58 0.44
+  365 R   -2  7 -2 -3 -4  0 -1 -3 -1 -3 -2  2 -2 -4 -3 -2 -2 -4 -3 -3    0  86   0   0   0   0   0   0   0   1   3   7   0   0   0   0   0   0   0   0  1.21 0.44
+  366 P   -2 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -4 -5  8 -2 -2 -5 -4 -3    0   0   0   0   0   0   0   0   0   0   0   0   0   0  96   0   0   0   0   0  2.34 0.43
+  367 A    6 -2 -3 -3 -2 -2 -2 -1 -3 -2 -3 -2 -2 -3 -2  0 -1 -4 -3 -1   95   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0  0.93 0.43
+  368 I   -2 -4 -4 -4 -2 -3 -4 -4 -4  5  0 -3  1 -1 -4 -3 -2 -4 -2  4    0   0   0   0   0   0   0   0   0  56   2   0   3   0   0   0   0   0   0  36  0.79 0.43
+  369 N   -3 -2  7  3 -4 -1 -1 -2  0 -5 -5 -1 -4 -4 -3  0 -1 -5 -4 -4    0   0  77  20   0   0   0   0   1   0   0   0   0   0   0   1   0   0   0   0  1.23 0.45
+  370 V    1 -4 -4 -4 -2 -3 -3 -4 -4  1 -1 -3 -1 -3  3 -2 -1 -4 -3  5   11   0   0   0   0   0   0   0   0   4   0   0   0   0  18   1   1   0   0  65  0.69 0.44
+  371 G   -1 -4 -2 -3 -4 -3 -3  7 -3 -5 -5 -3 -4 -4 -3 -1 -3 -4 -4 -4    1   0   0   0   0   0   0  98   0   0   0   0   0   0   0   1   0   0   0   0  1.59 0.44
+  372 L   -2 -3 -4 -4 -3 -3 -4 -5 -4  4  4 -2  0 -1 -4 -2 -2 -3 -2  1    0   1   0   0   0   1   0   0   0  32  55   2   0   1   0   2   1   0   0   5  0.67 0.45
+  373 S    0 -2 -1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -1  6  0 -4 -3 -3    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1  99   0   0   0   0  1.02 0.44
+  374 V   -1 -4 -4 -4 -2 -3 -4 -4 -4  2  0 -4  0 -2 -4 -3 -1 -4 -2  6    1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  98  0.99 0.44
+  375 S    0 -2 -1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2  6  1 -4 -3 -3    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  96   4   0   0   0  1.00 0.44
+  376 R   -3  7 -2 -3 -5  0 -1 -4 -1 -4 -3  1 -3 -4 -3 -2 -2 -4 -3 -4    0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.46 0.43
+  377 V   -1 -4 -4 -5 -2 -4 -4 -5 -4  2  0 -4  0 -2 -4 -3 -1 -4 -2  6    0   0   0   0   0   0   0   0   0   4   0   0   0   0   0   0   0   0   0  96  0.99 0.43
+  378 G   -1 -4 -2 -3 -4 -3 -3  7 -3 -5 -5 -3 -4 -4 -3 -1 -3 -4 -4 -4    0   0   0   0   0   0   0  98   0   0   0   0   0   0   0   2   0   0   0   0  1.58 0.43
+  379 S    0 -3 -1 -2 -3 -2 -2  5 -3 -4 -4 -2 -3 -4 -3  4 -1 -4 -4 -4    0   0   0   0   0   0   0  49   0   0   0   0   0   0   0  51   0   0   0   0  0.91 0.42
+  380 A    4 -2  0  0 -2 -1 -1 -1 -2 -3 -3  0 -2 -4 -2  3 -1 -4 -3 -2   57   0   3   4   0   0   0   0   0   0   0   7   0   0   0  28   0   0   0   0  0.60 0.42
+  381 A    6 -3 -3 -3 -2 -2 -2 -1 -3 -3 -3 -2 -2 -3 -2  0 -1 -4 -3 -1   99   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0  0.96 0.42
+  382 Q   -2  0 -1 -1 -4  8  1 -3 -1 -4 -3  0 -2 -5 -2 -1 -2 -3 -3 -3    0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.36 0.42
+  383 L   -2 -2 -2 -3 -1 -2 -3 -4  1  4  1 -3  0 -1 -1 -1  3  1 -1  1    1   2   1   0   1   1   0   0   4  33  16   0   1   1   3   2  29   2   2   4  0.36 0.43
+  384 K   -1  1 -1  0 -4  0  0 -2 -2 -4 -4  6 -3 -4  1 -1 -1 -4 -2 -3    4   1   1   3   0   1   0   2   0   0   0  77   0   0   7   1   1   0   1   1  0.83 0.45
+  385 A    5 -3 -3 -3  0 -2 -2  0 -3  1 -2 -2 -2 -3 -2  1 -1 -4 -3  0   72   0   0   0   1   1   0   3   0  12   1   0   0   0   0   5   0   0   0   4  0.62 0.45
+  386 M   -2 -3 -4 -4 -3 -2 -4 -4 -3  2  1 -3  8 -1 -4 -3 -1 -3  0  0    0   0   0   0   0   0   0   0   0  15   6   0  72   0   0   0   1   0   3   3  0.99 0.45
+  387 K   -2  3 -1 -2 -4  0  0 -3 -2 -4 -4  6 -3 -4 -2 -1 -2 -4 -3 -4    0  12   1   0   0   0   0   0   0   0   0  87   0   0   0   1   0   0   0   0  1.09 0.45
+  388 Q   -1  0 -1 -1 -4  5  1 -2 -2 -4 -2  5 -1 -4 -2 -1 -2 -4 -3 -3    3   0   1   0   0  33   5   2   0   0   3  48   1   0   0   2   0   0   0   1  0.71 0.46
+  389 V   -2 -4 -4 -5 -2 -3 -4 -5 -4  2  2 -4  0 -2 -4 -3 -1 -4 -2  5    0   0   0   0   0   1   0   0   0   5  18   0   0   0   0   0   1   0   0  75  0.81 0.46
+  390 C    4 -1 -2 -3  4 -2 -2  0 -3 -3 -3 -2 -2 -4 -2  3  0 -4 -3 -2   48   3   0   0  10   0   0   6   0   0   0   0   0   0   0  30   2   0   0   0  0.60 0.46
+  391 G    0 -3 -2 -3 -4 -3 -3  7 -3 -5 -5 -1 -4 -4 -3 -1 -3 -4 -4 -4    4   0   0   0   0   0   0  89   0   0   0   3   0   0   0   3   0   0   0   0  1.39 0.46
+  392 S   -1  1  0 -1 -3 -1 -2  1 -2 -4 -4  1 -2 -4  2  3  2 -4 -3 -3    1   8   4   2   0   1   0  13   0   0   0  10   0   0   9  35  17   0   0   0  0.43 0.46
+  393 S   -2 -3 -4 -5 -3 -3 -4 -5 -4  3  4 -4  3 -1 -4 -1 -2 -3  0  1    0   0   0   0   0   0   0   0   0  16  58   0   9   0   0   5   0   0   2   9  0.65 0.46
+  394 K   -2  5 -1 -2 -4  0 -1 -3 -2 -4 -4  6 -3 -4 -3 -2 -1 -4 -3 -4    0  34   0   0   0   0   0   0   0   0   0  63   0   0   0   0   3   0   0   0  1.01 0.47
+  395 L   -2 -3 -4 -5 -3 -3 -4 -2 -4  1  5 -4  1 -1 -1 -3  0 -3 -2  0    1   0   0   0   0   0   0   3   0   6  78   0   1   0   3   0   7   0   1   1  0.72 0.47
+  396 E    0 -2 -1  3 -4  0  5 -3 -2 -1 -1 -1 -2 -3 -3 -1 -1 -4 -3 -3   11   0   1  20   0   2  51   0   0   4   5   0   1   1   0   2   2   0   0   0  0.63 0.47
+  397 L   -3 -3 -4 -5 -3 -2 -4 -5 -2  1  5 -4  1  0 -4 -3 -2 -2  3 -1    0   0   0   0   0   2   0   0   1   2  79   0   1   0   0   1   0   0  15   0  0.82 0.48
+  398 A    6 -3 -2 -3 -2 -2 -2 -1 -3 -3 -3 -2 -2 -4 -2  2 -1 -4 -3 -2   84   0   0   0   0   0   0   0   0   0   0   0   0   0   0  15   1   0   0   0  0.89 0.47
+  399 Q   -1  0 -1 -2 -4  7  1 -3 -1 -4 -3  0 -1 -4 -3  0 -2 -3 -3 -3    3   0   1   0   0  89   2   0   0   0   1   0   1   0   0   4   0   0   0   0  1.23 0.47
+  400 Y   -3 -3 -4 -5 -4 -3 -4 -4  0 -2 -2 -3 -2  5 -4 -3 -3  1  8 -2    0   0   0   0   0   0   0   0   0   0   0   0   0  24   0   1   0   0  75   0  1.50 0.47
+  401 R   -1  6 -1 -1 -4  0  0 -3 -1 -3 -2  1 -2 -4 -3 -1 -2 -2 -3 -2    5  73   1   3   0   1   5   0   0   0   4   1   0   0   0   1   0   0   0   3  0.92 0.48
+  402 E   -2 -1 -1  1 -5  1  7 -3 -1 -5 -4  0 -3 -5 -2 -1 -2 -4 -3 -4    1   0   0   2   0   0  94   0   0   0   0   1   0   0   0   1   1   0   0   0  1.29 0.48
+  403 V   -2 -3 -4 -5 -3 -3 -4 -5 -4  1  5 -3  4 -1 -4 -3 -2 -3 -1  2    0   0   0   0   0   0   0   0   0   1  62   0  16   0   0   0   0   0   2  17  0.76 0.49
+  404 A    4 -2 -2 -1 -3  2  4 -2 -2 -3 -2  0 -2 -4 -2  0 -2 -4 -3 -2   47   0   0   2   0  10  30   0   0   1   2   5   0   0   0   2   0   0   0   1  0.56 0.49
+  405 A    5 -2 -2 -2 -2 -2 -1 -1 -3 -3 -3 -2  1 -4 -2  2  0 -4 -3 -1   71   2   0   0   0   0   3   0   0   0   0   0   4   0   0  16   3   0   0   1  0.70 0.49
+  406 F   -3 -4 -4 -3 -4 -4 -4 -4 -3 -1  0 -4 -1  8 -5 -2 -3  0  2 -2    0   0   0   2   0   0   0   0   0   0   4   0   0  90   0   3   0   0   0   1  1.41 0.49
+  407 A    5 -3 -2 -2 -2 -2 -1 -1 -3 -3 -2 -2 -2 -4 -2  3  0 -4 -3 -2   60   0   0   0   0   0   2   0   0   0   4   0   0   0   0  31   3   0   0   0  0.69 0.49
+  408 Q   -2  1 -1  0 -4  7  1 -3 -1 -4 -3  1 -1 -5 -3 -1 -2 -4 -3 -3    2   6   0   3   0  79   1   0   0   0   0   7   1   0   0   0   1   0   0   2  1.10 0.50
+  409 F   -3 -4 -4 -5 -4 -4 -5 -3 -3 -1  0 -4 -1  8 -5 -2 -3  0  2 -2    0   0   0   0   0   0   0   2   0   0   3   0   0  92   0   3   0   0   0   0  1.47 0.49
+  410 G    3 -3 -2 -2 -3 -3 -3  6 -3 -4 -4 -3 -3 -4 -3  0 -2 -4 -4 -3   26   0   0   1   0   0   0  67   0   0   0   0   0   0   0   6   0   0   0   0  1.07 0.49
+  411 S    0 -2  1 -2 -2 -1 -2  0 -2 -4 -4 -2 -3 -4 -2  6  1 -4 -3 -3    4   0   4   0   0   1   0   5   0   0   0   0   0   0   0  83   2   0   0   0  0.88 0.49
+  412 D   -2 -2  0  7 -5  0  1 -3 -2 -4 -3 -2 -4 -2 -3 -1 -2 -5 -4 -4    3   1   1  82   0   3   4   0   0   0   2   1   0   2   0   1   0   0   0   0  1.25 0.50
+  413 L   -2 -4 -5 -5 -3 -3 -4 -3 -4  1  5 -4  1 -1 -2 -4 -1 -3 -2  0    1   0   0   0   0   0   0   2   0   3  88   0   1   0   2   0   2   0   0   2  0.90 0.50
+  414 D   -3 -3  0  7 -5  0  1 -1 -2 -5 -5 -2 -4 -5 -3 -2 -2 -6 -4 -5    0   0   0  93   0   2   1   3   0   0   0   0   0   0   0   0   0   0   0   0  1.53 0.50
+  415 A    4 -1 -2  2 -3  1  2 -2 -2 -3 -3  2 -3 -4  0  0 -1 -4 -3 -2   42   0   0  13   0   8  15   0   0   0   0  13   0   0   4   1   2   0   0   1  0.46 0.50
+  416 A    4 -2 -1  1 -2 -2  0 -1 -1 -1 -3 -2 -2 -3 -2  2  0 -4 -3  0   47   1   3   7   0   0   7   3   1   5   0   0   0   0   0  16   3   0   0   5  0.40 0.51
+  417 T   -1 -3 -1 -2 -2 -2 -2 -3 -3 -2 -2 -2 -2 -3 -1  2  6 -4 -3  0    0   0   0   0   0   0   0   0   0   0   2   0   0   0   2  10  81   0   0   4  0.96 0.51
+  418 Q   -1  3 -2 -2 -4  5  0 -3 -1 -3 -2  4  0 -4 -3 -2 -2 -4 -3 -3    3  17   0   0   0  43   0   0   0   0   5  27   3   0   0   0   1   0   0   0  0.73 0.50
+  419 A    3  1  1  0 -3  2  0 -1 -2 -3 -3  3 -1 -4 -2  0 -2 -4 -1 -3   34   6   7   6   0  11   3   3   0   1   1  22   1   0   0   3   0   0   2   0  0.37 0.51
+  420 L   -2  2 -2 -3 -3  5 -1 -4 -2  0  2  1  1 -3 -1 -2 -1 -3 -1 -1    1  12   0   0   0  34   1   0   0   4  26   6   3   0   3   1   3   0   2   3  0.42 0.51
+  421 L   -3 -4 -5 -5 -3 -4 -4 -5 -4  2  5 -4  2 -1 -3 -2 -2 -2 -3  1    0   0   0   0   0   0   0   0   0   8  80   0   3   0   1   4   0   0   0   5  0.88 0.54
+  422 N    2 -2  4  1 -3  0  3 -2 -1 -1 -3 -1 -3 -4 -3  0  0 -5 -3 -1   24   0  30   7   0   2  20   0   0   5   0   1   0   0   0   4   4   0   0   3  0.46 0.54
+  423 R   -3  6  0 -1 -5  1 -1 -4  3 -4 -3  2 -3 -4 -3 -2 -2 -4 -2 -4    0  70   3   3   0   6   0   0   9   1   1   7   0   0   0   0   0   0   0   0  1.06 0.54
+  424 G   -1 -4 -2 -3 -4 -3 -4  7 -4 -5 -5 -3 -4 -5 -4 -2 -3 -4 -5 -5    1   0   0   0   0   0   0  99   0   0   0   0   0   0   0   0   0   0   0   0  1.69 0.53
+  425 A    2  2 -1 -2 -3  3  3 -3 -1 -3 -3  2 -2 -4 -3  1 -2 -4 -2 -1   25  14   2   0   0  14  19   0   1   0   1  11   0   0   0   8   0   0   1   4  0.40 0.54
+  426 R   -2  7 -2 -3 -3  0 -1 -4 -1 -3 -1  3  0 -4 -3 -2 -2 -4 -3 -3    1  75   0   0   0   0   0   0   1   1   5  13   2   0   0   0   0   0   0   0  1.11 0.54
+  427 L   -2 -4 -2 -5 -3 -4 -4 -5 -4  3  4 -4  2  0 -4 -3  0 -3 -2  2    1   0   2   0   0   0   0   0   0  21  52   0   6   3   0   0   5   0   0  10  0.65 0.55
+  428 T   -2  2 -1 -3 -3 -2 -3 -2 -3  0 -1 -2  2 -3 -3 -1  5 -4 -2  3    1  13   2   0   0   0   0   2   0   3   1   0   6   0   0   1  44   0   1  26  0.52 0.55
+  429 E    0 -2 -2  0 -5  1  6 -3 -2 -4 -4 -1 -3 -5 -3 -1 -2 -4 -3 -3    9   0   0   0   0   2  87   0   0   1   0   1   0   0   0   0   0   0   0   2  1.18 0.55
+  430 V   -3 -4 -5 -5 -3 -4 -4 -5 -4  3  4 -4  2 -1 -4 -3 -2 -3 -1  3    0   0   0   0   0   0   0   0   0  20  52   0   5   0   0   0   0   0   2  20  0.75 0.54
+  431 P   -3 -4 -5 -5 -3 -4 -4 -5 -4  1  5 -4  3  2  0 -4 -2 -3 -2 -1    0   0   0   0   0   0   0   0   0   5  72   0   7   9   6   0   1   0   0   0  0.82 0.55
+  432 K   -1  1  0 -2 -4  0 -1 -3 -2 -3 -4  7 -3 -5 -2 -1 -1 -5 -3 -4    3   0   3   0   0   2   0   0   0   1   0  89   0   0   0   0   1   0   0   0  1.11 0.55
+  433 Q   -2  0 -1 -2 -4  8  1 -3 -1 -4 -4  0 -2 -5 -3 -1 -2 -3 -3 -4    0   0   0   0   0 100   0   0   0   0   0   0   0   0   0   0   0   0   0   0  1.54 0.55
+  434 P   -1 -1  1  2 -4 -1 -1  1 -3 -4 -4  2 -3 -2  5  1 -1 -5 -4 -3    4   1   7  10   0   1   1   9   0   0   0  20   0   2  32  11   2   0   0   1  0.64 0.55
+  435 Q   -2  1 -1 -2  0  7  0 -3 -1 -1 -1  1 -2 -4 -1 -2 -2 -3 -3 -2    1   5   2   0   2  66   2   0   1   4   6   5   0   0   3   0   0   0   0   2  0.83 0.54
+  436 Y   -2 -2  2 -3 -4 -2 -3 -2  3 -3 -1 -3 -2  3 -4 -1 -2  0  7 -3    3   1  13   0   0   1   0   3   6   0   3   0   0  11   0   4   2   0  53   0  0.89 0.54
+  437 A    1 -1  0  0 -3  1  0 -2  1 -3 -3  0 -1 -4 -1  4  1 -4 -3 -1   12   1   4   4   0   7   6   0   3   0   0   6   1   0   2  42   7   0   0   4  0.40 0.56
+  438 P   -2 -4 -3 -3 -3 -3 -3 -4 -4 -4 -4 -2 -4 -5  8 -2 -1 -5 -4 -3    0   0   0   0   0   0   0   0   0   0   0   0   0   0  96   0   3   0   0   1  2.43 0.56
+  439 L   -3 -3 -4 -5 -2 -2 -4 -5 -3  1  4 -3  6  0 -3 -2 -2 -3  0  1    0   1   0   0   0   1   0   0   0   5  42   0  32   3   1   3   0   0   4   8  0.72 0.57
+  440 P    0  0  0  1 -3 -1 -1 -2 -3 -3 -4 -1 -3 -2  5  3  2 -4 -2 -3    6   4   5   8   0   2   1   0   0   1   0   2   0   1  27  25  14   0   1   1  0.57 0.57
+  441 I   -2 -4 -3 -4 -3 -1 -4 -5 -4  3  1 -4  0  0 -2 -3  0 -2 -2  5    0   0   1   0   0   3   0   0   0  21   9   0   2   3   2   0   5   0   0  53  0.65 0.57
+  442 E    2 -2 -2 -1 -4 -1  5 -2 -2 -3 -3 -1  1 -2  1  0  0  0  0 -1   20   0   1   1   0   0  47   1   0   0   0   0   5   1   6   5   4   1   4   4  0.52 0.58
+  443 K   -2 -1  1  2 -2  2  4 -3  0 -4 -3  3 -3 -4 -3 -1 -1 -4 -2 -2    0   0   5  11   1   8  39   0   2   0   2  23   0   0   0   3   2   0   1   2  0.62 0.59
+  444 Q   -1 -1 -2 -2 -4  7  2 -3 -1 -3 -1  0  2 -4 -3 -2 -2 -4 -3 -3    3   0   0   0   0  78   8   0   0   1   5   0   6   0   0   0   0   0   0   0  1.12 0.59
+  445 I    1 -4 -4 -4 -1 -4 -4 -2 -4  4  0 -4 -1 -2 -4 -2 -1 -4 -3  5   12   0   0   0   1   0   0   3   0  26   1   0   0   0   0   2   1   0   0  54  0.71 0.59
+  446 L    2 -4 -4 -4  1 -3 -4 -2 -4  3  2 -3  0 -2 -2 -2 -1 -4 -3  3   21   0   0   0   2   0   0   2   0  18  23   0   2   0   1   1   2   0   0  27  0.45 0.59
+  447 V   -1 -3 -3 -4  0 -3 -3 -4 -4  4  1 -3  1 -1 -4  2  0 -4 -3  3    1   0   0   0   2   0   0   0   0  26  15   0   3   2   0  24   5   0   0  21  0.44 0.59
+  448 I   -3 -4 -5 -5 -3 -4 -5 -5 -5  5  4 -4  1  0 -4 -4 -2 -2 -2  2    0   0   0   0   0   0   0   0   0  42  45   0   1   2   0   0   0   0   0   8  0.84 0.58
+  449 Y   -2 -4 -4 -5 -4 -3 -4 -3 -1 -2 -1 -4 -2  5 -5 -2 -3  3  7 -2    3   0   0   0   0   0   0   1   0   1   4   0   0  29   0   3   0   3  56   1  1.23 0.59
+  450 A    5 -3 -3 -3  3 -2 -3 -1 -3 -1 -1 -2 -1 -3 -3  0  1 -4 -3 -1   71   0   0   0   6   0   0   1   0   2   6   0   1   0   0   3   9   0   0   2  0.73 0.58
+  451 A    3 -4 -3 -4  2 -3 -4  4 -4  0  1 -3 -1  0 -3 -2 -2 -4 -3  1   26   0   0   0   4   0   0  34   0   5  13   0   1   4   0   0   0   0   0  12  0.48 0.59
+  452 V    0 -3  1 -1 -3  0 -1 -1 -3  0 -1 -1 -1 -3 -3  0  3 -4 -3  3    8   0   8   3   0   3   4   5   0   5   3   3   0   0   0   4  22   0   0  30  0.28 0.59
+  453 N   -2  2  5 -1 -4  1  2 -2  2 -5 -4  3 -3 -2 -3 -1 -1 -4 -2 -4    2  10  39   0   0   5  14   1   5   0   0  18   0   2   0   3   2   0   1   0  0.66 0.59
+  454 G   -1 -2  2  0 -4 -3 -2  6 -2 -5 -5  0 -4 -5 -4 -1 -3 -4 -4 -5    1   1   9   4   0   0   1  76   0   0   1   5   0   0   0   2   0   0   0   0  1.20 0.60
+  455 F   -2 -3 -4 -5 -4 -3 -4 -5  3 -2  0 -3 -2  5 -5 -3 -3  0  7 -1    3   0   0   0   0   0   0   0   6   0  11   1   0  28   0   0   0   0  46   4  1.04 0.60
+  456 C   -3 -4 -5 -5  2 -4 -5 -5 -4  2  5 -4  2  2 -5 -4 -3 -3 -2  0    0   0   0   0   4   0   0   0   0  11  71   0   3   7   0   0   0   0   1   3  0.87 0.61
+  457 D   -2 -1  0  6 -5 -1  0 -3 -2 -4 -3  1 -1 -5 -2 -1  0 -5 -4 -4    2   3   2  67   0   1   2   0   1   0   2  11   2   0   1   1   6   0   0   0  0.99 0.61
+  458 R   -1  0  2  5 -4  0  1 -2 -2 -3 -4  2 -3 -3 -1  0 -2  0 -4 -3    4   6   9  40   0   3   5   2   0   1   1  16   0   1   2   5   1   2   0   2  0.56 0.61
+  459 M   -2 -3 -5 -5 -3 -4 -4 -5 -5  5  2 -4  2  0 -4 -4 -2 -4 -2  4    1   1   0   0   0   0   0   0   0  39  16   0   6   2   0   0   0   0   1  35  0.77 0.61
+  460 P    0 -2 -1  2 -4 -1  3 -3 -3 -4 -4  0 -4 -5  6 -1 -2 -5 -4 -3    8   0   2  10   0   1  24   0   0   0   0   6   0   0  45   3   0   0   0   0  1.05 0.60
+  461 L   -2 -3 -3 -4 -3 -2 -3 -5 -4  2  2 -3 -1 -2  1 -2 -1 -4 -3  5    2   1   1   0   0   2   1   0   0  11  25   1   0   0   7   1   2   0   0  47  0.58 0.63
+  462 D    1 -1  2  3 -4  0  3 -1 -2 -4 -4  2 -3 -5 -2  1 -1 -5 -4 -3   11   1  11  20   0   3  26   4   0   0   0  12   0   0   1  11   1   0   0   1  0.50 0.63
+  463 R   -2  1  1  4 -1  2  1 -3 -1 -3 -3  3 -3 -4 -3 -1 -2 -4  0 -3    1   8   7  26   2  12   4   0   1   1   3  26   0   0   0   3   0   0   5   1  0.50 0.63
+  464 I   -1 -4 -5 -5 -3 -4 -4 -5 -5  5  1 -4  1 -2 -4 -3 -2 -4 -3  4    4   0   0   0   0   0   0   0   0  48   7   0   2   0   0   0   0   0   0  40  0.91 0.64
+  465 S    0  3 -2 -3 -4  2 -1  0 -3 -1 -1  2 -2 -4 -1  1  1 -4 -3 -1    8  20   1   0   0  10   3   7   0   3   7  12   0   0   3  11   7   0   0   6  0.24 0.64
+  466 Q    0  2  0  3 -4  1  2 -3 -2 -4 -3  2 -3 -2 -1  1 -1 -4 -2 -3   10  14   3  16   0   6  16   0   0   1   2  14   0   1   2  10   3   0   1   0  0.37 0.64
+  467 Y   -2 -4 -4 -5 -2 -4 -4 -5 -2 -1 -1 -4 -2  7 -5 -4 -4  2  5 -2    3   0   0   0   1   0   1   0   0   3   3   0   0  71   0   0   0   2  17   0  1.36 0.63
+  468 E   -2  0 -2 -1 -4  1  6 -3 -2 -3 -1  2 -2 -2 -3 -2 -2 -4 -3 -3    1   3   0   0   0   2  71   0   0   1   7  11   0   2   0   1   0   0   0   1  0.92 0.64
+  469 K    0  1  1  0 -4  1  1  0  0 -3 -3  3 -2 -3 -3  1  1 -4  0 -2   10   7   6   3   0   5  11   5   2   1   1  25   0   0   0  10   8   0   3   3  0.28 0.64
+  470 A    2 -1 -1  2 -4  1  3  1 -3 -3 -1  1  0 -3 -3  0 -2 -4 -2 -2   17   3   2  12   0   5  19  12   0   1   8  10   3   1   0   7   1   0   1   1  0.26 0.65
+  471 I   -3 -4 -5 -5 -3 -4 -5 -5 -4  3  5 -4  1  3 -5 -4 -3  0  1  0    0   0   0   0   0   0   0   0   0  16  61   0   2  12   0   0   0   1   4   3  0.83 0.65
+  472 P   -2  2 -2 -4 -2 -2 -4 -5  1  2  3 -2  0  3 -3 -3 -2 -2  1  0    3  12   1   0   1   2   0   0   4  14  40   0   1  12   1   0   1   0   4   4  0.43 0.64
+  473 N    1 -1  2  2 -4  1  3 -2 -1 -3 -2  0 -3 -4 -2  1  0 -4 -3 -3   14   2  10  12   0   6  24   1   1   1   5   6   0   0   1  11   4   0   0   0  0.34 0.64
+  474 S   -1 -1 -1 -3 -3 -1  0 -3  4 -3 -2  1 -2  4 -4 -1 -2  2  6 -3    4   2   2   1   0   2   5   1  10   0   3  10   0  17   0   5   2   2  33   1  0.62 0.63
+  475 V    0 -4 -4 -5  2 -4 -4 -4 -4  2  4 -4  2  3 -4 -3 -2 -3 -2  2    8   0   0   0   5   0   0   0   0  12  40   0   6  12   0   1   0   0   0  16  0.56 0.64
+  476 K   -2  3  2  3 -4  1  0 -3  3 -3 -4  4 -3 -4 -2 -1 -1 -4 -2 -3    1  16  10  21   0   4   4   1   8   2   0  27   0   0   1   2   3   0   1   2  0.52 0.63
+  477 P    2  0  1 -1 -3  1  0 -1  0 -3 -3  0 -3 -4  1  3  2 -4 -2 -2   15   4   8   3   0   5   6   3   3   0   2   5   0   0   6  26  13   0   1   1  0.31 0.64
+  478 E   -1 -1  4  2 -4  2  2 -2  1 -3 -4  2 -3 -4 -3  2 -1 -5 -2 -3    2   2  23   9   0   8  17   1   3   1   0  17   0   0   0  14   2   0   1   1  0.51 0.65
+  479 L    0  0  0 -1 -3  0 -1 -2  6 -2  0  3 -2 -1 -4 -2 -3 -3  3 -2    8   3   3   4   0   2   2   2  29   2   9  20   0   2   0   0   0   0  13   1  0.53 0.65
+  480 L    1 -2 -1 -2 -3  0 -2  1 -3 -3  0  1 -3  0  5  1 -1 -4 -3 -3   11   1   3   0   0   5   1   9   0   0  10  10   0   4  29  12   4   0   0   1  0.47 0.64
+  481 Q    0 -2 -1  4 -4  2  4 -2 -2 -3 -4  0 -3 -3 -2  0 -2 -4 -3 -3    8   0   1  29   0   8  36   2   0   2   1   5   0   1   1   5   1   0   0   0  0.66 0.63
+  482 A   -2 -4 -4 -2 -2 -3 -1 -4 -2  4  3 -3 -1  1 -4 -1 -1 -3  0  2    2   0   0   2   0   0   4   0   1  31  27   1   0   7   0   5   2   0   3  14  0.44 0.64
+  483 L   -2 -2 -4 -4 -1 -3 -2 -2 -4  2  4 -3  4  2 -3 -3 -3  0 -1  1    2   1   0   0   1   1   3   3   0  15  43   1  13   8   1   0   0   1   1   6  0.51 0.66
+  484 K    0  1  1  2 -4  1  3 -2 -2 -4 -3  3 -3 -4 -3  0  0 -4  0 -4   10   6   5  11   0   4  25   1   0   0   2  21   0   0   0   7   4   0   4   0  0.43 0.68
+  485 G    0 -1  0  1 -4  1  2  2 -1 -1 -3  1 -3 -4 -3  2  1 -3 -4 -3    8   2   4   7   0   5  13  13   1   4   2  11   0   0   0  18   8   0   0   1  0.26 0.67
+  486 G   -2 -2 -3 -2 -3  0 -3 -1 -4  6  0 -2  1 -2 -4 -2  1 -4 -3  2    2   2   1   2   0   4   1   5   0  55   3   2   3   0   0   2  10   0   0   8  0.57 0.66
+  487 L    1  3  1 -2 -3  1  0 -2 -2  2  1  2 -1 -1 -3 -1 -2 -4 -2 -1   11  15   9   0   0   7   4   3   0  13  13  11   1   3   0   4   1   0   0   2  0.18 0.67
+  488 T   -1  0  1  1 -3 -1  2 -2 -3 -3 -4  0 -3 -4 -3  2  5 -4 -3 -3    1   4   7   6   0   0  14   2   0   0   0   6   0   0   0  18  41   0   1   0  0.57 0.68
+  489 N   -2 -1  3  1 -4  0  1  3 -2 -3 -3  3 -3 -4 -2 -1  0 -5 -4 -4    2   0  15   6   0   2  11  26   0   1   3  26   0   0   1   3   4   0   0   0  0.48 0.68
+  490 E   -1 -1 -1  3 -2  1  3 -2 -2 -4 -4  4 -3 -5 -2 -1  0 -5 -3 -2    3   1   2  20   1   5  23   2   0   0   0  29   0   0   1   2   7   0   1   3  0.57 0.69
+  491 R   -1  0 -2 -3 -3 -3 -3 -2 -1  2  3  0  1  3 -4 -3 -2  2  2 -1    4   5   1   1   0   0   1   3   1  11  39   8   3  11   0   0   1   3   7   0  0.38 0.69
+  492 K   -2 -2  3  3 -4 -1  1 -2 -2 -4 -2  1 -3 -1 -3  2  3 -4 -2 -3    1   0  17  14   0   0  10   2   0   0   4   7   0   3   0  20  19   0   1   0  0.42 0.70
+  493 M    1 -2 -1  4 -4 -1  2 -2 -2 -1 -1  1 -1 -2  0 -1 -2 -3 -1 -3   12   0   1  32   0   1  17   3   0   3   8   8   2   2   4   4   0   0   3   0  0.40 0.70
+  494 E   -1 -2  1  4 -5  2  4 -2 -2 -4 -4  0  0 -5  0  0 -2 -5 -4 -3    3   0   5  25   0   7  36   2   0   1   1   4   3   0   4   8   1   0   0   1  0.64 0.71
+  495 P    1 -3  0  0 -3 -2 -1 -4  1  3  2 -2 -1 -2 -2  0  2 -4  0  0   13   0   6   7   0   0   3   0   3  18  18   2   0   1   2   5  13   0   3   6  0.19 0.71
+  496 D    0 -2 -1  2 -4  1  5 -4 -2 -3 -3  2 -3 -3 -3 -1 -1 -5 -4  0    6   0   2  12   0   4  47   0   0   0   2  12   0   1   0   1   2   0   0  10  0.63 0.70
+  497 A    3 -1  2  1 -4  1  2 -1 -2 -4 -4  3 -3 -5 -3  0  0 -5 -4 -3   25   1   9   7   0   6  12   3   0   0   0  26   0   0   0   5   4   0   0   1  0.44 0.70
+  498 F    2  0 -2 -1 -1  2  0 -2 -3 -1  0  3  2  1 -3  0  0 -4 -3 -1   20   2   0   2   1  13   4   1   0   3   9  19   5   6   0   5   4   0   0   4  0.22 0.70
+  499 L   -3 -4 -5 -5 -1 -4 -5 -5 -4  2  5 -4  1  2 -5 -4 -2 -3 -1  0    1   0   0   0   1   0   0   0   0  12  74   0   1   9   0   0   1   0   1   1  0.96 0.70
+  500 K   -1  0  2  1 -4  0  2 -3 -2 -1 -3  5 -3 -3 -3  0  0 -5 -4 -1    3   3   9   6   0   1  11   1   0   3   1  47   0   1   0   5   5   0   0   4  0.55 0.71
+  501 E    1 -2  1  2 -4  0  4 -2 -2 -4 -3  2 -3 -4 -3  0 -1 -4  0 -3   16   0   7   8   0   3  35   2   0   0   1  14   0   0   0   7   2   0   3   0  0.50 0.71
+  502 R    3 -3 -1 -4  0 -3 -2 -2 -3  2  0 -3 -1  1 -4  0 -1 -4 -1  3   35   0   3   0   2   0   1   2   0  14   8   0   1   7   0   4   1   0   1  20  0.38 0.71
+  503 A   -1 -4 -5 -5 -2 -3 -4 -4 -5  5  2 -1  1  0 -4 -3 -1 -4 -3  2    6   0   0   0   0   1   0   0   0  48  20   4   2   3   0   0   3   0   0  12  0.68 0.71
+  504 L   -1  1  1  2 -4  2  3 -3 -2 -3 -3  3 -3 -5 -3  0  1 -5 -4 -3    4   5   7  13   0   7  22   0   0   1   3  23   0   0   0   5   8   0   0   1  0.45 0.71
+  505 A    0 -1  2  2 -4  0  4 -2 -2 -4 -4  1 -1  0 -1  1  0 -5 -3 -3    7   1   9  13   0   2  32   2   0   0   0   9   1   4   3  11   5   0   0   0  0.44 0.71
+  506 L   -4 -4 -5 -5 -4 -5 -5 -5 -2 -2  1 -5 -1  8 -5 -4 -4 -1  4 -2    0   0   0   0   0   0   0   0   0   0   9   0   0  75   0   0   0   0  15   1  1.51 0.69
+  507 I   -2 -4 -4 -5 -2 -3 -4 -5 -4  3  3 -4  3 -1 -4 -3 -2 -4 -2  4    0   0   0   0   0   0   0   0   0  21  33   0  11   0   0   0   0   0   0  35  0.67 0.45
+
+                      K         Lambda
+Standard Ungapped    0.1349     0.3178
+Standard Gapped      0.0410     0.2670
+PSI Ungapped         0.1840     0.3174
+PSI Gapped           0.0563     0.2670

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ay007676.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ay007676.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ay007676.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+LOCUS       AY007676                1389 bp    DNA     linear   BCT 29-OCT-2001
+DEFINITION  Unknown marine gamma proteobacterium NOR5 16S ribosomal RNA,
+            partial sequence.
+ACCESSION   AY007676
+VERSION     AY007676.1  GI:12000362
+KEYWORDS    .
+SOURCE      unknown marine gamma proteobacterium NOR5
+  ORGANISM  unknown marine gamma proteobacterium NOR5
+            Bacteria; Proteobacteria; Gammaproteobacteria.
+REFERENCE   1  (bases 1 to 1389)
+  AUTHORS   Eilers,H., Pernthaler,J., Peplies,J., Glockner,F.O., Gerdts,G. and
+            Amann,R.
+  TITLE     Isolation of novel pelagic bacteria from the German bight and their
+            seasonal contributions to surface picoplankton
+  JOURNAL   Appl. Environ. Microbiol. 67 (11), 5134-5142 (2001)
+   PUBMED   11679337
+REFERENCE   2  (bases 1 to 1389)
+  AUTHORS   Eilers,H., Pernthaler,J., Peplies,J., Gloeckner,F.O., Gerdts,G.,
+            Schuett,C. and Amann,R.
+  TITLE     Identification and seasonal dominance of culturable marine bacteria
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 1389)
+  AUTHORS   Eilers,H., Pernthaler,J., Peplies,J., Gloeckner,F.O., Gerdts,G.,
+            Schuett,C. and Amann,R.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (29-AUG-2000) Molecular Ecology, Max-Planck-Institute,
+            Celsiusstrasse 1, Bremen 28359, Germany
+FEATURES             Location/Qualifiers
+     source          1..1389
+                     /organism="unknown marine gamma proteobacterium NOR5"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:145658"
+     rRNA            <1..>1389
+                     /product="16S ribosomal RNA"
+ORIGIN      
+        1 cgcgaaagta cttcggtatg agtagagcgg cggacgggtg agtaacgcgt aggaatctat
+       61 ccagtagtgg gggacaactc ggggaaactc gagctaatac cgcatacgtc ctaagggaga
+      121 aagcggggga tcttcggacc tcgcgctatt ggaggagcct gcgttggatt agctagttgg
+      181 tggggtaaag gcctaccaag gcgacgatcc atagctggtc tgagaggatg atcagccaca
+      241 ccgggactga gacacggccc ggactcctac gggaggcagc agtggggaat attgcgcaat
+      301 gggcgaaagc ctgacgcagc catgccgcgt gtgtgaagaa ggccttcggg ttgtaaagca
+      361 ctttcaattg ggaagaaagg ttagtagtta ataactgcta gctgtgacat tacctttaga
+      421 agaagcaccg gctaactccg tgccagcagc cgcggtaata cggaggtgcg agcgttaatc
+      481 ggaattactg ggcgtaaagc gcgcgtaggc ggtctgttaa gtcggatgtg aaagccccgg
+      541 gctcaacctg ggaattgcac ccgatactgg ccgactggag tgcgagagag ggaggtagaa
+      601 ttccacgtgt agcggtgaaa tgcgtagata tgtggaggaa taccggtggc gaaggcggcc
+      661 tcctggctcg acactgacgc tgaggtgcga aagcgtgggg agcaaacagg attagatacc
+      721 ctggtagtcc acgccgtaaa cgatgtctac tagccgttgg gagacttgat ttcttggtgg
+      781 cgaagttaac gcgataagta gaccgcctgg ggagtacggc cgcaaggtta aaactcaaat
+      841 gaattgacgg gggcccgcac aagcggtgga gcatgtggtt taattcgatg caacgcgaag
+      901 aaccttacca ggccttgaca tcctaggaat cctgtagaga tacgggagtg ccttcgggaa
+      961 tctagtgaca ggtgctgcat ggctgtcgtc agctcgtgtc gtgagatgtt gggttaagtc
+     1021 ccgtaacgag cgcaaccctt gtccttagtt gccagcgcgt aatggcggga actctaagga
+     1081 gactgccggt gacaaaccgg aggaaggtgg ggacgacgtc aagtcatcat ggcccttacg
+     1141 gcctgggcta cacacgtgct acaatggaac gcacagaggg cagcaaaccc gcgaggggga
+     1201 gcgaatccca caaaacgttt cgtagtccgg atcggagtct gcaactcgac tccgtgaagt
+     1261 cggaatcgct agtaatcgtg aatcagaatg tcacggtgaa tacgttcccg ggccttgtac
+     1321 acaccgcccg tcacaccatg ggagtgggtt gctccagaag tggttagcct aaccttcggg
+     1381 agggcgatc
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ay116458.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ay116458.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ay116458.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,48 @@
+LOCUS       AY116458                 229 bp    DNA     linear   PLN 09-APR-2003
+DEFINITION  Eucalyptus globulus subsp. bicostata isolate EgRT12 Ty1-copia
+            retrotransposon reverse transcriptase-like (RT) gene, partial
+            sequence.
+ACCESSION   AY116458
+VERSION     AY116458.1  GI:22858882
+KEYWORDS    .
+SOURCE      Eucalyptus globulus subsp. bicostata
+  ORGANISM  Eucalyptus globulus subsp. bicostata
+            Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
+            Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons;
+            rosids; Myrtales; Myrtaceae; Eucalyptus.
+REFERENCE   1  (bases 1 to 229)
+  AUTHORS   Diez,J., Beguiristain,T., Le Tacon,F., Casacuberta,J.M. and Tagu,D.
+  TITLE     Identification of Ty1- copia retrotransposons in three
+            ectomycorrhizal basidiomycetes: evolutionary relationships and use
+            as molecular markers
+  JOURNAL   Curr. Genet. 43 (1), 34-44 (2003)
+   PUBMED   12684843
+REFERENCE   2  (bases 1 to 229)
+  AUTHORS   Beguiristain,T., Diez,J., Le Tacon,F., Casacuberta,J. and Tagu,D.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (29-MAY-2002) UMR 1136 IaM Interactions
+            Arbres/Micro-Organismes, INRA-Nancy, Champenoux 54280, France
+FEATURES             Location/Qualifiers
+     source          1..229
+                     /organism="Eucalyptus globulus subsp. bicostata"
+                     /mol_type="genomic DNA"
+                     /isolate="EgRT12"
+                     /sub_species="bicostata"
+                     /db_xref="taxon:71272"
+     gene            <1..>229
+                     /gene="RT"
+                     /note="EgRT"
+     misc_feature    <1..>229
+                     /gene="RT"
+                     /note="similar to reverse transcriptase; unknown if
+                     correctly expressed due to presence of premature
+                     stopcodons and frameshifts"
+     repeat_region   <1..>229
+                     /transposon="Ty1-copia retrotransposon"
+ORIGIN      
+        1 aatgggaatt tagaggaaga ggtttatatg gaccaacccg aaggctttcc agttatagga
+       61 aaggaacaca tggtatgtaa attaaagaag tcaatatatg gacttaaaca agcttcccga
+      121 caatggtatc ttaagttaaa cgataccata acttcatttg gatttaagga aaacactgtt
+      181 gatcggtgta tatatatgaa gattagtggg agcaagttta tttttctag
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ay149291.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ay149291.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ay149291.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,44 @@
+LOCUS       AY149291                 357 bp    DNA     linear   PRI 25-FEB-2003
+DEFINITION  Homo sapiens neanderthalsensis mitochondrial D-loop hypervariable
+            region I, partial sequence.
+ACCESSION   AY149291
+VERSION     AY149291.1  GI:28557455
+KEYWORDS    .
+SOURCE      mitochondrion Homo sapiens neanderthalensis
+  ORGANISM  Homo sapiens neanderthalensis
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 357)
+  AUTHORS   Schmitz,R.W., Serre,D., Bonani,G., Feine,S., Hillgruber,F.,
+            Krainitzki,H., Paabo,S. and Smith,F.H.
+  TITLE     The Neandertal type site revisited: interdisciplinary
+            investigations of skeletal remains from the Neander Valley, Germany
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 99 (20), 13342-13347 (2002)
+   PUBMED   12232049
+REFERENCE   2  (bases 1 to 357)
+  AUTHORS   Serre,D. and Paabo,S.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (05-SEP-2002) Department of Evolutionary Genetics, Max
+            Planck Institute for Evolutionary Anthropology, Inselstrasse,
+            Leipzig D-04103, Germany
+FEATURES             Location/Qualifiers
+     source          1..357
+                     /organism="Homo sapiens neanderthalensis"
+                     /organelle="mitochondrion"
+                     /mol_type="genomic DNA"
+                     /isolate="Neandertal 2"
+                     /sub_species="neanderthalensis"
+                     /db_xref="taxon:63221"
+                     /country="Germany: Neandertal"
+     D-loop          <1..>357
+                     /note="hypervariable region I"
+ORIGIN      
+        1 gttctttcat gggggagcag atttgggtac cacccaagta ttgactcacc catcagcaac
+       61 cgctatgtat ttcgtacatt actgccagcc accatgaata ttgtacagta ccataattac
+      121 ttgactacct gcagtacata aaaacctaat ccacatcaac cccccccccc catgcttaca
+      181 agcaagcaca gcaatcaacc ttcaactgtc atacatcaac tacaactcca aagacaccct
+      241 tacacccact aggatatcaa caaacctacc cacccttgac agtacatagc acataaagtc
+      301 atttaccgta catagcacat tacagtcaaa tcccttctcg cccccatgga tgacccc
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/barns-combined.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/barns-combined.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/barns-combined.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,124 @@
+#NEXUS 
+
+[non-conforming file, fused from two files obtained from Chuck
+ Delwiche.
+The first has an empty PAUP block and a DATA block, with no TAXA
+ block.  The second has only a TREES block.]
+
+BEGIN PAUP;
+
+[!
+Small subunit rRNA alignment from Barns, S.M., C.F. Delwiche,
+J.D. Palmer, and N.R. Pace.  1996.  Perspectives on archaeal
+diversity, thermophily and monophyly from environmental rRNA
+sequences.  Proc. Natl. Acad. Sci. USA. In press.
+]	
+ENDBLOCK;
+
+BEGIN DATA;
+DIMENSIONS  NTAX=64 NCHAR=922;
+FORMAT MISSING=- GAP="  DATATYPE=RNA ;
+OPTIONS  MSTAXA=UNCERTAIN ;
+
+MATRIX
+[                     10        20        30        40        50        60        70        80        90        100       110       120       130       140       150       160       170       180       190       200       210       220       230       240       250       260       270       280       290       300       310       320       330       340       350       360       370       380       390       400       410       420       430       440       450       460       470       480       490       500       510       520       530       540       550       560       570       580       590       600       610       620       630       640       650       660       670       680       690       700       710       720       730       740       750       760       770       780       790       800       810       820       830       840       850       860       870       880       890       900       910       920]
+[                     .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .         .  ]
+
+E.coli       UCAGAU-UGAACGCUGGCGGCA-GGC-CUAA-CACAUGCAAGUCACGUGGCGGACGGGUGAGUAAUGUCUGGGAACUGCCUGAUGGGGGGGAUAACUACUGCGGUAGCUAAUACCCAUCGGAUGUGCCCAGAUGGGAUUAGCUAGUAGGUGGGGUAACGGCUCACCUAGGCGACGAUCCCUAGCUGGUCUGAGAGGAUGACCAGCCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGCACAAUGGGCGCCUGAUGCAGCCAUGCCGCGCACC-GGCUA-A-CUCCGUGCCAGCAGCCGCGGUAAUACGGAG-GGUGCAAGCGUUAAUCGGAAUUACUGGGCGUAAAGCGCACGCAGGCAGUCUCGUAGAGGGGGGUAGAAUUCCAGGUGUAGCGGUGAAAUGCGUAGAGAUCUGGAGGAAUACCGGUGGCGAAGGCGGCCCCCUGGACGAAGACUGACGCUCAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGUCGACUUAGUCGACCGCCUGGGGAGUACGGCCGCAAGGUUAAAACUCAAAUGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCUGGUCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUUGUGAAAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAGACUGCCAGUACUG-GAGGAAGGUGGGGAUGACGUCAAGUCAUC-AUGGCCCUGACCAGGGCUACACACGUGCUACAAUGCGUAGUCCGGAUUGGAGUCUGCAAGACUCCAUGAAGUCGGAAUCGCUAGUAAUCGUGGAUCAGA-AUGCCACGGUGAAUACGUUCCCGGGCCU   [922]
+Aquifex_p    UCAGCG-CGAACGCUGGCGGCG-UGC-CUAA-CACAUGCAAGUCGCGCGGCAAACGGGUGAGUAACACGUGGGUCCUACCCCCAGGGGGGGAUAACCCCGGCCGGGGCUAAUACCCUGGGGAUGGGCCCGCGGCCCAUCAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCUAUGACGGGUAGCCGGCCUGAGAGGGUGGCCGGCCACAGCGGGACUGAGACACGGCCCGCACCCCUACGGGGGGCAGCAGUGGGGAAUCGUGGGCAAUGGGCGCCUGACCCCGCGACGCCGCGGGAC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GUCCCGAGCGUUGCGCGAAGUCACUGGGCGUAAAGCGUCCGCAGCCAGGCACGCCCGGGCAGGCGGAAUUCCCGGGGUAGCGGUGAAAUGCGUAGAUCUCGGGAGGAACACCGAAGGGGAAGCCAGCCUGCUGGGGCUGUCCUGACGGUCAGGGACGAAAGCCGGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCCGGCCGUAAACCAUGGGCGCUAAGCGCCCCGCCUGGGGAGUACGGGCGCAAGCCUGAAACUCAAAGGAAUUGGCGGGGGCCCGC-ACAACCGGUGGAGCGUCUGGUUCAAUUCGAUGCUAACCGAAGAACCUUACCCGGGCAGGUGGUGCAUGGCCGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCUGGACCGCCGGCGCCG-GAGGAAGGGGGGGAUGACGUCAGGUCAGU-AUGCCCUUGCCCGGGGCCACACAGGCGCUACAGUGCAUGGUGCGGAUUGGGGGCUGAAAGCCCCCAUGAAGCCGGAAUCGGUAGUAACGGGGUAUCAGCGAUGUCCCCGUGAAUACGUUCUCGGGCCU   [922]
+Thermotog    UCAGGG-UGAACGCUGGCGGCG-UGC-CUAA-CACAUGCAAGUCGCGCGGCGGACGGGUGAGUAACACGUGGGUCCUGCCCUCCGGGGGGGAUAACCAGGGCCCUGGCUAAUACCCGGAGGAGGGGCCCGCGGCCCAUCAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCGACGACGGGUAGCCGGCCUGAGAGGGUGGUCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUGGGGAAUCUUGGACAAUGGGGCCCUGAUCCAGCGACGCCGCGCCCC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GGGGCAAGCGUUACCCGGAUUUACUGGGCGUAAAGGGGGCGUAGGCGGGGCGGUAGAGGGAGACGGAACUGCCGGUGUAGGGGUGAAAUCCGUAGAUAUCGGCAGGAACGCCGGUGGGGAAGCCGGUCUCCUGGGCCGACCCCGACGCUGAGGCCCGAAAGCCAGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCUGGCUGUAAACGAUGCCCACUAAGUGGGCCGCCUGGGGAGUACGCCCGCAAGGGUGAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCGUGUGGUUUAAUUGGAUGCUAAGCCAAGAACCUUACCAGGGCAGGUGGUGCACGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCUGGACUGCCGGCGCCG-GAGGAAGGAGGGGAUGACGUCAGGUACUC-GUGCCCCUGCCCUGGGCGACACACGCGCUACAAUGCUCAGUUCGGAUCGCAGGCUGCAAGCCUGCGUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCU   [922]
+Thermomic    UCAGGG-GGAACGCUGGCGGCG-UGC-CUAA-UGCAUGCAAGUCACGUGGCGGACGGGUGCGUAACACGUGGGGCCCUCCCGGGUGGGGGGAUAACCCGGGCUCGGGCUAAUACCGCUCGGAGGGCCCUGCGGCCUAUCAGCUAGACGGUAGGGUAACGGCCUACCGUGGCGAUGACGGGUAGCUGGUCUGAGAGGAUGGCCAGCCACACGGGCACUGAGACACGGGCCCGACUCCUACGGGAGGCAGCAGCAGGGAAUCUUCCGCAAUGGGGCCCUGACGGAGCGACGCCGCGUCCC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAGACGUAG-GGGGCGAGCGUUACCCGGAGUCACUGGGCGUAAAGGGCGUGUAGGCAGGGCGGGAGAGGCGGGUGGAAUUCCCGGUGUAGCGGUGAAAUGCGUAGAGAUCGGGAGGAACGCCGGUGGCGAAGGCGGCCCGCUGGCCCGUACCUGACGCUGAGGCGCGAAGGCGUGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCACGCAGUAAACGAUGCGGGCGAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUAAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCAGCGGAGCGUGUGGUUUAAUUCGACGCAACGCGAAGAACCUUACCAGGGCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGGACUGCCGGGCCCG-GAGGAAGGAGGGGAUGACGUCAAGUCAGC-AUGGCCCUGCCCUGGGCGACACACACGCUACAGUGCGUGGUGGGGAUCGCAGGCUGCAAGCCUGCGUGAACGCGGAGUUGCUAGUAACCGCCGGUCAGCCAUACGGCGGUGAAUACGUUCCCGGGCCU   [922]
+Thermus_t    UCAGGG-UGAACGCUGGCGGCG-UGC-CUAA-GACAUGCAAGUCGCGCGGCGGACGGGUGAGUAACGCGUGGGUCCUACCCGGAAGGGGGGACAACCCGGGCUCGGGCUAAUCCCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCCUGACGGAGCGACGCCGCGCGCC-GGCCA-A-CUCCGUGCCAGCAGCCGCGGUAAUACGGAG-GGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGGACUGCCCGCGCGG-GAGGAAGGAGGGGACGACGUCUGGUCAGC-AUGGCCCUGGCCUGGGCGACACACGUGCUACAAUGCCCAGUUCGGAUUGGGGUCUGCAAGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCU   [922]
+Chlamydia    UCAGAU-UGAACGCUGGCGGCG-UGG-AUGA-GGCAUGCAAGUCACGUGGCGGAAGGGUUAGUAAUACAUAGAUUCUGUCCUCAACUGGGAAUAACGGUUGCGACCGCUAAUACCUUGAGGGAGAGUCUAUGGGAUAUCAGCUUGUUGGUGGGGUAAUGGCCUACCAAGGCUUUGACGUCUAGGCGGAUUGAGAGAUUGACCGCCAACACUGGGACUGAGACACUGCCCAGACUUCUACGGAAGGCUGCAGUCGAGAAUCUUUCGCAAUGGACGUCUGACGAAGCGACGCCGCGCACC-GGCUA-A-CUCCGUGCCAGCAGCUGCGGUAAUACGGAG-GGUGCUAGCGUUAAUCGGAUUUAUUGGGCGUAAAGGGCGUGUAGGCAGGGUAGAUGGAGAAAAGGGAAUUCCACGUGUAGCGGUGAAAUGCGUAGAUAUGUGGAAGAACACCAGUGGCGAAGGCGCUUUUCUAAUUUACACCUGACGCUAAGGCGCGAAAGCAAGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCUUGCCGUAAACGAUGCAUACUUAGUAUGCCGCCUGAGGAGUACACUCGCAAGGGUGAAACUCAAAAGAAUUGACGGGGGCCCGC-ACAAGCAGUGGAGCAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCUGGGCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAGACUGCCUGGCCAG-GAGGAAGGCGAGGAUGACGUCAAGUCAGC-AUGGCCCUGCCCAGGGCUACACACGUGCUACAAUGCCCAGUUCGGAUUGUAGUCUGCAAGACUACAUGAAGUCGGAAUUGCUAGUAAUGGCGUGUCAGCUAUAACGCCGUGAAUACGUUCCCGGGCCU   [922]
+Planctomy    UCAGAA-UGAACGUUGGCGGCA-UGG-AUUA-GGCAUGCAAGUCGCGCGGCGAAAGGGAGAGUAAUACGUAGGACCUACCUUCGGGCUGGGAUAGCGGCGGCUGCCGGUAAUACCCUGAAGAGGGGCCUACGUCGUAUUAGCUAGUUGGUAGGGUAAUGGCCUACCAAGGCAAAGAUGCGUAUGGGGUGUGAGAGCAUGCCCCCACUCACUGGGACUGAGACACUGCCCAGACACCUACGGGUGGCUGCAGUCGAGAAUCUUCGGCAAUGGGCGCCUGACCGAGCGAUGCCGCGGGCC-GGCUA-A-UCUCGUGCCA--AGCCGCGGUAAUACGAGA-GGCCCAAACGUUAUUCGGAUUUACUGGGCUUAAAGAGUUCGUAGGCAGGGGGAUAGAGGUAAGCGGAACUGAUGGUGGAGCGGUGAAAUGCGUUGAUAUCAUCAGGAACACCGGAGGCGAAGGCGGCUUACUGGGUCCUUUCUGACGCUGAGGAACGAAAGCUAGGGGAGCAAACGGGAUUAGAUACCCCGGUAGUCCUAGCCGUAAACGAUGAGCACUGAGUGCUCCGCCUGGGGAGUAUGGUCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCUCAC-ACAAGCGGUGGAGGAUGUGGCUUAAUUCGAGGCUACGCGAAGAACCUUACCUAGUCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUCGGGUUAAGUCCCUUAACGAGCGAAACCCUUGGACUGCCGGUACCG-GAGGAAGGUGGGGAUGACGUCAAGUCCUC-AUGGCCUUGAUUAGGGCUGCACACGUCCUACAAUGCUCAGUUCGGAUUGCAGGCUGCAAGCCUGCAUGAAGCUGGAAUCGCUAGUAAUCGCGGGUCAGC-AUACCGCGGUGAAU-UGUUCCUGAGCCU   [922]
+Leptonema    UCAGAA-CUAACGCUGGCGGCG-CGU-CUUA-AACAUGCAAGUCGCGCGGCGAACGGGUGAGUAACACGUAGGUUUUGCCCACGGAGGGGGAUAACCUUUCGGAAGGCUAAUACCCUGUGGAGAAGCCUGCGUCCGAUUAGGUAGUUGGUGAGGUAACGGCUCACCAAGCCAGCGAUCGGUAGCCGGCCUGAGAGGGUGAACGGCCACACUGGAACUGGGACACGGUCCAGACUCCUACGGGAGGCAGCAGUUAAGAAUCUUGCGCAAUGGAGCUCUGACGCAGCGACGCCGCGCACC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAU-GGUGCAAGCGUUGUUCGGAAUCACUGGGCGUAAAGGGUGCGCAGGCAGUCCCGGAGAGGCAGGCGGAAUUCCCGGUGUAGCGGUGAAAUGCACAGAUAUCGGGAGGAACACCAAUGGCGAAGGCAGCCUGCUGGACGGAGACUGACGCUCAUGCACGAAAGCGUGGGGAUCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCCUAAACGUUGUAGACCAAGUCUACCGCCUGGGGACUAUGCUCGCAAGAGUGAAACUCAAAGGAAUUGACGGGGG-CCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAUGAUACGCGAAAAACCUUACCCGGGCAGGUGCUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCUAAACUGCCGGUACCG-GAGGAAGGUGGGGAUGACGUCAAAUCCUC-AUGGCCUUGUCCGGGGCUACACACGUGCUACAAUGCUCAGUUCGGAUUGGGGUCUGCAAGACUCCAUGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGC-AUGCCGCGGUGAAUACGUUCCCGGGCCU   [922]
+Chlorobiu    UCAGGA-CGAACGCUGGCGGCG-UGC-CUAA-CACAUGCAAGUCAAUUGGCGCAAGGGUGAGUAAGGCAUAGGUUCUGCCCUUUGGCUGGCAUAACCCCGAUCGGGGACAAUACCCAAAGGAUGAGCCUAUGUUCCAUCAGGUAGUUGGUAGGGUAACGGCCUACCAAGCCAACGACGGAUAGCUGGUCUGAGAGGAUGAUCAGCCACAUUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGCGCAAUGGGCGCCUGACGCAGCAACGCCGCGCCAC-GGCUA-A-CUCUGUGCCAGCAGCCGCGGUGAUACAGGG-GUGGCAAGCGUUGUCCGGAUUUACUGGGUGUAAAGGGUGCGCAGGCAGUCUCGAAGAGGAAGAUGGAAUUUCCGGUGUAACGGUGGAAUGUGUAGAUAUCGGAAAGAACACCAGUGGCGAAGGCAGUCUUCUGGUCGAGUACUGACGCUCAGGCACGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUACUAAGUAUUCCACCUGGGAAGUACGCCCGCAAGGGUGAAACUCAAAGGAAUUGACG----CC-GC-ACAAGCGGUGGAUCAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCUAGGCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCUAAACUGCCUACGUAGAGAGGAAGGA-GGGAUGACGUCAAGUCCUC-AUGGCCCUGCCUAGGGCCACACACGUGAUACAAUGCUCAGUCCGGAUCGGAGUCUGCAAGACUCCGUGAAGUUGGAAUCGCUAGUAAUCGCGGAUCAGC-AUGCCGCGGUGAAUGUGUUCCCGGGCCU   [922]
+Flavobact    UCAGGA-U-AACGCUAGCGGGA-GGC-UUAA-CACAUGCAAGCCGGCCGGCGCACGGGUGCGUAACGCGUAUGCCUUGCCCUACUGAAAGGAUAGCCCAGAUUUGGAUUAAUACUAGUAGGAUAGGCAUGCGUAAGAUUAGAUAGUUGGUGAGGUAACGGCUCACCAAGUCGACGAUCUUUAGGGGGCCUGAGAGGGUGAACCCCCACACUGGUACUGAGACACGGACCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGACAAUGGGUGCCUGAUCCAGCCAUCCCGCGCACC-GGCUA-A-CUCCGUGCCAGCAGCCGCGGUAAUACGGAG-GGUGC-AGCGUUAUCCGGAUUUAUUGGGUUUAAAGGGUCCGUAGGCAAUUAGUUUGAAGUGGCUGGAAUGUGUAGUGUAGCGGUGAAAUGCUUAGAUAUUACGCAGAACACCAAUUGCGAAGGCAGGUCACUAAGUCUAUAUUGACGCUGAUGGAC-AAAGCGU-GGGAGCGAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGGAUACUUAGUAUCCCACCUGGGGAGUACGUUCGCAAGAAUGAAACUCAAAGGAAUUGACGGGGGCC-GC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCAAGGCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGCCGUGAGGUGUUAGGUUAAGUCCU-CAACGAGCGCAACCCCUAGACUGCCAAUGUUGAGAGGAAGGUGGGGACGACGUCAAGUCAUC-ACGGCCCUGUCUUGGGCUACACACGUGCUACAAUGCUCAGUUCGGAUUGGAGUCUGCAAGACUCUAUGAAGCUGGAAUCGCUAGUAAUCGCAUAUCAGCCAUGAUGCGGUGAAUACGUUCCCGGGC-U   [922]
+Agrobacte    UCAGAA-CGAACGCUGGCGGCA-GGC-UUAA-CACAUGCAAGUCACGUGGCAGACGGGUGAGUAACGCGUGGGAUCUACCGUGCCCGCGGAAUAGCUCCGGCUGGAAUUAAUACCGGUAUGAUGAGCCCGCGUUGGAUUAGCUAGUUGGUGGGGUAAAGGCCUACCAAGGCGACGAUCCAUAGCUGGUCUGAGAGGAUGAUCAGCCACAUUGGGACUGAGACACGGCCCAAACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGGACAAUGGGCGCCUGAUCCAGCCAUGCCGCGCCCC-GGCUA-A-CUUCGUGCCAGCAGCCGCGGUAAUACGAAG-GGGGCUAGCGUUGUUCGGAAUUACUGGGCGUAAAGCGCACGUAGGCAGUAUGGAAGAGGUAAGUGGAAUUCCGAGUGUAGAGGUGAAAUUCGUAGAUAUUCGGAGGAACGCCAGUGGCGAAGGCGGCUUACUGGUCCAUUACUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAAUGUUAAACAUUCCGCCUGGGGAGUACGGUCGCAAGAUUAAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGCAGAACCUUACCAGCUCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGGACUGCCGGUGCCGAGAGGAAGGUGGGGAUGACGUCAAGUCCUC-AUGGCCCUGGGCUGGGCUACACACGUGCUACAAUGCUCAGUUCGGAUUGCACUCUGCAAGAGUGCAUGAAGUUGGAAUCGCUAGUAAUCGCAGAUCAGC-AUGCUGCGGUGAAUACGUUCCCGGGCCU   [922]
+Flexibact    UCAGGA-U-AACGCUAGCGGCA-GGC-CUAA-UACAUGCAAGUCACGUGGCGCACGGGUGCGUAACGCGUAUGCCCUACCUUUUACGGGGGAUAGCUCGGGCUCGAAUUAAUACCUAAAAGAUGGGCAUGCGUCUGAUUAGCUAGAUGGCGGGGUAACGGCCCACCAUGGCGAUGAUCAGUAGGGGUUCUGAGAGGAUGAUCCCCCACACUGGUACUGAGACACGGACCAGACUCCUACGGGAGGCAGCAGUAGGGAAUAUUGGACAAUGGGCGCC--AUCCAGCCAUGCCGCGCACC-GGCUA-A-CUCCGUGCCAGCAGCC-C----AUACGGAG--GUGC-AGCGUUGUCCGGAUUUAUUGGGUUUAAAGGGUGCGUAGGCAGUACGGUUGAAGUAGGCGGAAUUUAUGGUGUAGCGGUGAAAUGCAUAGAUACCAUAAAGAACACCGAUAGCGUAGGCAGCUUACUAAGCCGU-ACUGACGCUGAGGCAC-AAAGCAUGGGGAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGAUGAUCACUCAGUGAUCCACCUGGGGAGUACGUCCGCAAGGAUGAAACUCAAAGGAAUUGACGGGGGUC-GC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAUGAUACGCGAGGAACCUUACCUGGGCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCC--AACGAGCGCAACCCC-AGACUGCCUGCGCAGAGAGGAAGGAGGGGACGACGU--AGUCAUC-AUGGCCCUGUCCAGGGCUACACACGUGCUACAAUGCUCAGUUCGGAUUGGGGUCUGCAAGACCCUAUGAAGUUGGAAUCGCUAGUAAUCGCGUAUCAGCAAUGACGCGGUGAAUACGUUCCCGGACCU   [922]
+Rhodocycl    UCAGAU-UGAACGCUGGCGGCA-UGC-CUUA-CACAUGCAAGUCACGUGGCGAACGGGUGAGUAAUGCAUCGGACAUGCCCUGAAGGGGGGAUAACGUAGCGUUACGCUAAUACCUUUGGGAGUGGCCGAUGUCGGAUUAGCUAGUUGGUGGGGUAAAAGCCUACCAAGGCAACGAUCCGUAGCGGGUCUGAGAGGAUGAUCCGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUUUUGGACAAUGGGCGCCUGAUCCAGCCAUGCCGCGCACC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GGUGC-AGCGUUAAUCGGAAUUACUGGGCGUAAAGCGUGCGCAGGCAGUACGGCAGAGGGGGGUGGAAUUCCACGUGUAGCAGUGAAAUGCGUAGAGAUGUGGAGGAACACCGAUGGCGAAGGCAGCCCCCUGGGCCAAUACUGACGCUCAUGCACG-AAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCCUAAACGAUGUCAACUAAGUUGACCGCCUGGGGAGUACGGCGGCAAGGUUAAAACUCAAAGGAAUUGACGGGGA-CCGC-ACAAGCGGUGGAUGAUGUGGAUUAAUUCGAUGCAACGCGAAAAACCUUACCUACCCAGGUGCUGCAUGGC-GUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUGAACUGCCGGUACCG-GAGGAAGGUGGGGAUGACGUCAAGUCCUC-AUGGCCCUGGGUAGGGCUUCACACGUCAUACAAUGCGUAGUCCGGAUUGCAGUCUGCAAGACUGCAUGAAGUCGGAAUCGCUAGUAAUCGCGGAUCAGC-AUGUCGCGGUGAAUACGUUCCCGGGUCU   [922]
+Desulfovi    UCAGAU-UGAACGCUGGCGGCG-UGC-UUAA-CACAUGCAAGUCACGUGGCGCACGGGUGAGUAACGCGUGGAUUCUGCCCUUAUGUCGGGAUAACAGUUGCGGCUGCUAAUACCGUAAGGAUGAGUCCGCGUCCCAUUAGCUUGUUGGCGGGGUAACGGCCCACCAAGGCAUCGAUGGGUAGCCGAUUUGAGAGGAUGAUCGGCCACACUGGAACUGAAACACGGUCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGCGCAAUGGGCGCCUGACGCAGCGACGCCGCGCACC-GGCUA-A-CUCCGUGCCAGCAGCCGCGGUAAUACGGAG-GGUGCAAGCGUUAAUCGGAAUUACUGGGCGUAAAGCGCACGUAGGCAAUCCGGGAGAGGGUGGCGGAAUUCCAGGUGUAGGAGUGAAAUCCGUAGAUAUCUGGAGGAACAUCAGUGGCGAAGGCGGCCACCUGGACCGGUAUUGACGCUGAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCUGUAAACGAUGGAUGCUAAGCAUCCCGCCUGGGGAGUACGGUCGCAAGGCUGAAACUCAAAGAAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGUAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCUAGGUAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCUAGACUGCCCGGCCGG-GAGGAAGGUGGGGACGACGUCAAGUCAUC-AUGGCCCUGCCUAGGGCUACACACGUACUACAAUGCCCAGUCCGGAUUGCAGUCUGCAAGACUGCAUGAAGUUGGAAUCGCUAGUAAUUCGAGAUCAGC-AUGCUCGGGUGAAUGCGUUCCCGGGCCU   [922]
+Gloeobact    ----GA-UGAACGCUGGCGGCG-UGC-UUAA-CGCAUGCAAGUCACGUGGCGGACGGGUGAGUAACACGUGGGAUCUGCCCUCAGGGGGGGAUAACGGCCGCGGCCGCUAAUACCCUGAGGAUGAGCCCGCGUCUGAUUAGCUAGUUGGUGGGGUAAUGGCCUACCAAGGCUACGAUCAGUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUUUUCCGCAAUGGGCGCCURACGGAGCAACGCCGCGCAUC-GGCUA-A-CUCCGUGCCAGCAGCCGCGGUAAUACGGAG-GAUGCAAGCGUUAUCCGGAAUUAUUGGGCGUAAAGCGUACGUAGGCAGUGCGGUAGGGGCAAGGGGAAUUCCCGGUGUAGCGGUGAAAUGUGGAGAUAUCGGGAAGAACACCAGCGGCGAAAGCGCCUUGCUGGACCGCAACUGACGCUGAGGUACGAAAGCCAGGGGAGCAAAUGGGAUUAGAUACCCCAGUAGUCCUGGCCGUAAACGAUGGACACUAAGUGUCCCGCCUGGGGAGUACGCACGCAAGUGUGAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCAGGGCAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGGACUGCCGGUACCG-GAGGAAGGUGGGGAUGACGUCAAGUCAGC-AUGGCUCUGUCCUGGGCUACACACGUGCUACAAUGCUCAGUUCGGAUUGCAGGCUGCAAGCCUGCAUGAAGUCGGAAUCGCUAGUAAUCGCAGGUCAGC-AUACUGCGGUGAAUACGUUCCCGGGCCU   [922]
+Synechoco    UCAGGA-UGAACGCUGGCGGCG-UGC-UUAA-CACAUGCAAGUCACGUGGCGGACGGGUGAGUAACGCGUGAGAUCUGCCUACAGGCGGGGACAACAGUUGCGACUGCUAAUACCCUGUAGAUGAGCUCGCGUCUGAUUAGCUAGUUGGUGGGGUAAGGGCCUACCAAGGCGACGAUCAGUAGCUGGUCUGAGAGGAUGAUCAGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUUUUCCGCAAUGGGCGC--GACGGAGCAACGCCGCGCCUC-GGCUA-A-UUCCGUGCCAGCAGCCGCGGUAAUACGGGA-GAGGCAAGCGUUAUCCGGAAUUAUUGGGCGUAAAGCGCCUGCAGGCAGUAUGGUAGGGGUAGCGGGAAUUCCAGGUGUAGCGGUGAAAUGCGUAGAUAUCUGGAAGAACACCAGCGGCGAAAGCGCGCUACUGGGCCAUAACUGACGCUCAUGGACGAAAGCUAGGGGAGCGAAAGGGAUUAGAUACCCCUGUAGUCCUAGCCGUAAACGAUGAACACUAAGUGUUCCGCCUGGGGAGUACGCACGCAAGUUGGAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGUAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCAGGGUAGGUGGUGCAUGGCUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCACGAACUGCCGGUACCG-GAGGAAGGUGUGGACGACGUCAAGUCAUC-AUGCCCCUAUCCUGGGCUACACACGUACUACAAUGCUCAGUUCAGAUUGCAGGCUGCAAGCCUGCAUGAAGGCGGAAUCGCUAGUAAUCGCAGGUCAGC-AUACUGCGGUGAAUACGUUCCCGGGCCU   [922]
+Arthrobac    UCAGGA-UGAACGCUGGCGGCG-UGC-UUAA-CACAUGCAAGUCACGUGGCGAACGGGUGAGUAACACGUGAGUCCUGCCCUUGACCUGGGAUAAGCCUGGCUGGGUCUAAUACCUUUUGGAUGGACUCGCGGCCUAUCAGCUUGUUGGUGAGGUAAUGGCUCACCAAGGCGACGACGGGUAGCCGGCCUGAGAGGGUGACCGGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGCACAAUGGGCGCCUGAUGCAGCGACGCCGCGCGCC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GGCGCAAGCGUUAUCCGGAAUUAUUGGGCGUAAAGAGCUCGUAGGCAGUGAUGUAGGGGAGACUGGAAUUCCUGGUGUAGCGGUGAAAUGCGCAGAUAUCAGGAGGAACACCGAUGGCGAAGGCAGGUCUCUGGGCAUUAACUGACGCUGAGGAGCGAAAGCAUGGGGAGCGAACAGGAUUAGAUACCCUGGUAGUCCAUGCCGUAAACGUUGGGCACUAAGUGCCCCGCCUGGGGAGUACGGCCGCAAGGCUAAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGCGGAGCAUGCGGAUUAAUUCGAUGCAACGCGAAGAACCUUACCAAGGCAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUCGGACUGCCGGGCUCG-GAGGAAGGUGGGGACGACGUCAAAUCAUC-AUGCCCCUGUCUUGGGCUUCACGCAUGCUACAAUGCUCAGUUCGGAUUGGGGUCUGCAAGACCCCAUGAAGUCGGAGUCGCUAGUAAUCGCAGAUCAGCAACGCUGCGGUGAAUACGUUCCCGGGCCU   [922]
+Heliobact    UCAGGA-CGAACGCUGGCGGCA-UGC-CUAA-CACAUGCAAGUCACGUGGCGGACGGGUGAGUAACGCGUGGACCCUACCGGAGAGGGGGGAUAACAGUCCGGGCUGCUAAUACCUUUCCGAUGGGUCCGCGUCCGAUUAGCUAGUUGGUAGGGUAAAGGCCUACCAAGGCGACGAUCGGUAGCCGGCCUGAGAGGGUGAACGGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUCUUCCGCAAUGGGCGCCUGACGGAGCAAUGCCGCGCCCC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GGGGCAAGCGUUGUCCGGAAUGACUGGGCGUAAAGCGCGUGCAGGCAGAGAUGGAGAGGAUAGCGGAAUUCCCGGUGUAGCGGUGAAAUGCGUAGAUAUCGGGAGGAACACCCGUGGCGAAGGCGGCUAUCUGGACAUUAUCUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAGUGCUAAGCACUCCGCCUGGGGAGUACGGCCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGACGCAACGCGAAGAACCUUACCAAGGCAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAGACUGCCCGGCCGG-GAGGAAGGCGGGGAUGACGUCAAAUCAUC-AUGCCCCUGUCUUGGGCUACACACGUGCUACAAUGCCCAGUUCGGAUUGCUCUCUGCAAGAGAGCAUGAAGGCGGAAUCGCUAGUAAUCGCGGGUCAGC-AUACCGCGGUGAAUACGUUCCCGGGCCU   [922]
+Clostridi    UCAGGA-CGAACGCUGGCGGUA-UGC-UUAA-CACAUGCAAGUCACGUGGCGAACGGGUGAGUAACGCGUGGGUCCUGCCUCAUGGAAGGAAUAGCCUCGGCUGGGAGUAAAGCCCAUGAGAUGGACCCGCGUCCCAUUAGCUAGUUGGUGAGAUAACAGCCCACCAAGGCAACGAUGGGUAACCGGUCUGAGAGGGCGAACGGUCACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGCGCAAUGGGGCCCUGACGCAGCAAUACCGCGUCCC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GGGACAAGCGUUGUCCGGAAUCACUGGGCGUAAAGGGCGCGUAGGCAGUACUGGAGAGGCAAGUGGAAUUCCUAGUGUAGCGGUGAAAUGCGUAGAUAUUAGGAGGAACACCGGUGGCGAAGGCGGCUUGCUGGACAGAUACUGACGCUGAGGUGCGAAAGCGUGGGGAGCGAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAUGACUAAGUCAUCCGCCUGGGGAGUACGACCGCAAGGUUGAAACUCAAAGGAAUUGACGGGGACCCGC-ACAAGCAGCGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUGGACUGCCGUAUACG-GAGGAAGGUGGGGACGACGUCAAAUCAUC-AUGCCCCUGGCCUGGGCUACACACGUGCUACAAUGCCCAGUUCGGAUUGCAGGCUGAAAGCCUGCAUGAAGUUGGAGUUGCUAGUAAUCGCAGAUCAGA-AUGCUGCGGUGAAUGCGUUCCCGGGUCU   [922]
+Bacillus     UCAGGA-CGAACGCUGGCGGCG-UGC-CUAA-UACAUGCAAGUCGCGCGGCGGACGGGUGAGUAACACGUGGGUCCUGCCUGUAAGCUGGGAUAACUCCGGCCGGGGCUAAUACCUUACAGAUGGACCCGCGGCGCAUUAGCUAGUUGGUGAGGUAACGGCUCACCAAGGCAACGAUGCGUAGCCGACCUGAGAGGGUGAUCGGCCACACUGGGACUGAGACACGGCCCAGACUCCUACGGGAGGCAGCAGUAGGGAAUCUUCCGCAAUGGACGUCUGACGGAGCAACGCCGCGCCAC-GGCUA-A-CUACGUGCCAGCAGCCGCGGUAAUACGUAG-GUGGCAAGCGUUGUCCGGAAUUAUUGGGCGUAAAGGGCUCGCAGGCAGUGCAGAAGAGGAGAGUGGAAUUCCACGUGUAGCGGUGAAAUGCGUAGAGAUGUGGAGGAACACCAGUGGCGAAGGCGACUCUCUGGUCUGUAACUGACGCUGAGGAGCGAAAGCGUGGGGAGCGAACAGGAUUAGAUACCCUGGUAGUCCACGCCGUAAACGAUGAGUGCUAAGCACUCCGCCUGGGGAGUACGGUCGCAAGACUGAAACUCAAAGGAAUUGACGGGGGCCCGC-ACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGUCAGGUGGUGCAUGGUUGUCGUCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUGGACUGCCGGUACCG-GAGGAAGGUGGGGAUGACGUCAAAUCAUC-AUGCCCCUGACCUGGGCUACACACGUGCUACAAUGCUCAGUUCGGAUCGCAGUCUGCAAGACUGCGUGAAGCUGGAAUCGCUAGUAAUCGCGGAUCAGC-AUGCCGCGGUGAAUACGUUCCCGGGCCU   [922]
+pSL_17       GGACCC-C-ACUGCUAUCGGGGUGGGACUAA-GCCAUGCGAGUUGCGCGGCGGACGGCUCCGUAACACGUAGCCCCUACCCUCAGGCGGGAAUAACCCCGGCUGGGGCUAAUUCCCUGAGGAUGGGGCUGCGGCCGAUCAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCUAUAACCGGUACGGGCCGUGAGAGCGGGAGCCCGGAGAUGGGUACUGAGACAAGGACCCAGGCCCUACGGGG-GCAGCAGGCGCGAAAACUCCGCAAUGCGCGCGUGACGGGGCUACCCCGAGGGGAGGGCAA-GUCGG-GUGUCAGCCGCCGCGGUAAUAC-CCGCUCCCCGAGUGGUGGGGACGAUUAUUGGGCUUAAAGCGUCCGUAGCCGGGGCGGGAGAGGCCGGCGGUAUUCCCGGGGUAGGGGUGAAAUCCUAUAAUCCCGGGAGGACCGCCGGUGGCGAAGGCGGCCGGCCAGAACGCGCCCGACGGUGAGGGACGAAAGCUGGGGGAGCGAUCCGGAUUAGAUACCCGGGUAGUCCCAGCCGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGUCGCAAGACUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGAAGCUUGCGGUUCAAUUGGAGUCAACACCGGGAACCUUACCGGGGGAGGAGGUGCAUGGCCGUCGCCAGUUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGCAACGAUCGAGACCCGCGGACUGCCGCCGGCG-GAGGAAGGGGCGGGCUACGGCAGGUCAGU-AUGCCCCGCCCCCGGGCCACACGCGAGCUGCAAUGCUCAGUUGGGAUCGAGGGCUGCAAGCCCUCGUGAACAUGGAAUCCCUAGUAAUCGCGCGUCAUC-AACGCGCGGUGAAUACGUCCCCGCUCCU   [922]
+pSL_22       GGACCC-G-ACUGCUAUCAGGGUGAGACUAA-GCCAUGCGAGUCACGUGGCGCACGGCUCAGUAAUACACGGUCCCUACCCUUAGGCGCGGAUAACCGCGGCUGCGGAUAAUCCGCUAAGGAUGGGACCGUGCCCGAUCAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCUAAGACCGGUGCGGGCCGUGGGAGCGGGAGCCCGGAGAUGGACCCUGAGACAAGGGUCCAGGCCCUACGGGG-GCAGCAGGCG-GAAAACUCCGCUAUGCGCGCGCGACGGGGUCACCCUGAGGGGAGGGCAA-GUCUG-GUGUCAGCCGC-G-GGUAAUAC-CAGCUCCCCGAGUGGUCGGGGCGAUUAUUGGGCCUAAAGCGUCCGUAGCCGGGGCGGGAGAGGCUGGCGGUAUUCCCGAGGGAGGGGCGAAAUCCUGAGAUCUCGGGAGGACCACCAGUGGCGAAGGCGGCCAGCUGGAACGCGCCCGACGGUGAGGGACGAAAGCCGGGGGAGCGAACCGGUUUAGAUACCCGGGUAGUCCCGGCCGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGGAUGCUGCGGUUUAAUUGGAGUCAACCCCGGGAACCUUACCAGGGGAGGAGGUGCAUGGCCGUCGCCAGCUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGCAACGAGCGAGACCCCCGGACUGCCGCCGGCG-GAGGAAGGAGGGGGCUACGGCAGGUCAGU-AUGCCCCGCCCCUGGGCCACACGCGGCAUACAAUGCUCAGUUGGGAUCGAGGGUUGCAAACCCUCGUGAACCCGGAAUCCCUAGUAACUSCGCGUCACC-AACGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+pSL_50       GGACAU-G-ACCGCUGUCGGGGUGGGACUAA-GCCAUGCGAGUCACGUGGCGCACGGCUCCGUAAAACGUGGCUACUACCCCCAGGCGAGAAUAACCCCGGCUGGGGCUAAUUCUCUGGGGAUGUGGCCACGGCCUAUCAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCGAUGACGGGUACGGGCCCUGAGAGGGGGAGCCCGGAGAUGGCCACUGAGACAAGGGGCCAGGCCCUACGGGG-GCAGCAGGCGCGAAACCUCCGCAAUGGGCGCCCGACGGGGUCACCUCGAGCGGGGGGCAA-GUCUG-GUGUCAGCCGCCGCGGUAACAC-CAGCCCCGCGAGUGGUCAUGACGUUUAUUGGGCUUAAAGCAUCCGUAGCCGGGGUGGGAGAAGCCGGGGGUACUCCAGGGGUAGGGGCAAAAUCCUAUAAUCCCUGGAGGACCACCAGUGGCGAAGGCGCCCGGCUAGAACACGCCCGACGGUGAGGGAUGAAAGCUGGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCCAGCUGUAAACGAUGCAGGCUAAGCCUGCCGCCUGGGGAGUACGGUCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGGGCACCACAAGGGGUGAAUGCUGCGGUUUAAUUGGAGUCAACGCCGGGAACCUUACCAGGGGAGGAGGUGCAUGGCCGUCGCCAGUUCGUGCCGUGAGGUGUCCGGUUAAGUCCGGAAACGAACGAGACCCCUAGACUGCCGGCGUCG-GAGGAAGGAGGGGGCUACGGCAGGUCAGU-AUGCCCCGCCCCUGGGCUACACGCGGCAUGCAAUGCUAAGUUGGAAUCGAGGGCUGCAAGCCCUCGUGAAUACGAAAUCCCUAGUAACCGCGUGUCAUC-AUCGCGCGGUGAAUACGUCCCUGCCCCU   [922]
+pJP_78       GGAGGG-A-ACCCCUAUCGGGUUCAGACUAA-GCCAUGCAAGUCGCGCGGCGCACGGCUCAGUAAUACGCGGUCCCUACCCUGGGGCCGGGAUAACCUCGGCUGAGGCUAAUCCCCCCAGGAUGGGACCG-GGCCUAUCAGGUAGUAGGUGGGGUAACGGCCCACCUAGCCUACGACGGGUACGGGCCCUGAGAGGGGGA-CCCGGAGAUGGGCACUGAGACAAGGGUCUAGGCCCUAAGGGGUGCAGCAGGCGCGAAGAUUCCGCAAUGCCCGGGCGACGGAGUGAACCCGAGGGGAGGGCAA-GGCUG-GUGGCAGCCGCCGCGGUAAAAC-CAGCUCCCCGAGGGGUUCCCACGCAUACUGGGCCUAAAGCGUCCGUAGCCGGAGCGGGAGGAGCCGGGGGUAUUCCCGGGGGAGCGGUAAAAUGCGUAGAUCCCGGGAGGACCACCAGUGGCGAAGGCGCCCGGCUUGAACGCGUCCGACGGUGAGGGACGAAAGCUGGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCCAGCCGUAAACGAUGCCGGCUAAGCCGGCCGCCUGAGGAGUACGGCCGCAAGGCUGAAACUUGAAGGAAUUGACGGGGGGGCACCACAAGGGGUGAAGCCUGCGGCUUAAUUGGACUCAACGCCGGGAAUCUUACCGGGGGGGGUGGUGCAUGGCCGUCGCCAGCUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGCAACGAGCGAGACCCCCGGACCGCCUCCGGAG-GAGGAAGGUGGGGGCUACGGCAGGUCAGU-AUGCCCCUCCCCCGGGCCGCACGCGGGCUGCAAUGCGUAGUUGGGAUCGAGGGCUGCAAGCCCUCGUGAACCCGGAAUCCCUAGUAACCGCGGUUCUCC-AUACCGCGGUGAAUACGUCCCUGCCCCU   [922]
+pJP_27       GGAGGG-A-ACCCCUAUCGGGCUCGCACUAA-GCCAUGCGAGUCGCGUGGCGCACGGCUCCGUAAUACACGGUCCCUGUCCUGGGGCCGGGAUAACCUCGGCUGAGGCCAAUACCCCCAGGGUGGGACCGUGGCCUAUCAGGUAGUAGGUGGGGUAACGGCCCACCUAGCCUAAGACGGGUACGGGCUCUGAGAGGAGGAGCCCGGAGAUGGGCACUGAGACAAGGGCCCAGGCCCUACGGGG-GCAGCAGGCGGGAAACUUCCCCAAUGCGCGCGUGAGGGAGUGAGCCCGAGGGGAGGGUAA-GGCUG-GUGCCAGCCGCCGCGGUAAAAC-CAGCUCCCCGAGGGGUUCCCACGCAUACUGGGCCUAAAGCGUCCGUAGCCGGAGCGGGAGGGGCCGAGGGUAUUCCGGGGGGAGCGGUAAAAUGCGUAGAUCCCCGGAGGACCACCAGUGGCGAAGGCGCUCGGCUGGAACGCGUCCGACGGUGAGGGACGAAAGCUGGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCCAGCCGUAAACGAUGCCGGCUAAGCCGGCCGCCUGAGGAGUACAGCCGCAAGGCCGAAACUUAAAGGAAUUGACGGGGGGGCACCACAAGGGGUGAAGCCUGCGGCUCAAUUGGACUCAACGCCGGGAAUCUUACCGGGGGGGGUGGUGCAUGGCCGUCGCCAGCUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGCAACGAGCGAGACCCCCGGACUGCCGGCGCCG-GAGGAAGGAGGGGGCUACGGCAGGUCAGU-AUGCCCCUCCCCCGGGCCGCACGCGGGCUGCAAUGCGUGGUUGGGAUCGAGGGUUGCAAGCCCUCGUGAACCCGGAAUCCCUAGUAACCGCGGUUCUCC-AUACCGCGGUGAAUACGUCCCUGCCCCU   [922]
+pSL_4        GGACCC-C-ACUGCUAUCGGGGUGGGACUAA-GCCAUGCAAGUCAUAUGGCGUACGGCUCAGUAACACGUAGCUCCUGCCCUAGAGGGGGGAUAUCCUCGGCUGAGGGUAAACCCUCUAGGAUGGGGCUGCGGCCGAUCAGGUAGUUGGUGAGGUAACGGCUCACCAAGCCUGUGACCGGUGCGGGCCGUGAGAGCGGGAGCCCGGAGAUGGACUCUGAGACAAGAGUCCAGGCCCUACGGGG-GCAG-AGGCGCGAAACCUCCGCAAUG-GCGCGCGACGGGGUCGCCCCGAGGGGGGGGCAA-GACGG-GUGUCAGCCGCCGCGGUAAUAC-CCGCUCCCCGAGUGGUGGGGACUUUUAUUGGGCUUAAAGCGUCCGUAGCGGGGGUGGAAGAAGCCACCGGUACUCCUAGGGUAGGGGUAAAAUCCUCUGAUCCUAGGAGGACCACCAGUGGCGAAGGCGGGUGGCUAGGACACGCCCGACCGUGAGGGACGAAAGCUGGGGGAGCAAACGGGAUUAGAUACCCCGGUAGUCCCAGCUGUAAACGAUGCAGACUAAGUCUGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGGAUGUUGCGGCUUAAUUGGAGUCAACGCCGGAAACCUUACCAGGGGAGGAGGUGCAUGGCCGUCGCCAGUGCGUGCCGUGAGGCGUCCUGUUAAUUCAGGCAACGGACGAGACCCCCAGACUGCCGCCGGCG-GAGGAAGGAGGGGGCCACGGCAGGUCAGU-AUGCCCCGCCCCUGGGCCGCACGCGACAUUCAAUGCUAGGUUGGGAUCGAGGGUUGCAAGCCCUCGUGAACGUGGAAUCCCUAGUAACCGCGCGUUACC-AUUGCGCGGUGAAUACGUCCCCGCUCCU   [922]
+pJP_96       GGACCC-G-ACUGCUAUGGGGGUGGGACUAA-GCCAUGCGAGUCGCGCGGCGUACGGCUCAGUAAAACGUGGCUCCUACCCUAGGGUGGGGAUAACCCCGGCUGGGGAUAACCCCCCUAGGAUGGGGCCACGUCCCAUCAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCUAUAACGGGUACGGGCCGUGAAAGCGGGAGCCCGGAGAUGGGCACUGAGACAAGGGCCCAGGC--UACGGG-GGCAGCAGGCGCGAAACCUACACAAUGUGCGCACGAUGUGGUCACCCCCAGCGGGGGGCAA-GUCUG-GUGUCUGCCGCCGCGGUAAUAC-CAGCUCCGCGAGUGGUCGGGGCGUUUAUUGGGCCUAAAGCGUCCGUAGCCGGGGCGGGAGAGGCUGAGGGUACUCCUGGGGUAGGGGCGAAAUCCUAUAAUCCCAGGAGGACCACCAGUGGCGAAGGCGCUCAGCUGGAACGCGCCCGACGGUGAGGGACGAAAGCCGGGGGAGCGAAGGGGAUUAGAUACCCCCGUAGUCCCGGCUGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGAAGCCUGCGGUUUAAUUGGAGUCAACCCCGAGAACCUUACCGGGGGAGGAGGUGCAUGGCCGUCGCCAGUUCGUGCCGUGAGGUGUCCGGUUAAGUCCGGCAACGAACAAGACCCCCAGACUGCCGCCGGCG-GAGGAAGGAGGGGGCCACGGCAGGUCCGU-AUGCCCCGCCCCCGGGCCACACGCGGGCUGCAAUGCUCAGUUGGGAUCGAGGGCUGCAAGCCCUCGUGAACAUGGAAUCCCUAGUAACCGCGCGUCACC-AACGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+pSL_12       GGACCC-G-ACUGCUAUCGGAAUGGGACUAA-GCCAUGCGAGUCAUAUGGCGAACGGCUCAGUAACACGUGGCCCCUACCCUCAAGUGCGGAUAACCCCGGCUGGGACUAAUCCGUUGAGGAUGGGGCUACGGCCGAUCAGGCUGUUGGUGGGGUAACGGCCCACCAAACCUUUGACCGGUACGGGCUUUGAGAGAAGGAGCCCGGAGAUGGACACUGAGACAAGGGUCCAGGCCCUAUGGGGCCCAGCAGGCGCGAAACCUCCACAAUGGGCGCCUGAUGGGGUCAUUCCGAGGGGAGGGCAA-GUCUG-GUGUCAGCCGCCGCGGUAAUAC-CAGCUCCCCGAGUGGUCGGGACGGUUAUUGGGCCUAAAGCAUCCGUAGCCGGGGCGGGAGAGGUGGACGGUACUCCAAGGGUAGGGGUAAAAUCCUCUGAUCCUUGGAAGACCACCGGUGGCGAAGGCGGUCCACCAGAACGCGCCCGACGGUGAGGGAUGAAAGCUGGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCCAGCCGUAAACGAUGCGAGCUAAGCUCGCCGCCUGGGGAGCACGGACGCAAGUCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGAAGCCUGCGGUUCAAUUGGAGUCAACGCCGGGAACCUCACCAGGGGAGGAGGUGCAUGGCCGUCGCCAGCUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGCAACGAUCGAGACCCUCGGACUGCCGCCAGGG-CAGGAAGGAGAGGGCUACGGCAGGUCAGU-AUGCCCCGCCCCUGGGCCACACGCGGGCUGCAAUGCCCAGUUGUGAUUGAGGGUUGCAAACCCUCAUGAAUAUGGAAUCCCUAGUAACCGCGUGACACC-AUCGCGCGAUGAAUACGUCCCUGCUCCU   [922]
+marineSBA    GGACCU-G-ACUGCUAUCGGAUUGAUACUAA-GCCAUGCGAGUCUGAAGGCAUACGGCUCAGUAACGCGUAGUCCCUAACCUAUGGCGGGAAUAACCUCGGCUGAGAAUAAUGCCCCGUAGAUGGGACUGCGGCCUAUCAGUUUGUUGGUGAGGUAAUGGCCCACCAAGACUAUUACAGGUACGGGCUCUGAGAGGAGUAGCCCGGAGAUGGGUACUGAGACACGGACCCAGGGCCUAUGGGGCGCAGCAGGCGAGAAAACUUUGCAAUGUGCGCACGACAAGGUUAAUCCGAGGGGUGGGCAA-GUCUG-GUGUCAGCCGCCGCGGUAAAAC-CAGCACCUCAAGUGGUCAGGAUGAUUAUUGGGCCUAAAGCAUCCGUAGCCGGAGUGGGAGAAGUAGACGGUACUCGGUAGGAAGUGGUAAAAUGCUUUGAUCUAUCGAUGACCACCUGUGGCGAAGGCGGUCUACUAGAACACGUCCGACGGUGAGGGAUGAAAGCUGGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCCAGCUGUAAACUAUGCAAACUCAGUUUGCCGCCUGGGAAGUACGUACGCAAGUAUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGAAGCUGCGGUUCAAUUGGA--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------   [922]
+Sulfolobu    GGACCC-G-ACCGCUAUCGGGGUAGGGAUAA-GCCAUGGGAGUCACGUGGCGGACGGCUGAGUAACACGUGGCUCCUACCCUCGGGCGGGGAUAACCCCGGCUGGGGAUAAUCCCCCGAGGAUGGGGCUACGGCCCAUCAGGCUGUCGGUGGGGUAAAGGCCCACCGAACCUAUAACGGGUAGGGGCCGUGGAAGCGGGAGCCUCCAGUUGGGCACUGAGACAAGGGCCCAGGCCCUACGGGGCGCACCAGGCGCGAAACGUCCCCAAUGCGCGCGUGAGGGCGCUACCCCGAGCGGGGGGCAA-GUCUG-GUGUCAGCCGCCGCGGUAAUAC-CAGCUCCGCGAGUGGUCGGGGUGAUUACUGGGCCUAAAGCGCCUGUAGCCGGGGCGGGAGAGGCGGGGGGUACUCCCGGAGUAGGGGCGAAAUCCUUAGAUACCGGGAGGACCACCAGUGGCGGAAGCGCCCCGCUAGAACGCGCCCGACGGUGAGAGGCGAAAGCCGGGGCAGCAAACGGGAUUAGAUACCCCGGUAGUCCCGGCUGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGUCGCAAGACUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGGAACCUGCGGCUCAAUUGGAGUCAACGCCUGGAAUCUUACCGGGGGAGGAGGUGCAUGGCCGUCGCCAGCUCGUGUUGUGAAAUGUCCGGUUAAGUCCGGCAACGAGCGAGACCCCCAGACUGCCGGCGCCG-GAGGAAGGAGGGGGCCACGGCAGGUCAGC-AUGCCCCGCUCCCGGGCCGCACGCGGGUUACAAUGCGCAGUUGGGAUCGAGGGCUGAAAGCCCUCGUGAACGAGGAAUCCCUAGUAACCGCGGGUCAAC-AACCCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Pyrodicti    GGGCCC-G-ACCGCUAUCGGGGUGGGACUAA-GCCAUGGGAGUCGCGCGGCGGACGGCUGAGUAACACGUGGCCCCUACCCUCGGGCGGGGAUAACCCCGGCUGGGGCUAAUCCCCCGAGGAUGGGGCUGCGGCCCAUCAGGUAGUUGGCGGGGUAACGGCCCGCCAAGCCGAUAACGGGUAGGGGCCGUGAGAGCGGGAG-CCCCAGAUGGGCACUGAGACAAGGGCCCAGGCCCUACGGGGCGCACCAGGCGCGAAACCUCCGCAAUGCGGCCGUGACGGGGUCACCCCGAGCGGGGGGCAA-GUCUG-GUGUCAGCCGCCGCGGUAAUAC-CAGCCCCGCGAGCGGUCGGGAUGAUUACUGGGCCUAAAGCGCCCGUAGCCGGGGCGGGAGAGGCCGAGGGUACUCCCGGGGUAGGGGCGAAAUCCGAUAAUCCCGGGAGGACCACCAGUGGCGAAGGCGCUCGGCUGGAACGCGCCCGACGGUGAGGGGCGAAAGCCGGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCCGGCUGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGG-AGCACCACAAGGGGUGGAGCCUGCGGCUUAAUUGGAGUCAACGCCGGGAAUCUUACCGGGGGAGGAGGUGCAUGGCCGUCGCCAGCUCGUGCCGUGAGGUGUCCGGUUAAGUCCGGCAACGAGCGAGACCCCCAGACUGCCG-CGGCG-GAGGAAGGAGGGGGCCACGGCAGGUCAGC-AUGCCCCGCCCCCGGGCUGCACGCGGGCUACAAUGCGCAGUUGGGAUCGAGGGCUGCAAGCCCUCGUGAACGCGGAAUCCCUAGUAACCGCGCGUUAGC-AUCGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Desulfuro    GGUCCC-G-ACCGCUAUCGGGGUGGGGCUAA-GCCAUGGGAGUCACGUGGCGGACGGCUGAGUAACACGUGGCUCCUACCCUCGGGGGGGGAUAACACCGGCUGGUGCUAAUCCCCCGAGGAUGGGGCUACGGCCCAUUAGGUUGUUGGCGGGGUAACGGCCCGCCAAGCCGAUAAUGGGUAGGGGCCGUGAGAGCGGGAGCCCCCAGAUGGGCACUGAGACAAGGGCCCAGGCCCUACGGGGCGCACCAGGCGCGAAACCUCCGCAAUGCGGCCGUGACGGGGCCACCCCGAGCGGGGGGCAA-GUCUG-GUGUCAGCCGCCGCGGUAAUAC-CAGCCCCGCGAGUGGUCGGGACGAUUAUUGGGCCUAAAGCGCCCGUAGCCGGGGUGGGAGAGGCCGAGGGUACUCCCGGGGUAGGGGCGAAAUCCUAUAAUCCCGGGAGGACCACCAGUGGCGAAGGCGCUCGGCUGGAACACGCCCGACGGUGAGGGGCGAAAGCCGGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCCGGCUGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGGAGCCUGCGGUUCAAUUGGAGUCAACGCCGGGAAUCUCACCGGGGGAGGAGGUGCAUGGCCGUCGCCAGCUCGUGCUGUGAAGUGUCCGGUUAAGUCCGGAAACGAGCGAGACCCCCAGACUGCCGCCGGCG-GAGGAAGGAGGGGGCCACGGCAGGUCAGC-AUGCCCCGCCCCCGGGCUACACGCGGGCUACAAUGCGUGGUUGGGAUCGAGGGCUGCAAGCCCUCGUGAACGAGGAAUCCCUAGUAACCGCGCGUCAAC-AUCGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Thermopro    GGACCU-G-ACCGCUAUCGGGGUGGGGCUAA-GCCAUGCGAGUCGCGCGGCGCACGGCUCAGUAACACGUACCCCCUAACCUCGGGGGGGGACAACCCCGGCUGGGGCUGAUCCCCCGAGGGUGGGGGUACGGCCCAUCAGGUUGUUGGCGGGGUAACGGCCCGCCAAGCCGAAGACGGGUAGGGGCGGUGAGAGCCGGAGCCCCGAGAUGGGCACUGAGACAAGGGCCCAGGCCCUACGGGGUGCAGCAGGCGCGAAUACUCCGCAAUGCGGCCGCGACGGGGCCACCCCGAGCGGGGGGUAA-GUCUG-GUGUCAGCCGCCGCGGUAAUAC-CAGCCCCGCGAGUGGUCAGGGUGAUUACUGGGCUUAAAGCGCCCGUAGCCGGGGCGGGAGAGGCCGCCGGUACUCCGGGGGUAGGGGCGAAAUCCUAUAAUCCCCGGAGGACCACCAGUGGCGAAAGCGGGCGGCCAGAACGCGCCCGACGGUGAGGGGCGAAAGCCGGGGGAGCAAAGGGGAUUAGAUACCCCUGUAGUCCCGGCCGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGGGCACCACAAGGGGUGAAGCUUGCGGCUUAAUUGGAGUCAACGCCGGAAACCUUACCCGGGGAGGAGGUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUCCGGUUAAGUCCGGCAACGAGCGAGACCCCCAGACUGCCGGCGCCG-GAGGAAGGAGGGGGCGACGGCAGGUCAGU-AUGCCCCGCCCCGGGGCUGCACGCGAGCUGCAAUGCCCAGUAGGGAUCGAGGGCUGCAAGCCCUCGUGAACGUGGAAUCCCUAGUAACCGCGUGUCACC-AACGCGCGGUGAAUACGUCCCUGCCCCU   [922]
+Thermofil    GGACCC-G-ACCGCUAUCGGGGUGGGGCUAA-CCCAUGGAAGUCGGCCGGCGGACGGCUCAGUAGCACGUGGCUCCUACCCUCGGGGGGGGAUAACCCCGGCUGGGGAUAAACCCCCGAGGAUGGGGCUGCGCCCUAUCAGGUAGUUGGCGGGGUAACGGCCCGCCAAGCCGAUAACGGGUGGGGGCCGUGAGAGCGGGAGCCCCGAGAUGGGCACUGAGACAAGGGCCCAGGCCCUACGGGGUGCACCAGGGGCGAAACUUCCGCAAUGCGGCCGUGACGGAGUCACCCCGAGCGGGGGGCAA-GCUUG-GUGUCAGCCGCCGCGGUAAUAC-CAACCCCGCGAGUGGUCGGGACGUUUAUUGGGCCUAAAGCGUCCGUAGCCGGGGCGGGAGAGGCCGGGGGUACUCCUGGGGUAGGGGCGAAAUCCUAUAAUCCCAGGAGGACCACCAGUGGCGAAGGCGCCCGGCUAGCACGCGCCCGACGGUGAGGGACGAAAGCUGGGGGAGCAAAGGGGAUUAGAUACCCCCGUAGUCCCAGCUGUAAACGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAAGGGGUGAAGCUUGCGGUUUAAUUGGAGUCAACGCCGGAAACCUUACCGGGGGAGGAGGUGCAUGGCCGUCGCCGGCUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGGAACGAGCGAGACCCCCGGACUGCCGGCGCCG-GAGGAAGGUGGGGGCUACGGCAGGUCAGU-AUGCCCCGCCCCCGGGCUACACGCGAGCUGCAAUGCUCAGUAGGAAUCGAGGGCUGCAAGCCCUCGUGAACGUGGAAUCCCUAGUAACCGCGUGUCACC-AACGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Methanopy    GGAGGC-C-ACCGCUAUCGGGGUCCGACUAA-GCCAUGCAAGUCGGCCGGCGGACGGCUCAGUAACACGUGGGUCCUACCCUCGGGCGGGGAUAACCCCGGGUGGGGCUAAUCCCCCGAGGAUGGGCCUGCGGCCGAUUAGGUAGUUGGCGGGGUAACGGCCCGCCAAGCCGAUAAUCGGUACGGGCGGUGAGAGCCGGAGCCCGGAGACGGGGACUGAGACAAGGCCCCGGGCCCUACGGGGCGCAGCAGGCGCGAAACCUCCGCAAUGCGGCCGCGACGGGGGGACCCCGAGCGGCGGGCAA-GACCG-CUGCCAGCCGCCGCGGUAAUAG-CGGCGCCGCAAGUGGUGGCCGCUUUUAUUGGGCCUAAAGGGGCCGUAGCCGGACCGGGAGAGGCCGGAGGUACCCCCGGGGUAGGGGUGAAAUCCUGUCAUCCCGGGGGGACCGCCAGUGGCGAAGGCGUCCGGCUGGAACGGGUCCGACGGUGAGGGCCGAAAGCCGGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCCGGCUGUAAACGAUGCGGACUAAGUCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAACCGGUGGAGCCUGCGGUUUAAUUGGAUUCAACGCCGGAAACCUUACCGGGGGAGGAGGUGCAUGGCCGCCGUCAGCUCGUGCCGUGAGGUGUCCUGUUAAGUCAGGUAACGAGCGAGACCCCCGGAUCGCCGCCGGCG-GAUGAAAGUGGGGGCGACGGCAGGUCCGU-AUGCCCCGCCCCCGGGCUACACGCGGGCUACAAUGCGUAGUUCGGAUUGCGGGCUGCAAGCCCGCAUGAAGGUGGAAUCGGUAGUAACCGUGCCUCAGA-AUGGCACGGUGAAUACGUCCCUGCUCCU   [922]
+Thermococ    GGAGGC-C-ACUGCUAUGGGGGUCCGACUAA-GCCAUGCGAGUCGGCCGGCGGACGGCUCAGUAACACGUCGGUCCUACCCUCGGGGGGGGAUAACCCCGGCUGGGGCUAAUCCCCCGAGGAUGGGCCGGCGGCCGAUUAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCGAAGAUCGGUACGGGCCAUGAGAGUGGGAGCCCGGAGAUGGACACUGAGACACGGGUCCAGGCCCUACGGGGCGCAGCAGGCGCGAAACCUCCGCAAUGCGGCCGCGACGGGGGGACCCCCAGGGCUGGGCAA-GGCCG-GUGGCAGCCGCCGCGGUAAUAC-CGGCGGCCCGAGUGGUGGCCGCUAUUAUUGGGCCUAAAGCGUCCGUAGCCGGACCGGGAGAGGCCGGGGGUACCCCUGGGGUAGGGGUGAAAUCCUAUAAUCCCAGGGGGACCGCCAGUGGCGAAGGCGCCCGGCUGGAACGGGUCCGACGGUGAGGGACGAAGGCCAGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCUGGCUGUAAAGGAUGCGGGCUAAGCCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACUACAAGGGGUGGAGCGUGCGGUUUAAUUGGAUUCAACGCCGGGAACCUCACCGGGGGAGGAGGUGCAUGGCCGCCGUCAGCUCGUACCGUGAGGCGUCCACUUAAGUGUGGUAACGAGCGAGACCCGCGGACCGCCGGCGCCG-GAGGAAGGAGCGGGCGACGGUAGGUCAGU-AUGCCCCGCCCCCGGGCUACACGCGCGCUACAAUGCCCAGUUCGGAUCGCGGGCUGCAAGCCCGCGUGAAGCUGGAAUCCCUAGUACCCGCGUGUCAUC-AUCGCGCGGCGAAUACGUCCCUGCUCCU   [922]
+Archaeogl    AGAGGC-C-GCUGCUAUCCGGCUGGGACUAAGCCAUGCGAGUCAGGCCGGCGGACGGCUCAGUAACACGUGGACCCUGCCCUCGGGGGGGGAUAACCCCGGCUGGGGCUAAUCCCCCGAGGAUGGGUCUGCGGCGGAUUAGGUUGUUGGUGGGGUAACGGCCCACCAAGCCGAAGAUCCGUACGGGCCAUGAGAGUGGGAGCCCGGAGAUGGACCCUGAGACACGGGUCCAGGCCCUACGGGGCGCAGCAGGCGCGAAACCUCCGCAAUGCGGCCGCGACGGGGUCAGCCGGAGGGCCGGGCAA-GGCCG-GUGGCAGCCGCCGCGGUAAUAC-CGGCGGCCCGAGUGGCGGCCACUUUUAUUGGGCCUAAAGCGUCCGUAGCCGGACCGGGAGAGGCCGGGGGUAUUCCCGGAGUAGGGGUGAAAUCCUGUAAUCCCGGGAGGACCACCUGUGGCGAAGGCGCCCGGCUGGAACGGGUCCGACGGUGAGGGACGAAGGCCAGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCUGGCUGUAAACGAUGCGGACUAAGUCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACUACAACGGGUGGAGCCUGCGGUUUAAUUGGAUUCAACGCCGGGAAGCUUACCGGGGGAGGUGGUGCAUGGCCGCCGUCAGUUCGUACUGUGAAGCAUCCUGUUAAGUCAGGCAACGAGCGAGACCCGCGGACUGCCGGCGCCG-GAGGAAGGUGCGGGCAACGGCAGGUCCGU-AUGCCCCGCCCCCGGGCUACACGCGGGCUACAAUGCUAACCUGGGAUCGAGGGCUGCAAGCCCUCGUGAACCUGGAAUCCGUAGUAAUCGCGCCUCAAA-AUGGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Thermopla    GGCGGU-C-ACUGCUAUCAGGUUCCGACUAAGCCAUGCAAGUCAGGCCGGCGAACAGCUCAGUAACACGUGGAUUUUACCCUCAGGGGGGCAUAACCUCGGCUGAGGCUAAUUCCCUGAGGAUAAGUCUGCGGCCUAUCAGGUAGUAGGUGGUGUAAAGGACCACCUAGCCUAAGACGGGUACGGGCCCUGAAAGGGGGAGCCCGGAGAUGGACUCUGAGACAACAGUCCAGGCCCUACGGGGCGCAGCAGGCGCGAAAACUGUGCAAUGCGCGCGCGACACGGGGAGCCUGAGGGCUGGGCAA-GACGG-GUGCCAGCCGCCGCGGUAACAC-CCGCAGCUCGAGUGGUGAUCACUUUUAUUGAGUCUAAAGCGUUCGUAACCGGACCGGGUGAGGUUGAAUGUACUUUCAGGGUAGGGGUAAAAUCCUGUAAUCCUGAAAGGACGACCGGUGGCGAAAGCGUUCAACUAGAACGGAUCCGACGGUGAGGGACGAAGGCUAGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCUAGCUGUAAACGCUGCCCACUUAGUGGGUCACUUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCGCAACGGGAGGAGCGUGCGGUUUAAUUGGAUUCAACGCCGGAAAACUCACCGGGAGAGGUGGUGCAUGGCCGUCGUCAGCUCGUACCGUAGGGCGUUCACUUAAGUGUGAUAACGAGCAAGACCCCCAGACCGCCAGCGCUG-GAGGAAGGAGGGGUCGACGGCAGGUCAGU-ACGCCCCGCUCCCGGGCUACACGCGCGCUACAAAGCGUAGUCAGGACUGAGGGCUGUAAGCCCUCACGAAUGUGGAUUCCGUAGUAAUCGUAGGUCAAC-AGCCUACGGUGAAUAUGCCCCUGCUCCU   [922]
+Haloferax    GGAGGU-C-AUUGCUAUUGGGGUCCGAUUUA-GCCAUGCUAGUUACGUGGCGAAAAGCUCAGUAACACGUGGCCACUACCCUACAGGAACGAUAACCUCGGCUGAGGCUAAUAGUUGUAGGAUGUGGCUGCGGCCGAUUAGGUAGACGGUGGGGUAACGGCCCACCGUGCCGAUAAUCGGUACGGGUUGUGAGAGCAAGAGCCCGGAGACGGAAUCUGAGACAAGAUUCCGGGCCCUACGGGGCGCAGCAGGCGCGAAACCUUUACACUGCACGUGCGAUAAGGGGACCCCAAGAGCUGGGCAA-GACCG-GUGCCAGCCGCCGCGGUAAUAC-CGGCAGCUCAAGUGAUGACCGAUAUUAUUGGGCCUAAAGCGUCCGUAGCCGGACCGGAAGGCUCGAGGGGUACGUCCGGGGUAGGAGUGAAAUCCCGUAAUCCUGGACGGACCACCGAUGGCGAAAGCACCUCGAGAAGACGGAUCCGACGGUGAGGGACGAAAGCUAGGGUCUCGAACCGGAUUAGAUACCCGGGUAGUCCUAGCUGUAAACGAUGCUCGCUAAGCGAGCCGCCUGGGAAGUACGUCCGCAAGGAUGAAACUUAAAGGAAUUGGCGGGGGAGCACUACAACCGGAGGAGCCUGCGGUUUAAUUGGACUCAACGCCGGACAUCUCACCAGCUCAGGAGGUGCAUGGCCGCCGUCAGCUCGUACCGUGAGGCGUCCUGUUAAGUCAGGCAACGAGCGAGACCCGCAGACUGCCGCUAGCG-GAGGAAGGAACGGGCAACGGUAGGUCAGU-AUGCCCCGGAGCUGGGCUACACGCGGGCUACAAUGCGUAGUUCGGAUUGAGGGCUGAAAGCCCUCAUGAAGCUGGAUUCGGUAGUAAUCGCAUUUCAAU-AGAGUGCGGUGAAUACGUCCCUGCUCCU   [922]
+Methanoba    GGAGGC-C-ACUGCUAUUGGGUUUCGAUUAA-GCCAUGCAAGUCACGUGGCGUACGGCUCAGUAACACGUGGAUCCUAACCUUAGGCUGGGAUAACCCUGGCUGGGGAUAAUACCCUAAGGAUGGAUCUGCGGCAGAUUAGGUAGUUGGCGGGGUAAAUGCCCACCAAGCCAGUAAUCUGUACGGGUUGUGAGAGCAAGAGCCCGGAGAUGGAACCUGAGACAAGGUUCCAGGCCCUACGGGGCGCAGCAGGCGCGAAACCUCCGCAAUGCACGUGCGACGGGGGAAACCCAAGAGCUGGGCAA-GACCG-GUGCCAGCCGCCGCGGUAACAC-CGGCAGCUCAAGUGGUGGCCGUUUUUAUUGGGCCUAAAGCGUUCGUAGCCAGGCCGGGAGAGGUUAGCGGUACUCCCGGGGUAGGGGUGAAAUCCUAUAAUCCCGGGAGGACCACCUGUGGCGAAGGCGGCUAACUGGAACGGGCCUGACGGUGAGUAACGAAAGCCAGGGGCGCGAACCGGAUUAGAUACCCGGGUAGUCCUGGCCGUAAACGAUGUGGACUUAGUCCACCGCCUGGGAAGUACGGUCGCAAGACUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAACGCGUGGAGCCUGCGGUUUAAUUGGAUUCAACGCCGGACAUCUCACCAGGGGAGGAGGUGCAUGGCCGCCGUCAGCUCGUACCGUGAGGCGUCCUGUUAAGUCAGGCAACGAGCGAGACCCACGGACCGCCAGUACUG-GAGGAAGGAGUGGACGACGGUAGGUCCGU-AUGCCCCGCCCCUGGGCUACACGCGGGCUACAAUGCUUAGUUCGGAUUGAGGGCUGUAAGCCCUCAUGAAGCUGGAAUGCGUAGUAAUCGCGUGUCAUA-ACCGCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Methco.va    GGAGGC-U-ACUGCUAUUGGGGUUCGACUAA-GCCAUGCGAGUCAUAUGGCGGACGGCUCAUUAACACGUGGUUCUUAACCUCAGGGGAGCAUAACCUUGGCUGAGGAUAAUUCUCCGAGGAUAGGACUGCGCUCGAUUAGGUAGUUGGUGGGGUAAUGGCCCACCAAGCCUACGAUCGAUACGGGCCUUGAGAGAGGGAGCCCGGAGAUGGGGACUGAGACACGGCCCCAGGCCCUACGGGGCGCAGCAGGCGCGAAACCUCCGCAAUGCACGUGCGACGGGGGGACCCCAAGGGCUGGGCAA-GUUCG-GUGCCAGCAGCCGCGGUAAUAC-CGACGGCCCGAGUGGUAGCCACUCUUAUUGGGCCUAAAGCGUCCGUAGCCGGACCGGGAGAGGACAAGGGUACUCCAGGGGUAGCGGUGAAAUGUGUUGAUCCUUGGAGGACCACCUAUGGCGAAGGCACUUGUCUGGAACGGGUCCGACGGUGAGGGACGAAAGCCAGGGGCGCGAACCGGAUUAGAUACCCGGGUAGUCCUGGCCGUAAACUCUGCGAACUAAGUUCGCCGCCUGGGGAGUACGGUCGCAAGACUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAACGGGUGGAGCCUGCGGUUUAAUUGGAUUCAACGCCGGGCAUCUCACCAGGAGAGGUGGUGCAUGGCCAUCGUCAGCUCGUACCGCGAGGCGUCCUGUUAAGUCAGGUAACGAGCGAGACCCGUGGACCGCUAGCGCUA-GAGGAAGGAGCGGGCAACGAUAGGUCCGC-AUGCCCCGCUCCUGGGCUACACGCGGGCUACAAUGCGUAGUUCGGAUCGUGGGCUGUAAGCCCACGUGAAGCUGGAAUCCGUAGUAAUCGCAGUUCAUA-AUACUGCGGUGAAUGUGUCCCUGCUCCU   [922]
+Methco.ja    GGAGGC-C-ACUGCUAUCGGGGUCCGACUAA-GCCAUGCGAGUCGGCCGGCGCACGGCUCAGUAACACGUGGCUCCUACCCUCGGGGGGGGAUAACCUCGGCUGAGGCUAAUCCCCCGA--AUGGGGCUGCGGCGGAUUAGGUAGUUGGUGGGGUAACGGCCCACCAAGCCUACGAUCCGUACGGGCCCUGAGAGGGGGAGCCCGGAGAUGGACACUGAGACACGGGUCCAGGCCCUACGGGGCGCAGCAGGCGCGAAACCUCCGCAAUGCGCGCGCGACGGGGGGACCCCGAGGGCUGGGCAA-GUCCG-GUGCCAGCAGCCGCGGUAAUAC-CGGCGGCCCAAGUGGUGGCCACUGUUAUUGGGCCUAAAGCGUCCGUAGCCG-ACCGGGAGAGGCC-GGGGUACCCCAGGGGUAGCGGUGAAAUGCGUUGAUCCCUGGGGGACCACCUGUGGCGAAGGCGCCCGGCUGGAACGGGUCCGACGGUGAGGGACGAAGGCCAGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCUGGCUGUAAACUCUGCGGACUAAGUCCGCCGCCUGGGGAGUACGGUCGCAAGACUGAAACUUAAAGGAAUUGGCGGGGGAGCACUACAACGGGUGG--ACCGCGGUUUAAUUGGAUUCAACGCCGGGCAUCUUACCAGGGGAGGUGGUGCAUGGCCGUCGUCAGCUCGUACCGUGAGGCGUCCUGUUAAGUCAGGUAACGAGCGAGACCCGUG-ACCG-CGGCGCCG-GAGGAAGGUGCGGGCAACGACAGGUCCGC-AUGCCCCGCCCCUGGGCUACACGCGGGCUACAAUGCGUAGUCCGGAUCGAGGGCUGUAAGCCCUCGUGAAGCCGGAAUCCGUAGUAAUCGCGCCUCACC-AUGGCGCGGUGAAUGCGUCCCUGCUCCU   [922]
+Methanosp    AGAGGC-C-ACUGCUAUCGGGGUUUGACUAA-GCCAUGCGAGUCGAUCGGCGUACUGCUCAGUAACACGUGGACUCUGCCCUGAAGGGAGGAUAAUCCCGGCUGGGGGUAAUACUUUCAGGAUGAGUCUGCGGCCGAUUAGGUAGUUGUUGGGGUAACGGCCCAACAAGCCUGUCAUCGGUACGGGUUGUGGGAGCAAGAGCCCGGAGAUGGAUUCUGAGACACGAAUCCAGGCCCUACGGGGCGCAGCAGGCGCGAAAACUUUACCAUGCGGCCGUGAUAAGGAAACCCCGAGGGCCGGGCAA-GACCG-GUGCCAGCCGCCGCGGUAAUAC-CGGCGGCUCGAGUGGUGGCCGCUAUUACUGGGCUUAAAGGGUCCGUAGCUGGACCGGGAGAGGUGAGAGGUACUGCCGGGGUAGGAGUGAAAUCCUGUAAUCCCGGUGGGACCACCUAUGGCGAAGGCAUCUCACCAGAACGGGUCCGACAGUGAGGGACGAAAGCUGGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCCAGCUGUAAACGAUGCGCGUUAAACGCGCCGCCUGGGGAGUACGGUCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAACGGGUGGAGCCUGCGGUUUAAUCGGACUCAACGCCGGAAAUCUCACCGGAUAAGGAGGUGCAUGGCCGUCGUCAGUUCGUACUGUGAAGCAUCCUGUUUAGUCAGGCAACGAGCGAGACCCACGGACCGCCUCUAGGG-GAGGAAGGAAUGGGCAACGGUAGGUCAGC-AUGCCCCGUAUCCGGGCUACACGCGGGCUACAAUGCGAAGUUCGGAUUGCGGGUUGUAAACCCGCAUGAAGCUGGAAUCCGUAGUAAUCGCGUUUCAAC-AUAGCGCGGUGAAUAUGUCCCUGCUCCU   [922]
+Methanoth    GGAGGC-C-ACUGCUAUGGGGGUCCGACUAA-GCCAUGCAAGUCACGUGGCGAACGGCUCAGUAACACGUGGACCCUACCCUGGGGCCGGGAUAACCCCGGCUGGGGCUAAUCCCCCCAGGAUGGGUCUGCGGCCGAUUAGGUAGUUGGUAGGGUAACGGCCUACCAAGCCUACGAUCGGUACGGGUUGUGAGAGCAAGAGCCCGGAGACGGGGCCUGAGACAAGGCCCCGGGCCCUACGGGGCGCAGCAGGCGCGAAAACUCCGCAAUGCGCGCGCGACGGGGGGACCCCCAGGGCUGGGCAA-GACCG-GUGCCAGCCGCCGCGGUAACAC-CGGCAGCCCGAGUGGUGGCCGCGUUUAUUGGGCCUAAAGCGUCCGUAGCCGGGCCGGGAGAGGCCGGAGGUACCCCCGGGGUAGGGGUGAAAUCCUGUAAUCCCGGGGGGACCACCUGUGGCGAAGGCGUCCGGCUGGAACGGGCCCGACGGUGAGGGACGAAAGCCAGGGGAGCGAACCGGAUUAGAUACCCGGGUAGUCCUGGCCGUAAACGAUGCGGACUUAGUCCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUUAAAGGAAUUGGCGGGGGAGCACCACAACGCGUGGAGCCUGCGGUUUAAUUGGAUUCAACGCCGGACACCUCACCGGGGGAGGAGGUGCAUGGCCGCCGUCAGCUCGUACCGUGAGGCGUCCUGUUAAGUCAGGCAACGAGCGAGACCCGCGGACCGCCAGCGCUG-GAGGAAGGUGCGGGCGACGGUAGGUCCGU-AUGCCCCGCCCCCGGGCUACACGCGGGCUACAAUGCGUAGUUCGGAUCGAGGGCUGCAAGCCCUCGUGAAGCUGGAAUGCGUAGUAAUCGCGGGUCACU-AUCCCGCGGUGAAUACGUCCCUGCUCCU   [922]
+Vairimorp    UGACGU-A-GACGCUAUUCCCU-AAGAUUAA-CCCAUGCAUGU-UUAAAAUGGACUGCUCAGUAAUACUCACUUUUUAAUGUAUUAUUAGUAUAACUGCGUGUGUAGCUAAGACAUAUACAGUAAGAGUGAGACCUAUCAGCU--UUGUUAAGGUAAUGGCUUAACAAGGCAAUGACGGGU-CGGUAUUACUUUUAAU-AUUCCGGAGAAGGAGCCUGAGAGACGGCUACUAAGUCUAAGGAUUGCAGCAGGGGCGAAACUUGACCUAUGGAUAUCUGAGGCAGUUAUGGGAAUUGGAGGGCAA-AUCAA-GUGCCAGCAGCCGCGGUAAUAC-UUGUUCCAAGAGUGUGUAUGAUGAUUGAUGCAGUUAAAAAGUCCGUAGUUAAGCAAUAUGAGGUGUACUGUAUAGUUGGGAGAGAGAUGAAAUGUGACGACCCUGACUGGACGAACAGAAGCGAAAGCUGUACACUUGUAUGUAUUUUUUGAACAAGGACGUAAGCUGGAGGAGCGAAGAUGAUUAGAUACCAUUGUAGUUCCAGCAGUAAACUAUGCCGACGAAGUUGGCUCUGGGGAUAGUAUGAUCGCAAGAUUGAAAAUUAAAGAAAUUGACGGAAGAAUACCAGAAGGAGUGGAUUGUGCGGCUUAAUUUGACUCAACGCGAGGUAACUUACCAAUAUUAGUGGUGCAUGGCCGUUUUCAAUGGAUGCUGUGAAGU-UUUGAUUAAUUUCACCAAGACGUGAGACCCUUUGACAGACA-CG-UG-UAGGAAGGAAAGGAUUAAAACAGGUCCGUUAUGCCCUCAUUUUGGGCUGCACGCGCAAUACAAUAUCUUUAUGGGAUAAUAUUUUGUAAAGAUAUUUGAACUUGGAAUUGCUAGUAAAUUUUAUUAAAU-AAGUAGAAUUGAAUGUGUCCCUGUUCUU   [922]
+Dictyoste    AGUAGUCA-UAUGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCAGACGGCUCAUUACAACAGUGAUACUAAUAGACUUUUUGGAUAACCGCAGC-GGGGCUAAUACAAGUCUACUGUGUCACUGCCCUAUCAACU--AUGGUACGGUAUUGGCCUACCAUGGUUGUAACGGGU-CGGGGAAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAAUGGCUACCACUUCUACGGAAGGCAGCAGGCGCGCAAAUUACUCAAUCCCACGGGGAAGUAGUGACAAUAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCAUAUACUAAAGUUGUUGCAGUUAAAAAGCUCGUAGUUGGGAUGGAUGGGGGUGUUCAUAUUGGUGGGCGAGAGGUGAAAUUCGUUGACCCUAUCAAGAUGAACUUCUGCGAAAGCAUUCACCAAAUACCUCCCCAUUAAUCAAGAACGAAAGUUUGGGGAUCGAAGACGAUCAGAUACCGUCGUAGUCCAAACUAUAAACUAUGUCGACCAAGUGGAUUCCGGGGGGAGUAUGGUCGCAAGUCUGAAACUUAAAGGAAUUGACGGAAGGGCACACAAUGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACUCGGGAAAACUUACCAAGCUUGGUGGUGCAUGGUCGUUCUUAGUUGGUGGAGCGAUUUGUCUGGUCAAUUCCGAUAACGGACGAGACCUCGAGACUACCUGCGCAG-GCGGAAGUCCGAGGCAAUAACAGGUCUGUGAUGCCCUUACCUUGGGCCGCACGCGCGCUACAAUGCGUAACUGGGCUUGAUCUUUGUAAUUGAUCAUAAACGAGGAAUUCCUUGUAAGCGUAAGUCAUU-ACCUUAUGCUGAAUAUGUCCCUGCCCUU   [922]
+Encephali    UGACGU-A-GAUGCUAGUCUCU-GAGAUUAA-GCCAUGCAUGU-AGACGGCGAACGGCUCAGUAAUGUUGCGGUUUUGGUCUCUGUGUAAACUAACCACGGCUGUGGCUAAAA--CGGAGAAUAAGGCGCAACCCUAUCAGCU--UUGGUAGUGUAAAGGACUACCAAGGCCAUGACGGGU-CGGGAAAUCAGGGUUUGAUUCCGGAGAGGGAGCCUGAGAGAUGGCUCCCACGUCCAAGGACGGCAGCAGGCGCGAAACUUG-UCCACUCCUGGGGGAGACAGUCAUGAGACUUGGAGGGCAAGCUUUG-GUGCCAGCAGCCGCGGUAACUC-CAACUCCAAGAGUGUCUAUGGUGGAUGCUGCAGUUAAAGGGUCCGUAGUCGGAACGGAUAGGGAGUGUAGUAUAGACUGGCGAAGAAUGAAAUCUCAAGACCCAGUUUGGACUAACGGAGGCGAAGGCGACACUCUUAGACGUAUCUUAGGAUCAAGGACGAAGGCAGGAGUAUCGAAAGUGAUUAGACACCGCUGUAGUUCCUGCAGUAAACUAUGCCGACAGAGUGGGCUCUGGGGAUAGUACGCUCGCAAGGGUGAAACUUAAAGAAAUUGACGGAAGGACACUACCAGGAGUGGAUUGUGCUGCUUAAUUUAACUCAACGCGGGAAAACUUACCAGGGUGAGUGGUGCAUGGCCGUUGGAAAUUGAUGGGGCGACCU-UUAGCUUAAAUGCUUAAACCAGUGAGACCUCCUGACAGGUGUUAACA-CAGGAGGGUGGAGGCUAUAACAGGUCCGUGAUGCCCUUAUCCUGGGCAGCAAGCGCAAUACAAUAU--GAGUAGGAUCUACGUUUGUAAUACGUAGUGAAUAAGGAAUUCCUAGUAACGGUGCCUCAUC-AAGGCAUGGUGAAUGUGUCCCUGUUCUU   [922]
+Physarum     AGUAGU-G-UAUGCUUCUCCUA-AAGACUAA-GCCAUGCAUGUCCGUCUGCGAACGGCUCCGCAUACCAGUUGUACCAUAGCAAGCCAGGGAUAACCCUGGCUGAGGCUAAUACACUCGCUGUGUGCUUCUGACCUAUCAACU--UGGCA-GCGUAACGGACAUGCCAGGUAACAACGGGU-CAGAGGAUAAGGGUUCGAUCCUGGAGAGUGGGCCUGAGAGAUUGCUCACACUUCUAAGGAAGGCAGCAGGCGCGCAACGUUCCCAUUGGGCGCUCGAGGGCGUUAGGGGACUUAGAGGACAA-GUCUG-GUGCCAGCACCCGCGGUAAUUC-CAGCUCUAAUAGCAUACGUUAAAGUUGUUGCGGUUAAAACGCUCGUAGUCGGGAUGUUCGAGGGUGACCGAAUUGCUGGGCGAGUGGUGAAAUACGUUGACCCUAGCAAGUCGACCAAAGGCGUAAGCAGUCAUCAAGGGCAUUCCCGUUGAUCAAGAGCGAAAGUUAAGGGUUCGAAGACGAUCAGAUACCGUCGUAGUCUUAACUAUAAAUGAUGCAGACCAAGUCGGUUCUGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCAC-ACAAAGAGUGGAACCUGCGGCUUAAUUUGACUCAACACGGGAAAACUCACCAGGUCUGGUGGUGCAUGGUCGUUCUUAGUUCGUGGAUUGAUUUGUCUGGUUUAUUCCGAUAACGAGCGAGACCCCGGUAUCAGAGCCGGUU-CUUGAAA--UGGGUUAAUAACAGGUCAGUCAUGCCCUUGUUCUGGGCCGCACGCGCGUUACAAUGUUUGACUGGGACAGAUCUUUGCAAUUGGUCUCAAACGAGGAAUUUUUAGUAAUCGCAGGUCAUU-AACCUGCGUUGAAUGCGUCCCUGCCCUU   [922]
+Entamoeba    AGUAUU-A-UAUGCUGAUGUUA-AAGAUUAA-GCCAUGCAUGUGAGACUGCGGACGGCUCAUUAUAACAGUAAUUUUCUUUGGUUACAAGGAUAGCUUUGUAUAAAGAUAAUACUACCAAUGAGAAUUUCUGAUCUAUCAAUC--UUGGUAGUAUCGAGGACUACCAAGAUUAUAACGGAU-CGAGGAAUUGGGGUUCGACAUCGGAGAGGGAGCUUUACAGAUGGCUACCACUUCUAAGGAAGGCAGCAGGCGCGUAAAUUACCCACUUUCGUGAAGAGGUAGUGACGACACUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGUGUAUAUUAAAGUUGCUGUGAUUAAAACGCUCGUAGUUGGAACAAUUGGGGUGAUUCAGAAAAUAACGGGAGAGGUGAAAAUCCAUGAUCGCUAUAAGAUGCACGAGAGCGAAAGCAUUUCACUCAACUGUGUCCAUUAAUCAAGAACGAAAGUUAGGGGAUCGAAGACGAUCAGAUACCGUCGUAGUCCUAACUAUAAACGAUGUCAACCAAGUUGACUUCAGGGGGAGUAUGGUCACAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCAC-ACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGAAAACUUACCAAGACUAGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCAGGUUAAUUCCGGUAACGAACGAGACUGAAAGACCUCUUUUGAAA-AAGGAAGCAUUCAGCAAUAACAGGUCUGUGAUGCCCUUAUCUUGGGCCGCACGCGCGCUACAAUGCAUGACAGGGAUAAAUGAUUGGAAUUUGUUUUGAACGAGGAAUUCCUUGUAAUAUCGAGUCAUU-AACUCGAGAUGAAUACGUCCCUGCCCUU   [922]
+Euglena_g    AGCAGUCA-UAUGCUUUGUUCA-AGGGCUAA-GCCAUGCACGUCAGUCUGUGAAUGGCUCCUUACAUCAGCAGUUCUACGUGAUAGAUUGGACAUCCACCAUUGUGGCUAAUACAGAUCGCAAGAGCUUCUGACCUAUCAGCU--ACUGUGGUGUAUCGGACCACAGUGGCCUUGACGGGU-CGGAGAAUCAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAGACGGCUACCACUACCAAGGUGGGCAGCAGGCACGCAAAUUGCCCCAUGCAACUGUGAGGCAGCGACGAACACCGGAGGGCAA-GUCUG-GUGCCAGCAGCUGCGGUAAUUC-CAGCUCCGAGGGCGUAUACUAACAUUGCUGCUGUUAAAACACUUGUAGUCGGACUGUUCGGGGUGAAAGAUACGGGAGCGCCAGAGGUGAAAUUCUUAGAUCGCUGCCAGAUCCACUGCAGCGAAGGCGUUCUGCAAGUGCACGUCCGUCGAUCAAGAAUGAGAGUUCGGGGAGCAAAGAUGAUCAGACACCGUCGUAGUCCGGACUGUAAACGAUGCCGGCCAAGCCGGUUCAGGGGGGAGUACUGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAUGGCACCACAAGGCGUGGAGUAUGCGGCUUAAUUUGACUCAACGCGGGGAAUGUUACCAGGUCCGGUGGUGCAUGGCCGCUCCUGAUUGGUGGAGUGAUUUGUCUGGUUGAUUCCGAUAACGAGUGAGACAUCUGUGCUGUGUCCGGGC-AUGCAUGCUAGAGCCAACAGCAGGUCUGUGAUGCUCCCGUCCUGGGCCGCACGCGCACUACAUUGCGUACUGGGGAUAGAUGGUUGCAAUCUGCCUUGAACGUGGAAUGCCUAGUAUGCCUGGGUCAUC-AGCCCAGACCGAUUGUGUCCCUGCCAUU   [922]
+Naegleria    AGUACU-A-UAUGCUUGUCUCA-AAGCCUAA-GCCAUGCAAAUGAGUCUGUGGAAGGCUCAUUAUAACAGUUAUUCCUAGCCACUGCAAGGAUACCACCGUGCAGCGAUA-UACUAGGCAGAGGAGUUUCUUACCUAUCAGCU--UUGUUUGUUUAAAGGACAAACCAGGCUUUGACGGGU-CGGGGAAUCAGUGUUCGAUUCCGGAGAGGGAGCCUGAGAAAUCGCUACCACAUCUAAGGACGGCAGCAGGCGCGCAAAUUACCCAAUCUCACGAGGAGGUAGUGACAAGCUUUGGAGGAAAA-GUCUG-GUGCCAGCACCCGCGGUAAUUC-CAGCUCCAAGAGCGUAUAUUAAUACUGCUGUAGUUAAAACGCCCGUAGUAGCUCAGGGUGAGGCCCCGGGUACCAUGAGGCUAGAGGUGAAAUUCUGAGACCCUCAUGUGACCAACUAAGGCGAAAGCUGUGGGCCACCACAAGCUCGUCUAUCAGGGACAAAAGUUGGGGGAUCGAAGACGAUUAGAUACCGUCGUAGUCCCAACUAUAAACGAUACCAACCGAGUCGGUUCUGGGGGGAGUAUAGUCGCAAGACCGAAACUUAAAGGAAUUGACGGAAAGGCACCACCAGGAGUGGAGUCUGCGGCUUAAUUCGACUCAACACGGGGAAACUCACCAGGUCGGGUAGUGCAUGGCCGUUUCCAGUUCGUGGAGUGAUCUGUCUUGUUAAUUCAGAUAACGAACGAGACCUAAGGACUUCAUUCGGAU-GAGGAAGAUUUAGGCCAUAACAGGUCUGUGAUGCUCUUGUCCUGGGCUGCACGCGUACUACAAUAUAUGACAGGGAUCGAGGAUUGGAAAUCCUCGUGAACGAGGAAUUCCUAGUAAGCGUGGUUCAUC-AUACCACAUUGAUUACGUCCCUGCCUUU   [922]
+Trypanoso    AGUAGUCA-UAUGCUUGUUUCA-AGGACUUA-GCCAUGCAUGCCAGUCUGCGCAUGGCUCAUUACAUCAGACGUUCUGCCGCCAAAACUGGAUAACUUGGCGCCAAGCUAAUACACUACUGACGAACAACUGCCCUAUCAGCC--AUGGCCGUGUAGUGGACUGCCAUGGCGUUGACGGGA-CGGGGGAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAAUAGCUACCACUUCUACGGAGGGCAGCAGGCGCGCAAAUUGCCCAAUGUCGCGAUGAGGCAGCGAAAAGAAUUGGAGGACAA-GUCUG-GUGCCAGCACCCGCGGUAAUUC-CAGCUCCAAAAGCGUAUAUUAAUGCUGUUGCUGUUAAAGGGUUCGUAGUUGGGUAGUUCGGGGGAGAACGUACUGGUGCGUCAGAGGUGAAAUUCUUAGACCGCACCAAGACGAACUACAGCGAAGGCAUUCUUCAAGGAUACCUUCCUCAAUCAAGAACCAAAGUGUGGGGAUCAAAGAUGAUUAGAGACCAUUGUAGUCCACACUGCAAACCAUGACACCCAGAGGGUCUCAGGGGGGAGUACGUUCGCAAGAGUGAAACUUAAAGAAAUUGACGGAAUGGCACCACAAGACGUGGAGCGUGCGGUUUAAUUUGACUCAACACGGGGAACUUUACCAGAUCUGGUGGUGCAUGGCCGCUUUUGGUCGGUGGAGUGAUUUGUUUGGUUGAUUCCGUCAACGGACGAGAUCCAAGGAUUCCUUGCGCAA-GGUGAGAUUUUGGGCAACAGCAGGUCUGUGAUGCUCCUGUUCUGGGCGACACGCGCACUACAAUGCCCACUUGGGACCGAGUAUUGCAAUUGGUCGCGAACGAGGAAUGUCUCGUAGGCGCAGCUCAUC-AAACUGUGCCGAUUACGUCCCUGCCAUU   [922]
+Tritricho    AAGGAA-G-CACACUUCGGUCA-UAGAUUAA-GCCAUGCAAGUGAGACUGCGAACAGCUCAUUAACACGCUCAGUCUACUUGGUGGUUUGGAUAGCAGCAGCUGGUGCUAAUACACACCAAUCGAUUGAGCGACCUAUCAGCU--UACUUAGGGUCUUUACCUAGGUAGGCUAUCACGGGU-CGGGCGGUUACCGUCGGACUCCGGAGAAGGCGCCUGAGAGAUAGCGACUAUAUCCACGGGUAGCAGCAGGCGCGAAACUUACCCACUCGAGUUCGGAGGUGGUAAUGACCAGCAGAGGGCCA-GUCUG-GUGCCAGCAGCUGCGGUAAUUC-CAGCUCUGCGAGUUUGCUCCCAUAUUGUUGCAGUUAAAACGCUCGUAGUCAGAGCCACCGGGGGUAGAUCUAUUUCAUGGCGAACGGUGGAAUGUUUUGACCCAUGAGAGAGAAACGAAGGCGAAAGCAUCUACCUAGAGGGUUUCUGUCGAUCAAGGGCGAGAGUAGGAGUAUCCAACCGGAUCAGAGACCCGGGUAGUUCCUACCUUAAACGAUGCCGACAGAGUUGGCUCUGGGGGAACUACGACCGCAAGGCUGAAACUUGAAGGAAUUGACGGAAGGGCAC-ACCAGGGGUGGAGCUUGUGGCUUAAUUUGAAUCAACACGGGGAAACUUACCAGGACUGGUGGUGCAUGGCCGUUGGUGGUGCGUGGGUUGACCUGUCGCGUUGAUUCAGAUAACGAGCGAGAUUAUCGGACUCCCUGCGCAG-GAGGAAGAGGGUAGCAAUAACAGGUCCGUGAUGUCCUUGCUCUGGGCUGCACGCGCGCUACAAUGCGUAGUUGGGAUUGAGGAUUGUAAAUUCUCAUGAACCAGGAAUCCCUUGUAAAUGCGUGUCAAC-AGCGCGCGUUGAAUACGUCCCUGCCCUU   [922]
+Babesia_c    AGUAGUCA-UAUGCUUGUCUUA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUACAACAGUUAUUUUCUUUGGUAUUAUGGAUAACCGUGCGUAGGGCUAAUACAGCCCAUUCAAGUUUCUGACCCAUCAGCU--ACGGUAGGGUAUUGGCCUACCGAGGCAGCAACGGGU-CGGGGAAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCUAAGGAAGGCAGCAGGCGCGCAAAUUACCCAAUCCUGCAGGGAGGUAGUGACAAGAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCGUAUAUUAAACUUGUUGCAGUUAAAAAGCUCGUAGUUGGAACGGUUGGGGGCAUUCGUAUUUAACUGUCAGAGGUGAAAUUCUUAGAUUUGUUAAAGACGAACUACUGCGAAACGAUUUGCCAAGGACGUUUUCAUUAAUCAAGAACGAAAGUUAGGGGAUCGAAGACGAUCAGAUACCGUCGUAGUCCUAACCAUAAACUAUGCCGACUAAGUCGGUUCUGGGGGGAGUAUGGUCGCAAGUCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGCGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGGAAACUCACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUCCGUUAACGAACGAGACCUUAAGACUUUACAGCUGU-AGGGAAGUUUAAGGCAAUAACAGGUCUGUGAUGCCCUUGUCCUGGGCUGCACGCGCGCUACACUGCGUGUCGGGGAUUGAUUUUUGCAAUAAAUCAUGAACGAGGAAUGCCUAGUAUGCGCAAGUCAUC-AGCUUGUGCAGAUUACGUCCCUGCCCUU   [922]
+Porphyra     AGUAGUCA-UAUGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUACAACAGUUAUUUCCUUUGAGAACUUGGAUAACCGUAGCUAGAGCUAAUACAGCUCAUUCAAAUUUCUGCCCUAUCAACU--AUGGUAGAGUAUUGGUCUACCAUGGUGUCGACGGGU-CGGGGAAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCCAAGGAAGGCAGCAGGCGCGCAAAUUACCCAAUCCCGCGGGGAGGUAGUGACAAAAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCGUAUAUUAAAGUUGUUGCAGUUAAAACGCUCGUAGUCGGGACAGUUGGGGGUAUUCGUAUUUCAUUGUCAGAGGUGAAAUUCUUGGAUUGAUGGAAGACGCACAACUGCGAAAGCAUCUGCCAUGGAUGUUUUCAUUGAUCAAGAACGAAAGUUAGGGGAUCGAAGACGAUCAGAUACCGUCGUAGUCUUAACCAUAAACGAUGCCGACUGAGUCGGUUCUGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGAAAACUUACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUCCGUUAACGAACGAGACCUCGUGACUAUGCGCGCGU-AUGGAAGAUUGAGGCAAUAACAGGUCUGUGAUGCCCUUGUUCUGGGCCGCACGCGCGCUACACUGCGUGCUGGGGAUAGAUCAUUGCAAUUGAUCUUGAACGAGGAAUUCCUUGUAGGCGUAGGUCAUC-ACCCUGCGCCGAAUACGUCCCUGCCCUU   [922]
+Parameciu    AGAAGUCA-UAUGCUUGUCUUA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUAAAACAGUUAUUUUAUUUGAUAGCAUGGAUAACCGUGGCUAGAGCUAAUACAAAUCAUUCAAGUUUCUGCCCUAUCAGCU--AUGGUAGUGUAUUGGACUACCAUGGCAGUCACGGGU-CGGAGAAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCUAAGGAAGGCAGCAGGCGCGUAAAUUACCCAAUCCCGCGGGGAGGUAGUGACAAGAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCGUAUACUUAAGUUGUUGCAGUUAAAAAGCUCGUAGUUGGGACAGAUGGGGGCAUUAGUAUUUAAUUGUCAGAGGUGAAAUUCUUGGAUUUAUUAAAGACUAACUUAUGCGAAAGCAUUUGCCAAGGAUGUUUUCAUUAAUCAAGAACGAAAGUUAGGGGAUCAAAGACGAUCAGAUACCGUCGUAGUCUUAACUAUAAACUAUACCGACUCAGUCGGUUCUGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGGAAACUUACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUCCGAUAACGAACGAGACCUUAAGACUAUGUAUGUGC-AUGGAAGUUUAAGGCAAUAACAGGUCUGUGAUGCCCCUGUCCUGGGCCGCACGCGCGCUACACUGCGUGCUGGGGAUAGAUCUUUGCAAUAGAUCUUGAACGAGGAAUUCCUUGUAAGCACAGGUCAUC-AGCCUGUGCUGAAUACGUCCCUGCCCUU   [922]
+Zea_mays     AGUAGUCA-UAUGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUGAGACUGCGAAUGGCUCAUUAAAUCAGUUAUUUUGUUUGAUGGCUCGGAUAACCGUAGCUAGAGCUAAUACGCAUCAUUCAAAUUUCUGCCCUAUCAACU--AUGGUAGGAUAGGGGCCUACCAUGGUGGUGACGGGU-CGGAGAAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCCAAGGAAGGCAGCAGGCGCGCAAAUUACCCAAUCCUGCGGGGAGGUAGUGACAAUAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCGUAUAUUUAAGUUGUUGCAGUUAAAAAGCUCGUAGUUGGGACAGUCGGGGGCAUUCGUAUUUCAUAGUCAGAGGUGAAAUUCUUGGAUUUAUGAAAGACGAACAACUGCGAAAGCAUUUGCCAAGGAUGUUUUCAUUAAUCAAGAACGAAAGUUGGGGGCUCGAAGACGAUCAGAUACCGUCCUAGUCUCAACCAUAAACGAUGCCGACCAAGUCGGUUCCGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGCGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGGAAACUUACCAGGUCUGGUGGUGCAUGGUCGUUCUUAGUUGGUGGAGCGAUUUGUCUGGUUAAUUCCGUUAACGAACGAGACCUCAGGACUAUGGCCGGCC-GCG-AAGUUUGAGGCAAUAACAGGUCUGUGAUGCCCUUGUUCUGGGCCGCACGCGCGCUACACUGCGUGAUGGGGAUAGAUCAUUGCAAUUGGUCUUCAACGAGGAAUGCCUAGUAAGCGCGAGUCAUC-AGCUCGCGUUGACUACGUCCCUGCCCUU   [922]
+Homo_sapi    AGUAG-CA-UAUGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUAAAUCAGUUAUUUCCUUUGGUCGCUUGGAUAACUGUGGCUAGAGCUAAUACAACCCAUUCGAACGUCUGCCCUAUCAACU--AUGGUAGUCGCCGUGCCUACCAUGGUGACCACGGGU-CGGGGAAUCAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCCAAGGAAGGCAGCAGGCGCGCAAAUUACCCACUCCCGCGGGGAGGUAGUGACGAAAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCGUAUAUUAAAGUUGCUGCAGUUAAAAAGCUCGUAGUUGGGACGGCCGGGGGCAUUCGUAUUGCGCCGCUAGAGGUGAAAUUCUUGGACCGGCGCAAGACGGACCAGAGCGAAAGCAUUUGCCAAGAAUGUUUUCAUUAAUCAAGAACGAAAGUCGGAGGUUCGAAGACGAUCAGAUACCGUCGUAGUUCCGACCAUAAACGAUGCCGACCGAGUCGGUUCCGGGGGGAGUAUGGUUGCAAAGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGAAACCUCACCCGGCCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGCGAUUUGUCUGGUUAAUUCCGAUAACGAACGAGACUCUGGGACAAGUGGCGCCA-CCCGAGA-UUGA-GCAAUAACAGGUCUGUGAUGCCCUUGUCCGGGGCUGCACGCGCGCUACACUGCGUGAUGGGGAUCGGGGAUUGCAAUUCCCCAUGAACGAGGAAUUCCCAGUAAGUGCGGGUCAUA-AGCUUGCGUUGAUUAAGUCCCUGCCCUU   [922]
+Coprinus     AGUAGUCA-UAUGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUAAAUCAGUUAUUUUAUUUUAUGGCAUGGAUAACUGUGGCUAGAGCUAAUACACUUCAUUCAAAUAUCUGCCCUAUCAACU--AUGGUAGGAUAGUGGCCUACCAUGGUUUCAACGGGU-CGGGGAAUAAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCCAAGGAAGGCAGCAGGCGCGCAAAUUACCCAAUCC-GCGGGGAGGUAGUGACAAUAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUA-CGUAUAUUAAAGUUGUUGCAGUUAAAAAGCUCGUAGUUGGGAUAGUUGGGGGCAUUGGUAUUGAGUCGCUAGAGGUGAAAUUCUUGGAUUGACUCAAGACCAACUACUGCGAAAGCAUUUGCCAAGGAUGUUUUCAUUAAUCAAGAACGAAGGUUAGGGGAUCGAAAACGAUUAGAUACCGUUGUAGUCUUAACAGUAAACUAUGCCGACUAAGUCGGUUCUGGGGGG-GUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGUGUGGACGCUGCGGCUUAAUUUGACUCAACACGGGGAAACUCACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUCCGAUAACGAACGAGACCUUAAGACUGUCAGCGCUG-ACGGAAG-UUGAGGCAAUAACAGGUCUGUGAUGCCCUUGUUCUGGGC-GCACG-GUGCUACACUGCGUGCUGGGGAUAGAGCAUUGCAAUUGCUCUUCAACGAGGAAUACCUAGUAAGCGUGAGUCAUC-AGCUCGCGUUGAUUACGUCCCUGCCCUU   [922]
+Cryptomon    AGUAGUCA-UAUGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUAAAUCAGUUAUUUUAUUUGAUGGCAUGGAUAACCGUAGCUAGAGCUAAUACAAUUCAUUCAAAUUUCUGCCCUAUCAACU--AUGGUAGGAUAGAGGCCUACCAUGGUUUUAACGGGU-CGGAGAAUUAGGGUUCGAUUCCGGAGAGGGAGCCUGAGAGACGGCUACCACAUCCAAGGAAGGCAGCAGGCGCGCAAAUUACCCAAUCCCGCGGGGAGGUAGUGACAAUAAUUAGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCUAAUAGCGUAUAUUAAAGUUGUUGCAGUUAAAAAGCUCGUAGUCGGGACAGUUGGGGCCGUUUAUAUUUCGUUGUCAGAGGUGAAAUUCUUGGAUUUACGAAAGAUAAACUUCUGCGAAAGCAUUCGGCAAGGAUGUUUUCAUUGAUCAAGAACGAAAGUUAGGGGAUCGAAGACGAUCAGAUACCGUCGUAGUCUUAACCAUAAACUAUGCCGACUAAGUUGGUUCCGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGGAAACUUACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUCCGUUAACGAACGAGACCUCAGGACUAUUUGUAUGA-AUGGAAGUUUGAGGCAAUAACAGGUCUGUGAUGCCCUUGUUCUGGGCCGCACGCGCGCUACACUGCGUGAUGGGGAUAGAUUAUUGCAAUUAAUCUUCAACGAGGAAUUCCUAGUAAGCGCGAGUCAUC-AGCUCGCGUUGAUUACGUCCCUGCCCUU   [922]
+Achlya_bi    AGUAGUCA-UACGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUAUAUCAGUUAUUCUACUUGAUAGCUUGGAUAACCGUAGCUAGAGCUAAUACACAUCAAUUGAGUUUCUGCCCUAUCAGCU--AUGGUAGGAUAUGGGCCUACCAUGGCGUUAACGGGU-CGGGGAAUUAGGGUUUGAUUCCGGAGAGGGAGCCUUAGAAACGGCUACCACAUCCAAGGAAGGCAGCAGGCGCGUAAAUUACCCAAUCCUGCAGGGAGGUAGUGACAAUAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCCAAUAGCGUAUAUUAAAGUUGUUGCAGUUAAAAAGCUCGUAGUUGGGACAGUUGGGGGUAUUCAUAUUUCAACGUCAGAGGUGAAAUUCUUGGAUCGUUGAAAGAUGAGCUUAGGCGAAAGCAUUUACCAAGGAUGUUUUCAUUAAUCAAGAACGAAAGUUAGGGGAUCGAAGAUGAUUAGAUACCAUCGUAGUCUUAACCAUAAACUAUGCCGACUCAGUCGGUUCCGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGGAAUUGACGGAAGGGCACCACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGGAAACUUACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUCCGUUAACGAACGAGACCUCCGGACUUUCAGUACUG-AAGGAAGUUGGAGGCAAUAACAGGUCUGUGAUGCCCUUGUUCUGGGCCGCACGCGCGCUACACUGCGUGCUAGGGAUAGAUUAUUGCAAUUAAUCUUGAACGAGGAAUUCCUAGUAAACGCAAGUCAUC-AGCUUGCAUUGAUUACGUCCCUGCCCUU   [922]
+Costaria     AGUAGUCA-UACGCUUGUCUCA-AAGAUUAA-GCCAUGCAUGUCAGACUGCGAAUGGCUCAUUAUAUCAGUCAUUUUAUUUGAAAGCAUGGAUAACCGUAGCUAGAGCUAAUACAUUUCAUUCAAGUUUCUGCCCUAUCAGCU--AUGGUAGGGUAUUGGCCUACCAUGGCUUUAACGGGU-CGGGGAAUUGGGGUUCGAUUCCGGAGAGGGAGCCUGAGAAACGGCUACCACAUCCAAGGAAGG-AGCAGGCGCGUAAAUUACCCAAUCCUGCAGGGAGGUAGUGACAAUAAUUGGAGGGCAA-GUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUC-AAUAGCGUAUAUUAAAGUUGCUGCAGUUAAAAAGCUCGUAGUUGGAACGGUUGGGGGUAUUCGUAUUCAAUUGUCAGAGGUGAAAUUCUUGGAUUUAUGGAAGACGAACUACUGCGAAA-CGUUUACCAAGGAUGUUUUCAUUAAUCAAGAACGAAAGUUAGGGGAUCGAAGAUGAUUAGAUACCAUCGUAGUCUUAACCAUAAACUAUGCCGACUAAGUCGGUUCCGGGGGGAGUAUGGUCGCAAGGCUGAAACUUAAAGAAAUUGACGGAAGGGCACCACCAGGAGUGGAGCCUGCGGCUUAAUUUGACUCAACACGGGGAAACUUACCAGGUCUGGUGGUGCAUGGCCGUUCUUAGUUGGUGGAGUGAUUUGUCUGGUUAAUUC-GUUAACGAACGAGACCCCCGGACUUUCGGUACCG-AAG-AAGUUGGGGGCAAUAACAGGUCUGUGAUGCCCUUGUCCUGGGCCGCACGCGCGCUACACUGCGUGAUAGGGAUAGAUCAUUGCAAUUGAUCUUGAACGAGGAAUUCCUAGUAAACGCGAGUCAUC-AGCUCGCAUUGAUUACGUCCCUGCCCUU   [922]
+Giardia_m    GGAGUA-C-UACGCUA-CCCCA-AGGACAAAAGCCAUGCAAGCGCAGUGGCGGACGGCUCGGUACAACGGUACGUCUGACCGGGGGGACGGAUACCGCUGGCCAGCGCCAAGACGGCCCGGAUGAGGUUCCGAGGUAUUACCU--UCGGUAGAGUAGUGGUCUACGGAGGGGAUGAUGCCU-CGGAGGAUCAGGGUUUGACUCCGGAGAACGGGCCUGAGAGACGGCCCGUACAUCCAAGGACGGCAGCAGGCGCGGAACUUGCCCAAUGCGUGCGUGAGGCAGCAACGGGGGGUCGAGGGAAAGGUCUG-GUGCCAGCAGCCGCGGUAAUUC-CAGCUCGGCAGGCGUCGUACGGCGCUGUUGCAGUUAAAACGUCCGGAGUCUGAAUGGGUAAGGGCAUGUGUAUUGGUGGGGGACGGGUGAAAUAGGAUGAUCCGACCAAGACAGACAAAGGCGUAGGCACUUGCCAAGACCAUAUCAGUCGAACCAGGACGAAGCCCGGGGGCGAGAAGGCGAUUAGACACCACCGUAUUCCCGGGCGUAAACGAUGCCACCGAAGGGGGCUCUGGGGGGAGUAUGGCCGCAAGGCUGGAACUUGAAGGCAUUGACGGAGGGGUACCACCAGACGUGGAGUCUGCGGCUCAAUUUGACUCAACGCGAACA-CCUUACCAGGCCUGGUGGUGCAUGGCCGUUCACAGCCCGUGGCUUGAGCCGUCUGCUUGACUGCGACAACGAGCGAGACCCUAAGACCGCCAAUAUUG-GAGGAA-GGUGGGGCGAUAACAGGUCUGUGAUGCCCUUGCCCUGGGCUGCACGCGUACUACACUGCGUGGUUGGGAUCGUGGACUGGAAGUCCUCGUGAACCUGGAAUGUCUAGUAGGCGUAGGUCAUC-AAUCUACGCCGGAUACGUCCCUGCCCCU   [922]
+Hexamita     AGAAGG-G-CAAGCUAAGAUUA-AGGAUUAA-GCCAUGCAUGCCGUGUGAAACUGCGAACGGCUCAUUAAAUCAUGAAAUGUCUCGGUCGGCUAAUACGGGUCUCUUACUAAUCGGGCACGAUCAGCUUCUGACGCAUCAUUA--UAGGAGAAGUAAAGGUUAUCCUAUGAGUUCACGCGU-CGGAGAAUUAGGGUUCGACUCCGGAGAAUGAGCAUGAGAGACGGCUCAUAGUUCUAAGGGAGGCAGCAGGCGCGGAAAUUGCCCAAUGUACGUACGAGGCAGUGACGAAAAUUAGUGGGAGA-GCAUG-GUGCCAGCAGCCGCGGUAAUUC-CAUCACUGAUAGCUUUCUCUUACGUUGUUGCAGUUAAAAAGCUCGUAGCCUGGCGGCGCGGGACUGAUAGUAUUUGGUGGGGACAGGUGAAAUAGGAUGAUCCACCAAGGACUUUCAACAGCGAAGGCAGUCGGUAAGCGCCGUCCAGUUGGUCAAGAGCGAAAGUGUGAGGAUAGACGAUGAUUAGAAACCGUUUUAUUUCACGCCUUAAACGAUACCAUCUCAAGUGGUUCUGGGGGAAGUAUGAUUGCAAGAUUGAAACUUGAAGGUAUUGACGGAAAGAUACCACAAGACGUGGAGUCUGCGGCUCAAUUUGACUCAACACGCAAA-UCUUACCAGACCUGGUGGUGCAUGGCCGCUCCUAGUUCGUGGUGUAAACUGUCUGCCUUAUUGCGUUAACGAGCGAGACAACCAGAGCGCCUGCGCAG-GACGAACGUGGUUGCUCUAGCAGGUCUGUGAUGCCCUUACUCUGGGCCGCACGCGUACUACAAUGCGUGGUAGGGAUCGGAGAUUGGAAUUUCUCGUGAACGAGGAAUGUCUUGUAGGUCUGCGUUAUU-AGCGCGGGCUGACUAUGUCCCUGUCUUU   [922]
+;
+END;
+
+
+
+ [from Barns et al. 1996 PNAS 93:9188]
+
+ begin trees;
+ 
+  utree pnas=((((Flexibact: 0.061548,Flavobact: 0.119902): 0.087037,Planctomy: 
+  0.169467): 0.027312,((Desulfovi: 0.097401,((Gloeobact: 0.046858,Synechoco: 0.073425): 
+  0.061168,((Leptonema: 0.125621,(Chlorobiu: 0.129472,Chlamydia: 0.150492): 0.029726): 
+  0.037272,((Heliobact: 0.086575,(Bacillus_: 0.066895,Clostridi: 0.085782): 0.023452): 
+  0.024500,(((Thermomic: 0.119327,Thermus_t: 0.079374): 0.029503,(Thermotog: 0.063284,(Aquifex_p: 
+  0.098565,((Tritricho: 0.199512,((Giardia_m: 0.175678,Hexamita_: 0.219659): 0.080379,((Vairimorp: 
+  0.213154,Encephali: 0.146264): 0.170005,(Physarum_: 0.175572,(((Dictyoste: 0.101802,((Porphyra_: 
+  0.046000,(Babesia_c: 0.052891,Parameciu: 0.032407): 0.017681): 0.009800,((Costaria_: 
+  0.025262,Achlya_bi: 0.027623): 0.021980,(Zea_mays: 0.032720,((Coprinus_: 0.030974,Homo_sapi: 
+  0.090566): 0.020407,Cryptomon: 0.027381): 0.001796): 0.009373): 0.010101): 0.038120): 
+  0.024795,(Naegleria: 0.167833,Entamoeba: 0.135045): 0.039350): 0.023652,(Trypanoso: 
+  0.157718,Euglena_g: 0.151850): 0.081778): 0.024206): 0.046480): 0.037954): 0.047334): 
+  0.202657,((pJP_27: 0.043872,pJP_78: 0.033809): 0.073583,((((marineSBA: 0.209428,pSL_12: 
+  0.060451): 0.064360,(pSL_22: 0.085505,(pSL_4: 0.099318,pSL_17: 0.038405): 0.013970): 
+  0.006916): 0.021609,(((Pyrodicti: 0.019002,(Desulfuro: 0.031971,Sulfolobu: 0.083252): 
+  0.014211): 0.028489,(Thermofil: 0.043513,Thermopro: 0.080630): 0.020236): 0.017727,(pSL_50: 
+  0.096937,pJP_96: 0.061339): 0.015609): 0.013605): 0.021279,(Methanopy: 0.083784,((Methanoba: 
+  0.104398,Methanoth: 0.029354): 0.036693,(Thermococ: 0.067505,(((Thermopla: 0.176997,(Methanosp: 
+  0.114294,Haloferax: 0.133982): 0.056133): 0.041739,Archaeogl: 0.077897): 0.016797,(Methco.va: 
+  0.104705,Methco.ja: 0.022091): 0.046189): 0.015186): 0.017027): 0.013439): 0.041431): 
+  0.035926): 0.036530): 0.132286): 0.042630): 0.024804): 0.066252,Arthrobac: 0.101839): 
+  0.017192): 0.024545): 0.021638): 0.023967): 0.022243,Agrobacte: 0.097634): 0.010306): 
+  0.030954,Rhodocycl: 0.087659,E.coli: 0.086111):0.0; 
+  
+end;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/baseml.pairwise
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/baseml.pairwise	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/baseml.pairwise	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+
+seed used = 30455833
+BASEML (in paml 3.14, January 2004)  m.phy  HKY85 dGamma (ncatG=5)
+ns = 3  	ls = 57
+# site patterns = 12
+    7   16   15    1    3    2    1    1    8    1    1    1
+
+wih99_snap                           GTAGAGTACT TT
+wm276_snap                           ...AGAG..G A.
+jec21_snap                           ....GACT.. CC
+
+
+
+
+Frequencies..
+                                    T      C      A      G
+wih99_snap                     0.3509 0.1404 0.3333 0.1754
+wm276_snap                     0.2982 0.1404 0.3509 0.2105
+jec21_snap                     0.3158 0.1930 0.2982 0.1930
+
+Average                        0.3216 0.1579 0.3275 0.1930
+
+# constant sites:     46 (80.70%)
+ln Lmax (unconstrained) = -110.532715
+
+Distances:HKY85 (kappa)  (alpha set at 0.50)
+This matrix is not used in later m.l. analysis.
+
+wih99_snap       
+wm276_snap         0.3240( 9.3595)
+jec21_snap         0.2974(33.1197)  0.1343( 1.1101)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/baseml.usertree
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/baseml.usertree	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/baseml.usertree	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+
+seed used = 30455833
+BASEML (in paml 3.14, January 2004)  m.phy  HKY85 dGamma (ncatG=5)
+ns = 3  	ls = 57
+# site patterns = 12
+    7   16   15    1    3    2    1    1    8    1    1    1
+
+wih99_snap                           GTAGAGTACT TT
+wm276_snap                           ...AGAG..G A.
+jec21_snap                           ....GACT.. CC
+
+
+
+
+Frequencies..
+                                    T      C      A      G
+wih99_snap                     0.3509 0.1404 0.3333 0.1754
+wm276_snap                     0.2982 0.1404 0.3509 0.2105
+jec21_snap                     0.3158 0.1930 0.2982 0.1930
+
+Average                        0.3216 0.1579 0.3275 0.1930
+
+# constant sites:     46 (80.70%)
+ln Lmax (unconstrained) = -110.532715
+
+Distances:HKY85 (kappa)  (alpha set at 0.50)
+This matrix is not used in later m.l. analysis.
+
+wih99_snap       
+wm276_snap         0.3240( 9.3595)
+jec21_snap         0.2974(33.1197)  0.1343( 1.1101)
+
+TREE #  1:  (2, 3, 1);  MP score: 13.00
+lnL(ntime:  3  np:  3):   -129.328757   +0.000000
+   4..2     4..3     4..1  
+  0.08669  0.05274  0.15105
+
+tree length =   0.29048
+
+(wm276_snap, jec21_snap, wih99_snap);
+
+(wm276_snap: 0.086692, jec21_snap: 0.052740, wih99_snap: 0.151050);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/basic-bush.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/basic-bush.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/basic-bush.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,25 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B C D E F G H;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=6;
+      format datatype=protein missing=? gap=-;
+      charlabels one two three four five six;
+      matrix
+A     WITH-B
+B     WITH-A
+C     WITH-D
+D     WITH-C
+E     WITH-F
+F     WITH-E
+G     WITH-H
+H     WITH-G;
+END;
+
+BEGIN TREES;
+       tree basic_bush = (((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/basic-ladder.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/basic-ladder.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/basic-ladder.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,25 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B C D E F G H;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels one two three four five;
+      matrix
+A     --ONE
+B     --ONE
+C     TWO--
+D     THREE
+E     F-OUR
+F     FIVE-
+G     SIX--
+H     SEVEN;
+END;
+
+BEGIN TREES;
+       tree basic_ladder = (((((((A:1,B:1):1,C:2):1,D:3):1,E:4):1,F:5):1,G:6):1,H:7);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+## sequence-region Contig1 1 37450
+Contig1	confirmed	transcript	1001	2000	42	+	.	Transcript trans-1; Gene "abc-1"; Gene "xyz-2"; Note "function unknown"
+Contig1	confirmed	exon	1001	1100	.	+	.	Transcript trans-1
+Contig1	confirmed	exon	1201	1300	.	+	.	Transcript trans-1
+Contig1	confirmed	exon	1401	1450	.	+	.	Transcript trans-1
+Contig1	confirmed	CDS	1051	1100	.	+	0	Transcript trans-1
+Contig1	confirmed	CDS	1201	1300	.	+	2	Transcript trans-1
+Contig1	confirmed	CDS	1401	1440	.	+	0	Transcript trans-1
+Contig1	est	similarity	1001	1100	96	.	.	Target "EST:CEESC13F" 1 100
+Contig1	est	similarity	1201	1300	99	.	.	Target "EST:CEESC13F" 101 200
+Contig1	est	similarity	1401	1450	99	.	.	Target "EST:CEESC13F" 201 250
+Contig1	tc1	transposon	5001	6000	.	+	.	Transposon c128.1
+Contig1	tc1	transposon	8001	9000	.	-	.	Transposon c128.2
+Contig1	confirmed	transcript	30001	31000	.	-	.	Transcript trans-2; Gene "xyz-2"; Note "Terribly interesting"
+Contig1	confirmed	exon	30001	30100	.	-	.	Transcript trans-2; Gene "abc-1"; Note "function unknown"
+Contig1	confirmed	exon	30701	30800	.	-	.	Transcript trans-2
+Contig1	confirmed	exon	30801	31000	.	-	.	Transcript trans-2
+
+## sequence-region Contig2 1 37450
+Contig2	clone	Component	1	2000	.	.	.	Target "Clone:AL12345.1" 1 2000; Note "Terribly interesting"
+Contig2	clone	Component	2001	5000	.	.	.	Target "Clone:AL11111.1" 6000 3001
+Contig2	clone	Component	5001	20000	.	.	.	Target "Clone:AC13221.2" 1 15000
+Contig2	clone	Component	2001	37450	.	.	.	Target "Clone:M7.3" 1001 36450
+Contig2	predicted	transcript	2501	4500	.	+	.	Transcript trans-3 ; Alias trans-18
+Contig2	predicted	transcript	5001	8001	.	-	.	Transcript trans-4
+
+
+#processed_transcript
+Contig3	clone	Component	1	50000	.	.	.	Clone AL12345.2
+Contig3	confirmed	mRNA	32000	35000	.	+	.	mRNA trans-8
+Contig3	confirmed	UTR	32000	32100	.	+	.	mRNA trans-8
+Contig3	confirmed	CDS	32101	33000	.	+	.	mRNA trans-8
+Contig3	confirmed	CDS	34000	34500	.	+	.	mRNA trans-8
+Contig3	confirmed	CDS	34600	34900	.	+	.	mRNA trans-8
+Contig3	confirmed	UTR	34901	35000	.	+	.	mRNA trans-8
+
+## preferred group assignments
+Contig4	clone	Component	1	50000	.	.	.	Clone ABC123
+Contig4	confirmed	gene	32000	35000	.	+	.	Misc thing1 ; gene gene-9
+Contig4	confirmed	mRNA	32000	35000	.	+	.	Misc thing2 ; mRNA trans-9 ; gene gene-9
+Contig4	confirmed	CDS	32000	35000	.	+	.	Misc thing3 ; mRNA trans-9
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/biodbgff/test.gff3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,43 @@
+##gff-version 3
+## sequence-region Contig1 1 37450
+Contig1	confirmed	transcript	1001	2000	42	+	.	ID=Transcript:trans-1;Gene=abc-1;Gene=xyz-2;Note=function+unknown
+Contig1	confirmed	exon	1001	1100	.	+	.	ID=Transcript:trans-1
+Contig1	confirmed	exon	1201	1300	.	+	.	ID=Transcript:trans-1
+Contig1	confirmed	exon	1401	1450	.	+	.	ID=Transcript:trans-1
+Contig1	confirmed	CDS	1051	1100	.	+	0	ID=Transcript:trans-1
+Contig1	confirmed	CDS	1201	1300	.	+	2	ID=Transcript:trans-1
+Contig1	confirmed	CDS	1401	1440	.	+	0	ID=Transcript:trans-1
+Contig1	est	similarity	1001	1100	96	.	.	Target=EST:CEESC13F 1 100 +
+Contig1	est	similarity	1201	1300	99	.	.	Target=EST:CEESC13F 101 200 +
+Contig1	est	similarity	1401	1450	99	.	.	Target=EST:CEESC13F 201 250 +
+Contig1	tc1	transposon	5001	6000	.	+	.	ID=Transposon:c128.1
+Contig1	tc1	transposon	8001	9000	.	-	.	ID=Transposon:c128.2
+Contig1	confirmed	transcript	30001	31000	.	-	.	ID=Transcript:trans-2;Gene=xyz-2;Note=Terribly+interesting
+Contig1	confirmed	exon	30001	30100	.	-	.	ID=Transcript:trans-2;Gene=abc-1;Note=function+unknown
+Contig1	confirmed	exon	30701	30800	.	-	.	ID=Transcript:trans-2
+Contig1	confirmed	exon	30801	31000	.	-	.	ID=Transcript:trans-2
+
+## sequence-region Contig2 1 37450
+Contig2	clone	Component	1	2000	.	.	.	Target=Clone:AL12345.1 1 2000 +;Note=Terribly+interesting
+Contig2	clone	Component	2001	5000	.	.	.	Target=Clone:AL11111.1 6000 3001 +
+Contig2	clone	Component	5001	20000	.	.	.	Target=Clone:AC13221.2 1 15000 +
+Contig2	clone	Component	2001	37450	.	.	.	Target=Clone:M7.3 1001 36450 +
+Contig2	predicted	transcript	2501	4500	.	+	.	ID=Transcript:trans-3;Alias=trans-18
+Contig2	predicted	transcript	5001	8001	.	-	.	ID=Transcript:trans-4
+
+
+#processed_transcript
+Contig3	clone	Component	1	50000	.	.	.	ID=Clone:AL12345.2
+Contig3	confirmed	mRNA	32000	35000	.	+	.	ID=mRNA:trans-8
+Contig3	confirmed	UTR	32000	32100	.	+	.	ID=mRNA:trans-8
+Contig3	confirmed	CDS	32101	33000	.	+	.	ID=mRNA:trans-8
+Contig3	confirmed	CDS	34000	34500	.	+	.	ID=mRNA:trans-8
+Contig3	confirmed	CDS	34600	34900	.	+	.	ID=mRNA:trans-8
+Contig3	confirmed	UTR	34901	35000	.	+	.	ID=mRNA:trans-8
+
+## preferred group assignments
+Contig4	clone	Component	1	50000	.	.	.	ID=Clone:ABC123
+Contig4	confirmed	gene	32000	35000	.	+	.	ID=Misc:thing1;gene=gene-9
+Contig4	confirmed	mRNA	32000	35000	.	+	.	ID=Misc:thing2;mRNA=trans-9;gene=gene-9
+Contig4	confirmed	CDS	32000	35000	.	+	.	ID=Misc:thing3;mRNA=trans-9
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.cor
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.cor
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.fpc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.fpc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/biofpc.fpc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3360 @@
+// fpc project demo
+// 8.5.1  Date: 10:54 Fri 18 Aug 2006  User: will
+// Contigs 10  Clones 355  Markers 15  Bands 9772
+// Framework Maize Label Chromosome Abbrev Chr Genome 0 AvgBand 4096  AvgInsert 160000
+// Configure 429 Tol 7 Cut 1e-13 Apx 0.100 Gel 3300 End 15 Kill -1 Bad 15 Best 10 Log 0 Std 1 Page 3000 Match 2 agarose
+// CpM Off 50 1 0 0 TBL 1 1e-12 2 1e-11 3 1e-10
+// Build 10/6/105 15:24 Cut 1e-13 Off 50 1 0 0 TBL 1 1e-12 2 1e-11 3 1e-10 
+// Clip(0 4600) MinMax(0 65535) Precompute UseSeq DQer(5,1)
+// MTP Mndtry 62 MinOlap -5 MaxOlap 10 MinShared 6 Weight 10 MinScore 400 Identity 97 MaxSeqOlap 50000 CtgRatio 3
+// BES_pre 0 BES_suff 1 BES_dot 0
+  
+BAC : "b0297K22"
+Map "ctg9" Ends Left 9.000
+Map "ctg9" Ends Right 32.000 Oldctg 9
+Gel_number    b1297B1
+Bands  4386 24
+Remark "this is a test"
+Shotgun NONE SHOTGUN
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "b0303H17"
+Map "ctg8" Ends Left 27.000
+Map "ctg8" Ends Right 58.000 Oldctg 8
+Gel_number    b8303C1
+Bands  7476 32
+Shotgun NONE TILE
+Positive_STS "A07" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0306N07"
+Map "ctg2" Ends Left 8.000
+Map "ctg2" Ends Right 36.000 Oldctg 2
+Gel_number    b8306C1
+Bands  7508 29
+Exact_match_to_cosmid "c1074L23"
+Shotgun NONE SENT
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0306N18"
+Map "ctg3" Ends Left 78.000
+Map "ctg3" Ends Right 106.000 Oldctg 3
+Gel_number    b8306D1
+Bands  7537 29
+Approximate_match_to_cosmid "c0186L14"
+Shotgun NONE READY
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0307M15"
+Map "ctg1" Ends Left 36.000
+Map "ctg1" Ends Right 53.000 Oldctg 1
+Gel_number    b1307A1
+Bands  4410 18
+Exact_match_to_cosmid "c1024E06"
+Shotgun NONE CANCELLED
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0316M15"
+Map "ctg2" Ends Left 33.000
+Map "ctg2" Ends Right 60.000 Oldctg 2
+Gel_number    b1316A1
+Bands  4428 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0319B18"
+Map "ctg5" Ends Left 30.000
+Map "ctg5" Ends Right 64.000 Oldctg 5
+Gel_number    b1319D1
+Bands  4456 35
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0324I14"
+Map "ctg9" Ends Left 7.000
+Map "ctg9" Ends Right 29.000 Oldctg 9
+Gel_number    b1324B1
+Bands  4491 23
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0325O10"
+Map "ctg4" Ends Left 72.000
+Map "ctg4" Ends Right 95.000 Oldctg 4
+Gel_number    b1325B1
+Bands  4514 24
+Exact_match_to_cosmid "H0054L03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0326L07"
+Map "ctg9" Ends Left 23.000
+Map "ctg9" Ends Right 47.000 Oldctg 9
+Gel_number    b1326C1
+Bands  4538 25
+Positive_STS "C24" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0327I21"
+Map "ctg7" Ends Left 9.000
+Map "ctg7" Ends Right 29.000 Oldctg 7
+Gel_number    b1327A1
+Bands  4563 21
+Approximate_match_to_cosmid "c1115H20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0332M08"
+Map "ctg1" Ends Left 54.000
+Map "ctg1" Ends Right 89.000 Oldctg 1
+Gel_number    b1332B1
+Bands  4584 36
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0333F08"
+Map "ctg1" Ends Left 1.000
+Map "ctg1" Ends Right 29.000 Oldctg 1
+Gel_number    b1333D1
+Bands  4620 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0334C23"
+Map "ctg4" Ends Left 50.000
+Map "ctg4" Ends Right 72.000 Oldctg 4
+Gel_number    b1334A1
+Bands  4649 23
+Approximate_match_to_cosmid "c1082B18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0336D23"
+Map "ctg7" Ends Left 3.000
+Map "ctg7" Ends Right 26.000 Oldctg 7
+Gel_number    b1336C1
+Bands  4672 24
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0336J22"
+Map "ctg6" Ends Left 62.000
+Map "ctg6" Ends Right 83.000 Oldctg 6
+Gel_number    b1336D1
+Bands  4696 22
+Exact_match_to_cosmid "c1085E05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0340P21"
+Map "ctg3" Ends Left 20.000
+Map "ctg3" Ends Right 39.000 Oldctg 3
+Gel_number    b1340C1
+Bands  4718 20
+Exact_match_to_cosmid "b1584L19"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0341J19"
+Map "ctg1" Ends Left 13.000
+Map "ctg1" Ends Right 38.000 Oldctg 1
+Gel_number    b1341C1
+Bands  4776 26
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0341K22"
+Map "ctg6" Ends Left 71.000
+Map "ctg6" Ends Right 92.000 Oldctg 6
+Gel_number    b1341B1
+Bands  4754 22
+Approximate_match_to_cosmid "c1085E05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0341O09"
+Map "ctg8" Ends Left 11.000
+Map "ctg8" Ends Right 26.000 Oldctg 8
+Gel_number    b1341A1
+Bands  4738 16
+Exact_match_to_cosmid "b1622G13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0343C02"
+Map "ctg2" Ends Left 36.000
+Map "ctg2" Ends Right 68.000 Oldctg 2
+Gel_number    b1343B1
+Bands  4802 33
+Positive_STS "A01" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0343L16"
+Map "ctg7" Ends Left 5.000
+Map "ctg7" Ends Right 23.000 Oldctg 7
+Gel_number    b1343D1
+Bands  4835 19
+Approximate_match_to_cosmid "b0336D23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0347L04"
+Map "ctg7" Ends Left 0.000
+Map "ctg7" Ends Right 19.000 Oldctg 7
+Gel_number    b1347D1
+Bands  4854 20
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0368F18"
+Map "ctg3" Ends Left 61.000
+Map "ctg3" Ends Right 83.000 Oldctg 3
+Gel_number    b1368D1
+Bands  4898 23
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0368G07"
+Map "ctg3" Ends Left 109.000
+Map "ctg3" Ends Right 132.000 Oldctg 3
+Gel_number    b1368A1
+Bands  4874 24
+Approximate_match_to_cosmid "c1065C21"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0372I14"
+Map "ctg4" Ends Left 13.000
+Map "ctg4" Ends Right 42.000 Oldctg 4
+Gel_number    b1372B1
+Bands  4921 30
+Positive_STS "D57" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0376P09"
+Map "ctg9" Ends Left 6.000
+Map "ctg9" Ends Right 26.000 Oldctg 9
+Gel_number    b1376C1
+Bands  4951 21
+Approximate_match_to_cosmid "b0324I14"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0377M02"
+Map "ctg2" Ends Left 69.000
+Map "ctg2" Ends Right 101.000 Oldctg 2
+Gel_number    b1377B1
+Bands  4972 33
+Approximate_match_to_cosmid "H0002F22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0379F02"
+Map "ctg6" Ends Left 26.000
+Map "ctg6" Ends Right 50.000 Oldctg 6
+Gel_number    b1379D1
+Bands  5005 25
+Approximate_match_to_cosmid "H0082F20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0380N03"
+Map "ctg6" Ends Left 8.000
+Map "ctg6" Ends Right 32.000 Oldctg 6
+Gel_number    b1380C1
+Bands  5030 25
+Approximate_match_to_cosmid "c1053I24"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0382H22"
+Map "ctg3" Ends Left 121.000
+Map "ctg3" Ends Right 152.000 Oldctg 3
+Gel_number    b1382D1
+Bands  5055 32
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0382N02"
+Map "ctg6" Ends Left 4.000
+Map "ctg6" Ends Right 35.000 Oldctg 6
+Gel_number    b1382D1
+Bands  5087 32
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0390G23"
+Map "ctg5" Ends Left 14.000
+Map "ctg5" Ends Right 41.000 Oldctg 5
+Gel_number    b1390A1
+Bands  5119 28
+Approximate_match_to_cosmid "H0005K08"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0391G06"
+Map "ctg1" Ends Left 53.000
+Map "ctg1" Ends Right 86.000 Oldctg 1
+Gel_number    b1391B1
+Bands  5147 34
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0393C16"
+Map "ctg2" Ends Left 72.000
+Map "ctg2" Ends Right 101.000 Oldctg 2
+Gel_number    b1393B1
+Bands  5181 30
+Approximate_match_to_cosmid "H0002F22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0398K02"
+Map "ctg8" Ends Left 12.000
+Map "ctg8" Ends Right 22.000 Oldctg 8
+Gel_number    b1398B1
+Bands  5211 11
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0398L18"
+Map "ctg3" Ends Left 76.000
+Map "ctg3" Ends Right 97.000 Oldctg 3
+Gel_number    b1398D1
+Bands  5222 22
+Approximate_match_to_cosmid "c0186L14"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0399D10"
+Map "ctg3" Ends Left 55.000
+Map "ctg3" Ends Right 77.000 Oldctg 3
+Gel_number    b1399D1
+Bands  5244 23
+Approximate_match_to_cosmid "H0006G12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0406I02"
+Map "ctg6" Ends Left 71.000
+Map "ctg6" Ends Right 93.000 Oldctg 6
+Gel_number    b1406B1
+Bands  5288 23
+Exact_match_to_cosmid "c1085E05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0406I15"
+Map "ctg3" Ends Left 29.000
+Map "ctg3" Ends Right 49.000 Oldctg 3
+Gel_number    b1406A1
+Bands  5267 21
+Approximate_match_to_cosmid "b1246P23"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0407D07"
+Map "ctg2" Ends Left 32.000
+Map "ctg2" Ends Right 52.000 Oldctg 2
+Gel_number    b1407C1
+Bands  5311 21
+Exact_match_to_cosmid "b1522B13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0411C10"
+Map "ctg10" Ends Left 8.000
+Map "ctg10" Ends Right 30.000 Oldctg 10
+Gel_number    b1411B1
+Bands  5332 23
+Approximate_match_to_cosmid "H0201H09"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0412E22"
+Map "ctg8" Ends Left 47.000
+Map "ctg8" Ends Right 71.000 Oldctg 8
+Gel_number    b1412B1
+Bands  5355 25
+Positive_STS "A07" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0419N02"
+Map "ctg3" Ends Left 49.000
+Map "ctg3" Ends Right 71.000 Oldctg 3
+Gel_number    b1419D1
+Bands  5380 23
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b0435L06"
+Map "ctg4" Ends Left 80.000
+Map "ctg4" Ends Right 107.000 Oldctg 4
+Gel_number    b1435D1
+Bands  9538 28
+Approximate_match_to_cosmid "c1050F12"
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "b0439K14"
+Map "ctg5" Ends Left 35.000
+Map "ctg5" Ends Right 62.000 Oldctg 5
+Gel_number    b1439B1
+Bands  5403 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0441C07"
+Map "ctg9" Ends Left 35.000
+Map "ctg9" Ends Right 55.000 Oldctg 9
+Gel_number    b1441A1
+Bands  5431 21
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0443N08"
+Gel_number    b1443D1
+Bands  5452 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "b0447G07"
+Map "ctg9" Ends Left 20.000
+Map "ctg9" Ends Right 41.000 Oldctg 9
+Gel_number    b1447A1
+Bands  5482 22
+Positive_STS "C24" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0449B24"
+Map "ctg1" Ends Left 69.000
+Map "ctg1" Ends Right 92.000 Oldctg 1
+Gel_number    b1449D1
+Bands  5504 24
+Approximate_match_to_cosmid "c1032M05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b0452G20"
+Map "ctg2" Ends Left 57.000
+Map "ctg2" Ends Right 85.000 Oldctg 2
+Gel_number    b1452B1
+Bands  5528 29
+Positive_STS "A01" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b0461E16"
+Map "ctg6" Ends Left 19.000
+Map "ctg6" Ends Right 41.000 Oldctg 6
+Gel_number    b1461B1
+Bands  5557 23
+Approximate_match_to_cosmid "b1531L10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1004I23"
+Map "ctg3" Ends Left 95.000
+Map "ctg3" Ends Right 124.000 Oldctg 3
+Gel_number    b1004A1
+Bands  3756 30
+Approximate_match_to_cosmid "c1098L12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1046O08"
+Map "ctg1" Ends Left 60.000
+Map "ctg1" Ends Right 94.000 Oldctg 1
+Gel_number    b1046B1
+Bands  3786 35
+Positive_STS "J9" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1075H04"
+Map "ctg1" Ends Left 0.000
+Map "ctg1" Ends Right 29.000 Oldctg 1
+Gel_number    b1075D1
+Bands  3821 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1075N16"
+Map "ctg3" Ends Left 52.000
+Map "ctg3" Ends Right 72.000 Oldctg 3
+Gel_number    b1075D1
+Bands  3851 21
+Exact_match_to_cosmid "b1503B23"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1081C22"
+Map "ctg6" Ends Left 45.000
+Map "ctg6" Ends Right 73.000 Oldctg 6
+Gel_number    b1081B1
+Bands  3872 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1083O09"
+Map "ctg3" Ends Left 121.000
+Map "ctg3" Ends Right 150.000 Oldctg 3
+Gel_number    b1083A1
+Bands  3901 30
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1085K18"
+Map "ctg3" Ends Left 120.000
+Map "ctg3" Ends Right 137.000 Oldctg 3
+Gel_number    b1085B1
+Bands  3931 18
+Exact_match_to_cosmid "c1065C21"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1101J03"
+Map "ctg2" Ends Left 14.000
+Map "ctg2" Ends Right 51.000 Oldctg 2
+Gel_number    b1101C1
+Bands  3949 38
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1103E10"
+Map "ctg9" Ends Left 7.000
+Map "ctg9" Ends Right 29.000 Oldctg 9
+Gel_number    b1103B1
+Bands  3987 23
+Exact_match_to_cosmid "b0324I14"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1153C20"
+Map "ctg1" Ends Left 77.000
+Map "ctg1" Ends Right 105.000 Oldctg 1
+Gel_number    b1153B1
+Bands  9509 29
+Approximate_match_to_cosmid "c1032M05"
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "b1154L07"
+Map "ctg6" Ends Left 4.000
+Map "ctg6" Ends Right 32.000 Oldctg 6
+Gel_number    b1154C1
+Bands  4010 29
+Exact_match_to_cosmid "c1053I24"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1158P05"
+Map "ctg5" Ends Left 14.000
+Map "ctg5" Ends Right 47.000 Oldctg 5
+Gel_number    b1158C1
+Bands  4039 34
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1177F07"
+Map "ctg1" Ends Left 47.000
+Map "ctg1" Ends Right 75.000 Oldctg 1
+Gel_number    b1177C1
+Bands  4073 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1245K13"
+Map "ctg3" Ends Left 52.000
+Map "ctg3" Ends Right 70.000 Oldctg 3
+Gel_number    b1245A1
+Bands  4102 19
+Exact_match_to_cosmid "H0006G12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1246P23"
+Map "ctg3" Ends Left 30.000
+Map "ctg3" Ends Right 52.000 Oldctg 3
+Gel_number    b1246C1
+Bands  4121 23
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1257O16"
+Map "ctg8" Ends Left 33.000
+Map "ctg8" Ends Right 69.000 Oldctg 8
+Gel_number    b1257B1
+Bands  4144 37
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1259M17"
+Map "ctg7" Ends Left 10.000
+Map "ctg7" Ends Right 30.000 Oldctg 7
+Gel_number    b1259A1
+Bands  4181 21
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1264H12"
+Map "ctg3" Ends Left 52.000
+Map "ctg3" Ends Right 78.000 Oldctg 3
+Gel_number    b1264D1
+Bands  4202 27
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1264N04"
+Map "ctg6" Ends Left 26.000
+Map "ctg6" Ends Right 50.000 Oldctg 6
+Gel_number    b1264D1
+Bands  4229 25
+Positive_Locus "J5" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1266K23"
+Map "ctg4" Ends Left 62.000
+Map "ctg4" Ends Right 81.000 Oldctg 4
+Gel_number    b1266A1
+Bands  4254 20
+Exact_match_to_cosmid "c1082E16"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1274A11"
+Map "ctg3" Ends Left 63.000
+Map "ctg3" Ends Right 88.000 Oldctg 3
+Gel_number    b1274A1
+Bands  4303 26
+Approximate_match_to_cosmid "H0198H02"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1274M03"
+Map "ctg3" Ends Left 138.000
+Map "ctg3" Ends Right 166.000 Oldctg 3
+Gel_number    b1274A1
+Bands  4274 29
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1282L07"
+Map "ctg2" Ends Left 28.000
+Map "ctg2" Ends Right 56.000 Oldctg 2
+Gel_number    b1282C1
+Bands  4329 29
+Approximate_match_to_cosmid "b1522B13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1285J21"
+Map "ctg4" Ends Left 59.000
+Map "ctg4" Ends Right 86.000 Oldctg 4
+Gel_number    b1285C1
+Bands  4358 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1468I11"
+Map "ctg1" Ends Left 39.000
+Map "ctg1" Ends Right 74.000 Oldctg 1
+Gel_number    b1468A1
+Bands  5580 36
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1477K21"
+Map "ctg7" Ends Left 14.000
+Map "ctg7" Ends Right 28.000 Oldctg 7
+Gel_number    b1477A1
+Bands  5616 15
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1478J02"
+Map "ctg9" Ends Left 12.000
+Map "ctg9" Ends Right 33.000 Oldctg 9
+Gel_number    b1478D1
+Bands  5631 22
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1482G15"
+Map "ctg6" Ends Left 50.000
+Map "ctg6" Ends Right 75.000 Oldctg 6
+Gel_number    b1482A1
+Bands  5653 26
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1482I12"
+Map "ctg6" Ends Left 29.000
+Map "ctg6" Ends Right 50.000 Oldctg 6
+Gel_number    b1482B1
+Bands  5679 22
+Approximate_match_to_cosmid "H0082F20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1494B06"
+Map "ctg3" Ends Left 23.000
+Map "ctg3" Ends Right 44.000 Oldctg 3
+Gel_number    b1494D1
+Bands  5701 22
+Approximate_match_to_cosmid "b1584L19"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1498E10"
+Map "ctg3" Ends Left 125.000
+Map "ctg3" Ends Right 152.000 Oldctg 3
+Gel_number    b1498B1
+Bands  5723 28
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1502M06"
+Map "ctg8" Ends Left 7.000
+Map "ctg8" Ends Right 24.000 Oldctg 8
+Gel_number    b1502B1
+Bands  5751 18
+Exact_match_to_cosmid "c0021L10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1503B23"
+Map "ctg3" Ends Left 50.000
+Map "ctg3" Ends Right 76.000 Oldctg 3
+Gel_number    b1503C1
+Bands  5769 27
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1503L05"
+Map "ctg2" Ends Left 61.000
+Map "ctg2" Ends Right 87.000 Oldctg 2
+Gel_number    b1503C1
+Bands  5796 27
+Exact_match_to_cosmid "b1588O10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1505H15"
+Map "ctg6" Ends Left 38.000
+Map "ctg6" Ends Right 66.000 Oldctg 6
+Gel_number    b1505C1
+Bands  5823 29
+Approximate_match_to_cosmid "c0108C20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1509B02"
+Map "ctg4" Ends Left 60.000
+Map "ctg4" Ends Right 90.000 Oldctg 4
+Gel_number    b1509D1
+Bands  5852 31
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1511G13"
+Map "ctg4" Ends Left 79.000
+Map "ctg4" Ends Right 104.000 Oldctg 4
+Gel_number    b1511A1
+Bands  9566 26
+Approximate_match_to_cosmid "c1050F12"
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "b1512M05"
+Map "ctg9" Ends Left 13.000
+Map "ctg9" Ends Right 40.000 Oldctg 9
+Gel_number    b1512A1
+Bands  5883 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1514A05"
+Map "ctg2" Ends Left 20.000
+Map "ctg2" Ends Right 50.000 Oldctg 2
+Gel_number    b1514A1
+Bands  5911 31
+Exact_match_to_cosmid "b1566O10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1520O04"
+Map "ctg6" Ends Left 19.000
+Map "ctg6" Ends Right 40.000 Oldctg 6
+Gel_number    b1520B1
+Bands  5942 22
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1522B13"
+Map "ctg2" Ends Left 22.000
+Map "ctg2" Ends Right 56.000 Oldctg 2
+Gel_number    b1522C1
+Bands  5991 35
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1522D12"
+Map "ctg10" Ends Left 7.000
+Map "ctg10" Ends Right 30.000 Oldctg 10
+Gel_number    b1522D1
+Bands  6048 24
+Approximate_match_to_cosmid "H0201H09"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1522J10"
+Map "ctg3" Ends Left 64.000
+Map "ctg3" Ends Right 85.000 Oldctg 3
+Gel_number    b1522D1
+Bands  6026 22
+Positive_STS "B13" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1522J13"
+Map "ctg1" Ends Left 13.000
+Map "ctg1" Ends Right 39.000 Oldctg 1
+Gel_number    b1522C1
+Bands  5964 27
+Approximate_match_to_cosmid "c1086K04"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1529D11"
+Map "ctg3" Ends Left 127.000
+Map "ctg3" Ends Right 149.000 Oldctg 3
+Gel_number    b1529C1
+Bands  6072 23
+Approximate_match_to_cosmid "b0382H22"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1529P11"
+Map "ctg2" Ends Left 56.000
+Map "ctg2" Ends Right 82.000 Oldctg 2
+Gel_number    b1529C1
+Bands  6095 27
+Exact_match_to_cosmid "b1588O10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1531L10"
+Map "ctg6" Ends Left 18.000
+Map "ctg6" Ends Right 42.000 Oldctg 6
+Gel_number    b1531D1
+Bands  6122 25
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1537B07"
+Map "ctg2" Ends Left 39.000
+Map "ctg2" Ends Right 73.000 Oldctg 2
+Gel_number    b1537C1
+Bands  6182 35
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1537E21"
+Map "ctg5" Ends Left 25.000
+Map "ctg5" Ends Right 59.000 Oldctg 5
+Gel_number    b1537A1
+Bands  6147 35
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1540A14"
+Map "ctg2" Ends Left 19.000
+Map "ctg2" Ends Right 44.000 Oldctg 2
+Gel_number    b1540B1
+Bands  6244 26
+Exact_match_to_cosmid "b1101J03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1540M20"
+Map "ctg1" Ends Left 1.000
+Map "ctg1" Ends Right 27.000 Oldctg 1
+Gel_number    b1540B1
+Bands  6217 27
+Approximate_match_to_cosmid "b1075H04"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1548D16"
+Map "ctg1" Ends Left 6.000
+Map "ctg1" Ends Right 28.000 Oldctg 1
+Gel_number    b1548D1
+Bands  6270 23
+Exact_match_to_cosmid "b1075H04"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1557J13"
+Map "ctg2" Ends Left 68.000
+Map "ctg2" Ends Right 96.000 Oldctg 2
+Gel_number    b1557C1
+Bands  6293 29
+Approximate_match_to_cosmid "H0002F22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1566O10"
+Map "ctg2" Ends Left 18.000
+Map "ctg2" Ends Right 53.000 Oldctg 2
+Gel_number    b1566B1
+Bands  6322 36
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1567H22"
+Map "ctg8" Ends Left 24.000
+Map "ctg8" Ends Right 53.000 Oldctg 8
+Gel_number    b1567D1
+Bands  6358 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1568O15"
+Map "ctg3" Ends Left 77.000
+Map "ctg3" Ends Right 95.000 Oldctg 3
+Gel_number    b1568A1
+Bands  6388 19
+Approximate_match_to_cosmid "c1041H02"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1570G15"
+Map "ctg4" Ends Left 61.000
+Map "ctg4" Ends Right 81.000 Oldctg 4
+Gel_number    b1570A1
+Bands  6407 21
+Exact_match_to_cosmid "c1082E16"
+Positive_STS "D58" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1573O12"
+Map "ctg3" Ends Left 97.000
+Map "ctg3" Ends Right 124.000 Oldctg 3
+Gel_number    b1573B1
+Bands  6428 28
+Approximate_match_to_cosmid "c1098L12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1577M16"
+Map "ctg3" Ends Left 129.000
+Map "ctg3" Ends Right 159.000 Oldctg 3
+Gel_number    b1577B1
+Bands  6456 31
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1584L19"
+Map "ctg3" Ends Left 17.000
+Map "ctg3" Ends Right 44.000 Oldctg 3
+Gel_number    b1584C1
+Bands  6487 28
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1585M07"
+Map "ctg1" Ends Left 57.000
+Map "ctg1" Ends Right 85.000 Oldctg 1
+Gel_number    b1585A1
+Bands  6515 29
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1586A22"
+Map "ctg2" Ends Left 68.000
+Map "ctg2" Ends Right 95.000 Oldctg 2
+Gel_number    b1586B1
+Bands  6544 28
+Approximate_match_to_cosmid "H0002F22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1588H16"
+Map "ctg10" Ends Left 0.000
+Map "ctg10" Ends Right 22.000 Oldctg 10
+Gel_number    b1588D1
+Bands  6626 23
+Positive_STS "A10" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1588K02"
+Map "ctg2" Ends Left 70.000
+Map "ctg2" Ends Right 89.000 Oldctg 2
+Gel_number    b1588B1
+Bands  6606 20
+Exact_match_to_cosmid "H0016J06"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1588O10"
+Map "ctg2" Ends Left 56.000
+Map "ctg2" Ends Right 89.000 Oldctg 2
+Gel_number    b1588B1
+Bands  6572 34
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1593C11"
+Map "ctg2" Ends Left 68.000
+Map "ctg2" Ends Right 89.000 Oldctg 2
+Gel_number    b1593A1
+Bands  6649 22
+Exact_match_to_cosmid "H0276H10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1596H05"
+Map "ctg8" Ends Left 16.000
+Map "ctg8" Ends Right 42.000 Oldctg 8
+Gel_number    b1596C1
+Bands  6671 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1599A19"
+Map "ctg4" Ends Left 57.000
+Map "ctg4" Ends Right 80.000 Oldctg 4
+Gel_number    b1599A1
+Bands  6727 24
+Approximate_match_to_cosmid "c1082E16"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1599K15"
+Map "ctg4" Ends Left 23.000
+Map "ctg4" Ends Right 51.000 Oldctg 4
+Gel_number    b1599A1
+Bands  6698 29
+Approximate_match_to_cosmid "H0143J16"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1601G04"
+Map "ctg1" Ends Left 36.000
+Map "ctg1" Ends Right 53.000 Oldctg 1
+Gel_number    b1601B1
+Bands  6751 18
+Exact_match_to_cosmid "c1024E06"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1601H13"
+Map "ctg5" Ends Left 44.000
+Map "ctg5" Ends Right 65.000 Oldctg 5
+Gel_number    b1601C1
+Bands  6797 22
+Approximate_match_to_cosmid "b1601H14"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1601H14"
+Map "ctg5" Ends Left 41.000
+Map "ctg5" Ends Right 70.000 Oldctg 5
+Gel_number    b1601D1
+Bands  6819 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1601I10"
+Map "ctg6" Ends Left 15.000
+Map "ctg6" Ends Right 42.000 Oldctg 6
+Gel_number    b1601B1
+Bands  6769 28
+Positive_Locus "J5" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1604O19"
+Map "ctg3" Ends Left 53.000
+Map "ctg3" Ends Right 86.000 Oldctg 3
+Gel_number    b1604A1
+Bands  6849 34
+Positive_STS "B13" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1607F08"
+Map "ctg2" Ends Left 61.000
+Map "ctg2" Ends Right 90.000 Oldctg 2
+Gel_number    b1607D1
+Bands  6909 30
+Positive_STS "A01" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1607I12"
+Map "ctg8" Ends Left 21.000
+Map "ctg8" Ends Right 46.000 Oldctg 8
+Gel_number    b1607B1
+Bands  6883 26
+Approximate_match_to_cosmid "H0186L03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1610I15"
+Map "ctg3" Ends Left 53.000
+Map "ctg3" Ends Right 75.000 Oldctg 3
+Gel_number    b1610A1
+Bands  6939 23
+Exact_match_to_cosmid "H0006G12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1613A17"
+Map "ctg6" Ends Left 6.000
+Map "ctg6" Ends Right 32.000 Oldctg 6
+Gel_number    b1613A1
+Bands  6962 27
+Approximate_match_to_cosmid "c1053I24"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1613P20"
+Map "ctg4" Ends Left 48.000
+Map "ctg4" Ends Right 71.000 Oldctg 4
+Gel_number    b1613D1
+Bands  6989 24
+Positive_STS "D58" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1622G13"
+Map "ctg8" Ends Left 11.000
+Map "ctg8" Ends Right 32.000 Oldctg 8
+Gel_number    b1622A1
+Bands  7013 22
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1626B19"
+Map "ctg6" Ends Left 45.000
+Map "ctg6" Ends Right 70.000 Oldctg 6
+Gel_number    b1626C1
+Bands  7060 26
+Approximate_match_to_cosmid "c1037B24"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1626J19"
+Map "ctg6" Ends Left 46.000
+Map "ctg6" Ends Right 70.000 Oldctg 6
+Gel_number    b1626C1
+Bands  7035 25
+Exact_match_to_cosmid "c0108C20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1628C03"
+Map "ctg8" Ends Left 0.000
+Map "ctg8" Ends Right 22.000 Oldctg 8
+Gel_number    b1628A1
+Bands  7086 23
+Positive_STS "A05" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1628F14"
+Map "ctg9" Ends Left 18.000
+Map "ctg9" Ends Right 50.000 Oldctg 9
+Gel_number    b1628D1
+Bands  7129 33
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1628J23"
+Gel_number    b1628C1
+Bands  7109 20
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "b1630L22"
+Map "ctg3" Ends Left 125.000
+Map "ctg3" Ends Right 152.000 Oldctg 3
+Gel_number    b1630D1
+Bands  7162 28
+Approximate_match_to_cosmid "b1498E10"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1631H15"
+Map "ctg1" Ends Left 6.000
+Map "ctg1" Ends Right 29.000 Oldctg 1
+Gel_number    b1631C1
+Bands  7190 24
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1632J14"
+Map "ctg8" Ends Left 66.000
+Map "ctg8" Ends Right 92.000 Oldctg 8
+Gel_number    b1632D1
+Bands  7214 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1633E09"
+Map "ctg2" Ends Left 54.000
+Map "ctg2" Ends Right 81.000 Oldctg 2
+Gel_number    b1633A1
+Bands  7241 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1633J03"
+Map "ctg1" Ends Left 29.000
+Map "ctg1" Ends Right 49.000 Oldctg 1
+Gel_number    b1633C1
+Bands  7269 21
+Approximate_match_to_cosmid "c1024E06"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1634P04"
+Map "ctg6" Ends Left 21.000
+Map "ctg6" Ends Right 42.000 Oldctg 6
+Gel_number    b1634D1
+Bands  7290 22
+Positive_Locus "J5" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1642H04"
+Map "ctg1" Ends Left 63.000
+Map "ctg1" Ends Right 90.000 Oldctg 1
+Gel_number    b1642D1
+Bands  7312 28
+Exact_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b1644B11"
+Map "ctg3" Ends Left 51.000
+Map "ctg3" Ends Right 84.000 Oldctg 3
+Gel_number    b1644C1
+Bands  7340 34
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "b1644D20"
+Map "ctg4" Ends Left 20.000
+Map "ctg4" Ends Right 44.000 Oldctg 4
+Gel_number    b1644D1
+Bands  7374 25
+Positive_STS "D57" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "b1645E04"
+Map "ctg8" Ends Left 11.000
+Map "ctg8" Ends Right 30.000 Oldctg 8
+Gel_number    b1645B1
+Bands  7399 20
+Exact_match_to_cosmid "b1622G13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b3299K19"
+Map "ctg1" Ends Left 80.000
+Map "ctg1" Ends Right 106.000 Oldctg 1
+Gel_number    b3299A1
+Bands  9592 27
+Approximate_match_to_cosmid "c1032M05"
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "b4305E09"
+Map "ctg6" Ends Left 29.000
+Map "ctg6" Ends Right 49.000 Oldctg 6
+Gel_number    b4305A1
+Bands  7419 21
+Exact_match_to_cosmid "H0082F20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "b6294A03"
+Map "ctg6" Ends Left 38.000
+Map "ctg6" Ends Right 73.000 Oldctg 6
+Gel_number    b6294A1
+Bands  7440 36
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0021L10"
+Map "ctg8" Ends Left 1.000
+Map "ctg8" Ends Right 31.000 Oldctg 8
+Gel_number    c0021D1
+Bands  7566 31
+Positive_STS "A05" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c0071G24"
+Map "ctg3" Ends Left 102.000
+Map "ctg3" Ends Right 130.000 Oldctg 3
+Gel_number    c0071B1
+Bands  7597 29
+Approximate_match_to_cosmid "c1098L12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c0072H16"
+Map "ctg3" Ends Left 98.000
+Map "ctg3" Ends Right 134.000 Oldctg 3
+Gel_number    c0072D1
+Bands  7626 37
+Approximate_match_to_cosmid "c1098L12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c0085G04"
+Map "ctg6" Ends Left 24.000
+Map "ctg6" Ends Right 81.000 Oldctg 6
+Gel_number    c1085B1
+Bands  8792 58
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0105F22"
+Map "ctg3" Ends Left 41.000
+Map "ctg3" Ends Right 71.000 Oldctg 3
+Gel_number    c1105D1
+Bands  9162 31
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c0105P24"
+Map "ctg1" Ends Left 24.000
+Map "ctg1" Ends Right 61.000 Oldctg 1
+Gel_number    c1105D1
+Bands  9124 38
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0107A18"
+Map "ctg1" Ends Left 18.000
+Map "ctg1" Ends Right 62.000 Oldctg 1
+Gel_number    c1107B1
+Bands  9193 45
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0108C20"
+Map "ctg6" Ends Left 33.000
+Map "ctg6" Ends Right 73.000 Oldctg 6
+Gel_number    c1108B1
+Bands  9238 41
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0109A18"
+Map "ctg6" Ends Left 30.000
+Map "ctg6" Ends Right 66.000 Oldctg 6
+Gel_number    c1109B1
+Bands  9314 37
+Positive_STS "J130" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c0109L22"
+Map "ctg8" Ends Left 30.000
+Map "ctg8" Ends Right 62.000 Oldctg 8
+Gel_number    c1109D1
+Bands  9351 33
+Approximate_match_to_cosmid "c0109M22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0109M22"
+Map "ctg8" Ends Left 30.000
+Map "ctg8" Ends Right 64.000 Oldctg 8
+Gel_number    c1109B1
+Bands  9279 35
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c0186L14"
+Map "ctg3" Ends Left 70.000
+Map "ctg3" Ends Right 112.000 Oldctg 3
+Gel_number    c0186D1
+Bands  7663 43
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1010O04"
+Map "ctg3" Ends Left 97.000
+Map "ctg3" Ends Right 133.000 Oldctg 3
+Gel_number    c1010B1
+Bands  7706 37
+Positive_STS "B01" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1017J12"
+Map "ctg1" Ends Left 48.000
+Map "ctg1" Ends Right 94.000 Oldctg 1
+Gel_number    c1017D1
+Bands  7743 47
+Positive_STS "J9" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c1021A24"
+Map "ctg2" Ends Left 40.000
+Map "ctg2" Ends Right 77.000 Oldctg 2
+Gel_number    c1021B1
+Bands  7790 38
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1024C16"
+Map "ctg1" Ends Left 29.000
+Map "ctg1" Ends Right 70.000 Oldctg 1
+Gel_number    c1024B1
+Bands  7870 42
+Approximate_match_to_cosmid "c1024E06"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1024E06"
+Map "ctg1" Ends Left 34.000
+Map "ctg1" Ends Right 75.000 Oldctg 1
+Gel_number    c1024B1
+Bands  7828 42
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1025O11"
+Map "ctg3" Ends Left 7.000
+Map "ctg3" Ends Right 42.000 Oldctg 3
+Gel_number    c1025A1
+Bands  7912 36
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1031I14"
+Map "ctg3" Ends Left 0.000
+Map "ctg3" Ends Right 30.000 Oldctg 3
+Gel_number    c1031B1
+Bands  7948 31
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1032I23"
+Map "ctg5" Ends Left 15.000
+Map "ctg5" Ends Right 56.000 Oldctg 5
+Gel_number    c1032A1
+Bands  9658 42
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "c1032M05"
+Map "ctg1" Ends Left 68.000
+Map "ctg1" Ends Right 106.000 Oldctg 1
+Gel_number    c1032A1
+Bands  9619 39
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "c1032M10"
+Gel_number    c1032B1
+Bands  7979 35
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "c1036J07"
+Map "ctg10" Ends Left 8.000
+Map "ctg10" Ends Right 32.000 Oldctg 10
+Gel_number    c1036C1
+Bands  8014 25
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1037B24"
+Map "ctg6" Ends Left 33.000
+Map "ctg6" Ends Right 73.000 Oldctg 6
+Gel_number    c1037D1
+Bands  8039 41
+Positive_STS "J130" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c1041H02"
+Map "ctg3" Ends Left 80.000
+Map "ctg3" Ends Right 115.000 Oldctg 3
+Gel_number    c1041D1
+Bands  8080 36
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1042M12"
+Map "ctg1" Ends Left 27.000
+Map "ctg1" Ends Right 53.000 Oldctg 1
+Gel_number    c1042B1
+Bands  8116 27
+Approximate_match_to_cosmid "c0107A18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1043L12"
+Map "ctg5" Ends Left 24.000
+Map "ctg5" Ends Right 63.000 Oldctg 5
+Gel_number    c1043D1
+Bands  8143 40
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1047E05"
+Map "ctg9" Ends Left 28.000
+Map "ctg9" Ends Right 52.000 Oldctg 9
+Gel_number    c1047A1
+Bands  8183 25
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1050F12"
+Map "ctg4" Ends Left 73.000
+Map "ctg4" Ends Right 110.000 Oldctg 4
+Gel_number    c1050D1
+Bands  9735 38
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "c1050J12"
+Map "ctg4" Ends Left 75.000
+Map "ctg4" Ends Right 109.000 Oldctg 4
+Gel_number    c1050D1
+Bands  9700 35
+Approximate_match_to_cosmid "c1050F12"
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "c1052J13"
+Map "ctg6" Ends Left 33.000
+Map "ctg6" Ends Right 72.000 Oldctg 6
+Gel_number    c1052C1
+Bands  8208 40
+Positive_STS "J130" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c1053I24"
+Map "ctg6" Ends Left 0.000
+Map "ctg6" Ends Right 36.000 Oldctg 6
+Gel_number    c1053B1
+Bands  8248 37
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1056C07"
+Map "ctg3" Ends Left 2.000
+Map "ctg3" Ends Right 35.000 Oldctg 3
+Gel_number    c1056A1
+Bands  8285 34
+Approximate_match_to_cosmid "c1096G11"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1058K17"
+Map "ctg3" Ends Left 43.000
+Map "ctg3" Ends Right 70.000 Oldctg 3
+Gel_number    c1058A1
+Bands  8319 28
+Approximate_match_to_cosmid "c0105F22"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1060H12"
+Map "ctg8" Ends Left 12.000
+Map "ctg8" Ends Right 32.000 Oldctg 8
+Gel_number    c1060D1
+Bands  8365 21
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1060I02"
+Map "ctg8" Ends Left 13.000
+Map "ctg8" Ends Right 30.000 Oldctg 8
+Gel_number    c1060B1
+Bands  8347 18
+Positive_STS "A07" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c1063E08"
+Map "ctg2" Ends Left 43.000
+Map "ctg2" Ends Right 80.000 Oldctg 2
+Gel_number    c1063B1
+Bands  8386 38
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1065C21"
+Map "ctg3" Ends Left 110.000
+Map "ctg3" Ends Right 143.000 Oldctg 3
+Gel_number    c1065A1
+Bands  8424 34
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1067H04"
+Map "ctg6" Ends Left 3.000
+Map "ctg6" Ends Right 32.000 Oldctg 6
+Gel_number    c1067D1
+Bands  8458 30
+Exact_match_to_cosmid "c1053I24"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1074L23"
+Map "ctg2" Ends Left 4.000
+Map "ctg2" Ends Right 45.000 Oldctg 2
+Gel_number    c1074C1
+Bands  8529 42
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1074P15"
+Map "ctg2" Ends Left 0.000
+Map "ctg2" Ends Right 40.000 Oldctg 2
+Gel_number    c1074C1
+Bands  8488 41
+Positive_STS "A05" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c1082B18"
+Map "ctg4" Ends Left 42.000
+Map "ctg4" Ends Right 81.000 Oldctg 4
+Gel_number    c1082D1
+Bands  8681 40
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1082E16"
+Map "ctg4" Ends Left 44.000
+Map "ctg4" Ends Right 81.000 Oldctg 4
+Gel_number    c1082B1
+Bands  8571 38
+Positive_STS "D58" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "c1082F03"
+Map "ctg3" Ends Left 96.000
+Map "ctg3" Ends Right 132.000 Oldctg 3
+Gel_number    c1082C1
+Bands  8609 37
+Positive_STS "B01" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1082F16"
+Map "ctg4" Ends Left 46.000
+Map "ctg4" Ends Right 80.000 Oldctg 4
+Gel_number    c1082D1
+Bands  8646 35
+Exact_match_to_cosmid "c1082B18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1083A18"
+Map "ctg3" Ends Left 1.000
+Map "ctg3" Ends Right 34.000 Oldctg 3
+Gel_number    c1083B1
+Bands  8721 34
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1085E05"
+Map "ctg6" Ends Left 57.000
+Map "ctg6" Ends Right 93.000 Oldctg 6
+Gel_number    c1085A1
+Bands  8755 37
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1086K04"
+Map "ctg1" Ends Left 11.000
+Map "ctg1" Ends Right 46.000 Oldctg 1
+Gel_number    c1086B1
+Bands  8850 36
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1091I04"
+Gel_number    c1091B1
+Bands  8886 49
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "c1096G11"
+Map "ctg3" Ends Left 0.000
+Map "ctg3" Ends Right 35.000 Oldctg 3
+Gel_number    c1096A1
+Bands  8935 36
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1098F02"
+Map "ctg9" Ends Left 20.000
+Map "ctg9" Ends Right 61.000 Oldctg 9
+Gel_number    c1098D1
+Bands  9010 42
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1098L12"
+Map "ctg3" Ends Left 96.000
+Map "ctg3" Ends Right 134.000 Oldctg 3
+Gel_number    c1098D1
+Bands  8971 39
+Positive_STS "B01" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1100K19"
+Map "ctg3" Ends Left 37.000
+Map "ctg3" Ends Right 63.000 Oldctg 3
+Gel_number    c1100A1
+Bands  9097 27
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "c1100K23"
+Map "ctg5" Ends Left 17.000
+Map "ctg5" Ends Right 61.000 Oldctg 5
+Gel_number    c1100A1
+Bands  9052 45
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1114F11"
+Map "ctg8" Ends Left 44.000
+Map "ctg8" Ends Right 85.000 Oldctg 8
+Gel_number    c1114C1
+Bands  9384 42
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "c1115H20"
+Map "ctg7" Ends Left 1.000
+Map "ctg7" Ends Right 31.000 Oldctg 7
+Gel_number    c1115D1
+Bands  9426 31
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0002F22"
+Map "ctg2" Ends Left 66.000
+Map "ctg2" Ends Right 104.000 Oldctg 2
+Gel_number    6002D1
+Bands  1255 39
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0003F14"
+Map "ctg6" Ends Left 21.000
+Map "ctg6" Ends Right 41.000 Oldctg 6
+Gel_number    3003D1
+Bands  438 21
+Approximate_match_to_cosmid "H0107A17"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0003F17"
+Map "ctg5" Ends Left 32.000
+Map "ctg5" Ends Right 57.000 Oldctg 5
+Gel_number    3003C1
+Bands  412 26
+Approximate_match_to_cosmid "c1100K23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0005K08"
+Map "ctg5" Ends Left 15.000
+Map "ctg5" Ends Right 48.000 Oldctg 5
+Gel_number    4005B1
+Bands  603 34
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0005N23"
+Map "ctg3" Ends Left 139.000
+Map "ctg3" Ends Right 166.000 Oldctg 3
+Gel_number    3005C1
+Bands  459 28
+Positive_STS "A10" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0006G12"
+Map "ctg3" Ends Left 52.000
+Map "ctg3" Ends Right 77.000 Oldctg 3
+Gel_number    6006B1
+Bands  1294 26
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0012B12"
+Map "ctg3" Ends Left 20.000
+Map "ctg3" Ends Right 34.000 Oldctg 3
+Gel_number    6012D1
+Bands  1320 15
+Exact_match_to_cosmid "b1584L19"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0013D12"
+Map "ctg3" Ends Left 44.000
+Map "ctg3" Ends Right 64.000 Oldctg 3
+Gel_number    8013D1
+Bands  2911 21
+Approximate_match_to_cosmid "c0105F22"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0016J06"
+Map "ctg2" Ends Left 67.000
+Map "ctg2" Ends Right 96.000 Oldctg 2
+Gel_number    7016D1
+Bands  2429 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0018F10"
+Map "ctg4" Ends Left 43.000
+Map "ctg4" Ends Right 75.000 Oldctg 4
+Gel_number    4018D1
+Bands  637 33
+Approximate_match_to_cosmid "c1082B18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0019P09"
+Gel_number    6019C1
+Bands  1335 32
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "H0020I07"
+Map "ctg3" Ends Left 121.000
+Map "ctg3" Ends Right 138.000 Oldctg 3
+Gel_number    4020A1
+Bands  670 18
+Exact_match_to_cosmid "c1065C21"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0022F06"
+Map "ctg2" Ends Left 46.000
+Map "ctg2" Ends Right 74.000 Oldctg 2
+Gel_number    6022D1
+Bands  1367 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0024E12"
+Map "ctg3" Ends Left 14.000
+Map "ctg3" Ends Right 32.000 Oldctg 3
+Gel_number    6024B1
+Bands  1396 19
+Exact_match_to_cosmid "H0087N20"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0029H15"
+Map "ctg10" Ends Left 1.000
+Map "ctg10" Ends Right 23.000 Oldctg 10
+Gel_number    4029B1
+Bands  688 23
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0029M07"
+Map "ctg1" Ends Left 29.000
+Map "ctg1" Ends Right 46.000 Oldctg 1
+Gel_number    7029A1
+Bands  2459 18
+Exact_match_to_cosmid "c0107A18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0031K16"
+Map "ctg1" Ends Left 43.000
+Map "ctg1" Ends Right 68.000 Oldctg 1
+Gel_number    4031B1
+Bands  711 26
+Approximate_match_to_cosmid "b1468I11"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0032K16"
+Map "ctg4" Ends Left 41.000
+Map "ctg4" Ends Right 65.000 Oldctg 4
+Gel_number    4032B1
+Bands  759 25
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0032O01"
+Map "ctg3" Ends Left 25.000
+Map "ctg3" Ends Right 46.000 Oldctg 3
+Gel_number    4032A1
+Bands  737 22
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0032P15"
+Map "ctg2" Ends Left 45.000
+Map "ctg2" Ends Right 67.000 Oldctg 2
+Gel_number    6032C1
+Bands  1415 23
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0033N07"
+Map "ctg8" Ends Left 66.000
+Map "ctg8" Ends Right 92.000 Oldctg 8
+Gel_number    4033C1
+Bands  784 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0034I07"
+Map "ctg3" Ends Left 52.000
+Map "ctg3" Ends Right 72.000 Oldctg 3
+Gel_number    6034A1
+Bands  1461 21
+Approximate_match_to_cosmid "b1644B11"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0034O07"
+Map "ctg7" Ends Left 7.000
+Map "ctg7" Ends Right 29.000 Oldctg 7
+Gel_number    6034A1
+Bands  1438 23
+Approximate_match_to_cosmid "c1115H20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0036E07"
+Map "ctg5" Ends Left 26.000
+Map "ctg5" Ends Right 55.000 Oldctg 5
+Gel_number    6036A1
+Bands  1482 30
+Approximate_match_to_cosmid "c1100K23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0041J05"
+Map "ctg4" Ends Left 60.000
+Map "ctg4" Ends Right 90.000 Oldctg 4
+Gel_number    3041C1
+Bands  487 31
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0043D23"
+Map "ctg6" Ends Left 54.000
+Map "ctg6" Ends Right 76.000 Oldctg 6
+Gel_number    6043C1
+Bands  1512 23
+Approximate_match_to_cosmid "b1081C22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0052J20"
+Map "ctg5" Ends Left 33.000
+Map "ctg5" Ends Right 58.000 Oldctg 5
+Gel_number    9052D1
+Bands  3121 26
+Approximate_match_to_cosmid "b0319B18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0054L03"
+Map "ctg4" Ends Left 67.000
+Map "ctg4" Ends Right 96.000 Oldctg 4
+Gel_number    3054C1
+Bands  518 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0054L23"
+Map "ctg8" Ends Left 48.000
+Map "ctg8" Ends Right 77.000 Oldctg 8
+Gel_number    3054C1
+Bands  548 30
+Approximate_match_to_cosmid "c1114F11"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0057O04"
+Map "ctg4" Ends Left 72.000
+Map "ctg4" Ends Right 94.000 Oldctg 4
+Gel_number    7057B1
+Bands  2477 23
+Exact_match_to_cosmid "H0054L03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0058L05"
+Map "ctg1" Ends Left 14.000
+Map "ctg1" Ends Right 44.000 Oldctg 1
+Gel_number    8058C1
+Bands  2932 31
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0073O01"
+Gel_number    7073A1
+Bands  2500 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "H0075O08"
+Map "ctg3" Ends Left 54.000
+Map "ctg3" Ends Right 74.000 Oldctg 3
+Gel_number    6075B1
+Bands  1535 21
+Exact_match_to_cosmid "H0006G12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0078I12"
+Map "ctg9" Ends Left 24.000
+Map "ctg9" Ends Right 46.000 Oldctg 9
+Gel_number    6078B1
+Bands  1556 23
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0078L17"
+Map "ctg3" Ends Left 89.000
+Map "ctg3" Ends Right 117.000 Oldctg 3
+Gel_number    4078C1
+Bands  811 29
+Approximate_match_to_cosmid "H0105A07"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0080I09"
+Map "ctg2" Ends Left 12.000
+Map "ctg2" Ends Right 38.000 Oldctg 2
+Gel_number    6080A1
+Bands  1579 27
+Approximate_match_to_cosmid "c1074L23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0082F20"
+Map "ctg6" Ends Left 26.000
+Map "ctg6" Ends Right 51.000 Oldctg 6
+Gel_number    7082D1
+Bands  2561 26
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0082I21"
+Map "ctg8" Ends Left 35.000
+Map "ctg8" Ends Right 68.000 Oldctg 8
+Gel_number    7082A1
+Bands  2527 34
+Approximate_match_to_cosmid "b1257O16"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0086A08"
+Map "ctg3" Ends Left 77.000
+Map "ctg3" Ends Right 109.000 Oldctg 3
+Gel_number    7086B1
+Bands  2612 33
+Positive_STS "B13" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0086M22"
+Map "ctg3" Ends Left 78.000
+Map "ctg3" Ends Right 102.000 Oldctg 3
+Gel_number    7086B1
+Bands  2587 25
+Positive_STS "B13" New
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0086N12"
+Map "ctg1" Ends Left 42.000
+Map "ctg1" Ends Right 68.000 Oldctg 1
+Gel_number    4086D1
+Bands  840 27
+Approximate_match_to_cosmid "b1468I11"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0087F20"
+Map "ctg3" Ends Left 17.000
+Map "ctg3" Ends Right 36.000 Oldctg 3
+Gel_number    9087D1
+Bands  3167 20
+Exact_match_to_cosmid "b1584L19"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0087N20"
+Map "ctg3" Ends Left 13.000
+Map "ctg3" Ends Right 32.000 Oldctg 3
+Gel_number    9087D1
+Bands  3147 20
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0089H14"
+Map "ctg4" Ends Left 0.000
+Map "ctg4" Ends Right 27.000 Oldctg 4
+Gel_number    9089D1
+Bands  3187 28
+Positive_STS "D57" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0095C16"
+Map "ctg9" Ends Left 14.000
+Map "ctg9" Ends Right 36.000 Oldctg 9
+Gel_number    5095B1
+Bands  1138 23
+Exact_match_to_cosmid "b1512M05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0097H04"
+Map "ctg4" Ends Left 9.000
+Map "ctg4" Ends Right 33.000 Oldctg 4
+Gel_number    6097D1
+Bands  1606 25
+Approximate_match_to_cosmid "H0165J23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0098A20"
+Map "ctg4" Ends Left 4.000
+Map "ctg4" Ends Right 26.000 Oldctg 4
+Gel_number    4098B1
+Bands  867 23
+Approximate_match_to_cosmid "H0089H14"
+Positive_STS "D57" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0104J07"
+Map "ctg3" Ends Left 2.000
+Map "ctg3" Ends Right 30.000 Oldctg 3
+Gel_number    4104C1
+Bands  890 29
+Approximate_match_to_cosmid "c1096G11"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0105A07"
+Map "ctg3" Ends Left 78.000
+Map "ctg3" Ends Right 113.000 Oldctg 3
+Gel_number    6105A1
+Bands  1631 36
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0107A17"
+Map "ctg6" Ends Left 21.000
+Map "ctg6" Ends Right 48.000 Oldctg 6
+Gel_number    6107A1
+Bands  1667 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0108P07"
+Map "ctg3" Ends Left 53.000
+Map "ctg3" Ends Right 71.000 Oldctg 3
+Gel_number    5108C1
+Bands  1161 19
+Exact_match_to_cosmid "b1604O19"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0109L05"
+Map "ctg5" Ends Left 44.000
+Map "ctg5" Ends Right 70.000 Oldctg 5
+Gel_number    6109C1
+Bands  1695 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0112D17"
+Map "ctg8" Ends Left 12.000
+Map "ctg8" Ends Right 30.000 Oldctg 8
+Gel_number    7112C1
+Bands  2645 19
+Exact_match_to_cosmid "b1622G13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0116L11"
+Map "ctg7" Ends Left 4.000
+Map "ctg7" Ends Right 25.000 Oldctg 7
+Gel_number    6116C1
+Bands  1722 22
+Exact_match_to_cosmid "c1115H20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0119D09"
+Map "ctg8" Ends Left 16.000
+Map "ctg8" Ends Right 31.000 Oldctg 8
+Gel_number    4119C1
+Bands  948 16
+Exact_match_to_cosmid "b1622G13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0119L15"
+Map "ctg1" Ends Left 64.000
+Map "ctg1" Ends Right 92.000 Oldctg 1
+Gel_number    4119C1
+Bands  919 29
+Approximate_match_to_cosmid "b1046O08"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0122E03"
+Map "ctg8" Ends Left 24.000
+Map "ctg8" Ends Right 50.000 Oldctg 8
+Gel_number    6122A1
+Bands  1744 27
+Positive_STS "A07" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0122F13"
+Map "ctg1" Ends Left 54.000
+Map "ctg1" Ends Right 82.000 Oldctg 1
+Gel_number    6122C1
+Bands  1771 29
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0122H11"
+Map "ctg2" Ends Left 63.000
+Map "ctg2" Ends Right 90.000 Oldctg 2
+Gel_number    6122C1
+Bands  1800 28
+Approximate_match_to_cosmid "H0276H10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0125D19"
+Gel_number    6125C1
+Bands  1828 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "H0130C22"
+Map "ctg4" Ends Left 68.000
+Map "ctg4" Ends Right 93.000 Oldctg 4
+Gel_number    4130B1
+Bands  964 26
+Exact_match_to_cosmid "H0054L03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0133H10"
+Map "ctg6" Ends Left 63.000
+Map "ctg6" Ends Right 84.000 Oldctg 6
+Gel_number    4133D1
+Bands  990 22
+Exact_match_to_cosmid "c1085E05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0135A11"
+Map "ctg6" Ends Left 63.000
+Map "ctg6" Ends Right 89.000 Oldctg 6
+Gel_number    6135A1
+Bands  1858 27
+Exact_match_to_cosmid "c1085E05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0136F10"
+Map "ctg5" Ends Left 2.000
+Map "ctg5" Ends Right 27.000 Oldctg 5
+Gel_number    6136D1
+Bands  1915 26
+Approximate_match_to_cosmid "H0136N10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0136N10"
+Map "ctg5" Ends Left 0.000
+Map "ctg5" Ends Right 29.000 Oldctg 5
+Gel_number    6136D1
+Bands  1885 30
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0137D12"
+Map "ctg5" Ends Left 9.000
+Map "ctg5" Ends Right 37.000 Oldctg 5
+Gel_number    6137D1
+Bands  1941 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0140O13"
+Map "ctg3" Ends Left 128.000
+Map "ctg3" Ends Right 150.000 Oldctg 3
+Gel_number    7140A1
+Bands  2664 23
+Exact_match_to_cosmid "b1498E10"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0142H08"
+Map "ctg2" Ends Left 57.000
+Map "ctg2" Ends Right 83.000 Oldctg 2
+Gel_number    9142D1
+Bands  3215 27
+Exact_match_to_cosmid "b1588O10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0143A14"
+Map "ctg6" Ends Left 29.000
+Map "ctg6" Ends Right 47.000 Oldctg 6
+Gel_number    9143B1
+Bands  3242 19
+Exact_match_to_cosmid "H0082F20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0143J16"
+Map "ctg4" Ends Left 24.000
+Map "ctg4" Ends Right 52.000 Oldctg 4
+Gel_number    7143D1
+Bands  2687 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0155G20"
+Map "ctg8" Ends Left 59.000
+Map "ctg8" Ends Right 83.000 Oldctg 8
+Gel_number    6155B1
+Bands  1970 25
+Approximate_match_to_cosmid "c1114F11"
+Positive_STS "F100" New
+Positive_STS "J_z10" New
+Positive_STS "J_d34" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0158A08"
+Map "ctg4" Ends Left 72.000
+Map "ctg4" Ends Right 96.000 Oldctg 4
+Gel_number    0158B1
+Bands  1 25
+Approximate_match_to_cosmid "c1050F12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0162A06"
+Map "ctg3" Ends Left 80.000
+Map "ctg3" Ends Right 98.000 Oldctg 3
+Gel_number    6162B1
+Bands  2023 19
+Exact_match_to_cosmid "H0086M22"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0162H15"
+Map "ctg1" Ends Left 73.000
+Map "ctg1" Ends Right 100.000 Oldctg 1
+Gel_number    5162C1
+Bands  9457 28
+Exact_match_to_cosmid "c1032M05"
+Remark "new_add"
+Remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "H0162K16"
+Map "ctg1" Ends Left 57.000
+Map "ctg1" Ends Right 84.000 Oldctg 1
+Gel_number    6162B1
+Bands  1995 28
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0165J23"
+Map "ctg4" Ends Left 7.000
+Map "ctg4" Ends Right 38.000 Oldctg 4
+Gel_number    0165C1
+Bands  26 32
+Positive_STS "D57" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0171H15"
+Map "ctg4" Ends Left 26.000
+Map "ctg4" Ends Right 51.000 Oldctg 4
+Gel_number    0171C1
+Bands  58 26
+Exact_match_to_cosmid "H0143J16"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0172C08"
+Map "ctg10" Ends Left 5.000
+Map "ctg10" Ends Right 27.000 Oldctg 10
+Gel_number    6172B1
+Bands  2042 23
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0173G03"
+Map "ctg3" Ends Left 17.000
+Map "ctg3" Ends Right 44.000 Oldctg 3
+Gel_number    6173A1
+Bands  2065 28
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0175M06"
+Map "ctg2" Ends Left 10.000
+Map "ctg2" Ends Right 35.000 Oldctg 2
+Gel_number    8175B1
+Bands  2963 26
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0182L09"
+Map "ctg1" Ends Left 30.000
+Map "ctg1" Ends Right 53.000 Oldctg 1
+Gel_number    9182A1
+Bands  3261 24
+Exact_match_to_cosmid "c0107A18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0184E07"
+Map "ctg4" Ends Left 60.000
+Map "ctg4" Ends Right 86.000 Oldctg 4
+Gel_number    5184A1
+Bands  1204 27
+Approximate_match_to_cosmid "b1509B02"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0184K11"
+Map "ctg1" Ends Left 34.000
+Map "ctg1" Ends Right 57.000 Oldctg 1
+Gel_number    5184A1
+Bands  1180 24
+Exact_match_to_cosmid "c1024E06"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0186L03"
+Map "ctg8" Ends Left 20.000
+Map "ctg8" Ends Right 46.000 Oldctg 8
+Gel_number    8186C1
+Bands  2989 27
+Positive_STS "A07" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0190K17"
+Map "ctg1" Ends Left 2.000
+Map "ctg1" Ends Right 26.000 Oldctg 1
+Gel_number    3190A1
+Bands  578 25
+Exact_match_to_cosmid "b0333F08"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0192D10"
+Map "ctg3" Ends Left 88.000
+Map "ctg3" Ends Right 117.000 Oldctg 3
+Gel_number    8192D1
+Bands  3016 30
+Approximate_match_to_cosmid "c1041H02"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0192I13"
+Map "ctg8" Ends Left 29.000
+Map "ctg8" Ends Right 54.000 Oldctg 8
+Gel_number    6192A1
+Bands  2093 26
+Exact_match_to_cosmid "b0303H17"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0197G20"
+Map "ctg4" Ends Left 26.000
+Map "ctg4" Ends Right 54.000 Oldctg 4
+Gel_number    7197B1
+Bands  2716 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0198H02"
+Map "ctg3" Ends Left 61.000
+Map "ctg3" Ends Right 88.000 Oldctg 3
+Gel_number    4198D1
+Bands  1012 28
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0201H09"
+Map "ctg10" Ends Left 5.000
+Map "ctg10" Ends Right 31.000 Oldctg 10
+Gel_number    7201C1
+Bands  2745 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0208L03"
+Map "ctg2" Ends Left 7.000
+Map "ctg2" Ends Right 38.000 Oldctg 2
+Gel_number    4208C1
+Bands  1040 32
+Approximate_match_to_cosmid "c1074L23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0210F04"
+Map "ctg6" Ends Left 52.000
+Map "ctg6" Ends Right 77.000 Oldctg 6
+Gel_number    9210D1
+Bands  3285 26
+Positive_STS "F100" New
+Positive_STS "J_z10" New
+Positive_STS "J_d34" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0214L16"
+Map "ctg4" Ends Left 35.000
+Map "ctg4" Ends Right 60.000 Oldctg 4
+Gel_number    7214D1
+Bands  2772 26
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0215I18"
+Map "ctg3" Ends Left 78.000
+Map "ctg3" Ends Right 101.000 Oldctg 3
+Gel_number    9215B1
+Bands  3311 24
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0216G18"
+Map "ctg2" Ends Left 46.000
+Map "ctg2" Ends Right 73.000 Oldctg 2
+Gel_number    6216B1
+Bands  2119 28
+Approximate_match_to_cosmid "b1537B07"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0219D08"
+Map "ctg2" Ends Left 13.000
+Map "ctg2" Ends Right 49.000 Oldctg 2
+Gel_number    6219D1
+Bands  2192 37
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0219N07"
+Map "ctg2" Ends Left 9.000
+Map "ctg2" Ends Right 35.000 Oldctg 2
+Gel_number    6219C1
+Bands  2147 27
+Exact_match_to_cosmid "c1074L23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0219P12"
+Map "ctg3" Ends Left 67.000
+Map "ctg3" Ends Right 84.000 Oldctg 3
+Gel_number    6219D1
+Bands  2174 18
+Approximate_match_to_cosmid "b1604O19"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0220C13"
+Map "ctg2" Ends Left 2.000
+Map "ctg2" Ends Right 28.000 Oldctg 2
+Gel_number    9220A1
+Bands  3335 27
+Exact_match_to_cosmid "c1074P15"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0226F06"
+Map "ctg8" Ends Left 58.000
+Map "ctg8" Ends Right 82.000 Oldctg 8
+Gel_number    9226D1
+Bands  3362 25
+Approximate_match_to_cosmid "c1114F11"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0228N15"
+Map "ctg3" Ends Left 141.000
+Map "ctg3" Ends Right 165.000 Oldctg 3
+Gel_number    6228C1
+Bands  2229 25
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0230B18"
+Map "ctg3" Ends Left 52.000
+Map "ctg3" Ends Right 74.000 Oldctg 3
+Gel_number    7230D1
+Bands  2798 23
+Exact_match_to_cosmid "b1503B23"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0232L09"
+Map "ctg2" Ends Left 68.000
+Map "ctg2" Ends Right 90.000 Oldctg 2
+Gel_number    7232C1
+Bands  2821 23
+Exact_match_to_cosmid "H0276H10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0233C08"
+Map "ctg10" Ends Left 4.000
+Map "ctg10" Ends Right 29.000 Oldctg 10
+Gel_number    0233B1
+Bands  128 26
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0233E10"
+Map "ctg4" Ends Left 70.000
+Map "ctg4" Ends Right 92.000 Oldctg 4
+Gel_number    0233B1
+Bands  105 23
+Exact_match_to_cosmid "H0054L03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0233M10"
+Map "ctg4" Ends Left 70.000
+Map "ctg4" Ends Right 90.000 Oldctg 4
+Gel_number    0233B1
+Bands  84 21
+Exact_match_to_cosmid "H0054L03"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0234C20"
+Map "ctg8" Ends Left 32.000
+Map "ctg8" Ends Right 58.000 Oldctg 8
+Gel_number    8234B1
+Bands  3046 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0242H23"
+Map "ctg1" Ends Left 47.000
+Map "ctg1" Ends Right 72.000 Oldctg 1
+Gel_number    6242C1
+Bands  2254 26
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0244G24"
+Map "ctg3" Ends Left 31.000
+Map "ctg3" Ends Right 52.000 Oldctg 3
+Gel_number    7244B1
+Bands  2844 22
+Approximate_match_to_cosmid "b1246P23"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0249G07"
+Map "ctg2" Ends Left 1.000
+Map "ctg2" Ends Right 28.000 Oldctg 2
+Gel_number    6249A1
+Bands  2280 28
+Approximate_match_to_cosmid "c1074P15"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0250K02"
+Map "ctg9" Ends Left 13.000
+Map "ctg9" Ends Right 31.000 Oldctg 9
+Gel_number    9250B1
+Bands  3387 19
+Exact_match_to_cosmid "b0297K22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0252C07"
+Map "ctg2" Ends Left 25.000
+Map "ctg2" Ends Right 54.000 Oldctg 2
+Gel_number    9252A1
+Bands  3406 30
+Approximate_match_to_cosmid "b1522B13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0257L03"
+Map "ctg7" Ends Left 4.000
+Map "ctg7" Ends Right 20.000 Oldctg 7
+Gel_number    9257C1
+Bands  3436 17
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0268I13"
+Map "ctg4" Ends Left 63.000
+Map "ctg4" Ends Right 86.000 Oldctg 4
+Gel_number    9268A1
+Bands  3453 24
+Exact_match_to_cosmid "H0041J05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0269O12"
+Map "ctg4" Ends Left 47.000
+Map "ctg4" Ends Right 74.000 Oldctg 4
+Gel_number    9269B1
+Bands  3477 28
+Approximate_match_to_cosmid "c1082B18"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0272M18"
+Map "ctg2" Ends Left 68.000
+Map "ctg2" Ends Right 101.000 Oldctg 2
+Gel_number    6272B1
+Bands  2308 34
+Approximate_match_to_cosmid "H0002F22"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0274O10"
+Map "ctg7" Ends Left 9.000
+Map "ctg7" Ends Right 26.000 Oldctg 7
+Gel_number    9274B1
+Bands  3505 18
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0276H10"
+Map "ctg2" Ends Left 59.000
+Map "ctg2" Ends Right 90.000 Oldctg 2
+Gel_number    6276D1
+Bands  2342 32
+Positive_STS "A01" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0278N12"
+Gel_number    9278D1
+Bands  3523 28
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 6 18 22 
+  
+BAC : "H0309N09"
+Map "ctg7" Ends Left 1.000
+Map "ctg7" Ends Right 20.000 Oldctg 7
+Gel_number    4309C1
+Bands  1072 20
+Approximate_match_to_cosmid "c1115H20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0310J14"
+Map "ctg6" Ends Left 68.000
+Map "ctg6" Ends Right 98.000 Oldctg 6
+Gel_number    9310D1
+Bands  3551 31
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0315J01"
+Map "ctg4" Ends Left 65.000
+Map "ctg4" Ends Right 87.000 Oldctg 4
+Gel_number    9315C1
+Bands  3582 23
+Exact_match_to_cosmid "H0041J05"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0315P21"
+Map "ctg3" Ends Left 122.000
+Map "ctg3" Ends Right 142.000 Oldctg 3
+Gel_number    9315C1
+Bands  3605 21
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0328F06"
+Map "ctg1" Ends Left 61.000
+Map "ctg1" Ends Right 94.000 Oldctg 1
+Gel_number    9328D1
+Bands  3626 34
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0330L10"
+Map "ctg4" Ends Left 28.000
+Map "ctg4" Ends Right 60.000 Oldctg 4
+Gel_number    6330D1
+Bands  2374 33
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0339H13"
+Map "ctg1" Ends Left 54.000
+Map "ctg1" Ends Right 79.000 Oldctg 1
+Gel_number    7339C1
+Bands  2866 26
+Exact_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0345A04"
+Map "ctg4" Ends Left 2.000
+Map "ctg4" Ends Right 28.000 Oldctg 4
+Gel_number    9345B1
+Bands  3660 27
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0352B08"
+Map "ctg9" Ends Left 0.000
+Map "ctg9" Ends Right 28.000 Oldctg 9
+Gel_number    8352D1
+Bands  3073 29
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0352C06"
+Map "ctg1" Ends Left 76.000
+Map "ctg1" Ends Right 99.000 Oldctg 1
+Gel_number    9352B1
+Bands  9485 24
+Exact_match_to_cosmid "c1032M05"
+Remark "new_add"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 46 
+  
+BAC : "H0352N23"
+Map "ctg1" Ends Left 0.000
+Map "ctg1" Ends Right 23.000 Oldctg 1
+Gel_number    5352C1
+Bands  1231 24
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0353F22"
+Map "ctg3" Ends Left 78.000
+Map "ctg3" Ends Right 96.000 Oldctg 3
+Gel_number    8353D1
+Bands  3102 19
+Exact_match_to_cosmid "H0215I18"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0355I07"
+Map "ctg3" Ends Left 35.000
+Map "ctg3" Ends Right 53.000 Oldctg 3
+Gel_number    7355A1
+Bands  2892 19
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0364L06"
+Map "ctg5" Ends Left 46.000
+Map "ctg5" Ends Right 67.000 Oldctg 5
+Gel_number    9364D1
+Bands  3687 22
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0364N13"
+Map "ctg3" Ends Left 50.000
+Map "ctg3" Ends Right 73.000 Oldctg 3
+Gel_number    4364C1
+Bands  1114 24
+Approximate_match_to_cosmid "b1503B23"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0364P05"
+Map "ctg3" Ends Left 78.000
+Map "ctg3" Ends Right 99.000 Oldctg 3
+Gel_number    4364C1
+Bands  1092 22
+Approximate_match_to_cosmid "c0186L14"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0371E18"
+Map "ctg6" Ends Left 49.000
+Map "ctg6" Ends Right 70.000 Oldctg 6
+Gel_number    6371B1
+Bands  2407 22
+Exact_match_to_cosmid "c1052J13"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0401L03"
+Map "ctg1" Ends Left 50.000
+Map "ctg1" Ends Right 73.000 Oldctg 1
+Gel_number    0401C1
+Bands  154 24
+Exact_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0406D08"
+Map "ctg2" Ends Left 31.000
+Map "ctg2" Ends Right 53.000 Oldctg 2
+Gel_number    0406D1
+Bands  178 23
+Approximate_match_to_cosmid "b1566O10"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0408L06"
+Map "ctg4" Ends Left 40.000
+Map "ctg4" Ends Right 64.000 Oldctg 4
+Gel_number    0408D1
+Bands  201 25
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0416M08"
+Map "ctg8" Ends Left 18.000
+Map "ctg8" Ends Right 34.000 Oldctg 8
+Gel_number    0416B1
+Bands  226 17
+Positive_STS "A07" New
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 13 45 
+  
+BAC : "H0424C13"
+Map "ctg6" Ends Left 55.000
+Map "ctg6" Ends Right 74.000 Oldctg 6
+Gel_number    0424A
+Bands  243 20
+Approximate_match_to_cosmid "H0210F04"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0426L02"
+Map "ctg3" Ends Left 97.000
+Map "ctg3" Ends Right 125.000 Oldctg 3
+Gel_number    0426D1
+Bands  263 29
+Approximate_match_to_cosmid "c1098L12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0427E10"
+Map "ctg5" Ends Left 31.000
+Map "ctg5" Ends Right 53.000 Oldctg 5
+Gel_number    0427B1
+Bands  292 23
+Approximate_match_to_cosmid "c1100K23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0428F15"
+Map "ctg8" Ends Left 50.000
+Map "ctg8" Ends Right 74.000 Oldctg 8
+Gel_number    0428C1
+Bands  315 25
+Approximate_match_to_cosmid "c1114F11"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0429F01"
+Map "ctg3" Ends Left 54.000
+Map "ctg3" Ends Right 75.000 Oldctg 3
+Gel_number    0429C1
+Bands  340 22
+Exact_match_to_cosmid "H0006G12"
+Fpc_remark "test"
+Fpc_remark "second"
+Creation_date 105 6 6 18 22 
+Modified_date 106 8 10 14 22 
+  
+BAC : "H0446L04"
+Map "ctg4" Ends Left 49.000
+Map "ctg4" Ends Right 71.000 Oldctg 4
+Gel_number    0446D1
+Bands  362 23
+Exact_match_to_cosmid "b1613P20"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0459F06"
+Map "ctg1" Ends Left 48.000
+Map "ctg1" Ends Right 73.000 Oldctg 1
+Gel_number    9459D1
+Bands  3709 26
+Approximate_match_to_cosmid "c1017J12"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0495D18"
+Map "ctg7" Ends Left 4.000
+Map "ctg7" Ends Right 24.000 Oldctg 7
+Gel_number    9495D1
+Bands  3735 21
+Exact_match_to_cosmid "b0336D23"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+  
+BAC : "H0501H21"
+Map "ctg1" Ends Left 64.000
+Map "ctg1" Ends Right 90.000 Oldctg 1
+Gel_number    0501C1
+Bands  385 27
+Exact_match_to_cosmid "b1046O08"
+Creation_date 105 6 6 18 22 
+Modified_date 105 6 10 15 24 
+
+Markerdata
+
+Marker_STS : "A01"
+Anchor_bin "X"
+Anchor_pos     1.1 P
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "A05"
+Anchor_bin "3"
+Anchor_pos     1.0 F
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "A07"
+Anchor_bin "3"
+Anchor_pos     1.3 P
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "A10"
+Anchor_bin "X"
+Anchor_pos     2.1 F
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "B01"
+Anchor_bin "X"
+Anchor_pos     3.7 F
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "B13"
+Anchor_bin "1"
+Anchor_pos     5.8 F
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "C24"
+Anchor_bin "1"
+Anchor_pos     7.8 F
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "D57"
+Anchor_bin "2"
+Anchor_pos     3.5 F
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "D58"
+Anchor_bin "2"
+Anchor_pos     9.7 F
+Remark "test"
+Remark "test2"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 16 1 
+
+Marker_STS : "F100"
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_Locus : "J5"
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 16 2 
+
+Marker_STS : "J9"
+Remark "test"
+Remark "foo"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "J130"
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "J_d34"
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+Marker_STS : "J_z10"
+Remark "test"
+Creation_date 106 8 10 13 45 
+Modified_date 106 8 10 15 29 
+
+
+Contigdata 10
+
+Ctg0 10/6/105 15:24 Ok  -1 # High 0 Avg 0 Low 0 
+Chr_remark "" Pos   0.000  
+Ctg1 10/6/105 15:24 Ok  0 # High 924 Avg 895 Low 792 
+Chr_remark "" Pos  -1.000  
+Ctg2 10/6/105 15:24 Ok  0 # High 932 Avg 917 Low 905 
+Chr_remark "-    [ chX/1 Fw1 Pm1]" Pos  -1.000  
+User_remark "test"
+Trace_remark "test"
+Ctg3 10/6/105 15:24 Ok  0 # High 901 Avg 881 Low 852 
+Chr_remark "+    [ ch1/1 chX/2 Fw3]" Pos  -1.000  
+Ctg4 10/8/106 13:46 Ok  0 # High 929 Avg 919 Low 903 
+Chr_remark "Chr2 [2 Fw2]" Pos   6.600  
+Ctg5 10/6/105 15:24 Ok  0 # High 914 Avg 891 Low 803 
+Chr_remark "" Pos  -1.000  
+Ctg6 10/6/105 15:24 Ok  8 # High 779 Avg 754 Low 715 
+Chr_remark "" Pos  -1.000  
+Ctg7 10/6/105 15:24 Ok  0 # High 965 Avg 947 Low 922 
+Chr_remark "" Pos  -1.000  
+Ctg8 10/6/105 15:24 Ok  0 # High 939 Avg 893 Low 641 
+Chr_remark "Chr3 {*   [2 Fw1 Pm1]}" Pos   1.150  
+Ctg9 10/6/105 15:24 Ok  0 # High 981 Avg 958 Low 939 
+Chr_remark "Chr1 [1 Fw1]" Pos   7.800  
+Ctg10 10/6/105 15:24 Ok  0 # High 983 Avg 959 Low 928 
+Chr_remark "-    [ chX/1 Fw1]" Pos  -1.000  

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/feature_data.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/biographics/feature_data.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/biographics/feature_data.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,67 @@
+[general]
+pixels = 750
+bases = -1000..21000
+height = 12
+reference = B0511
+
+[Cosmid]
+glyph = segments
+fgcolor = blue
+key = C. elegans conserved regions
+
+[EST]
+glyph = segments
+bgcolor= yellow
+connector = solid
+height = 5
+
+[FGENESH]
+glyph = transcript2
+bgcolor = green
+description = 1
+
+[SwissProt]
+glyph = arrow
+base  = 1
+linewidth = 2
+fgcolor = red
+description = 1
+
+[P-element]
+glyph = triangle
+orient = S
+bgcolor   = red
+fgcolor   = white
+fontcolor = black
+label = 1
+point = 1
+
+Cosmid	B0511	516-619
+Cosmid	B0511	3185-3294
+Cosmid	B0511	10946-11208
+Cosmid	B0511	13126-13511
+Cosmid	B0511	11394-11539
+Cosmid	B0511	14383-14490
+Cosmid	B0511	15569-15755
+Cosmid	B0511	18879-19178
+Cosmid	B0511	15850-16110
+Cosmid	B0511	66-208
+Cosmid	B0511	6354-6499
+Cosmid	B0511	13955-14115
+Cosmid	B0511	7985-8042
+Cosmid	B0511	11916-12046
+P-element	""	500-500
+P-element	MrQ	700-700
+P-element	MrR	10000-10000
+EST	yk260e10.5	15569-15724
+EST	yk672a12.5	537-618,3187-3294
+EST	yk595e6.5	552-618,3187-3294
+EST	yk846e07.3	11015-11208
+EST	yk53c10
+	yk53c10.3	12876-13577,13882-14121,14169-14535
+	yk53c10.5	18892-19154,15853-16219
+SwissProt	"PECANEX Protein"	5513-16656	"From SwissProt"
+FGENESH	"Predicted gene 1"	-1200--500,518-616,661-735,3187-3365,3436-3846	description=Pfam;score=20
+FGENESH	"Predicted gene 2"	5513-6497,7968-8136,8278-8383,8651-8839,9462-9515,10032-10705,10949-11340,11387-11524,11765-12067,12876-13577,13882-14121,14169-14535,15006-15209,15259-15462,15513-15753,15853-16219	Mysterious
+FGENESH	"Predicted gene 3"	16626-17396,17451-17597
+FGENESH	"Predicted gene 4"	18459-18722,18882-19176,19221-19513,19572-30000	note=Transmembrane+protein;score=20,50;score=80

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version1.gif
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version1.gif
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version1.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version1.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version2.gif
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version2.gif
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version2.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version2.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version3.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version3.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version4.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version4.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version5.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version5.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version6.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version6.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version7.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version7.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version8.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t1/version8.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version1.gif
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version1.gif
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version1.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version1.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version10.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version10.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version11.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version11.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version12.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version12.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version13.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version13.png
___________________________________________________________________
Name: svn:executable
   + 
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version14.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version14.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version2.gif
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version2.gif
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version2.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version2.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version3.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version3.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version4.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version4.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version5.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version5.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version6.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version6.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version7.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version7.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version8.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version8.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version9.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t2/version9.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version1.gif
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version1.gif
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version1.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version1.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version2.gif
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version2.gif
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version2.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version2.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version3.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version3.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version4.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version4.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version5.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version5.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version6.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version6.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version7.png
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/biographics/t3/version7.png
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,60 @@
+Query= 
+         (180 letters)
+
+>human
+          Length = 179
+
+ Score = 54.0 bits (27), Expect = 2e-12
+ Identities = 83/94 (88%), Gaps = 7/94 (7%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 94  gtggctgggctc-tgaagcatttggg--tgagcccagggg-ctcagggcagggcacct-g 148
+           |||||||||||| |||||||| ||||  |||||||||||| | || |||||||||||| |
+Sbjct: 86  gtggctgggctcgtgaagcatgtgggggtgagcccaggggccccaaggcagggcacctgg 145
+
+                                             
+Query: 149 ccttcag-cggcctcag-cctgcctgtctcccag 180
+           ||||||| | ||||||| ||||||||||||||||
+Sbjct: 146 ccttcagcctgcctcagccctgcctgtctcccag 179
+
+
+
+ Score = 36.2 bits (18), Expect = 4e-07
+ Identities = 18/18 (100%)
+ Strand = Plus / Plus
+
+                            
+Query: 1  gtctgttccaagggcctt 18
+          ||||||||||||||||||
+Sbjct: 1  gtctgttccaagggcctt 18
+
+
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 7
+Number of Sequences: 0
+Number of extensions: 7
+Number of successful extensions: 7
+Number of sequences better than 10.0: 1
+length of query: 180
+length of database: 179
+effective HSP length: 6
+effective length of query: 174
+effective length of database: 173
+effective search space:    30102
+effective search space used:    30102
+T: 0
+A: 30
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 6 (12.4 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn.rev
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn.rev	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastn.rev	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+Query= 
+         (180 letters)
+
+>human
+          Length = 179
+
+ Score = 54.0 bits (27), Expect = 2e-12
+ Identities = 83/94 (88%), Gaps = 7/94 (7%)
+ Strand = Plus / Minus
+
+                                                                       
+Query: 94  gtggctgggctc-tgaagcatttggg--tgagcccagggg-ctcagggcagggcacct-g 148
+           |||||||||||| |||||||| ||||  |||||||||||| | || |||||||||||| |
+Sbjct: 94  gtggctgggctcgtgaagcatgtgggggtgagcccaggggccccaaggcagggcacctgg 35
+
+                                             
+Query: 149 ccttcag-cggcctcag-cctgcctgtctcccag 180
+           ||||||| | ||||||| ||||||||||||||||
+Sbjct: 34  ccttcagcctgcctcagccctgcctgtctcccag 1
+
+
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 6
+Number of Sequences: 0
+Number of extensions: 6
+Number of successful extensions: 6
+Number of sequences better than 10.0: 1
+length of query: 180
+length of database: 179
+effective HSP length: 6
+effective length of query: 174
+effective length of database: 173
+effective search space:    30102
+effective search space used:    30102
+T: 0
+A: 30
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 6 (12.4 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastx.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastx.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.blastx.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,106 @@
+Query= AE000111.1 Escherichia coli K-12 MG1655 section 1 of 400 of the
+complete genome
+         (720 letters)
+
+>AK1H_ECOLI P00561 Bifunctional aspartokinase/homoserine
+           dehydrogenase I (AKI-HDI) [Includes: Aspartokinase I ;
+           Homoserine dehydrogenase I ]
+          Length = 820
+
+ Score =  248 bits (634), Expect = 2e-70
+ Identities = 128/128 (100%), Positives = 128/128 (100%)
+ Frame = -1
+
+Query: 384 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 205
+           MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA
+Sbjct: 1   MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60
+
+Query: 204 LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 25
+           LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA
+Sbjct: 61  LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120
+
+Query: 24  ALICRGEK 1
+           ALICRGEK
+Sbjct: 121 ALICRGEK 128
+
+
+
+ Score = 19.2 bits (38), Expect = 0.29
+ Identities = 15/57 (26%), Positives = 28/57 (48%)
+ Frame = -1
+
+Query: 420 LFFSTKGNEVTTMRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITN 250
+           + F+T  ++V  + V+  GG   A  E+  R    L++     +V  V ++ A +TN
+Sbjct: 458 MLFNT--DQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTN 512
+
+
+
+ Score = 18.5 bits (36), Expect = 0.49
+ Identities = 11/42 (26%), Positives = 18/42 (42%)
+ Frame = -1
+
+Query: 360 TSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAM 235
+           T +   E+ L V   LES     +    ++A     +H+V M
+Sbjct: 146 TVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLM 187
+
+
+
+ Score = 17.7 bits (34), Expect = 0.84
+ Identities = 10/35 (28%), Positives = 17/35 (48%)
+ Frame = -1
+
+Query: 267 PAKITNHLVAMIEKTISGQDALPNISDAERIFAEL 163
+           P K  ++L  M   ++SG      +  A R+FA +
+Sbjct: 305 PVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAM 339
+
+
+
+ Score = 15.0 bits (27), Expect = 5.5
+ Identities = 5/14 (35%), Positives = 8/14 (56%)
+ Frame = -3
+
+Query: 388 NHASVEVRRYISGK 347
+           NH    + + ISG+
+Sbjct: 45  NHLVAMIEKTISGQ 58
+
+
+
+ Score = 15.0 bits (27), Expect = 5.5
+ Identities = 5/7 (71%), Positives = 7/7 (99%)
+ Frame = +1
+
+Query: 640 PVQKLLS 660
+           PV+KLL+
+Sbjct: 150 PVEKLLA 156
+
+
+Lambda     K      H
+   0.318    0.135    0.401 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 1264
+Number of Sequences: 0
+Number of extensions: 25
+Number of successful extensions: 6
+Number of sequences better than 10.0: 2
+Number of HSP's better than 10.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 6
+length of database: 820
+effective HSP length: 33
+effective length of database: 787
+effective search space used:   162122
+frameshift window, decay const: 50,  0.1
+T: 12
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 25 (14.3 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.bug940.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.bug940.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.bug940.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,145 @@
+Query= zinc finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id
+RS.ctg14243-000000.6.0
+         (469 letters)
+
+>gi|4507985| zinc finger protein 135 (clone pHZ-17) [Homo sapiens].
+           neo_id RS.ctg14243-000000.6.0
+          Length = 469
+
+ Score =  637 bits (1626), Expect = 0.0
+ Identities = 311/469 (66%), Positives = 330/469 (70%), Gaps = 120/469 (25%)
+
+Query: 121 MGNTWKKGEARPKCFTENCVKEKPYKCQECGKAFSHSSALIEHHRTHTGERPYECHECLK 180
+           MGNTWKKGEARPKCFTENCVKEKPYKCQECGKAFSHSSALIEHHRTHTGERPYECHECLK
+Sbjct: 1   MGNTWKKGEARPKCFTENCVKEKPYKCQECGKAFSHSSALIEHHRTHTGERPYECHECLK 60
+
+Query: 181 GFRNSSALTKHQRIHTGEKPYKCTQCGRTFNQIAPLIQHQRTHTGEKPYECSECGKSFSF 240
+           GFRNSSALTKHQRIHTGEKPYKCTQCGRTFNQIAPLIQHQRTHTGEKPYECSECGKSFSF
+Sbjct: 61  GFRNSSALTKHQRIHTGEKPYKCTQCGRTFNQIAPLIQHQRTHTGEKPYECSECGKSFSF 120
+
+Query: 241 RSSFSQHERTHTGEKPYECSECGKAFR--------------------------------- 267
+           RSSFSQHERTHTGEKPYECSECGKAFR                                 
+Sbjct: 121 RSSFSQHERTHTGEKPYECSECGKAFRQSIHLTQHLRIHTGEKPYQCGECGKAFSHSSSL 180
+
+Query: 268 ---QSIH--------------------LTQHLRIHTGEKPYQCGECGKAFSHSSSL---- 300
+              Q IH                    L QH R HTGEKPY+CGECGKAFS S+ L    
+Sbjct: 181 TKHQRIHTGEKPYECHECGKAFTQITPLIQHQRTHTGEKPYECGECGKAFSQSTLLTEHR 240
+
+Query: 301 ----GEKPYECNDCGKAFSHSSSLTKHQRIHTGEKPYECNQCGRAFSQLAPLIQHQRIHT 356
+               GEKPY CN+CGK FSHSSSL++H+R HTGEKPYEC+QCG+AF Q   L QHQRIHT
+Sbjct: 241 RIHTGEKPYGCNECGKTFSHSSSLSQHERTHTGEKPYECSQCGKAFRQSTHLTQHQRIHT 300
+
+Query: 357 GEKP----------------TKHQRIHTGEKPYECHECGKAFTQITPLIQHQRTHTGEKP 400
+           GEKP                TKHQRIHTGEKPYEC++CG+AF+Q+ PLIQHQR HTGEKP
+Sbjct: 301 GEKPYECNDCGKAFSHSSSLTKHQRIHTGEKPYECNQCGRAFSQLAPLIQHQRIHTGEKP 360
+
+Query: 401 YECGECGKAFSQSTLLTEHR---------------------------------------- 420
+           YEC +CG+A +++TLL EH+                                        
+Sbjct: 361 YECNQCGRASARATLLIEHQRIHTKEKPYGCNECGKSFSHSSSLSQHERTHTGEKPYECH 420
+
+Query: 421 DCGKSFRQSTHLTQHRRIHTGEKPYACRDCGKAFTHSSSLTKHQRTHTG 469
+           DCGKSFRQSTHLTQHRRIHTGEKPYACRDCGKAFTHSSSLTKHQRTHTG
+Sbjct: 421 DCGKSFRQSTHLTQHRRIHTGEKPYACRDCGKAFTHSSSLTKHQRTHTG 469
+
+
+ Score =  598 bits (1524), Expect = e-175
+ Identities = 275/444 (61%), Positives = 324/444 (72%), Gaps = 30/444 (6%)
+
+Query: 6   EKPYGCNECGKTFSHSSSLSQHERTHTGEKPYECSQCGKAFRQSTHLTQHQRIHT----Y 61
+           EKPY C ECGK FSHSS+L +H RTHTGE+PYEC +C K FR S+ LT+HQRIHT    Y
+Sbjct: 22  EKPYKCQECGKAFSHSSALIEHHRTHTGERPYECHECLKGFRNSSALTKHQRIHTGEKPY 81
+
+Query: 62  ECNQCGRASARATLLIEHQRIHTKEKPYGCNECGKXXXXXXXXXQHERTHTGEKPYEC-H 120
+           +C QCGR   +   LI+HQR HT EKPY C+ECGK         QHERTHTGEKPYEC  
+Sbjct: 82  KCTQCGRTFNQIAPLIQHQRTHTGEKPYECSECGKSFSFRSSFSQHERTHTGEKPYECSE 141
+
+Query: 121 MGNTWKKGEARPKCFTENCVKEKPYKCQECGKAFSHSSALIEHHRTHTGERPYECHECLK 180
+            G  +++     +    +   EKPY+C ECGKAFSHSS+L +H R HTGE+PYECHEC K
+Sbjct: 142 CGKAFRQSIHLTQHLRIH-TGEKPYQCGECGKAFSHSSSLTKHQRIHTGEKPYECHECGK 200
+
+Query: 181 GFRNSSALTKHQRIHTGEKPYKCTQCGRTFNQIAPLIQHQRTHTGEKPYECSECGKSFSF 240
+            F   + L +HQR HTGEKPY+C +CG+ F+Q   L +H+R HTGEKPY C+ECGK+FS 
+Sbjct: 201 AFTQITPLIQHQRTHTGEKPYECGECGKAFSQSTLLTEHRRIHTGEKPYGCNECGKTFSH 260
+
+Query: 241 RSSFSQHERTHTGEKPYECSECGKAFRQSIHLTQHLRIHTGEKPYQCGECGKAFSHSSSL 300
+            SS SQHERTHTGEKPYECS+CGKAFRQS HLTQH RIHTGEKPY+C +CGKAFSHSSSL
+Sbjct: 261 SSSLSQHERTHTGEKPYECSQCGKAFRQSTHLTQHQRIHTGEKPYECNDCGKAFSHSSSL 320
+
+Query: 301 --------GEKPYECNDCGKAFSHSSSLTKHQRIHTGEKPYECNQCGRAFSQLAPLIQHQ 352
+                   GEKPYECN CG+AFS  + L +HQRIHTGEKPYECNQCGRA ++   LI+HQ
+Sbjct: 321 TKHQRIHTGEKPYECNQCGRAFSQLAPLIQHQRIHTGEKPYECNQCGRASARATLLIEHQ 380
+
+Query: 353 RIHTGEKP----------------TKHQRIHTGEKPYECHECGKAFTQITPLIQHQRTHT 396
+           RIHT EKP                ++H+R HTGEKPYECH+CGK+F Q T L QH+R HT
+Sbjct: 381 RIHTKEKPYGCNECGKSFSHSSSLSQHERTHTGEKPYECHDCGKSFRQSTHLTQHRRIHT 440
+
+Query: 397 GEKPYECGECGKAFSQSTLLTEHR 420
+           GEKPY C +CGKAF+ S+ LT+H+
+Sbjct: 441 GEKPYACRDCGKAFTHSSSLTKHQ 464
+
+
+ Score =  499 bits (1270), Expect = e-145
+ Identities = 226/372 (60%), Positives = 274/372 (72%), Gaps = 18/372 (4%)
+
+Query: 1   RIHTGEKPYGCNECGKTFSHSSSLSQHERTHTGEKPYECSQCGKAFRQSTHLTQHQRIHT 60
+           R HTGEKPY C+ECGK+FS  SS SQHERTHTGEKPYECS+CGKAFRQS HLTQH RIHT
+Sbjct: 101 RTHTGEKPYECSECGKSFSFRSSFSQHERTHTGEKPYECSECGKAFRQSIHLTQHLRIHT 160
+
+Query: 61  ----YECNQCGRASARATLLIEHQRIHTKEKPYGCNECGKXXXXXXXXXQHERTHTGEKP 116
+               Y+C +CG+A + ++ L +HQRIHT EKPY C+ECGK         QH+RTHTGEKP
+Sbjct: 161 GEKPYQCGECGKAFSHSSSLTKHQRIHTGEKPYECHECGKAFTQITPLIQHQRTHTGEKP 220
+
+Query: 117 YECHMGNTWKKGEARPKCFTEN---CVKEKPYKCQECGKAFSHSSALIEHHRTHTGERPY 173
+           YEC       K  ++    TE+      EKPY C ECGK FSHSS+L +H RTHTGE+PY
+Sbjct: 221 YEC---GECGKAFSQSTLLTEHRRIHTGEKPYGCNECGKTFSHSSSLSQHERTHTGEKPY 277
+
+Query: 174 ECHECLKGFRNSSALTKHQRIHTGEKPYKCTQCGRTFNQIAPLIQHQRTHTGEKPYECSE 233
+           EC +C K FR S+ LT+HQRIHTGEKPY+C  CG+ F+  + L +HQR HTGEKPYEC++
+Sbjct: 278 ECSQCGKAFRQSTHLTQHQRIHTGEKPYECNDCGKAFSHSSSLTKHQRIHTGEKPYECNQ 337
+
+Query: 234 CGKSFSFRSSFSQHERTHTGEKPYECSECGKAFRQSIHLTQHLRIHTGEKPYQCGECGKA 293
+           CG++FS  +   QH+R HTGEKPYEC++CG+A  ++  L +H RIHT EKPY C ECGK+
+Sbjct: 338 CGRAFSQLAPLIQHQRIHTGEKPYECNQCGRASARATLLIEHQRIHTKEKPYGCNECGKS 397
+
+Query: 294 FSHSSSL--------GEKPYECNDCGKAFSHSSSLTKHQRIHTGEKPYECNQCGRAFSQL 345
+           FSHSSSL        GEKPYEC+DCGK+F  S+ LT+H+RIHTGEKPY C  CG+AF+  
+Sbjct: 398 FSHSSSLSQHERTHTGEKPYECHDCGKSFRQSTHLTQHRRIHTGEKPYACRDCGKAFTHS 457
+
+Query: 346 APLIQHQRIHTG 357
+           + L +HQR HTG
+Sbjct: 458 SSLTKHQRTHTG 469
+
+
+Lambda     K      H
+   0.318    0.131    0.428 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 38599
+Number of Sequences: 0
+Number of extensions: 270
+Number of successful extensions: 108
+Number of sequences better than 10.0: 1
+Number of HSP's better than 10.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 8
+length of query: 469
+length of database: 469
+effective HSP length: 23
+effective length of query: 446
+effective length of database: 446
+effective search space:   198916
+effective search space used:   198916
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 25 (14.4 bits)
+S2: 26 (14.5 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,64 @@
+>ALEU_HORVU
+          Length = 362
+
+ Score =  191 bits (481), Expect = 2e-53
+ Identities = 111/322 (34%), Positives = 167/322 (51%), Gaps = 27/322 (8%)
+
+Query: 28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F  ++ K Y S  E   RF IF  +L ++   N   + ++     G+N+F+D+S +
+Sbjct: 60  RFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPYR----LGINRFSDMSWE 115
+
+Query: 87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           EF+   L    A  T    +A         ++P   DWR  G V+PVKNQ  CGSCW+FS
+Sbjct: 116 EFQATRLG---AAQTCSATLAGNHLMRDAAALPETKDWREDGIVSPVKNQAHCGSCWTFS 172
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           TTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI  NG
+Sbjct: 173 TTGALEAAYTQATGKNISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYIKYNG 224
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAADAV 265
+           GI TE SYPY    G  C++ + N   ++ +   I  N E  +   +    P+++A   +
+Sbjct: 225 GIDTEESYPYKGVNGV-CHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAFQVI 283
+
+Query: 266 E-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQG 321
+           + ++ Y  GV+        P+ ++H +L VGY  +N      +PYW++KNSWGADWG+ G
+Sbjct: 284 DGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVEN-----GVPYWLIKNSWGADWGDNG 338
+
+Query: 322 YIYLRRGKNTCGVSNFVSTSII 343
+           Y  +  GKN C ++   S  ++
+Sbjct: 339 YFKMEMGKNMCAIATCASYPVV 360
+
+
+Lambda     K      H
+   0.316    0.135    0.414 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 368
+Number of Sequences: 0
+Number of extensions: 19
+Number of successful extensions: 6
+Number of sequences better than 10.0: 1
+Number of HSP's better than 10.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 1
+length of query: 343
+length of database: 362
+effective HSP length: 23
+effective length of query: 320
+effective length of database: 339
+effective search space:   108480
+effective search space used:   108480
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 24 (13.8 bits)
+S2: 24 (13.8 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.tblastx.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.tblastx.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bl2seq.tblastx.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,475 @@
+Query= Escherichia coli K-12 MG1655 section 1 of 400 of the complete
+genome
+         (720 letters)
+
+>gi|1786181|gb|AE000111.1|AE000111 Escherichia coli K-12 MG1655
+           section 1 of 400 of the complete genome
+          Length = 720
+
+ Score =  515 bits (1118), Expect = e-151
+ Identities = 229/240 (95%), Positives = 229/240 (95%)
+ Frame = +1 / +1
+
+                                                                       
+Query: 1   SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LRSLNTLTNIGIAHRQIKITEY 180
+           SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LRSLNTLTNIGIAHRQIKITEY
+Sbjct: 1   SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LRSLNTLTNIGIAHRQIKITEY 180
+
+                                                                       
+Query: 181 TTSMKRISXXXXXXXXXXXGNGAG*RVQETQKKART*QCGLFFSTKGNEVTTMRVLKFGG 360
+           TTSMKRIS           GNGAG*RVQETQKKART*QCGLFFSTKGNEVTTMRVLKFGG
+Sbjct: 181 TTSMKRISTTITTTITITTGNGAG*RVQETQKKART*QCGLFFSTKGNEVTTMRVLKFGG 360
+
+                                                                       
+Query: 361 TSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAE 540
+           TSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAE
+Sbjct: 361 TSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAE 540
+
+                                                                       
+Query: 541 RIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEK 720
+           RIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEK
+Sbjct: 541 RIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEK 720
+
+
+
+ Score =  508 bits (1103), Expect = e-149
+ Identities = 218/240 (90%), Positives = 218/240 (90%)
+ Frame = -1 / -1
+
+                                                                       
+Query: 720 FLATANQRSVDAIRALPQQTNAMQDMFYLGKFLIDESFQLRQREPRLGGGESRQKFGKNT 541
+           FLATANQRSVDAIRALPQQTNAMQDMFYLGKFLIDESFQLRQREPRLGGGESRQKFGKNT
+Sbjct: 720 FLATANQRSVDAIRALPQQTNAMQDMFYLGKFLIDESFQLRQREPRLGGGESRQKFGKNT 541
+
+                                                                       
+Query: 540 FGIADIG*SILAANGFFNHRHQVVXXXXXXXXXXGHLPLPGIAFQNIGNTQKTFCICH*C 361
+           FGIADIG*SILAANGFFNHRHQVV          GHLPLPGIAFQNIGNTQKTFCICH*C
+Sbjct: 540 FGIADIG*SILAANGFFNHRHQVVGDFGGGREDGGHLPLPGIAFQNIGNTQKTFCICH*C 361
+
+                                                                       
+Query: 360 TAELQHSHGCYLVTFGRKKKPALSGAGFFLCFLYASARTVTCXXXXXXXXXXXXAFHGCC 181
+           TAELQHSHGCYLVTFGRKKKPALSGAGFFLCFLYASARTVTC            AFHGCC
+Sbjct: 360 TAELQHSHGCYLVTFGRKKKPALSGAGFFLCFLYASARTVTCGNGDGGGNGGANAFHGCC 181
+
+                                                                       
+Query: 180 VLCNFYLSVRYAYIG*SI**PKSIKF*FTHGR*PVQKLLSDTLFLIHTETYCPLQSE*KA 1
+           VLCNFYLSVRYAYIG*SI**PKSIKF*FTHGR*PVQKLLSDTLFLIHTETYCPLQSE*KA
+Sbjct: 180 VLCNFYLSVRYAYIG*SI**PKSIKF*FTHGR*PVQKLLSDTLFLIHTETYCPLQSE*KA 1
+
+
+
+ Score =  416 bits (902), Expect(2) = e-160
+ Identities = 161/161 (100%), Positives = 161/161 (100%)
+ Frame = +3 / +3
+
+                                                                       
+Query: 237 R*RCGLTRTGNTEKSPHLTVRAFFFDQR*RGNNHASVEVRRYISGKCRTFSACCRYSGKQ 416
+           R*RCGLTRTGNTEKSPHLTVRAFFFDQR*RGNNHASVEVRRYISGKCRTFSACCRYSGKQ
+Sbjct: 237 R*RCGLTRTGNTEKSPHLTVRAFFFDQR*RGNNHASVEVRRYISGKCRTFSACCRYSGKQ 416
+
+                                                                       
+Query: 417 CQAGAGGHRPLCPRQNHQPPGGDD*KNH*RPGCFTQYQRCRTYFCRTFDGTRRRPAGVPA 596
+           CQAGAGGHRPLCPRQNHQPPGGDD*KNH*RPGCFTQYQRCRTYFCRTFDGTRRRPAGVPA
+Sbjct: 417 CQAGAGGHRPLCPRQNHQPPGGDD*KNH*RPGCFTQYQRCRTYFCRTFDGTRRRPAGVPA 596
+
+                                                    
+Query: 597 GAIENFRRSGICPNKTCPAWH*FVGAVPG*HQRCADLPWRE 719
+           GAIENFRRSGICPNKTCPAWH*FVGAVPG*HQRCADLPWRE
+Sbjct: 597 GAIENFRRSGICPNKTCPAWH*FVGAVPG*HQRCADLPWRE 719
+
+
+
+ Score =  389 bits (843), Expect(2) = e-153
+ Identities = 161/161 (100%), Positives = 161/161 (100%)
+ Frame = -2 / -2
+
+                                                                       
+Query: 719 FSPRQISAALMLSGHCPNKLMPCRTCFIWANS*STKVFNCASGNPGWAAASPVKSSAKIR 540
+           FSPRQISAALMLSGHCPNKLMPCRTCFIWANS*STKVFNCASGNPGWAAASPVKSSAKIR
+Sbjct: 719 FSPRQISAALMLSGHCPNKLMPCRTCFIWANS*STKVFNCASGNPGWAAASPVKSSAKIR 540
+
+                                                                       
+Query: 539 SASLILGKASWPLMVFSIIATRWLVILAGAERTVATCPCLALLSRISATRRKRSAFATDV 360
+           SASLILGKASWPLMVFSIIATRWLVILAGAERTVATCPCLALLSRISATRRKRSAFATDV
+Sbjct: 539 SASLILGKASWPLMVFSIIATRWLVILAGAERTVATCPCLALLSRISATRRKRSAFATDV 360
+
+                                                    
+Query: 359 PPNFNTRMVVTSLPLVEKKSPHCQVRAFFCVSCTRQPAPLP 237
+           PPNFNTRMVVTSLPLVEKKSPHCQVRAFFCVSCTRQPAPLP
+Sbjct: 359 PPNFNTRMVVTSLPLVEKKSPHCQVRAFFCVSCTRQPAPLP 237
+
+
+
+ Score =  326 bits (706), Expect(2) = e-129
+ Identities = 138/161 (85%), Positives = 138/161 (85%)
+ Frame = +2 / +2
+
+                                                                       
+Query: 236 QVTVRADAYRKHRKKPAPDSAGFFFRPKVTR*QPCEC*SSAVHQWQMQNVFCVLPIFWKA 415
+           QVTVRADAYRKHRKKPAPDSAGFFFRPKVTR*QPCEC*SSAVHQWQMQNVFCVLPIFWKA
+Sbjct: 236 QVTVRADAYRKHRKKPAPDSAGFFFRPKVTR*QPCEC*SSAVHQWQMQNVFCVLPIFWKA 415
+
+                                                                       
+Query: 416 MPGRGRWXXXXXXXXXXXTTWWR*LKKPLAARMLYPISAMPNVFLPNFXXXXXXXXXXXX 595
+           MPGRGRW           TTWWR*LKKPLAARMLYPISAMPNVFLPNF            
+Sbjct: 416 MPGRGRWPPSSLPPPKSPTTWWR*LKKPLAARMLYPISAMPNVFLPNF*RDSPPPSRGSR 595
+
+                                                    
+Query: 596 WRN*KLSSIRNLPK*NMSCMALVCWGSARIASTLR*FAVAR 718
+           WRN*KLSSIRNLPK*NMSCMALVCWGSARIASTLR*FAVAR
+Sbjct: 596 WRN*KLSSIRNLPK*NMSCMALVCWGSARIASTLR*FAVAR 718
+
+
+
+ Score =  198 bits (427), Expect(3) = e-136
+ Identities = 79/79 (100%), Positives = 79/79 (100%)
+ Frame = -3 / -3
+
+                                                                       
+Query: 718 SRHGKSAQR*CYPGTAPTN*CHAGHVLFGQIPDRRKFSIAPAGTPAGRRRVPSKVRQKYV 539
+           SRHGKSAQR*CYPGTAPTN*CHAGHVLFGQIPDRRKFSIAPAGTPAGRRRVPSKVRQKYV
+Sbjct: 718 SRHGKSAQR*CYPGTAPTN*CHAGHVLFGQIPDRRKFSIAPAGTPAGRRRVPSKVRQKYV 539
+
+                              
+Query: 538 RHR*YWVKHPGR*WFFQSS 482
+           RHR*YWVKHPGR*WFFQSS
+Sbjct: 538 RHR*YWVKHPGR*WFFQSS 482
+
+
+
+ Score =  170 bits (365), Expect(3) = e-136
+ Identities = 65/65 (100%), Positives = 65/65 (100%)
+ Frame = -3 / -3
+
+                                                                       
+Query: 430 APAWHCFPEYRQHAENVLHLPLMYRRTSTLAWLLPRYLWSKKKARTVRCGLFSVFPVRVS 251
+           APAWHCFPEYRQHAENVLHLPLMYRRTSTLAWLLPRYLWSKKKARTVRCGLFSVFPVRVS
+Sbjct: 430 APAWHCFPEYRQHAENVLHLPLMYRRTSTLAWLLPRYLWSKKKARTVRCGLFSVFPVRVS 251
+
+                
+Query: 250 PHRYL 236
+           PHRYL
+Sbjct: 250 PHRYL 236
+
+
+
+ Score =  155 bits (332), Expect(2) = e-153
+ Identities = 65/65 (100%), Positives = 65/65 (100%)
+ Frame = -3 / -3
+
+                                                                       
+Query: 196 VSWMLCTL*FLSVCALCLYWLKYLVT*VNKILIYSRQVTSSEAAIRHSFFNPHRDILPVA 17
+           VSWMLCTL*FLSVCALCLYWLKYLVT*VNKILIYSRQVTSSEAAIRHSFFNPHRDILPVA
+Sbjct: 196 VSWMLCTL*FLSVCALCLYWLKYLVT*VNKILIYSRQVTSSEAAIRHSFFNPHRDILPVA 17
+
+                
+Query: 16  VRMKS 2
+           VRMKS
+Sbjct: 16  VRMKS 2
+
+
+
+ Score =  154 bits (330), Expect(2) = e-160
+ Identities = 65/65 (100%), Positives = 65/65 (100%)
+ Frame = +3 / +3
+
+                                                                       
+Query: 3   LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*VTKYFNQYRHSAQTDKNYRVH 182
+           LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*VTKYFNQYRHSAQTDKNYRVH
+Sbjct: 3   LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*VTKYFNQYRHSAQTDKNYRVH 182
+
+                
+Query: 183 NIHET 197
+           NIHET
+Sbjct: 183 NIHET 197
+
+
+
+ Score =  141 bits (302), Expect(3) = e-136
+ Identities = 59/59 (100%), Positives = 59/59 (100%)
+ Frame = -2 / -2
+
+                                                                      
+Query: 179 YSVIFICLCAMPILVKVFSDLSQ*NFNLLTAGNQFRSCYQTLFF*STQRHIARCSQNEK 3
+           YSVIFICLCAMPILVKVFSDLSQ*NFNLLTAGNQFRSCYQTLFF*STQRHIARCSQNEK
+Sbjct: 179 YSVIFICLCAMPILVKVFSDLSQ*NFNLLTAGNQFRSCYQTLFF*STQRHIARCSQNEK 3
+
+
+
+ Score =  139 bits (298), Expect(2) = e-129
+ Identities = 62/62 (100%), Positives = 62/62 (100%)
+ Frame = +2 / +2
+
+                                                                       
+Query: 2   AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDLGH*IL*PI*A*RTDR*KLQST 181
+           AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDLGH*IL*PI*A*RTDR*KLQST
+Sbjct: 2   AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDLGH*IL*PI*A*RTDR*KLQST 181
+
+             
+Query: 182 QH 187
+           QH
+Sbjct: 182 QH 187
+
+
+
+ Score = 24.9 bits (48), Expect = 0.002
+ Identities = 9/12 (75%), Positives = 10/12 (83%)
+ Frame = -2 / +3
+
+                       
+Query: 308 KKSPHCQVRAFF 273
+           +KSPH  VRAFF
+Sbjct: 273 EKSPHLTVRAFF 308
+
+
+
+ Score = 24.9 bits (48), Expect = 0.002
+ Identities = 9/13 (69%), Positives = 12/13 (92%)
+ Frame = -3 / +1
+
+                        
+Query: 313 SKKKARTVRCGLF 275
+           ++KKART +CGLF
+Sbjct: 268 TQKKART*QCGLF 306
+
+
+
+ Score = 24.9 bits (48), Expect = 0.002
+ Identities = 9/12 (75%), Positives = 10/12 (83%)
+ Frame = +3 / -2
+
+                       
+Query: 273 EKSPHLTVRAFF 308
+           +KSPH  VRAFF
+Sbjct: 308 KKSPHCQVRAFF 273
+
+
+
+ Score = 24.9 bits (48), Expect = 0.002
+ Identities = 9/13 (69%), Positives = 12/13 (92%)
+ Frame = +1 / -3
+
+                        
+Query: 268 TQKKART*QCGLF 306
+           ++KKART +CGLF
+Sbjct: 313 SKKKARTVRCGLF 275
+
+
+
+ Score = 22.1 bits (42), Expect = 0.012
+ Identities = 8/12 (66%), Positives = 9/12 (74%)
+ Frame = +2 / -1
+
+                       
+Query: 272 RKKPAPDSAGFF 307
+           +KKPA   AGFF
+Sbjct: 309 KKKPALSGAGFF 274
+
+
+
+ Score = 22.1 bits (42), Expect = 0.012
+ Identities = 8/12 (66%), Positives = 9/12 (74%)
+ Frame = -1 / +2
+
+                       
+Query: 309 KKKPALSGAGFF 274
+           +KKPA   AGFF
+Sbjct: 272 RKKPAPDSAGFF 307
+
+
+
+ Score = 18.5 bits (34), Expect = 0.15
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+ Frame = -3 / +3
+
+                 
+Query: 598 PAGTPA 581
+           PAG PA
+Sbjct: 579 PAGVPA 596
+
+
+
+ Score = 18.5 bits (34), Expect = 0.15
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+ Frame = +3 / -3
+
+                 
+Query: 579 PAGVPA 596
+           PAG PA
+Sbjct: 598 PAGTPA 581
+
+
+
+ Score = 18.0 bits (33), Expect = 0.20
+ Identities = 6/14 (42%), Positives = 6/14 (42%)
+ Frame = +3 / -3
+
+                         
+Query: 243 RCGLTRTGNTEKSP 284
+           RCGL        SP
+Sbjct: 289 RCGLFSVFPVRVSP 248
+
+
+
+ Score = 18.0 bits (33), Expect = 0.20
+ Identities = 6/14 (42%), Positives = 6/14 (42%)
+ Frame = -3 / +3
+
+                         
+Query: 289 RCGLFSVFPVRVSP 248
+           RCGL        SP
+Sbjct: 243 RCGLTRTGNTEKSP 284
+
+
+
+ Score = 17.6 bits (32), Expect = 0.28
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+ Frame = -3 / -1
+
+                 
+Query: 301 ARTVRC 284
+           ARTV C
+Sbjct: 252 ARTVTC 235
+
+
+
+ Score = 15.3 bits (27), Expect = 1.4
+ Identities = 5/14 (35%), Positives = 8/14 (56%)
+ Frame = +1 / +3
+
+                         
+Query: 469 NHLVAMIEKTISGQ 510
+           NH    + + ISG+
+Sbjct: 333 NHASVEVRRYISGK 374
+
+
+
+ Score = 15.3 bits (27), Expect = 1.4
+ Identities = 5/14 (35%), Positives = 8/14 (56%)
+ Frame = +3 / +1
+
+                         
+Query: 333 NHASVEVRRYISGK 374
+           NH    + + ISG+
+Sbjct: 469 NHLVAMIEKTISGQ 510
+
+
+
+ Score = 14.8 bits (26), Expect = 1.9
+ Identities = 8/20 (40%), Positives = 10/20 (50%)
+ Frame = +1 / -1
+
+                               
+Query: 241 NGAG*RVQETQKKART*QCG 300
+           +GAG  +      ART  CG
+Sbjct: 291 SGAGFFLCFLYASARTVTCG 232
+
+
+
+ Score = 14.8 bits (26), Expect = 1.9
+ Identities = 2/6 (33%), Positives = 4/6 (66%)
+ Frame = +2 / -3
+
+                 
+Query: 389 CVLPIF 406
+           C L ++
+Sbjct: 157 CALCLY 140
+
+
+
+ Score = 14.8 bits (26), Expect = 1.9
+ Identities = 2/6 (33%), Positives = 4/6 (66%)
+ Frame = -3 / +2
+
+                 
+Query: 157 CALCLY 140
+           C L ++
+Sbjct: 389 CVLPIF 406
+
+
+
+ Score = 14.3 bits (25), Expect = 2.6
+ Identities = 6/18 (33%), Positives = 8/18 (44%)
+ Frame = -2 / +2
+
+                             
+Query: 299 PHCQVRAFFCVSCTRQPA 246
+           P   VRA       ++PA
+Sbjct: 233 PQVTVRADAYRKHRKKPA 286
+
+
+
+ Score = 13.4 bits (23), Expect = 4.9
+ Identities = 11/31 (35%), Positives = 11/31 (35%)
+ Frame = +3 / -3
+
+                                          
+Query: 57  CLIAASELVTCRE*IKILLT*VTKYFNQYRH 149
+           CL     LVT    I I    VT      RH
+Sbjct: 148 CLYWLKYLVT*VNKILIYSRQVTSSEAAIRH 56
+
+
+
+ Score = 13.4 bits (23), Expect = 4.9
+ Identities = 11/31 (35%), Positives = 11/31 (35%)
+ Frame = -3 / +3
+
+                                          
+Query: 148 CLYWLKYLVT*VNKILIYSRQVTSSEAAIRH 56
+           CL     LVT    I I    VT      RH
+Sbjct: 57  CLIAASELVTCRE*IKILLT*VTKYFNQYRH 149
+
+
+
+ Score = 13.0 bits (22), Expect = 6.7
+ Identities = 5/14 (35%), Positives = 6/14 (42%)
+ Frame = -3 / -2
+
+                         
+Query: 370 PLMYRRTSTLAWLL 329
+           PLM        WL+
+Sbjct: 506 PLMVFSIIATRWLV 465
+
+
+
+ Score = 13.0 bits (22), Expect = 6.7
+ Identities = 5/14 (35%), Positives = 6/14 (42%)
+ Frame = -2 / -3
+
+                         
+Query: 506 PLMVFSIIATRWLV 465
+           PLM        WL+
+Sbjct: 370 PLMYRRTSTLAWLL 329
+
+
+
+ Score = 12.5 bits (21), Expect = 9.2
+ Identities = 3/5 (60%), Positives = 5/5 (100%)
+ Frame = +2 / -2
+
+                
+Query: 275 KKPAP 289
+           ++PAP
+Sbjct: 257 RQPAP 243
+
+
+Lambda     K      H
+   0.318    0.135    0.401 
+
+
+Matrix: BLOSUM62
+Number of Hits to DB: 2790
+Number of Sequences: 0
+Number of extensions: 62
+Number of successful extensions: 40
+Number of sequences better than 10.0: 1
+length of database: 240
+effective HSP length: 19
+effective length of database: 221
+effective search space used:    48620
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 0 ( 0.0 bits)
+S1: 41 (21.7 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/blast.report
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/blast.report	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/blast.report	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,434 @@
+TBLASTN 2.0.4 [Feb-24-1998]
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A.
+Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman
+(1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search
+ programs", Nucleic Acids Res. 25:3389-3402.
+
+Query=
+gi|1401126 
+         (504 letters)
+
+Database: Non-redundant GenBank+EMBL+DDBJ+PDB sequences
+           336,723 sequences; 677,679,054 total letters
+
+Searchingdone
+
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gb|U49928|HSU49928 Homo sapiens TAK1 binding protein (TAB1) mRNA...  1009  0.0
+emb|Z36985|PTPP2CMR P.tetraurelia mRNA for protein phosphatase t...    58  4e-07
+emb|X77116|ATMRABI1 A.thaliana mRNA for ABI1 protein                   53  1e-05
+gb|U12856|ATU12856 Arabidopsis thaliana Col-0 abscisic acid inse...    53  1e-05
+dbj|D38109|ATHPP2CA Arabidopsis thaliana mRNA for protein phosph...    52  3e-05
+emb|Y08965|ATABI2RNA A.thaliana mRNA for ABI2 protein                  47  8e-04
+emb|AL010222|PFSC04009 Plasmodium falciparum DNA *** SEQUENCING ...    45  0.004
+gb|AC002409|ATAC002409 Arabidopsis thaliana chromosome II BAC T2...    43  0.012
+emb|Z98762|SPAC4A8 S.pombe chromosome I cosmid c4A8                    42  0.020
+gb|AF006827|AF006827 Magnaporthe grisea adenylate cyclase (MAC1)...    41  0.045
+emb|Y13936|HSY13936 Homo sapiens mRNA for protein phosphatase 2C...    41  0.059
+gb|U81159|BTU81159 Bos taurus magnesium-dependent calcium inhibi...    40  0.077
+emb|Y10438|SPFKBAD Streptomyces sp. MA6548 fkbA gene and partial...    40  0.077
+gb|AF012921|AF012921 Magnaporthe grisea adenylate cyclase (mac1)...    40  0.10
+gb|U42383|MMU42383 Mus musculus fibroblast growth factor inducib...    40  0.10
+gb|AF023665|AF023665 Plasmodium falciparum protein phosphatase 2...    39  0.13
+gb|M24942|YSPCYR1A Yeast (S.pombe) adenylate cyclase (CYR1) gene...    39  0.17
+gb|M26699|YSPADC Yeast (S.pombe) cyr1 gene encoding adenylyl cyc...    39  0.17
+gb|L43413|PANADCY Podospora anserina adenyl cyclase gene, exons 1-4    38  0.30
+gb|M89651|HUMMMDBC Human DNA from cosmid DNA MMDB (f10080) and M...    38  0.30
+emb|Z99161|SPAC11G7 S.pombe chromosome I cosmid c11G7                  38  0.30
+emb|X56042|SKADECYC Saccharomyces kluyveri gene for adenylyl cyc...    37  0.88
+gb|M29235|CELPOLII C.elegans RNA polymerase II largest subunit (...    36  1.1
+dbj|D45132|HUMHOXY1 Human kidney mRNA for zinc-finger DNA-bindin...    36  1.1
+gb|U70654|MMU70654 Mus musculus musculus sex determining protein...    36  1.1
+gb|U53333|CELF36A4 Caenorhabditis elegans cosmid F36A4.                36  1.1
+gb|L29028|CREWP6A Chlamydomonas eugametos WP6 mRNA, complete cds       36  1.1
+gb|U70653|MMU70653 Mus musculus musculus sex determining protein...    36  1.1
+gb|U17838|HSU17838 Human zinc finger protein RIZ mRNA, complete cds    36  1.1
+emb|X56432|DMRUNTR D.melanogaster mRNA for runt segmentation gene      36  1.5
+emb|X67204|MMSRYLOC M.musculus DNA sequence of Sry locus               36  1.5
+gb|U00051|CELF42G9 Caenorhabditis elegans cosmid F42G9.                36  1.5
+gb|U70655|MMU70655 Mus musculus sex determining protein (Sry) ge...    36  1.5
+gb|AF019985|AF019985 Dictyostelium discoideum Spalten (spnA) mRN...    36  2.0
+emb|X78886|ATABI1G A.thaliana (Landsberg erecta) ABI1 gene             36  2.0
+gb|U70651|MMU70651 Mus musculus musculus sex determining protein...    36  2.0
+gb|U70652|MMU70652 Mus musculus musculus sex determining protein...    36  2.0
+emb|X58924|HVRPL11E H.volcanii genes for ribosomal proteins L11e...    36  2.0
+emb|X05806|CHAMPER Acetabularia plastid DNA homologous to Drosop...    36  2.0
+gb|U82833|OSU82833 Oryza sativa S-adenosyl-L-methionine syntheta...    35  2.6
+emb|Z27084|HLHOLLI H.lanatus mRNA for allergen Hol-lI                  35  2.6
+dbj|D00676|SH1RSP40 Pseudorabies virus genome, RSp40 and pk genes      35  2.6
+emb|Y10421|CCATCOLE1 C.curvatus strain ATCC 20509 Ole1 gene            35  2.6
+gb|L14320|HSBBICP4A Bovine herpesvirus type 1 early-intermediate...    35  2.6
+emb|AJ004801|BHV1CGEN Bovine herpesvirus 1 complete genome             35  2.6
+dbj|AB010074|AB010074 Arabidopsis thaliana genomic DNA, chromoso...    35  3.4
+gb|AC004490|AC004490 Homo sapiens chromosome 19, cosmid R29381, ...    35  3.4
+dbj|AB011474|AB011474 Arabidopsis thaliana genomic DNA, chromoso...    35  3.4
+gb|U03645|MMU03645 Mus musculus domesticus Torino (Sry) gene, co...    35  3.4
+gb|U70641|MMU70641 Mus musculus domesticus sex determining prote...    35  3.4
+gb|U70650|MMU70650 Mus musculus domesticus sex determining prote...    35  3.4
+gb|U70642|MMU70642 Mus musculus domesticus sex determining prote...    35  3.4
+gb|U70647|MMU70647 Mus musculus domesticus sex determining prote...    35  3.4
+emb|Y11840|ATABI2 Arabidopsis thaliana ABI2 gene                       35  3.4
+emb|Y08966|ATABI2DNA A.thaliana gene encoding ABI2 protein             35  3.4
+emb|X16144|SVGRA S. violaceoruber DNA for granaticin polyketide ...    35  3.4
+emb|X16300|SVPKS Streptomyces violaceoruber polyketide synthase ...    35  3.4
+gb|U70657|MMU70657 Mus musculus castaneus sex determining protei...    35  3.4
+gb|M57417|HUMMUCCF Human (cystic fibrosis patient) mucin mRNA, p...    35  3.4
+gb|L04286|DDIGP100 Dictyostelium discoideum glycoprotein gp100 (...    35  3.4
+emb|Z46938|BTTESDNA B.taurus DNA (protamine gene cluster)              35  3.4
+gb|L13054|DROZESTEL Drosophila melanogaster zeste (z) gene, part...    34  4.4
+gb|L13058|DROZESTEP Drosophila melanogaster zeste (z) gene, part...    34  4.4
+gb|M60590|YSCAAGLCS S.cerevisiae a-agglutinin core subunit (AGA1...    34  4.4
+gb|AF009521|AF009521 Mus spretus sex determining protein (Sry) g...    34  4.4
+emb|Z71659|SCYNR044W S.cerevisiae chromosome XIV reading frame O...    34  4.4
+gb|U19361|PMU19361 Petromyzon marinus neurofilament subunit NF-1...    34  4.4
+gb|U70646|MMU70646 Mus musculus domesticus sex determining prote...    34  4.4
+gb|U23477|DDU23477 Dictyostelium discoideum phosphatidylinositol...    34  4.4
+emb|X55695|LEEXTEN15 Tomato mRNA for a glycine-rich protein (clo...    34  4.4
+emb|Z95556|MTCY07A7 Mycobacterium tuberculosis cosmid SCY07A7          34  4.4
+dbj|AB007645|AB007645 Arabidopsis thaliana genomic DNA, chromoso...    34  5.8
+emb|X64346|HSGEND Herpesvirus saimiri complete genome DNA              34  5.8
+emb|Z95620|SPBC3D6 S.pombe chromosome II cosmid c3D6                   34  5.8
+gb|U70644|MMU70644 Mus musculus domesticus sex determining prote...    34  5.8
+gb|U70649|MMU70649 Mus musculus domesticus sex determining prote...    34  5.8
+gb|U43491|SCU43491 Saccharomyces cerevisiae cosmid clone pEOA156...    34  5.8
+dbj|D00909|NEUNAC N.crassa nac gene coding for adenylate cyclase...    34  5.8
+gb|U70645|MMU70645 Mus musculus domesticus sex determining prote...    34  5.8
+gb|AF009519|AF009519 Mus musculus domesticus sex determining pro...    34  5.8
+gb|M77174|MUSPERPA Mouse perlecan mRNA, complete cds.                  34  5.8
+emb|Z74917|SCYOR009W S.cerevisiae chromosome XV reading frame OR...    34  5.8
+gb|J04054|MUSPCGBM Mouse basement membrane proteoglycan mRNA, pa...    34  5.8
+emb|Z47072|CEF26C11 Caenorhabditis elegans cosmid F26C11, comple...    34  5.8
+gb|U70643|MMU70643 Mus musculus sex determining protein (Sry) ge...    34  5.8
+gb|AC003671|AC003671 Arabidopsis thaliana chromosome 1 BAC F17O7...    34  5.8
+emb|X16481|RN11ZNBP Rat mRNA for zinc(2+) binding protein              34  5.8
+gb|U46156|SSU46156 Synechococcus sp. CcmK (ccmK) gene, complete ...    34  5.8
+gb|AC000098|YUP8H12 Arabidopsis thaliana chromosome 1 YAC yUP8H1...    34  5.8
+emb|Z71781|SCCIVL37K S.cerevisiae chromosome IV left arm (EU) DN...    34  5.8
+emb|Y13332|SSTO1AMY Streptomyces sp. TO1 amy gene                      34  5.8
+gb|M30473|NEULEURSC N.crassa cytoplasmic leucyl-tRNA synthetase ...    34  5.8
+emb|Z74085|SCYDL037C S.cerevisiae chromosome IV reading frame OR...    34  5.8
+gb|AF029858|AF029858 Sorghum bicolor cytochrome P450 CYP71E1 (CY...    34  5.8
+gb|S47414|S47414 glycine-rich protein {clone atGRP-5} [Arabidops...    34  7.6
+emb|X60294|SCSEC1A S.cereale Sec1 gene for omega secalin               34  7.6
+emb|Z37975|BTPLAKOPH B.taurus mRNA for plakophilin.                    34  7.6
+emb|X60295|SCSEC1B S.cereale Sec1 gene for omega secalin               34  7.6
+dbj|D87895|D87895 Aspergillus nidulans DNA for chitinase, comple...    34  7.6
+gb|AF017789|AF017789 Homo sapiens putative transcription factor ...    34  7.6
+
+>gb|U49928|HSU49928 Homo sapiens TAK1 binding protein (TAB1) mRNA, complete cds.
+           Length = 3096
+           
+ Score = 1009 bits (2580), Expect = 0.0
+ Identities = 504/504 (100%), Positives = 504/504 (100%)
+
+Query: 1   MAAQRRSLLQSEQQPSWTDDLPLCHLSGVGSASNRSYSADGKGTESHPPEDSWLKFRSEN 60
+           MAAQRRSLLQSEQQPSWTDDLPLCHLSGVGSASNRSYSADGKGTESHPPEDSWLKFRSEN
+Sbjct: 21  MAAQRRSLLQSEQQPSWTDDLPLCHLSGVGSASNRSYSADGKGTESHPPEDSWLKFRSEN 200
+
+Query: 61  NCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLES 120
+           NCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLES
+Sbjct: 201 NCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLES 380
+
+Query: 121 IDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLLNNKLYV 180
+           IDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLLNNKLYV
+Sbjct: 381 IDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLLNNKLYV 560
+
+Query: 181 ANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQEST 240
+           ANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQEST
+Sbjct: 561 ANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQEST 740
+
+Query: 241 RRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLMSEGLYKALEAAH 300
+           RRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLMSEGLYKALEAAH
+Sbjct: 741 RRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLMSEGLYKALEAAH 920
+
+Query: 301 GPGQANQEIAAMIDTEFAKQTSLDAVAQAVVDRVKRIHSDTFASGGERARFCPRHEDMTL 360
+           GPGQANQEIAAMIDTEFAKQTSLDAVAQAVVDRVKRIHSDTFASGGERARFCPRHEDMTL
+Sbjct: 921 GPGQANQEIAAMIDTEFAKQTSLDAVAQAVVDRVKRIHSDTFASGGERARFCPRHEDMTL 1100
+
+Query: 361 LVRNFGYPLGEMSQPTPSPAPAAGGRVYPVSVPYSSAQSTSKTSVTLSLVMPSQGQMVNG 420
+           LVRNFGYPLGEMSQPTPSPAPAAGGRVYPVSVPYSSAQSTSKTSVTLSLVMPSQGQMVNG
+Sbjct: 1101LVRNFGYPLGEMSQPTPSPAPAAGGRVYPVSVPYSSAQSTSKTSVTLSLVMPSQGQMVNG 1280
+
+Query: 421 AHSASTLDEATPTLTNQSPTLTLQSTNTHTQSSSSSSDGGLFRSRPAHSLPPGEDGRVEP 480
+           AHSASTLDEATPTLTNQSPTLTLQSTNTHTQSSSSSSDGGLFRSRPAHSLPPGEDGRVEP
+Sbjct: 1281AHSASTLDEATPTLTNQSPTLTLQSTNTHTQSSSSSSDGGLFRSRPAHSLPPGEDGRVEP 1460
+
+Query: 481 YVDFAEFYRLWSVDHGEQSVVTAP 504
+           YVDFAEFYRLWSVDHGEQSVVTAP
+Sbjct: 1461YVDFAEFYRLWSVDHGEQSVVTAP 1532
+
+
+>emb|Z36985|PTPP2CMR P.tetraurelia mRNA for protein phosphatase type 2C
+           Length = 969
+           
+ Score = 57.8 bits (137), Expect = 4e-07
+ Identities = 64/261 (24%), Positives = 112/261 (42%), Gaps = 2/261 (0%)
+
+Query: 64  LYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLESIDD 123
+           ++GVF+G+ G  V  FV +    ELL  +                  +  E+ F E++ +
+Sbjct: 182 VFGVFDGHGGREVA*FVEKHFVDELLKNK------------------NFKEQKFEEALKE 307
+
+Query: 124 ALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREIS-GGAMAVVAVLLNNKLYVAN 182
+              +   L            L P+ QK L   K  + + S  G  A VA++  N LYVAN
+Sbjct: 308 TFLKMDELL-----------LTPEGQKELN*YKATDTDESYAGCTANVALIYKNTLYVAN 454
+
+Query: 183 VGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQESTRR 242
+            G +R++LC++  +      ++VDH  +N +E  R+ + G   G +    +      +R 
+Sbjct: 455 AGDSRSVLCRNNTN----HDMSVDHKPDNPEEKSRIERAG---GFVSDGRVNGNLNLSRA 613
+
+Query: 243 IGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLMSEGLYKALEAAHGP 302
+           +GD + K      D    +    IIA P++   + L     F+++  +G+++ L      
+Sbjct: 614 LGDLEYKR-----DNKLRSNE*LIIALPDVKKTE-LTP*DKFILMGCDGVFETLNH*ELL 775
+
+Query: 303 GQANQEIA-AMIDTEFAKQTSLD 324
+            Q N  I  A +  E  K+ + D
+Sbjct: 776 KQVNSTIG*AQVTEELLKKAAED 844
+
+
+>emb|X77116|ATMRABI1 A.thaliana mRNA for ABI1 protein
+            Length = 1981
+            
+ Score = 52.7 bits (124), Expect = 1e-05
+ Identities = 59/242 (24%), Positives = 105/242 (43%), Gaps = 6/242 (2%)
+
+Query: 55   KFRSENNCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVE 114
+            +F  ++    +GV++G+ G++V N+  +R+   L      AE   A  + +L      +E
+Sbjct: 918  RFDPQSAAHFFGVYDGHGGSQVANYCRERMHLAL------AEEI-AKEKPMLCDGDTWLE 1076
+
+Query: 115  RSFLESIDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLL 174
+            +      +  L   + ++S  PE V                         G+ +VVAV+ 
+Sbjct: 1077 KWKKALFNSFLRVDSEIESVAPETV-------------------------GSTSVVAVVF 1181
+
+Query: 175  NNKLYVANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQ---- 230
+             + ++VAN G +RA+LC+    G     L+VDH  + EDE  R+   G   GK+ Q    
+Sbjct: 1182 PSHIFVANCGDSRAVLCR----GKTALPLSVDHKPDREDEAARIEAAG---GKVIQWNGA 1340
+
+Query: 231  --VGIICGQESTRRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLM 288
+               G++     +R IGD  +K                II +PE+   + +      L+L 
+Sbjct: 1341 RVFGVLA---MSRSIGDRYLK--------------PSIIPDPEVTAVKRVK-EDDCLILA 1466
+
+Query: 289  SEGLYKAL 296
+            S+G++  +
+Sbjct: 1467 SDGVWDVM 1490
+
+
+>gb|U12856|ATU12856 Arabidopsis thaliana Col-0 abscisic acid insensitive protein (ABI1)
+            mRNA, complete cds.
+            Length = 2000
+            
+ Score = 52.7 bits (124), Expect = 1e-05
+ Identities = 59/242 (24%), Positives = 105/242 (43%), Gaps = 6/242 (2%)
+
+Query: 55   KFRSENNCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVE 114
+            +F  ++    +GV++G+ G++V N+  +R+   L      AE   A  + +L      +E
+Sbjct: 918  RFDPQSAAHFFGVYDGHGGSQVANYCRERMHLAL------AEEI-AKEKPMLCDGDTWLE 1076
+
+Query: 115  RSFLESIDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLL 174
+            +      +  L   + ++S  PE V                         G+ +VVAV+ 
+Sbjct: 1077 KWKKALFNSFLRVDSEIESVAPETV-------------------------GSTSVVAVVF 1181
+
+Query: 175  NNKLYVANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQ---- 230
+             + ++VAN G +RA+LC+    G     L+VDH  + EDE  R+   G   GK+ Q    
+Sbjct: 1182 PSHIFVANCGDSRAVLCR----GKTALPLSVDHKPDREDEAARIEAAG---GKVIQWNGA 1340
+
+Query: 231  --VGIICGQESTRRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLM 288
+               G++     +R IGD  +K                II +PE+   + +      L+L 
+Sbjct: 1341 RVFGVLA---MSRSIGDRYLK--------------PSIIPDPEVTAVKRVK-EDDCLILA 1466
+
+Query: 289  SEGLYKAL 296
+            S+G++  +
+Sbjct: 1467 SDGVWDVM 1490
+
+
+>dbj|D38109|ATHPP2CA Arabidopsis thaliana mRNA for protein phosphatase 2C
+           Length = 1371
+           
+ Score = 51.5 bits (121), Expect = 3e-05
+ Identities = 73/290 (25%), Positives = 132/290 (45%), Gaps = 13/290 (4%)
+
+Query: 47  HPPEDSWLKFRSENNCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVL 106
+           HP   S+L+  SEN+ F YGVF+G+  + V     +RL  +++  ++    A  +    +
+Sbjct: 447 HP---SFLQRNSENHHF-YGVFDGHGCSHVAEKCRERLH-DIVKKEVEVM-ASDEWTETM 608
+
+Query: 107 LQAFDVVERSFLESIDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGA 166
+           +++F  +++   +   + +   A+    +          PQ   +             G+
+Sbjct: 609 VKSFQKMDKEVSQRECNLVVNGAT--RSMKNSCRCELQSPQCDAV-------------GS 743
+
+Query: 167 MAVVAVLLNNKLYVANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAG 226
+            AVV+V+   K+ V+N G +RA+LC++ V       L+VDH  +  DEL R+ Q G   G
+Sbjct: 744 TAVVSVVTPEKIIVSNCGDSRAVLCRNGV----AIPLSVDHKPDRPDELIRIQQAG---G 902
+
+Query: 227 KI------KQVGIICGQESTRRIGD-YKVKYGYTDIDLLSAAKSKP----IIAEPEIHGA 275
+           ++      + +G++     +R IGD Y   Y   D ++    ++      I+A   +   
+Sbjct: 903 RVIYWDGARVLGVLA---MSRAIGDNYLKPYVIPDPEVTVTDRTDEDECLILASDGLWDV 1073
+
+Query: 276 QPLDGVTGF--LVLMSEGLYKALEAAHGPGQANQEIAAMIDTEFAKQTSLDAVAQAVVDR 333
+            P +   G   + L   G     +AAH    A  + A ++      + S D V+  VVD 
+Sbjct: 1074VPNETACGVARMCLRGAGAGDDSDAAH---NACSDAALLLTKLALARQSSDNVSVVVVDL 1244
+
+Query: 334 VKR 336
+            KR
+Sbjct: 1245RKR 1253
+
+
+>emb|Y08965|ATABI2RNA A.thaliana mRNA for ABI2 protein
+           Length = 1470
+           
+ Score = 46.9 bits (109), Expect = 8e-04
+ Identities = 55/241 (22%), Positives = 100/241 (40%), Gaps = 7/241 (2%)
+
+Query: 56  FRSENNCFLYGVFNGYDGNRVTNFVAQRLSAELL--LGQLNAEHAEADV-----RRVLLQ 108
+           F    +   +GV++G+ G++V N+  +R+   L   + +   E  + D      ++ L  
+Sbjct: 504 FNPHLSAHFFGVYDGHGGSQVANYCRERMHLALTEEIVKEKPEFCDGDTWQEKWKKALFN 683
+
+Query: 109 AFDVVERSFLESIDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMA 168
+           +F  V+ S +E++  A           PE V                         G+ +
+Sbjct: 684 SFMRVD-SEIETVAHA-----------PETV-------------------------GSTS 752
+
+Query: 169 VVAVLLNNKLYVANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKI 228
+           VVAV+    ++VAN G +RA+LC+    G     L+VDH  + +DE  R+   G    + 
+Sbjct: 753 VVAVVFPTHIFVANCGDSRAVLCR----GKTPLALSVDHKPDRDDEAARIEAAGGKVIRW 920
+
+Query: 229 KQVGIICGQESTRRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLM 288
+               +      +R IGD  +K                +I +PE+   + +      L+L 
+Sbjct: 921 NGARVFGVLAMSRSIGDRYLK--------------PSVIPDPEVTSVRRVK-EDDCLILA 1055
+
+Query: 289 SEGLYKAL 296
+           S+GL+  +
+Sbjct: 1056SDGLWDVM 1079
+
+
+>emb|AL010222|PFSC04009 Plasmodium falciparum DNA *** SEQUENCING IN PROGRESS *** from contig
+            4-9, complete sequence [Plasmodium falciparum]
+            Length = 5332
+            
+ Score = 44.5 bits (103), Expect = 0.004
+ Identities = 48/183 (26%), Positives = 83/183 (45%), Gaps = 15/183 (8%)
+
+Query: 163  SGGAMAVVAVLLNNKLYVANVGTNRALLCKSTVDGLQVTQLNVDHTTE-NEDELFRLSQL 221
+            S G  A V+V+  N LYVAN+G +R ++ K+     +   L VDH    N+ E  R+ + 
+Sbjct: 2760 SSGTTACVSVIFKNMLYVANIGDSRCIISKNG----RAIVLTVDHRASINKKEQDRILKS 2593
+
+Query: 222  GLDAGKIKQVGIICGQESTRRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGV 281
+            G   G +   G + G     R        G+      +  K K +I EP++   +  D  
+Sbjct: 2592 G---GILDDEGYLGGCLGVCR--------GFGSFHKKTKEKLKGLICEPDLFHIKLTDD- 2449
+
+Query: 282  TGFLVLMSEGLYKALEAAHGPGQANQEIAAMIDTEFA---------KQTSLDAVAQAVV- 331
+              FL++  +G++  + +          +    D + A         K+ SLD ++  VV 
+Sbjct: 2448 DEFLIICCDGIFDVITSQEAVNTVKNSLIQSRDAKTAAEALCQLAYKKKSLDNLSVLVVI 2269
+
+Query: 332  ----DRVKRIHSDTFASG 345
+                D+  ++ S   +SG
+Sbjct: 2268 FQNPDKNNKVSSINESSG 2215
+
+
+>gb|AC002409|ATAC002409 Arabidopsis thaliana chromosome II BAC T20B5 genomic sequence, complete
+             sequence [Arabidopsis thaliana]
+             Length = 72839
+             
+ Score = 43.0 bits (99), Expect = 0.012
+ Identities = 21/57 (36%), Positives = 38/57 (65%)
+
+Query: 165   GAMAVVAVLLNNKLYVANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQL 221
+             G  A+ ++L+ NKL+VANVG +RA+LC++      ++++  D+ TE+E  L+ +  L
+Sbjct: 24355 GCTAIASLLVENKLFVANVGDSRAILCRAG-HPFALSKVR*DYHTESELSLYSIGAL 24188
+
+
+>emb|Z98762|SPAC4A8 S.pombe chromosome I cosmid c4A8
+            Length = 43895
+            
+ Score = 42.2 bits (97), Expect = 0.020
+ Identities = 56/230 (24%), Positives = 104/230 (44%), Gaps = 20/230 (8%)
+
+Query: 63   FLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLESID 122
+            F YG+F+G+ G   + F++  L   +    LN      D  ++L +   V    ++  + 
+Sbjct: 3939 FFYGLFDGHGGTECSEFLSTNLGKIIENQDLN------DTEKILKEVHSV--GGYMAGLK 3784
+
+Query: 123  DALAEKASLQSQLPEGVPQHQLPPQY-QKILERLKTLER----EISGGAMAVVAVLLNNK 177
+               + +  LQS+  + + + +L   + Q  ++ L    R        GA+  VA++ +  
+Sbjct: 3783 PPFSLRTVLQSRDEDLLWRARLYYSFLQADMDYLTNYARPSPDSAVPGAVGTVAIITSKN 3604
+
+Query: 178  -----------LYVANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGL--- 223
+                       +++A+VG  RALLC S     +  +L   H   + +E  RL +  +   
+Sbjct: 3603 NLSYWESDSYIIHLAHVGDTRALLCDSRTG--RAHRLTFQHHPADVEEARRLRRYNMGFS 3430
+
+Query: 224  -DAGKIKQVGIICGQESTRRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVT 282
+             D+   K+   +    +TR  GD     GY         K   ++AEP++     L    
+Sbjct: 3429 RDSFGQKRFAWVA---NTRSFGD-----GY-------KLKKLGVVAEPQLTSIHSLRDDW 3295
+
+Query: 283  GFLVLMSEGL 292
+             FL L+S+G+
+Sbjct: 3294 SFLTLLSDGI 3265
+
+
+>gb|AF006827|AF006827 Magnaporthe grisea adenylate cyclase (MAC1) gene, complete cds
+            Length = 8678
+            
+ Score = 41.0 bits (94), Expect = 0.045
+ Identities = 63/259 (24%), Positives = 111/259 (42%), Gaps = 8/259 (3%)
+
+Query: 14   QPSWTDDLPLCHLSGVGSASNR-SYS-ADGKGTESHPPEDSWL--KFRSENNCFLYGVFN 69
+            QPS  +    C +   GS++    Y+ AD  G   H      +  +F +     L G+F+
+Sbjct: 5314 QPSIPEQSEDCRVRTSGSSAGYLPYAMADTLGKNEHLSTVDLVVPRFNASETETLLGLFD 5493
+
+Query: 70   GY----DGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLESIDDAL 125
+            G      G+++  ++ +        GQ+ A    A    +     D + R+FL +++  L
+Sbjct: 5494 GQALSSGGSKIAKYLHENF------GQILATELRALKTGLKETPEDALRRAFL-ALNKEL 5652
+
+Query: 126  AEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLLNNKLYVANVGT 185
+               A   S+    VP H+   Q   IL +      +++ G +A V  L    LYVANVG 
+Sbjct: 5653 VTIAIQHSEDRPSVP-HRSGSQAHVILNK-----EDLNSGGVATVVYLQGQDLYVANVGD 5814
+
+Query: 186  NRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQESTRRIGD 245
+             +A++ +S      +T+    H      E  R+ + G   G + + G +  Q    R   
+Sbjct: 5815 AQAMIIQSDQTHKMLTR---KHDPAEPTERSRIREAG---GWVSRNGKLNDQLGVSR--- 5967
+
+Query: 246  YKVKYGYTDIDLLSAAKSKPIIAEPEI 272
+                +GY  +DL+ A ++ P ++   I
+Sbjct: 5968 ---AFGY--VDLMPAVQAAPHVSHVAI 6033
+
+
+CPU time:    78.72 user secs.	    0.85 sys. secs	   79.57 total secs.
+
+  Database: Non-redundant GenBank+EMBL+DDBJ+PDB sequences
+    Posted date:  Apr 16, 1998  9:38 AM
+  Number of letters in database: 677,679,054
+  Number of sequences in database:  336,723
+  
+Lambda     K      H
+   0.313    0.130    0.370 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 374080624
+Number of Sequences: 336723
+Number of extensions: 5779396
+Number of successful extensions: 37522
+Number of sequences better than 10: 214
+Number of HSP's better than 10.0 without gapping: 32
+Number of HSP's successfully gapped in prelim test: 85
+Number of HSP's that attempted gapping in prelim test: 35679
+Number of HSP's gapped (non-prelim): 445
+length of query: 504
+length of database: 225893018
+effective HSP length: 62
+effective length of query: 442
+effective length of database: 205016192
+effective search space: 90617156864
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.2 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 42 (21.9 bits)
+S2: 74 (33.2 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/blat.psLayout3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/blat.psLayout3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/blat.psLayout3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+psLayout version 3
+
+match	mis- 	rep. 	N's	Q gap	Q gap	T gap	T gap	strand	Q        	Q   	Q    	Q  	T        	T   	T    	T  	block	blockSizes 	qStarts	 tStarts
+     	match	match	   	count	bases	count	bases	      	name     	size	start	end	name     	size	start	end	count
+---------------------------------------------------------------------------------------------------------------------------------------------------------------
+1775	0	0	0	0	0	0	0	+	sequence_10	1775	0	1775	sequence_10	1775	0	1775	1	1775,	0,	0,
+70	0	0	0	3	334	3	334	-	sequence_10	1775	840	1244	sequence_10	1775	840	1244	4	14,21,21,14,	531,623,652,921,	840,1102,1131,1230,

Added: trunk/packages/bioperl/branches/upstream/current/t/data/blosum62.bla
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/blosum62.bla	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/blosum62.bla	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+#  Matrix made by matblas from blosum62.iij
+#  * column uses minimum score
+#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+#  Blocks Database = /data/blocks_5.0/blocks.dat
+#  Cluster Percentage: >= 62
+#  Entropy =   0.6979, Expected =  -0.5209
+ A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
+ 4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4 
+-1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4 
+-2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4 
+-2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1 -4 
+ 0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 
+-1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1 -4 
+-1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4 
+ 0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1 -4 
+-2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1 -4 
+-1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1 -4 
+-1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1 -4 
+-1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1 -4 
+-1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1 -4 
+-2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1 -4 
+-1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2 -4 
+ 1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0 -4 
+ 0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0 -4 
+-3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2 -4 
+-2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1 -4 
+ 0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1 -4 
+-2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1 -4 
+-1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4 
+ 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1 -4 
+-4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/branchSite.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/branchSite.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/branchSite.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+CODONML (in paml 3.15, November 2005)    test.phy   Model: several dN/dS ratios for branches 
+Codon frequencies: F3x4
+Site-class models:  discrete (4 categories)
+ns =   3  ls =   6
+
+Codon usage in sequences
+--------------------------------------------------------------------------
+Phe TTT  1  1  1 | Ser TCT  1  1  0 | Tyr TAT  0  0  0 | Cys TGT  0  0  0
+    TTC  0  0  0 |     TCC  0  0  1 |     TAC  0  0  0 |     TGC  0  0  0
+Leu TTA  0  0  0 |     TCA  0  0  0 | *** TAA  0  0  0 | *** TGA  0  0  0
+    TTG  0  0  0 |     TCG  0  0  0 |     TAG  0  0  0 | Trp TGG  0  0  0
+--------------------------------------------------------------------------
+Leu CTT  0  0  0 | Pro CCT  0  0  0 | His CAT  1  1  1 | Arg CGT  0  0  0
+    CTC  0  0  0 |     CCC  0  0  1 |     CAC  0  0  0 |     CGC  0  0  0
+    CTA  0  0  0 |     CCA  1  1  0 | Gln CAA  0  0  0 |     CGA  0  0  0
+    CTG  0  0  0 |     CCG  0  0  0 |     CAG  0  0  0 |     CGG  0  0  0
+--------------------------------------------------------------------------
+Ile ATT  0  0  0 | Thr ACT  0  0  0 | Asn AAT  0  0  0 | Ser AGT  0  0  0
+    ATC  0  0  0 |     ACC  0  0  0 |     AAC  0  0  0 |     AGC  0  0  0
+    ATA  0  0  0 |     ACA  0  0  0 | Lys AAA  0  0  0 | Arg AGA  0  0  0
+Met ATG  2  1  1 |     ACG  0  1  1 |     AAG  0  0  0 |     AGG  0  0  0
+--------------------------------------------------------------------------
+Val GTT  0  0  0 | Ala GCT  0  0  0 | Asp GAT  0  0  0 | Gly GGT  0  0  0
+    GTC  0  0  0 |     GCC  0  0  0 |     GAC  0  0  0 |     GGC  0  0  0
+    GTA  0  0  0 |     GCA  0  0  0 | Glu GAA  0  0  0 |     GGA  0  0  0
+    GTG  0  0  0 |     GCG  0  0  0 |     GAG  0  0  0 |     GGG  0  0  0
+--------------------------------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: test0          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.50000    C:0.33333    A:0.16667    G:0.00000
+position  3:    T:0.50000    C:0.00000    A:0.16667    G:0.33333
+
+#2: test1          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.33333    C:0.50000    A:0.16667    G:0.00000
+position  3:    T:0.50000    C:0.00000    A:0.16667    G:0.33333
+
+#3: test2          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.33333    C:0.50000    A:0.16667    G:0.00000
+position  3:    T:0.33333    C:0.33333    A:0.00000    G:0.33333
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT       3 | Ser S TCT       2 | Tyr Y TAT       0 | Cys C TGT       0
+      TTC       0 |       TCC       1 |       TAC       0 |       TGC       0
+Leu L TTA       0 |       TCA       0 | *** * TAA       0 | *** * TGA       0
+      TTG       0 |       TCG       0 |       TAG       0 | Trp W TGG       0
+------------------------------------------------------------------------------
+Leu L CTT       0 | Pro P CCT       0 | His H CAT       3 | Arg R CGT       0
+      CTC       0 |       CCC       1 |       CAC       0 |       CGC       0
+      CTA       0 |       CCA       2 | Gln Q CAA       0 |       CGA       0
+      CTG       0 |       CCG       0 |       CAG       0 |       CGG       0
+------------------------------------------------------------------------------
+Ile I ATT       0 | Thr T ACT       0 | Asn N AAT       0 | Ser S AGT       0
+      ATC       0 |       ACC       0 |       AAC       0 |       AGC       0
+      ATA       0 |       ACA       0 | Lys K AAA       0 | Arg R AGA       0
+Met M ATG       4 |       ACG       2 |       AAG       0 |       AGG       0
+------------------------------------------------------------------------------
+Val V GTT       0 | Ala A GCT       0 | Asp D GAT       0 | Gly G GGT       0
+      GTC       0 |       GCC       0 |       GAC       0 |       GGC       0
+      GTA       0 |       GCA       0 | Glu E GAA       0 |       GGA       0
+      GTG       0 |       GCG       0 |       GAG       0 |       GGG       0
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.38889    C:0.44444    A:0.16667    G:0.00000
+position  3:    T:0.44444    C:0.11111    A:0.11111    G:0.33333
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+test0               
+test1               -1.0000 (0.0706 0.0000)
+test2                0.0510 (0.0706 1.3844) 0.0000 (0.0000 0.9745)
+
+
+TREE #  1:  (1, 2, 3);   MP score: 3
+lnL(ntime:  3  np:  9):    -28.298935     +0.000000
+   4..1     4..2     4..3  
+  0.31500  0.00000 18.45220 99.00000  0.00000  0.65968  0.00000  0.00000 999.00000
+
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =  18.76720
+
+(1: 0.314996, 2: 0.000004, 3: 18.452204);
+
+(test0: 0.314996, test1: 0.000004, test2: 18.452204);
+
+Detailed output identifying parameters
+
+kappa (ts/tv) = 99.00000
+
+
+dN/dS for site classes (K=4)
+
+site class             0        1       2a       2b
+proportion       0.00000  0.65968  0.00000  0.34032
+background w     0.00000  0.00000  0.00000  0.00000
+foreground w     0.00000  0.00000999.00000 999.00000
+
+
+Naive Empirical Bayes (NEB) analysis (please use the BEB results.)
+Positive sites for foreground lineages Prob(w>1):
+
+     2 M 1.000**

Added: trunk/packages/bioperl/branches/upstream/current/t/data/brassica_ATH.WUBLASTN
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/brassica_ATH.WUBLASTN	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/brassica_ATH.WUBLASTN	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,193 @@
+BLASTN 2.0MP-WashU [22-Aug-2004] [linux24-i686-ILP32F64 2004-08-22T21:42:14]
+
+Copyright (C) 1996-2004 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2004) http://blast.wustl.edu
+
+Notice:  this program and its default parameter settings are optimized to find
+nearly identical sequences rapidly.  To identify weak protein similarities
+encoded in nucleic acid, use BLASTX, TBLASTN or TBLASTX.
+
+Query=  e15_99na_plate_1as gi|21844367|gb|BQ704948.1|BQ704948 Bn01_02k05_A
+    Bn01_AAFC_ECORC_transgenic_Brassica_napus_overexpressing_BNCBF17_constituti
+    vely_frost_tolerant Brassica napus cDNA clone Bn01_02k05, mRNA sequence
+        (598 letters)
+
+Database:  ATH1_chr_all.5con
+           5 sequences; 119,186,497 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                              High  Probability
+Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
+
+CHR1v01212004                                                  932  1.3e-79   3
+CHR5v01212004                                                 1035  6.9e-71   2
+
+
+
+>CHR1v01212004
+        Length = 30,432,563
+
+  Plus Strand HSPs:
+
+ Score = 932 (145.9 bits), Expect = 1.3e-79, Sum P(3) = 1.3e-79, Group = 1
+ Identities = 230/282 (81%), Positives = 240/282 (85%), Strand = Plus / Plus
+ Links = (1)-3-2
+
+Query:       23 AAAGGARATTAAGCACCGGSTGAAGCTTGGTGGCTTGTAGGCGATGAAGACTGATGCRCT 82
+                || |||+|||||   |||| |||||||||||||||||||||| ||||| ||||||||+||
+Sbjct: 25052120 AAGGGAAATTAA---CCGG-TGAAGCTTGGTGGCTTGTAGGCAATGAA-ACTGATGCACT 25052174
+
+Query:       83 GGACCTGRCGCGTTGTTGTCGAATCCAATGATTCTAATAAAGGCGKTAGGGKACTCCGTT 142
+                |||| ||+|| || |||||||||||| ||||| ||||| ||||| +| |||+||||| ||
+Sbjct: 25052175 GGACTTGACGGGT-GTTGTCGAATCCGATGATCCTAATGAAGGCATTGGGGTACTCC-TT 25052232
+
+Query:      143 CTTGSACTCTTGCACTTCCTTCAACACTTGAGARGRGTCGGKGCATCCGARCAAAGGAAG 202
+                ||||+|||||| |||||||||||||||||||| +|+|||||+||| ||||+||| |||||
+Sbjct: 25052233 CTTGCACTCTTCCACTTCCTTCAACACTTGAGCGGAGTCGGTGCAACCGAACAAGGGAAG 25052292
+
+Query:      203 CTTCCACATTGTCCAATAACGGCCATCGTAGTATCCAGGTGTGCTTCCATGCTCACGGTA 262
+                ||||||||||||||| || || ||||| |||||||| |||| | | || |||||||||||
+Sbjct: 25052293 CTTCCACATTGTCCAGTACCGTCCATCATAGTATCCGGGTGAGTTACCGTGCTCACGGTA 25052352
+
+Query:      263 CACAAATCCGTGCTC--CAACTCAAATTCAACACAAGGAATC 302
+                ||||||||||||||   |||  || ||   ||| ||  | ||
+Sbjct: 25052353 CACAAATCCGTGCTATACAAACCACATGTTACA-AACCACTC 25052393
+
+ Score = 580 (93.1 bits), Expect = 1.3e-79, Sum P(3) = 1.3e-79, Group = 1
+ Identities = 153/192 (79%), Positives = 158/192 (82%), Strand = Plus / Plus
+ Links = 1-3-(2)
+
+Query:      410 ACCTTCATGCAGCTAACTCTTCCTCCGTTGCTTGCAATGGAAGTAATATCAGTGTT-GCT 468
+                |||| ||||||| |||||||||| ||||||||||  ||||||||||| ||  |||| || 
+Sbjct: 25052740 ACCTGCATGCAGTTAACTCTTCCGCCGTTGCTTGTGATGGAAGTAATGTCGTTGTTAGCC 25052799
+
+Query:      469 TTTGCGGGTGASTGGGAATGCRGCGGATGACTTCAAG-CCGGTRAAKWGGAGC-ACCATG 526
+                || ||||||| +|||||| ||+||||| ||||| ||| ||| |+|| +||||| ||||| 
+Sbjct: 25052800 TT-GCGGGTGGCTGGGAAGGCAGCGGAGGACTT-AAGTCCGTTGAA-AGGAGCGACCATA 25052856
+
+Query:      527 GTGGCTTGAGCCGGTGAGGTAGCCACAGCGGCGGGAGGAGAGCATAGRAGGWAGCCATTA 586
+                ||||| |||||||| |||| | ||| ||  |||| || ||||||||| |||+||||||| 
+Sbjct: 25052857 GTGGCCTGAGCCGGAGAGGCAACCATAGTAGCGGAAG-AGAGCATAG-AGGAAGCCATTG 25052914
+
+Query:      587 CTACTTCTTTTC 598
+                 | ||||||| |
+Sbjct: 25052915 TT-CTTCTTTAC 25052925
+
+ Score = 512 (82.9 bits), Expect = 1.3e-79, Sum P(3) = 1.3e-79, Group = 1
+ Identities = 118/139 (84%), Positives = 121/139 (87%), Strand = Plus / Plus
+ Links = 1-(3)-2
+
+Query:      275 CTCCAACTCAAATTCAACACAAGGAATCCACTCTGTTGCGGAGAAGGTAGTCAACTTCTT 334
+                ||||||||| |||||||||||||||||||||| ||||||||| ||||||||||||||| |
+Sbjct: 25052501 CTCCAACTCGAATTCAACACAAGGAATCCACT-TGTTGCGGATAAGGTAGTCAACTTCCT 25052559
+
+Query:      335 TGGCCAATTCAACGTCAGTRAGGTCAGGRAGATAAGAGAGAGTCTCAGHACTTCTTCTTT 394
+                | ||||||||    || ||+||||||||+|| |||||||||||||||  |||||||||||
+Sbjct: 25052560 TAGCCAATTCGGAATCGGTAAGGTCAGGAAGGTAAGAGAGAGTCTCA-AACTTCTTCTTT 25052618
+
+Query:      395 SCGACTGGKGGGCACACCT 413
+                +| |  || || |||||||
+Sbjct: 25052619 CCAATCGGAGGCCACACCT 25052637
+
+
+>CHR5v01212004
+        Length = 26,992,728
+
+  Plus Strand HSPs:
+
+ Score = 1035 (161.3 bits), Expect = 6.9e-71, Sum P(2) = 6.9e-71, Group = 1
+ Identities = 272/348 (78%), Positives = 281/348 (80%), Strand = Plus / Plus
+ Links = (1)-2
+
+Query:        1 GATTATT-ATATGATTGTTTTAGAAAGGARATTAAGCACCGGSTGAAGCTTGGTGGCTTG 59
+                ||| ||| ||| || ||||||||||| ||+|||||||  ||| |||||||||| ||||||
+Sbjct: 15394700 GATAATTCATAAGAATGTTTTAGAAAAGAAATTAAGCTTCGG-TGAAGCTTGGGGGCTTG 15394758
+
+Query:       60 TAGGCGATGAAGACTGATGCRCTGGACCTGRCGCGTTGTTGTCGAATCCAATGATTCTAA 119
+                ||||| ||||| ||||||||+ ||||| ||+|| || |||||||||||| ||||| ||||
+Sbjct: 15394759 TAGGCAATGAA-ACTGATGCATTGGACTTGACGGGT-GTTGTCGAATCCGATGATCCTAA 15394816
+
+Query:      120 TAAAGGCGKTAGGGKACTCCGTTCTTGSACTCTTGCACTTCCTTCAACACTTGAGARGRG 179
+                | ||||||   |||+||||| ||||||+| ||||  ||||||||||||||||||| +|+|
+Sbjct: 15394817 TGAAGGCGCCCGGGTACTCC-TTCTTGCATTCTTCAACTTCCTTCAACACTTGAGCGGAG 15394875
+
+Query:      180 TCGGKGCATCCGARCAAAGGAAGCTTCCACATTGTCCAATAACGGCCATCGTAGTATCCA 239
+                ||||+||||||||+||| |||||||||||||||||||| || || |||||||||||||| 
+Sbjct: 15394876 TCGGTGCATCCGAACAATGGAAGCTTCCACATTGTCCAGTACCGTCCATCGTAGTATCCG 15394935
+
+Query:      240 GGTGTGCTTCCATGCTCACGGTACACAAATCCGTGCTCCAA-CTCA-AATTCAACACAAG 297
+                || ||| |||| |||||||||||||||||||||||||  || | || ||   ||| ||  
+Sbjct: 15394936 GGAGTGTTTCCGTGCTCACGGTACACAAATCCGTGCTATAAACACATAAGAAAAC-CATT 15394994
+
+Query:      298 GAATCCACTCTGTTGC-GGAGAAGGTAGTCAACTTCTT-TGGCCAATT 343
+                 ||| || | || |   |||| |  |   |||  |  | | | | |||
+Sbjct: 15394995 CAATACAAT-TGATATTGGAGGATATGCACAAGGTGATATAGTCTATT 15395041
+
+ Score = 739 (116.9 bits), Expect = 6.9e-71, Sum P(2) = 6.9e-71, Group = 1
+ Identities = 166/189 (87%), Positives = 171/189 (90%), Strand = Plus / Plus
+ Links = 1-(2)
+
+Query:      410 ACCTTCATGCAGCTAACTCTTCCTCCGTTGCTTGCAATGGAAGTAATATCAGTGTTGCTT 469
+                ||||||||||||||||||||||| ||||||||||| |||||||| || ||  ||||| | 
+Sbjct: 15395364 ACCTTCATGCAGCTAACTCTTCCCCCGTTGCTTGCGATGGAAGTGATGTCCTTGTTGGTC 15395423
+
+Query:      470 TTGCGGGTGASTGGGAATGCRGCGGATGACTTCAAGCCGGTRAAKWGGAGC-ACCATGGT 528
+                ||||||||||+ ||||||||+||||||||||||||||||||+||+ ||||| ||||||||
+Sbjct: 15395424 TTGCGGGTGACCGGGAATGCAGCGGATGACTTCAAGCCGGTGAAT-GGAGCGACCATGGT 15395482
+
+Query:      529 GGCTTGAGCCGGTGAGGTAGCCACAGCGGCGGGAGGAGAGCATAGRAGGWAGCCATTACT 588
+                ||| |||||||| || ||| |||||||||||| |||||||||||| |||+||||||||||
+Sbjct: 15395483 GGCCTGAGCCGGGGATGTAACCACAGCGGCGG-AGGAGAGCATAG-AGGAAGCCATTACT 15395540
+
+Query:      589 ACTTCTTTT 597
+                ||||||| |
+Sbjct: 15395541 ACTTCTTGT 15395549
+
+
+Parameters:
+  -i /home/jasons/pubbrassica/pub_brassica.4
+  -d /home/jasons/pubbrassica/ATH1_chr_all.5con
+  E=1e-5
+  topcomboE=10
+  wordmask=seg
+  hspsepsmax=1000
+  links
+
+  ctxfactor=2.00
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Strand MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +1      0   +5,-4           0.195   0.175   0.361    same    same    same
+               Q=10,R=10       0.104   0.0151  0.0600    n/a     n/a     n/a
+   -1      0   +5,-4           0.195   0.175   0.361    same    same    same
+               Q=10,R=10       0.104   0.0151  0.0600    n/a     n/a     n/a
+
+  Query
+  Strand MatID  Length  Eff.Length     E     S  W   T   X   E2      S2
+   +1      0      598       598   1.0e-05  299 11 n/a  72  0.023    78
+                                                      134  0.025   117
+   -1      0      598       598   1.0e-05  299 11 n/a  72  0.023    78
+                                                      134  0.025   117
+
+
+Statistics:
+
+  Database:  /home/jasons/pubbrassica/ATH1_chr_all.5con
+   Title:  ATH1_chr_all.5con
+   Posted:  11:14:54 AM EST Sep 1, 2004
+   Created:  12:14:50 PM EETDT Sep 01, 2004
+   Format:  XDF-1
+   # of letters in database:  119,186,497
+   # of sequences in database:  5
+   # of database sequences satisfying E:  2
+  No. of states in DFA:  233 (233 KB)
+  Total size of DFA:  254 KB (2059 KB)
+  Time to generate neighborhood:  0.00u 0.01s 0.01t  Elapsed: 00:00:00
+  No. of threads or processors used:  2
+  Search cpu time:  0.58u 0.06s 0.64t  Elapsed: 00:00:00
+  Total cpu time:  0.66u 0.29s 0.95t  Elapsed: 00:00:01
+  Start:  Wed Sep  1 13:19:06 2004   End:  Wed Sep  1 13:19:07 2004

Added: trunk/packages/bioperl/branches/upstream/current/t/data/bug2120.phd
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/bug2120.phd	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/bug2120.phd	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1370 @@
+BEGIN_SEQUENCE HEX1549K12_01_A01.b_015-1516
+
+BEGIN_COMMENT
+
+CHROMAT_FILE: HEX1549K12_01_A01.b_015-1516
+ABI_THUMBPRINT: 0
+PHRED_VERSION: 0.020425.c
+CALL_METHOD: phred
+QUALITY_LEVELS: 99
+TIME: Sun Oct  8 12:48:40 2006
+TRACE_ARRAY_MIN_INDEX: 0
+TRACE_ARRAY_MAX_INDEX: 16302
+TRIM: 34 849 0.0500
+TRACE_PEAK_AREA_RATIO: 0.0020
+CHEM: term
+DYE: big
+
+END_COMMENT
+
+BEGIN_DNA
+g 6 14
+c 6 25
+g 6 39
+c 6 42
+c 6 60
+g 6 63
+t 6 79
+g 6 90
+t 9 106
+g 9 113
+g 9 121
+g 12 130
+g 12 145
+g 8 153
+c 8 169
+c 9 177
+t 8 203
+t 8 210
+a 8 218
+t 9 234
+g 8 244
+c 6 257
+t 6 270
+a 8 275
+g 8 285
+c 8 304
+g 8 316
+a 8 333
+a 10 339
+t 8 350
+c 9 359
+g 9 370
+c 9 378
+c 10 392
+t 17 408
+t 12 421
+c 14 429
+t 18 444
+t 24 457
+c 28 467
+t 24 479
+c 24 486
+c 22 499
+c 23 516
+c 23 526
+t 19 540
+a 24 548
+a 24 559
+g 37 570
+t 44 582
+c 47 592
+t 47 604
+t 37 616
+g 37 628
+t 39 641
+a 41 650
+c 41 659
+g 47 672
+t 47 684
+c 57 696
+g 57 707
+t 57 721
+c 57 732
+t 57 744
+t 59 757
+g 59 769
+t 59 781
+g 59 793
+g 48 807
+g 48 819
+a 48 831
+g 48 841
+a 48 853
+g 57 864
+t 57 874
+t 57 886
+g 68 898
+a 68 909
+a 68 920
+t 68 930
+c 68 942
+a 68 951
+t 68 963
+c 68 975
+c 68 987
+c 50 999
+a 50 1007
+a 50 1019
+t 50 1032
+c 68 1044
+c 68 1056
+t 68 1068
+g 68 1081
+g 68 1094
+a 68 1107
+g 57 1118
+c 57 1129
+a 57 1139
+a 57 1149
+a 57 1160
+g 57 1172
+t 59 1184
+t 68 1195
+c 68 1207
+g 68 1220
+a 68 1232
+g 68 1243
+g 68 1256
+a 68 1268
+a 68 1279
+c 68 1289
+t 68 1301
+t 68 1313
+g 68 1325
+a 68 1338
+c 68 1348
+c 68 1360
+c 68 1372
+t 68 1384
+c 68 1396
+c 68 1407
+c 68 1419
+a 57 1428
+a 57 1439
+g 57 1451
+g 59 1463
+g 59 1476
+g 68 1488
+c 68 1500
+t 68 1513
+t 68 1525
+g 68 1536
+g 68 1549
+t 68 1562
+g 68 1574
+a 68 1586
+g 68 1597
+c 68 1608
+a 68 1619
+g 68 1632
+g 68 1644
+t 68 1656
+c 68 1667
+t 68 1679
+g 68 1692
+c 68 1703
+g 68 1716
+a 68 1728
+g 68 1739
+c 68 1751
+t 68 1764
+g 68 1776
+a 68 1788
+t 68 1799
+c 68 1810
+c 68 1822
+t 68 1835
+t 68 1847
+g 68 1859
+g 68 1872
+t 68 1884
+g 68 1896
+t 68 1908
+t 68 1920
+g 68 1932
+a 68 1943
+t 68 1954
+g 68 1966
+t 68 1978
+a 68 1989
+g 68 2001
+c 68 2013
+t 59 2026
+c 59 2038
+g 68 2050
+c 68 2062
+c 68 2074
+t 68 2087
+c 68 2098
+g 68 2110
+a 68 2123
+t 68 2134
+g 68 2146
+c 68 2157
+t 68 2170
+c 68 2182
+c 68 2193
+c 68 2205
+t 68 2218
+t 68 2230
+c 68 2242
+t 68 2254
+t 68 2266
+c 68 2277
+c 68 2289
+a 68 2299
+c 68 2310
+a 68 2320
+c 68 2332
+a 68 2343
+g 68 2356
+c 68 2369
+t 68 2382
+g 68 2393
+c 68 2406
+g 68 2418
+g 68 2432
+a 68 2444
+t 68 2455
+g 68 2467
+a 68 2479
+a 68 2491
+g 68 2503
+t 68 2514
+g 68 2526
+a 68 2539
+t 68 2549
+a 68 2561
+c 68 2572
+c 68 2584
+t 62 2597
+c 62 2609
+a 62 2620
+g 62 2633
+t 62 2646
+c 62 2657
+g 62 2670
+g 52 2683
+a 62 2695
+t 62 2706
+g 62 2718
+t 62 2730
+g 68 2742
+c 68 2753
+t 68 2766
+t 68 2778
+g 68 2790
+c 68 2802
+t 68 2815
+c 68 2826
+c 68 2838
+g 68 2850
+t 68 2863
+t 68 2875
+c 68 2886
+g 68 2898
+t 68 2910
+g 68 2922
+g 68 2934
+a 68 2947
+a 68 2958
+c 68 2969
+a 68 2979
+c 68 2990
+g 68 3003
+g 68 3016
+g 68 3028
+g 68 3040
+t 68 3053
+t 68 3065
+c 68 3077
+t 68 3089
+t 68 3101
+t 68 3113
+g 68 3125
+c 68 3137
+c 68 3149
+a 68 3159
+g 68 3171
+g 68 3184
+g 68 3195
+c 68 3208
+c 68 3220
+a 68 3231
+g 68 3243
+g 68 3255
+g 68 3267
+c 68 3280
+g 68 3292
+g 68 3305
+a 68 3318
+c 68 3328
+t 68 3340
+t 68 3352
+g 68 3364
+c 68 3376
+t 68 3389
+g 68 3401
+t 68 3413
+c 68 3425
+c 68 3436
+a 68 3447
+c 68 3458
+c 68 3470
+a 68 3481
+g 68 3494
+g 68 3506
+a 68 3519
+g 68 3531
+c 68 3543
+t 68 3555
+g 68 3567
+c 68 3579
+a 68 3590
+c 68 3601
+c 68 3613
+g 68 3625
+t 68 3638
+t 68 3651
+c 68 3663
+t 68 3675
+g 68 3687
+g 68 3700
+t 68 3711
+g 68 3723
+t 68 3735
+c 68 3746
+t 68 3758
+t 68 3770
+g 68 3782
+a 68 3795
+a 68 3806
+c 62 3817
+g 62 3830
+a 62 3841
+g 62 3853
+a 62 3866
+a 62 3877
+g 62 3889
+a 68 3901
+t 68 3911
+c 68 3923
+a 68 3934
+c 68 3946
+c 68 3958
+a 68 3969
+a 68 3981
+g 68 3994
+t 68 4006
+a 68 4018
+g 62 4030
+t 62 4042
+c 62 4054
+g 62 4067
+a 62 4079
+g 62 4091
+c 62 4103
+a 68 4114
+a 68 4126
+g 68 4138
+c 68 4149
+c 68 4161
+a 68 4173
+g 68 4185
+a 68 4198
+g 68 4209
+c 68 4221
+g 68 4234
+c 68 4245
+c 68 4257
+t 68 4270
+g 68 4282
+a 68 4294
+g 68 4305
+t 68 4318
+a 68 4329
+c 68 4340
+a 68 4351
+g 68 4364
+g 68 4376
+c 68 4388
+g 68 4401
+g 68 4414
+t 68 4426
+g 68 4437
+g 68 4450
+a 68 4463
+g 68 4474
+g 68 4487
+c 68 4498
+c 68 4510
+g 68 4522
+c 68 4534
+t 68 4547
+a 68 4558
+t 68 4569
+g 68 4581
+t 68 4594
+a 68 4605
+c 68 4616
+t 68 4628
+c 68 4640
+a 68 4651
+g 68 4664
+c 68 4676
+c 68 4688
+t 68 4701
+c 68 4713
+a 68 4724
+c 68 4735
+a 68 4746
+g 68 4758
+c 68 4771
+t 68 4783
+g 68 4795
+g 68 4808
+a 68 4821
+c 68 4831
+a 68 4843
+g 68 4855
+g 68 4867
+g 68 4878
+c 68 4891
+c 68 4903
+a 68 4914
+c 68 4926
+c 68 4938
+a 68 4949
+c 68 4961
+c 68 4973
+t 68 4986
+g 68 4998
+c 68 5011
+t 68 5023
+g 68 5035
+c 68 5047
+t 68 5059
+t 68 5071
+g 68 5083
+a 68 5096
+c 68 5106
+t 68 5119
+g 68 5131
+a 68 5143
+t 68 5154
+t 68 5166
+g 68 5179
+c 62 5190
+c 62 5202
+a 62 5213
+g 62 5225
+c 62 5238
+t 62 5250
+c 62 5262
+a 62 5273
+c 68 5284
+g 68 5297
+a 62 5309
+g 62 5321
+a 62 5334
+c 62 5345
+a 62 5356
+c 68 5367
+t 68 5380
+t 68 5392
+g 68 5404
+c 68 5417
+c 68 5429
+g 68 5442
+a 68 5454
+g 68 5465
+g 62 5477
+a 68 5490
+a 68 5502
+g 68 5513
+a 68 5525
+g 68 5537
+g 68 5549
+a 68 5562
+g 62 5573
+g 62 5585
+a 62 5598
+t 62 5608
+c 68 5620
+c 68 5632
+c 68 5644
+g 68 5657
+c 68 5669
+t 68 5682
+c 68 5694
+g 68 5706
+t 68 5718
+g 68 5730
+c 68 5742
+t 68 5754
+c 68 5765
+t 68 5777
+t 68 5790
+g 68 5802
+c 68 5813
+t 68 5826
+g 68 5837
+g 68 5849
+t 68 5862
+g 62 5874
+t 62 5886
+c 62 5897
+g 62 5910
+a 62 5922
+t 62 5933
+g 62 5945
+t 68 5956
+c 68 5968
+g 68 5981
+c 68 5992
+c 68 6005
+g 68 6017
+g 68 6030
+c 68 6042
+g 68 6054
+t 68 6067
+g 68 6078
+g 68 6091
+t 68 6103
+c 68 6115
+g 68 6127
+c 68 6138
+t 68 6152
+g 57 6164
+t 57 6176
+c 59 6187
+g 59 6200
+c 59 6212
+t 59 6224
+g 57 6236
+t 68 6248
+a 68 6260
+c 68 6270
+c 68 6282
+c 68 6294
+g 59 6307
+a 59 6320
+c 57 6331
+g 57 6343
+a 57 6356
+a 59 6368
+g 59 6380
+t 59 6391
+g 59 6404
+t 59 6415
+g 68 6428
+c 68 6439
+c 68 6452
+g 68 6464
+c 68 6476
+c 68 6488
+c 59 6500
+c 59 6512
+a 59 6523
+g 59 6535
+g 59 6547
+a 59 6560
+c 59 6571
+a 59 6582
+c 59 6593
+c 59 6606
+t 59 6618
+a 59 6630
+g 59 6642
+g 68 6655
+g 68 6667
+t 68 6680
+a 68 6691
+g 68 6703
+t 68 6715
+g 68 6727
+g 68 6741
+a 68 6752
+g 68 6764
+g 68 6777
+c 68 6788
+c 68 6800
+g 68 6813
+t 68 6825
+g 68 6837
+g 68 6849
+t 68 6862
+c 68 6874
+g 68 6886
+a 68 6899
+g 68 6910
+g 68 6923
+g 68 6935
+t 68 6946
+c 68 6958
+c 68 6970
+c 68 6981
+t 68 6994
+g 68 7006
+c 68 7018
+t 68 7031
+a 68 7042
+t 68 7054
+g 59 7066
+t 59 7078
+a 59 7090
+g 59 7102
+c 59 7114
+g 57 7127
+g 57 7140
+a 57 7152
+c 57 7162
+g 57 7175
+a 57 7187
+t 59 7198
+c 59 7210
+c 68 7223
+t 68 7235
+c 68 7247
+t 57 7259
+t 57 7272
+c 57 7284
+a 57 7294
+c 57 7305
+t 57 7318
+g 68 7330
+c 68 7342
+c 68 7355
+t 68 7367
+g 68 7379
+c 68 7391
+t 68 7404
+g 68 7415
+g 68 7429
+t 68 7439
+g 68 7451
+t 68 7463
+t 59 7475
+c 59 7487
+c 59 7499
+g 59 7512
+a 59 7524
+c 59 7535
+g 59 7547
+t 57 7560
+t 57 7572
+g 68 7583
+g 68 7598
+t 68 7609
+c 68 7621
+g 59 7633
+c 57 7645
+t 54 7657
+c 54 7668
+c 54 7682
+a 54 7692
+t 54 7704
+g 57 7716
+a 59 7729
+a 68 7741
+c 68 7752
+c 68 7764
+g 68 7777
+a 68 7789
+c 68 7801
+t 59 7813
+g 59 7825
+a 59 7838
+c 57 7849
+a 57 7861
+t 59 7872
+a 59 7884
+g 59 7897
+c 59 7909
+c 59 7922
+g 59 7934
+a 48 7946
+c 48 7958
+g 43 7969
+g 43 7983
+a 40 7996
+g 40 8006
+a 41 8019
+a 43 8031
+t 43 8041
+g 48 8054
+c 48 8065
+c 46 8078
+a 47 8089
+a 47 8101
+g 59 8113
+t 59 8125
+c 59 8137
+c 43 8149
+g 41 8161
+g 37 8175
+c 41 8186
+c 41 8199
+g 47 8211
+t 47 8223
+g 47 8235
+t 59 8248
+g 47 8260
+t 47 8271
+g 47 8284
+a 47 8297
+g 47 8309
+t 47 8320
+g 59 8333
+a 41 8344
+g 37 8355
+g 37 8368
+t 41 8381
+a 41 8393
+g 44 8405
+c 47 8416
+g 39 8430
+a 39 8442
+a 35 8454
+g 31 8464
+g 31 8478
+c 39 8489
+t 39 8501
+c 47 8513
+c 47 8525
+c 59 8536
+c 47 8548
+a 41 8560
+c 38 8571
+a 34 8582
+a 34 8596
+g 35 8606
+g 32 8620
+c 39 8632
+g 41 8644
+t 37 8656
+t 37 8669
+g 44 8680
+g 44 8694
+t 44 8706
+a 47 8717
+c 47 8728
+t 59 8740
+g 43 8753
+c 41 8765
+g 37 8778
+t 41 8790
+a 37 8802
+g 41 8814
+c 37 8826
+a 59 8838
+t 39 8849
+c 37 8860
+c 37 8873
+a 41 8884
+c 37 8895
+t 36 8907
+t 31 8921
+c 32 8931
+c 32 8945
+t 39 8958
+c 31 8968
+c 32 8982
+g 39 8993
+t 39 9006
+c 43 9017
+g 39 9030
+t 40 9042
+g 40 9054
+c 40 9066
+t 46 9078
+g 47 9091
+t 47 9102
+c 37 9114
+g 37 9127
+c 37 9138
+g 35 9151
+g 32 9163
+c 41 9175
+t 34 9188
+c 34 9200
+a 28 9211
+g 31 9223
+t 24 9234
+t 32 9247
+t 32 9259
+c 35 9271
+a 35 9282
+g 31 9295
+c 29 9305
+c 29 9318
+t 34 9331
+c 41 9343
+t 41 9356
+c 32 9366
+c 37 9378
+t 37 9391
+c 30 9402
+c 25 9414
+a 32 9426
+t 32 9438
+c 25 9449
+g 19 9460
+g 19 9477
+a 22 9487
+g 22 9499
+t 28 9512
+g 34 9524
+a 32 9535
+g 35 9547
+a 35 9560
+g 37 9572
+c 37 9582
+t 41 9595
+g 31 9607
+g 34 9620
+a 35 9633
+t 33 9643
+g 28 9656
+g 25 9669
+c 32 9681
+a 31 9693
+g 37 9706
+t 37 9717
+c 32 9729
+g 27 9741
+g 25 9755
+t 31 9767
+g 31 9779
+a 30 9792
+g 30 9804
+c 25 9816
+c 34 9826
+c 27 9838
+a 32 9851
+g 35 9864
+c 37 9875
+t 39 9888
+a 39 9900
+g 39 9913
+c 35 9924
+t 37 9937
+c 32 9948
+a 25 9960
+a 27 9969
+c 30 9982
+g 34 9995
+a 28 10008
+t 33 10019
+g 33 10031
+c 28 10043
+g 28 10055
+c 28 10069
+t 30 10081
+t 26 10092
+g 29 10106
+g 29 10116
+c 32 10131
+g 17 10143
+t 15 10155
+a 9 10166
+g 9 10174
+g 9 10191
+c 13 10203
+g 14 10215
+g 19 10229
+a 26 10240
+c 13 10252
+t 13 10264
+g 12 10276
+t 18 10289
+c 12 10299
+g 12 10312
+a 12 10326
+a 12 10335
+g 12 10346
+c 12 10360
+g 19 10374
+c 18 10384
+g 28 10397
+a 13 10408
+t 11 10420
+c 9 10431
+c 10 10449
+c 8 10456
+g 8 10467
+g 10 10482
+a 14 10497
+g 11 10507
+t 8 10520
+g 8 10535
+t 8 10552
+t 8 10559
+c 8 10566
+c 8 10575
+t 8 10593
+g 12 10607
+g 12 10617
+t 16 10632
+g 16 10644
+c 10 10654
+a 10 10666
+c 10 10681
+c 8 10689
+t 10 10703
+c 8 10716
+a 8 10721
+a 8 10731
+t 13 10751
+c 14 10762
+c 20 10774
+c 15 10785
+t 19 10796
+a 11 10810
+g 11 10823
+a 8 10835
+t 8 10846
+a 6 10862
+g 6 10866
+a 6 10886
+a 6 10894
+g 8 10905
+g 10 10919
+a 11 10930
+g 11 10945
+a 10 10956
+g 10 10965
+g 10 10977
+a 10 10994
+g 10 11007
+c 8 11016
+c 13 11029
+c 16 11045
+c 11 11052
+a 10 11066
+g 12 11073
+g 9 11088
+t 14 11103
+c 10 11116
+a 11 11127
+c 10 11140
+t 12 11147
+c 10 11161
+a 9 11170
+t 10 11184
+c 13 11197
+t 13 11213
+g 12 11223
+g 10 11232
+a 11 11245
+a 11 11256
+c 15 11271
+g 10 11283
+t 10 11294
+g 10 11305
+g 9 11313
+c 9 11329
+c 10 11342
+t 18 11360
+t 12 11370
+c 10 11382
+a 13 11396
+t 12 11406
+g 19 11418
+t 19 11431
+c 23 11443
+t 22 11455
+t 22 11465
+c 18 11480
+c 12 11492
+t 11 11504
+t 10 11512
+t 8 11518
+g 10 11528
+a 8 11545
+c 10 11563
+g 10 11577
+c 10 11588
+c 12 11598
+g 12 11613
+c 12 11624
+c 12 11634
+a 12 11646
+c 11 11658
+c 12 11668
+t 16 11681
+c 15 11697
+t 10 11707
+g 10 11720
+c 10 11736
+a 9 11744
+t 9 11757
+c 10 11767
+t 8 11784
+t 8 11792
+t 8 11802
+g 8 11822
+g 9 11831
+t 8 11841
+g 8 11857
+c 8 11871
+c 8 11879
+g 10 11900
+g 8 11909
+t 9 11921
+g 11 11931
+a 16 11946
+t 10 11958
+c 10 11967
+a 10 11981
+c 10 11997
+c 10 12005
+a 10 12020
+g 11 12032
+g 11 12042
+t 9 12053
+c 9 12070
+g 9 12085
+t 9 12093
+c 9 12105
+a 9 12113
+a 9 12128
+c 9 12146
+g 9 12154
+t 9 12171
+a 9 12179
+g 7 12195
+a 7 12204
+a 8 12212
+c 8 12232
+c 10 12248
+c 11 12258
+c 9 12267
+a 9 12275
+c 9 12293
+c 9 12301
+a 9 12313
+g 8 12324
+c 8 12341
+a 8 12349
+g 8 12369
+g 9 12378
+g 8 12387
+g 8 12397
+c 8 12404
+t 8 12423
+t 9 12434
+t 10 12442
+t 10 12451
+c 8 12466
+t 8 12484
+t 8 12491
+c 7 12503
+a 8 12521
+c 10 12535
+t 10 12549
+c 12 12559
+c 16 12571
+c 17 12580
+c 10 12591
+c 9 12605
+g 10 12623
+t 10 12635
+c 10 12650
+g 14 12662
+g 12 12672
+t 12 12687
+a 11 12699
+g 11 12711
+a 11 12723
+c 11 12734
+g 8 12746
+a 6 12759
+c 7 12771
+a 6 12775
+g 10 12794
+c 9 12803
+c 10 12816
+t 13 12835
+c 11 12849
+a 11 12863
+t 12 12874
+g 13 12885
+c 12 12897
+g 9 12908
+g 9 12916
+g 9 12928
+c 9 12946
+t 10 12964
+c 10 12973
+t 10 12983
+g 12 12996
+c 12 13007
+t 12 13021
+c 9 13033
+g 10 13047
+a 10 13063
+a 10 13071
+a 10 13080
+a 8 13089
+c 9 13101
+c 9 13111
+a 9 13130
+t 8 13143
+c 8 13154
+t 6 13174
+t 6 13178
+c 6 13189
+c 6 13199
+t 10 13218
+t 10 13233
+a 9 13246
+g 10 13260
+c 10 13271
+g 10 13281
+t 10 13300
+g 9 13312
+g 7 13322
+a 8 13335
+a 8 13344
+g 8 13348
+t 8 13362
+c 6 13378
+c 8 13388
+g 8 13407
+g 8 13415
+t 8 13433
+t 9 13441
+t 9 13450
+t 8 13459
+g 8 13474
+g 8 13484
+c 8 13490
+g 8 13504
+t 8 13522
+t 10 13531
+c 10 13548
+c 10 13559
+c 11 13574
+c 11 13585
+a 11 13594
+c 11 13607
+c 16 13616
+c 16 13627
+c 16 13640
+g 16 13654
+g 12 13668
+g 15 13676
+g 8 13686
+g 8 13695
+g 9 13714
+g 9 13723
+c 9 13742
+t 9 13760
+g 9 13772
+c 11 13781
+c 12 13790
+c 10 13806
+c 10 13816
+c 9 13834
+a 8 13842
+g 8 13856
+g 8 13868
+g 8 13873
+c 8 13890
+a 8 13911
+a 8 13927
+a 10 13940
+a 13 13956
+a 8 13964
+a 12 13972
+a 8 13984
+a 8 14008
+c 8 14017
+c 8 14029
+t 8 14045
+t 8 14053
+t 8 14060
+g 8 14078
+c 8 14093
+c 9 14102
+c 9 14113
+c 8 14122
+g 8 14142
+g 9 14158
+g 9 14166
+g 14 14180
+g 22 14192
+g 22 14204
+a 15 14215
+a 12 14229
+c 11 14244
+c 10 14258
+a 10 14268
+c 10 14279
+c 8 14289
+t 8 14304
+t 8 14320
+g 8 14326
+c 8 14347
+c 8 14357
+c 7 14366
+c 8 14377
+c 8 14389
+t 6 14393
+c 6 14405
+c 6 14415
+t 10 14433
+g 10 14442
+g 10 14453
+g 10 14468
+c 10 14486
+g 9 14495
+g 9 14508
+a 9 14521
+a 18 14541
+a 11 14552
+a 8 14563
+a 6 14577
+t 6 14584
+c 6 14603
+t 6 14607
+c 6 14611
+a 6 14631
+a 6 14646
+a 8 14651
+a 8 14670
+c 8 14684
+c 8 14698
+c 11 14705
+c 9 14721
+c 10 14731
+g 16 14743
+g 13 14754
+g 12 14764
+g 15 14777
+g 15 14796
+g 10 14805
+g 7 14823
+g 7 14834
+t 6 14842
+g 6 14843
+g 8 14864
+a 6 14875
+t 10 14887
+g 8 14906
+g 8 14920
+c 8 14939
+c 6 14946
+a 6 14956
+t 6 14978
+a 6 14981
+a 6 14993
+a 6 15002
+a 12 15019
+a 12 15029
+a 11 15039
+c 11 15051
+c 11 15064
+c 10 15077
+t 10 15088
+t 6 15099
+c 6 15119
+c 6 15130
+t 6 15132
+c 6 15146
+c 8 15157
+c 10 15166
+c 10 15184
+c 10 15196
+t 10 15213
+t 10 15224
+c 10 15233
+c 9 15250
+g 10 15263
+a 11 15274
+t 11 15287
+c 9 15301
+g 9 15313
+c 9 15323
+g 9 15332
+c 6 15343
+c 8 15362
+c 8 15375
+g 8 15381
+g 8 15398
+t 8 15415
+t 8 15420
+g 8 15437
+a 8 15451
+a 11 15465
+a 11 15483
+a 10 15494
+a 11 15505
+a 15 15520
+a 11 15529
+a 10 15539
+a 10 15548
+g 8 15566
+g 8 15577
+c 8 15594
+g 8 15600
+c 8 15605
+c 8 15624
+c 8 15639
+c 8 15654
+a 8 15659
+c 8 15674
+t 8 15690
+t 8 15697
+t 8 15707
+a 8 15719
+a 10 15738
+a 10 15748
+c 10 15758
+a 10 15776
+c 10 15786
+t 8 15800
+c 8 15811
+c 8 15826
+a 8 15833
+c 8 15838
+c 8 15847
+t 8 15860
+c 8 15867
+a 8 15887
+t 7 15901
+g 9 15918
+g 10 15930
+t 10 15939
+g 10 15957
+g 10 15973
+g 13 15985
+g 21 15995
+g 28 16005
+g 13 16020
+g 10 16034
+g 8 16045
+g 8 16052
+g 8 16069
+a 8 16086
+a 8 16096
+g 8 16103
+a 8 16118
+g 8 16140
+c 8 16148
+a 8 16167
+c 8 16177
+a 8 16182
+c 8 16197
+c 8 16214
+c 8 16221
+c 8 16238
+c 13 16253
+c 9 16270
+c 9 16276
+c 9 16297
+END_DNA
+
+END_SEQUENCE

Added: trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,420 @@
+BLASTN 2.2.4 [Aug-26-2002]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= contig_200
+         (5116 letters)
+
+Database: /data/blast/yeast.nt 
+           17 sequences; 12,155,026 total letters
+
+Searching.done
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+gi|6323989|ref|NC_001146.1| Saccharomyces cerevisiae chromosome ...   648.7   0.0  
+
+>gi|6323989|ref|NC_001146.1| Saccharomyces cerevisiae chromosome XIV,
+              complete chromosome sequence
+          Length = 784328
+
+ Score =  648.7 bits (327), Expect = 0.0
+ Identities = 830/1000 (83%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 2697   ataggccaaaatcttcacatgattcaaaatttcaagaaaatgccccaatatttctcccat 2756
+              ||||| ||||||||||| ||||||||||| ||||| ||||||||||| |||||||| |||
+Sbjct: 133942 atagggcaaaatcttcatatgattcaaaacttcaaaaaaatgccccagtatttctctcat 133883
+
+                                                                          
+Query: 2757   ctaacccttgaaagtcacttgaaaatcattcaagcttggttatttgctgtggaacaaaca 2816
+               ||||| | |||||||| |||||||| |||||||| ||||||||||| || |||||||| 
+Sbjct: 133882 ttaaccttggaaagtcatttgaaaattattcaagcctggttatttgccgttgaacaaact 133823
+
+                                                                          
+Query: 2817   ctagatggaagaggcaaaatgggttccttggtaggggcctctgatgaatttaaaaacggt 2876
+               ||||||||||||| |||||||||||||| ||||| || || |||||||||||||| |||
+Sbjct: 133822 ttagatggaagaggaaaaatgggttccttagtaggcgcgtcggatgaatttaaaaatggt 133763
+
+                                                                          
+Query: 2877   ggaggtgaaaatgccattttagaatattgtcaatggaccatggtctttatcaatcacttg 2936
+              || ||||||||||| ||| ||||||| || |||||||| ||||| ||||||||||| || 
+Sbjct: 133762 gggggtgaaaatgcgattctagaatactgccaatggacaatggtttttatcaatcattta 133703
+
+                                                                          
+Query: 2937   tgttcttgttctgataatgttaatcaaagaatgcttttgagaacaaagctagaaaattgt 2996
+              || ||||| ||||||||  | |||||||||||||| || || || || ||||||||||||
+Sbjct: 133702 tgctcttgctctgataacataaatcaaagaatgctattaaggacgaaactagaaaattgt 133643
+
+                                                                          
+Query: 2997   ggtattctacgaatcatgaataaaataaaattgctagattatgacaaagtgattgatcaa 3056
+              || ||| ||||||| ||||| ||||||||| |||||||||| ||||| || ||||| |||
+Sbjct: 133642 ggaattttacgaattatgaacaaaataaaactgctagattacgacaaggtaattgaccaa 133583
+
+                                                                          
+Query: 3057   attgaattatatgacaataataaactcgatgactttaacgtaaagttggaggcaagtaac 3116
+              ||||| |||||||||||||||||||| || || |||||||| || || ||||| |  || 
+Sbjct: 133582 attgagttatatgacaataataaacttgacgattttaacgttaaattagaggccaacaat 133523
+
+                                                                          
+Query: 3117   aaagcatttaatgtggatttaaaagatccactatcgttgttgaagaatctttgggaaata 3176
+              || || |||||||| |||||| | ||||||||||| || || || || |||||||| |||
+Sbjct: 133522 aaggcttttaatgtagatttacacgatccactatcattattaaaaaacctttgggatata 133463
+
+                                                                          
+Query: 3177   tgtaaaggcacggataatgagaaactgttggtgtctttagtacaacatcttttcctttcc 3236
+              |||||||| || || ||||| || || || || |||||||| ||||||||||||||||| 
+Sbjct: 133462 tgtaaaggtaccgagaatgaaaagcttttagtatctttagttcaacatcttttcctttct 133403
+
+                                                                          
+Query: 3237   agctcgaaattaatcgaagagaatcagaatccctccaaactatccaaacagttgaaatta 3296
+              || || || ||||| ||||| || || ||| |||| |||||  | || || |||||| | 
+Sbjct: 133402 agttcaaagttaatagaagaaaaccaaaattcctctaaacttactaagcaattgaaactt 133343
+
+                                                                          
+Query: 3297   atggattctttagtgactaatgttagtgttgcctcaacagccgacgaagaatcgaatatg 3356
+              ||||||||||| ||||||||||||||||||||||||||  | || |||||| | ||||||
+Sbjct: 133342 atggattctttggtgactaatgttagtgttgcctcaacttctgatgaagaaactaatatg 133283
+
+                                                                          
+Query: 3357   aacatggccattcagcggctttatgatgctatgcaaactgatgaagtagcacgtcgtgca 3416
+              || ||||| || ||||||||||| ||||| ||||| ||||||||||| |||||||| || 
+Sbjct: 133282 aatatggctatccagcggctttacgatgccatgcagactgatgaagttgcacgtcgagct 133223
+
+                                                                          
+Query: 3417   atacttgaaagtagaactctgacnnnnnnnttagaagaaattcaagcggagagggattcc 3476
+              ||| | ||||||||| |||| ||       || ||||||||||| || || |||||||| 
+Sbjct: 133222 atattagaaagtagagctctaacgaagaaattggaagaaattcaggcagaaagggattct 133163
+
+                                                                          
+Query: 3477   ctaagtgaaaagctaggtaaagcagagcacgggctcgttgggcaattagagaacgagcta 3536
+               ||||||||||| || | || || |||||||| |||||||| || |||||  | ||| | 
+Sbjct: 133162 ttaagtgaaaagttaagcaaggcggagcacggactcgttggacagttagaagatgagttg 133103
+
+                                                                          
+Query: 3537   cacggaagagatcgtatactggccaagaaccaaagggttatgcaacagctggaatccgaa 3596
+              || |  |||||||||||  | || ||||| || || |||||||| ||||||||| | || 
+Sbjct: 133102 catgagagagatcgtattttagctaagaatcagagagttatgcagcagctggaagctgag 133043
+
+                                                                          
+Query: 3597   ctagaagaactgaaaaagaaacatcttctggaaaagcatcagcaagaagttgaactgaga 3656
+               |||||||  |||||||||| |||||| ||||||||||||||||||| || ||| |||||
+Sbjct: 133042 ttagaagagttgaaaaagaagcatcttttggaaaagcatcagcaagaggtagaattgaga 132983
+
+                                                      
+Query: 3657   aaaatgttaaccatattaaattccaggcctgaagaaagct 3696
+              |||||||| || ||||| ||||| ||||||||||||||||
+Sbjct: 132982 aaaatgttgactatattgaattcgaggcctgaagaaagct 132943
+
+
+
+ Score =  317 bits (160), Expect = 1e-85
+ Identities = 388/464 (83%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 2170   gtttgaaaaagcaaccatctctaaacagcatttatcgaggtgggcctgaaattaatacaa 2229
+              ||||||||||||| |||||||||||||  |||||| |||||||| |||| |  ||||| |
+Sbjct: 134469 gtttgaaaaagcagccatctctaaacaatatttatagaggtggggctgagaacaatacga 134410
+
+                                                                          
+Query: 2230   gcgcgtccactcttccaggagacagaactaatagaccgccaatacactatgtgcaacgca 2289
+              | || || |||||||||||||||||||| || ||||| || ||||| ||||||||||| |
+Sbjct: 134409 gtgcatctactcttccaggagacagaacaaacagaccaccgatacattatgtgcaacgaa 134350
+
+                                                                          
+Query: 2290   tcctggccgataagcttacgaccgacgaaatgaaagatttatgggttaccttgaggacag 2349
+              ||||||| || || ||||| | ||| || ||||||||||||||||| || || || || |
+Sbjct: 134349 tcctggcagacaaacttactagcgatgagatgaaagatttatgggtaactttaagaactg 134290
+
+                                                                          
+Query: 2350   aacaattagactgggtagacgcctttatagaccatcagggtcacattgccatggctaacg 2409
+              |||| ||||||||||||||||| |||||||||||||||||||||||||| |||||||| |
+Sbjct: 134289 aacagttagactgggtagacgcttttatagaccatcagggtcacattgctatggctaatg 134230
+
+                                                                          
+Query: 2410   tattgatgaactcaatatacaagacggcgcctcgggaacatttgacgaaggagttattgg 2469
+              || | |||||||| || || ||||| || || || ||| | ||||| || || ||||| |
+Sbjct: 134229 tactaatgaactctatttataagactgcaccccgagaaaacttgactaaagaattattag 134170
+
+                                                                          
+Query: 2470   aaaaggaaaattccttttttaaatgttttagagttttatcgatgctttcacaaggtttat 2529
+              | || |||||||| ||||||||||| || ||||| ||||| ||||| ||||||||||| |
+Sbjct: 134169 agaaagaaaattcattttttaaatgcttcagagtcttatcaatgctctcacaaggtttgt 134110
+
+                                                                          
+Query: 2530   atgaattcagcacgcatagactgatgaccgatactgtcgcagaaggtttgttctccacga 2589
+              |||| ||||| || |||||  | ||||| |||||||| ||||||||||| ||||| ||||
+Sbjct: 134109 atgagttcagtacacataggttaatgactgatactgttgcagaaggtttattctctacga 134050
+
+                                                          
+Query: 2590   agttggttaccaggaaaatggctactgaaattttcgtctgtatg 2633
+              |  |||  || ||||||||||| ||||||||||| || ||||||
+Sbjct: 134049 aactggccacaaggaaaatggccactgaaatttttgtttgtatg 134006
+
+
+
+ Score =  289 bits (146), Expect = 3e-77
+ Identities = 701/886 (79%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 1246   aaatgttgaagaattcaggttccaagaattcgaattcgaaggaaagtaactccagttcaa 1305
+              ||||||||||||||||||| |||||  ||||||| || ||||||||| | || | ||| |
+Sbjct: 135384 aaatgttgaagaattcaggctccaaacattcgaactcaaaggaaagtcattcgaattcga 135325
+
+                                                                          
+Query: 1306   gttccggtcttttccaaaacttgaagcgtttggctaattcaagcgcatcgaataataatg 1365
+              |||| ||| | |||||||| ||||||||||||||||| ||||  ||| ||||||  ||| 
+Sbjct: 135324 gttctggtatattccaaaatttgaagcgtttggctaactcaaatgcaacgaatagcaata 135265
+
+                                                                          
+Query: 1366   ccacctctccgaacaatgcatcagaacaccagcatcctcccgttggacaggaaacatcca 1425
+              |    ||||||| | |||||||  |||| |||||  |||| || ||| | |||   ||||
+Sbjct: 135264 cgggttctccgacctatgcatcccaacaacagcactctccagtgggaaacgaagtgtcca 135205
+
+                                                                          
+Query: 1426   cttcatctgcatcgtcctcgtcttttagaaggctaaatggaccctccaggtctacatcca 1485
+              ||||| ||||||| || || ||||||||||  || |||| ||| || ||||||||||| |
+Sbjct: 135204 cttcacctgcatcttcgtcatcttttagaaaactgaatgcaccttctaggtctacatcta 135145
+
+                                                                          
+Query: 1486   ctgaagcaaggccgctgaacaagaaagcgacactcaacactcaaaatctgtctcagtaca 1545
+              |||| || |||||| |||| || ||| ||||| | ||||| |||||  ||||||| || |
+Sbjct: 135144 ctgaggcgaggccgttgaataaaaaatcgacattaaacacacaaaacttgtctcaatata 135085
+
+                                                                          
+Query: 1546   tgaacggcaagataagtaccgatgcgccaacatcgtcccagcatgcaaggtcgcattcag 1605
+              |||| || ||  |||||   ||||  ||   ||| || ||||| ||||||||||||||| 
+Sbjct: 135084 tgaatggtaaattaagtggagatgttcccgtatcctcacagcacgcaaggtcgcattcaa 135025
+
+                                                                          
+Query: 1606   tacagtccaaatattcgtactccaagaggacttcctctcaggcgtctaacaagctaacga 1665
+              | || || ||||||||||| |||||||||| ||| || |||||||| || |||||||| |
+Sbjct: 135024 tgcaatcgaaatattcgtattccaagaggaattcttcacaggcgtccaataagctaacaa 134965
+
+                                                                          
+Query: 1666   ggcaacacactgggcagagtcattctgcgacaagtcttttatccctcggctctttaacaa 1725
+              |||| || || ||||| ||||| || ||  |||||||| | || |  || ||  |||| |
+Sbjct: 134964 ggcagcataccgggcaaagtcactccgcatcaagtcttctctctcaaggttcgctaacta 134905
+
+                                                                          
+Query: 1726   acttaagcaaattcaccactcctgatggcaaaatccatctagagatgccctcggacccat 1785
+              |||| ||||||||||| || || ||||| |||||  || |||| ||||| ||||||||||
+Sbjct: 134904 acttgagcaaattcactacacccgatggtaaaatttatttagaaatgccgtcggacccat 134845
+
+                                                                          
+Query: 1786   acgaagtggaagttttgttcgaggacattatgtacaaaagaaacattttccaatctctat 1845
+              |||| |||||||||||||| || || |||||||| |||||||| ||||| || ||| |||
+Sbjct: 134844 acgaggtggaagttttgtttgaagatattatgtataaaagaaatatttttcagtctttat 134785
+
+                                                                          
+Query: 1846   caggtgacaaacaagaagagctaatgagctacagtacggaaaagaaatggttgattgtga 1905
+              |||  |||||||||||||  || ||| ||||||| |  || ||||||||| ||||||| |
+Sbjct: 134784 cagaggacaaacaagaagctctgatgggctacagcattgagaagaaatggctgattgtta 134725
+
+                                                                          
+Query: 1906   aacaagatttacagaacgaattgaaaaagattagagcaaacacgacttcttcttccgctg 1965
+              | || |||||||| || ||  | ||||| ||  |||||||||| || ||||||||| |||
+Sbjct: 134724 agcaggatttacaaaatgagctaaaaaaaatgcgagcaaacactacatcttcttccactg 134665
+
+                                                                          
+Query: 1966   cttctcgtacttcgatagcatcggatcaacatcccatactcacggctaattcaacacttt 2025
+              | ||  | |||||||| || ||||| || ||||| || || |||||||||||| | ||||
+Sbjct: 134664 cctccagaacttcgatggcgtcggaccatcatcctatccttacggctaattcatctcttt 134605
+
+                                                                          
+Query: 2026   catcgcctaaatctgctctgatgacgagtgcttcttctccgacttctactgtatatagta 2085
+              | || || ||||||| | ||||||| || || |||||||| || || |||||||| || |
+Sbjct: 134604 cgtctcccaaatctgttttgatgacaagcgcctcttctcccacctcaactgtatacagca 134545
+
+                                                            
+Query: 2086   acaccctaaaccattccaccactctttcatctgtgggcacgtcaac 2131
+              |||   |||| ||||| || ||||||||||| || || ||||||||
+Sbjct: 134544 acagtttaaatcattctactactctttcatcggtaggtacgtcaac 134499
+
+
+
+ Score =  200 bits (101), Expect = 2e-50
+ Identities = 512/649 (78%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 35     catcccccttcatgatcagcatcgagaattccggcaccaaggtcctcccagacatattca 94
+              |||| ||||| ||||| ||||| |||||||| ||||| || || |||||||| ||||| |
+Sbjct: 136636 catcgccctttatgataagcattgagaattcaggcacaaaagttctcccagatatattta 136577
+
+                                                                          
+Query: 95     acgcggtcgtgctgatcaccatagtctccgcgggcaactccaacgtgtacatcggctcaa 154
+              ||||||||||||| ||||||||  | ||||| || ||||| || |||||||| |||||||
+Sbjct: 136576 acgcggtcgtgctaatcaccattctttccgcaggtaactctaatgtgtacattggctcaa 136517
+
+                                                                          
+Query: 155    gggtcctgtacagtctatccaaaaacgacttggcgccccggttcctgtcgatcgtgacca 214
+              | || ||||| ||||| || |||||   |||||||||  ||||| |||| | ||||||||
+Sbjct: 136516 gagtactgtatagtctgtctaaaaatagcttggcgccaaggttcttgtctaacgtgacca 136457
+
+                                                                          
+Query: 215    agggcggggtcccctactttgccgttctggcaacatctgtattcgggttcctggccttct 274
+                || || || || |||||| | |||||  | ||||| || ||||| ||  |||| ||||
+Sbjct: 136456 gaggtggtgttccatacttttctgttctatctacatccgtgttcggatttttggctttct 136397
+
+                                                                          
+Query: 275    tggaaacctccgcaggtagtggcaaggctttcaactggctgttaaacattactggtgtgg 334
+              | ||    || ||||| || |||||||| || |||||| | || ||||| ||||||||||
+Sbjct: 136396 tagaggtttctgcaggcagcggcaaggcctttaactggttattgaacataactggtgtgg 136337
+
+                                                                          
+Query: 335    caggatttttcgcatggctgctgatttcattttcgcacatccgctttatgcaagcgataa 394
+              | || || || || |||||  ||||||||||||| || ||||| || |||||||| ||||
+Sbjct: 136336 ccggtttctttgcctggcttttgatttcattttctcatatccgtttcatgcaagccataa 136277
+
+                                                                          
+Query: 395    gcaaacgcggaatatcgaggaacgatctgccttacaaggctcgcatgatgccttatctag 454
+              | ||||| || ||||||||| | ||  | ||||| || ||||  |||||||||| | | |
+Sbjct: 136276 ggaaacgtggtatatcgagggatgacttaccttataaagctcaaatgatgccttttttgg 136217
+
+                                                                          
+Query: 455    cctactacgcatcctttttcatcgccctgatagtcctgatccagggcttcactgcatttg 514
+              | || || ||||| ||||||||||| || || || ||||||||||||||||| || || |
+Sbjct: 136216 catattatgcatcttttttcatcgctctaattgttctgatccagggcttcaccgctttcg 136157
+
+                                                                          
+Query: 515    cgcccagtttccagcctgtagatttcgttgccgcatacatatccgtcttcctatttgttg 574
+              | || | ||| |||||| |||| || || || ||||| |||||  | ||||||||| | |
+Sbjct: 136156 cacctacttttcagcctatagactttgtcgctgcatatatatcaatattcctatttttgg 136097
+
+                                                                          
+Query: 575    caatctggttttcattccagctttggttcaagtgtcccataatcttgaagttgcaggacg 634
+              | || ||||| ||||||||  ||||||| ||||| | | || ||| |||| |||||||  
+Sbjct: 136096 ccatatggttatcattccaagtttggtttaagtgccgcttactctggaagctgcaggata 136037
+
+                                                               
+Query: 635    tcgacatagattccgaccgccgcgaaatagaggagcaggtatggataga 683
+              |||| || ||||| ||||||||  |||||||||||  ||||||||||||
+Sbjct: 136036 tcgatatcgattctgaccgccggcaaatagaggagttggtatggataga 135988
+
+
+
+ Score =  174 bits (88), Expect = 1e-42
+ Identities = 154/176 (87%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 4041   aaactaaatgatttaaggagggccttggctgaaattcaaatggaaagtaatgatatttcc 4100
+              |||||||||||  ||||| |||| ||| ||||||| ||||||||||||||||||||||||
+Sbjct: 132598 aaactaaatgacctaaggcgggctttgactgaaatccaaatggaaagtaatgatatttcc 132539
+
+                                                                          
+Query: 4101   aagttcaatgtcgaagaacgtgttaatgagctatttaacgaaaagaagtctttggctttg 4160
+              || || || ||||||||||||||||||||||||||||| |||||||| ||| ||||||| 
+Sbjct: 132538 aaatttaacgtcgaagaacgtgttaatgagctatttaatgaaaagaaatctctggcttta 132479
+
+                                                                      
+Query: 4161   aagaggttaaaggaactagaaaccaaatataagggattcggtattgattttaatgt 4216
+              |||||  | || ||||||||||| ||||| || ||||| |||||||| ||||||||
+Sbjct: 132478 aagagactgaaagaactagaaacaaaatacaaaggatttggtattgactttaatgt 132423
+
+
+
+ Score =  151 bits (76), Expect = 2e-35
+ Identities = 169/200 (84%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 3756   aatattcagaaggtattacaggacggattgtcgagagcgaagaaagattataaagatgac 3815
+              ||||| || ||||| |||||||||||||| || ||||| || || ||||| || ||||| 
+Sbjct: 132883 aatatccaaaaggtgttacaggacggattatcaagagcaaaaaaggattacaaggatgat 132824
+
+                                                                          
+Query: 3816   tcgaaaaaattcggtatgacacttcaacctaataaaaggctaaagatgctaagaatgcaa 3875
+              || ||||||||||| |||||||||||||||||||| ||  |||| ||| |||||||||||
+Sbjct: 132823 tcaaaaaaattcggcatgacacttcaacctaataagagattaaaaatgttaagaatgcaa 132764
+
+                                                                          
+Query: 3876   atggaaaatattgaaaatgaagcaagacagttggaaatgacaaactttgctgagtttgaa 3935
+              ||||||||||||||||||||||| || ||  | || |||||||| |||||||| ||||| 
+Sbjct: 132763 atggaaaatattgaaaatgaagccaggcaactagagatgacaaattttgctgaatttgag 132704
+
+                                  
+Query: 3936   aaggaacgcctcgaacctcc 3955
+              || || || || ||||||||
+Sbjct: 132703 aaagatcgtcttgaacctcc 132684
+
+
+
+ Score =  129 bits (65), Expect = 7e-29
+ Identities = 119/137 (86%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 4437   gaaatataccaagatgcttctccaagtcaagaaagaagaggcgagtactcagagctatct 4496
+              |||||||||||||| || ||||||| |||||||  |||  ||||  ||||||||||||||
+Sbjct: 132196 gaaatataccaagacgcatctccaactcaagaactaaggagcgaacactcagagctatct 132137
+
+                                                                          
+Query: 4497   gcaggatcaggtcccggatcttttcttgatgccttatcccaaaaatatggcactggtcaa 4556
+               ||||||||||||| || |||||||| ||||||||||| ||||||||||| |||||||||
+Sbjct: 132136 tcaggatcaggtcctgggtcttttctcgatgccttatctcaaaaatatggtactggtcaa 132077
+
+                               
+Query: 4557   aacgtcaccgcatcggc 4573
+              || || || ||||||||
+Sbjct: 132076 aatgttactgcatcggc 132060
+
+
+
+ Score = 85.7 bits (43), Expect = 1e-15
+ Identities = 101/119 (84%), Gaps = 1/119 (0%)
+ Strand = Plus / Minus
+
+                                                                          
+Query: 890    tttttgacatggaagaggcaagaagagaaaaaacagatgtgg-aaactataatccctttt 948
+              |||||||| ||||||||| ||  | ||||||||||||||||| |||||| ||||||||||
+Sbjct: 135742 tttttgacgtggaagagggaaagaaagaaaaaacagatgtggaaaactagaatccctttt 135683
+
+                                                                         
+Query: 949    acaaacaatagcacggacgctttccggagacaacaagacactgacggctgtgtggtaat 1007
+              |  ||||||||||||||| ||| |   ||| || | |||||||||||||||||| ||||
+Sbjct: 135682 aagaacaatagcacggacccttcctacagataagaggacactgacggctgtgtgttaat 135624
+
+
+  Database: /data/blast/yeast.nt
+    Posted date:  Aug 12, 2003 12:35 PM
+  Number of letters in database: 12,155,026
+  Number of sequences in database:  17
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 45,084
+Number of Sequences: 17
+Number of extensions: 45084
+Number of successful extensions: 26
+Number of sequences better than 1.0e-10: 1
+length of query: 5116
+length of database: 12,155,026
+effective HSP length: 18
+effective length of query: 5098
+effective length of database: 12,154,720
+effective search space: 61964762560
+effective search space used: 61964762560
+T: 0
+A: 0
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 35 (69.9 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN.m8
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN.m8	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/c200-vs-yeast.BLASTN.m8	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+contig_200	gi|6323989|ref|NC_001146.1|	83.50	1000	165	0	2697	3696	133942	132943	0.0	648.7
+contig_200	gi|6323989|ref|NC_001146.1|	83.62	464	76	0	2170	2633	134469	134006	1.5e-85	317.7
+contig_200	gi|6323989|ref|NC_001146.1|	79.12	886	185	0	1246	2131	135384	134499	3.3e-77	289.9
+contig_200	gi|6323989|ref|NC_001146.1|	78.89	649	137	0	35	683	136636	135988	2.4e-50	200.7
+contig_200	gi|6323989|ref|NC_001146.1|	87.50	176	22	0	4041	4216	132598	132423	1.3e-42	174.9
+contig_200	gi|6323989|ref|NC_001146.1|	84.50	200	31	0	3756	3955	132883	132684	2.0e-35	151.2
+contig_200	gi|6323989|ref|NC_001146.1|	86.86	137	18	0	4437	4573	132196	132060	7.2e-29	129.3
+contig_200	gi|6323989|ref|NC_001146.1|	84.87	119	17	1	890	1007	135742	135624	9.6e-16	85.73

Added: trunk/packages/bioperl/branches/upstream/current/t/data/calm.swiss
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/calm.swiss	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/calm.swiss	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,282 @@
+ID   CALM_HUMAN     STANDARD;      PRT;   148 AA.
+AC   P62158; P02593; P70667; P99014; Q61379; Q61380;
+DT   21-JUL-1986 (Rel. 01, Created)
+DT   21-JUL-1986 (Rel. 01, Last sequence update)
+DT   01-OCT-2004 (Rel. 45, Last annotation update)
+DE   Calmodulin (CaM).
+GN   Name=CALM1; Synonyms=CAM1, CALM, CAM;
+GN   and
+GN   Name=CALM2; Synonyms=CAM2, CAMB;
+GN   and
+GN   Name=CALM3; Synonyms=CAM3, CAMC;
+OS   Homo sapiens (Human).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606;
+RN   [1]
+RP   SEQUENCE, ACETYLATION SITE ALA-1, AND METHYLATION SITE LYS-115.
+RC   TISSUE=Brain;
+RX   MEDLINE=82231946; PubMed=7093203;
+RA   Sasagawa T., Ericsson L.H., Walsh K.A., Schreiber W.E., Fischer E.H.,
+RA   Titani K.;
+RT   "Complete amino acid sequence of human brain calmodulin.";
+RL   Biochemistry 21:2565-2569(1982).
+RN   [2]
+RP   SEQUENCE FROM N.A.
+RX   MEDLINE=89034207; PubMed=3182832;
+RA   Fischer R., Koller M., Flura M., Mathews S., Strehler-Page M.A.,
+RA   Krebs J., Penniston J.T., Carafoli E., Strehler E.E.;
+RT   "Multiple divergent mRNAs code for a single human calmodulin.";
+RL   J. Biol. Chem. 263:17055-17062(1988).
+RN   [3]
+RP   SEQUENCE FROM N.A.
+RX   MEDLINE=88059053; PubMed=2445749;
+RA   Sengupta B., Friedberg F., Detera-Wadleigh S.D.;
+RT   "Molecular analysis of human and rat calmodulin complementary DNA
+RT   clones. Evidence for additional active genes in these species.";
+RL   J. Biol. Chem. 262:16663-16670(1987).
+RN   [4]
+RP   SEQUENCE FROM N.A.
+RX   MEDLINE=85022688; PubMed=6385987;
+RA   Wawrzynczak E.J., Perham R.N.;
+RT   "Isolation and nucleotide sequence of a cDNA encoding human
+RT   calmodulin.";
+RL   Biochem. Int. 9:177-185(1984).
+RN   [5]
+RP   SEQUENCE FROM N.A. (CALM1).
+RC   TISSUE=Blood;
+RX   MEDLINE=95010144; PubMed=7925473;
+RA   Rhyner J.A., Ottiger M., Wicki R., Greenwood T.M., Strehler E.E.;
+RT   "Structure of the human CALM1 calmodulin gene and identification of
+RT   two CALM1-related pseudogenes CALM1P1 and CALM1P2.";
+RL   Eur. J. Biochem. 225:71-82(1994).
+RN   [6]
+RP   SEQUENCE FROM N.A.
+RC   TISSUE=Lymphoma;
+RA   Kato S.;
+RL   Submitted (FEB-1995) to the EMBL/GenBank/DDBJ databases.
+RN   [7]
+RP   SEQUENCE FROM N.A. (CALM2).
+RX   MEDLINE=98346173; PubMed=9681195;
+RA   Toutenhoofd S.L., Foletti D., Wicki R., Rhyner J.A., Garcia F.,
+RA   Tolon R., Strehler E.E.;
+RT   "Characterization of the human CALM2 calmodulin gene and comparison of
+RT   the transcriptional activity of CALM1, CALM2 and CALM3.";
+RL   Cell Calcium 23:323-338(1998).
+RN   [8]
+RP   SEQUENCE FROM N.A.
+RA   Kalnine N., Chen X., Rolfs A., Halleck A., Hines L., Eisenstein S.,
+RA   Koundinya M., Raphael J., Moreira D., Kelley T., LaBaer J., Lin Y.,
+RA   Phelan M., Farmer A.;
+RT   "Cloning of human full-length CDSs in BD Creator(TM) system donor
+RT   vector.";
+RL   Submitted (MAY-2003) to the EMBL/GenBank/DDBJ databases.
+RN   [9]
+RP   SEQUENCE FROM N.A. (CALM1).
+RX   MEDLINE=22459283; PubMed=12508121; DOI=10.1038/nature01348;
+RA   Heilig R., Eckenberg R., Petit J.-L., Fonknechten N., Da Silva C.,
+RA   Cattolico L., Levy M., Barbe V., De Berardinis V., Ureta-Vidal A.,
+RA   Pelletier E., Vico V., Anthouard V., Rowen L., Madan A., Qin S.,
+RA   Sun H., Du H., Pepin K., Artiguenave F., Robert C., Cruaud C.,
+RA   Bruels T., Jaillon O., Friedlander L., Samson G., Brottier P.,
+RA   Cure S., Segurens B., Aniere F., Samain S., Crespeau H., Abbasi N.,
+RA   Aiach N., Boscus D., Dickhoff R., Dors M., Dubois I., Friedman C.,
+RA   Gouyvenoux M., James R., Madan A., Mairey-Estrada B., Mangenot S.,
+RA   Martins N., Menard M., Oztas S., Ratcliffe A., Shaffer T., Trask B.,
+RA   Vacherie B., Bellemere C., Belser C., Besnard-Gonnet M.,
+RA   Bartol-Mavel D., Boutard M., Briez-Silla S., Combette S.,
+RA   Dufosse-Laurent V., Ferron C., Lechaplais C., Louesse C., Muselet D.,
+RA   Magdelenat G., Pateau E., Petit E., Sirvain-Trukniewicz P., Trybou A.,
+RA   Vega-Czarny N., Bataille E., Bluet E., Bordelais I., Dubois M.,
+RA   Dumont C., Guerin T., Haffray S., Hammadi R., Muanga J., Pellouin V.,
+RA   Robert D., Wunderle E., Gauguet G., Roy A., Sainte-Marthe L.,
+RA   Verdier J., Verdier-Discala C., Hillier L.W., Fulton L., McPherson J.,
+RA   Matsuda F., Wilson R., Scarpelli C., Gyapay G., Wincker P., Saurin W.,
+RA   Quetier F., Waterston R., Hood L., Weissenbach J.;
+RT   "The DNA sequence and analysis of human chromosome 14.";
+RL   Nature 421:601-607(2003).
+RN   [10]
+RP   SEQUENCE FROM N.A.
+RC   TISSUE=Brain, Lung, Lymph, Placenta, and Urinary bladder;
+RX   MEDLINE=22388257; PubMed=12477932; DOI=10.1073/pnas.242603899;
+RA   Strausberg R.L., Feingold E.A., Grouse L.H., Derge J.G.,
+RA   Klausner R.D., Collins F.S., Wagner L., Shenmen C.M., Schuler G.D.,
+RA   Altschul S.F., Zeeberg B., Buetow K.H., Schaefer C.F., Bhat N.K.,
+RA   Hopkins R.F., Jordan H., Moore T., Max S.I., Wang J., Hsieh F.,
+RA   Diatchenko L., Marusina K., Farmer A.A., Rubin G.M., Hong L.,
+RA   Stapleton M., Soares M.B., Bonaldo M.F., Casavant T.L., Scheetz T.E.,
+RA   Brownstein M.J., Usdin T.B., Toshiyuki S., Carninci P., Prange C.,
+RA   Raha S.S., Loquellano N.A., Peters G.J., Abramson R.D., Mullahy S.J.,
+RA   Bosak S.A., McEwan P.J., McKernan K.J., Malek J.A., Gunaratne P.H.,
+RA   Richards S., Worley K.C., Hale S., Garcia A.M., Gay L.J., Hulyk S.W.,
+RA   Villalon D.K., Muzny D.M., Sodergren E.J., Lu X., Gibbs R.A.,
+RA   Fahey J., Helton E., Ketteman M., Madan A., Rodrigues S., Sanchez A.,
+RA   Whiting M., Madan A., Young A.C., Shevchenko Y., Bouffard G.G.,
+RA   Blakesley R.W., Touchman J.W., Green E.D., Dickson M.C.,
+RA   Rodriguez A.C., Grimwood J., Schmutz J., Myers R.M.,
+RA   Butterfield Y.S.N., Krzywinski M.I., Skalska U., Smailus D.E.,
+RA   Schnerch A., Schein J.E., Jones S.J.M., Marra M.A.;
+RT   "Generation and initial analysis of more than 15,000 full-length human
+RT   and mouse cDNA sequences.";
+RL   Proc. Natl. Acad. Sci. U.S.A. 99:16899-16903(2002).
+RN   [11]
+RP   STRUCTURE BY NMR OF 94-103.
+RX   PubMed=9927666;
+RA   Siedlecka M., Goch G., Ejchart A., Sticht H., Bierzyski A.;
+RT   "Alpha-helix nucleation by a calcium-binding peptide loop.";
+RL   Proc. Natl. Acad. Sci. U.S.A. 96:903-908(1999).
+RN   [12]
+RP   STRUCTURE BY NMR OF 1-76 AND 82-148.
+RX   PubMed=11685248; DOI=10.1038/nsb1101-990;
+RA   Chou J.J., Li S., Klee C.B., Bax A.;
+RT   "Solution structure of Ca(2+)-calmodulin reveals flexible hand-like
+RT   properties of its domains.";
+RL   Nat. Struct. Biol. 8:990-997(2001).
+RN   [13]
+RP   X-RAY CRYSTALLOGRAPHY (1.7 ANGSTROMS).
+RX   PubMed=1474585;
+RA   Chattopadhyaya R., Meador W.E., Means A.R., Quiocho F.A.;
+RT   "Calmodulin structure refined at 1.7 A resolution.";
+RL   J. Mol. Biol. 228:1177-1192(1992).
+RN   [14]
+RP   X-RAY CRYSTALLOGRAPHY (2.45 ANGSTROMS).
+RX   PubMed=7803388;
+RA   Cook W.J., Walter L.J., Walter M.R.;
+RT   "Drug binding by calmodulin: crystal structure of a calmodulin-
+RT   trifluoperazine complex.";
+RL   Biochemistry 33:15259-15265(1994).
+RN   [15]
+RP   X-RAY CRYSTALLOGRAPHY (2.75 ANGSTROMS) OF 5-148.
+RX   MEDLINE=21666515; PubMed=11807546; DOI=10.1038/415396a;
+RA   Drum C.L., Yan S.-Z., Bard J., Shen Y.Q., Lu D., Soelaiman S.,
+RA   Grabarek Z., Bohm A., Tang W.-J.;
+RT   "Structural basis for the activation of anthrax adenylyl cyclase
+RT   exotoxin by calmodulin.";
+RL   Nature 415:396-402(2002).
+RN   [16]
+RP   X-RAY CRYSTALLOGRAPHY (3.6 ANGSTROMS) OF 1-148.
+RX   PubMed=12485993;
+RA   Shen Y., Lee Y.-S., Soelaiman S., Bergson P., Lu D., Chen A.,
+RA   Beckingham K., Grabarek Z., Mrksich M., Tang W.-J.;
+RT   "Physiological calcium concentrations regulate calmodulin binding and
+RT   catalysis of adenylyl cyclase exotoxins.";
+RL   EMBO J. 21:6721-6732(2002).
+RN   [17]
+RP   X-RAY CRYSTALLOGRAPHY (2.0 ANGSTROMS).
+RX   PubMed=12577052; DOI=10.1038/nsb900;
+RA   Yamauchi E., Nakatsu T., Matsubara M., Kato H., Taniguchi H.;
+RT   "Crystal structure of a MARCKS peptide containing the calmodulin-
+RT   binding domain in complex with Ca2+-calmodulin.";
+RL   Nat. Struct. Biol. 10:226-231(2003).
+CC   -!- FUNCTION: Calmodulin mediates the control of a large number of
+CC       enzymes by Ca(2+). Among the enzymes to be stimulated by the
+CC       calmodulin-Ca(2+) complex are a number of protein kinases and
+CC       phosphatases.
+CC   -!- PTM: Ubiquitylation results in a strongly decreased activity (By
+CC       similarity).
+CC   -!- PTM: Phosphorylation results in a decreased activity (By
+CC       similarity).
+CC   -!- MISCELLANEOUS: This protein has four functional calcium-binding
+CC       sites.
+CC   -!- SIMILARITY: Contains 4 EF-hand calcium-binding domains.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; J04046; AAA51918.1; -.
+DR   EMBL; M19311; AAA35641.1; -.
+DR   EMBL; M27319; AAA35635.1; -.
+DR   EMBL; U12022; AAB60644.1; -.
+DR   EMBL; U11886; AAB60644.1; JOINED.
+DR   EMBL; D45887; BAA08302.1; -.
+DR   EMBL; U94728; AAC83174.1; -.
+DR   EMBL; U94725; AAC83174.1; JOINED.
+DR   EMBL; U94726; AAC83174.1; JOINED.
+DR   EMBL; BT006818; AAP35464.1; -.
+DR   EMBL; BT006855; AAP35501.1; -.
+DR   EMBL; BT009916; AAP88918.1; -.
+DR   EMBL; AC006536; AAD45181.1; -.
+DR   EMBL; BC000454; AAH00454.1; -.
+DR   EMBL; BC003354; AAH03354.1; -.
+DR   EMBL; BC005137; AAH05137.1; -.
+DR   EMBL; BC006464; AAH06464.1; -.
+DR   EMBL; BC008597; AAH08597.1; -.
+DR   EMBL; BC011834; AAH11834.1; -.
+DR   EMBL; BC017385; AAH17385.1; -.
+DR   EMBL; BC018677; AAH18677.1; -.
+DR   EMBL; BC026065; AAH26065.1; -.
+DR   EMBL; BC047523; AAH47523.1; -.
+DR   PIR; S48728; MCHU.
+DR   PDB; 1AJI; 17-SEP-97.
+DR   PDB; 1CLL; 31-OCT-93.
+DR   PDB; 1CTR; 20-DEC-94.
+DR   PDB; 1IWQ; 11-MAR-03.
+DR   PDB; 1J7O; 07-NOV-01.
+DR   PDB; 1J7P; 07-NOV-01.
+DR   PDB; 1K90; 23-JAN-02.
+DR   PDB; 1K93; 23-JAN-02.
+DR   PDB; 1LVC; 04-DEC-02.
+DR   PDB; 1NKF; 23-MAR-99.
+DR   SWISS-2DPAGE; P62158; HUMAN.
+DR   Aarhus/Ghent-2DPAGE; 9048; IEF.
+DR   OGP; P02593; -.
+DR   Genew; HGNC:1442; CALM1.
+DR   Genew; HGNC:1445; CALM2.
+DR   Genew; HGNC:1449; CALM3.
+DR   Reactome; P62158; -.
+DR   MIM; 114180; -.
+DR   MIM; 114182; -.
+DR   MIM; 114183; -.
+DR   GO; GO:0005737; C:cytoplasm; TAS.
+DR   GO; GO:0005886; C:plasma membrane; TAS.
+DR   GO; GO:0005509; F:calcium ion binding; TAS.
+DR   GO; GO:0005515; F:protein binding; NAS.
+DR   GO; GO:0007186; P:G-protein coupled receptor protein signalin...; TAS.
+DR   InterPro; IPR002048; EF-hand.
+DR   Pfam; PF00036; efhand; 4.
+DR   PRINTS; PR00450; RECOVERIN.
+DR   ProDom; PD000012; EF-hand; 2.
+DR   PROSITE; PS00018; EF_HAND; 4.
+KW   3D-structure; Acetylation; Calcium-binding; Direct protein sequencing;
+KW   Methylation; Phosphorylation; Repeat; Ubl conjugation.
+FT   INIT_MET      0      0
+FT   MOD_RES       1      1       N-acetylalanine.
+FT   CA_BIND      20     31       EF-hand 1.
+FT   CA_BIND      56     67       EF-hand 2.
+FT   CA_BIND      93    104       EF-hand 3.
+FT   CA_BIND     129    140       EF-hand 4.
+FT   BINDING      21     21       Ubiquitin (multi-) (By similarity).
+FT   MOD_RES      44     44       Phosphothreonine (by CaMK4) (By
+FT                                similarity).
+FT   MOD_RES     115    115       N6,N6,N6-trimethyllysine.
+FT   HELIX         6     18
+FT   TURN         21     22
+FT   TURN         29     30
+FT   HELIX        33     36
+FT   TURN         37     40
+FT   TURN         45     46
+FT   HELIX        47     54
+FT   TURN         55     55
+FT   HELIX        66     72
+FT   TURN         73     74
+FT   HELIX        75     77
+FT   TURN         78     78
+FT   TURN         81     81
+FT   HELIX        82     92
+FT   TURN         94     95
+FT   STRAND       99    100
+FT   HELIX       102    111
+FT   TURN        112    113
+FT   HELIX       118    128
+FT   STRAND      136    137
+FT   HELIX       138    146
+SQ   SEQUENCE   148 AA;  16706 MW;  464B8A287475A1CA CRC64;
+     ADQLTEEQIA EFKEAFSLFD KDGDGTITTK ELGTVMRSLG QNPTEAELQD MINEVDADGN
+     GTIDFPEFLT MMARKMKDTD SEEEIREAFR VFDKDGNGYI SAAELRHVMT NLGEKLTDEE
+     VDEMIREADI DGDGQVNYEE FVQMMTAK
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/catalase-webblast.BLASTP
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/catalase-webblast.BLASTP	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/catalase-webblast.BLASTP	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,444 @@
+BLASTP 2.2.10 [Oct-19-2004]
+
+Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman 
+(1997), "Gapped BLAST and PSI-BLAST: a new generation of 
+protein database search programs",  Nucleic Acids Res. 25:3389-3402.
+RID: 1118324516-16598-103707467515.BLASTQ1
+Query= anid_AN8553.1 hypothetical protein 51885 54086 +
+          (528 letters)
+Database: All non-redundant GenBank CDS
+translations+PDB+SwissProt+PIR+PRF excluding environmental samples 
+           2,506,223 sequences; 849,940,114 total letters
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (Bits)  Value
+
+gi|40747822|gb|EAA66978.1|  hypothetical protein AN8553.2 [Asp...  1085    0.0   
+gi|66846787|gb|EAL87118.1|  catalase, putative [Aspergillus fumig   785    0.0   
+gi|38326687|gb|AAR17472.1|  catalase [Cochliobolus heterostrophus   716    0.0   
+gi|42553486|gb|EAA76329.1|  hypothetical protein FG06596.1 [Gi...   584    2e-166
+gi|2776|emb|CAA39856.1|  catalase [Pichia angusta] >gi|231690|...   336    1e-91 
+gi|49651945|emb|CAG78888.1|  unnamed protein product [Yarrowia...   331    3e-90 
+gi|56541435|dbj|BAD77826.1|  catalase [Candida dubliniensis]        323    7e-88 
+gi|40738591|gb|EAA57781.1|  hypothetical protein AN5918.2 [Asp...   323    9e-88 
+gi|46440608|gb|EAK99912.1|  hypothetical protein CaO19.6229 [C...   323    9e-88 
+gi|56541439|dbj|BAD77828.1|  catalase [Candida tropicalis]          321    3e-87 
+ALIGNMENTS
+>gi|40747822|gb|EAA66978.1| hypothetical protein AN8553.2 [Aspergillus nidulans FGSC A4]
+ gi|49125822|ref|XP_412690.1| hypothetical protein AN8553.2 [Aspergillus nidulans FGSC A4]
+          Length=528
+
+ Score = 1085 bits (2807),  Expect = 0.0
+ Identities = 528/528 (100%), Positives = 528/528 (100%), Gaps = 0/528 (0%)
+
+Query  1    MVTTAQSQCRHATEVRPPEACLWPQTRFFFRNSSTSTGRSCWSAWFILANSSGGSGAFGH  60
+            MVTTAQSQCRHATEVRPPEACLWPQTRFFFRNSSTSTGRSCWSAWFILANSSGGSGAFGH
+Sbjct  1    MVTTAQSQCRHATEVRPPEACLWPQTRFFFRNSSTSTGRSCWSAWFILANSSGGSGAFGH  60
+
+Query  61   FEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFYTGEGNYD  120
+            FEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFYTGEGNYD
+Sbjct  61   FEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFYTGEGNYD  120
+
+Query  121  IVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGMMFFSDHG  180
+            IVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGMMFFSDHG
+Sbjct  121  IVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGMMFFSDHG  180
+
+Query  181  TPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGEDPDYSKRE  240
+            TPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGEDPDYSKRE
+Sbjct  181  TPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGEDPDYSKRE  240
+
+Query  241  LWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGKLTLNKNP  300
+            LWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGKLTLNKNP
+Sbjct  241  LWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGKLTLNKNP  300
+
+Query  301  ENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPVNCPFMAS  360
+            ENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPVNCPFMAS
+Sbjct  301  ENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPVNCPFMAS  360
+
+Query  361  SYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSHFFHEGKA  420
+            SYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSHFFHEGKA
+Sbjct  361  SYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSHFFHEGKA  420
+
+Query  421  SEYDQPRELYERVMDEKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISEKYARGVYDL  480
+            SEYDQPRELYERVMDEKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISEKYARGVYDL
+Sbjct  421  SEYDQPRELYERVMDEKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISEKYARGVYDL  480
+
+Query  481  LPEKKFGFDEVQSFakgaevagkeakFRPNMPTDKLLGLCPAMAVYGP  528
+            LPEKKFGFDEVQSFAKGAEVAGKEAKFRPNMPTDKLLGLCPAMAVYGP
+Sbjct  481  LPEKKFGFDEVQSFAKGAEVAGKEAKFRPNMPTDKLLGLCPAMAVYGP  528
+
+
+>gi|66846787|gb|EAL87118.1| catalase, putative [Aspergillus fumigatus Af293]
+          Length=573
+
+ Score =  785 bits (2027),  Expect = 0.0
+ Identities = 383/473 (80%), Positives = 421/473 (89%), Gaps = 9/473 (1%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            GSGAFG+FE TKDVS LTKAHFLRS G++TPVF RFSTVTLGRE+PD ARNPRGFAVKFY
+Sbjct  108  GSGAFGYFETTKDVSSLTKAHFLRSVGVRTPVFARFSTVTLGREFPDEARNPRGFAVKFY  167
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM
+Sbjct  168  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  227
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+            MFFSDHGTP GW+N+HGYGCHTFK VN  G+FVYIKYHF+AD GQKQ  ADEA++  GED
+Sbjct  228  MFFSDHGTPVGWRNLHGYGCHTFK-VNKRGEFVYIKYHFIADRGQKQSTADEAIQMCGED  286
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PD+SKR+L++ IE G+++SWTA+VQ+MKPE+ADP KLGFDPFDVTK        L EFGK
+Sbjct  287  PDFSKRDLYQAIEKGEKISWTAHVQIMKPEEADPTKLGFDPFDVTK--------LHEFGK  338
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+            L LNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQ+PV
+Sbjct  339  LVLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQIPV  398
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSH  413
+            NCPFMASSYSSLNFDG LRVDANHAMNPQYAPNSFVHKFR DTAEAPYQLAD TVSRKSH
+Sbjct  399  NCPFMASSYSSLNFDGPLRVDANHAMNPQYAPNSFVHKFRPDTAEAPYQLADNTVSRKSH  458
+
+Query  414  FFHEGKASEYDQPRELYERVMDEKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISEKY  473
+            F+HEGK SEYDQPR LY++VMD + R+HLH NTAR+LK+VEYP+IQ +YL QL  I+ +Y
+Sbjct  459  FYHEGKLSEYDQPRALYQKVMDARGREHLHCNTARMLKVVEYPEIQLRYLTQLYCIAPEY  518
+
+Query  474  ARGVYDLLPEKKFGFDEVQSFakgaevagkeakFRPNMPTDKLLGLCPAMAVY  526
+            ARGVYDLLPE+KF F +V++ A+GAE  GKEAKFRP+  TD L G CPA  VY
+Sbjct  519  ARGVYDLLPEQKFDFSQVKAQAQGAERVGKEAKFRPSKDTDILAGKCPATPVY  571
+
+
+>gi|38326687|gb|AAR17472.1| catalase [Cochliobolus heterostrophus]
+          Length=547
+
+ Score =  716 bits (1849),  Expect = 0.0
+ Identities = 339/473 (71%), Positives = 398/473 (84%), Gaps = 7/473 (1%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            GSGAFG+FE TKDV+ LTKA FL++ G KTP+FIRFSTVT GREYPD ARNPRGFA+KFY
+Sbjct  80   GSGAFGYFETTKDVTHLTKADFLKTVGEKTPIFIRFSTVTPGREYPDEARNPRGFAIKFY  139
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQ+RNP+NFL D++++FDLLANTPEGNHAG+
+Sbjct  140  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQWRNPKNFLFDYDAIFDLLANTPEGNHAGL  199
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+            MFFSDHGTP GW+  HGYGCHTFKWVN +G+FVYIKYHF+A+HGQKQF  ++A +  GED
+Sbjct  200  MFFSDHGTPQGWRFNHGYGCHTFKWVNKDGQFVYIKYHFVAEHGQKQFTQEQATQMCGED  259
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PDYSKR+L+ TIENG+E+ W A+VQVM PE+ADP+KLGFDPFDVTKVWP+KQFP+QEFG+
+Sbjct  260  PDYSKRDLYETIENGEEVVWKAHVQVMSPEEADPDKLGFDPFDVTKVWPRKQFPMQEFGR  319
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+            L LNKNPENFHRDVEQAAFSPGSMVPG+EDSPDPLLQFRMFFYRDAQYHRIGVNLHQ+PV
+Sbjct  320  LVLNKNPENFHRDVEQAAFSPGSMVPGVEDSPDPLLQFRMFFYRDAQYHRIGVNLHQIPV  379
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSH  413
+            NCPFMA S++SLNFDGQ+RVDANHA N  YAPNSF HKFR D AEAPYQ+ D  +SRKSH
+Sbjct  380  NCPFMAKSFASLNFDGQMRVDANHAGNKPYAPNSFAHKFRPDVAEAPYQVNDNIMSRKSH  439
+
+Query  414  FFHEGKASEYDQPRELYERVMDEKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISEKY  473
+            ++HEGK +EYDQ +EL+ RVM  + +Q+   NTA +LK V YP+IQ   +G  L      
+Sbjct  440  YWHEGKKNEYDQAKELWSRVMSVQEKQNTIKNTANMLKFVRYPEIQQYNIGTDL------  493
+
+Query  474  ARGVYDLLPEKKFGFDEVQSFakgaevagkeakFRPNMPTDKLLGLCPAMAVY  526
+            +RG+YDLLP+  F F EV+  +  A    KE KFR ++  ++L G  P+M VY
+Sbjct  494  SRGIYDLLPKPAFDFSEVEELSHTAHEWYKEKKFR-SIDGERLTGFPPSMPVY  545
+
+
+>gi|42553486|gb|EAA76329.1| hypothetical protein FG06596.1 [Gibberella zeae PH-1]
+ gi|46124437|ref|XP_386772.1| hypothetical protein FG06596.1 [Gibberella zeae PH-1]
+          Length=539
+
+ Score =  584 bits (1506),  Expect = 2e-166
+ Identities = 281/481 (58%), Positives = 338/481 (70%), Gaps = 57/481 (11%)
+
+Query  52   SGGSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVK  111
+            S G+GAFG+FE T+DV++LTKA+FL S G KTPVF+RFST TLGRE+PD +RNPRGFA+K
+Sbjct  112  SCGAGAFGYFECTRDVTELTKANFLSSVGEKTPVFVRFSTATLGREFPDASRNPRGFAIK  171
+
+Query  112  FYTGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHA  171
+            FYT EGNYDIVGLNFPVFFCRDPIQGPDVIRSQ RNP NFLLDH++LFD LAN PE NHA
+Sbjct  172  FYTKEGNYDIVGLNFPVFFCRDPIQGPDVIRSQSRNPSNFLLDHDALFDFLANNPEANHA  231
+
+Query  172  GMMFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGG  231
+            G+M FSDHGTP GW+  HGYGCHTFKWVNA+G+F+Y+KYHF+A HGQKQF   EA++  G
+Sbjct  232  GIMLFSDHGTPQGWRFSHGYGCHTFKWVNADGEFMYVKYHFIAKHGQKQFADSEAMQMCG  291
+
+Query  232  EDPDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEF  291
+            EDPDYSKR+LW  IE G+++ W  +VQ+M P  ADP+ LGFDPFD TK+WP+ +F ++E 
+Sbjct  292  EDPDYSKRDLWEAIEKGEDIEWMVHVQIMDPRQADPDTLGFDPFDATKIWPRSEFRMKEL  351
+
+Query  292  GKLTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQV  351
+            G+L LNKNPENFHRDVEQA FSPGSMVPGIEDSPDPLLQFRMF YRDAQYHR+G NLHQ+
+Sbjct  352  GRLVLNKNPENFHRDVEQAVFSPGSMVPGIEDSPDPLLQFRMFLYRDAQYHRVGTNLHQI  411
+
+Query  352  PVNCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRK  411
+            PVNCPFMA SY+S  FDG + VD + A + QY  NSF HKFR DT EAPY++ D  +   
+Sbjct  412  PVNCPFMAKSYAS-PFDGPMHVDTDRAGSKQYPLNSFAHKFRPDTDEAPYEVNDNFI---  467
+
+Query  412  SHFFHEGKASEYDQPRELYERVMDEKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISE  471
+                                                             KYL Q+  IS 
+Sbjct  468  -------------------------------------------------KYLAQVYIISV  478
+
+Query  472  KYARGVYDLLPEKKFGFDEVQSFakgaevagkeakFRP----NMPTDKLLGLCPAMAVYG  527
+             YA+G+Y+LL E +F F +V+   +       +  ++     + P  KL+G  P +AVY 
+Sbjct  479  DYAQGIYNLLDEPRFEFSKVKKLGETKFAEAADDWYKEPKFRDKPGSKLVGYPPEIAVYQ  538
+
+Query  528  P  528
+            P
+Sbjct  539  P  539
+
+
+>gi|2776|emb|CAA39856.1| catalase [Pichia angusta]
+ gi|231690|sp|P30263|CATA_PICAN Peroxisomal catalase
+ gi|228770|prf||1811225A catalase
+          Length=507
+
+ Score =  336 bits (862),  Expect = 1e-91
+ Identities = 163/331 (49%), Positives = 216/331 (65%), Gaps = 2/331 (0%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            G+GA+G FEVT D++D+  A FL + G KT +F RFSTV   +   D AR+PRGFA KFY
+Sbjct  68   GAGAYGVFEVTDDITDVCSAKFLDTVGKKTRIFTRFSTVGGEKGSADTARDPRGFATKFY  127
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            T +GN D+V  N P+FF RDPI+ P  I +Q RNP   L D N  +D L    E  H  M
+Sbjct  128  TEDGNLDLVYNNTPIFFIRDPIKFPHFIHTQKRNPATNLKDPNMFWDYLTANDESLHQVM  187
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+              FS+ GTPA ++ ++GY  HT+KW N++G++VY++ HF+A+ G      +EA R  GED
+Sbjct  188  YLFSNRGTPASYRTMNGYSGHTYKWYNSKGEWVYVQVHFIANQGVHNLLDEEAGRLAGED  247
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PD+S R+LW  IE G   SW  Y+Q M  E +  +KL F  FD+TKVWP K FPL+ FG+
+Sbjct  248  PDHSTRDLWEAIEKGDYPSWECYIQTMTLEQS--KKLPFSVFDLTKVWPHKDFPLRHFGR  305
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+             TLN+NP+N++ + EQ AFSP   VPG+E S DP+LQ R+F Y D   HR+G N HQ+PV
+Sbjct  306  FTLNENPKNYYAETEQIAFSPSHTVPGMEPSNDPVLQSRLFSYPDTHRHRLGPNYHQIPV  365
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYA  384
+            NCP  + S++ +N DG + VD N    P YA
+Sbjct  366  NCPLKSGSFNPINRDGPMCVDGNLGGTPNYA  396
+
+
+>gi|49651945|emb|CAG78888.1| unnamed protein product [Yarrowia lipolytica CLIB99]
+ gi|50557334|ref|XP_506075.1| hypothetical protein [Yarrowia lipolytica]
+          Length=492
+
+ Score =  331 bits (849),  Expect = 3e-90
+ Identities = 189/438 (43%), Positives = 251/438 (57%), Gaps = 28/438 (6%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            GSGA+G FEVT D++DL  A FL   G KT  F RFSTV   +   D AR+PRGFA KFY
+Sbjct  60   GSGAYGEFEVTDDITDLNCADFLSKIGKKTKTFTRFSTVGGEKGSADAARDPRGFATKFY  119
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            T EGN D V  N PVFF RDP + P  I +Q RNP+  L D   ++D +AN  E  H  M
+Sbjct  120  TDEGNIDWVYNNTPVFFIRDPSKFPVFIHTQKRNPETNLKDATMMWDYIANNQECCHQIM  179
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+            + FSD GTPA ++ ++GY  HT+KW+  +G F Y++ H   D G K    DEA+   G +
+Sbjct  180  VLFSDRGTPANYRQMNGYSGHTYKWIKKDGSFNYVQIHMKTDQGIKNLTNDEAVALSGTN  239
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PD+++ +L+ +I++G   SWT YVQV  PE A  EKL +  FD+TKVWP  QFPL+ FGK
+Sbjct  240  PDHAQEDLFNSIKSGSFPSWTCYVQVCTPEQA--EKLKWSVFDLTKVWPHDQFPLRRFGK  297
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+            LTLNKN +N+  + EQAAFSP + VPG E S DP+LQ R+F Y D Q HR+G N  Q+PV
+Sbjct  298  LTLNKNVQNYFAETEQAAFSPSNTVPGWETSADPVLQSRLFSYPDTQRHRLGTNFAQIPV  357
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSH  413
+            NCP+ A  ++  + DGQ+ V+ N    P Y P+SF           P Q        + H
+Sbjct  358  NCPYHA--HTPYHRDGQMAVNGNSGSLPNY-PSSF----------EPLQYRQDINLHEKH  404
+
+Query  414  FFHEGKASEY-----------DQPRELYERV-MDEKARQHLHTNTARLLKLVEYPKIQAK  461
+                G+A  Y            QP EL++ +      ++HL  N A  L     P++Q K
+Sbjct  405  EKWVGEAVAYQWVVGTDGVDFQQPAELWKVLGKTPDQQEHLVYNIAVSLSGAR-PEVQDK  463
+
+Query  462  YLGQLLRISEKYARGVYD  479
+              G   ++   + + V D
+Sbjct  464  TFGMFDKVDAHFGKLVRD  481
+
+
+>gi|56541435|dbj|BAD77826.1| catalase [Candida dubliniensis]
+          Length=485
+
+ Score =  323 bits (829),  Expect = 7e-88
+ Identities = 166/390 (42%), Positives = 239/390 (61%), Gaps = 6/390 (1%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            GSGA+G FEVT D++D+  A FL + G KT VF RFSTV       D AR+PRGFA KFY
+Sbjct  56   GSGAYGVFEVTDDITDVCAAKFLDTVGKKTRVFTRFSTVGGELGSADTARDPRGFATKFY  115
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            T EGN D+V  N PVFF RDP + P  I +Q RNP+  L D N  +D L +  E  H  M
+Sbjct  116  TEEGNLDLVYNNTPVFFIRDPSKFPHFIHTQKRNPETHLKDANMFWDYLTSNEESIHQVM  175
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+            + FSD GTPA ++ ++GY  HT+KW N +G++ Y++ HF++D G K    +EA    G +
+Sbjct  176  ILFSDRGTPASYREMNGYSGHTYKWSNKKGEWFYVQVHFISDQGIKTLTNEEAGALAGSN  235
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PDY++ +L++ I  G   SWTAY+Q M   +A+ ++  F  FD+TKVWP K++PL+ FGK
+Sbjct  236  PDYAQEDLFKNIAAGNYPSWTAYIQTM--TEAEAKEADFSVFDLTKVWPHKKYPLRRFGK  293
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+             TLN+NP+N+  +VEQAAFSP   VP +E S DP+LQ R+F Y D   HR+G N  Q+PV
+Sbjct  294  FTLNENPKNYFAEVEQAAFSPAHTVPYMEPSADPVLQSRLFSYADTHRHRLGTNYTQIPV  353
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSH  413
+            NCP   + ++    DG + V+ N   +P Y  +    +F+  + +   ++ +G     + 
+Sbjct  354  NCPVTGAVFNPHMRDGAMTVNGNLGSHPNYLASDKPVEFKQFSLQEDQEVWNGAA---TP  410
+
+Query  414  FFHEGKASEYDQPRELYERVMDEKARQHLH  443
+            F  +   +++ Q +EL+ +V+     Q  H
+Sbjct  411  FHWKATPADFKQAQELW-KVLKRYPNQQEH  439
+
+
+>gi|40738591|gb|EAA57781.1| hypothetical protein AN5918.2 [Aspergillus nidulans FGSC A4]
+ gi|49097190|ref|XP_410055.1| hypothetical protein AN5918.2 [Aspergillus nidulans FGSC A4]
+          Length=501
+
+ Score =  323 bits (828),  Expect = 9e-88
+ Identities = 162/336 (48%), Positives = 214/336 (63%), Gaps = 6/336 (1%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            G+GA+G FEVT D+SD+T    L+  G KT  F+RFSTV   +  PD AR+PRGFA KFY
+Sbjct  69   GAGAYGEFEVTDDISDITVIDMLKGVGKKTKTFVRFSTVGGEKGSPDSARDPRGFACKFY  128
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            T EGN+D V  N PVFF RDP + P  I +Q RNPQ  L D    +D L+   E  H  M
+Sbjct  129  TEEGNWDWVFNNTPVFFLRDPSKFPMFIHTQKRNPQTNLKDATMFWDYLSTHQEAVHQVM  188
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+              FSD GTP  +++++GY  HT+KW+  +G F Y++ H   D G K F   EA R   E+
+Sbjct  189  HLFSDRGTPYSYRHMNGYSGHTYKWIKPDGTFNYVQLHLKTDQGNKTFTDAEATRLAAEN  248
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PD+  ++L+  I  G+  SWT YVQ + PE A  EK  ++ FD+TKVWP+ + PL+ FG+
+Sbjct  249  PDWHTQDLFNAIARGEYPSWTCYVQTLSPEQA--EKFRWNIFDLTKVWPQSEVPLRRFGR  306
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGV-NLHQVP  352
+             TLNKNPEN+  +VEQAAFSP  +VPG+E S DP+LQ R+F Y D   HR+G  N   +P
+Sbjct  307  FTLNKNPENYFAEVEQAAFSPSHLVPGVEPSADPVLQARLFSYPDTHRHRLGTSNYQSIP  366
+
+Query  353  VNCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSF  388
+            VNCP  A  ++  + DG + V+ NH  NP Y P++F
+Sbjct  367  VNCPLRA--FTPFHRDGAMSVNGNHGANPNY-PSTF  399
+
+
+>gi|46440608|gb|EAK99912.1| hypothetical protein CaO19.6229 [Candida albicans SC5314]
+ gi|46440519|gb|EAK99824.1| hypothetical protein CaO19.13609 [Candida albicans SC5314]
+ gi|2317282|dbj|BAA21767.1| catalase [Candida albicans]
+          Length=485
+
+ Score =  323 bits (828),  Expect = 9e-88
+ Identities = 165/390 (42%), Positives = 239/390 (61%), Gaps = 6/390 (1%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            GSGA+G FEVT D++D+  A FL + G KT +F RFSTV       D AR+PRGFA KFY
+Sbjct  56   GSGAYGVFEVTDDITDICAAKFLDTVGKKTRIFTRFSTVGGELGSADTARDPRGFATKFY  115
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            T EGN D+V  N PVFF RDP + P  I +Q RNP+  L D N  +D L +  E  H  M
+Sbjct  116  TEEGNLDLVYNNTPVFFIRDPSKFPHFIHTQKRNPETHLKDANMFWDYLTSNEESIHQVM  175
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+            + FSD GTPA ++ ++GY  HT+KW N +G++ Y++ HF++D G K    +EA    G +
+Sbjct  176  VLFSDRGTPASYREMNGYSGHTYKWSNKKGEWFYVQVHFISDQGIKTLTNEEAGALAGSN  235
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PDY++ +L++ I  G   SWTAY+Q M   +A+ ++  F  FD+TKVWP K++PL+ FGK
+Sbjct  236  PDYAQEDLFKNIAAGNYPSWTAYIQTM--TEAEAKEAEFSVFDLTKVWPHKKYPLRRFGK  293
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+             TLN+NP+N+  +VEQAAFSP   VP +E S DP+LQ R+F Y D   HR+G N  Q+PV
+Sbjct  294  FTLNENPKNYFAEVEQAAFSPAHTVPYMEPSADPVLQSRLFSYADTHRHRLGTNYTQIPV  353
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSH  413
+            NCP   + ++    DG + V+ N   +P Y  +    +F+  + +   ++ +G     + 
+Sbjct  354  NCPVTGAVFNPHMRDGAMTVNGNLGSHPNYLASDKPVEFKQFSLQEDQEVWNGAA---TP  410
+
+Query  414  FFHEGKASEYDQPRELYERVMDEKARQHLH  443
+            F  +   +++ Q +EL+ +V+     Q  H
+Sbjct  411  FHWKATPADFKQAQELW-KVLKRYPNQQEH  439
+
+
+>gi|56541439|dbj|BAD77828.1| catalase [Candida tropicalis]
+          Length=485
+
+ Score =  321 bits (823),  Expect = 3e-87
+ Identities = 171/436 (39%), Positives = 254/436 (58%), Gaps = 11/436 (2%)
+
+Query  54   GSGAFGHFEVTKDVSDLTKAHFLRSPGIKTPVFIRFSTVTLGREYPDLARNPRGFAVKFY  113
+            GSGA+G FEVT D++D+  A FL + G KT +F RFSTV       D AR+PRGFA KFY
+Sbjct  56   GSGAYGVFEVTDDITDICAAKFLDTVGKKTRIFTRFSTVGGEAGSADTARDPRGFATKFY  115
+
+Query  114  TGEGNYDIVGLNFPVFFCRDPIQGPDVIRSQYRNPQNFLLDHNSLFDLLANTPEGNHAGM  173
+            T EGN D+V  N PVFF RDP + P  I +Q RN +  L D N  +D L +  E NH  M
+Sbjct  116  TEEGNLDLVYNNTPVFFIRDPSKFPHFIHTQKRNSETHLKDANMFWDYLTSNEESNHQVM  175
+
+Query  174  MFFSDHGTPAGWQNIHGYGCHTFKWVNAEGKFVYIKYHFLADHGQKQFNADEALRYGGED  233
+            + FSD GTPA ++ ++GY  HT+KW N +G++ Y++ HF+ D G K    +EA    G +
+Sbjct  176  ILFSDRGTPASYREMNGYSGHTYKWSNKKGEWHYVQVHFITDQGNKTLTNEEAGSLAGSN  235
+
+Query  234  PDYSKRELWRTIENGKELSWTAYVQVMKPEDADPEKLGFDPFDVTKVWPKKQFPLQEFGK  293
+            PDY++ +L++ +  G   SWT Y+Q M   +A  ++  F  FD+TKVWP  ++PL+ FGK
+Sbjct  236  PDYAQEDLFKNMLAGNYPSWTCYIQTM--TEAQAKEADFSVFDLTKVWPHGKYPLRRFGK  293
+
+Query  294  LTLNKNPENFHRDVEQAAFSPGSMVPGIEDSPDPLLQFRMFFYRDAQYHRIGVNLHQVPV  353
+             TLN+NP+N+  +VEQAAFSP   VP +E S DP+LQ R+F Y D   HR+G N  Q+PV
+Sbjct  294  FTLNENPKNYFAEVEQAAFSPAHTVPYMEPSADPVLQSRLFSYSDTHRHRLGTNYTQIPV  353
+
+Query  354  NCPFMASSYSSLNFDGQLRVDANHAMNPQYAPNSFVHKFRTDTAEAPYQLADGTVSRKSH  413
+            NCP   + ++    DG + V+ N   +P Y  +    +F+  + +   ++  G     + 
+Sbjct  354  NCPVTGAVFNPHMRDGAMNVNGNLGSHPNYLASDKPIEFKQFSLQEDQEVWHGAA---TP  410
+
+Query  414  FFHEGKASEYDQPRELYERVMD-EKARQHLHTNTARLLKLVEYPKIQAKYLGQLLRISEK  472
+            F  +   +++ Q +EL++ +      ++HL  N A      + P IQ K +    ++S +
+Sbjct  411  FHWKATPADFKQSQELWKVLKRYPNQQEHLAHNVAVHASAADAP-IQDKVIAYFAKVSPE  469
+
+Query  473  YA----RGVYDLLPEK  484
+             +    + + +L P K
+Sbjct  470  LSNLIKKEILELSPRK  485
+
+
+Database: All non-redundant GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding 
+environmental samples
+  Posted date:  Jun 8, 2005  3:44 AM
+Number of letters in database: 849940114
+Number of sequences in database:  2506223
+Lambda     K      H
+   0.321    0.138    0.437 
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Sequences: 147123
+Number of Hits to DB: 13821951
+Number of extensions: 654954
+Number of successful extensions: 1258
+Number of sequences better than 10: 15
+Number of HSP's better than 10 without gapping: 14
+Number of HSP's gapped: 1225
+Number of HSP's successfully gapped: 15
+Number of extra gapped extensions for HSPs above 10: 1206
+Length of query: 528
+Length of database: 849940114
+Length adjustment: 116
+Effective length of query: 412
+Effective length of database: 69377587
+Effective search space: 28583565844
+Effective search space used: 21552263428
+T: 11
+A: 40
+X1: 16 (7.4 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 41 (20.4 bits)
+S2: 69 (31.2 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cds-266.fas
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cds-266.fas	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cds-266.fas	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,259 @@
+>183.m01790 |||similar to unknown protein||chr_13|chr13|183
+ATGGACGACAAAGAACTCGAAATACCGGTAGAACATTCCACGGCTTTCGGTCAGCTCGTG
+ACGGGTCCCCCGGGAGCGGGTAAATCGACCTATTGTCATGGCTTACATCAGTTCCTTACA
+GCCATCGGTAGACCAGTGCATATCATCAACCTCGATCCTGCAGTCCCAAACCCTCCGTAT
+CCATGCTCTATAAACATCACGGAACTCATCACACTCGAAAGTGTTATGGAGGAATACAAT
+CTAGGACCGAATGGGGCGATGCTTTATTGTATAGAATTCTTAGAGGCCAATTTTGACTGG
+CTAGTGGAGAGGCTGGATGAGGTCTTGGCTGAAGAGGGGGGGAATGGATATGTGGTGTTT
+GATACGCCGGGTCAAGCAGAGTTATGGACGAACCATGATAGTTTGAAGAACGTGGTCGAA
+AAGTTGGTCAAGATGGACTATAGACTAGCGGCTGTGCATCTCAGCGACGCGCACTACATA
+ACAGATGCCTCAAAATTCATCTCTGTAGTTTTGCTAGCTCTTCGGGCGATGCTGCAAATG
+GAAATGCCGCATTTGAATGTGCTCAGCAAAATAGATTTGATATCAACTTATGGAGAGCTC
+CCGTTCGACTTGAGCTATTACACAGAAGTCCAAGATCTGTCATACTTACTGGGCAGTCTG
+GATTCAGACCCTCGAACAGCAAAGTACCACAAGTTAAATAAAGCGTTGGTAGAGCTTATA
+GAAGGCTTTTCATTAGTCGGATTTCAAACCCTCGCTGTTGAGGACAAAGAATCAATGCTT
+AATATCGTCCGTCTTGTCGATAAGATGACGGGCTACATATTTATTCCGTCTGGCGACCTC
+GAAGGAACCAACGCCATCAATACCCAAGCTCTGTTTGGTAGTGCCATGTCGTCGGCGAAG
+CTTACAGGAAGAGCAGGCGGGGACGTAAGAGATGTTCAGGAGAGATGGATGGATAACAAG
+GAGGCTTGGGATGAATGGGAGAAGAAAGAATGGAAGAGAGAAGCAGAGATAAGAGCCCAG
+ATGGGCACTGGAATACCAGAAGGGATGAAAGGCGGTGAAGATGCGGAAAGTACAGGTATA
+>AAL117C location=AgChr1:complement(140329..141372)
+ATGGCGTATGGACAGATTGTGATAGGTCCACCGGGGTCTGGGAAGTCGACATACTGTAAT
+GGGTGCAGCCAGTTCTTTAATGCCATCGGCAGACACGCTCGGATCGTGAACATGGACCCT
+GCAAACGACTCGCTGCCCTACCAATGCGATGTAGACATTCGAGACTTTATTACTCTGGAG
+GAAATCATGAACGAGCAGCACCTGGGGCCCAACGGAGGGCTGGTGTATGCGTTTGAGTCG
+GTGGAGCACTCACTGTCGCTGTTTGCGCTGCAGATCAAGACGCTGGTCAAGGATGAGAAC
+GCATATCTCGTCTTTGACTGCCCCGGTCAGGTGGAGCTGTTCACGCATCACTCGGCGCTC
+TCCAAGATATTCCAGCAGCTGGTGCGCGACTTGGACCTACGAGTGTGCGTGGTGAACTTG
+ATGGACAGCATCTACATTACATCGCCGTCGCAGTATGTCTCGGTACTGCTGCTGGCGCTG
+CGCTCAATGTTGATGATGGACCTGCCCCATATTAACGTTCTCTCTAAGATCGATATGCTG
+AGCTCGTACGGCGACCTGCCGTTCCGGCTCGACTACTATACCGAGGTGCAAGACTTGGAG
+TATCTGCAACCGCATATTGAACGCGAACACAAGGGAGCCAAGGCGTTGAGGTACCGCCGA
+CTAACGGAGGCCATAGGAGAGGTGGTTTCGGACTTCAACCTGGTCGCCTTCGAGGTGCTT
+TGCGTCGATGACAAACAGAGCATGATCAACTTGCAAAGCGCAATCGACAAGGCCAATGGT
+TATATTTTTGGTGCCTCCGAAGTTGGTGGCGATACTGTGTGGGCGGAGGCAACCCGCCAG
+GGCACTGCTGCAATTGAATATGACATTCAGGACAGATGGATCGACAACAAGGACTTTTAT
+GACAAGGAGGAAGAGGCTAGGCGCAAGAAGTTACTTGAGGAGCATGAGCTTCTGGAGAAA
+GAAGTTGATGTCAACCAGGATGATGAATGGGAACGCGCAGTGAAGGAATGGGAGTCCCAG
+CACTCTGTGAACTTCGTTAAA
+>AN2438.1 hypothetical protein (53856 - 52862)
+ATGAGTGAGGATCAATTGGGTCCGAACGGCGGTGTTTTGTATGCGTTGGAAGAGCTAGAG
+GAGAACTTTGACTTCTTGGAGGAAGGGTTGAAAGAGCTCGGAGAGGACTATATTATCTTC
+GATTGTCCCGGCCAGGTAGAAATTTTCACTCACCATTCGTCCTTACGGAATATCTTCTTC
+AAGATCCAGAAGATGGGCTATAGACTAATAGTACTACACCTAATCGACTCCTACAACCTC
+ACCCTGCCATCGATGTACATCTCCTCTCTTATTCTATGCTTGCGTGCCATGCTCCAAATG
+GACCTTCCACATCTCAACGTCCTAACAAAAATCGATAATTTGTCCAATTATACTTCGCTG
+CCTTTCAACCTAGATTTCTACACCGAGGTTCAGGACCTTACATACCTCCTCCCCCACTTA
+GAGGCAGAGTCCTCCCGGCTATCGCACGAGAAGTTCGGAGCACTGAACAACGCCATCATC
+ACACTGATTGAGGAGTTTGGACTCGTGGGCTTCGAAACACTGGCTGTAGAAGATAAAAAG
+AGCATGATGAATTTGCTCCGGGCCATTGACCGCGCAAGTGGATACGTGTTTGGGCCTGCA
+GAAGGCGCAAATGACTCCGTTTGGCAAGTGGCTGTTCGGGAAGGAATGGGGTCCATGGAT
+ATCCGTGATATTCAAGAGCGTTGGATAGATGCCAAAGACGAGTACGATGAGTTGGAACGA
+CGGCAGCGAGAGGAGGAGATAAAAAATCACCAGCAAGCTGCAACCTACCAGGCAGGGAAC
+GAGGACGACGACGATGATAACGATTACGAATTCGGGCGCAGGATGCCTGTACCAGACAGT
+GGAGTGAAAGTGATGCGGAAG
+>FG05298.1 hypothetical protein (258181 - 259340)
+ATGCCTTTCGCGCAACTCGTTCTCGGTAGTCCGGGCTGCGGAAAGAGTACATACTGTGAT
+GGCATACAGCTGACCGGTCAAGTGCATCAGTTCCTAGGCGCCATCGGGCGAGCCTGTTCA
+GTCGTCAATCTCGATCCTGCCAACGATCATACCAACTACCCTGCAGCTCTCGACATTCGC
+AGTTTGATTAAGCTCGAGGAGATTATGAAAGATGATAAATTAGGACCTAATGGCGGCATC
+CTGTATGCCCTCGAAGAGTTGGAACACAATTTCGAGTGGTTGGAAGAAGGACTGAAAGAA
+TTCAGCGAAGACTATATTCTTTTCGACTGTCCGGGACAAGTGGAACTATATACACACCAC
+AACTCCTTGCGAAACATATTCTACAAGCTCCAGAAGATTGGATTCAGGCTTGTTTCCGTC
+CACCTCTCCGACTCCTTCTGCCTCACGCAACCGTCGTTATACGTATCGAACGTCCTCCTC
+TCCCTTCGTGCGATGATCCAGATGGATATGCCACACATAAATATTCTCTCCAAGATCGAC
+AAAGTTGCCGACTACGACGAACTCCCTTTCAACCTCGATTACTACACAGACGTGGACGAC
+CTTACATATTTGACACCCCATCTTGAGACAGAGTCGCCCGCTCTGAGGAGTGAGAAATTC
+GGCAAGCTCAACGAGGCGATTGCGAATCTGATCGAGAGCTACGGTCTGGTGCGCTATGAA
+GTCCTGGCTGTCGAGAACAAGAAAAGCATGATGCATATCCTCCGTGTCATTGACCGTGCT
+GGTGGATACGTCTTTGGTAGTGCTGAAGGAGCCAATGATACAGTCTGGTCAGTTGCCATG
+AGGAACGAGTCGTCCATGTTGGGGGTGCAGGACATCCAAGAGCGTTGGATCGACCAAAAG
+GTGGAATATGATCAAATGGAGCGTGAGGCCGAAGAAGAACAGGCGCGCATCCAAGAAGAA
+CAAGCCATGGAGATGGAACAATCACAGCCACCTCCTGCGCCGACAGGTGGCATGGATCCT
+GATTTTGGTGACATGACGGTGCCCAAAGATAGTGGGATCAAAGTAGTTAGAAAG
+>MG06110.4 hypothetical protein similar to (NCU09745.1) hypothetical protein (25629 - 24026)
+ATGGGATTTCTAGGCGCAATAGGGAGAGCATGTTCCGTAGTAAACCTTGACCCGGCCAAT
+GACCATACGAGCTATCCATGTGCCCTCGACATACGAAATCTTGTCACGCTGGAGGAAATC
+ATGGGAGACGACAATTTGGGGCCAAACGGTGGCATCCTCTACGCTATTGAAGAGCTGGAG
+CATAACTTTGAGTGGTTGGAAGATGGTCTGAAAGAGCTTGGGGACGACTACATACTATTC
+GACTGCCCGGGCCAGGTCGAGCTGTACACACATCACAATTCATTGCGCAATATCTTCTTC
+AAGTTACAAAAGCTCGGCTACAGACTTGTGGTTGTTCACCTCTCGGACAGCATTTGCCTC
+ACTCAACCATCGTTGTACATCTCGAATCTCCTCCTCGCTTTGCGCGCCATGCTCCAGATG
+GATCTTTCCCATGTCAATGTCCTCACCAAAATCGACAAGGTGTCTTCATATGACAGACTA
+GCCTTCAACCTCGACTTTTATACCGAGGTCCACGATCTTTCGTACCTCCTCCCCGAGCTC
+GAAGCCGAGAATCCGTCGCTACGCAGCGAAAAGTTCGCCAAGCTAAACCGAGCCGTCGCA
+AACTTGATTGAAGACTTTGGGCTCGTCCGGTTCGAAGTCTTGGCTGTCGAGAATAAGAAA
+AGTATGATGCATTTGCTCCGGGTCCTCGATCGTGCCAACGGGTACGTTTTTGGTGGGGCC
+GAGGGAGCCAACGACACCGTTTGGCAAGTAGCCATGCGCAACGAGGGCTCCCTGATGGGG
+GTCCAAGATATCCAGGAGCGCTGGATCGATAACAAAGAGGCTTATGACGAGATGGAGCAG
+CGTGAATGGGAGGAACAGGTCAAGGCACAAGAAGCCATGGCCGAAGCCGATGCAGCAGCT
+GCTGAAGAGGGCGACGATGACTTGATGGGAGGCCCAGGTGCTCGA
+>NCU09745.1 (NCU09745.1) hypothetical protein (81475 - 83184)
+ATGACCTCCCCACTGCCAGTGCAGCAGTTTATGGGCGCCATCGGGCGACAATGCTCGGTA
+GTCAACCTCGACCCTGCGAACGACCACACCAACTACCCATGCGCGCTCGACATTCGCGAC
+CTTGTCACTTTGGAGGAGATTATGGCAGACGACAAATTGGGTCCCAATGGCGGTATTCTG
+TACGCACTTGAAGAGCTGGAAAATAACATGGAATGGCTCGAGAACGGCCTCAAGGAGCTT
+GGAGAAGACTATGTGCTTTTTGACTGCCCTGGTCAAGTCGAGCTCTACACCCACCACAAC
+TCGTTACGCAACATCTTTTACCGGTTACAGAAGCTGGGCTACAGGCTGGTAGTTGTCCAC
+CTTTCCGACTGCTTCTGCCTCACACAACCATCGCTCTACATTTCCAACGTCCTCCTCTCT
+TTGCGCGCCATGTTGCAAATGGACCTTCCCCACATCAACGTCCTGACCAAGATTGACAAG
+ATCTCGTCCTACGATCCTCTTCCATTCAACCTCGACTATTACACCGAAGTACAAGACCTA
+CGGTACCTCATGCCGTCCCTCGACGCGGAATCGCCTGCCCTGAAGAAAGGCAAGTTCACC
+AAGCTTAACGAGGCCGTTGCGAACATGGTTGAGCAGTTCGGCCTTGTCAGCTTCGAGGTG
+CTGGCAGTCGAGAACAAGAAGAGTATGATGCATCTGTTGCGCGTGATTGACCGTGCAAGT
+GGGTACGTCTTTGGCGGCGCTGAGGGAACGAACGACACCGTCTGGCAGGTTGCCATGCGC
+AACGAGTCATCATTGCCCGATGCTCTTGATATTCAAGAGAGGTGGATCGATAGCAAAGAA
+GAGTATGACGAGATGGAGCGGAAGGAGGAGGAAGAACAAGAAAAACTGCGGGCGGAGCAG
+GCACGGGCCGCTGAAGAAGCAGGTCTCGGTGACGGCTCGGTCCCTGGAGTGGCGCCACAG
+TTCACCAGTGGCTCGGGAATCCGTGTGACGCTTAGCCTAGTGGCCGCTTTTACCAAATAT
+AGCGATCTT
+>SPAC144.07c SPAC144.07c conserved eukaryotic protein; ATP-binding protein; similar to S. cerevisiae YOR262W
+ATGCCATTTTGTCAAGTGGTCGTTGGACCTCCGGGTTCTGGGAAATCAACTTACTGTTTC
+GGAATGTACCAATTATTATCTGCCATAGGAAGGAGTAGTATTATCGTCAATCTTGACCCA
+GCAAATGACTTTATCAAATACCCATGCGCAATTGATATTCGTAAAGTTCTCGATGTTGAG
+ATGATCCAAAAAGACTATGATTTAGGACCAAATGGAGCACTTATTTATGCTATGGAAGCA
+ATTGAATATCACGTTGAATGGTTGCTTAAGGAGCTAAAAAAGCATCGAGATTCATATGTG
+ATATTTGATTGCCCTGGTCAAGTTGAGTTATTTACAAACCATAATTCCTTACAAAAAATA
+ATCAAAACTTTGGAAAAGGAACTGGATTATAGACCTGTGTCCGTACAACTTGTAGATGCA
+TATTGCTGCACGAATCCTTCTGCATATGTTAGTGCACTGCTTGTTTGCCTAAAGGGGATG
+CTTCAGCTGGACATGCCACATGTAAATATTTTGTCGAAGGCTGATTTGCTTTGTACGTAT
+GGAACTTTACCAATGAAACTAGATTTTTTTACCGAAGTACAAGACCTTTCATATTTGGCG
+CCTTTGCTTGATAGAGATAAACGTCTTCAGCGCTATAGTGATTTAAACAAAGCTATTTGT
+GAACTTGTTGAAGATTTTAATCTTGTTTCTTTTGAAGTTGTTGCAGTAGAAAATAAAGCC
+AGTATGTTACGTGTTCTTCGAAAAATCGATCAAGCAGGTGGATATGCATATGGATCTACA
+GAAATTGGTGGTGATGCCGTTTGGGTGAATGCCGTTCGTCAAGGTGGAGACCCTCTTCAA
+GGTATTTCGCCTCAGGAAAGATGGATTGACAAGAAAGAGGAATATGACAAATATGAATGG
+GAATTAGAGCAAAAATCGACCATGGACGAAGATGAAAATGAAGGG
+>Sbay_Contig635.43 YOR262W, Contig c635 67551-68594
+ATGCCTTTTGCTCAGATTGTTATTGGACCCCCGGGTTCAGGGAAGTCTACGTATTGTAAC
+GGATGTTCACAATTTTTTAATGCTATTGGGAGACATTCTCAGGTGGTAAATATGGATCCC
+GCCAATGATGCCTTACCTTATCCGTGTGCTGTGGATATCAGAGATTTTATAACTTTGGAA
+GAGATCATGAAAGAGCAACACTTGGGCCCTAATGGTGGTTTGATGTATGCCGTTGAATCT
+CTAGATAAGTCCATTGATTTATTTATACTACAGATCAAATCACTTGTAGAAGAAGAGAAG
+GCATATGTTGTGTTTGACTGCCCGGGACAAGTTGAGCTGTTTACGCATCATTCTTCATTA
+TTCAGCATTTTCAAGAAATTAGAAAAAGAACTAGATATGAGATTCTGTGTGGTGAATTTG
+ATTGATTGTTTTTACATGACATCTCCTTCACAATATGTCTCGATTTTGCTCCTGGCATTA
+AGGTCTATGCTGATGATGGACCTGCCCCATATCAACGTCTTTTCGAAGATAGATAAGTTG
+AAATCATATGGAGAATTGCCATTTAGATTAGATTATTATACAGAAGTTCAAGATTTGGAT
+TATTTGGAGCCGTATATTGAAAAAGAAGGTTCTGGTGCACTGGGAAAAAGATATAGCAAA
+TTGACTGAAACGATTAGTGAGCTGGTTTCTGATTTTAACCTGGTTTCCTTTGAAGTTTTG
+GCTGTGGATGACAAAGAAAGTATGATAAATCTCCAGGGTGTTATTGATAAAGCCAATGGT
+TACATATTTGGTGCATCTGAAGTGGGCGGCGACACGGTATGGGCCGAGGCCTCGAGAGAA
+GGTGCATTGCTAGCAAGCTATGATATTCAAGATAGGTGGATAGATAATAAAGAAAAATAT
+GATAAAGAAGAACAAGAGAAACGGGCTGCAATGGTGAAAGAGCAGGAACTGCAAAATAAA
+GAGGTTAATGTAGACGAAGAAGACGAGTGGGAAAATGCACTAAACGACTGGGAAGAAAAA
+CAAGGCACAGATTTTGTCAGG
+>Scas_Contig692.20 YOR262W, Contig c692 40768-41811
+ATGCCATTTGCCCAAATTGTTATCGGACCCCCCGGTTCAGGAAAATCAACATACTGTAAC
+GGGTGTTCTCAATTTTTCAACGCCATCGGCAGGCATGGCCAAATAGTGAACATGGATCCA
+GCTAATGATGCTCTACCATATCCATGTGCAGTAGACATTCGAGATTTTGTGACTCTGGAG
+GAGATTATGCAAGAGCAACAACTGGGCCCCAATGGAGGGTTGATGTATGCTGTGGAATCG
+TTAGATGAATCCATCGATCTTTTCATACTACAAATAAAATCTCTAGTTCAAGAGGAGAAG
+GCATATTTAGTCTTTGATTGTCCTGGACAAGTAGAGTTGTTTACTCATCATTCATCTCTG
+TTCAAAATCTTCAAAAAATTGGAAAAGGAACTAGATATGCGATTTTGTGTGGTGAATTTG
+ATTGATTCTTTCTATATTACCTCCCCATCACAGTATGTTTCCATTTTGCTGTTGGCTTTG
+AGATCTATGTTAATGATGGACCTACCGCAAATCAATGTTTTCTCCAAGATTGATATGCTG
+AAATCCTATGGAGAACTACCTTTTAGATTGGATTATTACACAGAAGTGCAAGATTTAGAT
+TATTTACAGCCATTTATTGAGAAGGAGAGTTCCAGTGTTTTGGGTAGAAGATATAGCAAG
+TTAACAGAAACGATTAGTGAATTGGTTTCCGATTTTAATTTGGTCTCATTTGAAGTCTTA
+GCTGTAGATGATAAACAAAGCATGATTAATTTACAAAGTGTAGTAGACAAGGCTAATGGA
+TATATATTTGGAGCATCTGAAGTAGGTGGTGATACTGTTTGGGCAGAAGCCACGCGAGAA
+GGTGCAATGATGGTAAATTATGATATACAGGACAGATGGATAGATAACAAAGAAAAGTAC
+GATGAAGAGGAGAGAAAAAGACAAGAGGAACAAGCCAAAGAGCAGAACATGCAAGAAAAG
+GAGGTAGACGTGGATAATGAGGACGAATGGGAAAAGGCATTGAAGGATTGGGAAGAAAAA
+CAAGGAACAGGCTATGTAAGG
+>Sklu_Contig2277.4 YOR262W, Contig c2277 4093-5136
+ATGCCCTTTGGTCAGATTGTTATCGGCCCTCCTGGTTCAGGAAAGTCTACCTATTGTAAT
+GGTTGCTCCCAGTTTTTTAATGCTGTCGGTAGACATGCCCAAGTAATCAACATGGATCCA
+GCAAATGATTCGTTACCTTACCCATGTGCCGTTGACATTCGAGATTTCATCACCTTAGAG
+GAAATTATGACAGAACAGCAGCTGGGGCCTAATGGTGGATTGATGTACGCCCTAGAATCT
+TTGGATAAATCAATCGACTTATTTGTTTTGCAGATCAAATCACTAGTTCAGGATGAACAT
+GCTTACGTAGTATTTGATTGTCCGGGGCAAGTGGAGCTTTTTACGCACCATTCGTCCTTG
+TTCCGCATATTCAAGAAGTTGGAAAGAGAACTAGATATGAGGTTATGCGTGGTTAATTTA
+ATCGATTGTTTTTACATCACCTCTCCTTCACAGTATGTCTCTATTCTTTTGCTAGCTTTG
+AGGTCGATGCTGATGATGGACTTACCACACATTAATGTCTTTTCTAAAATTGATTTGTTG
+AAATCCTACGGTGAGCTGCCATTCCGACTAGATTATTATACCGAAGTTCAAGAGCTAGAT
+TACTTGAAGCCACATATTGACAAGGAAGGGAGCAGCGTCCTTGGAAGGAAATATAGTAGG
+TTGACAGAAACCATTAGTGAACTGGTTTCTGACTTTAATCTGGTTTCCTTTGAAGTTTTG
+TGTGTTGATGATAAGCAGAGCATGATCAATTTGCAAAGTATTGTGGATAAAGCAAATGGT
+TACATATTTGGTGTTTCTGAGATCGGTGGAGATACGGTATGGGCAGAGGCAACGCGACAA
+GGCAGTGCAATTGCTAATTACGACATTCAAGAGAGATGGATAGATAATAAAGATATGTAC
+GACAGAGAGGAACAGGAAAAACGTGAACAGTTGCTCAAAGAAGAAGAGCTACAGAATAAA
+GAAGTAGACGTGGATAAAGGTGATGAGTGGGAAAATGCTTTAAAAGAATGGGAAGAAAAG
+CAAGGCATGAGTTATGTAAAA
+>Skud_Contig1703.7 YOR262W, Contig c1703 9292-10335 reverse complement
+ATGCCATTTGCTCAAATTGTTATCGGCCCACCAGGCTCGGGAAAGTCAACGTATTGTAAC
+GGGTGTTCGCAGTTCTTCAACGCCATTGGAAGACATTCTCAAGTGGTGAATATGGATCCC
+GCTAATGATGCTTTGCCTTATCCGTGTGCTGTAGATATTAGAGATTTTATAACTTTGGAA
+GAGGTTATGCAGGAGCAACAGTTGGGTCCTAATGGTGGTTTAATGTATGCCGTTGAATCC
+CTAGATAACTCCATTGATCTATTCATATTACAGATCAAGTCACTTGTAGAAGAAGAAAAG
+GCCTACCTTGTGTTTGACTGTCCTGGACAAGTTGAGCTATTCACGCACCATTCATCTTTA
+TTTAGCATTTTCAAGAAAATGGAGAAAGAATTGGATATGAGATTCTGTGTCGTAAACTTG
+ATTGATTGCTTTTATATGACATCTCCTTCTCAGTATGTTTCAATTTTGCTACTGGCATTA
+AGGTCCATGCTAATGATGGATTTGCCTCACATAAACGTTTTTTCCAAAATAGATATGTTA
+AAATCATATGGGGAATTACCCTTCAGATTGGATTATTATACAGAGGTCCAGGAGCTAGAT
+CATTTGGAGCCATATATTGAAAAGGAAGGCTCTAGCGTTCTAGGAAAAAAATATAGTAAG
+TTGACTGAAACGATCAAAGAATTAGTCTCCGATTTTAACTTAGTTTCTTTTGAGGTTCTG
+TCCGTGGATGACAAAGAAAGTATGATAAATCTCCAGGGTGTTATTGATAAAGCGAATGGC
+TACATATTCGGAGCATCCGAAGTTGGAGGTGATACAGTGTGGGCCGAAGCTTCGAGAGAA
+GGTGCATTGTTAGAAAACTACGACATACAGGATAGGTGGATAGATAATAAAGAAACGTAT
+GATAAAGAAGAACAAGAGAAGCGTGCATCGCTGTTAAAAGAACAAGAACTGCAGAATAAA
+ACGGTTGATGTGAAAGAAGAAGATGAATGGGAAAATGCATTAAAGGAGTGGGAAGAAAAG
+CAAGATACGGAGTTTGTCAGA
+>Smik_Contig1103.1 YOR262W, Contig c1103 447-1490 reverse complement
+ATGCCGTTTGCTCAGATTGTTATTGGCCCACCGGGTTCAGGCAAGTCCACTTATTGTAAC
+GGCTGCTCACAGTTCTTCAATGCCATTGGGAGACATTCTCAGGTGGTGAACATGGATCCC
+GCTAATGATGCTTTGCCTTATCCTTGTGCTGTGGATATCAGAGATTTTATAACGTTGGAA
+GAGATTATGCAAGAGCAACAGTTAGGCCCCAATGGTGGTTTAATGTATGCAGTCGAATCC
+TTGGATAAGTCTATTGATTTGTTTTTATTACAGATCAAATCGCTTGTAGAAGAAGAAAAA
+GCCTATCTTGTATTCGACTGTCCAGGCCAGGTCGAGTTATTTACTCATCACTCATCCTTA
+TTCAATATATTTAAGAAAATGGAGAAAGAATTGGACATGAGGTTCTGTGTAATAAACTTG
+ATTGACTGTTTTTACATGACGTCACCCTCACAATATGTCTCAATTTTACTGCTTGCACTA
+AGATCCATGTTGATGATGGATCTGCCCCACATAAATGTTTTTTCTAAGATAGATATGTTG
+AAATCATATGGAGAACTACCATTTAGACTAGATTATTATACAGAGGTACAGGATCTAGAT
+TATTTGGAACCGTATATTGAAAAAGAAGGCTCTAGTGTATTAGGAAAGAAATACAATAAG
+TTGACCGACGCAATCAAAGAGCTTGTTTCTGATTTTAACTTGGTTTCCTTTGAGGTTTTG
+TCCGTGGATGACAAAGAAAGTATGATAAATCTCCAGGGTGTGATTGATAAAGCAAATGGC
+TACATATTTGGTGCGTCTGAGGTTGGTGGTGATACAGTGTGGGCAGAGGCTTCTAGGGAA
+GGTGCTCTTTTAACAAGTTACGATATTCAAGATAGGTGGATAGATAATAAGGAAAAGTAT
+GACAAAGAAGAAGAAGAGAAACGTGTAATCTTGTTAAAAGAGCAAGAGCTGCAAAATAAA
+GCAGTTGACGTGAATGAAGACGATGAGTGGGAAAGTGCGCTCAAGGAATGGGAAGAAAAA
+CAAGGTATGGATTTTGTTAGA
+>Spar_21273 YOR262W, Contig c261 8817-9860
+ATGCCCTTTGCTCAAATTGTTATTGGCCCACCGGGTTCAGGAAAATCAACCTATTGCAAC
+GGCTGTTCACAGTTTTTCAATGCCATTGGAAGACATTCTCAGGTAGTAAATATGGACCCT
+GCTAATGATGCGTTACCTTACCCATGTGCTGTGGATATTCGAGATTTTATAACTTTGGAG
+GAGATTATGCAAGAGCAACAGTTAGGCCCCAATGGTGGTTTGGTGTATGCTGTTGAATCC
+TTGGATAAGTCCATTGACTTGTTCATATTACAAATCAAGTCGCTTGTAGAAGAAGAAAAG
+GCATATCTCGTATTTGACTGTCCCGGACAAGTGGAGTTATTTACTCATCACTCATCTTTA
+TTCAGCATTTTTAAGAAAATGGAAAAAGAATTGGACATGAGATTCTGTGTAGTAAATTTG
+ATAGACTGTTTTTACATGACTTCTCCTTCACAATACATCTCCATTTTGCTACTCGCATTA
+AGGTCTATGTTAATGATGGATCTACCCCACATTAACGTTTTTTCTAAGATAGATATGTTG
+AAATCCTACGGGGAATTACCCTTTAGATTAGATTATTATACAGAGGTTCAGGATCTAGAT
+TATTTGGAGCCATATATCGAAAAGGAAGGCTCTAGTGTACTGGGAAAGAAATATAGCAAG
+TTAACTGAGACAATCAAAGAGCTTGTTTCAGATTTCAATCTGGTTTCATTTGAGGTCCTG
+TCTGTGGATGATAAAGAAAGTATGATAAATCTTCAAGGTGTTATAGATAAAGCAAATGGC
+TACATATTCGGCGCATCTGAAGTTGGCGGTGATACAGTTTGGGCTGAGGCCTCTAGAGAA
+GGTGCATTACTAGCAAATTACGACATTCAGGACAGATGGATAGACAATAAAGAGAAGTAC
+GATAAAGAGGAAGAAGAGAAACGCGCGGCGTTGCTAAAAGAACAAGAGTTGCAAAATAAA
+GCCGTTGATGTGAATGAAGAGGATGAGTGGGAAAATGCGCTGAAGGAATGGGAAGAAAAA
+CAGGGTACGGATTTCGTTAGA
+>YOR262W YOR262W SGDID:S0005788, Chr XV from 817289-818332
+ATGCCCTTCGCTCAGATTGTTATTGGTCCACCAGGTTCAGGGAAGTCAACCTATTGCAAC
+GGCTGCTCACAGTTCTTCAATGCCATCGGAAGACATTCCCAGGTAGTGAATATGGATCCT
+GCTAATGATGCCTTACCTTACCCATGCGCTGTGGATATTCGTGATTTTATAACATTAGAG
+GAGATCATGCAAGAGCAACAGTTAGGCCCTAATGGAGGTTTGATGTATGCTGTTGAATCA
+TTGGATAATTCTATTGATTTGTTCATTTTACAGATCAAGTCACTTGTAGAAGAAGAAAAA
+GCATATCTTGTATTCGACTGTCCGGGCCAAGTGGAGCTATTTACTCATCACTCATCTTTG
+TTCAACATCTTTAAAAAAATGGAAAAGGAATTGGACATTAGGTTTTGTGTTGTAAATTTG
+ATTGACTGTTTTTACATGACATCCCCTTCACAATATATCTCGATTTTGTTACTTGCATTG
+AGGTCTATGTTAATGATGGATCTCCCTCACATCAACGTTTTTTCTAAAATAGATATGCTG
+AAATCATACGGAGAATTACCCTTTAGATTAGACTATTATACAGAGGTCCAGGATCTGGAT
+TATTTGGAGCCATATATTGAAAAGGAAGGCTCTAGTGTACTGGGAAAGAAATATAGCAAG
+TTAACTGAAACAATCAAAGAGCTAGTCTCAGATTTCAACTTAGTATCATTTGAGGTTTTG
+TCCGTGGATGACAAAGAAAGTATGATAAATCTTCAAGGTGTTATAGATAAAGCAAATGGC
+TACATATTCGGCGCATCCGAAGTTGGTGGTGATACCGTGTGGGCTGAGGCTTCGCGAGAA
+GGTGCATTAATAGCGAATTACGACATTCAAGACAGGTGGATAGACAATAAAGAGAAGTAT
+GATAAAGAAGAAGAAGAAAAACGTACGGCGTTGTTAAAAGAACAAGAATTGCAAAATAAA
+GCTGTTGATGTGAATGAAGAAGATGAGTGGGAAAATGCGCTGAAGGAGTGGGAAGAGAAA
+CAAGGAATGGATTTTGTTAGG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/chad100.scf
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/chad100.scf
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/char-interleave.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/char-interleave.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/char-interleave.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B C D E F G H;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=6;
+      format interleave datatype=protein missing=? gap=-;
+      charlabels one two three four five six;
+      matrix
+A     W I T
+B     W I T
+C     W I T
+D     W I T
+E     W I T
+F     W I T
+G     W I T
+H     W I T
+
+A     H - B
+B     H - A
+C     H - D
+D     H - C
+E     H - F
+F     H - E
+G     H - H
+H     H - G;
+END;
+
+BEGIN TREES;
+       tree basic_bush = (((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/char-matrix-spaces.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/char-matrix-spaces.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/char-matrix-spaces.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,25 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B C D E F G H;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=6;
+      format datatype=protein missing=? gap=-;
+      charlabels one two three four five six;
+      matrix
+A     W I T  H - B
+B     W I T  H - A
+C     W I T  H - D
+D     W I T  H - C
+E     W I T  H - F
+F     W I T  H - E
+G     W I T  H - H
+H     W I T  H - G;
+END;
+
+BEGIN TREES;
+       tree basic_bush = (((A:1,B:1):1,(C:1,D:1):1):1,((E:1,F:1):1,(G:1,H:1):1):1);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,190 @@
+CODONML (in paml 3.12 February 2002)    abglobin.nuc   Model: several dN/dS ratios for branches 
+Codon frequencies: F3x4
+
+ns = 5  	ls = 285
+# site patterns = 223
+    9    2    1    1    1    4    3    1    1    1    2    1    1    3    1
+    7    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    5    1    1    1    4    2    2    1    6    1    1    1    1
+    1    3    1    1    3    1    1    1    2    3    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    3    1    1
+    1    1    6    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    2    1    1    1    1    1    1    1    1    1    1    1    1
+    3    1    1    2    1    1    1    1    1    1    1    1    1    1    1
+    3    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    2    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    2    1    1    1    1    1    2    1    1    1
+    1    2    1    1    1    1    1    1    1    1    1    1    1    2    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1
+
+1       
+human                 GTG CTG TCT CCT GCC GAC AAG ACC AAC GTC AAG GCC GCC TGG GGC AAG GTT GGC GCG CAC GCT GGC GAG TAT GGT GCG GAG GCC CTG GAG AGG ATG TTC CTG TCC CCC ACC ACC TAC CCG CAC TTC GAC CTG AGC CAC GGC TCT GCC CAG GTT AAG GGC CAC GGC AAG GTG GCC GAC GCG CTG ACC AAC GCC GTG GCG GTG GAC ATG CCC AAC GCG CTG TCC GCC CTG AGC CTG CAC GCG CTT CGG GAC CCG GTC AAC TTC CTC CTA AGC TGC CTG CTG GCC GCC CTC CCC GCC GAG TTC ACC CCT GCG GTG CAC GCC TCC CTG GCT TCT AGC ACC TCC AAA TAC CGT CTG ACT CCT GAG GAG TCT GCC GTT ACT GCC CTG GGC AAC GTG GAT GAA GTT GGT GGT CTG AGG CTG GTG GTC CCT ACC CAG TTC TTT GAG TCC TTT GGG GAT CTG TCC ACT CCT GAT GCT GTT ATG GGC AAC CCT GTG AAG GCT CAT AAG AAA CTC GGT GCC TTT AGT GAT GGC CTG GCT CAC CTG GAC AAC CTC AAG TTT GCC ACA CTG AGT GAG TGT CAC GAT CCT AAC AGG CTC GGC AAC GTG CTG GTC TGT GTG GCC CAT CAC TTT AAA GAA TTC ACC CCA CCA GTG GCT GCC TAT GTG GGT AAT GCC AAG TAT CAC 
+goat-cow              ... ... ... G.C ... ... ... T.. ..T ... ... ... ... ... ... ... ... ... .GC A.. ... ..A .CT ... ..C ..A ... ..T ... ... ... ... ... ... AG. ... ... ... ... ..C ... ... ... ... ... ... ... ..G ... ... ..C ... ... ... ... G.. ... ... .C. ... ... ... ..A ..G ... .GC C.. ... C.. ... GGT A.T ... ..T .AT ... ..T ... ... ..C ..G ..T ... ... ... ... ..T ..T ..G ... .C. ... ... ... TG. ... ... AAT ..T ... ... ..C ... ..C ... ... ... T.. ..C AAC ... ... ... ... ... ... ... ... G.. ... ... G.. ... ..C ..C ... T.T ... ..A ... ... ... ... ... ... ... ... ... ..T ... ..C ..T ... ... ... ... ... ... ... ..C T.. ... ... G.. ... ... ... ... AA. ... ... ... ... ..C ... ... ..G ..A .A. T.. ... ... A.. ... A.. AAG ..T ..C ..T G.. ... ... ... ..T G.G ... ... ... ... ..T ... ... ... .A. ... ... ... ... ..A ..G GT. ... ..T .GC A.T ... ..G ... ... ... ..G GTG C.. ... .A. .T. ... ... ... ... .GA ... ..T 
+rabbit                ... ... ... ..C ..T ... ... ... ... A.. ... A.T ... ... .AA ... A.C ... AGC ... .G. ... ... ... ..C ..C ... ... G.. ... ... ... ... T.. GG. ... ... ... ... ..C ... ... ... T.C .C. ... ... ... .AG ... A.C ..A .C. ... ... ... ... T.. ..A ..C ... ... ..G ... ... .GC C.. ... C.. ... GG. ..C ... ..T A.T ..C ... ... ... ... ..G ... ... ... ..G ..T ... ... ..G TC. ... ... ... ... AA. .A. ... AGT ..A ... ... ... ... ... ..T ... ... ... ..C AAC ... ... ... ... ..T ... ... T.C AG. ... ... ... ..G ..C ... ... ... ... ..T ... ..A ... ... ... ... ... ... ... ..T ... ..A ... ... ... ..C ... ... ... ... ..C ... ... T.. G.A A.. ... ... ... AA. ..T ... ... ... ... ... ... ..G ..G .C. ... ..C ... ..G ..T ... AG. ... ... ... ... ... ..A ... ..T .AG ... ... ..A ... ... ... ... ... ... ... ... ... ... ... ..T AT. ... T.T ... ..T ... ... ... ... ..T ..T .AG ... ... ... ... ... ... ... ..T ..A ..C ... 
+rat                   ... ..C ... G.A .AT ... ..A ... ... A.. ... AA. TG. ... ..G ... A.. ..T .GC ..T .G. ..T ..A ... ..C .A. ... ... ..A C.. ... ... ... GCT G.. ... ... ... ... T.T ... A.T ..T G.A ... .C. ... ... ... ... ..C ... .CT ... ... ... ..T ..T ..T ..C T.. G.. ..A ..T .CA .AC ..C ..A C.. ..T GGT ..C ... ... A.T ... ... ... ..T ..C ..G ..T ..T ..T ... ... ... T.. ..G ... ... ... T.. ..T TG. .A. ..T .GA ..T ... ..A ..C ..C A.. ... ... ..T ..T ..C ... ... ..T ... ..G ... ... ..A ... GA. .CT ... G.. ..T ... .A. ... ... ..A ... CCT ... ..T ... ... ..C ... ... ... ..T ... ... ... ... .A. ... ..T AG. ... ... ..C ... ... T.. G.C TC. ... A.C ... ..T ... ... ... ... ..C ... ... ..G A.A AAC ... ..C .A. ... ... ... AAA ... T.. ... ... ... ... ... ..T CAT ... ... ..A ... ..T ... ... ... ... ... ... ..T A.. A.T ..G AT. ... .G. ..C ... C.G ..G ... ... ... ..C TGT .CA ... ... .TC ... ..A .G. ..T ... ..C ... 
+marsupial             ... ..C ..G GA. ..T ... ... ..T C.. ..G ..A ... AT. ... ..T ... ..G ..A .GC ... ..C ..T .CC ..C .CA ..T ..A ..T ..T .CC ..A .CC ... ..C ... ... ..T ... ... ..C ... ... ... ... TC. .C. ... ..C ... ... A.C C.. ..T ..T ..T ... ..A ... ..T ..C ..T T.. C.G ..T ..T ..C C.. ... C.. ... GGA A.C A.. ... AAA ..A ... ... ... ..C ..G A.A ..T ..C ..G ... ... ... ..C TCT ... A.C ... ... ... ..G AG. AAG ..T ..G ..T ..C .AA ... ... ... ... T.T ..C ... GCT ... ..G ..G ... ... T.. ... T.. ... ... AAC TG. A.C ... A.. A.C TCT C.G ..T ..C C.G AC. ... ... ..T ... ..C ..T ... ..C ... ACC ..T ... .G. AG. ... ..T ... ... ... T.. ... .GC ... ..C ... TCA ..T T.. ..T C.A ..C ... GCT ..G T.G ACC T.. ..C G.. ..A .CA G.C AAG ..T T.. ... ... ..G ... .A. ... .AG T.. ... ... ... ..T ..C ... ... .A. A.G ..G ..T A.C A.T ..G ATC TGC ..T G.G ... ... ..G ..T ..T ..T ..T GA. TGT .T. ..T .GG C.C ..A C.. ... ... ..C ... 
+
+Codon usage in sequences
+--------------------------------------------------------------------------------------------------
+Phe TTT  5  8  3  3  6 | Ser TCT  4  2  6  7  6 | Tyr TAT  3  2  3  1  1 | Cys TGT  2  1  1  2  2
+    TTC 10  9 13 11  8 |     TCC  6  7  7  3  8 |     TAC  3  3  3  5  5 |     TGC  1  1  1  3  3
+Leu TTA  0  0  0  0  0 |     TCA  0  0  0  0  1 | *** TAA  0  0  0  0  0 | *** TGA  0  0  0  0  0
+    TTG  0  2  1  4  5 |     TCG  0  1  0  0  2 |     TAG  0  0  0  0  0 | Trp TGG  3  3  3  3  4
+--------------------------------------------------------------------------------------------------
+Leu CTT  1  1  0  1  3 | Pro CCT  7  2  4  7  3 | His CAT  2  4  4  5  6 | Arg CGT  1  2  1  2  1
+    CTC  5  4  4  4  7 |     CCC  3  6  5  4  7 |     CAC 16 11 15 14 12 |     CGC  0  1  0  0  0
+    CTA  1  2  0  2  1 |     CCA  2  0  1  0  0 | Gln CAA  0  0  0  0  1 |     CGA  0  0  0  0  0
+    CTG 29 28 30 21 15 |     CCG  2  2  1  0  0 |     CAG  4  4  5  5  7 |     CGG  1  0  1  0  0
+--------------------------------------------------------------------------------------------------
+Ile ATT  0  0  1  4  1 | Thr ACT  3  4  4  3 10 | Asn AAT  1  5  5  3  2 | Ser AGT  2  3  5  2  1
+    ATC  0  0  3  2  7 |     ACC 12 11 12  9  9 |     AAC  9  7  7  8  4 |     AGC  4  4  3  5  3
+    ATA  0  0  0  1  0 |     ACA  1  0  0  1  0 | Lys AAA  4  3  5  5  3 | Arg AGA  0  1  0  0  2
+Met ATG  3  3  2  4  5 |     ACG  0  0  0  0  0 |     AAG 18 21 19 19 21 |     AGG  4  3  4  4  2
+--------------------------------------------------------------------------------------------------
+Val GTT  5  5  4  4  6 | Ala GCT  8 11  8 13 10 | Asp GAT  5  8  1 11  6 | Gly GGT  5  4  5  6 10
+    GTC  4  6  2  4  3 |     GCC 21 18 16 18 19 |     GAC 10 10 10  8 10 |     GGC 14 15 14 12  5
+    GTA  0  0  0  1  1 |     GCA  0  1  1  3  2 | Glu GAA  2  2  7  4  4 |     GGA  0  1  0  3  3
+    GTG 21 19 21 14 14 |     GCG  7  4  3  0  0 |     GAG 10  9 10  5  6 |     GGG  1  1  1  2  2
+--------------------------------------------------------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: human          
+position  1:    T:0.12982    C:0.25965    A:0.21404    G:0.39649
+position  2:    T:0.29474    C:0.26667    A:0.30526    G:0.13333
+position  3:    T:0.18947    C:0.41404    A:0.03509    G:0.36140
+
+#2: goat-cow       
+position  1:    T:0.13684    C:0.23509    A:0.22807    G:0.40000
+position  2:    T:0.30526    C:0.24211    A:0.31228    G:0.14035
+position  3:    T:0.21754    C:0.39649    A:0.03509    G:0.35088
+
+#3: rabbit         
+position  1:    T:0.14386    C:0.24912    A:0.24561    G:0.36140
+position  2:    T:0.29474    C:0.23860    A:0.32982    G:0.13684
+position  3:    T:0.19298    C:0.40351    A:0.04912    G:0.35439
+
+#4: rat            
+position  1:    T:0.14737    C:0.22807    A:0.24561    G:0.37895
+position  2:    T:0.28070    C:0.23860    A:0.32632    G:0.15439
+position  3:    T:0.25965    C:0.38596    A:0.07018    G:0.28421
+
+#5: marsupial      
+position  1:    T:0.17895    C:0.22105    A:0.24561    G:0.35439
+position  2:    T:0.28772    C:0.27018    A:0.30877    G:0.13333
+position  3:    T:0.25965    C:0.38596    A:0.06316    G:0.29123
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT      25 | Ser S TCT      25 | Tyr Y TAT      10 | Cys C TGT       8
+      TTC      51 |       TCC      31 |       TAC      19 |       TGC       9
+Leu L TTA       0 |       TCA       1 | *** * TAA       0 | *** * TGA       0
+      TTG      12 |       TCG       3 |       TAG       0 | Trp W TGG      16
+------------------------------------------------------------------------------
+Leu L CTT       6 | Pro P CCT      23 | His H CAT      21 | Arg R CGT       7
+      CTC      24 |       CCC      25 |       CAC      68 |       CGC       1
+      CTA       6 |       CCA       3 | Gln Q CAA       1 |       CGA       0
+      CTG     123 |       CCG       5 |       CAG      25 |       CGG       2
+------------------------------------------------------------------------------
+Ile I ATT       6 | Thr T ACT      24 | Asn N AAT      16 | Ser S AGT      13
+      ATC      12 |       ACC      53 |       AAC      35 |       AGC      19
+      ATA       1 |       ACA       2 | Lys K AAA      20 | Arg R AGA       3
+Met M ATG      17 |       ACG       0 |       AAG      98 |       AGG      17
+------------------------------------------------------------------------------
+Val V GTT      24 | Ala A GCT      50 | Asp D GAT      31 | Gly G GGT      30
+      GTC      19 |       GCC      92 |       GAC      48 |       GGC      60
+      GTA       2 |       GCA       7 | Glu E GAA      19 |       GGA       7
+      GTG      89 |       GCG      14 |       GAG      40 |       GGG       7
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.14737    C:0.23860    A:0.23579    G:0.37825
+position  2:    T:0.29263    C:0.25123    A:0.31649    G:0.13965
+position  3:    T:0.22386    C:0.39719    A:0.05053    G:0.32842
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+human               
+goat-cow             0.2507 (0.0863 0.3443)
+rabbit               0.2627 (0.0867 0.3301) 0.2943 (0.1054 0.3581)
+rat                  0.2045 (0.1261 0.6164) 0.2462 (0.1493 0.6065) 0.2178 (0.1348 0.6187)
+marsupial            0.1902 (0.1931 1.0148) 0.1891 (0.1910 1.0099) 0.2184 (0.2111 0.9668) 0.2716 (0.2404 0.8852)
+
+pairwise comparison, codon frequencies: F3x4.
+
+
+2 (goat-cow) ... 1 (human)
+lnL =-1508.607268
+  0.47825  2.29137  0.19479
+
+t= 0.4783  S=   186.0  N=   669.0  dN/dS= 0.1948  dN= 0.0839  dS= 0.4309
+
+
+3 (rabbit) ... 1 (human)
+lnL =-1512.583367
+  0.46755  2.19039  0.19819
+
+t= 0.4676  S=   179.9  N=   675.1  dN/dS= 0.1982  dN= 0.0842  dS= 0.4247
+
+
+3 (rabbit) ... 2 (goat-cow)
+lnL =-1557.337680
+  0.53837  2.26427  0.22670
+
+t= 0.5384  S=   183.5  N=   671.5  dN/dS= 0.2267  dN= 0.1036  dS= 0.4570
+
+
+4 (rat) ... 1 (human)
+lnL =-1649.727994
+  0.82576  1.78920  0.15108
+
+t= 0.8258  S=   190.2  N=   664.8  dN/dS= 0.1511  dN= 0.1223  dS= 0.8097
+
+
+4 (rat) ... 2 (goat-cow)
+lnL =-1677.101606
+  0.88091  2.40576  0.18757
+
+t= 0.8809  S=   200.2  N=   654.8  dN/dS= 0.1876  dN= 0.1458  dS= 0.7773
+
+
+4 (rat) ... 3 (rabbit)
+lnL =-1666.440696
+  0.85281  2.21652  0.16114
+
+t= 0.8528  S=   193.2  N=   661.8  dN/dS= 0.1611  dN= 0.1306  dS= 0.8105
+
+
+5 (marsupial) ... 1 (human)
+lnL =-1769.079306
+  2.29076  0.98664  0.05689
+
+t= 2.2908  S=   176.2  N=   678.8  dN/dS= 0.0569  dN= 0.1729  dS= 3.0396
+
+
+5 (marsupial) ... 2 (goat-cow)
+lnL =-1774.766235
+  1.80490  1.19637  0.08052
+
+t= 1.8049  S=   180.8  N=   674.2  dN/dS= 0.0805  dN= 0.1762  dS= 2.1879
+
+
+5 (marsupial) ... 3 (rabbit)
+lnL =-1794.595175
+  2.09985  1.06589  0.06930
+
+t= 2.0998  S=   173.1  N=   681.9  dN/dS= 0.0693  dN= 0.1882  dS= 2.7162
+
+
+5 (marsupial) ... 4 (rat)
+lnL =-1842.638722
+  1.66307  1.02118  0.12318
+
+t= 1.6631  S=   180.6  N=   674.4  dN/dS= 0.1232  dN= 0.2214  dS= 1.7973

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml315.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml315.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml315.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,132 @@
+CODONML (in paml 3.15, November 2005)    /tmp/I7ZhE4PgvE/V8dSv7iz0l   Model: One dN/dS ratio 
+Codon frequencies: F3x4
+ns =   4  ls = 573
+
+Codon usage in sequences
+--------------------------------------------------------------------------------------
+Phe TTT  4  4  5  3 | Ser TCT 16 15 11 15 | Tyr TAT 11 11 14 14 | Cys TGT  2  1  2  1
+    TTC  7  7  8  8 |     TCC 12  8 16  8 |     TAC 14 14 10 11 |     TGC  1  2  1  2
+Leu TTA  1  1  1  2 |     TCA  5  7  3  6 | *** TAA  0  0  0  0 | *** TGA  0  0  0  0
+    TTG  5  5  8  7 |     TCG  5  5  8  6 |     TAG  0  0  0  0 | Trp TGG  6  6  6  6
+--------------------------------------------------------------------------------------
+Leu CTT  7  7 10  8 | Pro CCT 22 21 18 17 | His CAT  5  3  3  5 | Arg CGT  3  2  2  2
+    CTC 14 15 13 17 |     CCC 11 13 12 16 |     CAC  4  3  5  5 |     CGC  4  5  5  4
+    CTA  1  1  0  2 |     CCA 12 13 14 11 | Gln CAA 38 38 40 38 |     CGA  4  3  4  2
+    CTG  7  7  4  4 |     CCG 14 13 12 13 |     CAG 33 34 28 31 |     CGG  2  3  4  5
+--------------------------------------------------------------------------------------
+Ile ATT  3  4  4  3 | Thr ACT  6  8  4  5 | Asn AAT  9 10  9  6 | Ser AGT  3  3  5  3
+    ATC 11 11  9 10 |     ACC  5  4  7  7 |     AAC 14 13 12 15 |     AGC  4  4  5  6
+    ATA  1  2  2  2 |     ACA  4  3  9  8 | Lys AAA  6  7  5  4 | Arg AGA  6  7  7  7
+Met ATG  9  9 11  8 |     ACG 10  9  5  5 |     AAG 16 15 17 19 |     AGG  7  7  4  5
+--------------------------------------------------------------------------------------
+Val GTT  3  4  5  5 | Ala GCT 16 16 16 18 | Asp GAT 14 14 15 18 | Gly GGT 17 16 17 18
+    GTC  8  8  8  8 |     GCC 19 18 15 18 |     GAC 13 15 13 10 |     GGC  8  9 10  9
+    GTA  7  6  7  6 |     GCA 16 17 11 14 | Glu GAA 16 17 19 15 |     GGA  7  7  4  6
+    GTG  5  7  6  7 |     GCG 10  8 18 10 |     GAG 24 22 21 23 |     GGG  6  6  6  6
+--------------------------------------------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: R265           
+position  1:    T:0.15532    C:0.31588    A:0.19895    G:0.32984
+position  2:    T:0.16230    C:0.31937    A:0.37871    G:0.13962
+position  3:    T:0.24607    C:0.26003    A:0.21640    G:0.27749
+
+#2: WM276          
+position  1:    T:0.15009    C:0.31588    A:0.20244    G:0.33159
+position  2:    T:0.17103    C:0.31065    A:0.37696    G:0.14136
+position  3:    T:0.24258    C:0.26003    A:0.22513    G:0.27225
+
+#3: H99            
+position  1:    T:0.16230    C:0.30366    A:0.20070    G:0.33333
+position  2:    T:0.17627    C:0.31239    A:0.36824    G:0.14311
+position  3:    T:0.24433    C:0.26003    A:0.21990    G:0.27574
+
+#4: JEC21          
+position  1:    T:0.15532    C:0.31414    A:0.19721    G:0.33333
+position  2:    T:0.17452    C:0.30890    A:0.37347    G:0.14311
+position  3:    T:0.24607    C:0.26876    A:0.21466    G:0.27051
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT      16 | Ser S TCT      57 | Tyr Y TAT      50 | Cys C TGT       6
+      TTC      30 |       TCC      44 |       TAC      49 |       TGC       6
+Leu L TTA       5 |       TCA      21 | *** * TAA       0 | *** * TGA       0
+      TTG      25 |       TCG      24 |       TAG       0 | Trp W TGG      24
+------------------------------------------------------------------------------
+Leu L CTT      32 | Pro P CCT      78 | His H CAT      16 | Arg R CGT       9
+      CTC      59 |       CCC      52 |       CAC      17 |       CGC      18
+      CTA       4 |       CCA      50 | Gln Q CAA     154 |       CGA      13
+      CTG      22 |       CCG      52 |       CAG     126 |       CGG      14
+------------------------------------------------------------------------------
+Ile I ATT      14 | Thr T ACT      23 | Asn N AAT      34 | Ser S AGT      14
+      ATC      41 |       ACC      23 |       AAC      54 |       AGC      19
+      ATA       7 |       ACA      24 | Lys K AAA      22 | Arg R AGA      27
+Met M ATG      37 |       ACG      29 |       AAG      67 |       AGG      23
+------------------------------------------------------------------------------
+Val V GTT      17 | Ala A GCT      66 | Asp D GAT      61 | Gly G GGT      68
+      GTC      32 |       GCC      70 |       GAC      51 |       GGC      36
+      GTA      26 |       GCA      58 | Glu E GAA      67 |       GGA      24
+      GTG      25 |       GCG      46 |       GAG      90 |       GGG      24
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.15576    C:0.31239    A:0.19983    G:0.33202
+position  2:    T:0.17103    C:0.31283    A:0.37435    G:0.14180
+position  3:    T:0.24476    C:0.26222    A:0.21902    G:0.27400
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+R265                
+WM276                0.2264 (0.0186 0.0821)
+H99                  0.1481 (0.0586 0.3959) 0.1481 (0.0611 0.4126)
+JEC21                0.1112 (0.0421 0.3787) 0.1173 (0.0466 0.3970) 0.1419 (0.0380 0.2679)
+
+pairwise comparison, codon frequencies: F3x4.
+
+
+2 (WM276) ... 1 (R265)
+lnL =-2569.912690
+  0.10404  3.29148  0.32693
+
+t= 0.1040  S=   541.4  N=  1177.6  dN/dS= 0.3269  dN= 0.0210  dS= 0.0644
+
+
+3 (H99) ... 1 (R265)
+lnL =-3019.970151
+  0.43604  3.31017  0.20554
+
+t= 0.4360  S=   540.2  N=  1178.8  dN/dS= 0.2055  dN= 0.0656  dS= 0.3193
+
+
+3 (H99) ... 2 (WM276)
+lnL =-3036.174074
+  0.45547  3.63495  0.20989
+
+t= 0.4555  S=   547.4  N=  1171.6  dN/dS= 0.2099  dN= 0.0691  dS= 0.3290
+
+
+4 (JEC21) ... 1 (R265)
+lnL =-2937.357570
+  0.38462  2.95077  0.15134
+
+t= 0.3846  S=   530.2  N=  1188.8  dN/dS= 0.1513  dN= 0.0470  dS= 0.3104
+
+
+4 (JEC21) ... 2 (WM276)
+lnL =-2966.384266
+  0.41002  3.25414  0.16413
+
+t= 0.4100  S=   538.1  N=  1180.9  dN/dS= 0.1641  dN= 0.0527  dS= 0.3210
+
+
+4 (JEC21) ... 3 (H99)
+lnL =-2844.283241
+  0.29120  4.67661  0.21504
+
+t= 0.2912  S=   566.5  N=  1152.5  dN/dS= 0.2150  dN= 0.0441  dS= 0.2049

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dN
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dN	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dN	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+     7
+Hsa_Human            
+Hla_gibbon             0.0133
+Cgu/Can_colobus        0.0742  0.0742
+Pne_langur             0.0725  0.0797  0.0267
+Mmu_rhesus             0.0562  0.0561  0.0473  0.0508
+Ssc_squirrelM          0.0633  0.0633  0.0775  0.0959  0.0559
+Cja_marmoset           0.0634  0.0633  0.0704  0.0886  0.0490  0.0099

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dS
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dS	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.dS	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+     7
+Hsa_Human            
+Hla_gibbon             0.0478
+Cgu/Can_colobus        0.0670  0.0671
+Pne_langur             0.0605  0.0863  0.0484
+Mmu_rhesus             0.0300  0.0550  0.0364  0.0364
+Ssc_squirrelM          0.1346  0.1349  0.1502  0.1645  0.1230
+Cja_marmoset           0.1341  0.1069  0.1496  0.1638  0.1225  0.0619

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/2NG.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+     7
+Hsa_Human            
+Hla_gibbon             0.0628
+Cgu/Can_colobus        0.2179  0.2179
+Pne_langur             0.2095  0.2435  0.0944
+Mmu_rhesus             0.1514  0.1676  0.1348  0.1430
+Ssc_squirrelM          0.2366  0.2366  0.2798  0.3324  0.2109
+Cja_marmoset           0.2366  0.2186  0.2628  0.3150  0.1944  0.0633

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/4fold.nuc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/4fold.nuc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/4fold.nuc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,26 @@
+     7      38
+
+Hsa_Human
+CTGACAAACATAATGCCAAATCCTGTAATCACAGATAG
+
+Hla_gibbon
+CTGACAAACATAATGCCAAATCCTGTAATCACAGATAA
+
+Cgu/Can_colobus
+CTGACAAACATAATGCCAAATCCTGTAATCACAGATAA
+
+Pne_langur
+CTGACAAACATAATGCCAAATCCTGTAATCACAGATAG
+
+Mmu_rhesus
+CTGACAAACATAATGCCAAATCCTGTAATCACAGATAG
+
+Ssc_squirrelM
+CTGACAAACATAATGCCAAATCCTGTGATCACAGATAA
+
+Cja_marmoset
+CTGACAAACATAATGCCAAATCCTGTGATCACAGATAA
+
+
+codons included
+  9 11 12 16 19 22 25 26 32 40 42 43 48 52 55 68 70 71 72 73 74 76 80 83 85 92 93 96 99 100 103 105 108 110 111 127 129 130

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lnf
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lnf	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lnf	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,170 @@
+     2    130     81
+
+
+ 1
+
+     1      2    -4.4631936441       1.4983  AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) 
+     2      1    -9.5051356295       0.0097  GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) GTC (V) GTC (V) 
+     3      1    -9.7022173688       0.0079  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTC (F) TTT (F) 
+     4      1    -4.2039253720       1.9418  GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) 
+     5      2    -4.5556175232       1.3660  AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) 
+     6      7    -4.3823910150       1.6244  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) 
+     7      2    -4.4321334372       1.5456  GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) 
+     8      3    -5.0765672523       0.8114  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) 
+     9      3    -4.7615724447       1.1118  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) 
+    10      1    -4.3184529418       1.7317  AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) 
+    11      2    -4.5084067380       1.4321  ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) 
+    12      2    -5.4399561134       0.5642  CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) 
+    13      2    -4.2372969984       1.8781  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) 
+    14      1   -10.9655217803       0.0022  AGA (R) AGA (R) AAA (K) AAA (K) AGA (R) AGG (R) AGG (R) 
+    15      1   -16.0576876241       0.0000  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) CTT (L) TTT (F) 
+    16      5    -4.3016142564       1.7611  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) 
+    17      1   -18.1074372042       0.0000  ATG (M) ATG (M) CTG (L) CTG (L) CTG (L) ATG (M) CTG (L) 
+    18      4    -4.1086751238       2.1358  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) 
+    19      3    -4.4725258929       1.4844  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) 
+    20      2    -4.5810698348       1.3317  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) 
+    21      1    -8.2006772519       0.0357  AGG (R) AGG (R) AAG (K) AAG (K) AGG (R) AGG (R) AGG (R) 
+    22      1    -8.6032910323       0.0239  ATC (I) ATC (I) GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) 
+    23      3    -4.4856422368       1.4651  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) 
+    24      1    -5.2057180378       0.7131  CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) 
+    25      5    -4.6403385123       1.2551  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) 
+    26      4    -4.4201645295       1.5642  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) 
+    27      5    -4.5973658698       1.3102  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) 
+    28      1    -9.7392781217       0.0077  ATG (M) ATG (M) GTG (V) GTG (V) GTG (V) ATG (M) ATG (M) 
+    29      1    -4.2139700025       1.9224  AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) 
+    30      1   -19.3633273590       0.0000  GGT (G) GGT (G) GGT (G) GGT (G) AAT (N) GAC (D) GAT (D) 
+    31      1    -9.2797480927       0.0121  TAC (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) 
+    32      2    -4.6757503564       1.2114  ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) 
+    33      1   -25.8812469730       0.0000  CGA (R) CGA (R) GAC (D) GAA (E) CAA (Q) CGT (R) CGT (R) 
+    34      3    -4.4744717992       1.4815  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) 
+    35      2    -4.1412516220       2.0674  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) 
+    36      1   -11.8290710328       0.0009  GCT (A) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) 
+    37      1    -8.0038078071       0.0434  GAC (D) GAC (D) GAT (D) GAC (D) GAC (D) GAC (D) GAC (D) 
+    38      1   -19.2209541782       0.0000  AGA (R) AGA (R) GAA (E) GAA (E) CAA (Q) CAA (Q) CAA (Q) 
+    39      1    -4.2784689093       1.8023  TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) 
+    40      1    -4.5413001563       1.3857  GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) 
+    41      1    -4.5300265454       1.4015  ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) 
+    42      1    -4.6476460878       1.2459  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) 
+    43      2    -4.9562014725       0.9152  CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) 
+    44      2    -4.6725375419       1.2153  ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) 
+    45      1   -13.9526908087       0.0001  CGC (R) CGC (R) CGC (R) CGC (R) CAC (H) CAC (H) CAC (H) 
+    46      1    -7.7072272546       0.0584  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAT (Y) TAT (Y) 
+    47      1    -8.6619538016       0.0225  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAC (N) 
+    48      1    -7.6386523715       0.0626  GAT (D) GAT (D) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) 
+    49      1    -7.6180808245       0.0639  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AGA (R) AGA (R) 
+    50      1    -4.7949680407       1.0753  ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) 
+    51      2    -5.3234573070       0.6339  CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) 
+    52      2    -4.3494722978       1.6788  GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) 
+    53      1    -7.9306430266       0.0467  AAT (N) AAT (N) AAT (N) GAT (D) AAT (N) AAT (N) AAT (N) 
+    54      1    -4.6439287862       1.2506  CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) 
+    55      1   -10.8202341235       0.0026  TTA (L) TTA (L) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) 
+    56      1    -5.0984488784       0.7938  TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) 
+    57      1    -4.6812000239       1.2048  TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) 
+    58      1   -12.7247989627       0.0004  AGT (S) AAT (N) AAT (N) AGT (S) AAT (N) AAT (N) AAT (N) 
+    59      3    -4.7045988601       1.1770  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) 
+    60      1   -11.0532635455       0.0021  GAT (D) GAT (D) AAT (N) AAC (N) GAT (D) GAT (D) GAT (D) 
+    61      1    -7.7553120801       0.0557  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) GAC (D) GAC (D) 
+    62      1   -12.8896050389       0.0003  GCT (A) GCC (A) GCT (A) GCT (A) GCT (A) ACT (T) ACT (T) 
+    63      1   -15.5069596712       0.0000  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) CAA (Q) GAA (E) 
+    64      1    -8.1360516790       0.0381  GTA (V) GTA (V) GTA (V) GTA (V) GTA (V) GTG (V) GTG (V) 
+    65      1   -13.5299247810       0.0002  GCT (A) GCT (A) GCT (A) GCT (A) ACT (T) GCC (A) GCC (A) 
+    66      1    -4.6436582159       1.2509  GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) 
+    67      1   -21.1344087505       0.0000  CGT (R) CGC (R) AGT (S) AGT (S) AGT (S) CGT (R) CGC (R) 
+    68      1    -8.6870212211       0.0219  ATT (I) ATT (I) ATT (I) GTT (V) ATT (I) ATT (I) ATT (I) 
+    69      1   -16.1125630985       0.0000  AGA (R) AGA (R) CGA (R) CGA (R) AGA (R) AGA (R) AGG (R) 
+    70      1    -4.7433920156       1.1322  GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) 
+    71      1   -11.8747575648       0.0009  AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AAA (K) AAA (K) 
+    72      1   -18.5148919180       0.0000  AAT (N) AAT (N) AAG (K) AAT (N) AAT (N) GCT (A) GCT (A) 
+    73      1   -13.9739064280       0.0001  CGT (R) CGT (R) CAC (H) CAC (H) CAC (H) CAT (H) CAT (H) 
+    74      1    -8.4292988369       0.0284  AGA (R) AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AGA (R) 
+    75      1   -12.0273836865       0.0008  GTC (V) CTC (L) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) 
+    76      1   -10.1811649098       0.0049  CGT (R) CGT (R) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) 
+    77      1    -8.5983529672       0.0240  TAT (Y) TAT (Y) TAT (Y) TAC (Y) TAT (Y) TAT (Y) TAT (Y) 
+    78      1    -9.0044467619       0.0160  GTT (V) ATT (I) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) 
+    79      1   -13.1118009094       0.0003  CAA (Q) CAA (Q) GAA (E) AAA (K) CAA (Q) CAA (Q) CAA (Q) 
+    80      1    -4.1980256429       1.9533  GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) 
+    81      1   -16.7711131668       0.0000  GTG (V) GTA (V) GTA (V) GTG (V) GTG (V) GTA (V) GTA (V) 
+
+ 2
+
+     1      2    -4.4631960839       1.4983  AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) 
+     2      1    -9.5051314395       0.0097  GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) GTC (V) GTC (V) 
+     3      1    -9.7022223614       0.0079  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTC (F) TTT (F) 
+     4      1    -4.2039287045       1.9418  GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) 
+     5      2    -4.5556198392       1.3660  AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) 
+     6      7    -4.3823933327       1.6244  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) 
+     7      2    -4.4321358567       1.5456  GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) 
+     8      3    -5.0765675735       0.8114  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) 
+     9      3    -4.7615736591       1.1118  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) 
+    10      1    -4.3184560927       1.7317  AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) 
+    11      2    -4.5084089014       1.4321  ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) 
+    12      2    -5.4399559761       0.5642  CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) 
+    13      2    -4.2373003501       1.8781  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) 
+    14      1   -10.9655163954       0.0022  AGA (R) AGA (R) AAA (K) AAA (K) AGA (R) AGG (R) AGG (R) 
+    15      1   -16.0576801224       0.0000  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) CTT (L) TTT (F) 
+    16      5    -4.3016164401       1.7611  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) 
+    17      1   -18.1074279846       0.0000  ATG (M) ATG (M) CTG (L) CTG (L) CTG (L) ATG (M) CTG (L) 
+    18      4    -4.1086794471       2.1358  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) 
+    19      3    -4.4725278080       1.4844  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) 
+    20      2    -4.5810712480       1.3317  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) 
+    21      1    -8.2006676359       0.0357  AGG (R) AGG (R) AAG (K) AAG (K) AGG (R) AGG (R) AGG (R) 
+    22      1    -8.6032803791       0.0239  ATC (I) ATC (I) GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) 
+    23      3    -4.4856451080       1.4650  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) 
+    24      1    -5.2057180047       0.7131  CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) 
+    25      5    -4.6403399094       1.2551  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) 
+    26      4    -4.4201673653       1.5642  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) 
+    27      5    -4.5973665349       1.3102  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) 
+    28      1    -9.7392749110       0.0077  ATG (M) ATG (M) GTG (V) GTG (V) GTG (V) ATG (M) ATG (M) 
+    29      1    -4.2139743758       1.9224  AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) 
+    30      1   -19.3632917534       0.0000  GGT (G) GGT (G) GGT (G) GGT (G) AAT (N) GAC (D) GAT (D) 
+    31      1    -9.2797457778       0.0121  TAC (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) 
+    32      2    -4.6757517792       1.2114  ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) 
+    33      1   -25.8812555227       0.0000  CGA (R) CGA (R) GAC (D) GAA (E) CAA (Q) CGT (R) CGT (R) 
+    34      3    -4.4744739440       1.4815  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) 
+    35      2    -4.1412559724       2.0674  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) 
+    36      1   -11.8290647008       0.0009  GCT (A) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) 
+    37      1    -8.0038170642       0.0434  GAC (D) GAC (D) GAT (D) GAC (D) GAC (D) GAC (D) GAC (D) 
+    38      1   -19.2209226010       0.0000  AGA (R) AGA (R) GAA (E) GAA (E) CAA (Q) CAA (Q) CAA (Q) 
+    39      1    -4.2784711487       1.8023  TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) 
+    40      1    -4.5413014475       1.3857  GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) 
+    41      1    -4.5300288594       1.4014  ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) 
+    42      1    -4.6476474833       1.2459  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) 
+    43      2    -4.9562019622       0.9151  CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) 
+    44      2    -4.6725389296       1.2153  ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) 
+    45      1   -13.9526618018       0.0001  CGC (R) CGC (R) CGC (R) CGC (R) CAC (H) CAC (H) CAC (H) 
+    46      1    -7.7072324162       0.0584  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAT (Y) TAT (Y) 
+    47      1    -8.6619636890       0.0225  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAC (N) 
+    48      1    -7.6386352162       0.0626  GAT (D) GAT (D) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) 
+    49      1    -7.6180754543       0.0639  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AGA (R) AGA (R) 
+    50      1    -4.7949692808       1.0753  ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) 
+    51      2    -5.3234581195       0.6339  CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) 
+    52      2    -4.3494743162       1.6788  GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) 
+    53      1    -7.9306463733       0.0467  AAT (N) AAT (N) AAT (N) GAT (D) AAT (N) AAT (N) AAT (N) 
+    54      1    -4.6439311725       1.2506  CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) 
+    55      1   -10.8202230892       0.0026  TTA (L) TTA (L) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) 
+    56      1    -5.0984494878       0.7938  TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) 
+    57      1    -4.6812015121       1.2048  TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) 
+    58      1   -12.7247851075       0.0004  AGT (S) AAT (N) AAT (N) AGT (S) AAT (N) AAT (N) AAT (N) 
+    59      3    -4.7045995761       1.1770  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) 
+    60      1   -11.0532674855       0.0021  GAT (D) GAT (D) AAT (N) AAC (N) GAT (D) GAT (D) GAT (D) 
+    61      1    -7.7553062341       0.0557  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) GAC (D) GAC (D) 
+    62      1   -12.8895989932       0.0003  GCT (A) GCC (A) GCT (A) GCT (A) GCT (A) ACT (T) ACT (T) 
+    63      1   -15.5069647657       0.0000  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) CAA (Q) GAA (E) 
+    64      1    -8.1360564354       0.0381  GTA (V) GTA (V) GTA (V) GTA (V) GTA (V) GTG (V) GTG (V) 
+    65      1   -13.5299179395       0.0002  GCT (A) GCT (A) GCT (A) GCT (A) ACT (T) GCC (A) GCC (A) 
+    66      1    -4.6436592953       1.2509  GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) 
+    67      1   -21.1344217788       0.0000  CGT (R) CGC (R) AGT (S) AGT (S) AGT (S) CGT (R) CGC (R) 
+    68      1    -8.6870227601       0.0219  ATT (I) ATT (I) ATT (I) GTT (V) ATT (I) ATT (I) ATT (I) 
+    69      1   -16.1125651833       0.0000  AGA (R) AGA (R) CGA (R) CGA (R) AGA (R) AGA (R) AGG (R) 
+    70      1    -4.7433927068       1.1322  GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) 
+    71      1   -11.8747461571       0.0009  AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AAA (K) AAA (K) 
+    72      1   -18.5148907665       0.0000  AAT (N) AAT (N) AAG (K) AAT (N) AAT (N) GCT (A) GCT (A) 
+    73      1   -13.9738939870       0.0001  CGT (R) CGT (R) CAC (H) CAC (H) CAC (H) CAT (H) CAT (H) 
+    74      1    -8.4293011794       0.0284  AGA (R) AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AGA (R) 
+    75      1   -12.0273822050       0.0008  GTC (V) CTC (L) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) 
+    76      1   -10.1811554378       0.0049  CGT (R) CGT (R) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) 
+    77      1    -8.5983668126       0.0240  TAT (Y) TAT (Y) TAT (Y) TAC (Y) TAT (Y) TAT (Y) TAT (Y) 
+    78      1    -9.0044370152       0.0160  GTT (V) ATT (I) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) 
+    79      1   -13.1117968019       0.0003  CAA (Q) CAA (Q) GAA (E) AAA (K) CAA (Q) CAA (Q) CAA (Q) 
+    80      1    -4.1980290700       1.9533  GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) 
+    81      1   -16.7711243929       0.0000  GTG (V) GTA (V) GTA (V) GTG (V) GTG (V) GTA (V) GTA (V) 
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.ctl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.ctl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.ctl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+      seqfile = lysozymeSmall.txt
+     treefile = lysozymeSmall.trees
+      outfile = mlc
+
+        noisy = 9   * 0,1,2,3,9: how much rubbish on the screen
+      verbose = 1   * 1: detailed output, 0: concise output
+      runmode = 0   * 0: user tree;  1: semi-automatic;  2: automatic
+                    * 3: StepwiseAddition; (4,5):PerturbationNNI 
+
+      seqtype = 1   * 1:codons; 2:AAs; 3:codons-->AAs
+    CodonFreq = 2   * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table
+        model = 2
+                    * models for codons:
+                        * 0:one, 1:b, 2:2 or more dN/dS ratios for branches
+
+      NSsites = 0   * dN/dS among sites. 0:no variation, 1:neutral, 2:positive
+        icode = 0   * 0:standard genetic code; 1:mammalian mt; 2-10:see below
+
+    fix_kappa = 0   * 1: kappa fixed, 0: kappa to be estimated
+        kappa = 2   * initial or fixed kappa
+    fix_omega = 0   * 1: omega or omega_1 fixed, 0: estimate 
+        omega = 1   * initial or fixed omega, for codons or codon-transltd AAs
+
+    fix_alpha = 1   * 0: estimate gamma shape parameter; 1: fix it at alpha
+        alpha = .0  * initial or fixed alpha, 0:infinity (constant rate)
+       Malpha = 0   * different alphas for genes
+        ncatG = 4   * # of categories in the dG or AdG models of rates
+
+        clock = 0   * 0: no clock, unrooted tree, 1: clock, rooted tree
+        getSE = 0   * 0: don't want them, 1: want S.E.s of estimates
+ RateAncestor = 1   * (1/0): rates (alpha>0) or ancestral states (alpha=0)
+       method = 0   * 0: simultaneous; 1: one branch at a time
+
+
+* Specifications for duplicating results for the small data set in table 1
+* of Yang (1998 MBE 15:568-573).
+* see the tree file lysozyme.trees for specification of node (branch) labels

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.trees
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.trees	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.trees	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,36 @@
+2 
+
+((Hsa_Human: 0.02556, Hla_gibbon: 0.03889): 0.06798,
+((Cgu/Can_colobus: 0.04379, Pne_langur: 0.05254) #1 : 0.07637, Mmu_rhesus:
+0.02168): 0.04345, (Ssc_squirrelM: 0.04080, Cja_marmoset: 0.02392):
+0.12266);
+((Hsa_Human: 0.02556, Hla_gibbon: 0.03889): 0.06798,
+((Cgu/Can_colobus: 0.04379, Pne_langur: 0.05254) #1 : 0.07637, Mmu_rhesus:
+0.02168): 0.04345, (Ssc_squirrelM: 0.04080, Cja_marmoset: 0.02392):
+0.12266);
+
+
+((Hsa_Human, Hla_gibbon),((Cgu/Can_colobus, Pne_langur) #1,
+Mmu_rhesus), (Ssc_squirrelM, Cja_marmoset)); / * table 1B&F */
+
+
+((1,2), ((3,4) #1, 5), (6,7) );     / * table 1B&F */
+((1,2) #1, ((3,4), 5), (6,7) );        / * table 1C&G */
+((1,2) #1, ((3,4) #1, 5), (6,7) );     / * table 1D&H */
+
+((1,2) #1, ((3,4) #2, 5), (6,7) );     / * table 1E&I */
+((1,2) #2, ((3,4) #1, 5), (6,7) );     / * table 1E&J */
+
+
+((1,2), ((3,4), 5), (6,7) );
+
+For lysozymeSmall.nuc (Messier and Stewart 1997; Yang 1998)
+
+ 1: Hsa_Human
+ 2: Hla_gibbon
+ 3: Cgu/Can_colobus
+ 4: Pne_langur
+ 5: Mmu_rhesus
+ 6: Ssc_squirrelM
+ 7: Cja_marmoset
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/lysozymeSmall.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,64 @@
+  7   390
+
+Hsa_Human
+AAGGTCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAGATTGGGAATGGATGGCTAC
+AGGGGAATCAGCCTAGCAAACTGGATGTGTTTGGCCAAATGGGAGAGTGGTTACAACACA
+CGAGCTACAAACTACAATGCTGGAGACAGAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCGCTACTGGTGTAATGATGGCAAAACCCCAGGAGCAGTTAATGCCTGTCATTTATCC
+TGCAGTGCTTTGCTGCAAGATAACATCGCTGATGCTGTAGCTTGTGCAAAGAGGGTTGTC
+CGTGATCCACAAGGCATTAGAGCATGGGTGGCATGGAGAAATCGTTGTCAAAACAGAGAT
+GTCCGTCAGTATGTTCAAGGTTGTGGAGTG		
+									
+Hla_gibbon								
+AAGGTCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAGATTGGGAATGGATGGCTAC
+AGGGGAATCAGCCTAGCAAACTGGATGTGTTTGGCCAAATGGGAGAGTGGTTATAACACA
+CGAGCTACAAACTACAATCCTGGAGACAGAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCGCTACTGGTGTAATGATGGCAAAACCCCAGGAGCAGTTAATGCCTGTCATTTATCC
+TGCAATGCTTTGCTGCAAGATAACATCGCCGATGCTGTAGCTTGTGCAAAGAGGGTTGTC
+CGCGATCCACAAGGCATTAGAGCATGGGTGGCATGGAGAAATCGTTGTCAAAACAGAGAT
+CTCCGTCAGTATATTCAAGGTTGTGGAGTA		
+									
+Cgu/Can_colobus								
+AAGATCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAAATTGGGACTGGATGGCTAC
+AAGGGAGTCAGCCTAGCAAACTGGGTGTGTTTGGCCAAATGGGAGAGTGGTTATAACACA
+GACGCTACAAACTACAATCCTGGAGATGAAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCGCTACTGGTGTAATAATGGCAAAACCCCAGGAGCAGTTAATGCCTGTCATATATCC
+TGCAATGCTTTGCTGCAAAATAACATCGCTGATGCTGTAGCTTGTGCAAAGAGGGTTGTC
+AGTGATCCACAAGGCATTCGAGCATGGGTGGCATGGAAAAAGCACTGTCAAAACAGAGAT
+GTCAGTCAGTATGTTGAAGGTTGTGGAGTA		
+									
+Pne_langur								
+AAGATCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAAATTGGGACTGGATGGCTAC
+AAGGGAGTCAGCCTAGCAAACTGGGTGTGTTTGGCCAAATGGGAGAGTGGTTATAACACA
+GAAGCTACAAACTACAATCCTGGAGACGAAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCGCTACTGGTGTAATAATGGCAAAACCCCAGGAGCAGTTGATGCCTGTCATATATCC
+TGCAGTGCTTTGCTGCAAAACAACATCGCTGATGCTGTAGCTTGTGCAAAGAGGGTTGTC
+AGTGATCCACAAGGCGTTCGAGCATGGGTGGCATGGAGAAATCACTGTCAAAACAAAGAT
+GTCAGTCAGTACGTTAAAGGTTGTGGAGTG		
+									
+Mmu_rhesus								
+AAGATCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAGATTGGGACTGGATGGCTAC
+AGGGGAATCAGCCTAGCAAACTGGGTGTGTTTGGCCAAATGGGAGAGTAATTATAACACA
+CAAGCTACAAACTACAATCCTGGAGACCAAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCACTACTGGTGTAATAATGGCAAAACCCCAGGAGCAGTTAATGCCTGTCATATATCC
+TGCAATGCTTTGCTGCAAGATAACATCGCTGATGCTGTAACTTGTGCAAAGAGGGTTGTC
+AGTGATCCACAAGGCATTAGAGCATGGGTGGCATGGAGAAATCACTGTCAAAACAGAGAT
+GTCAGTCAGTATGTTCAAGGTTGTGGAGTG		
+									
+Ssc_squirrelM								
+AAGGTCTTCGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAGGCTTGGAATGGATGGCTAC
+AGGGGAATCAGCCTAGCAAACTGGATGTGTTTGGCCAAATGGGAGAGTGACTATAACACA
+CGTGCTACAAACTACAATCCTGGAGACCAAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCACTATTGGTGTAATAATGGCAGAACCCCAGGAGCAGTTAATGCCTGTCATATATCC
+TGCAATGCTTTGCTGCAAGATGACATCACTCAAGCTGTGGCCTGTGCAAAGAGGGTTGTC
+CGTGATCCACAAGGCATTAGAGCATGGGTGGCATGGAAAGCTCATTGTCAAAACAGAGAT
+GTCAGTCAGTATGTTCAAGGTTGTGGAGTA		
+									
+Cja_marmoset								
+AAGGTCTTTGAAAGGTGTGAGTTGGCCAGAACTCTGAAAAGGTTTGGACTGGATGGCTAC
+AGGGGAATCAGCCTAGCAAACTGGATGTGTTTGGCCAAATGGGAGAGTGATTATAACACA
+CGTGCTACAAACTACAATCCTGGAGACCAAAGCACTGATTATGGGATATTTCAGATCAAT
+AGCCACTATTGGTGTAACAATGGCAGAACCCCAGGAGCAGTTAATGCCTGTCATATATCC
+TGCAATGCTTTGCTGCAAGATGACATCACTGAAGCTGTGGCCTGTGCAAAGAGGGTTGTC
+CGCGATCCACAAGGCATTAGGGCATGGGTGGCATGGAAAGCTCATTGTCAAAACAGAGAT
+GTCAGTCAGTATGTTCAAGGTTGTGGAGTA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,273 @@
+
+seed used = 162469585
+
+Hsa_Human                            AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAC AAC ACA CGA GCT ACA AAC TAC AAT GCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AGT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Hla_gibbon                           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCC GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT CTC CGT CAG TAT ATT CAA GGT TGT GGA GTA 
+Cgu/Can_colobus                      AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAC GCT ACA AAC TAC AAT CCT GGA GAT GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AAA AAG CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTA 
+Pne_langur                           AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT GAT GCC TGT CAT ATA TCC TGC AGT GCT TTG CTG CAA AAC AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC GTT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AAA GAT GTC AGT CAG TAC GTT AAA GGT TGT GGA GTG 
+Mmu_rhesus                           AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT AAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA ACT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Ssc_squirrelM                        AAG GTC TTC GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG CTT GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAC TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT CAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+Cja_marmoset                         AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAC AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGG GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+
+
+CODONML (in paml 3.14, January 2004)    lysozymeSmall.txt   Model: several dN/dS ratios for branches 
+Codon frequencies: F3x4
+
+ns = 7  	ls = 130
+# site patterns = 81
+    2    1    1    1    2    7    2    3    3    1    2    2    2    1    1
+    5    1    4    3    2    1    1    3    1    5    4    5    1    1    1
+    1    2    1    3    2    1    1    1    1    1    1    1    2    2    1
+    1    1    1    1    1    2    2    1    1    1    1    1    1    3    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1
+
+1       
+Hsa_Human             AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG ATC AGC CTA GCA AAC TGG ATG AGT GGT TAC ACA CGA GCT AAT GCT GAC AGA TAT GGG ATA TTT CAG ATC CGC TAC AAT GAT AAA ACC CCA GTT AAT CAT TTA TCC TGC AGT CAA GAT AAC GCT GAT GTA GCT GTC CGT ATT AGA GTG AGA AAT CGT AGA GTC CGT TAT GTT CAA GGT GTG 
+Hla_gibbon            ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ..T ... ... ... ... C.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .A. ... ... ... ..C ... ... ... ... ..C ... ... ... ... ... ... ... C.. ... ... A.. ... ... ..A 
+Cgu/Can_colobus       ... A.. ... ... ... ... ... ... ... ... ... ... ... .A. ... ... C.. ... ... ... .A. G.. ... ... ... ... ... G.. ... ... ..T ... GAC ... ... C.. ..T GA. ... ... ... ... ... ... ... ... ... A.. ... ... ... ... ... ... A.. ... ... .A. ... A.. ... ... ... ... ... ... A.. ... C.. ... .A. ..G .AC ... ... A.. ... ... G.. ... ..A 
+Pne_langur            ... A.. ... ... ... ... ... ... ... ... ... ... ... .A. ... ... C.. ... ... ... .A. G.. ... ... ... ... ... G.. ... ... ..T ... GA. ... ... C.. ... GA. ... ... ... ... ... ... ... ... ... A.. ... ... ... ... G.. ... A.. ... ... ... ... A.C ... ... ... ... ... ... A.. G.. C.. ... ... ... .AC .A. ... A.. ..C ... A.. ... ... 
+Mmu_rhesus            ... A.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... C.. ... ... ... ... ... ... ... ... ... ... G.. ... AA. ..T ... .A. ... ... C.. ... CA. ... ... ... ... ... ... .A. ... ... A.. ... ... ... ... ... ... A.. ... ... .A. ... ... ... ... ... ... A.. ... A.. ... ... ... ... ... .AC ... ... A.. ... ... ... ... ... 
+Ssc_squirrelM         ... ... ..C ... ... ... ... ... ... ... ... ... ... ..G C.T ... ... ... ... ... ... ... ... ... ... ... ... ... ... .AC ..T ... ..T ... ... C.. ... CA. ... ... ... ... ... ... .A. ..T ... A.. .G. ... ... ... ... ... A.. ... ... .A. ... ... G.. A.. C.A ..G ..C ... ... ... ... ... .A. GC. .A. ... ... A.. ... ... ... ... ..A 
+Cja_marmoset          ... ... ... ... ... ... ... ... ... ... ... ... ... ..G ..T ... C.. ... ... ... ... ... ... ... ... ... ... ... ... .A. ..T ... ..T ... ... C.. ... CA. ... ... ... ... ... ... .A. ..T ..C A.. .G. ... ... ... ... ... A.. ... ... .A. ... ... G.. A.. ..A ..G ..C ... ..C ... ..G ... .A. GC. .A. ... ... A.. ... ... ... ... ..A 
+
+Codon usage in sequences
+--------------------------------------------------------------------------------------------------------------
+Phe TTT  2  2  2  2  2  1 | Ser TCT  0  0  0  0  0  0 | Tyr TAT  2  3  3  2  3  4 | Cys TGT  7  7  7  7  7  7
+    TTC  0  0  0  0  0  1 |     TCC  1  1  1  1  1  1 |     TAC  4  3  3  4  3  2 |     TGC  1  1  1  1  1  1
+Leu TTA  1  1  0  0  0  0 |     TCA  0  0  0  0  0  0 | *** TAA  0  0  0  0  0  0 | *** TGA  0  0  0  0  0  0
+    TTG  4  4  4  4  4  3 |     TCG  0  0  0  0  0  0 |     TAG  0  0  0  0  0  0 | Trp TGG  5  5  5  5  5  5
+--------------------------------------------------------------------------------------------------------------
+Leu CTT  0  0  0  0  0  1 | Pro CCT  0  1  1  1  1  1 | His CAT  1  1  1  1  1  2 | Arg CGT  3  2  0  0  0  2
+    CTC  0  1  0  0  0  0 |     CCC  0  0  0  0  0  0 |     CAC  0  0  1  1  2  1 |     CGC  1  2  1  1  0  0
+    CTA  1  1  1  1  1  1 |     CCA  2  2  2  2  2  2 | Gln CAA  4  4  3  3  6  6 |     CGA  1  1  1  1  0  0
+    CTG  2  2  3  3  3  2 |     CCG  0  0  0  0  0  0 |     CAG  2  2  2  2  2  2 |     CGG  0  0  0  0  0  0
+--------------------------------------------------------------------------------------------------------------
+Ile ATT  1  2  1  0  1  1 | Thr ACT  2  2  2  2  3  3 | Asn AAT  5  6  7  5  8  6 | Ser AGT  2  1  3  4  3  2
+    ATC  3  3  3  3  4  3 |     ACC  1  1  1  1  1  1 |     AAC  5  5  5  6  5  4 |     AGC  3  3  3  3  3  3
+    ATA  1  1  2  2  2  2 |     ACA  2  2  2  2  2  2 | Lys AAA  3  3  5  6  3  3 | Arg AGA  6  6  2  2  5  4
+Met ATG  2  2  0  0  0  2 |     ACG  0  0  0  0  0  0 |     AAG  2  2  4  3  2  2 |     AGG  3  3  2  2  3  4
+--------------------------------------------------------------------------------------------------------------
+Val GTT  3  2  3  4  3  3 | Ala GCT  6  4  5  5  4  4 | Asp GAT  7  7  6  6  6  5 | Gly GGT  2  2  2  2  1  1
+    GTC  3  2  3  3  2  3 |     GCC  3  4  3  3  3  4 |     GAC  1  1  1  1  1  3 |     GGC  3  3  3  3  3  3
+    GTA  1  2  2  1  1  1 |     GCA  5  5  5  5  5  5 | Glu GAA  1  1  3  3  1  1 |     GGA  5  5  5  5  5  5
+    GTG  2  1  2  3  3  2 |     GCG  0  0  0  0  0  0 |     GAG  2  2  2  2  2  2 |     GGG  1  1  1  1  1  1
+--------------------------------------------------------------------------------------------------------------
+
+--------------------------------------------------
+Phe TTT  3 | Ser TCT  0 | Tyr TAT  4 | Cys TGT  7
+    TTC  0 |     TCC  1 |     TAC  2 |     TGC  1
+Leu TTA  0 |     TCA  0 | *** TAA  0 | *** TGA  0
+    TTG  3 |     TCG  0 |     TAG  0 | Trp TGG  5
+--------------------------------------------------
+Leu CTT  0 | Pro CCT  1 | His CAT  2 | Arg CGT  1
+    CTC  0 |     CCC  0 |     CAC  1 |     CGC  1
+    CTA  1 |     CCA  2 | Gln CAA  5 |     CGA  0
+    CTG  3 |     CCG  0 |     CAG  2 |     CGG  0
+--------------------------------------------------
+Ile ATT  1 | Thr ACT  3 | Asn AAT  5 | Ser AGT  2
+    ATC  3 |     ACC  1 |     AAC  5 |     AGC  3
+    ATA  2 |     ACA  2 | Lys AAA  3 | Arg AGA  3
+Met ATG  1 |     ACG  0 |     AAG  2 |     AGG  5
+--------------------------------------------------
+Val GTT  3 | Ala GCT  4 | Asp GAT  6 | Gly GGT  1
+    GTC  3 |     GCC  4 |     GAC  2 |     GGC  3
+    GTA  1 |     GCA  5 | Glu GAA  2 |     GGA  5
+    GTG  2 |     GCG  0 |     GAG  2 |     GGG  1
+--------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: Hsa_Human      
+position  1:    T:0.20769    C:0.13077    A:0.31538    G:0.34615
+position  2:    T:0.20000    C:0.16923    A:0.30000    G:0.33077
+position  3:    T:0.33077    C:0.22308    A:0.25385    G:0.19231
+
+#2: Hla_gibbon     
+position  1:    T:0.20769    C:0.14615    A:0.32308    G:0.32308
+position  2:    T:0.20000    C:0.16923    A:0.30769    G:0.32308
+position  3:    T:0.32308    C:0.23077    A:0.26154    G:0.18462
+
+#3: Cgu/Can_colobus
+position  1:    T:0.20000    C:0.12308    A:0.32308    G:0.35385
+position  2:    T:0.20000    C:0.16923    A:0.35385    G:0.27692
+position  3:    T:0.33077    C:0.22308    A:0.25385    G:0.19231
+
+#4: Pne_langur     
+position  1:    T:0.20000    C:0.12308    A:0.31538    G:0.36154
+position  2:    T:0.20000    C:0.16923    A:0.34615    G:0.28462
+position  3:    T:0.31538    C:0.23846    A:0.25385    G:0.19231
+
+#5: Mmu_rhesus     
+position  1:    T:0.20000    C:0.13846    A:0.34615    G:0.31538
+position  2:    T:0.20000    C:0.16923    A:0.34615    G:0.28462
+position  3:    T:0.33077    C:0.22308    A:0.25385    G:0.19231
+
+#6: Ssc_squirrelM  
+position  1:    T:0.19231    C:0.15385    A:0.32308    G:0.33077
+position  2:    T:0.20000    C:0.17692    A:0.33077    G:0.29231
+position  3:    T:0.33077    C:0.23077    A:0.24615    G:0.19231
+
+#7: Cja_marmoset   
+position  1:    T:0.20000    C:0.14615    A:0.31538    G:0.33846
+position  2:    T:0.20000    C:0.17692    A:0.33077    G:0.29231
+position  3:    T:0.33077    C:0.23077    A:0.23846    G:0.20000
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT      14 | Ser S TCT       0 | Tyr Y TAT      21 | Cys C TGT      49
+      TTC       1 |       TCC       7 |       TAC      21 |       TGC       7
+Leu L TTA       2 |       TCA       0 | *** * TAA       0 | *** * TGA       0
+      TTG      26 |       TCG       0 |       TAG       0 | Trp W TGG      35
+------------------------------------------------------------------------------
+Leu L CTT       1 | Pro P CCT       6 | His H CAT       9 | Arg R CGT       8
+      CTC       1 |       CCC       0 |       CAC       6 |       CGC       6
+      CTA       7 |       CCA      14 | Gln Q CAA      31 |       CGA       4
+      CTG      18 |       CCG       0 |       CAG      14 |       CGG       0
+------------------------------------------------------------------------------
+Ile I ATT       7 | Thr T ACT      17 | Asn N AAT      42 | Ser S AGT      17
+      ATC      22 |       ACC       7 |       AAC      35 |       AGC      21
+      ATA      12 |       ACA      14 | Lys K AAA      26 | Arg R AGA      28
+Met M ATG       7 |       ACG       0 |       AAG      17 |       AGG      22
+------------------------------------------------------------------------------
+Val V GTT      21 | Ala A GCT      32 | Asp D GAT      43 | Gly G GGT      11
+      GTC      19 |       GCC      24 |       GAC      10 |       GGC      21
+      GTA       9 |       GCA      35 | Glu E GAA      12 |       GGA      35
+      GTG      15 |       GCG       0 |       GAG      14 |       GGG       7
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.20110    C:0.13736    A:0.32308    G:0.33846
+position  2:    T:0.20000    C:0.17143    A:0.33077    G:0.29780
+position  3:    T:0.32747    C:0.22857    A:0.25165    G:0.19231
+
+Codon frequencies under model, for use in evolver:
+  0.01378574  0.00962226  0.01059374  0.00809565
+  0.01181635  0.00824765  0.00908035  0.00693913
+  0.02279949  0.01591374  0.00000000  0.00000000
+  0.02052711  0.01432765  0.00000000  0.01205451
+  0.00941649  0.00657258  0.00723616  0.00552982
+  0.00807127  0.00563364  0.00620242  0.00473984
+  0.01557342  0.01087004  0.01196749  0.00914546
+  0.01402125  0.00978665  0.01077472  0.00823396
+  0.02214758  0.01545871  0.01701945  0.01300613
+  0.01898364  0.01325032  0.01458810  0.01114811
+  0.03662868  0.02556633  0.02814755  0.02151013
+  0.03297798  0.02301819  0.02534214  0.01936627
+  0.02320222  0.01619484  0.01782990  0.01362547
+  0.01988762  0.01388129  0.01528277  0.01167897
+  0.03837291  0.02678377  0.02948791  0.02253443
+  0.03454836  0.02411429  0.02654891  0.02028847
+
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+Hsa_Human           
+Hla_gibbon           0.2782 (0.0133 0.0478)
+Cgu/Can_colobus      1.1086 (0.0742 0.0670) 1.1055 (0.0742 0.0671)
+Pne_langur           1.1979 (0.0725 0.0605) 0.9234 (0.0797 0.0863) 0.5517 (0.0267 0.0484)
+Mmu_rhesus           1.8744 (0.0562 0.0300) 1.0215 (0.0561 0.0550) 1.2973 (0.0473 0.0364) 1.3970 (0.0508 0.0364)
+Ssc_squirrelM        0.4701 (0.0633 0.1346) 0.4688 (0.0633 0.1349) 0.5159 (0.0775 0.1502) 0.5833 (0.0959 0.1645) 0.4544 (0.0559 0.1230)
+Cja_marmoset         0.4725 (0.0634 0.1341) 0.5925 (0.0633 0.1069) 0.4702 (0.0704 0.1496) 0.5411 (0.0886 0.1638) 0.3995 (0.0490 0.1225) 0.1595 (0.0099 0.0619)
+
+
+TREE #  1:  ((1, 2), ((3, 4), 5), (6, 7));   MP score: 65
+lnL(ntime: 11  np: 14):   -904.636553     +0.000000
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+  0.07000  0.02557  0.03893  0.04388  0.07904  0.04388  0.05215  0.01949  0.12131  0.04103  0.02378  4.56118  0.68580  3.50575
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.55906
+
+((1: 0.025570, 2: 0.038929): 0.070000, ((3: 0.043878, 4: 0.052151): 0.079045, 5: 0.019487): 0.043879, (6: 0.041033, 7: 0.023779): 0.121307);
+
+((Hsa_Human: 0.025570, Hla_gibbon: 0.038929): 0.070000, ((Cgu/Can_colobus: 0.043878, Pne_langur: 0.052151): 0.079045, Mmu_rhesus: 0.019487): 0.043879, (Ssc_squirrelM: 0.041033, Cja_marmoset: 0.023779): 0.121307);
+
+Detailed output identifying parameters
+
+kappa (ts/tv) =  4.56118
+
+dN & dS for each branch
+
+ branch           t        S        N    dN/dS       dN       dS   S*dS   N*dN
+
+   8..9       0.070    107.8    282.2   0.6858   0.0207   0.0302    3.3    5.8
+   9..1       0.026    107.8    282.2   0.6858   0.0076   0.0110    1.2    2.1
+   9..2       0.039    107.8    282.2   0.6858   0.0115   0.0168    1.8    3.2
+   8..10      0.044    107.8    282.2   0.6858   0.0130   0.0189    2.0    3.7
+  10..11      0.079    107.8    282.2   3.5057   0.0328   0.0094    1.0    9.3
+  11..3       0.044    107.8    282.2   0.6858   0.0130   0.0189    2.0    3.7
+  11..4       0.052    107.8    282.2   0.6858   0.0154   0.0225    2.4    4.4
+  10..5       0.019    107.8    282.2   0.6858   0.0058   0.0084    0.9    1.6
+   8..12      0.121    107.8    282.2   0.6858   0.0359   0.0523    5.6   10.1
+  12..6       0.041    107.8    282.2   0.6858   0.0121   0.0177    1.9    3.4
+  12..7       0.024    107.8    282.2   0.6858   0.0070   0.0103    1.1    2.0
+
+tree length for dN:      0.17485
+tree length for dS:      0.21644
+
+dS tree:
+((Hsa_Human: 0.011031, Hla_gibbon: 0.016794): 0.030198, ((Cgu/Can_colobus: 0.018929, Pne_langur: 0.022498): 0.009367, Mmu_rhesus: 0.008407): 0.018929, (Ssc_squirrelM: 0.017702, Cja_marmoset: 0.010258): 0.052332);
+dN tree:
+((Hsa_Human: 0.007565, Hla_gibbon: 0.011517): 0.020710, ((Cgu/Can_colobus: 0.012982, Pne_langur: 0.015429): 0.032838, Mmu_rhesus: 0.005765): 0.012982, (Ssc_squirrelM: 0.012140, Cja_marmoset: 0.007035): 0.035889);
+
+
+
+TREE #  2:  ((1, 2), ((3, 4), 5), (6, 7));   MP score: 65
+lnL(ntime: 11  np: 14):   -904.636553     -0.000000
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+  0.07000  0.02557  0.03893  0.04388  0.07905  0.04388  0.05215  0.01949  0.12131  0.04103  0.02378  4.56122  0.68581  3.50574
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.55906
+
+((1: 0.025570, 2: 0.038929): 0.070001, ((3: 0.043878, 4: 0.052150): 0.079045, 5: 0.019488): 0.043879, (6: 0.041033, 7: 0.023779): 0.121307);
+
+((Hsa_Human: 0.025570, Hla_gibbon: 0.038929): 0.070001, ((Cgu/Can_colobus: 0.043878, Pne_langur: 0.052150): 0.079045, Mmu_rhesus: 0.019488): 0.043879, (Ssc_squirrelM: 0.041033, Cja_marmoset: 0.023779): 0.121307);
+
+Detailed output identifying parameters
+
+kappa (ts/tv) =  4.56122
+
+dN & dS for each branch
+
+ branch           t        S        N    dN/dS       dN       dS   S*dS   N*dN
+
+   8..9       0.070    107.8    282.2   0.6858   0.0207   0.0302    3.3    5.8
+   9..1       0.026    107.8    282.2   0.6858   0.0076   0.0110    1.2    2.1
+   9..2       0.039    107.8    282.2   0.6858   0.0115   0.0168    1.8    3.2
+   8..10      0.044    107.8    282.2   0.6858   0.0130   0.0189    2.0    3.7
+  10..11      0.079    107.8    282.2   3.5057   0.0328   0.0094    1.0    9.3
+  11..3       0.044    107.8    282.2   0.6858   0.0130   0.0189    2.0    3.7
+  11..4       0.052    107.8    282.2   0.6858   0.0154   0.0225    2.4    4.4
+  10..5       0.019    107.8    282.2   0.6858   0.0058   0.0084    0.9    1.6
+   8..12      0.121    107.8    282.2   0.6858   0.0359   0.0523    5.6   10.1
+  12..6       0.041    107.8    282.2   0.6858   0.0121   0.0177    1.9    3.4
+  12..7       0.024    107.8    282.2   0.6858   0.0070   0.0103    1.1    2.0
+
+tree length for dN:      0.17485
+tree length for dS:      0.21644
+
+dS tree:
+((Hsa_Human: 0.011031, Hla_gibbon: 0.016794): 0.030198, ((Cgu/Can_colobus: 0.018929, Pne_langur: 0.022497): 0.009367, Mmu_rhesus: 0.008407): 0.018929, (Ssc_squirrelM: 0.017702, Cja_marmoset: 0.010258): 0.052331);
+dN tree:
+((Hsa_Human: 0.007565, Hla_gibbon: 0.011517): 0.020710, ((Cgu/Can_colobus: 0.012982, Pne_langur: 0.015429): 0.032838, Mmu_rhesus: 0.005766): 0.012982, (Ssc_squirrelM: 0.012140, Cja_marmoset: 0.007035): 0.035889);
+
+
+Tree comparisons (Kishino & Hasegawa 1989; Shimodaira & Hasegawa 1999)
+Number of replicates: 10000
+
+  tree           li       Dli     +- SE     pKH       pSH    pRELL
+
+     1*    -904.637     0.000     0.000  -1.000    -1.000    0.511
+     2     -904.637    -0.000     0.000   0.500     0.516    0.489
+
+pKH: P value for KH normal test (Kishino & Hasegawa 1989)
+pRELL: RELL bootstrap proportions (Kishino & Hasegawa 1989)
+pSH: P value with multiple-comparison correction (MC in table 1 of Shimodaira & Hasegawa 1999)
+(-1 for P values means N/A)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1165 @@
+Supplemental results for CODEML (seqf: lysozymeSmall.txt  treef: lysozymeSmall.trees)
+
+Number of codon sites with 0,1,2,3 position differences
+   2 vs.    1    122     8     0     0   0.2782 (0.0133 0.0478)
+   3 vs.    1    107    20     2     1   1.1086 (0.0742 0.0670)
+   3 vs.    2    108    18     3     1   1.1055 (0.0742 0.0671)
+   4 vs.    1    108    18     4     0   1.1979 (0.0725 0.0605)
+   4 vs.    2    105    20     5     0   0.9234 (0.0797 0.0863)
+   4 vs.    3    118    12     0     0   0.5517 (0.0267 0.0484)
+   5 vs.    1    114    13     3     0   1.8744 (0.0562 0.0300)
+   5 vs.    2    113    13     4     0   1.0215 (0.0561 0.0550)
+   5 vs.    3    115    13     2     0   1.2973 (0.0473 0.0364)
+   5 vs.    4    114    14     2     0   1.3970 (0.0508 0.0364)
+   6 vs.    1    106    19     5     0   0.4701 (0.0633 0.1346)
+   6 vs.    2    107    17     6     0   0.4688 (0.0633 0.1349)
+   6 vs.    3    104    20     4     2   0.5159 (0.0775 0.1502)
+   6 vs.    4     98    25     6     1   0.5833 (0.0959 0.1645)
+   6 vs.    5    110    14     6     0   0.4544 (0.0559 0.1230)
+   7 vs.    1    103    25     2     0   0.4725 (0.0634 0.1341)
+   7 vs.    2    106    21     3     0   0.5925 (0.0633 0.1069)
+   7 vs.    3    105    20     3     2   0.4702 (0.0704 0.1496)
+   7 vs.    4     99    25     5     1   0.5411 (0.0886 0.1638)
+   7 vs.    5    110    16     4     0   0.3995 (0.0490 0.1225)
+   7 vs.    6    122     8     0     0   0.1595 (0.0099 0.0619)
+
+
+TREE #  1
+
+Ancestral reconstruction by CODONML.
+
+((Hsa_Human: 0.007565, Hla_gibbon: 0.011517): 0.020710, ((Cgu/Can_colobus: 0.012982, Pne_langur: 0.015429): 0.032838, Mmu_rhesus: 0.005765): 0.012982, (Ssc_squirrelM: 0.012140, Cja_marmoset: 0.007035): 0.035889);
+
+((1, 2), ((3, 4), 5), (6, 7));
+
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+
+tree with node labels for Rod Page's TreeView
+((1_Hsa_Human: 0.007565, 2_Hla_gibbon: 0.011517) 9 : 0.020710, ((3_Cgu/Can_colobus: 0.012982, 4_Pne_langur: 0.015429) 11 : 0.032838, 5_Mmu_rhesus: 0.005765) 10 : 0.012982, (6_Ssc_squirrelM: 0.012140, 7_Cja_marmoset: 0.007035) 12 : 0.035889) 8 ;
+
+Nodes 8 to 12 are ancestral
+
+(1) Marginal reconstruction of ancestral sequences (eqn. 4 in Yang et al. 1995 Genetics 141:1641-1650).
+
+
+Prob of best character at each node, listed by site 
+
+Site   Freq   Data: 
+
+   1      2   AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) :  AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000)
+   2      1   GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) GTC (V) GTC (V) :  GTC V 0.967 (V 0.967) GTC V 1.000 (V 1.000) ATC I 0.992 (I 0.992) ATC I 1.000 (I 1.000) GTC V 1.000 (V 1.000)
+   3      1   TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTC (F) TTT (F) :  TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 0.985 (F 1.000)
+   4      1   GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) :  GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000)
+   5      2   AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) :  AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000)
+   6      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+   7      2   GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) :  GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000)
+   8      3   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) :  TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000)
+   9      3   GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) :  GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000)
+  10      1   AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) :  AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000)
+  11      2   ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) :  ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000)
+  12      2   CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) :  CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000)
+  13      2   AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) :  AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000)
+  14      1   AGA (R) AGA (R) AAA (K) AAA (K) AGA (R) AGG (R) AGG (R) :  AGA R 0.990 (R 1.000) AGA R 1.000 (R 1.000) AGA R 0.998 (R 0.998) AAA K 0.994 (K 0.994) AGG R 0.998 (R 1.000)
+  15      1   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) CTT (L) TTT (F) :  TTG L 0.999 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTT F 0.990 (F 0.990)
+  16      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  17      1   ATG (M) ATG (M) CTG (L) CTG (L) CTG (L) ATG (M) CTG (L) :  CTG L 0.871 (L 0.871) ATG M 1.000 (M 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 0.871 (L 0.871)
+  18      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+  19      3   GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) :  GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000)
+  20      2   TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) :  TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000)
+  21      1   AGG (R) AGG (R) AAG (K) AAG (K) AGG (R) AGG (R) AGG (R) :  AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 0.998 (R 0.998) AAG K 0.995 (K 0.995) AGG R 1.000 (R 1.000)
+  22      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  23      1   ATC (I) ATC (I) GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) :  ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 0.999 (I 0.999) GTC V 0.996 (V 0.996) ATC I 1.000 (I 1.000)
+  24      3   AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) :  AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000)
+  25      1   CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) :  CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000)
+  26      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+  27      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+  28      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+  29      1   ATG (M) ATG (M) GTG (V) GTG (V) GTG (V) ATG (M) ATG (M) :  ATG M 0.974 (M 0.974) ATG M 1.000 (M 1.000) GTG V 0.993 (V 0.993) GTG V 1.000 (V 1.000) ATG M 1.000 (M 1.000)
+  30      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  31      3   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) :  TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000)
+  32      3   GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) :  GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000)
+  33      2   AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) :  AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000)
+  34      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+  35      2   GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) :  GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000)
+  36      1   AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) :  AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000)
+  37      1   GGT (G) GGT (G) GGT (G) GGT (G) AAT (N) GAC (D) GAT (D) :  GAT D 0.742 (D 0.743) GGT G 0.996 (G 0.996) GAT D 0.696 (D 0.696) GGT G 0.993 (G 0.993) GAT D 0.959 (D 0.999)
+  38      1   TAC (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) :  TAT Y 0.999 (Y 1.000) TAT Y 0.962 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000)
+  39      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+  40      2   ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) :  ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000)
+  41      1   CGA (R) CGA (R) GAC (D) GAA (E) CAA (Q) CGT (R) CGT (R) :  CGA R 0.977 (R 0.978) CGA R 1.000 (R 1.000) CAA Q 0.990 (Q 0.990) GAA E 0.992 (E 0.992) CGT R 1.000 (R 1.000)
+  42      3   GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) :  GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000)
+  43      2   ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) :  ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000)
+  44      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+  45      2   TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) :  TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000)
+  46      2   AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) :  AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000)
+  47      1   GCT (A) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) :  CCT P 1.000 (P 1.000) CCT P 0.998 (P 0.998) CCT P 1.000 (P 1.000) CCT P 1.000 (P 1.000) CCT P 1.000 (P 1.000)
+  48      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  49      1   GAC (D) GAC (D) GAT (D) GAC (D) GAC (D) GAC (D) GAC (D) :  GAC D 1.000 (D 1.000) GAC D 1.000 (D 1.000) GAC D 1.000 (D 1.000) GAC D 0.989 (D 1.000) GAC D 1.000 (D 1.000)
+  50      1   AGA (R) AGA (R) GAA (E) GAA (E) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 0.980 (Q 0.980) AGA R 0.994 (R 0.995) CAA Q 0.990 (Q 0.990) GAA E 0.998 (E 0.998) CAA Q 1.000 (Q 1.000)
+  51      3   AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) :  AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000)
+  52      2   ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) :  ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000)
+  53      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+  54      1   TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) :  TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000)
+  55      1   GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) :  GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000)
+  56      1   ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) :  ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000)
+  57      1   TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) :  TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000)
+  58      2   CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) :  CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000)
+  59      2   ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) :  ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000)
+  60      2   AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) :  AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000)
+  61      3   AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) :  AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000)
+  62      1   CGC (R) CGC (R) CGC (R) CGC (R) CAC (H) CAC (H) CAC (H) :  CAC H 0.742 (H 0.742) CGC R 0.999 (R 0.999) CAC H 0.748 (H 0.748) CGC R 0.998 (R 0.998) CAC H 1.000 (H 1.000)
+  63      1   TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAT (Y) TAT (Y) :  TAC Y 0.994 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAT Y 0.997 (Y 1.000)
+  64      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+  65      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  66      1   AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAC (N) :  AAT N 0.998 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 0.889 (N 1.000)
+  67      1   GAT (D) GAT (D) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) :  AAT N 0.970 (N 0.970) GAT D 0.994 (D 0.994) AAT N 0.999 (N 0.999) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000)
+  68      3   GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) :  GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000)
+  69      1   AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AGA (R) AGA (R) :  AAA K 0.992 (K 0.992) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AGA R 0.998 (R 0.998)
+  70      1   ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) :  ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000)
+  71      2   CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) :  CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000)
+  72      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  73      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+  74      2   GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) :  GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000)
+  75      1   AAT (N) AAT (N) AAT (N) GAT (D) AAT (N) AAT (N) AAT (N) :  AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 0.964 (N 0.964) AAT N 1.000 (N 1.000)
+  76      3   GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) :  GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000)
+  77      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  78      1   CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) :  CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000)
+  79      1   TTA (L) TTA (L) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) :  ATA I 0.997 (I 0.997) TTA L 1.000 (L 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000)
+  80      1   TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) :  TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000)
+  81      1   TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) :  TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000)
+  82      1   AGT (S) AAT (N) AAT (N) AGT (S) AAT (N) AAT (N) AAT (N) :  AAT N 0.998 (N 0.998) AAT N 0.957 (N 0.957) AAT N 0.999 (N 0.999) AAT N 0.963 (N 0.963) AAT N 1.000 (N 1.000)
+  83      3   GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) :  GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000)
+  84      3   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) :  TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000)
+  85      2   CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) :  CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000)
+  86      3   CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000)
+  87      1   GAT (D) GAT (D) AAT (N) AAC (N) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 0.996 (D 0.997) AAT N 0.983 (N 0.993) GAT D 1.000 (D 1.000)
+  88      1   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) GAC (D) GAC (D) :  AAC N 0.993 (N 0.993) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) GAC D 0.998 (D 0.998)
+  89      2   ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) :  ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000)
+  90      1   GCT (A) GCC (A) GCT (A) GCT (A) GCT (A) ACT (T) ACT (T) :  GCT A 0.994 (A 0.994) GCT A 0.985 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) ACT T 0.998 (T 0.998)
+  91      1   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) CAA (Q) GAA (E) :  GAT D 0.997 (D 0.997) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAA E 0.995 (E 0.995)
+  92      3   GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) :  GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000)
+  93      1   GTA (V) GTA (V) GTA (V) GTA (V) GTA (V) GTG (V) GTG (V) :  GTA V 0.993 (V 1.000) GTA V 1.000 (V 1.000) GTA V 1.000 (V 1.000) GTA V 1.000 (V 1.000) GTG V 0.998 (V 1.000)
+  94      1   GCT (A) GCT (A) GCT (A) GCT (A) ACT (T) GCC (A) GCC (A) :  GCT A 0.990 (A 0.998) GCT A 1.000 (A 1.000) GCT A 0.947 (A 0.947) GCT A 1.000 (A 1.000) GCC A 0.998 (A 1.000)
+  95      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  96      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+  97      2   AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) :  AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000)
+  98      2   AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) :  AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000)
+  99      2   GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) :  GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000)
+ 100      1   GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) :  GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000)
+ 101      1   CGT (R) CGC (R) AGT (S) AGT (S) AGT (S) CGT (R) CGC (R) :  CGT R 0.973 (R 0.993) CGT R 0.970 (R 1.000) AGT S 0.995 (S 0.996) AGT S 1.000 (S 1.000) CGT R 0.935 (R 1.000)
+ 102      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+ 103      2   CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) :  CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000)
+ 104      3   CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000)
+ 105      3   GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) :  GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000)
+ 106      1   ATT (I) ATT (I) ATT (I) GTT (V) ATT (I) ATT (I) ATT (I) :  ATT I 1.000 (I 1.000) ATT I 1.000 (I 1.000) ATT I 1.000 (I 1.000) ATT I 0.978 (I 0.978) ATT I 1.000 (I 1.000)
+ 107      1   AGA (R) AGA (R) CGA (R) CGA (R) AGA (R) AGA (R) AGG (R) :  AGA R 0.999 (R 1.000) AGA R 1.000 (R 1.000) AGA R 0.996 (R 1.000) CGA R 0.996 (R 1.000) AGA R 0.921 (R 1.000)
+ 108      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+ 109      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+ 110      1   GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) :  GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000)
+ 111      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+ 112      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+ 113      1   AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AAA (K) AAA (K) :  AGA R 0.990 (R 0.990) AGA R 1.000 (R 1.000) AGA R 0.997 (R 0.997) AGA R 0.962 (R 0.962) AAA K 0.998 (K 0.998)
+ 114      1   AAT (N) AAT (N) AAG (K) AAT (N) AAT (N) GCT (A) GCT (A) :  AAT N 0.984 (N 0.984) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 0.987 (N 0.988) GCT A 0.998 (A 0.998)
+ 115      1   CGT (R) CGT (R) CAC (H) CAC (H) CAC (H) CAT (H) CAT (H) :  CAT H 0.952 (H 0.990) CGT R 0.998 (R 0.998) CAC H 0.998 (H 1.000) CAC H 1.000 (H 1.000) CAT H 1.000 (H 1.000)
+ 116      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+ 117      3   CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000)
+ 118      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+ 119      1   AGA (R) AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AGA (R) :  AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 0.975 (R 0.975) AGA R 1.000 (R 1.000)
+ 120      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+ 121      1   GTC (V) CTC (L) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) :  GTC V 1.000 (V 1.000) GTC V 0.998 (V 0.998) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000)
+ 122      1   CGT (R) CGT (R) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) :  AGT S 0.993 (S 0.993) CGT R 0.999 (R 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000)
+ 123      2   CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) :  CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000)
+ 124      1   TAT (Y) TAT (Y) TAT (Y) TAC (Y) TAT (Y) TAT (Y) TAT (Y) :  TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 0.993 (Y 1.000) TAT Y 1.000 (Y 1.000)
+ 125      1   GTT (V) ATT (I) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) :  GTT V 1.000 (V 1.000) GTT V 0.988 (V 0.988) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000)
+ 126      1   CAA (Q) CAA (Q) GAA (E) AAA (K) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) GAA E 0.517 (E 0.517) CAA Q 1.000 (Q 1.000)
+ 127      1   GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) :  GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000)
+ 128      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+ 129      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+ 130      1   GTG (V) GTA (V) GTA (V) GTG (V) GTG (V) GTA (V) GTA (V) :  GTG V 0.797 (V 1.000) GTG V 0.795 (V 1.000) GTG V 0.942 (V 1.000) GTG V 0.937 (V 1.000) GTA V 0.998 (V 1.000)
+
+Summary of changes along branches.
+Check root for directions of change.
+
+Branch 1:    8..9  (n= 8.5 s= 0.5)
+
+	  17 L 0.871 -> M 1.000
+	  37 D 0.743 -> G 0.996
+	  50 Q 0.980 -> R 0.995
+	  62 H 0.742 -> R 0.999
+	  67 N 0.970 -> D 0.994
+	  79 I 0.997 -> L 1.000
+	 115 H 0.990 -> R 0.998
+	 122 S 0.993 -> R 1.000
+
+
+Branch 2:    9..1  (Hsa_Human)  (n= 2.0 s= 1.0)
+
+	  47 P 0.998 -> A
+	  82 N 0.957 -> S
+
+
+Branch 3:    9..2  (Hla_gibbon)  (n= 2.0 s= 3.0)
+
+	 121 V 0.998 -> L
+	 125 V 0.988 -> I
+
+
+Branch 4:    8..10 (n= 4.0 s= 1.0)
+
+	   2 V 0.967 -> I 0.992
+	  29 M 0.974 -> V 0.993
+	  41 R 0.978 -> Q 0.990
+	 101 R 0.993 -> S 0.996
+
+
+Branch 5:   10..11 (n= 9.0 s= 1.0)
+
+	  14 R 0.998 -> K 0.994
+	  21 R 0.998 -> K 0.995
+	  23 I 0.999 -> V 0.996
+	  37 D 0.696 -> G 0.993
+	  41 Q 0.990 -> E 0.992
+	  50 Q 0.990 -> E 0.998
+	  62 H 0.748 -> R 0.998
+	  87 D 0.997 -> N 0.993
+	 126 Q 1.000 -> E 0.517
+
+
+Branch 6:   11..3  (Cgu/Can_colobus)  (n= 3.0 s= 2.0)
+
+	  41 E 0.992 -> D
+	 113 R 0.962 -> K
+	 114 N 0.988 -> K
+
+
+Branch 7:   11..4  (Pne_langur)  (n= 5.0 s= 2.0)
+
+	  75 N 0.964 -> D
+	  82 N 0.963 -> S
+	 106 I 0.978 -> V
+	 119 R 0.975 -> K
+	 126 E 0.517 -> K
+
+
+Branch 8:   10..5  (Mmu_rhesus)  (n= 2.0 s= 0.0)
+
+	  37 D 0.696 -> N
+	  94 A 0.947 -> T
+
+
+Branch 9:    8..12 (n= 8.0 s= 6.0)
+
+	  15 L 1.000 -> F 0.990
+	  69 K 0.992 -> R 0.998
+	  88 N 0.993 -> D 0.998
+	  90 A 0.994 -> T 0.998
+	  91 D 0.997 -> E 0.995
+	 113 R 0.990 -> K 0.998
+	 114 N 0.984 -> A 0.998
+
+
+Branch 10:   12..6  (Ssc_squirrelM)  (n= 3.0 s= 2.0)
+
+	  15 F 0.990 -> L
+	  17 L 0.871 -> M
+	  91 E 0.995 -> Q
+
+
+Branch 11:   12..7  (Cja_marmoset)  (n= 0.0 s= 3.0)
+
+
+
+
+
+List of extant and reconstructed sequences
+
+Hsa_Human         AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAC AAC ACA CGA GCT ACA AAC TAC AAT GCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AGT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Hla_gibbon        AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCC GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT CTC CGT CAG TAT ATT CAA GGT TGT GGA GTA 
+Cgu/Can_colobus   AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAC GCT ACA AAC TAC AAT CCT GGA GAT GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AAA AAG CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTA 
+Pne_langur        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT GAT GCC TGT CAT ATA TCC TGC AGT GCT TTG CTG CAA AAC AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC GTT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AAA GAT GTC AGT CAG TAC GTT AAA GGT TGT GGA GTG 
+Mmu_rhesus        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT AAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA ACT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Ssc_squirrelM     AAG GTC TTC GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG CTT GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAC TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT CAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+Cja_marmoset      AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAC AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGG GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+node #8           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #9           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #10          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #11          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTG 
+node #12          AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+
+
+Overall accuracy of the 5 ancestral sequences:
+  0.99108  0.99716  0.99439  0.99393  0.99633
+for a site.
+
+  0.27875  0.67341  0.43633  0.37872  0.60699
+for the sequence.
+
+
+Amino acid sequences inferred by codonml.
+
+Node #8           KVFERCELAR TLKRLGLDGY RGISLANWMC LAKWESDYNT RATNYNPGDQ STDYGIFQIN SHYWCNNGKT PGAVNACHIS CNALLQDNIA DAVACAKRVV RDPQGIRAWV AWRNHCQNRD VSQYVQGCGV 
+Node #9           KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNPGDR STDYGIFQIN SRYWCNDGKT PGAVNACHLS CNALLQDNIA DAVACAKRVV RDPQGIRAWV AWRNRCQNRD VRQYVQGCGV 
+Node #10          KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ STDYGIFQIN SHYWCNNGKT PGAVNACHIS CNALLQDNIA DAVACAKRVV SDPQGIRAWV AWRNHCQNRD VSQYVQGCGV 
+Node #11          KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE STDYGIFQIN SRYWCNNGKT PGAVNACHIS CNALLQNNIA DAVACAKRVV SDPQGIRAWV AWRNHCQNRD VSQYVEGCGV 
+Node #12          KVFERCELAR TLKRFGLDGY RGISLANWMC LAKWESDYNT RATNYNPGDQ STDYGIFQIN SHYWCNNGRT PGAVNACHIS CNALLQDDIT EAVACAKRVV RDPQGIRAWV AWKAHCQNRD VSQYVQGCGV 
+
+
+Changes at sites (syn nonsyn).
+
+   1  (0.0  0.0)
+   2  GTC.ATC  (0.0  1.0)
+   3  TTT.TTC  (1.0  0.0)
+   4  (0.0  0.0)
+   5  (0.0  0.0)
+   6  (0.0  0.0)
+   7  (0.0  0.0)
+   8  (0.0  0.0)
+   9  (0.0  0.0)
+  10  (0.0  0.0)
+  11  (0.0  0.0)
+  12  (0.0  0.0)
+  13  (0.0  0.0)
+  14  AGA.AAA  AGA.AGG  (1.0  1.0)
+  15  TTT.CTT  TTG.TTT  (0.0  2.0)
+  16  (0.0  0.0)
+  17  CTG.ATG  CTG.ATG  (0.0  2.0)
+  18  (0.0  0.0)
+  19  (0.0  0.0)
+  20  (0.0  0.0)
+  21  AGG.AAG  (0.0  1.0)
+  22  (0.0  0.0)
+  23  ATC.GTC  (0.0  1.0)
+  24  (0.0  0.0)
+  25  (0.0  0.0)
+  26  (0.0  0.0)
+  27  (0.0  0.0)
+  28  (0.0  0.0)
+  29  ATG.GTG  (0.0  1.0)
+  30  (0.0  0.0)
+  31  (0.0  0.0)
+  32  (0.0  0.0)
+  33  (0.0  0.0)
+  34  (0.0  0.0)
+  35  (0.0  0.0)
+  36  (0.0  0.0)
+  37  GAT.AAT  GAT.GAC  GAT.GGT  GAT.GGT  (1.0  3.0)
+  38  TAT.TAC  (1.0  0.0)
+  39  (0.0  0.0)
+  40  (0.0  0.0)
+  41  GAA.GAC  CGA.CAA  CAA.GAA  CGA.CGT  (1.0  3.0)
+  42  (0.0  0.0)
+  43  (0.0  0.0)
+  44  (0.0  0.0)
+  45  (0.0  0.0)
+  46  (0.0  0.0)
+  47  CCT.GCT  (0.0  1.0)
+  48  (0.0  0.0)
+  49  GAC.GAT  (1.0  0.0)
+  50  CAA.AGA  CAA.GAA  (0.5  2.5)
+  51  (0.0  0.0)
+  52  (0.0  0.0)
+  53  (0.0  0.0)
+  54  (0.0  0.0)
+  55  (0.0  0.0)
+  56  (0.0  0.0)
+  57  (0.0  0.0)
+  58  (0.0  0.0)
+  59  (0.0  0.0)
+  60  (0.0  0.0)
+  61  (0.0  0.0)
+  62  CAC.CGC  CAC.CGC  (0.0  2.0)
+  63  TAC.TAT  (1.0  0.0)
+  64  (0.0  0.0)
+  65  (0.0  0.0)
+  66  AAT.AAC  (1.0  0.0)
+  67  AAT.GAT  (0.0  1.0)
+  68  (0.0  0.0)
+  69  AAA.AGA  (0.0  1.0)
+  70  (0.0  0.0)
+  71  (0.0  0.0)
+  72  (0.0  0.0)
+  73  (0.0  0.0)
+  74  (0.0  0.0)
+  75  AAT.GAT  (0.0  1.0)
+  76  (0.0  0.0)
+  77  (0.0  0.0)
+  78  (0.0  0.0)
+  79  ATA.TTA  (0.0  1.0)
+  80  (0.0  0.0)
+  81  (0.0  0.0)
+  82  AAT.AGT  AAT.AGT  (0.0  2.0)
+  83  (0.0  0.0)
+  84  (0.0  0.0)
+  85  (0.0  0.0)
+  86  (0.0  0.0)
+  87  AAT.AAC  GAT.AAT  (1.0  1.0)
+  88  AAC.GAC  (0.0  1.0)
+  89  (0.0  0.0)
+  90  GCT.GCC  GCT.ACT  (1.0  1.0)
+  91  GAA.CAA  GAT.GAA  (0.0  2.0)
+  92  (0.0  0.0)
+  93  GTA.GTG  (1.0  0.0)
+  94  GCT.ACT  GCT.GCC  (1.0  1.0)
+  95  (0.0  0.0)
+  96  (0.0  0.0)
+  97  (0.0  0.0)
+  98  (0.0  0.0)
+  99  (0.0  0.0)
+ 100  (0.0  0.0)
+ 101  CGT.CGC  CGT.CGC  CGT.AGT  (2.0  1.0)
+ 102  (0.0  0.0)
+ 103  (0.0  0.0)
+ 104  (0.0  0.0)
+ 105  (0.0  0.0)
+ 106  ATT.GTT  (0.0  1.0)
+ 107  AGA.AGG  AGA.CGA  (2.0  0.0)
+ 108  (0.0  0.0)
+ 109  (0.0  0.0)
+ 110  (0.0  0.0)
+ 111  (0.0  0.0)
+ 112  (0.0  0.0)
+ 113  AGA.AAA  AGA.AAA  (0.0  2.0)
+ 114  AAT.AAG  AAT.GCT  (0.0  3.0)
+ 115  CAT.CGT  CAT.CAC  (1.0  1.0)
+ 116  (0.0  0.0)
+ 117  (0.0  0.0)
+ 118  (0.0  0.0)
+ 119  AGA.AAA  (0.0  1.0)
+ 120  (0.0  0.0)
+ 121  GTC.CTC  (0.0  1.0)
+ 122  AGT.CGT  (0.0  1.0)
+ 123  (0.0  0.0)
+ 124  TAT.TAC  (1.0  0.0)
+ 125  GTT.ATT  (0.0  1.0)
+ 126  GAA.AAA  CAA.GAA  (0.0  2.0)
+ 127  (0.0  0.0)
+ 128  (0.0  0.0)
+ 129  (0.0  0.0)
+ 130  GTG.GTA  GTG.GTA  GTG.GTA  (3.0  0.0)
+
+
+(2) Joint reconstruction of ancestral sequences (eqn. 2 in Yang et al. 1995 Genetics 141:1641-1650), using the algorithm of Pupko et al. (2000 Mol Biol Evol 17:890-896).
+
+Listed by site, reconstruction (prob.)
+
+
+Site   Freq   Data: 
+
+   1       2  AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) : AAG AAG AAG AAG AAG   (0.9999)
+   2       1  GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) GTC (V) GTC (V) : GTC GTC ATC ATC GTC   (0.9589)
+   3       1  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTC (F) TTT (F) : TTT TTT TTT TTT TTT   (0.9846)
+   4       1  GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) : GAA GAA GAA GAA GAA   (0.9999)
+   5       2  AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) : AGG AGG AGG AGG AGG   (1.0000)
+   6       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+   7       2  GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) : GAG GAG GAG GAG GAG   (0.9999)
+   8       3  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) : TTG TTG TTG TTG TTG   (1.0000)
+   9       3  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) : GCC GCC GCC GCC GCC   (1.0000)
+  10       1  AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) : AGA AGA AGA AGA AGA   (0.9999)
+  11       2  ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) : ACT ACT ACT ACT ACT   (1.0000)
+  12       2  CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) : CTG CTG CTG CTG CTG   (1.0000)
+  13       2  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) : AAA AAA AAA AAA AAA   (0.9999)
+  14       1  AGA (R) AGA (R) AAA (K) AAA (K) AGA (R) AGG (R) AGG (R) : AGA AGA AGA AAA AGG   (0.9789)
+  15       1  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) CTT (L) TTT (F) : TTG TTG TTG TTG TTT   (0.9894)
+  16       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  17       1  ATG (M) ATG (M) CTG (L) CTG (L) CTG (L) ATG (M) CTG (L) : CTG ATG CTG CTG CTG   (0.8701)
+  18       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+  19       3  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) : GGC GGC GGC GGC GGC   (0.9999)
+  20       2  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) : TAC TAC TAC TAC TAC   (1.0000)
+  21       1  AGG (R) AGG (R) AAG (K) AAG (K) AGG (R) AGG (R) AGG (R) : AGG AGG AGG AAG AGG   (0.9935)
+  22       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  23       1  ATC (I) ATC (I) GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) : ATC ATC ATC GTC ATC   (0.9950)
+  24       3  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) : AGC AGC AGC AGC AGC   (0.9999)
+  25       1  CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) : CTA CTA CTA CTA CTA   (1.0000)
+  26       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+  27       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+  28       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+  29       1  ATG (M) ATG (M) GTG (V) GTG (V) GTG (V) ATG (M) ATG (M) : ATG ATG GTG GTG ATG   (0.9668)
+  30       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  31       3  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) : TTG TTG TTG TTG TTG   (1.0000)
+  32       3  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) : GCC GCC GCC GCC GCC   (1.0000)
+  33       2  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) : AAA AAA AAA AAA AAA   (0.9999)
+  34       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+  35       2  GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) : GAG GAG GAG GAG GAG   (0.9999)
+  36       1  AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) : AGT AGT AGT AGT AGT   (0.9998)
+  37       1  GGT (G) GGT (G) GGT (G) GGT (G) AAT (N) GAC (D) GAT (D) : GAT GGT GAT GGT GAT   (0.6376)
+  38       1  TAC (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) : TAT TAT TAT TAT TAT   (0.9616)
+  39       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+  40       2  ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) : ACA ACA ACA ACA ACA   (1.0000)
+  41       1  CGA (R) CGA (R) GAC (D) GAA (E) CAA (Q) CGT (R) CGT (R) : CGA CGA CAA GAA CGT   (0.9586)
+  42       3  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) : GCT GCT GCT GCT GCT   (1.0000)
+  43       2  ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) : ACA ACA ACA ACA ACA   (1.0000)
+  44       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+  45       2  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) : TAC TAC TAC TAC TAC   (1.0000)
+  46       2  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9998)
+  47       1  GCT (A) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) : CCT CCT CCT CCT CCT   (0.9979)
+  48       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  49       1  GAC (D) GAC (D) GAT (D) GAC (D) GAC (D) GAC (D) GAC (D) : GAC GAC GAC GAC GAC   (0.9890)
+  50       1  AGA (R) AGA (R) GAA (E) GAA (E) CAA (Q) CAA (Q) CAA (Q) : CAA AGA CAA GAA CAA   (0.9726)
+  51       3  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) : AGC AGC AGC AGC AGC   (0.9999)
+  52       2  ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) : ACT ACT ACT ACT ACT   (1.0000)
+  53       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+  54       1  TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) : TAT TAT TAT TAT TAT   (1.0000)
+  55       1  GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) : GGG GGG GGG GGG GGG   (1.0000)
+  56       1  ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) : ATA ATA ATA ATA ATA   (1.0000)
+  57       1  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) : TTT TTT TTT TTT TTT   (1.0000)
+  58       2  CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) : CAG CAG CAG CAG CAG   (1.0000)
+  59       2  ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) : ATC ATC ATC ATC ATC   (1.0000)
+  60       2  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9998)
+  61       3  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) : AGC AGC AGC AGC AGC   (0.9999)
+  62       1  CGC (R) CGC (R) CGC (R) CGC (R) CAC (H) CAC (H) CAC (H) : CAC CGC CAC CGC CAC   (0.7383)
+  63       1  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAT (Y) TAT (Y) : TAC TAC TAC TAC TAT   (0.9908)
+  64       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+  65       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  66       1  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAC (N) : AAT AAT AAT AAT AAT   (0.8888)
+  67       1  GAT (D) GAT (D) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) : AAT GAT AAT AAT AAT   (0.9642)
+  68       3  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) : GGC GGC GGC GGC GGC   (0.9999)
+  69       1  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AGA (R) AGA (R) : AAA AAA AAA AAA AGA   (0.9899)
+  70       1  ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) : ACC ACC ACC ACC ACC   (1.0000)
+  71       2  CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) : CCA CCA CCA CCA CCA   (1.0000)
+  72       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  73       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+  74       2  GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) : GTT GTT GTT GTT GTT   (0.9999)
+  75       1  AAT (N) AAT (N) AAT (N) GAT (D) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9638)
+  76       3  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) : GCC GCC GCC GCC GCC   (1.0000)
+  77       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  78       1  CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) : CAT CAT CAT CAT CAT   (1.0000)
+  79       1  TTA (L) TTA (L) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) : ATA TTA ATA ATA ATA   (0.9961)
+  80       1  TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) : TCC TCC TCC TCC TCC   (1.0000)
+  81       1  TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) : TGC TGC TGC TGC TGC   (1.0000)
+  82       1  AGT (S) AAT (N) AAT (N) AGT (S) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9225)
+  83       3  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) : GCT GCT GCT GCT GCT   (1.0000)
+  84       3  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) : TTG TTG TTG TTG TTG   (1.0000)
+  85       2  CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) : CTG CTG CTG CTG CTG   (1.0000)
+  86       3  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA CAA CAA   (1.0000)
+  87       1  GAT (D) GAT (D) AAT (N) AAC (N) GAT (D) GAT (D) GAT (D) : GAT GAT GAT AAT GAT   (0.9799)
+  88       1  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) GAC (D) GAC (D) : AAC AAC AAC AAC GAC   (0.9905)
+  89       2  ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) : ATC ATC ATC ATC ATC   (1.0000)
+  90       1  GCT (A) GCC (A) GCT (A) GCT (A) GCT (A) ACT (T) ACT (T) : GCT GCT GCT GCT ACT   (0.9782)
+  91       1  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) CAA (Q) GAA (E) : GAT GAT GAT GAT GAA   (0.9916)
+  92       3  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) : GCT GCT GCT GCT GCT   (1.0000)
+  93       1  GTA (V) GTA (V) GTA (V) GTA (V) GTA (V) GTG (V) GTG (V) : GTA GTA GTA GTA GTG   (0.9911)
+  94       1  GCT (A) GCT (A) GCT (A) GCT (A) ACT (T) GCC (A) GCC (A) : GCT GCT GCT GCT GCC   (0.9373)
+  95       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  96       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+  97       2  AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) : AAG AAG AAG AAG AAG   (0.9999)
+  98       2  AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) : AGG AGG AGG AGG AGG   (1.0000)
+  99       2  GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) : GTT GTT GTT GTT GTT   (0.9999)
+ 100       1  GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) : GTC GTC GTC GTC GTC   (1.0000)
+ 101       1  CGT (R) CGC (R) AGT (S) AGT (S) AGT (S) CGT (R) CGC (R) : CGT CGT AGT AGT CGT   (0.9153)
+ 102       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+ 103       2  CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) : CCA CCA CCA CCA CCA   (1.0000)
+ 104       3  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA CAA CAA   (1.0000)
+ 105       3  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) : GGC GGC GGC GGC GGC   (0.9999)
+ 106       1  ATT (I) ATT (I) ATT (I) GTT (V) ATT (I) ATT (I) ATT (I) : ATT ATT ATT ATT ATT   (0.9779)
+ 107       1  AGA (R) AGA (R) CGA (R) CGA (R) AGA (R) AGA (R) AGG (R) : AGA AGA AGA CGA AGA   (0.9144)
+ 108       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+ 109       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+ 110       1  GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) : GTG GTG GTG GTG GTG   (1.0000)
+ 111       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+ 112       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+ 113       1  AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AAA (K) AAA (K) : AGA AGA AGA AGA AAA   (0.9527)
+ 114       1  AAT (N) AAT (N) AAG (K) AAT (N) AAT (N) GCT (A) GCT (A) : AAT AAT AAT AAT GCT   (0.9702)
+ 115       1  CGT (R) CGT (R) CAC (H) CAC (H) CAC (H) CAT (H) CAT (H) : CAT CGT CAC CAC CAT   (0.9482)
+ 116       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+ 117       3  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA CAA CAA   (1.0000)
+ 118       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+ 119       1  AGA (R) AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AGA (R) : AGA AGA AGA AGA AGA   (0.9748)
+ 120       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+ 121       1  GTC (V) CTC (L) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) : GTC GTC GTC GTC GTC   (0.9981)
+ 122       1  CGT (R) CGT (R) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) : AGT CGT AGT AGT AGT   (0.9920)
+ 123       2  CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) : CAG CAG CAG CAG CAG   (1.0000)
+ 124       1  TAT (Y) TAT (Y) TAT (Y) TAC (Y) TAT (Y) TAT (Y) TAT (Y) : TAT TAT TAT TAT TAT   (0.9932)
+ 125       1  GTT (V) ATT (I) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) : GTT GTT GTT GTT GTT   (0.9882)
+ 126       1  CAA (Q) CAA (Q) GAA (E) AAA (K) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA GAA CAA   (0.5165)
+ 127       1  GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) : GGT GGT GGT GGT GGT   (0.9998)
+ 128       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+ 129       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+ 130       1  GTG (V) GTA (V) GTA (V) GTG (V) GTG (V) GTA (V) GTA (V) : GTG GTG GTG GTG GTA   (0.7827)
+
+
+List of extant and reconstructed sequences
+
+Hsa_Human         AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAC AAC ACA CGA GCT ACA AAC TAC AAT GCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AGT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Hla_gibbon        AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCC GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT CTC CGT CAG TAT ATT CAA GGT TGT GGA GTA 
+Cgu/Can_colobus   AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAC GCT ACA AAC TAC AAT CCT GGA GAT GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AAA AAG CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTA 
+Pne_langur        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT GAT GCC TGT CAT ATA TCC TGC AGT GCT TTG CTG CAA AAC AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC GTT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AAA GAT GTC AGT CAG TAC GTT AAA GGT TGT GGA GTG 
+Mmu_rhesus        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT AAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA ACT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Ssc_squirrelM     AAG GTC TTC GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG CTT GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAC TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT CAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+Cja_marmoset      AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAC AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGG GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+node #8           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #9           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #10          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #11          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTG 
+node #12          AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+
+
+
+TREE #  2
+
+Ancestral reconstruction by CODONML.
+
+((Hsa_Human: 0.007565, Hla_gibbon: 0.011517): 0.020710, ((Cgu/Can_colobus: 0.012982, Pne_langur: 0.015429): 0.032838, Mmu_rhesus: 0.005766): 0.012982, (Ssc_squirrelM: 0.012140, Cja_marmoset: 0.007035): 0.035889);
+
+((1, 2), ((3, 4), 5), (6, 7));
+
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+
+tree with node labels for Rod Page's TreeView
+((1_Hsa_Human: 0.007565, 2_Hla_gibbon: 0.011517) 9 : 0.020710, ((3_Cgu/Can_colobus: 0.012982, 4_Pne_langur: 0.015429) 11 : 0.032838, 5_Mmu_rhesus: 0.005766) 10 : 0.012982, (6_Ssc_squirrelM: 0.012140, 7_Cja_marmoset: 0.007035) 12 : 0.035889) 8 ;
+
+Nodes 8 to 12 are ancestral
+
+(1) Marginal reconstruction of ancestral sequences (eqn. 4 in Yang et al. 1995 Genetics 141:1641-1650).
+
+
+Prob of best character at each node, listed by site 
+
+Site   Freq   Data: 
+
+   1      2   AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) :  AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000)
+   2      1   GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) GTC (V) GTC (V) :  GTC V 0.967 (V 0.967) GTC V 1.000 (V 1.000) ATC I 0.992 (I 0.992) ATC I 1.000 (I 1.000) GTC V 1.000 (V 1.000)
+   3      1   TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTC (F) TTT (F) :  TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 0.985 (F 1.000)
+   4      1   GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) :  GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000) GAA E 1.000 (E 1.000)
+   5      2   AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) :  AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000)
+   6      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+   7      2   GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) :  GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000)
+   8      3   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) :  TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000)
+   9      3   GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) :  GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000)
+  10      1   AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) :  AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000)
+  11      2   ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) :  ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000)
+  12      2   CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) :  CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000)
+  13      2   AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) :  AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000)
+  14      1   AGA (R) AGA (R) AAA (K) AAA (K) AGA (R) AGG (R) AGG (R) :  AGA R 0.990 (R 1.000) AGA R 1.000 (R 1.000) AGA R 0.998 (R 0.998) AAA K 0.994 (K 0.994) AGG R 0.998 (R 1.000)
+  15      1   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) CTT (L) TTT (F) :  TTG L 0.999 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTT F 0.990 (F 0.990)
+  16      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  17      1   ATG (M) ATG (M) CTG (L) CTG (L) CTG (L) ATG (M) CTG (L) :  CTG L 0.871 (L 0.871) ATG M 1.000 (M 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 0.871 (L 0.871)
+  18      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+  19      3   GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) :  GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000)
+  20      2   TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) :  TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000)
+  21      1   AGG (R) AGG (R) AAG (K) AAG (K) AGG (R) AGG (R) AGG (R) :  AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 0.998 (R 0.998) AAG K 0.995 (K 0.995) AGG R 1.000 (R 1.000)
+  22      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  23      1   ATC (I) ATC (I) GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) :  ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 0.999 (I 0.999) GTC V 0.996 (V 0.996) ATC I 1.000 (I 1.000)
+  24      3   AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) :  AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000)
+  25      1   CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) :  CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000) CTA L 1.000 (L 1.000)
+  26      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+  27      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+  28      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+  29      1   ATG (M) ATG (M) GTG (V) GTG (V) GTG (V) ATG (M) ATG (M) :  ATG M 0.974 (M 0.974) ATG M 1.000 (M 1.000) GTG V 0.993 (V 0.993) GTG V 1.000 (V 1.000) ATG M 1.000 (M 1.000)
+  30      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  31      3   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) :  TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000)
+  32      3   GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) :  GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000)
+  33      2   AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) :  AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000)
+  34      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+  35      2   GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) :  GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000) GAG E 1.000 (E 1.000)
+  36      1   AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) :  AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000)
+  37      1   GGT (G) GGT (G) GGT (G) GGT (G) AAT (N) GAC (D) GAT (D) :  GAT D 0.742 (D 0.743) GGT G 0.996 (G 0.996) GAT D 0.696 (D 0.696) GGT G 0.993 (G 0.993) GAT D 0.959 (D 0.999)
+  38      1   TAC (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) :  TAT Y 0.999 (Y 1.000) TAT Y 0.962 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000)
+  39      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+  40      2   ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) :  ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000)
+  41      1   CGA (R) CGA (R) GAC (D) GAA (E) CAA (Q) CGT (R) CGT (R) :  CGA R 0.977 (R 0.978) CGA R 1.000 (R 1.000) CAA Q 0.990 (Q 0.990) GAA E 0.992 (E 0.992) CGT R 1.000 (R 1.000)
+  42      3   GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) :  GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000)
+  43      2   ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) :  ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000) ACA T 1.000 (T 1.000)
+  44      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+  45      2   TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) :  TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000)
+  46      2   AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) :  AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000)
+  47      1   GCT (A) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) :  CCT P 1.000 (P 1.000) CCT P 0.998 (P 0.998) CCT P 1.000 (P 1.000) CCT P 1.000 (P 1.000) CCT P 1.000 (P 1.000)
+  48      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  49      1   GAC (D) GAC (D) GAT (D) GAC (D) GAC (D) GAC (D) GAC (D) :  GAC D 1.000 (D 1.000) GAC D 1.000 (D 1.000) GAC D 1.000 (D 1.000) GAC D 0.989 (D 1.000) GAC D 1.000 (D 1.000)
+  50      1   AGA (R) AGA (R) GAA (E) GAA (E) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 0.980 (Q 0.980) AGA R 0.994 (R 0.995) CAA Q 0.990 (Q 0.990) GAA E 0.998 (E 0.998) CAA Q 1.000 (Q 1.000)
+  51      3   AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) :  AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000)
+  52      2   ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) :  ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000) ACT T 1.000 (T 1.000)
+  53      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+  54      1   TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) :  TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000)
+  55      1   GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) :  GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000) GGG G 1.000 (G 1.000)
+  56      1   ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) :  ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000)
+  57      1   TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) :  TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000) TTT F 1.000 (F 1.000)
+  58      2   CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) :  CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000)
+  59      2   ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) :  ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000)
+  60      2   AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) :  AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000)
+  61      3   AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) :  AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000) AGC S 1.000 (S 1.000)
+  62      1   CGC (R) CGC (R) CGC (R) CGC (R) CAC (H) CAC (H) CAC (H) :  CAC H 0.742 (H 0.742) CGC R 0.999 (R 0.999) CAC H 0.748 (H 0.748) CGC R 0.998 (R 0.998) CAC H 1.000 (H 1.000)
+  63      1   TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAT (Y) TAT (Y) :  TAC Y 0.994 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAC Y 1.000 (Y 1.000) TAT Y 0.997 (Y 1.000)
+  64      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+  65      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  66      1   AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAC (N) :  AAT N 0.998 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 0.889 (N 1.000)
+  67      1   GAT (D) GAT (D) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) :  AAT N 0.970 (N 0.970) GAT D 0.994 (D 0.994) AAT N 0.999 (N 0.999) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000)
+  68      3   GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) :  GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000)
+  69      1   AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AGA (R) AGA (R) :  AAA K 0.992 (K 0.992) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AAA K 1.000 (K 1.000) AGA R 0.998 (R 0.998)
+  70      1   ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) :  ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000) ACC T 1.000 (T 1.000)
+  71      2   CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) :  CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000)
+  72      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+  73      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+  74      2   GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) :  GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000)
+  75      1   AAT (N) AAT (N) AAT (N) GAT (D) AAT (N) AAT (N) AAT (N) :  AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 0.964 (N 0.964) AAT N 1.000 (N 1.000)
+  76      3   GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) :  GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000) GCC A 1.000 (A 1.000)
+  77      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  78      1   CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) :  CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000) CAT H 1.000 (H 1.000)
+  79      1   TTA (L) TTA (L) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) :  ATA I 0.997 (I 0.997) TTA L 1.000 (L 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000) ATA I 1.000 (I 1.000)
+  80      1   TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) :  TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000) TCC S 1.000 (S 1.000)
+  81      1   TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) :  TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000) TGC C 1.000 (C 1.000)
+  82      1   AGT (S) AAT (N) AAT (N) AGT (S) AAT (N) AAT (N) AAT (N) :  AAT N 0.998 (N 0.998) AAT N 0.957 (N 0.957) AAT N 0.999 (N 0.999) AAT N 0.963 (N 0.963) AAT N 1.000 (N 1.000)
+  83      3   GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) :  GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000)
+  84      3   TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) :  TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000) TTG L 1.000 (L 1.000)
+  85      2   CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) :  CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000) CTG L 1.000 (L 1.000)
+  86      3   CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000)
+  87      1   GAT (D) GAT (D) AAT (N) AAC (N) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 0.996 (D 0.997) AAT N 0.983 (N 0.993) GAT D 1.000 (D 1.000)
+  88      1   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) GAC (D) GAC (D) :  AAC N 0.993 (N 0.993) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) GAC D 0.998 (D 0.998)
+  89      2   ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) :  ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000) ATC I 1.000 (I 1.000)
+  90      1   GCT (A) GCC (A) GCT (A) GCT (A) GCT (A) ACT (T) ACT (T) :  GCT A 0.994 (A 0.994) GCT A 0.985 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) ACT T 0.998 (T 0.998)
+  91      1   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) CAA (Q) GAA (E) :  GAT D 0.997 (D 0.997) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAA E 0.995 (E 0.995)
+  92      3   GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) :  GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000) GCT A 1.000 (A 1.000)
+  93      1   GTA (V) GTA (V) GTA (V) GTA (V) GTA (V) GTG (V) GTG (V) :  GTA V 0.993 (V 1.000) GTA V 1.000 (V 1.000) GTA V 1.000 (V 1.000) GTA V 1.000 (V 1.000) GTG V 0.998 (V 1.000)
+  94      1   GCT (A) GCT (A) GCT (A) GCT (A) ACT (T) GCC (A) GCC (A) :  GCT A 0.990 (A 0.998) GCT A 1.000 (A 1.000) GCT A 0.947 (A 0.947) GCT A 1.000 (A 1.000) GCC A 0.998 (A 1.000)
+  95      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+  96      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+  97      2   AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) :  AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000) AAG K 1.000 (K 1.000)
+  98      2   AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) :  AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000) AGG R 1.000 (R 1.000)
+  99      2   GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) :  GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000)
+ 100      1   GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) :  GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000)
+ 101      1   CGT (R) CGC (R) AGT (S) AGT (S) AGT (S) CGT (R) CGC (R) :  CGT R 0.973 (R 0.993) CGT R 0.970 (R 1.000) AGT S 0.995 (S 0.996) AGT S 1.000 (S 1.000) CGT R 0.935 (R 1.000)
+ 102      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+ 103      2   CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) :  CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000) CCA P 1.000 (P 1.000)
+ 104      3   CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000)
+ 105      3   GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) :  GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000) GGC G 1.000 (G 1.000)
+ 106      1   ATT (I) ATT (I) ATT (I) GTT (V) ATT (I) ATT (I) ATT (I) :  ATT I 1.000 (I 1.000) ATT I 1.000 (I 1.000) ATT I 1.000 (I 1.000) ATT I 0.978 (I 0.978) ATT I 1.000 (I 1.000)
+ 107      1   AGA (R) AGA (R) CGA (R) CGA (R) AGA (R) AGA (R) AGG (R) :  AGA R 0.999 (R 1.000) AGA R 1.000 (R 1.000) AGA R 0.996 (R 1.000) CGA R 0.996 (R 1.000) AGA R 0.921 (R 1.000)
+ 108      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+ 109      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+ 110      1   GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) :  GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000) GTG V 1.000 (V 1.000)
+ 111      5   GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) :  GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000) GCA A 1.000 (A 1.000)
+ 112      5   TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) :  TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000) TGG W 1.000 (W 1.000)
+ 113      1   AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AAA (K) AAA (K) :  AGA R 0.990 (R 0.990) AGA R 1.000 (R 1.000) AGA R 0.997 (R 0.997) AGA R 0.962 (R 0.962) AAA K 0.998 (K 0.998)
+ 114      1   AAT (N) AAT (N) AAG (K) AAT (N) AAT (N) GCT (A) GCT (A) :  AAT N 0.984 (N 0.984) AAT N 1.000 (N 1.000) AAT N 1.000 (N 1.000) AAT N 0.987 (N 0.988) GCT A 0.998 (A 0.998)
+ 115      1   CGT (R) CGT (R) CAC (H) CAC (H) CAC (H) CAT (H) CAT (H) :  CAT H 0.952 (H 0.990) CGT R 0.998 (R 0.998) CAC H 0.998 (H 1.000) CAC H 1.000 (H 1.000) CAT H 1.000 (H 1.000)
+ 116      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+ 117      3   CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000)
+ 118      4   AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) :  AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000) AAC N 1.000 (N 1.000)
+ 119      1   AGA (R) AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AGA (R) :  AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 1.000 (R 1.000) AGA R 0.975 (R 0.975) AGA R 1.000 (R 1.000)
+ 120      4   GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) :  GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000) GAT D 1.000 (D 1.000)
+ 121      1   GTC (V) CTC (L) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) :  GTC V 1.000 (V 1.000) GTC V 0.998 (V 0.998) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000) GTC V 1.000 (V 1.000)
+ 122      1   CGT (R) CGT (R) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) :  AGT S 0.993 (S 0.993) CGT R 0.999 (R 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000) AGT S 1.000 (S 1.000)
+ 123      2   CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) :  CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000) CAG Q 1.000 (Q 1.000)
+ 124      1   TAT (Y) TAT (Y) TAT (Y) TAC (Y) TAT (Y) TAT (Y) TAT (Y) :  TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 1.000 (Y 1.000) TAT Y 0.993 (Y 1.000) TAT Y 1.000 (Y 1.000)
+ 125      1   GTT (V) ATT (I) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) :  GTT V 1.000 (V 1.000) GTT V 0.988 (V 0.988) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000) GTT V 1.000 (V 1.000)
+ 126      1   CAA (Q) CAA (Q) GAA (E) AAA (K) CAA (Q) CAA (Q) CAA (Q) :  CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) CAA Q 1.000 (Q 1.000) GAA E 0.517 (E 0.517) CAA Q 1.000 (Q 1.000)
+ 127      1   GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) :  GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000) GGT G 1.000 (G 1.000)
+ 128      7   TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) :  TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000) TGT C 1.000 (C 1.000)
+ 129      5   GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) :  GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000) GGA G 1.000 (G 1.000)
+ 130      1   GTG (V) GTA (V) GTA (V) GTG (V) GTG (V) GTA (V) GTA (V) :  GTG V 0.797 (V 1.000) GTG V 0.795 (V 1.000) GTG V 0.942 (V 1.000) GTG V 0.937 (V 1.000) GTA V 0.998 (V 1.000)
+
+Summary of changes along branches.
+Check root for directions of change.
+
+Branch 1:    8..9  (n= 8.5 s= 0.5)
+
+	  17 L 0.871 -> M 1.000
+	  37 D 0.743 -> G 0.996
+	  50 Q 0.980 -> R 0.995
+	  62 H 0.742 -> R 0.999
+	  67 N 0.970 -> D 0.994
+	  79 I 0.997 -> L 1.000
+	 115 H 0.990 -> R 0.998
+	 122 S 0.993 -> R 1.000
+
+
+Branch 2:    9..1  (Hsa_Human)  (n= 2.0 s= 1.0)
+
+	  47 P 0.998 -> A
+	  82 N 0.957 -> S
+
+
+Branch 3:    9..2  (Hla_gibbon)  (n= 2.0 s= 3.0)
+
+	 121 V 0.998 -> L
+	 125 V 0.988 -> I
+
+
+Branch 4:    8..10 (n= 4.0 s= 1.0)
+
+	   2 V 0.967 -> I 0.992
+	  29 M 0.974 -> V 0.993
+	  41 R 0.978 -> Q 0.990
+	 101 R 0.993 -> S 0.996
+
+
+Branch 5:   10..11 (n= 9.0 s= 1.0)
+
+	  14 R 0.998 -> K 0.994
+	  21 R 0.998 -> K 0.995
+	  23 I 0.999 -> V 0.996
+	  37 D 0.696 -> G 0.993
+	  41 Q 0.990 -> E 0.992
+	  50 Q 0.990 -> E 0.998
+	  62 H 0.748 -> R 0.998
+	  87 D 0.997 -> N 0.993
+	 126 Q 1.000 -> E 0.517
+
+
+Branch 6:   11..3  (Cgu/Can_colobus)  (n= 3.0 s= 2.0)
+
+	  41 E 0.992 -> D
+	 113 R 0.962 -> K
+	 114 N 0.988 -> K
+
+
+Branch 7:   11..4  (Pne_langur)  (n= 5.0 s= 2.0)
+
+	  75 N 0.964 -> D
+	  82 N 0.963 -> S
+	 106 I 0.978 -> V
+	 119 R 0.975 -> K
+	 126 E 0.517 -> K
+
+
+Branch 8:   10..5  (Mmu_rhesus)  (n= 2.0 s= 0.0)
+
+	  37 D 0.696 -> N
+	  94 A 0.947 -> T
+
+
+Branch 9:    8..12 (n= 8.0 s= 6.0)
+
+	  15 L 1.000 -> F 0.990
+	  69 K 0.992 -> R 0.998
+	  88 N 0.993 -> D 0.998
+	  90 A 0.994 -> T 0.998
+	  91 D 0.997 -> E 0.995
+	 113 R 0.990 -> K 0.998
+	 114 N 0.984 -> A 0.998
+
+
+Branch 10:   12..6  (Ssc_squirrelM)  (n= 3.0 s= 2.0)
+
+	  15 F 0.990 -> L
+	  17 L 0.871 -> M
+	  91 E 0.995 -> Q
+
+
+Branch 11:   12..7  (Cja_marmoset)  (n= 0.0 s= 3.0)
+
+
+
+
+
+List of extant and reconstructed sequences
+
+Hsa_Human         AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAC AAC ACA CGA GCT ACA AAC TAC AAT GCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AGT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Hla_gibbon        AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCC GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT CTC CGT CAG TAT ATT CAA GGT TGT GGA GTA 
+Cgu/Can_colobus   AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAC GCT ACA AAC TAC AAT CCT GGA GAT GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AAA AAG CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTA 
+Pne_langur        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT GAT GCC TGT CAT ATA TCC TGC AGT GCT TTG CTG CAA AAC AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC GTT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AAA GAT GTC AGT CAG TAC GTT AAA GGT TGT GGA GTG 
+Mmu_rhesus        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT AAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA ACT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Ssc_squirrelM     AAG GTC TTC GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG CTT GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAC TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT CAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+Cja_marmoset      AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAC AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGG GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+node #8           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #9           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #10          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #11          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTG 
+node #12          AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+
+
+Overall accuracy of the 5 ancestral sequences:
+  0.99108  0.99716  0.99439  0.99393  0.99633
+for a site.
+
+  0.27875  0.67341  0.43633  0.37872  0.60699
+for the sequence.
+
+
+Amino acid sequences inferred by codonml.
+
+Node #8           KVFERCELAR TLKRLGLDGY RGISLANWMC LAKWESDYNT RATNYNPGDQ STDYGIFQIN SHYWCNNGKT PGAVNACHIS CNALLQDNIA DAVACAKRVV RDPQGIRAWV AWRNHCQNRD VSQYVQGCGV 
+Node #9           KVFERCELAR TLKRLGMDGY RGISLANWMC LAKWESGYNT RATNYNPGDR STDYGIFQIN SRYWCNDGKT PGAVNACHLS CNALLQDNIA DAVACAKRVV RDPQGIRAWV AWRNRCQNRD VRQYVQGCGV 
+Node #10          KIFERCELAR TLKRLGLDGY RGISLANWVC LAKWESDYNT QATNYNPGDQ STDYGIFQIN SHYWCNNGKT PGAVNACHIS CNALLQDNIA DAVACAKRVV SDPQGIRAWV AWRNHCQNRD VSQYVQGCGV 
+Node #11          KIFERCELAR TLKKLGLDGY KGVSLANWVC LAKWESGYNT EATNYNPGDE STDYGIFQIN SRYWCNNGKT PGAVNACHIS CNALLQNNIA DAVACAKRVV SDPQGIRAWV AWRNHCQNRD VSQYVEGCGV 
+Node #12          KVFERCELAR TLKRFGLDGY RGISLANWMC LAKWESDYNT RATNYNPGDQ STDYGIFQIN SHYWCNNGRT PGAVNACHIS CNALLQDDIT EAVACAKRVV RDPQGIRAWV AWKAHCQNRD VSQYVQGCGV 
+
+
+Changes at sites (syn nonsyn).
+
+   1  (0.0  0.0)
+   2  GTC.ATC  (0.0  1.0)
+   3  TTT.TTC  (1.0  0.0)
+   4  (0.0  0.0)
+   5  (0.0  0.0)
+   6  (0.0  0.0)
+   7  (0.0  0.0)
+   8  (0.0  0.0)
+   9  (0.0  0.0)
+  10  (0.0  0.0)
+  11  (0.0  0.0)
+  12  (0.0  0.0)
+  13  (0.0  0.0)
+  14  AGA.AAA  AGA.AGG  (1.0  1.0)
+  15  TTT.CTT  TTG.TTT  (0.0  2.0)
+  16  (0.0  0.0)
+  17  CTG.ATG  CTG.ATG  (0.0  2.0)
+  18  (0.0  0.0)
+  19  (0.0  0.0)
+  20  (0.0  0.0)
+  21  AGG.AAG  (0.0  1.0)
+  22  (0.0  0.0)
+  23  ATC.GTC  (0.0  1.0)
+  24  (0.0  0.0)
+  25  (0.0  0.0)
+  26  (0.0  0.0)
+  27  (0.0  0.0)
+  28  (0.0  0.0)
+  29  ATG.GTG  (0.0  1.0)
+  30  (0.0  0.0)
+  31  (0.0  0.0)
+  32  (0.0  0.0)
+  33  (0.0  0.0)
+  34  (0.0  0.0)
+  35  (0.0  0.0)
+  36  (0.0  0.0)
+  37  GAT.AAT  GAT.GAC  GAT.GGT  GAT.GGT  (1.0  3.0)
+  38  TAT.TAC  (1.0  0.0)
+  39  (0.0  0.0)
+  40  (0.0  0.0)
+  41  GAA.GAC  CGA.CAA  CAA.GAA  CGA.CGT  (1.0  3.0)
+  42  (0.0  0.0)
+  43  (0.0  0.0)
+  44  (0.0  0.0)
+  45  (0.0  0.0)
+  46  (0.0  0.0)
+  47  CCT.GCT  (0.0  1.0)
+  48  (0.0  0.0)
+  49  GAC.GAT  (1.0  0.0)
+  50  CAA.AGA  CAA.GAA  (0.5  2.5)
+  51  (0.0  0.0)
+  52  (0.0  0.0)
+  53  (0.0  0.0)
+  54  (0.0  0.0)
+  55  (0.0  0.0)
+  56  (0.0  0.0)
+  57  (0.0  0.0)
+  58  (0.0  0.0)
+  59  (0.0  0.0)
+  60  (0.0  0.0)
+  61  (0.0  0.0)
+  62  CAC.CGC  CAC.CGC  (0.0  2.0)
+  63  TAC.TAT  (1.0  0.0)
+  64  (0.0  0.0)
+  65  (0.0  0.0)
+  66  AAT.AAC  (1.0  0.0)
+  67  AAT.GAT  (0.0  1.0)
+  68  (0.0  0.0)
+  69  AAA.AGA  (0.0  1.0)
+  70  (0.0  0.0)
+  71  (0.0  0.0)
+  72  (0.0  0.0)
+  73  (0.0  0.0)
+  74  (0.0  0.0)
+  75  AAT.GAT  (0.0  1.0)
+  76  (0.0  0.0)
+  77  (0.0  0.0)
+  78  (0.0  0.0)
+  79  ATA.TTA  (0.0  1.0)
+  80  (0.0  0.0)
+  81  (0.0  0.0)
+  82  AAT.AGT  AAT.AGT  (0.0  2.0)
+  83  (0.0  0.0)
+  84  (0.0  0.0)
+  85  (0.0  0.0)
+  86  (0.0  0.0)
+  87  AAT.AAC  GAT.AAT  (1.0  1.0)
+  88  AAC.GAC  (0.0  1.0)
+  89  (0.0  0.0)
+  90  GCT.GCC  GCT.ACT  (1.0  1.0)
+  91  GAA.CAA  GAT.GAA  (0.0  2.0)
+  92  (0.0  0.0)
+  93  GTA.GTG  (1.0  0.0)
+  94  GCT.ACT  GCT.GCC  (1.0  1.0)
+  95  (0.0  0.0)
+  96  (0.0  0.0)
+  97  (0.0  0.0)
+  98  (0.0  0.0)
+  99  (0.0  0.0)
+ 100  (0.0  0.0)
+ 101  CGT.CGC  CGT.CGC  CGT.AGT  (2.0  1.0)
+ 102  (0.0  0.0)
+ 103  (0.0  0.0)
+ 104  (0.0  0.0)
+ 105  (0.0  0.0)
+ 106  ATT.GTT  (0.0  1.0)
+ 107  AGA.AGG  AGA.CGA  (2.0  0.0)
+ 108  (0.0  0.0)
+ 109  (0.0  0.0)
+ 110  (0.0  0.0)
+ 111  (0.0  0.0)
+ 112  (0.0  0.0)
+ 113  AGA.AAA  AGA.AAA  (0.0  2.0)
+ 114  AAT.AAG  AAT.GCT  (0.0  3.0)
+ 115  CAT.CGT  CAT.CAC  (1.0  1.0)
+ 116  (0.0  0.0)
+ 117  (0.0  0.0)
+ 118  (0.0  0.0)
+ 119  AGA.AAA  (0.0  1.0)
+ 120  (0.0  0.0)
+ 121  GTC.CTC  (0.0  1.0)
+ 122  AGT.CGT  (0.0  1.0)
+ 123  (0.0  0.0)
+ 124  TAT.TAC  (1.0  0.0)
+ 125  GTT.ATT  (0.0  1.0)
+ 126  GAA.AAA  CAA.GAA  (0.0  2.0)
+ 127  (0.0  0.0)
+ 128  (0.0  0.0)
+ 129  (0.0  0.0)
+ 130  GTG.GTA  GTG.GTA  GTG.GTA  (3.0  0.0)
+
+
+(2) Joint reconstruction of ancestral sequences (eqn. 2 in Yang et al. 1995 Genetics 141:1641-1650), using the algorithm of Pupko et al. (2000 Mol Biol Evol 17:890-896).
+
+Listed by site, reconstruction (prob.)
+
+
+Site   Freq   Data: 
+
+   1       2  AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) : AAG AAG AAG AAG AAG   (0.9999)
+   2       1  GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) GTC (V) GTC (V) : GTC GTC ATC ATC GTC   (0.9589)
+   3       1  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTC (F) TTT (F) : TTT TTT TTT TTT TTT   (0.9846)
+   4       1  GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) GAA (E) : GAA GAA GAA GAA GAA   (0.9999)
+   5       2  AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) : AGG AGG AGG AGG AGG   (1.0000)
+   6       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+   7       2  GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) : GAG GAG GAG GAG GAG   (0.9999)
+   8       3  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) : TTG TTG TTG TTG TTG   (1.0000)
+   9       3  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) : GCC GCC GCC GCC GCC   (1.0000)
+  10       1  AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) AGA (R) : AGA AGA AGA AGA AGA   (0.9999)
+  11       2  ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) : ACT ACT ACT ACT ACT   (1.0000)
+  12       2  CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) : CTG CTG CTG CTG CTG   (1.0000)
+  13       2  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) : AAA AAA AAA AAA AAA   (0.9999)
+  14       1  AGA (R) AGA (R) AAA (K) AAA (K) AGA (R) AGG (R) AGG (R) : AGA AGA AGA AAA AGG   (0.9789)
+  15       1  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) CTT (L) TTT (F) : TTG TTG TTG TTG TTT   (0.9894)
+  16       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  17       1  ATG (M) ATG (M) CTG (L) CTG (L) CTG (L) ATG (M) CTG (L) : CTG ATG CTG CTG CTG   (0.8701)
+  18       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+  19       3  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) : GGC GGC GGC GGC GGC   (0.9999)
+  20       2  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) : TAC TAC TAC TAC TAC   (1.0000)
+  21       1  AGG (R) AGG (R) AAG (K) AAG (K) AGG (R) AGG (R) AGG (R) : AGG AGG AGG AAG AGG   (0.9935)
+  22       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  23       1  ATC (I) ATC (I) GTC (V) GTC (V) ATC (I) ATC (I) ATC (I) : ATC ATC ATC GTC ATC   (0.9950)
+  24       3  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) : AGC AGC AGC AGC AGC   (0.9999)
+  25       1  CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) CTA (L) : CTA CTA CTA CTA CTA   (1.0000)
+  26       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+  27       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+  28       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+  29       1  ATG (M) ATG (M) GTG (V) GTG (V) GTG (V) ATG (M) ATG (M) : ATG ATG GTG GTG ATG   (0.9668)
+  30       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  31       3  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) : TTG TTG TTG TTG TTG   (1.0000)
+  32       3  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) : GCC GCC GCC GCC GCC   (1.0000)
+  33       2  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) : AAA AAA AAA AAA AAA   (0.9999)
+  34       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+  35       2  GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) GAG (E) : GAG GAG GAG GAG GAG   (0.9999)
+  36       1  AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) : AGT AGT AGT AGT AGT   (0.9998)
+  37       1  GGT (G) GGT (G) GGT (G) GGT (G) AAT (N) GAC (D) GAT (D) : GAT GGT GAT GGT GAT   (0.6376)
+  38       1  TAC (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) : TAT TAT TAT TAT TAT   (0.9616)
+  39       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+  40       2  ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) : ACA ACA ACA ACA ACA   (1.0000)
+  41       1  CGA (R) CGA (R) GAC (D) GAA (E) CAA (Q) CGT (R) CGT (R) : CGA CGA CAA GAA CGT   (0.9586)
+  42       3  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) : GCT GCT GCT GCT GCT   (1.0000)
+  43       2  ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) ACA (T) : ACA ACA ACA ACA ACA   (1.0000)
+  44       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+  45       2  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) : TAC TAC TAC TAC TAC   (1.0000)
+  46       2  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9998)
+  47       1  GCT (A) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) CCT (P) : CCT CCT CCT CCT CCT   (0.9979)
+  48       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  49       1  GAC (D) GAC (D) GAT (D) GAC (D) GAC (D) GAC (D) GAC (D) : GAC GAC GAC GAC GAC   (0.9890)
+  50       1  AGA (R) AGA (R) GAA (E) GAA (E) CAA (Q) CAA (Q) CAA (Q) : CAA AGA CAA GAA CAA   (0.9726)
+  51       3  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) : AGC AGC AGC AGC AGC   (0.9999)
+  52       2  ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) ACT (T) : ACT ACT ACT ACT ACT   (1.0000)
+  53       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+  54       1  TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) TAT (Y) : TAT TAT TAT TAT TAT   (1.0000)
+  55       1  GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) GGG (G) : GGG GGG GGG GGG GGG   (1.0000)
+  56       1  ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) : ATA ATA ATA ATA ATA   (1.0000)
+  57       1  TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) TTT (F) : TTT TTT TTT TTT TTT   (1.0000)
+  58       2  CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) : CAG CAG CAG CAG CAG   (1.0000)
+  59       2  ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) : ATC ATC ATC ATC ATC   (1.0000)
+  60       2  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9998)
+  61       3  AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) AGC (S) : AGC AGC AGC AGC AGC   (0.9999)
+  62       1  CGC (R) CGC (R) CGC (R) CGC (R) CAC (H) CAC (H) CAC (H) : CAC CGC CAC CGC CAC   (0.7383)
+  63       1  TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAC (Y) TAT (Y) TAT (Y) : TAC TAC TAC TAC TAT   (0.9908)
+  64       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+  65       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  66       1  AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) AAC (N) : AAT AAT AAT AAT AAT   (0.8888)
+  67       1  GAT (D) GAT (D) AAT (N) AAT (N) AAT (N) AAT (N) AAT (N) : AAT GAT AAT AAT AAT   (0.9642)
+  68       3  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) : GGC GGC GGC GGC GGC   (0.9999)
+  69       1  AAA (K) AAA (K) AAA (K) AAA (K) AAA (K) AGA (R) AGA (R) : AAA AAA AAA AAA AGA   (0.9899)
+  70       1  ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) ACC (T) : ACC ACC ACC ACC ACC   (1.0000)
+  71       2  CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) : CCA CCA CCA CCA CCA   (1.0000)
+  72       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+  73       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+  74       2  GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) : GTT GTT GTT GTT GTT   (0.9999)
+  75       1  AAT (N) AAT (N) AAT (N) GAT (D) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9638)
+  76       3  GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) GCC (A) : GCC GCC GCC GCC GCC   (1.0000)
+  77       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  78       1  CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) CAT (H) : CAT CAT CAT CAT CAT   (1.0000)
+  79       1  TTA (L) TTA (L) ATA (I) ATA (I) ATA (I) ATA (I) ATA (I) : ATA TTA ATA ATA ATA   (0.9961)
+  80       1  TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) TCC (S) : TCC TCC TCC TCC TCC   (1.0000)
+  81       1  TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) TGC (C) : TGC TGC TGC TGC TGC   (1.0000)
+  82       1  AGT (S) AAT (N) AAT (N) AGT (S) AAT (N) AAT (N) AAT (N) : AAT AAT AAT AAT AAT   (0.9224)
+  83       3  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) : GCT GCT GCT GCT GCT   (1.0000)
+  84       3  TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) TTG (L) : TTG TTG TTG TTG TTG   (1.0000)
+  85       2  CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) CTG (L) : CTG CTG CTG CTG CTG   (1.0000)
+  86       3  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA CAA CAA   (1.0000)
+  87       1  GAT (D) GAT (D) AAT (N) AAC (N) GAT (D) GAT (D) GAT (D) : GAT GAT GAT AAT GAT   (0.9799)
+  88       1  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) GAC (D) GAC (D) : AAC AAC AAC AAC GAC   (0.9905)
+  89       2  ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) ATC (I) : ATC ATC ATC ATC ATC   (1.0000)
+  90       1  GCT (A) GCC (A) GCT (A) GCT (A) GCT (A) ACT (T) ACT (T) : GCT GCT GCT GCT ACT   (0.9782)
+  91       1  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) CAA (Q) GAA (E) : GAT GAT GAT GAT GAA   (0.9916)
+  92       3  GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) GCT (A) : GCT GCT GCT GCT GCT   (1.0000)
+  93       1  GTA (V) GTA (V) GTA (V) GTA (V) GTA (V) GTG (V) GTG (V) : GTA GTA GTA GTA GTG   (0.9911)
+  94       1  GCT (A) GCT (A) GCT (A) GCT (A) ACT (T) GCC (A) GCC (A) : GCT GCT GCT GCT GCC   (0.9373)
+  95       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+  96       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+  97       2  AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) AAG (K) : AAG AAG AAG AAG AAG   (0.9999)
+  98       2  AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) AGG (R) : AGG AGG AGG AGG AGG   (1.0000)
+  99       2  GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) : GTT GTT GTT GTT GTT   (0.9999)
+ 100       1  GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) : GTC GTC GTC GTC GTC   (1.0000)
+ 101       1  CGT (R) CGC (R) AGT (S) AGT (S) AGT (S) CGT (R) CGC (R) : CGT CGT AGT AGT CGT   (0.9153)
+ 102       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+ 103       2  CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) CCA (P) : CCA CCA CCA CCA CCA   (1.0000)
+ 104       3  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA CAA CAA   (1.0000)
+ 105       3  GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) GGC (G) : GGC GGC GGC GGC GGC   (0.9999)
+ 106       1  ATT (I) ATT (I) ATT (I) GTT (V) ATT (I) ATT (I) ATT (I) : ATT ATT ATT ATT ATT   (0.9779)
+ 107       1  AGA (R) AGA (R) CGA (R) CGA (R) AGA (R) AGA (R) AGG (R) : AGA AGA AGA CGA AGA   (0.9144)
+ 108       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+ 109       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+ 110       1  GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) GTG (V) : GTG GTG GTG GTG GTG   (1.0000)
+ 111       5  GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) GCA (A) : GCA GCA GCA GCA GCA   (1.0000)
+ 112       5  TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) TGG (W) : TGG TGG TGG TGG TGG   (1.0000)
+ 113       1  AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AAA (K) AAA (K) : AGA AGA AGA AGA AAA   (0.9527)
+ 114       1  AAT (N) AAT (N) AAG (K) AAT (N) AAT (N) GCT (A) GCT (A) : AAT AAT AAT AAT GCT   (0.9702)
+ 115       1  CGT (R) CGT (R) CAC (H) CAC (H) CAC (H) CAT (H) CAT (H) : CAT CGT CAC CAC CAT   (0.9482)
+ 116       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+ 117       3  CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA CAA CAA   (1.0000)
+ 118       4  AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) AAC (N) : AAC AAC AAC AAC AAC   (0.9999)
+ 119       1  AGA (R) AGA (R) AGA (R) AAA (K) AGA (R) AGA (R) AGA (R) : AGA AGA AGA AGA AGA   (0.9748)
+ 120       4  GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) GAT (D) : GAT GAT GAT GAT GAT   (0.9998)
+ 121       1  GTC (V) CTC (L) GTC (V) GTC (V) GTC (V) GTC (V) GTC (V) : GTC GTC GTC GTC GTC   (0.9981)
+ 122       1  CGT (R) CGT (R) AGT (S) AGT (S) AGT (S) AGT (S) AGT (S) : AGT CGT AGT AGT AGT   (0.9920)
+ 123       2  CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) CAG (Q) : CAG CAG CAG CAG CAG   (1.0000)
+ 124       1  TAT (Y) TAT (Y) TAT (Y) TAC (Y) TAT (Y) TAT (Y) TAT (Y) : TAT TAT TAT TAT TAT   (0.9932)
+ 125       1  GTT (V) ATT (I) GTT (V) GTT (V) GTT (V) GTT (V) GTT (V) : GTT GTT GTT GTT GTT   (0.9882)
+ 126       1  CAA (Q) CAA (Q) GAA (E) AAA (K) CAA (Q) CAA (Q) CAA (Q) : CAA CAA CAA GAA CAA   (0.5165)
+ 127       1  GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) GGT (G) : GGT GGT GGT GGT GGT   (0.9998)
+ 128       7  TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) TGT (C) : TGT TGT TGT TGT TGT   (1.0000)
+ 129       5  GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) GGA (G) : GGA GGA GGA GGA GGA   (0.9999)
+ 130       1  GTG (V) GTA (V) GTA (V) GTG (V) GTG (V) GTA (V) GTA (V) : GTG GTG GTG GTG GTA   (0.7827)
+
+
+List of extant and reconstructed sequences
+
+Hsa_Human         AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAC AAC ACA CGA GCT ACA AAC TAC AAT GCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AGT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Hla_gibbon        AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCC GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT CTC CGT CAG TAT ATT CAA GGT TGT GGA GTA 
+Cgu/Can_colobus   AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAC GCT ACA AAC TAC AAT CCT GGA GAT GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AAA AAG CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTA 
+Pne_langur        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT GAT GCC TGT CAT ATA TCC TGC AGT GCT TTG CTG CAA AAC AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC GTT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AAA GAT GTC AGT CAG TAC GTT AAA GGT TGT GGA GTG 
+Mmu_rhesus        AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT AAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA ACT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Ssc_squirrelM     AAG GTC TTC GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG CTT GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAC TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT CAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+Cja_marmoset      AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAC AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGG GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+node #8           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #9           AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #10          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+node #11          AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTG 
+node #12          AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rst1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+  1    -0.00    0.511    0.489   -1.000     0.52     0.00

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rub
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rub	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_lysozyme/rub	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,48 @@
+
+
+TREE # 1
+
+  1 57.0511    911.866966  x:  0.06268   0.02434   0.03918   0.04580   0.06920   0.04580   0.05520   0.02250   0.11892   0.03972   0.02397   2.00271   0.99739   1.00045  
+  2 18.9464    911.763959  x:  0.06764   0.02691   0.03838   0.04277   0.07495   0.04374   0.05085   0.02078   0.12004   0.03946   0.02493   2.00731   0.99290   1.00125  
+  3 17.2055    911.691402  x:  0.06952   0.02343   0.03790   0.04130   0.07813   0.04241   0.05090   0.02232   0.12022   0.03952   0.02308   2.01485   0.98551   1.00259  
+  4 41.0047    910.628609  x:  0.07137   0.02620   0.04439   0.03804   0.09645   0.03989   0.07654   0.01714   0.11898   0.04587   0.02567   2.18502   0.81862   1.03300  
+  5 330.0384    908.660991  x:  0.07033   0.03282   0.03599   0.04791   0.09682   0.04206   0.07188   0.02569   0.11667   0.03794   0.02291   2.45879   0.55009   1.08221  
+  6 273.9810    908.174440  x:  0.06841   0.02641   0.03736   0.04422   0.09012   0.04075   0.06501   0.01808   0.11687   0.04088   0.02517   2.53001   0.48216   1.09516  
+  7 32.0852    908.121354  x:  0.06962   0.02714   0.04113   0.04201   0.08801   0.04202   0.06373   0.01990   0.11788   0.04271   0.02403   2.53971   0.47611   1.09709  
+  8 10.2883    908.049989  x:  0.07394   0.02754   0.03882   0.04039   0.08345   0.04272   0.06595   0.01893   0.12390   0.04249   0.02214   2.56256   0.47895   1.10243  
+  9 43.9147    907.929599  x:  0.07633   0.02582   0.03897   0.04193   0.08067   0.04109   0.06609   0.02170   0.13092   0.04309   0.02766   2.60763   0.48889   1.11319  
+ 10 113.4636    907.253062  x:  0.06522   0.02536   0.04672   0.04661   0.08046   0.03475   0.05796   0.01902   0.14707   0.03974   0.02253   2.85340   0.55093   1.17235  
+ 11 321.0222    906.555179  x:  0.05575   0.02365   0.04095   0.03870   0.08148   0.04813   0.04995   0.01976   0.14572   0.04344   0.02458   3.11682   0.61923   1.23610  
+ 12 218.4010    905.867140  x:  0.06763   0.02337   0.04059   0.04004   0.07126   0.04482   0.04620   0.01794   0.11796   0.04643   0.02419   3.36606   0.68506   1.29708  
+ 13 10.3815    905.858148  x:  0.06797   0.02299   0.04143   0.03927   0.07104   0.04529   0.04644   0.01777   0.11741   0.04411   0.02477   3.36949   0.68592   1.29825  
+ 14  0.9990    904.942858  x:  0.07115   0.02876   0.03573   0.04652   0.08159   0.04513   0.05585   0.02212   0.11829   0.03704   0.02354   3.94359   0.69088   2.38587  
+ 15  0.3806    904.698026  x:  0.07011   0.02501   0.04000   0.04494   0.07830   0.04277   0.05139   0.01930   0.12412   0.04343   0.02357   4.29118   0.68983   2.81855  
+ 16  0.2244    904.645420  x:  0.06969   0.02541   0.03882   0.04333   0.07840   0.04396   0.05141   0.01939   0.12040   0.04029   0.02387   4.54266   0.68846   3.09347  
+ 17  0.0853    904.640565  x:  0.06999   0.02558   0.03888   0.04375   0.07907   0.04392   0.05259   0.01951   0.12142   0.04125   0.02378   4.58691   0.68596   3.19513  
+ 18  0.0476    904.639140  x:  0.07001   0.02561   0.03907   0.04408   0.07910   0.04381   0.05186   0.01956   0.12174   0.04098   0.02376   4.60048   0.68393   3.28862  
+ 19  0.0549    904.636957  x:  0.07016   0.02550   0.03890   0.04390   0.07906   0.04381   0.05212   0.01934   0.12067   0.04109   0.02378   4.57807   0.68577   3.47164  
+ 20  0.0294    904.636560  x:  0.07006   0.02557   0.03891   0.04387   0.07901   0.04390   0.05216   0.01948   0.12134   0.04102   0.02378   4.56178   0.68562   3.50174  
+ 21  0.0035    904.636554  x:  0.07001   0.02557   0.03893   0.04388   0.07905   0.04388   0.05215   0.01949   0.12130   0.04104   0.02378   4.56086   0.68574   3.50529  
+ 22  0.0004    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07904   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56116   0.68580   3.50580  
+ 23  0.0001    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07904   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56117   0.68580   3.50577  
+ 24  0.0001    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07904   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56118   0.68580   3.50575  
+ 25  0.0000    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07904   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56118   0.68580   3.50575  
+
+TREE # 2
+
+  1 64.8459    904.711965  x:  0.06603   0.02485   0.04005   0.04745   0.07283   0.04652   0.05480   0.01985   0.11993   0.04141   0.02359   4.56116   0.68595   3.50574  
+  2 17.6419    904.639797  x:  0.07001   0.02633   0.03883   0.04446   0.07825   0.04417   0.05145   0.01925   0.12104   0.04098   0.02420   4.56116   0.68598   3.50574  
+  3  4.6734    904.636959  x:  0.07020   0.02548   0.03884   0.04411   0.07884   0.04400   0.05172   0.01950   0.12114   0.04097   0.02372   4.56116   0.68598   3.50574  
+  4  1.3499    904.636631  x:  0.07016   0.02554   0.03899   0.04395   0.07907   0.04400   0.05210   0.01938   0.12122   0.04106   0.02378   4.56116   0.68597   3.50574  
+  5  0.3002    904.636616  x:  0.07014   0.02557   0.03898   0.04399   0.07911   0.04400   0.05213   0.01944   0.12126   0.04106   0.02378   4.56116   0.68597   3.50574  
+  6  0.1248    904.636613  x:  0.07012   0.02556   0.03896   0.04401   0.07913   0.04399   0.05213   0.01942   0.12130   0.04105   0.02378   4.56116   0.68597   3.50574  
+  7  0.0510    904.636612  x:  0.07011   0.02556   0.03898   0.04400   0.07912   0.04399   0.05212   0.01943   0.12132   0.04106   0.02378   4.56116   0.68597   3.50574  
+  8  0.0209    904.636612  x:  0.07011   0.02556   0.03897   0.04400   0.07912   0.04399   0.05212   0.01943   0.12133   0.04106   0.02378   4.56116   0.68596   3.50574  
+  9  0.0161    904.636612  x:  0.07012   0.02556   0.03897   0.04400   0.07911   0.04399   0.05212   0.01943   0.12134   0.04106   0.02378   4.56116   0.68596   3.50574  
+ 10  0.0156    904.636611  x:  0.07012   0.02556   0.03898   0.04401   0.07911   0.04398   0.05212   0.01943   0.12134   0.04105   0.02378   4.56116   0.68592   3.50574  
+ 11  0.0916    904.636598  x:  0.07010   0.02556   0.03896   0.04398   0.07911   0.04397   0.05212   0.01943   0.12132   0.04105   0.02378   4.56121   0.68506   3.50574  
+ 12  0.0063    904.636596  x:  0.07009   0.02557   0.03896   0.04398   0.07911   0.04396   0.05212   0.01944   0.12132   0.04108   0.02377   4.56122   0.68508   3.50574  
+ 13  0.0706    904.636554  x:  0.07001   0.02557   0.03893   0.04387   0.07905   0.04388   0.05214   0.01948   0.12131   0.04103   0.02378   4.56129   0.68581   3.50574  
+ 14  0.0001    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07905   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56124   0.68581   3.50574  
+ 15  0.0000    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07905   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56122   0.68581   3.50574  
+ 16  0.0000    904.636553  x:  0.07000   0.02557   0.03893   0.04388   0.07905   0.04388   0.05215   0.01949   0.12131   0.04103   0.02378   4.56122   0.68581   3.50574  
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/codeml_nssites.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/codeml_nssites.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/codeml_nssites.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,329 @@
+
+Hsa_Human             AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAC AAC ACA CGA GCT ACA AAC TAC AAT GCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AGT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT GTC CGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Hla_gibbon            AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA CGA GCT ACA AAC TAC AAT CCT GGA GAC AGA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT GAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT TTA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCC GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CGT TGT CAA AAC AGA GAT CTC CGT CAG TAT ATT CAA GGT TGT GGA GTA 
+Cgu/Can_colobus       AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAC GCT ACA AAC TAC AAT CCT GGA GAT GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA AAT AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT CGA GCA TGG GTG GCA TGG AAA AAG CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT GAA GGT TGT GGA GTA 
+Pne_langur            AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AAA TTG GGA CTG GAT GGC TAC AAG GGA GTC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT GGT TAT AAC ACA GAA GCT ACA AAC TAC AAT CCT GGA GAC GAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CGC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT GAT GCC TGT CAT ATA TCC TGC AGT GCT TTG CTG CAA AAC AAC ATC GCT GAT GCT GTA GCT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC GTT CGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AAA GAT GTC AGT CAG TAC GTT AAA GGT TGT GGA GTG 
+Mmu_rhesus            AAG ATC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG GTG TGT TTG GCC AAA TGG GAG AGT AAT TAT AAC ACA CAA GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAC TGG TGT AAT AAT GGC AAA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT AAC ATC GCT GAT GCT GTA ACT TGT GCA AAG AGG GTT GTC AGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AGA AAT CAC TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTG 
+Ssc_squirrelM         AAG GTC TTC GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG CTT GGA ATG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAC TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAT AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT CAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGT GAT CCA CAA GGC ATT AGA GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+Cja_marmoset          AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGG TTT GGA CTG GAT GGC TAC AGG GGA ATC AGC CTA GCA AAC TGG ATG TGT TTG GCC AAA TGG GAG AGT GAT TAT AAC ACA CGT GCT ACA AAC TAC AAT CCT GGA GAC CAA AGC ACT GAT TAT GGG ATA TTT CAG ATC AAT AGC CAC TAT TGG TGT AAC AAT GGC AGA ACC CCA GGA GCA GTT AAT GCC TGT CAT ATA TCC TGC AAT GCT TTG CTG CAA GAT GAC ATC ACT GAA GCT GTG GCC TGT GCA AAG AGG GTT GTC CGC GAT CCA CAA GGC ATT AGG GCA TGG GTG GCA TGG AAA GCT CAT TGT CAA AAC AGA GAT GTC AGT CAG TAT GTT CAA GGT TGT GGA GTA 
+
+
+CODONML (in paml 3.13, August 2002)    lysozymeSmall.txt   Model: One dN/dS ratio dGamma (ncatG=11) 
+Codon frequencies: F3x4
+Warning: Gamma model for codons.  See documentation.Site-class models
+
+ns = 7  	ls = 130
+# site patterns = 81
+    2    1    1    1    2    7    2    3    3    1    2    2    2    1    1
+    5    1    4    3    2    1    1    3    1    5    4    5    1    1    1
+    1    2    1    3    2    1    1    1    1    1    1    1    2    2    1
+    1    1    1    1    1    2    2    1    1    1    1    1    1    3    1
+    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
+    1    1    1    1    1    1
+
+1       
+Hsa_Human             AAG GTC TTT GAA AGG TGT GAG TTG GCC AGA ACT CTG AAA AGA TTG GGA ATG GAT GGC TAC AGG ATC AGC CTA GCA AAC TGG ATG AGT GGT TAC ACA CGA GCT AAT GCT GAC AGA TAT GGG ATA TTT CAG ATC CGC TAC AAT GAT AAA ACC CCA GTT AAT CAT TTA TCC TGC AGT CAA GAT AAC GCT GAT GTA GCT GTC CGT ATT AGA GTG AGA AAT CGT AGA GTC CGT TAT GTT CAA GGT GTG 
+Hla_gibbon            ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ..T ... ... ... ... C.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .A. ... ... ... ..C ... ... ... ... ..C ... ... ... ... ... ... ... C.. ... ... A.. ... ... ..A 
+Cgu/Can_colobus       ... A.. ... ... ... ... ... ... ... ... ... ... ... .A. ... ... C.. ... ... ... .A. G.. ... ... ... ... ... G.. ... ... ..T ... GAC ... ... C.. ..T GA. ... ... ... ... ... ... ... ... ... A.. ... ... ... ... ... ... A.. ... ... .A. ... A.. ... ... ... ... ... ... A.. ... C.. ... .A. ..G .AC ... ... A.. ... ... G.. ... ..A 
+Pne_langur            ... A.. ... ... ... ... ... ... ... ... ... ... ... .A. ... ... C.. ... ... ... .A. G.. ... ... ... ... ... G.. ... ... ..T ... GA. ... ... C.. ... GA. ... ... ... ... ... ... ... ... ... A.. ... ... ... ... G.. ... A.. ... ... ... ... A.C ... ... ... ... ... ... A.. G.. C.. ... ... ... .AC .A. ... A.. ..C ... A.. ... ... 
+Mmu_rhesus            ... A.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... C.. ... ... ... ... ... ... ... ... ... ... G.. ... AA. ..T ... .A. ... ... C.. ... CA. ... ... ... ... ... ... .A. ... ... A.. ... ... ... ... ... ... A.. ... ... .A. ... ... ... ... ... ... A.. ... A.. ... ... ... ... ... .AC ... ... A.. ... ... ... ... ... 
+Ssc_squirrelM         ... ... ..C ... ... ... ... ... ... ... ... ... ... ..G C.T ... ... ... ... ... ... ... ... ... ... ... ... ... ... .AC ..T ... ..T ... ... C.. ... CA. ... ... ... ... ... ... .A. ..T ... A.. .G. ... ... ... ... ... A.. ... ... .A. ... ... G.. A.. C.A ..G ..C ... ... ... ... ... .A. GC. .A. ... ... A.. ... ... ... ... ..A 
+Cja_marmoset          ... ... ... ... ... ... ... ... ... ... ... ... ... ..G ..T ... C.. ... ... ... ... ... ... ... ... ... ... ... ... .A. ..T ... ..T ... ... C.. ... CA. ... ... ... ... ... ... .A. ..T ..C A.. .G. ... ... ... ... ... A.. ... ... .A. ... ... G.. A.. ..A ..G ..C ... ..C ... ..G ... .A. GC. .A. ... ... A.. ... ... ... ... ..A 
+
+Codon usage in sequences
+--------------------------------------------------------------------------------------------------------------
+Phe TTT  2  2  2  2  2  1 | Ser TCT  0  0  0  0  0  0 | Tyr TAT  2  3  3  2  3  4 | Cys TGT  7  7  7  7  7  7
+    TTC  0  0  0  0  0  1 |     TCC  1  1  1  1  1  1 |     TAC  4  3  3  4  3  2 |     TGC  1  1  1  1  1  1
+Leu TTA  1  1  0  0  0  0 |     TCA  0  0  0  0  0  0 | *** TAA  0  0  0  0  0  0 | *** TGA  0  0  0  0  0  0
+    TTG  4  4  4  4  4  3 |     TCG  0  0  0  0  0  0 |     TAG  0  0  0  0  0  0 | Trp TGG  5  5  5  5  5  5
+--------------------------------------------------------------------------------------------------------------
+Leu CTT  0  0  0  0  0  1 | Pro CCT  0  1  1  1  1  1 | His CAT  1  1  1  1  1  2 | Arg CGT  3  2  0  0  0  2
+    CTC  0  1  0  0  0  0 |     CCC  0  0  0  0  0  0 |     CAC  0  0  1  1  2  1 |     CGC  1  2  1  1  0  0
+    CTA  1  1  1  1  1  1 |     CCA  2  2  2  2  2  2 | Gln CAA  4  4  3  3  6  6 |     CGA  1  1  1  1  0  0
+    CTG  2  2  3  3  3  2 |     CCG  0  0  0  0  0  0 |     CAG  2  2  2  2  2  2 |     CGG  0  0  0  0  0  0
+--------------------------------------------------------------------------------------------------------------
+Ile ATT  1  2  1  0  1  1 | Thr ACT  2  2  2  2  3  3 | Asn AAT  5  6  7  5  8  6 | Ser AGT  2  1  3  4  3  2
+    ATC  3  3  3  3  4  3 |     ACC  1  1  1  1  1  1 |     AAC  5  5  5  6  5  4 |     AGC  3  3  3  3  3  3
+    ATA  1  1  2  2  2  2 |     ACA  2  2  2  2  2  2 | Lys AAA  3  3  5  6  3  3 | Arg AGA  6  6  2  2  5  4
+Met ATG  2  2  0  0  0  2 |     ACG  0  0  0  0  0  0 |     AAG  2  2  4  3  2  2 |     AGG  3  3  2  2  3  4
+--------------------------------------------------------------------------------------------------------------
+Val GTT  3  2  3  4  3  3 | Ala GCT  6  4  5  5  4  4 | Asp GAT  7  7  6  6  6  5 | Gly GGT  2  2  2  2  1  1
+    GTC  3  2  3  3  2  3 |     GCC  3  4  3  3  3  4 |     GAC  1  1  1  1  1  3 |     GGC  3  3  3  3  3  3
+    GTA  1  2  2  1  1  1 |     GCA  5  5  5  5  5  5 | Glu GAA  1  1  3  3  1  1 |     GGA  5  5  5  5  5  5
+    GTG  2  1  2  3  3  2 |     GCG  0  0  0  0  0  0 |     GAG  2  2  2  2  2  2 |     GGG  1  1  1  1  1  1
+--------------------------------------------------------------------------------------------------------------
+
+--------------------------------------------------
+Phe TTT  3 | Ser TCT  0 | Tyr TAT  4 | Cys TGT  7
+    TTC  0 |     TCC  1 |     TAC  2 |     TGC  1
+Leu TTA  0 |     TCA  0 | *** TAA  0 | *** TGA  0
+    TTG  3 |     TCG  0 |     TAG  0 | Trp TGG  5
+--------------------------------------------------
+Leu CTT  0 | Pro CCT  1 | His CAT  2 | Arg CGT  1
+    CTC  0 |     CCC  0 |     CAC  1 |     CGC  1
+    CTA  1 |     CCA  2 | Gln CAA  5 |     CGA  0
+    CTG  3 |     CCG  0 |     CAG  2 |     CGG  0
+--------------------------------------------------
+Ile ATT  1 | Thr ACT  3 | Asn AAT  5 | Ser AGT  2
+    ATC  3 |     ACC  1 |     AAC  5 |     AGC  3
+    ATA  2 |     ACA  2 | Lys AAA  3 | Arg AGA  3
+Met ATG  1 |     ACG  0 |     AAG  2 |     AGG  5
+--------------------------------------------------
+Val GTT  3 | Ala GCT  4 | Asp GAT  6 | Gly GGT  1
+    GTC  3 |     GCC  4 |     GAC  2 |     GGC  3
+    GTA  1 |     GCA  5 | Glu GAA  2 |     GGA  5
+    GTG  2 |     GCG  0 |     GAG  2 |     GGG  1
+--------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: Hsa_Human      
+position  1:    T:0.20769    C:0.13077    A:0.31538    G:0.34615
+position  2:    T:0.20000    C:0.16923    A:0.30000    G:0.33077
+position  3:    T:0.33077    C:0.22308    A:0.25385    G:0.19231
+
+#2: Hla_gibbon     
+position  1:    T:0.20769    C:0.14615    A:0.32308    G:0.32308
+position  2:    T:0.20000    C:0.16923    A:0.30769    G:0.32308
+position  3:    T:0.32308    C:0.23077    A:0.26154    G:0.18462
+
+#3: Cgu/Can_colobus
+position  1:    T:0.20000    C:0.12308    A:0.32308    G:0.35385
+position  2:    T:0.20000    C:0.16923    A:0.35385    G:0.27692
+position  3:    T:0.33077    C:0.22308    A:0.25385    G:0.19231
+
+#4: Pne_langur     
+position  1:    T:0.20000    C:0.12308    A:0.31538    G:0.36154
+position  2:    T:0.20000    C:0.16923    A:0.34615    G:0.28462
+position  3:    T:0.31538    C:0.23846    A:0.25385    G:0.19231
+
+#5: Mmu_rhesus     
+position  1:    T:0.20000    C:0.13846    A:0.34615    G:0.31538
+position  2:    T:0.20000    C:0.16923    A:0.34615    G:0.28462
+position  3:    T:0.33077    C:0.22308    A:0.25385    G:0.19231
+
+#6: Ssc_squirrelM  
+position  1:    T:0.19231    C:0.15385    A:0.32308    G:0.33077
+position  2:    T:0.20000    C:0.17692    A:0.33077    G:0.29231
+position  3:    T:0.33077    C:0.23077    A:0.24615    G:0.19231
+
+#7: Cja_marmoset   
+position  1:    T:0.20000    C:0.14615    A:0.31538    G:0.33846
+position  2:    T:0.20000    C:0.17692    A:0.33077    G:0.29231
+position  3:    T:0.33077    C:0.23077    A:0.23846    G:0.20000
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT      14 | Ser S TCT       0 | Tyr Y TAT      21 | Cys C TGT      49
+      TTC       1 |       TCC       7 |       TAC      21 |       TGC       7
+Leu L TTA       2 |       TCA       0 | *** * TAA       0 | *** * TGA       0
+      TTG      26 |       TCG       0 |       TAG       0 | Trp W TGG      35
+------------------------------------------------------------------------------
+Leu L CTT       1 | Pro P CCT       6 | His H CAT       9 | Arg R CGT       8
+      CTC       1 |       CCC       0 |       CAC       6 |       CGC       6
+      CTA       7 |       CCA      14 | Gln Q CAA      31 |       CGA       4
+      CTG      18 |       CCG       0 |       CAG      14 |       CGG       0
+------------------------------------------------------------------------------
+Ile I ATT       7 | Thr T ACT      17 | Asn N AAT      42 | Ser S AGT      17
+      ATC      22 |       ACC       7 |       AAC      35 |       AGC      21
+      ATA      12 |       ACA      14 | Lys K AAA      26 | Arg R AGA      28
+Met M ATG       7 |       ACG       0 |       AAG      17 |       AGG      22
+------------------------------------------------------------------------------
+Val V GTT      21 | Ala A GCT      32 | Asp D GAT      43 | Gly G GGT      11
+      GTC      19 |       GCC      24 |       GAC      10 |       GGC      21
+      GTA       9 |       GCA      35 | Glu E GAA      12 |       GGA      35
+      GTG      15 |       GCG       0 |       GAG      14 |       GGG       7
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.20110    C:0.13736    A:0.32308    G:0.33846
+position  2:    T:0.20000    C:0.17143    A:0.33077    G:0.29780
+position  3:    T:0.32747    C:0.22857    A:0.25165    G:0.19231
+
+Codon frequencies under model, for use in evolver:
+  0.01378574  0.00962226  0.01059374  0.00809565
+  0.01181635  0.00824765  0.00908035  0.00693913
+  0.02279949  0.01591374  0.00000000  0.00000000
+  0.02052711  0.01432765  0.00000000  0.01205451
+  0.00941649  0.00657258  0.00723616  0.00552982
+  0.00807127  0.00563364  0.00620242  0.00473984
+  0.01557342  0.01087004  0.01196749  0.00914546
+  0.01402125  0.00978665  0.01077472  0.00823396
+  0.02214758  0.01545871  0.01701945  0.01300613
+  0.01898364  0.01325032  0.01458810  0.01114811
+  0.03662868  0.02556633  0.02814755  0.02151013
+  0.03297798  0.02301819  0.02534214  0.01936627
+  0.02320222  0.01619484  0.01782990  0.01362547
+  0.01988762  0.01388129  0.01528277  0.01167897
+  0.03837291  0.02678377  0.02948791  0.02253443
+  0.03454836  0.02411429  0.02654891  0.02028847
+
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+Hsa_Human           
+Hla_gibbon           0.2782 (0.0133 0.0478)
+Cgu/Can_colobus      1.1086 (0.0742 0.0670) 1.1055 (0.0742 0.0671)
+Pne_langur           1.1979 (0.0725 0.0605) 0.9234 (0.0797 0.0863) 0.5517 (0.0267 0.0484)
+Mmu_rhesus           1.8744 (0.0562 0.0300) 1.0215 (0.0561 0.0550) 1.2973 (0.0473 0.0364) 1.3970 (0.0508 0.0364)
+Ssc_squirrelM        0.4701 (0.0633 0.1346) 0.4688 (0.0633 0.1349) 0.5159 (0.0775 0.1502) 0.5833 (0.0959 0.1645) 0.4544 (0.0559 0.1230)
+Cja_marmoset         0.4725 (0.0634 0.1341) 0.5925 (0.0633 0.1069) 0.4702 (0.0704 0.1496) 0.5411 (0.0886 0.1638) 0.3995 (0.0490 0.1225) 0.1595 (0.0099 0.0619)
+
+
+Model 0: one-ratio
+
+TREE #  1:  ((1, 2), ((3, 4), 5), (6, 7));   MP score: 65
+check convergence..
+lnL(ntime: 11  np: 14):   -906.017440     +0.000000
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+  0.06798  0.02556  0.03889  0.04345  0.07637  0.04379  0.05254  0.02168  0.12266  0.04080  0.02392  4.54006  0.80663  0.50000
+SEs for parameters:
+  0.02557  0.01516  0.01844  0.02016  0.02668  0.01990  0.02155  0.01551  0.03392  0.01916  0.01506  0.23600  0.23600 -1.00000
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.55765
+
+((1: 0.02556, 2: 0.03889): 0.06798, ((3: 0.04379, 4: 0.05254): 0.07637, 5: 0.02168): 0.04345, (6: 0.04080, 7: 0.02392): 0.12266);
+
+((Hsa_Human: 0.02556, Hla_gibbon: 0.03889): 0.06798, ((Cgu/Can_colobus: 0.04379, Pne_langur: 0.05254): 0.07637, Mmu_rhesus: 0.02168): 0.04345, (Ssc_squirrelM: 0.04080, Cja_marmoset: 0.02392): 0.12266);
+
+Detailed output identifying parameters
+kappa (ts/tv) =  4.54006
+
+alpha (gamma) =  0.50000
+r ( 1):  1.00000
+f:       1.00000
+
+dN & dS for each branch
+
+ branch           t        S        N    dN/dS       dN       dS   S*dS   N*dN
+
+   8..9       0.068    107.8    282.2   0.8066   0.0213   0.0263    2.8    6.0
+   9..1       0.026    107.8    282.2   0.8066   0.0080   0.0099    1.1    2.3
+   9..2       0.039    107.8    282.2   0.8066   0.0122   0.0151    1.6    3.4
+   8..10      0.043    107.8    282.2   0.8066   0.0136   0.0168    1.8    3.8
+  10..11      0.076    107.8    282.2   0.8066   0.0239   0.0296    3.2    6.7
+  11..3       0.044    107.8    282.2   0.8066   0.0137   0.0170    1.8    3.9
+  11..4       0.053    107.8    282.2   0.8066   0.0164   0.0204    2.2    4.6
+  10..5       0.022    107.8    282.2   0.8066   0.0068   0.0084    0.9    1.9
+   8..12      0.123    107.8    282.2   0.8066   0.0383   0.0475    5.1   10.8
+  12..6       0.041    107.8    282.2   0.8066   0.0128   0.0158    1.7    3.6
+  12..7       0.024    107.8    282.2   0.8066   0.0075   0.0093    1.0    2.1
+
+
+Time used: 00:02:05
+
+
+Model 1: neutral (2 categories)
+
+TREE #  1:  ((1, 2), ((3, 4), 5), (6, 7));   MP score: 65
+lnL(ntime: 11  np: 14):   -902.503869     +0.000000
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+  0.06961  0.02556  0.03938  0.04422  0.07778  0.04414  0.05228  0.02134  0.12574  0.04157  0.02367  4.29790  0.41271  0.50000
+SEs for parameters:
+  0.02665  0.01541  0.01888  0.02101  0.02769  0.02034  0.02188  0.01585  0.03565  0.01971  0.01530 -1.00000 -1.00000 -1.00000
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.56528
+
+((1: 0.02556, 2: 0.03938): 0.06961, ((3: 0.04414, 4: 0.05228): 0.07778, 5: 0.02134): 0.04422, (6: 0.04157, 7: 0.02367): 0.12574);
+
+((Hsa_Human: 0.02556, Hla_gibbon: 0.03938): 0.06961, ((Cgu/Can_colobus: 0.04414, Pne_langur: 0.05228): 0.07778, Mmu_rhesus: 0.02134): 0.04422, (Ssc_squirrelM: 0.04157, Cja_marmoset: 0.02367): 0.12574);
+
+Detailed output identifying parameters
+kappa (ts/tv) =  4.29790
+
+
+dN/dS for site classes (K=2)
+p:   0.41271  0.58729
+w:   0.00000  1.00000
+
+alpha (gamma) =  0.41271
+r ( 2):  0.00000  1.00000
+f:       0.41271  0.58729
+
+dN & dS for each branch
+
+ branch           t        S        N    dN/dS       dN       dS   S*dS   N*dN
+
+   8..9       0.070    107.1    282.9   0.5873   0.0195   0.0331    3.5    5.5
+   9..1       0.026    107.1    282.9   0.5873   0.0071   0.0122    1.3    2.0
+   9..2       0.039    107.1    282.9   0.5873   0.0110   0.0187    2.0    3.1
+   8..10      0.044    107.1    282.9   0.5873   0.0124   0.0210    2.3    3.5
+  10..11      0.078    107.1    282.9   0.5873   0.0217   0.0370    4.0    6.1
+  11..3       0.044    107.1    282.9   0.5873   0.0123   0.0210    2.2    3.5
+  11..4       0.052    107.1    282.9   0.5873   0.0146   0.0249    2.7    4.1
+  10..5       0.021    107.1    282.9   0.5873   0.0060   0.0102    1.1    1.7
+   8..12      0.126    107.1    282.9   0.5873   0.0351   0.0598    6.4    9.9
+  12..6       0.042    107.1    282.9   0.5873   0.0116   0.0198    2.1    3.3
+  12..7       0.024    107.1    282.9   0.5873   0.0066   0.0113    1.2    1.9
+
+
+Time used: 00:05:26
+
+
+Model 2: selection (3 categories)
+
+TREE #  1:  ((1, 2), ((3, 4), 5), (6, 7));   MP score: 65
+check convergence..
+lnL(ntime: 11  np: 16):   -900.076500     +0.000000
+   8..9     9..1     9..2     8..10   10..11   11..3    11..4    10..5     8..12   12..6    12..7  
+  0.07903  0.02598  0.04137  0.04535  0.08550  0.04486  0.05417  0.01935  0.13948  0.04319  0.02492  5.12250  0.38160  0.54916  6.17292  0.41271
+SEs for parameters:
+  0.03226  0.01618  0.02021  0.02390  0.03229  0.02158  0.02363  0.01746  0.04358  0.02088  0.01602  1.50414  0.14016  0.16630  4.30113 -1.00000
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.60321
+
+((1: 0.02598, 2: 0.04137): 0.07903, ((3: 0.04486, 4: 0.05417): 0.08550, 5: 0.01935): 0.04535, (6: 0.04319, 7: 0.02492): 0.13948);
+
+((Hsa_Human: 0.02598, Hla_gibbon: 0.04137): 0.07903, ((Cgu/Can_colobus: 0.04486, Pne_langur: 0.05417): 0.08550, Mmu_rhesus: 0.01935): 0.04535, (Ssc_squirrelM: 0.04319, Cja_marmoset: 0.02492): 0.13948);
+
+Detailed output identifying parameters
+kappa (ts/tv) =  5.12250
+
+
+dN/dS for site classes (K=3)
+p:   0.38160  0.54916  0.06924
+w:   0.00000  1.00000  6.17292
+
+alpha (gamma) =  0.38160
+r ( 3):  0.00000  1.00000  6.17292
+f:       0.38160  0.54916  0.06924
+
+dN & dS for each branch
+
+ branch           t        S        N    dN/dS       dN       dS   S*dS   N*dN
+
+   8..9       0.079    109.3    280.7   0.9766   0.0262   0.0268    2.9    7.3
+   9..1       0.026    109.3    280.7   0.9766   0.0086   0.0088    1.0    2.4
+   9..2       0.041    109.3    280.7   0.9766   0.0137   0.0140    1.5    3.8
+   8..10      0.045    109.3    280.7   0.9766   0.0150   0.0154    1.7    4.2
+  10..11      0.086    109.3    280.7   0.9766   0.0283   0.0290    3.2    7.9
+  11..3       0.045    109.3    280.7   0.9766   0.0149   0.0152    1.7    4.2
+  11..4       0.054    109.3    280.7   0.9766   0.0179   0.0184    2.0    5.0
+  10..5       0.019    109.3    280.7   0.9766   0.0064   0.0066    0.7    1.8
+   8..12      0.139    109.3    280.7   0.9766   0.0462   0.0473    5.2   13.0
+  12..6       0.043    109.3    280.7   0.9766   0.0143   0.0146    1.6    4.0
+  12..7       0.025    109.3    280.7   0.9766   0.0083   0.0085    0.9    2.3
+
+
+
+Positively selected sites Prob(w>1):
+
+    15 L 0.6588
+    17 M 0.6601
+    37 G 0.7155
+    41 R 0.9282
+    50 R 0.8292
+    62 R 0.5229
+   114 N 0.6491
+   126 Q 0.5226
+
+
+Time used: 00:13:43

Added: trunk/packages/bioperl/branches/upstream/current/t/data/compLD_missingtest.prettybase
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/compLD_missingtest.prettybase	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/compLD_missingtest.prettybase	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,46 @@
+ProcR2973EA	01	C	T
+ProcR2973EA	02	N	N
+ProcR2973EA	03	C	C
+ProcR2973EA	04	C	T
+ProcR2973EA	05	C	C
+ProcR2973EA	06	C	T
+ProcR2973EA	07	C	C
+ProcR2973EA	08	C	C
+ProcR2973EA	09	C	T
+ProcR2973EA	10	N	N
+ProcR2973EA	11	C	C
+ProcR2973EA	12	C	C
+ProcR2973EA	13	C	T
+ProcR2973EA	14	T	T
+ProcR2973EA	15	T	T
+ProcR2973EA	16	C	T
+ProcR2973EA	17	C	T
+ProcR2973EA	18	C	T
+ProcR2973EA	19	C	T
+ProcR2973EA	20	C	C
+ProcR2973EA	21	T	T
+ProcR2973EA	22	C	C
+ProcR2973EA	23	C	C
+ProC9198EA	01	A	A
+ProC9198EA	02	A	A
+ProC9198EA	03	A	A
+ProC9198EA	04	A	A
+ProC9198EA	05	A	A
+ProC9198EA	06	A	A
+ProC9198EA	07	A	A
+ProC9198EA	08	A	A
+ProC9198EA	09	A	A
+ProC9198EA	10	A	A
+ProC9198EA	11	A	C
+ProC9198EA	12	A	A
+ProC9198EA	13	A	A
+ProC9198EA	14	A	A
+ProC9198EA	15	A	A
+ProC9198EA	16	A	A
+ProC9198EA	17	A	A
+ProC9198EA	18	A	A
+ProC9198EA	19	A	A
+ProC9198EA	20	N	N
+ProC9198EA	21	A	A
+ProC9198EA	22	A	C
+ProC9198EA	23	A	A

Added: trunk/packages/bioperl/branches/upstream/current/t/data/compLD_test.prettybase
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/compLD_test.prettybase	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/compLD_test.prettybase	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,30 @@
+01	i1	A	A
+01	i2	A	A
+01	i3	A	A
+01	i4	G	G
+01	i5	G	G
+01	i6	G	G
+01	i7	A	G
+01	i8	A	G
+01	i9	A	G
+01	i10	A	G
+02	i1	C	C
+02	i2	C	C
+02	i3	C	C
+02	i4	T	T
+02	i5	T	T
+02	i6	T	T
+02	i7	C	T
+02	i8	C	T
+02	i9	C	T
+02	i10	C	T
+03	i1	A	G
+03	i2	G	G
+03	i3	A	A
+03	i4	A	G
+03	i5	G	G
+03	i6	A	A
+03	i7	A	G
+03	i8	G	G
+03	i9	A	A
+03	i10	A	A
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/compLD_test.prettybase
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,39 @@
+!autogenerated-by:     DAG-Edit version 1.311
+!saved-by:             hjd
+!date:                 Thu Sep 12 11:05:43 EDT 2002
+!version: $Revision: 1.1 $
+!note:     file automatically generated by GO-Editor
+$Gene_Ontology ; GO:0003673
+ <cellular_component ; GO:0005575
+  %cell ; GO:0005623
+   %ascus ; GO:0005627
+    <ascus lipid droplet ; GO:0005633 % lipid particle ; GO:0005811
+    <prospore membrane ; GO:0005628 % membrane ; GO:0016020
+    <spore wall (sensu Fungi) ; GO:0005619 % cell wall (sensu Fungi) ; GO:0009277
+     <chitosan layer of spore wall ; GO:0005631
+     <dityrosine layer of spore wall ; GO:0005630
+     <inner layer of spore wall ; GO:0005632
+   <axon ; GO:0030424
+   <bud ; GO:0005933
+    <bud neck ; GO:0005935 % site of polarized growth (sensu Saccharomyces) ; GO:0000134
+     <contractile ring (sensu Saccharomyces) ; GO:0000142 ; synonym:cytokinetic ring (sensu Saccharomyces) ; synonym:neck ring % contractile ring (sensu Fungi) ; GO:0030480
+      <septin ring (sensu Saccharomyces) ; GO:0000144 % septin ring (sensu Fungi) ; GO:0030481
+     <polarisome ; GO:0000133 < bud tip ; GO:0005934 < cell cortex ; GO:0005938 < shmoo tip ; GO:0005937
+    <bud tip ; GO:0005934 % site of polarized growth (sensu Saccharomyces) ; GO:0000134
+     <polarisome ; GO:0000133 < bud neck ; GO:0005935 < cell cortex ; GO:0005938 < shmoo tip ; GO:0005937
+   <cell fraction ; GO:0000267
+    %insoluble fraction ; GO:0005626
+    %membrane fraction ; GO:0005624 ; synonym:murein sacculus ; synonym:peptidoglycan
+     %integral membrane protein of membrane fraction ; GO:0000299
+     %microsome ; GO:0005792
+      %rough microsome ; GO:0019718
+      %smooth microsome ; GO:0019719
+     %peripheral membrane protein of membrane fraction ; GO:0000300
+     %synaptosome ; GO:0019717
+    %soluble fraction ; GO:0005625
+     <polarisomeX ; GO:0000666 < bud neck ; GO:0005935  % lipid particle ; GO:0005811 < cell cortex ; GO:0005938 % septin ring (sensu Fungi) ; GO:0030481 < shmoo tip ; GO:0005937
+     <polarisomeY ; GO:0000667 % bud neck ; GO:0005935  < lipid particle ; GO:0005811 % cell cortex ; GO:0005938 < septin ring (sensu Fungi) ; GO:0030481 % shmoo tip ; GO:0005937
+ %dibenzothiophene metabolism ; GO:0018895
+  %dibenzothiophene catabolism ; GO:0018896 ; UM-BBD_pathwayID:dbt2 % xenobiotic catabolism ; GO:0042178
+  %dibenzothiophene desulfurization ; GO:0018897 ; MetaCyc:PWY-681 ; UM-BBD_pathwayID:dbt; synonym:murein sacculus ; synonym:peptidoglycan
+  %thromboxane-A synthase ; GO:0004796, GO:0008400 ; EC:5.3.99.5 ; synonym:cytochrome P450 CYP5 % cytochrome P450 ; GO:0015034

Added: trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test2
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test2	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/component.ontology.test2	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,11 @@
+!autogenerated-by:     DAG-Edit version 1.311
+!saved-by:             hjd
+!date:                 Thu Sep 12 11:05:43 EDT 2002
+!version: $Revision: 1.1 $
+!note:     file automatically generated by GO-Editor
+$Gene_Ontology ; GO:0003673
+ <cellular_component ; GO:0005575
+ %dibenzothiophene metabolism ; GO:0018895
+  %dibenzothiophene metabolism ; GO:0018896
+  %dibenzothiophene metabolism ; GO:0018897
+  %dibenzothiophene metabolism ; GO:0018898

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.contigs
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.contigs	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.contigs	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,17 @@
+>Contig1
+atatcTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT
+CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG
+AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA
+ACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTTGGTAAC
+TCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTGCAACAC
+GGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAGATACCC
+GTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGATACCGAA
+TCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGTACTCCT
+TGACTTTCAAGCCACTCTCAAGAAGCTGCATTGCCTTGTCTCCTCTCACG
+GTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGACCTCAC
+AGTGTACCTCGCCTTGGAGAAGACAGGTGTTTGACCACTGAGCTGTTCCA
+AAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACGGAGATG
+TTGAGAACGAGCTTCTGAACCTTGATATCCCTCATTGGGTTCGAGAGCTT
+CTTCTCTGACGCCATTCTGAGGATTAAGGGATGCAAAGTCAGAGGGAGAT
+GAGCAAAg
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.contigs
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+>ML4922R CHROMAT_FILE: ML4922R PHD_FILE: ML4922R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:28:31 2001
+ccttggtacgcccgcaggtaccggtccggaattcccgggtcgacccacgc
+gtccgcggacgcgtggggtgaccccggaagattcgagtgcttcgtctggt
+aatcaaaatcaggtttctccggttggatctcaaactcaaggttatgccgt
+ggcacaaatgtcgcctgctgtttcatctgtcagtgatgctccctcattag
+acacacagagacaacgagatattcagtccaggctagctaacagtggttca
+gatgattctgatgacgaagatcctgatggagagactgtaacaacagtgaa
+tgatgatcctaccgatgttaagcgtgctaggaggatgctttcaaaccgag
+aatccgctaggcgctctagaagaagaaagcaagagcaaatgagtgaattt
+gattcacaggttggccaattacgtggtgagcattcaactatgcttaagcg
+tcttagtgaggtcactcagaagtttagcaccgctgttgtcgataacagaa
+tcttaaaagctgatatcgaaacattgagaacaaaggtgaagatggcagag
+gacactgtgaaaagagtgacagggatgaaccctaggcttttggcaaaacc
+atacagcataccattcgataggacactgatgggttcttcacagccaaact
+tgaatcaaaccgacatgattccaaatcaaatatcagaggacaacagtttt
+gcaagcaactcggttgttactttggaatccggttgcaatgcttttgaagc
+can
+>ML4924F CHROMAT_FILE: ML4924F PHD_FILE: ML4924F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+ttgttcttttctcgagggtacttgaggattctaccaccacaacaacaaag
+ccttatcatccagaaccgcaaattataaaaaagagccaacaccacaaaac
+aaaatgaaaagaaacaaactcttcaaccagtgatgttctgagacttgttg
+aggataacaccttcgtacttaacttggaaccacttcatggcatcatcctt
+ggtaactctatgctgaatcccaacgcgagtcttgcacctacggcgacgtg
+caacacggtatcccgggcgttcaagaacaacatagaagtccattccatag
+atacccgtagaaggatcgtacttgattccaagatcaatgtgctcctggat
+accgaatccaaaacagccagtgtcactgaagttcctcctcaacagctcgt
+actccttgactttcaagccactctcaacaagctgcattgccttgcctcct
+ctcacggtgacgtagcacgcaatcttttcattacgtctaataccgaaaga
+cctcacagtgcacctggccttggacaacacaggtgtttgaccactgagct
+gttacaaaaccttggaggcacgggtgagacgatcaccactctcaccaacg
+gagatgtcgagaacgagcttctgaaccttgatattcctcattgggctcca
+cagctctttctctgacgccattctgaggatctagggatgcacacccacaa
+ggacatgaccaaaatcgacatctgggcccactcg
+>ML4924R CHROMAT_FILE: ML4924R PHD_FILE: ML4924R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+actttggtcgcctgcaggtaccggtccgngattcccgggtcgacccacgc
+gtccgctttgctcatctccctctgactttgcatcccttaatcctcagaat
+ggcgtcagagaagaagctctcgaacccaatgagggatatcaaggttcaga
+agctcgttctcaacatctccgttggtgagagtggtgatcgtctcacccgt
+gcctccaaggttttggaacagctcagtggtcaaacacctgtcttctccaa
+ggcgaggtacactgtgaggtctttcggtattagacgtaatgaaaagattg
+cgtgctacgtcaccgtgagaggagacaaggcaatgcagcttcttgagagt
+ggcttgaaagtcaaggagtacgagctgttgaggaggaacttcagtgacac
+tggctgttttggattcggtatccaggagcacattgatcttggaatcaagt
+acgatccttctacgggtatctatggaatggacttctatgttgttcttgaa
+cgcccgggataccgtgttgcacgtcgccgtaggtgcaagactcgcgttgg
+gattcagcatagagttaccaaggatgatgccatgaagtggttccaagtta
+agtacgaaggtgttatcctcaacaagtctcagaacatcactggttgaaga
+gtttgtttcttttcattttgttttgtggtgttggctcttttttataattt
+gcggttctggatgataaggctttgttgttgtggtggtagaatcctcaagt
+accctcgagatat
+>ML4947F CHROMAT_FILE: ML4947F PHD_FILE: ML4947F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:28:31 2001
+ctgtgggacccactgatattaccgaacttttgaaccaaacagttatgtgt
+tggcgccaaaaataacgttaaagaaaccatcacaagagccttatgacaaa
+gagggagagatagacataacttaagacattacacaatttggtcaaacgat
+aaaccaaatccaacagacatttaaaccaaatccaacgccaacaaaacatt
+catatataagatatacccaaaccgtatcaaacgcagacgccgccctagta
+actgtatcctttcacaaacaagctctcggaggcagcagagtcaccagaag
+ccccagcaatgtatttcccaagtgtagcgtccgagttagccttgcacctg
+accaagaacgtggcctgagctttggctacgttctctttcttacctcccca
+agccttgagtgtgctttgctggagggctcggccaaaggagaaagtaagcg
+tccatggcttcaacagatcgagcttgttcattgcgttcagatttagcgtt
+gcttcctcttcgctctgtcctcctgagaggaacacgattcctggaaccgc
+tggtgggaccgtgcggcggagagcagtcactgtgtattccgcaatgactt
+ccggtgcaaccttggcgctgtctgagcctggagtgaccatgttaggttta
+agcagagtgccttcgaggaggacgtggtggtcgttcaaggccttgtacac
+ggcagaaagaaccgtctcagtcacagccgcacatttctt


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.log
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.log	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.log	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+No. words: 5978; after pruning: 5864


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.log
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+>ML4922R  CHROMAT_FILE: ML4922R PHD_FILE: ML4922R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:28:31 2001
+CCTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCGGACGCGTGGGGTGACCCCGGAAGATTCGAGTGCTTCGTCTGGT
+AATCAAAATCAGGTTTCTCCGGTTGGATCTCAAACTCAAGGTTATGCCGT
+GGCACAAATGTCGCCTGCTGTTTCATCTGTCAGTGATGCTCCCTCATTAG
+ACACACAGAGACAACGAGATATTCAGTCCAGGCTAGCTAACAGTGGTTCA
+GATGATTCTGATGACGAAGATCCTGATGGAGAGACTGTAACAACAGTGAA
+TGATGATCCTACCGATGTTAAGCGTGCTAGGAGGATGCTTTCAAACCGAG
+AATCCGCTAGGCGCTCTAGAAGAAGAAAGCAAGAGCAAATGAGTGAATTT
+GATTCACAGGTTGGCCAATTACGTGGTGAGCATTCAACTATGCTTAAGCG
+TCTTAGTGAGGTCACTCAGAAGTTTAGCACCGCTGTTGTCGATAACAGAA
+TCTTAAAAGCTGATATCGAAACATTGAGAACAAAGGTGAAGATGGCAGAG
+GACACTGTGAAAAGAGTGACAGGGATGAACCCTAGGCTTTTGGCAAAACC
+ATACAGCATACCATTCGATAGGACACTGATGGGTTCTTCACAGCCAAACT
+TGAATCAAACCGACATGATTCCAAATCAAATATCAGAGGACAACAGTTTT
+GCAAGCAACTCGGTTGTTACTTTGGAATCCGGTTGCAATGCTTTTGAAGC
+CAN
+>ML4924F  CHROMAT_FILE: ML4924F PHD_FILE: ML4924F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+TTGTTCTTTTCTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAG
+CCTTATCATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAAC
+AAAATGAAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTG
+AGGATAACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTT
+GGTAACTCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTG
+CAACACGGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAG
+ATACCCGTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGAT
+ACCGAATCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGT
+ACTCCTTGACTTTCAAGCCACTCTCAACAAGCTGCATTGCCTTGCCTCCT
+CTCACGGTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGA
+CCTCACAGTGCACCTGGCCTTGGACAACACAGGTGTTTGACCACTGAGCT
+GTTACAAAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACG
+GAGATGTCGAGAACGAGCTTCTGAACCTTGATATTCCTCATTGGGCTCCA
+CAGCTCTTTCTCTGACGCCATTCTGAGGATCTAGGGATGCACACCCACAA
+GGACATGACCAAAATCGACATCTGGGCCCACTCG
+>ML4924R  CHROMAT_FILE: ML4924R PHD_FILE: ML4924R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+ACTTTGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCTTTGCTCATCTCCCTCTGACTTTGCATCCCTTAATCCTCAGAAT
+GGCGTCAGAGAAGAAGCTCTCGAACCCAATGAGGGATATCAAGGTTCAGA
+AGCTCGTTCTCAACATCTCCGTTGGTGAGAGTGGTGATCGTCTCACCCGT
+GCCTCCAAGGTTTTGGAACAGCTCAGTGGTCAAACACCTGTCTTCTCCAA
+GGCGAGGTACACTGTGAGGTCTTTCGGTATTAGACGTAATGAAAAGATTG
+CGTGCTACGTCACCGTGAGAGGAGACAAGGCAATGCAGCTTCTTGAGAGT
+GGCTTGAAAGTCAAGGAGTACGAGCTGTTGAGGAGGAACTTCAGTGACAC
+TGGCTGTTTTGGATTCGGTATCCAGGAGCACATTGATCTTGGAATCAAGT
+ACGATCCTTCTACGGGTATCTATGGAATGGACTTCTATGTTGTTCTTGAA
+CGCCCGGGATACCGTGTTGCACGTCGCCGTAGGTGCAAGACTCGCGTTGG
+GATTCAGCATAGAGTTACCAAGGATGATGCCATGAAGTGGTTCCAAGTTA
+AGTACGAAGGTGTTATCCTCAACAAGTCTCAGAACATCACTGGTTGAAGA
+GTTTGTTTCTTTTCATTTTGTTTTGTGGTGTTGGCTCTTTTTTATAATTT
+GCGGTTCTGGATGATAAGGCTTTGTTGTTGTGGTGGTAGAATCCTCAAGT
+ACCCTCGAGATAT
+>ML4947F  CHROMAT_FILE: ML4947F PHD_FILE: ML4947F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:28:31 2001
+CTGTGGGACCCACTGATATTACCGAACTTTTGAACCAAACAGTTATGTGT
+TGGCGCCAAAAATAACGTTAAAGAAACCATCACAAGAGCCTTATGACAAA
+GAGGGAGAGATAGACATAACTTAAGACATTACACAATTTGGTCAAACGAT
+AAACCAAATCCAACAGACATTTAAACCAAATCCAACGCCAACAAAACATT
+CATATATAAGATATACCCAAACCGTATCAAACGCAGACGCCGCCCTAGTA
+ACTGTATCCTTTCACAAACAAGCTCTCGGAGGCAGCAGAGTCACCAGAAG
+CCCCAGCAATGTATTTCCCAAGTGTAGCGTCCGAGTTAGCCTTGCACCTG
+ACCAAGAACGTGGCCTGAGCTTTGGCTACGTTCTCTTTCTTACCTCCCCA
+AGCCTTGAGTGTGCTTTGCTGGAGGGCTCGGCCAAAGGAGAAAGTAAGCG
+TCCATGGCTTCAACAGATCGAGCTTGTTCATTGCGTTCAGATTTAGCGTT
+GCTTCCTCTTCGCTCTGTCCTCCTGAGAGGAACACGATTCCTGGAACCGC
+TGGTGGGACCGTGCGGCGGAGAGCAGTCACTGTGTATTCCGCAATGACTT
+CCGGTGCAACCTTGGCGCTGTCTGAGCCTGGAGTGACCATGTTAGGTTTA
+AGCAGAGTGCCTTCGAGGAGGACGTGGTGGTCGTTCAAGGCCTTGTACAC
+GGCAGAAAGAACCGTCTCAGTCACAGCCGCACATTTCTT


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,138 @@
+AS 1 2
+
+CO Contig1 708 2 51 U
+atatcTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT
+CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG
+AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA
+ACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTTGGTAAC
+TCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTGCAACAC
+GGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAGATACCC
+GTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGATACCGAA
+TCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGTACTCCT
+TGACTTTCAAGCCACTCTCAAGAAGCTGCATTGCCTTGTCTCCTCTCACG
+GTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGACCTCAC
+AGTGTACCTCGCCTTGGAGAAGACAGGTGTTTGACCACTGAGCTGTTCCA
+AAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACGGAGATG
+TTGAGAACGAGCTTCTGAACCTTGATATCCCTCATTGGGTTCGAGAGCTT
+CTTCTCTGACGCCATTCTGAGGATTAAGGGATGCAAAGTCAGAGGGAGAT
+GAGCAAAg
+
+BQ
+ 0 0 0 12 17 32 26 25 25 41 49 50 47 47 51 50 56 55 59 50 50 53 55 55 53 59 57 51 54 61 56 54 53 60 48 48 57 58 58 64 60 62 58 64 57 53 53 59 64 67
+ 67 75 72 65 55 47 47 39 44 41 48 54 55 61 52 47 53 53 53 60 67 67 74 76 76 69 65 62 58 54 51 56 60 60 73 73 66 61 61 59 52 56 56 60 64 64 59 57 59 60
+ 60 59 71 82 74 69 61 64 61 83 68 72 71 73 75 71 77 66 62 62 60 64 61 63 63 61 75 75 64 64 60 64 64 75 75 72 80 83 61 64 64 72 64 64 61 56 57 60 62 62
+ 62 62 60 52 50 56 57 57 61 61 64 59 59 64 60 62 58 54 50 51 58 63 67 69 75 75 78 86 86 84 79 71 71 72 69 69 67 90 90 90 90 79 85 88 69 71 71 67 67 67
+ 78 75 75 80 84 80 80 82 86 86 90 90 90 90 90 76 75 67 67 67 70 69 78 75 81 75 73 73 69 68 60 66 70 73 70 70 70 70 69 74 75 72 69 73 75 72 72 88 89 75
+ 75 75 75 77 88 90 90 90 81 81 78 78 75 75 80 83 86 82 84 83 86 86 90 85 73 56 56 55 59 54 62 62 72 74 77 90 90 90 90 90 85 90 75 75 78 85 84 88 80 80
+ 72 76 72 74 74 90 90 88 75 75 75 72 72 72 77 77 77 80 77 77 90 90 90 90 90 90 90 90 90 90 90 90 88 86 86 86 90 86 77 83 77 77 77 83 83 90 90 90 90 90
+ 90 90 90 90 90 90 87 80 78 74 78 74 81 73 80 78 86 86 86 86 88 80 88 87 87 87 87 90 90 90 90 90 90 90 90 87 87 80 71 70 67 63 63 65 72 80 90 90 90 90
+ 86 86 86 88 86 90 89 76 76 78 78 77 77 76 89 89 71 71 56 51 51 43 43 43 43 58 58 80 80 80 80 89 89 71 71 58 38 38 38 43 43 60 60 60 60 60 58 58 58 75
+ 79 79 78 88 83 81 81 71 71 69 75 75 78 78 78 85 88 85 85 90 85 85 85 90 90 71 67 60 58 58 58 68 70 78 80 89 90 85 85 85 80 77 78 77 74 74 72 72 70 58
+ 58 58 43 43 51 51 51 43 43 43 43 43 58 66 66 66 51 51 45 45 51 51 51 51 60 60 60 69 69 69 66 66 72 76 80 85 81 81 60 60 60 60 58 58 51 51 51 51 56 56
+ 71 71 71 71 71 71 61 58 58 58 58 58 60 61 58 58 58 59 62 66 63 67 64 64 64 64 67 67 76 74 76 72 72 58 58 58 58 58 60 73 80 80 74 64 61 54 45 45 45 51
+ 51 51 51 51 51 45 40 39 39 49 49 49 54 52 64 65 67 71 72 72 73 73 43 36 39 39 39 39 39 51 51 51 51 51 51 51 51 51 51 51 51 51 40 39 39 39 39 35 35 40
+ 51 51 51 51 40 40 40 35 35 45 43 43 52 50 50 49 52 49 51 53 46 46 46 46 46 46 46 51 42 56 40 40 40 40 39 35 35 35 35 35 35 51 51 56 51 51 46 40 40 40
+ 40 40 40 40 39 34 34 0
+
+AF ML4924F U -5
+AF ML4924R C 1
+BS 1 9 ML4924R
+BS 10 11 ML4924F
+BS 12 16 ML4924R
+BS 17 51 ML4924F
+BS 52 54 ML4924R
+BS 55 63 ML4924F
+BS 64 64 ML4924R
+BS 65 109 ML4924F
+BS 110 110 ML4924R
+BS 111 111 ML4924F
+BS 112 113 ML4924R
+BS 114 114 ML4924F
+BS 115 117 ML4924R
+BS 118 121 ML4924F
+BS 122 128 ML4924R
+BS 129 133 ML4924F
+BS 134 138 ML4924R
+BS 139 141 ML4924F
+BS 142 144 ML4924R
+BS 145 160 ML4924F
+BS 161 161 ML4924R
+BS 162 174 ML4924F
+BS 175 180 ML4924R
+BS 181 187 ML4924F
+BS 188 188 ML4924R
+BS 189 200 ML4924F
+BS 201 201 ML4924R
+BS 202 203 ML4924F
+BS 204 221 ML4924R
+BS 222 222 ML4924F
+BS 223 223 ML4924R
+BS 224 240 ML4924F
+BS 241 244 ML4924R
+BS 245 245 ML4924F
+BS 246 258 ML4924R
+BS 259 262 ML4924F
+BS 263 291 ML4924R
+BS 292 292 ML4924F
+BS 293 331 ML4924R
+BS 332 332 ML4924F
+BS 333 372 ML4924R
+BS 373 377 ML4924F
+BS 378 453 ML4924R
+BS 454 454 ML4924F
+BS 455 595 ML4924R
+BS 596 596 ML4924F
+BS 597 609 ML4924R
+BS 610 624 ML4924F
+BS 625 659 ML4924R
+BS 660 670 ML4924F
+BS 671 708 ML4924R
+
+RD ML4924F 734 0 0
+ttgttcttttctcgAGGGTACTTGAGGATTCTACCACCACAACAACAAAG
+CCTTATCATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAAC
+AAAATGAAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTG
+AGGATAACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTT
+GGTAACTCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTG
+CAACACGGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAG
+ATACCCGTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGAT
+ACCGAATCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGT
+ACTCCTTGACTTTCAAGCCACTCTCAACaaGCTGCATTGCCTTgccTCCT
+CTCACGGTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGA
+CCTCACAGtgcaCctggcCTTGgacaacacAGGTGTTTGACCACTGAGCT
+gttacaAAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACG
+GAGATgtcgaGAACGAGCTTCTGAACCTTGATattcctCATtgggctcca
+cagctctttcTCTGACGCCATTCTGAGGatctagggatgcacacccacaa
+ggacatgaccaaaatcgacatctgggcccactcg
+
+QA 12 601 10 713
+DS CHROMAT_FILE: ML4924F PHD_FILE: ML4924F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+
+RD ML4924R 763 0 0
+atatcTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT
+CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG
+AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA
+ACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTTGGTAAC
+TCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTGCAACAC
+GGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAGATACCC
+GTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGATACCGAA
+TCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGTACTCCT
+TGACTTTCAAGCCACTCTCAAGAAGCTGCATTGCCTTGTCTCCTCTCACG
+GTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGACCTCAC
+AGTGTACCTCGCCTTGGAGAAGACAGGTGTTTGACCACTGAGCTGTTCCA
+AAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACGGAGATG
+TTGAGAACGAGCTTCTGAACCTTGATATCCCTCATTGGGTTCGAGAGCTT
+CTTCTCTGACGCCATTCTGAGGATTAAGGGATGCAAAGTCAGAGGGAGAT
+GAGCAAAgxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxaccaaagt
+
+QA 10 707 1 708
+DS CHROMAT_FILE: ML4924R PHD_FILE: ML4924R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+
+
+WA{
+phrap_params phrap 010621:164002
+/usr/local/genome/bin/phrap test_project.fasta.screen -new_ace -view 
+phrap version 0.990329
+}
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.1
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+AS 1 2
+
+CO Contig1 708 2 51 U
+atatcTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT
+CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG
+AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA
+ACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTTGGTAAC
+TCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTGCAACAC
+GGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAGATACCC
+GTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGATACCGAA
+TCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGTACTCCT
+TGACTTTCAAGCCACTCTCAAGAAGCTGCATTGCCTTGTCTCCTCTCACG
+GTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGACCTCAC
+AGTGTACCTCGCCTTGGAGAAGACAGGTGTTTGACCACTGAGCTGTTCCA
+AAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACGGAGATG
+TTGAGAACGAGCTTCTGAACCTTGATATCCCTCATTGGGTTCGAGAGCTT
+CTTCTCTGACGCCATTCTGAGGATTAAGGGATGCAAAGTCAGAGGGAGAT
+GAGCAAAg
+
+BQ
+ 0 0 0 12 17 32 26 25 25 41 49 50 47 47 51 50 56 55 59 50 50 53 55 55 53 59 57 51 54 61 56 54 53 60 48 48 57 58 58 64 60 62 58 64 57 53 53 59 64 67
+ 67 75 72 65 55 47 47 39 44 41 48 54 55 61 52 47 53 53 53 60 67 67 74 76 76 69 65 62 58 54 51 56 60 60 73 73 66 61 61 59 52 56 56 60 64 64 59 57 59 60
+ 60 59 71 82 74 69 61 64 61 83 68 72 71 73 75 71 77 66 62 62 60 64 61 63 63 61 75 75 64 64 60 64 64 75 75 72 80 83 61 64 64 72 64 64 61 56 57 60 62 62
+ 62 62 60 52 50 56 57 57 61 61 64 59 59 64 60 62 58 54 50 51 58 63 67 69 75 75 78 86 86 84 79 71 71 72 69 69 67 90 90 90 90 79 85 88 69 71 71 67 67 67
+ 78 75 75 80 84 80 80 82 86 86 90 90 90 90 90 76 75 67 67 67 70 69 78 75 81 75 73 73 69 68 60 66 70 73 70 70 70 70 69 74 75 72 69 73 75 72 72 88 89 75
+ 75 75 75 77 88 90 90 90 81 81 78 78 75 75 80 83 86 82 84 83 86 86 90 85 73 56 56 55 59 54 62 62 72 74 77 90 90 90 90 90 85 90 75 75 78 85 84 88 80 80
+ 72 76 72 74 74 90 90 88 75 75 75 72 72 72 77 77 77 80 77 77 90 90 90 90 90 90 90 90 90 90 90 90 88 86 86 86 90 86 77 83 77 77 77 83 83 90 90 90 90 90
+ 90 90 90 90 90 90 87 80 78 74 78 74 81 73 80 78 86 86 86 86 88 80 88 87 87 87 87 90 90 90 90 90 90 90 90 87 87 80 71 70 67 63 63 65 72 80 90 90 90 90
+ 86 86 86 88 86 90 89 76 76 78 78 77 77 76 89 89 71 71 56 51 51 43 43 43 43 58 58 80 80 80 80 89 89 71 71 58 38 38 38 43 43 60 60 60 60 60 58 58 58 75
+ 79 79 78 88 83 81 81 71 71 69 75 75 78 78 78 85 88 85 85 90 85 85 85 90 90 71 67 60 58 58 58 68 70 78 80 89 90 85 85 85 80 77 78 77 74 74 72 72 70 58
+ 58 58 43 43 51 51 51 43 43 43 43 43 58 66 66 66 51 51 45 45 51 51 51 51 60 60 60 69 69 69 66 66 72 76 80 85 81 81 60 60 60 60 58 58 51 51 51 51 56 56
+ 71 71 71 71 71 71 61 58 58 58 58 58 60 61 58 58 58 59 62 66 63 67 64 64 64 64 67 67 76 74 76 72 72 58 58 58 58 58 60 73 80 80 74 64 61 54 45 45 45 51
+ 51 51 51 51 51 45 40 39 39 49 49 49 54 52 64 65 67 71 72 72 73 73 43 36 39 39 39 39 39 51 51 51 51 51 51 51 51 51 51 51 51 51 40 39 39 39 39 35 35 40
+ 51 51 51 51 40 40 40 35 35 45 43 43 52 50 50 49 52 49 51 53 46 46 46 46 46 46 46 51 42 56 40 40 40 40 39 35 35 35 35 35 35 51 51 56 51 51 46 40 40 40
+ 40 40 40 40 39 34 34 0
+
+AF ML4924F U -5
+AF ML4924R C 1
+BS 1 9 ML4924R
+BS 10 11 ML4924F
+BS 12 16 ML4924R
+BS 17 51 ML4924F
+BS 52 54 ML4924R
+BS 55 63 ML4924F
+BS 64 64 ML4924R
+BS 65 109 ML4924F
+BS 110 110 ML4924R
+BS 111 111 ML4924F
+BS 112 113 ML4924R
+BS 114 114 ML4924F
+BS 115 117 ML4924R
+BS 118 121 ML4924F
+BS 122 128 ML4924R
+BS 129 133 ML4924F
+BS 134 138 ML4924R
+BS 139 141 ML4924F
+BS 142 144 ML4924R
+BS 145 160 ML4924F
+BS 161 161 ML4924R
+BS 162 174 ML4924F
+BS 175 180 ML4924R
+BS 181 187 ML4924F
+BS 188 188 ML4924R
+BS 189 200 ML4924F
+BS 201 201 ML4924R
+BS 202 203 ML4924F
+BS 204 221 ML4924R
+BS 222 222 ML4924F
+BS 223 223 ML4924R
+BS 224 240 ML4924F
+BS 241 244 ML4924R
+BS 245 245 ML4924F
+BS 246 258 ML4924R
+BS 259 262 ML4924F
+BS 263 291 ML4924R
+BS 292 292 ML4924F
+BS 293 331 ML4924R
+BS 332 332 ML4924F
+BS 333 372 ML4924R
+BS 373 377 ML4924F
+BS 378 453 ML4924R
+BS 454 454 ML4924F
+BS 455 595 ML4924R
+BS 596 596 ML4924F
+BS 597 609 ML4924R
+BS 610 624 ML4924F
+BS 625 659 ML4924R
+BS 660 670 ML4924F
+BS 671 708 ML4924R
+
+RD ML4924F 734 0 0
+ttgttcttttctcgAGGGTACTTGAGGATTCTACCACCACAACAACAAAG
+CCTTATCATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAAC
+AAAATGAAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTG
+AGGATAACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTT
+GGTAACTCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTG
+CAACACGGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAG
+ATACCCGTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGAT
+ACCGAATCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGT
+ACTCCTTGACTTTCAAGCCACTCTCAACaaGCTGCATTGCCTTgccTCCT
+CTCACGGTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGA
+CCTCACAGtgcaCctggcCTTGgacaacacAGGTGTTTGACCACTGAGCT
+gttacaAAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACG
+GAGATgtcgaGAACGAGCTTCTGAACCTTGATattcctCATtgggctcca
+cagctctttcTCTGACGCCATTCTGAGGatctagggatgcacacccacaa
+ggacatgaccaaaatcgacatctgggcccactcg
+
+QA 12 601 10 713
+DS CHROMAT_FILE: ML4924F PHD_FILE: ML4924F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+
+RD ML4924R 763 0 0
+atatcTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT
+CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG
+AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA
+ACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTTGGTAAC
+TCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTGCAACAC
+GGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAGATACCC
+GTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGATACCGAA
+TCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGTACTCCT
+TGACTTTCAAGCCACTCTCAAGAAGCTGCATTGCCTTGTCTCCTCTCACG
+GTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGACCTCAC
+AGTGTACCTCGCCTTGGAGAAGACAGGTGTTTGACCACTGAGCTGTTCCA
+AAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACGGAGATG
+TTGAGAACGAGCTTCTGAACCTTGATATCCCTCATTGGGTTCGAGAGCTT
+CTTCTCTGACGCCATTCTGAGGATTAAGGGATGCAAAGTCAGAGGGAGAT
+GAGCAAAgxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+xxxxxaccaaagt
+
+QA 10 707 1 708
+DS CHROMAT_FILE: ML4924R PHD_FILE: ML4924R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:27:03 2001
+
+
+WA{
+phrap_params phrap 010621:164002
+/usr/local/genome/bin/phrap test_project.fasta.screen -new_ace -view 
+phrap version 0.990329
+}
+
+CT{
+Contig1 Annotation consed 40 50 060216:160521
+contig extra
+info
+}
+
+CT{
+Contig1 Annotation consed 20 30 060216:160521
+COMMENT{
+contig tag
+comment
+C}
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+>test_project.fasta.screen.Contig1
+ATATCTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT
+CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG
+AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA
+ACACCTTCGTACTTAACTTGGAACCACTTCATGGCATCATCCTTGGTAAC
+TCTATGCTGAATCCCAACGCGAGTCTTGCACCTACGGCGACGTGCAACAC
+GGTATCCCGGGCGTTCAAGAACAACATAGAAGTCCATTCCATAGATACCC
+GTAGAAGGATCGTACTTGATTCCAAGATCAATGTGCTCCTGGATACCGAA
+TCCAAAACAGCCAGTGTCACTGAAGTTCCTCCTCAACAGCTCGTACTCCT
+TGACTTTCAAGCCACTCTCAAGAAGCTGCATTGCCTTGTCTCCTCTCACG
+GTGACGTAGCACGCAATCTTTTCATTACGTCTAATACCGAAAGACCTCAC
+AGTGTACCTCGCCTTGGAGAAGACAGGTGTTTGACCACTGAGCTGTTCCA
+AAACCTTGGAGGCACGGGTGAGACGATCACCACTCTCACCAACGGAGATG
+TTGAGAACGAGCTTCTGAACCTTGATATCCCTCATTGGGTTCGAGAGCTT
+CTTCTCTGACGCCATTCTGAGGATTAAGGGATGCAAAGTCAGAGGGAGAT
+GAGCAAAG


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs.qual
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs.qual	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs.qual	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+>test_project.fasta.screen.Contig1 
+0 0 0 12 17 32 26 25 25 41 49 50 47 47 51 50 56 55 59 50 50 53 55 55 53 59 57 51 54 61 56 54 53 60 48 48 57 58 58 64 60 62 58 64 57 53 53 59 64 67 
+67 75 72 65 55 47 47 39 44 41 48 54 55 61 52 47 53 53 53 60 67 67 74 76 76 69 65 62 58 54 51 56 60 60 73 73 66 61 61 59 52 56 56 60 64 64 59 57 59 60 
+60 59 71 82 74 69 61 64 61 83 68 72 71 73 75 71 77 66 62 62 60 64 61 63 63 61 75 75 64 64 60 64 64 75 75 72 80 83 61 64 64 72 64 64 61 56 57 60 62 62 
+62 62 60 52 50 56 57 57 61 61 64 59 59 64 60 62 58 54 50 51 58 63 67 69 75 75 78 86 86 84 79 71 71 72 69 69 67 90 90 90 90 79 85 88 69 71 71 67 67 67 
+78 75 75 80 84 80 80 82 86 86 90 90 90 90 90 76 75 67 67 67 70 69 78 75 81 75 73 73 69 68 60 66 70 73 70 70 70 70 69 74 75 72 69 73 75 72 72 88 89 75 
+75 75 75 77 88 90 90 90 81 81 78 78 75 75 80 83 86 82 84 83 86 86 90 85 73 56 56 55 59 54 62 62 72 74 77 90 90 90 90 90 85 90 75 75 78 85 84 88 80 80 
+72 76 72 74 74 90 90 88 75 75 75 72 72 72 77 77 77 80 77 77 90 90 90 90 90 90 90 90 90 90 90 90 88 86 86 86 90 86 77 83 77 77 77 83 83 90 90 90 90 90 
+90 90 90 90 90 90 87 80 78 74 78 74 81 73 80 78 86 86 86 86 88 80 88 87 87 87 87 90 90 90 90 90 90 90 90 87 87 80 71 70 67 63 63 65 72 80 90 90 90 90 
+86 86 86 88 86 90 89 76 76 78 78 77 77 76 89 89 71 71 56 51 51 43 43 43 43 58 58 80 80 80 80 89 89 71 71 58 38 38 38 43 43 60 60 60 60 60 58 58 58 75 
+79 79 78 88 83 81 81 71 71 69 75 75 78 78 78 85 88 85 85 90 85 85 85 90 90 71 67 60 58 58 58 68 70 78 80 89 90 85 85 85 80 77 78 77 74 74 72 72 70 58 
+58 58 43 43 51 51 51 43 43 43 43 43 58 66 66 66 51 51 45 45 51 51 51 51 60 60 60 69 69 69 66 66 72 76 80 85 81 81 60 60 60 60 58 58 51 51 51 51 56 56 
+71 71 71 71 71 71 61 58 58 58 58 58 60 61 58 58 58 59 62 66 63 67 64 64 64 64 67 67 76 74 76 72 72 58 58 58 58 58 60 73 80 80 74 64 61 54 45 45 45 51 
+51 51 51 51 51 45 40 39 39 49 49 49 54 52 64 65 67 71 72 72 73 73 43 36 39 39 39 39 39 51 51 51 51 51 51 51 51 51 51 51 51 51 40 39 39 39 39 35 35 40 
+51 51 51 51 40 40 40 35 35 45 43 43 52 50 50 49 52 49 51 53 46 46 46 46 46 46 46 51 42 56 40 40 40 40 39 35 35 35 35 35 35 51 51 56 51 51 46 40 40 40 
+40 40 40 40 39 34 34 0 


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.contigs.qual
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.log
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.log	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.log	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,88 @@
+No. words: 2989; after pruning: 2826
+
+
+Histogram of relative pair offsets, for read pairs with multiple alignments:
+
+Pass 1
+             Gap  Score    
+
+Pass: 1
+#reads  #contigs (not counting singlets)
+    1      2
+
+N.B. Following not based on all pairs!!
+
+Lowest   # merges  # failures
+LLR score
+
+Gap   # merges  # failures
+ size
+
+Offset
+
+Pass 3
+             Gap  Score    
+Merge:(0)    0    0.0 15.1 (1,1:768)  628  3.27 0.00 0.00  ML4924F       10   713 (21)  C ML4924R   (3)   760    57 *
+
+Pass: 3
+#reads  #contigs (not counting singlets)
+    2      1
+
+N.B. Following not based on all pairs!!
+
+Lowest   # merges  # failures
+LLR score
+0.0       1        0
+
+Gap   # merges  # failures
+ size
+ 0       1        0
+
+Offset
+  0       1 
+
+Pass 4
+             Gap  Score    
+
+Pass: 4
+#reads  #contigs (not counting singlets)
+    2      1
+
+N.B. Following not based on all pairs!!
+
+Lowest   # merges  # failures
+LLR score
+
+Gap   # merges  # failures
+ size
+
+Offset
+
+Read equivalence class histogram:
+    1    2
+    2    1
+
+Chimera merges: 
+
+
+Contig 1:  302 nodes
+
+ 153 str. conn. components
+
+Path
+    1  C ML4924R         763 
+   19    ML4924F          25 
+  275  C ML4924R         489 
+  610    ML4924F         616 
+  622  C ML4924R         142 
+  660    ML4924F         666 
+  668  C ML4924R          96 
+Contig length: old 769, new 763
+
+Contig length: old 763, new 708
+New start: 0 
+
+Contig 1 unpadded => padded conversion:
+  200 =>   200
+  400 =>   400
+  600 =>   600
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.log
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.problems
===================================================================


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.problems
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.problems.qual
===================================================================


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.problems.qual
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.qual
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.qual	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.qual	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,180 @@
+>ML4922R PHD_FILE: ML4922R.phd.1
+4 4 6 6 9 19 24 24 19 16 10 8 8 9 21 20 32 34 34 39
+39 40 39 46 40 37 32 32 28 28 25 25 34 32 40 40 40
+40 40 40 34 35 35 35 35 35 35 35 35 35 51 51 39 39
+39 35 31 31 31 31 31 32 35 35 45 51 56 51 39 39 40
+40 39 39 45 40 40 40 35 35 39 35 35 35 37 40 40 40
+40 40 40 40 45 51 51 51 51 46 46 40 40 39 39 39 39
+40 40 46 40 40 40 40 40 51 51 51 51 45 45 45 45 51
+51 56 56 56 56 51 51 51 51 51 40 40 40 40 40 40 51
+51 39 39 38 38 38 35 39 39 39 39 43 43 43 46 46 51
+51 56 56 45 35 35 35 35 35 35 43 43 43 43 43 43 43
+43 43 43 43 51 51 51 56 56 56 56 56 43 43 43 43 43
+43 43 43 43 43 43 45 45 43 43 43 43 43 43 43 43 43
+43 43 43 43 43 43 43 45 45 43 43 43 43 43 51 51 51
+51 51 51 51 56 56 45 43 43 43 43 43 43 43 43 43 43
+51 51 56 56 51 56 56 56 56 56 56 56 51 45 45 45 45
+45 45 51 45 43 43 43 43 43 45 51 51 51 51 51 45 45
+45 51 51 51 56 51 43 36 36 36 36 36 43 51 51 51 51
+51 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 43
+43 43 43 43 43 45 45 51 51 51 51 43 43 43 43 43 43
+43 43 43 43 51 56 56 56 43 43 43 43 43 43 43 51 51
+51 43 43 43 43 45 45 56 56 51 43 43 43 43 43 43 45
+43 43 43 43 43 43 45 56 56 56 56 56 56 56 56 56 56
+56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56
+56 56 56 56 56 56 51 51 45 45 45 45 51 51 56 56 56
+51 45 45 45 45 45 45 51 43 43 43 43 45 45 56 56 56
+56 56 56 56 51 51 51 43 43 43 43 43 43 43 43 43 43
+43 43 40 45 45 45 45 56 56 56 56 56 56 56 56 51 51
+40 40 40 40 45 45 56 56 56 56 51 51 51 51 51 51 51
+45 40 37 37 37 40 45 56 51 51 51 51 51 51 56 56 56
+56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51
+56 45 40 40 40 40 40 51 51 51 51 56 56 56 56 56 56
+56 56 51 51 51 51 51 51 56 56 56 56 56 56 56 56 51
+45 45 45 45 45 40 40 40 40 40 51 56 56 56 56 56 56
+56 56 51 51 51 40 40 40 56 56 46 48 46 46 44 44 40
+40 32 29 24 24 29 40 40 40 44 44 40 34 34 40 40 40
+40 40 40 40 44 56 56 56 56 51 40 45 45 45 45 40 45
+46 51 56 56 56 51 51 51 40 40 40 40 37 40 56 56 51
+40 40 40 40 40 40 40 40 44 32 32 32 25 24 24 29 40
+40 40 48 48 32 32 26 25 25 25 32 30 32 34 34 40 40
+40 32 32 29 25 25 29 29 34 40 40 40 44 44 40 40 40
+40 25 24 18 15 15 25 29 29 29 48 40 40 40 44 32 29
+27 25 24 26 29 29 25 25 32 32 29 29 34 25 25 25 22
+24 29 29 28 25 34 32 32 22 22 24 18 16 19 19 18 18
+23 20 19 22 25 27 29 25 29 26 29 29 29 24 17 12 8 4
+4 
+>ML4924F PHD_FILE: ML4924F.phd.1
+10 16 19 19 8 6 6 6 6 9 9 15 17 17 22 22 25 25 22 22
+22 25 34 34 42 35 35 38 40 40 35 34 31 31 33 34 35
+33 29 35 29 29 32 34 34 35 35 42 42 42 42 35 35 37
+35 35 35 35 35 31 33 24 24 24 29 26 29 29 29 29 29
+29 35 35 35 35 35 35 42 42 47 47 40 35 35 35 32 32
+32 32 42 42 42 37 37 30 30 35 35 35 35 35 37 32 32
+35 35 37 37 42 42 37 37 40 37 37 35 35 35 37 35 35
+37 37 37 37 35 32 29 31 31 29 35 35 35 35 35 35 35
+35 35 32 32 35 32 35 35 32 32 32 32 32 35 35 35 35
+35 35 37 37 35 35 32 32 32 32 32 30 35 35 35 35 35
+35 32 35 42 42 42 42 35 35 38 38 38 38 42 40 40 38
+35 35 35 42 50 56 56 50 56 56 42 42 42 38 38 38 38
+38 38 40 38 38 38 40 40 40 44 42 42 42 38 36 36 32
+32 32 35 35 38 41 41 41 41 41 42 41 41 41 41 41 41
+41 41 41 35 40 35 32 32 36 38 35 35 37 38 35 35 35
+35 37 37 41 41 41 41 41 41 41 35 35 35 32 35 31 33
+32 35 35 37 29 33 16 16 9 20 13 23 23 33 35 37 35 40
+40 45 40 40 50 30 30 33 40 33 37 29 29 21 25 21 23
+23 50 50 37 35 35 35 32 32 32 37 37 37 40 37 37 37
+37 35 35 44 44 44 44 44 56 44 56 37 35 35 35 35 35
+26 32 26 26 26 32 32 35 35 35 35 35 42 42 35 42 42
+42 42 35 33 29 33 29 30 28 35 33 35 35 35 35 37 37
+45 44 44 44 44 44 44 44 39 39 42 42 42 42 42 35 28
+27 24 20 20 22 29 29 40 40 42 42 35 35 35 37 35 35
+33 33 33 35 35 34 34 33 33 33 29 28 28 27 21 21 15
+15 21 21 25 29 29 29 29 33 33 25 27 23 23 11 10 10
+24 24 29 29 27 29 29 29 29 32 36 36 35 45 40 38 38
+28 28 26 32 32 35 35 35 42 37 34 34 42 29 29 29 35
+35 26 24 17 14 12 12 25 27 35 37 38 40 34 34 34 35
+32 33 32 29 29 21 21 27 33 26 16 16 10 10 10 20 12
+6 6 8 9 9 11 13 18 15 15 17 11 8 11 9 13 9 19 21 24
+24 18 15 15 21 25 29 29 25 25 14 14 9 9 9 24 14 11
+8 8 8 15 20 27 40 40 40 26 24 12 12 15 13 10 17 18
+15 12 15 16 19 23 20 24 21 21 21 21 24 16 20 18 20
+16 16 14 12 10 10 12 17 17 29 29 29 19 16 9 9 9 16
+13 11 9 11 9 10 9 19 14 14 10 10 10 14 14 25 26 28
+32 32 32 33 33 28 21 18 12 11 13 9 9 12 10 11 10 11
+12 11 11 10 10 10 10 8 8 10 9 9 9 6 6 8 8 11 9 9 10
+10 14 14 10 8 8 17 10 10 9 12 9 11 7 7 10 12 9 6 6
+6 6 8 8 11 12 10 9 9 8 8 12 14 20 16 17 10 10 12 17
+17 24 18 14 14 11 14 12 15 15 10 9 11 9 9 10 8 6 6
+8 8 7 7 9 9 7 6 10 6 6 6 6 
+>ML4924R PHD_FILE: ML4924R.phd.1
+6 6 6 8 8 12 18 16 14 11 9 6 6 9 19 20 32 34 34 39
+39 39 29 40 28 21 13 4 4 4 8 8 23 29 39 40 40 40 40
+46 34 33 29 29 32 33 34 34 34 34 40 40 34 34 34 34
+34 34 39 40 40 40 40 40 40 40 46 51 51 56 51 51 35
+35 35 35 35 35 39 40 40 40 40 56 42 51 46 46 46 46
+46 46 46 46 40 40 40 40 40 40 35 35 35 35 35 35 40
+40 40 51 51 51 51 40 35 35 39 39 39 39 40 51 51 51
+51 51 51 51 51 51 51 51 51 51 39 39 39 39 39 35 40
+40 40 40 40 39 39 39 39 38 40 39 39 39 39 39 40 45
+51 51 51 51 51 51 45 45 45 45 45 45 45 51 51 56 43
+43 43 43 43 43 56 56 56 56 56 51 43 43 43 43 43 43
+43 43 43 43 43 43 43 43 43 43 43 43 43 43 46 56 56
+56 56 56 56 56 56 51 51 51 51 43 43 45 45 45 45 56
+56 56 51 51 51 51 51 51 45 45 45 45 45 51 51 51 51
+45 45 51 51 51 51 51 43 43 43 43 43 43 51 51 51 43
+43 43 43 43 43 51 51 45 45 45 45 45 45 51 51 51 51
+51 43 43 43 43 43 43 43 43 43 45 56 56 56 56 56 56
+51 51 51 43 43 43 43 43 43 43 43 43 43 43 43 43 43
+43 43 43 43 43 43 45 45 45 45 45 43 43 38 38 38 43
+56 56 56 56 51 51 51 51 43 43 43 43 43 43 51 51 56
+56 56 56 56 43 43 43 43 43 43 43 56 56 51 51 51 51
+51 51 56 56 51 51 43 43 43 43 43 43 43 45 45 45 56
+56 56 56 56 56 56 51 43 43 43 43 43 43 51 51 51 51
+51 45 45 45 51 45 45 45 45 45 45 56 56 56 56 56 56
+56 56 56 56 56 51 51 51 51 51 51 51 51 56 51 51 51
+51 51 51 56 56 56 56 56 56 56 56 56 56 40 40 40 40
+40 40 40 40 40 40 40 40 51 56 56 51 51 51 51 51 51
+51 51 51 45 45 45 45 45 45 51 56 56 56 56 40 39 39
+39 39 39 39 40 40 40 40 56 56 51 51 51 51 51 51 51
+45 40 40 37 37 40 40 56 56 51 51 40 40 40 40 40 51
+51 37 37 37 37 37 40 40 34 34 29 29 29 29 32 29 25
+19 27 27 32 32 34 40 34 40 34 35 35 35 35 39 40 56
+56 56 51 51 46 46 42 42 42 46 40 37 37 40 29 29 29
+29 29 27 32 29 29 34 48 48 48 32 34 34 34 31 31 37
+46 48 48 40 40 40 27 25 21 16 16 18 19 23 27 25 29
+24 29 32 29 29 25 25 21 15 15 23 27 27 27 27 25 22
+24 29 32 32 40 29 29 29 48 48 40 40 40 29 29 25 29
+29 40 40 32 32 32 32 32 25 25 25 29 40 36 40 36 36
+37 33 46 24 24 24 32 32 40 34 22 25 25 27 25 22 29
+29 25 21 21 22 29 24 24 24 31 31 28 28 24 19 19 23
+27 25 22 29 34 32 32 32 25 18 18 18 18 23 32 26 25
+19 15 15 15 23 23 22 34 37 40 32 32 29 22 18 18 13
+22 16 20 25 29 24 24 25 19 19 25 24 21 21 27 21 20
+26 25 18 15 15 10 13 13 17 21 22 25 29 25 25 25 24
+19 10 10 11 17 17 12 19 16 11 
+>ML4947F PHD_FILE: ML4947F.phd.1
+9 13 11 21 24 34 34 34 31 31 25 25 25 25 34 34 38 29
+29 29 29 29 32 32 32 32 32 32 32 56 46 35 35 35 35
+35 35 40 45 40 40 40 46 40 40 40 40 40 40 37 37 35
+35 35 35 35 35 40 46 51 51 51 51 40 40 40 40 35 23
+19 10 13 10 16 16 31 33 33 40 40 40 44 46 51 51 51
+51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 56 56
+56 56 56 56 51 46 40 40 40 40 40 40 40 40 39 39 38
+38 38 39 40 40 40 40 39 39 39 39 39 40 51 51 51 51
+51 51 56 40 40 40 43 43 43 43 43 43 43 43 43 43 51
+51 51 51 51 51 51 51 51 51 51 43 43 43 43 43 43 43
+43 43 43 43 45 45 45 45 51 51 51 51 43 43 43 43 43
+43 43 43 43 43 43 45 51 45 45 45 45 45 45 51 56 56
+56 51 51 51 45 45 45 51 45 45 51 51 51 45 45 43 43
+43 43 45 45 56 56 56 45 40 40 40 38 35 36 36 36 43
+43 43 36 43 43 43 51 51 51 56 43 43 43 43 38 38 42
+43 44 51 51 51 43 43 43 43 43 43 45 51 51 43 45 45
+43 43 43 43 43 43 43 43 43 40 41 41 45 45 45 45 45
+45 45 45 45 45 41 43 41 43 43 45 45 45 45 45 45 45
+45 43 51 56 56 56 51 51 51 51 51 51 46 46 42 42 42
+45 43 43 43 43 43 43 43 43 45 43 45 45 45 45 45 45
+45 45 43 43 43 43 43 43 43 43 43 43 43 56 43 41 45
+45 41 41 42 46 46 51 51 51 51 51 51 51 51 45 43 43
+43 43 43 43 43 45 45 56 56 56 56 56 56 56 51 51 45
+45 45 41 45 45 51 56 56 51 51 51 51 51 51 51 51 51
+51 45 43 43 43 43 43 43 43 43 43 43 43 51 51 43 43
+43 40 43 43 45 45 45 45 45 51 56 56 56 56 56 56 51
+51 45 45 43 43 45 45 43 43 56 56 50 45 40 40 37 40
+40 51 51 51 56 56 45 40 40 40 40 40 40 40 45 37 42
+42 43 43 46 37 37 37 37 37 37 43 42 42 50 46 46 51
+37 37 37 40 35 35 35 35 35 35 37 51 51 51 51 51 45
+45 45 45 45 46 44 44 42 46 46 51 51 51 56 51 51 42
+46 42 51 51 37 37 37 37 40 45 56 56 56 56 56 51 51
+45 40 35 35 35 35 39 45 56 56 51 51 45 45 45 45 45
+40 45 45 40 40 40 45 40 42 42 46 56 51 42 42 37 37
+35 40 34 29 37 40 40 44 34 32 32 27 25 22 26 29 27
+32 29 27 25 24 29 29 29 40 28 29 24 25 29 32 32 32
+34 40 40 33 40 40 35 40 40 48 48 40 40 40 40 34 32
+29 34 40 32 29 29 29 29 29 29 32 27 32 25 22 18 21
+23 23 18 19 25 40 40 40 36 30 30 32 32 32 29 31 21
+21 18 15 14 25 24 24 25 25 36 32 32 40 40 40 32 27
+27 25 20 20 25 19 19 18 13 13 18 27 27 32 28 32 24
+27 24 22 20 27 27 27 25 29 29 29 22 22 23 27 23 23
+25 30 33 27 39 25 28 26 26 21 16 10 10 14 16 20 27
+27 24 22 21 27 21 21 


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.qual
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.singlets
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.singlets	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.singlets	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,33 @@
+>ML4922R  CHROMAT_FILE: ML4922R PHD_FILE: ML4922R.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:28:31 2001
+CCTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXCGGACGCGTGGGGTGACCCCGGAAGATTCGAGTGCTTCGTCTGGT
+AATCAAAATCAGGTTTCTCCGGTTGGATCTCAAACTCAAGGTTATGCCGT
+GGCACAAATGTCGCCTGCTGTTTCATCTGTCAGTGATGCTCCCTCATTAG
+ACACACAGAGACAACGAGATATTCAGTCCAGGCTAGCTAACAGTGGTTCA
+GATGATTCTGATGACGAAGATCCTGATGGAGAGACTGTAACAACAGTGAA
+TGATGATCCTACCGATGTTAAGCGTGCTAGGAGGATGCTTTCAAACCGAG
+AATCCGCTAGGCGCTCTAGAAGAAGAAAGCAAGAGCAAATGAGTGAATTT
+GATTCACAGGTTGGCCAATTACGTGGTGAGCATTCAACTATGCTTAAGCG
+TCTTAGTGAGGTCACTCAGAAGTTTAGCACCGCTGTTGTCGATAACAGAA
+TCTTAAAAGCTGATATCGAAACATTGAGAACAAAGGTGAAGATGGCAGAG
+GACACTGTGAAAAGAGTGACAGGGATGAACCCTAGGCTTTTGGCAAAACC
+ATACAGCATACCATTCGATAGGACACTGATGGGTTCTTCACAGCCAAACT
+TGAATCAAACCGACATGATTCCAAATCAAATATCAGAGGACAACAGTTTT
+GCAAGCAACTCGGTTGTTACTTTGGAATCCGGTTGCAATGCTTTTGAAGC
+CAN
+>ML4947F  CHROMAT_FILE: ML4947F PHD_FILE: ML4947F.phd.1 CHEM: term DYE: big TIME: Thu Jun 21 11:28:31 2001
+CTGTGGGACCCACTGATATTACCGAACTTTTGAACCAAACAGTTATGTGT
+TGGCGCCAAAAATAACGTTAAAGAAACCATCACAAGAGCCTTATGACAAA
+GAGGGAGAGATAGACATAACTTAAGACATTACACAATTTGGTCAAACGAT
+AAACCAAATCCAACAGACATTTAAACCAAATCCAACGCCAACAAAACATT
+CATATATAAGATATACCCAAACCGTATCAAACGCAGACGCCGCCCTAGTA
+ACTGTATCCTTTCACAAACAAGCTCTCGGAGGCAGCAGAGTCACCAGAAG
+CCCCAGCAATGTATTTCCCAAGTGTAGCGTCCGAGTTAGCCTTGCACCTG
+ACCAAGAACGTGGCCTGAGCTTTGGCTACGTTCTCTTTCTTACCTCCCCA
+AGCCTTGAGTGTGCTTTGCTGGAGGGCTCGGCCAAAGGAGAAAGTAAGCG
+TCCATGGCTTCAACAGATCGAGCTTGTTCATTGCGTTCAGATTTAGCGTT
+GCTTCCTCTTCGCTCTGTCCTCCTGAGAGGAACACGATTCCTGGAACCGC
+TGGTGGGACCGTGCGGCGGAGAGCAGTCACTGTGTATTCCGCAATGACTT
+CCGGTGCAACCTTGGCGCTGTCTGAGCCTGGAGTGACCATGTTAGGTTTA
+AGCAGAGTGCCTTCGAGGAGGACGTGGTGGTCGTTCAAGGCCTTGTACAC
+GGCAGAAAGAACCGTCTCAGTCACAGCCGCACATTTCTT


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.singlets
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.view
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.view	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.view	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+HEADER FORMAT1 test_project.fasta.screen 4 0 2 2 1
+READ ML4922R 753
+READ ML4947F 739
+CONTIG Contig1 708
+CONTIG_QUAL Contig1 0 0 0 12 17 32 26 25 25 41 49 50 47 47 51 50 56 55 59 50 50 53 55 55 53 59 57 51 54 61 56 54 53 60 48 48 57 58 58 64 60 62 58 64 57 53 53 59 64 67 67 75 72 65 55 47 47 39 44 41 48 54 55 61 52 47 53 53 53 60 67 67 74 76 76 69 65 62 58 54 51 56 60 60 73 73 66 61 61 59 52 56 56 60 64 64 59 57 59 60 60 59 71 82 74 69 61 64 61 83 68 72 71 73 75 71 77 66 62 62 60 64 61 63 63 61 75 75 64 64 60 64 64 75 75 72 80 83 61 64 64 72 64 64 61 56 57 60 62 62 62 62 60 52 50 56 57 57 61 61 64 59 59 64 60 62 58 54 50 51 58 63 67 69 75 75 78 86 86 84 79 71 71 72 69 69 67 90 90 90 90 79 85 88 69 71 71 67 67 67 78 75 75 80 84 80 80 82 86 86 90 90 90 90 90 76 75 67 67 67 70 69 78 75 81 75 73 73 69 68 60 66 70 73 70 70 70 70 69 74 75 72 69 73 75 72 72 88 89 75 75 75 75 77 88 90 90 90 81 81 78 78 75 75 80 83 86 82 84 83 86 86 90 85 73 56 56 55 59 54 62 62 72 74 77 90 90 90 90 90 85 90 75 75 78 85 84 88 80 80 72 76 72 74 74 90 90 88 75 75 75 72 72 72 77 77 77 80 77 77 90 90 90 90 90 90 90 90 90 90 90 90 88 86 86 86 90 86 77 83 77 77 77 83 83 90 90 90 90 90 90 90 90 90 90 90 87 80 78 74 78 74 81 73 80 78 86 86 86 86 88 80 88 87 87 87 87 90 90 90 90 90 90 90 90 87 87 80 71 70 67 63 63 65 72 80 90 90 90 90 86 86 86 88 86 90 89 76 76 78 78 77 77 76 89 89 71 71 56 51 51 43 43 43 43 58 58 80 80 80 80 89 89 71 71 58 38 38 38 43 43 60 60 60 60 60 58 58 58 75 79 79 78 88 83 81 81 71 71 69 75 75 78 78 78 85 88 85 85 90 85 85 85 90 90 71 67 60 58 58 58 68 70 78 80 89 90 85 85 85 80 77 78 77 74 74 72 72 70 58 58 58 43 43 51 51 51 43 43 43 43 43 58 66 66 66 51 51 45 45 51 51 51 51 60 60 60 69 69 69 66 66 72 76 80 85 81 81 60 60 60 60 58 58 51 51 51 51 56 56 71 71 71 71 71 71 61 58 58 58 58 58 60 61 58 58 58 59 62 66 63 67 64 64 64 64 67 67 76 74 76 72 72 58 58 58 58 58 60 73 80 80 74 64 61 54 45 45 45 51 51 51 51 51 51 45 40 39 39 49 49 49 54 52 64 65 67 71 72 72 73 73 43 36 39 39 39 39 39 51 51 51 51 51 51 51 51 51 51 51 51 51 40 39 39 39 39 35 35 40 51 51 51 51 40 40 40 35 35 45 
43 43 52 50 50 49 52 49 51 53 46 46 46 46 46 46 46 51 42 56 40 40 40 40 39 35 35 35 35 35 35 51 51 56 51 51 46 40 40 40 40 40 40 40 39 34 34 0
+DISCREP_QUAL Contig1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 0 0 0 0 8 0 0 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 0 0 0 0 0 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+READ ML4924F 734 12.3 9 712 Contig1 3 706
+READ ML4924R 763 15.6 55 762 Contig1 707 0


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.fasta.screen.view
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.newtags
===================================================================


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.newtags
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.phrap.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.phrap.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.phrap.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,733 @@
+/usr/local/genome/bin/phrap test_project.fasta.screen -new_ace -view 
+phrap version 0.990329
+
+Run date:time  010621:164002
+Query file(s):  test_project.fasta.screen
+Presumed sequence type: DNA
+
+Pairwise comparison algorithm: banded Smith-Waterman
+
+Score matrix (set by value of penalty: -2)
+    A   C   G   T   N   X
+A   1  -2  -2  -2   0  -3
+C  -2   1  -2  -2   0  -3
+G  -2  -2   1  -2   0  -3
+T  -2  -2  -2   1   0  -3
+N   0   0   0   0   0   0
+X  -3  -3  -3  -3   0  -3
+
+Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, 
+Using complexity-adjusted scores. Assumed background frequencies:
+ A: 0.250  C: 0.250  G: 0.250  T: 0.250  N: 0.000  X: 0.000  
+
+minmatch: 14, maxmatch: 30, max_group_size: 20, minscore: 30, bandwidth: 14, indexwordsize: 10
+vector_bound: 80
+word_raw: 0
+trim_penalty: -2, trim_score: 20, trim_qual: 13, maxgap: 30
+repeat_stringency: 0.950000
+qual_show: 20
+confirm_length: 8, confirm_trim: 1, confirm_penalty: -5, confirm_score: 30
+node_seg: 8, node_space: 4
+forcelevel: 0, bypasslevel: 1
+max_subclone_size: 5000
+
+Sequence file: test_project.fasta.screen    4 entries
+Residue counts:
+  A      799
+  C      688
+  G      670
+  N        1
+  T      732
+  X       99
+Total   2989
+
+Read name analysis:
+ # Reads      # templates
+   1             4
+
+ Suffix counts:
+(no suffix)   4
+
+
+Templates inferred from description field:     0
+Templates inferred from name field:            4
+
+Read-template multiplicity analysis:
+ # Reads      # templates
+   1             4
+
+Chemistries inferred from description field:
+    0  dye-primer
+    0  old-dye-terminator
+    4  big-dye-terminator
+    0  other
+
+Chemistries inferred from name:
+    0  dye-primer
+    0  old-dye-terminator
+    0  big-dye-terminator
+    0  other
+
+Directions inferred from description field:
+    0  fwd
+    0  rev
+    0  unknown (set to fwd)
+
+Directions inferred from name:
+    0  fwd
+    0  rev
+    4  unknown (set to fwd)
+
+Quality file: test_project.fasta.screen.qual
+
+Input quality (quality, n_residues, %,  cum, cum %,  cum expected errs):
+ 56     281   9.4     281   9.4    0.00
+ 51     351  11.7     632  21.1    0.00
+ 50       7   0.2     639  21.4    0.00
+ 48      13   0.4     652  21.8    0.00
+ 47       2   0.1     654  21.9    0.00
+ 46      43   1.4     697  23.3    0.00
+ 45     206   6.9     903  30.2    0.01
+ 44      28   0.9     931  31.1    0.01
+ 43     334  11.2    1265  42.3    0.03
+ 42      60   2.0    1325  44.3    0.03
+ 41      29   1.0    1354  45.3    0.04
+ 40     284   9.5    1638  54.8    0.06
+ 39      64   2.1    1702  56.9    0.07
+ 38      36   1.2    1738  58.1    0.08
+ 37      76   2.5    1814  60.7    0.09
+ 36      19   0.6    1833  61.3    0.10
+ 35     193   6.5    2026  67.8    0.16
+ 34      59   2.0    2085  69.8    0.18
+ 33      30   1.0    2115  70.8    0.20
+ 32     111   3.7    2226  74.5    0.27
+ 31      19   0.6    2245  75.1    0.28
+ 30      10   0.3    2255  75.4    0.29
+ 29     121   4.0    2376  79.5    0.44
+ 28      17   0.6    2393  80.1    0.47
+ 27      38   1.3    2431  81.3    0.55
+ 26      18   0.6    2449  81.9    0.59
+ 25      72   2.4    2521  84.3    0.82
+ 24      46   1.5    2567  85.9    1.00
+ 23      21   0.7    2588  86.6    1.11
+ 22      25   0.8    2613  87.4    1.27
+ 21      33   1.1    2646  88.5    1.53
+ 20      18   0.6    2664  89.1    1.71
+ 19      25   0.8    2689  90.0    2.02
+ 18      24   0.8    2713  90.8    2.40
+ 17      15   0.5    2728  91.3    2.70
+ 16      23   0.8    2751  92.0    3.28
+ 15      23   0.8    2774  92.8    4.01
+ 14      18   0.6    2792  93.4    4.72
+ 13      14   0.5    2806  93.9    5.43
+ 12      20   0.7    2826  94.5    6.69
+ 11      21   0.7    2847  95.2    8.36
+ 10      40   1.3    2887  96.6   12.36
+  9      38   1.3    2925  97.9   17.14
+  8      26   0.9    2951  98.7   21.26
+  7       5   0.2    2956  98.9   22.26
+  6      26   0.9    2982  99.8   28.79
+  4       7   0.2    2989 100.0   31.58   (quality -1 = terminal quality 0)
+
+Avg. full length: 747.2, trimmed (qual > -1): 747.2
+Avg. quality: 37.7 per base
+
+Following regions converted to N's
+
+
+Exact duplicate reads:  None.
+
+Probable unremoved sequencing vector (matches excluded from assembly, quality reduced to 0):  None.
+
+Near duplicate reads:  None.
+
+Internal read matches (same orientation) :  None.
+
+No. of node-rejected pairs: None.
+
+Multi-segment reads (initially rejected segments in parentheses) -- XXX means segments flank X'd region: 
+
+0 reads with multiple segments.
+
+Probable deletion reads (excluded from assembly): None.
+
+Revised quality (quality, n_residues, %,  cum, cum %,  cum expected errs):
+ 90     124   4.1     124   4.1    0.00
+ 89      14   0.5     138   4.6    0.00
+ 88      22   0.7     160   5.4    0.00
+ 87      14   0.5     174   5.8    0.00
+ 86      38   1.3     212   7.1    0.00
+ 85      28   0.9     240   8.0    0.00
+ 84       8   0.3     248   8.3    0.00
+ 83      16   0.5     264   8.8    0.00
+ 82       6   0.2     270   9.0    0.00
+ 81      16   0.5     286   9.6    0.00
+ 80      44   1.5     330  11.0    0.00
+ 79       8   0.3     338  11.3    0.00
+ 78      34   1.1     372  12.4    0.00
+ 77      32   1.1     404  13.5    0.00
+ 76      20   0.7     424  14.2    0.00
+ 75      58   1.9     482  16.1    0.00
+ 74      24   0.8     506  16.9    0.00
+ 73      22   0.7     528  17.7    0.00
+ 72      42   1.4     570  19.1    0.00
+ 71      33   1.1     603  20.2    0.00
+ 70      18   0.6     621  20.8    0.00
+ 69      28   0.9     649  21.7    0.00
+ 68       6   0.2     655  21.9    0.00
+ 67      35   1.2     690  23.1    0.00
+ 66      15   0.5     705  23.6    0.00
+ 65       7   0.2     712  23.8    0.00
+ 64      45   1.5     757  25.3    0.00
+ 63      12   0.4     769  25.7    0.00
+ 62      24   0.8     793  26.5    0.00
+ 61      29   1.0     822  27.5    0.00
+ 60      46   1.5     868  29.0    0.00
+ 59      24   0.8     892  29.8    0.00
+ 58      43   1.4     935  31.3    0.00
+ 57      15   0.5     950  31.8    0.00
+ 56     217   7.3    1167  39.0    0.00
+ 55      22   0.7    1189  39.8    0.00
+ 54      14   0.5    1203  40.2    0.00
+ 53      19   0.6    1222  40.9    0.00
+ 52      10   0.3    1232  41.2    0.00
+ 51     273   9.1    1505  50.4    0.00
+ 50      14   0.5    1519  50.8    0.00
+ 49       8   0.3    1527  51.1    0.00
+ 48      16   0.5    1543  51.6    0.00
+ 47      10   0.3    1553  52.0    0.00
+ 46      32   1.1    1585  53.0    0.00
+ 45     157   5.3    1742  58.3    0.01
+ 44      23   0.8    1765  59.0    0.01
+ 43     252   8.4    2017  67.5    0.02
+ 42      20   0.7    2037  68.1    0.02
+ 41      14   0.5    2051  68.6    0.03
+ 40     179   6.0    2230  74.6    0.04
+ 39      46   1.5    2276  76.1    0.05
+ 38      14   0.5    2290  76.6    0.05
+ 37      24   0.8    2314  77.4    0.06
+ 36      14   0.5    2328  77.9    0.06
+ 35      39   1.3    2367  79.2    0.07
+ 34      16   0.5    2383  79.7    0.08
+ 33       4   0.1    2387  79.9    0.08
+ 32      34   1.1    2421  81.0    0.10
+ 31       3   0.1    2424  81.1    0.10
+ 30       5   0.2    2429  81.3    0.11
+ 29      44   1.5    2473  82.7    0.16
+ 28       7   0.2    2480  83.0    0.18
+ 27      22   0.7    2502  83.7    0.22
+ 26      11   0.4    2513  84.1    0.25
+ 25      38   1.3    2551  85.3    0.37
+ 24      25   0.8    2576  86.2    0.47
+ 23       9   0.3    2585  86.5    0.51
+ 22      12   0.4    2597  86.9    0.59
+ 21       9   0.3    2606  87.2    0.66
+ 20       6   0.2    2612  87.4    0.72
+ 19       6   0.2    2618  87.6    0.79
+ 18       9   0.3    2627  87.9    0.94
+ 17       3   0.1    2630  88.0    1.00
+ 16       4   0.1    2634  88.1    1.10
+ 15       9   0.3    2643  88.4    1.38
+ 14       3   0.1    2646  88.5    1.50
+ 13       3   0.1    2649  88.6    1.65
+ 12       1   0.0    2650  88.7    1.72
+ 11       3   0.1    2653  88.8    1.95
+ 10       5   0.2    2658  88.9    2.45
+  9       2   0.1    2660  89.0    2.71
+  8       4   0.1    2664  89.1    3.34
+  6       2   0.1    2666  89.2    3.84
+  2      71   2.4    2737  91.6   48.64
+ -1     252   8.4    2989 100.0  300.64   (quality -1 = terminal quality 0)
+
+Avg. full length: 747.2, trimmed (qual > -1): 684.2
+Avg. quality: 48.3 per base
+
+LLR score histogram:
+Score    #   cum # 
+ 15.0     1     1
+
+LLR score histogram:
+Score    #   cum # 
+ 15.0     1     1
+
+2d revised quality (quality, n_residues, %,  cum, cum %,  cum expected errs):
+ 90     124   4.1     124   4.1    0.00
+ 89      14   0.5     138   4.6    0.00
+ 88      22   0.7     160   5.4    0.00
+ 87      14   0.5     174   5.8    0.00
+ 86      38   1.3     212   7.1    0.00
+ 85      28   0.9     240   8.0    0.00
+ 84       8   0.3     248   8.3    0.00
+ 83      16   0.5     264   8.8    0.00
+ 82       6   0.2     270   9.0    0.00
+ 81      16   0.5     286   9.6    0.00
+ 80      44   1.5     330  11.0    0.00
+ 79       8   0.3     338  11.3    0.00
+ 78      34   1.1     372  12.4    0.00
+ 77      32   1.1     404  13.5    0.00
+ 76      20   0.7     424  14.2    0.00
+ 75      58   1.9     482  16.1    0.00
+ 74      24   0.8     506  16.9    0.00
+ 73      22   0.7     528  17.7    0.00
+ 72      42   1.4     570  19.1    0.00
+ 71      33   1.1     603  20.2    0.00
+ 70      18   0.6     621  20.8    0.00
+ 69      28   0.9     649  21.7    0.00
+ 68       6   0.2     655  21.9    0.00
+ 67      35   1.2     690  23.1    0.00
+ 66      15   0.5     705  23.6    0.00
+ 65       7   0.2     712  23.8    0.00
+ 64      45   1.5     757  25.3    0.00
+ 63      12   0.4     769  25.7    0.00
+ 62      24   0.8     793  26.5    0.00
+ 61      29   1.0     822  27.5    0.00
+ 60      46   1.5     868  29.0    0.00
+ 59      24   0.8     892  29.8    0.00
+ 58      43   1.4     935  31.3    0.00
+ 57      15   0.5     950  31.8    0.00
+ 56     217   7.3    1167  39.0    0.00
+ 55      22   0.7    1189  39.8    0.00
+ 54      14   0.5    1203  40.2    0.00
+ 53      19   0.6    1222  40.9    0.00
+ 52      10   0.3    1232  41.2    0.00
+ 51     273   9.1    1505  50.4    0.00
+ 50      14   0.5    1519  50.8    0.00
+ 49       8   0.3    1527  51.1    0.00
+ 48      16   0.5    1543  51.6    0.00
+ 47      10   0.3    1553  52.0    0.00
+ 46      32   1.1    1585  53.0    0.00
+ 45     157   5.3    1742  58.3    0.01
+ 44      23   0.8    1765  59.0    0.01
+ 43     252   8.4    2017  67.5    0.02
+ 42      20   0.7    2037  68.1    0.02
+ 41      14   0.5    2051  68.6    0.03
+ 40     179   6.0    2230  74.6    0.04
+ 39      46   1.5    2276  76.1    0.05
+ 38      14   0.5    2290  76.6    0.05
+ 37      24   0.8    2314  77.4    0.06
+ 36      14   0.5    2328  77.9    0.06
+ 35      39   1.3    2367  79.2    0.07
+ 34      16   0.5    2383  79.7    0.08
+ 33       4   0.1    2387  79.9    0.08
+ 32      34   1.1    2421  81.0    0.10
+ 31       3   0.1    2424  81.1    0.10
+ 30       5   0.2    2429  81.3    0.11
+ 29      44   1.5    2473  82.7    0.16
+ 28       7   0.2    2480  83.0    0.18
+ 27      22   0.7    2502  83.7    0.22
+ 26      11   0.4    2513  84.1    0.25
+ 25      38   1.3    2551  85.3    0.37
+ 24      25   0.8    2576  86.2    0.47
+ 23       9   0.3    2585  86.5    0.51
+ 22      12   0.4    2597  86.9    0.59
+ 21       9   0.3    2606  87.2    0.66
+ 20       6   0.2    2612  87.4    0.72
+ 19       6   0.2    2618  87.6    0.79
+ 18       9   0.3    2627  87.9    0.94
+ 17       3   0.1    2630  88.0    1.00
+ 16       4   0.1    2634  88.1    1.10
+ 15       9   0.3    2643  88.4    1.38
+ 14       3   0.1    2646  88.5    1.50
+ 13       3   0.1    2649  88.6    1.65
+ 12       1   0.0    2650  88.7    1.72
+ 11       3   0.1    2653  88.8    1.95
+ 10       5   0.2    2658  88.9    2.45
+  9       2   0.1    2660  89.0    2.71
+  8       4   0.1    2664  89.1    3.34
+  6       2   0.1    2666  89.2    3.84
+  4       1   0.0    2667  89.2    4.24
+  2      70   2.3    2737  91.6   48.41
+ -1     252   8.4    2989 100.0  300.41   (quality -1 = terminal quality 0)
+
+Avg. full length: 747.2, trimmed (qual > -1): 684.2
+Avg. quality: 48.3 per base
+
+No. confirmed reads: 2
+Avg. length: 748.5, confirmed: 671.0, str. confirmed: 670.0, trimmed: 703.0
+Preliminary clone size estimate: 671 bp, depth of coverage: 2.0
+
+Depth histogram (max_depth, #reads, cum #reads):
+
+ 1     2       2
+ 0     2       4
+
+Forward confirmed bases: 0
+
+Substitutions by nucleotide:
+       A      C      G      T      N      X      Z    Total
+A      0      0      0      0      0      0      0        0
+C      0      0      0      0      0      0      0        0
+G      0      0      0      0      0      0      0        0
+T      0      0      0      0      0      0      0        0
+N      0      0      0      0      0      0      0        0
+X      0      0      0      0      0      0      0        0
+Z      0      0      0      0      0      0      0        0
+
+Substitutions by quality: 
+         0      2      3      4      5    Total
+0        0      0      0      0      0        0
+2        0      0      0      0      0        0
+3        0      0      0      0      0        0
+4        0      0      0      0      0        0
+5        0      0      0      0      0        0
+
+Histogram of spacings between adjacent indel pairs:
+
+
+Reverse confirmed bases: 1408
+ Subs: 46 (3.27%), dels: 0 (0.00%), inserts: 0 (0.00%)
+
+Substitutions by nucleotide:
+       A      C      G      T      N      X      Z    Total
+A      0      1      8      0      0      0      0        9
+C      1      0     18      8      0      0      0       27
+G      2      2      0      1      0      0      0        5
+T      2      2      1      0      0      0      0        5
+N      0      0      0      0      0      0      0        0
+X      0      0      0      0      0      0      0        0
+Z      0      0      0      0      0      0      0        0
+
+Substitutions by quality: 
+         0      2      3      4      5    Total
+0        0      1      7      7      8       23
+2        1      0      0      0      0        1
+3        7      0      0      0      0        7
+4        7      0      0      0      0        7
+5        8      0      0      0      0        8
+
+Histogram of spacings between adjacent indel pairs:
+
+
+Blocked reads: 
+ML4924F 11 600  left 
+ML4924R 56 753  left 
+
+2 blocked reads: 2 left only, 0 right only, 0 both.
+0 reads (not shown) lack a high-quality segment.
+
+0 perfect duplicates
+
+2 isolated singlets (having no non-vector match to any other read): 
+  Read         Length      (# trimmed non-X bases)
+ ML4922R        753   (668)
+ ML4947F        739   (658)
+
+Contig 1.  2 reads; 708 bp (untrimmed), 704 (trimmed).  Isolated contig.
+     -5   728 ML4924F       628 (  0)  3.27 0.00 0.00    9 (  9)   21 ( 54) 
+C     1   763 ML4924R       703 (  0)  0.00 0.00 0.00    0 (  3)   55 ( 89) 
+
+Overall discrep rates (%):             1.63 0.00 0.00
+
+Contig quality (quality, n_residues, %,  cum, cum %,  cum expected errs):
+ 90      62   8.8      62   8.8    0.00
+ 89       7   1.0      69   9.7    0.00
+ 88      11   1.6      80  11.3    0.00
+ 87       7   1.0      87  12.3    0.00
+ 86      19   2.7     106  15.0    0.00
+ 85      14   2.0     120  16.9    0.00
+ 84       4   0.6     124  17.5    0.00
+ 83       8   1.1     132  18.6    0.00
+ 82       3   0.4     135  19.1    0.00
+ 81       8   1.1     143  20.2    0.00
+ 80      22   3.1     165  23.3    0.00
+ 79       4   0.6     169  23.9    0.00
+ 78      17   2.4     186  26.3    0.00
+ 77      16   2.3     202  28.5    0.00
+ 76      10   1.4     212  29.9    0.00
+ 75      29   4.1     241  34.0    0.00
+ 74      12   1.7     253  35.7    0.00
+ 73      12   1.7     265  37.4    0.00
+ 72      22   3.1     287  40.5    0.00
+ 71      22   3.1     309  43.6    0.00
+ 70       9   1.3     318  44.9    0.00
+ 69      14   2.0     332  46.9    0.00
+ 68       3   0.4     335  47.3    0.00
+ 67      18   2.5     353  49.9    0.00
+ 66       9   1.3     362  51.1    0.00
+ 65       4   0.6     366  51.7    0.00
+ 64      23   3.2     389  54.9    0.00
+ 63       6   0.8     395  55.8    0.00
+ 62      12   1.7     407  57.5    0.00
+ 61      15   2.1     422  59.6    0.00
+ 60      29   4.1     451  63.7    0.00
+ 59      11   1.6     462  65.3    0.00
+ 58      34   4.8     496  70.1    0.00
+ 57       7   1.0     503  71.0    0.00
+ 56      14   2.0     517  73.0    0.00
+ 55       6   0.8     523  73.9    0.00
+ 54       8   1.1     531  75.0    0.00
+ 53       9   1.3     540  76.3    0.00
+ 52       6   0.8     546  77.1    0.00
+ 51      48   6.8     594  83.9    0.00
+ 50       8   1.1     602  85.0    0.00
+ 49       6   0.8     608  85.9    0.00
+ 48       3   0.4     611  86.3    0.00
+ 47       5   0.7     616  87.0    0.00
+ 46       8   1.1     624  88.1    0.00
+ 45       7   1.0     631  89.1    0.00
+ 44       1   0.1     632  89.3    0.00
+ 43      16   2.3     648  91.5    0.00
+ 42       1   0.1     649  91.7    0.00
+ 41       2   0.3     651  91.9    0.00
+ 40      17   2.4     668  94.4    0.00
+ 39      14   2.0     682  96.3    0.01
+ 38       3   0.4     685  96.8    0.01
+ 36       1   0.1     686  96.9    0.01
+ 35      10   1.4     696  98.3    0.01
+ 34       2   0.3     698  98.6    0.01
+ 32       1   0.1     699  98.7    0.01
+ 26       1   0.1     700  98.9    0.01
+ 25       2   0.3     702  99.2    0.02
+ 17       1   0.1     703  99.3    0.04
+ 12       1   0.1     704  99.4    0.10
+ -1       4   0.6     708 100.0    4.10   (quality -1 = terminal quality 0)
+
+Avg. full length: 708.0, trimmed (qual > -1): 704.0
+Avg. quality: 65.6 per base
+
+Initial, terminal qual 0 segments:  1-3, 708-708
+
+Regions of LLR- adjusted quality < 2.0:
+1-5, 708, 
+
+2 regions, avg size 3.0, avg spacing 354.0
+
+First_start: 4, last_end: 674
+
+Slack, # used pairs (max_score), unused
+ 0     1  (15.1)     0 ( 0.0)        1
+
+LLR histograms (used, unused pairs): 
+ 10.0     1       0
+
+   DS Gap         Size      Closest read (Start)   Covers   Read length required
+                                                    now?        to cover
+Top strand: 
+ left -     3        3+
+  708 - right        1+      ML4924F      (  -5)    No            713+
+
+Bottom strand: 
+ left -     0        0+      ML4924R      ( 763)    No            763+
+  709 - right        0+
+
+Read/contig alignment summary, by read base; trace qualities
+Qual algn  cum    rcum    (%)    unalgn X    N  sub del ins  total (%)   cum  rcum (%)
+56     86     86   1409 (100.00)     0  0    0   0   0   0     0 (0.00)    0   23 (1.63)
+51    122    208   1323 ( 93.90)     0  0    0   0   0   0     0 (0.00)    0   23 (1.74)
+50      5    213   1201 ( 85.24)     0  0    0   0   0   0     0 (0.00)    0   23 (1.92)
+48      7    220   1196 ( 84.88)     0  0    0   0   0   0     0 (0.00)    0   23 (1.92)
+47      2    222   1189 ( 84.39)     0  0    0   0   0   0     0 (0.00)    0   23 (1.93)
+46     15    237   1187 ( 84.24)     0  0    0   0   0   0     0 (0.00)    0   23 (1.94)
+45     53    290   1172 ( 83.18)     0  0    0   0   0   0     0 (0.00)    0   23 (1.96)
+44     14    304   1119 ( 79.42)     0  0    0   0   0   0     0 (0.00)    0   23 (2.06)
+43     98    402   1105 ( 78.42)     0  0    0   0   0   0     0 (0.00)    0   23 (2.08)
+42     44    446   1007 ( 71.47)     0  0    0   0   0   0     0 (0.00)    0   23 (2.28)
+41     21    467    963 ( 68.35)     0  0    0   0   0   0     0 (0.00)    0   23 (2.39)
+40     99    566    942 ( 66.86)     0  0    0   0   0   0     0 (0.00)    0   23 (2.44)
+39     29    595    843 ( 59.83)     0  0    0   0   0   0     0 (0.00)    0   23 (2.73)
+38     26    621    814 ( 57.77)     0  0    0   0   0   0     0 (0.00)    0   23 (2.83)
+37     49    670    788 ( 55.93)     0  0    0   0   0   0     0 (0.00)    0   23 (2.92)
+36      8    678    739 ( 52.45)     0  0    0   0   0   0     0 (0.00)    0   23 (3.11)
+35    143    821    731 ( 51.88)     0  0    0   0   0   0     0 (0.00)    0   23 (3.15)
+34     28    849    588 ( 41.73)     0  0    0   0   0   0     0 (0.00)    0   23 (3.91)
+33     24    873    560 ( 39.74)     0  0    0   0   0   0     0 (0.00)    0   23 (4.11)
+32     64    937    536 ( 38.04)     0  0    0   0   0   0     0 (0.00)    0   23 (4.29)
+31     10    947    472 ( 33.50)     0  0    0   0   0   0     0 (0.00)    0   23 (4.87)
+30      6    953    462 ( 32.79)     0  0    0   0   0   0     0 (0.00)    0   23 (4.98)
+29     72   1025    456 ( 32.36)     0  0    0   0   0   0     0 (0.00)    0   23 (5.04)
+28     10   1035    384 ( 27.25)     0  0    0   0   0   0     0 (0.00)    0   23 (5.99)
+27     19   1054    374 ( 26.54)     0  0    0   0   0   0     0 (0.00)    0   23 (6.15)
+26     12   1066    355 ( 25.20)     0  0    0   0   0   0     0 (0.00)    0   23 (6.48)
+25     36   1102    343 ( 24.34)     0  0    0   0   0   0     0 (0.00)    0   23 (6.71)
+24     27   1129    307 ( 21.79)     0  0    0   1   0   0     1 (3.70)    1   23 (7.49)
+23     13   1142    280 ( 19.87)     0  0    0   0   0   0     0 (0.00)    1   22 (7.86)
+22     15   1157    267 ( 18.95)     0  0    0   0   0   0     0 (0.00)    1   22 (8.24)
+21     23   1180    252 ( 17.89)     0  0    0   1   0   0     1 (4.35)    2   22 (8.73)
+20     11   1191    229 ( 16.25)     0  0    0   0   0   0     0 (0.00)    2   21 (9.17)
+19     12   1203    218 ( 15.47)     0  0    0   0   0   0     0 (0.00)    2   21 (9.63)
+18     14   1217    206 ( 14.62)     0  0    0   0   0   0     0 (0.00)    2   21 (10.19)
+17     14   1231    192 ( 13.63)     0  0    0   2   0   0     2 (14.29)    4   21 (10.94)
+16     14   1245    178 ( 12.63)     0  0    0   0   0   0     0 (0.00)    4   19 (10.67)
+15     20   1265    164 ( 11.64)     0  0    0   0   0   0     0 (0.00)    4   19 (11.59)
+14     15   1280    144 ( 10.22)     0  0    0   2   0   0     2 (13.33)    6   19 (13.19)
+13      9   1289    129 (  9.16)     0  0    0   0   0   0     0 (0.00)    6   17 (13.18)
+12     18   1307    120 (  8.52)     0  0    0   1   0   0     1 (5.56)    7   17 (14.17)
+11     17   1324    102 (  7.24)     0  0    0   1   0   0     1 (5.88)    8   16 (15.69)
+10     32   1356     85 (  6.03)     0  0    0   5   0   0     5 (15.62)   13   15 (17.65)
+ 9     28   1384     53 (  3.76)     0  0    0   2   0   0     2 (7.14)   15   10 (18.87)
+ 8     15   1399     25 (  1.77)     0  0    0   4   0   0     4 (26.67)   19    8 (32.00)
+ 7      2   1401     10 (  0.71)     0  0    0   0   0   0     0 (0.00)   19    4 (40.00)
+ 6      8   1409      8 (  0.57)     0  0    0   4   0   0     4 (50.00)   23    4 (50.00)
+-1      3   1412      0 (  0.00)     4  0    0   0   0   0     0 (0.00)   23    0 (0.00)
+
+
+Read/contig alignment summary, by read base; adjusted qualities
+Qual algn  cum    rcum    (%)    unalgn X    N  sub del ins  total (%)   cum  rcum (%)
+90    124    124   1375 (100.00)     0  0    0   0   0   0     0 (0.00)    0   14 (1.02)
+89     14    138   1251 ( 90.98)     0  0    0   0   0   0     0 (0.00)    0   14 (1.12)
+88     22    160   1237 ( 89.96)     0  0    0   0   0   0     0 (0.00)    0   14 (1.13)
+87     14    174   1215 ( 88.36)     0  0    0   0   0   0     0 (0.00)    0   14 (1.15)
+86     38    212   1201 ( 87.35)     0  0    0   0   0   0     0 (0.00)    0   14 (1.17)
+85     28    240   1163 ( 84.58)     0  0    0   0   0   0     0 (0.00)    0   14 (1.20)
+84      8    248   1135 ( 82.55)     0  0    0   0   0   0     0 (0.00)    0   14 (1.23)
+83     16    264   1127 ( 81.96)     0  0    0   0   0   0     0 (0.00)    0   14 (1.24)
+82      6    270   1111 ( 80.80)     0  0    0   0   0   0     0 (0.00)    0   14 (1.26)
+81     16    286   1105 ( 80.36)     0  0    0   0   0   0     0 (0.00)    0   14 (1.27)
+80     44    330   1089 ( 79.20)     0  0    0   0   0   0     0 (0.00)    0   14 (1.29)
+79      8    338   1045 ( 76.00)     0  0    0   0   0   0     0 (0.00)    0   14 (1.34)
+78     34    372   1037 ( 75.42)     0  0    0   0   0   0     0 (0.00)    0   14 (1.35)
+77     32    404   1003 ( 72.95)     0  0    0   0   0   0     0 (0.00)    0   14 (1.40)
+76     20    424    971 ( 70.62)     0  0    0   0   0   0     0 (0.00)    0   14 (1.44)
+75     58    482    951 ( 69.16)     0  0    0   0   0   0     0 (0.00)    0   14 (1.47)
+74     24    506    893 ( 64.95)     0  0    0   0   0   0     0 (0.00)    0   14 (1.57)
+73     22    528    869 ( 63.20)     0  0    0   0   0   0     0 (0.00)    0   14 (1.61)
+72     42    570    847 ( 61.60)     0  0    0   0   0   0     0 (0.00)    0   14 (1.65)
+71     33    603    805 ( 58.55)     0  0    0   0   0   0     0 (0.00)    0   14 (1.74)
+70     18    621    772 ( 56.15)     0  0    0   0   0   0     0 (0.00)    0   14 (1.81)
+69     28    649    754 ( 54.84)     0  0    0   0   0   0     0 (0.00)    0   14 (1.86)
+68      6    655    726 ( 52.80)     0  0    0   0   0   0     0 (0.00)    0   14 (1.93)
+67     35    690    720 ( 52.36)     0  0    0   0   0   0     0 (0.00)    0   14 (1.94)
+66     15    705    685 ( 49.82)     0  0    0   0   0   0     0 (0.00)    0   14 (2.04)
+65      7    712    670 ( 48.73)     0  0    0   0   0   0     0 (0.00)    0   14 (2.09)
+64     45    757    663 ( 48.22)     0  0    0   0   0   0     0 (0.00)    0   14 (2.11)
+63     12    769    618 ( 44.95)     0  0    0   0   0   0     0 (0.00)    0   14 (2.27)
+62     24    793    606 ( 44.07)     0  0    0   0   0   0     0 (0.00)    0   14 (2.31)
+61     29    822    582 ( 42.33)     0  0    0   0   0   0     0 (0.00)    0   14 (2.41)
+60     46    868    553 ( 40.22)     0  0    0   0   0   0     0 (0.00)    0   14 (2.53)
+59     24    892    507 ( 36.87)     0  0    0   0   0   0     0 (0.00)    0   14 (2.76)
+58     43    935    483 ( 35.13)     0  0    0   0   0   0     0 (0.00)    0   14 (2.90)
+57     15    950    440 ( 32.00)     0  0    0   0   0   0     0 (0.00)    0   14 (3.18)
+56     24    974    425 ( 30.91)     0  0    0   0   0   0     0 (0.00)    0   14 (3.29)
+55     22    996    401 ( 29.16)     0  0    0   0   0   0     0 (0.00)    0   14 (3.49)
+54     14   1010    379 ( 27.56)     0  0    0   0   0   0     0 (0.00)    0   14 (3.69)
+53     19   1029    365 ( 26.55)     0  0    0   0   0   0     0 (0.00)    0   14 (3.84)
+52     10   1039    346 ( 25.16)     0  0    0   0   0   0     0 (0.00)    0   14 (4.05)
+51     52   1091    336 ( 24.44)     0  0    0   0   0   0     0 (0.00)    0   14 (4.17)
+50     12   1103    284 ( 20.65)     0  0    0   0   0   0     0 (0.00)    0   14 (4.93)
+49      8   1111    272 ( 19.78)     0  0    0   0   0   0     0 (0.00)    0   14 (5.15)
+48     10   1121    264 ( 19.20)     0  0    0   0   0   0     0 (0.00)    0   14 (5.30)
+47     10   1131    254 ( 18.47)     0  0    0   0   0   0     0 (0.00)    0   14 (5.51)
+46      9   1140    244 ( 17.75)     0  0    0   0   0   0     0 (0.00)    0   14 (5.74)
+45      8   1148    235 ( 17.09)     0  0    0   0   0   0     0 (0.00)    0   14 (5.96)
+44      9   1157    227 ( 16.51)     0  0    0   0   0   0     0 (0.00)    0   14 (6.17)
+43     17   1174    218 ( 15.85)     0  0    0   0   0   0     0 (0.00)    0   14 (6.42)
+42      4   1178    201 ( 14.62)     0  0    0   0   0   0     0 (0.00)    0   14 (6.97)
+41      6   1184    197 ( 14.33)     0  0    0   0   0   0     0 (0.00)    0   14 (7.11)
+40     31   1215    191 ( 13.89)     0  0    0   0   0   0     0 (0.00)    0   14 (7.33)
+39     25   1240    160 ( 11.64)     0  0    0   0   0   0     0 (0.00)    0   14 (8.75)
+38      5   1245    135 (  9.82)     0  0    0   0   0   0     0 (0.00)    0   14 (10.37)
+36      3   1248    130 (  9.45)     0  0    0   0   0   0     0 (0.00)    0   14 (10.77)
+35     16   1264    127 (  9.24)     0  0    0   0   0   0     0 (0.00)    0   14 (11.02)
+34      4   1268    111 (  8.07)     0  0    0   0   0   0     0 (0.00)    0   14 (12.61)
+33      2   1270    107 (  7.78)     0  0    0   0   0   0     0 (0.00)    0   14 (13.08)
+32      1   1271    105 (  7.64)     0  0    0   0   0   0     0 (0.00)    0   14 (13.33)
+31      2   1273    104 (  7.56)     0  0    0   0   0   0     0 (0.00)    0   14 (13.46)
+29      4   1277    102 (  7.42)     0  0    0   0   0   0     0 (0.00)    0   14 (13.73)
+28      2   1279     98 (  7.13)     0  0    0   0   0   0     0 (0.00)    0   14 (14.29)
+27      2   1281     96 (  6.98)     0  0    0   0   0   0     0 (0.00)    0   14 (14.58)
+26      4   1285     94 (  6.84)     0  0    0   0   0   0     0 (0.00)    0   14 (14.89)
+25      7   1292     90 (  6.55)     0  0    0   0   0   0     0 (0.00)    0   14 (15.56)
+24      9   1301     83 (  6.04)     0  0    0   0   0   0     0 (0.00)    0   14 (16.87)
+23      1   1302     74 (  5.38)     0  0    0   0   0   0     0 (0.00)    0   14 (18.92)
+22      2   1304     73 (  5.31)     0  0    0   0   0   0     0 (0.00)    0   14 (19.18)
+21      3   1307     71 (  5.16)     0  0    0   1   0   0     1 (33.33)    1   14 (19.72)
+20      1   1308     68 (  4.95)     0  0    0   0   0   0     0 (0.00)    1   13 (19.12)
+17      4   1312     67 (  4.87)     0  0    0   1   0   0     1 (25.00)    2   13 (19.40)
+16      1   1313     63 (  4.58)     0  0    0   0   0   0     0 (0.00)    2   12 (19.05)
+15      6   1319     62 (  4.51)     0  0    0   0   0   0     0 (0.00)    2   12 (19.35)
+14      1   1320     56 (  4.07)     0  0    0   0   0   0     0 (0.00)    2   12 (21.43)
+13      3   1323     55 (  4.00)     0  0    0   0   0   0     0 (0.00)    2   12 (21.82)
+12      5   1328     52 (  3.78)     0  0    0   0   0   0     0 (0.00)    2   12 (23.08)
+11     10   1338     47 (  3.42)     0  0    0   1   0   0     1 (10.00)    3   12 (25.53)
+10     11   1349     37 (  2.69)     0  0    0   4   0   0     4 (36.36)    7   11 (29.73)
+ 9     13   1362     26 (  1.89)     0  0    0   2   0   0     2 (15.38)    9    7 (26.92)
+ 8      9   1371     13 (  0.95)     0  0    0   3   0   0     3 (33.33)   12    5 (38.46)
+ 6      4   1375      4 (  0.29)     0  0    0   2   0   0     2 (50.00)   14    2 (50.00)
+-1     37   1412      0 (  0.00)     4  0    0   9   0   0     9 (24.32)   23    0 (0.00)
+
+
+Depth 0 regions:
+
+Block histogram:
+Qual   bases    cum    blocks
+  0       4        4        2
+ 12       1        5        2
+ 17       1        6        2
+ 25       2        8        3
+ 26       1        9        3
+ 32       1       10        2
+ 34       2       12        2
+ 35      10       22        5
+ 36       1       23        6
+ 38       3       26        7
+ 39      14       40        9
+ 40      17       57        9
+ 41       2       59       10
+ 42       1       60       11
+ 43      16       76       15
+ 44       1       77       14
+ 45       7       84       15
+ 46       8       92       16
+ 47       5       97       18
+ 48       3      100       19
+ 49       6      106       21
+ 50       8      114       24
+ 51      48      162       21
+ 52       6      168       21
+ 53       9      177       23
+ 54       8      185       23
+ 55       6      191       24
+ 56      14      205       22
+ 57       7      212       23
+ 58      34      246       29
+ 59      11      257       28
+ 60      29      286       30
+ 61      15      301       32
+ 62      12      313       29
+ 63       6      319       30
+ 64      23      342       24
+ 65       4      346       24
+ 66       9      355       23
+ 67      18      373       25
+ 68       3      376       26
+ 69      14      390       30
+ 70       9      399       29
+ 71      22      421       30
+ 72      22      443       32
+ 73      12      455       30
+ 74      12      467       33
+ 75      29      496       32
+ 76      10      506       30
+ 77      16      522       33
+ 78      17      539       30
+ 79       4      543       30
+ 80      22      565       29
+ 81       8      573       27
+ 82       3      576       27
+ 83       8      584       25
+ 84       4      588       24
+ 85      14      602       25
+ 86      19      621       26
+ 87       7      628       27
+ 88      11      639       20
+ 89       7      646       17
+ 90      62      708        1
+
+SS region: 4 (0.56%), flagged: 0 (0.00%)
+
+Sites with total LLR scores < -3.0  [max pos LLR read, max neg LLR read]  (#discrep top reads, #discrep bottom reads): None.
+
+Read/contig discrepancies (* = higher-quality): None.
+0 lower quality discrepant sites.
+
+Reads with neg LLR score, or confirmed or high-qual unaligned seg > 20 bases, or other problem: None.
+
+Gaps in unique-read coverage:  None.
+
+Subclone/read contig links and consistency checks (* = inconsistency; Contig 0 = singlets)
+Max subclone size: 5000
+
+Size histogram for consistent forward-reverse pairs (*** = inconsistent pairs)
+  ***     0
+
+ Consistent opp sense links (* = not used in chain, ** = multiple non-zero):
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.phrap.out
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.screen.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.screen.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.screen.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,146 @@
+/usr/local/genome/bin/cross_match test_project.fasta /etc/phredphrap/vectors -minmatch 12 -penalty -2 -minscore 20 -screen 
+cross_match version 0.990329
+
+Run date:time  010621:164002
+Query file(s):  test_project.fasta
+Subject file(s):   /etc/phredphrap/vectors
+Presumed sequence type: DNA
+
+Pairwise comparison algorithm: banded Smith-Waterman
+
+Score matrix (set by value of penalty: -2)
+    A   C   G   T   N   X
+A   1  -2  -2  -2   0  -3
+C  -2   1  -2  -2   0  -3
+G  -2  -2   1  -2   0  -3
+T  -2  -2  -2   1   0  -3
+N   0   0   0   0   0   0
+X  -3  -3  -3  -3   0  -3
+
+Gap penalties: gap_init: -4, gap_ext: -3, ins_gap_ext: -3, del_gap_ext: -3, 
+Using complexity-adjusted scores. Assumed background frequencies:
+ A: 0.250  C: 0.250  G: 0.250  T: 0.250  N: 0.000  X: 0.000  
+
+minmatch: 12, maxmatch: 12, max_group_size: 20, minscore: 20, bandwidth: 14, indexwordsize: 10
+vector_bound: 0
+word_raw: 0
+masklevel: 80
+
+Sequence file: test_project.fasta    4 entries
+Residue counts:
+  a      811
+  c      727
+  g      702
+  n        2
+  t      747
+Total   2989
+
+Quality file: test_project.fasta.qual
+
+Input quality (quality, n_residues, %,  cum, cum %,  cum expected errs):
+ 56     281   9.4     281   9.4    0.00
+ 51     351  11.7     632  21.1    0.00
+ 50       7   0.2     639  21.4    0.00
+ 48      13   0.4     652  21.8    0.00
+ 47       2   0.1     654  21.9    0.00
+ 46      43   1.4     697  23.3    0.00
+ 45     206   6.9     903  30.2    0.01
+ 44      28   0.9     931  31.1    0.01
+ 43     334  11.2    1265  42.3    0.03
+ 42      60   2.0    1325  44.3    0.03
+ 41      29   1.0    1354  45.3    0.04
+ 40     284   9.5    1638  54.8    0.06
+ 39      64   2.1    1702  56.9    0.07
+ 38      36   1.2    1738  58.1    0.08
+ 37      76   2.5    1814  60.7    0.09
+ 36      19   0.6    1833  61.3    0.10
+ 35     193   6.5    2026  67.8    0.16
+ 34      59   2.0    2085  69.8    0.18
+ 33      30   1.0    2115  70.8    0.20
+ 32     111   3.7    2226  74.5    0.27
+ 31      19   0.6    2245  75.1    0.28
+ 30      10   0.3    2255  75.4    0.29
+ 29     121   4.0    2376  79.5    0.44
+ 28      17   0.6    2393  80.1    0.47
+ 27      38   1.3    2431  81.3    0.55
+ 26      18   0.6    2449  81.9    0.59
+ 25      72   2.4    2521  84.3    0.82
+ 24      46   1.5    2567  85.9    1.00
+ 23      21   0.7    2588  86.6    1.11
+ 22      25   0.8    2613  87.4    1.27
+ 21      33   1.1    2646  88.5    1.53
+ 20      18   0.6    2664  89.1    1.71
+ 19      25   0.8    2689  90.0    2.02
+ 18      24   0.8    2713  90.8    2.40
+ 17      15   0.5    2728  91.3    2.70
+ 16      23   0.8    2751  92.0    3.28
+ 15      23   0.8    2774  92.8    4.01
+ 14      18   0.6    2792  93.4    4.72
+ 13      14   0.5    2806  93.9    5.43
+ 12      20   0.7    2826  94.5    6.69
+ 11      21   0.7    2847  95.2    8.36
+ 10      40   1.3    2887  96.6   12.36
+  9      38   1.3    2925  97.9   17.14
+  8      26   0.9    2951  98.7   21.26
+  7       5   0.2    2956  98.9   22.26
+  6      26   0.9    2982  99.8   28.79
+  4       7   0.2    2989 100.0   31.58   (quality -1 = terminal quality 0)
+
+Avg. full length: 747.2, trimmed (qual > -1): 747.2
+Avg. quality: 37.7 per base
+Maximal single base matches (low complexity regions):
+
+  36  5.77 1.92 0.00  ML4922R        4    55 (698)  C vector3:PsportI   (77)   169   117  
+
+  29  8.51 2.13 0.00  ML4924R        9    55 (708)  C vector3:PsportI   (82)   164   117  
+
+2 matching entries (first file).
+
+Discrepancy summary:
+Qual algn  cum    rcum    (%)    unalgn X    N  sub del ins  total (%)   cum  rcum (%)
+56      0      0     99 (100.00)     9  0    0   0   0   0     0 (0.00)    0    9 (9.09)
+51      2      2     99 (100.00)    49  0    0   0   0   0     0 (0.00)    0    9 (9.09)
+48      0      2     97 ( 97.98)     0  0    0   0   0   0     0 (0.00)    0    9 (9.28)
+46      2      4     97 ( 97.98)    14  0    0   0   1   0     1 (50.00)    1    9 (9.28)
+45      0      4     95 ( 95.96)    16  0    0   0   0   0     0 (0.00)    1    8 (8.42)
+44      0      4     95 ( 95.96)     0  0    0   0   0   0     0 (0.00)    1    8 (8.42)
+43      0      4     95 ( 95.96)     8  0    0   0   0   0     0 (0.00)    1    8 (8.42)
+42      0      4     95 ( 95.96)     1  0    0   0   0   0     0 (0.00)    1    8 (8.42)
+40     15     19     95 ( 95.96)    56  0    0   0   1   0     1 (6.67)    2    8 (8.42)
+39     10     29     80 ( 80.81)    35  0    0   0   0   0     0 (0.00)    2    7 (8.75)
+38      0     29     70 ( 70.71)     4  0    0   0   0   0     0 (0.00)    2    7 (10.00)
+37      1     30     70 ( 70.71)     1  0    0   0   0   0     0 (0.00)    2    7 (10.00)
+36      0     30     69 ( 69.70)     0  0    0   0   0   0     0 (0.00)    2    7 (10.14)
+35      9     39     69 ( 69.70)    30  0    0   2   0   0     2 (22.22)    4    7 (10.14)
+34     14     53     60 ( 60.61)     3  0    0   2   0   0     2 (14.29)    6    5 (8.33)
+33      2     55     46 ( 46.46)     0  0    0   0   0   0     0 (0.00)    6    3 (6.52)
+32      6     61     44 ( 44.44)     1  0    0   0   0   0     0 (0.00)    6    3 (6.82)
+31      0     61     38 ( 38.38)     5  0    0   0   0   0     0 (0.00)    6    3 (7.89)
+30      0     61     38 ( 38.38)     0  0    0   0   0   0     0 (0.00)    6    3 (7.89)
+29      4     65     38 ( 38.38)     0  0    0   0   0   0     0 (0.00)    6    3 (7.89)
+28      3     68     34 ( 34.34)     0  0    0   0   0   0     0 (0.00)    6    3 (8.82)
+27      0     68     31 ( 31.31)     0  0    0   0   0   0     0 (0.00)    6    3 (9.68)
+26      0     68     31 ( 31.31)     0  0    0   0   0   0     0 (0.00)    6    3 (9.68)
+25      2     70     31 ( 31.31)     0  0    0   0   0   0     0 (0.00)    6    3 (9.68)
+24      2     72     29 ( 29.29)     0  0    0   0   0   0     0 (0.00)    6    3 (10.34)
+23      1     73     27 ( 27.27)     0  0    0   0   0   0     0 (0.00)    6    3 (11.11)
+22      0     73     26 ( 26.26)     0  0    0   0   0   0     0 (0.00)    6    3 (11.54)
+21      2     75     26 ( 26.26)     0  0    0   0   0   0     0 (0.00)    6    3 (11.54)
+20      2     77     24 ( 24.24)     0  0    0   0   0   0     0 (0.00)    6    3 (12.50)
+19      3     80     22 ( 22.22)     0  0    0   0   0   0     0 (0.00)    6    3 (13.64)
+18      0     80     19 ( 19.19)     1  0    0   0   0   0     0 (0.00)    6    3 (15.79)
+17      0     80     19 ( 19.19)     0  0    0   0   0   0     0 (0.00)    6    3 (15.79)
+16      1     81     19 ( 19.19)     1  0    0   0   0   0     0 (0.00)    6    3 (15.79)
+15      0     81     18 ( 18.18)     0  0    0   0   0   0     0 (0.00)    6    3 (16.67)
+14      1     82     18 ( 18.18)     0  0    0   0   0   0     0 (0.00)    6    3 (16.67)
+13      1     83     17 ( 17.17)     0  0    0   0   0   0     0 (0.00)    6    3 (17.65)
+12      0     83     16 ( 16.16)     1  0    0   0   0   0     0 (0.00)    6    3 (18.75)
+11      1     84     16 ( 16.16)     0  0    0   0   0   0     0 (0.00)    6    3 (18.75)
+10      1     85     15 ( 15.15)     0  0    0   0   0   0     0 (0.00)    6    3 (20.00)
+ 9      4     89     14 ( 14.14)     0  0    0   0   0   0     0 (0.00)    6    3 (21.43)
+ 8      4     93     10 ( 10.10)     2  0    0   1   0   0     1 (25.00)    7    3 (30.00)
+ 6      3     96      6 (  6.06)     4  0    0   0   0   0     0 (0.00)    7    2 (33.33)
+ 4      3     99      3 (  3.03)     2  0    1   1   0   0     2 (66.67)    9    2 (66.67)
+
+
+Screened sequences written to  test_project.fasta.screen


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project.screen.out
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_projectNewChromats.fof
===================================================================


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_projectNewChromats.fof
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project_to_alu.cross
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project_to_alu.cross	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project_to_alu.cross	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4 @@
+/usr/local/genome/bin/cross_match test_project.contigs /usr/local/genome/lib/screenLibs/repeats.fasta -tags -minmatch 10 
+cross_match version 0.990329
+
+Run date:time  010621:164003
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/edit_dir/test_project_to_alu.cross
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4922R.phd.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4922R.phd.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4922R.phd.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,774 @@
+BEGIN_SEQUENCE ML4922R
+
+BEGIN_COMMENT
+
+CHROMAT_FILE: ML4922R
+ABI_THUMBPRINT: 0
+PHRED_VERSION: 0.980904.e
+CALL_METHOD: phred
+QUALITY_LEVELS: 99
+TIME: Thu Jun 21 11:28:31 2001
+TRACE_ARRAY_MIN_INDEX: 0
+TRACE_ARRAY_MAX_INDEX: 8877
+CHEM: term
+DYE: big
+
+END_COMMENT
+
+BEGIN_DNA
+c 4 5
+c 4 17
+t 6 20
+t 6 30
+g 9 45
+g 19 55
+t 24 64
+a 24 72
+c 19 81
+g 16 96
+c 10 103
+c 8 117
+c 8 126
+g 9 139
+c 21 147
+a 20 159
+g 32 166
+g 34 178
+t 34 191
+a 39 200
+c 39 209
+c 40 221
+g 39 234
+g 46 244
+t 40 256
+c 37 269
+c 32 279
+g 32 291
+g 28 300
+a 28 317
+a 25 325
+t 25 331
+t 34 342
+c 32 352
+c 40 363
+c 40 372
+g 40 385
+g 40 394
+g 40 406
+t 40 417
+c 34 430
+g 35 441
+a 35 452
+c 35 459
+c 35 471
+c 35 482
+a 35 491
+c 35 498
+g 35 511
+c 35 521
+g 51 534
+t 51 544
+c 39 556
+c 39 567
+g 39 579
+c 35 587
+g 31 600
+g 31 610
+a 31 625
+c 31 631
+g 31 644
+c 32 652
+g 35 666
+t 35 676
+g 45 688
+g 51 699
+g 56 710
+g 51 723
+t 39 734
+g 39 746
+a 40 756
+c 40 764
+c 39 776
+c 39 787
+c 45 798
+g 40 811
+g 40 821
+a 40 835
+a 35 844
+g 35 853
+a 39 866
+t 35 874
+t 35 886
+c 35 896
+g 37 910
+a 40 919
+g 40 929
+t 40 941
+g 40 953
+c 40 962
+t 40 976
+t 40 987
+c 45 997
+g 51 1010
+t 51 1020
+c 51 1031
+t 51 1043
+g 46 1055
+g 46 1066
+t 40 1077
+a 40 1086
+a 39 1096
+t 39 1109
+c 39 1120
+a 39 1128
+a 40 1139
+a 40 1151
+a 46 1162
+t 40 1173
+c 40 1184
+a 40 1193
+g 40 1206
+g 40 1218
+t 51 1230
+t 51 1242
+t 51 1254
+c 51 1264
+t 45 1277
+c 45 1287
+c 45 1298
+g 45 1312
+g 51 1323
+t 51 1334
+t 56 1347
+g 56 1359
+g 56 1370
+a 56 1381
+t 51 1392
+c 51 1403
+t 51 1416
+c 51 1426
+a 51 1436
+a 40 1447
+a 40 1458
+c 40 1468
+t 40 1483
+c 40 1493
+a 40 1503
+a 51 1514
+g 51 1526
+g 39 1538
+t 39 1551
+t 38 1563
+a 38 1571
+t 38 1584
+g 35 1598
+c 39 1607
+c 39 1621
+g 39 1636
+t 39 1646
+g 43 1659
+g 43 1671
+c 43 1680
+a 46 1692
+c 46 1702
+a 51 1714
+a 51 1726
+a 56 1738
+t 56 1751
+g 45 1763
+t 35 1774
+c 35 1786
+g 35 1801
+c 35 1809
+c 35 1823
+t 35 1837
+g 43 1849
+c 43 1858
+t 43 1873
+g 43 1886
+t 43 1897
+t 43 1910
+t 43 1922
+c 43 1932
+a 43 1941
+t 43 1954
+c 43 1966
+t 51 1980
+g 51 1993
+t 51 2004
+c 56 2016
+a 56 2027
+g 56 2039
+t 56 2052
+g 56 2065
+a 43 2077
+t 43 2088
+g 43 2101
+c 43 2110
+t 43 2126
+c 43 2136
+c 43 2148
+c 43 2160
+t 43 2175
+c 43 2185
+a 43 2197
+t 45 2208
+t 45 2222
+a 43 2232
+g 43 2245
+a 43 2259
+c 43 2267
+a 43 2280
+c 43 2290
+a 43 2304
+c 43 2313
+a 43 2327
+g 43 2340
+a 43 2354
+g 43 2365
+a 43 2379
+c 43 2388
+a 43 2401
+a 43 2412
+c 45 2424
+g 45 2439
+a 43 2450
+g 43 2463
+a 43 2478
+t 43 2488
+a 43 2499
+t 51 2512
+t 51 2525
+c 51 2537
+a 51 2548
+g 51 2562
+t 51 2574
+c 51 2586
+c 56 2599
+a 56 2610
+g 45 2623
+g 43 2636
+c 43 2647
+t 43 2662
+a 43 2672
+g 43 2685
+c 43 2696
+t 43 2712
+a 43 2721
+a 43 2734
+c 43 2745
+a 51 2757
+g 51 2770
+t 56 2784
+g 56 2797
+g 51 2809
+t 56 2822
+t 56 2835
+c 56 2846
+a 56 2857
+g 56 2870
+a 56 2883
+t 56 2895
+g 51 2908
+a 45 2919
+t 45 2932
+t 45 2946
+c 45 2956
+t 45 2970
+g 45 2984
+a 51 2995
+t 45 3006
+g 43 3019
+a 43 3032
+c 43 3042
+g 43 3057
+a 43 3068
+a 45 3081
+g 51 3092
+a 51 3106
+t 51 3117
+c 51 3128
+c 51 3140
+t 45 3154
+g 45 3167
+a 45 3180
+t 51 3190
+g 51 3203
+g 51 3216
+a 56 3229
+g 51 3241
+a 43 3254
+g 36 3265
+a 36 3279
+c 36 3287
+t 36 3302
+g 36 3315
+t 43 3326
+a 51 3338
+a 51 3350
+c 51 3360
+a 51 3372
+a 51 3384
+c 56 3395
+a 56 3408
+g 56 3421
+t 56 3434
+g 56 3447
+a 56 3459
+a 56 3471
+t 56 3483
+g 56 3496
+a 56 3508
+t 56 3520
+g 56 3533
+a 56 3544
+t 51 3556
+c 51 3567
+c 43 3580
+t 43 3594
+a 43 3605
+c 43 3614
+c 43 3628
+g 43 3643
+a 45 3655
+t 45 3666
+g 51 3678
+t 51 3690
+t 51 3703
+a 51 3713
+a 43 3725
+g 43 3738
+c 43 3749
+g 43 3765
+t 43 3776
+g 43 3788
+c 43 3798
+t 43 3814
+a 43 3824
+g 43 3836
+g 51 3849
+a 56 3862
+g 56 3873
+g 56 3886
+a 43 3899
+t 43 3910
+g 43 3923
+c 43 3932
+t 43 3948
+t 43 3960
+t 43 3973
+c 51 3983
+a 51 3994
+a 51 4005
+a 43 4018
+c 43 4028
+c 43 4041
+g 43 4056
+a 45 4068
+g 45 4079
+a 56 4092
+a 56 4104
+t 51 4115
+c 43 4126
+c 43 4139
+g 43 4153
+c 43 4162
+t 43 4177
+a 43 4188
+g 45 4199
+g 43 4212
+c 43 4223
+g 43 4238
+c 43 4247
+t 43 4262
+c 43 4273
+t 45 4286
+a 56 4297
+g 56 4309
+a 56 4322
+a 56 4334
+g 56 4345
+a 56 4358
+a 56 4369
+g 56 4381
+a 56 4393
+a 56 4404
+a 56 4416
+g 56 4428
+c 56 4439
+a 56 4451
+a 56 4462
+g 56 4475
+a 56 4488
+g 56 4499
+c 56 4511
+a 56 4522
+a 56 4534
+a 56 4547
+t 56 4559
+g 56 4571
+a 56 4583
+g 56 4596
+t 56 4609
+g 56 4621
+a 56 4632
+a 56 4644
+t 56 4657
+t 56 4669
+t 56 4681
+g 51 4694
+a 51 4705
+t 45 4716
+t 45 4730
+c 45 4741
+a 45 4751
+c 51 4762
+a 51 4774
+g 56 4787
+g 56 4800
+t 56 4812
+t 51 4825
+g 45 4838
+g 45 4850
+c 45 4860
+c 45 4874
+a 45 4884
+a 45 4896
+t 51 4909
+t 43 4922
+a 43 4932
+c 43 4943
+g 43 4958
+t 45 4969
+g 45 4982
+g 56 4994
+t 56 5007
+g 56 5020
+a 56 5031
+g 56 5043
+c 56 5054
+a 56 5066
+t 51 5079
+t 51 5092
+c 51 5103
+a 43 5113
+a 43 5125
+c 43 5136
+t 43 5151
+a 43 5160
+t 43 5173
+g 43 5186
+c 43 5197
+t 43 5212
+t 43 5224
+a 43 5234
+a 43 5245
+g 40 5258
+c 45 5269
+g 45 5284
+t 45 5295
+c 45 5307
+t 56 5320
+t 56 5332
+a 56 5343
+g 56 5355
+t 56 5368
+g 56 5380
+a 56 5391
+g 56 5403
+g 51 5416
+t 51 5429
+c 40 5440
+a 40 5450
+c 40 5461
+t 40 5476
+c 45 5488
+a 45 5499
+g 56 5511
+a 56 5524
+a 56 5536
+g 56 5547
+t 51 5560
+t 51 5572
+t 51 5584
+a 51 5594
+g 51 5607
+c 51 5618
+a 51 5630
+c 45 5641
+c 40 5654
+g 37 5668
+c 37 5678
+t 37 5693
+g 40 5705
+t 45 5716
+t 56 5729
+g 51 5741
+t 51 5753
+c 51 5764
+g 51 5778
+a 51 5789
+t 51 5800
+a 56 5811
+a 56 5823
+c 56 5834
+a 56 5846
+g 56 5859
+a 56 5871
+a 56 5883
+t 56 5895
+c 56 5907
+t 56 5920
+t 56 5932
+a 56 5943
+a 56 5954
+a 56 5966
+a 51 5977
+g 51 5989
+c 51 6001
+t 51 6015
+g 51 6027
+a 51 6038
+t 56 6050
+a 45 6061
+t 40 6073
+c 40 6085
+g 40 6100
+a 40 6110
+a 40 6121
+a 51 6134
+c 51 6144
+a 51 6155
+t 51 6168
+t 56 6181
+g 56 6193
+a 56 6205
+g 56 6217
+a 56 6230
+a 56 6241
+c 56 6252
+a 56 6263
+a 51 6274
+a 51 6287
+g 51 6298
+g 51 6312
+t 51 6324
+g 51 6337
+a 56 6348
+a 56 6360
+g 56 6372
+a 56 6385
+t 56 6396
+g 56 6408
+g 56 6421
+c 56 6432
+a 51 6444
+g 45 6457
+a 45 6470
+g 45 6480
+g 45 6494
+a 45 6506
+c 40 6516
+a 40 6528
+c 40 6538
+t 40 6553
+g 40 6566
+t 51 6578
+g 56 6590
+a 56 6602
+a 56 6614
+a 56 6625
+a 56 6638
+g 56 6649
+a 56 6662
+g 56 6673
+t 51 6686
+g 51 6698
+a 51 6709
+c 40 6719
+a 40 6731
+g 40 6743
+g 56 6757
+g 56 6769
+a 46 6782
+t 48 6794
+g 46 6806
+a 46 6817
+a 44 6830
+c 44 6840
+c 40 6853
+c 40 6866
+t 32 6879
+a 29 6890
+g 24 6900
+g 24 6914
+c 29 6925
+t 40 6939
+t 40 6950
+t 40 6962
+t 44 6975
+g 44 6987
+g 40 6999
+c 34 7009
+a 34 7021
+a 40 7031
+a 40 7045
+a 40 7057
+c 40 7066
+c 40 7080
+a 40 7090
+t 40 7103
+a 44 7114
+c 56 7125
+a 56 7137
+g 56 7149
+c 56 7161
+a 51 7173
+t 40 7185
+a 45 7196
+c 45 7206
+c 45 7220
+a 45 7230
+t 40 7243
+t 45 7257
+c 46 7268
+g 51 7281
+a 56 7292
+t 56 7304
+a 56 7315
+g 51 7326
+g 51 7339
+a 51 7352
+c 40 7362
+a 40 7374
+c 40 7384
+t 40 7399
+g 37 7411
+a 40 7423
+t 56 7435
+g 56 7446
+g 51 7459
+g 40 7472
+t 40 7484
+t 40 7498
+c 40 7508
+t 40 7520
+t 40 7533
+c 40 7543
+a 40 7554
+c 44 7564
+a 32 7576
+g 32 7588
+c 32 7599
+c 25 7613
+a 24 7623
+a 24 7635
+a 29 7648
+c 40 7658
+t 40 7671
+t 40 7684
+g 48 7696
+a 48 7707
+a 32 7719
+t 32 7730
+c 26 7742
+a 25 7751
+a 25 7763
+a 25 7776
+c 32 7785
+c 30 7799
+g 32 7813
+a 34 7824
+c 34 7834
+a 40 7845
+t 40 7857
+g 40 7869
+a 32 7881
+t 32 7892
+t 29 7906
+c 25 7916
+c 25 7929
+a 29 7939
+a 29 7950
+a 34 7963
+t 40 7975
+c 40 7986
+a 40 7996
+a 44 8007
+a 44 8020
+t 40 8032
+a 40 8042
+t 40 8054
+c 40 8066
+a 25 8076
+g 24 8089
+a 18 8102
+g 15 8111
+g 15 8125
+a 25 8138
+c 29 8148
+a 29 8159
+a 29 8171
+c 48 8182
+a 40 8194
+g 40 8206
+t 40 8218
+t 44 8232
+t 32 8243
+t 29 8256
+g 27 8268
+c 25 8277
+a 24 8289
+a 26 8300
+g 29 8312
+c 29 8323
+a 25 8335
+a 25 8345
+c 32 8357
+t 32 8371
+c 29 8383
+g 29 8394
+g 34 8407
+t 25 8418
+t 25 8431
+g 25 8443
+t 22 8454
+t 24 8466
+a 29 8476
+c 29 8486
+t 28 8500
+t 25 8512
+t 34 8525
+g 32 8536
+g 32 8548
+a 22 8561
+a 22 8570
+t 24 8583
+c 18 8592
+c 16 8605
+g 19 8618
+g 19 8630
+t 18 8641
+t 18 8655
+g 23 8666
+c 20 8675
+a 19 8687
+a 22 8696
+t 25 8710
+g 27 8721
+c 29 8732
+t 25 8746
+t 29 8758
+t 26 8769
+t 29 8782
+g 29 8794
+a 29 8804
+a 24 8815
+g 17 8826
+c 12 8836
+c 8 8850
+a 4 8859
+n 4 8874
+END_DNA
+
+END_SEQUENCE


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4922R.phd.1
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924F.phd.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924F.phd.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924F.phd.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,755 @@
+BEGIN_SEQUENCE ML4924F
+
+BEGIN_COMMENT
+
+CHROMAT_FILE: ML4924F
+ABI_THUMBPRINT: 0
+PHRED_VERSION: 0.980904.e
+CALL_METHOD: phred
+QUALITY_LEVELS: 99
+TIME: Thu Jun 21 11:27:03 2001
+TRACE_ARRAY_MIN_INDEX: 0
+TRACE_ARRAY_MAX_INDEX: 8444
+CHEM: term
+DYE: big
+
+END_COMMENT
+
+BEGIN_DNA
+t 10 3
+t 16 14
+g 19 26
+t 19 36
+t 8 47
+c 6 55
+t 6 74
+t 6 77
+t 6 95
+t 9 98
+c 9 105
+t 15 119
+c 17 128
+g 17 143
+a 22 148
+g 22 162
+g 25 173
+g 25 185
+t 22 197
+a 22 202
+c 22 213
+t 25 227
+t 34 239
+g 34 250
+a 42 260
+g 35 270
+g 35 282
+a 38 294
+t 40 303
+t 40 314
+c 35 323
+t 34 335
+a 31 344
+c 31 351
+c 33 364
+a 34 373
+c 35 381
+c 33 394
+a 29 404
+c 35 412
+a 29 424
+a 29 434
+c 32 443
+a 34 454
+a 34 463
+c 35 473
+a 35 484
+a 42 494
+a 42 506
+g 42 516
+c 42 526
+c 35 539
+t 35 551
+t 37 562
+a 35 571
+t 35 581
+c 35 592
+a 35 600
+t 35 612
+c 31 623
+c 33 635
+a 24 644
+g 24 655
+a 24 667
+a 29 677
+c 26 686
+c 29 698
+g 29 712
+c 29 719
+a 29 731
+a 29 740
+a 29 751
+t 35 763
+t 35 775
+a 35 782
+t 35 794
+a 35 803
+a 35 813
+a 42 824
+a 42 835
+a 47 846
+a 47 857
+g 40 869
+a 35 880
+g 35 891
+c 35 900
+c 32 913
+a 32 921
+a 32 932
+c 32 942
+a 42 953
+c 42 963
+c 42 976
+a 37 986
+c 37 996
+a 30 1007
+a 30 1018
+a 35 1030
+a 35 1041
+c 35 1050
+a 35 1061
+a 35 1071
+a 37 1083
+a 32 1095
+t 32 1106
+g 35 1119
+a 35 1128
+a 37 1139
+a 37 1150
+a 42 1161
+g 42 1174
+a 37 1184
+a 37 1194
+a 40 1206
+c 37 1215
+a 37 1226
+a 35 1236
+a 35 1249
+c 35 1259
+t 37 1274
+c 35 1284
+t 35 1298
+t 37 1310
+c 37 1319
+a 37 1328
+a 37 1339
+c 35 1349
+c 32 1362
+a 29 1371
+g 31 1385
+t 31 1400
+g 29 1410
+a 35 1419
+t 35 1433
+g 35 1445
+t 35 1457
+t 35 1469
+c 35 1478
+t 35 1492
+g 35 1505
+a 35 1514
+g 32 1526
+a 32 1538
+c 35 1547
+t 32 1562
+t 35 1573
+g 35 1586
+t 32 1596
+t 32 1608
+g 32 1622
+a 32 1630
+g 32 1644
+g 35 1656
+a 35 1666
+t 35 1679
+a 35 1687
+a 35 1698
+c 35 1709
+a 37 1721
+c 37 1731
+c 35 1745
+t 35 1760
+t 32 1773
+c 32 1782
+g 32 1796
+t 32 1808
+a 32 1817
+c 30 1826
+t 35 1842
+t 35 1854
+a 35 1862
+a 35 1874
+c 35 1884
+t 35 1900
+t 32 1913
+g 35 1924
+g 42 1936
+a 42 1948
+a 42 1958
+c 42 1968
+c 35 1981
+a 35 1992
+c 38 2002
+t 38 2018
+t 38 2030
+c 38 2040
+a 42 2052
+t 40 2064
+g 40 2077
+g 38 2089
+c 35 2098
+a 35 2112
+t 35 2124
+c 42 2135
+a 50 2147
+t 56 2159
+c 56 2171
+c 50 2183
+t 56 2196
+t 56 2209
+g 42 2222
+g 42 2234
+t 42 2246
+a 38 2256
+a 38 2267
+c 38 2277
+t 38 2293
+c 38 2303
+t 38 2318
+a 40 2328
+t 38 2339
+g 38 2353
+c 38 2363
+t 40 2378
+g 40 2390
+a 40 2401
+a 44 2413
+t 42 2425
+c 42 2435
+c 42 2448
+c 38 2460
+a 36 2471
+a 36 2483
+c 32 2492
+g 32 2507
+c 32 2517
+g 35 2532
+a 35 2543
+g 38 2555
+t 41 2569
+c 41 2579
+t 41 2593
+t 41 2605
+g 41 2617
+c 42 2627
+a 41 2638
+c 41 2649
+c 41 2662
+t 41 2676
+a 41 2686
+c 41 2697
+g 41 2711
+g 41 2723
+c 41 2735
+g 35 2749
+a 40 2759
+c 35 2769
+g 32 2784
+t 32 2796
+g 36 2809
+c 38 2818
+a 35 2830
+a 35 2842
+c 37 2852
+a 38 2865
+c 35 2876
+g 35 2891
+g 35 2903
+t 35 2915
+a 37 2926
+t 37 2938
+c 41 2950
+c 41 2962
+c 41 2974
+g 41 2989
+g 41 3000
+g 41 3012
+c 41 3023
+g 35 3038
+t 35 3050
+t 35 3063
+c 32 3071
+a 35 3083
+a 31 3094
+g 33 3107
+a 32 3120
+a 35 3131
+c 35 3140
+a 37 3153
+a 29 3164
+c 33 3174
+a 16 3187
+t 16 3198
+a 9 3210
+g 20 3223
+a 13 3236
+a 23 3247
+g 23 3259
+t 33 3272
+c 35 3281
+c 37 3294
+a 35 3305
+t 40 3317
+t 40 3331
+c 45 3341
+c 40 3353
+a 40 3364
+t 50 3375
+a 30 3388
+g 30 3400
+a 33 3413
+t 40 3424
+a 33 3434
+c 37 3444
+c 29 3458
+c 29 3470
+g 21 3485
+t 25 3497
+a 21 3508
+g 23 3519
+a 23 3532
+a 50 3543
+g 50 3554
+g 37 3567
+a 35 3579
+t 35 3591
+c 35 3600
+g 32 3616
+t 32 3627
+a 32 3637
+c 37 3647
+t 37 3662
+t 37 3675
+g 40 3686
+a 37 3698
+t 37 3709
+t 37 3722
+c 37 3731
+c 35 3744
+a 35 3755
+a 44 3766
+g 44 3778
+a 44 3791
+t 44 3802
+c 44 3813
+a 56 3824
+a 44 3836
+t 56 3849
+g 37 3861
+t 35 3873
+g 35 3885
+c 35 3894
+t 35 3910
+c 35 3918
+c 26 3932
+t 32 3945
+g 26 3958
+g 26 3969
+a 26 3982
+t 32 3992
+a 32 4003
+c 35 4012
+c 35 4025
+g 35 4040
+a 35 4051
+a 35 4063
+t 42 4075
+c 42 4085
+c 35 4097
+a 42 4108
+a 42 4119
+a 42 4131
+a 42 4143
+c 35 4153
+a 33 4165
+g 29 4178
+c 33 4189
+c 29 4202
+a 30 4212
+g 28 4225
+t 35 4236
+g 33 4250
+t 35 4261
+c 35 4273
+a 35 4283
+c 35 4293
+t 37 4308
+g 37 4321
+a 45 4332
+a 44 4344
+g 44 4355
+t 44 4367
+t 44 4378
+c 44 4391
+c 44 4402
+t 44 4415
+c 39 4427
+c 39 4438
+t 42 4450
+c 42 4461
+a 42 4472
+a 42 4483
+c 42 4493
+a 35 4505
+g 28 4517
+c 27 4529
+t 24 4543
+c 20 4555
+g 20 4567
+t 22 4576
+a 29 4589
+c 29 4599
+t 40 4613
+c 40 4624
+c 42 4637
+t 42 4648
+t 35 4662
+g 35 4673
+a 35 4685
+c 37 4694
+t 35 4708
+t 35 4720
+t 33 4733
+c 33 4743
+a 33 4752
+a 35 4764
+g 35 4776
+c 34 4787
+c 34 4801
+a 33 4811
+c 33 4821
+t 33 4835
+c 29 4847
+t 28 4860
+c 28 4872
+a 27 4881
+a 21 4894
+c 21 4908
+a 15 4917
+a 15 4928
+g 21 4939
+c 21 4949
+t 25 4964
+g 29 4976
+c 29 4984
+a 29 4998
+t 29 5010
+t 33 5023
+g 33 5034
+c 25 5044
+c 27 5058
+t 23 5071
+t 23 5083
+g 11 5094
+c 10 5104
+c 10 5117
+t 24 5130
+c 24 5139
+c 29 5152
+t 29 5165
+c 27 5177
+t 29 5189
+c 29 5202
+a 29 5209
+c 29 5220
+g 32 5233
+g 36 5245
+t 36 5258
+g 35 5269
+a 45 5281
+c 40 5291
+g 38 5305
+t 38 5317
+a 28 5328
+g 28 5338
+c 26 5351
+a 32 5363
+c 32 5372
+g 35 5387
+c 35 5397
+a 35 5410
+a 42 5421
+t 37 5432
+c 34 5445
+t 34 5456
+t 42 5468
+t 29 5480
+t 29 5491
+c 29 5505
+a 35 5513
+t 35 5526
+t 26 5538
+a 24 5548
+c 17 5557
+g 14 5574
+t 12 5585
+c 12 5598
+t 25 5609
+a 27 5618
+a 35 5630
+t 37 5643
+a 38 5653
+c 40 5663
+c 34 5677
+g 34 5690
+a 34 5700
+a 35 5711
+a 32 5725
+g 33 5735
+a 32 5747
+c 29 5756
+c 29 5770
+t 21 5782
+c 21 5794
+a 27 5805
+c 33 5815
+a 26 5827
+g 16 5838
+t 16 5851
+g 10 5863
+c 10 5873
+a 10 5885
+c 20 5894
+c 12 5908
+t 6 5921
+g 6 5931
+g 8 5946
+c 9 5950
+c 9 5970
+t 11 5981
+t 13 5995
+g 18 6004
+g 15 6017
+a 15 6030
+c 17 6042
+a 11 6050
+a 8 6064
+c 11 6077
+a 9 6086
+c 13 6093
+a 9 6106
+g 19 6119
+g 21 6132
+t 24 6146
+g 24 6156
+t 18 6168
+t 15 6181
+t 15 6194
+g 21 6205
+a 25 6215
+c 29 6225
+c 29 6239
+a 25 6248
+c 25 6260
+t 14 6274
+g 14 6285
+a 9 6298
+g 9 6309
+c 9 6322
+t 24 6333
+g 14 6345
+t 11 6355
+t 8 6369
+a 8 6383
+c 8 6391
+a 15 6400
+a 20 6410
+a 27 6422
+a 40 6433
+c 40 6444
+c 40 6457
+t 26 6470
+t 24 6481
+g 12 6493
+g 12 6503
+a 15 6517
+g 13 6528
+g 10 6540
+c 17 6549
+a 18 6565
+c 15 6574
+g 12 6586
+g 15 6597
+g 16 6609
+t 19 6621
+g 23 6635
+a 20 6645
+g 24 6658
+a 21 6670
+c 21 6679
+g 21 6693
+a 21 6703
+t 24 6712
+c 16 6725
+a 20 6736
+c 18 6746
+c 20 6763
+a 16 6771
+c 16 6781
+t 14 6796
+c 12 6808
+t 10 6819
+c 10 6829
+a 12 6841
+c 17 6850
+c 17 6864
+a 29 6873
+a 29 6885
+c 29 6893
+g 19 6908
+g 16 6919
+a 9 6934
+g 9 6941
+a 9 6956
+t 16 6965
+g 13 6978
+t 11 6990
+c 9 6998
+g 11 7014
+a 9 7025
+g 10 7037
+a 9 7049
+a 19 7059
+c 14 7069
+g 14 7081
+a 10 7093
+g 10 7104
+c 10 7118
+t 14 7129
+t 14 7142
+c 25 7154
+t 26 7166
+g 28 7176
+a 32 7187
+a 32 7198
+c 32 7208
+c 33 7222
+t 33 7234
+t 28 7244
+g 21 7256
+a 18 7268
+t 12 7279
+a 11 7288
+t 13 7300
+t 9 7314
+c 9 7322
+c 12 7336
+t 10 7346
+c 11 7359
+a 10 7368
+t 11 7378
+t 12 7394
+g 11 7402
+g 11 7413
+g 10 7423
+c 10 7439
+t 10 7454
+c 10 7462
+c 8 7477
+a 8 7484
+c 10 7497
+a 9 7506
+g 9 7518
+c 9 7527
+t 6 7544
+c 6 7546
+t 8 7557
+t 8 7574
+t 11 7590
+c 9 7600
+t 9 7611
+c 10 7621
+t 10 7632
+g 14 7644
+a 14 7655
+c 10 7666
+g 8 7678
+c 8 7690
+c 17 7703
+a 10 7711
+t 10 7719
+t 9 7729
+c 12 7746
+t 9 7755
+g 11 7770
+a 7 7781
+g 7 7787
+g 10 7802
+a 12 7813
+t 9 7821
+c 6 7837
+t 6 7840
+a 6 7852
+g 6 7864
+g 8 7880
+g 8 7893
+a 11 7903
+t 12 7915
+g 10 7925
+c 9 7938
+a 9 7947
+c 8 7961
+a 8 7968
+c 12 7978
+c 14 7991
+c 20 8004
+a 16 8013
+c 17 8026
+a 10 8036
+a 10 8045
+g 12 8059
+g 17 8071
+a 17 8084
+c 24 8096
+a 18 8107
+t 14 8116
+g 14 8128
+a 11 8140
+c 14 8149
+c 12 8162
+a 15 8174
+a 15 8183
+a 10 8193
+a 9 8205
+t 11 8214
+c 9 8228
+g 9 8237
+a 10 8252
+c 8 8259
+a 6 8275
+t 6 8279
+c 8 8286
+t 8 8305
+g 7 8318
+g 7 8327
+g 9 8341
+c 9 8349
+c 7 8365
+c 6 8372
+a 10 8387
+c 6 8400
+t 6 8400
+c 6 8415
+g 6 8437
+END_DNA
+
+END_SEQUENCE


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924F.phd.1
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924R.phd.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924R.phd.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924R.phd.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,784 @@
+BEGIN_SEQUENCE ML4924R
+
+BEGIN_COMMENT
+
+CHROMAT_FILE: ML4924R
+ABI_THUMBPRINT: 0
+PHRED_VERSION: 0.980904.e
+CALL_METHOD: phred
+QUALITY_LEVELS: 99
+TIME: Thu Jun 21 11:27:03 2001
+TRACE_ARRAY_MIN_INDEX: 0
+TRACE_ARRAY_MAX_INDEX: 8792
+CHEM: term
+DYE: big
+
+END_COMMENT
+
+BEGIN_DNA
+a 6 1
+c 6 20
+t 6 17
+t 8 26
+t 8 35
+g 12 48
+g 18 58
+t 16 66
+c 14 83
+g 11 98
+c 9 104
+c 6 122
+t 6 128
+g 9 140
+c 19 147
+a 20 159
+g 32 167
+g 34 178
+t 34 190
+a 39 200
+c 39 208
+c 39 221
+g 29 233
+g 40 243
+t 28 255
+c 21 267
+c 13 277
+g 4 288
+n 4 297
+g 4 298
+a 8 314
+t 8 328
+t 23 339
+c 29 349
+c 39 359
+c 40 369
+g 40 381
+g 40 390
+g 40 401
+t 46 413
+c 34 425
+g 33 436
+a 29 447
+c 29 453
+c 32 466
+c 33 476
+a 34 485
+c 34 492
+g 34 505
+c 34 514
+g 40 527
+t 40 537
+c 34 548
+c 34 559
+g 34 572
+c 34 579
+t 34 592
+t 34 602
+t 39 613
+g 40 625
+c 40 633
+t 40 645
+c 40 656
+a 40 664
+t 40 673
+c 40 685
+t 46 697
+c 51 707
+c 51 718
+c 56 728
+t 51 738
+c 51 749
+t 35 760
+g 35 772
+a 35 783
+c 35 790
+t 35 803
+t 35 813
+t 39 824
+g 40 836
+c 40 844
+a 40 855
+t 40 865
+c 56 876
+c 42 887
+c 51 898
+t 46 909
+t 46 921
+a 46 931
+a 46 940
+t 46 951
+c 46 962
+c 46 974
+t 46 985
+c 40 995
+a 40 1004
+g 40 1015
+a 40 1028
+a 40 1037
+t 40 1048
+g 35 1060
+g 35 1071
+c 35 1080
+g 35 1096
+t 35 1106
+c 35 1116
+a 40 1125
+g 40 1138
+a 40 1150
+g 51 1160
+a 51 1172
+a 51 1182
+g 51 1193
+a 40 1204
+a 35 1214
+g 35 1226
+c 39 1235
+t 39 1250
+c 39 1259
+t 39 1272
+c 40 1282
+g 51 1295
+a 51 1305
+a 51 1315
+c 51 1325
+c 51 1337
+c 51 1348
+a 51 1358
+a 51 1368
+t 51 1381
+g 51 1393
+a 51 1403
+g 51 1415
+g 51 1427
+g 39 1440
+a 39 1450
+t 39 1462
+a 39 1470
+t 39 1483
+c 35 1494
+a 40 1503
+a 40 1514
+g 40 1528
+g 40 1540
+t 40 1552
+t 39 1565
+c 39 1574
+a 39 1583
+g 39 1598
+a 38 1609
+a 40 1619
+g 39 1632
+c 39 1642
+t 39 1657
+c 39 1666
+g 39 1680
+t 40 1691
+t 45 1703
+c 51 1713
+t 51 1726
+c 51 1736
+a 51 1746
+a 51 1757
+c 51 1767
+a 45 1778
+t 45 1791
+c 45 1802
+t 45 1816
+c 45 1826
+c 45 1838
+g 45 1852
+t 51 1863
+t 51 1875
+g 56 1887
+g 43 1899
+t 43 1911
+g 43 1924
+a 43 1933
+g 43 1946
+a 43 1958
+g 56 1970
+t 56 1982
+g 56 1994
+g 56 2006
+t 56 2017
+g 51 2030
+a 43 2041
+t 43 2053
+c 43 2063
+g 43 2079
+t 43 2089
+c 43 2100
+t 43 2114
+c 43 2124
+a 43 2135
+c 43 2144
+c 43 2157
+c 43 2169
+g 43 2184
+t 43 2195
+g 43 2207
+c 43 2216
+c 43 2229
+t 43 2244
+c 43 2253
+c 43 2265
+a 46 2277
+a 56 2288
+g 56 2299
+g 56 2311
+t 56 2324
+t 56 2336
+t 56 2348
+t 56 2360
+g 56 2373
+g 51 2385
+a 51 2397
+a 51 2408
+c 51 2418
+a 43 2430
+g 43 2443
+c 45 2454
+t 45 2469
+c 45 2480
+a 45 2491
+g 56 2503
+t 56 2516
+g 56 2528
+g 51 2540
+t 51 2552
+c 51 2564
+a 51 2574
+a 51 2585
+a 51 2598
+c 45 2608
+a 45 2620
+c 45 2630
+c 45 2644
+t 45 2658
+g 51 2671
+t 51 2682
+c 51 2693
+t 51 2707
+t 45 2719
+c 45 2730
+t 51 2743
+c 51 2753
+c 51 2765
+a 51 2775
+a 51 2787
+g 43 2798
+g 43 2810
+c 43 2822
+g 43 2837
+a 43 2847
+g 43 2859
+g 51 2872
+t 51 2884
+a 51 2894
+c 43 2904
+a 43 2917
+c 43 2927
+t 43 2943
+g 43 2956
+t 43 2967
+g 51 2980
+a 51 2992
+g 45 3003
+g 45 3017
+t 45 3029
+c 45 3039
+t 45 3053
+t 45 3065
+t 51 3077
+c 51 3088
+g 51 3102
+g 51 3113
+t 51 3125
+a 43 3136
+t 43 3148
+t 43 3161
+a 43 3170
+g 43 3183
+a 43 3196
+c 43 3205
+g 43 3220
+t 43 3231
+a 45 3243
+a 56 3254
+t 56 3266
+g 56 3279
+a 56 3290
+a 56 3302
+a 56 3314
+a 51 3325
+g 51 3337
+a 51 3350
+t 43 3360
+t 43 3373
+g 43 3385
+c 43 3394
+g 43 3410
+t 43 3421
+g 43 3434
+c 43 3443
+t 43 3458
+a 43 3468
+c 43 3477
+g 43 3493
+t 43 3505
+c 43 3515
+a 43 3527
+c 43 3536
+c 43 3550
+g 43 3565
+t 43 3576
+g 43 3588
+a 45 3601
+g 45 3611
+a 45 3625
+g 45 3635
+g 45 3648
+a 43 3660
+g 43 3672
+a 38 3684
+c 38 3692
+a 38 3705
+a 43 3717
+g 56 3729
+g 56 3741
+c 56 3752
+a 56 3765
+a 51 3776
+t 51 3789
+g 51 3801
+c 51 3811
+a 43 3824
+g 43 3836
+c 43 3846
+t 43 3862
+t 43 3874
+c 43 3884
+t 51 3897
+t 51 3909
+g 56 3922
+a 56 3933
+g 56 3944
+a 56 3957
+g 56 3968
+t 43 3980
+g 43 3992
+g 43 4004
+c 43 4013
+t 43 4029
+t 43 4041
+g 43 4053
+a 56 4064
+a 56 4075
+a 51 4087
+g 51 4098
+t 51 4111
+c 51 4121
+a 51 4132
+a 51 4144
+g 56 4155
+g 56 4168
+a 51 4181
+g 51 4192
+t 43 4205
+a 43 4215
+c 43 4225
+g 43 4240
+a 43 4250
+g 43 4263
+c 43 4274
+t 45 4289
+g 45 4301
+t 45 4312
+t 56 4324
+g 56 4336
+a 56 4347
+g 56 4358
+g 56 4371
+a 56 4383
+g 56 4394
+g 51 4407
+a 43 4419
+a 43 4430
+c 43 4440
+t 43 4455
+t 43 4467
+c 43 4477
+a 51 4488
+g 51 4500
+t 51 4513
+g 51 4525
+a 51 4536
+c 45 4546
+a 45 4558
+c 45 4569
+t 51 4583
+g 45 4595
+g 45 4607
+c 45 4618
+t 45 4633
+g 45 4645
+t 45 4656
+t 56 4668
+t 56 4680
+t 56 4692
+g 56 4704
+g 56 4716
+a 56 4727
+t 56 4738
+t 56 4751
+c 56 4762
+g 56 4775
+g 56 4787
+t 51 4798
+a 51 4809
+t 51 4821
+c 51 4831
+c 51 4844
+a 51 4854
+g 51 4866
+g 51 4879
+a 56 4892
+g 51 4903
+c 51 4914
+a 51 4926
+c 51 4936
+a 51 4948
+t 51 4960
+t 56 4973
+g 56 4985
+a 56 4997
+t 56 5008
+c 56 5020
+t 56 5033
+t 56 5045
+g 56 5057
+g 56 5069
+a 56 5080
+a 40 5091
+t 40 5104
+c 40 5115
+a 40 5124
+a 40 5136
+g 40 5149
+t 40 5162
+a 40 5172
+c 40 5182
+g 40 5197
+a 40 5207
+t 40 5220
+c 51 5231
+c 56 5243
+t 56 5256
+t 51 5268
+c 51 5279
+t 51 5292
+a 51 5302
+c 51 5312
+g 51 5325
+g 51 5338
+g 51 5350
+t 51 5362
+a 45 5372
+t 45 5384
+c 45 5395
+t 45 5409
+a 45 5419
+t 45 5431
+g 51 5444
+g 56 5456
+a 56 5468
+a 56 5479
+t 56 5492
+g 40 5503
+g 39 5516
+a 39 5528
+c 39 5537
+t 39 5552
+t 39 5564
+c 39 5575
+t 40 5588
+a 40 5597
+t 40 5609
+g 40 5622
+t 56 5634
+t 56 5646
+g 51 5658
+t 51 5669
+t 51 5682
+c 51 5692
+t 51 5705
+t 51 5718
+g 51 5730
+a 45 5741
+a 40 5752
+c 40 5762
+g 37 5776
+c 37 5785
+c 40 5798
+c 40 5810
+g 56 5823
+g 56 5834
+g 51 5846
+a 51 5859
+t 40 5870
+a 40 5880
+c 40 5890
+c 40 5904
+g 40 5918
+t 51 5929
+g 51 5941
+t 37 5953
+t 37 5966
+g 37 5978
+c 37 5987
+a 37 5999
+c 40 6009
+g 40 6023
+t 34 6035
+c 34 6047
+g 29 6060
+c 29 6068
+c 29 6083
+g 29 6096
+t 32 6107
+a 29 6118
+g 25 6128
+g 19 6142
+t 27 6154
+g 27 6167
+c 32 6176
+a 32 6187
+a 34 6199
+g 40 6212
+a 34 6224
+c 40 6234
+t 34 6248
+c 35 6258
+g 35 6272
+c 35 6281
+g 35 6296
+t 39 6307
+t 40 6320
+g 56 6331
+g 56 6342
+g 56 6355
+a 51 6367
+t 51 6378
+t 46 6391
+c 46 6401
+a 42 6411
+g 42 6424
+c 42 6435
+a 46 6447
+t 40 6459
+a 37 6469
+g 37 6482
+a 40 6494
+g 29 6506
+t 29 6518
+t 29 6531
+a 29 6539
+c 29 6549
+c 27 6563
+a 32 6573
+a 29 6585
+g 29 6596
+g 34 6609
+a 48 6622
+t 48 6633
+g 48 6645
+a 32 6656
+t 34 6668
+g 34 6681
+c 34 6690
+c 31 6704
+a 31 6714
+t 37 6726
+g 46 6739
+a 48 6750
+a 48 6762
+g 40 6774
+t 40 6786
+g 40 6797
+g 27 6810
+t 25 6821
+t 21 6834
+c 16 6842
+c 16 6856
+a 18 6865
+a 19 6877
+g 23 6889
+t 27 6901
+t 25 6915
+a 29 6923
+a 24 6936
+g 29 6947
+t 32 6960
+a 29 6970
+c 29 6980
+g 25 6995
+a 25 7004
+a 21 7018
+g 15 7026
+g 15 7041
+t 23 7053
+g 27 7065
+t 27 7075
+t 27 7089
+a 27 7098
+t 25 7110
+c 22 7120
+c 24 7133
+t 29 7147
+c 32 7157
+a 32 7168
+a 40 7179
+c 29 7189
+a 29 7200
+a 29 7211
+g 48 7224
+t 48 7237
+c 40 7248
+t 40 7261
+c 40 7272
+a 29 7282
+g 29 7294
+a 25 7306
+a 29 7317
+c 29 7327
+a 40 7338
+t 40 7350
+c 32 7362
+a 32 7372
+c 32 7383
+t 32 7397
+g 32 7409
+g 25 7422
+t 25 7432
+t 25 7445
+g 29 7457
+a 40 7467
+a 36 7479
+g 40 7491
+a 36 7503
+g 36 7514
+t 37 7525
+t 33 7537
+t 46 7549
+g 24 7561
+t 24 7571
+t 24 7584
+t 32 7596
+c 32 7606
+t 40 7619
+t 34 7630
+t 22 7642
+t 25 7654
+c 25 7665
+a 27 7674
+t 25 7685
+t 22 7698
+t 29 7709
+t 29 7721
+g 25 7734
+t 21 7744
+t 21 7757
+t 22 7767
+t 29 7779
+g 24 7791
+t 24 7802
+g 24 7814
+g 31 7827
+t 31 7838
+g 28 7850
+t 28 7861
+t 24 7873
+g 19 7884
+g 19 7897
+c 23 7906
+t 27 7921
+c 25 7931
+t 22 7943
+t 29 7954
+t 34 7965
+t 32 7976
+t 32 7988
+t 32 8000
+a 25 8009
+t 18 8020
+a 18 8031
+a 18 8039
+t 18 8053
+t 23 8066
+t 32 8077
+g 26 8089
+c 25 8099
+g 19 8112
+g 15 8124
+t 15 8134
+t 15 8148
+c 23 8157
+t 23 8170
+g 22 8182
+g 34 8193
+a 37 8205
+t 40 8216
+g 32 8227
+a 32 8238
+t 29 8250
+a 22 8259
+a 18 8269
+g 18 8281
+g 13 8294
+c 22 8305
+t 16 8318
+t 20 8328
+t 25 8341
+g 29 8353
+t 24 8364
+t 24 8376
+g 25 8388
+t 19 8398
+t 19 8412
+g 25 8422
+t 24 8433
+g 21 8444
+g 21 8455
+t 27 8467
+g 21 8479
+g 20 8491
+t 26 8502
+a 25 8512
+g 18 8524
+a 15 8536
+a 15 8545
+t 10 8558
+c 13 8566
+c 13 8581
+t 17 8593
+c 21 8603
+a 22 8613
+a 25 8624
+g 29 8635
+t 25 8647
+a 25 8657
+c 25 8666
+c 24 8678
+c 19 8691
+t 10 8703
+c 10 8712
+g 11 8726
+a 17 8736
+g 17 8747
+a 12 8759
+t 19 8768
+a 16 8778
+t 11 8787
+END_DNA
+
+END_SEQUENCE


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4924R.phd.1
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4947F.phd.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4947F.phd.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4947F.phd.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,760 @@
+BEGIN_SEQUENCE ML4947F
+
+BEGIN_COMMENT
+
+CHROMAT_FILE: ML4947F
+ABI_THUMBPRINT: 0
+PHRED_VERSION: 0.980904.e
+CALL_METHOD: phred
+QUALITY_LEVELS: 99
+TIME: Thu Jun 21 11:28:31 2001
+TRACE_ARRAY_MIN_INDEX: 0
+TRACE_ARRAY_MAX_INDEX: 8660
+CHEM: term
+DYE: big
+
+END_COMMENT
+
+BEGIN_DNA
+c 9 4
+t 13 10
+g 11 23
+t 21 34
+g 24 47
+g 34 58
+g 34 70
+a 34 77
+c 31 87
+c 31 99
+c 25 109
+a 25 115
+c 25 124
+t 25 140
+g 34 152
+a 34 162
+t 38 172
+a 29 181
+t 29 191
+t 29 203
+a 29 209
+c 29 220
+c 32 233
+g 32 246
+a 32 257
+a 32 268
+c 32 274
+t 32 287
+t 32 298
+t 56 309
+t 46 320
+g 35 331
+a 35 343
+a 35 352
+c 35 359
+c 35 371
+a 35 379
+a 40 390
+a 45 401
+c 40 409
+a 40 419
+g 40 429
+t 46 441
+t 40 453
+a 40 463
+t 40 472
+g 40 485
+t 40 495
+g 40 509
+t 37 518
+t 37 530
+g 35 543
+g 35 554
+c 35 563
+g 35 578
+c 35 586
+c 35 598
+a 40 607
+a 46 617
+a 51 628
+a 51 638
+a 51 649
+t 51 658
+a 40 667
+a 40 677
+c 40 688
+g 40 702
+t 35 712
+t 23 724
+a 19 733
+a 10 744
+a 13 755
+g 10 766
+a 16 778
+a 16 787
+a 31 798
+c 33 807
+c 33 819
+a 40 828
+t 40 840
+c 40 851
+a 44 861
+c 46 871
+a 51 882
+a 51 893
+g 51 905
+a 51 918
+g 51 928
+c 51 938
+c 51 951
+t 51 963
+t 51 975
+a 51 985
+t 51 995
+g 51 1008
+a 51 1018
+c 51 1028
+a 51 1038
+a 51 1048
+a 51 1061
+g 51 1073
+a 56 1085
+g 56 1096
+g 56 1108
+g 56 1120
+a 56 1131
+g 56 1143
+a 51 1155
+g 46 1166
+a 40 1178
+t 40 1188
+a 40 1197
+g 40 1211
+a 40 1222
+c 40 1232
+a 40 1242
+t 40 1255
+a 39 1264
+a 39 1276
+c 38 1287
+t 38 1302
+t 38 1314
+a 39 1323
+a 40 1334
+g 40 1347
+a 40 1359
+c 40 1368
+a 39 1378
+t 39 1391
+t 39 1404
+a 39 1412
+c 39 1424
+a 40 1435
+c 51 1446
+a 51 1457
+a 51 1468
+t 51 1482
+t 51 1495
+t 51 1507
+g 56 1520
+g 40 1532
+t 40 1543
+c 40 1554
+a 43 1563
+a 43 1574
+a 43 1587
+c 43 1597
+g 43 1613
+a 43 1622
+t 43 1635
+a 43 1644
+a 43 1655
+a 43 1668
+c 51 1679
+c 51 1692
+a 51 1702
+a 51 1714
+a 51 1727
+t 51 1739
+c 51 1749
+c 51 1762
+a 51 1772
+a 51 1784
+c 51 1794
+a 43 1806
+g 43 1819
+a 43 1832
+c 43 1841
+a 43 1854
+t 43 1866
+t 43 1879
+t 43 1892
+a 43 1901
+a 43 1913
+a 43 1925
+c 45 1935
+c 45 1949
+a 45 1959
+a 45 1972
+a 51 1984
+t 51 1997
+c 51 2007
+c 51 2020
+a 43 2032
+a 43 2043
+c 43 2053
+g 43 2069
+c 43 2078
+c 43 2091
+a 43 2103
+a 43 2115
+c 43 2124
+a 43 2138
+a 43 2150
+a 45 2162
+a 51 2174
+c 45 2184
+a 45 2197
+t 45 2209
+t 45 2223
+c 45 2233
+a 45 2245
+t 51 2258
+a 56 2269
+t 56 2282
+a 56 2293
+t 51 2306
+a 51 2317
+a 51 2329
+g 45 2343
+a 45 2356
+t 45 2368
+a 51 2378
+t 45 2391
+a 45 2402
+c 51 2413
+c 51 2427
+c 51 2440
+a 45 2452
+a 45 2463
+a 43 2476
+c 43 2486
+c 43 2500
+g 43 2515
+t 45 2526
+a 45 2538
+t 56 2550
+c 56 2561
+a 56 2572
+a 45 2584
+a 40 2597
+c 40 2608
+g 40 2623
+c 38 2633
+a 35 2646
+g 36 2658
+a 36 2672
+c 36 2681
+g 43 2697
+c 43 2706
+c 43 2720
+g 36 2735
+c 43 2744
+c 43 2758
+c 43 2770
+t 51 2784
+a 51 2795
+g 51 2807
+t 56 2820
+a 43 2831
+a 43 2842
+c 43 2853
+t 43 2869
+g 38 2882
+t 38 2894
+a 42 2905
+t 43 2917
+c 44 2928
+c 51 2942
+t 51 2955
+t 51 2968
+t 43 2981
+c 43 2992
+a 43 3003
+c 43 3012
+a 43 3026
+a 43 3037
+a 45 3050
+c 51 3060
+a 51 3073
+a 43 3085
+g 45 3098
+c 45 3109
+t 43 3124
+c 43 3135
+t 43 3149
+c 43 3159
+g 43 3174
+g 43 3186
+a 43 3200
+g 43 3209
+g 43 3223
+c 40 3233
+a 41 3246
+g 41 3259
+c 45 3269
+a 45 3283
+g 45 3296
+a 45 3310
+g 45 3320
+t 45 3333
+c 45 3344
+a 45 3356
+c 45 3366
+c 45 3380
+a 41 3392
+g 43 3405
+a 41 3419
+a 43 3430
+g 43 3442
+c 45 3452
+c 45 3466
+c 45 3477
+c 45 3490
+a 45 3503
+g 45 3514
+c 45 3524
+a 45 3538
+a 43 3550
+t 51 3562
+g 56 3575
+t 56 3587
+a 56 3598
+t 51 3610
+t 51 3623
+t 51 3636
+c 51 3646
+c 51 3659
+c 51 3671
+a 46 3683
+a 46 3695
+g 42 3706
+t 42 3720
+g 42 3732
+t 45 3744
+a 43 3755
+g 43 3768
+c 43 3778
+g 43 3794
+t 43 3806
+c 43 3816
+c 43 3829
+g 43 3844
+a 45 3855
+g 43 3866
+t 45 3880
+t 45 3892
+a 45 3902
+g 45 3915
+c 45 3925
+c 45 3939
+t 45 3953
+t 45 3966
+g 43 3977
+c 43 3987
+a 43 4000
+c 43 4009
+c 43 4023
+t 43 4037
+g 43 4050
+a 43 4062
+c 43 4071
+c 43 4084
+a 43 4095
+a 56 4108
+g 43 4120
+a 41 4133
+a 45 4145
+c 45 4155
+g 41 4169
+t 41 4181
+g 42 4193
+g 46 4206
+c 46 4217
+c 51 4230
+t 51 4244
+g 51 4256
+a 51 4268
+g 51 4279
+c 51 4291
+t 51 4305
+t 51 4317
+t 45 4330
+g 43 4341
+g 43 4354
+c 43 4364
+t 43 4379
+a 43 4388
+c 43 4399
+g 43 4414
+t 45 4426
+t 45 4439
+c 56 4450
+t 56 4463
+c 56 4474
+t 56 4487
+t 56 4499
+t 56 4512
+c 56 4523
+t 51 4535
+t 51 4547
+a 45 4558
+c 45 4568
+c 45 4581
+t 41 4595
+c 45 4605
+c 45 4618
+c 51 4629
+c 56 4641
+a 56 4652
+a 51 4664
+g 51 4675
+c 51 4686
+c 51 4700
+t 51 4713
+t 51 4725
+g 51 4737
+a 51 4749
+g 51 4760
+t 51 4774
+g 45 4786
+t 43 4797
+g 43 4810
+c 43 4820
+t 43 4835
+t 43 4847
+t 43 4859
+g 43 4871
+c 43 4881
+t 43 4896
+g 43 4908
+g 43 4920
+a 51 4933
+g 51 4943
+g 43 4956
+g 43 4968
+c 43 4979
+t 40 4994
+c 43 5004
+g 43 5018
+g 45 5029
+c 45 5040
+c 45 5054
+a 45 5064
+a 45 5076
+a 51 5088
+g 56 5099
+g 56 5112
+a 56 5125
+g 56 5136
+a 56 5149
+a 56 5160
+a 51 5172
+g 51 5184
+t 45 5196
+a 45 5206
+a 43 5217
+g 43 5231
+c 45 5242
+g 45 5257
+t 43 5268
+c 43 5279
+c 56 5291
+a 56 5302
+t 50 5314
+g 45 5326
+g 40 5339
+c 40 5350
+t 37 5365
+t 40 5377
+c 40 5388
+a 51 5398
+a 51 5410
+c 51 5421
+a 56 5433
+g 56 5445
+a 45 5458
+t 40 5470
+c 40 5481
+g 40 5496
+a 40 5506
+g 40 5518
+c 40 5529
+t 40 5544
+t 45 5556
+g 37 5568
+t 42 5579
+t 42 5592
+c 43 5602
+a 43 5613
+t 46 5625
+t 37 5638
+g 37 5651
+c 37 5661
+g 37 5676
+t 37 5687
+t 37 5700
+c 43 5710
+a 42 5721
+g 42 5733
+a 50 5746
+t 46 5757
+t 46 5770
+t 51 5782
+a 37 5792
+g 37 5805
+c 37 5816
+g 40 5831
+t 35 5842
+t 35 5854
+g 35 5867
+c 35 5876
+t 35 5891
+t 35 5903
+c 37 5913
+c 51 5926
+t 51 5938
+c 51 5949
+t 51 5962
+t 51 5975
+c 45 5985
+g 45 5998
+c 45 6008
+t 45 6022
+c 45 6033
+t 46 6046
+g 44 6058
+t 44 6069
+c 42 6080
+c 46 6092
+t 46 6105
+c 51 6115
+c 51 6128
+t 51 6140
+g 56 6153
+a 51 6164
+g 51 6175
+a 42 6188
+g 46 6198
+g 42 6211
+a 51 6223
+a 51 6235
+c 37 6245
+a 37 6257
+c 37 6267
+g 37 6282
+a 40 6293
+t 45 6305
+t 56 6318
+c 56 6329
+c 56 6341
+t 56 6354
+g 56 6366
+g 51 6378
+a 51 6390
+a 45 6402
+c 40 6412
+c 35 6425
+g 35 6439
+c 35 6448
+t 35 6463
+g 39 6474
+g 45 6486
+t 56 6498
+g 56 6510
+g 51 6522
+g 51 6535
+a 45 6547
+c 45 6557
+c 45 6570
+g 45 6584
+t 45 6595
+g 40 6607
+c 45 6617
+g 45 6631
+g 40 6644
+c 40 6654
+g 40 6668
+g 45 6680
+a 40 6693
+g 42 6703
+a 42 6716
+g 46 6727
+c 56 6738
+a 51 6750
+g 42 6762
+t 42 6775
+c 37 6785
+a 37 6796
+c 35 6807
+t 40 6822
+g 34 6834
+t 29 6846
+g 37 6858
+t 40 6870
+a 40 6880
+t 44 6892
+t 34 6905
+c 32 6915
+c 32 6928
+g 27 6942
+c 25 6951
+a 22 6963
+a 26 6974
+t 29 6986
+g 27 6998
+a 32 7009
+c 29 7019
+t 27 7034
+t 25 7046
+c 24 7056
+c 29 7069
+g 29 7081
+g 29 7093
+t 40 7105
+g 28 7117
+c 29 7127
+a 24 7138
+a 25 7150
+c 29 7160
+c 32 7174
+t 32 7187
+t 32 7199
+g 34 7210
+g 40 7223
+c 40 7233
+g 33 7247
+c 40 7257
+t 40 7271
+g 35 7283
+t 40 7294
+c 40 7305
+t 48 7318
+g 48 7330
+a 40 7341
+g 40 7352
+c 40 7363
+c 40 7377
+t 34 7389
+g 32 7401
+g 29 7413
+a 34 7425
+g 40 7435
+t 32 7448
+g 29 7460
+a 29 7470
+c 29 7479
+c 29 7494
+a 29 7503
+t 29 7516
+g 32 7528
+t 27 7540
+t 32 7553
+a 25 7562
+g 22 7574
+g 18 7587
+t 21 7599
+t 23 7611
+t 23 7624
+a 18 7632
+a 19 7643
+g 25 7656
+c 40 7667
+a 40 7679
+g 40 7691
+a 36 7704
+g 30 7714
+t 30 7727
+g 32 7739
+c 32 7748
+c 32 7761
+t 29 7774
+t 31 7787
+c 21 7797
+g 21 7809
+a 18 7821
+g 15 7830
+g 14 7843
+a 25 7855
+g 24 7865
+g 24 7879
+a 25 7890
+c 25 7900
+g 36 7913
+t 32 7926
+g 32 7936
+g 40 7949
+t 40 7960
+g 40 7973
+g 32 7985
+t 27 7997
+c 27 8007
+g 25 8021
+t 20 8030
+t 20 8044
+c 25 8055
+a 19 8064
+a 19 8076
+g 18 8086
+g 13 8098
+c 13 8109
+c 18 8123
+t 27 8135
+t 27 8148
+g 32 8159
+t 28 8170
+a 32 8181
+c 24 8190
+a 27 8201
+c 24 8211
+g 22 8224
+g 20 8237
+c 27 8248
+a 27 8260
+g 27 8271
+a 25 8284
+a 29 8294
+a 29 8306
+g 29 8317
+a 22 8329
+a 22 8339
+c 23 8348
+c 27 8362
+g 23 8375
+t 23 8386
+c 25 8398
+t 30 8410
+c 33 8419
+a 27 8430
+g 39 8442
+t 25 8454
+c 28 8465
+a 26 8475
+c 26 8484
+a 21 8496
+g 16 8508
+c 10 8517
+c 10 8532
+g 14 8545
+c 16 8554
+a 20 8566
+c 27 8575
+a 27 8587
+t 24 8598
+t 22 8611
+t 21 8622
+c 27 8633
+t 21 8645
+t 21 8656
+END_DNA
+
+END_SEQUENCE


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/consed_project/phd_dir/ML4947F.phd.1
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/crab.dat.cn
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/crab.dat.cn	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/crab.dat.cn	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+ 13 sequences
+1 A-salina
+2 C-vittat
+3 C-sp.
+4 L-aequit
+5 P-camtsc
+6 E-tenuim
+7 L-splend
+8 P-bernha
+9 P-acadia
+10 P-p(NE)
+11 P-p(GU)
+12 P-l(NE)
+13 P-l(GU)
+ 14 and   2        0.097855
+ 14 and   3        0.097855
+ 15 and  20        0.063651
+ 15 and  14        0.013271
+ 15 and   1        0.155362
+ 16 and  10        0.008043
+ 16 and  11        0.008043
+ 17 and  12        0.001340
+ 17 and  13        0.001340
+ 18 and  16        0.010389
+ 18 and  17        0.017091
+ 19 and   4        0.006702
+ 19 and   5        0.006702
+ 20 and  23        0.017147
+ 20 and  18        0.029044
+ 21 and   8        0.002681
+ 21 and   9        0.002681
+ 22 and   6        0.012064
+ 22 and  21        0.009383
+ 23 and  24        0.005306
+ 23 and  19        0.023626
+ 24 and  22        0.012958
+ 24 and   7        0.025022
+
+file:crab.dat  constant rate for NJ (negative branches allowed)
+ p-distance was used.
+Number of nucleotide sites compared 373 (nsite=421)
+outgroup:  1 A-salina

Added: trunk/packages/bioperl/branches/upstream/current/t/data/crab.nj
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/crab.nj	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/crab.nj	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+ 13 sequences
+1 A-salina
+2 C-vittat
+3 C-sp.
+4 L-aequit
+5 P-camtsc
+6 E-tenuim
+7 L-splend
+8 P-bernha
+9 P-acadia
+10 P-p(NE)
+11 P-p(GU)
+12 P-l(NE)
+13 P-l(GU)
+ 14 and   2        0.087619
+ 14 and   3        0.108092
+ 15 and   1        0.155362
+ 15 and  14        0.020241
+ 16 and  10        0.011208
+ 16 and  11        0.004878
+ 17 and  12        0.002136
+ 17 and  13        0.000545
+ 18 and  16        0.016086
+ 18 and  17        0.011394
+ 19 and   4        0.006423
+ 19 and   5        0.006982
+ 20 and  15        0.056669
+ 20 and  18        0.028117
+ 21 and   8        0.003267
+ 21 and   9        0.002095
+ 22 and   6        0.011003
+ 22 and  21        0.010445
+ 23 and  20        0.018557
+ 23 and  19        0.020149
+ 24 and  22        0.012944
+ 24 and   7        0.024589
+ 24 and  23        0.007331
+
+file:crab.dat   NJ tree   p-distance was used.
+Number of nucleotide sites compared 373 (nsite=421)
+seed=1850

Added: trunk/packages/bioperl/branches/upstream/current/t/data/crab.njb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/crab.njb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/crab.njb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+   13 sequences     1000 bootstraping
+1 A-salina
+2 C-vittat
+3 C-sp.
+4 L-aequit
+5 P-camtsc
+6 E-tenuim
+7 L-splend
+8 P-bernha
+9 P-acadia
+10 P-p(NE)
+11 P-p(GU)
+12 P-l(NE)
+13 P-l(GU)
+ 14 and   2        0.098857      1000
+ 14 and   3        0.127932      1000
+ 15 and   1        0.197471      1000
+ 15 and  14        0.029273       874
+ 16 and  10        0.011732      1000
+ 16 and  11        0.004529      1000
+ 17 and  12        0.002258      1000
+ 17 and  13        0.000428      1000
+ 18 and  16        0.017512      1000
+ 18 and  17        0.010824       998
+ 19 and   4        0.006534      1000
+ 19 and   5        0.006992      1000
+ 20 and  15        0.070461      1000
+ 20 and  18        0.030579       998
+ 21 and   8        0.003339      1000
+ 21 and   9        0.002042      1000
+ 22 and   6        0.011142      1000
+ 22 and  21        0.010693       983
+ 23 and  20        0.020714       996
+ 23 and  19        0.020350      1000
+ 24 and  23        0.008665       826
+ 24 and  22        0.013457       972
+ 24 and   7        0.025598      1000
+
+JC distance was used
+
+Number of nucleotide sites compared 373 (nsite=421)
+seed=27165 ninap=0

Added: trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-0
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-0	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-0	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+
+seq1 = cn416, 630 bp
+seq2 = Contig147.fa (>Contig147), 1086 bp
+
+36-132  (191-286)   89% <-
+133-191  (343-401)   93%

Added: trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+
+seq1 = cn416, 630 bp
+seq2 = Contig147.fa (>Contig147), 1086 bp
+
+
+36-132  (191-286)   89% <-
+133-191  (343-401)   93%
+
+      0     .    :    .    :    .    :    .    :    .    :
+     36 AAAGCCATGCAATGCATCCCATGATACCAAAGATAAAATATCCAAAAATC
+        |||||||||||||||||||-||||||||||||| || |||||||||||||
+    191 AAAGCCATGCAATGCATCC ATGATACCAAAGACAATATATCCAAAAATC
+
+     50     .    :    .    :    .    :    .    :    .    :
+     86 CATGACCGATCCTGATGCAATAACCAAGAGACCGTTTATCCCGGGAA   
+         | ||  |||| | ||||||||||||||||||| |||||||||||||<<<
+    240 TAAGATTGATCATCATGCAATAACCAAGAGACCCTTTATCCCGGGAACTA
+
+    100     .    :    .    :    .    :    .    :    .    :
+    133       TGCCACAACGTAGTCCTTCTGTACATCCTGAACATTTCCATCAT
+        ...<<<|||||||||||||||||| |||||||||||||||||||||||||
+    290 ...CACTGCCACAACGTAGTCCTTTTGTACATCCTGAACATTTCCATCAT
+
+    150     .    :    .
+    177 CATCAACGAA AAGTA
+        |||| |||||-||-||
+    387 CATCTACGAACAA TA
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-4
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-4	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/crypto.sim4-4	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,30 @@
+
+seq1 = cn416, 630 bp
+seq2 = Contig147.fa (>Contig147), 1086 bp
+
+>cn416 lastmodified=[2002-10-29 11:40:27] project=[B3501] length=[630] clusterid=[371] coverage=[1] crawid=[1] sequencecount=[2] longestsequence=[k9k11j2.f1] sequences=[k9k11j2.f1|k9k11j2.f1C] clone=[|k9k11j2] 
+>Contig147
+
+36-132  (191-286)   89% <-
+133-191  (343-401)   93%
+
+      0     .    :    .    :    .    :    .    :    .    :
+     36 AAAGCCATGCAATGCATCCCATGATACCAAAGATAAAATATCCAAAAATC
+        |||||||||||||||||||-||||||||||||| || |||||||||||||
+    191 AAAGCCATGCAATGCATCC ATGATACCAAAGACAATATATCCAAAAATC
+
+     50     .    :    .    :    .    :    .    :    .    :
+     86 CATGACCGATCCTGATGCAATAACCAAGAGACCGTTTATCCCGGGAA   
+         | ||  |||| | ||||||||||||||||||| |||||||||||||<<<
+    240 TAAGATTGATCATCATGCAATAACCAAGAGACCCTTTATCCCGGGAACTA
+
+    100     .    :    .    :    .    :    .    :    .    :
+    133       TGCCACAACGTAGTCCTTCTGTACATCCTGAACATTTCCATCAT
+        ...<<<|||||||||||||||||| |||||||||||||||||||||||||
+    290 ...CACTGCCACAACGTAGTCCTTTTGTACATCCTGAACATTTCCATCAT
+
+    150     .    :    .
+    177 CATCAACGAA AAGTA
+        |||| |||||-||-||
+    387 CATCTACGAACAA TA
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ctgdemo.fpc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ctgdemo.fpc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ctgdemo.fpc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7687 @@
+// fpc project v7demo
+// 7.2  Date: 18:13 Fri 09 Apr 2004  User: cari
+// Contigs 0  Clones 618  Markers 150  Bands 2028946
+// Framework Demo Label Chromosome Abbrev Chr Genome 50 AvgBand 4096  AvgInsert 150000
+// Configure 429 Tol 7 Cut 1e-10 Apx 0.100 Gel 3300 Min 0 End 15 Kill -1 Bad 15 Best 10 Log 0 Std 1 Page 3000 Match 2
+// CpM Off 50 1 0 0 TBL 1 1e-09 2 1e-08 3 1e-07
+// Build 1/1/70 0:0 Cut 1e-10 Off 50 1 0 0 TBL 1 1e-09 2 1e-08 3 1e-07 
+// Clip(0 4600) MinMax(0 65535)  DQer(5,1)
+  
+Clone : "a0001B07"
+Map "ctg1" Ends Left 457.000
+Map "ctg1" Ends Right 485.000 Oldctg 2
+Fp_number "01B07"
+Gel_number    100131
+Bands  836 29
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0001G11"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 507.000 Oldctg 2
+Fp_number "01G11"
+Gel_number    100112
+Bands  4102 24
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 27 16 50 
+  
+Clone : "a0001I05"
+Map "ctg1" Ends Left 548.000
+Map "ctg1" Ends Right 579.000 Oldctg 2
+Fp_number "01I05"
+Gel_number    100113
+Bands  5201 32
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0001J19"
+Map "ctg1" Ends Left 162.000
+Map "ctg1" Ends Right 199.000 Oldctg 2
+Fp_number "01J19"
+Gel_number    100133
+Bands  6171 38
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0001M01"
+Map "ctg1" Ends Left 46.000
+Map "ctg1" Ends Right 82.000 Oldctg 2
+Fp_number "01M01"
+Gel_number    100114
+Bands  7729 37
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0001O24"
+Map "ctg1" Ends Left 493.000
+Map "ctg1" Ends Right 515.000 Oldctg 2
+Fp_number "01O24"
+Gel_number    100124
+Bands  9590 23
+Approximate_match_to_cosmid "a0014I02"
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0002G06"
+Map "ctg1" Ends Left 553.000
+Map "ctg1" Ends Right 586.000 Oldctg 2
+Fp_number "02G06"
+Gel_number    100222
+Bands  14248 34
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0002I06"
+Map "ctg1" Ends Left 579.000
+Map "ctg1" Ends Right 607.000 Oldctg 2
+Fp_number "02I06"
+Gel_number    100223
+Bands  15562 29
+Approximate_match_to_cosmid "a0037I23"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000321_15" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0002J07"
+Map "ctg1" Ends Left 378.000
+Map "ctg1" Ends Right 417.000 Oldctg 2
+Fp_number "02J07"
+Gel_number    100233
+Bands  16169 39
+Pseudo_match_to_cosmid "a0023C14"
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0002N24"
+Map "ctg1" Ends Left 205.000
+Map "ctg1" Ends Right 237.000 Oldctg 2
+Fp_number "02N24"
+Gel_number    100244
+Bands  19147 33
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0002O22"
+Map "ctg1" Ends Left 84.000
+Map "ctg1" Ends Right 113.000 Oldctg 2
+Fp_number "02O22"
+Gel_number    100224
+Bands  19717 30
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0003C15"
+Map "ctg1" Ends Left 516.000
+Map "ctg1" Ends Right 548.000 Oldctg 2
+Fp_number "03C15"
+Gel_number    100311
+Bands  21818 33
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0003I09"
+Map "ctg1" Ends Left 260.000
+Map "ctg1" Ends Right 289.000 Oldctg 2
+Fp_number "03I09"
+Gel_number    100313
+Bands  25078 30
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0003K20"
+Map "ctg1" Ends Left 271.000
+Map "ctg1" Ends Right 300.000 Oldctg 2
+Fp_number "03K20"
+Gel_number    100323
+Bands  26667 30
+Positive_STS "OJ000317_34" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0004D12"
+Map "ctg1" Ends Left 110.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "04D12"
+Gel_number    100441
+Bands  31925 36
+Approximate_match_to_cosmid "a0038D13"
+Positive_OVERGO "SOG1055" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0004G07"
+Map "ctg1" Ends Left 181.000
+Map "ctg1" Ends Right 219.000 Oldctg 2
+Fp_number "04G07"
+Gel_number    100412
+Bands  33802 39
+Positive_STS "OJ000207_10" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0004I02"
+Map "ctg1" Ends Left 227.000
+Map "ctg1" Ends Right 259.000 Oldctg 2
+Fp_number "04I02"
+Gel_number    100423
+Bands  35015 33
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0004L12"
+Map "ctg1" Ends Left 518.000
+Map "ctg1" Ends Right 546.000 Oldctg 2
+Fp_number "04L12"
+Gel_number    100443
+Bands  37433 29
+Approximate_match_to_cosmid "a0003C15"
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0005D14"
+Map "ctg1" Ends Left 48.000
+Map "ctg1" Ends Right 84.000 Oldctg 2
+Fp_number "05D14"
+Gel_number    100541
+Bands  42236 37
+Positive_STS "42D18r" 
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0005K03"
+Map "ctg1" Ends Left 108.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "05K03"
+Gel_number    100513
+Bands  46610 38
+Approximate_match_to_cosmid "a0038D13"
+Positive_STS "C1679" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0005L07"
+Map "ctg1" Ends Left 447.000
+Map "ctg1" Ends Right 481.000 Oldctg 2
+Fp_number "05L07"
+Gel_number    100533
+Bands  47296 35
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ990923_08" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Positive_Probe "CSU469" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0006B21"
+Map "ctg1" Ends Left 589.000
+Map "ctg1" Ends Right 614.000 Oldctg 2
+Fp_number "06B21"
+Gel_number    100631
+Bands  51617 26
+Approximate_match_to_cosmid "a0041C20"
+Positive_STS "chlor" 
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0006G06"
+Map "ctg1" Ends Left 569.000
+Map "ctg1" Ends Right 600.000 Oldctg 2
+Fp_number "06G06"
+Gel_number    100622
+Bands  54655 32
+Positive_STS "C953" 
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0006I10"
+Map "ctg1" Ends Left 163.000
+Map "ctg1" Ends Right 179.000 Oldctg 2
+Fp_number "06I10"
+Gel_number    100623
+Bands  56103 17
+Exact_match_to_cosmid "a0021N13"
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0006N03"
+Map "ctg1" Ends Left 473.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "06N03"
+Gel_number    100634
+Bands  59123 29
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1346" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0007A20"
+Map "ctg1" Ends Left 371.000
+Map "ctg1" Ends Right 400.000 Oldctg 2
+Fp_number "07A20"
+Gel_number    100721
+Bands  61489 30
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0009A16"
+Map "ctg1" Ends Left 378.000
+Map "ctg1" Ends Right 416.000 Oldctg 2
+Fp_number "09A16"
+Gel_number    100921
+Bands  81626 38
+Pseudo_match_to_cosmid "a0025E13"
+Creation_date 99 11 12 9 2 
+Modified_date 103 2 5 13 55 
+  
+Clone : "a0009E22"
+Map "ctg1" Ends Left 173.000
+Map "ctg1" Ends Right 210.000 Oldctg 2
+Fp_number "09E22"
+Gel_number    100922
+Bands  84566 38
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0009F12"
+Map "ctg1" Ends Left 10.000
+Map "ctg1" Ends Right 40.000 Oldctg 2
+Fp_number "09F12"
+Gel_number    100942
+Bands  84962 31
+Approximate_match_to_cosmid "a0070F12"
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0009K24"
+Map "ctg1" Ends Left 297.000
+Map "ctg1" Ends Right 320.000 Oldctg 2
+Fp_number "09K24"
+Gel_number    100923
+Bands  88864 24
+Positive_STS "OJ990503_27" 
+Positive_STS "OJ991021_17" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0010G19"
+Map "ctg1" Ends Left 0.000
+Map "ctg1" Ends Right 32.000 Oldctg 2
+Fp_number "10G19"
+Gel_number    101012
+Bands  96669 33
+Shotgun Full_X SHOTGUN
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0010H08"
+Map "ctg1" Ends Left 245.000
+Map "ctg1" Ends Right 273.000 Oldctg 2
+Fp_number "10H08"
+Gel_number    101042
+Bands  97048 29
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0010M06"
+Map "ctg1" Ends Left 568.000
+Map "ctg1" Ends Right 600.000 Oldctg 2
+Fp_number "10M06"
+Gel_number    101024
+Bands  100422 33
+Positive_STS "C953" 
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0010N22"
+Map "ctg1" Ends Left 106.000
+Map "ctg1" Ends Right 141.000 Oldctg 2
+Fp_number "10N22"
+Gel_number    101044
+Bands  101569 36
+Approximate_match_to_cosmid "a0038D13"
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0011A23"
+Map "ctg1" Ends Left 163.000
+Map "ctg1" Ends Right 198.000 Oldctg 2
+Fp_number "11A23"
+Gel_number    101111
+Bands  103591 36
+Creation_date 99 11 12 9 2 
+Modified_date 99 11 12 14 45 
+  
+Clone : "a0011L22"
+Map "ctg1" Ends Left 156.000
+Map "ctg1" Ends Right 190.000 Oldctg 2
+Fp_number "11L22"
+Gel_number    101143
+Bands  110824 35
+Positive_STS "G107" 
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0011M14"
+Map "ctg1" Ends Left 152.000
+Map "ctg1" Ends Right 178.000 Oldctg 2
+Fp_number "11M14"
+Gel_number    101124
+Bands  111251 27
+Approximate_match_to_cosmid "a0011L22"
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0012B02"
+Map "ctg1" Ends Left 42.000
+Map "ctg1" Ends Right 79.000 Oldctg 2
+Fp_number "12B02"
+Gel_number    101241
+Bands  113921 38
+Creation_date 99 11 12 9 2 
+Modified_date 99 11 12 14 28 
+  
+Clone : "a0012O15"
+Map "ctg1" Ends Left 209.000
+Map "ctg1" Ends Right 235.000 Oldctg 2
+Fp_number "12O15"
+Gel_number    101214
+Bands  122792 27
+Approximate_match_to_cosmid "a0002N24"
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0013N10"
+Map "ctg1" Ends Left 115.000
+Map "ctg1" Ends Right 144.000 Oldctg 2
+Fp_number "13N10"
+Gel_number    101344
+Bands  131584 30
+Exact_match_to_cosmid "a0038D13"
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0013P03"
+Map "ctg1" Ends Left 217.000
+Map "ctg1" Ends Right 244.000 Oldctg 2
+Fp_number "13P03"
+Gel_number    101334
+Bands  132571 28
+Approximate_match_to_cosmid "a0022E11"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0014I02"
+Map "ctg1" Ends Left 493.000
+Map "ctg1" Ends Right 515.000 Oldctg 2
+Fp_number "14I02"
+Gel_number    101423
+Bands  137990 23
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0014N06"
+Map "ctg1" Ends Left 122.000
+Map "ctg1" Ends Right 148.000 Oldctg 2
+Fp_number "14N06"
+Gel_number    101444
+Bands  141136 27
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0015D20"
+Map "ctg1" Ends Left 5.000
+Map "ctg1" Ends Right 35.000 Oldctg 2
+Fp_number "15D20"
+Gel_number    101541
+Bands  144994 31
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0015J18"
+Map "ctg1" Ends Left 3.000
+Map "ctg1" Ends Right 33.000 Oldctg 2
+Fp_number "15J18"
+Gel_number    101543
+Bands  148482 31
+Approximate_match_to_cosmid "a0015D20"
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0015K07"
+Map "ctg1" Ends Left 335.000
+Map "ctg1" Ends Right 368.000 Oldctg 2
+Fp_number "15K07"
+Gel_number    101513
+Bands  148811 34
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0015N05"
+Map "ctg1" Ends Left 8.000
+Map "ctg1" Ends Right 35.000 Oldctg 2
+Fp_number "15N05"
+Gel_number    101534
+Bands  150464 28
+Approximate_match_to_cosmid "a0020E12"
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG1204" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0016B10"
+Map "ctg1" Ends Left 396.000
+Map "ctg1" Ends Right 430.000 Oldctg 2
+Fp_number "16B10"
+Gel_number    101641
+Bands  153073 35
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0016J14"
+Map "ctg1" Ends Left 546.000
+Map "ctg1" Ends Right 571.000 Oldctg 2
+Fp_number "16J14"
+Gel_number    101643
+Bands  158373 26
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0017B08"
+Map "ctg1" Ends Left 227.000
+Map "ctg1" Ends Right 240.000 Oldctg 2
+Fp_number "17B08"
+Gel_number    101741
+Bands  163070 14
+Exact_match_to_cosmid "a0004I02"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0017C04"
+Map "ctg1" Ends Left 486.000
+Map "ctg1" Ends Right 506.000 Oldctg 2
+Fp_number "17C04"
+Gel_number    101721
+Bands  163629 21
+Approximate_match_to_cosmid "a0096O08"
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0017J17"
+Map "ctg1" Ends Left 116.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "17J17"
+Gel_number    101733
+Bands  168664 30
+Approximate_match_to_cosmid "a0038D13"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0980" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0017K20"
+Map "ctg1" Ends Left 502.000
+Map "ctg1" Ends Right 531.000 Oldctg 2
+Fp_number "17K20"
+Gel_number    101723
+Bands  169460 30
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0018B18"
+Map "ctg1" Ends Left 107.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "18B18"
+Gel_number    101841
+Bands  174269 39
+Creation_date 99 11 12 9 2 
+Modified_date 99 11 12 14 28 
+  
+Clone : "a0018B22"
+Map "ctg1" Ends Left 99.000
+Map "ctg1" Ends Right 143.000 Oldctg 2
+Fp_number "18B22"
+Gel_number    101841
+Bands  174398 45
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0018K01"
+Map "ctg1" Ends Left 452.000
+Map "ctg1" Ends Right 474.000 Oldctg 2
+Fp_number "18K01"
+Gel_number    101813
+Bands  180202 23
+Approximate_match_to_cosmid "a0005L07"
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ990923_08" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Positive_Probe "CSU469" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0019D09"
+Map "ctg1" Ends Left 230.000
+Map "ctg1" Ends Right 261.000 Oldctg 2
+Fp_number "19D09"
+Gel_number    101931
+Bands  186844 32
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0019K24"
+Map "ctg1" Ends Left 492.000
+Map "ctg1" Ends Right 512.000 Oldctg 2
+Fp_number "19K24"
+Gel_number    101923
+Bands  192161 21
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0019N07"
+Map "ctg1" Ends Left 337.000
+Map "ctg1" Ends Right 368.000 Oldctg 2
+Fp_number "19N07"
+Gel_number    101934
+Bands  193716 32
+Approximate_match_to_cosmid "a0015K07"
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0020E12"
+Map "ctg1" Ends Left 6.000
+Map "ctg1" Ends Right 36.000 Oldctg 2
+Fp_number "20E12"
+Gel_number    102022
+Bands  198227 31
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0020G09"
+Map "ctg1" Ends Left 523.000
+Map "ctg1" Ends Right 548.000 Oldctg 2
+Fp_number "20G09"
+Gel_number    102012
+Bands  199480 26
+Exact_match_to_cosmid "a0003C15"
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0021G13"
+Map "ctg1" Ends Left 492.000
+Map "ctg1" Ends Right 510.000 Oldctg 2
+Fp_number "21G13"
+Gel_number    102112
+Bands  210084 19
+Approximate_match_to_cosmid "a0019K24"
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0021N13"
+Map "ctg1" Ends Left 152.000
+Map "ctg1" Ends Right 184.000 Oldctg 2
+Fp_number "21N13"
+Gel_number    102134
+Bands  214583 33
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0021O04"
+Map "ctg1" Ends Left 315.000
+Map "ctg1" Ends Right 343.000 Oldctg 2
+Fp_number "21O04"
+Gel_number    102124
+Bands  214946 29
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0022E11"
+Map "ctg1" Ends Left 209.000
+Map "ctg1" Ends Right 245.000 Oldctg 2
+Fp_number "22E11"
+Gel_number    102212
+Bands  219203 37
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0023C14"
+Map "ctg1" Ends Left 394.000
+Map "ctg1" Ends Right 422.000 Oldctg 2
+Fp_number "23C14"
+Gel_number    102321
+Bands  228846 29
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0023I04"
+Map "ctg1" Ends Left 14.000
+Map "ctg1" Ends Right 44.000 Oldctg 2
+Fp_number "23I04"
+Gel_number    102323
+Bands  232493 31
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 55 
+  
+Clone : "a0023L13"
+Map "ctg1" Ends Left 80.000
+Map "ctg1" Ends Right 93.000 Oldctg 2
+Fp_number "23L13"
+Gel_number    102333
+Bands  234671 14
+Creation_date 99 11 12 9 2 
+Modified_date 99 11 12 14 28 
+  
+Clone : "a0023L15"
+Map "ctg1" Ends Left 320.000
+Map "ctg1" Ends Right 355.000 Oldctg 2
+Fp_number "23L15"
+Gel_number    102333
+Bands  234718 36
+Creation_date 99 11 12 9 2 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0023M15"
+Map "ctg1" Ends Left 250.000
+Map "ctg1" Ends Right 275.000 Oldctg 2
+Fp_number "23M15"
+Gel_number    102314
+Bands  235396 26
+Positive_STS "OJ000317_34" 
+Positive_OVERGO "SOG1450" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0024D10"
+Map "ctg1" Ends Left 348.000
+Map "ctg1" Ends Right 384.000 Oldctg 2
+Fp_number "24D10"
+Gel_number    102441
+Bands  239966 37
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 2 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0024D14"
+Map "ctg1" Ends Left 45.000
+Map "ctg1" Ends Right 80.000 Oldctg 2
+Fp_number "24D14"
+Gel_number    102441
+Bands  240080 36
+Positive_STS "42D18r" 
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0024L22"
+Map "ctg1" Ends Left 474.000
+Map "ctg1" Ends Right 495.000 Oldctg 2
+Fp_number "24L22"
+Gel_number    102443
+Bands  245679 22
+Approximate_match_to_cosmid "a0040L18"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0024M12"
+Map "ctg1" Ends Left 3.000
+Map "ctg1" Ends Right 33.000 Oldctg 2
+Fp_number "24M12"
+Gel_number    102424
+Bands  246050 31
+Approximate_match_to_cosmid "a0010G19"
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0025A11"
+Map "ctg1" Ends Left 208.000
+Map "ctg1" Ends Right 240.000 Oldctg 2
+Fp_number "25A11"
+Gel_number    102511
+Bands  248526 33
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0025B18"
+Map "ctg1" Ends Left 265.000
+Map "ctg1" Ends Right 279.000 Oldctg 2
+Fp_number "25B18"
+Gel_number    102541
+Bands  249366 15
+Exact_match_to_cosmid "a0003I09"
+Positive_STS "OJ000317_34" 
+Positive_OVERGO "SOG1450" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0025E13"
+Map "ctg1" Ends Left 389.000
+Map "ctg1" Ends Right 425.000 Oldctg 2
+Fp_number "25E13"
+Gel_number    102512
+Bands  251114 37
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0025O08"
+Map "ctg1" Ends Left 614.000
+Map "ctg1" Ends Right 643.000 Oldctg 2
+Fp_number "25O08"
+Gel_number    102524
+Bands  257565 30
+Positive_STS "chlor" 
+Creation_date 99 11 12 9 2 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0025O10"
+Map "ctg1" Ends Left 69.000
+Map "ctg1" Ends Right 102.000 Oldctg 2
+Fp_number "25O10"
+Gel_number    102524
+Bands  257625 34
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 12 9 2 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0026E03"
+Map "ctg1" Ends Left 137.000
+Map "ctg1" Ends Right 160.000 Oldctg 2
+Fp_number "26E03"
+Gel_number    102612
+Bands  261382 24
+Approximate_match_to_cosmid "a0032C10"
+Positive_STS "PIB1" 
+Creation_date 99 11 12 9 3 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0026N04"
+Map "ctg1" Ends Left 394.000
+Map "ctg1" Ends Right 425.000 Oldctg 2
+Fp_number "26N04"
+Gel_number    102644
+Bands  267367 31
+Pseudo_match_to_cosmid "a0023C14"
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0026P24"
+Map "ctg1" Ends Left 213.000
+Map "ctg1" Ends Right 240.000 Oldctg 2
+Fp_number "26P24"
+Gel_number    102644
+Bands  269209 28
+Approximate_match_to_cosmid "a0022E11"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0027M09"
+Map "ctg1" Ends Left 396.000
+Map "ctg1" Ends Right 431.000 Oldctg 2
+Fp_number "27M09"
+Gel_number    102714
+Bands  277492 36
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0028A11"
+Map "ctg1" Ends Left 569.000
+Map "ctg1" Ends Right 598.000 Oldctg 2
+Fp_number "28A11"
+Gel_number    102811
+Bands  280452 30
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0028D16"
+Map "ctg1" Ends Left 389.000
+Map "ctg1" Ends Right 427.000 Oldctg 2
+Fp_number "28D16"
+Gel_number    102841
+Bands  282500 39
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0028F17"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "28F17"
+Gel_number    102832
+Bands  283734 18
+Exact_match_to_cosmid "a0006N03"
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0028G23"
+Map "ctg1" Ends Left 486.000
+Map "ctg1" Ends Right 505.000 Oldctg 2
+Fp_number "28G23"
+Gel_number    102812
+Bands  284513 20
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 56 
+  
+Clone : "a0029L17"
+Map "ctg1" Ends Left 553.000
+Map "ctg1" Ends Right 586.000 Oldctg 2
+Fp_number "29L17"
+Gel_number    102933
+Bands  297482 34
+Approximate_match_to_cosmid "a0053D17"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Positive_OVERGO "SOG0313" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0030A14"
+Map "ctg1" Ends Left 186.000
+Map "ctg1" Ends Right 216.000 Oldctg 2
+Fp_number "30A14"
+Gel_number    103021
+Bands  300707 31
+Approximate_match_to_cosmid "a0040B22"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0030P03"
+Map "ctg1" Ends Left 3.000
+Map "ctg1" Ends Right 32.000 Oldctg 2
+Fp_number "30P03"
+Gel_number    103034
+Bands  309539 30
+Positive_STS "10E20fA" 
+Creation_date 99 11 12 9 3 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0032A01"
+Map "ctg1" Ends Left 49.000
+Map "ctg1" Ends Right 78.000 Oldctg 2
+Fp_number "32A01"
+Gel_number    103211
+Bands  320804 30
+Exact_match_to_cosmid "a0001M01"
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0032C02"
+Map "ctg1" Ends Left 29.000
+Map "ctg1" Ends Right 70.000 Oldctg 2
+Fp_number "32C02"
+Gel_number    103221
+Bands  322181 42
+Creation_date 99 11 12 9 3 
+Modified_date 101 1 5 17 55 
+  
+Clone : "a0032C10"
+Map "ctg1" Ends Left 126.000
+Map "ctg1" Ends Right 161.000 Oldctg 2
+Fp_number "32C10"
+Gel_number    103221
+Bands  322444 36
+Positive_STS "PIB1" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0032J18"
+Map "ctg1" Ends Left 348.000
+Map "ctg1" Ends Right 384.000 Oldctg 2
+Fp_number "32J18"
+Gel_number    103243
+Bands  327168 37
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0033A10"
+Map "ctg1" Ends Left 88.000
+Map "ctg1" Ends Right 118.000 Oldctg 2
+Fp_number "33A10"
+Gel_number    103321
+Bands  331591 31
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0033H13"
+Map "ctg1" Ends Left 485.000
+Map "ctg1" Ends Right 506.000 Oldctg 2
+Fp_number "33H13"
+Gel_number    103332
+Bands  335911 22
+Exact_match_to_cosmid "a0001G11"
+Positive_STS "10E20fA" 
+Creation_date 99 11 12 9 3 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0034C08"
+Map "ctg1" Ends Left 355.000
+Map "ctg1" Ends Right 392.000 Oldctg 2
+Fp_number "34C08"
+Gel_number    103421
+Bands  342512 38
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0034D15"
+Map "ctg1" Ends Left 588.000
+Map "ctg1" Ends Right 602.000 Oldctg 2
+Fp_number "34D15"
+Gel_number    103431
+Bands  343338 15
+Exact_match_to_cosmid "a0091B10"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0034I17"
+Map "ctg1" Ends Left 317.000
+Map "ctg1" Ends Right 339.000 Oldctg 2
+Fp_number "34I17"
+Gel_number    103413
+Bands  346746 23
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 56 
+  
+Clone : "a0034N02"
+Map "ctg1" Ends Left 40.000
+Map "ctg1" Ends Right 81.000 Oldctg 2
+Fp_number "34N02"
+Gel_number    103444
+Bands  349688 42
+Creation_date 99 11 12 9 3 
+Modified_date 99 11 12 14 28 
+  
+Clone : "a0034P17"
+Map "ctg1" Ends Left 483.000
+Map "ctg1" Ends Right 504.000 Oldctg 2
+Fp_number "34P17"
+Gel_number    103434
+Bands  351379 22
+Approximate_match_to_cosmid "b0001O16"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0035D22"
+Map "ctg1" Ends Left 549.000
+Map "ctg1" Ends Right 578.000 Oldctg 2
+Fp_number "35D22"
+Gel_number    103541
+Bands  354048 30
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0035O09"
+Map "ctg1" Ends Left 597.000
+Map "ctg1" Ends Right 635.000 Oldctg 2
+Fp_number "35O09"
+Gel_number    103514
+Bands  361213 39
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0036C08"
+Map "ctg1" Ends Left 245.000
+Map "ctg1" Ends Right 274.000 Oldctg 2
+Fp_number "36C08"
+Gel_number    103621
+Bands  363882 30
+Approximate_match_to_cosmid "a0083M16"
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0036K03"
+Map "ctg1" Ends Left 131.000
+Map "ctg1" Ends Right 153.000 Oldctg 2
+Fp_number "36K03"
+Gel_number    103613
+Bands  368992 23
+Approximate_match_to_cosmid "a0032C10"
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0037C05"
+Map "ctg1" Ends Left 220.000
+Map "ctg1" Ends Right 250.000 Oldctg 2
+Fp_number "37C05"
+Gel_number    103711
+Bands  374413 31
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0037G06"
+Map "ctg1" Ends Left 594.000
+Map "ctg1" Ends Right 625.000 Oldctg 2
+Fp_number "37G06"
+Gel_number    103722
+Bands  376973 32
+Approximate_match_to_cosmid "a0091H17"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0037I23"
+Map "ctg1" Ends Left 578.000
+Map "ctg1" Ends Right 614.000 Oldctg 2
+Fp_number "37I23"
+Gel_number    103713
+Bands  378650 37
+Positive_OVERGO "SOG0279" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0038D13"
+Map "ctg1" Ends Left 106.000
+Map "ctg1" Ends Right 147.000 Oldctg 2
+Fp_number "38D13"
+Gel_number    103831
+Bands  385536 42
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0038H05"
+Map "ctg1" Ends Left 493.000
+Map "ctg1" Ends Right 511.000 Oldctg 2
+Fp_number "38H05"
+Gel_number    103832
+Bands  387956 19
+Exact_match_to_cosmid "a0019K24"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0038P04"
+Map "ctg1" Ends Left 123.000
+Map "ctg1" Ends Right 151.000 Oldctg 2
+Fp_number "38P04"
+Gel_number    103844
+Bands  393147 29
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0039B13"
+Map "ctg1" Ends Left 136.000
+Map "ctg1" Ends Right 163.000 Oldctg 2
+Fp_number "39B13"
+Gel_number    103931
+Bands  394642 28
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0039K03"
+Map "ctg1" Ends Left 404.000
+Map "ctg1" Ends Right 440.000 Oldctg 2
+Fp_number "39K03"
+Gel_number    103913
+Bands  400166 37
+Positive_STS "10E20fA" 
+Positive_STS "C51175" 
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0039N09"
+Map "ctg1" Ends Left 592.000
+Map "ctg1" Ends Right 614.000 Oldctg 2
+Fp_number "39N09"
+Gel_number    103934
+Bands  402278 23
+Pseudo_match_to_cosmid "a0094G11"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0040B22"
+Map "ctg1" Ends Left 178.000
+Map "ctg1" Ends Right 221.000 Oldctg 2
+Fp_number "40B22"
+Gel_number    104041
+Bands  405224 44
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_STS "OJ990407_02" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0040H03"
+Map "ctg1" Ends Left 220.000
+Map "ctg1" Ends Right 244.000 Oldctg 2
+Fp_number "40H03"
+Gel_number    104032
+Bands  408862 25
+Approximate_match_to_cosmid "a0022E11"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0040J06"
+Map "ctg1" Ends Left 415.000
+Map "ctg1" Ends Right 447.000 Oldctg 2
+Fp_number "40J06"
+Gel_number    104043
+Bands  410276 33
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0040L18"
+Map "ctg1" Ends Left 470.000
+Map "ctg1" Ends Right 498.000 Oldctg 2
+Fp_number "40L18"
+Gel_number    104043
+Bands  412090 29
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0041C20"
+Map "ctg1" Ends Left 578.000
+Map "ctg1" Ends Right 616.000 Oldctg 2
+Fp_number "41C20"
+Gel_number    104121
+Bands  417019 39
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_OVERGO "SOG0279" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0042G06"
+Map "ctg1" Ends Left 485.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "42G06"
+Gel_number    104222
+Bands  430096 17
+Exact_match_to_cosmid "a0006N03"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0042K01"
+Map "ctg1" Ends Left 163.000
+Map "ctg1" Ends Right 180.000 Oldctg 2
+Fp_number "42K01"
+Gel_number    104213
+Bands  432631 18
+Exact_match_to_cosmid "a0001J19"
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0043C05"
+Map "ctg1" Ends Left 115.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "43C05"
+Gel_number    104311
+Bands  438364 31
+Approximate_match_to_cosmid "a0038D13"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0043F13"
+Map "ctg1" Ends Left 5.000
+Map "ctg1" Ends Right 34.000 Oldctg 2
+Fp_number "43F13"
+Gel_number    104332
+Bands  440357 30
+Approximate_match_to_cosmid "a0070F12"
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0043O12"
+Map "ctg1" Ends Left 431.000
+Map "ctg1" Ends Right 466.000 Oldctg 2
+Fp_number "43O12"
+Gel_number    104324
+Bands  445832 36
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0044D22"
+Map "ctg1" Ends Left 394.000
+Map "ctg1" Ends Right 425.000 Oldctg 2
+Fp_number "44D22"
+Gel_number    104441
+Bands  449244 32
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0044F13"
+Map "ctg1" Ends Left 40.000
+Map "ctg1" Ends Right 77.000 Oldctg 2
+Fp_number "44F13"
+Gel_number    104432
+Bands  450453 38
+Positive_STS "42D18r" 
+Creation_date 99 11 12 9 3 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0045F08"
+Map "ctg1" Ends Left 483.000
+Map "ctg1" Ends Right 506.000 Oldctg 2
+Fp_number "45F08"
+Gel_number    104542
+Bands  461042 24
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0045G04"
+Map "ctg1" Ends Left 141.000
+Map "ctg1" Ends Right 162.000 Oldctg 2
+Fp_number "45G04"
+Gel_number    104522
+Bands  461583 22
+Creation_date 99 11 12 9 3 
+Modified_date 99 11 12 14 28 
+  
+Clone : "a0045M07"
+Map "ctg1" Ends Left 227.000
+Map "ctg1" Ends Right 250.000 Oldctg 2
+Fp_number "45M07"
+Gel_number    104514
+Bands  465482 24
+Exact_match_to_cosmid "a0004I02"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0045P15"
+Map "ctg1" Ends Left 476.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "45P15"
+Gel_number    104534
+Bands  467587 26
+Exact_match_to_cosmid "a0006N03"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0046B03"
+Map "ctg1" Ends Left 259.000
+Map "ctg1" Ends Right 286.000 Oldctg 2
+Fp_number "46B03"
+Gel_number    104631
+Bands  468434 28
+Exact_match_to_cosmid "a0048L02"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0046C18"
+Map "ctg1" Ends Left 493.000
+Map "ctg1" Ends Right 511.000 Oldctg 2
+Fp_number "46C18"
+Gel_number    104621
+Bands  469569 19
+Exact_match_to_cosmid "a0019K24"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_OVERGO "SOG1745" 
+Positive_OVERGO "SOG2238" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0046K10"
+Map "ctg1" Ends Left 167.000
+Map "ctg1" Ends Right 199.000 Oldctg 2
+Fp_number "46K10"
+Gel_number    104623
+Bands  474577 33
+Creation_date 99 11 12 9 3 
+Modified_date 99 11 12 14 45 
+  
+Clone : "a0047E16"
+Map "ctg1" Ends Left 476.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "47E16"
+Gel_number    104722
+Bands  481492 26
+Approximate_match_to_cosmid "a0006N03"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0047M08"
+Map "ctg1" Ends Left 299.000
+Map "ctg1" Ends Right 328.000 Oldctg 2
+Fp_number "47M08"
+Gel_number    104724
+Bands  486469 30
+Positive_STS "OJ990503_27" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 47 
+  
+Clone : "a0047O06"
+Map "ctg1" Ends Left 245.000
+Map "ctg1" Ends Right 265.000 Oldctg 2
+Fp_number "47O06"
+Gel_number    104724
+Bands  487629 21
+Positive_STS "R753" 
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0048K04"
+Map "ctg1" Ends Left 34.000
+Map "ctg1" Ends Right 71.000 Oldctg 2
+Fp_number "48K04"
+Gel_number    104823
+Bands  495678 38
+Positive_STS "42D18r" 
+Creation_date 99 11 12 9 3 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0048L02"
+Map "ctg1" Ends Left 258.000
+Map "ctg1" Ends Right 294.000 Oldctg 2
+Fp_number "48L02"
+Gel_number    104843
+Bands  496288 37
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0049A12"
+Map "ctg1" Ends Left 555.000
+Map "ctg1" Ends Right 585.000 Oldctg 2
+Fp_number "0049A12"
+Gel_number    104921
+Bands  499969 31
+Approximate_match_to_cosmid "a0053D17"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0049D09"
+Map "ctg1" Ends Left 491.000
+Map "ctg1" Ends Right 510.000 Oldctg 2
+Fp_number "0049D09"
+Gel_number    104931
+Bands  501959 20
+Exact_match_to_cosmid "a0063D20"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0049E12"
+Map "ctg1" Ends Left 101.000
+Map "ctg1" Ends Right 137.000 Oldctg 2
+Fp_number "0049E12"
+Gel_number    104922
+Bands  502745 37
+Approximate_match_to_cosmid "b0091A24"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 3 
+Modified_date 104 4 9 18 13 
+  
+Clone : "a0049F06"
+Map "ctg1" Ends Left 104.000
+Map "ctg1" Ends Right 143.000 Oldctg 2
+Fp_number "0049F06"
+Gel_number    104942
+Bands  503272 40
+Positive_STS "C1679" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0049H15"
+Map "ctg1" Ends Left 360.000
+Map "ctg1" Ends Right 400.000 Oldctg 2
+Fp_number "0049H15"
+Gel_number    104932
+Bands  504985 41
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0049J04"
+Map "ctg1" Ends Left 407.000
+Map "ctg1" Ends Right 435.000 Oldctg 2
+Fp_number "0049J04"
+Gel_number    104943
+Bands  506062 29
+Approximate_match_to_cosmid "a0050I18"
+Positive_STS "C146" 
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0049O05"
+Map "ctg1" Ends Left 114.000
+Map "ctg1" Ends Right 144.000 Oldctg 2
+Fp_number "0049O05"
+Gel_number    104914
+Bands  509510 31
+Exact_match_to_cosmid "a0051O20"
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0050I02"
+Map "ctg1" Ends Left 42.000
+Map "ctg1" Ends Right 81.000 Oldctg 2
+Fp_number "0050I02"
+Gel_number    105023
+Bands  516137 40
+Creation_date 99 11 12 9 3 
+Modified_date 99 11 12 14 28 
+  
+Clone : "a0050I18"
+Map "ctg1" Ends Left 396.000
+Map "ctg1" Ends Right 431.000 Oldctg 2
+Fp_number "0050I18"
+Gel_number    105023
+Bands  516598 36
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0050L04"
+Map "ctg1" Ends Left 111.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "0050L04"
+Gel_number    105043
+Bands  518213 36
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0051E17"
+Map "ctg1" Ends Left 503.000
+Map "ctg1" Ends Right 527.000 Oldctg 2
+Fp_number "0051E17"
+Gel_number    605112
+Bands  524104 25
+Approximate_match_to_cosmid "b0002J03"
+Creation_date 99 11 13 0 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0051F10"
+Map "ctg1" Ends Left 486.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "0051F10"
+Gel_number    105142
+Bands  524513 16
+Approximate_match_to_cosmid "b0081N07"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_Probe "PRC0214" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0051I19"
+Map "ctg1" Ends Left 12.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Fp_number "0051I19"
+Gel_number    105113
+Bands  526482 35
+Exact_match_to_cosmid "a0060C20"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0051O20"
+Map "ctg1" Ends Left 106.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "0051O20"
+Gel_number    105124
+Bands  530100 40
+Positive_STS "OJ991015_10" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0052E04"
+Map "ctg1" Ends Left 206.000
+Map "ctg1" Ends Right 240.000 Oldctg 2
+Fp_number "0052E04"
+Gel_number    105222
+Bands  533610 35
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0052N06"
+Map "ctg1" Ends Left 206.000
+Map "ctg1" Ends Right 236.000 Oldctg 2
+Fp_number "0052N06"
+Gel_number    105244
+Bands  539950 31
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0053B14"
+Map "ctg1" Ends Left 57.000
+Map "ctg1" Ends Right 91.000 Oldctg 2
+Fp_number "0053B14"
+Gel_number    105341
+Bands  543015 35
+Approximate_match_to_cosmid "a0053J14"
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0053D17"
+Map "ctg1" Ends Left 550.000
+Map "ctg1" Ends Right 585.000 Oldctg 2
+Fp_number "0053D17"
+Gel_number    105331
+Bands  544532 36
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0053F20"
+Map "ctg1" Ends Left 498.000
+Map "ctg1" Ends Right 516.000 Oldctg 2
+Fp_number "0053F20"
+Gel_number    105342
+Bands  546031 19
+Approximate_match_to_cosmid "a0067L02"
+Positive_STS "OJ990820_14" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0053G14"
+Map "ctg1" Ends Left 486.000
+Map "ctg1" Ends Right 505.000 Oldctg 2
+Fp_number "0053G14"
+Gel_number    105322
+Bands  546524 20
+Exact_match_to_cosmid "a0096O08"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0053J14"
+Map "ctg1" Ends Left 54.000
+Map "ctg1" Ends Right 92.000 Oldctg 2
+Fp_number "0053J14"
+Gel_number    105343
+Bands  548609 39
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0054G21"
+Map "ctg1" Ends Left 293.000
+Map "ctg1" Ends Right 317.000 Oldctg 2
+Fp_number "0054G21"
+Gel_number    105412
+Bands  557668 25
+Positive_STS "OJ990503_27" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0054P14"
+Map "ctg1" Ends Left 85.000
+Map "ctg1" Ends Right 119.000 Oldctg 2
+Fp_number "0054P14"
+Gel_number    105444
+Bands  563712 35
+Approximate_match_to_cosmid "a0082D09"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0055N23"
+Map "ctg1" Ends Left 78.000
+Map "ctg1" Ends Right 93.000 Oldctg 2
+Fp_number "0055N23"
+Gel_number    105534
+Bands  574301 16
+Exact_match_to_cosmid "a0091K03"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0056K12"
+Map "ctg1" Ends Left 441.000
+Map "ctg1" Ends Right 473.000 Oldctg 2
+Fp_number "0056K12"
+Gel_number    105623
+Bands  583074 33
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0056M10"
+Map "ctg1" Ends Left 45.000
+Map "ctg1" Ends Right 81.000 Oldctg 2
+Fp_number "0056M10"
+Gel_number    105624
+Bands  584323 37
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0057C14"
+Map "ctg1" Ends Left 569.000
+Map "ctg1" Ends Right 602.000 Oldctg 2
+Fp_number "0057C14"
+Gel_number    105721
+Bands  588475 34
+Approximate_match_to_cosmid "a0074A09"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0057C18"
+Map "ctg1" Ends Left 346.000
+Map "ctg1" Ends Right 375.000 Oldctg 2
+Fp_number "0057C18"
+Gel_number    105721
+Bands  588610 30
+Approximate_match_to_cosmid "a0089D17"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0057K18"
+Map "ctg1" Ends Left 261.000
+Map "ctg1" Ends Right 289.000 Oldctg 2
+Fp_number "0057K18"
+Gel_number    105723
+Bands  594185 29
+Approximate_match_to_cosmid "a0085N03"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0057M22"
+Map "ctg1" Ends Left 408.000
+Map "ctg1" Ends Right 440.000 Oldctg 2
+Fp_number "0057M22"
+Gel_number    105724
+Bands  595649 33
+Approximate_match_to_cosmid "a0087L03"
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0058C01"
+Map "ctg1" Ends Left 308.000
+Map "ctg1" Ends Right 336.000 Oldctg 2
+Fp_number "0058C01"
+Gel_number    105811
+Bands  599096 29
+Positive_STS "OJ990503_27" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0059H02"
+Map "ctg1" Ends Left 402.000
+Map "ctg1" Ends Right 435.000 Oldctg 2
+Fp_number "0059H02"
+Gel_number    105942
+Bands  613169 34
+Approximate_match_to_cosmid "a0050I18"
+Positive_STS "C146" 
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ000350_20" 
+Positive_OVERGO "SOG1200" 
+Positive_Probe "CSU455" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0060C20"
+Map "ctg1" Ends Left 11.000
+Map "ctg1" Ends Right 49.000 Oldctg 2
+Fp_number "0060C20"
+Gel_number    106021
+Bands  620760 39
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0060H05"
+Map "ctg1" Ends Left 315.000
+Map "ctg1" Ends Right 344.000 Oldctg 2
+Fp_number "0060H05"
+Gel_number    106032
+Bands  623976 30
+Positive_STS "OJ991021_17" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0061B08"
+Map "ctg1" Ends Left 494.000
+Map "ctg1" Ends Right 515.000 Oldctg 2
+Fp_number "0061B08"
+Gel_number    106141
+Bands  631128 22
+Approximate_match_to_cosmid "a0063D20"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0061C05"
+Map "ctg1" Ends Left 154.000
+Map "ctg1" Ends Right 196.000 Oldctg 2
+Fp_number "0061C05"
+Gel_number    106111
+Bands  631753 43
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 27 15 58 
+  
+Clone : "a0061G09"
+Map "ctg1" Ends Left 326.000
+Map "ctg1" Ends Right 361.000 Oldctg 2
+Fp_number "0061G09"
+Gel_number    106112
+Bands  634431 36
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0061L15"
+Map "ctg1" Ends Left 415.000
+Map "ctg1" Ends Right 452.000 Oldctg 2
+Fp_number "0061L15"
+Gel_number    106133
+Bands  637716 38
+Positive_STS "C30" 
+Positive_STS "C146" 
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0061M11"
+Map "ctg1" Ends Left 133.000
+Map "ctg1" Ends Right 158.000 Oldctg 2
+Fp_number "0061M11"
+Gel_number    106114
+Bands  638266 26
+Approximate_match_to_cosmid "a0079E17"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0062B16"
+Map "ctg1" Ends Left 103.000
+Map "ctg1" Ends Right 127.000 Oldctg 2
+Fp_number "0062B16"
+Gel_number    106241
+Bands  641789 25
+Exact_match_to_cosmid "a0079O15"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0062P11"
+Map "ctg1" Ends Left 463.000
+Map "ctg1" Ends Right 494.000 Oldctg 2
+Fp_number "0062P11"
+Gel_number    106234
+Bands  650251 32
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 3 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0063A17"
+Map "ctg1" Ends Left 557.000
+Map "ctg1" Ends Right 594.000 Oldctg 2
+Fp_number "0063A17"
+Gel_number    106311
+Bands  650862 38
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 3 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0063D17"
+Map "ctg1" Ends Left 151.000
+Map "ctg1" Ends Right 179.000 Oldctg 2
+Fp_number "0063D17"
+Gel_number    106331
+Bands  652674 29
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0063D20"
+Map "ctg1" Ends Left 491.000
+Map "ctg1" Ends Right 513.000 Oldctg 2
+Fp_number "0063D20"
+Gel_number    106341
+Bands  652754 23
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0063H14"
+Map "ctg1" Ends Left 535.000
+Map "ctg1" Ends Right 573.000 Oldctg 2
+Fp_number "0063H14"
+Gel_number    106342
+Bands  655000 39
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0063J07"
+Map "ctg1" Ends Left 324.000
+Map "ctg1" Ends Right 361.000 Oldctg 2
+Fp_number "0063J07"
+Gel_number    106333
+Bands  655927 38
+Positive_STS "S13984" 
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0063N13"
+Map "ctg1" Ends Left 68.000
+Map "ctg1" Ends Right 96.000 Oldctg 2
+Fp_number "0063N13"
+Gel_number    106334
+Bands  658483 29
+Exact_match_to_cosmid "a0082H14"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0064C11"
+Map "ctg1" Ends Left 330.000
+Map "ctg1" Ends Right 362.000 Oldctg 2
+Fp_number "0064C11"
+Gel_number    106411
+Bands  661396 33
+Approximate_match_to_cosmid "a0063J07"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0064D11"
+Map "ctg1" Ends Left 184.000
+Map "ctg1" Ends Right 221.000 Oldctg 2
+Fp_number "0064D11"
+Gel_number    106431
+Bands  661929 38
+Approximate_match_to_cosmid "b0074P13"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0064G05"
+Map "ctg1" Ends Left 491.000
+Map "ctg1" Ends Right 510.000 Oldctg 2
+Fp_number "0064G05"
+Gel_number    106412
+Bands  663578 20
+Exact_match_to_cosmid "a0063D20"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0064N11"
+Map "ctg1" Ends Left 465.000
+Map "ctg1" Ends Right 491.000 Oldctg 2
+Fp_number "0064N11"
+Gel_number    106434
+Bands  668130 27
+Approximate_match_to_cosmid "a0062P11"
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0065C23"
+Map "ctg1" Ends Left 425.000
+Map "ctg1" Ends Right 463.000 Oldctg 2
+Fp_number "0065C23"
+Gel_number    106511
+Bands  671263 39
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1855" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0065H04"
+Map "ctg1" Ends Left 485.000
+Map "ctg1" Ends Right 507.000 Oldctg 2
+Fp_number "0065H04"
+Gel_number    106542
+Bands  673361 23
+Approximate_match_to_cosmid "a0096O08"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0065J18"
+Map "ctg1" Ends Left 70.000
+Map "ctg1" Ends Right 104.000 Oldctg 2
+Fp_number "0065J18"
+Gel_number    106543
+Bands  674896 35
+Positive_STS "G1082x" 
+Creation_date 99 11 12 9 4 
+Modified_date 101 5 4 8 42 
+  
+Clone : "a0065K05"
+Map "ctg1" Ends Left 296.000
+Map "ctg1" Ends Right 319.000 Oldctg 2
+Fp_number "0065K05"
+Gel_number    106513
+Bands  675155 24
+Approximate_match_to_cosmid "b0062F24"
+Positive_STS "OJ990503_27" 
+Positive_Probe "AEST036" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0065M09"
+Map "ctg1" Ends Left 175.000
+Map "ctg1" Ends Right 215.000 Oldctg 2
+Fp_number "0065M09"
+Gel_number    106514
+Bands  676344 41
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0066I06"
+Map "ctg1" Ends Left 114.000
+Map "ctg1" Ends Right 149.000 Oldctg 2
+Fp_number "0066I06"
+Gel_number    106623
+Bands  683312 36
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0067C11"
+Map "ctg1" Ends Left 15.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Fp_number "0067C11"
+Gel_number    106711
+Bands  689496 32
+Approximate_match_to_cosmid "a0060C20"
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0067J18"
+Map "ctg1" Ends Left 521.000
+Map "ctg1" Ends Right 553.000 Oldctg 2
+Fp_number "0067J18"
+Gel_number    106743
+Bands  693669 33
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0067L02"
+Map "ctg1" Ends Left 494.000
+Map "ctg1" Ends Right 515.000 Oldctg 2
+Fp_number "0067L02"
+Gel_number    106743
+Bands  694511 22
+Positive_STS "OJ990820_14" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0068E14"
+Map "ctg1" Ends Left 341.000
+Map "ctg1" Ends Right 369.000 Oldctg 2
+Fp_number "0068E14"
+Gel_number    106822
+Bands  699734 29
+Approximate_match_to_cosmid "a0089D17"
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0068F21"
+Map "ctg1" Ends Left 239.000
+Map "ctg1" Ends Right 271.000 Oldctg 2
+Fp_number "0068F21"
+Gel_number    106832
+Bands  700580 33
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0068G16"
+Map "ctg1" Ends Left 553.000
+Map "ctg1" Ends Right 581.000 Oldctg 2
+Fp_number "0068G16"
+Gel_number    106822
+Bands  701067 29
+Approximate_match_to_cosmid "a0053D17"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ000303_09" 
+Positive_STS "OJ000303_17" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0069J07"
+Map "ctg1" Ends Left 325.000
+Map "ctg1" Ends Right 360.000 Oldctg 2
+Fp_number "0069J07"
+Gel_number    106933
+Bands  711537 36
+Approximate_match_to_cosmid "a0063J07"
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0069N10"
+Map "ctg1" Ends Left 226.000
+Map "ctg1" Ends Right 257.000 Oldctg 2
+Fp_number "0069N10"
+Gel_number    106944
+Bands  713838 32
+Approximate_match_to_cosmid "a0070K16"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0070D12"
+Map "ctg1" Ends Left 122.000
+Map "ctg1" Ends Right 151.000 Oldctg 2
+Fp_number "0070D12"
+Gel_number    107041
+Bands  717387 30
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0070F12"
+Map "ctg1" Ends Left 7.000
+Map "ctg1" Ends Right 43.000 Oldctg 2
+Fp_number "0070F12"
+Gel_number    107042
+Bands  718551 37
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0070K16"
+Map "ctg1" Ends Left 226.000
+Map "ctg1" Ends Right 257.000 Oldctg 2
+Fp_number "0070K16"
+Gel_number    107023
+Bands  721422 32
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0071F13"
+Map "ctg1" Ends Left 18.000
+Map "ctg1" Ends Right 45.000 Oldctg 2
+Fp_number "0071F13"
+Gel_number    107132
+Bands  727932 28
+Exact_match_to_cosmid "a0060C20"
+Positive_STS "OJ990915_01" 
+Positive_Probe "AEST171" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0071K15"
+Map "ctg1" Ends Left 79.000
+Map "ctg1" Ends Right 105.000 Oldctg 2
+Fp_number "0071K15"
+Gel_number    107113
+Bands  730964 27
+Approximate_match_to_cosmid "a0079P13"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0071O23"
+Map "ctg1" Ends Left 158.000
+Map "ctg1" Ends Right 179.000 Oldctg 2
+Fp_number "0071O23"
+Gel_number    107114
+Bands  733623 22
+Approximate_match_to_cosmid "a0061C05"
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0072A19"
+Map "ctg1" Ends Left 250.000
+Map "ctg1" Ends Right 278.000 Oldctg 2
+Fp_number "0072A19"
+Gel_number    107211
+Bands  734553 29
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0072C01"
+Map "ctg1" Ends Left 222.000
+Map "ctg1" Ends Right 256.000 Oldctg 2
+Fp_number "0072C01"
+Gel_number    107211
+Bands  735275 35
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0072L19"
+Map "ctg1" Ends Left 366.000
+Map "ctg1" Ends Right 400.000 Oldctg 2
+Fp_number "0072L19"
+Gel_number    107233
+Bands  741565 35
+Exact_match_to_cosmid "a0049H15"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0072P22"
+Map "ctg1" Ends Left 35.000
+Map "ctg1" Ends Right 77.000 Oldctg 2
+Fp_number "0072P22"
+Gel_number    107244
+Bands  744042 43
+Approximate_match_to_cosmid "b0065P01"
+Positive_STS "RG472" 
+Creation_date 99 11 12 9 4 
+Modified_date 101 5 4 8 42 
+  
+Clone : "a0073B01"
+Map "ctg1" Ends Left 459.000
+Map "ctg1" Ends Right 487.000 Oldctg 2
+Fp_number "0073B01"
+Gel_number    107331
+Bands  744756 29
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0073L24"
+Map "ctg1" Ends Left 349.000
+Map "ctg1" Ends Right 391.000 Oldctg 2
+Fp_number "0073L24"
+Gel_number    107343
+Bands  751893 43
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0074A09"
+Map "ctg1" Ends Left 568.000
+Map "ctg1" Ends Right 603.000 Oldctg 2
+Fp_number "0074A09"
+Gel_number    107411
+Bands  754480 36
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_OVERGO "SOG0279" 
+Positive_OVERGO "SOG0948" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0074B21"
+Map "ctg1" Ends Left 294.000
+Map "ctg1" Ends Right 320.000 Oldctg 2
+Fp_number "0074B21"
+Gel_number    107431
+Bands  755414 27
+Positive_STS "OJ990503_27" 
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0074N21"
+Map "ctg1" Ends Left 579.000
+Map "ctg1" Ends Right 614.000 Oldctg 2
+Fp_number "0074N21"
+Gel_number    107434
+Bands  762877 36
+Approximate_match_to_cosmid "a0041C20"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0075E09"
+Map "ctg1" Ends Left 329.000
+Map "ctg1" Ends Right 361.000 Oldctg 2
+Fp_number "0075E09"
+Gel_number    107512
+Bands  766285 33
+Exact_match_to_cosmid "a0061G09"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0075K10"
+Map "ctg1" Ends Left 477.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "0075K10"
+Gel_number    107523
+Bands  770100 25
+Approximate_match_to_cosmid "b0001O16"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0075L11"
+Map "ctg1" Ends Left 293.000
+Map "ctg1" Ends Right 317.000 Oldctg 2
+Fp_number "0075L11"
+Gel_number    107533
+Bands  770678 25
+Exact_match_to_cosmid "a0054G21"
+Positive_STS "OJ990503_27" 
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0076J03"
+Map "ctg1" Ends Left 126.000
+Map "ctg1" Ends Right 154.000 Oldctg 2
+Fp_number "0076J03"
+Gel_number    107633
+Bands  778564 29
+Approximate_match_to_cosmid "a0079E17"
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0076O03"
+Map "ctg1" Ends Left 501.000
+Map "ctg1" Ends Right 526.000 Oldctg 2
+Fp_number "0076O03"
+Gel_number    107614
+Bands  781611 26
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 56 
+  
+Clone : "a0076O24"
+Map "ctg1" Ends Left 315.000
+Map "ctg1" Ends Right 341.000 Oldctg 2
+Fp_number "0076O24"
+Gel_number    107624
+Bands  782192 27
+Approximate_match_to_cosmid "a0060H05"
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0077F13"
+Map "ctg1" Ends Left 502.000
+Map "ctg1" Ends Right 527.000 Oldctg 2
+Fp_number "0077F13"
+Gel_number    107732
+Bands  786257 26
+Positive_STS "OJ990820_14" 
+Positive_OVERGO "SOG1044" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0077N20"
+Map "ctg1" Ends Left 251.000
+Map "ctg1" Ends Right 276.000 Oldctg 2
+Fp_number "0077N20"
+Gel_number    107744
+Bands  791340 26
+Approximate_match_to_cosmid "a0087A15"
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1077" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0078C10"
+Map "ctg1" Ends Left 447.000
+Map "ctg1" Ends Right 477.000 Oldctg 2
+Fp_number "0078C10"
+Gel_number    107821
+Bands  794339 31
+Approximate_match_to_cosmid "b0006L19"
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0078G15"
+Map "ctg1" Ends Left 569.000
+Map "ctg1" Ends Right 590.000 Oldctg 2
+Fp_number "0078G15"
+Gel_number    107812
+Bands  797064 22
+Approximate_match_to_cosmid "a0074A09"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_OVERGO "SOG0313" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0079E17"
+Map "ctg1" Ends Left 123.000
+Map "ctg1" Ends Right 154.000 Oldctg 2
+Fp_number "0079E17"
+Gel_number    107912
+Bands  805806 32
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0079F12"
+Map "ctg1" Ends Left 110.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "0079F12"
+Gel_number    107942
+Bands  806343 37
+Approximate_match_to_cosmid "a0051O20"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0079K21"
+Map "ctg1" Ends Left 3.000
+Map "ctg1" Ends Right 38.000 Oldctg 2
+Fp_number "0079K21"
+Gel_number    107913
+Bands  809646 36
+Approximate_match_to_cosmid "a0070F12"
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0079O15"
+Map "ctg1" Ends Left 103.000
+Map "ctg1" Ends Right 141.000 Oldctg 2
+Fp_number "0079O15"
+Gel_number    107914
+Bands  811657 39
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0079P13"
+Map "ctg1" Ends Left 80.000
+Map "ctg1" Ends Right 112.000 Oldctg 2
+Fp_number "0079P13"
+Gel_number    107934
+Bands  812191 33
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0080B02"
+Map "ctg1" Ends Left 13.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Fp_number "0080B02"
+Gel_number    108041
+Bands  813147 34
+Approximate_match_to_cosmid "a0060C20"
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0080B11"
+Map "ctg1" Ends Left 11.000
+Map "ctg1" Ends Right 45.000 Oldctg 2
+Fp_number "0080B11"
+Gel_number    108031
+Bands  813400 35
+Approximate_match_to_cosmid "a0060C20"
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0081A12"
+Map "ctg1" Ends Left 133.000
+Map "ctg1" Ends Right 148.000 Oldctg 2
+Fp_number "0081A12"
+Gel_number    108121
+Bands  822515 16
+Approximate_match_to_cosmid "a0038P04"
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0082D01"
+Map "ctg1" Ends Left 108.000
+Map "ctg1" Ends Right 144.000 Oldctg 2
+Fp_number "0082D01"
+Gel_number    108231
+Bands  833550 37
+Approximate_match_to_cosmid "a0038D13"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0082D09"
+Map "ctg1" Ends Left 85.000
+Map "ctg1" Ends Right 122.000 Oldctg 2
+Fp_number "0082D09"
+Gel_number    108231
+Bands  833754 38
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0082H14"
+Map "ctg1" Ends Left 63.000
+Map "ctg1" Ends Right 97.000 Oldctg 2
+Fp_number "0082H14"
+Gel_number    108242
+Bands  836034 35
+Positive_STS "OJ990412_10" 
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0082K03"
+Map "ctg1" Ends Left 227.000
+Map "ctg1" Ends Right 256.000 Oldctg 2
+Fp_number "0082K03"
+Gel_number    108213
+Bands  837447 30
+Approximate_match_to_cosmid "a0070K16"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1177" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0083A16"
+Map "ctg1" Ends Left 548.000
+Map "ctg1" Ends Right 581.000 Oldctg 2
+Fp_number "0083A16"
+Gel_number    108321
+Bands  840681 34
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0083C24"
+Map "ctg1" Ends Left 443.000
+Map "ctg1" Ends Right 474.000 Oldctg 2
+Fp_number "0083C24"
+Gel_number    108321
+Bands  841901 32
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ990923_08" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0083K23"
+Map "ctg1" Ends Left 344.000
+Map "ctg1" Ends Right 377.000 Oldctg 2
+Fp_number "0083K23"
+Gel_number    108313
+Bands  846502 34
+Creation_date 99 11 12 9 4 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0083M05"
+Map "ctg1" Ends Left 219.000
+Map "ctg1" Ends Right 244.000 Oldctg 2
+Fp_number "0083M05"
+Gel_number    108314
+Bands  847190 26
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0083M16"
+Map "ctg1" Ends Left 240.000
+Map "ctg1" Ends Right 277.000 Oldctg 2
+Fp_number "0083M16"
+Gel_number    108324
+Bands  847407 38
+Shotgun Full_X SHOTGUN
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0084E08"
+Map "ctg1" Ends Left 256.000
+Map "ctg1" Ends Right 289.000 Oldctg 2
+Fp_number "0084E08"
+Gel_number    108422
+Bands  851864 34
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0085A07"
+Map "ctg1" Ends Left 495.000
+Map "ctg1" Ends Right 520.000 Oldctg 2
+Fp_number "0085A07"
+Gel_number    108511
+Bands  859081 26
+Approximate_match_to_cosmid "b0005M11"
+Positive_OVERGO "SOG2276" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0085B05"
+Map "ctg1" Ends Left 295.000
+Map "ctg1" Ends Right 319.000 Oldctg 2
+Fp_number "0085B05"
+Gel_number    108531
+Bands  859629 25
+Approximate_match_to_cosmid "b0062F24"
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0085L10"
+Map "ctg1" Ends Left 592.000
+Map "ctg1" Ends Right 626.000 Oldctg 2
+Fp_number "0085L10"
+Gel_number    108543
+Bands  865367 35
+Approximate_match_to_cosmid "a0091H17"
+Positive_STS "OJ000321_15" 
+Positive_OVERGO "SOG0948" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0085N03"
+Map "ctg1" Ends Left 257.000
+Map "ctg1" Ends Right 288.000 Oldctg 2
+Fp_number "0085N03"
+Gel_number    108534
+Bands  866260 32
+Creation_date 99 11 12 9 4 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0085O04"
+Map "ctg1" Ends Left 465.000
+Map "ctg1" Ends Right 497.000 Oldctg 2
+Fp_number "0085O04"
+Gel_number    108524
+Bands  866752 33
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0086E10"
+Map "ctg1" Ends Left 578.000
+Map "ctg1" Ends Right 613.000 Oldctg 2
+Fp_number "0086E10"
+Gel_number    108622
+Bands  870151 36
+Positive_STS "chlor" 
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_OVERGO "SOG0313" 
+Positive_OVERGO "SOG0948" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0086K13"
+Map "ctg1" Ends Left 7.000
+Map "ctg1" Ends Right 32.000 Oldctg 2
+Fp_number "0086K13"
+Gel_number    108613
+Bands  873705 26
+Approximate_match_to_cosmid "a0070F12"
+Positive_STS "OJ000323_02" 
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0087A15"
+Map "ctg1" Ends Left 245.000
+Map "ctg1" Ends Right 274.000 Oldctg 2
+Fp_number "0087A15"
+Gel_number    108711
+Bands  877048 30
+Creation_date 99 11 12 9 4 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0087D14"
+Map "ctg1" Ends Left 589.000
+Map "ctg1" Ends Right 614.000 Oldctg 2
+Fp_number "0087D14"
+Gel_number    108741
+Bands  878833 26
+Approximate_match_to_cosmid "a0041C20"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0087L03"
+Map "ctg1" Ends Left 405.000
+Map "ctg1" Ends Right 445.000 Oldctg 2
+Fp_number "0087L03"
+Gel_number    108733
+Bands  883623 41
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0087M11"
+Map "ctg1" Ends Left 85.000
+Map "ctg1" Ends Right 110.000 Oldctg 2
+Fp_number "0087M11"
+Gel_number    108714
+Bands  884372 26
+Approximate_match_to_cosmid "a0079P13"
+Positive_STS "G1082x" 
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0088C05"
+Map "ctg1" Ends Left 108.000
+Map "ctg1" Ends Right 143.000 Oldctg 2
+Fp_number "0088C05"
+Gel_number    108811
+Bands  888056 36
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0089D17"
+Map "ctg1" Ends Left 340.000
+Map "ctg1" Ends Right 375.000 Oldctg 2
+Fp_number "0089D17"
+Gel_number    108931
+Bands  898465 36
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0089G07"
+Map "ctg1" Ends Left 247.000
+Map "ctg1" Ends Right 270.000 Oldctg 2
+Fp_number "0089G07"
+Gel_number    108912
+Bands  899812 24
+Approximate_match_to_cosmid "a0083M16"
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0089J14"
+Map "ctg1" Ends Left 330.000
+Map "ctg1" Ends Right 365.000 Oldctg 2
+Fp_number "0089J14"
+Gel_number    108943
+Bands  901768 36
+Approximate_match_to_cosmid "a0063J07"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0089L15"
+Map "ctg1" Ends Left 34.000
+Map "ctg1" Ends Right 47.000 Oldctg 2
+Fp_number "0089L15"
+Gel_number    108933
+Bands  902817 14
+Exact_match_to_cosmid "b0026K13"
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0089N07"
+Map "ctg1" Ends Left 327.000
+Map "ctg1" Ends Right 360.000 Oldctg 2
+Fp_number "0089N07"
+Gel_number    108934
+Bands  903702 34
+Approximate_match_to_cosmid "a0061G09"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0090A16"
+Map "ctg1" Ends Left 291.000
+Map "ctg1" Ends Right 313.000 Oldctg 2
+Fp_number "0090A16"
+Gel_number    109021
+Bands  905234 23
+Approximate_match_to_cosmid "b0008B04"
+Positive_STS "OJ990503_27" 
+Positive_STS "OJ991021_17" 
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0091B10"
+Map "ctg1" Ends Left 577.000
+Map "ctg1" Ends Right 608.000 Oldctg 2
+Fp_number "0091B10"
+Gel_number    109141
+Bands  914843 32
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000321_15" 
+Positive_OVERGO "SOG0279" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0091H17"
+Map "ctg1" Ends Left 593.000
+Map "ctg1" Ends Right 627.000 Oldctg 2
+Fp_number "0091H17"
+Gel_number    109132
+Bands  918695 35
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Positive_OVERGO "SOG0948" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0091J17"
+Map "ctg1" Ends Left 330.000
+Map "ctg1" Ends Right 361.000 Oldctg 2
+Fp_number "0091J17"
+Gel_number    109133
+Bands  920035 32
+Exact_match_to_cosmid "a0093K20"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 4 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0091K03"
+Map "ctg1" Ends Left 64.000
+Map "ctg1" Ends Right 97.000 Oldctg 2
+Fp_number "0091K03"
+Gel_number    109113
+Bands  920297 34
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 9 4 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0092B07"
+Map "ctg1" Ends Left 109.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "0092B07"
+Gel_number    109231
+Bands  924800 38
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0093C23"
+Map "ctg1" Ends Left 473.000
+Map "ctg1" Ends Right 497.000 Oldctg 2
+Fp_number "0093C23"
+Gel_number    109311
+Bands  935783 25
+Exact_match_to_cosmid "a0085O04"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0093K20"
+Map "ctg1" Ends Left 329.000
+Map "ctg1" Ends Right 365.000 Oldctg 2
+Fp_number "0093K20"
+Gel_number    109323
+Bands  941078 37
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0093L07"
+Map "ctg1" Ends Left 296.000
+Map "ctg1" Ends Right 321.000 Oldctg 2
+Fp_number "0093L07"
+Gel_number    109333
+Bands  941412 26
+Positive_STS "OJ990503_27" 
+Positive_STS "OJ991021_17" 
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0093M03"
+Map "ctg1" Ends Left 240.000
+Map "ctg1" Ends Right 270.000 Oldctg 2
+Fp_number "0093M03"
+Gel_number    109314
+Bands  941852 31
+Approximate_match_to_cosmid "a0083M16"
+Creation_date 99 11 12 9 5 
+Modified_date 101 5 9 11 19 
+  
+Clone : "a0094G11"
+Map "ctg1" Ends Left 582.000
+Map "ctg1" Ends Right 620.000 Oldctg 2
+Fp_number "0094G11"
+Gel_number    109412
+Bands  947953 39
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Positive_OVERGO "SOG0279" 
+Positive_OVERGO "SOG0948" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0094G23"
+Map "ctg1" Ends Left 10.000
+Map "ctg1" Ends Right 30.000 Oldctg 2
+Fp_number "0094G23"
+Gel_number    109412
+Bands  948280 21
+Approximate_match_to_cosmid "a0060C20"
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 9 5 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0094M18"
+Map "ctg1" Ends Left 437.000
+Map "ctg1" Ends Right 469.000 Oldctg 2
+Fp_number "0094M18"
+Gel_number    109424
+Bands  951995 33
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0094O11"
+Map "ctg1" Ends Left 147.000
+Map "ctg1" Ends Right 172.000 Oldctg 2
+Fp_number "0094O11"
+Gel_number    109414
+Bands  953170 26
+Positive_STS "OJ990407_02" 
+Positive_STS "OJ990412_10" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0094P02"
+Map "ctg1" Ends Left 546.000
+Map "ctg1" Ends Right 577.000 Oldctg 2
+Fp_number "0094P02"
+Gel_number    109444
+Bands  953509 32
+Approximate_match_to_cosmid "a0063H14"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ000301_05" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 9 5 
+Modified_date 102 2 1 22 48 
+  
+Clone : "a0095F06"
+Map "ctg1" Ends Left 15.000
+Map "ctg1" Ends Right 47.000 Oldctg 2
+Fp_number "0095F06"
+Gel_number    109542
+Bands  957133 33
+Approximate_match_to_cosmid "a0060C20"
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0095M19"
+Map "ctg1" Ends Left 333.000
+Map "ctg1" Ends Right 367.000 Oldctg 2
+Fp_number "0095M19"
+Gel_number    109514
+Bands  961454 35
+Approximate_match_to_cosmid "a0093K20"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0095O20"
+Map "ctg1" Ends Left 144.000
+Map "ctg1" Ends Right 171.000 Oldctg 2
+Fp_number "0095O20"
+Gel_number    109524
+Bands  962710 28
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Positive_OVERGO "SOG1231" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0096B09"
+Map "ctg1" Ends Left 165.000
+Map "ctg1" Ends Right 195.000 Oldctg 2
+Fp_number "0096B09"
+Gel_number    109631
+Bands  964428 31
+Approximate_match_to_cosmid "a0061C05"
+Creation_date 99 11 12 9 5 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0096E08"
+Map "ctg1" Ends Left 293.000
+Map "ctg1" Ends Right 317.000 Oldctg 2
+Fp_number "0096E08"
+Gel_number    109622
+Bands  966093 25
+Exact_match_to_cosmid "a0054G21"
+Positive_STS "OJ990503_27" 
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0096I09"
+Map "ctg1" Ends Left 443.000
+Map "ctg1" Ends Right 473.000 Oldctg 2
+Fp_number "0096I09"
+Gel_number    109613
+Bands  968385 31
+Approximate_match_to_cosmid "b0079F11"
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Positive_Probe "BCD1424" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0096N24"
+Map "ctg1" Ends Left 337.000
+Map "ctg1" Ends Right 362.000 Oldctg 2
+Fp_number "0096N24"
+Gel_number    109644
+Bands  971298 26
+Exact_match_to_cosmid "a0093K20"
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0096O08"
+Map "ctg1" Ends Left 482.000
+Map "ctg1" Ends Right 508.000 Oldctg 2
+Fp_number "0096O08"
+Gel_number    109624
+Bands  971452 27
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990820_14" 
+Positive_STS "OJ990923_08" 
+Positive_OVERGO "SOG1746" 
+Creation_date 99 11 12 9 5 
+Modified_date 103 4 22 10 21 
+  
+Clone : "a0557J22"
+Map "ctg1" Ends Left 334.000
+Map "ctg1" Ends Right 371.000 Oldctg 2
+Fp_number "0557J22"
+Gel_number    105743
+Bands  976724 38
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 56 
+  
+Clone : "a0557K12"
+Map "ctg1" Ends Left 413.000
+Map "ctg1" Ends Right 440.000 Oldctg 2
+Fp_number "0557K12"
+Gel_number    105723
+Bands  977095 28
+Approximate_match_to_cosmid "a0039K03"
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+Clone : "a0557K23"
+Map "ctg1" Ends Left 255.000
+Map "ctg1" Ends Right 292.000 Oldctg 2
+Fp_number "0557K23"
+Gel_number    105713
+Bands  977342 38
+Creation_date 99 11 12 9 3 
+Modified_date 101 4 11 16 57 
+  
+BAC : "AC091086sd1"
+Map "ctg1" Ends Left 8.000
+Map "ctg1" Ends Right 40.000 Oldctg 2
+Gel_number    20030428
+Bands  2020512 33
+Remark "a0010G19, Chr1 - Hahn,J.-H., Eun"
+Remark "eMRK by Gramene (S13157S)"
+Shotgun Full_X FINISHED
+Positive_eMRK "S13157S" 
+Creation_date 103 4 28 17 3 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AL662937sd1"
+Map "ctg1" Ends Left 590.000
+Map "ctg1" Ends Right 616.000 Oldctg 2
+Gel_number    20030428
+Bands  1971368 27
+Remark "a0027G07, Chr4 - Fu,G., Wang,S.Y."
+Remark "eMRK by Gramene (RZ889)"
+Remark "eMRK by Gramene (RM177)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM177" 
+Positive_eMRK "RZ889" 
+Creation_date 103 4 28 16 50 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002483sd1"
+Map "ctg1" Ends Left 322.000
+Map "ctg1" Ends Right 360.000 Oldctg 2
+Gel_number    20030428
+Bands  2022839 39
+Remark "P0019D06, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (S20154S)"
+Remark "eMRK by Gramene (S5853)"
+Remark "eMRK by Gramene (RM6324)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM6324" 
+Positive_eMRK "S5853" 
+Positive_eMRK "S20154S" 
+Creation_date 103 4 28 17 4 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002487sd1"
+Map "ctg1" Ends Left 102.000
+Map "ctg1" Ends Right 124.000 Oldctg 2
+Gel_number    20030428
+Bands  2021324 22
+Remark "P0684C01, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (C1679)"
+Shotgun Full_X FINISHED
+Positive_STS "C1679" 
+Creation_date 103 4 28 17 4 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002521sd1"
+Map "ctg1" Ends Left 510.000
+Map "ctg1" Ends Right 540.000 Oldctg 2
+Gel_number    20030428
+Bands  2020761 31
+Remark "P0041E11, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C804)"
+Shotgun Full_X FINISHED
+Positive_eMRK "C804" 
+Creation_date 103 4 28 17 3 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002522sd1"
+Map "ctg1" Ends Left 441.000
+Map "ctg1" Ends Right 477.000 Oldctg 2
+Gel_number    20030428
+Bands  2020227 37
+Remark "P0009G03, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C1127)"
+Remark "eMRK by Gramene (R106)"
+Remark "eMRK by Gramene (R1841)"
+Remark "eMRK by Gramene (S20229S)"
+Remark "eMRK by Gramene (RM3740)"
+Remark "eMRK by Gramene (RM1869)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM1869" 
+Positive_eMRK "RM3740" 
+Positive_eMRK "S20229S" 
+Positive_eMRK "R1841" 
+Positive_eMRK "R106" 
+Positive_eMRK "C1127" 
+Creation_date 103 4 28 17 3 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002526sd1"
+Map "ctg1" Ends Left 217.000
+Map "ctg1" Ends Right 247.000 Oldctg 2
+Gel_number    20030428
+Bands  2019208 30
+Remark "P0504H10, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (S13048)"
+Remark "eMRK by Gramene (RM6236)"
+Remark "eMRK by Gramene (RM7536)"
+Remark "eMRK by Gramene (RM8068)"
+Remark "eMRK by Gramene (RM1247)"
+Remark "eMRK by Gramene (RM6045)"
+Remark "eMRK by Gramene (RM1331)"
+Remark "eMRK by Gramene (RM7278)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM7278" 
+Positive_eMRK "RM1331" 
+Positive_eMRK "RM6045" 
+Positive_eMRK "RM1247" 
+Positive_eMRK "RM8068" 
+Positive_eMRK "RM7536" 
+Positive_eMRK "RM6236" 
+Positive_eMRK "S13048" 
+Creation_date 103 4 28 17 3 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002538sd1"
+Map "ctg1" Ends Left 188.000
+Map "ctg1" Ends Right 226.000 Oldctg 2
+Gel_number    20030428
+Bands  2017615 39
+Remark "P0408F06, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C749)"
+Remark "eMRK by Gramene (S13048)"
+Remark "eMRK by Gramene (RM6236)"
+Remark "eMRK by Gramene (RM7536)"
+Remark "eMRK by Gramene (RM8068)"
+Remark "eMRK by Gramene (RM8069)"
+Remark "eMRK by Gramene (RM1247)"
+Remark "eMRK by Gramene (RM6470)"
+Remark "eMRK by Gramene (RM1141)"
+Remark "eMRK by Gramene (RM1331)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM1331" 
+Positive_eMRK "RM1141" 
+Positive_eMRK "RM6470" 
+Positive_eMRK "RM1247" 
+Positive_eMRK "RM8069" 
+Positive_eMRK "RM8068" 
+Positive_eMRK "RM7536" 
+Positive_eMRK "RM6236" 
+Positive_eMRK "S13048" 
+Positive_eMRK "C749" 
+Creation_date 103 4 28 17 3 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002539sd1"
+Map "ctg1" Ends Left 505.000
+Map "ctg1" Ends Right 535.000 Oldctg 2
+Gel_number    20030428
+Bands  2004290 30
+Remark "P0433F09, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C804)"
+Remark "eMRK by Gramene (RZ288)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RZ288" 
+Positive_eMRK "C804" 
+Creation_date 103 4 28 17 0 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002540sd1"
+Map "ctg1" Ends Left 408.000
+Map "ctg1" Ends Right 448.000 Oldctg 2
+Gel_number    20030428
+Bands  1988092 40
+Remark "P0434B04, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (C146)"
+Remark "electronic by Gramene (C30)"
+Remark "eMRK by Gramene (C936)"
+Remark "eMRK by Gramene (R106)"
+Remark "eMRK by Gramene (R1841)"
+Remark "eMRK by Gramene (RM1869)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM1869" 
+Positive_eMRK "R1841" 
+Positive_eMRK "R106" 
+Positive_eMRK "C936" 
+Positive_STS "C30" 
+Positive_STS "C146" 
+Creation_date 103 4 28 16 55 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002541sd1"
+Map "ctg1" Ends Left 54.000
+Map "ctg1" Ends Right 88.000 Oldctg 2
+Gel_number    20030428
+Bands  1985566 35
+Remark "P0494A10, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (S1543)"
+Remark "eMRK by Gramene (RM6340)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM6340" 
+Positive_eMRK "S1543" 
+Creation_date 103 4 28 16 54 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002747sd1"
+Map "ctg1" Ends Left 37.000
+Map "ctg1" Ends Right 71.000 Oldctg 2
+Gel_number    20030428
+Bands  2007710 35
+Remark "P0698G03, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (S13157S)"
+Remark "eMRK by Gramene (S1543)"
+Shotgun Full_X FINISHED
+Positive_eMRK "S1543" 
+Positive_eMRK "S13157S" 
+Creation_date 103 4 28 17 1 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002860sd1"
+Map "ctg1" Ends Left 161.000
+Map "ctg1" Ends Right 189.000 Oldctg 2
+Gel_number    20030428
+Bands  2013899 29
+Remark "P0409B08, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (L451)"
+Remark "eMRK by Gramene (RM8069)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM8069" 
+Positive_eMRK "L451" 
+Creation_date 103 4 28 17 2 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002867sd1"
+Map "ctg1" Ends Left 9.000
+Map "ctg1" Ends Right 39.000 Oldctg 2
+Gel_number    20030428
+Bands  2012147 30
+Remark "P0463F06, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (S13157S)"
+Shotgun Full_X FINISHED
+Positive_eMRK "S13157S" 
+Creation_date 103 4 28 17 2 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002868sd1"
+Map "ctg1" Ends Left 67.000
+Map "ctg1" Ends Right 99.000 Oldctg 2
+Gel_number    20030428
+Bands  2011674 33
+Remark "P0698A04, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (C1679)"
+Remark "eMRK by Gramene (S10623)"
+Remark "eMRK by Gramene (RM6340)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM6340" 
+Positive_eMRK "S10623" 
+Positive_STS "C1679" 
+Creation_date 103 4 28 17 2 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002872sd1"
+Map "ctg1" Ends Left 379.000
+Map "ctg1" Ends Right 421.000 Oldctg 2
+Gel_number    20030428
+Bands  2003929 43
+Remark "P0416D03, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (C30)"
+Shotgun Full_X FINISHED
+Positive_STS "C30" 
+Creation_date 103 4 28 17 0 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002909sd1"
+Map "ctg1" Ends Left 138.000
+Map "ctg1" Ends Right 166.000 Oldctg 2
+Gel_number    20030428
+Bands  2003848 29
+Remark "P0044F08, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (G107)"
+Remark "eMRK by Gramene (L451)"
+Remark "eMRK by Gramene (RZ543)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RZ543" 
+Positive_eMRK "L451" 
+Positive_STS "G107" 
+Creation_date 103 4 28 17 0 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002912sd1"
+Map "ctg1" Ends Left 581.000
+Map "ctg1" Ends Right 609.000 Oldctg 2
+Gel_number    20030428
+Bands  1940366 28
+Remark "P0028E10, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (R1613)"
+Remark "eMRK by Gramene (RG246)"
+Remark "eMRK by Gramene (RM7383)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM7383" 
+Positive_eMRK "RG246" 
+Positive_eMRK "R1613" 
+Creation_date 103 4 28 16 41 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP002913sd1"
+Map "ctg1" Ends Left 301.000
+Map "ctg1" Ends Right 327.000 Oldctg 2
+Gel_number    20030428
+Bands  2006651 26
+Remark "P0480E02, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C50986S)"
+Remark "eMRK by Gramene (E30207S)"
+Remark "eMRK by Gramene (RM1320)"
+Remark "eMRK by Gramene (RM1254)"
+Remark "eMRK by Gramene (RM5423)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM5423" 
+Positive_eMRK "RM1254" 
+Positive_eMRK "RM1320" 
+Positive_eMRK "E30207S" 
+Positive_eMRK "C50986S" 
+Creation_date 103 4 28 17 0 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003045sd1"
+Map "ctg1" Ends Left 463.000
+Map "ctg1" Ends Right 495.000 Oldctg 2
+Gel_number    20030428
+Bands  1987104 32
+Remark "P0030H07, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C1127)"
+Remark "eMRK by Gramene (RM3740)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM3740" 
+Positive_eMRK "C1127" 
+Creation_date 103 4 28 16 54 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003046sd1"
+Map "ctg1" Ends Left 107.000
+Map "ctg1" Ends Right 141.000 Oldctg 2
+Gel_number    20030428
+Bands  1992652 35
+Remark "P0445D12, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (R87)"
+Remark "eMRK by Gramene (RM6515)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM6515" 
+Positive_eMRK "R87" 
+Creation_date 103 4 28 16 56 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003209sd1"
+Map "ctg1" Ends Left 532.000
+Map "ctg1" Ends Right 568.000 Oldctg 2
+Gel_number    20030428
+Bands  1918448 36
+Remark "B1189A09, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (RM6873)"
+Remark "eMRK by Gramene (RM1177)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM1177" 
+Positive_eMRK "RM6873" 
+Creation_date 103 4 28 16 35 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003214sd1"
+Map "ctg1" Ends Left 241.000
+Map "ctg1" Ends Right 279.000 Oldctg 2
+Gel_number    20030428
+Bands  1946333 39
+Remark "a0083M16, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (R753)"
+Remark "eMRK by Gramene (RM6045)"
+Remark "eMRK by Gramene (RM7278)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM7278" 
+Positive_eMRK "RM6045" 
+Positive_STS "R753" 
+Creation_date 103 4 28 16 43 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003225sd1"
+Map "ctg1" Ends Left 490.000
+Map "ctg1" Ends Right 508.000 Oldctg 2
+Gel_number    20030428
+Bands  1946935 19
+Remark "P0011G08, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (RZ288)"
+Remark "eMRK by Gramene (RM6433)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM6433" 
+Positive_eMRK "RZ288" 
+Creation_date 103 4 28 16 43 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003233sd1"
+Map "ctg1" Ends Left 132.000
+Map "ctg1" Ends Right 160.000 Oldctg 2
+Gel_number    20030428
+Bands  1933893 28
+Remark "P0037C04, Chr1 - Sasaki,T."
+Remark "electronic by Gramene (G107)"
+Remark "eMRK by Gramene (R87)"
+Remark "eMRK by Gramene (RZ543)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RZ543" 
+Positive_eMRK "R87" 
+Positive_STS "G107" 
+Creation_date 103 4 28 16 39 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003250sd1"
+Map "ctg1" Ends Left 266.000
+Map "ctg1" Ends Right 294.000 Oldctg 2
+Gel_number    20030428
+Bands  1941929 28
+Remark "P0443D08, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C50986S)"
+Remark "eMRK by Gramene (C90)"
+Shotgun Full_X FINISHED
+Positive_eMRK "C90" 
+Positive_eMRK "C50986S" 
+Creation_date 103 4 28 16 41 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003301sd1"
+Map "ctg1" Ends Left 565.000
+Map "ctg1" Ends Right 601.000 Oldctg 2
+Gel_number    20030428
+Bands  1949025 37
+Remark "P0701D05, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (R1613)"
+Remark "eMRK by Gramene (RM6873)"
+Remark "eMRK by Gramene (RM7383)"
+Remark "eMRK by Gramene (RM5336)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM5336" 
+Positive_eMRK "RM7383" 
+Positive_eMRK "RM6873" 
+Positive_eMRK "R1613" 
+Creation_date 103 4 28 16 44 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003311sd1"
+Map "ctg1" Ends Left 340.000
+Map "ctg1" Ends Right 380.000 Oldctg 2
+Gel_number    20030428
+Bands  1991148 41
+Remark "P0024G09, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (S20154S)"
+Remark "eMRK by Gramene (S5853)"
+Remark "eMRK by Gramene (RM6324)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM6324" 
+Positive_eMRK "S5853" 
+Positive_eMRK "S20154S" 
+Creation_date 103 4 28 16 56 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003338sd1"
+Map "ctg1" Ends Left 10.000
+Map "ctg1" Ends Right 38.000 Oldctg 2
+Gel_number    20030428
+Bands  1919025 29
+Remark "OJ1212_B09, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (C51033)"
+Remark "eMRK by Gramene (S13157S)"
+Remark "eMRK by Gramene (RM3148)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM3148" 
+Positive_eMRK "S13157S" 
+Positive_eMRK "C51033" 
+Creation_date 103 4 28 16 35 
+Modified_date 103 6 11 23 19 
+  
+BAC : "AP003339sd1"
+Map "ctg1" Ends Left 588.000
+Map "ctg1" Ends Right 628.000 Oldctg 2
+Gel_number    20030428
+Bands  1942963 41
+Remark "OJ1276_B06, Chr1 - Sasaki,T."
+Remark "eMRK by Gramene (E222S)"
+Remark "eMRK by Gramene (R1613)"
+Remark "eMRK by Gramene (RG246)"
+Remark "eMRK by Gramene (RM3652)"
+Remark "eMRK by Gramene (RM7383)"
+Shotgun Full_X FINISHED
+Positive_eMRK "RM7383" 
+Positive_eMRK "RM3652" 
+Positive_eMRK "RG246" 
+Positive_eMRK "R1613" 
+Positive_eMRK "E222S" 
+Creation_date 103 4 28 16 42 
+Modified_date 103 6 11 23 19 
+  
+Clone : "b0001E17"
+Map "ctg1" Ends Left 147.000
+Map "ctg1" Ends Right 177.000 Oldctg 2
+Fp_number "E001E17"
+Gel_number    200112
+Bands  981466 31
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0001O16"
+Map "ctg1" Ends Left 476.000
+Map "ctg1" Ends Right 504.000 Oldctg 2
+Fp_number "E001O16"
+Gel_number    200124
+Bands  986315 29
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0002B22"
+Map "ctg1" Ends Left 414.000
+Map "ctg1" Ends Right 449.000 Oldctg 2
+Fp_number "E002B22"
+Gel_number    200241
+Bands  987868 36
+Approximate_match_to_cosmid "a0061L15"
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0002E04"
+Map "ctg1" Ends Left 320.000
+Map "ctg1" Ends Right 354.000 Oldctg 2
+Fp_number "E002E04"
+Gel_number    200222
+Bands  988826 35
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0002J03"
+Map "ctg1" Ends Left 504.000
+Map "ctg1" Ends Right 532.000 Oldctg 2
+Fp_number "E002J03"
+Gel_number    200233
+Bands  990602 29
+Positive_STS "OJ990713_06" 
+Positive_OVERGO "SOG2238" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0002P19"
+Map "ctg1" Ends Left 122.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "E002P19"
+Gel_number    200234
+Bands  993311 25
+Exact_match_to_cosmid "a0066I06"
+Positive_STS "OJ991015_10" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0002P23"
+Map "ctg1" Ends Left 185.000
+Map "ctg1" Ends Right 219.000 Oldctg 2
+Fp_number "E002P23"
+Gel_number    200234
+Bands  993375 35
+Approximate_match_to_cosmid "b0074P13"
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0003D11"
+Map "ctg1" Ends Left 125.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "E003D11"
+Gel_number    200331
+Bands  995012 21
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG1231" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0003G18"
+Map "ctg1" Ends Left 249.000
+Map "ctg1" Ends Right 278.000 Oldctg 2
+Fp_number "E003G18"
+Gel_number    200322
+Bands  996383 30
+Positive_STS "OJ000317_34" 
+Creation_date 99 11 12 16 20 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0004I04"
+Map "ctg1" Ends Left 278.000
+Map "ctg1" Ends Right 297.000 Oldctg 2
+Fp_number "E004I04"
+Gel_number    200423
+Bands  1005389 20
+Approximate_match_to_cosmid "b0007H05"
+Positive_STS "OJ990503_27" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0005A09"
+Map "ctg1" Ends Left 347.000
+Map "ctg1" Ends Right 384.000 Oldctg 2
+Fp_number "E005A09"
+Gel_number    200511
+Bands  1009115 38
+Approximate_match_to_cosmid "a0073L24"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 16 20 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0005C01"
+Map "ctg1" Ends Left 1.000
+Map "ctg1" Ends Right 37.000 Oldctg 2
+Fp_number "E005C01"
+Gel_number    200511
+Bands  1009851 37
+Positive_STS "0057C09f" 
+Positive_STS "OJ990915_01" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0005L08"
+Map "ctg1" Ends Left 610.000
+Map "ctg1" Ends Right 643.000 Oldctg 2
+Fp_number "E005L08"
+Gel_number    200543
+Bands  1013669 34
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 16 20 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0005M11"
+Map "ctg1" Ends Left 494.000
+Map "ctg1" Ends Right 520.000 Oldctg 2
+Fp_number "E005M11"
+Gel_number    200514
+Bands  1014245 27
+Positive_OVERGO "SOG2238" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0006A09"
+Map "ctg1" Ends Left 376.000
+Map "ctg1" Ends Right 405.000 Oldctg 2
+Fp_number "E006A09"
+Gel_number    200611
+Bands  1015969 30
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 16 20 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0006H11"
+Map "ctg1" Ends Left 56.000
+Map "ctg1" Ends Right 86.000 Oldctg 2
+Fp_number "E006H11"
+Gel_number    200632
+Bands  1019531 31
+Approximate_match_to_cosmid "b0096N17"
+Positive_STS "OJ990605_38" 
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0006K10"
+Map "ctg1" Ends Left 89.000
+Map "ctg1" Ends Right 124.000 Oldctg 2
+Fp_number "E006K10"
+Gel_number    200623
+Bands  1021291 36
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 16 20 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0006L19"
+Map "ctg1" Ends Left 446.000
+Map "ctg1" Ends Right 480.000 Oldctg 2
+Fp_number "E006L19"
+Gel_number    200633
+Bands  1021995 35
+Positive_STS "RG353" 
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG2203" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0006M06"
+Map "ctg1" Ends Left 256.000
+Map "ctg1" Ends Right 282.000 Oldctg 2
+Fp_number "E006M06"
+Gel_number    200624
+Bands  1022212 27
+Positive_STS "OJ000317_34" 
+Positive_OVERGO "SOG1450" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0007C14"
+Map "ctg1" Ends Left 567.000
+Map "ctg1" Ends Right 596.000 Oldctg 2
+Fp_number "E007C14"
+Gel_number    200721
+Bands  1025914 30
+Positive_OVERGO "SOG0313" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0007H05"
+Map "ctg1" Ends Left 280.000
+Map "ctg1" Ends Right 309.000 Oldctg 2
+Fp_number "E007H05"
+Gel_number    200732
+Bands  1028432 30
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0007P11"
+Map "ctg1" Ends Left 588.000
+Map "ctg1" Ends Right 616.000 Oldctg 2
+Fp_number "E007P11"
+Gel_number    200734
+Bands  1033249 29
+Creation_date 99 11 12 16 20 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0008A04"
+Map "ctg1" Ends Left 296.000
+Map "ctg1" Ends Right 321.000 Oldctg 2
+Fp_number "E008A04"
+Gel_number    200821
+Bands  1033512 26
+Creation_date 99 11 12 16 20 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0008B04"
+Map "ctg1" Ends Left 281.000
+Map "ctg1" Ends Right 314.000 Oldctg 2
+Fp_number "E008B04"
+Gel_number    200841
+Bands  1034058 34
+Creation_date 99 11 12 16 20 
+Modified_date 101 4 27 15 58 
+  
+Clone : "b0008E06"
+Map "ctg1" Ends Left 154.000
+Map "ctg1" Ends Right 183.000 Oldctg 2
+Fp_number "E008E06"
+Gel_number    200822
+Bands  1035911 30
+Approximate_match_to_cosmid "a0061C05"
+Creation_date 99 11 12 16 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0008G08"
+Map "ctg1" Ends Left 57.000
+Map "ctg1" Ends Right 94.000 Oldctg 2
+Fp_number "E008G08"
+Gel_number    200822
+Bands  1037121 38
+Positive_OVERGO "SOG2244" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0008J05"
+Map "ctg1" Ends Left 288.000
+Map "ctg1" Ends Right 310.000 Oldctg 2
+Fp_number "E008J05"
+Gel_number    200833
+Bands  1038669 23
+Approximate_match_to_cosmid "b0008B04"
+Creation_date 99 11 12 16 20 
+Modified_date 101 4 27 15 58 
+  
+Clone : "b0009H13"
+Map "ctg1" Ends Left 137.000
+Map "ctg1" Ends Right 160.000 Oldctg 2
+Fp_number "E009H13"
+Gel_number    200932
+Bands  1046593 24
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0009I24"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 506.000 Oldctg 2
+Fp_number "E009I24"
+Gel_number    200923
+Bands  1047390 23
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0009K15"
+Map "ctg1" Ends Left 224.000
+Map "ctg1" Ends Right 253.000 Oldctg 2
+Fp_number "E009K15"
+Gel_number    200913
+Bands  1048124 30
+Approximate_match_to_cosmid "b0066J19"
+Positive_STS "40C03f" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Creation_date 99 11 12 16 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0011E18"
+Map "ctg1" Ends Left 504.000
+Map "ctg1" Ends Right 528.000 Oldctg 2
+Fp_number "E011E18"
+Gel_number    201122
+Bands  1053853 25
+Approximate_match_to_cosmid "b0002J03"
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0011F05"
+Map "ctg1" Ends Left 617.000
+Map "ctg1" Ends Right 645.000 Oldctg 2
+Fp_number "E011F05"
+Gel_number    201132
+Bands  1054097 29
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0011F06"
+Map "ctg1" Ends Left 616.000
+Map "ctg1" Ends Right 642.000 Oldctg 2
+Fp_number "E011F06"
+Gel_number    201142
+Bands  1054126 27
+Approximate_match_to_cosmid "b0005L08"
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0011F07"
+Map "ctg1" Ends Left 482.000
+Map "ctg1" Ends Right 502.000 Oldctg 2
+Fp_number "E011F07"
+Gel_number    201132
+Bands  1054153 21
+Approximate_match_to_cosmid "b0001O16"
+Positive_STS "OJ000310_29" 
+Positive_STS "OJ990923_08" 
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0011I10"
+Map "ctg1" Ends Left 416.000
+Map "ctg1" Ends Right 448.000 Oldctg 2
+Fp_number "E011I10"
+Gel_number    201123
+Bands  1055784 33
+Positive_STS "C30" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0011M11"
+Map "ctg1" Ends Left 219.000
+Map "ctg1" Ends Right 238.000 Oldctg 2
+Fp_number "E011M11"
+Gel_number    201114
+Bands  1057930 20
+Positive_STS "43L01f" 
+Positive_STS "51J01r" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1790" 
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0011P06"
+Map "ctg1" Ends Left 612.000
+Map "ctg1" Ends Right 643.000 Oldctg 2
+Fp_number "E011P06"
+Gel_number    201144
+Bands  1059487 32
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0012C02"
+Map "ctg1" Ends Left 486.000
+Map "ctg1" Ends Right 505.000 Oldctg 2
+Fp_number "E012C02"
+Gel_number    201221
+Bands  1060985 20
+Exact_match_to_cosmid "b0039P22"
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0012C07"
+Map "ctg1" Ends Left 507.000
+Map "ctg1" Ends Right 534.000 Oldctg 2
+Fp_number "E012C07"
+Gel_number    201211
+Bands  1061097 28
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0012F17"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 403.000 Oldctg 2
+Fp_number "E012F17"
+Gel_number    201232
+Bands  1063025 35
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0012O04"
+Map "ctg1" Ends Left 489.000
+Map "ctg1" Ends Right 506.000 Oldctg 2
+Fp_number "E012O04"
+Gel_number    201224
+Bands  1067525 18
+Positive_OVERGO "SOG1745" 
+Positive_OVERGO "SOG2238" 
+Creation_date 100 2 18 18 17 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0012P23"
+Map "ctg1" Ends Left 335.000
+Map "ctg1" Ends Right 355.000 Oldctg 2
+Fp_number "E012P23"
+Gel_number    201234
+Bands  1068462 21
+Approximate_match_to_cosmid "a0063J07"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0013H20"
+Map "ctg1" Ends Left 582.000
+Map "ctg1" Ends Right 608.000 Oldctg 2
+Fp_number "E013H20"
+Gel_number    201342
+Bands  1072717 27
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0013H23"
+Map "ctg1" Ends Left 27.000
+Map "ctg1" Ends Right 59.000 Oldctg 2
+Fp_number "E013H23"
+Gel_number    201332
+Bands  1072808 33
+Approximate_match_to_cosmid "b0095L23"
+Positive_STS "42D18r" 
+Creation_date 99 11 12 17 9 
+Modified_date 101 5 9 11 19 
+  
+Clone : "b0013I14"
+Map "ctg1" Ends Left 318.000
+Map "ctg1" Ends Right 343.000 Oldctg 2
+Fp_number "E013I14"
+Gel_number    201323
+Bands  1073197 26
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0013L08"
+Map "ctg1" Ends Left 30.000
+Map "ctg1" Ends Right 59.000 Oldctg 2
+Fp_number "E013L08"
+Gel_number    201343
+Bands  1074624 30
+Approximate_match_to_cosmid "b0026K13"
+Positive_STS "42D18r" 
+Creation_date 99 11 12 17 9 
+Modified_date 101 5 9 11 19 
+  
+Clone : "b0014G12"
+Map "ctg1" Ends Left 120.000
+Map "ctg1" Ends Right 147.000 Oldctg 2
+Fp_number "E014G12"
+Gel_number    201422
+Bands  1080878 28
+Approximate_match_to_cosmid "a0066I06"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0014H14"
+Map "ctg1" Ends Left 112.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "E014H14"
+Gel_number    201442
+Bands  1081530 35
+Creation_date 99 11 12 17 9 
+Modified_date 99 11 12 17 40 
+  
+Clone : "b0015C03"
+Map "ctg1" Ends Left 259.000
+Map "ctg1" Ends Right 275.000 Oldctg 2
+Fp_number "E015C03"
+Gel_number    201511
+Bands  1087253 17
+Positive_STS "OJ000317_34" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0015D07"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 401.000 Oldctg 2
+Fp_number "E015D07"
+Gel_number    201531
+Bands  1087836 33
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0015L12"
+Map "ctg1" Ends Left 149.000
+Map "ctg1" Ends Right 174.000 Oldctg 2
+Fp_number "E015L12"
+Gel_number    201543
+Bands  1092399 26
+Approximate_match_to_cosmid "b0001E17"
+Positive_STS "OJ990407_02" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0017D13"
+Map "ctg1" Ends Left 482.000
+Map "ctg1" Ends Right 502.000 Oldctg 2
+Fp_number "E017D13"
+Gel_number    201731
+Bands  1104900 21
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0017G05"
+Map "ctg1" Ends Left 284.000
+Map "ctg1" Ends Right 310.000 Oldctg 2
+Fp_number "E017G05"
+Gel_number    201712
+Bands  1105962 27
+Approximate_match_to_cosmid "b0007H05"
+Positive_STS "OJ990503_27" 
+Positive_STS "OJ991021_17" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0017I16"
+Map "ctg1" Ends Left 371.000
+Map "ctg1" Ends Right 405.000 Oldctg 2
+Fp_number "E017I16"
+Gel_number    201723
+Bands  1106949 35
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0017M11"
+Map "ctg1" Ends Left 602.000
+Map "ctg1" Ends Right 635.000 Oldctg 2
+Fp_number "E017M11"
+Gel_number    201714
+Bands  1108435 34
+Positive_OVERGO "SOG2111" 
+Creation_date 100 2 18 18 17 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0018O01"
+Map "ctg1" Ends Left 152.000
+Map "ctg1" Ends Right 179.000 Oldctg 2
+Fp_number "E018O01"
+Gel_number    201814
+Bands  1117553 28
+Approximate_match_to_cosmid "b0087J21"
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0018O08"
+Map "ctg1" Ends Left 338.000
+Map "ctg1" Ends Right 363.000 Oldctg 2
+Fp_number "E018O08"
+Gel_number    201824
+Bands  1117698 26
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0019B19"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 404.000 Oldctg 2
+Fp_number "E019B19"
+Gel_number    201931
+Bands  1119742 36
+Approximate_match_to_cosmid "a0049H15"
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0019P13"
+Map "ctg1" Ends Left 464.000
+Map "ctg1" Ends Right 487.000 Oldctg 2
+Fp_number "E019P13"
+Gel_number    201934
+Bands  1127272 24
+Approximate_match_to_cosmid "a0062P11"
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0020J05"
+Map "ctg1" Ends Left 35.000
+Map "ctg1" Ends Right 65.000 Oldctg 2
+Fp_number "E020J05"
+Gel_number    202033
+Bands  1132122 31
+Approximate_match_to_cosmid "b0065P01"
+Positive_STS "42D18r" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0020L08"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 401.000 Oldctg 2
+Fp_number "E020L08"
+Gel_number    202043
+Bands  1133341 33
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0021C19"
+Map "ctg1" Ends Left 279.000
+Map "ctg1" Ends Right 301.000 Oldctg 2
+Fp_number "E021C19"
+Gel_number    202111
+Bands  1137332 23
+Approximate_match_to_cosmid "b0007H05"
+Positive_STS "OJ990503_27" 
+Positive_OVERGO "SOG0307" 
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0021D08"
+Map "ctg1" Ends Left 75.000
+Map "ctg1" Ends Right 103.000 Oldctg 2
+Fp_number "E021D08"
+Gel_number    202141
+Bands  1137640 29
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0021G21"
+Map "ctg1" Ends Left 436.000
+Map "ctg1" Ends Right 465.000 Oldctg 2
+Fp_number "E021G21"
+Gel_number    202112
+Bands  1139669 30
+Approximate_match_to_cosmid "a0065C23"
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0021H16"
+Map "ctg1" Ends Left 119.000
+Map "ctg1" Ends Right 145.000 Oldctg 2
+Fp_number "E021H16"
+Gel_number    202142
+Bands  1140135 27
+Approximate_match_to_cosmid "a0066I06"
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0021O16"
+Map "ctg1" Ends Left 371.000
+Map "ctg1" Ends Right 403.000 Oldctg 2
+Fp_number "E021O16"
+Gel_number    202124
+Bands  1144139 33
+Approximate_match_to_cosmid "a0049H15"
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0022B22"
+Map "ctg1" Ends Left 3.000
+Map "ctg1" Ends Right 29.000 Oldctg 2
+Fp_number "E022B22"
+Gel_number    202241
+Bands  1145870 27
+Approximate_match_to_cosmid "a0070F12"
+Creation_date 99 11 12 17 9 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0022I18"
+Map "ctg1" Ends Left 410.000
+Map "ctg1" Ends Right 437.000 Oldctg 2
+Fp_number "E022I18"
+Gel_number    202223
+Bands  1149866 28
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0026K13"
+Map "ctg1" Ends Left 28.000
+Map "ctg1" Ends Right 59.000 Oldctg 2
+Fp_number "E026K13"
+Gel_number    202613
+Bands  1177062 32
+Positive_STS "42D18r" 
+Positive_OVERGO "SOG0961" 
+Positive_OVERGO "SOG2244" 
+Creation_date 99 11 12 17 9 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0027A06"
+Map "ctg1" Ends Left 147.000
+Map "ctg1" Ends Right 175.000 Oldctg 2
+Fp_number "E027A06"
+Gel_number    202721
+Bands  1180292 29
+Creation_date 100 2 18 18 17 
+Modified_date 100 2 23 10 44 
+  
+Clone : "b0027A18"
+Map "ctg1" Ends Left 19.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Fp_number "E027A18"
+Gel_number    202721
+Bands  1180564 28
+Creation_date 100 2 18 18 17 
+Modified_date 101 4 11 16 55 
+  
+Clone : "b0027C05"
+Map "ctg1" Ends Left 545.000
+Map "ctg1" Ends Right 570.000 Oldctg 2
+Fp_number "E027C05"
+Gel_number    202711
+Bands  1181322 26
+Approximate_match_to_cosmid "b0095I21"
+Creation_date 100 2 18 18 17 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0027F23"
+Map "ctg1" Ends Left 331.000
+Map "ctg1" Ends Right 360.000 Oldctg 2
+Fp_number "E027F23"
+Gel_number    202732
+Bands  1183196 30
+Approximate_match_to_cosmid "a0093K20"
+Positive_OVERGO "SOG0657" 
+Creation_date 100 2 18 18 18 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0028E05"
+Map "ctg1" Ends Left 90.000
+Map "ctg1" Ends Right 124.000 Oldctg 2
+Fp_number "E028E05"
+Gel_number    202812
+Bands  1190321 35
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 17 9 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0029A01"
+Map "ctg1" Ends Left 50.000
+Map "ctg1" Ends Right 85.000 Oldctg 2
+Fp_number "E029A01"
+Gel_number    202911
+Bands  1196563 36
+Approximate_match_to_cosmid "a0053J14"
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 17 10 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0029A21"
+Map "ctg1" Ends Left 1.000
+Map "ctg1" Ends Right 30.000 Oldctg 2
+Fp_number "E029A21"
+Gel_number    202911
+Bands  1197081 30
+Creation_date 99 11 12 17 10 
+Modified_date 101 4 11 16 55 
+  
+Clone : "b0029B04"
+Map "ctg1" Ends Left 362.000
+Map "ctg1" Ends Right 395.000 Oldctg 2
+Fp_number "E029B04"
+Gel_number    202941
+Bands  1197254 34
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 17 10 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0029B16"
+Map "ctg1" Ends Left 227.000
+Map "ctg1" Ends Right 254.000 Oldctg 2
+Fp_number "E029B16"
+Gel_number    202941
+Bands  1197573 28
+Creation_date 99 11 12 17 10 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0029D12"
+Map "ctg1" Ends Left 521.000
+Map "ctg1" Ends Right 547.000 Oldctg 2
+Fp_number "E029D12"
+Gel_number    202941
+Bands  1198598 27
+Creation_date 99 11 12 17 10 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0029E05"
+Map "ctg1" Ends Left 264.000
+Map "ctg1" Ends Right 283.000 Oldctg 2
+Fp_number "E029E05"
+Gel_number    202912
+Bands  1198979 20
+Approximate_match_to_cosmid "a0085N03"
+Positive_STS "OJ000317_34" 
+Creation_date 99 11 12 17 10 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0029F18"
+Map "ctg1" Ends Left 315.000
+Map "ctg1" Ends Right 338.000 Oldctg 2
+Fp_number "E029F18"
+Gel_number    202942
+Bands  1199758 24
+Positive_STS "OJ991021_17" 
+Creation_date 99 11 12 17 10 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0030B05"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 503.000 Oldctg 2
+Fp_number "E030B05"
+Gel_number    203031
+Bands  1205762 20
+Exact_match_to_cosmid "b0087O07"
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0030E04"
+Map "ctg1" Ends Left 508.000
+Map "ctg1" Ends Right 533.000 Oldctg 2
+Fp_number "E030E04"
+Gel_number    203022
+Bands  1207382 26
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0031K18"
+Map "ctg1" Ends Left 437.000
+Map "ctg1" Ends Right 467.000 Oldctg 2
+Fp_number "E031K18"
+Gel_number    203123
+Bands  1219990 31
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0031L04"
+Map "ctg1" Ends Left 19.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Fp_number "E031L04"
+Gel_number    203143
+Bands  1220257 28
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0032A21"
+Map "ctg1" Ends Left 506.000
+Map "ctg1" Ends Right 534.000 Oldctg 2
+Fp_number "E032A21"
+Gel_number    203211
+Bands  1223262 29
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0032A22"
+Map "ctg1" Ends Left 506.000
+Map "ctg1" Ends Right 534.000 Oldctg 2
+Fp_number "E032A22"
+Gel_number    203221
+Bands  1223291 29
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0032B06"
+Map "ctg1" Ends Left 604.000
+Map "ctg1" Ends Right 634.000 Oldctg 2
+Fp_number "E032B06"
+Gel_number    203241
+Bands  1223493 31
+Approximate_match_to_cosmid "b0033I06"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0032B10"
+Map "ctg1" Ends Left 559.000
+Map "ctg1" Ends Right 592.000 Oldctg 2
+Fp_number "E032B10"
+Gel_number    203241
+Bands  1223593 34
+Positive_OVERGO "SOG0279" 
+Positive_OVERGO "SOG0313" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0032E14"
+Map "ctg1" Ends Left 114.000
+Map "ctg1" Ends Right 143.000 Oldctg 2
+Fp_number "E032E14"
+Gel_number    203222
+Bands  1225535 30
+Exact_match_to_cosmid "b0048H24"
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0032J19"
+Map "ctg1" Ends Left 430.000
+Map "ctg1" Ends Right 463.000 Oldctg 2
+Fp_number "E032J19"
+Gel_number    203233
+Bands  1228438 34
+Positive_STS "OJ000214_10" 
+Positive_OVERGO "SOG0389" 
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0032O15"
+Map "ctg1" Ends Left 514.000
+Map "ctg1" Ends Right 540.000 Oldctg 2
+Fp_number "E032O15"
+Gel_number    203214
+Bands  1231029 27
+Positive_OVERGO "SOG1383" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0032P10"
+Map "ctg1" Ends Left 509.000
+Map "ctg1" Ends Right 533.000 Oldctg 2
+Fp_number "E032P10"
+Gel_number    203244
+Bands  1231495 25
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0033I06"
+Map "ctg1" Ends Left 601.000
+Map "ctg1" Ends Right 636.000 Oldctg 2
+Fp_number "E033I06"
+Gel_number    203323
+Bands  1236328 36
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0033L05"
+Map "ctg1" Ends Left 534.000
+Map "ctg1" Ends Right 563.000 Oldctg 2
+Fp_number "E033L05"
+Gel_number    203333
+Bands  1238077 30
+Approximate_match_to_cosmid "b0088P23"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0033O02"
+Map "ctg1" Ends Left 28.000
+Map "ctg1" Ends Right 58.000 Oldctg 2
+Fp_number "E033O02"
+Gel_number    203324
+Bands  1239771 31
+Exact_match_to_cosmid "b0026K13"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0033O15"
+Map "ctg1" Ends Left 149.000
+Map "ctg1" Ends Right 171.000 Oldctg 2
+Fp_number "E033O15"
+Gel_number    203314
+Bands  1240083 23
+Exact_match_to_cosmid "b0096L16"
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0034B22"
+Map "ctg1" Ends Left 216.000
+Map "ctg1" Ends Right 239.000 Oldctg 2
+Fp_number "E034B22"
+Gel_number    203441
+Bands  1241989 24
+Approximate_match_to_cosmid "a0083M05"
+Positive_STS "OJ000207_10" 
+Positive_OVERGO "SOG1176" 
+Positive_OVERGO "SOG1774" 
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0035D11"
+Map "ctg1" Ends Left 254.000
+Map "ctg1" Ends Right 279.000 Oldctg 2
+Fp_number "E035D11"
+Gel_number    203531
+Bands  1251090 26
+Approximate_match_to_cosmid "b0006M06"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0035E17"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 503.000 Oldctg 2
+Fp_number "E035E17"
+Gel_number    203512
+Bands  1251790 20
+Approximate_match_to_cosmid "b0001O16"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0035G22"
+Map "ctg1" Ends Left 238.000
+Map "ctg1" Ends Right 268.000 Oldctg 2
+Fp_number "E035G22"
+Gel_number    203522
+Bands  1252956 31
+Approximate_match_to_cosmid "a0083M16"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0036D04"
+Map "ctg1" Ends Left 515.000
+Map "ctg1" Ends Right 536.000 Oldctg 2
+Fp_number "E036D04"
+Gel_number    203641
+Bands  1259441 22
+Approximate_match_to_cosmid "b0032A21"
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0036O09"
+Map "ctg1" Ends Left 374.000
+Map "ctg1" Ends Right 402.000 Oldctg 2
+Fp_number "E036O09"
+Gel_number    203614
+Bands  1265631 29
+Approximate_match_to_cosmid "b0093C15"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0037K01"
+Map "ctg1" Ends Left 388.000
+Map "ctg1" Ends Right 403.000 Oldctg 2
+Fp_number "E037K01"
+Gel_number    203713
+Bands  1271360 16
+Approximate_match_to_cosmid "b0093C15"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0037K04"
+Map "ctg1" Ends Left 218.000
+Map "ctg1" Ends Right 247.000 Oldctg 2
+Fp_number "E037K04"
+Gel_number    203723
+Bands  1271428 30
+Positive_STS "S1001" 
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0038B06"
+Map "ctg1" Ends Left 159.000
+Map "ctg1" Ends Right 180.000 Oldctg 2
+Fp_number "E038B06"
+Gel_number    203841
+Bands  1274951 22
+Approximate_match_to_cosmid "a0061C05"
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0039F01"
+Map "ctg1" Ends Left 250.000
+Map "ctg1" Ends Right 277.000 Oldctg 2
+Fp_number "E039F01"
+Gel_number    203932
+Bands  1285003 28
+Creation_date 100 2 18 18 18 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0039P22"
+Map "ctg1" Ends Left 485.000
+Map "ctg1" Ends Right 505.000 Oldctg 2
+Fp_number "E039P22"
+Gel_number    203944
+Bands  1291198 21
+Positive_OVERGO "SOG1745" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0040B06"
+Map "ctg1" Ends Left 544.000
+Map "ctg1" Ends Right 569.000 Oldctg 2
+Fp_number "E040B06"
+Gel_number    204041
+Bands  1291886 26
+Approximate_match_to_cosmid "a0063H14"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0040F18"
+Map "ctg1" Ends Left 4.000
+Map "ctg1" Ends Right 36.000 Oldctg 2
+Fp_number "E040F18"
+Gel_number    204042
+Bands  1294331 33
+Positive_STS "OJ000323_02" 
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0040H01"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 504.000 Oldctg 2
+Fp_number "E040H01"
+Gel_number    204032
+Bands  1295099 21
+Approximate_match_to_cosmid "b0001O16"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0040I02"
+Map "ctg1" Ends Left 33.000
+Map "ctg1" Ends Right 65.000 Oldctg 2
+Fp_number "E040I02"
+Gel_number    204023
+Bands  1295698 33
+Approximate_match_to_cosmid "b0065P01"
+Positive_STS "42D18r" 
+Creation_date 99 11 12 19 29 
+Modified_date 101 5 9 11 19 
+  
+Clone : "b0040M10"
+Map "ctg1" Ends Left 506.000
+Map "ctg1" Ends Right 533.000 Oldctg 2
+Fp_number "E040M10"
+Gel_number    204024
+Bands  1298080 28
+Approximate_match_to_cosmid "b0002J03"
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0041L18"
+Map "ctg1" Ends Left 159.000
+Map "ctg1" Ends Right 180.000 Oldctg 2
+Fp_number "E041L18"
+Gel_number    204143
+Bands  1305398 22
+Approximate_match_to_cosmid "a0061C05"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0043I08"
+Map "ctg1" Ends Left 238.000
+Map "ctg1" Ends Right 269.000 Oldctg 2
+Fp_number "E043I08"
+Gel_number    204323
+Bands  1319806 32
+Approximate_match_to_cosmid "a0083M16"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0044E16"
+Map "ctg1" Ends Left 19.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Fp_number "E044E16"
+Gel_number    204422
+Bands  1326029 28
+Positive_STS "OJ990915_01" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0044G05"
+Map "ctg1" Ends Left 102.000
+Map "ctg1" Ends Right 138.000 Oldctg 2
+Fp_number "E044G05"
+Gel_number    204412
+Bands  1326890 37
+Approximate_match_to_cosmid "b0091A24"
+Positive_STS "OJ990412_10" 
+Positive_STS "OJ991015_10" 
+Positive_STS "OJ991018_02" 
+Creation_date 99 11 12 19 29 
+Modified_date 104 4 9 18 13 
+  
+Clone : "b0044H22"
+Map "ctg1" Ends Left 89.000
+Map "ctg1" Ends Right 123.000 Oldctg 2
+Fp_number "E044H22"
+Gel_number    204442
+Bands  1327815 35
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0044K15"
+Map "ctg1" Ends Left 422.000
+Map "ctg1" Ends Right 456.000 Oldctg 2
+Fp_number "E044K15"
+Gel_number    204413
+Bands  1329090 35
+Approximate_match_to_cosmid "a0061L15"
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0044M05"
+Map "ctg1" Ends Left 32.000
+Map "ctg1" Ends Right 62.000 Oldctg 2
+Fp_number "E044M05"
+Gel_number    204414
+Bands  1329947 31
+Approximate_match_to_cosmid "b0065P01"
+Positive_STS "42D18r" 
+Positive_STS "OJ990605_38" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0044N09"
+Map "ctg1" Ends Left 543.000
+Map "ctg1" Ends Right 569.000 Oldctg 2
+Fp_number "E044N09"
+Gel_number    204434
+Bands  1330491 27
+Approximate_match_to_cosmid "b0083H05"
+Positive_STS "OJ000251_05" 
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0044P03"
+Map "ctg1" Ends Left 463.000
+Map "ctg1" Ends Right 490.000 Oldctg 2
+Fp_number "E044P03"
+Gel_number    204434
+Bands  1331453 28
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Creation_date 99 11 12 19 29 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0044P13"
+Map "ctg1" Ends Left 372.000
+Map "ctg1" Ends Right 402.000 Oldctg 2
+Fp_number "E044P13"
+Gel_number    204434
+Bands  1331670 31
+Approximate_match_to_cosmid "a0049H15"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0045F08"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 404.000 Oldctg 2
+Fp_number "E045F08"
+Gel_number    204542
+Bands  1334453 36
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0045P08"
+Map "ctg1" Ends Left 26.000
+Map "ctg1" Ends Right 60.000 Oldctg 2
+Fp_number "E045P08"
+Gel_number    204544
+Bands  1339300 35
+Positive_OVERGO "SOG2244" 
+Positive_STS "42D18r" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 28 17 14 
+  
+Clone : "b0046G04"
+Map "ctg1" Ends Left 284.000
+Map "ctg1" Ends Right 310.000 Oldctg 2
+Fp_number "E046G04"
+Gel_number    204622
+Bands  1342953 27
+Approximate_match_to_cosmid "b0007H05"
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0046K18"
+Map "ctg1" Ends Left 317.000
+Map "ctg1" Ends Right 347.000 Oldctg 2
+Fp_number "E046K18"
+Gel_number    204623
+Bands  1345696 31
+Positive_OVERGO "SOG0657" 
+Creation_date 100 2 18 18 18 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0046O20"
+Map "ctg1" Ends Left 600.000
+Map "ctg1" Ends Right 630.000 Oldctg 2
+Fp_number "E046O20"
+Gel_number    204624
+Bands  1347881 31
+Approximate_match_to_cosmid "b0033I06"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0047A11"
+Map "ctg1" Ends Left 278.000
+Map "ctg1" Ends Right 301.000 Oldctg 2
+Fp_number "E047A11"
+Gel_number    204711
+Bands  1348820 24
+Positive_OVERGO "SOG0307" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0048G11"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 502.000 Oldctg 2
+Fp_number "E048G11"
+Gel_number    204812
+Bands  1360797 19
+Positive_OVERGO "SOG1745" 
+Positive_Probe "PRC0214" 
+Creation_date 99 11 12 19 29 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0048H12"
+Map "ctg1" Ends Left 300.000
+Map "ctg1" Ends Right 321.000 Oldctg 2
+Fp_number "E048H12"
+Gel_number    204842
+Bands  1361434 22
+Approximate_match_to_cosmid "b0062F24"
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0048H24"
+Map "ctg1" Ends Left 112.000
+Map "ctg1" Ends Right 143.000 Oldctg 2
+Fp_number "E048H24"
+Gel_number    204842
+Bands  1361692 32
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0049H13"
+Map "ctg1" Ends Left 260.000
+Map "ctg1" Ends Right 282.000 Oldctg 2
+Fp_number "E049H13"
+Gel_number    204932
+Bands  1370309 23
+Creation_date 99 11 12 19 29 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0050B15"
+Map "ctg1" Ends Left 370.000
+Map "ctg1" Ends Right 401.000 Oldctg 2
+Fp_number "E050B15"
+Gel_number    205031
+Bands  1376001 32
+Approximate_match_to_cosmid "a0049H15"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 12 22 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0050H04"
+Map "ctg1" Ends Left 425.000
+Map "ctg1" Ends Right 456.000 Oldctg 2
+Fp_number "E050H04"
+Gel_number    205042
+Bands  1378959 32
+Approximate_match_to_cosmid "b0057J05"
+Positive_STS "OJ000214_10" 
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG0389" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 22 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0051D10"
+Map "ctg1" Ends Left 497.000
+Map "ctg1" Ends Right 514.000 Oldctg 2
+Fp_number "E051D10"
+Gel_number    205141
+Bands  1385709 18
+Positive_STS "OJ990820_14" 
+Positive_OVERGO "SOG2238" 
+Creation_date 99 11 12 22 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0051N14"
+Map "ctg1" Ends Left 254.000
+Map "ctg1" Ends Right 279.000 Oldctg 2
+Fp_number "E051N14"
+Gel_number    205144
+Bands  1390764 26
+Positive_OVERGO "SOG1401" 
+Creation_date 99 11 12 22 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0052B16"
+Map "ctg1" Ends Left 515.000
+Map "ctg1" Ends Right 533.000 Oldctg 2
+Fp_number "E052B16"
+Gel_number    205241
+Bands  1392594 19
+Exact_match_to_cosmid "b0030E04"
+Creation_date 100 2 18 18 18 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0052K20"
+Map "ctg1" Ends Left 147.000
+Map "ctg1" Ends Right 175.000 Oldctg 2
+Fp_number "E052K20"
+Gel_number    205223
+Bands  1397544 29
+Positive_OVERGO "SOG0127" 
+Positive_OVERGO "SOG1225" 
+Creation_date 100 2 18 18 18 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0053P10"
+Map "ctg1" Ends Left 219.000
+Map "ctg1" Ends Right 236.000 Oldctg 2
+Fp_number "E053P10"
+Gel_number    205344
+Bands  1408007 18
+Exact_match_to_cosmid "b0037K04"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0053P12"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 406.000 Oldctg 2
+Fp_number "E053P12"
+Gel_number    205344
+Bands  1408049 38
+Positive_STS "OJ000221_13" 
+Positive_STS "OJ000350_20" 
+Creation_date 99 11 12 22 58 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0054A15"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 504.000 Oldctg 2
+Fp_number "E054A15"
+Gel_number    205411
+Bands  1408710 21
+Exact_match_to_cosmid "b0001O16"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0057E10"
+Map "ctg1" Ends Left 604.000
+Map "ctg1" Ends Right 639.000 Oldctg 2
+Fp_number "E057E10"
+Gel_number    205722
+Bands  1432200 36
+Positive_OVERGO "SOG0252" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0057J05"
+Map "ctg1" Ends Left 414.000
+Map "ctg1" Ends Right 452.000 Oldctg 2
+Fp_number "E057J05"
+Gel_number    205733
+Bands  1434710 39
+Positive_OVERGO "SOG0389" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0057P12"
+Map "ctg1" Ends Left 96.000
+Map "ctg1" Ends Right 121.000 Oldctg 2
+Fp_number "E057P12"
+Gel_number    205744
+Bands  1437870 26
+Approximate_match_to_cosmid "b0044H22"
+Positive_STS "OJ991015_10" 
+Creation_date 99 11 12 22 58 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0058B12"
+Map "ctg1" Ends Left 508.000
+Map "ctg1" Ends Right 533.000 Oldctg 2
+Fp_number "E058B12"
+Gel_number    205841
+Bands  1438797 26
+Approximate_match_to_cosmid "b0002J03"
+Positive_STS "OJ990713_06" 
+Creation_date 99 11 12 22 58 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0058D05"
+Map "ctg1" Ends Left 426.000
+Map "ctg1" Ends Right 461.000 Oldctg 2
+Fp_number "E058D05"
+Gel_number    205831
+Bands  1439861 36
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0058N15"
+Map "ctg1" Ends Left 109.000
+Map "ctg1" Ends Right 138.000 Oldctg 2
+Fp_number "E058N15"
+Gel_number    205834
+Bands  1444949 30
+Approximate_match_to_cosmid "a0018B22"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 12 22 58 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0059H03"
+Map "ctg1" Ends Left 284.000
+Map "ctg1" Ends Right 301.000 Oldctg 2
+Fp_number "E059H03"
+Gel_number    205932
+Bands  1449650 18
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0059I06"
+Map "ctg1" Ends Left 117.000
+Map "ctg1" Ends Right 147.000 Oldctg 2
+Fp_number "E059I06"
+Gel_number    205923
+Bands  1449993 31
+Creation_date 99 11 12 22 58 
+Modified_date 99 11 12 23 38 
+  
+Clone : "b0060G19"
+Map "ctg1" Ends Left 155.000
+Map "ctg1" Ends Right 180.000 Oldctg 2
+Fp_number "E060G19"
+Gel_number    206012
+Bands  1456508 26
+Approximate_match_to_cosmid "a0061C05"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0060I06"
+Map "ctg1" Ends Left 149.000
+Map "ctg1" Ends Right 176.000 Oldctg 2
+Fp_number "E060I06"
+Gel_number    206023
+Bands  1457190 28
+Positive_STS "OJ000301_13" 
+Positive_STS "OJ990407_02" 
+Positive_OVERGO "SOG1670" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0062F24"
+Map "ctg1" Ends Left 292.000
+Map "ctg1" Ends Right 320.000 Oldctg 2
+Fp_number "E062F24"
+Gel_number    206242
+Bands  1472133 29
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0062O20"
+Map "ctg1" Ends Left 453.000
+Map "ctg1" Ends Right 484.000 Oldctg 2
+Fp_number "E062O20"
+Gel_number    206224
+Bands  1477118 32
+Approximate_match_to_cosmid "b0006L19"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0063D23"
+Map "ctg1" Ends Left 615.000
+Map "ctg1" Ends Right 643.000 Oldctg 2
+Fp_number "E063D23"
+Gel_number    206331
+Bands  1479687 29
+Exact_match_to_cosmid "b0005L08"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0063E10"
+Map "ctg1" Ends Left 161.000
+Map "ctg1" Ends Right 179.000 Oldctg 2
+Fp_number "E063E10"
+Gel_number    206322
+Bands  1479942 19
+Approximate_match_to_cosmid "a0061C05"
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 12 22 58 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0063H06"
+Map "ctg1" Ends Left 515.000
+Map "ctg1" Ends Right 536.000 Oldctg 2
+Fp_number "E063H06"
+Gel_number    206342
+Bands  1481490 22
+Exact_match_to_cosmid "b0032O15"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0063M13"
+Map "ctg1" Ends Left 62.000
+Map "ctg1" Ends Right 89.000 Oldctg 2
+Fp_number "E063M13"
+Gel_number    206314
+Bands  1484095 28
+Approximate_match_to_cosmid "a0082H14"
+Positive_STS "OJ990605_38" 
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0064G15"
+Map "ctg1" Ends Left 602.000
+Map "ctg1" Ends Right 639.000 Oldctg 2
+Fp_number "E064G15"
+Gel_number    206412
+Bands  1489389 38
+Creation_date 100 2 18 18 19 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0065E14"
+Map "ctg1" Ends Left 107.000
+Map "ctg1" Ends Right 144.000 Oldctg 2
+Fp_number "E065E14"
+Gel_number    206522
+Bands  1496264 38
+Approximate_match_to_cosmid "a0051O20"
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0065F10"
+Map "ctg1" Ends Left 319.000
+Map "ctg1" Ends Right 339.000 Oldctg 2
+Fp_number "E065F10"
+Gel_number    206542
+Bands  1496711 21
+Approximate_match_to_cosmid "a0060H05"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0065H04"
+Map "ctg1" Ends Left 53.000
+Map "ctg1" Ends Right 84.000 Oldctg 2
+Fp_number "E065H04"
+Gel_number    206542
+Bands  1497505 32
+Approximate_match_to_cosmid "b0008G08"
+Creation_date 99 11 12 22 58 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0065P01"
+Map "ctg1" Ends Left 32.000
+Map "ctg1" Ends Right 76.000 Oldctg 2
+Fp_number "E065P01"
+Gel_number    206534
+Bands  1501474 45
+Positive_OVERGO "SOG2244" 
+Creation_date 100 2 18 18 19 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0066F04"
+Map "ctg1" Ends Left 473.000
+Map "ctg1" Ends Right 501.000 Oldctg 2
+Fp_number "E066F04"
+Gel_number    206642
+Bands  1504476 29
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991210_12" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0066J19"
+Map "ctg1" Ends Left 222.000
+Map "ctg1" Ends Right 255.000 Oldctg 2
+Fp_number "E066J19"
+Gel_number    206633
+Bands  1506843 34
+Positive_OVERGO "SOG1774" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0068A07"
+Map "ctg1" Ends Left 463.000
+Map "ctg1" Ends Right 490.000 Oldctg 2
+Fp_number "E068A07"
+Gel_number    206811
+Bands  1517437 28
+Approximate_match_to_cosmid "b0044P03"
+Positive_OVERGO "SOG1200" 
+Creation_date 100 7 8 18 35 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0069J02"
+Map "ctg1" Ends Left 515.000
+Map "ctg1" Ends Right 534.000 Oldctg 2
+Fp_number "E069J02"
+Gel_number    206943
+Bands  1529281 20
+Approximate_match_to_cosmid "a0003C15"
+Creation_date 100 2 18 18 19 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0069M06"
+Map "ctg1" Ends Left 387.000
+Map "ctg1" Ends Right 420.000 Oldctg 2
+Fp_number "E069M06"
+Gel_number    206924
+Bands  1530818 34
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Positive_OVERGO "SOG0223" 
+Creation_date 99 11 12 22 58 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0070B17"
+Map "ctg1" Ends Left 4.000
+Map "ctg1" Ends Right 37.000 Oldctg 2
+Fp_number "E070B17"
+Gel_number    207031
+Bands  1533535 34
+Creation_date 100 2 18 18 19 
+Modified_date 101 4 11 16 55 
+  
+Clone : "b0070C06"
+Map "ctg1" Ends Left 201.000
+Map "ctg1" Ends Right 218.000 Oldctg 2
+Fp_number "E070C06"
+Gel_number    207021
+Bands  1533806 18
+Approximate_match_to_cosmid "a0002N24"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0071G06"
+Map "ctg1" Ends Left 251.000
+Map "ctg1" Ends Right 277.000 Oldctg 2
+Fp_number "E071G06"
+Gel_number    207122
+Bands  1544433 27
+Approximate_match_to_cosmid "b0003G18"
+Creation_date 100 2 18 18 19 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0072A10"
+Map "ctg1" Ends Left 232.000
+Map "ctg1" Ends Right 257.000 Oldctg 2
+Fp_number "E072A10"
+Gel_number    207221
+Bands  1549409 26
+Positive_OVERGO "SOG1176" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0072G05"
+Map "ctg1" Ends Left 354.000
+Map "ctg1" Ends Right 386.000 Oldctg 2
+Fp_number "E072G05"
+Gel_number    207212
+Bands  1552101 33
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0072N12"
+Map "ctg1" Ends Left 504.000
+Map "ctg1" Ends Right 529.000 Oldctg 2
+Fp_number "E072N12"
+Gel_number    207244
+Bands  1556347 26
+Approximate_match_to_cosmid "b0002J03"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0073F02"
+Map "ctg1" Ends Left 569.000
+Map "ctg1" Ends Right 599.000 Oldctg 2
+Fp_number "E073F02"
+Gel_number    207342
+Bands  1558329 31
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0073N09"
+Map "ctg1" Ends Left 238.000
+Map "ctg1" Ends Right 262.000 Oldctg 2
+Fp_number "E073N09"
+Gel_number    207334
+Bands  1562579 25
+Approximate_match_to_cosmid "a0083M16"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0074B19"
+Map "ctg1" Ends Left 29.000
+Map "ctg1" Ends Right 59.000 Oldctg 2
+Fp_number "E074B19"
+Gel_number    207431
+Bands  1565322 31
+Approximate_match_to_cosmid "b0095L23"
+Positive_OVERGO "SOG0961" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0074F15"
+Map "ctg1" Ends Left 18.000
+Map "ctg1" Ends Right 48.000 Oldctg 2
+Fp_number "E074F15"
+Gel_number    207432
+Bands  1567753 31
+Approximate_match_to_cosmid "a0060C20"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0074I20"
+Map "ctg1" Ends Left 344.000
+Map "ctg1" Ends Right 369.000 Oldctg 2
+Fp_number "E074I20"
+Gel_number    207423
+Bands  1569785 26
+Exact_match_to_cosmid "a0089D17"
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0074P13"
+Map "ctg1" Ends Left 179.000
+Map "ctg1" Ends Right 221.000 Oldctg 2
+Fp_number "E074P13"
+Gel_number    207434
+Bands  1574002 43
+Positive_STS "OJ990407_02" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0074P17"
+Map "ctg1" Ends Left 317.000
+Map "ctg1" Ends Right 346.000 Oldctg 2
+Fp_number "E074P17"
+Gel_number    207434
+Bands  1574133 30
+Approximate_match_to_cosmid "b0046K18"
+Creation_date 99 11 13 0 57 
+Modified_date 101 5 9 11 19 
+  
+Clone : "b0076P03"
+Map "ctg1" Ends Left 93.000
+Map "ctg1" Ends Right 127.000 Oldctg 2
+Fp_number "E076P03"
+Gel_number    207634
+Bands  1590425 35
+Creation_date 100 2 18 18 20 
+Modified_date 100 2 23 12 7 
+  
+Clone : "b0077L14"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 503.000 Oldctg 2
+Fp_number "E077L14"
+Gel_number    207743
+Bands  1596644 20
+Approximate_match_to_cosmid "b0001O16"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0078C03"
+Map "ctg1" Ends Left 534.000
+Map "ctg1" Ends Right 563.000 Oldctg 2
+Fp_number "E078C03"
+Gel_number    207811
+Bands  1599910 30
+Approximate_match_to_cosmid "b0088P23"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0078C04"
+Map "ctg1" Ends Left 356.000
+Map "ctg1" Ends Right 388.000 Oldctg 2
+Fp_number "E078C04"
+Gel_number    207821
+Bands  1599940 33
+Approximate_match_to_cosmid "a0073L24"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0078G10"
+Map "ctg1" Ends Left 1.000
+Map "ctg1" Ends Right 31.000 Oldctg 2
+Fp_number "E078G10"
+Gel_number    207822
+Bands  1602367 31
+Exact_match_to_cosmid "b0081H12"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0079E11"
+Map "ctg1" Ends Left 117.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "E079E11"
+Gel_number    207912
+Bands  1609875 30
+Approximate_match_to_cosmid "a0066I06"
+Positive_OVERGO "SOG0961" 
+Creation_date 100 2 18 18 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0079F04"
+Map "ctg1" Ends Left 439.000
+Map "ctg1" Ends Right 465.000 Oldctg 2
+Fp_number "E079F04"
+Gel_number    207942
+Bands  1610280 27
+Approximate_match_to_cosmid "b0031K18"
+Positive_OVERGO "SOG1200" 
+Positive_OVERGO "SOG1508" 
+Creation_date 100 2 18 18 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0079F11"
+Map "ctg1" Ends Left 439.000
+Map "ctg1" Ends Right 471.000 Oldctg 2
+Fp_number "E079F11"
+Gel_number    207932
+Bands  1610420 33
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0079K07"
+Map "ctg1" Ends Left 312.000
+Map "ctg1" Ends Right 334.000 Oldctg 2
+Fp_number "E079K07"
+Gel_number    207913
+Bands  1613317 23
+Approximate_match_to_cosmid "a0058C01"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0079L02"
+Map "ctg1" Ends Left 320.000
+Map "ctg1" Ends Right 350.000 Oldctg 2
+Fp_number "E079L02"
+Gel_number    207943
+Bands  1613837 31
+Approximate_match_to_cosmid "a0023L15"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0080E04"
+Map "ctg1" Ends Left 362.000
+Map "ctg1" Ends Right 395.000 Oldctg 2
+Fp_number "E080E04"
+Gel_number    208022
+Bands  1618991 34
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0080J15"
+Map "ctg1" Ends Left 483.000
+Map "ctg1" Ends Right 504.000 Oldctg 2
+Fp_number "E080J15"
+Gel_number    208033
+Bands  1622338 22
+Positive_OVERGO "SOG1745" 
+Creation_date 100 2 18 18 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0080M24"
+Map "ctg1" Ends Left 251.000
+Map "ctg1" Ends Right 277.000 Oldctg 2
+Fp_number "E080M24"
+Gel_number    208024
+Bands  1624239 27
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0080P18"
+Map "ctg1" Ends Left 169.000
+Map "ctg1" Ends Right 201.000 Oldctg 2
+Fp_number "E080P18"
+Gel_number    208044
+Bands  1625931 33
+Creation_date 100 2 18 18 20 
+Modified_date 101 5 9 11 19 
+  
+Clone : "b0081E09"
+Map "ctg1" Ends Left 484.000
+Map "ctg1" Ends Right 500.000 Oldctg 2
+Fp_number "E081E09"
+Gel_number    208112
+Bands  1628690 17
+Exact_match_to_cosmid "b0081N07"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0081F15"
+Map "ctg1" Ends Left 175.000
+Map "ctg1" Ends Right 211.000 Oldctg 2
+Fp_number "E081F15"
+Gel_number    208132
+Bands  1629477 37
+Approximate_match_to_cosmid "b0074P13"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0081G19"
+Map "ctg1" Ends Left 3.000
+Map "ctg1" Ends Right 31.000 Oldctg 2
+Fp_number "E081G19"
+Gel_number    208112
+Bands  1630233 29
+Approximate_match_to_cosmid "b0092L16"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0081H12"
+Map "ctg1" Ends Left 0.000
+Map "ctg1" Ends Right 32.000 Oldctg 2
+Fp_number "E081H12"
+Gel_number    208142
+Bands  1630647 33
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0081J21"
+Map "ctg1" Ends Left 119.000
+Map "ctg1" Ends Right 146.000 Oldctg 2
+Fp_number "E081J21"
+Gel_number    208133
+Bands  1632152 28
+Approximate_match_to_cosmid "a0088C05"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0081M20"
+Map "ctg1" Ends Left 560.000
+Map "ctg1" Ends Right 592.000 Oldctg 2
+Fp_number "E081M20"
+Gel_number    208124
+Bands  1634059 33
+Positive_OVERGO "SOG0313" 
+Positive_OVERGO "SOG0316" 
+Creation_date 100 2 18 18 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0081M22"
+Map "ctg1" Ends Left 476.000
+Map "ctg1" Ends Right 499.000 Oldctg 2
+Fp_number "E081M22"
+Gel_number    208124
+Bands  1634092 24
+Approximate_match_to_cosmid "b0066F04"
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0081N07"
+Map "ctg1" Ends Left 481.000
+Map "ctg1" Ends Right 504.000 Oldctg 2
+Fp_number "E081N07"
+Gel_number    208134
+Bands  1634301 24
+Positive_OVERGO "SOG1200" 
+Creation_date 100 2 18 18 20 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0082F04"
+Map "ctg1" Ends Left 4.000
+Map "ctg1" Ends Right 34.000 Oldctg 2
+Fp_number "E082F04"
+Gel_number    208242
+Bands  1638867 31
+Approximate_match_to_cosmid "a0070F12"
+Positive_OVERGO "SOG0961" 
+Creation_date 100 2 18 18 21 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0082H15"
+Map "ctg1" Ends Left 523.000
+Map "ctg1" Ends Right 551.000 Oldctg 2
+Fp_number "E082H15"
+Gel_number    208232
+Bands  1640439 29
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0082I08"
+Map "ctg1" Ends Left 576.000
+Map "ctg1" Ends Right 604.000 Oldctg 2
+Fp_number "E082I08"
+Gel_number    208223
+Bands  1640796 29
+Creation_date 100 2 18 18 20 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0083C17"
+Map "ctg1" Ends Left 237.000
+Map "ctg1" Ends Right 268.000 Oldctg 2
+Fp_number "E083C17"
+Gel_number    208311
+Bands  1646756 32
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0083F19"
+Map "ctg1" Ends Left 44.000
+Map "ctg1" Ends Right 73.000 Oldctg 2
+Fp_number "E083F19"
+Gel_number    208332
+Bands  1648183 30
+Approximate_match_to_cosmid "b0065P01"
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0083H05"
+Map "ctg1" Ends Left 543.000
+Map "ctg1" Ends Right 569.000 Oldctg 2
+Fp_number "E083H05"
+Gel_number    208332
+Bands  1648896 27
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0083L21"
+Map "ctg1" Ends Left 305.000
+Map "ctg1" Ends Right 333.000 Oldctg 2
+Fp_number "E083L21"
+Gel_number    208333
+Bands  1651488 29
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0083O07"
+Map "ctg1" Ends Left 37.000
+Map "ctg1" Ends Right 67.000 Oldctg 2
+Fp_number "E083O07"
+Gel_number    208314
+Bands  1653039 31
+Approximate_match_to_cosmid "b0065P01"
+Positive_OVERGO "SOG2244" 
+Creation_date 100 2 18 18 21 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0084D17"
+Map "ctg1" Ends Left 318.000
+Map "ctg1" Ends Right 345.000 Oldctg 2
+Fp_number "E084D17"
+Gel_number    208431
+Bands  1656246 28
+Approximate_match_to_cosmid "b0046K18"
+Positive_OVERGO "SOG0657" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0084E17"
+Map "ctg1" Ends Left 175.000
+Map "ctg1" Ends Right 212.000 Oldctg 2
+Fp_number "E084E17"
+Gel_number    208412
+Bands  1656869 38
+Positive_STS "OJ000301_13" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0084G04"
+Map "ctg1" Ends Left 300.000
+Map "ctg1" Ends Right 322.000 Oldctg 2
+Fp_number "E084G04"
+Gel_number    208422
+Bands  1657728 23
+Approximate_match_to_cosmid "a0093L07"
+Positive_STS "OJ990503_27" 
+Positive_STS "OJ991021_17" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0084N19"
+Map "ctg1" Ends Left 37.000
+Map "ctg1" Ends Right 66.000 Oldctg 2
+Fp_number "E084N19"
+Gel_number    208434
+Bands  1662416 30
+Approximate_match_to_cosmid "b0095L23"
+Positive_OVERGO "SOG2232" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0084N21"
+Map "ctg1" Ends Left 427.000
+Map "ctg1" Ends Right 456.000 Oldctg 2
+Fp_number "E084N21"
+Gel_number    208434
+Bands  1662471 30
+Approximate_match_to_cosmid "b0057J05"
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0085B18"
+Map "ctg1" Ends Left 447.000
+Map "ctg1" Ends Right 474.000 Oldctg 2
+Fp_number "E085B18"
+Gel_number    208541
+Bands  1664826 28
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0085C01"
+Map "ctg1" Ends Left 555.000
+Map "ctg1" Ends Right 577.000 Oldctg 2
+Fp_number "E085C01"
+Gel_number    208511
+Bands  1664999 23
+Approximate_match_to_cosmid "a0063H14"
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0085E02"
+Map "ctg1" Ends Left 87.000
+Map "ctg1" Ends Right 118.000 Oldctg 2
+Fp_number "E085E02"
+Gel_number    208522
+Bands  1666285 32
+Approximate_match_to_cosmid "a0082D09"
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0085J15"
+Map "ctg1" Ends Left 577.000
+Map "ctg1" Ends Right 604.000 Oldctg 2
+Fp_number "E085J15"
+Gel_number    208533
+Bands  1669707 28
+Positive_STS "OJ000301_05" 
+Positive_OVERGO "SOG0279" 
+Positive_OVERGO "SOG0948" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0086F12"
+Map "ctg1" Ends Left 610.000
+Map "ctg1" Ends Right 640.000 Oldctg 2
+Fp_number "E086F12"
+Gel_number    208642
+Bands  1675115 31
+Approximate_match_to_cosmid "b0005L08"
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0086F17"
+Map "ctg1" Ends Left 150.000
+Map "ctg1" Ends Right 177.000 Oldctg 2
+Fp_number "E086F17"
+Gel_number    208632
+Bands  1675235 28
+Positive_OVERGO "SOG1225" 
+Creation_date 100 2 18 18 21 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0087C20"
+Map "ctg1" Ends Left 592.000
+Map "ctg1" Ends Right 623.000 Oldctg 2
+Fp_number "E087C20"
+Gel_number    208721
+Bands  1682563 32
+Approximate_match_to_cosmid "a0091H17"
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0087C23"
+Map "ctg1" Ends Left 210.000
+Map "ctg1" Ends Right 235.000 Oldctg 2
+Fp_number "E087C23"
+Gel_number    208711
+Bands  1682645 26
+Exact_match_to_cosmid "a0052N06"
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0087G24"
+Map "ctg1" Ends Left 226.000
+Map "ctg1" Ends Right 257.000 Oldctg 2
+Fp_number "E087G24"
+Gel_number    208722
+Bands  1685217 32
+Approximate_match_to_cosmid "a0070K16"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0087J21"
+Map "ctg1" Ends Left 151.000
+Map "ctg1" Ends Right 181.000 Oldctg 2
+Fp_number "E087J21"
+Gel_number    208733
+Bands  1686952 31
+Positive_STS "OJ990407_02" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0087M11"
+Map "ctg1" Ends Left 460.000
+Map "ctg1" Ends Right 487.000 Oldctg 2
+Fp_number "E087M11"
+Gel_number    208714
+Bands  1688588 28
+Positive_STS "OJ000214_13" 
+Positive_STS "OJ990428_07" 
+Positive_STS "OJ991215_12" 
+Positive_OVERGO "SOG1200" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0087O07"
+Map "ctg1" Ends Left 483.000
+Map "ctg1" Ends Right 505.000 Oldctg 2
+Fp_number "E087O07"
+Gel_number    208714
+Bands  1689706 23
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0087O21"
+Map "ctg1" Ends Left 415.000
+Map "ctg1" Ends Right 449.000 Oldctg 2
+Fp_number "E087O21"
+Gel_number    208714
+Bands  1690037 35
+Approximate_match_to_cosmid "a0087L03"
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG1508" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0088M23"
+Map "ctg1" Ends Left 318.000
+Map "ctg1" Ends Right 347.000 Oldctg 2
+Fp_number "E088M23"
+Gel_number    208814
+Bands  1698762 30
+Approximate_match_to_cosmid "b0046K18"
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0088P23"
+Map "ctg1" Ends Left 534.000
+Map "ctg1" Ends Right 567.000 Oldctg 2
+Fp_number "E088P23"
+Gel_number    208834
+Bands  1700725 34
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0089C07"
+Map "ctg1" Ends Left 36.000
+Map "ctg1" Ends Right 66.000 Oldctg 2
+Fp_number "E089C07"
+Gel_number    208911
+Bands  1702298 31
+Approximate_match_to_cosmid "b0065P01"
+Positive_OVERGO "SOG2244" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0090I01"
+Map "ctg1" Ends Left 272.000
+Map "ctg1" Ends Right 297.000 Oldctg 2
+Fp_number "E090I01"
+Gel_number    209013
+Bands  1715433 26
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0090K10"
+Map "ctg1" Ends Left 368.000
+Map "ctg1" Ends Right 404.000 Oldctg 2
+Fp_number "E090K10"
+Gel_number    209023
+Bands  1716859 37
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 56 
+  
+Clone : "b0090O14"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 403.000 Oldctg 2
+Fp_number "E090O14"
+Gel_number    209024
+Bands  1719367 35
+Approximate_match_to_cosmid "a0049H15"
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0091A24"
+Map "ctg1" Ends Left 99.000
+Map "ctg1" Ends Right 139.000 Oldctg 2
+Fp_number "E091A24"
+Gel_number    209121
+Bands  1720701 41
+Positive_STS "OJ990412_10" 
+Creation_date 99 11 13 0 57 
+Modified_date 104 4 9 18 13 
+  
+Clone : "b0091B14"
+Map "ctg1" Ends Left 103.000
+Map "ctg1" Ends Right 140.000 Oldctg 2
+Fp_number "E091B14"
+Gel_number    209141
+Bands  1721107 38
+Approximate_match_to_cosmid "b0091A24"
+Creation_date 99 11 13 0 57 
+Modified_date 104 4 9 18 13 
+  
+Clone : "b0091L22"
+Map "ctg1" Ends Left 229.000
+Map "ctg1" Ends Right 261.000 Oldctg 2
+Fp_number "E091L22"
+Gel_number    209143
+Bands  1727635 33
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0092A02"
+Map "ctg1" Ends Left 515.000
+Map "ctg1" Ends Right 540.000 Oldctg 2
+Fp_number "E092A02"
+Gel_number    209221
+Bands  1730275 26
+Approximate_match_to_cosmid "b0032O15"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0092C14"
+Map "ctg1" Ends Left 250.000
+Map "ctg1" Ends Right 271.000 Oldctg 2
+Fp_number "E092C14"
+Gel_number    209221
+Bands  1731811 22
+Approximate_match_to_cosmid "a0087A15"
+Positive_STS "OJ000317_34" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0092H03"
+Map "ctg1" Ends Left 591.000
+Map "ctg1" Ends Right 623.000 Oldctg 2
+Fp_number "E092H03"
+Gel_number    209232
+Bands  1734730 33
+Approximate_match_to_cosmid "a0091H17"
+Positive_STS "OJ000321_15" 
+Positive_STS "OJ000330_31" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0092L16"
+Map "ctg1" Ends Left 0.000
+Map "ctg1" Ends Right 34.000 Oldctg 2
+Fp_number "E092L16"
+Gel_number    209243
+Bands  1737668 35
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0092L22"
+Map "ctg1" Ends Left 371.000
+Map "ctg1" Ends Right 404.000 Oldctg 2
+Fp_number "E092L22"
+Gel_number    209243
+Bands  1737824 34
+Approximate_match_to_cosmid "b0053P12"
+Positive_STS "OJ000350_20" 
+Positive_STS "OJ991201_11" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0092N13"
+Map "ctg1" Ends Left 146.000
+Map "ctg1" Ends Right 173.000 Oldctg 2
+Fp_number "E092N13"
+Gel_number    209234
+Bands  1738815 28
+Positive_STS "OJ990407_02" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0093C15"
+Map "ctg1" Ends Left 370.000
+Map "ctg1" Ends Right 408.000 Oldctg 2
+Fp_number "E093C15"
+Gel_number    209311
+Bands  1742105 39
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0093I19"
+Map "ctg1" Ends Left 306.000
+Map "ctg1" Ends Right 333.000 Oldctg 2
+Fp_number "E093I19"
+Gel_number    209313
+Bands  1746161 28
+Positive_STS "OJ990503_27" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0094A14"
+Map "ctg1" Ends Left 229.000
+Map "ctg1" Ends Right 260.000 Oldctg 2
+Fp_number "E094A14"
+Gel_number    209421
+Bands  1751276 32
+Approximate_match_to_cosmid "b0091L22"
+Positive_STS "OJ000207_10" 
+Positive_STS "OJ000306_03" 
+Positive_STS "OJ990325_01" 
+Creation_date 99 11 13 0 57 
+Modified_date 102 2 1 22 48 
+  
+Clone : "b0094B10"
+Map "ctg1" Ends Left 276.000
+Map "ctg1" Ends Right 301.000 Oldctg 2
+Fp_number "E094B10"
+Gel_number    209441
+Bands  1751777 26
+Positive_OVERGO "SOG0307" 
+Positive_OVERGO "SOG0961" 
+Creation_date 100 2 18 18 21 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0094D11"
+Map "ctg1" Ends Left 256.000
+Map "ctg1" Ends Right 281.000 Oldctg 2
+Fp_number "E094D11"
+Gel_number    209431
+Bands  1753106 26
+Positive_OVERGO "SOG1077" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0094L15"
+Map "ctg1" Ends Left 178.000
+Map "ctg1" Ends Right 218.000 Oldctg 2
+Fp_number "E094L15"
+Gel_number    209433
+Bands  1758047 41
+Approximate_match_to_cosmid "b0074P13"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0094O09"
+Map "ctg1" Ends Left 150.000
+Map "ctg1" Ends Right 175.000 Oldctg 2
+Fp_number "E094O09"
+Gel_number    209414
+Bands  1759895 26
+Approximate_match_to_cosmid "b0060I06"
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0095I21"
+Map "ctg1" Ends Left 545.000
+Map "ctg1" Ends Right 570.000 Oldctg 2
+Fp_number "E095I21"
+Gel_number    209513
+Bands  1766183 26
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0095K22"
+Map "ctg1" Ends Left 595.000
+Map "ctg1" Ends Right 628.000 Oldctg 2
+Fp_number "E095K22"
+Gel_number    209523
+Bands  1767469 34
+Approximate_match_to_cosmid "a0091H17"
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0095L23"
+Map "ctg1" Ends Left 28.000
+Map "ctg1" Ends Right 62.000 Oldctg 2
+Fp_number "E095L23"
+Gel_number    209533
+Bands  1768027 35
+Creation_date 100 2 18 18 21 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0096D16"
+Map "ctg1" Ends Left 264.000
+Map "ctg1" Ends Right 287.000 Oldctg 2
+Fp_number "E096D16"
+Gel_number    209641
+Bands  1772858 24
+Positive_OVERGO "SOG1450" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0096I21"
+Map "ctg1" Ends Left 156.000
+Map "ctg1" Ends Right 185.000 Oldctg 2
+Fp_number "E096I21"
+Gel_number    209613
+Bands  1776037 30
+Approximate_match_to_cosmid "a0061C05"
+Creation_date 99 11 13 0 57 
+Modified_date 101 4 11 16 57 
+  
+Clone : "b0096L08"
+Map "ctg1" Ends Left 141.000
+Map "ctg1" Ends Right 163.000 Oldctg 2
+Fp_number "E096L08"
+Gel_number    209643
+Bands  1777565 23
+Approximate_match_to_cosmid "a0095O20"
+Positive_OVERGO "SOG0127" 
+Positive_OVERGO "SOG1225" 
+Positive_OVERGO "SOG1231" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0096L16"
+Map "ctg1" Ends Left 148.000
+Map "ctg1" Ends Right 172.000 Oldctg 2
+Fp_number "E096L16"
+Gel_number    209643
+Bands  1777770 25
+Positive_STS "OJ990407_02" 
+Positive_OVERGO "SOG0127" 
+Positive_OVERGO "SOG1225" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0096N17"
+Map "ctg1" Ends Left 55.000
+Map "ctg1" Ends Right 87.000 Oldctg 2
+Fp_number "E096N17"
+Gel_number    209634
+Bands  1779108 33
+Positive_OVERGO "SOG1344" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0096P10"
+Map "ctg1" Ends Left 206.000
+Map "ctg1" Ends Right 237.000 Oldctg 2
+Fp_number "E096P10"
+Gel_number    209644
+Bands  1780284 32
+Approximate_match_to_cosmid "a0052E04"
+Positive_OVERGO "SOG1816" 
+Creation_date 99 11 13 0 57 
+Modified_date 103 4 22 10 21 
+  
+Clone : "b0097K12"
+Map "ctg1" Ends Left 528.000
+Map "ctg1" Ends Right 548.000 Oldctg 2
+Gel_number    b1097B2
+Bands  1787375 22
+Creation_date 100 2 18 18 21 
+Modified_date 101 8 24 13 54 
+  
+BAC : "b0099P02"
+Map "ctg1" Ends Left 508.000
+Map "ctg1" Ends Right 534.000 Oldctg 2
+Gel_number    b1099D2
+Bands  1805108 26
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 46 
+  
+BAC : "b0099P14"
+Map "ctg1" Ends Left 15.000
+Map "ctg1" Ends Right 46.000 Oldctg 2
+Gel_number    b1099D2
+Bands  1805281 31
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 46 
+  
+BAC : "b0101B08"
+Map "ctg1" Ends Left 237.000
+Map "ctg1" Ends Right 268.000 Oldctg 2
+Gel_number    b1101D1
+Bands  1808303 31
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 47 
+  
+BAC : "b0101C07"
+Map "ctg1" Ends Left 506.000
+Map "ctg1" Ends Right 535.000 Oldctg 2
+Gel_number    b1101A1
+Bands  1808754 29
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 48 
+  
+BAC : "b0101F04"
+Map "ctg1" Ends Left 272.000
+Map "ctg1" Ends Right 300.000 Oldctg 2
+Gel_number    b1101D1
+Bands  1810388 28
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 48 
+  
+BAC : "b0101H21"
+Map "ctg1" Ends Left 85.000
+Map "ctg1" Ends Right 121.000 Oldctg 2
+Gel_number    b1101C1
+Bands  1811808 36
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 49 
+  
+BAC : "b0101I17"
+Map "ctg1" Ends Left 514.000
+Map "ctg1" Ends Right 542.000 Oldctg 2
+Gel_number    b1101A2
+Bands  1812317 28
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 49 
+  
+BAC : "b0101K21"
+Map "ctg1" Ends Left 55.000
+Map "ctg1" Ends Right 86.000 Oldctg 2
+Gel_number    b1101A2
+Bands  1813574 31
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 50 
+  
+BAC : "b0101L23"
+Map "ctg1" Ends Left 19.000
+Map "ctg1" Ends Right 47.000 Oldctg 2
+Gel_number    b1101C2
+Bands  1814112 28
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 50 
+  
+BAC : "b0103I21"
+Map "ctg1" Ends Left 264.000
+Map "ctg1" Ends Right 288.000 Oldctg 2
+Gel_number    b1103A2
+Bands  1823739 24
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 55 
+  
+BAC : "b0104K09"
+Map "ctg1" Ends Left 407.000
+Map "ctg1" Ends Right 439.000 Oldctg 2
+Gel_number    b1104A2
+Bands  1829559 32
+Positive_OVERGO "SOG0223" 
+Creation_date 101 8 24 13 54 
+Modified_date 103 4 22 10 21 
+  
+BAC : "b0105G23"
+Map "ctg1" Ends Left 179.000
+Map "ctg1" Ends Right 221.000 Oldctg 2
+Gel_number    b1105A1
+Bands  1834073 42
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 45 
+  
+BAC : "b0105K06"
+Map "ctg1" Ends Left 463.000
+Map "ctg1" Ends Right 487.000 Oldctg 2
+Gel_number    b1105B2
+Bands  1835240 24
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 14 59 
+  
+BAC : "b0106B21"
+Map "ctg1" Ends Left 415.000
+Map "ctg1" Ends Right 445.000 Oldctg 2
+Gel_number    b1106C1
+Bands  1837649 30
+Positive_OVERGO "SOG0223" 
+Positive_OVERGO "SOG0389" 
+Creation_date 101 8 24 13 54 
+Modified_date 103 4 22 10 21 
+  
+BAC : "b0108A10"
+Map "ctg1" Ends Left 281.000
+Map "ctg1" Ends Right 307.000 Oldctg 2
+Gel_number    b1108B1
+Bands  1842276 26
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 46 
+  
+BAC : "b0108A22"
+Map "ctg1" Ends Left 55.000
+Map "ctg1" Ends Right 85.000 Oldctg 2
+Gel_number    b1108B1
+Bands  1842507 30
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 46 
+  
+BAC : "b0108J15"
+Map "ctg1" Ends Left 337.000
+Map "ctg1" Ends Right 365.000 Oldctg 2
+Gel_number    b1108C2
+Bands  1846051 28
+Positive_OVERGO "SOG0657" 
+Creation_date 101 10 3 11 41 
+Modified_date 103 4 22 10 21 
+  
+BAC : "b0108O23"
+Map "ctg1" Ends Left 356.000
+Map "ctg1" Ends Right 388.000 Oldctg 2
+Gel_number    b1108A2
+Bands  1848410 32
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 4 
+  
+BAC : "b0109D22"
+Map "ctg1" Ends Left 414.000
+Map "ctg1" Ends Right 443.000 Oldctg 2
+Gel_number    b1109D1
+Bands  1850230 29
+Positive_OVERGO "SOG0389" 
+Creation_date 101 8 24 13 54 
+Modified_date 103 4 22 10 21 
+  
+BAC : "b0109M07"
+Map "ctg1" Ends Left 35.000
+Map "ctg1" Ends Right 66.000 Oldctg 2
+Gel_number    b1109A2
+Bands  1854093 31
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 6 
+  
+BAC : "b0111D13"
+Map "ctg1" Ends Left 293.000
+Map "ctg1" Ends Right 318.000 Oldctg 2
+Gel_number    b1111C1
+Bands  1859629 25
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 9 
+  
+BAC : "b0111E09"
+Map "ctg1" Ends Left 251.000
+Map "ctg1" Ends Right 273.000 Oldctg 2
+Gel_number    b1111A1
+Bands  1860028 22
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 9 
+  
+BAC : "b0111F22"
+Map "ctg1" Ends Left 560.000
+Map "ctg1" Ends Right 590.000 Oldctg 2
+Gel_number    b1111D1
+Bands  1860827 30
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 9 
+  
+BAC : "b0111K16"
+Map "ctg1" Ends Left 612.000
+Map "ctg1" Ends Right 643.000 Oldctg 2
+Gel_number    b1111B2
+Bands  1863245 31
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 49 
+  
+BAC : "b0112M03"
+Map "ctg1" Ends Left 504.000
+Map "ctg1" Ends Right 530.000 Oldctg 2
+Gel_number    b1112A2
+Bands  1869880 26
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 51 
+  
+BAC : "b0113L10"
+Map "ctg1" Ends Left 504.000
+Map "ctg1" Ends Right 528.000 Oldctg 2
+Gel_number    b1113D2
+Bands  1872623 24
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 53 
+  
+BAC : "b0114E16"
+Map "ctg1" Ends Left 89.000
+Map "ctg1" Ends Right 126.000 Oldctg 2
+Gel_number    b1114B1
+Bands  1874416 37
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 54 
+  
+BAC : "b0115E02"
+Map "ctg1" Ends Left 50.000
+Map "ctg1" Ends Right 81.000 Oldctg 2
+Gel_number    b1115B1
+Bands  1876064 31
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 54 
+  
+BAC : "b0115I17"
+Map "ctg1" Ends Left 279.000
+Map "ctg1" Ends Right 301.000 Oldctg 2
+Gel_number    b1115A2
+Bands  1877734 22
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 55 
+  
+BAC : "b0115K18"
+Map "ctg1" Ends Left 589.000
+Map "ctg1" Ends Right 615.000 Oldctg 2
+Gel_number    b1115B2
+Bands  1878435 26
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 55 
+  
+BAC : "b0116D19"
+Map "ctg1" Ends Left 369.000
+Map "ctg1" Ends Right 401.000 Oldctg 2
+Gel_number    b1116C1
+Bands  1881304 32
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 12 
+  
+BAC : "b0116E14"
+Map "ctg1" Ends Left 568.000
+Map "ctg1" Ends Right 598.000 Oldctg 2
+Gel_number    b1116B1
+Bands  1881484 30
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 12 
+  
+BAC : "b0117O21"
+Map "ctg1" Ends Left 486.000
+Map "ctg1" Ends Right 507.000 Oldctg 2
+Gel_number    b1117A2
+Bands  1888012 21
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 57 
+  
+BAC : "b0118F12"
+Map "ctg1" Ends Left 507.000
+Map "ctg1" Ends Right 535.000 Oldctg 2
+Gel_number    b1118D1
+Bands  1889133 28
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 58 
+  
+BAC : "b0118H07"
+Map "ctg1" Ends Left 226.000
+Map "ctg1" Ends Right 256.000 Oldctg 2
+Gel_number    b1118C1
+Bands  1889496 30
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 58 
+  
+BAC : "b0119I21"
+Map "ctg1" Ends Left 612.000
+Map "ctg1" Ends Right 643.000 Oldctg 2
+Gel_number    b1119A2
+Bands  1895188 31
+Creation_date 101 8 24 13 54 
+Modified_date 101 8 24 15 17 
+  
+BAC : "b0119N16"
+Map "ctg1" Ends Left 85.000
+Map "ctg1" Ends Right 110.000 Oldctg 2
+Gel_number    b1119D2
+Bands  1896877 25
+Creation_date 101 10 3 11 41 
+Modified_date 101 10 3 11 59 
+  
+BAC : "b0122H16"
+Map "ctg1" Ends Left 147.000
+Map "ctg1" Ends Right 175.000 Oldctg 2
+Gel_number    b1122D1
+Bands  1898823 28
+Positive_OVERGO "SOG0460" 
+Creation_date 101 8 24 13 54 
+Modified_date 103 4 22 10 21 
+
+Markerdata
+
+Marker_STS : "10E20fA"
+Creation_date 101 5 9 11 19 
+Modified_date 103 4 28 17 14 
+
+Marker_STS : "40C03f"
+Creation_date 101 5 9 11 19 
+Modified_date 101 5 9 11 19 
+
+Marker_STS : "42D18r"
+Creation_date 101 5 9 11 19 
+Modified_date 103 4 28 17 14 
+
+Marker_STS : "43L01f"
+Creation_date 100 4 27 14 58 
+Modified_date 103 4 28 17 14 
+
+Marker_STS : "51J01r"
+Creation_date 101 5 9 11 19 
+Modified_date 101 5 9 11 19 
+
+Marker_STS : "0057C09f"
+Creation_date 101 5 9 11 19 
+Modified_date 101 5 9 11 19 
+
+Marker_Probe : "AEST036"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 24 
+
+Marker_Probe : "AEST171"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 24 
+
+Marker_Probe : "BCD1424"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 24 
+
+Marker_STS : "C30"
+Anchor_bin "1"
+Anchor_pos    12.2 F
+Creation_date 100 3 22 21 6 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "C90"
+Anchor_bin "1"
+Anchor_pos    10.9 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 59 
+
+Marker_STS : "C146"
+Anchor_bin "1"
+Anchor_pos    12.2 F
+Creation_date 100 4 18 20 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "C749"
+Anchor_bin "1"
+Anchor_pos     8.7 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "C804"
+Anchor_bin "1"
+Anchor_pos    13.1 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "C936"
+Anchor_bin "1"
+Anchor_pos    12.2 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_STS : "C953"
+Creation_date 100 3 22 20 16 
+Modified_date 103 4 28 18 13 
+
+Marker_eMRK : "C1127"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 58 
+
+Marker_STS : "C1679"
+Anchor_bin "1"
+Anchor_pos     7.0 F
+Creation_date 100 3 9 11 8 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "C50986S"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 59 
+
+Marker_eMRK : "C51033"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_STS : "C51175"
+Anchor_bin "10"
+Anchor_pos    17.1 F
+Creation_date 101 5 9 11 19 
+Modified_date 103 2 6 9 42 
+
+Marker_STS : "chlor"
+Creation_date 101 5 9 11 19 
+Modified_date 103 4 28 17 14 
+
+Marker_Probe : "CSU455"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 24 
+
+Marker_Probe : "CSU469"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 24 
+
+Marker_eMRK : "E222S"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "E30207S"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_STS : "G107"
+Anchor_bin "1"
+Anchor_pos     8.7 F
+Creation_date 100 3 9 11 11 
+Modified_date 103 4 28 17 59 
+
+Marker_STS : "G1082x"
+Creation_date 99 12 13 11 49 
+Modified_date 101 1 5 17 55 
+
+Marker_eMRK : "L451"
+Anchor_bin "1"
+Anchor_pos     8.7 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_STS : "OJ000207_10"
+Anchor_bin "1"
+Anchor_pos     9.5 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000214_10"
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000214_13"
+Anchor_bin "1"
+Anchor_pos    12.3 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000221_13"
+Anchor_bin "1"
+Anchor_pos    10.9 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000251_05"
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000301_05"
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000301_13"
+Anchor_bin "1"
+Anchor_pos     8.7 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000303_09"
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ000303_17"
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ000306_03"
+Anchor_bin "1"
+Anchor_pos     9.5 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000310_29"
+Anchor_bin "1"
+Anchor_pos    12.3 P
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ000317_34"
+Anchor_bin "1"
+Anchor_pos    10.9 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000321_15"
+Anchor_bin "1"
+Anchor_pos    16.1 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000323_02"
+Creation_date 102 2 1 22 48 
+Modified_date 102 2 1 22 48 
+
+Marker_STS : "OJ000330_31"
+Anchor_bin "1"
+Anchor_pos    16.1 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ000350_20"
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990325_01"
+Anchor_bin "1"
+Anchor_pos     9.5 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990407_02"
+Anchor_bin "1"
+Anchor_pos     8.7 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990412_10"
+Anchor_bin "1"
+Anchor_pos     7.0 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990428_07"
+Anchor_bin "1"
+Anchor_pos    12.3 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990503_27"
+Anchor_bin "1"
+Anchor_pos    10.9 P
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ990605_38"
+Anchor_bin "1"
+Anchor_pos     5.6 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990713_06"
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ990820_14"
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ990915_01"
+Anchor_bin "1"
+Anchor_pos     5.6 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ990923_08"
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ991015_10"
+Anchor_bin "1"
+Anchor_pos     7.0 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ991018_02"
+Anchor_bin "1"
+Anchor_pos     7.0 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ991021_17"
+Anchor_bin "1"
+Anchor_pos    10.9 P
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ991201_11"
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "OJ991210_12"
+Creation_date 102 2 1 22 47 
+Modified_date 102 2 1 22 47 
+
+Marker_STS : "OJ991215_12"
+Anchor_bin "1"
+Anchor_pos    12.3 P
+Creation_date 102 2 1 22 47 
+Modified_date 103 4 22 10 21 
+
+Marker_STS : "PIB1"
+Creation_date 101 5 9 11 19 
+Modified_date 103 4 28 17 14 
+
+Marker_Probe : "PRC0214"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 24 
+
+Marker_eMRK : "R87"
+Anchor_bin "1"
+Anchor_pos     8.4 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 59 
+
+Marker_eMRK : "R106"
+Anchor_bin "1"
+Anchor_pos    12.2 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_STS : "R753"
+Anchor_bin "1"
+Anchor_pos    10.9 F
+Creation_date 100 4 6 15 17 
+Modified_date 103 4 28 17 58 
+
+Marker_eMRK : "R1613"
+Anchor_bin "1"
+Anchor_pos    16.1 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "R1841"
+Anchor_bin "1"
+Anchor_pos    12.2 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RG246"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_STS : "RG353"
+Creation_date 101 1 2 17 56 
+Modified_date 101 7 13 16 45 
+
+Marker_STS : "RG472"
+Creation_date 100 3 13 22 58 
+Modified_date 100 8 2 11 45 
+
+Marker_eMRK : "RM177"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 45 
+
+Marker_eMRK : "RM1141"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM1177"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 58 
+
+Marker_eMRK : "RM1247"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM1254"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "RM1320"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "RM1331"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM1869"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 37 
+
+Marker_eMRK : "RM3148"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "RM3652"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "RM3740"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 58 
+
+Marker_eMRK : "RM5336"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "RM5423"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "RM6045"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 58 
+
+Marker_eMRK : "RM6236"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM6324"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "RM6340"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "RM6433"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 42 
+
+Marker_eMRK : "RM6470"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM6515"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 58 
+
+Marker_eMRK : "RM6873"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "RM7278"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 58 
+
+Marker_eMRK : "RM7383"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "RM7536"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM8068"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RM8069"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "RZ288"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 59 
+
+Marker_eMRK : "RZ543"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 59 
+
+Marker_eMRK : "RZ889"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 45 
+
+Marker_STS : "S1001"
+Creation_date 101 6 21 21 25 
+Modified_date 101 6 21 21 25 
+
+Marker_eMRK : "S1543"
+Anchor_bin "1"
+Anchor_pos     5.6 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "S5853"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "S10623"
+Anchor_bin "1"
+Anchor_pos     7.0 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 57 
+
+Marker_eMRK : "S13048"
+Anchor_bin "1"
+Anchor_pos     9.5 F
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_eMRK : "S13157S"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_STS : "S13984"
+Anchor_bin "2"
+Anchor_pos   103.9 F
+Creation_date 100 3 9 21 3 
+Modified_date 103 4 28 18 36 
+
+Marker_eMRK : "S20154S"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 18 1 
+
+Marker_eMRK : "S20229S"
+Creation_date 103 4 28 17 13 
+Modified_date 103 4 28 17 56 
+
+Marker_OVERGO : "SOG0127"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0223"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0252"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0279"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0307"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0313"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0316"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0389"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0460"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0657"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0948"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0961"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG0980"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1044"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1055"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1077"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1176"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1177"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1200"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1204"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1225"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1231"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1344"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1346"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1383"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1401"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1450"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1508"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1670"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1745"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1746"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1774"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1790"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1816"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG1855"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG2111"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG2203"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG2232"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG2238"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG2244"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+Marker_OVERGO : "SOG2276"
+Remark "by Paterson lab"
+Creation_date 103 4 22 10 21 
+Modified_date 103 4 22 10 23 
+
+
+Contigdata 1
+
+Ctg1 9/4/104 18:13 Ok  -1 #
+Chr_remark "Chr1" Pos   0.000  
+User_remark "This is FPC V7!!!"

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cys1_dicdi.water
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cys1_dicdi.water	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cys1_dicdi.water	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,146 @@
+########################################
+# Program:  water
+# Rundate:  Tue Feb 12 21:31:03 2002
+# Report_file: cys1_dicdi.water
+########################################
+#=======================================
+#
+# Aligned_sequences: 2
+# 1: CYS1_DICDI
+# 2: CYS1_DICDI
+# Matrix: EBLOSUM62
+# Gap_penalty: 10.0
+# Extend_penalty: 0.5
+#
+# Length: 343
+# Identity:     343/343 (100.0%)
+# Similarity:   343/343 (100.0%)
+# Gaps:           0/343 ( 0.0%)
+# Score: 1841.0
+# 
+#
+#=======================================
+
+CYS1_DICDI         1 MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFE     50
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI         1 MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFE     50
+
+CYS1_DICDI        51 IFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF    100
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI        51 IFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF    100
+
+CYS1_DICDI       101 TDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGN    150
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI       101 TDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGN    150
+
+CYS1_DICDI       151 VEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN    200
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI       151 VEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN    200
+
+CYS1_DICDI       201 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAG    250
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI       201 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAG    250
+
+CYS1_DICDI       251 YIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF    300
+                     ||||||||||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI       251 YIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF    300
+
+CYS1_DICDI       301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII    343
+                     |||||||||||||||||||||||||||||||||||||||||||
+CYS1_DICDI       301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII    343
+
+#=======================================
+#
+# Aligned_sequences: 2
+# 1: CYS1_DICDI
+# 2: ALEU_HORVU
+# Matrix: EBLOSUM62
+# Gap_penalty: 10.0
+# Extend_penalty: 0.5
+#
+# Length: 339
+# Identity:     112/339 (33.0%)
+# Similarity:   169/339 (49.9%)
+# Gaps:          63/339 (18.6%)
+# Score: 510.5
+# 
+#
+#=======================================
+
+CYS1_DICDI        29 FLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGV     77
+                     |..|..::.|.| |..|...||.||..:|.::...|...:.:    :.|:
+ALEU_HORVU        61 FARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPY----RLGI    106
+
+CYS1_DICDI        78 NKFADLSSDEFK----------------NYYLNNKEAIFTDDLPVADYLD    111
+                     |:|:|:|.:||:                |:.:.:..|             
+ALEU_HORVU       107 NRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRDAAA-------------    143
+
+CYS1_DICDI       112 DEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK    161
+                           :|...|||..|.|:|||||..|||||:|||||.:|..:..:..|
+ALEU_HORVU       144 ------LPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGK    187
+
+CYS1_DICDI       162 LVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTE    211
+                     .:|||||.||||......:        ||||||...|:.||..||||.||
+ALEU_HORVU       188 NISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYIKYNGGIDTE    229
+
+CYS1_DICDI       212 SSYPYTAETGTQCNFNSANIGAKI---SNFTMIPKNETVMAGYIVSTGPL    258
+                     .||||....|. |::.:.|...::   .|.|:..::|...|..:|.  |:
+ALEU_HORVU       230 ESYPYKGVNGV-CHYKAENAAVQVLDSVNITLNAEDELKNAVGLVR--PV    276
+
+CYS1_DICDI       259 AIAADAVE-WQFYIGGVFDIP-C--NPNSLDHGILIVGYSAKNTIFRKNM    304
+                     ::|...:: ::.|..||:... |  .|:.::|.:|.|||..:|     .:
+ALEU_HORVU       277 SVAFQVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVEN-----GV    321
+
+CYS1_DICDI       305 PYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII    343
+                     |||::|||||||||:.||..:..|||.|.::...|..::
+ALEU_HORVU       322 PYWLIKNSWGADWGDNGYFKMEMGKNMCAIATCASYPVV    360
+
+#=======================================
+#
+# Aligned_sequences: 2
+# 1: CYS1_DICDI
+# 2: CATH_HUMAN
+# Matrix: EBLOSUM62
+# Gap_penalty: 10.0
+# Extend_penalty: 0.5
+#
+# Length: 323
+# Identity:     122/323 (37.8%)
+# Similarity:   165/323 (51.1%)
+# Gaps:          40/323 (12.4%)
+# Score: 511.0
+# 
+#
+#=======================================
+
+CYS1_DICDI        32 FQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFA     81
+                     :..|..|.||.|||..|.:.|.||..||...|    |.....|..:|:|:
+CATH_HUMAN        38 WMSKHRKTYSTEEYHHRLQTFASNWRKINAHN----NGNHTFKMALNQFS     83
+
+CYS1_DICDI        82 DLSSDEFKNYYL----NNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTR    127
+                     |:|..|.|:.||    .|..|      ..::||  ......|.:.|||.:
+CATH_HUMAN        84 DMSFAEIKHKYLWSEPQNCSA------TKSNYL--RGTGPYPPSVDWRKK    125
+
+CYS1_DICDI       128 G-AVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE    176
+                     | .|:||||||.|||||:|||||.:|....|:..|::||:||.||||..:
+CATH_HUMAN       126 GNFVSPVKNQGACGSCWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQD    175
+
+CYS1_DICDI       177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNF    226
+                     ...|        ||.|||...|:.||:.|.||..|.:|||..:.| .|.|
+CATH_HUMAN       176 FNNY--------GCQGGLPSQAFEYILYNKGIMGEDTYPYQGKDG-YCKF    216
+
+CYS1_DICDI       227 NSAN-IG--AKISNFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIG    272
+                     .... ||  ..::|.|:.  :|..|...:....|::.|.:.. ::..|..
+CATH_HUMAN       217 QPGKAIGFVKDVANITIY--DEEAMVEAVALYNPVSFAFEVTQDFMMYRT    264
+
+CYS1_DICDI       273 GVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE    319
+                     |:: ...|:  |:.::|.:|.|||..||.|     ||||||||||..||.
+CATH_HUMAN       265 GIYSSTSCHKTPDKVNHAVLAVGYGEKNGI-----PYWIVKNSWGPQWGM    309
+
+CYS1_DICDI       320 QGYIYLRRGKNTCGVSNFVSTSI    342
+                     .||..:.||||.||::...|..|
+CATH_HUMAN       310 NGYFLIERGKNMCGLAACASYPI    332
+
+
+#---------------------------------------
+#---------------------------------------

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,58 @@
+>CYS1_DICDI
+MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHK
+ADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG
+SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT
+ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCN
+PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+
+
+>ALEU_HORVU
+MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
+SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
+AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
+GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
+QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
+IATCASYPVVAA
+
+
+>CATH_HUMAN
+MWATLPLLCAGAWLLGVPVCGAAELSVNSLEKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHNN
+GNHTFKMALNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPPSVDWRKKGNFVSPVKNQGACGS
+CWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNYGCQGGLPSQAFEYILYNKGIMGEDTYPYQGK
+DGYCKFQPGKAIGFVKDVANITIYDEEAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVN
+HAVLAVGYGEKNGIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPLV
+
+
+>CATH_RAT
+MWTALPLLCAGAWLLSAGATAELTVNAIEKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRN
+HTFKMGLNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCW
+TFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKNG
+QCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHA
+VLAVGYGEQNGLLYWIVKNSWGSNWGNNGYFLIERGKNMCGLAACASYPIPQV
+
+
+>CATL_HUMAN
+MNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGK
+HSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWA
+FSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEES
+CKYNPKYSVANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLV
+VGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTV
+
+
+>CATL_RAT
+MTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGK
+HGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKGRLFQEPLMLQIPKTVDWREKGCVTPVKNQGQCGSCWA
+FSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS
+CKYRAEYAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLV
+VGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIVN
+
+
+>PAPA_CARPA
+MAMIPSISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLIQLFESWMLKHNKIYKNIDEKIYRFE
+IFKDNLKYIDETNKKNNSYWLGLNVFADMSNDEFKEKYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDW
+RQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCNGGYPWSALQLVAQ
+YGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGG
+IFVGPCGNKVDHAVAAVGYGPNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTSSFYPVKN
+
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.fa
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.msf
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.msf	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.msf	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,89 @@
+PileUp
+
+
+
+   MSF:  393  Type: P    Check:  3400   .. 
+
+ Name: CATH_HUMAN oo  Len:  393  Check:  4989  Weight:  10.0
+ Name: CATH_RAT oo  Len:  393  Check:  6434  Weight:  10.6
+ Name: ALEU_HORVU oo  Len:  393  Check:  5288  Weight:  16.7
+ Name: PAPA_CARPA oo  Len:  393  Check:  3242  Weight:  18.9
+ Name: CATL_HUMAN oo  Len:  393  Check:  4708  Weight:  12.2
+ Name: CATL_RAT oo  Len:  393  Check:  6797  Weight:  12.2
+ Name: CYS1_DICDI oo  Len:  393  Check:  1942  Weight:  18.9
+
+//
+
+
+
+CATH_HUMAN      ....MWATLP LLCAGAWLLG VPVCGAAELS VN........ .......... 
+CATH_RAT        ....MWTALP LLCAGAWLLS AGAT..AELT VN........ .......... 
+ALEU_HORVU      ....MAHARV LLLALAVLAT AAVAVASSSS FADSNPIRPV TDRAASTLES 
+PAPA_CARPA      MAMIPSISKL LFVAICLFVY MGLSFGDFSI VG........ .........Y 
+CATL_HUMAN      ......MNPT LILAAFCLGI ASATLTFDHS LE........ .......... 
+CATL_RAT        ......MTPL LLLAVLCLGT ALATPKFDQT FN........ .......... 
+CYS1_DICDI      ......MKVI LLFVLAVFTV FVSSRGIPPE EQ........ .......... 
+
+
+CATH_HUMAN      ........SL EKFHFKSWMS KHRKTYS.TE EYHHRLQTFA SNWRKINAHN 
+CATH_RAT        ........AI EKFHFTSWMK QHQKTYS.SR EYSHRLQVFA NNWRKIQAHN 
+ALEU_HORVU      AVLGALGRTR HALRFARFAV RYGKSYESAA EVRRRFRIFS ESLEEVRSTN 
+PAPA_CARPA      SQNDLTSTER LIQLFESWML KHNKIYKNID EKIYRFEIFK DNLKYIDETN 
+CATL_HUMAN      .......... ..AQWTKWKA MHNRLYG.MN EEGWRRAVWE KNMKMIELHN 
+CATL_RAT        .......... ..AQWHQWKS THRRLYG.TN EEEWRRAVWE KNMRMIQLHN 
+CYS1_DICDI      .......... ..SQFLEFQD KFNKKYS.HE EYLERFEIFK SNLGKIEELN 
+
+
+CATH_HUMAN      ....NGNHTF KMALNQFSDM SFAEIKHKYL WSEPQNCSAT KS..NYLRGT 
+CATH_RAT        ....QRNHTF KMGLNQFSDM SFAEIKHKYL WSEPQNCSAT KS..NYLRGT 
+ALEU_HORVU      ....RKGLPY RLGINRFSDM SWEEFQATRL G.AAQTCSAT LAGNHLMRDA 
+PAPA_CARPA      ....KKNNSY WLGLNVFADM SNDEFKEKYT GSIAGNYTTT ELSYEEVLND 
+CATL_HUMAN      QEYREGKHSF TMAMNAFGDM TSEEFRQVMN GFQ....NRK PRKGKVFQEP 
+CATL_RAT        GEYSNGKHGF TMEMNAFGDM TNEEFRQIVN GYR....HQK HKKGRLFQEP 
+CYS1_DICDI      LIAINHKADT KFGVNKFADL SSDEFKNYYL NNKEAIFTDD LPVADYLDDE 
+
+
+CATH_HUMAN      G..PYPPSVD WRKKGNFVSP VKNQGACGSC WTFSTTGALE SAIAIATGKM 
+CATH_RAT        G..PYPSSMD WRKKGNVVSP VKNQGACGSC WTFSTTGALE SAVAIASGKM 
+ALEU_HORVU      A..ALPETKD WREDG.IVSP VKNQAHCGSC WTFSTTGALE AAYTQATGKN 
+PAPA_CARPA      GDVNIPEYVD WRQKG.AVTP VKNQGSCGSC WAFSAVVTIE GIIKIRTGNL 
+CATL_HUMAN      LFYEAPRSVD WREKG.YVTP VKNQGQCGSC WAFSATGALE GQMFRKTGRL 
+CATL_RAT        LMLQIPKTVD WREKG.CVTP VKNQGQCGSC WAFSASGCLE GQMFLKTGKL 
+CYS1_DICDI      FINSIPTAFD WRTRG.AVTP VKNQGQCGSC WSFSTTGNVE GQHFISQNKL 
+
+
+CATH_HUMAN      LSLAEQQLVD CAQDF..... ...NNYGCQG GLPSQAFEYI LYNKGIMGED 
+CATH_RAT        MTLAEQQLVD CAQNF..... ...NNHGCQG GLPSQAFEYI LYNKGIMGED 
+ALEU_HORVU      ISLSEQQLVD CAGGF..... ...NNFGCNG GLPSQAFEYI KYNGGIDTEE 
+PAPA_CARPA      NEYSEQELLD CDR....... ...RSYGCNG GYPWSALQLV AQYG.IHYRN 
+CATL_HUMAN      ISLSEQNLVD CSGP....QG ....NEGCNG GLMDYAFQYV QDNGGLDSEE 
+CATL_RAT        ISLSEQNLVD CSHD....QG ....NQGCNG GLMDFAFQYI KENGGLDSEE 
+CYS1_DICDI      VSLSEQNLVD CDHECMEYEG EEACDEGCNG GLQPNAYNYI IKNGGIQTES 
+
+
+CATH_HUMAN      TYPYQGKDGY .CKFQPGKAI GFVKDVANIT IYDEEAMVEA VALYNPVSFA 
+CATH_RAT        SYPYIGKNGQ .CKFNPEKAV AFVKNVVNIT LNDEAAMVEA VALYNPVSFA 
+ALEU_HORVU      SYPYKGVNGV .CHYKAENAA VQVLDSVNIT LNAEDELKNA VGLVRPVSVA 
+PAPA_CARPA      TYPYEGVQRY .CRSREKGPY AAKTDGVRQV QPYNEGALLY SIANQPVSVV 
+CATL_HUMAN      SYPYEATEES .CKYNPKYSV ANDTGFVDIP K.QEKALMKA VATVGPISVA 
+CATL_RAT        SYPYEAKDGS .CKYRAEYAV ANDTGFVDIP Q.QEKALMKA VATVGPISVA 
+CYS1_DICDI      SYPYTAETGT QCNFNSANIG AKISNFTMIP K.NETVMAGY IVSTGPLAIA 
+
+
+CATH_HUMAN      FEVT.QDFMM YRTGIYSSTS CHKTPDKVNH AVLAVGYGEK NG.....IPY 
+CATH_RAT        FEVT.EDFMM YKSGVYSSNS CHKTPDKVNH AVLAVGYGEQ NG.....LLY 
+ALEU_HORVU      FQVI.DGFRQ YKSGVYTSDH CGTTPDDVNH AVLAVGYGVE NG.....VPY 
+PAPA_CARPA      LEAAGKDFQL YRGGIFVG.. ..PCGNKVDH AVAAVGYGPN .........Y 
+CATL_HUMAN      IDAGHESFLF YKEGIYFEPD ..CSSEDMDH GVLVVGYGFE STESDNNK.Y 
+CATL_RAT        MDASHPSLQF YSSGIYYEPN ..CSSKDLDH GVLVVGYGYE GTDSNKDK.Y 
+CYS1_DICDI      ADA..VEWQF YIGGVFDIP. ..CNPNSLDH GILIVGYSAK NTIFRKNMPY 
+
+
+CATH_HUMAN      WIVKNSWGPQ WGMNGYFLIE RGK....NMC GLAACASYPI PLV
+CATH_RAT        WIVKNSWGSN WGNNGYFLIE RGK....NMC GLAACASYPI PQV
+ALEU_HORVU      WLIKNSWGAD WGDNGYFKME MGK....NMC AIATCASYPV VAA
+PAPA_CARPA      ILIKNSWGTG WGENGYIRIK RGTGNSYGVC GLYTSSFYPV KN.
+CATL_HUMAN      WLVKNSWGEE WGMGGYVKMA KDRR...NHC GIASAASYPT V..
+CATL_RAT        WLVKNSWGKE WGMDGYIKIA KDRN...NHC GLATAASYPI VN.
+CYS1_DICDI      WIVKNSWGAD WGEQGYIYLR RGK....NTC GVSNFVSTSI I..
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.needle
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.needle	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.needle	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+Global: PAPA_CARPA vs CATL_HUMAN
+Score: 499.50
+
+PAPA_CARPA      1        MAMIPSISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLI 45      
+                           | |:   |:  | ||              | :   ||    | 
+CATL_HUMAN      1          MNPT...LILAAFCL..............GIASATLTFDHSLE 26      
+
+PAPA_CARPA      46       QLFESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKK....N 86      
+                           :  |   ||::|  ::|: :|  ::: |:| |:  |::     
+CATL_HUMAN      27       AQWTKWKAMHNRLY.GMNEEGWRRAVWEKNMKMIELHNQEYREGK 70      
+
+PAPA_CARPA      87       NSYWLGLNVFADMSNDEFKEKYTG.....SIAGNYTTTELSYEEV 126     
+                         :|: : :| | ||:::||::   |        |      | ||  
+CATL_HUMAN      71       HSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYE.. 113     
+
+PAPA_CARPA      127      LNDGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGII 171     
+                                 |  ||||:|| |||||||| |||||||||   :|| :
+CATL_HUMAN      114      .......APRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQM 151     
+
+PAPA_CARPA      172      KIRTGNLNEYSEQELLDCD..RRSYGCNGGYPWSALQLVAQY.GI 213     
+                           :|| |   ||| |:||   : : |||||    | | |    |:
+CATL_HUMAN      152      FRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGL 196     
+
+PAPA_CARPA      214      HYRNTYPYEGVQRYCRSREKGPYAAKTD.GVRQVQPYNEGALLYS 257     
+                             :||||  :  |:   |  |:   | |   : |  | ||: :
+CATL_HUMAN      197      DSEESYPYEATEESCKYNPK..YSVANDTGFVDI.PKQEKALMKA 238     
+
+PAPA_CARPA      258      IAN.QPVSVVLEAAGKDFQLYRGGIFVGP.CGNK.VDHAVAAVGY 299     
+                         :|   |:|| ::|  : |  |: ||:  | | :: :|| |  |||
+CATL_HUMAN      239      VATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGY 283     
+
+PAPA_CARPA      300      G........PNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLY 336     
+                         |          | |:|||||  ||  ||::: :   |    ||: 
+CATL_HUMAN      284      GFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNH...CGIA 325     
+
+PAPA_CARPA      337      TSSFYPVKN                                     345     
+                         ::: ||   
+CATL_HUMAN      326      SAASYPTV                                      333     
+
+%id = 39.81			%similarity = 59.22
+Overall %id = 35.65		Overall %similarity = 53.04

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.tblastn
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.tblastn	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.tblastn	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,855 @@
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CYS1_DICDI
+         (343 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      23  0.64
+gnl|alu|M30798_HSAL001637 (Alu-J)                                      20  5.6
+gnl|alu|M64231_HSAL003023 (Alu-Sb)                                     20  5.6
+gnl|alu|Y00326_HSAL000959 (Alu-J)                                      19  9.5
+gnl|alu|X69908_HSAL000290 (Alu-J)                                      19  9.5
+gnl|alu|M65235_HSAL002711 (Alu-Sx)                                     19  9.5
+gnl|alu|M80812_HSAL001801 (Alu-J)                                      19  9.5
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 23.1 bits (48), Expect = 0.64
+ Identities = 11/32 (34%), Positives = 15/32 (46%)
+ Frame = +1
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQ 168
+           G CGSCW        + QHF    ++  LS +
+Sbjct: 19  GYCGSCW--------KSQHFGRLRQVEGLSSE 90
+
+
+>gnl|alu|M30798_HSAL001637 (Alu-J)
+          Length = 331
+
+ Score = 20.0 bits (40), Expect = 5.6
+ Identities = 6/7 (85%), Positives = 7/7 (99%)
+ Frame = +3
+
+Query: 278 PCNPNSL 284
+           PCNPN+L
+Sbjct: 12  PCNPNTL 32
+
+
+>gnl|alu|M64231_HSAL003023 (Alu-Sb)
+          Length = 257
+
+ Score = 20.0 bits (40), Expect = 5.6
+ Identities = 8/14 (57%), Positives = 9/14 (64%)
+ Frame = +3
+
+Query: 294 SAKNTIFRKNMPYW 307
+           S KNT  +KN P W
+Sbjct: 72  STKNTKKKKNGPAW 113
+
+
+>gnl|alu|Y00326_HSAL000959 (Alu-J)
+          Length = 260
+
+ Score = 19.2 bits (38), Expect = 9.5
+ Identities = 6/10 (60%), Positives = 6/10 (60%)
+ Frame = -2
+
+Query: 177 CMEYEGEEAC 186
+           C EY GE  C
+Sbjct: 34  CWEYRGEPPC 5
+
+
+>gnl|alu|X69908_HSAL000290 (Alu-J)
+          Length = 292
+
+ Score = 19.2 bits (38), Expect = 9.5
+ Identities = 9/30 (30%), Positives = 13/30 (43%), Gaps = 3/30 (10%)
+ Frame = -1
+
+Query: 118 IPTAFDWRTRGAVTPVK---NQGQCGSCWS 144
+           +P   +WR   ++ P      Q  C S WS
+Sbjct: 256 LPILDNWRDHSSLKPPAPGFKQSSCLSFWS 167
+
+
+>gnl|alu|M65235_HSAL002711 (Alu-Sx)
+          Length = 325
+
+ Score = 19.2 bits (38), Expect = 9.5
+ Identities = 5/7 (71%), Positives = 5/7 (71%)
+ Frame = -1
+
+Query: 137 GQCGSCW 143
+           G C SCW
+Sbjct: 115 GACSSCW 95
+
+
+>gnl|alu|M80812_HSAL001801 (Alu-J)
+          Length = 202
+
+ Score = 19.2 bits (38), Expect = 9.5
+ Identities = 5/10 (50%), Positives = 7/10 (70%)
+ Frame = +2
+
+Query: 168 QNLVDCDHEC 177
+           + +V CDH C
+Sbjct: 113 ETIVHCDHTC 142
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.316    0.135    0.414 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 35241
+Number of Sequences: 327
+Number of extensions: 483
+Number of successful extensions: 7
+Number of sequences better than 10.0: 14
+Number of HSP's better than 10.0 without gapping: 6
+Number of HSP's successfully gapped in prelim test: 1
+Number of HSP's that attempted gapping in prelim test: 1
+Number of HSP's gapped (non-prelim): 7
+length of query: 343
+length of database: 26,835
+effective HSP length: 31
+effective length of query: 312
+effective length of database: 16,698
+effective search space:  5209776
+effective search space used:  5209776
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 36 (19.3 bits)
+S2: 38 (19.2 bits)
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= ALEU_HORVU
+         (362 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      23  0.51
+gnl|alu|M59213_HSAL001809 (Alu-J)                                      20  7.5
+gnl|alu|M19889_HSAL002725 (Alu-J)                                      19  9.9
+gnl|alu|X05322_HSAL000874 (Alu-Sx)                                     19  9.9
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 23.5 bits (49), Expect = 0.51
+ Identities = 13/40 (32%), Positives = 19/40 (47%), Gaps = 3/40 (7%)
+ Frame = +1
+
+Query: 164 HCGSCWTFSTTGAL---EAAYTQATGKNISLSEQQLVDCA 200
+           +CGSCW     G L   E   ++  G +I ++     DCA
+Sbjct: 22  YCGSCWKSQHFGRLRQVEGLSSEVRGSSIQVTVS--YDCA 135
+
+
+>gnl|alu|M59213_HSAL001809 (Alu-J)
+          Length = 339
+
+ Score = 19.6 bits (39), Expect = 7.5
+ Identities = 7/12 (58%), Positives = 7/12 (58%)
+ Frame = +2
+
+Query: 164 HCGSCWTFSTTG 175
+           HCGSC  F   G
+Sbjct: 8   HCGSCL*FQQFG 43
+
+
+>gnl|alu|M19889_HSAL002725 (Alu-J)
+          Length = 183
+
+ Score = 19.2 bits (38), Expect = 9.9
+ Identities = 5/8 (62%), Positives = 7/8 (87%)
+ Frame = -1
+
+Query: 236 GVNGVCHY 243
+           G NG+CH+
+Sbjct: 54  GTNGMCHH 31
+
+
+>gnl|alu|X05322_HSAL000874 (Alu-Sx)
+          Length = 225
+
+ Score = 19.2 bits (38), Expect = 9.9
+ Identities = 6/11 (54%), Positives = 7/11 (63%)
+ Frame = -3
+
+Query: 237 VNGVCHYKAEN 247
+           + GVCHY   N
+Sbjct: 163 ITGVCHYAQLN 131
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.318    0.132    0.400 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 31527
+Number of Sequences: 327
+Number of extensions: 291
+Number of successful extensions: 6
+Number of sequences better than 10.0: 8
+Number of HSP's better than 10.0 without gapping: 4
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 2
+Number of HSP's gapped (non-prelim): 4
+length of query: 362
+length of database: 26,835
+effective HSP length: 32
+effective length of query: 330
+effective length of database: 16,371
+effective search space:  5402430
+effective search space used:  5402430
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 36 (19.4 bits)
+S2: 38 (19.2 bits)
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATH_HUMAN
+         (335 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|M63796_HSAL002417 (Alu-J)                                      23  0.85
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      22  1.1
+gnl|alu|M65235_HSAL002711 (Alu-Sx)                                     21  2.5
+gnl|alu|M88005_HSAL001281 (Alu-Sx)                                     21  3.3
+gnl|alu|M64231_HSAL003024 (Alu-J)                                      20  4.3
+
+>gnl|alu|M63796_HSAL002417 (Alu-J)
+          Length = 322
+
+ Score = 22.7 bits (47), Expect = 0.85
+ Identities = 11/34 (32%), Positives = 15/34 (43%)
+ Frame = +1
+
+Query: 302 SWGPQWGMNGYFLIERGKNMCGLAACASYPIPLV 335
+           SW P W        +  KN  G+   A Y +P+V
+Sbjct: 76  SWRPAWASGHLIFTKNKKN*LGMVV*ACY-VPVV 174
+
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 22.3 bits (46), Expect = 1.1
+ Identities = 8/23 (34%), Positives = 11/23 (47%)
+ Frame = +1
+
+Query: 136 GACGSCWTFSTTGALESAIAIAT 158
+           G CGSCW     G L     +++
+Sbjct: 19  GYCGSCWKSQHFGRLRQVEGLSS 87
+
+
+>gnl|alu|M65235_HSAL002711 (Alu-Sx)
+          Length = 325
+
+ Score = 21.2 bits (43), Expect = 2.5
+ Identities = 6/7 (85%), Positives = 6/7 (85%)
+ Frame = -1
+
+Query: 136 GACGSCW 142
+           GAC SCW
+Sbjct: 115 GACSSCW 95
+
+
+>gnl|alu|M88005_HSAL001281 (Alu-Sx)
+          Length = 225
+
+ Score = 20.8 bits (42), Expect = 3.3
+ Identities = 9/29 (31%), Positives = 16/29 (55%)
+ Frame = -1
+
+Query: 118 PSVDWRKKGNFVSPVKNQGACGSCWTFST 146
+           P++      NFV  VKN+ +C   W+ ++
+Sbjct: 165 PTIPNASLTNFVFFVKNRVSCWPGWSLNS 79
+
+
+>gnl|alu|M64231_HSAL003024 (Alu-J)
+          Length = 287
+
+ Score = 20.4 bits (41), Expect = 4.3
+ Identities = 10/24 (41%), Positives = 13/24 (53%)
+ Frame = -2
+
+Query: 96  WSEPQNCSATKSNYLRGTGPYPPS 119
+           WS   +  A  +  LRG+G  PPS
+Sbjct: 232 WSAVASIMAHYTLGLRGSGDPPPS 161
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.319    0.134    0.436 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 44068
+Number of Sequences: 327
+Number of extensions: 498
+Number of successful extensions: 7
+Number of sequences better than 10.0: 10
+Number of HSP's better than 10.0 without gapping: 5
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 2
+Number of HSP's gapped (non-prelim): 5
+length of query: 335
+length of database: 26,835
+effective HSP length: 29
+effective length of query: 306
+effective length of database: 17,352
+effective search space:  5309712
+effective search space used:  5309712
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 36 (19.5 bits)
+S2: 38 (19.2 bits)
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATH_RAT
+         (333 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      24  0.36
+gnl|alu|M65235_HSAL002711 (Alu-Sx)                                     21  2.4
+gnl|alu|L11016_HSAL003845 (Alu-J)                                      19  9.2
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 23.9 bits (50), Expect = 0.36
+ Identities = 9/23 (39%), Positives = 11/23 (47%)
+ Frame = +1
+
+Query: 134 GACGSCWTFSTTGALESAVAIAS 156
+           G CGSCW     G L     ++S
+Sbjct: 19  GYCGSCWKSQHFGRLRQVEGLSS 87
+
+
+>gnl|alu|M65235_HSAL002711 (Alu-Sx)
+          Length = 325
+
+ Score = 21.2 bits (43), Expect = 2.4
+ Identities = 6/7 (85%), Positives = 6/7 (85%)
+ Frame = -1
+
+Query: 134 GACGSCW 140
+           GAC SCW
+Sbjct: 115 GACSSCW 95
+
+
+>gnl|alu|L11016_HSAL003845 (Alu-J)
+          Length = 197
+
+ Score = 19.2 bits (38), Expect = 9.2
+ Identities = 7/15 (46%), Positives = 10/15 (66%)
+ Frame = +3
+
+Query: 124 GNVVSPVKNQGACGS 138
+           GN+V P   + +CGS
+Sbjct: 93  GNIVRPHFYKASCGS 137
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.319    0.131    0.412 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 35152
+Number of Sequences: 327
+Number of extensions: 361
+Number of successful extensions: 10
+Number of sequences better than 10.0: 6
+Number of HSP's better than 10.0 without gapping: 3
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 7
+Number of HSP's gapped (non-prelim): 3
+length of query: 333
+length of database: 26,835
+effective HSP length: 31
+effective length of query: 302
+effective length of database: 16,698
+effective search space:  5042796
+effective search space used:  5042796
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 35 (19.0 bits)
+S2: 38 (19.2 bits)
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATL_HUMAN
+         (333 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      22  1.1
+gnl|alu|M63544_HSAL002045 (Alu-J)                                      21  3.1
+gnl|alu|Z17809_HSAL003365 (Alu-Sx)                                     20  7.0
+gnl|alu|X16277_HSAL000821 (Alu-J)                                      19  9.2
+gnl|alu|M65235_HSAL002711 (Alu-Sx)                                     19  9.2
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 22.3 bits (46), Expect = 1.1
+ Identities = 8/15 (53%), Positives = 8/15 (53%)
+ Frame = +1
+
+Query: 133 GQCGSCWAFSATGAL 147
+           G CGSCW     G L
+Sbjct: 19  GYCGSCWKSQHFGRL 63
+
+
+>gnl|alu|M63544_HSAL002045 (Alu-J)
+          Length = 265
+
+ Score = 20.8 bits (42), Expect = 3.1
+ Identities = 9/22 (40%), Positives = 14/22 (62%), Gaps = 2/22 (9%)
+ Frame = -2
+
+Query: 260 KEGIYFEP--DCSSEDMDHGVL 279
+           ++G+   P  +CSS  MDH +L
+Sbjct: 252 RQGLALSPRLECSSAIMDHCIL 187
+
+
+>gnl|alu|Z17809_HSAL003365 (Alu-Sx)
+          Length = 207
+
+ Score = 19.6 bits (39), Expect = 7.0
+ Identities = 6/14 (42%), Positives = 10/14 (70%)
+ Frame = -3
+
+Query: 174 GNEGCNGGLMDYAF 187
+           G +GC  GLM +++
+Sbjct: 172 GVQGCGHGLMGFSY 131
+
+
+>gnl|alu|X16277_HSAL000821 (Alu-J)
+          Length = 287
+
+ Score = 19.2 bits (38), Expect = 9.2
+ Identities = 6/14 (42%), Positives = 11/14 (77%)
+ Frame = +3
+
+Query: 64 QEYREGKHSFTMAM 77
+          QE+  G+HS T+++
+Sbjct: 72 QEFEPGQHSETLSL 113
+
+
+>gnl|alu|M65235_HSAL002711 (Alu-Sx)
+          Length = 325
+
+ Score = 19.2 bits (38), Expect = 9.2
+ Identities = 5/7 (71%), Positives = 5/7 (71%)
+ Frame = -1
+
+Query: 133 GQCGSCW 139
+           G C SCW
+Sbjct: 115 GACSSCW 95
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.317    0.133    0.417 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 39061
+Number of Sequences: 327
+Number of extensions: 451
+Number of successful extensions: 9
+Number of sequences better than 10.0: 10
+Number of HSP's better than 10.0 without gapping: 5
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 4
+Number of HSP's gapped (non-prelim): 5
+length of query: 333
+length of database: 26,835
+effective HSP length: 31
+effective length of query: 302
+effective length of database: 16,698
+effective search space:  5042796
+effective search space used:  5042796
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 36 (19.3 bits)
+S2: 38 (19.2 bits)
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATL_RAT
+         (334 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|Z17809_HSAL003365 (Alu-Sx)                                     22  1.4
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      22  1.9
+gnl|alu|Z30166_HSAL006097 (Alu-J)                                      21  2.5
+gnl|alu|L26953_HSAL004755 (Alu-Sx)                                     20  4.2
+gnl|alu|M65235_HSAL002711 (Alu-Sx)                                     19  9.5
+
+>gnl|alu|Z17809_HSAL003365 (Alu-Sx)
+          Length = 207
+
+ Score = 21.9 bits (45), Expect = 1.4
+ Identities = 8/14 (57%), Positives = 10/14 (71%)
+ Frame = -3
+
+Query: 174 GNQGCNGGLMDFAF 187
+           G QGC  GLM F++
+Sbjct: 172 GVQGCGHGLMGFSY 131
+
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 21.6 bits (44), Expect = 1.9
+ Identities = 6/7 (85%), Positives = 6/7 (85%)
+ Frame = +1
+
+Query: 133 GQCGSCW 139
+           G CGSCW
+Sbjct: 19  GYCGSCW 39
+
+
+>gnl|alu|Z30166_HSAL006097 (Alu-J)
+          Length = 168
+
+ Score = 21.2 bits (43), Expect = 2.5
+ Identities = 8/22 (36%), Positives = 12/22 (54%)
+ Frame = -2
+
+Query: 108 EPLMLQIPKTVDWREKGCVTPV 129
+           +P  L +PK  D+R   C  P+
+Sbjct: 77  DPAHLSLPKCWDYRLSHCAWPL 12
+
+
+>gnl|alu|L26953_HSAL004755 (Alu-Sx)
+          Length = 337
+
+ Score = 20.4 bits (41), Expect = 4.2
+ Identities = 6/9 (66%), Positives = 7/9 (77%)
+ Frame = +1
+
+Query: 171 HDQGNQGCN 179
+           HD G +GCN
+Sbjct: 223 HDPGGRGCN 249
+
+
+>gnl|alu|M65235_HSAL002711 (Alu-Sx)
+          Length = 325
+
+ Score = 19.2 bits (38), Expect = 9.5
+ Identities = 5/7 (71%), Positives = 5/7 (71%)
+ Frame = -1
+
+Query: 133 GQCGSCW 139
+           G C SCW
+Sbjct: 115 GACSSCW 95
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.317    0.134    0.426 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 36291
+Number of Sequences: 327
+Number of extensions: 428
+Number of successful extensions: 8
+Number of sequences better than 10.0: 10
+Number of HSP's better than 10.0 without gapping: 5
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 3
+Number of HSP's gapped (non-prelim): 5
+length of query: 334
+length of database: 26,835
+effective HSP length: 30
+effective length of query: 304
+effective length of database: 17,025
+effective search space:  5175600
+effective search space used:  5175600
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 36 (19.4 bits)
+S2: 38 (19.2 bits)
+TBLASTN 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= PAPA_CARPA
+         (345 letters)
+
+Database: alu.n
+           327 sequences; 80,506 total letters
+
+Searching......................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gnl|alu|L13391_HSAL003871 (Alu-J)                                      22  1.1
+gnl|alu|X68486_HSAL004968 (Alu-J)                                      20  5.7
+gnl|alu|M65235_HSAL002711 (Alu-Sx)                                     20  5.7
+gnl|alu|M78078_HSAL003530 (Alu-Sc)                                     19  9.8
+gnl|alu|M63322_HSAL001805 (Alu-J)                                      19  9.8
+
+>gnl|alu|L13391_HSAL003871 (Alu-J)
+          Length = 193
+
+ Score = 22.3 bits (46), Expect = 1.1
+ Identities = 9/21 (42%), Positives = 12/21 (56%), Gaps = 3/21 (14%)
+ Frame = +1
+
+Query: 153 GSCGSCWA---FSAVVTIEGI 170
+           G CGSCW    F  +  +EG+
+Sbjct: 19  GYCGSCWKSQHFGRLRQVEGL 81
+
+
+>gnl|alu|X68486_HSAL004968 (Alu-J)
+          Length = 249
+
+ Score = 20.0 bits (40), Expect = 5.7
+ Identities = 11/49 (22%), Positives = 20/49 (40%), Gaps = 10/49 (20%)
+ Frame = -2
+
+Query: 14  AICLFVYMGLSFGDFSIVGYS----------QNDLTSTERLIQLFESWM 52
+           A+CLF++    FG   +  +            N L    + + + ESW+
+Sbjct: 149 AVCLFIFSFFFFGRNGLFAHVAQAGLALLG*SNPLALASQSLGITESWV 3
+
+
+>gnl|alu|M65235_HSAL002711 (Alu-Sx)
+          Length = 325
+
+ Score = 20.0 bits (40), Expect = 5.7
+ Identities = 5/7 (71%), Positives = 6/7 (85%)
+ Frame = -1
+
+Query: 153 GSCGSCW 159
+           G+C SCW
+Sbjct: 115 GACSSCW 95
+
+
+>gnl|alu|M78078_HSAL003530 (Alu-Sc)
+          Length = 158
+
+ Score = 19.2 bits (38), Expect = 9.8
+ Identities = 7/14 (50%), Positives = 10/14 (71%)
+ Frame = -1
+
+Query: 204 ALQLVAQYGIHYRN 217
+           +  LVAQ G+ +RN
+Sbjct: 104 SFNLVAQAGVQWRN 63
+
+
+>gnl|alu|M63322_HSAL001805 (Alu-J)
+          Length = 345
+
+ Score = 19.2 bits (38), Expect = 9.8
+ Identities = 6/15 (40%), Positives = 11/15 (73%)
+ Frame = +2
+
+Query: 73  KDNLKYIDETNKKNN 87
+           K+ +KY+ + NKK +
+Sbjct: 284 KNKIKYVFKNNKKKS 328
+
+
+  Database: alu.n
+    Posted date:  Feb 26, 2001  4:36 AM
+  Number of letters in database: 80,506
+  Number of sequences in database:  327
+  
+Lambda     K      H
+   0.318    0.138    0.428 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 33866
+Number of Sequences: 327
+Number of extensions: 356
+Number of successful extensions: 7
+Number of sequences better than 10.0: 10
+Number of HSP's better than 10.0 without gapping: 5
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 2
+Number of HSP's gapped (non-prelim): 5
+length of query: 345
+length of database: 26,835
+effective HSP length: 30
+effective length of query: 315
+effective length of database: 17,025
+effective search space:  5362875
+effective search space used:  5362875
+frameshift window, decay const: 50,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 36 (19.4 bits)
+S2: 38 (19.2 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.water
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.water	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot.water	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+Local: PAPA_CARPA vs CATL_HUMAN
+Score: 501.50
+
+PAPA_CARPA      3        MIPSISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLIQL 47      
+                         | |:   |:  | ||              | :   ||    |   
+CATL_HUMAN      1        MNPT...LILAAFCL..............GIASATLTFDHSLEAQ 28      
+
+PAPA_CARPA      48       FESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKK....NNS 88      
+                         :  |   ||::|  ::|: :|  ::: |:| |:  |::     :|
+CATL_HUMAN      29       WTKWKAMHNRLY.GMNEEGWRRAVWEKNMKMIELHNQEYREGKHS 72      
+
+PAPA_CARPA      89       YWLGLNVFADMSNDEFKEKYTG.....SIAGNYTTTELSYEEVLN 128     
+                         : : :| | ||:::||::   |        |      | ||    
+CATL_HUMAN      73       FTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYE.... 113     
+
+PAPA_CARPA      129      DGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKI 173     
+                               |  ||||:|| |||||||| |||||||||   :|| :  
+CATL_HUMAN      114      .....APRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFR 153     
+
+PAPA_CARPA      174      RTGNLNEYSEQELLDCD..RRSYGCNGGYPWSALQLVAQY.GIHY 215     
+                         :|| |   ||| |:||   : : |||||    | | |    |:  
+CATL_HUMAN      154      KTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDS 198     
+
+PAPA_CARPA      216      RNTYPYEGVQRYCRSREKGPYAAKTD.GVRQVQPYNEGALLYSIA 259     
+                           :||||  :  |:   |  |:   | |   : |  | ||: ::|
+CATL_HUMAN      199      EESYPYEATEESCKYNPK..YSVANDTGFVDI.PKQEKALMKAVA 240     
+
+PAPA_CARPA      260      N.QPVSVVLEAAGKDFQLYRGGIFVGP.CGNK.VDHAVAAVGYG. 300     
+                            |:|| ::|  : |  |: ||:  | | :: :|| |  |||| 
+CATL_HUMAN      241      TVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGF 285     
+
+PAPA_CARPA      301      .......PNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTS 338     
+                                  | |:|||||  ||  ||::: :   |    ||: ::
+CATL_HUMAN      286      ESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNH...CGIASA 327     
+
+PAPA_CARPA      339      SFYP                                          342     
+                         : ||
+CATL_HUMAN      328      ASYP                                          331     
+
+%id = 40.07			%similarity = 59.61
+Overall %id = 35.65		Overall %similarity = 53.04

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.FASTA
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.FASTA	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.FASTA	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,272 @@
+ FASTA searches a protein or DNA sequence data bank
+ version 3.3t08 Jan. 17, 2001
+Please cite:
+ W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
+
+t/data/cysprot1.fa: 343 aa
+ >CYS1_DICDI
+ vs  /data_2/jason/blastdb/ecoli.aa library
+searching /data_2/jason/blastdb/ecoli.aa library
+
+       opt      E()
+< 20     0     0:
+  22     0     0:           one = represents 7 library sequences
+  24     0     0:
+  26     0     0:
+  28     0     1:*
+  30     4     6:*
+  32    13    23:== *
+  34    62    62:========*
+  36   130   127:==================*
+  38   252   210:=============================*======
+  40   310   293:=========================================*===
+  42   405   359:===================================================*======
+  44   401   396:========================================================*=
+  46   386   403:======================================================== *
+  48   348   386:==================================================     *
+  50   360   352:==================================================*=
+  52   290   309:==========================================  *
+  54   264   264:=====================================*
+  56   215   221:===============================*
+  58   145   181:=====================    *
+  60   144   147:====================*
+  62   119   118:================*
+  64    96    94:=============*
+  66    72    74:==========*
+  68    65    58:========*=
+  70    54    46:======*=
+  72    30    36:=====*
+  74    19    28:===*
+  76    26    22:===*
+  78    18    17:==*
+  80    19    13:=*=
+  82    14    10:=*
+  84     8     8:=*
+  86     4     6:*
+  88     2     5:*          inset = represents 1 library sequences
+  90     4     4:*
+  92     3     3:*         :==*
+  94     2     2:*         :=*
+  96     1     2:*         :=*
+  98     1     1:*         :*
+ 100     0     1:*         :*
+ 102     0     1:*         :*
+ 104     2     1:*         :*=
+ 106     0     0:          *
+ 108     1     0:=         *=
+ 110     0     0:          *
+ 112     0     0:          *
+ 114     0     0:          *
+ 116     0     0:          *
+ 118     0     0:          *
+>120     0     0:          *
+1358987 residues in  4289 sequences
+  Expectation_n fit: rho(ln(x))= 5.9493+/-0.00202; mu= 2.7408+/- 0.115
+ mean_var=77.5610+/-17.011, 0's: 0 Z-trim: 0  B-trim: 2 in 1/41
+ Lambda= 0.1456
+ Kolmogorov-Smirnov  statistic: 0.0234 (N=29) at  44
+
+FASTA (3.36 June 2000) function [optimized, BL50 matrix (15:-5)] ktup: 2
+ join: 37, opt: 25, gap-pen: -12/ -2, width:  16
+ Scan time:  1.110
+The best scores are:                                       opt bits E(4289)
+gi|1787478|gb|AAC74309.1| (AE000221) nitrate redu  ( 512)   92   29     1.2
+gi|1790635|gb|AAC77148.1| (AE000491) putative DEO  ( 251)   84   27     2.1
+gi|1786590|gb|AAC73494.1| (AE000145) orf, hypothe  (  94)   78   26     2.1
+gi|1790853|gb|AAC77345.1| (AE000509) soluble lyti  ( 654)   84   28     4.8
+gi|1789307|gb|AAC75975.1| (AE000377) biosynthetic  ( 658)   83   27     5.6
+gi|1788174|gb|AAC74937.1| (AE000280) orf, hypothe  ( 199)   74   25     7.4
+gi|1789138|gb|AAC75818.1| (AE000361) putative kin  ( 492)   79   26     7.8
+gi|1789427|gb|AAC76084.1| (AE000386) orf, hypothe  ( 354)   76   26     9.1
+
+>>gi|1787478|gb|AAC74309.1| (AE000221) nitrate reductase  (512 aa)
+ initn:  35 init1:  35 opt:  92  Z-score: 109.2  bits: 29.2 E():  1.2
+Smith-Waterman score: 92;  23.936% identity (26.012% ungapped) in 188 aa overlap (125-305:2-181)
+
+          100       110       120       130       140        150   
+CYS1_D NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTT-GNV--
+                                     . :. :  : :  .: .: . :.:  ::  
+gi|178                              MKIRSQVGMVLNLDKCIGCHTCSVTCKNVWT
+                                            10        20        30 
+
+               160       170        180       190       200        
+CYS1_D --EGQHFISQNKLVSLSEQNLVDCDHECME-YEGEEACDEGCNGGLQPNAYNYIIKNGGI
+         :: ..   :.. .   :..   : : .: :.:     .  :: :::   :  .  : :
+gi|178 SREGVEYAWFNNVETKPGQGF-PTDWENQEKYKGGWI--RKINGKLQPRMGNRAMLLGKI
+              40        50         60          70        80        
+
+      210       220       230       240       250        260       
+CYS1_D QTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGP-LAIAADAVEW
+        ..   :   .     .:.  :. .   .     :.. .     . ::  .:    . .:
+gi|178 FANPHLPGIDDYYEPFDFDYQNLHTAPEG----SKSQPIARPRSLITGERMAKIEKGPNW
+       90       100       110           120       130       140    
+
+       270       280       290       300       310       320       
+CYS1_D QFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR
+       .  .:: ::   . ...:. :  . ::  .. :   .:                      
+gi|178 EDDLGGEFDKLAKDKNFDN-IQKAMYSQFENTFMMYLPRLCEHCLNPACVATCPSGAIYK
+          150       160        170       180       190       200   
+
+       330       340                                               
+CYS1_D GKNTCGVSNFVSTSII                                            
+                                                                   
+gi|178 REEDGIVLIDQDKCRGWRMCITGCPYKKIYFNWKSGKSEKCIFCYPRIEAGQPTVCSETC
+           210       220       230       240       250       260   
+
+>>gi|1790635|gb|AAC77148.1| (AE000491) putative DEOR-typ  (251 aa)
+ initn:  46 init1:  46 opt:  84  Z-score: 104.9  bits: 27.4 E():  2.1
+Smith-Waterman score: 84;  22.078% identity (23.288% ungapped) in 77 aa overlap (99-171:119-195)
+
+       70        80        90       100       110       120        
+CYS1_D HKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG
+                                     :.:. ::.:.:: :.  .:.       ...
+gi|179 QLVNPGESVVINCGSTAFLLGREMCGKPVQIITNYLPLANYLIDQEHDSVIIMGGQYNKS
+       90       100       110       120       130       140        
+
+      130       140           150       160       170       180    
+CYS1_D AVTPVKNQGQCGSC----WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEE
+           .. ::. .:     : :..  .. .. . . . :....::....            
+gi|179 QSITLSPQGSENSLYAGHWMFTSGKGLTAEGLYKTDMLTAMAEQKMLSVVGKLVVLVDSS
+      150       160       170       180       190       200        
+
+          190       200       210       220       230       240    
+CYS1_D ACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN
+                                                                   
+gi|179 KIGERAGMLFSRADQIDMLITGKNANPEILQQLEAQGVSILRV                 
+      210       220       230       240       250                  
+
+>>gi|1786590|gb|AAC73494.1| (AE000145) orf, hypothetical  (94 aa)
+ initn:  37 init1:  37 opt:  78  Z-score: 104.8  bits: 25.9 E():  2.1
+Smith-Waterman score: 78;  36.842% identity (43.750% ungapped) in 38 aa overlap (242-278:42-74)
+
+             220       230       240       250       260       270 
+CYS1_D SSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFY-
+                                     :.. ::..: .    :     ::..:: : 
+gi|178 VKSIGFSSSSTGRASVGVMVEGEYTFSTAEPEEMTVISGALNVLLP-----DATDWQVYE
+              20        30        40        50             60      
+
+              280       290       300       310       320       330
+CYS1_D IGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKN
+        :.::..:                                                    
+gi|178 AGSVFNVPGHSEFHLQVAEPTSYLCRYL                                
+         70        80        90                                    
+
+>>gi|1790853|gb|AAC77345.1| (AE000509) soluble lytic mur  (654 aa)
+ initn:  61 init1:  61 opt:  84  Z-score: 98.5  bits: 27.6 E():  4.8
+Smith-Waterman score: 84;  32.692% identity (34.694% ungapped) in 52 aa overlap (104-152:104-155)
+
+            80        90       100       110       120       130   
+CYS1_D KFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPV
+                                     :: :  :...:.: .    :::   : .: 
+gi|179 YPYLEYRQITDDLMNQPAVTVTNFVRANPTLPPARTLQSRFVNELARREDWRGLLAFSPE
+            80        90       100       110       120       130   
+
+              140       150       160       170       180       190
+CYS1_D K---NQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGC
+       :   ...::.  ..  .::. :                                      
+gi|179 KPGTTEAQCNYYYAKWNTGQSEEAWQGAKELWLTGKSQPNACDKLFSVWRASGKQDPLAY
+           140       150       160       170       180       190   
+
+>>gi|1789307|gb|AAC75975.1| (AE000377) biosynthetic argi  (658 aa)
+ initn:  41 init1:  41 opt:  83  Z-score: 97.3  bits: 27.4 E():  5.6
+Smith-Waterman score: 83;  23.913% identity (24.176% ungapped) in 92 aa overlap (178-268:315-406)
+
+       150       160       170       180        190       200      
+CYS1_D TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA-CDEGCNGGLQPNAYNYIIKNG
+                                     ..::: ..  : . : ::.  : : :   :
+gi|178 TGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIG
+          290       300       310       320       330       340    
+
+        210       220       230       240       250       260      
+CYS1_D GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVE
+           :.. :. .    .    .:.  . .::.  . .:: ..    .  .: :. .    
+gi|178 DACEENGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTVPTAPAEDAPRALQSMWET
+          350       360       370       380       390       400    
+
+        270       280       290       300       310       320      
+CYS1_D WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLR
+       ::                                                          
+gi|178 WQEMHEPGTRRSLREWLHDSQMDLHDIHIGYSSGIFSLQERAWAEQLYLSMCHEVQKQLD
+          410       420       430       440       450       460    
+
+>>gi|1788174|gb|AAC74937.1| (AE000280) orf, hypothetical  (199 aa)
+ initn:  46 init1:  46 opt:  74  Z-score: 95.2  bits: 25.2 E():  7.4
+Smith-Waterman score: 74;  43.750% identity (50.000% ungapped) in 32 aa overlap (308-335:110-141)
+
+       280       290       300       310       320        330      
+CYS1_D PCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR-GKNT---CG
+                                     :.: .::: .: .  . ::: : .:   ::
+gi|178 PVDAPSPAKVLPENWWQHPAALGATDSDIEIIKRQWGAFYGTDLELQLRRRGIDTIVLCG
+      80        90       100       110       120       130         
+
+           340                                                     
+CYS1_D VSNFVSTSII                                                  
+       .:.                                                         
+gi|178 ISTNIGVESTARNAWELGFNLVIAEDACSAASAEQHNNSINHIYPRIARVRSVEEILNAL
+     140       150       160       170       180       190         
+
+>>gi|1789138|gb|AAC75818.1| (AE000361) putative kinase [  (492 aa)
+ initn:  36 init1:  36 opt:  79  Z-score: 94.7  bits: 26.5 E():  7.8
+Smith-Waterman score: 84;  19.136% identity (21.233% ungapped) in 162 aa overlap (34-192:165-313)
+
+            10        20        30        40        50        60   
+CYS1_D ILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELN
+                                     ::::     ...:   ..  . ::.:    
+gi|178 GEFKDNIANYFGQWPVDYKSWAWSEDAAVMDKFNIP---RHMLFDVQMPGTVLGHITPQA
+          140       150       160       170          180       190 
+
+            70        80        90       100       110       120   
+CYS1_D LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD
+        .: .  :     :   .:   . .    :... :...    .: ... . . . :.:. 
+gi|178 ALATHFPAGLPV-VCTTSDKPVEALGAGLLDDETAVISLGTYIALMMNGKALPKDPVAY-
+             200        210       220       230       240          
+
+           130          140       150       160       170       180
+CYS1_D WRTRGAVTPV---KNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY
+       :   ...  .   .. :   . :. :   .. :. .:.. .  .:: ..:..    :.  
+gi|178 WPIMSSIPQTLLYEGYGIRKGMWTVSWLRDMLGESLIQDARAQDLSPEDLLNKKASCVP-
+     250       260       270       280       290       300         
+
+              190       200       210       220       230       240
+CYS1_D EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM
+               ::::                                                
+gi|178 -------PGCNGLMTVLDWLTNPWEPYKRGIMIGFDSSMDYAWIYRSILESVALTLKNNY
+             310       320       330       340       350       360 
+
+>>gi|1789427|gb|AAC76084.1| (AE000386) orf, hypothetical  (354 aa)
+ initn:  65 init1:  40 opt:  76  Z-score: 93.5  bits: 25.8 E():  9.1
+Smith-Waterman score: 76;  22.619% identity (23.899% ungapped) in 168 aa overlap (141-303:81-244)
+
+              120       130       140       150       160       170
+CYS1_D DDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNL
+                                     : :.      . :: : .::. .     : 
+gi|178 GDKIWQSSEYFMNVFCNNALPGPSPGEEYPSAWANIMMLLASGQDFYNQNSYTFGVTYNG
+               60        70        80        90       100       110
+
+              180       190       200        210       220         
+CYS1_D VDCDHECMEYEGEEACDEGCNGGLQPNAYNY-IIKNGGIQTESSYPYTAETGTQCNFNSA
+       :: :       .  .: .  ..:   :.:.   . .:: . . :  . ...  :  .. :
+gi|178 VDYDSTSPLPIAAPVCIDIKGAGTFGNGYKKPAVCSGGPEPQLSVTFPVRV--QLYIKLA
+              120       130       140       150       160          
+
+     230       240       250       260       270          280      
+CYS1_D NIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDI---PCNPN-SLD
+       . . :...  ..: .: .   .   .:  :: .:  .  : : :. .:    :  : .:.
+gi|178 KNANKVNKKLVLP-DEYIALEFKGMSGAGAIEVDK-NLTFRIRGLNNIHVLDCFVNVDLE
+      170       180        190       200        210       220      
+
+         290       300       310       320       330       340     
+CYS1_D HGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII  
+        .  .: ..  :.   ::                                          
+gi|178 PADGVVDFGKINSRTIKNTSVSETFSVVMTKDPGAACTEQFNILGSFFTTDILSDYSHLD
+        230       240       250       260       270       280      
+
+
+
+343 residues in 1 query   sequences
+1358987 residues in 4289 library sequences
+ Scomplib [33t08]
+ start: Sat Dec  8 11:43:36 2001 done: Sat Dec  8 11:43:37 2001
+ Scan time:  1.110 Display time:  0.090
+
+Function used was FASTA [version 3.3t08 Jan. 17, 2001]

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+>CYS1_DICDI
+MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHK
+ADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG
+SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT
+ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCN
+PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,24 @@
+>CYS1_DICDI
+MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHK
+ADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG
+SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT
+ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCN
+PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+
+
+>ALEU_HORVU
+MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
+SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
+AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
+GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
+QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
+IATCASYPVVAA
+
+
+>CATH_HUMAN
+MWATLPLLCAGAWLLGVPVCGAAELSVNSLEKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHNN
+GNHTFKMALNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPPSVDWRKKGNFVSPVKNQGACGS
+CWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNYGCQGGLPSQAFEYILYNKGIMGEDTYPYQGK
+DGYCKFQPGKAIGFVKDVANITIYDEEAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVN
+HAVLAVGYGEKNGIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPLV
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.msf
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.msf	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1a.msf	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+PileUp
+
+
+
+   MSF:  383  Type: P    Check:  3696   .. 
+
+ Name: ALEU_HORVU oo  Len:  383  Check:  9840  Weight:  34.4
+ Name: CATH_HUMAN oo  Len:  383  Check:  7134  Weight:  30.1
+ Name: CYS1_DICDI oo  Len:  383  Check:  6722  Weight:  35.4
+
+//
+
+
+
+ALEU_HORVU      MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG 
+CATH_HUMAN      ......MWAT LPLLCAGAWL LG........ VPVCGAAELS VNSLEK.... 
+CYS1_DICDI      .....MKVIL LFVLAVFTVF VS........ .......SRG IPPEEQ.... 
+
+
+ALEU_HORVU      ALGRTRHALR FARFAVRYGK SYESAAEVRR RFRIFSESLE EVRSTN.... 
+CATH_HUMAN      ........FH FKSWMSKHRK TYS.TEEYHH RLQTFASNWR KINAHN.... 
+CYS1_DICDI      ........SQ FLEFQDKFNK KYS.HEEYLE RFEIFKSNLG KIEELNLIAI 
+
+
+ALEU_HORVU      RKGLPYRLGI NRFSDMSWEE FQATRLG.AA QTCSATLAGN HLMRDAAA.. 
+CATH_HUMAN      NGNHTFKMAL NQFSDMSFAE IKHKYLWSEP QNCSATKS.. NYLRGTGP.. 
+CYS1_DICDI      NHKADTKFGV NKFADLSSDE FKNYYLNNKE AIFTDDLPVA DYLDDEFINS 
+
+
+ALEU_HORVU      LPETKDWRED G.IVSPVKNQ AHCGSCWTFS TTGALEAAYT QATGKNISLS 
+CATH_HUMAN      YPPSVDWRKK GNFVSPVKNQ GACGSCWTFS TTGALESAIA IATGKMLSLA 
+CYS1_DICDI      IPTAFDWRTR G.AVTPVKNQ GQCGSCWSFS TTGNVEGQHF ISQNKLVSLS 
+
+
+ALEU_HORVU      EQQLVDCAGG FNNF...... ..GCNGGLPS QAFEYIKYNG GIDTEESYPY 
+CATH_HUMAN      EQQLVDCAQD FNNY...... ..GCQGGLPS QAFEYILYNK GIMGEDTYPY 
+CYS1_DICDI      EQNLVDCDHE CMEYEGEEAC DEGCNGGLQP NAYNYIIKNG GIQTESSYPY 
+
+
+ALEU_HORVU      KGVNGVCHYK AENAAVQVLD SVNITLNAED ELKNAVGLVR PVSVAFQVID 
+CATH_HUMAN      QGKDGYCKFQ PGKAIGFVKD VANITIYDEE AMVEAVALYN PVSFAFEVTQ 
+CYS1_DICDI      TAETGTQCNF NSANIGAKIS NFTMIPKNET VMAGYIVSTG PLAIAADAVE 
+
+
+ALEU_HORVU      GFRQYKSGVY TSDHCGTTPD DVNHAVLAVG YGVEN..... GVPYWLIKNS 
+CATH_HUMAN      DFMMYRTGIY SSTSCHKTPD KVNHAVLAVG YGEKN..... GIPYWIVKNS 
+CYS1_DICDI      .WQFYIGGVF DIPCN...PN SLDHGILIVG YSAKNTIFRK NMPYWIVKNS 
+
+
+ALEU_HORVU      WGADWGDNGY FKMEMGKNMC AIATCASYPV VAA
+CATH_HUMAN      WGPQWGMNGY FLIERGKNMC GLAACASYPI PLV
+CYS1_DICDI      WGADWGEQGY IYLRRGKNTC GVSNFVSTSI I..
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,33 @@
+>CATH_RAT
+MWTALPLLCAGAWLLSAGATAELTVNAIEKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRN
+HTFKMGLNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCW
+TFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKNG
+QCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHA
+VLAVGYGEQNGLLYWIVKNSWGSNWGNNGYFLIERGKNMCGLAACASYPIPQV
+
+
+>CATL_HUMAN
+MNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGK
+HSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWA
+FSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEES
+CKYNPKYSVANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLV
+VGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTV
+
+
+>CATL_RAT
+MTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGK
+HGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKGRLFQEPLMLQIPKTVDWREKGCVTPVKNQGQCGSCWA
+FSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS
+CKYRAEYAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLV
+VGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIVN
+
+
+>PAPA_CARPA
+MAMIPSISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLIQLFESWMLKHNKIYKNIDEKIYRFE
+IFKDNLKYIDETNKKNNSYWLGLNVFADMSNDEFKEKYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDW
+RQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCNGGYPWSALQLVAQ
+YGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGG
+IFVGPCGNKVDHAVAAVGYGPNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTSSFYPVKN
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.hmmsearch
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.hmmsearch	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.hmmsearch	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,177 @@
+hmmsearch - search a sequence database with a profile HMM
+HMMER 2.2g (August 2001)
+Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
+Freely distributed under the GNU General Public License (GPL)
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                   Peptidase_C1.hmm [Peptidase_C1]
+Sequence database:          cysprot1b.fa
+per-sequence score cutoff:  [none]
+per-domain score cutoff:    [none]
+per-sequence Eval cutoff:   <= 10        
+per-domain Eval cutoff:     [none]
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+Query HMM:   Peptidase_C1
+Accession:   PF00112
+Description: Papain family cysteine protease
+  [HMM has been calibrated; E-values are empirical estimates]
+
+Scores for complete sequences (score includes all domains):
+Sequence   Description                                  Score    E-value  N 
+--------   -----------                                  -----    ------- ---
+CATL_RAT                                                449.4     2e-135   1
+CATL_HUMAN                                              444.5   6.1e-134   1
+CATH_RAT                                                381.8   4.8e-115   1
+PAPA_CARPA                                              337.7     9e-102   1
+
+Parsed for domains:
+Sequence   Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+--------   ------- ----- -----    ----- -----      -----  -------
+CATL_RAT     1/1     114   332 ..     1   337 []   449.4   2e-135
+CATL_HUMAN   1/1     114   332 ..     1   337 []   444.5 6.1e-134
+CATH_RAT     1/1     114   330 ..     1   337 []   381.8 4.8e-115
+PAPA_CARPA   1/1     134   343 ..     1   337 []   337.7   9e-102
+
+Alignments of top-scoring domains:
+CATL_RAT: domain 1 of 1, from 114 to 332: score 449.4, E = 2e-135
+                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
+                      +P+++DWRe kg  VtpVK+QG qCGSCWAFSa g lEg+ ++kt  
+    CATL_RAT   114    IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT-- 155  
+
+                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
+                       gkl+sLSEQ+LvDC++ d gn+      GCnG Glmd Af+Yik+ 
+    CATL_RAT   156 ----GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE- 192  
+
+                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
+                       NgGl++E++Y     PY+    +kd                   g+
+    CATL_RAT   193 ----NGGLDSEESY-----PYE----AKD-------------------GS 210  
+
+                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
+                   Cky+  + ++     a+++g++d+p++     E+al+ka+a++GP+sVa+
+    CATL_RAT   211 CKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAM 249  
+
+                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
+                   das+ s    q+Y+sG       +Y+++    C+++   +LdH+Vl+VGY
+    CATL_RAT   250 DASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGY 283  
+
+                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
+                   G e+                                      ++++ +YW
+    CATL_RAT   284 GYEG-T------------------------------------DSNKDKYW 296  
+
+                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
+                   +VKNSWG++WG++GY++ia+++n    n+CG+a+ asypi   
+    CATL_RAT   297 LVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI    332  
+
+CATL_HUMAN: domain 1 of 1, from 114 to 332: score 444.5, E = 6.1e-134
+                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
+                      +P s+DWRe kg +VtpVK+QG qCGSCWAFSa+galEg+ ++kt  
+  CATL_HUMAN   114    APRSVDWRE-KG-YVTPVKNQG-QCGSCWAFSATGALEGQMFRKT-- 155  
+
+                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
+                       g l+sLSEQ+LvDC+g + gn+      GCnG Glmd+Af+Y+++ 
+  CATL_HUMAN   156 ----GRLISLSEQNLVDCSG-PQGNE------GCNG-GLMDYAFQYVQD- 192  
+
+                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
+                       NgGl++E++Y     PY+    +++                    +
+  CATL_HUMAN   193 ----NGGLDSEESY-----PYE----ATE-------------------ES 210  
+
+                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
+                   Ckyn +k s+     a+++g++d+p +     E+al+ka+a++GP+sVai
+  CATL_HUMAN   211 CKYN-PKYSV-----ANDTGFVDIPKQ-----EKALMKAVATVGPISVAI 249  
+
+                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
+                   da++ s   F +Yk G       +Y ++   +C+++   + dH+Vl+VGY
+  CATL_HUMAN   250 DAGHES---FLFYKEG-------IYFEP---DCSSE---DMDHGVLVVGY 283  
+
+                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
+                   G e+                                      e+++ +YW
+  CATL_HUMAN   284 GFES-T------------------------------------ESDNNKYW 296  
+
+                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
+                   +VKNSWG++WG+ GY+++a+++     n+CGIas asyp+   
+  CATL_HUMAN   297 LVKNSWGEEWGMGGYVKMAKDRR----NHCGIASAASYPT    332  
+
+CATH_RAT: domain 1 of 1, from 114 to 330: score 381.8, E = 4.8e-115
+                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
+                       P s+DWR+ kg  V+pVK+QG  CGSCW FS++galE++ +i++  
+    CATH_RAT   114    YPSSMDWRK-KGNVVSPVKNQG-ACGSCWTFSTTGALESAVAIAS-- 156  
+
+                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
+                       gk   L EQqLvDC   +++n+      GC+G Gl+++AfeYi++ 
+    CATH_RAT   157 ----GKMMTLAEQQLVDCAQ-NFNNH------GCQG-GLPSQAFEYILY- 193  
+
+                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
+                       N+G++ E++Y     PY     gk+                   g+
+    CATH_RAT   194 ----NKGIMGEDSY-----PYI----GKN-------------------GQ 211  
+
+                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
+                   Ck+n +++++     a++k+ ++++ n    dE+a+ +a+a + Pvs a+
+    CATH_RAT   212 CKFN-PEKAV-----AFVKNVVNITLN----DEAAMVEAVALYNPVSFAF 251  
+
+                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
+                   +++e    DF++YksG       vY++    +C +tp + ++HAVl+VGY
+    CATH_RAT   252 EVTE----DFMMYKSG-------VYSSN---SCHKTP-DKVNHAVLAVGY 286  
+
+                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
+                   G +n g                                          YW
+    CATH_RAT   287 GEQN-GLL----------------------------------------YW 295  
+
+                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
+                   IVKNSWG++WG nGYf i+Rgkn     +CG+a +asypi   
+    CATH_RAT   296 IVKNSWGSNWGNNGYFLIERGKN-----MCGLAACASYPI    330  
+
+PAPA_CARPA: domain 1 of 1, from 134 to 343: score 337.7, E = 9e-102
+                   *->lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgt
+                      +Pe +DWR+ kg aVtpVK+QG +CGSCWAFSav ++Eg+++i+t  
+  PAPA_CARPA   134    IPEYVDWRQ-KG-AVTPVKNQG-SCGSCWAFSAVVTIEGIIKIRT-- 175  
+
+                   kawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikke
+                       g+l  +SEQ+L+DCd+    ++      GCnG G+++ A++ + + 
+  PAPA_CARPA   176 ----GNLNEYSEQELLDCDR---RSY------GCNG-GYPWSALQLVAQ- 210  
+
+                   qIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgt
+                         G++    Y     PY+    g++                     
+  PAPA_CARPA   211 -----YGIHYRNTY-----PYE----GVQ-------------------RY 227  
+
+                   CkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVai
+                   C+++ +k+ +    +ak +g ++v+++    +E al + +a+ +PvsV  
+  PAPA_CARPA   228 CRSR-EKGPY----AAKTDGVRQVQPY----NEGALLYSIAN-QPVSVVL 267  
+
+                   dasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGY
+                   +a +    DFqlY++G       ++++    +Cg+     +dHAV++VGY
+  PAPA_CARPA   268 EAAGK---DFQLYRGG-------IFVG----PCGN----KVDHAVAAVGY 299  
+
+                   GteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYW
+                   G                                              +Y 
+  PAPA_CARPA   300 G---------------------------------------------PNYI 304  
+
+                   IVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi<-*
+                   ++KNSWGt WGEnGY+ri+Rg+++s ++ CG+ ++  yp+   
+  PAPA_CARPA   305 LIKNSWGTGWGENGYIRIKRGTGNS-YGVCGLYTSSFYPV    343  
+
+
+Histogram of all scores:
+score    obs    exp  (one = represents 1 sequences)
+-----    ---    ---
+> 337      4      -|====                                                       
+
+
+% Statistical details of theoretical EVD fit:
+              mu =  -195.8384
+          lambda =     0.1423
+chi-sq statistic =     0.0000
+  P(chi-square)  =          0
+
+Total sequences searched: 4
+
+Whole sequence top hits:
+tophits_s report:
+     Total hits:           4
+     Satisfying E cutoff:  4
+     Total memory:         16K
+
+Domain top hits:
+tophits_s report:
+     Total hits:           4
+     Satisfying E cutoff:  4
+     Total memory:         20K

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.msf
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.msf	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.msf	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,62 @@
+PileUp
+
+
+
+   MSF:  366  Type: P    Check:  8252   .. 
+
+ Name: CATL_HUMAN oo  Len:  366  Check:  9087  Weight:  21.6
+ Name: CATL_RAT oo  Len:  366  Check:   271  Weight:  20.7
+ Name: CATH_RAT oo  Len:  366  Check:  8974  Weight:  27.9
+ Name: PAPA_CARPA oo  Len:  366  Check:  9920  Weight:  29.7
+
+//
+
+
+
+CATL_HUMAN      ......MNPT LILAAFCLGI .......... ...ASATLTF DHSLEAQWTK 
+CATL_RAT        ......MTPL LLLAVLCLGT .......... ...ALATPKF DQTFNAQWHQ 
+CATH_RAT        ...MWTALPL LCAGAWLLSA G......... ...ATAELTV NAIEKFHFTS 
+PAPA_CARPA      MAMIPSISKL LFVAICLFVY MGLSFGDFSI VGYSQNDLTS TERLIQLFES 
+
+
+CATL_HUMAN      WKAMHNRLYG .MNEEGWRRA VWEKNMKMIE LHNQEYREGK HSFTMAMNAF 
+CATL_RAT        WKSTHRRLYG .TNEEEWRRA VWEKNMRMIQ LHNGEYSNGK HGFTMEMNAF 
+CATH_RAT        WMKQHQKTYS .SREYSHRLQ VFANNWRKIQ AHNQRN.... HTFKMGLNQF 
+PAPA_CARPA      WMLKHNKIYK NIDEKIYRFE IFKDNLKYID ETNKKN.... NSYWLGLNVF 
+
+
+CATL_HUMAN      GDMTSEEFRQ VMNGFQNRKP RKGKVFQEPL FY....EAPR SVDWREKG.Y 
+CATL_RAT        GDMTNEEFRQ IVNGYRHQKH KKGRLFQEPL ML....QIPK TVDWREKG.C 
+CATH_RAT        SDMSFAEIKH KYLWSEPQNC SATKSNYLRG TG....PYPS SMDWRKKGNV 
+PAPA_CARPA      ADMSNDEFKE KYTGSIAGNY TTTELSYEEV LNDGDVNIPE YVDWRQKG.A 
+
+
+CATL_HUMAN      VTPVKNQGQC GSCWAFSATG ALEGQMFRKT GRLISLSEQN LVDCSGPQGN 
+CATL_RAT        VTPVKNQGQC GSCWAFSASG CLEGQMFLKT GKLISLSEQN LVDCSHDQGN 
+CATH_RAT        VSPVKNQGAC GSCWTFSTTG ALESAVAIAS GKMMTLAEQQ LVDCAQNFNN 
+PAPA_CARPA      VTPVKNQGSC GSCWAFSAVV TIEGIIKIRT GNLNEYSEQE LLDCDRR..S 
+
+
+CATL_HUMAN      EGCNGGLMDY AFQYVQDNGG LDSEESYPYE ATEESCKYNP KYSVANDTGF 
+CATL_RAT        QGCNGGLMDF AFQYIKENGG LDSEESYPYE AKDGSCKYRA EYAVANDTGF 
+CATH_RAT        HGCQGGLPSQ AFEYILYNKG IMGEDSYPYI GKNGQCKFNP EKAVAFVKNV 
+PAPA_CARPA      YGCNGGYPWS ALQLVAQYG. IHYRNTYPYE GVQRYCRSRE KGPYAAKTDG 
+
+
+CATL_HUMAN      VDIPK.QEKA LMKAVATVGP ISVAIDAGHE SFLFYKEGIY FEPDCSS..E 
+CATL_RAT        VDIPQ.QEKA LMKAVATVGP ISVAMDASHP SLQFYSSGIY YEPNCSS..K 
+CATH_RAT        VNITLNDEAA MVEAVALYNP VSFAFEVT.E DFMMYKSGVY SSNSCHKTPD 
+PAPA_CARPA      VRQVQPYNEG ALLYSIANQP VSVVLEAAGK DFQLYRGGIF VGP.CGN... 
+
+
+CATL_HUMAN      DMDHGVLVVG YGFESTESDN NKYWLVKNSW GEEWGMGGYV KMAKDRR... 
+CATL_RAT        DLDHGVLVVG YGYEGTDSNK DKYWLVKNSW GKEWGMDGYI KIAKDRN... 
+CATH_RAT        KVNHAVLAVG YG....EQNG LLYWIVKNSW GSNWGNNGYF LIERGK.... 
+PAPA_CARPA      KVDHAVAAVG YG........ PNYILIKNSW GTGWGENGYI RIKRGTGNSY 
+
+
+CATL_HUMAN      NHCGIASAAS YPTV..
+CATL_RAT        NHCGLATAAS YPIVN.
+CATH_RAT        NMCGLAACAS YPIPQV
+PAPA_CARPA      GVCGLYTSSF YPVKN.
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.newick
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.newick	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot1b.newick	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+(
+CATH_RAT:0.31356,
+(
+CATL_HUMAN:0.13938,
+CATL_RAT:0.12788)
+95:0.18794,
+PAPA_CARPA:0.34410);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/cysprot_vs_gadfly.FASTA
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/cysprot_vs_gadfly.FASTA	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/cysprot_vs_gadfly.FASTA	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,760 @@
+ FASTA searches a protein or DNA sequence data bank
+ version 3.3t08 Jan. 17, 2001
+Please cite:
+ W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
+
+ cysprot.fa, 2385 aa
+ vs /data_2/jason/blastdb/gadflypep2 library
+
+7177762 residues in 14334 sequences
+  Expectation_n fit: rho(ln(x))= 4.9846+/-0.000606; mu= 15.4125+/- 0.036
+ mean_var=84.2366+/-16.296, 0's: 5 Z-trim: 12  B-trim: 404 in 1/54
+ Lambda= 0.1397
+
+FASTA (3.36 June 2000) function [optimized, BL50 matrix (15:-5)] ktup: 2
+ join: 43, opt: 31, gap-pen: -12/ -2, width:  16
+The best scores are:                                       opt bits E(14315)
+Cp1|FBgn0013770|pp-CT20780|FBan0006692 GO:[cathep  ( 341) 1073  228 3.1e-59
+CG11459|FBgn0037396|pp-CT28891|FBan0011459 GO:[ca  ( 336)  771  167 6.4e-41
+CG4847|FBgn0034229|pp-CT15577|FBan0004847 GO:[end  ( 390)  762  165 2.5e-40
+26/29kD-proteinase|FBgn0028967|pp-CT25694|FBan000  ( 549)  712  155 3.5e-37
+CG12163|FBgn0037303|pp-CT8855|FBan0012163 GO:[] m  ( 475)  558  124 6.9e-28
+CG1075|FBgn0037397|pp-CT1381|FBan0001075 GO:[cath  ( 274)  413   95   3e-19
+CG6347|FBgn0033874|pp-CT19836|FBan0006347 GO:[cat  ( 235)  311   74 4.1e-13
+CG6337|FBgn0033873|pp-CT19824|FBan0006337 GO:[end  ( 340)  265   65 3.3e-10
+CG5367|FBgn0032228|pp-CT17056|FBan0005367 GO:[end  ( 228)  249   62 2.4e-09
+CG3074|FBgn0034709|pp-CT10308|FBan0003074 GO:[cat  ( 441)  252   62 2.4e-09
+CG10992|FBgn0030521|pp-CT30795|FBan0010992 GO:[ca  ( 340)  235   59 2.2e-08
+CG10460|FBgn0034443|pp-CT29364|FBan0010460 GO:[en  (  79)  131   37   0.016
+Ddx1|FBgn0015075|pp-CT25986|FBan0009054 GO:[helic  ( 727)  107   33     2.2
+CPTI|FBgn0027842|pp-CT32036|FBan0012891 GO:[carni  ( 780)  105   33       3
+CG4393|FBgn0039075|pp-CT14338|FBan0004393 GO:[cyt  ( 968)  105   33     3.5
+CG4392|FBgn0036283|pp-CT14260|FBan0004392 GO:[mit  ( 370)  100   32     3.6
+CG14034|FBgn0031691|pp-CT33593|FBan0014034 GO:[ph  ( 223)   93   30     6.8
+CG6357|FBgn0033875|pp-CT19866|FBan0006357 GO:[end  ( 439)   96   31     7.2
+CG11288|FBgn0039895|pp-CT31491|FBan0011288 GO:[ce  ( 202)   91   30     8.4
+CG13155|FBgn0033723|pp-CT32396|FBan0013155 GO:[]   ( 236)   91   30     9.3
+CG4810|FBgn0037994|pp-CT15451|FBan0004810 GO:[tra  ( 551)   95   31     9.6
+
+
+>>Cp1|FBgn0013770|pp-CT20780|FBan0006692 GO:[cathepsin L  (341 aa)
+ initn: 829 init1: 615 opt: 1073  Z-score: 1170.6  bits: 227.8 E(): 3.1e-59
+Smith-Waterman score: 1219;  53.043% identity (56.135% ungapped) in 345 aa overlap (1373-1706:5-341)
+
+           1350      1360      1370      1380      1390      1400  
+PAPA_C SNWGNNGYFLIERGKNMCGLAACASYPIPQVMNPTLILAAFCLGIASATLTFDHSLEAQW
+                                     :. : : :    :..:.: ..:   .  .:
+Cp1|FB                           MRTAVLLPLLAL----LAVAQA-VSFADVVMEEW
+                                         10             20         
+
+           1410       1420      1430      1440      1450      1460 
+PAPA_C TKWKAMHNRLY-GMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEF
+         .:  : . :   .:: .:  ....: . :  :::.. ::: :: .:.: ..:.  .::
+Cp1|FB HTFKLEHRKNYQDETEERFRLKIFNENKHKIAKHNQRFAEGKVSFKLAVNKYADLLHHEF
+      30        40        50        60        70        80         
+
+               1470            1480      1490      1500      1510  
+PAPA_C RQVMNGFQ---NRKPR------KGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCW
+       ::.::::.   ... :      :: .:  :     :.::::: :: :: ::.::.:::::
+Cp1|FB RQLMNGFNYTLHKQLRAADESFKGVTFISPAHVTLPKSVDWRTKGAVTAVKDQGHCGSCW
+      90       100       110       120       130       140         
+
+           1520      1530      1540      1550      1560      1570  
+PAPA_C AFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSE
+       :::.::::::: :::.: :.:::::::::::   ::.:::::::: ::.:..::::.:.:
+Cp1|FB AFSSTGALEGQHFRKSGVLVSLSEQNLVDCSTKYGNNGCNGGLMDNAFRYIKDNGGIDTE
+     150       160       170       180       190       200         
+
+           1580      1590      1600       1610      1620      1630 
+PAPA_C ESYPYEATEESCKYNPKYSVANDTGFVDIPK-QEKALMKAVATVGPISVAIDAGHESFLF
+       .:::::: ..::..:     :.: ::.:::. .:: . .:::::::.::::::.:::: :
+Cp1|FB KSYPYEAIDDSCHFNKGTVGATDRGFTDIPQGDEKKMAEAVATVGPVSVAIDASHESFQF
+     210       220       230       240       250       260         
+
+            1640      1650      1660      1670      1680      1690 
+PAPA_C YKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDR
+       :.::.: ::.:.....::::::::.: . .  :   :::::::::  ::  :..:: ...
+Cp1|FB YSEGVYNEPQCDAQNLDHGVLVVGFGTDESGED---YWLVKNSWGTTWGDKGFIKMLRNK
+     270       280       290       300          310       320      
+
+            1700      1710      1720      1730      1740      1750 
+PAPA_C RNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTNEE
+       .:.::::::.::: :                                             
+Cp1|FB ENQCGIASASSYPLV                                             
+        330       340                                              
+
+>>CG11459|FBgn0037396|pp-CT28891|FBan0011459 GO:[catheps  (336 aa)
+ initn: 350 init1: 191 opt: 771  Z-score: 841.7  bits: 166.9 E(): 6.4e-41
+Smith-Waterman score: 771;  37.143% identity (38.870% ungapped) in 315 aa overlap (1399-1705:27-335)
+
+     1370      1380      1390      1400      1410      1420        
+PAPA_C PIPQVMNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKN
+                                     ...: ..:: .:. :  :.. ..::..:. 
+CG1145     MGTPRLTVVHGLILLLLVELGLTAVSDTEWDQYKAKYNKQY-RNRDKYHRALYEQR
+                   10        20        30        40         50     
+
+     1430      1440      1450      1460      1470      1480        
+PAPA_C MKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFY---
+       .  .: ::: : .:: .: :..: :.:  .. . .  ...      . ... : . :   
+CG1145 VLAVESHNQLYLQGKVAFKMGLNKFSDTDQRILFNYRSSIPAPLETSTNALTETVNYKRY
+          60        70        80        90       100       110     
+
+         1490      1500       1510      1520      1530      1540   
+PAPA_C -EAPRSVDWREKGYVTPVKNQG-QCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCS
+        .  ...:::. ::..:: .:: .: ::::::..:.::..: .: : :. :: ..:::: 
+CG1145 DQITEGIDWRQYGYISPVGDQGTECLSCWAFSTSGVLEAHMAKKYGNLVPLSPKHLVDCV
+         120       130       140       150       160       170     
+
+          1550      1560      1570      1580      1590      1600   
+PAPA_C GPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPK
+        :  :.::.:: .. ::.:..:.: . ..:::::: .   : ..   :... .:.: . .
+CG1145 -PYPNNGCSGGWVSVAFNYTRDHG-IATKESYPYEPVSGECLWKSDRSAGTLSGYVTLGN
+          180       190        200       210       220       230   
+
+           1610      1620      1630      1640        1650      1660
+PAPA_C -QEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSS--EDMDHGVLVVGYGFES
+        .:. : ..: ..::..:.::  :: :  :. :.   : : :  .:. :.::.::.: . 
+CG1145 YDERELAEVVYNIGPVAVSIDHLHEEFDQYSGGVLSIPACRSKRQDLTHSVLLVGFGTHR
+           240       250       260       270       280       290   
+
+             1670      1680      1690      1700      1710      1720
+PAPA_C TESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTVMTPLLLLAVLCLGT
+         .:   ::..:::.: .:: .::.:.:..  : ::.::  .:::               
+CG1145 KWGD---YWIIKNSYGTDWGESGYLKLARNANNMCGVASLPQYPTF              
+              300       310       320       330                    
+
+             1730      1740      1750      1760      1770      1780
+PAPA_C ALATPKFDQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGKHGFT
+
+>>CG4847|FBgn0034229|pp-CT15577|FBan0004847 GO:[endopept  (390 aa)
+ initn: 499 init1: 281 opt: 762  Z-score: 831.0  bits: 165.2 E(): 2.5e-40
+Smith-Waterman score: 762;  42.857% identity (45.896% ungapped) in 287 aa overlap (1431-1706:112-390)
+
+             1410      1420      1430      1440      1450      1460
+PAPA_C QWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEE
+                                     ..:  :  . .: :.: .:.:::.:.:  :
+CG4847 FGDFLSQSGKTYLSAADRALHEGAFASTKNLVEAGNAAFAQGVHTFKQAVNAFADLTHSE
+              90       100       110       120       130       140 
+
+             1470            1480      1490      1500      1510    
+PAPA_C FRQVMNGFQNRKPRKG------KVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWAF
+       : . ..:..     :.      :. . :     : . ::::.: ::::: :: :::::::
+CG4847 FLSQLTGLKRSPEAKARAAASLKLVNLPA-KPIPDAFDWREHGGVTPVKFQGTCGSCWAF
+             150       160       170        180       190       200
+
+         1520      1530      1540         1550      1560       1570
+PAPA_C SATGALEGQMFRKTGRLISLSEQNLVDCSGPQ---GNEGCNGGLMDYAFQYVQD-NGGLD
+       ..:::.::. ::::: : .::::::::: ::    : .::.::... :: .... . :..
+CG4847 ATTGAIEGHTFRKTGSLPNLSEQNLVDC-GPVEDFGLNGCDGGFQEAAFCFIDEVQKGVS
+              210       220        230       240       250         
+
+             1580      1590      1600       1610      1620         
+PAPA_C SEESYPYEATEESCKYNPKYSVANDTGFVDIP-KQEKALMKAVATVGPISVAIDAGHESF
+       .: .:::  .. .:::. . : :.  ::. :: :.:. : :.:::.::.. ... : :..
+CG4847 QEGAYPYIDNKGTCKYDGSKSGATLQGFAAIPPKDEEQLKKVVATLGPVACSVN-GLETL
+     260       270       280       290       300       310         
+
+    1630      1640      1650      1660      1670      1680         
+PAPA_C LFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAK
+         :  ::: . .:.. . .:..:::::: :. ..    ::.::::: . ::  :: .. .
+CG4847 KNYAGGIYNDDECNKGEPNHSILVVGYGSEKGQD----YWIVKNSWDDTWGEKGYFRLPR
+      320       330       340       350           360       370    
+
+    1690      1700      1710      1720      1730      1740         
+PAPA_C DRRNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTN
+        . :.: ::   :::.:                                           
+CG4847 GK-NYCFIAEECSYPVV                                           
+           380       390                                           
+
+>>26/29kD-proteinase|FBgn0028967|pp-CT25694|FBan0008947   (549 aa)
+ initn: 428 init1: 244 opt: 712  Z-score: 774.7  bits: 155.2 E(): 3.5e-37
+Smith-Waterman score: 769;  40.625% identity (43.624% ungapped) in 320 aa overlap (1728-2036:238-546)
+
+      1700      1710      1720      1730      1740       1750      
+PAPA_C ASAASYPTVMTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTN-EEEWRRA
+                                     :.  .  .:..:  :   : .. :.: :. 
+26/29k DDSLQCVGFPGPGTGHYATFNPMQEFISGTDEHVDKAFHHFKRKHGVAYHSDTEHEHRKN
+       210       220       230       240       250       260       
+
+       1760      1770      1780      1790      1800       1810     
+PAPA_C VWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNGYRHQK-HKKGRLFQEP
+       ....:.:.:. .:     .:  .:. .: ..: :.::..    ::. .  .. :. :   
+26/29k IFRQNLRYIHSKN----RAKLTYTLAVNHLADKTEEELKA-RRGYKSSGIYNTGKPFPYD
+       270       280           290       300        310       320  
+
+           1820      1830      1840      1850      1860       1870 
+PAPA_C LML---QIPKTVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTG-KLISLSEQN
+       .     .::   :::  : :::::.:. :::::.:.. : ::: .:::.: .:. ::.: 
+26/29k VPKYKDEIPDQYDWRLYGAVTPVKDQSVCGSCWSFGTIGHLEGAFFLKNGGNLVRLSQQA
+            330       340       350       360       370       380  
+
+            1880      1890       1900       1910      1920         
+PAPA_C LVDCSHDQGNQGCNGGLMDF-AFQYIKENGGLDSEESY-PYEAKDGSCKYRAEYAVANDT
+       :.:::   ::.::.::  :: ..:.. ..::. .:: : :: ..:: :.      ::   
+26/29k LIDCSWAYGNNGCDGG-EDFRVYQWMLQSGGVPTEEEYGPYLGQDGYCHVNNVTLVAPIK
+            390        400       410       420       430       440 
+
+    1930      1940       1950      1960      1970        1980      
+PAPA_C GFVDIPQQEKALMK-AVATVGPISVAMDASHPSLQFYSSGIYYEPNCSS--KDLDHGVLV
+       :::.. ...   .: :.   ::.:::.:::  ...::: :.::::.:..    :::.::.
+26/29k GFVNVTSNDPNAFKLALLKHGPLSVAIDASPKTFSFYSHGVYYEPTCKNDVDGLDHAVLA
+             450       460       470       480       490       500 
+
+       1990      2000      2010      2020      2030      2040      
+PAPA_C VGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIVNMAMIPS
+       ::::    . : . ::::::::.  :: :::: ..  .:: ::. :  .:          
+26/29k VGYG----SINGEDYWLVKNSWSTYWGNDGYILMSAKKNN-CGVMTMPTYVEM       
+                 510       520       530        540                
+
+       2050      2060      2070      2080      2090      2100      
+PAPA_C ISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLIQLFESWMLKHNKIYKNIDEKI
+
+>>CG12163|FBgn0037303|pp-CT8855|FBan0012163 GO:[] mol_we  (475 aa)
+ initn: 356 init1: 356 opt: 558  Z-score: 607.7  bits: 124.1 E(): 6.9e-28
+Smith-Waterman score: 847;  41.615% identity (44.816% ungapped) in 322 aa overlap (29-343:169-474)
+
+                 10        20        30        40         50       
+PAPA_C   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLG
+                                     : .:: .:...: :  :   :..::..:: 
+CG1216 VQARHTRSVEWAEKKTHKKHSHRFDKVDHLFYKFQVRFGRRYVSTAERQMRLRIFRQNLK
+      140       150       160       170       180       190        
+
+        60        70        80        90        100       110      
+PAPA_C KIEELNLIAINHKADTKFGVNKFADLSSDEFKNYY-LNNKEAIFTDDLPVADYLDDEFIN
+        :::::    :. ...:.:...:::..:.:.:.   : ...   .    .:  .   . .
+CG1216 TIEELN---ANEMGSAKYGITEFADMTSSEYKERTGLWQRDEAKATGGSAA--VVPAYHG
+      200          210       220       230       240         250   
+
+        120       130       140       150       160       170      
+PAPA_C SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE
+        .:  :::: . ::: :::::.:::::.::.:::.:: . .. ..:  .:::.:.:::  
+CG1216 ELPKEFDWRQKDAVTQVKNQGSCGSCWAFSVTGNIEGLYAVKTGELKEFSEQELLDCD--
+           260       270       280       290       300       310   
+
+        180       190       200       210       220       230      
+PAPA_C CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS
+               . : .:::::. :::. :   ::.. :. ::: :. . ::.:: .   ....
+CG1216 --------TTDSACNGGLMDNAYKAIKDIGGLEYEAEYPYKAKKN-QCHFNRTLSHVQVA
+                     320       330       340        350       360  
+
+        240        250       260       270          280       290  
+PAPA_C NFTMIPK-NETVMAGYIVSTGPLAIAADAVEWQFYIGGV---FDIPCNPNSLDHGILIVG
+       .:. .:: :::.:  .....::..:. .:   ::: :::   .   :. ..::::.:.::
+CG1216 GFVDLPKGNETAMQEWLLANGPISIGINANAMQFYRGGVSHPWKALCSKKNLDHGVLVVG
+            370       380       390       400       410       420  
+
+             300       310       320       330       340       350 
+PAPA_C YSAKN-TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSIIMAHARVLL
+       :....   :.:..::::::::::  ::::::  . :: ::::::........        
+CG1216 YGVSDYPNFHKTLPYWIVKNSWGPRWGEQGYYRVYRGDNTCGVSEMATSAVLA       
+            430       440       450       460       470            
+
+             360       370       380       390       400       410 
+PAPA_C LALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRY
+
+>>CG1075|FBgn0037397|pp-CT1381|FBan0001075 GO:[cathepsin  (274 aa)
+ initn: 233 init1: 148 opt: 413  Z-score: 452.7  bits: 94.7 E(): 3e-19
+Smith-Waterman score: 413;  36.548% identity (39.130% ungapped) in 197 aa overlap (1522-1715:87-273)
+
+            1500      1510      1520      1530      1540      1550 
+PAPA_C DWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGC
+                                     :.  ::.:   .:: ..::::  :  :.::
+CG1075 QAVANHNGLYSQGRSGFRMGLNQLYIASASGEEVRKVG---DLSPKHLVDCF-PYPNQGC
+         60        70        80        90          100        110  
+
+            1560      1570      1580      1590      1600       1610
+PAPA_C NGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPKQ-EKALMK
+       :.: .. ::.. .: : . :.:::::.  .  :... . :...   .: . .. :. : :
+CG1075 NAGWVSVAFNFKRDYG-IASKESYPYKPENGECRWDRRKSTGTLREYVTLTSNDERELAK
+            120        130       140       150       160       170 
+
+             1620      1630      1640        1650      1660        
+PAPA_C AVATVGPISVAIDAGHESFLFYKEGIYFEPDC--SSEDMDHGVLVVGYGFESTESDNNKY
+       .:  .::. :.::  :: :  :  ::   :.:  .. :. :.::.::.  .   .:   :
+CG1075 VVYKIGPVEVSIDHLHEEFDQYFGGILRTPSCRNTNYDLKHSVLLVGFETHPKWGD---Y
+             180       190       200       210       220           
+
+     1670      1680      1690      1700      1710      1720        
+PAPA_C WLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKFD
+       :..:::.: ::: .:: :.:.. ..:  . : .:   .. : :  :.             
+CG1075 WIIKNSYGTEWGESGYFKLARNCEQH--VISMTSIAKLVLPCLTEALN            
+      230       240       250         260       270                
+
+     1730      1740      1750      1760      1770      1780        
+PAPA_C QTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGD
+
+>>CG6347|FBgn0033874|pp-CT19836|FBan0006347 GO:[cathepsi  (235 aa)
+ initn: 273 init1: 273 opt: 311  Z-score: 342.4  bits: 74.0 E(): 4.1e-13
+Smith-Waterman score: 436;  37.000% identity (39.785% ungapped) in 200 aa overlap (1710-1899:15-210)
+
+    1680      1690      1700      1710      1720        1730       
+PAPA_C GMGGYVKMAKDRRNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKF-D-QTFNAQWHQ
+                                     : ::... :    . ::. : :.:.   .:
+CG6347                 MRMCSTMWLQMTLGLALLGAVSLQQLQSFPKLCDVQNFDDFLRQ
+                               10        20        30        40    
+
+      1740      1750      1760      1770      1780      1790       
+PAPA_C WKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQI
+             ..:. .:. .:....  .: .: : : . .::  :: . .:...::: .:.  .
+CG6347 TG----KVYSDEERVYRESIFAAKMSLITLSNKNADNGVSGFRLGVNTLADMTRKEIATL
+               50        60        70        80        90       100
+
+            1800       1810      1820      1830       1840         
+PAPA_C VN------GYRHQK-HKKGRLFQEPLMLQIPKTVDWREKGCVTPVKNQG-QCGSCWAFSA
+       ..      : :. . : .    ..:   ..:.  :::::: :::   ::  ::.::.:..
+CG6347 LGSKISEFGERYTNGHINFVTARNPASANLPEMFDWREKGGVTPPGFQGVGCGACWSFAT
+              110       120       130       140       150       160
+
+    1850      1860      1870      1880      1890      1900         
+PAPA_C SGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYP
+       .: :::..: .:: : :::.::::::. : ::.::.::.....:.::...          
+CG6347 TGALEGHLFRRTGVLASLSQQNLVDCADDYGNMGCDGGFQEYGFEYIRDHERNRRTSTAR
+              170       180       190       200       210       220
+
+    1910      1920      1930      1940      1950      1960         
+PAPA_C YEAKDGSCKYRAEYAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGI
+                                                                   
+CG6347 ESGENSRLCHHYSRG                                             
+              230                                                  
+
+>>CG6337|FBgn0033873|pp-CT19824|FBan0006337 GO:[endopept  (340 aa)
+ initn: 191 init1: 112 opt: 265  Z-score: 290.3  bits: 64.9 E(): 3.3e-10
+Smith-Waterman score: 346;  28.707% identity (31.488% ungapped) in 317 aa overlap (1073-1369:28-336)
+
+           1050      1060      1070      1080         1090         
+PAPA_C TALPLLCAGAWLLSAGATAELTVNAIEKFHFTSWMKQHQKTYSS---REYSHRLQVFANN
+                                     : ..  . .:::.:   :....   ..  :
+CG6337    MFKLLLCCLLLTIDSGWAFNHGQDLVDFQTYEDNFNKTYASTSARNFANYYFIYNRN
+                  10        20        30        40        50       
+
+    1100         1110       1120      1130      1140      1150     
+PAPA_C WRKIQAHN---QRNHT-FKMGLNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYP
+         ..  ::   .::.: .. ..:::::. .  :.   :  .  :  .. ..   ..    
+CG6337 --QVAQHNAQADRNRTTYREAVNQFSDIRL--IQFAALLPKAVNTVTSAASDPPASQAAS
+          60        70        80          90       100       110   
+
+        1160      1170       1180        1190      1200      1210  
+PAPA_C SSMDWRKKGNVVSPVKNQGA-CGSCWTFSTTGALE--SAVAIASGKMMTLAEQQLVDCAQ
+       .:.:     ...  :..::. :.: :...:. :.:  .::  :.    .:. :::.::: 
+CG6337 ASFDIITDFGLTVAVEDQGVNCSSSWAYATAKAVEIMNAVQTANPLPSSLSAQQLLDCAG
+           120       130       140       150       160       170   
+
+           1220      1230        1240         1250      1260       
+PAPA_C NFNNHGCQGGLPSQAFEYI--LYNKGIMGEDSYPY---IGKNGQCKFNPEKAVAFVK--N
+          . ::.   :  :..:.  : .  .. : .::    .   :.:.  : .. . ::  .
+CG6337 M--GTGCSTQTPLAALNYLTQLTDAYLYPEVDYPNNNSLKTPGMCQ-PPSSVSVGVKLAG
+             180       190       200       210        220       230
+
+        1270      1280      1290       1300      1310      1320    
+PAPA_C VVNITLNDEAAMVEAVALYNPVSFAFE-VTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAV
+         ... ::.::... :.   ::   .. .:  ::.:.:::: ...   :  : .. ...:
+CG6337 YSTVADNDDAAVMRYVSNGFPVIVEYNPATFGFMQYSSGVYVQETRALTNPKSSQFLVVV
+              240       250       260       270       280       290
+
+           1330      1340      1350      1360      1370      1380  
+PAPA_C GYGEQ--NGLLYWIVKNSWGSNWGNNGYFLIERGKNMCGLAACASYPIPQVMNPTLILAA
+       :: ..  ..: ::   ::.:..::..::. : : .:.  .:  : .:             
+CG6337 GYDHDVDSNLDYWRCLNSFGDTWGEEGYIRIVRRSNQ-PIAKNAVFPSALA         
+              300       310       320        330       340         
+
+           1390      1400      1410      1420      1430      1440  
+PAPA_C FCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREG
+
+>>CG5367|FBgn0032228|pp-CT17056|FBan0005367 GO:[endopept  (228 aa)
+ initn: 309 init1: 173 opt: 249  Z-score: 275.0  bits: 61.5 E(): 2.4e-09
+Smith-Waterman score: 493;  30.556% identity (40.741% ungapped) in 288 aa overlap (1425-1706:7-228)
+
+         1400      1410      1420      1430      1440      1450    
+PAPA_C DHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFG
+                                     .:.:.:.:: :::.:.::. :: .  : :.
+CG5367                         MRSYKAFEENFKVIEEHNQNYKEGQTSFRLKPNIFA
+                                       10        20        30      
+
+         1460           1470      1480      1490      1500         
+PAPA_C DMTSEEF-----RQVMNGFQNRKPRKGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCG
+       ::... .     : . .....     ...   ::. ..:.:.::: ::..::  :: .::
+CG5367 DMSTDGYLKGFLRLLKSNIEDSADNMAEIVGSPLMANVPESLDWRSKGFITPPYNQLSCG
+         40        50        60        70        80        90      
+
+    1510      1520      1530      1540      1550      1560         
+PAPA_C SCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGL
+       ::.::: . .. ::     :.                                       
+CG5367 SCYAFSIAESIMGQ----KGK---------------------------------------
+        100       110                                              
+
+    1570      1580      1590      1600       1610      1620        
+PAPA_C DSEESYPYEATEESCKYNPKYSVANDTGFVDIP-KQEKALMKAVATVGPISVAIDAGHES
+                     :.. :  ::.: :... .: ..:.:.. ::. .::....:.:. ..
+CG5367 --------------CQFVPDLSVVNVTSWAILPVRDEQAIQAAVTHIGPVAISINASPKT
+                         120       130       140       150         
+
+     1630      1640      1650      1660      1670      1680        
+PAPA_C FLFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMA
+       : .:..::: .: ::: ...:...:.:.:        . ::..:: ::..:: .::... 
+CG5367 FQLYSDGIYDDPLCSSASVNHAMVVIGFG--------KDYWILKNWWGQNWGENGYIRIR
+     160       170       180               190       200       210 
+
+     1690      1700      1710      1720      1730      1740        
+PAPA_C KDRRNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGT
+       :   : ::::. :.:  :                                          
+CG5367 KGV-NMCGIANYAAYAIV                                          
+              220                                                  
+
+>>CG3074|FBgn0034709|pp-CT10308|FBan0003074 GO:[cathepsi  (441 aa)
+ initn: 288 init1: 124 opt: 252  Z-score: 274.7  bits: 62.4 E(): 2.4e-09
+Smith-Waterman score: 411;  35.124% identity (39.352% ungapped) in 242 aa overlap (1151-1369:184-422)
+
+             1130      1140      1150       1160      1170         
+PAPA_C SDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPSSMD-WRKKGNVVSPVKNQGACGSC
+                                     :   :::..   : .. .: : .:: ::. 
+CG3074 GRKYSEGLKLRLGTKEPTYRVKAMTRLKNPTDGLPSSFNALDKWSSYISEVPDQGWCGAS
+           160       170       180       190       200       210   
+
+    1180      1190        1200      1210      1220      1230       
+PAPA_C WTFSTTGALESAVAIAS-GKM-MTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGI
+       :..:::..  .  :: : ::  . :. :....:..   ..::.::  . :..: :..::.
+CG3074 WVLSTTSVASDRFAIQSKGKENVQLSAQNILSCTRR--QQGCEGGHLDAAWRY-LHKKGV
+           220       230       240         250       260        270
+
+      1240      1250        1260                   1270      1280  
+PAPA_C MGEDSYPYIGKNGQCKF--NPEKAVAF-VKNVVNI------------TLNDEAAMVEAVA
+       . :. :::  .   ::.  : ..  :   .. ::.            .:: :: ..  . 
+CG3074 VDENCYPYTQHRDTCKIRHNSRSLRANGCQKPVNVDRDSLYTVGPAYSLNREADIMAEIF
+              280       290       300       310       320       330
+
+           1290      1300      1310      1320       1330           
+PAPA_C LYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQ-NGLLYW--IVKN
+         .::. ...:..::. :..:::  .. ..      :.:  ::.::. ::  ::  :. :
+CG3074 HSGPVQATMRVNRDFFAYSGGVYRETAANRKAPTGFHSVKLVGWGEEHNGEKYWVSIAAN
+              340       350       360       370       380       390
+
+    1340      1350      1360        1370      1380      1390       
+PAPA_C SWGSNWGNNGYFLIERGKNMCGLA--ACASYPIPQVMNPTLILAAFCLGIASATLTFDHS
+       :::: ::..::: : ::.: ::.   . ::.:                            
+CG3074 SWGSWWGEHGYFRILRGSNECGIEEYVLASWPYVYSYYNVKPMEISRLIYF         
+              400       410       420       430       440          
+
+      1400      1410      1420      1430      1440      1450       
+PAPA_C LEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMT
+
+>>CG10992|FBgn0030521|pp-CT30795|FBan0010992 GO:[catheps  (340 aa)
+ initn: 193 init1: 146 opt: 235  Z-score: 257.6  bits: 58.9 E(): 2.2e-08
+Smith-Waterman score: 370;  24.620% identity (29.889% ungapped) in 329 aa overlap (753-1038:25-338)
+
+            730       740       750       760          770         
+PAPA_C PVCGAAELSVNSLEKFHFKSWMSKHRKTYSTEEYHHRLQTFASNW---RKINAHNNGNHT
+                                     ..:. . ... :..:   :...:  . .: 
+CG1099       MNLLLLVATAASVAALTSGEPSLLSDEFIEVVRSKAKTWTVGRNFDASVTEGHI
+                     10        20        30        40        50    
+
+     780       790       800       810       820       830         
+PAPA_C FKMALNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPPSVDWRKKGN---FVSP
+        ..       :.     ::.  . :..  .  . :. ..   :   : ::.      .. 
+CG1099 RRL-------MGVHPDAHKF--ALPDKREVLGDLYVNSVDELPEEFDSRKQWPNCPTIGE
+                  60          70        80        90       100     
+
+        840       850       860         870       880       890    
+PAPA_C VKNQGACGSCWTFSTTGALESAIAIATGKMLSL--AEQQLVDCAQDFNNYGCQGGLPSQA
+       ...::.:::::.:... :. . . : .:  ...  . ..::.: .  . .::.::.:. :
+CG1099 IRDQGSCGSCWAFGAVEAMSDRVCIHSGGKVNFHFSADDLVSCCHTCG-FGCNGGFPGAA
+         110       120       130       140       150        160    
+
+          900       910                                920         
+PAPA_C FEYILYNKGIMGEDTY-------PYQ------------------GKDGYCKF--QPGKAI
+       . :    :::..   :       ::.                  :.   :.   : : ..
+CG1099 WSYWTR-KGIVSGGPYGSNQGCRPYEISPCEHHVNGTRPPCAHGGRTPKCSHVCQSGYTV
+          170        180       190       200       210       220   
+
+       930            940       950       960       970       980  
+PAPA_C GFVKD----VANITIY-DEEAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPD
+        ..::      . ..  . . . : .   .::  :: : .:...:. :.:.    :.   
+CG1099 DYAKDKHFGSKSYSVRRNVREIQEEIMTNGPVEGAFTVYEDLILYKDGVYQ----HEHGK
+           230       240       250       260       270             
+
+             990        1000      1010      1020      1030         
+PAPA_C KVN-HAVLAVGYG--EKNGIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPL
+       ... ::.  .:.:   .. ::::.. :::. .:: .:.: : ::.. ::. .  :  .: 
+CG1099 ELGGHAIRILGWGVWGEEKIPYWLIGNSWNTDWGDHGFFRILRGQDHCGIESSISAGLPK
+     280       290       300       310       320       330         
+
+    1040      1050      1060      1070      1080      1090         
+PAPA_C VMWTALPLLCAGAWLLSAGATAELTVNAIEKFHFTSWMKQHQKTYSSREYSHRLQVFANN
+                                                                   
+CG1099 L                                                           
+     340                                                           
+
+>>CG10460|FBgn0034443|pp-CT29364|FBan0010460 GO:[endopep  (79 aa)
+ initn: 115 init1: 115 opt: 131  Z-score: 152.2  bits: 37.3 E(): 0.016
+Smith-Waterman score: 141;  29.231% identity (29.231% ungapped) in 65 aa overlap (1399-1463:6-70)
+
+     1370      1380      1390      1400      1410      1420        
+PAPA_C PIPQVMNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKN
+                                     . .:...:.  .. :  .:.  :: .. ..
+CG1046                          MSLVSDEEWVEYKSKFDKNYEAEEDLMRRRIYAES
+                                        10        20        30     
+
+     1430      1440      1450      1460      1470      1480        
+PAPA_C MKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYEAP
+          :: ::.....:. .. :..: ..:.: ::: :                         
+CG1046 KARIEEHNRKFEKGEVTWKMGINHLADLTPEEFAQRCGKKVPPN                
+          40        50        60        70                         
+
+     1490      1500      1510      1520      1530      1540        
+PAPA_C RSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGN
+
+>>Ddx1|FBgn0015075|pp-CT25986|FBan0009054 GO:[helicase (  (727 aa)
+ initn:  64 init1:  64 opt: 107  Z-score: 114.0  bits: 33.4 E():  2.2
+Smith-Waterman score: 107;  25.581% identity (29.412% ungapped) in 215 aa overlap (2142-2341:72-273)
+
+            2120      2130      2140      2150      2160           
+PAPA_C FKDNLKYIDETNKKNNSYWLGLNVFADMSNDEFKEKYTGSIAGNYTTTELSYEE------
+                                     .: :    :.:.:  :   .:. .      
+Ddx1|F VLMAAETGSGKTGAFCLPILQIVWETLRDLEEGKAGKGGAIGGAVTPWTMSFFDRGNALA
+              50        60        70        80        90       100 
+
+        2170       2180      2190      2200      2210      2220    
+PAPA_C VLNDG-DVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQ
+       :  ::   .  :. .:.   :.: :...:.    . : :.:: ::. ..  :  .. .. 
+Ddx1|F VTPDGLRCQSREFKEWHGCRATTGVRGKGK----FYFEATVTDEGLCRV--GWSTQQANL
+             110       120       130           140         150     
+
+         2230      2240      2250      2260      2270       2280   
+PAPA_C ELLDCDRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYCRSREKG-PYAAKTDGV
+       .:  : : ..: .:    :  .   .::  . ..    :     ...: .    ... ::
+Ddx1|F DLGTC-RMGFGFGGTGKKSNNRQFDDYGEAFGKA-DVIGCLLDLKNQEVSFTKNGQNLGV
+         160        170       180        190       200       210   
+
+          2290      2300      2310             2320      2330      
+PAPA_C RQVQPYNEGALLYSIANQPVSVVLEAA------GK-DFQLYRGGIFVGPCGNKVDHAVAA
+           : : .   .  :     :::. :      :: ::.   :. ::: :    .:. : 
+Ddx1|F AFRLPDNLAKETFYPA-----VVLKNAEMQFNFGKTDFKYAPGNGFVGACQAGPEHSKAN
+           220            230       240       250       260        
+
+       2340      2350      2360      2370      2380                
+PAPA_C VGYGPNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTSSFYPVKN           
+          ::                                                       
+Ddx1|F PITGPAAGAPSAKPAPNAPQAIIMEPSRELAEQTYNQIEKFKYHLSNPEVRSLLLIGGVR
+      270       280       290       300       310       320        
+
+>>CPTI|FBgn0027842|pp-CT32036|FBan0012891 GO:[carnitine   (780 aa)
+ initn:  63 init1:  63 opt: 105  Z-score: 111.4  bits: 33.0 E():    3
+Smith-Waterman score: 105;  27.551% identity (30.000% ungapped) in 98 aa overlap (904-998:486-578)
+
+           880       890       900       910          920       930
+PAPA_C LVDCAQDFNNYGCQGGLPSQAFEYILYNKGIMGEDTYPYQ---GKDGYCKFQPGKAIGFV
+                                     :.:.: : :.   .  :   :::     ..
+CPTI|F TVCVGTNGRVGFNAEHTWADAPVLGHLWEYIFGDDIYGYDETGNTKGTPAFQPPTPTRLT
+         460       470       480       490       500       510     
+
+              940       950       960       970       980       990
+PAPA_C KDVANITIYDEEAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVNHAVLA
+        :.       ::: .... : : :.. . : ::   :  :....  :. .::   . .: 
+CPTI|F WDLKPCLAQIEEATIDVTKLINEVNLRILVHQD---YGKGFMKK--CRISPDAYIQMALQ
+         520       530       540          550         560       570
+
+             1000      1010      1020      1030      1040      1050
+PAPA_C VGYGEKNGIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPLVMWTALPLLCA
+       ..: .  :                                                    
+CPTI|F LAYYRDAGRFSLTYEASMTRLFREGRTETVRPCTIESSAWVKAMQNPNTTNDERVKMMQA
+              580       590       600       610       620       630
+
+>>CG4393|FBgn0039075|pp-CT14338|FBan0004393 GO:[cytoskel  (968 aa)
+ initn:  40 init1:  40 opt: 105  Z-score: 110.3  bits: 33.1 E():  3.5
+Smith-Waterman score: 105;  23.973% identity (25.547% ungapped) in 146 aa overlap (412-555:373-511)
+
+             390       400       410       420       430       440 
+PAPA_C RAASTLESAVLGALGRTRHALRFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRK
+                                     ::: .:: . . ::    ..   ..: :. 
+CG4393 NHLLNHSHSMQQSGYSKQRLSAGNGGPYNGGKSLDSALQPEDRFYQDLNAHSPLHSQNEM
+            350       360       370       380       390       400  
+
+             450       460       470         480       490         
+PAPA_C GLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGN--HLMRDAAALPETKDWREDGIV
+       :  :  ...  :..:   :. . ..  . ::.   :.  :.   : : :  :  :..  :
+CG4393 GGGY--SVSPSSSLS--SFEPASVSPRSRCSTGGLGQPMHMSTFAPAGPPKKPPRRNLSV
+              410         420       430       440       450        
+
+     500       510       520       530       540       550         
+PAPA_C SPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNGGLPSQA
+       ::.  .:  :. ...:. ..   . ... :.: .. .:   .   . .. :  ::.    
+CG4393 SPT--HAGPGQQFSYSSPSSQSQSQSHGHGQN-QVRRQPASESPYSHQSHGSVGGMSFDE
+      460         470       480        490       500       510     
+
+     560       570       580       590       600       610         
+PAPA_C FEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPV
+                                                                   
+CG4393 TQLRERQRNDRLYASARQSTRSAIAGGMSLSSSNDMLDRQCSSSELNSETSVPNSSGDSP
+         520       530       540       550       560       570     
+
+>>CG4392|FBgn0036283|pp-CT14260|FBan0004392 GO:[mitochon  (370 aa)
+ initn:  55 init1:  55 opt: 100  Z-score: 110.0  bits: 31.7 E():  3.6
+Smith-Waterman score: 104;  20.426% identity (22.642% ungapped) in 235 aa overlap (2099-2317:15-242)
+
+     2070      2080      2090      2100      2110      2120        
+PAPA_C SIVGYSQNDLTSTERLIQLFESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKNNS
+                                     :.::..     ....: .  . .   .  .
+CG4392                 MLRNSTYQYNLPVDYHNIQQMAVAGDMMEDFVVIFRDMMGRYLD
+                               10        20        30        40    
+
+     2130      2140            2150      2160       2170      2180 
+PAPA_C YWLGLNVFADMSNDE------FKEKYTGSIAGNYTTTELS-YEEVLNDGDVNIPEYVDWR
+           .::  :... :      ...  .:.:::  . :  .  ...    .:: :.:.   
+CG4392 IGEDMNVPDDFTQKEMQTGLWWRHLVAGGIAGAVSRTCTAPLDRIKVYLQVNQPRYTVQT
+           50        60        70        80        90       100    
+
+                2190      2200      2210      2220      2230       
+PAPA_C QKGAVTP----VKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCN
+       :. ...     . :.:.  : :  ...    ...::   .  ...  : .   :   : .
+CG4392 QRMGISECMHIMLNEGGSRSMWRGNGI----NVLKIAPETAFKFAAYEQMK--RLIRGDD
+          110       120       130           140       150          
+
+      2240          2250      2260      2270      2280      2290   
+PAPA_C GGYPWSALQL----VAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGA
+       :.   : ..     .:  ::     ::.: ..     :. : ::. .:.. ..   .::.
+CG4392 GSRQMSIVERFYAGAAAGGISQTIIYPMEVLKTRLALRRTGQYAGIADAAVKIYK-QEGV
+      160       170       180       190       200       210        
+
+          2300       2310      2320      2330      2340      2350  
+PAPA_C LLYSIANQP-VSVVLEAAGKDFQLYRGGIFVGPCGNKVDHAVAAVGYGPNYILIKNSWGT
+         .  .  : .  .:  :: :. .:.                                  
+CG4392 RSFYRGYVPNILGILPYAGIDLAVYETLKRRYIANHDNNEQPSFLVLLACGSTSSTLGQL
+       220       230       240       250       260       270       
+
+>>CG14034|FBgn0031691|pp-CT33593|FBan0014034 GO:[phospho  (223 aa)
+ initn:  60 init1:  60 opt:  93  Z-score: 105.2  bits: 30.1 E():  6.8
+Smith-Waterman score: 93;  18.719% identity (21.965% ungapped) in 203 aa overlap (1434-1625:28-211)
+
+          1410      1420      1430      1440      1450      1460   
+PAPA_C KWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQ
+                                     :...: .:: .  .. .: :  .  . .. 
+CG1403    MLVAGLDDMNCFSLQNEICPNANISFWLYTKENQEGTKLSVFELNRFEFYHHKPLKV
+                  10        20        30        40        50       
+
+          1470      1480         1490          1500      1510      
+PAPA_C VMNGFQNRKPRKGKVFQEPLFYEAPR---SVDW----REKGYVTPVKNQGQCGSCWAFSA
+       ...::....  . ..  .:::        :.:.     :  :.  :.:    . : :   
+CG1403 LIHGFNGHRDFSPNTQLRPLFLTQDYNLISLDYPKLAYEPCYTEAVHNAKYVARCTA---
+        60        70        80        90       100       110       
+
+       1520         1530      1540      1550       1560      1570  
+PAPA_C TGALEGQMFR---KTGRLISLSEQNLVDCSGPQGNEGCNGGLM-DYAFQYVQDNGGLDSE
+             :..:   ..: :... . .:.  .      :  : .. .. ....    .::  
+CG1403 ------QLLRVLLESG-LVKIEDLHLIGLGLGAHVAGFIGQFLPEHKLEHIT---ALDPA
+                120        130       140       150          160    
+
+           1580      1590      1600      1610      1620      1630  
+PAPA_C ESYPYEATEESCKYNPKYSVANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFY
+       . . : . . . : .:     .:. :::. . . ...  . .:: ..  .. :       
+CG1403 KPF-YMVKDPALKLDP-----TDAKFVDVVHTDVTMLGLLDAVGHVDFYLNMGVSQPNCG
+           170            180       190       200       210        
+
+           1640      1650      1660      1670      1680      1690  
+PAPA_C KEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRR
+                                                                   
+CG1403 PINKS                                                       
+      220                                                          
+
+>>CG6357|FBgn0033875|pp-CT19866|FBan0006357 GO:[endopept  (439 aa)
+ initn: 174 init1:  92 opt:  96  Z-score: 104.8  bits: 31.0 E():  7.2
+Smith-Waterman score: 136;  31.852% identity (37.069% ungapped) in 135 aa overlap (344-461:1-133)
+
+           320       330       340       350          360       370
+PAPA_C GADWGEQGYIYLRRGKNTCGVSNFVSTSIIMAHARVLL---LALAVLATAAVAVASSSSF
+                                     :. ::.::   : : ....:  . .:.:: 
+CG6357                               MTSARILLGVPLLLYLMGVALGVPVSTSSP
+                                             10        20        30
+
+                380             390       400         410       420
+PAPA_C ADS--NPIRPVT------DRAASTLESAVLGALGRTRHALRFA--RFAVRYGKSYESAAE
+       : .  ::   ::      : .. :.: .  ..:.. ..  .::  :: : .   :..  :
+CG6357 ATQKINPEIGVTTGKSDADSSTPTIEHT--SGLSEFEEECQFAWQRFLVDFDVHYDNDYE
+               40        50          60        70        80        
+
+              430       440           450       460       470      
+PAPA_C VRRRFRIFSESLEEVRSTNRK---GL-PYRLGINRFSDMSWEEFQATRLGAAQTCSATLA
+        ..:  :: :. ..::. : :   :.  .. :::..::...::..               
+CG6357 RQKRRDIFCENWQKVRDHNLKYDLGVVSFKKGINQWSDLTFEEWKEKQTPKVMPEIASES
+       90       100       110       120       130       140        
+
+        480       490       500       510       520       530      
+PAPA_C GNHLMRDAAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSE
+                                                                   
+CG6357 SKEERDKVNCQAAWEKFLIDFGAQYKNANETEKRRNVFCANWRAIVEHNVQYEKWAEPFK
+      150       160       170       180       190       200        
+
+>>CG11288|FBgn0039895|pp-CT31491|FBan0011288 GO:[cell ad  (202 aa)
+ initn:  61 init1:  61 opt:  91  Z-score: 103.5  bits: 29.6 E():  8.4
+Smith-Waterman score: 91;  24.731% identity (25.843% ungapped) in 93 aa overlap (1843-1931:48-140)
+
+           1820      1830      1840      1850      1860            
+PAPA_C QEPLMLQIPKTVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKT-GKLISLSE--
+                                     .:  .... :  :  . :  .: :...:  
+CG1128 ENLCAHICENTFDAYQCKCHPGFMLDNNNVTCSPMKTQICPSGYNLDKLDNKCIDIDECR
+        20        30        40        50        60        70       
+
+    1870      1880      1890      1900      1910      1920         
+PAPA_C QNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGSCKYRAEYAVAN-D
+       ..: ::. .:  .. :::   .  .  .   :.  ...:     : .:: :    . . :
+CG1128 EDLHDCKSSQYCHNTNGGYHCLNVKEKECPPGFHYDHDYDACKDDYKCKDRKCVKIQSCD
+        80        90       100       110       120       130       
+
+     1930      1940      1950      1960      1970      1980        
+PAPA_C TGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLVVG
+        ::                                                         
+CG1128 KGFSLHNGTCSDIDECSHKSLNNCHVNSNQECVNTVGSYSCNCLPGFNLDATLNKCVGKY
+       140       150       160       170       180       190       
+
+>>CG13155|FBgn0033723|pp-CT32396|FBan0013155 GO:[] mol_w  (236 aa)
+ initn:  67 init1:  67 opt:  91  Z-score: 102.7  bits: 29.7 E():  9.3
+Smith-Waterman score: 91;  27.397% identity (28.986% ungapped) in 73 aa overlap (165-237:146-214)
+
+          140       150       160       170       180       190    
+PAPA_C NQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGL
+                                     :.:..: : :.    :  .:  :.::. . 
+CG1315 NFNGTGWQIIQFQWVRDGDESHPDTKEYSYLTENKLWDSDQA---YAYQEP-DDGCHINC
+         120       130       140       150          160        170 
+
+          200       210       220       230       240       250    
+PAPA_C QPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVS
+       ::..  : ...  .:     :   .  .. :.:: .  .:: .                 
+CG1315 QPETAVYPLQSEPVQEPVVQPTQEQQQSRTNINSEQDPGKIHDTINEVLEYIQQRILPTL
+             180       190       200       210       220       230 
+
+          260       270       280       290       300       310    
+PAPA_C TGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWG
+                                                                   
+CG1315 PIKGE                                                       
+                                                                   
+
+>>CG4810|FBgn0037994|pp-CT15451|FBan0004810 GO:[translat  (551 aa)
+ initn:  60 init1:  60 opt:  95  Z-score: 102.4  bits: 30.9 E():  9.6
+Smith-Waterman score: 95;  25.000% identity (28.409% ungapped) in 100 aa overlap (2138-2227:368-465)
+
+      2110      2120      2130      2140         2150      2160    
+PAPA_C RFEIFKDNLKYIDETNKKNNSYWLGLNVFADMSNDEF---KEKYTGSIAGNYTTTELSYE
+                                     :..::     . :..: : :    ...   
+CG4810 QEPRFMFEEPNPFEEPGVDLASIGYRYRQWDLGNDVVLIARCKHNGVIQGPNGDVQFLSI
+       340       350       360       370       380       390       
+
+         2170      2180             2190      2200      2210       
+PAPA_C EVLNDGDVNIPEYVDWRQK-----GAV--TPVKNQGSCGSCWAFSAVVTIEGIIKIRTGN
+       ..::. : .. . :.::::     :::  . ..:..   . :.  ::..  :  ... : 
+CG4810 KALNEWDSKVTNSVEWRQKLDTQRGAVLASELRNNACKLARWTVEAVLA--GSDQLKLGY
+       400       410       420       430       440         450     
+
+      2220      2230      2240      2250      2260      2270       
+PAPA_C LNEYSEQELLDCDRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYCRSREKGPYA
+       ..... .. :                                                  
+CG4810 VSRMNPRDHLRHVILGTQQFKPQEFATQINLNMDNAWGVLRCLIDLVMRQPDGKYLIMKD
+         460       470       480       490       500       510     
+
+
+
+
+2385 residues in 1 query   sequences
+7177762 residues in 14334 library sequences
+ Scomplib [33t08]
+ start: Tue May 21 16:24:31 2002 done: Tue May 21 16:24:44 2002
+ Scan time: 11.240 Display time:  1.580
+
+Function used was FASTA [version 3.3t08 Jan. 17, 2001]

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/1.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/1.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/1.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1362 @@
+>AW057118
+gcaatgtcacgttgtcaactcattgcaaacaaagactggaccaacattga
+acggagtgattggaagacaatccggacaagttgctggatttgactactcg
+gctgccaacaagaataaaggagttgtatgggacagacaaacacttttcga
+ctatttggctgatccgaaaaagtacatccccggaactaagatggtcttcg
+ctggtttgaagaaagctgaccaaacgagctgatctcatcaaatttattga
+agtggaagctgccaagaaaccatcggcataagcctctactaaataagaa
+>AW057119
+tcatgttggcttctcggggtttttatggattaatacattttccaaacgat
+tctttgcgccttctgtggtgccgccttctccgaaggaactgacgaaaaat
+gacgtggatttgctgacaaatccaggcgaggaatatttggacggattgat
+gaaatggcacggcgacgagcgacccgtgttcaaaagagaggacatttatc
+gttggtcggatagttttccagaatatcggctaagaatgatttgtctgaaa
+gacacgacaagggtcattgcagtcggtcaatattgttactttgatgctct
+gaaagaaaggagagcagccattgttcttcttaggattgggatggacggat
+cctgaatatcgtaatcgggcagttatggagcttcaagcttcgatggcgct
+ggaggagagggatcggtatccgactgccaacgcggcatcgcatccaaata
+agttcatgaaacgattttggcacatattcaacggcctcaaagagcacgag
+gacaaaggtcacaaggctgccgctgtttcatacaagagcttctacgacct
+canagacatgatcattcctgaaaatctggatgtcagtggtattactgtaa
+atgatgcacgaaaggtgccacaaagagatataatcaactacgatcaaaca
+tttcatccatatcatcgagaaatggttataatttctcacatgtatgacaa
+tgatgggtttggaaaagtgcgtatgatgaggatggaaatgtacttggaat
+tgtctagcgatgtctttanaccaacaagactgcacattagtcaattatgc
+agatagcc
+>AW057120
+aatctgtacatcttcaattgtggttcacttcttctatcgtcttgttcgag
+aaaaccacggagaaaaggagcaagaccgtggattgaaagacaccaaagaa
+accgccaaggatgtgctgggttttgtaaaaatgcttggaataatcctagc
+tatggttgtaggctttgccttgttggggtttgtcacgttttatctctatc
+agtatgcgag
+>AW057121
+atgggcgctggtggctatggattcggatatatgggatccaacgcctcatc
+gtcgggatttgcccgcgaagattatgcccaaggaggaaatggtggaggac
+agcagcaaaaccagggatctggaggaaacaccaacccgggaggacaagtc
+ttcaaggcccgtacggatcaatcttgttacctcggaccataagaggcaag
+aactcagccaa
+>AW057122
+gacaacttccatctctatcatagcattttgatgattagaacatgtcactc
+acaaaagatggttccgtagctgtgaccattctccaagcaatcatcttcat
+tcaattcggcttgtgcgttgcgatcacaattctgacaaccgtcggaatct
+catttgggtatccggtggcttctcattaccttatggctcttcttcaggca
+ttggttgcaattccgggaattgtgtacattgtgacacagacaaacatctg
+ggtggcagtctacatcagcttccaagtggtaaccgccgcgtgtgaagtct
+actggctcgtctacttgatcttcgacaatcaacccgctggatcttggatc
+gcacttgcaattatcacagccgtcaacattttggcagctgttgtcggtgt
+gtggttccgcaaaactgctttgaaccttccatgccttgataagaaaacga
+aaaaggctggtgatgcgaagaaggagaagcccanaatgaaggccccatca
+acttcaatgagtgatattgagaaaagtaaatccagagctg
+>AW057123
+catcggatgaaacggatgatgtacttgcaaatgaagacgaaatgttcaaa
+acaaaaaaggacaaatacaaagtgatcgcattactcggtaaaggcggata
+tggagccgtatactctgtactccgcctcagcgatatggagaagtttgcga
+taaagtgtgagaaggcgactgctggaaagaaggttcttctgatggattgc
+aatgtgatgaaagttgcaactcagatcaaaagtagacacttttgtactgt
+actggatcgagctaacgtcaaggatcgtttaatttcattgtgatgaagct
+tatcgggaagaatctttgggacctgagacaggatcgtggcgatggaaaat
+tcacaatgggcacctcgttgaaagcagcgtcacagtgtcttgtatccatt
+gaacacttgcacagtttcggctaccttcaccgtgacatcaagccggtgaa
+ctttgccgacggacgaaaggaatccaacgagcatcacgtcatcttcatgc
+ttgactttggtctctgcagggagtacgtgaagcgagctgagggaaagatc
+ttcgagcagcccgtacaactgcaccattccgtggtactactcgatatgct
+ccattgacttcgatgctccagcaagatcaatcacgacaagatgacattga
+atcctggctctacatggntgtagaatggacttactgagattgacgcggcg
+catgtngaagctcacgatcgagagaggctctgcattacaacagtactacg
+tcaacaccg
+>AW057124
+ttggatcaacagaaagtttaactcctcttctgacaactcaagagccataa
+agtccttatcttttccaacaaagaacaaaagtccatgaggcagctctgaa
+acgccacgtggctccgactgaaggattggctgttgctcagaatgacatac
+acaatttcctgttcgaatcacacggacaaatcttcgatatggcgatgatg
+tacgccgaatcggccggtttcaaacgagatcacctctcgtacgccgcgtt
+cggtttgatcgcattcttcttggttttcggatctgtggcacgtcttttgt
+gcaat
+>AW057125
+aacaactcgactaaccgtctccactcttcacttgcacaaatcttcatgca
+accaatcaacgtcatgctcgctgttcttctcgccttggcttcatttgctc
+aaggaggcagatctgttgctccggctggtgcagtcactgaaccaacagtt
+actcaagctgttccagaaggatcaggacttagttcagatgtcactgatcg
+tccaaacatcgactccactgatgttgtatcaaatgcaacttcggtggaag
+atttgcttggaagttcaacaaatgcaaacaacactggtacattcaactct
+taggacctttgtaattgctccaatgatgattcttgctttggt
+>AW057126
+gtgctcgacatcattgctcttctggcaattgtcgtttttcttgtcttttt
+gccggctgatagatgtctgaattttgaagcaagtgaacaaactccacgca
+acgtcgacaaaaagtgtcatcttcatccagtgcatcatccgaacttcagt
+ttcaccgaaaacttgaaaactaacgatcatttgattgatccaagtcatca
+agtcaccatttgtcaccgtttgaaatggtctcactattcattctgggata
+gcatgtatttgaatcttttgccggctctcacttgtgtttttctgattttc
+tacacagttcatgagatttgtgaatttgacgtgtttgttggacaggtaca
+aactactggcttggttattgtatcagcaatatcggtaatttactctatcg
+cagtaatcactcacgaaaagaatcgaattgagacggaatggccattgctc
+gctcatgcttatgtcctggagcaaaagaaggagaaacaaccgttaatgta
+ccaaaaacgtggcaatatagtattgctatgtgcattctgtcagcccttct
+caagatcggcagggttntggttcaacatttcattggtgataagacgttct
+tcttgatgacgatagccaaagggacgatcaaaatggtgaacctgtgctgt
+tggctggaagtgaagaagtgctgtaccactaccattctgaatgttccaga
+catccagaacttgaactgat
+>AW057127
+aattccttttaatcgattcaagcacatcgtacatacgcgacaaaacgacg
+gaaatatgatgagcccgttggactatctgcatgtcactttgctgctcatt
+ggcccacttgccattctcatcggatgtggaggaaagaaaaaagcaccacc
+accaaagtcggcatccaagatgactgctccacctgctcctgcatcagcgc
+ctcccgctgctccagacgcagcccctgctgctcctgatgctgcagctgca
+ccagctgatgg
+>AW057128
+gacgacgcttaccagtttttggctccgataactcgttcctccaccaccag
+tccaacaaccgccagcagttgctccaggagcatcacgtcccgcacctgct
+ccgtcgaatgagcttccacccgcgtcggaggcaaagaaattccaagtgca
+accggctaaaaaagcatcaaaatcgaagagcaaatcgaaagactcatcgc
+caagcaacggaaaggagaagaagaagcgcacaaagcgttcgggtggctcc
+aaatcgaaagaatcttccgagccacctccatcattgtnccgcctttaaat
+gccaaaaggcattcagtgcatgtcttgtccttatttcagatgctatcact
+ttgatgg
+>AW057129
+gagatcgttggaaggttctagctatgaacgtactagtgtcttcagaggat
+ggtgttaaagaattcgaaaagattgttgtggaacctgaagatatcgaata
+tgttgagattccggccgatgccaaaaacgttgacttgacgcgtcaccgta
+tcaaagaaatcggtgattattcgtggctcactcacgtcgaacacttctcg
+tttcgttggaatctgatcaaaaagattgaaaatctggattgtttgacaac
+gttgactcatctcgagttttacgataatcaaattaccaaaggtgaaaact
+ttggttagcctcgtcaatttggagtcactcgacctgtcattcaatcgtat
+caccaaaattgaaaatttggagaagttgacaaaactgaagactctctttt
+ttgttcataacaaaatcactaaaatcgagggtttggatacgttgactgag
+ctggaatatctcgaattgggtgacaatagaattgcgaaaatcgagaatct
+cgacaacaatctganactcgatagattgttccttggcgctaatcagattc
+gtcttattgaaaatgttgatcatctgaagaagctcacagttctcagtctt
+ccagccaatgcgattactgtagttgataacattgcgggacttcacaactt
+ngaagagaattatctggctcaaaatggcatcaagtacgtctgtggaatcg
+atgagcatcttccttcttgaaatctggatttcaatcagaatcgtcttgag
+aggtcgagatatncattcaatgagacactacagacttttgggcagagaaa
+tagttgatacctg
+>AW057130
+caaaagggctcggatagaaaaatgcccaacacttatcgaacttcggatga
+aattgtggctcagcgattggaggatggcagagatatgaatctgagtcttc
+aaaatatgaatctattcgcatctggagccttttcaaatgtctatcgtgga
+attgcacgcacagaatccaaccaccaaatggaaattgtcatcaaaaagac
+atggccacgtcataaaggatgtccattggaagtgaagattctcggaaaac
+ttggaaagttgaagcaccaagaacattgtccgccttctctttagttacca
+gaaacaacatgaaggtcgtatctgccttggtctaatcttcgagtacatcc
+caatgaatctccatcagtttctgaaggataacaatcgacgtgttgacatt
+atcgaggttaaactgattgtttggcagttgttccgtggacaagcacattt
+ggagaagtctgaaatttgtcatcgtgacatcaaaccacagaatttactgt
+acaatgctgaccactgtcttctgaagatttctgattatggatcatctgcg
+attgaatcagtgaagacaccacaacaaagctaccatgtcacaagatatta
+tagacccgcccgagttgcttctacgctccaaanactatggatgccagatt
+gtcactttgtcgtgtcgatgtgtctttggtgaatgctttaaggtggaatc
+tactggcaggcaatacagccagaatcagcagaagtgatttgtatggtcga
+gctccactgc
+>AW057131
+tatctatattctcgactttggatttgctcatcagtacatgattaaggatg
+gaacactgaaacctccgtcagctcatccatggaaatacgtgggaagtctt
+cgtcatatgccacgtgccgcatattcgaaagtggaattctcaagaatgga
+agacttggaaatgtggttctatatgagtgttgagctggttaagggatgtc
+ttccgtgggctcatttgaagaaaccaaaagaagtgcatgactatcaaaag
+ttgtgccgaaatggccttcaaatgcgtgaaatgctccgaggtctttcacc
+agaattcgtcgacattatgcanataggtgacaaactttcattcaccgaca
+ctccgaactacacagaaatctacggacttctcaccaacgcgattctcttc
+agtggcaaaaatgaattcccgtacgattgggaggaggctgagatcaacga
+gttcaagaatccgcagaagccaagtgtggagcaggcaacctaatagctct
+ctttctatggaaataaattga
+>AW057132
+ttcgataacttgctttattgtcgacaagttttatcgtaacaatgacagca
+ccgccaccgccactcgtggaacttcctccgggatctatggttgaaagatg
+gtcgattacaaagaagctgggagaaggaggctgtggagccgtctatttgt
+gcacggatgcaactggaaagtatgcactgaaagtcgagggaatctctgag
+gcaatgcaggtgctcaaaatggaagtgttggtgctcggagagctgacaaa
+gagaggaagtcgtcacttttgcaagattgaagacaaaggaagatatggcc
+aagttcaactatgttgtaatgacgcttgtcggaaagtctttgcaggatct
+tcgcaagggaaccgctcaacaatgtttgagtctggcctgttctctcagtg
+tcggaatccaatccttggaagctcttgaggatctccacaacattggatac
+ctgcaccgtgacgtgaagcccggaaactatacgattggccgtgccgagtt
+gaatgaacttcgtaaagtttatatcctcgacttcggaatggctcgcaagt
+tcacggacaacaatggagtgatccgcanaccaagagctgccgccggattc
+cgtggaactgttcgctatgcgccaatcgcttgccataagaatcangagct
+ttgaaganaagatgacgtgagttttgctctacatgcagttgagctgactg
+tttgacgtgtcccatggaagagatcacgacatgaacgcagttgacaagcc
+aagcnagcgatccgcatactccagaaagatgtt
+>AW057133
+tcgctgtctttaaaggagaaatatcgattctataaaaatggaattcggag
+cgtcgtttgcttatagaagaattgacgcccacgttgaagcaactcttcaa
+gaagctgctcaagttctcgaaaagttggtaattgaaagagatgccgagtc
+ggccataacggtcgcatcccaaggaactccagatgcaataagtatcactc
+aagctgctcatgaaaccatttcaattgattcaattcttcgatcttcgaag
+caaccaaaggctaccattcgtactccacgtgactctaacaagaatcggaa
+attaactttcccaccagcccgaattgggttcaaatccgagcaattggaaa
+cttgtgattctggaataactgatagcactattgaccaagatccaccgacc
+ccggactccttgttcccaagtgccatctacattccggcaaagcagaagcc
+tcaaatgaccgtttcaataagtgcaacaactgcttcttcttgttccaaca
+aatcgctctaccgtaagcacattgaaaaactagcaattgagccattggag
+gacatcaaacacttgaagtgccgtggactgangaagtcanagccagatga
+ccttcttctaaagccacttacgatgcgggaactggnaaaccctaaatggg
+gttgtctccgagtcaatggtgtcggtcacctttatatgcatctacactgc
+taattcaatagcttgctatcacccaatg
+>AW057134
+cttaccctcggaaaggcagaaagtcatatggacttactggaaatgtgaag
+aacgtggaaaagggtgtcaatgtgtggattgagttgttggaaaaaatgga
+ttttcgaaagacgatggaaccggaattcaaaccgattgacacaatgaaag
+cagatcttgagaagtgtcaagctttcaagaagaatattgatttttctcaa
+tcggagaatgttgaattgtacgatacaaatcgggtcaaaggaggtggaga
+agcggatttcttttatcacggaagcactctgactattccatcggtaacca
+acaaatcttacaatccttgtccaacttgcaattgtcgactcaccgcaatc
+tcttgaatcattctggataatgggtgcctcacaaaagatccaaagactct
+tcattcttcttggcgaagaagaactcgataaacctactctgagcgagtac
+tttccagaagacttcaaagagttcaagacgattcgtgtgaacaatcggaa
+gactgtgccgaagactgaagagcaagcaaacactcaattgtactatgaag
+ttgtgccaaaggattgtgcagaagctccatttgcaatgattgagatctgc
+gattcttggcctgatgcgaagattccaacgatgggttataacagaaattg
+ctgcgactgctgcagcgtgtctgaatctgatattgattgtgatgcttctt
+gtgctattgtgagcaactacngagccgctagagcacgatcatctacttgt
+gcagttctggctactgatgaagtgcacgcacgagaagccccaatattaaa
+gaaaatcgtctttagtacgttcgcacgcctgctgtattcgacattgctca
+tacatgtt
+>AW057135
+tctcatcatgatcacggaagctgagatttgcttcaatcggatatggcgtt
+gacaattctggttccatggaactttcagctcaagcccccaattctcgatt
+ggggaattaatttacctggtatttattgattaactacanaatatcccgat
+tattttccagtagttgtactttgcttggggttttaactttattttattgt
+taaaaagggaaaagctggagcaaaatgcccattcactg
+>AW057136
+atcgaacatcaaagcagtgaccataagtggaaagtgttgcgaaatatcta
+ctcgggaccgttttcagatgtttatgtcgttgcggatacagtaacgaacg
+aaaagtatgcgatgaaatgtgagagacaagaaggaaactctcgtccagtc
+ctgaagctagacgtgatggttctgatggcgacgaagggtcttcgaggatt
+tccgaactttgtggcggcaggcagaactgatgtttataggtactgcataa
+tgcaacctcgtccggaccggatcttggtcggctgcgtcgaacacgtncgg
+aacgcaagttctcccttccaacagctctccaaattctcggacaaactctc
+cgacgcctcgaggatcttcacaattgcggatggctttgtcgagaagtgaa
+ggcgccgaatttctgcattggtgtcggcgagaacgagtcgaccgtctata
+ttctggattttggattcgccagganatttgtagacaaggagggcaaaatc
+at
+>AW057137
+gaaacttgtgaagaaggtctgcaatggctgcctcaaagaaattgatgcca
+aagcgaatgaagaagaagatgcaggtggagagaatggctcatgccagagc
+ttgcaaggttgcaaagagagaggctcgcgtcgctgaggaagcatctggaa
+aatcaactggtggatctactcgcggagccaagtgatagccgagccacaac
+acatga
+>AW057138
+tcgaatcaatcgccttttgaatcggcaaagtagcaaagaagaaaaggaac
+agaagaatgggttccgagaagatgatcgagatttgtatcgacatggagga
+aggagaaccacttggtgcaactccaaatgacaagctcgttatcactaaga
+ttcaggctggaaccatttctgaaggaaaattgagaattggtgaccaagtt
+aaaaaagtgaacggacaaaattgcaaggattgtaacgactttttccgtgc
+gcttcgctttgctgctccatgtgcaaaaatcacggtgaaccgtgaccgaa
+aaaaaggctgaagagttagaagctcgtgttcatattcctgaggatcgtgc
+anagatcattcaacgccgtgagggatacgtttatgagttggcaacccttg
+tctgggttcaaaatggaccaaaacttggtttgggaatcaagcatttccag
+aaccgtgtgcttgtttcacgtgttgatcctggatcactagccgagaagtg
+tcttgttcttggagatcatttgtgtgatgttgatggaattccagtcagtg
+acaaggatgttgccagagatcttctcgtcaagaatatncaagagaaagga
+naagtcacttttgtcgtcgagcgccctgattcgatcgatgccaagcaatg
+ggcgatacaggcgttgctaccaatctcatgctaccaccatcagtccatat
+gaaccgagatgtgaaagcattgcttcgcatatcgtcaagctcttttcatg
+actgagcctncagcgaagagtgcattgtctacctgtccanaatgcttgcc
+gtgttctatcatcgagcaaactcaaaacttacgagatccggcatgatcat
+gacggaaagcttttcgcaaggtcagtga
+>AW057140
+caattgagaacgctgtgcagacatgtcgatgaccaagaatcctgtgaaat
+caaaaccagccgatggaacaacgtccgctaaggattttgagaaccttcag
+agtgacttcttctccttcttgtacgctgatcatggaccattctacaaaga
+gaatgtgaaaaagttggaagacgcaactggtctgaaacgtgaaatgctcg
+catatgggctcatcgggctcaattgtgtctacatgattattggaagtggc
+gcccgagttgggggcaatttgattggagttgcctatccggcttatgtttc
+tgttaaggcgatccgaactgaaggaactgacgatgatacaatgtggttga
+tctattggactgttttcggtgccttttcaattatcgacttcttcgccgca
+atcattatgtcatatttcccaatttattgngttgccaaagcagcatttct
+cttgtacctctacttgccagaaactcacggatcccacgtcatttaccacc
+aactgattgatccatttgttgctcatatggagaagagcatgtccagaaag
+cttncagcanacgctggaactgttccaaaatgatcaggatcagccttgga
+tgcgaantaaca
+>AW057142
+ttttcatgttcgccaaagcacagagaaaagaatagatggcagacaaatca
+gcatttgtgccagtcgatgcaatcggaaatcacaaaaacaccgatcttga
+cgttgatattgatgacgaattgttcggaaagaaaccaccaaaagccagtg
+caagtgcaaccaaagcagccgctccaccggctccggctccagccccagcc
+ccaccaaaacctgctgcagcacctgccccagccgctggaaagtaccaata
+caagaagtcgtcgacctatcagaagacctat
+>AW057143
+gactggtgctcctccaacagcgaaataagcagactgggctccgccacctc
+cacctccacctccggctcccatgtagactgatgttcctggtggtggtgca
+gcaccacctccaaccttcgaggctccaccgcctcctcctgctggtcctcc
+tccaactccaaagtaagccgattgtgcaccagtattgctcc
+>AW057144
+cgaaatgggagtcgagttatcgttggatccttctgtctgcccaatccaag
+ccaatggtggtgtttctacgcacaagatcattaatcactgcgacaaaatg
+ttggcctataagataaaatcttccaataactccaactacagtgtcaacac
+tatctttggacttattcagattgggtataccgctgatctgatcatcacta
+gaaagccaggaaagccgcaggccgacaagcttgttatccaattcgctgcc
+gttgagcagacttgccgtgatggctgtatttgccaaggctgtatttgcca
+attggactgccaactggagagtgctgtggagagacgatcatcaagctgtc
+agctgctgaatagaagaattatcacaataagatttgtgattatgaaaac
+>AW057145
+gcgaaaacaaattcaccgcccgttccgttgtcctgtcttggtgaccggcg
+tcgtcgtcggcggagccgctcttctggcgattgccgcctactactactgg
+agccagaaaaagaaaagctctgatacttcatctgccacgtcatcggagtc
+caacgatgttgtcatgatgtcatcatcggagcccagagccgatggaggag
+ccgattcgaaggcaaagttcaatattgaggatgaaaatgtgagaagagtc
+tgcgagaagctgttcatggagcagatggatttgggggaagcttattttgg
+aggatgaagaaaccgaggagctcggcgcaatccacatggccaacgcaatc
+gtgctcaccggagagactgctcagctgctcaaagtgctccgcggctcgat
+ttcaccggctcactttgccaatattcaaaagtacctcccatcggctgact
+tgcgtgttcaccagcttctccaagacgagctcgccattgagactattgcc
+cagcatttcgactaagctcaacttctttntttttttt
+>AW057146
+aatgtgtacagcggtggagtcactggagaaactcctgcctacttcaatga
+cttgcgaaaatgtggatgctggacagctccatctgtcaagcagttcgcgc
+aagatgagacagtctgcggtatcactgacaacagagatgttcatctagct
+ggaaatgtgctcaaggctgctgaagaagacgggaagatttatgctggacg
+attggtgcaaggaagcctgcaaatt
+>AW057147
+aattccggcttggacacgctacggtactagttgatttggaaggtgtaaag
+tttgtgacggacccagtttgggctgatcgagcgtcgtttacgagttttgc
+tggaccgaagagatacaggccaccaccgatgaagttggaggatctgccgg
+atttggattttgcagtggtgtcgcatgatcattacgatcatttggatgct
+gacgcggtgaagaagatcacagatcgcaacccccaaatcaagtggttcgt
+tccgctgggaatgaagaaatggatggaaggccagggcatcggagtcgacg
+ggatcttcaccgctgtcaccgagctcaactggggagagagctcagaattt
+gtgaagaacgganagacctacaccatctggtgtctgcctgctcaacactg
+nggacagcgtggacttttcgaccggaaccacagattatggtcaggctgng
+cggtgatcggcgagaatcggcgattctattattccggagatactggtcac
+tgtgacggagagtttaagaagtttggcgagaagcttggaccttttgatct
+ggcagctattccaattggagcatacgagcccagatggttcatgatatccc
+agcatatcaatccggaagaggcgattgaggttcataaactcattcgggct
+aagaacagtattggaatacactgggtaacgtaccatatgggctctactga
+gtactacctggaaccacgtgacaagctcaaagagcttattgatgctccgg
+gagatcttangaacacgagttttgcacaattggaaatgggtcgatttggg
+aggcgtngatca
+>AW057148
+agcagcaaagaagaaaaggaacagaagaatgggttccgagaagatgatcg
+agatttgtatcgacatggaggaaggagaaccacttggtgcaactccaaat
+gacaagctcgttatcactaagattcaggctggaaccatttctgaaggaaa
+attgagaattggtgaccaagttaaaaaagtgaacggacaaaattgcaagg
+attgtaacgactttttccgtgcgcttcgctttgctgctccatgtgcaaaa
+atcacggtgaaccgggnccaaaaaaaaggctgaagagttagaagctcgtg
+ttcatattcctgaggatcgtgcaaagatcattcaacgccgtgagggatac
+gtttatgagttggcaacccttgtctgggttcaaaatggaccaaaacttgg
+tttgggaatcaagcatttccagaaccgtgtgcttgtttcacgtgttgatc
+ctggatcactagccgagaagtgtcttgttcttggagatcatttgtgtgat
+gttgatggaattccagtcagtgacaaggatgttgccagagatcttctcgt
+caagaatatccaagagaaaaggaaagtcacttttgtcgtcgagcgccctg
+attcgatcgatgccaagcaatgggcgaaacagcgtggctaccaatctcat
+gcaccacctcagtccanatgacggagatggaaggcattgctcgcaatatc
+g
+>AW057149
+ttatgagaaagtattcacatccacatattgttcggattattggtaagata
+atcgtcaaacatcttccaaaagttggtttggcagtagatgctcatccact
+aatgattgtaatggagatgtgcccacatggatcacttctttcatttcttc
+gtaagaacaaagggaaaacgacactttccgaacgtcttcgtttttgtatt
+gaatcagccgatggtcttgcatatcttgagaaaaaacaatgtcttcatcg
+tgatattgcagccagaaattgtttactttcgatcaccgatcaaattaaaa
+tttcggaattttggtctttcggatgacaaacgaactgaaatgcatgatga
+cacactcgataaggtaccagtgaaatggttggctccggaagttatgcagg
+ataaattgtattcattgaaaagtgatgtttgggcatttggagtgctcatg
+tgggagatatatgcagatggagctgatccatatccgggaatgacaaactt
+agttacgagagccaagatcttctgcgatgattacagaatgactnttcctg
+agactaccgcaccaaccatttctgaaatcgctttgaaacaaatgctggcg
+aaacttcccatcnatcgtgccacgatgaaaactgtgcatcataagctaaa
+gacctctcaantgccatggtcgatgtaggctcagtatgaaaattgaacgt
+>AW057150
+tttcactcaaaacgttcaacaagaggcaaattcgacgaaaaatggacaca
+atggagcagcatcaggagcttgagcaagaagccatcggaccggctcttcc
+accgccgtcagctgctcaaaaatcggagctcttcgaggagcacaacgtcg
+agtacgagctgatcaacgggatcccgtgctaccagccggatcatgtggtc
+gacgggcaagttcagatcttcgaacgaatcggctacgacgacaaggtcgg
+cggaacgttcctcgggctcagcgccgacggaaaagagctcgtcgtccggn
+gtgcaccgatcgacgcgctgactcatgtggttcgtgccgaggcggcattc
+ctgtgcaaggtggaagccgagcttcaggactggcggctcttctcgcaagt
+tcacaagatctttctgaccgacgacgcgtggcacatgtcgctgtacttcc
+gtggtggaccgacgttggagcagtgctttgcgatgcggaacaagttcac
+>AW057151
+tatccgctggtttgttgtactcattttcctcatcatttccaataaaaaga
+aacggtcgatatcatgtttggtaatgaaaagagcgaagaatccggaagtt
+ctgggtttggtttagctgaagttaaaaaggttttccagtggattctcgga
+tgcacttatgcgaagaaaacattcaaaactagaatttctgaaatgtttca
+ttttgctgatgctccgcacattgtcgtgtatgagcgaagtgaggagcgac
+catggtattcgatggttggaataataattggagttcttcattgtagttat
+gagtctctattacacacttgaagctggataccgttcaattcgaaaactca
+tgctttcaatgctccaagatccgcaacagagtactgtatccactccaaga
+tgtctatcaccaacaaacatgggatccaagaaatcttcgcctcaaactcc
+aaaaactccggatgtgattcgtcaaaaagttccgatgaatgagccagtca
+attgtgtgttcattcgaccagttattccaaanactattccagagggtact
+gtagccgtgccgaatttgaattcggaagaagatattctattggatagaga
+tcatcgtattggagagacagggacaaagatatatggaagttagaagagac
+acatagtgatagaagtggatagaaaaaataaacagaaactaaaagagtc
+>AW057152
+gcaagaagaagtctgatacggcatccgttgttgctattccagaaggagac
+aatgagaaaggaaagaagatcttca
+>AW057153
+aagaacagtcttgaccaacacgagatgtaaactcgattgacgctcctctt
+cgacctcttgttgtctggaagcactctgctcgttcaatgtggtggaaaga
+aaaagggagcaacttctgccgaaggaaaatcttcgacgatgggcccggct
+cctggaggagctcctgctgctgcttccgctcaaggagaacctgaagagaa
+ggagtaa
+>AW057154
+atctacggaatgatgcgtcaagcttatggtgcatgtggatccaacgagaa
+tgctccgtacgattgggaaaatggaggtccagctgcctacttgctccact
+agaagaaatagacttga
+>AW057155
+gacaagtgttctccgatgctccttcgtgccanatttctatattttcatca
+gcgcaccatgggtgttgtccgttactgttaagcggaatatgaccgagtat
+gagcagaaaattcatatcaacctactcaatgggatccgtcagaagaatgc
+gattgatgagcaagtggctaatatgcatgagctggtctacgatcccgctc
+tggaatccttatcatatccagaatgcgaaatctccaacgatgatattacc
+gtaaggaacaatgatggcgtatccacatattataatgcattcccaccaac
+cggacagatatttgggtgtactatgcgcaacacattgattccctgcatac
+gtctgccgcatgccatcatctcacctgcaaatccggcagttccaataata
+acagtgtcagcggatgtatttttggacctgttcgtaaattcagcagctcg
+gaagtggtgaaaggagagcctggttcacagtgcccaaaaggaagatcttc
+actggattt
+>AW057156
+gtcgactgcaaagaagactcctgccaaaatacttagccgcagcagaagcc
+agcagaaggtgaaacgctcgatgagccgcaaaggttgaacggattg
+>AW057157
+aaattccaaaccactgaaaccagttagtancaatgactgcacaaatcatg
+ttgccaatgagtgcaattttcgtgctacttggatccatagcaaattgcgg
+tggcaagaagaaaggagcagctggagccggagccggagttgcagccaaca
+gcccaaaaaaggcagatggaccgtcgaagaaggaaacgaagaaagaagga
+gatgacggaaactacgaggaactcgccgtcccacaatgatccccctgcat
+ccctgtc
+>AW057158
+gctgggcggctgagctggatccacgtggctagcagaacatcactgactcg
+aatgttatcaatcgcgaggaatacgaggacgacgagtacggatcgccagt
+ataattttcataactcgttctacttctcgatagctcatcatagccaattt
+cgtgctataatcgtcttttttggcgcgggtttttcactcgtcttttcttt
+cttttttacttttactctgttctatactaatcgcggatatatat
+>AW057159
+aacggtactcgatgagactgctctggatactaaagatatgttgatgaccg
+cattggcactcatgtcactcattggactctgctcaactcgtcgtgcattt
+ggtgtcttcactcttctcttcatgatgcatgcatttgtattctcagcttt
+ccatcttgcacatacanttgcactcttcattaaatcattcgattctccat
+gtcaatatctgaaaactccctcaactggaacacttaattcggatatctgt
+catgctgttaatggagttactctggtgtgtgcagtgatttcaatgattgc
+tactgctcttgccagtatggccgtcttcattcgtctcactacagtcgtcg
+tcaaaatttcgga
+>AW057160
+tgaacaattgtcaaagagttcggatcccaacatctcctcgatgtacgttt
+tccatcaaggaattcaagtaaagcaggaaccaatcgatgatgaccaagag
+gaagagcaacaagtacaaaagcagcttgtattcaaaatcgagggctccga
+agacgaagaagctgtgaagaaggagt
+>AW057161
+caactttatttatacacacaatacataattttcagagaagttttcataat
+cacaaatcttattgtgataattcttctattcagcagctgacagcttgatg
+atcgtctctccacagcactctccagttggcagtccattggcaaatacagc
+cttggcaaatacagccatcacggcaagtctgctcaacggcagcgaattgg
+ataacaagctaggcggcctgcggctttcctggctttctagtgatgatcag
+atcagcggtattcccatcttaataaggccacagaagcggtgacactgcag
+ttggagttattggacgattttatcttataggccaacattttgtcgcagtg
+attaatgatcttgtgcgtagaaacaccacc
+>AW057162
+gaagagtgcagtagtccgattgaaaaggatcaacgtcgatccgccaactg
+gaaactatccggcaactggaggcaattcgacgcacaacatcacttccgaa
+tcggattcccgtcttgcattcaaggtgaagtcgtcgaacaacgagcacta
+ccgcgtccgtccagtttatgggttcgtggatgcaaagggaaagagcaaat
+tggacatcaatcgtcttccagggccaccaaaagaagacaaaattgtgatt
+caatacgcggaagtgccagccgaagagactgatccaatggcaccggtcaa
+ggcttgagctcaacaaggagaaatcattgtgaagctcatcgctgcttgaa
+tggaatgcaataactgaag
+>AW057163
+atcacgaggctcaaaaagcgtttcgctcaattgaacacggtttcattatg
+aagttcgatgtctcttcgttgtgccaatttgttgttcaactcctccaatt
+ctgcaatgtttttattgttcttgcagtgatgaccatcacctcatgcggaa
+agaaaaaagcatcaaatagcaaggaaaattgcaaaaagagtttgcaaacc
+ggccctggagcagccaccgaagccggagctgcttcttcgttagctccggt
+tgacgcgaccaagcttgccacacctgtaccagcggcgccaaaaaaggaag
+aagctccacctccagaagagcccaagaaagaggagaagccaaaggagaaa
+tcgaagaagtcggcgaaatcgaagaaatccagcaagtccaagaaagacaa
+gaaggatggagaggaagagaatggatatgaaaactgccaggatatgactc
+cggatcagttgaagaagattgc
+>AW057164
+tcatcagctcattcctcattcggtacatcaaatgactcgttttcttcagt
+ccaaagtgtgattctgccaccagttggaccatttggtcagaaaaagagaa
+cgtttgagtacattggtgtcacacttgttgtcgataagttgaaattggct
+gaatggatgaacgggatcgggaaattgtttggatccgcagagctccgtga
+taatgttaccaatcttcatatgcaattggtcccggtgattgatacgttta
+aaaa
+>AW057165
+tgagaaatatgagctagctatgagaaatacgcactgaagatgaaggagga
+atgttatctggtgttataatggatacttctgatcattatgagcgtgatta
+tacaatggatcatgatgttggaccttcttcaatgaaaatgtctcctatac
+caccacctccgatcaaagaagaatcacctccaccaccgccac
+>AW057166
+gaaagagtaaaaatacgaagaaagacggcgttgacaagaagaaaacttcg
+aaaacgaagaagaagtcaaatttgtcgatttccaaatcgcaaacttcggt
+ggatatgaatgaaaaggacaaatcaaaggaggcaaaggaagcaaaggagt
+tgaaagagaagaaggcaaaagaggaagccgagaagaaggttgttgccaca
+ccaaaaaaagatgcttcgaaggatcaggcaaagaaggaggaagatcctta
+ttagcaaaacgaaccaagtggaatggatgttcttgtgaaggaggacggca
+agaagacaaaaatggacgatggctacgaggatttcggtccaggcgccggt
+gctgctcaatgagcaaatggtcgagaaac
+>AW057167
+ggggaatctggagaactgattacagtacaaacatcataagtcgccacaac
+aaccactgcacgacccattaagaatattgctgcaacaactaccgtcgcac
+ctctccaattgattgcggatgcttcacttgttgacaatgacctacaatcg
+aatcttgaagcaactggagtgtatgtcgatggaaagtggtggtggtgggc
+aatctacctgggatttgtcttgggcactctccttactttggctatcgggg
+gtggaatatgttacgtgttgcgacgaactgtttatggatattggtaccgc
+ggcatgtacagacgatatggatgtgatgtctctgcgacaaccgctggtct
+cactggagttggattcggagcaactacgaccgcaatgcagacgatttctc
+ctggaaagacgggtgcgacaacattgggaagtacttcaagtaccactgga
+attactgaaactactggaactactggatccacggcaaccactggaact
+>AW057168
+tcccaacaacacctcaagatgaatgccatcttcactgccgtccttgttgc
+ttcaactctcgcctacactgcaatggcttggattggactcagcattgaag
+ccgccaacgaggatatgttctgaagtggtacc
+>AW057169
+aattccttctactaaccctttcgactacaattaacatggacatggacaag
+cgatcatcggatttggaagctgctcttcgaattgtgctccagcagacttt
+gaacatcgttttgcaagcgcaggagaagctccccgaggcaaatgtggtac
+cctcaactccgcccacctcaccgagcactgatatcggcgaacaaatggca
+tcgttctggaatattccatcacccaaccctcctgcaacct
+>AW057170
+gacaatttctcaagatggtcgaactgcaaatggagcaggaaatggagaaa
+atgaatgagattgagactgataaacttccgattgatcatcaattgagtga
+ctatcagaataacatcgaatcgggaaatgatcgtcaagttcaatcatgcc
+cagttgatgtgtctattccaaaagaagtcatgaagtgtgcaagctgtcct
+ttgctatgcttcaattgctcagttcaaatgcctgtctcacccgttccaaa
+caacaatcgaatcccgtaagatcaacgagactactcacttgatgatggaa
+ttttggaagattgttgcttcaaagtctgaagaggaaaagctcccatcact
+attcgaaaatgttgagggactgttttctgtcccattttcaactnttggaa
+cgtgggatgatgacaccctgtctggtgtcacatcgcttaattttgaaaag
+tctgatgaacaactctccgagcaagatgatgacaaaaccactgtttggag
+ttctaatttcccatcggctcatgttttaacagtctatgagaattctgaac
+agaagacggatganatggccgatgatgatatgtccgacacaacttcatct
+tttcttctactctttcacaacatgagtgctcaagtgccgcgctcatcttc
+tcagagagtcacttgcaagatcagt
+>AW057171
+gtcgccagactttcagcccaatgactttgttattgattggaggtttgaaa
+aatggttgttctgagaatgaaaataaggaagaggggaagtttgagaaaat
+tgacaaagttttctttcctcccgagactgccaacaatactaatccagtcg
+gacgcctcattggtccacgtggaatgacaattcgccaactcgaaaaagat
+ctcggatgcaagctgttcattcgcggaaaaggatgtacaaaagatgatgc
+caaagaagaacgtcttcgtgaacgtgttggctgggagcatctcaaagaaa
+ccgattcacggtgatgatttcagtccgcttcgattcggaagaggctgcat
+ccgagaaactgtcatctatcaagaaaatgcttcaagaatttttggaacat
+actgactcggaactcaaacgctctcagcttgcaactagctgttatt
+>AW057172
+aatgatcaacgtcgacccaccaactgttaatgttcctgcatctggaggta
+attcggttcacaacatcgtctcggagtcggattctcgtttggcattcaag
+gtgaaatcgtcgaacaatgagcactaccgtgttcgtcccgtctacggatt
+cattgatccgaagggaaaaaccaagttggatatcaatcgtattgctggac
+caccaaaagttgacaagcttgtcattcaatatgctgaagtgccagccgat
+gagactgatcctcaagcaccattcaaagctggagcacagcaaggtgaaag
+tcatcgtgaagatcactgccgaatgagaaaat
+>AW057173
+gctcgtacaagaatgctcgtcccgctgactgcaattgtgactacgtcgtt
+gccgatggttgccgccatcgctttttgtgccaagaatcgtaagacggtcc
+atgctaaaaacaagaataagaacaagagcagcaaatctgccaaatcgtcg
+aaatctactcgtggagcgtcgaagagtgggaaatctcgccgttcatcgaa
+agctaagcactccaagagatcgtcgaagtctagtaagaagggaacgtctg
+taaaatcttggaatgggaagccgcaagcgtggagggaaatcatcaaagtc
+ttcgaaatcgaagaacgtcaagactgctaccacctctggttctcaagttt
+caactgtttccgctgctactggtgtttctgataagcaatctaactcatcg
+aaatcttctcgtaagagctcaaagagttcgaagagccgtaagaatcgtcg
+acttgattcggatgcccagaagaaaatggagaaatcgggacagagcggca
+aagttgctcttattccagaaacgcaacacacaactggaagccaagctgcg
+catagccttgctgaagaagtcaattcgatcaagcactccaaggaaatgaa
+tgtggcttctgctaaactgctataccagacactttggcgagtcaatcana
+ttgtattgaaggatacttcatatgaacgtaagcttataagatcagtgctc
+ggatactccctttatcggtcaatccgctttatgatttg
+>AW057174
+tccaaatgacaatcgacatgaaggtggtggcctgggaaatgctcaaaggc
+tcggatcagccggtggacttggtccccaagaaggtcgtggttgccggaca
+ggaaatcacagttgattccaagtcgaagaacgaaa
+>AW057175
+atcaactaataagatgccctgtcaaaagaagtcaaacccaacggaattgc
+acatctccactggccgcgagatcgttcaacggaactttgtgttccgcaac
+accactggcaaggacttcctgctgaagttgcatgctacgaatgaagccgt
+caccttcccaacggaagttttccgttttccaccattggctcatcgtgcca
+tccagttccgtgtgaactcatcaaagctctcccaatgggacaagatgaat
+cttttgatccaaaggatcgtgttgccgatctatgcgaagagcctcaagca
+gttcattgatcagaagaaaactgcaggaactaaggagcaagaggcattct
+cattgtctgtcaagttcacggatcagttctcggctccccagacagtcatc
+aacttgccaggatatgccacgtgtatcgagtcgactgattatccggttga
+cgtggaagaattggacactacaactgcagtcaacatcgaaagagatgtct
+ccactgctgttccaattggttcaatgatgggatttgttgaggagtacaaa
+cgtcgtcaattgaacaaaggatgctgtctttcaactacatctttggaact
+gaaagcaaccggagagcagtcaatgagatctctcgtgatcagccgtcgtc
+gatcatctgcaagagctcaaggt
+>AW057176
+ataactctcccaacaacacctcaagatgaatgctttctacactgccgtcc
+ttgttgcttcaactctcgcctacactgcaatggcttggattggactcagc
+attgaagccgccaacgaggatatgatctgaagtggcgc
+>AW057177
+acggattgctcgcgagcatccggaacgtgcggtgactttgctcaaggcgc
+ttttcgctactgtgtcgacatttgatcaagaaggttatgtttgtgtggag
+gataagaagttcactgagaaacagtccaaataacttt
+>AW057178
+tctcgatccttgccgtttttgtccaccatggatttgctgctgctgaagaa
+gagaagaatacagcttcagtcgtcagccctgctccggactctgaagcagc
+ccaacctgctggaaacggaaccgaaacaccaaaagatgaggtgaaggatg
+aggcaccaaaagaaggtagtgaaactgaagcttcaccagaagccaagaca
+aaaggatctatggtattccatgctcttggagccatttccacagttgttct
+cgccggcattatgtgaagaagtctgccgaaag
+>AW057179
+atcaaccaccatgagttttgatgaaattgacatgaccttcggaaccaaga
+accgcgatcaaggatatgatttgctcaaagcgcgtctcgacaaaggtgat
+cgttcggtggaagtcttgtggagacttgctcaagtaattcatgagaagtc
+tgcatgtgttccaaaagctcaacgtaaggcaagtgtcaccgaaggactca
+agtttgctgaagaagctgtccagaaggatccaaaccatttcaaggcgctg
+aagtggaatgctgtgttgactggacaagcaaccgaatatatggcaaccaa
+aggaaaagttggattgcagtaagaagttcaaggaattgctcgacaaagct
+cttgctaaggagccaaaggatacggctctcctccatttgcgtggccgtta
+caagtactcggtggcatctctgacatggcttgagaagaagcttgctgcca
+cgttctatcagcaaccaccatcacattcctatgaagaggccaacgaggat
+ttccttgctgcttataaggtcaatccaaaatggatggagaacacattnta
+tgtgtccaaatgctacgtagcaattaaagacaagaacaacgctcgcaagt
+cccttaccgaagtgtgtgacatcgaaccgtattccgacgctgaacaagag
+tttgccgatgatgcgaagcagatgttgtctaagctttaa
+>AW057180
+gaaaaaatgatttcggcgttcacttcattcgctgtatcctacttcgtctt
+ggctatttcgttttacattgaaacaactgtcagcttgttccacctcgctt
+atttctcgtacagaaatccggcagtttcgaaggatctcatcaaaactgca
+ttccatcttttgaagacttcctacgacaacaaattgctgacatttgccga
+aatcatcgagactacacaaaacagtatgatcaagccaatggctcatcaga
+ataaacaacactttttggaggaaaaccagcgtaccgcacagttgcagacg
+atgaaaacatcaactgcttttcgcgttaa
+>AW057181
+tacaatggatcaaatcccaccatacgagttcaacaagtacgtgctctacg
+tccgtggtgccgtcatcgtctgtgcttcatttgagctcctgttggtccta
+ttcggttccctcgaagattgcaacttccttgctaagctcttctacttcat
+cttccttggcggagcagttgccatctcagctcacaacattggtctcaacg
+tggatggtcgcgaggagctcaacaaggttctttcatcgtcagagaatgaa
+gttcgtggaaaggtctgctgccttgattctggtgccagcgctctccggtg
+tcctcgtcttcttatgtgtctctggacatgcattcttctctggtgctgca
+ccgtcagcacaggatccagctgctgctccaccagcccaatagac
+>AW057183
+tatacgtgcatgtggaggatgaaaatccgaatgagaatgagccggcacgt
+ttgagagctggattcgagtgggctgctgaaccggatgagattctcattgc
+aggtgttcccaccaagttcattatgtttggtttgtcgtgtttccttgtaa
+ttctaacactcagtctatggattgccagcacccattattcttactttatc
+tggctcgtgtttgccacgttgtacatgggtcttatcgtgttccttcccga
+atatttctcgaatataatcagtttggctctgaactttatttactggatcg
+cttactgtattttcacttttattggaattattcttgatgttgttaggaga
+ggggacagttgtagttccggtatgagcaaggaagtttgtgatgctaatcg
+tcacggatacatgttggctatatgcttcggatgtgctgatctgctaattg
+ctggagttatcatgattctgatgttccgcatgctcaattactattacatc
+aatcgct
+>AW057184
+gacatgtcggttttcataagtttgtgttctgcttggccttattgcaatga
+tggcagcccaattcggactaaactcgggacttgggcttggagttggaccg
+gcgagagctaatgctaacctaaacggaggttttcaacgtggctacggcgg
+caatggctacggtaaccgaggtggatacggtcagcagggcggctattatg
+gccagcaaggaggttacggtcaacagggtggatatggtggcaatcagggc
+tactatgggcaacaaggaggcggntattgaggtggcc
+>AW057185
+attttttcaaccgatcgtccaacaagagacaatttcctctgaaaacgcaa
+tgccgaacccgccaccgaaggaagacacctgggcgtttcaaccaattgga
+gccccattcccaccgagtcctgtgaaatgtatgggagaacagaatatgta
+tgttgctctttggtacaagcacggtaaaccaatccacggtcgctcatgga
+acaatggaggagttgttgaatgttcattcccatataagcaagctgaattg
+acaaccaagcaacaactggaaggacagatccaggttcttcaatacgtang
+agaccataacaatcaaggtttctggtacgaatggattaagtacaaggatc
+gtattgagaagattgacgataaacatcaacttgtgcgttgtggtgattca
+ttcccaatcttctggaagcgtgccgaaggaaatcttcttggttatgtcga
+caacaagactgaggaggcttggttctcgttcaatggaaaagtgctgaaac
+aagttggaccacaactcaatgacatgtacatcatcacccgtaactgcatt
+ggtgggccaccatcttgtgattgtgccaactggtgaagtggaccanaggt
+tcgtgtcgagagagatgaatggatggacattcgtgaggtgatgcatggca
+actcgtcacttgtcaagctcttgatagactcttgatacttgccagtgtca
+atcagatcatacgttgcactttgacatgccagagaacctgtcatggtcgg
+atggaat
+>AW057186
+tcccaacaacacctcaagatgaatgccatcttctttgccgtccttgttgc
+ttcaactctcgcctacactgcaatggcttggattggactcagcattgaag
+ctgccaacgaggatctcatctagatcgatcggagaaaaccgccacaagaa
+attttg
+>AW057187
+actaactaactacaatcaactctactatacttatggtcaagaagattact
+gtctacactgcttttggacaattcctcgagatgatcgagcgtcaagccga
+acagagaagggaaacagttccagtcctttgtccgatcgttgaaaaggctc
+agccaaggacagctttgaacaaggttcaatcttgcccagttgttccaacg
+actgcaagagttacagaagagatcaagaagagcattagctgcccattgtt
+ggctctccac
+>AW057188
+tttctcggaacaactccaagcgaaaaaaatggcggacaagtcggcgtaca
+tgggtgctggtggctatggatccggatacatgggatccaacgcctcatcg
+tcgggatatgcccgcgaagattatgcacaaggaggaaatggaggcggaca
+acaacaaaaccagggaaacggaggaaacaccaacccaggaggacaggtct
+tcaaggcccgtaccgatcaatcgtgctaccttgggccataagtagctgct
+cgaataatgtgaagactcagccag
+>AW057189
+tctttcgttctcgccatcaccgccttgcccgcgattgccattttctgcgg
+aggaaagaaaggagccggtgaatctaaagaaaagccaaaggaggatgtgt
+atgaggatttggcaccaggagataagaagtaatacttgtgaactgacaga
+atgcacaatcgagcaacttc
+>AW057190
+ttttgaacaacaatactcgatgcccaagttaaatagaagaatcgttcgtg
+caagggattcaaaaggacgcttcctacccggaaagaaggccaagtctgtt
+gccagcaagtctcgttcgagatccagaagccgcagtgttgtgagccgtat
+gacgacccgtaccaattcgttgactcgtcgtcgttcatcgacgaaagctc
+cttcgtctgcccgtcaatccagatctcgctcaagatctcgctcaagatct
+cgctccaaatcccgctccacgtcttcccgccgttcccgctctcgctcagc
+ttgttgtgtctcgttcaagcgtggacgtcctgcttcgtttgctatgaaga
+gtcctgaagagaagacggccgcaaagaagacggccgcaaagataatcctt
+agagtagattagccacctggaatgagatacagaat
+>AW057191
+ccaacaacacctcaagatgaatgccatttactttgccgtccttgttggtt
+caactgtcggctacactgcaatggcttggattggactcagcattgaagcc
+gccaacgaggatatgatctggagtggc
+>AW057192
+gcacctggttgtgatattgaaatggatacgcgtactttgttgtggaatga
+ctacaatgctgcagttccaactaatatgtgggaaatcggaaaatgtacat
+tcaacttc
+>AW057194
+tcgntctactgagaaggatgaaccgncagtcaacttcttctcctcctcat
+cgactcgatccttgccgtttttgtccaccatggattggctgctgctgaag
+aagagaagaatacagcttcagtcgtcagccctgctccggactctgaagca
+gcccaacctgctggaaacggaaccgaaacaccaaaagatgaggtgaagga
+tgaggcaccaaaagaaggtagtgaaactgaagcttcaccagaagccaaga
+caaaaggatctatgggatttcatgctcttggagccatttccacagttgtt
+ctcgccggcattatgtgaagaagtctgccgaaa
+>AW057195
+gtagatgaacttgcaatcgttctacattttaatgtactcggatggcccac
+tgactcccttcgagtagtagttgttgaaaaccggagcttcatagttgacc
+ggaggagccgagtaggtcggtatcggagtggtgactggagcggccacgtg
+gatctccggaacttcctcctcttgctccacatcctttttacgagcgaagc
+agtacccagactcttcatcgtcatacatcttgtgaagcatcttcggagca
+acttttggcttttgctgaacttgttgaactggctcgacgactggaacatc
+tggtgccggctgctgctgctgctggatctgaagagcctgcagctgagcca
+tcaactgttgctctgtgaagtccggagctggagacgcaggtgcctgctgn
+gtcgtggtgtacttaacacggtaggacaacggaatatccgatggtccacc
+atagtagtccgagtgcacaatcggctgttccggagcaactggagccgctg
+cttgacgaaccggtgatgatgnggcagacgcagacaaggagagcaccgaa
+cggtagagacggatggcatcctggattttggatgagtcagagatcctcgc
+gattgggtgtagcgngaaggcagcgtggcagagaggagcttgctcgctgg
+ctct
+>AW057196
+agagattagtagatggatgtgataactttcgcatatcatcatcgtcatca
+ttgtcttcatttttgtcgtcttcgtgtattgttgttccacgattaataat
+tttcaacgccttctgagttcccttttctaacgaaatacttgtattcttgt
+taccacgatacgtagagttgttcatagatgaggactttttcagtctggaa
+ctaggtctcgcttgcat
+>AW057198
+ggataagaagattgtagtaattggatacattatcggaacgactgcagcgg
+tagacttggcagcttcaaatccggatagactagttggagttgtgctgatt
+gctccgttgaccagtgcactgaggatgttctgcaacaatcctgacaagga
+aacgacttgcattgacaaaatctgccacatcaacacccgagtgctcatct
+gccatggagaccatgatcaacgcatcccaatgactcatggaatggctctc
+tatgaaaatctcaaaaaccccagtgccaccactgatcgtccatggcgcca
+atcatcattcaatcattagcggagagtatattgaagtctttactagaatt
+gcaagcttcatgcggaacgagacccttctgagctgccgagccaatcaaat
+cgagtcgtcctcgtcgaagaaattcaaacatgaatgaatagta
+>AW057200
+cnaatnccttcaaaagtcgccggttgctcaatcagcataatatgagaaac
+aaagggcgtgataaaatgatggctgatgacaacgtctcattcacggatgc
+aggtgatccaacaccaggcgctaaacctcaaggaggatcgtcggcaatgc
+tggatctacttggaacactgaacaagaaggaagacaaaaagaagaaggat
+aagaagggcaaaaaaggaaagaagtcgaagggaaagtcgaagaccaagaa
+agtcagaaagacagacaagtttgagtcgcaaaacttcttggttcgcatcg
+agggaaccatattttgtgctggaatcgttgtcggattggtggtgctgctc
+gtcttcgttgcagttgcaatcttcttcagcgtgaagtctggaggaaacat
+ggtgcactacatccatccatggtggggaggacttgaagaatcgtcttcta
+attgagagacgaatcgaaagaaatgaaaaagtgacg
+>AW057201
+aattggagtttttcgacggataaagaagtttcagctggaatcgaaagatc
+taaacgatgatctcactggtttctgtgattgccaccgcctcggctactag
+caccgtgcttgccatgtgctccagcaaggatcgtcaagcggaccgtaaaa
+agactaagaagggttccggatccaagtcttcgcgtgtttcttcgaagtcc
+catagatcctcaaaaaccaacaagagatctggaaaatctggaaaatctgg
+aaaaactggaaagttctgaaaagtctggaaaatcaagccgtggaaaatca
+tcaaaatctaagaagccttccaaatcaaaggccggagttcttccaatccc
+cggagccgcaggaggtcccaagtctgaatcgaagaagctcagacgtgata
+gcacggacaagtcaaagtcacagagatccaaaagatcttcgaaatctaag
+aaatgcgacaagtcctccaagaagtgcgacttgaacaaggctaagaacct
+ctgcccaacggttaaccaggcagatgtttccgacgtgtccatcanatcgg
+actccggtgagaaatccgannagtcgaaagctctgaagttgttnccaacc
+agtcgaagacacattcccggagggacaggttctgctcagtgaagagtcga
+ctcttctgcatcgnccacaagctccattcgca
+>AW057202
+tctgctgcttttcatagctcaatgtgcatttggcggaccgacgaaactta
+ctgaggacgaacagtttgaatttttaaaacgagcaaataatttgatgcaa
+agcaaagctaaactcgacgcttatacttggatgaatttcgacgccgaact
+cgaggctgaaatattgaaaatgtcgtgcgatgaattggaaaaacagcgag
+gaattcgatttttcgagtcaaagtttggttaatcatgagattacaagtag
+aacaggagaaactggaattgcgtgcatcaagagtgattgtatgtacacag
+atctcataccagctgtgccgatcgctcatatgtgcctttacaatcgccca
+gtggtcacaacaaccgtcccaccatcaacgacttctggaactgacaatat
+tgcgagttttttctttggaattctcatttttggagttttgaacttgcctt
+g
+>AW057203
+aattcccaggaacatcaaagagtcagattagaaaaaagaagagaaggaaa
+gcaaggagaccgaagttttcatcctcaagctcgaaaacacaacggtaaca
+ttcacaaaccccaagcttggcgatgaagtgaccattgcatcactaaacct
+gacaaatccgaccaaggatcggtatgcattcaagatcaagtgcacttcaa
+accagcttttcaagatcaagtcaccagttggttatatcaatccagaggag
+agcttaacaattccggtctaccactacccagcaactgtcattccggagaa
+taataagcactacttcgtggtctactacatcaaggctgcgaacacagtaa
+aggaaaagattccagttcgtgatttgtggaaagcggcagcatcatcagaa
+ggaacccgacgtgtcttcatagacttcaaaaa
+>AW057204
+aacgaggcagcgaaaatttcccatcctctgccttcaaaaacttggtgact
+catccaaactaccgggattcgtctttcactccgttcgtcttgtttatttc
+cgatgacgttcctaacattcatgagtgcctcaaattcgaagagcgtatga
+gtgacattccaacgcagcacgtacttctcaaaaatgtcaaaaagatgcgt
+gacaacatcgaaaagaagtctcaaggtggaagaagagcatatgatttgac
+tcttgacaatat
+>AW057205
+ttacatgtcacctgaaagaattctagaattcgggtataatttcaaatccg
+atttatggtcgactggatgtctgttatacgaaatggcagctcttcagtca
+ccattctacggtgacaagatgaacctgtactcactttgcaagaaaatcga
+aaactgcgaatatccacctctgcctgctgatatttattcaacgcaactcc
+gcgacttggtatcccgttgtatacttccagaagcttcaaaacgaccagag
+accagtgaaaggtctacagggttgccgaacacatgaacaattacttctcg
+ccttccggggaccaatcaacaactccttcaacgcaattctaaaaaaagct
+ataacatttcaatttcaaacattttctttaaaacgtagtgttcttgtatt
+ttcaaaaggtggaaacattcgtcaatgaccacgtgaatccgtgatgtgct
+aaattttac
+>AW057206
+cagctttccatcttgcacatacaattgcactcttcattaaatcattcgat
+tctccatgtcaatatctgaaaactccctcaactgtaacacttaattcgga
+tatctgtcatgctgttaatggagttactctggtgtgtgcagtgatttcaa
+tgattgctactgctcttgccagtatggccgtcttcattcgtctcactaca
+gtcgtcgtcaaaatttcggataagagagttatggtcaccaagtcgttcat
+ttgacaactcaatgccacaattaatcagtagaaagtcaattgaatcggag
+aaagaagaagattgtttggagacaccaagaagaaaaatgggaca
+>AW057207
+atgtatacttctatcagtttgactatcattactctgcaggattcggagtg
+ttccggtggcttcttccgtttttgggctccacacattgcacggaaatgag
+atatgtgctcggcaaaggaataatctcgaaattccgaccaaatgataatg
+ataagaagatgcttcacgttatgacaacttattttacaaattttgcaaaa
+tatggaaaccctaatggagaaaaccaggagactggagaatggcaaaagca
+cgactcggcacaccccgtccgccatttcaagattgatctggacgattctg
+aaatggttgaggactatcaggaacggagagccgagctatgggataaactg
+agagcattaaatgttagcagggctcagatgtgaaattgct
+>AW057208
+aaatttcgaatcacatcaagacatgccgattgaagttacaatgtctacaa
+gcaatgaatccgcaaatcttctggtaccctcatttttgccacgtggcgaa
+gttctcggttggaatctgacgtcggtggtgaatccggtgactcgacgtaa
+agaatacacgtacacggtgtgtgtgcagactgtcggagcatttcagtgca
+ccgagcaagcgggcgttgtccttgtgaagtgtgatggtccatgtggaaac
+aagttgccaaccaatcatttgattgcattgggaaagtgtgatcacatgtt
+gtgtaaggcctgcttcggcattgtaaagaatccggatggatcctatggat
+gttcgaacttctattgctggtcggaaccacgtggaaacttccggaaggaa
+aaggccaactacaataaggtcatcaacaagcagatctgccgtgccagaaa
+gttcaaacaagacggggaagatttgcaagcatgcagcanatcgaatcttc
+ccaagactcctgctgacctatcggaccgtgagatgaactctgcgaaatcc
+tcggaatcggatacgtcgtcttnctgatggttcttcctaccaatccactg
+ttgactcgtactgactgccaanaagattctaa
+>AW057209
+ttgtgtattcgaaagaaacatggcagaaaatagtgtaacacttctacaac
+ttgcccatctcggttacagcatgctcgctccaatggtgttcaccggatac
+gtcattgacaatgtggaggaaagaagaagactggtggatcatcgggtggt
+ggtggagctaaaactggaggaggaggaggagatggtgcaaccagtgcaaa
+gagtgataagaaagaaaaatcatcgagtgctccaaacccagagggaccaa
+aggctccaagtgataagaatgctgtcgcagggacacatgatccaaattat
+caaactcttgctggagttgatggaaatgtgttccaagagaaaggaaaagc
+ttctcctgttgctgctgctggtggagcttctcctgctgctggtgctccaa
+aacctggaggtcctggaatggctgccacccacgaccccaactaccaaaca
+cttgctggaattggaaacgattgtttcgacaagaaagaaggtgcaaaacc
+agcttgtggtggtgcggctccaggtgctccaaaagcctgtggtcctggaa
+tggctgctactcatgatccgaactatcagacacttgctggaattggaagc
+gattgcttncagaagaagtgattgtgtcaactcgtgcanaatcgggtaca
+tnnacgaaatatgtga
+>AW057210
+aaaaatgaacgatcttgttattaatcagaagattcttccagatatttcga
+aatcgaaatgggatctcgacacgtattcaggtcgtgtaaaacactacttt
+gcatctgctaacccgatgacactcttcacctcatccaatacccaggaaat
+gtgcaggaaaatagttgtagactataaaaaggggataataaatccggaat
+tgacgatggatgagctatggagtgcaaagatcctctatgattcagtatat
+catcctgataccggcgaaaaagatgtctgtctcgggagaatgagcgctca
+gaccccagcaaatatggttatcactggaatgcttctcagctgctatcgta
+cctgtcctggtattatattctcccattggatcaatcagtcgttcaatgca
+attgtcaactataccaatcgaagcggaaattgcagaactaccaatcagca
+gctactctattcgtatttctgtgctactggagcggctacaacggcggctc
+tcggtctgaatatgatggtgaagaatagtcatggattggctggaagattg
+ttccatttgtggctgttgagttgcaaatgccattaatattccaatg
+>AW057211
+gaataaattcatttttacactactcaacaattgatttgctgcttctttct
+ccacaatctacaacttctgaaaaatgatgaaaccaccgatttgccgccgg
+atgagcctgccaccagttctcgcaaagaaggaaggtgaacgaattcaagt
+tccagaaggtggagaattggtatgctaccgaaaccaagaagatcaacatc
+ttttcttaaaaatagaatactcgcgcgaaacgatgattaaactttcatcg
+tctgttttctgttatgcacgaccaactggcctcaaagaagtttccgatga
+gttgccggaactacttggactaccccattcgattccactgacagattctc
+cgccgaattttgaatgtcttcctgaaccattggcacatgtttcgggatgc
+aattcacatttcagtatgaaccatcctcctccacttctgagccctctacg
+tggccctgtaactgaagctgatgaggcgatgcgcacactcagtagtcatc
+gagaacttcagaagcggcttcaaaacatttcaatgcgaggagagttgagc
+ccttcaggatgcctggttgacagttgtgacacacctgcaattcgtcagat
+tcgcaatcctcagttcacttga
+>AW057212
+tcgaagttactcggacttctcatcttcatcgctttttaatcaacatcatc
+atgtttggacgtttgaagcagaaagttaaggaaaagactggacgtgccaa
+ggcgacaactcttcccgcagaagtggacgatgcgatgggctacttcaaaa
+atctgacgccacgtgtcaaggaccttcacaagagcatgacaaacttggaa
+gatattagcaagtggcagaagaaggccagtttctctggcacccttgagaa
+ttactcgcgtctcggtgacaagatcaatgtgaaaccatttatggatgctg
+ttgatgctagaatgggtgccgaagctgatgccgtgaaaggggtcctcgcg
+atttgtgaaaaatacaagtcattctaccaaaacgagggaaaacttcacgc
+ggacagtatcgccaatttgaataggactcggctcgacatggacagtgcgg
+cggataaatatgcgaacaacgagactgaagttaacaagactcgtttggat
+acagtaccacggaatttgaagtggcttgtgagagaatgcgagaactggcg
+aacggaatcaagacaattgaatcgaaccattcttcctggcaagacgtctt
+atgaggagaataaagtgcngtgcgtanataa
+>AW057213
+gataggtgtcacccccaaccgattgtccttcaaaaaattcgcattttcaa
+tcatcgagatgatgaggtgaaaatcgataatttttcggctcgtcgagctc
+caaaactccgcgacgatgatagtgacgagctcaacgtggatgtaccagct
+gaagatgatgatgatgtggaactcgatgacgtcatcgttgctcaaaatcc
+ggcgttttacggcacaattgaggcggagaagtgtgcggaacgagttgctg
+cacatctttcgatggcctgtgagaacatggaacgtctgcaatttgtgagc
+gaggccgtgtatccacagagtgctgatcatttgaagaaacttcaagaaat
+cgatgatgacgtcaaggatttcaattggcagatgagagagcgtcgtgtca
+aggcttcaaatccagcaggaacagccacaaaagttgcacacttcatt
+>AW057214
+tcgaaacttgtgaagaaggtctgcaatggttgccgcaaagaaattgatgc
+caaagcgaatgaagaagaagatgcaggtggagagaatggctcatgccaga
+gcttgcaaggttgcaaagagagaggctcgcgtcgctgaggaagcatctgg
+aaaatctactggtggatctactcgcggagccaagtgatagccgagccaca
+acacatga
+>AW057215
+gaaacttgtgaagaaggtctgcaatggctgccgcaaagaaattgatgcca
+aagcgaatgaagaagaagatgcaggtggagagaatggctcatgccagagc
+ttgcaaggttgcaaagagagaggctcgcgtcgctgaggaagcatctggaa
+aatcaactggtggatctactcgcggagccaagtgatagccgagccacaac
+acatg
+>AW057216
+aattccgaattccattcgactaaccatctttttgcaaacttgcaccaaat
+caaccgacatgcaatcaatcaacatcttgttcgccatgctcctcatcttg
+gctccaattgtcaatggagacgatactgccgttgctgtgactgcaactga
+agtcactgaagatgcaactgaagtcactgaagatgcaactgtggctcaca
+ttgaaacaacagccgaagccgttgcagaagcagaaccagctacagaacca
+gttcaaaccacccgagctgttgaagaaactacacaagctgttgttgtaga
+atccactcaagagactgtaaatgctgtaaccaatactccagttgatactc
+cagctaccaacaacgtcgaagcaactactgaagcggcttctcgtccatcg
+ctttcatcgactgttgcatcaaatatgacttctgctgatgacttgctcgg
+agaaacttcaaccaatgccacaaaagctgcttacaacactggaaccttca
+ttgttgtcccaatggtcgttctcgctttgattcaatga
+>AW057217
+gatgaaccgtcagacaactgctctcctcctcatcgtctcgatccttgccg
+tttttgtccaccatggatttgctgctgctgaagaagagaagaatacagct
+tcagtcgtcagccctgctccggactctgaagcagcccaacctgctggaaa
+cggaaccgaaacaccaaaagatgaggtgaaggatgaggcaccaaaagaag
+gtagtgaaactgaagcttcaccagaagccaagacaaaaggatctatggta
+ttccatgctcttggagccattnccacagttgttctcgccggcattatgtg
+aagaagtctgccgaa
+>AW057218
+tctggaaagaggagtttcttctgaagacatantgattccttctgttcgtc
+ggggtgtcattccagtcaacactcttcgttaccaaatagaaaagcatctc
+gagatgtgtactccagcttctgaacaattgtcaaagagttcggatcccaa
+catctcctcgatgtacgttttccatcaaggaattcaagtaaagcaggaac
+caatcgatgatgaccaagaggaagagcaacaagtacaaaagcagcttgta
+ttcaaaatccgaggcttcgaaaacgaagaagctgtgaagaaggagtg
+>AW057219
+tcccaacaacacctcaagatgaatgccatttacactgccgtccttgttgc
+ttcaactctcgcctacactgcaatggcttggattggactcagcattgaag
+ccgccaacgaggatatgatctgaagtggcgcc
+>AW057220
+ctcttttctcttcgtcgccgnggatcaaaacgttgcccgacagggatgag
+ctcgaataagacctctcgctcaacttcgtcatcgtctgtcacatcatctt
+caggacatggtgcatcgagcttctccgaggattcgtctgttcgctctgtc
+accaacagtgttagaagtactagaagcgctggatctatcatgtcaatggc
+tagtgccgaggcaagtgtcgttgctccagatctgacaatctaccatggag
+atcgttagcaatcctaccagctcgctgacaaggggaaaatggtcgttatc
+aaccggaaaaatggggtgattgtctacatgcttcgttgtgtcgacggccg
+tcgtgtctacattgaganatcttccgaaggagccagtcttattctgacta
+atcaacgtggaaaagtgatcaaggcattggccgngcactactag
+>AW057221
+atcaactttgtctcctcaaaaataagtctacaaatgatcaacatcgatcc
+accatccggcgactacccagcttctggtggttcatcaactcactacattg
+tctccgaatcggaatctcgtttggcattcaaagtcaagtcgtccaacaat
+gaatcgtatcgtgttcgcccagtctatggattcgttgatgcgaagggaaa
+ggctaagctcgaagtgaatcgtttggctggaccagcgaaggaggacaagc
+ttgtcattcaatacgccgaaagttcagctgatgagaccgatccgaaggct
+ccgtttgcggctggtgctcaacaaggagaagtcgttgtcaagatggttgc
+ta
+>AW057222
+aacattcgactaaccatcatctttcaaacttgcaccaaatcaatcgacat
+gcaatcaatcaacatcttgttcgccatgctcctcatcttggctccaattg
+tcaatggagacgatactgccgttgctgtgactgcaactaaagtcactgaa
+gatgcaactgtggctctcatcgaaacaacagccaaagccgttgcagaaac
+agaaccagctacagaaccagttcaaaccaccgaagctgntgaagaaacta
+cagaagctgntgntgtagaaggcactcaagagactaaagatgctgaaacc
+aatgctncaagtgatactccagctgncaataacatcgaagctaccactga
+aagggcttctcgtccatcggtttcatcgactgttgcaccaaatgtaactc
+tggtgatgacctgttcgggaacctcaacaatgcacacaa
+>AW057223
+tcgattctcgaaatggctccaacacgtacttcacgacgcagttcagcgaa
+cttttcattcgacgatgtaaatgttgaggaacagaggcaagcgtatctcc
+gctacgaacaggaattgaaggatctcgcacttgctcgaaaccttgagaat
+gaattgaactgggggccaaatccagccaacccagcgcctcctcaaaatcc
+tccacaacctgaaactgttcatatccaagtgaatcgagataatcctcaag
+ctcaacaacaaaatctgacaggaaccatagctccagctggaagagaaggt
+actggagtacaggtggctgctgtggccccggatcccaccagcgcagcgac
+tgggtcacaaggacaaccaactccccagaatgcacaaaatcagccaacaa
+tcagagccgcagctggaagagaaggttatgganggcacgtggacgttnta
+accgccgttcttgcaagaaacttgccgtcacaaggaatcaatgaaaatgc
+cataaaccctggatgatgacgaatctgaagtcaattgtcagactcttttg
+aagctgctccatcgaatcagccaagcacatcacaaggcacctcaaatcat
+gcaggctgagtggcttcacgatcgcgccacgggngaagtgaccgctgttt
+gggactcagtgaccaaacaagg
+>AW057224
+gaatggatcgtagcattgatcgtcgtctttgtgggcactgtcaccaatcg
+attggaagtgaggcacttgtcgccatgaatcgtctctggcatccggacca
+cttcacgtgctcatcgtgcaaacgtccgatcaagcagacgttccaggctg
+ccgacaaccacgcctactgtgtccagtgctttgctcaaaagtacaacccg
+aaatgtgctggatgcatggaaactctcgtcgacacgtgccttcttgcttt
+ggaccgacactggcattcacgttgcttacgtgctcctcgtgcaatcgccc
+attgccaaatggagaattctatctggttgatgataagccgtacgacttgg
+attgtcactgggcaaagcgtctcgagaagagagagcacatggaacgtggt
+gaacgttaagaagaacgccgttaatttgtcgaacttccccactgtttttt
+tccttgtattcttgtgat
+>AW057225
+caactcaagaagaagaagaagatttgctgtcaaatcggcatacatgtccg
+ctggaggatacggatctggatacatgggagcaaatgcttcatcatctgga
+tatgctcgggaggactacgctcagggtggcgctggaggatccggaaatca
+gaatcagggaggatccggtggaaataccaatccaggaggacaagtgttca
+aggcacgtaccgatcaatcgtgttacttgggaccataagtctcaactgta
+ttcgaccggcaatt
+>AW057226
+aaaaaacttcatcaagatggctgcgaatatttatttatggatgtcgaaac
+gcaacgtcgaattgaatacacacgacagaaagtgatgagaatcgagcaaa
+tgaatgagcaacttcggaagttgcaagtcgaataagggtcgcgaggagaa
+gttggacagacttttgaaacgaaaggaatccttggaactggacgttgcaa
+gattgactgacgcgtcgatgagagctgagccagaggtcggagcggagctt
+cttcattctatcgaagagcccatggaagttgatatgatctacggtgaagc
+attccacgcaaagacttgtcaactgaaagtccttctcaacgagataattg
+ttcgaacaagcttcaacgagaaagcaatgtgtaaagagattggacatcag
+gaagctgaattcgagaatcgattgaaggaggtgatcagtggaagagctca
+attgacgctgaaatctgaagaagctcagaagaaatgtgaattgttgatga
+gagagcattcgaatgtttatgaagatgttcgagagatggaggataatatt
+gagaagtttgatgcatcgagatggttgttgaatgtggagaaaagacgagt
+ctctgatattctgatcgcaccaaagagagccgannatgggaatgagtgct
+gccagccatacattggtcttcngaagcttctcactttgagacttgcacat
+caaactgagtctcactcacgatcattt
+>AW057228
+acgaaagagatggcggacaagtcggcatacatgggtgctggtggatatgg
+atcgggctacatggggtccaacgcctcatcgtcgggatatgcccgcgagg
+attatgcgcaaggaggaaatggcggaggacaacagcagaatcaaggatct
+ggaggaaacaccaacccaggagggcaggccttcaatgcccgcactgatca
+atcgtgctaccttgggccataagtggcggttcgaataatacaagagcaag
+tagtcaaccacccc
+>AW057229
+atccgtccggaaaagctgcgaatgctgcgagagcttatggaagcgggatg
+aaaaactctgcagcctctttcaatcactcttgagagcagttcgataagct
+tttcgaagcttgtaacacattcaaggatcaaatcaatggaaacgtccttg
+ccaaattgatcagtttcaaagatgtggaatgcaaggatgtcgacactcaa
+atgacccagttgaagaaggctcagaaggattatgcgaataagaaatcaaa
+gatagttccagatcaagttcaggttgatgcagctgaagcaaacttaagaa
+gtt
+>AW057230
+aagatcactctgatcgtcgtcgcaccaacaccttttcttcttcagccgat
+gaggatggagttccaaatgaggtcgccgactacctggtctacttttcccg
+catggttgacgaacaaaatgtgccggaaattttgactctctacgatcaag
+cttttccagatctcaccgagagattcttccgtgatcgcatgtggcccgat
+gagaatgttgtcgaaagaattattatacagtattg
+>AW057231
+tctctggaagcactctgctcgttcaatgtggtggaaagaaaaagggagca
+acttctgccgaaggaaaatcttcgacgatgggcccggctcctggaggagc
+tcctgctgctgcttccgctcaaggagaacctgaagagaaggagtaa
+>AW057232
+gaattcgagcagcaaatctgcctaatcgttgaaatctactcgtggagcgt
+cgaagagtgggaaatctcgccgttcatcgaaagctaagcactccaagaga
+tcgtcgaagtctagtaagaagggaacgtctggaaaatctggaaagggaag
+cagcaagcgtggagggaaatcatcaaagtcttcgaaatcgaagaaagtca
+agactgctaccacctctggttctcaagtttcaactgtttccgctgctact
+ggtgtttctgataagcaatctaactcatcgaaatcttctcgtaagagctc
+aaagagttcgaagagccgtaagaatcgtcgacttgattcggatgcccaga
+agaaaatggagaaatcgggaaagagcggcaaagttgctcttattccaaaa
+acgcaacaaacaactggaagccaagttgggtatagccttgctgaagaagt
+caattcgatcaagcactccaaggaaatgaatgtggctcctgctaaacttc
+aataccagacacttggcggagtcaatcaaattgaattgaagaatacttca
+aatgaacgtaaagcttataagatcaagtgctcggataactccctctacgt
+gtcaatccgtttatgatttgctgagcacgttctccgttaagatgatggtg
+agat
+>AW057233
+ccggatccacagggaaagtactactgtattgtaggagctgatcgtgcgtt
+cgggagagaagtcgtcgagacacattaccgggcttgtcttcacgccggac
+tcaacatttttggaacaaatgccgaagtgactccaggacaatgggaattc
+caaattggaacctgcgaaggaatcgatatgggagatcagttgtggatgtc
+gagatacattctgcacagagttgctgaacaattcggtgtctgcgtatccc
+ttgatcccaaaccaaagggtcaccatgggagactggaacggagccggatg
+ccacaccaacttctcgactgccgaaatgcgtgctccaggtggaattgctg
+cgattgaagccgccatgacaggactcaagcggacacatttggaggcgatg
+aaggtgtacgatccacatggtggagaagacaatcttcgtcgtttgacagg
+acgtcatgagacaagttcggctgacaaattctcatggggagtcgccaatc
+gtggatgctcaatccgtattccgagacaggtggctgcggagagaaaagga
+tatnctggaggatcgtcgtccgtcatcanactgggatccttatcaagtga
+ctgcgatgattgcacagagcattctcttctag
+>AW057234
+gaaaattcaatcaataatcactatatcaatgttcattatcatcgtcgtct
+cctataatgagctgacagctgaggaaaatgataagaggctcgagacatgt
+ggaaatgagcatattgggaaaccatcgaaaaactcgataatctctcctgt
+ctcctggcttaccaaattgacaagatccgaaatttctgctccggcagtta
+taatatctcctcgacacttgatcacttcttcacggcttttcctcacaaaa
+tcagcttggaaaaatagcggagattcgattgattgtgatgacagcataaa
+gcacttggaagttccggtaaacaagctctcggatgtgattgagccgtgcc
+tttctcaaaaggagaattgctccccgaaagtgatgaattttgccagagca
+tatattctgaacttttgcaaatcaacattggtgcaaaaaagagtgtactc
+cttcccaatgattntggagcttgatgagaatttggaaggcaactcaagtt
+atccatgtctagctgatgaatcaatcanacttgccaaaggagatgccatt
+gancgcttatgatgacannacaattcgaatggagcatcgaanagtgatgt
+cggcgccgtgtagtccgatatctataccgactgt
+>AW057235
+agttactcaagctgttccagaaggatcagtacttagttcagatgtcactg
+atcgtccaaacatcgactccactgatgttgtatcaaatgcaacttcggtg
+gaagatttgcttggaagttcaacaaatgcaaacaacactggtacattcaa
+ctctaggacctttgtaattgctccaatgatgattcttgctttggt
+>AW057236
+ctcccaacaacacctcaagatgaatgccatttacattgccgtccttgttg
+cttcaactctcgcctacactgcaatggcttggattggactcagcattgaa
+gccgccaacgaggatatgatctgaagtggcgcccat
+>AW057237
+ttttgttttggtttgatttgtccgtaaaagttttcacaaaatcatcaatt
+ttctgttgtttttcttgctccttggcttgtttcttttccttaactgactc
+gacgaattctccaacttgttgagaaacctccttccatttctcgatcgctg
+cttcggcttcctttttcttggcttcagctttttcctgtttctccttttcc
+aattcatcggctctctgcttaactgcatcagatacctttaaagcgtttcg
+ttcagtttcttctgagtctctttcccgtatttgtctcttgctgctctcgc
+attntcaaaattaactctcagttcttcttccaatgccttttccgtatcaa
+ttactgactggaaggtgtcagcaagggagtagtagcgaacggatgaaacg
+ctcgacgcaaaaaa
+>AW057238
+gcaatgtcacgttgtcaactcattgcaaacaaagactggaccaacattga
+acggagtgattggaagacaatccggacaagttgctggatttgactactcg
+gctgccaacaagaataaaggagttgtatgggacagacaaacacttttcga
+ctatttggctgatccgaaaaagtacattccccggaactaagatggtcttc
+gctggtttgaagaaagctgacgaacgagctgatctcatcaaatttattga
+agtggaagctgccaagaaaccatcggcataagcctctactaaata
+>AW057239
+tcgtcagccctgctccggactctgaagcagcccaacctgctggaaacgga
+accgaaacaccaaaagatgaggtgaaggatgaggcaccaaaagaaggtag
+tgaaactgaagcttcaccagaagccaagacaaaaggatctatggtattcc
+atgctcttggagccattttcacagttgttctcgccggcattatgtgaaga
+agtctgccg
+>AW057241
+atgagccaaagtgtgtagatgttgttgaaggaaaggaaagttctggagtg
+tgcaagacgaaaggcggagtctgtcgctttggtcattgctgcccatcact
+taccctgacaattgcaccatctggaaatggtactgagtcagcgacgccta
+ccttgggcccatatccatacttgactaaattatccgtgtgatgctaacaa
+acctatcccatctcaattcagcacctatgcattttgcgatcctgacacta
+atcgcgttggtattttgggcaaaaggcacttaactggagaagaacgtact
+gaggtgaagggatcggcatgctcttctaacaaagactgcaagtcgggaac
+tgtttgcgtgtatgttaatatcaataaacacgtctgctactaccatccgc
+tgaagaaaatcgcccgtgatgtcagtcaaccatggctctatgtgctcatt
+agcttcctcatttgcggttntatttntgtcattntggcagtcatgagctt
+cgtctgctaccgttcgaagtctgtgtttgacaagtaccagccaaagaaga
+atgcaggaacacatggtagcagacagtgatgggaagaaaggaaagatagt
+gggaagaacgagacgatacttanagtcaacgagctcccagtcaagaccag
+agacagagctgatcgacagcgga
+>AW057242
+ttccacctggtaccactgtcgacactggaatcgtctctcccgaaggattt
+gatttctacctatgctctcactatggagtacagggaacttctcgccctgc
+gagatatcacgttcttctggatgaatgcaagttcactgctgatgaaattc
+aaaacatcacttatggaatgtgccatacatatggtcgttgtactcgttcg
+gtctccatcccaactccagtttactatgctgatttggttgctactcgtgc
+ccgctgtcacatcaagagaaagctcggtcttgccgacaacaatgactgtg
+acaccaactcgctctcttcatcacttgcttctttgctcaacgtgagaact
+ggaagtggaaaaggaaagaagtcacatgctccaagcgtcgatgatgaatc
+gtattctcttcctgacgctgcatctgatcaaatccttcaggactgcgtct
+cggttgcagctgactntangagtcgtatgtacttcatttgaagactcttc
+atgcagacggagccagagaaa
+>AW057243
+accgccttcgtcccaaatgacggttgcntgaactttgttgaagaaaacga
+cgaagtgctcgtatctggtttcggacgttccggtcacgccgtcggagata
+ttcccggagttcgtttcaagatcgtgaaggtcgccaacacctccctcatc
+gccctgttcaagggaaagaaggagagaccacg
+>AW057244
+ccgnctccactcttcacttgcactattcttatgcaaccaagcaacgncat
+gctcgctgttcttctcgccttggcttcatttgctcaaggaggcagatctg
+ttgctccggctggtgcagtcactgaaccaacagttactcaagctgttcca
+gaaggatcaggacttagttcagatgtcactgatcgtccaaacatcgactc
+cactgatgttgtatcaaatgcaacttcggtggaagatttgcttggaagtt
+caacaaatgcaaacaacactggtacattcaactctaggacctttgtaatt
+gctccaatgatgattcttgctttggtgcag
+>AW057245
+cgaaaatcgacaacgagacgcagtcacagtgagaaaagttcatcgagagg
+ttccatgtcatcaccgccaaccagattctatccatctgaagattcagagt
+cgatttactcgactcgaaaatgctccaaaaggactacgacgactgctact
+acggatgaggagaagccgaacaacagctactacattgacgatatttatga
+ttcgactgaagaatatcaagtgacattcccgacggttgagctgaaattgc
+cacgtcagagaaagcattgccgcaagcgatcgaagagacaggatcaggca
+cagggagagcatgtgacaatcacgaaatgtgttgatagaagacaagtcta
+cggagagcccgataataagaacaccatatccgagcactctacgtacacct
+actctacccatccggaacgttgctctcaggccggccgtacttctcgttcg
+aacagctattctgacgccacagatgccacatatcggactgg
+>AW057246
+gcttggttatccagcagcactactgcaattatgtttggaggtggagattc
+aaagcctatcgataagaagaaggaggacaagaaaggtttcgatactcgaa
+aattcttgattgatctggcctcgggaggaactgctgccgctgtttccaag
+actgctgttgctccgattgaacgtgtcaagcttctgttgcaagtacagga
+tgcttccctcaccattgctgccgataaacgttacaaaggaatcgtcgatg
+tcctcgttcgtgtccccaaagaccaaggatatgctgctctctggagagga
+aacttggctaacgtcatccgatacttcccgacccaggcgctgaacttcgc
+tttcaaggatacttacaaaaacattttccaaaagggattggataagaaga
+aggatttctggaagttcttcgccggaaatctagcttctggtggagcagct
+ggagccacttcgctctgttttgtctacccattggattttgctcgtacccg
+tttggctgctgatgtcggaaaagctaatgaacgtgaattcaaaggcctgg
+ccgattgtctcgtcaagatcgcanagtcggatggnaccaatcgactctac
+agaggtttctttgtctcggtacaaggtatcatcatctaccgcgccgctta
+ctttcggaatgtcgacacttgccaagatgtgttcactgctgatggcanga
+aactcaacttcttcgcttgcctggcttattgcctcagtngntacntnngt
+gatctgnatnctctnctatccatggnatcctgntcgtcgcgcatgatgat
+caagctggtcgcaagatgtctctaccagatactttgattggccg
+>AW057247
+tttctctgaataaccccaaacgaaagacattcgatgttggacaagtcggc
+ttacatgggcgctggaggctatggatcaggctacatgggatccaatgcct
+cgtcgtcgggatatgcccgcgaagactatgcacaaggaggaaatggtggt
+ggacaacaacagcagaaccagggttccggaggaaacaccaacccaggtgg
+acaggtcttcaaggcccgcactgaccaatcgtgctaccttggaccataag
+atgatcgacactaggagagccagtagccaaca
+>AW057248
+ttaaaagcaagcttcctcgtagctcgtcgtctggattctcgggaacagga
+tctgaaagatctatttcaagcaaaagatcgaaaattggaaacacttctgg
+gaaccgtaacaagatttcgcaaagtgctagaagaagacttttgcaagctg
+aaattgagagtcgacgcaatagatccaacaaatcaccaggttccagtcgt
+aaaagcattctgaagaaatctccgatgaaaaaaagaagatctctttcgag
+aaaatctttaccaaagaaagagcatattcctccagttcaaaagtttgcaa
+ttatcaagaatccagctgctcgtaatcaagttcgtggatttgttgctgaa
+tacgcacaagatgcagaatttgaagcttttgagcttttggttgacggaat
+tccatttgttgctctctctctgatgaatgctcatccagatcttcgtggaa
+gattcaaacctgttccaccaccaacaccgatgaagaagcatanctcaggt
+atncaatagcccacgaaacgntcctttgcgcgtgaatctatttccttcgt
+taannctcganaagtgttcgtttcactgantggaggatctctttntcgaa
+nagagctagaaaatcttcatccgggaatgcgattcatcggatcgtgagct
+tggagnagacagtatcgnatgccatgatntgacacttgtcgcagtacaat
+ggatttgtgggaagatgtgnaccatagtgtgaaaa
+>AW057249
+tgactgtgaatgattctctgctacaccattttattatcatcatctcggta
+tccaattccatattcttatttcgagttatccagaaatggcgatccattac
+aatatgtggagt
+>AW057250
+aattccttctacgcgaaaacaaatgcacctcccgttccgttgcccagtct
+tggtgaccggcgtcgtcgtcggcggagccgctcttctggcgattgccgcc
+tactactactggagccagaaaaagaaaagctctgatacttcatctgccac
+gtcatcggagtccaacgatgttgtcatgatgtcatcatcggagcccagag
+ccgatggaggagccgattcgaaggcaaagttcaatattgaggatgaaaat
+gtgagaagagtctgcgagaagctgttcatggagcagatggatttggggga
+agcttatttggaggatgaagaaaccgaggagctcggcgcaatccacatgg
+ctaacgcaatcgtgctcaccggagagactgctcagctgctcaaagtgctc
+ctcggctcgatttcaccggctcactttgccaatattcaaaagtacctccc
+atcggctgactngcgtgttcaccagcttctccaagacgagctcgccattg
+agactattgcccagcatttcgac
+>AW057252
+accattaccgtcgttttgaagaatgatcatcgtcgacccaccaactggaa
+acttccctgcatctggaggtaattcggttcacaacatcgtctcggagtcg
+gattctcgtttggcattcaaggtgaaatcgtcgaacaatgagcactaccg
+tgttcgtcccgtctacggattcattgatccgaagggaaaaaccaagttgg
+atatcaatcgtattgctggaccaccaaaagttgacaagcttgtcattcaa
+tatgctgaagtgccagccgatgagactgatcctcaagcaccattcaaagc
+tggagcacagcaaggtgaagtcatcgtgaagatcactgccgaatgagaaa
+atagatc
+>AW057253
+caaaagatagtacaaaaagtacctatgaatgtccaattcctcagggtgga
+ttctatggattagctgatcatccaaatcatgggcttattgcatcgatttt
+gaagaggaaacgccgcaaaatcaagatgacagaagacgtaaagaagatgt
+gcgggaaggttgaagcttacaagacatgctccgacaagctccatcaggca
+ctattgttcatgctcgtcgagagtccagaaatttcaaaggatttggtgac
+tcacttcaaaactgaaccaaaattatcgtatgctggaaaatatctcaaaa
+cttatgaggcaattgcaaataagggacgggataagacaaagtatgagagc
+ttggagccagcgattagtacgctttctctgttggatgcagaacgcgaatc
+tcgtgttcgcaagcagttggacaatttgaagccgctgacaaaattcattg
+gagaagaactattggagtacgcacggttgaggagagtgtactgggatggc
+ctggaagcctatgacgatgcgctgacccaacagagcnaagatcgcactga
+ggaagccgagcgaatcactgtcaacgctcaaaaatggagaaatgatgtcg
+ccagaaactgatggattcatcaaaaacgggattntcgacagagaccaaag
+cattgtgatgctattctgaaatccggatgaagcatcctctcncatcgcgc
+catgtcgaccacacaatcagccgaggntctgagaactgtggaaccccta
+>AW057254
+cagcactgggtatttcaggcccgggtgtttctgcccaaaataccgcaact
+ggtggtaaagttggtgaaactagtgaggtcacaacacaagtgtttcaagc
+ttcgacatatggtgctgtcaaagcaccgaagattgtggctgatgcccagc
+aaggaaccaatagaagttcagaaacacttgaaaataaaatggtacacgat
+ttcatgtggtggattgggtggagtcgttggagctattcttttctctggaa
+tattcttctttttcggtgtagcatctatgcgtgataaagtctactttttg
+gaaattcctgacattgtacaaatgtttggtataccctcgactgtccagaa
+tgcatcg
+>AW057255
+ttttatcaaagtgatacgctcaaacacgtgacaatttgaaaaaatgctta
+ttgaactcctcatatccatcaccgtgctcgcgtcgattctcgtctcgtgc
+aagaaagagaagaacgaagcggcaaaactgaagccacgtggcttcgatcg
+aaaatccagtggagcaccggctccggctggagccaacttggttaacaagt
+ctggcggttccggcggaggtagtggaggatctggagatgctaatagactt
+ctcaaaagagaaatcaaagaagtaaagatggaggagcgttcggctgacga
+taacgagacaatcaacgatgtaaagtccaactggggaactgtttgaaat
+>AW057256
+aaaatcgatttttcgtcatgattcacgacacgtacaatccgtacactgct
+gttcagcgaagcccatcacaatggttcatctttgtgccatcttctgcacc
+attggataatactacaatcattacattgtcccatgatgctgtcaatttgg
+ctagtattccaccaacggactctgcggaaagtcttcaaagtcgttatgta
+gtcccggaagtaattccattcaaaagctcatctgaagttgacactcgttc
+ggctggaaatccatatttccatcatccaaacttcaccactccatctcaat
+actttgaacatattatggattctgaaggagaacaacacaagatggagaat
+aaaaacgaagaaaatttgctgactattatggatatcaagaacttngagga
+tccatttggaaattatgatatggatggagaacaggcaccaatggttagca
+tcaatccaccaaatgatgtgactgttaacaagaacgccttctcttctgac
+tcaattgctgatattcagaacatgaacatctcattttcgatcaaagcttt
+cactgctccagctatccccatcagtctcctgagggacacttantaagaaa
+tgtgagcncgaaatatgttcgagtgagcagactgatggaga
+>AW057257
+aggtgatgtctacaagaaggcagtgcagttctactcaaacatcacggctc
+caagatcaacatctgtgcttgctccagtgatgtcgtcccttgaagtctac
+attaacaccaccacgacatctgcttttgcttcagctcagagcatcaaagt
+ggctgatattcttgaagaagatgcagatgcaatccgtgtgaagtcaatta
+gaatggctggattcattgcccaatgcatcatctttttatttgtctacacc
+atcgtcacgatggatgttgagatttggaaaattaatatggactggttgaa
+aattcaatattttcagcatttcgaagactccgctgctgaagttccggtct
+>AW057258
+aattccgaagaacactcctcgattgcaaattgtaataaccctaaaaatga
+ctcatttgaacttcgagactcgcatgcctcttggaacagccgtcatcgat
+caattcctgggacttcgcccgcatcccacaaagatccaggcgacctatgt
+gtggatcgatggaactggggagaatctgcgctcaaagacacgaacattcg
+ataaacttccgaagagaatcgaggattatccaatctggaattatgatgga
+agctcgacgggacagggcanagggacgtgactcggttcgttacttgcgtc
+cagtcgccgcttacccggatccatttttgggcggtgggaataagttggtg
+atgtgtgacacgttggatcatcaaatgcagccaacatcaaccagccatcg
+tcaagcctgcgccgagatcatgcatgagatccgtgacacccgcccgtggt
+tcggaatggagcaggaatatctgatcgtcgacagagatgagcacccactc
+ggatggccgaaacacggattcccggatccacagggaaagtactactgtag
+cgtangagctgatcggcgttcgggagagagtcgtcgagacacattaccgn
+gcttgtcttcacgccggactcaacattgttggaacaaatgccgaagtgac
+tccaggacaatgggaattncaaattggaacctgcgaaggaatcganatgg
+gagatcagttgtggatgcgagatacattctgcacagagttgctgaacaaa
+attcgtgtctgcgtatcncctgatccccaaaccaaagtcaccatgggaga
+cttgaacnggagccgatgccacaccaactttctcgacttgcngnaaatgc
+tgct
+>AW057259
+taaaatggatgtcaagcataatccaaaaaaagaagtgatcgaacgaagtc
+tgaatgcgactgaaaaagcgatcgaatcgattcgtgaaggtgacctagtc
+cgtcgtcacactgttttggagctccgtgacatgcaaatttcgatgagtga
+ggatttcgatcaaatccgtggaatgatgcatgaattggatcaccaaattg
+acaaggaacgcgctgaaaattcgaaatggatgaattggaaaattgagaag
+gccaaggcaactgctgatcaggcgttggcctctacactgatggtcaagga
+cgtccagctgttggaaaagaaggtcaatattctgaaggattctgtgattc
+aagttaacaaggcattctacaagtatgagaaggatgttgacatgaaagat
+ttgatggatcaagttactgacatggtgcatcgtacggaaaagaaggagca
+agatgcgttggagccccatgcgactgatgagcaagctatcgagaaagcct
+tccgtggagcaattgaaggcctttatggcctgaaatcttccaatccaaag
+gtcatggaggaagctaagctgttggctggagaaatgcgtgttttcagaga
+tgctngctgctacaagaacttncactcgatgatctcaaaagttgcgcctg
+gtaaatcggagtttccttgatacagcagctctgagactcatttcgacttc
+tgatcctctcgctgccaa
+>AW057260
+ctttacggcggaaaaagtgaaattttcggtagtttctcgaaaaatatttt
+tcaattcaagcaagatgccagacgatgtgtgcgacgaaacattgaaaatt
+ggagtggttgttgggaaaaagtaccgagttatccaacagctgggtcaagg
+cggctgtggatctgtgtacaaggtggaggacatcgaagacaagacaaagc
+agtacgcgatgaaagtcgagttcaactcgaatgccaacgctggaaatgtt
+ttgaaaatggagggtcagatcctcacccatcttgtgtctaagaaccacgt
+ggcaaaatgcatggcaagcggaaaaaaggatcgatactcgtacgtggtga
+tgacccttctcggcgaaagccttgaatcacttatgaagaaacacggacca
+tttttcaacgtgtctacccaaatgcgcatcgggatttgccttttgttcgg
+catcaagcagattcatgatattgggtttatccaccgtgacttgaagcctg
+ccaacgtggcattgngaaataaaggctcccctgacgaacgctacttcatc
+gtgctggactttggcttggcacgccagtacatcacggataaggaggacgg
+aaaaaaagagcgtcgcccacgtgagaaagctcntcttcgtggcancctcc
+cgttattgctcgtagctatggcacatcgtttcgagccagggagagtgatt
+acctgtgggccttgtctacatngctcgcggangtggatgccagctggnct
+tgtctgatttggatgataaggcgaaatcnnggaaatgaancnaacgtngc
+cgacccagatctcttggcaaaagcccaatcaaa
+>AW057261
+gccgagaatgtgctgggagcccctggagctggattcaaggtcgccatgga
+agcctttgacatgacgagaccaggagttgccgccggagcacttggactct
+cgtggagatgtttggacgagtcggccaagtatgctctggagagaaaggct
+ttcggaaccgtaattgccaatcaccaagccgtccagttcatgcttgctga
+catggctgtcaacctcgagctcgctcgtcttatcacctacaaatccgcca
+acgatgtcgacaacaaggtgcgctcttcatacaatgcttcgatcgccaag
+tgctgtgccgccgacactgccaaccaagctgccaccaacgctgttcagat
+ctttggaggaaatggcttcaactctgagtatccagtggagaaactgatgc
+gtgatgccaagatctatcagatctacgagggaacctcgcaaattcagcgc
+attgtcatctcgcgcatgcttctcggacatttcgcgcaaaatggaactag
+cagaatttaggatgtgccgtttttgagcaa

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/2.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/2.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/2.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,563 @@
+>AW057262
+gacgagggtccgtctcggcgtggtgcaatttgctttagagagcgatcagt
+ttcaagaaacggtgacaatgaaattgatttggataaaagtttgagcttga
+agaaaccatctgcatcgaagaatttggccagtttgttggaaaagaaggaa
+gaggcaagataaactgatgtaacatcttccatgatcagttcaaaacctcc
+aacttctcctggcacctccgtctacatgtacaacactggatcagccaatt
+cgacttttatgtctgctaaagatttgcatanagaacgtgttgcagcttct
+actggtcccagatctgcttcaaaatctccaaaaggaacacttcgcgtgaa
+ggaaactaaagtcattcgtgaagttcgccaagaagatggaaagccacctg
+agattagtgagaagaaagaagaaactgtgaaagaggaaaaagtgaatctg
+agtgagagattgagagccagatcgagagcttcatctcctgcaactccaac
+tttgaagagaacattcaatcagacggatgagtcgaatattgtgactttga
+gtgcagtcaaagaaacccatcaaactttggaaatgacaccaatcatagtg
+aattctgaaacagttccttcaacttcttatggacaacgtgcttttccctg
+agagtgttcaattggatatgctcattgacaagaaacgacttgt
+>AW057264
+aggctccgatcccaaagcctgaggaagatctcaagggctccacggatcaa
+agtaccactgagccaacgaagctcgcctgatcccaaagcccatcggaacg
+ccaggaaatccaacaaatgccgaattcattccgcgagttgcaggactcaa
+agttcaagccggctccaagcccaaagtctgaaaagggccccgcggagcaa
+agcttgtctgagccgtcgccggtccccgggaataaccgcaggaactctga
+gacatcacaagttgacacaatttccccggtgccaaccaagctcgttggaa
+cccaaagtccatcggaacgccaggaaattgagcaaatgcccaactcgttc
+cgcgagctgcaagactcgaagttcaagccggcgcaggctccaaacccaaa
+gcctgagcgcgttgagaggggctccgcggagcaaagcatgtcggagccgt
+tgtcgatttctagagttgcatttggctcgccgatcgctccgaaaccacgg
+ccatcgccactccaagctccgcttcttgagacgttggctactccaccgac
+aatcgacgctcctaccgctgcaatcgagacggcaatcgagagaagcgcgg
+aantttcgtcatctcactcggaggatccttccaactcactttttcaagtg
+tgcagnatgccgtaaggaagaatcgagtggtc
+>AW057265
+ttcgctaatcattcccttgttctactgatcgttggaaggttctagctatg
+aacgtcaccagtgtcacttcagaggatggtgttaaagaattcgaaaagat
+tgttgtggaacctgaagatatcgaatatgttgagattccggccgatgcca
+aaaacgttgacttgacgcgtcaccgtatcaaagaaatcggtgattattcg
+tggctcactcacgtcgaacacttctcgtttcgttggaatctgatcaaaaa
+gattgaaaatctggattgtttgacaacgttgactcatctcgagttttacg
+ataatcaaattacaaaagttgaaaacttggatagcctcgtcaatttggag
+tcactcgacctgtcattcaatcgtatcaccaaaattgaaaatttggagaa
+gttgacaaaactgaagactctcttttttgttcataacaaaatcactaaaa
+tcgagggtttggatacgttgactgagctggaatatctcgaattgggtgac
+aatagaattgcgaaaatcgagaatctcgacaacaatctgaaactcgatag
+attgttccttggcgctaatcagattcgtcttattgaaaatgttgatcatt
+tgaagaagctcacagttctcagtcttccagccaatgcgattactgtagtt
+gataacatttcgggacttcacaacttgaaagagattt
+>AW057266
+gaaagtctttggggagatgacgagcctgtacggaggaaggagaagatcag
+cactcggttgttgggggtccaattgcgttcagaatactcttgttttcagt
+cattttgtgaacagccatactgacggtattccacgtattactgattcatt
+ccatgacgacaggctgccatatgtttgcacgatggatgtcacgatgattc
+cgatcctt
+>AW057268
+ctactacaagctcggtgttggaatgaatgagtggaagaaccctgagcacc
+ttgccgagcacatcaatggagctgcttactccaactttgacattgcttac
+tatccatcggagaacgagcggttcactttgtacactccagaggaattctt
+gctgtatgttaagagattg
+>AW057269
+aaactcgaagaacagtcttgaccaacacgagatgtattcacttttgacgc
+tcctcttcgtcctcttcttctctggaagcactctgctcgttcaatgtggt
+ggaaagaaaaagggagcaacttctgccgaaggaaaatcttcgacgatggg
+cccggctcctggaggagctcctgctgctgcttccgctcaaggagaacctg
+aagagaaggagtaatgaaca
+>AW057270
+tcgctcgcttgcgtctcttgctcgccgcccgtgcacttgaatgcacagcc
+cgtcttcagaatgttactgttaagggagttgccgtgcgcaataagaagag
+attggcaaatgttgaagttcaactctatgagaaggacacccttgacccag
+atgatcttttggacaccaagaaatctgatgctgaaggagaattcagcgtt
+tacggagaagaagatgagactcatgctattgccccataccttttgattac
+ccatagctgcaacccatctaacccaattgtgtccgcatcgcaagtacttg
+gtgccagaggacaagatcggaggaacctacgacatgacctacgtcaccct
+cgacatcaaggttcacggagagaaggaaaaatgccagtaaaaagtgcaaa
+cttcctggattttattgactatctaaatatatattttttctatatga
+>AW057271
+aagcgatcatcggatttggaagctgctcttcgaattgtgctccagcagac
+tttgaacatcgttttgcaagcgcaggagaagctccccgaggcaaatgtgg
+taccctcaactccgcccacctcaccgagcactgatatcggcgaacaaatg
+gcatcgttctggaatattccatcacccaaccctcctgcaacc
+>AW057273
+gttgatgatctataccactggaaacaacaactcaagtgagcttgtggatc
+caatgagcattactctctgtgtactctaatgtgcccaccaacatgagaat
+tgccaaacaccaccatgcagagttgactggcatctgctcattttgtacct
+gcttgccacggccacggatactcaaactcacgtcaatgcatactagctaa
+ctcttgctaccaggactcaatctgtttgatgccgaatgaacaactagtgt
+tcactccaggaatgttt
+>AW057274
+tctcccaacaacacctcaagatgaatgccatctacactgccgtccttgtt
+gcttcaactctcgcctacactgcaatggcttggattggactcagcattga
+agccgccaacgaggatatgatctgaagtggcgccc
+>AW057276
+tcgactaaccgtctccactcttcacttgctcaaatcttcatgcaaccaat
+caacgtcatgctcgctgttcttctcgccttggcttcatttgctcaaggag
+gcagatctgttgctccggctggtgcagtcactgaaccaacagttactcaa
+gctgttccagaaggatcaggacttagttcagatgtcactgatcgtccaaa
+catcgactccactgatgttgtatcaaatgcaacttcggtggaagatttgc
+ttggaagttcaacaaatgcaaacaacacttgtacatccaactctaggacc
+tttgaattgctccaatgatgattcttgctttggtg
+>AW057277
+tcaaagttcataaacggatcaatacttgcaaatgatggcaaaatactttg
+gcgccacagatgcattcaatgcaattgttcaaaaagtcgacgaaacactt
+attcaagcagaatcccatcttcgtaatcttcatgaagatacagtgggagc
+aaagccgtctgatagtttgccggaccgcactatcgttccgtccccatctt
+ctcaatcggaacgttcatgctccccggagcctcgtattgttgctcctcaa
+ttgtctgcatactctggatcatccgctgcgtcttcttcttccgtgaatca
+tattgatgtgaagagcaagtcgtatttggcattggataagaagaaagcac
+tgatcatgacttcgctcaagtcaaagagagttatgaacgatagtgatgtg
+acaaaagttcagaaattgatcgatgacttgttcggaaaacaaacttcttc
+gtcctcatcttccatgtccatccttc
+>AW057278
+aataacctctcccaacaacacctcaagatgtttgccatctacactgccgt
+ccttgttgcttgctctcgcctacactgcaatggcttggattggactcagc
+attgaagccgccaacgaggatatgatctgaagtggcgcccatc
+>AW057279
+tcgaagtgatggattctccaacatcaccattgacttcttcaaatagtgga
+cttatcactgttctggaaagaggagtttcttctgaagacacattgattcc
+ttctgttcgtcgtggtgtcattccagtcaacactcttcgttaccaaatag
+aaaagcatctcgagatgtgtactccagcttctgaacaattgtcaaagagt
+tcggatcccaacatctcctcgatgtacgttttccatcaaggaattcaagt
+aaagcaggaaccaatcgatgatgaccaagaggaagagcaacaagtacaaa
+agcagcttgtattcaaaatcgagggctccgaagacgaagaagctgtgaag
+aatgagt
+>AW057280
+aaccctttcgactacaactaacatggacatggactagcgatcatcggatt
+tggaagctgctcttcgaattgtgctccagcagactttgaacatcgttttg
+caagcgcaggagaagctccccgaggcaaatgtggtaccctcaactccgcc
+cacctcaccgagcactgatatcggcgaacaaatggcatcgttctggaata
+ttccatcacccaaccctcctgcaacc
+>AW057281
+ggacaacaagactgaggaagcatggttctcgttcaatgggaaggtgatta
+agcagctcgggccacagctcaacgagatgtacatcatcacgcgcaactgc
+atcggaggaccaccacattgcccatgtgctgtgtgcggagctgctccacc
+accaccaaagccagtgccacgtgtcgagagagacgaatggatggacattc
+gtgagggagatccatggncgactcgnccaacttgtcaaggctcttgacaa
+gactctggacacccttccaggagtcaatccagaccaatatgttgctctct
+ggtacatgcaaggtgaacctgttatgggtcgtgtctggaatgaaggagga
+aaggtggctgccaacttctcgtggttcaacaacgagtattgcaagaatgt
+tggatctatccagcttctcatctatcttccggacagtgttcgtggttntg
+actatggatggatcccattcccggaggctgctcagtttggagacaaagct
+tggcatccagttcatgtcaacaaccacaagggagatatctncgttggagt
+tgttaacgttgctggaggaaagcagattcttgccaggggtgattgtccgt
+aacgagaagtatggttatggataccaaagaaaggagcattcttgcaa
+>AW057282
+agtacgctctttaacccatgaattgttgtgtccgggatgtcgtcaatgtc
+tgagtcggatagttatcaatcgagccaattgacaagcgagcccgatcttg
+tggcgctggatgccaagataatggcggtaatggatggaactgaagagctg
+gagagggagattggcaagatgatggctctgcagcaagcgattagtgatta
+caagaagagccatcaacatagaactcaaagaaccaaggagaagcttacgg
+atatgtctaaagacccttacaatcaccgggaagaacattgttaagtcgtg
+tgaagagcggctcaagcaaatctcggatgtcaccgagccatacatcaaca
+gcggaatctcatcggtggaagatcacacctcctcggttgtggagcgctgc
+attcaaatgctcggagcaatttctggacttggtggggcaatgaagaaaac
+ggagggattgtcttcgaaagtgctcgagcatcacagaaagctcgcgatca
+tggagcacaagcaagccgatgcaatttctcgctacgagaaagccgtcgga
+gttcagaagctcgtccgcgagcatccggctggagacccactgcacgagc
+>AW057283
+atcgatcatagtgcaaaatggagatttaatctaattgcattcaaaatgaa
+gagcaaaagaaggaaaaggaacttattgctgaaaaagcagcaaaggctgg
+atttgtcaaccgactttatgtgaacatcggacaaaaagttggagttgtcg
+agctgactaagttagagccacgttttgagaggaacatcgataagctcacc
+tcctaccacaacttaatctacaaaattgtgaatgtaatcgaacttcaagt
+tcaattcatgcccaaggcaatggcaaagaaagcagttttgtgtgctcccg
+gcgagattccatgggaagttcttggaggatggttgaattatttgggaaaa
+tatcagtttgatgggcaacattctaaaatgctggaaaaatacagtagcgc
+ctgtggaagaattgctcaaaaggagatccaggtgcagaaaagaactcgtt
+ctcatctgattaaaaagatgcgtttgtacactggagaggaaagtgagata
+ttgaattcgaacgtagaaaatttgaataacttgctccatgcaatcgatga
+ttctcgtcaccatgtgaagtcttcacaaaccacgaaagaggtgaaagcaa
+aaggcgaaacgtatcgtaaagccatcaatgctttcaacgaaacggcaaat
+gaagttcaagcattgattgacgaagttgcaatggtttgggtgccc
+>AW057285
+aaaaactacaacttcggagtaatggaaaatgagaaaagtaagacggagag
+tttgaagaaggacgaaatcgatgaggcaaactcagaatcttcaaaagtgc
+cactaacaattgatccagaggaagccaaactcccaaatgccggcggaaaa
+tcggagcatatggtggtcaacttcacttcaaaacgcatggcgatcaaagt
+gagatgtggcaatgcactatttcgtgttgagccaactcacatgatcatcg
+agccgaacaagtgccgccaactgacaatcaatcggatgcccggaccaatt
+caaaaggataaagcgatcgttcaatacctccaaattgaaaatgatgtgca
+agatccgaaggctgcgttcaaagcagcggacagtgctggaactaagattc
+cacacttgaagatcaagctggtggccggagcaagtggaggtcgtcagatg
+tcgagagaggtggtggatgagtagtttgggaaaaaaac
+>AW057287
+gaaaagaacatgaaaataattctcnggctgccgctagaattcagcaagtc
+attgcgaatgcagccggaattccatcatgtgaatatggaggaagtctctg
+gtataaacaagaagaaaggaataaactgaaggaaaccctcaaaactcagc
+atgatatttctggaagcagaaataatagtgatagtgggatatctggtgga
+ggaggaagcagtgataatttgagcatcgacgatttcgaatctgtttcgga
+gaaccagtgtgaggaaaatgtgataccggctatgaactgaatttgcacat
+tgtaatttttgttattaaatcatattgaaaatt
+>AW057288
+caaagttgctcaaaattttcattgccggattgaccttttagctgcattgc
+tgacaagatcaaaagaatggtcaaaggaaaaggttcaaaaagatcaacac
+caagtttgcgcgccaagaagaaaactggcacggatagacagaagccgtct
+gtaaaacaaaatgcatctcaaaactcaaagaagagtagcagacagaaaaa
+gacccccagtgttggaaaagaacgggaacaagcaacggataagaaacgag
+aaattgagaaaaaaccacaggaaaagactgctttggatgagcagcaaagg
+aaagctcaaacggagactatcagcaacttggaaatccttccggacaagaa
+tcctgctaaaatggatgacggttatgaagatttcggtcctggtgcagctg
+ctcgctaagt
+>AW057290
+tcgccatcaccgccttgctccgcgattgccattttctgcggaggaaagaa
+aggagccggagaatctaaagaaaagccataggaggatgtgtatgaggatc
+tggcaccaggagataagaag
+>AW057291
+cgatcaaccaccatgagttttgatgaaattacttgaccttcggaaccaag
+aaccgcgatcaaggatatgatttgctcaaagcgcgtctcgacatgtgatc
+gttcggtggaagtcttgtggagacttgctcaagtaattcatgagaagtct
+gcatgtgttccaaaagctcaacgtaaggcaagtgtcaccgaaggactcaa
+gtttgctgaagaagctgtccagaaggatccaaaccatttcaaggcgctga
+agtggaatgctgtgttgactggacaagcaaccgaatatatggcaacgaaa
+gaaaagttggaatgcagtaagaagttcaaggaattgctcgacaaagctct
+tgctaaggagccaaaggatacggctctcctccatttgcgtggccgttaca
+agtactcggttgcatctctgacatggcttgagaagaagcttgctgccacg
+ttctatcagcaaccaccatcacattcctatgaagaggccaacgaggattt
+ccttgctgcttataaggtcaatccaaaatggatggagaacacattttatg
+tgtccaaatgctacgtagcaattaaagacaagaacaacgctcgcaagtac
+ccttaccgaagtgtgtgacattcgaaccgtaattccgacgcttgaaacaa
+gagttttgccgatgatgccgaagcagatgttgtcttaagctttaa
+>AW057292
+tttgcgcattttgttgctgctcttctagcttacgtttaatctctctctgc
+ctttctgcctccttctttgcctcaatctcgagtcgttctttctcttcaga
+ttctcttctttttctctcgatttcagccaactcatcgcttttctgctgtt
+ttttgagatcagcaatcattgaatcaactcgtttcacataatgctcataa
+gcatttt
+>AW057293
+aatttcaaagtaataatgcaaaaagtttgtgaaaaacggatcgatagcgt
+cgtggcgtcggctgtcgagggttctggaatgttctacgcggtgagattcg
+agccgccgccgtcgaaaacttgctttgacgtgactctaaagacgtacgga
+ccgccctacaccgagtacatcgtaaccgtcgcaattccgccgaaattccc
+gttctcgccgccggcgatcacttgcaaaactgataaaaatatgaaattcc
+tgtttttggaggaaaatcaatggaaaccgtctaccggaattgttgcagtc
+cttatcgaagcctgcagtgtgatttcgcgtcgagacctggtcccccgtgc
+accggttcttccacgcatccgtccaccacaagcacgtacgccaactagtg
+cttcgcctgcaaagtcgccaca
+>AW057294
+tgagagatggtaaaaccggcaaatacttcttcttcacttatcactaaaag
+agttttgactctcggtaataatgtcacaattgatatatacgatcatcatt
+attatccaatgtggttttggattgtaatttctgttggatttgtcttctgt
+actctgagctgtgctgtttggtttatgtgtgctatgtggagattgaaaaa
+gggtaaagaatgtaaccatccatcgtttgaagcgcgtaccgttgtgacaa
+aagatggagaagagaaaccggatccccaaatggctcaaaaatcagaaaag
+acttgtaaaaaattaggtgcattgggtgaagctgaatcgttggccaagag
+cttcaaaagtatcagatcgaaaaagtcgatgaagtctacaaaatccaaga
+aatctgaaaaagatgtaggacatgatgatcataagaaggaagatgttcat
+ggtgatcaaaaggatgataataaagatcgaaacgatggaggacgagattc
+tcatgttgttcagatggaacataattctgaagaggaacacgagccgagtg
+gatttaaaaagctgggcaagtctttcttcaacttcaaaaagtagcaaact
+ctcttcaccttcacccaaagaatagc
+>AW057295
+atcaactgacaccaacacatcctgcttaatcgacatctatcatgtccagg
+aataacactcaaatgcacatcacaagttctcaattagaagacggatttcc
+atcgattaccaacaattttctgacagtgacagtgaacttcaactacgatc
+catcgaatccttctgagccaccaacaaaagtcctggagaagatgagtgat
+ctgattggccaacagattgcgaatcttcaaaagggaaaggcaccaaaggc
+aaacgacgacaagagcaaaggttccatgcctaccgttgaattctctcgga
+cccagtccatgactactcaacaaagtttggaggacgatgatactcaacga
+gaaaatgttcctcttgaaaagaaaaagaaggggaaatacagttcggagta
+cgctaatcttttggtggaaaagccaactcgttaccgtttggttccatcga
+aaaatgtgaaagtagttccagaagatgagctcccgaagaaaaagtttgac
+aaggatcggaagagaagagagtacgtggaaattggaaaactgtattctaa
+agaaccaattattgatgaaagtgaagttttgaaaaaggagaaagggaata
+caaagagaagaagca
+>AW057296
+tccaaatgacaatcgacatgaaggtggtgtttcttttaaatgctcaaagg
+ctcggatcagccggtggacttggtccccaagaaggtcgtggttgccggac
+aggaaatcacagttgattccaagtcgaagaacgaa
+>AW057297
+ttaaagcactgtcaactgatacgatttcattcaatacctggaaaagcgat
+ttccttccatatgaagaacgttgtctgcttgagttatgaccgtaatcatc
+ctattcaatcaattcgaaagttccggaagaaaatgagcagcaagggagta
+atggagcaatcaactcagccgaaggacagcacaaaggaactt
+>AW057298
+taaccctttcgactacaactaacatggacttgttcaagcgatcatcggat
+ttggaagctgctcttcgaattgtgctccagcagactttgaacatcgtttt
+gcaagcgcaggagaagctccccgaggcaaatgtggtaccctcaactccgc
+ccacctcaccgagcactgatatcggcgaacaaatggcatcgttctggaat
+attccatcacccaaccctcctgcaacct
+>AW057299
+cagaatccccatccaactaccaccggctggcttatcaatggacgagctgg
+aagtattggttcaacaagcagtcgctggtcagaatatggtcatcactctt
+ccggttccagcccacaagaagttgattgtcgagcagatcgttgtgaagtg
+cgatgaacatgttatcagcctgccagcactcattgtcaaacatcgttagg
+ttcttcagtagccgaactcttgtcgaactctctttctctttt
+>AW057300
+gtagaatctaggggaacggttcgttctgctccaatcgcctgccaccgtat
+caccgaaatgggaccgaaggatgactgagagagctggttctacttacttc
+tcgatctaatcgtacctagcggtctaccttggcgtaagctctctgacaag
+catgaggtgctcaaggagaaggaggaatgccgaaaagataagcgttcctc
+gatgttcgccgggctcacacagactgattatcctagtacagtttgtgact
+acattgacggaagagcttaccaggatcgtgtggactaccagttcatctac
+aagaatcttgcagaggcttgcaaggtttgtaatctt
+>AW057302
+tcgttcctcttctcgaaggccctcgtatacggattgttcggatctgctga
+gc
+>AW057303
+ccctgtcaaaagaagtcaaacccatttgatttgcacatctccactggccg
+cgaaatcgttcaacgcaactttgtgttccgcaacaccactggcaaagact
+tcctgctgaaattgcatgctacgaatggagctgtcacattcccaacggaa
+gtttttcgttctccaccactatcgcatcgcatcatccagttccgtgtgaa
+ctcatcaaagctctctcaatgggacaagatgaatctttcgatcagaggat
+acgtgttgccgatctatgcgaagagtctgaagcagttcattgatcagaaa
+aaaactgcaggaactaatgagcaagaggcattctcattgtctgtcaagtt
+cacggatcagttctcggctccgcagacagtcatcaacttgccaggatatg
+ccacgtgtatcgagtcgactgatcatccggttgacgtggaagaattggac
+actacaactgcagtcaacatcgaaagagatgtctccactgctgctccaat
+tggttcaatgatgggatttgttgatgagtacaaacgtcgtcaattgaaca
+aaggatgctggttgtccaactacatctgtggaactgaaaagcaaccggag
+aagcagtcaatgagatcttctcgtagatcaagccgttcgtcgaatcgttc
+tgcaaagagctcaaaggcttgccgtgttcaagcctaat
+>AW057304
+tcggtggcgagtgcgtgagttggtcaataaagttcagtcgactaaagcaa
+aaagtgaaagaaaagtacgggcagaagcaaggcgacagtgcttcccgccg
+aaatcgacgaggcgttgatctacttcaaatctctgaagccccgcgtccag
+gatctctacaagcacatggcaaatctgaacgacgtggcaaattggcaagt
+gaaagccaacttttcaggcccactggagaactatgctctgctcggagatc
+gtatcaacgtgcaaccgttcatcaattgtgttgacacgcggatggaggca
+gatgtcgagtcgatggataaggggctggcgatttgtgagcggtacaaggc
+gttcacacagaacgagagtaagcttcacacaaatacaattgccaatttga
+ataagacacgtctcgatatggatagtgctgcgaacaagtacgccagcaac
+gacactgacgtcaatcggactcggtttgatgatgccactcgtgagtttga
+ggtggcttgtgatcggatgcgtgagttggcgataagtattcagacaattg
+aggagactcattccatgtggcaagatgaattgatgcgggagataaaggcc
+ggaatgaggaaaccgaattga
+>AW057305
+tactgagaaggatgaaccgtcagacttctctctcctcctcatcgtctcga
+tccttgccgtttttgtccaccatggatttgctgctgctgaagaagagaag
+aatacagcttcagtcgtcagccctgctccggactctgaagcagcccaacc
+tgctggaaacggaaccgaaacaccaaaagatgaggtgaaggatgaggcac
+caaaagaaggtagtgaaactgaagcttcaccagaagccaagacaaaagga
+tctatggtattccatgcctcttggagcccattccacagttggtctcgccg
+gcattatgtgaagaagtctgccg
+>AW057306
+ttttttgagattaatttaatttattccacagtaaaagttactcaaagagt
+ttcatagccgatggtcttgaaatcgcattcttcatgactggggatggctt
+cttctcacgttgctcccgcttcaaatcaccgatgattccccacatatcga
+aaaccattccatccgg
+>AW057307
+ttgtccattacgagatgtattcacttttgacgctcctcttcgtcctcttc
+ttctctggaagcactctgctcgttcaatgtggtggaaagaaaaagggagc
+aacttctgccgaaggaaaatcttcgacgatgggcccggctcctggaggag
+ctcctgctgctgcttccgctcaaggagaacctgaagagaaggagtaa
+>AW057308
+ttggaaaagaaggagacaaagaagaaaggttagtataacaagaagagcaa
+gaagaaggcgaagaagggaaagaccaagaaggttcgaaaagcagacaagt
+acgagtctcaaaactttctgtttcgagtggaaggagccatgttctgtgcg
+ggaattatcgttgctatgattatgctgttcgtcatcattatctacggaat
+aatcacttcaagtcaaactggaggacagttcaacagatacatggccccac
+tattctgattggatcaggacagagaaatgtcgcaagagacaa
+>AW057309
+acggctacgagttctttgcctccaagaagatggtcaccattttctcggca
+cctcactactgcggacagtttgacaattcggctgcaacaatgaaggtcga
+tgagaacatggtctgcactttcgtcatgtacaagccaactccgaagtcca
+tgcgtcgaggataagctctgcaaactgtcaccaccatccaaccaaccaac
+caa
+>AW057310
+taccgtcttaccggccgtggctatcttctgtggaggaaagaaaggaggaa
+cgaaaggagagaagaaggattcagtgtacgaggatcttgcttgcagagac
+aagaagtagttggaacttcatcgacaccaccaatcaacaaaagacgtctt
+caatgctttttcatcgtcttcttcta
+>AW057311
+gaattccaagtttgagaaaatgaactgcttattctttccgctttcctcct
+tgttgccatctttgtcatatccgatgctgccgttgctcaacagcaggtta
+aggacggagaaaaagttgaaatcgatgctttcaagggagccaaggcaatc
+aagagaaccgttgccggtggagatcaaatcttccaccttgacggagataa
+caagggatcatttgttgatgctaagggaaagaagattgagtcaaccaatt
+atgaagctaataacggaatccttatcattaagaagttcaccaaggccgat
+gttggaacctactccgagcacccagctaaaaacaccgaaaccaagcacgc
+tgatggatccatctccgctgttccaggactcactcttgatatctccctgc
+aataaacaaa
+>AW057312
+gcaagtatatccagcggtttgactctgtcagaccttatggctacttagag
+aaatcatcgagcgtttgccactctaagagcctacgatgcgatgaggatcg
+ttgttcgacatgattgtgctcctagagctccttatgaacacgctcctcga
+caaattggctatgatgcacctgtttatggatctcacatgcacgcagcttc
+tgtcgattacctactaactcgacctgttgccggtgccaaagctcttgacg
+ttggctcaagaagtggatatttgacagtcagtatggcaatg
+>AW057313
+tcgaagaacgttgagacgatgactgcaattgattcagtcgaagtgatgga
+ttctccaacatcaccagtgacttcttcaaatagtggacttatcactgttc
+tggaaagaggagtttcttctgaagacacattgattccttctgttcgtcgt
+ggtgtcattccagtcaacactcttcgttaccaaatagaaaagcatctcga
+gatgtgtactccagcttctgaacaattgtcaaagagttcggatcccaaca
+tctcctcgatgtacgttttccatcaaggaattcaaagtaaagcaggaacc
+aatcgatgatgacaaagagcgaagccaccactccttcttcacagcttctt
+cgt
+>AW057314
+cgtctcctcaagtccgccgtcgcccaatcgaacaatgtcgtatgcacaga
+gtttctacgccgatcagaagaaagtcgagaagccagcggagcaagcttcc
+tctcctgccacggctgccttccccgctaccaccccaatcgctgaggatcc
+tctgactccatcccaaatccaggatgccatccgtctctaccgttcggtgc
+tctccttgtctgcgtctgccccatcatcaccggttcgtcaagcagcggct
+ccagttgctccggaacagccgattgtgcactcggactactatggtggacc
+atcggatattccgttgtcctaccgtgttaagtacaccacgacccagcagg
+cacctgcgtctccagctccggacttcacagagcaacagttgatggctcag
+ctgcaggctcttcagatccagcagcagcagcagccggcaccagatgttcc
+agtcgtcgagccagttcaacaagttcagcaaaagccaaaagttgctccga
+agatgcttcacaagatgtatgacgatgaagagtctgggtactgcttcgct
+cgtaaaaaggatgtggagcaagaaggagaagttccggagaatccacgtgg
+nccgctccagttcaccactttccgataccgacctactcgggctcctccgg
+tcaactatgaagccttccggggtttttttccc
+>AW057316
+catcatgttccttcgcaccctcgttgcccaattcacaagtttctgccatc
+agctccatcaccttgcaactctgtcaagagttctgtgctggtgtcaatgg
+tggtgaatcttacgcattctgctctccatggatcagttttgccactcaca
+gaaacaagacttgctacaatctctgtgttcataactgtgctgctgtctat
+gatggttcctgcacaactgataaagacttcagatgctgcttgaaaactac
+tccagccaagaaacaagaattcaagatgagtggttgcaacaagccttaca
+acaatctttaaatgagttctctggtt
+>AW057317
+tctatgcaaaggattgttttaacattggatgggcagcagataaatcaaga
+cgactggacatgatgtgcgtatttaacgatgctctagacacgagtggatg
+ttgttatagagacacctcaaacttctgttcagaggggatgtcagtgttgc
+catctcaacgatgtgatacccttgatgactgtaatatgcgaacaaatcaa
+actgcgcaaagatggtgtgatcccgtttcaaaatanttgctgtccgattt
+gaaaaaggaacaagcacttttatgcccggacaacagtacagctttaatga
+atgaacatcattgtattaactacgacgaaaaagatatttggagtggaaag
+tgtaagacaccgaacggaatttgcaaatatggacactgttgcccatcaaa
+taaaactgaaaaattgttacctggaactccatatcgcactcatcaaaagt
+gcactaacaaaacaattattcgtgatgatcaacgttttggatactgtgat
+cctaaaaccggaagggtattcataatgagtgaactcaattttcacgggca
+gagaaacaaggaactctcgtcatactgtaatactgcaagagattgcggtc
+ggtcgtttggaatggataacgtatgtgttcgaatgaataaagaacgctca
+atgctttcttcaa
+>AW057318
+gcgactgcgttggatcaaaccgacatggttcaaattcccaacactccaac
+attggttgccgaggaaaatttgaaccacaaacgctctaaagcaaatctcg
+tggtggctcaagagtctgtcgcaatggagcacatcgctgctcatcagctt
+ccagctcccgagccacgtcatcgtggaccggcgattaaggataagccgga
+gagaaaggatcgtcttccgacggttggagaatattttgaaaatgataaag
+gagatcgtttcattttgcgtcagaagctgggcgatggtgcaatgggacat
+gtttttctgagcatttttggtggcagaagtgttgcaatcaaagccgaaaa
+gtattcaacagggatgcttccaatggaaattaaggttttgttgagtatca
+gacgccacaatggagttcatttctgtgatatcattgattatggaaccatc
+cgtcgtgaatacaactacatgataatcagtattcttggaaaagatctcta
+ccgtcttcgtgccgaacaaccgactcgttcattcactctcaatacgacta
+caaagattgctcttgaaactattgaagctattgaagagcttcacaatatt
+ggatacctgagccgtgatgtcaagccaagcaactntgctccaggacaacg
+cgacaatggacagcataagacaattttcatgtttgaactttttggg
+>AW057319
+tcagttctcatagtttcttgagtcattcctgggtgtgatttattaaaaac
+atcctcgcatcgtggaataattaaacatggagagcaagccagttgcgaca
+aaccaaaatacggaattggagaaggcaaagctgctgaaaaaaaaaactcc
+cgaagaattggcagcactggcaagcaagaaggtattctcaacggaatcag
+ttgaagaaccagttccagttactcgtcgtccaagtgaattgtcaatgggt
+tcattgaataatcaattgaaagaagttcaaatgggtaaaagcattcaatt
+cggctgagaatcatgcatataaaactgccaatgacggagagaaaaaggac
+>AW057320
+aattccaatttttcgaaacgatggcngtcacatatgatacacttcgtgca
+gaaattgaagagaagaaagaagactcagtttccgaaaggacaactcgaaa
+tggaaaaatactgaatgcaaaggatgaaccagagtttggaatgaatatta
+ctccaaccacgttgttcttcaagtacccaattgggggaattggttattca
+ttctttacagtcaccaacacaacatcggagaaataagcattcaaggtgaa
+atccagtgacaacacttggttccgttcaaaaacccagcggtcggttcatt
+aagagtggtgaaaaagttcatgtgagagtcacattcaatagtccagacgc
+aggaaaagaaagacccgaaagaggctcagacaacaaaaatcatgttgcaa
+tcttccacgtggcagctggtgacgcgaagacgtacaaagaagcgtttgcg
+aagaaagcggcggacggagttcatcatttctttgcaagac
+>AW057321
+ctggaggagactatacacaaagagcaggaaatgattcaaacaccggtgat
+atctgcctcatgacgttccaagcggtggcaactgatatggatgctgctcg
+tgacttctgtaacatcaaagctccgtggcggcttagagaagcaaagattg
+ataaatcacaagacagtattccggtgattatctgcgacgttgaagctaca
+ttcacttgcaatgccggatggattcaaatgttcgggtattgcttcaagat
+gagtgaggtccatgatcgctacacacgtgaaaaagctgagcaatactgta
+aagatcaagctgggccaagttttcaaggagaaattgccggcattcatcac
+agatacatcttgacgccttggagaagct
+>AW057322
+tgccaaaaaatcagcgaagaagttaacaaatgatgacgtggatgttttga
+cgaatcccagcgacgaatacgtggattctttcatgaaatatcacggaaac
+ggtcgagcagtattcaagcgtgaagatttggctcagtggcgcgatagctt
+cccggactacaaattcaaagtgatcagcttaaaaggagcgccgagagtca
+tcgcaacggcacatttgtgcacatttcgcctcattgatccattcatccat
+taaatccgataatggtcattgggttccgtatggatagatcctggatttag
+atctcccggaccggcaaaacttcagaatgatatgtgtcgagtggaaatgg
+acagggaagatgataatattgtcttacagattaaccaaccggtcaaaaat
+ctctggcacatgctgtccaagcggtaagaattcttgtatctcggacacaa
+agccggcgacgttggctacaagacattctacagtgcacacgacgtcgtgc
+tccctgagaacttgcatttgtctggaattacagtgatagatgctcgagat
+gctccgaagagagatatcataaattatgatcagacactccatccgtatca
+ccgagacaagtacatcatctatcatatgtacgatcggtatggatctcgtg
+aagtgtcgtatgatgatgtagtgaatcgtgtatcgtcatctggcctagtc
+tggtatctatgatcatcagatcagatgtaatttatggcctaattatgctg
+tatatacacgtgtcgctacagcaatcgtt
+>AW057323
+gtcggatccggctgcaaccagcccctgttcatccaatctccaggttgtgc
+aggctgtggtggcacccagaagttcccagtgatgaccttcaatggtgtgc
+tcgtcggagaaattgtccgcctctacccgggattcatgcaggagatgttc
+accgatgcggacacctatattgttcatttcccaatggacatgccgccaat
+cttgaagcttctgcttgtcacctcggttttcctgatcgacttcacctact
+tcgaggatcgtaaccaggatcaacatcgtaacggcggaatgccttaccga
+acgcatagcagcttctaaacttataatg
+>AW057324
+ggaattgacaatcaggttcaatcctcacgcgatgatcgtctggtgtggct
+gacgttctgcccaactaaccttggttcaactgtacgagcatctgttcaca
+ttgctcttccaaaactcagcgctcgttaagactccaagcgattctgcgat
+aag
+>AW057325
+gtgaatggttccaaggagacggtatggttcgtcgcaagatcctccctatc
+gag
+>AW057326
+ctggatctatcatgtcaatggctagtgtttatgcaagtgtcgttgctcca
+gatctgacaatctaccatggagatcgtaagcaatcctaccattttcgctg
+acaaggggaaaatggtcgttatcaaccggaaaaatggggtgattgtctac
+atgcttcgttgtgtcgacggccgacgtgtctacattgagaaatcttccga
+aggagccagtcttattctgactaatcaacgtggaaaagtgat
+>AW057327
+tttttacgatacaaaaatcactttactcatttacaaccaaaacagaaatg
+atattctcgacgagaccagactttcccttgaaacctaatcctcactgaca
+ttgcttgtcttgaaggtgatggaagtggtttcacggctatccaaaggcat
+tgacactgacttgctgttcttgttctttctgattttcagactaccctttg
+aagatctatcagtgttttccgacttttcagttggagacttttgaacgatt
+cgatggaagaagacgagttggatttggaggcagctctggatttacttttc
+tcaatatcactcattgaagttgatggggccttcattttgggcttctcctt
+cttcgcatcaccagcctttttcgttttcttatcaaggcatggaaggttca
+aagcagttttgcggaaccacacaccgacaacagctgccaaaatgttgacg
+gctgtgataattgcaagtgcgatccaagatccagcgggttgattgtcgaa
+gatcaagtagacgagccagtagacttcacacgcg
+>AW057328
+taacgtgtaatattttagacgcaaattaaccacttccaattctggaatat
+ctggaatcattcagaatggtagttgttccagcaacttcttcacttccagc
+caacagcacaggttcaccattttgatcgtccctttggctatcgtcatcaa
+agaagaacttcttatcaccaatgaaatgttgaaccaaaacccttccgatc
+ttgaaaaggggttgacagaatgcacatagcaatactatattgccacgttt
+ttggtacattaacggatgtttctccttcttttgctccaggacataagcat
+gagcgagcaatggccattccgtctcaattcgattcttttcgtgagtgatt
+actgcgatagagtaaattaccgatattgctgatacaataaccaagccagt
+agtttgtacctgtccaacaaacacgtcaaattcacaaatctcatgaactg
+tgtagaaaatcagacaaacacaagtgagagccggcaagaggatcaaatac
+atgctatcccagattgaatagtgagaccatttcaaacggtgacaaatggt
+gactagatgacttgtatcaatcagatgatcgttagatctcaagttctcag
+tgatactgaagttcga
+>AW057329
+atgctaaagttcaagtattagacggactcagtgggatgatcatcagacgg
+attatcaagtacaacacgcgaaagctctgtctcattcttctcttcttttg
+gtggacggagcatcgaatgctcacggcactgagttccatcaagacgagta
+atttgttgcaagagatgaataacctttccacgataagtatccactgatgc
+aattggattgaaatcgaggtaaacactattgagttgtggaagttcaacca
+attcatccataatgctccagttatccaacttatttcctcttgcccaaaag
+tctgtaagtgtcttcaattgatggatattctcgaccttctcaagacgatt
+ctgattgaaatccagaatttcaagaggaagatgctcatcgattccacaaa
+cgtacttgatgccattttgagccagataaatctctttcaagtt
+>AW057330
+tgtatctcgagctgctcacattcatcgatgaaacatcttctgcagttgga
+actgcgaacatatcaaatacatcttctgcctgattcttggctgattccac
+tgccaggaagattgcacctcttagcaattcaccatagacacatacacacg
+accatgtgacaatcttgcatccatagtctatggagcctagaagcaactcg
+ggcgctctactatatcttgcgacatgatagctgtgatgtggtgtctccac
+tgatgcagtcgagatgacgcatatcacgagtctcagacgacagcgcctgc
+attgtcagcagatctgtgctcgatgcacg
+>AW057332
+tatttaatttcaataagactgccacttctcaaattgaataggactgccca
+ttaatgacgagttttgccaaattcttcttcttcatggacccatttaagat
+tccagacaaatcaataacatgatcaccgattgacaaattggtagaaaact
+ttgtgcgattctcaatgttcaattctggtactcgattgcaattgttttgg
+aggaattggagcatcatcaatacatcgttcacgttttttggacttttcac
+tattttttttcttttgcttttcaagaggctgaagccacagcttgacgcgt
+ccatcagaggaaacaaactccattggttgataggcaagactattggattt
+agcagttgtagatgcatatgaaaggtcgaccgacaccattgaactcggag
+acaacgccattttaggttttccagttcccgcatcgtaagttgcttttgga
+acaaggtcatctggctttgacttcttcagtccacggcacttcaagtgttt
+gatgtcctccaatggctcaattgctagtttttcaatgtgcttacngtaga
+gcgatttgttggaacaagatgaagcagttgctgcacttatcgaaacgtca
+tttgagcctctgcttgccggatgtagatgcacttgggaacaggagtccgg
+ggtcgtgatcttgtcatagtgctatagtattcagaatacagtttcaatgc
+tcgattggaaccttctggctgctggaa

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/3.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/3.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/3.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,899 @@
+>AW057334
+tttcatccatcaaaacatttttattcaatctagacagtgctattcatgac
+atcttgctgaattcttatcatcacttgaccttgcgaagagcttttccttc
+atgatcatgtccgatctcgtgagtttgagtttgctcgatgatagaaacac
+ggcgagcatttggaccggtagacattgcactcttcgctggaggcttcagt
+cctggaagagcttgacgatattgcgaagcaatgcctttcacatctttcgt
+tcatttggactgatggtggttgcatgagattggtagccaacgcctgtttc
+gcccattgcttggcatcgatcgaatcagggcgctcgacgacaaaagtgac
+ttttcctttctcttggatattcttgacgagaagatctctggcaacatcct
+tgtcactgactggaattccatcaacatcacacaaatgatctccaagaaca
+agacacttctcggctagtgatccaggatcaacacgtgaaacaagcacacg
+gttctggaaatgcttgattcccaaaccaagttntggtccattttgaaccc
+agacaagggttgccaactcataaacgtatccctcacggcgttgaatgatc
+tttgcacgatcctcangaatatgaacacgagcttctaactcttcagcctg
+tgtatcgtcacggctcaccgtgattattgcacatggagcagcacagcgaa
+gcgcacgngaaaagtcgttacaatncttgcaattntgtccgtgcactttn
+ttacttggcaccagtgctcatcttgctcagaaatgcttcagcctggatct
+agtgatacgagcttg
+>AW057335
+ggcggtggttgatcgggcttaatcacattgttattcgcatccaaggctga
+tccctgatcatttggaacagttccagcgtttgctggaagctttctggaca
+tgctcttctccatatgagcaacaaatggatcaatcagttggtggtaaatg
+acgtgggatccgtgagtttctggcaagtagaggtacaggaaaaatgctgc
+tttgcaacccaataaattgggaaatatgacataatgattgcggcgaagaa
+gtcgataattgaaaaggcaccgaaaacagtccaatagatcaaccacattg
+tatcatcgtcagttccttcagttcggatcgccttaacagaaacataagcc
+ggataggcaactccaatcaaattgcacacaaactcggcgccacttccaat
+aatcatgtagacacaattgagcccgatgagcccatatgcgagcatttcac
+gtttcagaaccagtgcgtcttccaactttttcacattctctttgtagaat
+ggtccatgatcagcgtacaagaaggagaagaagtcactctgagggtctca
+aaatccttagcgacgttgtccatcggctggtttgattccacagattcttg
+tcatcgacat
+>AW057336
+ttttttccataaaatacttcattcttaataaaaattcaaattcctcgtag
+tttatcactgatccgacgcctcccaaattcgacccatttcaattgtgaca
+aaactcgtgttcttaagatcttcccgagcatccataagctctttgagctt
+gtcacgtggctccaggtagtactcagtagagcccatatggtacgttcccc
+agtgtattccaatactgttcttagcccgaatgagtttatgaaccctcaat
+cgcctcttccggattgatatgctgggatttcatgaaccatctgggctcgt
+atgctccaattggaatagctgccagatcaaaaggtccaagcttctcgcca
+aacttcttaaactctccgtcacagtgaccagtatctccggaataatagaa
+tcgccgattctcgccgatcaccgcccagcctgaccataatctgtggttcc
+ggtcgaaaagtccacgctgtccccagtgttgagcaggcagacaccagatg
+gtgtaggtctttccgttcttcacaaattctgagctctctccccagttgag
+ctcggtgacagcggtggaggatccgtcgactccgatgccctggccttcca
+tccatttcttcattccagcggaacgaaccacttgattgggggttgcgatc
+tgtgatctcttcacgcgtcagcanccaatgatcgtatgatcatgcgaacc
+actgcaaaatc
+>AW057337
+ttgctgaattcttatcatcacttgaccttgcgaagagcttttccttcatg
+atcatgtccgatctcgtgagtttgagtttgctcgatgatagaaacacggc
+gagcatttggaccggtagacattgcactcttcgctggaggcttcagtcct
+ggaagagcttgacgatattgcgaagcaatgcctttcacatcttcgttcat
+ttggactgatggtggttgcatgagattggtagccaacgcctgtttcgccc
+attgcttggcatcgatcgaatcagggcgctcgacgacaaaagtgactttt
+cctttctcttggatattcttgacgagaagatctctggcaacatccttgtc
+actgactggaattccatcaacatcacacaaatgatctccaagaacaagac
+acttctcggctagtgatccaggatcaacacgtgaaacaagcacacggttc
+tggaaatgcttgattcccaanaccaagtttggtccattntgaacccagan
+cagggttgccaactcatanacgtatccctcacggcgttgaatgatctttg
+cacgatcctcangaatatgaacacgagcttctaactcttcagcctttttt
+cgtcacgggtcaccgtgatttttgcacatggagcagcanagcgaagcgca
+cggaaaaagtcgtaccatccttngcaatttgtccgtcacttttttaactt
+gtcaccaatctcaattttctttagaatggttcagcctgaatctta
+>AW057339
+cttcctcaaccgccggctcctcttcctccgcgatcgtcggaatctcatcg
+gcacggcacgccacaaaacgtggaagctcatccggcgtcggctcccgctc
+gaaatccatattctcctcgtgatccgtcacacctccgagctccttgtaga
+tttgctccgtgagccctgccaacttgacatattcctccttctcgacaacc
+gagtattccttggcgataaccacttcggcaatcttgtggaacacctctgg
+aagctccgcgaattccttgctcttctggaactcggccttcttgactccag
+cctcttcgcggctcagacttccccatgggagctcgccgagcaccaggtga
+accagctggtagaaccatgtctcgagatcctgccgggcgccaacaagacc
+atcgtcactgcacggagcataatccaagcagccagcgtagctagcaatcg
+gagctccatcatcaccagaaatgttcttgaccaaagacgagatgtccgcc
+atgaagaggtgacggcttgccgcatcgtagtggaagctgntgagatccat
+gttgcggacgaggtagccatgcttgtgagcacatcngaacacattgagca
+catcctcgggccagcgttcagccgncccaagcgtgacttgntccgcatcg
+caaagcactggctcaacgtcgtccacccacgaagacagcnacatgtgcca
+cgcgtcgtcgtccagaagatctggaactgcgagagagccgc
+>AW057340
+cactgcattcattttcagatgcagccaataaatgtgaatcatcaataaca
+ctatcagcatcttgctcatttccaacagtctgctttgctcctaggcttgg
+ctcacagtgagatgatgcttccgaggcttcttgaacatcagtctcactaa
+acgatggctcaatttcagctaaactagacatgagactttctgtaaccatt
+gacattaccgaaagattctcatccgaagtggctgccaaaatattagaatc
+tccaagttgggcttggataatatcttgaatgttcccaaaaaaatcattcg
+aactaccgtgaagatccatgccagagtcttgctaagaaatggcatcacat
+gcacgagcatccgacctatcttcatcacgattcctgcatccagcactctg
+ttcacaacccgtcattattggatcacattcgagtgtaagagtttgcacat
+gtgcatattcgaacatcaga
+>AW057341
+caatacaaagaaaggttgtcagttatatttcaagtctatttcttctagtg
+gagcaagtaggcagctggacctccattttcccaatcgtacggagcattct
+cgttggatccacatgcaccataagcttgacgcatcattccgtagattgct
+tgatagttgggtgcatcatagggaattcggaatt
+>AW057342
+tgccttcttcaacggtccatatgccttttttgggatgacggctgcaactc
+cggctccgactccagctgctcctttcttcttgccaccgcaattggctatg
+gatccaaggatctcgaaaattgcaatattagcaacatgattcgtgcaatc
+attgatactaactgaattgagaggcttggaattgt
+>AW057343
+atttattaaatcaattaaaatcacacaaagatatcagcttgtcccatttt
+tcttcttggtgtctccaaacaatcttcttctttctccgattcaattgact
+ttctactgattaattgtggcattgagttgtcaaatgaacgacttgttgcc
+ataactctcttatccgaaattttgacgacgactgtagtgagacgaatgaa
+gacggccatactggcaagagccgtagcatcaattgaattactgcacacac
+cagagtaactccattaacagcatgacagatatccgaattaagtgttccag
+ttgagggagttttcagatattgacatggagaatcgaatgatttaatgaag
+agtgcaattgtatgtgcaagatggaaagctgagaatacaaatgcatgcat
+catgaagagaagagtgaagacaccaaatgcacgacgagttgagcagagtc
+caatgagtgacatgagtgccaatgcggtcatcaacatatctttagtatcc
+agagcagtctcatcgagtaccgctcatgatcctgcaactttgtatgcagt
+tgagtagagaccactgactaggaagatgacagtgatgacagcacacagtg
+gtgcttcaatgacgataagcataccat
+>AW057344
+atacataaacctgattgacacggtaaagggaggtatccgagcacttgatc
+ttataagctttacgttcatttaaagtattctctaattcaattggattgac
+tccgccatgtgcctggtattgaagtttaacaggagccacattcatttcct
+tgaagtgctggatcgaattgactgcttcagcaaggctatgcccaacttgg
+ctatcagtcgtttgttgagattctggaataagagcaacttggccgatctc
+tcacgactcttcattctattctggcatccgaataagtcgacgattcttcc
+gctcttgaactctatagctctacgagagacttcgatgagt
+>AW057345
+cacagttcaatctaagcttgaatagttgatccaccacacggagttgcttc
+aaggcacacattgacatcacttttgttctttttattagcctgaatacggc
+aaacctttgagctctttgcagaatgattcgacgaacggcttgatctacga
+gaagatctcattgactgcttctccggttgcttttcagttccaaaagatgt
+agttgggaaaaccagcatcctttgttcaattgacgacgtttgtactcctc
+aacaaatcccatcattgaaccaattggaacagcagtggagacatctcttt
+cgatgttgactgcagttgtagtgtccaattcttccacgtcaaccggataa
+tcagtcgactcgatacacgtggcatatcctggcaagttgatgactgtctg
+gggagccgagaactgatccgtgaacttgacagacaatgagaatgcctctt
+gctccttagttcctgcagttttcttctgatcaatgaactgcttgaggctc
+ttcgcatagatcgggcacacgtatcctttgatcaaaagattcatcttgtc
+ccattgggagagctctgatgagttcacacggaactggatggcacgatgag
+ccaatggtgggaaacggaaaacttncgttgngaagtgacggctncattcg
+tagcatgcaacttcagcnagaaagtccttgccagtggtgtgcggaacaca
+aagttncgtntgacgatctcgcggccagtggaatgtgcaattccgtgggt
+tgacttctttgacaggcat
+>AW057346
+ctagaacgcatttgtggatgctatttaaatgttctggctagatttctcgg
+gggaaactgaataaactaaagcttatcaacatctgcttcgcatcatcggc
+aaactcttgttcagcgtcggaatacggttcgatgtcacacacttcggtaa
+ggtacttgcgagcgttgctcttgtctttaattgctaccgagcatttggac
+acataaaatgtggtctccatccattgtggattgaccttataagcagcaag
+gaaatcctcgttggcctcttcataggaatgtgatggtggttgctgataga
+acgtggcagcaagctttttctcaagccatgtcagagatgccaccgagtac
+ttgtaacggccacgcaaatggaggagagccgtatcctttggctccttagc
+aagagctttgtcgagcaattccttgaacttcttactgcattccaactgtt
+ctttcgttgccatatattcggttgcttgtccagtcaacacagcattccac
+ttcagcgccttgaaatggtttggatccttctggacagcttcatcagcaca
+cttgagtccctcggagacacttgccatacgttgagctcttgaacacatgc
+agacttctcatgaaatacttgagcangtctgcacatgactttcaccggac
+gatcacctttgtcgagacgcgctttgagcaaatcatatacttgatcgcgg
+ntcttggntcccgaagtntngtcattttcatcatacatcat
+>AW057348
+tagattgttcagatgatccataacacgagttattcagcttttggctcatc
+atctttgcaggtgagaatagttccctgtgacatcttatatgatccatcct
+cttgcttcttgaaatccataataagctcatcattctcttcgaaaacgaat
+ccgtttgctttaacatgcattgaaaggttatcctcacgagtgtatgcttg
+aatcaccttcttatgtcagatcagttgggaatttctctttttcaataaga
+gcatgcatgttgaatgcaacaaattgacccttgtacaaacgattaccgca
+catgttgaagatcatgtgtggcagaagccagttggtgagatcatggatat
+cttcactggttctcacgtgcattgtatcggcggccaactttgtggcgact
+gtctttgccaattccaagtctgcagcatcacgagcagcgtcacgtggctc
+cgcggtcaccgggatcagattactggtatcagcctccctttacacgaatg
+acataatgccactgagggcatgaacacgaaagtccagaaaagctgcat
+>AW057349
+ctcttctagcgattgatgtaatagtaattgagcatgcggaacatcagaat
+catgataactccagcaattagcagatcagcacatccgaagcatatagcca
+acatgtatccgtgacgattagcatcacaaacttccttgctcataccggaa
+ctacaactgtcccctctcctaacaacatcaagaataatttccaataaaag
+tgaaaatacagtaagcgatccagtaaataaagttcagagcaaaactgatt
+atattcgagaaatattcgggaaggaacacgataagacccatgtacaacgt
+ggcaaacacgagccagataaagtaagaataatgggtgctggcaatccata
+gactgagtgttagaattacaaggaaacacgacaaaccaaacataatgaac
+ttggtgggaacacctgcaatgagaatctcatccggttcagcagcccactc
+gaatccagctctcaaacgtgccggctcattctcattcggattttcatcct
+ccacatgcacgtattccaaatccacggatgcacaactgtctggattcat
+>AW057351
+ttttttttactagaaaatatctttttatcaataaacacatattcaataca
+gttcggaatggacagaattaaccatccaacttgcatcctggtttagcctt
+acggcacaagacagtagtattgtcagcacaagattttgcagaagcagtat
+gctcttttccttgataaccatatccgtacttttcattacgaacatcgacc
+tttgccaaaatttgctttcctccagaaacattgacaacttccactgagat
+gtcacccttgtggttgtcacatgaactggatgccattctttgtctccgaa
+tttggcagcttctgggaacgaaatccatccataatcgaaaccacgaaggt
+tatcaggcagatgaacgagaagttggatagatccaacattcttggtgtat
+tcgttattgaaccacgagaagttggcagccacctttccaccctcattcca
+tacacgacccatgacaggttctgcctgcatgtaccaaagtgcaacgtatt
+gatctggattgacacctggcaaagtatcaagagtcttatcaagagccttg
+acaagtngacgagtttggcatgcatcaccctcacgaatgtccatgcatgc
+atctctctcgacacgaaccttttgtccacttncacagttggcacaatcac
+aagatggtggcccaccaatgcagctacaggtgatgatgtacatgtcattg
+agtagtggtccaacttgttcagcacttttncattttgacgagaccaagcc
+tcctcagtnctgtngtcgacataaccaagaagaatttctttcggacgctt
+ccagagaatggggaatgatccacacnacgcaccaagtgatggtnatcg
+>AW057353
+tcgaacaatgtcgtatgcacagagtttctacgccgatcagaagaaagtcg
+agaagccagcggagcaagcttcctctcctgccacggctgccttccccgct
+accaccccaatcgccgaggatcctctgactccatcccaaatccaggatgc
+catccgtctctaccgttcggtgctctccttgtctgcgtctgccccatcat
+caccggttcgtcaagcagcggctccagtttgctccggaacagccgattgt
+gcactcggactactatggtggaccatcggatattccgttgtcctaccgtg
+ttaagtacaccacgacccagcaggcacctgcgtctccagctccggacttc
+acagagcaacagttgatggctcagctgcaggctcttcagatccagcagca
+gcagcagccggcaccagatgttccagtcgtcgagccagttcaacaagttc
+agcaaaagccaaaagttgctccgaagatgcttcacaagatgtatgacgat
+gaagagtctgngtactgcttcgctcgtaaaaaagatgtggagcaagaggg
+agaagttnncgagatccacgtggccgctccagtcaccactccgataccga
+cctactcngctcctcggtcaactatgaagctccggtntcaacaactacta
+ctcgaaaggagtcagtgggccattcgagtacattgaatgtcgaacgattg
+cagttcatctacg
+>AW057354
+ttttttttcactacaccaaaatttttattcaaatcaatgcatcatacttt
+tcacacacgatcaaaaaatctttttcgagaaactttttcttttggagtca
+gatgatggaggtctaggcacaatctagttggcaaccaaaggaatgacggt
+gagctcgcgtggtggatcactctcaaagactcccctggcacaattctgct
+cagcagaagcattggatgtgagaacaatcaagtgatcagtcttctcgact
+ccattatgacggagcacatcgatcgaaagcttgtccccgggctcaacaaa
+tccgaagactgggttgacgcggtagagaaggttgtcggaagttttgacct
+taaatgcctttctcgacttggtgttgttggcaatgctgactgtctgaact
+cctccggtcgttgcgaatggaagcttgttgggcgatgcacgaagagtcga
+cgtcttcactgaagcagaaccctgtccctccgggaatgtgtacttcgact
+ggttgggaaccaacttcagagctttcgacttttcggatttctcaccggag
+tccgatttgatggacacgtcggaaacatctgcctggttaaccgttgggca
+gaggttcttagccttgttcaagtcgcacttcttggaggacttgtcgcatt
+tcttagatttcgaagatcttttggatctctgtgactttgactttgccgtg
+ctatcacgtctgagcttcttcgattcagacttgggacctcctgccgctcc
+cgngatgggagaaactcngnctttgnattgaaggctnctagattt
+>AW057355
+tcacataatttcgttattgcaccgattttgcacgagttgacacaaatcac
+ttcttctggaagcaatcgcttccaattccagccagtgtctgatagttcgg
+atcatgagtagcagccattccaggaccaccggcttttggagcacctggag
+ccgcaccaccaccagctggttttgcaccttctttcttgtcgaaacaatcg
+tttccaattccagcaagtgtttggtaggtggggtcgtgggtggcagccat
+tccaggacctccaggttttggagcaccagcagcaggagaagctccaccag
+cagcagcaacaggagaagcttttcctttctcttggaacacatttccatca
+actccagcaagagtttgataatttggatcatgtgtccctgcgacagcatt
+cttatcacttggagcctttggtccctctgggtttggagcactcgatgatt
+tttctttcttatcactctntgcactggttgcaccatctcctcctcctcct
+ccagttntagctccaccaccacccgatgatccaccagtcttcttctttcc
+tccacattgtcaatgacgtatccggtgaacaccattggagcgagcatgct
+gtaaccgagatggggcagttgtagaagtgttcactattttctgc
+>AW057356
+tttcctctccaatatctgattctgcacgtcaacctccaactttctcacat
+gaattgcacttctctgcgggaaaagagcacatcccaacggtgtcatgaag
+taaagaccaatccccgccaaaaatgtctgaatcggcatctcggagtactt
+ttggactagtggacgagctttgtagtaggcggttcgcgtgaatcgattca
+tgatcaccgggctcagaaccatatcaggcatagccatgagaattcggctc
+agtgtgacttgagcaattgcgagagccgccagctgcttggattttgcgac
+gagatggtcgtcctcatcacatagctcaattccctcagaaagttcgcatt
+ggaatattaatggcatttgcaactgcaacagccacaaatggaaccaatct
+tccagccaatccatgactattcttcaccatcatattcagaccgagagccg
+ccgttgtagccgctccagtagcacagaaatacgaatagagtagctgctga
+ttggtagttctgcaatttccgcttcgattggtatagttgacaattgcatt
+gaacgactgattgatccaatgggagaatataataccangacaggtacgat
+agcagctgagaagcattccagtgataaccatatttgctgggtctgagcgc
+tcattctnccgagacagaaacatctttttcgcggtatcaggatgatatac
+ttgaaatcatagagatcttttgcactcatagctcaatcatcgtcaaatcc
+ggatt
+>AW057357
+atgtcagtatttagaatgttttcgaatcaagtgaactgaggattgcgaat
+ctgacgaattgcaggtgtgtcacaactgtcaaccaggcatcctgaagggc
+tcaactctcctcgcattgaaatgttttgaagccgcttctgaagttctcga
+tgactactgagtgtgcgcatcgcctcatcagcttcagttacagggccacg
+tagagggctcagaagtggaggaggatggttcatactgaaatgtgaattgc
+atnccgaaacatgtgccaatggttcaggaagacattcaaaattcggcgga
+gaatctgtcagtggaatcgaatggggtagtccaagtagttccggcaactc
+atcggaaacttctttgaggccagttggtcgtgcataacagaaaacagacg
+atgaaagtttaatcatcgtttcgcgcgagtattctatttttaagaaaaga
+tgttgatcttcttggtttcggtagcataccaattctccaccttctggaac
+ttgaattcgttcaccttccttctttgcgagaactggtggcaggctcatcc
+ggcggcaaatcggggtttcat
+>AW057358
+cgcaacgccactttaatctccttcataagaccgtcttgccaggaagaatg
+gttcgattcaattgtcttgattccgttcgccagttctcgcattctctcac
+aagccacttcaaattccgtggtactgttatccaaacgagtcttgttaact
+tcagtctcgttgttcgcatatttatccgccgcactgtccatgtcgagccg
+agtcctattcaaattggcgatactgtccgcgtgaagntttcccctcgttt
+tggtagaatgacttgtatttttcacaaatcgcgaggacccctttcacggc
+atcagcttcggcacccattctagcatcaacagcatccataaatggtttca
+cattgatcttgtcaccgagacgcgagtaattctcaagggtgccagagaaa
+ctggccttcttctgccacttgctaatatcttccaagtttgtcatgctctt
+gtgaaggtccttgacacgtggcgtcagatttttgaagtagcccatcgcat
+cgtccacttctgcgggaagagttgtcgccttggcacgtccagtcttttcc
+ttaactntctgcttcanacgtccaaacat
+>AW057359
+aaaattaaatcaatatggttgacgagaatgagaacgacgagttgctcgta
+tctgacggaggcgaaatgattgacgtcgagacttgatgatttccatgggt
+gatacgtcaggtgtggcgatccagcgaaggcgacatagtggacacgaagg
+ctgggtagaggaacggtgccacctttgaacacatgttttgcatccaataa
+cgttcatgcatcgattacagccttgtgggtcaactggagtctcaaaacag
+acggtacactctccccgtgcgccgatcggtgagccaactcgagctgcatg
+atttgaggtgccttgtgatgtgcttggctgattcgatggagcagcttcaa
+aagagtctgacaatttgacttcagattcgtcatcatcaagggttatggca
+ttttcattgattccttgtgacggcaagtttcttgcaagaacggccggtaa
+aacgtccacgtggcctccataaccttctcttccagctgcggctctgattg
+ttggctgattntgtgcattcctgggagttgttgtccttgtgacccagtcg
+ctgcgctggtggatcncgggccacagcagccacctgtactccagtacctt
+ctcttccagctggagctatggntcctgtcagatnntgtgntgagcttgag
+gatatctcgatcacttgatatgacagtntcaggtgtggaggattttgaga
+agcgctggttggctggatttgcccccagtc
+>AW057360
+taaattaaagagatgaaagctctttctcaatagccttcgagatcaaatcc
+atcaagttaattccactaatcggaagctgattcgcatgaaccttgcacgg
+ctgtttaaccgaaatcaacgagtttgaatgctcaatcgggcccaaattct
+tcttcttgttcttcgggcatccttccttatgaaccttcaccacttttcca
+atctcttcaacatcatcaaaaactacagtaggaatcggagctgtgccaag
+tttgaattgatgttccggagtaaaatgatcgtgagtgagacttccagttg
+atgtgacaaggtcttcaagtgaagaagcttccgaagacacaatgtatggc
+ttgcggcactcaattcccattttcggctctcttttggtgcgatccagaat
+atcagagactcgtcttttctccacattcaacaaccatctcgatgcatcaa
+acttctcaatattatcctccatctctcgaacatcttcataaacattcgaa
+tgctctctcatcaacaattcacatttcttctgagcttcttcagatttcag
+cgtcaattgagctcttccactgatcacctccttcaatcgattctcgaatt
+cagcttcctgatgtccaatctctttacacattgncttctcgntgaagctt
+ggtcgaacaaattatctcgtgagaanggacttcagttgacaagttcttgc
+gtggaatgcttcacccgtgatcatatcaactcccatgggctcttcgatag
+aa
+>AW057361
+gacttcaagattgatgtcgaatcgattgtttaggcagccaccaatgggac
+aaccatcatctcacgatgctccgcttggttagcaaatgcctcatgaggat
+tagtagagccgttggcattggcagtcaagagcacaagtttgtcagttttc
+tgctctccattcaatctcaacacatcaatcttaacggaagaacgtggctc
+agcaaatccataaaccggattgacacggtaaagggagttatccgagcact
+tgatcttataagctttacgttcatttgaagtattcttcaattcaatttga
+ttgactccgccaagtgtctggtattgaagtttagcaggagccacattcat
+ttccttggagtgcttgatcgaattgacttcttcagcaaggctatacccaa
+cttggcttccagttgtttgttgcgtttttggaataagagcaactttgccg
+ctctttcccgatttctccattttcttctgggcatccgaatcaagtcgacg
+attcttacggctcttcgaactctttgagctcttacgagaagatntcgatg
+agttagattgcttatcagaaacaccagtagcagcgggaacagttganact
+tgagaacccagagtggtagcagtcttgactttcttcgatttcgaagactt
+tgatgattnccctccacgcttgctgctttcctttncagattttnccgacg
+ttcccttcttactagacttcnacgatctcctggagtgcctagcttcga
+>AW057362
+caaatgggggtttttctttgttctggatatctcaaccgacatgattctag
+aagagaatgctctgtgcaatcatcgcagtcacttgataaggatcgcagtt
+tgatgacggacgacgatcctccagatatccttttctctccgcagccacct
+gtctcggaatacggattgagcatccacgattggcgactccccatgagaat
+ttgtcagccgaacttgtctcatgacgtcctgtcaaacgacgaagattgtc
+ttctccaccatgtggatcgtacaccttcatcgcctccaaatgtgtccgct
+tgagtcctgtcatggcggcttcaatcgcagcaattccacctggagcacgc
+atttcggcagtcgagaagttggtgtggcatccggctccgttccagtctcc
+catggtgacctttggtttgggatcaagggatacgcagacaccgaa
+>AW057363
+atttgaatctcttctggctgctccgttgtttcagcgccgcttggagatgt
+acttggaggctctggtgaatccgttggctctgcagtggacgctgttgatg
+gttgaacggtagtagtcgtggaagcttcagaacaaattccagccaattcg
+cacaacgctggatacattgctgagctcattttgtagtaccacgggtattt
+ccggcttccaatagttgctgctaaatcgaatccaaccatccaaactctgc
+cggaatcattttttgtgagaggtcccagatcgaatcccttttcggtattt
+ggaacagttccgtatagaatatccggactaacaccggcgccgacattcac
+ttttcgatgcttccattccgaattgtttgtacatccataagcgtcaatgg
+catctcctttggcaagtttgattgattcatcagctagacatggataactt
+gagttgccttccaaattctcatcaagctccaaaatcattgggaaggagta
+cactctnttttgcaccaatgttgatttgcaaaagttcagaatatatgctc
+tggcaaaattcatcactttcggngagcanttctccttttgagaaaggcac
+ggctcaatcacatccgagagcttgtttaccgaactttccagtgctttatg
+ctgtcatcacaatcaatcgaatctncgctattnttccaagctgatttgtg
+gagaaaagccgnngagagtgatcagtgtcgagagatatataa
+>AW057364
+aatatctagttcatctgagagctggtggctccttcaccagtagttccgct
+tgtcgaatcagcctctgtcttcttggtctttgacttggagctctttgact
+ttgaagtatcgtctcccttctttccacctttctttcctttctttccatca
+ctgncttttcccttttttcctcccttcttctttggcttgtacttgtcaaa
+cacagacttcgaacggtagcagacgaagctcatgactgccaaatgacaaa
+aataaaaccgcanatgaggaagctaatgagcacatagagccatggttgac
+tgacatcacgggcgattttcttcagcggatggtagtagcagacgtgttta
+ttgatattaacatacacgcaaacagttcccgacttgcagtctttgttaga
+agagcatgccgatcccttcacctcagtacgttcttctccagttaagtgcc
+ttttgcccaaaataccaacgcgattagtgtcaggatcgcaaaatgcatag
+gtgctgaattgagatgggataggtttgttagcatcacacggataattagt
+caagtatggatatggggcccaaggtaggcgtcgctgactcagtaccattt
+ccagatggtgcaattgtcagggtaagtgatgggcagcantgaccaaagcg
+acagactccggccctcgtcttgcacactccagaacttttcttttcttcaa
+cacatctacacactttggctcattataccatgtgtttgatagtcc
+>AW057365
+agagagaaaaagagaaaaaagtccagaattcgagaagttgagagaaaaat
+cgaagcccatctagttttgtggaatcggtgtcaaattggggcgagcttgt
+gagagcacgtggtccacgaacgagcaggtgacgtcacacattagacggcg
+cgaacggcagtatctggcagcaaactcacagtaggaggcaaaaatgatcg
+catccattctagtgaacatatttggcaattgacataaagaattgctcaac
+aacgacggcccagtagcgagcccacggatagctgagcctaacgacgagag
+caatgacacgtggaatcagaatttccaagttgcaaaccaccaagtagcac
+agaatgatgccgaacacgagcttggtgaatctcggattgttgcggaaaaa
+gttgccaatcgacgtggcacagttgccgagagtttgaagacgattct
+>AW057366
+gatcattaaatgaacttttggatttcatcgtagatggcaagaacgagtgc
+tccaccagttccacggaatacatttgacagggctcccttgaacatggcgg
+acattccttcgtttttgatgatcttgacggcacaatccaaagtattcttg
+tagagaacatctttgcgaccagactgcatcatcatgcgacgacgaacagt
+atcccatggataggagaggattccagatccaacagtaaccacttgagcaa
+tagcccaggcagcgaagaagttgagtttcttgccatcagcagtgaacacc
+atcttggcagtgtcgaacattccgaagtaagcggcgcggtagatgatgat
+accttgtaccgagacaaagaaacctctgtagagtccgattggtccatccg
+actttgcgatcttgacgagacaatcggccaggcctttgaattcacgttca
+ttagcttttccgacatcagcagccaaacgggtacgagcaaaatccaatgg
+gtagacaaaacagagcgaagtggctccagctgctccaccagaagctagat
+ttccggcgaagaacttccagaaatccttcttcttatccaatcccttttgg
+aaaatgntnttgtagtatccttgaaagcgaagtcagcgcctgggtcggaa
+gtatcggatgacgtagcaagtttctctcagagag
+>AW057367
+acagagatcaatcatttttcaacactatgtgcacgatccttccacaaatc
+attgtactggcgaccagtgtcaatcatggcatccgatactgtctctccaa
+gctcacgaatcgatgaatcgacatctccggatgaagattttctagctctt
+ttcgaaaaagaagatccctcatcagtgaaacgaacacttttcgagttaac
+ggaagaaatagattcacgacgcaaaggacgttttcgtgggctattgatac
+ctggagtatgcttcttcatcggtgttggtggtggaacaggtttgaatctt
+ccacgaagatctggatgagcattcatcagagagagagcaacaaatggaat
+tccgtcaaccaaaagctcaaaagcttcaaattctgcatcttgtgcgtatt
+cagcaacaaatccacgaacttgattacgagcagctggattcttgataatt
+gcaaacttttgaactggaggaatatgctccttctttggtaaagattttct
+cgaaagagatcttctttntttcatcggagatttcttcagaatgctnttac
+gactggaacctgntgattttgtggatctattgcgtcgactctcaatttca
+gcttngcaaagtcttcttctagcactttgcgaaatcttgntacggttccc
+cagaagtggttccaaattttcgatctttttgcttgaaatagatctttcag
+atccctgtcccgagaatccagacgacgagctacgaggaagcttgctttta
+>AW057368
+ttttaggggtttcacagcagtcttcagagcctcggctgattgttgtgggt
+cggacatggcgcgatgggagaggatggcttcatcacggaatttcagaata
+gcatcacaatgctttggtctctgatcgaaaatcccgtttttgatgaaatc
+catcagtttctggcgacaatcatttctccaattttgagcgttgacagtga
+ttcgctcggcttcctcagtgcgatctttgctctgttgggtcagcgcatcg
+tcataggctttcaggccatcccagtacactctcctcaaccgtgcgtactc
+ccaatagtcttctccaatgaattttgtcagcggcttcaaattgtccaact
+gcttgcgaacacgagattcgcgttctgcatccaacagagaaagcgtacta
+atcgctggctccaagctctcatactttgtcttatcccgtcccttatttgc
+aattgcctcataagttttgagatattttccagcataacgataatttggtt
+cagttttgaagtgagtcaccaaatcctttgaaatttctggactctcgacg
+agcatgaacaatagtgcctgatggagcttgtcggagcatgtctttgtagc
+ttcaaccttcccgcacatcttctttacgtcttctgtcatcttgatttttg
+cgcgtttcctcttcanaatcgatgcnnatagcccatgatttggatgatca
+gc
+>AW057369
+tatatctgtttgtccatttgctttcaatctctcggctaaatccctccgta
+cgcaacattttcagtccattcaaattgttgacgctcgtacggtgacgggg
+aaattccgattccagtggccacatctttcgaggatcccgagttgaaatca
+gtgtattgagacaagttgcgcatgttgcgaagttctgcgagagattcggg
+cgagtagtcacggaggacaagtgagtttgtaagcatctccttgagctcga
+attctccatcagtctgctccaactcgtacattgattcggngctcaacatt
+ttcttattaagtgttccgtcaggagacgtggatgggatagctggagcagt
+gaaagctttgatcggaaatgagatgttcatgttctgaatatcagcaattg
+agtcagaagagaaggcgttcttgttaacagtcacatcatttggtggattg
+atgctaaccattggtgcctgttctccatccatatcataatttccaaatgg
+atccttcaagttcttgatatccataatagtcagcaaattttcttcgnttt
+attctccatcttgtgtgttctccttcagaatccataatatgttcaaagta
+tttgagatggagtggggaagtttggatgatggngaatatnatgttccagg
+cngaacgagtgtcaactttagatgaagcctttgaatgcaataaccttccg
+gnactacataaccgactttgagacttttccgaaagtccgttgctggatac
+tagcca
+>AW057370
+tcgattccatttccataaattgtctcacgatcatcacattttggcgtgct
+ggcagtgggcaagtctgaatctcacgggccatgtagagcttgaagaccgg
+aacttcagcagcggagtcttcgaaatgctgaaaatattgaattttcaacc
+agtccatattaattttccaaatctcaacatccatcgtgacgatggtgtag
+acaaataaaaagatgatgcattgggcaatgaatccagccattctaattga
+cttcacacggattgcatctgcatcttcttcaagaatatcagccactttga
+tgctctgagctggagcaaaagcagatgtcgtggtggtgttaatgtagact
+tcaagggacgacatcactggagcaagcacagatgttgatcttggagccgt
+gatgtttgagtagaactgcactgcctt
+>AW057371
+ataagctttctgcttgcaaaggatttttcttctcgcgagcctcggatttc
+gctctctacgactgcagcgagaagtacggtgtccagatagctgatccatt
+tgatgaagatgctctccgactattcaacgatcttccaccaaaacaaataa
+ttgcgccagactcgaaattgttccgttcaagtcccacaaaagccggaaaa
+tgttccgaccacgcgatctcgttctgccaattatccgaaaaggatatgcg
+agttcttcaattcggaatgtcgctgaaagtattcggacgcggaggtcttg
+acacttcttcacatgagactaccgatttttgatcttctccagcagcttct
+ccagaatctatccaatagtatttctatcatgggacgtgtatttctcttct
+tgccgggtgtaaatttaattatttttgaaattaattatttttccataaaa
+ta
+>AW057372
+ttttcaaataaaattttattccgttcaattcaaatgggggtatttttttg
+ttctggatatcttaaccgacatgattctagaagagaatgctctgtgcaat
+catcgcagtcacttgataaggatcgcagtttgatgacggacgacgatcct
+ccagatatccttttctctccgcagccacctgtctcggaatacggattgag
+catccacgattggcgactccccatgagaatttgtcagccgaacttgtctc
+atgacgtcctgtcaaacgacgaagattgtcttctcaccatgtggatcgta
+caccttcatcgcctccaaatgtgtccgcttgagtcctgtcatggcggctt
+caatcgcagcaattccacctggagcacgcatttcggcagtcgagaagttg
+gtgtggcatccggctccgttccagtctcccatggtgacctttggtttggg
+atcaagggatacgcagacaccgaattgttcagcaactctgtgcagaatgt
+atctcgacatccacaactgatctcccatatcgattccttcgcaggttcca
+atttggaattcccattgtcctggagtcacttcggcatttgttccaaaaat
+gttgagtccggcgtgaagacaagcccgggtatgtgtctcgacgacttctc
+tcccgaacgcacgatcagctcctacgctacagtagtacttntccctgtgg
+atcccgggaatccgtgtctcggccatccgagtggntgctcatctctgtcg
+acgatcagatattcctggctcatttcgaacacggggcgttgtcacgg
+>AW057373
+agacagttcaatcaatggggaaatacatgtagaaattaatgcgatccaga
+tatagacgaagcagcggaagaagacgactttccattcggtttggccagcg
+aggaggatgcagaagtcgaaaatgagctctcagagctgctggaatccaag
+gaaactcccgatttaccaggcgcaacttttgagatcatcgagtggaagtt
+cttgttagcagcagcatctctgaaaacacgcattttctcagccaacagct
+tagcttcctccatgacctttggattggaagatttcaggccataaaggcct
+tcaattgctccacggaaggctttctcgatagcttgctcatcagtcgcatg
+gggctccaacgcatcttgctccttcttttccgtacgatgcaccatgtcag
+taacttgatccatcaaatctttcatgtcaacatccttctcatacttgtag
+aatgccttgttaacttgaatcacagaatccttcagaatattgaccttctt
+ttccaacagctggacgtccttgaccatcagtgtagaggccaacgcctgat
+cagcagttgccttggccttctccattntccgattcatccatttcgaattn
+tcagcgcgttccttgtcaatntggtgatccaattcatgcatcattccacg
+gnantgatcgaaatcctcactcatcngaatttgcatgtcacggagctcca
+aaacagtgtgacgaccgactaggtcaccttcacgaaatcgatcgatcgt
+>AW057374
+ccgagcgggtttgtgttgaagtcatcgacggtgaaaaaatcaacttcctc
+atttgatggtggtgtgacgccttttctggggacatttccaactgctggaa
+cacttggcttcttctttctcttttcgggctcccaatgatatgggtcggac
+cacttgtacttcgcgcttttcatggcatcattcaagagattgaaaagctt
+ttcgtagtcggggcggtgatagaattgcgtggcccgaacgatcttgacaa
+actccagcatttgaattgggctttttgcaaagagattctggtcggcgacg
+tgtcgcttcatttccccgatttcgaccttatcatccaaatcagaccaagc
+cagctggcatctcaactccgcgagcatgtagatcaaagcccacaggtcat
+cgactctcccctgctcgaaacgatcgtgcatagctaccgagcaataacgg
+gaggtgccacggaagagagccttctcacgtgggcgacgcatcttttttcc
+gtcctccttatccgtgatgtactggcgtgccaagccaaagtccagcacga
+tgaagtagcgttcgtcaggggagcctttatttcccaaatgccacgttgca
+gggctcaaagtacggtggataaacccaatatcatgaatctgcttgatgcc
+cgacaaaagggcaatcccgatgcgcaaattggtaagacacgttgaaaaat
+tggtcggttctttcatagtgattcaggctttcgccggagaggtca
+>AW057375
+tggacaataagacggttgaaggatggttctcgttcaatgtcattgtgatc
+aaacaagtcggcccacaggggtatgaatggtacatcatcactcgcaactg
+tatcggaggatcaccacactgcgaatgcgagaactgt
+>AW057376
+ttgtccatttgctttcaatctctcggctaaatccctccgtacgcaacatt
+ttcagtccattcaaattgttgacgctcgtacggtgacggggaaattccga
+ttccagtggccacatctttcgaggatcccgagttgaaatcagtgtattga
+gacaagttgcgcatgttgcgaagttctgcgagagattcgggcgagtagtc
+acggaggacaagtgagtttgtaagcatctccttgagctcgaattctccat
+cagtctgctccaactcgtacattgattcggggctcaacattttcttatta
+agtgttccgtcaggagacgtggatgggatagctggagcagtgaaagcttt
+gatcggaaatgagatgttcatgttctgaatatcagcaattgagtcagaag
+agaaggcgttcttgttaacagtcacatcatttggtggattgatgctaacc
+attggtgcctgttctccatccatatcataatttccaaatggatccttgca
+gttcttgatatccataatagtcagcaaattttcttcgttttattctccat
+cttgtgttgtctcttcagaaatccatatatgttcaaagtattgagatgga
+gtggtgaggttcgatgatgggaatatatgttcagccgacgagtgtcactt
+>AW057377
+ttcagctcaagtatctagtaatagtccagttatttttgctcctcgtcttt
+tttctcttcatctcccaaaatgtcactggcttgacgagcaattgttttag
+tatgactagtagtatttgcacttgtgagattccattctgagctcttttcc
+cacaacttttgtcctttggatgtgaaaccaagaatttcaacttctggatg
+tctctcagcaagaagcatgaaattaacattaagcacaagtcgtttcttgt
+caatgagcatatccaattgaacactctcagggaaagcacgttgtccataa
+gaagttgaaggaactgtttcagaattcactttgattggtgtcatttccaa
+agtttgatgggtttctttgactgcactcaaagtcacaatattcgactcat
+ccgtctgattgaatgttctcttcaaagttggagttgcaggagatgaagct
+ctcgatctggctctcaatctctcactcagattcacttcttcctctttcac
+agtctcttctttcttctcactaatctcangtggctctccatcttcttggc
+gaactttacgaatgactatagtctccttcacgcgaagtgttccttttcga
+gattctgagcagatctgtaccagtagagctgcacacgttcttatgcaatc
+tctacagacataaaggcgaat
+>AW057378
+tttatagtcacatttttattcaaaaaattcatagggctcaacatgatgat
+tgatgagacacgcgtgctgatgagccgcattcattgctccgagcattgga
+ttcattccggtcaagtaggtgtctggactattttcgagtggcgtgaagaa
+ggtttcggggagctccaagatttcatagctccctccagtgacgggatcca
+gcaaattcggcgggttatacttattgaagttgaccggcaacaccttcgtc
+gccgggatgtttgccttgatagatggatcatagacagcagcatccgaatc
+aatcaatagaccacctcgatccttcttacggcatctgcacactttgaaaa
+gtgagttggaaggatccatcgagtgagatgacgaaaattccgcgcttctc
+tcgattgccgtctcgattgcagcggtaggagcgtcgattgtcggtggagt
+agccaacgtctcaagaagcggagcttggagtggcgatggccgtggtttcg
+gagcgatcggcgagccatatgcaactctagacatcgacaacggctccgac
+atgctatgctccgcggagcccctctcaacgcgctcaggcttatggtctgg
+agcctgcgccggcttgaacttcgagtcttgcagctcgcgacacgagttgg
+gcattgctcaattcgctggcgtatcgatggactttggtttcaacgagctt
+ggttgcacagggaaattggtcaactgcgatgtctacagttctgcg
+>AW057379
+aacctaaaaatcttcatctagttgaataagggcgaaagtttatagtatta
+gacggactcaccgggatgaccatcagacggattatcaagtacaacacgcg
+aaagctctgtctcattcttctcttcttttggtggacggagcatcgaatgc
+tcacggcactgagttccatcaagacgagtaatttgttgcaagagatgaat
+aacctttccacgataagtatccactgatgcaattggattgaaatcgaggt
+aaacactattgggttgtggaagttcaaccaattcatccataatgctccag
+ttatccaacttatttcctcttgcccaaaagtctgtaagtgtcttcaattg
+atggatattctcgaccttctcaagacgattctgattgaaatccagaattt
+caagaggaagatgctcatcgattccacaaacgtacttgatgccattctga
+gccagataaatctctttcaagttgtgaagtcccgaaatgttatcaactac
+agtaatcgcattggctggaagactgagaactgtgagcttcttcaaatgat
+caacattatcaataagacgaatctgattagcgccaaggcacaatctatcg
+agtttcagattgttgtcgagattctcgatggtcgcaattctattgtcacc
+caattcgagatattgcagctcagtcaacgtatccaaacactcgattttag
+tgatatgttatgaacgacaagagagcctcagcttgtcactcgtcaaat
+>AW057381
+taccaaaaaactcaaaacaccgtcagcgacttgttgcatgcagaaataaa
+tcagaaatgattgccacttatccctttgcgttgagatgttgtggcgcctt
+gtaagccaactcgtgtacttctacattgtacagcttgacaacgattccgg
+aacgagcagccgtgagccaaatagctgaagctgagagtccgatttcgcaa
+ccaatgacaacggtgtttcagactttgattcccttggcggcgaaggctcg
+atcacaatcagccttcttcttgaatcccttggcagcacaaacttctacga
+ttgtgaagagcaattgatcctt
+>AW057382
+cagcttgtcccatttttcttcttggtgtctccaaacaatcttcttctttc
+tccgattcaattgactttctactgattaattgtggcattgagttgtcaaa
+tgaacgacttgttgccataactctcttatccgaaattttgacgacgactg
+tagtgagacgaatgaagacggccatactggcaagagcagtagcaatcatt
+gaaatcactgcacacaccagagtaactccattaacagcatgacagatatc
+cgaattaagtgttccagttgagggagttttcagatattgacatggagaat
+cgaatgatttaatgaagagtgcaattgtatgtgcaagatggaaagctgag
+aatacaaatgcatgcatcatgaagagaagagtgaagacaccaaatgcacg
+acgagttgagcagagtccaatgagtgacatgagtgccaatgcggtcatca
+acatatctctagtatccagagcagtctcatcgagtaccgtttatgatccg
+gcaactctgtatgcagttgagtagagaccactgactaggaagatgacagt
+gatgacagcacatagtggtgcttcacttgacgactagcatcccat
+>AW057383
+tatcatccgtagttgattgatatggtgtgtacttatccgtcgagcttgta
+tccagccttggccttacggcagagaacaatggtgttatcagcgcagttct
+tggcagttgcagaatgctcctttccttggtatccataaccatacttctcg
+ttacggacatccaccttggcaagaatctgctttcctccagcaacgttaac
+aactccaacggagatatctcccttgtggttgttgacatgaactggatgcc
+aagctttgtctccaaactgagcagcctccgggaatgggatccatccatag
+tcaaaaccacgaacactgtccggaagatagatgagaagctggatagatcc
+aacattcttgcaatactcgttgttgaaccacgagaagttggcagccacct
+ttcctccttcattccagacacgacccataacaggttcaccttgcatgtac
+cagagagcaacatattggtctggattgactcctggaaggtgtccagagtc
+ttgtcaagagccttgacangttggcgagtcggcccatgatctccctcacg
+aatgtccatccattcgtctctctccgacacgtgcaactgccttttgtggt
+gtggagcagctccgcacacagcacatggggcaaatggtgtgtcctcccga
+tgcagnttgcgcgttgatgatgtaccatctcgtttgagctgtgggcccga
+gcttgcttaatcacccttc
+>AW057384
+catgaactctatctggacatctctcnttgagcaattctcgctgtttccgt
+tattgccctcaacgagaatgaaaacaattatctgcaagagttgctcgatg
+ctggaatttcccaggaaaccgctaacaagcttgtagacatcacagccagc
+cacaacaacgatggagaaatttctgagaaatcaggaaaaactattttcca
+agaaatcatttctgagactgatgcagctatcaaacaagcaccagctaatg
+atcagcaagcctacaaggccttcgttgaaagcaaggcagctgaattcggt
+caaccagacgagatttccattcaagttgaatctgattccgaataattttc
+taaaactcaagtatcgtctgtattacaa
+>AW057385
+gagatggaattgcctttcgccgaccatcggcggagcctctgcgctttgtt
+atgaaggacagagccaaccagctagacgcactcaacaaccgatgggccag
+aacccccaggcaggtgcaagttacccacccaacatgaaccctgtgacaaa
+cggaccacgagcttggacactgaagcctggtggatacatacaatgggccc
+aggatcccggtagcatcaaatcacctaatccaccacgaggactcgtctac
+tatcagcctgagaactatacatacaagcctggaaaaggtggaaaatatat
+gcatgcgtgccctatcctccccaacgcaaacgaacgactctcgggtcaaa
+gcgaccagtacaccgtatccgacagaatcacggtacactggagcaagact
+gcggcatacagtctaagagtctatgtgctctatccggtgcagacagagag
+cgaggaggacatcttcctatccggcactattgctcaagtcatcgcgaacc
+tttatgacctcaaggaattacgaggaaacaagccgccaggtgtgagagca
+cacagaccgatgctgatgccgcaatagcgtagngcggatccacttagaag
+taggaagcgc
+>AW057386
+ggataaacttgacatccattttgttcctctccagctgatgctactgatgc
+tcaagccgctttcgctgcagtgactcctgctggaactgtcaccattccaa
+tgtcggccaccgcctaa
+>AW057387
+ttgaatgtgaataacgatattatctctgattattccaccagtacttatga
+gccaactagcttcactccaacttctggctccttcacccacacttcaacta
+gcggtgaaaatcttcatacaatgccaaatgagttgcagcctttgccactg
+tgtgatatttcatcggaatacgagtcgaaaaccgagccggacactggaaa
+acaccaggttgacgtgattgccgaggcaaatgggatggagatgaagctca
+atatgactggaatgcttaaagggcttaanttgaact
+>AW057388
+aattccaccgcccgttttttggtctctcattgcggagagcagaattcaaa
+gagccgtacaaaagaagacacaccgacgccaacaattctgatatcatgag
+ttggaattgccttttgccgattttcgtcggattctttgtgcttttttatg
+aaggacagtgccaaccagctagatggactcaacaaccgggtggccagaac
+ccccagccaggtgcaagtttcccacccaacatgaaccctgtcacaaacgg
+accaggatcttggacacaaaagcctggaggatacaacaatgggccaggat
+ccggaagtttcaaatcaactaagccaccacgaggactcgtctactatcag
+cccaagaactatccatacaagcctggaaaaggtggaaaatatatgcatgc
+ggaccctatcctccccaacgcaaatgaacgactttcggctcaaagcgacc
+agtacaccgtatccgacggaattacgggagactggagcaagattgtggca
+aacagtaaaggattctatgtgcttcatccggtgcagacaaaaagcgagga
+ggacattttcgatccgtacactattggtcaagtcattggggtacctttat
+gaggttncaagaatttggaggaaaagannagccggcaagtggaaaagtac
+accgattgagctgaatgggcaataaagattccggaccatttgaagaaaga
+aaagccg
+>AW057389
+aattccgaacaactcagatgctgccatgatagaagacgataaaagtaaac
+gagagaatgttccattgacagttgtctactcgaatccatcaatttctgta
+tcaaagaacttttccactacgccacttgctgagaatcaagctggagcatt
+tgataagctggatgatgatgaatttgagaagatgacgtcagatgttgacg
+aaaaggagattctgaagttggcgccacgtcttcttaaagtagccaaaaag
+cacgcggcaatcgagaaatgtctgacacccagagagaacgaagtacttgc
+caagttcttctcaggaaagcagaagctggattcaaatgtattggctgtct
+tggattctgcattggataagattattgattatttgcaaaagaataattgt
+gcggttgatgaggagacaaaggctgttatgaagaagagggataagctgaa
+agcagcaatgatgaaagagttcttggtgtcaccccaatatcttccaaaaa
+catggactgcaaaattcaatgaatggaaatctgaagccgaaaagcaaaag
+aatggaatcaactggttccgtgttttcttctcgtatccaaagcacaaatc
+atttgaagatggaccagaagacacttttggaaaatttccgccgtagnaaa
+tcgtggtattctgantgggacttattctgggacccggagattgtaaaa
+>AW057390
+acgaactgatctctgtgatcatatcataataatggctacaatttgtgagc
+ttgtccagttgccagttggaagtgaatgtggaaaatggacaattttgaag
+aaactcggagaaggcgcatttggtgcagtctatcttgtcagccaaaaaga
+aaaacccaaggtggaatacgcgttgaaagttgaagcagagtcggatccat
+tgggcttgctgaaaatggaagtggctgtgcttttggaagtgaaaaagcag
+aaaatcgttggacgccactttttggagttggctgacagaggaaacctgcc
+acaaaagttcaattacatggtgatgacgttggttggaaaaagtttgcagg
+atctccgcaaaactgctccattcaacaaattctcaatgggaaccgccatt
+tctgtagccagacaatcattggaagctgttgagga
+>AW057391
+aattccctctgaacaacctcaaacgaaagacaacgatggcggacaagtcg
+gcttacatgggcgctggtggctatggatcgggctacatgggatccaatgc
+ttcttcgtcaggttatgcccgcgaagactatgcacaaggaggcaatggtg
+gtggtggacagcagcagaaccaaggatccggaggaaacaccaacccaggt
+gggcaggtcttcaaggcccgtactgaccagtcctgttaccttggatcata
+agaaaatcgacacaagaagagccagtcgccc
+>AW057392
+tcaccgccgaacagctcaccgatcctccacaaatcccgacggccttgtcc
+aactcggtcaacactgcgattggtggaactccatccgactttgagtcgaa
+ctccgggctatctgacacctcggcaggatcgggccgcgccaactcggccg
+tttccgatacgaccacagcaatgtcggcgaacgtctccggagattattat
+gaatgatcttcgaaaaggaagcctgtttggccgacagatagggagtctgg
+ctaccacggatggagggccccagagtgttactatcattactttctcgaac
+actactaacatgtcaactgcttcccagcccaccagcctcgatgacaaatc
+gcaaaagtcgcagaaaactggatcaatgaagactggaattccgatgagat
+cgcctggatcttccatggctggcacaggtgcgatgtctcgtaaaaagtcg
+tcgcaaaagcagatggatgctctgaagatagagcaagtgccggctgctcc
+cgatctctcaatataattcaatacatctaagatatcgaagagtcgtaaag
+gc
+>AW057393
+tcgatcaaatcaattcgaaaaaatcatgccgtctttaaaaggaggatgat
+gtaatgaaaaatgtaactttcgctgaaggcaaaaaatttggtgactggaa
+aatcggcaaaacgatcgatgaaggaggatttgggaaggtttacattgcaa
+catcaatcagcgatccaaagaaagtggctgctttgaaagccgaatcaaat
+gaaatcgaaggaggatctgcaatcaaattggaggcaatgatcctaaacaa
+actgaatgccaatggacccgttccccacattccagtcgtccactaatgcg
+caaaacgaaagctctactgctacatggtgatgacgttgttggggagaaat
+ctacgaaaactgaaatccacaaatctcgtagtcaacaatggattctcccg
+tggaacgtggagccgaatcggaattcaatgcctgtatgcattgaaatatg
+tgcatgacaatggatttattcatcgagatgtgaagccacaaaacttcttg
+ctaggaaatgagacggatagtgaaagagcaagaattgttcatatcttgga
+ctttggtcttgcgagacctttcgctgtttttcatgcccgagagaataagt
+ggatcgcacgtagagctcgtggaactgcagagnttcgtggaactctccgt
+tacacgtctccgaatgttcatctncgaaagtaacaaggacgggttgacga
+tgtatggtccctgctatatgtcatcattgagctcaacggtgataagctct
+tccatggcaaaccgattctcaacgtcgacgtgtggagcaaatgaagctga
+acttgccggcgaaggtngtctgtcaatatgccagcctgtttgataagtga
+tgcct
+>AW057394
+agacaagtacgagtctcaaaactttctgtttcgagtggaaggagccatgt
+tctgcgcgggaattatcgttgctatgattatgctgttcgtcatcattatc
+tacggaataatcacttcaagtcaaactggaggacagctcaacagatacat
+ggccccactattc
+>AW057395
+tggcctgtgctaaacccgcttccaaccacctattcaattcctgtacgatc
+atccagactcctcatttcgaatgatggaattctagaagattgttgcttta
+aagtctgaagaggaaaagctcccatcactattcgaaaatgttgagggact
+gttgtctgtcccatctttaacttttggtacgtgggatgatgacaccctgt
+ctggtgtcacatctgttaatctagataagtctgatgaacaactctgcgag
+cgtgatgatgactacaccactgattggagtgctaat
+>AW057396
+ttgtctgcaatggctgccgcccagaatacgctgccaaagcgaatgaagaa
+gaagatgcaggtgtagagaatggctcatgccacagcttgcaaggttgcaa
+agagagaggctcgcgtcgctgaggaagcatctggaaaatcaactggtgga
+tctactcgcggagccaagtgatagccgagccacaacacatg
+>AW057397
+agattcaaacgccagagttggaagttccgtcaatcaacttggattcaagg
+gacatgggggaggacttggtccaagaatcattatggctggaactttgact
+gctcttcaatggttcatctatgattcgatcaaggttgcaatgaatcttcc
+tcgtccaccaccaccacaaatgccagaatctttgaagaagaagcttggaa
+ttccgggaaccactgaagttgctccagtcgctgaaaaagttgctgctcca
+gagaaaaactcaaaatgtgagaaacccagaaag
+>AW057399
+tctatcgaatttattcaagaaaaagttcggaactttatgataatattgca
+tatcttcatcatccaagtggtgtcaccgttgtcgttcttcgaaatattcc
+tgaaagtgaagttgtcgaagtggattttggaacgacgaagaagcacggcg
+cagatcgttcaacaaatcaagtttctggaaaaggaaaaaaaggagctctt
+attctacaacccgactcaaaattgtgtacattcaaatgcaaagatggttc
+agagcctgtcctgagagcaggtcgtcgtggaactcttgttgagatgaacg
+atcgcctgaaaactacaccagattttattagaacagcacctgataatcag
+ggatttatcgcgattatcacctacggagccggagtacgtgaaactgaagg
+aatgggagatgaccttcctccgaaaagacttttctta
+>AW057401
+taaatcatgttcaagtacgttctcctcgtcaatancttcatcgccctcat
+cgagatggcttcagccgatttttcgtgctatttctcggattccatctgca
+aatccatcacctgcagaaactgcaaagtcgccacctgtatcaccggagac
+tgcgtctgcaccctttgctaattttttgaataatttttttatcttttga
+>AW057402
+tctgaagcncttcatcagatcgtctaaataactgccagttgaacaattag
+ttgatgactccgggatgctcatttcgcgttgcgtcgaga
+>AW057403
+agctatggcttcctcgttcgacaaccaaatggatcaggatggaatgtgct
+ccgtgtactctgctcagccatcggagacaaattgctccatcaatgaggtg
+cttgccaaagaaattatcgctgtcaatgagacgccagatgatcaagctga
+ttcttcgatctacccaatcccaaaatcagaaacaaatgtgtcagctagcg
+aagggttccagccatgtcaagatatcaatcaattcaatttgtccggttac
+tctgctccaaaatccgagaccccagtgaccatgaatgagaagttcgagcg
+gtgcanagacttgatgaacgttcttgactactctgtctactcaatgccac
+catcagaagcaaatgttacaatgaatgttgcaagcttctcggagtacact
+gcccttgcctcggagaccaacgtcacaatggctgatgttctcaagaacgt
+tgctcaggatttggcgtcagagcacactgcaaaatcagctcatccaacat
+tcgacaccaccgcctacgtagagcgccttcaagccgagnctcggattccg
+gatagcaaagtgattggattggagtgcagcaatttttcgaatgcgaagat
+cattgattcaattgaatgccttcatcagttggacanattcaaaccaatcc
+ngtggattcgatcagaatnctgattttggaaaaactgcccaatctgtgga
+aagctacctnnctgcgtcatncaatgcagntcatncatcacagatctga
+>AW057404
+agcttccggtgttcgtcaggtctccaanctactcgattaaaatttatgga
+gcaattcaacatgacttcggcatgtctgaagatgaatcagtccaatatcc
+ttctccgatttctccatcctcggattcttcaagtgaaacaatcacccatg
+tctcccctggatatctccttcaggaagtcttcttgcagagagcagtcatt
+gaagaacaaataagattattgatggttttcatgaaattcagaagctcaa
+>AW057405
+ctatccgacagcgtcttttgatggagcagagtctgtcgatatttctgtgg
+actaaggaaaaagtccagtgagcacacacttgtcgttggcatcaaatcca
+actgaattcaagttcatggatgaaattgcacttttgaagcgtggccgcat
+ctacaaagacgctccaaagcatccgtacaatcgtcgtggccagcaaccaa
+tgatgaagaaaggtatattgtgtgacttgggtaatttcatcagcttcttg
+cctgctagaagtccatctatcatgacgttgatgggcggagttccagaggc
+cgaaaaagag
+>AW057407
+aaaaataagtctacaaatgatcaacatttatccaccatccggcgactacc
+cagcttctggtggttcatcaactcactacattgtctccgaatcggaatct
+cgtttggcattcaaagtcaagtcgtccaacaatgaatcgtatcgtgttcg
+cccagtctatggattcgttgatgcgaagggaaaggctaagctcgaagtga
+atcgtttggctggaccagcgaaggaggacaagcttgtcattcaatacgcc
+gaagttccagctgatgagaccgatccgaangctcccgttgggggcttgtg
+ctcaacaaggagaagtcgttgtcaagatggttgctagctaagaa
+>AW057408
+ctgccgaagtactgtctacaatacgaagattgattactctgatatgaaag
+attccgttgagctacttcgtcaggtgcttgtctacaatccgagcagacga
+ctctgtggaatagaatttctcacgaatccattcttcaccgtgttgttcaa
+cgagaagactgtgcgtttcaataaaaagaagatccaatgcgtgtcagctg
+tcgatctacaagctgtgaaatcgggagacgtcacactgacaaatgagtct
+gtagagcactccgacctaatc
+>AW057409
+aggtgatgtctacaagaaggcagtgcagttctactcaaacatcacggctc
+caagatcaacatctgtgcttgctccagtgatgtcgtcccttgaagtctac
+attaacaccaccacgacatctgcttttgctccagctcagagcattcaagn
+gggctgatattcttgaagaagatgcagatgcaatccgtgtgaagtcaatt
+agaatggctggattcattgcccaatgcatcatctttttatttgtctacac
+catcgtcacgatggatgttgagatttggaaaattaatatggactggttga
+aaattcaatattttcagcatttcgaagactccgctgctgaagttccggtc
+ttcaa
+>AW057410
+gactaccgtggcaaaagtcggatctaaaggttctaggatctacaaagaag
+acgattcaagctgtccgaccaacggtccgcacccaaggagcggttactcg
+ctctcaggctgctcttcgtggacatatggggatcactgactcttcgactt
+cgaccagttcatctcgaattccgaaggagaagttgaaaaagaaggcatca
+tctcgtagccgttcaagatcccgttcgaaatctactcgccgctcacgttc
+aaagtctactcgctcacgttccagatcgagatctcgaagccggagtagca
+ctcgtggaaagaagcgtgctccaaagaaggcagttaccacgaaagccgct
+cgatctatctctcccgtcaaagtgaaaaagacagaagccatcaaatcgcg
+tggaagcagcaaaaccgcccgtcgtgtgtctgcggctcacaagtaaataa
+tcgtcgcttcttgatggc
+>AW057411
+tcgactaaccgtctccacttttcacttgcacaaatcttcatgcaaccaat
+caacgtcatgctcgctgttcttctcgccttggcttcatttgctcaaggag
+gcagatctgttgctccggctggtgcagtcactgaaccaacagttactcaa
+gctgttccagaaggatcaggacttagttcagatgtcactgatcgtccaaa
+catcgactccactgatgttgtatcaaatgcaacttcggtggaagatttgc
+ttggaagttcaacaaatgcaaacaacactggtacattcaaactcttaaga
+cctttgtatttgctccaatgatgattcttgctttggtgc
+>AW057412
+ttcgaagaagcccaacaactaaccccaacaataatgtcttgcgttcaaca
+acaacgacgctccaccggactccgcatcgccgagcgattgaacaagtaca
+tcactcttccaaatcgccacagcttcacagtggattccaaggatgtgttc
+caacgtggtcaggtgctcagctacatccgatccaaagcacccttcctgct
+cgaacatatctccgaggcaaaggagcgattgataaccgtgacgtcacgcg
+gtttgatgatcatttacgagaatgacgaccacggatttggtgattgattt
+gcgatcggccaggaatgttctgtgcactgctgatcgttcgaaaaagcagc
+gtcatttccgctgtcacatcaaaatccgcatgcaacgtggcaatgttcac
+ctctttgtcggccataatgatgttcacaagtggacgtgcgcgattatgag
+agccgccggaaagtgcttgccgtctacagagccccgcgatgatggaagcc
+ttaatgtcgcgatgatgacggcggtggaggattctggaatctttgaagag
+atgtcgtctacatcgtcagcatcttatgactatgatgaggatgatgaagt
+cgacgaggtgaagcccactgccatcgagcaaataccacttccccatgtct
+ccgtnctgtctcttcgncagaaactcgagaaggagctcgtnctgaagcct
+aatgagcaagtgctcgccgagcaacaacacacagccaactgcgatgagcc
+aagctntgtctcttgaatcttctgcaccccaagagcagttntcagcgatg
+aaactgtagtctgctggagcccacgaatgccagatagaggagagaatacg
+aggcnca
+>AW057413
+ttatcgaaacttgtgaagaaggtctgcacattctgccgcaaagaaattga
+tgccaaagcgaatgaagaagaagatgcaggtggagagaatggctcatgcc
+agagcttgcaaggttgcaaagagagaggctcgcgtcgctgaggaagcatc
+tggaaaatcaactggtggatctactcgcggagccaagtgatagccgagcc
+acaacacat
+>AW057414
+tcccttctcgccatgtttgtggctcaggaagtcgccgaggaagctcttac
+ggatccagaagcagctgaagcggataatgccaaaaacaatgcaacggatg
+ctcccgctgatgctacaccgggatctggatcagatgctccagctgctcca
+gaaggatctggcgccgaagccgaagccaccacagcgaagagttctactgc
+tgcagtgaccatcattggagcaatcgccgtttttggagttgcccatctcc
+tctgagcattcttatcacttc
+>AW057415
+ctattccctacctcgatcaaccatgggtttggacaaaggaaagcaacaaa
+ctgccaagaaacctggtttcaactgcaaaccgtttgagttcgaaatctct
+tcaacgaaatttcaaattcccaacgacaagccattgaagtacactttgaa
+atgcactgctgatgagaaacaggatgttatcattcaagttcattcggttt
+tcttcgaaattgttggtgcaagacgtaagcatggagtcacttctcaagag
+tttcatgttcttgggaaaaaccgacgaattgacctttcgaaattggtctg
+aaggatttgaccaatgtttcgtacgataatctcgagcatgcgcaacttgc
+gtctgccgctggttttatcacgttcacacactacaagtcttctagagatg
+atgattccgatgctttatggggtcccaacaagaacttgctcatcgtcatc
+actgacggaatgggagaagtaggattctggaagaagaattgtcgttgaaa
+ctgaaccgatgagagaagatgtgaagaagatttgttgtgaacttgaagag
+a

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/4.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/4.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/4.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1106 @@
+>AW057442
+tgctgcatctaatggcaccactgatatggagcaggagcattatctgcagt
+tggagttgaaccagtgtcttcagttgatccttggaccacaaagacggcga
+gaa
+>AW057443
+tgagaaggatgaaccgtcagacaactgtcttttctcctcatcgtctcgat
+ccttgccgtttttgtccaccatggatttgctgctgctgaagaagagaaga
+atacagcttcagtcgtcagccctgctccggactctgaagcagcccaacct
+gctggaaacggaaccgaaacaccaaaagatgaggtgaaggatgaggcacc
+aaaagaaggtagtgaaactgaagcttcaccagaagccaagacaaaaggat
+ctatggtattccatgctcttggagccatttccacaagtggttctggccgg
+cattatgtgaagaagtctgccgaa
+>AW057444
+aggcgatgtctacaagaagccagtgcagttttacacgaatgtcacagtgc
+cagtgttcgctccagtggtatcacctctcgaggtctacacaaacaccacc
+aaggcaactgcttttgctccagctcagaacatcaaagtggctgctattct
+tgaagaagatgctgatgcaattcatgttaagtcaatgagaatcgctggat
+tcattgcacaatccatcctatttttgtttgtctacacaattgtcactatg
+gatgttgagaattccgaagaatctgaagcagaggttcccgtcttcaagct
+ctacagagcccgtgagattcagacatgcccactgccagctcgacaaaatg
+t
+>AW057446
+tacagtgcagcttctggtgctcttcttcaagttgccttcaagaacttgac
+tgctcagaatagaatccacatgtatcagattctcttggtctcttcgttcc
+tcttctcgaaggccctcgtatacggattgttcggatctgctgaacc
+>AW057447
+atcaatcaactacggatgatatggtgtgtacttatccgtcgagcttgtat
+ccagccttggccttacggcagagaacaatggtgttatcagcgcagttctt
+ggcagttgcagaatgctcctttccttggtatccataaccatacttctcgt
+tacggacatccaccttggcaagaatctgctttcctccagcaacgttaaca
+actccaacggagatatctcccttgtggttgttgacatgaactggatgcca
+agctttgtctccaaactgagcagcctccgggaatgggatccatccatagt
+caaaaccacgaacactgtccggaagatagatgagaagctggatagatcca
+acattcttgcaatactcgttgttgaaccacgagaagttggcagccacctt
+tcctccttcattccagacacgacccataacaggttcaccttgcatgtacc
+agagagcaacatattggtctggattgactcctggaagggtgtccagagtc
+ttgtcaagagccttgacaagttggcgagtcggccatggatctccctcacg
+aatgtccatccattcgtctctctcgacacgtggcactggcttntggtgtg
+gtggagcagctccgcacacagcacatgggcaatgtggtgtcctccgatca
+gtttggcgtgatgatgtacatctcgttgagctgtgcccgagctgcttatc
+acctcccattgacgagaccatgcttctcagtctgtgt
+>AW057448
+aattccgaattccttcgtctcaactgtccttctcctctctgttacaattg
+ctttggtgtctggatatccatcccaactccaaacaacttgtgtcacaaaa
+gctaaaagttgcaccatgttctttttaaatggagtatattgcaccgagtg
+cacgtattctggaactcttgaactgaaaattggctcaacatgtacttttt
+ccatttacgagaaaaaagtggcgagccagccaaatgaaaattcacaaaat
+gaagtagctcaatgcaaacagtcatcatgctactcaaatcaattttgtac
+cagttgactgtgcggctgcttttggaaatgaatatat
+>AW057449
+ataacctctcccaacaacacctcaagatgaatgccatctacactgccgtc
+cttgttgcttcaactctcgcctacactgcaatggcttggattggactcag
+cattgaagccgccaacgaggatatgatctgaagtggcgccc
+>AW057450
+tcgatcaaatcaattcgaaaaaatcatgccgtcggaaaaggaggatgatg
+taatgaaaaatgtaactttcgctgaaggcaaaaaatttggtgactggaaa
+atcggctaaacgatcgatgaaggaggatttgggaaggtttacattgcaac
+atcaatcagcgatccaaagaaagtggctgctttgaaagccgaatcaaatg
+aaatcgaaggaggatctgcaatcaaattggaggcaatgatcctaaacaaa
+ctgaatgccaatggacccgttccccacattccaatcgtccacttatgcgc
+aaaaacgaagctctactgctacatggtgatgacgttgttggggagaaatc
+tacgaaaactgaaatccacaaatctcgtagtcaacaatggattctcccgt
+ggaacgtggagccgaatcggaattcaatgcctgtatgcattgaaatatgt
+gcatgacaatggatttattcatcgagatgtgaagccacaaaacttcttgc
+taggaaatgagacggatagtgaaagagcaagaattgttcatatcttggac
+tttggtcttgcgagacctttcgctgttttcatgcccgagagaataagtgg
+gatcgacgtagagctcgtggaactgcagaagttcgtggaactctccgttc
+acgtctccgatgtcatctccgaaggacaaggacgggtgacatgatggtcc
+tgcttatgtcatcatgagctcaacgtggaaagctctccatggcaacccga
+c
+>AW057451
+aaaaaattaagtagttcacggacaccatctccaacacaccactggccagt
+tctatcaatcagttttcggttcgtttaaactcgaagaacagtcttgacca
+acacgagatgtattcacttttgacgctcctcttcgtcctcttcttctctg
+gaagcactctgctcgttcaatgtggtggaaagaaaaagggagcaacttct
+gccgaaggaaaatcttcgacgatgggcccggctcctggaggagctcctgc
+tgctgcttccgctcaaggagaacctgaagagaaggagtaatg
+>AW057452
+ttttctcgtggtgatcccaagcttctagtatatgacgttcaggctctggc
+tcctcggtcatttgctactgttccacgtgttattgataaaatccataagg
+cagtcatgaagcaagttcaggataaaccactcaagaaaatgattttgaat
+gcggcaatagcctataaactataccattataagatgacaggcaaagctac
+tcgtaaaacatgggtagataagtatgttttgcataaaatccagatgcttc
+tcggtcctaacatcagacaattgattcttggagctgcaaaatcagacgtt
+tccgcaatgaggtttgctcgtggggcttttggagttgaggttctggaagg
+atacggacaaactgaaacatctggaccaacaacacttcaattggttggag
+atacacgtattggatgtgttggaccaccgatggcttgtgcgatgataaaa
+ttaattgatgttcctgaacttggatattctgttgttaaaaatggtggaga
+agtacttgtgaagggtcataacgtcacttcaggatattacaagaatccag
+aagcaactgcatcatctttcactgaagatggatacatgacaactggagat
+attggaagattcacttgctgaggatctcttcacattattgatcgacgcan
+acacgttntcaacatgccacaaggacagttngtggctcagatctcacaga
+atccctctacactttctcgagtttgttcaacagattacgttcatggcgat
+actgacaaccgtggcttgtagcaatcgttggtccagatccagag
+>AW057454
+cgtttatactgattgcctttgttcttcagtttggagaaggatcaattgca
+gttcaagaagttaaggatggcgaaaaagtgcaaattgaacttttcaaagg
+agccaaggcaatccagagatccgttgacgctggcgaacagattttccatt
+tcgaaggagaaaacaaaggagtgtttgtggatgctaatggaaaagctatt
+gactcgtcaaattatgaagagaataacggacatttggtcatcaaaaagct
+tacaaaggctgttgtttgatcatattctgaatattcaacgaaaattatca
+aaacgaaaacggatcatggattttctggagttgccgcgccagttctcaaa
+ctttctcttc
+>AW057455
+agttactcaagatgctgaaagttcttttagcaatcggtctactgtgtttg
+atagatgtttcagctcagttccgtgctgaatgtgagcatccgcttcattt
+tggagttcagcaatgcaccaacacttcagttgtcagatatcacttcgaca
+tggaaagcaagaaatgccttgctttcaaatacactggatgtggaggaaac
+gagaacaatttcaaggattactcagcttgctcaaacttctgtattccaat
+ggactatttcacatgcccaggtggcagtgatagtgtcgctggaaaggaac
+gaaagagccactgtggaggaatggaacaattgaagtgcgatggcccgaat
+actttctgcttgaatggcccattcactggaatctgttgcgacacgagaat
+cagagacaaaatcgatgccgactacgccaaggagtgtggaccaggaaagc
+tgaagcatcaaattgatattggaggtgtcaagatcccaatgttcggaaaa
+acttgcgattctacattctgcccagcctatacaaagtgtcatcaaggaaa
+ttatcntgcttactgttgtgcttag
+>AW057456
+tttgagatgtctaaatacgcaattctctgccttgttctggtcggcaccgt
+tgcctctttggactttatcggtcgtacacaatctgctgctataaagggaa
+gattagtatgcgagggaaagccagcttcaggagttaaagtcaagttgatg
+gagtccgataacagttttggacctggattccttgacagcgatgataagat
+ggcatcaggaaaggctgactcgcatggagaattcaatttgagtggatcta
+ccaaggaaatcaccggaatcgagccctatttggtagtggttcatgattgc
+aaggacggaattacaccatgccaacgcgtgttccgtgtcaacgttccaaa
+atcgtacaccaacagcggaagctctgccaagaaaacctatgatgctggag
+tcatcgagcttgccggaaagtatccaggagagaccagaagttgcctcaac
+>AW057457
+tttctcggaacaactccaagcgaaaaaaattgttgacaagtcggcgtaca
+tgggtgctggtggctatggatccggatacatgggatccaacgcctcatcg
+tcgggatatgcccgcgaagattatgcacaaggaggaaatggaggcggaca
+acaacaaaaccagggaaacggaggaaacaccaacccaggaggacaggtct
+tcaaggcccgtaccgatcaatcgtgctaccttgggccataagtagctgct
+cgaataatgtgaagactcagccag
+>AW057459
+aattccttggaaggttctagctatgaacgtcaccagtgtcacttcagagg
+atggtgttaaagaattcgaaaagattgttgtggaacctgaagatatcgaa
+tatgttgagattccggccgatgccaaaaacgttgacttgacgcgtcaccg
+tatcaaagaaatcggtgattattcgtggctcactcacgtcgaacacttct
+cgtttcgttggaatctgatcaaaaagattgaaaatctggattgtttgaca
+acgttgactcatctcgagttttacgataatcaaattacaaaagttgaaaa
+cttggatagcctcgtcaatttggagtcactcgacctgtcattcaatcgta
+tcaccaaaattgaaaatttggagaagttgacaaaactgaagactctcttt
+tttgttcataacaaaatcactaaaatcgagggtttggatacgttgactga
+gctggaatatctcgaattgggtgacaatagaattgcgaaaatcgagaatc
+tcgacaacaatctgaaactcgatagattgttccttggcgctaatcagatt
+cgtcttattgaaaatgttgatcatttgaagaagctcacagttctcagtct
+ttcagccaatgcgattactgtagttgataacatttcgggacttcacaact
+tgaaagagatttatctggctcanaatggcatcaagtacgtttgtggaatc
+gatgagcatcttnctcttgaaattctggatttcaatcagaatcgtcttga
+gaggtcgagatatccattcattgagacactacagacttttggcaagagga
+aatagnggattactgagcattatgg
+>AW057460
+gcttcttgacatgataaccaagacggaagatcacttcgctcagagaattg
+tgtccagtgtcgcggtaaaccgaagcttcacgtgaaaacttcagaaattc
+attttcgtgaagtgggacaaccattgcaacttcgccaatcaatgcttgaa
+cttcatttgccgtttcgttgaaagcatagatggctttacgatacgtttcg
+ccttttgctttcacctctttcgtggtttttgaagacttcacatggtgacg
+agaatcatcgattgcatggaccaagttattcaaattttctacgttcgaat
+tcaatatctcactttcctctccagtgtacaaacgcatctttntaatcaga
+tgagaacgagttcttttctgcacctggatctccttttgagcaattcttcc
+acaggcgctactgtatttttccagcatcttagaatgttgcccatcaaact
+gatattttctcacataattcaaccatcctccaagaacttcccatggattc
+tcgccgggagcacacaaaactgctttcttgccattgccttggcatgacat
+gacttgaagtcgatacattcacattcgtgatta
+>AW057461
+tcatacacatcctcctatggcttttctttaaattcaccggctactttctt
+tcctccgaagaaaatggcaatcgcgggcaaggcggtgatggcgagaacga
+aggaagcacttatcacatttg
+>AW057462
+ttttttcactaacaaactttttattctttacatttctacaatacgagaac
+gcatttgtggatgctatttaaatgttctggctagatttctcgtgtggaaa
+ctgaataaacttaaagcttagacaacatctgcttcgcatcatcggcaaac
+tcttgttcagcgtcggaatacggttcgatgtcacacacttcggtaaggta
+cttgcgagcgttgttcttgtctttaattgctacgtagcatttggacacat
+aaaatgtgtnctccatccattttggattgaccttataagcagcaaggaaa
+tcctcgttggcctcttcataggaatgtgatggtggttgctgatagaacgt
+ggcagcaagcttcttctcaagccatgtcagagatgcaaccgagtacttgt
+aacggccacgcaaatggaggagagccgtatcctttggctccttagcaaga
+gctttgtcgagcaattccttgaacttcttactgcattccaacttttcttt
+cgttgccatatattcggttgcttgtccagtcaacacagcattccacttca
+gcgcctcgaaatggtttggatccttctggacagcttcttcagcatacttg
+agtccttcngtgacacttgccttacgttgagctnttggaacacatgcaga
+cttctcatgaattacttgagcaagtctncacaagacttccaccgaacgat
+cacatgtcgagacgcgctttgagcaaatcatatccttgatcgcgntcttg
+ggtccgaagtcttgtcattcatcaaactcat
+>AW057463
+ttttactcaaaactatctatccaagttaatcagtagtgttagttctagtt
+aagttattaaggcgcacggtctgtctccttgcttcttctctttgtatccc
+ctttctcctttttcaaaacttcactttcatcaataattggttctttagaa
+tacagttttccaatttccacgtactctcttctcttccgatccttgtcaaa
+ctttttcttcgggagctcatcttctggaactactttcacatttttcgatg
+gaaccaaacgggaacgagttggcttttccaccaaaagattagcgtactcc
+gaactgtatttccccttctttttcttttcaagaggaacattttctcgttg
+agtatcatcgtcctccaaactttgttgagtagtcatggactgggtccgag
+agaattcaacggtaggcatggaacctttgctcttgtcgtcgtttgccttt
+ggtgcctttcccttttgaa
+>AW057464
+tacaaaccctgcatgcctctgcaagattcttgctgatgaactggtactcc
+acacgatcctggtaagctgttccgtgtaatgtatccataactttactgag
+ataatacgtgtgtctgcgccccgcgaacatcgaggaccgctaacttctac
+ggcatttctctcttctccttgagcacctcatgcttgtcagagagctcacg
+ccaaggtataccgctagggactgattagcatctacacgcacttagtacca
+gctctcatagacatacttcggtcccatatcgctgttacgaccgtaggcga
+tcggatggaagcctaccgctctcttctatgcagctggttctgctagtgtc
+tccacctcattcttcgttttcacgcacttg
+>AW057465
+tcatgtctcctgcagcaagatcctcgtccactgaatccttcttctctcct
+ttggttgcttctttctttcctccacagaagattgccacggccggtaagac
+ggtaacagcaagggca
+>AW057466
+cgcggaccgaccgactatcagacgggtttgctaccactgtatgttcatga
+agattacgaatatgggattctgcttgaataagtgtttggtcggctttttg
+aacaattgcattgaatgcatgtgtggcgcctaagtattgtgccat
+>AW057467
+gtggaaaaatccccttcatttctaaaaccgttacaatagtatctgtttaa
+cacatcaagtataggattcattgcccgacaagctccagtttctaataatg
+ttcgatgcacgaggccaagccattgagcaccggcagctgatactccgact
+ccattgacatctggttgaccggcatgaaggctccgtagctgttctgagca
+actggggcgatgcgctcgacaacaattggagcagaggcttgaactggagt
+ttgagcattcttaataagccctctgaatcttgaagtaacagctggcgagg
+ctggaacttgagtgatgtcctcac
+>AW057469
+tttgtacgcaccggatgaagcacatagaatactttactgttcgccacatg
+cgtgatacagtatcacgcagtaccgacggatctgcgtactgttcgctatg
+cgccga
+>AW057470
+ttttggtaaaaactaaaagatttattactgaaatccatgggggtacattt
+atatcaggccgcttgcacctctcattgcctcgaagtaggcgatccaccac
+atggagttgcttcaagacacacattcacctgactcttcttgttcttctta
+cgagactgtgaacggcaactcgacgatccagcagactttgcacttttctt
+cgaagaacgacttgaacgacgggatgctcctctaatagatatcttctctt
+tatgctcctcttcgccaccgcaaatgtagttggacaaccagcatccacct
+ttgttagtgttgcgacgtttgtattcctcgacaaatcccatcagatttcc
+aattggagcagcggtcagggcgtctttctcaatgttaactgcagtgatcg
+tgtgcagctcttcaattccaacatggtgatccgattgtttctccacacct
+ggtacttcctggcattgtggctacaacatgtgaacgctattactgacatg
+tggtcttgactggaagtgaaaatcgcttctgatgctgaaacctgctgaga
+ctttgatcgat
+>AW057471
+taacctgctccttcttcttgaaaaccaacgtagcacgacgagtacggtcc
+ggagagccaaacagtgagcgcttcttcttcaaatggtccggaatcttttt
+gcccatcagctcaatcgtgtactttccacttggcggcttctttcctccaa
+attcttgaacctcataaaggtacccaatgacttgaccaatagtgtacgga
+tcgaaaatgttctcctcgctttttgtctgcaccggatgaagcacatagaa
+tcctttactgtttgccacaatcttgctccagtctcccgtaattccgtcgg
+atacggtgtactggtcgctttgagccgaaagtcgttcatttgcgttgggg
+aggatagggtccgcatgcatatattttccaccttttccaggcttgtatgg
+atagttcttgggctg
+>AW057472
+ttcactccggaaatgatttattggataaagggtggctagtgttatttctt
+tgagcttttactcctcttcaaagtgcgatgatcactgattgtcgatccat
+tatccttttctgcagttgccttttccattattttagtatccagaaacatt
+ccctgacaatcatcctctctttttgtctcttcaaatgattttacatctcc
+gggtcccagaataagtcccatcagaataccacgatttctacggcggaaat
+ttccaaaagtgtcttctggtccatcttcaaatgatttgtgctttggatac
+gagaagaaaacacggaaccagttgattccattctttggcttttcggcttc
+agatttccattcattgaattttgcagtccatgtttttggaagatattggg
+gtgacaccaagaactctttcatcattgctgctttcagcttatccctcttc
+ttcataacagcctttgtctcctcatcaaccgcacaattattcttttgcaa
+ataatcaataatcttatccaatgcagaatccangacagccaatacatttg
+aatccagcttctgctgtcctgagaagaaacttgcangtacttcgttctcc
+tctggtgtcagacatatctcgattgacgcgtgctatttgctactataaga
+gacgtggcgcaanctcagatctcctatcgtcacatctgacgcatctcctc
+aactcatcatcatccagctatcaaatgctcagcttgatctcagccagtgc
+gtagtggaaagttccttgatacagaactgatggatcgagtagacacatgt
+catgggaccatctctcgtttattttatcgtctgtttcttggc
+>AW057473
+aagtattgaggatcgcttgcttggcttgtgcaactgcgttcatgtcgagg
+atctcttaccatggaacacgtccaacagccagcttaacttgcatgtatag
+ccagacctccgcgtgattttttcttgcaagctgctgactcttatggcaag
+cgattggcgcatagcgaacaggtccacaggatccggcggcagctattggt
+ttgcggatcactccatagctgcgcgtgaacttgcgaccatggcgagagcg
+cgatataaacttacgaagtcattca
+>AW057474
+gatttggtgttaaccattgccttttcaaggagctcgtagatggtcgtgta
+ctccggagcatcgaagaactttcctttgtcaagaatcgggaaaacttcag
+tgaattcacgcggacaacctccaaacagacacctaagacgagtcgtttta
+cattccttcttgaacactcccacatcatcactttcagtcagattacgcca
+gggaagacgtccacatgtcatttcaacgaccatgtagagccaactttcaa
+tgtcatccttgcgacactgctcacgttgaatatggcaagccaatggagca
+tacttcaccgttccacggaatccagcacgagcacgtggattgcgaagagt
+tccgtcttcacgggcaaacttgcgtgccattccgaaatc
+>AW057475
+ttcataattatttattaaacatttaataagagctacaatgtttcccgtat
+ccaagaaacttctcgaaccagttgtagtcttcgcctttaacagtatccat
+ttcgaaaactccaatgtcaaaaatatgaacatccggtcgacatggattga
+cataccattccggaatgggaatctttacaagattcttggtacatctctcc
+ccattcatcacttctgttccgcaaaaattattgatccatagatacggttt
+gaatccaaaattcagcaatggaatgtcgactagagtctgaaaagaaatta
+tgtaatttctgtgttttcccacgctgtaactattcgaaattaatgggaat
+ttgtagaaataaaatttggtaccacatcattcccgttagtcgccatctcg
+aatctcccatcgactcctgttttttgagcggaccacatttggtcgagagg
+tttgggctcccaaatctgaaccatctccgcaacatatggtcttnctctgc
+acattagctggccttttacgtgaacagaagcacacgctgcataacagtag
+tttggagccagaagacacagaagaattagaaagagtgtcttcat
+>AW057476
+ctcatccttctcccaatcgtactttgaactcggtgttatcttctcgttct
+ccatcacttgccagaaacatttgaagatcatgtgataatccggtctttga
+tagtaatcgagagaggccaaatgaggcatcactttatccatacaggctgg
+catatttgacatgacaaccttcgccggcaagttcagcttcatttgctcca
+cacgttcacgttgagaatcggtttgccatggaagagcttttccaccgttg
+agctcaatgatgacatatagcagggaccatacatcgtcaacccg
+>AW057478
+aattttaaaattaaaaagtcgtttttttacttgaaaagcaacaattgaag
+aacaacaatatcaacaaacaaacaacttggagaataaattatcatgactt
+taagaaaagtcttttcggaggaaggtcatctcccattccttcagtttcac
+gtactccggctccgtaggtgataatcgcgataaatccctgattatcaggt
+gctgttctaataaaatctggtgtagttttcaggcgatcgttcatctcaac
+aagagttccacgaacacctgctctcacgacatgctctgaaccatctttgc
+atttgaatgtacacaattttgagtcgggttgtagaataagagctcctttt
+tttccttttccagaaacttgatttgttgaacgatctgcgccgtgcttctt
+cgtcgttccaaaatccactt
+>AW057479
+tttctccttacagaaacacaccatactcttctttcaccttattcatataa
+tcgggctcagtttcaacagacaatcacttcttccatataccaactccatc
+cattccgctagtgacgacaatgagcaaggtcttgttgccacccaatgaag
+catcagaataatcatctctagaagacttgcattgtgtgatcgtgatgaaa
+ccagcggctgacgcaggtggtgcatgctcgaattgatatgacgatacatc
+ggtgaagtccttcagaccaatctcagaaggcattccttcgattttctcaa
+gaacatgaaactcttgtaaagtgactccatgctcgcgcgctgcacgaaca
+gtgtctaacaacaccgagtgagcttgaatgatagcagtctgcctctatca
+ccagtgcagttcatgcgacttcactgcttg
+>AW057480
+tcttcagatctcgtgatggattgaactggcattggatgaacgcaagaggt
+agctttccacagatttggcagtttttccaaaatcaggattctgatcgaaa
+tccaccgggattggttggaatttgtccaactgatgaaggcattcaattga
+atcaatgatcttcgcattcgaaaaattgctgcactccaatccaatcactt
+tgctatccggaatcccgagctcggcttgaaggcgctctacgtaggcggtg
+gtgtcgaatgttggatgagctgattttgcagtgtgctctgacgccaaatc
+ctgagcaacgttcttgagaacatcagccattgtgacgttggtctccgagg
+caagggcagtgtactccgagaagcttgcaacattcattgtaacatttgct
+tctgatggtggcattgagtagacagagtagtcaagaacgttcatcaagtc
+tttgcaccgctcgaacttctcattcatggtcactggngtctcggattttg
+gagcagagtaaacggacaaattgaattgattgatatcttgacatggctgg
+aacccttcgctagctgacacatttgtttctgattttgggatggntagatc
+gaagaatcagcttgatcattctgcgtctcattgacagcgatatttctttt
+tgcagcacctcattgatggagcaattgtctccgatgctgagcgagtacac
+gagacattccatctgtccatt
+>AW057482
+tctagtccaccactgattttctctgtccgcctgcgcctcgtattctctcc
+tctctctggcattcgttggctccagcagaactacagtttcatcgctgaaa
+actgctcttggggtgcagaagatttcagagacaaagcttggctcatcgca
+gttggctttgtgttgttgctcggcgagcacttgctccttaggcttcagga
+cgagctccttctcgagtttctggcgaagagaccggacggagacatgggga
+agtggtatttgctcgatggcagtgggcttcacctcgtcgacttcatcatc
+ctcatcatagtcataagatgctgacgatgtagacgacatctcttcaaaga
+ttccagaatcctccaccgccgtcatcatcgcgacattaaggcttccatca
+tcgcggngctctgtagacggcaagcactttccggcggctctcataatcgc
+gcacgtccacttgtgaacatcattatggccgacaaagaggtgaacattgc
+cacgttgcatgcggattttgatgtgacagcggaaatgacgctgctttttc
+gacgatcagagtgcacagacattcctggcgatcgcaatcatcaccaatcg
+tgtcgtcatctcgtaatgacatcanacgcgtgacgtacgtatcatcgctc
+cttggctcgagat
+>AW057483
+tttacctccaaactttattaaataaaccgaatgaattacgaaataacaca
+ttcatatttctctttcaaatacttcttatgcatgtccttgattggtgcaa
+atccatcatcacgtgccgattgattcttcccaatttccttcctcattcta
+tccatttccattttatcaatctcttcatatttcttttctccttcaagttt
+cacaccaaactcttctttcaccttcttcatctcatcggtttcagtttcaa
+ccgacaatttcttcttccagaatccaacttcttccattccgtcagtgatg
+acgatgagcaagttcttgttgggaccccatgaagcatcggaatcatcatc
+tctagaagacttgtagtgtgtgaacgtgataaaaccagcggcagacgcaa
+gttgcgcatgctcgaaattatcgtacgaaacattggtcaaatccttcaga
+ccaatctcaaaagtcatttcttcgttntttccaagaacatgaaactcttg
+agaagtgactccatgcttacgtcttgcaccaacaatttcgaagaaaaccg
+aatgaactcgaatgatcacatcctgtttctcatcagcagtgcattttcaa
+gtgtactccaatggcttgtcgctgcgaatctgaatttcggtgaagagatt
+tcgaactcaaactgtttgcacgtgaaaccaggtttcttggcagtttgttg
+ctttccttctgtcaaacacat
+>AW057485
+ccgtttcgacgattcatcactagaatttgtgcgggatcttgcaaaagact
+tcgaccgatccatcgtcgcggtgaggttcaccagtcactggatcatacaa
+gaatccgcccatctcaagtccttcaaatccattattatcagttggcatga
+cacatgtgaacaagtcatcatcaacatcatatgtcccaatgcattttctg
+tagttggatccagccttgtatgtcatgagcacacgttctccgtatcgaac
+cacgtaattgccggcgtcaggcgtcttgaatgtgttttccggtcccgtgc
+aaacattatcaagagtttgagatttcttgaatacggttttttccaagttg
+acaaagtcggcattcaacaggaaatactccatattgcacttcttcacgac
+attgattcctttgaacggcatacaaaagtttgacccgcttttttcccaag
+ttggaatgtacgtttcctcgattccggaaaatgtgaagacgccgtcgaga
+aggataacgccaatcattgcgtatntataactatccctggtgcctgtacg
+canattgtccaccanaatcatagtgttgtggaaatcaccttcggccgncg
+gaacaaccatcggttttttcaattcgaagcccactttgtacatnncggtg
+ctgtgctttggctcntggtncgcgagtctgctgcaacgagactggcacag
+ntggagcatcataagactgcaagaaatgtgacgtc
+>AW057486
+actagtaaagttcataatcctcaatactcggcacatctgggctttctcgg
+tgcgtaccatgtcttgtttctttccatccaggcctcgtgcttagtattaa
+cggagtccgtgtcctgccacgaatagaactcctttggcggaatgttttct
+ttccagttctgaatgacgcgttcgagataaaacgcgactacattggcgtt
+cgtattcgtcgcagacagacagtactgtagaatctttgccatcctctcat
+tagtatctgggtccggaaacgtgcacgtggccagatagttgtacttcatt
+gttgccgccactatgtgatcctcctcatcgtgccattcgaaggtttccag
+cgtgataaaccgtgcacaaatgtaaaaccacgactcgtaatcgtcggcag
+gtcctcgtgccaatcggaaatgctgacgcctcgacaaatacgtgttgatg
+acgttcttcgacagtggctctcgctccgatttgttacgggcatgtcggga
+ctgttgaacgtaaagaggctgcagcccattgtgcttcgtgtacgaattta
+cgactgatgtttgaccgtttcgagttgagaagtattgacacacgactgct
+tgtgttctgagagagagacgagnaggacgatgcgagatgtgagc
+>AW057488
+tttttaacaatagcatttcattccataaatatttaaggggtggttattat
+atcagttctcttaccagtcttctccatcttcgaatgcattatcacgatct
+gcagtggatgccgttccatttagcttggcagttatcggatccgggatatc
+agttgaattggcaacaatagcatcagtgacagcaacagtgacaacagatt
+gggatgaactttcagcgagcgtagaagtttgaagcg
+>AW057490
+tatttcaatgagtacaatttttcgaaagaaaaacagattgaggataaaac
+ttgagtgatgagataaccgtaatatggagaattatatcagtgtcaagaag
+gcacattgttcagtttcatatttacagatgtttgggattaaatgaagatt
+cggtatgcatcgacgatcagaacaatgaacgagtgagttgaaagaccttt
+tgttgaatgatatcagttcttggagaagaccatctttcaattgcttcaaa
+ttcacgacgtcttttcaccagttgtaagaaggcggtcaacgacccgtcaa
+cgtttgcgtagtgtgtgcggcaggagaagagacaaattccatcatacgag
+agtccttggtgcagaatttcgtgaaccaaatcaaattctgcttcttgccg
+gtttagaagatctggtgccaaaatgatgtcgaatttttttccgccaagga
+acttcattgcctcttcaattgtaccacatgaaaccttggtcttgatcatt
+gnaatattatttctcttaagtgttggacgacagtaaagctccaaactagt
+cttatncattgtgtgcattgcatttcttctgcttcattttcgaagcatat
+actgaatgggagtcagttacgaatccgattncaaaacagatttncattcg
+anaagtcagttcatctcacattgacatatatt
+>AW057491
+ttttttagaataactttttatttcgaatgtaatctcagagcaagctttta
+gaatctttttggcagtccgtacgagtcaacagtggattggtaggaagaac
+catcagaagacgacgtatccgattccgaggatttcgcagagttcatctca
+cggtccgataggtcagcaggagtcttgggaagattcgatttgctgcatgc
+ttgcaaatcttccccgtcttgtttgaactttctggcacggcagatctgct
+tgttgatgaccttattgtagttggccttttccttccggaagtttccacgt
+ggttccgaccagcaatagaagttcgaacatccataggatccatccggatt
+ctttacaatgccgaagcaggccttacacaacatgtgatcacactttccca
+atgcaatcaaatgattggttggcaacttgtttccacatggaccatcacac
+ttcacaaggacaacgcccgcttgctcggtgcactgaaatgctccgacagt
+ctgcacacacaccgtgtacgtgtattctttacgtcgagtcaccggattca
+ccaccgacgtcagattacaaccgagaacttcgcacgtggcanaaatgagg
+gtccagaagatttgcggatcat
+>AW057492
+caccatacaaactttctgccattgcattcaaggccttcattatatcacag
+cgagcggccttatcactttgggatgcaaagattccatagcttgccatcct
+gaacatacgcttaatattcgattccatactaagtggaactggatattaat
+acacccaacgacaaacacggcacttggctttcaagtattgtgtcatgaag
+aggctgagagacggataggcaccacttttacgcacatccttctgatcatg
+atcgctgtgagcttattgtgactacttctgggcagtgtcttgactctcga
+cgcgaggagaagaaacatccactgtcggatcttgcgtaagatgtgtacaa
+tcaccagatggttgaggaagggtcgctggatcatcgaccatttggtcagc
+tgtaccagttgaggtataggttgttggcgctgtggaagacaccgatctag
+tgtgatacagcgcctcaaaccgaggagacgg
+>AW057493
+gattaagttcagaatgatttggaaccgataattgaagaggaacatcggtc
+cagtagaatggcttcaccaagtaaagactcttagtgagtggcaatattgg
+ggaatccataatttgtgcctctcctttcttgagcgaaataaagttcattt
+ggcaggtgggcttcttgatgcattgcaaaatatcatcgacagtctccaca
+tgctcatttcgtgtcaagtcaaccatcacattatcagaaatcacgaccac
+ttgtttttgctcaatttttgcaatatttttcttgaaaaatgcttgaatgc
+gcaagttgtagtcttcgatgttctctgcacgaataaatccttgcgatgga
+agatattggatattgattggataacccatattgaaaaacgcttttggaga
+gagcaacatttcttgaaaattagtcggcaacatttgacgatagctgagaa
+gagctggatcaatgttgaaggtggtgtaggaaagaccaagtcctttaatc
+aagaaagctgagtttgaactgatntcatgtcangagagcaaaatacttca
+natggacgaattnncgcattcaacagaccacgcccataagttcagacaca
+atttgnccacgtcagtgagccggagatcgaattttgtagtgtaaagtgtc
+tanngaactatttnggaagcatatcatatcatatggatgatattcattgg
+accacgacgatacagacacgaacccattttggaagatcaa
+>AW057494
+ctattagcaccgaagagatctagtccaccactgattttctctgtccgcct
+gcgcctcgtattctctcctctctctggcattcgttggctccagcagaact
+acagtttcatcgctgaaaactgctcttggggtgcagaagatttcagagac
+aaagcttggctcatcgcagttggctttgtgttgttgctcggcgagcactt
+gctccttaggcttcaggacgagctctttctcgagtttctggcgaagagac
+cggacggagacatggggaagtggtatttgctcgatggcagtgggcttcac
+ctcgtcgacttcatcatcctcatcatagtcataagatgctgacgatgtag
+acgacatctcttcaaagattccagaatcctccaccgccgtcatcatcgcg
+acattaaggctgtcatcatcgcggcgctctgtagacggcaagcactctcc
+ggcggctctcataatcgcgcacgtccacttgtgaacatcattatggccga
+caaagaggtgaacattgccacgttgcatgcggatcctgatgtgacagcng
+aaatgacgctgcttttcgaacgatcagcagtgcacagaacatttctggcc
+gatcgcaaatcaatcaccaatccgtggtcgtcatgctcgtaaatgatcat
+cataccgcgtgacgtcacggtatcaatcgctcttttgactcgcagatatg
+atcgagcaggaaggctgctctggaatcggattgagttgagcacctgacca
+cgtnggtacccatnccttggcattcactcgggacgctgagccgttatgca
+ggagtgatgtcactgatcactcgctcgcggatgccgagtcacggtgaacg
+tatatgttgtagacgcaa
+>AW057495
+atcttgctgaattcttatcatcacttgaccttgcgaagagcttttccttc
+atgatcatgtccgatctcgtgagtttgagtttgctcgatgatagaaacac
+ggcgagcatttggaccggtagacattgcactcttcgctggaggcttcagt
+cctggaagagcttgacgatattgcgaagcaatgcctttcacatcttcgtt
+catttggacctgatggtggttgcatgagattggtagccaacgcctgtttc
+gcccattgcttggcatcgatcgaatcagggcgctcgacgacaaaagtgac
+ttttcctttctcttggatattcttgacgagaagatctctggcaacatcct
+tgtcactgactggaattccatcaacatcacacaaatgatctccaagaaca
+agacacttctcggctagtgatccaggatcaacacgtgaaacaagcacacg
+gttctggaaatgcttgattcccaaaccaagttttggtccattntgaaccc
+agancagggttgccaactcataaacgtatccctcacggcgttgaatgatc
+tttgcacgatcctcangaatatgaacacgagcttctaactcttcagcctt
+tttttcgtcacggttcaccgtgatctttgcacatggagcagcatagcgaa
+gcgcacggaaaaaagtcgtaacatcccttgcattttggtccgtcactttt
+tttacntggtcaccatatctcatttttccttacgaatggttccagcctga
+ttctanggataccgagctgtcattnngagttgcacaaagcgttcccctcc
+ctcatgcnataccaatctcgacatcttcttggacccat
+>AW057496
+tttattcactcgatttggtttccccatgtgccaacgtctcgacctttgtc
+ttcggattgcacaatgccttgaacaatggaatcatgaagcagggatcttc
+attctttgcttcgattggagcttttttgctggatttcagagaatccggtt
+ttgcaactcttgggaggcatttcagcacggatttcgattcgaattgagca
+gcgcaggccgtgtcattttccagaaaagcacgaagaagtggtttactgtt
+gttcattttgattgcatatcccgtcgactcaatcattgtagatctcccgg
+gcaggtcgatgacaatcgttttcgccgaatactcggcatcgtcggcgaga
+aaattgagtgtctggacgagatgatgtggcttatacgacacgtgtccgtc
+aatccagtgacgaatgttttttcgaatgtacgggtaaatggccctagcgt
+agatatcgaccggcttgtttttcagcagagcagctttggatggtgtttca
+aggattgcgacttcagtatacttattggatagaaccaagcattcgtaacg
+gtcaagttttgggctttcgttgggatttacaatcttcactgctacgtcat
+anatcgaagtgtttcggagcacgattcgatattcagatttcacattttca
+cgaactaccacagaaagagtcat
+>AW057497
+tatgtggacaacaagactgaggaagcatggttctcgttcaatgggaaggt
+gattaagcagctcgggccacagctcaacgagatgtacatcatcacgcgca
+actgcatcggaggaccaccacattgcccatgtgctgtgtgcggagctgct
+ccaccaccaccaaagccagtgccacgtgtcgagagagacgaatggatgga
+cattcgtgagggagatccatggccgactcgccaacttgtcaaggctcttg
+acaagactctggacacccttccaggagtcaatccagaccaatatgttgct
+ctctggtacatgcaaggtgaacctgttatgggtcgtgtctggaatgaagg
+aggaaaggtggctgccaacttctcgtggttcaacaacgagtattgcaaga
+atgttggatctatccagcttctcatctatcttccggacagtgttcgtggt
+tttgactatggatggatcccattcccggaggctgctcagtttggagacaa
+agcttggcatccagttcatgtcaacaaaccacagggagatatctncgttg
+gagttgntaacgttgctggaggaaagcagattcttgccaaggtggatgtn
+ncgtacgagaagtatgttatggataccanngaaaggagcattctgcnnac
+tgccagaactgcgctgantaacacatttgtctcttgcgtnagggcaaggc
+tggataccagctcgac
+>AW057498
+acagtgataaacatgaaattcctaaattcctaaacagtaccccttcatat
+cttggccttttatcttcttctgtgagttctacaaattgtcgcagcacatc
+atctgcgagaatcggaatgttgcacaattgttcataggttttcccattga
+tgttgtgtttggttagcgcatatgaagcaagatactctggatctgggaca
+acgattgctacaagccacggtttttccatatcgccatgaacgtaaatctg
+ttggacaaaactcgaagaagtgtagagggattctgtgagatctggagcca
+caaactttccttgtggcatgttgaaaacgtttttgcgtcgatcaataatt
+tgaagagatccttcagcagtgaatcttccaatatctccagttttcatgta
+tccatcttcagtgaaagatgatgcagttgcttctggattcttgtaatatc
+ctgaagtgacgttatgacccttcacaagtacttctccaccatttttaaca
+acagaatatccaagttcaggaacatcaattaattttatcatcgcacaaag
+ccatcgtggtccaacacatnncatacgtgtatctccaaccaattgaagtg
+ttgtggtccagatgtttcagtttgtccgtattcttncagaaaacctcact
+ncaatagccccacgagcanacctcattgcngaaacgtctgattttgcagc
+ttccagaatcaattgtctgagggtaggaccgagaagcatctggatttatg
+caaacatacctatctaccatgtttacgatagcttgcctgcatcttata
+>AW057500
+gttagatactttattgtttaaaaaatcgagtttttttaaaattcaaatga
+ccgtaatttcagaaggcactatgcaaaaaatatatcccatttaatttaaa
+aacactaagcacaacagtaagcaaaataatttccttgatgacactttgta
+taggctgggcagaatgtagaatcgcaagtttttccgaacattgggatctt
+gacacctccaatatcaatttgatgcttcagctttcctggtccacactcct
+tggcgtagtcggcatcgattttgtctctgattctcgtgtcgcaacagatt
+ccagtgaatgggccattcaagcagaaagtattcgggccatcgcacttcaa
+ttgttccattcctccacagtggctctttccgttctttccagcgacactat
+cactgccacctgggcatgtgaaatagtccattggaatacagaagtttgag
+caagctgagtaatccttgaaattgttctcgtttcctccacatccagtgta
+tttgaaagcaaggcatttcttgctttccatgtcgaagtgatatctgacaa
+ctgaagtgttggtgcattgctgaactccaaaatgaagcggatgctcacat
+tcagcacggaactgagctgaaacatctatcaaacacagtagaccgattgc
+taaaagaactttcagcat
+>AW057502
+tcaagtattagacggactcagtgggatgatcatcagacggattatcaagt
+acaacacgcgaaagctctgtctcattcttctcttcttttggtggacggag
+catcgaatgctcacggcactgagttccatcaagacgagtaatttgttgca
+agagatgaataacctttccacgataagtatccactgatgcaattggattg
+aaatcgaggtaaacactattgagttgtggaagttcaaccaattcatccat
+aatgctccagttatccaacttatttcctcttgcccaaaagtctgtaagtg
+tcttcaattgatggatattctcgaccttctcaagacgattctgattgaaa
+tccagaatttcaagaggaagatgctcatcgattccacaaacgtacttgat
+gccattttgagccagataaatctctttcaagttgtgaagtcccgaaatgt
+tatcaactacagtaatcgcattggctggaagactgagaactgtgagcttc
+ttcanatgatcaacattntcaataagacgaatctgattagcgccaaggaa
+caatctatcgagtntcagaatggttgcgagaatctcgattttcgcaatnc
+tattgtcacccaaatcgagatatttcagctcagtcaacgatccaaaccct
+ncgatttagtgaatttgtatgaacaaaaaagaagtctcagtttgtcaact
+ctccaaattttcaatttgtgatacgat
+>AW057503
+taactttatacttgtatttcacaacttttcccaatttgcaccaaatcgga
+agtgactgactgtcgaccatcggatgaaactctggagcctgtcctttgaa
+gtgagagatggatacgctcatcatattgtcacacgtaggtttgcccttga
+tcttattttcttcgaccacaaagtttgccgttttcttgccacctagcaca
+acctcatagtgaggattacttggggcttggcaataccggttggttctgta
+tctctcaacactgttcgagactagtggagaacgcataagtgaatacgcct
+ctggttcaagtttagcgagctgttccatgctatccgggtctgggaagaat
+tgcgactccttcgagtacatgtaaggcttcaacactttcttgcaaacatc
+cgattggttactacttctggtgtatgaggaagccgatctaacaataatcc
+catccttgaccggcactgttggatagtagatntccgaatatctacggccc
+atatagttaatctcggcgggggtgattgcaggcttgtattngcagagagc
+ttngcactncacttttncattaaatctgatcatcgcattttgtgggactg
+agaagtcaggtgtacatcgtagaaggacagagcangatatcaccatcnac
+tttgtcgtcttgatacatactcatcccacgttcngggctcgaatncagat
+ttgtggagtcttggagtagcttctccagcgtcagatgatctgtatgatgc
+cggcattctcctgaaactggcccac
+>AW057504
+tttaacctccacgttttatttctgaatcagaattaaggcatgtatagttg
+agcgtgagttgggctgtagactttggaaaaatcgaaattttcaggcacac
+aatcagtgtagaccaggttacagacgtgcgaacgagttggttagtgtgtc
+aggaatggggtgagccagcaaaaactatccttattcagctgcccggatcg
+aacttcaaaactgacgaccgtcttctcggatgccttaatatcttttaaaa
+tctccgaaaacattgcctgagcgacacgtgtattttcagcataaataggc
+cctaaattacaatcttttctgttctcatagatcacagcttgcccgattcc
+aatcacatttccctcctcatcatacgccactttcccgaatccatcccgat
+cgtacatatgagagatgatgtacttttcgcggtgatacggatgaattgtc
+tgatcataatttatgatatctctcttcggaacttctcgagcatttttcac
+tgtaattgcagacaaatgcaacgtctcagggagcacgacgtcgtgtgcac
+tgtagaatgtcttgtagccaacgtcgccggctttgtgtccgagatccaag
+aattctttccgtttggacagctcgtgccagatatctgtgggccgttgggt
+tatctgtgagacaatattattatcttcgctgttcatttcactcgacacat
+atcattctgaaagtttgccgttccgggagatctgaattcaggatcaatcc
+atgcgcaaactcatgacattatctgatgattgattganggattcatgggg
+acgaatgtgcacaaatgtgctggtggatgactctcggcgtccttttaagc
+tatcactt
+>CEES071R
+ggcacgagtcaaccttcaccacaagcgtccgtgcaaccttctcatcattc
+cattttgatgaaaatcagcgcaaaaaggaatttggaaaagaagaagctgt
+gaagaagattcaaaagaaagcagcgaaggttgctcgtgntgattcaatgt
+tcagttctgaagaatttttccctgacattatcaagtgcatgtnncaccgt
+caaacgnagaatcgagcttcacacgaatgctttgacatacaatcacaagt
+taatgnagagatgcgaacaattctcatcgnttggttcagcgatgtcgtga
+aagagtacaattt
+>CEESA12F
+gaaagcaattagaagaagtactgcagagtgtgatcaagtcactttagaca
+atttaacgagtgtagaagaaaatcaaccaaactcaacantgtcgcctctt
+gcaaagataattgctaacattgaaaaaaggaatgaaaaagtgaaaatgac
+taagaaattcaaaaagtttggagttccgcttccaatgtnctcatcgaacc
+ttgaaagtggatataagcaatgtcgaatggacatcacatgtncttctggg
+tacagttgtgagaacantagtaaaacgagatgctgtatggaagcaaatca
+ttcaccggaaattgagcgaaaaactgaaganttcaagncatgcccgtttc
+aacttcaaatggcatacttctgccag
+>CEESA13F
+aaagtgatttatagatttacgaagcggactttgtttgggaattgtaaagt
+taaatacaaaggaaatagtgggaagaaattttnttttcgggacgaaaatt
+gaaatttaaaaaaagggttctcggggaatcacatgaggntagagactgga
+aaagagaaaatacaataaattaaaatcggttgaaaatgaacattggacag
+gagaaacaaacggaacggggaatcgttttttttatagaggcgacaaaaaa
+gagcaaaagcagcatcagaagcgtcaaatcagngtactcaaaaganggga
+atggatattccaattattcctgatcattcaacggcaagtgagt
+>CEESA14F
+acgtacgccagangagattcattcgaaaaaatgatgcaaacgtaaacaat
+aaagcaattttacacacaaaatagaaattattcccgagggttcagcgtct
+actttgaatcagctcgcgaagattcgtttcagcctccaccaatcctttct
+ccaaatagtctttctgcttctcgatagcttcaatttnctcttttgactgt
+ttagcttcagcttcatgtctggaaatttcagctggtttgtcggtcagcag
+gaaccatacggcccaacagatcgattagtacttgaggg
+>CEESA15F
+gaaaacgagtatttattgaggatttgtgagcaatggggatttgatgtgag
+gtaaaaaaaaataaaaacaaaaggtacaagantaaaatatacatatagga
+cccgcagaaattgagatttaaaaaaaaattcaaaaaaaagaaggaaattc
+aattaattgtgcatactattggtcatttctagcttaaaaggntcactgaa
+aagtgagggactttgtcggaaattataattatncgatgttgaagaagaag
+aagctccattaaccagcgtcggggaatcccaacttccaggttccattggg
+ttcttcacatccccttgatgattccgtattggntcctacatcatgatccg
+cttcaactgcggatagctctt
+>CEESA17F
+cgaagagtgaccatttatgcacattgatcggaaataaattaggaatccac
+gcacttcgaggatgaatcaataaaaacaggagaggggtgggaatagaata
+cgaaagatgaaagtaagaaaaaatcgcgtgagaaaattcgggagcgattc
+aaagggaacacagtgacaaccgggagtgaaggnttaancgtagtanttga
+gggacgcctttttctttgcctggncctccagantagcatccataaagtnt
+tcatgaagaatctcagtagcatctcgtcgaagagcaatcattccagcttc
+aacacaaaccgctttgcattggtgctccgttgaagtcatcggttacagcg
+gg
+>CEESA17R
+aggnaacaatattccgatatcggaggatgtgacaagcaaattcaggagct
+gattgaagctgttgtgcttccaatgactcacaaggntcgatttgttaatt
+tgggtattcancctccaaagggtgtgctcatgtatggaccaccaggaact
+ggtaaaacgatgatggcccgtgcggttgctgcccaaactaaatcaacatt
+cttgaagctcgcaggcccacaacttgttcagatgttcattggagatggag
+ccaagcttngttcgtgatgcttttgctcttgctaaggaaaagggttccag
+ctatttatttttcattgnatgagtttggatgcccattggtacgnagcgat
+ttcggattcagaggaaagcttggagg
+>CEESA18F
+ataacaaacagtttataaacaagaaatcacgcaacaatctcgaaacggag
+tgtggcgagaagttctggctcattgtnctgggtggtgacggtgtgagctc
+ctctaggggcgactggaagtctcttcaaagctgggactggttgtccagct
+gacttagcggccttcctactcgttacgacctggcgctagacaacgcccat
+gtcgcaacggcggagtataggtctctcgcttaagcgccatccatttncag
+ggctagttgattcggcaggtgagttgttacacactccttagcggataccg
+acttccatggccaccgtcctgctgtcaatatcaaccaaca
+>CEESA23F
+atagcacagaaaactatattnaatttaatattataatagcgattatnaag
+tcagctgctcactggaatganttccagagagggagagagaaatagacagt
+aaaacgagtctttgaagtaaagancaactcantacagaggcggggatgag
+tggtaaaaagattgcataatgtatttccaatattgaaagtagttatgtaa
+ttccgagacgacgggcttntccctcataanttaaaagtcgccacattt
+>CEESA24F
+aaaatttttttncattcttaaatattcgcagatgtcgtgggacagagttg
+caaaatgtcaagagtggtgtgtgtcggtatgggaaaataagaagcatgnc
+aatagtgtctgataagttaaaagaggggaagagggagggagagagagaga
+gtgtgcattttggnccagtagatgaagatgngtcgtaatnntgtaggaac
+acaattatatttatcagagaaaacgggataaaacaacaaactcgattcga
+gttgatcataaatctgtgttatcacaagaattcgacgnaacaggagttgg
+tggacgattgacgnggatattcgaccccgatggcaagggaaaaagtatt
+>CEESA49F
+ggaatgtgcatatatttatatatataantttaacaggaataacatggaaa
+acgtttcaaaaaactagcgagaaaacagagggttcgtattggaattatca
+caaaaggcacacaaggcgtagttggaaacatagtgatagattaggagtat
+agtgcaattaaaatacaatttnctttggtgaccataaaaaccctaattat
+gagaaaaggttagaaatttttaaaagcagattangagacggttacatagt
+taaaantgcatggcattataaagntcacaaattgggaaaggtntttttnc
+gagattcttcttctggtgggangtatagagcaagacggtcacgtcccata
+a
+>CEESA50F
+gataaatttcttatttagttgcacatgataaagtataaatgaaaataatt
+aaaattaaaaaaagagcaaaataatgtcacgtgaggtttaaaaaggagan
+taaaaagcccaaaaagtgaattgaacgnaggnaagatgtagangaganga
+gcattttgaaaaaataacgctaactatgctttaaaacagannganggtaa
+canaaaatgttgagaaccggtagag
+>CEESA51F
+gatcgaaatggtcagaccgttgtgaagcttgtggatcgtgccacattgct
+ccgagagcaagagcagaaggacaccgagaagaagcggaaggataaggaaa
+aggcggacaaggagcaaaaggctcgggagaaggctgataaggaggcggca
+gcgaagaagatcaagccggaagagctgttcaagcagggagagcacgtcgg
+gaaatactcgaagtttgatgaacgaggtgtaccgnctcatttggctgatg
+gaacggagatcacgaagagtcagatcaaaaagctggagaaggtgtacgga
+gctcaaaagnaaaagtatcagcaataaatattagtgcctaatataa
+>CEESA52F
+aaattttacaatgtttattgaagacgttgaacgtcaaattatcaaatttg
+atgaatgagataaataattataccagcatgaagattgtaagancacggag
+aacttacagggaagaaattggaaacaacataggacactagcgtagttcta
+tgtgtcattggggattgggaatgaggagatccgataagttagatgataga
+ngacagaggtaggancatattagaaggggaaaaggcagattatttaggcc
+ttggcggctggcttggcggccttcttggcagctggcttggcgatcttctt
+tggagcggccttcttggctggagactttngcgacccttctttggcaagct
+ggcttggcaaccttctttggggctcttgngccttccttgaccttcttaat
+ctccggtggccggccct
+>CEESA53F
+aacaataacaatttatttgaataaacaaatttaagccttagcttcggcct
+cggcaaggaaatctctcttgagttttcccatgaaagcacgcttctcggct
+gtggtttngaagcgaccgtgtccggtcttngaactggtgtcgatccactt
+gaggttgatcttctcgtgggcgactctcttggtttgggtgatgagcgact
+tncggagggtgataagacgcttctttggtccgagaacggntccacgaagc
+atgatgtagtcctcgttgacgataccgtatcttggggaatcctcccattt
+>CEESA54F
+attcttgaaaagttttttaaaagaaaagctgaaaattttacaataacgca
+gatgaaaatnccaaattttttggggatatttcgccaaaaaaatcattatg
+tgaatttntggtgatgaagatgatgacaatcgctttgaaaaaaaaaataa
+caaaaaatacaaaattcgggggaaaaaaatgaaaanttaacaataaaatt
+ggncattttagtttgagccaatttttggcagacagagagagngagagtaa
+gagtaaagagaggtagcagagannacagncaaaatttattcggggg
+>CEESA55R
+aaaagcttcgcaatgagttcaagttgtcagatttnnaacttctttacgat
+tacaagggaaataacttgagaagcgccatagttctnctnaaatatccaga
+tgcaatcaatctctgcaattctattcgttccaatccaactgtattcggaa
+aggaatggcacccgagagtcttcgaagtgctcgacgtagctgttcagccg
+ccaatcgataaataanctttttngtattttaatgcctgancctgtttttt
+ttgatgcttacatgaaaatttgt
+>CEESA56F
+actagttctctctnctttttttttttagaacaaacaatactttagtaaca
+actatgtgaaaaatgaaggcaaatgagagttaattcatcaattaatgtaa
+gagtgatatgacgattttaagcattgatttcagtttcccatttgtgcacc
+aatgttttgaacttccattcgtctgggtgtccgacgtggttgaccttgtc
+tttaaagtcaaactcagaccacttgaaagatggaaccttgccccatgttg
+gccctccttgtgcgacaaattgcaacttcttcatcaactcaacatttgna
+cccttgtaatcaagagctccgtggttaacgtgtccttgtcccggggaact
+cgtatgttccattagctggatt
+>CEESA56R
+gaggtttgctccacagcacattcgagatgaatgtntttattggcttactg
+ctagccactgttgtagcttctcaaagctcggaaggacgcgatgagagcta
+cacttacaagcaactttgcatagtggacgataagcctcaagttcttgatg
+gattcgactgccgcaaccaagttgctntcgccagatggcaaaacgctgtg
+aacacaactggctggactttcctggnagtcgnaaccaaggagaactactn
+nccacaaattcaaagcctactctgctgggatatctttaag
+>CEESA57F
+tccgatcaaatctacatggatatgcagaagttcggacgtgtccgtcgtca
+agccggaggatacggtggatatggtggatacggtagcggaccatctggac
+catccggaccatctggaccacacggtggattcccaggaggcccacaagga
+cacttcccaggaaatactggntcatcgaacaccccaactcttccaggagt
+tattggagttccaccatcagttactggacatccaggaggaagcccaatca
+acccagatggntccccatctgctggaccaggagacaagtgcaattgcaac
+accgaaaactcatgcccagctggnccagccggaccaaagggaactccagg
+acatgatggaccagatggaatttcaggagtt
+>CEESA58F
+atgagtgaggtgctttatttgaaaaatctttttggaattaaatttcaagt
+ttttttacagaaaaaaaaacaagttcagaaaggagcaaaaatacagaaac
+aaatttntggatgaaggggtacatgataattttnagggaggaaacatttt
+aagantagaattaagacaagatgcatcctggaaaattttgatcggcttcg
+gcgatttgcgacgcagtgaattgaagcatgaatagttggntccataaaat
+cctgataattcgattcagaaaccagagattctttatcagaagcaaaatcc
+atatctctatccaaataggaaatatcacttcccattgaacttcccgagtc
+gtcggcgatttgagaattcaatagcagt
+>CEESA59F
+gatcggcgccacagaattttttggagtaagactcgtcaatcgtgtatctg
+atatcttttataattggtctctcgatctcttgttcagctccactatggaa
+gacaggtagttgagattcttcaaaacctggagcataaccaagtcgtttga
+ttctagatagcatatccttgcaccgatttgcgacaaatttcgtttgaaaa
+tcatgaaaatcattttcagattgcttcttgatcagtttacaatgtttcgt
+gtaaatatcctcaaaggtcgattcgggattttcctcaatcattgtcttgg
+tgatctgttgatagtctgggttttccagaatatccttatgctgatcaatg
+ggntaaattggnttcatcacttcacaattgtttcgtcagctcattcccga
+ttttcc
+>CEESA60F
+ggtacagttcatttttncatttcagaataagcaacaaaaggtgttcatat
+gaatgtnctgtaggttagtaaaacactatgttaggttactctttgatgca
+aaaaggaatagttaaaaagttctcagaagctttcgaataaattataataa
+atacgttgcagaagtaactgggaaaggaatgatgatcgtnatttaaaaaa
+gatggaagagcttcaaaacaggggcttagaaaatncagttctataatant
+aaattgggaagagctcaaaanttcacaaaaaatggttaatgaatacgcat
+acaatgtcaccgcttgttgaccagcacactgagcgggtaaatttccgagt
+aagagatcgncgatatttcagcggatggagcacggtaggggaaagtagga
+ta
+>CEESA61F
+ataacaaacagtttataaacaagaaatcacgcaacaatctcgaaacggag
+tgtggcgagaagttctggctcattgtnctgggtggtgacggtgtgagctc
+ctctaggggcgactggaagtctctncaaagctgggactggttgtccagct
+gacttagcggccttnctctnctcgtcattggacttgacacggttcaagaa
+gtcggttctgcacttggatggcttgatgtgctcgatacggatgttgattc
+tctttggaagaatgtttccgcggactctcttgttgacgatgattccgacg
+gctcctctggtgacgttgaaagattctcccggttcttccgtggtaagcct
+tgaatggcata
+>CEESA64F
+aagagtttgaaacttttattagctgtttttttagttcaaagtgagaaaag
+atgaaagaaaaaacaataaacagtattatgttcagaagtgtattgaagag
+agatgggggcgaaactaatcctcaatgaatctaacttgaattatgttttt
+nctcatggaaaatcgcgataaaaggattactgtgtcttctacagtaaccc
+gaaacgtaagtttntgggtgttgggggtgggggagggttgattcgtgagc
+aggatttcggggcatttacacgaaacttttcctcatttttctcgtgccga
+attcctgcagcccgggggatccactagttctagagcggccgccaccgggt
+ggagctccagct
+>CEESA65F
+aataagtaacactttcatcacataaaacatcagtttagtgaaattgaccg
+gaaattgaagtaaaaataaacgcgggaaaggatggtgtgacttgactagg
+ttctaggcggcacgancagcaaatnttggttttnaagttattaaatgcaa
+aacgtttgatttttgantgttgggaaattgcacaatttagagggcattgc
+gagtntctgagaaaganatgaaatgttaattgttttgggcgctgaaatga
+aagatgaccagtggcaaagtacggatgagttaaggtgagtaaaaganata
+aatgcaaaggggtatgggtgggtaatgcgactagaaacactaagcnagta
+tatccgtaatggttggaaaattg
+>CEESA66F
+gaagcacacgaaactttatttttttttgttggagttcaacatgaaattca
+gcaattnacgaataaaataagacataaagaacggagagaaaagtggtgat
+gagatcggcggttcgntcgcaaaaatcaattttcgggatggaaaaatacg
+aggattatggtacaagttggnttaaatgaatattaaaagtgcttcgagaa
+ttggtgatggagcttaagcacgttctccgcggatgcgtctggcgagttgc
+atgtcctttggcatgatggtgacgcgcttggcgtggatggcgcacaggtt
+ggtgtcctcgaagagtccgacgaggtatgcttcaaatgcctcctggagag
+ctcccgattggcagccgnctggaaagccggnggtccagtcttt
+>CEESA67F
+acatagctaacatttatnagccatttgaggatcgggaataaatttgtata
+caaaacaagtataacaacgaacactaatgggggcggaaaaaaaggtgaca
+gagcaagtatttttttaaagagattcattgaaatcgatcaacagtaacaa
+gaaaaatgggatatactaatgcggatgctatccgtaccgttcatctcaca
+aaactcgcgaattggtcgatgaaagggtagtgatttattgctcatcggct
+ttcgtctcgttctttgaggcggcgtcgaagtctccaacaagttctggcac
+atcttcgtcctctcccttttccatctggtccgagctttggtaaccgttnt
+tggcaagttttcttgagggtggggtgaggggactctgggacc
+>CEESA68F
+aaaaaataaagaaattattcacgtaatcaaaaacagacagaaaaaaaaag
+taagctcgaataagcttatacatataccgagggtgaataagtgaattgta
+aatgtgagagttaggctttgaaacgttcgggagaagcgggaaagattgac
+aacttaaacgtgtaaaaccatganattccgtataatctagtttggggtgt
+gagcnttgaaatgtgcaggataacaacaacaaaanggtgggttgaaagan
+atctggngnaattaaacagttattagccgtgacgaacagaagcnccgggg
+ggctct
+>CEESA69F
+aaaaaaattcaattaaatttattatcaatgctccaaaactcatgccaaga
+agagatctgaaaacaggtgggtgtgtctgtgcaagtaaaaaattcaagaa
+aggacaagctggttggaaagaaaaatacaaaaaagtcgatggtctaacag
+aataaccagaacgagattgancgggaatncgnttgagangaaagcaagct
+tgtntgatggtagatgggatgnttggttgagatttcaatattaccaactg
+gctgagtattattnatgatttttnatcagcattgtcca
+>CEESA71F
+atcaattttttttattggaattcaacggtaaaacgagcgagggtggactg
+tattaattgaactacccaattgaggtctttncttgagaacacacacaaat
+taacaccaacgtatacaatattctncgatcggttttnttcggaggagatt
+tataaaaacactgccagagaactcatctttcaaaaaagaagacatcgggn
+ttgaaggacaacttgaaacaaatganggaaatgataatcacactaaaacc
+gagcatggtgcactaattanttataaaaaattaagagtgagagtaggacc
+gagagaaaagag
+>CEESA72R
+tcacaagtgattcaattgtttcgtaaaaatcaatagttttncttaattct
+gcttaaaaattggcctaaaatcttgaaaattaacaaagttatgaatttnc
+gaaaattttcaaaaaccaacaaaaaatttgattttttaaaatttaaaatc
+aataatctacaataaacttacaattaggcagatgaaaattccaattttng
+caaattttgaagctataacgctgaaaactcgtacagctaaaaactncgnc
+cattttggggtcccaccgcggncaacccaaaagtggggtgggaggcctag
+acgtnttagggggtcatttttcaaaaggtcttcggtg
+>CEESA73F
+agcagccaagtcctcacgagcctcggtgaactctccttcctccatacctt
+ctccgacgtaccantggacgaaggcacgcttggcgtacatcaagtcgaac
+ttgtagtcgagacgagaccaggcctcagcnatggcggtagtgttggagag
+catgcagacggcgcgtggcaccttggcaagatcacctcctggcacaacan
+ttggtggctggtagttgattccgaccttnaatccggttgggcaccaatcg
+acgaattggatggttctnttggtcttgattgcagcgatggcggtgttaac
+gtcctttngaacgacgtctactctgtacaagaggcacacagccatggtac
+tttccgtgacgtggatcaaaacttgaccatctggttaanccggcttcgaa
+agcangctattnggtgatg
+>CEESA74F
+gtgtgtgtgtgtgtgtcgaatcgttcgagaaaataggaaaatatgcgaaa
+aaaatgaaaaaaaatgaataagggagaaaaaagtacaagaaacagaaaaa
+ttagaagatatttttttattcaatcatcaccgggatgttcggggcagcaa
+ttcttccgtttgaaccaatcatcgatacattgtttatggtagatacatag
+acatggtaatcgtgctattttgtgtccagcttcgaggtcttccagacaaa
+tcgaacattctcctttgtcgtcttttagcacgncatcattataggnaatt
+ttgggtctcgtcaggcacatgnccagntgaatntccgcgtcgtccgatgg
+ttacaactttt
+>CEESA75R
+tatcaatgaatgtatttncccacctttcctatcaaattagcccttccagt
+caattcccccgccacctccctttccaatcatcagcacttgaccgatacag
+tcaacgcatctnagttgactccaatattttnccccgtctgatgttcttct
+tgtgttagtgaccttctcaatcatttctccccaaaattttttctctatca
+atgtgtactaacattgccaattctacggcggacttgtctccgttttagtg
+gtatganttatatacatatatataanntntnaatttaaaattgcatccta
+tttcgggtaatagg
+>CEESA76F
+accaaatgaggcatctcgattcattcgtagtattatggtacattcgacac
+aaaatgagcgacaaaaaaaaaagaaaaatgacaaaacaaaacaaagggaa
+aaaaaatggaatttgagttgggcataaattatatatatatntnnntatat
+aaancttgangaacttttttttgtgtttaanaagnggtgtggaacatttt
+tttaaggggaaaaggcattgaaacgtaagtagtcgngagggttttggctc
+gtgccgaattcctgcagcccgggggatccnctagttctagagcggccc
+>CEESA77F
+aagaatttctactttttattgatttnccgcataatgtaaagtaattttaa
+gaattacaagantaaaataattgaatgagaggncgtgggtgtgtttnctt
+aaaaacaacaacgagtgaagggggaattacagacaaaaagaaataaaana
+tgggatagagatgggggtgataggtggagatgaggggatatgaaaggtag
+aagancctggtaaaatggtctgncggggctcaangggaaatggggctcaa
+aaccaaaaacgaa
+>CEESA79FB
+gaaggggtgatttcattagatatttttaaaaattattccaaatttcacgc
+ataacagaaagaaaacaaaacaagaaggaatatcacaaaatgtttgatgg
+aaaccggtaagaagtgaggataataggcacgtnctgagtagctgatctat
+atagataaaatgtgaaaacaagttgaactaatctggcgtacgagaaaaga
+aaaggtaaatcgataaatatntatgtacaacgggtatagtggatcgtgag
+aaaagtgcatcgggacaagngacttagagntaaaaaacgtnaggcagagt
+tcactcaatanacancaaattttncgaaaaaaacatctatggattattca
+taaatgggncccttccgagtgt
+>CEESA79R
+attttagaaaagtatatcatcataatcaccacttcaaaaactttgacgtc
+ggcttcggaatttngacttctctttgggattatgttttccacacactcgg
+aatgggcccattataataatacatagatgtttttncggaaaatttgttgt
+ttattgagtgaactctgcctcacgtttttaatctctaagtctcttgtccc
+gattcacttttctcacgatccactatacccgtngtacatatatatttatc
+gatttaccttttcttttctcgtacgncagattagttcaacttggttttna
+acatttttnatctatataggtccagcttac
+>CEESA80F
+attgattcgaaataatttatcgtatacaacacaagcgatgagcatagaaa
+ttggaactcttttcattcaaaatttagaaaaaaataaaaagaagcgaatt
+aagcagaaattgatgcgagtncagtattatgcagattggagcaggcggca
+cgagtttaatactcttctccttcctcctcgtttcctccctcgttcgagtc
+agctccgacctcttcgtagtccttttcgagagcagccaagtcttcacgag
+cctcggtgaactctccctcctccattccttctccgacgtaccagtgaacg
+aaggcgcgcttggggtacataagatcgaacttgtagtcccaagcg
+>CEESA81F
+aaaatcgataaattcttcatcataattaatcaggaaatgtttgtnattga
+aaaaaaaacaagaaaaatggggcgtgtcgatgagaaattggggcaaaaaa
+aaaatcgataaatcgataaatcaagaggntctttgggcggaaaaatgaga
+ttttcagagagaaaaatggtgaaaaactaagaggtcagcgaaccgggaac
+acaagaaaaaantcaaaaaaaaantcgataaaatcgaatcatcgtccatt
+cggcatttncggcggcttttnctgggcctgggcctgagcctgagcctggg
+cctgtttgagcctgctggacttgggccaggnagcaaatttggctgtagac
+cgagcagtt
+>CEESA82F
+ggtgttgttaacagatttattgaaaacaaataacaagatctttagtcgaa
+gagaccgaagcccatgtcgtcatcggattcctcctttggctcctccttct
+tcttggtctcggcagctggggcggctcctccagcagctggagcagcagcg
+gcggcggctggagctggtccagatccggctccggaagagacagaagtgat
+gaggttcttcacatcaactccctcgagagccttggcgaagagtcctggcc
+agtatggctcgaactcgacgttggcggccttgagaagggtagcgatcttn
+tcgccggtgatggcgacctcgtcatcttgaaggatgagagcagcntagac
+gcaagccagtttcttggttcgaagccattnttcaatcgtaaagttncgcg
+gttttgcctta
+>CEESA83F
+gaatatttattttaaattgtgaaatgcaaattggtttcgttgaacttttc
+aagtgaaaatccatgcaataagagcgcaaaatcatacataatacagtgac
+gagaagcaatcgaaatatcacagaaaaagttaataagcgagatttttaga
+ttgggaatgagaaagtncttaatgggcttgcttcttggcgaggttctcag
+ccaatttntcgaggaattcaaaggtgttgaggtagtcggtacgagtgaca
+gctgaagcatttcctcccttcacgcaaatggcaagatccttggtgaggaa
+tccagcctccattgtctcaatgcacacagcttccaaattattggcgaatg
+tctcaagggcagagttcttgtcgagagtgggcacggtgagccaatccacg
+ggacccaggcgaagattggagggcaattgggattcgtgggaggttccttg
+tccccttttgggtgcattcctgtaaat
+>CEESA84F
+aaggaaaatagacagtttatattcggaatttataaaacaaatgtgataag
+aactgccggcggatacgnaaataccgaaaaaagtaatcaccgacgacacc
+gaaacggatggaaaatcgaaaaaaaataaataattgggggaaagaataca
+cacatcgacagangaccgcaatttagtgagtgatatccatggactcgacg
+acgtcgtacttgtaggtcttgtgtttcagcaccttggtggctccggcctg
+gaatttntagacgaggtcgttgggctctggtcgcttgcagaattctggca
+caattggccccttgaagacggtctcgcttggctcgatgatcacaagattc
+aaactgtttcctgaggcgttgtcaccgtgcataccggcttccagggcgcg
+ttggacaagcttctcggcttcaactttcgtcatatcaaccttaaaatcac
+gttccaaaatagtgatagcntgcata
+>CEESA85F
+gtattgtttgcaatttatttgaagacaaaaattggaaagaataattgggt
+agaattaaagggagaaaagagggaagaaaatagcttcacaagttttaaag
+tacaccagctaaccgaagaaagaaaanttgantgaaatatggttccttta
+tgaaaatctcttcgaaaaggaagatagttaacacaaagatggtccatcta
+gacaaaaaagcaggntcagcgactaanatgaaaancanctagaatattaa
+gaggttggtagagantgagaaagcnataaantaagggaataa
+>CEESA86R
+ggaccttnttgtnattaagccaaaggtgttcccagacgagcntggtttct
+tttccgagagttacaacaaaactgantgggcagagaaaatcggctacact
+gaggntcttcaacaggataaccactcgttctcccattatggcgttctccg
+tggtcttcatacccaaccacacatgggaaanttagttactgtggttagcg
+gcgagatcttcgatgtggctgttgacattcgcaaggncagtccaacgtac
+ggaaaatggcatggngtggtttctcaaccggggataataagcacgnnttc
+tgggatttccagcccggg
+>CEESA87F
+gcatggtaattgtcgaattttatttccaaacactcatgaaaaaagaaaaa
+taaataatacagtcaaatttttttncntgtagttggaaactnttaacgat
+aacgaaaattggtaaatgagaccacaagaaacagaggtcgataatttagg
+ggaaacaataggcattttactccatcagcacaatgccgggtaaaacgaag
+gtggaactcatatttagttgantgcataggagggtacatggaatcatttg
+gttcggccttcgatgtaggtagagacatcgatcttttctgggagctcagt
+gatgctgatatcgaagcggtcttgaacgctgttcaaggtcttggcgtcat
+tctcatcagaaacgaaagtgatggnccaaacccttggttcccaaaaccga
+ccagcacgtgcaactcttttgaaggntacgagtcgggaatcttcttggca
+tg
+>CEESA88F
+aacaataacaatttatttgaataaacaaatttaagccttagcttcggcct
+cggcaaggaaatctctcttgagttttcccatgaaagcacgcttctnggct
+gtggtttggaagcnaccgtntccggtcttggaactggtgtcgatccactt
+naggttgatcttctcgtgggcgactctnttggtttgggtgatgagcgact
+tgcggagggtgataagacgcttctttggtccgagaacggctccacgaagc
+atgatgtagtcctggttgacgataccgtatcttgggaatcctcccattgg
+ggtgatggtcttctgggtaagatcgaactcggtgg
+>CEESA89FB
+aatggtacattctatattgaaagaaatgctaaatagtttgtatgtacagg
+aaagtagccaaatacccataagcagagaaacagaagtggggaaaggaaaa
+cagacaagaaaaacagctagaaaggaaagtaagagatattaatcacaatg
+aaacgcggataacattgataagtgataatgttgataaactctgtgatgat
+gataaagcctacatacacaaacacacggatgaaaatactattcaaatgct
+caatgagagtgaccagaagctagaattgcggggacgacggctcctccaat
+ttattaacagaagctcttttgatcgtgtaacttngctcccagatgaatga
+ggaatttccctatttgaaggatggtgcagttgcatccacggcaagggga
+>CEESA89R
+tggaaatatccacgcacaaataggatattcactagttttggnatttgtac
+tcatgcttcttgttgatcaaattggaagtgtcacggtggcaagaaatgat
+agagcagggagaagccgaattggaatctctgccacaattgggctcgtggt
+acagctgcagcggacggtgtcgcattaggaagtgcttcagtcatcaacaa
+atccgatgttcaaataattgtttttggttgctataatgcttnacaaagca
+cccncttgcat
+>CEESA90F
+tgccgctcgtgccggcaaaaaaaaatcaatgggaaaatgtcatcataggt
+aatacaaaaaaataattttttgggagttttccagaaaaacgggcggcttt
+gaacaatgagaatttggagcaagaaattggtggaaaaaatcggcggtaaa
+aatcggaaaaatcaataatttatcgttaaaatcaataaaaatcaacgtcg
+actatgccgatccctgtccccccgatccctccgacgttccgaacttctcg
+accgttttcgacggcgatcccgttccgaagatcggcttctcgatcggtga
+ttatgtcgtgatgaagattgctctttttcacgacttttcgatcgtttttt
+gctctttttactcgattttntctccttctcatcgtctctcatctctngtt
+ctctcttnttttccgacgacgatttttccgattttttattaatcgccgnt
+ttttgaatctggaacggggtaaat
+>CEESA90R
+aatgaacaaagaggcccggaaaaaactgccggcctggntcctggaaggcc
+ttgaaaaggcganancgggagaagcaaaagcagtnggaaaaagaggaaaa
+gctgaagaaagcggaagaagaaaaagcccggcgaagagccgaagctggaa
+agagcaaatttgactcttcgtcggatgaagagagcccggaaaatgagaaa
+tttcctgttggtaatggaaaatctgaatatcaggaagatgataatgattc
+ggaagacgatttggaggagagacgagagcaatttatacgctgtgtgaaaa
+ctctaattgatgaacgtgctcctcgngtcttcgaatgacgtcatcatgcg
+tataatacaag
+>CEESA92F
+aaacacagacacgggatggaatattgaacttnttggtaaagtctcaatga
+gaggtaaaagtgcaagtttacatgtaagaaatcgttcatcattatccaga
+agcttctgggttagagtcgcagttnttggatatttntncttctgaggttg
+atagaagcntcgcacataaaatccgctcaagagaatgatttgggaaattt
+tgaagtgtatcactcgtggtggacggcccgacaaatgtttcaggttcttc
+aaatgaaaaaccagttggcattgatttacataatctnttgatggacgcga
+aaacttccgaagagttgtgcggtaaacgtccaaagttcgagtcttggtaa
+attaaaaaagcaatcctctcgcagagaccgagttttcacatcatatcgtc
+tttcgttttcgatgataatcaattatctctgcttgtgac
+>CEESA93R
+gaaagtcgaaaggtggtactcgtggtgaacaattcatttatgcggctgaa
+gcattcgattcgactaacaatgttccgataaaagtcggcgatctcacatc
+aaccaatactcatattattaaaaaaggaacagttgttgatgcgaaattcg
+cactggccgatcgttgtcatgtattcaaaaatgagattnatggaagtntc
+tntcaggcgacactntcgtttactgatcttacacagnataaggattcgta
+ttataagnttcaactgtt
+>CEESA94F
+gaaacagtaaatttatttcatagaaaantgaaaaaatgaataaaactata
+aacaantaaaancgacaaagtgggaaatatctatttagactttaccaatc
+gtgtacaattnctgagtgtagctggtccattccagtatcctttaatcaaa
+ccattccaatacagngctggcatgaaataccttttcatcaaatatgccca
+atatgtgggtttcgattgnncaantggagttgtctccatggctcctcgag
+gtccaaattctgcgaggttcacacgatttgtgctcacaaccagtgggcat
+gacgcgtancgatcggtactgcatacatggccgatttaccctgcatgact
+tgttgtcaaaatttctt
+>CEESA95F
+aagtggtcctaacaagtttattcatgttaagaaaccataaatataaagaa
+aatgaagtatctcatggaatttttntcaagtaaaatcgtgtttcgcctag
+aaatggaaggcacatnttgaaaaaaaaaatttgaaaaaaantcagcttag
+aagcactttcggtgaacaatcgatgggccggactcgtcgtactcctgctt
+ggagatccacatttnctggaaggttgagagggaagcaagantagntcctc
+cgnnccanacggagtacttgcgctctggnggagcaataatcttgatcttc
+attgtgcttggggccaaatnttgantctcctttt
+>CEESA96F
+atattggcattaaacatttatttttgacacactgaaatttaaacaaatgt
+atagtcaaaaaaaaacatgcaaataaattatgcttatacagatcagttgt
+gatttgaattcacaaccttcacagtggccaaccgatatatatatatatat
+ngnnccagatcagactagaatttggaatagaagcacctccaccttgtttt
+tatgtttgantttttctttttcatgatcatgttcatctgatcagaagttc
+atctttagccgctgttgagtagcaaagttcagcaatcgttctcaggtttg
+tagttgttgggccaggtagaatcggcttgatttgagtgatagttttaaca
+gcgtctaccgcccgtcccagtgtacaactngaatcaacgcttttgtgttg
+gangtattgcgga
+>CEESB01F
+aagtgcaatgtttgtagccattgaagctccttgtgtgagctcgttcactc
+ttgcaagaatgaatggggtgacactttgtgaagcgatgttttgttcaatg
+gcttcttgaacagcttggtcaattgctttctggatcgcatctccatcagc
+tgcatattttncaggaataggacaagccagaatagttccntaaggtaatc
+cgngagntttcnaggtttttaacaggtgaacaacttcttccaatgattca
+gtgcagaattgnagcttttctaactga

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/5.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/5.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/5.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,814 @@
+>CEESB04FB
+tttttttgttagagtattttatatatttattattattacagcttacagaa
+ctttgatttgttttacagaaaaaaggtgcaaccgcttagacaaattcaat
+ggattatcattatttgaaactttttgcagttccttattttcaaaaaaatc
+ttggtttttggtttgatcagggtgagaaaggatttcgggggtcgaaagct
+agaaaattatcaattttttgtgattttcgattgttttnatgtttttcatt
+tatacgagatagcgaagaaagaaagtttggnggcaggatggtagcattga
+atattggtcgaatcgtttaggcggcttcttcatcgacagtttccttgggc
+ggtgagtgggcttgagctcccggnttccggtgagtaccgacgnacnttct
+cctcagcg
+>CEESB04R
+gccaagaccagaagtnaacgaattcattggagtcaacgtcggattcggaa
+tcgccatcgttttcggtgttgctgtctctgccaaaatntncggaggacat
+attaaccctgccgtctcctttnccttcctttccgtcggacaaancaccat
+cgtccaattnattg
+>CEESB05F
+atgaaacgactttggaaaaaggttgggtctggtgtgaagaatgcnaatta
+tctcaaaaagtctgtagaagaagtagcgaaaattgaaaaaggggaaacaa
+aagtcgtcccaccaaaatatccaactgaaaaatcacgagaagttagcgan
+gaaatcaaaaaagaattagcgntgaaaaatgaaagtttngtgggaaatat
+gactaa
+>CEESB06F
+ctagcattccatganctgtgccaagcattctatcaacaaagattgaaaac
+tattcgatctagaaatattccgaataactcgccggcgttagtcgtacgac
+aacaattcaaattggcgtttctgtcaganctcagacaggatactcataca
+gcattgagaaattatcgnttggcttatgatcaatgtcgggatacagttga
+ncaatgggatggagtcgatgtttttangtggagaagtgttgttggattgc
+ttanttataaggnacagcagggnattncttgaaaacccacgaattccgtt
+attagtactnaggggtgtcccnttgccgggaaatttttgaaaa
+>CEESB07F
+gaagaagcaacaaactttatagtaaaaaaatgacaaaagagtatcattgg
+agaaaagaaaacggataactgggagagcaagcanttaancaattttnaaa
+aattgaaaattaagaaaaggtaatagcaaatgggtggagaaacagtgggg
+aagagatttcnttagaaagaancaaaatttgtaaatcggaaggaaancan
+gntgaaaacggnggtcg
+>CEESB09F
+aaagtgtataaatttattacaaaagcttttncaaattacaacagaagaga
+gactggaatgcatgatgattggtggtaaaaggggaaaaaggntcttatga
+aaaattagaacaaatcacagaaaatttcaggttcggacagacagacaggt
+ggataagcnctcgtgatatatatttccagngngtaaatcagaaatagaca
+agatatgatgcaaaggagctctacggtcaagacaatagangtnccnntag
+ggtgcttacnaaacacttcgaattttgt
+>CEESB10R
+agaagccgcgtgttgttaaggaagaagttatcgagccaggttcacaatct
+gaaactcaaaaagaatctccggagaaagttcgagttgttgtaccgaaagt
+tgaagttgaaagatcaccgtcgccaaaatcttctcgtgatcgtaagaagg
+ntcganagaaatctcgtgagaaagatcgtgaaagagatcgtgacagaaga
+gaaggttcaaaacatcgtgatagttatcatggncatcggnaacggcagca
+gtttctttccagtgtacgacggtat
+>CEESB11F
+actttgtctatttattggtttatnctaataaaataaangcagtaacgntt
+caaaaacgctcaaaacagaaattaaacgaagcanatntttaagtgctgag
+attcataaagttgagatggaatgtnttgagtgtcgattgaggaattctta
+attaaccgacgtcgtatccnatcctatcgncatttncaggagcatctcat
+tgggtgtttggttgtccgagcgtgtggagcattgatttcncggcagagat
+ccacgagttgaattcattttttnagctttcgg
+>CEESB11FB
+actttgtctatttattggtttattctaataaaataaatgcagtaacgttt
+caaaaacgctcaaaacagaaattaaacgaagcagatttttgagtgctgag
+attcataaagttgagatggaatgtgttgagtgtcgattgaggaattctta
+attaaccgacgtcgtatccaatcctatcgccattttcaggagcatctcat
+tgggtgtttggttgttcgagcgtntggagcattgatttctcggcagagat
+ccacgagttgaattcattttttgagctttcggttcttcttccgtttttct
+acgagcacttctatccaatttccaaatcttctcgcagttcccacgngtna
+ccatttgaaatcattaggcacttctacggattctactatggtcggcccat
+catacttttcactcg
+>CEESB11R
+aaaaatgccactgtccaagagacgagcggcaggtagaatcaatggactga
+aaggcccacccgcaaaaaaaagaaaggaaattgaacgacatgaagctctt
+ctcatgcatccgttagcgtatactgcagaacagtacgaacaagttgcaga
+ggagtcgaagttctacaaaanttgcttcgaaactactgccgccgagaacg
+ttgaacttctaaaatccaacggtcaactcaaaaaagagcttggaattttt
+gaaaaaccaacanaggggnatcagttgtaaaggtccgattattggaagtt
+tcaaaattccccaaattcg
+>CEESB13F
+caggagtcaattgatttatttacagaaatcatttagcaaaaaaagtaaaa
+ttggaagaaagaaaaatgagagaataaatcatttagaagagtctaaactt
+gagattgtgaagaattcctgcagtgattgttttacaagtccttgattggg
+tcattctccttcttgatggagtaagcgaggtaagcgtagaaagcggtcca
+tacagcgaatggaacgacaaggaggccagccttcttgtcaatcttgtaga
+atgcaaaanatgcagctgcagcagtcaaactgacaacagtggtattcttc
+cacaaacatcccaagctccttcttcttgacgatcggaaattgtttgcaaa
+cagcgnagagttgacactngctncatacaaaccaaggagccaaacttttg
+tg
+>CEESB14F
+acctttagtttgactttattggtagaacctgagaacgagagaaaaaaaat
+aaaatatataagtnaagctcattggctagagatgaaatgggaaaacaaat
+aataatttatgcggttncttcagtgcttttcgaaagaggttgcatttcat
+gcgagctgcggtacgaatctccctcctcgatctcttcaatactctccggc
+agtggttttcccatagtttcaggcaaaaagagcaacgtcatcactgcggc
+gagcacagccatacacccgaccggaataatcatgaaaaccttcccaaact
+tgcttcaagcgatccacatggagatngttacgaagcgggcgatcg
+>CEESB15F
+gaaaaaagtncgttattcgactttatttncagaatttcagacaaatacaa
+aataaaacccgcaaggaaaaaaagataccagattagtcaaaaattgtaca
+aattgttgtgttagttgtncagaaagtnccggatatattgttgatgtgct
+cgacaatcacatcgtagaatgtgtagatacccngtgcaatcatcacaatg
+antattgtgattatcancagtgttattatcagcactctgtgtgactgtcc
+tcgttcgattctctgcatttcttttatggggaagtgacagaggacgcatg
+tcgaaacaacgagnaatcttgagganttttgaa
+>CEESB16F
+aatttaaaagtttatttatcggaaatgttgataaggaagcacgaattaaa
+attgaaagagggggcggttgaggggggatacaattacaccgngtatcttg
+tcaatgaaggttttcatgtcattagaatgagacttgataaacgataaaaa
+atgcatctgaataactatgggcaatatgtgtgagatgggtaattaacaat
+gaaacatgggngataaaacgancaacatcctaataaaananctcttaaat
+acccncttgaaaacatcgnncaaggcgactgantactngctaaatcgaat
+ccnatgggcaatcaagagtggatttgttttaccccgtcttggggtccgac
+>CEESB17F
+gaagaagcaacaaactttatagtaaaaaaatgacaaaagagtatcattgg
+agaaaagaaaacggataactgggagagcaagcaattaancaattttnaaa
+aattgaaaattaagaaaaggtaatagcaaatgggtggagaaacagtgggg
+aagagatttcgatagaaaganacaaaanttgtaaatcggaaggaaaacaa
+gatgaaaaaggaggtcgntagactttgngagatggttatctntcgtaacc
+tncgtgtctgatggtttncttaatatcccnttctttcaatcgtcagggaa
+cacgcacatcgtcg
+>CEESB18F
+gaacaggtcgtctatttatnctgtnaaaaaccgtgctgttgtaccgtttg
+anactgtaaaacagcataggtcagaagaaaacaacatcactatacaaaat
+aattttggaaaacgggttgagctaattnatttattggttggcctttctna
+gttgatactcgacgacgatgaatggngcccagaatccgnttg
+>CEESB19F
+ctnattaaaagctttattatgaatgtggctcaaataatgagcatgattca
+nagaaaaaatggtttaaaatgtcaattttgtaatgagaaaatgggggtca
+tcggcagtaatagggtacaacaacaaaagtgattgcnttaaacctcaact
+tcaaaccaaagatacacagagaacctagttatacatgcctagattactac
+cggantagtattgaccaaatacaagagangttaccantgaagatttgggt
+gagaatgggaagcataatgcagtcggctagagaagttngaactattacta
+gcaatgtacacggagagggtgaaggaaaaa
+>CEESB20R
+aatggtgaaggaaacaggntattatgacgttntcggagtaaagccggatg
+cttcagacaatgagttnaagaaggcctaccgtaaaatggcgctcaaattc
+catccagacaagaatccagacggagctnagcaattnaagcagatctncca
+gggatacgaagtattat
+>CEESB21FB
+cgggtaaaaatgctcctagataaaaaagaactgtcacacagttaggggat
+ttaaaacatgatcagacaagtaaaagcgtgagtaggcgtgataaatataa
+actttgaaatatgaaaaaggaaaatgcatttgtggatcattaaaattcta
+taaagttatgaaggaaaacgcgacaaaaatagtgataagtacggtattgg
+ttactcggaatgtacatcggcaaaaaatgcgacagtgacaagntccaatg
+ggaaaaaaggtaaaaaccaaaatganaccatacattggggcccaatattg
+gggaaaaattttgctaccaa
+>CEESB21R
+gttactttccaaaacggacacctcatcagcgaattctggaagctcaccga
+ttctctgcaaaaatgttccnggnttaacgcctcaacagaagcgaatgtnt
+catgagaatccgaatatcatcaagtatctgatttctggacttcgaagtgc
+tcttcacacttgcgagtacacatttnaacgagaagcatngaactntactt
+taacacttccttggggttggaacttcacctcttcaaattgcctctcgtga
+atcagcttatgtggtacgcgatttttggcggnccngtgntgttctcactc
+gttggctcgtg
+>CEESB22F
+aancttcaataaatttatcagaaaacaaaaataaaacaaccgcgtaaaaa
+aaagagaaaacacaaaaaggctccaattattcctaaaatnccaagattgt
+taatcancggtccattcttcgncttctccatcttctactacctcgttttg
+aactttgcctttngcaggtaccttctgatccttagcaacaatgcaattgg
+tgacgncagcattctctccgatttcnactccatcacaaatnaatagggtt
+gcttatgggggnaccatttccaatgacaacgcctttggcaatgatactct
+cttttagctttgttttttca
+>CEESB25F
+agaatgaaatctatattatttggaaaaaagtttttaaaaaatccagattg
+tgaccgaaaaaaatnattcgaaaagaaaaaaacacaaaanttgaggnaaa
+acatgangaaagantggcaaaaagttttttgactcgcaaagaatcaccta
+aacatttcaaatttcgtatgaaagtttgtncgctttaatgataactttta
+aaattcacattagggcgcactttctgggggaaaagtcgaaggaagaagaa
+aaacgtgattatcacagaaaaatcaatgaaatgaaatggnatttgaagag
+gattgttggaaggaaaccttgtggattttcaacaaaaaaa
+>CEESB26F
+gttgtcaagtatactttcagatattgttgagttaatggctcagtgggtgc
+tctcattgaaatcgggtgctctacgagtncggaaactgtctgtaagtatc
+tttccatgtcgttttccgcagggatttcgtcgtagttaaccttatccaac
+accaacatatcccaccactcaatatccggtaccccattctccattttcgc
+cgtgcccgttggtgtaaccatcgcaagtttcacagccgatgagattcccg
+tcgattgtgcagccgatgagacctcattctgaagcctttcgagctttgcc
+attgctctttgcttattcgcaagtttctcgaattctcccttctcatggaa
+attgaacccccggcgacgtcgatctngcagttctcggctgaatccgtggg
+atccagatactcggatgagtttctccggttccttttggattcttttg
+>CEESB27F
+aattttttaatctagtagttctaaaataaatctcagactgataactgtga
+ccaaatacataattactcatacaatactcaggtcttctattagaataata
+atctctaattactaaacgataactaaagaaagactctaataatacataaa
+taaataagnntagtcctgcagttctaataataganccataagnggcaata
+atatttcataccgnataaacatcagggtaatctaaatatttacgtgggaa
+cccgtgtagtcctgcaaaatgtagcgggaaaaatgttaaatttaccccaa
+taaataataaaataaatactgcagntatcataaggttatctaacacatac
+cctggtaataaatcttcatccatag
+>CEESB29F
+cgttcacatttnctttctctgtcggtcttaactcaaccgctcccccgtta
+caaaaatctcagtcaaaaaaaattaattttgacacgncatatttgtngtn
+cttgaggnccccctgtgactttncaaaacgntattttaattgtnctattt
+tgtgtcaactactgatgaaagtcattgaaaatgaactcgtaaatttg
+>CEESB32F
+atagaaaaattgtttattaaaaataaattncnctctatattgatactacc
+tttaaattttttacaaaaaaaacatgtctgtgtgtaggcggatacaggga
+gggggntaccatcagtaattggngcttaaaattccggaaaaaattgggaa
+agaaagaaaccngtaacatttcggatgtntgggcgggcgggtgattgtgg
+taagtgagaattggatattctgagaaattttgagccatgggaggnaaaaa
+agnncaaaaactgggtgaggntcgggggacattttttttgttgcaaaata
+gtccccctacacgtactcaaaaattnggaagtcaaaaaaaaaattg
+>CEESB33F
+ggacaaatgttcattatcatttacaacttcaatagggcaaatagaaattc
+aataataaataaataaaatcagaaaattgacagcttgcnctgaattagac
+acttctnctgactgagattctggtgataaaaagaggcgggatcctaaggt
+gctcatcgacccgcaggagagaaactaaggtcgattagtgaggaaataca
+ttttaaaagataaatcaactaataaagaggaaagatgaattgatgatgtt
+tggtgatttntttgaggattgagattggagatcgcaattattaatgncac
+gcatcggtttggaacgaatccgtcanttctccatcacaatttcgggagtt
+tggagaactgcaaaacagcccantntccaaatcctgctccctaaatacca
+ccgcagcagtttaagataagtgttggtaaggtcatccc
+>CEESB34F
+atntttatgattttattttaacgtgaataaacatcacaaaagtnagctta
+ctcaaggggtggggtgtggggcggctaaaccaaccactaacaagtaacaa
+aaagaagggtgacagtaagaaaaaaaaacaggagatgggtatgcttagca
+actgggggaacgtgctaagagcacttggcaatgaacttattgcttctnag
+cggaaacgagaaccgatgcagcttcgtcgaccttcgagcggaacaattca
+ctatcttgaagcatcatgatcaactcggaattgtcgatttcgagcatcat
+tcctgtgaattttccctgcggtccttgtgtcccgggtagagtttctcaat
+gagnagccgtagatacgntcaccaagaagttg
+>CEESB34R
+tctgagagacacgctgaattgaaaaagaagcacgagcaacacaaggctga
+gcgtatgcagaagtatcaaggagtcaatctctacgtcaaaaacctcgacg
+agactgtcgatgatgatggcttgaagaagcaattcgagtcttacggaaac
+atcacgagtgctaaggtcatgactgacgaaaatgggagatcaaagggatt
+cggttttgtctgcttcgaaaagccggaggaagctacaagtgcagtnactg
+agatgaactcaaaaatggtgtgctcaaagccattntatgtngctattgct
+caacgtaaagaagatcgtcgtgcacagcttgcttctcaattacatgcaac
+gtcttgcagcatgngaatgcacggcaaactttccagg
+>CEESB35F
+aatcaacaatttattaccactttcgttcaagaggttcagaggttgggatg
+ggggataatagctgcaaccagcattcacatataatatttnagatgcgggg
+aagaggattattggaaaaaggagtgaacgaaagtttcagttgaatacatt
+atttcgagttacagaacaatgaaaagacaacgaaaatggggggaatgatg
+attgatttagttggtctggcttgggcagtcgcgggagatgtgtccggtct
+cctggcactggtagcaacgcttctcctcggccgagccactntcggtgcac
+tcgngggaaatgtgcctcgtgccgaattcctgcagcccgggggatccact
+agttctagagcggccgccaccgnggttggangctcccagctttttgtncc
+cttt
+>CEESB36F
+cgtaaaaatttgtctttattttgagttcgtcctgatttgcagagctctta
+tactaaatgaattgaattgcaaacaattgcgaatgatgatatccacagaa
+aaaaggatagtgaaatggagattctttcaagtgggggtgggatgagaagg
+agatggtggtgggggaagggtaacaattttaatgataaactggaataaaa
+cttgactattggtgttggcatctnacgccttgtccgtnatgtcctccgtg
+ttgtncttcgtccgaatcttcattgtaagcgtctccgccacgtcctcggg
+ctgtacttcttctcgtcatattcccataaggggaaaccttctgcggcagc
+tggggggtgacaattancaacctttttgaggtaggggaaggcangctttt
+tcagaaacgggcataa
+>CEESB37F
+gcgaagaaaaatataaatttnattcaaaaatgattcagaatagaaaaatt
+tgaaaagtgtcaaaaataaatgtggattcgacaaaaaccccagaaatttc
+cagataaaaattaatttagaaacataatggtaattatagaaaattaacaa
+taattaaaagttattaggantaaaacaaattatgaaagantaaagttaag
+agaagtcagtgctagagctggatgcagatgctccaaaattgtcaagaaac
+tcccgagaaagacccgntgaaggagcaaaccatgaagaaaactggggcat
+cagtggattttgagctggctcgtgccgaattcctgcagcccgggggntcc
+nctaggttctagagcggccgccaccgcggtgggagctccagcttttgttc
+cctttagtgagggntaatttcgagctttggcgtaatcatgggtcatagct
+gttttcc
+>CEESB38R
+attcactcctttttgcttatacccccttcttgtgagtacatcccaccctg
+tagatgtgctccttgcttgataaaccaggtccgcagtccgatttaggggc
+tcgtgttctggaacagttaaggaaaccatgctcttgttgttgttgttagc
+tgttttcgctgctactagcactccttttttgtcctgagttgatcgtgttt
+gaagcggatttccgatctaaatttttataaattaaaaagaacctttttcc
+aacaaaaaaaatccaaaggaaaagagtttggaaattcttnggactctttc
+ttcggacttttaaactccaattttttcactcgactttcttaggaataatt
+tattctaggaaaaaagtacggtttttcccaacttttccc
+>CEESB39FB
+atctggacatcttttatttcttgttatatatacaataagacaagacaata
+caagactactgtgacagatcaatgggaatcgaggacaagcaagcacgacc
+aattcaaattatgtacaattcctttattattaaacaataaattattcgaa
+ggaagagaaaacattaaagtacttgtggtggctaacctctacggtatcct
+gggcatcagaagagtgagccttaaacggntcagctacatcagaagctttt
+tccttaacggagtccataggctccagaaattttatcaccgattggtctca
+gtgatagatttggacttgtccttcgccgagtnagcaagcgtcttccagca
+gaattnccttgggcatcggagggccttggtcccttgggtgggtattccca
+aggcattcagaangccttgtcccttgggttgag
+>CEESB39R
+caaccgggacagcaaccattgagatcgcttagcttgacgtctgcggccaa
+gtcacctgaggaggttgagccggagcaggattcgaagaaaggagagccac
+gtgccaaggctgcaggattcggaggaggtggaggtggtggaaagagactc
+ccagaggcaagatctncgccaatggtcattccgaaccaggtagcagcgat
+gccggttcagatgactggcttcgtncaacttgtngncaacancattctca
+gcgncattttcccataccaactctgttnatggggttgtccaatgnatcaa
+ancccttcgagatctttta
+>CEESB40F
+gcaatagaataaatttataagcaataagcagagcaagacatgaacagcaa
+atgacaaccggcaacattctttaaataatttttacagagagaaaatacaa
+tataancagacattttcctttagattttacgtttagtagcagtgaaacgt
+tcttgataagcatcatatcctggtaaatcttgaaaatctttcaattggaa
+tcgatttccgtcgtatgacatcacggaaattgttgaagattctngtaaaa
+tcgcgagtgaactccgacttctccaaacagttttcatcgagacgaagagt
+cctcagtcgtgtagttttggcaagatttgntggatttanggagctcagtc
+gattttaattcagacttnaactcgataactttcaaat
+>CEESB41F
+aaatatgatttgatttattatagttatatttgtgatgaaaaaagacatca
+tggtgagatgagattgataataaatatacgaaaaagttacaagcgaaaaa
+ncgaaatgtnctgtagaagttgattaattagatcatgaaagtnccaatga
+gagagtgttagggacatgaataacgggtaaaatgctgtttaanatcaatt
+atagtaagttttttgataaagagtagaaatatataatgtaattccncaaa
+atgaaaagaaaaganaaaccacaacanctcattaaatanttgcaaacgac
+gactcatctacattgtccacaattgcgggattttcgattggaatttggtt
+tttccgacg
+>CEESB42R
+cggcacgagatgatgggacctataaaagaacagttggttcattggactcg
+gctatcgcaggcagattagaagctgaaggatctttgaatctggaaactct
+tgttcttcctccaaccaagcctcccggtgatgatgttacaactgggtctc
+gagggtttntcactccaacacattnaacgnctgcactcgatatgtcattt
+ttcactccacctcgncaaatgcgaactttggctgatgctgttcgtgaagn
+tgcacctgttggaagtgatccccgaatttttttgaacctgat
+>CEESB43F
+aagatactcacattttatactcaaaaaaagggtggaaatgtgactataag
+anggtgataaaagaagggtgggtaacagggaaagaaagancacaacacga
+ggaaaaggagaacatggaatagatggntggngatgaataataaagggggg
+aaatcngtgtgtacactaantattttncaatattatttatcaacnctgat
+aagttacaa
+>CEESB44F
+gtaagtaattagatttattatttgaagattattagaatatttagaactat
+tattaaatctgaatgttgttagtaatagtaacgggctgatcgancagcgc
+gttgagtagctctgaatggaagacgagagacagttggtccctggtatact
+gtaggagaagatccataggntcccatggccagtggaatcaccattgggta
+tccagaagtttttgggtacattggatatccattgagtagtaatggttgac
+cacctcccatcatcatatcagatgcagcgaaacgaggcttatacatgtta
+gatccatacgtatccattcccatcatactgctaggaaagtcccatacggg
+catcttgttgcaattnccttngcatngttatgggatactctgaaatt
+>CEESB47F
+aacctaccacaacttcattagagcacgagaaaattacgagagacaagttg
+tgcggaatgggatggtggtggaacttgaagtttaaataaataaatgtttg
+gttggataacgggtagattaaaaatgagcagaacatttgaaacacaaata
+cgggggaaaacgggatgcgtatatatttaattagaccctggaagatgttg
+agctttgtggagtaccagatggagttctgcggcttgaagggccaacgaat
+cggtttgagctcactgaagtagctgcagatgctcttgnattganttgatg
+atctggatccaatggaacttccagctnggcgtagttctggccgatgttgc
+tctgctccgaattcgcgctcanttgcaatcaacggggggacagattggct
+ntcggagcataacttccctgg
+>CEESB49F
+gaaaattaaaaaattattatgcacaaagaatatacaaaatgcttaattgg
+aaaattagatcaattgaaattncagcaaaaaatacagaaaaaaaaatgca
+atggtttcagtaacaatatctacatatgcncacacggnttcantagaaat
+tttaaaaaaagatataaatctacaagccagtnctctccataatagtttgc
+aagtctctctgaagaattatatttttngaaagtgtctcttcaagcattct
+ttgcagctttttgttcatttcacggacatctgaatctcgggaatcttggc
+cangtgtattgagcagttttcggaatgacgcttcangtttcggttgacca
+gangaaggagaagaagnaccccaggattattnctgttggttgaaatccgg
+tgcccaggagggtttttcgcggg
+>CEESB50F
+atcaaagcgcgcttaaatgcgaactccaaattttattcgccaaaaatgct
+tgcctcagagcgctgtgtgattagtgaaataaaataactaaattatatga
+ttattataatgtgtataaaataccaacaagttcaacaaaaaagtgatcaa
+aaaatgagggcagatgagaaaaggaaaacaaagaaaatcaacaattggta
+aaaaaaaaggntgaaaacaattggaacatacagttttttagagaagaaac
+aattnttcgaatttngttctcttattatnctgtcctccgaaacttccacc
+atcgtatgancgtttgaggnctccacgtccgcgncctccgaagtctcctc
+cacggcca
+>CEESB51F
+aaaactctgaattgatttnttgaaagctgaaagactttggatttgtgtgc
+accgagagaaaagaaaactgaatacaaaaatatacacatagagatgaaaa
+gatagagaaaaatttnatgttttgattaactcctaaaantttnccaaaaa
+ancgggaaagagtgaattatggaaaggagaaaaaatgatagangataaca
+aagggacactgggagaaactttgttttcagaaagngaagaagacccgtag
+ttttancttgagtaataantancgttnaagcgtattancggcgccaccat
+tagaataagtcgcttcgatgctgaaacaggctgctctt
+>CEESB51R
+cgatttcgaatcatcttcgcaagacgctctggcgacaccaaccaaaaagt
+tctcttcccaatgggaaaaagacgtcgatgacgttgaaggaaccgccaat
+gagcttgttcgtattgacgaacgtatatcggatattacagcacaagccga
+tgttattcaagacaagatccgtgaaacagaagttggaagttcagaagaag
+aaatgttgactgcatcatatcttgagttgacaaatgaacggaacactctt
+gtacatcgacaggaatactataatatcattgagacaattcgtcaggttac
+ttcggaattgaccattggggaaacaatcatgaagt
+>CEESB52F
+gagcacagcactgacgagatgaagaaacttgttgaaagtttgagtgaggc
+gtgcaaaaaagcagccgatgagttcgacagtaacgagaaaaatggtgatg
+ccggtgcagcggaaagtgaaaagaaggacatcgaaagaaaattcaaattt
+catacatgtgacgttaatctgaagcaaatcgaacgaagtcatgctgagct
+gaaaccattacacgaaatactcaagtcagaagaaacgaaaacttcattca
+aaccaccagcaaatgctaaattacaaaanggttgggatgttgattggagt
+cgacctgatgactcggcattgctcctgggtgtctggaagtacggttacgg
+tagttgggaagcgataaaaaatggatcctactctttggattggcctcgtg
+ccgaattccctgcagcccggggggatcc
+>CEESB53R
+gctgtacagtctaaatccaacccgtggtgtgcgtttccagactaatggca
+agtttgtcatgccagccagagtaaagtcggtgacgattatcaactacgac
+aaggaatttaatagaaacgtcgntatgtttnccgaagggcttgccaagca
+ttgctccgaacaaggaatgaagtttgatagccgcccgaacagctggaaaa
+aagttaatctcggctcatcagaccgacgaggtacaaaagtggagattaga
+agaagccatttcgaaacggcgtttacggtttgttttttttggaatttatt
+t
+>CEESB54F
+acgaaggacttcatcgtgatttcgcctgtnttctctactcaaagcttcaa
+aagaaactcactcaacaacgcatctacgatatcatcaaggacgctgtggc
+catcgaacaagaattcctgactgaggcacttccagttgacatgattggca
+tganctgtcgtcttatgtcacagtacatcgagtttntcgccgatcatttg
+ctcgtcgagctcggttgtgacaagctttacaagtcgaagaatccattcga
+cttcatggagaatatctcgatcgacggaaagactaacttcttcgagaagc
+gggtttccgagtntcaacgtcctggagtaatggtgaatnnaagncgcgag
+acagtttcgatctttaaggctgancttctaaaggaaaatatttccaaaat
+ttctaatttctaaaccc
+>CEESB55F
+agaatttacccaaaatttattgatacaagtattattaaaatttggnggca
+aaatagaatcacgngaatgaaaaattgtgtcagagtacagtcaatgcaca
+gtcaattatacagaaaaggtaaaaatttgaggcgaccnattcagaaatct
+tcatcatcctcaaaatcgatatcaatagcattaacagagttctgaagctc
+gtcgagcccggtgacttcttcgagacgaccnggcgagttcataacgtcgn
+gaacaaacttccagccattttnctgaaacttgtgcagcgataggngctcc
+ncaagcaacacattgntccgantcgtttttacagaaaggg
+>CEESB56F
+gtttttataaaatattattcatcaacaccctcaaataaattaaactgttg
+cgatgaagtggaccagccatcgattgcnctccgactagttcacagtggtg
+gtttcggagagtttgaccaaaaaagacggccaaatatcacataaattagg
+acagggctcgggctagaacgagcagccaacgccgtcgggtagcataggaa
+acgacacccggcaacgntcacaactaagcgnccagtcaccaagcttgtnt
+ccaagcaacatcaactgtntccggttccactccagcgatttcacgttctc
+ttgaactctctcttcaaagtcctttgcaactttccctcacgggacttgtt
+cgctatcgcantcgagtcgattatttagcctnagatgaagtttatcaccc
+actttaggg
+>CEESB57F
+atatatatttcatttatttagatatattatggttatttacgggacgtcat
+ttaaagaacaacatttaaaagttaaagaaaaccaaaaaaaagaagaaaaa
+aatagatcattaattgtagagggagagatttgtttttcctattccttgga
+ttcttccacaacttctgcgtctttatcctcagactcatcaataacaattg
+gntctncttctgtagatggctcagcagcctgctcagcactcgctggaact
+tcttnagaaggttcttcagntgattttncagcggatttctcggatacttt
+gtcanccttctnatcggttttctccntttcg
+>CEESB58F
+aatgtgaacaattttttaaatgaaaaccaacaacaataacaatagagaaa
+tcataacaacgaaaaaacaaatcgngtaatttatagaatggtcttgttca
+aattgctgtatcctacaccaacagcggtaaccatagctttatcggcactt
+cctgttttgagatcagcttcctcttccttcaatgtttttttaaagtcttc
+cattttaaactcgatctcaaaaaactgtttgagatgcctgagtgcgtgta
+cactatacattggaagaggaccgtagaggaagtgtgaaacatctttagga
+cagagtgtcataaatgtgcttgcaagtatttgagcactactgtcaagggc
+acctcccatgtaaatttgttgaaggaggagcatgaccggcttcaattcca
+atgtcttctggga
+>CEESB60R
+gctcaagctcctcgacgagttcctcattgtcaaggctggagctgctgagt
+cgaaggtcttctatctcaagntgaaaggagattactacagntacctcgcc
+gaggtcgcttnagaggatcgcgctgccgttgttgagaagtcccagaaggc
+ttaccaggaggctcttnatatcgctaaggacaagatgcagccaacccacc
+caattcgccttggacttgccctcaanttctctgtcttctactacggggat
+ctttgaacacttcag
+>CEESB61F
+ttcatcttgaaaattttttttaaaaatgcacaaaaatttgantttttggc
+aaaatttgtgttttcacatataaataaaataattccgaaaatcgtaataa
+aatgcaacaaaagttattgaataagagattaaaagcaggaggcacaacat
+agtagtcatgaatcctctcaacacgtgcataatcagtagaaaaagaagaa
+gaagaagacgtgaaaagagtatatgtatgtnggagagacgcagagaagca
+cacaaaacgaattggaattatgatgatgatgaagaaggaagcgacgatga
+ttcatttgggaagagtgtagagcaatcttattgagcgcttggcggatgtt
+caacttggttcgattgcagcaaatcgttgatattcttctccaaatcatcg
+attctcgttgtcatatcatccgattcgccgaataatctgatnccgccatg
+tgcttggaatcgatcccgtgtctgt
+>CEESB62F
+tccgggtagagcttgtttattcacaggtgtacaataagaataacggctaa
+aataaatagcaaaaaaaatggttctgtgtgctttttgggaacaaattgag
+attatgagatgattttttgtagatttttgtgtgatttatgaacagaaaat
+gtaaatttgaaaatcgctattactggttacgggaaacaacgggaaaaatt
+ctaagaagaatgatggagtctcgggatatgaagagaaaatattacacaat
+aaattattaggaaacatgtttcataaacatcttgatctataagtggccnc
+nttggaaggacattttggttgggaggaaacgnatcggaattggtttgaca
+agaacccgtaaaagtgcaccancaatctgaggtg
+>CEESB64F
+ggggttttttgtaagattatttgaagaaagtacaataggaaatgggaagg
+taaaaaaattggatgagaaattatgaaatgcagaatgaatactgctttca
+gtaccaaaaagtatagccaacaatttttncnctgaatatcagagaatatt
+acgaccttggcatgcaagatgaagttagagagcttagtttttagcagttg
+gagcagaagtagaagcagcagcactctgagcattggtgagcactccaaag
+gcttccncgaccttncttctgaacaactcagcgtcttggagcatcatgat
+caactcggcgttgtcaatctccagcatcattccagtgattttcccggcct
+cccttgtaggttccggggaacatcttctcggtaangagcataggata
+>CEESB64R
+ctcgtncaatctcggcagntgtncatttaagancttggntcgtgtcattg
+acaacaagtccgtctacgacactttctcgctttttggaaacattttgtct
+tgcaaagttgtcaccgacgatgaaggaaacagcaaaggatacggatttnt
+tcacttcgagactnagcactctgcgcaaactgccattnagaaagtcaatg
+gaatgcttctttctgataagaaagtctatgtcggaaaattccaacctcgc
+g
+>CEESB65F
+aaaanttttggtaaaaatttattagatgacccagtatgttttgacacgaa
+tgcaatgagagaagngacttgcattgcttacacaacacaaggggaaatac
+aaaaaagcaagngatagcaaaataagtagcacagggcagantaccntctt
+aactgacaacatcgtaaacaaaaccgntaatcgccttattagtaaagatg
+agatgatatgctgccaagagccgttttgagaaagggggaaagagangtaa
+aaatgaataagttaacggtgtttgacatttaacaacaggtccntggaacc
+ccncntgcgttgaact
+>CEESB66F
+gaatgntttgtaaaaactttatcatgttttaaaaataagancatctgaaa
+ttggaattgaataaaaatacaactaacttataggaagactnctgattatg
+aaacgaaaaattctacacaagaaagatagcagagggagcagagcacaggn
+ncttgtncattttattaatgagcatttaaaagtgaaggaagtgggancat
+ggagcaaaggtaagaaacatttggcaggagtatttcnnttttaaaatgta
+agtaaacgtcngggnaaaatgagc
+>CEESB67R
+gttgattcggaaatcagtgaggaggaagaagaagaagaaatatataataa
+acaaacaaaaactaatgcctcactcgactccattcatagaaggaagtcta
+aaccccaaccaatgctggagttcgacgcggagacgcaaaaaatgtttgat
+gatgcgtttcaaagtgacaaaaaatctacaaaagaaaagtatccgttcta
+atttctgaaaaaacatcagcacactgttctattggttccactttcctttt
+tattccatctttattgtggttaattatcccatctacttctctatatttcc
+ccttgataattaaaattggttttaatggttt
+>CEESB68F
+aattactattttacatttttattcttaacacgcatgactgcctgaaaatc
+tcagtttcaacaatggaaaacatattttacggttacaaaaacaaataaat
+gttatagagcnctattctaatttnncatttttaaacattttaccngcaac
+acaattaaaaaagtgggtatcaacagttagttggctaccncagangtatc
+acacaaggntcgggacg
+>CEESB69F
+aaatatacaacctttattgagaagagaccatttatatacttgtaagcttc
+taggaaaatttnagatactaaagagaagcatagattttaagacaagcagt
+taactaggtgaaagtaggatgagacagcttaggccttggtggcgatgtac
+gagangagatcaacaactctnttggagtatccganctcgttatcgtacca
+cgagacgagcttgacgaagtgtgggttgagtgagatggatgctccggcat
+cgaagatggaagagttggtatcggtcgacaaagtcggtggagacaacttg
+atccncagtgtaagcnaggaattncctcccattcggtcccntcagngggc
+agnctttcattaactttctttg
+>CEESB70R
+aaacaactcgaaattnaggctagctctctccgccgtgtggctcttgttgg
+agttgccgtctccttcaccgccacattggtgtgcgtcattgccgccccaa
+tgctctacaactacatgcaacacatgcaatccgttatgcaatccgaagtt
+gatttctgccgttcccgatctggaaanatctggagagaggtttcccgcac
+tcaagttctcgccaaggtttctnggaggagcccttcgttcccgncgtcaa
+gctnggatacggagagncgcccgggagttttgaaaggntctttcaagttt
+gggacaaccaaggg
+>CEESB71R
+acgctttcaatcagaggtaacaacatcagatacatcattttnccggatcc
+actcgctcttgacacccttttaatcgacgatgaaccaagaaagaaggccc
+gtgccgctcgcgccggagcttcacgtggacgtggtcgtggtggaatgcgc
+ggaggaagaggaggtcgtggtcgcggacgcggaggaccacgcggaggcgg
+tccacgtcgttaagctatcatcgggtcatagcaaatttgagtatcgaacg
+tcctatacttttgtatcacgttcctcaagtttaattcacattttgttncc
+cgttaaaagtttgctgacttttgttttaaanctttttgaaattaatttan
+ttacggg
+>CEESB73F
+aaacttcaatttttatnctaattaatcgtcaatattcaaatncgacgaaa
+attttcagattaccaaatatttggaaatttggagaggattttntgtggga
+gagggatggggatagagtataaganttncgagacgcagggtgaatatttc
+ctttaaaacaagaaatgggggaaaaaaaggataatgtaaaantaacatgg
+atttncaggtagtnctagatgggggtgggtttaaggcagatttcacggta
+gcncagggtttgtccggntatttnagaggaggttcttgagaatgagaata
+tc
+>CEESB74R
+caaaatatagaagtcactgcatctnattctgcagcagaacatgaagtgtt
+tgaaggaatatcatcaaatattgctgggaaaggagaaaagttagaagaag
+aaatagataacattggtattgtaatgcaaccagagccacgagttgtccat
+gaagcttccgaagtatcagacaacattgaacttaatatcaaagacgacct
+aaacttgaaaagtagactggacaacttcacaagagctaaattcaggcaat
+caaccaccgtaacacctaatattgttgctgtggagccttctattganggt
+gttgaagacggttttagatcat
+>CEESB75F
+gtatcgtctcatcaactttatttttaggcatgtacaatcacgtaaaggac
+acactgatgtncattggaggaaatgtgtgagaatctcacactgcataatt
+tttnccggtcggtgatttataggtataaatatagantggcggttaaaatt
+caaaagatgcatttnaattaggtgaaaaaaggaagaagtttttgggctgc
+cagagatgatgtaaaaaatagacagagaccatatcaggacaatgtgtgaa
+gtgtgaaggggaaggaagcgacatgtcgtttagaaatgtaatagagaaat
+aggcaaactgaagggtaatttantcctcgcaccagggcattctctttgcg
+g
+>CEESB76F
+gacgctcttcagtaatttattcaggattctcagaccgccagatgaaataa
+acgaagaacgaatgtntggtggtttggaagaaatcctttttcaatttctc
+gagaaaaatcagaggaagggaggagggagatttggagacaaaatagttta
+aaggggatgagagagaattgaggggatcaatctacaccgttcgatttact
+tttgaattataactcgtgccgaattcctgcagcccgggggatccactagt
+tctagagcggccgccaccgcggtggagctccagcttttntnccctttagt
+gagggttaatttcgagcttggcgtaatcatggtcatagctgtttcctgtg
+tgaaattgttatccgctcacaattccacacaacatacgagccggaagcat
+aaagtgtaaaagcctgggg
+>CEESB77F
+caagaaaacgtttnttttatctatcaagtgtagcagaaggaaaaaagaaa
+cagagtatacgngtagtacaacaataaaacggaaaacagtaacaatagaa
+attgaaaacaaaaantttggcaatttagtcggtatggaagtnagtgctca
+tctncggtgtgcttctttcttttncttggaattnctncttcatctgcctc
+cccaccttcagctccttcatttnccggttgtngctgctcgcggagtcgct
+tcaggncgggagnanaaagttaagcccggggggcccaangggggggaacn
+nnaannannggg
+>CEESB78F
+attaaaattttacttccatttcagaaagaaagaaaccaagacgattaaaa
+agcactgtncaggaaagaagagccacatgagatagaagggacaaattttt
+naaaggttcgcaagtnttatggaaggatgtatggggacagaggtacagaa
+cagttccaccaatttgaactagccaaatttcaagtagggggcataacana
+tgtgattcgattgaggggcaagatacgnttgcaaaanacatcnagnaaga
+aagcaacntgggaagtgngaaag
+>CEESB79FB
+ggantaaaatactttcatcgaaaatataattgaaatgaaaagtaatctta
+aagaggntaatttacattgcaaaatacaaaacagtcgaacgagcacctag
+aacgaaaatggtccnggnaaagctcaagctcttctcggaaggancatcaa
+ggcattttggtaataaattgtggaaaagccatagcacatacactttggag
+cnaggagaaattgtgggaaaagagattttaataattctagaaaaaaagtt
+tacactcgagaaaaggcaatgaaaaaaggttttaaaagnagaaacaagct
+gggattatggtagggtaaatttacaaaactcgcaaataaagc
+>CEESB79R
+caaatnccgcttttaagtcataccgtnatttatacttcgagggaggcgtc
+tcatcagtttacttctgggacttggataatggaggtttcgccggaattnt
+nctcatcaagaaagagggagacggagccaaaaatataacaggatgttggg
+attctnttcacgttattgagatcacggaaagagcacgccaggctcactac
+aagctcacctccactatcatgctgtggctgcagacaaacaaaagctccag
+cggtgtgatgaacctcggtggctccctttacaagacagcatgagatgggt
+gcaccaatcaacggtcagaacac
+>CEESB80F
+agatcacaacgtatttattacttctcctncttttcaagaaaaatagtcca
+gacaagcctaatgagagcctgaaagcctggaaattttgtcttgatagtga
+tctattcgatttcagtgaaaaaaatcgcaatccgtacagatggtagaggt
+ggaaatncgcaaaaaacgccaaaatctcagtggaaagttggcaaatttnc
+gggaaatcgcaaatttcgcgagagaaatttgntactttnccccnaaaata
+gccnagnaacagggnaaaagatgg
+>CEESB80R
+ggaaaaacgggttttctttcggcgaaaattttgaattatttagtgaaaaa
+atagccaaaattctcaaattttgcacggttttttcaatttttttgcaaat
+tttggtgcattttccgtaaaaatcgacaaaaaattgcgaaacacattttt
+ccgagttttttttttgccgatttcgtgcaaaaaacgtgggaaaatctgca
+aaaatgcagaaaacaggnaaaaattgattttctgccactattttgcaatt
+ttcggtcgattttagtgcatttttagccgattttgactgaaaatttgcaa
+aanattgattgaaattaaattttatcggttttttcccgatattttgg
+>CEESB81F
+cttaaatcgtttattattaaaattaaaaattgttataataacgnaaagcc
+attaaatgaaataaatattaaataaagatagaccagaaccacaaatgatt
+gtaccaccgtatcaggaaaagagcactcaagaaaaaagtgaatttttggt
+tgaaatataattttaaacaacaaaaaaagaaacatttttgaaatgtaata
+ntaaattatttagaaactttccaaacangtttctggcatctgatgtattc
+tgcgagatgaagtgtcagttggtncganttcatacgttttgcattggaaa
+>CEESB82F
+aatgtgtaaatgtgttcatgtncttcatgaataaaaaatagagtgataaa
+cgattacatgagatgacagagtgntaacaggaaaaatgtgggtatttttn
+aaaaccgtaaaagtctaagggtcaagaaaatgaaanttgaaaatcctatc
+tgtncacgngtgcaaaaatatgtnccacg
+>CEESB84F
+aaaaatatgatttactaaatgattagaaagcttgcaataccgaaatacaa
+aatattggcgaaaatggaaatcccgagcgatcggtactttcaagaaaagg
+aataaattaagangaaatacataaagtcatcacaatagaagagaactaga
+ctgaaatatgaaaagaaatagagacaggaagcaagantttagaagaaaat
+aagatgaattttaaaaatgcgagangaagaaaattcagattctggtcacc
+gaaaggnaaatggaacanttttagagaaaggagatggctgggggatgagg
+ggaaactctatgcacaaacacaagaagaaaaaagcaccaacacgncacaa
+tattcaaatagaagtatatatctncttaggaaattaaat
+>CEESB84R
+ggcacgagctcattcgaaaaaatccttgctgaagagcgtgaagctgagga
+gaatctctaagatcacctcggccacttcaaacagtgtgacatcgacgttc
+gacaaatctttaattatttatttctagtagatatatacttctatttgaat
+attgtgtcgtgttgtgcttttttcttcttgggtttgtgcatagagtttcc
+cctcatcccccagccatctcctttctctaaaattgttccattttcctttc
+ggtgnccagaatctgaattttcttcttctcgcatttttaaaatttcatc
+>CEESB85F
+ttttttaaatatgtatattcattttcaataaagcccatttaatgagaacg
+caaaagtacaagaaatacagaagtcagtgcaatgagatcgcatccacgtt
+gagaacgtcacttagttggtcgaaatcgctgagatccttggcgcactcag
+cacatcctttagtccattgcatgcagtacacaggatgatagttcagatgc
+acaatctgaccatcggcttttcctacccaaactaaaactcgacagccttc
+cgtctcacaatatctcccgtatctcgcgnatttnttgaacgccttcgtgt
+cgaaacgncttttactgttcaaggggaacggttcggctttctgggaactc
+aaatcgcagtagagtntgctcggttttgaagttttggcgaccccactttg
+cc
+>CEESB86F
+catcacttcttcttgctgagccttaacggcatcttcaggacgagaacgga
+gtgcagaagtcaaaacgctgggagattcacactcgatttgcacaatagta
+gctgctgatttggcttgttgtcggattaaatcaacttctccttcagtatg
+agcaacttcgcacagagcttccaaatacgtcatcgcctgtncagtttggt
+tgcagaaacgcgtgagtttcgaaatttgagcgttgatttctcgaatcttc
+ttgtcaaattctacagaatttcggaggaactcatcaatttccataaccgc
+atcggccttttctttacacatttgctggcatcttcccattcgagtactac
+aaagattcgctaaaacgacccccttcggcttccatttttcccctcgtggc
+cggaa
+>CEESB87F
+aagacataatagtgctttgttataagcaattcatcgaaaatttagtgctc
+ggcaacagcttttcctttctcctcgtacaatttcgccaattccttcggac
+aagtatgcatgtatcccttgttagcagcgcaaatctccttcattctagtg
+tacggntcgtagtttgcgaaaaattcttcatatttgttgtgacgtggcca
+gacgtaaaatgcgttgaaggcggcagtcgagacgacagcgacagccagtg
+aaacgnnaactcgtgccgaattcctgcagcccgggggatccactagttct
+agagcggccgccaccgcggtggagctccagcttttgttccctttagtgag
+ggttaattttcgagctttggcgtaatcatg
+>CEESB88F
+atattagtaagatcatcaataaacaaacacaaataaaaataatcacacta
+catcaacaaaatgtcaatataaaatagcaaattccaaccccagatgatga
+ttataattaaaatgattttttagaagacgtaaaaaattaaatgctaagan
+caaaccaccacacaaggcatganttccgtgaaatcccgtagataaataaa
+aaatccttccaaatactccgtctgcaatagaaaatctagcttctatatac
+tctattaattganttcctgtaaaataagcngccaataaacatgttaaaat
+tntactattagtacatcttttatttctaggtnatctgtggatgtgctcaa
+gtttactgtaacaccactcctttaattaaa
+>CEESB89F
+actcaaaaattgttcattcgaatcctataaaacggcgggaaaaagtgatt
+ggagtgtgatgaaataacggaaaaaacagaaaaacaataaaaattactag
+ctattcaaaaaaaaactacaaaaaaccggaaaaacattaaaaaaccagag
+gaaaattaaaagaaaattatttagagacgactccaacttgagcaggccga
+attggacgttctttcaggctgtagccgatctttgtacacacttcgatatg
+tccgacaggttgtttggcattagcagatggantctggaaaacggcttcgt
+ggaggattngggtcgaacttttccattggttggatccacggtcacaaagg
+nccatgctttgcgaatggtcttttgcccatgaccagtacggggtcatttg
+aaaactccttcc
+>CEESB91F
+caaagaaataaaatttattttaggcactgttgagcaaccngagttgtgga
+ataaaaataaaaattggaaaattaaaattncaaaaaaaaaatcgaaattt
+ttttaatttgcaaaaaacccgaaagtggngaaaagaaatgngaaattnta
+actggaatggttttttncgttgaattgttgactaggatgacacgtggata
+cacatatcagangctgataaggttaacggancaggtgaagacttntggag
+accncggcggtgagccatttcgggt
+>CEESB92F
+cgaaaaatttcaaatttatatgatgaacttgtttgggtgtgaaaagaata
+ggaaggaaagcagggaaatgggatggagancaaacaaaaaagtagttttt
+tttgaagaataagaagnaacatttggagaagaagttgaagcaaattatgc
+acaggtatcatgtaatttncgnaagnaaaaaaaacacggngnaaaatgat
+ttagagacgntcccaagagatttcagcctct
+>CEESB94F
+gagagagaacgtcaagcttcaaagcttccgggcgtaaaatctccgataca
+aaggcatgagggtgacagtaccatagcttccattgacactccgaagttga
+tgaggttggcaaaggcatggggctttaatactaaatattccacatcacca
+atgtgttctccctctggataatttggaatgactggatgtgcatcaccatc
+accaccaccgnctgctcgtttgacacgtttctgaggtttgttccagaacg
+gccactgcggccgtccttccgttgccagttttcgaattgcatcttctatc
+ctagtatctctttcttctctggtcatattggcgatgggcaagtagcttgt
+tggggtatcagcatagacaagctttgactt
+>CEESC01F
+gattataatccttcactggaaataaattcttccaattgataatgactgag
+gaagtgagcccaatcgacgtcttttgctatttgcagtttggaaacattac
+tctgagtgcagaatgcatcggttttgtagtgacaagttttatggcatacc
+attttgcagtctctgcactgataagcttgcttggaaaaactgcttcgaat
+cctttgctggcacacattgcacgtggcaccgcctttnactttgacggcaa
+caaatgtatgancgttgtagatgtgaagtttctttcctttcctcattaat
+cgagatgcagctggctcattgagcattgctggagggatgaggaagtattt
+tt
+>CEESC02F
+agacgaaggacttcatcgtgatttcgcctgtnttctctactcaaagcttc
+aaaagaaactcactcaacaacgcatctacgatatcatcaaggacgctgtg
+gccatcgaacaagaattcctgactgaggcacttccagttgacatgattgg
+catgaactgtcgtcttatgtcacagtacatcgagtttgtcgccgatcatt
+tgctcgtcgagctcggttgtgacaagccttacaagtcgaagaatccattc
+gacttcatggagaatatctcgatcgacgggaaagactaacttcttcgaga
+agcgggtttccgagtatcaacgtcctgggagtgatggtgaatgaagccga
+gagacagttcgatcttgaggctgacttctaaag
+>CEESC02R
+gcacgagaaaatatttccaaatttctttagaagtcagcctcaagatcgaa
+ctgtctctcggcttcattcaccatcactccaggacgttgatactcggaaa
+cccgcttctcgaagaagttagtctttccgtcgatcgagatattctccatg
+aagtcgaatggattcttcgacttgtaaggcttgtcacaaccgagctcgac
+gagcaaatgatcggcgacaaactcgatgtactgtgacataagacgacagt
+tcatgccaatcatgtcaactggaagtgcctcagtcaggaattct
+>CEESC03F
+aattcaaaaatttatacagaaaacagaatgcaaagaaatctgtacgtgag
+cttttcataaaagcgcattcaacaacaataagttctacagatataaataa
+atatcgaaatctcttgaggggttggaaagggagaaaatgaaatgagggga
+tattgtaattacacgtcattgatttggcggaggggtttcatttgaaagga
+cattattaaagctctaattaaaagttttnctttaaaaaaaagtgatgatg
+agctgcagaaaaagggacttcccgtgagttttcagatgtcaaaaagttaa
+ggtcagaggagttcagaaaaatgcaattgggagggcccgaagtgagatgc
+atttttcactagggagtttcagggaaattacgg
+>CEESC04F
+gatcatcgcctggttgaagaagaagaccggaccagtcgccaagccactcg
+ctgacgccgatgccgttaaggagcttcaagagtctgccgatgttgttgtc
+attgaaggattcacaaagttcctcgagaccaacggaaaggagggagctgg
+agcttccgaggaggagaaggccgaggaggaggctgatgaggagggacaca
+ccgagctctaaatccacattccaatacagttcaacgcatcggggttccat
+ggacctgttgttaaatgtcaaacatcgttaacttattcatttttacttct
+ctttccccctttctcaaaacggctcttggcagcatatcatctcatcttta
+ctaataaggcgattatcggttttggtttacggatgttgtcagtttaagga
+tgggtattcttgccctgtggct
+>CEESC05F
+aacttgcttcttgtatattcagagtccgaagatgatccaggaactctgaa
+gatcacagatttcggattggcaactaaataccgaaaggatggagaggaga
+tcatgttgagcgaggattgcgggtctaagccctatgcagcgccagaagtc
+tgcacagggaacgattatcgggggccacccgtcgatatctggtcggctgg
+agtcgttttgatgacgatgcttgttggggagcactcttggaaagttgcaa
+ataaagaaaaagacgcggcctacagcaactggntcaatgcaaaagacgaa
+aaggcgaacctgtggaatgtgatctccggaccaacgacggcgcttcttcg
+caaacttctccatgcgaaccnccgaaaaaaggggcaacaatggcg
+>CEESC05R
+nngaaaaaagggcaacaatggcgaaaattgtaccggaaccatggntccgc
+ttcaattttttngccattgttgcccttttttcggggttcgcatggagaag
+tttgcgaagangcgccgtcgttggtccggagatcacattccacaggttcg
+ccttttcgtcttttgcattgatccagttgctgtagg
+>CEESC06F
+atcaatagctgtttattgataacatagtgaacagtctgaacagtttctgg
+ggagagatatttcacgaaaacataaatttttaagggaaaaatggggagaa
+aatgtgtagaaaaaataggaacgacacaatgcagagatcaacgntcatga
+gatgaaaacatacaaatagatggaccaaaatagctgaaaatttaaaaaaa
+agaggnaaaattacataattgcgcaactatttctgattgattcaattatt
+gaagacttttgatttttaatagccggtggtgctgattcggtctcccacca
+cggacgtggacgctctccttttgacttttnatatcaatgatgtcggtcct
+catcatcgnccctcangtggacccatgttggaatggctccagaggaacgc
+ttgttggg
+>CEESC08F
+aatatgaaacagattttattttagtttcaaatgcaatatattgcaattac
+aaccacaaaaggggaaaggaaccgtaaagtgttcgagaagtactgagact
+gagaagtggggggagaaacaacattaaataaatagaaaacaacacaagtt
+atcttatcttatcacaatatcatcgagtgcataaagctaatggaatgggg
+agatgttttacattgtttagagctcagagtgctcctcctcaatcgttgtt
+tctncagcggcttctggctctggttcagcctcttcaatatgctgttcggg
+ttcaacttngagcatcttggggnnacatcgagccgattgggcgaagg
+>CEESC09F
+agtcgctgttcttttttattcagaaaaaaaaagcttcgaacaaaaatggc
+aaacaaaagctggatgggatagataacaacaacaacaaaaagaaactaac
+aacaagagtaaatatgagaataataaaaaaatatggaaaaagagaactgg
+ttgataaaaaacaagatttgaaaaataaaatcaacaatttaaatctaaag
+ggttctagttttagaatgtattctggcgttttccaacgtttacgtttcca
+ggaagttttttcagtccttttgtgaccagagcattcgggcgtcggctcac
+ttctttncgttcagctgcatgagctgcttgtttcgtctttgccagcttct
+gcacgaaggatccttatcacggtttcgaaatcgtga
+>CEESC10F
+gaagttggtgttacaactttattatgtgtattcaaaagcttgggggtgtt
+tcgaatgtcgagatttgaaagggggaaaagccgtaacaaaacggaacatt
+gaattgtatgggtagagacgggaagttttgatgagtgggctcagcacaga
+aatttgaataactnctgcatttggttgatgggaaaaggggagtgattgat
+gaatttatagaaaaaaatggaaagaaaaacgatggtttaactagtcaatg
+gtaataaataaatcatatgacaaatagtattatttatcattttcaagatc
+naggtgaggggtgtgtggcggtaacgtgtcggtntgaaggagtttaggtt
+g
+>CEESC11F
+ccctgcgaagggactttgcaactnattctttaccagttgttcaaccatct
+attgagacgcaattcaatatgcgaaaactatatgggagcactaaagtgcc
+tttntagccttttgttgaccacaccatcaccgcgaacacgcaggagggac
+gtattcatacacggtggactctctttcgacattttccaataatcttttgc
+aagaaggttgtgatgttcgattgtcgaagttcatcactgctgtgtcagct
+tggcgaaatgtgcgatgagagaattgaaggccccaaatgaatgaccattc
+gttgttctacatttgagtttaccaccatttcaacaatggcggatgtgagg
+tccgttttcgctcgtctttgcggattcccggagcttttgaaatccgtcct
+ccgtaggcccatgcaactgccgtccctctttaaatgacagtaaacttag

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/6.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/6.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/6.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,308 @@
+>CEESC12R
+gcacgagtccatctccatatgccaccacaacantggtcctgtcgaaccaa
+caaccagcttggctcaatgacaaaatgcttcgcgcgccanaatgccaaca
+aatcccgtgccaccagagccaccggcgcgatatgcagatcataccgctgg
+aagacgatctcgatcgagccgtgcatccgatgggagaggaactctgaatg
+gcggactccatcaccggactagcggaagtcaacggtcggatagtccacct
+cacacagatgtgagctatgttcagcttcactcatccgatggaactggtag
+tagtaaggaaagaantngggagcggagaacaccaccgaataaa
+>CEESC13F
+cttgcttgaaaaatttatataaatatttaagagaagaaaaataaataatc
+gcatctaatgacgtctgtccttgtatccctggtttccattgactggtgca
+ctttcctgtctttgaggacatggacaatattcggcatcagttcctggctc
+tccctcctctcctggtgctccagcagaaccgttctctccattatctccct
+tgtctccacgtggtccacgctctcctggtgctcctggaataccttgagct
+ccctcgtgccgaattcctgcagcccgggggatccactagttctagagcgg
+ccgccaccgcggtgggagctccagcttttgttncctttagtgagggttaa
+tttcgagcttggcgtaatcatggtcatagctgtttcctg
+>CEESC13R
+gcacgagggagctcaaggtattccaggagcaccaggagagcgtggaccac
+gtggagacaagggagataatggagagaacggttctgctggagcaccagga
+gaggagggagagccaggaactgatgccgaatattgtccatgtcctcaaag
+acaggaaagtgcaccagtcaatggaaaccagggatacaagaacagacgtc
+attagatgcgattatttatttttcttctcttaaatatttatataaatttt
+tcaagcaag
+>CEESC14F
+aaaaaatgcgaagntccaacagttccatgctatcgttatctggatatttc
+aaatggattattccttggaggccgtcctggaacttcgaagcaaatcgaga
+aggccttctctggatgtatttctgatttgtcagtggataaagaagatgtc
+gattttncaacgatcaaagaaatgcacaaagttggacaagttcatgaagg
+atgcaagcatcgtaaagatttttnctcaacttcggatggacaatgctcgg
+ctacctcgaagtgtgtcaatcgttggggaggcagaatttgtagctgtccg
+caatcggttcattcgactggtgaatgtgttggagcacttggaactcaaga
+tttacgtgggcattctctatttgaagaggaatcatttgtttttgtaccag
+ccaagccaagtatctgtaccgtttgaagtttcatttgaattccggacatc
+tcgagctgatatgcaagt
+>CEESC14R
+gcacgagtttttttttttcactgaaatcgatcttccatcaaccaaaacat
+tccgaatgcatccttcaaaacgtgatggatgtccagttccaggtgcaatt
+ccaaagtataagctctctagattcatatcagaaatggaagctttagcttc
+tgcactgtagattccatttatactagttgcaactgaatctgcttcaaact
+tgatgacaacattcatccaatgctttgatgttacttcaggagctggcaac
+tcaacttcagaatctccaatgttatatttcaaagttccattcatcaactt
+ccaagttgtaatgaacacttctttgg
+>CEESC15F
+cnctttgtaataaataatttattatgccncgaaaataattnccnccaaaa
+tcaatctttcagcgggtgggtgtaatcattgggaacngggaagtcactag
+gaaataaggaaatagngaaatacaataaataaaataataataataatagg
+cgactatgattagttagaaaacacagctctgggaattgtttggaagtgtt
+gagagaaattnttgattttttacaaatggggaatatgattgaccgttgga
+ataagtnaaaatattantaaaaatagcgctgantgaaaacttaataagtg
+acagtgaaaaggatttgaaaagntaattaanccaactacg
+>CEESC15R
+gcacgagggcggacaacctcaaggcgctacaccgggacaacccgatcaga
+actttgactacatgttcaagctcctgataatcggaaattcatcagttgga
+aaaacatcattcctcttccgttactgtgatgattcattcacttctgcctt
+cgtctctactgtcggaatcgatttcaaagtgaaaactgtgttccgtggag
+acaaacgagtcaaacttcaaatctgggataccgccggacaggagaggtac
+cgtaccatcaccaccgcctactatcgtggagcaatgggatttcattctga
+tgtatgacatcacttaatgaagggtcttttaatagtgttc
+>CEESC15RB
+ggcggacaacctcaaggcgctacaccgggacaacccgatcagaactttga
+ctacatgttcaagctcctgataatcggaaattcatcagttggaaaaacat
+cattcctcttccgttactgtgatgattcattcacttctgccttcgtctct
+actgtcggaatcgatttcaaagtgaaaactgtgttccgtggagacaaacg
+agtcaaacttcaaatctggggataccgccggacaggagaggtaccgtacc
+atcaccaccggcttactattcgtggagcaatggggattcatttctgatgt
+attgacatcactaattgaaggagtctttttaatagtgttccagggttggt
+gcactcaaatcaagncatactca
+>CEESC16F
+cccattttacaaatttatccagaggaatggattttcaattaaaatcttga
+aaaaaactaaaaagtagagaaaattggaaactttggtgggtttaaacgtt
+aaaagagattaaatttaaaaaaaaagggagatcgantcgaataatttggg
+tggatgggatcattgtacaatataaatagaaaaaaaggaagagttcaatt
+gggatagaaaaaaaaagtgaatttttttttttgataaggtagntagtgtg
+ggtggtggcggga
+>CEESC17F
+tttcctgaaaccgtcagtcttacttctcgacgaaccgaccaatcatttgg
+atttggaagcttgtgtgtggcttgaggaggaactcgctcagtataaaaga
+acctngttggtggtttctcactctcaagatttcatgaacggagtttgcac
+caacatcattcatttgttccaaaagcaattggnttactatggaggaaact
+acgaccagtttgtgaagacacgtcttgaattgctcgaaaatcaacaaaaa
+cgntacaactgggaacagtctcaactncaacacatgaaagattacgtcgc
+gaggttcggtcacggttctgccaaactcgctcgtcaagctcaatncaaag
+tgaaaa
+>CEESC17RB
+gaaaattcacacaaaacactacacatttagtgatgtgacaacaacaaagg
+agggtaattggaaaaaaagggtagaaacaggaaccggaccaacaattgga
+ggaaaaccgacaaaaattgggtcaaagagagtaaaagatgaatgaaaaca
+agagaaaatataatcaaaatcacaggaaaatgnaattgaaatatcctana
+ttgaanatggggggnaaggtgaataatgngagaaaaatctcgggaaatca
+gttcgattctaatattagaattggcagattttcgatgttttcggggggaa
+atagg
+>CEESC18F
+aagatcaatgatatggaatggtggaatcgattccttgattccgatcctcc
+aatcaatactaaggaagtgaagccagagaactcgaaattgagcgacttgg
+atggagagacacgtgccatggtcgaaaagatgatgcatgaactgttgcag
+catatcatgcttttccttctcatcagacgttgcggcacgagctcgtgcna
+aattcctgcagcccgggggatccactagttctagagcggccgccaccgcg
+gtggagctccagcttttgttccctttagtgagggttaatttcgagcttgg
+cgtaatcatgggtcatagctgtttcctgtgtgaaattgttatccgctcac
+aattccacacaacatacgagccggaagcataaagtgtaaagcctgggggt
+gcctaatgaagtgagctaactcacattaattgcggttgcg
+>CEESC18R
+ctcgtgccnaacgtctgatgagaagaaaaagcatgatatgctgcaacagt
+tcatgcatcatcttttcgaccatggcacgtgtctctccatccaagtcgct
+caatttcgagttctctggcttcacttccttagtattgattggaggatcgg
+aatcaaggaatcgattccaccattccatatcattgatcttctcgaggggg
+ggcccggtacccaattcgccctatagtgagtcgtattacaattcactggc
+cgtcgttttacaacgtcgtgactgggaaaaccctggggttacccaacttt
+aatcgccttgcagcacatccccctttcg
+>CEESC19F
+gctctcgactccatcattccaccacaacgcccaactgaccgaccactccg
+tctcccactccaggatgtgtacaagatcggaggaatcggaactgttccag
+tcggacgngttnagaccggaatcatcaagccaggaatggtcgttaccttc
+gntccacaaaacgtcaccactgaagtcaagtccgttgagatgcatcacga
+ntctnttccagaggccgtcccaggagacaacgttggattcaacgtcaaga
+acgtctccgtcaaggatattcgtcgtggatccgtctgctccgactccaag
+caagacccantcaaggaggcttcgnaccttccacgnccaggtcatcatca
+tgaaccatccagggcagatctccaacggantacactccantt
+>CEESC19R
+tggatccccngggctgcaggnaaaaatcaacaagataaactcaacaatga
+agatttacttcttctttggtgcagccttttgggcggacttggtgaccttt
+ccagaggatccatcagacttctcaacggacttgataactccgacagcgac
+ggtttgtctcatgtcacgaacggcgaaacgtccgagtngagcgtagtcgg
+tgaaggattcaacacaaagtggcttggttgggatgagctcgacgattcca
+gcatctncagacttgnggaactttgggaagtcctcaaccttgttaccggt
+acgacggtcaaccttctacttaagctcgttgaacttt
+>CEESC20F
+aacaacaaaattgattttaattgaaggagaggtagagaatgaaaacttgt
+gaattgaaaagaatagaaacaaaaaaattaaacagttatttagggcttat
+ggcgaggggtaacgaatgagaaaagccctttaactgtgagataaagtaaa
+aagaagaatgaaaaatagaaacaaaaaatatttaacagttatttagggct
+gaggctaaagaatgaaaatccattaactgtgagagaaattgaatggaaga
+atagaaaaaaaacaagttcaacagttattggttggcccaaaatcaacctc
+gatgagcgcttttcggatggcttctgtgcgcacttcgagaaccttctcgt
+tttcgacgttttcgagagcctctttctacg
+>CEESC21F
+gaagttgaggccaacgttccaaatgttatcaatgagcagctgagccggag
+tattccgaagacatgcagtttgatcagtgatctttgtctgattgcagccg
+acaagatcggctagtgctgttgatttttgttttgctcgtttcggagaatc
+cattgaccacttgttgtcgagggatcctgattggagaattccgttttgga
+ataatcccttcgacgccggtgcaattaagtgagctacaatcgaagctgct
+ccagccgattctccaaccaacgatatcctcgccggatttcctccaaacga
+gaaaatatgatcccttatccaataaagtgcgagttgttgatcaagcattc
+ccatatttccaggnacatcttcatgatctagg
+>CEESC21R
+tcgtnccgaattcggcagagntncntatgtgaatatttgggctccggcgg
+atgcttacaatcttactgtacttgtatggctgtttggtggtggcttctgg
+tatggntccccatnactgttactttacgncggaaaagaactagcaacacg
+tggaaatgtgatagtagtgaacatcaactatcgagttggaccatttggat
+acctgtttctagatcatgaagatgttcctggaaatatgggaatgcttgat
+caacaactcgcactttattggataagggatcatattt
+>CEESC22F
+aatcacaatgattttatagtcgaaaagaaataaaaaaacgcattatgctg
+agggcttcgacatacaaagtggaaagggttgggggaaatacatggaaatt
+nccttttttttttcggaaaaacaaatttttgttagtatttacaattacat
+tttgttaccagtcagacaagttttttgagggaaaaaaatccaataaaaat
+gagcatttttcagaaggacgtataatgtacacgaaggtggtngtgtnaaa
+aggagacaacaaaagggaaaaattgcgggttaaaaatggccgggaaaacc
+>CEESC23F
+tttttttgttagagtattttatatatttattattattacagcttacagaa
+ctttgatttgttttacagaaaaaaggtgcaaccgnttagacaaattcaat
+ggattatcattatttgaaactttttgcagttccttattttcaaaaaaatc
+ttggtttttggtttgatcagggtgagaaaggatttcgggggtcgaaagct
+agaaaattatcaattttttgtgattttcgattgtg
+>CEESC24F
+gtcttattaaaagctttattatgaatgtggctcaaataatgagcatgatt
+cagagaaaaaatggtttaaaatgtcaatttngtaatgagaaaatgggggt
+catcggcagtaatagggtacaacaacaaaagtgattgcattaaacctcaa
+cttcaaaccaaagttacacagngnacctagttatacatgcctagattact
+accggantagtattgancaaatacaagagaagttaccaatgaagatttgg
+gtgagantgggaagcataatgcagtcggctagagaagttgg
+>CEESC25F
+taaaccaatatgatttattattaaaattttaaaagaacaaaaacatgctt
+tagaattccaaaaatgattttaaacaagtgaatgaaagtatcacaaatac
+gaaaagagaacccgaagaagagaaaagaagaaattataaaaaaaatattt
+tagagctccgacttttgaaggntcgaataccgtttatcagatggcttaag
+ctctttgaacactgatggaggtggtgttgtgtcaattggacgagtagatg
+gagcttgagcttcatgatcatcagtgattccacgtgcagcttttgccttg
+gcgagctcgatcattcgttggatcaaggttctcgtggaagtccttgtgaa
+gctttccagagtgaagatccataacaaactctctaagtttacctgggata
+ttca
+>CEESC26F
+ctactcggcgaccagctccaccaagggaaacctcttcttctacctcaacg
+cgttgatcatctccatcgccccgctctacctnttctacggagttcaccag
+atggagatccaagactcgcttgtcgtgtggggactctntgccgtcggcac
+tgcctacctcctgtccctggcctgcaaaaaccagaagtgccttctcaagc
+atcaaatcgtgatgaagcgcggntcagctgtggaacgcgagatcagcgga
+caatatgctgctgacaagaaaatgactgttaaggagaaggaggagcgcgc
+gcttttccgcaaaaacgaggtcgncgacancgaatncacctacttgtcgg
+tcttctacacaaantcgctctacttgaccat
+>CEESC27F
+tcgaaagtttctccagaatttcgatcaattcacagtcgattagactattc
+actgaagactatgtcattctctcaggattacggagcaatcagtgaagaag
+gaccaatagaagtaggatcaggaattttgaaggtggagtcaattgaatat
+atttttgaatacgatgagaacatggntcaagtgaaaatcaaatgtttgtt
+ggccccggaacttgtgagattctcgaatgataagtcaataatcagaagat
+attttcattattacctggccgcttcacaacgaattgttcagcatgtcaaa
+ctcagggaaagtgacagtccatttcgaaaccttcaaagcctaaaanttcc
+atccatttcttgggactctaaacggagctagaggatatcctcaatttcat
+caattt
+>CEESC28F
+gttgaaaaacatttcattgaaagatcgatttttggtaaagcagatcagtc
+aaatttgaatgcagtgaaatgatgatctgtggggctggagagagcgattt
+agtggcaacaattgaaacgnggtaacagggtgaaactttggtttgtgtca
+aaaaattaattagttaaagcaaaaaaatgtggaaatgtcgggggaacaat
+aaacatgttaacangantaaaaaaccgtggatttatggaatggctcttct
+aacaatgttgttgcggaagaattcttgcattctgtgaaagtttccatcaa
+cattgccaactatggctcgaatattcgcctgcactgggataataattgat
+tcc
+>CEESC29F
+caagaagacaatttgttttgattggaaatggacgaatgatgaaaggaaat
+aaacatcttttaaaactctacaagtatgggttttcttgaatatttctgga
+actaatgaatacatatttncagacaccttcaatcggaaaaagtcatcttc
+ggaccgtcaattctcgaaaccttcaacgatcttgaggaagctgctctacc
+aagatcggataaattttgatttttggttgagcttaggtttttagatgata
+gtattcagtttctaacggatattcacttcatgtaactattattgatntca
+tattttnatgttt
+>CEESC30F
+agaaaatcaacaacaatttcatttgaatgaggaagagagtaacataacca
+acaatgatgaataaaaaacaaagaaatgaacaatttttggggaggggcgc
+ggggaaacgaacaataatggaaatagaagaaaagagcaaagcctacgtgc
+agaatagagtgaaagcgggaaatatttctcttctgcgtctctttctgttt
+gtgtgtgatttagaattccatactatccgtctttcggccttcgaccacaa
+ctgaagtgataacatgtccgtcttcggtttccttggattcgagctcacgg
+cgaatttcaggcgagatgagatagtgtccatggaatctgatcagctcata
+aatagctgccttctgctcggcattcaaatccgttcttgtatcgctggcac
+ag
+>CEESC32F
+gacggggaatggggagcaaaacaaagaacaatttgatatactataaacca
+ggggactgggaaattgaaagcagagaaagttgggatcacagattatttta
+tcagtttaatggtacttgcaaacagatggcactgtgcatccgtgcttctt
+gatgatttcttgagcagcataagatccacaggtgactgaagcttcgactc
+cctttcctttctctcgtgccgaattcctgcagcccgggggatccactagt
+tctagagcggccgccaccgcggtggagctccagcttttgttccctttagt
+gagggttaatttcgagcttggcgtaatcatggtcatagctgtttcctgtg
+tgaaattgttatccgctcacaattncacacaacatacgagccggaagcat
+aaagtgtaaagcctgggggtgcctaatgagtgag
+>CEESC33F
+aataaataaaattattttattaaagtattctcaaagtcaaaatggcaaat
+aaagcttganccaaaattttgttcactattattattacaacttccttgct
+aatttaatgtctccgccggttcttgaatagaactgatttggagcattata
+tttttnagttccattggaagatgttgagaagtaggcagtgacatctggaa
+tgacttgaggagtttgagaagcttttggcattgatacacccattcgacgg
+ccacggtattcacagcttttnccgaaatatggacgttcacagcggcaggc
+aaagttatagatagctggatttgttttcggtcgtggtaaa
+>CEESC33R
+tcggtctaccacaacaagactaccaataactactggtagtacacaaactg
+gtgaaccgtttcggactatggcaatcaactgaaatggggtgaagtttggt
+aggaaaacgaaaaaaagaagagaaagaagaagattgcactgccaacataa
+gatggtcattgtgtggtaccatcaactactgctagtacttctgatattta
+taggagctgccaaaggagctcagtacataggttcgggagcctcccaaccc
+aaccgaacggatgttgtgtggatggttccctcttggacatgtaaaaatga
+ntattcaattgatgtagaaaagtatgggattctgcaaaattgaggtncag
+ca
+>CEESC34F
+ccgtagttntttttttttttttttanttttatgattttattttaacgtga
+ataaacatcacaaaagtgagcttactcaaggggtggggtntggggcggct
+aaaccaaccactaacaagtaacaaaaagaagggtgacagtaagananaaa
+aacaggngatnggtatgcttagcaactnggggaacgtgctaagagcactt
+ggcaatgaacttattgcttctgagcggaaacgngaaccgatgcagcttcg
+tcgaccttcgagcggaacaattcactatcttgaagcatcatgatcaactc
+ggaattt
+>CEESC35R
+ggagacaagggacgtgagcgtgacaacatcaaggaggatcagaccctcta
+ctacaccgtccagctcgtcgatctgttccgcgctgtgccaggagagaagt
+ggaccaccgatgagggaatcgttattgagcagacacacaagattgatgag
+gataagtgcaagaagtcgaagagtggagacaccattcaccaacagtacgt
+gcttcatcttnaggacggaactttcgtcgattcgtctttttntcgcaacg
+ntccattcatcttcaagtttgaataattaatgaagtcattcaagggaatn
+ggacattncccattgactggaattgttgcga
+>CEESC36F
+aagattttaataaaactttattgaaatttgctcaatatcagangtaaata
+aatcagtatcaggataaatngtgaacagttatatttgcttctgtaaacag
+ttgggatttgaattcagatgtaaatttacataactnctcgttgctgaact
+tnatactccaaaatccatgtatccnctttatgactgangacaatanccgn
+gaagttgtttatatgaatggncagttg
+>CEESC37F
+agtttgaatgtttttatttttttactttaaaaaaaaatttaatttcaaaa
+ataaaaaaaaacagtttgtggcaaggaaaagggggaaaaattttntgagt
+gggcgtggggaacaacttgagttttttgaaagagtcattttggcgggaaa
+agcggaaattnttgcgaaatatctacccgttactcgcgtggctttttgta
+gnctaaaactttagtagaggaggagaaagaaaaanctggggaaaaaaatt
+tgggtcacagnaaaaaaatgcaattgattagcaancgcaagaaggtgggt
+agagcgtgtgaaa
+>CEESC39F
+agaattttagtgttttattgaattgttgaatacaaatcaataaaaaaata
+acatatgancggttattatgactttctttcatatatatatcccatatatg
+ggtttaccaaaatgtgcacgaaatgaatacaaataaattatttaatcagt
+gtccatcttcgcctcgaaacggcttcccaacccggtttcacttccagcga
+ctcctctattcacactatccagaacagagttccagtcgacgccgaatcga
+tcggttctttcttggattcgaagcacaacttggagagcatcagtggcgac
+agtcttcttctgttgagcacacagctcgtcggcctgctccaaaatgttgt
+caatcagcttcccagcggccactgg
+>CEESC40R
+cagagttacaaataaaagcgggcaacaatgtcagaaaaatcattgcaatc
+gaaaatnctttctactgtatttttncttgcaactctaattgcatttnctt
+ctgccgatggatatacctgtnccggaaatacgctgataaatccatttntn
+aatctttcggagccctactactatccagggncatggcgagaaaacatgga
+accagctgantatgctccagntcaaaagtgtaactggaagatca
+>CEESC41F
+aagaagaggatatatttatccaaactgcaacaacaaaaaacacaacaata
+taacttgaaaaataaaatacgctcataaaaaaacaattttaaaaattaaa
+aattattccttgtgctcggncaattgaaatcctgccttcgtcgagttcac
+tgatccntttgctttctggcgagggccccaattcttccctttattgcatt
+ggnaccgtacaaggntctctttcttggnctgttcgatggctccngggtga
+agagtgaagttgactgtgttggtgatgggttntcccagaatgtgatatcc
+ngctgattctttggccagacg

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/7.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/7.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dbfa/7.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1500 @@
+>Contig1
+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct
+aagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa
+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct
+aagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa
+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc
+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct
+aagcctaagcctaagcctaagcctaagcctaaaaaattgagataagaaaa
+cattttactttttcaaaattgttttcatgctaaattcaaaacgttttttt
+tttagtgaagcttctagatatttggcgggtacctctaattttgcctgcct
+gccaacctatatgctcctgtgtttaggcctaatactaagcctaagcctaa
+gcctaatactaagcctaagcctaagactaagcctaatactaagcctaagc
+ctaagactaagcctaagactaagcctaagactaagcctaatactaagcct
+aagcctaagactaagcctaagcctaatactaagcctaagcctaagactaa
+gcctaatactaagcctaagcctaagactaagcctaagactaagcctaaga
+ctaagcctaatactaagcctaagcctaagactaagcctaagcctaaaaga
+atatggtagctacagaaacggtagtacactcttctgaaaatacaaaaaat
+ttgcaatttttatagctagggcactttttgtctgcccaaatataggcaac
+caaaaataattgccaagtttttaatgatttgttgcatattgaaaaaaaca
+tttttcgggttttttgaaatgaatatcgtagctacagaaacggttgtgca
+ctcatctgaaagtttgtttttcttgttttcttgcactttgtgcagaattc
+ttgattcttgattcttgcagaaatttgcaagaaaattcgcaagaaatttg
+tattaaaaactgttcaaaatttttggaaattagtttaaaaatctcacatt
+ttttttagaaaaattatttttaagaatttttcattttaggaatattgtta
+tttcagaaaatagctaaatgtgatttctgtaattttgcctgccaaattcg
+tgaaatgcaataaaaatctaatatccctcatcagtgcgatttccgaatca
+gtatatttttacgtaatagcttctttgacatcaataagtatttgcctata
+tgactttagacttgaaattggctattaatgccaatttcatgatatctagc
+cactttagtataattgtttttagtttttggcaaaactattgtctaaacag
+atattcgtgttttcaagaaatttttcatggtttttcttggtcttttcttg
+gtatttttttgacaaaaatttttgtttcttgattcttgcaaaaatttttc
+cgtttgacggccttgatgtgcactaccttcgcttaaatactacattttct
+gaaaatgttataatagtgttcattgtttcatacaaatacttatttaatag
+tatttctggttatataatttgtataaaaagtggttgacataacaaggctg
+acgaaactttgtgatggctgaaaatattttcctagctttattgattttta
+tttatacgtgtttgaataacttggccaaatcgccgagaaggaatagaata
+ctggacgacattgtacatattttccaaaaaatcagaaagtagatgacggg
+accaattctttctgtcaggttttacaaccgcccagtgcgtctacgtcaca
+tgttgtataaatggttgtaaacaatatgcggaaacaatcaaatgcattcc
+cataaggcataatatagaggctacaggcaatgagtatcgctctttgcttt
+gtttaaagggggagtagagtttgtggggaaatatatgtttctgactctaa
+ttttgcccctgataccgaatatcgatgtgaaaaaatttaaaaaaatttcc
+ctgattttatattaatttttaaaatccgaaaatccattggatgcctatat
+gtgagtttttaaacgcaaaattttcccggcagagacgccccgcccacgaa
+accgtgccgcacgtgtgggtttacgagctgaatattttccttctattttt
+atttgattttataccgattttcgtcgatttttctcattttttctcttttt
+tttggtgttttttattgaaaattttgtgattttcgtaaatttattcctat
+ttattaataaaaacaaaaacaattccattaaatatcccattttcagcgca
+aaatcgactggagactaggaaaatcgtctggagatagaacggatcaacaa
+gattattattatatcattaataatatttatcaattttcttctgagagtct
+cattgagactcttatttacgccaagaaataaatttaacattaaaattgtt
+catttttgaaaaaaaaataattaaaaaaacacattttttggaaaaaaaaa
+taaataaaaaaaattgtcctcgaggatcctccggagcgcgtcgaatcaat
+gtttccggaactctgaaaattaaatgtttgtatgattgtagaaccctttc
+gctattgagatttgataacttttaagtaataaaattttcgcagtaagaca
+ttaaaacatttcacaattaagctggttctgaactgtgtgaagtatattga
+aaaaaactaactgatacaaaaatataattttatgatagttttctggatgt
+cccaatataaacgatgtcaattctgcgacatgctacagtcatccacgaaa
+gtaacccgaataccgacaaaagaagaggaacgccaactttggatagacgc
+tctaggggctgattttggtcggaaaatagtcgggaaaaaatagaggacat
+tacagatgaggatgaggatgaagatagaaatttgccgacaacttcgtcat
+gccgctgatttttttgatgttctacgcttaaattttcagcgaacgaacta
+ttttttatattttgattgtttttaaataatatttgccataagaaattctc
+acttttccaggaaacgtcgtttcgccgcgattttcctcgtctccagtcga
+ttttgcgctgaaaatgggatatttaatggaattgtttttgtttttattaa
+taaataggaataaatttacgaaaatcacaaaattttcaataaaaaacacc
+aaaaaaaaagagaaaaaatgagaaaaatcgacgaaaatcggtataaaatc
+aaataaaaatagaaggaaaatattcagctcgtaaacccgcaagtgcggca
+cggtttcgtgggcggggcgtctctggcgggaaaattttgcgtttgaaaac
+tcacatataggcatccaatggattttcggattttcaaaattaatataaaa
+tcagggaaatttttttaaattttgtcacatcgatattcggtatcaggggc
+aaaattagagtcagaaacatatatttccccacaaactctactcccccttt
+aacaaccacccgaggatatattcgacaaacgatctatctactaggaataa
+ctcgattattgacatattatagacttcttttagtatttgtaaaatagagg
+atcagacccaaaattcagcccgcgaaggcatgacgtcagcgcgaggcagt
+agtttccagaagaactctgtcgtctaccttaatgcctcaaatgcgaaccc
+gcttcggccatccttctcgctcagagaatggattagagttctcatcaact
+cctctgtctaattttcaactgcggcggttggcgaccggtattaccgcggc
+gaccgacacctcccgggttccgtcgatcgctgtctgttgtgtgcgccgcg
+actccgcccaccggtggtaactttttgtgggggaatctttgtttttggtc
+atttttcagcgcttttcagcgattattgaccaattttgaataaaattttc
+aacagaatatcatctaaaatattgcttaacatttatttaacagaaataac
+gtgagcacgcatgtaaaacatgaaattttcgggaaaattgcaattaaacg
+aataaaaatcgatatttaaatcaattattggtgaatccggtgtgttgagg
+cttcaatgcatacatttttactggataaatctcctttgggaatccggttt
+gcagtgctttcgagaccatgtccagttgagaatcggcgaacgctttaaga
+agctcgggctgaataatgaattgttttaaaaaatgtttagtaaaaaattg
+ttttcgtgcaaattgtcttcgatattatccaaacgtgacgttttgcgatt
+ttcgcgctaaaattacagtaagtggggtctcgacacgacaatttttgtga
+aatacaaacgggcgtgtgtctttaagaagtactgtagtttaaaaacttca
+tttctgtggaattttcatatatttttcatagtttttctctttaaataaat
+cacttttcaacaaaaaactatgagacaatagtttgaaattacagtattct
+ttaaaggtgcacgcctgctcgaatttcgcaaaaacgtgtcgtgtcgagac
+cccaattacagtatttttgacccgaatatcgcgaaatttcgagtctgggt
+gaaaacattgaaatttttggcaaaataaaagaaatatgtcctttttcaga
+atatattttctaaatttcgagacgaaacaacaattttaaatgaattttaa
+ttttaaatattaaatatttcggaatttggcgttttttatgcatgtcgatt
+cactaacgattttgtactacacgtgggcaagtttatacagtttttggcta
+aaatttgtgaatttgaaccgtttttcggcgaatatttgaaaaattggcaa
+aactggttcaaaaacaaaaattttttaaactgtacaaactgtccaaaaat
+tcgtcgtaaatcgacacacccttctcattttttcaaaattttaattgttt
+tcgaatgttttttttgcagaataatttgtaaaatgagccttttgtgaatt
+ttttttaatttcaaagtttttattattttttctcaaaccagcacctctgt
+tctcgtccaactatgatcatcatcgtcgaataaccgtttctcgtgatttg
+tcacattatccttgagcacaatacatccaccaggtttcagtcctttctga
+aaatgaaaattaattttaaaaaaattgaattattttaaatgaaacagttt
+tcagagatttctcaacttttgagtccaccaccaggcctgcacgtttttcg
+ggttttatcttttaaaaaactgaaaaatcgaaaaatttcaatttctgttt
+tgtggtcaaaattgtaattacaggtaagcaaatagtttaattttaaaatt
+gaaaattagggaaatgaccggacataagtttaaaaacccgattttttcaa
+taaaaaggaaaattgaaaatttaataaaacaggttgtaaatcaaggagat
+cgtattgattgaaaaaaaatccgaatgttccggatttttcagtggttttt
+tttgaaagaaaatcgaaaaagtaaatgtttttaatttttaaatttaaatt
+tttaatcggaaaaaatgtacgaaattgactttttaatgtgaaaaattgtt
+gttttaaaaaaaaattttaaccgatacagattttctagactcagtttttt
+cggttgaatattgttttttactattttttcattacagaaagaatccaatt
+ttatttcgcttaaaaaataaccggagcatcgaaaatatttttttttctgt
+tttactcaaagcatttcaattacctagaattttgtttaaaaactacatgc
+tttatttatgaacgtaataaataagaccccctcttatttataaactttca
+acatattttcagttttcagtgctatctagtgcttaccgcacatcttttaa
+agaaatcaaccaaatcctcatcaaccaaatgccctgaaacccattgaatc
+catatcaaatcataacgtcgttcgggcggtgcaaacgtctgcagtccttc
+gacgaatttatctccaattcgtggatgttttccaatatattgatcacttt
+tcgtgatcaactcctcgacgacgtcttccatatcaactttcgagaagaat
+ggcattaagagatgctttgtaacacgtccgatacccgctccgcagtccag
+tgcatagtcaaagtagccgaatagattctggaaaatatttataaaattca
+aagttggcccaggggtgaccggcaatttcaagcaaatcggcaaattgtca
+attttctgaatttgccgaaaatttgacaaaaacgacaatttgccggttcg
+ccgaatttaccttttttaaatttaattttcaattcaggcaaactgacgat
+tttccgtttgccggatatcaatttgcaggaatttctcaaaggaattttta
+ttaagacggaaacacagtgcttttttgaattttttttcccgttttcttca
+gatatttttatagaatttactgacttttcagaatagatgtaggacaattt
+tgttgttttaaaaattgaaattctgaaatttccaacaaaaaaacatgtgc
+aaacccacaagttggcaaaaatattttgcatttgccgtttttcccgtttg
+ccgaaaagtctaatttcggtaattgggccatttttcgaaattttgagcca
+cataaaaaactttgaaccatttttgagaagtattattacgacattcgttt
+atttgagcacaatttgggcctatactttcaaaatcggggtttgaaaaccc
+ctatatgttcgaccgaatgttaatctcataaaaatttgatgaaaataaaa
+ttttctacggctcataaacgtatagcccccgtcagtctcaaaatttatac
+gatagacactttttggcgtttatcgcctatattccgtcaaaaaccattat
+tcatcattctttcaatgttgttttttttaaggctaaaaaactttcatgca
+aatttgttagccgtgtcgtggtttatacgaaaatttcagaatttataaaa
+taaaggaaaacgaaaatgtttctatataccctatttatgttctctgattc
+cgaataccaatgtgaaaaattcaaaaaaaattccctgattttatataaat
+ttttgtaagcgacaaaaattgtcgtttgaatttcacacttggttacaaaa
+atttatgaaaatgaggaaaatttgttttaattttttcacattgatattcg
+gaatcaggaaaataaatagggtctatagaaaaattccgaaccttcactcc
+ttctctgagtataataaatttaaaataaatacagaaaatttcagttcaga
+cctcattaaatttgggtatatttctaggatccgagtttttacaccagatt
+tacaaacttttagcctttcaccgcctttttatgcgcatttcccatcagtc
+aactccaaaaaaatcgcaacttttgcctcatatttcaagaatattcccct
+ttctctccccattgaaagtcattttcgaaacaagcggaagattcgtcata
+tgtggtaatgtgtggcgtgcgttggcaaacaacaagaaagaatcattctc
+tgaaaacaaaaagcgttttgggtgccaaagtaatattgaaaatctgccgt
+gttttctcattttccatcaaaagaaaatgagaaaaaagtttcggcgtttt
+atttgatttccgggaaagaagactcggaaaaagatttaattgaatttttc
+atagcaaacctatattgcaacaactttctaaaaggtcagaaattgccgcg
+tagcctagaaaattggaaaactcttccagctggtattatttcagacatgg
+tgcatcgaaaattcgaaaattacagaaattaacattttggagcatctgcc
+agaaaattgagattacagtaccccacttctgccgagaaattcgaggtgga
+agaggtcttacaaaattttcggtcacgtgaaaatgggaagcgttcaggct
+ccacacgacggaattcacctagttttcaggtgagaagatatcgtacgagg
+agaattgacctccaaatcctgatcgtgactacaggtcgtcgttcggagct
+gtggaagagttttgaaaatcttcgaccatgagagaaatagacaggacgac
+caaacattttcagtggaagagcttttcctaggccatcaggatgctatttc
+gacagagctgagttatcctcaagtcgttacgaggtgtggaaaagttttcc
+aaaatccccgaccaggtagaaaatgagcacaccgattaagtttctccagt
+ggtagagttttttctaggccatcattatgctatctagaaaaaagcttcgg
+ccatggggtttttaggccgtctatttatttctcataactttctcagaaat
+tcgtctatttctcagaaccccccaatgataagttttttgcaaaaaaagtt
+ctgctttgctcatcagccgtaatcaggtgacctcattaggcctacccaaa
+cacagatttgtcattatttttcagacaaaaaacacgaaaaaaatcttcac
+gcatggggtgataacctgattttaaatcttactgtgccggctggcgcggc
+gagcttcgatcactgagccgaaagattatgaaaactatgggaatgacggc
+gtagcctagaaatcgtccaggcagagattctgtctaattttcgagcatat
+atctcccagttttgttattaatttaagtaaactcaaacctagaaacaagt
+aaaacgggagggggggggggaatatcagaaaattaaatcttgcgacactt
+ttccattgatactttcaaggtaatgcccagaggtgtgcggcaaattttga
+aacttgcgcatgccgcctttttttttttctagaaaacagtcagaattttt
+tgtcgaatttgttgaaaattcgctaatatactgtgagtttagaaaaaata
+acgaaaaaactcggaaaaggaggaagagatctgaaatatgtagatttttt
+tagaaaagaccagaaattactgaaaaattggcatttttcgtcgaaacccc
+aatatactaaattattcggatttttagaaaattttcaaattcaccataca
+gtgcattttttcctacttctacgactttaaaggggggagcatttatgcgg
+aagggtcttgccgcgcatttagtcatcatttttagcagtttctgtgtaaa
+attcgcgtagatcacatgaagatcacaaaatatttatcccatatttcgta
+tttctgttgctttttcacaaattaattgtgatctacgcgtgatctccgcg
+aattttgagcagactttgttaaaaatgatgactatgtgcacggcaagacc
+cttccgcataaatgcgcccccctttaaagtcgtagaagtggaaaaaaaat
+gcactgtagcaaaaaatcgaacatttctgttcgatttttgaatttctcga
+aattttttaaaataatttttaaaataacatttttattttatttcgaaaac
+taccgattttagaaaaattctaaaatttcgattttttttgttgatttttc
+gatttttaaaataaaatttcataattttttaaaccgatctttcttgcttt
+tcctgaaaaatcgatgatttctatacctttttcttcagtccttcaataaa
+tcgtttcgacgccgatatgtcgggcgcgtgaagcgcttcgaatccgccga
+gcattccgttgacgtcctggctcgcgcggctccagtattcctccgcctga
+aagagaatagttgaaaacattgttttgagacttaaaaattttttttttag
+tttttttcaaaaattcttacatgttatagagtttttttttcaaattttca
+gcttttttcagaaaaacttagtattttcgataattttaaataaaaaagtt
+ttttttcaaaaaatgtttcggttttttttttaatttttggtctaaaattc
+tccgcaaaagatttgcgtgctggccgaactttttgattttgtaccttttc
+ataaacatcttcaccattgtgaattctagaagatgatgaagagctcattt
+ttgatgttgtgacagctgctccgagcaatctggagacttttgtgacgaaa
+agacgagaggtcacggatatgatgatgatactggaaatgagatatttata
+tttactagttcatcgggaaaattattacgagaaagataaacagacatgtg
+cgtttttttaatggaagagaaacacaagaaaaatctggaaaactaggcca
+cggctatcagtgtcgatttacggcatacggtctcgacacgactatttttg
+ttaaatgtgaaggtatgcacctttaaagagtactgtagtttgtaactctc
+attgctgcaacatatttgacgctcagcgaaaactacagcaattcttcaaa
+agactactgtagcctttgtgttgacttacgggctcgattctcgaaacgaa
+tttctgctcgaattgtgacagccatattcaatttggtatagtcttttcgt
+attttttgccatttttctgttttcttctaatatttaatctattattaaat
+tatgtccgtaactccctccaaaattagaactgcgaccgaacagagattcg
+ttccgccccatattccggccaatcagatcgagtaggcggagttcgaagtc
+gctgattggtttgaaaagtcgcggaaatttgcaagttttaaggtagcgaa
+aactgatgactattgtagcgcgcttgtgtcgatttacggaatctcgattt
+tcaggaatgaatttttaattacattttttcgctcaattaatattctaaat
+aaataaataaatgatttgaattaatttaatttcattcgagcccgtagatc
+gacacatgtgctacagtaatcattagttttcgctacgagatattttgcgc
+gtaaaatattttcccgtaataactctactccgacaaacattacgacctcc
+atggaggcctccaggtataggtgagactcttgtatttccaattcagagac
+aatgcgtcactggaagagaaaacgaagcggaaaaaaaaacacggaaaccc
+aaaaatagtgtttgccccgctctattcttctccaataatttctgtgtcta
+attttgaaagactccacctgtgtatgccttctcgacataaaccccccccc
+ccccccctatcttacatggtactgataacactttcagtctttcacacttt
+tggcgcgcaacgccgctcttttttcgcggcgagctgatgacgtcatcaat
+ttttcatcgcttttgattatcttcaatgttctagaagggcacataggtca
+tccttattttttccttctctttctcgtgacggcccttgttgcgcatgccc
+gccccctagagcagggcgtggcctgaacggcggctccgagagctactcat
+tcttgccgcgtcaccctccagcgccacccaaacttcttcggttctagaga
+tcgagaagaacgtatgattttttaaaattataattgtttctttcgaaaaa
+aaaaatttcatttacagtaagccaaacatacacaatcaacatgaaactcg
+taattctgctatcttttgttgcgacagttgcggtttttggtgagtttatg
+ctttagataatacttttccgccaaaaatacagttgccggtctcggtatgg
+caatatttttgttaaattcgaaaagcagtgagtaatgtagtttcgaattt
+tcgtttctgcttaattttcatcaattcatcgtttttctcacgacttcttc
+tttatgaaaaatcaatgaaaattctgactaggtcagcttaggggtgaggt
+acctagagacgccacatatgccaaacggaagctgagatcattggctacaa
+gaatatgctttcaaattctgcaacggacctctgggagtctggaaattctt
+gtctgaaattatgcttttgaatgctcgaaagtggtaagaatttagaattt
+attacagaaaaacgtttaattaataaaattagttttatacttgaaacaag
+tactgtatgcactgtatcaaaacacattttcatcttttctaggtattcaa
+cttcacgtttttctgtaataaattctaaattcttaccactttcgagcatt
+caaaagcataatttcagacaagaatttccagactcccagaggtccgttgc
+agaatttgaaagcatattcttgtagccaatgatctcagcttccgtttggc
+atatgtggcgtctctaggtacctcacccctaagctgaccattccctagtg
+agcaaacaaaattttgaaattacagtactatttaaaggcacattgatttt
+ttgggtcaagcaaaaatttgtcgtgtcgagaccggctacggtattttcgc
+gaaaaatcgcaaaatcttgcggctgggatatacttgtgcgaaatactttt
+tgcattaattttgagcaaaattattttttttagactttttgaaatccaaa
+ttttttggattgcgaaaaaaacctgtgtccggttgtttcattaggccaac
+aaagttcctggaacactgatgaaaaccatgatagaggcggagcataatat
+cgatttttcgtactttcctgtatttcttcttctatatggccgagtagaac
+aggattaggggtaaagtcaaaatttttctcatatggatatcatatggata
+tcaaaatttttctcatatggatatggagaaaatttttctcatatggactt
+tgaaagttgaatcacttgacatctgggaaattagtattccaggcgtaagt
+cggatctgttagaaacggaatacttataggcttcgtgaattaggtagact
+ttcaattaatctgatccatgggagtcagacgcggtttccaggcctgacgc
+ctgcctccaacttgcccgcctcacgccggtctctcgcctcatttctgcac
+tgtgacgagacagacgaaggtcgccttctggcgcccgcatggaaatccta
+cgaatatgtcagcttctgatgggactccgtaaatcgacacacaggggtac
+ctcagacatttccctcccccttacaaattgttaggacaaggagggggaat
+tcatctccactcgagacacacatatgttgtcgtcagtgaagtgtaaagat
+ctaaacgattgcgtgtatgaaaaagcactctatgatcacctttttcatct
+tcctacaccctttttaggtgtggtgcccatcgagcactcacgccaggcag
+ggagagcaccggtccctgactaatgggattcgaatgttttagaccggaaa
+taggagcgatgaaagagcatagaaatgatcatttggaaatcacgtttaat
+taggttacggcgaaaatttgcaaaaaagagcaggaaacttggctcaaatc
+cttcgaaatataacaactaggacttccatgtaggcgttaaagcgccctgt
+ctctcaccccaatccgtaccttaagctgaaacaaacgtgaacttttttca
+tttcttaaaggagtatcgtcaatgggaaaattgttttaaaatgtagtatt
+tgtacttcaacttccaattattgcaaaagaaaaacggaaaaaatccgtta
+acattcagcattttaagtcgaagaaatctttaaaatttaactagagaaat
+cctaggccacgacgctcattcgaattttaatttgttttgatattgtattt
+tgaaaaaaaaacttaatacaattccttcttcccagttttctataactttt
+tgagaaaaaaacgaattaaattccgaaaaaactacatttaaatcaatatt
+ttgtttacgaatatggcctagaaatcgcgtggtggcctaggattcatttg
+cgcgcgaaattcaaattccgtcactttcgtcgatttcaacggctaaatgc
+tgaatgtcaacggatttttcccgtttttcttttgcaataattagaagttt
+gagtacaaatactacattttaaaacaattttatttttggtattttgacga
+aaaattgatttattggtttttttggttgtttgggaccaaaaaatccaaaa
+aaaatgtttggcgtgtctagtttcgactcgagactattctgtattaaaaa
+tacattaaaacatgtattttaacacagttgtgacgtcataaatgtatttt
+gatacattttgcaacattacttaaataaccccattaaaaattaacctaag
+catcaaaaattttttggtttttttggtttttcgaaaatttcaattttttt
+tgttttttggttttttttggtttttcaaaaacttcaattttttgtttttt
+ggtccaacatttttttttggtctcagctctgctgcctaccctagaagaac
+taatagcgcttcaaaaactgatgaaaacgttcaaatttgtcgaaatatta
+cgaaaatttgaaaagttggctcaaatctagattgaatcggccgattttcc
+acaagtttccaagtttccacaagtcgccacatatcccgagaaaaatcgat
+tcaaattgtttgaaaattggaatactgcgaattttgaaccaaatttccct
+ggcttctctgttgaaatacttgaaaataccgcgaagcaaacaaaaaatct
+aattattacgtgaacacaaaattctgaaaatgcgtatatattgcgcaaca
+tatttgacgcgcaaaatatctcgtagcgaaaactacattaattctttaaa
+tgacacgctgtatgtggtgatttacgggctcaaaaaattattttcgaaaa
+tcaagcccgtaaatccacacgtagtaattatataaagaattactgtagtt
+ttcgctacgagatattttgcgcgccaagtatgttgcgcaatacgcaaccc
+catatgttgatatatactgatgtgaggataaaaaacaacacaactttcag
+cggctccatcggctccggcaggtctcgaggagaagctgcgtgctcttcag
+gagcaactgtacagtctggagaaagagaacggagttgatgtgaagcaaaa
+ggagcaaccagcagcagccgacacattccttggatttgttccacagaaga
+gaatggtcgcgtggcagccgatgaagcggtcgatgatcaatgaggattct
+agagctccatgtaagttagtggtggtggccggaaaagagaaaactcggcc
+aagctgctcggagtttttgaatttttgataatccgaaataaaaattgatt
+gctcgaaaaggaacaatcttttggaaaaaaacgaattttgtcattttttt
+cagcaaaaattgattttcgaatttttccaataaaaaatcgataatttctc
+cccgtgcagtggaaaacaaacaatatttttttgttgatcgttctcttcca
+aacccggaataggtacacacattcctgcgtcatcccattctcttatcaca
+cttttttttcgaaaataaaagtgtagagacggaaaagtgagaaaggagtc
+aattttatgcgaaattttgcatgataatacactcaaattaaaaaaactgc
+gtggcgtgcactgcagaaaacctcatatttaggccccgcctttttctcgt
+ccactcacggagaaaaggcaaaaatttggggaccaaccaatatcaggccg
+ccgacatcctacgggttccgcgcgccgctatgtttaactcgctgtgggtg
+tggcgagctgtctccgcccgctgcgagttaaacatagcggcgcgcggaac
+ccgtaggaagtcggcggcctgatattgttggtccccaaattttttccttt
+tctccgtgagtggacgagaaaaaggcggggcctaattatgaggttttctg
+cagtacacgccacgcagtttttttattttgagtgtataggtctcgattct
+cgaaagtatgacagttatttaaatgatgaactcgtgatgactgttaaatt
+tttggaaatttcgggggaattatatcgatttttcgataaatttacaggaa
+aaaagtccaaaatctaggtattccatggtaggcaggcgcgatttcttgac
+gcctgcctggaatctgtccgcctcacaccaaaaaatgtcaatcattttgc
+tgaaaaccaaattaagaaatgaaaaagtgcacttagagatgatgacggag
+gtcgccttaaggtcagacaggttaaaaaaccgattttagttgagttttcc
+cgaaattttctgaacaaccgaattagaaatatgctgcttgtcatttttga
+gtaaaaattaacgaaaacttcgaccaaaaccacgaaaaaaatgaagaaaa
+taaagatttttcgagaaaataacaacaaaatccagcaaatagtgaaaaat
+agttttatccgagaaaaagtagtttagacgctatgaactctcgaaaatca
+gattttttcaatctaaaagccataaaattatcgattttttaaaaattctc
+actgaaaaccggcgaatttcagtgctccacgcaatcgaagcccgcttggc
+cgaagtgttgagagccggagaacgcctcggagtcaacccggaggaagttt
+tggcggatcttcgtgctcgtaatcaattccaataaatattctttgcccta
+aatactttaaattatccatctgacaactaaaatttcggttcttcttggct
+tcttctatttgtgaaatggtttattttcccccgaactctcaaaaggttta
+aatattgttcgattacccctttttatcaattattttcttcaatttcttat
+ttatcattatttttctaaacgaagacggatgtgattttaaattatgttaa
+tggactattttacaaactgaataaattcagcatgttggcaggttttttca
+gtagtttttgagtgaaaatagaggtaaaaagacagaaaatcaataaaaaa
+tgaaaacaaaactatgaaaaatggttgaaaatcgagcaaaaatcgttcaa
+aaaaaaataaattcaaaaaataattgcgtcgagaaacgcgtcagtagccg
+ctctctgcgtctctcacccttcagcacgcggagagagccacgagaaatgc
+gcaaaggctaaattcggcgcggaaaatcatttttcaaaataaattcgacg
+agaaaatcaatacttaagtaattatcgattttcagctcgttcaaaaaatt
+ttcagaaacgttttagtcgtttaaaggtttttttaaaattaaaatcgtcg
+gaagtaaaaaaatagcgcggatggaaatctacggagtgcggagcgaacaa
+acgcgcggtaattcaaatgggtagaatagtcaaaattgaaaattagccag
+catcgaccgatttttttaaaacttaatggattttttcgtttttcttttgt
+ggtatttcggcatttaggattagatagcacattttaaagtaaaattccca
+tccaagctactccaccttctccagactgtacagttaaaccaatttgaaaa
+gtgtattgtatcccgtttttttttctgaacaattttgaaaatttttcgtt
+tatccaggatacgataatcatgattcaaattcgttaacaaaaaatgaata
+tatgagagcgattaaagcatttgtgtcggaaaatatgggttaaatgggga
+gaagggggcggacatttggatggggtacaaaaaaatatgcaaaaaatggg
+ctaaaaacaatattttcaaattatgcccgacaaaggttcaaaagtcaata
+tatagaaatgagaacatgagtattatgccacgtggcgggaaaaatatgtg
+gaatgtaatacgatgagatccttgtgaatacaaagcttgtgacgacgtgg
+ccgagaagaactttttaagccaacgagaaaaaaggggttcaaggccgaaa
+ttttttttgggccacctattaagttaaattgaaaatttaaaaaaaacaca
+gcggatccaattatttgccgagttttgacttgagctcggcgcgatacgtg
+tcgattgactgaaaatattgtttttttttatttccgaataaaaaatggtg
+agtacctccaaaattagcttttcattgtccatatagaactttttgatttg
+ttccacagtttttgtggccatcaactcggcgatcaactcgaaattgtcct
+tgtaccagtggaaacctgaaggaatttcggatgtttttgcttaatcataa
+tcataataatcttaatcataagacttggaaaatgcgaaatttttcgagaa
+tattcaatttatcttcagattttattgcaacaaatcgattttcaacataa
+aattaatttttccaactttttttcccaatttatgagagtttaaagattgt
+tttaaagcaaaccgccaactttacataaaaaattaaaatattgtgaaaaa
+aatgatgaaatttagcagattttctgataaaaaattgaatttttttggat
+tcgcgcttcaatttcacattgttcttttagaaaagtcgaaattttatatt
+tccaattttcagatttaaaaaaatttaaaaaggaatgaacttttccaaag
+aaaaactgaatataaccagaaattgtgatttttcagcatttttttttagg
+tttgaatttttttttcatgattaatcacgtgaaaagtcaattttaccgca
+aaacatttaaaaaatcaagatttttcaattttctctgaattcctgcagat
+ttttcgatgaaaaattgaattttccttggaatttatatttttcgggtatt
+taaagtttcggatattaaaaaaaattttcaattttctctgaagttatcga
+taaaaattattttctgcaaaaaatctactttttttcgttgaatattccgg
+aaaaaaaatcagaatttcaaggcacatttccttttctaatctaattcgaa
+taattcaatattcttttaaaaattcggggtagaaaaggaattgtaccaat
+ttttatttttaaaagttaatttttctaattttcaaaattttcttgaattt
+tcgaattacagattttcaaaaaaattttttttgtttttttttctcgaaaa
+tttgaaatccatacatctaatagcattcttcttttcctcaggactccaac
+cataatttatcctgacttttccagatcgattgccatttgttgcagtagta
+tctagttcaggagtaaatctctcgaatcttcccttcaacgccatcatatc
+tttcttccaatttgcaatttctccttttggtacacggctgtatgtcattg
+ttgcacggaacatttgttgacgggcttcttcattcagaattctggaaaaa
+ttgatgttgtgcgattttttttggttaaaaaaaacaattttcgtaagttt
+aattaactaatattttaaaaaatctctcattttctgaggcaccacggatt
+caagatctggtgggattccggatctggcaccgtgccaacgcattaaatgc
+aatttttctgaaaaaagggcaacgaagatccgatttaaaaaaatttttca
+attatttttcaaaattttcactaactataagaaattagagatttttcaca
+aaaattccagttttctgttagaatttgaaaaaaaaattgaatttttccta
+aaaaatttgtaattttccgatatttcaagctgtcaaaacctaaaatctga
+aaactgaatttttaaaggaaaaattttgagcattcttatcaaaaaattgt
+ttcaactttttctcaaaatgtttcaacctttttctttctaaattctgaaa
+agcatatctcagcttttgctaaactatttttttcctcaatttttgagaaa
+attaaaatataatatataatatagtaaatattgcttattttctaataatt
+tttggtatttctattctttcgttttttttttcaaaaattccaaatagttt
+taaatgttcatattattttttttgacgaaaataaattttaattttaaacc
+ggaaaattgtttcgtaactttttttttcaaaaaatttgaattttcgacat
+gaaagatgtaaagtgtaatttaaaaataatagtgcaggtattttcagttt
+acagcaaaagtcagtttaaaaaatttcgactggttttcaaaatgagtttc
+cttattttttacacgtagaactttttttattttccgattttttttgttgc
+gcagaaattttttttccgcaaaatcaggaaaaattcagaaaaagacagtc
+aaaaaattgtagatacaattttttgactgtctttttctgaatttttcctg
+attttgcggaaaaaaaaatttattttttcatgaataaaaatcgaataccc
+atccaattccacaaacttactcgttctcctccatacatttcgtttgttta
+actctccaaacaagtggaacacacatatgatgttttctcttgatattatc
+aattaatgccagtgcagccggtgtatcgaagcaccgtgtcattctgcacg
+tattctcatcgattggatcagcttcaatcgattgctccacaatgtagggg
+cctgatggtttacggagaaggcagtcgtctggagaaaaatagaatagaat
+aatgatttttaggttattttacgtttaaaaatctaatttttaagacgcgt
+aaacgttgagctcatttataaaaattcggcaaaccggcaatttgccgaaa
+aatttcggaaaattgtcggtttgcacattttttcttgaaatttcagaact
+tcgatttcaaacggcaaaattgtatacatcctatcaaaacatcaatcttg
+aaaagccagtaaactctatgaaaatgtctaaagaaaagaaaacggtaaaa
+aaatacagttttaaatgtttccgtcttattaataacaaaattcgacaatt
+tgccggaattgaaatttttttttctccaatttccgaaaaaaacccaccga
+ccaccataatatcatcgtcttcttctttttcttttccaattccaagccgt
+ttgatcgcttttccgttggctggctccatgagctcaagatatccgtatac
+ataaattttcatgtctgaaagaaaattcaaatttcttctggaatcagtta
+ttcgaaactaacattctggacataaaactcgttgccgtcgttttgtcagt
+gcacggaggcttgccggacgtggaacacgcatcaaacggaaataaaggat
+acacggtttacattcgtgacgcgacattacacgatttagcttaaaattgt
+gaaattaattttttttaatagctctttatttttttgaaaatttctcccat
+gctttttccattttttcaacgagtttccttattttttgtccatttactgt
+aagttttttttgagaatttttttttgttaatttaacattttattagctca
+aaacatttattagcaaaaattttattagcaaaaaaattttttaatttttt
+taaattagctcaaaattctcgaaattttaaatttttagggtaaacaatat
+aaaacttagggagttttgagctataaaatgataaattgattttaaaaagg
+atgaaaaacttattttaaaaaaccgacaaaaatcgacaaaaatgaaggga
+acaggcagcagcttagccccatgcttagccagcagccccgtagcaaccca
+gtatcaataatatcccgtgccaattttcataaaactgaatataaattggg
+ttgatgttgctaaagggctgcgaaaaactgacctgggatgaagctgggct
+gcaaggggctgcgaagtgctgcgagggcaaagcgctacagtgctaaaagg
+gggctgagcccagaccctcaggaaaaaactcatactcgcagcccttcgca
+gcccacatttgcgctctgatcgcgtgctatccgcgcgcacagaatttcga
+aagtattttccaaattcggaatgcgcgcggagcagacgcaattagagcgc
+ggatctggcacgtaaggaagaagtgtgactggagcacgaaccagtaatct
+agtcgcgccccgtccgcgctccaggaggagcgatttgccgagcagttcag
+cccttcgcagccctttagcaacaaccaaatttatacagttttatgaaaat
+tggaacgggatattattgatacgcctaagcagccctattaaatagtgatg
+agggcgtaaatgaaattcgccatttccagctaaaatataaattttttgaa
+ttttttaacattgatattcggaatggattcagcagaaaatttgaagtcat
+ttgaaaatattttccagatttcggtactccacttttaaaattgaataaaa
+ctgtagtctttattcaatgtttcttcaaaatttaaaaagtagaatataac
+tgtgagaaaatttccaaaattgtcaaaatttcaaatagctgaaatatttc
+acggcccggcggggggtacatggatgagaattctctaccgtattccaatt
+tggctgactgcgtgctcaacgttgaatactcagtgtaaactttcgtacac
+cgttgcgtactgcacagcgcgcattttaattgacgacatttagcaaaaat
+tgaacataagatttttcggaattatgaagctcaattttcacaaaaataat
+gagttttttgtagaatttatgaaaaaacgtgaatatatagattttttgtt
+catgatattcaagaaaaagcgatttttagttcttcacagaggaatcctct
+cgcatttcacttgctcatgatgttttttgctccactttaggacgataaaa
+atgcgaattgttgataaaatgaatgaataatataaaaagtgcaaatatga
+cttcagcaagtgttaaatcccaaatttttcctgcgattttctgctagatt
+cctggttttgagtaaacagtctgatatattcatgattataatgataacaa
+taacgaacataataataaaaatggagagcacagagaaacaacaaattgca
+aaaacagcaactgatatcagaattaacgacgaccacggaaaccgcctcgg
+tctccacctcgcccaccacggaagccaccacctctgtcgcgtcctctgaa
+tcctcctcgatctccaccgaatccacctctaaatcctccatcgcggtctt
+ctgatctaccacggaagcctccacctccaccaggatctgttgaaagtcct
+ctgaagcctcctcgatcgccacctccacggaagccaccacgatccgcgga
+ttttcctctatagccttcgaggcttcagttgtaccccattcttcgttggc
+acgcttcagatctctacaaaaaaaacaaattagaagcattcaattatcga
+aatgtgtacctatcccgatttatcgcaatctgtctattcttctccttctg
+attctcaacttctttaacttgtccagtagcggcagcttgcttacgagcag
+cattttcccgaatcgccttcacctctgcctcctcagcatcctgttgctcc
+ttgacaatcgtaagtcttcgaatgacacgttgctcactctcctgctcacg
+acgctttttcatctgcttcttcttgtttatagtcaccgcattatgcttgt
+gatagagaacctctccctcatcgatttcttcttcaattttgacgagttcc
+agggtcagtcgggtccgatctcacgaagacggacgttgctattctggcca
+attccgcagtcacgtccttcataaatgtcttgtggaagttcttcttgctg
+agggggctgctgaaaccaatgtcggcatgatgagagttccggtcttctga
+atccatttcctgcgtgggctgtggcgacgagctgcacgtctgaaaatcaa
+gtttttgtaatttttgggcgcatgatatggagctgaatcattcgatttta
+gaatcagcatgcttttattcatattttaggatctttttaaaaaatctgga
+ccaacagttttcgaaaaaatttaatttttgttcagaaatgtgaatattca
+ctaaatcgaaaaaaataattgcaaaatccgtcagctgaacattcaaaact
+tatcaatttgaaatcagcatatttcagtgtataattaaaaaagtttcaaa
+aattctgagaccaatttttattgagaaaaataatttttcgctcgaattat
+tgaattttcactaaatgcaaaaaacagtaaacttgggcccatgctacaag
+cctgaatctttcaaattaagaaccagcatgattttttcaatattctagga
+cgtttaaaaaaaatctggaccaacagtttttgaggaacgtaattttttat
+acaaaaatgttctgatttttcactaaactcaaaaaaatagtcaagttggg
+cccatgctgtacacctaaatcattaaaattcagaaccgccatgtattttt
+tcttaccaaaggctctttaaaaaaaatctggaccaacagtttttgagata
+tttagaaaaacaactcacttttcgacgtttttcgccttttcgtggctcac
+ccggttgatttttgcggcgatttgtggtctttcgctgaaaatattatttt
+tatttcaattattaacgaagaaaacaagaaaaaacgacgagaaaacatca
+aaaaaacgcgaaaaaacatcgaaaaaccaccgcaacctcatgaacaaaaa
+aaaagcattgcagccgcgggactagttttcgcaactttctaggccatgtc
+ccgttcgccgtgccgtgtatttgtttaattccctttttggaaaaagtcaa
+catatttttctaacaaatcgtttttctattaatttttttctaaaactcac
+aatcaacagatcactttttgcattgcaattctcacaatatcccgacggaa
+ccctctccaaatgattgacctctttgaatagttcatcataagtgtcggtt
+tcattcaaatgcacattaatcattgttttatagttttgcacttttttcgt
+gttgtaatagtattggataatggaagaaagcgagcgttggggcatctgca
+aaaaataatgaaatttattttctttttatgattaaattaaattttcaaaa
+attccctttttttgacatatgcacttacagccgcatgaatcttcttgaac
+cgttttccgaaatgaaagaagcaagtggagaaaagactaatttcttctgc
+cgtccaatcatcatgaatttcttttcttctcatcgcttgaaccatcgcag
+cgtcgaaatcatttgactgtttgttcagaatgaacagagcctgtaaaagc
+agttagtttttttttcaaattcaaagtacatttccgaaaaataaaaaaaa
+ggcttgattttttaaaatctcgaatttttattatggtcaattgttatttt
+ttccagagaaaaactcattttctcccaattttcagacgtttctctctaaa
+tttggtgtttttccaatcgtaccctatctataggtaattgatatcgtcca
+gtagcttctgaaatgtattctgtaagccgattctcgttcatttcgtctgg
+aaacgcccaaatttgttgatctctgcacggttctttttccaattgctctg
+cagttggctgtataatcgcctgatattcggttcccacgtggattagattg
+tcgacgttggaaagtggatttgctggaagaaattgggaatttttcaaggt
+tttaagtggattttcaagctatttataaaagcatgaaaaagctcagaaat
+gactataaaacctttttttacgtcgtatttttttcaatgaaattacctac
+ttttaattaattgttcggcttaaaaccagaaaattgtttcatatcgattt
+tcccggtgaaaatcgaaggaatcgtcgcattctcaaagttttttcaccga
+tttgtttcaattttagcacaactaaatggaaaaatcacaaaaattccatt
+acagccgattttcgtgaattttcctacatttcgaactaaaaattgtcctt
+tcttctgtttaaaccggaaattctcttttgaaaaaccaatgaaaatttga
+attttctgggcttttcttcggaaaattattctcgaaatttatcaatcgat
+ccttgggctttttttgttccgcagaggctggcggagtttacaagcgtacg
+aagtggttcaacttttatataaagctttataaatgggacatagatgaata
+tttcgaatgctaaatgcaaaaagaatcagtaaaaaagcgcgcagccccgt
+ccttctctgacgaaaaacgccgtttaaggatcgattgctaaattttggca
+gtagttagaagtgtcaaaatttctgccggagagtcgtcaaatttcactga
+aacgtaacccggtaatttccacaattaatggtcgatttttcgcaaaaagt
+ggtatgtttgtcaggatttattagaaattgtggctgtccagattttaaag
+agtatttttgggcaaaaatgtcgaattttctctgaaaaagttcgattttt
+atcgaaaattcagattttttagatagttttcatcgattttcccagttttc
+agcctgagaactttactaacagaaagatgtgtcatgagcaccactttcat
+gatgctcacgagcttcagcttcttcatcttcgtcctcttcatcctccaaa
+tcttcatcctcatcgcccattgattccccagacgttgtttcgcgttttct
+catggatcttataggacgagccatctgaagtttcaattttagcttttaaa
+ttcaattttaccgcttaaaaatcgataattctcccgtactctgctggttt
+cttcttcttgttccgcctgctcctctggatcatcttcctccattggctcc
+ggcgatgcattcaacatattcaagccttcgtctgaaatatctggccaatt
+tatagaaaaaccgacaaaataataagcctcactttcttttcgagaggcgt
+cttcgtcagatgacgtgtacgaatccattttctggaatttgaggattttt
+gaatgtttttaaacaaactttatagagaaaacattcgaaacactagaagt
+tatgttgaaacacgagaaaattttttaaaaatccatgagaaaaacagttt
+tgaaaaatctgtttttggaggctctccggattttgaggaatcgtcacccc
+ggagacgcagattctccggtaatttttcattcatatttgagtttaagaac
+aaaacagtttaaaaaaatgtttttagtatttgaatgaaacttataatgta
+ttttttcttccattaaaacttaaaaaaaactacaaaattattatgaatca
+aatttgaaaccgtgaatcaatctccgcggaagggcgagtctatactgctg
+caagcgcactctatcgcaaatgtacaattggcggtttttcaaacaggaat
+taatcggattctcgtagtttattttggatttcttttttcgggaacatatt
+ggtgtttttgcgttcaatattcaaatttagaggaaaactgcttcaaatat
+ttaggtaaactcttgaaaccgctgaaaataggcaaaaataattatttttg
+tattttttaggctactttctatacttttgcgtaaatactatagtttttct
+ataaaacacccattaaaattatttttataaaatgatttttccaataaaaa
+taaaatgcgcaaaatgattcttttccagaatcctatatgcgcctttaaaa
+tctctcggattactgtagtttcaaagaaattatcctttatatttttaatt
+ttaaattttttcctgaatgtcaaatattaggggaaaaattataataatat
+gtgctttattcatatgagtgtagaattagtgaaaaagaaaaaaaacatgt
+atggactgtaaaattggaattttagcgagaaaataaaaataatatgcaga
+aaaaattaaaattttcaggaaaaaagtcagtaaagccatcaaaaactact
+cgattttgaaggaaatcagcaagaaaaattagaaaaaagtatttttaagt
+tggaaaacccctgcttgaatttgtacactaaattgggcataaaagcgtac
+aaattcgcaaaaaccggtaaaaatctggggatcgtgatggatggagtgtt
+ttgtgaaaaaatgcagcgaaaaattgagtagacaatttcaaaaatgtcga
+tttttgaaatttgtgacgaaaaaattgaacaaaaactgtttttttttgga
+attttcaacaagaagttttataaatttttttgtttaaaattttgaatatt
+atatgagtttggtttcacttaacagaacaattcgaacaaaagtattctag
+aaaggaaatgtgcgctccagcacactatttgcccgtggagcgcacttgtg
+tgcacgaacgctagcgagaatgtgtggtagaaagggagggaataggaaat
+attaacaaaattgggcaaaatatgtaagattcggagaaagaattggagaa
+aaatatgtatttcgagctccgcgagctgatcaatccaaaggctttctcca
+tccttttttcgagaggcacattgcattatagttacacacagcacgtgtat
+aatggaacattgaagcctggaaacgagccatcgctaccatcattaccacg
+tggatctgaaaaaattaaagtttgatgattcgaaaattttctggaaaagt
+tatgattgtgagataaattgaattctttgaaaaatcaaaattcaaaagct
+tgtagaaaattttatatatttttttaagcgtattttttccgtatacattt
+ccaaatttttttgttacccaattttaaagattttcttgaattttaaaatt
+tctttcagtaaaaactttttttcaactttttgattttttttccgcatttt
+ttaaaattttattcagaattattagattcttttgaatttaacgaattttt
+ttcgctaaaaaattgttcgatttttcccgaattaagaaaaatattatttg
+gtttttgaattattttcctgatttttttcgattaataaatttgtaaaaac
+aattttttttctaatttttggttttgatgattgtgttttttttctgaact
+ttacagttttcaaagtttacaccgaacttccacattaaaaaattctgata
+caaaaaagtattcacatgatttttaaaatttaaatatttttcaaaaaaaa
+taatatttaaactgtgtttttttcggaattttttttcgattttttccgag
+ttttttttggaattttttcctttctgctccaaaaatattcaaattcaatg
+ttgtgtagaaattttattcaaaaaaagtgttcaacttctgagtctaaacc
+ttttccgaatccttaaatcctggcagagctctcgtgaattcagttgtcaa
+tttatgtggatagcaagctgccagtttaatgaaagttttagttcctttgt
+caagtactcgattaattttcgaataatcataatcatcgactcgaacacca
+tataatccttgagtatagttccaaattgcttcacggaatgcagcagtgtc
+aatttcattctgattcacggcggctggtggttctccgtctccagatgcat
+gggatgagccggatggcctgaaaaattaattttttggaattattatattt
+ttctgtttttgaaatttcatgcatctcgaatattttaacaaaattaccaa
+attcaactagatttcttacaactttcactgtgtcgatttacgggttcgtt
+atacgaattgaatttgtttatcgatagaatattaaaatttagctaaaatt
+gagaagaatataagaagaaattaatttttttaatttcaaaaatcgagcca
+gtaaatcgacacgagcgatcgacacagtagtcatttaaagaccagtttcc
+gccacgaaatatttcgcgcttcaaacatgttgcgtagtacgtattctcaa
+aattgtgcgttcacgtataatatttatgcgaatttttggtctactttgtt
+agagaaatcatcactaacatattgccagtaagagtccgaatatgatcgaa
+cattcgatcaagccgtgacgtcagtgtatccgtatactcattcatcgtat
+tataaacatgatcccatccaaattcttcaactcggaatggcggaatatcc
+ttttcaggtcgctttctaaaatcaatatatccaaatgttcgatgatgcga
+gtaaattggataattacacggcggctctttttccataatatcttctccat
+tttcatcgatatttgcaagaagaagtacaggcgagtagtttttccgattg
+gaactatatgttgctgcaggagcactaattaatgattcaatagtttcagt
+agtcattgcacacatcttcgctggtggcctagtttgtcctttttccgtct
+ttttcagctcactgatcaaatattcgacttcagttggccgacgatctggg
+acttttcggaaataggccgacattctcgcctcccaatagtcgagatcatc
+gatattaaggaaatcaatctcatcttgtgtcaaatcaacacgacgttcca
+atccaatacagcatataactgtgcacattgcgtgagtcattgacattatt
+ccgacggcgtggtggagagagcaaaccgagaaaaacgcaggaccaccgtc
+tggcgtgcggcgagcgaagagcacctggaaattttcaaattcttgagaaa
+aacctaacatcgttgttatacgttcgttctcttggcattggagttggcag
+aatttgttttgaaaaaacgttgttttttttttgaaagaacatttttttat
+tacgggaccatgagatcatgagaattcctatttactggcgcgaaaatatt
+ggcaggccacggcaacgagagagcatatggcaaagagagacgcatcttat
+tttgtcttgtaatttttttttaaaataatttacaatcccttttcaactat
+cgtgattgtaaaatattacaaatttcagaatttcgctaccaaattattac
+tggaaaactaaactctgagaatgcgcattgagcaacatatttgacgcgca
+aagcatctcgtagcgaaaactacagttattctttaaatgactactgtagc
+gcttgtgtcgatttacgggttcggtttttgaaataattttcttttcgaga
+agtgacagtgatattccattttccttcttttcttcctattattttatcat
+tatttgcttaattttaatattcaattcataactaaattactttaattcat
+ttcgagtagacattcaaagaattccggtagttttcgcttcgagatatttt
+gcgcgtgaaatatgttgtgaaatacgcattcttagaatatggtgttcccg
+taatattcagaaaagaaaagatttccaagaactttctgaagatttcaata
+tttgcaaaatcagaaaccagttctgaatattctttatttttagaaatttt
+tcaaggttttctaaataacttttctaaataacctaccgtatttcttctat
+taatatggctgcaatactatttttcgatggtcttcccgcttgcaatacta
+ttagggagtgcaagtctaatagggagtgccatactattcttcagaaaatt
+tttctgtgttggggcttactagattctacttgaaaaaactccaattttat
+ttggaagtatagaaaatttgattgaaattgcaacaaaaaggtacaataac
+ttcaatctctaaaaattttgttataaactgttgcaaaataggcaaaaaat
+gttattaaaattttaaaattagtaaggagtgtttgcaacaaaaaaaagta
+ggtgcaagactattagggagtgcaacactaatagggagtgcaatactaat
+tttcggaaggtctccgaggggcaatactaatagggagtgcaaatctaata
+gggaggccatattaatagaagatatacggtatatatagctttgaaaaatc
+ggaaaatgcctaatttttactttttgaggtttgaaaatctctaaaaattc
+aataaaatttcaaattaccgctagatttttccaatgaatcatccatggtc
+tatgacagagcattcgattcaaataatccaattttcgaaatttcatgtat
+gaccaatcaatgcccaacaaccacatttgttgtccacccttttccagaaa
+tttgcgacgatgatgatccataagtgataggcatctgtgacgtgatgcag
+ccattagtgcaagataatgacgagccgaagctggtagatcacttatatca
+acgaacatatggccataacttcctgtcatatgaacatgtagagttgggtg
+tttacatgtgaaacggaataatctggaaacgtgagggaaattagttcgag
+acggggaggggcaggttggcggtgccaaccgacagccgaacattggggtt
+tctcagctggtagcgccagccgacagtctactgcagtactgcagataaat
+tttcgtcggctgtcggctggtgaaaattttcatgaaaatcaataatttta
+aagaaattgttgcaaatttttcccaaacttgaccaaatttgttggctggc
+tgtaccagccgacacccgaaatttagaacattgattagaggctgcttggc
+agaaataattttaaattcagaaattcaattcgttttcaaaaaatattttt
+taaaactttaccgatcaacttctggaatcggatcaaaattgagccaatcc
+atggcttttcgtcttttagttgtagtgtgcattgtgtagatctttttata
+ttgctgcgaggtgagtaaatgaagaattttcgcgacccgtttctgaaaaa
+actcagttttctaaggaaattttgaaaataaattcgagaaaaagaaactg
+agtcagcaaaagaaaattggaaatgtctgtctggaaatattcgaatatta
+tattcaaaagttttcaaaaaaacaacgaaattacaagcaattgtgatcag
+aaaccgcggaaggaactggacgaaaaaaattatctttgagacgaatctct
+ttgcatctttgtgatctaaaagattaataaaggttgtcatcacatttttc
+gagatttgggaatgtgataagggtgaaaaatggagattaattgtggtaaa
+atgaggaaaaacctaatttttggtgagaaaattgtggaaaaactataaaa
+gaatctttatggagtttaaaactcaagtttttcacgcttttccgcactgt
+gcggaacgttttttgagagaatttggccgaattcggtgattaaaaaaata
+atttcaaaactttgcgcctcaattgtgatgtattaccgtactctgttgcc
+attccaccaaaatttccttcattgttttgccatttttctgcataataact
+gttctgggtttttttgcttcatgtgcccaaatgtacgaatttccctaaaa
+attatacctattttttcaaaatttttaatcgctagaatttttttttctgc
+attttctttaaaaaaagagatttctcgcaagtagaaggagaaaaaatgtg
+tggctatacttcttcttaaagaatgcacgactagccatagctcaagcccc
+ctctggaacgttccatcttcctcccattttcccacgttcaagaatcatca
+gcttcttctccctcagcttctcttcttctaaaaccacaactagacaaatg
+ttcttgttttccaccctatttttcacataaaaccgccgagaaacccgcta
+tcacagactcaatgcgcaccggaggggctctttgtgtgtgtgtactgatc
+tctgcgttatattcgaacaccggcgcacactcggattgaaccagaggggg
+ggggggaggggggggggggggtgaaaaaagagaaatactctgaaattcca
+taaaatctagaagaagaaagaaaacaaaggaaaaattggacattccgaag
+tcaggctaaaaaatctcataaaacaaaatctattcgatttgtgaccattt
+tcatctatctctctcaaaacccgaataaacaaagcctcccgtccccaaag
+tgtgctctcatgctcttctggagccttctagactgtctgtagagcctaga
+gacagcggaattgcactgaagtgatggagagacgtagagaaaacgcctga
+agaaaaaaacgaacactttggtggaggaggagatggcttccctccaaata
+aacaacaatttctatcgtttctctgtgattgtgttctcttctatgtatac
+tgttacgatattgaacaggaaattaaattgagcactctgaatacataata
+cacaataaataaatacaaaaactatagtttcagcacaaaaaattcgaaaa
+aaaaacgattttttttgtccgagaggagtatatggcctagaaaaagaaaa
+ctcggccactctgatgcaataaatttaaaaaattatggccgaattttaga
+tttctcaggccaatttgatacgtttctcgaaaagccataaattagtcggt
+ttttcacgggcttcttgccttcctcattgcatttttcgcgctccattggc
+aatctcctgctggacaacgcgtgggaaatcgtgtgccccacacgggcaaa
+tacattttgttttacaaagaaaaccgtgccgcgacgcgacacgcaacgag
+ccgtaaatctaccccagatatggccgagctcaaatggcctaacctgtcaa
+aatcttccacttcaaaatatgagggaagccagaagcgcgtgttgtttctg
+aaaaaaaaacccgcctaaagttgatttaaattatcgtttttttggaaata
+ataaaatcgatgaatttgtagattttgataaatttccgataaaaaaaaaa
+ttttaaaagaggaaaaaaaatgtttcttcgccctttagtaccaaaaatac
+gcccaactaaccaaatcgttctttcaatcttttttaaatgtttgtgcgtc
+tataattgtcgcttcagaaaactacacaaaacacacacacacacaaggag
+aagaaaagaaaaaacgtgttccatgacctgccactgggatcgatctgtaa
+aagaattggggaaaattgaggtaaactggttttttatcgggaagattttt
+tcggaaggattgagatgaaagttcgaaaggtaattggcaaagttgaaaat
+tgaaaaattcgaaaaaaatctcaattctctgctgtaacccccaattttgc
+gtcatggcctagagtatgcagcgtggcctagaaattcctaacgtggccta
+aaagatcacggcggtacctatgattttctagcgtgacctagaatatacca
+gacctagaatttgatagcgtagaatttcccagtatatcctagcagtctta
+agtgacagtttctcagtacgtccaagaattcgtcagcatgacctaggatg
+ttaaagcgtggcctacaaattttcagagtcttctaggatattccagtcta
+aaaattttcagtgaggcctgaaatcatcgcgtgtcctagaatgtctaata
+attgcaaaaaaaagatttgaaaactagtatttaccctaaaattgcatttt
+gagcattatttttaatctagttttaaggaaaaaatcagaaaaaataaaca
+ttttttgattaaatcttccgatctacagatagaaagtgtgcaagaaagaa
+tgcaacattgtgctcggtggagcaagaagataaaagaaagagaaagaagg
+tcccccacccctccagtggtcgaaacaatgataaattggacaaacggagg
+accaaggggccgggcagacacaagagagagagtacgtgaactgaggaggg
+tgtgcagggaaaaatgggatgggggcaaatctagttcaaagatgagacac
+ttttcaggatctttgattctgagaaaaattttgaacaaaaagaatacttc
+aataatttaatggcacatagaaatattttcagattgttcttcaaaagaaa
+aatatttttatgcccggaaaatttatttattgcatttcttccaaaacagt
+ggccggtctcgacacgacaaatttttgttaaatgcgaagaggtgtgcgcc
+tttaaagagtactgtaatttcaaactttcgttttaatatttacttgtggg
+aaaacattaatgcttaacgaaaaattacagtactctttaaaagcgcacat
+cttttcgcatgtgacaaacattttcgcgtctcggtgacaacttttaagtt
+aaaggcacatagaacttttctgaagaattttatttatttttctgaaagtt
+aattgctacagtatcctttttcaagtcgcaccgagagccaaactgtagca
+aatcatcaaaaaaaagtcgacaaaacgtgccgaaatcagtaaacttgaga
+gctttaaaactctattatcagttcttcgccaacaaaaaaaaagagtaccg
+tatcaaaaacgaacttcgacttttttggctctcctgcatacggacatgat
+tctgattgacagttttcatgtttttttttgggagttttatttattgtgca
+tttaaaaaatcgtatagtttgatgcgtggcctagaatttgccagtgtgag
+cattaactctccacggtagccaagaaattttctacggtggcctaaaaact
+gccagtgtagcctaaaatattttattgtggcctaaattttccaatggtct
+gttttttttatagttgcctagaatttcttttcgtgacctagaagcgtaca
+gagtggtggcctagaaaacgattcatggcagagttttgaaaaaaaaacga
+aatttcgagaaacaagcgaacaaaaatcgtctgtcgaaagagtatttcga
+atgctggggatgcaaatcagcaaatcattcaaaaaaaacttttgtgataa
+gaaatcaaactgataagccagtgtcaaagtctcgaggattaaaaatagca
+tttcaggtcggggtacggtagggtttttgtagaaattaatgcaaaatttc
+agtgggaaacgagttcgtggcctagaaaaatcatgtctgaaaaattgcaa
+atgcgctcccccgaaatggttaaaaattttcaattgatagcctatttgaa
+gtggcggcctagaatatcaaataatggcctagaactcaaattggcggcct
+agaaatcaaactaatgacctagattagggcatcttgtaggcagcttagat
+cacctattataggcaggtgtaggtaaaattgtagacaaatgtaagtttct
+ttgaagataggcgtaggttcctttgcaggcatacatagatcatttattag
+gcagatgtaggcctgattgtaggtacagtgccggccaaaaatatatccta
+tttttgacttttgataaatttacaaattttccaaacgagcacaactttaa
+aactagaaatgttatcgaaaaaagttcaactcatgtatgtattgcccata
+attacgtctactcgtattcaattgtttgttgtttactagtgtcacgacaa
+caaatacagcggccgacatctcgtaagcccgtttttgacaacgtttactg
+attcggccgtatctcgaaaactaatttttttctgaaaatgttgttaaagt
+gaaatagttttcatgttatttgttatcatttgtgtttattcactttgttc
+tgaaaaatccagtaaaaaagttatgggagtgcaaacttgtcgctcactgc
+cactcacccgctacaatcaaaaatcaggttacttatagttagttctaatt
+ttttttttgtagagcattttttagaaataacacatgtaaaatcacaatga
+>Contig2
+cctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcc
+taagcctaagcctaagcctaagcctaagcctaagcctaagcctaagccta
+agcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaag
+cctaagcctaagcctaagcctaaaatagtgactctggcagttctctaaaa
+taagtgactctggcagttcaccaaaaattgtgactctgaccgttcaccaa
+aaatagtgactctgaccgttcaccaaaaatagtgactctgaccgttcaca
+aaaaatagtgactctgaccgttcaccaaatatagtgactctgaccgttca
+ccaaaaattgtgacaatgaccgttcaccaaaaattgtgactctgaccgtc
+actatttttattgaactgccagagtcactatttttagtgaacttccagag
+tcacaatttttagtgaactgccagagtcactatttttagtgaactgccag
+agtcacttattttggtgcactggggtgggtcacgcccccagttctcagtt
+atgggtactctgatccactcgggacccactttatcgtgttccccgtgcct
+catttaccctagagcttcctcctttacctctcctctcgctatctctaaca
+ttccaatggaaactcctatttgaattaccgccaccgatgtgcccgacgcg
+acttactgttagcccttgttttgcacaaatctgttggcttccatatttaa
+aagttaattaatgacccaatgttctttttttctctaaatctccacaagat
+gttctgttttccctactggacactatcgttcactgcgtctcaccaattca
+cattgtctctactttaccttttttgtcatagtacacgttcgccaacggtg
+tcgacggccaaatgctttgggcagcgtttgctttttttataattagtttt
+attttattaaaacaatagctctaaagtttacaagtcatttgttataggct
+aaatgagttatgtctaataagtaatttgaactagatacttccgtgtaagt
+gacaatgtatcggaaaagtcctcaaagtgcgatgtagaagttcacatgta
+ctttgtttggcatgttagtaaaagagccagtatgctgattcattttatat
+tctatatactcatgtaatatgcccatgtaaggtttaattccaaaaatatg
+agcgtgttctattttataatattttactaaaatacctttcagttaattgc
+actcaaatttgttgttcttcattctctcgttatgatttaatcttattgcg
+tcaaggtcattattttaggtccattagttatcgatctgaaacatgttgtt
+gtatttttctattcttgtgagctcaggacacctcatacaactccagagaa
+aatgtgtctcattattcttgtcttttttcaagatctaatcaattttctac
+attaacgacgtttttgtcgttctgcttctttttttcgttcgtttgtctcg
+tccatcagctgtccactcatttctctcccactcactaggcagtgctttgt
+ttggttccgattggcagctggctgcagggcctgcatctcttctatgtctc
+tcatttacttgcattcttttcttcgttaatttttgttatgatatttaaac
+gggaagaagagtttgtggttcttctttttataatcactaaaacttttgga
+taagtaacaattttctgataaaaatattttcacggcgaagaaaaaagaaa
+aagaagagtagtttttgcacgttttcatataattattttcgttgatcaaa
+tgttcttctggagttttctaataaatttcttatcgactttttttcagaaa
+tttttctcaacttgtcatgtcaatggtaagaaatgtatcaaatcagagcg
+aaaaattggaagtaagttctttataatttcatttatatactataagtttt
+ctcgatcacaggagaaacaaaaacaacagacaacacaaaaaacaataaaa
+caatattgctctagtaatcaatagtgttgtaaagagggaagaaaattgtt
+atctgtgtagcagtcaacgttgattgagatgttgtgtttgactatagagt
+tgaaaataataacttcaaacttgcaagtcatgacttatcaaacactgccg
+gaacttattctggatcaaaggaaagttgtccaactgtagagtcatgtttt
+tcaaaagaaaacacaatttttaagtataaatattttgaaaaagtatgttt
+tagaagtatgtcaaattaaaaaaaaaatccttggttaaaaaatgattttt
+ttggatatatgtgtatttttaactaaaaatatatactttacatatatatt
+ttggcgcagttatttgatctataaatcaaactttttgatagacatttttt
+tatatttacaacaactagggttgttatgaaaacgcctattattctacaaa
+ctaaattattttaatcatacattccccactatctaaaaactaatgcaatt
+ttcagattttgtcatgtaaatgggtaggatgtctcaaatcaacagaagtg
+ttcaaaacggttgaaaagttattagatcatgttacggctgatcatattcc
+agaagttattgtaaacgatgacgggtcggaggaagtcgtttgtcagtggg
+attgctgcgaaatgggtgccagtcgtggaaatcttcaaaaaaaggtattt
+ttaatttaatgtgcattttataatataaattcttcagaaagagtggatgg
+agaatcacttcaaaacacgtcatgttcgcaaagcaaaaatattcaaatgc
+ttaattgaggattgccctgtggtaaagtcaagtagtcaggaaattgaaac
+ccatctcagaataagtcatccaataaatccgaaaaaaggtattcacaatt
+tgcatgatattgttataatctaattttcagagagactgaaagagtttaaa
+agttctaccgaccacatcgaacctactcaagctaatagagtatggacaat
+tgtgaacggagaggttcaatggaagactccaccgcggtaagtgtgtttct
+ttaaaaattacttccttttttcaattgtttgaaattaacaagaaacctgt
+tggagcgtatttctgaacttttaaatcgaaaatatcatttgcaaaaaaac
+ttgaaaattgagaaacttttttaaaagtggagtagcgtctgcgggttttt
+ttgccctaaatgacagaatacatacccaatataccgaatataaccgtaat
+aaaattatgcgatttttatttttatttttcatgaatgttaggggcaaaaa
+acccacatgcgctactccgccttaagaagaatcagctgtgagcactatcc
+actatacattggaaatttacaaataaaatagagattaagtaatataattt
+ttaagggttaaaaaaaagactgtgatatactatgatgatgggccgaggta
+tgtatttccaacgggatgtgcgagatgcaactatgatagtgacgaatcag
+aactggaatcagatgagttttggtcagccacagagatgtcagataatgaa
+gagtacgttgttttgcaaattgattaaaagtggagtagcgtcagttaaaa
+actctaacatgtcttaggtttttcaaaagtttggtcaaagttttggcaaa
+ctgccaacttcttgaaaacttcgttaaaaaaattcttgaaatgatttgaa
+aatttgtattatgttattctcttatttctgcactattctatatggcgcta
+ctatacttttaattgatttcttgaaagcagttcaataataattaatttta
+gagtatatgtgaacttccgtggaatgaactgtatctcaacaggaaagtcg
+gccagtatggtcccgagcaaacgaagaaattggccaaaaagagtgaagaa
+aaggctatcgacacaaagaaacaatcagaaaactattcgaccaccagagc
+tgaataaaaataatatagagataaaagatatgaagtaagtcgaaattgac
+aaacagtggtttttgtttaagtttattgcgaaatattcaaaattagacat
+gttaaaattttgcgagataatctaaagattaggtatacagattttttcat
+gtaaagttacattcatcaaaatttttgtgttcaccaaattagacaaaaaa
+tgttagttacacagtatatttattttttatatcaataaaacctttttcag
+ctcaaataaccttgaagaacgcaacagagaagaatgcattcagcctgttt
+ctgttgaaaagaacatcctgcattttgaaaaattcaaatcaaatcaaatt
+tgcattgttcgggaaaacaataaatttagagaaggaacgagaagacgcag
+aaagaattctggtgaatcggaagacttgaaaattcatgaaaactttactg
+aaaaacgaagacccattcgatcatgcaaacaaaatataagtttctatgaa
+atggacggggatatagaagaatttgaagtgtttttcgatactcccacaaa
+aagcaaaaaagtacttctggatatctacagtgcgaagaaaatgccaaaaa
+ttgaggttgaagattcattagttaataagtttcattcaaaacgtccatca
+agagcatgtcgagttcttggaagtatggaagaagtaccatttgatgtgga
+aataggatattgattttataacgtgtaattgagttttggccaaaaaggta
+tggaaaggtggctgtttagttatatatttttctattatttatttgaaaca
+tgcaaaattgaagtgaacaataagtgatgttcatggaaatttaaactgtt
+ttatgatacttttttgagaaattgaaaaatctgttcattttagaaacaat
+gtccacatggttctaagagctaaaatttttattttcatccatttagagta
+ctttctcttttagagtacggccccagagcgatgttagaaacctgagatcg
+gtcaacacagaccgttaattttgggaagttgagaaattcgctagtttctg
+acacgaatttcagctaataccaaataatgtgcaattgcattttgcatgtc
+agcattcagcattcatacaaaaatttcaaagagccaacttttcatacgtt
+tatggtcaacactgtatgtgttacattgaacttttttaaattgtattatt
+tcatatttgaatcatttccatgccattttcaaatctttttttaacaaaaa
+tttagtttcaaagttttaaatttaggtgaaaacttgctacaaaataacac
+attctttagtcgtttcaattgctatctatccgcagactgcaattttgttt
+tcccacaaccattcacacaataaataagggtataaagttttgttcatata
+acacatttcaatactaacatttcaattttgaacaatttttctaaacttat
+ttcccttcgcccaaacgtcattcaacattctttgtacaaaacattaccat
+tatagaaaatctcatttttccactatttcatttatttttattgttccgcg
+ataaatataaataaacatttacgtgttccgagttcaaagttttaccacgt
+ttcagaataaaggaatcggaggggggggggggtgaaaaaatcatttcaac
+aaatcagaatttctcaaaatgtgagttttatcattttcattgttagaatc
+acgattagcttatattgaacaataacaataatttaatcccattcattcca
+tatcttctcatgatgaaaaaataagcattattcgttttcttttgacgcgt
+ttgatagggttctgccagcgccgaccaatattcttctcaattagattttc
+cagaactgcactaactgaaaattgttttagtaaatagaacaaaactgact
+attagttcaatattataattttacttccctattttctgacagcattttgc
+aaatactctttacagtcattcttgtattttgacaacaaaattcgaattta
+aaatttcatttttcttttaaaatatcagtacactcctggtagaacaaata
+ttttatttcaatgtttatgtatgagtagaaaagttaaacagaactattat
+ttggcatcaatttctgttttttttttcatctaaagtatggcatttattag
+caaaaaatacatgcttgaacaggaataactattaatttcctatagcgacc
+caaaaacaaccaaaaattgttttaaataatttttttttggtcgacttcca
+tagttatgagtggcaaaaactgagtaattgtcaccttttgacagtaaata
+aagaaattttcaaaaaatttttgaaaagttttattatgctattcgatcat
+tttggcaccatgtaggctttaacaccccactggcgctactccgcatttaa
+aagtgatttataaaaagtgtaaggaatactttgctccaattttcgtctag
+tgcatggcgtacccatattttgttcatgttgtgtctcttagcctctcctc
+ggaggcgagtttccatagctacccacaaagaatttatttttctatgtatg
+cttcgttggctccaccttcattttctcagtcgtttctcagttttctcatg
+aattctctttttgttcgtttttagaatagctatcgttttaccacgaatgc
+gctttttcagttattaagaaaacagttttttgtagtacatttttcatagt
+ctgaactttcagcatggaaatagtaaaaacaatcattccacacaaccgtt
+catacattgaaccgcccataccatctgccaccgagtggcttgcaagtaag
+ttgttttgtctaaagacataatttctgaaagaaatctatccgatgagcgg
+aaaataaacagaagttgtttttaagaagactttgatgcaattacaaaaat
+atatggctatgtgcgtaaaaattagacgtgactcgtcataaagaccatat
+aggtctacaaaggcgtttcgagtcttataaaagttcgttcaaactttttc
+ggggtcttgttaaaagcacaccaacaatacgttaaaggatttataaaatt
+gcaagtattcgtctcaattgcaatactttggaagccgaattttggcaaga
+tattggtaaaacaggtacattacaatagctatttttggtaaaatgtacta
+ggtatcttgtaatgagttgcgcttgcctcataggcatacatctaccgtat
+attctttactagtgctgcaggcagcactaatttctaggcccttttttaat
+gcagtactattagagactgcagtactactggagatgcagcgctaatagag
+aatatacggtatgtagatgcctagaacgctaaggttgttttattcaaata
+aatttcttagaaaggctaatattatttttaaaatcaagatttaagttaat
+taacacgacctcgtattctatatttcagatatgtcagttatgcatgtgtc
+atgtttaaccattgcttgtgtatttgttgctatcacatttttgtcatcat
+ttttccatttattttttgtattgaaatatgtgtctaatgagaggatacgc
+aatgatatgtacgcattgatatttatgtttcccgtgagtttataaatgga
+attatgaagagtttggaaaatatattttagataaccacatttgcaagttt
+agtaggaatgttcataccaagagcggctattttcctttatgcagtttccc
+ttgtgtaagtgtaccttaacaccacaaacagaaaaatgaactttattgat
+ttgttccagatatttcatgtttaccctcttcataatggtcactcttttat
+tcaacatttttggaggtcggcaagaaatgagtgcctacttgctccaaaga
+aatattcgagttaatttcactgttccaccactctgttttttcaagttcct
+gccaactgtagaaagtacagagtgagctcgaaaatcttatttttgtcttt
+ctaatatattttccagtcaaaacctacgtcgaatcgagtggttggttttt
+caaactcctattatacgaactcttttagagttagtctcagtcgttgtgtc
+tatggaacaagaaggaagacgagaaagcgtgtaagaccatgaaatgtttt
+ttgtgtgcgcaacgtagtgcgaaaagttagttatttggagaggaagttat
+ttgaaaagaatctttagaagatattgatattttatattctgtatgtaggc
+gctgtggcaaattctcgacttcacgggagaagcgcaaattttgtgctggt
+ttccataatgcgctcgaaaaatgttgtatatttacattattcaaatctca
+ccgattaaagttgaatttcagttggttcgtattctctcaactcatggcac
+tactctcgatgtgcattgccttttatggttgctacgttatggttcctctt
+ggaagagaaaagcatgctccataccgattcgattttctatttcgtacatg
+tgacattgctcagtgtatttacacaattcaaaagtttgtctttgagtttg
+ctgctgcagttggcttgatcacttcggatcgataccttccagctgctgcg
+aaggcattgtgtaagttcagaagtttaaatggagataggaatgtttcgat
+tttcgtttataaaaacacctgttaatattaaattgttgacgtcatagctg
+accattcatgcttctttgtctgattgtaattatataattgtaaaaataca
+aacaagttaaccgaaaaagttctctgttacgtttgcggtaaattagaact
+ttatgttgaaagatatcagcttaaagttaaaattttttgaaaattaaatt
+ttggctagaaattcaatttttctgataaaaagaaagtttggtgggaaatt
+taatttcattgatagaaaaactgaagtttgtgttttctgagaattaaaaa
+acagcgggcaatgaagattatgtctttgtaaatttcaatttaaaaatatt
+ccagggtgggcatcgtttatgtgcacctgggagatgatgctcctctctgc
+tctttgctcatattgtttgcgtccagccaagtgtaaattttttgatcttt
+accctggaaacgatatgccagctttatcggcaagagatggttcaaactcc
+cgtgttccgtcattttctcgacgtctatctattgaatatgagccacgaat
+tgctggagtgatgttggagccgccgtcaagaagttcactgagcattactc
+ctagggataaaatagaagacccaacaaccgtatcgtattttgctgataat
+tttgattctttatcgcaaatccaaggacaataattgaacttcctgccatt
+atttcattgctcaatttacactattcttaactttttgacatgaattaaat
+ctttttaaactgagtctaagcgtattattgtatcgtattttccccttctg
+atgtattcatttacttgtatttttgaaacccatcccagtgatacaactac
+ccaattttcctgtgccatgtttcttgaaacaaatcaaatgtgataaatag
+tttgaatgcctttatgtataataaaatcaatttttcaagccggatctccg
+tgtttgccattttgattacccagcgagcctgaaagatttgaaaagttata
+tgagcagcatgaaaagcctgttctttggtgtttaaaggtagcacacaaac
+caaaatttgttagcgcaaatttaaatttctatttcagttaccatgagtta
+gtctaacaaacatttttcaagattatcagacaactgataattttaactca
+ataagcatgattttgaacaatttcctaactggcgttacttcaccttgaat
+aagattgaagtggttattggttcttatgtacagtgcttatatatttagtt
+aataatactaactgaatatataagcactcgctaaacttgcgcacaaactt
+gcgcttagatttctcggcgttttcctgtgcagtctttcttcttcttcttc
+gattactggcatttcttcaacgaaccattttacattgttggcagacgggg
+aaatctgagaaaacattaattatttttgagaagatttttcaaaattaccg
+ccgattttccttcagcagaagcagtaattttcagaggttcttccggaatt
+tctatttttactgcctgttgttttgctattcgaatacgttcttgaacaag
+tgcaggcacatttatcacaacatcatctgtgttttctattggctcaattt
+cacgtaaatctgttgatgggcgacgatggttaaggttcaaagctgctacg
+cctaaaaagtttactttctcgatataatgttttccaaaatattttgtatt
+tactcacatacactagctctcctcggttcagcttctgatttataaaattt
+tgcgatagctgcagaaattttgttacgcggttgttgccacatatgtcgtg
+gcaacatcgaagcgttactttgcgttcttgtattttcagttgaaaaagac
+gatgcatcatcatcaccaaacgatctcgatcggcgtcgaacatacgaaat
+caaaccattgtccaagtcagcttcgttcctgaaaatcgtccacatttcaa
+atttcgaatcagatcatattaattcacatatttatgcaacttccttgcat
+atgagaatgagctttccgtgttgaagttggcgtgtccattgtttgtggtg
+tttcatcttcccaaaacggatctcttttaagtgtcggataccgcccgtag
+gcttgatctacgactgcataccctacctgcaaaacttctaatagttttca
+atcttattcgaggtgaagaataaatatctctttaacctggagattccgct
+caatgatccagtttgtttcaaaatcttcatcgtcttctccaagaggattt
+aacattacttctgatacttttagccagccgactataaatacaatttgtag
+agatgtcattattggaaaataaatgtctatcttccactttgcaccaggaa
+tgttattgttgttttcaagaaactgtcttccaaacaatgccaatacaaaa
+taagtccgaacagccaggttaacaacttgagtataaaccagaggaatcgg
+caccatatcgaaaatgaccagatttagaatttttgtacggaattctctca
+ttttctgaaatttggttgacattaatcattgtcagtactttactaacatc
+aatcaaatcaacgtagagataactgtctgcaataagcccttcatccttag
+caaccgttaccaatgagaacaaccattgaattggttgccagtatttgctc
+tgcgggctcgtgatggcatcaaattcagtcagctcatcttcagttaataa
+tccagcaccaattaggtgttttatggtaggaaatcgtcgtctaattgcag
+gtgaaacatcacgaaacaccatcacttgtgcaactatcatgtaccgcaca
+cagtttcttcgaattaatcgggctttttccgatgtaccacgaatatattg
+agcaattgtcaaagcggatctacaaaattattaagaatgcaggaatattt
+ttgaacagaaacaaacgtatcaatccatccaacgttatcgaacactttgg
+tccatcgattataaacaattgatacataaaaacctaacatgaatgttacc
+ggtataaatactgaaaatgtatcaaaaaaagtgcacagctgttcgaaaac
+tctgaaaaaagcaagttaagctcaataagaaattaattgcaacaaactct
+ctctgtgctttgttaagcaatattctataaatcactgataaaatagaata
+acacaaaagccaaattagtaactctgaccatattgatttccaaaccgatc
+ctttccatctcaataacacttttatttgagtgaacagacctgacgtggca
+acatctagagaataagcgacagtcatcagtcaaaaaagtatgtgtagcaa
+caaactgaataaaaaatattatttgtatcagtcggtgggtgagcgattag
+tatcaagtagcacaaacgctaccgtttcagatttgcatattttattgtta
+taggggttattcaggcataggtcggttgaattccgactttttattcacat
+ttttccagaaacaaatcgattctcctaattttatttttatgctttatctt
+tttgaaaatctggcatcactgtttgcggaaaaaaatataaacaagaggga
+atacagtttgtgggtattttgcttacgttactgatattatcgccttttaa
+tctatattttagtaatttatcttgcgtaaataccaaaatatggattaaaa
+ggagataatatcagagaagttaaattacagtagctgcgacaaagaaaagt
+ggccaaaatttctgattttagccaaatttggctttttttcgaaattttga
+cccgccataaaaaatttagaataattttataatttttttacagttatgct
+tggtacattgagactttattctatcattcaaaacaaaaaaataccacaaa
+tgcttctccaactttgagaattgtaaaattttcaataagccaaaagtcag
+ttactggtacctttgcacctatcagtacttgccatcaaaagaaatttccg
+agaatgttcgcatttcggagtgccgtaaaacttgttcctgagagatatat
+atcgtctcatcaattcggtatcagtcaacctcccatattgtgtcatccga
+tattcaatctacaacaaacgtcatgttgtttgttttcaaacaaagtgtat
+taacattggactttcagatagggttttctgattctttaaccctctaaaaa
+accatttccctcatttccataatatttattctattttatgcttaacaaat
+ttacacgagtttcaaactatttgattgttcatcaaaaaaaatcccaaaaa
+ctgttttgtttttatatattgaactcaacaacataatataaaaactttca
+aatcgtaaatcatctaagaaaagatcacatgaagtgagtagatgatagag
+aaccagttcttatttttatgtttccgttacttttttgttactaccactaa
+taacttggcatttttcaatcaatattttttacagaatgactgtaacttat
+tcactcgatgttgcttcttcttcttttttctgcttatacaaactactatt
+tcgatggaaggtgtgctaaacattgcagatattttgtttcattaaatttt
+taggggtcaatctggaaatcggtgtgggctgagcttgtagtttggctttg
+tctttatgcagtgcttagtgttatttatcgatgccttttaacaatgaagc
+aaagagcgtaagtactgttttcaaaaataaaccgggagtctgactttcag
+aacgttcgaagatctttgtatattttttgatacttattccaatttcattc
+caattacattcatgcttggattttatgtctctgctgttttcacacgatgg
+tggcagattttcgacaacatagggtggattgacacgtaaatgacttagtc
+gtattacgattataatatctaaataattctaggccttgtctttggataac
+tcaatatatcaaaggggaaacagagcgagcaaagtgtgtgagaagaaatt
+gtataagatactcaattcttacacaggctatggtaaatctgtgtgtaagc
+ctaactaataacaatagtctttttgaaggtgtaccgtgacgttgcagcaa
+gcgttcgcaaacgtttccccactttcaatcatttagttactgctggcttg
+atgacagaaaaagaaatggccgagttcgagtctatccctagtccacacgc
+aaaatattggcagccaatgcattggttgttttcgatgatcactttagcgc
+gagacgaaggaatgatttcaagtgatatcatatatgtagacttgatggag
+gtacataattcagaagatttttaaagagtaataaataataagtttcagaa
+aatgcgccaatttcgtgtcaacattctttcattaacattatttgattggg
+ttcctgtccctcttgtttatacacaagttgtccatcttgcagtacgatcg
+tatttcctgatagcgttgtttggtagacagtatctccatccggagagcaa
+ccgtttaaatgactttaagcaaactattgatttatatgtaccaattatgt
+cacttctccaatttatatttttcattgggtggatgaaagttgccgaagtg
+cttctcaatcctttgggagaggatgatgacgattttgaatgcaattggat
+acttgacagaaatttacaggtaaacgattaacataatcaagatttattat
+tattatttaatacgtttattgaaaagtgaaaatgatagaaaaatttgatt
+atttaattcaattttaagttagaaaaatatcctacacattttctgaagaa
+gtgtcgtaaatggggaaactttttaaacatgtacgttccaaacatgtgcg
+ttccaaagttccgaaaaaaaatttgtgtagtaccaaatattaaagaattt
+tcttcattcttaaaatagtcgtcttgatatacttctgatatgatagtaag
+tattgaaacattaactacactttttcagttattttttattcgcgatacca
+tccatttaataaaataagggagttcatctacacctgtgcccttctatcaa
+acttgattgaagtatattatttttaggtgggattgatggttgttgatact
+gcatataaccgttatccaactcttgaaaaagatcagttctgggaggacgc
+aattgcggagcctctttacactgcagagagtgcgatgagacctctgaatc
+cacaagtcggatcttgtgcggatatgtaattgagtgaatttgttccaaac
+aattgattttcatgttcaggccaaccgaagaagagcctttcatggttcgt
+ccacgaagacggacgctgtccagaatgtcacactgggatggcgacatgga
+agatactgatgttgttccggttgtgggtctgaaacacacgcgtgataata
+gtaattatgcttctggcgaatctctagcattttcgaatagctttgccaat
+ggtggtaggaaactgagtgagatgtttcgaagaatgagagctgggagcag
+aattggtgataggtataggaaacgcaactcgtcagcacaagactttgaaa
+atggaatggcaaagtttgtttcatgaaaatatgaatgttatttattattt
+cggctatttacagaaaaaacagtattgatgaaaatgcagatattcacagt
+aataggctcgatcaagcatccggtacaccaaaatcaggaaggctttggag
+ttcgatgcctcaaacacaattggaagaaatgcttaaggtttgtttattca
+ctggatttattcactggcgctttcacgtaatttcgagactgcaaatttga
+acttttaaaacaatattcggagaagaccattttcgagctaatctagcgtt
+agttttaaaaaatgttttaagtttttttttcaatcacaatggtgattgaa
+aatctcagaaaagtagagtacttgcgttcatacttaacgcatttctcata
+tttttatagaattacacgccgataaacaaatagttaagacatttcagaat
+aaaaactttaactctcctgtcaaatacaacactgatgggatgaaagaccg
+agagcttcaaaatccaacaccaatcactgatcacattgatttgcctttgc
+atgtggcgagtagtcaatcatggtttaacgaaagtttaccagtaatcaaa
+gaggaggaagaagctaaaagaaaatccaacacggatacaggtagacatag
+tcaattttgtcaaaaaaaattaaatgagtttttcagagtctccaaagtct
+agtaagcattcaagtatgtcaatcagaagatcggaattgagaagatcatc
+atcttcaggtagtgatctaggcaagtctggaaagcgggagagaaagaaga
+gcgagtgatttttgaacagtatgataaaatattttttgtttctcttttca
+ctctaaactgaagatccctttcatttcatttttacatatttattatattt
+taaatttcaaattgcttaattaattttctattttttaataaacaattgtg
+taaatatatatattttttaatacagtgtgggaaagttctataggaccccc
+cctaatttgaaggtttgaggaacttccgaaaatttttttgaaaaactgct
+aatgccattcgtttttaaattgaaaaaaacctatatacatttttttccag
+aagtttatctcaaaaactgaggtcgcgctggaaaaaacgtcaaaatccag
+tgtgaaacttctataggaccccccgttttttttcacgatttttactaaaa
+tcaacagattttggaatttttgacaaagctcaaatcaagtttgagttaga
+aatgagttcatataagcagttttgactttaaaaattaatacgaaatgttc
+tcgtgggatctccagactggttctgattcttccgatctttgatgttcaag
+tctgtttcaagcttcctggtgctctcggtaatgccaaaacttgataaact
+ctctttaacaagttcctactaaaattcctagcacacacaccataaacatt
+tttacgccatccccaagaaaccagtcagaaacagcgtattaacaagttgc
+agttatttttgatcaacaacagaacattcatatactaaaatcaagaaagg
+atcaatagttaatcgggtttccttgtgtgcggatgatctcaaacagtctg
+tcctccattgatctgaccaaacttttcagctggttgtccggaatagactt
+ccaagcgtcgagaattccttgcttcaacgatgcaactgttgggtaagtct
+tgttctgagcatacacgatacggacaagaatcccccacaaattttcgatt
+ggattgagatcaggacttcgagctggccaatcaagaaggttgatcttctt
+gagcttgaaatagtcgcgggttgagttgctcacatggattgtcgcattat
+cctgctgaaatctaaagttttttctggagtagtgacgaagatatttggag
+agctccagttccaagacgttctgatagtcagtgctgttcatcttgctact
+gacgaactgtatctcaagcttcttcttctccgtgaacgctccccaaacca
+tcaccgttcctcctccaaaattacgtctcgaaaaaaccattggttccttg
+cgcaaatcgcgccaatagtagcggcaaccgtcaggcccatcgagattgaa
+tttcttttcatcggagaagacaacctaaaacaatgatcctaattattcac
+tcttgcttttttaaattctcactttactccaattcgttcccatattgttc
+ttagcaaattccaatcgcttgagtttatggtctgcagagagtaacggagc
+agggcgaagtttctgacgaacgattacaccagatcgtttgatgacattga
+ggatggtcctttttgaagcagacaattgaagctcattgcgaatatctctt
+gccgtcttacaggagttggaggcagcacgaatcacatttcgttcgtcacg
+cacggagagagctttgcgacgaggagctcttttagatgtaccgtagctca
+ccggatccttcagatacacgcgaatacagtgtcgagaacgggaaattttc
+ctactcatttcatgcagggacacattgagcaatttcataacatccagctg
+agcgcgttcagtgtccgaaagggcagatcctcgaggcattgcaagttaga
+ctgctttcgaagtaagctttccagcctctatatgtgtgccacaacacatg
+ccacaattccacatttaataattcacgcaaaaaatagtaaataacatctg
+tgagggacaatttaacttgaaatattggtcccatggaaccttgtaatcaa
+agaaaaacgatttgattcctgataagccttccattgtttcctgctgcata
+ttttgccaaatcagcttgactacacagtcgaaacatctaaagtgcgtgct
+aggaattttagtaggaacttgttaaagagagtttatcaagttttggcatt
+accgagagcaccaggaagcttgaaacagacttgaacatcaaagatcggaa
+gaatcagaaccagtctggagatcccacgagaacatttcgtattaattttt
+aaagtcaaaactgcttatatgaactcatttctaactcaaacttgatttga
+gctttgtcaaaaattccaaaatctgttgattttagtaaaaatcgtgaaaa
+aaaacggggggtcctatagaagtttcacactggattttgacgttttttcc
+agcgcgacctcagtttttgagataaacttctggaaaaaaatgtatatagg
+tttttttcaatttaaaaacgaacggcattagcagtttttcgaaaaaattt
+tcggaagttcctcaaaccttcaaattagggggggtcctatagaactttcc
+cacactgtatattgcaaatacatgacataaatttagatgcagggcaaaaa
+ctatagatcaaaattttctattgcactttttatgtataatcaataaaaat
+tgaaaaaataaaaaactttgtaagttgatgccgaaacatttcagtttcta
+ccaaaatcgttcgattttatactgatcagttttgatcactttctggtaaa
+tttcgaaattcgcgtttttttcagttttagagttagaatattagttacta
+acaagtttagcaattttgaaggatttcttcaaaaaaaactgctcaaagga
+ctttgctgcaaacatactcaaatttgcagcaaagtcctttgagcagtttt
+ttttttgaagaaatccttgatttttccttattttctccttattttctaat
+tttattttctaataaatccttattttctaatttttcgtaaaaaattatta
+aaatttcaaatttttggaaacaattgtttttttttcagtaattgaccata
+tttttgaccttcttgtacgtgaatgctttcctttcctctattagggtgtg
+tgactgcgtgtgtgtgtgagagtgtgtgtatgtgtgtacgtgcgtgtgtt
+ccctggcgcggtggtggtgttggccacacggccctgcgacccccataaaa
+actcggttcgatagagagacacacgggaatgtgagagagtatgacgattc
+gagagacgcagacgcacgaggagaaacacacgtcacgcgaaacacgttcg
+cgtcgcgtcgatgagcgcgcgcacacgtccacatcgttgcctggatgagt
+gggtttttggtccgcacacacgaactgtttttttttaattcttgtcttcc
+ctagtagtgaagagttttccaaatttccaagtatgtagttttaagtttct
+gattaagaaaaatattattcatgtgttttgaaagtttgtcagaaaaatca
+atatataatatttttagacgccatgattttcaaccaaataataagtttaa
+ttttttttgtgaatctaacttattgatttctgtgtaatattttcaatcgg
+tgtgttttttactacattgatatctacattgatatatctacattgataca
+ttgatatttttcgtaaaaaatttttttactccattttactacattttact
+acaatttactacattgatataaatgatgatttttcagatgaaaagaatac
+tctcagatggagtcaatgagccaaaactatgcaaattcataaaagaagaa
+tcaccacataaagttaaacaggaaccatatgatgatgaagaccttgtaca
+tttgggatccgaatcaattccatcaccaacttcatccacttcgcctccat
+ttcctacagaacctgcggttcaaacaattaaacttcccaaatatatggag
+gtaaccatacacttttcatatatgtgtaacatggggcggaaatgtgaact
+gttcatcgagaccgaaaaaataatattttcaatgattacttttggtaaac
+ttttcaaaacaaaattggcaattttttcttacaacttcaaattgttattt
+atttccgattcatctttataacttcaataatttatttgagaattctattc
+aatgctattaaagtcaaaaatttgcgaaacgtggttttgcccgagttctc
+tgaaactttctgaatttggaggagtatagaaaatggttcgtaattttttg
+caaaaaagtttcaaaaggctgattaggccacgccctttttagagagttac
+tcgtcttctaaaaagtgtcactggttttcttgattcgttttctctaatgt
+taagtacataatgacataaatcaaacaaaaaacaatacagtgttcttggt
+aaacgagaaactgagtgcattttttaaaaaaatgtgaaaaagtattggta
+aattgctaaaattttgaaaaatataagattttgaggaaattcaaagcaat
+gtcgcatggtccgacctcaacccctacattggtccgacccctatacgagt
+aattaaaataaaattaaagtataaaaaatgtaggaaaaaaaaattttttt
+ggtcgaattccaaacttatgagtggaaaaaactgagaaaaaaatgcggat
+ggtgctctcttttgttgaaactttcaacgaaagcctttaaaaaaccgctg
+aaagcgtccaaggaatgtctaaaaattagaatgccgctgtgaaatttagt
+aagcgatcaaaaaagttaagcaatttactcaaattatttcagttaaaatg
+tggtgccctcgttgctcgacttcacactgaattgtttatttgtcctggaa
+ttcgagaaaaatgcatcgaagtactaggtcggtcagagagtattacataa
+gtgtcattgattattcacagactgtcccggagagttattgacacctgttg
+agtttacaatcaaggctgaaaagagcaagcaaaaagactggaaaggagcg
+ataaagcataatggaaggatgttaaggtatgcttcttgtagttttcaact
+cttaaaaacagaacaatttaacagaacactcatggaattcaaacaattgg
+atttttataatcatcatgagatgtgttcattcaagtgtcattctcgaaac
+tatatcacaaaaaacggtggatctgttccaaaacttccaccaaaaaatgt
+tcaacgtcgtcactcttctgcatcaacaacatcaaacgtttcacaaacag
+cgattaatcaattacttcaaggagagctgattaaaaatccaaattttcta
+gctgcgttcgctgctcattgtactgctgaaaatcagaaacgacaagaaga
+agctgagagaaagttgcaagaaaaacaaaacgccatcaagtgtctgatgg
+aaaccgactcggtcacgttctggaatcaaacgatacaatcaaaaacgtct
+actgtcgttttggatcgaatttctatggagctcggttcgttggctcagaa
+tctgatttctggtcgtgattttgcggcgagttcgtctaaaatcatccagg
+tacttcaagttctcggcttgtcggacaccgtttctcgggaaatgtgtggt
+caattcattcttccatcgtccgtgtcaactaatgttgatggtaaggaatt
+cgatccttttaattctatctgaagattagttaaaagtggagtaccgtaat
+ctcctttttaagcccaacatgacccaacactactgaatttcgcaataaaa
+ctttttggaaatttctcagaaaaaagttatggcgattcaaagttctgcaa
+aaaaaagactcaatttcagctaaaatcacaacttttaccattttctcctt
+gtcgcagcttctcgaatttaataatataatctttcagggcaaagctcatt
+agacgcacaactaccggttcaacatttgccgtcaaaagaaatgaaagccg
+ttgacccaatcgaaaaatcaccaaatgataataacaatgaaactctcagc
+tcttctgagaaactcgaactcatgatcagaaacgcgctctgatcgaacat
+tcatactctaacacattcctcttcacatctccagatgattattcatgtcc
+atcattttatcattaaatatctcattctatactctcttctcgctttattg
+tttctcgctcactccctacccaccattacataacatctctgaaatttcaa
+agttttgacattcttggctgtgccttttcctctcaatgatattttccaaa
+ttccattatttttcccccgcctttgattgcttttattggttacttgttta
+ttggtatacttctcggtattctttttaatgttctgcatgtttcatatggt
+ataaattgcatattacgttagacacaaattactgcaaactaaactcagtt
+tgtctcaatggaaattcgtcgaaacacatggtgtcaagctgtcccattac
+ggtttgatttacaaaaaatgtagatcaaaacaaaatcggacagcccgaaa
+ctaggtgtaaatatacttataagaattcaaaaagaccgaataacataata
+aaacattcctaagaattttagattttctaaaatttccagtcatagttttg
+gcaacttgccgaatttttaaaaagtatgagcttttgagaggatgcagaat
+gtttttacacaaataattaaaaaaaaaaggaaagcataaaaattttagaa
+tttttttttcggtagacttccaaagttatgagttacaaaaaatgagtaat
+tgtcgctttttgacagtgcatttaaaaacatgcaatttaaaaaaaaactg
+tcagctaaggtgccgactgtcagtgacagtctgtcacttgttggtaattt
+tttaatagattctagcttacattggtattattctgttctaaattatttgc
+tcaaatgaatcatcattctcgctgtgttgtcattttatggtacttgtatt
+attatcattatttagataatgaatatacacatattggatagaacattttc
+caaaatcagaacaaagcctcattacaaacttcaaatttcatttttcaaaa
+ctttgaatagaaaataaatttgggtaaatagtcagtaatcacccaatccc
+ttaacataatatccacattatcgagctagtgaagctgtttctctggcagt
+gtccaacccacttcttcgtcttcacctcattccttttcaactccgcccct
+taaaggaagtactcgtccatagcgcataggaacaggcaagcgataatgtc
+tgtgtctctatattttcacgcactgtctagtgccgcatccgtatcctcta
+ggacaccggtgccgtggcgtctataaaagagagtacgggtgtcttacgca
+gttcgtatccgattttcagtccagtgtccaaggaagacaagccgaatgtc
+ccatataaaccgcattcttatctattcacccattccaccattatgttctt
+gctttgttcccatttctcgtttttcgttttttttctcttaaactttagat
+attactatgttaataacccattattttaggcagtcacaacctaaaaatga
+tggagacttcggagcacaaagagctccgacgtgtggcgtttttcgccatt
+gttgtatctactgtagctgttattgcagctattgtaattcttccaatgct
+ctattcatatgttgctggtttccagagccatcttatcattgaagctgatt
+tctgtaaggttagtataaaacaaacatacgtattctattatacaagcaac
+gcttttttagactcggtctcgtgacatgtgggcccaaatccatgacatag
+atggaccacacctattccatcgtcagaagcgtcaatactcttcaccaaac
+ccaccagctgccggtggatatggagctccagttacgaactccgagccagc
+tccaacttgctgctcttgccaacaaggaccagccggaccaccaggaccac
+ctggagatgacggaaatggcgggcaagatggtgttcgtggaaacgatgga
+actgacggaaaggaaggaagccttttggaaagtgctattgtaaatgaacc
+atgcattatctgccctccaggaccaccaggaccacaaggaatggcaggag
+ctaaggggccacaaggaccaaagggaggaaatggagataatggaccagat
+ggaaaggctggagccaacggaatgcaaggaccaccaggaatgatgggccc
+accaggaagacaaggagtaagtggaccaaagggagctccaggacgtatca
+atcaaatcaatggaccagctggaccagctggacataagggagtccgcgga
+ccaccaggaccacgcggagaagctggacttgatggtggaaactctgaagg
+accacaaggaccacaaggagatgctggaagaccaggaccagttggagagc
+aaggaccacaaggaccagaggtatatttatttttattcgatatattcaag
+gctttacatttattaatttcagggaccacaaggaccaccaggagaaccag
+gaggctgtgagcattgcccaattccaagaacaccaccaggatattgaacc
+tgtactttttctcattaatttcgaattcatccgcccaaataattgggtgt
+ttacaatacaatgaattttttcattttaattcacagattataaattgcaa
+aatttttcagtatttgtcttattattactggtacagagagtgtagatagt
+tagagagtgccaggcatccgggacccaatggggcacatcaaaggctccca
+tcgatcgatatgcctaacatgttgaaaaccgattaaaacctcacgtttga
+atcccctctaaaaactgaatgtgtgccaacacagcgtcattgacgcattt
+acggtgtcttgacgcgatacgcgttttcaatacgaggcaaactcaaattt
+attattttcattttcaaaatatcaatttgttgaaaactagcaactactac
+tcatctcttcactcgtcattatggttaaattgcgcgatgaacagaagaca
+gagcttataaaacaggaaaaaaagcgccgacgcattgctcggttacgaca
+ggtatcgatttttctttcagttctcaaaaatattaattattattgacttc
+aggttcgccagcaaagtgcagcaaatgcaaagattacaagagacgtggtg
+aatcaacggaagcaagagttcattcaagaaattaggttgactttttcaaa
+tttaaaattataattgaaaatttatattttcagggaagaattgcacgagc
+aggtggatgcgctaatgacagaagtaactgaaaaatcgttaaaattacct
+gtctccgcaaaaaggaaaacttctacgcctcgctcacttgtaagtgttcg
+ttaaaatgatatttgcaaaaaaaaaccacgaattttcagaaaaagtcttc
+tcgttgtcgtgaaatgacggaaagcgatctggagttggcgaaaaaaagaa
+atgcagatgcaatgaagcatcttcgagaagcgaagaaaaagaaggaaaag
+gagcaagaggaaaagttggcaaagaagaaagaagccgcacgaaaagctaa
+tgcaattatgagaggagctcagatattgtaataataaacttttttttcat
+ttatgagtatatattgagatcaaacaaagaaagtgacaagaaatcgatat
+ttttaaacacaaaaaaaattaaaattgaattcctaccgatcacaaatggg
+cgaaagttagatgaaattagttttcacaagtgtatcggttgcggccccca
+tagtttattcttcgtggtggtcgttcacaaacgtcaaacgtcaatttcaa
+gcaagaaatttcattttcataaagaagccatggcagatagtcgtctctga
+aatattttacaaatttttgaatttcttttccattgaaaagttgttatttt
+ccgctgaaaaattcaatttttaaaaaaaaaacaacattatttgcaaaaat
+taaacttttttatttgaaataaatttttttctgaaaatttgaaaaatgca
+aaaaattcaaaaatttaaaatttgaaacaaattttttttcgaaaatttca
+aatttccgtgtaaaattaaatgaaacatgttatttttcattgaaaaatta
+tattttgcgcgtagagcatgttgaattggagcacacttaagtgtgctcca
+aatttgctatttttttcttctagatgcaccggtgcaccatgttaaaaatg
+cacttttttggcaaaggggaactaatcgatacattttttaaaattagata
+ctgtgctaaaattaaactttttatttgaataaatttgacattacaaaaaa
+aaaattttaaaattttaaaattgaaaaaaaagtttttgaaattttttttt
+gaaaattttaaattttcgcgtaaaattatatgaaaaatatgtttttttct
+attaaaaaaaaccgttttgtgcgcgaagtatggcgaattggggcacactt
+tattattgtcacgatgtaccatgtttaaaatgtaaaaatcgatttgcata
+ataaaggtggagtagagtcttttaagaattttgattttaataaattaggc
+tgtagggactgaatataactataaacaatttaatacaaaatttctgaatt
+tttatgatttttccaatttcgcaaaaattcaaaaaactagtcttactatt
+tttgaattcccacgcaaattaatgatcattgttggtttttcttgtttttt
+tttctttaatattcaatttgatgtttcagttcaccaaagtttcaagacat
+ttctagctaattagcaatattaaagaaaaaacaagacaaaccaataatca
+ttaatttgcgtgggaattcaaaaataatttaatatgaaaatgactcactt
+atgccacaaatttttcctatattctatgtaccactggctaaacttgtcaa
+attggccattattaacatcccaggtacgacgacgctcctccagctccttg
+gctacggtctcttcaatttgttcttcttttagtactcgttttcttaaaac
+tgatccagttgcgttgaaggctaatcggattttggcaattactgagccct
+aaacgatttctaatttaattttcccgaatttttagaactgacgtatccaa
+cttgttgaagacttcctagggcactcttataatctcctctatcaccttga
+aggaaatccatctcacaatcaccatccgcatcaaatgctgaccaatcatt
+gaaacagaactcctaaattttcaagtcaccattttaataagtaatttttg
+aaaaagacagttggagaaattcaaaatctcacaattatggattttttttc
+gaaagcttgcagtttgcagaaaaattgctggacaatttcttgatcatttt
+caagaaatttattatcttaaaattcacatcaaatctctttgaattaagag
+gtgggcggcaaacgatttttccggcaaatcggcaaattgtcgaaattgaa
+atttccggcaaactgtcggaattggaatttcggtcaaaatcgatttgccg
+aatttgccgaaaattatcggaaaattgtgattttgtacttttttcttgga
+aatttcagaatttcaattttaatcggcaaaattgtacacatcctataaat
+gttgctacatctattctgaacagtaagcaaattatatgatattattaaag
+aaaacgtgaaaaaattttcaaaaaagcacagttttaagtttttccgtctt
+ttaaaaaatccctcgaaacatttccgacacatggcaaatcgacaatttgc
+caaaaatgaaaatgaacggcaaaacgaacggcaattgccgcccacccctg
+cttcaattttttaagtgtatacctttctgaaagtataaaacccaggtcta
+tgcggacaattgatcccacgcaattgcccagccaaactattagattggtc
+ttcttcattctgctccaaattggtcaaagattcacatacatcacaaaaat
+aacaattgtttccatagccaccacatccattattactcgcgttttgacac
+ttcatgttggaaaagtcctttacattccaccactgatattccgctttcgc
+aaaaaaactattgtcaagcgcttctggtagatctttaactttgacattct
+taatttccaaagtaaagcatcctgggaatcgaacaacgtctggctttata
+tgaacttggctcgggttaatatccaatattttttttgctccgggtcgtgc
+ttaaaaatagcttctttgtgaatcaaacaattatggataacaaaaaatgg
+aaaagtacatactgcaacttgcgtcagcccttccgggaaccagccaaatg
+tcctgatagttgcatttgtggttgagcttcttcctagcatcggtgtgtct
+gactccattcctatactttgcacgacatgcaggatccaatatatcgcagt
+caagttgggcaaaaatcgataatataaatgagaaaataatcagaatggat
+atgttgaaggtgtaattcgaagatcgcatactaaatgtgaatatgtgtat
+gtttaaaatggaattttcaatacaattcgaataaaaggaaagtaaaatat
+ttgtagaagcaaatcaatttctaatgataattgcgtcacagtgtgtcctc
+taaagtgtcttctttcttgtctgcattttctcttatgcctctctttgtaa
+caagattggccgtacatgcccctcttccgacgcacctaacctgttgacca
+tggagaccttggaggtcatcattttttttgtgcgaaatttggcatttagc
+aagagaagggatcccttatgggagagaaataaagagtaaagataggcaat
+tatgctttgtaccacgagaaaaaaaaactcatttgacacatgaaaatctt
+ttgaaaatggaacaatcttatactattcagtacatgtgctccatgtcgta
+aagcggttttttacagtttcttgaatgaaatctcacgtggtgtcaggctg
+tcccttcgctctactgcacatgaaaacttatgacgtcacagcgaacttcg
+aataattgtttttcaatttaagagccgattttcgtgaaattgttttatca
+ttttttgaagcaaaatgcaataaaaacacaattttattttaaacattata
+tttaaaaattatgaaaatcgaagcttaaattgaaaatcaattattcgaaa
+tgcgatgtgacgtcacaattattgcaaaaaaacatttttcccatctttcg
+tgtgcagtcaagcgatgggacaacttgacaccacgtgactatacgagttg
+gtgattagaatttcaaatacaaaaacaatttaggaaaatactctgaaatt
+aggaactttagcaaaaagaaacattttaaaactattgctcgaggagtaca
+cgagctgtggaaatcgacatattttccaatttattattacggcaacaaaa
+aattctgatacttaatgcatattgcacatcatatttgacgcgcaaaatat
+ctgtgtagcgaaaactacagtgactatttaaatgactactggagatcttg
+cgtcgattttcaaaagaatttctcagtgacagcgatattacattttcctt
+cgttttttttttgtattactgtctcatttaaattaaataatatattgctt
+tcaattcattaacagaaaatcgaacccgtaaatcgacacaagagctacag
+tagtcatttaaagagatactgtaattttcgttacgagatatttagcgcgt
+caaatatgttgtggaatacgcattctcaaaattttgtgtatatcccgtaa
+taattgcaaaaatacacttcaattttaagaaaatttgaaagttgttcata
+aattggcggcattttttttttgagaatcctacagacaaaaaaaacgtgaa
+attttaaagaagcggtgtcggtgaatagagacgaagagggattaggagat
+aaaaataattgatcgaagcagcagaaaaaccgattttcttcattttcttg
+cccatctctctctctgtctctttttatttcgaaaaaagacagtcagactg
+gatttgagtggaatagaggggaaagggagggacagttctacgaagcgaaa
+cgaaaacgattatccatctctgttggcagtctcatatggaaaaggttgaa
+aattgaaaatattcgggaacaagaagaacacaataatatttatttcgaca
+tacaaaaggattatatttcttttttcaacgaagaaaaagaagttgaagac
+aagaagagagagagttagtactctgacacgaaaagggtaaaacttacctg
+aaaaggaaattggaaaataggggggggggggggggggaataaaataatga
+ataaataaaaaacatttaatgctccatagatctatcgattttcgactgat
+gtttatgcttttgattgaatgattcgatgaactgtgaagatgattgtccg
+tttgactcggccttatatttttccggtaccgggacattgaggatttcagg
+taacatccatggtttaagccatccgactagaggtgattgtccaggttggt
+atacaaccgaattatgatagaagaggagtccatcgagacggaaactggaa
+caaatattaaaggtgtagtagggaaatttgactttttgagggaatttcag
+ggaatttgagtgcatttgcagacccaaaacggtccaaaactaccaagtta
+aattaaacgttgtgaaaatttctcaaaaaaatgttacagtatttcttcta
+tattctattcgaaaattaaattttttgaaattttgaaactttttgaaaca
+aagcaaaaagcgagaatttgaataaaccgatttcgcaatttttggttttt
+ttatttattattacctgatctcgtttttcataagctccgccatctgatct
+tgggagcacggacagctgggaatcggtgagaaaatattcctgaagccagg
+cgtagatttcgctagttcaggagcctcttcaagtttagactttagcataa
+attggcggaaatcgtatggactctcaacatattcgtgtgcattccatgaa
+agaagatcgagcacatagtatgtctgattcgagtagatgcagtcaagaat
+tgtccaagcctgatctataaagtgagcacattgaaatcgattttttattt
+atttctaaaaacacacatacttttagcacgagtatttccaccaggaaggc
+gagactgaaatcgcgatacttctcttccgcctttgttatatgcaactgtg
+aagccctgaaatacaagattatcccgggtttttaataattaaatcattaa
+aaacaaactcgtgatgcaacgacaagtgtccgtttgcccactggagccat
+aaccatcgtccagtcggaagaaagtgattccggaatatccacaagccact
+cactcaacatcattttgtcggcatatctttaaaaattgaaatatagtttt
+tttttagtttaagaattcgaacttttaggaaattacttactttttaaatt
+ttccagtgtgtttttcagcagtagttgtctcaattttttgttcatcgtct
+tcgtcagaagtaacgtcatcaaaagctaaatttcgtagtttcataaatgt
+gtcaaagcggccatttttttgccgctcaagcgtttcttcacgacgctttg
+cttgttgttctgcagcttttgtaagatttttgtactgcgaatacctcgga
+tgttcacctgaaaaaatcaatgtttactttttgtgtttaaaaaaggagtc
+tcaccaagagctagaggatcgacctgaaatccactgctaagctggtctgt
+caacgaatctaaatcgtcccccatttgtgatactggaaaaaataatttta
+aattagtttaatcaaacttttaactaaatgagtataagaaaaagctttta
+caacaattaaaaaactgtaaaagtcgaaaaagttgctctgttgatttacg
+ataaacatctctcgttgcgggatagtggagtgtcgttgggggaaaatatt
+ttttcgatttcgaaatcgatttcaacacattcacagcgagaaaaaacaat
+tgcatataatcgattttttatattatggcctgtgtagacatatcaccact
+atcatatgaatctgtaaaatccgttgtcaaatatttgagcctgaaaaaaa
+ggtatactatttaatattgatatttaaaaaaatactggttcattaaccgc
+acggttaattaaccagtatttattacgggaacacaaaattctgagaatac
+gtattgcgcaacatgtttatatctcgtagcgaaaaccacagtaattttgt
+aaatgactactgaacgcgtaaatcgccaccagcgctacagtagtcattta
+aagaattactgtgattttcgctacgagatattttgcgcgtcaaatatgtt
+gtgcaatacgcattctcagaattttgtgttcatgtaatttgaacacattg
+aaaaacaaaacattttcagggagcaaattaatttatcgattccggcatat
+cataaaacagccaaacttgcacctttcaaactagaaaacgttgggttatc
+agatttgtacattgaagtcaatgatagaacgtggactttgagtgaaactg
+acatcattggttgcactcttgttttgtatcgctcaaacggctacaacttc
+cagcacattatttcaataactcctgaagttgcgttccagaagcttatatc
+cgaatatttgagaaatggaacacgtattaaaaaatttcacttgtatgccc
+ttccaagtctgctcaaaaacgttgagcttcatgtgtcagagctgaaaatt
+gacggatgcgactatgaaacatttgcaaagttccgtaaattcattgatca
+taaccaaaaaatattaaaggagattgggtttgtggtgcgtcagaatactt
+tatggatgtttgatgaacaattggtacagaacacttcaagctttccgcct
+tcaacttttgataaatttcaggtgaaaaacagtaaggagatttatctcca
+gtttccgtattataatccggtcccgttggacgatatgcttcttcggctga
+aaaacaatcatgtacacattatctatctgggcttcccgattgaaaaacta
+caagaattggctatggtgagaaattttgtaatattatactttggccaagt
+ttggtgcggatctttttgattttccactcagaaatacggtacccggtcct
+gtcacgaacagaacaataaggtctgcgcttttaagtacgtggtgtcagag
+tgccccatattgatttgatctacgcagatctatgagaatcgcgggaattt
+agatgcagatttcttaactggttttgaatggttaagaacgtgctaacgtg
+aaattttttggaaaaaaattcccgcattttttgtcgatcaaaccgcaatg
+agacagccggacaccatgtgtttaagtaaagcacagtatttaacccacta
+ctagttaaaaaaaaaatataattcagaactggatcgacactaagaagcca
+attggtacctcactcttcctggttgttgagcattatacatatgttatcga
+cgtgtttgagcatttaaccaaatactctaaagctattccttcaagactca
+ctgatcttgggtaaggtcttagaactctataatatacaaataagcttgac
+tcattcagaacttccttcttttcacattgtgtaacaatgcatattgatga
+aagttctgaattggtgctatttggaggtccaatggtaattgaatggaaac
+cgttcaaatggacgttgagaatgacgatcatgaaacgcggatcgacaatt
+tcaaaatgatttggaatcctccaaaattctgattttcctcaataaacatt
+ttaatattttatgttgcaatgtgaatacaattttccttctctgtcaggat
+actccagaaggaatgtatggaatcttgatgttcagaacatcagcatccct
+tgatcaaacatcgaatctgttattacactaaagtatgtgttcagtgaaag
+tgctgtaaaagatgattaacgtcctcaagaagcaaatgctggccgatgac
+agtttttttgaagattcagctaatttcattagatattctagttgattcaa
+tatcttgaagaaattttttcgcgagatgatgatgttctttcaagttaaaa
+ttttgataagtttacgttgtcaaaaataacaacagaaaactacagctggg
+agtaacattgcatataaataacataagtattaatttataattacactata
+aaatagttttaaaattgcctcagaacatgtaaacagtctaaataattact
+ttttagattattgtagataacaagaaaacaagtacaaaatcagtaagtat
+catgtcattacacatttgctagaaccagtattttgaacacacttgcggac
+acaggtcaacacacttgttcttagcacccgcctacagcttagatgttccc
+tttaaaaggaagtacttacgtcgctacaagggcagatttctccctacttg
+gctatatctctttgttcaatgattaaacgtcttctctgctttctcttttt
+tggcttaactttttcagctccggtgccattacccctagaagaagatttta
+atgttttccatacaaaacaggaaaaccatggtacgttttctcatcttttt
+tgaaatttgtatcaattaacagagcaaagattccttcgtgttgatcacaa
+tagagacaaagtagtctcattcgacgaatttcttcacatggaacttgcat
+atgttgatgcaaagaaagaagagtttgatacgttagataagaaccgtaag
+tgatgtttaaagcgttgagaaattagcaaactactttagatgacgggaaa
+gtgagccttgccgagtacgaagagcatttccacgaagcctcaagcaagaa
+tgaaaaatctcggaccgcatactttgccaaagttttcgaagatttcgatg
+aagatttcaatatggcattgagtcgtgaagaacttgaacgtgtgcttgct
+gaacgattcttagtcaaaccaagagaaaactttcccaaattgtttttcaa
+gtttgacgtagacaagtccggaggacttgatctgactggtgagctaagtg
+ctcgttttatacatctcgaagtttttttttacagaatacatgaaatttga
+tgctgaattcccattcgatcaaaccgatccagttggtggaggaccatcca
+aatctaacaatcaccacgatcaaatgcacacagaagtccctcaagatgct
+gacgctgctgcaattgcagccgtgttggcacaagcttcaccaacactgaa
+taagagtccagtcggtgttgctccaccccaaacgtcacctggattgcatc
+cagtggcaccagggttattccagacggcggtgccaatcaaaaaagtttga
+ttctaacagtaacttttacaattgtaatttttaattttctctaatactgg
+tatcacaaatcactgtcattcaccatatcaattgtttttgtaaataaagc
+tttatagttaaaatattatatacaaaaatgcattgcacaaaaggctgaaa
+tcagtgaactttagtatcagaggttctcgacataatctcattatgtacat
+caatcttactcaatttttgaccttctttgctgtgacggataccataacaa
+atgtagattacaatgccaatcgccaaccatacaaaaagacgaatccaagt
+cattgagtttagatacaccatcatgaagacattgatgagtagaccaaggc
+atggaatgaatggaacaagtggcaccttaaacttttgaacttattatttt
+ttgtaaaatgtttctgttcaccttatacgtagatgttgacttattctgtt
+catggcccaaaataaaaacgaatgataacagtgaaaaggcagcaccgact
+gtcaataaaataataccaccagcattgctaaatattcctgttctgaatgg
+aattgccaggcaaatataaccaaaaattaatcctgcgacagctacgcgaa
+ttgaaatcccctctgagaaattttcccaaacaccttgaaatggtacccac
+gacttcaggcatccgccttaaaatttttattttgttaattttgctaaaca
+cgtgtgtgttgctttataaaatcttaccattatcataatcagttgcacta
+ccatctacaagatgggattgatgccgcaaaataataacacagattgagac
+cattgaataagccaacagagttccaattgacaaaaagtcgacaagcgcct
+gcagatcgaagaccagtgccaaaattgcgtttattatagtaaacacaata
+gttgcattgagaggggtttttgtttttgagtttatcaccccaaaccaccc
+gaagataagaccatcatcagccattgcatatacagctcgtggaagagcga
+atgatccagtaactagattgttcaacatacccgctagtgcaccaacactc
+attataatcttggcaacagttgctcctttcatttcgaatgcagcagcaaa
+cgcggcatcaggatcaactaagtcgtatggtatcatcagcgtcaatgagg
+cgcccataagtacatatatgacacttataattgccaaagaagtgaaagta
+gccagtgggattgttctatgtgggtttttagcttcttctccagcagtggc
+taaagcttcaaatccgataaatgcgaaaaagcatgtggaagcgccagaaa
+tagctccttgtattccatacgggaaaaattttgaacgaccatcctggtat
+gttcctgaccagagtgaaaagtctgcataggttagtccacaaataatcac
+aaacgccaaaacctgaatgaacattttgtaataggttttgacactttaac
+ataccgccagatttaaaaatacaaaactagtattcacgttggcagagaac
+tttgatcccattgcaaccgccactgctacaagaaacaataggaagaacgc
+taaaaagtcagggtacagtgcaaaaaatccttttccctgtaagttgaaac
+gcgtattagcatatctgattcaaaaatggcacgaacatcactcaatcgtc
+caacagtatccaatgtccagttggacacacttttagatacaagattgtcg
+aaataagcagaccacgaacgagcgacagcggcgtttccaatcatgtactc
+tagtggtactgtccatccgacaatgaatgcccaaatttcacccattccta
+cataactgtatgtgtatgcacttccagctcggggaaacctggaagtttat
+cagtttaaacatgtaaagttaaaaaataataaaataaaccttgctccaaa
+ttcagcatagctgaaagctgacaaaagtgcagcaaagccagagaaaataa
+aagatagtataattgctggtcctgcttgatttcgaacaacggatcccgtg
+agtacgtatattccagctccaatcatgtgtccaatcgcaatgaacattac
+gtcaagaattgttaaacatcgtttcatttgtgattcaagatgtgatcctc
+cgtcaaatgtctttttacgaaacaagacatcggctatttgatgtaccttc
+atggttgttttttctgaaattatatatttctgaaggtgaaaaaactattc
+atacaagaaagattaaaaatatgaaaaattcggtatacttttatatggta
+atattgttttagttttaagataaggcttctggaattgtgaatcaaaaatg

Added: trunk/packages/bioperl/branches/upstream/current/t/data/directives.gff3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/directives.gff3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/directives.gff3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+##gff-version 3
+##sequence-region foo
+##feature-ontology bar
+##attribute-ontology baz
+##source-ontology boo
+###
+##FASTA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dmel_2Lchunk.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dmel_2Lchunk.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dmel_2Lchunk.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,456 @@
+LOCUS       2L:14000-19000          5001 bp    dna     linear   UNK 
+DEFINITION  THIS IS TEST DATA FOR THE UNFLATTENER - ERRONEOUS CDS RECORDS
+ACCESSION   unknown
+KEYWORDS    .
+SOURCE      fruit fly
+  ORGANISM  Drosophila melanogaster
+            Eukaryota; Metazoa; Arthropoda; Insecta; Pterygota; Neoptera;
+            Endopterygota; Diptera; Brachycera; Muscomorpha; Ephydroidea;
+            Drosophilidae; Drosophila.
+FEATURES             Location/Qualifiers
+     source          1..5001
+                     /location="14000..19000"
+                     /chromosome="2L"
+                     /type="tile"
+                     /organism="Drosophila melanogaster"
+                     /mol_type="genomic dna"
+                     /md5checksum="d53c791f329d9bb3953522c37087626e"
+     gene            complement(-4163..7373)
+                     /gene="l(2)gl"
+                     /comment="person=bmatthew; date=Tue May 14 09:00:55 PDT
+                     2002; text=Only one EST supports this alternative
+                     transcript"
+                     /dbxref="GO:0007391"
+                     /dbxref="GO:0005918"
+                     /dbxref="GO:0016082"
+                     /dbxref="GO:0005938"
+                     /dbxref="GO:0005578"
+                     /dbxref="GO:0017022"
+                     /dbxref="GO:0016327"
+                     /dbxref="GO:0045200"
+                     /dbxref="GO:0007559"
+                     /dbxref="GO:0008021"
+                     /dbxref="GO:0030154"
+                     /dbxref="GO:0007179"
+                     /dbxref="GO:0005856"
+                     /dbxref="GO:0002009"
+                     /dbxref="GO:0045184"
+                     /dbxref="GO:0005737"
+                     /dbxref="GO:0007269"
+                     /dbxref="GO:0008360"
+                     /dbxref="FlyBase:FBgn0002121"
+                     /dbxref="GO:0019991"
+                     /dbxref="GO:0016336"
+                     /dbxref="GO:0016335"
+                     /dbxref="GO:0016334"
+                     /dbxref="GO:0016333"
+                     /dbxref="flybase:FBan0002671"
+                     /dbxref="GO:0016332"
+                     /dbxref="GO:0005829"
+                     /dbxref="GO:0008283"
+                     /dbxref="GO:0045159"
+                     /dbxref="GO:0007399"
+                     /dbxref="GO:0005614"
+                     /dbxref="GO:0005886"
+                     /dbxref="GO:0045197"
+                     /dbxref="GO:0045175"
+                     /dbxref="GO:0045196"
+                     /dbxref="GO:0042127"
+                     /gbunit="AE003590"
+                     /sp_status="Perfect match to SwissProt real
+                     (computational)"
+                     /cyto_range="21A2-21A2"
+                     /standard_name="CG2671"
+     mRNA            complement(-4163..7373)
+                     /gene="l(2)gl"
+                     /standard_name="CG2671-RB"
+                     /product_desc="CG2671-RB transcript from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3 mRNA_length:5154
+                     transcript_boundaries:(2L:9,836..21,372[-])
+                     cyto:21A2-21A2 gene_info:[CG2671 gene symbol:l(2)gl
+                     FBgn0002121 (0 ""reduction of virulence"")]"
+     CDS             join(complement(-2781..-2655),complement(-2589..-2481),
+                     complement(-2220..-1778),complement(-1713..-1071),
+                     complement(-479..-374),complement(-316..875),
+                     complement(934..1712),complement(5886..5945))
+                     /translation="MLKFIRGKGQQPSADRQRYITAQHGFPHKPSALAYDPVLKLMAI
+                     GTQTGALKVFGQPGVELYGQHTLLNNSASELNVQLLEWVYGTGRILSLTAANQLILWE
+                     PVGATLLPIKTLPFDGKLKKVSSLCCSLSKDLLWIGTEGGNIYQLDLHTFTIKEPVIY
+                     HDVVLEQVPPAYKLNPGAIESIRQLPNSPSKLLVAYNRGLCVLWDFESASVQRAYIAP
+                     GHGQSVGLTVNFEGSEFTWYHADGSYATWSIDNPEPPSNVNYVPYGPDPCKSINRLYK
+                     GKRRSNDVIVFSGGMPRSAYGDHNCVSVHASDGHKVCLDFTSKVIDFFVTFENNRDVA
+                     EVLVVLLEEELCAYDLTDPNICAIKAPYLHSVHASAVTCNYLASEVVQSVYESILRAG
+                     DEQDIDYSNISWPITGGTLPDNLEESVEEDATKLYEILLTGHEDGSVKFWDCTGVLLK
+                     PIYNFKTSSIFGSESDFRDDAAADMSAEQVDEGEPPFRKSGLFDPYSDDPRLAVKKIA
+                     FCPKTGQLIVGGTAGQIVIADFIDLPEKVSLKYISMNLVSDRDGFVWKGHDQLNVRSN
+                     LLDGEAIPTTERGVNISGVLQVLPPASITCMALEASWGLVSGGTAHGLVLFDFKNFVP
+                     VFHRCTLNPNDLTGAGEQLSRRKSFKKSLRESFRKLRKGRSTRTNQSNQVPTTLEARP
+                     VERQIEARCADDGLGSMVRCLLFAKTYVTNVNITSPTLWSATNASTVSVFLLHLPPAQ
+                     TAATAVPSASGNAPPHMPRRISAQLAKEIQLKHRAPVVGISIFDQAGSPVDQLNAGEN
+                     GSPPHRVLIASEEQFKVFSLPQLKPINKYKLTANEGARIRRIHFGSFSCRISPETLQS
+                     MHGCSPTKSTRSHGDGEADPNISGSLAVSRGDVYNETALICLTNMGDIMVLSVPELKR
+                     QLNAAAVRREDINGVSSLCFTNSGEALYMMSSSELQRIALATSRVVQPTGVVPVEPLE
+                     NEESVLEENDAENNKETYACDEVVNTYEIKNPSGISICTRPAEENVGRNSVQQVNGVN
+                     ISNSPNQANETISSSIGDITVDSVRDHLNMTTTTLCSINTEETIGRLSVLSTQTNKAS
+                     TTVNMSEIPNINISNLEDLESKRNTTETSTSSVVIKSIITNISHEKTNGDNKIGTPKT
+                     APEESQF"
+                     /status="all done"
+                     /gene="l(2)gl"
+                     /product="CG2671-PB"
+                     /sp_comment="Imperfect match to REAL SP with
+                     corresponding FBgn"
+                     /author="bmatthew"
+                     /date="Tue May 14 09:05:05 PDT 2002"
+                     /protein_id="AAG22256"
+                     /codon_start=5943
+                     /timestamp="1021392305000"
+                     /standard_name="CG2671-RB"
+                     /product_desc="CG2671-PB translation from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3  mol_weight=125897
+                     cds_boundaries:(2L:11218..19944[-])  aa_length:1153
+                     transcript_info:[CG2671-RB  seq_release:3]
+                     gene_info:[gene symbol:l(2)gl FBgn0002121
+                     gene_boundaries:(2L:9836..21372[-])  (0 ""reduction of
+                     virulence"")]"
+     mRNA            complement(-4163..7373)
+                     /gene="l(2)gl"
+                     /standard_name="CG2671-RD"
+                     /product_desc="CG2671-RD transcript from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3 mRNA_length:5229
+                     transcript_boundaries:(2L:9,836..21,372[-])
+                     cyto:21A2-21A2 gene_info:[CG2671 gene symbol:l(2)gl
+                     FBgn0002121 (0 ""reduction of virulence"")]"
+     CDS             join(complement(-2781..-2655),complement(-2589..-2481),
+                     complement(-2220..-1778),complement(-1713..-1071),
+                     complement(-479..-374),complement(-316..875),
+                     complement(934..1649))
+                     /translation="MAIGTQTGALKVFGQPGVELYGQHTLLNNSASELNVQLLEWVYG
+                     TGRILSLTAANQLILWEPVGATLLPIKTLPFDGKLKKVSSLCCSLSKDLLWIGTEGGN
+                     IYQLDLHTFTIKEPVIYHDVVLEQVPPAYKLNPGAIESIRQLPNSPSKLLVAYNRGLC
+                     VLWDFESASVQRAYIAPGHGQSVGLTVNFEGSEFTWYHADGSYATWSIDNPEPPSNVN
+                     YVPYGPDPCKSINRLYKGKRRSNDVIVFSGGMPRSAYGDHNCVSVHASDGHKVCLDFT
+                     SKVIDFFVTFENNRDVAEVLVVLLEEELCAYDLTDPNICAIKAPYLHSVHASAVTCNY
+                     LASEVVQSVYESILRAGDEQDIDYSNISWPITGGTLPDNLEESVEEDATKLYEILLTG
+                     HEDGSVKFWDCTGVLLKPIYNFKTSSIFGSESDFRDDAAADMSAEQVDEGEPPFRKSG
+                     LFDPYSDDPRLAVKKIAFCPKTGQLIVGGTAGQIVIADFIDLPEKVSLKYISMNLVSD
+                     RDGFVWKGHDQLNVRSNLLDGEAIPTTERGVNISGVLQVLPPASITCMALEASWGLVS
+                     GGTAHGLVLFDFKNFVPVFHRCTLNPNDLTGAGEQLSRRKSFKKSLRESFRKLRKGRS
+                     TRTNQSNQVPTTLEARPVERQIEARCADDGLGSMVRCLLFAKTYVTNVNITSPTLWSA
+                     TNASTVSVFLLHLPPAQTAATAVPSASGNAPPHMPRRISAQLAKEIQLKHRAPVVGIS
+                     IFDQAGSPVDQLNAGENGSPPHRVLIASEEQFKVFSLPQLKPINKYKLTANEGARIRR
+                     IHFGSFSCRISPETLQSMHGCSPTKSTRSHGDGEADPNISGSLAVSRGDVYNETALIC
+                     LTNMGDIMVLSVPELKRQLNAAAVRREDINGVSSLCFTNSGEALYMMSSSELQRIALA
+                     TSRVVQPTGVVPVEPLENEESVLEENDAENNKETYACDEVVNTYEIKNPSGISICTRP
+                     AEENVGRNSVQQVNGVNISNSPNQANETISSSIGDITVDSVRDHLNMTTTTLCSINTE
+                     ETIGRLSVLSTQTNKASTTVNMSEIPNINISNLEDLESKRNTTETSTSSVVIKSIITN
+                     ISHEKTNGDNKIGTPKTAPEESQF"
+                     /status="all done"
+                     /gene="l(2)gl"
+                     /product="CG2671-PD"
+                     /sp_comment="Imperfect match to REAL SP with
+                     corresponding FBgn"
+                     /author="bmatthew"
+                     /date="Tue May 14 09:06:14 PDT 2002"
+                     /protein_id="AAN10501"
+                     /codon_start=1647
+                     /timestamp="1021392374000"
+                     /standard_name="CG2671-RD"
+                     /product_desc="CG2671-PD translation from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3  mol_weight=121250
+                     cds_boundaries:(2L:11218..15648[-])  aa_length:1112
+                     transcript_info:[CG2671-RD  seq_release:3]
+                     gene_info:[gene symbol:l(2)gl FBgn0002121
+                     gene_boundaries:(2L:9836..21372[-])  (0 ""reduction of
+                     virulence"")]"
+     mRNA            complement(-4163..7373)
+                     /gene="l(2)gl"
+                     /standard_name="CG2671-RE"
+                     /product_desc="CG2671-RE transcript from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3 mRNA_length:5224
+                     transcript_boundaries:(2L:9,836..21,372[-])
+                     cyto:21A2-21A2 gene_info:[CG2671 gene symbol:l(2)gl
+                     FBgn0002121 (0 ""reduction of virulence"")]"
+     CDS             join(complement(-2781..-2655),complement(-2589..-2481),
+                     complement(-2220..-1778),complement(-1713..-1071),
+                     complement(-479..-374),complement(-316..875),
+                     complement(934..1649))
+                     /translation="MAIGTQTGALKVFGQPGVELYGQHTLLNNSASELNVQLLEWVYG
+                     TGRILSLTAANQLILWEPVGATLLPIKTLPFDGKLKKVSSLCCSLSKDLLWIGTEGGN
+                     IYQLDLHTFTIKEPVIYHDVVLEQVPPAYKLNPGAIESIRQLPNSPSKLLVAYNRGLC
+                     VLWDFESASVQRAYIAPGHGQSVGLTVNFEGSEFTWYHADGSYATWSIDNPEPPSNVN
+                     YVPYGPDPCKSINRLYKGKRRSNDVIVFSGGMPRSAYGDHNCVSVHASDGHKVCLDFT
+                     SKVIDFFVTFENNRDVAEVLVVLLEEELCAYDLTDPNICAIKAPYLHSVHASAVTCNY
+                     LASEVVQSVYESILRAGDEQDIDYSNISWPITGGTLPDNLEESVEEDATKLYEILLTG
+                     HEDGSVKFWDCTGVLLKPIYNFKTSSIFGSESDFRDDAAADMSAEQVDEGEPPFRKSG
+                     LFDPYSDDPRLAVKKIAFCPKTGQLIVGGTAGQIVIADFIDLPEKVSLKYISMNLVSD
+                     RDGFVWKGHDQLNVRSNLLDGEAIPTTERGVNISGVLQVLPPASITCMALEASWGLVS
+                     GGTAHGLVLFDFKNFVPVFHRCTLNPNDLTGAGEQLSRRKSFKKSLRESFRKLRKGRS
+                     TRTNQSNQVPTTLEARPVERQIEARCADDGLGSMVRCLLFAKTYVTNVNITSPTLWSA
+                     TNASTVSVFLLHLPPAQTAATAVPSASGNAPPHMPRRISAQLAKEIQLKHRAPVVGIS
+                     IFDQAGSPVDQLNAGENGSPPHRVLIASEEQFKVFSLPQLKPINKYKLTANEGARIRR
+                     IHFGSFSCRISPETLQSMHGCSPTKSTRSHGDGEADPNISGSLAVSRGDVYNETALIC
+                     LTNMGDIMVLSVPELKRQLNAAAVRREDINGVSSLCFTNSGEALYMMSSSELQRIALA
+                     TSRVVQPTGVVPVEPLENEESVLEENDAENNKETYACDEVVNTYEIKNPSGISICTRP
+                     AEENVGRNSVQQVNGVNISNSPNQANETISSSIGDITVDSVRDHLNMTTTTLCSINTE
+                     ETIGRLSVLSTQTNKASTTVNMSEIPNINISNLEDLESKRNTTETSTSSVVIKSIITN
+                     ISHEKTNGDNKIGTPKTAPEESQF"
+                     /status="all done"
+                     /gene="l(2)gl"
+                     /product="CG2671-PE"
+                     /sp_comment="Imperfect match to REAL SP with
+                     corresponding FBgn"
+                     /author="bmatthew"
+                     /date="Tue May 14 09:02:48 PDT 2002"
+                     /protein_id="AAN10502"
+                     /codon_start=1647
+                     /timestamp="1021392168000"
+                     /standard_name="CG2671-RE"
+                     /product_desc="CG2671-PE translation from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3  mol_weight=121250
+                     cds_boundaries:(2L:11218..15648[-])  aa_length:1112
+                     transcript_info:[CG2671-RE  seq_release:3]
+                     gene_info:[gene symbol:l(2)gl FBgn0002121
+                     gene_boundaries:(2L:9836..21372[-])  (0 ""reduction of
+                     virulence"")]"
+     mRNA            complement(-4163..7373)
+                     /gene="l(2)gl"
+                     /standard_name="CG2671-RF"
+                     /product_desc="CG2671-RF transcript from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3 mRNA_length:5081
+                     transcript_boundaries:(2L:9,836..21,372[-])
+                     cyto:21A2-21A2 gene_info:[CG2671 gene symbol:l(2)gl
+                     FBgn0002121 (0 ""reduction of virulence"")]"
+     CDS             join(complement(-2781..-2655),complement(-2589..-2481),
+                     complement(-2220..-1778),complement(-1713..-1071),
+                     complement(-479..-374),complement(-316..875),
+                     complement(934..1649))
+                     /translation="MAIGTQTGALKVFGQPGVELYGQHTLLNNSASELNVQLLEWVYG
+                     TGRILSLTAANQLILWEPVGATLLPIKTLPFDGKLKKVSSLCCSLSKDLLWIGTEGGN
+                     IYQLDLHTFTIKEPVIYHDVVLEQVPPAYKLNPGAIESIRQLPNSPSKLLVAYNRGLC
+                     VLWDFESASVQRAYIAPGHGQSVGLTVNFEGSEFTWYHADGSYATWSIDNPEPPSNVN
+                     YVPYGPDPCKSINRLYKGKRRSNDVIVFSGGMPRSAYGDHNCVSVHASDGHKVCLDFT
+                     SKVIDFFVTFENNRDVAEVLVVLLEEELCAYDLTDPNICAIKAPYLHSVHASAVTCNY
+                     LASEVVQSVYESILRAGDEQDIDYSNISWPITGGTLPDNLEESVEEDATKLYEILLTG
+                     HEDGSVKFWDCTGVLLKPIYNFKTSSIFGSESDFRDDAAADMSAEQVDEGEPPFRKSG
+                     LFDPYSDDPRLAVKKIAFCPKTGQLIVGGTAGQIVIADFIDLPEKVSLKYISMNLVSD
+                     RDGFVWKGHDQLNVRSNLLDGEAIPTTERGVNISGVLQVLPPASITCMALEASWGLVS
+                     GGTAHGLVLFDFKNFVPVFHRCTLNPNDLTGAGEQLSRRKSFKKSLRESFRKLRKGRS
+                     TRTNQSNQVPTTLEARPVERQIEARCADDGLGSMVRCLLFAKTYVTNVNITSPTLWSA
+                     TNASTVSVFLLHLPPAQTAATAVPSASGNAPPHMPRRISAQLAKEIQLKHRAPVVGIS
+                     IFDQAGSPVDQLNAGENGSPPHRVLIASEEQFKVFSLPQLKPINKYKLTANEGARIRR
+                     IHFGSFSCRISPETLQSMHGCSPTKSTRSHGDGEADPNISGSLAVSRGDVYNETALIC
+                     LTNMGDIMVLSVPELKRQLNAAAVRREDINGVSSLCFTNSGEALYMMSSSELQRIALA
+                     TSRVVQPTGVVPVEPLENEESVLEENDAENNKETYACDEVVNTYEIKNPSGISICTRP
+                     AEENVGRNSVQQVNGVNISNSPNQANETISSSIGDITVDSVRDHLNMTTTTLCSINTE
+                     ETIGRLSVLSTQTNKASTTVNMSEIPNINISNLEDLESKRNTTETSTSSVVIKSIITN
+                     ISHEKTNGDNKIGTPKTAPEESQF"
+                     /status="all done"
+                     /gene="l(2)gl"
+                     /product="CG2671-PF"
+                     /sp_comment="Imperfect match to REAL SP with
+                     corresponding FBgn"
+                     /author="bmatthew"
+                     /date="Tue May 14 09:02:52 PDT 2002"
+                     /protein_id="AAN10503"
+                     /codon_start=1647
+                     /timestamp="1021392172000"
+                     /standard_name="CG2671-RF"
+                     /product_desc="CG2671-PF translation from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3  mol_weight=121250
+                     cds_boundaries:(2L:11218..15648[-])  aa_length:1112
+                     transcript_info:[CG2671-RF  seq_release:3]
+                     gene_info:[gene symbol:l(2)gl FBgn0002121
+                     gene_boundaries:(2L:9836..21372[-])  (0 ""reduction of
+                     virulence"")]"
+     mRNA            complement(-4163..4584)
+                     /gene="l(2)gl"
+                     /standard_name="CG2671-RC"
+                     /product_desc="CG2671-RC transcript from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3 mRNA_length:5264
+                     transcript_boundaries:(2L:9,836..18,583[-])
+                     cyto:21A2-21A2 gene_info:[CG2671 gene symbol:l(2)gl
+                     FBgn0002121 (0 ""reduction of virulence"")]"
+     CDS             join(complement(-2781..-2655),complement(-2589..-2481),
+                     complement(-2220..-1778),complement(-1713..-1071),
+                     complement(-479..-374),complement(-316..875),
+                     complement(934..1712),complement(3054..3137))
+                     /translation="MLKFIRGKGQQPSADRHRLQKDLFAYRKTAQHGFPHKPSALAYD
+                     PVLKLMAIGTQTGALKVFGQPGVELYGQHTLLNNSASELNVQLLEWVYGTGRILSLTA
+                     ANQLILWEPVGATLLPIKTLPFDGKLKKVSSLCCSLSKDLLWIGTEGGNIYQLDLHTF
+                     TIKEPVIYHDVVLEQVPPAYKLNPGAIESIRQLPNSPSKLLVAYNRGLCVLWDFESAS
+                     VQRAYIAPGHGQSVGLTVNFEGSEFTWYHADGSYATWSIDNPEPPSNVNYVPYGPDPC
+                     KSINRLYKGKRRSNDVIVFSGGMPRSAYGDHNCVSVHASDGHKVCLDFTSKVIDFFVT
+                     FENNRDVAEVLVVLLEEELCAYDLTDPNICAIKAPYLHSVHASAVTCNYLASEVVQSV
+                     YESILRAGDEQDIDYSNISWPITGGTLPDNLEESVEEDATKLYEILLTGHEDGSVKFW
+                     DCTGVLLKPIYNFKTSSIFGSESDFRDDAAADMSAEQVDEGEPPFRKSGLFDPYSDDP
+                     RLAVKKIAFCPKTGQLIVGGTAGQIVIADFIDLPEKVSLKYISMNLVSDRDGFVWKGH
+                     DQLNVRSNLLDGEAIPTTERGVNISGVLQVLPPASITCMALEASWGLVSGGTAHGLVL
+                     FDFKNFVPVFHRCTLNPNDLTGAGEQLSRRKSFKKSLRESFRKLRKGRSTRTNQSNQV
+                     PTTLEARPVERQIEARCADDGLGSMVRCLLFAKTYVTNVNITSPTLWSATNASTVSVF
+                     LLHLPPAQTAATAVPSASGNAPPHMPRRISAQLAKEIQLKHRAPVVGISIFDQAGSPV
+                     DQLNAGENGSPPHRVLIASEEQFKVFSLPQLKPINKYKLTANEGARIRRIHFGSFSCR
+                     ISPETLQSMHGCSPTKSTRSHGDGEADPNISGSLAVSRGDVYNETALICLTNMGDIMV
+                     LSVPELKRQLNAAAVRREDINGVSSLCFTNSGEALYMMSSSELQRIALATSRVVQPTG
+                     VVPVEPLENEESVLEENDAENNKETYACDEVVNTYEIKNPSGISICTRPAEENVGRNS
+                     VQQVNGVNISNSPNQANETISSSIGDITVDSVRDHLNMTTTTLCSINTEETIGRLSVL
+                     STQTNKASTTVNMSEIPNINISNLEDLESKRNTTETSTSSVVIKSIITNISHEKTNGD
+                     NKIGTPKTAPEESQF"
+                     /status="all done"
+                     /gene="l(2)gl"
+                     /product="CG2671-PC"
+                     /sp_comment="Perfect match to REAL SP with corresponding
+                     FBgn"
+                     /author="bmatthew"
+                     /date="Tue May 14 09:02:56 PDT 2002"
+                     /protein_id="AAF51570"
+                     /codon_start=3135
+                     /timestamp="1021392176000"
+                     /standard_name="CG2671-RC"
+                     /product_desc="CG2671-PC translation from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3  mol_weight=126893
+                     cds_boundaries:(2L:11218..17136[-])  aa_length:1161
+                     transcript_info:[CG2671-RC  seq_release:3]
+                     gene_info:[gene symbol:l(2)gl FBgn0002121
+                     gene_boundaries:(2L:9836..21372[-])  (0 ""reduction of
+                     virulence"")]"
+     mRNA            complement(-4163..4584)
+                     /gene="l(2)gl"
+                     /standard_name="CG2671-RA"
+                     /product_desc="CG2671-RA transcript from_gene[CG2671
+                     l(2)gl FBgn0002121] seq_release:3 mRNA_length:5407
+                     transcript_boundaries:(2L:9,836..18,583[-])
+                     cyto:21A2-21A2 gene_info:[CG2671 gene symbol:l(2)gl
+                     FBgn0002121 (0 ""reduction of virulence"")]"
+     CDS             join(complement(-2781..-2655),complement(-2589..-2481),
+                     complement(-2220..-1778),complement(-1713..-1071),
+                     complement(-479..-374),complement(-316..875),
+                     complement(934..1712),complement(3054..3137))
+                     /translation="MLKFIRGKGQQPSADRHRLQKDLFAYRKTAQHGFPHKPSALAYD
+                     PVLKLMAIGTQTGALKVFGQPGVELYGQHTLLNNSASELNVQLLEWVYGTGRILSLTA
+                     ANQLILWEPVGATLLPIKTLPFDGKLKKVSSLCCSLSKDLLWIGTEGGNIYQLDLHTF
+                     TIKEPVIYHDVVLEQVPPAYKLNPGAIESIRQLPNSPSKLLVAYNRGLCVLWDFESAS
+                     VQRAYIAPGHGQSVGLTVNFEGSEFTWYHADGSYATWSIDNPEPPSNVNYVPYGPDPC
+                     KSINRLYKGKRRSNDVIVFSGGMPRSAYGDHNCVSVHASDGHKVCLDFTSKVIDFFVT
+                     FENNRDVAEVLVVLLEEELCAYDLTDPNICAIKAPYLHSVHASAVTCNYLASEVVQSV
+                     YESILRAGDEQDIDYSNISWPITGGTLPDNLEESVEEDATKLYEILLTGHEDGSVKFW
+                     DCTGVLLKPIYNFKTSSIFGSESDFRDDAAADMSAEQVDEGEPPFRKSGLFDPYSDDP
+                     RLAVKKIAFCPKTGQLIVGGTAGQIVIADFIDLPEKVSLKYISMNLVSDRDGFVWKGH
+                     DQLNVRSNLLDGEAIPTTERGVNISGVLQVLPPASITCMALEASWGLVSGGTAHGLVL
+                     FDFKNFVPVFHRCTLNPNDLTGAGEQLSRRKSFKKSLRESFRKLRKGRSTRTNQSNQV
+                     PTTLEARPVERQIEARCADDGLGSMVRCLLFAKTYVTNVNITSPTLWSATNASTVSVF
+                     LLHLPPAQTAATAVPSASGNAPPHMPRRISAQLAKEIQLKHRAPVVGISIFDQAGSPV
+                     DQLNAGENGSPPHRVLIASEEQFKVFSLPQLKPINKYKLTANEGARIRRIHFGSFSCR
+                     ISPETLQSMHGCSPTKSTRSHGDGEADPNISGSLAVSRGDVYNETALICLTNMGDIMV
+                     LSVPELKRQLNAAAVRREDINGVSSLCFTNSGEALYMMSSSELQRIALATSRVVQPTG
+                     VVPVEPLENEESVLEENDAENNKETYACDEVVNTYEIKNPSGISICTRPAEENVGRNS
+                     VQQVNGVNISNSPNQANETISSSIGDITVDSVRDHLNMTTTTLCSINTEETIGRLSVL
+                     STQTNKASTTVNMSEIPNINISNLEDLESKRNTTETSTSSVVIKSIITNISHEKTNGD
+                     NKIGTPKTAPEESQF"
+                     /status="all done"
+                     /gene="l(2)gl"
+                     /product="CG2671-PA"
+                     /sp_comment="Perfect match to REAL SP with corresponding
+                     FBgn"
+                     /comment="person=bmatthew; date=Tue May 14 08:53:50 PDT
+                     2002; text=Only one EST supports this alternative
+                     transcript"
+                     /author="bmatthew"
+                     /date="Tue May 14 09:03:00 PDT 2002"
+                     /protein_id="AAG22255"
+                     /codon_start=3135
+                     /timestamp="1021392180000"
+                     /standard_name="CG2671-RA"
+                     /product_desc="CG2671-PA translation from_gene[CG2671
+                     l(2)gl FBgn0002121]seq_release:3  mol_weight=126893
+                     cds_boundaries:(2L:11218..17136[-])  aa_length:1161
+                     transcript_info:[CG2671-RA  seq_release:3]
+                     gene_info:[gene symbol:l(2)gl FBgn0002121
+                     gene_boundaries:(2L:9836..21372[-]) (0 ""reduction of
+                     virulence"")]"
+BASE COUNT     1613 a    930 c    879 g   1579 t
+ORIGIN
+        1 cttctccgtc taataagttc gatcgcacgt ttaactgatc gtgacccttc cacacaaatc
+       61 catcacgatc gctgaccaaa ttcattgaaa tgtattttaa agacactttt tcgggtaagt
+      121 ctatgaagtc ggctataact atttggcccg ctgtgccacc aacaataagt tgtccggttt
+      181 ttgggcagaa tgctattttc ttcactgcta aacgagggtc atctgaataa ggatcaaaaa
+      241 gtcctgattt ccgaaatggc ggttctcctt catcgacttg ttccgcactc atatctgcag
+      301 ctgcgtcatc tcggaagtct gactcacttc caaaaatgct gctagtttta aaattataaa
+      361 ttggtttaag caacactcca gtgcagtccc aaaatttaac agaaccatct tcgtgaccag
+      421 ttaacaaaat ctcataaagc ttagtcgcgt cctcttctac agattcttct aagttatccg
+      481 ggagagtacc gccagtgata ggccagctaa tattgctata gtcaatgtct tgttcatctc
+      541 cagctcttaa aatactttca tataccgact gtacgacttc agaagcaagg taattgcaag
+      601 tgacagctga tgcatggaca gagtgaagat atggcgcttt gatagcacaa atattagggt
+      661 cagtaagatc gtaagcgcag agttcctctt caagtagtac aacaagaact tcagcgacat
+      721 ctctattatt ttcaaaggtc acaaaaaagt caatcacttt agacgtaaag tcaagacaca
+      781 ctttgtgtcc atcgctggcg tgaacggaca cacaattgtg atcaccatat gctgaccgtg
+      841 gcatgccgcc ggaaaaaaca attacatcgt tggatctaaa agaacgagaa attagtacac
+      901 atctaaaaaa ctttgtgttt aagttatatt aaccttcgct tgcctttgta cagtcgattt
+      961 atgcttttgc atggatcagg tccataaggc acataattaa cattcgacgg cggttctggg
+     1021 ttatctatgc tccaagtggc gtatgaacca tcagcgtggt accaggtaaa ttcagatcct
+     1081 tcgaagttca ctgtaagacc aacgctctgt ccatgtccag gggctatgta tgctcgctgg
+     1141 acagatgcgc tttcaaaatc ccacaaaaca caaaggccgc gattgtatgc aactagaagt
+     1201 ttgctagggg agtttggaag ttggcggatt gactcaattg caccaggatt tagcttgtag
+     1261 gctggtggca cctgctctag cacaacgtca tggtaaatta caggctcctt aatggtaaat
+     1321 gtatgtaaat ccagttgata gatgtttcca ccttctgttc caatccatag cagatcctta
+     1381 ctgagagaac agcacagcga tgaaactttt ttaagtttgc cgtcaaacgg tagtgttttg
+     1441 attggcagca acgttgctcc aactggctcc catagaatta attgattcgc tgccgtcaac
+     1501 gaaagtatgc gaccagttcc atacacccat tcaagtaatt gtacattaag ctccgatgct
+     1561 gaattgttta acaaagtatg ctgaccgtac aattcaactc cgggttgacc gaaaactttt
+     1621 aaagcccctg tttgcgtccc tattgccata agtttcaaaa ctggatcata cgcaagagcc
+     1681 gaaggcttat gaggaaagcc atgctgtgcc gtctaaaaga aaaaaatatt aaaaagtttc
+     1741 aaaaagcaat tacagattct taataattca aaattgtttg gttcatttct taattgtaaa
+     1801 agggttttcc aaaacattct agttttttca gttcccaaga tctggtaaat cgtcagatta
+     1861 cttgccactt tatagagtcg ggaacgcttt cttaaaccat ttacatactt ttaccgaatc
+     1921 taatacagca tttttcttta caattaacgg gaataaaaac ctcaataaat attgaggttt
+     1981 gaatttaaat taagttcaaa aagcgcccaa agctttaaat gctgctctta agtatgtctc
+     2041 ttataaacat cactgtagac agcagtccct actatcatac tatatttaca ttgatcgaaa
+     2101 aaaggtttgg taaatcttat ctaattgtat aaattgtatt tttatacaaa aaaagtttta
+     2161 agaagttgct atcagatctc gttatgagtt gattttgttt gattcatatt gcaggtgtat
+     2221 ttggatttct aacgcagtta caggcgaata aattcttttt cttttgactt taaaatctta
+     2281 tcaattttac gccgtctatg acatacttat tttgtgcctg aaataagggt gtttgtgaaa
+     2341 ccaagccatt tacccaaccc attaagcaca taaaagcaat cttatcagct cccccctctt
+     2401 tctggagctg gatggcgatt acctctgacc tacttttatt tgtttaataa catgtttata
+     2461 ctaacatggg ataaaaaaaa ttcttataaa atttcatata cgtatattta ttaaactatt
+     2521 atggtaatag ggaaacatct atggtatatt taattcgaaa atatttaaaa ggaatggaca
+     2581 acattacgag cacaacgata taatagaatt tttaaagtag ccaaaaacag gcggtggaaa
+     2641 aagacactca caaaatcaac aaagcttcag gtacctcctc atatttggca ccgcatagaa
+     2701 ggagtgcaca aaagaaacga gcaaaaaagc accgtaacct tttttatgcg gtgctttgtg
+     2761 gcgcgagtct acaaaatact gtgtgtgtat gcggtttact tgtagttgcg gtggaaaaaa
+     2821 tttacctaaa tgttaatgcc caaaagtatg cacataggag aagacataac aaaagattga
+     2881 aaaggaggta tttatttatt cttaataaat tataagcttt cttctaactt ccataaaaat
+     2941 gtatttacca cccaggatca aaataaatga caccccacaa aatataccaa tcggctacat
+     3001 atacgaaaaa aaaaaaataa aatagaatgg aaagagaaaa ttgtttgttg taccttacga
+     3061 taagcaaaaa ggtccttctg taggcggtgt ctgtcagcac tgggctgctg cccttttcct
+     3121 ctgataaact ttaacataat tggttatgct cttgtggtgg gttggtttcg atgaataaat
+     3181 atgtatacta attgtggccg tatttattct aaactgaaaa taataaataa aaattaatca
+     3241 cattttcaat aagtaaaaaa ttaaaaagga acttgtatat tttttcactc ttatgaataa
+     3301 aaccgagaaa ttaaatttag gtaatactat taataccttt tatagatacg gacgatacgt
+     3361 ttaatattat tctggtatac tttaatcaaa atttgtttaa caaaaagggt tttacattct
+     3421 tggtcatagg aaattcagtg gacagataga tcttggctcc caagattttc caaactagtt
+     3481 gtgcttattt tcttaaaact tcaaagattt gcatgtaggc agtttaataa acatgtttgt
+     3541 acatatgtat atgtacatta ggaaaaatga gatgcaagat gagcgatctg tactctttgc
+     3601 aatgtgtatg tatgtacata tgtacctccc tttaatgaac tagcaattcc gtcacgcaca
+     3661 cgaaatattt ttttatttgt aaatcttttt ttaagtttgt ttaaacaatt tagttactat
+     3721 taaagctctg aaattcataa ataaataata ataaatgtat taattatgac aaaaaacaac
+     3781 ttaaaacata tacgtttatt tttgtaaaag cgttttaggc aaacgctagc tggtcggtac
+     3841 taatattttt tcgaagactg tataccgtat acatacattt atgtacgtag gtatgtatcc
+     3901 gtgagtaagc tgaggtgcaa aaccgtatca ccatttgctt ttcggctgat aaccaatacc
+     3961 aagtaagtca actgtttccc aaatgtatct gcataacaag gtgatgacct aattttgagt
+     4021 acataccgat ctttacagtt gaccagcttt gcgattgcat accggtatct gtttattcac
+     4081 atatacacac acaaattgcg tgttagacag acagcaaaat gacgactgtt aactgttggc
+     4141 gtggctgact caccgactac gcctctatgc taagaacata catatttgtg gacactttat
+     4201 ttaagaagtt tgaaatataa atcaattgcc taatcagtaa tactgttgtt gagcccttta
+     4261 cctttatgta tttccgtttg tacggttaag gcggtggccg agtaattttt tgaactattt
+     4321 tatttgctca ccaacacgca taagtttgta tgtattctta tgttgagaaa cgcccgcact
+     4381 gttcttcaca tcgttgcttt aaatgagaat aaatgaaata aacgtgtcaa ccaagaacac
+     4441 acagataata atacgaccgg tcagagctta accgtgtatc tgttttataa aacgtgaaca
+     4501 atattattag cccaaaacga tatgcgcgtc attttaacac acacaaagtc gcgatcggtg
+     4561 ggtctagtgt gcccgtgtat ctctatcgaa aaaatcatat attttttaga aggtatattt
+     4621 tccatgactg actggaatgg gtggagaaca tcgctttggg agtgaaactc cgctataaat
+     4681 tgtcatttgt ttttatagca acctttttga tatggtcgcc aacttttata taattataat
+     4741 aataattttt gtaattaatc accgctgtcg cacgaaaaat ttaggaaatg tgccaacgag
+     4801 cggaattttt cggaacgatc gatgttttac cataacctgc tatataaatt gtgaaccaca
+     4861 ttttcaacac aaaactagat gtgcttcatt tataacgcaa tgtcgtgatt ttgtgggcag
+     4921 tgtgaccatg tccaatctgc gttcaagtaa aaccatattt agtgacacaa actttccatt
+     4981 acaaaatacc aaacaacttt a
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dna1.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dna1.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dna1.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4 @@
+>Test1
+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
+TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
+TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dna2.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dna2.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dna2.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+>Test1
+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
+TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
+TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC
+ATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG
+>Test2
+CCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAA
+GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
+AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTG
+AAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTT
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub-prot.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub-prot.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub-prot.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+>gi|142865|gb|AAA22406.1| DNA primase
+MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNV
+FSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARPESSGEQKMAEAHELLKKFYHHLLINTKEGQE
+ALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFP
+IHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSD
+VKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELLQKKGCKVRVAMIPDGLDPDDYI
+KKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQLASEF
+SLSQESLTEQLSVFSKQNKPADNSGETKTRRAHLTTKARQKRLRPAYENAERLLLAHMLRDRSVIKKVID
+RVGFQFNIDEHRALAAYLYAFYEEGAELTPQHLMARVTDDHISQLLSDILMLQVNQELSEAELSDYVKKV
+LNQRNWSMIKEKEAERAEAERQKDFLRAASLAQEIVTLNRSLK

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dnaE-bsub.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,30 @@
+>gi|142864|gb|M10040.1|BACDNAE B.subtilis dnaE gene encoding DNA primase, complete cds
+GTACGACGGAGTGTTATAAGATGGGAAATCGGATACCAGATGAAATTGTGGATCAGGTGCAAAAGTCGGC
+AGATATCGTTGAAGTCATAGGTGATTATGTTCAATTAAAGAAGCAAGGCCGAAACTACTTTGGACTCTGT
+CCTTTTCATGGAGAAAGCACACCTTCGTTTTCCGTATCGCCCGACAAACAGATTTTTCATTGCTTTGGCT
+GCGGAGCGGGCGGCAATGTTTTCTCTTTTTTAAGGCAGATGGAAGGCTATTCTTTTGCCGAGTCGGTTTC
+TCACCTTGCTGACAAATACCAAATTGATTTTCCAGATGATATAACAGTCCATTCCGGAGCCCGGCCAGAG
+TCTTCTGGAGAACAAAAAATGGCTGAGGCACATGAGCTCCTGAAGAAATTTTACCATCATTTGTTAATAA
+ATACAAAAGAAGGTCAAGAGGCACTGGATTATCTGCTTTCTAGGGGCTTTACGAAAGAGCTGATTAATGA
+ATTTCAGATTGGCTATGCTCTTGATTCTTGGGACTTTATCACGAAATTCCTTGTAAAGAGGGGATTTAGT
+GAGGCGCAAATGGAAAAAGCGGGTCTCCTGATCAGACGCGAAGACGGAAGCGGATATTTCGACCGCTTCA
+GAAACCGTGTCATGTTTCCGATCCATGATCATCACGGGGCTGTTGTTGCTTTCTCAGGCAGGGCTCTTGG
+CAGCCAGCAGCCTAAGTATATGAACAGTCCTGAAACCCCGCTCTTTCATAAAAGCAAACTGCTTTACAAT
+TTTTATAAGGCCCGCCTTCATATCAGAAAGCAGGAAAGAGCAGTCTTATTTGAAGGGTTTGCTGATGTCT
+ATACGGCCGTAAGCTCGGATGTAAAGGAAAGCATAGCCACGATGGGAACGTCTCTTACAGATGATCATGT
+CAAGATCCTGAGAAGAAACGTCGAAGAAATCATTCTTTGCTATGACTCTGATAAAGCCGGTTATGAAGCC
+ACCTTAAAAGCTTCGGAGCTTCTGCAAAAAAAAGGCTGCAAAGTCAGAGTTGCAATGATTCCTGACGGAT
+TGGACCCTGATGATTACATCAAAAAATTCGGCGGGGAAAAATTTAAAAACGACATTATTGACGCAAGTGT
+CACCGTAATGGCGTTCAAAATGCAATATTTCCGAAAAGGAAAGAACCTGTCCGATGAAGGCGACCGCCTA
+GCTTACATTAAAGACGTACTGAAAGAAATCAGCACGCTTTCAGGGTCTCTAGAGCAGGAAGTCTATGTAA
+AGCAGCTTGCTTCAGAGTTTTCGCTTTCACAGGAGTCTTTAACTGAGCAGCTGTCTGTTTTCAGCAAGCA
+AAACAAACCTGCTGACAATAGCGGTGAAACTAAAACGCGGCGAGCGCATCTGACGACAAAAGCAAGGCAA
+AAACGTTTGCGTCCGGCGTATGAAAATGCAGAAAGGCTGTTACTCGCTCACATGCTTCGAGATCGGAGCG
+TCATCAAAAAAGTGATTGACCGGGTAGGGTTTCAATTTAATATTGATGAGCACCGGGCATTAGCCGCTTA
+TCTTTATGCTTTTTATGAAGAGGGAGCCGAGCTGACGCCTCAGCATCTGATGGCCAGGGTGACGGATGAT
+CATATAAGCCAGCTCTTGTCCGATATATTAATGCTTCAGGTTAATCAAGAGCTTAGCGAAGCCGAGTTAT
+CAGATTATGTAAAAAAAGTGTTGAATCAAAGAAATTGGTCAATGATAAAAGAAAAAGAGGCGGAAAGAGC
+CGAAGCAGAAAGGCAAAAAGATTTTTTAAGAGCTGCTTCTTTGGCTCAAGAAATCGTTACATTGAACCGA
+TCTTTAAAATAACTGGAGAACTGATGAGGAGCATTTATTGGCAATGATTCCTTGCGGAGGAGCAAATAGA
+TCGCTTAACCTCATCATGAATTGTCATTTCATTATTCGCACATTGTTAAAGGCAGTTCACATAGAAAACG
+CCTGAATGGACCGAATAAGAATCATACCGCTTATAGAATTC

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wublastx
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wublastx	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wublastx	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,145 @@
+BLASTX 2.0MP-WashU [12-Feb-2001] [linux-i686 01:36:08 31-Jan-2001]
+
+Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2000) http://blast.wustl.edu
+Gish, Warren and David J. States (1993).  Identification of protein coding
+regions by database similarity search.  Nat. Genet. 3:266-72.
+
+Notice:  statistical significance is estimated under the assumption that the
+equivalent of one entire reading frame in the query sequence codes for protein
+and that significant alignments will involve only coding reading frames.
+
+Query=  gi|142864|gb|M10040.1|BACDNAE B.subtilis dnaE gene encoding DNA
+    primase, complete cds
+        (2001 letters)
+
+  Translating both strands of query sequence in all 6 reading frames
+
+Database:  ecoli.aa
+           4289 sequences; 1,358,990 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                              High  Probability
+Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
+
+gi|1789447|gb|AAC76102.1| (AE000388) DNA biosynthesis; DN...   671  1.1e-74   1
+
+
+
+>gi|1789447|gb|AAC76102.1| (AE000388) DNA biosynthesis; DNA primase
+            [Escherichia coli]
+        Length = 581
+
+  Plus Strand HSPs:
+
+ Score = 671 (265.8 bits), Expect = 1.1e-74, P = 1.1e-74
+ Identities = 151/421 (35%), Positives = 223/421 (52%), Frame = +3
+
+Query:    21 MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFH 200
+             M  RIP   ++ +    DIV++I   V+LKKQG+N+   CPFH E TPSF+V+ +KQ +H
+Sbjct:     1 MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYH 60
+
+Query:   201 CFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKM 371
+             CFGCGA GN   FL   +   F E+V  LA  + ++ P +    +G+ P   E    Q +
+Sbjct:    61 CFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTL 116
+
+Query:   372 AEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKR 551
+              +  + L  FY   L        A  YL  RG + E+I  F IG+A   WD + K     
+Sbjct:   117 YQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGN 175
+
+Query:   552 GFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSP 731
+               +   +  AG+L+  + G  Y DRFR RVMFPI D  G V+ F GR LG+  PKY+NSP
+Sbjct:   176 PENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSP 234
+
+Query:   732 ETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHV 911
+             ET +FHK + LY  Y+A+    +  R ++ EG+ DV       +  ++A++GTS T DH+
+Sbjct:   235 ETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHI 294
+
+Query:   912 KILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFG 1082
+             ++L R    +I CYD D+AG +A  +A E        G ++R   +PDG DPD  ++K G
+Sbjct:   295 QLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEG 354
+
+Query:  1083 GEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVK 1262
+              E F+  + + ++ + AF         +LS    R       L  IS + G   + +Y++
+Sbjct:   355 KEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLR 412
+
+Query:  1263 Q 1265
+             Q
+Sbjct:   413 Q 413
+
+
+Parameters:
+  novalidctxok
+  nonnegok
+  gapall
+  Q=12
+  R=1
+  cpus=1
+  filter=seg
+  matrix=blosum62
+  W=3
+  S2=41
+  gapS2=68
+  X=16
+  gapX=38
+  hitdist=40
+  gi
+  gapL=0.27
+  gapK=0.047
+  gapH=0.23
+
+  ctxfactor=5.99
+  E=10
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +3      0   blosum62        0.318   0.135   0.401    0.324   0.139   0.405  
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+   +2      0   blosum62        0.318   0.135   0.401    0.365   0.163   0.618  
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+   +1      0   blosum62        0.318   0.135   0.401    0.356   0.155   0.528  
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+   -1      0   blosum62        0.318   0.135   0.401    0.350   0.155   0.543  
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+   -2      0   blosum62        0.318   0.135   0.401    0.350   0.155   0.505  
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+   -3      0   blosum62        0.318   0.135   0.401    0.358   0.157   0.543  
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+
+  Query
+  Frame  MatID  Length  Eff.Length     E    S W   T  X   E2     S2
+   +3      0      666       666       10.  59 3  12 16  0.021   41
+                                                    38  0.0     59
+   +2      0      666       666       10.  59 3  12 16  0.021   41
+                                                    38  0.0     59
+   +1      0      667       667       10.  59 3  12 16  0.021   41
+                                                    38  0.0     59
+   -1      0      667       667       10.  59 3  12 16  0.021   41
+                                                    38  0.0     59
+   -2      0      666       666       10.  59 3  12 16  0.021   41
+                                                    38  0.0     59
+   -3      0      666       666       10.  59 3  12 16  0.021   41
+                                                    38  0.0     59
+
+
+Statistics:
+
+  Database:  /home/jes12/db/ecoli.aa
+   Title:  ecoli.aa
+   Posted:  2:52:35 PM EST Nov 18, 2001
+   Created:  9:46:47 AM EST Nov 18, 2001
+   Format:  XDF-1
+   # of letters in database:  1,358,990
+   # of sequences in database:  4289
+   # of database sequences satisfying E:  1
+  No. of states in DFA:  600 (64 KB)
+  Total size of DFA:  655 KB (1283 KB)
+  Time to generate neighborhood:  0.04u 0.01s 0.05t  Elapsed: 00:00:00
+  No. of threads or processors used:  1
+  Search cpu time:  0.44u 0.01s 0.45t  Elapsed: 00:00:01
+  Total cpu time:  0.48u 0.02s 0.50t  Elapsed: 00:00:01
+  Start:  Sat Apr 20 14:39:05 2002   End:  Sat Apr 20 14:39:06 2002

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastn
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastn	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastn	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+TBLASTN 2.0MP-WashU [12-Feb-2001] [linux-i686 01:36:08 31-Jan-2001]
+
+Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2000) http://blast.wustl.edu
+
+Notice:  statistical significance is estimated under the assumption that the
+equivalent of one entire reading frame of the database codes for protein and
+that significant alignments will involve only coding reading frames.
+
+Query=  gi|142865|gb|AAA22406.1| DNA primase
+        (603 letters)
+
+Database:  ecoli.nt
+           400 sequences; 4,662,239 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                     Reading  High  Probability
+Sequences producing High-scoring Segment Pairs:        Frame Score  P(N)      N
+
+gi|1789441|gb|AE000388.1|AE000388 Escherichia coli K-1... +2   671  1.4e-73   1
+
+
+
+>gi|1789441|gb|AE000388.1|AE000388 Escherichia coli K-12 MG1655 section 278 of
+            400 of the complete genome
+        Length = 10,334
+
+  Plus Strand HSPs:
+
+ Score = 671 (265.8 bits), Expect = 1.4e-73, P = 1.4e-73
+ Identities = 151/421 (35%), Positives = 223/421 (52%), Frame = +2
+
+Query:     1 MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFH 60
+             M  RIP   ++ +    DIV++I   V+LKKQG+N+   CPFH E TPSF+V+ +KQ +H
+Sbjct:  4778 MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYH 4957
+
+Query:    61 CFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKM 117
+             CFGCGA GN   FL   +   F E+V  LA  + ++ P +    +G+ P   E    Q +
+Sbjct:  4958 CFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTL 5125
+
+Query:   118 AEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKR 177
+              +  + L  FY   L        A  YL  RG + E+I  F IG+A   WD + K     
+Sbjct:  5126 YQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGN 5302
+
+Query:   178 GFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSP 237
+               +   +  AG+L+  + G  Y DRFR RVMFPI D  G V+ F GR LG+  PKY+NSP
+Sbjct:  5303 PENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSP 5479
+
+Query:   238 ETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHV 297
+             ET +FHK + LY  Y+A+    +  R ++ EG+ DV       +  ++A++GTS T DH+
+Sbjct:  5480 ETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHI 5659
+
+Query:   298 KILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFG 354
+             ++L R    +I CYD D+AG +A  +A E        G ++R   +PDG DPD  ++K G
+Sbjct:  5660 QLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEG 5839
+
+Query:   355 GEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVK 414
+              E F+  + + ++ + AF         +LS    R       L  IS + G   + +Y++
+Sbjct:  5840 KEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLR 6013
+
+Query:   415 Q 415
+             Q
+Sbjct:  6014 Q 6016
+
+
+Parameters:
+  novalidctxok
+  nonnegok
+  gapall
+  Q=12
+  R=1
+  cpus=1
+  filter=seg
+  matrix=blosum62
+  W=3
+  S2=41
+  gapS2=68
+  X=16
+  gapX=38
+  hitdist=40
+  gi
+  gapL=0.27
+  gapK=0.047
+  gapH=0.23
+
+  ctxfactor=6.00
+  E=10
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +0      0   blosum62        0.320   0.136   0.387    same    same    same
+               Q=12,R=1        0.270   0.0470  0.230     n/a     n/a     n/a
+
+  Query
+  Frame  MatID  Length  Eff.Length     E    S W   T  X   E2     S2
+   +0      0      603       603       10.  59 3  13 16  0.020   41
+                                                    38  0.0     59
+
+
+Statistics:
+
+  Database:  /home/jes12/db/ecoli.nt
+   Title:  ecoli.nt
+   Posted:  3:16:20 PM EST Nov 18, 2001
+   Created:  10:10:31 AM EST Nov 18, 2001
+   Format:  XDF-1
+   # of letters in database:  4,662,239
+   # of sequences in database:  400
+   # of database sequences satisfying E:  1
+  No. of states in DFA:  538 (57 KB)
+  Total size of DFA:  76 KB (1061 KB)
+  Time to generate neighborhood:  0.00u 0.01s 0.01t  Elapsed: 00:00:00
+  No. of threads or processors used:  1
+  Search cpu time:  0.46u 0.01s 0.47t  Elapsed: 00:00:01
+  Total cpu time:  0.46u 0.03s 0.49t  Elapsed: 00:00:03
+  Start:  Sat Apr 20 18:21:51 2002   End:  Sat Apr 20 18:21:54 2002

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastx
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastx	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dnaEbsub_ecoli.wutblastx	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,226 @@
+TBLASTX 2.0MP-WashU [12-Feb-2001] [linux-i686 01:36:08 31-Jan-2001]
+
+Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2000) http://blast.wustl.edu
+
+Notice:  statistical significance is estimated under the assumption that the
+equivalent of one entire reading frame of the query sequence and one entire
+reading frame of the database code for protein and that significant alignments
+will only involve coding reading frames.
+
+Query=  gi|142864|gb|M10040.1|BACDNAE B.subtilis dnaE gene encoding DNA
+    primase, complete cds
+        (2001 letters)
+
+  Translating both strands of query sequence in all 6 reading frames
+
+Database:  ecoli.nt
+           400 sequences; 4,662,239 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                     Reading  High  Probability
+Sequences producing High-scoring Segment Pairs:        Frame Score  P(N)      N
+
+gi|1789441|gb|AE000388.1|AE000388 Escherichia coli K-1... +2   318  6.4e-70   5
+gi|2367383|gb|AE000509.1|AE000509 Escherichia coli K-1... -3    59  0.9992    2
+
+
+
+>gi|1789441|gb|AE000388.1|AE000388 Escherichia coli K-12 MG1655 section 278 of
+            400 of the complete genome
+        Length = 10,334
+
+  Plus Strand HSPs:
+
+ Score = 318 (148.6 bits), Expect = 6.4e-70, Sum P(5) = 6.4e-70
+ Identities = 57/127 (44%), Positives = 81/127 (63%), Frame = +3 / +2
+
+Query:   618 FDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIR 797
+             +DRFR RVMFPI D  G V+ F GR LG+  PKY+NSPET +FHK + LY  Y+A+    
+Sbjct:  5366 YDRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNA 5545
+
+Query:   798 KQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYE 977
+             +  R ++ EG+ DV       +  ++A++GTS T DH+++L R    +I CYD D+AG +
+Sbjct:  5546 EPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRD 5725
+
+Query:   978 ATLKASE 998
+             A  +A E
+Sbjct:  5726 AAWRALE 5746
+
+ Score = 247 (116.1 bits), Expect = 6.4e-70, Sum P(5) = 6.4e-70
+ Identities = 44/98 (44%), Positives = 61/98 (62%), Frame = +3 / +2
+
+Query:    21 MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFH 200
+             M  RIP   ++ +    DIV++I   V+LKKQG+N+   CPFH E TPSF+V+ +KQ +H
+Sbjct:  4778 MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYH 4957
+
+Query:   201 CFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFP 314
+             CFGCGA GN   FL   +   F E+V  LA  + ++ P
+Sbjct:  4958 CFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVP 5071
+
+ Score = 85 (41.8 bits), Expect = 6.4e-70, Sum P(5) = 6.4e-70
+ Identities = 19/59 (32%), Positives = 28/59 (47%), Frame = +3 / +2
+
+Query:   441 ALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGY 617
+             A  YL  RG + E+I  F IG+A   WD + K       +   +  AG+L+  + G  Y
+Sbjct:  5192 ARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY 5368
+
+ Score = 65 (32.7 bits), Expect = 6.4e-70, Sum P(5) = 6.4e-70
+ Identities = 12/28 (42%), Positives = 18/28 (64%), Frame = +3 / +2
+
+Query:  1014 GCKVRVAMIPDGLDPDDYIKKFGGEKFK 1097
+             G ++R   +PDG DPD  ++K G E F+
+Sbjct:  5771 GRQLRFMFLPDGEDPDTLVRKEGKEAFE 5854
+
+ Score = 48 (24.9 bits), Expect = 0.89, Sum P(4) = 0.59
+ Identities = 15/49 (30%), Positives = 21/49 (42%), Frame = +3 / +2
+
+Query:   639 VMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKAR 785
+             V F  HD    V A   R+  +  P Y     T LFH+ K    F++ +
+Sbjct:  4046 VRFYAHDRGRPVQARRRRSQRAISPDYTIGWLTNLFHRRKWRCRFHRRK 4192
+
+ Score = 46 (24.0 bits), Expect = 1.5, Sum P(4) = 0.78
+ Identities = 8/17 (47%), Positives = 10/17 (58%), Frame = +3 / +2
+
+Query:   150 GESTPSFSVSPDKQIFH 200
+             G  TP +  SP+  IFH
+Sbjct:  5447 GNDTPKYLNSPETDIFH 5497
+
+ Score = 42 (22.1 bits), Expect = 6.4e-70, Sum P(5) = 6.4e-70
+ Identities = 6/16 (37%), Positives = 10/16 (62%), Frame = +1 / +2
+
+Query:  1129 WRSKCNISEKERTCPM 1176
+             W S   + +K+R CP+
+Sbjct:  7151 WNSTIVLKQKKRVCPI 7198
+
+ Score = 42 (22.1 bits), Expect = 2.0e-42, Sum P(5) = 2.0e-42
+ Identities = 6/18 (33%), Positives = 14/18 (77%), Frame = +3 / +2
+
+Query:    21 MGNRIPDEIVDQVQKSAD 74
+             +  R+PD++VD++++  D
+Sbjct:   206 ISTRMPDDVVDKLKQLKD 259
+
+  Minus Strand HSPs:
+
+ Score = 102 (49.6 bits), Expect = 4.9e-08, Sum P(3) = 4.9e-08
+ Identities = 32/83 (38%), Positives = 42/83 (50%), Frame = -3 / -3
+
+Query:   319 SSGKSIWYLSAR*ETDSAKE*PSICLKKEKTLPPAPQPKQ*KICLSGDTENEGVLSP*KG 140
+             S+G S   ++A   T S     S  ++K   LP AP PKQ   C S  T  +GV S   G
+Sbjct:  5076 SNGTSRLCIAASSSTVSTNSSLS*FIRKSIALPCAPHPKQW*NCFSPLTVKDGVFSLWNG 4897
+
+Query:   139 QSPK*FRPCFFN*T*SPMTSTIS 71
+             Q    F PCFF+ T + + ST+S
+Sbjct:  4896 QHAWKFLPCFFSFTRASIRSTMS 4828
+
+ Score = 71 (35.4 bits), Expect = 4.9e-08, Sum P(3) = 4.9e-08
+ Identities = 18/35 (51%), Positives = 20/35 (57%), Frame = -3 / -3
+
+Query:   724 FIYLGCWLPRALPEKATTAP**SWIGNMTRFLKRS 620
+             F YLG  LP   P K  T P  S +GN+TR  KRS
+Sbjct:  5472 FRYLGVSLPSTRPPKPITRPRLSRMGNITRSRKRS 5368
+
+ Score = 65 (32.7 bits), Expect = 0.0034, Sum P(3) = 0.0034
+ Identities = 18/71 (25%), Positives = 34/71 (47%), Frame = -2 / -2
+
+Query:   233 ENIAARSAAKAMKNLFVGRYGKRRCAFSMKRTESKVVSALLL*LNIITYDFNDICRLLHL 54
+             + +A R+ +KA+  LF+   G+ R    ++ T    + ALL  L+      +D+     +
+Sbjct:  4990 DRVAVRATSKAVVKLFLTVNGEGRGFLVVEWTTRVEILALLFQLHTGIDQIDDVSACQQI 4811
+
+Query:    53 IHNFIWYPISH 21
+             I+ + W   SH
+Sbjct:  4810 INEYAWDSSSH 4778
+
+ Score = 42 (22.1 bits), Expect = 4.9e-08, Sum P(3) = 4.9e-08
+ Identities = 10/18 (55%), Positives = 12/18 (66%), Frame = -3 / -3
+
+Query:  1093 NFSPPNFLM*SSGSNPSG 1040
+             N S P+F    SGS+PSG
+Sbjct:  5850 NASLPSFRTSVSGSSPSG 5797
+
+
+>gi|2367383|gb|AE000509.1|AE000509 Escherichia coli K-12 MG1655 section 399 of
+            400 of the complete genome
+        Length = 10,589
+
+  Minus Strand HSPs:
+
+ Score = 59 (29.9 bits), Expect = 7.1, Sum P(2) = 1.00
+ Identities = 15/40 (37%), Positives = 19/40 (47%), Frame = -3 / -3
+
+Query:   706 WLPRALPEKATTAP**SWIGNMTRFLKRSKYPLPSSRLIR 587
+             WL R     +T +P   WI  M   L  SK  LPS++  R
+Sbjct:  4227 WLSRTTVGSSTVSPRTFWITRMKVKLSSSKVTLPSTKSTR 4108
+
+ Score = 51 (26.3 bits), Expect = 7.1, Sum P(2) = 1.00
+ Identities = 8/15 (53%), Positives = 10/15 (66%), Frame = -3 / -1
+
+Query:   382 CASAIFCSPEDSGRA 338
+             C S + C P+D GRA
+Sbjct:  1097 CTSLMLCRPQDGGRA 1053
+
+
+Parameters:
+  novalidctxok
+  nonnegok
+  gapall
+  Q=12
+  R=1
+  cpus=1
+  filter=seg
+  nogaps
+  matrix=blosum62
+  W=3
+  S2=41
+  gapS2=68
+  X=16
+  gapX=38
+  hitdist=40
+  gi
+  gapL=0.27
+  gapK=0.047
+  gapH=0.23
+
+  ctxfactor=36.0
+  E=10
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +3      0   blosum62        0.318   0.135   0.401    0.324   0.139   0.405  
+   +2      0   blosum62        0.318   0.135   0.401    0.365   0.163   0.618  
+   +1      0   blosum62        0.318   0.135   0.401    0.356   0.155   0.528  
+   -1      0   blosum62        0.318   0.135   0.401    0.350   0.155   0.543  
+   -2      0   blosum62        0.318   0.135   0.401    0.350   0.155   0.505  
+   -3      0   blosum62        0.318   0.135   0.401    0.358   0.157   0.543  
+
+  Query
+  Frame  MatID  Length  Eff.Length     E    S W   T  X   E2     S2
+   +3      0      666       666       10.  54 3  13 16  0.021   41
+   +2      0      666       666       10.  54 3  13 16  0.021   41
+   +1      0      667       667       10.  54 3  13 16  0.021   41
+   -1      0      667       667       10.  54 3  13 16  0.021   41
+   -2      0      666       666       10.  54 3  13 16  0.021   41
+   -3      0      666       666       10.  54 3  13 16  0.021   41
+
+
+Statistics:
+
+  Database:  /home/jes12/db/ecoli.nt
+   Title:  ecoli.nt
+   Posted:  3:16:20 PM EST Nov 18, 2001
+   Created:  10:10:31 AM EST Nov 18, 2001
+   Format:  XDF-1
+   # of letters in database:  4,662,239
+   # of sequences in database:  400
+   # of database sequences satisfying E:  2
+  No. of states in DFA:  598 (64 KB)
+  Total size of DFA:  416 KB (1196 KB)
+  Time to generate neighborhood:  0.02u 0.02s 0.04t  Elapsed: 00:00:00
+  No. of threads or processors used:  1
+  Search cpu time:  2.41u 0.01s 2.42t  Elapsed: 00:00:05
+  Total cpu time:  2.44u 0.03s 2.47t  Elapsed: 00:00:07
+  Start:  Sat Apr 20 18:19:43 2002   End:  Sat Apr 20 18:19:50 2002

Added: trunk/packages/bioperl/branches/upstream/current/t/data/dq519393.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/dq519393.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/dq519393.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,45 @@
+LOCUS       DQ519393                 748 bp    RNA     linear   VRL 04-JUL-2006
+DEFINITION  Hepatitis delta virus genotype I delta antigen gene, complete cds.
+ACCESSION   DQ519393
+VERSION     DQ519393.1  GI:109726906
+KEYWORDS    .
+SOURCE      Hepatitis delta virus
+  ORGANISM  Hepatitis delta virus
+            Viruses; Deltavirus.
+REFERENCE   1  (bases 1 to 748)
+  AUTHORS   Wang,T.-C. and Chao,M.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (27-APR-2006) Microbiology and Immunology, Chang Gung
+            Unversity, 259, Wen-Hwa 1st Road, Kwei-Shan, Tao-Yuan 333, Taiwan
+FEATURES             Location/Qualifiers
+     source          1..748
+                     /organism="Hepatitis delta virus"
+                     /mol_type="genomic RNA"
+                     /db_xref="taxon:12475"
+                     /country="Taiwan"
+                     /note="genotype: I"
+     CDS             34..675
+                     /codon_start=1
+                     /product="delta antigen"
+                     /protein_id="ABG45868.1"
+                     /db_xref="GI:109726907"
+                     /translation="MSQPDPRRPRKGREETLGKWIDARRRKEELERDLRKVNKTIKRL
+                     EEDNPWLGNIRGIIRKDKDGDGAPPAKRARTDQMEVDSGPRKRKHPGGFTEQERRDHR
+                     RRKALENKKKQLSSGGKSLSREEEEELRRLTEEDERRERRVAGPRVGDVNPLDGGPRG
+                     APGGGFVPSMLGVPESPFTRRGDGLDTMGTQEFPWVSPQPPPPRLPLLECTPQ"
+ORIGIN      
+        1 aagagttctg aggaccgccg cccgcaatcg aggatgagcc agcccgatcc gaggagaccc
+       61 cggaagggga gagaagagac actcggaaag tggatcgatg caagaagaag aaaggaggag
+      121 ctcgagaggg acctccggaa ggtcaacaag actatcaaga gacttgagga ggacaacccc
+      181 tggctgggaa acatccgagg aataataaga aaagacaagg acggggatgg agctcccccg
+      241 gcgaagagag ccaggacgga tcagatggag gtcgactccg gacctaggaa gaggaaacac
+      301 ccgggagggt tcaccgaaca ggagagacgg gatcaccgaa ggaggaaggc cctggagaac
+      361 aagaagaagc aactctcctc gggagggaag agcctcagca gggaggaaga agaggaactt
+      421 cgaaggttga ccgaggaaga cgagagaagg gaaagaagag tagccggccc gcgggttggg
+      481 gatgtgaacc ccctcgacgg aggtccgagg ggagcgcccg gaggcgggtt cgtccccagc
+      541 atgctaggag tcccggagtc tcccttcact cggagagggg acggcctgga caccatggga
+      601 actcaggaat ttccgtgggt tagccctcag ccccctcctc cccgccttcc actcctcgag
+      661 tgtacccccc aataaaggat cgagggttcc actcacggtt cgcgtccgtg tccttccttt
+      721 cctcttcggg tcggcatggc ataagggc
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecoli-trna-qrna.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecoli-trna-qrna.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecoli-trna-qrna.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,811 @@
+#---------------------------------------------------------------------------------
+#      qrna 1.2b (Tue Dec 18 15:04:38 CST 2001) using squid 1.5m (Sept 1997)
+#---------------------------------------------------------------------------------
+#      PAM model =  BLOSUM62 scaled by 1.000
+#---------------------------------------------------------------------------------
+#      RNA model =  /mix_tied_linux.cfg
+#---------------------------------------------------------------------------------
+#      seq file  =  ecoli-trna.blast.q
+#                   #seqs: 94 (max_len = 78)
+#---------------------------------------------------------------------------------
+#      full length version:  -- length range = [0,1000]
+#---------------------------------------------------------------------------------
+# 1  [+ strand] 
+>DA0780-1- (76)
+>ECOLI-225501- (76)
+
+length alignment: 76 (id=77.63)
+posX: 0-75 [0-72](73) -- (0.18 0.30 0.36 0.16) 
+posY: 0-75 [0-71](72) -- (0.17 0.29 0.33 0.21) 
+ 21.414226 -10.515196 16.075247
+ 19.988807 -3.972533 12.698167
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 75
+COD  ends = 74 0
+RNA  ends = 0 12
+              OTH =       20.871             COD =       -4.957             RNA =       15.208 
+   logoddspostOTH =        0.000  logoddspostCOD =      -25.828  logoddspostRNA =       -5.663 
+
+# 2  [+ strand] 
+>DA0780-1- (76)
+>ECOLI-4034592- (76)
+
+length alignment: 76 (id=77.63)
+posX: 0-75 [0-72](73) -- (0.18 0.30 0.36 0.16) 
+posY: 0-75 [0-71](72) -- (0.17 0.29 0.33 0.21) 
+ 21.414226 -10.515196 16.075247
+ 19.988807 -3.972533 12.698167
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 75
+COD  ends = 74 0
+RNA  ends = 0 12
+              OTH =       20.871             COD =       -4.957             RNA =       15.208 
+   logoddspostOTH =        0.000  logoddspostCOD =      -25.828  logoddspostRNA =       -5.663 
+
+# 3  [+ strand] 
+>DA0780-1- (70)
+>ECOLI-3979754- (70)
+
+length alignment: 70 (id=72.86)
+posX: 0-69 [0-68](69) -- (0.17 0.29 0.38 0.16) 
+posY: 0-69 [0-69](70) -- (0.16 0.30 0.36 0.19) 
+ 21.384909 12.463364 22.249356
+ 21.249381 17.921616 22.037246
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 69
+COD  ends = 68 1
+RNA  ends = 3 69
+              OTH =       21.319             COD =       16.954             RNA =       22.147 
+   logoddspostOTH =        0.000  logoddspostCOD =       -4.365  logoddspostRNA =        0.828 
+
+# 4  [+ strand] 
+>DA0780-8- (56)
+>ECOLI-779995- (56)
+
+length alignment: 56 (id=71.43)
+posX: 0-55 [0-55](56) -- (0.20 0.29 0.34 0.18) 
+posY: 0-55 [0-54](55) -- (0.18 0.29 0.35 0.18) 
+ 14.761275 -0.369781 17.076846
+ 14.838704 10.225100 21.877122
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 55 0
+COD  ends = 54 0
+RNA  ends = 35 19
+              OTH =       14.801             COD =        9.226             RNA =       20.928 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.574  logoddspostRNA =        6.127 
+
+# 5  [+ strand] 
+>DA0780-8- (56)
+>ECOLI-780298- (56)
+
+length alignment: 56 (id=71.43)
+posX: 0-55 [0-55](56) -- (0.20 0.29 0.34 0.18) 
+posY: 0-55 [0-54](55) -- (0.18 0.29 0.35 0.18) 
+ 14.761275 -0.369781 17.076846
+ 14.838704 10.225100 21.877122
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 55 0
+COD  ends = 54 0
+RNA  ends = 35 19
+              OTH =       14.801             COD =        9.226             RNA =       20.928 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.574  logoddspostRNA =        6.127 
+
+# 6  [+ strand] 
+>DA0780-8- (56)
+>ECOLI-2518730- (56)
+
+length alignment: 56 (id=71.43)
+posX: 0-55 [0-55](56) -- (0.20 0.29 0.34 0.18) 
+posY: 0-55 [0-54](55) -- (0.18 0.29 0.35 0.18) 
+ 14.761275 -0.369781 17.076846
+ 14.838704 10.225100 21.877122
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 55 0
+COD  ends = 54 0
+RNA  ends = 35 19
+              OTH =       14.801             COD =        9.226             RNA =       20.928 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.574  logoddspostRNA =        6.127 
+
+# 7  [+ strand] 
+>DA0780-8- (56)
+>ECOLI-2518850- (56)
+
+length alignment: 56 (id=71.43)
+posX: 0-55 [0-55](56) -- (0.20 0.29 0.34 0.18) 
+posY: 0-55 [0-54](55) -- (0.18 0.29 0.35 0.18) 
+ 14.761275 -0.369781 17.076846
+ 14.838704 10.225100 21.877122
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 55 0
+COD  ends = 54 0
+RNA  ends = 35 19
+              OTH =       14.801             COD =        9.226             RNA =       20.928 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.574  logoddspostRNA =        6.127 
+
+# 8  [+ strand] 
+>DA0780-8- (56)
+>ECOLI-2518972- (56)
+
+length alignment: 56 (id=71.43)
+posX: 0-55 [0-55](56) -- (0.20 0.29 0.34 0.18) 
+posY: 0-55 [0-54](55) -- (0.18 0.29 0.35 0.18) 
+ 14.761275 -0.369781 17.076846
+ 14.838704 10.225100 21.877122
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 55 0
+COD  ends = 54 0
+RNA  ends = 35 19
+              OTH =       14.801             COD =        9.226             RNA =       20.928 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.574  logoddspostRNA =        6.127 
+
+# 9  [+ strand] 
+>DA0780-2- (73)
+>ECOLI-2284230- (73)
+
+length alignment: 73 (id=64.38)
+posX: 0-72 [0-71](72) -- (0.18 0.31 0.35 0.17) 
+posY: 0-72 [0-72](73) -- (0.15 0.27 0.38 0.19) 
+ 13.918545 -8.608490 24.007519
+ 15.073192 7.016852 12.344431
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 72 0
+COD  ends = 72 0
+RNA  ends = 0 70
+              OTH =       14.608             COD =        6.017             RNA =       23.008 
+   logoddspostOTH =        0.000  logoddspostCOD =       -8.592  logoddspostRNA =        8.400 
+
+# 10  [+ strand] 
+>DA0780-7- (59)
+>ECOLI-2945191- (59)
+
+length alignment: 59 (id=67.80)
+posX: 0-58 [0-55](56) -- (0.20 0.27 0.36 0.18) 
+posY: 0-58 [0-56](57) -- (0.19 0.28 0.32 0.21) 
+ 7.994668 -17.443195 7.730549
+ 8.785962 -9.672509 13.155362
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 58 0
+COD  ends = 58 1
+RNA  ends = 39 21
+              OTH =        8.444             COD =      -10.666             RNA =       12.189 
+   logoddspostOTH =        0.000  logoddspostCOD =      -19.110  logoddspostRNA =        3.745 
+
+# 11  [+ strand] 
+>DA0780-7- (59)
+>ECOLI-2945301- (59)
+
+length alignment: 59 (id=67.80)
+posX: 0-58 [0-55](56) -- (0.20 0.27 0.36 0.18) 
+posY: 0-58 [0-56](57) -- (0.19 0.28 0.32 0.21) 
+ 7.994668 -17.443195 7.730549
+ 8.785962 -9.672509 13.155362
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 58 0
+COD  ends = 58 1
+RNA  ends = 39 21
+              OTH =        8.444             COD =      -10.666             RNA =       12.189 
+   logoddspostOTH =        0.000  logoddspostCOD =      -19.110  logoddspostRNA =        3.745 
+
+# 12  [+ strand] 
+>DA0780-7- (59)
+>ECOLI-2945411- (59)
+
+length alignment: 59 (id=67.80)
+posX: 0-58 [0-55](56) -- (0.20 0.27 0.36 0.18) 
+posY: 0-58 [0-56](57) -- (0.19 0.28 0.32 0.21) 
+ 7.994668 -17.443195 7.730549
+ 8.785962 -9.672509 13.155362
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 58 0
+COD  ends = 58 1
+RNA  ends = 39 21
+              OTH =        8.444             COD =      -10.666             RNA =       12.189 
+   logoddspostOTH =        0.000  logoddspostCOD =      -19.110  logoddspostRNA =        3.745 
+
+# 13  [+ strand] 
+>DA0780-1- (76)
+>ECOLI-3424373- (76)
+
+length alignment: 76 (id=77.63)
+posX: 0-75 [0-72](73) -- (0.16 0.36 0.30 0.18) 
+posY: 0-75 [0-71](72) -- (0.21 0.33 0.29 0.17) 
+ 20.682006 -9.755632 13.248665
+ 21.241284 -22.237923 16.258281
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 75 0
+COD  ends = 1 68
+RNA  ends = 75 63
+              OTH =       20.989             COD =      -10.755             RNA =       15.427 
+   logoddspostOTH =        0.000  logoddspostCOD =      -31.744  logoddspostRNA =       -5.561 
+
+# 14  [+ strand] 
+>DA0780-6- (71)
+>ECOLI-2815582- (71)
+
+length alignment: 71 (id=71.83)
+posX: 0-70 [0-67](68) -- (0.16 0.34 0.31 0.19) 
+posY: 0-70 [0-68](69) -- (0.19 0.32 0.28 0.22) 
+ 16.431838 -14.269383 12.463979
+ 16.326307 -7.331733 13.720289
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 70
+COD  ends = 70 1
+RNA  ends = 24 8
+              OTH =       16.380             COD =       -8.320             RNA =       13.225 
+   logoddspostOTH =        0.000  logoddspostCOD =      -24.700  logoddspostRNA =       -3.155 
+
+# 15  [+ strand] 
+>DA0780-6- (71)
+>ECOLI-2815857- (71)
+
+length alignment: 71 (id=71.83)
+posX: 0-70 [0-67](68) -- (0.16 0.34 0.31 0.19) 
+posY: 0-70 [0-68](69) -- (0.19 0.32 0.28 0.22) 
+ 16.431838 -14.269383 12.463979
+ 16.326307 -7.331733 13.720289
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 70
+COD  ends = 70 1
+RNA  ends = 24 8
+              OTH =       16.380             COD =       -8.320             RNA =       13.225 
+   logoddspostOTH =        0.000  logoddspostCOD =      -24.700  logoddspostRNA =       -3.155 
+
+# 16  [+ strand] 
+>DA0780-6- (71)
+>ECOLI-2815996- (71)
+
+length alignment: 71 (id=71.83)
+posX: 0-70 [0-67](68) -- (0.16 0.34 0.31 0.19) 
+posY: 0-70 [0-68](69) -- (0.19 0.32 0.28 0.22) 
+ 16.431838 -14.269383 12.463979
+ 16.326307 -7.331733 13.720289
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 70
+COD  ends = 70 1
+RNA  ends = 24 8
+              OTH =       16.380             COD =       -8.320             RNA =       13.225 
+   logoddspostOTH =        0.000  logoddspostCOD =      -24.700  logoddspostRNA =       -3.155 
+
+# 17  [+ strand] 
+>DA0780-6- (71)
+>ECOLI-2816271- (71)
+
+length alignment: 71 (id=71.83)
+posX: 0-70 [0-67](68) -- (0.16 0.34 0.31 0.19) 
+posY: 0-70 [0-68](69) -- (0.19 0.32 0.28 0.22) 
+ 16.431838 -14.269383 12.463979
+ 16.326307 -7.331733 13.720289
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 70
+COD  ends = 70 1
+RNA  ends = 24 8
+              OTH =       16.380             COD =       -8.320             RNA =       13.225 
+   logoddspostOTH =        0.000  logoddspostCOD =      -24.700  logoddspostRNA =       -3.155 
+
+# 18  [+ strand] 
+>DA0780-1- (76)
+>ECOLI-2515836- (76)
+
+length alignment: 76 (id=71.05)
+posX: 0-75 [0-72](73) -- (0.16 0.36 0.30 0.18) 
+posY: 0-75 [0-71](72) -- (0.21 0.33 0.26 0.19) 
+ 14.736984 -14.375325 8.884532
+ 14.851236 -24.156886 9.868232
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 75 0
+COD  ends = 1 75
+RNA  ends = 75 63
+              OTH =       14.795             COD =      -15.374             RNA =        9.459 
+   logoddspostOTH =        0.000  logoddspostCOD =      -30.169  logoddspostRNA =       -5.337 
+
+# 19  [+ strand] 
+>DA0780-1- (76)
+>ECOLI-2515951- (76)
+
+length alignment: 76 (id=71.05)
+posX: 0-75 [0-72](73) -- (0.16 0.36 0.30 0.18) 
+posY: 0-75 [0-71](72) -- (0.21 0.33 0.26 0.19) 
+ 14.736984 -14.375325 8.884532
+ 14.851236 -24.156886 9.868232
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 75 0
+COD  ends = 1 75
+RNA  ends = 75 63
+              OTH =       14.795             COD =      -15.374             RNA =        9.459 
+   logoddspostOTH =        0.000  logoddspostCOD =      -30.169  logoddspostRNA =       -5.337 
+
+# 20  [+ strand] 
+>DA0780-6- (52)
+>ECOLI-2155594- (52)
+
+length alignment: 52 (id=71.15)
+posX: 0-51 [0-48](49) -- (0.16 0.41 0.27 0.16) 
+posY: 0-51 [0-48](49) -- (0.18 0.41 0.29 0.12) 
+ 5.862400 -19.930995 -1.194701
+ 7.783836 -15.355771 0.726735
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 51 0
+COD  ends = 50 0
+RNA  ends = 52 52
+              OTH =        7.122             COD =      -16.296             RNA =        0.065 
+   logoddspostOTH =        0.000  logoddspostCOD =      -23.418  logoddspostRNA =       -7.057 
+
+# 21  [+ strand] 
+>DA0780-7- (59)
+>ECOLI-3315645- (59)
+
+length alignment: 59 (id=66.10)
+posX: 0-58 [0-55](56) -- (0.18 0.36 0.27 0.20) 
+posY: 0-58 [0-56](57) -- (0.21 0.30 0.28 0.21) 
+ 7.886726 -12.920042 12.597060
+ 6.999592 -16.997763 6.986972
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 58
+COD  ends = 0 58
+RNA  ends = 20 36
+              OTH =        7.510             COD =      -13.837             RNA =       11.626 
+   logoddspostOTH =        0.000  logoddspostCOD =      -21.347  logoddspostRNA =        4.116 
+
+# 22  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-225500- (77)
+
+length alignment: 77 (id=75.32)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.16 0.31 0.33 0.20) 
+ 24.181055 -0.353323 26.401988
+ 23.109013 11.590026 23.929123
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 76
+COD  ends = 76 0
+RNA  ends = 0 73
+              OTH =       23.742             COD =       10.590             RNA =       25.641 
+   logoddspostOTH =        0.000  logoddspostCOD =      -13.152  logoddspostRNA =        1.899 
+
+# 23  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-4034591- (77)
+
+length alignment: 77 (id=75.32)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.16 0.31 0.33 0.20) 
+ 24.181055 -0.353323 26.401988
+ 23.109013 11.590026 23.929123
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 76
+COD  ends = 76 0
+RNA  ends = 0 73
+              OTH =       23.742             COD =       10.590             RNA =       25.641 
+   logoddspostOTH =        0.000  logoddspostCOD =      -13.152  logoddspostRNA =        1.899 
+
+# 24  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-779777- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.21 0.24 0.27 0.28) 
+ 19.326669 -6.735706 19.727199
+ 19.360233 -2.001147 14.966454
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 76 0
+COD  ends = 74 1
+RNA  ends = 0 73
+              OTH =       19.344             COD =       -2.948             RNA =       18.779 
+   logoddspostOTH =        0.000  logoddspostCOD =      -22.291  logoddspostRNA =       -0.564 
+
+# 25  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-780066- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.21 0.24 0.27 0.28) 
+ 19.326669 -6.735706 19.727199
+ 19.360233 -2.001147 14.966454
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 76 0
+COD  ends = 74 1
+RNA  ends = 0 73
+              OTH =       19.344             COD =       -2.948             RNA =       18.779 
+   logoddspostOTH =        0.000  logoddspostCOD =      -22.291  logoddspostRNA =       -0.564 
+
+# 26  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-780370- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.21 0.24 0.27 0.28) 
+ 19.326669 -6.735706 19.727199
+ 19.360233 -2.001147 14.966454
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 76 0
+COD  ends = 74 1
+RNA  ends = 0 73
+              OTH =       19.344             COD =       -2.948             RNA =       18.779 
+   logoddspostOTH =        0.000  logoddspostCOD =      -22.291  logoddspostRNA =       -0.564 
+
+# 27  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-780592- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.21 0.24 0.27 0.28) 
+ 19.326669 -6.735706 19.727199
+ 19.360233 -2.001147 14.966454
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 76 0
+COD  ends = 74 1
+RNA  ends = 0 73
+              OTH =       19.344             COD =       -2.948             RNA =       18.779 
+   logoddspostOTH =        0.000  logoddspostCOD =      -22.291  logoddspostRNA =       -0.564 
+
+# 28  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-780800- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.21 0.24 0.27 0.28) 
+ 19.326669 -6.735706 19.727199
+ 19.360233 -2.001147 14.966454
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 76 0
+COD  ends = 74 1
+RNA  ends = 0 73
+              OTH =       19.344             COD =       -2.948             RNA =       18.779 
+   logoddspostOTH =        0.000  logoddspostCOD =      -22.291  logoddspostRNA =       -0.564 
+
+# 29  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-2519045- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-76 [0-74](75) -- (0.21 0.24 0.27 0.28) 
+ 19.326669 -6.735706 19.727199
+ 19.360233 -2.001147 14.966454
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 76 0
+COD  ends = 74 1
+RNA  ends = 0 73
+              OTH =       19.344             COD =       -2.948             RNA =       18.779 
+   logoddspostOTH =        0.000  logoddspostCOD =      -22.291  logoddspostRNA =       -0.564 
+
+# 30  [+ strand] 
+>DA0940-1- (78)
+>ECOLI-563946- (78)
+
+length alignment: 78 (id=69.23)
+posX: 0-77 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-77 [0-75](76) -- (0.18 0.30 0.32 0.20) 
+ 17.963663 -9.269558 17.104482
+ 18.280556 -6.326426 22.143566
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 77 0
+COD  ends = 72 1
+RNA  ends = 44 28
+              OTH =       18.131             COD =       -7.150             RNA =       21.187 
+   logoddspostOTH =        0.000  logoddspostCOD =      -25.281  logoddspostRNA =        3.056 
+
+# 31  [+ strand] 
+>DA0940-1- (78)
+>ECOLI-3979754- (78)
+
+length alignment: 78 (id=69.23)
+posX: 0-77 [0-75](76) -- (0.14 0.34 0.37 0.14) 
+posY: 0-77 [0-75](76) -- (0.14 0.33 0.36 0.17) 
+ 18.718315 -12.877073 20.514241
+ 18.551819 -5.649083 11.109117
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 77
+COD  ends = 76 1
+RNA  ends = 0 74
+              OTH =       18.637             COD =       -6.639             RNA =       19.516 
+   logoddspostOTH =        0.000  logoddspostCOD =      -25.277  logoddspostRNA =        0.879 
+
+# 32  [+ strand] 
+>DA0940-2- (75)
+>ECOLI-3980115- (75)
+
+length alignment: 75 (id=64.00)
+posX: 0-74 [0-74](75) -- (0.15 0.35 0.36 0.15) 
+posY: 0-74 [0-74](75) -- (0.16 0.27 0.36 0.21) 
+ 16.673417 5.650160 34.947176
+ 16.791520 4.389619 18.597200
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 74 0
+COD  ends = 2 73
+RNA  ends = 0 70
+              OTH =       16.734             COD =        5.153             RNA =       33.947 
+   logoddspostOTH =        0.000  logoddspostCOD =      -11.580  logoddspostRNA =       17.214 
+
+# 33  [+ strand] 
+>DA0940-8- (58)
+>ECOLI-3979896- (58)
+
+length alignment: 58 (id=68.97)
+posX: 0-57 [0-56](57) -- (0.18 0.32 0.33 0.18) 
+posY: 0-57 [0-55](56) -- (0.16 0.21 0.30 0.32) 
+ 11.514323 -1.612738 9.963681
+ 11.181613 -0.641287 8.426787
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 57
+COD  ends = 57 0
+RNA  ends = 20 37
+              OTH =       11.358             COD =       -1.047             RNA =        9.391 
+   logoddspostOTH =        0.000  logoddspostCOD =      -12.404  logoddspostRNA =       -1.967 
+
+# 34  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-3424371- (77)
+
+length alignment: 77 (id=75.32)
+posX: 0-76 [0-75](76) -- (0.14 0.37 0.34 0.14) 
+posY: 0-76 [0-74](75) -- (0.20 0.33 0.31 0.16) 
+ 23.109013 5.520843 24.913393
+ 24.181055 9.917873 33.181724
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 76 0
+COD  ends = 76 2
+RNA  ends = 76 3
+              OTH =       23.742             COD =        8.985             RNA =       32.186 
+   logoddspostOTH =        0.000  logoddspostCOD =      -14.758  logoddspostRNA =        8.444 
+
+# 35  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-2515834- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.37 0.34 0.14) 
+posY: 0-76 [0-74](75) -- (0.20 0.33 0.28 0.19) 
+ 18.645991 3.152473 25.533618
+ 18.627918 3.690656 29.967242
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 76
+COD  ends = 76 2
+RNA  ends = 76 3
+              OTH =       18.637             COD =        3.447             RNA =       29.033 
+   logoddspostOTH =        0.000  logoddspostCOD =      -15.190  logoddspostRNA =       10.396 
+
+# 36  [+ strand] 
+>DA0940-1- (77)
+>ECOLI-2515949- (77)
+
+length alignment: 77 (id=70.13)
+posX: 0-76 [0-75](76) -- (0.14 0.37 0.34 0.14) 
+posY: 0-76 [0-74](75) -- (0.20 0.33 0.28 0.19) 
+ 18.645991 3.152473 25.533618
+ 18.627918 3.690656 29.967242
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 76
+COD  ends = 76 2
+RNA  ends = 76 3
+              OTH =       18.637             COD =        3.447             RNA =       29.033 
+   logoddspostOTH =        0.000  logoddspostCOD =      -15.190  logoddspostRNA =       10.396 
+
+# 37  [+ strand] 
+>DA0940-5- (76)
+>ECOLI-2815580- (76)
+
+length alignment: 76 (id=71.05)
+posX: 0-75 [0-71](72) -- (0.15 0.35 0.35 0.15) 
+posY: 0-75 [0-71](72) -- (0.18 0.31 0.31 0.21) 
+ 13.333636 -36.156781 13.057277
+ 13.159042 -36.377514 9.144681
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 75
+COD  ends = 0 73
+RNA  ends = 32 52
+              OTH =       13.249             COD =      -36.263             RNA =       12.150 
+   logoddspostOTH =        0.000  logoddspostCOD =      -49.512  logoddspostRNA =       -1.099 
+
+# 38  [+ strand] 
+>DA0940-5- (76)
+>ECOLI-2815855- (76)
+
+length alignment: 76 (id=71.05)
+posX: 0-75 [0-71](72) -- (0.15 0.35 0.35 0.15) 
+posY: 0-75 [0-71](72) -- (0.18 0.31 0.31 0.21) 
+ 13.333636 -36.156781 13.057277
+ 13.159042 -36.377514 9.144681
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 75
+COD  ends = 0 73
+RNA  ends = 32 52
+              OTH =       13.249             COD =      -36.263             RNA =       12.150 
+   logoddspostOTH =        0.000  logoddspostCOD =      -49.512  logoddspostRNA =       -1.099 
+
+# 39  [+ strand] 
+>DA0940-5- (76)
+>ECOLI-2815994- (76)
+
+length alignment: 76 (id=71.05)
+posX: 0-75 [0-71](72) -- (0.15 0.35 0.35 0.15) 
+posY: 0-75 [0-71](72) -- (0.18 0.31 0.31 0.21) 
+ 13.333636 -36.156781 13.057277
+ 13.159042 -36.377514 9.144681
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 75
+COD  ends = 0 73
+RNA  ends = 32 52
+              OTH =       13.249             COD =      -36.263             RNA =       12.150 
+   logoddspostOTH =        0.000  logoddspostCOD =      -49.512  logoddspostRNA =       -1.099 
+
+# 40  [+ strand] 
+>DA0940-5- (76)
+>ECOLI-2816269- (76)
+
+length alignment: 76 (id=71.05)
+posX: 0-75 [0-71](72) -- (0.15 0.35 0.35 0.15) 
+posY: 0-75 [0-71](72) -- (0.18 0.31 0.31 0.21) 
+ 13.333636 -36.156781 13.057277
+ 13.159042 -36.377514 9.144681
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 75
+COD  ends = 0 73
+RNA  ends = 32 52
+              OTH =       13.249             COD =      -36.263             RNA =       12.150 
+   logoddspostOTH =        0.000  logoddspostCOD =      -49.512  logoddspostRNA =       -1.099 
+
+# 41  [+ strand] 
+>DA0940-9- (69)
+>ECOLI-1804478- (69)
+
+length alignment: 69 (id=66.67)
+posX: 0-68 [0-67](68) -- (0.15 0.34 0.35 0.16) 
+posY: 0-68 [0-66](67) -- (0.13 0.39 0.31 0.16) 
+ 10.953909 -9.639667 10.662024
+ 12.823044 -5.075973 10.777964
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 68 0
+COD  ends = 66 1
+RNA  ends = 51 31
+              OTH =       12.172             COD =       -6.016             RNA =       10.721 
+   logoddspostOTH =        0.000  logoddspostCOD =      -18.188  logoddspostRNA =       -1.451 
+
+# 42  [+ strand] 
+>DA0940-8- (58)
+>ECOLI-3706024- (58)
+
+length alignment: 58 (id=68.97)
+posX: 0-57 [0-55](56) -- (0.18 0.34 0.30 0.18) 
+posY: 0-57 [0-55](56) -- (0.20 0.39 0.25 0.16) 
+ 9.771742 -15.688073 2.357499
+ 9.187335 -13.325765 1.773092
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 57
+COD  ends = 55 1
+RNA  ends = -1 -1
+              OTH =        9.509             COD =      -14.069             RNA =        2.095 
+   logoddspostOTH =        0.000  logoddspostCOD =      -23.578  logoddspostRNA =       -7.414 
+
+# 43  [+ strand] 
+>DA0940-7- (73)
+>ECOLI-695888- (73)
+
+length alignment: 73 (id=64.38)
+posX: 0-72 [0-69](70) -- (0.16 0.34 0.34 0.16) 
+posY: 0-72 [0-69](70) -- (0.24 0.26 0.26 0.24) 
+ 10.281468 -22.207847 3.873799
+ 9.636380 -19.716135 1.831177
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 72
+COD  ends = 72 1
+RNA  ends = 10 26
+              OTH =        9.995             COD =      -20.480             RNA =        3.187 
+   logoddspostOTH =        0.000  logoddspostCOD =      -30.475  logoddspostRNA =       -6.807 
+
+# 44  [+ strand] 
+>DA0940-7- (73)
+>ECOLI-696281- (73)
+
+length alignment: 73 (id=64.38)
+posX: 0-72 [0-69](70) -- (0.16 0.34 0.34 0.16) 
+posY: 0-72 [0-69](70) -- (0.24 0.26 0.26 0.24) 
+ 10.281468 -22.207847 3.873799
+ 9.636380 -19.716135 1.831177
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 72
+COD  ends = 72 1
+RNA  ends = 10 26
+              OTH =        9.995             COD =      -20.480             RNA =        3.187 
+   logoddspostOTH =        0.000  logoddspostCOD =      -30.475  logoddspostRNA =       -6.807 
+
+# 45  [+ strand] 
+>DA0940-6- (62)
+>ECOLI-695990- (62)
+
+length alignment: 62 (id=62.90)
+posX: 0-61 [0-60](61) -- (0.16 0.38 0.30 0.16) 
+posY: 0-61 [0-60](61) -- (0.25 0.30 0.26 0.20) 
+ 10.452429 5.015902 10.936383
+ 9.772183 2.186248 11.450894
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 61
+COD  ends = 0 61
+RNA  ends = 38 22
+              OTH =       10.152             COD =        4.206             RNA =       11.216 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.946  logoddspostRNA =        1.064 
+
+# 46  [+ strand] 
+>DA0940-6- (62)
+>ECOLI-696099- (62)
+
+length alignment: 62 (id=62.90)
+posX: 0-61 [0-60](61) -- (0.16 0.38 0.30 0.16) 
+posY: 0-61 [0-60](61) -- (0.25 0.30 0.26 0.20) 
+ 10.452429 5.015902 10.936383
+ 9.772183 2.186248 11.450894
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = RNA 
+OTH  ends = 0 61
+COD  ends = 0 61
+RNA  ends = 38 22
+              OTH =       10.152             COD =        4.206             RNA =       11.216 
+   logoddspostOTH =        0.000  logoddspostCOD =       -5.946  logoddspostRNA =        1.064 
+
+# 47  [+ strand] 
+>DA0940-10- (69)
+>ECOLI-2816349- (69)
+
+length alignment: 69 (id=63.77)
+posX: 0-68 [0-66](67) -- (0.15 0.34 0.36 0.15) 
+posY: 0-68 [0-65](66) -- (0.18 0.27 0.36 0.18) 
+ 9.155278 -17.238488 5.252253
+ 9.115589 -17.289968 6.575597
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 0 68
+COD  ends = 0 56
+RNA  ends = 26 10
+              OTH =        9.136             COD =      -17.264             RNA =        6.061 
+   logoddspostOTH =        0.000  logoddspostCOD =      -26.400  logoddspostRNA =       -3.075 
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rps.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rps.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rps.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,610 @@
+RPS-BLAST 2.1.3 [Apr-1-2001]
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_0</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786182|gb|AAC73112.1| (AE000111) thr operon leader peptide [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>21</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>4.16497e+11</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogenase I [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>820</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|Pfam|pfam00742</Hit_id>
+          <Hit_def>HomoS_dh, HomoS dehydrogenase</Hit_def>
+          <Hit_accession>pfam00742</Hit_accession>
+          <Hit_len>310</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>327.405</Hsp_bit-score>
+              <Hsp_score>838</Hsp_score>
+              <Hsp_evalue>1.46134e-90</Hsp_evalue>
+              <Hsp_query-from>498</Hsp_query-from>
+              <Hsp_query-to>815</Hsp_query-to>
+              <Hsp_hit-from>3</Hsp_hit-from>
+              <Hsp_hit-to>310</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>123</Hsp_identity>
+              <Hsp_positive>171</Hsp_positive>
+              <Hsp_gaps>26</Hsp_gaps>
+              <Hsp_align-len>326</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>LRVCGVANSKALLTNVHGLNLENWQEELAQAKEPF-NLGRLIRLVKEYHLLN----PVIVDCTSSQAVAD-QYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE-GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARET-GRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS</Hsp_qseq>
+              <Hsp_hseq>GVVTGITDSREMLLSRIGLPLEIWKVALRDLEKPRKDLGKLDLTDDAFAVVDDPDIDVVVELTGGIEVARELYLDALEEGKHVVTANKALNASHGDEYLAL---AEKSGVDVLYEAAVAGGIPIIKTLRELLATGDRILKIEGIFNGTTNFILSEMDEKGLPFSDVLAEAQELGYTEADPRDDVEGIDAARKLAILARIAFGIELELDDVYVEGISPITAEDISSADEFGYTLKLLDEAMRQRVEDAESGGEVLRYPTLIPE-------------DHPLASVKGSDNAVAVEGEAYG--PLMFYGPGAGAEPTASAVVADIVRIAR</Hsp_hseq>
+              <Hsp_midline>  V G+ +S+ +L +  GL LE W+  L   ++P  +LG+L      + +++     V+V+ T    VA   Y D L EG HVVT NK  N S  D Y  L   AEKS    LY+  V  G+P+I+ L+ LL  GD ++K  GI +G+ ++I  ++DE G+ FS+    A+E+GYTE DPRDD+ G+D ARKL ILAR   G ELEL D+ +E + P           F   L  LD+    RV  A   G+VLRY   I E             + PL  VK  +NA+A     Y   PL+  G GAG + TA+ V AD++R   </Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>2</Hit_num>
+          <Hit_id>gnl|Pfam|pfam00696</Hit_id>
+          <Hit_def>aakinase, Amino acid kinase family. This family includes kinases that phosphorylate a variety of amino acid substrates, as well as uridylate kinase and carbamate kinase. This family includes: Aspartokinase EC:2.7.2.4, AcetylE kinase EC:2.7.2.8, E 5-kinase EC:2.7.2.11, Uridylate kinase EC:2.7.4.-, Carbamate kinase EC:2.7.2.2</Hit_def>
+          <Hit_accession>pfam00696</Hit_accession>
+          <Hit_len>236</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>123.25</Hsp_bit-score>
+              <Hsp_score>308</Hsp_score>
+              <Hsp_evalue>4.18565e-29</Hsp_evalue>
+              <Hsp_query-from>1</Hsp_query-from>
+              <Hsp_query-to>284</Hsp_query-to>
+              <Hsp_hit-from>2</Hsp_hit-from>
+              <Hsp_hit-to>236</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>75</Hsp_identity>
+              <Hsp_positive>115</Hsp_positive>
+              <Hsp_gaps>71</Hsp_gaps>
+              <Hsp_align-len>295</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>MRVLKFGGTSVANA--ERFLRVADILESNARQG-QVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAV--GHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSY------FGAKVLHPRTITPIAQFQIPCLIKN</Hsp_qseq>
+              <Hsp_hseq>RIVIKLGGSALSDEDDERLERVAEEIAKLREEGREVVVVHGGGPQVGRLLLKL-----------------------------AKKPGSRVTDAATL--------------------------DALGAVGEGLSGALLSAALEAPGIPAGQLSGTEDFGIDAEGRGGNAVVDSVGVEKEAIEELLEKGKIPIVAGGGGVPV-----TLGRGDSDTAAALLAALLKADLLIILTDVDGVYTADPKKVPDAKLIDELSYEEALELAGGESGFGTGGMVPKVRAAILAVRSGIPVIITN</Hsp_hseq>
+              <Hsp_midline>  V+K GG+++++   ER  RVA+ +     +G +V  V     ++   L+ +                             A +PG  +    T                            AL   GE +S A+++  LEA G     +   E       G    + VD     +      +    + ++AG            LGR  SD +AA+LAA L+AD   I TDVDGVYT DP++VPDA+L+  +SY+EA+EL+        G  V   R      +  IP +I N</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>3.82682e+07</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_2</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786184|gb|AAC73114.1| (AE000111) homoserine kinase [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>310</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|Pfam|pfam00288</Hit_id>
+          <Hit_def>GHMP_kinases, GHMP kinases putative ATP-binding protein</Hit_def>
+          <Hit_accession>pfam00288</Hit_accession>
+          <Hit_len>73</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>55.4546</Hsp_bit-score>
+              <Hsp_score>132</Hsp_score>
+              <Hsp_evalue>4.05207e-09</Hsp_evalue>
+              <Hsp_query-from>69</Hsp_query-from>
+              <Hsp_query-to>132</Hsp_query-to>
+              <Hsp_hit-from>1</Hsp_hit-from>
+              <Hsp_hit-to>73</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>22</Hsp_identity>
+              <Hsp_positive>34</Hsp_positive>
+              <Hsp_gaps>9</Hsp_gaps>
+              <Hsp_align-len>73</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>WERFCQELGK----QIPVAMTLEKNMPIGSGLGSSAC-SVVAALMAMNE----HCGKPLNDTRLLALMGELEG</Hsp_qseq>
+              <Hsp_hseq>WANYLKGGLKVIQPLPGLDVVISSNIPLGSGLGSSAAIAVVAGAVLANEFVAGLNGLKLSLANIQHLENQFEG</Hsp_hseq>
+              <Hsp_midline>W  + +   K       + + +  N+P+GSGLGSSA  +VVA  +  NE      G  L+   +  L  + EG</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>3.85928e+07</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_3</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786185|gb|AAC73115.1| (AE000111) threonine synthase [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>428</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|Pfam|pfam00291</Hit_id>
+          <Hit_def>PALP, Pyridoxal-phosphate dependent enzyme. Members of this family are all pyridoxal-phosphate dependent enzymes. This family includes: S dehydratase EC:4.2.1.13 P20132, threonine dehydratase EC:4.2.1.16, tryptophan synthase beta chain EC:4.2.1.20, threonine synthase EC:4.2.99.2, cysteine synthase EC:4.2.99.8 P11096, cystathionine beta-synthase EC:4.2.1.22, 1-aminocyclopropane-1-carboxylate deaminase EC:4.1.99.4</Hit_def>
+          <Hit_accession>pfam00291</Hit_accession>
+          <Hit_len>298</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>129.028</Hsp_bit-score>
+              <Hsp_score>323</Hsp_score>
+              <Hsp_evalue>3.98147e-31</Hsp_evalue>
+              <Hsp_query-from>96</Hsp_query-from>
+              <Hsp_query-to>363</Hsp_query-to>
+              <Hsp_hit-from>28</Hsp_hit-from>
+              <Hsp_hit-to>280</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>69</Hsp_identity>
+              <Hsp_positive>101</Hsp_positive>
+              <Hsp_gaps>19</Hsp_gaps>
+              <Hsp_align-len>270</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>LELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQL-VVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMREL-KELGYTSEPHAAVAYRALRD</Hsp_qseq>
+              <Hsp_hseq>LEDLN-PTGSFKDRGA-LNMILLAEKLGKKGGIVPGATSGNTGIALAYA-AALLGLKCTIVMPAT-TSREKVAQLRALGAENIVVPVVGGFDDLADAVKKALELAEENPK-NAYLVNQ-FDNPANVEA-GQKTIGLEIWEQLGGKPDAVVVPVGGGGTIAGIARYLKELLPVKVIGVEPEGSAVLSGFLKPG--SPVTLPETLSIAIGLGVPFVFPILDELL--------DDEVVTVTDEEALEAARLLAREEGIFVEPSSGAAVAAALK</Hsp_hseq>
+              <Hsp_midline>LE  + PT +FKD G      +L    G K   +  ATSG+TG A+A+A   L  +K  I+ P    S  +      LG     V + G FD     VK+A +  E          N    +    +         ++ ++   +   V VP G  G +          LPVK        +  +  FL  G  SP     TLS A+ +  P  +P ++EL         +     V DE   +  R L +E G   EP +  A  A</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>1.52967e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_4</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786186|gb|AAC73116.1| (AE000111) orf, hypothetical protein [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>98</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>3.59021e+11</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_5</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786187|gb|AAC73117.1| (AE000111) orf, hypothetical protein [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>258</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>3.3722e+11</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_6</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786188|gb|AAC73118.1| (AE000111) inner membrane transport protein [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>476</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|Pfam|pfam01235</Hit_id>
+          <Hit_def>Na_Ala_symp, Sodium:alanine symporter family</Hit_def>
+          <Hit_accession>pfam01235</Hit_accession>
+          <Hit_len>415</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>558.525</Hsp_bit-score>
+              <Hsp_score>1438</Hsp_score>
+              <Hsp_evalue>2.26239e-160</Hsp_evalue>
+              <Hsp_query-from>48</Hsp_query-from>
+              <Hsp_query-to>458</Hsp_query-to>
+              <Hsp_hit-from>7</Hsp_hit-from>
+              <Hsp_hit-to>415</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>233</Hsp_identity>
+              <Hsp_positive>300</Hsp_positive>
+              <Hsp_gaps>2</Hsp_gaps>
+              <Hsp_align-len>411</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>KNSIHPQPGGLTSFQSLCTSLAARVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMRWMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPLMAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPNAAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAEFVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMAITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRY</Hsp_qseq>
+              <Hsp_hseq>KFGKKDEGGDVSSFQALTTSLAGRVGTGNIAGVATAIAAGGPGAVFWMWVTAFIGMATAFVESTLAQLYKERDKDGNFRGGPAYYIKKGLGMRWLAILFAVAVIVSFGVIFSGVQANSIADAMSNAFGIPPLVTGIVLAILTALIIFGGVKRIAAISSIVVPFMAIIYLITALAIIAMNIEKVPDVIGLIFKSAFGFDAAAGGALGATVSKAIMWGVKRGLFSNEAGMGSAPNAAAAAHVS--HPAKQGLVQMLGIFLDTFIVCTATALVILLTGNYTNVETLKGAQLTQKAFDTLIGGFGATFVAIALLLFAFSTIIANYYYAETNLAYLVRSGPRGVALYRLAYLAAVFYGTVLSLTLVWALADIVMGIMALPNLIAILLLSKVAYEALKDYERQLKQGKDPEFDADEY</Hsp_hseq>
+              <Hsp_midline>K     + G ++SFQ+L TSLA RVG+GN+AGVA AI AGGPGAVFWMWV AFIGMAT+F E +LAQLYKERD +G FRGGPA+Y+ +GLGMRW+ +LFAV +++++G+IFSGVQAN++A A+S +F  PPLVTGI+LA+ T L I  G+  +A +    VP MAII+++T+L I  MNI ++P VI  IF+SAFG+  AAGGA G T+S+AI  G +R +FSNEAGMGS PNAAAAA     HPA QG+VQM+GIF+DT ++CTA+A++ILL GN T    L+G QL QKA   L+G +GA FV + ++LFAFS+I+ANY YAE NL +L  + P+ +   R+   A V  GT+LSL L+W LADI+M  MA+ NL AILLLS V +    DY RQ K G  P FD   Y</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>1.95607e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_7</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786189|gb|AAC73119.1| (AE000111) transaldolase B [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>317</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|Pfam|pfam00923</Hit_id>
+          <Hit_def>Transaldolase, Transaldolase</Hit_def>
+          <Hit_accession>pfam00923</Hit_accession>
+          <Hit_len>282</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>306.99</Hsp_bit-score>
+              <Hsp_score>785</Hsp_score>
+              <Hsp_evalue>7.90128e-85</Hsp_evalue>
+              <Hsp_query-from>13</Hsp_query-from>
+              <Hsp_query-to>313</Hsp_query-to>
+              <Hsp_hit-from>1</Hsp_hit-from>
+              <Hsp_hit-to>282</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>161</Hsp_identity>
+              <Hsp_positive>191</Hsp_positive>
+              <Hsp_gaps>29</Hsp_gaps>
+              <Hsp_align-len>306</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>TVVADTGDIAAMKLYQP----QDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQIVDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGIRAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIYQYYKEHGYETVVMGASFRNIGEILE-LAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESEFLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMI</Hsp_qseq>
+              <Hsp_hseq>KVFLDTGDIEEIKKLAPIGIIQGVTTNPSLIAKAAKKSAYEKL----DAVGKKKGKTIKEQVENACDKLAVEF-PEILKIVPGRVSTEVDARLSFDAEAMIKEAKRLIKL-----ISKPNIVIKIPVTWEGLKAVKALEAEGIPVNVTLLFSAAQALAAAEAGVTYISPFVGRIDDWIDALTDK---NYEGDPGVQSVKDIYQYYKKHGYKTEVLAASFRNPGYILELLAGCDSLTIPPALLDQLLDHEPL---------NRKETGEKISEKKF--NIDEDAMAVELLDEGIRKFKKDFEKLLKST</Hsp_hseq>
+              <Hsp_midline> V  DTGDI  +K   P    Q  TTNPSLI  AA+   Y KL     A  K++     +Q+ +A DKLAV    EILK+VPGR+STEVDARLS+D EA I +AKRLIKL     IS   I+IK+  TW+G++A + LE EGI  N+TLLFS AQA A AEAGV  ISPFVGRI DW  A TDK       DPGV SV +IYQYYK+HGY+T V+ ASFRN G ILE LAGCD LTI PALL +L + E             K    +I+E +F    ++D MAV+ L EGIRKF  D EKL K  </Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>1.4562e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_8</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786190|gb|AAC73120.1| (AE000111) required for the efficient incorporation of molybdate into molybdoproteins [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>195</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>3.35238e+11</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-1-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>/data_2/jason/db/cdd/cdd/Pfam</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY_9</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|1786191|gb|AAC73121.1| (AE000111) orf, hypothetical protein [Escherichia coli]</BlastOutput_query-def>
+  <BlastOutput_query-len>188</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>1e-05</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>F</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|Pfam|pfam01184</Hit_id>
+          <Hit_def>Grp1_Fun34_YaaH, GPR1/FUN34/yaaH family</Hit_def>
+          <Hit_accession>pfam01184</Hit_accession>
+          <Hit_len>210</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>218.009</Hsp_bit-score>
+              <Hsp_score>554</Hsp_score>
+              <Hsp_evalue>2.86271e-58</Hsp_evalue>
+              <Hsp_query-from>5</Hsp_query-from>
+              <Hsp_query-to>188</Hsp_query-to>
+              <Hsp_hit-from>16</Hsp_hit-from>
+              <Hsp_hit-to>210</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>137</Hsp_identity>
+              <Hsp_positive>146</Hsp_positive>
+              <Hsp_gaps>13</Hsp_gaps>
+              <Hsp_align-len>196</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>KLANPAPLGLMGFGMTTILLNLHNVGYFALD--GIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFTSYGSFWLTLVAILLMPKLGLTDAPNAQ-----FLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAIGNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTV-----LPIGESH</Hsp_qseq>
+              <Hsp_hseq>KFANPAPLGLSGFALTTIVLSLHNVGAFGLDNPGIIVGMAIFYGGIAQIFAGLWEYKKENTFGLTALTSYGGFWLSLVAIL-MPKFGITDAYNDQIEVQNALGVYLGGWGVFTLFLFFCTLKSTRVFFFLFFSLTVTFLLLAIANITGNAAIIHFGGWLGLICAFSAIYLAYAGVANEQNSYIVPVPLDLPIGEKH</Hsp_hseq>
+              <Hsp_midline>K ANPAPLGL GF +TTI+L+LHNVG F LD  GII+ M IFYGGIAQIFAGL EYKK NTFGLTA TSYG FWL+LVAIL MPK G+TDA N Q      LGVYLG WGVFTLF+FF TLK  RV  F+FFSLTV F LLAI NI GNAAIIHF GW+GLIC  SAIYLA   V NEQ    V     LPIGE H</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>0</Statistics_db-num>
+          <Statistics_db-len>0</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>8.17579e+07</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rpsblast
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rpsblast	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecoli_domains.rpsblast	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,155 @@
+RPS-BLAST 2.2.4 [Aug-26-2002]
+
+Query= gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,
+homoserine dehydrogenase I [Escherichia coli]
+         (820 letters)
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+gnl|CDD|3919 smart00483, POLXc, DNA polymerase X family; include...    28   0.064
+gnl|CDD|7379 smart00533, MUTSd, DNA-binding domain of DNA mismat...    26   0.47 
+gnl|CDD|178 smart00202, SR, Scavenger receptor Cys-rich; The sea...    22   4.5  
+gnl|CDD|8977 smart00359, PUA, Putative RNA-binding Domain in Pse...    22   7.0  
+gnl|CDD|7370 smart00486, POLBc, DNA polymerase type-B family; DN...    22   7.6  
+gnl|CDD|28 smart00035, CLa, CLUSTERIN alpha chain;                     21   7.6  
+gnl|CDD|8994 smart00450, RHOD, Rhodanese Homology Domain; An alp...    22   8.0  
+
+>gnl|CDD|3919 smart00483, POLXc, DNA polymerase X family; includes vertebrate
+           polymerase beta and terminal
+           deoxynucleotidyltransferases
+          Length = 335
+
+ Score = 28.3 bits (63), Expect = 0.064
+ Identities = 11/57 (19%), Positives = 21/57 (36%), Gaps = 3/57 (5%)
+
+Query: 599 SRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSE 655
+           ++RK  Y         V+++L   +N+  +L    GI       I   ++ G     
+Sbjct: 23  NKRKCSY---FRKAASVLKSLPFPINSMKDLKGLPGIGDKIKKKIEEIIETGKLSKA 76
+
+
+>gnl|CDD|7379 smart00533, MUTSd, DNA-binding domain of DNA mismatch repair MUTS
+           family;
+          Length = 310
+
+ Score = 25.7 bits (56), Expect = 0.47
+ Identities = 19/107 (17%), Positives = 36/107 (32%), Gaps = 21/107 (19%)
+
+Query: 9   TSVANAERFLR------VADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALP 62
+                 +R LR      + D+ E N R                 L+   E     +  L 
+Sbjct: 13  CKTPMGKRLLRRWLLQPLTDLKEINERL-----------DAVEELLENPELRQDLRGLLK 61
+
+Query: 63  NISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGIS 109
+            I D ER+ + +    A+ +    L +L   ++    +I+ +L  + 
+Sbjct: 62  RIPDLERLLSRIKLSRASPR---DLLRLYDSLEG-LKEIRKLLESLE 104
+
+
+>gnl|CDD|178 smart00202, SR, Scavenger receptor Cys-rich; The sea urchin egg
+           peptide speract contains 4 repeats of SR domains that
+           contain 6 conserved cysteines. May bind bacterial
+           antigens in the protein MARCO
+          Length = 101
+
+ Score = 22.2 bits (47), Expect = 4.5
+ Identities = 13/53 (24%), Positives = 20/53 (37%), Gaps = 2/53 (3%)
+
+Query: 648 DEGMSFSEATTLAREMGYTEP--DPRDDLSGMDVARKLLILARETGRELELAD 698
+           D+G    +A  + R++G+            G       L   R TG E  L+D
+Sbjct: 27  DDGWDLRDANVVCRQLGFGGAVSASGSAYFGPGSGPIWLDNVRCTGTEASLSD 79
+
+
+>gnl|CDD|8977 smart00359, PUA, Putative RNA-binding Domain in PseudoUridine
+           synthase and Archaeosine transglycosylase;
+          Length = 78
+
+ Score = 21.8 bits (46), Expect = 7.0
+ Identities = 10/40 (25%), Positives = 17/40 (42%), Gaps = 5/40 (12%)
+
+Query: 135 AGVLEARGH-----NVTVIDPVEKLLAVGHYLESTVDIAE 169
+            GV+   G       V ++D   + L +G    S+ +IA 
+Sbjct: 22  PGVVRVDGDIKEGDVVVIVDEKGEPLGIGLANMSSEEIAR 61
+
+
+>gnl|CDD|7370 smart00486, POLBc, DNA polymerase type-B family; DNA polymerase
+           alpha, delta, epsilon and zeta chain (eukaryota), DNA
+           polymerases in archaea, DNA polymerase II in e. coli,
+           mitochondrial DNA polymerases and and virus DNA
+           polymerases
+          Length = 475
+
+ Score = 21.7 bits (45), Expect = 7.6
+ Identities = 8/33 (24%), Positives = 14/33 (42%), Gaps = 3/33 (9%)
+
+Query: 247 KSMSYQEAMELSYFGAKVLHPRTI---TPIAQF 276
+           K +  +   ++ Y G KVL P+      P+   
+Sbjct: 275 KGLEPELKKKVKYEGGKVLEPKKGFYENPVLVL 307
+
+
+>gnl|CDD|28 smart00035, CLa, CLUSTERIN alpha chain;
+          Length = 215
+
+ Score = 21.5 bits (45), Expect = 7.6
+ Identities = 7/13 (53%), Positives = 9/13 (68%)
+
+Query: 479 ALLEQLKRQQSWL 491
+           +LLEQL  Q  W+
+Sbjct: 129 SLLEQLNEQFGWV 141
+
+
+>gnl|CDD|8994 smart00450, RHOD, Rhodanese Homology Domain; An alpha beta fold
+           found duplicated in the Rhodanese protein. The the
+           Cysteine containing enzymatically active version of the
+           domain is also found in the CDC25 class of protein
+           phosphatases and a variety of proteins such as sulfide
+           dehydrogenases and stress proteins such as Senesence
+           specific protein 1 in plants, PspE and GlpE in bacteria
+           and cyanide and arsenate resistance proteins. Inactive
+           versions with a loss of the cysteine are also seen in
+           Dual specificity phosphatases, ubiquitin hydrolases from
+           yeast and in sulfuryltransferases. These are likely to
+           play a role in protein interactions
+          Length = 109
+
+ Score = 21.7 bits (45), Expect = 8.0
+ Identities = 14/56 (25%), Positives = 17/56 (30%)
+
+Query: 520 NWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVV 575
+              E L +  E  +L   +          PVIV C S    A         GF  V
+Sbjct: 38  PLSELLDRRGETDSLFEELLGSLGLDKDKPVIVYCRSGNRSAKAAWLLRELGFKNV 93
+
+
+Lambda     K      H
+   0.319    0.136    0.384 
+
+Gapped
+Lambda     K      H
+   0.267   0.0574    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 1100, Extension: 100
+Number of Hits to DB: 194,372
+Number of Sequences: 0
+Number of extensions: 14001
+Number of successful extensions: 17
+Number of sequences better than 10.0: 1
+Number of HSP's better than 10.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 17
+length of query: 438
+length of database: 75,508
+effective HSP length: 68
+effective length of query: 438
+effective length of database: 31,988
+effective search space: 14010744
+effective search space used: 24054976
+T: 11
+A: 40
+X1: 1600 (737.2 bits)
+X2: 3800 (1463.8 bits)
+X3: 6400 (2465.3 bits)
+S1: 4100 (1892.0 bits)
+S2: 43 (20.7 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.bls
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.bls	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.bls	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,245 @@
+BLASTP 2.1.3 [Apr-11-2001]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,
+homoserine dehydrogenase I [Escherichia coli]
+         (820 letters)
+
+Database: ecoli.aa
+           4289 sequences; 1,358,990 total letters
+
+Searching..................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogen...  1567  0.0
+gb|AAC76922.1| (AE000468) aspartokinase II and homoserine dehydr...   332  1e-91
+gb|AAC76994.1| (AE000475) aspartokinase III, lysine sensitive [E...   184  3e-47
+gb|AAC73282.1| (AE000126) uridylate kinase [Escherichia coli]          42  3e-04
+
+>gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogenase I [Escherichia
+           coli]
+          Length = 820
+
+ Score = 1567 bits (4058), Expect = 0.0
+ Identities = 806/820 (98%), Positives = 806/820 (98%)
+
+Query: 1   MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60
+           MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA
+Sbjct: 1   MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60
+
+Query: 61  LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120
+           LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA
+Sbjct: 61  LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120
+
+Query: 121 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP 180
+           ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP
+Sbjct: 121 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP 180
+
+Query: 181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240
+           ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV
+Sbjct: 181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240
+
+Query: 241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300
+           PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD
+Sbjct: 241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300
+
+Query: 301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360
+           EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF
+Sbjct: 301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360
+
+Query: 361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 420
+           CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL
+Sbjct: 361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 420
+
+Query: 421 ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQXXXXXXXXXXXXXXAL 480
+           ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQ              AL
+Sbjct: 421 ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL 480
+
+Query: 481 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL 540
+           LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL
+Sbjct: 481 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL 540
+
+Query: 541 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR 600
+           VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR
+Sbjct: 541 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR 600
+
+Query: 601 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA 660
+           RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA
+Sbjct: 601 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA 660
+
+Query: 661 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA 720
+           REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA
+Sbjct: 661 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA 720
+
+Query: 721 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF 780
+           NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF
+Sbjct: 721 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF 780
+
+Query: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
+           YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV
+Sbjct: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
+
+
+>gb|AAC76922.1| (AE000468) aspartokinase II and homoserine dehydrogenase II
+           [Escherichia coli]
+          Length = 810
+
+ Score =  332 bits (850), Expect = 1e-91
+ Identities = 243/821 (29%), Positives = 403/821 (48%), Gaps = 44/821 (5%)
+
+Query: 5   KFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNI 64
+           KFGG+S+A+ + +LRVA I+   ++   +  V+SA    TN L+  ++ + + + +   +
+Sbjct: 16  KFGGSSLADVKCYLRVAGIMAEYSQPDDMM-VVSAAGSTTNQLINWLKLSQTDRLSAHQV 74
+
+Query: 65  SDAERIF-AELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALI 123
+               R +  +L++GL  A+    L  +  FV         +  GI+      D++ A ++
+Sbjct: 75  QQTLRRYQCDLISGLLPAEEADSL--ISAFVSDLERLAALLDSGIN------DAVYAEVV 126
+
+Query: 124 CRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADH 183
+             GE  S  +M+ VL  +G     +D  E L A      +   + E        ++   H
+Sbjct: 127 GHGEVWSARLMSAVLNQQGLPAAWLDAREFLRAER---AAQPQVDEGLSYPLLQQLLVQH 183
+
+Query: 184 ---MVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240
+               +++ GF + N  GE V+LGRNGSDYSA  + A        IW+DV GVY+ DPR+V
+Sbjct: 184 PGKRLVVTGFISRNNAGETVLLGRNGSDYSATQIGALAGVSRVTIWSDVAGVYSADPRKV 243
+
+Query: 241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300
+            DA LL  +   EA EL+   A VLH RT+ P++  +I   ++ +  P       G++R 
+Sbjct: 244 KDACLLPLLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQ-----GSTRI 298
+
+Query: 301 EDELP----VKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEY 356
+           E  L      + +++ +++ +     P  +        +   + RA++  + +   +   
+Sbjct: 299 ERVLASGTGARIVTSHDDVCLIEFQVPASQDFKLAHKEIDQILKRAQVRPLAVGVHNDRQ 358
+
+Query: 357 SISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKF 416
+            + FC        A + + E        GL   L + + LA++++VG G+        +F
+Sbjct: 359 LLQFCYTSEVADSALKILDEA-------GLPGELRLRQGLALVAMVGAGVTRNPLHCHRF 411
+
+Query: 417 FAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQXXXXXXXXXXXX 476
+           +  L    +      Q     S+  V+      + ++  HQ +F  ++            
+Sbjct: 412 WQQLKGQPVEFTW--QSDDGISLVAVLRTGPTESLIQGLHQSVFRAEKRIGLVLFGKGNI 469
+
+Query: 477 XXALLEQLKRQQSWLKNKH-IDLRVCGVANSKALLTNVHGLN----LENWQEELAQAKEP 531
+               LE   R+QS L  +   +  + GV +S+  L +  GL+    L  + +E  +  E 
+Sbjct: 470 GSRWLELFAREQSTLSARTGFEFVLAGVVDSRRSLLSYDGLDASRALAFFNDEAVEQDEE 529
+
+Query: 532 FNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQ 591
+                L   ++ +   + V++D T+SQ +ADQY DF   GFHV++ NK A  S  + Y Q
+Sbjct: 530 ----SLFLWMRAHPYDDLVVLDVTASQQLADQYLDFASHGFHVISANKLAGASDSNKYRQ 585
+
+Query: 592 LRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGM 651
+           +  A EK+ R +LY+  VGAGLP+   +++L+++GD ++  SGI SG+LS++F + D  +
+Sbjct: 586 IHDAFEKTGRHWLYNATVGAGLPINHTVRDLIDSGDTILSISGIFSGTLSWLFLQFDGSV 645
+
+Query: 652 SFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNA 711
+            F+E    A + G TEPDPRDDLSG DV RKL+ILARE G  +E   + +E ++PA    
+Sbjct: 646 PFTELVDQAWQQGLTEPDPRDDLSGKDVMRKLVILAREAGYNIEPDQVRVESLVPAHCEG 705
+
+Query: 712 EGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKV 771
+            G +  F  N  +L++    R+  AR+ G VLRYV   D +G  RV +  V  + PL  +
+Sbjct: 706 -GSIDHFFENGDELNEQMVQRLEAAREMGLVLRYVARFDANGKARVGVEAVREDHPLASL 764
+
+Query: 772 KNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLR 812
+              +N  A  S +Y+  PLV+RG GAG DVTA  + +D+ R
+Sbjct: 765 LPCDNVFAIESRWYRDNPLVIRGPGAGRDVTAGAIQSDINR 805
+
+
+>gb|AAC76994.1| (AE000475) aspartokinase III, lysine sensitive [Escherichia coli]
+          Length = 449
+
+ Score =  184 bits (467), Expect = 3e-47
+ Identities = 142/471 (30%), Positives = 228/471 (48%), Gaps = 41/471 (8%)
+
+Query: 3   VLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQ---- 58
+           V KFGGTSVA+ +   R ADI+ S+A    V  VLSA A ITN LVA+ E    G+    
+Sbjct: 6   VSKFGGTSVADFDAMNRSADIVLSDANVRLV--VLSASAGITNLLVALAEGLEPGERFEK 63
+
+Query: 59  -DALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCP-- 115
+            DA+ NI                    F + +   + +    +I+ +L  I++L +    
+Sbjct: 64  LDAIRNIQ-------------------FAILERLRYPNVIREEIERLLENITVLAEAAAL 104
+
+Query: 116 ---DSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTR 172
+               ++   L+  GE MS  +   +L  R       D  + +     +  +  DIA    
+Sbjct: 105 ATSPALTDELVSHGELMSTLLFVEILRERDVQAQWFDVRKVMRTNDRFGRAEPDIAALAE 164
+
+Query: 173 RIAASRIPA--DHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVD 230
+             A   +P   + +V+  GF     KG    LGR GSDY+AA+LA  L A   +IWTDV 
+Sbjct: 165 LAALQLLPRLNEGLVITQGFIGSENKGRTTTLGRGGSDYTAALLAEALHASRVDIWTDVP 224
+
+Query: 231 GVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQA 290
+           G+YT DPR V  A+ +  +++ EA E++ FGAKVLHP T+ P  +  IP  + ++ +P+A
+Sbjct: 225 GIYTTDPRVVSAAKRIDEIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRA 284
+
+Query: 291 PGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT 350
+            GTL+  ++ E+    + ++   N  + ++    M    G  A VF  ++R  ISV LIT
+Sbjct: 285 GGTLV-CNKTENPPLFRALALRRNQTLLTLHSLNMLHSRGFLAEVFGILARHNISVDLIT 343
+
+Query: 351 QSSSEYSISFCVPQSDCV-RAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTL 409
+             +SE S++  +  +      +  + +   +EL    L  + V E LA+++++G+ +   
+Sbjct: 344 --TSEVSVALTLDTTGSTSTGDTLLTQSLLMEL--SALCRVEVEEGLALVALIGNDLSKA 399
+
+Query: 410 RGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLF 460
+            G+  + F  L   NI +  I  G+S  ++  +V  +DA   V+  H  LF
+Sbjct: 400 CGVGKEVFGVLEPFNIRM--ICYGASSHNLCFLVPGEDAEQVVQKLHSNLF 448
+
+
+>gb|AAC73282.1| (AE000126) uridylate kinase [Escherichia coli]
+          Length = 241
+
+ Score = 41.6 bits (96), Expect = 3e-04
+ Identities = 28/97 (28%), Positives = 44/97 (44%), Gaps = 8/97 (8%)
+
+Query: 199 LVVLGRNGSDYSAAVLAACLR-----ADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQE 253
+           +++    G+ +     AACLR     AD     T VDGV+T DP + P A + + ++Y E
+Sbjct: 132 VILSAGTGNPFFTTDSAACLRGIEIEADVVLKATKVDGVFTADPAKDPTATMYEQLTYSE 191
+
+Query: 254 AMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQA 290
+            +E      KV+     T     ++P  + N   P A
+Sbjct: 192 VLEKE---LKVMDLAAFTLARDHKLPIRVFNMNKPGA 225
+
+
+  Database: ecoli.aa
+    Posted date:  Dec 6, 2001  1:58 PM
+  Number of letters in database: 1,358,990
+  Number of sequences in database:  4289
+  
+Lambda     K      H
+   0.319    0.135    0.383 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 2022122
+Number of Sequences: 4289
+Number of extensions: 82424
+Number of successful extensions: 256
+Number of sequences better than 1.0e-03: 4
+Number of HSP's better than  0.0 without gapping: 3
+Number of HSP's successfully gapped in prelim test: 1
+Number of HSP's that attempted gapping in prelim test: 243
+Number of HSP's gapped (non-prelim): 4
+length of query: 820
+length of database: 1,358,990
+effective HSP length: 47
+effective length of query: 773
+effective length of database: 1,157,407
+effective search space: 894675611
+effective search space used: 894675611
+T: 11
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 41 (21.8 bits)
+S2: 92 (40.0 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+>gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogenase I [Escherichia coli]
+MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAA
+AQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHY
+LESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV
+PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSV
+SGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIIS
+VVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL
+LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV
+ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSL
+SYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA
+NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGND
+VTAAGVFADLLRTLSWKLGV

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.noseqs.wublastp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.noseqs.wublastp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.noseqs.wublastp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,96 @@
+BLASTP 2.0MP-WashU [18-Mar-2004] [linux26-ILP32F64 2004-03-13T04:13:13]
+
+Copyright (C) 1996-2004 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2004) http://blast.wustl.edu
+
+Query=  gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I, homoserine
+    dehydrogenase I [Escherichia coli]
+        (820 letters)
+
+Database:  ecoli.aa
+           4289 sequences; 1,358,990 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                              High  Probability
+Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
+
+gb|AAC73113.1| (AE000111) aspartokinase I, homoserine deh...  4141  0.        1
+gb|AAC76922.1| (AE000468) aspartokinase II and homoserine...   907  6.6e-93   1
+gb|AAC76994.1| (AE000475) aspartokinase III, lysine sensi...   483  2.8e-47   1
+
+
+
+>gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogenase I
+            [Escherichia coli]
+        Length = 820
+
+ Score = 4141 (1462.8 bits), Expect = 0., P = 0.
+ Identities = 820/820 (100%), Positives = 820/820 (100%)
+
+Query: 1 - 820
+Sbjct: 1 - 820
+
+
+>gb|AAC76922.1| (AE000468) aspartokinase II and homoserine dehydrogenase II
+            [Escherichia coli]
+        Length = 810
+
+ Score = 907 (324.3 bits), Expect = 6.6e-93, P = 6.6e-93
+ Identities = 250/818 (30%), Positives = 413/818 (50%)
+
+Query: 5 - 812
+Sbjct: 16 - 805
+
+
+>gb|AAC76994.1| (AE000475) aspartokinase III, lysine sensitive [Escherichia
+            coli]
+        Length = 449
+
+ Score = 483 (175.1 bits), Expect = 2.8e-47, P = 2.8e-47
+ Identities = 149/467 (31%), Positives = 233/467 (49%)
+
+Query: 3 - 460
+Sbjct: 6 - 448
+
+
+Parameters:
+  -i t/data/ecolitst.fa
+  -d /data/blast/ecoli.aa
+  noseqs
+  E=1e-5
+  postsw
+
+  ctxfactor=1.00
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +0      0   BLOSUM62        0.319   0.135   0.384    same    same    same
+               Q=9,R=2         0.244   0.0300  0.180     n/a     n/a     n/a
+
+  Query
+  Frame  MatID  Length  Eff.Length     E     S W   T  X   E2     S2
+   +0      0      820       820   1.0e-05  121 3  11 22  0.19    34
+                                                     37  0.22    37
+
+
+Statistics:
+
+  Database:  /data/blast/ecoli.aa
+   Title:  ecoli.aa
+   Posted:  1:55:19 PM EDT Jun 3, 2004
+   Created:  1:55:19 PM EDT Jun 3, 2004
+   Format:  XDF-1
+   # of letters in database:  1,358,990
+   # of sequences in database:  4289
+   # of database sequences satisfying E:  3
+  No. of states in DFA:  618 (66 KB)
+  Total size of DFA:  358 KB (2179 KB)
+  Time to generate neighborhood:  0.00u 0.01s 0.01t  Elapsed: 00:00:00
+  No. of threads or processors used:  2
+  Search cpu time:  1.54u 0.03s 1.57t  Elapsed: 00:00:01
+  Total cpu time:  1.58u 0.04s 1.62t  Elapsed: 00:00:01
+  Start:  Thu Jun  3 14:00:59 2004   End:  Thu Jun  3 14:01:00 2004

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.wublastp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.wublastp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ecolitst.wublastp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,266 @@
+BLASTP 2.0MP-WashU [12-Feb-2001] [linux-i686 01:36:08 31-Jan-2001]
+
+Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2000) http://blast.wustl.edu
+
+Query=  gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I, homoserine
+    dehydrogenase I [Escherichia coli]
+        (820 letters)
+
+Database:  ecoli.aa
+           4289 sequences; 1,358,990 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                              High  Probability
+Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
+
+gb|AAC73113.1| (AE000111) aspartokinase I, homoserine deh...  4141  0.0       1
+gb|AAC76922.1| (AE000468) aspartokinase II and homoserine...   844  3.1e-86   1
+gb|AAC76994.1| (AE000475) aspartokinase III, lysine sensi...   483  2.8e-47   1
+gb|AAC73282.1| (AE000126) uridylate kinase [Escherichia c...    97  0.0010    1
+
+
+
+>gb|AAC73113.1| (AE000111) aspartokinase I, homoserine dehydrogenase I
+            [Escherichia coli]
+        Length = 820
+
+ Score = 4141 (1462.8 bits), Expect = 0.0, P = 0.0
+ Identities = 820/820 (100%), Positives = 820/820 (100%)
+
+Query:     1 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60
+             MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA
+Sbjct:     1 MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA 60
+
+Query:    61 LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120
+             LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA
+Sbjct:    61 LPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINA 120
+
+Query:   121 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP 180
+             ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP
+Sbjct:   121 ALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIP 180
+
+Query:   181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240
+             ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV
+Sbjct:   181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240
+
+Query:   241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300
+             PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD
+Sbjct:   241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300
+
+Query:   301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360
+             EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF
+Sbjct:   301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360
+
+Query:   361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 420
+             CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL
+Sbjct:   361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 420
+
+Query:   421 ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL 480
+             ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL
+Sbjct:   421 ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL 480
+
+Query:   481 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL 540
+             LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL
+Sbjct:   481 LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRL 540
+
+Query:   541 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR 600
+             VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR
+Sbjct:   541 VKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSR 600
+
+Query:   601 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA 660
+             RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA
+Sbjct:   601 RKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLA 660
+
+Query:   661 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA 720
+             REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA
+Sbjct:   661 REMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA 720
+
+Query:   721 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF 780
+             NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF
+Sbjct:   721 NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAF 780
+
+Query:   781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
+             YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV
+Sbjct:   781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
+
+
+>gb|AAC76922.1| (AE000468) aspartokinase II and homoserine dehydrogenase II
+            [Escherichia coli]
+        Length = 810
+
+ Score = 844 (302.2 bits), Expect = 3.1e-86, P = 3.1e-86
+ Identities = 222/705 (31%), Positives = 356/705 (50%)
+
+Query:   116 DSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIA 175
+             D++ A ++  GE  S  +M+ VL  +G     +D  E L A     +  VD   S   + 
+Sbjct:   119 DAVYAEVVGHGEVWSARLMSAVLNQQGLPAAWLDAREFLRAE-RAAQPQVDEGLSYPLLQ 177
+
+Query:   176 ASRI--PADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVY 233
+                +  P   +V+  GF + N  GE V+LGRNGSDYSA  + A        IW+DV GVY
+Sbjct:   178 QLLVQHPGKRLVV-TGFISRNNAGETVLLGRNGSDYSATQIGALAGVSRVTIWSDVAGVY 236
+
+Query:   234 TCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGT 293
+             + DPR+V DA LL  +   EA EL+   A VLH RT+ P++  +I   ++ +  P    T
+Sbjct:   237 SADPRKVKDACLLPLLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGST 296
+
+Query:   294 LIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSS 353
+              I           + +++ +++ +     P  +        +   + RA++  + +   +
+Sbjct:   297 RIERVLASGT-GARIVTSHDDVCLIEFQVPASQDFKLAHKEIDQILKRAQVRPLAVGVHN 355
+
+Query:   354 SEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGIS 413
+                 + FC        A + + E        GL   L + + LA++++VG G+ T   + 
+Sbjct:   356 DRQLLQFCYTSEVADSALKILDEA-------GLPGELRLRQGLALVAMVGAGV-TRNPLH 407
+
+Query:   414 A-KFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIG 472
+               +F+  L    +      Q     S+  V+      + ++  HQ +F  ++ I + + G
+Sbjct:   408 CHRFWQQLKGQPVEFTW--QSDDGISLVAVLRTGPTESLIQGLHQSVFRAEKRIGLVLFG 465
+
+Query:   473 VGGVGGALLEQLKRQQSWLKNKH-IDLRVCGVANSKALLTNVHGLN----LENWQEELAQ 527
+              G +G   LE   R+QS L  +   +  + GV +S+  L +  GL+    L  + +E  +
+Sbjct:   466 KGNIGSRWLELFAREQSTLSARTGFEFVLAGVVDSRRSLLSYDGLDASRALAFFNDEAVE 525
+
+Query:   528 AKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMD 587
+               E      L   ++ +   + V++D T+SQ +ADQY DF   GFHV++ NK A  S  +
+Sbjct:   526 QDEE----SLFLWMRAHPYDDLVVLDVTASQQLADQYLDFASHGFHVISANKLAGASDSN 581
+
+Query:   588 YYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKL 647
+              Y Q+  A EK+ R +LY+  VGAGLP+   +++L+++GD ++  SGI SG+LS++F + 
+Sbjct:   582 KYRQIHDAFEKTGRHWLYNATVGAGLPINHTVRDLIDSGDTILSISGIFSGTLSWLFLQF 641
+
+Query:   648 DEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPA 707
+             D  + F+E    A + G TEPDPRDDLSG DV RKL+ILARE G  +E   + +E ++PA
+Sbjct:   642 DGSVPFTELVDQAWQQGLTEPDPRDDLSGKDVMRKLVILAREAGYNIEPDQVRVESLVPA 701
+
+Query:   708 EFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDP 767
+                  G +  F  N  +L++    R+  AR+ G VLRYV   D +G  RV +  V  + P
+Sbjct:   702 HCEG-GSIDHFFENGDELNEQMVQRLEAAREMGLVLRYVARFDANGKARVGVEAVREDHP 760
+
+Query:   768 LFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLR 812
+             L  +   +N  A  S +Y+  PLV+RG GAG DVTA  + +D+ R
+Sbjct:   761 LASLLPCDNVFAIESRWYRDNPLVIRGPGAGRDVTAGAIQSDINR 805
+
+ Score = 321 (118.1 bits), Expect = 3.6e-27, P = 3.6e-27
+ Identities = 108/406 (26%), Positives = 191/406 (47%)
+
+Query:     5 KFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNI 64
+             KFGG+S+A+ + +LRVA I+   ++   +  V+SA    TN L+  ++ + + + +   +
+Sbjct:    16 KFGGSSLADVKCYLRVAGIMAEYSQPDDMM-VVSAAGSTTNQLINWLKLSQTDRLSAHQV 74
+
+Query:    65 SDAERIF-AELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLH-GISLLGQCPDSINAAL 122
+                 R +  +L++GL  A+    L  +  FV  +  ++  +L  GI+      D++ A +
+Sbjct:    75 QQTLRRYQCDLISGLLPAEEADSL--ISAFVS-DLERLAALLDSGIN------DAVYAEV 125
+
+Query:   123 ICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRI--P 180
+             +  GE  S  +M+ VL  +G     +D  E L A     +  VD   S   +    +  P
+Sbjct:   126 VGHGEVWSARLMSAVLNQQGLPAAWLDAREFLRAE-RAAQPQVDEGLSYPLLQQLLVQHP 184
+
+Query:   181 ADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV 240
+                +V+  GF + N  GE V+LGRNGSDYSA  + A        IW+DV GVY+ DPR+V
+Sbjct:   185 GKRLVV-TGFISRNNAGETVLLGRNGSDYSATQIGALAGVSRVTIWSDVAGVYSADPRKV 243
+
+Query:   241 PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRD 300
+              DA LL  +   EA EL+   A VLH RT+ P++  +I   ++ +  P    T I     
+Sbjct:   244 KDACLLPLLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLA 303
+
+Query:   301 EDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISF 360
+                   + +++ +++ +     P  +        +   + RA++  + +   +    + F
+Sbjct:   304 SGT-GARIVTSHDDVCLIEFQVPASQDFKLAHKEIDQILKRAQVRPLAVGVHNDRQLLQF 362
+
+Query:   361 CVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGM 406
+             C        A + + E        GL   L + + LA++++VG G+
+Sbjct:   363 CYTSEVADSALKILDEA-------GLPGELRLRQGLALVAMVGAGV 401
+
+
+>gb|AAC76994.1| (AE000475) aspartokinase III, lysine sensitive [Escherichia
+            coli]
+        Length = 449
+
+ Score = 483 (175.1 bits), Expect = 2.8e-47, P = 2.8e-47
+ Identities = 149/467 (31%), Positives = 233/467 (49%)
+
+Query:     3 VLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQ---- 58
+             V KFGGTSVA+ +   R ADI+ S+A    V  VLSA A ITN LVA+ E    G+    
+Sbjct:     6 VSKFGGTSVADFDAMNRSADIVLSDANVRLV--VLSASAGITNLLVALAEGLEPGERFEK 63
+
+Query:    59 -DALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDS 117
+              DA+ NI      FA +L  L      +P   ++  +++    I  VL   + L   P +
+Sbjct:    64 LDAIRNIQ-----FA-ILERLR-----YPNV-IREEIERLLENIT-VLAEAAALATSP-A 109
+
+Query:   118 INAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVG-HYLESTVDIAESTRRIAA 176
+             +   L+  GE MS  +   +L  R       D V K++     +  +  DIA      A 
+Sbjct:   110 LTDELVSHGELMSTLLFVEILRERDVQAQWFD-VRKVMRTNDRFGRAEPDIAALAELAAL 168
+
+Query:   177 SRIPA--DHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYT 234
+               +P   + +V+  GF     KG    LGR GSDY+AA+LA  L A   +IWTDV G+YT
+Sbjct:   169 QLLPRLNEGLVITQGFIGSENKGRTTTLGRGGSDYTAALLAEALHASRVDIWTDVPGIYT 228
+
+Query:   235 CDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTL 294
+              DPR V  A+ +  +++ EA E++ FGAKVLHP T+ P  +  IP  + ++ +P+A GTL
+Sbjct:   229 TDPRVVSAAKRIDEIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTL 288
+
+Query:   295 IGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSS 354
+             +  ++ E+    + ++   N  + ++    M    G  A VF  ++R  ISV LIT  +S
+Sbjct:   289 V-CNKTENPPLFRALALRRNQTLLTLHSLNMLHSRGFLAEVFGILARHNISVDLIT--TS 345
+
+Query:   355 EYSISFCVPQSDCVRA-ERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGIS 413
+             E S++  +  +      +  + +   +EL    L  + V E LA+++++G+ +    G+ 
+Sbjct:   346 EVSVALTLDTTGSTSTGDTLLTQSLLMELSA--LCRVEVEEGLALVALIGNDLSKACGVG 403
+
+Query:   414 AKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLF 460
+              + F  L   NI +  I  G+S  ++  +V  +DA   V+  H  LF
+Sbjct:   404 KEVFGVLEPFNIRM--ICYGASSHNLCFLVPGEDAEQVVQKLHSNLF 448
+
+
+>gb|AAC73282.1| (AE000126) uridylate kinase [Escherichia coli]
+        Length = 241
+
+ Score = 97 (39.2 bits), Expect = 0.0010, P = 0.0010
+ Identities = 30/98 (30%), Positives = 45/98 (45%)
+
+Query:   199 LVVLGRN-GSDYSAAVLAACLR-----ADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQ 252
+             +V+L    G+ +     AACLR     AD     T VDGV+T DP + P A + + ++Y 
+Sbjct:   131 VVILSAGTGNPFFTTDSAACLRGIEIEADVVLKATKVDGVFTADPAKDPTATMYEQLTYS 190
+
+Query:   253 EAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQA 290
+             E +E      KV+     T     ++P  + N   P A
+Sbjct:   191 EVLEKEL---KVMDLAAFTLARDHKLPIRVFNMNKPGA 225
+
+
+Parameters:
+  E=0.01
+
+  ctxfactor=1.00
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +0      0   BLOSUM62        0.319   0.136   0.384    same    same    same
+               Q=9,R=2         0.244   0.0300  0.180     n/a     n/a     n/a
+
+  Query
+  Frame  MatID  Length  Eff.Length     E    S W   T  X   E2     S2
+   +0      0      820       820     0.010  93 3  11 22  0.19    34
+                                                    37  0.22    37
+
+
+Statistics:
+
+  Database:  /home/jes12/db/ecoli.aa
+   Title:  ecoli.aa
+   Posted:  2:52:35 PM EST Nov 18, 2001
+   Created:  9:46:47 AM EST Nov 18, 2001
+   Format:  XDF-1
+   # of letters in database:  1,358,990
+   # of sequences in database:  4289
+   # of database sequences satisfying E:  4
+  No. of states in DFA:  573 (61 KB)
+  Total size of DFA:  281 KB (1149 KB)
+  Time to generate neighborhood:  0.00u 0.02s 0.02t  Elapsed: 00:00:00
+  No. of threads or processors used:  1
+  Search cpu time:  1.58u 0.00s 1.58t  Elapsed: 00:00:01
+  Total cpu time:  1.59u 0.02s 1.61t  Elapsed: 00:00:01
+  Start:  Thu Dec  6 11:09:14 2001   End:  Thu Dec  6 11:09:15 2001

Added: trunk/packages/bioperl/branches/upstream/current/t/data/empty.bl2seq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/empty.bl2seq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/empty.bl2seq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,31 @@
+Query= 
+         (468 letters)
+
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 0
+Number of Sequences: 0
+Number of extensions: 0
+Number of successful extensions: 0
+Number of sequences better than 10.0: 0
+length of query: 468
+length of database: 2400
+effective HSP length: 9
+effective length of query: 459
+effective length of database: 2355
+effective search space:  1080945
+effective search space used:  1080945
+T: 0
+A: 30
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 9 (18.3 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/entrezgene.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/entrezgene.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/entrezgene.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,27729 @@
+Entrezgene ::= {
+  track-info {
+    geneid 1 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 14 ,
+        hour 13 ,
+        minute 15 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "19" } } } ,
+  gene {
+    locus "A1BG" ,
+    desc "alpha-1-B glycoprotein" ,
+    maploc "19q13.4" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 138670 } } ,
+    syn {
+      "A1B" ,
+      "ABG" ,
+      "GAB" ,
+      "HYST2477" ,
+      "DKFZp686F0970" } ,
+    locus-tag "HGNC:5" } ,
+  prot {
+    name {
+      "alpha 1B-glycoprotein" ,
+      "alpha-1B-glycoprotein" } } ,
+  summary "The protein encoded by this gene is a plasma glycoprotein of
+ unknown function. The protein shows sequence similarity to the variable
+ regions of some immunoglobulin supergene family member proteins." ,
+  location {
+    {
+      display-str "19q13.4" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 1 ,
+    src-str2 "1" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000019" ,
+      version 8 ,
+      seqs {
+        int {
+          from 63548355 ,
+          to 63556668 ,
+          strand minus ,
+          id
+            gi 42406306 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_130786" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 63548355 ,
+                to 63550206 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63550530 ,
+                to 63550817 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63553547 ,
+                to 63553828 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63554568 ,
+                to 63554864 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63555460 ,
+                to 63555732 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63556105 ,
+                to 63556374 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63556469 ,
+                to 63556504 ,
+                strand minus ,
+                id
+                  gi 42406306 } ,
+              int {
+                from 63556581 ,
+                to 63556668 ,
+                strand minus ,
+                id
+                  gi 42406306 } } } ,
+          seqs {
+            whole
+              gi 21071029 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_570602" ,
+              version 2 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 63550199 ,
+                    to 63550206 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63550530 ,
+                    to 63550817 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63553547 ,
+                    to 63553828 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63554568 ,
+                    to 63554864 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63555460 ,
+                    to 63555732 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63556105 ,
+                    to 63556374 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63556469 ,
+                    to 63556504 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } ,
+                  {
+                    from 63556581 ,
+                    to 63556614 ,
+                    strand minus ,
+                    id
+                      gi 42406306 } } } ,
+              seqs {
+                whole
+                  gi 21071030 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_011109" ,
+      version 15 ,
+      seqs {
+        int {
+          from 31124733 ,
+          to 31133046 ,
+          strand minus ,
+          id
+            gi 29800594 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_130786" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 31124733 ,
+                to 31126584 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31126908 ,
+                to 31127195 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31129925 ,
+                to 31130206 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31130946 ,
+                to 31131242 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31131838 ,
+                to 31132110 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31132483 ,
+                to 31132752 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31132847 ,
+                to 31132882 ,
+                strand minus ,
+                id
+                  gi 29800594 } ,
+              int {
+                from 31132959 ,
+                to 31133046 ,
+                strand minus ,
+                id
+                  gi 29800594 } } } ,
+          seqs {
+            whole
+              gi 21071029 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_570602" ,
+              version 2 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 31126577 ,
+                    to 31126584 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31126908 ,
+                    to 31127195 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31129925 ,
+                    to 31130206 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31130946 ,
+                    to 31131242 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31131838 ,
+                    to 31132110 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31132483 ,
+                    to 31132752 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31132847 ,
+                    to 31132882 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } ,
+                  {
+                    from 31132959 ,
+                    to 31132992 ,
+                    strand minus ,
+                    id
+                      gi 29800594 } } } ,
+              seqs {
+                whole
+                  gi 21071030 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086907" ,
+      version 1 ,
+      seqs {
+        int {
+          from 8163589 ,
+          to 8172398 ,
+          strand minus ,
+          id
+            gi 51475048 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_130786" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 8163589 ,
+                to 8165440 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8165763 ,
+                to 8166050 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8169274 ,
+                to 8169555 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8170297 ,
+                to 8170593 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8171190 ,
+                to 8171462 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8171835 ,
+                to 8172104 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8172199 ,
+                to 8172234 ,
+                strand minus ,
+                id
+                  gi 51475048 } ,
+              int {
+                from 8172311 ,
+                to 8172398 ,
+                strand minus ,
+                id
+                  gi 51475048 } } } ,
+          seqs {
+            whole
+              gi 21071029 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_570602" ,
+              version 2 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 8165433 ,
+                    to 8165440 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8165763 ,
+                    to 8166050 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8169274 ,
+                    to 8169555 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8170297 ,
+                    to 8170593 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8171190 ,
+                    to 8171462 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8171835 ,
+                    to 8172104 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8172199 ,
+                    to 8172234 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } ,
+                  {
+                    from 8172311 ,
+                    to 8172344 ,
+                    strand minus ,
+                    id
+                      gi 51475048 } } } ,
+              seqs {
+                whole
+                  gi 21071030 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "A1BG" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "alpha-1-B glycoprotein" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 3458201 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5554 } ,
+                  anchor "molecular_function unknown" ,
+                  post-text "evidence: ND" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4 } ,
+                  anchor "biological_process unknown" ,
+                  post-text "evidence: ND" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 3458201 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5576 } ,
+                  anchor "extracellular region" ,
+                  post-text "evidence: IDA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 11167 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=19&
+MAPS=genes-r-org/rat-chr/human%3A19,genes-r-org/mouse-chr/human%3A19,genes-r-o
+rg/human-chr19&query=e%3A1[id]+AND+gene[obj_type]&QSTR=a1bg&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 5 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15461460 ,
+        pmid 15221005 ,
+        pmid 14702039 ,
+        pmid 12477932 ,
+        pmid 8889549 ,
+        pmid 3610142 ,
+        pmid 3458201 ,
+        pmid 2591067 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 89991 } ,
+              anchor "SHGC-67307" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH80032" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH86145" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 152074 } ,
+              anchor "D11S2921" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "GDB:461809" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 155756 } ,
+              anchor "D10S16" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "D10S23" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "GDB:193809" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_130786" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 21071029 } ,
+              anchor "NM_130786" } } ,
+          seqs {
+            whole
+              gi 21071029 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_570602" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 21071030 } ,
+                  anchor "NP_570602" ,
+                  post-text "alpha 1B-glycoprotein" } } ,
+              seqs {
+                whole
+                  gi 21071030 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Consensus CDS (CCDS)" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "CCDS" ,
+                        tag
+                          str "CCDS12976.1" } ,
+                      anchor "CCDS12976.1" } } } ,
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 21071030 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 365 } ,
+                          anchor "smart00408: IGc2; Immunoglobulin C-2 Type" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 223 - 282  Blast Score: 103" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "AF414429,AK055885,AK056201" } ,
+                  anchor "AF414429,AK055885,AK056201" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC010642" ,
+          version 5 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 9929687 } ,
+              anchor "AC010642" } } ,
+          seqs {
+            int {
+              from 41119 ,
+              to 43581 ,
+              strand plus ,
+              id
+                gi 9929687 } } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AB073611" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 51555784 } ,
+              anchor "AB073611" } } ,
+          seqs {
+            whole
+              gi 51555784 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAD38648" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 51555785 } ,
+                  anchor "BAD38648" } } ,
+              seqs {
+                whole
+                  gi 51555785 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF414429" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 15778555 } ,
+              anchor "AF414429" } } ,
+          seqs {
+            whole
+              gi 15778555 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAL07469" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 15778556 } ,
+                  anchor "AAL07469" } } ,
+              seqs {
+                whole
+                  gi 15778556 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK055885" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 16550723 } ,
+              anchor "AK055885" } } ,
+          seqs {
+            whole
+              gi 16550723 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK056201" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 16551539 } ,
+              anchor "AK056201" } } ,
+          seqs {
+            whole
+              gi 16551539 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC035719" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 23273475 } ,
+              anchor "BC035719" } } ,
+          seqs {
+            whole
+              gi 23273475 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH35719" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 23273476 } ,
+                  anchor "AAH35719" } } ,
+              seqs {
+                whole
+                  gi 23273476 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BX537419" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 31873339 } ,
+              anchor "BX537419" } } ,
+          seqs {
+            whole
+              gi 31873339 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAD97661" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 31873340 } ,
+                  anchor "CAD97661" } } ,
+              seqs {
+                whole
+                  gi 31873340 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P04217" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 46577680 } ,
+                  anchor "P04217" } } ,
+              seqs {
+                whole
+                  gi 46577680 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "1" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_011109.15&gene=A1BG&lid=1&from=31124734&to=31133047" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "1" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_011109.15&gene=A1BG&lid=1" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.529161" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.529161" } ,
+              anchor "Hs.529161" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=52
+9161" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "138670" } ,
+              anchor "138670" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "1" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+1[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 1 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=1" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:119638" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=AK055
+885" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+AK055885" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC035719" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC035719" } } } } } ,
+    {
+      type generif ,
+      text "A1BG-cysteine-rich secretory protein 3 complex displays a similar
+ function in protecting the circulation from a potentially harmful effect of
+ free CRISP-3" ,
+      version 0 ,
+      refs {
+        pmid 15461460 } ,
+      create-date
+        str "Nov  6 2004 10:01AM" ,
+      update-date
+        str "Nov  6 2004  3:27PM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 1 } ,
+    {
+      db "MIM" ,
+      tag
+        id 138670 } } ,
+  xtra-index-terms {
+    "LOC1" } }
+Entrezgene ::= {
+  track-info {
+    geneid 2 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 14 ,
+        hour 13 ,
+        minute 15 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "12" } } } ,
+  gene {
+    locus "A2M" ,
+    desc "alpha-2-macroglobulin" ,
+    maploc "12p13.3-p12.3" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 103950 } } ,
+    syn {
+      "FWP007" ,
+      "S863-7" ,
+      "DKFZp779B086" } ,
+    locus-tag "HGNC:7" } ,
+  prot {
+    name {
+      "alpha-2-macroglobulin" } } ,
+  summary "Alpha-2-macroglobulin is a protease inhibitor and cytokine
+ transporter. It inhibits many proteases, including trypsin, thrombin and
+ collagenase. A2M is implicated in Alzheimer disease (AD) due to its ability
+ to mediate the clearance and degradation of A-beta, the major component of
+ beta-amyloid deposits." ,
+  location {
+    {
+      display-str "12p13.3-p12.3" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 2 ,
+    src-str2 "2" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000012" ,
+      version 9 ,
+      seqs {
+        int {
+          from 9111576 ,
+          to 9159755 ,
+          strand minus ,
+          id
+            gi 51511728 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000014" ,
+          version 3 ,
+          genomic-coords {
+            mix {
+              int {
+                from 9111576 ,
+                to 9111701 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9112045 ,
+                to 9112086 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9112602 ,
+                to 9112704 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9113607 ,
+                to 9113675 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9114350 ,
+                to 9114440 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9116221 ,
+                to 9116348 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9116515 ,
+                to 9116733 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9118422 ,
+                to 9118645 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9120618 ,
+                to 9120798 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9121208 ,
+                to 9121282 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9121563 ,
+                to 9121719 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9123106 ,
+                to 9123193 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9123501 ,
+                to 9123677 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9123956 ,
+                to 9124039 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9133062 ,
+                to 9133113 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9133764 ,
+                to 9133885 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9134218 ,
+                to 9134344 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9135063 ,
+                to 9135291 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9137327 ,
+                to 9137441 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9138835 ,
+                to 9138946 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9139401 ,
+                to 9139562 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9142469 ,
+                to 9142618 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9143243 ,
+                to 9143385 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9145006 ,
+                to 9145069 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9145309 ,
+                to 9145536 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9148101 ,
+                to 9148262 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9150098 ,
+                to 9150207 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9150353 ,
+                to 9150467 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9151386 ,
+                to 9151506 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9153183 ,
+                to 9153267 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9153729 ,
+                to 9153897 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9154176 ,
+                to 9154196 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9156021 ,
+                to 9156073 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9156239 ,
+                to 9156398 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9157222 ,
+                to 9157405 ,
+                strand minus ,
+                id
+                  gi 51511728 } ,
+              int {
+                from 9159626 ,
+                to 9159755 ,
+                strand minus ,
+                id
+                  gi 51511728 } } } ,
+          seqs {
+            whole
+              gi 6226959 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              label "precursor" ,
+              accession "NP_000005" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 9111685 ,
+                    to 9111701 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9112045 ,
+                    to 9112086 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9112602 ,
+                    to 9112704 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9113607 ,
+                    to 9113675 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9114350 ,
+                    to 9114440 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9116221 ,
+                    to 9116348 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9116515 ,
+                    to 9116733 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9118422 ,
+                    to 9118645 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9120618 ,
+                    to 9120798 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9121208 ,
+                    to 9121282 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9121563 ,
+                    to 9121719 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9123106 ,
+                    to 9123193 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9123501 ,
+                    to 9123677 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9123956 ,
+                    to 9124039 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9133062 ,
+                    to 9133113 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9133764 ,
+                    to 9133885 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9134218 ,
+                    to 9134344 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9135063 ,
+                    to 9135291 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9137327 ,
+                    to 9137441 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9138835 ,
+                    to 9138946 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9139401 ,
+                    to 9139562 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9142469 ,
+                    to 9142618 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9143243 ,
+                    to 9143385 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9145006 ,
+                    to 9145069 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9145309 ,
+                    to 9145536 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9148101 ,
+                    to 9148262 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9150098 ,
+                    to 9150207 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9150353 ,
+                    to 9150467 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9151386 ,
+                    to 9151506 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9153183 ,
+                    to 9153267 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9153729 ,
+                    to 9153897 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9154176 ,
+                    to 9154196 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9156021 ,
+                    to 9156073 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9156239 ,
+                    to 9156398 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9157222 ,
+                    to 9157405 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } ,
+                  {
+                    from 9159626 ,
+                    to 9159711 ,
+                    strand minus ,
+                    id
+                      gi 51511728 } } } ,
+              seqs {
+                whole
+                  gi 4557225 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_009714" ,
+      version 16 ,
+      seqs {
+        int {
+          from 1979283 ,
+          to 2027462 ,
+          strand minus ,
+          id
+            gi 37543832 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000014" ,
+          version 3 ,
+          genomic-coords {
+            mix {
+              int {
+                from 1979283 ,
+                to 1979408 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1979752 ,
+                to 1979793 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1980309 ,
+                to 1980411 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1981314 ,
+                to 1981382 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1982057 ,
+                to 1982147 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1983928 ,
+                to 1984055 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1984222 ,
+                to 1984440 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1986129 ,
+                to 1986352 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1988325 ,
+                to 1988505 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1988915 ,
+                to 1988989 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1989270 ,
+                to 1989426 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1990813 ,
+                to 1990900 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1991208 ,
+                to 1991384 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 1991663 ,
+                to 1991746 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2000769 ,
+                to 2000820 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2001471 ,
+                to 2001592 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2001925 ,
+                to 2002051 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2002770 ,
+                to 2002998 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2005034 ,
+                to 2005148 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2006542 ,
+                to 2006653 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2007108 ,
+                to 2007269 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2010176 ,
+                to 2010325 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2010950 ,
+                to 2011092 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2012713 ,
+                to 2012776 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2013016 ,
+                to 2013243 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2015808 ,
+                to 2015969 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2017805 ,
+                to 2017914 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2018060 ,
+                to 2018174 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2019093 ,
+                to 2019213 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2020890 ,
+                to 2020974 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2021436 ,
+                to 2021604 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2021883 ,
+                to 2021903 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2023728 ,
+                to 2023780 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2023946 ,
+                to 2024105 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2024929 ,
+                to 2025112 ,
+                strand minus ,
+                id
+                  gi 37543832 } ,
+              int {
+                from 2027333 ,
+                to 2027462 ,
+                strand minus ,
+                id
+                  gi 37543832 } } } ,
+          seqs {
+            whole
+              gi 6226959 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              label "precursor" ,
+              accession "NP_000005" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 1979392 ,
+                    to 1979408 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1979752 ,
+                    to 1979793 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1980309 ,
+                    to 1980411 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1981314 ,
+                    to 1981382 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1982057 ,
+                    to 1982147 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1983928 ,
+                    to 1984055 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1984222 ,
+                    to 1984440 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1986129 ,
+                    to 1986352 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1988325 ,
+                    to 1988505 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1988915 ,
+                    to 1988989 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1989270 ,
+                    to 1989426 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1990813 ,
+                    to 1990900 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1991208 ,
+                    to 1991384 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 1991663 ,
+                    to 1991746 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2000769 ,
+                    to 2000820 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2001471 ,
+                    to 2001592 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2001925 ,
+                    to 2002051 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2002770 ,
+                    to 2002998 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2005034 ,
+                    to 2005148 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2006542 ,
+                    to 2006653 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2007108 ,
+                    to 2007269 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2010176 ,
+                    to 2010325 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2010950 ,
+                    to 2011092 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2012713 ,
+                    to 2012776 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2013016 ,
+                    to 2013243 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2015808 ,
+                    to 2015969 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2017805 ,
+                    to 2017914 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2018060 ,
+                    to 2018174 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2019093 ,
+                    to 2019213 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2020890 ,
+                    to 2020974 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2021436 ,
+                    to 2021604 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2021883 ,
+                    to 2021903 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2023728 ,
+                    to 2023780 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2023946 ,
+                    to 2024105 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2024929 ,
+                    to 2025112 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } ,
+                  {
+                    from 2027333 ,
+                    to 2027418 ,
+                    strand minus ,
+                    id
+                      gi 37543832 } } } ,
+              seqs {
+                whole
+                  gi 4557225 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086792" ,
+      version 1 ,
+      seqs {
+        int {
+          from 4173171 ,
+          to 4221277 ,
+          strand minus ,
+          id
+            gi 51471135 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000014" ,
+          version 3 ,
+          genomic-coords {
+            mix {
+              int {
+                from 4173171 ,
+                to 4173296 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4173640 ,
+                to 4173681 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4174197 ,
+                to 4174299 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4175201 ,
+                to 4175269 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4175944 ,
+                to 4176034 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4177816 ,
+                to 4177943 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4178110 ,
+                to 4178328 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4180017 ,
+                to 4180240 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4182213 ,
+                to 4182393 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4182803 ,
+                to 4182877 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4183158 ,
+                to 4183314 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4184702 ,
+                to 4184789 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4185097 ,
+                to 4185273 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4185552 ,
+                to 4185635 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4194661 ,
+                to 4194712 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4195363 ,
+                to 4195484 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4195817 ,
+                to 4195943 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4196662 ,
+                to 4196890 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4198926 ,
+                to 4199040 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4200364 ,
+                to 4200475 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4200930 ,
+                to 4201091 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4203997 ,
+                to 4204146 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4204771 ,
+                to 4204913 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4206534 ,
+                to 4206597 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4206837 ,
+                to 4207064 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4209629 ,
+                to 4209790 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4211627 ,
+                to 4211736 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4211882 ,
+                to 4211996 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4212916 ,
+                to 4213036 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4214705 ,
+                to 4214789 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4215251 ,
+                to 4215419 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4215698 ,
+                to 4215718 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4217543 ,
+                to 4217595 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4217761 ,
+                to 4217920 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4218744 ,
+                to 4218927 ,
+                strand minus ,
+                id
+                  gi 51471135 } ,
+              int {
+                from 4221148 ,
+                to 4221277 ,
+                strand minus ,
+                id
+                  gi 51471135 } } } ,
+          seqs {
+            whole
+              gi 6226959 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              label "precursor" ,
+              accession "NP_000005" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 4173280 ,
+                    to 4173296 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4173640 ,
+                    to 4173681 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4174197 ,
+                    to 4174299 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4175201 ,
+                    to 4175269 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4175944 ,
+                    to 4176034 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4177816 ,
+                    to 4177943 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4178110 ,
+                    to 4178328 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4180017 ,
+                    to 4180240 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4182213 ,
+                    to 4182393 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4182803 ,
+                    to 4182877 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4183158 ,
+                    to 4183314 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4184702 ,
+                    to 4184789 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4185097 ,
+                    to 4185273 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4185552 ,
+                    to 4185635 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4194661 ,
+                    to 4194712 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4195363 ,
+                    to 4195484 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4195817 ,
+                    to 4195943 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4196662 ,
+                    to 4196890 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4198926 ,
+                    to 4199040 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4200364 ,
+                    to 4200475 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4200930 ,
+                    to 4201091 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4203997 ,
+                    to 4204146 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4204771 ,
+                    to 4204913 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4206534 ,
+                    to 4206597 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4206837 ,
+                    to 4207064 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4209629 ,
+                    to 4209790 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4211627 ,
+                    to 4211736 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4211882 ,
+                    to 4211996 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4212916 ,
+                    to 4213036 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4214705 ,
+                    to 4214789 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4215251 ,
+                    to 4215419 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4215698 ,
+                    to 4215718 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4217543 ,
+                    to 4217595 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4217761 ,
+                    to 4217920 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4218744 ,
+                    to 4218927 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } ,
+                  {
+                    from 4221148 ,
+                    to 4221233 ,
+                    strand minus ,
+                    id
+                      gi 51471135 } } } ,
+              seqs {
+                whole
+                  gi 4557225 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "A2M" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "alpha-2-macroglobulin" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 11435418 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 19899 } ,
+                  anchor "enzyme binding" ,
+                  post-text "evidence: IPI" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9714181 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 19966 } ,
+                  anchor "interleukin-1 binding" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10880251 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 19959 } ,
+                  anchor "interleukin-8 binding" ,
+                  post-text "evidence: IPI" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8320 } ,
+                  anchor "protein carrier activity" ,
+                  post-text "evidence: NR" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4867 } ,
+                  anchor "serine-type endopeptidase inhibitor activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9714181 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 43120 } ,
+                  anchor "tumor necrosis factor binding" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 17114 } ,
+                  anchor "wide-spectrum protease inhibitor activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6886 } ,
+                  anchor "intracellular protein transport" ,
+                  post-text "evidence: NR" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 51260 } ,
+                  anchor "protein homooligomerization" ,
+                  post-text "evidence: NAS" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 14718574 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5576 } ,
+                  anchor "extracellular region" ,
+                  post-text "evidence: NAS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 37248 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=12&
+MAPS=genes-r-org/rat-chr/human%3A12,genes-r-org/mouse-chr/human%3A12,genes-r-o
+rg/human-chr12&query=e%3A2[id]+AND+gene[obj_type]&QSTR=a2m&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 7 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15511627 ,
+        pmid 15023809 ,
+        pmid 14760718 ,
+        pmid 14718574 ,
+        pmid 14715656 ,
+        pmid 14678766 ,
+        pmid 14675603 ,
+        pmid 14637088 ,
+        pmid 14506912 ,
+        pmid 12966032 ,
+        pmid 12755687 ,
+        pmid 12755614 ,
+        pmid 12631277 ,
+        pmid 12477932 ,
+        pmid 12221172 ,
+        pmid 12194978 ,
+        pmid 12175343 ,
+        pmid 12062545 ,
+        pmid 12042276 ,
+        pmid 12015318 ,
+        pmid 11916201 ,
+        pmid 11910179 ,
+        pmid 11901360 ,
+        pmid 11823454 ,
+        pmid 11811950 ,
+        pmid 11435418 ,
+        pmid 11162584 ,
+        pmid 11100124 ,
+        pmid 11023837 ,
+        pmid 10880251 ,
+        pmid 10731476 ,
+        pmid 10714547 ,
+        pmid 10652313 ,
+        pmid 10373500 ,
+        pmid 9831625 ,
+        pmid 9724081 ,
+        pmid 9714181 ,
+        pmid 8583572 ,
+        pmid 7680268 ,
+        pmid 7533769 ,
+        pmid 7533162 ,
+        pmid 7519849 ,
+        pmid 6191979 ,
+        pmid 6153632 ,
+        pmid 3289986 ,
+        pmid 2581245 ,
+        pmid 2476070 ,
+        pmid 2460294 ,
+        pmid 2459995 ,
+        pmid 2408344 ,
+        pmid 1707161 ,
+        pmid 1697852 ,
+        pmid 1370808 ,
+        pmid 1281457 ,
+        pmid 89758 } } ,
+    {
+      type comment ,
+      heading "Phenotypes" ,
+      version 0 ,
+      comment {
+        {
+          type phenotype ,
+          text "Alzheimer disease, susceptibility to" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  id 103950 } ,
+              anchor "MIM: 103950" } } } ,
+        {
+          type phenotype ,
+          text "Emphysema due to alpha-2-macroglobulin deficiency" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  id 103950 } ,
+              anchor "MIM: 103950" } } } } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 25036 } ,
+              anchor "RH1601" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH44109" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG1293" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 40245 } ,
+              anchor "SGC31674" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "EST130345" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH52474" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "WI-219" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 46849 } ,
+              anchor "RH11157" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH44108" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG1290R" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 95143 } ,
+              anchor "G44356" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "WIAF-1430-STS" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 147540 } ,
+              anchor "NoName" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 151442 } ,
+              anchor "NoName" ,
+              post-text "(e-PCR)" } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_000014" ,
+          version 3 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 6226959 } ,
+              anchor "NM_000014" } } ,
+          seqs {
+            whole
+              gi 6226959 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_000005" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4557225 } ,
+                  anchor "NP_000005" ,
+                  post-text "alpha-2-macroglobulin precursor" } } ,
+              seqs {
+                whole
+                  gi 4557225 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 4557225 } ,
+                      pre-text "(2)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5952 } ,
+                          anchor "pfam00207: A2M; Alpha-2-macroglobulin family" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 725 - 1463  Blast Score: 2322" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 25832 } ,
+                          anchor "pfam01835: A2M_N; Alpha-2-macroglobulin
+ family N-terminal region" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 21 - 628  Blast Score: 1948" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "M11313" } ,
+                  anchor "M11313" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF349032" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 13661813 } ,
+              anchor "AF349032" } } ,
+          seqs {
+            whole
+              gi 13661813 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK38109" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 13661814 } ,
+                  anchor "AAK38109" } } ,
+              seqs {
+                whole
+                  gi 13661814 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF349033" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 13661815 } ,
+              anchor "AF349033" } } ,
+          seqs {
+            whole
+              gi 13661815 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK38110" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 13661816 } ,
+                  anchor "AAK38110" } } ,
+              seqs {
+                whole
+                  gi 13661816 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "X68728" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 450521 } ,
+              anchor "X68728" } } ,
+          seqs {
+            whole
+              gi 450521 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA48670" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 825615 } ,
+                  anchor "CAA48670" } } ,
+              seqs {
+                whole
+                  gi 825615 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "Z11711" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 24760 } ,
+              anchor "Z11711" } } ,
+          seqs {
+            whole
+              gi 24760 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA77774" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 24761 } ,
+                  anchor "CAA77774" } } ,
+              seqs {
+                whole
+                  gi 24761 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AB209614" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 62088807 } ,
+              anchor "AB209614" } } ,
+          seqs {
+            whole
+              gi 62088807 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAD92851" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 62088808 } ,
+                  anchor "BAD92851" } } ,
+              seqs {
+                whole
+                  gi 62088808 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF109189" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 33337723 } ,
+              anchor "AF109189" } } ,
+          seqs {
+            whole
+              gi 33337723 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAQ13498" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 33337724 } ,
+                  anchor "AAQ13498" } } ,
+              seqs {
+                whole
+                  gi 33337724 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AY591530" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 46812314 } ,
+              anchor "AY591530" } } ,
+          seqs {
+            whole
+              gi 46812314 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAT02228" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 46812315 } ,
+                  anchor "AAT02228" } } ,
+              seqs {
+                whole
+                  gi 46812315 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC026246" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 45708660 } ,
+              anchor "BC026246" } } ,
+          seqs {
+            whole
+              gi 45708660 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH26246" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 45708661 } ,
+                  anchor "AAH26246" } } ,
+              seqs {
+                whole
+                  gi 45708661 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC040071" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 25303945 } ,
+              anchor "BC040071" } } ,
+          seqs {
+            whole
+              gi 25303945 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH40071" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 25303946 } ,
+                  anchor "AAH40071" } } ,
+              seqs {
+                whole
+                  gi 25303946 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BX647329" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34366357 } ,
+              anchor "BX647329" } } ,
+          seqs {
+            whole
+              gi 34366357 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "CR749334" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 51476395 } ,
+              anchor "CR749334" } } ,
+          seqs {
+            whole
+              gi 51476395 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAH18188" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 51476396 } ,
+                  anchor "CAH18188" } } ,
+              seqs {
+                whole
+                  gi 51476396 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "M11313" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 177869 } ,
+              anchor "M11313" } } ,
+          seqs {
+            whole
+              gi 177869 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA51551" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 177870 } ,
+                  anchor "AAA51551" } } ,
+              seqs {
+                whole
+                  gi 177870 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "M36501" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 177871 } ,
+              anchor "M36501" } } ,
+          seqs {
+            whole
+              gi 177871 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA51552" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 177872 } ,
+                  anchor "AAA51552" } } ,
+              seqs {
+                whole
+                  gi 177872 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P01023" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 112911 } ,
+                  anchor "P01023" } } ,
+              seqs {
+                whole
+                  gi 112911 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "2" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_009714.16&gene=A2M&lid=2&from=1979284&to=2027463" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "2" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_009714.16&gene=A2M&lid=2" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.212838" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.212838" } ,
+              anchor "Hs.212838" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=21
+2838" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "103950" } ,
+              anchor "103950" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "2" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+2[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 2 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=2" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:119639" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HGMD" ,
+                tag
+                  str "" } ,
+              url "http://www.uwcm.ac.uk/uwcm/mg/search/119639.html" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=CR749
+334" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+CR749334" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "KEGG" ,
+                tag
+                  str "" } ,
+              url "http://www.genome.ad.jp/dbget-bin/www_bget?hsa:2" } } } ,
+        {
+          type comment ,
+          text "PharmGKB" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "PharmGKB" ,
+                tag
+                  str "PA24357" } ,
+              anchor "PA24357" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC040071" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC040071" } } } } } ,
+    {
+      type comment ,
+      heading "Pathways" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "KEGG pathway: Alzheimer's disease" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "05010" ,
+                tag
+                  str "05010" } ,
+              anchor "05010" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa05010+2" } } } ,
+        {
+          type comment ,
+          text "KEGG pathway: Coagulation cascade" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "04610" ,
+                tag
+                  str "04610" } ,
+              anchor "04610" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa04610+2" } } } ,
+        {
+          type comment ,
+          text "KEGG pathway: Complement and coagulation cascades" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "04610" ,
+                tag
+                  str "04610" } ,
+              anchor "04610" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa04610+2" } } } ,
+        {
+          type comment ,
+          text "Reactome: Hemostasis" ,
+          version 0 ,
+          source {
+            {
+              anchor "49292" ,
+              url "http://www.reactome.org/cgi-bin/link?SOURCE=UniProt&ID=P010
+23" } } } } } ,
+    {
+      type generif ,
+      text "an important involvement of alpha2M in regulation of increased
+ proteolytic activity occurring in multiple sclerosis disease" ,
+      version 0 ,
+      refs {
+        pmid 15511627 } ,
+      create-date
+        str "Dec 18 2004 10:01AM" ,
+      update-date
+        str "Dec 18 2004  1:19PM" } ,
+    {
+      type generif ,
+      text "alpha2-macroglobulin inhibits human pepsin and gastricsin" ,
+      version 0 ,
+      refs {
+        pmid 14506912 } ,
+      create-date
+        str "Oct  7 2003 12:00AM" ,
+      update-date
+        str "Jul  7 2004  1:36PM" } ,
+    {
+      type generif ,
+      text "There is a significant genetic association of the 5 bp deletion
+ and two novel polymorphisms in alpha-2-macroglobulin alpha-2-macroglobulin
+ precursor with AD" ,
+      version 0 ,
+      refs {
+        pmid 12966032 } ,
+      create-date
+        str "Jun 27 2004  5:41PM" ,
+      update-date
+        str "Jun 27 2004  6:18PM" } ,
+    {
+      type generif ,
+      text "Alpha2-macroglobulin is a substrate and an endogenous inhibitor
+ for ADAMTS-4 and ADAMTS-5" ,
+      version 0 ,
+      refs {
+        pmid 14715656 } ,
+      create-date
+        str "Jun 14 2004  4:41PM" ,
+      update-date
+        str "Jun 14 2004  5:44PM" } ,
+    {
+      type generif ,
+      text "A2M-D allele played a weak Alzheimer disease protective role, and
+ APOE-E4 and A2M-G alleles might act synergistically in Alzheimer disease risk
+ for mainland Han Chinese." ,
+      version 0 ,
+      refs {
+        pmid 14675603 } ,
+      create-date
+        str "Feb 17 2004 12:00AM" ,
+      update-date
+        str "Apr 13 2004  1:35PM" } ,
+    {
+      type generif ,
+      text "Plasma from patients homozygous for the intronic deletion (DD)
+ showed normal alpha(2)M subunit size, conformation, and proteinase inhibitory
+ activity. Plasma alpha(2)M from two DD patients showed markedly increased
+ TGF-beta1 binding." ,
+      version 0 ,
+      refs {
+        pmid 14678766 } ,
+      create-date
+        str "Jan 20 2004 12:00AM" ,
+      update-date
+        str "Mar  7 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "The presence of MPO-G/G and A2M-Val/Val genotypes synergistically
+ increased the risk of AD (OR, 25.5; 95% CI, 4.65-139.75)." ,
+      version 0 ,
+      refs {
+        pmid 15023809 } ,
+      create-date
+        str "Mar 25 2004 12:00AM" ,
+      update-date
+        str "Apr 18 2004  7:02AM" } ,
+    {
+      type generif ,
+      text "alpha2-M deletion polymorphism is probably not associated with
+ functional deficiencies important in Alzheimer's disease pathology" ,
+      version 0 ,
+      refs {
+        pmid 14637088 } ,
+      create-date
+        str "Dec 31 2003 12:00AM" ,
+      update-date
+        str "Jan 11 2004  7:02AM" } ,
+    {
+      type generif ,
+      text "FGF-2 and this protein interact at specific binding sites,
+ involving different FGF-2 sequences." ,
+      version 0 ,
+      refs {
+        pmid 12755687 } ,
+      create-date
+        str "Sep  7 2003 12:00AM" ,
+      update-date
+        str "Sep 28 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "alpha(2)M-derived peptides target the receptor-binding sequence in
+ TGF-beta" ,
+      version 0 ,
+      refs {
+        pmid 12755614 } ,
+      create-date
+        str "Jun 25 2003 12:00AM" ,
+      update-date
+        str "Jul  6 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "These results suggest the possible involvement of cathepsin E in
+ disruption of the structural and functional integrity of alpha
+ 2-macroglobulin in the endolysosome system." ,
+      version 0 ,
+      refs {
+        pmid 12631277 } ,
+      create-date
+        str "May  8 2003 12:00AM" ,
+      update-date
+        str "May 11 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Alpha 2-macroglobulin enhances prothrombin activation and thrombin
+ potential by inhibiting the anticoagulant protein C/protein S system in cord
+ and adult plasma." ,
+      version 0 ,
+      refs {
+        pmid 12062545 } ,
+      create-date
+        str "Feb  6 2003 12:00AM" ,
+      update-date
+        str "Aug 17 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "relationship between serum VEGF levels, alpha(2)M levels and the
+ development of OHSS in hyperstimulated subjects undergoing IVF" ,
+      version 0 ,
+      refs {
+        pmid 12042276 } ,
+      create-date
+        str "Dec 16 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:00AM" } ,
+    {
+      type generif ,
+      text "Genetic association of alpha2-macroglobulin polymorphisms with
+ Alzheimer's disease" ,
+      version 0 ,
+      refs {
+        pmid 12221172 } ,
+      create-date
+        str "Sep 27 2002 12:00AM" ,
+      update-date
+        str "Oct  7 2002  8:10AM" } ,
+    {
+      type generif ,
+      text "Genetic association of argyrophilic grain disease with
+ polymorphisms in alpha-2 macroglobulin." ,
+      version 0 ,
+      refs {
+        pmid 12175343 } ,
+      create-date
+        str "Sep 24 2002 12:00AM" ,
+      update-date
+        str "Oct  7 2002  8:10AM" } ,
+    {
+      type generif ,
+      text "The three-dimensional structure of the dimer reveals its
+ structural organization in the tetrameric native and chymotrypsin alpha
+ 2-macroglobulin complexes." ,
+      version 0 ,
+      refs {
+        pmid 12015318 } ,
+      create-date
+        str "Sep  5 2002 12:00AM" ,
+      update-date
+        str "Sep 23 2002  6:27AM" } ,
+    {
+      type generif ,
+      text "has an important role in the AD-specific neurodegenerative process
+ but its exon 24 Val-1000-Ile polymorphism is not likely to be associated with
+ late-onset sporadic AD in the Hungarian population" ,
+      version 0 ,
+      refs {
+        pmid 11901360 } ,
+      create-date
+        str "Aug  6 2002 12:00AM" ,
+      update-date
+        str "Aug 28 2002  6:15PM" } ,
+    {
+      type generif ,
+      text "REVIEW: binds and neutralizes alfimeprase, which has direct
+ proteolytic activity against the fibrinogen Aalpha chain" ,
+      version 0 ,
+      refs {
+        pmid 11910179 } ,
+      create-date
+        str "Jun 12 2002 12:00AM" ,
+      update-date
+        str "Sep  7 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "distinct binding sites mediate interaction with beta-amyloid
+ peptide and growth factors" ,
+      version 0 ,
+      refs {
+        pmid 11823454 } ,
+      create-date
+        str "May  4 2002 12:00AM" ,
+      update-date
+        str "May 18 2002  6:08AM" } ,
+    {
+      type generif ,
+      text "Differential binding to ldl receptor related protein" ,
+      version 0 ,
+      refs {
+        pmid 11811950 } ,
+      create-date
+        str "Feb 14 2002 12:00AM" ,
+      update-date
+        str "Mar  4 2002  7:46AM" } ,
+    {
+      type generif ,
+      heading "HIV-1 protein interactions" ,
+      version 0 ,
+      comment {
+        {
+          type generif ,
+          text "Binding of HIV-1 Tat to LRP inhibits neuronal binding, uptake
+ and degradation of physiological ligands for LRP, including
+ alpha2-macroglobulin, apolipoprotein E4, amyloid precursor and amyloid
+ beta-protein" ,
+          version 0 ,
+          refs {
+            pmid 11100124 } ,
+          comment {
+            {
+              type comment ,
+              label "Tat" ,
+              accession "NP_057853" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "GeneID" ,
+                    tag
+                      id 155871 } } } } ,
+            {
+              type comment ,
+              accession "NP_000005" ,
+              version 1 } } ,
+          create-date
+            str "May 11 2004 12:48PM" ,
+          update-date
+            str "May 11 2004  1:10PM" } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 2 } ,
+    {
+      db "MIM" ,
+      tag
+        id 103950 } } ,
+  xtra-index-terms {
+    "LOC2" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 3 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 9 ,
+        second 0 } } ,
+  type pseudo ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "12" } } } ,
+  gene {
+    locus "A2MP" ,
+    desc "alpha-2-macroglobulin pseudogene" ,
+    maploc "12p13.3-p12.3" ,
+    locus-tag "HGNC:8" } ,
+  location {
+    {
+      display-str "12p13.3-p12.3" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 3 ,
+    src-str2 "3" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000012" ,
+      version 9 ,
+      seqs {
+        int {
+          from 9275468 ,
+          to 9278174 ,
+          strand minus ,
+          id
+            gi 51511728 ,
+          fuzz-from
+            lim lt ,
+          fuzz-to
+            lim gt } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_009714" ,
+      version 16 ,
+      seqs {
+        int {
+          from 2143175 ,
+          to 2145881 ,
+          strand minus ,
+          id
+            gi 37543832 ,
+          fuzz-from
+            lim lt ,
+          fuzz-to
+            lim gt } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086792" ,
+      version 1 ,
+      seqs {
+        int {
+          from 4337316 ,
+          to 4340022 ,
+          strand minus ,
+          id
+            gi 51471135 ,
+          fuzz-from
+            lim lt ,
+          fuzz-to
+            lim gt } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NG_001067" ,
+      version 1 ,
+      seqs {
+        int {
+          from 176 ,
+          to 2880 ,
+          strand plus ,
+          id
+            gi 20270626 ,
+          fuzz-from
+            lim lt ,
+          fuzz-to
+            lim gt } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "A2MP" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "alpha-2-macroglobulin pseudogene" ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 8 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "PROVISIONAL" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 2478422 } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      comment {
+        {
+          type genomic ,
+          heading "Reference" ,
+          accession "NG_001067" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 20270626 } ,
+              anchor "NG_001067" } } ,
+          seqs {
+            int {
+              from 1 ,
+              to 3003 ,
+              strand plus ,
+              id
+                gi 20270626 } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "M24415" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 187575 } ,
+              anchor "M24415" } } ,
+          seqs {
+            int {
+              from 177 ,
+              to 2881 ,
+              strand plus ,
+              id
+                gi 187575 } } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "3" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_009714.16&gene=A2MP&lid=3&from=2143176&to=2145882" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "3" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_009714.16&gene=A2MP&lid=3" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 3 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=3" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:128103" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 3 } } ,
+  xtra-index-terms {
+    "LOC3" } }
+Entrezgene ::= {
+  track-info {
+    geneid 4 ,
+    status discontinued ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 9 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "1" } } } ,
+  gene {
+    desc "adenovirus-12 chromosome modification site 1C" ,
+    maploc "1q42-q43" ,
+    syn {
+      "A12M1" } } ,
+  prot {
+    name {
+      "adenovirus-12 chromosome modification site 1C" ,
+      "Adenovirus-12 chromosome modification site-1q1" } } ,
+  location {
+    {
+      display-str "1q42-q43" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 4 ,
+    src-str2 "4" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  comments {
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "WITHDRAWN" ,
+      version 0 } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:118950" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 4 } } ,
+  xtra-index-terms {
+    "LOC4" } }
+Entrezgene ::= {
+  track-info {
+    geneid 5 ,
+    status discontinued ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 9 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "1" } } } ,
+  gene {
+    desc "adenovirus-12 chromosome modification site 1A" ,
+    maploc "1p36" ,
+    syn {
+      "A12M2" } } ,
+  prot {
+    name {
+      "adenovirus-12 chromosome modification site 1A" ,
+      "Adenovirus-12 chromosome modification site-1p" } } ,
+  location {
+    {
+      display-str "1p36" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 5 ,
+    src-str2 "5" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  comments {
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "WITHDRAWN" ,
+      version 0 } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:118951" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 5 } } ,
+  xtra-index-terms {
+    "LOC5" } }
+Entrezgene ::= {
+  track-info {
+    geneid 6 ,
+    status discontinued ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 9 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "1" } } } ,
+  gene {
+    desc "adenovirus-12 chromosome modification site 1B" ,
+    maploc "1q21" ,
+    syn {
+      "A12M3" } } ,
+  prot {
+    name {
+      "adenovirus-12 chromosome modification site 1B" ,
+      "Adenovirus-12 chromosome modification site-1q2" } } ,
+  location {
+    {
+      display-str "1q21" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 6 ,
+    src-str2 "6" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  comments {
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "WITHDRAWN" ,
+      version 0 } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:118952" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 6 } } ,
+  xtra-index-terms {
+    "LOC6" } }
+Entrezgene ::= {
+  track-info {
+    geneid 7 ,
+    status discontinued ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 9 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "17" } } } ,
+  gene {
+    desc "adenovirus-12 chromosome modification site 17" ,
+    maploc "17q21-q22" ,
+    syn {
+      "A12M4" } } ,
+  prot {
+    name {
+      "adenovirus-12 chromosome modification site 17" } } ,
+  location {
+    {
+      display-str "17q21-q22" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 7 ,
+    src-str2 "7" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  comments {
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "WITHDRAWN" ,
+      version 0 } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:118953" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 7 } } ,
+  xtra-index-terms {
+    "LOC7" } }
+Entrezgene ::= {
+  track-info {
+    geneid 8 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 3 ,
+        day 16 ,
+        hour 16 ,
+        minute 24 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "11" } } } ,
+  gene {
+    locus "AA" ,
+    desc "atrophia areata, peripapillary chorioretinal degeneration" ,
+    maploc "11p15" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 108985 } } ,
+    locus-tag "HGNC:11" } ,
+  location {
+    {
+      display-str "11p15" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 8 ,
+    src-str2 "8" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "AA" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "atrophia areata, peripapillary chorioretinal degeneration" ,
+          version 0 } } } ,
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 11 } } } } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 7795606 } } ,
+    {
+      type comment ,
+      heading "Phenotypes" ,
+      version 0 ,
+      comment {
+        {
+          type phenotype ,
+          text "Atrophia areata" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  id 108985 } ,
+              anchor "MIM: 108985" } } } ,
+        {
+          type phenotype ,
+          label "AA" ,
+          text "atrophia areata, peripapillary chorioretinal degeneration" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HGNC" ,
+                tag
+                  id 11 } ,
+              anchor "HGNC:11" } } } } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 31863 } ,
+              anchor "D11S902" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "072YD3" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "AFM072yd3" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "GDB:187919" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "HS072YD3" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH15041" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH3044" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH51860" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 31863 } ,
+              anchor "D11S902 (flanking)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "072YD3" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "AFM072yd3" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "GDB:187919" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "HS072YD3" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH15041" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH3044" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH51860" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 67612 } ,
+              anchor "D11S1323" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "AFM248xf9" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH31202" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH73986" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SHGC-2070" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 67612 } ,
+              anchor "D11S1323 (flanking)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "AFM248xf9" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH31202" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH73986" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SHGC-2070" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "108985" } ,
+              anchor "108985" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:568984" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 8 } ,
+    {
+      db "MIM" ,
+      tag
+        id 108985 } } ,
+  xtra-index-terms {
+    "LOC8" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype" } ,
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 9 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 15 ,
+        hour 9 ,
+        minute 40 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "8" } } } ,
+  gene {
+    locus "NAT1" ,
+    desc "N-acetyltransferase 1 (arylamine N-acetyltransferase)" ,
+    maploc "8p23.1-p21.3" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 108345 } } ,
+    syn {
+      "AAC1" } ,
+    locus-tag "HGNC:7645" } ,
+  prot {
+    name {
+      "N-acetyltransferase 1" ,
+      "arylamine N-acetyltransferase 1" ,
+      "arylamide acetylase 1 (N-acetyltransferase 1)" } } ,
+  location {
+    {
+      display-str "8p23.1-p21.3" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 9 ,
+    src-str2 "9" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000008" ,
+      version 9 ,
+      seqs {
+        int {
+          from 18111894 ,
+          to 18125099 ,
+          strand plus ,
+          id
+            gi 51511724 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000662" ,
+          version 4 ,
+          genomic-coords {
+            mix {
+              int {
+                from 18111894 ,
+                to 18111968 ,
+                strand plus ,
+                id
+                  gi 51511724 } ,
+              int {
+                from 18121199 ,
+                to 18121277 ,
+                strand plus ,
+                id
+                  gi 51511724 } ,
+              int {
+                from 18123830 ,
+                to 18125099 ,
+                strand plus ,
+                id
+                  gi 51511724 } } } ,
+          seqs {
+            whole
+              gi 42741670 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_000653" ,
+              version 3 ,
+              genomic-coords {
+                int {
+                  from 18123836 ,
+                  to 18124708 ,
+                  strand plus ,
+                  id
+                    gi 51511724 } } ,
+              seqs {
+                whole
+                  gi 42741671 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.5" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.5" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_030737" ,
+      version 9 ,
+      seqs {
+        int {
+          from 5912542 ,
+          to 5925747 ,
+          strand plus ,
+          id
+            gi 51466871 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000662" ,
+          version 4 ,
+          genomic-coords {
+            mix {
+              int {
+                from 5912542 ,
+                to 5912616 ,
+                strand plus ,
+                id
+                  gi 51466871 } ,
+              int {
+                from 5921847 ,
+                to 5921925 ,
+                strand plus ,
+                id
+                  gi 51466871 } ,
+              int {
+                from 5924478 ,
+                to 5925747 ,
+                strand plus ,
+                id
+                  gi 51466871 } } } ,
+          seqs {
+            whole
+              gi 42741670 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_000653" ,
+              version 3 ,
+              genomic-coords {
+                int {
+                  from 5924484 ,
+                  to 5925356 ,
+                  strand plus ,
+                  id
+                    gi 51466871 } } ,
+              seqs {
+                whole
+                  gi 42741671 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.5" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.5" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086740" ,
+      version 1 ,
+      seqs {
+        int {
+          from 5593338 ,
+          to 5606579 ,
+          strand plus ,
+          id
+            gi 51467159 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000662" ,
+          version 4 ,
+          genomic-coords {
+            mix {
+              int {
+                from 5593338 ,
+                to 5593412 ,
+                strand plus ,
+                id
+                  gi 51467159 } ,
+              int {
+                from 5602680 ,
+                to 5602758 ,
+                strand plus ,
+                id
+                  gi 51467159 } ,
+              int {
+                from 5605310 ,
+                to 5606579 ,
+                strand plus ,
+                id
+                  gi 51467159 } } } ,
+          seqs {
+            whole
+              gi 42741670 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_000653" ,
+              version 3 ,
+              genomic-coords {
+                int {
+                  from 5605316 ,
+                  to 5606188 ,
+                  strand plus ,
+                  id
+                    gi 51467159 } } ,
+              seqs {
+                whole
+                  gi 42741671 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.5" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.5" } } } } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "NAT1" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "N-acetyltransferase 1 (arylamine N-acetyltransferase)" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16407 } ,
+                  anchor "acetyltransferase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10908296 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4060 } ,
+                  anchor "arylamine N-acetyltransferase activity" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16740 } ,
+                  anchor "transferase activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8152 } ,
+                  anchor "metabolism" ,
+                  post-text "evidence: IEA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 37329 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=8&M
+APS=genes-r-org/rat-chr/human%3A8,genes-r-org/mouse-chr/human%3A8,genes-r-org/
+human-chr8&query=e%3A9[id]+AND+gene[obj_type]&QSTR=nat1&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 7645 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "VALIDATED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15487985 ,
+        pmid 15320760 ,
+        pmid 15226672 ,
+        pmid 15039438 ,
+        pmid 14705222 ,
+        pmid 14672957 ,
+        pmid 14608357 ,
+        pmid 14517345 ,
+        pmid 12946272 ,
+        pmid 12902152 ,
+        pmid 12888564 ,
+        pmid 12860276 ,
+        pmid 12832400 ,
+        pmid 12692115 ,
+        pmid 12682333 ,
+        pmid 12485520 ,
+        pmid 12477932 ,
+        pmid 12355549 ,
+        pmid 12052143 ,
+        pmid 12037388 ,
+        pmid 11955677 ,
+        pmid 11955676 ,
+        pmid 11927838 ,
+        pmid 11470991 ,
+        pmid 10908296 ,
+        pmid 10862520 ,
+        pmid 10767335 ,
+        pmid 9168895 ,
+        pmid 8110178 ,
+        pmid 7773298 ,
+        pmid 2340091 ,
+        pmid 1968463 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 24445 } ,
+              anchor "STS-R79401" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH40594" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "sts-R79401" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 57372 } ,
+              anchor "STS-D90041" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH75822" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "sts-D90041" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 63787 } ,
+              anchor "RH70671" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "U52007" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 271536 } ,
+              anchor "PMC165273P2" ,
+              post-text "(e-PCR)" } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_000662" ,
+          version 4 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 42741670 } ,
+              anchor "NM_000662" } } ,
+          seqs {
+            whole
+              gi 42741670 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_000653" ,
+              version 3 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 42741671 } ,
+                  anchor "NP_000653" ,
+                  post-text "N-acetyltransferase 1" } } ,
+              seqs {
+                whole
+                  gi 42741671 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Consensus CDS (CCDS)" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "CCDS" ,
+                        tag
+                          str "CCDS6007.1" } ,
+                      anchor "CCDS6007.1" } } } ,
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 42741671 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 1353 } ,
+                          anchor "pfam00797: Acetyltransf_2;
+ N-acetyltransferase" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 20 - 280  Blast Score: 1195" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "AV754344,BC047666,BQ024509" } ,
+                  anchor "AV754344,BC047666,BQ024509" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF008204" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2618821 } ,
+              anchor "AF008204" } } ,
+          seqs {
+            whole
+              gi 2618821 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAB84384" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2258431 } ,
+                  anchor "AAB84384" } } ,
+              seqs {
+                whole
+                  gi 2258431 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF032677" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2641552 } ,
+              anchor "AF032677" } } ,
+          seqs {
+            whole
+              gi 2641552 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAB86878" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2641553 } ,
+                  anchor "AAB86878" } } ,
+              seqs {
+                whole
+                  gi 2641553 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF032678" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2641554 } ,
+              anchor "AF032678" } } ,
+          seqs {
+            whole
+              gi 2641554 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAB86879" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2641555 } ,
+                  anchor "AAB86879" } } ,
+              seqs {
+                whole
+                  gi 2641555 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF067408" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3265061 } ,
+              anchor "AF067408" } } ,
+          seqs {
+            whole
+              gi 3265061 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC24707" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3265062 } ,
+                  anchor "AAC24707" } } ,
+              seqs {
+                whole
+                  gi 3265062 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF071552" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3265151 } ,
+              anchor "AF071552" } } ,
+          seqs {
+            whole
+              gi 3265151 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC24712" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3265152 } ,
+                  anchor "AAC24712" } } ,
+              seqs {
+                whole
+                  gi 3265152 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF082903" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3435297 } ,
+              anchor "AF082903" } } ,
+          seqs {
+            whole
+              gi 3435297 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD13343" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3435298 } ,
+                  anchor "AAD13343" } } ,
+              seqs {
+                whole
+                  gi 3435298 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF082904" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3435299 } ,
+              anchor "AF082904" } } ,
+          seqs {
+            whole
+              gi 3435299 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC32388" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3435300 } ,
+                  anchor "AAC32388" } } ,
+              seqs {
+                whole
+                  gi 3435300 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF308866" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 10834832 } ,
+              anchor "AF308866" } } ,
+          seqs {
+            whole
+              gi 10834832 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAG23842" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 10834833 } ,
+                  anchor "AAG23842" } } ,
+              seqs {
+                whole
+                  gi 10834833 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AJ278017" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 9663144 } ,
+              anchor "AJ278017" } } ,
+          seqs {
+            whole
+              gi 9663144 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAC01128" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 9663145 } ,
+                  anchor "CAC01128" } } ,
+              seqs {
+                whole
+                  gi 9663145 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AJ307007" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 14018355 } ,
+              anchor "AJ307007" } } ,
+          seqs {
+            whole
+              gi 14018355 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAC38345" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14018356 } ,
+                  anchor "CAC38345" } } ,
+              seqs {
+                whole
+                  gi 14018356 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AY338489" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 32815075 } ,
+              anchor "AY338489" } } ,
+          seqs {
+            whole
+              gi 32815075 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAP88036" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 32815076 } ,
+                  anchor "AAP88036" } } ,
+              seqs {
+                whole
+                  gi 32815076 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AY376850" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34555772 } ,
+              anchor "AY376850" } } ,
+          seqs {
+            whole
+              gi 34555772 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAQ74989" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 34555773 } ,
+                  anchor "AAQ74989" } } ,
+              seqs {
+                whole
+                  gi 34555773 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U80835" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2245375 } ,
+              anchor "U80835" } } ,
+          seqs {
+            whole
+              gi 2245375 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAB62398" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2245376 } ,
+                  anchor "AAB62398" } } ,
+              seqs {
+                whole
+                  gi 2245376 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "X17059" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34993 } ,
+              anchor "X17059" } } ,
+          seqs {
+            whole
+              gi 34993 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA34905" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 34994 } ,
+                  anchor "CAA34905" } } ,
+              seqs {
+                whole
+                  gi 34994 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AV754344" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 10912192 } ,
+              anchor "AV754344" } } ,
+          seqs {
+            whole
+              gi 10912192 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC013732" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 16975545 } ,
+              anchor "BC013732" } } ,
+          seqs {
+            whole
+              gi 16975545 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC047666" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 28838335 } ,
+              anchor "BC047666" } } ,
+          seqs {
+            whole
+              gi 28838335 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH47666" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 28838336 } ,
+                  anchor "AAH47666" } } ,
+              seqs {
+                whole
+                  gi 28838336 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BQ024509" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 19759788 } ,
+              anchor "BQ024509" } } ,
+          seqs {
+            whole
+              gi 19759788 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BX647521" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34366678 } ,
+              anchor "BX647521" } } ,
+          seqs {
+            whole
+              gi 34366678 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "D90041" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 219413 } ,
+              anchor "D90041" } } ,
+          seqs {
+            whole
+              gi 219413 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA14095" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 219414 } ,
+                  anchor "BAA14095" } } ,
+              seqs {
+                whole
+                  gi 219414 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P18440" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 114234 } ,
+                  anchor "P18440" } } ,
+              seqs {
+                whole
+                  gi 114234 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "9" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_030737.9&gene=NAT1&lid=9&from=5912543&to=5925748" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "9" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_030737.9&gene=NAT1&lid=9" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.155956" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.155956" } ,
+              anchor "Hs.155956" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=15
+5956" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "108345" } ,
+              anchor "108345" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "9" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+9[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 9 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=9" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:125364" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=BC013
+732" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+BC013732" } } } ,
+        {
+          type comment ,
+          text "PharmGKB" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "PharmGKB" ,
+                tag
+                  str "PA17" } ,
+              anchor "PA17" } } } ,
+        {
+          type comment ,
+          text "Arylamine N-Acetyltransferase Nomenclature" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Arylamine N-Acetyltransferase Nomenclature" ,
+                tag
+                  str "Arylamine N-Acetyltransferase Nomenclature" } ,
+              anchor "Arylamine N-Acetyltransferase Nomenclature" ,
+              url "http://www.louisville.edu/medschool/pharmacology/NAT.html" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC047666" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC047666" } } } } } ,
+    {
+      type generif ,
+      text "In breast, NAT1 mRNA is transcribed from a strong promoter located
+ 11.8 kb upstream of the translated exon, and the mature spliced mRNA includes
+ at least one additional non-coding exon." ,
+      version 0 ,
+      refs {
+        pmid 15226672 } ,
+      create-date
+        str "Mar 26 2005 10:01AM" ,
+      update-date
+        str "Mar 26 2005  1:53PM" } ,
+    {
+      type generif ,
+      text "Polymorphism at the NAT1 locus has been associated with the
+ existence of at least 26 allelic variants, generating phenotypic variations
+ in terms NAT-1 catalytic activity. This genetic variation affects the
+ acetylator status of individuals." ,
+      version 0 ,
+      refs {
+        pmid 15320760 } ,
+      create-date
+        str "Dec 18 2004 10:01AM" ,
+      update-date
+        str "Mar 17 2005  6:52AM" } ,
+    {
+      type generif ,
+      text "NAT1 exists in the cell in either a stable acetylated state or an
+ unstable non-acetylated state and mutations in the NAT1 gene that prevent
+ protein acetylation produce a slow acetylator phenotype" ,
+      version 0 ,
+      refs {
+        pmid 15039438 } ,
+      create-date
+        str "Jul  6 2004 11:50AM" ,
+      update-date
+        str "Jul  6 2004 12:11PM" } ,
+    {
+      type generif ,
+      text "Analysis of the effect of active NAT-1 overexpression in a normal
+ luminal epithelial-derived cell line demonstrated enhanced growth properties
+ and etoposide resistance relative to control cells." ,
+      version 0 ,
+      refs {
+        pmid 14517345 } ,
+      create-date
+        str "Jun 23 2004  4:43PM" ,
+      update-date
+        str "Jun 23 2004  5:16PM" } ,
+    {
+      type generif ,
+      text "cellular generation of peroxynitrite may contribute to
+ carcinogenesis and tumor progression by weakening key cellular defense
+ enzymes such as arylamine N-acetyltransferase 1 (NAT1)" ,
+      version 0 ,
+      refs {
+        pmid 14672957 } ,
+      create-date
+        str "Mar 24 2004 12:00AM" ,
+      update-date
+        str "May  9 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "although there is little overall association between NAT genotypes
+ and risk of developing systemic lupus erythematosus, the interaction between
+ NAT1 and NAT2 and specific exposures such as hair dyes may be important." ,
+      version 0 ,
+      refs {
+        pmid 14705222 } ,
+      create-date
+        str "Apr 16 2004 12:00AM" ,
+      update-date
+        str "May  2 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "A putative RUNX1 binding site variant of NAT9 is associated with
+ susceptibility to psoriasis" ,
+      version 0 ,
+      refs {
+        pmid 14608357 } ,
+      create-date
+        str "Dec 18 2003 12:00AM" ,
+      update-date
+        str "Jan 11 2004  7:03AM" } ,
+    {
+      type generif ,
+      text "Identification of minimal promoter sequences for transcription
+ factor binding sites in the human N-acetyltransferase Type I gene. that binds" ,
+      version 0 ,
+      refs {
+        pmid 12946272 } ,
+      create-date
+        str "Nov 24 2003 12:00AM" ,
+      update-date
+        str "Dec  7 2003  7:02AM" } ,
+    {
+      type generif ,
+      text "Oxidative stress and cellular redox status may regulate NAT1
+ activity and have important consequences with regard to drug
+ biotransformation and cancer risk." ,
+      version 0 ,
+      refs {
+        pmid 12832400 } ,
+      create-date
+        str "Oct  7 2003 12:00AM" ,
+      update-date
+        str "Nov 16 2003  7:02AM" } ,
+    {
+      type generif ,
+      text "Variation in capacity for acetylation of 4ABP and PABA resulting
+ from human NAT1 transgene is insufficient to affect 4ABP genotoxicity in
+ mouse liver." ,
+      version 0 ,
+      refs {
+        pmid 12902152 } ,
+      create-date
+        str "Sep  6 2003 12:00AM" ,
+      update-date
+        str "Sep 14 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "It is unlikely that the NAT1*10 or NAT2 rapid/intermediate
+ genotypes are related to stomach cancer risk." ,
+      version 0 ,
+      refs {
+        pmid 12692115 } ,
+      create-date
+        str "Sep  4 2003 12:00AM" ,
+      update-date
+        str "Sep 28 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "genetic polymorphisms of NAT1 and NAT2 have no independent effect
+ on breast cancer risk, but they modulate breast cancer risk in the presence
+ of GSTM1 and GSTT1 null genotypes." ,
+      version 0 ,
+      refs {
+        pmid 12860276 } ,
+      create-date
+        str "Aug 18 2003 12:00AM" ,
+      update-date
+        str "Aug 24 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT1 * 10 has increased risk of sporadic colorectal adenocarcinoma
+ and significantly related to the later stage tumors, is not significantly
+ related to the tumor location" ,
+      version 0 ,
+      refs {
+        pmid 12485520 } ,
+      create-date
+        str "Jun 25 2003 12:00AM" ,
+      update-date
+        str "Jul 13 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Single nucleotide polymorphisms of NAT1 and NAT2, and acetylation
+ haplotype were not associated with increased risk for Parkinson disease" ,
+      version 0 ,
+      refs {
+        pmid 12682333 } ,
+      create-date
+        str "Apr 11 2003 12:00AM" ,
+      update-date
+        str "Apr 27 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT1 polymorphism (NAT1*10) indicates increased susceptibility to
+ prostate cancer" ,
+      version 0 ,
+      refs {
+        pmid 12355549 } ,
+      create-date
+        str "Oct 31 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "susceptibility gene for multifactorial adverse effects and
+ xenobiotic-related diseases (review)" ,
+      version 0 ,
+      refs {
+        pmid 12052143 } ,
+      create-date
+        str "Sep 12 2002 12:00AM" ,
+      update-date
+        str "Sep 16 2002  5:48AM" } ,
+    {
+      type generif ,
+      text "NAT1 genotype acts as a modifier of diisothionate
+ exposure-associated asthma risk" ,
+      version 0 ,
+      refs {
+        pmid 11927838 } ,
+      create-date
+        str "Aug 30 2002 12:00AM" ,
+      update-date
+        str "Sep 16 2002  5:48AM" } ,
+    {
+      type generif ,
+      text "NAT1 polymorphisms may be correlated with an increased risk of
+ larynx cancer" ,
+      version 0 ,
+      refs {
+        pmid 12037388 } ,
+      create-date
+        str "Aug 19 2002 12:00AM" ,
+      update-date
+        str "Aug 28 2002  6:15PM" } ,
+    {
+      type generif ,
+      text "paclitaxel affected human leukemia HL-60 cells arylamine
+ N-acetyltransferase (NAT) activity and DNA-2-aminofluorene adduct formation." ,
+      version 0 ,
+      refs {
+        pmid 11955677 } ,
+      create-date
+        str "Jun 19 2002 12:00AM" ,
+      update-date
+        str "Jun 24 2002  6:31AM" } ,
+    {
+      type generif ,
+      text "paclitaxel is an uncompetitive inhibitor to arylamine
+ N-acetyltransferase (NAT) enzyme" ,
+      version 0 ,
+      refs {
+        pmid 11955676 } ,
+      create-date
+        str "Jun 19 2002 12:00AM" ,
+      update-date
+        str "Jun 24 2002  6:31AM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 9 } ,
+    {
+      db "MIM" ,
+      tag
+        id 108345 } } ,
+  xtra-index-terms {
+    "LOC9" } }
+Entrezgene ::= {
+  track-info {
+    geneid 10 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 17 ,
+        hour 11 ,
+        minute 9 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "8" } } } ,
+  gene {
+    locus "NAT2" ,
+    desc "N-acetyltransferase 2 (arylamine N-acetyltransferase)" ,
+    maploc "8p22" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 243400 } } ,
+    syn {
+      "AAC2" } ,
+    locus-tag "HGNC:7646" } ,
+  prot {
+    name {
+      "arylamide acetylase 2" ,
+      "Arylamine N-acetyltransferase-2" ,
+      "arylamide acetylase 2 (N-acetyltransferase 2, isoniazid inactivation)" } } ,
+  summary "The intronless NAT2 gene encodes N-acetyltransferase 2 (arylamine
+ N-acetyltransferase 2). This enzyme functions to both activate and deactivate
+ arylamine and hydrazine drugs and carcinogens. Polymorphisms in this gene are
+ reponsible for the N-acetylation polymorphism in which human populations
+ segregate into rapid,intermediate, and slow acetylator phenotypes.
+ Polymorphisms in NAT2 are also associated with higher incidences of cancer
+ and drug toxicity.A second arylamine N-acetyltransferase gene (NAT1) is
+ located near NAT2." ,
+  location {
+    {
+      display-str "8p22" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 10 ,
+    src-str2 "10" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000008" ,
+      version 9 ,
+      seqs {
+        int {
+          from 18293034 ,
+          to 18302961 ,
+          strand plus ,
+          id
+            gi 51511724 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000015" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 18293034 ,
+                to 18293134 ,
+                strand plus ,
+                id
+                  gi 51511724 } ,
+              int {
+                from 18301787 ,
+                to 18302961 ,
+                strand plus ,
+                id
+                  gi 51511724 } } } ,
+          seqs {
+            whole
+              gi 4557782 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_000006" ,
+              version 1 ,
+              genomic-coords {
+                int {
+                  from 18301793 ,
+                  to 18302665 ,
+                  strand plus ,
+                  id
+                    gi 51511724 } } ,
+              seqs {
+                whole
+                  gi 4557783 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.5" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.5" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_030737" ,
+      version 9 ,
+      seqs {
+        int {
+          from 6093682 ,
+          to 6103609 ,
+          strand plus ,
+          id
+            gi 51466871 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000015" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 6093682 ,
+                to 6093782 ,
+                strand plus ,
+                id
+                  gi 51466871 } ,
+              int {
+                from 6102435 ,
+                to 6103609 ,
+                strand plus ,
+                id
+                  gi 51466871 } } } ,
+          seqs {
+            whole
+              gi 4557782 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_000006" ,
+              version 1 ,
+              genomic-coords {
+                int {
+                  from 6102441 ,
+                  to 6103313 ,
+                  strand plus ,
+                  id
+                    gi 51466871 } } ,
+              seqs {
+                whole
+                  gi 4557783 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.5" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.5" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086740" ,
+      version 1 ,
+      seqs {
+        int {
+          from 5773920 ,
+          to 5783845 ,
+          strand plus ,
+          id
+            gi 51467159 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_000015" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 5773920 ,
+                to 5774020 ,
+                strand plus ,
+                id
+                  gi 51467159 } ,
+              int {
+                from 5782671 ,
+                to 5783845 ,
+                strand plus ,
+                id
+                  gi 51467159 } } } ,
+          seqs {
+            whole
+              gi 4557782 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_000006" ,
+              version 1 ,
+              genomic-coords {
+                int {
+                  from 5782677 ,
+                  to 5783549 ,
+                  strand plus ,
+                  id
+                    gi 51467159 } } ,
+              seqs {
+                whole
+                  gi 4557783 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.5" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.5" } } } } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "NAT2" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "N-acetyltransferase 2 (arylamine N-acetyltransferase)" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16407 } ,
+                  anchor "acetyltransferase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 2340091 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4060 } ,
+                  anchor "arylamine N-acetyltransferase activity" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16740 } ,
+                  anchor "transferase activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8152 } ,
+                  anchor "metabolism" ,
+                  post-text "evidence: IEA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 32047 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=8&M
+APS=genes-r-org/rat-chr/human%3A8,genes-r-org/mouse-chr/human%3A8,genes-r-org/
+human-chr8&query=e%3A10[id]+AND+gene[obj_type]&QSTR=nat2&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 7646 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15671210 ,
+        pmid 15637738 ,
+        pmid 15609332 ,
+        pmid 15588473 ,
+        pmid 15225898 ,
+        pmid 15162844 ,
+        pmid 15005011 ,
+        pmid 14747882 ,
+        pmid 14705222 ,
+        pmid 14648207 ,
+        pmid 14618622 ,
+        pmid 14528063 ,
+        pmid 12884528 ,
+        pmid 12877350 ,
+        pmid 12860276 ,
+        pmid 12835615 ,
+        pmid 12773763 ,
+        pmid 12760253 ,
+        pmid 12724621 ,
+        pmid 12692115 ,
+        pmid 12682333 ,
+        pmid 12654968 ,
+        pmid 12622714 ,
+        pmid 12611196 ,
+        pmid 12477932 ,
+        pmid 12474054 ,
+        pmid 12469231 ,
+        pmid 12465141 ,
+        pmid 12430181 ,
+        pmid 12397635 ,
+        pmid 12360107 ,
+        pmid 12355549 ,
+        pmid 12235453 ,
+        pmid 12222688 ,
+        pmid 12163321 ,
+        pmid 12037388 ,
+        pmid 12016157 ,
+        pmid 12015038 ,
+        pmid 11915035 ,
+        pmid 11872636 ,
+        pmid 11846845 ,
+        pmid 11470991 ,
+        pmid 9202751 ,
+        pmid 8460648 ,
+        pmid 8110178 ,
+        pmid 8102597 ,
+        pmid 7915226 ,
+        pmid 7902079 ,
+        pmid 7773298 ,
+        pmid 2734109 ,
+        pmid 2340091 ,
+        pmid 2068113 ,
+        pmid 1996083 ,
+        pmid 1968463 ,
+        pmid 1676262 ,
+        pmid 1441598 ,
+        pmid 1381364 ,
+        pmid 1306121 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 17088 } ,
+              anchor "G06461" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "WI-7224" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 44576 } ,
+              anchor "WIAF-2120" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH11442" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH20" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "WIAF-2120-STS" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "gdb:4559542" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG40" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 137181 } ,
+              anchor "G59899" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SHGC-130680" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 155563 } ,
+              anchor "GDB:187676" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 156422 } ,
+              anchor "GDB:310612" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 156423 } ,
+              anchor "GDB:310613" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 157141 } ,
+              anchor "GDB:386004" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 272646 } ,
+              anchor "PMC310725P3" ,
+              post-text "(e-PCR)" } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_000015" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4557782 } ,
+              anchor "NM_000015" } } ,
+          seqs {
+            whole
+              gi 4557782 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_000006" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4557783 } ,
+                  anchor "NP_000006" ,
+                  post-text "arylamide acetylase 2" } } ,
+              seqs {
+                whole
+                  gi 4557783 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Consensus CDS (CCDS)" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "CCDS" ,
+                        tag
+                          str "CCDS6008.1" } ,
+                      anchor "CCDS6008.1" } } } ,
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 4557783 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 1353 } ,
+                          anchor "pfam00797: Acetyltransf_2;
+ N-acetyltransferase" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 20 - 280  Blast Score: 1005" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "D90042" } ,
+                  anchor "D90042" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF042740" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2801806 } ,
+              anchor "AF042740" } } ,
+          seqs {
+            whole
+              gi 2801806 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC03773" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2801807 } ,
+                  anchor "AAC03773" } } ,
+              seqs {
+                whole
+                  gi 2801807 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF055874" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3063613 } ,
+              anchor "AF055874" } } ,
+          seqs {
+            whole
+              gi 3063613 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC14117" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3063614 } ,
+                  anchor "AAC14117" } } ,
+              seqs {
+                whole
+                  gi 3063614 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF055875" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3063615 } ,
+              anchor "AF055875" } } ,
+          seqs {
+            whole
+              gi 3063615 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC14118" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3063616 } ,
+                  anchor "AAC14118" } } ,
+              seqs {
+                whole
+                  gi 3063616 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF320309" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 11321494 } ,
+              anchor "AF320309" } } ,
+          seqs {
+            whole
+              gi 11321494 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAG34181" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 11321495 } ,
+                  anchor "AAG34181" } } ,
+              seqs {
+                whole
+                  gi 11321495 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF348074" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 14009675 } ,
+              anchor "AF348074" } } ,
+          seqs {
+            whole
+              gi 14009675 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK51710" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14009676 } ,
+                  anchor "AAK51710" } } ,
+              seqs {
+                whole
+                  gi 14009676 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF348075" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 14009677 } ,
+              anchor "AF348075" } } ,
+          seqs {
+            whole
+              gi 14009677 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK51711" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14009678 } ,
+                  anchor "AAK51711" } } ,
+              seqs {
+                whole
+                  gi 14009678 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AY230251" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 29423785 } ,
+              anchor "AY230251" } } ,
+          seqs {
+            whole
+              gi 29423785 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAO73561" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 29423786 } ,
+                  anchor "AAO73561" } } ,
+              seqs {
+                whole
+                  gi 29423786 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AY230252" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 29423789 } ,
+              anchor "AY230252" } } ,
+          seqs {
+            whole
+              gi 29423789 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAO73562" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 29423790 } ,
+                  anchor "AAO73562" } } ,
+              seqs {
+                whole
+                  gi 29423790 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AY331807" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 32402490 } ,
+              anchor "AY331807" } } ,
+          seqs {
+            whole
+              gi 32402490 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAP81164" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 32402491 } ,
+                  anchor "AAP81164" } } ,
+              seqs {
+                whole
+                  gi 32402491 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "D10870" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 219871 } ,
+              anchor "D10870" } } ,
+          seqs {
+            whole
+              gi 219871 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA01640" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 219872 } ,
+                  anchor "BAA01640" } } ,
+              seqs {
+                whole
+                  gi 219872 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "D10871" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 219874 } ,
+              anchor "D10871" } } ,
+          seqs {
+            whole
+              gi 219874 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA01641" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 219875 } ,
+                  anchor "BAA01641" } } ,
+              seqs {
+                whole
+                  gi 219875 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "D10872" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 219877 } ,
+              anchor "D10872" } } ,
+          seqs {
+            whole
+              gi 219877 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA01642" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 219878 } ,
+                  anchor "BAA01642" } } ,
+              seqs {
+                whole
+                  gi 219878 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "M75163" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 189075 } ,
+              anchor "M75163" } } ,
+          seqs {
+            whole
+              gi 189075 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA59906" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 553602 } ,
+                  anchor "AAA59906" } } ,
+              seqs {
+                whole
+                  gi 553602 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "M75164" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 189072 } ,
+              anchor "M75164" } } ,
+          seqs {
+            whole
+              gi 189072 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA59905" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 553601 } ,
+                  anchor "AAA59905" } } ,
+              seqs {
+                whole
+                  gi 553601 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U23052" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 747646 } ,
+              anchor "U23052" } } ,
+          seqs {
+            whole
+              gi 747646 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA64584" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 727413 } ,
+                  anchor "AAA64584" } } ,
+              seqs {
+                whole
+                  gi 727413 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U23434" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 727456 } ,
+              anchor "U23434" } } ,
+          seqs {
+            whole
+              gi 727456 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA64585" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 727457 } ,
+                  anchor "AAA64585" } } ,
+              seqs {
+                whole
+                  gi 727457 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U53473" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 1297331 } ,
+              anchor "U53473" } } ,
+          seqs {
+            whole
+              gi 1297331 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA98976" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 1297332 } ,
+                  anchor "AAA98976" } } ,
+              seqs {
+                whole
+                  gi 1297332 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "X14672" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 28227 } ,
+              anchor "X14672" } } ,
+          seqs {
+            whole
+              gi 28227 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA32802" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 28228 } ,
+                  anchor "CAA32802" } } ,
+              seqs {
+                whole
+                  gi 28228 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC015878" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 16198419 } ,
+              anchor "BC015878" } } ,
+          seqs {
+            whole
+              gi 16198419 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH15878" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 16198420 } ,
+                  anchor "AAH15878" } } ,
+              seqs {
+                whole
+                  gi 16198420 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC067218" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 45501306 } ,
+              anchor "BC067218" } } ,
+          seqs {
+            whole
+              gi 45501306 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH67218" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 45501307 } ,
+                  anchor "AAH67218" } } ,
+              seqs {
+                whole
+                  gi 45501307 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "CR407631" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 47115198 } ,
+              anchor "CR407631" } } ,
+          seqs {
+            whole
+              gi 47115198 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAG28559" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 47115199 } ,
+                  anchor "CAG28559" } } ,
+              seqs {
+                whole
+                  gi 47115199 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "D90040" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 219411 } ,
+              anchor "D90040" } } ,
+          seqs {
+            whole
+              gi 219411 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA14094" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 219412 } ,
+                  anchor "BAA14094" } } ,
+              seqs {
+                whole
+                  gi 219412 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "D90042" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 219415 } ,
+              anchor "D90042" } } ,
+          seqs {
+            whole
+              gi 219415 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA14096" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 219416 } ,
+                  anchor "BAA14096" } } ,
+              seqs {
+                whole
+                  gi 219416 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P11245" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 114238 } ,
+                  anchor "P11245" } } ,
+              seqs {
+                whole
+                  gi 114238 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "10" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_030737.9&gene=NAT2&lid=10&from=6093683&to=6103610" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "10" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_030737.9&gene=NAT2&lid=10" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.2" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.2" } ,
+              anchor "Hs.2" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=2" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "243400" } ,
+              anchor "243400" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 10 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=10" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:125365" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=BC067
+218" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+BC067218" } } } ,
+        {
+          type comment ,
+          text "PharmGKB" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "PharmGKB" ,
+                tag
+                  str "PA18" } ,
+              anchor "PA18" } } } ,
+        {
+          type comment ,
+          text "Arylamine N-Acetyltransferase Nomenclature" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Arylamine N-Acetyltransferase Nomenclature" ,
+                tag
+                  str "Arylamine N-Acetyltransferase Nomenclature" } ,
+              anchor "Arylamine N-Acetyltransferase Nomenclature" ,
+              url "http://www.louisville.edu/medschool/pharmacology/NAT.html" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC015878,BC067218" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC015878
+,BC067218" } } } } } ,
+    {
+      type generif ,
+      text "there is no association between endometriosis and NAT2 in South
+ Indian women" ,
+      version 0 ,
+      refs {
+        pmid 15588473 } ,
+      create-date
+        str "Mar 19 2005 10:01AM" ,
+      update-date
+        str "Mar 19 2005 11:47AM" } ,
+    {
+      type generif ,
+      text "Risks for colorectal cancer are significantly associated with the
+ genetic polymorphisms of GSTT1 deletion, NAT2-rapid acetylator phenotype and
+ genotye and NAT2-rapid acetylator phenotype." ,
+      version 0 ,
+      refs {
+        pmid 15637738 } ,
+      create-date
+        str "Mar  7 2005  7:07PM" ,
+      update-date
+        str "Mar  8 2005 12:10AM" } ,
+    {
+      type generif ,
+      text "Cigarette smoking is associated with increased risk of breast
+ cancer in women with the NAT2 slow acetylator genotype." ,
+      version 0 ,
+      refs {
+        pmid 15225898 } ,
+      create-date
+        str "Nov 13 2004 10:01AM" ,
+      update-date
+        str "Nov 13 2004 10:50AM" } ,
+    {
+      type generif ,
+      text "N-acetyltransferase 2 polymorphism does not play an important role
+ in breast cancer risk of Turkish women by altering the capacity in
+ deactivation of environmental carcinogens" ,
+      version 0 ,
+      refs {
+        pmid 15162844 } ,
+      create-date
+        str "Oct  4 2004 10:33AM" ,
+      update-date
+        str "Oct  4 2004 11:19AM" } ,
+    {
+      type generif ,
+      text "NAT2 slow acetylator genotypes might modulate the effect of
+ carcinogenic arylamines contained in tobacco smoke." ,
+      version 0 ,
+      refs {
+        pmid 14648207 } ,
+      create-date
+        str "Sep 27 2004  4:34PM" ,
+      update-date
+        str "Sep 27 2004  5:11PM" } ,
+    {
+      type generif ,
+      text "Arylamine N-acetyltransferase 2 is a genetically polymorphic phase
+ II enzyme with a role in the metabolism of many xenobiotics." ,
+      version 0 ,
+      refs {
+        pmid 14747882 } ,
+      create-date
+        str "Aug  2 2004  1:09PM" ,
+      update-date
+        str "Aug  2 2004  1:38PM" } ,
+    {
+      type generif ,
+      text "although there is little overall association between NAT genotypes
+ and risk of developing systemic lupus erythematosus, the interaction between
+ NAT1 and NAT2 and specific exposures such as hair dyes may be important" ,
+      version 0 ,
+      refs {
+        pmid 14705222 } ,
+      create-date
+        str "Apr 16 2004 12:00AM" ,
+      update-date
+        str "May  2 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "The NAT2 acetylator genotype may modify esophageal cancer risk in
+ humans from exposure to barbecued/grilled meat." ,
+      version 0 ,
+      refs {
+        pmid 12773763 } ,
+      create-date
+        str "Feb 13 2004 12:00AM" ,
+      update-date
+        str "Feb 22 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "N-acetyltransferase 2 polymorphism does not differ between
+ patients suffering from atopic dermatitis and healthy subjects" ,
+      version 0 ,
+      refs {
+        pmid 14528063 } ,
+      create-date
+        str "Feb  5 2004 12:00AM" ,
+      update-date
+        str "Feb 15 2004  6:33PM" } ,
+    {
+      type generif ,
+      text "Genetic NAT2 polymorphism may play a role in lifestyle
+ factors-related hepato- carcinogenesis, particulary critical in smoking
+ related hepatocarcinogenesis." ,
+      version 0 ,
+      refs {
+        pmid 12877350 } ,
+      create-date
+        str "Feb  4 2004 12:00AM" ,
+      update-date
+        str "Feb 15 2004  6:32PM" } ,
+    {
+      type generif ,
+      text "Slow acetylation activity is associated with age-related cataract
+ formation." ,
+      version 0 ,
+      refs {
+        pmid 12724621 } ,
+      create-date
+        str "Jan  6 2004 12:00AM" ,
+      update-date
+        str "Feb 22 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "A significant association between Parkinson disease & the slow
+ acetylator genotype for N-acetyltransferase 2 in Hong Kong Chinesed provides
+ evidence for a possible functional relationship between NAT2 slow acetylator
+ genotype & PD in both racial groups." ,
+      version 0 ,
+      refs {
+        pmid 12654968 } ,
+      create-date
+        str "Dec 23 2003 12:00AM" ,
+      update-date
+        str "Jan 18 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "RFLP of the gene coding for N-acetyltransferase 2 (NAT2) was typed
+ in populations of the Volga-Ural region (Bashkirs, Tatars, Chuvashes,
+ Udmurts, and Russians) and in patients with chronic obstructive pulmonary
+ disease (COPD) and in healthy individuals" ,
+      version 0 ,
+      refs {
+        pmid 12884528 } ,
+      create-date
+        str "Dec 18 2003 12:00AM" ,
+      update-date
+        str "Jan 25 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "We conclude that smoking increases risk of colorectal adenomas and
+ that SULT1A1 and NAT2 only modestly modify this association." ,
+      version 0 ,
+      refs {
+        pmid 14618622 } ,
+      create-date
+        str "Dec 11 2003 12:00AM" ,
+      update-date
+        str "Dec 21 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "In human, NAT2 polymorphism may be a genetic risk factor for joint
+ destruction." ,
+      version 0 ,
+      refs {
+        pmid 15005011 } ,
+      create-date
+        str "May  7 2004 12:00AM" ,
+      update-date
+        str "May 16 2004  7:01AM" } ,
+    {
+      type generif ,
+      text "It is unlikely that the NAT1*10 or NAT2 rapid/intermediate
+ genotypes are related to stomach cancer risk." ,
+      version 0 ,
+      refs {
+        pmid 12692115 } ,
+      create-date
+        str "Sep  4 2003 12:00AM" ,
+      update-date
+        str "Sep 28 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "genetic polymorphisms of NAT1 and NAT2 have no independent effect
+ on breast cancer risk, but they modulate breast cancer risk in the presence
+ of GSTM1 and GSTT1 null genotypes." ,
+      version 0 ,
+      refs {
+        pmid 12860276 } ,
+      create-date
+        str "Aug 18 2003 12:00AM" ,
+      update-date
+        str "Aug 24 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT2 slow acetylator genotype plays an important role in
+ determining the risk of developing prostate cancer in Japanese men and is
+ also associated with more clinically advanced and pathologically aggressive
+ disease." ,
+      version 0 ,
+      refs {
+        pmid 12622714 } ,
+      create-date
+        str "Aug  9 2003 12:00AM" ,
+      update-date
+        str "Aug 24 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Polymorphism of Nat2 encoding phase 2 xenobiotic detoxication
+ enzyme was studied." ,
+      version 0 ,
+      refs {
+        pmid 12760253 } ,
+      create-date
+        str "Jun 26 2003 12:00AM" ,
+      update-date
+        str "Aug 10 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "This enzyme is polymorphic in various ethnic populations of South
+ India." ,
+      version 0 ,
+      refs {
+        pmid 12469231 } ,
+      create-date
+        str "May 24 2003 12:00AM" ,
+      update-date
+        str "Jul 27 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Maternal NAT2 acetylator status seems not to be an important
+ factor in the etiology of orofacial clefts." ,
+      version 0 ,
+      refs {
+        pmid 12397635 } ,
+      create-date
+        str "May  7 2003 12:00AM" ,
+      update-date
+        str "Jun  1 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Genotyping the NAT2 gene and estimating diplotype configuration
+ before administration of SSZ may reduce the frequency of adverse effects in
+ Japanese patients with RA." ,
+      version 0 ,
+      refs {
+        pmid 12465141 } ,
+      create-date
+        str "May  2 2003 12:00AM" ,
+      update-date
+        str "May 11 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Single nucleotide polymorphisms of NAT1 and NAT2, and acetylation
+ haplotype were not associated with increased risk for Parkinson disease" ,
+      version 0 ,
+      refs {
+        pmid 12682333 } ,
+      create-date
+        str "Apr 11 2003 12:00AM" ,
+      update-date
+        str "Apr 27 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "lack of association between polymorphism of this gene and systemic
+ lupus erythematosus" ,
+      version 0 ,
+      refs {
+        pmid 12360107 } ,
+      create-date
+        str "Apr  9 2003 12:00AM" ,
+      update-date
+        str "Apr 27 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Polymorphic N-acetyltransferase (NAT2) is involved in the
+ metabolism of several compounds relevant in pharmacology or toxicology, with
+ diverse clinical consequences--review" ,
+      version 0 ,
+      refs {
+        pmid 12611196 } ,
+      create-date
+        str "Mar 24 2003 12:00AM" ,
+      update-date
+        str "Mar 31 2003  6:30AM" } ,
+    {
+      type generif ,
+      text "N-acetyltransferase 2*19 possessing the C190T (R64W) single
+ nucleotide polymorphisms encodes a slow acetylator phenotype for both N- and
+ O-acetylation, due to a reduction in the amount and stability of the NAT2 19
+ allozyme" ,
+      version 0 ,
+      refs {
+        pmid 12222688 } ,
+      create-date
+        str "Mar 10 2003 12:00AM" ,
+      update-date
+        str "Apr 13 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT2 slow acetylation and GSTM1 null genotypes may increase
+ postmenopausal breast cancer risk in long-term smoking women." ,
+      version 0 ,
+      refs {
+        pmid 12835615 } ,
+      create-date
+        str "Mar  7 2004 12:00AM" ,
+      update-date
+        str "Apr 18 2004  7:02AM" } ,
+    {
+      type generif ,
+      text "A slow acetylator genotype of this enzyme is associated with an
+ increased risk of advanced cervical cancer." ,
+      version 0 ,
+      refs {
+        pmid 12474054 } ,
+      create-date
+        str "Feb 27 2003 12:00AM" ,
+      update-date
+        str "Apr 13 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Women with the GSTT1 null genotype were found to have a
+ significant 3.15-fold increased risk of breast cancer (95% CI = 1.7-5.8),
+ while GSTM1 and NAT2 genotypes were not associated with breast cancer risk." ,
+      version 0 ,
+      refs {
+        pmid 12430181 } ,
+      create-date
+        str "Nov 25 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT2 slow genotype with NAT1 polymorphism indicates increased
+ susceptibility to prostate cancer" ,
+      version 0 ,
+      refs {
+        pmid 12355549 } ,
+      create-date
+        str "Oct 31 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT2 slow acetylation genotype may be a risk factor of individual
+ susceptibility to rheumatoid arthritis." ,
+      version 0 ,
+      refs {
+        pmid 12235453 } ,
+      create-date
+        str "Oct  7 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "NAT2 acetylator status can influence susceptibility to breast
+ cancer after exposure to tobacco smoke carcinogens; effect appears to be
+ differential for active and passive smoke exposure" ,
+      version 0 ,
+      refs {
+        pmid 12163321 } ,
+      create-date
+        str "Sep  7 2002 12:00AM" ,
+      update-date
+        str "Sep 16 2002  5:48AM" } ,
+    {
+      type generif ,
+      text "NAT2 polymorphisms may be correlated with an increased risk of
+ larynx cancer" ,
+      version 0 ,
+      refs {
+        pmid 12037388 } ,
+      create-date
+        str "Aug 19 2002 12:00AM" ,
+      update-date
+        str "Aug 28 2002  6:15PM" } ,
+    {
+      type generif ,
+      text "The combined effect of N-acetyltransferase 2 (NAT2) slow genotype
+ and exposure to smoking is observed during the development of laryngeal
+ cancer." ,
+      version 0 ,
+      refs {
+        pmid 12015038 } ,
+      create-date
+        str "Aug  9 2002 12:00AM" ,
+      update-date
+        str "Sep 16 2002  5:48AM" } ,
+    {
+      type generif ,
+      text "urinary excretion of
+ N(2)-(beta-1-glucos-iduronyl)-2-hydroxyamino-1-methyl-6-phenylimidazo[4,5-b]p
+yridine relationship to activity levels of NAT2" ,
+      version 0 ,
+      refs {
+        pmid 12016157 } ,
+      create-date
+        str "Jun 12 2002 12:00AM" ,
+      update-date
+        str "Jun 24 2002  6:31AM" } ,
+    {
+      type generif ,
+      text "Polymorphism of the N-acetyltransferase 2 gene as a susceptibility
+ risk factor for antituberculosis drug-induced hepatitis." ,
+      version 0 ,
+      refs {
+        pmid 11915035 } ,
+      create-date
+        str "Apr 15 2002 12:00AM" ,
+      update-date
+        str "Apr 28 2002  6:17PM" } ,
+    {
+      type generif ,
+      text "Association between bone loss in periodontal disease and
+ polymorphism of N-acetyltransferase (NAT2)" ,
+      version 0 ,
+      refs {
+        pmid 11846845 } ,
+      create-date
+        str "Apr  9 2002 12:00AM" ,
+      update-date
+        str "May 14 2002  5:48AM" } ,
+    {
+      type generif ,
+      text "we investigated the relationship between the levels of aromatic
+ DNA adducts in breast tissues and polymorphisms of the drug-metabolizing
+ genes CYP1A1, NAT2, and GSTM1 in 166 women having breast cancer" ,
+      version 0 ,
+      refs {
+        pmid 11872636 } ,
+      create-date
+        str "Apr  2 2002 12:00AM" ,
+      update-date
+        str "Apr  8 2002  3:24PM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 10 } ,
+    {
+      db "MIM" ,
+      tag
+        id 243400 } } ,
+  xtra-index-terms {
+    "LOC10" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 3 ,
+        day 5 ,
+        hour 11 ,
+        minute 30 ,
+        second 0 } } ,
+  type pseudo ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "8" } } } ,
+  gene {
+    locus "AACP" ,
+    desc "arylamide acetylase pseudogene" ,
+    maploc "8p22" ,
+    syn {
+      "NATP" } ,
+    locus-tag "HGNC:15" } ,
+  location {
+    {
+      display-str "8p22" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11 ,
+    src-str2 "11" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "AACP" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "arylamide acetylase pseudogene" ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 15 } } } } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 2340091 } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "X17060" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34995 } ,
+              anchor "X17060" } } ,
+          seqs {
+            whole
+              gi 34995 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:132838" } } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11 } } ,
+  xtra-index-terms {
+    "LOC11" } }
+Entrezgene ::= {
+  track-info {
+    geneid 12 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 14 ,
+        hour 13 ,
+        minute 15 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "14" } } } ,
+  gene {
+    locus "SERPINA3" ,
+    desc "serine (or cysteine) proteinase inhibitor, clade A (alpha-1
+ antiproteinase, antitrypsin), member 3" ,
+    maploc "14q32.1" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 107280 } } ,
+    syn {
+      "ACT" ,
+      "AACT" ,
+      "MGC88254" } ,
+    locus-tag "HGNC:16" } ,
+  prot {
+    name {
+      "serine (or cysteine) proteinase inhibitor, clade A, member 3" ,
+      "antichymotrypsin" ,
+      "alpha-1-antichymotrypsin" ,
+      "serine (or cysteine) proteinase inhibitor, clade A, member 3" } } ,
+  summary "The protein encoded by this gene is a plasma protease inhibitor and
+ member of the serine protease inhibitor class.  Polymorphisms in this protein
+ appear to be tissue specific and influence protease targeting.  Variation in
+ this protein's sequence have been implicated in Alzheimer's disease, and
+ deficiency of this protein has been associated with liver disease.  Mutations
+ have been identified in patients with Parkinson disease and chronic
+ obstructive pulmonary disease." ,
+  location {
+    {
+      display-str "14q32.1" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 12 ,
+    src-str2 "12" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000014" ,
+      version 7 ,
+      seqs {
+        int {
+          from 94150523 ,
+          to 94160143 ,
+          strand plus ,
+          id
+            gi 51511730 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001085" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 94150523 ,
+                to 94151173 ,
+                strand plus ,
+                id
+                  gi 51511730 } ,
+              int {
+                from 94155284 ,
+                to 94155557 ,
+                strand plus ,
+                id
+                  gi 51511730 } ,
+              int {
+                from 94158430 ,
+                to 94158580 ,
+                strand plus ,
+                id
+                  gi 51511730 } ,
+              int {
+                from 94159700 ,
+                to 94160143 ,
+                strand plus ,
+                id
+                  gi 51511730 } } } ,
+          seqs {
+            whole
+              gi 9665246 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              label "precursor" ,
+              accession "NP_001076" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 94150531 ,
+                    to 94151173 ,
+                    strand plus ,
+                    id
+                      gi 51511730 } ,
+                  {
+                    from 94155284 ,
+                    to 94155557 ,
+                    strand plus ,
+                    id
+                      gi 51511730 } ,
+                  {
+                    from 94158430 ,
+                    to 94158580 ,
+                    strand plus ,
+                    id
+                      gi 51511730 } ,
+                  {
+                    from 94159700 ,
+                    to 94159935 ,
+                    strand plus ,
+                    id
+                      gi 51511730 } } } ,
+              seqs {
+                whole
+                  gi 4501843 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_026437" ,
+      version 11 ,
+      seqs {
+        int {
+          from 76080523 ,
+          to 76090143 ,
+          strand plus ,
+          id
+            gi 51493278 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001085" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 76080523 ,
+                to 76081173 ,
+                strand plus ,
+                id
+                  gi 51493278 } ,
+              int {
+                from 76085284 ,
+                to 76085557 ,
+                strand plus ,
+                id
+                  gi 51493278 } ,
+              int {
+                from 76088430 ,
+                to 76088580 ,
+                strand plus ,
+                id
+                  gi 51493278 } ,
+              int {
+                from 76089700 ,
+                to 76090143 ,
+                strand plus ,
+                id
+                  gi 51493278 } } } ,
+          seqs {
+            whole
+              gi 9665246 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              label "precursor" ,
+              accession "NP_001076" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 76080531 ,
+                    to 76081173 ,
+                    strand plus ,
+                    id
+                      gi 51493278 } ,
+                  {
+                    from 76085284 ,
+                    to 76085557 ,
+                    strand plus ,
+                    id
+                      gi 51493278 } ,
+                  {
+                    from 76088430 ,
+                    to 76088580 ,
+                    strand plus ,
+                    id
+                      gi 51493278 } ,
+                  {
+                    from 76089700 ,
+                    to 76089935 ,
+                    strand plus ,
+                    id
+                      gi 51493278 } } } ,
+              seqs {
+                whole
+                  gi 4501843 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086807" ,
+      version 1 ,
+      seqs {
+        int {
+          from 23153523 ,
+          to 23163143 ,
+          strand plus ,
+          id
+            gi 51472322 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001085" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 23153523 ,
+                to 23154173 ,
+                strand plus ,
+                id
+                  gi 51472322 } ,
+              int {
+                from 23158284 ,
+                to 23158557 ,
+                strand plus ,
+                id
+                  gi 51472322 } ,
+              int {
+                from 23161430 ,
+                to 23161580 ,
+                strand plus ,
+                id
+                  gi 51472322 } ,
+              int {
+                from 23162700 ,
+                to 23163143 ,
+                strand plus ,
+                id
+                  gi 51472322 } } } ,
+          seqs {
+            whole
+              gi 9665246 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              label "precursor" ,
+              accession "NP_001076" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 23153531 ,
+                    to 23154173 ,
+                    strand plus ,
+                    id
+                      gi 51472322 } ,
+                  {
+                    from 23158284 ,
+                    to 23158557 ,
+                    strand plus ,
+                    id
+                      gi 51472322 } ,
+                  {
+                    from 23161430 ,
+                    to 23161580 ,
+                    strand plus ,
+                    id
+                      gi 51472322 } ,
+                  {
+                    from 23162700 ,
+                    to 23162935 ,
+                    strand plus ,
+                    id
+                      gi 51472322 } } } ,
+              seqs {
+                whole
+                  gi 4501843 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "SERPINA3" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "serine (or cysteine) proteinase inhibitor, clade A (alpha-1
+ antiproteinase, antitrypsin), member 3" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9880565 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 3677 } ,
+                  anchor "DNA binding" ,
+                  post-text "evidence: IC" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9880565 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 30569 } ,
+                  anchor "chymotrypsin inhibitor activity" ,
+                  post-text "evidence: NAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12709365 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5515 } ,
+                  anchor "protein binding" ,
+                  post-text "evidence: IPI" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4867 } ,
+                  anchor "serine-type endopeptidase inhibitor activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6953 } ,
+                  anchor "acute-phase response" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12475184 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6954 } ,
+                  anchor "inflammatory response" ,
+                  post-text "evidence: NAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 11835318 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 19216 } ,
+                  anchor "regulation of lipid metabolism" ,
+                  post-text "evidence: NAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6810 } ,
+                  anchor "transport" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9880565 ,
+                pmid 14718574 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5576 } ,
+                  anchor "extracellular region" ,
+                  post-text "evidence: NAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9880565 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5622 } ,
+                  anchor "intracellular" ,
+                  post-text "evidence: NAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5634 } ,
+                  anchor "nucleus" ,
+                  post-text "evidence: IEA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 40658 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=14&
+MAPS=genes-r-org/rat-chr/human%3A14,genes-r-org/mouse-chr/human%3A14,genes-r-o
+rg/human-chr14&query=e%3A12[id]+AND+gene[obj_type]&QSTR=serpina3&cmd=focus&fil
+l=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 16 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15546506 ,
+        pmid 15542006 ,
+        pmid 15530656 ,
+        pmid 15014966 ,
+        pmid 14718574 ,
+        pmid 14702039 ,
+        pmid 14668352 ,
+        pmid 12709365 ,
+        pmid 12600202 ,
+        pmid 12477932 ,
+        pmid 12475184 ,
+        pmid 12324297 ,
+        pmid 12023832 ,
+        pmid 11992569 ,
+        pmid 11959399 ,
+        pmid 11936240 ,
+        pmid 11835318 ,
+        pmid 11798857 ,
+        pmid 11735417 ,
+        pmid 11692021 ,
+        pmid 11317942 ,
+        pmid 11289720 ,
+        pmid 11274154 ,
+        pmid 11096092 ,
+        pmid 10829039 ,
+        pmid 10759471 ,
+        pmid 10660528 ,
+        pmid 10512690 ,
+        pmid 10228625 ,
+        pmid 10048303 ,
+        pmid 9880565 ,
+        pmid 9824262 ,
+        pmid 9698370 ,
+        pmid 9635374 ,
+        pmid 9580375 ,
+        pmid 9428387 ,
+        pmid 9261179 ,
+        pmid 9042371 ,
+        pmid 8849841 ,
+        pmid 8739078 ,
+        pmid 8732755 ,
+        pmid 8718849 ,
+        pmid 8376411 ,
+        pmid 8365378 ,
+        pmid 8267879 ,
+        pmid 8244391 ,
+        pmid 8226889 ,
+        pmid 8216224 ,
+        pmid 8011628 ,
+        pmid 7966721 ,
+        pmid 7924407 ,
+        pmid 7873202 ,
+        pmid 7759598 ,
+        pmid 7588564 ,
+        pmid 7528097 ,
+        pmid 6762318 ,
+        pmid 6687683 ,
+        pmid 6606438 ,
+        pmid 6556193 ,
+        pmid 6547997 ,
+        pmid 3637050 ,
+        pmid 3492865 ,
+        pmid 3490907 ,
+        pmid 3485824 ,
+        pmid 3260956 ,
+        pmid 3257719 ,
+        pmid 3028924 ,
+        pmid 2945424 ,
+        pmid 2456771 ,
+        pmid 2435303 ,
+        pmid 2432851 ,
+        pmid 2404007 ,
+        pmid 2190106 ,
+        pmid 1618300 ,
+        pmid 1351206 ,
+        pmid 1311327 } } ,
+    {
+      type comment ,
+      heading "Phenotypes" ,
+      version 0 ,
+      comment {
+        {
+          type phenotype ,
+          text "Alpha-1-antichymotrypsin deficiency" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  id 107280 } ,
+              anchor "MIM: 107280" } } } ,
+        {
+          type phenotype ,
+          text "Cerebrovascular disease, occlusive" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  id 107280 } ,
+              anchor "MIM: 107280" } } } } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 32177 } ,
+              anchor "RH1625" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH77711" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG1393" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 37467 } ,
+              anchor "SHGC-32982" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "EST128561" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH32590" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH53830" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SGC32982" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 64155 } ,
+              anchor "D14S845" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "GDB:588356" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH53900" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "UTR-04554" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "WI-7103" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 66238 } ,
+              anchor "G31126" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SHGC-19718" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 74665 } ,
+              anchor "RH78280" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG41027" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 86678 } ,
+              anchor "RH91507" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG44830" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 272387 } ,
+              anchor "PMC27764P1" ,
+              post-text "(e-PCR)" } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_001085" ,
+          version 3 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 50659079 } ,
+              anchor "NM_001085" } } ,
+          seqs {
+            whole
+              gi 50659079 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_001076" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 50659080 } ,
+                  anchor "NP_001076" ,
+                  post-text "serine (or cysteine) proteinase inhibitor, clade
+ A, member 3 precursor" } } ,
+              seqs {
+                whole
+                  gi 50659080 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 50659080 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 28234 } ,
+                          anchor "cd02056: alpha-1-antitrypsin_like;
+ alpha-1-antitrypsin_like" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 52 - 417  Blast Score: 1367" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "BC003559,BG565041" } ,
+                  anchor "BC003559,BG565041" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "X00947" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 28331 } ,
+              anchor "X00947" } } ,
+          seqs {
+            whole
+              gi 28331 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA25459" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 28332 } ,
+                  anchor "CAA25459" } } ,
+              seqs {
+                whole
+                  gi 28332 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "X68733" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 439137 } ,
+              anchor "X68733" } } ,
+          seqs {
+            whole
+              gi 439137 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA48671" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 1340142 } ,
+                  anchor "CAA48671" } } ,
+              seqs {
+                whole
+                  gi 1340142 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AB209060" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 62087699 } ,
+              anchor "AB209060" } } ,
+          seqs {
+            whole
+              gi 62087699 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAD92297" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 62087700 } ,
+                  anchor "BAD92297" } } ,
+              seqs {
+                whole
+                  gi 62087700 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF089747" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4165889 } ,
+              anchor "AF089747" } } ,
+          seqs {
+            whole
+              gi 4165889 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD08810" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4165890 } ,
+                  anchor "AAD08810" } } ,
+              seqs {
+                whole
+                  gi 4165890 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK093049" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 21751790 } ,
+              anchor "AK093049" } } ,
+          seqs {
+            whole
+              gi 21751790 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK096120" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 21755531 } ,
+              anchor "AK096120" } } ,
+          seqs {
+            whole
+              gi 21755531 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK123091" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34528556 } ,
+              anchor "AK123091" } } ,
+          seqs {
+            whole
+              gi 34528556 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC003559" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 13097704 } ,
+              anchor "BC003559" } } ,
+          seqs {
+            whole
+              gi 13097704 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH03559" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 13097705 } ,
+                  anchor "AAH03559" } } ,
+              seqs {
+                whole
+                  gi 13097705 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC010530" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 14714765 } ,
+              anchor "BC010530" } } ,
+          seqs {
+            whole
+              gi 14714765 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH10530" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14714766 } ,
+                  anchor "AAH10530" } } ,
+              seqs {
+                whole
+                  gi 14714766 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC013189" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 15341984 } ,
+              anchor "BC013189" } } ,
+          seqs {
+            whole
+              gi 15341984 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH13189" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 15341985 } ,
+                  anchor "AAH13189" } } ,
+              seqs {
+                whole
+                  gi 15341985 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC034554" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 21961492 } ,
+              anchor "BC034554" } } ,
+          seqs {
+            whole
+              gi 21961492 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH34554" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 21961493 } ,
+                  anchor "AAH34554" } } ,
+              seqs {
+                whole
+                  gi 21961493 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC070265" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 47124537 } ,
+              anchor "BC070265" } } ,
+          seqs {
+            whole
+              gi 47124537 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH70265" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 47124538 } ,
+                  anchor "AAH70265" } } ,
+              seqs {
+                whole
+                  gi 47124538 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BG565041" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 13572694 } ,
+              anchor "BG565041" } } ,
+          seqs {
+            whole
+              gi 13572694 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "J05176" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 177932 } ,
+              anchor "J05176" } } ,
+          seqs {
+            whole
+              gi 177932 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA51560" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 177933 } ,
+                  anchor "AAA51560" } } ,
+              seqs {
+                whole
+                  gi 177933 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "K01500" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 177808 } ,
+              anchor "K01500" } } ,
+          seqs {
+            whole
+              gi 177808 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA51543" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 177809 } ,
+                  anchor "AAA51543" } } ,
+              seqs {
+                whole
+                  gi 177809 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "M18906" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 177928 } ,
+              anchor "M18906" } } ,
+          seqs {
+            whole
+              gi 177928 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA51559" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 177931 } ,
+                  anchor "AAA51559" } } ,
+              seqs {
+                whole
+                  gi 177931 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P01011" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 112874 } ,
+                  anchor "P01011" } } ,
+              seqs {
+                whole
+                  gi 112874 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "12" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_026437.11&gene=SERPINA3&lid=12&from=76080524&to=76090144" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "12" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_026437.11&gene=SERPINA3&lid=12" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.534293" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.534293" } ,
+              anchor "Hs.534293" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=53
+4293" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "107280" } ,
+              anchor "107280" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "12" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+12[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 12 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=12" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:118955" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=AK093
+049" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+AK093049" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "KEGG" ,
+                tag
+                  str "" } ,
+              url "http://www.genome.ad.jp/dbget-bin/www_bget?hsa:12" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC003559,BC010530,BC034554" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC003559
+,BC010530,BC034554" } } } } } ,
+    {
+      type comment ,
+      heading "Pathways" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "KEGG pathway: Alzheimer's disease" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "05010" ,
+                tag
+                  str "05010" } ,
+              anchor "05010" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa05010+12" } } } } } ,
+    {
+      type generif ,
+      text "overexpression of ACT by astrocytes could thus contribute to
+ increased beta-amyloid fibril formation in Alzheimer's disease" ,
+      version 0 ,
+      refs {
+        pmid 15530656 } ,
+      create-date
+        str "Jan 29 2005 10:01AM" ,
+      update-date
+        str "Jan 29 2005  1:54PM" } ,
+    {
+      type generif ,
+      text "Multivariate analysis showed that ACT mRNA level, but not STC2
+ mRNA level, in HR-positive patients, was a significant prognostic factor (P =
+ 0.042), which was independent of tumor size and lymph node metastases." ,
+      version 0 ,
+      refs {
+        pmid 15546506 } ,
+      create-date
+        str "Jan 15 2005 10:01AM" ,
+      update-date
+        str "Jan 15 2005  4:35PM" } ,
+    {
+      type generif ,
+      text "Alpha1-antichymotrypsin-AA may confer a modest protection against
+ developing early-onset Parkinson disease in women" ,
+      version 0 ,
+      refs {
+        pmid 15542006 } ,
+      create-date
+        str "Jan 15 2005 10:01AM" ,
+      update-date
+        str "Jan 15 2005  1:01PM" } ,
+    {
+      type generif ,
+      text "Polymeric AACT specifically seeds the polymerization of native
+ protein by a serpin-dependent process that occurs under physiological
+ conditions and may be important in the deposition of AACT in plaques in
+ patients with Alzheimer's disease." ,
+      version 0 ,
+      refs {
+        pmid 12600202 } ,
+      create-date
+        str "May 26 2003 12:00AM" ,
+      update-date
+        str "Jun  1 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "alpha(1)-antichymotrypsin polymorphism: a risk factor for
+ hemorrhagic stroke in normotensive subjects." ,
+      version 0 ,
+      refs {
+        pmid 11692021 } ,
+      create-date
+        str "Mar  5 2003 12:00AM" ,
+      update-date
+        str "Mar 23 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "alpha(1)-antitrypsin and antichymotrypsin are produced by the
+ mammary gland and are present in milk in relatively high amounts in early
+ lactation" ,
+      version 0 ,
+      refs {
+        pmid 12324297 } ,
+      create-date
+        str "Oct 11 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "Gene regulation of the serine proteinase inhibitors
+ alpha1-antitrypsin and alpha1-antichymotrypsin." ,
+      version 0 ,
+      refs {
+        pmid 12023832 } ,
+      create-date
+        str "Oct  9 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:01AM" } ,
+    {
+      type generif ,
+      text "data suggested that the ApoE epsilon2/epsilon4 genotype might be a
+ susceptibility variant of moderate effect for sporadic idiopathic Parkinson
+ disease in our samples, whereas the ACT gene signal peptide polymorphism
+ might not" ,
+      version 0 ,
+      refs {
+        pmid 11992569 } ,
+      create-date
+        str "Sep 26 2002 12:00AM" ,
+      update-date
+        str "Oct  7 2002  8:10AM" } ,
+    {
+      type generif ,
+      text "no association seen between alpha 1-antichymotrypsin and time to
+ psychosis in Alzheimer's disease" ,
+      version 0 ,
+      refs {
+        pmid 11936240 } ,
+      create-date
+        str "Aug 28 2002 12:00AM" ,
+      update-date
+        str "Sep 16 2002  5:48AM" } ,
+    {
+      type generif ,
+      text "significantly higher in plasma of Alzheimer patients compared to
+ controls" ,
+      version 0 ,
+      refs {
+        pmid 11959399 } ,
+      create-date
+        str "Jul 22 2002 12:00AM" ,
+      update-date
+        str "Jul 29 2002  6:27PM" } ,
+    {
+      type generif ,
+      text "A25G and G39A substitutions in the AACT gene are probably one of
+ the risk factors to Alzheimer's disease (AD) in Han Chinese." ,
+      version 0 ,
+      refs {
+        pmid 11798857 } ,
+      create-date
+        str "Apr  7 2002 12:00AM" ,
+      update-date
+        str "Apr 13 2002  5:25PM" } ,
+    {
+      type generif ,
+      heading "Interactions" ,
+      version 0 ,
+      comment {
+        {
+          type generif ,
+          text "Alpha-1 ACT interacts with and inhibits cathepsin G." ,
+          version 0 ,
+          refs {
+            pmid 15131125 } ,
+          source {
+            {
+              src {
+                db "BIND" ,
+                tag
+                  id 149291 } ,
+              anchor "BIND" } } ,
+          comment {
+            {
+              type comment ,
+              label "SERPINA3" ,
+              accession "NP_001076" ,
+              version 1 } ,
+            {
+              type comment ,
+              label "CTSG" ,
+              accession "NP_001902" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "GeneID" ,
+                    tag
+                      id 1511 } } } } } ,
+          create-date
+            str "2005-01-21 20:10 EDT" ,
+          update-date
+            str "2005-01-21 20:19 EDT" } ,
+        {
+          type generif ,
+          text "LRP interacts with alpha-1-ACT." ,
+          version 0 ,
+          refs {
+            pmid 15131125 } ,
+          source {
+            {
+              src {
+                db "BIND" ,
+                tag
+                  id 149296 } ,
+              anchor "BIND" } } ,
+          comment {
+            {
+              type comment ,
+              label "SERPINA3" ,
+              accession "NP_001076" ,
+              version 1 } ,
+            {
+              type comment ,
+              label "LRP1" ,
+              accession "NP_002323" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "GeneID" ,
+                    tag
+                      id 4035 } } } } } ,
+          create-date
+            str "2005-01-21 20:10 EDT" ,
+          update-date
+            str "2005-01-21 20:19 EDT" } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 12 } ,
+    {
+      db "MIM" ,
+      tag
+        id 107280 } } ,
+  xtra-index-terms {
+    "LOC12" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 13 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 3 ,
+        day 5 ,
+        hour 11 ,
+        minute 30 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "3" } } } ,
+  gene {
+    locus "AADAC" ,
+    desc "arylacetamide deacetylase (esterase)" ,
+    maploc "3q21.3-q25.2" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 600338 } } ,
+    syn {
+      "DAC" } ,
+    locus-tag "HGNC:17" } ,
+  prot {
+    name {
+      "arylacetamide deacetylase" } } ,
+  summary "Microsomal arylacetamide deacetylase competes against the activity
+ of cytosolic arylamine N-acetyltransferase, which catalyzes one of the
+ initial biotransformation pathways for arylamine and heterocyclic amine
+ carcinogens" ,
+  location {
+    {
+      display-str "3q21.3-q25.2" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 13 ,
+    src-str2 "13" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000003" ,
+      version 9 ,
+      seqs {
+        int {
+          from 153014563 ,
+          to 153028971 ,
+          strand plus ,
+          id
+            gi 51511463 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001086" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 153014563 ,
+                to 153014785 ,
+                strand plus ,
+                id
+                  gi 51511463 } ,
+              int {
+                from 153017851 ,
+                to 153018073 ,
+                strand plus ,
+                id
+                  gi 51511463 } ,
+              int {
+                from 153020868 ,
+                to 153020937 ,
+                strand plus ,
+                id
+                  gi 51511463 } ,
+              int {
+                from 153025148 ,
+                to 153025319 ,
+                strand plus ,
+                id
+                  gi 51511463 } ,
+              int {
+                from 153028061 ,
+                to 153028971 ,
+                strand plus ,
+                id
+                  gi 51511463 } } } ,
+          seqs {
+            whole
+              gi 4557226 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001077" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 153014648 ,
+                    to 153014785 ,
+                    strand plus ,
+                    id
+                      gi 51511463 } ,
+                  {
+                    from 153017851 ,
+                    to 153018073 ,
+                    strand plus ,
+                    id
+                      gi 51511463 } ,
+                  {
+                    from 153020868 ,
+                    to 153020937 ,
+                    strand plus ,
+                    id
+                      gi 51511463 } ,
+                  {
+                    from 153025148 ,
+                    to 153025319 ,
+                    strand plus ,
+                    id
+                      gi 51511463 } ,
+                  {
+                    from 153028061 ,
+                    to 153028657 ,
+                    strand plus ,
+                    id
+                      gi 51511463 } } } ,
+              seqs {
+                whole
+                  gi 4557227 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "3.1.1.-" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "3.1.1.-" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_005612" ,
+      version 14 ,
+      seqs {
+        int {
+          from 58027019 ,
+          to 58041427 ,
+          strand plus ,
+          id
+            gi 37550867 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001086" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 58027019 ,
+                to 58027241 ,
+                strand plus ,
+                id
+                  gi 37550867 } ,
+              int {
+                from 58030307 ,
+                to 58030529 ,
+                strand plus ,
+                id
+                  gi 37550867 } ,
+              int {
+                from 58033324 ,
+                to 58033393 ,
+                strand plus ,
+                id
+                  gi 37550867 } ,
+              int {
+                from 58037604 ,
+                to 58037775 ,
+                strand plus ,
+                id
+                  gi 37550867 } ,
+              int {
+                from 58040517 ,
+                to 58041427 ,
+                strand plus ,
+                id
+                  gi 37550867 } } } ,
+          seqs {
+            whole
+              gi 4557226 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001077" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 58027104 ,
+                    to 58027241 ,
+                    strand plus ,
+                    id
+                      gi 37550867 } ,
+                  {
+                    from 58030307 ,
+                    to 58030529 ,
+                    strand plus ,
+                    id
+                      gi 37550867 } ,
+                  {
+                    from 58033324 ,
+                    to 58033393 ,
+                    strand plus ,
+                    id
+                      gi 37550867 } ,
+                  {
+                    from 58037604 ,
+                    to 58037775 ,
+                    strand plus ,
+                    id
+                      gi 37550867 } ,
+                  {
+                    from 58040517 ,
+                    to 58041113 ,
+                    strand plus ,
+                    id
+                      gi 37550867 } } } ,
+              seqs {
+                whole
+                  gi 4557227 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "3.1.1.-" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "3.1.1.-" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086641" ,
+      version 1 ,
+      seqs {
+        int {
+          from 24339789 ,
+          to 24354221 ,
+          strand plus ,
+          id
+            gi 51464125 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001086" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 24339789 ,
+                to 24340011 ,
+                strand plus ,
+                id
+                  gi 51464125 } ,
+              int {
+                from 24343078 ,
+                to 24343300 ,
+                strand plus ,
+                id
+                  gi 51464125 } ,
+              int {
+                from 24346094 ,
+                to 24346163 ,
+                strand plus ,
+                id
+                  gi 51464125 } ,
+              int {
+                from 24350369 ,
+                to 24350540 ,
+                strand plus ,
+                id
+                  gi 51464125 } ,
+              int {
+                from 24353311 ,
+                to 24354221 ,
+                strand plus ,
+                id
+                  gi 51464125 } } } ,
+          seqs {
+            whole
+              gi 4557226 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001077" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 24339874 ,
+                    to 24340011 ,
+                    strand plus ,
+                    id
+                      gi 51464125 } ,
+                  {
+                    from 24343078 ,
+                    to 24343300 ,
+                    strand plus ,
+                    id
+                      gi 51464125 } ,
+                  {
+                    from 24346094 ,
+                    to 24346163 ,
+                    strand plus ,
+                    id
+                      gi 51464125 } ,
+                  {
+                    from 24350369 ,
+                    to 24350540 ,
+                    strand plus ,
+                    id
+                      gi 51464125 } ,
+                  {
+                    from 24353311 ,
+                    to 24353907 ,
+                    strand plus ,
+                    id
+                      gi 51464125 } } } ,
+              seqs {
+                whole
+                  gi 4557227 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "3.1.1.-" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "3.1.1.-" } } } } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "AADAC" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "arylacetamide deacetylase (esterase)" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9665742 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 19213 } ,
+                  anchor "deacetylase activity" ,
+                  post-text "evidence: IDA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8152 } ,
+                  anchor "metabolism" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10318829 ,
+                pmid 15152005 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5789 } ,
+                  anchor "endoplasmic reticulum membrane" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16021 } ,
+                  anchor "integral to membrane" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 8063807 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5792 } ,
+                  anchor "microsome" ,
+                  post-text "evidence: TAS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 37436 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=3&M
+APS=genes-r-org/rat-chr/human%3A3,genes-r-org/mouse-chr/human%3A3,genes-r-org/
+human-chr3&query=e%3A13[id]+AND+gene[obj_type]&QSTR=aadac&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 17 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15152005 ,
+        pmid 12721789 ,
+        pmid 12477932 ,
+        pmid 10318829 ,
+        pmid 9665742 ,
+        pmid 9299245 ,
+        pmid 8063807 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 13170 } ,
+              anchor "STS-L32179" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH41545" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "sts-L32179" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 18863 } ,
+              anchor "D3S4189" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SHGC-9733" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 24707 } ,
+              anchor "RH36344" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "stSG13141" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 136046 } ,
+              anchor "RH124029" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "SHGC-105511" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_001086" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4557226 } ,
+              anchor "NM_001086" } } ,
+          seqs {
+            whole
+              gi 4557226 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_001077" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4557227 } ,
+                  anchor "NP_001077" ,
+                  post-text "arylacetamide deacetylase" } } ,
+              seqs {
+                whole
+                  gi 4557227 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 4557227 } ,
+                      pre-text "(2)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 10527 } ,
+                          anchor "COG0657: Aes; Esterase/lipase [Lipid
+ metabolism]" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 33 - 399  Blast Score: 380" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 16555 } ,
+                          anchor "cd00312: Esterase_lipase; Esterases and
+ lipases (includes fungal lipases, cholinesterases, etc" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 91 - 203  Blast Score: 144" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "L32179" } ,
+                  anchor "L32179" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC020706" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 18088402 } ,
+              anchor "BC020706" } } ,
+          seqs {
+            whole
+              gi 18088402 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC032309" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 21595550 } ,
+              anchor "BC032309" } } ,
+          seqs {
+            whole
+              gi 21595550 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH32309" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 21595551 } ,
+                  anchor "AAH32309" } } ,
+              seqs {
+                whole
+                  gi 21595551 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "L32179" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 537513 } ,
+              anchor "L32179" } } ,
+          seqs {
+            whole
+              gi 537513 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA35551" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 537514 } ,
+                  anchor "AAA35551" } } ,
+              seqs {
+                whole
+                  gi 537514 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P22760" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 57015294 } ,
+                  anchor "P22760" } } ,
+              seqs {
+                whole
+                  gi 57015294 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "13" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_005612.14&gene=AADAC&lid=13&from=58027020&to=58041428" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "13" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_005612.14&gene=AADAC&lid=13" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.506908" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.506908" } ,
+              anchor "Hs.506908" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=50
+6908" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "600338" } ,
+              anchor "600338" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "13" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+13[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 13 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=13" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:392587" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=BC032
+309" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+BC032309" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "KEGG" ,
+                tag
+                  str "" } ,
+              url "http://www.genome.ad.jp/dbget-bin/www_bget?hsa:13" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC032309" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC032309" } } } } } ,
+    {
+      type comment ,
+      heading "Pathways" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "KEGG pathway: Butanoate metabolism" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "00650" ,
+                tag
+                  str "00650" } ,
+              anchor "00650" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa00650+13" } } } ,
+        {
+          type comment ,
+          text "KEGG pathway: Alkaloid biosynthesis II" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "00960" ,
+                tag
+                  str "00960" } ,
+              anchor "00960" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa00960+13" } } } ,
+        {
+          type comment ,
+          text "KEGG pathway: 2,4-Dichlorobenzoate degradation" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "00623" ,
+                tag
+                  str "00623" } ,
+              anchor "00623" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa00623+13" } } } ,
+        {
+          type comment ,
+          text "KEGG pathway: Pentose and glucuronate interconversions" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "00040" ,
+                tag
+                  str "00040" } ,
+              anchor "00040" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa00040+13" } } } ,
+        {
+          type comment ,
+          text "KEGG pathway: Glycosylphosphatidylinositol(GPI)-anchor
+ biosynthesis" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "00563" ,
+                tag
+                  str "00563" } ,
+              anchor "00563" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa00563+13" } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 13 } ,
+    {
+      db "MIM" ,
+      tag
+        id 600338 } } ,
+  xtra-index-terms {
+    "LOC13" } }
+Entrezgene ::= {
+  track-info {
+    geneid 14 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 14 ,
+        hour 13 ,
+        minute 15 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "2" } } } ,
+  gene {
+    locus "AAMP" ,
+    desc "angio-associated, migratory cell protein" ,
+    maploc "2q35" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 603488 } } ,
+    locus-tag "HGNC:18" } ,
+  prot {
+    name {
+      "angio-associated, migratory cell protein" } } ,
+  summary "The gene product is an immunoglobulin-type protein. It is found to
+ be expressed strongly in endothelial cells, cytotrophoblasts, and poorly
+ differentiated colon adenocarcinoma cells found in lymphatics. The protein
+ contains a heparin-binding domain and mediates heparin-sensitive cell
+ adhesion." ,
+  location {
+    {
+      display-str "2q35" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 14 ,
+    src-str2 "14" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000002" ,
+      version 9 ,
+      seqs {
+        int {
+          from 218954357 ,
+          to 218960364 ,
+          strand minus ,
+          id
+            gi 51511462 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001087" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 218954357 ,
+                to 218954835 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218955247 ,
+                to 218955401 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218955598 ,
+                to 218955688 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218955806 ,
+                to 218955909 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218956058 ,
+                to 218956173 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218956291 ,
+                to 218956374 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218956670 ,
+                to 218956814 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218957074 ,
+                to 218957213 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218957721 ,
+                to 218957840 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218959609 ,
+                to 218959761 ,
+                strand minus ,
+                id
+                  gi 51511462 } ,
+              int {
+                from 218960193 ,
+                to 218960364 ,
+                strand minus ,
+                id
+                  gi 51511462 } } } ,
+          seqs {
+            whole
+              gi 4557228 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001078" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 218954760 ,
+                    to 218954835 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218955247 ,
+                    to 218955401 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218955598 ,
+                    to 218955688 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218955806 ,
+                    to 218955909 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218956058 ,
+                    to 218956173 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218956291 ,
+                    to 218956374 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218956670 ,
+                    to 218956814 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218957074 ,
+                    to 218957213 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218957721 ,
+                    to 218957840 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218959609 ,
+                    to 218959761 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } ,
+                  {
+                    from 218960193 ,
+                    to 218960364 ,
+                    strand minus ,
+                    id
+                      gi 51511462 } } } ,
+              seqs {
+                whole
+                  gi 4557229 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_005403" ,
+      version 15 ,
+      seqs {
+        int {
+          from 69338267 ,
+          to 69344274 ,
+          strand minus ,
+          id
+            gi 51461028 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001087" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 69338267 ,
+                to 69338745 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69339157 ,
+                to 69339311 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69339508 ,
+                to 69339598 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69339716 ,
+                to 69339819 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69339968 ,
+                to 69340083 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69340201 ,
+                to 69340284 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69340580 ,
+                to 69340724 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69340984 ,
+                to 69341123 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69341631 ,
+                to 69341750 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69343519 ,
+                to 69343671 ,
+                strand minus ,
+                id
+                  gi 51461028 } ,
+              int {
+                from 69344103 ,
+                to 69344274 ,
+                strand minus ,
+                id
+                  gi 51461028 } } } ,
+          seqs {
+            whole
+              gi 4557228 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001078" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 69338670 ,
+                    to 69338745 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69339157 ,
+                    to 69339311 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69339508 ,
+                    to 69339598 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69339716 ,
+                    to 69339819 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69339968 ,
+                    to 69340083 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69340201 ,
+                    to 69340284 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69340580 ,
+                    to 69340724 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69340984 ,
+                    to 69341123 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69341631 ,
+                    to 69341750 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69343519 ,
+                    to 69343671 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } ,
+                  {
+                    from 69344103 ,
+                    to 69344274 ,
+                    strand minus ,
+                    id
+                      gi 51461028 } } } ,
+              seqs {
+                whole
+                  gi 4557229 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086634" ,
+      version 1 ,
+      seqs {
+        int {
+          from 23772483 ,
+          to 23778490 ,
+          strand minus ,
+          id
+            gi 51463854 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001087" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 23772483 ,
+                to 23772961 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23773373 ,
+                to 23773527 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23773724 ,
+                to 23773814 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23773932 ,
+                to 23774035 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23774184 ,
+                to 23774299 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23774417 ,
+                to 23774500 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23774796 ,
+                to 23774940 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23775200 ,
+                to 23775339 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23775847 ,
+                to 23775966 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23777735 ,
+                to 23777887 ,
+                strand minus ,
+                id
+                  gi 51463854 } ,
+              int {
+                from 23778319 ,
+                to 23778490 ,
+                strand minus ,
+                id
+                  gi 51463854 } } } ,
+          seqs {
+            whole
+              gi 4557228 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001078" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 23772886 ,
+                    to 23772961 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23773373 ,
+                    to 23773527 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23773724 ,
+                    to 23773814 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23773932 ,
+                    to 23774035 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23774184 ,
+                    to 23774299 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23774417 ,
+                    to 23774500 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23774796 ,
+                    to 23774940 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23775200 ,
+                    to 23775339 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23775847 ,
+                    to 23775966 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23777735 ,
+                    to 23777887 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } ,
+                  {
+                    from 23778319 ,
+                    to 23778490 ,
+                    strand minus ,
+                    id
+                      gi 51463854 } } } ,
+              seqs {
+                whole
+                  gi 4557229 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "AAMP" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "angio-associated, migratory cell protein" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 7743515 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8201 } ,
+                  anchor "heparin binding" ,
+                  post-text "evidence: TAS" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6928 } ,
+                  anchor "cell motility" ,
+                  post-text "evidence: NR" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 846 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=2&M
+APS=genes-r-org/rat-chr/human%3A2,genes-r-org/mouse-chr/human%3A2,genes-r-org/
+human-chr2&query=e%3A14[id]+AND+gene[obj_type]&QSTR=aamp&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 18 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 12477932 ,
+        pmid 11969303 ,
+        pmid 10329261 ,
+        pmid 8660919 ,
+        pmid 7743515 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 52581 } ,
+              anchor "SGC31591" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "CM4.18" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "GC" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "RH23156" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "STS4-18" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_001087" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 55743074 } ,
+              anchor "NM_001087" } } ,
+          seqs {
+            whole
+              gi 55743074 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_001078" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 55743075 } ,
+                  anchor "NP_001078" ,
+                  post-text "angio-associated, migratory cell protein" } } ,
+              seqs {
+                whole
+                  gi 55743075 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 55743075 } ,
+                      pre-text "(2)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5398 } ,
+                          anchor "cd00200: WD40; WD40 domain, found in a
+ number of eukaryotic proteins that cover a wide variety of functions
+ including adaptor/regulatory modules in signal transduction, pre-mRNA
+ processing and cytoskeleton assembly" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 86 - 386  Blast Score: 457" ,
+                          version 0 } ,
+                        {
+                          type other ,
+                          text "Location: 325 - 430  Blast Score: 176" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "BG534787,CN277005,M95627" } ,
+                  anchor "BG534787,CN277005,M95627" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AB209790" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 62089165 } ,
+              anchor "AB209790" } } ,
+          seqs {
+            whole
+              gi 62089165 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAD93027" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 62089166 } ,
+                  anchor "BAD93027" } } ,
+              seqs {
+                whole
+                  gi 62089166 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK131047" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34528300 } ,
+              anchor "AK131047" } } ,
+          seqs {
+            whole
+              gi 34528300 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC008809" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 39644831 } ,
+              anchor "BC008809" } } ,
+          seqs {
+            whole
+              gi 39644831 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH08809" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 39644832 } ,
+                  anchor "AAH08809" } } ,
+              seqs {
+                whole
+                  gi 39644832 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC014122" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 33874729 } ,
+              anchor "BC014122" } } ,
+          seqs {
+            whole
+              gi 33874729 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH14122" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 33874730 } ,
+                  anchor "AAH14122" } } ,
+              seqs {
+                whole
+                  gi 33874730 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC020244" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 18044383 } ,
+              anchor "BC020244" } } ,
+          seqs {
+            whole
+              gi 18044383 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH20244" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 18044384 } ,
+                  anchor "AAH20244" } } ,
+              seqs {
+                whole
+                  gi 18044384 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC039866" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 25123215 } ,
+              anchor "BC039866" } } ,
+          seqs {
+            whole
+              gi 25123215 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH39866" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 56200511 } ,
+                  anchor "AAH39866" } } ,
+              seqs {
+                whole
+                  gi 56200511 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "CR456755" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 48145626 } ,
+              anchor "CR456755" } } ,
+          seqs {
+            whole
+              gi 48145626 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAG33036" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 48145627 } ,
+                  anchor "CAG33036" } } ,
+              seqs {
+                whole
+                  gi 48145627 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "M95627" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 870802 } ,
+              anchor "M95627" } } ,
+          seqs {
+            whole
+              gi 870802 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA68889" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 870803 } ,
+                  anchor "AAA68889" } } ,
+              seqs {
+                whole
+                  gi 870803 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "Q13685" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3121739 } ,
+                  anchor "Q13685" } } ,
+              seqs {
+                whole
+                  gi 3121739 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "14" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_005403.15&gene=AAMP&lid=14&from=69338268&to=69344275" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "14" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_005403.15&gene=AAMP&lid=14" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.83347" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.83347" } ,
+              anchor "Hs.83347" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=83
+347" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "603488" } ,
+              anchor "603488" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "14" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+14[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 14 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=14" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:4573993" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=AB209
+790" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+AB209790" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC014122" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC014122" } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 14 } ,
+    {
+      db "MIM" ,
+      tag
+        id 603488 } } ,
+  xtra-index-terms {
+    "LOC14" } }
+Entrezgene ::= {
+  track-info {
+    geneid 15 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 20 ,
+        minute 30 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 14 ,
+        hour 13 ,
+        minute 15 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Homo sapiens" ,
+      common "human" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 9606 } } ,
+      syn {
+        "man" } ,
+      orgname {
+        name
+          binomial {
+            genus "Homo" ,
+            species "sapiens" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini;
+ Hominidae; Homo" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "PRI" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "17" } } } ,
+  gene {
+    locus "AANAT" ,
+    desc "arylalkylamine N-acetyltransferase" ,
+    maploc "17q25" ,
+    db {
+      {
+        db "MIM" ,
+        tag
+          id 600950 } } ,
+    syn {
+      "SNAT" ,
+      "AA-NAT" } ,
+    locus-tag "HGNC:19" } ,
+  prot {
+    name {
+      "arylalkylamine N-acetyltransferase" ,
+      "serotonin N-acetyltransferase" } } ,
+  summary "Arylalkylamine N-acetyltransferase belongs to the superfamily of
+ acetyltransferases. It is the penultimate enzyme in melatonin synthesis and
+ controls the night/day rhythm in melatonin production in the vertebrate
+ pineal gland. Melatonin is essential for seasonal reproduction, modulates the
+ function of the circadian clock in the suprachiasmatic nucleus, and
+ influences activity and sleep. This enzyme is rapidly inactivated when
+ animals are exposed to light at night. This protein is 80% identical to sheep
+ and rat AA-NAT. Arylalkylamine N-acetyltransferase may contribute a
+ multifactorial genetic diseases such as altered behavior in sleep/wake cycle." ,
+  location {
+    {
+      display-str "17q25" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 15 ,
+    src-str2 "15" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000017" ,
+      version 9 ,
+      seqs {
+        int {
+          from 71975245 ,
+          to 71977793 ,
+          strand plus ,
+          id
+            gi 51511734 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001088" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 71975245 ,
+                to 71975403 ,
+                strand plus ,
+                id
+                  gi 51511734 } ,
+              int {
+                from 71976348 ,
+                to 71976585 ,
+                strand plus ,
+                id
+                  gi 51511734 } ,
+              int {
+                from 71976849 ,
+                to 71977003 ,
+                strand plus ,
+                id
+                  gi 51511734 } ,
+              int {
+                from 71977341 ,
+                to 71977793 ,
+                strand plus ,
+                id
+                  gi 51511734 } } } ,
+          seqs {
+            whole
+              gi 4501844 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001079" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 71976423 ,
+                    to 71976585 ,
+                    strand plus ,
+                    id
+                      gi 51511734 } ,
+                  {
+                    from 71976849 ,
+                    to 71977003 ,
+                    strand plus ,
+                    id
+                      gi 51511734 } ,
+                  {
+                    from 71977341 ,
+                    to 71977646 ,
+                    strand plus ,
+                    id
+                      gi 51511734 } } } ,
+              seqs {
+                whole
+                  gi 4501845 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.87" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.87" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_010641" ,
+      version 15 ,
+      seqs {
+        int {
+          from 8389922 ,
+          to 8392470 ,
+          strand plus ,
+          id
+            gi 51474120 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001088" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 8389922 ,
+                to 8390080 ,
+                strand plus ,
+                id
+                  gi 51474120 } ,
+              int {
+                from 8391025 ,
+                to 8391262 ,
+                strand plus ,
+                id
+                  gi 51474120 } ,
+              int {
+                from 8391526 ,
+                to 8391680 ,
+                strand plus ,
+                id
+                  gi 51474120 } ,
+              int {
+                from 8392018 ,
+                to 8392470 ,
+                strand plus ,
+                id
+                  gi 51474120 } } } ,
+          seqs {
+            whole
+              gi 4501844 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001079" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 8391100 ,
+                    to 8391262 ,
+                    strand plus ,
+                    id
+                      gi 51474120 } ,
+                  {
+                    from 8391526 ,
+                    to 8391680 ,
+                    strand plus ,
+                    id
+                      gi 51474120 } ,
+                  {
+                    from 8392018 ,
+                    to 8392323 ,
+                    strand plus ,
+                    id
+                      gi 51474120 } } } ,
+              seqs {
+                whole
+                  gi 4501845 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.87" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.87" } } } } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_086886" ,
+      version 1 ,
+      seqs {
+        int {
+          from 11539555 ,
+          to 11542103 ,
+          strand plus ,
+          id
+            gi 51474508 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_001088" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 11539555 ,
+                to 11539713 ,
+                strand plus ,
+                id
+                  gi 51474508 } ,
+              int {
+                from 11540658 ,
+                to 11540895 ,
+                strand plus ,
+                id
+                  gi 51474508 } ,
+              int {
+                from 11541159 ,
+                to 11541313 ,
+                strand plus ,
+                id
+                  gi 51474508 } ,
+              int {
+                from 11541651 ,
+                to 11542103 ,
+                strand plus ,
+                id
+                  gi 51474508 } } } ,
+          seqs {
+            whole
+              gi 4501844 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_001079" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 11540733 ,
+                    to 11540895 ,
+                    strand plus ,
+                    id
+                      gi 51474508 } ,
+                  {
+                    from 11541159 ,
+                    to 11541313 ,
+                    strand plus ,
+                    id
+                      gi 51474508 } ,
+                  {
+                    from 11541651 ,
+                    to 11541956 ,
+                    strand plus ,
+                    id
+                      gi 51474508 } } } ,
+              seqs {
+                whole
+                  gi 4501845 } ,
+              comment {
+                {
+                  type property ,
+                  label "EC" ,
+                  text "2.3.1.87" ,
+                  version 0 ,
+                  xtra-properties {
+                    {
+                      tag "EC" ,
+                      value "2.3.1.87" } } } } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "HUGO Gene Nomenclature Committee" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "AANAT" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "arylalkylamine N-acetyltransferase" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "GOA" ,
+          url "http://www.ebi.ac.uk/GOA/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8415 } ,
+                  anchor "acyltransferase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 8661026 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4059 } ,
+                  anchor "aralkylamine N-acetyltransferase activity" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16740 } ,
+                  anchor "transferase activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 8661026 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 7623 } ,
+                  anchor "circadian rhythm" ,
+                  post-text "evidence: TAS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Mouse, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 31013 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=9606&chr=17&
+MAPS=genes-r-org/rat-chr/human%3A17,genes-r-org/mouse-chr/human%3A17,genes-r-o
+rg/human-chr17&query=e%3A15[id]+AND+gene[obj_type]&QSTR=aanat&cmd=focus&fill=1
+0" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HGNC" ,
+            tag
+              id 19 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15332344 ,
+        pmid 12736803 ,
+        pmid 12552951 ,
+        pmid 12477932 ,
+        pmid 12039872 ,
+        pmid 11934439 ,
+        pmid 11427721 ,
+        pmid 11336675 ,
+        pmid 11313340 ,
+        pmid 9238858 ,
+        pmid 8661026 ,
+        pmid 7502081 ,
+        pmid 2181999 } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_001088" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4501844 } ,
+              anchor "NM_001088" } } ,
+          seqs {
+            whole
+              gi 4501844 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_001079" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4501845 } ,
+                  anchor "NP_001079" ,
+                  post-text "arylalkylamine N-acetyltransferase" } } ,
+              seqs {
+                whole
+                  gi 4501845 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Consensus CDS (CCDS)" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "CCDS" ,
+                        tag
+                          str "CCDS11745.1" } ,
+                      anchor "CCDS11745.1" } } } ,
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 4501845 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 25558 } ,
+                          anchor "pfam00583: Acetyltransf_1; Acetyltransferase
+ (GNAT) family" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 82 - 174  Blast Score: 94" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "U40347" } ,
+                  anchor "U40347" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AB023793" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 12248880 } ,
+              anchor "AB023793" } } ,
+          seqs {
+            whole
+              gi 12248880 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAB20312" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 12248881 } ,
+                  anchor "BAB20312" } } ,
+              seqs {
+                whole
+                  gi 12248881 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AB023794" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 12248882 } ,
+              anchor "AB023794" } } ,
+          seqs {
+            whole
+              gi 12248882 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAB20313" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 12248883 } ,
+                  anchor "BAB20313" } } ,
+              seqs {
+                whole
+                  gi 12248883 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AB023795" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 12248884 } ,
+              anchor "AB023795" } } ,
+          seqs {
+            whole
+              gi 12248884 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAB20314" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 12248885 } ,
+                  anchor "BAB20314" } } ,
+              seqs {
+                whole
+                  gi 12248885 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AB023796" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 12248886 } ,
+              anchor "AB023796" } } ,
+          seqs {
+            whole
+              gi 12248886 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAB20315" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 12248887 } ,
+                  anchor "BAB20315" } } ,
+              seqs {
+                whole
+                  gi 12248887 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AB023797" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 12248888 } ,
+              anchor "AB023797" } } ,
+          seqs {
+            whole
+              gi 12248888 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAB20316" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 12248889 } ,
+                  anchor "BAB20316" } } ,
+              seqs {
+                whole
+                  gi 12248889 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF360979" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 13936890 } ,
+              anchor "AF360979" } } ,
+          seqs {
+            whole
+              gi 13936890 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK49981" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 13936891 } ,
+                  anchor "AAK49981" } } ,
+              seqs {
+                whole
+                  gi 13936891 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U40391" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 1389593 } ,
+              anchor "U40391" } } ,
+          seqs {
+            whole
+              gi 1389593 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC50555" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 1389594 } ,
+                  anchor "AAC50555" } } ,
+              seqs {
+                whole
+                  gi 1389594 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC069434" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 46854803 } ,
+              anchor "BC069434" } } ,
+          seqs {
+            whole
+              gi 46854803 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH69434" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 46854804 } ,
+                  anchor "AAH69434" } } ,
+              seqs {
+                whole
+                  gi 46854804 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "U40347" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 1389591 } ,
+              anchor "U40347" } } ,
+          seqs {
+            whole
+              gi 1389591 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC50554" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 1389592 } ,
+                  anchor "AAC50554" } } ,
+              seqs {
+                whole
+                  gi 1389592 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "Q16613" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 11387096 } ,
+                  anchor "Q16613" } } ,
+              seqs {
+                whole
+                  gi 11387096 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "15" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=9606&conti
+g=NT_010641.15&gene=AANAT&lid=15&from=8389923&to=8392471" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "15" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=96
+06&contig=NT_010641.15&gene=AANAT&lid=15" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Hs.431417" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Hs.431417" } ,
+              anchor "Hs.431417" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=43
+1417" } } } ,
+        {
+          type comment ,
+          text "MIM" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MIM" ,
+                tag
+                  str "600950" } ,
+              anchor "600950" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "15" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+15[loc]&TAXID=9606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "AceView" ,
+                tag
+                  id 15 } ,
+              anchor "AceView" ,
+              url "http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?c=l
+ocusid&org=9606&l=15" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "GDB" ,
+                tag
+                  str "GDB:700076" } } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Homo_sapiens/contigview?geneid=U4034
+7" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=human&position=
+U40347" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "KEGG" ,
+                tag
+                  str "" } ,
+              url "http://www.genome.ad.jp/dbget-bin/www_bget?hsa:15" } } } ,
+        {
+          type comment ,
+          text "PharmGKB" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "PharmGKB" ,
+                tag
+                  str "PA24366" } ,
+              anchor "PA24366" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC069434" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Hs&LIST=BC069434" } } } } } ,
+    {
+      type comment ,
+      heading "Pathways" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "KEGG pathway: Tryptophan metabolism" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "00380" ,
+                tag
+                  str "00380" } ,
+              anchor "00380" ,
+              url "http://www.genome.jp/dbget-bin/show_pathway?hsa00380+15" } } } } } ,
+    {
+      type generif ,
+      text "Data suggest that the -263G/C single nucleotide polymorphism of
+ arylalkylamine-N-acetyl-transferase (AA-NAT) may be an important determinant
+ of the late/short sleep pattern." ,
+      version 0 ,
+      refs {
+        pmid 15332344 } ,
+      create-date
+        str "Sep 27 2004  4:34PM" ,
+      update-date
+        str "Sep 27 2004  7:00PM" } ,
+    {
+      type generif ,
+      text "There is a significant increase in AANAT allele positivity at the
+ single nucleotide polymorphism (alanine 129--> threonine) at between patients
+ with DSPS & controls.  AA-NAT could be a susceptibility gene for DSPS." ,
+      version 0 ,
+      refs {
+        pmid 12736803 } ,
+      create-date
+        str "Jul 10 2003 12:00AM" ,
+      update-date
+        str "May 30 2004  7:11AM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 15 } ,
+    {
+      db "MIM" ,
+      tag
+        id 600950 } } ,
+  xtra-index-terms {
+    "LOC15" } }
+Entrezgene ::= {
+  track-info {
+    geneid 9996 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 320632 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 320632 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 9996 ,
+    src-str2 "9996" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 9996 } } ,
+  xtra-index-terms {
+    "LOC320632" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11286 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 50518 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 50518 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11286 ,
+    src-str2 "11286" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11286 } } ,
+  xtra-index-terms {
+    "LOC50518" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11287 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 19 ,
+        hour 12 ,
+        minute 12 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "6" } } } ,
+  gene {
+    locus "Pzp" ,
+    desc "pregnancy zone protein" ,
+    maploc "6 62.0 cM" ,
+    syn {
+      "A1m" ,
+      "A2m" ,
+      "MAM" } ,
+    locus-tag "MGI:87854" } ,
+  prot {
+    name {
+      "pregnancy zone protein" ,
+      "alpha 1 macroglobulin" } } ,
+  location {
+    {
+      display-str "6 F1-G3" ,
+      method
+        map-type cyto } ,
+    {
+      display-str "6 62.0 cM" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11287 ,
+    src-str2 "11287" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000072" ,
+      version 1 ,
+      seqs {
+        int {
+          from 129138521 ,
+          to 129181660 ,
+          strand minus ,
+          id
+            gi 51863714 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007376" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 129138521 ,
+                to 129138673 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129139811 ,
+                to 129139852 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129140505 ,
+                to 129140607 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129141065 ,
+                to 129141133 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129141840 ,
+                to 129141930 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129142350 ,
+                to 129142480 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129142828 ,
+                to 129143046 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129143853 ,
+                to 129144094 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129144629 ,
+                to 129144806 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129144966 ,
+                to 129145040 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129145470 ,
+                to 129145626 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129146112 ,
+                to 129146202 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129146443 ,
+                to 129146619 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129147104 ,
+                to 129147187 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129149375 ,
+                to 129149426 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129149655 ,
+                to 129149776 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129150140 ,
+                to 129150266 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129150847 ,
+                to 129151075 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129151526 ,
+                to 129151700 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129153906 ,
+                to 129153993 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129157123 ,
+                to 129157287 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129158420 ,
+                to 129158569 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129161683 ,
+                to 129161825 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129165292 ,
+                to 129165355 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129167103 ,
+                to 129167330 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129168683 ,
+                to 129168844 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129171020 ,
+                to 129171129 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129171763 ,
+                to 129171877 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129174339 ,
+                to 129174462 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129176048 ,
+                to 129176132 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129176963 ,
+                to 129177131 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129177687 ,
+                to 129177707 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129178724 ,
+                to 129178776 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129178917 ,
+                to 129179070 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129180062 ,
+                to 129180245 ,
+                strand minus ,
+                id
+                  gi 51863714 } ,
+              int {
+                from 129181526 ,
+                to 129181660 ,
+                strand minus ,
+                id
+                  gi 51863714 } } } ,
+          seqs {
+            whole
+              gi 6680607 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031402" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 129138651 ,
+                    to 129138673 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129139811 ,
+                    to 129139852 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129140505 ,
+                    to 129140607 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129141065 ,
+                    to 129141133 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129141840 ,
+                    to 129141930 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129142350 ,
+                    to 129142480 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129142828 ,
+                    to 129143046 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129143853 ,
+                    to 129144094 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129144629 ,
+                    to 129144806 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129144966 ,
+                    to 129145040 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129145470 ,
+                    to 129145626 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129146112 ,
+                    to 129146202 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129146443 ,
+                    to 129146619 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129147104 ,
+                    to 129147187 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129149375 ,
+                    to 129149426 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129149655 ,
+                    to 129149776 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129150140 ,
+                    to 129150266 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129150847 ,
+                    to 129151075 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129151526 ,
+                    to 129151700 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129153906 ,
+                    to 129153993 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129157123 ,
+                    to 129157287 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129158420 ,
+                    to 129158569 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129161683 ,
+                    to 129161825 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129165292 ,
+                    to 129165355 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129167103 ,
+                    to 129167330 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129168683 ,
+                    to 129168844 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129171020 ,
+                    to 129171129 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129171763 ,
+                    to 129171877 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129174339 ,
+                    to 129174462 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129176048 ,
+                    to 129176132 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129176963 ,
+                    to 129177131 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129177687 ,
+                    to 129177707 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129178724 ,
+                    to 129178776 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129178917 ,
+                    to 129179070 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129180062 ,
+                    to 129180245 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } ,
+                  {
+                    from 129181526 ,
+                    to 129181611 ,
+                    strand minus ,
+                    id
+                      gi 51863714 } } } ,
+              seqs {
+                whole
+                  gi 6680608 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039356" ,
+      version 3 ,
+      seqs {
+        int {
+          from 3999237 ,
+          to 4042376 ,
+          strand minus ,
+          id
+            gi 51712708 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007376" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 3999237 ,
+                to 3999389 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4000527 ,
+                to 4000568 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4001221 ,
+                to 4001323 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4001781 ,
+                to 4001849 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4002556 ,
+                to 4002646 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4003066 ,
+                to 4003196 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4003544 ,
+                to 4003762 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4004569 ,
+                to 4004810 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4005345 ,
+                to 4005522 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4005682 ,
+                to 4005756 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4006186 ,
+                to 4006342 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4006828 ,
+                to 4006918 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4007159 ,
+                to 4007335 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4007820 ,
+                to 4007903 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4010091 ,
+                to 4010142 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4010371 ,
+                to 4010492 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4010856 ,
+                to 4010982 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4011563 ,
+                to 4011791 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4012242 ,
+                to 4012416 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4014622 ,
+                to 4014709 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4017839 ,
+                to 4018003 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4019136 ,
+                to 4019285 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4022399 ,
+                to 4022541 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4026008 ,
+                to 4026071 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4027819 ,
+                to 4028046 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4029399 ,
+                to 4029560 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4031736 ,
+                to 4031845 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4032479 ,
+                to 4032593 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4035055 ,
+                to 4035178 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4036764 ,
+                to 4036848 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4037679 ,
+                to 4037847 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4038403 ,
+                to 4038423 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4039440 ,
+                to 4039492 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4039633 ,
+                to 4039786 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4040778 ,
+                to 4040961 ,
+                strand minus ,
+                id
+                  gi 51712708 } ,
+              int {
+                from 4042242 ,
+                to 4042376 ,
+                strand minus ,
+                id
+                  gi 51712708 } } } ,
+          seqs {
+            whole
+              gi 6680607 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031402" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 3999367 ,
+                    to 3999389 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4000527 ,
+                    to 4000568 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4001221 ,
+                    to 4001323 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4001781 ,
+                    to 4001849 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4002556 ,
+                    to 4002646 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4003066 ,
+                    to 4003196 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4003544 ,
+                    to 4003762 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4004569 ,
+                    to 4004810 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4005345 ,
+                    to 4005522 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4005682 ,
+                    to 4005756 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4006186 ,
+                    to 4006342 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4006828 ,
+                    to 4006918 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4007159 ,
+                    to 4007335 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4007820 ,
+                    to 4007903 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4010091 ,
+                    to 4010142 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4010371 ,
+                    to 4010492 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4010856 ,
+                    to 4010982 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4011563 ,
+                    to 4011791 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4012242 ,
+                    to 4012416 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4014622 ,
+                    to 4014709 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4017839 ,
+                    to 4018003 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4019136 ,
+                    to 4019285 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4022399 ,
+                    to 4022541 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4026008 ,
+                    to 4026071 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4027819 ,
+                    to 4028046 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4029399 ,
+                    to 4029560 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4031736 ,
+                    to 4031845 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4032479 ,
+                    to 4032593 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4035055 ,
+                    to 4035178 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4036764 ,
+                    to 4036848 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4037679 ,
+                    to 4037847 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4038403 ,
+                    to 4038423 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4039440 ,
+                    to 4039492 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4039633 ,
+                    to 4039786 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4040778 ,
+                    to 4040961 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } ,
+                  {
+                    from 4042242 ,
+                    to 4042327 ,
+                    strand minus ,
+                    id
+                      gi 51712708 } } } ,
+              seqs {
+                whole
+                  gi 6680608 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Pzp" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "pregnancy zone protein" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 5559967 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4866 } ,
+                  anchor "endopeptidase inhibitor activity" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4867 } ,
+                  anchor "serine-type endopeptidase inhibitor activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 17114 } ,
+                  anchor "wide-spectrum protease inhibitor activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 5559967 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5576 } ,
+                  anchor "extracellular region" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5615 } ,
+                  anchor "extracellular space" ,
+                  post-text "evidence: TAS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 56378 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=6&
+MAPS=genes-r-org/rat-chr/mouse%3A6,genes-r-org/mouse-chr6&query=e%3A11287[id]+
+AND+gene[obj_type]&QSTR=pzp&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 87854 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "PROVISIONAL" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 12477932 ,
+        pmid 12466851 ,
+        pmid 7528166 ,
+        pmid 5559967 ,
+        pmid 1280217 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 140992 } ,
+              anchor "A2m" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "D6Uwa7" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "MGI:893149" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "Pzp" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 179416 } ,
+              anchor "AI838740" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "659717" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 179477 } ,
+              anchor "AI893533" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "681710" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_007376" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 6680607 } ,
+              anchor "NM_007376" } } ,
+          seqs {
+            whole
+              gi 6680607 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_031402" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 6680608 } ,
+                  anchor "NP_031402" ,
+                  post-text "pregnancy zone protein" } } ,
+              seqs {
+                whole
+                  gi 6680608 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 6680608 } ,
+                      pre-text "(2)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5952 } ,
+                          anchor "pfam00207: A2M; Alpha-2-macroglobulin family" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 738 - 1482  Blast Score: 1820" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 25832 } ,
+                          anchor "pfam01835: A2M_N; Alpha-2-macroglobulin
+ family N-terminal region" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 22 - 622  Blast Score: 1231" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "M93264" } ,
+                  anchor "M93264" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U06977" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 463883 } ,
+              anchor "U06977" } } ,
+          seqs {
+            whole
+              gi 463883 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA87890" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 463884 } ,
+                  anchor "AAA87890" } } ,
+              seqs {
+                whole
+                  gi 463884 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC057983" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34785995 } ,
+              anchor "BC057983" } } ,
+          seqs {
+            whole
+              gi 34785995 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH57983" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 34785996 } ,
+                  anchor "AAH57983" } } ,
+              seqs {
+                whole
+                  gi 34785996 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "FVB/N" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "M93264" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 199085 } ,
+              anchor "M93264" } } ,
+          seqs {
+            whole
+              gi 199085 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAA39508" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 199086 } ,
+                  anchor "AAA39508" } } ,
+              seqs {
+                whole
+                  gi 199086 } } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "Q61838" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2492496 } ,
+                  anchor "Q61838" } } ,
+              seqs {
+                whole
+                  gi 2492496 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11287" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039356.3&gene=Pzp&lid=11287&from=3999238&to=4042377" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11287" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039356.3&gene=Pzp&lid=11287" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.260144" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.260144" } ,
+              anchor "Mm.260144" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=26
+0144" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11287" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11287[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 87854 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:87854" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=BC057
+983" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+BC057983" } } } ,
+        {
+          type comment ,
+          text "Gene Expression Database (GXD) at MGI" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Gene Expression Database (GXD) at MGI" ,
+                tag
+                  str "MGI:87854" } ,
+              anchor "MGI:87854" ,
+              url "http://www.informatics.jax.org/searches/expression_report.c
+gi?id=MGI:87854" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC057983" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Mm&LIST=BC057983" } } } } } ,
+    {
+      type comment ,
+      heading "Alleles" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "The following allele types are documented at Mouse Genome
+ Informatics" ,
+          version 0 ,
+          source {
+            {
+              anchor "(MGI)" ,
+              url "http://www.informatics.jax.org/searches/allele_report.cgi?m
+arkerID=MGI:87854" } } } ,
+        {
+          type comment ,
+          heading "Targeted (knock-out) (1)" ,
+          version 0 ,
+          refs {
+            pmid 7544347 } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11287 } ,
+    {
+      db "MGI" ,
+      tag
+        id 87854 } } ,
+  xtra-index-terms {
+    "LOC11287" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11288 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 112131 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 112131 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11288 ,
+    src-str2 "11288" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11288 } } ,
+  xtra-index-terms {
+    "LOC112131" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11289 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 107417 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 107417 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11289 ,
+    src-str2 "11289" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11289 } } ,
+  xtra-index-terms {
+    "LOC107417" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11293 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 112132 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 112132 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11293 ,
+    src-str2 "11293" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11293 } } ,
+  xtra-index-terms {
+    "LOC112132" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11294 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 109413 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 109413 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11294 ,
+    src-str2 "11294" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11294 } } ,
+  xtra-index-terms {
+    "LOC109413" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11295 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 109414 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 109414 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11295 ,
+    src-str2 "11295" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11295 } } ,
+  xtra-index-terms {
+    "LOC109414" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11296 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 104236 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 104236 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11296 ,
+    src-str2 "11296" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11296 } } ,
+  xtra-index-terms {
+    "LOC104236" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11297 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 227290 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 227290 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11297 ,
+    src-str2 "11297" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11297 } } ,
+  xtra-index-terms {
+    "LOC227290" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11298 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 3 ,
+        day 16 ,
+        hour 16 ,
+        minute 48 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "11" } } } ,
+  gene {
+    locus "Aanat" ,
+    desc "arylalkylamine N-acetyltransferase" ,
+    maploc "11 70.0 cM" ,
+    syn {
+      "Nat4" ,
+      "SNAT" ,
+      "Nat-2" } ,
+    locus-tag "MGI:1328365" } ,
+  prot {
+    name {
+      "arylalkylamine N-acetyltransferase" } } ,
+  location {
+    {
+      display-str "11 E2" ,
+      method
+        map-type cyto } ,
+    {
+      display-str "11 70.0 cM" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11298 ,
+    src-str2 "11298" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000077" ,
+      version 1 ,
+      seqs {
+        int {
+          from 116264815 ,
+          to 116268672 ,
+          strand plus ,
+          id
+            gi 51868608 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_009591" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 116264815 ,
+                to 116264902 ,
+                strand plus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 116266704 ,
+                to 116266921 ,
+                strand plus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 116267254 ,
+                to 116267408 ,
+                strand plus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 116267917 ,
+                to 116268672 ,
+                strand plus ,
+                id
+                  gi 51868608 } } } ,
+          seqs {
+            whole
+              gi 6752937 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_033721" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 116266765 ,
+                    to 116266921 ,
+                    strand plus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 116267254 ,
+                    to 116267408 ,
+                    strand plus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 116267917 ,
+                    to 116268222 ,
+                    strand plus ,
+                    id
+                      gi 51868608 } } } ,
+              seqs {
+                whole
+                  gi 6752938 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039521" ,
+      version 3 ,
+      seqs {
+        int {
+          from 27994169 ,
+          to 27998026 ,
+          strand plus ,
+          id
+            gi 51766637 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_009591" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 27994169 ,
+                to 27994256 ,
+                strand plus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 27996058 ,
+                to 27996275 ,
+                strand plus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 27996608 ,
+                to 27996762 ,
+                strand plus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 27997271 ,
+                to 27998026 ,
+                strand plus ,
+                id
+                  gi 51766637 } } } ,
+          seqs {
+            whole
+              gi 6752937 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_033721" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 27996119 ,
+                    to 27996275 ,
+                    strand plus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 27996608 ,
+                    to 27996762 ,
+                    strand plus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 27997271 ,
+                    to 27997576 ,
+                    strand plus ,
+                    id
+                      gi 51766637 } } } ,
+              seqs {
+                whole
+                  gi 6752938 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Aanat" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "arylalkylamine N-acetyltransferase" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8080 } ,
+                  anchor "N-acetyltransferase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8415 } ,
+                  anchor "acyltransferase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4060 } ,
+                  anchor "arylamine N-acetyltransferase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16740 } ,
+                  anchor "transferase activity" ,
+                  post-text "evidence: IEA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Human, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 31013 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=11
+&MAPS=genes-r-org/human-chr/mouse%3A11,genes-r-org/rat-chr/mouse%3A11,genes-r-
+org/mouse-chr11&query=e%3A11298[id]+AND+gene[obj_type]&QSTR=aanat&cmd=focus&fi
+ll=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 1328365 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "PROVISIONAL" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 12138020 ,
+        pmid 9838107 ,
+        pmid 9708862 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 126289 } ,
+              anchor "D11Mit102" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "MGI:707739" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "MPC786" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_009591" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 6752937 } ,
+              anchor "NM_009591" } } ,
+          seqs {
+            whole
+              gi 6752937 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_033721" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 6752938 } ,
+                  anchor "NP_033721" ,
+                  post-text "arylalkylamine N-acetyltransferase" } } ,
+              seqs {
+                whole
+                  gi 6752938 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 6752938 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 25558 } ,
+                          anchor "pfam00583: Acetyltransf_1; Acetyltransferase
+ (GNAT) family" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 80 - 172  Blast Score: 118" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "AF004108" } ,
+                  anchor "AF004108" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "U83462" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4159794 } ,
+              anchor "U83462" } } ,
+          seqs {
+            whole
+              gi 4159794 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD08637" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4099097 } ,
+                  anchor "AAD08637" } } ,
+              seqs {
+                whole
+                  gi 4099097 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "129/Sv" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AB013358" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3298499 } ,
+              anchor "AB013358" } } ,
+          seqs {
+            whole
+              gi 3298499 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAA31526" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 3298500 } ,
+                  anchor "BAA31526" } } ,
+              seqs {
+                whole
+                  gi 3298500 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "BALB/c" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF004108" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4159890 } ,
+              anchor "AF004108" } } ,
+          seqs {
+            whole
+              gi 4159890 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD09408" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4159891 } ,
+                  anchor "AAD09408" } } ,
+              seqs {
+                whole
+                  gi 4159891 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C3H/He" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF004109" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4159892 } ,
+              anchor "AF004109" } } ,
+          seqs {
+            whole
+              gi 4159892 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD09409" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4159893 } ,
+                  anchor "AAD09409" } } ,
+              seqs {
+                whole
+                  gi 4159893 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C3H/He" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF004110" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4159894 } ,
+              anchor "AF004110" } } ,
+          seqs {
+            whole
+              gi 4159894 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD09410" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4159895 } ,
+                  anchor "AAD09410" } } ,
+              seqs {
+                whole
+                  gi 4159895 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF004111" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 4159896 } ,
+              anchor "AF004111" } } ,
+          seqs {
+            whole
+              gi 4159896 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAD09411" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 4159897 } ,
+                  anchor "AAD09411" } } ,
+              seqs {
+                whole
+                  gi 4159897 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6" ,
+              version 0 } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "O88816" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 11387071 } ,
+                  anchor "O88816" } } ,
+              seqs {
+                whole
+                  gi 11387071 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11298" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039521.3&gene=Aanat&lid=11298&from=27994170&to=27998027" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11298" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039521.3&gene=Aanat&lid=11298" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.42233" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.42233" } ,
+              anchor "Mm.42233" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=42
+233" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11298" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11298[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 1328365 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:1328365" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=AF004
+110" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+AF004110" } } } } } ,
+    {
+      type generif ,
+      text "AANAT expression and synthesis of N-acetylserotonin/melatonin
+ could play a role in addictive properties of cocaine" ,
+      version 0 ,
+      refs {
+        pmid 12138020 } ,
+      create-date
+        str "Aug 19 2002 12:00AM" ,
+      update-date
+        str "Aug 28 2002  6:19PM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11298 } ,
+    {
+      db "MGI" ,
+      tag
+        id 1328365 } } ,
+  xtra-index-terms {
+    "LOC11298" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11299 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 20249 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 20249 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11299 ,
+    src-str2 "11299" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11299 } } ,
+  xtra-index-terms {
+    "LOC20249" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11300 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 112133 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 112133 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type unknown ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11300 ,
+    src-str2 "11300" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  properties {
+    {
+      type property ,
+      label "Property" ,
+      text "phenotype only" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11300 } } ,
+  xtra-index-terms {
+    "LOC112133" } ,
+  xtra-properties {
+    {
+      tag "PROP" ,
+      value "phenotype only" } } }
+Entrezgene ::= {
+  track-info {
+    geneid 11301 ,
+    status secondary ,
+    current-id {
+      {
+        db "LocusID" ,
+        tag
+          id 20700 } ,
+      {
+        db "GeneID" ,
+        tag
+          id 20700 } } ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 3 ,
+        day 9 ,
+        hour 17 ,
+        minute 35 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } } ,
+  gene {
+     } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11301 ,
+    src-str2 "11301" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11301 } } ,
+  xtra-index-terms {
+    "LOC20700" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11302 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 3 ,
+        day 26 ,
+        hour 10 ,
+        minute 32 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "11" } } } ,
+  gene {
+    locus "Aatk" ,
+    desc "apoptosis-associated tyrosine kinase" ,
+    maploc "11 E2" ,
+    syn {
+      "AATYK" ,
+      "mKIAA0641" } ,
+    locus-tag "MGI:1197518" } ,
+  prot {
+    name {
+      "apoptosis-associated tyrosine kinase" } } ,
+  location {
+    {
+      display-str "11 E2" ,
+      method
+        map-type cyto } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11302 ,
+    src-str2 "11302" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000077" ,
+      version 1 ,
+      seqs {
+        int {
+          from 119678449 ,
+          to 119692677 ,
+          strand minus ,
+          id
+            gi 51868608 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007377" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 119678449 ,
+                to 119679572 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119680403 ,
+                to 119680603 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119680735 ,
+                to 119680882 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119680962 ,
+                to 119683554 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119683851 ,
+                to 119684000 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119684576 ,
+                to 119684697 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119684778 ,
+                to 119684862 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119686587 ,
+                to 119686720 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119687234 ,
+                to 119687321 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119687441 ,
+                to 119687559 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119687856 ,
+                to 119687935 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119689873 ,
+                to 119690020 ,
+                strand minus ,
+                id
+                  gi 51868608 } ,
+              int {
+                from 119692530 ,
+                to 119692677 ,
+                strand minus ,
+                id
+                  gi 51868608 } } } ,
+          seqs {
+            whole
+              gi 6680609 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031403" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 119679532 ,
+                    to 119679572 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119680403 ,
+                    to 119680603 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119680735 ,
+                    to 119680882 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119680962 ,
+                    to 119683554 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119683851 ,
+                    to 119684000 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119684576 ,
+                    to 119684697 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119684778 ,
+                    to 119684862 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119686587 ,
+                    to 119686720 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119687234 ,
+                    to 119687321 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119687441 ,
+                    to 119687559 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119687856 ,
+                    to 119687935 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119689873 ,
+                    to 119690020 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } ,
+                  {
+                    from 119692530 ,
+                    to 119692574 ,
+                    strand minus ,
+                    id
+                      gi 51868608 } } } ,
+              seqs {
+                whole
+                  gi 6680610 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039521" ,
+      version 3 ,
+      seqs {
+        int {
+          from 31407803 ,
+          to 31422031 ,
+          strand minus ,
+          id
+            gi 51766637 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007377" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 31407803 ,
+                to 31408926 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31409757 ,
+                to 31409957 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31410089 ,
+                to 31410236 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31410316 ,
+                to 31412908 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31413205 ,
+                to 31413354 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31413930 ,
+                to 31414051 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31414132 ,
+                to 31414216 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31415941 ,
+                to 31416074 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31416588 ,
+                to 31416675 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31416795 ,
+                to 31416913 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31417210 ,
+                to 31417289 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31419227 ,
+                to 31419374 ,
+                strand minus ,
+                id
+                  gi 51766637 } ,
+              int {
+                from 31421884 ,
+                to 31422031 ,
+                strand minus ,
+                id
+                  gi 51766637 } } } ,
+          seqs {
+            whole
+              gi 6680609 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031403" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 31408886 ,
+                    to 31408926 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31409757 ,
+                    to 31409957 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31410089 ,
+                    to 31410236 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31410316 ,
+                    to 31412908 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31413205 ,
+                    to 31413354 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31413930 ,
+                    to 31414051 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31414132 ,
+                    to 31414216 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31415941 ,
+                    to 31416074 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31416588 ,
+                    to 31416675 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31416795 ,
+                    to 31416913 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31417210 ,
+                    to 31417289 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31419227 ,
+                    to 31419374 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } ,
+                  {
+                    from 31421884 ,
+                    to 31421928 ,
+                    strand minus ,
+                    id
+                      gi 51766637 } } } ,
+              seqs {
+                whole
+                  gi 6680610 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Aatk" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "apoptosis-associated tyrosine kinase" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5524 } ,
+                  anchor "ATP binding" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16301 } ,
+                  anchor "kinase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4672 } ,
+                  anchor "protein kinase activity" ,
+                  post-text "evidence: ISS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4674 } ,
+                  anchor "protein serine/threonine kinase activity" ,
+                  post-text "evidence: ISS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9444961 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4713 } ,
+                  anchor "protein-tyrosine kinase activity" ,
+                  post-text "evidence: ISS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16740 } ,
+                  anchor "transferase activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9444961 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6915 } ,
+                  anchor "apoptosis" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6468 } ,
+                  anchor "protein amino acid phosphorylation" ,
+                  post-text "evidence: ISS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 7223 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=11
+&MAPS=genes-r-org/rat-chr/mouse%3A11,genes-r-org/mouse-chr11&query=e%3A11302[i
+d]+AND+gene[obj_type]&QSTR=aatk&cmd=focus&fill=10" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 1197518 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "PROVISIONAL" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 12670708 ,
+        pmid 12477932 ,
+        pmid 12466851 ,
+        pmid 11314039 ,
+        pmid 11217851 ,
+        pmid 11076861 ,
+        pmid 11042159 ,
+        pmid 10349636 ,
+        pmid 9444961 } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_007377" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 52345410 } ,
+              anchor "NM_007377" } } ,
+          seqs {
+            whole
+              gi 52345410 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_031403" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 52345411 } ,
+                  anchor "NP_031403" ,
+                  post-text "apoptosis-associated tyrosine kinase" } } ,
+              seqs {
+                whole
+                  gi 52345411 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 52345411 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5392 } ,
+                          anchor "cd00192: TyrKc; Tyrosine kinase, catalytic
+ domain" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 75 - 348  Blast Score: 588" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "BC080846" } ,
+                  anchor "BC080846" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AB093253" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 39104554 } ,
+              anchor "AB093253" } } ,
+          seqs {
+            whole
+              gi 39104554 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAC41437" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 39104555 } ,
+                  anchor "BAC41437" } } ,
+              seqs {
+                whole
+                  gi 39104555 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF011908" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2459992 } ,
+              anchor "AF011908" } } ,
+          seqs {
+            whole
+              gi 2459992 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAB71837" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2459993 } ,
+                  anchor "AAB71837" } } ,
+              seqs {
+                whole
+                  gi 2459993 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK036705" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26331621 } ,
+              anchor "AK036705" } } ,
+          seqs {
+            whole
+              gi 26331621 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAC29541" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 26331622 } ,
+                  anchor "BAC29541" } } ,
+              seqs {
+                whole
+                  gi 26331622 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK076214" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26096668 } ,
+              anchor "AK076214" } } ,
+          seqs {
+            whole
+              gi 26096668 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AY236858" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 29824955 } ,
+              anchor "AY236858" } } ,
+          seqs {
+            whole
+              gi 29824955 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAO92350" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 29824956 } ,
+                  anchor "AAO92350" } } ,
+              seqs {
+                whole
+                  gi 29824956 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AY236859" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 29824957 } ,
+              anchor "AY236859" } } ,
+          seqs {
+            whole
+              gi 29824957 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAO92351" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 29824958 } ,
+                  anchor "AAO92351" } } ,
+              seqs {
+                whole
+                  gi 29824958 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "129/SvJ" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC042837" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 27769340 } ,
+              anchor "BC042837" } } ,
+          seqs {
+            whole
+              gi 27769340 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC052323" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 30410866 } ,
+              anchor "BC052323" } } ,
+          seqs {
+            whole
+              gi 30410866 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC080846" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 51593713 } ,
+              anchor "BC080846" } } ,
+          seqs {
+            whole
+              gi 51593713 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH80846" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 51593714 } ,
+                  anchor "AAH80846" } } ,
+              seqs {
+                whole
+                  gi 51593714 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11302" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039521.3&gene=Aatk&lid=11302&from=31407804&to=31422032" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11302" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039521.3&gene=Aatk&lid=11302" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.6826" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.6826" } ,
+              anchor "Mm.6826" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=68
+26" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11302" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11302[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 1197518 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:1197518" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=BC052
+323" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+BC052323" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGC" ,
+                tag
+                  str "BC080846" } ,
+              anchor "MGC" ,
+              url "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Mm&LIST=BC080846" } } } } } ,
+    {
+      type generif ,
+      text "Expression of AATYK mRNA and protein increased during postnatal
+ brain development. In immature granule cells, overexpression of AATYK
+ promoted neurite outgrowth; a tyrosine kinase-defective mutant significantly
+ inhibited it." ,
+      version 0 ,
+      refs {
+        pmid 12670708 } ,
+      create-date
+        str "Jun 17 2003 12:00AM" ,
+      update-date
+        str "Aug  3 2003  7:06AM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11302 } ,
+    {
+      db "MGI" ,
+      tag
+        id 1197518 } } ,
+  xtra-index-terms {
+    "LOC11302" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11303 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 19 ,
+        hour 12 ,
+        minute 12 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "4" } } } ,
+  gene {
+    locus "Abca1" ,
+    desc "ATP-binding cassette, sub-family A (ABC1), member 1" ,
+    maploc "4 23.1 cM" ,
+    syn {
+      "Abc1" } ,
+    locus-tag "MGI:99607" } ,
+  prot {
+    name {
+      "ATP-binding cassette 1, sub-family A, member 1" ,
+      "ATP-binding cassette 1" } } ,
+  summary "The membrane-associated protein encoded by this gene is a member of
+ the superfamily of ATP-binding cassette (ABC) transporters. ABC proteins
+ transport various molecules across extra- and intracellular membranes. ABC
+ genes are divided into seven distinct subfamilies (ABC1, MDR/TAP, MRP, ALD,
+ OABP, GCN20, White). This protein is a member of the ABC1 subfamily. Members
+ of the ABC1 subfamily comprise the only major ABC subfamily found exclusively
+ in multicellular eukaryotes. In humans, this protein functions as a
+ cholesterol efflux pump in the cellular lipid removal pathway. Mutations in
+ the human gene have been associated with Tangier's disease and familial
+ high-density lipoprotein deficiency." ,
+  location {
+    {
+      display-str "4 A5-B3" ,
+      method
+        map-type cyto } ,
+    {
+      display-str "4 23.1 cM" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11303 ,
+    src-str2 "11303" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000070" ,
+      version 1 ,
+      seqs {
+        int {
+          from 52275832 ,
+          to 52404936 ,
+          strand minus ,
+          id
+            gi 51860768 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_013454" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 52275832 ,
+                to 52279127 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52280381 ,
+                to 52280624 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52281306 ,
+                to 52281398 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52281917 ,
+                to 52282020 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52283201 ,
+                to 52283335 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52283663 ,
+                to 52283804 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52285763 ,
+                to 52285869 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52286583 ,
+                to 52286645 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52287358 ,
+                to 52287478 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52287708 ,
+                to 52287837 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52289159 ,
+                to 52289282 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52290819 ,
+                to 52290963 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52291074 ,
+                to 52291189 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52294000 ,
+                to 52294177 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52295333 ,
+                to 52295502 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52295979 ,
+                to 52296053 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52304301 ,
+                to 52304406 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52305377 ,
+                to 52305409 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52306531 ,
+                to 52306625 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52307434 ,
+                to 52307623 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52310945 ,
+                to 52311043 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52312075 ,
+                to 52312199 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52313731 ,
+                to 52313879 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52314836 ,
+                to 52314949 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52315120 ,
+                to 52315168 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52316867 ,
+                to 52317069 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52317942 ,
+                to 52318014 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52319264 ,
+                to 52319484 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52320175 ,
+                to 52320312 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52320491 ,
+                to 52320633 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52321935 ,
+                to 52322066 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52322987 ,
+                to 52323158 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52324615 ,
+                to 52324728 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52325810 ,
+                to 52326014 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52326966 ,
+                to 52327187 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52328761 ,
+                to 52328983 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52329428 ,
+                to 52329604 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52330140 ,
+                to 52330345 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52331037 ,
+                to 52331234 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52334843 ,
+                to 52334959 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52335298 ,
+                to 52335437 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52337773 ,
+                to 52338013 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52340629 ,
+                to 52340721 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52352050 ,
+                to 52352226 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52354208 ,
+                to 52354329 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52372641 ,
+                to 52372759 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52373765 ,
+                to 52373906 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52377966 ,
+                to 52378059 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52388974 ,
+                to 52389139 ,
+                strand minus ,
+                id
+                  gi 51860768 } ,
+              int {
+                from 52404720 ,
+                to 52404936 ,
+                strand minus ,
+                id
+                  gi 51860768 } } } ,
+          seqs {
+            whole
+              gi 34577068 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_038482" ,
+              version 2 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 52278987 ,
+                    to 52279127 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52280381 ,
+                    to 52280624 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52281306 ,
+                    to 52281398 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52281917 ,
+                    to 52282020 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52283201 ,
+                    to 52283335 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52283663 ,
+                    to 52283804 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52285763 ,
+                    to 52285869 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52286583 ,
+                    to 52286645 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52287358 ,
+                    to 52287478 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52287708 ,
+                    to 52287837 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52289159 ,
+                    to 52289282 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52290819 ,
+                    to 52290963 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52291074 ,
+                    to 52291189 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52294000 ,
+                    to 52294177 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52295333 ,
+                    to 52295502 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52295979 ,
+                    to 52296053 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52304301 ,
+                    to 52304406 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52305377 ,
+                    to 52305409 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52306531 ,
+                    to 52306625 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52307434 ,
+                    to 52307623 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52310945 ,
+                    to 52311043 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52312075 ,
+                    to 52312199 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52313731 ,
+                    to 52313879 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52314836 ,
+                    to 52314949 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52315120 ,
+                    to 52315168 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52316867 ,
+                    to 52317069 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52317942 ,
+                    to 52318014 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52319264 ,
+                    to 52319484 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52320175 ,
+                    to 52320312 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52320491 ,
+                    to 52320633 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52321935 ,
+                    to 52322066 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52322987 ,
+                    to 52323158 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52324615 ,
+                    to 52324728 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52325810 ,
+                    to 52326014 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52326966 ,
+                    to 52327187 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52328761 ,
+                    to 52328983 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52329428 ,
+                    to 52329604 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52330140 ,
+                    to 52330345 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52331037 ,
+                    to 52331234 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52334843 ,
+                    to 52334959 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52335298 ,
+                    to 52335437 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52337773 ,
+                    to 52338013 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52340629 ,
+                    to 52340721 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52352050 ,
+                    to 52352226 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52354208 ,
+                    to 52354329 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52372641 ,
+                    to 52372759 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52373765 ,
+                    to 52373906 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52377966 ,
+                    to 52378059 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } ,
+                  {
+                    from 52388974 ,
+                    to 52389039 ,
+                    strand minus ,
+                    id
+                      gi 51860768 } } } ,
+              seqs {
+                whole
+                  gi 34577069 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039260" ,
+      version 3 ,
+      seqs {
+        int {
+          from 26554540 ,
+          to 26683644 ,
+          strand minus ,
+          id
+            gi 51709342 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_013454" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 26554540 ,
+                to 26557835 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26559089 ,
+                to 26559332 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26560014 ,
+                to 26560106 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26560625 ,
+                to 26560728 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26561909 ,
+                to 26562043 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26562371 ,
+                to 26562512 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26564471 ,
+                to 26564577 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26565291 ,
+                to 26565353 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26566066 ,
+                to 26566186 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26566416 ,
+                to 26566545 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26567867 ,
+                to 26567990 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26569527 ,
+                to 26569671 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26569782 ,
+                to 26569897 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26572708 ,
+                to 26572885 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26574041 ,
+                to 26574210 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26574687 ,
+                to 26574761 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26583009 ,
+                to 26583114 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26584085 ,
+                to 26584117 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26585239 ,
+                to 26585333 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26586142 ,
+                to 26586331 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26589653 ,
+                to 26589751 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26590783 ,
+                to 26590907 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26592439 ,
+                to 26592587 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26593544 ,
+                to 26593657 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26593828 ,
+                to 26593876 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26595575 ,
+                to 26595777 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26596650 ,
+                to 26596722 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26597972 ,
+                to 26598192 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26598883 ,
+                to 26599020 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26599199 ,
+                to 26599341 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26600643 ,
+                to 26600774 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26601695 ,
+                to 26601866 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26603323 ,
+                to 26603436 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26604518 ,
+                to 26604722 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26605674 ,
+                to 26605895 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26607469 ,
+                to 26607691 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26608136 ,
+                to 26608312 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26608848 ,
+                to 26609053 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26609745 ,
+                to 26609942 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26613551 ,
+                to 26613667 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26614006 ,
+                to 26614145 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26616481 ,
+                to 26616721 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26619337 ,
+                to 26619429 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26630758 ,
+                to 26630934 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26632916 ,
+                to 26633037 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26651349 ,
+                to 26651467 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26652473 ,
+                to 26652614 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26656674 ,
+                to 26656767 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26667682 ,
+                to 26667847 ,
+                strand minus ,
+                id
+                  gi 51709342 } ,
+              int {
+                from 26683428 ,
+                to 26683644 ,
+                strand minus ,
+                id
+                  gi 51709342 } } } ,
+          seqs {
+            whole
+              gi 34577068 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_038482" ,
+              version 2 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 26557695 ,
+                    to 26557835 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26559089 ,
+                    to 26559332 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26560014 ,
+                    to 26560106 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26560625 ,
+                    to 26560728 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26561909 ,
+                    to 26562043 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26562371 ,
+                    to 26562512 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26564471 ,
+                    to 26564577 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26565291 ,
+                    to 26565353 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26566066 ,
+                    to 26566186 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26566416 ,
+                    to 26566545 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26567867 ,
+                    to 26567990 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26569527 ,
+                    to 26569671 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26569782 ,
+                    to 26569897 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26572708 ,
+                    to 26572885 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26574041 ,
+                    to 26574210 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26574687 ,
+                    to 26574761 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26583009 ,
+                    to 26583114 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26584085 ,
+                    to 26584117 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26585239 ,
+                    to 26585333 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26586142 ,
+                    to 26586331 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26589653 ,
+                    to 26589751 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26590783 ,
+                    to 26590907 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26592439 ,
+                    to 26592587 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26593544 ,
+                    to 26593657 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26593828 ,
+                    to 26593876 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26595575 ,
+                    to 26595777 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26596650 ,
+                    to 26596722 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26597972 ,
+                    to 26598192 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26598883 ,
+                    to 26599020 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26599199 ,
+                    to 26599341 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26600643 ,
+                    to 26600774 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26601695 ,
+                    to 26601866 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26603323 ,
+                    to 26603436 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26604518 ,
+                    to 26604722 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26605674 ,
+                    to 26605895 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26607469 ,
+                    to 26607691 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26608136 ,
+                    to 26608312 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26608848 ,
+                    to 26609053 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26609745 ,
+                    to 26609942 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26613551 ,
+                    to 26613667 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26614006 ,
+                    to 26614145 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26616481 ,
+                    to 26616721 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26619337 ,
+                    to 26619429 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26630758 ,
+                    to 26630934 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26632916 ,
+                    to 26633037 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26651349 ,
+                    to 26651467 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26652473 ,
+                    to 26652614 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26656674 ,
+                    to 26656767 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } ,
+                  {
+                    from 26667682 ,
+                    to 26667747 ,
+                    strand minus ,
+                    id
+                      gi 51709342 } } } ,
+              seqs {
+                whole
+                  gi 34577069 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Abca1" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "ATP-binding cassette, sub-family A (ABC1), member 1" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5524 } ,
+                  anchor "ATP binding" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16887 } ,
+                  anchor "ATPase activity" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 11559713 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8203 } ,
+                  anchor "cholesterol metabolism" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 11559713 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 30301 } ,
+                  anchor "cholesterol transport" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12859204 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 42158 } ,
+                  anchor "lipoprotein biosynthesis" ,
+                  post-text "evidence: IMP" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10878804 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6911 } ,
+                  anchor "phagocytosis, engulfment" ,
+                  post-text "evidence: IMP" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10878804 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 45332 } ,
+                  anchor "phospholipid translocation" ,
+                  post-text "evidence: IMP" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6810 } ,
+                  anchor "transport" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10878804 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5794 } ,
+                  anchor "Golgi apparatus" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16021 } ,
+                  anchor "integral to membrane" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10878804 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5887 } ,
+                  anchor "integral to plasma membrane" ,
+                  post-text "evidence: IDA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10878804 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 43231 } ,
+                  anchor "intracellular membrane-bound organelle" ,
+                  post-text "evidence: IDA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Human, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 21130 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=4&
+MAPS=genes-r-org/human-chr/mouse%3A4,genes-r-org/rat-chr/mouse%3A4,genes-r-org
+/mouse-chr4&query=e%3A11303[id]+AND+gene[obj_type]&QSTR=abca1&cmd=focus&fill=1
+0" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 99607 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 15632021 ,
+        pmid 15269218 ,
+        pmid 15269217 ,
+        pmid 15163665 ,
+        pmid 15051721 ,
+        pmid 15033469 ,
+        pmid 15026428 ,
+        pmid 14993246 ,
+        pmid 14729855 ,
+        pmid 14726413 ,
+        pmid 14722086 ,
+        pmid 14703508 ,
+        pmid 14560020 ,
+        pmid 14559902 ,
+        pmid 12928428 ,
+        pmid 12897186 ,
+        pmid 12897148 ,
+        pmid 12859204 ,
+        pmid 12777468 ,
+        pmid 12754274 ,
+        pmid 12615679 ,
+        pmid 12586783 ,
+        pmid 12576511 ,
+        pmid 12551894 ,
+        pmid 12547832 ,
+        pmid 12511593 ,
+        pmid 12488454 ,
+        pmid 12487373 ,
+        pmid 12466851 ,
+        pmid 12384498 ,
+        pmid 12215451 ,
+        pmid 12196520 ,
+        pmid 12151852 ,
+        pmid 12105210 ,
+        pmid 12050168 ,
+        pmid 11972062 ,
+        pmid 11909955 ,
+        pmid 11896206 ,
+        pmid 11893753 ,
+        pmid 11752403 ,
+        pmid 11741998 ,
+        pmid 11559713 ,
+        pmid 11352567 ,
+        pmid 11217851 ,
+        pmid 11076861 ,
+        pmid 11042159 ,
+        pmid 10878804 ,
+        pmid 10760292 ,
+        pmid 10655069 ,
+        pmid 10581369 ,
+        pmid 10349636 ,
+        pmid 8889548 ,
+        pmid 8088782 ,
+        pmid 7774911 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 179001 } ,
+              anchor "X75926" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "3512" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "ND" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 271599 } ,
+              anchor "PMC16657P1" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 271951 } ,
+              anchor "PMC207565P1" ,
+              post-text "(e-PCR)" } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_013454" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34577068 } ,
+              anchor "NM_013454" } } ,
+          seqs {
+            whole
+              gi 34577068 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_038482" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 34577069 } ,
+                  anchor "NP_038482" ,
+                  post-text "ATP-binding cassette 1, sub-family A, member 1" } } ,
+              seqs {
+                whole
+                  gi 34577069 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 34577069 } ,
+                      pre-text "(3)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5341 } ,
+                          anchor "cd00267: ABC_ATPase; ABC (ATP-binding
+ cassette) transporter nucleotide-binding domain" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 899 - 1114  Blast Score: 462" ,
+                          version 0 } ,
+                        {
+                          type other ,
+                          text "Location: 1912 - 2121  Blast Score: 356" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 13428 } ,
+                          anchor "COG4152: COG4152; ABC-type uncharacterized
+ transport system, ATPase component [General function prediction only]" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 1912 - 2218  Blast Score: 360" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "AK045442,BB657864,CA885568,X75926" } ,
+                  anchor "AK045442,BB657864,CA885568,X75926" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AF287263" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 11611824 } ,
+              anchor "AF287263" } } ,
+          seqs {
+            whole
+              gi 11611824 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAG39073" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 11611825 } ,
+                  anchor "AAG39073" } } ,
+              seqs {
+                whole
+                  gi 11611825 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK019548" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 12859824 } ,
+              anchor "AK019548" } } ,
+          seqs {
+            whole
+              gi 12859824 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK045442" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26090897 } ,
+              anchor "AK045442" } } ,
+          seqs {
+            whole
+              gi 26090897 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK051920" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26342297 } ,
+              anchor "AK051920" } } ,
+          seqs {
+            whole
+              gi 26342297 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAC34811" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 26342298 } ,
+                  anchor "BAC34811" } } ,
+              seqs {
+                whole
+                  gi 26342298 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BB657864" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 16491690 } ,
+              anchor "BB657864" } } ,
+          seqs {
+            whole
+              gi 16491690 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "CA885568" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 27337117 } ,
+              anchor "CA885568" } } ,
+          seqs {
+            whole
+              gi 27337117 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "CD-1" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "X75926" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 495256 } ,
+              anchor "X75926" } } ,
+          seqs {
+            whole
+              gi 495256 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA53530" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 495257 } ,
+                  anchor "CAA53530" } } ,
+              seqs {
+                whole
+                  gi 495257 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "DBA/2" ,
+              version 0 } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P41233" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 51338775 } ,
+                  anchor "P41233" } } ,
+              seqs {
+                whole
+                  gi 51338775 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11303" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039260.3&gene=Abca1&lid=11303&from=26554541&to=26683645" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11303" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039260.3&gene=Abca1&lid=11303" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.277376" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.277376" } ,
+              anchor "Mm.277376" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=27
+7376" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11303" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11303[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 99607 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:99607" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=X7592
+6" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+X75926" } } } ,
+        {
+          type comment ,
+          text "Gene Expression Database (GXD) at MGI" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Gene Expression Database (GXD) at MGI" ,
+                tag
+                  str "MGI:99607" } ,
+              anchor "MGI:99607" ,
+              url "http://www.informatics.jax.org/searches/expression_report.c
+gi?id=MGI:99607" } } } ,
+        {
+          type comment ,
+          text "Human ABC-Transporter Proteins" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Human ABC-Transporter Proteins" ,
+                tag
+                  str "Human ABC-Transporter Proteins" } ,
+              anchor "Human ABC-Transporter Proteins" ,
+              url "http://nutrigene.4t.com/humanabc.htm" } } } } } ,
+    {
+      type generif ,
+      text "Gene deletion protects aginst cerebral malaria from Plasmodium
+ berghei." ,
+      version 0 ,
+      refs {
+        pmid 15632021 } ,
+      create-date
+        str "Mar  7 2005  7:07PM" ,
+      update-date
+        str "Mar  8 2005  3:26AM" } ,
+    {
+      type generif ,
+      text "ABCA1-dependent cholesterol mobilization to apoA-I increased new
+ cholesterol synthesis, indicating depletion of the regulatory pool of
+ hepatocyte cholesterol during HDL formation" ,
+      version 0 ,
+      refs {
+        pmid 14993246 } ,
+      create-date
+        str "Mar  7 2005  7:07PM" ,
+      update-date
+        str "Mar  8 2005 12:40AM" } ,
+    {
+      type generif ,
+      text "a 21% decrease (P = 0.01) in fertility was observed between
+ ABCA1(-/-) males compared with WT controls across their reproductive
+ lifespans" ,
+      version 0 ,
+      refs {
+        pmid 15026428 } ,
+      create-date
+        str "Mar  7 2005  7:07PM" ,
+      update-date
+        str "Mar  7 2005  9:28PM" } ,
+    {
+      type generif ,
+      text "Verapamil increases ABCA1 expression through LXR-independent
+ mechanism and thereby increases apoA-I-mediated cellular lipid release and
+ production of HDL." ,
+      version 0 ,
+      refs {
+        pmid 14726413 } ,
+      create-date
+        str "Jan  8 2005 10:01AM" ,
+      update-date
+        str "Jan  8 2005  2:49PM" } ,
+    {
+      type generif ,
+      text "In murine macrophage cell line RAW264 cells, cAMP induced
+ expression of ABCA1, release of cellular phospholipid and cholesterol by
+ apoA-I, and reversible binding of apoA-I to the cell." ,
+      version 0 ,
+      refs {
+        pmid 14729855 } ,
+      create-date
+        str "Dec  4 2004 10:01AM" ,
+      update-date
+        str "Dec  4 2004  2:26PM" } ,
+    {
+      type generif ,
+      text "The time course and temperature dependence of ABCA1-mediated lipid
+ efflux to apoA-I support a role for endocytosis in this process" ,
+      version 0 ,
+      refs {
+        pmid 14703508 } ,
+      create-date
+        str "Dec  4 2004 10:01AM" ,
+      update-date
+        str "Dec  4 2004 11:09AM" } ,
+    {
+      type generif ,
+      text "conclude that intact ATP binding cassette transporter A1 (ABCA1)
+ function is necessary for proper maturation of dense bodies in platelets" ,
+      version 0 ,
+      refs {
+        pmid 15163665 } ,
+      create-date
+        str "Oct 30 2004 10:01AM" ,
+      update-date
+        str "Oct 30 2004 10:58AM" } ,
+    {
+      type generif ,
+      text "glial ABCA1 has a key role in apoE metabolism in the brain" ,
+      version 0 ,
+      refs {
+        pmid 15269218 } ,
+      create-date
+        str "Oct 30 2004 10:01AM" ,
+      update-date
+        str "Oct 30 2004 10:43AM" } ,
+    {
+      type generif ,
+      text "ABCA1 plays a critical role in central nervous system apoE
+ metabolism" ,
+      version 0 ,
+      refs {
+        pmid 15269217 } ,
+      create-date
+        str "Oct 30 2004 10:01AM" ,
+      update-date
+        str "Oct 30 2004 10:43AM" } ,
+    {
+      type generif ,
+      text "PPAR alpha influences cholesterol absorption through modulating
+ the ATP binding cassette transporter A1 activity in the intestine by a
+ mechanism involving liver X receptor alpha" ,
+      version 0 ,
+      refs {
+        pmid 12897186 } ,
+      create-date
+        str "Aug  9 2004  1:01PM" ,
+      update-date
+        str "Aug  9 2004  1:41PM" } ,
+    {
+      type generif ,
+      text "When endotoxin or cytokines (tumor necrosis factor and
+ interleukin-1) were incubated with J774 murine macrophages, the mRNA levels
+ of ABCA1 were decreased." ,
+      version 0 ,
+      refs {
+        pmid 12777468 } ,
+      create-date
+        str "Jul 12 2004 11:39AM" ,
+      update-date
+        str "Jul 12 2004 12:16PM" } ,
+    {
+      type generif ,
+      text "ABCA1 is stabilized by a specific structural element possessing a
+ linear array of acidic residues spanning two apoA-I amphipathic alpha-helices" ,
+      version 0 ,
+      refs {
+        pmid 15051721 } ,
+      create-date
+        str "Jul 12 2004 11:39AM" ,
+      update-date
+        str "Jul 12 2004 11:52AM" } ,
+    {
+      type generif ,
+      text "despite large induction of ABCA1 mRNA during differentiation,
+ cholesterol efflux through the ABCA1 pathway remains limited in adipocytes
+ and requires prolonged lipolysis" ,
+      version 0 ,
+      refs {
+        pmid 12754274 } ,
+      create-date
+        str "Jun 23 2004  4:43PM" ,
+      update-date
+        str "Jun 23 2004  5:25PM" } ,
+    {
+      type generif ,
+      text "ABCA1 turnover is modulated by alpha1-syntrophin" ,
+      version 0 ,
+      refs {
+        pmid 14722086 } ,
+      create-date
+        str "Jun 14 2004  4:41PM" ,
+      update-date
+        str "Jun 14 2004  5:46PM" } ,
+    {
+      type generif ,
+      text "Review. ABCA1-deficient mice are used to demonstrate the relation
+ between ABCA1 expression, cellular lipid efflux, & HDL metabolism. The
+ ABCA1-deficient mouse phenotype parallels that of human Tangier disease." ,
+      version 0 ,
+      refs {
+        pmid 12615679 } ,
+      create-date
+        str "Jan 25 2004 12:00AM" ,
+      update-date
+        str "Feb 29 2004  7:03AM" } ,
+    {
+      type generif ,
+      text "cellular ATP-binding cassette transporter A1 has a role in
+ transporting cholesterol and phospholipids from cells to lipoprotein
+ particles by a process involving PLTP interactions" ,
+      version 0 ,
+      refs {
+        pmid 14559902 } ,
+      create-date
+        str "Jan 20 2004 12:00AM" ,
+      update-date
+        str "Feb 15 2004  6:35PM" } ,
+    {
+      type generif ,
+      text "Results demonstrate that ABCA1 plays a key role in hepatic
+ cholesterol efflux, inducing pathways that modulate cholesterol homeostasis
+ in the liver, and establish the liver as a major source of plasma HDL-C." ,
+      version 0 ,
+      refs {
+        pmid 12576511 } ,
+      create-date
+        str "Jan  5 2004 12:00AM" ,
+      update-date
+        str "Feb 29 2004  7:03AM" } ,
+    {
+      type generif ,
+      text "ABCA1-mediated vesicle release involves lipid raft plasma membrane
+ domains" ,
+      version 0 ,
+      refs {
+        pmid 12928428 } ,
+      create-date
+        str "Nov 30 2003 12:00AM" ,
+      update-date
+        str "Jan  4 2004  7:03AM" } ,
+    {
+      type generif ,
+      text "These studies indicate a direct role of retinoic acid receptor
+ gamma/retinoid x receptor in induction of macrophage ABCA1." ,
+      version 0 ,
+      refs {
+        pmid 14560020 } ,
+      create-date
+        str "Nov 17 2003 12:00AM" ,
+      update-date
+        str "Dec  7 2003  7:05AM" } ,
+    {
+      type generif ,
+      text "As observed in knockout mice, ABCA1 is necessary for the adequate
+ lipidation of apoAI, which enables the interaction with LCAT and subsequent
+ maturation." ,
+      version 0 ,
+      refs {
+        pmid 12859204 } ,
+      create-date
+        str "Sep 30 2003 12:00AM" ,
+      update-date
+        str "Oct 12 2003  7:03AM" } ,
+    {
+      type generif ,
+      text "ABCA1 and SREBP1 are regulated by liver X receptor/corepressor
+ complexes" ,
+      version 0 ,
+      refs {
+        pmid 12897148 } ,
+      create-date
+        str "Aug 25 2003 12:00AM" ,
+      update-date
+        str "Aug 31 2003  7:04AM" } ,
+    {
+      type generif ,
+      text "Estrogen's antiatherogenic effects may occur via ABCA1-mediated
+ pathway, and circulating HDL levels may influence expression of ABCA1" ,
+      version 0 ,
+      refs {
+        pmid 12487373 } ,
+      create-date
+        str "Jun  3 2003 12:00AM" ,
+      update-date
+        str "Jun  8 2003  7:28AM" } ,
+    {
+      type generif ,
+      text "Hepatocyte expression of ABCA1 is central to the lipidation of
+ newly synthesized apoA-I but also contributes to the lipidation of exogenous
+ apoA-I." ,
+      version 0 ,
+      refs {
+        pmid 12547832 } ,
+      create-date
+        str "Apr  2 2003 12:00AM" ,
+      update-date
+        str "May 11 2003  7:05AM" } ,
+    {
+      type generif ,
+      text "Golgi is involved in ABCA1-mediated cholesterol efflux." ,
+      version 0 ,
+      refs {
+        pmid 12551894 } ,
+      create-date
+        str "Apr  1 2003 12:00AM" ,
+      update-date
+        str "May 11 2003  7:05AM" } ,
+    {
+      type generif ,
+      text "ATP-binding cassette transporter A1 (ABCA1)-mediated lipid efflux
+ is promoted by central helices of ApoA-I; amino acid residues 220-231 of the
+ wild-type ApoA-I are required for lipid efflux and high density lipoprotein
+ formation" ,
+      version 0 ,
+      refs {
+        pmid 12488454 } ,
+      create-date
+        str "Mar 20 2003 12:00AM" ,
+      update-date
+        str "Apr 13 2003  7:04AM" } ,
+    {
+      type generif ,
+      text "annexin I is externalized from pituitary folliculo-stellate cells
+ by an ABC transporter and the ATP binding cassette transporter A1 is a likely
+ candidate" ,
+      version 0 ,
+      refs {
+        pmid 12586783 } ,
+      create-date
+        str "Mar 19 2003 12:00AM" ,
+      update-date
+        str "Mar 31 2003  6:34AM" } ,
+    {
+      type generif ,
+      text "ATP-binding-cassette transporter A1 gene expression in macrophages
+ is downregulated by statins" ,
+      version 0 ,
+      refs {
+        pmid 15033469 } ,
+      create-date
+        str "Apr 26 2004 12:00AM" ,
+      update-date
+        str "May 23 2004  7:05AM" } ,
+    {
+      type generif ,
+      text "ABCA1 is regulated by PEST sequence-mediated calpain proteolysis
+ that appears to be reversed by apolipoprotein-mediated phospholipid efflux" ,
+      version 0 ,
+      refs {
+        pmid 12511593 } ,
+      create-date
+        str "Feb  7 2003 12:00AM" ,
+      update-date
+        str "Mar  9 2003  7:04AM" } ,
+    {
+      type generif ,
+      text "REVIEW: ATP-binding cassette transporter A1 and cholesterol
+ trafficking" ,
+      version 0 ,
+      refs {
+        pmid 12151852 } ,
+      create-date
+        str "Jan 24 2003 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:50AM" } ,
+    {
+      type generif ,
+      text "mediates cholesterol efflux and is defective in free
+ cholesterol-loaded macrophages; mechanism involves enhanced degradation and
+ full npc1 activity." ,
+      version 0 ,
+      refs {
+        pmid 12215451 } ,
+      create-date
+        str "Jan 16 2003 12:00AM" ,
+      update-date
+        str "Feb  9 2003  7:14AM" } ,
+    {
+      type generif ,
+      text "induction as a result of liver X receptor agonism increases
+ secreted Abeta levels" ,
+      version 0 ,
+      refs {
+        pmid 12384498 } ,
+      create-date
+        str "Jan  9 2003 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:50AM" } ,
+    {
+      type generif ,
+      text "Increased hepatobiliary and fecal cholesterol excretion upon
+ activation of the liver X receptor is independent of ABCA1." ,
+      version 0 ,
+      refs {
+        pmid 12105210 } ,
+      create-date
+        str "Nov 20 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:50AM" } ,
+    {
+      type generif ,
+      text "A novel serine (Ser-2054) on the ABCA1 protein crucial for PKA
+ phosphorylation and for regulation of ABCA1 transporter activity." ,
+      version 0 ,
+      refs {
+        pmid 12196520 } ,
+      create-date
+        str "Nov 16 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:50AM" } ,
+    {
+      type generif ,
+      text "studies establish a novel binding site for apoA-I on the
+ macrophage extracellular matrix that may function together with ABCA1 in
+ promoting cholesterol efflux" ,
+      version 0 ,
+      refs {
+        pmid 12050168 } ,
+      create-date
+        str "Sep 27 2002 12:00AM" ,
+      update-date
+        str "Feb  2 2003  7:49AM" } ,
+    {
+      type generif ,
+      text "Role as a phosphatidylserine translocase" ,
+      version 0 ,
+      refs {
+        pmid 11893753 } ,
+      create-date
+        str "Jul  8 2002 12:00AM" ,
+      update-date
+        str "Jul 22 2002  5:59AM" } ,
+    {
+      type generif ,
+      text "Leukocyte ABCA1 controls susceptibility to atherosclerosis and
+ macrophage recruitment into tissues" ,
+      version 0 ,
+      refs {
+        pmid 11972062 } ,
+      create-date
+        str "May 31 2002 12:00AM" ,
+      update-date
+        str "Jun 15 2002  8:10AM" } ,
+    {
+      type generif ,
+      text "Conditional disruption of the peroxisome proliferator-activated
+ receptor gamma gene in mice results in lowered expression of ABCA1, ABCG1,
+ and apoE in macrophages and reduced cholesterol efflux." ,
+      version 0 ,
+      refs {
+        pmid 11909955 } ,
+      create-date
+        str "Apr 16 2002 12:00AM" ,
+      update-date
+        str "Apr 28 2002  6:21PM" } ,
+    {
+      type generif ,
+      text "in the presence of apoE, overexpression of ABCA1 modulates HDL as
+ well as apoB-containing lipoprotein metabolism and reduces atherosclerosis in
+ vivo" ,
+      version 0 ,
+      refs {
+        pmid 11752403 } ,
+      create-date
+        str "Apr  2 2002 12:00AM" ,
+      update-date
+        str "Apr 28 2002  6:21PM" } ,
+    {
+      type generif ,
+      text "ABCA1 is expressed in a pattern consistent with its role in HDL-C
+ metabolism" ,
+      version 0 ,
+      refs {
+        pmid 11896206 } ,
+      create-date
+        str "Mar 27 2002 12:00AM" ,
+      update-date
+        str "Apr  8 2002  3:24PM" } ,
+    {
+      type generif ,
+      text "unsaturated fatty acids reduce the macrophage ABCA1 content by
+ enhancing its degradation rate" ,
+      version 0 ,
+      refs {
+        pmid 11741998 } ,
+      create-date
+        str "Mar  5 2002 12:00AM" ,
+      update-date
+        str "Apr  1 2002  5:34AM" } ,
+    {
+      type comment ,
+      heading "Alleles" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "The following allele types are documented at Mouse Genome
+ Informatics" ,
+          version 0 ,
+          source {
+            {
+              anchor "(MGI)" ,
+              url "http://www.informatics.jax.org/searches/allele_report.cgi?m
+arkerID=MGI:99607" } } } ,
+        {
+          type comment ,
+          heading "Targeted (knock-out) (2)" ,
+          version 0 ,
+          refs {
+            pmid 10760292 ,
+            pmid 10980140 } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11303 } ,
+    {
+      db "MGI" ,
+      tag
+        id 99607 } } ,
+  xtra-index-terms {
+    "LOC11303" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11304 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 15 ,
+        hour 9 ,
+        minute 44 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "3" } } } ,
+  gene {
+    locus "Abca4" ,
+    desc "ATP-binding cassette, sub-family A (ABC1), member 4" ,
+    maploc "3 61.8 cM" ,
+    syn {
+      "RmP" ,
+      "Abcr" ,
+      "Abc10" ,
+      "D430003I15Rik" } ,
+    locus-tag "MGI:109424" } ,
+  prot {
+    name {
+      "ATP-binding cassette, sub-family A, member 4" ,
+      "Rim protein" ,
+      "ATP-binding cassette 10" } } ,
+  summary "The membrane-associated protein encoded by this gene is a member of
+ the superfamily of ATP-binding cassette (ABC) transporters. ABC proteins
+ transport various molecules across extra- and intracellular membranes. ABC
+ genes are divided into seven distinct subfamilies (ABC1, MDR/TAP, MRP, ALD,
+ OABP, GCN20, White). This protein is a member of the ABC1 subfamily. Members
+ of the ABC1 subfamily comprise the only major ABC subfamily found exclusively
+ in multicellular eukaryotes. This protein was the first of the ABC
+ transporters to be observed in photoreceptors and may play a role in the
+ photoresponse. Mutations in the human gene are found in patients diagnosed
+ with Stargardt disease and are associated with retinitis pigmentosa-19 and
+ macular degeneration age-related 2." ,
+  location {
+    {
+      display-str "3 G1" ,
+      method
+        map-type cyto } ,
+    {
+      display-str "3 61.8 cM" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11304 ,
+    src-str2 "11304" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000069" ,
+      version 1 ,
+      seqs {
+        int {
+          from 122426366 ,
+          to 122561663 ,
+          strand plus ,
+          id
+            gi 51860767 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007378" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 122426366 ,
+                to 122426518 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122434867 ,
+                to 122434960 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122436157 ,
+                to 122436298 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122437283 ,
+                to 122437422 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122444741 ,
+                to 122444868 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122451134 ,
+                to 122451331 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122462504 ,
+                to 122462593 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122465447 ,
+                to 122465687 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122467129 ,
+                to 122467268 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122468154 ,
+                to 122468270 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122468889 ,
+                to 122469086 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122484278 ,
+                to 122484483 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122485037 ,
+                to 122485213 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122486910 ,
+                to 122487132 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122492064 ,
+                to 122492285 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122493968 ,
+                to 122494172 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122498257 ,
+                to 122498322 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122501632 ,
+                to 122501721 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122503233 ,
+                to 122503407 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122505161 ,
+                to 122505292 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122507723 ,
+                to 122507862 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122508424 ,
+                to 122508561 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122509727 ,
+                to 122509920 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122510918 ,
+                to 122511002 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122514126 ,
+                to 122514331 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122514655 ,
+                to 122514703 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122516604 ,
+                to 122516866 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122518583 ,
+                to 122518707 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122519172 ,
+                to 122519270 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122520026 ,
+                to 122520212 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122523293 ,
+                to 122523387 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122525002 ,
+                to 122525034 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122528800 ,
+                to 122528905 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122529033 ,
+                to 122529107 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122529332 ,
+                to 122529501 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122530477 ,
+                to 122530654 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122536879 ,
+                to 122536994 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122537940 ,
+                to 122538087 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122539437 ,
+                to 122539560 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122539916 ,
+                to 122540045 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122543903 ,
+                to 122544023 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122544463 ,
+                to 122544525 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122545390 ,
+                to 122545496 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122548250 ,
+                to 122548391 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122551495 ,
+                to 122551629 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122552344 ,
+                to 122552447 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122552531 ,
+                to 122552623 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122555381 ,
+                to 122555630 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122557676 ,
+                to 122557762 ,
+                strand plus ,
+                id
+                  gi 51860767 } ,
+              int {
+                from 122561316 ,
+                to 122561663 ,
+                strand plus ,
+                id
+                  gi 51860767 } } } ,
+          seqs {
+            whole
+              gi 6671494 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031404" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 122426453 ,
+                    to 122426518 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122434867 ,
+                    to 122434960 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122436157 ,
+                    to 122436298 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122437283 ,
+                    to 122437422 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122444741 ,
+                    to 122444868 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122451134 ,
+                    to 122451331 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122462504 ,
+                    to 122462593 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122465447 ,
+                    to 122465687 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122467129 ,
+                    to 122467268 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122468154 ,
+                    to 122468270 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122468889 ,
+                    to 122469086 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122484278 ,
+                    to 122484483 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122485037 ,
+                    to 122485213 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122486910 ,
+                    to 122487132 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122492064 ,
+                    to 122492285 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122493968 ,
+                    to 122494172 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122498257 ,
+                    to 122498322 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122501632 ,
+                    to 122501721 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122503233 ,
+                    to 122503407 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122505161 ,
+                    to 122505292 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122507723 ,
+                    to 122507862 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122508424 ,
+                    to 122508561 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122509727 ,
+                    to 122509920 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122510918 ,
+                    to 122511002 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122514126 ,
+                    to 122514331 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122514655 ,
+                    to 122514703 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122516604 ,
+                    to 122516866 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122518583 ,
+                    to 122518707 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122519172 ,
+                    to 122519270 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122520026 ,
+                    to 122520212 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122523293 ,
+                    to 122523387 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122525002 ,
+                    to 122525034 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122528800 ,
+                    to 122528905 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122529033 ,
+                    to 122529107 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122529332 ,
+                    to 122529501 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122530477 ,
+                    to 122530654 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122536879 ,
+                    to 122536994 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122537940 ,
+                    to 122538087 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122539437 ,
+                    to 122539560 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122539916 ,
+                    to 122540045 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122543903 ,
+                    to 122544023 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122544463 ,
+                    to 122544525 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122545390 ,
+                    to 122545496 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122548250 ,
+                    to 122548391 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122551495 ,
+                    to 122551629 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122552344 ,
+                    to 122552447 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122552531 ,
+                    to 122552623 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122555381 ,
+                    to 122555630 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122557676 ,
+                    to 122557762 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } ,
+                  {
+                    from 122561316 ,
+                    to 122561435 ,
+                    strand plus ,
+                    id
+                      gi 51860767 } } } ,
+              seqs {
+                whole
+                  gi 6671495 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039240" ,
+      version 3 ,
+      seqs {
+        int {
+          from 8569036 ,
+          to 8704333 ,
+          strand plus ,
+          id
+            gi 51708444 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007378" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 8569036 ,
+                to 8569188 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8577537 ,
+                to 8577630 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8578827 ,
+                to 8578968 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8579953 ,
+                to 8580092 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8587411 ,
+                to 8587538 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8593804 ,
+                to 8594001 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8605174 ,
+                to 8605263 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8608117 ,
+                to 8608357 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8609799 ,
+                to 8609938 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8610824 ,
+                to 8610940 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8611559 ,
+                to 8611756 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8626948 ,
+                to 8627153 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8627707 ,
+                to 8627883 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8629580 ,
+                to 8629802 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8634734 ,
+                to 8634955 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8636638 ,
+                to 8636842 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8640927 ,
+                to 8640992 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8644302 ,
+                to 8644391 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8645903 ,
+                to 8646077 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8647831 ,
+                to 8647962 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8650393 ,
+                to 8650532 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8651094 ,
+                to 8651231 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8652397 ,
+                to 8652590 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8653588 ,
+                to 8653672 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8656796 ,
+                to 8657001 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8657325 ,
+                to 8657373 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8659274 ,
+                to 8659536 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8661253 ,
+                to 8661377 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8661842 ,
+                to 8661940 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8662696 ,
+                to 8662882 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8665963 ,
+                to 8666057 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8667672 ,
+                to 8667704 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8671470 ,
+                to 8671575 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8671703 ,
+                to 8671777 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8672002 ,
+                to 8672171 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8673147 ,
+                to 8673324 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8679549 ,
+                to 8679664 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8680610 ,
+                to 8680757 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8682107 ,
+                to 8682230 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8682586 ,
+                to 8682715 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8686573 ,
+                to 8686693 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8687133 ,
+                to 8687195 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8688060 ,
+                to 8688166 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8690920 ,
+                to 8691061 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8694165 ,
+                to 8694299 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8695014 ,
+                to 8695117 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8695201 ,
+                to 8695293 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8698051 ,
+                to 8698300 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8700346 ,
+                to 8700432 ,
+                strand plus ,
+                id
+                  gi 51708444 } ,
+              int {
+                from 8703986 ,
+                to 8704333 ,
+                strand plus ,
+                id
+                  gi 51708444 } } } ,
+          seqs {
+            whole
+              gi 6671494 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031404" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 8569123 ,
+                    to 8569188 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8577537 ,
+                    to 8577630 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8578827 ,
+                    to 8578968 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8579953 ,
+                    to 8580092 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8587411 ,
+                    to 8587538 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8593804 ,
+                    to 8594001 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8605174 ,
+                    to 8605263 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8608117 ,
+                    to 8608357 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8609799 ,
+                    to 8609938 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8610824 ,
+                    to 8610940 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8611559 ,
+                    to 8611756 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8626948 ,
+                    to 8627153 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8627707 ,
+                    to 8627883 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8629580 ,
+                    to 8629802 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8634734 ,
+                    to 8634955 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8636638 ,
+                    to 8636842 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8640927 ,
+                    to 8640992 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8644302 ,
+                    to 8644391 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8645903 ,
+                    to 8646077 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8647831 ,
+                    to 8647962 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8650393 ,
+                    to 8650532 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8651094 ,
+                    to 8651231 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8652397 ,
+                    to 8652590 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8653588 ,
+                    to 8653672 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8656796 ,
+                    to 8657001 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8657325 ,
+                    to 8657373 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8659274 ,
+                    to 8659536 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8661253 ,
+                    to 8661377 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8661842 ,
+                    to 8661940 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8662696 ,
+                    to 8662882 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8665963 ,
+                    to 8666057 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8667672 ,
+                    to 8667704 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8671470 ,
+                    to 8671575 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8671703 ,
+                    to 8671777 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8672002 ,
+                    to 8672171 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8673147 ,
+                    to 8673324 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8679549 ,
+                    to 8679664 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8680610 ,
+                    to 8680757 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8682107 ,
+                    to 8682230 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8682586 ,
+                    to 8682715 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8686573 ,
+                    to 8686693 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8687133 ,
+                    to 8687195 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8688060 ,
+                    to 8688166 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8690920 ,
+                    to 8691061 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8694165 ,
+                    to 8694299 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8695014 ,
+                    to 8695117 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8695201 ,
+                    to 8695293 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8698051 ,
+                    to 8698300 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8700346 ,
+                    to 8700432 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } ,
+                  {
+                    from 8703986 ,
+                    to 8704105 ,
+                    strand plus ,
+                    id
+                      gi 51708444 } } } ,
+              seqs {
+                whole
+                  gi 6671495 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Abca4" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "ATP-binding cassette, sub-family A (ABC1), member 4" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5524 } ,
+                  anchor "ATP binding" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16887 } ,
+                  anchor "ATPase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9202155 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 42626 } ,
+                  anchor "ATPase activity, coupled to transmembrane movement
+ of substances" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10412977 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5548 } ,
+                  anchor "phospholipid transporter activity" ,
+                  post-text "evidence: IMP" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10412977 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 4012 } ,
+                  anchor "phospholipid-translocating ATPase activity" ,
+                  post-text "evidence: IMP" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10412977 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6649 } ,
+                  anchor "phospholipid transfer to membrane" ,
+                  post-text "evidence: IMP" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 11431429 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 45494 } ,
+                  anchor "photoreceptor maintenance" ,
+                  post-text "evidence: IMP" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 7600 } ,
+                  anchor "sensory perception" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6810 } ,
+                  anchor "transport" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 10412977 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 7601 } ,
+                  anchor "visual perception" ,
+                  post-text "evidence: IMP" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16021 } ,
+                  anchor "integral to membrane" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 9202155 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5887 } ,
+                  anchor "integral to plasma membrane" ,
+                  post-text "evidence: TAS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Human, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 298 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=3&
+MAPS=genes-r-org/human-chr/mouse%3A3,genes-r-org/rat-chr/mouse%3A3,genes-r-org
+/mouse-chr3&query=e%3A11304[id]+AND+gene[obj_type]&QSTR=abca4&cmd=focus&fill=1
+0" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 109424 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "REVIEWED" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 12477932 ,
+        pmid 12466851 ,
+        pmid 11431429 ,
+        pmid 11217851 ,
+        pmid 11076861 ,
+        pmid 11042159 ,
+        pmid 10852960 ,
+        pmid 10412977 ,
+        pmid 10349636 ,
+        pmid 9202155 ,
+        pmid 8889548 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 140994 } ,
+              anchor "Abca4" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "MGI:8523" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 159184 } ,
+              anchor "AF000149" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "192133" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "693384" ,
+              version 0 } ,
+            {
+              type other ,
+              label "Alternate name" ,
+              text "ND" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_007378" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 6671494 } ,
+              anchor "NM_007378" } } ,
+          seqs {
+            whole
+              gi 6671494 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_031404" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 6671495 } ,
+                  anchor "NP_031404" ,
+                  post-text "ATP-binding cassette, sub-family A, member 4" } } ,
+              seqs {
+                whole
+                  gi 6671495 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 6671495 } ,
+                      pre-text "(3)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5341 } ,
+                          anchor "cd00267: ABC_ATPase; ABC (ATP-binding
+ cassette) transporter nucleotide-binding domain" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 931 - 1144  Blast Score: 454" ,
+                          version 0 } ,
+                        {
+                          type other ,
+                          text "Location: 1937 - 2152  Blast Score: 353" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 13428 } ,
+                          anchor "COG4152: COG4152; ABC-type uncharacterized
+ transport system, ATPase component [General function prediction only]" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 1937 - 2253  Blast Score: 387" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "AF000149" } ,
+                  anchor "AF000149" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AF000149" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3243083 } ,
+              anchor "AF000149" } } ,
+          seqs {
+            whole
+              gi 3243083 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC23916" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 2547314 } ,
+                  anchor "AAC23916" } } ,
+              seqs {
+                whole
+                  gi 2547314 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK053043" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26095532 } ,
+              anchor "AK053043" } } ,
+          seqs {
+            whole
+              gi 26095532 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK084860" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26102289 } ,
+              anchor "AK084860" } } ,
+          seqs {
+            whole
+              gi 26102289 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC043937" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 27882179 } ,
+              anchor "BC043937" } } ,
+          seqs {
+            whole
+              gi 27882179 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH43937" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 27882180 } ,
+                  anchor "AAH43937" } } ,
+              seqs {
+                whole
+                  gi 27882180 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC057853" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 34783941 } ,
+              anchor "BC057853" } } ,
+          seqs {
+            whole
+              gi 34783941 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAH57853" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 34783942 } ,
+                  anchor "AAH57853" } } ,
+              seqs {
+                whole
+                  gi 34783942 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11304" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039240.3&gene=Abca4&lid=11304&from=8569037&to=8704334" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11304" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039240.3&gene=Abca4&lid=11304" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.3918" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.3918" } ,
+              anchor "Mm.3918" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=39
+18" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11304" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11304[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 109424 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:109424" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=BC057
+853" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+BC057853" } } } ,
+        {
+          type comment ,
+          text "Human ABC-Transporter Proteins" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Human ABC-Transporter Proteins" ,
+                tag
+                  str "Human ABC-Transporter Proteins" } ,
+              anchor "Human ABC-Transporter Proteins" ,
+              url "http://nutrigene.4t.com/humanabc.htm" } } } } } ,
+    {
+      type comment ,
+      heading "Alleles" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          text "The following allele types are documented at Mouse Genome
+ Informatics" ,
+          version 0 ,
+          source {
+            {
+              anchor "(MGI)" ,
+              url "http://www.informatics.jax.org/searches/allele_report.cgi?m
+arkerID=MGI:109424" } } } ,
+        {
+          type comment ,
+          heading "Targeted (knock-out) (1)" ,
+          version 0 ,
+          refs {
+            pmid 10412977 } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11304 } ,
+    {
+      db "MGI" ,
+      tag
+        id 109424 } } ,
+  xtra-index-terms {
+    "LOC11304" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11305 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 2 ,
+        day 17 ,
+        hour 12 ,
+        minute 54 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "2" } } } ,
+  gene {
+    locus "Abca2" ,
+    desc "ATP-binding cassette, sub-family A (ABC1), member 2" ,
+    maploc "2 12.6 cM" ,
+    syn {
+      "Abc2" ,
+      "mKIAA1062" ,
+      "D2H0S1474E" } ,
+    locus-tag "MGI:99606" } ,
+  prot {
+    name {
+      "ATP-binding cassette, sub-family A (ABC1), member 2" ,
+      "ATP-binding cassette 2" } } ,
+  location {
+    {
+      display-str "2 A2-B" ,
+      method
+        map-type cyto } ,
+    {
+      display-str "2 12.6 cM" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11305 ,
+    src-str2 "11305" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000068" ,
+      version 1 ,
+      seqs {
+        int {
+          from 25388791 ,
+          to 25408220 ,
+          strand plus ,
+          id
+            gi 51860766 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007379" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 25388791 ,
+                to 25388856 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25392017 ,
+                to 25392110 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25393119 ,
+                to 25393230 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25393315 ,
+                to 25393478 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25393703 ,
+                to 25393830 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25393985 ,
+                to 25394095 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25394330 ,
+                to 25394548 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25394911 ,
+                to 25395278 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25395520 ,
+                to 25395679 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25396389 ,
+                to 25396517 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25396610 ,
+                to 25396774 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25396851 ,
+                to 25396947 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25397266 ,
+                to 25397439 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25397509 ,
+                to 25397611 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25397686 ,
+                to 25397800 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25397914 ,
+                to 25398104 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25398252 ,
+                to 25398402 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25398479 ,
+                to 25398683 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25398822 ,
+                to 25398986 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25399088 ,
+                to 25399268 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25399348 ,
+                to 25399560 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25400546 ,
+                to 25400742 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25400831 ,
+                to 25401013 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25401209 ,
+                to 25401393 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25401500 ,
+                to 25401620 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25401896 ,
+                to 25402132 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25402212 ,
+                to 25402418 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25402671 ,
+                to 25402773 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25402856 ,
+                to 25403175 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25403250 ,
+                to 25403463 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25403550 ,
+                to 25403655 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25403747 ,
+                to 25403858 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25403948 ,
+                to 25404008 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25404091 ,
+                to 25404265 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25404346 ,
+                to 25404461 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25404543 ,
+                to 25404690 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25404835 ,
+                to 25404958 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25405093 ,
+                to 25405225 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25405329 ,
+                to 25405446 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25405571 ,
+                to 25405749 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25405991 ,
+                to 25406132 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25406232 ,
+                to 25406366 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25406459 ,
+                to 25406562 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25406636 ,
+                to 25406728 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25406812 ,
+                to 25406914 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25406999 ,
+                to 25407136 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25407221 ,
+                to 25407421 ,
+                strand plus ,
+                id
+                  gi 51860766 } ,
+              int {
+                from 25407862 ,
+                to 25408220 ,
+                strand plus ,
+                id
+                  gi 51860766 } } } ,
+          seqs {
+            whole
+              gi 11993938 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031405" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 25388791 ,
+                    to 25388856 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25392017 ,
+                    to 25392110 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25393119 ,
+                    to 25393230 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25393315 ,
+                    to 25393478 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25393703 ,
+                    to 25393830 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25393985 ,
+                    to 25394095 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25394330 ,
+                    to 25394548 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25394911 ,
+                    to 25395278 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25395520 ,
+                    to 25395679 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25396389 ,
+                    to 25396517 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25396610 ,
+                    to 25396774 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25396851 ,
+                    to 25396947 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25397266 ,
+                    to 25397439 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25397509 ,
+                    to 25397611 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25397686 ,
+                    to 25397800 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25397914 ,
+                    to 25398104 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25398252 ,
+                    to 25398402 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25398479 ,
+                    to 25398683 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25398822 ,
+                    to 25398986 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25399088 ,
+                    to 25399268 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25399348 ,
+                    to 25399560 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25400546 ,
+                    to 25400742 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25400831 ,
+                    to 25401013 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25401209 ,
+                    to 25401393 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25401500 ,
+                    to 25401620 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25401896 ,
+                    to 25402132 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25402212 ,
+                    to 25402418 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25402671 ,
+                    to 25402773 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25402856 ,
+                    to 25403175 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25403250 ,
+                    to 25403463 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25403550 ,
+                    to 25403655 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25403747 ,
+                    to 25403858 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25403948 ,
+                    to 25404008 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25404091 ,
+                    to 25404265 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25404346 ,
+                    to 25404461 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25404543 ,
+                    to 25404690 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25404835 ,
+                    to 25404958 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25405093 ,
+                    to 25405225 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25405329 ,
+                    to 25405446 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25405571 ,
+                    to 25405749 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25405991 ,
+                    to 25406132 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25406232 ,
+                    to 25406366 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25406459 ,
+                    to 25406562 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25406636 ,
+                    to 25406728 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25406812 ,
+                    to 25406914 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25406999 ,
+                    to 25407136 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25407221 ,
+                    to 25407421 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } ,
+                  {
+                    from 25407862 ,
+                    to 25407897 ,
+                    strand plus ,
+                    id
+                      gi 51860766 } } } ,
+              seqs {
+                whole
+                  gi 11993939 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039206" ,
+      version 3 ,
+      seqs {
+        int {
+          from 2857415 ,
+          to 2876844 ,
+          strand plus ,
+          id
+            gi 51706164 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "NM_007379" ,
+          version 1 ,
+          genomic-coords {
+            mix {
+              int {
+                from 2857415 ,
+                to 2857480 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2860641 ,
+                to 2860734 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2861743 ,
+                to 2861854 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2861939 ,
+                to 2862102 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2862327 ,
+                to 2862454 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2862609 ,
+                to 2862719 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2862954 ,
+                to 2863172 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2863535 ,
+                to 2863902 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2864144 ,
+                to 2864303 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2865013 ,
+                to 2865141 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2865234 ,
+                to 2865398 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2865475 ,
+                to 2865571 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2865890 ,
+                to 2866063 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2866133 ,
+                to 2866235 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2866310 ,
+                to 2866424 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2866538 ,
+                to 2866728 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2866876 ,
+                to 2867026 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2867103 ,
+                to 2867307 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2867446 ,
+                to 2867610 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2867712 ,
+                to 2867892 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2867972 ,
+                to 2868184 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2869170 ,
+                to 2869366 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2869455 ,
+                to 2869637 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2869833 ,
+                to 2870017 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2870124 ,
+                to 2870244 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2870520 ,
+                to 2870756 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2870836 ,
+                to 2871042 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2871295 ,
+                to 2871397 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2871480 ,
+                to 2871799 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2871874 ,
+                to 2872087 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2872174 ,
+                to 2872279 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2872371 ,
+                to 2872482 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2872572 ,
+                to 2872632 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2872715 ,
+                to 2872889 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2872970 ,
+                to 2873085 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2873167 ,
+                to 2873314 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2873459 ,
+                to 2873582 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2873717 ,
+                to 2873849 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2873953 ,
+                to 2874070 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2874195 ,
+                to 2874373 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2874615 ,
+                to 2874756 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2874856 ,
+                to 2874990 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2875083 ,
+                to 2875186 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2875260 ,
+                to 2875352 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2875436 ,
+                to 2875538 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2875623 ,
+                to 2875760 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2875845 ,
+                to 2876045 ,
+                strand plus ,
+                id
+                  gi 51706164 } ,
+              int {
+                from 2876486 ,
+                to 2876844 ,
+                strand plus ,
+                id
+                  gi 51706164 } } } ,
+          seqs {
+            whole
+              gi 11993938 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "NP_031405" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 2857415 ,
+                    to 2857480 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2860641 ,
+                    to 2860734 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2861743 ,
+                    to 2861854 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2861939 ,
+                    to 2862102 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2862327 ,
+                    to 2862454 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2862609 ,
+                    to 2862719 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2862954 ,
+                    to 2863172 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2863535 ,
+                    to 2863902 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2864144 ,
+                    to 2864303 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2865013 ,
+                    to 2865141 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2865234 ,
+                    to 2865398 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2865475 ,
+                    to 2865571 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2865890 ,
+                    to 2866063 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2866133 ,
+                    to 2866235 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2866310 ,
+                    to 2866424 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2866538 ,
+                    to 2866728 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2866876 ,
+                    to 2867026 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2867103 ,
+                    to 2867307 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2867446 ,
+                    to 2867610 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2867712 ,
+                    to 2867892 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2867972 ,
+                    to 2868184 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2869170 ,
+                    to 2869366 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2869455 ,
+                    to 2869637 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2869833 ,
+                    to 2870017 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2870124 ,
+                    to 2870244 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2870520 ,
+                    to 2870756 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2870836 ,
+                    to 2871042 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2871295 ,
+                    to 2871397 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2871480 ,
+                    to 2871799 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2871874 ,
+                    to 2872087 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2872174 ,
+                    to 2872279 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2872371 ,
+                    to 2872482 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2872572 ,
+                    to 2872632 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2872715 ,
+                    to 2872889 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2872970 ,
+                    to 2873085 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2873167 ,
+                    to 2873314 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2873459 ,
+                    to 2873582 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2873717 ,
+                    to 2873849 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2873953 ,
+                    to 2874070 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2874195 ,
+                    to 2874373 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2874615 ,
+                    to 2874756 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2874856 ,
+                    to 2874990 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2875083 ,
+                    to 2875186 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2875260 ,
+                    to 2875352 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2875436 ,
+                    to 2875538 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2875623 ,
+                    to 2875760 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2875845 ,
+                    to 2876045 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } ,
+                  {
+                    from 2876486 ,
+                    to 2876521 ,
+                    strand plus ,
+                    id
+                      gi 51706164 } } } ,
+              seqs {
+                whole
+                  gi 11993939 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Abca2" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "ATP-binding cassette, sub-family A (ABC1), member 2" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5524 } ,
+                  anchor "ATP binding" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16887 } ,
+                  anchor "ATPase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5215 } ,
+                  anchor "transporter activity" ,
+                  post-text "evidence: ISS" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 8203 } ,
+                  anchor "cholesterol metabolism" ,
+                  post-text "evidence: ISS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6810 } ,
+                  anchor "transport" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              refs {
+                pmid 12466851 } ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16021 } ,
+                  anchor "integral to membrane" ,
+                  post-text "evidence: TAS" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5765 } ,
+                  anchor "lysosomal membrane" ,
+                  post-text "evidence: ISS" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Human, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 55590 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=2&
+MAPS=genes-r-org/human-chr/mouse%3A2,genes-r-org/rat-chr/mouse%3A2,genes-r-org
+/mouse-chr2&query=e%3A11305[id]+AND+gene[obj_type]&QSTR=abca2&cmd=focus&fill=1
+0" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 99606 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "PROVISIONAL" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 14621295 ,
+        pmid 12466851 ,
+        pmid 8088782 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 158928 } ,
+              anchor "AI413825" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "421529" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_007379" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 11993938 } ,
+              anchor "NM_007379" } } ,
+          seqs {
+            whole
+              gi 11993938 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_031405" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 11993939 } ,
+                  anchor "NP_031405" ,
+                  post-text "ATP-binding cassette, sub-family A (ABC1), member
+ 2" } } ,
+              seqs {
+                whole
+                  gi 11993939 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 11993939 } ,
+                      pre-text "(2)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5341 } ,
+                          anchor "cd00267: ABC_ATPase; ABC (ATP-binding
+ cassette) transporter nucleotide-binding domain" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 1018 - 1205  Blast Score: 389" ,
+                          version 0 } ,
+                        {
+                          type other ,
+                          text "Location: 2051 - 2269  Blast Score: 372" ,
+                          version 0 } } } } } } } } ,
+          comment {
+            {
+              type other ,
+              heading "Source Sequence" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Nucleotide" ,
+                    tag
+                      str "X75927" } ,
+                  anchor "X75927" } } ,
+              comment {
+                {
+                  type other ,
+                  version 0 } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK129274" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 37360211 } ,
+              anchor "AK129274" } } ,
+          seqs {
+            whole
+              gi 37360211 } ,
+          products {
+            {
+              type peptide ,
+              accession "BAC98084" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 37360212 } ,
+                  anchor "BAC98084" } } ,
+              seqs {
+                whole
+                  gi 37360212 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "X75927" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 11990230 } ,
+              anchor "X75927" } } ,
+          seqs {
+            whole
+              gi 11990230 } ,
+          products {
+            {
+              type peptide ,
+              accession "CAA53531" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 11990231 } ,
+                  anchor "CAA53531" } } ,
+              seqs {
+                whole
+                  gi 11990231 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "DBA/2" ,
+              version 0 } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "P41234" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14916951 } ,
+                  anchor "P41234" } } ,
+              seqs {
+                whole
+                  gi 14916951 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11305" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039206.3&gene=Abca2&lid=11305&from=2857416&to=2876845" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11305" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039206.3&gene=Abca2&lid=11305" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.2210" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.2210" } ,
+              anchor "Mm.2210" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=22
+10" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11305" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11305[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 99606 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:99606" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=X7592
+7" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+X75927" } } } ,
+        {
+          type comment ,
+          text "Human ABC-Transporter Proteins" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Human ABC-Transporter Proteins" ,
+                tag
+                  str "Human ABC-Transporter Proteins" } ,
+              anchor "Human ABC-Transporter Proteins" ,
+              url "http://nutrigene.4t.com/humanabc.htm" } } } } } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11305 } ,
+    {
+      db "MGI" ,
+      tag
+        id 99606 } } ,
+  xtra-index-terms {
+    "LOC11305" } }
+Entrezgene ::= {
+  track-info {
+    geneid 11306 ,
+    status live ,
+    create-date
+      std {
+        year 2003 ,
+        month 8 ,
+        day 28 ,
+        hour 21 ,
+        minute 39 ,
+        second 0 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 19 ,
+        hour 12 ,
+        minute 12 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Mus musculus" ,
+      common "house mouse" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 10090 } } ,
+      syn {
+        "mouse" } ,
+      orgname {
+        name
+          binomial {
+            genus "Mus" ,
+            species "musculus" } ,
+        lineage "Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;
+ Euteleostomi; Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia;
+ Sciurognathi; Muridae; Murinae; Mus" ,
+        gcode 1 ,
+        mgcode 2 ,
+        div "ROD" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "X" } } } ,
+  gene {
+    locus "Abcb7" ,
+    desc "ATP-binding cassette, sub-family B (MDR/TAP), member 7" ,
+    maploc "X 39.0 cM" ,
+    syn {
+      "Abc7" } ,
+    locus-tag "MGI:109533" } ,
+  prot {
+    name {
+      "ATP-binding cassette, sub-family B (MDR/TAP), member 7" ,
+      "ATP-binding cassette 7" } } ,
+  location {
+    {
+      display-str "X C-D" ,
+      method
+        map-type cyto } ,
+    {
+      display-str "X 39.0 cM" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "LocusLink" ,
+    src-int 11306 ,
+    src-str2 "11306" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NC_000086" ,
+      version 1 ,
+      seqs {
+        int {
+          from 95684235 ,
+          to 95817474 ,
+          strand minus ,
+          id
+            gi 51869957 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "XM_356348" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 95684235 ,
+                to 95687836 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95694701 ,
+                to 95694808 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95698046 ,
+                to 95698149 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95699409 ,
+                to 95699580 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95703347 ,
+                to 95703476 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95703628 ,
+                to 95703791 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95704349 ,
+                to 95704506 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95705372 ,
+                to 95705546 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95706530 ,
+                to 95706617 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95706707 ,
+                to 95706795 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95707710 ,
+                to 95707978 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95708848 ,
+                to 95708980 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95726912 ,
+                to 95727031 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95746072 ,
+                to 95746158 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95748503 ,
+                to 95748580 ,
+                strand minus ,
+                id
+                  gi 51869957 } ,
+              int {
+                from 95817235 ,
+                to 95817474 ,
+                strand minus ,
+                id
+                  gi 51869957 } } } ,
+          seqs {
+            whole
+              gi 51772276 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "XP_356348" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 95687621 ,
+                    to 95687836 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95694701 ,
+                    to 95694808 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95698046 ,
+                    to 95698149 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95699409 ,
+                    to 95699580 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95703347 ,
+                    to 95703476 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95703628 ,
+                    to 95703791 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95704349 ,
+                    to 95704506 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95705372 ,
+                    to 95705546 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95706530 ,
+                    to 95706617 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95706707 ,
+                    to 95706795 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95707710 ,
+                    to 95707978 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95708848 ,
+                    to 95708980 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95726912 ,
+                    to 95727031 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95746072 ,
+                    to 95746158 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95748503 ,
+                    to 95748580 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } ,
+                  {
+                    from 95817235 ,
+                    to 95817474 ,
+                    strand minus ,
+                    id
+                      gi 51869957 } } } ,
+              seqs {
+                whole
+                  gi 38086485 } } } } } } ,
+    {
+      type genomic ,
+      heading "Reference" ,
+      accession "NT_039711" ,
+      version 3 ,
+      seqs {
+        int {
+          from 9312469 ,
+          to 9445708 ,
+          strand minus ,
+          id
+            gi 51772331 } } ,
+      products {
+        {
+          type mRNA ,
+          heading "Reference" ,
+          accession "XM_356348" ,
+          version 2 ,
+          genomic-coords {
+            mix {
+              int {
+                from 9312469 ,
+                to 9316070 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9322935 ,
+                to 9323042 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9326280 ,
+                to 9326383 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9327643 ,
+                to 9327814 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9331581 ,
+                to 9331710 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9331862 ,
+                to 9332025 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9332583 ,
+                to 9332740 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9333606 ,
+                to 9333780 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9334764 ,
+                to 9334851 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9334941 ,
+                to 9335029 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9335944 ,
+                to 9336212 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9337082 ,
+                to 9337214 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9355146 ,
+                to 9355265 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9374306 ,
+                to 9374392 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9376737 ,
+                to 9376814 ,
+                strand minus ,
+                id
+                  gi 51772331 } ,
+              int {
+                from 9445469 ,
+                to 9445708 ,
+                strand minus ,
+                id
+                  gi 51772331 } } } ,
+          seqs {
+            whole
+              gi 51772276 } ,
+          products {
+            {
+              type peptide ,
+              heading "Reference" ,
+              accession "XP_356348" ,
+              version 1 ,
+              genomic-coords {
+                packed-int {
+                  {
+                    from 9315855 ,
+                    to 9316070 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9322935 ,
+                    to 9323042 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9326280 ,
+                    to 9326383 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9327643 ,
+                    to 9327814 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9331581 ,
+                    to 9331710 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9331862 ,
+                    to 9332025 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9332583 ,
+                    to 9332740 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9333606 ,
+                    to 9333780 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9334764 ,
+                    to 9334851 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9334941 ,
+                    to 9335029 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9335944 ,
+                    to 9336212 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9337082 ,
+                    to 9337214 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9355146 ,
+                    to 9355265 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9374306 ,
+                    to 9374392 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9376737 ,
+                    to 9376814 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } ,
+                  {
+                    from 9445469 ,
+                    to 9445708 ,
+                    strand minus ,
+                    id
+                      gi 51772331 } } } ,
+              seqs {
+                whole
+                  gi 38086485 } } } } } } } ,
+  properties {
+    {
+      type comment ,
+      label "Nomenclature" ,
+      version 0 ,
+      source {
+        {
+          anchor "Mouse Genome Informatics" } } ,
+      properties {
+        {
+          type property ,
+          label "Official Symbol" ,
+          text "Abcb7" ,
+          version 0 } ,
+        {
+          type property ,
+          label "Official Full Name" ,
+          text "ATP-binding cassette, sub-family B (MDR/TAP), member 7" ,
+          version 0 } } } ,
+    {
+      type comment ,
+      heading "GeneOntology" ,
+      version 0 ,
+      source {
+        {
+          pre-text "Provided by" ,
+          anchor "MGI" ,
+          url "http://www.informatics.jax.org/" } } ,
+      comment {
+        {
+          type comment ,
+          label "Function" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5524 } ,
+                  anchor "ATP binding" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16887 } ,
+                  anchor "ATPase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 42626 } ,
+                  anchor "ATPase activity, coupled to transmembrane movement
+ of substances" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 17111 } ,
+                  anchor "nucleoside-triphosphatase activity" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 166 } ,
+                  anchor "nucleotide binding" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Process" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 6810 } ,
+                  anchor "transport" ,
+                  post-text "evidence: IEA" } } } } } ,
+        {
+          type comment ,
+          label "Component" ,
+          version 0 ,
+          comment {
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 19866 } ,
+                  anchor "inner membrane" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16021 } ,
+                  anchor "integral to membrane" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 16020 } ,
+                  anchor "membrane" ,
+                  post-text "evidence: IEA" } } } ,
+            {
+              type comment ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "GO" ,
+                    tag
+                      id 5739 } ,
+                  anchor "mitochondrion" ,
+                  post-text "evidence: IEA" } } } } } } } } ,
+  homology {
+    {
+      type comment ,
+      heading "Human, Rat" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "HomoloGene" ,
+            tag
+              id 3175 } ,
+          anchor "Map Viewer" ,
+          url "http://www.ncbi.nlm.nih.gov/mapview/maps.cgi?taxid=10090&chr=X&
+MAPS=genes-r-org/human-chr/mouse%3AX,genes-r-org/rat-chr/mouse%3AX,genes-r-org
+/mouse-chrX&query=e%3A11306[id]+AND+gene[obj_type]&QSTR=abcb7&cmd=focus&fill=1
+0" } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "LocusTagLink" ,
+      version 0 ,
+      source {
+        {
+          src {
+            db "MGI" ,
+            tag
+              id 109533 } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "MODEL" ,
+      version 0 } ,
+    {
+      type comment ,
+      version 0 ,
+      refs {
+        pmid 14651853 ,
+        pmid 12480705 ,
+        pmid 12477932 ,
+        pmid 11217851 ,
+        pmid 11076861 ,
+        pmid 11042159 ,
+        pmid 10922068 ,
+        pmid 10349636 ,
+        pmid 9143506 } } ,
+    {
+      type comment ,
+      heading "Markers (Sequence Tagged Sites/STS)" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 161105 } ,
+              anchor "RH124283" ,
+              post-text "(e-PCR)" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 178992 } ,
+              anchor "AW537380" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "953972" ,
+              version 0 } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UniSTS" ,
+                tag
+                  id 181314 } ,
+              anchor "AU019072" ,
+              post-text "(e-PCR)" } } ,
+          comment {
+            {
+              type other ,
+              label "Alternate name" ,
+              text "359130" ,
+              version 0 } } } } } ,
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "XM_356348" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 51772276 } ,
+              anchor "XM_356348" } } ,
+          seqs {
+            whole
+              gi 51772276 } ,
+          products {
+            {
+              type peptide ,
+              accession "XP_356348" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 38086485 } ,
+                  anchor "XP_356348" } } ,
+              seqs {
+                whole
+                  gi 38086485 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 38086485 } ,
+                      pre-text "(3)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5341 } ,
+                          anchor "cd00267: ABC_ATPase; ABC (ATP-binding
+ cassette) transporter nucleotide-binding domain" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 496 - 714  Blast Score: 577" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 14387 } ,
+                          anchor "COG5265: ATM1; ABC-type transport system
+ involved in Fe-S cluster assembly, permease and ATPase components
+ [Posttranslational modification, protein turnover, chaperones]" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 234 - 729  Blast Score: 1506" ,
+                          version 0 } } } ,
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 25581 } ,
+                          anchor "pfam00664: ABC_membrane; ABC transporter
+ transmembrane region" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 164 - 435  Blast Score: 247" ,
+                          version 0 } } } } } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AA517758" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 2257282 } ,
+              anchor "AA517758" } } ,
+          seqs {
+            whole
+              gi 2257282 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J x DBA/2J F1" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK040069" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26087598 } ,
+              anchor "AK040069" } } ,
+          seqs {
+            whole
+              gi 26087598 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AK084853" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 26102284 } ,
+              anchor "AK084853" } } ,
+          seqs {
+            whole
+              gi 26102284 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AU019072" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 3374656 } ,
+              anchor "AU019072" } } ,
+          seqs {
+            whole
+              gi 3374656 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "AW537380" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7179797 } ,
+              anchor "AW537380" } } ,
+          seqs {
+            whole
+              gi 7179797 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "C57BL/6J" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "BC035534" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 23243450 } ,
+              anchor "BC035534" } } ,
+          seqs {
+            whole
+              gi 23243450 } ,
+          products {
+            {
+              type peptide ,
+              text "None" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          heading "mRNA" ,
+          accession "U43892" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 1167981 } ,
+              anchor "U43892" } } ,
+          seqs {
+            whole
+              gi 1167981 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAC53152" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 1167982 } ,
+                  anchor "AAC53152" } } ,
+              seqs {
+                whole
+                  gi 1167982 } } } ,
+          comment {
+            {
+              type other ,
+              label "Strain" ,
+              text "DBA/2" ,
+              version 0 } } } ,
+        {
+          type other ,
+          text "None" ,
+          version 0 ,
+          products {
+            {
+              type peptide ,
+              accession "Q61102" ,
+              version 0 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 5902709 } ,
+                  anchor "Q61102" } } ,
+              seqs {
+                whole
+                  gi 5902709 } } } } } } ,
+    {
+      type comment ,
+      heading "Additional Links" ,
+      version 0 ,
+      comment {
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Evidence Viewer" ,
+                tag
+                  str "11306" } ,
+              anchor "Evidence Viewer" ,
+              url "http://www.ncbi.nlm.nih.gov/sutils/evv.cgi?taxid=10090&cont
+ig=NT_039711.3&gene=Abcb7&lid=11306&from=9312470&to=9445709" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "ModelMaker" ,
+                tag
+                  str "11306" } ,
+              anchor "ModelMaker" ,
+              url "http://www.ncbi.nlm.nih.gov/mapview/modelmaker.cgi?taxid=10
+090&contig=NT_039711.3&gene=Abcb7&lid=11306" } } } ,
+        {
+          type comment ,
+          text "UniGene" ,
+          version 0 ,
+          xtra-properties {
+            {
+              tag "UNIGENE" ,
+              value "Mm.262053" } } ,
+          source {
+            {
+              src {
+                db "UniGene" ,
+                tag
+                  str "Mm.262053" } ,
+              anchor "Mm.262053" ,
+              url "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Mm&CID=26
+2053" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "HomoloGene" ,
+                tag
+                  str "11306" } ,
+              anchor "HomoloGene" ,
+              url "http://www.ncbi.nlm.nih.gov/HomoloGene/homolquery.cgi?TEXT=
+11306[loc]&TAXID=10090" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "MGI" ,
+                tag
+                  id 109533 } ,
+              anchor "MGI" ,
+              url "http://www.informatics.jax.org/searches/accession_report.cg
+i?id=MGI:109533" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Ensembl" ,
+                tag
+                  str "" } ,
+              url "http://www.ensembl.org/Mus_musculus/contigview?geneid=BC035
+534" } } } ,
+        {
+          type comment ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "UCSC" ,
+                tag
+                  str "" } ,
+              url "http://genome.ucsc.edu/cgi-bin/hgTracks?org=mouse&position=
+BC035534" } } } ,
+        {
+          type comment ,
+          text "Gene Expression Database (GXD) at MGI" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Gene Expression Database (GXD) at MGI" ,
+                tag
+                  str "MGI:109533" } ,
+              anchor "MGI:109533" ,
+              url "http://www.informatics.jax.org/searches/expression_report.c
+gi?id=MGI:109533" } } } ,
+        {
+          type comment ,
+          text "Human ABC-Transporter Proteins" ,
+          version 0 ,
+          source {
+            {
+              src {
+                db "Human ABC-Transporter Proteins" ,
+                tag
+                  str "Human ABC-Transporter Proteins" } ,
+              anchor "Human ABC-Transporter Proteins" ,
+              url "http://nutrigene.4t.com/humanabc.htm" } } } } } ,
+    {
+      type generif ,
+      text "ABC7 positively regulates the expression of extramitochondrial
+ thioredoxin & that of an intramitochondrial iron-sulfur-containing protein,
+ ferrochelatase. ABC7 contributes to the production of heme during the
+ differentiation of erythroid cells." ,
+      version 0 ,
+      refs {
+        pmid 12480705 } ,
+      create-date
+        str "May 23 2003 12:00AM" ,
+      update-date
+        str "Jun 22 2003  7:07AM" } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 11306 } ,
+    {
+      db "MGI" ,
+      tag
+        id 109533 } } ,
+  xtra-index-terms {
+    "LOC11306" } }
+Entrezgene ::= {
+  track-info {
+    geneid 171590 ,
+    status live ,
+    create-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 7 ,
+        hour 13 ,
+        minute 32 ,
+        second 25 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 1 ,
+        day 18 ,
+        hour 23 ,
+        minute 7 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Caenorhabditis elegans" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 6239 } } ,
+      orgname {
+        name
+          binomial {
+            genus "Caenorhabditis" ,
+            species "elegans" } ,
+        lineage "Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
+ Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis" ,
+        gcode 1 ,
+        mgcode 5 ,
+        div "INV" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "I" } } } ,
+  gene {
+    locus "1A478" ,
+    maploc "I;-17.75 cM (interpolated genetic position)" ,
+    db {
+      {
+        db "AceView/WormGenes" ,
+        tag
+          str "1A478" } ,
+      {
+        db "LocusID" ,
+        tag
+          id 171590 } ,
+      {
+        db "WormBase" ,
+        tag
+          str "Y74C9A.3" } } ,
+    syn {
+      "Y74C9A.3" ,
+      "CELK05052" } ,
+    locus-tag "1A478" } ,
+  prot {
+    name {
+      "AD-003 protein (26.6 kD) (1A478)" } } ,
+  rna {
+    type mRNA } ,
+  location {
+    {
+      display-str "I;-17.75 cM (interpolated genetic position)" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "Entrez Genomes" ,
+    src-int 25167480 ,
+    src-str1 "NC_003279" ,
+    src-str2 "CE:1A478" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      label "1A478" ,
+      accession "NC_003279" ,
+      version 2 ,
+      seqs {
+        int {
+          from 4220 ,
+          to 10147 ,
+          strand minus ,
+          id
+            gi 25167480 } } ,
+      products {
+        {
+          type mRNA ,
+          accession "NM_058260" ,
+          version 1 ,
+          refs {
+            pmid 9851916 } ,
+          genomic-coords {
+            mix {
+              int {
+                from 10094 ,
+                to 10147 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 9726 ,
+                to 9845 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 6036 ,
+                to 6326 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 5194 ,
+                to 5295 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 4220 ,
+                to 4357 ,
+                strand minus ,
+                id
+                  gi 25167480 } } } ,
+          seqs {
+            whole
+              gi 17510630 } ,
+          products {
+            {
+              type peptide ,
+              label "AD-003 protein (26.6 kD) (1A478)" ,
+              accession "NP_490660" ,
+              version 1 ,
+              refs {
+                pmid 9851916 } ,
+              genomic-coords {
+                mix {
+                  int {
+                    from 10094 ,
+                    to 10147 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 9726 ,
+                    to 9845 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 6036 ,
+                    to 6326 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 5194 ,
+                    to 5295 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 4220 ,
+                    to 4357 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } } } ,
+              seqs {
+                whole
+                  gi 17510631 } } } ,
+          comment {
+            {
+              type comment ,
+              text "" ,
+              version 0 } } } } ,
+      comment {
+        {
+          type comment ,
+          text "Title: Caenorhabditis elegans gene 1A478, AD-003 protein." ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_058260" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 17510630 } ,
+              anchor "NM_058260" } } ,
+          seqs {
+            whole
+              gi 17510630 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_490660" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 17510631 } ,
+                  anchor "NP_490660" } } ,
+              seqs {
+                whole
+                  gi 17510631 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 17510631 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 24115 } ,
+                          anchor "pfam05891: DUF858; Eukaryotic protein of
+ unknown function (DUF858)" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 11 - 225  Blast Score: 824" ,
+                          version 0 } } } } } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC024206" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7105656 } ,
+              anchor "AC024206" } } ,
+          seqs {
+            whole
+              gi 7105656 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAF36050" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14550391 } ,
+                  anchor "AAF36050" } } ,
+              seqs {
+                whole
+                  gi 14550391 } } } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "Reviewed" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 171590 } } ,
+  xtra-iq {
+    {
+      tag "NUCLEOTIDE" ,
+      value "7105656" } ,
+    {
+      tag "PROTEIN" ,
+      value "14550391" } } ,
+  non-unique-keys {
+    {
+      db "ID" ,
+      tag
+        id 25167480 } } }
+Entrezgene ::= {
+  track-info {
+    geneid 171591 ,
+    status live ,
+    create-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 6 ,
+        hour 18 ,
+        minute 33 ,
+        second 30 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 1 ,
+        day 18 ,
+        hour 23 ,
+        minute 7 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Caenorhabditis elegans" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 6239 } } ,
+      orgname {
+        name
+          binomial {
+            genus "Caenorhabditis" ,
+            species "elegans" } ,
+        lineage "Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
+ Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis" ,
+        gcode 1 ,
+        mgcode 5 ,
+        div "INV" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "I" } } } ,
+  gene {
+    locus "1A489" ,
+    maploc "I;-17.80 cM (interpolated genetic position)" ,
+    db {
+      {
+        db "AceView/WormGenes" ,
+        tag
+          str "1A489" } ,
+      {
+        db "LocusID" ,
+        tag
+          id 171591 } ,
+      {
+        db "WormBase" ,
+        tag
+          str "Y74C9A.2" } } ,
+    syn {
+      "Y74C9A.2" ,
+      "CELK01753" } ,
+    locus-tag "1A489" } ,
+  prot {
+    name {
+      "putative protein, with a coiled coil-4 domain (13.7 kD) (1A489)" } } ,
+  rna {
+    type mRNA } ,
+  location {
+    {
+      display-str "I;-17.80 cM (interpolated genetic position)" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "Entrez Genomes" ,
+    src-int 25167480 ,
+    src-str1 "NC_003279" ,
+    src-str2 "CE:1A489" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      label "1A489" ,
+      accession "NC_003279" ,
+      version 2 ,
+      seqs {
+        int {
+          from 11640 ,
+          to 16584 ,
+          strand plus ,
+          id
+            gi 25167480 } } ,
+      products {
+        {
+          type mRNA ,
+          accession "NM_058259" ,
+          version 1 ,
+          refs {
+            pmid 9851916 } ,
+          genomic-coords {
+            mix {
+              int {
+                from 11640 ,
+                to 11688 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 14950 ,
+                to 15159 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 16472 ,
+                to 16584 ,
+                strand plus ,
+                id
+                  gi 25167480 } } } ,
+          seqs {
+            whole
+              gi 17510628 } ,
+          products {
+            {
+              type peptide ,
+              label "putative protein, with a coiled coil-4 domain (13.7 kD)
+ (1A489)" ,
+              accession "NP_490661" ,
+              version 1 ,
+              refs {
+                pmid 9851916 ,
+                pmid 11231151 } ,
+              genomic-coords {
+                mix {
+                  int {
+                    from 11640 ,
+                    to 11688 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 14950 ,
+                    to 15159 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 16472 ,
+                    to 16584 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } } } ,
+              seqs {
+                whole
+                  gi 17510629 } } } ,
+          comment {
+            {
+              type comment ,
+              text "" ,
+              version 0 } } } } ,
+      comment {
+        {
+          type comment ,
+          text "Title: Caenorhabditis elegans essential expressed gene 1A489." ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_058259" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 17510628 } ,
+              anchor "NM_058259" } } ,
+          seqs {
+            whole
+              gi 17510628 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_490661" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 17510629 } ,
+                  anchor "NP_490661" } } ,
+              seqs {
+                whole
+                  gi 17510629 } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC024206" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7105656 } ,
+              anchor "AC024206" } } ,
+          seqs {
+            whole
+              gi 7105656 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAF36049" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 7105658 } ,
+                  anchor "AAF36049" } } ,
+              seqs {
+                whole
+                  gi 7105658 } } } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "Reviewed" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 171591 } } ,
+  xtra-iq {
+    {
+      tag "NUCLEOTIDE" ,
+      value "7105656" } ,
+    {
+      tag "PROTEIN" ,
+      value "7105658" } } ,
+  non-unique-keys {
+    {
+      db "ID" ,
+      tag
+        id 25167480 } } }
+Entrezgene ::= {
+  track-info {
+    geneid 171592 ,
+    status live ,
+    create-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 6 ,
+        hour 18 ,
+        minute 31 ,
+        second 55 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 1 ,
+        day 18 ,
+        hour 23 ,
+        minute 7 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Caenorhabditis elegans" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 6239 } } ,
+      orgname {
+        name
+          binomial {
+            genus "Caenorhabditis" ,
+            species "elegans" } ,
+        lineage "Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
+ Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis" ,
+        gcode 1 ,
+        mgcode 5 ,
+        div "INV" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "I" } } } ,
+  gene {
+    locus "1A492" ,
+    maploc "I;-17.86 cM (interpolated genetic position)" ,
+    db {
+      {
+        db "AceView/WormGenes" ,
+        tag
+          str "1A492" } ,
+      {
+        db "LocusID" ,
+        tag
+          id 171592 } ,
+      {
+        db "WormBase" ,
+        tag
+          str "Y74C9A.4a" } } ,
+    syn {
+      "Y74C9A.4a" ,
+      "Y74C9A.4b" ,
+      "CELK08126" } ,
+    locus-tag "1A492" } ,
+  prot {
+    name {
+      "suppressor of PResenilin defect SPR-1 related; similar to co-repressor
+ of REST RE1-silencing transcription factor (65.9 kD) (1A492)" ,
+      "suppressor of PResenilin defect SPR-1 related; similar to co-repressor
+ of REST RE1-silencing transcription factor (65.6 kD) (1A492)" } } ,
+  location {
+    {
+      display-str "I;-17.86 cM (interpolated genetic position)" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "Entrez Genomes" ,
+    src-int 25167480 ,
+    src-str1 "NC_003279" ,
+    src-str2 "CE:1A492" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      label "1A492" ,
+      accession "NC_003279" ,
+      version 2 ,
+      seqs {
+        int {
+          from 17910 ,
+          to 26777 ,
+          strand minus ,
+          id
+            gi 25167480 } } ,
+      products {
+        {
+          type mRNA ,
+          accession "NM_058261" ,
+          version 1 ,
+          refs {
+            pmid 9851916 } ,
+          genomic-coords {
+            mix {
+              int {
+                from 26732 ,
+                to 26777 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 26589 ,
+                to 26689 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 26370 ,
+                to 26523 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 25272 ,
+                to 25471 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 24928 ,
+                to 25089 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 24650 ,
+                to 24844 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 21012 ,
+                to 21138 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 20847 ,
+                to 20963 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 20270 ,
+                to 20477 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 19014 ,
+                to 19240 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 18005 ,
+                to 18114 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 17910 ,
+                to 17957 ,
+                strand minus ,
+                id
+                  gi 25167480 } } } ,
+          seqs {
+            whole
+              gi 17510632 } ,
+          products {
+            {
+              type peptide ,
+              label "suppressor of PResenilin defect SPR-1 related; similar to
+ co-repressor of REST RE1-silencing transcription factor (65.9 kD) (1A492)" ,
+              accession "NP_490662" ,
+              version 1 ,
+              refs {
+                pmid 9851916 } ,
+              genomic-coords {
+                mix {
+                  int {
+                    from 26732 ,
+                    to 26777 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 26589 ,
+                    to 26689 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 26370 ,
+                    to 26523 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 25272 ,
+                    to 25471 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 24928 ,
+                    to 25089 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 24650 ,
+                    to 24844 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 21012 ,
+                    to 21138 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 20847 ,
+                    to 20963 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 20270 ,
+                    to 20477 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 19014 ,
+                    to 19240 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 18005 ,
+                    to 18114 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 17910 ,
+                    to 17957 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } } } ,
+              seqs {
+                whole
+                  gi 17510633 } } } ,
+          comment {
+            {
+              type comment ,
+              text "" ,
+              version 0 } } } ,
+        {
+          type mRNA ,
+          accession "NM_058262" ,
+          version 1 ,
+          refs {
+            pmid 9851916 } ,
+          genomic-coords {
+            mix {
+              int {
+                from 26732 ,
+                to 26777 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 26589 ,
+                to 26680 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 26370 ,
+                to 26523 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 25272 ,
+                to 25471 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 24928 ,
+                to 25089 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 24650 ,
+                to 24844 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 21012 ,
+                to 21138 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 20847 ,
+                to 20963 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 20270 ,
+                to 20477 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 19014 ,
+                to 19240 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 18005 ,
+                to 18114 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 17910 ,
+                to 17957 ,
+                strand minus ,
+                id
+                  gi 25167480 } } } ,
+          seqs {
+            whole
+              gi 17510634 } ,
+          products {
+            {
+              type peptide ,
+              label "suppressor of PResenilin defect SPR-1 related; similar to
+ co-repressor of REST RE1-silencing transcription factor (65.6 kD) (1A492)" ,
+              accession "NP_490663" ,
+              version 1 ,
+              refs {
+                pmid 9851916 } ,
+              genomic-coords {
+                mix {
+                  int {
+                    from 26732 ,
+                    to 26777 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 26589 ,
+                    to 26680 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 26370 ,
+                    to 26523 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 25272 ,
+                    to 25471 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 24928 ,
+                    to 25089 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 24650 ,
+                    to 24844 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 21012 ,
+                    to 21138 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 20847 ,
+                    to 20963 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 20270 ,
+                    to 20477 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 19014 ,
+                    to 19240 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 18005 ,
+                    to 18114 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 17910 ,
+                    to 17957 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } } } ,
+              seqs {
+                whole
+                  gi 17510635 } } } ,
+          comment {
+            {
+              type comment ,
+              text "" ,
+              version 0 } } } } ,
+      comment {
+        {
+          type comment ,
+          text "Title: Caenorhabditis elegans gene 1A492, suppressor of
+ PResenilin defect SPR-1 related; similar to co-repressor of REST
+ RE1-silencing transcription factor." ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_058261" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 17510632 } ,
+              anchor "NM_058261" } } ,
+          seqs {
+            whole
+              gi 17510632 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_490662" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 17510633 } ,
+                  anchor "NP_490662" } } ,
+              seqs {
+                whole
+                  gi 17510633 } } } } ,
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_058262" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 17510634 } ,
+              anchor "NM_058262" } } ,
+          seqs {
+            whole
+              gi 17510634 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_490663" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 17510635 } ,
+                  anchor "NP_490663" } } ,
+              seqs {
+                whole
+                  gi 17510635 } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC024206" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7105656 } ,
+              anchor "AC024206" } } ,
+          seqs {
+            whole
+              gi 7105656 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK67240" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 14550392 } ,
+                  anchor "AAK67240" } } ,
+              seqs {
+                whole
+                  gi 14550392 } } } } ,
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC024206" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7105656 } ,
+              anchor "AC024206" } } ,
+          seqs {
+            whole
+              gi 7105656 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAF36052" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 7105661 } ,
+                  anchor "AAF36052" } } ,
+              seqs {
+                whole
+                  gi 7105661 } } } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "Reviewed" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 171592 } } ,
+  xtra-iq {
+    {
+      tag "NUCLEOTIDE" ,
+      value "7105656" } ,
+    {
+      tag "PROTEIN" ,
+      value "14550392" } ,
+    {
+      tag "PROTEIN" ,
+      value "7105661" } } ,
+  non-unique-keys {
+    {
+      db "ID" ,
+      tag
+        id 25167480 } } }
+Entrezgene ::= {
+  track-info {
+    geneid 171593 ,
+    status live ,
+    create-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 7 ,
+        hour 13 ,
+        minute 31 ,
+        second 41 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 1 ,
+        day 18 ,
+        hour 23 ,
+        minute 7 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Caenorhabditis elegans" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 6239 } } ,
+      orgname {
+        name
+          binomial {
+            genus "Caenorhabditis" ,
+            species "elegans" } ,
+        lineage "Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
+ Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis" ,
+        gcode 1 ,
+        mgcode 5 ,
+        div "INV" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "I" } } } ,
+  gene {
+    locus "1A502" ,
+    maploc "I;-17.91 cM (interpolated genetic position)" ,
+    db {
+      {
+        db "AceView/WormGenes" ,
+        tag
+          str "1A502" } ,
+      {
+        db "LocusID" ,
+        tag
+          id 171593 } ,
+      {
+        db "WormBase" ,
+        tag
+          str "Y74C9A.5" } } ,
+    syn {
+      "Y74C9A.5" ,
+      "CELK09643" } ,
+    locus-tag "1A502" } ,
+  prot {
+    name {
+      "sestrin (1A502)" } } ,
+  rna {
+    type mRNA } ,
+  location {
+    {
+      display-str "I;-17.91 cM (interpolated genetic position)" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "Entrez Genomes" ,
+    src-int 25167480 ,
+    src-str1 "NC_003279" ,
+    src-str2 "CE:1A502" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      label "1A502" ,
+      accession "NC_003279" ,
+      version 2 ,
+      seqs {
+        int {
+          from 28279 ,
+          to 34904 ,
+          strand minus ,
+          id
+            gi 25167480 } } ,
+      products {
+        {
+          type mRNA ,
+          accession "NM_058263" ,
+          version 1 ,
+          refs {
+            pmid 9851916 } ,
+          genomic-coords {
+            mix {
+              int {
+                from 34694 ,
+                to 34904 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 32598 ,
+                to 32764 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 32411 ,
+                to 32541 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 31768 ,
+                to 32071 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 29768 ,
+                to 30423 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 29099 ,
+                to 29366 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 28915 ,
+                to 29025 ,
+                strand minus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 28279 ,
+                to 28404 ,
+                strand minus ,
+                id
+                  gi 25167480 } } } ,
+          seqs {
+            whole
+              gi 17510636 } ,
+          products {
+            {
+              type peptide ,
+              label "sestrin (1A502)" ,
+              accession "NP_490664" ,
+              version 1 ,
+              refs {
+                pmid 9851916 } ,
+              genomic-coords {
+                mix {
+                  int {
+                    from 34694 ,
+                    to 34904 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 32598 ,
+                    to 32764 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 32411 ,
+                    to 32541 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 31768 ,
+                    to 32071 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 29768 ,
+                    to 30423 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 29099 ,
+                    to 29366 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 28915 ,
+                    to 29025 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 28279 ,
+                    to 28404 ,
+                    strand minus ,
+                    id
+                      gi 25167480 } } } ,
+              seqs {
+                whole
+                  gi 17510637 } } } ,
+          comment {
+            {
+              type comment ,
+              text "" ,
+              version 0 } } } } ,
+      comment {
+        {
+          type comment ,
+          text "Title: Caenorhabditis elegans gene 1A502, sestrin." ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_058263" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 17510636 } ,
+              anchor "NM_058263" } } ,
+          seqs {
+            whole
+              gi 17510636 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_490664" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 17510637 } ,
+                  anchor "NP_490664" } } ,
+              seqs {
+                whole
+                  gi 17510637 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 17510637 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 15933 } ,
+                          anchor "pfam04636: PA26; PA26 p53-induced protein
+ (sestrin)" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 88 - 644  Blast Score: 1457" ,
+                          version 0 } } } } } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC024206" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7105656 } ,
+              anchor "AC024206" } } ,
+          seqs {
+            whole
+              gi 7105656 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAF36051" ,
+              version 1 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 7105660 } ,
+                  anchor "AAF36051" } } ,
+              seqs {
+                whole
+                  gi 7105660 } } } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "Reviewed" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 171593 } } ,
+  xtra-iq {
+    {
+      tag "NUCLEOTIDE" ,
+      value "7105656" } ,
+    {
+      tag "PROTEIN" ,
+      value "7105660" } } ,
+  non-unique-keys {
+    {
+      db "ID" ,
+      tag
+        id 25167480 } } }
+Entrezgene ::= {
+  track-info {
+    geneid 171594 ,
+    status live ,
+    create-date
+      std {
+        year 2005 ,
+        month 4 ,
+        day 7 ,
+        hour 13 ,
+        minute 31 ,
+        second 47 } ,
+    update-date
+      std {
+        year 2005 ,
+        month 1 ,
+        day 18 ,
+        hour 23 ,
+        minute 7 ,
+        second 0 } } ,
+  type protein-coding ,
+  source {
+    genome genomic ,
+    origin natural ,
+    org {
+      taxname "Caenorhabditis elegans" ,
+      db {
+        {
+          db "taxon" ,
+          tag
+            id 6239 } } ,
+      orgname {
+        name
+          binomial {
+            genus "Caenorhabditis" ,
+            species "elegans" } ,
+        lineage "Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
+ Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis" ,
+        gcode 1 ,
+        mgcode 5 ,
+        div "INV" } } ,
+    subtype {
+      {
+        subtype chromosome ,
+        name "I" } } } ,
+  gene {
+    locus "1A527" ,
+    maploc "I;-18.07 cM (interpolated genetic position)" ,
+    db {
+      {
+        db "AceView/WormGenes" ,
+        tag
+          str "1A527" } ,
+      {
+        db "LocusID" ,
+        tag
+          id 171594 } ,
+      {
+        db "WormBase" ,
+        tag
+          str "Y48G1C.4" } } ,
+    syn {
+      "Y48G1C.4" ,
+      "CELK05819" } ,
+    locus-tag "1A527" } ,
+  prot {
+    name {
+      "putative phosphatidylglycerophosphate synthase, similar to
+ silencer-associated factor (51.7 kD) (1A527)" } } ,
+  rna {
+    type mRNA } ,
+  location {
+    {
+      display-str "I;-18.07 cM (interpolated genetic position)" ,
+      method
+        map-type cM } } ,
+  gene-source {
+    src "Entrez Genomes" ,
+    src-int 25167480 ,
+    src-str1 "NC_003279" ,
+    src-str2 "CE:1A527" ,
+    gene-display FALSE ,
+    locus-display FALSE ,
+    extra-terms FALSE } ,
+  locus {
+    {
+      type genomic ,
+      label "1A527" ,
+      accession "NC_003279" ,
+      version 2 ,
+      seqs {
+        int {
+          from 49920 ,
+          to 54359 ,
+          strand plus ,
+          id
+            gi 25167480 } } ,
+      products {
+        {
+          type mRNA ,
+          accession "NM_058265" ,
+          version 2 ,
+          refs {
+            pmid 9851916 } ,
+          genomic-coords {
+            mix {
+              int {
+                from 49920 ,
+                to 50015 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 50814 ,
+                to 51029 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 52282 ,
+                to 52409 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 52465 ,
+                to 52571 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 53265 ,
+                to 53336 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 53390 ,
+                to 53694 ,
+                strand plus ,
+                id
+                  gi 25167480 } ,
+              int {
+                from 53943 ,
+                to 54359 ,
+                strand plus ,
+                id
+                  gi 25167480 } } } ,
+          seqs {
+            whole
+              gi 25143330 } ,
+          products {
+            {
+              type peptide ,
+              label "putative phosphatidylglycerophosphate synthase, similar
+ to silencer-associated factor (51.7 kD) (1A527)" ,
+              accession "NP_490666" ,
+              version 2 ,
+              refs {
+                pmid 9851916 ,
+                pmid 11231151 } ,
+              genomic-coords {
+                mix {
+                  int {
+                    from 49920 ,
+                    to 50015 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 50814 ,
+                    to 51029 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 52282 ,
+                    to 52409 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 52465 ,
+                    to 52571 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 53265 ,
+                    to 53336 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 53390 ,
+                    to 53694 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } ,
+                  int {
+                    from 53943 ,
+                    to 54359 ,
+                    strand plus ,
+                    id
+                      gi 25167480 } } } ,
+              seqs {
+                whole
+                  gi 25143331 } } } ,
+          comment {
+            {
+              type comment ,
+              text "" ,
+              version 0 } } } } ,
+      comment {
+        {
+          type comment ,
+          text "Title: Caenorhabditis elegans essential gene 1A527, putative
+ phosphatidylglycerophosphate synthase, similar to silencer-associated factor." ,
+          version 0 } } } } ,
+  comments {
+    {
+      type comment ,
+      heading "NCBI Reference Sequences (RefSeq)" ,
+      version 0 ,
+      products {
+        {
+          type mRNA ,
+          heading "mRNA Sequence" ,
+          accession "NM_058265" ,
+          version 2 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 25143330 } ,
+              anchor "NM_058265" } } ,
+          seqs {
+            whole
+              gi 25143330 } ,
+          products {
+            {
+              type peptide ,
+              heading "Product" ,
+              accession "NP_490666" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 25143331 } ,
+                  anchor "NP_490666" } } ,
+              seqs {
+                whole
+                  gi 25143331 } ,
+              comment {
+                {
+                  type other ,
+                  heading "Conserved Domains" ,
+                  version 0 ,
+                  source {
+                    {
+                      src {
+                        db "PROT_CDD" ,
+                        tag
+                          id 25143331 } ,
+                      pre-text "(1)" ,
+                      anchor "summary" } } ,
+                  comment {
+                    {
+                      type other ,
+                      version 0 ,
+                      source {
+                        {
+                          src {
+                            db "CDD" ,
+                            tag
+                              id 5372 } ,
+                          anchor "cd00138: PLDc; Phospholipase D" } } ,
+                      comment {
+                        {
+                          type other ,
+                          text "Location: 20 - 172  Blast Score: 135" ,
+                          version 0 } } } } } } } } } } } ,
+    {
+      type comment ,
+      heading "Related Sequences" ,
+      version 0 ,
+      products {
+        {
+          type genomic ,
+          heading "Genomic" ,
+          accession "AC024796" ,
+          version 1 ,
+          source {
+            {
+              src {
+                db "Nucleotide" ,
+                tag
+                  id 7140352 } ,
+              anchor "AC024796" } } ,
+          seqs {
+            whole
+              gi 7140352 } ,
+          products {
+            {
+              type peptide ,
+              accession "AAK29892" ,
+              version 2 ,
+              source {
+                {
+                  src {
+                    db "Protein" ,
+                    tag
+                      id 16950493 } ,
+                  anchor "AAK29892" } } ,
+              seqs {
+                whole
+                  gi 16950493 } } } } } } ,
+    {
+      type comment ,
+      heading "RefSeq Status" ,
+      label "Reviewed" ,
+      version 0 } } ,
+  unique-keys {
+    {
+      db "LocusID" ,
+      tag
+        id 171594 } } ,
+  xtra-iq {
+    {
+      tag "NUCLEOTIDE" ,
+      value "7140352" } ,
+    {
+      tag "PROTEIN" ,
+      value "16950493" } } ,
+  non-unique-keys {
+    {
+      db "ID" ,
+      tag
+        id 25167480 } } }

Added: trunk/packages/bioperl/branches/upstream/current/t/data/example.hap
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/example.hap	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/example.hap	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,39 @@
+#Thu Mar 18 08:50:48 2004: HapMap genotype data dump, chr9 from 660000 to 759999
+#For details on file format, see http://www.hapmap.org/genotypes/
+#Settings:
+#   minimum MAF: 0.00
+rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID NA06985 NA06991 NA06993 NA06994 NA07000 NA07019 NA07022 NA07029 NA07034 NA07048 NA07055 NA07056 NA07345 NA07348 NA07357 NA10830 NA10831 NA10835 NA10838 NA10839 NA10846 NA10847 NA10851 NA10854 NA10855 NA10856 NA10857 NA10859 NA10860 NA10861 NA10863 NA11829 NA11830 NA11831 NA11832 NA11839 NA11840 NA11881 NA11882 NA11992 NA11993 NA11994 NA11995 NA12003 NA12004 NA12005 NA12006 NA12043 NA12044 NA12056 NA12057 NA12144 NA12145 NA12146 NA12154 NA12155 NA12156 NA12234 NA12236 NA12239 NA12248 NA12249 NA12264 NA12707 NA12716 NA12717 NA12740 NA12750 NA12751 NA12752 NA12753 NA12760 NA12761 NA12762 NA12763 NA12801 NA12802 NA12812 NA12813 NA12814 NA12815 NA12864 NA12865 NA12872 NA12873 NA12874 NA12875 NA12878 NA12891 NA12892
+rs4584192 A/G chr9 661053 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177478:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AG AG AG AG AG AG AA AA GG AG AG GG GG AG AG AG AG GG AA AA AA AG AA GG GG AG AG AG AA GG AG AG AG GG GG AG GG GG AG AA AG AG GG AG AG AA AA AA GG AA AG AG AA GG AA AG AG AG GG AG GG AG AG AG AG AG AA AG AG AG AG AG AG AA GG AG AG AG GG AA AG AA AG AA AG AG GG AA AA AA
+rs4740849 C/G chr9 662880 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177479:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CG CG CG CG CG CG CC CC GG CG CG GG CC CC CG CG CC GG CC CC CC CG CC CC GG CG CC CC CC CG CG CG CG CG GG CC CG CG CC CC CG CG CG CC CG CC CC CC CC CC CG CG CC CG CC CC CG CG CG CC GG CG CG CC CC CG CC CG CG CC CC CG CC CC CG CG CC CG GG CC CC CC CG CC CG CG CG CC CC CC
+rs4742215 G/T chr9 664017 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177480:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 GT GT GT TT GT GT GG GT TT TT TT TT TT GT GT TT GT TT GG GT GG GT GG TT TT GT GT GT TT TT GT GT GT TT TT TT TT TT GT GT TT GT TT GT GT GT TT GG TT GG GT GT GT TT GT TT GT TT TT GT TT TT GT TT GT TT GT TT GT TT TT TT TT TT TT GT TT GT TT GT GT GT TT GG TT TT TT GT TT GT
+rs4742219 C/T chr9 665377 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177481:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CT CT CT TT CT CT CC CT TT TT TT TT TT CT CT TT CT TT CC CT CC CT CC TT TT CT CT CT TT TT CT CT CT TT TT TT TT TT CT CT TT CT TT CT CT CT TT CC TT CC CT CT CT TT CT TT CT TT TT CT TT TT CT TT CT TT CT TT CT TT CT TT TT TT CT CT TT CT TT CT CT CT TT CC TT TT TT CT TT CT
+rs4742220 A/G chr9 665419 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:245719:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AG AG AG GG AG AG AA AG GG GG GG GG GG AG AG GG AG GG AA AG AA AG AA GG GG AG AG AG GG GG AG AG NN GG GG GG GG GG AG AG GG AG GG AG NN AG GG NN GG AA AG AG AG GG AG GG AG GG GG AG GG GG AG GG AG GG AG GG AG GG GG GG GG GG GG AG GG AG NN AG AG NN GG AA GG GG GG AG GG AG
+rs4742222 A/G chr9 665463 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:238601:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AG AG AG AA AG AG GG AG AA AA AA AA AA AG AG AA AG AA GG AG GG AG GG AA AA AG AG AG AA AA AG AG AG AA AA AA AA AA AG AG AA AG AA AG AG AG AA GG AA GG AG AG AG AA AG AA AG AA AA AG AA AA AG AA AG AA AG AA AG AA AA AA AA AA AA AG AA AG AA AG AG AG AA GG AA AA AA AG AA AG
+rs4742223 C/T chr9 665600 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177482:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CT CT CC CT CT CT TT CT CC CC CC CC CC CT CT CC CT CC TT CT TT CT TT CC CC CT CT CT CC CC CT CT CT CC CC CC CC CC CT CT CC CT CC CT CT CT CC TT CC TT CT CT CT CC CT CC CT CC CC CT CC CC CT CC CT CC CT CC CT CC CC CC CC CC CC CT CC CT CC CT CT CT CC TT CC CC CC CT CC CT
+rs2296054 C/T chr9 667141 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177485:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT
+rs4740850 A/G chr9 667772 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177486:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AG AG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG AG GG AG GG GG GG GG GG GG AG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG AG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG
+rs4742225 G/T chr9 667938 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177487:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 GT GT GG GT GG GG GG GT GG GT GT GG TT GT GG GT GT GG GG GT GG GG GG NN GG GG GT GT TT GT GT GG GT GT GG TT GT GT GT TT GT GG GT GT GG GT TT GG TT GG GG GG GT GT GT TT GG GT GT GT GG GT GT TT GT GT GT GT GG TT TT GT TT TT GT GG TT GG GG GT GT GT GT GG GT GT GT GT TT GT
+rs4740851 A/T chr9 667986 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:245218:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AT AT AA AA AA AA AA AA AA AA AA AA TT AT AA AA AT AA AA AA AA AA AA TT AA AA AT AT AA AT AT AA AT AT AA AT AT AT AT AT AA AA AT AT AA AA AA AA TT AA AA AA AA AT AA AT AA AA AT AT AA AA AT AT AT AA AA AA AA AT AT AA AT AA AT AA AT AA AA AA AT AA AA AA AA AA AT AA AA AA
+rs4742227 C/G chr9 669184 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177488:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 GG GG CG GG CG CG CC CG GG GG GG GG GG CG CG GG CG GG CC CG CC CG CC GG GG CG CG CG GG GG CG CG GG GG GG GG GG GG CG GG GG CG GG CG CG CG GG CC GG CC CG CG CG GG CG GG CG GG GG CG GG GG GG GG CG GG CG GG CG GG GG GG GG GG GG CG GG CG GG CG CG CG GG CC GG GG GG CG GG CG
+rs2025308 A/C chr9 673633 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177489:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA AA
+rs1323262 C/G chr9 674096 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177490:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CG CG CG CC CG CG GG CG CC CC CC CC CC CG CG CC GG CC GG CG GG CG GG GG CC CG GG GG CC CC CG CG CG CC CC CG CG CG CG CG CC CG CC CG CG CG CC GG GG GG CG CG CG CC CG CG CG CC CG CG CC CC CG CG GG CC CG CC CG CG CG CC CG CC CG CG CG CG CC CG GG CG CC GG CC CC CG CG CC CG
+rs4742236 A/G chr9 676753 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177491:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AG AA AG GG AG GG AG GG GG GG AG GG GG AG AG GG GG GG AG AG GG AG AG GG GG GG GG GG GG GG GG GG AG GG GG GG GG GG GG AG GG GG GG GG AG AG GG AG GG AG GG GG GG GG GG GG GG GG GG AG AG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG AG GG AG GG AG
+rs1359058 A/T chr9 679233 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:32734:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AT AT AT AA AA TT TT AA AA AA AA TT AA AT AT AA TT AA AT AT AT AT TT TT AA AT TT TT AA AA AT AT AT AA AA AT AT AT AT AT AA AT AA AA TT AT AA TT TT TT AT AT AA AA AA AT AT AA AT AT AA AA AT AT AT AA AT AA AT AT AT AA AT AA AT AT AT AT AA AT TT AT AA TT AA AA AT TT AT AT
+rs1475202 C/G chr9 684826 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:32735:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 GG GG CG GG GG CG CC GG GG GG GG CG GG GG GG GG CC GG GG GG CG GG CG CC GG CG CC CC GG GG GG CG GG GG GG CG CG CG CG GG GG CG GG GG CG GG GG CG CC CG CG CG GG GG GG CG CG GG CG GG GG GG GG CG CG GG CG GG CG CG CG GG CG GG CG CG CG CG GG CG CC CG GG CC GG GG CG CG CG GG
+rs745876 A/T chr9 685143 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177494:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AA AA AT AA AA AT TT AA AA AA AA AT AA AA AA AA TT AA AA AA AT AA AT TT AA AT TT TT AA AA AA AT AA AA AA AT AT AT AT AA AA AT AA AA AT AA AA AT TT AT AT AT AA AA AA AT AT AA AT AA AA AA AA AT AT AA AT AA AT AT AT AA AT AA AT AT AT AT AA AT TT AT AA TT AA AA AT AT AT AA
+rs745877 C/T chr9 685310 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:246340:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 TT TT CT TT TT CT CC TT TT TT TT CT TT TT TT TT CC TT TT TT CT TT CT CC TT CT CT CC TT TT TT CT TT TT TT CC CT CT CT TT TT CT TT TT CT TT TT CT CT CT CT CT TT TT TT CT CT TT CT TT TT TT TT CT CT TT CT TT CT CT CT TT CT TT CT CT CT CT TT CT CC CT TT CC TT TT CT CT CT TT
+rs732119 C/T chr9 685607 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:32738:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 TT TT CT TT TT CT CC TT TT TT TT CT TT TT TT TT CC TT TT TT CT TT CT CC TT CT CC CC TT TT TT CT TT TT TT CT CT CT CT TT TT CT TT TT CT TT TT CT CC CT CT CT TT TT TT CT CT TT CT TT TT TT TT CT CT TT CT TT CT CT CT TT CT TT CT CT CT CT TT CT CC CT TT CC TT TT CT CT CT TT
+rs732118 A/C chr9 685614 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:32739:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AA AA AC AA AA AC CC AA AA AA AA AC AA AA AA AA CC AA AA AA AC AA AC CC AA AC CC CC AA AA AA AC AA AA AA AC AC AC AC AA AA AC AA AA AC AA AA AC CC AC AC AC AA AA AA AC AC AA AC AA AA AA AA AC AC AA AC AA AC AC AC AA AC AA AC AC AC AC AA AC CC AC AA CC AA AA AC AC AC AA
+rs1570473 C/G chr9 685894 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:32740:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 GG GG CG GG GG CG CC GG GG GG GG CG GG GG GG GG CC GG GG GG CG GG CG CC GG CG CC CC GG GG GG CG GG GG GG CG CG CG CG GG GG CG GG GG CG GG GG CG CC CG CG CG GG GG GG CG CG GG CG GG GG GG GG CG CG GG CG GG CG CG CG GG CG GG CG CG CG CG GG CG CC CG GG CC GG GG CG CG CG GG
+rs912175 C/G chr9 702137 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177496:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CG CG CC CG CC CC CC CG CC CG CG CG CG CG CC GG CC CG CG CG CC CC CC CC CG CC CC CC CG GG CC CC CC CC CG CC CG CC CG CC CG CG CG CG CG CG GG CC CC CC CG CC CG CG CG CG CC CC CG CG CC GG CC CG CC CG CC CG CC CG CG CG CC GG CG CC CG CC CC CG CC CC CG CC CC CG CC CC CG CC
+rs912174 A/C chr9 702156 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:32741:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AC AC AA AC AA AA AA AC AA AC AC AC AC AC AA CC AA AC AC AC AA AA AA AA AC AA AA AA AC CC AA AA AA AA AC AA AC AA AC AA AC AC AC AC AC AC CC AA AA AA AC AA AC AC AC AC AA AA AC AC AA CC AA AC AA AC AA AC AA AC AC AC AA CC AC AA AC AA AC AC AA AA AC AA AA AC AA AA AC AA
+rs4740866 G/T chr9 706570 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177499:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 TT TT TT TT TT TT GT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT GT TT GT GT TT GT TT TT TT TT TT TT TT TT TT TT TT TT TT TT GT TT TT TT TT TT TT GT TT TT TT TT GT TT TT TT TT TT TT TT TT TT GT TT TT TT TT TT GT TT TT GT TT TT GT TT TT GT GT TT
+rs1022827 A/G chr9 709608 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177500:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 GG GG AG GG AG AA AG AG GG GG GG AG GG GG GG GG AG GG GG GG AG GG GG AA GG AG AA AG GG GG GG AG GG GG GG AG AG GG AG GG GG AG GG GG GG GG GG AG AA AG GG GG AG GG AG GG AG GG AG GG GG GG GG AG AA GG AG GG AG GG GG GG GG GG GG AG AG AG GG AG AA AG GG AA GG GG AG GG GG AG
+rs4742292 A/G chr9 710832 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177501:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AA AA AG AA AG GG AG AG AA AA AA AG AA AA AA AA AG AA AA AA AG AA AA GG AA AG GG AG AA AA AG AG AA AG AA GG AG AA AG AA AA AG AA AA AA AA AA AG GG AG AG AA AG AA AG AA AG AG AG AA AA AA AG AG GG AA AG AA AG AA AA AA AG AA AA AG AG AG AG AG GG GG AA GG AG AA AG AA AA AG
+rs2296050 C/G chr9 730592 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177505:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CG CG CG CG CG CC CG CC GG CG CG CC CG CC CG CC CG CG CC CC CC GG CC CC CG CG CC CG CG CC CG CG GG CG CG CC CC GG CC GG CG CC CG CG CC CG CC CC CG CC CC CC CC CG CC CG CC CG CC CG GG CC CG CC CC CG CG GG CG GG CG GG CG CG CG CG CC CG CG CC CC CC CG CC CG CG CG CC CC CG
+rs2296049 A/G chr9 730901 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177506:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AG AG GG AG GG GG GG AG GG AG AG AG AG AA AG AA GG AG AA AA GG GG AA GG AG GG GG GG AG AA GG GG GG GG AG GG GG GG AG GG AG AG AG AG AA AG AA AG GG AG AA AG AG AG AG AG AG GG AG AG GG AA GG AG GG AG GG GG GG GG AG GG GG AG AG GG AG GG GG AG GG GG AG GG GG AG GG AG AG AG
+rs3739586 C/T chr9 735413 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177507:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CT CC CC CC CC CC CC CC CC CT CC CC CC CC CC CC CC CC CC CC CC CC CC CT CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC NN CC CC CC CC CC CC CC CC CC
+rs962817 C/T chr9 740892 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177508:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 CT CT CC CT CC CC CC CT CC CT CT CT TT TT CT TT CC CC TT TT CC CT TT CT CC CC CC CC CT CC TT CC CC CC CC CC CT CC CC CC CT CC CC CT TT CT TT CT CC CT CT CC CT CT CT CT CT CT CT CC CC CT CT CT CC CT CC CC CC CT CT TT CT CT CT CC CC CC CC CC CC CT CT CC CT CT CC TT TT CT
+rs4400444 G/T chr9 744655 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177509:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 TT GT GT GT GT TT TT TT TT TT TT TT TT GT GT TT TT TT GT GT TT GT GT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT GT GT TT GT TT GT TT TT TT GT TT TT TT TT TT TT TT TT GT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT TT GT TT GT TT GT
+rs881684 A/G chr9 750405 + ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177510:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AA AA AG AA AG AA AA AG AA AA AA AA AA AG AG AA AG AG AG AG AG AG AG AG AA AG GG AA AA AA AA AG AA AG AG GG AG AA AA AA AA AG AA AA GG AG AA GG AG GG AA AG AA AG AG AA AG AG AA AA AA AG AA AG GG AA AG AA AG AA AA AA AA AG AA AG GG AG AG GG GG AA AG AA AA AA AG AG AA AG
+rs2025307 A/G chr9 752654 - ncbi_b34 illumina urn:LSID:illumina.hapmap.org:Protocol:Golden_Gate_1.0.0:1 urn:LSID:illumina.hapmap.org:Assay:177511:1 urn:lsid:dcc.hapmap.org:Panel:CEPH-30-trios:1 AA AA AG AG AA AA AA AG AA AA AA AG AG GG AG AG AG AG GG AG AG AG GG GG AA AG AG AA AG AA AG AG AA AG AG GG GG AA AA AG AA AA AA AG GG AG AA GG AA GG GG AG AA AG AA AA AG AG AG AA AA AG AG GG AG AG AG AA AG AA AA AG AA AA AG AG GG AG AG GG GG AA AA AA AA AA AA AG AA AG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/example.phase
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/example.phase	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/example.phase	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,13 @@
+3
+5
+P 300 1313 1500 2023 5635
+MSSSM
+#1
+12 1 0 1 3
+11 0 1 0 3
+#2
+12 1 1 1 2
+12 0 0 0 3
+#3
+-1 ? 0 0 2
+-1 ? 1 1 13
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/expected.blast.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/expected.blast.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/expected.blast.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+QUERY	Q_LEN	SBJCT	S_LEN	EXPCT	SCORE	BITS	HSPS	IDEN	CONSV	Q_ALN	S_ALN	Q_GAP	S_GAP	Q_BEG	Q_END	S_BEG	S_END	Q_STR	S_STR	FRAM	DESC
+-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----
+gi|1401126	504	gb|U49928|HSU49928	3096	0.0e+00	2580	1009	1	1.00	1.00	504	504	0	0	1	504	21	1532		1		Homo sapiens TAK1 binding protein (TAB1) mRNA, complete cds
+gi|1401126	504	emb|Z36985|PTPP2CMR	969	4.0e-07	137	57	1	0.25	0.43	261	221	2	42	64	324	182	844		1		P.tetraurelia mRNA for protein phosphatase type 2C
+gi|1401126	504	emb|X77116|ATMRABI1	1981	1.0e-05	124	52	1	0.24	0.43	242	191	6	57	55	296	918	1490		1		A.thaliana mRNA for ABI1 protein
+gi|1401126	504	gb|U12856|ATU12856	2000	1.0e-05	124	52	1	0.24	0.43	242	191	6	57	55	296	918	1490		1		Arabidopsis thaliana Col-0 abscisic acid insensitive protein (ABI1) mRNA, complete cds
+
+
+QUERY	Q_LEN	SBJCT	S_LEN	EXPCT	SCORE	BITS	HSPS	FR_ID	FR_CN	FR_ALQ	FR_ALS	Q_ALN	S_ALN	Q_GAP	S_GAP	Q_BEG	Q_END	S_BEG	S_END	AMBIG
+-----	-----	------	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----	-----
+gi|1401126	504	gb|U49928|HSU49928	3096	0.0e+00	2580	1009	1	1.00	1.00	1.00	0.49	504	504	0	0	1	504	21	1532	-	Homo sapiens TAK1 binding protein (TAB1) mRNA, complete cds
+gi|1401126	504	emb|Z36985|PTPP2CMR	969	4.0e-07	137	57	1	0.25	0.43	0.52	0.68	261	221	2	42	64	324	182	844	-	P.tetraurelia mRNA for protein phosphatase type 2C
+gi|1401126	504	emb|X77116|ATMRABI1	1981	1.0e-05	124	52	1	0.24	0.43	0.48	0.29	242	191	6	57	55	296	918	1490	-	A.thaliana mRNA for ABI1 protein
+gi|1401126	504	gb|U12856|ATU12856	2000	1.0e-05	124	52	1	0.24	0.43	0.48	0.29	242	191	6	57	55	296	918	1490	-	Arabidopsis thaliana Col-0 abscisic acid insensitive protein (ABI1) mRNA, complete cds
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/factor7.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/factor7.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/factor7.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,323 @@
+ID   HSCFVII    standard; DNA; HUM; 12850 BP.
+XX
+AC   J02933;
+XX
+SV   J02933.1
+XX
+DT   02-APR-1988 (Rel. 15, Created)
+DT   02-JUL-1999 (Rel. 60, Last updated, Version 6)
+XX
+DE   Human blood coagulation factor VII gene, complete cds.
+XX
+KW   coagulation factor; coagulation factor VII.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; Eutheria;
+OC   Primates; Catarrhini; Hominidae; Homo.
+XX
+RN   [1]
+RP   1-12850
+RX   MEDLINE; 87260948.
+RA   O'Hara P.J., Grant F.J., Haldeman B.A., Gray C.L., Insley M.Y., Hagen F.S.,
+RA   Murray M.J.;
+RT   "Nucleotide sequence of the gene coding for human factor VII, a vitamin
+RT   K-dependent protein participating in blood coagulation";
+RL   Proc. Natl. Acad. Sci. U.S.A. 84(15):5158-5162(1987).
+XX
+RN   [2]
+RP   856-12446
+RX   MEDLINE; 88329723.
+RA   O'Hara P.J., Grant F.J.;
+RT   "The human factor VII gene is polymorphic due to variation in repeat copy
+RT   number in a minisatellite";
+RL   Gene 66(1):147-158(1988).
+XX
+DR   GDB; 119897; F7.
+DR   SWISS-PROT; P08709; FA7_HUMAN.
+XX
+CC   [2]  minisatellite imperfect repeats only.
+CC   Draft entry and computer-readable copy of sequence in [1] kindly
+CC   provided by P.J.O'Hara, 26-JUN-1987.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..12850
+FT                   /db_xref="taxon:9606"
+FT                   /organism="Homo sapiens"
+FT                   /map="13q34"
+FT   prim_transcript 487..12686
+FT                   /note="factor VII pre-mRNA (alt.)"
+FT   prim_transcript 487..12660
+FT                   /note="factor VII pre-mRNA (alt.)"
+FT   prim_transcript 487..12664
+FT                   /note="factor VII pre-mRNA (alt.)"
+FT   exon            <522..585
+FT                   /note="factor VII; G00-119-897"
+FT                   /gene="F7"
+FT   gene            522..585
+FT                   /gene="F7"
+FT   CDS             join(522..585,1654..1719,4294..4454,6383..6407,6478..6591,
+FT                   8307..8447,9419..9528,10124..10247,11064..11659)
+FT                   /codon_start=1
+FT                   /db_xref="SWISS-PROT:P08709"
+FT                   /note="factor VII"
+FT                   /protein_id="AAA51983.1"
+FT                   /translation="MVSQALRLLCLLLGLQGCLAAGGVAKASGGETRDMPWKPGPHRVF
+FT                   VTQEEAHGVLHRRRRANAFLEELRPGSLERECKEEQCSFEEAREIFKDAERTKLFWISY
+FT                   SDGDQCASSPCQNGGSCKDQLQSYICFCLPAFEGRNCETHKDDQLICVNENGGCEQYCS
+FT                   DHTGTKRSCRCHEGYSLLADGVSCTPTVEYPCGKIPILEKRNASKPQGRIVGGKVCPKG
+FT                   ECPWQVLLLVNGAQLCGGTLINTIWVVSAAHCFDKIKNWRNLIAVLGEHDLSEHDGDEQ
+FT                   SRRVAQVIIPSTYVPGTTNHDIALLRLHQPVVLTDHVVPLCLPERTFSERTLAFVRFSL
+FT                   VSGWGQLLDRGATALELMVLNVPRLMTQDCLQQSRKVGDSPNITEYMFCAGYSDGSKDS
+FT                   CKGDSGGPHATHYRGTWYLTGIVSWGQGCATVGHFGVYTRVSQYIEWLQKLMRSEPRPG
+FT                   VLLRAPFP"
+FT   intron          586..1653
+FT                   /note="intron A1"
+FT   exon            1654..1719
+FT                   /note="optional"
+FT   intron          1720..4293
+FT                   /note="intron A"
+FT   exon            4294..4454
+FT                   /number=2
+FT   intron          4455..6382
+FT                   /note="intron B"
+FT   exon            6383..6407
+FT                   /number=3
+FT   intron          6408..6477
+FT                   /note="intron C"
+FT   exon            6478..6591
+FT                   /number=4
+FT   intron          6592..8306
+FT                   /note="Intron D"
+FT   exon            8307..8447
+FT                   /number=5
+FT   intron          8448..9418
+FT                   /note="Intron E"
+FT   exon            9419..9528
+FT                   /number=6
+FT   intron          9529..10123
+FT                   /note="Intron F"
+FT   exon            10124..10247
+FT                   /number=7
+FT   intron          10248..11063
+FT                   /note="Intron G"
+FT   exon            11064..>11659
+FT                   /note="factor VII"
+XX
+SQ   Sequence 12850 BP; 2532 A; 3888 C; 3902 G; 2528 T; 0 other;
+     cccggcactt ctcagtgagg ctctgtggct cacctaagaa accagcctcc cttgcaggca        60
+     acgcctagct ggcctggtct ggaggctctc ttcaaatatt tacatccaca cccaagatac       120
+     ggtcttgaga tttgactcgc atgattgcta tgggacaagt tttcatctgc agtttaaatc       180
+     tgtttcccaa cttacattag gggtttggaa ttctagatcg tatttgaagt gttggtgcca       240
+     cacacacctt aacacctgca cgctggcaac aaaaccgtcc gctctgcagc acagctgggg       300
+     tcacctgacc tttctcctgt cccccccact tgagctcagt ggctgggcag caggggatgc       360
+     atggccactg gcggccaggt gcagctctca gctggggtgt tcagaggacg cctgtgtcct       420
+     cccctccccc atccctctgt cacccttgga ggcagagaac tttgcccgtc agtcccatgg       480
+     ggaatgtcaa caggcagggg cagcactgca gagatttcat catggtctcc caggccctca       540
+     ggctcctctg ccttctgctt gggcttcagg gctgcctggc tgcaggtgcg tccggggagg       600
+     ttttctccat aaacttggtg gaagggcagt gggcaaatcc aggagccagc ccgggcttcc       660
+     caaaccccgc ccttgctccg gacaccccca tccaccagga gggttttctg gcggctcctg       720
+     ttcaatttct ttccttctag aaaccagcat ccaggcacag gaggggaggc ccttcttggt       780
+     agcccaggct ttggcgggat tatttttcaa agaactttag gagtgggtgg tgctttcctg       840
+     gcccccatgg ccctgcctgt gaggtcggac aagcgcaggg agtctggggc ctctcagagt       900
+     gcaggaagtg cgcacagggt gctcccaggc tggggagcac aggtagggga cggtgcgtgg       960
+     gggatggcgc ctggggcatg ggggatgggg tgtgggaaac ggcatgtggg gcgtagggga      1020
+     tggggtgtgg aggatcgggg gtggggatgg cgtgtggggt gtgggggatg ggccgtgggg      1080
+     gggtggggcc tgggaaacag catgtggggc atggggtgtg ggggtgaggt gtgggaaagt      1140
+     gtgtggggtg tgggggatgg ggcatggaaa gggcgtgtgg ggtgcagggg atggggcatg      1200
+     gaggtgtggg ggatggggtg tgtggggtgt cggggatggg gcatgtgggg tgtgggggat      1260
+     ggggcatgga aaggcgtgtg gggtgcagag gatggggcat ggaggtctgg ggcatggggt      1320
+     gtgtggggtg tcggggatgg ggcatggaaa gggtgtgtgg ggtgtgggga tagggtcagg      1380
+     ggatggcgtg gggggtgtgg catggggatg gcacgtgtgg catggggatg gggatggggg      1440
+     gtggggcatg gccgagtggg gctggggctg ggaatggtga gtggggcatg gggatggcga      1500
+     gtagggggtg tggcgtgagg atggctagtg gggcgtgggg atggcgtgtg gggatggcga      1560
+     gtggggggtg ggctgtgagg gacagtgcct gggatgtggc tgcagcccta gctcacagca      1620
+     tggccttatg accccggcca ccttcctgcc caggcggggt cgctaaggcc tcaggaggag      1680
+     aaacacggga catgccgtgg aagccggggc ctcacagagg tgagcaggga ctgccactgg      1740
+     ttttgtcctg gggcccagtg ggggcaacat cacctccttc ccctcccatg gcaaagagcc      1800
+     agcccgcggg gtggctactg cagtgccccc caaggagggt gttccctgct cgagaggaag      1860
+     tgaccgctcc agcttggcct tccctgggac tggggtgcag gcgattttat cttctttgct      1920
+     ccattctgtt ccttccagat aatcgtgtgt tcttcatcag gttttcctca gttcttgaga      1980
+     gcttttctga tgcaaatctg ctttcacccc agggcggtca ccggctctgc tcacaccagc      2040
+     ctccaagggt gtgggtgtcc cgggagtgtg ggtgtcccgg gggcgtgggt gtcccgggag      2100
+     tgtgggtgtc ccgggggcgt gggtgtcccg ggagtgtggg tgtcccgggg gcgtgggtgt      2160
+     cccgggagtg tgggtgtccc gggggagtgg gtgtcccggg agtgtgggtg tcccaggggc      2220
+     gtgggtgtcc cgggagtgtg ggtgtcccgg gggcgtgggt gtcccgggag tgtgggtgtc      2280
+     ccggaggcga gggtgtcccg ggagtgtggg tgtcccgggg gcgtgggtgt cccgggagtg      2340
+     tgggtgtccc gggggagtgg gtgtcccggg agtgtgggtg tcccaggggc gtgggtgtcc      2400
+     cgggagtgtg ggtgtcccgg gggcgtgggt gtcccgggag tgtgggtgtc ccggagcgag      2460
+     ggtgtcccgg gagtgtgggt gtcccggggg cgtgggtgtc ccggaggcga gggtgtccca      2520
+     ggagtgtggg tgtcccgggg gcgtgggtgt cccgggagtg tgggtgtccc ggaggcgagg      2580
+     gtgtcccggg agtgtgggtg tcccgggggc gtgggtgtcc cggaggcgag ggtgtcccag      2640
+     gagtgtgggt gtcccggggg cgtgggtgtc ccgggagtgt gggtgttcca gaggcgaggg      2700
+     tatcccagaa gtgtgagtgt cccgggggtg tgggtgtccc gggggcgtgg gtgtcccggg      2760
+     agtgtgggtg tcccgggggc gtgggtatcc cagaagtgtg agtgtcccag gggcgtgggt      2820
+     gtccgggggc gtgggtgtcc cgggggtgtg ggtgtcccgg gggtcgtggg tgtcccggga      2880
+     gcgtgggtgt cggggactgc agggacatgg gcctcccctc ccactcctgc cgcccagggc      2940
+     acctcctgtg aggactcgga gtccgtgagt tcccacctcc ttgagcccga ttctttggtg      3000
+     tccccgcctg catcctcagc ctccttccaa accagaccag ttctctaggg gcgtcgacgt      3060
+     gtgaaactga ttttaaagaa aacaggcggt ggcctttctc tcggccccac gtggcccagt      3120
+     agcgctcacc ttccgtccct tcttccgcgc tcagtaacca atttaggccg ctcctgcaga      3180
+     actcgggctc ctgcccaccg gcccacagcg tccacctgag gcctcttcct cccagcaaag      3240
+     gtcgtccctc cggaacgcgc ctcctgcggc ctctccagag cccctcccgc gcgtcctctc      3300
+     agccccgctc gcctcctccc ggggcctccc tctcccgcct gcccccaggc ccgtctccct      3360
+     cgcgggctga ggcaggttcg gcagcacggc gcccggggcg ggggtcactc tccaccaccg      3420
+     cgtggtgccc acagctcacg gcgctcccgg gtgacggtcc cctcggctgt agggcgtcct      3480
+     gaagagcggc ctgctcggag ctgagcgcac ggggttgcct gcccctgggc gtctctggcc      3540
+     ctcaccagcc ccgtcttccc atgggcaaaa cggcggtcct gtttgtccac aagtaaccgt      3600
+     cggggttacg gaggggccag gagctgcggc ggggggctgt gctctcagga ccggccccag      3660
+     gaggatccgc gcgaggtctg gagctctcag gggtcgcggg ggacagaggg gccccaagcg      3720
+     gaggcgggaa ggcggcagaa gcccaggacc gccaagagct ggcgaggaag cccggggctc      3780
+     gctgtcgggg gagccgggca ggggccgcgc ctcggcacca ggacgcgagg cctgggaagg      3840
+     cggatctggc cgcgagcacg cggtgcgggt ggagacgcag ggatttggat ttccgcgggc      3900
+     gctgcacgga tttccacgcg cggttcacgt gggccccagg gggtgcccgg cacccggggc      3960
+     cgcgccgcct tctcctgccc ggcatcgacc cgcagcctca cgtttaccgc ggcgcccgca      4020
+     gcccccttcg cccgcttccg cgcgtgcccc cgagcgcgcc ctcgggatca gcccccggaa      4080
+     gcagagaggc caggccggga aggatgggcg aacggggtgg ctgacccggg agcacggcag      4140
+     ggaggacacc cagccaggcc cgcgagcagc gccgctcccc tcctccagga cgggcgggaa      4200
+     cctgcgatgc ccccgccgcg tgggccgtgg ggcggtctcc gaggcactgg gcggggcacg      4260
+     cggtgggcgc ttcacggaac tcgcatttcc cagtcttcgt aacccaggag gaagcccacg      4320
+     gcgtcctgca ccggcgccgg cgcgccaacg cgttcctgga ggagctgcgg ccgggctccc      4380
+     tggagaggga gtgcaaggag gagcagtgct ccttcgagga ggcccgggag atcttcaagg      4440
+     acgcggagag gacggtgagc ccagcctcgg ggcgccccgc gcggacactg cacggcggcg      4500
+     gtgaaccagg ccgcgtgggg ccgcctgcgt ctctttggct gcggcctgtg ggcggcgaac      4560
+     acgcagcggc gcccgcgcgc gcgctctctc tgcgggggtc gctttccgcc cggggtgact      4620
+     ccgctttcct gggcgatgcc cccaccccca ggcacgcgct ctccccgtgc ggccgcaccg      4680
+     cgcatgccgg ttttcacatc agaaaatacg atttgcacaa gcacacttag ggtgtccccc      4740
+     ttaacttccc aagggagtcc ccccagtccc cgaagtccag ggcagcctgc gcatcgcaga      4800
+     cgcgcgcggc tcgcagaagg gacgtggtga gaagctggcc cacagcatgc caccagcggc      4860
+     acctcctcag ggcacgtgtc ggggagaaac aacacttagg gaccctggga ctttctccag      4920
+     ctcacgctca cgggtccacc tcacactacc aagatcacct caatagacgg acactcacac      4980
+     agggcacact tcacactcac aggtcacctc acactcacag gacacctcac actcacaggg      5040
+     cacacttcac actcacgggt cacctcacac tccaagatca cctaaagagg acacctcaca      5100
+     cagggcacac ttcacactca caggtcacac ctcacacaga tcatctcatt ctcacaggac      5160
+     acctccctct cacaggtcac ctcacactca caggacacct cacagaggtc acctcacacc      5220
+     cacaggacac ctcacagagg tcacctcaca cggggcacac ttcacactca ggtcacctca      5280
+     cacccacagg acacctcaca gaggtcacct cacacccaca ggacaactca cagaggtcac      5340
+     ctcacacagg acacctcaca aaggtcacct cacacccaca ggacacctca cactcatagg      5400
+     cacctcagtc ttacaggaca actcacactc acaggtcacc tatctcacag gacacctcac      5460
+     actcacaggt caccttactc tcacaggaca cctcacacag ggcacacttc actccacagg      5520
+     tcaccatacc tcacacagat cacctcatac tcacagatca cttcattcat tctcacagga      5580
+     tacctcacac tcagggcaca cttcacactc acaggtcaca cctcacacag atcatctcat      5640
+     tctcacagga cacctccctc tcacaggtca ccttacactc atctcacact cacaggtcgc      5700
+     cacacctcac actcacagga tgcctcacac tcacagaacc acatctcata tgcacaagac      5760
+     acctcacact caggacacct catgctcaaa gaagcctcac actcacagga ggtccagctg      5820
+     tctgaggcaa aggctaacat gaccctttcc agacaaattg aggatggtca tgcctagcat      5880
+     ttttatacac ctagttttga aagcatttct catctgttgt attctcacag caccccgtga      5940
+     gtttaagttc aggtggccaa cagtttcttc agcaatcact tttttctgtg gagtgctttt      6000
+     gctgtttgtg gaatattttg catctgctac tgcaccctct ccccgtatgt gtggccaccc      6060
+     tgtcagaggt ggagctgtgg ctcagagcct gtgtacctcg tcccaggtcc acagctcagc      6120
+     gacagaagag tcagggttga acctcgggtg ttctgacttg ggagcaggaa atgtgtggtc      6180
+     acccatagtt ccagatgtcc tggggagggg ccaagattag aagaaaccta cctcagctcc      6240
+     agaggaaagt ctggcttcct gagcccaccc cgccagaccc aggtccaagt cccccaaccc      6300
+     cagttcatgg tgtgtccagt gcttaccgtt gggtgctctg gtgaaggtgc atctcacgag      6360
+     gcttgctctc ttgttccttc agaagctgtt ctggatttct tacagtggtg agtggatgat      6420
+     caccaccagt cctgcctgca acccttctca gcttactgac accagcccac tccacagatg      6480
+     gggaccagtg tgcctcaagt ccatgccaga atgggggctc ctgcaaggac cagctccagt      6540
+     cctatatctg cttctgcctc cctgccttcg agggccggaa ctgtgagacg cgtaaggccc      6600
+     cactttgggt cccatatttg cagagggccc tggggagctg gtggaggtgg cctggccaac      6660
+     cgggctgcag ggtgcaacaa cctggtgggg tgtgtaggcc gggcattcag ggctcagccc      6720
+     agttggaaat tggtctaggt gacctttaaa tcccttccag tctgaggtct ttgacaggga      6780
+     cccaaggttc tgattatcag actcagtggc ccccttcgcg gtcccggccc tgggcaactt      6840
+     ctcagccctg gagactggcc cagttgagag tccctgtgtc ccgtgtgccc attccagatc      6900
+     ccacctagct aggtacccgt ttggtaaact tccccttctc ctactttcca ttacaaaggt      6960
+     ttgaggggtt tgtttttttt tttaaccatc tgaatattaa attaatcaca aagtttaggg      7020
+     cccccaacct cccttgggtt cagtaattca ctagaaggac acatagaaat ccaaatatcc      7080
+     actgagtgga tacactcaca ggtaccgttt attacagcaa aggatgcagg cttaagtctg      7140
+     cagagggacc agggacaagc ttccccttgt cctctcctgt ggggtcatgt ggacatcctt      7200
+     aattctccca gaatgacgtg tgacgagaac gtgggaagta ctgccaaact tggggaacgc      7260
+     tacgagcccc gtgtccagag gtttgatcag ggctcaatga catagaccca gctgaccagg      7320
+     cacgcatggc tgacctcagt ctcagcccct ccagagctac gccgataatg cggccaaggc      7380
+     cccaccatac atcacattgt cagctagacc atccagcatg gctcaaggcc caggtaaaca      7440
+     ccaacattcc ctcaggcaag accttccaag ggcttagcgg tcatttccca ggagccaagg      7500
+     caaaggctac cctttctctg gcacagcagt tcatccttga ccacccaaga ccacattctt      7560
+     acactgaatg agctctcctg tgcagcagcc attttcttct ctaagcagaa gagagcccag      7620
+     caagctggag gaggctgaag agagaggctt cctgctggtc atctgggtcc agaatgcctg      7680
+     gagatctctg ctcagccctg gtgcccagca gccctggtgt gcatcctgca gggcagcctt      7740
+     cccgccggag tcctggactt gctcagggcc actccccttg cccatgtcaa ccaaagtcag      7800
+     gctgccggtt ctgcttcttc tgtctgagcc catgaccagt gctgggacta actgtccccc      7860
+     aggcgggctc acggtggtac gaggccagct tggagaactg tctcagctct ctggtcctct      7920
+     cgtcagttgg gtctctgatt ggaaagtccc ttggacactt taccatcccc attggacttt      7980
+     cactttcccc caggctccca tcagctgctc ggaagagtgg tcaccctgga ggccactgcc      8040
+     caccagccag gcacccccca aatgcaaccg cagccagcac tgccagccac tggcaaggct      8100
+     gttcagacat gtggctcctc tgatccacgc cttgtccttt ggatcagtcc acggagcagt      8160
+     gtgccaagct caggctctgt cacccacagc tcatgccacc ttccaggcag aacaccactg      8220
+     ctgacccagg ggcatggcca ccccgggggc tggcgtctcg ctgaccccca gaagcccctc      8280
+     tcagggtgtc cccttcctgt ccccagacaa ggatgaccag ctgatctgtg tgaacgagaa      8340
+     cggcggctgt gagcagtact gcagtgacca cacgggcacc aagcgctcct gtcggtgcca      8400
+     cgaggggtac tctctgctgg cagacggggt gtcctgcaca cccacaggtg accaggcttc      8460
+     atgtcccagt cccagatgac accagtccct gtcccactag gattatctta ctggacaaaa      8520
+     gacgggtggg actggccttc acatctactg agcactaact atgcactgac caattgtgag      8580
+     gtgggatctg ggcaccaagg gtggcacagg ccagcagcga ccagtgacta ggatgggcac      8640
+     cctgggggca atccctgaat ggcctcaggc cccctgccaa cttctaggca gaccagggga      8700
+     gccaagcaag gcactatctc acgtccaact gcccactcgc aggaatcctc cgccagggtt      8760
+     catgaatcta cttcggcaca gccaatgtct gtactgactg ctgcccactc tgcattccaa      8820
+     aactcgtaaa ggctcctggg aaaatgggat gtttctccaa accagcctgg aacgaatggg      8880
+     ctgcacttcc aaaagcaggg acaccccaca cccactgtct ctaaagaggc ggaacgtgcc      8940
+     caccctggcc acacagcctg ggactcagcc tgccacctcc tcgggcttcc tttctggccc      9000
+     aagaccttga ttgaagcaga tcaaaactaa gcatgggatc aaaacaacac agtttgattc      9060
+     atctttaggt agaatttcat tcaccttcta ctaaagtcaa acaacacatc ttctccctga      9120
+     aaagtgagca gagggcggtt ttaagacgta agccctctgt ttcctccaaa accagccctg      9180
+     accattgtct cctcagccag ccacttcttc aagggcctct catggccggg ccccaccagt      9240
+     caggcccagc cgaggccctg ccttccacca cccctgggcc ctgggagctc ctgctcctgg      9300
+     gggcctccca tagcctcggc ctcaaggcct ctcagaggat gggtgtttct gaatctttcc      9360
+     tagtggcacg ttcatccctc acaaatctct gcatctttct gacttttgtt ttacacagtt      9420
+     gaatatccat gtggaaaaat acctattcta gaaaaaagaa atgccagcaa accccaaggc      9480
+     cgaattgtgg ggggcaaggt gtgccccaaa ggggagtgtc catggcaggt aaggcttccc      9540
+     ctggcttcag gattccaagc cctgagggtc ttgaagcctt ttgaatgtga acaacagctc      9600
+     tggaagggaa aatgggcagg tcagcccaag cccacaggct ccaagtcagc acacctagca      9660
+     cctccagctc gcggcacccc catgctttta gtggggcaag gaaggagaaa agaaaacgac      9720
+     actcactgag ggtctaccct gtgcagagaa ccctgcgaga tgccccatcc gagttgtcac      9780
+     gtcgtcctca cggttactct ttgaggtggg atctttgcct gatctttgca aaatcaggag      9840
+     cattggatca aagctatgtg aagatcctgt gaggtgaaca gtgaaatctc acagcgacat      9900
+     ttgtattctt gggccgtgcc caagagcacg tctcggctag agaggggcac agcctcccag      9960
+     agccaggtct gagcagcttt gcctgggagg gatctgcaaa gaccccagga tttcagaaag     10020
+     aaattgtgca atgccagagg ttccttggca tgcccgggag ggcgagtcat cagagaaaca     10080
+     atgacagcaa tgtgacttcc acacctcctg tccccccgcc caggtcctgt tgttggtgaa     10140
+     tggagctcag ttgtgtgggg ggaccctgat caacaccatc tgggtggtct ccgcggccca     10200
+     ctgtttcgac aaaatcaaga actggaggaa cctgatcgcg gtgctgggtg ggtaccactc     10260
+     tcccctgtcc gaccgcggtg ctgggtgggt gccactcttc cctgtccgac cgcggtgctg     10320
+     ggtgggtgcc actctcccct gtccgaccgc ggtgctgggt gggtgccact ctcccctgtc     10380
+     cgaccgcggt gctgggtggg tgccactctc cgctgtccga ccgcggtgct gggtgggtac     10440
+     cactctcccc tgtctgaccg cagctctcaa gtgtctcagg ggctgtggct ctgggcttcg     10500
+     tgctgtcact tccacagaca gacagacatc cccaaaaggg gagcaaccat gctgggcacg     10560
+     actgcctgtg gcaccgtgct ctcagccact ttcccatgcc caaataaaac gataaaagac     10620
+     tgggggcttc tgcccatcct gcctcacttg accaagagcc cagaagagga tgcgacaccc     10680
+     agggcctcat gggaccaccg gctggcaggg gttctgctca ctgggtttat gggtgagacg     10740
+     agcactccca ggagggccac tgggccggga agaactgtgg agaatcgggg cacgccctgt     10800
+     cctcccagct gccagggcac agcatccctt ccccacctgc aacacccaga ccccagattc     10860
+     accccagttc acttgtcccc acacgagcca caggctgcca cctggggcag gctggcccac     10920
+     cttggggtta gatgcaggtc cccttgcccc agaaggagac tgcagcccct gcagacctag     10980
+     aaatggccac agcccatccc catgcaccag ggggtgaggt ggcaggtggt ggaaagggcc     11040
+     tgaggggggc ttcttccttc caggcgagca cgacctcagc gagcacgacg gggatgagca     11100
+     gagccggcgg gtggcgcagg tcatcatccc cagcacgtac gtcccgggca ccaccaacca     11160
+     cgacatcgcg ctgctccgcc tgcaccagcc cgtggtcctc actgaccatg tggtgcccct     11220
+     ctgcctgccc gaacggacgt tctctgagag gacgctggcc ttcgtgcgct tctcattggt     11280
+     cagcggctgg ggccagctgc tggaccgtgg cgccacggcc ctggagctca tggtcctcaa     11340
+     cgtgccccgg ctgatgaccc aggactgcct gcagcagtca cggaaggtgg gagactcccc     11400
+     aaatatcacg gagtacatgt tctgtgccgg ctactcggat ggcagcaagg actcctgcaa     11460
+     gggggacagt ggaggcccac atgccaccca ctaccggggc acgtggtacc tgacgggcat     11520
+     cgtcagctgg ggccagggct gcgcaaccgt gggccacttt ggggtgtaca ccagggtctc     11580
+     ccagtacatc gagtggctgc aaaagctcat gcgctcagag ccacgcccag gagtcctcct     11640
+     gcgagcccca tttccctagc ccagcagccc tggcctgtgg agagaaagcc aaggctgcgt     11700
+     cgaactgtcc tggcaccaaa tcccatatat tcttctgcag ttaatggggt agaggagggc     11760
+     atgggaggga gggagaggtg gggagggaga cagagacaga aacagagaga gacagagaca     11820
+     gagagagact gagggagaga ctctgaggac atggagagag actcaaagag actccaagat     11880
+     tcaaagagac taatagagac acagagatgg aatagaaaag atgagaggca gaggcagaca     11940
+     ggcgctggac agaggggcag gggagtgcca aggttgtcct ggaggcagac agcccagctg     12000
+     agcctcctta cctcccttca gccaagcccc acctgcacgt gatctgctgg ccctcaggct     12060
+     gctgctctgc cttcattgct ggagacagta gaggcatgaa cacacatgga tgcacacaca     12120
+     cacacgccaa tgcacacaca cagagatatg cacacacacg gatgcacaca cagatggtca     12180
+     cacagagata cgcaaacaca ccgatgcaca cgcacataga gatatgcaca cacagatgca     12240
+     cacacagata tacacatgga tgcacgcaca tgccaatgca cgcacacatc agtgcacacg     12300
+     gatgcacaga gatatgcaca caccgatgtg cgcacacaca gatatgcaca cacatggatg     12360
+     agcacacaca caccaagtgc gcacacacac cgatgtacac acacagatgc acacacagat     12420
+     gcacacacac cgatgctgac tccatgtgtg ctgtcctctg aaggcggttg tttagctctc     12480
+     acttttctgg ttcttatcca ttatcatctt cacttcagac aattcagaag catcaccatg     12540
+     catggtggcg aatgccccca aactctcccc caaatgtatt tctcccttcg ctgggtgccg     12600
+     ggctgcacag actattcccc acctgcttcc cagcttcaca ataaacggct gcgtctcctc     12660
+     cgcacacctg tggtgcctgc cacccactgg gttgcccatg attcattttt ggagcccccg     12720
+     gtgctcatcc tctgagatgc tcttttcttt cacaattttc aacatcactg aaatgaaccc     12780
+     tcacatggaa gctatttttt aaaaacaaaa gctgtttgat agatgtttga ggctgtagct     12840
+     cccaggatcc                                                            12850
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/footprinter.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/footprinter.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/footprinter.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,262 @@
+>TETRAODON
+tcgtcaatgccctaatataattcttagtaaaaatacaggtataaaaatgcaacgtattttaatacaggcattttaataca
+                                                                                
+                                                                                
+
+ggtatacaaatggcacgtattgtttgaaacctttaaacatgtgataagtgttaacacgtacactagccgtgtgtttttac
+                                                                                
+                                                                                
+
+tggaactgctcaaacaatacgtggtgtttaagtttacttttgatgtacttttgtagtgttggatgtacttttgctgaccg
+                                                                                
+                                                                                
+
+ttatcccagttctttagttttcacaacagctgttgtgtttgttgtcttacgtaacgtggtgcagatgtcgccaatgccgg
+                                                                                
+                                                                                
+
+agagcctgtttcccagcaggcccgccccttttacaggatgcaccatatttggaaaatgacgtactggggatcccgtttct
+                               2222222222222222222222                           
+                               2222222222211111111111                           
+
+ggaatattccatttattggaggtaccattaaacgacaacttttacgttagaatttccgctttaaacggtaaaaattctca
+                                                                                
+                                                                                
+
+aatatgttaagtttttgcgtgttagatagaagaacgtgagtggttttgtgttactgccgcgttgttaggtcggtgttttg
+                                                                                
+                                                                                
+
+gtgtttgcttcgcggcgtttgaaagggaacgctccccttttgctgacgtcatcagctcaggctatataagcgcgcgtgcg
+                                                                                
+                                                                                
+
+gcgactccgccgaaagtttcagacgagagcaagagaagccagagcacgtccgcgggctcacacaccgacctgaccgaaga
+                                                                                
+                                                                                
+
+ttccctcctttccagggttgaaatcccgaccgcagcctttgtttggatttaccagcttgctttttttcgcccggacagcg
+                                                                                
+                                                                                
+
+
+
+
+
+>CHICKEN
+ggctctccggagggccacgcggccccggggccgccccgcactgcttcccggccctccgtgcgcgtgaccccgccgggcgg
+                                                                                
+                                                                                
+
+acgcgtccccgccgcgagccgcgacggccccgcacacgcggcacccgcccgccgcagggcacggggacgaagggcgccgc
+                                                                                
+                                                                                
+
+gccccgccgcgccccacgcgtggccgcgcccggccccgttagtacggaacccccgccgggccgctcggggcggggggtgc
+                                                                                
+                                                                                
+
+cggcggccggggggcgctacgcgggggggggggggggctggggcggccccgtcccaccgcgtcccgtcccgcgccgcact
+                                                                                
+                                                                                
+
+tggccgcagcctggagcgcttcccgtcagtcacgccccctcgcacaggatgtatcccatataaggatatctgcgtcagtg
+                                          22222222222  22222222222              
+                                          22222222222  11111111111              
+
+ggttcccgagcccggccgtaccactccgcgaggggggggggaaacccggaaccgctccccccgccccgccgcgctcctcc
+                                                                                
+                                                                                
+
+cgccccgcggccccgttgttcccggcgcccggcggctcggcagggcgggcggcggggccgggcggggcgggacgagacgg
+                                                                                
+                                                                                
+
+acgcgggcagagaggccccctccgcgggcgggcgtggtgcgtcccacgtgacgtaggcgattcacgcggcggcagggtat
+                                                                                
+                                                                                
+
+aaagggggcggcgccgagcgatactccagcggcagaggcagctccgacagcggagagagagacgcgcggcgcggaccgga
+                                                                                
+                                                                                
+
+cacagggacgcagccggcccctcttcgccttctcccgcagggcgctccgctagcaccgcacggccccgcaaacagccgac
+                                                                                
+                                                                                
+
+
+
+
+
+>MOUSE
+ataccagagactcaaaaaaaaaaaaaaagttccagattgctggacaatgacccgggtctcatcccttgaccctgggaacc
+                                                                                
+                                                                                
+
+gggtccacattgaatcaggtgcgaatgttcgctcgccttctctgcctttcccgcctcccctcccccggccgcggccccgg
+                                                                                
+                                                                                
+
+ttccccccctgcgctgcaccctcagagttggctgcagccggcgagctgttcccgtcaatccctccctcctttacacagga
+                                                                         2222222
+                                                                         2222222
+
+tgtccatattaggacatctgcgtcagcaggtttccacggccggtccctgttgttctgggggggggaccatctccgaaatc
+22222222222222                                                                  
+22211111111111                                                                  
+
+ctacacgcggaaggtctaggagaccccctaagatcccaaatgtgaacactcataggtgaaagatgtatgccaagacgggg
+                                                                                
+                                                                                
+
+gttgaaagcctggggcgtagagttgacgacagagcgcccgcagagggccttggggcgcgcttccccccccttccagttcc
+                                                                                
+                                                                                
+
+gcccagtgacgtaggaagtccatccattcacagcgcttctataaaggcgccagctgaggcgcctactactccaaccgcga
+                                                                                
+                                                                                
+
+ctgcagcgagcaactgagaagactggatagagccggcggttccgcgaacgagcagtgaccgcgctcccacccagctctgc
+                                                                                
+                                                                                
+
+tctgcagctcccaccagtgtctacccctggaccccttgccgggctttccccaaacttcgacc
+                                                              
+                                                              
+
+>HAMSTER
+ggctgagaaaattataacatttttttttgcactgatttgcggtgggcaagcttttcaagcctttagggctgcgtaccagt
+                                                                                
+                                                                                
+
+ttctcctaatatcagattcagaaatgtcccaaattcctggacaatggaaaggaggcgacccccgccccccccaccccagt
+                                                                                
+                                                                                
+
+ttcatggccctgacctgggaacagggtccacattgaatcaggtgcgaatgttcgctcgccttctctacctttcccgcctc
+                                                                                
+                                                                                
+
+ccctcccccggccgcggccccagctcccccctcccccgcgctgcaccctcagagttggctgcagccggcaagcagttccc
+                                                                                
+                                                                                
+
+gtcaatccctttcccacaggatgtccatattaggacatctgcgtcagcaggtttccacggccggtccttgtagacctggg
+              222222222222222222222                                             
+              222222222211111111111                                             
+
+ggtgacgatccccaaaatcctacatgcggagagtccaggagaccccctaagacccctattgtgaacacaaatgggtgaaa
+                                                                                
+                                                                                
+
+attacatgtcaagacggaggcgggggacccggggcgcggagttgacgccagggcggccgcagaaggcctgggggcgcgcg
+                                                                                
+                                                                                
+
+gctcccctccgtcgccacagttccgcccagtgacgtgtaatgttcattcacaagcgcttctataaaggcaccggctgagg
+                                                                                
+                                                                                
+
+cgcctactactccaaccgcgactgcagctagcgactgagaagactggatagagccggcggcgggaacgagcagtgaccgc
+                                                                                
+                                                                                
+
+gctcccacccagctctgctccgcagctcgcaccagtgtctacccctggacccctcgccggactttccctagatctcgacc
+                                                                                
+                                                                                
+
+
+
+
+
+>HUMAN
+agaattggggatgggggtaggggcgcattccttcgggagccgaggcttaagtcctcggggtcctgtactcgatgccgttt
+                                                                                
+                                                                                
+
+ctcctatctctgagcctcagaactgtcttcagtttccgtacaagggtaaaaaggcgctctctgccccatcccccccgacc
+                                                                                
+                                                                                
+
+tcgggaacaagggtccgcattgaaccaggtgcgaatgttctctctcattctgcgccgttcccgcctcccctcccccagcc
+                                                                                
+                                                                                
+
+gcggcccccgcctccccccgcactgcaccctcggtgttggctgcagcccgcgagcagttcccgtcaatccctcccccctt
+                                                                                
+                                                                                
+
+acacaggatgtccatattaggacatctgcgtcagcaggtttccacggcctttccctgtagccctggggggagccatcccc
+ 222222222222222222222                                                          
+ 222222222211111111111                                                          
+
+gaaacccctcatcttggggggcccacgagacctctgagacaggaactgcgaaatgctcacgagattaggacacgcgccaa
+                                                                                
+                                                                                
+
+ggcgggggcagggagctgcgagcgctggggacgcagccgggcggccgcagaagcgcccaggcccgcgcgccacccctctg
+                                                                                
+                                                                                
+
+gcgccaccgtggttgagcccgtgacgtttacactcattcataaaacgcttgttataaaagcagtggctgcggcgcctcgt
+                                                                                
+                                                                                
+
+actccaaccgcatctgcagcgagcaactgagaagccaagactgagccggcggccgcggcgcagcgaacgagcagtgaccg
+                                                                                
+                                                                                
+
+tgctcctacccagctctgcttcacagcgcccacctgtctccgcccctcggcccctcgcccggctttgcctaaccgccacg
+                                                                                
+                                                                                
+
+
+
+
+
+>PIG
+gcaccgaattagtgatagaatgaggaatgggggtggaggcgcattccttcgggggccaaggcttaagcccaaggggctgt
+                                                                                
+                                                                                
+
+gtacctatctcgcctatacctgaacctcagaactcttcccgggtttctgtacacaagtaaaaaggcgcctacgccccatc
+                                                                                
+                                                                                
+
+ccccaaccccgggaacaagggtccgcattgaaccaggtgcgaatgttcgctcgccttctctgcctttcccgcctcccctc
+                                                                                
+                                                                                
+
+cccccggccacggcccccgcctcccccccgcgctgcacccttggtgttggctgcagcccgcgagcagttcccgtcaatcc
+                                                                                
+                                                                                
+
+ctccctcccgtttacacaggatgtccatattaggacatctgcgtcagcaggtttccacggccgttccctgaagttgtggg
+              222222222222222222222                                             
+              222222222211111111111                                             
+
+gggagccatccccgaagtccctcattttagggggtctacgcgaccccaagaccgagactgagaaaagctcagaagataaa
+                                                                                
+                                                                                
+
+gaaacacaataagacgaaaggcagggggctgagacggcggaagaggcagcaaagaggctgcagaggtggcaagatccgag
+                                                                                
+                                                                                
+
+cgccacccctcgagagctgcaatggtagcgctccgtgacgtattgggctcattcataaaatgctgttataaaagcagtgg
+                                                                                
+                                                                                
+
+cctagtactccaaccgcatctgcagcgagcagctaagctgagacagaaccggcagcggcgcagcgagcgagcagcgaccg
+                                                                                
+                                                                                
+
+cgctcttacccagctcagccccgcagctcctacctgtctccgcccctcagcccctcgcccggctttgactacctgcgatc
+                                                                                
+                                                                                
+
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/frac_problems.blast
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/frac_problems.blast	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/frac_problems.blast	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,485 @@
+BLASTN 2.2.14 [May-07-2006]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+Database: LTR_STRUC_8x_na 
+           791 sequences; 6,808,925 total letters
+
+Searching
+
+Query= jgi|Phypa1|75037|fgenesh1_pg.scaffold_51000008
+         (2511 letters)
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+scaffold_5_1_Scaffolds_seq10928_PT_B1_L1_270_rprt_txt                1356   0.0  
+
+>scaffold_5_1_Scaffolds_seq10928_PT_B1_L1_270_rprt_txt
+          Length = 16463
+
+ Score = 1356 bits (684), Expect = 0.0
+ Identities = 684/684 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1088 ctacaatacgaggtacaataggttgggaggatcctgttgaatctctttcagttcatgcat 1147
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8914 ctacaatacgaggtacaataggttgggaggatcctgttgaatctctttcagttcatgcat 8973
+
+                                                                        
+Query: 1148 atattgctaagagccaacatgaagctttgatggaagaaaagaaacgaggaaattttgatg 1207
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8974 atattgctaagagccaacatgaagctttgatggaagaaaagaaacgaggaaattttgatg 9033
+
+                                                                        
+Query: 1208 atacgagagaaggaaattcttctaagaggcaaactcgaggagacaaggctcgggaggcag 1267
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9034 atacgagagaaggaaattcttctaagaggcaaactcgaggagacaaggctcgggaggcag 9093
+
+                                                                        
+Query: 1268 cttcacaagaattgcctctaaaggatacctcaacatcattggaagaaaaaacaagggaga 1327
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9094 cttcacaagaattgcctctaaaggatacctcaacatcattggaagaaaaaacaagggaga 9153
+
+                                                                        
+Query: 1328 caaatgataagggcaaatcaattgcttataaactactttctgacattgaagtggcaacaa 1387
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9154 caaatgataagggcaaatcaattgcttataaactactttctgacattgaagtggcaacaa 9213
+
+                                                                        
+Query: 1388 atttgaaaggggtgttggaagaacgtatattaaatgctaaggtggaattcacattgaaag 1447
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9214 atttgaaaggggtgttggaagaacgtatattaaatgctaaggtggaattcacattgaaag 9273
+
+                                                                        
+Query: 1448 aagttttggaaattgcaaagaaggagtttcatgatgttatcattgacagcattaagcaaa 1507
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9274 aagttttggaaattgcaaagaaggagtttcatgatgttatcattgacagcattaagcaaa 9333
+
+                                                                        
+Query: 1508 agagaaaattaatggacaaaactggaatgagtcatactattgatgcaagaatatataaag 1567
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9334 agagaaaattaatggacaaaactggaatgagtcatactattgatgcaagaatatataaag 9393
+
+                                                                        
+Query: 1568 atgaagaagaggttgatattggttacaagcaactaaaaaatgaaaagaataggtataatc 1627
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9394 atgaagaagaggttgatattggttacaagcaactaaaaaatgaaaagaataggtataatc 9453
+
+                                                                        
+Query: 1628 aacgaatacgttttgaagattatattgataaggaaatggaaaccttgagccattatactc 1687
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9454 aacgaatacgttttgaagattatattgataaggaaatggaaaccttgagccattatactc 9513
+
+                                                                        
+Query: 1688 gaaagcattggacaagagcaactaatgaggtattggtgaaagtaggagatatagaagaac 1747
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9514 gaaagcattggacaagagcaactaatgaggtattggtgaaagtaggagatatagaagaac 9573
+
+                                    
+Query: 1748 caattgttgttttagttgaccatg 1771
+            ||||||||||||||||||||||||
+Sbjct: 9574 caattgttgttttagttgaccatg 9597
+
+
+
+ Score = 1154 bits (582), Expect = 0.0
+ Identities = 582/582 (100%)
+ Strand = Plus / Plus
+
+                                                                         
+Query: 1930  ccagattatggaggtacaaaagaggaggtactctctcgagatgaaggtgccattgatgta 1989
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10015 ccagattatggaggtacaaaagaggaggtactctctcgagatgaaggtgccattgatgta 10074
+
+                                                                         
+Query: 1990  agttttattggtattttggagaatgaaaatattatgtataagaaagttgatgattttaaa 2049
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10075 agttttattggtattttggagaatgaaaatattatgtataagaaagttgatgattttaaa 10134
+
+                                                                         
+Query: 2050  agaaaagaaagaaaaaaaatatatcatattgaaggagaattatgcaagttaggtacattg 2109
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10135 agaaaagaaagaaaaaaaatatatcatattgaaggagaattatgcaagttaggtacattg 10194
+
+                                                                         
+Query: 2110  caagatgatgagaggattttaggtgtttatgaagaaaatgggctggaagattcattggaa 2169
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10195 caagatgatgagaggattttaggtgtttatgaagaaaatgggctggaagattcattggaa 10254
+
+                                                                         
+Query: 2170  ttgattacatctattcggttagaagataatgataagattgttgagattcattcgaaagaa 2229
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10255 ttgattacatctattcggttagaagataatgataagattgttgagattcattcgaaagaa 10314
+
+                                                                         
+Query: 2230  gtttataccatacttgaatcttttcaagcaccagaagtcactatagagacaagatataaa 2289
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10315 gtttataccatacttgaatcttttcaagcaccagaagtcactatagagacaagatataaa 10374
+
+                                                                         
+Query: 2290  atggcagataagaagataaagcctgttgccggaccattaccaaaagattctaaggaacaa 2349
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10375 atggcagataagaagataaagcctgttgccggaccattaccaaaagattctaaggaacaa 10434
+
+                                                                         
+Query: 2350  atgggagaagaatcaaaagaagcaagtttaagggatccaatgagtattggacatcagttt 2409
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10435 atgggagaagaatcaaaagaagcaagtttaagggatccaatgagtattggacatcagttt 10494
+
+                                                                         
+Query: 2410  actaaagagacttttgaagaattgaagattgattctgatggttctttattgccggaagaa 2469
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10495 actaaagagacttttgaagaattgaagattgattctgatggttctttattgccggaagaa 10554
+
+                                                       
+Query: 2470  attacttacttcaaaaagatgttggctaaacaaggtagatag 2511
+             ||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 10555 attacttacttcaaaaagatgttggctaaacaaggtagatag 10596
+
+
+
+ Score =  918 bits (463), Expect = 0.0
+ Identities = 463/463 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 336  ggaatatttcaaggtatttacgataccaaagataagagaccatattacattgattacgga 395
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 7931 ggaatatttcaaggtatttacgataccaaagataagagaccatattacattgattacgga 7990
+
+                                                                        
+Query: 396  tcgttatggaaattcatgggaggttttttcacatgcattgaaagatgaatatttcttgga 455
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 7991 tcgttatggaaattcatgggaggttttttcacatgcattgaaagatgaatatttcttgga 8050
+
+                                                                        
+Query: 456  agatgctgatcacattactaagaagttgtttctggaatggattgaacggcctaataagaa 515
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8051 agatgctgatcacattactaagaagttgtttctggaatggattgaacggcctaataagaa 8110
+
+                                                                        
+Query: 516  tttacaagcaacagagctactaagggaatttgagaggcagtactctcaattattgaaggt 575
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8111 tttacaagcaacagagctactaagggaatttgagaggcagtactctcaattattgaaggt 8170
+
+                                                                        
+Query: 576  agaaaaattgacattggagccgaacaaatttgatctatttcttcaagctgctgatggaga 635
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8171 agaaaaattgacattggagccgaacaaatttgatctatttcttcaagctgctgatggaga 8230
+
+                                                                        
+Query: 636  attgcaagggaaattagaattccttcttgaagacaaagaagaaggtgaaggattgacaac 695
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8231 attgcaagggaaattagaattccttcttgaagacaaagaagaaggtgaaggattgacaac 8290
+
+                                                                        
+Query: 696  aaagtggaagaacattgaagatgcggtggagttgcttacaaaaagagaaaggaggaaaga 755
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8291 aaagtggaagaacattgaagatgcggtggagttgcttacaaaaagagaaaggaggaaaga 8350
+
+                                                       
+Query: 756  taggagtaatattccgaaaatagtacaagcacccaaagctcca 798
+            |||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8351 taggagtaatattccgaaaatagtacaagcacccaaagctcca 8393
+
+
+
+ Score =  573 bits (289), Expect = e-163
+ Identities = 289/289 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 799  aatgtttcaaaacaaggatatgtacaaagatgcatttggtgtgatgatgctttatacact 858
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8550 aatgtttcaaaacaaggatatgtacaaagatgcatttggtgtgatgatgctttatacact 8609
+
+                                                                        
+Query: 859  aggaaagattgcaatgaatttaataatatgatacggcaaggaattatttgctggaaagat 918
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8610 aggaaagattgcaatgaatttaataatatgatacggcaaggaattatttgctggaaagat 8669
+
+                                                                        
+Query: 919  ggaaagatagcattaaaagatagggaggatcttcttctaaccaactttgacaagagagga 978
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8670 ggaaagatagcattaaaagatagggaggatcttcttctaaccaactttgacaagagagga 8729
+
+                                                                        
+Query: 979  atgagagcattggttcaagattacttaaaagagcatgaaacagcagcacgagaaagtaca 1038
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8730 atgagagcattggttcaagattacttaaaagagcatgaaacagcagcacgagaaagtaca 8789
+
+                                                             
+Query: 1039 agttatggtgctagagtagatgatgaccttagtggaagtacggaaacta 1087
+            |||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 8790 agttatggtgctagagtagatgatgaccttagtggaagtacggaaacta 8838
+
+
+
+ Score =  315 bits (159), Expect = 2e-85
+ Identities = 159/159 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1771 gctaataacacatggggcgaattatatggagctcgtccagatgtcaaaatttggattggg 1830
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9684 gctaataacacatggggcgaattatatggagctcgtccagatgtcaaaatttggattggg 9743
+
+                                                                        
+Query: 1831 gatgttgcgacggagcaacatttctttgttcaagatacaacgtcttatccgttgatttta 1890
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 9744 gatgttgcgacggagcaacatttctttgttcaagatacaacgtcttatccgttgatttta 9803
+
+                                                   
+Query: 1891 gaacaaccttatattatggcaatacggatggaaacaaaa 1929
+            |||||||||||||||||||||||||||||||||||||||
+Sbjct: 9804 gaacaaccttatattatggcaatacggatggaaacaaaa 9842
+
+
+
+ Score =  230 bits (116), Expect = 7e-60
+ Identities = 116/116 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 95   acagtcagtggatacaagatcgtgagtataaagcggtatcgaagattagcagattgggtt 154
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 7360 acagtcagtggatacaagatcgtgagtataaagcggtatcgaagattagcagattgggtt 7419
+
+                                                                    
+Query: 155  gttcgggattctttgtaacgtctagagacctaccggctgaggcaaaaaggattttg 210
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 7420 gttcgggattctttgtaacgtctagagacctaccggctgaggcaaaaaggattttg 7475
+
+
+
+ Score =  186 bits (94), Expect = 1e-46
+ Identities = 94/94 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1    atggtgtttaattgggcagcttatgtagcaactcgtattcatgcggaaatgggagccaag 60
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 6461 atggtgtttaattgggcagcttatgtagcaactcgtattcatgcggaaatgggagccaag 6520
+
+                                              
+Query: 61   tgcaaaatagagaagtttacagtgttgtcgtatt 94
+            ||||||||||||||||||||||||||||||||||
+Sbjct: 6521 tgcaaaatagagaagtttacagtgttgtcgtatt 6554
+
+
+
+ Score =  182 bits (92), Expect = 2e-45
+ Identities = 110/116 (94%)
+ Strand = Plus / Plus
+
+                                                                         
+Query: 95    acagtcagtggatacaagatcgtgagtataaagcggtatcgaagattagcagattgggtt 154
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 14136 acagtcagtggatacaagatcgtgagtataaagcggtatcgaagattagcagattgggtt 14195
+
+                                                                     
+Query: 155   gttcgggattctttgtaacgtctagagacctaccggctgaggcaaaaaggattttg 210
+             ||||||| |||| ||||| ||||||||||||||| |||| | ||||||||||||||
+Sbjct: 14196 gttcggggttctctgtaatgtctagagacctaccagctgggacaaaaaggattttg 14251
+
+
+
+ Score =  139 bits (70), Expect = 2e-32
+ Identities = 70/70 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 210  gagaacgaaaataaaaattttgaaggatctttgggaggagtttctggtggaggttcttct 269
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 7717 gagaacgaaaataaaaattttgaaggatctttgggaggagtttctggtggaggttcttct 7776
+
+                      
+Query: 270  caaggacaag 279
+            ||||||||||
+Sbjct: 7777 caaggacaag 7786
+
+
+
+ Score =  119 bits (60), Expect = 2e-26
+ Identities = 60/60 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 278  agcaacagcaaagcaatgatcaagaggaagctttggcaaccaaagcattgcaagcagtgg 337
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 7844 agcaacagcaaagcaatgatcaagaggaagctttggcaaccaaagcattgcaagcagtgg 7903
+
+
+  Database: LTR_STRUC_8x_na
+    Posted date:  Sep 4, 2006  3:29 PM
+  Number of letters in database: 6,808,925
+  Number of sequences in database:  791
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Sequences: 791
+Number of Hits to DB: 350,368
+Number of extensions: 23312
+Number of successful extensions: 1851
+Number of sequences better than 1.0e-04: 233
+Number of HSP's gapped: 1699
+Number of HSP's successfully gapped: 1236
+Length of query: 2511
+Length of database: 6,808,925
+Length adjustment: 17
+Effective length of query: 2494
+Effective length of database: 6,795,478
+Effective search space: 16947922132
+Effective search space used: 16947922132
+X1: 11 (21.8 bits)
+X2: 15 (29.7 bits)
+X3: 25 (49.6 bits)
+S1: 15 (30.2 bits)
+S2: 24 (48.1 bits)
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+Database: LTR_STRUC_8x_na 
+           791 sequences; 6,808,925 total letters
+
+Searching
+
+Query= jgi|Phypa1|96131|fgenesh1_pg.scaffold_296000015
+         (456 letters)
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+scaffold_5_6_Scaffolds_seq_8392_PT_B8_L9_282_rprt_txt                 276   2e-74
+
+>scaffold_5_6_Scaffolds_seq_8392_PT_B8_L9_282_rprt_txt
+          Length = 8149
+
+ Score =  276 bits (139), Expect = 2e-74
+ Identities = 157/163 (96%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 1    atgaatatgaaagaccttaatgaaggtacagagattccacagcaaaaagatgaaagtgat 60
+            ||||||||| |||||||||||||||||||||||||||| |||||| ||||||||||||||
+Sbjct: 1524 atgaatatggaagaccttaatgaaggtacagagattccgcagcaagaagatgaaagtgat 1465
+
+                                                                        
+Query: 61   atggataatgaaggtgacgaagacgaactggacaaagacgtaggggatatattcagaatc 120
+            |||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||
+Sbjct: 1464 atggataatgaaggtgacgaagacgaactggacaaagacgtaggggatatattctgaatc 1405
+
+                                                       
+Query: 121  aaacagcaattgggtaaattggtgcataccggaggatgcacag 163
+            ||| ||||||||| |||||||||||||||||||||||||||||
+Sbjct: 1404 aaatagcaattggataaattggtgcataccggaggatgcacag 1362
+
+
+
+ Score =  151 bits (76), Expect = 9e-37
+ Identities = 100/108 (92%)
+ Strand = Plus / Minus
+
+                                                                        
+Query: 164  tcgtttatattccagcagtcaaaactgatagactgcttgactatcgacgaatctccatac 223
+            ||||||||||||||||||| |||||||||||||||||||||||||| || ||||||||||
+Sbjct: 6919 tcgtttatattccagcagtaaaaactgatagactgcttgactatcggcggatctccatac 6860
+
+                                                            
+Query: 224  taccttctacagctagacgcccgacgcgttctcgagcaggaaagcacg 271
+            ||||||||| ||| ||| ||||| ||||||||||||||||| ||||||
+Sbjct: 6859 taccttctagagccagaggcccggcgcgttctcgagcaggagagcacg 6812
+
+
+
+ Score =  143 bits (72), Expect = 2e-34
+ Identities = 96/104 (92%)
+ Strand = Plus / Minus
+
+                                                                       
+Query: 161 cagtcgtttatattccagcagtcaaaactgatagactgcttgactatcgacgaatctcca 220
+           |||||||||||||| ||||||| |||||||||||||||||||||||||| || |||||||
+Sbjct: 120 cagtcgtttatatttcagcagtaaaaactgatagactgcttgactatcggcggatctcca 61
+
+                                                       
+Query: 221 tactaccttctacagctagacgcccgacgcgttctcgagcagga 264
+           |||||||||||| ||| ||||||||| | |||||||||||||||
+Sbjct: 60  tactaccttctagagccagacgcccggctcgttctcgagcagga 17
+
+
+
+ Score = 60.0 bits (30), Expect = 3e-09
+ Identities = 42/46 (91%)
+ Strand = Plus / Minus
+
+                                                         
+Query: 312 ggagccaattctagatccaagacggaagattgaagaaattcagagg 357
+           |||||||||||  |||| |||| |||||||||||||||||||||||
+Sbjct: 739 ggagccaattccggatctaagatggaagattgaagaaattcagagg 694
+
+
+
+ Score = 52.0 bits (26), Expect = 6e-07
+ Identities = 41/46 (89%)
+ Strand = Plus / Minus
+
+                                                          
+Query: 312  ggagccaattctagatccaagacggaagattgaagaaattcagagg 357
+            ||||||||||   |||| |||| |||||||||||||||||||||||
+Sbjct: 7543 ggagccaatttcggatctaagatggaagattgaagaaattcagagg 7498
+
+
+  Database: LTR_STRUC_8x_na
+    Posted date:  Sep 4, 2006  3:29 PM
+  Number of letters in database: 6,808,925
+  Number of sequences in database:  791
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Sequences: 791
+Number of Hits to DB: 50,062
+Number of extensions: 3603
+Number of successful extensions: 1243
+Number of sequences better than 1.0e-04: 157
+Number of HSP's gapped: 985
+Number of HSP's successfully gapped: 621
+Length of query: 456
+Length of database: 6,808,925
+Length adjustment: 16
+Effective length of query: 440
+Effective length of database: 6,796,269
+Effective search space: 2990358360
+Effective search space used: 2990358360
+X1: 11 (21.8 bits)
+X2: 15 (29.7 bits)
+X3: 25 (49.6 bits)
+S1: 14 (28.2 bits)
+S2: 23 (46.1 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/geneid_1.0.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/geneid_1.0.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/geneid_1.0.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,37 @@
+## source-version: GeneId v 3.0 EvoI -- geneid at imim.es
+## date Tue Jun 13 10:17:01 2000
+# Optimal Gene Structure. 3 genes. Score = 140.109665 
+# Gene 1(Forward). 2 exons. 155 aa. Score = 36.870944 
+   First     6090     6155	 1.40	+ 0 0	 2.15	 3.63	12.34	 0.00	AA   0: 22 gene_1
+Terminal     6881     7276	35.47	+ 0 0	 3.33	 2.30	97.73	 0.00	AA  23:155 gene_1
+
+>10|GeneId_predicted_protein_1|156_AA
+MVVKAVCVINGDAKGTVFFEQESSGTPVKVSGEVCGLAKGLHGFHVHEFGDNTNGCMSSG
+PHFNPYGKEHGAPVDENRHLGDLGNIEATGDCPTKVNITDSKITLFGADSIIGRTVVVHA
+DADDLGQGGHELSKSTGNAGARIGCGVIGIAKV*
+
+# Gene 2(Reverse). 5 exons. 401 aa. Score = 70.179051 
+Terminal     9100     9217	 5.49	- 1 0	 0.09	 6.09	21.94	 0.00	AA 362:401 gene_2
+Internal     9294     9387	 0.61	- 2 2	 0.89	 4.23	11.34	 0.00	AA 331:362 gene_2
+Internal     9479    10085	46.13	- 0 1	 3.97	 5.00	119.37	 0.00	AA 128:331 gene_2
+Internal    10140    10273	 6.48	- 2 0	 3.37	 2.05	25.56	 0.00	AA  83:127 gene_2
+   First    10334    10580	11.47	- 0 1	 5.04	-1.96	41.57	 0.00	AA   0: 83 gene_2
+
+>10|GeneId_predicted_protein_2|402_AA
+MQETGEHQHVLSRNANRSSVLRFNSNPYRNDHDDSAVPLTTEVGKIYGEYLMLDKLLDAQ
+CMLSEEDKRPVHDEHLFIITHQAYELWFKQIIFEFDSIRDMLDAEVIDETKTLEIVKRLN
+RVVLILKLLVDQVPILETMTPLDFMDFRKYLAPASGFQSLQFRLIENKLGVLTEQRVRYN
+QKYSDVFSDEEARNSIRNSEKDPSLLELVQRWLERTPGLEESGFNFWAKFQESVDRFLEA
+QVQSAMEEPVEKAKNYRLMDIEKRREVYRSIFDPAVHDALVRRGDRRFSHRALQGAIMIT
+FYRDEPRFSQPHQLLTLLMDIDSLITKWRYNHVIMVQRMIGSQQLGTGGSSGYQYLRSTL
+SDRYKVFLDLFNLSTFLIPREAIPPLDETIRKKLINKSV*
+
+# Gene 3(Reverse). 1 exons. 187 aa. Score = 33.059670 
+  Single    20955    21515	33.06	- 0 0	 0.48	-2.76	103.58	 0.00	AA   0:187 gene_3
+
+>10|GeneId_predicted_protein_3|188_AA
+MVPSTVEKIVEVDKHIGCATSGLMADARTLIERARVECQNHWFVYNERMSIESCAQAVST
+LAIQFGDSGDSDGAAAMSRPFGVAILFAGIEAGQPQLWHMDPSGTFVGHGAKAIGSGSEG
+AQQNLQDLFRPDLTLDEAIDISLNTLKQVMEEKLNSTNVEVMTMTKEREFYMFTKEEVEQ
+HIKNIA*
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genemark.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genemark.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genemark.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,151 @@
+GeneMark.hmm (Version 2.2a)
+Sequence name: Hvrn.contig8
+Sequence length: 50124 bp
+G+C content: 44.82%
+Matrices file: /home/software/analysis/gene-prediction/genemark/matdir/osativa.mtx (Oryza sativa)
+Thu Mar 22 10:25:00 2001
+
+Predicted genes/exons
+
+Gene Exon Strand Exon           Exon Range     Exon      Start/End
+  #    #         Type                         Length       Frame
+  1     1   -  Initial       1805      2176     372          3 1
+
+  2     5   -  Terminal      3108      3229     122          3 2
+  2     4   -  Internal      3869      4501     633          1 2
+  2     3   -  Internal      4820      4888      69          1 2
+  2     2   -  Internal      4981      5061      81          1 2
+  2     1   -  Initial       5296      5656     361          1 1
+
+  3     2   -  Terminal      7171      7288     118          3 3
+  3     1   -  Initial       7540      7787     248          2 1
+
+  4     1   +  Single       15431     15757     327          1 3
+
+  5     1   +  Initial      17526     17696     171          1 3
+  5     2   +  Internal     17772     17887     116          1 2
+  5     3   +  Internal     18005     18074      70          3 3
+  5     4   +  Internal     18456     18539      84          1 3
+  5     5   +  Internal     18628     18714      87          1 3
+  5     6   +  Internal     18807     18870      64          1 1
+  5     7   +  Internal     19944     20038      95          2 3
+  5     8   +  Internal     20139     20293     155          1 2
+  5     9   +  Terminal     20779     20788      10          3 3
+
+  6     5   -  Terminal     23000     23061      62          3 2
+  6     4   -  Internal     23397     24101     705          1 2
+  6     3   -  Internal     24708     24821     114          1 2
+  6     2   -  Internal     25079     25356     278          1 3
+  6     1   -  Initial      26970     26977       8          2 1
+
+  7     3   -  Terminal     34218     34310      93          3 1
+  7     2   -  Internal     35900     36301     402          3 1
+  7     1   -  Initial      36392     36448      57          3 1
+
+  8     1   +  Initial      36531     37064     534          1 3
+  8     2   +  Terminal     37153     37161       9          1 3
+
+  9     3   -  Terminal     37880     37917      38          3 2
+  9     2   -  Internal     38938     39006      69          1 2
+  9     1   -  Initial      39080     40214    1135          1 1
+
+ 10     2   -  Terminal     41091     41554     464          3 2
+ 10     1   -  Initial      41635     41713      79          1 1
+ 11     1   -  Single       41744     42061     318          3 1
+
+ 12     1   +  Initial      42171     42212      42          1 3
+ 12     2   +  Terminal     42432     42824     393          1 3
+
+ 13     7   -  Terminal     43798     43932     135          3 1
+ 13     6   -  Internal     44220     44297      78          3 1
+ 13     5   -  Internal     47595     47685      91          3 3
+ 13     4   -  Internal     48393     48526     134          2 1
+ 13     3   -  Internal     48643     49024     382          3 3
+ 13     2   -  Internal     49118     49149      32          2 1
+ 13     1   -  Initial      49457     49507      51          3 1
+
+Predicted gene sequence(s):
+
+>Hvrn.contig8|GeneMark.hmm|gene 1|124_aa
+MEVAVKGYADASFDTDPDDSKSQTGYVFILNGGAVSWCSSKQSVVADSRCEAEYMAALEA
+AKEGVWMKQFMTDLGVVSSALDPLTLLCDNTRAIALAKEPRFHNKTRHIKRRFNLIRDYV
+EGED
+
+>Hvrn.contig8|GeneMark.hmm|gene 2|421_aa
+MAHAKVTLNFNTFLEKAKLKDDGSNFVDWARNLKLLLQAGKKDYVLNVALGDEPPAAADQ
+DAKNAWLACKEDYSVVQCAVLYGLEPGLQRCFERHGAYEMFQELKFIFQKNARIERYETS
+ESELRKEHQVLMVNKATSFKRSGKGKKGYGSLEAQLSKYLAGKKAAKEKSENNGCSISMS
+NIFYGHAPNVRGLFILNLDSDNTHIHNIETKRVRVNNDSAMFLWHCRLGHIGVKRMKKLH
+TDGLLESLDFDSLDTCEPCLMGKMTKTPFSGTMERASDLLEIIHTDVCGPMSAEARGGYR
+YFLTFIDDLSRYGYVYLMKHKSETFEKFKQFQSEVENHRNKKIKFLRSDHGGEYLSFEFG
+AHLRQCGIVSQLTPLGTPQRNEAMVGPDSNKWLEAMKSEIGSMYGNKVWTLEVLPEGRKA
+I
+
+>Hvrn.contig8|GeneMark.hmm|gene 3|121_aa
+MVRRQRLIYRMTSFDYRKVFGHYRECTESDEWVPNVHREGPTHPGKPIGPRGGAPALGGL
+VGQPKRALCAKDRKSKRKKKRKRSRYFTTTGAPSRCRRTHLLIRLACWIKKAEIIIELYV
+C
+
+>Hvrn.contig8|GeneMark.hmm|gene 4|108_aa
+MFTTPKAGGGMYLCLSVGWGIVGRRRVMSGCGQGSEMGLVGLRTRRHWAKTGRGGAAGGA
+ASIGDGPRRAADKATLGEDGPGRGVGRGGVGRRRVASGGGDREEDEWS
+
+>Hvrn.contig8|GeneMark.hmm|gene 5|283_aa
+MDAAVQEAKLLRQVNALIVAHLRDQNLTQAAAAVAAATMTPKADASLPNHLLRLVAKGLA
+AEREEAARGGGAPPAFDSAGGGGLARPLGTSAVDFSVQNVRGPSKTFPKHETRHISDHKN
+VARCAKFSPDGKHFATGSGDTSIKFFEVSKIKQTMLGDSKEGPGRPVVRTFYDHVQLLTQ
+LLVHSTDKVSSFVTNIPGTDHPVAHLYDVNTFTCFLSANPQDSSAAINQVRYSGTGSMYV
+TASKDGSLRIWDGVSAECVRPIIGAHGSVEATSAIFTKDESGF
+
+>Hvrn.contig8|GeneMark.hmm|gene 6|388_aa
+MGSVVFLEGSEGNLQALKDTLQAYQVASAQKVNLQKSSILDGKGCRDEDKGTLKQTIGID
+SEALSERYSGLPTVVGRLKDGSFEYVRERSKGKVSGSVGKASVALQFPSSLCARVLKARY
+FKECTIMNTTCPNAMFWKVLSSEKWVPVAIPPVSEGPHGELASWLLRWFAEVGDPERELM
+VHAVYGLWLARNEARDGKRIVDPRVVEENVYQHIIEWNAIHMKKPRSTTPTLAVRWSPPE
+QGWLKANSDGALAKLRDRGGGGVVLRDHDGAYRGGACYVFRDVSDPEVVEILACRKAVHL
+AVQTGATRVHVEVDSKGMAAMLNDQAKNLSAAGPIVEEIKLLGRTLQGFIVSRVRRSGNH
+GAHLLAREVRSVYTHVILKQPLFDTCRL
+
+>Hvrn.contig8|GeneMark.hmm|gene 7|183_aa
+MVLTEKEAKGFVFSGPVEEAWGLHHDAQFRDLGNNLFLVHFGGEGDWKHSRNNGPWQFDF
+MILKGYDGKTRPSEMVFDSVEAWVRVEDLPLDRRTREFGEALGNWLGEVVKVDVERDGFA
+KGKYLRVRAKIFVYEPVVRYFNLKESVDDEVETAEGQAGPLEAEAEARRGASVSAHSFGR
+WGK
+
+>Hvrn.contig8|GeneMark.hmm|gene 8|180_aa
+MASTVSPWSETPQDILGLVIDRLHSSPDHEEPRLSAAWSRFLLAVPVAAANRRGFQRARR
+TRHSAAADRARFRAVCRSWHLAMRQHVSTPRVLPWIILSDGYFFTPSDNGCRAPRRLPSL
+PKNARCIGSTDGWLALDCTDARNVHTYLLHNPFSDTTVPLPELDPIIANVSEFFAVRKAA
+
+
+>Hvrn.contig8|GeneMark.hmm|gene 9|413_aa
+MPLKFWDETFSTAVYLINRVPSRVIHNQTPLERLFGLTPNYTFLRIFGCAVWPNLRPFNK
+HKLEYRSKQCVFIGYNYLHKGYKCLDVSTGRVYVSQDVIFDEHIFPFASLHPNAGAQLRA
+ELVLLPPTLLNLSSPLTPSAAPNDPMAISTIYAPTSANSVQDSAGISHDFMQPNVSTDLV
+ATENPGLHASESATAAPGAGDPPLQASGSAAAAPGSSPGFVHQPAASVGRSPASTSDPAR
+QPDASAARPPVSDPVRPTTVATALFPASDLVRSPQEIRLQRRAPPTAPWIGRGLPRVVGP
+PCLLPWTREISLDVVTRYRLLRLRPMQRRRCPMQRPPRLLFLLVCHLIRYLLTLRCPVVS
+STICNPCNQHLHPLGLILGEPENLKEAIADPKWKAAMDEEFDWAGCPDDRRST
+
+>Hvrn.contig8|GeneMark.hmm|gene 10|180_aa
+MAAAGKPLDDDELVSYILQGLDSDYNPEARIDAQNGSNTNSFSINLASKGGSRNNNDTRP
+SGPGGGNPAAYRGAGGGFFPNTLVAPPPSGGRDETCQICKRQGHATWHCFKRYDKNFNPP
+PKRQGGGGGNNSGGGGNSSGGNTKSANTVPAAYDVDTNWYLDTGAMDHVTGELEKLAMHD
+
+
+>Hvrn.contig8|GeneMark.hmm|gene 11|105_aa
+MGYLDGTMAEPPAVLTTETDVAGKKEISSTPNPAHVLWYTQDQQVLTFLLASLSRDVLLQ
+VHSLASATGVWTAIQQMFASHSRARHIQLRGQLGNTKKGDSPVAI
+
+>Hvrn.contig8|GeneMark.hmm|gene 12|144_aa
+MVELEEEDDMSMEEVALMTNNSNYLIILIRPGKGVWLPKPDTAPFNLFIDIVFLQGKLYG
+ITQAEDLASVSIDFDDCGMPTVTTVERLIKHPPLESCEFDVWSDAGEKLEADGDMGDEDQ
+VENGGEDHDEALNEVDARIQKENR
+
+>Hvrn.contig8|GeneMark.hmm|gene 13|300_aa
+MSTATSLWDKAALMMREELAVAAVVAGCLDMTKLYVVGAGMFSCVTVALYPVSVIKTRMQ
+VASGEAMRRNALATFKNILKVDGVPGLYRGFGTVITGAIPARIIFLTALEKTKATSLKLV
+EPLQLSESMEAALANGLGGLTASLCSQAVFVPIDVVSQKLMVQGYSGHVRYKGGIDVVQK
+IMKADGPRGLYRGFGLSVMTALGRLDDKEDTPSQLKIVGVQATGGMVAGATSLEDNPLSD
+NVPQFAETSSAGSPLEKERVRQRASATISVTRDCQCSRRPTIGGVRQLGRSLPMRRDGAT
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genewise.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genewise.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genewise.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,325 @@
+genewise $Name: branch-1-5-2 $ (unreleased release)
+This program is freely distributed under a GPL. See source directory
+Copyright (c) GRL limited: portions of the code are from separate copyright
+
+Query protein:       SINFRUP00000067802
+Comp Matrix:         blosum62.bla
+Gap open:            12
+Gap extension:       2
+Start/End            default
+Target Sequence      Scaffold_2042.1
+Strand:              forward
+Start/End (protein)  default
+Gene Paras:          human.gf
+Codon Table:         codon.table
+Subs error:          1e-05
+Indel error:         1e-05
+Model splice?        model
+Model codon bias?    flat
+Model intron bias?   tied
+Null model           syn
+Algorithm            623
+
+genewise output
+Score 2054.68 bits over entire alignment
+Scores as bits over a synchronous coding model
+
+Warning: The bits scores is not probablistically correct for single seqs
+See WWW help for more info
+
+SINFRUP00000067    1 LDEDQLSHSSSRLSRSPLKGVKKAKIMQCKVTLLDNSDYTIDVE      
+                     LDEDQLSHSSSRLSRSPLKGVKKAKIMQCKVTLLDNSDYTIDVE      
+                     LDEDQLSHSSSRLSRSPLKGVKKAKIMQCKVTLLDNSDYTIDVE      
+Scaffold_2042.122265 cgggcttcaaacctatccaggaagaaactagatcgatgtaaggg      
+                     taaaatcaggggtcgcctagtaacattagatcttaacaactata      
+                     gcgcgactccttccgtcgaacgaggaggtacaggcccccattag      
+
+
+SINFRUP00000067   45                            KGARGQMLFDKVCEHLNLLERD 
+                                                KGARGQMLFDKVCEHLNLLERD 
+                                                KGARGQMLFDKVCEHLNLLERD 
+Scaffold_2042.122397 GTGAGCA  Intron 1       CAGaggagcactgagtgccatcgag 
+                     <0-----[22397  :  24223]-0>agcggatttaatgaatattaga 
+                                                aacgcagcttaatgtatacgac 
+
+
+SINFRUP00000067   67 YFGITYRDVENQK                           NWLDPSKEL 
+                     YFGITYRDVENQK                           NWLDPSKEL 
+                     YFGITYRDVENQK                           NWLDPSKEL 
+Scaffold_2042.124290 ttgaatagggacaGTAAATA  Intron 2       CAGatcgctagc 
+                     atgtcagataaaa<0-----[24329  :  24470]-0>agtaccaat 
+                     ctccatactgtgg                           tggtttagg 
+
+
+SINFRUP00000067   89 KKQIR                              GPWNFAFNVKFYPP 
+                     KKQIR                              GPWNFAFNVKFYPP 
+                     KKQIR          T:T[act]            GPWNFAFNVKFYPP 
+Scaffold_2042.124498 aacacAGTAAGCA  Intron 3       CAGCTgctatgtagattcc 
+                     aaatg <1-----[24514  :  24586]-1>  gcgatctatatacc 
+                     aggcg                              tcgctcctcgtcta 
+
+
+SINFRUP00000067  109 DPVQLAEDIT                              YYLCLQLRD 
+                     DPVQLAEDIT                              YYLCLQLRD 
+                     DPVQLAEDIT           R:R[agg]           YYLCLQLRD 
+Scaffold_2042.124631 gcgccgggaaAGGTAAGAG  Intron 4       CAGGttttcccag 
+                     actatcaatc  <2-----[24663  :  24816]-2> aatgtatga 
+                     ttcggtgcca                              ctgtgggat 
+
+
+SINFRUP00000067  129 DVVSGRLPCSFATHTVLGSYTVQSELGDYDHEELGSDYLSELRLAPNQT 
+                     DVVSGRLPCSFATHTVLGSYTVQSELGDYDHEELGSDYLSELRLAPNQT 
+                     DVVSGRLPCSFATHTVLGSYTVQSELGDYDHEELGSDYLSELRLAPNQT 
+Scaffold_2042.124845 gggtgccctttgacagcgttagctgcggtgcggcgagtcagcccgcaca 
+                     attcggtcgctccacttgcactacatgaaaaaatggaatgatgtccaac 
+                     tgtgctgccttcccatactcgggtagactctgggccccctggctaccgc 
+
+
+SINFRUP00000067  178 KELEEKVMELHKSY                              GMTPG 
+                     KELEEKVMELHKSY                              GMTPG 
+                     KELEEKVMELHKSY           K:K[aag]           GMTPG 
+Scaffold_2042.124992 agcggagagccaatAAGTTGGTT  Intron 5       CAGGgaacg 
+                     aataaattataaga  <2-----[25036  :  25161]-2> gtccg 
+                     agtgagcgaccgcc                              ggatg 
+
+
+SINFRUP00000067  198 EAEMHFLENAKKLSMYGVDLHHAK                          
+                     EAEMHFLENAKKLSMYGVDLHHAK                          
+                     EAEMHFLENAKKLSMYGVDLHHAK                          
+Scaffold_2042.125178 gggactcgagaactatgggcccgaGTAATAA  Intron 6         
+                     acatattaacaatctagtataaca<0-----[25250  :  26185]  
+                     aaggccgatcgggcgccttccctg                          
+
+
+SINFRUP00000067  222    DSEGVEIMLGVCSSGLLIYRDRLRINRFAWPKILKISYKRNNFYIK 
+                        DSEGVEIMLGVCSSGLLIYRDRLRINRFAWPKILKISYKRNNFYIK 
+                        DSEGVEIMLGVCSSGLLIYRDRLRINRFAWPKILKISYKRNNFYIK 
+Scaffold_2042.126183 TAGgtggggaacggttagccatagatcaaatgtcaacaattaaaattaa 
+                     -0>acagtatttgtgcggtttagagtgtagtcgcattatcaagaatata 
+                        ctgatgcggatcgccgtccacgggccgttgtaacgttcggtcccca 
+
+
+SINFRUP00000067  268 IRPGE                           FEQFESTIGFKLPNHRA 
+                     IRPGE                           FEQFESTIGFKLPNHRA 
+                     IRPGE                           FEQFESTIGFKLPNHRA 
+Scaffold_2042.126324 accggGTAATTA  Intron 7       CAGtgctgaaagtaccaccg 
+                     tgcga<0-----[26339  :  26457]-0>taatagctgtatcaagc 
+                     ctgcg                           tggtgcgctcgtctttt 
+
+
+SINFRUP00000067  290 AKRLWKVCVEHHTFFRQVMKEVL                           
+                     AKRLWKVCVEHHTFFRQVMKEVL                           
+                     AKRLWKVCVEHHTFFRQVMKEVL           G:G[ggg]        
+Scaffold_2042.126509 gaactagtggccattacgaaggcGGGTTGCCA  Intron 8        
+                     cagtgatgtaaacttgattaatt  <2-----[26580  :  26864] 
+                     cagcggctggtccccgacggggg                           
+
+
+SINFRUP00000067  313     LVSPEAPPKKFLSLGSKFRYSGRTQAQTRRASSQIIRPAPFFERS 
+                         LVSPEAPPKKFLSLGSKFRYSGRTQAQTRRASSQIIRPAPFFERS 
+                         LVSPEAPPKKFLSLGSKFRYSGRTQAQTRRASSQIIRPAPFFERS 
+Scaffold_2042.126862 CAGGcgtcggccaatcacgtatctagaacgcaccgatcaaacgcttgct 
+                     -2> ttccacccaattgtgcatgagggcacacggcgcattgcccttagc 
+                         agttggtaggcgttctgtcctcaggtgcagcctgctattgctgtc 
+
+
+SINFRUP00000067  359 SSKRYNMSRSLDG                              SSADGV 
+                     SSKRYNMSRSLDG                              SSADGV 
+                     SSKRYNMSRSLDG          D:D[gac]            SSADGV 
+Scaffold_2042.127001 taactaatcatggGGTAGGAT  Intron 9       AAGACatgggg 
+                     cgagaatcggtag <1-----[27041  :  27314]-1>  gccagt 
+                     ccacccgcttata                              ctttag 
+
+
+SINFRUP00000067  379 ARVKGNIITTVTPERKAEEENGEQEDAKNDAADKPDPAASSPLHQETKV 
+                     ARVKGNIITTVTPERKAEEENGEQEDAKNDAADKPDPAASSPLHQETKV 
+                     ARVKGNIITTVTPERKAEEENGEQEDAKNDAADKPDPAASSPLHQETKV 
+Scaffold_2042.127335 gcgagaaaaagacgaaggggaggcgggaaggggacgcggtaccccgaag 
+                     cgtagattcctccagacaaaagaaaacaaaccaacaccccgctaaacat 
+                     tatgatccctatagaaaaagcagggtcgctgacgactttcctccaaaga 
+
+
+SINFRUP00000067  428 TL                              DPLRSELSLPSSPISFT 
+                     TL                              DPLRSELSLPSSPISFT 
+                     TL          D:D[gac]            DPLRSELSLPSSPISFT 
+Scaffold_2042.127482 acGGTCAACC  Intron 10      CAGACgccctgctccttcatta 
+                     ct <1-----[27489  :  28576]-1>  actgcatctcccctctc 
+                     aa                              tccctgctccattcact 
+
+
+SINFRUP00000067  448 KIRRRRRENAHKRASSVSPGKSSTGRCRRQALADRKAALLDEQALLLVA 
+                     KIRRRRRENAHKRASSVSPGKSSTGRCRRQALADRKAALLDEQALLLVA 
+                     KIRRRRRENAHKRASSVSPGKSSTGRCRRQALADRKAALLDEQALLLVA 
+Scaffold_2042.128630 aaccacagagcacgttgacgaaaagctcccgcggcaggccggcgtccgg 
+                     atgggggaacaagccctgcgaggcgggggactcagaccttaaacttttc 
+                     aagggcggcatgaccactacgccagtcctgctttcatggatgacggcga 
+
+
+SINFRUP00000067  497 RKQRLEQGRNHGGTLFSFSLHLPDLSSVLDEDGYITFPDLSKMRFLPEC 
+                     RKQRLEQGRNHGGTLFSFSLHLPDLSSVLDEDGYITFPDLSKMRFLPEC 
+                     RKQRLEQGRNHGGTLFSFSLHLPDLSSVLDEDGYITFPDLSKMRFLPEC 
+Scaffold_2042.128777 cacacgcgaacggacttttccccgcttgtggggtaatcgctaactccgt 
+                     gaagtaaggaaggcttctctatcatccttaaagatctcatcatgttcag 
+                     aggagggcgttccacccccgcgccgctcgcactccccctgcagccctgt 
+
+
+SINFRUP00000067  546 AQNFLPIKS                              YLCSLLLFTL 
+                     AQNFLPIKS                              YLCSLLLFTL 
+                     AQNFLPIKS           P:P[cct]           YLCSLLLFTL 
+Scaffold_2042.128924 gcatccaatCCGTCACTC  Intron 11      CAGTtcttccctac 
+                     caattctac  <2-----[28953  :  30816]-2> atgcttttct 
+                     ggccgccga                              ttcggatctc 
+
+
+SINFRUP00000067  566 FTFFFLCLLNSLHLSSFAVSRKQTRNLKYRLRYTGLTFIDRPCSMHQIL 
+                     FTFFFLCLLNSLHLSSFAVSRKQTRNLKYRLRYTGLTFIDRPCSMHQIL 
+                     FTFFFLCLLNSLHLSSFAVSRKQTRNLKYRLRYTGLTFIDRPCSMHQIL 
+Scaffold_2042.130848 tatttctccatccctttggacacaaacatacatagcatagccttaccac 
+                     tcttttgttactatcctctggaacgataagtgacgtcttagcgctaatt 
+                     cccttttaatttccctctctgggggtgacacgctgcttctgccggtact 
+
+
+SINFRUP00000067  615 DILTRCPEPSASTISHFAVSILAYCF                        
+                     DILTRCPEPSASTISHFAVSILAYCF                        
+                     DILTRCPEPSASTISHFAVSILAYCF                        
+Scaffold_2042.130995 gacaatcgctgtaaactggtatgttt                        
+                     attcggcaccccctgatctcttcagt                        
+                     cccaaccgagaccctcttcccgtccc                        
+
+
+SINFRUP00000067  641                               EVVKHQTNISELKRSFLET 
+                                                   EVVKHQTNISELKRSFLET 
+                               S:S[tct]            EVVKHQTNISELKRSFLET 
+Scaffold_2042.131073 TGTTGTCA  Intron 12      CAGCTgggaccaaaagcacttcga 
+                      <1-----[31074  :  31980]-1>  attaaacatgatagcttac 
+                                                   ggcacgccccaggcccggg 
+
+
+SINFRUP00000067  661 GDGTQGPTEWEKRLSSSPMCSPRFDEAPMIEPLDVSV             
+                     GDGTQGPTEWEKRLSSSPMCSPRFDEAPMIEPLDVSV             
+                     GDGTQGPTEWEKRLSSSPMCSPRFDEAPMIEPLDVSV             
+Scaffold_2042.132040 gggacgcagtgaactttcattcatgggcaagccggag             
+                     gagcagccagaagtcccctgccgtaaccttactatgt             
+                     ttcgacgcagggggcgtcgcaaattgaggtgggtacc             
+
+
+SINFRUP00000067  698                               ERTSDSQEDVSERSMLKVS 
+                                                   ERTSDSQEDVSERSMLKVS 
+                               D:D[gat]            ERTSDSQEDVSERSMLKVS 
+Scaffold_2042.132151 GGTCTGCA  Intron 13      CAGATgcatgtcgggtgatataga 
+                      <1-----[32152  :  33223]-1>  agccacaaatcagcttatg 
+                                                   gactccggcggggagggcc 
+
+
+SINFRUP00000067  718 PRAVTQEVSQAISDKKGRLIILKDAEHKEEDEEPGEAEGVEKELPPSKE 
+                     PRAVTQEVSQAISDKKGRLIILKDAEHKEEDEEPGEAEGVEKELPPSKE 
+                     PRAVTQEVSQAISDKKGRLIILKDAEHKEEDEEPGEAEGVEKELPPSKE 
+Scaffold_2042.133283 caggacggtcgaagaagacaatagggcagggggcgggggggagtcctag 
+                     cgctcaatcactgaaaggttttaacaaaaaaaacgacagtaaatcccaa 
+                     cattagagggcctcggagttcgattataggcaacagtacgggagtaaag 
+
+
+SINFRUP00000067  767 KEMVKEDAADVLASITREIKKQHPKVEIKTNGTQQINGSDSPKKAMVSW 
+                     KEMVKEDAADVLASITREIKKQHPKVEIKTNGTQQINGSDSPKKAMVSW 
+                     KEMVKEDAADVLASITREIKKQHPKVEIKTNGTQQINGSDSPKKAMVSW 
+Scaffold_2042.133430 agagaggggggcgtaaagaaacccaggaaaagaccaagagtcaagagtt 
+                     aattaaaccattcctcgataaaacatatacagcaataggaccaacttcg 
+                     aagtaacctttgctaagacgggtgggaaatccgagtcttcgtagcgggg 
+
+
+SINFRUP00000067  816 ISEEVKTVATKEVSEGESVEAPQQTAEIFTFEEEQSKSSPTQITVSESS 
+                     ISEEVKTVATKEVSEGESVEAPQQTAEIFTFEEEQSKSSPTQITVSESS 
+                     ISEEVKTVATKEVSEGESVEAPQQTAEIFTFEEEQSKSSPTQITVSESS 
+Scaffold_2042.133577 atgggaaggaaggagggagggcccaggatatgggctatacacaagtgtt 
+                     tcaatactccaatgagagtaccaaccattctaaaacacgccatctcacc 
+                     ctggggtcgagaatgagctagggagggccctagggagccgcgcattata 
+
+
+SINFRUP00000067  865 ASFAVV                              LDTTDAGTKEMPV 
+                     ASFAVV                              LDTTDAGTKEMPV 
+                     ASFAVV           H:H[cat]           LDTTDAGTKEMPV 
+Scaffold_2042.133724 gttgggCAGTATAAA  Intron 14      GAGTcgaagggaagacg 
+                     cctctt  <2-----[33744  :  35243]-2> taccacgcaatct 
+                     tttgga                              tcctttacaggcg 
+
+
+SINFRUP00000067  885 IHTETKTITYESAE                           VDTNGDLD 
+                     IHTETKTITYESAE                           VDTNGDLD 
+                     IHTETKTITYESAE                           VDTNGDLD 
+Scaffold_2042.135284 acagaaaaatgtggGTACACA  Intron 15      CAGggaaggtg 
+                     tacacactcaacca<0-----[35326  :  36271]-0>tacagata 
+                     ccagaatcccgtag                           tctttcgc 
+
+
+SINFRUP00000067  907 PGVLLSAQTITSEITSTTTTTHITK                         
+                     PGVLLSAQTITSEITSTTTTTHITK                         
+                     PGVLLSAQTITSEITSTTTTTHITK                         
+Scaffold_2042.136296 cggccagcaaatgaaaaaaaacaaaGTCAGTT  Intron 16       
+                     cgtttgcactccatcgcccccatca<0-----[36371  :  36567] 
+                     cttcgttgcctaatccccacaccag                         
+
+
+SINFRUP00000067  932    MVKGGISETRIEKRIVISGDADIDHDQ                    
+                        MVKGGISETRIEKRIVISGDADIDHDQ                    
+                        MVKGGISETRIEKRIVISGDADIDHDQ                    
+Scaffold_2042.136565 CAGagaggatgaaagaaagatggggagcgc                    
+                     -0>ttaggtcacgtaagtttcgacataaaa                    
+                        ggaagtaaaacgagcccaatacctctg                    
+
+
+SINFRUP00000067  959                            ALAQAIKEAKEQHPDMSVTKVV 
+                                                ALAQAIKEAKEQHPDMSVTKVV 
+                                                ALAQAIKEAKEQHPDMSVTKVV 
+Scaffold_2042.136649 GTCTCTT  Intron 17      CAGgcgcgaaggagcccgatgaagg 
+                     <0-----[36649  :  36954]-0>ctcactaacaaaacatctcatt 
+                                                tgtgcaagtaagttcgagcaag 
+
+
+SINFRUP00000067  981 VHKETEITPEEGED                                    
+                     VHKETEITPEEGED                                    
+                     VHKETEITPEEGED                                    
+Scaffold_2042.137021 gcagagaacggggg                                    
+                     taaacatccaagaa                                    
+                     atagagcgaagggc                                    
+
+
+//
+Gene 1
+Gene 22265 37062 
+  Exon 22265 22396 phase 0
+     Supporting 22265 22396 1 44
+  Exon 24224 24328 phase 0
+     Supporting 24224 24328 45 79
+  Exon 24471 24513 phase 0
+     Supporting 24471 24512 80 93
+  Exon 24587 24662 phase 1
+     Supporting 24589 24660 95 118
+  Exon 24817 25035 phase 2
+     Supporting 24818 25033 120 191
+  Exon 25162 25249 phase 2
+     Supporting 25163 25249 193 221
+  Exon 26186 26338 phase 0
+     Supporting 26186 26338 222 272
+  Exon 26458 26579 phase 0
+     Supporting 26458 26577 273 312
+  Exon 26865 27040 phase 2
+     Supporting 26866 27039 314 371
+  Exon 27315 27488 phase 1
+     Supporting 27317 27487 373 429
+  Exon 28577 28952 phase 1
+     Supporting 28579 28950 431 554
+  Exon 30817 31073 phase 2
+     Supporting 30818 31072 556 640
+  Exon 31981 32151 phase 1
+     Supporting 31983 32150 642 697
+  Exon 33224 33743 phase 1
+     Supporting 33226 33741 699 870
+  Exon 35244 35325 phase 2
+     Supporting 35245 35325 872 898
+  Exon 36272 36370 phase 0
+     Supporting 36272 36370 899 931
+  Exon 36568 36648 phase 0
+     Supporting 36568 36648 932 958
+  Exon 36955 37062 phase 0
+     Supporting 36955 37062 959 994
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genewise_output.paracel_btk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genewise_output.paracel_btk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genewise_output.paracel_btk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,206 @@
+GeneWise BTK 5.1.3-88/88 2002-08-09 (Fdf Client 1.500)
+
+Copyright 2001 Paracel, Inc
+
+
+Query=   protein sequence #1
+        (10 letters)
+
+Database:  /fdf/gm2build24/gm0/0//bwb/tmp/XrhARf0Sal.codon
+           2 sequences; 64 total letters.
+Searching.......................................................done.
+
+WARNING:  No calibration or zscore calculation was done.
+          Statistical information may not be valid.
+                                                                        E
+Sequences producing significant alignments:                    Score  Value
+
+DNA sequence #1                                                   26  0.0
+reverse-complement of DNA sequence #1                             26  0.0
+
+>DNA sequence #1
+          Length = 30
+          
+ Score = 26.4 bits (26), Expect = 0.0
+ Identities = 10/10 (100%), Positives = 10/10 (100%)
+
+Query: 1  SWPLSEALMR 10
+          SWPLSEALMR
+Sbjct: 1  SWPLSEALMR 28
+
+
+
+>reverse-complement of DNA sequence #1
+          Length = 30
+          
+ Score = 26.4 bits (26), Expect = 0.0
+ Identities = 10/10 (100%), Positives = 10/10 (100%)
+
+Query: 1  SWPLSEALMR 10
+          SWPLSEALMR
+Sbjct: 30 SWPLSEALMR 3
+
+
+
+
+
+Histogram of all scores:
+  score    obs    exp  (one '=' represents 1 hits)
+  -----    ---    ---
+<     6      2      - |==
+      6      0      - |
+      7      0      - |
+      8      0      - |
+      9      0      - |
+     10      0      - |
+     11      0      - |
+     12      0      - |
+     13      0      - |
+     14      0      - |
+     15      0      - |
+     16      0      - |
+     17      0      - |
+     18      0      - |
+     19      0      - |
+     20      0      - |
+     21      0      - |
+     22      0      - |
+     23      0      - |
+     24      0      - |
+     25      0      - |
+>=   26      2      - |==
+unknown(8) BTK 5.1.3-88/88 2002-08-09 (Fdf Client 1.500)
+
+Copyright 2001 Paracel, Inc
+
+
+Query=   protein sequence #2
+        (9 letters)
+
+Database:  /fdf/gm2build24/gm0/0//bwb/tmp/XrhARf0Sal.codon
+           2 sequences; 64 total letters.
+Searching.......................................................done.
+
+WARNING:  No calibration or zscore calculation was done.
+          Statistical information may not be valid.
+                                                                        E
+Sequences producing significant alignments:                    Score  Value
+
+DNA sequence #1                                                   24  0.0
+reverse-complement of DNA sequence #1                             24  0.0
+
+>DNA sequence #1
+          Length = 30
+          
+ Score = 23.8 bits (24), Expect = 0.0
+ Identities = 9/9 (100%), Positives = 9/9 (100%)
+
+Query: 1  SWPLSEALM 9
+          SWPLSEALM
+Sbjct: 1  SWPLSEALM 25
+
+
+
+>reverse-complement of DNA sequence #1
+          Length = 30
+          
+ Score = 23.8 bits (24), Expect = 0.0
+ Identities = 9/9 (100%), Positives = 9/9 (100%)
+
+Query: 1  SWPLSEALM 9
+          SWPLSEALM
+Sbjct: 30 SWPLSEALM 6
+
+
+
+
+
+Histogram of all scores:
+  score    obs    exp  (one '=' represents 1 hits)
+  -----    ---    ---
+<     5      2      - |==
+      5      0      - |
+      6      0      - |
+      7      0      - |
+      8      0      - |
+      9      0      - |
+     10      0      - |
+     11      0      - |
+     12      0      - |
+     13      0      - |
+     14      0      - |
+     15      0      - |
+     16      0      - |
+     17      0      - |
+     18      0      - |
+     19      0      - |
+     20      0      - |
+     21      0      - |
+     22      0      - |
+>=   23      2      - |==
+unknown(8) BTK 5.1.3-88/88 2002-08-09 (Fdf Client 1.500)
+
+Copyright 2001 Paracel, Inc
+
+
+Query=   protein sequence #3
+        (8 letters)
+
+Database:  /fdf/gm2build24/gm0/0//bwb/tmp/XrhARf0Sal.codon
+           2 sequences; 64 total letters.
+Searching.......................................................done.
+
+WARNING:  No calibration or zscore calculation was done.
+          Statistical information may not be valid.
+                                                                        E
+Sequences producing significant alignments:                    Score  Value
+
+DNA sequence #1                                                   21  0.0
+reverse-complement of DNA sequence #1                             21  0.0
+
+>DNA sequence #1
+          Length = 30
+          
+ Score = 21.1 bits (21), Expect = 0.0
+ Identities = 8/8 (100%), Positives = 8/8 (100%)
+
+Query: 1  SWPLSEAL 8
+          SWPLSEAL
+Sbjct: 1  SWPLSEAL 22
+
+
+
+>reverse-complement of DNA sequence #1
+          Length = 30
+          
+ Score = 21.1 bits (21), Expect = 0.0
+ Identities = 8/8 (100%), Positives = 8/8 (100%)
+
+Query: 1  SWPLSEAL 8
+          SWPLSEAL
+Sbjct: 30 SWPLSEAL 9
+
+
+
+
+
+Histogram of all scores:
+  score    obs    exp  (one '=' represents 1 hits)
+  -----    ---    ---
+<     5      2      - |==
+      5      0      - |
+      6      0      - |
+      7      0      - |
+      8      0      - |
+      9      0      - |
+     10      0      - |
+     11      0      - |
+     12      0      - |
+     13      0      - |
+     14      0      - |
+     15      0      - |
+     16      0      - |
+     17      0      - |
+     18      0      - |
+     19      0      - |
+>=   20      2      - |==

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genomewise.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genomewise.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genomewise.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,17 @@
+Gene 1
+Gene 4761 6713 
+  Exon 4761 4874 phase 0
+  Exon 4921 5003 phase 0
+  Exon 5061 5236 phase 2
+  Exon 5289 6623 phase 1
+  Exon 6679 6713 phase 1
+Gene 2
+Gene 9862 10316 
+  Exon 9862 9972 phase 0
+  Exon 10024 10211 phase 0
+  Exon 10262 10316 phase 2
+Gene 3
+Gene 10475 11726 
+  Exon 10475 10921 phase 0
+  Exon 10983 11726 phase 0
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.epcr
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.epcr	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.epcr	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+HSBA536C5  6934..7060       SGC31907        (+) Chr.1, between D1S484 and D1S426
+HSBA536C5  9167..9289       SHGC-474        (-) Chr.1, between D1S2707 and D1S2705
+HSBA536C5  89424..89607     stSG1902        (+) Chr.1, between D1S2635 and D1S484
+HSBA536C5  89436..89729     IB289           (-) Chr.1, between D1S2635 and D1S484
+HSBA536C5  89440..89572     TIGR-A002L26    (+) Chr.1, between D1S2635 and D1S484
+HSBA536C5  90058..90332     WI-9524         (-) Chr. 1, between D1S2635 and D1S484
+HSBA536C5  161589..161812   SGC31641        Chr.1, between D1S2635 and D1S484

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2812 @@
+>HSBA536C5 LOCUS       HSBA536C5  168628 bp    DNA             HTG       16-MAY-2000
+GCTGACGCTGCTGCTGCTGCTAATGCTAGGTAAGCCAGGCCCCCGTTCCGGTTCCCGGCC
+CTGCCGGGCAGGTGCCTGCCATCCAGCACCCCCCTAGCATTTCGGGGCTCCTCACATCCT
+CCTCCGTCAGGGGTACCTCCAGCCTCCCCGGGGTCGGCGTCCAGCTGCGCTTTGTGGCTG
+TCCGCCGAGCCAGTCTTGGGAAGAGTGCAGGCTCCCCAGCTCTAAGCCAGGGACTGGGGC
+GTAGAGAGGACCCACTAAGGTGCTGCCCCAGGCACTGCTCGGCCTCCCGGGGCAGGCCAC
+ACATCTCAGCTAGCATACCCACGTACTGGGGTCGCTGCACCAAATGCTGCGACTCCTTTC
+TGCCCCACACTCAGCTCCAGGCCTGGCCTTGGGTGCCCGCCCTACGAGGGCAGTGCCTGC
+CCAGCTTCATAATTGCCCCAGAGTTGGCTGGAGGCCCAGAGGCCAGTTCTGCCCACGATC
+TTTGAAGTAAGTCTCTCCCAGTTGCGCTCCACGCACTTCATCAACTGGGGGCAGTTCCCT
+CTCTCACCCCCATCTCCACCCTGGGTTGGGTCCCTCTTTGCCCTTCTTCCTGTCTCCCAA
+ATGGCACTTGGCTCTGCAGCCACCCATCAGTTTTCACACCGCTTAGTTCCAGCTTCCTTG
+CCTCAGGAGAAACCAGGTTCCATGGAGACTGATCCTGGGATCTCCAAGCACCTTGTGCCT
+GCTGTGGTGGGTGTTCATGGTGGGGTGTTCTGGGCCAATCTTGCATGGATGCATGGGGAT
+AATGACAGTGGAATTGTTTTTGGCCTGAACTGGAGGGCTCCCTGTAGGAGGTGGTCTGGG
+GAAGAGTATGGCATGGAGACAGTGAAACTGCCAGTACAGATGTTGATGAGGCTGAAGCCC
+TTGAAATGAGAGTGAAGCGTGGGGGTGGGGATGGTGACTTCAGAGCTGCTGTCGTCAAGG
+CAGAGGCTGAAGGTAGCCCCAGCTGCCTGTTTCTAGACCAGAGCTCCACAGTGGCACCAA
+GCAAAGCCTTCCCCTGCTGCAGGTCCCTGGGCCCTGCAGGGTTAGGATGAGTCCTCTCCC
+TGGTCCCTGGGCAGGTGGCAGGAGGATGTACAGAGAGGAAGGCGCTGTGGGGCCTGGGTT
+GCCTAGCACTTGGTGTGTTTTCCCCAGTGGGCAGCATGGAGCAGGGAGGAGCCCAGGGAG
+GTGGGGAAGTGCTGGCCTGTGTTTGAAAAAGGAGCACGCAGAGGTCTGAGCACCCACTGC
+CTGTGGGCACTTACATTCATTTCACAAATGAGGAAAATGAAGTAACTTGCCCCAGAGAGC
+ACATTTAGTGAGTGTTGGGGCTAGGATTTGAACACAGATCTGATTCCAAAGCAGGCCTCT
+TTGCTGTCCCTTTCCTTGCTGCGACTGATACAGAGGAGGGGAGAGAGAGCTGTGTGGCAA
+CTGATTTCCTAAGGCTGTGGAGGTGACTTTCCCATGGGCATCTCCAAGCATCAGCAGTTG
+TGGAGCCAGGAAAATGGGAGGGGGCCTTCTGAGCAGTCTGCCCTGAGGCAGTCAGGGAAG
+GTGTCAGCTTGGCTGTCTCCACCGTCACTCTGATTATGAAACTTGCCTGGCCTGGGGCAG
+GGCTCTCCTGAGTCACATACGCACTCACACATGGTCAGAGCTGGAGCTCTTAGTTCTCCT
+CCAGTGTGGAAACTGAGGCCTAGGACAGAAAGAGACTGGATCCCAGGCCACACAGCTGGG
+CATGGGGATGGTGCTGGAATCCCACTTCCTGACTGGCAGCTGTGCCCTTAACAACAGACC
+ATGAGTTCTCCCTGTGACCCAGCCAGAGGATGAAGGAAGGGGCTAGGAGCCCAGAAGCAG
+GGGTGGGGAAGTTCAGAGGAGCCGTTCTAGAGAAGCTGGAGGGTAGAGTTTGACCACAGA
+CTCCTGGGCATTTTCCTCTAGCTCCTGCAGAAAGTCCATGGTATCCCCCTGGGAGGCCTA
+AAAGTGACACTCTGGTTGGTGAGGTCAGTCCCTTCAGGCCTGGCCTTGGAGTTTCCCCAT
+CTTCCTCTCTGGTTTTGCCCCTCTGGAGGAGTGTGTGGAAGCCATCTAGGGACAGCATGA
+TCTTGGCTAACATATTATACTTTTCCTGCCTTACCTTTCTCATCTGTAAAATTGGGAGAG
+GTGGGGAGGGGTGGCCTAGCCCTCTTTACATTGAGGTTCTGTGATCTTATCTAGTGGGAG
+CTATGGAAGGAAAGAAGGGAGGAGCTAAGCCCTCTAAGTCTGCCAGGAGTATCTGCTGGG
+GTAGGGTTGGGCATAGGGGTGGAGTAGGGTGAAGAAGAGAGGTTTGGCCTTTCTGAAGGG
+AGAGAGATGAGAAGGGGCAGCTGGGGATTTAGTGCCTCCCTACAGAAAGAGAGCCATATA
+TGTAAATGAATGCATACCATGTGCAAATGAAGGCTTTTGTTAAGGAACTGGGGCAGGCAC
+CTTCTGTATCTGCTGGGCTAGCCCGGAAGGTTGGCAGGCACTGGAGGAGGGGATCTTCTG
+CCCCCAATGGCCAGGGCAGAGCCTGGATCTTGCCCCAGAAGCCCCAGCTGCCTGGGCAGC
+CCTGTGGGGTGAGAGAAGCAGTGAGCTCATGGCGCAGCTGGAGAGGGGAGGGCATTGTGT
+GGTGAGAACTGACTGGGCCTGTGCCCTTTCCAGCTAGGGTCACTGGCTCCATTGTCTCGC
+CCAGGTGACACAGCTGAGCCAACTAGCACTTCTGAGCAAGACTGCCAGCTCCCCTCAAGC
+AAAGGAGAGGGACATGTAGTCTCAGAGTCCCCTAGCCAGAAGGCTCCCCTTGCCAGGTGC
+TGAGGAAGGAATGGCAGACATGGAGGTGGGTGAGTGGCAGGGACAGATCAAGGTTCAGTT
+GGAACCAAGGCCTGGTTAGGTCAGGAATCAGATGTACTCCTTTCTGCCTTCAGGTGCTCT
+GCTTCTAGTCCGCCAGAAATGGGATGCATCTGGATGCCAAGGCAATCCTGGGCCCATGCC
+ACCTCCTTGAGATCCAGGTGTCTTATATCACAGCATCCCTTTCCTATTCTCAGAACAGGG
+TCCAAGCTGTGGACAAGCTTTCCCAGGTCTCCTTTCCTTTTGTAGGCAGAGATCAAGAAT
+GGTGCTTGTGGCGGCAGAGGGCATGGAATGAGGAACCAGGCATCTCCAACTCCTTCTGCC
+TTTCCCTACAGGAATGGGATGCTGGGCCCGGGAGGTGCTGGTCCCCGAGGGGCCCTTGTA
+CCGCGTGGCTGGCACAGCTGTCTCCATCTCCTGCAATGTGACCGGCTATGAGGGCCCTGC
+CCAGCAGAACTTCGAGTGGTTCCTGTATAGGCCCGAGGCCCCAGATACTGCACTGGGCAT
+TGTCAGTACCAAGGATACCCAGTTCTCCTATGCTGTCTTCAAGTCCCGAGTGGTGGCGGG
+TGAGGTGCAGGTGCAGCGCCTACAAGGTGATGCCGTGGTGCTCAAGATTGCCCGCCTGCA
+GGCCCAGGATGCCGGCATTTATGAGTGCCACACCCCCTCCACTGATACCCGCTACCTGGG
+CAGCTACAGCGGCAAGGTGGAGCTGAGAGGTACTGGGCCCTGGGACGGGGTGGAGCCACA
+CACCCTCTCAAGGCAGAAAGTCAAGGCCACAAGTGACCTATCTAGTTGCCCCTTAGGTTT
+TGTAGCCAAGGAACTTGAGGCCCAAAGAGGTTAAGTGATTTGCACAAAGTCACCCAATAA
+CTCAGTGGCTGATGTGGGACTCAAACCCAGAGTTCCTCACCCAGGCCAGTGCTCATCTTA
+TCATATTAAGACCCCTCCTGGGAGAAGAAATGTCCTTAAGTTGTCCAGTCCACTTTCCAT
+TCCCAAGAACGAAGAAGACTGTTGGGTTGAATAGCCTCTACTCCTGGACAGTACAGGACC
+CAGATCTGGGAAAGAGAGTAGAGTTAGGGAAGGAGTTAGACCATGTGACATTCTCACTCT
+CTTAAGTTTCTTTGTCTTCTGAGAAGAAAGAGCCGCTTTTCTCAGTATTTCAAACAGAGG
+TCAGTCTCAAACATAAGGGTGAGTAGGCGATGGGCTGGTTCTATGAAATTCTACAAGTAT
+TTATTGAGCGCCTGTGCTGTGCCCAGTACTGCCCTTGGCACCACTGAGTTACAGAAGAAA
+TAATACAACATGCGATCTTTGCCCTCTGGGAGCTCACAGTTAATTTAGGGAGATAGGCTA
+TACACAGGAAACAGTAACAAGGGCTGCCAGCTCCCAGCCCTCCCTCACTCCAGCTGTTCT
+CTCCAGTTCTTCCAGATGTCCTCCAGGTGTCTGCTGCCCCCCCAGGGCCCCGAGGCCGCC
+AGGCCCCAACCTCACCCCCACGCATGACGGTGCATGAGGGGCAGGAGCTGGCACTGGGCT
+GCCTGGCGAGGACAAGCACACAGAAGCACACACACCTGGCAGTGTCCTTTGGGCGATCTG
+TGCCCGAGGCACCAGTTGGGCGGTCAACTCTGCAGGAAGTGGTGGGAATCCGGTCAGACT
+TGGCCGTGGAGGCTGGAGCTCCCTATGCTGAGCGATTGGCTGCAGGGGAGCTTCGTCTGG
+GCAAGGAAGGGACCGATCGGTACCGCATGGTAGTAGGGGGTGCCCAGGCAGGGGACGCAG
+GCACCTACCACTGCACTGCCGCTGAGTGGATTCAGGATCCTGATGGCAGCTGGGCCCAGA
+TTGCAGAGAAAAGGGCCGTCCTGGCCCACGTGGATGTGCAGACGCTGTGTAAGTGCCACT
+TATACATCCCACTGTGGGAGCTGGGAGGGACAGTATCCTGGTGGGCACTGTGGCCACAGG
+GGGTACCAGATAGCCATCATCTGACTCCAAGGAACTCATCTTCCACAGAGTGAGCAGCCT
+TGCTAGCATTTCTCTCCCCCGGCCCTTGATGGGTTTTCTCTGCTCAGAAATTGGGTATAG
+AGATAGGAGGCCCATGCTGAGGGGTGAGATTCAAGCCCCTAAATAGAAAGATGGTCATAC
+CTAGTGAGGGGGAGACAGGAGGTTTTGAAGTTGCTGCTGTCAAGTAGGAAACAGGAAGTT
+GGGGAATCGGGTGCCTCTGTGTCCCCTCCTGCCTCCATGACACCTCTTCCCCTACCTGTG
+TTTCAGCCAGCCAGCTGGCAGTGACAGTGGGGCCTGGTGAACGTCGGATCGGCCCAGGGG
+AGCCCTTGGAACTGCTGTGCAATGTGTCAGGGGCACTTCCCCCAGCAGGCCGTCATGCTG
+CATACTCTGTAGGTTGGGAGATGGCACCTGCGGGGGCACCTGGGGCCCGGCCGCCTGGTA
+GCCCAGCTGGACACAGAGGGTGTGGGCAGCCTGGGCCCTGGCTATGAGGGCCGACACATT
+GCCATGGAGAAGGTGGCATCCAGAACATACCGGCTACGGCTAGAGGCTGCCAGGCCTGGT
+GATGCGGGCACCTACCGCTGCCTCGCCAAAGCCTATGTTCGAGGGTCTGGGACCCGGCTT
+CGTGAAGCAGCCAGTGCCCGTTCCCGGCCTCTCCCTGTACATGTGCGGGAGGAAGGTGAG
+AGGGGGCTGGGCCCTGGACGGGGTTCGAGGATTGTCAACCCCTTTTCCTTCTGTTTCCAT
+GACCCCTCCCTCTCCGTCAGCTGTTTGCAGGCCACAGCCTCACGCTCCTCTTCTACCAAG
+ATTGTGGGAATACCCTGAGTTTCCTGGGGCGGTGCTTGGCGGCAGCCCTAACCCCATGCT
+CCCTGGCCCTGCACCCTCTGACCCCATGTCCTTTGTTCTGCAGGTGTGGTGCTGGAGGCT
+GTGGCATGGCTAGCAGGAGGCACAGTGTACCGCGGGGAGACTGCCTCCCTGCTGTGCAAC
+ATCTCTGTGCGGGGTGGCCCCCCAGGACTGCGGCTGGCCGCCAGCTGGTGGGTGGAGCGA
+CCAGAGGACGGAGAGCTCAGCTCTGTCCCTGCCCAGCTGGTGGGTGGCGTAGGCCAGGAT
+GGTGTGGCAGAGCTGGGAGTCCGGCCTGGAGGAGGCCCTGTCAGCGTAGAGCTGGTGGGG
+CCCCGAAGCCATCGGCTGAGACTACACAGCTTGGGGCCCGAGGATGAAGGCGTGTACCAC
+TGTGCCCCCAGCGCCTGGGTGCAGCATGCCGACTACAGCTGGTACCAGGCGGGCAGTGCC
+CGCTCAGGGCCTGTTACAGTCTACCCCTACATGCATGGTGAGTGACACCCCCTCCACCCT
+CCTCACTCTGCCTTCCTCCTGGCCTCTGCCACTGGCCTTCCCTTCCCATCTTCTGACCCT
+CCTGCTACTATCTCTCTCCTCCACATTATGTCACATGAAGTCTCAAAAAATCCAACTTCC
+AGCCCTGCAGTGCCCACCTGCACGGGGTCCTCTGTGGTTGATGCTGACTTGCATGCTGAG
+GGGGCAGGCTGTGGGCAGGGATGGGGGTGTCTGTGAATGGAGAAGGTGGGTTCAGAGAAC
+TTGATGGGGAAAGCGCAGAGGCAAGTGTATGGGTGGAGGATGATGGCATCCTCTCTGAAC
+AGTCATCCTCTCTCCCCCAGCCCTGGACACCCTATTTGTGCCTCTGCTGGTGGGTACAGG
+GGTGGCCCTAGTCACTGGTGCCACTGTCCTTGGTACCATCACTTGCTGCTTCATGAAGAG
+GCTTCGAAAACGGTGATCCCTTACTCCCCAGGTGAGGGAAAAGAACCCCCCAACCTTGTT
+TTCATTGGGCTTCCCACCCCCAAGTCCTGAACCAAGCTGCTCCCTCCCAGGCCCCACCTG
+GAGGAGGAGTAGGCAGCTCAGATCCACAGGGCCCTGTTAAGGGGTGGAGGCCGCGAGGGA
+GCCCCACTCAGGGCTCTCTGTCCTTAGCCCACACCGGGCACCCTTTTCAGGTCTTGCAGG
+TGTCGACTGTCTTCCGGCCCAGCTCCAAGCCCTCCTCTGGTTGCCTGGACACCCTCTCCC
+TCTGTCCACTCTTCCTTTAATTTATTTGACCTCCCACTACCCAGAATGGGAGACGTGCCT
+CCCCTTCCCCACTCCTTCCCTCCCAAGCCCCTCCCTCTGGCCTTCTGTTCTTGATCTCTT
+AGGGATCCTATAGGGAGGCCATTTCCTGTCCTGGAATTAGTTTTTCTAAAATGTGAATAA
+ACTTGTTTTATAAAAAGCAGGCTTGGCTGTGTCTTCCCTACCATGAGACTGTAAAACGCT
+CAGGGTGCATAGTAGGGAGGTGGTTGGGGATTGACAACCTATTCTGGATGACCCAGGCCT
+CTCTTCCCACCACAAACACCATCTGGCCTCCATGGGCCATGCCACCCACCCCAGGAGGGG
+ACACCTGGGTTCCATCTGCAGCCTGAACTGTCGGGGCTGAGACAACTCCTGATTGGACTC
+CTACTTGGGGAGGGAAGCATAGAAAAGGAGGGTAGGTCCCAGCTGCCTGTGGTCACCCCC
+GCACCAGCTTCTCCCATGTTAAAGGGCATGGGCCTCCAGGGGCAGTGCCTTTTCTGAGCT
+TCCCAGACTTTCCCCAACTCTTCCCACGTTTCATTCTCCTGGTCAGGCTAGTCTTGGACC
+TGACTCCTCCCACTAGGCCCTGGGCTGAAATTGTTGTTCCTTTTGACCCATAGCAGCAAG
+AATAGAAAGCCTCAGAACACAACTGATACTCAGGCTGGGACAACAGGCAGTCAGGGGAGG
+GCCTGTGTGTCCACAGAGCCATTGCACCCACATGTACTCGCTGTACCTGCATATACCCTC
+TACCCGCATATACCCACTACACCTGCATATACCCGCTTCACCTGCCTGTACCTACCCTGG
+GGTCTCCTCCCTCCCACCCTTCAGCTTCACCCCTCCTCCAGTGCCCCAGGAGTTTCATGA
+ATTTTCTTGCCTAAGGATTCTCTGATCCTAAAATTCTTACCTGCACGGCACCTTAAGAGG
+CATCTGGCAGGCCTCCTTCAGTTGTTATTTAAACATTTATTGAGCATTACGAGTTTGCTG
+TCATTTTTACAGATGAGGAAACTGAGGCCCAGATATGCGATTTGCCCAGAGTCATAGAGC
+TAATTAGCAAGTGTCAGGAGTAGGACCTAGGTCTCCTGACTTGTTGCACTGTACAGTGGT
+GCCCTGAGTGGTGACAGTGGCGAGGAGAGGTGTGGCAGGGTCTCAGTGACTTCAGCAGGC
+AGGAGGGGCCTTTCCAGACTGAGTTTCTCTGCTTGTTCAGCTGCAATGGGTTGGCTCCTG
+TGTGAGTGCGTGTGTGTGGATACCAAGTAGCCCACCCGCTCTTGTCCAGACCCCCCTGTT
+TTACTGAAACCCAAATGAATCCCCATTCTCCATCCCAACATCTGTTCTTAGGGGACCAGT
+CAATGCCAGGCTTCAGGAATCTCATCTTTGATGACAGGGCCCCAGTGGCCCTGTGACCTG
+AGGCAAAGATATCCGTGTTTTGACTGAAAGGAAGGGACTGGGACTTGGGTCCTCTCTGAG
+TGAACCCCAGCTGTCTGGGCTGCTTCCATGGCCCCCTTTCTTGCTTCCAGAGGACCCATA
+GGGCCCTGGCTATGCCCATAGGACCCTGGCTGTGCCCCATAGGACCCTATCTGCCCAGCC
+TGGAACAGAGAAGACTGGAGTGCTTTCCCCAGGGGGCTTATGAAAGGGCAGGAGTGGGAT
+GTGGCCAGGGCCAAGGGAGGAGGTCCTTGCTGGGCCAGTGCTGCTGTGTGGGAGACAGGC
+GCTGTCCCAGGGCAGGCTCTCCTGGGCCTATGCTTTTCTCCAGGCCAGCTTCAGGTCCCT
+TCCACCCCTCTGCCTCTCTCAACTTGCAGTTGTTCTCCCTCACTATCCAGGAAGCTGCCG
+CTGGTGACTGGGATGTGATCAGGAAGCAGGTGGAGGTGGGGTGAGAATATGCGTCTTTGG
+GACCCCCTGGCCCAATTGGGCTGACAGCAGGCTGCAGACACATCCAGATGTTCTAACTCC
+TATCGCTTCTCTCCGATTTTCTCCCACATTCAATCTTGCCTCCAGTCCTACAAGGGGTTC
+TGTCCTTTGCCCCTAAAACTCTTCCTGCTCCACCTCCCCCCGACCTCCCCAGGAGCCCCC
+TCAAGTCCTTCTGATGGCTTCATCTCTCTGTCGGGCCTGCTCAGCTTCCAGACACCCCCT
+CCACTTGTCACTGCCCTGTTTCCATTGATGGTTGAGTTTCTGGGCCCAAAGTTAAACTCT
+TCAGCCTGGGCTTCGGGTCCCTTTCCCACACATAAAACCCCACCCTTCACATTTCTCTGT
+CTGCTCCTGTGAGGATCCTCAGCTTCTTCTCAGCTTCTCTTCATCTTCCTCTCAGCTACT
+CCTCAGTTTCTATGCCCCATGTCCTGAAACGACTTCCAAGTATTCACTTAGAGCACAGGT
+TCTCAAAGTGTGGTCCTCAGTCCAGCAACAGCAGCATGACCTGGGGACTTCTTAGGAATG
+CATGGTTTTGGTCCCCACCTCTTGGGTCAGAAACCCTTGGAGTGGAGCCCAAAAATCTGT
+GTTTTACCAAGCCCTCCAGGTGATTCCAACGCACACTGGAGATTTCTAACTCCTGACTTA
+CATATTCCCTATGCCTTACACTGGCCCCAGCCTTCTGATCACCCACACTCCCCTACCCTC
+CACGGGACTGCACAGGTTGTCAGTTTGAATTATCCCTCGTTCAGTCACCACAGGTAGATT
+CCTCTGTTTTCCCAGGTGCCCCTTATTTTGAGGACAGGAACCAGGTTTTTGAATGTCCAC
+TCCACATGGCACCCAATCAGTGATGTGAATGACTGTGCCCCAAGGACACACTGCTTTCCT
+AGGGGCTCCCTGTTTCCCAGCTCTAGTGAACCCACTTTCTAGATCTCTCTCCTGGCCCTT
+CCTCCCCAGAACCTCTCTCTCTCTCTTCCTGCCTGTGCCTGATACCTGTAAGATCTCACT
+CCACTGTAAGAAAATAGCTATCCCTGGTTTTCCTTTCCCAGGCCCCTCTCCAGGCCCCAG
+CTCAGGCACCTCCCCGCTCTGCCTGCTTCCCTGAACTTGCGGACTAAGGTCATTCACACC
+TTGCTGAGCCCCTTGCACGCCTTCATCCTGGTGAGGGTGGAGCTAGGAGGGCACCCTCCT
+CATGTGCCCATCTTGGTCCGGCTCCTTTCTCCTTCACTTCAGCTTGGCCTACCGTGTCAT
+TAGACAGTTGTGCCATATGTGGGGAGGCTGGCAGAAGAGGTGACTGGGGACTGCAATCCA
+TCCTATACTCTGCTCCCCAAGCCATGCACCCTGGTTTGGGGCTGTATCTGTCTGGAGGAA
+AAGGGTTCCTTTTTAAAATTTGCACAGGGGTGTTCTACAGGTTAGTGGCAGCCCTGAGTG
+GACCAGTTGTCTACTGGTCCATCTATGTATCTGTCAGCTGTCTTTATCTGCTACCATCTT
+CCCATCCAACCCCCAAAAGTCCATCATTCTGTCCATCCATCCACTCATGAACAAATGTGA
+AGGTCAAACACAGACTGGCATTTTCACCCGGCCCCACCCTCCATTTCCCTGAAATCAGGA
+CCCCTCCCTTCCATACTGGTTTCTAGACTTGAGGTTCTGGGGGACGGGGACTTTCCAACT
+TTAGCACATTTCCAGGGGCTTTGCCTTCCTCCTCCTCCTCCTCCTCCACCACCTCCGATA
+TGCAGTTCCCCATGTATCTGTGTCTGGCCCCCGGTCTGCCACAGGGGTCTGGAGGAAGCT
+GGTCACACCTTGGACTCACTCTCTGGGGGTGGCAGGCAGCCATTCTGCTCCTTGTCAGCC
+CCAGCTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCCCCTCCCCCGCC
+CCCTCCTCCTCCACCTTCTCATCCAGCCGGCTGGGGATGGACCAGTAGAGATGGGCATCA
+AGGCGGGCGGCAGCCTCTGCCAGCTCTCGAGCACTGCACGAAGGTGTGGGCACCTCAAAA
+GTCTCGTGAAAGCTGGCATAGTCCACTTCGTAGAAGCCGTCCTCCAGAGTCAGCACTGAC
+GTGAAGCGGTGGCCCCACAGCCCCTCGTCTACCAGGTAGGAGCTCCGAGCTTGGCATGTC
+ATTCCTGGGAGGAGGGGCACCGGTCAGGGGGAATCTCAGCTCCAGGCTTCCCTCTCCTCT
+TCTCATTCCACAACCCTACTCCCAGCACCCTCCCTTCTATTCCTTCCTAAGGGCTCCCAC
+CTCCCTTTGGTCCAGTCCATTTCCCCCCCTCCCTAGCTCCTGTTCTTACCCAGCTCCCTC
+GTGTCTTCTCCATTTGCCCCAACTCCTCTTAATGTCCAGCTCTTTTTTCCCCTCCTGTCA
+TTTGGTGATGCCAACCAAAAAAAGCTTAACCTTGAAACTAACCTTAGGGATCCTCTAGTC
+CAACTTCTCACTCCATACAGGGCTCCCCAAAGGAGCATTTCTAACAATGGGCATCTAGCT
+TATACTTGATTACCTCCCAGGATTGATAGCTCACCACTTCATAAAGGGCCCACTCCTTTG
+TATGATAGCTCTAATAGTTGGGTTGTTCTTTATACAGAGCTAAAACCTGTACCCTGGGAT
+TTCCAACTCTTGGCTTCATTCCCCTCCCACTTCACAGTCCTCTGAGTATCTGAAAACAGG
+AGTCTCAGATTCCCTCATGTTTTCTCTCTTGACTTAACATCCCTGGCTTCCTCCACGATT
+CCTCATTTGACCTGGTTTTCCAGAACTCTACTTTTTTGATTACAAGCCAGAATGGCCAAG
+GAGGGCCCTTTAAGCATTTTGAGTATAGTAAATTTTCCAACTGTGGCCTGCTCCGTGCAT
+AAGAGAGCGCGCACCATCTCTCTCTTCTGGCCATGATGCTTTCACTAATAGGGCCCAGGA
+TAACATTACATAATCATTTTGGAGGCTGCATCACATTGCTACGACATCAAGACTGTAGCC
+AACTAAAACTCCCAGTTCTTTTTTCACCAGTGCTCTTCCATGCTGTGCTTACATAAGTGA
+TTTTTTTAAAGCCTAAATGTAGAATTTTATATTTATTCATTTTAAATGTCATCTTGCTGG
+GTTTTGGCTCATTATTCTTGCTTTTCAAAAGCGTTTTGAATCCTGACTGTCTCACCCAAA
+ATATTACCATCCTTTCCAGCTTTATCTCTCTAGAAAAACAGACAGATTCCTATCCTCATC
+TCCTCTGTCCTGGTCCTCCCTGCCCGGGGCTTGTGTCCTATCTTCCTGTTGGGCTGGTGC
+ACCCCTCAGGGCAGGTCATTCTCCTTTCTGCCACAGGGTCTTGCCCCCAACCCATATCCA
+TCTCGGAACAAGGGGAAACAGAAGAGTGAACATTAGCTAACCGGAGAGTGGCTGTCAACT
+CGGAAAGCCTGGCGCTGAGACATTGGTATTCCTCGGTGAAGTTGGAATAGAAAGTATCCA
+GTGGGAGAGAAGAGAAAGAGGCATAAGGGACACTGGGGAGAGAAGGGCTTCGAGGAGGAG
+AGGGCAGGGCTGGGGGTACAAAGAGAAAAGAGGGCCGGGCAGCAACTTCCCTGGAGGGTG
+AGGGGCCTGCCTGAGATACTGGCAGAAAAGGAGTGACAGCTGGAGAGAGAGGAAAGGGAA
+CGGGGTGAGTGGGGGAAGAGGAAAGAGTCGGGGTGAAGCAGGAATGGGAACAGTGATGCG
+GAGCTGGGGTTGGGGTGTGAAGCGGAGTCTCAGCCTAAAGGGGCCAGGGAAAGGATCTGA
+AGGCACCAGCTTGTCAGCGAAGCCGGCAGGGAGCATCAGCAGCTCTGGCGTCAGAGGAGG
+GACTGCGGCGCCCCCTAGTGGGCCTTTGGTCCTCCACCCGCAGTTTCCTCATCCTCATCT
+ATCCTCTAGATCTCAAGTCCTAAGTGACCTCCTTTGCGACTCCTGTAACACTGGTCTTTT
+TTCACCTGCTCCCAAAGCCTTCACTGTGTAGGTCTGGGTTGGACTTATTGACATCACAGA
+TGAGTTGCCAGTTCCTTTTGGAAAAGAGCTAGAAAAGGTAACATCATCTATATCCCTTGT
+CAGTGGAAATGGATGAATTGGGAAACCGAGGTACAAGTGAGACTAGGTATTAAACTCCCC
+TCAAGATCCGGCCTAGAAATAACTCAGGCAATGGAAAACCTTCACCGTCAAATTCCAGGG
+GCCTGAACCTGACTGCAGTGAGAAAACCCCTGAACCTAACCCCTCCTTCCAACTCGGGAC
+CACTAAACATTGAAGCCACATTGCCTGTGTCCAATCCAGGCATGGCCACTTATTGGCTTA
+GGAAAGTGGACAACTTCCTTAATTTCTCATTGGAGCAATCTCATCTCTAAAAAGTGAGCA
+CAATAACAAGAAGTTACCTTCAGAGGGTTCTTGTTAGGATTAAATCGACTCAGAGGTAGA
+AAGTGCTTTGGAACTGCGCATGACACAGGGTGAATACTGGGCGAGCACTAGCTATTATTA
+CTGTCCCTATTCCAAGTGACACATTCCTCTGGGCCCAGGGCCCCACGCCAGTGCCATTTC
+GGAGAATTCCTCCAAGCTTTCTCTTGGCCTTCTGCGGCGTTGCTCGGTTCCACTCCCGCC
+TCCGACCCCTGTCTCACCTCACTGTCTCATCCTCCACCAGCCCCTCCATCCATCCTCCCC
+AGCTCCCCTGGCCCCTAGTCTCGCCCCTGCCTTCCTCGGTCCCGCCCACCCTCTGCCAAC
+CCCGCTCCCCTCCCCAGGCCTGCTCGCACCCGTGGCTTCCACCATGCCCTCGAGGATAAC
+GACGATCTCGAAGTCGTCCCTCTCGAGGGCACGGCGCGACGCCTCCCAGAAGGGGCTGGC
+GGCGTCGATCTCGTGGCTGATAACCAGCGGCGAGACGAGGAAGAGGCGGTCGTCTCCCGT
+GTCGAAGCCCACGCTGAGGTCGGTCTGGTGCAGCGGGATGAACTCGCCCTCCAGCGTCTG
+GCGCGAGCGGATGAGCTTGGCGCGGATGGAGGCCTCCACTATGTGTGAGGAGCGCAAGTC
+GCCCACGCGGAACATGAGGCAGAGGCGCCCGTCGCGCAGCGACACCACGGCGTGCGAGGA
+GAAGACGAGCGTGGCTGCGCGCTTGTTGGGCTGCGAGATCTTGACGAACATGCAGCCCAC
+CATGAAGGCGTTCACCATGGAGCCCAGGATGGCCTGCAGCAGCAGCAGCACGATGCCCTC
+GGGGCACTGGTCGGTGATGACGCGGTGCCCGTAGCCGATGGTGGTCTCGGTCTCGATGGA
+GAAGAGGAAGGCGGCCACGAAGCCGTTGAGGTTGTTGACGCACGGCGTCCACGCGGTGTC
+CTCCAGGTGCTCCAGGTCGCCGCGGCCGTAGGCGATCAGCCACCAGATGGCGCCGAAGAA
+GAGCCAGGTGAGCGCGTAGGCCAGGACGAAGAACAACAGGCTGAGGCGCCACTGCAGGTC
+CACCAGCGTGGTGAACAGGTCCGTCAGGTAGCGGTATGTCTCGCGCACGTTGCCCTGCTG
+CACGTTGCACCGGCCATCCTTCTCCACGTAGCGCTGGCGGCCGCGGCGCCGCGGCGGCTC
+CTCCTGCCCGGGCGAGAAGGCCGCGTTCTCCTGCGCCATGGCGGCCGCGTCAGGGCGAGC
+GCTGCGGGCGCCTGGAGTGCGGCGGGGGCCCGAGCTGCTGCCACCTGCCGCGCGTCGCTG
+GGCGCCTAGGCGGCGTCGGGGGTCCTGGAGGGGCTTAATAAAGGTTTGCCGAATGAGTGG
+CCCCTCGGGAGATGGGGAGGCACGGGGCATCAGGGCCAGCACCGAGCCCTGGGGGGCCCT
+GGGCGAGTCCCTTCGTGTTGCCTGGCCTCTATTTCTGGGAATAACACATTCAGCCCTAAC
+TGCCCCCCAGGCTCAGAAAGTTGTGAGGGTCAAATGGAATGATGTACCTGAACTGGATAG
+AACTGTGCAAGTTCCACGATGATTTTCAGGGGAAGGAGGTACCAGATATTTTGGTGCCAT
+CACCTCCAGCTGGGGTGTGCAGCACCTCAGGGGCAGCCCCGGGGCCAAAGACCCAGGCCC
+AGATCCCCCTAGTGATTTCCTGAAATCAATAGACTGGGGGCTTTTTGGGGAGGGGGGCAG
+GCGGGTAATGGAGCCCTCCCTGCCCTAAGGACTTCCACCATCCCTTCCCCAGCTGTAGCC
+AGTGCAGCCACCAATGGCTCTCCTCGCAGGCTGTTTTGAAGTTCACCCCAAGACTGATAC
+ACAGTCTTTCTCATTCTTCCCTCCTTGGACCTCACTCACTCCGCTCCATTCCCTCCCCTC
+AACATGCTTACAAACGAGAAAGAAATGAAAGCTGTGGAAAGTCAGTGGAAACCCTCCAGT
+GCACAAGGGTGGAACATTAGGAAATTCCCACTAGAAATCCTCCCTGAGACCTACCCACGA
+TCACTGCTGCTCTTGGGCCAGCCTCATTTCCCTTGGGTTTTTTCTCCTAGTGGAGACAGC
+CTGCAAATGGGGAGGGGAACTGGAAGATCCCAACACAGAAGCCTGGGAAAGGAAGCCCAG
+AGTTATAAAGAGTTAGGCTCGTATCCCTCTGGGATCCTATTCTTCATGAGAGACATGGAA
+TCAGATCAGGTGAAGCACTAAGCACCCACATTTATTTCCCCACTCCCTCTAGTTATATTT
+TCCCCCAGCCTCTTACTTCCCATGGACTATCCTAACACCAGCCCCGCCCCCACCTGCCAT
+ATGCCCCAAAGCCCACATTCCAAGCGAAGCCCACCTGATTTTTCTTCACATGCTTGCCTC
+TGAGGAGCTCTCTGAGCTGCAATCTGCTCTGAGAGAGGTCCAGAGCCCTTCCCCAAAGCA
+CCCAGGGTCCTCTAGGAGATATGGACCATACAGGGAAAGGTTTAGAATAGGATTTGGGAC
+TCTTCCACCTTCCCTTCTCTTTCCTGGGAATGATCCTCTCCTCCCACTCCACGGAAACGT
+GGGCTGGCTAGTGGGTGGGGGATGGAGGAGTGTGCCATAGGAACCACAGATGGCCTGGAT
+GGTTGACCAGGTCCTGGGGAGAAACTGCCTGGGTATGTGACTCTAAACCTAGCACTCCAG
+CATCTTATGTTCTGGGGTGAGGTCCTCTGCAGTCAGCACTCTGGAGAGGGTGTGTGTGTG
+TGTGTTTGTGTGTCTTGGGGTAGAGAAGGTAGGAAGAGAAATCAAAATGAAATGGCTTTG
+AGCTTAGGGGCACCCCCTGCATTTATTGTCCATGGAGTCAGGGGACACAGGGTCCAAGAG
+CCCCATGGCAATGGAAGATTAGGGTTTCCCCTGCCCCCAACAATGGGCTTCCTTTTCCTT
+CCCTGAAGCTCAGATGTCCTCAGGAAGAGTAAGCATTGACTGGCTCAGAGACCCCAGGTC
+ATGCAGAAAACGACCCTGCTCCCATTATCCTAGCTTTAGTCCCAGGTGGGCTTCCAGATG
+TTCTCTCAGGGTCTTAGGTAATCTGACCCCCAGTCGGCTTGCCTAGACAGATCTCTCTGC
+CCAGGGATGGGGAAGGAACCCCGATGTCGGATCACCCCCAGAACTCTCTCACTCTCCATT
+GCCCCAGGGATCTAGGTAAACCTTATTTTGAATGTGTCCCACATCCTGTCTTCCTGAGTG
+GGGGACTGCACTTTTCTCAGGGGAAGGAAAAGTGTGAAATTGTTACCTCTATCTCAAGAT
+CAAGGACTTGATTTCTCCATCTTCCCTCCAAGAATTTCAGTTCCTCCAACCTACTCCTCA
+TTTCTTGTGCATGCTATGACCCCTTTGCAGAAGATTGCAATACCACTTTTCCTGGGCCCC
+TCTTCCTCCCTCTAGAGAGCTGGGGAGTAAGGGGGCTCATTTTTTGGAAGGAGAGACCTG
+GTGAAAACTCCCCCAAGAACCAGCTCCTCCCAAACCCCCACTTGCCTTGGAAATAGGGGC
+CCAAACCCCAAGTCCCCCGCAACTCAACTCACAGCACGTGGAGAGGAGAGTAGCCGGCAG
+CAGAGACCCCTGGGGGGTGGCGTCCATGCCCCTGACACCGCCCCCCCCACGGGCCCCCCG
+AAGGGTGGGAGCCGCAGACACCGCTGTTTCTCCTAAATGTAGCGGCAGCTCTGACTAGGA
+CTTGCTATCTGTGTCATCCCTACTTCCCAGGCTCCCCTGCTCCTCTCTTGTTTCCACGGC
+AACCAGTCTGGCGACAGCTCCAGAGCATCCCCCTCCCCTCCCCAGTCCTGGGTACCACTA
+GGGGTTGATCCAAACCACACGCCATAGCTATTTTTAGCCAAAGTCCCTCCTCCTGGACAA
+CTCAGAGCTTGGCTCCTCACCCCCTCATAGGAAAAAAAAAAAAAGAAGAAGAAAGAAAAA
+CAAAGCTGGCACGCTGGCTCTTTCCTAAGAAGCTCCTTCCTGGGGGAAAATACAAGCCCT
+CATGTATTTGAGAATAAGGTGCTAGGTTCCCTGGGGTTTCAGAGGGCCATCTGGTAGCTG
+GCAGCTCACCACCTTCAACATGGGTCTCTGCGTGCAGGTGCTGCTCTCAGGTCAGGCTGA
+CCATCATGGTCTAGTTTTGGAGAGGCCATGGATCCAGAAGGCGGTAGCTAGAAGGAAAAT
+AGATCTACTAAAGGAAGGGAGGCAGGGTCAGGGGTTCTAATTATTGCCACTGATTTGCAC
+TGTGTCCCATAGGAGTCACTTCACTTCCCTGAGCCACAGTTATTCCATCTGTACCAGGGT
+GTGGACAAGAGCAAATATCCCCTTTCTGCCTGCTTCACAGAGTCAGCAGATCAAACGAAG
+AAAAAACAAACAAACAAAAACCAAAAAAACCACAAAGGGCTTTGAAATGTGTTTTATTCA
+TAGCAAAAACCCTAGAAACAACCCAAATGTCCACCAGAGAGAAAAGCCTAGGTTAGGGCT
+TACTAAGTATTCACTCGGCAAACTTCTGCGGAGCCCAAGTGCCAGGCAGTGTGTTAGATG
+GGGTGGTTACAGAATGGGTGAGTCCCTGCCCACAAGGAGCTCATTAACCCAGGAGGGATA
+AAGACACACAAACACACAGTGGCTGCTGCTACTAATTCACTGAGCATGGGATATGAACGA
+GACACATGCCTCATGTCATCCCCATGGCAACTCTATAAGACAGGTGTTATTTTTCCCAGT
+TATAGATGAGGAAACTGAAACAGAGACATGTTAAGTAATTTATCGAATGTCACAGAGTTG
+GGCAGAGACAGGAGCCAAACTCCCACATCTGTATCACCTCATGCTTTGTAAATTGTTTTA
+AAAGAAGACTTCTTAAAGGGTAGTTTTAGATTCAGAGCAAAACTGAGAGGAAAGTTATAG
+AGATTTTCCGTATGCCCCCTGTCCCCACACATCCACCAGAATGGCACATTTGTTACAAGT
+GATGAAGCTACATTGACATCATAATCGCCCAAAGTCCATAGCTGACATTAGGGTTCACTT
+TTGGTGTTGTACACTCTGTGGGTTTGGACAAATATATAATGATGTGTATCCACCATTATA
+GTCTCATTGCTGTAAAAGTCCTCTATGCTCTGCCTAGTCAATCCCCCTTCCCTAACCCCT
+GGCAACAACTGCTCTTTTTCCTGTTTCCATAATTTTCCTTTTCCAGAATGTCATGTCGTT
+GGAATTGTACAATATGTAGCCATTTCAAATTGGCTTCTTTCACTTAGTAATGTAAATTTA
+AGGTTCCTCCATGTCTTTTCATGGCTTTGTAGTTCATTTCTTTTTGGCACTGAATAATAT
+TCCATTTCAGGATGGCCATGGTTTATTTATCTATTTACCTACTGAAGGACATCTCGGTTG
+CTTTCAAGATTTGGCCATGATGAATACAGCTGCTATAAACATCCATCGGCAGATTTTCAT
+GTGGACGTAAGTTTTCATCTCCTTTGGGTAGATACCAAGGAGCATGATTCCTGGATCGTA
+TGGTAAGAGTATGTTTAGTTTTTTAGGAAACTGCCACACTTTCTTCCAAAGTAGCTGTAC
+CATTTTGCATTCCCATCAGCCACGAATTCCTGTTCCTGTTGCTCCACATTCCTGCCAGCA
+TTTGGTGTTGTCAGCCTTAGCCTAGTGTGCCTCAGTAATGTAATATCTGTGGAATATTAA
+TATCATGGGTTACTGTAAAGCCATAATAATTGCTGTGTTTAAATATGTGAATGTGTATAT
+AACACACTGTTACATAAAAAAGCAGAACCTAAAAATATTCTTAGAAATTGATCATGACTA
+TATTGAAATAGCTTCAGGAACACTGATAAAAATCAGAACAAAATTTGGAGAAACAGAAAT
+ATCTTATTGTTTATTTTTCCCCAGGGAAATAATGAAGAGAAGAAAGTGGTTTACAAACGC
+AAAGTGTTGTTGCTGTTGTTATCAGTATTATTATATCCCAGATATGGATCTATTCTTATC
+CAGATCCTTTAAATTTGGATCAAGAAAATATGAGCTAGAGGAGCAGGAAGACATAGAGAT
+GGACACGGAGGGCTAGAAGCTCCTGGAATGACCTGGCAAGGTGGGAAATTCGGTACTGGA
+TAATATAAGCTCAGGAGGCAATGACTTGGGTTCAAATCCCACCTCTGTCCACTACTAGCT
+GGGTGAACCTGAGCAATTATTTAACCCGTTAAACCTCAGATTCTTCATCTAAAAAAAAGA
+AAACCATATTATTCATCTCTTAGAGGGTTGTCATGAAGACTACATAAGGCAATGCCTGAC
+ACATTGTTAGTACTCAGTAAATGCTAGCTACTGTCTTCCTCTTCCTCCTCCTCATTGTCT
+TCTTCATGGTCATCATCATTGTCACTGTCATCACCATCATCTCAGGGATGTCCTGAAAGC
+CCCACCCTGCCTGTGCCACCTTCCTCTGTTCTGGCACCACCCTGTGAGGCCTGCAGGGGG
+CGCCCCCGGTATACTGCCCTCTCCTCAGCTCTCTTGCTGGTGATTCTTGTTGTTCCAAGT
+GAAAATATCGAGTTTTCAGCCCGAGATTGGCATCAAGAGTGCTGTTAAAGCCCCAGTCAA
+CATCCCCGTCTCCTGGGCCTTGGTCCTGTCACCCGAGCTTTGGGACTGCTAGCCCTGCCC
+TTCTCTGTCCTGCCAAGCAGCTGCTCACATTGGTTCCACTGTGCCAGGAGCTCAGAGAGA
+AGAAACCAAACTCCTGGGCAAGAACACATTTGCATAGAGCTGGCCCTATTTGCTGGAGGG
+GGCATCACTGGCAGGGCCCTCCACAAGGCAATTTCAGCCAGCCCCTCGCCTCAGTCCAGG
+TGCCCCTGAACCTAGCCTGGCTGACAAGGAAGAAGACTACCACTCCCTCACCTGATTCAC
+TCATCCTGCAGGTCCTAGACTTTACCTGTTATCTAACCTCAGTTCCTTATTTATCACTGA
+GCTGAACTGAGGCAGGAGAAAAAAAGTTTTCCCAGGCACCAGGACACAAAAGCATAGACA
+AGATTCTAGCTTTTGGCTTGGCACGGTGGCTCGAACCTATAATCCCAGCACTTTGGGAGG
+CTGAGGTACACGGATCGCTTGAACCCAGGAGATGGAGACCAGCCTGGGCAACATGGCGAA
+ACCCTGTCTCTACAAAAAATACAAAATTAGATGGGTGTAGTGGTATGTGCATGTGGTCCC
+AGCCACCTTGGAGGTTGAGGTGGGAGGATCACCTGAGCCTGGGAGCTCAAGGCTGCAGTG
+AGCCGTGATTGCGCCACTGCACTCCAGCCTGGACAACAGAGTGAGACCTTGTCTCAAAAA
+AAAAAAAAAAAAAAAAAAAAAAAAAGGAACCGTGGTGCAAAATAGGAAAAGAGCTTAAAA
+AGAGCCTACTGTGGCCTGTCACTTGGAGAACAGGGAACAGGTGATACTTTTGGGAGAAAA
+TAAAAGGTTCCCTTCCTCCAGCAGACACAGCTCTGAGCTGGGTGCAGGAATTGGCAAGCT
+GAATGCAGTTCAGTCCTCAACTTGCCCCATCAGCAGGTGCCTTTGTGGCAGCCCTGAGAT
+CAGCCAAACTGGGGAGGCTTCCCTGGACAAGATCATGAGACATCTTGTGCAAAATCTGGC
+AAGTGGAAGAGGAAGCATGATGGGGAAGACAGCAGACAGTTGAGGATATGAGTTTAGTGG
+AGCAGGGAGGCCAATCTGGTTGATTGCAAAAATTTGTATAAATTCAAGGGGGATGGGGCT
+CTAGAAAACTCAGAGCAGAGGAAAGATATGGTTGAAGGAGTAGATGTGGCAAATGACTGA
+AGCTGAAGATTAGAGTGAAATGGGGAGAAGCAGAGCACAGCTCCAGGCCAGACGGAGGCA
+ATTTCAGCCAGCCCCTTGCCTCAGGCTGGGTTCCTGAGGCTGAGGAGCTTGCTGCCTTTC
+TGGCTACTTTCAGCAGAGCAGGAGGATCAGAAGCCTTCACACAGAGTTCTATCTTGCTTT
+ATAGGACTGGGTAGAGATAGGCTAAGATTTAAAAGTTATATGACGGTTAGAGGTGGTACA
+GGATTAGCCCTTACTTTACAGGTAGGAATAGGTCCCAAGAACATAGTCAGGACTAAGACT
+CAGGCCACCCGACTCGAGATTCCTCGTGTTTCTTTATTCCCAGTTGCCTCCCTCCTATCA
+CAGTGCATTGGATGTTAATGGAATTTCTATCAATTGGACCCAACTGGAAAGGTACTAGAA
+GTCCCTAGTTAGAGAAATTTGTTTTTTTCTTTTTTCCTTTTTTTTTATTCCTTTTTGAGA
+CAGAGTTCCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCGCGATCTCCGCTCACTGCAAC
+CCCCACCGCCCGGATTCAAGTGATTCTCTTGCCTCGCCCTCCTGAGTAGCTGAGATTACA
+GGCACACACCACTATGCCTGGCTAATTTTTTTTTTTTTTTTGTATTTTCAGTAGAGATGG
+GGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTTAGGTGATCCACCCACCT
+TGGCTTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCTGGCCATGTTTTTCTT
+TTTTAAAGGAAAGAATTCTTTAGTAGGTAACTAACTATTCCCTAAATTATGTCTTATAAA
+TGTGGGTTTGTGTATATTGCATTTATTTAAAGGGGGTGATCTACTTACATAATGATTCGT
+TAGTGACTAGGGGAATGTAGCTCAGTCCTTCTCTTGGATTTCTCCTCTGCGGCCTGAGTC
+ATTCAGCTATGAGATGCTCCACTACTAATGGAATTTGTTCAGCACTGTGGGATGTGAGCA
+CCAAAACGCCACTTGGAATCCTGAATCCTGTGGTGAGCCATGCCAGTGTCTCTGTTCTTT
+CTAGCCACTTCCCCTGACTTGTTAAATACTAACTATTCTGAGTCTCTATCATGCCTCCCA
+ACCCAAAGCTTTCCTGATTCCCTCTGTCCACATCTGCTTTGCCCAGCGCAAGCCCCTCAG
+ACTGCTGCCCACTTGTGGATTCTTTCTAAAACAGGTGACACACCCCAACTCTCCTCCTCC
+TCTGCTCTGAGGTAGCCTCTGCCCCATTATAAATGAAGTAAAAGTAGAGATGAGGCCAAA
+GCTGGAAGCAGTGAGTCTGAAAGGAGAAATGGAGGAGAATAAGGAAGACGCAAGTTGGGA
+AACTGAAAGAAGAGAGAGGGGTCCTCAAAAAGAGAAGTTTCCTCTGTCATGTTTCATGGT
+GGACATTGGATGTTGTAATTCTGGGGAATTTTTCATACCAACATGTACTCAGGGGTCATA
+GCCTCTGGTTGGCAAGATTGTGTTTAAAACGTGCAAATTCCCTCTGGCATTATTCTGAGA
+CAAGCTTTGGGGTCAAATATATGTGGATTCAAATCTCAAAACACTTTCCAGCTATGTTAC
+CATAGGCAAACCACTTAACTCCTCAGAACTTCAGTTTCCCCATCTTCTGATCTATCTGAT
+GTAGTAAACACTCACAAATATTAGTGTCCTATGCCCCTTATTACTGATTCCCTACTGGGC
+TCCTAGAGACTGTCCTAGGACTTCTCTTTTGCAAACAAATTTAAAAATGAAAACCTGACA
+TTTGATATGTACAAAAGAATATAGATGGTGGACATATGTCTATATGTTATAAAGCATAAT
+TTAAGAACAAAAACTCAATTTAAATCACTCTTTTAAATTCAGAACATTACAGTGCTGTTG
+AAGCTACCTGTGTACTTTTCCACAAGCTCCCACCCCTGCTGTTCAAGGACTTATAAAGTC
+TTTCTAACTGCCCAGTCCCACCCGACTCTATCCTCACTCTGATTGTTAGTAGAGGACACT
+TCTGTAGATCACTTGTAGTTTTTTAGTCTGATTGACCAGAAGTAACATAGAATTAAGGAA
+GTCATGAGCACTTCCAGCCTCAGAGAGAAAGATGCCGAGTTCAGTCTGAGGGCCTGTCAC
+GTTTTTTATCTTCTTTTTTTTTTTTCTAGGTCTTCTGTGGTGAAGACCTCCTTCACTGTG
+ACCTGTAAGCACGAGGCAACTGTTTGGGGTTTTAAAACAAGGGCACAGGGACCGACATGA
+TGAAAACAGGTTGCCTCTTCCTTTGAGACTGGAGAGAAAGGTGAAGGATGGGTCAGGTCC
+TGATCTTTCTGATTCTGATAGTGGTCATAGGAATGAAAAAGCACATTGCAAACAGATGAA
+AGAAAGGGAGACTTGACAGCATTTGTTGGCTTATTTGACATGTCCAGGAATGGGCCCAAG
+GCAACGGTGAAGAGTAAAGGTCACTTGTAGTTTTTTATTCTAATTGACCAGGAGTAACAC
+AGAACTAAGGAAGTCACGAGCACTTCCACCTTTGGGGAGGAAGGTACTGAGTTCAGTTTG
+GGGCCTGTCAAGTCAAGATGGAAATCCCCTGCAAGCAACTAGAAATATGGAGCTCCAAAA
+AGAGTGTAGGATTGAAGATAGAGATTTGGGCATCACTCAGGGACAATCGTTGGAGCTATG
+TAAATGACAAAGGCTCAAGGGGCAAGACACAGAAAGAGAACATAAGGCTGAACCTTGAGT
+GTTAAAAGAAGTAGCTGTAGCTTTGGGTGTATAGGGGCAGTGGAAGAAGATAAGGCTGGA
+ATGGCACCAGGTCCAGGTTATAATAGGCCTTCTCTTCCAGGGAAAGGAACTCGGACTTTA
+CTTATAGATAGTTGGCAGTTACTGAAGGTTTTAAAACTTGGAAGTGGTACGGTCAGATGT
+GTGTTTCAGAGAGATGTACTTTACTGGTGCTTAATTACTTTCAAATAAAGAAATTCATAA
+TCCAAAGAATTATTAGAGTGGAGGAAGACTGGAGGCAGGGAGAGTAGTTAGAAGGCTGCT
+GCAGTAGCCCTGGTGTATTAGGCCATTCTTGCATTGCTATAATGAAATACCTGAGATAGG
+CCATGGTGGCTCAAGCCTGTAATCCAGCATTTTGGGAGGCTGAGGTGGGAGGACTGCTTG
+AGCTCACAAGTTCAAGACCATCCTGATCTACATAGCGATTCTTCGTCTCTACTGAAAGTT
+AAAAAAAAAAAAAAATAGCTGGGTGTAATGGTGAACACCTGTAGTTCCAGCTACGTTTGA
+GGTTGAGGCAGGAAGACTACTTCAGCACAGGAGTTCAGGGGTACAAGTGATCTGTGATCA
+CACCACTGCACTCCAGCTTAGGGGACAGAGTGAAACCCTGTCTCAAAAAAAAAAAAAAGA
+TTAAAAATTAAAAAAAAAAAACCCCAGAGAGTGGGTAATTTATAAAGAAAAAAGGTTTAA
+TTGGCACACAGTTCTGCAGGATTTACAGGAAGCATGGTGCTGACATCTGCTCAGCTTCTC
+AGGAGACCTCAGGAAGCTTTTACTTGTGGTGGAAGGCAAAGCAGGAGCAGGCACATCACA
+TGGCAAAAGCAGGAGCAAGAGAGAGTTGATGATGGGGGATGTTGCCATATACTTTTAAAC
+ACCCAGATCTCACGAGAACTCACTATCATGAAGATAGCACCAAGCCGCTGAGGGACCTGC
+CCCCATGATCAAAACACCTCCCACCAGGCCCATGTCCAGCATTGGCATTAAAATCCAGCA
+TGAAATTTGAGCAGGATCAAAGATCCAAACTATTATTACCTGGTGAGAGGTAATGGTGTC
+CTAGACTACAGCAATGATAGTGGAATAGAGAGGAAGGGGACAATATTGAGGCTATTTAGA
+ATACAGATGCATTGATTCAATAAGGGAGATGAGGTGAGGAAGATTGAAGAATCTAGGATG
+GCTGCCATGTTTCTGGTTGGGTGATGATAGTGTATCTACCATGGTAGAGAATAAATGGAG
+AGGAGCACTCTTGTGAGCAAGGTGACATGTTCGACTTTAAATATGTTGAGTTTGAAATGG
+ATATCCGGGTGGGCTGGACATATTGACACATGGGTGGTAGTTGAAGCTAAATGGATGGAT
+AACATTTTTGGAAAAATAATAAGTGCACAATAAATGTCTGTTAATGATCCATAAATCATT
+ACAGTCCCTTTGCTTTCTCTTCTGTGGTGTTCTTCATGTTTATTTTGGAGTTCCACCAGA
+GCCTCCAAGCTAAGCTTGTCCAACCCACCGCCTGTGGGCTGCATGCAGCCCAGGACAGCT
+TTGAATGAGGCCCAACACAAATTTGTAAACTTTCTTAAAACATCATGAGGGTTTTTTTTG
+TGATTTTTTTTTTTTAAAGCTCATCAGCTACTGTTAGTGTTAGTGTATTTTATGTGTGGC
+CCAAGACAATTCTTCTTCTTCCAATGTGGCCCAGGGAAACCAAAAAATTGGACACCCCTG
+CCTAAGTCCTCTCTGTCTGTGAATGTCAAGACTGTGGCCCCCTTAAGGATCTTTTGGATG
+TCCTATTTTTTGAAACTTAAAGGTCTTATACCAGTCTTGCAGGCTGGCCCTCAGACCACC
+TCACCACAGCCCCTCATCCATATTGACTTAGGAAGAGAGGGCATTCGGAGTGTGAAGAAG
+AAAAACTGGAAGACTGTTTCTTTTTCTTTTTATTTTGTGTTTTGTTTTGTTTTGTTTTTG
+GAGACAGAGTCTAGCTCTGTCCCCCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTG
+CAACCTCTGCCTCCCAGGTTCAAGCGATTCACATGCCTCAGTCTCCCAAGTAGCTGGGAT
+TACAGGCGCGTGCCACCAGGCCCGGCTAATTTTTGTATTTTTAGTAGAGATGGGTTTTTG
+CCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCACCCACCTTGGCCTC
+CCAAAGTGCTGGGATTACAGGCATGAGCCACCTGGCCTGGCCTGTTTCTAGTACATTGTC
+TGGTTTCTATTTCTCTCCTCTCTATGAGTAGAGTTCTCAAAAGTGTAACCTATACTTGCT
+GACAGTTTCTCACTTTCTAGTGGTAACTTGTAATCTGGTTCCCACCTTGCTCACTCTTCT
+GAAATTGCTCTCAAATGTCACTAGAGACTTTGTCACTGCCAAACCCAATGCCTCTTCTCT
+CTGTACTTGTTGGCACTGTTAGAACCTCCTCCTTAACAGTCTCTCCTTCCTTAGCTCCAT
+GACATCTCACTTTCTTTGTCTCCTTCCTGCCTGTCTGGTGGATTCTCTTTCACTCTTTGG
+GGGTCCTTCTTCTTCCCATCCCCTAAACATAGGTGTTCCTCACTCTCTGTGCCAGGCTCT
+CATTTCTCTGTGCAGTCTTTGTTGGGATCTACCCTGTTCTCATAGCTTCAACTCACACTG
+CCCTGGCAGCAATGACCTCCTTTCTCTCTCTGGTTCTGACCCCTCTCCACAGCTTCCAGC
+TCTGAACTCATCACTTCCTCTTCCCCCACCTCAGTCTCCTCACCAGGTGTGATCCCCCCC
+AGGCTCCCCTATGCCCGTGAATGACCTCGCAGTTCTGATCACCTGACTTGAGACTCTAGT
+ATGATCTCTGAAGCTTCCCTCTTTTGCTCAACATGTCAAGTTGGCTTCAAACTTTTTTCA
+TTTTCTTTTGATTTTTCTGTACTCCAATTCATTTTCTGTGTTCCACTTTTCACCATTGCC
+ACGCTGATTTGGGCTGTCATCTCTTCATGCTTATATTATTGCAATTATTTTCTATCGTCT
+ACTTGCCTACTATATCCACTTGGTTCAAACTATTTTGTCACCTCCCGATTAATTTTCCTA
+AAATGCCATAGCTTTCTCCTACTCCATCCTTCCCAAGAACCTTCAATTGCTCTCCGTAGC
+CTACAGAAACTCTGGCCTGACCACAAATCAGCACAACTCTGTGTTTCTTCCGGATGTTCC
+CCTTAACACAAACACTGCCTATGGTGCCTAACCTTTAGTCCCGTCCTTCCAGCTTCTCCA
+ATGAGGCTATTTGAATGGACATTTACTTATCTTATGGCTTTTAGTGGTTAAACTAAGCTA
+CATTCCTAGTTTCTTTCCTTTTTTTACCTGAATTTATAGCAATTCAATATGGTAACACTG
+GTTATTGATTAGCAGTTAAATGTTTATGATGATCAGAAATGAGGAAATAATTATATTCTT
+TTTAAATAAATGCTGGGGGTGGGGGTGAATTAAAACCTCTCAACACATAGATTCAGAAAA
+TTAAAGAAATCTTTGGTGATGAAAGGGTTGAGAACCATCACCCTGCACTCTAATCTAGTA
+AATGGGACAGTTTGATGCTGTTTCCCAAGTGAACTTGTATTTTCACGTCTCCCCAGGACT
+CTGCTACGTTGTTCCCTCTGCCTTAAATGCAACAGAGTCCTGGGGACCATCTCTCAGCAC
+CTGAATCTTCTGCTTCCTTTGGGACCCTAATCAGATGTTACCTCTTTCCTCCCACATCTG
+CTAGGAGCCCTCTGAGTAGAATGACCTCTCCCTTTTACAGCCTTCAGTACCTCCCGCACT
+TCGCTGAGGGTTCCGGCCTTATCTTTCCTTACAGAGAAAGCCGCTGCAGGTGGGCACCGT
+GTGCTGCGCGTCTCTCACCCAGCACCGCACCGGGGACTTAGGAGACGGTGAACAACACGG
+GAATTGTGCCCAAAGATACAGTCCTGTGGCTCAGGGAGCCCAGGCCTACTCCGCCCCAAT
+CTCACTTCCAGGCGCCGCCTTCGGCGCTGCCTGCAGGGCCACTGGGCCGGTCCTGAATTC
+ATTTCCACTTCGTTAACTCGGTCCCAGTCCCGCCCCCCGGCTCCAGCTCTCGTAACCCAT
+CGCTGACAGGACCGCCTCCCTTTTCTTCCTTCTCCCTTCCCCTGTCACCCTGCCAGCTCA
+GCTCCTATCGTTTCGCATATCGTCTCTTCATCTCTCTTTCTCCCCGGGTCTGTCTGGGTC
+TCTGTCTTTGGATATTTCTCTCTGCGCCTGCAGCTGTCTGGGTCTCCGCTTCCTTGCCCC
+GCACGCCCCGCCCCCTGCCCAAGACCCTCCAATCTCGGCGGCTCGAGCCTGGCGCTCCAC
+TCTCCCATTGGTCTGTTCGGAGCCCCCAGCCCCCGGCTGACCGGCCCCCCTCCGGGCCCC
+TTCATTCACACAAAGTATGGCTCCGNCGCTGCGGATGGAGGGGGCGGCCCGGCCCGGCCC
+AGCTCTGCCCTCGGCCGGCCCGACTCCGGCCCCGGCCCCCGGACAAGCCCTTATCTGATC
+ACAGCTCCGGGTTTAAGAGTCCTGGCCCGGCCCGTCGCACAGCTCTGCTCCTAACTCCTG
+CCCGCCCCGTCCGTCCATCTGTCCCGCTGCCCCGCGGCCCATCCAAGGGGCCACTCCACC
+TCGGACCCAAGGTAAGACCCTGGCCCTGATGAAGGAGTTCTGCCGCCCCGGCCGGGTCCT
+GTCCCCCTAACTCTCCAGAGACATCGGGGTCCCAGATCCTCAGTCCCTACCCTGCCTGGC
+CTCAGCGGTACCACCGTCTTGTTCCCCTGCCTCCCGCCTTGGGCAGTCCCAAATGGAGCC
+TCCTGGGAAGGGCTCCCCACGAGGAAACTAGCCAGGCTGGTCACCTCAGCCCTTTCATCT
+AAGGAAGGGGGATCCTCTAGAGTCGACGTGCAGGCAGCAAGCTGTCTGGGGGAAATGAGA
+AGAGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCCGGGTAAGGGA
+TATCACTGGGGTCTCTGCATTAGAGCCACAGGGGGGCTGTGAACCATAACCCTGCATACC
+CGTAACTATCCAAGTTGCTACACCATNACTCCATTTGATGTAAGGAAGGGGACTTTGCTC
+TTCCCTGCTACAACTGGGCTGACCAGGCTCTGAGTGCGGGGTGTGTAGATAATACAGTGA
+CAACCACACATAACCAGGCACCCCATACACAGGCACAACAGACAGTGCGCCCTCCTCCCC
+ATATGGACAGGGTACTCCCATCCCCAGCCCCTCCCCCACTCCCCATACAGTGACAATCAC
+ATAGTGACACCCATGATCGCTAGTTCCCACATGGGGAGAAAAACAGATACTGGCAGCTTA
+AACCGGGGAGCCACCAAAGCCTGGTGTGCTCCAGACCCCTGCCCACCCAATCTCTTCCTT
+CTATTCTGAGTGGCTCCCATTTTAAGACATCCCTCCTCATGGCCCCTCACCCTCACCCCA
+GCTGTTTCGCACTGGGATTTCCAGAAGGAAAGGAGCAGGGCCACTTCTGCTAGAGTGAGA
+AGGGCAGATTGGGTTCTATAAACTCACCCTCCAGATGGAGACGCCCCTTCCCATTCCATT
+CCTCCTCTCATCAGAACTAACCCAGGGCACCTGTGTTAGGGAACAGATGAAGGATGGAAC
+TTCCTGATTTGGGGGAGGAGAGCTGAACCCAGGAATGGGTAGAAGGACAACACTGGGAAA
+AAGTGAGGGTGGGTTCTGGTGTCTTGAAGTTTAATTCCAGTGAGTGCCCCAGGCAGGGTG
+AGGCAGATGGGGAAGAAGCTGGTGCAGATTTGGACCCCAAAGAGGCCAGTGCTTAAAGCC
+TGCTTCTGTTGCTTACCAATACAAATTCTGACTAAAGATGTCTGGCCAGTCTGCTCTTGC
+TGTGGTGTTTGAATTTTTTTTCTCCCCACTGGGACTGCTTTAGGCTGCCTGGGTGGGAGG
+GAGATAATGACAGGAGATTTAGGGCCCCCATGTGAGAGATGGCCTTGTTTAAGGGTGCTG
+AGGAGGGCCTGGAGTGGTTGTCATGAAATTAGCCCCAGGAGGGGTGGGCAGCCACCAGTG
+AGTTCAATTTAGTTTCATCCTGAAACTCTCTCCCTAATCTCCAAGGATTTTTGAACATAT
+TCCTGGCATGATGGTGGGTACCATCCTGTGCCAAGTGGTAGGGCTTGTAAATTTCTCTGG
+GGGTAGACTGGGTATAGGGGACCAAGTTTGGAAGGAACCAGGCATCCTGACTCATAGCTG
+GTTCTTCAGGGAGGAAGAAACGAGGGAGGAGGGGGATGGAAGGCATCATGCCAGGGAGAG
+GGTCCAGGGCTGGATTGTGGAGGCAATAGGCACGGCCCAGGCAGTGCCAACTCCTTTCCC
+CGCTGCCTTCCCCCAGTCCTTTGGTGGTGGTGGCAAGGGAGGAAGGGAGGGAGAGGGTGT
+TGGGGGGTGATCTGGGCTGGTCCAGCCTCCCAAAGCCCTTGCCAACCTGGGTCCTGAAAG
+GAGCTGCTGTTTTCTGAGCCATCCCCCCACCCCCCACTCAGAGCCTGAGCAGCCAAGTTG
+TCTCAGCGGATATCAAATGGCAATGACTCCACCGATGGAGGGTGGGTGGGGAGTGGGAGT
+GGCAGGGGTAGCAGGGTTATGGTTCCAGCCCCTCTGTGGCTCTAATGCAGAACCCCTCTG
+CTATCCCTTACCCTGAGGAAAAAATAAATCCATCTTCCCCAACATCTGACCCACATCCAG
+TCTATGGGTGCATATGTTTGTGTGAGGGGATTAATGATCTGCCCGCTGGTATCCCCTACC
+TCCTCCGTGTGCCCCATTGAACCACATTCTAGCTCCCTCCCACCTGCATCACTGTGCATA
+AGCTCATTTACAGCTCCAGCTTGGTTGGCTTGAGCTACAAGGAAGTGACTGGCAGAGGAC
+CCGGGGACCTTCCTTCCCTTGACCTTCCTTGGGACCAGAGAGGCTGGGACACTTGGATCA
+CGGTGGGCTCAGATGAAGTCCTCTGGAGTCATTCTGTGATTGGCAGGATAGGGGGTTTAA
+AGCTCTAGGGGGTGGGTCATGCAGAAAGATCTGTTTCCTCAAAGGATACTCCTGAGCCTC
+CTGGTAAAACTCTTGGTTTCTGCACACAAGATGGAAAGATAGGCCGAGCCTCCTGGCATT
+GGTGGGGAGGAAAAGAGACATTGGGCAATGGAGTCCCTAAGCTTTCCAGCCACAGGTTAT
+TTAGGACGGAAGAAAAAAACCTCCACTCTCCCTCCCCTCCTCGGCCTAGATTCTTCTCTT
+GCCTGACTGGTCAGTGCTACTGTCCAGGCTGAGGCCAAGCTGTCCAGGATAGCAGTGGGG
+CCCTGGAAGGGGGCAGAGAAGGAATCAGCCAAGGCACATCGTTGAGATGAAACTACAGAA
+AGATTGGAGAGGAAAGGGAGATTTAAAATCTAAGTGTTTTAGCACCCAATGGTGGGTGGT
+TGGCTGGGCAGCTCCTGGGAATGGGAGTTTGCCATGTCACTTCGGTCTCTCCAGCAGGCT
+GAGTTCAATGCTTTTCTTCCCTCATTTGAGTTTAGCCTACATGTTTGCCACCATTCTTTA
+GGGAGAGAGTGCATCTGGTGGCCCCTCTTCCTCCCCTCCCCCATCAGAACCTTGAAAGGG
+CAAAGAGAAAGGGATTGTGGGAAATGCCCTCCCGTGGGTGTTTGCCAGCTTGGAGCCTGG
+ATTTGAGGTGGTGGGGGAAGGGAGGGGCTGGCTCTGGAATGTGAGAGAGAAGTAAGAACA
+GAACTTTGTGGCATGGAGAAGGAGGTTGGAGAAGCAGGAGCTAGAAGAGGACTGAATCAG
+ACCTCAAGCCTCCAACAAGGTGCAGCACAGAAAGGGTATACATGGCCATTGTGCAACTTT
+TCCGATGGCACAACACCTGGGGAAAGGATTGTGGCTGCAGCTAGAAGGACTTAGGTTAGA
+TCTCAGGAAGCACATCCTAGAAAAGGAAAGACATTTGAAATGGAGACTGGGAAGGAAAGA
+GAGACCACTTTCTGGACCAAAAAAACCAACCAAAACAAAACAAAAACCACCAACAAAATC
+CCCAAACACAGAAGCCCATCCTTAAAGCTTCCGAGAGGAGTGATGGAGCTCAGAGTGCAG
+GGATTATTGTGACAACTGTCAGGGGCCTCCCCTGGGTGGCTGCTGCACCTCTCTCTTGCT
+GTTCCAGGGCTACTTTCCGCTCTGAGAGACAGCAGGCACTATGGTTAAGAGCATGGCCTT
+GGAGTCTGACTTTGTGGGATTCGGTCCTGGTTTACCACTTAGTAGCTAAGGAAGTCCCAT
+CCTCACAATAGGGCTCATAACTGCTGTGAGGATGCACAGTGACTACCGTAAGGGGCTTGG
+AATGGTCCTTAGTCTGTAAAAAGTGGAGTTGTGGGCCATTGCTTTTCTCCCACCACTCTA
+ATGTTTTCTACTGTGTCATCTTTCCTTGTTCCTGGGCTTTGTCCCAGCCCTTCTTTGTGG
+GTTTTGCTTTGTTTTGTTTTGAGATTCTGTCCTAGGTTTTTTCTTCCCACCCTTCTGGAG
+TTTGGGAAGGTAGAAGAATTATCAAGCAGTTTAAGATGCATTTCCTTCCTTGATTCCAAT
+AGGCTGGGCATCACTTGAGGTGGGGTCTCCACACCCACAGCCCCTGCTTTCCACAAGTTT
+TTGCCAAGCAGATGTCCTGTCTTCAGATTCAATAAATGAGAACTGAGTGGTTGAGCTTTC
+ACCAAATAGTCTCTTTCCCCATTCTCAGCAACTTCTCTTTCAGCCAAAGACTTCTGTCAG
+AGACTTCTTCTGTCGGTCAAGACAACCCCCACATCTCATTCCTGCTCCCATGGTCCTGCC
+TATCGCTCTTGTATTCTTCATCCTGCCTCTCCCTCTCCCGGCCCAGGTTTGCCCCCACAA
+CTTCTCTCAGGTTTATATGCCCTTCTCTTCTTGTGTCTCCTGATTGGCTTGTTCATCCCA
+TCAGCCTGGATGCCCACAGCAGCCTCCTTAATGACCTCCCTGACTCCAGCCCCCTCTCTC
+CTCGACTCCTTCCCTGTTACCCTATAATGAAATTATCACTGGAATTGTCATCATGAAGCA
+TCAGTTTTATCTTGCCCTTTCTCTACCCAGACACATCTGTTGGCATTGGATAAAAATTTC
+CCAATCACAGGCTTCAAGAGACTTTATGCCTTTCTCCCATCCCCTCCAGCCCCATATCTT
+GTGGAACTGCACTCTTCAGACCCTGATCCTGGGGAGCATATCCCAGGCAGTCTCTGAGCC
+ATGCTCCTCCCCACACTCTGGACCCTCACAAAGGCCATTCCAAGTCTCACCCCTCCCGCA
+GATCTCTATTCTCCTGTTTCCCTCTCTGCTCTGCCATCTTGCTTGTTACTTGCATCTGCA
+ACTTGGCCCAGGGTTCTCCATGCATGGCGATGTTGGAGGAGACTTGTCTCCCTCATTAGC
+CGGGGGTTTCCTCTAGGGTGAGGCCTGTACCTCCGTCAGATTGGCAAAGGCATCTCTTCT
+CTCTCTGGACCCTCTGGCACCCAGCAAAGGGCCTGCCTCCTAAGACACAGAGTAGCTACT
+GTCCAGTTTGACTCTAGGTAGGAAACATTTCTGATCGCTTATTCTTGGGGAAGGAGCCTG
+ACTTTGGAGGTTTATGTGAGCATCCTGCAGCAGGGTGTGTATAGGATCCTATTATCTAAT
+TCTATAGGAGATCTCAAGAGAAACCCCTCTCCTACTAACGACCCCATCCTGTAGACATGG
+CTTCACCCTAAGATTACAAGCCCTAGAACCCCCAATTTATAGTCTTCTAAGCATTGTTGC
+TCCACCCACCCCCCCAACACAAATGCAGGTATGTAGGCCCCAGAGAGCCTTTCTCCCCTC
+ACCAGGGCAGTCTGACTCCAGCCTGAGGCCGGGCTTGTCAGCAGCATCGTCTGGAATGAA
+AAAGGGGGATAAAGGTCAGAGTCTAGTCTGAGGCAGCCCCATTGTCCTGAGCATACCTCT
+GAGCAGGGGCTTGCAAGGTGGCAGACATGTGAGGTCACCTCCCAAGTGGGGTATAGGTCT
+GACCTCTTAGCACTGCATTCTCCCTGGACCAGCCTCCTCAGCTCCACTTCTGATCCCAGT
+TCCAACTGGTAGAGCCAGGCACACTCCTTCTCCACGTGTCTAAACCTCCCGGTTGACCCT
+GTGATAAAAAAGATATAGGCAACTGGTAAGAGGAGAGGGTGAGTGTATTAGAGTCTAGAG
+CACTGGTTTTTAAACTTTTTGACCTCAGGACCACTTTACATTCTAAAAAGTTACTGAAGA
+CTCCAAAGAGCTTTGGTTTGTGTGGGTTCTATCTATGAACATTTACTGTGTTAGAAATGA
+ACACTGGGACACATGAAATAATTATTAATTTTTGCATGTTAACAAAATGACGTATTTTTA
+ATGAAAAACAGCTGTATTAAAGTAAACAAAAAGTTAATAAGAAGAGTGACATTGTCTTAT
+ATTTTCACAAGTCTTTTTAAACATCTGGCTTAATCAAAGACAGTTGCTTTCTCATCTGCT
+TCTGCATTTAGTGTGTTGCAATATCATAGGTCATGTACCTTCTGGAAAACTCTACTGAAC
+ACTGTGAAATAATGAAAGTGAAAAGGGCAAATAATGTATTGTTAGCATTATTATGAAGAT
+AGGTTTGACTTTGTAGAGCCTCCGAAAGGGTCTCAGGAACCCTCCGAAAGGGTCCAGGGA
+CCATACTTTGAGAACTGCTAGTCTAGAGGAACCTTAGAAGACAGTTTCAAAAACCTGGGC
+CAATTTGAATATCCTCGTTGATTTCCTTCAGCTGCCATGGTCTTTTGAATGTTCTCACCA
+GGGAGGTAGTATGGGGTAGTGGTTCATCATGTTAAGTTCTGCCTCTGTTCTACTACTCAC
+TAGCTATGTGGCCATGGGCAAATTATTAATATCTCTGTGCTTCAGTTTTCTCATCTATAA
+AATGGGACTTTAAAAAGCACCCACCTTTAGAGTTGTTGAGAATTAAATGACATAATAGGA
+AAGGGCTCAGCGTAGAGCTCAATGATGTGAGCTCTCATTAAATTCCGGAGCCTTGCCCCT
+GAATCCACTGCGCATTGTCCCGAAGGCAGGGAGCTGCAGAGGAGATCCATCCAATGCTAC
+ACATAGGAGCTCGGCCCAGAGAGGGAAAGTGACCTGTGCAAGTTCCCAGAGAGAGTTGAC
+AGGGTTGTTGGGCCCCGAACCCAGGAGTTCTAAGGCCTAGCTCCATCTCTGCTCTCCCCG
+CCAGAATTTTTTTGTGGAGAGTGGAGGAGCAAGGTCCTAAGACGGGGAAAGAAGCAGACT
+TTACCTTTCTTTTCCCCAGCAATCTCAGGTGAGCCCAGGGCCAGGGCAGTACAGTGGGAA
+GCCTTGAGGGCAGACAGCAGGGAGAACTTCCGATACTGACAGCAACTGAAAAGAGGAGAA
+AGAAAGCTAGGAAGACATTTTCAAGCCCAGAGATTGTTCTGTCTTGCCGTGTGGGGAGCA
+GTTCTGCAAAGTGTCCCCGGCCTTCTCTTCCTGGGGCTCGTAGGAAGTCTGTGATTGGGC
+AACCCTGCTTCTCATGAGGTGCTCAGTCCCGCTCACTGCCCTCCTCTCAGCTCTCCCACC
+ACCCAAGGCCCAGGAGCCCCTCCCCAGATCATGCCTTCTGGCCCTGCCCTTTGTGCTGAG
+GCACCACAAGGCACAACATGGTGCCTTGGAAAAGCCAGTTGGTGGTGCTGAGGGCATTAA
+TGAGGATAGGGGTGCCTGCTGGGGTTCCCTGGGCAGGAGAAGGGAAGCTTCTCCTCACAG
+ATTGTAGAACAGGAGGAGGCAGATGGGGAGTTAGACATATACTGGGAAGAGCTAGTCCTG
+GCTCATTGGCCCTTCAGTCAAATTCCCCTGAGCTGGGCTTCCAGCTCCACCTTCACTAGT
+TCAGGTTGAATTCCCTGGAAGAGCACAGGTGGCAGGCGACTCTTGTCCCTAACCCCTGCT
+TCCCCTGTAGTAGCTTCTTCAGCCTCCAGAACCAGAGCAGGTGGAGATCTCAAAGAAGGT
+GGCACCCCAAGGAAGTAGAACCCCAAAGACCCCAAACCCTGAGCACATGCCCTTCCTGCT
+GGGGTTGGGGAACTTGCTATGTATTACTCTGTTTTTCTTCTCCTGCCCACAAAATTCCTT
+TGCATAGATGGTAAAACTGAGGCAAAACGTGGGAGGACTTAGACTGCCCACCCACTTCTT
+TCTAGAGCCCAGCCTGGGACCAAGGGGCTTGGCATCCCGCAGCCCCTCCCGCTCCACTCC
+TACCAGTCCCCGTGGCTTTTTCCATTTGGAAAGTCCTTTCTGGCGAGTGGCAGGTTTGGA
+GGCCCTAGTCTAGTCTCCTCCCCTGTCTCCTGGGCAGTGCCCCCAGGGGAGCCCCTGTGG
+GCCAAGATCTCCTTAGTCTCAGTCCTGGGCCCCCCTCACCCTCTACCAGCAGTCCCCCAG
+CTCCCGCCCCTCGGCTGTGGGACAGGGAGAGGGCTGAGGGGCTGGCTCTGCATACTAATG
+TCCTGCCCATTGTCAAAGCCCCTTTGTGCCAGGGAGCTCCATTGAGGCGGCTCCGGGGCA
+GGCACAATGGGGGCTCTGTCCCCTAGTTCCCCCAAACAGTCACCTGCTTTCAGAGCCTTC
+CCCACCACCCCCCCTTCGCCCCAGTGTCCGGAGGGCCCAGAGAGCACGGGGGAGTGGGCC
+CTGGGCCAGGGGGGATGGGCAGGAGGTGACTCCCCCACTCTCTGGGCAGGCTGGGGAGAT
+GGGCATTTGACAGGGCAGGCTGCAGAGGGCACACAGAGGGGAGAGGCCCTAGGCTCACAG
+GGGCAGGGGCAGTCTGTGGAGGAGTTGGAATGCAGGTTCTCTGAGGCCTGAGAAGGGGAA
+GGGGCACTCAGAGCCTGCCCACTTCTCGGTTCCGTATGCCCAGCCTGGACCTGTGGCTGC
+CACCTCTCCTCCTCCCACTACAGGACTCCCCGGGGCTCATATATGCAGAGACACACAGGC
+ATACACTCATCTATGCGGCCCCACACACGGCTTCCAACACACACCCAAACGGATGTGCCC
+AGACACAACACACTGACCCCAGCACACAGATCGCACGTCCACAACACCACCTTCTGGCAT
+ATGCACCTGCACACCCACCCTCCCTCCACCCCACCCCCCTTTCCCTTTTGCACCCCCTCC
+ACGGCCGCAGGCATTTCAGCACCATGTCCCTCCCCCCCATGCCCTTGCTGCCCTCTGCTG
+CTCTGACACTGGTTATAACAAGTTTGCACTTGTTGTGACCTTAAGAATGTGGCCCTGGGG
+CTAAGAATGTCCCGCTGCCCCAGACTGCACCTGCTTATACGTTAACCATGGACACAGCCT
+CTTGCTGCACTCTAAGCAATGCTCCCATTCCTGCCTCAACCATGGAAGCAGTTAGGTGGG
+CCAGGCCCCCCACCCCAGAGCTGAGTCAAGTACCCTGGACTGAGCTCTATGTGAATAACT
+AAAGCCCCACCTGGCCTCGCTGGCACAGGTCCAGTGTGTCCGTCTTTGCATTTGCCCAGG
+GCACATAGATATTTCCTAGCCCTTCCTGGCCCTGGGAGGTAGATGGAACAGTTATCATTT
+TCCCAGCAAGGCTCTGAGAGGTAAGATGATGGTGTCCAGGCAGCTGGTGGCAAGTCTGGG
+ACTAGAATGGAGGTCTCTCACCGCTAGTTTTTTTGTGTTTTTCCCCCAACTATTTGTCTC
+TTGGTTTCACTCAGATGACCCCACCCCCAACCCTATCTACAATTCCAACAGAGCTGTCTT
+TTAGCCACCTTTTTTTGGGAAGGAGTGGGGGCATGGTGAGGGGAAGGGGTGGGGAGAAGA
+ACGGGTCTAGGAGCCTTAAGTGGGCTTCTGGGTCTAGAAAGAACCTCCGAAGGTCACTTT
+GACCCTTGCACTACCCCTAGGCAGAGTGAGCCTACCTCCTTTCACTTCCCACCCTCCTAC
+CGGCCCCACTCCTGCTACGAACTCTGGATCACTGGAAAGTAACCCCACAGCCTTCTTAGC
+TAGGGTTTGGAGTTGGGGATCTCTATCAATTATAAGTCCTTCATGCTGCCTAGTACTGGC
+CTTTCCAGTTATTATCCTACGTCTGCACGAGGATCTGTAGGCTGTATCTACGGCTCACCT
+GCTATTCTGGTCCAGAGCATCTTCTAGCAGTTGGGAGCAGATGCCCTAAAAAGGGAAGAG
+AGGAAGGCCCATTGGGATCCCAGAGACTTGAGGGCTGACTGCACAGTGGTGTGCTGGGGC
+CTGGGAGGGTGGATGGGGTCACAGTTGCAGAAACTCTGTTAGATAGAGAACTAAGCTGAT
+ATTGGTTTGCTCATCAGCAAATATTATTGAGTGCTAATCTCCATGATGTTTCCTCCCATC
+AAAATTGCATGGGAGAACCTCAGGGCCCTTCTACTCCTTCTCCTTTCCCCTCATCTAAAA
+TGCTTCCCCTCTGGCTACTGGATTCTCCTTAAGCCGCCTCATCCATCTGTCTTCCTTCAT
+GCAGGAACACACCTCCCCCAGCCCTTATACTCTGGTGTGATTCTTTTGCTCCAAATGTTC
+CCTACTGGAGAAAGAGAACTGCTCAATGTATGTAACTTCAGCTTTCCCTGCTGTGCTCGG
+GCAGTTAGTTACTATAGGAGACAGAGGACAGTGAGACACTCACCTCTGAATCTAGTTCTG
+TATCTAGTAAGTGTCCTCATGCCATCCCTACCCCATCCTCCTTCTTAAGGGAGGAATGTG
+GATAAGGAATAAGTAAAAAGTTGAGAGGGGGCAGAGAGATTGCCTCCCAGAGTCATGGTG
+AGCTCCCTAGACTCCTCTGAGTGAGGGTCAGAGGCTGCTCTTCATGCAGGGAACAGGAGC
+CCCAAGAAGGAAAGGGAGCTGAGAGGACACCTGTCAGGGAAACAGGCTCAATTTTCTGAA
+TATGAATGACAGGTACAGAGTACCTCTGTGTCTCCAGCTCCATTAAGGGTACAAATAGGA
+AGAAATTGCTCAGATAGCAACAGGTAAATTTTGACTTAACTGGAAGAATTCCCTGGGCAT
+AAGGGGAGGGACAGAAAGAGGTGGAGAACTCATTGCCTTGATGAGAAGGAAAAGTCCCCC
+TTTAGTCACCTACTGAACAAAGCTTGAGTGGCCTGGAAGATAGTTCCAATGACTCTCAGA
+GCCCCTGTGGGGTTTCCAGCGGCCTCAAAGGAATCCCTTGATCATTTCCACAGACCCCAG
+TAAGAGCATTGCTGAATGTCTCCTCCTCGTGGACCTCACTATTTGCTGCTTATTCTAACT
+CTTCTTGTTGGGCCAGCTTCTGCAGGGGTGAGGGAGACCCTGAGACCTACTAGGTGGGAG
+GCCCAGATTGAAGATAATGGGCTGTGGCTGTGGCTGTGGTTGGCTGGGGAAGAGTTTCTG
+TGGATAATGCTCCAAGGTTGGCTCCACAGAAACTGGCACTTTTGGACCCCAGGGTGAGGA
+AAAGGATCTTGTAGGAAGAGATAAGTTGAGATGAAGAAGAAGACCAGGGTTGTTGCTCTG
+ATTCTGATGTAAAATAACATGAGCTACCAGCTAGCAGTGTGATCTTGGGAAAGTTACCCC
+CCTCTTCTAGGCCTTGGCTTCCTCTTTAGTAAAATGTGGGTATTAAACGTGATGACCATA
+GAGGACTTCCCAGCATGAATACTCTGTGACACAGTCCAGTAGCAAGTTGAGAAGCCCAGA
+CTTTAAGCAGATTTAAGCAGGCCTGGGTTCAACTTGCAGTGCACCCCTTACGAGCAGTCC
+TCCTGGGCAAGTTAGTTAACCCATGTAAGTCTAGTTTCCACCCGTTCATCTGAGGGCAAT
+GCTATTGCCCCCCTAGGGAGGCTAGGATGAAGCACGGCATGGAGCGTCCAGCACAATGCC
+TGGCGCACCGTAAAGCCTGAGGTAGGAAGTTCATGGTACTGTGGGAACACAGGGGGATAG
+CAGCCCATGTTGTGGGTGGGGGCATCCAGAAAGGCTTTCCAGAGAAAGTGACATCTAAGC
+TGAAGCCTGAAGGAGGAGTAGAAATTAAGCAAAGTGGGAAAGGAAGAGTGTTCCATGGAA
+GAGACCCCATTATTTGCTAAGAAAAGCTTGGTCTGTTCAAGAACCGAGAGGTTTTGTGTG
+GCTGGGGACATGATGCATGCGTGAAGTGGGTGGGTACAGATAGAGAACACTGATGCTAGG
+GAGGTCAGCAGAGACCAGGTCATAAAGGACTCTGTTCCACCAGTAATTCTAGGGACTTCA
+GAGATAGGGTCCCTCCACTTCCTCTCCTCTTCTTCCTACTTTTCATTCATCCTTTCTCCC
+AAGACTAATTTCTCTTGTCCTTGGTCACACGAGCACAAGGAAGCATTGGGCTCCCCTAGG
+ACGACCTGGCCAGTGCAGGAAGAAGCTTTAAGTTCAGAGTTGTCTGCCTCTGGACAAGTT
+TTGGCTTAGGCCTGAAGTTTTCTGCCTCTGTCCTTTCTCACCACTAGAGAGAGATGTGCC
+TTCCACAGAAGACCCTGGATTTTATCACTGAGGCACTCCTACTAAGCACTCCCACAGTGG
+GCCTCAGTTTCCCCATCTGCCCCTCTCATTGTAGAGAGTAGGGAGAGTGTTGACAGGGTG
+GGGCTGAGCACAGCCTGCAGAGAAGAGCTAGGGAAACTGACTCAGTGCCCCCAAGTGTCC
+AAACGTTGAAACTTCAATGGGAAGCTCAGAGGAAGGACTTCCGAAGGATGAAGTGGGTGG
+CACATGGAAAAGGGGGCCTGAAGGAGGAAAAGTGATGTAGAACATCAATCCTACAGGCTC
+TTTAAAGTCAAGAAATGCCAGGCACTGAGAAGGACACAAAGATGAACAAAATACAGATTA
+AGAAGCAGTCCAGAAAAGGTTCTCTGTTTGGGAAGATGGGGATGGTCCTTCCTGGAAGGC
+ACAGAGCTGATGAAACAGCCTCTACAGGAGTTGGCCCTGGGGGTGCTCTCACAATGGGGA
+GTAGCTCTTCTGCCTCCCCTACCCGCCACTACCTCTTGGGCCCTCCACCCCCTAGGTCCA
+AGCCTCCCCTCATTTCCAGCCAACACCACTTCAAGTTCAAAGACAGGAAAGTGTACAGAG
+AAGGGCCCTTCAGCTGGGATCTGGTCAGCCTCCCCCGCCCTTCTCTCAGGTGGAGAACCC
+TGTCTTGGCCCTGGGACTCCAAGGAATAGGTCTCATCATACCCTCCCTAGTTTCTCAAAC
+TCTTCTGGTAGATCAGGTGACAACTCATTAGCCTTCAGCCCAAGAACATTATCTCCAGGG
+CCCTGGCCATCCTAGGCCTTGAGAAGGCTGCCAGCTGCCTGATACTCCTGAGGCTGGGGG
+CAGACAAGAGGGCGAGAGGCTGAGGGATAGTAATTAAGGAACTTGGCTCTACTCAGCAGT
+CTTCAGCACACACTTGCCCACCTCTTGGCAGGCTGAGAGGGTTTTGGAGGAATTATAGAG
+AGGTGGGATAAAGAATAGTGGGTTGGAAGTCCCCTTCAATTCTGGGGCTAGAGTAAATCT
+TACCCCACAACTCACCTGGAGAAAGGAGATCGAAGGGATGGTGTAACTTGAGCATCATCA
+AGGCCACTGGGGTGTGGAGAGGCAACGTGAGGGAAAAGCCCACAGGATTTGGAGACATGG
+GGTCTGGCTTTGCCATTAAATAAACGTCAGCAAGTGACTTTATGTCTCTGAACCTTAGTT
+TCTCTGATTCAGTAATATTTTTCATTCATCTGCAAAGTAGGATAATAACATCTTTATTTG
+CATAAGGGCCTTACAGATTTCATATACTTTAAATCTTGTAATCCTGACCACATAGTGGAA
+GGTTTTATTGCCTCCTTAGATGAGGAAGCTGAGCTCTAGAGAGATTAAGAGATTGACTGA
+AGTTCCCCTAGCTAGTAAGTGACACAGACAGGATGGCAAATAGCGGGAAAGGAGAATACA
+TAGGGACCTTTGTGACTCAGACATGCCAGGATAAGAAATAGGTTTCATTTGTTGGGCTAA
+CTCCATTTGATTGAAAATGACTCTCTGTATCACCATATTGAGGATGATTCTGAGGCTCAG
+TCAGATAAAAAGCACTGTGATGAATTAGTGTTATCTGCTAAGGGGTGGTGGGGTGATGAG
+TGATGGCTCACATGCTGTATAGTTGACATTCCTAGCCCAGAATCACCCAGAGATTCTAAT
+TCCTCCCTCTTTACTAACCAATTAGGTTAAATTTAAAGGTAACAGCTTACAAAGGTGACT
+AAAATACAGTCTGTGCCTTCAAAGAGCTCACAATCTCTAGAGGAGAACAAAGGATGATGT
+GGCAAATGTCCCGGGAACATAGAGGGAGAGTAACTATTTCTACCTGGGAGGTGGGGAAGG
+CTCCACAGTGGAGAGAGCACTGGGGAAGGGCCTTGAAGGAGGAAGGGGATTTGAAAGGGA
+AGACAGCAGTCTGGAGGGGAGTCTTGGAGCAGGCAGAGGATGTTCTAGGTAGAGCGAGCT
+GCCCGAGCAAAGACAGGGAGACAGACAAGTGGAGCACAGGCTATGTTTCACGTGGCTAGA
+ATATGGGCTGCACAGGGAAAATATTCCAGGGGAGGCTGTGAACAGTAAGGTGGAGTCAGA
+CATTGAAGGATTTTGCCAAGAAGTTTGAATTTTATCACGAAAGCTGTGGGAACTTTTTGA
+AGATTTTAAAGCCAAGTGATGGGGTCAGAATGTGTGCACTGAGGCAGCCGAGGACCGAGA
+TGAGCACAGTGAGCCTGGGTGCAAGAGAACCAGGAGACCATTGCAACACTTCAAGTGAAT
+GAACTAGGGCAGCCTCCATGGACATGAAGAGGAGTCATGAAGGCAGAAATGATGGACCTA
+GTTACCAATCAGATGAGGGGGAAGAGAGAAAGCAGGGGTAATTCTAAGGCTTGGGCACCT
+AGTGGAGGGTAATTCTGTGAACAAGAAGGAAAACACAAAAGAGACTCTTCTGAGGAGACA
+GGACGCTGAGTCTGAACATGTTAAAATTGGAATGCTCCAGGTAGCGGGAGCCTGTAAGTA
+CTTGGACACCCAGGGCAGAGGTGACGTGGAATCCACGGAACGGGGGAAACAGTTGAGGAA
+GATGGAGTACAGTGATCAAGGATGAGAGTCAAGAGTGGATGGATTCCTGAGAAACACCAA
+CAGTTAAGGTGCAGAGGAGAGATTACAGCAAGAGAGATGATCCAAAGTAAACACAGAGGT
+AGAGAAAGAACCCCGAGAACATGGTGTCCGAGAGTAACAGTGAGACTGTGCACACATGTA
+GTGTTCACAACCCATGCAGTGTTCTCACACATATGATCACATGGAACTCTCCCGTCACCT
+CTGTATTATTATGCCCCTTTTGCAGATAAGGGAAATAGAAACTCATGCATTTAAGTGATT
+TGCCCCAAATCACAAAACCTAGAAATGGCAGAGCTACTTAATGGCTAATCAAGGTAGATA
+AAATCAGTGGATTCCCACCACACCTCAATAAAAGCCAAAAGAGAGTGTTAAGAACAGACC
+GGTCAGTCATCAGTCTCAAGTGTAGCAGAGACTGCTGGGGTCAAGGGCTGAGAAGACACC
+CTTGGATTTAATGATGGAAGGTCAGCTGTGACACTGGAGAGAGCTTCCTGCTTCCTGTTG
+CTTTAACTGCCCCCTGCCCCCCCTAGACTGGCTGTGTGGGGCAAATGGAGTAATTCTGAA
+TTTATACAAGTGTTTTGTAGACTTCAAAACCCCATATTACAGAGGTGATTGTTACTGTTA
+TTTTTACTGTTATTAGGTATTGTTAGATTGAACCCTTTGATATTGCCAATAGGGAACCCA
+TGTATTAAATGGATGATTTCATAGAATTCAACCTAATAGTAAAAGGAGTGAGCTGGAGTC
+GGGGAAGGGGCTGTGAGGTTATGAGACAGGCAACTGCGCAGGCAAGGGTGGCCCCAGGGG
+ACTGGGGAGCCCCACCCCCTGATGGTTGGGGAGGTCAGTCTTCCGCATGGGGTGGAGAGT
+GGTAAATCGCTGGTGAAGCTTGAAATTTGCAGGAGTTCCCTTTCACCTTCACCACTTCCA
+CACTCCCCATCTGCAACCTGGTCAGCCCCCTCTGCAGAGACTAGGGGTGGGGGTACACTC
+TCGTGAGGGGCTCTGACTGAAAAAATACTTCTGGTACCTAAACTAGGTTGATCCCCTCAA
+CCAGCAGTCTTTGGAGATAAGTTGGGGATGGATTCTTCCTATGGGGAATCTCGGGGCCTA
+GGCCAGGTGATGACTGATTGGAGATCCAGGCAGAGCTGACACAGGAAGGAGGGTCCTGAA
+CAGACTTCTCGTTTATCACCTTGACTCTCCCTAACCTAGCTCGGATCCTCTTGAGATTTT
+CCCTAGTGTCCTTCGGAACCTTTTCAGCATCGCATGTCTTAGCGGTAACAACAGGAATAA
+CAATAGCAACAAGAGTTATTTCTATAGATGCCATTGCTTCCAGCTGACTCTTAGCACTGA
+TACTAATTCTTCTGGCTGTTCAGTTAAGATGAGGAGGGATGGGTTAACATCTCCCAGTAG
+ATGTTCTTCTGAGGTGAGGACCCAGATGGGGCCAGGCTGGGAGGCTACTGGGATTGCTGG
+AGCTGGGGCTGAGGAGACAGTGGTATAGCCTCTGTCCTCAGTTGAACCCTGCCTAGTGCC
+AAGCTCCTCACAAAGGACCAGCTCCTGTTGAGCCTTGCTGGCCCAGACAATGATTAGCAG
+CAGATACCGGGATGGGGTGATGACTCAGGAGGGCGTGCAATAGGGGTAGTGTGAAAGGTT
+GAGAGGGCAGGTGCATCAAAGAGGCAGCACCCACTTGTCACAGGAACCAGGAAGTCCTCC
+TCCCTCTCCAGCCGGCGAAACAGGGGGTCCCCAAGATGCCATCCTGTGGGCAGGTCTCTA
+CAGAACATTGTTTCTTTCTTGCAGGGGACAGAAGGCACCGCTTGGGGATTGAGGTGGAGG
+AAGGGGCAGTGGTCCTAGTCAGCTCACATTGACAGGCAGGGGTGGTGTCTGGCCAGAGAA
+TATCTGATGTGGCCCCATCTTTTGTCCCTCCCTCTCTTGACTGGAAAACACACAGGCAGG
+TCAGAAGGGCCATGCAGAGGTTACAGACTCCCAGGAAATGCTTTGAAGGCTTTTATCCTT
+TCTTTTGATCAGACTATGAGAACTTTCCAAACGTTGACACAAGAGAAGAAAAGCCTATTC
+TTCCATGGGCCAGGGCCTCGATGTCAGGCTGGGGCTAGAGGATGGGAGGGAAAGGGGGAA
+GGAGACCAGGGAGTGGGAGACAGAGGCAACAGTGATTCAGCTGGAGAGTCAGGCCCCAGG
+ACACACACACACACACACACACACACTGCCCTCTTTTTCCCCACCTCAGGATTTGTGTGC
+ATCTGGCTCCATTTCTATTTGTTTATTTTCCTTGGGGCAAGACAGGAGGCGGATTAATGA
+ATAAAGACATCAAGTTTGAAAAGAACCCCAAATCCAGCTCCATTCTTCTTCCAACCTTGA
+GGGCTTACATAACCATGCAGTGCTTTTCTTCCTGCCCTTCCTCTCTTACCCCTCTCCTTC
+ATTTTCTCTTCCCCAACCCTCATTAAGAACACTTGGAAGCCCAGAAGTGCTTCTTTTTCA
+TCCCAATGCAACTGATCTCCTTCTGGAGGACTCTAAGGTCGCTATTCAAGTTGGAACTGG
+TATCCAAAAACAGATAATATGGTTGGTGAGATGAGGCTGAGTAGCTGTTTGCTCTCTCCA
+CCGAGGCCGGGGGGAGGAGTGGGACCAGACTGCTGTGGAGGCAACTTGGGCTAGACATAG
+AGATTTACAGCAGGAAGTGACCCTAATCATCCTCTAACGTAGCACCTCCCAGACTGACCT
+CGCAATGTGTCCTGAAAGGCAGAGGAGAGGGAATTTGTGCCCTAGAATCTACAGGGATGA
+GCTGTAAGCGGCCACACACTGGACATCCTTGAAATCTCTTTTTGTCTCAAGCCATTCAAA
+ATGTTTATTCTACTTCATTGTACATTGTTGGCATAATATTAAAACAACATTTTCCCTCAA
+AATCTGAGCAAAATTGATAACTTCAGGAAATATACTAAATTTAGTCCTGTGGCTTTGAGT
+GCCTCCTGACATACTAGTGTATGCCCTAGTTTGAGAAGTGTGAAACTAATACCAACCCCT
+ATTCCACAATGAAGGAAATACAGATCCGAGAGGCTGAGAGGCTTGACTGAGGTCACAGCT
+AGTCAGTGGCAGAGTTGAGATCGGCACCCAAGTCTCTTACTTCCCAGTCCAGTGCATGTT
+CCACCAAGCAGTGCTGTCTCTTAGTTCTGAGTGGCTTTTACACATCCTTGAGCCTAATTC
+TCTCACTCTATCTTCAGAATACTGGGCATGGACAGATACCTAGCTCAACTGTCTAAGATA
+AGTGTGGGTGTGGAGACCAGCCTAATCTCAGGTGCTGAGACATAGTGCTCAAAACCCCTC
+TACTGCATACAGGGCTCTCCCAACATGGAAGAGGAAGGAGAAGGATAGGACAGGAAGGGA
+AGGAAGAAAAGTACTGCAAGGATTATGTAAGCCCTCAAGGTTGGAAGAAGAACAGAGCTG
+GATCTGGGGTTCTTGAGCATGATGGACAGATAGTAGGACAGAGAGCAAGCTGAGTCCCTA
+TTTCTATCCACCTATTCATTTGTCTTTACCAAACAGTGATTGGTTCAGAAAGACCTTCAA
+ACCTAAGATAATTGGATTAGTTAATAGCTATAGTAGGCACTGTTCTTATTTTCCAGGAAT
+CTGCCTCCCTTTCTGATCTCATTTCCAGATCCCTCAATCAAAAGGCTCTTTTCTACAGGG
+AAGAAGTAGGGAGAAGAATAGGAGGCTTTTGGGCTTAGAATATCTCTATTAATATGGACA
+CATAGGCTGTGTGATACAGAAGAAGAATAGAAGGATAAAGTGTGGTAAAGAATTTGAAGG
+CGAGAGTTATGATTAGATGGATAGCATACTATTACCAGGATGAGATACAGGAAGCTGGAG
+AAACCATAAGGAAAAGAAGGGACAGCCAGAGAGATGTGTAGGCAATACAAAGGCAAAGAG
+GGAGGTGAGAAGGAGGAACGGGTCGGGGAGACATTTTAGTGACAGCCAGGATGAAAAGAG
+AGGACAAGCAGAGAAGCACCATTGATAGAGATGAGAAGACCGTTGGGAGGAGAGTCCACA
+TGAAGGAAGTGGGGAGAATAAAGACAAATCATGCAAAACTAGGAGAAACAATGGGATCAA
+GCAGCCATGAAGGAAGGGCGTCGCCTCCAACCCTTGGATACATCCTGGGCATCACTGCTG
+GTGAATTTTGCATACCTGTCTGGCTCCTGCTAACCTCCACTATGCTTAAGGTGTCATCTC
+TGCTTGTTGAGTGTTGAGTTTGCTCAGTGAAGACAAATAAACTATAGTATTAATTTGAAG
+TAATGGTGGAAAGGACTTTGGAATCCAGCCAGGAAGAGGCATAAATTTCAGCTCTGCTAC
+CAATAAATTGTGTCACCTGGGCTCTCTGAACCTCAATTTCCTCAGCAGTAATTATGAAGA
+TAATAAGATTTGTTTTACAGAGTTAAAAAGATGAAATATGAAGTGCTGGTCTACAGTAGG
+CATTCAATACATGATAGTTCCCCTCCATCTCTCCTTCCATGTCCCTGAACCAAACATTTG
+CAAGAGGAAATACTCCACAGTCAAAAAACAGAAATCATCTAGTGGCTCTAAGAAAGAATG
+ACCAGCAGGCTAGGCCAGACACCGTAGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCT
+GAGGCAGGTGGATCATGATGTAAAGAAATCGAGACCATCCTGGCCAACATGGTGGCCAGC
+TGGCCAGCTGGTTATGGTGGCTCATGCCTGTAATTCCAGCTACTCGGGAGGCTGAGGCAG
+AAGAATCGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCTGAGATTGCATCACTGCACT
+CCAGCCTGGAGACAGAGTGAGACTCCACAAAAAAAGAAAAGAAGAGAAAAGAAAAGACCA
+GACCAGCAGGCTGGAGCACCATGGAGGAAGCCAAGGCTCTGTACCTGACCCTTGCAGGAG
+CTGCAGGGCCTGACCCTGGCCCTGCAGCTGGCCATGAAGGAATCCACAGGGGCTGGCCAC
+AAAGCAGGAGGGTGTTGAGGGGTCACAGCACATCACTGTTTTCCCAGTGTGTCCTATTAA
+GATCTTTCATGTGCTCCCAACGCTTCAACCAAAATGAATGGATCCTTATTTCCTTATTTC
+CTGGGTGGCCCTGGATTTCCTCATCTTTCCCAACCCTGTTCCCTTGCACTAGATGTCAGT
+TTTCCCTTCACAAGCTCTTCTTGTACTTCACTGATGCCTTTTCCATGGCACTTATCACTT
+CCTGCCTGTTTTACAGTTTTCTCCTTTATTAGATTGCAAGGTCTTCACGGAAAGAATCTA
+CATCTGATCCATCTCCATAATTCCATTGTCTTGTTTAGAATTTGGCACCCAGTAGTTGCT
+GGAAAAAAAAATGTTTACTGAGGCTGGGCATGATGACTCATGCTTGTAATCTCTGCATTT
+AGGGAGGCTGAGGTGGGAGGATGGCTTGAGCCCAGGAGTTTGAGACAAGCCTGGGCAACA
+TAGTGGGATCCCATCTCTACAAAAAAAAAAAAATTAGCTGGGTGTGGTGGTGTGCACCTG
+TAGTCCCAGGTACTCAGGAGGATGAGGTAGTAGAATCACTTGAGCCCAGGAGGTCGAGGC
+TGCAGTGAGCCATGATCACACCACTGCACTCCAGCCTGGGTGACAGAGTGAGACCCAGTT
+TCAATAACAATAACAAAGTGTTTATTGTATAAATGAACAAAAACACAGCTCATACTGAAT
+GTGGTGGGCAACATTAATACAGATGAAAACATTTGGTTTGTTTGCATACCTATTAGTGTT
+GATAATTAGTTATCTCGGGTTTCCATGTGTACAAGGAAAAAAGAAAACATCATTAAAAAA
+TCAAGAGAAGATACAGACAAGAAGGGGGAATGAGAAGGCCCCAGAGTGAGAAGTGCTGGA
+CCAGAGTGAAGTTCCATCCATTCTACCTAGGACAAGAACTTCCACATCTCCCTGGCCCTA
+TGATGATGGAGTCAGTTAGATTGGCCCCTATGGCTGGAGTGGGTTAGGAGTCCCCTCCTA
+CTTTTAATTCAATCATAAAATACTTCTTGAGCACTTACTATGTGTCATACATTGTGCTAA
+TCTTTAGGATAACATATATATATATATATATATATATATATATATATATATATATATAAA
+TAAATAAAATGCTGGATCCTTCTTTCAATGACCTTATTTGTAGTAGAATACAAATGATAC
+ACCATGTACTAAGAGCGGTGCCATTACGGGTATAGTTGGATATAGATATACATATATGGA
+AACTCAGAAAAGAGAGGCATTACTTAAGGTTGTAGGAATCAGAGAAGGCTTCATGGAAGA
+GGGGTTATTTGAACTGGACTCTGAAAGATAAGACTTGGATAGGTAAAGACAAGGAGGAAG
+GGGAGCCCAGGAAGAGAGAAGTGCAGGATCATGGAAGGCAGAAAAGCATGGGAGTGCACA
+GCAGGGAGCAGGGAGTGGTTGGTTTGCTTGCAATATGCAGTGTGCCATAGCAGAAAAGTG
+GTCGGAGATAAGGCTAGAAAGCAGGTTCAGAAGCAGACTGCGGAAGGCCTTATATGCCAG
+CCTGGGTTTGGCCTGTCTTTGATAGATAATGGGGAGACATTAAAATGTTTTGAGCTAGAG
+AAAGAACTAAGACAATTATAGTAAAATGAAAAGTGGATGGATTTGGGAGACGGGACCTGG
+GCATGAATCTAATTCCACAGTTTACTGAGTCACACCATCTCTCTGAGCCTCAGTTTCCTC
+ATTCGTAAAACAAGGATTATAATCTTAATATTTCATAGGAATGCCATGAGAATAAAATGT
+GAAAGTGCTTTCTAAAGTACTGGATAAATGTTCATTGATTCCTGATCAGAGGTGTATTTG
+AGGAAGAGAAACCCAGGAGCAATGTAGGATGGATTTGAGGAGAGAAAGAACAGAGACATG
+AGATGAGTAGGAGGCTGATGCAGTGATTCAGCCACAAGGCATTAGGACCTGAAAAAAGAC
+AATGGAATGAAGGAGAGGAGGCACAAATAATAAATCCACAGCACATGGCTTTAGATAAAT
+GTTGAGTTGAGGCAAGAAGAGAGTTTGAGATGATCCTGGCTTTGAGTGTGGGTAATGGGT
+AGGATGATGGTCCCTTTAACTATTTTAGGAACCAAAATAGAGCAGCAAGTAGGAGAAGAA
+AGGATTTTACAATTGGAAGTGAATGCATTTAGTATTCATATCCCAAAGCTCCTTTCAGTC
+AACTTGCTCTTGCAACAGAGAAAACAGACAAAGCACTTTCCCTGTGGGTGCAGCTTTTCC
+CCTCCCCCTGCACCTGCTGCCCCTGGGAGGGGGTTGAAGCATCTTCTCTAGTCACCTAAT
+TTAGCATAATCAACTCCAATGCTCTGTAGCAATTTACAAAGGTCATTCTCTCCCCAGCCC
+CCTGCAAATGGACAGTTGGCCCAGAAGAGAGCAGGGACTGATATCTTGGGCTTTTATTTT
+CTTACCTTCTGAAAACAGGGTGTAGGCATCTGCAGCACTAGCAAAGCCTGTGTAAACCTG
+GGTCCCCTCGCACTCTGAAGTGGCTCCTCCTGAAGGTCTGTGTGAACTAGGTGTCTCCTG
+TGAGGCAGATAAAAACTGCAGCCCTCAGGGGGGCCTTGGCCCGTCCAGATCAGGCTTTCA
+GTCATTCCCTTACTCTTTCTTAACAAACTATACTAGGCACTGGGGAAACAGATGAAAGGC
+AGAGTCAGTGGTCTTAAATTAGGAGCACAAAGATGGTAAACAATGATAAACAAACCATCT
+CAAAGCACTATAATGCTTCAAAATAGGAATATGTACAAGAGGGCCAGGTGCAGTGGCTCA
+CACCTGTAATCCCAGCACTTTGGGAGGCCCAGGTGGGCAGATCACTTAAGTCCGGGAGTT
+TGAGACCAGCCTGGGCAACAGGGTGAAACCCCGTCTCTACTAAAAGTACAAAAATTAGCT
+GGGCATGGTGGCACATGCCTATAGTGCCAGCTACTTAGGAGGATGAGGTGGGAGGACTGC
+TTGAGTCCAGGAGGCAGAGGTTGTAGCGAGCTGAGATTGCACCACTGCACTCCTGCACTT
+CAGCCTGGGTAACAGAGTGAGATCCCATCTGGCGGGGGGTGAGGTGGGGGAAAGAGAATA
+TGTACAAGAGCAAGCATAGTAAAGTAAACCCATTCTGGGCAGGGATGGGAGGGACAGAGA
+AGATGAATTGTTTTTTTGAGAGTCAGGGAAGGCGTCAAAGGGAGTAACTAACAGTTGAGT
+CGAGTTCTGAAAGATAAGGAGATCATCAGGTAGGCACGGAGCAGGGCAGTGCAGAAGCAG
+GTCCAAAGGTAGAAAGGATTGGGTAGTGAGGCTCTGCGTTGGAGTTCAGGGGGCCTGTGA
+AAGATTGTCAAGGGTAAAGTCAGAAGTAGGCAGAGACCAAATCATGAATGGTGTGCTGAG
+AGGCTACAGAGAAAACAGATAAAGCACTTTCCCTGCTGGTGCAGCTTTTCTCTTTCCCCT
+GTACCTGCTGTCCCTGGGAGGAGGTTGAAGCACTCTTTCTAGTCAGAAGTGTATTTGAGG
+AAGAGAAACCCAGGAGCGAAGTAGCAAAAAGCAAAGGGGAGCCACAGATCAAGTTATTGA
+GAGTCACAAGAACAACAGTGTCCTTACTCAAGAAAGGCCCTGATAAATCCATGTATTATC
+CTGGAGAATAAATGACTCTCAAGTAGAACAAATGACTCTCAAGTAAAATAAATGAGTGCA
+CAGGCAAAAAAAGACAGCTTCAGGAAGAACTTCCGGGGGTAGCAGTAAGTGGGAGTGGTA
+CTTGCTGAGGGAACGTGCTGATCATCACTGTAGCTATGACTGGCTTTGTTTTAGGGAAAG
+GATCACTAGTTCAGTCTTAGAGGAGAAAGAAGGTGAATATCTCTGTTTCTGTCTCTTTCG
+CCACTCAGGCTGGTCTTCAGAATTCTGTCTCTGGTATTTGGCCGGCTGGGACTGTTTTGG
+GGAAATAGCACCCTCTAGAGGAAAGAGTATGTTTTATTTTGCGGGGGTGCCCCGGGGTGG
+TGGGGTGCGCAACAGATGATTTCCAGAGCTTTGGCTCGATTCCTTCCCCAGGGGCCTGGC
+CAGCTTTGGGGTGGAGTGTAGCTGATTGTTTATCCGCTTCTGACTCTGGCCACCGCAGAG
+AACAAAGCTGCCTGGCTGTGGGAGGTGGCACAACAGCTGGGAGCAGCAGATAACAGCTGG
+AGGTGGCAGATACAAAGGCTGGCAACCACAGGCACTACAGCTGGAGGTGGCAATCACAAC
+AGCCAGAGGCAACAAAGAGGCTAGCAGTGGCCGAAATCTCCAGTACCCTTGAGTCTAGAG
+CCTTAACCCTGGGATGTGTGGTCTTACATACAGCGTACCATAGTGGCACCTGCTGAGAGC
+ATGAGGTCCAAGAGAATGAACCTGGGGCACACAAAAGCAGTTGCTACAATTAAAAAGTTC
+TGGACCAGGAGCTGGGAGACCTGGATTCTGGTTCCAACTTTACCACTAATCTACTTGGGG
+CAAAGCTCTCTGAGCCTTAGCTTCTATGGTGTCTTTCTTACCTACCTCATGGTGTGAGGA
+TCAAACTATATATTTTATAAATTTTAAAGTGAGAAGGAATTATAGACTAATATTTTGGGA
+ATGATAAGCAAGGCTTTTCTTGGGCCCTAAGAGCTTCCATTCTCCAAGATATGTAGAATC
+TCCCCTCAACATAAACAAAGCTAGGACTTCAGTAAAGGATCCCTGAATGTCACCATAGAA
+AGTGCAGGGGTAGAAGTGGGGTGGAGAAGGCAGACATCCTCACCCACCATGGGTATAGAC
+AGAATCTTCAAGACAGTCTTTCAATCTCCCATCCAAAGCTTTTATCTCCGCCATAGGATC
+TCTGCAAAGTGCTTGTCCAAAGTGCCTTCCTGATTGTTACAATGCTCTTTATAATATCAA
+CTTGCAATCTTTATCCCTGCAGTATGAACTCCCTGATTGTTCCTCTTGGAGCTCAAAGGA
+CAAGTCCAAGCCTTTGATCACGTGATAGTCCTTCAAACCAAATATTTGCACACATTCATT
+ATATCTTTCTGATTGTCTCTTCTCCTGAATCAACTTCCCTATTTCCTTCAATCATTTCTC
+ATGTGGTCTGGTTCTGAATCCTCTCATATCCTGAGTGTTCCTTGATGCTGGATACTGCAG
+GTGTGGGATGAACAGAGCAGAGTAGAATCAACTACTTTTTTGTTCTAGGGACTTCAGTTC
+AACTGAAGGAGCTTAAGATTAGTTTGCTTTTTTGGCAATCAAAACACATGAATAACTCAT
+ATTGGATTATTTTCTGCTAATATGTTTAATTTTTTTATATTGTACTGCTTTGCTAAATCA
+CATCTTCTCAATCCAGAACTTGCTCAGGTAATTTTTGGACCCAAGTACATAATCCAGCAC
+TTAACATTTTTGTTAGACTTGGCCTACTTATTTGACTCCTTTTGGATCCTAATTCTGCCC
+CAGCTCTTGCATTTGCTATCTGTCCCAGCTTTCTGTCAGGCACACAATTGATGGCCATAG
+CATTCTATAGTTTTGAATGAAGTTCATGTTAAGAAAATTGAATAGGGCAAGGCCAAAGAT
+GAAGCTCTCTGGTGTATCAATAGAAAATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNGGTTTTTATTTATTTTATTTTATTATTATTTTATAAATAACACACCCCCCCG
+CCCCCCCATGACCACCTCCTGCATGCACGTCGACTCTAGAGGATCCCCCACCAGGACTAC
+AGTCGGACTCCAATCCTGGCTCCTCCCCGGGCCCCGGCCCCGCCCCAGTCCCAAGCCGCA
+CCCCTTCCCNGTCCCCGCAGGGCTAACGTCAGCCTCCAATCCTGGCTCCGCCCTGGACCC
+CGGCCTCGCCCCGCCCCTGGCCCTGGCTCCGCCCGAGGCCCCCGCAGGAGTGAGCTAACT
+GCACCTCTGCGCATCGAAATTCCCACCCACCCTCGCACAGAGCGCATTCCACCCCGCACC
+TGCCAGCCTTTCCTGGAGAGTTGGGTGCAGGGTCCCTGGGATTGGCGAGGTGACTGTGAC
+CACGCATTTAGAATTCAGTTATTTGCTCTGAGCCATAGTCCTCGCTGCAAACCCTGCTGA
+AGTAGGGGTTGGCGGAAGCCAGGAGTTCCTGAATGCGAAGGGTTTGAGCTGAAGGGCGCT
+TCCAGGATCCAGAAGGTCACTGGAGACCTGTTTTTCACCCCCTCAGAGGGCAAAACCAAA
+AGAAAAATGGATTAGGAGAGGGGGCCGTCCATGTGACGTCATTTCTGGGTTGGGTGATCC
+TAGCCCAGTTGTTTAACCTCCTTCAACTTCAGTTTTCTCCTCTGCAAATTGAATTGAGGT
+TGATCACATCACCCAAAGGGTTATTGTGATGATAGGTAATGTACATAAAGTGTTTGGCAC
+AGGAAACTAACACAAAATGGGAGATAAGGATTATTTTGTTTTAGGTTTTGAATTATTATT
+TGAACTGCTTCCTAAGAGGCTTAGTTTAGACCTGGCATTTAGGTGAAGGAGTCTCTATGT
+TCAGTGACAGTGGGAAGAGATGAACATTGCTCTGTTCTAGGAAGAGTGGGGCTTGCCCTG
+GAGGTGGGAGAATGGAGGAGGCACCCTTCCCAGGTGCTCTGTCTCTTCCTGAACTGTAAA
+GATGGGAAGTGGGCAGGGGCAGGAAGGATGAGACCAGAATGGGGAGAAGAGATGGCCAGG
+TTTGCAAAGCCAAGGCTGTCAGAGTGGCTGTCAGTGGGAGGTGACCCCAGAGCCCATTTA
+CCACCCTCTACCCTCCAACATCCTCCCTCTCCCCTCTCATCCCCCTTCCCTCCCACTCAC
+TCACCCCCCTTTACAGAAGAGGAAACTGAGGCCCAGAGAGGGTAAGTGGCACAGGCCAGA
+CCAGACCTTCCTTGACATGGTATTCCTCAAGCGTGGCACAGTGCCTGACTCTGAGAAGAT
+GCTCACGGGGTTGCCAGTAGAAGCCACATTGAGGAACCCTGGAGAAACCAGAGAAACTGA
+GGCCTTGAAGGGAGATCAGAGGTCCAGCCCTCACTTCCCAGGACTGCCAGCACTGGCCCA
+TGAACAGGGCAATAAGGTGCCTGTTTATTCCCACTTTGAAATATCCAGAAGGTTGCTCCA
+GAACCTCATCATCTAAATCCATCCACAGGCATATCGAGCTATGTGAAAACTGAAACTCAT
+AGGGGAAGCTTTTGGAAGAATACGTCTCTCCCTTTATAATGGATTTAATTATCGAGTGGA
+TTTAATGCCTTCATGATGCTTGAGGGATGAAAGATGTTACCAATTGTAATAATGATAGAT
+GTCATTTATGAAATGCTTACACAAGAAGGCCCTTTTGGATATTAGTCCTAGTCCTTATAG
+CATATTTTGCAAGGTGGTGTCATTTAACAGAAGAGAAACTTGAGGTTAGGCAGGTTAAGT
+ACCTTGCCCAAGGCCACACAGTTTTTAAGTGGCTTAGGGGTGACTTCAGCCCAGGCTTGT
+CTGTCTGTCCCCAAAGCCCCAAATCTTCCTACACCTCACTGTCCCTCCTCCCAAGCCTCT
+CCACCCTGTAAACTCAGTCATTCAACAAATATTGATTGGGCCAGGCCTTGTACTGGGCAC
+TGGAGATGCCATGGTAAACAAGGTGGGCCTGCTCCCCTGCCCTCAGGGAACATAGTTCTA
+TATGTTAGGGATACATTTCATAAACAAGTCATAACCAACCCCCAGTATGAGTATGATGAG
+AGGGGTCTTCAGGGAGGTGAGGTATCCTCTCTTACCCTGTGGCCTGGGGCTGGGAAGGAG
+CTGGGGGCAGGTTGGCGTTGAGAACTGGGTTGGTGTCTCCCTCCAGAGGGGATAAGGCCC
+AAGGAGAGAACAGTGTCTTCAGCAGGCCCCTCTGGAACCCAGGCCCCTCTGGAACCCAGA
+CTGCGGGTTCCAGAGGCGGCCAGCAGAGGGAGCCCACAGCCCAGGCCTGACGGACCCCTC
+AGCTGCTTATGTAACTGGGAAAGCTGGGGGAGGAGAACAAACCCCAGGCACGAGGCACGT
+CACACACACAGACACATGTCCCCACTGTAATGGAGCGGGGATCAGACAGAAAGACGGCTC
+CCAGAAACACACCCCCACAGAGCCATGGGCAAACACACACAAGCTGCAAAACAGCGATGC
+ATGGTGTAGATTGGAGGGCACTCATCTCATCCATTAAAAACACACACCTAGGCTGGTGCC
+CGAGGGTGGGGTGAGCAGAGAAAGGAGGAAGGAAGCCCTTCAGTGAGTTTCCAGGCCAGA
+CTGAGACACGAAGGAGGAGGGAGCCTGGGTTTGCAGAGAGACCTGGCAGGTATGCAGTAG
+TGACCATGAGGGACCCTGATGCCCGGCTGGGCTGTGTTGTCACTGCCATCACCAGCTTCC
+CACCTGCCCTCCATGTACTACCCCAGGCATGGGCCTGCATCCGACACCCGTCCTGTTTCA
+CATGAAACAGGTCTCATTTCACCTCAAGATGACATATACACAAGACACCAAGGTGACACT
+GAGGTGTCATGAGTATTGTTAAAGATAATTGAGACCCACGCTCTGCCCAAAAGCAGTTTA
+TAATCCAGCTGAGATGTGACACACCACAGTCACAACCAAGCAGACAGCTCTCAGAGAGGA
+GCACGTTCAAAGACAGGACCATTCACATATGCTGACCTGTGTCAGGCACTAAATGTGATC
+TCATTTATCCTCATGTGCTATTATTATCCCTGTGAGAAAACTGAGGCATAGAAAAGTTAA
+GAAACTTGTCCAAGGTTAATGCATTAGTTACTGTCCACACAGGGATTCGAAATGGATCGC
+CTCACTGTGGAGCCTGTGCTTCTAACTCCTGTTATGTTGCTGCTTAAAGACAAACGTCAC
+TAGGGACACAGTTAAATATGGGCTTAGCACACATGAGTATACACCAAGCTGTAGACACAG
+ATACTGCCGTGGGGACAAAGTGACAAATACATCAAATACATAAGCACACATGCTCGTCAG
+ACTGTCAGACATATAGAAATGGCACAAGCATACACATAGGACTCAGCTCTCTCTCTTTTA
+AATGTGTATCAACCGTAGCTTTGTGGGGAGCTAAGGATCACACACAAGAATGTCCTGTGA
+TTTGTGTGTGTTTATAAAGCATTGGCCCATACTGCCCTGATGAGCTATATTTTCCCACCC
+TCCCAGAAGGGAAGATTCTCTCTGCTGAGTTGCTGAGAGGCTCAAGCAGAGTCAGAGACA
+GAAGTAGTGCAGAAACCTACCCTCGCCCTGATAACAAACTCATTCAGAGACACACTGAAA
+CAGAAGTGCACACACAAAGATAAACACAGAGACATTATGCACACCCAAACACCAAACACA
+CTCACAGAACCACAAAGCCATCTCCCCCCCGACACACACACACACATTCGCTTTCCACCC
+TCTCCCTCCCCTCTTTTCTCCCTGAGCCAGGCATCTGCACCTTCATGGTGAACTGTGTGT
+GTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTCTACTCTCTGAACCCTCCTCTT
+TCCCTGGCTAGGCCCACACTCCTGCCCCCATTCTTGAACCCTATGCCAAGGGGAGCAGCA
+CAGCTGTTCAGGAATAGCAGAGCCGCGATGCCACTGGGGCTTTGGCACGACCTGGCCCTG
+ACTCCTGAGGGGGCCCTGCCAGTTCTGAACCAAGGATTCGTGCCAGCTGCCACCCACGCT
+CAAATGCCACACATGGATTCCTCAGAGCTGGAATCCCCTCGGTCTCCCTTCTGACCTCCT
+GACAAGTTCTTTGTTCCCCAGTCCCATCGGGTGGGTGCTGGGAGCCTGGGCTCCGCCCCA
+GATGCTCACCCATCCCACTTCTCCATTCCCAGTCTGCTAAGAGGGAACTGACAGCTCAGA
+TATATGTCTGGAGGGTAGCGTGGGACTGAAGCAACCCCCACTGTGTGATATTACTTCTTC
+TCTACCCTGAGCAGCCTAGGGTGGGGTGGGGGTCAGCCAAGGGCAGGAGATGGCAGTGGT
+TTGGTTAGACAGAGGTTTGTGGGCTGAGTGTTGTAGATGAGCCTCAGGGGGCAGGAGGTG
+GGTAGCACCAGGTGACTGCCAGTCATTTACGAGTTTGTCTCCACCCAGCTCTGAGTGGGG
+TGAGAAGGGAACAATTACTAATACCTACAGAGCACCTACTTCATGCTGGGCACCGTTGCA
+CACACTCTGTGCACATTCATTCATTCAGCTTTCACAACAACCCCATGAGGTGAATACTAC
+GAGTATCTCTATTTTACAGCTGGGAGACAGAAGCACAGAAAGCTCATATAATTTACCCGA
+GGTCACACAGCTAGCAAATGGTAGCACCCAGATGCAAATGCAAGCTACCTGATGCCCAAA
+CTTCAGTTTCCCAAACATTCTTCTTCCCTTCTTCCTCCTGTTTCCCTAGCCTGCCCACAT
+TGATTCATACATACATGTGCATATATTCAGGCACACACATGCATATCTGGTGCCTATGCA
+ACCTTCTGAAATAACTTTTTCCACTACACAGCTCCAGCTTCCAGCACATAGTGAGTGTTC
+AATAAATATGTGTTGAATGGATGAATGCAGAATTGAAGGCCCAGGGAACAATGAGCGTTG
+TGTGTGTGGTGGTATGGGGAAAGAGGGAGGGGAAAGGAGGGATTTCACACACACACACAC
+ACACACACACAGACACACACATACACAGCCCTCTGCTGTGAATAATGTCAAAGACTCACT
+TCTAACTCTGGGGTCCCAGCCCTTCACAGCTACCTGGGTGGGAAAAGAGCAATGTAACCA
+GGCTTTACAGGATTCTCTGAAGTGGAAGAAGTCTCAACTCTCTCTTCCAATCAGAACAGA
+GGACAGGAGATGCAGAGAGAGCTCTGGAAGAGGACTCGAGGCACACAGCCACCTGGGCCT
+TCCAGCCAGGCACAGCTCATCCTGTGTTTTCACCTCTCCCTGCCCCATGCCTGCCGCGCT
+GCAGGCTCTCCCCTGGCTCCCTCGGGACAGTCCCTGCTTCCTCTGCTTGTCCACCTGGAC
+TCTCCAGGAACCCGGCACCCCTTGGTTATTTTGGCTGAAGAGTAGACAATTCTCCTTTTC
+CCTGTTCCAACTGAAACAAGGGTGGAATTTTTCCAAAGGCTCTCTATTAGGAAAGTGAAG
+GGAAAACAAAGCCAGAAATAAAGACTCTGAAAAAACAAAGAAACAGATGGAACAGTGGGG
+GTGACCGAGCTGGCCATGGGAGAGAGCGGGGGCCCAGAGTTTTGATGCTAACGCGACCCA
+CCCCCTTGCTCCCACTGCACCCTGTTCTCAGCTCCTTGGTAGAGTTTCCCACTCTGCATT
+GTAATTACAGTGCTTGTTTATATCTGCCTCTCCATCCTGGCTGTGAGCTTCTCAAAGCCA
+CTGTTAACTCTTTTTTGCCTTTCCCCAAAGCCTACAGAGTTTCCTGGCAAGGAGTAGAAG
+CTCAACAGACATTTGCTGTGTGACTGAGGAAGTGAGGACATGCCGAGTGACAAGAGAAGG
+AATCTGGGCCCTTCCTCTCTTGTGGTCATGTCACCTCCACAGTGGACACTGTTAGTTCCT
+CTGGAGTCCATCATACACTAGTCACTTCCATCTACAGCTGATAGAAAGTTCCTCCTCTTA
+TTTCAGAACCTGAACCTTCCACCCCTCAGGAAACAACCAGGCTGGCTTCCTAAGCAGCCT
+AGGAAGGGCTGAGTTCTGTCCTGTAGGGCGGGAAGGACGCTGGAGATCAGATCAAGCTGA
+AAAAGAAATCACAGTCTGAAGGCCAGGACATTGGAAATCAAGGGCATAAAATAATTGCTA
+ACCTTCACTGGGCACTGTAATTCTCAAAATGACTCTATAAGTTAGAGCTGTTAAAAAAAA
+TTGAAGTTCAGAAAAGCTAAAAAATTTGCCCAAGCCAGGCACAGTGGCTCACACCTGTAA
+TCCCAGCACTTTGGGAGGCCAAGGCCGGAGGATTGCTTGAGCCTAGGTGTTCCAGACCAG
+CCTGGGCAACATAGCAAGACCCTGTCACTACTAAAAATTTTTCAAAAAAACAAACAACGA
+GCCAGACATGGTGGGCACCAACTATAGTCCCAGCTACTCAAGAGGCTGAGGCAGGAGGTT
+TGCTTGAGCCTAGGAAGGTGAGGCTGCAGTAAGCCACAGTTGCACCACTACACTCCAGCC
+TGGACAACACAGCAAGACCCTGTCTCAAAAAAAAATGTGTCCAATTAGTGGTAGACCTAG
+GATTTCTTCTCCGGGCATTCTGACTCCACAGCACAAGCCTTCAGCCATGTTGCGGGCCGC
+ATCTATACCATACAAATTATTGTCCTCAGAAGGAGCTGGGAGACAACAAAGATGTAGAGG
+TCCAACCAGCCAGCAGGGCAGGGTGACAGGGAATCCAGCTGGTACTGAGGCCCTGAGCAC
+TGGCATGAAGGATCCCAGCTCCTGAACTGGATAGCCCTGGGGAAAGGGGACTGGCCAGAA
+CGAACCTGGAAATCAGGGGATGAGCCAGGTTACCAATCAGGAGCTCAGGCGTAAGAAATA
+AAGCTTCTGTGTTTTCAATTAAGTAAAAAGCTTGGGGGCTGGGATTTCAGAAGCAGACAG
+CACAGACCTGAATTTGCCAGCTGTGTAGCCTGAATCCCGTTACTCACTTTCTCTAGCCTC
+AGTGTCATCATCTGTAAAGTGAGAATTACTAGAGTACCGATCTCATTATTTTTTAAGGAT
+TGTTGTGAAGATGAAATGAAAACATGTGTGTCATGGTGCCCCATTATGATCAAAGCTAGG
+GCCCACAGGCGCAGTGGGACCAGTGGCAAAGGAGAGGAGACTTCTAAATGGGTCTCTACT
+GTTCACTGCTTCCTCCATCTGCTGTCTCAGTTTACCCAGTCTTCTTTCCCCTTCCATCTC
+CCACAATCCCCTTGGGAGAGGATAACCTACGCCCTGCCTCAGCTTCCTCACCACACTCCT
+CAACGCTTCACAGGTTTATTTCCAAACTCAACCTGACATGGAAATGGTTTTCCCAAAGGA
+CACACCAGTGACCTCTGAGTTGACAAAAAAAAAAGTGACCAGCTTTCCTTTCCCCCTGGC
+ATCTGAAATCTGAAACTGTTAGTTTTTTGCCCCTTAAAACTTGTTCCTCTCTCAGCTTCC
+TGGCCCCTCTACTCGGCCTAATTCTCCATCTCTCTGGCTTCCCTACCTTGCCATCTTCCA
+GATCCTCTTCCTCTACCATAGGTGTTACTACCCAGGATCCTCAGACCTCTGTCCTCTAGC
+TCCCCTGCCTGGGCCATCTCCTCTGCTCTCCTGGCTCAGTGTTCCCCTCCATGCAGGGGA
+TTCCCATGGCCCATGCCCTCTCCCAAGCTCCAGTCCCCATTTCTATCACCCCCCACGTGC
+CTTCATCTGAAGCCCTGCCTGGAGCACCTCAACCTTCATATGCCCCCATGCAAACTTGCG
+GCTTATTTCCCAATCTCCTTTCTCCCCTACCCTTATTTTAGTTAACTTTGCACCAACGTC
+CTCACCACCCCCACCATCTCCAACGGCTTCCTTCCTTACTCTGATGTCCAATCAGCTGCC
+AAATACTGAAATTCTACCTCCCAAACATCTTTTTCATCCATCCCTGTCTCCTCTCTGTCT
+TGCTCCAGACCTTCACCACCTATAGCCCCCATCAGTCTCTCCCTTCCAGCCCAACATCCC
+GTCCACGTACAGGCACACACCCTGCCATCAAATGAGCCTTCCTAGAACGCAATTTCAATG
+AGATTACTCCCCTGCTGAAAAACTTTAAGTGGCTCCATATTGTTTATAGCGATATTTCTC
+AAACTTGCTTGGTGGTAGGAATACAGGTTCCCACTCTCCACGCCCACCCTCATGAATCAG
+AATCTTGAAGAAATGTACCTGTTCTTTTATAGTTTTAACAAGCACCCAGGTGATTCTTAT
+CATCAGAGCCTTTTTAGGAAACACTAGCAATTAGAGTGGCAATTACTCTGGCATTCAAGG
+CCCTCCATGAAATGGCTCCAACATCTATTTCCAGTAGTACCGCCCATATTCCTTCCTTGT
+GCAGAGTGCTTCTGCTGTCTGTGCCTGTGCTCACATTGTTCCTTCTCCTTTATCTCTCTG
+AGTCAGATCCTACCTATCTGTCCAGGCCCAACTAGAATGCCACCTCTTCCATGAAGACCT
+TTTTGATTCTTCCAACTGAGAACCATCTCCCCCTTCTCTGGGCTCCCACCACCCTCTAAA
+TGCTATGGTGAGCAACCAGCAGCCAGCAATACCTGGCTCAACACAGATGCACAAGAGTTG
+GCACTTTTTCTTCCCCTTTATCTTCTGGAGCACTCCTCACTGTCACCACTGCATCCAGAC
+AGTTCTGGTCATTTACTCCTGGGGACAAGAACCCAAAGGAGAAGCTTCAGTGGGTCCTTA
+CAGCATGGAAGTTAAATGTGCAGGCTCTGGAAACAAACTAACCAGGTTTGAACCGCAGCC
+CCCCTACTTGGGTAAGCTGAGGATCTATGTGCCTCAGTTTTCCCATCTGTAAAATAGAGT
+TGTGAGGACTAAGTTAAGTACTACATGCTGTCTTCATTAGTTCTGGAGACTGGGAAGTCC
+AAGATCCAGGTGCTGGCAGATCCAGTATCTGGTGAGGGGTCCCTTCCTGGTTTGCAGATA
+GCCACCTTCTTGCTGTGTACCCCACATGATGGCCAGCAGAGAGAAGAAGAAAGCTCTCTC
+ATGCCTCCTCTTAAAAGAGCACTAATCCCACCCTCATGACCTAATTATCTCCCAAGTGCC
+CCACCTCCTAATACCATCACATCAGAGGTTCAATCTGTAAATTCTGGGGAGACACAAACA
+TGCAATCTACAATAAGTGCTCACTAACCGTTAGCTATGATGAGCTACAGATCTTAGGGAC
+AATATAGTTCAGTGGTTCTCAAATCTGCCTGTTGATTCATCTGGGGAGTTAAAAAAAATA
+CCGATGCGGCTGGGCGCAGTGGCTCACACCTGTAAGCCCGGCACTTTGGGAGGCCAAAGT
+GCGTGGATCACGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACGTGGTGAAACCCCGTCT
+TTACTAAAAATACAAAAATTAGCCAGGTGTGGTGGTGCATGCCTGTTATCCCAGCTACTC
+AGGAGGCTGAGGCAGGAGAATCGCCTGAACCTGGGAGGTGGAAGTTGCAGTGAGCCGAGA
+TTGTGCCATTGCACTGCAGCCTGGGCGACAGAGTGAGACTCAGTCTCGAAAAACAAAACA
+AAACACTGATGCCTAAGCCCCACCTTAGATCAATTAAAAACATGGTACCTGAAGATGAGG
+CCTGGGCATGGGATGTTAACATGAAGCTAAGGGTGAGAATCCTTAGCTAGTCCAACCCTC
+CACCCACTACTTTATTGACGAGAGGTAAAGGACTTATTCAAGTGCGCACAGCTGGTTAAC
+TGTAGAGCAAGTCCCACACTGAAAGCCTCTGCCCTCCCTCTAGCCTAATCCAGAGTGATT
+TCCAATACGATTTCCAATACGCCATTGCGCAGCCTTTGCTCCCGCGTCATTGGGTTGGGA
+GAATCATCTAGGCAGGAGGCAGAGCCAGGCCCACTCAACCTCACTCAGCAAAAGTAGAGG
+TAAAGAAGTGGGGAAAGTTTGAGGAAATGGAAGTGGGGAGTCCCTAAGGACTATGAGCTT
+GGGAGGACAAGGTCAAGAATGGATGGGGGGATTTTGTGACTAAATGGCATGAGATTCTGA
+GAGAAAAGACAATGAGGGTGGGGATCTTGTTTGCCTTGATTGTAACTGTCTTTCTAGTAC
+CTTGCACAGAATATAGCATAAAGTACGTGCTCAACAAATGTTTGTTGAATGTCTAAATGA
+TGCAATAACATCAAAAAGCACATGTTCTGTTTATTAAGTCACCCAGAGGGCACTCACATT
+CCTCAAGGGGGCAGTGGTGAGAACATCCAGAGGGGAGCACAAAAGTAAAAGAATCTTGGA
+CTTTCAGAGACACTGAGGCACAGAGGAGGCCAAGCTCCCTGAAATTCCGAGGAGAGGCTG
+CTATGTGGGAGGGAGGCAGTGGCCCTGTGTGCCAGATGTTTGCAGGGCATCCTGAACTCT
+CTGAGCATACAGAATGCCACAGGGGACAGGCAGATAAACAGAGCTGTATCCTGGGCCAAG
+TGGAGGGGGCCTGGTAAGGGAAGAGGGAGGTGGGAAGAAGGAGGCTGGGGGCCAAGCCAG
+CTGCCTCTTGATAGGGGGAGCGGGGAGGCGGGGTGCTCTGGGCCAGTGGTTGGTTCAGAT
+CTCTGGCCAAAGCAAACCTGCCTCAGGGGGCTGGGGGACCTCTCCCTCCCTGCCCGGATG
+GAATCAAAGGCTAACAGGAACCCTGGAGGGTCCAGCTGTGACCCTTGCTCCTCACACTCA
+GCAGGGCCCATTAACCCCGTCCTCCCCATATCCTCAACACACTTTTCCCAGCCCAAAGAA
+TCTTTGCAAACAGACTCCCCTGGTATTTGCTGATCAGATACCAGCCAAGAGATGTCAGGT
+TTTCCCAGTCTCACCACCAGCCCTGCCTTCCTCCCCCAAAGATACCCAGACCCTGTCCCA
+CCTCTCACAGCGGTGCTCTGTAAGAACCCAGGGTAGCCTTTGCTCAAACCGGAGCCCTTC
+CTTTTTTCCATGCCAAGAGTTAAAGGGATTCGATTCATTTCTATGAGTCATAATAATAAA
+CAAAATAGGAGCACCTGCCTCCATGCTCACAAACAAGACAGGACAACCTACCCTCTTGGA
+TCTCCTTCTGAACCCAGGTCTCCAACTGATGGTTCTCACACAGGTACAACCTGCAGGTAA
+CTCCTGCTGTTTACCTGAGCTGAGCTGTGCGCTGAGGCAGAAATAGCCTGGAATTTGGGG
+ATCATCAGAACTGGAGAGGGACCTCAGAGATTTTCTTTTTTTTTTCTTTTTTTGAGACAG
+AGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAACCTCAGCTCACTGCAACCTG
+CACCTCCCAGGATCAAGCCATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGGT
+GTCTGCCACCACGCCCAGCTAATTTTGTGTGCATGTGTATTTTTAGTAGAGATGGAGTTT
+CACGCATTAGCCAGGATGGTCTCAATCTCCTGACCTCGTGCCTCCCAAAGTGCTGGAATT
+ACTCACAGATTTTCTCATCCCGTTCCTTGTCATTCAGATGAAGATGCCAAGGTCAGAGTA
+GAGAAGCCTCTTGTCCAACATCACACAGGTGGTTAGTGGCTTCAGCAGGACTAGCCTCAG
+CTGACTTGACTTTCAGGCAAGAGTCCATTGCATTGGACCACGGTGGGGACTGTGAGAGGA
+CAGGAGGGAAAGCAGAGGCACAGAATAGCCCAGGCCTTCATGCTCCTGGGAAGGGTTGAC
+ATGGGTTCCCTGGGCTACACCATGGGTCCTTTCCTTTTTCTCCCTTGGGACTTCCACGTG
+AAGAAGGCACAGGCAGAACACAACATGGAGCTATCTGAATCCAGAGAGAAGTAAGAACGA
+AAGGAGGTGGGAATGCAGCACAGGCAGAGGCTGGGGAAAATGCTTCTGGGTGCTTTTACA
+TCCTATTTTCTAAATTCTATACAGTGTGGTTTTAACAATAAAGAGGATAATAAAAATACT
+CTGGGGAGAGATTAAAATGACAGAACCAGCTGGGTGTGGTGGCGCATGCCAATGGATGCA
+CAACTCTGGAGGCTGAGGCAGGATGACGGCTTGAGCCCAGGAGGTCGAGGCTGCAGTTGA
+TCATGCCGCTGTACTCCAGCCTGGGTGACAGAGTGAGAATCTGTCCTTTTTTTTTAAGGG
+AGAGAGAAGAAGAATTGTTTTTTTTTGTCTGTTTATTGTTTTTTGTTTGTTGTTGTTGTT
+GTTGTTGTTGTTTTTGAGACAGGGTCTCACTTCGATTGCCCAGGCTGGAGTACAGTGGTG
+CAATCTTGGCTCACTGCAGCCTCTCAACCTCCCTGGGCTCAGGTGATTCTCCCACTTCAG
+CCTTATGAGTAGCTAAGACTACAGGTGCATGCCACCACGCTCAGCTAATTTTTTCGTATT
+TTTATTAGAGACAGGTTTCACCATGTTGCCCAGGCTGGTCTTGAACTCATGGACTCAGGT
+GATCTACCCACCTCAGCCTCCCAAAGTGCTGGGAAAACAGGCATGAGCCACCGCGCCCAG
+CCAGCAGAGATTTTTAGAATCTACTTTGAGTAGACTGAAAAGAAAGACCATGTTGAAGGA
+ATGGTTTAACAGATAAAGAAAAAAAATAAGATAAAAATAAAAGAAAGACCAAAAAGAAAA
+CGGAGGAGCAGATGTTAAAGGAGAGATGGGGAGAAAATTCAACACCATCCTTGTAGTCCA
+GTGAGGAAACTGACACAGCAGCAGGACTTGGACAGCCACTCTGGGGCTCTGAGCCCAACC
+TTTTTATTTATTTATTTATTTATTTATTTATTTATTTATTTATTTATTTTTGAGACGGAG
+TCTCGCTCTGTCGCCAGGCTGGAGTGCACTGGCACGATCTCGGCTCACTACAACCTCCGA
+CTCCCTGGTTCAAGCGATTCTTCTGCCTCAGCCCCCCGAGTAGCTGGGATTACAGGCATG
+TGCCACCACTCCCAGCTAATTTTTATATTTTTAGTAGAGACGGGGTTTCACCATGTTGGC
+CAGGATGGTCTCAAACTCCTGACTTCATGATCCGCCCACCTCGGCCTCCTAAAGTGCTGG
+GATTACAGGCATAAGCCACCCCACCTGGCTGATCCCAACCTTTTAACAGGTCACTATAAC
+CTGAGGGAGTGGGAGGACAGAGGGCTGGAGAGGCAGGGAGAGTGAGAAGTCATCTTTCTT
+GGACACTTGTCATGGCCAGTCCAGGCCTTCCCGGCTCCCCTCATCTCATCCCCCAAGGTG
+GGCTCCCTTTGGAAGCAGAAAAACTCCACCCACACTCCCAGAAGACCTCAGAAGCCTGAT
+AGCAGTGACAAGACCACAGGCCCTAAAGAAAAAAAGAAAGGAGAGTTAGAGGGAATGGAA
+GAAGGAGAAGAAGGAGAGATGCCCAGAGGGGGGTTGGCAGCCGAGAGGAGGACCAGAGGA
+GCAGGGCTGGAGAGCTGGGCCCAAATCAGGACTGGGGGAGAGAAGTGGGGTGCCGGGGCG
+GGAGGAGGGACGATGGGGGTAGCTCTTCCTTCCCTCCCACTCTCCCTCCTCCTCTCTCTC
+TTTCATGATCTTGAAACAACCTTCAAGTCTGGGCTGGAGGCCCCAGGAGGCCAGTCACTT
+AGGAAACACCTCTGTTTCTTGGAGACCTTGAAAGACCATCTGCAGCTGAGCAGCGTCCCC
+ACCCCCTCCCTGTGCGCACACGCAGCTCTCCTGGCTCTCGTGTCCTCTCTTACTTACGTC
+ATCATTAGTCCGTCTCAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCC
+ATCTGCAGCTGAGCAGCGTCCCCACCCCCTCCCTGTGCACACACACATTCTCCCATGGAC
+TCTCACGTACACCCACACCCGGCTTCACATGAGCTCCCCAATGGCCCACATGCACGCACA
+CACGGACTCCACACTTGATCTTAGCCAAAAGGCCGAGAAGCGATGCGCACTCACGGACTC
+ACACGCACCTCTGTACTCATGCTTGCATACAGACAACCGATTCATAGCAGATTGGCCACA
+CATCCGAATGAGTGTTGTCCTCTCCCCTGAGGAGCTCCTCCTTTGGAATGTGGTTTGGTA
+CCACCACTCCGTCTCATACCTCACACAGGACCCAACGCTCAGGTCCTCCCAGTCATTAAC
+TAGCACACATCTGTACACAGGCACATGCACACTCCTGTCTCTCTCCAACTGTCTTGGTCT
+CTCTCTCTCTCTCTCTCTCTCTCTGTCACACACACACACACACACACACACACACACACA
+CACACACACTTCTTACCAACCAAATATTTGATTCAGGACCACTCCATCCCCACTTTGCCC
+TTGGTGGATCCCCCTCCTGATCTGTGACTTTCTCAGCCCACTCCCCAGCCCAGGCATCTT
+GGCCTTCCATCCTCCTGCTTTCAAGTCTTTCTTTCTCTCTCCTTTGCTCCTCTTACTCCA
+AGGGCCTTGATCCAAACTGGTACTGCTGACTGTGTCTGAAGAAACAGTAGCCCAACCCAG
+GCACTCCAGATCAAACGATCAGGTGTCTACACCTGTTAATATCCTGGAGGCTGAGGTTCA
+TGGTGGACAAGAAGGAGGTAGACCACTCCAGAGCCTCAGCTCTCACTGGGCAAGGAGGGT
+GGTCAGTGATGCACGGTGGAAGTATAGGCCCTGGGTGTCCTGCCCACTCTTTGATTTCTC
+TGAGCCCTAAATATGTGAGCAGCAGCGCCCCTCGGGCTGTGCTCTCGCCCAAATGCAGGG
+CATGGCAGGGAGCCCCAGCAGGCTGTAGGGGCAGAGGAAGGGCACATCCTTTCCGGTATT
+GAGTTTGGCCACAGGGACCAGAGCCAGGGCCTTTGGAAGGACTGGGCAGTACTGGGAGGT
+GCCAGCAGGGGTGAGGGGTGGGATGAAAACTGTTAGAGTGCTTGAGGTTTGCACACCTTC
+AGATTCGGGGACTGACGTTTTCTCTCCTATCTCAGGTTGTGTTCTGGAGAGGGCTGGGCA
+GAAGATTACCCAGGAATTCCAACCCAGGAACCTACGGAACAGGCGCTTCCTAAACCCCAA
+TTTATCATGTTGTTCCCACAGCCCCCAGGCAAGCCTGCCCCAATGTTGATCTCCCTAGCT
+GCCTCGGGGCCACTCTGGAGTGACTTTGCTTTAGACAGTCTCTCCTGGTGTCTAACCACA
+GGCTTTCCCGCTACTCCCCTCTCACTCTCTCCTTAACACACAGGGGCTGAACTGGTATCT
+GTTCCCTTCATTGTTTCCTCCCTCTGCCCATTCTCTCCCAAGACCAAATCTGAGCTCGGA
+GAGGGGGGCCATGAGAAGGCCCTGTTGGCCCCATCCTACCCACCCTTTCTGACTGTAACC
+CAGAGAGGGAAGGAGCGTGGAAGTGAGATGTGACCTACCAGGTCCCTGTAATGGAAAATC
+TCAACTGCAGGCCCGGAGGGGGCCAGTTATTTAGGGAATTCAGATGTTAGCCTTGTGCCC
+TGAGAGTGGATTTGCTAGAGGAGAGGCTGATAATCCCTCTGCAGGGAGTTCCCTCTGGGG
+CCAGAGATGGACCCAGCCCAGCCAGCAGGCACCTTAGGCCTCTGGCCAAAGCCCCCCTTC
+CCTCCCTCCCTTCCTCTCTGCTTTCTTTGCACTCAGGACACTTAGACCTGCCCCACTCCG
+CTGGCTTCCCCTCACTCCATCTCTGTGCCCAGAGAAACAGGGGTACTCCAGGGGAATCCT
+GTCCTCTGACTGTTCCCAGTAGAAGGCGGGAGCAGAATTTTTTCTTTTCTCTCTGCCCCT
+TCTGGGTTCCCCTTCCTCTCTGTGCCCCTCATCCTTTGGATTACTCATCAAATGAGCAGC
+AACAACACCCCTAGACCAGCTGGTCCTGTCCTGGGTCATCTCGTTGTCCCCAGTTTCTAG
+CTGGTATAAGGTTAAAGATACCCTTTCCAGCCCCAGGCAAAGCCTTAGGCATTTCTGGGA
+TTGCTCCTGACTCTGATGGGCATCCTCACTCCTCAGACATTTCCAATCTCCCCCACTTCA
+CTGCAGAGAGGCCTGTTCTCCAGCCTCAGTCCTGCATGCTGCAGCTTTGCTTCCCCTTCC
+CCACTGGAAGAAGGAAACAGTCGCTCTCCTTTCCCGTTCACATGTCTGATATTTGCTATG
+ATATTGACCTTCACACACATCTTAGAGGTTCTGTATTTTCCTCTCTTTTGACTTCTCCCT
+ATCACTACTCTACTCAGGCATAGATTCAGTCTAGAGCTCTTCAACAGCAATGAAACGGTA
+AACGAGGGGTAAACAAAACCCAGAGATGTCATCAGGTGCCTGCTAGGAGTCCCACCCGTT
+CCCCTCTAGCCCTGGCCTGCCCCTCCCTACCAGAGTCTTGATTCTCCTTCAAAACTTTGG
+AGAGGTTACATCTGGCAGGGTTTGGGGACACCCTCTTGCCTCCATCTCCAAACTTCCCGC
+TGCCTCCCGAGCCACCTTTGACTCCTCCTGGGCCCCCCTTCCAGGTGCCTCAAGTGCAGA
+GGTCAGCCTTCGGTTCAAATTCTATTTCTGGGCTTCAGTGTCCCTGCAACCTCTATAATA
+GAAGGAACAGAGTGGTCTATGTTTCTTTCTTTCTCTGGGAGATTGGATGTAGACTCGGGA
+GACCTGAGTTCTGGCTCTGTCTTATCAATTCAAATTGTTTAATCTCTCTGAGCCTCAGTT
+TACCCACTTGGAAAAGAAGAGTTAGGCCACATGGTCTCTGTCATCCTGGGATTCTGAGTC
+AACTGCTCCCCTTCAGCCAGATCCAAGGGGGCATATCCCCCCAACCTGGGACCCAGGCCC
+CCTTCCCTTGCAGCCTCTCTCAGTTGTGAGTGCCACAGGCCAGAGGGGGTTGAGAGGAGC
+CAAGGGGGGGCATGCCGTGGGACAAACGGGCTGTCTAGGGCCAGTGGCTAAAGGGGCGGG
+AGGGGAGGAGTCCTCAGGGATCCTGTTTCAACAAACGTTTCTTTCGGAGGAGGGGAAGGC
+GGGGGAGAGGGGGAGAAGGACCTATTTAAAGCTACCCTGTTGCTTTGGCTTTCTCTGTCT
+GCCAGGGTCTCCGACTGTCCCAGACGGGCTGGTGTGGGCTTGGGATCCTCCTGGTGACCT
+CTCCCGCTAAGGTCCCTCAGCCACTCTGCCCCAAGATGGGCCGTGGGGTGAGTATCCCTA
+AAGAGCAGGGGTCGCAGTCTAGAGGGTGAGGGAGGCTGCTGAGGAAGGATGAAGTGGGAA
+TGGGATGTGGAAGGAGCCGGAAGAGAGGAGCTGAATTGGGATACTTGGAACCTTGAAACT
+AACAGTCTCCAAACTACCAATGTGTGTACAGATCCCACGGCGGTCACATACCCTAGTGCT
+CTCCTCTCCTCTGCACGCCCCAGCCCCCTGCCCACAGTTAGACCCACAGCCCACCACACA
+TGCCCCCTCCATCTCTCCCTCCACCCCACTCCCACATCTCTAGTCCTTCCCTGCAGCAGT
+CCCTCCTCTCACTGCATCTCCCGGCCTCCCAGCTGAAAGGATGTTTGGCACTGTGAGTCC
+TCCTGTCTTTGCCACCTCCTGAAAATATCCATTCCTCTGGGAGTCCTAGCCTGAGAGGCT
+GGGGGTCCATTTTGAGGTTAGAGAGGGGCAGTAGAGCATCCGGCTCCCAGAGTCACCAAG
+TAGTTCCGGACACCAGCTCCAGGGGCCCTGAGTGCCAAGGACAGCTGGGCGGGGTGTGGG
+GAGGAAGAATGATGACCCAGCCCCCACCCCAGCACCCCTCCGGATGTCCCGAGCAGCATT
+GTCCGTGGGTTCCAAGCCATGGGCTTTGAGGACTTTCCCAGAGGCTGGTGACAGGTGACC
+CCCCCCCCAGTCCACCCGTCCATGGCATTCTGCAGCAATGGCTACACCAAGAGGAAGATT
+AACAACAATGGGGCGGGCAGGGAGAGGGCTTTGAATTCTTGGAAATGAAATTCCCATATG
+GAGAAGAGTGGCCCAATCCTTTTACATGCTGTGCCTGGCTGCTTTCCTTTTGTGGAATAC
+TGGCAAAAGAGGACACAGAGGGAAAGGGCATGCTGCGGAACAGGAGGGTATGAGGAAGAC
+TGTGCCTAGGGGGAGCCGTGGGTGTTTCTGGGCCCAGGAGGAGTCAGGCAAGGACATGTT
+ATTGCACCTAGTTGAGCAGCTGGGAAATCCTGAGCCTGGACACCAGGGTTCCTGAGCTAG
+GGTTGGAGCCATAAATAGAATTGGAGTGCATGCATGTATCTGTGTGTCTGTGTCTGTGTG
+TGTGTGAGTGTGTGCCCTTAACCACGTGTTCTCCATAGGCGGGAAGTCTGCCAGTTACTC
+ATGGATGAGTACCTGTGTTTGTGTCTCAGCAGGCAAATTTGTAAATGTACAAGCCTTCAA
+GCCTGCACAAGCATAAATGTCACTGTGTGGGGGTGCTTGAGAGTCTGTGAGCCTGCCCTT
+AGGAGCTGTGAGGTATGGGCTTCACGGAGGAAGAGGTCATAGCTGCAGGAAGATATACTG
+TCTTGGGACCTTCAGCTGCATCTTGACATCTCTGGCTCCCCATGGAAAGGGCATCCCTAG
+CTGAGATGCAGGACCTCTGAGAAGAGGCAGGGGTTAAAGGATCAGATAAGCCCTCCATTT
+CCCCCATCCAAGTGAAGAGAGAAGGAGGGGAGGTAGCCCCCTACCCTGCTCCATCTTAGA
+GCAAGGTAGACCCAGCTCAGGAGGTCTTGCTTGGGAAGTGATGAGACCTTGACTTTTCCA
+GTCTGTCTTTTTCCCCTAGCACCCCCAAACTCCCCTTAATCACCATCCTAAGTTGCTGTG
+GGTGATGCAATAGCAAGATGAGGAGCAGATCTGGGCTGTTTAAACTAAGAGGCTGGGTGA
+GGTGGGGGTATTTAGGGCCTGGAGCTTAGAGTTCAACCTACCAACGACCCCTGAAGAGGG
+AAGGCATCTGACACCCACAACCTGTTCTAGGGATGATTTTTCCTCCAATCCCTTCTCCCC
+TGCATCTCCACTGCAGAGGCAGGCTTCACTGTCCCCCCATTACCCAGTGGCTGTGAAGGG
+CAGCGTGGGAGTTGGGGGAAGGGACGACACTGGTGGGAGGGAGCCCAGCCTGCTCCAGCT
+ACCACGGAGAGGCTGAGATGGGGGGAGCGTTGGCGGATTCCCAGCTGCCCCCACTCTGTC
+CCAGCCTCTGGCTTTCTCAAAAAGGACTCTCTGTTCTCCTTCAGCATTCAAGACCCAGAG
+AGGGGGACTTGTGGTTGGGGGAGGGAGGAGTGGAGGGAGGTTGGGGGGGTCCTTGCTTCC
+TCTCTTTCTTTCTTGCCTGGGCAGCCGCTGGCCCCAAATCTCTGCAGGCTCCTGGCTGCA
+GAGCCTGAGATCTTTGCCAGGACAGGAGGAGGGGGAAGGGGCAGTGTGTCTCAAGCTCTA
+AGCCTGCTGGAGAGCAGGGCGGGAGCTTGGGAAAAGGAGGCACTGCGTGGAGCTGCTTAG
+CTCAGCCACAATCCAGCATGCCAAAGTGCATGGACCAGCAAGTTTTTAAAAAGCATGCAT
+TTTATTCCAATTTTTATGAAATTTATTTCACATCTGAATATACAGAAATTCCTCTGCACC
+CTGCTCTTCTGCCCACAGACCCAGCCCTGCTCCTCTGTGTATGCCTGGGCCCCCTTCTGT
+CCCTGAAGTTCCTGAGGGTCACACTGAAGCTCAGCCAGCCTGATCTCTTGCCTCTGTTCC
+CTTGATTGCATTTCTTCCTTTTCTCAGCTCTGTCCCACCACAGACAGTTAGTGAATGTAA
+AGCATTTCAATCCACTCAATTAAATGAATCCTCTTAGGTCACCTGAGAAAGGCAGAGGCA
+GAACCACTCACAAGCCTTTCCCCACCCCTTCCATTGGGCTGCTCGGTCTCCCCAAGAAGA
+TCTGCCTTTAGGAGGCCAAAATTATACCAGATTCATAGATGCTTCAGGTGGAGACAAACC
+CACGATTTTACTGGTCACCTTTTCTGAGAAGAGCACTTCTGAGCAACACAGCCTCCAAAA
+GCTCATATCTTTTTAAATCATTGGTATCTTGGGAACAACAGTGTTGACTTCTCCTTGCAT
+CCCCCACCCCTACCCTTGCTTCTGATTGTACTGGTCAGACTTCTACCTCTCAAGATGCTG
+AATTGGCGACCTCTTATGAACAAGGCAGCTGCCATACTTCTGGGATTGCTCCTGACTCTG
+ATAAGTAAAAAAGGCATTACCCTGGAGCACAAAAGTCTTAGGGCAGTGAAGCTATTCCAT
+ATGATACCACAATGGCGGATACACGTCAGTATACATTTGCCCAAACCCATAGAATGTACA
+CCAAGAGTGAACTCAAACTTAAACTATGGGCTTTGGACGATAAAGATGTATCAATGCAGG
+TTCATGATTGCAGCAAATGTAACACTCTGATGCAGGGTGTTGATAGTGGGTGAGGCTGTG
+CATGTGTGGGGGCAGGAGATATATGAGAACTTTCTCTACTTCCCACTCAATTTTGCTGTG
+AACCAGAAAATTGCTCCAAAAAAAGTATATTTTTTAAAATGCAAACAAAACCCCCAAAAC
+TGCATTATCCTGCAAAAAAAAAAAAAAAAAAAAAAAAAAAGCAAACACCTTGGCTGTGGA
+CCCTACCCAGGTATCCACTCTTTATGGGAAACAAAAGTTAACATCATAGTGTGGGAAAAC
+TAATTCCTGAAAGCCTAGTGCAGACTTTGCAGAATCCAGTGGAGAAATAGGCACCAGGTG
+GAAACTGAGCAGCCCCTCTATGACAAGGACTCTATTTCCTGAGGGAAGTGGGAGGAGAGA
+AGACTGGGGCAGAAGAGGAAGTAAGAAGGTACCTTTTTGGGAACTTTTCATGGCTACCCA
+GGCCTTCCTCAGCCTCTATCCTATTCTATCACCCCAAGAGGGCCTCCTCTTAGAAGGAAA
+GATTTCCCACCCTGTCCCCAAGAAGTTCCCTGTGAGACCCTGGAAGGCTGGTAGTGGTGA
+GCCCACAGGCCCTGGAGTGGAAGGGAAATCAGGAGCCTCATAAGACTACAATTAAAATGT
+CCATCTGCTCAGGCGCAGTGGCTCACACCTATAATCCCAGCATTCTGGGAGGCCAAGGTG
+GGAGGATCACTTGAGCCCAGGCAATATAGTGAGACCTTATATCTAAAAAAAAAAAAAAAA
+AAGAAAGAAAGAAAGAAAATTGTTTTAAATTAGCTGAGCGTGGTGGTGCATGCTTGTGGC
+CCCAGCTACTTGAGAGGCTGAGGTGGGAGTATTGCTTGAGCCCTAGAAGCGGAGGGTGCA
+GTGAGCTGAAATTGCACACTGCATTCCATCCTGGGTGACAGAGTGAGACCCTGTCTCAAG
+AAAAAAAAAAAGGCCATCTGATTCACCAGATGACCCAGGAATGTCTAGACTTAAAAACTA
+TATAAAATGTAGTTTTAGATTTACTCTCAGATACAATTTCTTATGTTTTGGGACAAATGG
+GGCTCAGTTCCCATGTTACCCAATTGTGCTTTGGGGGTCCAGTCCTGTGAGTTGCCTCCC
+GTCTAGTAACGGGCAGGCCTGCCCAGCAGGATGGAGAGGGCTCCCAGCACACCCCATGTC
+TCCACTTCCTGGACATGCCCTGACTCTCCAGCTACTGCCTCCTCTCAGTCCATCAGGCCT
+GCTCTTTCCATCCCTACCATTCCACCACCAGACCTGGCTCTCCTCTGGACACGTTCCTGC
+CTAATCCCCTGGCTCCCCAAAGACTCTTGCTCCCCCAAGTTCCTGAGTATTTCTACCCCT
+CTCTAAGTAAAATTGCCATGGTGTTGGGCTAAGACATTCCTACAGAGTGATTCTACTCCC
+TCAACAACCCCCCACAAAACCCTGTCCACTCCCTTCCAGAATTTTCCCAAATAGTGTAGG
+ATACAGGTTTTCAAAATTTTTGTTTTGTTTTGTATTTAGCAGCAGGGTCCTTTCAACAAA
+CAAAATCTTAGCAGAAGCCCAAGCATTAAACCACTCAAGTGGTGCTTTTCAGTCCTGAGC
+CCCTCACCCTTCTCTGAAACTCACATGGGAGCCCCTGGTTCTGCCAATAGTCTACAAACT
+GAGGGCCTAGGGTCCCTCACCCTCCATCCCCCCTATCTCCCCCAAGGCAGCCGCTGCTTT
+TGAACACACACCCCCACTGCCTGGGCTCCCTGGCTGAGTGGTGGGAATGGAGGCCCCAGC
+CCCTCTCTTCCCTGACTCTCTGGCTCTCCCTTCCTCCCTCAGGCTGGCCGTGAGTACTCA
+CCTGCCGCCACCACGGCAGAGAATGGGGGCGGCAAGAAGAAACAGAAGGAGAAGGAACTG
+GATGAGCTGAAGAAGGAGGTGGCAATGGTGAGGGAACTGCTGGGCCATGGAGGAGGGGCC
+CCATGCTGGGAGAGCTGTCCCTGCAGCCCATTGCACTCAGAGAAACTCCGTGTCCCCCAT
+GCTGCTCAACTCACCCCTGTGCCCTGCAGTACCCTCATATGCATCTTAGATACCTCCTCC
+CCAAAGTAACTCACCCTCCCTTCCCCAGGATGACCACAAGCTGTCCTTGGATGAGCTGGG
+CCGCAAATACCAAGTGGACCTGTCCAAGGTGAGTGGAGGGGCTTCTAGGGAAGGAACAAA
+AGAGGCAAGAAAACCATGCAGCATCAAGGTGGCAGGAGCCTTAAAACTGTAATCCAGCCT
+TCTTTACAGATGAGGAAACTGAGGCCCAGAAACAAGGACTGGCCCAAGGACATGCAGCTA
+GTTGGTGGCACAGCCAGAACTAGAATTTGGATCCCCTGCATCCTAGCCCAAAGCTCTCTC
+CCTGTATACCCTAGAAGCCAGGACTCCCTATGACCCAGGCCCCAGAGGGCCTCCAGGCAG
+GGCCCTTCCCTATACCCCAAGCAACTTCAGTTGCACACAGCCTTCCACAGAGCTGACAGC
+TGATGCACATGGGCTGACAGCCCATTCCTGTGGTTACAGTGATTTGCTGGGCCCCTGCTA
+CATATGTGACATTGTGATAGATGCGTTACATACATTTCTCTAATCTTTACAACAACCCTT
+TGAGATAGGTATTGCAGACCTGTTTATTAAACAAAGGAGACTCAGAGAGGGAGAGTAACT
+TCCCAAGGTCACATGAACAGTAACTGGTACAGTCAATTGCTAAGCCCTTCAATTAAAAAT
+TTGTTTAAAAATCATTAATCAAAAAATGCTGGGTGTGGTGGCTCATGCCTATAATTTTAC
+CACTTTGGGAGGCCAAGGCAGGCAGATCACTTAAGGTCAGCGGTTTGAGAGCAGCCTGGG
+CAACATGGCAAAACCCCATCTCCACTAAAAATACAAAAATTAGCTTGGCACAGTGATGGG
+CACCTGTAATCCCACCTACTCGAGAGGCTGAAGCAGGAGAATGGCTTGAACCCAGGAGGC
+GGAGGTTTCAGTGAGCCGAGATCATACCACTGTACTCCAGACTGGGCGATAGAGTGAGAC
+TCCATCTTGAAACAAGCAAACAAACAGACAAATATTTAGTGGGTGTATATCTCCTTTATG
+GGCCTTCCCTGACTGCTCTTTCTAAAATTCTATGCCCCTATTCCTTTACCTTACTTTATT
+TTCCTTAGTAACACTTTTCTATCTGGCACTAATATGTATTTGTTTATTGTGTGTCACCTC
+TCCAGATCATAAGCTCCTCAAGGGCAGGGACTTTGTTGGTCTTGTCCATCTCTGCATCCG
+CAGAGTGACAATTGTGCCTGGCACTGGATATACATTTATTAAATGAATGAATGAATGAAA
+AAAAAAAAAAAAGAAACGGTCCCTGCTTTAAAGGAGCTCAAAGTCTAGTAGCAGAAAAAA
+TGCAATCCTAGGATTCCAATGCAATATGGTGAGTGCAGCTCTAGAATCAAGTATAGAACA
+CCATGGGGAACCAGAAAGAAGCATCTGAACCAGCCTGGGGCTGAGGGAATAGGGCCAGGC
+AAGGCTTTCTGTGGAAAGTGACAGGTGATTGCCAAAGTTGTGTGACACTGACGGTCAATA
+ATGTAGGACTTGAGCAGGGTGTGGAGGTGCCCACCTGTAGTCTCAGCTACTTGGGAGGCT
+GAGGTGGAAGGATTGCTTTAGCCCAGGGTTCAAGGATGCAGTGAGCTATAATTGCACCAC
+TGCGCTCCAGCCTGGGCAACATAGCAAGAATCCATCTCTTAAAAAAAAATCTAGGACTCT
+CCTAGAATCCTTGATAAAATAATCTGAAGGAAGAGAGATGGGGAAAAAACAGTTGAGGTG
+TCTTTCCAGGGAGCTTATCCAGACTGCCTCCAGGGGAGATGTTATGGCCCTAACCATAGG
+TGTTGTCTCAATCCAATATAATTAGGGGTCAAGCCCAGTCAAACCCTTCTCAGCCCTCAA
+GCCCTCCAAGTTGCATCACAAATGCCACCTCCTCCATGTGGTCTCTCCTGATGGTCCCCC
+ACCCCTTCCAACCAGGTGGGACTTTCCTTCTGGGCATTCTTCCCATGCCCGGGAGCTGAA
+GGGATGGGCATGGTGACTGGCTGGGTTGGCTCCGGATGCGTGCCCCTACGCCTCTCCTTG
+CTCCCTCAGGGCCTCACCAACCAGCGGGCTCAGGACGTTCTGGCTCGAGATGGGCCCAAC
+GCCCTCACACCACCTCCCACAACCCCTGAGTGGGTCAAGTTCTGCCGTCAGCTTTTCGGG
+GGGTTCTCCATCCTGCTGTGGATTGGGGCTATCCTCTGCTTCCTGGCCTACGGCATCCAG
+GCTGCCATGGAGGATGAACCATCCAACGACAATGTGAGCCCACACGCCCGACCCGGGAAC
+AGCCCGTGACTGTCCTCCAACCCTGAACCCCCAACACAGTGGGGGGTGGGCAGGGAACAA
+GGCCCTCACATAACAGTCCTACAGATGCCCCTGCATCTTAGGCTGGAAAGGGGAGAGGCT
+TCTATATATATCTGTAAAGTACTCCTCCGCCAACAGTGCATACACATTCACATACTCATT
+AATTAATGAAACAATCTCTCCCTGTTCCTCCCCGCTTAAGTGAGCCTGTCTGTGTGCCTG
+GGTCTTCATCCCCTAGCAAAGTCCTAACCCTGGGAACTTCCTAAGACTTTCCCTCCCATT
+TCTAGTCTTAACAGGCTTGAGGTTGGCAGATCAAGGGGGAGGTTAGTGAGAAGGGCTTTC
+CCCTACCATCATCACTCTCAGTCACAGACAAAGGTCTGGGCTGTCATCTTGGATGGCACT
+GCCTGCTCATCCCAAGTGGCAGCTGCCCCTTTAGGGTTGGGGGGAAGGTCAGGTCCCTGA
+AACTCTTTCTCCTTACCAGCTATATCTGGGTGTGGTGCTGGCAGCTGTGGTCATTGTCAC
+TGGCTGCTTCTCCTACTACCAGGAGGCCAAGAGCTCCAAGATCATGGATTCCTTCAAGAA
+CATGGTACCTCAGGTAAGATGGCAGGGCTGGGCTCTGGGCTAGGCTGTAAGGTTTTGGCA
+AGAGTCCAGCTCATCTTTTGTCAGCTCCCAGGCTCTAAGATAGAGATGGACAGAAAAGAT
+CCTCCAGCTTTCCATGCCAGCACCTAATTGTTTATGGGGCTTCTCCTTCTGCTTGACGGT
+GTGGGAGACCAGCAGGAGAAGAAGGCAGGGGCAGAGACAAGCATTTCATGAGCTGCCTGT
+GGCTCCCCACAGCAAGCCCTTGTGATCCGGGAGGGAGAGAAGATGCAGATCAACGCAGAG
+GAAGTGGTGGTGGGAGACCTGGTGGAGGTGAAGGGTGGAGACCGCGTCCCTGCTGACCTC
+CGGATCATCTCTTCTCATGGCTGTAAGGTGAGGAGGTCATACCAGAGCAAGCAGTTGAGT
+CTAAGGAGAAGGCTGTGTGCAGAGCTGAGAGGGGCCCAGTGAGGTTTAAAGGTGGAGAGA
+CCCAGGTCCAAATGTCAACACCATGCCTATGCCCCTGCTAAACCTTCTCTCAGTGTGGGG
+TGCTTGTGCAGTGCCTCCTTGCATCTGTGTGTTATGAATGCTCTATGCCCCAGGTATTTC
+TTTGTTGTCCACTTTTTAGCAATCATGTATTGAATACCAATTATGTGTCAGCTGCTTTAT
+ATATGTTCGATGTGCTATCTGATGTAATTCTGAAAGAGTGGTGTAATCATGCAGGGCTGC
+TGAATGTGGCCACACACGTTGCATCCTGAATGACCCCAGGAGGTGCATTTTCATAACTAC
+AGTGTTAGTGTCTCCCCCTGTGGTTGTGCAAGGCAGAGATTCTGTTTACATTCACCATTT
+TACAGTTGAGTACATGGAGGCTCTCTTGGTTAAGCAAGTAACCCGAGGTCACATAAGAAG
+TCAGGAGGAGTCAGAATTCCAAACCAGGGCAGGGGCAATAGATGAGTAATGCCATAAATA
+TGTGATGAGGGCAAGTGTGACTTTGTTCCTCGTGTCATGAAGATTGAGTGTGGTTGGAGG
+TGGTTCAGGAGACAGCTGTGTGCATACAAGTGGCTCTGCCAGTCTGATGACTATGCACTC
+CTTCCTCCTCAGGTGGATAACTCATCCTTAACAGGAGAGTCGGAGCCCCAGACCCGCTCC
+CCCGAGTTCACCCATGAGAACCCCCTGGAGACCCGCAATATCTGTTTCTTCTCCACCAAC
+TGTGTTGAAGGTGAGAAGCCAGGCTGCCCCCTGTAGGAAAGAGTCTGAATCCTGAATCCA
+TAGTCAGGATGAAGGGCTCTGGTAGTACTTACCTGGCAAAAGCTCTGCCATTGGTGGGGC
+AATTGAGGGGTCAGGGGGCCCTGAATATTATGGAAACACCCTCCACAGAGCAGGGTATTG
+ACTGAGGGCAGCCTGACTCCATGGGAAATGTATCTCCCTTCCCCAGGGAGATCTCTGTTC
+TGTCCACCCCAAGACTGAGTGGATTTCAAAGCTTTCAGGTATTGGGTTAAGAACCCCTTA
+CGCATGCGCACACACAGATGCACATACGCTCAGAACCAACCTGGATTCTTAGGAGTCTTT
+GGATTTGATTGATGCTTCTATTCCTTGTGGTTCTGGGCCTCGCTGCCCCCTTCGATAACC
+TTTGCTTTGGCCAGGGCCCCAGGGTCTGGGAGGGCCAAGAAGGCATTCTGGCCAGGTGAA
+CACTTGGGGCCTACTTCAGGCTGCTTCTAACCTTTGTTACATCAAAAGACATGTTTCACC
+ATGCACAGACTACCCAGTTTGCCCCATCCCTGTTCTGGGTTCTAGAGATAAACAAAGGTC
+AATAAGAACTCCCAGAGCTGCCATCACTTAATGGCAGGGGTCCAAGCACAACTGAAACCT
+GTGCCCTCTGTGGTAAAGCCAGTGAGCAGAGTCAGGGCTGCATCAGAGAAGGAGTAGGAG
+GGAAGAGACAGAAAGAGGCCTCACTAGACCAGTGGCCTCTGAAAATAGGGCAAAGGAGGC
+CTCATATCCCAATCCACACTTCCCCTGTCCATTACTCATTTCTCCTCAGTGTATTATGTT
+TCTCTGTGGTTTTTTTCCCTTCTTCGACAAGTTGGAATTCCACTGCTTGGAATCCATGAG
+GGATTGGTTCCAGGACCCCCTTCGTTACCAAAATCTAAGGATCCTCAAAGGCCCTGATAT
+GAAATCGCATAATATTTGCATATAATCTGCGCACATCCTCCCATATACTTTGAATCAACT
+CTAGATTACTTATAATACCGAGTGCAATGTAATTTTTATACTGTTTTGTTTTTAAATTTG
+TATTATTTTTATTGTTGTATTGTTATTTTTATTTAGCTCCAAATATTTTCGATCTGAGGT
+TGGTTGAATCTATGGACATGGAATCTGTGAATATGGAGGGCCAACTGTACAAATAAAAAT
+CACCACACATTACATTTCCTAAGTTTGTTTTTTGTTTTGCTTTTGAGACAAGATCTTGCT
+TTGTCACCCAGACTGGAGTGCAGTGGTGCAATCACAGCTCACTGCAGCCTTGACCTCCCA
+GGCTCAAGCGATCTCCCACCTCAGCCTCCAGAGTATTTGGGACTACAGGCGCGTGATACC
+ACACCCAGCTAATATATATATATGTATATTAGAGATGGGGTCTCACTATGTTACCCAGGC
+TGGCCTCAAACTCCTGGGCTCAAGTGATCCTCCTGCCTCAGCCTCCCAAAGTGCTGGGAT
+TACAGGCATGAGCCACCGAGCCTGGCCAAATTTCCTAAATTTTTAATGTACTGAGATTTT
+GAAATTTACATTTCCCACAATTGATTTATTCAGAAATGCCTAGTAATACATTCCTATTGC
+TTACTCTGCAATTGGCTTTGAAAAGAGGAAAGGAAATAAATGAGATATTTGTTGAGCACC
+CGCTATGTTCCAGGCACTGGCCTAAGTATATTAAAGCACAATCTCATCTAAATATTAAAA
+CAGCACCGAGATAGGAATTCAAATAAGGATTCAAACCCAAATTTGGCTGGCTCCAATGTG
+CATGCTTTTTTCAATACACCACCCTTCCTTCCCAGAAGACCCCTGTAATGTCACACAAGG
+GTTAGCAATTGCCTGGCTTCCTCTATAGAGATTCATGCCTAATGTTGAGAAAGAAAATCT
+TAAGACTGGATGGAAAATCCATAATGAGTGCTTTACTTTGTGTTCATAATATACTTTTGG
+ATGGTATCAAGTATGGGAATGATTTTACCAAAATGCCACAGGCAATTTTTATTTATTGAA
+TCTTGTCTATTTGAACTTTGAGATTATTTGCAAGCTTCTCCCCAGGCTCGAGTACAAGAG
+AAAATAATTTGCTGTTGTATTGATTCAAGGATTGGCGATCTATCCAGGGAAAGGAAGGCC
+AAGTGGGGACAACCTACTGAATGTGGCCTCCAGATCTGCGGCTTTGGCTCACCTATCCTG
+TGATGATTTTCCTTGCTCAGGAAATAGGATGGGACTGCAGTCCCTGGGAGCCACAAGGCA
+CCCAACCTGATGCCCCACCATGTTGCAGGCACTGCCAGGGGCATTGTGATTGCCACAGGA
+GACCGGACGGTGATGGGCCGCATAGCTACTCTCGCCTCAGGCCTGGAGGTTGGGCGGACA
+CCCATAGCAATGGAGATTGAACACTTCATCCAGCTGATCACAGGGGTCGCTGTATTCCTG
+GGGGTCTCCTTCTTCGTGCTCTCCCTCATCCTGGGCTACAGCTGGCTGGAGGCAGTCATC
+TTCCTCATCGGCATCATAGTGGCCAACGTGCCTGAGGGGCTTCTGGCCACTGTCACTGTG
+AGTGGGTCAGGCTGAGGTGCCACCAGGGGAGGGTCTCACTACTCTTTCCTCAGAGTGATA
+GAGGCACAGTTGCTTGTAGCTTCTCACTACTTTTTCCCCCTAGAGTCACTTATTGGATGC
+GATACTCAGAGATCACTTAATACATGGCTTAGGGTATGAGTTGAGAAATCTCAGGTGGGG
+CTATTCTTAGGCACTCAGTTTCTCTTTTGGAGCTTGTCACGGCTGCCTTTTCTCACGATC
+TGCCAGCAGTCTGGGTCTCATTCCGTCCAAGTCTCCTGTAGAGGCTGTCTCAAAGGTCAC
+CAGTCACTAACTAGCAAATCCAGGGACCTTTTCTCAGCTCTCCTCCCTAACTCTGCTGCT
+GAAACAGATACTGCTGTCCACCCTCCTACCATTCTCTCTTTTGTCGATTGCTGTGATGTG
+TCACTATCCTGGTTCGCCTCTCCTCTGTCCCTTCCTGCTCCCCCTACACTGATTCTCAGG
+CCTCAGCTGTCTGTTCTTCCTACACCCTGAGCAAGAGCCAATCTCCTCCCAGTTTGCAGT
+GTCCCTTCTGTGTTGATGACTCAGACATCCCTATGCTCAGCTCTGCTCTGGCCCTCTCTG
+GAGCTCTAGTCAAACGTCTTAAACCCCCTAATGGGCATTTCATCTTAACAGATACTCATG
+ATCTCAACACATCTAAAATTGAACATTCATTATTTACAGCTAAGTCCCACCCATTCAAGT
+TAGTGGAGTAGAATCAGGGGAGGAGGAATGGAGCCACGGTCTAGGGTAAGGTTATGGCCA
+TCTCCGGCTTCAGCCTTAACCTTTTTTATTCTCCTCTTTCTCTACCAGGTGTGCCTGACC
+CTGACAGCCAAGCGCATGGCACGGAAGAACTGCCTGGTGAAGAACCTGGAGGCGGTGGAG
+ACGCTGGGCTCCACGTCCACCATCTGCTCGGACAAGACGGGCACCCTCACCCAGAACCGC
+ATGACCGTCGCCCACATGTGGTTCGACAACCAAATCCATGAGGCTGACACCACCGAAGAT
+CAGTCTGGTGATTGGGTGCTCCAGAGGGGGTGGATAGGATTAGAGGAGGCTGAGGGCAGT
+GGCGTGGTGGGGTGAGTGGTTGAGATAAAGGCTCTAAAGGGAGCCACGCTCCTGGTTCCC
+CCTCATTTCCTCCCAGGGGCCACTTTTGACAAACGATCCCCTACGTGGACGGCCCTGTCT
+CGAATTGCTGGTCTCTGCAACCGCGCCGTCTTCAAGGCAGGACAGGAGAACATCTCCGTG
+TCTAAGGTAGGGGGTCAGGACACACACCAGGTATGTTTTGGGGGTGTCTCCAAAGCCTCT
+TGCTGGCCCCAGCTTTCCTTCTCACATGATGTGGCTGCCTTGGGGGTTTCAGTGCCGCCT
+TCACCTGATCCTCCACTCCCTTCCCTCCCATGCTGACACTGAATTCTTGTCTCTTCTGGC
+AGCGGGACACAGCTGGTGATGCCTCTGAGTCAGCTCTGCTCAAGTGCATTGAGCTCTCCT
+GTGGCTCAGTGAGGAAAATGAGAGACAGAAACCCCAAGGTGGCAGAGATTCCTTTCAACT
+CTACCAACAAGTACCAGGTCTGCTTGGGTTGCCAGGACAGAGGAAGAGAGAGGGATATAA
+ATGGGTAAGGGTGGACAAAGCCAAGGGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNTAACTCACATGTTGGTCTCCGAGGCCCTCCACCTTCCACCTGAAACCACC
+AAAACTGAGGATGAGAGAGGGATATAAATGGGTTAGGGTGGACAAAGCCAAGGGGAATCA
+TCACTAGCANAAGTGGGGGTGTCTGAGGGTCATGTTCCCTCCCCCTGCTAAGTCCCCCAG
+GCCAGCTCATATGACTGCATGTCTGATTGCAACTAGCCCCAGCTCCCAAACCTTACTCCA
+TCCCTTACTACAATCTGTCACTCTCTCAACTCTTCTTCCCATAGCTCCGCATCCTCTGGC
+CTAGCCTTCCAACTCTTCTACCCCAGTCAGTGAAACCATAATTCTGTAATTCCTTTATGG
+TGCCCCTTAGCCTTTAGGCACCTGTTAAATGCCAAGAATGCAATCTCAGCATTCCTTATT
+ATCCATAGAGGTAAAGCCCCCTGAGGCCAGGTGGTGGTGGGGATGCTGAGGAGGGAGCCG
+TTCAGCCCCTAGCACAGTGCCTGTCTTAACCTGACCTCAATAAATATTTGTTGAATGAAA
+GACTGTCCTACGGAGGTGGCTCTCAGGTTACAAGTGTTGGAACTGTGAGGTCTAAACACC
+CCCCTGCACAAGGAGATTCTCTTTGTTGACAATCTTTGATGGGTTGGGGCTACTTTTCTA
+AGGTGGTTTCCTTACCAGCTGCTGCTCTATGCCGCGCTACCAAGACAAGTATGGCCCTCT
+CTGTAACTACCTGTTGTCTCTCCAGCTGTCTATCCACGAGCGAGAAGACAGCCCCCAGAG
+CCACGTGCTGGTGATGAAGGGGGCCCCAGAGCGCATTCTGGACCGGTGCTCCACCATCCT
+GGTGCAGGGCAAGGAGATCCCGCTCGACAAGGAGATGCAAGATGCCTTTCAAAATGCCTA
+CATGGAGCTGGGGGGACTTGGGGAGCGTGTGCTGGGTGAGAGGCCAGAAACAGGAGGCTC
+AGAAGGGGATTCCCAAGCCTCTGCGGCATCCCTGGGGTGGGGGACTGTGGGGGCGTCCAG
+GAAGCCACTCTGCGGATCTCACTGATCCCTTCTGCCCCCCTTTAGGATTCTGTCAACTGA
+ATCTGCCATCTGGAAAGTTTCCTCGGGGCTTCAAATTCGACACGGATGAGCTGAACTTTC
+CCACGGAGAAGCTTTGCTTTGTGGGGCTCATGTCTATGATTGACCCTCCCCGGGCTGCTG
+TGCCAGATGCTGTGGGCAAGTGCCGAAGCGCAGGCATCAAGGTACTGGCCTCCCATCCTC
+CCCTCCATTCTAGCCTCCCCCATGCCAGAGTTCAAGGAGCTGCAGTGGCTGCTGCCCTGG
+AAAGGCCCAGGCCACGGTGGCCTCCTTCCCACTGACTCAGAGAAGAAGCTGTCCATCTGC
+AAGGAAAGGCCCACCCCTGCCTTGGGGCACTCACCCTTATCCCTTTTGCTCAGAGAGGCC
+AGTGTCCCATGCCCCTCCTCTCCCTCCCTGGCACAGCTCTTTGTCCCATCTGCATGTGTA
+CCCTTCCATTCTGATTTGAAGCATAATCTGGCACTCCTATCTTGAAGACCTAGTTGTCCT
+ACCCTTCCCAACTACTGCCCACCCTCTCTGTCAACTTAGGATGGGATTAGCAGGCTCTTG
+TATGCTCCCAGTGGCTCAGCCCATAACCCCACAGAATGCCTCCCACTCAAGGTTCTCTTG
+TTCAACTACAATTGCTTCTCACTGACCACTCCCTCCTCTGTGCTATGTTTGTTCTTCATT
+CAACAAATATTTACTGTTTACTATGTTTCAGACACTGTGCAAGGTGCAAACTCTAGCACC
+TTGCACAGTGCCTGATACATAGTAAACAATTTGTGTCACATGACACATCTAGATGGGACA
+ATGAGCTGTGCCAGGGAGGACACATATAAATCAGGCACTATTCTAAGCACTAAACCTCTA
+TGATTTCATTTAATCTACGCAACAACCCTGTGAATAAGCACCAGTATTTTTCCTTTTTTA
+CAAAGAGGGAAACTGAGGCACAAAGTAACATGCCAAGGCCACACAGCTAGTGTAAGTCAT
+AGAGCAGGTACTTGAACCAAGGCAGTCTGCTATTTCTTAACTATCATACCAGGAGACTTG
+TTTCTGTGGGTGTATGTGGTTTCTGAACCCCATAACCACTCTGTCTCCGAGGCAGGCATC
+ATTATCCCTGTTAAAAGTAAAACAAGGCTGGGCACAGTGGCTCATGCCTGTAATCCCAGC
+ACTTTGGGGGGGTCCAAGGCTGGTGGATCACCTGAGATAAGGAGTCCAAGACCAGCCTGG
+CCAACATGGTGAAACCCCGTCTCTACTGAAAAAAAAATACAAAAATTAGCCACGTGTAGT
+GGCAGGTGCCTGTAATCCCAGCTCCTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCCG
+GGAGGCAGAGGTTGCAGTGAGCCGAGATTAGGCCACTGCACTCCAGCCTAGGCGACAAGA
+GCAAGACTCCATCTCAAAAAAAAAAAAAAAGTAAAACAAATGGAAACTGCAAGTGGGTAA
+GTCTTTTCCCCAAAATAAAACCACCAGTAAGTGATGGACTGGGACTATGAAGCATGATCT
+GCTTGACCGGAAAGCCTGAGCTCCTGCTGCTTGGCCAGGTGCCTCTCTCTGTGATCAGAA
+CCATCACGGAGGCCTGGGCAGAGGACCTCTATGGGAAGTCAGCTCAGGGAAGCATTAATT
+CTGCCTCAGAGAACTGGACGATGTTTGACAAAAGGAATGATATTTGAGCTGGGCTTTGGA
+GGTCAAAGAGTATTCTGCAAAGCAGTGAGGGAGAGGAAGGCATGTCAAGATCAGGGAACA
+GTGTGTACCCTGGCGTGGCTAGACTGTACGGAGCGTGTGTAGGGGTTGGCAGGGGAGAGG
+TGGCAAGGAGTACTGGGGGTTGAGACTGGAACACTGGGTCAGAGCCAGTTGGAAGGACTT
+GGCTTAAATGCCAGACTTGGGCTTTATCCTGCAGGCCACAGAGAGCCATCAGTGTCACGG
+GGGAGGTTGAGCTTTTAGGAGACTACCTGGTTTGAGGAGGGACTGGATTTGGGGAAGAAC
+TGGAGTCAGGGAGAGACAGGTTAAGAGGCCATTGCCATAGTTCAGGCAAAGAATGATGAC
+CTGAACTAGGGCAGAAACAAAGGGAATGGAGAGGAATAGCCATGTCTGAGAAATGTTTTG
+AGAACTTGGTGACCAAACTGGATATGGAAGGTGAGGAAGATACAGGAGAGAAGGGGACAG
+GATGAATAGAGAGGACAACATCTAAGGTAGGGAGCAGTAGCAGGCTTGGTGGGGTGGCAG
+GAGTAGAGAAAAGGAGTTCAATGTTGACCTTAAGCTTAAAGAGTCTATAAGATACCCCAG
+GAAAGACAGATGTTCCAGTGAACATTGGAGTATGGTTTAGAGCTCATGAAAGATGGTCAG
+GACTGGAGATTTGAATCTCATTAGCACAAGGGTGCAAACTAAAGGCTTATAGTATGTTGT
+ATTATATTGCCCAGAAAGAGCAGACAGGTCAAGTGAAGGGAGGAGAATGAAATCCTGTGC
+CAAACCAACATTTAGGCACAAGAGGAACCAGTGAGGAGGGGAGAGGGTGGTAGGAACCAA
+TGCCATTTAATCTTCATGACCACCTAGAAAGGCAGGAATCATCTCTGCATTGTGCTGAGA
+CCTCAGTGCTTCACTTGCCTAAGGTGCTGTAGCTGGAGAAGGTCGGTGTTTAAACTTGAG
+CCCCTCATTCAACTCCAAAGCCACTGCTTCTTCCATCCTACTAGGCTGCCACCCTGAGAG
+GCTGGGAAAGAGCATGCAGAGGAGTGGAGGGAAAACAGGAATGGAGGAGTATGTGGAAGC
+CAGGGGAGGGCAGGGTTTCAAGGAGAAGAGGTCTACGGTGCCAAATGCCTCAGAGGGGTC
+AAGTAGGATGAGGACGGCAAAGACACTTTTCATTTTTCAATTGAGAGNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNTTGCCCTTAAGTGCTTTTGCTGCTATTTTTTTG
+CCCCCAGTTCCACAAGATCCAACCAAGAATTCTGTATCCTGGGACAGTCAGATTCTTCTA
+AATCAGGCCAGGAAGGAGGGGAAAAGAGTGAGAGAATGGTATTCCCAGATACTTCTTCCT
+CCTGCCCCTTTTCCCAGCAGCTCTGAGACCAGATGTTGGCTGCTGTACTTACTCCCTGAG
+GTAGGGAATGTGTGGTGATCGAGTGGTCTGTGTTCCTATTGCTGGTGGGGTGATAGGGTG
+GGCTAAAAACCATGCACTCTGGAATTTGTTGTATTTTCTCCCAGTAAAGCTTTTCTTCTC
+CCGACTGCTGCTCTGTGTGGTATGTGGTCCATTTATTCACAATGGATGGTTTTTGTTCCT
+ACTAATGTAGGTACCTAGCCTTCCCCTCTCATTCTGCACATGATACAGTGTTAGAAAGTC
+CATTTTGGGAGACCCGTATTTCCAGTGCTTGGGCTCCAATTGGAGACTGTTGGAAGTAGG
+ATACACTCCATTTAGCTCAGTGATACTTAGGAGATTGGGTGGGGTGAGGGTATACACTTG
+GGGTATTTTCCCAGCCCTTATTCTGGGCCATAAATATTGTTCCCCCTCCCTTCCCAGTGA
+TTCAAAGCCTGCAAAATCCTGGTCACTCACAGAAATTTTAAAATCAGTTAGTTGCAATAC
+AATCAATCACCTTTATTCCGGGTTAGAACAAGGCCGTGCACACTGCAGACAGAAGAGCAC
+AAGATGGGGCAATCTCACAGCAATATAAGGAGTGGGTGGGCAGGTTAGTCTTTTTAGATT
+ATTTTGCCTTACAGAGAAACTACTAGACTCTGCTGAAAAGAACCCTGTCTCTCTTCTCCC
+ATTTCTTCCATAAGAAAGGCTCCTTATTCTCTCTCTCACTCTAATGTAACATATCCCAAA
+TGGAAGGACAGTTCATGCTCATTCCCCCCACCCAATTCTACAAGTATCTCTCTCTTCTTA
+AGGGAGAAGCCAGACAGGGAAAGGGAGACTTGGGAAGGTCCTTTAAGGCAAAACTGTGGA
+GTCCTAGAGGTGTGTGTGTTAAGGCAGGTGTCTGCAGCAGTGGCCAGAGGGGCAGGGCAC
+GCTAGGCTGGGCTAGGAGTTAAGAGTACTGTCTAACCAGTCTGGCTTCTTCCTTCTTCCA
+GCAGCCAGCCCTCCCCTCCCATTAAAGTTTTACTCCCACATCCTAAAAGCTCCTAACTTG
+AACTTTGTCTAAATAGTCTCTGAGACTGCATGTGAATTTAATGTTTTTTGAAAAAGCCTT
+CTTTTTTGTTTGGACTATGTAGCAATTTGATTCTGCCACTCCCTTCACTGTGGCATTCCC
+CTCCCACCCCCGCTCTGGCCTAGACAATAAGTTAATGCTAGACACCACAAAAAAGGGCAG
+ACATGGCAGTGCAGTTTTAATATACATATATGTAGAATATATATGTACACACAGTTAGCA
+CGGTCAGGGTGGGGAGGGGCATCTTGGGGTAGGCAGGAGGCTGGGGTCACTCACTGTACT
+GTAGTGATGTGGCTAGGAGGTGTTCACAAGCCCAGGGGGCCAGGATATCTAGTAAAGAAG
+GGTTTCCTAAGTAAGTCCCTTCCGGGATTGAATTGGGGTGGGGATCGGTGCAAAGGAAAA
+CACTGTAGTTAACTTCTTTCTGGCTTGGGATGTTCTTCACCAAGGAGAGAGGGAAGGTTG
+CCTACAATAACCAAGTTGGGACCCTATAGATGGTGGTGTCAGGATTCGTGTTAAAACCTA
+AGGGTCTCATTCTTCAACCTCCCTAGGTTTAGAGTGCAACAGTCTGAGTAGGAACAACTG
+AGTGGAAGAGGGGGCTCCCCTCTCTCTGCCCTCATCCCCATCTCAGGCTCAGTGGTACAT
+GTTCCCAGCACAGTCTCTCCAGAGTACAGGTGGGTGGGGGCATCTAACGCTGGCTGCTCC
+ATGGGGTTTGATCTTAGGGACCAGGAAAGGATCCCTGGCTCATACCTATGCTAGGCCCAT
+GAGAGACAAAAAGGGACCTGCTGGTACTCAGGAAACAGTCCAGCACAGGAGTACAAACAT
+GGAACTCTGGCTTTTCTTTGCCCCTTCCAGGTGGGGGTTGAGTGGTCTCTTTGCCTTCCG
+TCCCCACTGCTTTTCCCTGGATCCACCTACCTCTCCCATACCTTACCCCTATGTCAGCTC
+CTCAATGTAGGAAAATGCAGGTCCTTTCTGCTTCCACACTTTAAAGATATGGGGGGAGTG
+GAGGAAAGGGAAGTAGGAGGAATGTGCTGATTTCTATTTGCATAGAAATAGCCCCCACCT
+CCTCCTAAAGTGGGGAAACTCAGCCCCCTCCCCTAGCCCCTGACCCCCATCCCCTTAAGA
+AGAGGAACTAGATGAGCTAGACATATAGACAAACAGCCAGAGCGTGCGCACACGCACACA
+CACGCGCACAGTAAGGGGGTTAGGACCAGGTTAGTAAGTGGGTAGATGGTGGGTTGCCCT
+TGCCCCCTCTCCTGGAGGATGGGGGAAGGGAGTGGTCTGATGAAGGTCCAACCTTCCTCC
+TCTTCCTCCCCCTTGCTCAGGCCTTCTTCGGTGGGGGAGCCAATTTGATGATCTCTTCCT
+CAGAGGGCTGCCGGATAATGTCTGGGGGCAAAAAGGTATGTGTCCAGGGACAGCAGTGTG
+GGAGATGCCAGGCAGGCCCATTGGGCATCTGCCTCCACTCTCCCAAAACTCAAGTACTCC
+AAGGATAGTCAACGGGACGAGGCAGAGGTGCAGCTAGTTAAAGGAGTGGCCGCTTACCTT
+TGTACTTCTTGGCACTGGGGATACGGGTTAGCTTGGTGTCCAGCTCATCCTCCTCAGAGA
+TCTTCAGCACACGGGTTCTGTAGTCAACCACCATAGTGAGTAGGTCAGGACGGCGGGAGA
+TCTCAAAGATGTGCTCAATGTAGGAGAGGTTGTCTGGAGGAAGGCAGGAGATAGGGTCAG
+AGCTAGGCTGTAGAGGTCAGCTTCCCCTACTCCACCAGACTAGTGACCTGGAACCAAAGC
+TGAGTTGCTGACATTGGTTATGGGAGGGTACAACACTGGAGGAAGAGGGGCTACTAATCC
+AGGAGAAGAAAGCTAAGGTACCCAAACAACAATACAGAGGACTGTTCGCAGGAAGGGCAG
+ATAGTTGGTCAGTGAAGTTTCTAGATTGTGGTTAGAATGAGGGGTTTAGCTTAGATACTA
+GCTCCCATATGTTGTGGGGAGGGGACAAAAGAAAAATCAACTTGACTTCCAGATACCCCA
+CCCCCACCCCCATCCTTTTCCACTTTCCATCCAAGTAAGTATCAGAATTTGCTGACAGCA
+CATGTAGGACCAGCAGGGTGGAATCTCTAGGCAACCCAGGAGAGATTCTGGGCCTGGTGA
+GAAAGGTTATTCTAAATGAGAGAAGGTATGGACACCACCAGAAGGACGATGTCCATGGCT
+TGGCCAATTCCAGGAGATTATAGATGCCCTTAGACCTTCTTCCCTGGCTCCTCCCCTCTG
+ACTGTTGCCATACAGTCTAACTGTAACACGTCCTCCTTTATACGTTCTCCCTGCCAGCCA
+CACAGCAGTCAGAATAGTATATCTGAAGGGCCCTGAGTTCACTCACCTCACCTATCCAAT
+ACCCAGGCTCTCTACAACACAGTCTTACCTTACCTTCCCAGCCATTTCTAAACACCCTAC
+ATCCTAGTCAAGCCAAGCAGGACTGATTGCATTTCCAAACACACTAGATACTCTGTGACA
+GTGTTCCTCTGCCTGGCATGCCCTCCTTTGGATCTTGCCTTAGAAATCCCACTCACCTTT
+CAGGGCCATCTTAAACACCAATTCTTCCCTAAAGCCTCCAGCTGTGAGGGATTACTCTAA
+TCCATAGATCCCACTCCCCAAGCAGTAGTCTCTCAAAGTGGATAAGAAAATGATAGAACT
+TGTAGTTTTAAATTTTACCAGTTTTTAAAAAATGCTTAACAAAGATTAGTATTACTATTT
+ACCAGTTAGATTACACTTCGAGTTCTCCCCTCATTCAGGTATTCTTCCATTTTTATTGTC
+TAGAATTCTTCCCAATTTTTGCCACTTAAAATTTTACTATTTTTAGATTGTTTTTATGTT
+CTGTTGTATATGTGATAATAGCACAGGAGTATCTGAATAACATTTTTTACCGATGAGGGG
+CACAGTGAAAAATGTCTAAGGCCCCCATACTCTTAGAGCACCTCTCTTCTGCCTTGCTAT
+AGAGGGGTACACATTGTGGATGTAAAAGCTTTGAGCTCATCTCTATTTGCTGAATGAACT
+GAGCCTGAATGACTGATGACAGGACCCCTGTGCTCCCCTCCCCCACCTTTGTCCAGCTTG
+TTGTGGCTCTCCAGGAAGCTAAACCAGGCACTGCCAGTAGTGATCTCCTCACTCTTTTCG
+CTGGGGATGTCTTCCTTGCAGGCCGACTTGAGCTGTTCTAGATCTTCAAGGGTGATGTTG
+TTGGTCAGGTCTTGCAGGAGGGTCCCGTACTCAGCCATGACTGGGGTTGGGAGGCGACAA
+AGGGATAGGAGTTGAGCTTAAGGTATGGAGTCTGGTTTACTCACTGGCACTACCCTCCTA
+CTCCCCACTCTGCCCTACCAGGCATGTGCCTGGAACATGGAGGCTGGAGGTGGCTAGGGC
+TGGAGGGAGAAAAGGAAGACCTAGGGGAGGCAGTCAGGGTAGACGGGAAGCTAGGAGCCA
+GGCTGGAGCCTAGAGTGACAAGTCATACCTCCTTCCCCCCACCATCTTTTTCTTCTGTTT
+TCCCCCTCTAACAGTAACCCTGGCAACAGTTCCCCTGACTGAGGAGGCGGTTACCATAGC
+AACCTGCTCCCGACTTTCTCACCCGCTCTCTCCAAGTCCCCCCTCAGAATCGCCCCACCC
+ACTTCCAACCCCTCCCCATGTCAGGGAAAAAAAACTCCAAAACAACCAGGGCGAGGCTTG
+CTGTGGCAGTGAGAAGCTGAGAAAGAGCTGGAGGGTCTGGTAGGAACTCATGTGTTCTAG
+CTTCCCCATTCTGCCTCAAAGGCAAAGGGGTCCCTCACTTCAACTCCCCACCACCCCACT
+AATGACTCATTTTGGATAGATGCCTAGTTGCTTGGTCTGATGGTTCCTTGGCCCCACCAT
+GCTAGCCTTGTATTCAGTCATTAGGGTTCGAGGTGGGGGAAGCAATGGACCACCCCCACC
+TTTGTGGAGCCCATTTTCTCCTTTGATGGTTACCAGGCCCCCACTCAGCTCTCTCTCCTA
+CCCTTTGTGCTGTGCCTGCAGCCCCCGTGCCCAAGAATACCAAGCAACTGGCTCTGGCCC
+CCTGCCCAGTTCCCAGGCTGGCTGGGTGGTGGGGGGAGGGGACTTTAGATGCGGCTTTGG
+GAGAATGGCAAAAGGGGACTGCCCCCCCTTCTCGTATCGTATCAACATCCTACTGGCACT
+ACCTTCAGCATGGTCTGATGAGATCAGCAAGGCCAGATCTCACCTAGGCTTTCACCCTAT
+CCCTCTTTCATGATCCTTAGGTGAGAAGTAGAGAACATTCAAGGCCTTCCTTCCACTAAG
+ATAGGAACTGTGGCTAAGACAGAAGAAAGAAGGTCAGATGTTTGAATGGACTTTCTGGAT
+TTGATAAAATGTGACACTAGTGGTAAGCTCAAAAATGGAACTTATTTCCACAATGAGTAT
+CTATTTCCTTTTAGTCAGAAAAGTAAAGCATGTTTTAAAAAGTAAATTGTATCATAAACA
+CTCTGTATGCGACTAGACCAAGGGGTGGGCAAGCAGCAAGATGACCTTGTCCCTGACGGA
+GGCTGCCTGCAAATCACCCCAGCATTAGGCTGGGAATGTGGCAGATCAAAGGGGCTTGGT
+TGACACATTCACATTTTCCTTCTTGCTCTACTGCAGGGGAACTGAGATTGGCATGGTGGT
+CACAAAATAAGAGATACAGACCCAGAAGTCCCCATTCCCAGCCCCCTACCTGGACCAGAA
+TAAACAAGCCTCCCTTGGTGGCAGGCACCATGACACAGGCAGACAATGAGCCAGACAGGA
+GGGCTACCATGACCCCCGCAAGAAACAGGCAGTCATAGGCATGCTCAGTTGCACACCCAG
+GTGCAACCAGACAGAAGGGCCCAGCAGTGGCCCTGCCAGGCCGGGCCAGGAACAATGCTC
+TCTGACTGTCAGCACCAGGAACTGTCTCTGCCTTGAAATCTAGGTCACAGCAGCAAGCTC
+TGGAGCCCAACCCCCCTCACTTCACTGATAATCCAAGGACAGAAGGAGGGGGGTACGGGT
+GAGGGGGATGATGGGGAGGGGACTAGAGATGGAGATAAGAATGAGTAGGGGGGCATGAGT
+ACAACTGGAAGGGAAAGCGCAGCAGGAGACAAGAAGACCCTGCCCCAGTGGAGGAAACTC
+TGGGGCCAAGCCCTCAGCTGGAAAAGGGGGCAGGATTGGGGGTTATACTTCCCTGCTTTC
+ATATGGAAGGAGAGTTTGTAAATATTACAGATCCTGCGGGAGGAAAGAGTGCATCTGTGT
+GTGTGTCTGTGTGTGTGTGTGTGTGTGTGTGTCGGGAGGAGTTCAGATTTAGGGCTTTGA
+ATCTAAAACAGATCTACAGAGGAGGTAGGCGGGAAGAGATTAATGTGTGTAACACACTCA
+CCCACAAAGACTTGGGGCCAGTGGGTAGGAGCTTTCTAAGAGAGTGAAAGATACAAGGAG
+GCCCCTAAGAGAGATGGGGGTAGGAATGTTAAAGTGGCAGAGGAGAAATGTGTGTGTGCT
+CGGATCTTATGGATGCTAATCCCCGGGTTGTGGGCACAGCTCTGGAGGAGGCAGTGGCTG
+GGTATCTGTGGGTGTGTTTGGATGTTCTGGTAATGGAGGTGTATGTGCCAGCACTGGATA
+TATGGCTGTGGGTGCACTGGCTGTGAGTATCTGTCATGTGTGTGTTTGTATGTGTCTAAA
+CCAGCCAAGAGCATGTGTAAGAAGGCATATAACCACAGGCTCACATCCAGAGGGGTGAGG
+GGAGCCTACAGAATGGAGAAAGAGAGCTGGAGTTGGGAGACAGAAAGAGACTAAGGCTGC
+AAATGAGGCATGGGGGGCGAGGGGGAGGAGAGCATACAGTCCACAACTATAGAAGGCTGT
+AGCCTCATCTCATCTGTTACTATAGCAACCTCAGTTCTGGGTGCCCCCCACCCCCAGTCG
+TTAGGATGACACCACTAGATGGGAGGAGTGTAGGGAATGGTGAGTAAGAAGGGACAGAAA
+GGATAGGAATAAGGCCCCCTGACGGTACAAGGAAAGGCAGCCCAGTTGCCTCCCTTCTTG
+GAGAGATCCATTTGCCTAGGCCTTAGTATGGCAGAAGAAGCAAAGGGAGTGTTGTTTAGG
+GGAACTGCTCTCAATTCTCACTGATCAACAGTTGTACCTGGGCTGGTCTGGCCTGGCCAT
+GGTGGGGCTTGGCAGAGCTTGTTTCCTTCATCCTCCATGTCACTTTCCTCACCAGAGATT
+CAGATTTCCTTAGGGCTCGTGGAGAAATCTGCTCTCTGGGCTCTGAAAAATCCTCCTGAT
+TTCTGGAGCAGTGCAAAGAGCTGCTCAGGGATCACCAGGCAGTCTGGGCGGAGGCCATTC
+TTGGGACAGGATACCAGGGCAGGAATCAGGAACCGGAGCCCCGGCTGCCAGGAAGTCAGT
+CCTCTTCCCTCCCAGCTTTCTGTGCTAGGCTCACCGCTTCCTCACACCTTCCTTCCTTCC
+TGCCTCCCTCTCCTTCCAAGCTGGCCCCAGGCACAGCCCTGACAAAGGTCAAACCAACTG
+AGGATGCTGGGGAAGAGGGGCAGGAGGTAAGGACCATTATATACTGACCTTTTAGGGTGG
+GACAGGTGGGAGCTCCATGGGCACTGGGGTAAGAAGAGCCAGGCCCTGGGAGTGCTTTGA
+GAAGCCTGGCAATTGGGATAACCACCTGTGGAGGAGGTGGCAGCACCAAAAGAATTTCTG
+TGCATTTTGACATTAAGTCGTGGTAGATTATCCCTTCTAACGTTTGGTTCCAAATCCTCT
+GTAAATTCGAGAAGGAAGCCTGAAGATTAAGAGTTGCTGCCCGTCCCTCTTCCACATCTG
+TTGCAGCTGCCTAAGCCTGAGGGGAGGGGCCATCCCAGACAGCTGCAAGGACCTAGAGGT
+CCCTGTATTGGGCAGGAGGGGACTCCCAGGCACTCAAGTCTGTCAGTCTTCTGCCTGAAG
+TTGGAAGTAATCACGGAGAGGCAGAGGGGTTGGAAGGGAGAGAGTAGGGGTCCAGCTTTC
+CTATTTTAAATTATAACTGGGCCCCAAATTCAAGCAGTCCTGTAGTTTATTTCCTTTAAT
+ATCATTCTCAATTAATTTCTTCTTTAAGCTTTTTATCCAGCTTTGTGGGGGAGGAGGGGG
+TTGTCTTTACTCCTGTAACAAGGAAATTGAGGGTTCTGAGGCTCCAGGGTCAGCCTGGAA
+GTGACGAGAGGCAGGACATCTGGGCCCTAAAATGTTCACTTCAGTCCAAGAAGCAAAATG
+GATTACCTTTGGGCATGGGGACAAGGATTAGCCTTCAGCCCTTGGTCTAGAAAGAGCCAG
+ACCCAAGATGGGGCATCAGGAGATAGCTGGCACTGGATCAGGCAGAGCAGAGGAGGGCCG
+GCTGATGGCGAGGAGGGGAATGGGGCCTAGGACTCTCCCCAGCCCATCTCTGTTTCCACT
+CTTCTGGTGTGGCACTCCCTGCTTCTATTTCTGTCTCTCTCCATCTCTCTCACTCTTTGT
+CATGCATTTCCTTTCTACTTCATCTTTCAGTCTCTCTTTTCCTCCTTTCTATAGGCTTTC
+ACCCCTTCTAGCATTTCCTACTCACCTATCCAACTTAGTGGTAGCCATGAATACTTAGGA
+CTCCTGAGATCTGTGCTTCACTGTCCCCATCTGAATATTCGGAGCTGCAATTAGGAAGGG
+GTGAGCATCCCTGCTACAAAAGACAGAGTAAGTTTGGTCGGAGTGGCTTGGAGATTCAGT
+GGTTGGCTGCCTACCAGCTGGTGAAGATATCCATCATGACATTGTGCGTGAGGATCTGTG
+TGAGAGAGTGAGGCGCTCTGTGATTCTGTGACCCTCTTCATGCATGATAATACATCCACA
+CATGATGGGCACGGAATTGAGGAAGTAGGCAACCAGGGAAGCCTGCAGCCTAGGGCATCA
+CTGGCAATTTCCCCAATGGAGTATTTAACCACAAAGGGAAGAGCAATTAAAGCTAATCGC
+TCTGACAGCCTGGGGCCCAGACACACGTACCACTTTCCCTAACTCCCAGTGGTCTCCCCC
+AACCTTCATCCTCCCTAATTTGTTTAGTGCACTCTCAAACACTGTTTGCACTTCTCTGAC
+CATCTCTTTCTCTCACTTCCTCCCTCTCCCTGGTTCCCTGACTATCTCTAGACCTCTCTG
+GAGATGTCTGTCTGCTTCTCACACACATTCCTCTCATTCTCTCTCTGACTTGGGTTCCAT
+TCTTGGCTGACTCATACCCATTCTTTTTCCTTTCCGTTTCTTCCACCCCCTCCTCCTCCA
+TCCCTTGGCCACTCTTACCCAGAAACTAGCCACTCCCAGCACGCTAAGCCCTAACTGGAC
+CCAGGAAGAGAAGCCACCAGAAGCCCCTACCCTCATTAGTCCGCAAAATCCTGTCTCCTT
+GTGTTAGAGGAATGAGACCAAGATTTTCTCCCATCTTTCCCCACAGTTTTAAAGAGGAAC
+AATCTGAAACCATGGGGCTGGGAAAAGCTGCCAGAAAATCAGGATGGATTTGGGGACAGT
+GGCCCTTTCTGTCCACTCAGCTGTCCTGTCGGACTGATGACTTGGAGCCCGCAGTTCAGA
+AGACTCCTCATGGGATGCCCCACCCCACAGAGTGGGGGTGTGGTGGGCGGCCGGATGCGT
+AGTCCAGGGACGTTTGTGATTTGGAGTGTGCTAGACTATGTGTATAAGTCACATGGGTGT
+GTGTCACTGTGTGTGCTGGCGTGCCTTATAGGTGTGCGTGTTTCCCTGTCAGTTTTGACA
+CGGATTATATGAAATTAATAAGGGTTTTTACGTTAGATATACATCACTTCTCTACACCCC
+CAGCTCCTCCTTCCCTCTGGACAAGCTTTCCGCCAAGCCCACCTCTTCCCCAGGTGCAGT
+GAGCTGGGGGTGAGGGCACATGGTACCAGCCGCCCGCGGAACCGAGACAGGGCTGGGCCG
+GGAGACCGCCAGCGGAGGCGAGTCCGAAGCCTGAGCGGGGAGGGAGGGGCGGGCGCTGGC
+CTGGCCGAGCCGGCTGGGCGGGAAGCAGGCTGCATCCTAATCCACAGGCCCCGGTCCGAC
+TCCCAAACTGCCGTTCAGCGTGGAACGGGCTGGCCGGAGCCACGCCGCCTCCAGCACACG
+CTCGGCGCTCGGGACGCACCGACCGAGGGACGGACCCCGTGAGGCCTGACACAAAGTGGA
+CTCGTCCGGGGTCCCCACACTCCCCGTCTGGCCCCCGGTGTCCCAGATCGTTGCCCCTCC
+AAGCCTAGACCCCGAAATCTGCCCCTTCCCCGCGACATGTCTCCTCTCCGCCCCTCTCAC
+CGCCTCTGGAGCTGAGCGCTCAGGGCGCGGGCACTCCTGAGTCCCCGGTTCCTAAGCAGC
+GTCCGGAACCTCCTCCTCGGCTCCCGCCGCCGCCGCCGCCGCTTCTGCCGGCGCCGGCGC
+CTCTTCCGCCCGCGGAGCCGGAGCCCGAGCCCAGCGCCGCTCACTTCACCCGGCTGGCCG
+GTGATGTCAGAGCCGGGGAGGGGCTGGGGGCGGAGAATGGCGGGAGGAGGGGGAGGACTT
+CTATTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGGAGGGGGAGGGAT
+GGAGAGGCAGGAGGGGGCGGGGGACCGAGGGGCTGGGGTTCTGGAAGACTCCTAGGATGG
+GGGACAGAATTGGGCGGAGGGCTGGAGCGGAGACGGTCTCTTCCCCCTGAACTGCGAAAC
+AATTCTCATATTTAATTCTTACCCAGTAGGGCACCCTCCTCCCCTTTTGACCTTTGGATG
+ACCACGGTGCCTAGGACCCCTGGGAGGCACAGAGTAGGTTTGGTTTAGGGGGTTGGAAAG
+CCTTTGGGGGCTGGATTCTCTTAGCTGCTGGCTTTAATATTTGTAAAAACTTCTTGCCTT
+GCCTTAAGGGTCAGGGGCGCAGTGGAAAATGACTGATCAGACTTATCCACATTGGGAGTG
+AGGGGTTGGGGCCACATGGGGACTTGAGGAGCCCTGGATCTTCTCTTTCCAGGATGATGC
+CTTCAGGCCCTCTTGCGTGCCTCTCACCTTTCCCAAACATGCCCCACTCCTGCCTTCCCC
+CTACCATGATGTAATTTTATTTTCTGGGGAGAAAAAGAGCAGCAGTGAGCTCAGCATGCA
+AATATAGACAACTTGCAAAGCCACACTTAGCTCCATCTCCTAGCTCTGCCCCTCTGCCCA
+GGCCCAATGTGGCAGCTCTCACTTCCCCTCTACCCCAGGGACTTTCCTCCCCCAGATTCT
+CAGCCAGACTGAAAAGAGGAAAGGAAGAATTGGAGAAGCATCTGTTTTCACCCCCATCTC
+TATAGCCTAAGGGGTCTTGCCTCAGCTCTGGGGCAGGGTTTGGCCCTGGAAAGAAGTCTG
+ACCCTGGGTAGGTCCTTCCTGCCAGGAACTCCAGCAGGCCCAGGTTGGGGGATTCCCCTC
+AGTGTTTGTGCCACTTCCAGGATCACCCATCTCATCTCAGTTCCTACCTGGCAGAGACTC
+CTCCAGAAACCTCAAACGAGCCCCTCCTTTCTTGGGTCTACACCATTCCTAGAAGAGGCC
+CAAGTTCCTGCTGCTCTGGGTTTCATAGCATTTCTCTGCTGTTTCCTCTCCCCAGCATTC
+CCTGTCACACACTAATCATTTGCCTTTGTATATATCATGTTTGCTCTGTGTTGTAGGGAC
+AAAAACTATTTCGTTGACCCATCTTTGAGACTCATGTAGTTTCTAAGTGAAAGGCTCAGG
+TCTGGAGGCAACAAGACACAGGGGCCTTGGGCTGCGTGCTAGTTCCACCCATTCCTTGGG
+CAAGTTATGCCTGAAAGCCTTAGTGTCCTCACCGGTCAGATCAGGGTGCCAATGTCTATT
+TCTGAGGGTTCTTCTGAGATTCAGAAATAGTAAGTCTCTAGGGACTGGCACAGATGGGTG
+CTTAAAAAATAGGAGCTATTATTTAGACCGGGGCCTTCCACTGACCTATGCTAGACAGTA
+AGCTCCACAATATTATTGTAAGACTAGCTTAGTGCCCATACTACTTATTAAACATTTATT
+AAATAGCTACAAGTTGGAATGAATACATAGTTTCTGCTTTCTACACCCCTGCCACATTCT
+CTAGGAACACTTCCTGGACTTGTCTGTGGAGAGTGAAAATATATCTAATCTCAGGTCTCA
+TCTTTCCGTACTTAGAAATCCTCTAAGTCTCTCAGCAACAACTTTACACCTGATCCATGA
+AAAATATAATGTTGGCTTGGCGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC
+TGAGGCAGGTGGATCACCTGAGGTCAGGAGTTCAAGACTAGCCTGACCAACATGGTGAAA
+CCCCATCTCTACTAAAACTACAAAAAAGTTAGCTGAGTGTGGTGGTGGGCACCTGTAGTC
+CCAGTTACCTGGGAGGCTGAGATAGGAGAATCTCTTGAACCCGGGAGTCGGAGGTTGCAG
+TGAACCGAGATCTCACCACTGCACTCCAACCTGGCAACAGAGTGAGACTCCATCTCAAAA
+AAAAAAAAAAAAAAGAAAAAAATATATATATATACACACACATATATATGTATATGTATA
+TATATATAATGTTTATTATTTATGGGCATCATCTCAAAAATATATATATAATGTTTATTA
+TTTATGGGCTTGTTTAGCTATTTGTATATATTATATATATAAAATGTTTATTGTTCATGG
+GCATCATCTCAAAACTATATATGTATATGTTTATTATTTATGGGCTTATTTAGCTATTTG
+GCCCTCCCATCCCCAACTCCCTAAACTCCTTTTTGCTTACCCCAGCCTGGGCCTATTCTG
+GAAAGACTGTGGGTCCTTTCTTTGCCAAGACCTCCAGCAGGGCCCAGGTCTGGGAGATCC
+CCATTGTATACAATATCTCCTCTCCATTCCCATGGCCACCACTTGGGTCCATGTTCTCCC
+AACATCACTCCAATTATTATAACAGCCACTGATTATGGGTTGGGCCCCTTACCTCTGGCA
+TCTCCATCATCTCCCCTCCCCATCCTGCTTATCGCTACCAGATTAAATTTTGCCCTGCCT
+CCTCGGCTGGCAGGATTCAATTCAAACAGCTCTACAACCCAGCCCCATTATGCTTACCCA
+ACTTCATGGCCACAACTTGATTTTTCTTCTTGTCTCTCTGGGCTCTGTGCCCCCAATATG
+ACAAACATATCCTGCCAGTACTTGATGTTTTCCACTGACTGAAATGTCTCCTCCTCAGCT
+CCTTTGTCTCCAACCAAATCCCATTCTTCAATTTAAGGCCTAGTAAAGGCTTACTTCCTT
+TGTGAAGTCATCAAGCCCACTCTGATCTCTCTTCTGAGTTCATTCTGTTCTTTCCTTTGT
+TCTTCCTTTAGGAAACATTTACTAGGGACAACTTTTGAGTGTCAGACCCTGTGGACACAA
+AGAAAAAGCTTGGGTTCCTGCCTTCAAGGGGCTCACAGTTGAGTGGGGAAGACAGACACG
+TAAATGAAGAACAATAACAGACTGTGCTGAGTACTGGAATGGAAGTGCAGCAGGAACTCA
+GAAGAGGTCATCACTGGAGGGGCCACCTCTGGGAAGGCTTCACCAAGGAAGTATAATTGG
+AGCTCTGTCTTGATGTTCAAATAAGAGCTGCAGAGAGGGTGCTATCGAGCAGCACATAAT
+CACTTAATTACATATTGCTTTGTATTCTTCTTCGATTGTTTAGTATGCTTGTCTTGACTT
+CTCCGTGAGGCTAATCAGCTCAAGATCAGGGACAGGCCTAGCACTCTCTGTTCTCCCAGA
+GCCTAACAGTGGCCCTGGACAAACACCATGTGTTCAGAAAAGGCTTGTTGACTTGAAGCA
+GATTAACAATCCAGGCTTCTCTGGGAAAGCTGCCTGGAGGAACCCCAGTCCCCACTATAA
+GAGTCCCCCAGCTCATCCCTAGCAAGGCACCCACATCTGAATTCCCCATCAGTGGCCACC
+TCCTCCTGCCATAGGGGTGAGACTTCACCAAAATACACTTGTTCATGCTGCCCCAGATGT
+CTTTATTGAGGCCTCTCATTTGACCCAAATACATTAACAAGTGATTGCTGAGATACAGTC
+CCAAACACAGTATTCTCCCTTCCTTCTGGCCAAGGACAGGACAGTCAGAAAGATACAGGA
+TTAGAGAGAGTGGGGGAAAGCTGTGCATAGATGGAGGGAACAGAACAGAGATGGGGCCCA
+TATATATGGCTAGAGGAGAATCACAAGAAAGAGTATGGAGAGTGATGGAAGAGACACATA
+ATAGATACAAGTAAGTTATGGGATAAGAAACAGAAGTCAGATAAGAGAGAGCTGGCCTGG
+TGTCTAGGGAAAAGTAGCTCATTTGCTCATCAAATGAGGGGGATCAGTGTCTCAGCACCC
+AGCATATAGACCCCAGTTAGCTCTATGGCAGAGAATAACCTAGTCTCCCTGGAGGAACTC
+AGGGAAGGACAAGGGAAGGCAGGAGAGAGCATGAAAAGACCAGTGGGGCTGAAAGATGGT
+TGCCATAGCAACTAGTCATCATCATCATCGTCATCGTCCTCTGTGTTGATCTCGCCCTCC
+AGGACATCCTCCAGCCAGTCCTCCAGCTCCTCAGCAGAAGGCAGGTCCTCCTCATCGTCC
+ATTTCCATCCATACGCTATCCGCCTAGGAGATGGCACAGGCAGGGACGGAAGCTAAGTCC
+AACCCGGGTCTTAGGATCTGTCTTCCCACTCACTGGACTCTGTACATCCTTCCCTACATT
+CTCAGTCCCTTCCTTTCATCTGCCAAAGGCCAGGGAAACTGTCCAGGTGAAGGTATGGGT
+TATGGGTTAGAATAGTGACAGGTGAGAGTGGGAAGAAAAGTTGGATTGGCAAATAGACCT
+TCTCCCTTCAATCTGTCTCCCGTGCTCTTGCCACACTGAACTTCCCTATTATGCCTTTGG
+TGCAAAACCACTGGTAGCTCTTCATGGCTTATAAGAATAAAGTTAGCAACTCTTCGGCAT
+TCAGGACATTTCTTTCTTTCTTTCTTTTCTAAAAGAGACATGGTCTCACTCTATTGCCCG
+GGCTGAAGCGCAGAGGGGCAGTCATGGCTCACTGCAGCCTTGAACACCTGGCCTCAAGTG
+ATTCTCCCACCTTGGCCTCCCAAAATGCTGGGATTACAGGCCTGAGCCACTACCCTGGCC
+CCAGGGCTTTTCATAACCTGCTTTTCTAGTACCATTCTCAGTGATCTCTAGACTCTCAAC
+CATAAAAATCCACCTGCCTTTTCCGGGAACCTGATATTCATTTTCCTGTTTTTAACTTTT
+GATCTTGCTTTTTCCTCAGTTTGGAAATGACTTCCCTGAATCATCACTTGTTGAAATCGT
+ATACATTGTAGAATCTCCATTTTGAATATCACTTTTTCTTAAACTTTTCTCTGATTCTGT
+CAAGCAGAAATCAATGTGATCCTGTGTCTTATTTGTATCGTTATTATAAAGTTTATCAGA
+TCTGGCCTTGTAGTTAATGGAACACTTGCCTCTCTAATTAGTCTGAGAATTCTATGAAAG
+CAGAGACTGTGGCTCATTCAGTTCAACTTAGCAGTTTTGAAGCACCTACTGTGTGTGCAT
+AAGAAGACTTCTGCCCTGAACAAGCTCAAAAATCTACTGGAAGAGACACACATAATACAT
+AAATAGCCATTTAAATGTCATGTTCTCAATGGCTATTGAGCCCTGGGAGTGCCAGGGAGG
+GAATGATTTTAAATGGGAGATCTGGAAAGACTTCACAGAGGAGAAAGAATTTGAACTGGG
+CCTTGAAGGATGAGCCACTGATTCCAACATGTGGGACAGGGCCACATTCAGATCCTAAGT
+CTCCCCCTCCATCCAGCCACTAGACTGGGCATGTCTGCGTCCTGTGAGAAGGGCCCAACT
+CCTAGTTAGCAGAGAGGAGGAGAGCTAGCTATGCTAGGAAGTAGAGTCGGGGGTCAACCC
+GGGATGAGGACAGGAAACTCACATCAGTAACATTGACGACTCCTATTTGTGGGGCTGACA
+AGTCGATGTCAAACGTCTTCTCCCAGTATGGGACCAGCTGTAGAGAGATCAAAATACAGC
+AACTTAATAGATACTAGGGAGCACATGAATCCAGGAGGATGAGTGTGAGGGTCAGCCCAG
+GGATGGAGAGGGGACAGTGCGAACTGGCAGTGGATGGGGACAGGTCCACCTGTGAGGGCA
+TCAAGGCAAGGGTAGAACTAAACAACAAAAGAGGAGAAATCCTAAAGGGAGAGAATCGGG
+AGGGTCAGCTGGGTGCACTGGCCCCTCTTTTCTGAGGAAAACTGGATAGGAAACCCTGTT
+CTGATCCTGGATCAGATCCTTGTGTTCAAGGTCAAGTCACTTCTCTCTGGCCCTCAGATC
+TTACATTTCTAAAATAAGGCAAGTATACCTGGTAAGTGCCAATGTTTCTCACGTTTCTCT
+TTGTTCTTTCAGTTACTCATTCAACAGCGAGTGCTAGGTGTTGGGAATGTAGCTGAGAAC
+AAAATCATCTCCTGAAACAGGGAATCTCTGTGCTATGATTCTATGATTGAGTGTGTATAC
+CCATGTGCAGGACTATTGTGCCTCACAACATTGGTTTTCTTTAGGAGTCAGCCCTAGCAC
+CTGAGCTACCCATCCAGCCTCTGCTCTGGGAATGTGTGTGATGTCCGTCCTGTTTCCCCT
+TGCTCAGGATGCCCTCCATGCCAACAGTGCCCTTAAGCATGCAACACGCATACCCTCCCA
+GGGCCCCCCAGCTAGGAGGTGGGGATGGGACCTCTGAAACAAAGCCCTCACTAAGAAGAT
+GTCACAGTCCAAGGCCCCACCTTAAGATAGCAGTGCCTGAGCTGTGCCTCTTACCAGGGG
+GAAGTCATCAGGGTCAATCCAGATGATGCTAAGATCTGGGTTTTCAGTGTTATCTTGGGC
+CACAGCCTTGAGAGTCTCTAAGAACTCGAAACCATCTTGGAGGGAACACAAGTACAAGGT
+ATGAGGTGTGTAGAGAGGAAAGGGACCCAGGTGAAACAGGAGTGCAGAGCTACCTCCCTG
+CTCCCCAAGCTTCACATGTCTAGCCCTAGGCTGTTTGCAAACTCCCAGGAAAGCCTCCTA
+GTGAGGCCCATGCTCCAATAAACACACCCACCTCCCCCTGCCTTCCCTCCAGTCCAACCC
+GGTATTCCTCCCTCACCAGGATCAGCTTCCTCTGCGAAGGCCACAATGTGGATTCCATCC
+ATATCATCCTCCTGTCAGGGGGTAAGAAGGAGAACAGGGTTTTGGAGAAGACCTTACTAT
+ACTCCCTGAAGTTGAGGAGCCCAGAACAGATCCTAGATGTTCATTGAACCTCAATTTTGT
+GGAAATCCAGCTTCAGGACCAGATATATTCTTTCGTTTTTTGGGGTTTGTTTTTTTTTGT
+TTTTGAGACAGAGTTTCACTCTGTCCCCCAGGGTAGAGTGCAATGGCGCGATCTCGGCTC
+ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTG
+GGATTACAGCCATGCGCCACCACACCTGGCTAATTTTGTATTTTTAGTACAACATTGTTT
+CACCTTGTTGATCGGGCTGGTCTCAAACTCCTGACCTCAGATGATCTGCCTGCCTCAGCC
+TCCCAAAGTGCTGGGATTACAGGCGTGAGCCACTGCGCCTGGCCATATATATTCTTGTGA
+TGGTGACCTTTTTCCAAGTGACACTCCTTTCAGCTCCAAGGCCCCACACTTCCTCTGATA
+ATCCCTTTATCCCCAGGCTTGAGGCTCTTCCCTTGTCATCTTCAATTATGTCTGTCTCCC
+ATTTCCTCACACATTCTTCAGTGTAGAAATCTTTCAGTTGCTGCTAGTGCACTGTTTTTT
+CTTTTTTCTTTTTTTTTGAGACGGAGTCTCACTCTGTCCCCCAGGCTGGTGTGCAGTGGC
+GGATCACTGCAAGCTCCGCCTCCTGGGTTCAAGCCATTCTCCTGCCTCAGCCTCCCAAGT
+AGCTGGGACTACAGGCGCCCGCCACCACACCCTGCTATTTTTTTTGTATTTTCAGTAGAG
+ATGGGGTTTCACTGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCAC
+CTTGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCACACCCAGCTGGTCAGTGC
+ACTGTTTTTTCTAAAAATGGGCCTGAGGAGCAGTAGGGTGGGGTTTTTCTAGGACTTGAG
+TCTTCTGGCACCCATGCTTCCCTGAGGGCTTGACCCTGGCCAGGGGCACTCACCCAGGTC
+TCATACATACTCTCCGGCTTCAGTTTCCTCAGGGTTGATCTAGAAGAGACAGACAGAGTG
+TCATTACTCAGTGGGTTAGAAGGATGCCCCCTCCAGAGGGTCCTGTAGCAAAGGCTGCCT
+CTAGGTCAGGCCCAGGGCACAGACAGCTTGTAGCCTACAACAAAGCCATCTGGCAAGGCC
+TCAACTCTGGGCACCAGGCCCGGCTCATTGCATGGTTAAAGCGGTGGGAGAGGGGAGGGT
+GGGCAAAGGGACTGGAAGAGAGACTGGGGGAGAAAGGCAAGAACTGGACATCAGGATATT
+CATTAATTTATTCATTCATTCACTCACTCACTATGATTGAGTGCCCGATATGTTCTAAGC
+ACTATGCTAGGTGCTGGGTACATACAGTTGAACCAGGCAAGCATGTTCCTGCCTTGATGA
+AGGGAGCCAGTAGTATGGGAGAGTAAGTACTGTATATACTGCAAGGGAGCTTGCAGTGAT
+CCACCACTGCACGCTAGCCTGGGGGACAGAGTGATTGGATATTTCCAGCTGTGGACTACT
+ATGAAGGAAGGGAATTATGTGTGAGTGCCCGGCAGGATTATTCAACCTCAGCACTATTGC
+CATTTGGAGCCAGATAATTCTTTGTAGTAGGGGGCTGTCCTGCGCACTGTAGAATGTTTA
+GCAGTATCCCTGGGCTCTAGCCACTAAATACTAGTAGCACTCTGCACAACCAAAAATGTG
+TCTAGACATTGCCAAACATCCTTTGGAGGGCAAAAACCTATGCCCACCTCGCCACGCCTT
+CCCACCTTGTAGAGAACCATTGGTGTACGATATTGTATTAGAAAGAACATGGGGCCAGGC
+GCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGACCGAGGCAGGCGCATCACTTGA
+GGTCAGGAGTTTGAGACCAGCCAGGCCAACGTGGTGAAACCCCATCTCTACTAAAAATAC
+AAAAATTAGCCAGGTGTGGTGGCGTGCCTGTAATTCCAGCTACCTGGGAGGCTGAGGAAG
+GAGAATCGCTTGAACCTGGGAGGCGGAGGCTGCAGTGAGCCAAGATGGCGCCACTGTGCT
+CCAGCCTAGGGGATAGAGTAAGACTCGGTCTCGGAAGAAAAAAAAAAAAAGAAGAAGAAG
+AAAAAAAGAACAAACATGGGACACATCAGGGTTAAAATCAGGGTTAAAAGCTCTGACACT
+TATTACCTGTAGAAACTTTCACTGAGGTTAAAAGGACTTAATATAATCATAGAGATGGAA
+AGTACCTTGCAGTTCCTGCCCCACCACAGGCAGTCAACACTCATTTGTTGACAGAGATTA
+TTTAAGTCTCAATCAACCTCACACTGAATTATACACACTTATGAAGGACCCTCTCGGTGT
+AGGGCTCAGGGCTAAGTAGTGGGGTATAAAATATTGTGCAAAATTGTCAGTGTATCAGAG
+AGCACTGGTGCCCAAAGTGTATTCATCTCCAGGCAAGATTGGCTTCCCGGTTGGCTAAGC
+TTTCCCCAGCCCATCTCACACAGTGTGTTCTAGCCCAAGGAGGAGCCGACCCCGCTGAGA
+GGGTTGCCCTTGGTCCCCACCTCCTGTGCTCCTCCACGAAGTTGACAATCTCCTCTTCGC
+TATTGGGCTTGTCTGGGATGGTCACAGGCTCTTCCATGAAGGCCTCGTAGAAATCAATCT
+CATTCAGCTTCAGGGTCAGCTTCTTTGCCACCTTTGGGAGAGGGGGTAGGAGGGGAGTGG
+AGCAGGAGACACAGGGTCATGTCGAGAAGCAGTATAATGGGGGAACAAATATTAGGAAGG
+GAGTCAGAAAGCCGGGATCCTTGTCACTGCCACTGTGACACCTGACAAGTGTCTTAACTT
+GCCTGGGGTTGGGTTTAACATGAGAGGGGTGTTAGATCATCTAAAGCCCCTCTTCAGCTC
+TAAATCTATGGAGTGGGAGGAGCCGGGGGGGGGGGCAGGTAGGGAGTGCCAGCACGTAGG
+AAGGGACCAGGTCAGCCATCGATTCTGGGAGAGGAAACTGGAGGACAGCTTCAGGACATG
+AGGCAGAACCAATACAGCTGCGGGGAGGAGAACCTTGCTGTCGAAGGTGGCGAAGAAGGG
+GATGTAGGGATGAAACTCCTCAGCTGCATCCTCGAAGGCTTTGTAATCTGGGGGGTGGAA
+TGCAGAGTCAGTCTCTGGGGAAACCAGGGTCTGCCCGGGAATCATCCCCCACTATCGTCC
+CCAATTCTTTGCAGGCAGGGTTCATTTTGAGACCCACCCCTTCTAGTTCTTCGAGAGTGA
+GGGGAAGGTGGAATACTGGAGGTGCTTGTCTGGGAACCCCCAGAGTACAGGGAGGTCAGA
+GCTGTTGATGGTGCTGACCACGGCTGGAGGAAGAGGTGGGAAGTGCAGAGGTAGAGGCTG
+AGACACTGGGTGATGTGGGGAGGTGGGAGGACAGGTGAGATGGGGATCCAGAGAGGGGCA
+CAGTGGAGTCTGAGGGTTACCCACGCTCTGAGTCTTTGCTCTTGAAGTAGCCAATGAGTT
+TGATCTCATCCTCAATATTCTCAAACGCCTGCAGCTCTCGTTCACCTTCAATCAATTCCA
+CAGGGTCCTCTAGGACCTGAAAGAGGAGGACATTGCAGGAAGAGAGTTTCTATCCTCAAG
+TAGAACCAGAAAGGGACCTAGGGAAAAGAAGGCTGGGGAACTGGAGGAGGGGGCTAGATG
+TTGACAGTGTGTGGGTGTGTGTGTGGGGGATTGCATGTGCGTGTGTGTGTATAGTGTGTG
+ATGTGTGCAGTGTGTGGTGTGCATGTGTGGTGTGTGTGTGCATGTGATATATATGTATGG
+TGGGTGTGTGGTATGTATATGATGTGTGTGTGATGTGTGTGGATGGTATGAGTGTGGTGT
+GCATGTGGTGCATATGTGTAGTGTTCGGTATGTAGGTGATGTGTGGCCTACTGTGTACAT
+GTGTGGCATGTGTGTGATGTGTGTGAATGTGTGGTGTGTGTACGTGTGGAGTGTGCATGT
+GGTGTGTGCATGTGTGTAGTGTGTGCATGTGTGGTGTGCAGGTATGTATGTGCATGTGGT
+GTGTGTGTGCGCATGTACGGTGTGTGTGCATGTGATATATCTGGTGGGTGTGTGGCATGT
+ATATGTTGTGTGTGGTGTGTATGGATGGGTGTGTGTGGTGTGTGTTGTGTGTGTGGTGTG
+CATGTGGTGCATATGCATAGTGTGTGGTATCTAGGTGATGTGTGGCCTATTGTGTGCATG
+TGTGGCATATGTGTGATGTGTGTGCATGTGTGTAGTGTGTGTGTACATGTAAAGAGTGTG
+TGGTGTGTGCATTTGCGTGGCATGGATGTGGTGTGTGTGCATGTGTGGTGTGTGTTTGTT
+GTGTATCGTCATGTGGTGTGTGATGTGTGTGGAGTGTGTGGNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNCCTGGATTAAGCCTAGGGGGCTAGCGTGGCCCCAGAGCT
+TTCTAGGGAGTAGCTGGGGGATCAGGTAGGGTGAGGGGAGGGAATAGAGAGGTGTAGATT
+CTAGCAGTATTTCTCCCCTTCTCTGACCGCAACGACTAGAGATGAGTATTCATGCCACCC
+CCTCTGAGCACTCTAGTAGCTGGTTCCCAGTCAAGCTCTTTAATTGCTATTACTGACCAG
+ATGACTTCATTATTACTGACCACCCCTCCCCCATGCATGTCCAGAATCCCGTCTCAAGCC
+TGTGCTGCCCCCTAGTGGCTGACTGCTCTAACAACCATGGCTCTGGATGGGAGGGGTTGG
+AAGCGGAGTTTGCAGAGGTCTGGCCAGCCCCGCCTGTATTCCCCGACTCCCCTTCCCTCC
+AGCACCTAGCCATGTTTTAAGGTAAGTATAAAGAAGGCCTCCCCCACCTCTTTCTGGGAT
+TTGAGCATGGAACTTCATAAGGAATAGAGAGGCAGAGTCTCCGAGCCATAACCCTCCCAA
+CCTCATATTTTCTCACCCCCTCTTTTAGAATAGACATAAATCTGAGAGATTCCCAAGCCC
+CTCGGATCCCAAGGACTGCCTGACAGGTCCTCCGCCCTAAGCCTCCCCTTGTCTCAATGC
+CACCTTCCCCTCCCCTGCCCTCCCCTCTCTTACCTAGTTTCTTGGCCACAGCTGCATCCT
+TCTCAGAGTCTACCAGCCCGAAGCCAACACCCTTGTCTTCTAGGACTTGGGCTGCTAACT
+GGAGGGAAGGATTATGATTAAAATTAGCCCCTGGAATTTTTATTATCCCCTATTCTCCCT
+CTCATGCTTTCAAGTTGGGTCAGTTGAGGTGCGGGAGATATAAGAGTTGACCTTGACCCT
+TTGCTAAGATCCAGAATATCCCCCAAATTCCCTACAGCCACACAGTTCTGTTTTTGGCTC
+AACTTAGTTCATGAATATTGATTGCTCATTTGCAATGTGCTTATTATGTTGGACTTGAAG
+ACACTGCACTCAATACTTCCCAGTGCCTAAGAGGTACTGAGGAGTCTGGCTTCAGAGTGA
+CTGAGGGCCCCTTCCCTCCTTCAGGGTTGAGTCCTGCATTCCATCCTGCTCAAGTCACAC
+CTGGAGGTCCCTGTCCCTGGTCAAGGGCCAAGCTCAGTGTGGGGTTGATCTTGGAAGTCC
+CTGAGATTCTTCAGAGTGACTCCCAATGGGAAGAATTTTGGAGGGTGGGAGGGTAGTGGT
+GGGTCTGGTCATGTTGTTGAGGAGTTGGGGAAGGCCAGAGAGCTGAGATAAGAGGTCACC
+ATTTGTTACCCCCCAGAGGGAGAGAGACTGACAGCGACCGGACAGTTCTCCCCATCAGGG
+CCCAACTTTCCCCTTTCGCACTCTACTTCAGCCTGTCCTTAGGCTGTGGACAGTCACACC
+TGGAGGTCCCTGCCCCCATAAGCCTTCCAGCTTAGCGTTCAACCCTGATTTTCTCTGCTG
+TGGGCACAGCTTGCAGCTCTACCCCCACTCAGCACAGACTCAGGTACTGTGATTCTCAGA
+GGGGTGGGGAAGGGAGGCGGTTGGCAGGTGGATGGCCAGGGATACTGAGCACTCAGGATA
+AAAATACTCGGCTCATTAATCAGATGGCAGGTTCTAGCACCCAGCAGGGGTGCACAAAAT
+GAGAGCCTGTGGTTCTGGTGTCTCACCCTGAAATGCTCCAAGCTCTGGACAAGCTGAGGC
+CAGAGAAGGGGCCACAGGAGCTTGGACAAGAAAACTCACATCCCATTTCCTAAAGGTGTG
+TGTGTATGAACACATTCCCAAACATAGGTTTTTTTTCTTTTCTTATGATAACTTCTTTCT
+CCTTTGGGCCTGAAGTCATTTTGGAGGTGAGGAGACTTTTCCCACAAAAAGAAGATTGGG
+GAAGACATTCTGACACCTCCTGCAACCATAAGAATTTTCCAGTGAGGGCATCCAGCTTAT
+CCAATGGAGACAAATGGGGAATTAGGGAGGGGGAAGGAAGAGAGAGGGGACTGGGAGTCC
+AATGGGAGGCATGATCTCTAGGAGATATTTAAACATGGCATGCAAGTATCATAACATTTA
+CATTTACACTCAGTTTGGATCTCTTTGGGGTGCAGCCCCTGAAGTATGTGGAGAGGACGT
+GTTCTGGCTAAGCAGATGGGTGGGGTAGGGGAGTGCAATATACACTGCTGTTCTCCCCTG
+GCCCTCCAAGTACCTCCCTTTTTCACCCTCAGTGTCCTAGCCAAACCTAAGTCAAATACT
+ACCAGCCTCCTATAGCAGGGAGGGGAGCTCTTCCTGCAGCACCCCTGCTAAAGGTTTCCT
+GACGCTGATAGTACTTTTGGGTTCCTGAGACTCTGTTTCTCATTTGACTAAAGCCACATG
+CCACTTGCATCCTGTTGGTCAGGACAGGGTTCCTACCACACTGCCCCTCACCCCCAGATC
+CCATCCTCAAAGAGCTACATGGGTGTAGGATAGGGGATTCCGGAGAGGGGCTAGACATGC
+TCTGCAGGCCTGCAGTGCCCCCAACTCACCTCCAGGATCAGCTCCTCCATCTCAAATTGT
+CTTTGTGAGGCCTTGTCATCCTCGGGGGGTTCATGGTAGAGGAGTGCCAGCACCTCATAC
+TTCTTGAACACATTCTTGTAGTTCTTTGCATTGACATTGATCACACGGTCCACACCATCG
+TACTCAGGGAAGTCCAGCCCTTCCTGCCCCTGTACCCCTGACTTGGGTGTCCCTAGCACC
+AGCAGCAACAGCAGTGCCAGCCGCAGACCCGGCACAGCTCTGGGCCCCATCCTGTCTGTA
+GCACTCATGGAGGTAGTGGGATCTGGGTTGGCTCTCCTGGTCCAGAAGAGGTTAGCTGGG
+GTAGGGAGGGGCCCCTGGGTCCCAGATTCTGAGTTAGGGGCTCAAGTGGGAGGGGGGATT
+GGGGAATGGCCCAGAGCAGTGGACAGAGGGCACTGCTGGGTCCTGGAGAAACTGTCGACA
+GAGCCAAGGGAAAAAGGGTCAGGTGGAGGAATAGGAGCAGGGGGCGGGGAGGGAACCGGA
+GCTGCCCCCTGAAATTGGCACCCCTCAGTCCCCACCTGGTCCTGTCTAAATATGTCTCCA
+TGGTTGGCAGGAGAGACGGATAACCTTCTGAGGCCAGGACAGCTCCATGCGGGCCGGAAT
+CCTCCCACCTCCTACACTTCTACCCCAGGCTCCCAGGGGCTCCTTCCCTCCCCCTCCTCC
+AGTGTCCAGGGCACCTCAGCCCCGGTCCCCACACTGGCCTAAGAGCTCATCGCCATATTA
+AGAAAGGAAAAGGAGAGATGCCGGAGCTAAAAATAAGACCAGATGAGTCGGAGGTAAATG
+AAGCTGGGGCTGGGGGAGGGGAGGTATAAGGGTAATTAGGGTGGGCCGAGGGTGGGGCAA
+GGGTAATTAGAAGTTAAATGCAGTGGAGAGGAGGAGCTAACTGGGTCAGAGGCTAAATTG
+ATAATATAGGACAAAAGGGGGACAAATGTGGTATTGGCTCTATGTCCTAAAGCCTGGGAA
+TCTTCTGTAGGTGAAGTCTGATTCTGATTCTTTGATACTATGTAGGAGAGTGGGGCGATA
+AAAGTGCAAGCCAGGCTGCAGGAAAGAGACCCCCTCCACTTTCTTTTAAAGAACCAGGAC
+CAAGGATGAGGTCATTCTCTGAATAAAAGATGGAGCCGCGTCCAAGACTGGCAACCTGGC
+TCAGCTCCAGCCAGTTTCCTTTGCTTCACTCAGCACTCCTAACTTGGGAATGCAGGGAAG
+GACAGGGCTGGCCCAGATCCTGTCCTGTAGAGATGTGGGTTTGGGGTTTCTCTGCCTATA
+AAGCCCACAATGAGAAGGGGTCTGTCCTTTGGTTGCCACCTGTGTGTGCACTCCCTACCC
+TGATCCTATGGGACTCGGTTTTCTGGGGCACAGTGGGAAGGGAGGTTTTCCCAGGCTGCA
+TGCTTCCAAGGAGAGTGAATAGCTACTATTTATTCCCATGTCTTTGACAAGACAAAGAGG
+ATCAGGTATTGACACCAGGGCAAAAAAAAATTAACATTAGGCAAGCATGAAGAGGTGAAC
+CCTGTGCCCTGAACCAAGACTGATGATTAATTCTGTGCACCCGAGATCCAAAGCAGAGAT
+GGGCGGGTAGGGGCTTAAACTTCTGGATCCAGCTTGGGGGGTGAGGACAGAGTAGTGAGT
+TTAACTCTGGATACGTAGGAAGGAAGGATCCTCTTCATGGCCCTCCAGGGTTCTTACTAT
+GCACAGGTAAACTGAGGCATGAGGGAGACCATAGAGGGTCAGATCAAAGTCACATGCTGC
+CACTGCACCCTGAGGTATCTCTGATTCTAGAGGGACAGTGGTGTTTTTTTTGTTTTGTTT
+TGCATCCAGGGAGGGTCATGTAATTATGTTAGCAGAATAACTTTTCCACATGTCTGTGTG
+GTGGACCTTACACAGCTGAACACGGAAGGCCACAGAAAATCTCAAAAGCATGGCAGGAGT
+GAGGCCAAGAGGGACAGCTGTCACTGTAGCAGGAGAGGTTGGAGCTAGCCTAAGCACAGC
+ACGTTGGAGCTGAAGAGACCTCAGAAGCCCATATCCTGGTCTCCCTGAAGAGTGGCTAAC
+GTGGGAAAGCCCCAAGGCCAGAGCAAAGGCAGCCAGGTGAGATGGGGAGGCCTGCCCAGA
+AGCAGATGATCTAGGTGATGTCCAGCCCTGATTTCTCACTTCTGCATTCCAGGGGAGGCG
+GGAGACACTGGGACAGAGACGCTGGAGCATAAAACTTGTTCCATCTTGAATCTGACCTTT
+TGTACCCCCAAGAGTATGAGATGGGACTCTGGTGGCGAGTCTGTTCCATCTCCTGGTCCT
+TTCCTACAGCTACCGGACATTGCCATCCATCCCGTTCCCTCTCGTTGCCCTCTCTGGCTC
+CAGTCCCACCCTTCCCCTTTCCAGCGCCCGCCCCCACTGCTCTCTCTTGATGCTTCACTC
+TGTCTACTTTCTACTCAGTCTCCTTCCCTTCAGGCCCTTGGACTTGAGAAGCCTCAGCCA
+CTGGTATCAAATGACCGTGAGTTGGGGCCTCTGTATTTTTAACCCACCAGACACCTGAAG
+CTCCTGCTACTGAAACCTGAGGAGCTGGCCCCTCTGTGCTCCTGCTTCCCACCTCACTCA
+CATATACCCTGGCCCACTGATCTTCCAGGGTCTGGTTCAGCGATGTCAGCAATTGGGGAA
+GGACCCCAGGCTGATATAGCACTGTTCCTAGGAGACCTTGTGATGGAAGAGGGAGGAGAA
+GAGGGAGAGAGACCTCTGAGGAGGGGCCTGATGTAGGGAAAATGGCGGGCGATACTCACG
+ATGGCCTTCTCTTTGCTAGCTCATGGCCTCTCATCCTCCAGACCCACCCCGGGTCCTTCC
+CCATCACTAGCCTATTTTTTTTGTTTGCTTTGTTTTGTTTTTGTGTATTTTTGGCCCAGC
+CCTCACTGAAAGACTAGTCTGAAAGTGATTAAGGGCCCACGTTCTGAAAATGGACAGACC
+TTGGTTTCTTCTTGGGTAACATGGAGTGGGTGGTAGGGAAACATTTATGTGACTTTCTTG
+ATGGTGTGTATATAAAACCCTCGTCCCAGTGTTAGGTGTAGTGTGCAGACACCTAGTACA
+TATGTGTTTTGTTGTGGAAATAGCTGATGGTGTTCCCCTAGCTTTCGTTAGCATTTCTCT
+TCCACCCTGTACACATTTGGGAACAGCCAATGCTCTACTCTGAAAGAGGCCTGAGAATGA
+ACTGACCCACATGGCACAGTGGAGGGGCTCACAGAGCAAAAGTGAATGAGCGGGTGAAGG
+GGAAGGAAGAAATGCTGAGACGAAGGTGGGCAGGAGAGAAAGGCCCTCCACGAGTTTTCA
+CTCATCTGAGAATGCACCTGGGCCCAGGCACAGAGGCCAGGCTCTTGGGAAAGTCCATCA
+CCCCGTGCAGAGCAGGGGCGGCTGCTTCACAGTAAAGGCCACAGGGATTCAGAAAGTCTT
+GTTACAGCCTCTATCTCAAGGCTACACCAGAGAAAACAGGCCGAGGGCCAGAATGGGGTT
+GAGGAGCCCTGATGACCGAGGGTAGAGGGCAGGGACCACCAGTATGCCTGGAGACTGTCC
+TTCCTACCTGTCCCTTTCTCAGATGAGGGACAACACTCAGCACATGGCCATTCCCAAGTC
+ATTTAACCTCTCTGGACTTCAGGCTTTTCACTGATGCAATCTGATCTCTTCTAGCACCAC
+AGCGCTGTGGTCTGCAAGCTGGTCTCAGGAACTGCAAGGTGGGGTCAGATGACTCCACCC
+CACCTACCCCCTGTGCGGATGCTGTGGACCCTCTTCCCACGTGTGACTTAGCTGCTCCTC
+TCTCCCCCACCTTCCATTTCTCTGATCTCAGCTGGAATTTTGAGGCAGTCATTTGCCCTG
+TCCTTGCTGTCCAGTGGCTCGGAGAGTGACTTTAAAGGGCAAGGATGCTTGATACTGATC
+CTATCCCAGCCTTGACTCTGGTCCTTAAAACACTTCCCCAGCCTGGTCATCATGAAGACC
+TGTGGAGGGAGCTTCACAGTAATCCCCACTCCCCCAAGATCAGATTTGACACATCCAGCT
+GGTGAGGGCTTTTCCAGACATTTACTGTCTAGACTGGCTACGGGATTCAGCAGGACCATG
+CCCCTCCCTGCGGGGCTCCAGCTAGATGCAGGCTGGGCTTCCCTTCAGCCCACCTCATCC
+TCTGCCCATGATTCCCTCAAGCCAGCCCAGGATTGCCCATCTTTCTCATCCAAGATAATT
+CATCCTAGTGTCTCAGATGTTGTGTCAAACTTATAATCTCTGGCCCCATCCCAGCTCTCA
+CAACACCCCTGTGTCACATGAAGCTCTTCATCTGCTGAGTTTAGTAGTACGTCTCCCTTT
+CCACCCAGCCTGTGGAAGAGAAGAGGCCAGCGTGAACTGGGAGAAGGAATTAGAGGCATC
+CAGGGGAGACAGGCAGAAGGGAAAGCAGGGACAAGGTTGGAGGTGGGAGGTCTGGAACAC
+AGGGCACTGAGGGTCAGATGGAGCAGAGGGGTGCAGCCAAGGGGTAGAGAGGTCTCGGCT
+CCAGGCGCAGTCAAGGCTGGCAGCGAGGAGGGGAGAAAGTCAGTGGTGGGTCTGGAGGGA
+CTAGGTGGGATGGGAGAGCTAGTGAGGGGGCTGGTGACCACTCAGTCAGAGCGGGGCCCA
+GGGGAGCCTCACCATCCGGGTGCTGACGGATGAGGAGTTTTCTGATTTCATCATAGACGA
+AGATGAGAATACTGTAGGGAATGGCACAGAGCCACCAGGTTATCCTGTAGAGAGCAAGAG
+CCAGTCAGCATGGCAGGAGAGAGAGATGCCAGAGGACAGGTGCAGAGGGCAATGCACAGG
+AATTGCACGTTTCATTGCTTGTGTCTACTGTGTGCGTGGGACTGTGCTGGGTGCTGGAGA
+CAGTGCAGTTGAATAAGATCTGGTTGATCCCTTTTTTTTTTTTTTTTTTTTTTGCGACAG
+AGTCTTGTTCTGTTGCCCAGGCTGGAGTGCAATCTCGGCTCACTGCAACCTCTGCCTCCC
+GGATTTAAGCGATTCTCATGCCTTAGCCTCCTGAGTAGCTGAGATTACAGGTGTGCACCA
+CCACGCCCAGCTAATTTTTTGCAATTTTTTTTTTTTTTTTTGAGACGGAGTCTCACTCTG
+TCACCCAGGCTGGAGTGCAGTGGCTTGATCTCGGCTCACTGCAAACTCTGCCTCCCAGGT
+TCAAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCTTCCACCAT
+GCCCGGCTAATTTTTGTATTTTTAGTAGAGGCGGGGTTTCACTGTGTTGGTCAGGATGGT
+CTCGAACTCCAGACCTTGTGATCCGCCTGCCTCCACCTCCCAGAGTGCTGGGATTATAAG
+TGTGAGCCACCATGCCTGGCCAAATTTTGTATTTTTAGGAGAGATGGGGTTTCGCCATGT
+TGCCCAGACCGGTCTTGAACTCTTGAGCTCAGGCAGTCCACCCGCCTCAGCCTCCCAAAG
+TGCTAGGATTATAGGCATGAGCCATCGTGCCCGGCCTGAGCCCTACCTTTGAGGAGGTCA
+AATCCTGTGGGATTAGTGATACGGGAGTGGAGAGGAGAGGCACGAGACAGAAACATAAAG
+AGCCAGTGATAGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCAGCCTG
+ACCAACATGAAGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGACG
+CGTGCCTGTAATCCCAGCTACTCAGGAGACTGAGGCAGGAGAATTGCTTGAACCAGCGGG
+GCGGAGTTTGCAGTGAGCCGAGATTGCGCTACTGCACTCCAGCCTGGGCAACAGAGTGAC
+ACTCCATCTCAAAAAAAAAAAAAAAAAGCATTATCAATAAGATATTTTACATTCTTCTTC
+TCAACCTAAGTCTTGGAAATTCAGTGAATATTTTTCACTGACAGAATGTTTGAACTCAGA
+TACAAAATTATTTTCAACAGTTGAAGGGGAACGTAGTCCAACCAAAATAATGAAGTTATA
+ATATAGAAAAACATTACTTTAGTTTTTGACTTTGATTAAAATTAAATAAAATTACCAATT
+TACTTCCCCAGCCTCCCTAGTCTCATATGTCAAAAGCACAATAGTTGTATATGGATAGTG
+GCCACTGTATTGGACAGTACAAGCCTAGGCCATGGCTCATTCCACACACACACAAATATA
+AACATTTCTACACAAACAGCCACCCCACCCCTTCATTCAACAAACAATTGTTTTATGTAC
+TCTGTGTCTGTGTTAGGTATTGGGGAAATAAAAATGATAACATGTTGGTCTCAAGTAGCT
+CTCAAGGAGCTCAGGGGAATCAGAGAAAAGAGGGTGAACACACACACACACAGAATACAG
+AATCGCAAGATTAGGTGCTAAGTTTTATGGTTAATGTGTACACAGAGTGTTTGAAAGCAC
+AGATAATACCAACCACATTTGTTATTATGTCTATAATGGCATTAATAACAGCAAGAATAA
+AGCTACTAGTAACAAGGCATCTACCATGACCTAAGCATGCTTGGTGCTTTATGCCCATTT
+CTACGTGTAAATCTCACAACACTCTGCAAGGTCTATCCCTGGAGTGTATTATTGAGACTT
+TGCAGGTGTAGCTGAGGCTCAGAATGGTTCCATACTTGTCCAAGGCCACAGAATAATACA
+TAGTAGAGTTGAGATTTCCACCTTGGATTGTGAGTGTCTAAGGCCAGTGCCAATGTCTCC
+TATTAAACATTAATTGAGTGCCTACATTGTATAAAACATCGTGTAAAACATTACAAGGGA
+TGGAAAGGTGAGCAGAGTAGAGACATGCATAGAAATGTGTTGGAAGAAGGCCGGATGTGG
+TGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCACAGGCAGGTGGATCACCTGAGGTC
+AGGAGTTCGAGACCATCCTGGCCAACATGGTGAAACCACGTCTCTACTAAAAATACAAAA
+AATTAGCCCGGCATGGTGGCGGGTGCCTGTAATCCCAGCTGCTTGGAAGGCTGAAGCAGG
+AGAATCACTTGAACCTGGGAGGCAGAGGTTGCAGTAAGCCGAGAATGCGCCATTTCACTC
+CAGCCTGGGCAGTAAGAGCAAAACTCCATCTCAAAATAAAAAATAAAATAAAATAAAGAA
+ATGTGTTGGAAGAATACATACTAAATCCTTAACTGAGGCTACCTATGCAAGTTGGGTTGG
+GTAATGGGGATGGGAGTGGAATGGAACATTTGCTTTTTTCTCTGTACATTTTTAAATATA
+AATTTTTCCACATCAAGCATATACTAATGTTAATGCAAAGCAAAACAAAGCAAAAACCTG
+GAGTTGAACAAGACAAGGCACCAGACAATAAGAATTTCAGTAGGCACGATCACATACATA
+TACATTAAAACATATTTGCATAAGTACAACAGAATTAGCTAAGCATAGATTAGGCTATTT
+TTATGGTTACTTCAGCTCCAAGCAATATGTATCCTTGGACAACTACAGCACACATCATCA
+CGAAAAAGATTTCACTCTGTTAAACACTGGTAAGTCTGCTAAATTTGTGTGTCTAGTGGG
+TTGTGATCATTTGGCACAGTCTCTAATATCTCCCTAAATAAGATCCACTTATCAGAGCAC
+AGATGACTCTCCTTGTATCACTGAGTGTACATGTAGCTCCATGTATCCAGGCAGCTCCTG
+GCTCATGACAATCCTAGTGAAACACACCCTTCCTCTGTATGCTCCCCCGTGCCTGCACAT
+CCCCTGCTTGCATCCCTTCCCTTACTCACTTGAGTGGGTACATTCGCAGGGCCACGTCCA
+TGCCTGGAGTGTAGGACAGAAATGCAGCCAAGAGTGTCTCCTCCAGGATCCCAAATATTA
+AGACTTTGTTTCTGGGAAGAAACATTCATGGGTGTGGCGGGGAGGGAGAAGAAAAGGCAG
+GGGGAAATGGCATGTCCTGGTGTGTGGGGCTTGGGAGAGTGCTGGGCTGTTTTGTGAGAT
+GTTCACCTCATGCCCTGCTGGAAAAGTGAGTTGCGGCGAGTCTTGGAGATGATGAGATCC
+GCCCACTGCACAACCACGATGGTGACAAAAAAGGCCGTTTGGCATGTGAACTCCACAACT
+TTTCGTTGCTCATAGGTCTAGAGACAGCAGGGAGAGAGGAAACAGTGTCAGAAGGGGATT
+CTGTACACCCTAAAGGCTTCTTTCCTCTCCCACCTTCCAGGACCCCTGGGCTGAGTCAGG
+TCCTCATTCTTTTTTTTTTTTTTTTTGAGACTAAGTCTCGCTCTTGTTGCCTAGGCTGGA
+GTGCAATGGTGTGGTCTCTGCTTACTGCAACCTCCGCCTCTGGGGTTCAAGCAATTCTCC
+TGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCTGCCACCACACCCAGCTAATTTT
+TGTGTATTTTTAGTAGAGACGGGGTTTCACCATATTGGCCAGACTGGTCTCGAACTCCTG
+ACCTCAGGCGATCTGACCGCCTCTGCTTCCCAAAGTGCTGGGAATACAGGCGTGACTCAC
+TGTGCCCGGCCAGGTCCTCACTCTTTAGACCCCACAGCACTTCACAGATGTCTCTGTTTT
+AGCGCTTTTCATGTTGTATCATGATTTGTTTATGTTTTCCTTGCCTGACTGAGAGCTTCT
+CAAGAACAAGCACTTGCCTTATTTAGCTTTATGTCCTCTCCCTGTGCTTAGTTCGGTGGA
+TGGCATGTAGCATGGCTCAATAAATATGGATTGAATTAATAAGAAGGATAGGGTAAGAGA
+AGAAGGTGGAGTTAGAAAGGAAGGCAGGCTGGGCGTGTGGTGGCTCATGCCTGTAATCCC
+AGCACTTTGGGAGGCCGAGGTGGGCAGATCACGAGGTCAGGAGATCAAGACCATCCTGGC
+TAACACAATGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGTGGTGGCGG
+GTGCCTGTAGTCCCAGCTGCTCGGGAGGCTGAGGCGAGAGAATGGTGTGAACCCGGGAGG
+CGGAGCTTGTAGTGAGCCGAGATCATGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGA
+CTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGAAGAAGGCAG
+AGTCAGGGGATATAGGTCAGAAAGAAAGGATGGTGGGGCCAGAGGACAATGTGAGTACAG
+GGGGGAAAGAAATACCCCAAAGAGTGGTATCAGATAGGAGACATTCATTCATTCATTTGT
+TCTTTCATTCACACTTTACACTTAAGGAACTTAGAGTCTAGTGGGGAAAATAATTATGCT
+AACCATGTCAGCGCAATAGACAGGTTATGAGAGAGAGACTGCAGAGGGCTATGGGAACAC
+AGAAGGGGTGCATCTAAATTAGACTGGAGAGAAAAAGCTTCTGTTTTAAAAACTAAAGCA
+GCATGTTTATCAACAACTCGTAGATCCAATAAATTTGAAAACAGGTAGGACCGCACAATA
+ATGAAATACTACATATCAAACCCCACGGGATGTGGCAAAGTGGTACTTGGAGAGGAATGT
+AGAGCCTTAAACGGTAGAACACAAAGGCTGGAAATTGATGAGCCGAGCATGGAACTTAGA
+ATAAGACCAATCAGTCAGTTGCCGTTACCCGCGTCACTCCCTATCATTTCTTGAAGATTT
+TAGCTTCTGACACTGTTACTCCACGATTACTTCTGTCCTTGTTTTTGGAGATTGAATTGT
+CCATGTTGATGATCCTTCCAATGCCGTGGTCTCACAGTTCCTTGATCTCCTCCAATGATG
+TTGTCCTGTACTCCCCACCTGGGCTGCTTGCTCCCATCCTTAAACTCTAGATATTATCCT
+TTCCAATACCTATTATTTCTCTGTAATTTCAAGCATCTTTCTCTCTCTGACTAGCATTTT
+TATAATTCCATCTCACTCTAGTATTCTAGTTCCAACACTTCTTGGACCCCCTTGGACTTA
+CAATATCTTGATTAAATATATTATATATTTGGTTATTATCTGTCTTCCCATCCCTAACTA
+GAAATAAGCTCCATGAGAGCAATAACTTGATGATTTTTGTTCACTATCATATCCCCAGGC
+CCTAGAACAGTACCTGGTACATAGAAGATGCTCAGTAAATATTGTCTGAATGAACAAATG
+AGTGAGGGAATCACTGGAATTAATGAAAGATATTTAGAAAGTAAGGAGCGATCCTTGCTT
+CTAGCAGTGCATCATGCACTTCCCGGGTGGGTGGCTCTATACCAGATTATCTGAAAATTA
+CAGTCATAGAAGAGACACGGACACGCCTTTGTAGAGTTTGTTGCTCAGGGCAACTGTTCT
+CAACACATTTTTGTGACACTTCAAGATGTCTTCAATGTGCAGGGGGGAATTCTTAACTTT
+TTACCTCAAATTACTTTTAATTACCTTTAATTTATTGTCAGTAAATTTATGTTATGCTGC
+ATCTTGGCAGCAGGGAGGAAGTATCAAGATGGAAAACAAGGGCTTGAGCTGGGAGTTGAG
+ATGTTGGCAGGGTACACAGCAATGAGAACTTTGGGTGTCACTGGATCCTGCAGCAGTGTT
+AATAGCTGGGAATATTGACATCTGATGGCCTTACAGCCTCAGCTAATGGAAGCCAACTGT
+ATGTGAGGGGCACCTTCCCAACAATTCAGATTTGAACACAATTTGAGAAACCTGCAATTG
+GCTTGGTTTGGAGACATCGTGGTGAAGTGGGAAAGTGCTGGGCAGGGATCTAGAGTCCAG
+AATTCAAGTCCAGCTCTTTTAGGGATTCACTGTGGGACTTTAAGCAAGTTACAGAATCTT
+CCTAGATCAGGTTCCTCATCTACAGAATGATGAAAATTTACTGTGTCCTCTATGACGCTA
+CAATTCTAGCAATAATTGCTGTCACATGTCACATGACCCATTGTCTTAGAAATTGGGACA
+GGAGTATCACAGATACCAAAAAAGCATACCTACTAAAATTCTTAAACACATATTGTTGGG
+GAGAGGGCTTCAGTTTGAGCCTCATTCATCTTGGTTGTTAGAAAAGACAACTATAGAGAG
+GTATCCTCTAGGTTAAAAAGGTTTAGAATGATTAGGAAGGGATATACAAAGAAAATAGGT
+TTTCAGGGTTTTGTTTTTTTTTTTCTTTGAGACGGAGTCTCGCTCTGTTGCCAGGCTGGA
+GTACAGTGGTGCAATCTTGGCTCACTGCAATCTCCGCCTCCTGGGATCAAGTGATTCTCC
+TGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCACCCACGACCATACCCAGCTTATTTT
+TGTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGATGGTCTTGATCTCTTGAC
+CTTGTAATCTGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGTCACTGTG
+CCCAGCTGGTTTTCAAATTTTATATTTATTTATTTATTTATTTTTTGAGACAGAGTTTTA
+CTCTTGTTGCCCAGGCTGCAGTGCAATGGCGTGATCTTGGCTCACTGCAAACTTTGCCCC
+CCGGGTTCAAGTGATTCTCCTGACTCAGCCTCCCGAGTGGTTGGGATTACAGGCATGCGC
+CACCACGCCCGGCTAATTTTGTATTTTTAGTAGAGACGGGGTATCTCCATGTTGGCCAGG
+ATGGTCTCAACTCCCAACCTCAAGTGATCCGCCCACTTCGGCCTCCCAAAGTGCTGGGAT
+TACAGGCGTGAGCCACTGCACCCTGCCTCAAATTTTAAATCATATCATTTGAACTCTCAT
+GGAGGGAGAGTATAAGAGGAGAGGACCCAGCATTTATTTCTGGGGCAATCATGGTTGAAA
+CCTGGGAAAAGAGAAACCGGCTCAGAAAAGTTGATATGACAAAGACTCCAAGGAGCCAAA
+GGACTCAGACAGGCAGGGCCAGACAACAGACAGGAGGAGCCAACCAGAATGAGCCTATGC
+TAAACAGAAGAGGATTTGCTGTTGTGGCTGAAGACTGAGGGCAAGTGTGAGAAGGCTCAT
+ACAGGACTGAAGAAAAGGCATCAGCTCTGTTTGTGTGGTGCTTCCCTTCTTCTCTCTTAA
+AAAATTTGCTGCACTCACTCTCCCCTGTCACTCTCACTCTTACTGGTCTCAGCTCCTACC
+TTATCCCTTCTACTCACCCACTGCTGTCCGTAGCTGTCCTCCAGGTCATTCAAGTATTTA
+TCTTCCCAGTGGAGGCGGATGCCCAGCAGATCAACAGGCCTAAAACCATTCTCAGCCAGG
+ATTACAAAGTAGGTAAAGAATCCAGCCAGAGCCTGGATCATCCCTGTGAATGACAGAGTT
+GCAGGACAGGGGAGCCTGGTTCAGAGAAGGGCCCCAAAAGGTAGGCCAGGGGAGGGCTGG
+GGGCTGGGACTTGCTTCTGGCTGGGCTGGTGTGTGTCTGAAGAGTAATGCGGAAGAATCT
+TGCCATCAGCTGCCTAAGCCTGTTATCTCTCATCTCTGTGGGACTTTCCTTCAAGCCCTG
+ACATTTGTTTCTAAACTCTGACCCAGAGTCAGCTTTGAATCACATGGTCTTAAAACTAAA
+AAGGACTTTAGAGGCCACTGATTCCCTCTCCCAATTTTTTCTTTTGAGACAGGATCTTAC
+TCTGTTCATCACTCACCCTATTTTCTGAAATACCATTGCACCTGGAAAATCAAGAACAAA
+GACTTCTAATGACTCTCAAATGTCTGTCAAGGCTGACCCCAACACAAATGATATCTCTGT
+CTACAGTCTTATTTTTCACTACTTTATAAAGACTAAGGGCTCTACCAACCTGGTTTATTC
+AGTCTTCTTTAACTGTGCCATCTGAAATTCCCCACTCCTTTGGTCTCCCACACCTGACCA
+TACACTTACTAGTCTAAGCCCCACTCTCCCACACCAAGTTTAAGTCCTATTTCCTGCATA
+AGGCCTCTCTGGCTATCCTCATAATGGTGAGCCTGCTTGCTTTGTAATCCTGTAGTATGT
+TCTGTCTATACTCATTACTTGGCACTTGGAGATGCTATCATTTTATGTATGTTTGTTCCT
+TTTGTACTGGACTCTAAGTCAAGGACAGATCAGAGATGAGTGGTTCTTTGTGTTTTCAGG
+GCCTAGCACAGTTTGTTGCTGAGAAAATATCTCCCTGACTTCCAAACTGAGATGGCTCAC
+GTTGCCAGGCAGGGGAAACCACTCCCTGCTCCCTCCACCCCTCATTCCTCTGGGCGCACC
+AATCTGTCCATAGGCCATGCCAATGAGACGGTGGTTCACCAGATTATCCGTCTTTGGGTT
+CCTTGGAAGCCTCTTCATGATGTCGCTTTCAGCTGACTCATAAGCCAAGGAGATGGCAGG
+GACCTGGAGGATGGAGGGTGGGGATGACAGGGGTCACAAAGTTCAGAAGGAAACCCAGCA
+GTAATTCAAGCTCTACCATTCTGTCTTGACGCAGCCTCAATCCATCTTGCAGGAGGTGCG
+AGAGCTGGATCATAAGTGGGTTGTCATGAGTTTTAGTAAGATAGCTCCAGGCAGAAAGCA
+GGGAGCTTAGGAGGCAGAATTCTGGGAATCTCTTGCTCACCAGCTTGGCCCTTACCATGT
+CAGTGCCGAGATCAATGCAGAGGATGGTTATGGTTCCCAGAGGCAGGGGTATACCGAGGA
+TGATGAACATCAGGAAGGGCGTGATCTCGGGGATGTTGCTGGTCAGGGTGTACATGATGG
+ATTTCTTCAGGTTGTCAAAGATCAGGCGGCCTGGGGAGGAAGGTCAACCTCCTGTGAGAA
+GCCCTGGGAGAACACGCCTCCCTCAAACCAGCTCCGGGACAGACAGGCCACCTCAAAGAA
+GTGTCACAGTCCAGGAAAGGGTCTGGAGGTCCTAGACCTCCAGGGTCCCAGTATTGTTTC
+TAAATTGTGTGCCTTTTAATCAGAGCCATGCCGTCTGTGTACTGTTTGTTCTTAGATTCA
+CCCTAGCTAACGTTATCTGATCTCTGAAAATTAGCAGGGTTGGGCCTGGTTAATCCTGCG
+ATGTTTGAGAATGTCAGTATAAAAGCCTTTACAAGATTAAGGCTGGAATGGAAACAGCCA
+GGGCTTGGGACAGGCTTGTGTTCAAATCCCATCTTCTGCCATAGCTAGCTGTGACCTTGG
+GCACCCTCACCTCCGTGAGCTTCATTCCTCCTGCCTTGCAAGATTGCTGCATCCGTCTCT
+GACTGCCTTTTCAAAGATCGAATCCCTCTCTGCACTCTCTGCTTGGTCTGTCCCATGGCA
+CTTATCTTCTTTTAACATACTATATAATTTAATTATTTTATTGTTTGTCATCCCCACTAG
+AAATTAAGTTACACGAGGGCAGGGATTTGTGTCTTTGTTCTCCCTACTGTTGTATCCCCA
+GCATCAAGCATAGTGCCTCCCACATAGTCAGTGTTCAAACGAACTTATTGAATTGAGTGG
+ATATAAAATACCAAGCAAAGTGTCAGGATTATACAAGGTTTCAATGAAATGGTAATCATG
+ATTATGATGTGGATTGCCATATGTTTTAGTAAGATAGCTCCAGGCAGAAAGCAGGGAACC
+TGGAATGCAGAATTCTGGATCTCTCCTGTTCGCAGGAGGGCATTCTCTTCACAGTTCTCT
+GGGGAAGAGGGTGTGTAATTGGGCTCATGCAGTCAATTCACGGGTCAAGCTCCAGCTGAT
+CCTGATGTGTCACCATTATTTAGGCAACAGGTTCCTAGGGGACCCTCCTAAAGATTGCAG
+CGCATGGAATAGTCCCAGGGGTGTGGCTACATCTTGGAACTGGCTGTGGGCCTACTTGAA
+GTCCAGTGAGAGGACAATGACGATATTATGTATACCAGAGTCTCAGTCTCCCTGATAGGA
+AAACAAAATGAATAAAAGGGACTTTTCTCCGATCTCATCAAAAGGGGTTGCAGATAGTGA
+TGGTCTATGTACCAGCTGATCTAATTTGCAAAATTTTGTCTTACAGTGATTTTCTTCTGA
+GCCCGTACAGGCTCATGGAGGTTGGATGATGGGAAAACGGAAAGAGGGGATGTACACCAA
+CCCCCACCTGAGTCCAGGGTTGAAGTCTCCACCATGGGCACCCTGCCTCCTCACCCTCCT
+CCACCCCCGTGACGATGGAGGCAAAGTTGTCATCCAGCAGGATCATGTCGGCTGCCTGCT
+TAGAGACGTCAGAGCCAGAGATGCCCATGGCAATGCCAATGTCAGCCTTCTTCAGCGCAG
+GGGAGTCGTTCACCCCGTCACCTGTCACGGCCACAACGGCTCCCTGAGGAAGTCCAGACA
+GAGAGAGTGAACAATTTATTGCATCAGATCCAGAACTAGTCCCAAAGATCAGTACGCTTT
+CTGCATCCCTTGTTTCCTGATGTAATCATGCTTGATTTAGGGCTCCCGTCCTTCTCCCAC
+TCCTCCTGAGATATTTAAACTTTCAGGGAGCCCCAAACTGAAGGCCACCATGGAGGTTCC
+AGATGGGGTACTCTCTGGAGTCTCCTCTAGCCCAGCCCACATGTTCCTCTGCTTGGTTTG
+GGACGGGGCTAAGCCACAGGAGTTCTGCCAGTTTCCTTGGGGCTCCTTTTCGTTCCTTAC
+CAGCCTCTGACATCCCTCGACAATGATGAGCTTCTGCTGAGGGGAGGTCCGAGCAAACAC
+GATCTCAGGGTGGTTCTGGAGGATCTGATCAAGCTGCTTGGACTGTATGTCCTTCAGTTC
+TGCACCATGCACCACAATGGCTTTGGCAGCACTGAGGGAAAATAATTTCAAAAGGCTAGT
+TTTGAGTGTTGAGAATTCACCAGGGGTCTTGGTGCCCATCTTGACTATAGCTTCATTGCT
+TCTTCTTTCAACCTAGTCTCCTCACTTGTCCCTAGCCCTATATTGTCCCCAAATTGACAC
+ATCCTATTCCCTCCCCACACTTTTGCCAACCACTCCTCTCTTTACCTCTTTCTTCATCTT
+GCTTCCTTTGAAAACCCACTGACATATTCACTTTATTTTCCTATTATGTTTTACTTTGTA
+GTGTAAACCCCACACATCCTACCTATTGAAGGAGACTCCAAGAACTTTAGTAAAGTGGCA
+TCTGGGTGGGGCAGGCTGGGGATGGAGGGAACATGGCAGATCTTGAGTTCTTTAGTGATC
+TCACCTGGCATCGACCTTGCTGATAGGGATCTTAAGCCGGGCAGCGACTTCCTCTGCCGT
+CTCAGTGCCTTCTGAGATGATGCCCACACCCTTGGCAATGGCCTTAGCTGTAATGGGATG
+ATCTCCTGTTACCATGATCACCTGGGTTGGGAAGGAGAGGGAAGAGGAGCAGAATCATCT
+TCAGAGCCCGGATCCTTCATTTTTCTCCTCTGAGAACTGACAGACCACTGCTTTTTTCAA
+CAAGGGTTTTAGTCGCTCATTCCCAAGAACTTGTGGTGTAGACCAAAAACCATGCTACTG
+TGATTTCAACCCTTGGGAGTGGGTCAAGACCATGAGACCATAAAACAGGACAATTTGGGT
+CCTAGATTTACCAGGAATAAGCCAGAGATTTGACTATATTCAGAATTTCATTTAATTGCT
+CCAGAAGCTATTCTGCTTCCTTAAGTGCAAAGACAGAAACACCTACACATAGAACAAGTG
+GTAATAAAGGAATGTCTGGAATAATCCATCTGCCTAGCAAAATTTCATCTAAGGCCCAGT
+GCATTGAAATTCTCACTCTACCTTTCTTCTTTCTGTGGCCTCACCCTGTGCCTTGGTTTC
+AGCTTGGTTTTGGATACCTGGTACCTAGTCCTGTCCCATTTCTACCCAAAGATGAATACC
+CAGGCCACGTGCTTTCACAGCATTCCGTTCCTGCCATATTTCAGCACTTGCCACACTTCT
+CTGTATGGTGATTGCCTGTTTATCTCTCTTCCCCACTGAACTGAGAGCTCTGCAAGGGTG
+GCTTCCTTGTAAAGTTTCCTGCCCTAGCACAATGCCTAGCACATAGTAAGCCATCAACAA
+ATAATTACTGAATGAAGGAATGGAGGAATGATCACTACCTGGAAACCACTGCCCAGGGTT
+CCCAAGTCCCTCCCAGCACACATGGCCCAGACTGACCCTGGACAAACTGCCAACAGTCAC
+AGGTCTAACATAAAGCCAGTTCCAACAGAGAACACCTAGTTCTCCTTAAATACCTCCAAT
+CCCCCAAGTCTAGGAGCCAGAATACACAGGTAATCCAGAATGTAAGGTATGAGCACGGAA
+GGTGTCTATATCTAAGCTGTAAAGATGTATATGATGAGTCTTAGAATTTGACCTGGGAAA
+CATGGGACACAGAACACCTCAGAATTTGAAGAATTCTACTGCTGCCATCCCATGAAGTTC
+AAGGCCCACCCTGACCAAAGAGGATTTTCTACCTCTGGGTTTGTTCTGAAGCTGAACTTT
+CACTCTTGCTCTGACCGATTTTTGGTTTCCAGTTTTTAGACCTGGGCATATACCTCTTAG
+GGCAGCTGCTCACACCCATTGAAGATCCTTCAGCTCTCAATATCTCATTCTCACTATTGC
+CTGGTCCAGAGCCCCAGGCCTCCTAGCCTCACCCTTTTACCGCCCATCTCGGCAGCACAT
+TTACACGACATCTTCTAGCACTTTGCTATTCCTAGTGTGGTCTGCAGACCAGCAGCAACA
+GCATCGCCTGGGAGCTCGTTAAAAATGCAGTCTCTCAGGTTTCCATGGACCTTCTGAAGC
+CAGATCTGCATTTCAAAACAATCCCTGAGGGATCTGCACGCACGTTGGTCTGAGAAGCAC
+TGGGCTGGACGCTGGTTCTCAAACTTCTCTGCTCATTGGAATCTCCAGGAAAGTTTTTAA
+AAATACTGTTGCCAGGATCCTATCCTGAGAGTTTCTAATTAAATCGGTCTGGGGTGTGTC
+CTAGACATCCGATTTTTAAAGCCTCCCTGGTGATTCTAATGTTCAATGCAAGTAGAATTC
+TAACTGGAGAGGAAACTTACTATTAATTAAAAATCATAAGCCCAAGGAAACAGAAATCCG
+CTCAATTAAACTAAGGGTATTAAGCATCTACTCTGCGCAAGGCCTTCTAGTAGATGCTTC
+AACTTGTGACAGGTGAGGGGCTCCTGGTCTGGGCAAGTATTTACCTTAATTCCTGCACTG
+CGACACTTGCTCACAGCATCAGGCACTGCAGCTCGGGGAGGGTCAATCATGGATATGAGG
+CCCACAAAACAAAGGTTGTCCATGGGGAAATTTATTTCATCTGTATTAAATGGGAATCCC
+TTGGAGAAGCTGCTAGGCAGATTCAAGAAGCAGAAGCCTGGAGAGAGACAGAGGGAGGGA
+CCAGTAGTCATCCAGCACTCTACATTCCTATCTGCTTACCCCAACATCCTTGATTTCTCA
+CCCCAGGGATAATAACCATTTTGATGCCATTCTTTTAAAAACTTCTCCCAAAGCTCCTCA
+CCTAGCACACGTTCCCCCAGACCTCCCAGTTCTAAGTAGGCATTTTGGAAGGCTTCCTTC
+ATTTCATCGTTCATTGAGTACTCCTGCCCATTCAGAAGAAAGGTAGAACAAAACTCCAAG
+ATCCTCTCCGGAGCACCCTTCATCATCAGTACGTGGGTCTGGGAGCTGTCCTCCCGAAGG
+TGGATGGACATCTAGGGAGAGCAAAGGGAGCACTGATGGGACAGGAGGAGAGACCAAGGA
+TCAGGGGCAAAAAGAAGGGGAAGGTTTCAAAGGTAAAAAAATGTGTTTCATACCCCCATT
+TCTTTTGTTCCCTCCCCTCCCCAGTCACATCTCGCCCCAGGTTGAACCCAGCCCCTCCTC
+CTTTTTCCAGAAAACTGCAGCCTGGTATTATTTGCACAGGAATCTCCATTATCTTCTCAG
+CCTGAAGCTCCCTGGGGGCAGAACCAGGCACTCTCACTTTTCCCCTGCATCCTTGCCAAC
+TGGATGGCGAGAGAGCAGAAATAGTGGGGCTGGCAAAGAAAGAAAAGAGAGTAGACAGGA
+AAGAAAAAGAGTCGGAAGGCCAGGCGTAGCGGCTCATGCCTGTAATCCCAGCACTTTGGG
+AGGCCGAGGCGGGTGGATTGCTTGAGGTCAGGAGTTCAAGACTAGCCTGGCCAGCATGGC
+AAAACCCCGTCTCCTACTAAAAATACAAAAATTAGCTGGGCATGGTGGCATGTGCCCATA
+ATCCCAGCCACTTGGGAGGCTAAGGCAGGAGAATCGCTGGAACCCTGGAAGCAGAGGTTG
+CAGTGAGCCAAGATTGCGCTATTGCACTCTAGCCTGGGTGACAGAGCCAGACGCCGTCTC
+AAAAAAAAAAAAAACAAAAAAAGTCCAAAGACCTGTGTTCTGGGGCTAGCAAGAATGCAT
+TATGACCATTGGTCAGTCCATGTAACTTCGCAAAATCTTANNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNTCTGTCGCCCAGGCTGGAGTGCAGTGGTGTGATCTCAGCT
+CACTGCAACCTCTGCCTCTGGGTTCAAGTGATTCTCCTGCTTCAGCCTCCCGAGTAGCTG
+GGATTACAGGCACCGCCACTATGCCGGGCTGATTTTTTGAATTTTTAGCAGAGACGGGGG
+TTTTACCATGCTGGTCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCTACCAACCTTG
+GCCTCGGAAACTGCTGGGATTACAGGCACCGCCACTATGCCTGGCTGATTTTTTGCATTT
+TTAGCAGAGATGGGGGTTTCACCATGTTGGTCAGCCTGGTCTTGAATTCCTGACCTCAGG
+TGATCTGCCCACCTTGGCCTCCCAAACTGCTGGGATTACAGGCATGAGCCACACACCTGG
+CCCTTCATTGGGTTTTTATAAGAGATAGTGTAGAAAATACTTTGAAAACTGTGAAGTACA
+ATCCAAAAATAAGTTAGTCATTCTAGTCCCTGGGCCTCTGTCTCCTAAAAGCGTGAGAAG
+TCTCCTGAGAGGGGCTAGCTAGTGTGGTTGGGCATGGGAGTCCTTGTGGCTGTGGTGGGG
+GAGGGAAAGGGCACATTCATGAAATGAACCCGAGGGGACAACCCGCAGCAAGGAGAGAGA
+AATGTGGGTTTTGCATTTCCTCTTGATCTCTGATGTATGAAAAGGAGCTCTCCATGGGGT
+TCCTCTTGTGATGGACAGGAAATGCCCGCTAGGTCGCCTGTTGAGAACAAAATCGATGCA
+TTGAAAGGAACAGCGGGAGTGCAGAGGAGGCCGGACTGTACTGCCCACAGGGTAAGCAGG
+GTGGGAAACCAGTGCACAAAACGTGGTTACAACTGTTCTGTTCGTTTGTTCAGTTACATT
+CAAAATATGCTTAAGCAATTAAGAGGAAGAACCGTAACTCACTTAAGCAGAAAGCTGGAA
+AAAGTTTAAAGAGCTCAGTTTTCTAAGTGCTCAGTAAAATGTGTTGAATAAGTGAAGGGC
+CCAGGAGGTGATGGCAAGACCTACTAGTAATCTCAGAGGTCACGGAAGAAAATCACATCT
+TGATAGAAAATTACATATAAAAGAGACGGAGGGAGAAGTCTGGAAATGCTGGAACAGAGC
+ATCTTCTTCGTTTCATTTTTATCTCAGAGGGAGACAGACCAGGCTCTGCAAACCGCCAGC
+AGTCTGAGATGTCTCTTGCAAAGACTGGAGTCTCCACCTTTAGGGCAAGCTAAGGAAGGC
+AGCAAACTGAGGGCCTCCCAGAACAAGTGCCTGCCAGAGCTTGGCGGCCAGTGGGACAGC
+CTGTCACACTCGGTGAACCACTGAGGACAAGTCTGCCATTTGAGCCGCATGTTTGGTAGC
+TCGGAAGAGGCAGCAACATAGTCATCTGAGAGAAAGTCACTATTTGTTGGATGCCCCAAC
+AAATGTTGTGAATATGTTTTGCTTTTATTTTGAATCACATGGACGGAGAGGTACCTAATT
+CTTTCCAGTTAAAGGCTATGCTGCCGTCAAAGGCCCTGCCAAGCCACAGAATTCCTGTCA
+GACTAGTGAGGCCACTAAACATCAGAAAAAAATGGGAATAAGTTTGCCATTTGGGCCTAA
+GAGACCTATTTTATTACATTGACTAACAACCAGTTGAGACAGAAAAGGATAGATCTATTT
+AACCCTTACTTGCTCAGTTTTCAGTGTAAAACGATGAGTAATTCAGTTAAAAAAAATTTA
+TATAGAGGCCAGGCATGGTGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCTGAGGCG
+GGTGGATCACCTGAGGTCAGGAGTTCAAGACCAGCATAGCCAACATGGTAAAATCCCGTC
+TCTACTAAAAATACAAAAAATTAGCCGGGCGTGGTGGCACATGCCTGTAATCCCAGCTAT
+TCTGGAGGCTGAGACAGGAGAATCCCTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCAA
+GATTGCGCCATTGCACTCCAGCCTGGGCAACAAGAGCAAAACTCCTTCTCAAAAAAAAAA
+AAAAAAAAAAAAGATACCAGCAGACTTGGGAGTCTCATTGTCATTTTCTCTGAGACATTT
+TACCCACACCTGAATGTCTGGCCTTTTCAGTAATGCAAGGAGTTGAAGAATCTACTTCCC
+TGAGCAAATCATGATTGCCCCGTGGAACTCAGCTTGAGCCTGGCAAAGCTCCAATTCTGA
+TAATGTCACTGTTCCTGCATCCTACCATAACCTTCTCTTCAGTCTGCCCAGATTCTACTT
+TGGAGAGAAAGACTCTTTCCTGTCCCTTCATGGTCCCTCACTCAGCATTTCCCCTGCTCT
+TGTCCACTCCTCCCCGGAGAGTGGACCAAAATGTTCCTGGGAAATGAAAAACTTCACGAA
+CACGAACACACGTAATTAGTTAAGGTTACATAGAAGTAGGGGAGGCCCTAATCTAATATG
+ATTTGCATCCTTATAAGAAAAGAAGTTACAAGACACACAGGGGAGAGGGTCAGGCGCCAA
+TGGAGGCAGAGACTCCTGATCGCCTACCCGCTGGCAGTCACTAGAACTGCAAGCCAAGTG
+TGGGGTGAAAGGAAATATTGATGGCTAATGAAATAAAGTGTCTGCAAATGCAACCTAATA
+CTGCAGCGGCAAATCTCCTAGTAGACACAGAGGGGCCTCCTAACAGCTGCATGTGACTCT
+AACATGACTTTCACCAACCAGCTTCTGCTGTCTACAGACATTTCAGAGAGGATTTTATGA
+CTCGCTTTTTCCCCAAGTGGCAGAGCTCAGATCTCTGCATAGGACTGACTGATGCCTCTC
+CCTTTATCACCTGTTTCGCCTCTCTTTACCTGTTTGTCACGTTCTCTTGATGTGAGTCTT
+CCGTTAATTGGGATGCAGGTTCTGCTGAGCTCCTCTGCTTCTCCGAGGTGAAGTTGAAGG
+CAGAGGCAAAGCTCCCCTTGTTCCAGTGATAAGCCCATCCCCCCACCACCATTCTCCTAC
+CTTTGTGGGTTCTGTACCTGGTACTTGTTGGTAGAATTAAAGGGAATCTCTGCCACCTTG
+GGGTTTTTCTCTCTCATCTCCGCCACAGAGCTGTAAGACTGCTCGATGAACTTGAGGAGG
+GCTGACTCGGAAGCATCACCTGTTGTGGCCCTCTGCCAGGATGGGGTTTGTATAGTAAGC
+CCCAGGGGCATGCTACCTGGGCATGAGGCATACCTGTGGCAACATGAAGAGGGGACAAAT
+GGGGAGGGCGGGTACCTGGGGGGAAATTACTGAGCTCCAGGAGAGGATTTTTTGGGAGAG
+GTCAGGTTACACCCTGGACACTGAGGTACCCTCTTCCCCTTGGGCCTGACACCTTAGCAA
+TGGGCAGGATCTCCTGATTAGCCTTAAAGTCAGCCCGGTTGCAGAGGCCAGCGATTCGGG
+CCAGCATAAACCAGGTATCAGAGCTCTTGGTAAATGTTTTTCCTAGGGAGAAAGAAAAGC
+AGAATTGTGAGCAGGGAATGAGAATTTTAAGCAAGCGGTTATTCACATCCTCTTTGGCAC
+CCAGATACTTCACAGGAGCAGCATTTCTTTTCCTTCACACCTCCCTCTTCTCCACTCCAC
+TTTCAGGCTCCCTCCTGTCTGTCTGGACTCACCATTCTCTCTCAGCTCACTCATCACATC
+CCCCAAATCCCTCCACTCTTGTTCCACCAATACCACTAGTCACCAGTCTGTTCTTCAGTG
+GTGTCGGCCTCATACACGGTCATATCAAACCACATGTGGGCGACGGTCATGCGGTTCTGG
+GTGAGGGTGCCCGTCTTGTCTGAGCAGATGGTGGACGTGGAGCCCAGCGTCTCCACCGCC
+TCCAGGTTCTTCACCAGGCAGTTCTTCCGCGCCATGCGCTTGGCTGTGAGGGTCAGGCAC
+ACCTAAAGCCAAGAAGAGAGGAGAGAATACACATGGGAGATGGAATACTCACACCATTTT
+GCATTGTGGATATTTGATCATTTTTAATGCTATTTTGATTTTTTGTACTTACTTCCCTTT
+TGTTCCCACCTACTCTTTACACATTTCATTTCTGGCTGTGGTAAAGGAAATTAGGAAAGT
+GATTGTGGTTTGTGCTTTATGGTAAGAGTTTCAAAAAAGAGTGTATTTAAAAAAAATGTC
+TTTTTCTGTTTTTGTTTTTGAGACAGAGTCTCACTCTGTGTTACCCAGGCTGGAGTGCAG
+AGTATGATCTTGGCTAACTGCAACCTCCATCCCCCAGGTTAAAGCAATTCTTCTGCCTCA
+GGCTCCCGAGTAGCTCAGACTACAGGCATGCACCACCATGTTCAGCTAATTTTTGTATTT
+TTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGGGCTCAAGT
+GATCTGCATGCCTTGGCCTCCCAAAGTGCTGGGATTATAGATGTGAGCCACCGTGCCTGG
+CCAAAAAAGAGTGTATTTCTTTACCAGGGTATCATAGACAGGGATCCTCTTTATTCCCCC
+CTGGAATATTCTGCTCCCTCAGGATCTTTGCATCTTTTCTATTCAGCAATCTCTTTTTCA
+TTCAAGTTTAGGCCACACCTCTTTAAGTCCAATCTTTCCAGGGACCACATAGATAGCTAA
+TCTTTTATGGTGGTATAAGATGCTAGGTGTGGAGGGGGTGTTGGACCAATAGTTCTTTTT
+TTTTCTCGAGACGGAGTCTCATTCTGTGGCGTGATCTCTGCTGACTGCAAACTCCACGTC
+CCAGGTTCAAGCCATTCTCCTGCCTCATCTTCCCAAGTAGCTGGGACTACAGGCGCCAGC
+CACCATGCCTGTCTAATTTTTTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTA
+GCCAGAACGGTCTCGATCTCCTGACCTCGTGATCCGCCCACCTTGGCCTCCCAAAGTGCT
+GGGATTACAGGTGTGAGCCACCGCGCCCGGCAGACCAATAGTTTAAACAAAGAACGTGGA
+GGAGATAACAATAATATCACAGCATTGAAAAGGACTTGAAAAATCATTTAAACCAAAAGA
+CTTTTGTTGATTGTGAAGGCAATGTAAGTCAAGTGTGAAGCAAGATTGTTTTAAAAGCTA
+ATGAATTCTTAGGCTGTGGTTATCTAAGTATGGGCTTCTAATAACAGGATTAGGGGTTAT
+AATGTATTTACATAATATCACTAGTCCCTACATTTTTGTGCTAGTCCAAACACACCCCAA
+GTATTATTGTCATTTCTCTTTTAAGAATATTGACAAACTAGAGCATACCCTCAACAGGTG
+ATCATCAAGACAGGTGGCCTACAGACCATGTCATAGGAAGAATGGCTAAGGGAAGAGGGA
+TTATTTCACCTGCTACCACCAGAGTTTGAGGGGACATAATGGCTACCATCAAAAATCTAA
+AGGGCTAGTCTGTAGAAGAGAAAGAATTGACTTATTTTGTGTTTCTTCAGCTGGGAAACT
+TAGGACAAACAGGGATTTATAGTTCTGCTTAACACAGTGAAATGTAAATAGAGCAAGACA
+ACAATGAAACGAATTGAGACAGAAAGGGGTGGGCTTCCCAATTAATAGAAATAATAATCA
+TCTCCCATGGGTTTTATAGAGGATATTCAGCACTGGTTGAAAGGTTGAATTTATATGATC
+TTTGAAGTCCTTTCCTGTAGGATTCTATAATTCCAAATTAATCTTCCTCCATAAACTGAT
+GAATAAATTGAGGTCTGGAGATGTTAAGTGATTTAGCCAATACTAGAACTCACTCTCAGT
+GACTGATAATCCAATGCACTTTCCATTGAAACTTGCTTCCTCTCTCATTGAGGAAAGAGA
+AGACACCGAAATATATGGGCAGAAGGAGTCAGTGTAATATTGAGAGCTAAATTATTCCTG
+ATACCTATTTACATATCAGGAAGACCCTGCACTTGAAAGGTTCAACAACAAAAGAGACGC
+TAGTGATCCCTTTTCCCTGGGGTGGTTTGTCTGCCCTGAGGCCATTTTCCACCCTGTCTA
+CTCACAGTGACTGTGGCCAACAGCCCCTCAGGCACATTGGCCACAATGATGCCAATGAGA
+AAAATGATAGCCTCCAGCCAACCATAGCCCAAGAGAAGTGAGAGCGCAAAAAAAGTGACA
+CCAAGGAAGACGGCCACCACAGTGATCAGATGGATGAAGTGTTCGATCTCAGCAGCGATA
+GGTGTCTGGCCAACCGCCAGGCCTGACGTCAGGGAGGCAATTCTGCCCATCACTGTGGAG
+TCTCCCGTAGCAATCACAATACCCCGGGCGGTTCCTGTGGATGAAGCAGGAGAGGGCAAT
+GTGATGTTGTGATATGATAAGAAGTATAGATATTGGTCTCTGCCCCCACTTCCTGACATA
+GAGCTTCTAAAAACCCTTAAAGATAGGGGTGCTAGGAGAATCTTTTGTTCTAATATTTGG
+TCTTTGACCCAGTTCTGGACACAGAGCTCCTGAGACCCTTGTAATTTCTTGAGTGATAGG
+AGTGTCTGACACAGAACTCTTAAATGCCTTGGAGTTTCCTGGGTGATAGGAGTATCTTTG
+TTCTAATGAGGTGACTCTTGGTGGGTTCCTGGATAGCCACAGGATGGGGCTGGTGGCCAG
+GGCAGATAACCATGTGATTAGAGAGTTGGAACTTGCAGTTCATGGACTGACCTCTTTGGA
+AGGGAGGAGGGATGAAGGTTGAGTTGATCACCAATGGCTAATAATGTAACCGATTATACC
+TATGTAATGAAATCTCCATTAAAAACCTCAAAGGACTGGGTTTGGAAAGTGTCTATGTGT
+ATGACCATGTGGAGGGTTACTGGAGGGCACCCTGGACAGGAGAGGGCACAGACGCTCCTC
+CATGCTCCTTCCCACGTGCCTTGTCCTGTGCGTCTCTTCCATATGACTGCTCATCTGTAT
+CCTTGTAATATCCTTTATAATAATGGGTAAACCTAAGTAAGGTGTTTCCCTGAGTTCTGT
+GAGCCATCCTAGCAAATTAATCAAACCTGAGGAGGGAGTCGTGGGAACCCCAATTTATAG
+CCAGGCCATAACCTAGGACTTGTGACTGGGGTCTAGAGTTAGGGACTGTCTTGTGGGGCT
+GAGCCCTAACCTGTGGGATCTGATGCTATCTCCAGGTGGATTGTGTCAGAGTTGAATTGA
+ATTATAGGACACCCAATCATTGTCCTTTGGAGAACTGGTTGTTGGTGGGGAGAACTGTCT
+ACACATGTTGGACACAGAAGTGTTCTGTGTTCAGCGTGAGTGTAGAGAAAAAAAAAAGCT
+GTTTCTTGTTTTTTCCATCAGAGATGGTCACCCAAGACATGCCGAGTCTGAATTGAGGAA
+GCTGTTTGGTTTGGTTCCCACAAAGAGCAAGCTGTCAATGGTCAAAAACAAACTGGAGAC
+TCAAGGCCCTCTGAGAGGTCATTATGTTCTGCTCTCCTCATATTTAGTTTATACAAATAA
+AGGTCTTCTAGTCTGAAAAATCTCTGAAGAGGTCTCACAGTTCTCCTGAAATACCCAATT
+TTATTGCCTAATTTCCTTCACTGTCAAGAAGGCCTTTATTATACTTAATCTGAGTATCTT
+CTATTGCCTATATAGTTCTTATCTTGCCCTCAGCAGCTGAGGAGTTTCTAGATCCCAGGA
+ATAGTGGTTACCCTTCTGGTAGAGCACCTATGTTCCAACATCCCCTTATCCCACTAACAC
+AGTGGTTCTCAACTGGAGGCAATCCCTACCCACCAACCCCCTTCCCTGGGGACATTTGGC
+AATGCTTGGAGACATTTTTGGTGGTCACTACTGGGGTGGGAGAAGGCTCCTACTGGCATG
+CAGTGGGTAGAGGCCAGGATGCTGTTAAACATCCTGCCACGCACAAGACAGCTCTCCTGT
+CCCGCACAACAAAGAATTATCCAGCCCCAAATGTGAGTGGTTGAGAAATCCTGCTGTAAC
+TTGCGCCCCCTCCCTGCTTTGCTGGCTGGAGCCAATCTGACCTCTGCTGGGTAGCACGGG
+CCATAACAGCTGGGCTTCTGTGCTTATGAGGTCCCTCTTTCCTCTCCACCAGCACTGCCC
+CAGTCAGAAGGGCAGCTTGGACATCATGTTTGGCTTTACACACACATGATGCAGGCCACT
+CACCTTGAAATGAGGGGCATGACAAAATTCTTTTCTTAAAGTAATTCTTCCTGCTTTACC
+ATAATGACTGAAATTTCTACCATTGAAATCAGAGTCAGAATCACTTAGGCAACATATCAC
+TGAAACTGCATTTATTTTAAAAGCAGAAAGACTCAGGGCCGGAAGTTCAGCTTTCTAAAT
+TACTAGCTCTTTTCCTTGGGGCAAGTTACTTAAATGCTGAGAGTCAGGCTTCCTCATCTT
+CGAAATGGAATGAAAGCACTTACTAAACTGTGAGGACTGAATATTTTTAAGAGTCTAGAA
+GGTTAGGTACATCATAGGCACTTGAAAATTAGTTTCTTTCTCTTTCTTCTCATCATTTAG
+ATGAACTTAGGGTTTCCAGTAGGACCAATATAGACTCATACTTGTGACTTAGTAGGAGCA
+TCTCTAATCTGGGAGCTCCCATGAACATTGCTGCACGTGGTGGTCTCCGTTTTCTAAGGT
+TGCCTTTAGCTGTATGAGTCTGAGAGCAGGGTCCTACTCGAGGAGCAGACAGGTCAGGTC
+AGTGTTTATGTGGAGGAGGAATCCTGAAAAGATTCCCAGGCCGAGGCATTCTATAGATGG
+GCTTCAAACTGGTTGCCAAAGGCATATATTGGGTCCTATGGAATGAAGATGCTGAAATAC
+TTGAGAGATGCTGAGGAGTTTCCTATTTTGGGCCGGGTGGAGGAGGGTGGGTAGTGTATG
+GTGTCACCTACATATATCTTGTGTCATGGTTTAGTCTAGTTCTTCTATTTTATTCCTTCT
+GCTATGGTTTCCTCACACTTGGAGAAACGGGATTTAATAATTAATCTCAGTGTGTCTAGA
+GATGGGGTTGGGTTGATGTCAACCTCAGGGAAGGCTGAGAAGGCCAAGCATCCAGCACCC
+CAGGCCAGACTGGCTTCTTCATACACCTTCCATGTATTGCCAGAGAAATAGCTTGGGCTC
+CCTTGAAGATGCATGAGAACCCAAAGAGAAGTCTGTGCCACAGCCATGTCAAGGTCTACG
+GTGATGAGACATGGGAGGCAAGGGTAAAGGTTAAAGTTATGCTACTAATTTGCAAATTGG
+TCTAAGGCAAAGGTTAAAAGTCTAGGGTAAAGGTTAAAGGGCAAATTGATCTAGGGTAAA
+TGCTATCGGTCTGTGAATTGCTGAGAAATTCCTCAACTGAAAGTCCTTCCGCCCAGCTCC
+TTCTACCTAGCTCCACCTTCTTGGCCACATGTTGGGTTTTTTTAGCCTCCTACAAAACCA
+TTTTCTATCTAAACAGACCCTTGCTTCCCAGTCGAAATGCCACCATCCACTTTCCTCCAT
+ACCCTTTATCTGAACATATACTTAGCTGATGAAAACCCTACTTTGTAAGGGTAAATTTGG
+GCAAATTTTTTCCTAAAATCTAATAGTAGACATCTAGTGTTTTTGGGGAAAGCTTGCTAA
+CTTGCTTCTGTCCAATCAATTTTAAAACATTTTTAATATATGTATTTTATTTTCTAGTTG
+GATTCTTTCTCATAGTCTATTCTGTCTTAACATTACGCTTCTTCTTTAAGGTATTTTCAA
+ATTAAATAGCAATCTTTCAGCACAGAGCAGGGTCTGTGGAAGGAGCACGAGATATGGAGA
+CAGACCTACATTAGAATCCCTGACCCGCCTCCTTTCTATTGAGTTGTACTCTTGGATATG
+GTGCTTAATTTCTTTGAACCTTAGTCTGCTACATGTAAAATAGGGATAATATCTGGTATT
+GTGCGCATGTGGGAATTAGAGATAATATTTATAATATTTAACAAATACTATGCATTCAAA
+TAAAAAATATTTCTAGGCTGGGTATGGTGGCTCACGTCTGTAATCCCAGCACTTTGGGAG
+GCCGAGGAGGGCAGATCACTTGAGGTCAGTAGTTCGAGACCAGCCTGGCCAACATGGTGA
+AAACCCATCTCTACTAAAAATACACAAAATAACTGGGCGTGGTGGTGGGCACCTGTAATC
+CCAGCTACTCGGGAGGCTGAGGCAGGATAATTGCTTGAACTTGGGAGGCAGAGGTTGCAG
+TGAGCTGAGATCACACCACTGCATTCCAGCCTGAGCAACAGAGAGAGACTCTGTTTCAAA
+AAAACACAAAACAAACAAAAGAACTATTTTTAGATAACCAAACATTTGTTCTTAAGCATG
+AATAATAGTTTTCAAGTGTGCTCCATGGAGTCTTCAGGCCTTCTTAAGGGGAAAGGGCAA
+AGGAGAGATTGACAGAATGATGTTCAGTCCCCTTTTATTACCACTTTGCCAGAACTGTCT
+CCCAGTTCAAGGATTCCTAGATTCTAGGAGCAAAGCCAAAGTTTAAGTGGTAAAGTCTAA
+GTGGCACCTCTGGTAATTCAAGTTTCCAATGCCAATATCTTCTCTTGGATAGGGGACTAG
+TTAGAAGTTAGGGCTTACCTTAAGAGAAGAATGACTTAGCAGGGCTGAACAATTTTCCTC
+CCCTCTGTGCCATTTTCAGCTTAAACAGCATAGTCCATTCACAAGCTAAAATAATAGCTA
+ATAATTGTAGGTCACTTACAATGTATTAGGTCTTGTGCTGGGCATTTTACATATATTATC
+TCAATTCACACGAAAATCTACAGAGTAACGACTTTTACTGTCATCACTGTGCGAATGTGG
+AAATGATGACCTTGACCTTTCACCAGAGGTCATACAGCTCGTGATGGAGAAGCTCGACTT
+TTTACTGGCTCTTACCTACCTCGCTACCTTTAGAAGACAGTTATTAAAATCTTTATGTTT
+TGGTGCCTGCTTTTGAATCTATGCTACTTATGGTTCGATATTCACCTTCCACACAGTTGG
+TGGAAAAGAAGCAGATGTTTCGGGTCTCCAGAGGGTTCTCATGGGTGAAGTCAGGGGAGC
+GGCTCTGGGGTTCTGACTCCCCAGTCAAGGATGAGTTGTCCACCTGGGATGGGAGAGGGG
+ACGCGTTGTAAGGTGAGCATAGGAAAAAAGGCAGATTTATTTACAAAAATAGTATGTCTT
+CATCTGGTCACCAACATCATCACTGACAGACTGAGAGAAACAAATTTACACATCTGTTGT
+CTCTAACATCAAATCTGCACTTGTAACTTCTCAAGATTCTGAGTAATAAGCTTATACTTT
+CTCTAACGGCCCAGGTGTCTACCCTGCCATGAGATCACGCTTGGGTCCCTCACATAGCTT
+TCGGCATCCCCTCACCTTACATCCTTGTGCAGAGATAAGCCGGAGGTCAGCAGGGACTCG
+GTCTCCACCCTTGATTTCCACCAGGTCTCCCAACACCACCTCTTGTACATTAATTTGCAT
+CTTCTCTCCTCCTCGAATTACCAGAGCTTGCTATGAGAGGATAGTGCAGGATCATTTCCA
+AAACTTTTGGCTTTCCTCATCCCACTCTTAGCCTAAAACCTTCTGTTATTGGTCTCCCTC
+CTCCTTACTCCCCTTTCTTACTGTCACCCACCACATCATTCTTGAAGGTGTCTTTGGCAG
+ATTGCAAATTCCCATGATGCTGTTTGCAGGGCCCTTTGCAATGTGACTTTGACGCTCTTC
+CCATTAAGAGGTGGAGTCTATCTCTCTACCCCTAAACCTGGGCTTGACCACGTGACTTGC
+TTTGGCCCATAGAATGAATGTGATGGAAAGAGGAGTTTAAAAAGTGCTTTCAGGCCGTGA
+CTGGTGGCTCACACCTGTAATCCCAGCACACTGGGAGGCCGAGGTGGGAGGATTGCTTGA
+ACCCAGGAGTTTAAGACCAGACTGGGCAACATAGGGAGACCCTGTCTCTACAAAAATTGA
+AAAAAAAATTAGCCAGGTGTGGTGGCATGCACCTGTAGTCCCAGCTGTTTGGGAGGCTAA
+AGTTGGAGGATCGCTTGAGCCCAGAAGGTTAAGGCTGCAGTGAGCCGTGATCACATCACT
+ACGCTTCAGTCTGAGTGACAGAGTGAGACCCTGTCTCAAAAAATTAAAAAAAAAGTGCTT
+TCATGCTGGGGCTTGCATTCCTGTTTCTGAGAATCCTCTGCCACCCTATGAACAAGCCCA
+AGCTAACCTCTCTGTAAAAAGAGAGATCACGAAGAAAAAGGCCCCAGCTGTCCCAGTCAT
+CTCAGCTAAGGGCCCAGACATGTGGATGAGGTCATCTAAGACCATCCAGCCCCAGCTGAG
+GAACAACAGCACAGCCACAGAATTATGAGAAACAGGAGCTTGCTATTTTAAGCCACTAAG
+TTTTAGGGTAGTTTGTTAGGTAGCAAAAGCTAACCGACCATGCCTAAACTCCTAGTCTCC
+AAAACTGCCCAAACACTTTGTCCCAAAACTCTTACCCAAGGAAGGAATTAAGTGATACCA
+AAAAGAGAAGGAAATCCTGGGCTATACTACTGGAACATGATCATATTATAGGGAAAATGT
+GGAGCCTATTCCTTTGCAAGTTTCCAGTCCCGTGCCTAGGAACCTCTGTTTACCGTCGTA
+TTCTTTCCAGGTACCTTTAAAAAACATTTAACTTGAAAATAATTTTAAACTTTCAGAGAA
+GTTGCAAGAATAAAAATAGCACCAAGAACACCTATATAACCTTTATCCAGATTCACCTAT
+TAACATTTCACCCCAGTTACTTTGTCATTTGTTTTCATCCTCTCTCTCTCTGTGTATACT
+CTTTTCCTGAACTACTTGAGGGTAAGTTACCTACATCATGATCTTTTATCTTAAAAACTT
+CATTGTGCATTTCCCAAGAATTAAGATATTGTCTTAAACAACCACAATGTAGTTATCAAT
+TTCAGTAAGTTTCACATTGATACTTTAATCTACTGTATTAATCTGTCAGTATTCCAGTTT
+TGCCAATTGACCCAATACATCCTTTAACGCATTATCTTTGGTGGAGCTGGAGGACATTGT
+CCTAAGCAAACTAACAGGAACAGAAAGTCAAATACCTTATGTTCTCACTTATAACTGGAA
+GCTAAACACTGAGTACATATGGACACAAAGAAGGGGAAAAACAGACACCAAGGCCTACTT
+GAGGGTGGAGGGTGGGAGGAGCGTGAGGACAGAAAACCTACCTATCAGGTACTATGCTTA
+TTACCTGGGTGATGAAATAATTCGTACACCAAATCCCTGTGACACACAGTTTACTTATAT
+AACAAACCTGCACATGTATCCCCGAATCTAAAATAAAAGCTCAAAATAAAAAAGTAAAAA
+TAAAGCATTACTTTCTCCTCTGGCACAGAATCAGATCTAGGGTTAGATATTGCATTTAGT
+TGCCACGTCCCTGTCTTTTTTTTGAGACAGCGTCTCTGTCTGTCACCAGGCTGGAGTGCA
+ATGGCATGCAATCTCTGCCTTCCGTGCTCAAGCAATCCTCCTACCTCAGCCTCCTGAGTA
+GCTGGCACTACAGGCACGCTCAGCTAATTTTTAAATTTTTTTGTAGAGATAAGGTCTCAC
+TATACTACTCAGGCTCGTCTTGAACTCCTGGACTCAAGTAGTGCTCTTGCCTCGGTCCCC
+CAAACGGTTGGGATTACAGGTGGGAGCCACCACCCTCGGAATAGTTGTCATGCCTCTTTA
+GCCTCTTTTTATTATTATTATTATTTTTTGAGACAGGGTCTTGCCCAGGTTGGTCACAGT
+TCACTGCAGACTTAAATTCCTGGGTTCAAGTAATCCTCCCGTCTCAGCTGCCTGTGTAGC
+TTGGACTACAGATGTAGACCAGCTATTTTATTTTATTTTATTTTTGTAGAGATAGCATCT
+CAAACTCCTGGGTGATCCTGTTGCCTCAGCCTCCCAGAGTGCTGGGATTACAGGCGTGAG
+CCACTATGCCTGGTCTGTTTAATCTGGAACATCCCCACAGCTGTTCTTTGTCTTTGAGGT
+CATCGATATTTGAAGAATCCAGCCACCCCCCACCGCACCCCGCTCCCACCCCCACCAACA
+CTTGTCTCTTTAAAAAAATAGAATGTTCTTATTTGGGGTTTGTATGGTGTTTCCTCTTTA
+GATTCAGATTCTGCCTTCTAGATCAGAGCACTTCATAGGTGACGTTATGTCTTTCTCAGG
+GTTTCACAGCTTTTCCAGGTACTTTTAAGGACTGATACAGTACCTTGACCCCCTCATCAC
+CAACCTCACTCAGCTCACTTATCATAGGATATAATTTTCCACGGGACTTCATCATTGTGT
+ATTTGGCCACATCCTGCTCCTGCTCCCACTAGATCCTCCCACTCCAATCCTACCTGAGGC
+ACCATGTTCTTAAAAGACTCCATGATCTTGGAGCTCTTGGCCTCCTGATAATAGGAGAAG
+CAGCCAGTGACGATGACCACGACGGACAGTACGATGCTCAGGTAGAGCTGAGGGTGGGGA
+AAGAAAGCGTTTATCAGTGGGACCTTGTCGTCTTCTTCATCCTCAGAAAATCAGTCTGCA
+AGGGGCACAGCAGTTTCACCAGGTGTAAGAGACCGTAAGACTGAGTTGAGAGCAGAGGGG
+TAGAGAAGAATCTGAGGTGGGACCAGTACACTTGGCTTCCTGGGGAAGGAACCCTGAGCC
+ATGGTGAGCCGTCAGCAAAATTCCTGGTCAGAGAAGGCCTGTATCTTTCTCTCCAGGAAG
+GCTAATCTCTGGATGGAGGGGGTGGATTGTGAGGGAGGCTGGGAGGAAGGAGATGAGGTT
+AAAATTGTGGAGAAGAGAAAGGGACTGCAAAGTGCTTTGGGGATCAATTGGGAAGGTCAT
+CAAAGAACAGCAAGTTTTTCGGGGACATTGTGCACCTCAGGGAACTGGACCAGAAGCTTT
+TTAGATTAAAAGGTGAGAGGAACCAAGATGGAACTTAGAAAGTGGAGTATTAGGGATTTT
+GGTTTATTACATTAATGTACATTTATTACATTATTATGTAATGTACATTTATTACATTAA
+TGTACATTTAATTACATTTTATAATTAATTACATTCATTACAATGTAATTAATCAATTAC
+ATTGTAATTAATTACAATCAATTACATTTATAATTAATGTACATTTATTACATTATTACA
+TTAATGTAATAATGTAATTAGGGATTTGGGCCAGAGGACTTCAGGAGTGCTAGGCTGCTG
+AGCAAAAGAGTGTCTGGGGCTAAGCAGAGATAGGGCTGGCTAGTAGTAGCTGAAGAGACT
+CACGTTGTCTTTGGTAGGCTCCTCATTGAAATATATCTGGATGCTGTAGGCCACAAAGCA
+GAGAATGGCCCCAGTCCATAGTAGGAGGGAGAAGCCTCCGAACAGTTGCTTACAGAATTT
+GACCCATTCTGGAGTGGTGGGGGGTGGGGTAACAGTATTGGGTCCACCTCGAGTCAGGAT
+TTCCTTTGCCCTTTGGTGGCTATGGCCCTGTGCAGAGAAGAAATAGGGTTGGATAGAGAG
+CTGTGAAAAGAGGAACAGCCATAGCAAAAGGGAACCTGACATTTGGAGCCCCAGAATGGG
+TCTAGAGACTAAAGACGCTTTCAGAAGGTCAGTCTGGAAGTTCTTCCTGGATAAGGTGAG
+GTTTCAAAATTTGCATTAGAGGAGGGATGTCATTTAGCATGCTTACATAGGAAGGTTTTT
+GTTTATGGATTGATTCACAAACCTTTTTGAACTTGAGAACCTATCTCGGGTCCTAGGTGC
+TGAGGATACCAACATGAATAAGAATGGTCCCTACCCTTAAGGTGCTCCAGTGGGGAGATA
+AATACATACAGATAATTATAATGTGCAGGGCCAATGTCATGATGAAATATAATACTTAAT
+GTTGATAACATATTAACACCACGCTTTGACTCTCCAACTTAGAGGGAATAAGATCTAGTG
+TTTGGTAGCACAATAGGGCAACTATAGTTAATAATTTATTATGGATATCAAAATAATTAG
+AGGAGTGGATTTGGAATGTTCCCATGCAAAGAAATGATAAATGTTTGAGGTTATGGGTAT
+CCTAAATACCCTGATTTGATCATTACACATTGTATGCTTATATCAGAATAATACATGTAT
+ACCATAAATATGTACATCTACATATCCATTTTTTTTTTTGAGACAGAGTCTTGCTCTGTC
+ACCTAGGCTGGAGTGCAGTGGCATGATCTCGGCTTACTGCAACCTCCATCTCCCAGGTTC
+AAGTGATTCTCCTGCATTCAAGTGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGAGATTG
+CAGATGTGTGCCACTATGCCAGGATAATTTTTGTATTTTTAATACAGATGGGGTTTCACC
+ATGTTGGCCAGGCTGGTCTCCAACTACTGACCTCAAGTGATCTGCCTGCCTTGGATTACA
+GGTGTGAGCCACGGCACCTGGCCCATAAATTTTTAAAAAACATAAATAGAAAAGAATACT
+AACAAAAAACATGAATGCATAGATTCTCATTCCACTCTTGTGCACAAGCTTATGTGCTTT
+TTCCTGTTAATGAAAATGATAGTAGAACCTGCCAATCACCACCAGCCACTCCCCAACCTC
+TGCCTGAAACACCCGAGGGGCCCCAAAACCCTTTCAGGGGGTCCACAAGGTCAATTTTCA
+TAGTAATACTAAATCATTGTTTGCCTTTTTTTACTCTCATTTTCTTAAGAGTATATAGTG
+GAATTTTCTGAAGGCCAACTGGCATAACATCACAACAGATTTTATCCAGAAAATATTTGA
+GAATCCAGCTATCTTCTATTAAAAAGCAAGACATTGAAGAGGTTTGCAAAAATGTAAGAT
+GCTGCTAATTTTTTTGTTTTGAAAAATACAGTTATTTGTCAAAACATATATGAATATATG
+ATAACTTTATTTTTGTTATTTTTAAATGAATTGAGAAATACATTTTAAAACTCTCAATTT
+TAATTTCTAATATGGTAAATATAGACAGACTTAAGCCACATAAGCACAAGTTCTTTGCGG
+TCCTAGATAAATTCTAAGAGTGTAAAGGAGACCTAAGGACAAGACGTTTGACCAGCACTG
+CCCTAAAGCATTCTTATTCTCTTATCTAGTCCCACACTCTCTTTGAGGGTAGTTACCATT
+TGCAGCTTTTTCCAGGTAGTGTCCATGGAACGTGGCCAGGCAGAGGGTGAGGGGTGGGAA
+CGTTGGACTTGAAGTTAGGGGGCTGGATTCCTAGTTCAGCTTTTGTCCTGGCAGCCTCAC
+AGTCAGAGTTGGAGACTCTCTGCCTCCTCAGGGAGCTTCTTACTCACCTTTGTCAGGTCC
+ACGGAGTACTTGGTGCTCAGCTCTTCCAAGGTTAATTTGTGATCATCCTGAGGGGACAGT
+AGAGGCATTGAGGGACAGGGGGTGGCCCAGGTCATTTCCAGGGAGAGGGGGAGAAACAGA
+CTGGAGAATGTTAGTCCAAACTGAGACTGCTCCATTTTGTAAGCTCCCTGCAATTCTTTT
+TTTGAGATGGAGTCACTCTGTTGCCCAGGCTGGTGTACAGTGGTGCGATTTCGACTCACT
+GCAACCTCTGCCTCCCAACAGGTGTTTCTCCTGCCTCAGCCTCCCAAGCAGCTGGGATTA
+CAGGGGCCCATCACCAGGACCAGCTAATTTTTGTATTTTTAGAGGCGGGGTTTCACCATG
+TTGGCCAGGCTTGTCTCAAACTCCTGACCTCAGGTGATCTGCCTGCCTCAGCCTCCCAAA
+GTGCTGGGATTACAGGTGTGAGCCACCACGCCCGGCCTAGCTCCCCACTATTTTACAGGC
+CTTGGTCAAAGTGAAACATTTCACGGGGGTTCGGGCCAGAGAAACAGCCTGCCTAACCAC
+CTGACCACAGGGCAGACGAAGGCCCAATTAAAGACAAAGACCCCACTAAAGAAACATCCC
+TATCACATCCTGCTGGGCAAAGGTCCAGGGAATACCATGATTATATTCTGTGGAAACAAG
+GGCCAAAAATCACCTCATCATGAGAACATCTTATCGATATCCTGCCAGGCAGCAGGCCAT
+ACAGCCCAGACTCCTCCCATCCATCCCTATAAGTACCCAGCCTGTAAGTGGCAGTGGACT
+GTGGAATTAAGCTGATCCCCCACTTCCACAGGTGTCTGCAATATACCTGTGCTTGCTGTT
+GAGCTGCCCTGTCTCTCTGTGTGTATCTTTTTTTTTTTTTTTAACCCTCTCCTTCCCTCC
+AAAGCCTAACAGAGAAGAGATGTGGCTCTGGCCTGTGTGTTTCATTCTCAAGGTAAGATG
+TTAAAGATAGGTGGAGGAGTGTTTTCAGAGGGCAGCTGTCAGCGCCCACTTTGGGTGGCC
+TCACCATGACCACTTCCTTCTTCAGTTCCTCCATATTGCGCTTCTGTTTTTCCCTCTTCA
+CCATTTTTTTCTTGATAAGCCCTTTTTTAGGTCTTCGTCTTGGACTCTGGTCATGGGGAG
+CCACTGTCCCTTTCTTCCCCCAAAGCCCCATAGCTATCCCCAGAAAGAGCTGCCCGAGCT
+CAACTGTGGGAAGAAAGCTGAGAGAGTGAGGCAAGGGGAAGGGAAGAGGGCGCGCGGGCA
+AGGGGGCCACGAGAAGAGAGTGGTGGGGAGAGAATGAGGGAGCAGAAAGAAAGAGGCAGG
+GAGGGAGGTGTTGGGTGGTGAAGGTGAGAGAGAGGAAGGGAAGTGAGAGAGGAGAGACTG
+AGCGGTCCTCCAGCCGTCCGCTGGGAAGAGGAAGAGTCAGGAGTGAGGGAAGAGGGAGGA
+GAGAAGCCTAGGGTGGAAGGATAAAAAGGTAGAGGTGAGGGAATGCAGTGAGGGCAGGAA
+ACAAGAGGTGAAGCGTAACCAGGTGCCTCAGTGATGAAGATGCCGCAAGGCCCACAGGAA
+GGTGGGAAATGCAGTGAGAGTGAGGAGGCGGAGTGAGCTTCAAGGATCTGGGAACCCCCC
+AGTCCCAGGCCCTGTGGTTGCTGGGGCAATGACCCAGGTCTGGCATCACAAAGGCCTCCT
+GAAAGTTCTAACCAAGAATCCAGAAGTCTTGCCTTGGCGATGGTCCAGGTCTGCTGTCGC
+AAAGGCCTCATGAAGGTTTTAACCAAGAACCTAGAAGTCTCAGTCTTTGGGTTTGAGGAC
+GATTGGAGCAGGCCCTGCTACAGAAAGAGTTTCAGTCTGAGGGCAGAACTAGAACAAAGT
+TCTTGTGTGGTCCTGCAGACTTTGGAGGCTTAGAGGTATGCCAAATAGATGACTCAAAAA
+TAAAGCGGGTATTCGGCCGGGCGCGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGG
+CTGAGATCGAGACCTTCGTGGCCAGTATGGTGAAACCCCATCTCTACTAAAAATACAAAA
+TTTAGCCAGGTGTGGTGGTGGGCACCTGTAATCCCAGCTGCTCGGGAGGCTGAGGCAGGA
+GAAACATCTGAATCCAGAAGGCAGAGGTTGCAGTGAATTGAGATTGCATCACTGCACTCC
+AGCCTCGGTGACAGAGTGACTCCATCTCAGACAAACAAACAAACAAACAAACAAACAAAC
+AAACAAACAAAAAACCATAGCAGGGAGCTTACAAAATGGTGCAGTTGCAGTTTGGACTAA
+TATTCTCCCGTCTGTTTTTGAGACTCCATCTCAAAAAAAAAAAAAAAGCAGGTACTCTGT
+GTGTGTTTTTTAGCAAGGCACACTTTTCCACTAACATTGGGTTTGATCTTTTAATTTTAG
+GAGATCAGCTAGCAGAATAGATCAGATCTTGGGCTCTGGAACCAGTCCTGGATTTCAATC
+CCAACTCCTCCACTTGTCAGCTGTGTGGCTTTGAGCAAGTTACCTAACCTCTGTACCTCT
+CTTTTCTTATTTTTAAAAGAGGATACCTTCTACCTTAAGTGAGGTGCTCTACAAAAAGCA
+CTATGTGACATATATAGTATGTGCTCAATAGGTAGCTTGAATTCAGTAGTAGTTTTATTT
+GTGGATTAATATGCAGCTCTTCTCAGTGAGGGTAAAAATATGCTTTTCCATGTGATAGCG
+TTGCATTTTCTCATTTTAAAATGATGTACTTACCTCTTCCATTGACCTTCTCTGGTGTCT
+CAGTAAGTGCTTTGACTATATTAACTGTATATATGTTTTGCATGTCCATAGGATTCTACC
+TCCTTCCAAGAATAATCTGAAGCATCTATACTTTTCTGTCAACCATTATATATATTTTTC
+AATTATGCACGTCACATAGATTTTCCCATTCTGTTGTTTGATATTTGTTTATTTACTTTT
+TGAGACAGGATCTCACTTTGTTGCCCAGGCTGGAGTGTAATGGTGCGATTATGGCTTACT
+CCAGCCTCAACCTCCCAGGTCCAAGTGATCCTCCCACCTCAGCCTCCCAAGTAGCTGGGA
+CCACAGGCATGTGCCACCATGCCCGGCTAATTTTTTTTATTTTATTTTAGTAAAGACGGG
+GTGCCCCTGTGTTGCCCAGGCTGGTCTCAAACTCCTGGGCTCAAGCAATCCTTGGCCTTG
+GCCTCCTAAAATGTTGGGATTACATAAGTGAGACACCAAGCCTGGCCTGATATTTATTTA
+ATTGCATTTTGGTTGGGCATGCTTTTTAAAAATATGACTGTATCTGGAAACATTCATCTT
+GGTGCTGATGCAATCCTCCTTGAATATTCAGAAATAGAAATGCTCCCTTCCCAGCTGGGA
+TAAATGATCGTCTATTTCTTTAACAGATTGGATTTTTCTTCTCTATTAAAAATGTTTAAG
+ATAAAAATTGAAAGCCCCACCTTCCCTCCCTCCAAACTCAGATTCTCCCCACTGCTGAAG
+TTACTGCTGGTAACAATGTGCTGCATGTCCTTCCAGTCTTTTTTCTATGTTTATACAAAT
+AGATATCTAAAAGGAGGTATTTTTCAACTTTTAAACAATCATTCAACTCTCTACCAACCT
+GTCATGTACATAAAGCTTCTTTTTTTCAGTGCTGTGAGGCTTCCATATTCTGTATTGACC
+ATGCTCTATTCAATCATTCCCATTACTGATGGATTTATGAGATATTTTCCATTTTTTTAA
+CCATTAATAAAATTGCTAGAGTGTCATTCTTGCACATAGATATCATCAGGCCCTTGAGCT
+TCTATTTCTGTAGAATCAGTTCCTGTTGCTGAATCATAGAGTACACTCATTAACAATCTT
+AAAAGATTCTGCCAATTGGTCTTTAAAAGTGTTTATCTATCTGCATCCCAACAACAGGAT
+GCAAGAATGTCTGCCTTCCACACCTTTGATTATGCTAGATGACCTTGATTTTTACTATTA
+TAATCAGTGAACTTTGATATCTTGTAGTTTTAATTTGCATTTTTAAATTGTCAATGTGAG
+CATTTTCTCTTATGTTTACTGGCTTTCTGGTTTTCTTTCTTTCTTTCATTTCTTCTTTTC
+TTTCTTTCTCTCTTTCCTTTCTTCCTTCCTTTCTTTCTCTCTTTCTTTCTTTTGACAGGC
+TGGAGTGCAGCGGTGCAATCTCAGCTCACTGCAACCTCCACCACCTGTGTTCAAGCGATT
+CTCGTGCCTCAGCCTTCTAAGTAGCTGAGACTACAGGCACACGCCACCATACCCAGCTGA
+TTTTTTTGTATTTTTTAGTAGAGATGGGGTTTTGCCATGTTGGCCAGGCTGATCTTGAAC
+TCCTGACCTTGGGTGATCCGCCTGCCTGGGCATCCCAAAGTGCTGGGATTACAGGCATGA
+GCCACCATGCCTGGTCTTGGACTTTATTTATTTATTTTGTAAATTGTCATTCTTACAATT
+TGCCCAATTTTCCATTGTTTTCTTTTTTATATTGATTTGTAGGAAACCCTTGTCATATAT
+AACACAAATGTTTTCTCCCAGTCTTTGTGTTTAATGTAGATGAACATAAAATTCTTAAGA
+AGAAGGTTAGAATTAAAGTTAAAATTCATATGTTTTTCTATGCATCATCTATGTGACAAT
+AACACTTAGCTCAGTCTTGCAGATGGGCATTGCCCAAAAGCAACTTTATGGAGATACAAT
+CAATTGAAATTATTTTTAGAAACCTCATTTTGTTCTAGAGATCTTAGCTCAAAAGTCCAC
+CTCTTCGGAGAGGCCTTCTCTCATAGTTAAATCCAAAACAGCCCCTCCGGCGCCAGCCCT
+CTCTCTATCACTTTTGATCTTCACAACACTTCTCTCCCTCTGAAATTATCCTGCCTGTTA
+ACTGTTGACTTGTCTGATTGACTTGTCTTGTTGACTTGTCTGGTTCTCCATGACCCTAGG
+GACCTTCCCTGTTTTTTTCCACAGCTATATACCTGGCTCCTAGGATAGTGCTTGGCACAG
+GGCAGCTGGTCCACAAATAGTCCCTGAATAGATGTTGAATGACTAATGAGTTTTCAGGTT
+TTATCTTAATGCTACAAGGTTTTGATAATTGCCACTTTGGTCTGATTTTTTTTTTTTTTT
+TGAGACGAAGTCTCACTCTGTTGCCCAAACTGGAGTGCAGTGGTGCAATCTCAGCTCACT
+GCAACCTCCACCTACCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGA
+CTACAGACATGCGCCACCATGTCTGGCTAATTTTTGTATTTTTAGTAAAGACAGGGTTTC
+ACTATGTTGGCCAGGCTGGTCTTGAACTCCCGACCTCTTGATCTGCCGACCTTGGCCTCC
+CAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCAGTGGTCTGATGTGTTTTA
+AAATCTGTTGGGGTAAATATGTTAGCATCAGTGATCTATTCATTTGCTTCATAATTACTT
+AGAAAAAACCTCTGAGATTCTTGCCTGATTACTCTTGCAGCTGAATGTCACAACCAATAT
+GTCATGTTTTATAAAAATGCAGCGAGGATTTTAATTGATATTGAATTGACTCTTTCATAG
+TAACTTATTTTTATCTTTACTACGTTCTGCCTTCCCAAATGGCAGTCAAGTTTAGCTTTC
+CATGAATTCATGTTTTTTTATTTTTCTCATTGTACTTTTAAATTTTCTTTAATAGTTACT
+ATGTGCTGCACACAGTAAACCTTGAGCGTAATTTGTTTTCATTCTATTGTAAATAGAAGC
+TCTTTATGCTGTATTTTCTGTCTCCCTAATATTTAGGGATACATTTGCTTCTTTGAAAAC
+TGATACCCATATACCCTGTAACTTTGAATTTATTGCTTCTAGGATTTTTTTTGTTATTGA
+TGCTGTTGCTTTGTTTTGGCTGAATTTCTTGGATTTTTAGGGAGTTATGCTCGCTAAAAA
+CAAAGTTTTATTTATTTAGAATTTTTATTTATTTATTTTTATTTTTTGAAACAGAGTCTT
+GCTCTGTTACCCAGGCTGGAGTGCAGTGGTCCAACCATGGCTCATTGCAGCTTCAACCTC
+TCAGACTCAAGCGATCCTCCTCCCTCAGCCTCCCAAGTGACTGGGACCATAGGTGTGGGC
+CACCACCCCAGGCTAATTTTTTTTTTTTTTTTTTTAATAGAGATGAGGTCTCACTGTGTT
+GCCCAGGCTGGTCTCAAATTCCTGGGCTCAAGCAATCCTCCTGCGTCAGCTTCCCAAAGT
+GCTGGGGTTGTAGACGTGAACCATAGCACCTGGCTTATTTAGAACTTTTTTTTTTGGAGA
+TGGAGTCTCTGTTGCCCAGGCTGGGGTGCTGTGGCACCATCTCGGTTCATTGCAACCTCT
+GCCTCCTGGATTCAAGTGATTCTCAAACCTCAGCATCTCAAGTAGCTGGGATTACAGGCA
+CCCGCCACCATGCCTGGCTAATTTTTTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCA
+TGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAAGAGATCCGCCCACCTCAGCTTCCCA
+AAGTGCTGGGATTACAGACGTGAGCCACCGCGCTCGGCCTTTGTAGAACTTTTAAACACA
+AACTGCTTATCGAGGATTTCAAATGTTTTATTGCAATTATTTGTTATAGTTGCTGTTATT
+GGTTTTCTAGACTTACTACATCCTTTCCTCTCTTCTCTGAACATGATTACATTTTCTTTT
+TTTTTGAGACCGAGTTTCACTCTTGTACCCCAGACTGGAGGTGCAATGGCGGGATCTCAG
+CTCATTGCAACCTATGCCTTCCGGGTTCAAGTGATTCTCCTTCCTCAGCCACCCGAGTAG
+TTGGGATTACAGGCGCGCGTGCCACCACGCGCAAATTTTTGCATTTTTAGTAGAGACAAG
+GTTTCACTACGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACCCACCTACCTC
+GGCCTCCCAAAGTGCTAGGATTACAGGCGTGAGCCACCGCGACTGGCCCATGATTATATT
+TTCTGATACACTTTTTCCTCCAACATTTTTATCAGTAATAGATACTGAATTTTACTGATA
+GCCTCCTCAACCTTTTTTGAAATGATTATTTCATCCTTTGTCTAGTTAATGTTAGTTGTT
+TGTAACATTTTCCTTGTAATATGCTATCCTTGCATTCCACCATTTGGGGTAGTCAATTTA
+TTATCTATATTAAATATAAAATTTTAACAAAATAGTACTTATCTCATAAAATAGGCTGGG
+CAACACAAAACACAAAATAATTTTTTATATTCCAGCACATTTTAAATAATATAAGGAAAT
+TCTTTGAGGAATTTGCAAAAAATCCTTCATTGAATTCACTGGAGCCAAGGATATTTTTGA
+AAGACTCACTTTGTATTAAATGTCTTCCTTTGATATTGATCTAACCACTTTCTTCCTATT
+TTTTGCTGCGTTCTTAGCATTATGAATATTTAGTGACTATCTCTGATTATCTCTGATGCA
+TGATCACATTTCCTATTCTGTAGATAAAAAGAAAATGCCATGGAAAGACTTTCTGCCTGG
+GTCAAAAGTTGGCCTTAGCTTCTCAGATTACATTCAATAGTAAGATTCTTCTTTTTGCTA
+AAATCCTCTAGCAGACCATCAAATCTCTTTGGATCTTTTTCTTTCTTTCTTTTCTTTTCT
+TTCTTTCTTTTTTTTTTTTTTTTCTGAGATGAAGTCTTGCTCTGTCACCCAGGCTGGAGT
+ATAGTGGCACAATCTCGGCTCACTGCAGCCTCTGCCTCCTGGGTTCAAGCCATTCTTCTA
+CTTCAGCCTCCTGAGTAGCTGGGATTACAGGTGTATACCCGGCTAATTTTTGTATTTTTA
+GTGGAAGACAGGGTTTCACCATGTTGGCCAGGATGGTCTCGAACTCCTGACCTCAAGTGA
+TCTGCCCACCTTGGCTTCTCAAAGTGCTGGGATTACAGGCGTTAGCCACTGTGCCTGGCC
+TGGATCTTTTTCTTAACCCCCTACCGCATCTTTGAGATCATATCTTCTCCCTTTCTCACG
+GATCTGCCTTGCCCCTGCTGCTCAAATGTTAATTGTCCACAAATTCCTGGCTCCCTTGGC
+AGCCTCCCCAGTTCACTGAGCCATTGGGCTAGTCAGGCTCTGTCCATGCACATAGGCTTT
+CTGTTCTTGCAGTTGCAGAGCATGGCATGGTTAAGAGGGCAGGCCCTGGAGTCAGACTGG
+CCTAGGTTTGAATCCTGCTTCAGATGCTTACTACCTTTGAAACTTTGAATAATAATTTAA
+CTTTCTGAGCACCAGTTTTCTCATCTGTAAATTGGAGATAATAATAGTCTCCATCTTACT
+GAATTACTGTGAGGATCCTTTCAGTCATCCAACACATATTTATTAATTGTCTAACGTGTG
+TCAGATGTTTTAGGTGCTAAGGATAAAGGTGTTGAACAAAAGAAATAGAGTCTCTGTCTT
+CATGGAACTTATACTGGAGGAGTCAGACAAACAAAAGCATAAACAGCAACAAAATACATG
+TATATCTTTCTAAATATTACATCTAGCTGTCTTTCTATCTAAATTTAAGGTAATAAGTAT
+TTATGAACAAAAACGAAGAGAACAGAGAAAGAGGTTCCAGGAAGTCATTTTATTTGAGGT
+CCTCTGTGAGGATCTTACTGATGTTTAAGGAGTGATCTGAATGAAGTGACAAAGCCAGGC
+ATGTGAATAACTAGGGAAAAGTGTTCCTGGCAGAGGGACCATCAAGAATAAAGGCCCTGT
+GGTGGGATGAACGGAGACTATACAAATAAAATTTTGGCATAGTTATGGCACAAAATGAGC
+TGTGATTAAGGCTCTTATAATCACTACTAACTATAACTGTCATTAATGACACTGTTAAGA
+GTCATAATAAAGGGGCTGCAATATGGCTTGGGCAAGAATTGACTTCTGACCGTTATTCTG
+AAATCCTGGCATAGGTTGTAAGAGTTGGTGTCTTTCTCTCTCCCATTCCCAGGGCCCCCC
+TGGAGCAATGCACACCCACCCACCCACCCCTCCATGTCCAGCACACAAGCGTCCCCCCTA
+CTGCTCATTACATTCTGTCCATGATGGTTTCTGATGTCTACTACTCAGGTGAATTTCAAG
+GATTGCATCCACTTTACCCCCACCCCTTAAAGAGAGCACCCAGGACAAAGCTCTGCATGA
+AGTACAGATTGTGAATTGGTGTTGACTCTTGGGCTCCTCAACCCACTATCTCCTTCCCCC
+ATCCCATCAGATAGAGAATGGCCTGATTTTGGTCTGGAACAGAAAAAGCAGAAGTGCCCC
+TGAGGATCTTGTTCTTAATAGACTCACAGGCCCCTCCCTGTCCTGGACAAACTCTTGTGG
+CTCTTAGGGTGGGATCAGCATGAGGAAGGAGGCTCCTGCTATAGTTTCTTGTAGGTTATT
+TTGCAACCTAACTTCTTCCCCTCTTGCTTTCTCCACGGACATCATCTGCTCCCAGACCAT
+TTCCCTCCCATCCCTGACACTTGCTATCTCTTTGTTGCTTTCCCTACAAGCCGAGTTCTG
+CCACAGCACATATTTCAATATCACGATTTAGGAGGGAGGCAAGTATAAAGTGTAGAAATC
+TGAATTATGGAAGAGATTTCTTTTTTTTTCATAAAGAAATGCCACTTTATTATTTTCAAG
+TACATAGGTAGTACCTCAAACTAATGTCTTACAAAAGGTCACCAATAAAGATCCTGTAGA
+TGTTTCTTTAACAAATACATAAAGATTGATGATAATTAGCCCATGCACTCGCTGCTTCTA
+AAAGGATATTTCTAAAGTGCCTGAAAAGTATTGTTTTTGGTAGACTAAACATGCCACCTC
+TAGCAATATTGTTTATGCTCTTAATTTACTAAAAAAATGCCATTTTTATTGATTGTGCAA
+AGATAAACTTTTGTCCACAGTTAGTAATCTCCAATTAATCGGATCCAAATTAATATTGTC
+TCCCACTTAGATTCCCAGGCCCAAGGCAATGACTCCCTTACTTCCTTGCTTCTGATAAAG
+CAAAATTCCAGTGCAATGAAATGACAGCCTTTGGGAGGTGGTTTATGGGTCTGACCCAAT
+GCTTGTCTTTTGTGCTTCCTTCCTGTCTGCCTTAGGTCTTCCATTTTCCTCAGATGTCTT
+TCTCACTTGCATGACCTATCTAGGTCTCTCTCTACAGGACATTCCCAGTGCCTTCTAAGC
+CAGGCTTAGAGAATGGGCAAGGCTGCCTGCAGAGCAAAAGTGTTAGATAGGGAGTAAGGG
+AAAGAAAGCTATCTCCTGTAGGAACCTGAAACCAACTCTCCAGCCCAGGGTAAGTGGGCA
+CAATGATCGTCGGTTTGAAGGGCATTTGCCTGGAGGGAACAGCCCTGCCACCTAAGACGT
+CAAAAGCAAATCATTTTTTTTTTTTTTGAGATGGAGTCTCACCCTTTCGCCCAGGCTGGA
+GTGTAATGGCACGATCTCAGCTCACTGCAACCTCTTCCTCACAGGTTCAAGCAATTCTCC
+TGCCTCAGCCTCCTGAGTAGCTGAGATTACAGGCGTGTGCCACCATGACTGGCTAATTTT
+TTGTACTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTCGCCTCGAACTCCTGA
+CCTCATGATCCACCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGC
+ACCCAGAAAAAGCAAATCTCTTAGTATTTTTCCTCTTGTCCAAAGGTTCTGACCATGTTC
+ATGACCTAAGCTTGTCCCTGGAAGCATACATGTCCCTGGGAGACAGGAAGGTTCAGGGAA
+CTTCTCCACCCTGATTCTAGCCAATGGCCAGCGAACTCCTTTTCCAGGGTTGGCTTGGCA
+GTGTCGGGGAATGATGCAGGTTCATCGGGATGGGCATGAAAAGAAAGGGAATGGAAGAGG
+CTGGGAAAGAACTTGTGGCCATCCCTTTCAGAACAGCATGGAGCTCTGAGGACTTAGTAT
+TACAGAAGGTGAAGTGTTATTAAGTACATGAGTGACATCCGGACCTTGTGGACTAAGAGG
+ACCCCTAACCAAGGGTCAGCGGGGAACTCTGTGGCACACAAGTGTGGCATTTCTGGAAGC
+TCTTGATTGCCTTCTGGTCCTGCCTCTGCTGCTACCCTGATGAGATCTGAGTTTGTTCTG
+CTGGTGGGGTATTCTAAAATGTGGGACGTAGAGCTGGAGTCCACGACATCTATCTGGAGA
+CTCATGTCAAATGTCCTGTCATTGGCACTACCTCACTTCCTTTGTTGTTTCCTTCCTCCA
+GCCAGCCCTACCTCCATCTCCCACCTGTCTGCCCAGCTCTCCCTGAAGGTCCCTTACCAG
+CCTCGACTGGGAGACAAGTCCACCTTTTCCAGTCTCAGGCTCTTGCACTCACCAGAAGAA
+CCTTTTGTGATTGCAAGGTCTGAAGGGGAAGCCAGGGATAGAGTCCTGCAGGAAGCTAAC
+GGGGGTGGAAGTGGGAAATGGAGTGAGGCTGGGGTGATCTGAGAGGGCTTCCTTCTGGTT
+TAGATGGGTTCCTGCTTTTGGATGCTGATCTATTACAGTAGGAAGTGGGGTTGAAGGGAT
+GGGAGTGGGCAGGGTGGGGTAACCCTTAATTCCTCAGAAAAAATAGACAACATAGTGGAG
+TGGGATGGAAAAGGATCTGGTCGGGGGGACTCTAACACATAGTGTCTAATCTGATTGTTC
+ACAGTTAGGTCTATGGGGTCATCACCCCAATTTATATACCCCGCAACTTAGCCTGCTGAG
+TTGCCTCATTATCTCTCCCCAAATGTTGCCACCCAATGCCACCCGTTATTTCCATCCCTC
+TCCATCACCATCCCCACAACACCTCCAGAGCTCCCCATCTTTCCATGCCTGGTTCTTCTT
+CCAATGGGGTCAGTAGTATGTCTCCTTCTCCACCCAGCCTGTTAGAGAGATTGGGAGTGG
+AGATTGCAGCATGAGCTTAAAAGGAGGCAAGAGATCAATTTCCTAGGGGCTTGGGTCGCT
+GAAGCCAGGTGGAAGGAGAAGCCTGGTGAAGGGCACTGGGGGAAGTGGCTTAGGAAGGTG
+GAAGGAGGCTGGGTGCAGTGGCTCATGCCTGTAATCCCAGCATACTGGGAGGCTGAGGCA
+GGCGGAGATCAAGACTCCTGGCTAACCCAGTGAAACCCTGTCTGTACTAAAAATACAAAA
+AATTAGTCGGGCATAGTGGCATGCGCCTGCAATCCCAGCTACTCTGGAGGCTGAGGCAAG
+AGAATTGCTTGAACCCAGGAGGCGGAGCTTGCAGTGAGCCGAGATCGTGCCACTGCACTG
+CACTCCTGGTTGGGCGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAA
+AAAAAGAAGGTGAAAGGGAGGAGAAGGAGGTCCAGTGAGCTGGGGCCATAGGTTAAAGGA
+AGGGAGGGTGAACCTGCCTTTAGGGGAGACTTGTATTTAGCACATTGGAGGCTGGGGCAG
+AATAGGTCGAGGGTGAAAATGCAAACGCCCACAGGGCCTCTTCAGGTGACGAACGAGAGT
+CTAAGGGAGCAGCTGTCCATTCTTGCTGTGCAGAGATGTGGACATGGGATTAACAGCTCT
+TCTGATCTTCAGGAGAAGTCGAATTCAGATTTTATGTGAAAGACCCTAAATTTTCTATAT
+CGGACCAATTAATCCACCCTCCTCCCCCCACACACACAAAATATGGCACAGGTCAAACCA
+AACAGTATTCCTGGACAGTATTTGGTTTGTAAGCCATCATATTTTAACCTCTACTAAGTG
+ATTAAAAAACGCTTGTAGGAAAAACAATAACTTGTCGGTTGATGAGTGGGGATCAGGAGG
+CCTGGGTTCTAGTACCAGTCCTGCTATTCTTTGCTTTACGACCCTGAACAGATTCACCTC
+TGAGCCTCCTCTATCTGTAAAATATGGACGTGGGACTAGGTGACCTGGAAAGGCCCTTCT
+AATTCTGATGTTCTGTGGTTTCAGGTGGAAGGTGGAGGGCCTCGGAGACCAACTGTGAGT
+TAGGCGGGGTTTAGAATCTGATGGGAAGAGGGCAGAGACAACAGGTTCGAAAGAGTCAGG
+GCTCCTGGGACCTGGGACCCCAGGATCTGAGGGTGTTGAGGAACAGAAGCAGGAACCAGT
+AGTGGGAGTGGAGTCTGCGTGTGGTCCTGGAGGGAGGAGCTGGTGGTGGGGCTTGCACTT
+TGCTGGGGGGAATGTGGAGGGGCTTACCACCAGGATACCGCCGCAGGATGAGCTTTCGGA
+CCTCATCATAGATGAAGATGAGGAGGCTGTAGGGGAAGGCGCAGAACCACCAGGTGACTC
+TGAAAGCAATGGAGGAGAGTGTCAGCAGCATCAGAGGTTGGCTTGGTGGCTGTCACATAA
+AGGAGCATTCAAAGCGATCAGAACTTGGATCCTGGAGGGGGCTGAAGGCCCCCCGTATGA
+CTACTCAGGCCGCCCGAAAGAGACACTCACTTGAGCGGGTACATGCGGAGGGCTACACCC
+ATGCCTGGGCAGTAAGAGAGAAAGGCAGCCAACGCCGTCTCCTCCAGGAGCCCAAAAATC
+AGGATCTTGTTCCTGAAAGGCAAAGAAAGGAGGTGGCAGGTGAAGGGGGATCATGGAGGC
+AGAGATCCTGGGATACCCCTGGGGTGCAAGGGAAAGAGGAGAGAGAATTCTAAACGACAT
+AGTCGCATGTCTTCTTCCAACAGCCTCTTCTCATGCTTTATGTAACCAAAGGAAGATGTT
+ATTTGTTTTTCTGCTTACTGAGTTTACGTCCGAATGTTGTCTATTGCACGTATATTCATT
+CATTAAATAATTGTTTCTAGGTGTTGGGTGACGTGTTCTGTACTGGAGACACGGTGGTCA
+ACATGGGGCCTGTCTCATGATCTTGTTATTGCTCCAGGCACAGGGCCCATGAAAGTGTGT
+CTCCCCAGCAAGATGCTTGAAAGCTCCTTCAGTGCAGGGCCCATGTATGTCTTCTACCGT
+CTCAGCATCTCCCACAGTTCCTGGTGGTGGTTTCATTATAGATATTTAATTCAGAGAATG
+TAAAGGGGATCACTGCATAATAAGTGGTTAATATATGTTCACTTTATCAGAATTGAAAAG
+CAGGATAGAATAAGGAGGTTCTCAGGGCAGTTTCTGCAGTCAGACAGGTGGTGTCTATTC
+CCAGTTCTGCCATTGACTAGCTTTGGGTTGCAGGCAATGTTGAATTTCTCTGAGCCTCAC
+GTTCCTTCTCTATCAAATGGAGATTTTACCTTTTTATCATAGGGTTGCTGGGAAAATTAA
+GGGAGATAACACGTGAAAAGCTCTGCAATGTTCTGTCACTGAAATGTTCAGTGACAGACC
+TGGAAAATGTTCAGTCACATTATTACTCCACTTTTTTTTTCTGAGACAGAGTCTCACTCT
+GTTGCCCAGGCTGGACGATCACAGCTCACTGTAACCTCCACCTCTTGGGCTCAAGTGACC
+CTCTTGCCTCAGCCTCTCCAGTAGCTGGAATTACAGGTGTGTGCCACCACGCCCTGCTAA
+TTTTTATATTTTTTGTAGAGATAGAGTTTCACCATGTGCCCAGGCTAGTACCAAACTCTT
+GGGCTCAATTGATTCTCCTGTCTCAGCCTCCCAAAGTGCTGGGATTACAGGTGTGAACCA
+CTGTCCCCGGCTACTCCATCTTTTGTATTCTTACACAAGGTACCTCTTTTTGGTTCAAGC
+CAGTGATTTCCAAATTTTCACTCACTAAGGACCTCCTTATTTTCTTCTGTAAAGGCCACA
+TTTAATTGTTTGTCTCTCAAGCTACATTTGTGAATAAGTATTGAATACATTTCCCCATTT
+AAAATTAATTAGAACTATCCATGGCTTCCAGACAGAACTGTGAATCAGCATCCGATCACC
+ACGTCATGATATGCGCTCTTGGCTTTTGCCTAAGAATGACACATTAGTAAGCCTGGTTGC
+CTTCGCATAGGTGAGTGCTTTATTTTCATAAAGCTTTTCAAAATAAGGCAAATAGCATCT
+CTGAGGATCCCCAGTGGCCCAGGGACCTAAGGTGAGGCCTGTGGTGGTATTAGTACTCTT
+CCTCTGTACCCAGCCTGATTTTCATTTGCCATTTTTCCCATTTGCCTCTAAGATGGGCTG
+GATGCAGCCACTATGGCCATGTTCCCACCTTCGTGTGTCATCCCTGCAGTGCATAGCAAG
+TCCCAGGCCCCAGAATTATACCCAATTTCTTAGACTCTCTGCTCCTTGTTCTCTTCAAAT
+TGCATTTTTTTTCCATTAGTATTTTTAGCTTACTGGAATCTCCTTCCTTCATGTTACCTT
+TATTTATCTAACAGGCTTTTCACTTTATTCTCCCTTGGATCTTAAGTGTTATTAACTTCA
+CTTTGGCATCTTCTCCCCACCCTCCCCAGCCCCAAACTAAGAAACTGTAACAACCTCATG
+TGAGGAATTGTGGCCATTGTTTATGGTAGGAGAAAATAAAAGGAACCAGCATGAAACCTT
+CAGCGCTCAACCACCACCCATAATAGAATCACAGTCATTACTGAGTTCCAAACAGTGCCA
+GATGGTGTGAGCACAGAAATGTTGGGTAGTAGCCAGGGCTGGGTTTTAGAGACAGATCTG
+GTGAACCCTTACCATCTTGGTGACCTTGGGCACATCATTTAACCTGTTTGGGGCCTCTTA
+TCCATGAAATGTAAACAGTAATACCCACCTCATAGAGCTATCGTGGGGCTAAACAATGTG
+TTAGGCACTTAGCACAGTGCCTGGCACGCTGGAACTCACACTGCAAGAAGCGCTGAACCC
+GACCCTCGGCAGCACGTGGCCTCCAGAACCACAGTGCAAGTGCTATAAACTAGACTGCCT
+GCGGAAGGCCAGGGGAGGGGTGGGCACAGAAGACACTGGATTTAGGGCTGGGCAAAGGAT
+AGGAGCAGGTGGCACTCAAGTAGGAGATGGGGGAAAGAAGGGAGGGAGAGAAGGAGAGAA
+GAAAGGTGGAGAGAGACAATAACAAAATGCAAAATTAGGAGCCTGAAGCATGGCTTCTCT
+CTTCTATATGAAACCTATCTATGGATATACAGATGGAAGATTAAAATCTGATGCCCTCCT
+TACAGCTCTGGTCCAGGGCTGAGTTGGAAGCTTGGCCTGTGTGGGTTGGTGAGTGTGCCC
+AGAGCCTGGGTGGGTAGGTGCCATGGGGGTGGGCACTCACTTCATGCCCTGCTGGAAGAC
+TGAGTTGCGGCGGGTCTTGCAGATGATGAGGTCAGCCCACTGCACCACCACGATGCTGGC
+AAAGAATGCCGTGTGGCACGTGAACTCCACCACCTTCCGCTGCTCATAGGTCTGGAGGGA
+GGGTGGGCAGAGACAGATGAGAGTGTCGGAGGAAAGGGTAGGACCAGAAGGTAGCATGGT
+TTCTCCTGATCCACCCCTAGCCCCAGTCCCAGTTGGCCACCACTGGCCACTCCTGCCAGG
+CATTCAGCTTCCCGTACCTTCACACATGTGCGCTGTGTTTACACAGCACCACTCACCCAC
+TCCTGTCCATAGCTGTCCTCCAGATCATTCATGGTCCGGTCATCCCAGTCGAGGCGGATT
+CCCAGTAGCCGTGATGGCAGGAAACCGTTCTCTGCCAGGATCACAAAGTAGGTGAAGAAG
+CCACCCAGTGCCTGGATCATCCCTGTGGGTAGAGGGCCAAAGGCAGGGGCAGGGTCAGAG
+CAGGAAGCAGAAGGGGCACAGCCCCTCAGGGCCAGAGATGGAGGTCCCTGACCCTTGGGT
+AGCTGGCCTCTGTCCTGGAATTTTGCTTGGGGCTCATTCCTCTGAAAGCTGGGAAAAGAA
+CCCTGTTGGGGTTCCCTCCCGAGGCCCGGGGCTTGGCGCACCGATCTGTCCGTAGGCCAT
+GCTGATGAGCCTCTCATTCACCAGCTTGTCCGTCTGGGAGTTTCGTGGCTGCCGCTTCAT
+GATATCACTCTCAGCTGCCTCATAGGCCAAGGAGATGGCAGGGACCTGTGTGGGGTAGGA
+AATGGGGCAGGGAGGGCATTTGAAGGGACGTAGGAGATGACAGAAGCAATTGATCTTCGA
+CAGTGACGATCGTTGACACAGTTAAAAAAGCGTGTATTCAGACCCCATTGTCTTCCAGGC
+CTGTGCCTGTCTCCTGTAACCCCCACAGCTGGGGCCACTGCCCCAGTGCACCTCTCCTCC
+CTGCCACTGTGCCATCACGATTGCCTTGCCTGTCCCCTCCTCCACCTCCTGCGCTCACCA
+TATCTGTGCCCAGGTCAATGCAAAGGATGGTCACAGTGCCCAGAGGTAGGGGGATGTTGG
+CAATGATGAACAGCAGGAAGGGGGTGATCTCGGGGATGTTGCTGGTCAGGGTGTAGGCGA
+TGGATTTCTTCAAGTTGTCAAAGATCAGGCGGCCTGTGGGGAGATTCTGAGGGCATCAGG
+GAGGTCAGAGGGACTCTTCCCCACCCCCAGCCCCTGAAGCATGGCTGCCTGTCTTCCCAA
+GAGAGGAAGATTTTGTGTTCAAGGGAAGCTAAAGTGGGCTGGGCTGCCTAGAGTTGTGGA
+ATGATTCTTCAACTGGGGTAAAGGGACAGGGAACAGAGGTGCCGGCTTCAGTATCCTGCA
+AACCATCCCAACCCATGCAGCCTCCTCACCCTCCTCCACCCCCGTGACGATGGAGGCAAA
+GTTGTCATCCAGCAGGATCATGTCGGCTGCCTGCTTAGAGACGTCAGAGCCAGAGATGCC
+CATGGCAATGCCAATGTCAGCCTTCTTCAATGCAGGGGAGTCGTTCACCCCGTCACCCGT
+CACGGCCACAATGGCTCCCTGGAGGGAAGACGGCCACACTTGAGGACGAAACCCCTTCCC
+AAGCCTGGCTCCTCGCCTCTCACTCCATGCTTGTCTTGGCCCTGTCCCTGCCTCCCCGGT
+CCCTGCCTGTCATCTGCCTCCCGTGGCTGTGCTCACCTGCCTCTGACATCCCTCCACAAT
+GATGAGCTTCTGCTGGGGAGACGTTCGAGCAAAGACGATCTCTGTGTGGTTCTTGAGGAT
+CTCATCGAGCTGCTCCGATGTCATGTCCTTCAGGTCAGAGCCGTGCACCACGCATGCCTT
+GGCTTCTCTGGAGGGTGGGTGGGGACAGAGACAGCTGACCCCTGGCACCTGTACCAGCCC
+CTCCTGCCCCCTGCCTCTCTTCACCACCGCAGCCTGCAGGCTCCCTGCTGAGCCTGCTTC
+CCCTGTTGCTTCAGTTCTCCTCTCCTCAGTACTCACTTCCCCCATTTCTCTTTGAGAATG
+TGAGTCTAAATCCTCACCATGGGATCATCGCAGTTGTAAGTGCAGTTTGCTAGGCCCTGG
+GCTAGTTTCTTTACTCACATCCCACTCCTACAGCAGTGGAGATGGCATCCTAATTTTGTG
+GTGAGAAAATGGAGGCTCAGGAAGGGAATGTGACATCTCCAAGGTTACATGGGTATTCAG
+TGGCACAGCCTGGATACAAGCTCAGGGCTGAGGAACCAGTCACAATAGTGTCAGAGGTAA
+GGCCTATAAAGGTCCCAGAAATGCTATCCAAATACTCCCAGGGGACACCCAAGCCTTCTG
+TGCAAGAGGACGTGTTGATTAGGGCACAGGGGCTTCCTGCAGAGGCCTCACCTGGGGTTG
+ACTTGACTCATGGGAATGTTGAGCCGGGCTGCAATGTCCTCCACAGTCTCGTTACCCTCT
+GATATGATGCCCACGCCTTTGGCAATGG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.genscan
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.genscan	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.genscan	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,238 @@
+GENSCAN 1.0	Date run:  1-Aug-100	Time: 16:43:38
+
+Sequence HSBA536C5 : 168628 bp : 49.21% C+G : Isochore 2 (43 - 51 C+G%)
+
+Parameter matrix: HumanIso.smat
+
+Predicted genes/exons:
+
+Gn.Ex Type S .Begin ...End .Len Fr Ph I/Ac Do/T CodRg P.... Tscr..
+----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------
+
+ 2.04 PlyA -   7901   7896    6                               1.05
+ 2.03 Term -  10642  10463  180  1  0   28   43   120 0.957  -0.89
+ 2.02 Intr -  11044  10815  230  2  2   84   44   310 0.981  23.79
+ 2.01 Init -  14499  13650  850  0  1  126   53  2079 0.818 202.23
+ 2.00 Prom -  16112  16073   40                              -5.56
+
+ 3.00 Prom +  18327  18366   40                              -5.06
+ 3.01 Init +  18680  18726   47  1  2   84  105    30 0.585   4.46
+ 3.02 Intr +  23250  23284   35  0  2  151   69    35 0.533   5.77
+ 3.03 Term +  26615  26664   50  0  2  108   43    36 0.267  -1.43
+ 3.04 PlyA +  27305  27310    6                               1.05
+
+ 8.32 PlyA - 114694 114689    6                               1.05
+ 8.31 Term - 117609 117581   29  1  2  139   37    35 0.986   1.74
+ 8.30 Intr - 118004 117913   92  1  2  126   77   101 0.988  12.44
+ 8.29 Intr - 121211 121110  102  1  0   85   89    95 0.997   8.59
+ 8.28 Intr - 121457 121327  131  2  2  130   51   125 0.999  12.49
+ 8.27 Intr - 125623 125478  146  2  2  108   92   121 0.958  14.50
+ 8.26 Intr - 126663 126540  124  0  1  113   58   151 0.981  14.76
+ 8.25 Intr - 127050 126896  155  1  2   72   91   196 0.685  18.09
+ 8.24 Intr - 128563 128395  169  1  1   91   72   343 0.999  32.52
+ 8.23 Intr - 129031 128881  151  0  1   68   95   202 0.996  19.06
+ 8.22 Intr - 129561 129425  137  0  2  113   94   171 0.999  19.57
+ 8.21 Intr - 131557 131385  173  2  2  121   94    69 0.957  10.46
+ 8.20 Intr - 131891 131702  190  2  1  126   66   153 0.780  16.06
+ 8.19 Intr - 135872 135738  135  2  0   37   92   171 0.802  13.16
+ 8.18 Intr - 136182 136073  110  1  2  139   33   122 0.867  11.80
+ 8.17 Intr - 136622 136424  199  2  1   96   22   400 0.999  33.12
+ 8.16 Intr - 138994 138726  269  2  2   89   74   152 0.257  11.15
+ 8.15 Intr - 143743 143626  118  1  1  100   63   113 0.289  10.04
+ 8.14 Intr - 144150 144016  135  0  0   43  100   129 0.999  10.36
+ 8.13 Intr - 147107 146994  114  2  0  102   91   154 0.995  17.74
+ 8.12 Intr - 148107 147904  204  0  0  104   92    97 0.839  11.10
+ 8.11 Intr - 149987 149928   60  2  0  114  113    90 0.999  13.03
+ 8.10 Intr - 151157 150965  193  1  1   75   77   125 0.355   9.59
+ 8.09 Intr - 161359 161278   82  2  1  105   95    51 0.520   6.20
+ 8.08 Intr - 163259 163168   92  1  2  117   91   174 0.980  20.24
+ 8.07 Intr - 163512 163411  102  2  0  141   89    85 0.999  13.19
+ 8.06 Intr - 166251 166121  131  0  2  113   81   212 0.999  22.49
+ 8.05 Intr - 166582 166437  146  2  2  111   92   215 0.999  24.20
+ 8.04 Intr - 166905 166782  124  0  1  107   70   221 0.999  22.36
+ 8.03 Intr - 167313 167159  155  1  2  116   89   268 0.999  29.49
+ 8.02 Intr - 167718 167550  169  0  1   96   72   360 0.999  34.72
+ 8.01 Intr - 168007 167857  151  0  1   75   99   227 0.984  22.66
+
+Predicted peptide sequence(s):
+
+Predicted coding sequence(s):
+
+
+>HSBA536C5|GENSCAN_predicted_peptide_2|419_aa
+MAQENAAFSPGQEEPPRRRGRQRYVEKDGRCNVQQGNVRETYRYLTDLFTTLVDLQWRLS
+LLFFVLAYALTWLFFGAIWWLIAYGRGDLEHLEDTAWTPCVNNLNGFVAAFLFSIETETT
+IGYGHRVITDQCPEGIVLLLLQAILGSMVNAFMVGCMFVKISQPNKRAATLVFSSHAVVS
+LRDGRLCLMFRVGDLRSSHIVEASIRAKLIRSRQTLEGEFIPLHQTDLSVGFDTGDDRLF
+LVSPLVISHEIDAASPFWEASRRALERDDFEIVVILEGMVEATGMTCQARSSYLVDEGLW
+GHRFTSVLTLEDGFYEVDYASFHETFEVPTPSCSARELAEAAARLDAHLYWSIPSRLDEK
+RVSPRCDQLPPDPCGRPGARHRYMGNCISEVVEEEEEEEGKAPGNVLKLESPRPPEPQV
+
+>HSBA536C5|GENSCAN_predicted_CDS_2|1260_bp
+atggcgcaggagaacgcggccttctcgcccgggcaggaggagccgccgcggcgccgcggc
+cgccagcgctacgtggagaaggatggccggtgcaacgtgcagcagggcaacgtgcgcgag
+acataccgctacctgacggacctgttcaccacgctggtggacctgcagtggcgcctcagc
+ctgttgttcttcgtcctggcctacgcgctcacctggctcttcttcggcgccatctggtgg
+ctgatcgcctacggccgcggcgacctggagcacctggaggacaccgcgtggacgccgtgc
+gtcaacaacctcaacggcttcgtggccgccttcctcttctccatcgagaccgagaccacc
+atcggctacgggcaccgcgtcatcaccgaccagtgccccgagggcatcgtgctgctgctg
+ctgcaggccatcctgggctccatggtgaacgccttcatggtgggctgcatgttcgtcaag
+atctcgcagcccaacaagcgcgcagccacgctcgtcttctcctcgcacgccgtggtgtcg
+ctgcgcgacgggcgcctctgcctcatgttccgcgtgggcgacttgcgctcctcacacata
+gtggaggcctccatccgcgccaagctcatccgctcgcgccagacgctggagggcgagttc
+atcccgctgcaccagaccgacctcagcgtgggcttcgacacgggagacgaccgcctcttc
+ctcgtctcgccgctggttatcagccacgagatcgacgccgccagccccttctgggaggcg
+tcgcgccgtgccctcgagagggacgacttcgagatcgtcgttatcctcgagggcatggtg
+gaagccacgggaatgacatgccaagctcggagctcctacctggtagacgaggggctgtgg
+ggccaccgcttcacgtcagtgctgactctggaggacggcttctacgaagtggactatgcc
+agctttcacgagacttttgaggtgcccacaccttcgtgcagtgctcgagagctggcagag
+gctgccgcccgccttgatgcccatctctactggtccatccccagccggctggatgagaag
+agagtgagtccaaggtgtgaccagcttcctccagacccctgtggcagaccgggggccaga
+cacagatacatggggaactgcatatcggaggtggtggaggaggaggaggaggaggaaggc
+aaagcccctggaaatgtgctaaagttggaaagtccccgtcccccagaacctcaagtctag
+
+>HSBA536C5|GENSCAN_predicted_peptide_3|43_aa
+MNTAAINIHRQIFMWTSSVVKTSFTVTFSSPGVIPPRLPYARE
+
+>HSBA536C5|GENSCAN_predicted_CDS_3|132_bp
+atgaatacagctgctataaacatccatcggcagattttcatgtggacgtcttctgtggtg
+aagacctccttcactgtgaccttctcctcaccaggtgtgatcccccccaggctcccctat
+gcccgtgaatga
+
+>HSBA536C5|GENSCAN_predicted_peptide_8|1429_aa
+XEAKACVVHGSDLKDMTSEQLDEILKNHTEIVFARTSPQQKLIIVEGCQRQGAIVAVTGD
+GVNDSPALKKADIGIAMGISGSDVSKQAADMILLDDNFASIVTGVEEGRLIFDNLKKSIA
+YTLTSNIPEITPFLLFIIANIPLPLGTVTILCIDLGTDMVPAISLAYEAAESDIMKRQPR
+NSQTDKLVNERLISMAYGQIGMIQALGGFFTYFVILAENGFLPSRLLGIRLDWDDRTMND
+LEDSYGQEWTYEQRKVVEFTCHTAFFASIVVVQWADLIICKTRRNSVFQQGMKNKILIFG
+LLEETALAAFLSYCPGMGVALRMYPLKVTWWFCAFPYSLLIFIYDEVRKLILRRYPGDLA
+ITKGSSGECKSLRLEKVDLSPSRGCFLPTVELGQLFLGIAMGLWGKKGTVAPHDQSPRRR
+PKKGLIKKKMVKREKQKRNMEELKKEVVMDDHKLTLEELSTKYSVDLTKGHSHQRAKEIL
+TRGGPNTVTPPPTTPEWVKFCKQLFGGFSLLLWTGAILCFVAYSIQIYFNEEPTKDNLYL
+SIVLSVVVIVTGCFSYYQEAKSSKIMESFKNMVPQQALVIRGGEKMQINVQEVVLGDLVE
+IKGGDRVPADLRLISAQGCKVDNSSLTGESEPQSRSPDFTHENPLETRNICFFSTNCVEG
+TARGIVIATGDSTVMGRIASLTSGLAVGQTPIAAEIEHFIHLITVVAVFLGVTFFALSLL
+LGYGWLEAIIFLIGIIVANVPEGLLATVTVCLTLTAKRMARKNCLVKNLEAVETLGSTST
+ICSDKTGTLTQNRMTVAHMWFDMTVYEADTTEEQTGKTFTKSSDTWFMLARIAGLCNRAD
+FKANQEILPIAKRATTGDASESALLKFIEQSYSSVAEMREKNPKVAEIPFNSTNKYQMSI
+HLREDSSQTHVLMMKGAPERILEFCSTFLLNGQEYSMNDEMKEAFQNAYLELGGLGERVL
+GFCFLNLPSSFSKGFPFNTDEINFPMDNLCFVGLISMIDPPRAAVPDAVSKCRSAGIKVI
+MVTGDHPITAKAIAKGVGIISEGTETAEEVAARLKIPISKVDASAAKAIVVHGAELKDIQ
+SKQLDQILQNHPEIVFARTSPQQKLIIVEGCQRLGAVVAVTGDGVNDSPALKKADIGIAM
+GISGSDVSKQAADMILLDDNFASIVTGVEEGRLIFDNLKKSIMYTLTSNIPEITPFLMFI
+ILGIPLPLGTITILCIDLGTDMVPAISLAYESAESDIMKRLPRNPKTDNLVNHRLIGMAY
+GQIGMIQALAGFFTYFVILAENGFRPVDLLGIRLHWEDKYLNDLEDSYGQQWTYEQRKVV
+EFTCQTAFFVTIVVVQWADLIISKTRRNSLFQQGMRNKVLIFGILEETLLAAFLSYTPGM
+DVALRMYPLKITWWLCAIPYSILIFVYDEIRKLLIRQHPDGWVERETYY
+
+>HSBA536C5|GENSCAN_predicted_CDS_8|4290_bp
+nnagaagccaaggcatgcgtggtgcacggctctgacctgaaggacatgacatcggagcag
+ctcgatgagatcctcaagaaccacacagagatcgtctttgctcgaacgtctccccagcag
+aagctcatcattgtggagggatgtcagaggcagggagccattgtggccgtgacgggtgac
+ggggtgaacgactcccctgcattgaagaaggctgacattggcattgccatgggcatctct
+ggctctgacgtctctaagcaggcagccgacatgatcctgctggatgacaactttgcctcc
+atcgtcacgggggtggaggagggccgcctgatctttgacaacttgaagaaatccatcgcc
+tacaccctgaccagcaacatccccgagatcacccccttcctgctgttcatcattgccaac
+atccccctacctctgggcactgtgaccatcctttgcattgacctgggcacagatatggtc
+cctgccatctccttggcctatgaggcagctgagagtgatatcatgaagcggcagccacga
+aactcccagacggacaagctggtgaatgagaggctcatcagcatggcctacggacagatc
+gggatgatccaggcactgggtggcttcttcacctactttgtgatcctggcagagaacggt
+ttcctgccatcacggctactgggaatccgcctcgactgggatgaccggaccatgaatgat
+ctggaggacagctatggacaggagtggacctatgagcagcggaaggtggtggagttcacg
+tgccacacggcattctttgccagcatcgtggtggtgcagtgggctgacctcatcatctgc
+aagacccgccgcaactcagtcttccagcagggcatgaagaacaagatcctgatttttggg
+ctcctggaggagacggcgttggctgcctttctctcttactgcccaggcatgggtgtagcc
+ctccgcatgtacccgctcaaagtcacctggtggttctgcgccttcccctacagcctcctc
+atcttcatctatgatgaggtccgaaagctcatcctgcggcggtatcctggtgaccttgca
+atcacaaaaggttcttctggtgagtgcaagagcctgagactggaaaaggtggacttgtct
+cccagtcgaggctgctttcttcccacagttgagctcgggcagctctttctggggatagct
+atggggctttgggggaagaaagggacagtggctccccatgaccagagtccaagacgaaga
+cctaaaaaagggcttatcaagaaaaaaatggtgaagagggaaaaacagaagcgcaatatg
+gaggaactgaagaaggaagtggtcatggatgatcacaaattaaccttggaagagctgagc
+accaagtactccgtggacctgacaaagggccatagccaccaaagggcaaaggaaatcctg
+actcgaggtggacccaatactgttaccccaccccccaccactccagaatgggtcaaattc
+tgtaagcaactgttcggaggcttctccctcctactatggactggggccattctctgcttt
+gtggcctacagcatccagatatatttcaatgaggagcctaccaaagacaacctctacctg
+agcatcgtactgtccgtcgtggtcatcgtcactggctgcttctcctattatcaggaggcc
+aagagctccaagatcatggagtcttttaagaacatggtgcctcagcaagctctggtaatt
+cgaggaggagagaagatgcaaattaatgtacaagaggtggtgttgggagacctggtggaa
+atcaagggtggagaccgagtccctgctgacctccggcttatctctgcacaaggatgtaag
+gtggacaactcatccttgactggggagtcagaaccccagagccgctcccctgacttcacc
+catgagaaccctctggagacccgaaacatctgcttcttttccaccaactgtgtggaagga
+accgcccggggtattgtgattgctacgggagactccacagtgatgggcagaattgcctcc
+ctgacgtcaggcctggcggttggccagacacctatcgctgctgagatcgaacacttcatc
+catctgatcactgtggtggccgtcttccttggtgtcactttttttgcgctctcacttctc
+ttgggctatggttggctggaggctatcatttttctcattggcatcattgtggccaatgtg
+cctgaggggctgttggccacagtcactgtgtgcctgaccctcacagccaagcgcatggcg
+cggaagaactgcctggtgaagaacctggaggcggtggagacgctgggctccacgtccacc
+atctgctcagacaagacgggcaccctcacccagaaccgcatgaccgtcgcccacatgtgg
+tttgatatgaccgtgtatgaggccgacaccactgaagaacagactggaaaaacatttacc
+aagagctctgatacctggtttatgctggcccgaatcgctggcctctgcaaccgggctgac
+tttaaggctaatcaggagatcctgcccattgctaagagggccacaacaggtgatgcttcc
+gagtcagccctcctcaagttcatcgagcagtcttacagctctgtggcggagatgagagag
+aaaaaccccaaggtggcagagattccctttaattctaccaacaagtaccagatgtccatc
+caccttcgggaggacagctcccagacccacgtactgatgatgaagggtgctccggagagg
+atcttggagttttgttctacctttcttctgaatgggcaggagtactcaatgaacgatgaa
+atgaaggaagccttccaaaatgcctacttagaactgggaggtctgggggaacgtgtgcta
+ggcttctgcttcttgaatctgcctagcagcttctccaagggattcccatttaatacagat
+gaaataaatttccccatggacaacctttgttttgtgggcctcatatccatgattgaccct
+ccccgagctgcagtgcctgatgctgtgagcaagtgtcgcagtgcaggaattaaggtgatc
+atggtaacaggagatcatcccattacagctaaggccattgccaagggtgtgggcatcatc
+tcagaaggcactgagacggcagaggaagtcgctgcccggcttaagatccctatcagcaag
+gtcgatgccagtgctgccaaagccattgtggtgcatggtgcagaactgaaggacatacag
+tccaagcagcttgatcagatcctccagaaccaccctgagatcgtgtttgctcggacctcc
+cctcagcagaagctcatcattgtcgagggatgtcagaggctgggagccgttgtggccgtg
+acaggtgacggggtgaacgactcccctgcgctgaagaaggctgacattggcattgccatg
+ggcatctctggctctgacgtctctaagcaggcagccgacatgatcctgctggatgacaac
+tttgcctccatcgtcacgggggtggaggagggccgcctgatctttgacaacctgaagaaa
+tccatcatgtacaccctgaccagcaacatccccgagatcacgcccttcctgatgttcatc
+atcctcggtatacccctgcctctgggaaccataaccatcctctgcattgatctcggcact
+gacatggtccctgccatctccttggcttatgagtcagctgaaagcgacatcatgaagagg
+cttccaaggaacccaaagacggataatctggtgaaccaccgtctcattggcatggcctat
+ggacagattgggatgatccaggctctggctggattctttacctactttgtaatcctggct
+gagaatggttttaggcctgttgatctgctgggcatccgcctccactgggaagataaatac
+ttgaatgacctggaggacagctacggacagcagtggacctatgagcaacgaaaagttgtg
+gagttcacatgccaaacggccttttttgtcaccatcgtggttgtgcagtgggcggatctc
+atcatctccaagactcgccgcaactcacttttccagcagggcatgagaaacaaagtctta
+atatttgggatcctggaggagacactcttggctgcatttctgtcctacactccaggcatg
+gacgtggccctgcgaatgtacccactcaagataacctggtggctctgtgccattccctac
+agtattctcatcttcgtctatgatgaaatcagaaaactcctcatccgtcagcacccggat
+ggctgggtggaaagggagacgtactactaa
+
+
+Explanation
+
+Gn.Ex : gene number, exon number (for reference)
+Type  : Init = Initial exon
+        Intr = Internal exon
+        Term = Terminal exon
+        Sngl = Single-exon gene
+        Prom = Promoter
+        PlyA = poly-A signal
+S     : DNA strand (+ = input strand; - = opposite strand)
+Begin : beginning of exon or signal (numbered on input strand)
+End   : end point of exon or signal (numbered on input strand)
+Len   : length of exon or signal (bp)
+Fr    : reading frame (a codon ending at x is in frame f = x mod 3)
+Ph    : net phase of exon (length mod 3)
+I/Ac  : initiation signal or acceptor splice site score (x 10)
+Do/T  : donor splice site or termination signal score (x 10)
+CodRg : coding region score (x 10)
+P     : probability of exon (sum over all parses containing exon)
+Tscr  : exon score (depends on length, I/Ac, Do/T and CodRg scores)
+
+Comments
+
+The SCORE of a predicted feature (e.g., exon or splice site) is a
+log-odds measure of the quality of the feature based on local sequence
+properties. Thus, for example, a predicted donor splice site with
+score > 100 is excellent; 50-100 is acceptable; 0-50 is weak; and
+below 0 is poor (probably not a real donor site).
+
+The PROBABILITY of a predicted exon is the estimated probability under
+GENSCAN's model of genomic sequence structure that the exon is correct.
+This probability depends in general on global as well as local sequence
+properties.  This information can be used to assess the reliability of the
+predicted exon, e.g., it would be better to design PCR primers based on
+a predicted exon with probability > 0.95 than one with lower probability.
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.mzef
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.mzef	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/genomic-seq.mzef	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+ ENTER NAME OF THE SEQUENCE FILE (in single quotes)
+genomic-seq.fasta
+ ENTER 1 FOR FORWARD, 2 FOR REVERSE
+1
+ ENTER PRIOR PROBABILITY (suggesting .04)
+.04
+ ENTER OVER LAPPING NUMBER (suggesting 0)
+0
+ Internal coding exons predicted by MZEF
+ File_Name: genomic-se  Sequence_length: 168628  G+C_content:  0.492
+  Coordinates    P    Fr1   Fr2   Fr3  Orf   3ss   Cds   5ss
+  3192 -  3569 0.965 0.393 0.636 0.377 212 0.575 0.545 0.501
+  4476 -  4728 0.885 0.436 0.396 0.593 221 0.521 0.540 0.586
+  5744 -  6157 0.997 0.571 0.443 0.394 122 0.573 0.535 0.649
+  8751 -  8800 0.553 0.368 0.428 0.627 221 0.515 0.559 0.547
+ 13638 - 13818 0.689 0.624 0.563 0.402 112 0.561 0.592 0.499
+ 35165 - 35248 0.774 0.400 0.344 0.488 121 0.545 0.461 0.642
+ 75943 - 76047 1.000 0.711 0.398 0.463 122 0.545 0.619 0.555
+ 76229 - 76288 1.000 0.359 0.712 0.511 211 0.548 0.601 0.691
+ 78250 - 78453 0.992 0.596 0.404 0.427 112 0.567 0.537 0.566
+ 78980 - 79093 1.000 0.395 0.660 0.354 111 0.521 0.569 0.658
+ 79333 - 79467 0.997 0.648 0.407 0.372 122 0.511 0.559 0.640
+ 80173 - 80290 0.995 0.587 0.341 0.395 122 0.565 0.506 0.651
+ 82589 - 82857 0.996 0.603 0.334 0.402 122 0.532 0.531 0.555
+ 83689 - 83887 1.000 0.703 0.376 0.480 122 0.541 0.605 0.412
+ 84017 - 84126 0.996 0.627 0.395 0.445 112 0.514 0.564 0.562
+ 84303 - 84484 0.936 0.356 0.411 0.543 221 0.529 0.508 0.571
+ 85346 - 85535 0.904 0.422 0.656 0.421 112 0.535 0.577 0.562
+ 85666 - 85841 0.970 0.426 0.420 0.592 221 0.581 0.552 0.532
+ 96381 - 96446 0.960 0.389 0.351 0.560 221 0.490 0.503 0.657
+115714 -115756 0.508 0.509 0.504 0.381 112 0.529 0.537 0.611
+125505 -125588 0.980 0.400 0.383 0.641 121 0.536 0.538 0.515
+135080 -135131 0.540 0.630 0.571 0.413 112 0.482 0.586 0.517
+147055 -147214 0.914 0.436 0.393 0.577 221 0.498 0.540 0.619

Added: trunk/packages/bioperl/branches/upstream/current/t/data/gf-s71.needle
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/gf-s71.needle	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/gf-s71.needle	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+########################################
+# Program:  needle
+# Rundate:  Thu Jun 20 10:53:54 2002
+# Report_file: /tmp/jason/water/1/gf.s71.44_Y50C1A.2.water
+########################################
+#=======================================
+#
+# Aligned_sequences: 2
+# 1: gf.s71.44
+# 2: Y50C1A.2
+# Matrix: EBLOSUM62
+# Gap_penalty: 10.0
+# Extend_penalty: 0.5
+#
+# Length: 468
+# Identity:     285/468 (60.9%)
+# Similarity:   339/468 (72.4%)
+# Gaps:          82/468 (17.5%)
+# Score: 1394.0
+# 
+#
+#=======================================
+
+gf.s71.44          1 MEDVTLHHFRWRKPVENKNGEIVYKTSETQTAEISRKDVECVANFQKSQE     50
+                     ||||||..|.||||: ...||||||||||||.|.::||||||||||:::|
+Y50C1A.2           1 MEDVTLFQFTWRKPI-RLQGEIVYKTSETQTIETNKKDVECVANFQENKE     49
+
+gf.s71.44         51 SQTDDFMQNGVGDGIKKEIRISKEVLGHIYDFLRDDSKVNYDRLLEFHKF    100
+                     .|||. :.||||:.:||:|.||||||..:|||:|||||||||||||||||
+Y50C1A.2          50 VQTDS-VDNGVGENVKKDITISKEVLNLLYDFVRDDSKVNYDRLLEFHKF     98
+
+gf.s71.44        101 DKVSLETVQKYHVETRNENIILMISNSSRKTLILFGGLSHETFCSHQARA    150
+                     |||:|||||||||||||||||||||:|||||||||||:||||||||||||
+Y50C1A.2          99 DKVALETVQKYHVETRNENIILMISSSSRKTLILFGGISHETFCSHQARA    148
+
+gf.s71.44        151 VLCSSSTTSSLPLPVCAISAVFYSSTQFLLGDISGNISMWTKEKMIFENK    200
+                     :||||||:.|:|||||||||||||||||:|||:||||||.:|:|:|||.|
+Y50C1A.2         149 LLCSSSTSFSIPLPVCAISAVFYSSTQFILGDVSGNISMCSKDKIIFEKK    198
+
+gf.s71.44        201 VTDGSVTSLELCRYGLLSGSDDGNVILWKVEESKIEKIEGIKLTVSDLSR    250
+                     :|||:||.||:||:||||||||||:|||::..|.:||:.|.|||||||||
+Y50C1A.2         199 ITDGAVTCLEMCRHGLLSGSDDGNIILWQIGTSGLEKLGGTKLTVSDLSR    248
+
+gf.s71.44        251 KIRRSSTSNKPVAIVSMQV----SGDEVCVATETGGLYLLTLPTLESKPL    296
+                     |||||||||||||||||||    ||:|.|||||||||||||||||:.|||
+Y50C1A.2         249 KIRRSSTSNKPVAIVSMQVYVWPSGEEACVATETGGLYLLTLPTLDYKPL    298
+
+gf.s71.44        297 T-QSATSIFKILYEHPYIAVVYHTSNSAIFNSEGLVDEIPFVATLAVRCG    345
+                     : |:||||.|||:|:.::||:|||||:|:|||||||||||||||||||  
+Y50C1A.2         299 SHQTATSINKILFENQFVAVIYHTSNAAVFNSEGLVDEIPFVATLAVR--    346
+
+gf.s71.44        346 AYFIFSNQSRLIIWSMNTRSTVIDENLNCHS-ICSLSND-----------    383
+                             .:|:::  .|...|.|..|||.| ....:|:           
+Y50C1A.2         347 --------PKLVLF--YTSVCVQDITLNCTSPFREFNNEYNPVIKFSKIR    386
+
+gf.s71.44        384 ---TLQVLDGDFNLNSQSENSATSESENLRISDLQNLRMLKLQNLRTSEF    430
+                        .|.|::| |..:|.:.|:                             
+Y50C1A.2         387 FSADLSVING-FRTSSPNSNN                                 406
+
+gf.s71.44        431 QNFRTSESQYFKKDNGEL    448
+                                       
+Y50C1A.2         407                       406
+
+
+#---------------------------------------
+#---------------------------------------

Added: trunk/packages/bioperl/branches/upstream/current/t/data/glimmer.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/glimmer.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/glimmer.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,32 @@
+GlimmerM (Version 3.0)
+Sequence name: BAC1Contig11
+Sequence length: 31797 bp
+
+Predicted genes/exons
+
+Gene Exon Strand  Exon            Exon Range      Exon
+   #    #         Type                           Length
+
+   1    1  +  Initial       13907      13985       79
+   1    2  +  Internal      14117      14594      478
+   1    3  +  Internal      14635      14665       31
+   1    4  +  Internal      14746      15463      718
+   1    5  +  Terminal      15497      15606      110
+
+   2    1  +  Initial       20662      21143      482
+   2    2  +  Internal      21190      21618      429
+   2    3  +  Terminal      21624      21990      367
+
+   3    1  -  Single        25351      25485      135
+
+   4    1  +  Initial       27744      27804       61
+   4    2  +  Internal      27858      27952       95
+   4    3  +  Internal      28091      28576      486
+   4    4  +  Internal      28636      28647       12
+   4    5  +  Internal      28746      28792       47
+   4    6  +  Terminal      28852      28954      103
+
+   5    3  -  Terminal      29953      30037       85
+   5    2  -  Internal      30152      30235       84
+   5    1  -  Initial       30302      30318       17
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hemoglobinA.meg
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hemoglobinA.meg	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hemoglobinA.meg	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,33 @@
+#mega
+!Title: Amino acid sequences of hemoglobin alpha chains;
+!Format datatype=protein identical=. indel=-;
+
+[
+M. Nei and S. Kumar (2000) Molecular Evolution and Phylogenetics.  
+Oxford University Press. New York.
+Data for Example 2.1.
+]
+
+
+
+#Human    V-LSPADKTN VKAAWGKVGA HAGEYGAEAL ERMFLSFPTT KTYFPHF-DL SHGSAQVKGH 
+#Horse    ....A..... .....S...G .......... .....G.... .......... ........A. 
+#Cow      ....A...G. .........G ..A....... .......... .......... .......... 
+#Kangaroo ....A...GH ...I.....G .....A..G. ..T.H..... .......... ......IQA. 
+#Newt     MK..AE..H. ..TT.DHIKG .EEAL..... F...T.L.A. R....AK... .E..SFLHS. 
+#Carp     S...DK..AA ..I..A.ISP K.DDI..... G..LTVY.Q. ....A.WA.. .P..GP..-. 
+
+#Human    GKKVA-DALT NAVAHVDDMP NALSALSDLH AHKLRVDPVN FKLLSHCLLV TLAAHLPAEF 
+#Horse    .......G.. L..G.L..L. G...D..N.. .......... .........S ...V...ND. 
+#Cow      .A....A... K..E.L..L. G...E..... .......... ......S... ...S...SD. 
+#Kangaroo ...I.....G Q..E.I..L. GT..K..... .......... .......... .F....GDA. 
+#Newt     ....M.G..S .....I..ID A..CK...K. .QD.M...A. .PK.A.NI.. VMGI..K.HL 
+#Carp     ....IMG.VG D..SKI..LV GG.AS..E.. .S......A. ..I.ANHIV. GIMFY..GD. 
+
+#Human    TPAVHASLDK FLASVSTVLT SKYR
+#Horse    .......... ..S....... ....
+#Cow      .......... ...N...... ....
+#Kangaroo ..E....... ...A...... ....
+#Newt     .YP..C.V.. ..DV.GH... ....
+#Carp     P.E..M.V.. .FQNLALA.S E...
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hg16_chroms.gff
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hg16_chroms.gff	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hg16_chroms.gff	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+##sequence-region chr1 1 246127941
+##sequence-region chr2 1 243615958
+##sequence-region chr3 1 199344050
+##sequence-region chr4 1 191731959
+##sequence-region chr5 1 181034922
+##sequence-region chr6 1 170914576
+##sequence-region chr7 1 158545518
+##sequence-region chr8 1 146308819
+##sequence-region chr9 1 136372045
+##sequence-region chrM 1 16571
+##sequence-region chrX 1 153692391
+##sequence-region chrY 1 50286555
+##sequence-region chr1_random 1 6515988
+##sequence-region chr2_random 1 1104831
+##sequence-region chr3_random 1 749256
+##sequence-region chr4_random 1 648024
+##sequence-region chr5_random 1 143687
+##sequence-region chr6_random 1 2055751
+##sequence-region chr7_random 1 632637
+##sequence-region chr8_random 1 1499381
+##sequence-region chr9_random 1 2766341
+##sequence-region chrX_random 1 3403558
+##sequence-region chr10 1 135037215
+##sequence-region chr11 1 134482954
+##sequence-region chr12 1 132078379
+##sequence-region chr13 1 113042980
+##sequence-region chr14 1 105311216
+##sequence-region chr15 1 100256656
+##sequence-region chr16 1 90041932
+##sequence-region chr17 1 81860266
+##sequence-region chr18 1 76115139
+##sequence-region chr19 1 63811651
+##sequence-region chr20 1 63741868
+##sequence-region chr21 1 46976097
+##sequence-region chr22 1 49396972
+##sequence-region chr10_random 1 1043775
+##sequence-region chr13_random 1 189598
+##sequence-region chr15_random 1 1132826
+##sequence-region chr17_random 1 2549222
+##sequence-region chr18_random 1 4262
+##sequence-region chr19_random 1 92689
+##sequence-region chrUn_random 1 3349625

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+hmmpfam - search a single seq against HMM database
+HMMER 2.1.1 (Dec 1998)
+Copyright (C) 1992-1998 Washington University School of Medicine
+HMMER is freely distributed under the GNU General Public License (GPL).
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                 pfam
+Sequence file:            /home/birney/src/wise2/example/road.pep
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Query:  roa1_drome  
+
+Scores for sequence family classification (score includes all domains):
+Model    Description                                    Score    E-value  N 
+-------- -----------                                    -----    ------- ---
+SEED                                                    146.1    6.3e-40   2
+
+Parsed for domains:
+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+-------- ------- ----- -----    ----- -----      -----  -------
+SEED       1/2      33   103 ..     1    77 []    71.2  2.2e-17
+SEED       2/2     124   194 ..     1    77 []    75.5  1.1e-18
+
+Alignments of top-scoring domains:
+SEED: domain 1 of 2, from 33 to 103: score 71.2, E = 2.2e-17
+                   *->lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGf
+                      lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf
+  roa1_drome    33    LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGF 74   
+
+                   aFVeFeseedAekAlealnG.kelggrklrv<-*
+                   +F+++  ++  + A +    +++++gr+++    
+  roa1_drome    75 GFITYSHSSMIDEAQK--SRpHKIDGRVVEP    103  
+
+SEED: domain 2 of 2, from 124 to 194: score 75.5, E = 1.1e-18
+                   *->lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGf
+                      lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +Gf
+  roa1_drome   124    LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGF 165  
+
+                   aFVeFeseedAekAlealnGkelggrklrv<-*
+                   aFVeF++++ ++k +     ++l+g+ + v   
+  roa1_drome   166 AFVEFDDYDPVDKVVL-QKQHQLNGKMVDV    194  
+
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam_fake.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam_fake.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam_fake.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,55 @@
+hmmpfam - search a single seq against HMM database
+HMMER 2.1.1 (Dec 1998)
+Copyright (C) 1992-1998 Washington University School of Medicine
+HMMER is freely distributed under the GNU General Public License (GPL).
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                 pfam
+Sequence file:            /home/birney/src/wise2/example/road.pep
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+Query:  roa1_drome  
+
+Scores for sequence family classification (score includes all domains):
+Model    Description                                    Score    E-value  N 
+-------- -----------                                    -----    ------- ---
+SEED                                                    146.1    6.3e-40   2
+TEST                                                      5.0        7.2   1
+
+Parsed for domains:
+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+-------- ------- ----- -----    ----- -----      -----  -------
+SEED       1/2      33   103 ..     1    77 []    71.2  2.2e-17
+SEED       2/2     124   194 ..     1    77 []    75.5  1.1e-18
+
+Alignments of top-scoring domains:
+SEED: domain 1 of 2, from 33 to 103: score 71.2, E = 2.2e-17
+                   *->lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGf
+                      lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf
+  roa1_drome    33    LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGF 74   
+
+                   aFVeFeseedAekAlealnG.kelggrklrv<-*
+                   +F+++  ++  + A +    +++++gr+++    
+  roa1_drome    75 GFITYSHSSMIDEAQK--SRpHKIDGRVVEP    103  
+
+SEED: domain 2 of 2, from 124 to 194: score 75.5, E = 1.1e-18
+                   *->lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGf
+                      lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +Gf
+  roa1_drome   124    LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGF 165  
+
+                   aFVeFeseedAekAlealnGkelggrklrv<-*
+                   aFVeF++++ ++k +     ++l+g+ + v   
+  roa1_drome   166 AFVEFDDYDPVDKVVL-QKQHQLNGKMVDV    194  
+
+//
+Query:  roa2_drome  
+
+Scores for sequence family classification (score includes all domains):
+Model    Description                                    Score    E-value  N 
+-------- -----------                                    -----    ------- ---
+SEED                                                    146.1    6.3e-40   2
+
+Parsed for domains:
+Model    Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+-------- ------- ----- -----    ----- -----      -----  -------
+SEED       1/2      33   103 ..     1    77 []    71.2  2.2e-17
+SEED       2/2     124   194 ..     1    77 []    75.5  1.1e-18
+//
\ No newline at end of file


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/hmmpfam_fake.out
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hmmsearch.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hmmsearch.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hmmsearch.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2170 @@
+hmmsearch - search a sequence database with a profile HMM
+HMMER 2.0 (June 1998)
+Copyright (C) 1992-1998 Washington University School of Medicine
+HMMER is freely distributed under the GNU General Public License (GPL).
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+HMM file:                 HMM [SEED]
+Sequence database:        HMM.dbtemp.29591
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+Query HMM:  SEED  
+  [HMM has been calibrated; E-values are empirical estimates]
+
+Scores for complete sequences (score includes all domains):
+Sequence   Description                                  Score    E-value  N 
+--------   -----------                                  -----    ------- ---
+PAB2_ARATH P42731 POLYADENYLATE-BINDING PROTEIN 2 (PO   393.8   6.1e-114   4
+Q13310     Q13310 INDUCIBLE POLY(A)-BINDING PROTEIN.    385.6   1.8e-111   4
+Q93004     Q93004 POLY(A)-BINDING PROTEIN.              384.2   4.6e-111   4
+PABP_MOUSE P29341 POLYADENYLATE-BINDING PROTEIN (POLY   383.5   7.5e-111   4
+O22173     O22173 PUTATIVE POLY(A)-BINDING PROTEIN.     371.0   4.3e-107   4
+P87135     P87135 POLYADENYLATE-BINDING PROTEIN PABPP   370.6   5.7e-107   4
+PABP_SCHPO P31209 POLYADENYLATE-BINDING PROTEIN (POLY   370.6   5.7e-107   4
+PABP_YEAST P04147 POLYADENYLATE-BINDING PROTEIN, CYTO   364.7   3.5e-105   4
+PABP_HUMAN P11940 POLYADENYLATE-BINDING PROTEIN (POLY   364.2   4.7e-105   4
+PABP_XENLA P20965 POLYADENYLATE-BINDING PROTEIN (POLY   362.6   1.5e-104   4
+P93616     P93616 POLY(A)-BINDING PROTEIN.              362.1   2.1e-104   4
+Q62029     Q62029 POLY A BINDING PROTEIN 2 (POLYA BIN   356.6   9.3e-103   4
+PAB5_ARATH Q05196 POLYADENYLATE-BINDING PROTEIN 5 (PO   354.4   4.4e-102   4
+Q39953     Q39953 POLY(A)-MRNA BINDING PROTEIN.         348.3     3e-100   4
+PABP_DROME P21187 POLYADENYLATE-BINDING PROTEIN (POLY   342.4    1.8e-98   4
+Q15097     Q15097 POLYADENYLATE BINDING PROTEIN II.     333.6    7.8e-96   4
+Q19581     Q19581 F18H3.3B.                             329.1    1.8e-94   4
+Q19579     Q19579 F18H3.3A.                             329.1    1.8e-94   4
+Q17350     Q17350 POLYADENYLATE-BINDING PROTEIN.        328.4      3e-94   4
+Q92227     Q92227 PUTATIVE POLY(A)-BINDING PROTEIN FA   324.6    3.9e-93   4
+O04319     O04319 POLY(A)-BINDING PROTEIN ISOLOG.       292.2    2.3e-83   4
+TIA1_MOUSE P52912 NUCLEOLYSIN TIA-1 (RNA BINDING PROT   288.1    3.9e-82   3
+TIA1_HUMAN P31483 NUCLEOLYSIN TIA-1.                    285.7      2e-81   3
+Q27335     Q27335 POLY(A) BINDING PROTEIN.              280.8    6.3e-80   4
+Q12926     Q12926 ELAV-LIKE NEURONAL PROTEIN 1.         271.3    4.5e-77   3
+Q13235     Q13235 ELAV-LIKE NEURONAL PROTEIN 2 HEL-N2   271.3    4.5e-77   3
+NUCL_CHICK P15771 NUCLEOLIN (PROTEIN C23).              270.9      6e-77   4
+Q60899     Q60899 HU-ANTIGEN D (NERVOUS SYSTEM-SPECIF   270.3    8.9e-77   3
+Q24474     Q24474 RNA-BINDING PROTEIN.                  268.9    2.3e-76   3
+HUD_MOUSE  Q61701 PARANEOPLASTIC ENCEPHALOMYELITIS AN   268.7    2.7e-76   3
+HUD_HUMAN  P26378 PARANEOPLASTIC ENCEPHALOMYELITIS AN   268.7    2.7e-76   3
+HUD_RAT    O09032 PARANEOPLASTIC ENCEPHALOMYELITIS AN   268.7    2.7e-76   3
+O13620     O13620 HYPOTHETICAL 93.7 KD PROTEIN.         267.3    7.1e-76   5
+Q91585     Q91585 RIBONUCLEOPROTEIN.                    267.3    7.2e-76   3
+Q60900     Q60900 HU-ANTIGEN C (MHUC-L).                262.0    2.8e-74   3
+Q91583     Q91583 RIBONUCLEOPROTEIN.                    261.9      3e-74   3
+TIAR_HUMAN Q01085 NUCLEOLYSIN TIAR (TIA-1 RELATED PRO   261.8    3.2e-74   3
+Q91584     Q91584 RIBONUCLEOPROTEIN.                    261.5    3.9e-74   3
+Q90409     Q90409 RIBONUCLEOPROTEIN.                    260.6    7.4e-74   3
+P79736     P79736 ELAV/HUC HOMOLOG.                     260.0    1.1e-73   3
+Q91903     Q91903 XEL-1.                                259.2    1.9e-73   3
+Q06106     Q06106 SIMILAR TO POLYADENYLATE-BINDING PR   258.2      4e-73   5
+Q24473     Q24473 RNA-BINDING PROTEIN.                  256.8    1.1e-72   3
+Q26293     Q26293 RRM9 (FRAGMENT).                      255.9      2e-72   3
+Q14576     Q14576 (HUC).                                255.2    3.1e-72   3
+Q16135     Q16135 NEURON-SPECIFIC RNA RECOGNITION MOT   254.2    6.2e-72   3
+GBP2_YEAST P25555 SINGLE-STRAND TELOMERIC DNA-BINDING   253.6    9.6e-72   3
+Q91582     Q91582 RIBONUCLEOPROTEIN.                    253.2    1.3e-71   3
+Q15717     Q15717 HUR RNA BINDING PROTEIN.              250.3    9.7e-71   3
+Q20084     Q20084 F35H8.5.                              247.9    4.9e-70   3
+PUB1_YEAST P32588 NUCLEAR AND CYTOPLASMIC POLYADENYLA   247.8    5.3e-70   3
+TIAR_MOUSE P70318 NUCLEOLYSIN TIAR (TIA-1 RELATED PRO   243.5    1.1e-68   3
+Q06459     Q06459 NUCLEOLIN.                            242.4    2.3e-68   4
+ELAV_DROVI P23241 ELAV PROTEIN.                         241.8    3.3e-68   3
+ELAV_DROME P16914 ELAV PROTEIN (EMBRYONIC LETHAL ABNO   241.8    3.3e-68   3
+NUCL_XENLA P20397 NUCLEOLIN (PROTEIN C23).              240.9    6.4e-68   4
+P70372     P70372 ELAV G HOMOLOG.                       238.8    2.8e-67   3
+HRB1_YEAST P38922 HRB1 PROTEIN (TOM34 PROTEIN).         228.9    2.5e-64   3
+NUCL_MESAU P08199 NUCLEOLIN (PROTEIN C23).              225.1    3.5e-63   4
+ROM_HUMAN  P52272 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   224.6    5.3e-63   3
+NUCL_MOUSE P09405 NUCLEOLIN (PROTEIN C23).              223.4    1.2e-62   4
+NUCL_RAT   P13383 NUCLEOLIN (PROTEIN C23).              223.1    1.5e-62   4
+Q09959     Q09959 HYPOTHETICAL 49.8 KD PROTEIN C18A3.   221.0    6.4e-62   3
+Q08212     Q08212 NUCLEOLYSIN TIAR HOMOLOG (TIA-1 REL   220.3      1e-61   3
+Q91579     Q91579 RIBONUCLEOPROTEIN.                    213.1    1.5e-59   3
+P93843     P93843 DNA BINDING PROTEIN ACBF.             208.7      3e-58   3
+NUCL_HUMAN P19338 NUCLEOLIN (PROTEIN C23).              207.1    9.4e-58   4
+Q40270     Q40270 RNA-BINDING PROTEIN PRECURSOR.        207.0      1e-57   2
+RO31_NICSY P19683 CHLOROPLAST 31 KD RIBONUCLEOPROTEIN   206.3    1.7e-57   2
+RO28_SPIOL P28644 CHLOROPLAST 28 KD RIBONUCLEOPROTEIN   205.8    2.4e-57   2
+PES4_YEAST P39684 PES4 PROTEIN (DNA POLYMERASE EPSILO   203.3    1.3e-56   4
+P92871     P92871 RNA-BINDING PROTEIN 2 (RNA-BINDING    202.5    2.4e-56   2
+Q39209     Q39209 RNA BINDING PROTEIN (FRAGMENT).       202.5    2.4e-56   2
+Q43350     Q43350 CP31 PRECURSOR.                       202.5    2.4e-56   2
+RO31_ARATH Q04836 CHLOROPLAST 31 KD RIBONUCLEOPROTEIN   202.5    2.4e-56   2
+RO28_NICSY P19682 CHLOROPLAST 28 KD RIBONUCLEOPROTEIN   200.0    1.3e-55   2
+O18409     O18409 TESTIS-SPECIFIC RNP-TYPE RNA BINDIN   199.8    1.5e-55   3
+O02374     O02374 BRUNO.                                199.8    1.5e-55   3
+Q41834     Q41834 NUCLEIC ACID-BINDING PROTEIN PRECUR   199.7    1.6e-55   2
+Q15164     Q15164 POLYADENYLATE BINDING PROTEIN II (F   199.7    1.6e-55   2
+NSR1_YEAST P27476 NUCLEAR LOCALIZATION SEQUENCE BINDI   199.0    2.6e-55   2
+Q08935     Q08935 CHLOROPLAST 29 KD RIBONUCLEOPROTEIN   198.0    5.3e-55   2
+O24306     O24306 RIBONUCLEOPROTEIN.                    197.0    1.1e-54   2
+O23798     O23798 PS16 PROTEIN.                         195.8    2.3e-54   2
+NOP4_YEAST P37838 NUCLEOLAR PROTEIN NOP4 (NUCLEOLAR P   194.4    6.3e-54   4
+Q39062     Q39062 CHLOROPLAST RNA-BINDING PROTEIN CP3   190.4      1e-52   2
+Q39061     Q39061 CHLOROPLAST RNA-BINDING PROTEIN CP3   190.4      1e-52   2
+RO30_NICPL P49313 CHLOROPLAST 30 KD RIBONUCLEOPROTEIN   189.6    1.8e-52   2
+Q08948     Q08948 CHLOROPLAST 33 KD RIBONUCLEOPROTEIN   189.5    1.9e-52   2
+RO33_NICSY P19684 CHLOROPLAST 33 KD RIBONUCLEOPROTEIN   188.9    2.9e-52   2
+Q99628     Q99628 SIAH BINDING PROTEIN 1 (FRAGMENT).    188.9      3e-52   3
+Q43349     Q43349 CP29.                                 188.8      3e-52   2
+Q41367     Q41367 24 KDA RNA BINDING PROTEIN (FRAGMEN   188.8    3.1e-52   2
+GAR2_SCHPO P41891 GAR2 PROTEIN.                         188.0    5.4e-52   2
+O13707     O13707 PRE-RIBOPSOMAL PARTICLE ASSEMBLY PR   188.0    5.4e-52   2
+RO31_NICPL P49314 CHLOROPLAST 31 KD RIBONUCLEOPROTEIN   187.3      9e-52   2
+Q08937     Q08937 CHLOROPLAST 29 KD RIBONUCLEOPROTEIN   186.9    1.1e-51   2
+Q01491     Q01491 COLONY 1.                             184.8    4.8e-51   4
+Q17385     Q17385 ELAV-TYPE RIBONUCLEOPROTEIN.          183.7    1.1e-50   3
+MODU_DROME P13469 DNA-BINDING PROTEIN MODULO.           182.8      2e-50   4
+O17310     O17310 SEX-LETHAL PROTEIN.                   182.3    2.7e-50   2
+O17309     O17309 SEX-LETHAL PROTEIN (FRAGMENT).        182.3    2.7e-50   2
+YHH5_YEAST P38760 HYPOTHETICAL 75.9 KD PROTEIN IN SPO   181.3    5.4e-50   3
+NAM8_YEAST Q00539 NAM8 PROTEIN.                         181.3    5.5e-50   3
+P70055     P70055 RNA BINDING PROTEIN ETR-3.            180.3    1.1e-49   3
+Q92879     Q92879 CUG-BP/HNAB50.                        177.5    7.6e-49   3
+ROA1_RAT   P04256 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   177.3    8.9e-49   2
+ROA1_MOUSE P49312 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   177.3    8.9e-49   2
+ROA1_HUMAN P09651 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   177.3    8.9e-49   2
+ROA1_BOVIN P09867 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   177.3    8.9e-49   2
+Q99141     Q99141 SEX-LETHAL PROTEIN, ALTERNATIVELY S   177.0    1.1e-48   2
+SXLF_DROME P19339 SEX-LETHAL PROTEIN, FEMALE-SPECIFIC   177.0    1.1e-48   2
+Q24668     Q24668 SEX-LETHAL GENE.                      177.0    1.1e-48   2
+O13845     O13845 HYPOTHETICAL 69.4 KD PROTEIN.         176.6    1.4e-48   3
+Q92950     Q92950 ETR-3.                                176.6    1.5e-48   3
+Q60668     Q60668 AU-RICH ELEMENT RNA-BINDING PROTEIN   175.3    3.5e-48   2
+SP49_HUMAN Q15427 SPLICEOSOME ASSOCIATED PROTEIN 49 (   173.8      1e-47   2
+SQD_DROME  Q08473 RNA-BINDING PROTEIN SQUID (HETEROGE   173.5    1.2e-47   2
+O15187     O15187 T-CLUSTER BINDING PROTEIN.            173.4    1.3e-47   2
+Q12771     Q12771 P37 AUF1 RNA-BINDING PROTEIN.         172.9    1.9e-47   2
+Q14103     Q14103 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   172.9    1.9e-47   2
+Q14100     Q14100 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   172.9    1.9e-47   2
+Q14102     Q14102 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   172.9    1.9e-47   2
+Q14101     Q14101 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   172.9    1.9e-47   2
+Q39568     Q39568 GBP1P.                                172.8    2.1e-47   2
+PR24_YEAST P49960 U4/U6 SNRNA-ASSOCIATED SPLICING FAC   172.6    2.3e-47   3
+CABA_MOUSE Q99020 CARG-BINDING FACTOR-A (CBF-A).        171.7    4.4e-47   2
+ROA1_MACMU Q28521 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   171.1    6.4e-47   2
+Q93194     Q93194 C07A4.1.                              170.8    8.3e-47   2
+O01671     O01671 SEX-LETHAL PROTEIN.                   169.9    1.5e-46   2
+Q14498     Q14498 SPLICING FACTOR.                      169.5      2e-46   3
+Q14499     Q14499 SPLICING FACTOR.                      169.5      2e-46   3
+Q01858     Q01858 EII BINDING PROTEIN (HETEROGENEOUS    169.3    2.2e-46   2
+Q63568     Q63568 POLYPYRIMIDINE TRACT BINDING PROTEI   169.1    2.6e-46   4
+PTB_RAT    Q00438 POLYPYRIMIDINE TRACT-BINDING PROTEI   169.1    2.6e-46   4
+Q08940     Q08940 PUTATIVE CHLOROPLAST 33 KD RIBONUCL   167.7    6.9e-46   2
+Q90602     Q90602 SINGLE STRANDED D BOX BINDING FACTO   167.4    8.2e-46   2
+NGR1_YEAST P32831 NEGATIVE GROWTH REGULATORY PROTEIN    165.5    3.1e-45   3
+ROA1_XENLA P17130 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   165.1    4.2e-45   2
+Q60901     Q60901 HU-ANTIGEN C (MHUC-S) (FRAGMENT).     164.8    5.1e-45   2
+PTB_HUMAN  P26599 POLYPYRIMIDINE TRACT-BINDING PROTEI   163.6    1.1e-44   4
+Q39675     Q39675 (CEBP-1).                             163.1    1.7e-44   2
+O14979     O14979 A+U-RICH ELEMENT RNA BINDING FACTOR   160.3    1.2e-43   2
+Q99729     Q99729 ABBP-1.                               160.1    1.3e-43   2
+Q04150     Q04150 HETEROGENEOUS RIBONUCLEOPROTEIN C (   160.1    1.3e-43   2
+Q00880     Q00880 CUTINASE NEGATIVE ACTING PROTEIN.     160.0    1.4e-43   2
+Q17352     Q17352 RRM-TYPE RNA BINDING PROTEIN.         159.2    2.5e-43   2
+YP85_CAEEL Q09442 HYPOTHETICAL 40.9 KD PROTEIN C08B11   159.2    2.5e-43   2
+Q41124     Q41124 CHLOROPLAST RNA BINDING PROTEIN PRE   158.9      3e-43   2
+NAB4_YEAST Q99383 NUCLEAR POLYADENYLATED RNA-BINDING    158.8    3.3e-43   2
+O22791     O22791 PUTATIVE RIBONUCLEOPROTEIN.           158.4    4.2e-43   2
+ROA2_HUMAN P22626 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   157.2    9.9e-43   2
+RO32_XENLA P51992 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   156.1    2.1e-42   2
+Q90626     Q90626 RIBONUCLEOPROTEIN.                    156.1    2.1e-42   2
+O23093     O23093 SIMILAR TO NUCLEOLIN PROTEIN.         155.1    4.3e-42   3
+Q23795     Q23795 HNRNP PROTEIN.                        154.6    6.2e-42   2
+RB27_DROME P48809 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   154.4    6.7e-42   2
+Q17201     Q17201 BMSQD-2.                              154.4    7.2e-42   2
+Q17200     Q17200 BMSQD-1.                              154.4    7.2e-42   2
+PTB_PIG    Q29099 POLYPYRIMIDINE TRACT-BINDING PROTEI   154.3    7.3e-42   4
+Q90407     Q90407 RIBONUCLEOPROTEIN (FRAGMENT).         153.8      1e-41   2
+Q15584     Q15584 HTGR 1 MRNA.                          152.8    2.1e-41   2
+RO31_XENLA P51968 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   151.9    3.8e-41   2
+O04240     O04240 RNA- OR SSDNA-BINDING PROTEIN (FRAG   151.9    3.8e-41   2
+RO22_XENLA P51990 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   151.9    3.9e-41   2
+Q61474     Q61474 MUSASHI-1 HOMOLOG (RNA-BINDING PROT   151.6    4.9e-41   2
+RO21_XENLA P51989 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   151.1    6.6e-41   2
+ROA3_HUMAN P51991 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   150.5      1e-40   2
+Q60690     Q60690 MYELIN BASIC PROTEIN EXPRESSION FAC   149.1    2.7e-40   2
+Q91807     Q91807 NERVOUS SYSTEM-SPECIFIC RNA-BINDING   148.8    3.4e-40   2
+Q91920     Q91920 RIBONUCLEOPROTEIN.                    148.8    3.4e-40   2
+O08752     O08752 MLARK.                                148.8    3.4e-40   2
+Q22037     Q22037 HNRNP LIKE PROTEIN.                   148.8    3.4e-40   2
+Q91808     Q91808 NERVOUS SYSTEM-SPECIFIC RNA-BINDING   148.7    3.5e-40   2
+ROA1_SCHAM P21522 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   147.5    8.3e-40   2
+O02916     O02916 HLARK.                                147.4      9e-40   2
+Q24486     Q24486 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   147.2      1e-39   2
+RB87_DROME P48810 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   147.2      1e-39   2
+Q24847     Q24847 SURFACE ANTIGEN.                      146.6    1.5e-39   2
+ROA1_DROME P07909 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   146.1    2.2e-39   2
+Q99361     Q99361 HETEROGENEOUS RIBONUCLEOPROTEIN A1    146.1    2.2e-39   2
+Q24360     Q24360 NUCLEAR RIBONUCLEOPROTEIN.            146.1    2.2e-39   2
+Q24359     Q24359 NUCLEAR RIBONUCLEOPROTEIN.            146.1    2.2e-39   2
+O22855     O22855 HYPOTHETICAL PROTEIN.                 145.2      4e-39   3
+Q24409     Q24409 MUSASHI.                              145.2    4.1e-39   2
+Q13151     Q13151 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   145.1    4.4e-39   2
+Q21911     Q21911 R10E9.1.                              144.8    5.3e-39   2
+SR55_DROME P26686 SERINE-ARGININE PROTEIN 55 (SRP55)    144.8    5.6e-39   2
+Q24252     Q24252 52-KD BRACKETING PROTEIN.             144.8    5.6e-39   2
+Q23796     Q23796 HNRNP PROTEIN.                        144.2    7.9e-39   2
+SP33_HUMAN Q07955 PRE-MRNA SPLICING FACTOR SF2, P33 S   142.6    2.5e-38   2
+O04425     O04425 FLOWERING TIME CONTROL PROTEIN FCA.   140.4    1.1e-37   2
+O23475     O23475 FCA GAMMA.                            140.4    1.1e-37   2
+RB97_DROME Q02926 RIBONUCLEOPROTEIN RB97D.              139.7    1.9e-37   2
+Q41042     Q41042 PROTEIN LOCALIZED IN THE NUCLEOLI.    139.4    2.2e-37   2
+SR75_HUMAN Q08170 PRE-MRNA SPLICING FACTOR SRP75.       138.8    3.5e-37   2
+U2AF_HUMAN P26368 SPLICING FACTOR U2AF 65 KD SUBUNIT    138.0    5.8e-37   3
+U2AF_MOUSE P26369 SPLICING FACTOR U2AF 65 KD SUBUNIT    138.0    5.8e-37   3
+Q13809     Q13809 ALTERNATIVE SPLICING FACTOR.          137.7    7.4e-37   2
+YG5B_YEAST P53316 HYPOTHETICAL 89.5 KD PROTEIN IN MGA   136.4    1.8e-36   3
+Q15020     Q15020 ORF.                                  136.3      2e-36   2
+Q13242     Q13242 SPLICING FACTOR, ARGININE/SERINE RI   136.3      2e-36   2
+P92966     P92966 SPLICING FACTOR.                      133.6    1.3e-35   2
+O23189     O23189 RNA-BINDING PROTEIN HOMOLOG.          132.7    2.4e-35   3
+Q26692     Q26692 TBRRM1.                               132.7    2.5e-35   3
+Q13245     Q13245 SRP55-3 PRE-MRNA SPLICING FACTOR (F   132.6    2.6e-35   2
+Q13247     Q13247 SRP55-1 PRE-MRNA SPLICING FACTOR.     132.6    2.6e-35   2
+MSSP_HUMAN P29558 SINGLE-STRANDED DNA-BINDING PROTEIN   131.9      4e-35   2
+O23212     O23212 SPLICING FACTOR HOMOLOG.              129.4    2.4e-34   2
+CL4_RAT    Q09167 INSULIN-INDUCED GROWTH RESPONSE PRO   128.8    3.6e-34   2
+Q13243     Q13243 SRP40-1 PRE-MRNA SPLICING FACTOR.     128.8    3.6e-34   2
+P92965     P92965 ARGININE/SERINE-RICH SPLICING FACTO   128.2    5.5e-34   2
+RU1A_HUMAN P09012 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN    127.6    7.9e-34   2
+Q27199     Q27199 NUCLEOLAR PHOSPHOPROTEIN.             127.2    1.1e-33   2
+Q14869     Q14869 MSSP-2 MRNA.                          126.6    1.6e-33   2
+Q15433     Q15433 SCR2.                                 126.6    1.6e-33   2
+Q26658     Q26658 ACTIVATOR PROTEIN.                    126.2    2.1e-33   2
+Q19335     Q19335 HYPOTHETICAL PROTEIN F11A10.2.        125.5    3.6e-33   2
+NONA_DROME Q04047 NO-ON-TRANSIENT A PROTEIN.            125.4    3.7e-33   2
+RU1A_XENLA P45429 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN    125.4    3.8e-33   2
+PSF_HUMAN  P23246 PTB-ASSOCIATED SPLICING FACTOR (PSF   124.6    6.3e-33   2
+Q62189     Q62189 SMALL NUCLEAR RNA.                    124.4    7.3e-33   2
+PTB_MOUSE  P17225 POLYPYRIMIDINE TRACT-BINDING PROTEI   124.3    7.9e-33   4
+P92964     P92964 SPLICING FACTOR.                      124.2    8.8e-33   2
+O00201     O00201 NUCLEAR MATRIX PROTEIN 55.            123.7    1.3e-32   2
+Q12786     Q12786 54 KDA PROTEIN.                       123.7    1.3e-32   2
+O35737     O35737 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   123.6    1.3e-32   3
+ROH1_HUMAN P31943 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   123.6    1.3e-32   3
+P90727     P90727 SPLICING FACTOR U2AF65.               122.5    2.8e-32   3
+Q24024     Q24024 TESTIS-SPECIFIC-RRM-PROTEIN.          122.3    3.1e-32   2
+ROH2_HUMAN P55795 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   121.6    5.2e-32   3
+Q24534     Q24534 SPLICEOSOMAL PROTEIN.                 120.7    9.9e-32   2
+P70333     P70333 MURINE HOMOLOG OF HUMAN FTP-3.        120.6      1e-31   3
+P78814     P78814 FISSION YEAST (FRAGMENT).             120.5    1.1e-31   2
+Q24261     Q24261 BJ6 PROTEIN.                          120.2    1.4e-31   2
+Q15434     Q15434 SCR3.                                 120.2    1.4e-31   2
+O13759     O13759 RNA BINDING POST-TRANSCRIPTIONAL RE   120.0    1.5e-31   2
+Q94901     Q94901 RNA-BINDING PROTEIN LARK.             120.0    1.6e-31   2
+Q91581     Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN   119.7      2e-31   1
+CST2_HUMAN P33240 CLEAVAGE STIMULATION FACTOR, 64 KD    119.7      2e-31   1
+NOP3_YEAST Q01560 NUCLEOLAR PROTEIN 3 (MITOCHONDRIAL    119.5    2.2e-31   2
+Q24562     Q24562 RNA BINDING PROTEIN.                  118.7      4e-31   3
+Q40363     Q40363 NUM1 PROTEIN.                         118.5    4.6e-31   2
+Q63887     Q63887 NONO.                                 117.2    1.1e-30   2
+RU2B_HUMAN P08579 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN    116.7    1.5e-30   2
+Q24113     Q24113 NO-ON TRANSIENT A-LIKE PROTEIN (FRA   116.3      2e-30   2
+Q17430     Q17430 B0035.12.                             114.6    6.5e-30   2
+RU1A_DROME P43332 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN    114.3    8.1e-30   2
+Q61413     Q61413 COLD INDUCIBLE RNA-BINDING PROTEIN    114.2    8.9e-30   1
+Q14011     Q14011 GLYCINE-RICH RNA BINDING PROTEIN CI   114.2    9.1e-30   1
+P93486     P93486 GLYCINE-RICH RNA-BINDING PROTEIN PS   114.1    9.6e-30   1
+YHC4_YEAST P38741 HYPOTHETICAL 80.1 KD PROTEIN IN SNF   113.6    1.4e-29   2
+GRP_DAUCA  Q03878 GLYCINE-RICH RNA-BINDING PROTEIN.     113.4    1.6e-29   1
+GR10_BRANA Q05966 GLYCINE-RICH RNA-BINDING PROTEIN 10   113.0      2e-29   1
+ROG_HUMAN  P38159 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   113.0      2e-29   1
+O23288     O23288 RIBONUCLEOPROTEIN HOMOLOG.            111.4      6e-29   2
+O24106     O24106 RNA-BINDING PROTEIN.                  110.9    8.9e-29   1
+Q40426     Q40426 RNA-BINDING GLYCINE-RICH PROTEIN-1    110.4    1.2e-28   1
+O04070     O04070 SGRP-1 PROTEIN.                       110.3    1.3e-28   1
+Q41518     Q41518 SINGLE-STRANDED NUCLEIC ACID BINDIN   110.3    1.3e-28   1
+O24601     O24601 GLYCINE-RICH RNA BINDING PROTEIN 2.   110.2    1.4e-28   1
+O24188     O24188 OSGRP2.                               110.2    1.4e-28   1
+Q39105     Q39105 GLYCINE-RICH RNA-BINDING PROTEIN (F   109.8    1.8e-28   1
+RNPL_HUMAN P98179 PUTATIVE RNA-BINDING PROTEIN RNPL.    108.8    3.6e-28   1
+GRP1_SINAL P49310 GLYCINE-RICH RNA-BINDING PROTEIN GR   108.6    4.4e-28   1
+Q40052     Q40052 GLYCINE RICH PROTEIN, RNA BINDING P   108.6    4.4e-28   1
+O23793     O23793 RNA BINDING PROTEIN.                  108.1    6.1e-28   1
+Q40437     Q40437 RGP-3 (FRAGMENT).                     108.1    6.1e-28   1
+GRP8_ARATH Q03251 GLYCINE-RICH RNA-BINDING PROTEIN 8    107.9    7.1e-28   1
+GRP2_SINAL P49311 GLYCINE-RICH RNA-BINDING PROTEIN GR   107.8    7.3e-28   1
+O22314     O22314 ASF/SF2 HOMOLOG.                      107.6    8.4e-28   2
+O22315     O22315 ASF/SF2 HOMOLOG.                      107.6    8.4e-28   2
+Q40425     Q40425 RNA-BINDING GRICINE-RICH PROTEIN-1    106.7    1.6e-27   1
+GRP2_SORVU Q99070 GLYCINE-RICH RNA-BINDING PROTEIN 2.   106.5    1.8e-27   1
+RT19_ARATH P39697 MITOCHONDRIAL 40S RIBOSOMAL PROTEIN   106.4    1.9e-27   1
+O35326     O35326 HRS.                                  106.2    2.2e-27   2
+O35479     O35479 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   106.2    2.2e-27   1
+Q40427     Q40427 RNA-BINDING GLYCINE-RICH PROTEIN-1    106.0    2.6e-27   1
+Q41453     Q41453 PUTATIVE GLYCINE RICH RNA BINDING P   105.6    3.4e-27   1
+YA2B_SCHPO Q09702 HYPOTHETICAL 57.8 KD PROTEIN C2F7.1   105.4    3.8e-27   4
+U2AF_SCHPO P36629 SPLICING FACTOR U2AF LARGE SUBUNIT.   105.3    4.3e-27   2
+O24187     O24187 OSGRP1.                               105.2    4.4e-27   1
+O22390     O22390 GLYCINE-RICH PROTEIN.                 105.2    4.6e-27   1
+GRP7_ARATH Q03250 GLYCINE-RICH RNA-BINDING PROTEIN 7.   105.2    4.6e-27   1
+HS49_YEAST Q99181 HSH49 PROTEIN.                        104.7    6.2e-27   2
+Q40436     Q40436 RNA-BINDING GLYCINE RICH PROTEIN (R   103.7    1.3e-26   1
+GRPA_MAIZE P10979 GLYCINE-RICH RNA-BINDING, ABSCISIC    103.7    1.3e-26   1
+Q21900     Q21900 R10E4.2.                              103.7    1.3e-26   2
+O22385     O22385 GLYCINE-RICH PROTEIN.                 103.6    1.3e-26   1
+P90978     P90978 U2AF65.                               103.3    1.6e-26   3
+Q42412     Q42412 RNA-BINDING PROTEIN RZ-1.             102.8    2.3e-26   1
+YIS1_YEAST P40561 HYPOTHETICAL 29.0 KD PROTEIN IN BET   102.7    2.5e-26   1
+O22653     O22653 GLYCINE-RICH RNA-BINDING PROTEIN.     102.4    3.2e-26   1
+P90699     P90699 PUTATIVE RNA BINDING PROTEIN.         102.1    3.9e-26   1
+Q64283     Q64283 SILICA-INDUCED PROTEIN 41 (SIG41).    101.9    4.3e-26   1
+Q15815     Q15815 HTRA2-BETA.                           101.9    4.3e-26   1
+O22703     O22703 PUTATIVE RNA-BINDING PROTEIN.         101.6    5.6e-26   1
+O22384     O22384 GLYCINE-RICH PROTEIN.                 101.4    6.1e-26   1
+Q43472     Q43472 LOW TEMPERATURE-RESPONSIVE RNA-BIND   101.4    6.4e-26   1
+GRF1_HUMAN Q12849 G-RICH SEQUENCE FACTOR-1 (GRSF-1).    100.6    1.1e-25   3
+Q15376     Q15376 Y-CHROMOSOME RNA RECOGNITION MOTIF    100.2    1.4e-25   1
+Q15414     Q15414 RNA BINDING MOTIF PROTEIN 1, RELATE   100.2    1.4e-25   1
+Q09542     Q09542 HYPOTHETICAL 60.3 KD PROTEIN F25B5.   100.0    1.7e-25   2
+O24184     O24184 GLYCINE-RICH RNA-BINDING PROTEIN.      99.9    1.8e-25   1
+YNR5_YEAST P53883 HYPOTHETICAL 45.7 KD PROTEIN IN RPS    99.7      2e-25   2
+Q39244     Q39244 U1SNRNP-SPECIFIC PROTEIN.              99.6    2.2e-25   2
+Q62093     Q62093 PR264/SC35.                            99.6    2.2e-25   1
+SC35_CHICK P30352 SPLICING FACTOR SC35 (SC-35) (SPLIC    99.6    2.2e-25   1
+Q44555     Q44555 RNA-BINDING PROTEIN.                   99.5    2.3e-25   1
+Q15415     Q15415 YRRM2.                                 99.3    2.6e-25   1
+RN15_YEAST P25299 MRNA 3'-END PROCESSING PROTEIN RNA1    98.9    3.7e-25   1
+O25501     O25501 SS-DNA BINDING PROTEIN 12RNP2 PRECU    95.0    5.3e-24   1
+Q19706     Q19706 F22B5.2.                               95.0    5.3e-24   1
+Q39201     Q39201 RIBONUCLEOPROTEIN.                     94.9    5.7e-24   2
+ROF_HUMAN  P52597 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO    94.8    5.9e-24   3
+Q23120     Q23120 W02B12.2.                              94.7    6.7e-24   2
+O22905     O22905 FCA GAMMA ISOLOG.                      94.5    7.3e-24   2
+Q44560     Q44560 RBPA1 PROTEIN.                         94.5    7.7e-24   1
+Q90408     Q90408 RIBONUCLEOPROTEIN (FRAGMENT).          93.9    1.2e-23   1
+Q08374     Q08374 RNA-BINDING PROTEIN RBPA.              93.7    1.3e-23   1
+Q13148     Q13148 TAR DNA-BINDING PROTEIN-43.            93.7    1.3e-23   2
+Q13595     Q13595 TRANSFORMER-2 ALPHA.                   93.5    1.5e-23   1
+Q60990     Q60990 RBM.                                   92.9    2.3e-23   1
+TRA2_DROME P19018 TRANSFORMER-2 SEX-DETERMINING PROTE    92.8    2.4e-23   1
+O13741     O13741 HYPOTHETICAL 49.4 KD PROTEIN.          92.4    3.3e-23   2
+P70807     P70807 RNA-BINDING PROTEIN.                   92.3    3.4e-23   1
+P73557     P73557 RNA-BINDING PROTEIN.                   91.8      5e-23   1
+Q23121     Q23121 W02B12.3.                              91.5      6e-23   2
+O35698     O35698 RNA-BINDING PROTEIN.                   90.8      1e-22   1
+O15414     O15414 CAGH4.                                 90.5    1.2e-22   1
+Q44556     Q44556 RNA-BINDING PROTEIN.                   90.3    1.4e-22   1
+Q21323     Q21323 SIMILAR TO U1 SMALL NUCLEAR RIBONUC    90.3    1.4e-22   2
+RN24_SCHPO Q09100 RNA-BINDING PROTEIN RNP24.             90.1    1.6e-22   2
+O02008     O02008 TRANSFORMER-2 PROTEIN ISOFORM 272.     90.0    1.7e-22   1
+O02009     O02009 TRANSFORMER-2 PROTEIN ISOFORM 225.     90.0    1.7e-22   1
+YDC1_SCHPO Q10422 HYPOTHETICAL 33.6 KD PROTEIN C25G10    89.7    2.1e-22   1
+Q44554     Q44554 RNA-BINDING PROTEIN RBPB.              89.3    2.8e-22   1
+SC35_HUMAN Q01130 SPLICING FACTOR SC35 (SC-35) (SPLIC    89.1    3.1e-22   1
+YIS5_YEAST P40565 HYPOTHETICAL 17.1 KD PROTEIN IN BET    88.9    3.6e-22   1
+Q41498     Q41498 U1SNRNP-SPECIFIC PROTEIN, U1A.         88.4      5e-22   2
+Q41810     Q41810 GLYCINE-RICH PROTEIN.                  88.2    6.1e-22   1
+Q46349     Q46349 RNA-BINDING PROTEIN.                   87.8      8e-22   1
+O22922     O22922 SPLICEOSOMAL PROTEIN U2B ISOLOG.       87.7    8.1e-22   2
+Q53322     Q53322 SS-DNA BINDING PROTEIN 12RNP2.         87.2    1.1e-21   1
+Q55343     Q55343 12KDA RNA-BINDING.                     86.9    1.4e-21   1
+RU17_HUMAN P08621 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN     86.9    1.5e-21   1
+Q99377     Q99377 SMALL NUCLEAR RIBONUCLEOPROTEIN (U1    86.9    1.5e-21   1
+Q62376     Q62376 U1RNA-ASSOCIATED 70-KDA PROTEIN (FR    86.9    1.5e-21   1
+P78493     P78493 68 KDA (U1) RIBONUCLEOPROTEIN (U1).    86.9    1.5e-21   1
+Q57014     Q57014 HYPOTHETICAL 11.0 KD PROTEIN.          86.1    2.5e-21   1
+Q41499     Q41499 SPLICEOSOMAL PROTEIN.                  86.0    2.7e-21   2
+RU17_DROME P17133 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN     85.8    3.1e-21   1
+Q62019     Q62019 16 KDA PROTEIN.                        84.8    6.3e-21   2
+Q18999     Q18999 HYPOTHETICAL PROTEIN D2089.4.          84.0    1.1e-20   4
+O22851     O22851 SMALL NUCLEAR RIBONUCLEOPROTEIN ISO    83.9    1.2e-20   1
+O00320     O00320 F25451_2.                              83.9    1.2e-20   1
+RU17_XENLA P09406 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN     83.4    1.7e-20   1
+Q94467     Q94467 SSRNA-BINDING PROTEIN.                 83.3    1.7e-20   1
+O08831     O08831 SRP20 GENE.                            82.8    2.5e-20   1
+X16_HUMAN  P23152 PRE-MRNA SPLICING FACTOR SRP20 (X16    82.8    2.5e-20   1
+P91414     P91414 SIMILARITY TO RNA RECOGNITION MOTIF    82.5      3e-20   1
+Q42404     Q42404 U1 SNRNP 70K PROTEIN.                  82.4    3.2e-20   1
+ROC_RAT    P17132 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO    81.7    5.2e-20   1
+Q55345     Q55345 RNA-BINDING PROTEIN.                   81.7    5.4e-20   1
+YDB2_SCHPO Q10355 HYPOTHETICAL 24.4 KD PROTEIN C22E12    81.7    5.4e-20   1
+O18352     O18352 RNA BINDING PROTEIN.                   81.5    6.2e-20   1
+Q18318     Q18318 SIMILAR TO RNA-BINDING PROTEIN.        81.4    6.7e-20   1
+P93396     P93396 TRANSFORMER-SR RIBONUCLEOPROTEIN (F    81.4    6.8e-20   1
+Q09511     Q09511 PROBABLE SPLICING FACTOR SC35 (PR26    80.8    9.8e-20   1
+Q15351     Q15351 SEB4B (FRAGMENT).                      80.8      1e-19   1
+Q15350     Q15350 SEB4D (FRAGMENT).                      80.8      1e-19   1
+YD3D_SCHPO Q10277 HYPOTHETICAL 59.1 KD PROTEIN C13G7.    80.7    1.1e-19   2
+O23866     O23866 MEI2-LIKE PROTEIN.                     80.5    1.2e-19   2
+Q60399     Q60399 C23 NUCLEOLIN, GLYCINE RICH REGION     80.5    1.3e-19   1
+Q55342     Q55342 RNA-BINDING PROTEIN.                   80.3    1.4e-19   1
+Q62176     Q62176 SEB4.                                  80.2    1.5e-19   1
+Q21832     Q21832 R07E5.14.                              80.2    1.5e-19   1
+Q38915     Q38915 RNA-BINDING PROTEIN.                   79.7    2.1e-19   2
+Q16629     Q16629 SPLICING FACTOR, ARGININE/SERINE-RI    79.6    2.4e-19   1
+RNP1_YEAST P32385 RIBONUCLEOPROTEIN-1.                   79.0    3.5e-19   1
+YQOC_CAEEL Q09301 HYPOTHETICAL 21.6 KD PROTEIN EEED8.    79.0    3.6e-19   1
+Q16662     Q16662 SRP40-2.                               78.8    3.9e-19   1
+P78795     P78795 FISSION YEAST (FRAGMENT).              78.8      4e-19   1
+YQO4_CAEEL Q09295 HYPOTHETICAL 26.2 KD PROTEIN EEED8.    78.5      5e-19   1
+Q55341     Q55341 21KDA RNA-BINDING PROTEIN, 12RNP1.     78.4    5.4e-19   1
+Q53321     Q53321 SS-DNA BINDING PROTEIN 12RNP1.         78.4    5.4e-19   1
+O14801     O14801 EUKARYOTIC TRANSLATION INITIATION F    78.0      7e-19   1
+EWS_MOUSE  Q61545 RNA-BINDING PROTEIN EWS.               77.9    7.2e-19   1
+Q18409     Q18409 SIMILAR TO PRE-MRNA SPLICING FACTOR    77.9    7.7e-19   1
+O14875     O14875 RNA BINDING PROTEIN.                   77.5    9.9e-19   1
+EWS_HUMAN  Q01844 RNA-BINDING PROTEIN EWS.               77.4      1e-18   1
+O14327     O14327 RNA BINDING PROTEIN.                   77.4    1.1e-18   1
+O13829     O13829 PUTATIVE SMALL NUCLEAR RIBONUCLEOPR    77.4    1.1e-18   1
+O00425     O00425 PUTATIVE RNA BINDING PROTEIN KOC (K    76.4    2.1e-18   2
+Q61954     Q61954 NEOSIN (FRAGMENT).                     76.3    2.3e-18   2
+YNL0_YEAST P53927 HYPOTHETICAL 25.4 KD PROTEIN IN CYB    75.8    3.2e-18   1
+SSB1_YEAST P10080 SINGLE-STRANDED NUCLEIC ACID-BINDIN    75.6    3.6e-18   2
+O35935     O35935 POLY(A) BINDING PROTEIN II.            75.5      4e-18   1
+Q28165     Q28165 POLYA BINDING PROTEIN II.              75.5      4e-18   1
+Q99730     Q99730 TAT-SF1.                               75.2      5e-18   2
+Q27926     Q27926 RNA BINDING PROTEIN.                   75.1    5.3e-18   1
+Q16560     Q16560 U1-SNRNP BINDING PROTEIN HOMOLOG.      74.8    6.3e-18   1
+Q55765     Q55765 HYPOTHETICAL 16.6 KD PROTEIN.          74.8    6.5e-18   1
+O23146     O23146 HNRNP-LIKE PROTEIN.                    74.1      1e-17   1
+O35335     O35335 RNA BINDING PROTEIN.                   74.1      1e-17   1
+Q22030     Q22030 R74.5.                                 73.0    2.3e-17   1
+Q10572     Q10572 44.4 KD RNA-BINDING PROTEIN IN FOX-    72.7    2.8e-17   1
+Q22304     Q22304 SIMILAR TO C. ELEGANS PROTEIN R74.5    72.7    2.8e-17   1
+Q21155     Q21155 SIMILAR TO RNA BINDING PROTEINS.       72.6    2.9e-17   1
+Q22318     Q22318 T07F10.3.                              72.1    4.2e-17   2
+Q93233     Q93233 C17E4.5.                               72.0    4.3e-17   1
+Q02427     Q02427 RNA BINDING PROTEIN 1.                 71.9    4.7e-17   1
+Q13244     Q13244 SRP55-2 PRE-MRNA SPLICING FACTOR.      71.8      5e-17   1
+Q15287     Q15287 RNA-BINDING PROTEIN.                   71.4    6.7e-17   1
+Q13344     Q13344 FUS-LIKE PROTEIN (FRAGMENT).           71.3    7.4e-17   1
+FUS_HUMAN  P35637 RNA-BINDING PROTEIN FUS/TLS.           71.3    7.4e-17   1
+FUS_BOVIN  Q28009 RNA-BINDING PROTEIN FUS/TLS (NUCLEA    71.3    7.4e-17   1
+Q62826     Q62826 M4 PROTEIN HOMOLOG.                    71.3    7.5e-17   1
+Q24491     Q24491 RNA BINDING PROTEIN.                   70.8    1.1e-16   1
+O04432     O04432 GLYCINE-RICH PROTEIN.                  70.3    1.5e-16   1
+Q08208     Q08208 CHROMOSOME XV READING FRAME ORF YOL    70.0    1.8e-16   1
+Q27294     Q27294 RNA BINDING PROTEIN CABEZA.            69.6    2.3e-16   1
+O14369     O14369 PUTATIVE RNA-BINDING PROTEIN.          68.5    5.2e-16   1
+YSO5_CAEEL Q10130 HYPOTHETICAL 98.0 KD PROTEIN F56D1.    66.9    1.5e-15   1
+ROC_HUMAN  P07910 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO    66.4    2.1e-15   1
+Q92751     Q92751 HTAFII68.                              66.2    2.5e-15   1
+Q92804     Q92804 PUTATIVE RNA BINDING PROTEIN RBP56.    66.2    2.5e-15   1
+WHI3_YEAST P34761 WHI3 PROTEIN.                          65.4    4.4e-15   1
+CB20_XENLA P52299 20 KD NUCLEAR CAP BINDING PROTEIN (    65.3    4.5e-15   1
+CB20_HUMAN P52298 20 KD NUCLEAR CAP BINDING PROTEIN (    64.1      1e-14   1
+O23131     O23131 CONTAINS PROCITE 'RNP1' PUTATIVE RN    64.0    1.1e-14   1
+Q21322     Q21322 SIMILAR TO U1 SMALL NUCLEAR RIBONUC    64.0    1.2e-14   2
+O42254     O42254 ZIPCODE-BINDING PROTEIN.               63.8    1.3e-14   2
+Q92909     Q92909 DAZLA.                                 63.6    1.5e-14   1
+Q92904     Q92904 RNA BINDING PROTEIN.                   63.6    1.5e-14   1
+Q95192     Q95192 RNA-BINDING PROTEIN.                   63.6    1.5e-14   1
+IF4B_HUMAN P23588 EUKARYOTIC TRANSLATION INITIATION F    63.5    1.6e-14   1
+Q93594     Q93594 F26A3.2.                               63.4    1.8e-14   1
+Q19018     Q19018 MEC-8 GENE.                            63.2      2e-14   2
+Q22039     Q22039 MEC-8 PROTEIN.                         63.2      2e-14   2
+ROC_XENLA  P19600 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO    63.0    2.2e-14   1
+O15396     O15396 DAZLA PROTEIN.                         62.1    4.4e-14   1
+O13674     O13674 HYPOTHETICAL 73.9 KD PROTEIN.          61.8    5.2e-14   3
+O23646     O23646 RSZP22 PROTEIN.                        61.3    7.5e-14   1
+Q23287     Q23287 SIMILARITY TO 2 RNA RECOGNITION MOT    61.2    7.9e-14   1
+P90871     P90871 HYPOTHETICAL PROTEIN F39H2.2 IN CHR    61.1    8.7e-14   1
+O15042     O15042 KIAA0332 (FRAGMENT).                   60.7    1.1e-13   1
+YSX2_CAEEL Q10021 HYPOTHETICAL 24.0 KD PROTEIN T28D9.    60.7    1.1e-13   1
+Q64368     Q64368 DAZ-LIKE AUTOSOMAL (RNA RECOGNITION    60.3    1.5e-13   1
+Q09331     Q09331 CSX1+ (FRAGMENT).                      60.1    1.7e-13   1
+Q14151     Q14151 KIAA0138 PROTEIN.                      60.1    1.7e-13   1
+Q22412     Q22412 T11G6.8.                               60.0    1.8e-13   1
+Q09335     Q09335 CSX1+ (FRAGMENT).                      59.9      2e-13   1
+Q64012     Q64012 MERC=RNA-BINDING PROTEIN {ALTERNATI    59.7    2.2e-13   1
+Q15056     Q15056 MRNA (KIAA0038) FOR ORF, PARTIAL CD    59.5    2.6e-13   1
+Q42215     Q42215 NAM8 PROTEIN (FRAGMENT).               59.5    2.6e-13   1
+Q09584     Q09584 HYPOTHETICAL 36.0 KD PROTEIN K04G7.    59.4    2.7e-13   1
+Q14924     Q14924 NCBP INTERACTING PROTEIN 1.            59.3      3e-13   1
+Q20414     Q20414 F44G4.4.                               59.0    3.6e-13   2
+MLO3_SCHPO Q09330 MLO3 PROTEIN.                          59.0    3.6e-13   1
+GRP1_SORVU Q99069 GLYCINE-RICH RNA-BINDING PROTEIN 1     58.9    3.9e-13   1
+RU17_YEAST Q00916 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN     58.9      4e-13   1
+O08583     O08583 TRANSCRIPTIONAL COACTIVATOR ALY (AL    58.7    4.6e-13   1
+Q14730     Q14730 LA 4.1 PROTEIN (FRAGMENT).             58.4    5.4e-13   1
+Q91017     Q91017 GIZZARD PTB-ASSOCIATED SPLICING FAC    58.2    6.2e-13   1
+LA_HUMAN   P05455 LUPUS LA PROTEIN (SJOGREN SYNDROME     58.2    6.5e-13   1
+Q15367     Q15367 RIBONUCLEOPROTEIN (LA) (FRAGMENT).     58.2    6.5e-13   1
+Q08920     Q08920 CHROMOSOME XVI READING FRAME ORF YP    57.2    1.3e-12   1
+Q22135     Q22135 T04A8.6.                               56.1    2.7e-12   1
+O23645     O23645 RSZP21 PROTEIN.                        55.9    3.1e-12   1
+ARP2_PLAFA P13824 CLUSTERED-ASPARAGINE-RICH PROTEIN (    55.6    3.9e-12   2
+P97855     P97855 RAS-GTPASE-ACTIVATING PROTEIN SH3-D    55.5      4e-12   1
+Q24207     Q24207 BOULE PROTEIN.                         55.5    4.2e-12   1
+O14797     O14797 HRS (FRAGMENT).                        55.4    4.5e-12   2
+Q62150     Q62150 RIBONUCLEIC ACID BINDING PROTEIN S1    55.2    4.9e-12   1
+Q13283     Q13283 GAP SH3 BINDING PROTEIN.               54.2      1e-11   1
+Q22708     Q22708 ZK1067.6 (FRAGMENT).                   54.1    1.1e-11   2
+Q14136     Q14136 KIAA0122 PROTEIN (FRAGMENT).           54.1    1.1e-11   2
+Q04067     Q04067 D9461.16P.                             54.0    1.2e-11   1
+Q62379     Q62379 U2-SNRNP B'' (PRNP31) (FRAGMENT).      53.7    1.5e-11   1
+Q18724     Q18724 HYPOTHETICAL PROTEIN C50B8.1.          53.3    1.9e-11   1
+ROL_HUMAN  P14866 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO    52.7    2.8e-11   3
+Q15424     Q15424 SCAFFOLD ATTACHMENT FACTOR (FRAGMEN    52.3    3.7e-11   1
+P97379     P97379 RAS-GTPASE-ACTIVATING PROTEIN SH3-D    52.2      4e-11   1
+SRP1_SCHPO Q10193 SRP1 PROTEIN.                          51.1    8.9e-11   1
+RDP_MOUSE  P19426 RD PROTEIN (WL623).                    50.2    1.7e-10   1
+RDP_HUMAN  P18615 RD PROTEIN.                            50.2    1.7e-10   1
+LA_BOVIN   P10881 LUPUS LA PROTEIN HOMOLOG (SJOGREN S    49.8    2.1e-10   1
+LA_RAT     P38656 LUPUS LA PROTEIN HOMOLOG (SJOGREN S    49.8    2.2e-10   1
+LU15_HUMAN P52756 PUTATIVE TUMOR SUPPRESSOR LUCA15.      49.5    2.7e-10   2
+O15237     O15237 MSSP-2 (FRAGMENT).                     49.4    2.8e-10   1
+O15236     O15236 MSSP-2 (FRAGMENT).                     49.4    2.8e-10   1
+O14102     O14102 SPLICOSOME ASSOIATED PROTEIN (FRAGM    49.3    3.1e-10   1
+Q62378     Q62378 U2-SNRNP B'' (PRNP11) (FRAGMENT).      49.1    3.4e-10   1
+Q15380     Q15380 Y-CHROMOSOME RNA RECOGNITION MOTIF     48.9    3.9e-10   1
+U2AG_HUMAN Q01081 SPLICING FACTOR U2AF 35 KD SUBUNIT     48.7    4.6e-10   1
+YFK2_YEAST P43607 HYPOTHETICAL 31.9 KD PROTEIN IN RPL    48.5    5.4e-10   1
+IF32_YEAST P06103 EUKARYOTIC TRANSLATION INITIATION F    48.0    7.6e-10   1
+LA_MOUSE   P32067 LUPUS LA PROTEIN HOMOLOG (SJOGREN S    47.6    9.8e-10   1
+CPO_DROME  Q01617 PUTATIVE COUCH POTATO PROTEIN.         47.3    1.2e-09   1
+Q17175     Q17175 50KDA LECTIN.                          46.6      2e-09   1
+Q13117     Q13117 PUTATIVE RNA BINDING DAZ PROTEIN.      46.1    2.8e-09   1
+O13801     O13801 HYPOTHETICAL 66.4 KD PROTEIN.          45.5    4.3e-09   1
+RU1A_YEAST P32605 U1 SMALL NUCLEAR RIBONUCLEOPROTEIN     45.3    4.9e-09   1
+Q12159     Q12159 RNA ANNEALING PROTEIN YRA1P.           45.2    5.2e-09   1
+NAB3_YEAST P38996 NUCLEAR POLYADENYLATED RNA-BINDING     44.7    7.5e-09   1
+Q08925     Q08925 CHROMOSOME XVI READING FRAME ORF YP    44.1    1.1e-08   4
+Q18601     Q18601 SIMILAR TO HETEROGENEOUS RIBONUCLEO    44.0    1.2e-08   2
+Q63627     Q63627 CTD-BINDING SR-LIKE PROTEIN RA4 (FR    44.0    1.2e-08   1
+O15758     O15758 RNA BINDING PROTEIN.                   43.9    1.3e-08   2
+MEI2_SCHPO P08965 MEI2 PROTEIN.                          43.8    1.4e-08   1
+Q23161     Q23161 W04D2.6 (FRAGMENT).                    43.3      2e-08   1
+O13649     O13649 SPLICEOSOMAL PROTEIN.                  43.0    2.4e-08   1
+P90797     P90797 HYPOTHETICAL PROTEIN D2089.1 (FRAGM    42.9    2.5e-08   1
+Q07655     Q07655 CHROMOSOME IV READING FRAME ORF YDL    42.8    2.7e-08   1
+O15759     O15759 RNA BINDING PROTEIN.                   42.8    2.7e-08   2
+RN12_YEAST P32843 RNA12 PROTEIN.                         42.8    2.8e-08   1
+Q07034     Q07034 RNA BINDING PROTEIN.                   42.3    3.9e-08   1
+Q24375     Q24375 LA RIBONUCLEOPROTEIN.                  40.7    1.1e-07   1
+O18219     O18219 Y57G11A.5.                             40.6    1.2e-07   1
+Q15686     Q15686 HU1-70K-LIKE PROTEIN (216 AA) (FRAG    40.4    1.5e-07   1
+Q15364     Q15364 RIBONUCLEOPROTEIN ANTIGEN.             40.4    1.5e-07   1
+D111_ARATH P42698 DNA-DAMAGE-REPAIR/TOLERATION PROTEI    40.4    1.5e-07   1
+LAB_XENLA  P28049 LUPUS LA PROTEIN HOMOLOG B.            39.8    2.2e-07   1
+LA_DROME   P40796 LA PROTEIN HOMOLOG.                    38.3      6e-07   1
+YAG3_SCHPO Q09868 HYPOTHETICAL 62.1 KD PROTEIN C12G12    38.2    6.8e-07   1
+LAA_XENLA  P28048 LUPUS LA PROTEIN HOMOLOG A.            37.7    9.1e-07   1
+Q92516     Q92516 WS-1/TYPE2.                            37.5    1.1e-06   1
+Q92517     Q92517 WS-1/TYPE3.                            37.5    1.1e-06   1
+Q93062     Q93062 WS-1/TYPE4.                            37.5    1.1e-06   1
+Q63623     Q63623 CTD-BINDING SR-LIKE PROTEIN RA8.       37.2    1.3e-06   1
+Q10667     Q10667 RNA-BINDING PROTEIN RNP-1.             36.7    1.9e-06   1
+YN26_YEAST P53830 HYPOTHETICAL 32.3 KD PROTEIN IN SEC    36.0    3.1e-06   2
+P92204     P92204 ANON-66DA PROTEIN.                     35.8    3.5e-06   1
+Q93733     Q93733 C17E4.11 (FRAGMENT).                   35.5    4.5e-06   1
+MAT3_RAT   P43244 MATRIN 3.                              35.4    4.7e-06   2
+O35833     O35833 MATRIN 3.                              35.4    4.7e-06   2
+P87216     P87216 VIP1 PROTEIN.                          35.2    5.2e-06   1
+Q16630     Q16630 HPBRII-4 MRNA.                         34.8    7.2e-06   1
+P87126     P87126 HYPOTHETICAL 46.4 KD PROTEIN.          34.4    8.9e-06   1
+Q18265     Q18265 SIMILAR TO NUCLEOLIN.                  34.4    9.2e-06   1
+YAS9_SCHPO Q10145 HYPOTHETICAL 82.4 KD PROTEIN C3H8.0    34.2    1.1e-05   1
+YQOA_CAEEL Q09299 HYPOTHETICAL 76.5 KD PROTEIN EEED8.    33.1    2.4e-05   1
+O01806     O01806 SIMILARITY TO AN RNA RECOGNITION MO    32.9    2.5e-05   1
+YIS9_YEAST P40567 HYPOTHETICAL 12.8 KD PROTEIN IN PRI    32.9    2.7e-05   1
+Q18220     Q18220 COSMID C26E6.                          32.3      4e-05   1
+Q18219     Q18219 COSMID C26E6.                          32.3      4e-05   1
+ROU2_HUMAN P07029 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO    31.8    5.6e-05   1
+Q60745     Q60745 RIBONUCLEOPROTEIN (FRAGMENT).          31.5    6.9e-05   2
+IF32_HUMAN P55884 EUKARYOTIC TRANSLATION INITIATION F    31.4    7.4e-05   1
+Q41988     Q41988 GLYCINE-RICH RNA-BINDING PROTEIN (F    31.0    9.7e-05   1
+YBF1_YEAST P34217 HYPOTHETICAL 73.8 KD PROTEIN IN SAS    30.9     0.0001   1
+U2AG_DROME Q94535 SPLICING FACTOR U2AF 35 KD SUBUNIT     29.0    0.00038   1
+U2AG_SCHPO Q09176 SPLICING FACTOR U2AF 23 KD SUBUNIT     28.9    0.00043   1
+IF32_SCHPO Q10425 PROBABLE EUKARYOTIC TRANSLATION INI    28.3    0.00063   1
+PRT1_PICAN P12806 PUTATIVE PRT1 PROTEIN.                 28.2    0.00068   1
+P70501     P70501 S1-1 PROTEIN.                          27.6     0.0011   2
+P87058     P87058 SLA1P.                                 27.4     0.0012   1
+ARP_YEAST  P32770 ARP PROTEIN.                           27.3     0.0012   1
+O13362     O13362 LA AUTOANTIGEN HOMOLOG.                27.3     0.0013   1
+MAT3_HUMAN P43243 MATRIN 3 (FRAGMENT).                   27.0     0.0015   1
+O04554     O04554 T7N9.7.                                26.9     0.0016   1
+Q26457     Q26457 LA AUTOANTIGEN HOMOLOG.                26.2     0.0026   1
+O15047     O15047 KIAA0339.                              25.8     0.0035   1
+NOT4_YEAST P34909 GENERAL NEGATIVE REGULATOR OF TRANS    25.7     0.0039   1
+JSN1_YEAST P47135 JSN1 PROTEIN.                          24.6     0.0081   1
+YAC4_SCHPO Q09818 PUTATIVE GENERAL NEGATIVE REGULATOR    24.6     0.0083   1
+P97343     P97343 PROTEIN KINASE.                        24.4     0.0094   1
+Q63285     Q63285 KIS PROTEIN (PAM COOH-TERMINAL INTE    24.4     0.0094   1
+O18254     O18254 Y57G11C.9.                             24.3     0.0099   1
+YLF1_CAEEL Q03571 HYPOTHETICAL 42.4 KD PROTEIN C40H1.    24.3       0.01   2
+Q18717     Q18717 SIMILAR TO S. CEREVISIAE GENERAL NE    23.6      0.016   1
+YKV4_YEAST P36036 HYPOTHETICAL 23.8 KD PROTEIN IN URA    23.0       0.02   1
+Q08287     Q08287 CHROMOSOME XV READING FRAME ORF YOL    22.5      0.022   1
+IF4B_YEAST P34167 EUKARYOTIC TRANSLATION INITIATION F    22.4      0.023   1
+Q05519     Q05519 ARGININE-RICH 54 KD NUCLEAR PROTEIN    22.3      0.023   1
+Q26273     Q26273 RNA RECOGNITION MOTIF-TYPE RNA-BIND    22.3      0.023   1
+Q14966     Q14966 NUCLEAR PROTEIN, NP220.                22.2      0.024   1
+P87143     P87143 HYPOTHETICAL 64.4 KD PROTEIN.          21.9      0.025   2
+Q20966     Q20966 F58B3.7.                               21.4      0.028   1
+Q24433     Q24433 OVARIAN PROTEIN.                       21.3      0.029   1
+P91156     P91156 SIMILARITY TO HUMAN HETEROGENEOUS N    21.1       0.03   1
+Q04142     Q04142 HYPOTHETICAL PROTEIN (FRAGMENT).       20.6      0.035   1
+Q12221     Q12221 HYPOTHETICAL 119.5 KD PROTEIN YPR03    20.6      0.035   1
+Q93021     Q93021 PUTATIVE TUMOR SUPPRESSOR.             19.5      0.044   1
+NRD1_YEAST P53617 NRD1 PROTEIN.                          18.9      0.051   1
+Q93237     Q93237 C17E4.11 (FRAGMENT).                   18.4      0.057   1
+O01159     O01159 D2089.1 (FRAGMENT).                    16.8      0.083   1
+YAX9_SCHPO Q10200 HYPOTHETICAL 57.1 KD PROTEIN C13F4.    16.7      0.084   1
+Q61464     Q61464 NUCLEAR PROTEIN, NP220.                16.1      0.098   2
+Q21351     Q21351 K08F4.2.                               15.9        0.1   1
+LAH1_YEAST P33399 LA PROTEIN HOMOLOG.                    15.8        0.1   1
+Q06477     Q06477 INTERFERON RESPONSE ELEMENT-BINDING    14.5       0.14   1
+U2R1_HUMAN Q15695 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN     14.1       0.15   1
+U2R2_HUMAN Q15696 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN     14.1       0.15   1
+U2R2_MOUSE Q62377 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN     13.4       0.18   1
+YN8T_YEAST P53741 HYPOTHETICAL 57.7 KD PROTEIN IN LYS    13.2       0.19   1
+O35404     O35404 SYNAPTOJANIN 2 (FRAGMENT).             13.1       0.19   1
+U2R1_MOUSE Q64707 U2 SMALL NUCLEAR RIBONUCLEOPROTEIN     13.1        0.2   1
+Q23391     Q23391 ZK1067.6 (FRAGMENT).                   12.9        0.2   1
+Q10458     Q10458 SLA1 (FRAGMENT).                       12.6       0.22   1
+P78332     P78332 G16 PROTEIN (FRAGMENT).                12.1       0.24   1
+Q18937     Q18937 HYPOTHETICAL PROTEIN D1046.1.          12.0       0.25   1
+O23612     O23612 HYPOTHETICAL 34.5 KD PROTEIN.          11.5       0.28   1
+Q23953     Q23953 D34 IMMUNODOMINANT ANTIGEN.            11.5       0.28   1
+Q26548     Q26548 CYCLOPHYLIN-LIKE PROTEIN TRANS-SPLI    11.2        0.3   1
+O15056     O15056 KIAA0348.                              11.1       0.31   1
+P70166     P70166 CYTOPLASMIC POLYADENYLATION ELEMENT    10.6       0.35   1
+BF41_MOUSE P28659 BRAIN PROTEIN F41.                     10.6       0.35   1
+Q24527     Q24527 MRNA SMOOTH FOR POLYPEPTIDE (HOMOLO    10.5       0.35   1
+O18964     O18964 SYNAPTOJANIN.                          10.2       0.38   1
+Q62504     Q62504 COCHLEAR MRNA (CLONE 28D2) (FRAGMEN     9.2       0.48   1
+Q91572     Q91572 CYTOPLASMIC POLYADENYLATION ELEMENT     9.1       0.49   2
+YQO1_CAEEL Q09293 HYPOTHETICAL 69.9 KD PROTEIN EEED8.     8.8       0.53   1
+Q17561     Q17561 C01F6.5.                                8.7       0.54   1
+YHS7_YEAST P38833 HYPOTHETICAL 27.1 KD PROTEIN IN NDT     8.6       0.55   1
+Q14206     Q14206 ZAKI-4 MRNA IN HUMAN SKIN FIBROBLAS     8.6       0.55   1
+O04526     O04526 F20P5.8.                                7.9       0.65   1
+Q17860     Q17860 SIMILAR TO DIACYLGLYCEROL KINASE.       7.3       0.75   1
+Q92615     Q92615 MYELOBLAST KIAA0217 (FRAGMENT).         7.3       0.75   1
+O22794     O22794 PUTATIVE SPLICING FACTOR U2AF LARGE     6.6       0.87   2
+YMC7_CAEEL P53806 HYPOTHETICAL 26.6 KD PROTEIN F54E7.     5.6        1.1   1
+Q12046     Q12046 CHROMOSOME IV READING FRAME ORF YDL     5.1        1.2   1
+Q07623     Q07623 CHROMOSOME IV READING FRAME ORF YDL     5.1        1.2   1
+Q10954     Q10954 HYPOTHETICAL 78.8 KD PROTEIN B0336.     4.9        1.3   1
+Q26276     Q26276 RNA RECOGNITION MOTIF-TYPE RNA-BIND     4.4        1.5   1
+O01835     O01835 SIMILARITY TO XENOPUS CYTOPLASMIC P     4.2        1.5   1
+O01691     O01691 SIMILAR TO A HUMAN PUTATIVE TUMOR S     4.0        1.6   1
+Q23452     Q23452 F07A11.6 (FRAGMENT).                    3.9        1.6   1
+ROAB_ARTSA P80350 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO     3.8        1.7   1
+YN8V_YEAST P53743 HYPOTHETICAL 36.4 KD PROTEIN IN POP     3.2        1.9   1
+Q26279     Q26279 RNA RECOGNITION MOTIF-TYPE RNA-BIND     2.9        2.1   1
+Q18317     Q18317 SIMILAR TO C. ELEGANS PROTEIN C40H1     2.6        2.2   1
+MUD2_YEAST P36084 SPLICING FACTOR MUD2.                   1.9        2.6   1
+Q21559     Q21559 M18.7.                                  1.2        3.1   1
+O29092     O29092 ACYL-COA DEHYDROGENASE, SHORT CHAIN     0.9        3.3   1
+YG3Q_YEAST P39927 HYPOTHETICAL 47.0 KD PROTEIN IN CYS    -0.2        4.3   1
+P70221     P70221 ORF2 PRODUCT (FRAGMENT).               -0.3        4.3   1
+Y051_NPVAC P41455 HYPOTHETICAL 37.5 KD PROTEIN IN LEF    -0.9          5   1
+YHR9_YEAST P38827 HYPOTHETICAL 123.9 KD PROTEIN IN OR    -1.2        5.4   1
+O35847     O35847 ADAPT78.                               -2.7        7.6   1
+HIPO_CAMJE P45493 HIPPURATE HYDROLASE (EC 3.5.1.32) (    -3.7        9.4   1
+Q60701     Q60701 SPLICING FACTOR, ARGININE/SERINE-RI    -3.7        9.5   1
+Q24424     Q24424 RNA BINDING PROTEIN (FRAGMENT).        -3.9         10   1
+BLSA_HUMAN Q02832 B-LYMPHOCYTE ANTIGEN PRECURSOR (B-L    -4.2         11   1
+XE7_HUMAN  Q02040 PROTEIN XE7.                           -4.2         11   1
+YAQ2_SCHPO Q10103 HYPOTHETICAL 108.7 KD PROTEIN C18G6    -6.5         18   1
+O00583     O00583 DOWN SYNDROME CRITICAL REGION 1 PRO    -6.7         19   1
+O13838     O13838 HYPOTHETICAL 40.3 KD PROTEIN.          -7.5         23   1
+O05954     O05954 UDP-MURNAC-TRIPEPTIDE SYNTHETASE.      -7.6         23   1
+YM28_YEAST Q03790 HYPOTHETICAL 52.6 KD PROTEIN IN IMP    -8.1         26   1
+Q19944     Q19944 F31F6.3.                               -8.7         30   1
+Q26274     Q26274 RNA RECOGNITION MOTIF-TYPE RNA-BIND    -8.9         32   1
+O01886     O01886 SIMILARITY TO THE PEPTIDASE FAMILY     -9.2         34   1
+Q19164     Q19164 HYPOTHETICAL PROTEIN F07D3.3.          -9.2         34   1
+ASM4_YEAST Q05166 ASM4 PROTEIN.                          -9.5         37   1
+Y117_HUMAN P42696 HYPOTHETICAL PROTEIN KIAA0117 (HAL8   -10.0         41   1
+O00582     O00582 DOWN SYNDROME CRITICAL REGION 1 PRO   -10.0         41   1
+O28580     O28580 PHOSPHORIBOSYLFORMYLGLYCINAMIDINE C   -10.9         50   1
+O35309     O35309 NMI.                                  -11.2         54   1
+O29837     O29837 SIGNAL-TRANSDUCING HISTIDINE KINASE   -11.7         61   1
+Q47952     Q47952 PRE-HGBA PRECURSOR.                   -12.8         78   1
+Q08646     Q08646 CHROMOSOME XV READING FRAME ORF YOR   -13.0         82   1
+Q47957     Q47957 HEMOGLOBIN-BINDING PROTEIN.           -13.1         84   1
+Q58954     Q58954 HYPOTHETICAL 21.3 KD PROTEIN 1559.    -13.2         86   1
+Q26278     Q26278 RNA RECOGNITION MOTIF-TYPE RNA-BIND   -13.5         92   1
+Q13287     Q13287 HOU.                                  -14.9    1.3e+02   1
+TKTC_METJA Q58092 PUTATIVE TRANSKETOLASE C-TERMINAL S   -15.0    1.3e+02   1
+PR06_YEAST P19735 PRE-MRNA SPLICING FACTOR PRP6.        -15.1    1.3e+02   1
+KHK_HUMAN  P50053 KETOHEXOKINASE (EC 2.7.1.3) (HEPATI   -15.5    1.5e+02   1
+PUR5_METJA Q57656 PROBABLE PHOSPHORIBOSYLFORMYLGLYCIN   -15.8    1.6e+02   1
+O31824     O31824 YNGD PROTEIN.                         -15.9    1.6e+02   1
+Q92518     Q92518 WS-1/TYPE5 (FRAGMENT).                -16.0    1.6e+02   1
+YD33_SCHPO Q10267 HYPOTHETICAL 30.9 KD PROTEIN C13G7.   -16.1    1.7e+02   1
+Q09135     Q09135 HETEROGENEOUS NUCLEAR RIBONUCLEOPRO   -16.1    1.7e+02   1
+O30057     O30057 HYPOTHETICAL 32.2 KD PROTEIN.         -16.3    1.8e+02   1
+Q42378     Q42378 U1 SNRNP 70K TRUNCATED PROTEIN.       -16.7    1.9e+02   1
+Q42482     Q42482 X P.DELTOIDES HYBRID WOUND RESPONSI   -16.8      2e+02   1
+O35002     O35002 PUTATIVE PROTEASE.                    -17.0    2.1e+02   1
+PGDS_RAT   P20786 ALPHA PLATELET-DERIVED GROWTH FACTO   -17.1    2.1e+02   1
+YY08_METJA Q60307 HYPOTHETICAL PROTEIN MJECS08.         -17.2    2.2e+02   1
+Q48827     Q48827 MAJOR OUTER MEMBRANE PROTEIN PRECUR   -17.8    2.5e+02   1
+Q23637     Q23637 ZK856.3.                              -17.9    2.5e+02   1
+Q48639     Q48639 BGLR.                                 -17.9    2.5e+02   1
+VJ01_VACCC P21032 PROTEIN J1.                           -18.3    2.8e+02   1
+HBA_ARAAR  P01996 HEMOGLOBIN ALPHA-A CHAIN.             -18.4    2.8e+02   1
+YAB9_SCHPO Q09809 HYPOTHETICAL 90.9 KD PROTEIN C2G11.   -18.4    2.8e+02   1
+P94393     P94393 HOMOLOGUE OF HYPOTHETICAL PROTEIN H   -18.7    3.1e+02   1
+Q26271     Q26271 RNA RECOGNITION MOTIF-TYPE RNA-BIND   -18.9    3.2e+02   1
+Q96423     Q96423 CYTOCHROME P450.                      -18.9    3.2e+02   1
+O00373     O00373 L1 ELEMENT L1.24 P40.                 -19.3    3.5e+02   1
+YHB0_YEAST P38748 HYPOTHETICAL 67.5 KD PROTEIN IN PRP   -19.4    3.6e+02   1
+VJ01_VACCV P07616 PROTEIN J1 (PROTEIN F7).              -19.4    3.6e+02   1
+SYH_METJA  Q58406 HISTIDYL-TRNA SYNTHETASE (EC 6.1.1.   -19.8    3.9e+02   1
+Q12452     Q12452 ORF YLR100W.                          -20.0    4.1e+02   1
+Q46102     Q46102 CDTC.                                 -20.2    4.3e+02   1
+O17002     O17002 T23B12.7 PROTEIN.                     -20.3    4.4e+02   1
+Q19942     Q19942 F31F6.1.                              -20.3    4.5e+02   1
+PHYA_SOLTU P30733 PHYTOCHROME A.                        -20.4    4.5e+02   1
+P75023     P75023 CARBOXYL-TERMINAL PROTEASE.           -20.4    4.5e+02   1
+SYF_METJA  Q57911 PROBABLE PHENYLALANYL-TRNA SYNTHETA   -20.4    4.6e+02   1
+Y447_METJA Q57889 HYPOTHETICAL PROTEIN MJ0447.          -20.6    4.7e+02   1
+Y383_METJA Q57828 HYPOTHETICAL PROTEIN MJ0383.          -20.6    4.8e+02   1
+Q94172     Q94172 SIMILAR TO EF-HAND CALCIUM BINDING    -20.8      5e+02   1
+CHLL_CHLRE Q00469 PROTOCHLOROPHYLLIDE REDUCTASE IRON-   -20.9      5e+02   1
+Q61283     Q61283 ALPHA-1 ANTITRYPSIN 1-2 PRECURSOR (   -20.9    5.1e+02   1
+A1A2_MOUSE P22599 ALPHA-1 ANTITRYPSIN 2 PRECURSOR (AL   -20.9    5.1e+02   1
+Q00898     Q00898 ALPHA-1 ANTITRYPSIN 1-5 PRECURSOR (   -20.9    5.1e+02   1
+Q00897     Q00897 ALPHA-1 ANTITRYPSIN 1-4 PRECURSOR (   -20.9    5.1e+02   1
+Q85381     Q85381 HOMOLOG OF VACCINIA VIRUS CDS J1R.    -21.0    5.2e+02   1
+VJ01_VARV  P33004 PROTEIN J1.                           -21.0    5.2e+02   1
+O34784     O34784 DNA-BINDING PROTEIN.                  -21.0    5.2e+02   1
+MENE_HAEIN P44565 O-SUCCINYLBENZOIC ACID--COA LIGASE    -21.1    5.3e+02   1
+T2C2_CHVP1 P31117 TYPE II RESTRICTION ENZYME CVIAII (   -21.2    5.4e+02   1
+CARA_BACSU P25993 CARBAMOYL-PHOSPHATE SYNTHASE, PYRIM   -21.2    5.4e+02   1
+SFCA_ECOLI P26616 PROBABLE MALATE OXIDOREDUCTASE (NAD   -21.2    5.5e+02   1
+Q23267     Q23267 PROBABLE CARBOXYLESTERASE ZC376.3 I   -21.5    5.8e+02   1
+DDLA_ECOLI P23844 D-ALANINE--D-ALANINE LIGASE A (EC 6   -21.5    5.8e+02   1
+KLP1_CHLRE P46870 KINESIN-LIKE PROTEIN KLP1.            -21.5    5.8e+02   1
+Q26272     Q26272 RNA RECOGNITION MOTIF-TYPE RNA-BIND   -21.7    6.1e+02   1
+CPC3_RABIT P00182 CYTOCHROME P450 IIC3 (EC 1.14.14.1)   -22.0    6.6e+02   1
+O29409     O29409 CONSERVED HYPOTHETICAL PROTEIN.       -22.0    6.6e+02   1
+O34925     O34925 PURINE NUCLEOSIDE PHOSPHORYLASE.      -22.2    6.9e+02   1
+Q26281     Q26281 RNA RECOGNITION MOTIF-TYPE RNA-BIND   -22.4    7.2e+02   1
+Q25988     Q25988 (CLONE PNM5) ORF (FRAGMENT).          -22.4    7.2e+02   1
+META_ECOLI P07623 HOMOSERINE O-SUCCINYLTRANSFERASE (E   -22.5    7.3e+02   1
+O04614     O04614 SIMILARITY TO NEBULIN.                -22.8    7.8e+02   1
+Q51350     Q51350 PVDS.                                 -22.8    7.9e+02   1
+P95425     P95425 PVDS.                                 -22.8    7.9e+02   1
+P75431     P75431 TYPE 1 RESTRICTION ENZYME.            -22.9      8e+02   1
+O00364     O00364 L1 ELEMENT L1.14 P40.                 -22.9      8e+02   1
+PRE1_STAAU P03857 PLASMID RECOMBINATION ENZYME (MOBIL   -22.9    8.1e+02   1
+Q96515     Q96515 AR192.                                -23.0    8.2e+02   1
+O27761     O27761 PHOSPHATE-BINDING PROTEIN PSTS HOMO   -23.1    8.4e+02   1
+Q51783     Q51783 IRON REGULATED TRANSCRIPTION ACTIVA   -23.2    8.6e+02   1
+Q00896     Q00896 ALPHA-1 ANTITRYPSIN 1-3 PRECURSOR (   -23.4      9e+02   1
+A1A1_MOUSE P07758 ALPHA-1 ANTITRYPSIN 1 PRECURSOR (AL   -23.4      9e+02   1
+Q20445     Q20445 F46B6.3.                              -23.5    9.3e+02   1
+YNC8_CAEEL P34541 HYPOTHETICAL 15.3 KD PROTEIN R05D3.   -23.5    9.3e+02   1
+Q57721     Q57721 HYPOTHETICAL 12.0 KD PROTEIN 0273.    -23.5    9.3e+02   1
+Y556_METJA Q57976 HYPOTHETICAL PROTEIN MJ0556.          -23.7    9.7e+02   1
+O28372     O28372 LSU RIBOSOMAL PROTEIN L19E (RPL19E)   -23.8    9.8e+02   1
+O00376     O00376 L1 ELEMENT L1.33 P40.                 -23.8    9.9e+02   1
+O00374     O00374 L1 ELEMENT L1.25 P40 AND PUTATIVE P   -23.8    9.9e+02   1
+O00371     O00371 L1 ELEMENT L1.21 P40 AND PUTATIVE P   -23.8    9.9e+02   1
+O00361     O00361 L1 ELEMENT L1.8 P40 AND PUTATIVE P1   -23.8    9.9e+02   1
+Q15605     Q15605 ORF1 CODES FOR A 40 KDA PRODUCT.      -23.8    9.9e+02   1
+O00377     O00377 L1 ELEMENT L1.39 P40 AND PUTATIVE P   -23.8    9.9e+02   1
+O00365     O00365 L1 ELEMENT L1.15 P40 AND PUTATIVE P   -23.8    9.9e+02   1
+Q12880     Q12880 RETROTRANSPOSABLE L1 ELEMENT LRE2 F   -23.8    9.9e+02   1
+O00369     O00369 L1 ELEMENT L1.20 P40 AND PUTATIVE P   -23.8    9.9e+02   1
+
+Parsed for domains:
+Sequence   Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
+--------   ------- ----- -----    ----- -----      -----  -------
+Q91581       1/1      18    89 ..     1    77 []   119.7    2e-31
+CST2_HUMAN   1/1      18    89 ..     1    77 []   119.7    2e-31
+Q61413       1/1       8    79 ..     1    77 []   114.2  8.9e-30
+Q14011       1/1       8    79 ..     1    77 []   114.2  9.1e-30
+P93486       1/1      38   109 ..     1    77 []   114.1  9.6e-30
+GRP_DAUCA    1/1       8    79 ..     1    77 []   113.4  1.6e-29
+Q14498       2/3     252   323 ..     1    77 []   113.3  1.7e-29
+Q14499       2/3     252   323 ..     1    77 []   113.3  1.7e-29
+GR10_BRANA   1/1       8    79 ..     1    77 []   113.0    2e-29
+ROG_HUMAN    1/1      10    81 ..     1    77 []   113.0    2e-29
+Q15097       3/4     168   238 ..     1    77 []   111.8  4.5e-29
+Q93004       3/4     193   263 ..     1    77 []   111.8  4.5e-29
+PABP_MOUSE   3/4     193   263 ..     1    77 []   111.2  7.2e-29
+O24106       1/1       8    79 ..     1    77 []   110.9  8.9e-29
+Q40426       1/1       8    79 ..     1    77 []   110.4  1.2e-28
+O04070       1/1       9    80 ..     1    77 []   110.3  1.3e-28
+Q41518       1/1       8    79 ..     1    77 []   110.3  1.3e-28
+O24601       1/1      10    81 ..     1    77 []   110.2  1.4e-28
+O24188       1/1      39   110 ..     1    77 []   110.2  1.4e-28
+Q39105       1/1      18    89 ..     1    77 []   109.8  1.8e-28
+RO28_SPIOL   2/2     151   222 ..     1    77 []   109.5  2.2e-28
+O22173       3/4     227   297 ..     1    77 []   109.4  2.5e-28
+PAB2_ARATH   3/4     217   287 ..     1    77 []   109.1    3e-28
+RNPL_HUMAN   1/1       8    79 ..     1    77 []   108.8  3.6e-28
+GRP1_SINAL   1/1      10    81 ..     1    77 []   108.6  4.4e-28
+Q40052       1/1       8    79 ..     1    77 []   108.6  4.4e-28
+Q40270       2/2     207   278 ..     1    77 []   108.1  5.9e-28
+Q15164       1/2       4    74 ..     1    77 []   108.1  5.9e-28
+Q13310       3/4     193   263 ..     1    77 []   108.1  5.9e-28
+O23793       1/1      40   111 ..     1    77 []   108.1  6.1e-28
+Q40437       1/1      40   111 ..     1    77 []   108.1  6.1e-28
+Q41834       2/2     221   292 ..     1    77 []   108.0  6.3e-28
+GRP8_ARATH   1/1       8    79 ..     1    77 []   107.9  7.1e-28
+GRP2_SINAL   1/1      10    81 ..     1    77 []   107.8  7.3e-28
+TIA1_MOUSE   2/3     108   179 ..     1    77 []   107.7  8.2e-28
+O24306       2/2     208   279 ..     1    77 []   107.5  9.4e-28
+RO31_NICSY   2/2     232   303 ..     1    77 []   107.5  9.4e-28
+P92871       2/2     232   303 ..     1    77 []   107.4  9.7e-28
+Q39209       2/2     227   298 ..     1    77 []   107.4  9.7e-28
+Q43350       2/2     231   302 ..     1    77 []   107.4  9.7e-28
+RO31_ARATH   2/2     246   317 ..     1    77 []   107.4  9.7e-28
+P93616       3/4     213   283 ..     1    77 []   106.9  1.4e-27
+Q40425       1/1       8    79 ..     1    77 []   106.7  1.6e-27
+GRP2_SORVU   1/1      10    81 ..     1    77 []   106.5  1.8e-27
+P87135       3/4     263   333 ..     1    77 []   106.5  1.9e-27
+PABP_SCHPO   3/4     249   319 ..     1    77 []   106.5  1.9e-27
+RT19_ARATH   1/1      33   104 ..     1    77 []   106.4  1.9e-27
+O35479       1/1      10    81 ..     1    77 []   106.2  2.2e-27
+Q40427       1/1       8    79 ..     1    77 []   106.0  2.6e-27
+Q41453       1/1       8    79 ..     1    77 []   105.6  3.4e-27
+O24187       1/1      10    81 ..     1    77 []   105.2  4.4e-27
+O22390       1/1      10    81 ..     1    77 []   105.2  4.6e-27
+GRP7_ARATH   1/1      10    81 ..     1    77 []   105.2  4.6e-27
+TIA1_HUMAN   2/3      97   168 ..     1    77 []   104.9  5.5e-27
+Q08935       2/2     190   261 ..     1    77 []   104.7  6.3e-27
+RO30_NICPL   2/2     196   267 ..     1    77 []   104.7  6.3e-27
+Q39953       3/4     206   276 ..     1    77 []   104.0    1e-26
+Q39061       1/2     118   189 ..     1    77 []   103.7  1.3e-26
+Q39062       1/2     110   181 ..     1    77 []   103.7  1.3e-26
+Q40436       1/1      42   113 ..     1    77 []   103.7  1.3e-26
+GRPA_MAIZE   1/1      10    81 ..     1    77 []   103.7  1.3e-26
+O22385       1/1      10    81 ..     1    77 []   103.6  1.3e-26
+RO28_NICSY   2/2     193   264 ..     1    77 []   103.6  1.4e-26
+GAR2_SCHPO   2/2     368   438 ..     1    77 []   103.2  1.8e-26
+O13707       2/2     368   438 ..     1    77 []   103.2  1.8e-26
+O23798       2/2     211   282 ..     1    77 []   102.9  2.1e-26
+Q42412       1/1       8    79 ..     1    77 []   102.8  2.3e-26
+YIS1_YEAST   1/1      66   136 ..     1    77 []   102.7  2.5e-26
+O22653       1/1       9    80 ..     1    77 []   102.4  3.2e-26
+P90699       1/1       5    76 ..     1    77 []   102.1  3.9e-26
+Q15815       1/1     120   191 ..     1    77 []   101.9  4.3e-26
+Q64283       1/1     120   191 ..     1    77 []   101.9  4.3e-26
+RO33_NICSY   1/2     116   187 ..     1    77 []   101.8  4.8e-26
+Q08948       1/2     111   182 ..     1    77 []   101.8  4.8e-26
+Q08940       1/2     105   176 ..     1    77 []   101.8  4.8e-26
+O22703       1/1      14    85 ..     1    77 []   101.6  5.6e-26
+O22384       1/1      10    81 ..     1    77 []   101.4  6.1e-26
+Q43472       1/1       8    79 ..     1    77 []   101.4  6.4e-26
+Q62029       3/4     193   263 ..     1    77 []   100.9  8.7e-26
+Q09959       2/3     177   248 ..     1    77 []   100.8  9.7e-26
+Q15376       1/1      10    80 ..     1    77 []   100.2  1.4e-25
+Q15414       1/1      10    80 ..     1    77 []   100.2  1.4e-25
+Q24474       1/3     112   183 ..     1    77 []   100.1  1.5e-25
+Q26293       1/3     112   183 ..     1    77 []   100.1  1.5e-25
+Q24473       1/3     112   183 ..     1    77 []   100.1  1.5e-25
+O24184       1/1      10    81 ..     1    77 []    99.9  1.8e-25
+PABP_YEAST   2/4     127   197 ..     1    77 []    99.8  1.9e-25
+Q92227       3/4     225   295 ..     1    77 []    99.6  2.2e-25
+SC35_CHICK   1/1      16    87 ..     1    77 []    99.6  2.2e-25
+Q62093       1/1      16    87 ..     1    77 []    99.6  2.2e-25
+NSR1_YEAST   1/2     170   241 ..     1    77 []    99.5  2.3e-25
+NSR1_YEAST   2/2     269   340 ..     1    77 []    99.5  2.3e-25
+Q44555       1/1       3    74 ..     1    77 []    99.5  2.3e-25
+PABP_HUMAN   1/4      13    84 ..     1    77 []    99.4  2.6e-25
+Q93004       1/4      13    84 ..     1    77 []    99.4  2.6e-25
+PABP_MOUSE   1/4      13    84 ..     1    77 []    99.4  2.6e-25
+Q15415       1/1      10    80 ..     1    77 []    99.3  2.6e-25
+TIA1_HUMAN   1/3       9    78 ..     1    77 []    99.2  2.8e-25
+TIA1_MOUSE   1/3       9    78 ..     1    77 []    99.2  2.8e-25
+Q13235       1/3      41   112 ..     1    77 []    99.2  2.8e-25
+Q12926       1/3      41   112 ..     1    77 []    99.2  2.8e-25
+Q60899       1/3      41   112 ..     1    77 []    99.2  2.8e-25
+RO31_NICPL   2/2     210   281 ..     1    77 []    98.9  3.5e-25
+RN15_YEAST   1/1      20    91 ..     1    77 []    98.9  3.7e-25
+RO31_NICSY   1/2     138   209 ..     1    77 []    98.8  3.8e-25
+Q40270       1/2     113   184 ..     1    77 []    98.7    4e-25
+O17309       1/2      97   168 ..     1    77 []    98.6  4.3e-25
+O17310       1/2     104   175 ..     1    77 []    98.6  4.3e-25
+PABP_XENLA   1/4      13    84 ..     1    77 []    98.5  4.7e-25
+TIAR_HUMAN   2/3      99   170 ..     1    77 []    98.5  4.8e-25
+O15187       1/2      60   131 ..     1    77 []    98.5  4.8e-25
+TIAR_MOUSE   2/3     116   187 ..     1    77 []    98.5  4.8e-25
+PAB2_ARATH   2/4     126   196 ..     1    77 []    98.3  5.4e-25
+Q08937       2/2     209   280 ..     1    77 []    98.3  5.4e-25
+Q13310       1/4      13    84 ..     1    77 []    98.3  5.5e-25
+Q91584       1/3      36   107 ..     1    77 []    98.1  6.1e-25
+P79736       1/3      40   111 ..     1    77 []    98.1  6.1e-25
+PES4_YEAST   1/4      93   164 ..     1    77 []    97.8  7.8e-25
+ELAV_DROME   3/3     404   475 ..     1    77 []    97.5  9.4e-25
+ELAV_DROVI   3/3     440   511 ..     1    77 []    97.5  9.4e-25
+PAB2_ARATH   4/4     320   390 ..     1    77 []    97.5  9.6e-25
+Q60900       1/3      41   112 ..     1    77 []    97.3    1e-24
+Q16135       1/3      32   103 ..     1    77 []    97.3    1e-24
+Q41367       1/2      40   111 ..     1    77 []    96.5  1.9e-24
+RO28_NICSY   1/2      99   170 ..     1    77 []    96.4    2e-24
+SQD_DROME    1/2      58   128 ..     1    77 []    96.3  2.1e-24
+RO28_SPIOL   1/2      57   128 ..     1    77 []    96.2  2.3e-24
+HUD_HUMAN    1/3      48   119 ..     1    77 []    96.2  2.3e-24
+HUD_MOUSE    1/3      53   124 ..     1    77 []    96.2  2.3e-24
+HUD_RAT      1/3      41   112 ..     1    77 []    96.2  2.3e-24
+Q91585       1/3      48   119 ..     1    77 []    96.2  2.3e-24
+Q90409       1/3      46   117 ..     1    77 []    96.2  2.3e-24
+PUB1_YEAST   2/3     163   234 ..     1    77 []    95.7  3.2e-24
+P93616       4/4     316   386 ..     1    77 []    95.6  3.6e-24
+P92871       1/2     138   209 ..     1    77 []    95.4  4.1e-24
+RO31_ARATH   1/2     152   223 ..     1    77 []    95.4  4.1e-24
+Q39209       1/2     133   204 ..     1    77 []    95.4  4.1e-24
+Q43350       1/2     137   208 ..     1    77 []    95.4  4.1e-24
+PABP_XENLA   3/4     193   263 ..     1    77 []    95.3  4.2e-24
+PABP_DROME   3/4     183   254 ..     1    77 []    95.3  4.4e-24
+Q24668       1/2     121   192 ..     1    77 []    95.2  4.5e-24
+SXLF_DROME   1/2     127   198 ..     1    77 []    95.2  4.5e-24
+Q99141       1/2     119   190 ..     1    77 []    95.2  4.5e-24
+Q39568       1/2      13    83 ..     1    77 []    95.1  4.9e-24
+O25501       1/1       4    74 ..     1    77 []    95.0  5.3e-24
+Q19706       1/1     178   249 ..     1    77 []    95.0  5.3e-24
+Q13235       3/3     265   336 ..     1    77 []    94.8    6e-24
+Q12926       3/3     278   349 ..     1    77 []    94.8    6e-24
+PABP_YEAST   3/4     220   290 ..     1    77 []    94.8  6.2e-24
+Q43349       2/2     251   322 ..     1    77 []    94.6    7e-24
+Q43349       1/2     101   172 ..     1    77 []    94.5  7.6e-24
+Q44560       1/1       3    74 ..     1    77 []    94.5  7.7e-24
+Q13310       4/4     296   365 ..     1    77 []    94.2  9.5e-24
+Q15164       2/2     107   176 ..     1    77 []    94.2  9.5e-24
+Q14100       1/2      11    81 ..     1    77 []    93.9  1.1e-23
+Q12771       1/2      78   148 ..     1    77 []    93.9  1.1e-23
+Q01858       1/2      78   148 ..     1    77 []    93.9  1.1e-23
+Q14101       1/2      99   169 ..     1    77 []    93.9  1.1e-23
+Q60668       1/2      68   138 ..     1    77 []    93.9  1.1e-23
+Q14103       1/2      99   169 ..     1    77 []    93.9  1.1e-23
+Q14102       1/2       8    78 ..     1    77 []    93.9  1.1e-23
+Q91584       3/3     267   338 ..     1    77 []    93.9  1.2e-23
+P79736       3/3     264   335 ..     1    77 []    93.9  1.2e-23
+Q90408       1/1     102   173 ..     1    77 []    93.9  1.2e-23
+Q91583       3/3     308   379 ..     1    77 []    93.8  1.2e-23
+Q60899       3/3     279   350 ..     1    77 []    93.8  1.2e-23
+Q91903       3/3     308   379 ..     1    77 []    93.8  1.2e-23
+Q08374       1/1       3    74 ..     1    77 []    93.7  1.3e-23
+Q08935       1/2      89   160 ..     1    77 []    93.7  1.3e-23
+Q08212       2/3      97   168 ..     1    77 []    93.7  1.3e-23
+Q20084       1/3      44   115 ..     1    77 []    93.6  1.4e-23
+Q13595       1/1     121   192 ..     1    77 []    93.5  1.5e-23
+O22173       2/4     136   206 ..     1    77 []    93.4  1.6e-23
+Q41367       2/2     137   208 ..     1    77 []    93.3  1.7e-23
+Q93194       1/2      29   100 ..     1    77 []    93.1    2e-23
+Q91585       3/3     285   356 ..     1    77 []    93.1    2e-23
+Q90409       3/3     286   357 ..     1    77 []    93.1    2e-23
+HUD_RAT      3/3     292   363 ..     1    77 []    93.1    2e-23
+HUD_HUMAN    3/3     299   370 ..     1    77 []    93.1    2e-23
+HUD_MOUSE    3/3     304   375 ..     1    77 []    93.1    2e-23
+PAB5_ARATH   3/4     227   297 ..     1    77 []    93.0  2.1e-23
+O01671       1/2      66   137 ..     1    77 []    93.0  2.1e-23
+Q60990       1/1      10    81 ..     1    77 []    92.9  2.3e-23
+O23798       1/2     117   188 ..     1    77 []    92.9  2.3e-23
+TRA2_DROME   1/1      99   170 ..     1    77 []    92.8  2.4e-23
+Q14576       1/3      41   112 ..     1    77 []    92.8  2.4e-23
+Q41124       2/2     206   277 ..     1    77 []    92.7  2.5e-23
+PABP_DROME   1/4       4    75 ..     1    77 []    92.7  2.5e-23
+Q19579       4/4     345   415 ..     1    77 []    92.6  2.9e-23
+Q19581       4/4     345   415 ..     1    77 []    92.6  2.9e-23
+Q13310       2/4     101   170 ..     1    77 []    92.5  3.1e-23
+NOP4_YEAST   1/4      28    98 ..     1    77 []    92.4  3.2e-23
+P70807       1/1       3    74 ..     1    77 []    92.3  3.4e-23
+Q24473       3/3     363   434 ..     1    77 []    92.2  3.6e-23
+Q26293       3/3     363   434 ..     1    77 []    92.2  3.6e-23
+Q24474       3/3     358   429 ..     1    77 []    92.2  3.6e-23
+Q60901       2/2     194   265 ..     1    77 []    92.2  3.7e-23
+Q60900       3/3     286   357 ..     1    77 []    92.2  3.7e-23
+SP49_HUMAN   2/2     102   174 ..     1    77 []    92.1  3.8e-23
+PAB2_ARATH   1/4      38   109 ..     1    77 []    92.1  3.9e-23
+PABP_YEAST   4/4     323   393 ..     1    77 []    92.0  4.2e-23
+ROA1_MOUSE   2/2     106   176 ..     1    77 []    92.0  4.2e-23
+ROA1_BOVIN   2/2     106   176 ..     1    77 []    92.0  4.2e-23
+ROA1_HUMAN   2/2     106   176 ..     1    77 []    92.0  4.2e-23
+ROA1_RAT     2/2     106   176 ..     1    77 []    92.0  4.2e-23
+P70372       1/3      22    93 ..     1    77 []    91.9  4.7e-23
+PABP_XENLA   2/4     101   170 ..     1    77 []    91.8  4.8e-23
+Q15097       2/4      76   145 ..     1    77 []    91.8    5e-23
+PABP_MOUSE   2/4     101   170 ..     1    77 []    91.8    5e-23
+PABP_HUMAN   2/4     101   170 ..     1    77 []    91.8    5e-23
+Q93004       2/4     101   170 ..     1    77 []    91.8    5e-23
+P73557       1/1       3    74 ..     1    77 []    91.8    5e-23
+P87135       1/4      82   153 ..     1    77 []    91.7  5.2e-23
+PABP_SCHPO   1/4      68   139 ..     1    77 []    91.7  5.2e-23
+HRB1_YEAST   1/3     138   207 ..     1    77 []    91.7  5.3e-23
+Q41834       1/2     127   198 ..     1    77 []    91.7  5.4e-23
+Q62029       1/4      13    84 ..     1    77 []    91.6  5.5e-23
+Q15717       1/3      22    93 ..     1    77 []    91.5  6.1e-23
+Q99628       1/3     113   184 ..     1    77 []    91.2  7.5e-23
+PABP_HUMAN   3/4     193   260 ..     1    77 []    91.2  7.6e-23
+CABA_MOUSE   1/2      77   147 ..     1    77 []    90.8  9.4e-23
+O35698       1/1      10    81 ..     1    77 []    90.8    1e-22
+O22173       4/4     330   400 ..     1    77 []    90.7    1e-22
+Q91582       3/3     246   317 ..     1    77 []    90.6  1.1e-22
+O15414       1/1     275   346 ..     1    77 []    90.5  1.2e-22
+Q91582       1/3      22    93 ..     1    77 []    90.3  1.3e-22
+Q44556       1/1       3    74 ..     1    77 []    90.3  1.4e-22
+PAB5_ARATH   4/4     330   400 ..     1    77 []    90.3  1.4e-22
+Q14576       3/3     278   349 ..     1    77 []    90.0  1.6e-22
+O02008       1/1     107   178 ..     1    77 []    90.0  1.7e-22
+O02009       1/1      60   131 ..     1    77 []    90.0  1.7e-22
+Q39675       1/2     114   185 ..     1    77 []    89.9  1.8e-22
+Q62029       2/4     101   170 ..     1    77 []    89.8  1.9e-22
+Q08937       1/2      89   160 ..     1    77 []    89.8    2e-22
+Q23795       1/2      42   112 ..     1    77 []    89.8    2e-22
+YDC1_SCHPO   1/1     103   174 ..     1    77 []    89.7  2.1e-22
+O24306       1/2     111   182 ..     1    77 []    89.7  2.1e-22
+PABP_SCHPO   2/4     156   226 ..     1    77 []    89.6  2.2e-22
+P87135       2/4     170   240 ..     1    77 []    89.6  2.2e-22
+RO31_NICPL   1/2      90   161 ..     1    77 []    89.6  2.3e-22
+ROA1_MACMU   2/2     106   176 ..     1    77 []    89.6  2.3e-22
+PABP_SCHPO   4/4     352   422 ..     1    77 []    89.5  2.4e-22
+P87135       4/4     366   436 ..     1    77 []    89.5  2.4e-22
+Q99729       1/2      70   140 ..     1    77 []    89.4  2.6e-22
+Q04150       1/2      70   140 ..     1    77 []    89.4  2.6e-22
+Q44554       1/1       3    74 ..     1    77 []    89.3  2.8e-22
+NAM8_YEAST   2/3     165   237 ..     1    77 []    89.3  2.8e-22
+SC35_HUMAN   1/1      16    87 ..     1    77 []    89.1  3.1e-22
+YIS5_YEAST   1/1      33   104 ..     1    77 []    88.9  3.6e-22
+PABP_DROME   2/4      92   162 ..     1    77 []    88.7  4.2e-22
+Q17350       4/4     319   389 ..     1    77 []    88.5  4.6e-22
+GBP2_YEAST   1/3     124   193 ..     1    77 []    88.5  4.7e-22
+TIAR_HUMAN   1/3      11    80 ..     1    77 []    88.4    5e-22
+Q41810       1/1      10    81 ..     1    77 []    88.2  6.1e-22
+Q91583       1/3      68   140 ..     1    77 []    88.1  6.5e-22
+ROA1_XENLA   2/2     107   177 ..     1    77 []    87.8    8e-22
+Q46349       1/1       3    74 ..     1    77 []    87.8    8e-22
+P93616       1/4      34   105 ..     1    77 []    87.7  8.4e-22
+Q08948       2/2     214   285 ..     1    77 []    87.6  8.7e-22
+Q15097       4/4     271   340 ..     1    77 []    87.5  9.7e-22
+Q93004       4/4     296   365 ..     1    77 []    87.5  9.7e-22
+PABP_MOUSE   4/4     296   365 ..     1    77 []    87.5  9.7e-22
+PABP_HUMAN   4/4     293   362 ..     1    77 []    87.5  9.7e-22
+Q91579       3/3     379   450 ..     1    77 []    87.4    1e-21
+PAB5_ARATH   2/4     134   204 ..     1    77 []    87.4    1e-21
+Q53322       1/1       2    73 ..     1    77 []    87.2  1.1e-21
+Q15717       3/3     246   317 ..     1    77 []    87.2  1.2e-21
+RO33_NICSY   2/2     219   290 ..     1    77 []    87.0  1.3e-21
+Q55343       1/1       3    74 ..     1    77 []    86.9  1.4e-21
+Q99377       1/1     105   176 ..     1    77 []    86.9  1.5e-21
+Q62376       1/1      35   106 ..     1    77 []    86.9  1.5e-21
+P78493       1/1     105   176 ..     1    77 []    86.9  1.5e-21
+RU17_HUMAN   1/1     282   353 ..     1    77 []    86.9  1.5e-21
+Q39062       2/2     213   284 ..     1    77 []    86.7  1.6e-21
+Q39061       2/2     221   292 ..     1    77 []    86.7  1.6e-21
+O13620       2/5     325   396 ..     1    77 []    86.7  1.7e-21
+Q15584       2/2     495   564 ..     1    77 []    86.5  1.9e-21
+ROM_HUMAN    3/3     655   724 ..     1    77 []    86.5  1.9e-21
+GBP2_YEAST   2/3     221   291 ..     1    77 []    86.5  1.9e-21
+Q91920       1/2      23    93 ..     1    77 []    86.5    2e-21
+Q57014       1/1       3    74 ..     1    77 []    86.1  2.5e-21
+Q39953       1/4      28    99 ..     1    77 []    86.0  2.7e-21
+RU17_DROME   1/1     104   175 ..     1    77 []    85.8  3.1e-21
+Q39953       2/4     116   186 ..     1    77 []    85.8  3.2e-21
+RO30_NICPL   1/2      89   160 ..     1    77 []    85.5  3.9e-21
+Q91903       1/3      68   140 ..     1    77 []    85.5  3.9e-21
+SR55_DROME   1/2       5    68 ..     1    77 []    85.5    4e-21
+Q24252       1/2       6    69 ..     1    77 []    85.5    4e-21
+Q24534       2/2     121   193 ..     1    77 []    85.4    4e-21
+PABP_YEAST   1/4      39   110 ..     1    77 []    85.4    4e-21
+Q23796       1/2       6    69 ..     1    77 []    85.4  4.1e-21
+PUB1_YEAST   1/3      76   146 ..     1    77 []    85.4  4.1e-21
+NAB4_YEAST   1/2     161   230 ..     1    77 []    85.4  4.2e-21
+ROA1_RAT     1/2      15    85 ..     1    77 []    85.1  4.9e-21
+ROA1_BOVIN   1/2      15    85 ..     1    77 []    85.1  4.9e-21
+ROA1_MOUSE   1/2      15    85 ..     1    77 []    85.1  4.9e-21
+ROA1_HUMAN   1/2      15    85 ..     1    77 []    85.1  4.9e-21
+Q90602       1/2      92   162 ..     1    77 []    85.1    5e-21
+PAB5_ARATH   1/4      47   117 ..     1    77 []    85.0  5.6e-21
+O13707       1/2     265   336 ..     1    77 []    84.7  6.5e-21
+GAR2_SCHPO   1/2     265   336 ..     1    77 []    84.7  6.5e-21
+Q13242       1/2      16    84 ..     1    77 []    84.6  7.3e-21
+Q90407       2/2     165   236 ..     1    77 []    84.5  7.7e-21
+Q90602       2/2     176   246 ..     1    77 []    84.4    8e-21
+O04240       2/2     210   281 ..     1    77 []    84.3  8.7e-21
+Q60668       2/2     153   225 ..     1    77 []    84.3    9e-21
+Q92879       3/3     399   470 ..     1    77 []    84.1    1e-20
+Q17350       2/4     122   192 ..     1    77 []    84.0    1e-20
+O22851       1/1      60   131 ..     1    77 []    83.9  1.2e-20
+O17310       2/2     190   263 ..     1    77 []    83.9  1.2e-20
+O17309       2/2     183   256 ..     1    77 []    83.9  1.2e-20
+O00320       1/1     242   313 ..     1    77 []    83.9  1.2e-20
+YP85_CAEEL   2/2     102   174 ..     1    77 []    83.8  1.2e-20
+Q17352       2/2     112   184 ..     1    77 []    83.8  1.2e-20
+P93843       3/3     297   362 ..     1    77 []    83.7  1.3e-20
+P70372       3/3     246   317 ..     1    77 []    83.7  1.4e-20
+O22314       1/2       9    77 ..     1    77 []    83.6  1.4e-20
+Q39201       1/2       9    77 ..     1    77 []    83.6  1.4e-20
+O22315       1/2       9    77 ..     1    77 []    83.6  1.4e-20
+Q61474       1/2      22    92 ..     1    77 []    83.6  1.5e-20
+NGR1_YEAST   2/3     194   266 ..     1    77 []    83.5  1.5e-20
+RO21_XENLA   2/2     102   172 ..     1    77 []    83.5  1.6e-20
+RU17_XENLA   1/1     105   179 ..     1    77 []    83.4  1.7e-20
+Q94467       1/1     202   272 ..     1    77 []    83.3  1.7e-20
+CABA_MOUSE   2/2     161   231 ..     1    77 []    83.3  1.7e-20
+Q90626       2/2     159   229 ..     1    77 []    83.1    2e-20
+Q17350       1/4      34   105 ..     1    77 []    83.0  2.2e-20
+PABP_XENLA   4/4     296   365 ..     1    77 []    82.9  2.2e-20
+O22173       1/4      48   119 ..     1    77 []    82.9  2.2e-20
+O08831       1/1      12    78 ..     1    77 []    82.8  2.5e-20
+X16_HUMAN    1/1      12    78 ..     1    77 []    82.8  2.5e-20
+Q62029       4/4     296   365 ..     1    77 []    82.6  2.8e-20
+RO22_XENLA   2/2     102   172 ..     1    77 []    82.6  2.8e-20
+NOP3_YEAST   1/2     127   190 ..     1    77 []    82.6  2.9e-20
+U2AF_SCHPO   1/2     312   383 ..     1    77 []    82.6    3e-20
+Q17385       3/3     501   572 ..     1    77 []    82.5    3e-20
+P91414       1/1     335   406 ..     1    77 []    82.5    3e-20
+MSSP_HUMAN   1/2      31   102 ..     1    77 []    82.5    3e-20
+Q42404       1/1     140   211 ..     1    77 []    82.4  3.2e-20
+O23288       2/2     107   177 ..     1    77 []    82.1  4.1e-20
+Q91808       1/2      22    92 ..     1    77 []    82.0  4.3e-20
+Q91807       1/2      22    92 ..     1    77 []    82.0  4.3e-20
+O18409       3/3     726   797 ..     1    77 []    82.0  4.3e-20
+O02374       3/3     522   593 ..     1    77 []    82.0  4.3e-20
+P70055       3/3     455   526 ..     1    77 []    82.0  4.4e-20
+O14979       1/2       1    71 [.     1    77 []    81.9  4.8e-20
+Q24668       2/2     207   280 ..     1    77 []    81.8    5e-20
+SXLF_DROME   2/2     213   286 ..     1    77 []    81.8    5e-20
+Q99141       2/2     205   278 ..     1    77 []    81.8    5e-20
+Q14100       2/2      96   168 ..     1    77 []    81.7  5.2e-20
+Q14103       2/2     184   256 ..     1    77 []    81.7  5.2e-20
+Q12771       2/2     163   235 ..     1    77 []    81.7  5.2e-20
+ROC_RAT      1/1      36   108 ..     1    77 []    81.7  5.2e-20
+Q14102       2/2      93   165 ..     1    77 []    81.7  5.2e-20
+Q14101       2/2     184   256 ..     1    77 []    81.7  5.2e-20
+Q55345       1/1       3    74 ..     1    77 []    81.7  5.4e-20
+YDB2_SCHPO   1/1      32   102 ..     1    77 []    81.7  5.4e-20
+SP49_HUMAN   1/2      15    86 ..     1    77 []    81.7  5.5e-20
+Q19579       1/4      59   130 ..     1    77 []    81.7  5.5e-20
+Q19581       1/4      59   130 ..     1    77 []    81.7  5.5e-20
+Q60690       2/2     360   429 ..     1    77 []    81.6  5.6e-20
+TIA1_HUMAN   3/3     205   270 ..     1    77 []    81.6  5.8e-20
+ROA2_HUMAN   2/2     114   184 ..     1    77 []    81.6  5.9e-20
+O18352       1/1     728   798 ..     1    77 []    81.5  6.2e-20
+ROA1_MACMU   1/2      15    85 ..     1    77 []    81.4  6.4e-20
+Q26658       2/2     101   171 ..     1    77 []    81.4  6.5e-20
+Q18318       1/1      40   111 ..     1    77 []    81.4  6.7e-20
+Q16135       3/3     269   340 ..     1    77 []    81.4  6.7e-20
+Q19581       3/4     240   310 ..     1    77 []    81.4  6.8e-20
+Q19579       3/4     240   310 ..     1    77 []    81.4  6.8e-20
+P93396       1/1      68   139 ..     1    77 []    81.4  6.8e-20
+O22791       2/2     112   182 ..     1    77 []    81.3  7.2e-20
+TIA1_MOUSE   3/3     216   281 ..     1    77 []    81.2  7.4e-20
+Q00880       2/2     358   429 ..     1    77 []    81.2  7.5e-20
+O23212       2/2     358   429 ..     1    77 []    80.9  9.4e-20
+Q92227       1/4      44   115 ..     1    77 []    80.9  9.6e-20
+Q09511       1/1      21    92 ..     1    77 []    80.8  9.8e-20
+Q15351       1/1      27    97 ..     1    77 []    80.8    1e-19
+Q15350       1/1      26    96 ..     1    77 []    80.8    1e-19
+U2AF_MOUSE   2/3     261   332 ..     1    77 []    80.6  1.1e-19
+U2AF_HUMAN   2/3     261   332 ..     1    77 []    80.6  1.1e-19
+Q99628       2/3     210   281 ..     1    77 []    80.6  1.2e-19
+Q60399       1/1      52   120 ..     1    77 []    80.5  1.3e-19
+NUCL_MOUSE   4/4     570   638 ..     1    77 []    80.5  1.3e-19
+NUCL_MESAU   4/4     573   641 ..     1    77 []    80.5  1.3e-19
+NUCL_RAT     4/4     576   644 ..     1    77 []    80.5  1.3e-19
+Q17430       2/2     685   755 ..     1    77 []    80.5  1.3e-19
+Q55342       1/1       3    74 ..     1    77 []    80.3  1.4e-19
+Q91583       2/3     155   226 ..     1    77 []    80.3  1.4e-19
+Q91903       2/3     155   226 ..     1    77 []    80.3  1.4e-19
+Q62176       1/1      34   104 ..     1    77 []    80.2  1.5e-19
+Q21832       1/1      57   128 ..     1    77 []    80.2  1.5e-19
+YHH5_YEAST   3/3     315   384 ..     1    77 []    80.1  1.6e-19
+HUD_MOUSE    2/3     139   210 ..     1    77 []    79.9  1.9e-19
+HUD_RAT      2/3     127   198 ..     1    77 []    79.9  1.9e-19
+HUD_HUMAN    2/3     134   205 ..     1    77 []    79.9  1.9e-19
+RO32_XENLA   2/2     120   190 ..     1    77 []    79.7  2.1e-19
+Q20084       3/3     376   446 ..     1    77 []    79.7  2.2e-19
+Q41042       1/2     357   426 ..     1    77 []    79.6  2.3e-19
+Q16629       1/1      13    79 ..     1    77 []    79.6  2.4e-19
+Q17350       3/4     215   284 ..     1    77 []    79.6  2.4e-19
+NUCL_CHICK   4/4     555   623 ..     1    77 []    79.5  2.4e-19
+PES4_YEAST   3/4     305   374 ..     1    77 []    79.4  2.6e-19
+Q19335       2/2     477   548 ..     1    77 []    79.4  2.6e-19
+GBP2_YEAST   3/3     351   421 ..     1    77 []    79.4  2.7e-19
+Q00880       1/2     253   324 ..     1    77 []    79.3  2.8e-19
+RB27_DROME   1/2       9    79 ..     1    77 []    79.3  2.8e-19
+RNP1_YEAST   1/1      37   109 ..     1    77 []    79.0  3.5e-19
+YQOC_CAEEL   1/1      63   133 ..     1    77 []    79.0  3.6e-19
+Q92950       3/3     407   478 ..     1    77 []    79.0  3.6e-19
+SP33_HUMAN   1/2      17    85 ..     1    77 []    78.9  3.7e-19
+Q13809       1/2      18    86 ..     1    77 []    78.9  3.7e-19
+SQD_DROME    2/2     138   208 ..     1    77 []    78.8  3.9e-19
+Q13243       1/2       6    69 ..     1    77 []    78.8  3.9e-19
+O35326       1/2       6    69 ..     1    77 []    78.8  3.9e-19
+CL4_RAT      1/2       6    69 ..     1    77 []    78.8  3.9e-19
+Q16662       1/1       6    69 ..     1    77 []    78.8  3.9e-19
+O14979       2/2      86   158 ..     1    77 []    78.8    4e-19
+P78795       1/1      51   122 ..     1    77 []    78.8    4e-19
+HRB1_YEAST   3/3     353   423 ..     1    77 []    78.8  4.1e-19
+Q13151       2/2     100   170 ..     1    77 []    78.7  4.3e-19
+Q17201       2/2     125   195 ..     1    77 []    78.6  4.4e-19
+Q17200       2/2     125   195 ..     1    77 []    78.6  4.4e-19
+YQO4_CAEEL   1/1     103   173 ..     1    77 []    78.5    5e-19
+Q91585       2/3     134   205 ..     1    77 []    78.4  5.1e-19
+Q53321       1/1       2    73 ..     1    77 []    78.4  5.4e-19
+Q55341       1/1       3    74 ..     1    77 []    78.4  5.4e-19
+Q93194       2/2     131   196 ..     1    77 []    78.1  6.5e-19
+Q27335       1/4      11    81 ..     1    77 []    78.0  6.9e-19
+O14801       1/1     241   312 ..     1    77 []    78.0    7e-19
+NGR1_YEAST   3/3     362   427 ..     1    77 []    78.0  7.1e-19
+EWS_MOUSE    1/1     362   441 ..     1    77 []    77.9  7.2e-19
+Q18409       1/1       5    71 ..     1    77 []    77.9  7.7e-19
+Q01858       2/2     163   235 ..     1    77 []    77.7  8.7e-19
+Q39568       2/2     145   215 ..     1    77 []    77.6  8.9e-19
+P93616       2/4     122   192 ..     1    77 []    77.6  9.5e-19
+Q19579       2/4     147   217 ..     1    77 []    77.5  9.5e-19
+Q19581       2/4     147   217 ..     1    77 []    77.5  9.5e-19
+O14875       1/1      10    80 ..     1    77 []    77.5  9.9e-19
+O04319       3/4     204   274 ..     1    77 []    77.5    1e-18
+Q17201       1/2      45   115 ..     1    77 []    77.4    1e-18
+Q17200       1/2      45   115 ..     1    77 []    77.4    1e-18
+O22791       1/2       8    77 ..     1    77 []    77.4    1e-18
+Q12926       2/3     127   198 ..     1    77 []    77.4    1e-18
+Q13235       2/3     127   198 ..     1    77 []    77.4    1e-18
+Q60899       2/3     127   198 ..     1    77 []    77.4    1e-18
+EWS_HUMAN    1/1     363   442 ..     1    77 []    77.4    1e-18
+O14327       1/1      57   127 ..     1    77 []    77.4  1.1e-18
+O13829       1/1     102   173 ..     1    77 []    77.4  1.1e-18
+ROA3_HUMAN   2/2     128   198 ..     1    77 []    77.3  1.1e-18
+Q22037       1/2      25    95 ..     1    77 []    77.2  1.2e-18
+Q14869       1/2      31   102 ..     1    77 []    77.2  1.2e-18
+Q15433       1/2      64   135 ..     1    77 []    77.2  1.2e-18
+Q92227       2/4     132   202 ..     1    77 []    77.2  1.2e-18
+ROA1_XENLA   1/2      16    86 ..     1    77 []    77.0  1.4e-18
+O01671       2/2     152   224 ..     1    77 []    77.0  1.4e-18
+ROA1_SCHAM   1/2      19    89 ..     1    77 []    76.9  1.5e-18
+Q24409       2/2     265   335 ..     1    77 []    76.9  1.5e-18
+Q24474       2/3     198   269 ..     1    77 []    76.7  1.7e-18
+Q24847       2/2     188   259 ..     1    77 []    76.5  1.9e-18
+Q39953       4/4     309   379 ..     1    77 []    76.5    2e-18
+Q21911       1/2      47   117 ..     1    77 []    76.4  2.1e-18
+Q15020       2/2     803   873 ..     1    77 []    76.3  2.2e-18
+O04319       2/4     114   183 ..     1    77 []    76.2  2.4e-18
+RO31_XENLA   1/2      29    99 ..     1    77 []    76.0  2.8e-18
+RO32_XENLA   1/2      29    99 ..     1    77 []    76.0  2.8e-18
+O08752       2/2      80   143 ..     1    77 []    76.0  2.8e-18
+YNL0_YEAST   1/1      93   164 ..     1    77 []    75.8  3.2e-18
+RO31_XENLA   2/2     120   190 ..     1    77 []    75.8  3.3e-18
+YP85_CAEEL   1/2      15    86 ..     1    77 []    75.6  3.6e-18
+Q17352       1/2      25    96 ..     1    77 []    75.6  3.6e-18
+Q16135       2/3     118   190 ..     1    77 []    75.6  3.8e-18
+Q24360       2/2     119   189 ..     1    77 []    75.5  3.8e-18
+ROA1_DROME   2/2     124   194 ..     1    77 []    75.5  3.8e-18
+Q24359       2/2     120   190 ..     1    77 []    75.5  3.8e-18
+Q99361       2/2     123   193 ..     1    77 []    75.5  3.8e-18
+Q24486       2/2     117   187 ..     1    77 []    75.5  3.9e-18
+RB87_DROME   2/2     117   187 ..     1    77 []    75.5  3.9e-18
+O35935       1/1     170   240 ..     1    77 []    75.5    4e-18
+Q28165       1/1     174   244 ..     1    77 []    75.5    4e-18
+O04319       4/4     306   376 ..     1    77 []    75.3  4.4e-18
+RB27_DROME   2/2      98   168 ..     1    77 []    75.3  4.5e-18
+SR75_HUMAN   1/2       4    67 ..     1    77 []    75.3  4.5e-18
+ROA2_HUMAN   1/2      23    93 ..     1    77 []    75.3  4.6e-18
+NONA_DROME   1/2     304   369 ..     1    77 []    75.2  4.8e-18
+Q27926       1/1      98   168 ..     1    77 []    75.1  5.3e-18
+YHH5_YEAST   1/3     113   184 ..     1    77 []    75.0  5.6e-18
+ELAV_DROME   1/3     151   235 ..     1    77 []    75.0  5.7e-18
+ELAV_DROVI   1/3     187   271 ..     1    77 []    75.0  5.7e-18
+O15187       2/2     168   233 ..     1    77 []    74.9  5.9e-18
+TIAR_MOUSE   3/3     224   289 ..     1    77 []    74.9  5.9e-18
+TIAR_HUMAN   3/3     207   272 ..     1    77 []    74.9  5.9e-18
+Q12786       1/2      76   141 ..     1    77 []    74.9    6e-18
+O00201       1/2      76   141 ..     1    77 []    74.9    6e-18
+Q90626       1/2      75   145 ..     1    77 []    74.8  6.3e-18
+Q16560       1/1      53   124 ..     1    77 []    74.8  6.3e-18
+Q55765       1/1       5    77 ..     1    77 []    74.8  6.5e-18
+PABP_DROME   4/4     287   357 ..     1    77 []    74.8  6.6e-18
+Q20084       2/3     130   201 ..     1    77 []    74.6  7.1e-18
+O02916       2/2      80   143 ..     1    77 []    74.6  7.2e-18
+O13845       2/3     342   412 ..     1    77 []    74.6  7.3e-18
+NAB4_YEAST   2/2     245   315 ..     1    77 []    74.4  8.6e-18
+RB97_DROME   1/2      34   104 ..     1    77 []    74.3  9.2e-18
+O23146       1/1      51   121 ..     1    77 []    74.1    1e-17
+O35335       1/1       6    69 ..     1    77 []    74.1    1e-17
+O13845       1/3     242   312 ..     1    77 []    73.6  1.5e-17
+NAM8_YEAST   3/3     315   380 ..     1    77 []    73.4  1.6e-17
+Q92227       4/4     328   449 ..     1    77 []    73.3  1.8e-17
+ROA3_HUMAN   1/2      37   107 ..     1    77 []    73.3  1.8e-17
+Q04150       2/2     154   225 ..     1    77 []    73.1  2.1e-17
+Q99729       2/2     154   225 ..     1    77 []    73.1  2.1e-17
+Q39675       2/2     210   281 ..     1    77 []    73.1  2.1e-17
+Q22030       1/1     102   171 ..     1    77 []    73.0  2.3e-17
+Q61954       1/2      26    89 ..     1    77 []    72.9  2.4e-17
+O02916       1/2       4    67 ..     1    77 []    72.9  2.4e-17
+O08752       1/2       4    67 ..     1    77 []    72.9  2.4e-17
+Q22304       1/1     194   263 ..     1    77 []    72.7  2.8e-17
+Q10572       1/1     155   224 ..     1    77 []    72.7  2.8e-17
+Q24847       1/2       4    74 ..     1    77 []    72.7  2.8e-17
+Q14576       2/3     127   198 ..     1    77 []    72.7  2.8e-17
+Q21155       1/1     150   222 ..     1    77 []    72.6  2.9e-17
+Q60900       2/3     127   198 ..     1    77 []    72.6  2.9e-17
+Q60901       1/2      42   113 ..     1    77 []    72.6  2.9e-17
+RB87_DROME   1/2      26    96 ..     1    77 []    72.5  3.1e-17
+Q24486       1/2      26    96 ..     1    77 []    72.5  3.1e-17
+RU1A_XENLA   1/2      12    84 ..     1    77 []    72.5  3.2e-17
+Q91582       2/3     108   179 ..     1    77 []    72.3  3.6e-17
+ROM_HUMAN    1/3      73   144 ..     1    77 []    72.1  4.1e-17
+Q06459       3/4     468   534 ..     1    77 []    72.1  4.3e-17
+Q93233       1/1      80   150 ..     1    77 []    72.0  4.3e-17
+P78814       1/2       2    65 ..     1    77 []    72.0  4.4e-17
+O23189       1/3      65   136 ..     1    77 []    72.0  4.5e-17
+NUCL_XENLA   4/4     504   572 ..     1    77 []    71.9  4.6e-17
+Q02427       1/1      13    79 ..     1    77 []    71.9  4.7e-17
+Q13247       1/2       4    67 ..     1    77 []    71.8    5e-17
+Q13244       1/1       4    67 ..     1    77 []    71.8    5e-17
+Q13245       1/2       4    67 ..     1    77 []    71.8    5e-17
+Q22037       2/2     116   186 ..     1    77 []    71.7  5.5e-17
+Q15717       2/3     108   179 ..     1    77 []    71.7  5.6e-17
+Q90409       2/3     132   206 ..     1    77 []    71.5  6.1e-17
+Q63887       1/2      78   143 ..     1    77 []    71.5  6.5e-17
+RU1A_HUMAN   1/2      12    84 ..     1    77 []    71.4  6.6e-17
+Q15287       1/1     163   235 ..     1    77 []    71.4  6.7e-17
+O23475       2/2     213   284 ..     1    77 []    71.4    7e-17
+O04425       2/2     213   284 ..     1    77 []    71.4    7e-17
+FUS_BOVIN    1/1     273   352 ..     1    77 []    71.3  7.4e-17
+FUS_HUMAN    1/1     287   366 ..     1    77 []    71.3  7.4e-17
+Q13344       1/1     290   369 ..     1    77 []    71.3  7.4e-17
+Q62826       1/1      12    83 ..     1    77 []    71.3  7.5e-17
+Q99361       1/2      32   102 ..     1    77 []    71.2  7.9e-17
+Q24359       1/2      29    99 ..     1    77 []    71.2  7.9e-17
+ROA1_DROME   1/2      33   103 ..     1    77 []    71.2  7.9e-17
+Q24360       1/2      28    98 ..     1    77 []    71.2  7.9e-17
+YHC4_YEAST   2/2     348   415 ..     1    77 []    71.2    8e-17
+Q62019       2/2      80   143 ..     1    77 []    71.1  8.1e-17
+Q23121       1/2       5    68 ..     1    77 []    70.9  9.3e-17
+ROA1_SCHAM   2/2     110   180 ..     1    77 []    70.9  9.6e-17
+Q27199       2/2     357   425 ..     1    77 []    70.8  9.9e-17
+Q24491       1/1       9    75 ..     1    77 []    70.8  1.1e-16
+O13741       2/2     272   343 ..     1    77 []    70.7  1.1e-16
+NUCL_XENLA   3/4     416   482 ..     1    77 []    70.6  1.2e-16
+Q15434       1/2      58   129 ..     1    77 []    70.4  1.3e-16
+Q27335       4/4     290   362 ..     1    77 []    70.4  1.3e-16
+Q06459       4/4     556   624 ..     1    77 []    70.3  1.5e-16
+O04432       1/1      10   117 ..     1    77 []    70.3  1.5e-16
+P92966       1/2       4    69 ..     1    77 []    70.2  1.5e-16
+Q06106       5/5     765   835 ..     1    77 []    70.2  1.6e-16
+Q24562       2/3     209   280 ..     1    77 []    70.2  1.6e-16
+TIAR_MOUSE   1/3      11    97 ..     1    77 []    70.1  1.7e-16
+Q24261       1/2     304   369 ..     1    77 []    70.0  1.8e-16
+Q08208       1/1     281   351 ..     1    77 []    70.0  1.8e-16
+Q91584       2/3     122   194 ..     1    77 []    69.9  1.9e-16
+NUCL_CHICK   3/4     463   530 ..     1    77 []    69.9  1.9e-16
+P92965       1/2       4    69 ..     1    77 []    69.7  2.3e-16
+Q27294       1/1     121   200 ..     1    77 []    69.6  2.3e-16
+Q06106       2/5     347   418 ..     1    77 []    69.6  2.4e-16
+Q09542       1/2     118   182 ..     1    77 []    69.4  2.6e-16
+ELAV_DROME   2/3     250   322 ..     1    77 []    69.4  2.7e-16
+ELAV_DROVI   2/3     286   358 ..     1    77 []    69.4  2.7e-16
+Q90407       1/2      27    99 ..     1    77 []    69.4  2.8e-16
+O23475       1/2     122   194 ..     1    77 []    69.0  3.5e-16
+O04425       1/2     122   194 ..     1    77 []    69.0  3.5e-16
+RO22_XENLA   1/2      11    81 ..     1    77 []    69.0  3.6e-16
+Q24409       1/2     177   247 ..     1    77 []    68.7  4.5e-16
+Q21911       2/2     136   206 ..     1    77 []    68.6  4.6e-16
+O14369       1/1      96   164 ..     1    77 []    68.5  5.2e-16
+P79736       2/3     126   197 ..     1    77 []    68.4  5.2e-16
+Q62189       1/2      18    90 ..     1    77 []    68.3  5.7e-16
+YG5B_YEAST   1/3     197   268 ..     1    77 []    68.2  6.3e-16
+Q61474       2/2     111   181 ..     1    77 []    68.0  7.1e-16
+NUCL_CHICK   2/4     373   440 ..     1    77 []    68.0  7.3e-16
+P93843       2/3     179   251 ..     1    77 []    67.9  7.5e-16
+Q60690       1/2      87   157 ..     1    77 []    67.8    8e-16
+O02374       1/3     153   225 ..     1    77 []    67.8    8e-16
+O18409       1/3     357   429 ..     1    77 []    67.8    8e-16
+Q27335       2/4      98   168 ..     1    77 []    67.7  8.6e-16
+Q08212       1/3       9    77 ..     1    77 []    67.7  8.9e-16
+O04240       1/2     119   190 ..     1    77 []    67.7  8.9e-16
+NUCL_MOUSE   3/4     488   555 ..     1    77 []    67.6  9.3e-16
+Q40363       1/2     378   447 ..     1    77 []    67.5    1e-15
+NUCL_HUMAN   3/4     487   554 ..     1    77 []    67.4  1.1e-15
+RO21_XENLA   1/2      11    81 ..     1    77 []    67.3  1.1e-15
+Q24113       1/2     279   344 ..     1    77 []    67.3  1.1e-15
+YNR5_YEAST   2/2     241   312 ..     1    77 []    67.1  1.4e-15
+NUCL_RAT     3/4     490   557 ..     1    77 []    66.9  1.5e-15
+YSO5_CAEEL   1/1     440   511 ..     1    77 []    66.9  1.5e-15
+PUB1_YEAST   3/3     342   407 ..     1    77 []    66.6  1.8e-15
+Q13151       1/2       9    79 ..     1    77 []    66.5    2e-15
+NUCL_MESAU   3/4     487   554 ..     1    77 []    66.5    2e-15
+RB97_DROME   2/2     125   196 ..     1    77 []    66.4  2.1e-15
+NUCL_XENLA   2/4     326   393 ..     1    77 []    66.4  2.1e-15
+ROC_HUMAN    1/1      18    82 ..     1    77 []    66.4  2.1e-15
+Q91807       2/2     111   181 ..     1    77 []    66.4  2.1e-15
+Q15584       1/2      46   116 ..     1    77 []    66.4  2.1e-15
+ROM_HUMAN    2/3     206   276 ..     1    77 []    66.4  2.1e-15
+Q91808       2/2     111   181 ..     1    77 []    66.3  2.3e-15
+Q41124       1/2     115   186 ..     1    77 []    66.2  2.5e-15
+Q92804       1/1     236   315 ..     1    77 []    66.2  2.5e-15
+Q92751       1/1     233   312 ..     1    77 []    66.2  2.5e-15
+Q21900       1/2      83   154 ..     1    77 []    66.0  2.8e-15
+Q23795       2/2     122   192 ..     1    77 []    65.9  3.1e-15
+Q08940       2/2     208   280 ..     1    77 []    65.8  3.2e-15
+WHI3_YEAST   1/1     540   614 ..     1    77 []    65.4  4.4e-15
+CB20_XENLA   1/1      34   105 ..     1    77 []    65.3  4.5e-15
+P90727       2/3     284   354 ..     1    77 []    65.1  5.2e-15
+PSF_HUMAN    1/2     299   364 ..     1    77 []    65.1  5.3e-15
+Q01491       1/4     312   380 ..     1    77 []    65.1  5.4e-15
+NUCL_HUMAN   4/4     573   639 ..     1    77 []    65.1  5.5e-15
+O23093       3/3     320   389 ..     1    77 []    65.0  5.7e-15
+P92964       2/2      95   159 ..     1    77 []    64.8  6.4e-15
+Q09959       3/3     283   347 ..     1    77 []    64.6  7.3e-15
+PR24_YEAST   3/3     212   284 ..     1    77 []    64.6  7.3e-15
+Q27335       3/4     186   260 ..     1    77 []    64.6  7.3e-15
+Q24473       2/3     198   274 ..     1    77 []    64.4  8.4e-15
+O22855       2/3     252   316 ..     1    77 []    64.3  9.2e-15
+P92966       2/2      98   162 ..     1    77 []    64.3  9.5e-15
+CB20_HUMAN   1/1      42   113 ..     1    77 []    64.1    1e-14
+Q91579       1/3       9    83 ..     1    77 []    64.1  1.1e-14
+O23131       1/1      30   100 ..     1    77 []    64.0  1.1e-14
+SP33_HUMAN   2/2     122   186 ..     1    77 []    63.6  1.5e-14
+Q92904       1/1      42   110 ..     1    77 []    63.6  1.5e-14
+Q92909       1/1      42   110 ..     1    77 []    63.6  1.5e-14
+Q95192       1/1      42   110 ..     1    77 []    63.6  1.5e-14
+Q26293       2/3     198   274 ..     1    77 []    63.5  1.6e-14
+IF4B_HUMAN   1/1      98   168 ..     1    77 []    63.5  1.6e-14
+Q06459       2/4     378   445 ..     1    77 []    63.5  1.6e-14
+SR75_HUMAN   2/2     106   172 ..     1    77 []    63.4  1.8e-14
+Q93594       1/1      39   110 ..     1    77 []    63.4  1.8e-14
+P70372       2/3     108   179 ..     1    77 []    63.3  1.9e-14
+O04319       1/4      23    94 ..     1    77 []    63.2    2e-14
+RU2B_HUMAN   1/2       9    81 ..     1    77 []    63.1  2.1e-14
+ROC_XENLA    1/1      19    83 ..     1    77 []    63.0  2.2e-14
+P92964       1/2       4    69 ..     1    77 []    62.6    3e-14
+O15396       1/1      42   110 ..     1    77 []    62.1  4.4e-14
+Q01491       3/4     479   549 ..     1    77 []    62.0  4.5e-14
+Q94901       1/2       9    72 ..     1    77 []    61.7  5.4e-14
+Q17385       2/3     144   215 ..     1    77 []    61.7  5.6e-14
+Q91920       2/2     112   182 ..     1    77 []    61.6  6.1e-14
+P92965       2/2      99   163 ..     1    77 []    61.5  6.4e-14
+Q24024       2/2     138   208 ..     1    77 []    61.5  6.5e-14
+Q91579       2/3      97   168 ..     1    77 []    61.5  6.6e-14
+NUCL_CHICK   1/4     283   352 ..     1    77 []    61.4  6.9e-14
+O23646       1/1       4    66 ..     1    77 []    61.3  7.5e-14
+HS49_YEAST   1/2      11    83 ..     1    77 []    61.3  7.6e-14
+Q23287       1/1      53   122 ..     1    77 []    61.2  7.9e-14
+P90871       1/1     173   244 ..     1    77 []    61.1  8.7e-14
+Q24024       1/2      34   104 ..     1    77 []    60.9  9.9e-14
+Q94901       2/2      88   151 ..     1    77 []    60.7  1.1e-13
+O15042       1/1     275   349 ..     1    77 []    60.7  1.1e-13
+YSX2_CAEEL   1/1       4    67 ..     1    77 []    60.7  1.1e-13
+RU1A_DROME   1/2       9    81 ..     1    77 []    60.6  1.2e-13
+O13620       4/5     621   697 ..     1    77 []    60.6  1.2e-13
+Q13245       2/2     112   178 ..     1    77 []    60.5  1.2e-13
+Q13247       2/2     112   178 ..     1    77 []    60.5  1.2e-13
+SSB1_YEAST   1/2      39   114 ..     1    77 []    60.4  1.3e-13
+Q64368       1/1      42   110 ..     1    77 []    60.3  1.5e-13
+HRB1_YEAST   2/3     238   308 ..     1    77 []    60.3  1.5e-13
+Q13148       1/2     106   175 ..     1    77 []    60.2  1.5e-13
+Q27199       1/2     265   335 ..     1    77 []    60.2  1.6e-13
+O13759       1/2     184   256 ..     1    77 []    60.1  1.7e-13
+Q09331       1/1      24    96 ..     1    77 []    60.1  1.7e-13
+Q14151       1/1     409   480 ..     1    77 []    60.1  1.7e-13
+Q41042       2/2     453   527 ..     1    77 []    60.0  1.8e-13
+Q22412       1/1     237   307 ..     1    77 []    60.0  1.8e-13
+Q15020       1/2     706   777 ..     1    77 []    60.0  1.8e-13
+O13759       2/2     299   364 ..     1    77 []    59.9    2e-13
+Q09335       1/1      31    96 ..     1    77 []    59.9    2e-13
+Q64012       1/1      23    87 ..     1    77 []    59.7  2.2e-13
+Q23120       1/2       4    67 ..     1    77 []    59.7  2.2e-13
+O22855       1/3      20    85 ..     1    77 []    59.7  2.3e-13
+PSF_HUMAN    2/2     373   443 ..     1    77 []    59.6  2.4e-13
+Q15056       1/1      46   115 ..     1    77 []    59.5  2.6e-13
+Q42215       1/1       5    77 ..     1    77 []    59.5  2.6e-13
+Q09584       1/1     105   172 ..     1    77 []    59.4  2.7e-13
+NUCL_HUMAN   2/4     394   460 ..     1    77 []    59.3  2.9e-13
+Q14924       1/1      42   113 ..     1    77 []    59.3    3e-13
+MLO3_SCHPO   1/1      57   129 ..     1    77 []    59.0  3.6e-13
+Q08212       3/3     223   289 ..     1    77 []    58.9  3.8e-13
+GRP1_SORVU   1/1       1    60 [.     1    77 []    58.9  3.9e-13
+ROH1_HUMAN   2/3     113   183 ..     1    77 []    58.9    4e-13
+O35737       2/3     113   183 ..     1    77 []    58.9    4e-13
+RU17_YEAST   1/1     109   183 ..     1    77 []    58.9    4e-13
+Q13809       2/2     123   187 ..     1    77 []    58.7  4.4e-13
+O08583       1/1     107   177 ..     1    77 []    58.7  4.6e-13
+Q23796       2/2     118   184 ..     1    77 []    58.5  5.1e-13
+NOP4_YEAST   3/4     292   378 ..     1    77 []    58.5  5.3e-13
+Q06106       4/5     665   741 ..     1    77 []    58.4  5.4e-13
+Q24252       2/2     122   188 ..     1    77 []    58.4  5.4e-13
+SR55_DROME   2/2     116   182 ..     1    77 []    58.4  5.4e-13
+Q14730       1/1       1    70 [.     1    77 []    58.4  5.4e-13
+Q91017       1/1      26    96 ..     1    77 []    58.2  6.2e-13
+LA_HUMAN     1/1     113   182 ..     1    77 []    58.2  6.5e-13
+Q15367       1/1      60   129 ..     1    77 []    58.2  6.5e-13
+MODU_DROME   3/4     342   410 ..     1    77 []    58.1  6.7e-13
+RN24_SCHPO   2/2     230   295 ..     1    77 []    57.8  8.2e-13
+Q26692       3/3     207   271 ..     1    77 []    57.8  8.3e-13
+O13620       5/5     723   793 ..     1    77 []    57.6  9.4e-13
+O23189       3/3     341   410 ..     1    77 []    57.6  9.6e-13
+O23093       1/3     118   215 ..     1    77 []    57.5  1.1e-12
+ROH2_HUMAN   2/3     113   183 ..     1    77 []    57.4  1.1e-12
+P70333       2/3     113   183 ..     1    77 []    57.4  1.1e-12
+Q08920       1/1      48   119 ..     1    77 []    57.2  1.3e-12
+PR24_YEAST   2/3     119   190 ..     1    77 []    57.1  1.3e-12
+P93843       1/3      86   157 ..     1    77 []    57.1  1.4e-12
+Q92950       2/3     110   181 ..     1    77 []    56.6  1.9e-12
+P70055       2/3     152   223 ..     1    77 []    56.6  1.9e-12
+RU1A_HUMAN   2/2     210   277 ..     1    77 []    56.6  1.9e-12
+Q62189       2/2     215   282 ..     1    77 []    56.6  1.9e-12
+Q22135       1/1      34   105 ..     1    77 []    56.1  2.7e-12
+O23645       1/1       4    68 ..     1    77 []    55.9  3.1e-12
+Q09959       1/3      48   156 ..     1    77 []    55.7  3.6e-12
+P97855       1/1     340   400 ..     1    77 []    55.5    4e-12
+Q24207       1/1      35   105 ..     1    77 []    55.5  4.2e-12
+Q62150       1/1     162   234 ..     1    77 []    55.2  4.9e-12
+O14797       1/2       6    66 ..     1    77 []    55.0    6e-12
+ARP2_PLAFA   2/2     364   438 ..     1    77 []    55.0    6e-12
+Q13283       1/1     342   402 ..     1    77 []    54.2    1e-11
+Q39244       1/2      20    92 ..     1    77 []    54.1  1.1e-11
+Q04067       1/1     193   265 ..     1    77 []    54.0  1.2e-11
+RU1A_DROME   2/2     144   211 ..     1    77 []    53.8  1.4e-11
+Q62379       1/1      51   118 ..     1    77 []    53.7  1.5e-11
+RU2B_HUMAN   2/2     153   220 ..     1    77 []    53.7  1.5e-11
+NUCL_RAT     2/4     398   464 ..     1    77 []    53.5  1.6e-11
+Q18724       1/1      27    97 ..     1    77 []    53.3  1.9e-11
+ROF_HUMAN    2/3     113   183 ..     1    77 []    53.2    2e-11
+RU1A_XENLA   2/2     210   277 ..     1    77 []    53.1  2.2e-11
+MODU_DROME   2/4     260   326 ..     1    77 []    52.8  2.6e-11
+Q15424       1/1     357   428 ..     1    77 []    52.3  3.7e-11
+Q26692       2/3     127   191 ..     1    77 []    52.3  3.9e-11
+Q41498       1/2      25    97 ..     1    77 []    52.2  3.9e-11
+P97379       1/1     300   371 ..     1    77 []    52.2    4e-11
+NUCL_MESAU   2/4     395   461 ..     1    77 []    52.1  4.3e-11
+NUCL_MOUSE   2/4     396   462 ..     1    77 []    52.0  4.6e-11
+Q92879       2/3     110   181 ..     1    77 []    51.8  5.5e-11
+YG5B_YEAST   3/3     542   633 ..     1    77 []    51.8  5.5e-11
+Q40363       2/2     477   552 ..     1    77 []    51.6    6e-11
+Q13242       2/2     113   177 ..     1    77 []    51.6  6.3e-11
+GRF1_HUMAN   2/3     196   265 ..     1    77 []    51.5  6.7e-11
+Q38915       1/2     152   220 ..     1    77 []    51.3  7.4e-11
+SRP1_SCHPO   1/1       9    81 ..     1    77 []    51.1  8.9e-11
+PR24_YEAST   1/3      43   111 ..     1    77 []    50.8  1.1e-10
+Q13243       2/2     110   176 ..     1    77 []    50.7  1.1e-10
+CL4_RAT      2/2     110   176 ..     1    77 []    50.7  1.1e-10
+NONA_DROME   2/2     378   448 ..     1    77 []    50.2  1.6e-10
+Q24261       2/2     378   448 ..     1    77 []    50.2  1.6e-10
+RDP_MOUSE    1/1     268   331 ..     1    77 []    50.2  1.7e-10
+RDP_HUMAN    1/1     264   327 ..     1    77 []    50.2  1.7e-10
+O02374       2/3     240   306 ..     1    77 []    50.1  1.8e-10
+O18409       2/3     444   510 ..     1    77 []    50.1  1.8e-10
+Q15097       1/4       1    59 [.     1    77 []    49.9  1.9e-10
+PTB_MOUSE    2/4     185   252 ..     1    77 []    49.9  2.1e-10
+PTB_PIG      2/4     186   253 ..     1    77 []    49.9  2.1e-10
+PTB_HUMAN    2/4     186   253 ..     1    77 []    49.9  2.1e-10
+PTB_RAT      2/4     185   252 ..     1    77 []    49.9  2.1e-10
+Q63568       2/4     185   252 ..     1    77 []    49.9  2.1e-10
+LA_BOVIN     1/1     113   182 ..     1    77 []    49.8  2.1e-10
+LA_RAT       1/1     113   182 ..     1    77 []    49.8  2.2e-10
+Q15434       2/2     137   207 ..     1    77 []    49.7  2.2e-10
+PTB_HUMAN    3/4     339   406 ..     1    77 []    49.6  2.5e-10
+O15236       1/1       9    79 ..     1    77 []    49.4  2.8e-10
+Q15433       2/2     143   213 ..     1    77 []    49.4  2.8e-10
+O15237       1/1       9    79 ..     1    77 []    49.4  2.8e-10
+MSSP_HUMAN   2/2     110   180 ..     1    77 []    49.4  2.8e-10
+Q14869       2/2     110   180 ..     1    77 []    49.4  2.8e-10
+Q21323       1/2      10    82 ..     1    77 []    49.3    3e-10
+O14102       1/1       1    66 [.     1    77 []    49.3  3.1e-10
+Q62378       1/1      25    92 ..     1    77 []    49.1  3.4e-10
+Q24113       2/2     352   422 ..     1    77 []    49.1  3.5e-10
+Q14499       1/3     155   225 ..     1    77 []    49.0  3.6e-10
+Q14498       1/3     155   225 ..     1    77 []    49.0  3.6e-10
+Q15380       1/1      10    75 ..     1    77 []    48.9  3.9e-10
+O00201       2/2     150   220 ..     1    77 []    48.9    4e-10
+Q12786       2/2     150   220 ..     1    77 []    48.9    4e-10
+O23212       1/2     238   314 ..     1    77 []    48.9    4e-10
+U2AG_HUMAN   1/1      67   142 ..     1    77 []    48.7  4.6e-10
+YFK2_YEAST   1/1      20   100 ..     1    77 []    48.5  5.4e-10
+P78814       2/2      96   162 ..     1    77 []    48.4  5.7e-10
+PTB_RAT      3/4     363   430 ..     1    77 []    48.2  6.3e-10
+Q63568       3/4     364   431 ..     1    77 []    48.2  6.3e-10
+O22905       2/2     213   284 ..     1    77 []    48.0  7.3e-10
+U2AF_HUMAN   1/3     151   226 ..     1    77 []    48.0  7.3e-10
+U2AF_MOUSE   1/3     151   226 ..     1    77 []    48.0  7.3e-10
+IF32_YEAST   1/1      79   157 ..     1    77 []    48.0  7.6e-10
+P90978       2/3     292   362 ..     1    77 []    47.9  7.9e-10
+LA_MOUSE     1/1     113   182 ..     1    77 []    47.6  9.8e-10
+Q19335       1/2     374   461 ..     1    77 []    47.6    1e-09
+CPO_DROME    1/1     453   526 ..     1    77 []    47.3  1.2e-09
+Q06459       1/4     286   355 ..     1    77 []    47.2  1.3e-09
+Q17175       1/1       7    79 ..     1    77 []    46.6    2e-09
+O22905       1/2     112   184 ..     1    77 []    46.5  2.1e-09
+Q26658       1/2       8    79 ..     1    77 []    46.3  2.4e-09
+HS49_YEAST   2/2     110   180 ..     1    77 []    46.1  2.8e-09
+Q13117       1/1      42   110 ..     1    77 []    46.1  2.8e-09
+Q63887       2/2     152   218 ..     1    77 []    46.1  2.9e-09
+Q39244       2/2     179   245 ..     1    77 []    45.5  4.2e-09
+O13801       1/1     246   317 ..     1    77 []    45.5  4.3e-09
+O13674       1/3     301   356 ..     1    77 []    45.4  4.4e-09
+RU1A_YEAST   1/1     229   294 ..     1    77 []    45.3  4.9e-09
+Q41499       2/2     159   226 ..     1    77 []    45.3    5e-09
+Q12159       1/1      80   151 ..     1    77 []    45.2  5.2e-09
+O22922       2/2     160   227 ..     1    77 []    45.1  5.5e-09
+NUCL_XENLA   1/4     234   303 ..     1    77 []    45.1  5.6e-09
+YD3D_SCHPO   1/2      81   153 ..     1    77 []    45.0  5.8e-09
+NAB3_YEAST   1/1     332   396 ..     1    77 []    44.7  7.5e-09
+Q63627       1/1     424   491 ..     1    77 []    44.0  1.2e-08
+MEI2_SCHPO   1/1     197   265 ..     1    77 []    43.8  1.4e-08
+Q24562       1/3      95   170 ..     1    77 []    43.6  1.6e-08
+Q06106       1/5       4    89 ..     1    77 []    43.6  1.6e-08
+Q23161       1/1      59   129 ..     1    77 []    43.3    2e-08
+O13649       1/1       6    78 ..     1    77 []    43.0  2.4e-08
+P90797       1/1      66   136 ..     1    77 []    42.9  2.5e-08
+Q07655       1/1     535   612 ..     1    77 []    42.8  2.7e-08
+RN12_YEAST   1/1     200   267 ..     1    77 []    42.8  2.8e-08
+O22922       1/2      12    84 ..     1    77 []    42.6  3.1e-08
+YHC4_YEAST   1/2      95   167 ..     1    77 []    42.4  3.6e-08
+Q07034       1/1     332   396 ..     1    77 []    42.3  3.9e-08
+Q22318       2/2     179   249 ..     1    77 []    41.8  5.4e-08
+Q92879       1/3      18    92 ..     1    77 []    41.7    6e-08
+P70055       1/3      60   134 ..     1    77 []    41.6  6.1e-08
+O13620       1/5       4    78 ..     1    77 []    41.4  7.3e-08
+Q22708       1/2      16    85 ..     1    77 []    41.3  7.7e-08
+MODU_DROME   4/4     422   484 ..     1    77 []    41.2  8.5e-08
+P90978       1/3     185   260 ..     1    77 []    41.0  9.5e-08
+Q92950       1/3      18    92 ..     1    77 []    41.0  9.7e-08
+Q21323       2/2     145   212 ..     1    77 []    40.9    1e-07
+P90727       1/3     177   252 ..     1    77 []    40.9    1e-07
+O23866       1/2     188   254 ..     1    77 []    40.9  1.1e-07
+Q41499       1/2      13    85 ..     1    77 []    40.8  1.1e-07
+Q24375       1/1     151   225 ..     1    77 []    40.7  1.1e-07
+O18219       1/1      19    96 ..     1    77 []    40.6  1.2e-07
+Q15686       1/1     155   215 ..     1    77 []    40.4  1.5e-07
+Q15364       1/1     105   165 ..     1    77 []    40.4  1.5e-07
+D111_ARATH   1/1     281   360 ..     1    77 []    40.4  1.5e-07
+Q21322       2/2     129   196 ..     1    77 []    39.9    2e-07
+LAB_XENLA    1/1     112   183 ..     1    77 []    39.8  2.2e-07
+O23866       2/2     273   339 ..     1    77 []    39.7  2.4e-07
+PTB_MOUSE    1/4      60   127 ..     1    77 []    39.5  2.7e-07
+Q17385       1/3      57   129 ..     1    77 []    39.4  2.9e-07
+O00425       1/2       4    70 ..     1    77 []    39.2  3.3e-07
+PTB_RAT      1/4      60   127 ..     1    77 []    39.2  3.4e-07
+Q63568       1/4      60   127 ..     1    77 []    39.2  3.4e-07
+PTB_PIG      1/4      61   128 ..     1    77 []    38.9    4e-07
+NUCL_MESAU   1/4     309   378 ..     1    77 []    38.5  5.3e-07
+LA_DROME     1/1     151   225 ..     1    77 []    38.3    6e-07
+Q01491       2/4     388   458 ..     1    77 []    38.3  6.3e-07
+YA2B_SCHPO   2/4     208   274 ..     1    77 []    38.3  6.4e-07
+Q99730       1/2     135   213 ..     1    77 []    38.2  6.6e-07
+PTB_PIG      3/4     365   432 ..     1    77 []    38.2  6.7e-07
+YAG3_SCHPO   1/1     234   305 ..     1    77 []    38.2  6.8e-07
+LAA_XENLA    1/1     113   182 ..     1    77 []    37.7  9.1e-07
+Q21900       2/2     164   236 ..     1    77 []    37.6  9.7e-07
+NOP4_YEAST   2/4     149   220 ..     1    77 []    37.6  9.8e-07
+Q93062       1/1      26    93 ..     1    77 []    37.5  1.1e-06
+Q92516       1/1      26    93 ..     1    77 []    37.5  1.1e-06
+Q92517       1/1      26    93 ..     1    77 []    37.5  1.1e-06
+ROH1_HUMAN   3/3     291   359 ..     1    77 []    37.3  1.2e-06
+O35737       3/3     291   359 ..     1    77 []    37.3  1.2e-06
+O00425       2/2      83   151 ..     1    77 []    37.2  1.3e-06
+Q63623       1/1     479   546 ..     1    77 []    37.2  1.3e-06
+PTB_HUMAN    1/4      61   128 ..     1    77 []    37.1  1.4e-06
+MODU_DROME   1/4     177   246 ..     1    77 []    37.0  1.5e-06
+Q99730       2/2     266   344 ..     1    77 []    36.9  1.6e-06
+NOP3_YEAST   2/2     202   270 ..     1    77 []    36.8  1.7e-06
+Q10667       1/1       5    67 ..     1    77 []    36.7  1.9e-06
+O23093       2/3     224   299 ..     1    77 []    36.3  2.4e-06
+YA2B_SCHPO   4/4     416   482 ..     1    77 []    36.3  2.5e-06
+Q41498       2/2     181   248 ..     1    77 []    36.2  2.7e-06
+P92204       1/1     168   231 ..     1    77 []    35.8  3.5e-06
+NUCL_MOUSE   1/4     310   379 ..     1    77 []    35.8  3.5e-06
+YD3D_SCHPO   2/2     367   436 ..     1    77 []    35.7  3.8e-06
+Q22039       1/2      33   105 ..     1    77 []    35.5  4.5e-06
+Q93733       1/1      33   105 ..     1    77 []    35.5  4.5e-06
+Q19018       1/2      33   105 ..     1    77 []    35.5  4.5e-06
+P87216       1/1       5    71 ..     1    77 []    35.2  5.2e-06
+Q24534       1/2      15   105 ..     1    77 []    35.2  5.3e-06
+Q23120       2/2     114   181 ..     1    77 []    34.9  6.7e-06
+PES4_YEAST   2/4     181   249 ..     1    77 []    34.8  6.8e-06
+Q16630       1/1      83   156 ..     1    77 []    34.8  7.2e-06
+Q17430       1/2     595   663 ..     1    77 []    34.7  7.4e-06
+Q20414       1/2     175   244 ..     1    77 []    34.6  7.9e-06
+ROF_HUMAN    3/3     291   359 ..     1    77 []    34.5  8.4e-06
+P87126       1/1     194   261 ..     1    77 []    34.4  8.9e-06
+Q18265       1/1     300   379 ..     1    77 []    34.4  9.2e-06
+YAS9_SCHPO   1/1     365   429 ..     1    77 []    34.2  1.1e-05
+NUCL_RAT     1/4     312   381 ..     1    77 []    34.0  1.2e-05
+P70333       3/3     291   359 ..     1    77 []    34.0  1.2e-05
+ROH2_HUMAN   3/3     291   359 ..     1    77 []    34.0  1.2e-05
+Q13148       2/2     193   257 ..     1    77 []    33.4  1.9e-05
+YNR5_YEAST   1/2     127   214 ..     1    77 []    33.1  2.2e-05
+YQOA_CAEEL   1/1     114   196 ..     1    77 []    33.1  2.4e-05
+O42254       2/2      83   151 ..     1    77 []    33.0  2.4e-05
+O01806       1/1     112   182 ..     1    77 []    32.9  2.5e-05
+YIS9_YEAST   1/1      30   101 ..     1    77 []    32.9  2.7e-05
+Q18999       3/4     392   459 ..     1    77 []    32.7  3.1e-05
+RN24_SCHPO   1/2     107   196 ..     1    77 []    32.3  3.9e-05
+Q18220       1/1     420   491 ..     1    77 []    32.3    4e-05
+Q18219       1/1     420   491 ..     1    77 []    32.3    4e-05
+YHH5_YEAST   2/3     201   269 ..     1    77 []    31.9  5.2e-05
+PTB_MOUSE    4/4     452   520 ..     1    77 []    31.8  5.5e-05
+PTB_RAT      4/4     480   548 ..     1    77 []    31.8  5.5e-05
+Q63568       4/4     481   549 ..     1    77 []    31.8  5.5e-05
+ROU2_HUMAN   1/1     174   224 ..     1    77 []    31.8  5.6e-05
+IF32_HUMAN   1/1     195   271 ..     1    77 []    31.4  7.4e-05
+Q41988       1/1      18    66 .]     1    77 []    31.0  9.7e-05
+Q06106       3/5     534   599 ..     1    77 []    30.9   0.0001
+YBF1_YEAST   1/1      87   158 ..     1    77 []    30.9   0.0001
+O42254       1/2       4    70 ..     1    77 []    30.7  0.00012
+Q09542       2/2     191   261 ..     1    77 []    30.6  0.00013
+Q22318       1/2      91   161 ..     1    77 []    30.3  0.00016
+ROH2_HUMAN   1/3      13    85 ..     1    77 []    30.2  0.00017
+LU15_HUMAN   1/2     100   173 ..     1    77 []    29.9   0.0002
+O23288       1/2       8    72 ..     1    77 []    29.6  0.00026
+Q18601       1/2      37    97 ..     1    77 []    29.4   0.0003
+P70333       1/3      13    85 ..     1    77 []    29.3  0.00032
+U2AG_DROME   1/1      51   144 ..     1    77 []    29.0  0.00038
+YN26_YEAST   1/2      47   125 ..     1    77 []    29.0  0.00039
+U2AG_SCHPO   1/1      73   136 ..     1    77 []    28.9  0.00043
+GRF1_HUMAN   1/3      96   168 ..     1    77 []    28.7  0.00048
+O13845       3/3     509   586 ..     1    77 []    28.7  0.00049
+Q14136       1/2     212   285 ..     1    77 []    28.4  0.00058
+Q38915       2/2     249   318 ..     1    77 []    28.4   0.0006
+IF32_SCHPO   1/1      41   124 ..     1    77 []    28.3  0.00063
+NUCL_HUMAN   1/4     308   377 ..     1    77 []    28.2  0.00066
+PRT1_PICAN   1/1      39   115 ..     1    77 []    28.2  0.00068
+ROL_HUMAN    1/3      73   140 ..     1    77 []    27.8   0.0009
+Q19018       2/2     230   298 ..     1    77 []    27.7  0.00096
+Q22039       2/2     230   298 ..     1    77 []    27.7  0.00096
+O35737       1/3      13    85 ..     1    77 []    27.5   0.0011
+ROH1_HUMAN   1/3      13    85 ..     1    77 []    27.5   0.0011
+P87058       1/1     156   238 ..     1    77 []    27.4   0.0012
+PTB_PIG      4/4     482   550 ..     1    77 []    27.3   0.0012
+ARP_YEAST    1/1     228   317 ..     1    77 []    27.3   0.0012
+O13362       1/1     156   238 ..     1    77 []    27.3   0.0013
+O35326       2/2     110   177 ..     1    77 []    27.2   0.0013
+PTB_HUMAN    4/4     456   524 ..     1    77 []    27.0   0.0015
+MAT3_HUMAN   1/1      70   138 ..     1    77 []    27.0   0.0015
+O35833       2/2     498   566 ..     1    77 []    27.0   0.0015
+MAT3_RAT     2/2     498   566 ..     1    77 []    27.0   0.0015
+O04554       1/1     353   421 ..     1    77 []    26.9   0.0016
+Q26457       1/1     143   219 ..     1    77 []    26.2   0.0026
+Q20414       2/2     261   333 ..     1    77 []    26.1   0.0029
+O15047       1/1      96   167 ..     1    77 []    25.8   0.0035
+NOT4_YEAST   1/1     139   227 ..     1    77 []    25.7   0.0039
+Q14136       2/2     383   459 ..     1    77 []    25.6   0.0041
+YA2B_SCHPO   1/4     117   183 ..     1    77 []    25.5   0.0045
+P70501       2/2     225   302 ..     1    77 []    25.0   0.0064
+JSN1_YEAST   1/1     342   421 ..     1    77 []    24.6   0.0081
+YAC4_SCHPO   1/1     118   197 ..     1    77 []    24.6   0.0083
+P97343       1/1     344   400 ..     1    77 []    24.4   0.0094
+Q63285       1/1     345   401 ..     1    77 []    24.4   0.0094
+O18254       1/1      52   118 ..     1    77 []    24.3   0.0099
+Q08925       4/4     524   599 ..     1    77 []    24.3     0.01
+Q21322       1/2      10    77 ..     1    77 []    24.1    0.012
+Q26692       1/3       6    69 ..     1    77 []    24.1    0.012
+O22314       2/2     121   192 ..     1    77 []    24.0    0.013
+O22315       2/2     121   192 ..     1    77 []    24.0    0.013
+Q18717       1/1     110   188 ..     1    77 []    23.6    0.016
+O15759       1/2      77   140 ..     1    77 []    23.3    0.019
+O15758       1/2      59   122 ..     1    77 []    23.3    0.019
+O13620       3/5     508   573 ..     1    77 []    23.1    0.019
+Q60745       1/2       1    61 [.     1    77 []    23.1    0.019
+YKV4_YEAST   1/1      66   133 ..     1    77 []    23.0     0.02
+U2AF_SCHPO   2/2     440   504 ..     1    77 []    22.8    0.021
+Q08287       1/1       9    78 ..     1    77 []    22.5    0.022
+IF4B_YEAST   1/1     103   178 ..     1    77 []    22.4    0.023
+Q05519       1/1      35   107 ..     1    77 []    22.3    0.023
+Q26273       1/1       1    44 []     1    77 []    22.3    0.023
+O13741       1/2     166   257 ..     1    77 []    22.2    0.023
+Q14966       1/1     678   746 ..     1    77 []    22.2    0.024
+ROL_HUMAN    2/3     163   232 ..     1    77 []    22.2    0.024
+O15758       2/2     140   204 ..     1    77 []    21.5    0.028
+Q20966       1/1     276   358 ..     1    77 []    21.4    0.028
+Q24433       1/1     578   654 ..     1    77 []    21.3    0.029
+O22855       3/3     363   431 ..     1    77 []    21.2     0.03
+P91156       1/1     275   342 ..     1    77 []    21.1     0.03
+Q12221       1/1     318   397 ..     1    77 []    20.6    0.035
+Q04142       1/1     318   397 ..     1    77 []    20.6    0.035
+GRF1_HUMAN   3/3     347   415 ..     1    77 []    20.4    0.036
+O15759       2/2     158   222 ..     1    77 []    20.4    0.036
+Q23121       2/2     131   197 ..     1    77 []    20.1    0.038
+LU15_HUMAN   2/2     233   310 ..     1    77 []    19.5    0.044
+Q93021       1/1     117   194 ..     1    77 []    19.5    0.044
+Q01491       4/4     595   665 ..     1    77 []    19.3    0.047
+NRD1_YEAST   1/1     341   404 ..     1    77 []    18.9    0.051
+SSB1_YEAST   2/2     188   269 ..     1    77 []    18.8    0.053
+Q18999       1/4     120   193 ..     1    77 []    18.7    0.053
+NAM8_YEAST   1/3      56   140 ..     1    77 []    18.5    0.055
+Q93237       1/1       1    68 [.     1    77 []    18.4    0.057
+Q18999       4/4     509   577 ..     1    77 []    18.3    0.058
+YG5B_YEAST   2/3     297   413 ..     1    77 []    17.7    0.068
+Q99628       3/3     446   526 ..     1    77 []    17.2    0.075
+O01159       1/1      12    85 ..     1    77 []    16.8    0.083
+YAX9_SCHPO   1/1     137   205 ..     1    77 []    16.7    0.084
+P90727       3/3     391   474 ..     1    77 []    16.5    0.089
+O13674       3/3     502   568 ..     1    77 []    15.9      0.1
+Q21351       1/1     392   472 ..     1    77 []    15.9      0.1
+LAH1_YEAST   1/1     125   211 ..     1    77 []    15.8      0.1
+Q18601       2/2     128   195 ..     1    77 []    14.6     0.14
+Q06477       1/1      62   128 ..     1    77 []    14.5     0.14
+P90978       3/3     399   482 ..     1    77 []    14.4     0.14
+Q18999       2/4     203   270 ..     1    77 []    14.3     0.15
+U2R2_HUMAN   1/1     240   299 ..     1    77 []    14.1     0.15
+U2R1_HUMAN   1/1     245   304 ..     1    77 []    14.1     0.15
+P87143       1/2      73   146 ..     1    77 []    13.7     0.17
+Q62019       1/2       3    67 ..     1    77 []    13.6     0.17
+U2R2_MOUSE   1/1     236   303 ..     1    77 []    13.4     0.18
+YN8T_YEAST   1/1     420   482 ..     1    77 []    13.2     0.19
+O35404       1/1     877   948 ..     1    77 []    13.1     0.19
+YLF1_CAEEL   2/2     180   244 ..     1    77 []    13.1      0.2
+U2R1_MOUSE   1/1     215   290 ..     1    77 []    13.1      0.2
+Q23391       1/1     179   249 ..     1    77 []    12.9      0.2
+Q08925       3/4     433   499 ..     1    77 []    12.9     0.21
+Q22708       2/2     124   199 ..     1    77 []    12.8     0.21
+Q10458       1/1       3    77 ..     1    77 []    12.6     0.22
+P78332       1/1     386   459 ..     1    77 []    12.1     0.24
+Q61464       1/2     678   746 ..     1    77 []    12.0     0.25
+Q18937       1/1      86   161 ..     1    77 []    12.0     0.25
+O23612       1/1       4    82 ..     1    77 []    11.5     0.28
+Q23953       1/1     250   313 ..     1    77 []    11.5     0.28
+Q26548       1/1       1    43 [.     1    77 []    11.2      0.3
+Q39201       2/2     121   192 ..     1    77 []    11.2      0.3
+YLF1_CAEEL   1/2      60   144 ..     1    77 []    11.2      0.3
+O15056       1/1     509   580 ..     1    77 []    11.1     0.31
+U2AF_MOUSE   3/3     400   461 ..     1    77 []    10.8     0.33
+U2AF_HUMAN   3/3     400   461 ..     1    77 []    10.8     0.33
+P70166       1/1     312   402 ..     1    77 []    10.6     0.35
+BF41_MOUSE   1/1       1    55 [.     1    77 []    10.6     0.35
+Q24527       1/1      82   151 ..     1    77 []    10.5     0.35
+O18964       1/1     907   966 ..     1    77 []    10.2     0.38
+Q62504       1/1     124   193 ..     1    77 []     9.2     0.48
+NOP4_YEAST   4/4     464   585 ..     1    77 []     9.1     0.49
+Q91572       1/2     315   392 ..     1    77 []     9.1     0.49
+YQO1_CAEEL   1/1     269   341 ..     1    77 []     8.8     0.53
+Q17561       1/1      74   142 ..     1    77 []     8.7     0.54
+Q60745       2/2     128   174 .]     1    77 []     8.6     0.55
+YHS7_YEAST   1/1     159   231 ..     1    77 []     8.6     0.55
+Q14206       1/1      13    78 ..     1    77 []     8.6     0.55
+O35833       1/2     400   468 ..     1    77 []     8.3     0.59
+MAT3_RAT     1/2     400   468 ..     1    77 []     8.3     0.59
+P87143       2/2     211   288 ..     1    77 []     8.2      0.6
+O04526       1/1     189   266 ..     1    77 []     7.9     0.65
+Q08925       1/4     203   269 ..     1    77 []     7.4     0.73
+Q17860       1/1     458   529 ..     1    77 []     7.3     0.75
+Q92615       1/1     173   241 ..     1    77 []     7.3     0.75
+Q14498       3/3     441   497 ..     1    77 []     7.2     0.77
+Q14499       3/3     447   503 ..     1    77 []     7.2     0.77
+YN26_YEAST   2/2     188   260 ..     1    77 []     7.1     0.77
+ROF_HUMAN    1/3      13    85 ..     1    77 []     7.0     0.79
+YA2B_SCHPO   3/4     325   391 ..     1    77 []     6.5     0.91
+O22794       1/2     212   277 ..     1    77 []     6.4     0.91
+Q08925       2/4     294   366 ..     1    77 []     5.9        1
+Q24562       3/3     341   403 ..     1    77 []     5.8      1.1
+YMC7_CAEEL   1/1      32   102 ..     1    77 []     5.6      1.1
+Q12046       1/1     137   214 ..     1    77 []     5.1      1.2
+Q07623       1/1      80   150 ..     1    77 []     5.1      1.2
+Q10954       1/1     368   435 ..     1    77 []     4.9      1.3
+Q26276       1/1       1    44 []     1    77 []     4.4      1.5
+O01835       1/1     297   390 ..     1    77 []     4.2      1.5
+PES4_YEAST   4/4     395   466 ..     1    77 []     4.2      1.5
+Q61464       2/2     904   971 ..     1    77 []     4.1      1.6
+O01691       1/1     109   179 ..     1    77 []     4.0      1.6
+NGR1_YEAST   1/3      36   159 ..     1    77 []     4.0      1.6
+Q23452       1/1     213   280 ..     1    77 []     3.9      1.6
+ARP2_PLAFA   1/2      26   107 ..     1    77 []     3.9      1.6
+ROAB_ARTSA   1/1      36    95 ..     1    77 []     3.8      1.7
+Q61954       2/2     102   153 .]     1    77 []     3.3      1.9
+YN8V_YEAST   1/1     116   199 ..     1    77 []     3.2      1.9
+PTB_MOUSE    3/4     337   413 ..     1    77 []     3.1        2
+O23189       2/3     155   254 ..     1    77 []     3.1        2
+Q26279       1/1       1    44 []     1    77 []     2.9      2.1
+ROL_HUMAN    3/3     353   420 ..     1    77 []     2.9      2.1
+Q18317       1/1     277   347 ..     1    77 []     2.6      2.2
+P70501       1/2      60   127 ..     1    77 []     2.6      2.2
+MUD2_YEAST   1/1     425   510 ..     1    77 []     1.9      2.6
+Q21559       1/1      76   144 ..     1    77 []     1.2      3.1
+O29092       1/1     362   435 ..     1    77 []     0.9      3.3
+O14797       2/2     107   173 ..     1    77 []     0.6      3.5
+O13674       2/3     411   477 ..     1    77 []     0.6      3.5
+O22794       2/2     375   460 ..     1    77 []     0.2      3.9
+Q91572       2/2     434   504 ..     1    77 []     0.0        4
+YG3Q_YEAST   1/1      22    90 ..     1    77 []    -0.2      4.3
+P70221       1/1     128   194 ..     1    77 []    -0.3      4.3
+Y051_NPVAC   1/1     225   284 ..     1    77 []    -0.9        5
+YHR9_YEAST   1/1     250   335 ..     1    77 []    -1.2      5.4
+O35847       1/1      17    83 ..     1    77 []    -2.7      7.6
+HIPO_CAMJE   1/1     254   322 ..     1    77 []    -3.7      9.4
+Q60701       1/1      85   143 ..     1    77 []    -3.7      9.5
+Q24424       1/1       2    43 .]     1    77 []    -3.9       10
+BLSA_HUMAN   1/1     149   248 ..     1    77 []    -4.2       11
+XE7_HUMAN    1/1     149   248 ..     1    77 []    -4.2       11
+YAQ2_SCHPO   1/1     313   388 ..     1    77 []    -6.5       18
+O00583       1/1      22    83 ..     1    77 []    -6.7       19
+O13838       1/1     172   248 ..     1    77 []    -7.5       23
+O05954       1/1     285   364 ..     1    77 []    -7.6       23
+YM28_YEAST   1/1     253   342 ..     1    77 []    -8.1       26
+Q19944       1/1     131   197 ..     1    77 []    -8.7       30
+Q26274       1/1       1    44 []     1    77 []    -8.9       32
+O01886       1/1      37   121 ..     1    77 []    -9.2       34
+Q19164       1/1      14    84 ..     1    77 []    -9.2       34
+ASM4_YEAST   1/1     271   384 ..     1    77 []    -9.5       37
+Y117_HUMAN   1/1     189   227 .]     1    77 []   -10.0       41
+O00582       1/1      22    83 ..     1    77 []   -10.0       41
+O28580       1/1     244   322 ..     1    77 []   -10.9       50
+O35309       1/1     160   233 ..     1    77 []   -11.2       54
+O29837       1/1     267   359 ..     1    77 []   -11.7       61
+Q47952       1/1     101   196 ..     1    77 []   -12.8       78
+Q08646       1/1     284   354 ..     1    77 []   -13.0       82
+Q47957       1/1     101   188 ..     1    77 []   -13.1       84
+Q58954       1/1      21    96 ..     1    77 []   -13.2       86
+Q26278       1/1       1    44 []     1    77 []   -13.5       92
+Q13287       1/1     159   233 ..     1    77 []   -14.9  1.3e+02
+TKTC_METJA   1/1     139   200 ..     1    77 []   -15.0  1.3e+02
+PR06_YEAST   1/1     599   668 ..     1    77 []   -15.1  1.3e+02
+KHK_HUMAN    1/1     166   223 ..     1    77 []   -15.5  1.5e+02
+PUR5_METJA   1/1     258   322 ..     1    77 []   -15.8  1.6e+02
+O31824       1/1      57   130 ..     1    77 []   -15.9  1.6e+02
+Q92518       1/1       2    48 ..     1    77 []   -16.0  1.6e+02
+YD33_SCHPO   1/1      14    90 ..     1    77 []   -16.1  1.7e+02
+Q09135       1/1      44    93 ..     1    77 []   -16.1  1.7e+02
+O30057       1/1     159   244 ..     1    77 []   -16.3  1.8e+02
+Q42378       1/1     140   193 ..     1    77 []   -16.7  1.9e+02
+Q42482       1/1      20    76 ..     1    77 []   -16.8    2e+02
+O35002       1/1     186   249 ..     1    77 []   -17.0  2.1e+02
+PGDS_RAT     1/1     447   515 ..     1    77 []   -17.1  2.1e+02
+YY08_METJA   1/1     879   966 ..     1    77 []   -17.2  2.2e+02
+Q48827       1/1     113   175 ..     1    77 []   -17.8  2.5e+02
+Q23637       1/1     311   382 ..     1    77 []   -17.9  2.5e+02
+Q48639       1/1      46   120 ..     1    77 []   -17.9  2.5e+02
+VJ01_VACCC   1/1      70   126 ..     1    77 []   -18.3  2.8e+02
+HBA_ARAAR    1/1      13    93 ..     1    77 []   -18.4  2.8e+02
+YAB9_SCHPO   1/1     241   300 ..     1    77 []   -18.4  2.8e+02
+P94393       1/1     303   365 ..     1    77 []   -18.7  3.1e+02
+Q26271       1/1       1    39 []     1    77 []   -18.9  3.2e+02
+Q96423       1/1      42   105 ..     1    77 []   -18.9  3.2e+02
+O00373       1/1     181   248 ..     1    77 []   -19.3  3.5e+02
+YHB0_YEAST   1/1     115   191 ..     1    77 []   -19.4  3.6e+02
+VJ01_VACCV   1/1      70   126 ..     1    77 []   -19.4  3.6e+02
+SYH_METJA    1/1     329   397 ..     1    77 []   -19.8  3.9e+02
+Q12452       1/1      50   101 ..     1    77 []   -20.0  4.1e+02
+Q46102       1/1      39   109 ..     1    77 []   -20.2  4.3e+02
+O17002       1/1     192   242 ..     1    77 []   -20.3  4.4e+02
+Q19942       1/1     125   197 ..     1    77 []   -20.3  4.5e+02
+PHYA_SOLTU   1/1     822   898 ..     1    77 []   -20.4  4.5e+02
+P75023       1/1     228   280 ..     1    77 []   -20.4  4.5e+02
+SYF_METJA    1/1      84   188 ..     1    77 []   -20.4  4.6e+02
+Y447_METJA   1/1      76   149 ..     1    77 []   -20.6  4.7e+02
+Y383_METJA   1/1     182   261 ..     1    77 []   -20.6  4.8e+02
+Q94172       1/1      32   112 ..     1    77 []   -20.8    5e+02
+CHLL_CHLRE   1/1     172   245 ..     1    77 []   -20.9    5e+02
+Q00898       1/1     136   190 ..     1    77 []   -20.9  5.1e+02
+Q00897       1/1     136   190 ..     1    77 []   -20.9  5.1e+02
+A1A2_MOUSE   1/1     136   190 ..     1    77 []   -20.9  5.1e+02
+Q61283       1/1     125   179 ..     1    77 []   -20.9  5.1e+02
+Q85381       1/1      70   137 ..     1    77 []   -21.0  5.2e+02
+VJ01_VARV    1/1      70   137 ..     1    77 []   -21.0  5.2e+02
+O34784       1/1     466   555 ..     1    77 []   -21.0  5.2e+02
+MENE_HAEIN   1/1     340   416 ..     1    77 []   -21.1  5.3e+02
+T2C2_CHVP1   1/1     220   286 ..     1    77 []   -21.2  5.4e+02
+CARA_BACSU   1/1     100   156 ..     1    77 []   -21.2  5.4e+02
+SFCA_ECOLI   1/1      84   158 ..     1    77 []   -21.2  5.5e+02
+Q23267       1/1     351   414 ..     1    77 []   -21.5  5.8e+02
+DDLA_ECOLI   1/1     120   200 ..     1    77 []   -21.5  5.8e+02
+KLP1_CHLRE   1/1     171   249 ..     1    77 []   -21.5  5.8e+02
+Q26272       1/1       1    43 []     1    77 []   -21.7  6.1e+02
+CPC3_RABIT   1/1      38   101 ..     1    77 []   -22.0  6.6e+02
+O29409       1/1     236   296 ..     1    77 []   -22.0  6.6e+02
+O34925       1/1      71   150 ..     1    77 []   -22.2  6.9e+02
+Q26281       1/1       1    39 [.     1    77 []   -22.4  7.2e+02
+Q25988       1/1       2    73 ..     1    77 []   -22.4  7.2e+02
+META_ECOLI   1/1      38   112 ..     1    77 []   -22.5  7.3e+02
+O04614       1/1      39   117 ..     1    77 []   -22.8  7.8e+02
+P95425       1/1      70   137 ..     1    77 []   -22.8  7.9e+02
+Q51350       1/1      70   137 ..     1    77 []   -22.8  7.9e+02
+P75431       1/1       1    78 [.     1    77 []   -22.9    8e+02
+O00364       1/1     181   249 ..     1    77 []   -22.9    8e+02
+PRE1_STAAU   1/1      89   166 ..     1    77 []   -22.9  8.1e+02
+Q96515       1/1     181   253 ..     1    77 []   -23.0  8.2e+02
+O27761       1/1     110   225 ..     1    77 []   -23.1  8.4e+02
+Q51783       1/1      70   137 ..     1    77 []   -23.2  8.6e+02
+A1A1_MOUSE   1/1     136   190 ..     1    77 []   -23.4    9e+02
+Q00896       1/1     136   190 ..     1    77 []   -23.4    9e+02
+Q20445       1/1      11   106 ..     1    77 []   -23.5  9.3e+02
+YNC8_CAEEL   1/1       3    52 ..     1    77 []   -23.5  9.3e+02
+Q57721       1/1       1    62 [.     1    77 []   -23.5  9.3e+02
+Y556_METJA   1/1     101   181 ..     1    77 []   -23.7  9.7e+02
+O28372       1/1      29    98 ..     1    77 []   -23.8  9.8e+02
+O00376       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+O00361       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+O00371       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+O00377       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+Q15605       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+Q12880       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+O00369       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+O00374       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+O00365       1/1     181   249 ..     1    77 []   -23.8  9.9e+02
+
+Histogram of all scores:
+score    obs    exp  (one = represents 6 sequences)
+-----    ---    ---
+  -67      2      0|=                                                          
+  -66      0      0|                                                           
+  -65      0      0|                                                           
+  -64      1      0|=                                                          
+  -63      0      0|                                                           
+  -62      4      0|=                                                          
+  -61      2      0|=                                                          
+  -60      5      0|=                                                          
+  -59     12      0|==                                                         
+  -58      3      0|=                                                          
+  -57      5      0|=                                                          
+  -56      7      3|*=                                                         
+  -55     16     12|=*=                                                        
+  -54     16     30|=== *                                                      
+  -53     20     61|====      *                                                
+  -52     37    103|=======          *                                         
+  -51     42    148|=======                 *                                  
+  -50     48    188|========                       *                           
+  -49     42    216|=======                            *                       
+  -48     71    231|============                          *                    
+  -47     61    232|===========                           *                    
+  -46     64    222|===========                         *                      
+  -45     96    204|================                 *                         
+  -44     96    183|================              *                            
+  -43    100    159|=================         *                                
+  -42     92    136|================      *                                    
+  -41     97    114|================= *                                        
+  -40    112     95|===============*===                                        
+  -39    109     78|============*======                                        
+  -38     95     64|==========*=====                                           
+  -37    108     52|========*=========                                         
+  -36     86     42|======*========                                            
+  -35     92     33|=====*==========                                           
+  -34     89     27|====*==========                                            
+  -33     81     21|===*==========                                             
+  -32     61     17|==*========                                                
+  -31     64     13|==*========                                                
+  -30     34     11|=*====                                                     
+  -29     25      8|=*===                                                      
+  -28     33      7|=*====                                                     
+  -27     14      5|*==                                                        
+  -26     30      4|*====                                                      
+  -25     25      3|*====                                                      
+  -24     22      2|*===                                                       
+  -23     13      2|*==                                                        
+  -22      9      1|*=                                                         
+  -21     16      1|*==                                                        
+  -20      5      1|*                                                          
+  -19      6      0|=                                                          
+  -18      5      0|=                                                          
+  -17      6      0|=                                                          
+  -16      6      0|=                                                          
+  -15      1      0|=                                                          
+  -14      4      0|=                                                          
+  -13      1      0|=                                                          
+  -12      2      0|=                                                          
+  -11      2      0|=                                                          
+  -10      4      0|=                                                          
+   -9      3      0|=                                                          
+   -8      2      0|=                                                          
+   -7      2      0|=                                                          
+   -6      0      0|                                                           
+   -5      2      0|=                                                          
+   -4      3      0|=                                                          
+   -3      1      0|=                                                          
+   -2      1      0|=                                                          
+   -1      3      0|=                                                          
+    0      1      0|=                                                          
+    1      2      0|=                                                          
+    2      2      0|=                                                          
+    3      3      0|=                                                          
+    4      4      0|=                                                          
+    5      3      0|=                                                          
+    6      1      0|=                                                          
+    7      3      0|=                                                          
+    8      4      0|=                                                          
+    9      2      0|=                                                          
+   10      4      0|=                                                          
+   11      5      0|=                                                          
+   12      3      0|=                                                          
+   13      4      0|=                                                          
+   14      3      0|=                                                          
+   15      2      0|=                                                          
+   16      3      0|=                                                          
+   17      0      0|                                                           
+   18      2      0|=                                                          
+   19      1      0|=                                                          
+   20      2      0|=                                                          
+   21      4      0|=                                                          
+   22      6      0|=                                                          
+   23      1      0|=                                                          
+   24      6      0|=                                                          
+   25      2      0|=                                                          
+   26      2      0|=                                                          
+   27      5      0|=                                                          
+   28      3      0|=                                                          
+   29      1      0|=                                                          
+   30      1      0|=                                                          
+   31      4      0|=                                                          
+   32      4      0|=                                                          
+   33      1      0|=                                                          
+   34      4      0|=                                                          
+   35      6      0|=                                                          
+   36      1      0|=                                                          
+   37      5      0|=                                                          
+   38      2      0|=                                                          
+   39      1      0|=                                                          
+   40      5      0|=                                                          
+   41      0      0|                                                           
+   42      6      0|=                                                          
+   43      5      0|=                                                          
+   44      2      0|=                                                          
+   45      3      0|=                                                          
+   46      2      0|=                                                          
+   47      3      0|=                                                          
+   48      3      0|=                                                          
+   49      7      0|==                                                         
+   50      2      0|=                                                          
+   51      1      0|=                                                          
+   52      3      0|=                                                          
+   53      2      0|=                                                          
+   54      4      0|=                                                          
+   55      6      0|=                                                          
+   56      1      0|=                                                          
+   57      1      0|=                                                          
+   58      7      0|==                                                         
+   59      8      0|==                                                         
+   60      6      0|=                                                          
+   61      4      0|=                                                          
+   62      1      0|=                                                          
+   63     10      0|==                                                         
+   64      2      0|=                                                          
+   65      2      0|=                                                          
+   66      4      0|=                                                          
+   67      0      0|                                                           
+   68      1      0|=                                                          
+   69      2      0|=                                                          
+   70      2      0|=                                                          
+   71      7      0|==                                                         
+   72      6      0|=                                                          
+   73      0      0|                                                           
+   74      4      0|=                                                          
+   75      6      0|=                                                          
+   76      2      0|=                                                          
+   77      7      0|==                                                         
+   78      6      0|=                                                          
+   79      3      0|=                                                          
+   80      9      0|==                                                         
+   81      6      0|=                                                          
+   82      4      0|=                                                          
+   83      5      0|=                                                          
+   84      1      0|=                                                          
+   85      1      0|=                                                          
+   86      7      0|==                                                         
+   87      3      0|=                                                          
+   88      3      0|=                                                          
+   89      5      0|=                                                          
+   90      5      0|=                                                          
+   91      2      0|=                                                          
+   92      4      0|=                                                          
+   93      4      0|=                                                          
+>  94    318      -|=====================================================      
+
+
+% Statistical details of theoretical EVD fit:
+              mu =   -46.9346
+          lambda =     0.2314
+chi-sq statistic =  1898.6975
+  P(chi-square)  =          0
+
+Whole sequence top hits:
+tophits_s report:
+     Total hits:           751
+     Satisfying E cutoff:  751
+     Total memory:         117K
+
+Domain top hits:
+tophits_s report:
+     Total hits:           1215
+     Satisfying E cutoff:  1215
+     Total memory:         618K


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/hmmsearch.out
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hs_est.est2genome
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hs_est.est2genome	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hs_est.est2genome	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+Note Best alignment is between forward est and forward genome, and splice  sites imply forward gene
+Exon       119 100.0   695   813 HSHNRNPA         1   119 hs_est        
++Intron    -20   0.0   814  1376 HSHNRNPA    
+Exon       117 100.0  1377  1493 HSHNRNPA       120   236 hs_est        
++Intron    -20   0.0  1494  1788 HSHNRNPA    
+Exon       144  99.3  1789  1935 HSHNRNPA       237   382 hs_est        
++Intron    -20   0.0  1936  2083 HSHNRNPA    
+Exon        97 100.0  2084  2180 HSHNRNPA       383   479 hs_est        
+
+Span       417  99.8   695  2180 HSHNRNPA         1   479 hs_est        
+
+Segment    119 100.0   695   813 HSHNRNPA         1   119 hs_est        
+Segment    117 100.0  1377  1493 HSHNRNPA       120   236 hs_est        
+Segment      4 100.0  1789  1792 HSHNRNPA       237   240 hs_est        
+Segment    142 100.0  1794  1935 HSHNRNPA       241   382 hs_est        
+Segment     97 100.0  2084  2180 HSHNRNPA       383   479 hs_est        

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hs_fugu.newick
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hs_fugu.newick	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hs_fugu.newick	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+((ENSP00000263374,SINFRUP00000082596),SINFRUP0000006110);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.aln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.aln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.aln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,18 @@
+CLUSTAL W (1.82) multiple sequence alignment
+
+
+human           gtctgttccaagggcctttgcgtcaggtggg--ctcagggtt-------ccagggtggct
+owlmonkey       gtctgttccaagggccttcgagccagtctgggccccagggctgccccactcggggttcca
+                ****************** * * ***   **  * ***** *        * ****  * 
+
+human           ggacc-ccaggccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgg
+owlmonkey       gagcagttggaccccaggtct-cagc-gggagggtgtggctgggctc-tgaagcatttgg
+                *  *     * ****** *** **** ******  ************ ******** ***
+
+human           gggtgagcccaggggccccaaggcagggcacctggccttcagcctgcctcagccctgcct
+owlmonkey       g--tgagcccaggggctc-agggcagggcacctg-ccttcagcg-gcctcagcc-tgcct
+                *  ************* * * ************* ********  ********* *****
+
+human           gtctcccag
+owlmonkey       gtctcccag
+                *********

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fas
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fas	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fas	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+>human 
+GTCTGTTCCAAGGGCCTTTGCGTCAGGTGGGCTCAGGGTTCCAGGGTGGC
+TGGACCCCAGGCCCCAGCTCTGCAGCAGGGAGGACGTGGCTGGGCTCGTG
+AAGCATGTGGGGGTGAGCCCAGGGGCCCCAAGGCAGGGCACCTGGCCTTC
+AGCCTGCCTCAGCCCTGCCTGTCTCCCAG
+>owlmonkey 
+GTCTGTTCCAAGGGCCTTCGAGCCAGTCTGGGCCCCAGGGCTGCCCCACT
+CGGGGTTCCAGAGCAGTTGGACCCCAGGTCTCAGCGGGAGGGTGTGGCTG
+GGCTCTGAAGCATTTGGGTGAGCCCAGGGGCTCAGGGCAGGGCACCTGCC
+TTCAGCGGCCTCAGCCTGCCTGTCTCCCAG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hs_owlmonkey.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+>human
+GTCTGTTCCAAGGGCCTTTGCGTCAGG-TGGGC-TCAGGGTT---------------CCAGGGTGGCTGG
+ACCCCAGGCCCCAGCTCTGCAGCAGGGAGGACGTGGCTGGGCTCGTGAAGCATGTGGGGGTGAGCCCAGG
+GGCCCCAAGGCAGGGCACCTGGCCTTCAGCCTGCCTCAGCCCTGCCTGTCTCCCAG
+>owlmonkey
+GTCTGTTCCAAGGGCCTTCGAGCCAGTCTGGGCCCCAGGGCTGCCCCACTCGGGGTTCCAGAGCAGTTGG
+ACCCCAGGTCTCAGC---------GGGAGGGTGTGGCTGGGCTC-TGAAGCATTT--GGGTGAGCCCAGG
+GGCTC-AGGGCAGGGCACCTG-CCTTCAGC-GGCCTCAGC-CTGCCTGTCTCCCAG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hsinsulin.blastcl3.blastn
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hsinsulin.blastcl3.blastn	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hsinsulin.blastcl3.blastn	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,443 @@
+BLASTN 2.2.6 [Apr-09-2003]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= human 
+         (179 letters)
+
+Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,
+or phase 0, 1 or 2 HTGS sequences)
+           1,787,533 sequences; -24,016,349 total letters
+
+Searching... please wait.. done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gb|J00265.1|HUMINS01 Human insulin gene, complete cds                 355  2e-95
+emb|V00565.1|HSINSU Human gene for preproinsulin, from chromosom...   355  2e-95
+gb|M10039.1|HUMINSPR Human alpha-type insulin gene and 5' flanki...   355  2e-95
+gb|L15440.1|HUMINSTHIG Homo sapiens tyrosine hydroxylase (TH) ge...   355  2e-95
+gb|AC132217.15| Homo sapiens chromosome 11, clone RP11-889I17, c...   347  4e-93
+gb|AC130303.8| Homo sapiens chromosome 11, clone RP4-539G11, com...   347  4e-93
+gb|AY138590.1|AY138589S2 Homo sapiens insulin (INS) gene, exons ...   347  4e-93
+emb|AJ009655.1|HSA9655 Homo sapiens ins gene, partial                 347  4e-93
+gb|AY137497.1|AY137496S2 Pan troglodytes insulin precursor (INS)...   339  9e-91
+emb|X61089.1|PTPPINS P.troglodytes gene for preproinsulin             315  1e-83
+gb|AY137500.1|AY137498S3 Gorilla gorilla insulin precursor (INS)...   262  2e-67
+gb|AY137503.1|AY137501S3 Pongo pygmaeus insulin precursor (INS) ...   222  1e-55
+emb|X61092.1|CEPPINS C.aethiops gene for preproinsulin                129  2e-27
+
+>gb|J00265.1|HUMINS01 Human insulin gene, complete cds
+          Length = 4044
+
+ Score =  355 bits (179), Expect = 2e-95
+ Identities = 179/179 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1    gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2228 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 2287
+
+                                                                        
+Query: 61   gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2288 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 2347
+
+                                                                       
+Query: 121  aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+            |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2348 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 2406
+
+
+>emb|V00565.1|HSINSU Human gene for preproinsulin, from chromosome 11. Includes a highly
+            polymorphic region upstream from the insulin gene
+            containing tandemly repeated sequences
+          Length = 4992
+
+ Score =  355 bits (179), Expect = 2e-95
+ Identities = 179/179 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1    gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2228 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 2287
+
+                                                                        
+Query: 61   gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2288 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 2347
+
+                                                                       
+Query: 121  aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+            |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2348 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 2406
+
+
+>gb|M10039.1|HUMINSPR Human alpha-type insulin gene and 5' flanking polymorphic region
+          Length = 3943
+
+ Score =  355 bits (179), Expect = 2e-95
+ Identities = 179/179 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1    gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2503 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 2562
+
+                                                                        
+Query: 61   gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2563 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 2622
+
+                                                                       
+Query: 121  aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+            |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 2623 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 2681
+
+
+>gb|L15440.1|HUMINSTHIG Homo sapiens tyrosine hydroxylase (TH) gene, 3' end; insulin (INS)
+            gene, complete cds; insulin-like growth factor 2 (IGF2)
+            gene, 5' end
+          Length = 12565
+
+ Score =  355 bits (179), Expect = 2e-95
+ Identities = 179/179 (100%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1    gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 4289 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 4348
+
+                                                                        
+Query: 61   gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 4349 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 4408
+
+                                                                       
+Query: 121  aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+            |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 4409 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 4467
+
+
+>gb|AC132217.15| Homo sapiens chromosome 11, clone RP11-889I17, complete sequence
+          Length = 170027
+
+ Score =  347 bits (175), Expect = 4e-93
+ Identities = 178/179 (99%)
+ Strand = Plus / Plus
+
+                                                                         
+Query: 1     gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+             ||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||
+Sbjct: 86461 gtctgttccaagggcctttgcgtcaggtgggctcaggattccagggtggctggaccccag 86520
+
+                                                                         
+Query: 61    gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+             ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 86521 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 86580
+
+                                                                        
+Query: 121   aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+             |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 86581 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 86639
+
+
+>gb|AC130303.8| Homo sapiens chromosome 11, clone RP4-539G11, complete sequence
+          Length = 171366
+
+ Score =  347 bits (175), Expect = 4e-93
+ Identities = 178/179 (99%)
+ Strand = Plus / Plus
+
+                                                                          
+Query: 1      gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+              ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 127800 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 127859
+
+                                                                          
+Query: 61     gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+              ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 127860 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 127919
+
+                                                                         
+Query: 121    aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+              ||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||
+Sbjct: 127920 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtcacccag 127978
+
+
+>gb|AY138590.1|AY138589S2 Homo sapiens insulin (INS) gene, exons 1, 2, 3, and complete cds;
+           and insulin-like growth factor (IGF2) gene, exon 1
+          Length = 4233
+
+ Score =  347 bits (175), Expect = 4e-93
+ Identities = 178/179 (99%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 1   gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 378 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 437
+
+                                                                       
+Query: 61  gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 438 gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 497
+
+                                                                      
+Query: 121 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||
+Sbjct: 498 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtcacccag 556
+
+
+>emb|AJ009655.1|HSA9655 Homo sapiens ins gene, partial
+          Length = 1393
+
+ Score =  347 bits (175), Expect = 4e-93
+ Identities = 178/179 (99%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 1   gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 6   gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 65
+
+                                                                       
+Query: 61  gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+           ||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 66  gccccagctgtgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 125
+
+                                                                      
+Query: 121 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+           |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 126 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 184
+
+
+>gb|AY137497.1|AY137496S2 Pan troglodytes insulin precursor (INS) gene, complete cds
+          Length = 4124
+
+ Score =  339 bits (171), Expect = 9e-91
+ Identities = 177/179 (98%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 1   gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 338 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 397
+
+                                                                       
+Query: 61  gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+           ||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||
+Sbjct: 398 gccccagctctgcagcagggaggacgtggctgggctcttgaagcatgtgggggtgagccc 457
+
+                                                                      
+Query: 121 aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+           |||||||||||||||||||||||||||||||||| ||||||||||||||||||||||||
+Sbjct: 458 aggggccccaaggcagggcacctggccttcagccggcctcagccctgcctgtctcccag 516
+
+
+>emb|X61089.1|PTPPINS P.troglodytes gene for preproinsulin
+          Length = 2483
+
+ Score =  315 bits (159), Expect = 1e-83
+ Identities = 175/179 (97%), Gaps = 1/179 (0%)
+ Strand = Plus / Plus
+
+                                                                        
+Query: 1    gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 60
+            ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 1028 gtctgttccaagggcctttgcgtcaggtgggctcagggttccagggtggctggaccccag 1087
+
+                                                                        
+Query: 61   gccccagctctgcagcagggaggacgtggctgggctcgtgaagcatgtgggggtgagccc 120
+            ||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||
+Sbjct: 1088 gccccagctctgcagcagggaggacgtggctgggctcttgaagcatgtgggggtgagccc 1147
+
+                                                                       
+Query: 121  aggggccccaaggcagggcacctggccttcagcctgcctcagccctgcctgtctcccag 179
+            |||||||||||||||||||| || |||||||||| ||||||||||||||||||||||||
+Sbjct: 1148 aggggccccaaggcagggcagct-gccttcagccggcctcagccctgcctgtctcccag 1205
+
+
+>gb|AY137500.1|AY137498S3 Gorilla gorilla insulin precursor (INS) gene, complete cds
+          Length = 4146
+
+ Score =  262 bits (132), Expect = 2e-67
+ Identities = 141/144 (97%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 36  gggttccagggtggctggaccccaggccccagctctgcagcagggaggacgtggctgggc 95
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 389 gggttccagggtggctggaccccaggccccagctctgcagcagggaggacgtggctgggc 448
+
+                                                                       
+Query: 96  tcgtgaagcatgtgggggtgagcccaggggccccaaggcagggcacctggccttcagcct 155
+           || |||||||||||||||||||||||||||||||||||||||||| ||||||||||||| 
+Sbjct: 449 tcttgaagcatgtgggggtgagcccaggggccccaaggcagggcaactggccttcagccg 508
+
+                                   
+Query: 156 gcctcagccctgcctgtctcccag 179
+           ||||||||||||||||||||||||
+Sbjct: 509 gcctcagccctgcctgtctcccag 532
+
+
+>gb|AY137503.1|AY137501S3 Pongo pygmaeus insulin precursor (INS) gene, complete cds
+          Length = 4126
+
+ Score =  222 bits (112), Expect = 1e-55
+ Identities = 136/144 (94%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 36  gggttccagggtggctggaccccaggccccagctctgcagcagggaggacgtggctgggc 95
+           ||||||||||||||||||||||||||| ||||||||||||| ||||||||||||||||||
+Sbjct: 384 gggttccagggtggctggaccccaggctccagctctgcagctgggaggacgtggctgggc 443
+
+                                                                       
+Query: 96  tcgtgaagcatgtgggggtgagcccaggggccccaaggcagggcacctggccttcagcct 155
+           || |||||||| ||||||||||||||||||||||| ||||||||||||||||||||||| 
+Sbjct: 444 tcttgaagcatttgggggtgagcccaggggccccagggcagggcacctggccttcagccg 503
+
+                                   
+Query: 156 gcctcagccctgcctgtctcccag 179
+            ||||||| |||||||||||||||
+Sbjct: 504 acctcagctctgcctgtctcccag 527
+
+
+>emb|X61092.1|CEPPINS C.aethiops gene for preproinsulin
+          Length = 1909
+
+ Score =  129 bits (65), Expect = 2e-27
+ Identities = 86/93 (92%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 36  gggttccagggtggctggaccccaggccccagctctgcagcagggaggacgtggctgggc 95
+           ||||||||||||||||||||||||||||||||||||||| |||||||||| |||||||||
+Sbjct: 514 gggttccagggtggctggaccccaggccccagctctgcaacagggaggacatggctgggc 573
+
+                                            
+Query: 96  tcgtgaagcatgtgggggtgagcccaggggccc 128
+           || |||||| | || |||||| |||||||||||
+Sbjct: 574 tcttgaagcgtttgagggtgaacccaggggccc 606
+
+
+  Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,
+  or phase 0, 1 or 2 HTGS sequences)
+    Posted date:  Jul 31, 2003  1:26 AM
+  Number of letters in database: 192,913,178
+  Number of sequences in database:  1,867,771
+  
+Lambda     K      H
+    1.37    0.711     0.00 
+
+Gapped
+Lambda     K      H
+    1.37    0.711 7.29e-304 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 1,670,626
+Number of Sequences: 1867771
+Number of extensions: 1670626
+Number of successful extensions: 118444
+Number of sequences better than 1.0e-20: 13
+Number of HSP's better than  0.0 without gapping: 13
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 118410
+Number of HSP's gapped (non-prelim): 33
+length of query: 179
+length of database: 8,782,847,770
+effective HSP length: 20
+effective length of query: 159
+effective length of database: 8,745,492,350
+effective search space: 1390533283650
+effective search space used: 1390533283650
+T: 0
+A: 0
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 54 (107.5 bits)
+
+Query= owlmonkey 
+         (180 letters)
+
+Searching... please wait.. done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gb|J02989.1|ATRINS Owl monkey (A.trivirgatus) insulin gene, comp...   357  4e-96
+
+>gb|J02989.1|ATRINS Owl monkey (A.trivirgatus) insulin gene, complete cds
+          Length = 2113
+
+ Score =  357 bits (180), Expect = 4e-96
+ Identities = 180/180 (100%)
+ Strand = Plus / Plus
+
+                                                                       
+Query: 1   gtctgttccaagggccttcgagccagtctgggccccagggctgccccactcggggttcca 60
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 551 gtctgttccaagggccttcgagccagtctgggccccagggctgccccactcggggttcca 610
+
+                                                                       
+Query: 61  gagcagttggaccccaggtctcagcgggagggtgtggctgggctctgaagcatttgggtg 120
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 611 gagcagttggaccccaggtctcagcgggagggtgtggctgggctctgaagcatttgggtg 670
+
+                                                                       
+Query: 121 agcccaggggctcagggcagggcacctgccttcagcggcctcagcctgcctgtctcccag 180
+           ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct: 671 agcccaggggctcagggcagggcacctgccttcagcggcctcagcctgcctgtctcccag 730
+
+
+  Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,
+  or phase 0, 1 or 2 HTGS sequences)
+    Posted date:  Jul 31, 2003  1:26 AM
+  Number of letters in database: 192,913,178
+  Number of sequences in database:  1,867,771
+  
+Lambda     K      H
+    1.37    0.711     0.00 
+
+Gapped
+Lambda     K      H
+    1.37    0.711 7.29e-304 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Hits to DB: 910,717
+Number of Sequences: 1867771
+Number of extensions: 910717
+Number of successful extensions: 72147
+Number of sequences better than 1.0e-20: 1
+Number of HSP's better than  0.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 72146
+Number of HSP's gapped (non-prelim): 1
+length of query: 180
+length of database: 8,782,847,770
+effective HSP length: 20
+effective length of query: 160
+effective length of database: 8,745,492,350
+effective search space: 1399278776000
+effective search space used: 1399278776000
+T: 0
+A: 0
+X1: 6 (11.9 bits)
+X2: 15 (29.7 bits)
+S1: 12 (24.3 bits)
+S2: 54 (107.5 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/humor.maf
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/humor.maf	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/humor.maf	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+##maf version=1 scoring=zero
+a
+s NM_006987 0 5000 - 5000 gcacagcctttactccctgactgcgtttatattctgcctctcccgtgcctgttggtccttggggacGGCCCTCCGGCAGGTTCTGGCCTCAGGA--------CCCCGCACCAGCCCCGGCC-----GTGCCAGCTGCCCTCCCACTGGCCTGGCCCAGCTCTCCAGAGAGGCTTCATGCAGCCAGTTCCCCAGGGAACACCGTTGCCCACACGTTGCTAATGT-TAAAACATGAATTTATTGCATCTGTAGCATCTTACCTTGAGCACGGCTTCTTTCAATGCTTTTATTTTGCAATTATAG----GTTTATTTTCTTCCTT-----TCCTCTCTCTTGAGCTTTTTAAGGTGTTTTCTGTCACTCAGCTGGAGACTTTAAATTTCCTATTTATATTAATTATCAAATTA------TTTCATATATGA---GTCATTCAGCTCAGCCCTTTTTGGCTCCTTCTTGCAACTCTTACTTGTGGATTTATTGGGCAGTTCAGCGCCTACTTTCATG------------------------GGCAGA---CTCAACTGGCGA----GATTTTAACTGGAGGGTGAGAGATGTCCT------------GGCCACGGGGCCCTGTTG-----------------------CTCACAGTC--------------------------------------CCTGGATCAGAGGA--TGGTGCC---------------------GGATGGGCAGGTGCTGAGGATGCACATACCGCCCTCGGGTTA--------GCACCGAAGGTTCTTGTGTCAGACGTGAGGCTTCCTTCCTGGGTTCTTTCTGGCTGCGCTAGTCCAGAAGACCAGCAAACCCGAGATGGTCTGAGGTGGACGATGGAGCCTGAATGGAGAGCCTGGGGCGTGAGCCAG-GGTCTGGGATACCCTGGGACAGAGTTAGATGCCCCTGCAGACGTGGATGAGCGGCTGAAGACTAAGGGAGCAGGTCACACGTGGTGACAGACAGGAGAGGCTGCTGTGCCATCCAGGGGCTGGGGAAGGAGCCCCCG-TGGAGAGGCTCCATTTCGGCCACGTGGCTGCTGCAGACAACCGGGAGTCAGCGTCGGCACAAACACGGGTGCCTCGAAAGAGAGCAGTGCCGGCC-TGGTGTCTCCGGGCCCAGCTGCCACTGGCACAGGCCTCCGAAGGGGCAGGAGCAGGAGCAGGAGCAGAAAGCGCACA-TCAGGGCTcatgctgtgcctgtgaagatgtcgggcacgtccgttagtgtgtgtggtgcgcggtcgcctgtgaagacgactgttagtgtgtgtggtgcgtggtcgcctgtgaagacgactgttagtatgtgtggtctacggtcgcctgtgaagacgactgttagtatctgtggtgcgcggtcgcctgtgaagacgactgttagtatgtgtggtctatggtcgcctgtgaagacgactgttagtatctgtggtgcgcggtcgcctgtgaagacgactgttagtaagtgtggtgcgcggtcgcctgtgaagacgactgttagtaagtgtggtgcgcggtcgcctgtgaagacgtctgttagtaagtgtggtgcgcggtcgcctgtgaagacgactgttagtAAGTGTG-GTGCGCGGTCGCCTGTGAAGACGACTGTTAGTAAGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGTGTG----GTGCGCGGTCGCCTGTGAAGACGACTGTTAGTAAGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGTAAGTGTGGTGCGCGGTCGCCTGTGAAGACGTCTGTTAGTAAGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGTATGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGTATGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGCAAGTGTGGTGCGCGGTCGTCTGTGAA
GACGACTGTTAGCAAGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGCAAGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGCAAGTGTGGTGCGCGGTCGCCTGTGAAGACGACTGTTAGTATGTGTGGTGCACggtcgcctgtgaagatgtcttttagtaagtgtggtgcacggtcgcctgtgaagacgactgttagtatgtgtggtgcacggtcgcctgtgaagacgactgttagtaagtgtggtgcgcggtcgcctgtgaagacgactgttagtaagtgtggtgcgcggtcgcctgtgaagatgactgttagtaagtgtggtgcacggtcacaCCTTCCCACACAGTGCAGGTGACttactcagggctctggatggcaaatgacaa----ggacccaattcaaactagcataagcaaaaaggcagctctgtgggcctggca-cctggggatcaaaacagtcgttcaggcacggctggatccagccattgaccatgggtatgaggtgactcaccttcatctctcagctctgtttagctgcaccggcagacaggccgtttc-aagggggagccaggtggctactggccatcgtgagctagtgccg-------tgcttaccgtcccag---------------------------------------------acagtgggtt---tattgca--------------caggaactctggcaag--accccaggaggactcagtatggctgggtctgggtcacatgcccagccctgagccaattaccctgccttgggggtgg-----------ggctattctaattggtcaTCCTCATCACATTCACCCAGGAAGAAGGGGAAGG-AGGGGCTGTTTCTCTTGCCCAGGAAGACTGGGCGTGACTACTTTGACCTGAAGAGTTATTCTCAAAAAGGAAAGGAATGCTGGGTAGACAAAATATATGCCCATTGCAGAAGGCCAGCAGGGGCCCCTGGCCTTCGCTTTGCTAAAGGTGGC-----------------AGGTGCACA--GCTGTCCGGACCCTGGAGCTCAGTGTGAGGAGTGTAAAGGTGTAGAAGGTGCCGTCTGTGCAGCAGTGTGGCGGCGTTCTGCCTTTCTCAGCTGAATGGAAGACAAACACTCAGGCCTCTTCACACCTTCGTGACTGGTCCCCCCTGGACCTTTGCGCAGAGCTGGCCTTTC--------CATGAAGCAAACTCTGGGCACATGTGGAA-----ACCAGCCTCTTGCTGCCATGCCTGCCCCAGGGACTGACCAGGGTCGGCTCCAGTCACTGACATAGTTTATCACATCTCTTTGGCCTCTGGTCTCTGGATCCACACCTCTGGCTCTGACGTCTGCATCGAGCCCCTGTCCCTGCTGGGTTCA-AGTCTCTTTTCCAGAAGCTTCACTTGGCTCGAGGCTCCACTTGAT-TTGGTG-TCATTTGC-GTTTCTGAGCTTGGTCCTCCCGACCTGTGGTCCTGCCAGGGGAAGAGCAGCGTGGCCTGGCTGGACCCTGGCAAAGGAGGCTTCTCTTCTGCATTTCTTCTACACGGAGTGCATGAGCAGGACAAAGACCAGCTGATTCCTGCAGGCCTGGGGCTGTGGTGTGCGGGTCTCCCACAGGCCTTGGtatgtgtgtggtgtgtggtcccgcacactcagagatggcttccagcacattccacactttctttctttctttcttttttttttgagtcggagtctcgctctgttgcccaggctggagtgcagtggcgcgatctcggctcactgcaagctccgccttccgggttcaagcgattctcctgcctcagcctcccgagtagttgggacgacagttgcccacgaccatgtctggccaagcctcagccttcccaagtgctagggttacaggcgtgagccactatgcctggcctacccatgaattttattttattttttaaattttaaggcgaggtctcact
ctgtcacctaggctggagtacggtggctagatcatggctcacagcagcctcaaactcctagactccagccaccctcccacctcagcctcctgaatggctggggctacagacatttgccaccacacctggctaatttttgtattttcagtagagatgggtttttactatgttggccaggctggtctcgaactcctgaccttaggtgatccacccgcctcggcctctggagtagctggggttacaggagtgagccaccgtgccagtcctccttccaccctttcatataaccagcagcgcatatcagcacttctcaacatctttactgcaggacCTTTTCTTCAAATGAAGTCTTACCTGGAAGCCTGAcagatgacgaggaaaaagctgagttgctctgggttgggctgtaataacccccttgttcccagccccctgcttctgcttgcacggtcctgaagaggggctccacacccctggctccttggaacccagtttgttgggttataataaccggccttgttcccagccccctgcttcgcctgtacagtcctgaagagggtctccacaccccTGGCTCCTTGGAACCCAGTTTGAG-------AGCCTCTTGGCAATTATATCCATCTGTCTGTCTCTCTTGTGGTAGCACCTGCTGCTGCTCCCCATGGGGAAAGGTTGCTGATGGTGTTTATTTTTTTTTTAAGCATGAAAACATTTTCTTTTTTCTATCAGTAGCTTGTTTGCACTATGAAAAGGTCAACAGAGAGATCCTTGTCATCTTCCttctccctgcaggagggtgtcagggtgtaagtgctccctcgctgtgcaggggttcatttcattcatttcatTACCCTTGCCCTCCTCGAGGTACCTCCGGGAAGCTGTTCCATTTACACATCTGTCAAGTTCTCTGTGCGTCAATTTGCCTTGCTCCTGAAGAGCCACACCCAAAAGGGGCCCCACTCCAGGCAGCGGGGCTTCAGGAAGCGATGAGATGCT---GACGCAGGCCCCGTGCACCACCACTGCTG----CCTGTAAGGGCTGTTTTGGATACAGAAAATGTGCCCTTTCTAACC--------CAAAAAA------------------------------------------------------------------------------------------------------------------------------TGCTTGAAATGTGTAAAAGTGGCCAGACT---AACAGTCCCAAAGAGGGCTGC---------CCTCTAAGAGGAAGCGTCCCAAATCTGT---TCAGTTTTAGAGACTACGTGACTGGG------GTACG-------TGGTGGGGCCTTACCAGACATCCACG---------AGGAGAATCCAGGCCTTGGTTTGGCTCCAGCTGGGCCTGCCTGGTGGCTGCCACTTATTGACTTAAGTCCCAGTGATTCAGCTCCTCATCTGGAACACCTCGGGTCACCCCCG
+s mm3       0 3424 + 3424 ..................................................................GACCCCCCTGTACCCACTGGCCTAAGGAGCAACCCCACCCACAGCAGCTCTGGCCCTGTCATGCCAACCACTGTCCCACTGACCCAGACTACCCCTG-AGAAGGACATTGTACAGCTAAGTCACAGAGTAACACTGC----------------ATGTGTAACACAAGCGTGTGTT-----TGTAACAT---GCCTTCACCAAGGTTTCTTCTAATGCTTTTGTTTTGCAGTTCTAGCTGTTTTCATTTCCCTCCTTGGATCTTCTCTCTCTTGAACTTTTTAAAGTGTCATA-GTCTATAAGTTAGCAACTTCA--CTCCTTACGTGTATTAATTGTCAAATTACTTATTTTTTCTATGTGAATTGCTGTTTA------TCATCTTTGCCTCCTTTTTGCAAACTTTACGTATTCGGGGATTAAGCATTTTAGCATTTGCTCTTCTATTTAGAGACTGAGCAAGCAGTGATGGCGGAGCCCTGTGCTGGTGA-TGCATTTTGGGCTGGGAAGAGAATTCTACCTTTAGCAAAGC-AAGGCCA-TGGCCTCTGCTGTCAGTATATGATGGACGTTCATTCTGACTGTCTAGCTTCCTTCTTCAGGTGTAAGTAATCATGTCGTCCTCCTGGAGAAGATAAATTGGTCCCTGCGCCTCCCTCCTTAGGAAAGGACTGGCAGGTGCTTAAG------------CCAGTGTGTTAAGCCAGGAGCACTCAGAATCCT---GTCAGCAACCCTGCTTCTTTGTCATGATCATTCTAGGCGCACTT-TCTGGAAGATGAG--------GGGTGGTCAGAAGTGGATGACAGAGTCTCCACGTACAACCTTGGAGATAGGCCAG-GGTGTGGGATGAGCCTGGGATCTCTTGGGGATCATTGGAGACATAGGTGTA-ATTTGAAGA------GAGCAGGTCATAT-----------------------------TATCCAGG---------------------AGCTGGAGCTCTCTTTGGGCCCCATAGCTACAGCAGAAGCGCAA-AGTTAGCCAAAGAATGAACACTGGAGGTTGAAAGGAAGGTGATCTCTGCCATGGCATCTCGGAGTC----------------AGGCCTCAGAGG----ACAGTTAGCTGCAGGAGCAGACAGCTCTCC-CCAAAGCT......................................................................................................................................................................................................................................................................................................................................................................................................AAGTGAGCGTGTGCAGTGCATTCAGAAGAACAGAATGAGCGTGTGCAGTGTGCAGTGCATTCAGAAGAACAGAGTGAGCGTGTGCAGTGTGCACTGCATTCAGAAGAACAGAGTGAGCGTGTGCAGTGTGCACTGCATTCAGAAGAACAGAGTGAGC--GTGTGCAGCGCATTCA-----GAAGAACAGAGTGAGCGTGTGCAGTGTGCAGCGCATTCAGAAGAACAGAGTGAGCGTGTGCAGTGCAT-------TCAGAAGAACAGAGTGAGCGTGTGCAGTGTGCACTGCATTCAGAAGAACAGAGTGAGCGTGTGCAGTGTGCAGTGCATTCAGGA
GAACAGAGTGAGCGTGTGCAGTGTGCACTGCATTCAGAAGAACAGAGTGAGCATGTGCAGTGTGCAGTGCATTCAGAAGAACAGAGTGAGCGTGTGCAGTGTGCAGTGCATTCAGAAGAACAGAGTGAGCGTGTGCAGTGTGC..........................................................................................................................................................................................................................AGGTGATTTTCTCAGAGTTC---ATGATCAATCACAAATACGACTccagctcaaactacctgaagcaaaaatgggactctgcgagtctggcaaactgggaaccataaaagtgggtcaggcttggctgcacccagtgactgacaatatcttcaggcgtattcgcctccttccgtcgacttctctaagcatggtttc-agcctggcagtttctgagaggcagcaagatggccgcAGGATTTCCCACACCAGCACCATCAACACTGTTTGCTTCCCACACTGCCACCTATCCACTGCTCACTTCCCACGTGGACACTACACACCAAGATGCTGTCTGTACCGCAGCTCACTGTCACAAAAGGTACACTTTCCAGTAACCCAGGGAAGACACACCTTGGC-----CTAGGTCACATATACAGCCTTGAGCCAGCCACTCT--CCAGGAAGTGGAGGAGGGTGATGGACATCTTAATAGGTCCCTC-AGTCATGTCCACCCCCAAGGAGGAGGAAGCAGGGAACAGTTTCTTTTCTCCAGGAGGTC--GACATGGTTAGTTAAACCTAAAGA-------------AGGAA------GCTGGGCAGGCAAGATGGGTCTCCATCACCTAGAGCTAGCAGAGGCCTCTGGCCTTTGCTTTGCTAAAG-TAGAGTTGTCTTTCCAGCCTAAGATGCACACAGTTGTCTGGATCTTGGTGCT........................AAGGCTGTCATCTGCTTAGCATTGCCACAG--TTCTGGGGTATACAGT----------GTGAACCCT-AGCAATCTTCAAGCCTTCTTGTGAGGGGCATCGTAGGCCTTCTCATGGAGG----CTTCCCACCACCCTGTGTGGGCAATCC----CTCACATGGAACCTTCACAAGCAGTAAGCAGCTATGCTTG-----GGG--------GAAATAGCTTT--TCCCTAACATGCTT------GTGTCCCAGGATGCTGGCCCCTAG-----CACCACTTGCTCT-----TTGC-------ACCTCTCCTTGCCAGGCTCATGGTCTCTTTG--AGATGCTCATCTTGGCTCAGATCACAACCAGATGTTGGTG-TTACCTATAGTTTGTGGACA-GGTCCATCTGGTAGGTGGAGCTGC-AGGGAATGGACAGAGCAGCCAGGCTGGGCCCCGACAAAGGAGGC.....................................................................................................................................................................................................................................................................................................................................................................................................................................................
...........................................................................................................................................................................................................................................................................................................................................CTCTTCTTCAAGTGGAGTCTTATCTGGAAAGCTGA.............................................................................................................................................................................................................aGGCTCCTTGGAACCCCGATTGAGGATTGAGAAGTGTTGGGTGGCTATGCTCATTGGTCTCTC-----------AGAATCGTTTTTTACCTCCCTGGGGATAAGGTTGCTG--------------------------GAAAACATTTTCTTCCTTC----ACTAGATTGTCTGT--TATGAAAAGGTCACTTGTGATCTCCTTGTCATCCTCC.......................................................................TGCCCCTGTGCTGGGCTAGCTGCCTTTGGGGATTTGTTCTTCTACTGCACTGCTCAAGTGGTCAGGGCCTCAGTTTCCCTTCTTCCCAAGAAGCCAGAGCCA---GAGGCTCCACTGCAGGC-----GACTCTGGTAAGCTCTGTGATGCTCACAACTTGGGCCCTG----CCAGGACGGATAATAACTTTTGAGAAATGTCT-------AAAAGCTAAGTCCTTTCTAACAACAACAGACAAACAAACCCTTCAGTGCTGGCAGGTCCCTcatgtttataattccaatcactgtgaaggatgaagcagtaagatcctgggttctaggccagcctgggctactcagcaagatcctgtctcaagaaatgacaaaTGTTTCCAATTTGTAT--GTGTCCAGACTGAAAACACATCCGCGAGGGGATGTAAGCTTCTGCCTTGGGTGGGAAATACCCCACAATTGTCTGTCAGCTGCTGCCACTGTGGGACTTGGTCTCCAGTACAATAGTGCTGGGGTGGCTTGGCCCGGCTTCCCTGCACTAGGGAGAGAGAATCCAGGCCTGGGTTTGGCTCCAACTGGGCCTTGCAGG--GCTGTTACTTATTGAGTTAAGTCAGAGTGGCTCAAATGTTGATATGCAGCACCT-GGGTCACCCTTG
+s rn3       0 1951 + 1951 ..................................................................................................................................................................................................................................................................................................................................................GAGCTTTTTAAAGTGTCAGA-GC----AAGTTATCAACTTCA--TTCCCTATGAGTATTAATTATCAAATTACTTACGTTTCCTATATAAATTGCCATTTA------TCATCTTTGCCTCTTTCTTGCAAACTTTGCGTGTTCGTGGATTAAGCAATTTAGCGTTTGCTTTTCTGTTTAGAGAGTGAGCAAGCAGTGATGGCGGAGCCCCGTGCTGGTGCCTGCAGTGTGGGCTGGGAGGAGAAGGCTAACTTTAGCAAAGCGAAGGTCA-GGGCCTCTGCTGTCAGTATGTGATGGACAGCCATTCTACCTGTCCAGCTTCCTTCTTCAGGTATAAGTAATCACGTCATCCTCCTGGAGAAGATAA-CTGGCCCCTGCCCCTTCGTCCTTAGGAAAGGACTGACTGGTGCTTAAG------------CCAGTGTGTTAAGCCAGGGGCACTCAGGGTCCT---GTCAGCAAGGCTGTTTCTTTGCAGTGATCATTCTGGGTGCAGTC-TCTGGAAGACAAG--------GGGTGGTCAGAAGTGGATGGCACAGTCTCAGTGTACAACCTGGGAGATGGGCCCGAGGTCTGGGAT---------CTCTCTTGGGGATCATTGGAGACATAGGTGTG-ATTTGAAGA------GA-CAGGTCAGAC-----------------------------CATCCCGGGACCACAGAGAAAGCACATGGAGCTGGATCTCTCTTTGGGCCCCATGGCTACAGCAGAAGAGCAA-AG----------AATGAACACTGGAGATTGGAGGGA------TCTCTGCCATGGTGTCTCTCAGCC----------------AGACCTCAGAGG----AGAGTTAGCTGCAGGAACAGACAGCTCCCCACCAGAGCT.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.........................................................................................................................................................................................................................................................................................................................................................CTTCTCCATTCTGCCTAGGTGATTTTCTCAGAGTTC---GTGATCAATCACAAATAAGAGTCCAGCTCAAA--------AGCAGAAATGGGACTCTGTGAGTCTGGCAACCTGGGAACTGTAAAAGTGGGTCAGGCTTGGCTGCACCCAGTGACTGACAATATCTTCAGGCATATTTGTCTTCTTCTGTTGGCTTCCCTAAGCATCATTTCCAGACTGGCTGCTTCTGAGAGGGGGCAAGATGGCCACAGGATTTCCCATGCCAGCACCACCAGCACTGTTTGCTCCCCACG---------------------------------------------ACAACACTGT---TACCACAGCTCACCGTCATGAAAGGTATACTTCTCAGTAACCCAGAGAAGACAAACCCTGGC-----CTAGGTCACCTG--CAGCCTTGGGCCAGCCACTCT--CCAGGAAGTGGAAGAGGGTGATGGATATCTTAATAGGTTCTCC-AGTCATGTCCAACCCTAAGGAGGAAGAAGC--------------------CAGGAGGAC--AACCTGGTTACTTAATCCTGAAGA-------------AGGAA------GCTGGGCAGGCAAGATGGTTGTCCTTCACCTAGAGCTAGCAGAAGCCCCTGGCCTTTGCTTTGCTAAAGGTAGAATTGTCTTCCCAGCCCAAGATGCACA--GCTGTCTGGACCTTGGTGC------TGCAGGGTTGAA-----TTAAGGCTATGATCTGCTTAGCATTGCCACAG--TTCTGGGGTACACAGT----------GTGAACGCT-AGTAATCTTCATGCTTTTGCGTAAAGGGCATCTTAGGCCTTCTCATGGAGG----CTTCC--------TGTGTGGGAAATCC----CTCACACGGCACCTTCACATGTGATAAGCTGCTATGCTTG-----GGG--------AAAATAGCTTT--TCCCTAACATGCTT------GTGTCCCAGGATGCTGGTCC-TAG-----CACCCCTTGGCCT-----CTGC------TACCTCTCCTTGCCAGGGTCACAGTCTCTTTG--GGATGCTACTCTCAGCTCAGATTTCAACCGGATGTTGGGGGTCACCTCTAGTTTGTGAACA-AGTCTATTTTGTGGGTGGACATGC-AGGGACGGGACAGAGCTGCCAGGCTGAGCCCTGACAAAGGAGGCACC----CGGTATGGCTTCGGAGCTCTCTGCACGCGCACAGCAA----CTTCCTAGTCTTGCTGCTCTGTGGCAGCTGCA--------CCCCACAAGCCCTGG..............................................................................................................................................................................................................................................................................................................................................
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................AAGTTCCTTGGAACCCCATTTGAG-------AAGCTCTGGGTGATTGTGTCCATTGGTCTCTC...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
+
+a
+s NM_018289 0 5000 + 5000 tttgtccatgttggtcaggctggtctcgaactccccacctcaggtgatccacccacctcggcctcccagagtgctgggattacaggcgtgagccaccgtgcccagcctggaagttgttttttttGAGCCCAGCTCATTTGTGAAAATATCACGTGCTCCTTATTTTCATACCCAAATTTCTCATATCCATAAACTTCCACATTTTATTTCTGTCAGAAGTATACAATCTCTTTCTAGATTTCCACCTCCTGTCAAAGGTATTTCCTTCTTTTGTTTGACATAAATGAATCAATCTAGTCTGTTATTACTTA----AAGAAGTT--------TTTTAAAACCCA----------------------------------------------------------------GTGTGT----------------------------------------------------------------CAACCTATCTCTC--------------------------------------------------------------------------------------------------------------------------------------------------------------TGCTCTTTTAACTTCCTTTACTTCTTTGTACTGCATAACTCTCCTAAACTCAAACTTATATTATTAGTAGCTT----ATCTGCAAAGAGGGAGAAGGGGTATCATGGAGTTGGGAGCTCCAGAGGAGGCAAAGGTTTTGGGTCACGGGAGAGTGCCATTGAGAACGGACAATGGATGCTGAGAAGGACAATGAGAATCAGTGGAAATAAGGCAACAGGAATTTGTGCTGAACATGGTCACCCTGGTTTCATGATTCTCCACAACAATATGGGAAGCTCAGAGGCAAAGAAAAAAACAAAAAACAAAAAACAGAGAGACCAGGGCTGATGGCAGGCATGGtaaacct-ggtcatgcccctcctctgcttacagtccgtcagaggcttcccattccttcaagaagaccttccagatccttcaccgtccggcagtgcccatttctccagcttcctcgtgtatcactctcccttgggctttcaaacttccatcaccctggccgactcttttcatactcatggcttttgcacattctgttccttttatctatgtgttatcttcacttccaacccttggcttcagcttaaaaa--tcacttcctcagaaaagcctttcctggccgggcttgatggctcacacccgtaatcccagcagttagggaggtcaaggcaggcggatcacctgaggtcaggagttcaagaccagcatggccaacatggagaaaccccgtctctactaaaaatacaaaaattagccgggtgtggtggcgtatgcctgtaatcccagctactcaggaggctgaggcaggcgaatcacttgaatccaggaggcggaggttgcggtgagccgaggtcgcgccattgcactccagcctgggcaacaagagagaaactctgtctcaaaaaaaaaaaaaaaaaagaaaaaaagaaaaagaaaaagaaagaaaaggctttcctgacccctaatccaatcagaactctaaccatatagtctctcaacatgtgctttacttctcttttatcggatctatcacaattatatgtttatttgtacaaatgcccttttcatacatagaaatagctctgtagaggcaagcatggggtctattttgttcactgctgaattcccggcacctagcttgagtacctgtcgctaacaggaacaggtgttcaataaatgtattgactagcttaatgaaCGGCAGGAAGACTtaggtatacggccttaggcaaatttttattttgttgcttgttttaactagatcttca-tctccta-tctgttagatgggtatagtaataaaaactaacaacctc--agaattgctgt
gtgaattaaaagagacaatgcatgtaacatgcttcgaatggtacctgtcatatagtaagggctaggaaattttccttcttattATCATCATTAAAAATAAGCTCTCTgagcaaacagaactctcatacattgctggtagcaatgtgaaagtgatacagccactttggaaaacagtttaccagtttcttagaaagttaaacacagacatatcatataacacagcaatcccacatctacatatttacccaagtgacatgaaaacttatgttcacacaaaaatctgtacatgaatgtttatagcagctttattcataattgccaaaaactggaaa-aacccatatgtccttcaa-ccaggga-ctagataaacaaatagtggtgtattcatctagtggaataccagcaagaagacatgaa-ccacagatacatgcaacaacatggatgaattttagatactttatgacagatgaaagaagcca-gactgaaaaggtgtcatactgtatgattccatc-----------tatatgatattctggaaatatcaaaactatagggacaggccaggcgtggtggctcatgcctgtaaccccagcactttgggagcctgatgctggcagatcacctgaggtcaggagtttgagaccatcctgggcaacatgatgaaaccctgtctctactaaaaatacaaaaattagccaggtgtggtggcacgtgcctgtcatcccagctactcaggaggctgaggcaggagaatcgcttgaacccgggaggcggaggttgcagtgagctgagattgcaccactgcactccagcctgggtgatagagcgagactccgtctcaaaaaaacaaaaacaaaaacaaaaactatagggacagaaatcaagtcagtggttgccagtggatggggtgaggcaagagggaatttggggga--atgaaattgtcctataactagattgtggtagtagttacatggttatatacatttatttattttgttttattattgttttcttattatttgtttgtttgttttgagatggagtcttgctctgtcacccaggctggagtgcaatggtgcaatcttggctcactgcaacctccacctctcgggttcaagtgattctgtgcaccaccacggccagctacgttttgtatttttagtagatagagacgggttttaccatgttggccaggctggtctcgaacccctgacctcaagtgatctggcagcctcggcctccccacgtgctgggattataggcgtgagtcaccatgcccagtcgtttatacacatttatcaaaactca----ctgcatcataaattgcaag-agtaaattttactgtatgtaaatatcttaatTTTTTAAAGGAAAAAAAAAGTCCAAAATAAGTAGCTATAATTACCTTTTCCTTTTAGAAAAAGGCATTCATATTTTCTGAGCATGAGAGAACTACAGG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CTCATTGTAGACATTTGGGAAAATGTACAAAGGATGTACATTCATCACGTGCAGAAAAGTAATATTTCCTTGCTGTGAGCTTT--ATTGGCACAG-------------------------------------------------------------------------------------------AATACACacaaagctgggaccatgctatgcattttttaaatgaaattatacattttgctttctgaaatatattgtgcatat-ttttcatgtcattaa----ttttcaaaatcatgattttatgaatcaataatattctatgtttac
ttaagagacagagtctccctttgtggcccaggctggagtgcagtggcgcaaaggaatcattcctcactgcagcctcaaactcctgggctcaagcgatcctcagtggtgggatgagatctagacccctgatattcaatctcagaaagtcaataattccctgcctcagcctcctgagtagctggggctgcaggcctgtgccactgcaccaggttcaatgtatattttattttgtctttTTTTTCTTTTTTTAATGTATATTTTTTAGCA---TTCCTTTGTTGTTAGAAATTACATTGtttatata--------------------------------------------------attattatttgtcaattaaaaa----------taaaagaaaaaatCATACTGCTCCTATTTTTTACTTTGAGCAACTCTGATAACA-------------ACAACAAAAAAAAGTTCAGTGGGGCCAACGCGTTTCCCTCCTGGAGTTTCCCG--GTTCACCTGGTGGAGCGGGTCCTCCTTGGGCTGCAATTCCCTCCTTCTTTTCAGTGTCTGCAGGAAAGGTGGTTTCACAGGGTGATCGGTGATCTGGCTTATTTGTTCCTTTGTGGGGGTAAGGGCTGGGCACCTCACTATTTCCTGTATTACCAGCTTGAATTTACATCGCGCTGTTAATGTTTAATCCTCTCCACATCGA----------CGCGGGGTag-------taatttgttccaagtcacacaacttatcaatggtggactattcaggacttagatcccggatattcaacctcag----tAAACGTCTCCCCAGCTCCAGGGCGCTGCGTCATCTGGCTTTAAAAAAACGTTTCTggccgggcgcagtagctcacgcctgtcatcccagcactttgggaggccgaggcgggcggatcacgaggtcaagagttcgagaccagcctggccaacatggtgaaacctcgtctccactaaaaatacaaaaattagctgggcgtggtggcgggcgcctgtaatcccagctactcgggaggctgagacaggagaatcgcttgaacccagcaggcggggttgcagtgagccgagatcgcgccgttgcactccagcctgggcaacaagtaagactccgtctcaaaaaaaaaaaaaaaaaaaaaaaagaCGTTTCTCCCCA------TAAGGAGGCTGCAGTCACCTCGAAAAGCTTCTGCGCAGAGCCCGGCGCACGGGGAATTTCTATCGAGTGG----GCACTGTCCCGTCCTACGAATGTCCTTGGCGACATCGCCACTCGCCAGCTGGGATCACGTCGCCCCCG-TGGCCGGGG-ACACCCGCTGCCACTGACCGAGGGGGCCCCGGGAGCTGAGAGGTCGCCG-CTGCGCTGCCCGCGGAGGCTGTGGGCTGCGGTTGGCGACGCCG-CAGGGCTG-ACTTTCATCCGGGCGTCCT-GCTGGA-GGCCAGACCCTACCCCAACCTCGGGGTCCTCAGTGCGGGGCGCGCCTTGGG-GAGGGAAGAGGCCCCGCCCATCCTAAGGACGCGCTGAGGCCACGCGGGGCCGCCGGGATACTGCGTTGCCCGGGAGACACCT---GACGTGACAGGAACGCC-GCCAGTCTCAGTCCGCCGTGATCCCACAGTTCCCCGGT-CCCGGAAGTGCCCTGGCCTGCCGGAAGTGCcggagcggctgacagagcggctgacggagcCGGGCTCACCAGGTCGCTGCCGCGAGGGAGTTGCTGTGCTGGGGCCTGGGT
+s mm3       0 3181 + 3181 ............................................................................................................................AAGCCCAGTTCATTTGTGAGACTATCACACGCTC---ATTTTCATAGACAAATTATTCTTCTCTATAAACTTCCCCCTTATATTTATCTTTAAAACATAACATCTGTTTTT----------CTCCAGCCAAAGTTATTTCCTG-TTTTGCTTATTATAAA-----CAGCTTGGCCTATTGTTGCTTATCCTAATGCATTAAAAAACTTTTAAAATTAca..........................................................................................................................................................................................................................................................................................................................................................................................ATTTGCAATTAAAGACAATATAATA-TATAATGGAGTTGGGAGCTTCAGAAGGAAGAATGGTTTCCAGTTGTAGCAGATA-----TAACCACTGAAAATGGATACACAAGAGGACAAC------CAGCAAGAACCAGACAGTGTGAATGCAGCCTGGACATAGTAGCCTTGGTTTCA-GACTCTCTGTAAGAGTGCAGGAAGCTCAGACACAGAGAAAAAAACC------------CAGAGGGCC--------ATGACAG---CAGTGAGCCTAGGTCA--CCACTCCTC-GCCTCCAGTCTTTCAAAGGCTATCCATTTCCCTAGGACAGCTCTC---AACACTAAGGAGCTGGCTCCACCCAGTTCTTTAGCCTCTACTCCAACC--TCTGCTCTTGACTTTCAAAACTCTTT--------TCAAGTCTTTTCTTCCCTATGGCTCT-GCACATATTGTTACTTGGGTTT--GTCATGCCTTCTCTTCCGAA---TTgcttcagcttaaaaa--tcactctcttggagaaagcttcccc...........................................................................................................................................................................................................................................................................................................................................gcattccgcaccc-------------------------acactctctcagtgtgcactgtcctgacctagcttgggacttatcacgattacatctttatt-gtatgaatgccatctccttacatgaagttagatgtga--tgacaggcgtttgatgtatctggttctttgctgaat--tcagcaccgagcataggtgcctgtcatcagcaggaacaggtgttgaatacatttattaattgagtgaaGGAGCAACAGAA-------GCCAGAGGGTCTAGGG--------TGCTTGGTT---TGTTTGAACCAGCTctccattctctca-tctgttaaattgggattatggtaa----ttaaactct---ggaactgcttt
gtggattaa--gagtcaaggc-tgtagtatgcttggtatg-tacatgacatatagcaagcattaggaaacattagct---attatCACCATTAAAAGGAAACTCT--gagtatctgaaactcttagacactgtcagtaggaatgtgaaaataacac-----cctt-------------------ccttataattgtaaacacagacagaccataccacttagcaatcttactcctaggcatacacttaagcgaatgaaaaactta------cagaaaactttagacaggaacgtttatagcagctttattcacaacgtctcagaatttaaaacaactcggatgttttccaaatcaccta-gtagatagacaaat---------------caacgga------gcaagaa---acaaa-ctgatggcacaaataataacacagatgaatcacagatccacgatgctaagtgagagaagacaagagtcaaaaggtgacatgctgtaggactccattcctggagaatttatatgacattctggaaa.........................................................................................................................................................................................................................................................................................................................................................GCCAGTGATTTGCAgacaatagagtg-ggtgtgaagga-----ggggatgatggaagtgtctcc--gccggactgtgct-gtgctgacacgatgaca....................................................................................................................................................................................................................................................................................................................acatgcatttatcaaagctcatgtacagcactggaaaccggaaacggctaatctcactgcatgcaaat-tctatctaatttaaaaTAGAAAAATAAGGCACGATAA--AACTGCACATTCCTGTTCCTTCAAGAAgACAGCACCTATGGTTTCTGATAATAAAAGTACCATATGGAGAAAAAAAAAAAAAAAagctgggcagtggtggcgcatgcctttagtcccagcacttgggaggcagaggcaggcagatttctgagttcgaggccagcctggtctacagagtgagttccaggacagccaaggctacacagagaaaccctgtcttgaaaaacaaacaaacaaacaaacaaacaCAAAAAAAAGTACCATATGTTGATTGTAGATGTGTGGGAAAATAAAGAAAAAATATA--------------------------TTTCCCTGTCATTTGCTTTTGGTTGAGACAG-------------------------------------------------------------------------------------------..............................................TAAATATTCTGCTTTCTGAAATACACTGTGCATATATCGCTATGTCATTAAATAATTGTCAAAATAATACTTTTATGAGTCAATA...............
............................................................................................................................................................................................................................................----------------------------------TGTTGCTGTTGCTAAAACTTATACTACTCACACATTTTTGGGGGGCAAACTAAAACAACAATAAAATTCCATTCCTAAAACTATAATGTTTTTTAAAAACTAAAAA----------AAAAAAAGCAAATTATATTATTCCTGA-----------AGCAATTTTTATAAAACTATATGTACGGAAAAAAAAAAAAAAGTTCAGCAGGGCCAGATACTCCTCTGTCGAGGTTTTCACAATGTTGAACTGGTAAGGCAACCCTTCCCTTGGCTGCAATTCCATCATT-GTTCCAGTGCCTGCTGGAAAAGCGGTTCCACAGGGCTATC-------TGGCTCCTCT----TTTTGTAATGTACGCGGCTAAACACCCTAACATTTCCTGTACTACCTGCCTAAGTTTGCATAATACT-TCCATATTTAATG-TCACCCCATCTAACGTAGTGTAAGCAGGGCAGATGGGAATACTTTGTTCAAGATCATA-AGGTTATCAAGGGTGTA--AATAAGGATGTACAGCCCGGACATTCGGTCTCAAAAACCAAACACTTCCCA---------GCGTTGGGTCGCT----------------TTTCT................................................................................................................................................................................................................................................................................................................CCTTACTCCCCACCCTAGCGAGGAGGCTTCAGACAA----------TTCTGCCCAG--------------TAGATCCTTATCTAAGGAGCAGAAGCTCTCCCAAACCTCAAATGTCTTTGGCAACACTGCCACTTACCAGCTGGGGTCATTTCACTCCCGATGGCAGGGGGACACAGACTGCCCTAGACCAAGGGGATCCAGCGAGCGGGTTGCACGTTGGCTTCATGGCCAGCGGGGCTTATTGCCCATCCATTGGGACACTGCCGTGGCTA-CCGCTCTCTGGGGCGCCCG--------GGC--GACACTAC------------GGCGCTGGTGGGGGATGCGCCCTGGGCGAAGAAGAGAGGGCTTTCAATCTTAAGAACCCGCCGCGTGCGAACAGGGAACTTGAGGTTAAGGGTTGTCTGGAGATTTTCCAGGGACTTGACAGCCATGCCTGCGGAAGTCAGTCCTCCGCTATCCCAGAGTCCCCCGGTTCCCGGAAGTGCCCAGTTCT-CCGGAAGTGCCAGGCTAGCCGAGTAAGTAGC-----------GGCTCCCTCGAGCCCCGTACCTGGAGGCTTGCTCTTCTGGGGTCCTGGT
+s rn3       0 3320 + 3320 ............................................................................................................................AAGCCCAGTTCATTTGTGAGACTATCACATGCTC---ATTTTCATATACAAATTATTCTTATCTGTAAATATCCACATTATATTTATCTTAAAAACATACCATCTGTTTTT----------CTCCAGCCAAAGGTATTTCCTG-TTTTGCTTATTATGAA-----CAGCTTGGCCTATTGTTACTTATCCTAATGCATT--------TCTAAAATTACATTTATTTATTTACTTATATTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTTGCTCCCTGAACTGCATATCAAGACCCTATTCAGTGGACTCGCAGGCAGTTAGGGAACTGTGTGCAGCCTGTCTCTGAAGAAATATAAGAGTGAGGTGGAGGCAAATTATGATATTTGGTTCTCTCCTTTTACCATGTGGGTTCAGGGACCATACTCAGGTCACTAGGCTTGGTGGCAAGTGTCTTTCCCACTGAGCCATTGGCCACAACAGTTTTTAAATCTAGGGTCTACCTGTGTTCTTTTAAAACAGCTTACTTTTT---ATTTTACAAACCCCCTACATT-GAACTTGCATT--TGATA--TT----ATTGTTAAAGACAATCTAATG-------------GAGAACTTCAGAGGGAAGAGTGGTTTCGAGTTGTAGCAGATAG----TGACCGCTAAAAATGGATACGCAAAAGGACAAC------CAGCAAGAACTAGAC--TGTGAATGTGGGTTGGGCATGGTGGCCTTCATTCCA-GGCTCTCTGTAACAGTGCAGGAAGCTCAGAAACAGAGAAAGAAAAA------------CAGAGGACCCTGAGCTGATGACAG---CAGTGAACCTGGGTCA--CCACTCCTC-ACCT--AGTCCTTCAGAAGCTATCCATTTCCCTATGACAACTC-----AACACTAAGCATCCGGCTCCACCCAATTCTTTAGCCTCTGCTTGTACC--TCTACTCTTGACTTTCAAAACTCCAT-GTCCTAGCTGTGTCTTTTCTTCCTCATGTCTCT-GCTCACATTGCTACTTTGGTTT--GTCATGCCTTCTCTTCCAAA---TTGCTTCAGCTTAAAAAAATCATTTCCTTTGAGAAAGCTTCCCC........................................................................................................................................................................................................................................................................................................................AAAG---------------GCATTCCACGCCC-------------------------GCACTCTCTCAGTGTGCACTGTCCTTACCTGGCTTGGGACCTATCACAATCACATCTCTATT-GTGTGACTACTATCTCCTTATGTGAAGCTAGCTTTGAGATGGCAGGCATATGATGTATCCAGCTCTTTGCTGAAT--CCAGCAGCCAGCATAGGTGCCTGTCATCAN---------GNGTTCAGTATATTAATTANNTGGGTGAATGAGCGACANNN-------GCCATANGATCTNGNGC-------TGTTTTGTT---TGTCTGAACCAGCTCTCCACTCNCNNNNTNTNNTAAGTTGNGANNATGGTNA----NNNNAANCTNNNGNNNCTACTTT
GTGGATTAA--GAGTCAAGGC-TGTAGTATGCCTGGTATG-TGCATGACATATAGTAAGCATTAGGAAACATCCGCT---ATTATGGCCATTAAAAGGAAACTCTCTGAGTATCTGAAACTCTTAGACATTGTCAGTAGGAATGTGAAAATAATAC-----CCT--------------------CCTTATAATTGTAAACACAGACAGACCTTACCACTTAGCAGTCTTACTTCTAGGCATACACTTCAGCAAACAAAAAACTTA------CACAAAACTGTAGACAGGAATGTTTATAGCAGCTTTATTCACAACTTCTCAGAATTTAAAACAATG-------TTTCCAAATCACCTAAGTAGATAAACAAAT---------------CAAAGGA------GCAAGAA---ACAAAACTGATGACACAAATAATAATGTGGATGGATCACAGATCCATTACGCTAAGTGAGAGAAGACA-GAGTCGAAAGGTGACATGCTATAGGTCTCCATTCATGTAGAACTTATATGACATTCCGGAAA.........................................................................................................................................................................................................................................................................................................................................................ACCAGAGATTTCCAGACAATAGAGTG-GGTGTGAAGAAAAGTAGGGGATGATGGAAATATCTCA--ATTGGACTGTGGT-GTGCTGAGATGACAACATGCATTTAAT.................................................................................................................................................................................................................................................................................................................................................................................................................................................GACGGCACCTATAGTTTCTGATAATAAAACTACCGTATG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTCATTATAGACATGTGGGAAAATAAAGAAAAAATAT----------------------------TTCCTTGTCATTTGCTTTTGGTTGAGAAAGTAAATTCCATATCCACCTACCCTCCATCTTCCAAGGGTTGGGATTAAATATAGACTTATACCATCACACCCAGCCAAATTTGCTCTCAAAAAATAAATA--------------------------------------------TTCTGCTTTCTGAAATACACTGTGTATACATTCCCATGTTATTAAATAATTTTCAAAACAATG---TTATGAGTCAATG...............
............................................................................................................................................................................................................................................CTCTGCACTTCTGCAATTATATTTTTTAAGCAGTTTCTTTGGTTGCTAAAAATTATACTACTTACACATTTTTTTGG------------------CAAA---------CCAAAACAAAATTCCATTCCTAATACTGCAATTTTTTTTAATGAAAAGAGCAAATTATATTATTCCTGC-----------AGAAATTTCGATAAAACTATATGTCAG--AAAAAAAAAAAAAGTTCAGCAGGGCCAGATACTCCTTGGTCGAGGGTTTCACAATGTCAAACTGGTAAGGCAACCCTTCCCTTGGCTGCAATTCCCTCATT-GTTCCAGTACCTGCTGGAAAAGCCGTTCCACAGGGCGATC-------TGGCTCCTCT----TTTTGTGATGTAGGAGGCTAAACACCCTAACATTTCCTGTACTACCTGCCTAAGTTTGCATAATACT-TCCATATTTAATGCTCACCCCATCTAACGTAGTGTACGCAGGGCAGATGGGAGTACTTTGTTCAAGGTCGTA-AGGTTATCAAGGGTGTA--AACAAGGATGTACAGCTTGGACGTCCAGTCTCAAAAATCAAACACTTTCCT---------GCGTTGAGTCATTTTCCC................................................................................................................................................................................................................................................................................................................................CCCTACTCCCCACCCTAGTGAGGAGGCTTCAGACAA----------TTTTGCGCAG--------------GGGATCCTTATCTAAG------ACACTCCCCCAAACTTCATATGCCCTTGGCAACACCGCCACTTACCAGC--GAGTCATTTCACCCGCTATGGTAGGGGGTCACAGACTGTTCCAGACCAAGGGGATCCAGCGAGTGGGTTGTATGTTGGCTTCGTTGCCAGAGGGGCTTACTGCCCATCCATTGAGACACTGACGGGGCCAACCGCTCTCTGGGGCGCCCCCGCTCTCCAGC--GACACTAC------------GGCGCCGGTGGGAGACGCGCCCTGGGCGAGGTGGGGAGGGTTCCCAATCTTAAGAACCCGCCGCGTGCGGCCAGGGAACCTGTGGTTAAGGGTTATCTGGAGATTTTCCAGGGATTTGACTGCCATG---GCTGAAGTCAGTCCTCCGCTATCCCAGAGTCCCCCGGTTCCCGGAAGTGCCCGACTCT-CCGGAAGTGCCGGGCTAGACGAGCGAGTAGC-----------GGCTTTCTCGAGCCCTGTACCTGGAGGATTGCAATTCTGGGGTCCTGGT

Added: trunk/packages/bioperl/branches/upstream/current/t/data/humts1.pal
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/humts1.pal	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/humts1.pal	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,107 @@
+Palindromes of:  HUMTS1 
+Sequence length is: 18596 
+Start at position: 1
+End at position: 18596
+Minimum length of Palindromes is: 10 
+Maximum length of Palindromes is: 100 
+Maximum gap between elements is: 100 
+Number of mismatches allowed in Palindrome: 0
+
+
+
+Palindromes:
+126   caaaaaaaaaaaaaaaa   142
+      |||||||||||||||||
+217   gtttttttttttttttt   201
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+215   tttttttttttttttt   200
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+214   tttttttttttttttt   199
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+213   tttttttttttttttt   198
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+212   tttttttttttttttt   197
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+211   tttttttttttttttt   196
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+210   tttttttttttttttt   195
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+209   tttttttttttttttt   194
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+208   tttttttttttttttt   193
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+207   tttttttttttttttt   192
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+206   tttttttttttttttt   191
+
+127   aaaaaaaaaaaaaaaa   142
+      ||||||||||||||||
+205   tttttttttttttttt   190
+
+127   aaaaaaaaaaaaaaaagaccgccagggct   155
+      |||||||||||||||||||||||||||||
+204   ttttttttttttttttctggcggtcccga   176
+
+2278  catttttaac  2287
+      ||||||||||
+2351  gtaaaaattg  2342
+
+3960  agcccagacc  3969
+      ||||||||||
+4056  tcgggtctgg  4047
+
+4367  gattgagaaa  4376
+      ||||||||||
+4449  ctaactcttt  4440
+
+4368  attgagaaat  4377
+      ||||||||||
+4399  taactcttta  4390
+
+4387  gaaatttctc  4396
+      ||||||||||
+4411  ctttaaagag  4402
+
+4408  tttctttgatt  4418
+      |||||||||||
+4456  aaagaaactaa  4446
+
+4428  tttctttgatt  4438
+      |||||||||||
+4456  aaagaaactaa  4446
+
+6809  cataggcatg  6818
+      ||||||||||
+6839  gtatccgtac  6830
+
+11270 atgtttccaa 11279
+      ||||||||||
+11339 tacaaaggtt 11330
+
+14171 ttacctgaat 14180
+      ||||||||||
+14208 aatggactta 14199
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hybrid1.gff3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hybrid1.gff3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hybrid1.gff3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,11 @@
+##gff-version 3
+chr17	UCSC	mRNA	62467934	62469545	.	-	.	ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
+chr17	UCSC	CDS	62468039	62468236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62468490	62468654	.	-	2	Parent=A00469
+chr17	UCSC	CDS	62468747	62468866	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469076	62469236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469497	62469506	.	-	0	Parent=A00469
+##FASTA
+>A00469
+GATTACA
+GATTACA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/hybrid2.gff3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/hybrid2.gff3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/hybrid2.gff3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+##gff-version 3
+chr17	UCSC	mRNA	62467934	62469545	.	-	.	ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
+chr17	UCSC	CDS	62468039	62468236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62468490	62468654	.	-	2	Parent=A00469
+chr17	UCSC	CDS	62468747	62468866	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469076	62469236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469497	62469506	.	-	0	Parent=A00469
+>A00469
+GATTACA
+GATTACA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/insulin.water
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/insulin.water	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/insulin.water	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+########################################
+# Program:  water
+# Rundate:  Tue Feb 12 21:26:34 2002
+# Report_file: hs_insulin.water
+########################################
+#=======================================
+#
+# Aligned_sequences: 2
+# 1: hs_insulin
+# 2: 
+# Matrix: EDNAFULL
+# Gap_penalty: 10.0
+# Extend_penalty: 0.5
+#
+# Length: 194
+# Identity:     160/194 (82.5%)
+# Similarity:   160/194 (82.5%)
+# Gaps:          21/194 (10.8%)
+# Score: 690.0
+# 
+#
+#=======================================
+
+hs_insulin         1 gtctgttccaagggcctttgcgtcaggtgggctcaggg------------     38
+                     ||||||||||||||||||.|||||||||||||||||||            
+                   1 gtctgttccaagggccttcgcgtcaggtgggctcagggctgcccacttgg     50
+
+hs_insulin        39 -----ccagggtggctggaccccaggccccagctctgcagcagggaggac     83
+                          ||||||||||||||||||||||||||||||||||.||||||||||
+                  51 gggttccagggtggctggaccccaggccccagctctgcaacagggaggac    100
+
+hs_insulin        84 gtggctgggctcgtgaagcatgtgggggtgagcccaggggccccaaggca    133
+                     .|||||||||||.||||||.|.||.||||||.|||||||| ||||.||||
+                 101 atggctgggctcttgaagcgtttgagggtgaacccagggg-cccagggca    149
+
+hs_insulin       134 gggcacctggccttcagcctgcctcagccctgcctgtctcccag    177
+                      ||||||||||| |||||..||||||| .|||||||||||.|||
+                 150 -ggcacctggcc-tcagctggcctcag-gctgcctgtctctcag    190
+
+
+#---------------------------------------
+#---------------------------------------

Added: trunk/packages/bioperl/branches/upstream/current/t/data/interpro_ebi.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/interpro_ebi.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/interpro_ebi.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<E	BIInterProScanResults xmlns="http://www.ebi.ac.uk/schema"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:noNamespaceSchemaLocation="http://www.ebi.ac.uk/schema/InterProScanResult.xsd">
+  <Header>
+	 <program name="InterProScan" version="4.0"
+		citation="PMID:11590104"/>
+	 <parameters>
+		<sequences total="1"/>
+		<databases total="10">
+		  <database number="1" name="PRODOM" type="sequences"/>
+		  <database number="2" name="PRINTS" type="matrix"/>
+		  <database number="3" name="PIR" type="model"/>
+		  <database number="4" name="PFAM" type="model"/>
+		  <database number="5" name="SMART" type="model"/>
+		  <database number="6" name="TIGRFAMs" type="model"/>
+		  <database number="7" name="PROSITE" type="strings"/>
+		  <database number="8" name="SUPERFAMILY" type="model"/>
+		  <database number="9" name="SIGNALP" type="model"/>
+		  <database number="10" name="TMHMM" type="model"/>
+		</databases>
+	 </parameters>
+  </Header>
+
+  <interpro_matches>
+
+	 <protein id="Sequence_1" length="85" crc64="E5E52028293ABD62">
+		
+		<interpro id="IPR009366" name="Protein of unknown function DUF1021" type="Family">
+		  <match id="PF06257.1" name="Protein of unknown function (DUF1021)"
+			 dbname="PFAM">
+			 <location start="5" end="78" score="3.7e-47" status="T"
+				evidence="HMMPfam"/>
+		  </match>
+		</interpro>
+
+		<interpro id="noIPR" name="unintegrated" type="unintegrated">
+		  <match id="PD061839" name="Q92F78_LISMO_Q92F78;" dbname="PRODOM">
+			 <location start="4" end="76" score="2e-36" status="T"
+				evidence="BlastProDom"/>
+		  </match>
+		</interpro>
+
+   </protein>
+	 
+  </interpro_matches>
+</EBIInterProScanResults>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/interpro_short.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/interpro_short.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/interpro_short.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,647 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- edited with XML Spy v4.4 U (http://www.xmlspy.com) by LYNN WHITE (EMBL OUTSTATION THE EBI) -->
+<!DOCTYPE interprodb SYSTEM "interpro.dtd">
+<interprodb>
+  <release>
+    <dbinfo dbname="INTERPRO" version="5.1" entry_count="5630" file_date="12-JUL-2002 00:00:00"/>
+    <dbinfo dbname="SWISS" version="40.22" entry_count="110823" file_date="24-JUN-2002 00:00:00"/>
+    <dbinfo dbname="TREMBL" version="21.2" entry_count="671586" file_date="05-JUL-2002 00:00:00"/>
+    <dbinfo dbname="PRINTS" version="33.0" entry_count="1650" file_date="24-JAN-2002 00:00:00"/>
+    <dbinfo dbname="PREFILE" version="N/A" entry_count="252" file_date="18-JUL-2001 00:00:00"/>
+    <dbinfo dbname="PROSITE" version="17.5" entry_count="1565" file_date="21-JUN-2002 00:00:00"/>
+    <dbinfo dbname="PFAM" version="7.3" entry_count="3865" file_date="17-MAY-2002 00:00:00"/>
+    <dbinfo dbname="PRODOM" version="2001.3" entry_count="1346" file_date="28-JAN-2002 00:00:00"/>
+    <dbinfo dbname="SMART" version="3.1" entry_count="509" file_date="16-NOV-2000 00:00:00"/>
+    <dbinfo dbname="TIGRFAMs" version="1.2" entry_count="814" file_date="03-AUG-2001 00:00:00"/>
+  </release>
+  <interpro id="IPR000001" type="Domain" short_name="Kringle" protein_count="129">
+    <name>Kringle</name>
+    <abstract>
+Kringles are autonomous structural domains, found throughout the blood 
+               clotting and fibrinolytic proteins.
+Kringle domains are believed to play a role in binding mediators (e.g., membranes,
+other proteins or phospholipids), and in the regulation of proteolytic activity
+<cite idref="PUB00002414"/>, <cite idref="PUB00001541"/>, <cite idref="PUB00003257"/>. 
+Kringle domains <cite idref="PUB00003400"/>, <cite idref="PUB00000803"/>, <cite idref="PUB00001620"/> are characterised by a triple loop, 3-disulphide bridge structure, whose  conformation is defined by a number of hydrogen bonds and small pieces of  anti-parallel beta-sheet. They are found in a varying number  of  copies,  in some serine proteases and
+plasma proteins.</abstract>
+    <example_list>
+      <example><db_xref dbkey="P00748" db="SWISS"/>Blood coagulation factor XII (Hageman factor) (1 copy)</example>
+      <example><db_xref dbkey="P00749" db="SWISS"/>Urokinase-type plasminogen activator (1 copy)</example>
+      <example><db_xref dbkey="Q08048" db="SWISS"/>Hepatocyte growth factor (HGF) (4 copies)</example>
+      <example><db_xref dbkey="Q04756" db="SWISS"/>Hepatocyte growth factor activator <cite idref="PUB00003400"/> (1 copy) <cite idref="PUB00002776"/></example>
+      <example>
+				<db_xref dbkey="P06867" db="SWISS"/>
+Plasminogen (5 copies)
+      </example>
+      <example>
+				<db_xref dbkey="P26927" db="SWISS"/>
+Hepatocyte growth factor like protein (4 copies) <cite idref="PUB00000355"/>
+			</example>
+      <example>
+				<db_xref dbkey="P00735" db="SWISS"/>
+Thrombin (2 copies)
+      </example>
+      <example>
+				<db_xref dbkey="P15638" db="SWISS"/>
+Tissue plasminogen activator (TPA) (2 copies)
+      </example>
+      <example>
+				<db_xref dbkey="P08519" db="SWISS"/>
+Apolipoprotein A (38 copies)
+      </example>
+    </example_list>
+    <pub_list>
+      <publication id="PUB00002414">
+	<author_list>Fujikawa K., McMullen B.A.</author_list>
+	<title>Primary structure of the heavy chain of human factor XIIa.</title>
+	<db_xref db="MEDLINE" dbkey="85182674"/>
+	<journal>J. Biol. Chem.</journal>
+	<location firstpage="5328" lastpage="5341" volume="260"/>
+	<year>1985</year>
+      </publication>
+      <publication id="PUB00001541">
+	<author_list>Patthy L., Trexler M., Vali V., Banyai L., Varadi A.</author_list>
+	<title>Kringles: Modules specialized for protein binding.</title>
+	<db_xref db="MEDLINE" dbkey="84208845"/>
+	<journal>FEBS Lett.</journal>
+	<location firstpage="131" lastpage="136" volume="171"/>
+	<year>1984</year>
+      </publication>
+      <publication id="PUB00003257">
+	<author_list>Atkinson R.A., Williams R.J.P.</author_list>
+	<title>Solution structure of the kringle 4 domain from human plasminogen by 1H nuclear magnetic resonance spectroscopy and distance geometry.</title>
+	<db_xref db="MEDLINE" dbkey="90219023"/>
+	<journal>J. Mol. Biol.</journal>
+	<location firstpage="541" lastpage="552" volume="212"/>
+	<year>1990</year>
+      </publication>
+      <publication id="PUB00003400">
+	<author_list>Castellino F.J., Beals J.M.</author_list>
+	<title>The genetic relationships between the kringle domains of human plasminogen, prothrombin, tissue plasminogen activator, urokinase, and coagulation factor XII.</title>
+	<db_xref db="MEDLINE" dbkey="88230478"/>
+	<journal>J. Mol. Evol.</journal>
+	<location firstpage="358" lastpage="369" volume="26"/>
+	<year>1987</year>
+      </publication>
+      <publication id="PUB00000803">
+	<author_list>Patthy L.</author_list>
+	<title>Evolution of the proteases of blood coagulation and fibrinolysis by assembly from modules.</title>
+	<db_xref db="MEDLINE" dbkey="85228216"/>
+	<journal>Cell</journal>
+	<location firstpage="657" lastpage="663" volume="41"/>
+	<year>1985</year>
+      </publication>
+      <publication id="PUB00001620">
+	<author_list>Takahashi K., Ikeo K., Gojobori T.</author_list>
+	<title>Evolutionary origin of numerous kringles in human and simian apolipoprotein(a).</title>
+	<db_xref db="MEDLINE" dbkey="91348198"/>
+	<journal>FEBS Lett.</journal>
+	<location firstpage="146" lastpage="148" volume="287"/>
+	<year>1991</year>
+      </publication>
+      <publication id="PUB00000355">
+	<author_list>Friezner Degen S.J., Stuart L.A., Han S., Jamison C.S.</author_list>
+	<title>Characterization of the mouse cDNA and gene coding for a hepatocyte growth factor-like protein: expression during development.</title>
+	<db_xref db="MEDLINE" dbkey="92002017"/>
+	<journal>Biochemistry</journal>
+	<location firstpage="9781" lastpage="9791" volume="30"/>
+	<year>1991</year>
+      </publication>
+      <publication id="PUB00002776">
+	<author_list>Miyazawa K., Shimomura T., Kitamura A., Kondo J., Morimoto Y., Kitamura N.</author_list>
+	<title>Molecular cloning and sequence analysis of the cDNA for a human serine protease reponsible for activation of hepatocyte growth factor. Structural similarity of the protease precursor to blood coagulation factor XII.</title>
+	<db_xref db="MEDLINE" dbkey="93252878"/>
+	<journal>J. Biol. Chem.</journal>
+	<location firstpage="10024" lastpage="10028" volume="268"/>
+	<year>1993</year>
+      </publication>
+    </pub_list>
+    <member_list>
+      <db_xref protein_count="91" db="PRINTS" dbkey="PR00018" name="KRINGLE"/>
+      <db_xref protein_count="126" db="PROFILE" dbkey="PS50070" name="KRINGLE_2"/>
+      <db_xref protein_count="161" db="PROSITE" dbkey="PS00021" name="KRINGLE_1"/>
+      <db_xref protein_count="128" db="PFAM" dbkey="PF00051" name="kringle"/>
+      <db_xref protein_count="126" db="PRODOM" dbkey="PD000395" name="Kringle"/>
+      <db_xref protein_count="115" db="SMART" dbkey="SM00130" name="KR"/>
+    </member_list>
+    <external_doc_list>
+      <db_xref db="BLOCKS" dbkey="IPB000001"/>
+      <db_xref db="PDOC" dbkey="PDOC00020"/>
+    </external_doc_list>
+  </interpro>
+  <interpro id="IPR000002" type="Domain" short_name="Fizzy" protein_count="57">
+    <name>Cdc20/Fizzy</name>
+    <abstract>
+&lt;p&gt;The Cdc20/Fizzy region is almost always associated with the G-protein beta WD-40 repeat (<db_xref db="INTERPRO" dbkey="IPR001680"/>). Ubiquitin-mediated proteolysis due to the anaphase-promoting complex/cyclosome
+ (APC/C) is essential for separation of sister chromatids, requiring degradation of the anaphase inhibitor Pds1, and for exit from mitosis, requiring inactivation of cyclin B  Cdk1 kinases <cite idref="PUB00006167"/>. In <taxon tax_id="4890">yeast</taxon> Cdc20 is required for two microtubule-dependent processes, nuclear movements prior to anaphase and chromosome separation. APC(Cdc20) allows activation of Cdc14 and promotes exit from mitosis by mediating proteolysis of Pds1  and the S phase cyclin Clb5 in the yeast <taxon tax_id="4932">Saccharomyces cerevisiae</taxon>. &lt;/p&gt;
+&lt;p&gt;This domain is also found in a number of, as yet, uncharacterised proteins. These include a <taxon tax_id="40674">mammalian</taxon> protein, p55CDC, that is present in dividing cells and is
+      associated with protein kinase activity.&lt;/p&gt;
+</abstract>
+    <example_list>
+      <example>
+				<db_xref dbkey="P26309" db="SWISS"/>Cell division control protein (Cdc20) from S. cerevisiae
+      </example>
+      <example>
+				<db_xref dbkey="Q09786" db="SWISS"/>A hypothetical protein from S. pombe
+      </example>
+    </example_list>
+    <pub_list>
+      <publication id="PUB00006167">
+	<author_list>Shirayama M., Toth A., Galova M., Nasmyth K.</author_list>
+	<title>APC(Cdc20) promotes exit from mitosis by destroying the anaphase inhibitor Pds1 and cyclin Clb5.</title>
+	<db_xref db="MEDLINE" dbkey="20110935"/>
+	<journal>Nature</journal>
+	<location firstpage="203" lastpage="207" volume="402" issue="6758"/>
+	<year>1999</year>
+      </publication>
+    </pub_list>
+    <member_list>
+      <db_xref protein_count="59" db="PREFILE" dbkey="PS50218" name="FIZZY_DOMAIN"/>
+      <db_xref protein_count="55" db="PRODOM" dbkey="PD004563" name="Fizzy"/>
+    </member_list>
+    <external_doc_list>
+      <db_xref db="QDOC" dbkey="QDOC50218"/>
+    </external_doc_list>
+  </interpro>
+  <interpro id="IPR000003" type="Family" short_name="RtnoidX_receptor" protein_count="75">
+    <name>Retinoid X receptor</name>
+    <abstract>
+Steroid or nuclear hormone receptors (4A nuclear receptor, NRs) constitute an important superfamily of transcription regulators that are involved in widely diverse physiological functions, including control of embryonic development, cell differentiation and homeostasis. Members of the superfamily include the steroid hormone receptors and receptors for thyroid hormone, retinoids, 1,25-dihydroxy-vitamin D3 and a variety of other ligands. The proteins function as dimeric molecules in nuclei to regulate the transcription of target genes in a ligand-responsive manner <cite idref="PUB00004464"/>, <cite idref="PUB00006168"/>. In addition to C-terminal ligand-binding domains, these nuclear receptors contain a highly-conserved, N-terminal zinc-finger that mediates specific binding to target DNA sequences, termed ligand-responsive elements. In the absence of ligand, steroid hormone receptors are thought to be weakly associated with nuclear components; hormone binding greatly increases receptor affinity.
+
+NRs are extremely important in medical research, a large number of them being implicated in diseases such as cancer, diabetes, hormone resistance syndromes, etc. While several NRs act as ligand-inducible transcription factors, many do not yet have a defined ligand and are accordingly termed "orphan" receptors. During the last decade, more than 300 NRs have been described, many of which are orphans, which cannot easily be named due to current nomenclature confusions in the literature. However, a new system has recently been introduced in an attempt to rationalise the increasingly complex set of names used to describe superfamily members.
+
+
+&lt;p&gt;The retinoic acid (retinoid X) receptor consists of 3 functional and 
+               structural domains: an N-terminal (modulatory) domain; a DNA binding domain
+               that mediates specific binding to target DNA sequences (ligand-responsive
+               elements); and a hormone binding domain. The N-terminal domain differs 
+               between retinoic acid isoforms; the small highly-conserved DNA-binding
+               domain (~65 residues) occupies the central portion of the protein; and 
+               the ligand binding domain lies at the receptor C-terminus.&lt;/p&gt;
+&lt;p&gt;Synonym(s): 2B nuclear receptor&lt;/p&gt;
+</abstract>
+    <class_list>
+      <classification id="GO:0003677" class_type="GO">
+	<category>Molecular Function</category>
+	<description>DNA binding</description>
+      </classification>
+      <classification id="GO:0004879" class_type="GO">
+	<category>Molecular Function</category>
+	<description>ligand-dependent nuclear receptor</description>
+      </classification>
+      <classification id="GO:0005496" class_type="GO">
+	<category>Molecular Function</category>
+	<description>steroid binding</description>
+      </classification>
+      <classification id="GO:0005634" class_type="GO">
+	<category>Cellular Component</category>
+	<description>nucleus</description>
+      </classification>
+      <classification id="GO:0006355" class_type="GO">
+	<category>Biological Process</category>
+	<description>regulation of transcription</description>
+      </classification>
+    </class_list>
+    <example_list>
+      <example>
+				<db_xref dbkey="Q06726" db="SWISS"/>
+			</example>
+      <example>
+				<db_xref dbkey="P81559" db="SWISS"/>
+			</example>
+      <example>
+				<db_xref dbkey="Q64104" db="SWISS"/>
+			</example>
+      <example>
+				<db_xref dbkey="Q91766" db="SWISS"/>
+			</example>
+      <example>
+				<db_xref dbkey="P28701" db="SWISS"/>
+			</example>
+      <example>
+				<db_xref dbkey="O75454" db="SWISS"/>
+			</example>
+    </example_list>
+    <pub_list>
+      <publication id="PUB00004464">
+	<author_list>Nishihara T., Nishikawa J.-I., Kitaura M., Imagawa M.</author_list>
+	<title>Vitamin D receptor contains multiple dimerisation interfaces that are functionally different.</title>
+	<db_xref db="MEDLINE" dbkey="95206940"/>
+	<journal>Nucleic Acids Res.</journal>
+	<location firstpage="606" lastpage="611" volume="23"/>
+	<year>1995</year>
+      </publication>
+      <publication id="PUB00006168">
+	<author_list>Schmitt J., De Vos P., Verhoeven G., Stunnenberg H.G.</author_list>
+	<title>Human androgen receptor expressed in HeLa cells activates transcription in vitro.</title>
+	<db_xref db="MEDLINE" dbkey="94218237"/>
+	<journal>Nucleic Acids Res.</journal>
+	<location firstpage="1161" lastpage="1166" volume="22" issue="7"/>
+	<year>1994</year>
+      </publication>
+    </pub_list>
+    <parent_list>
+      <rel_ref ipr_ref="IPR001723"/>
+    </parent_list>
+    <contains>
+      <rel_ref ipr_ref="IPR000536"/>
+    </contains>
+    <member_list>
+      <db_xref protein_count="75" db="PRINTS" dbkey="PR00545" name="RETINOIDXR"/>
+    </member_list>
+  </interpro>
+  <interpro id="IPR000004" type="Domain" short_name="SapB" protein_count="135">
+    <name>Saposin type B</name>
+    <abstract>
+Saposins are small lysosomal proteins that serve as activators of various
+lysosomal lipid-degrading enzymes <cite idref="PUB00005747"/>. They probably act by isolating the
+lipid substrate from the membrane surroundings, thus making it more 
+accessible to the soluble degradative enzymes. All <taxon tax_id="40674">mammalian</taxon> saposins
+are synthesized as a single precursor molecule (prosaposin) which contains
+four Saposin-B domains, yielding the active saposins after proteolytic
+cleavage, and two Saposin-A domains that are removed in the activation
+reaction. 
+The Saposin-B domains also occur in other 
+proteins, many of them active in the lysis of membranes <cite idref="PUB00005721"/>, <cite idref="PUB00005765"/>. &lt;p&gt;The 3D-structure of NK-lysin has recently been determined <cite idref="PUB00005798"/> and found to
+be very different from the one predicted in <cite idref="PUB00005747"/>.
+A group of <taxon tax_id="3193">plant</taxon> aspartic proteases related to cyprosin. These proteins
+have a peculiar SAP-B domain where the two halves are 'swapped' <cite idref="PUB00005742"/>.&lt;/p&gt;
+</abstract>
+    <example_list>
+      <example>
+				<db_xref dbkey="P28039" db="SWISS"/>Mammalian Acyloxyacyl-hydrolase
+      </example>
+      <example>
+				<db_xref dbkey="P42210" db="SWISS"/>Plant aspartic proteinase
+      </example>
+      <example>
+				<db_xref dbkey="P17405" db="SWISS"/>Mammalian acid sphingomyelinase
+      </example>
+      <example>
+				<db_xref dbkey="Q07831" db="SWISS"/>Nonpathogenic pore-forming peptide from entamoeba
+      </example>
+      <example>
+				<db_xref dbkey="P10960" db="SWISS"/>Saposins Sap-A, Sap-B, Sap-C, Sap-D
+      </example>
+    </example_list>
+    <pub_list>
+      <publication id="PUB00005747">
+	<author_list>O'Hara P.J., Munford R.S., Sheppard P.O.</author_list>
+	<title>Saposin-like proteins (SAPLIP) carry out diverse functions on a common backbone structure.</title>
+	<db_xref db="MEDLINE" dbkey="96048294"/>
+	<journal>J. Lipid Res.</journal>
+	<location firstpage="1653" lastpage="1663" volume="36" issue="8"/>
+	<year>1995</year>
+      </publication>
+      <publication id="PUB00005721">
+	<author_list>Ponting C.P.</author_list>
+	<title>Acid sphingomyelinase possesses a domain homologous to its activator proteins: saposins B and D.</title>
+	<db_xref db="MEDLINE" dbkey="94272336"/>
+	<journal>Protein Sci.</journal>
+	<location firstpage="359" lastpage="361" volume="3" issue="2"/>
+	<year>1994</year>
+      </publication>
+      <publication id="PUB00005765">
+	<author_list>Hofmann K., Tschopp J.</author_list>
+	<title>Cytotoxic T cells: more weapons for new targets?</title>
+	<db_xref db="MEDLINE" dbkey="97021725"/>
+	<journal>Trends Microbiol.</journal>
+	<location firstpage="91" lastpage="94" volume="4" issue="3"/>
+	<year>1996</year>
+      </publication>
+      <publication id="PUB00005798">
+	<author_list>Liepinsh E., Otting G., Andersson M., Ruysschaert J.M.</author_list>
+	<title>Saposin fold revealed by the NMR structure of NK-lysin.</title>
+	<db_xref db="MEDLINE" dbkey="97475218"/>
+	<journal>Nat. Struct. Biol.</journal>
+	<location firstpage="793" lastpage="795" volume="4" issue="10"/>
+	<year>1997</year>
+      </publication>
+      <publication id="PUB00005742">
+	<author_list>Ponting C.P., Russell R.B.</author_list>
+	<title>Swaposins: circular permutations within genes encoding saposin homologues.</title>
+	<db_xref db="MEDLINE" dbkey="95334819"/>
+	<journal>Trends Biochem. Sci.</journal>
+	<location firstpage="179" lastpage="180" volume="20" issue="5"/>
+	<year>1995</year>
+      </publication>
+    </pub_list>
+    <child_list>
+      <rel_ref ipr_ref="IPR003258"/>
+    </child_list>
+    <member_list>
+      <db_xref protein_count="133" db="PREFILE" dbkey="PS50015" name="SAP_B"/>
+      <db_xref protein_count="71" db="PRODOM" dbkey="PD001732" name="SapB"/>
+      <db_xref protein_count="109" db="SMART" dbkey="SM00118" name="SAPB"/>
+    </member_list>
+    <external_doc_list>
+      <db_xref db="QDOC" dbkey="QDOC50015"/>
+    </external_doc_list>
+  </interpro>
+  <interpro id="IPR000005" type="Domain" short_name="HTHAraC" protein_count="765">
+    <name>Helix-turn-helix, AraC type </name>
+    <abstract>
+&lt;p&gt;Many bacterial transcription regulation proteins bind DNA through a
+'helix-turn-helix' (HTH) motif. One major subfamily of these proteins <cite idref="PUB00004444"/>, <cite idref="PUB00003566"/> is related to the arabinose 
+operon regulatory protein AraC <cite idref="PUB00004444"/>, <cite idref="PUB00003566"/>.
+Except for celD <cite idref="PUB00001933"/>, all of these proteins seem to be positive transcriptional factors.&lt;/p&gt;
+
+&lt;p&gt;Although the sequences belonging to this family differ somewhat in length, in nearly every case the HTH motif is situated towards the C-terminus in the third quarter of most of the sequences. The minimal DNA binding domain spans roughly 100 residues and comprises two HTH subdomains; the classical HTH domain and another HTH subdomain with similarity to the classical HTH domain but with an insertion of one residue in the turn-region. The  N-terminal and  central regions of these proteins are presumed
+            to interact  with  effector molecules and may be involved in dimerization <cite idref="PUB00004817"/>.&lt;/p&gt;
+
+&lt;p&gt;The known structure of MarA (<db_xref db="SWISSPROT" dbkey="P27246"/>) shows that the AraC domain is alpha helical and shows the two HTH subdomains both bind the major groove of the DNA. The two HTH subdomains are separated by only 27
+angstroms, which causes the cognate DNA to bend.&lt;/p&gt;
+</abstract>
+    <class_list>
+      <classification id="GO:0003700" class_type="GO">
+	<category>Molecular Function</category>
+	<description>transcription factor</description>
+      </classification>
+      <classification id="GO:0005622" class_type="GO">
+	<category>Cellular Component</category>
+	<description>intracellular</description>
+      </classification>
+      <classification id="GO:0006355" class_type="GO">
+	<category>Biological Process</category>
+	<description>regulation of transcription</description>
+      </classification>
+    </class_list>
+    <example_list>
+      <example>
+				<db_xref dbkey="Q04248" db="SWISS"/>Virulence regulon transcriptional activator
+      </example>
+      <example>
+				<db_xref dbkey="P28809" db="SWISS"/>mmsAB operon regulatory  protein
+      </example>
+      <example>
+				<db_xref dbkey="Q48413" db="SWISS"/>
+			</example>
+      <example>
+				<db_xref dbkey="P35319" db="SWISS"/>Transcription regulator
+      </example>
+      <example>
+				<db_xref dbkey="Q52620" db="SWISS"/>Regulatory factor of blood coagulation
+      </example>
+      <example>
+				<db_xref dbkey="P17410" db="SWISS"/>CelD, the Escherichia coli cel operon repressor.
+      </example>
+      <example>
+				<db_xref dbkey="P19219" db="SWISS"/>AdaA, a Bacillus subtilis  bifunctional protein  that   acts  both  as  a transcriptional activator of the ada operon and as a methylphosphotriester-DNA alkyltransferase.
+      </example>
+      <example>
+				<db_xref dbkey="P03021" db="SWISS"/>AraC, the arabinose operon regulatory  protein, which activates the transcription of the araBAD genes.
+      </example>
+      <example>
+				<db_xref dbkey="P27246" db="SWISS"/>MarA,  which  may  be  a transcriptional activator of genes involved in the multiple antibiotic resistance (mar) phenotype.
+      </example>
+    </example_list>
+    <pub_list>
+      <publication id="PUB00004444">
+	<author_list>Gallegos M.-T., Michan C., Ramos J.L.</author_list>
+	<title>The XylS/AraC family of regulators.</title>
+	<db_xref db="MEDLINE" dbkey="93197143"/>
+	<journal>Nucleic Acids Res.</journal>
+	<location firstpage="807" lastpage="810" volume="21"/>
+	<year>1993</year>
+      </publication>
+      <publication id="PUB00003566">
+	<author_list>Henikoff S., Wallace J.C., Brown J.P.</author_list>
+	<title>Finding protein similarities with nucleotide sequence databases.</title>
+	<db_xref db="MEDLINE" dbkey="90190362"/>
+	<journal>Meth. Enzymol.</journal>
+	<location firstpage="111" lastpage="132" volume="183"/>
+	<year>1990</year>
+      </publication>
+      <publication id="PUB00001933">
+	<author_list>Parker L.L., Hall B.G.</author_list>
+	<title>Characterisation and nucleotide sequence of the cryptic cel operon of Escherichia coli K12.</title>
+	<db_xref db="MEDLINE" dbkey="90185127"/>
+	<journal>Genetics</journal>
+	<location firstpage="455" lastpage="471" volume="124"/>
+	<year>1990</year>
+      </publication>
+      <publication id="PUB00004817">
+	<author_list>Bustos S.A., Schleif R.F.</author_list>
+	<title>Functional domains of the AraC protein.</title>
+	<db_xref db="MEDLINE" dbkey="93296193"/>
+	<journal>Proc. Natl. Acad. Sci. U.S.A.</journal>
+	<location firstpage="5638" lastpage="5642" volume="90"/>
+	<year>1993</year>
+      </publication>
+    </pub_list>
+    <member_list>
+      <db_xref protein_count="478" db="PRINTS" dbkey="PR00032" name="HTHARAC"/>
+      <db_xref protein_count="762" db="PROFILE" dbkey="PS01124" name="HTH_ARAC_FAMILY_2"/>
+      <db_xref protein_count="855" db="PROSITE" dbkey="PS00041" name="HTH_ARAC_FAMILY_1"/>
+      <db_xref protein_count="747" db="PFAM" dbkey="PF00165" name="HTH_AraC"/>
+      <db_xref protein_count="683" db="SMART" dbkey="SM00342" name="HTH_ARAC"/>
+    </member_list>
+    <external_doc_list>
+      <db_xref db="BLOCKS" dbkey="IPB000005"/>
+      <db_xref db="PDOC" dbkey="PDOC00040"/>
+    </external_doc_list>
+  </interpro>
+	<interpro id="IPR000010" type="Family" short_name="Cystatin" protein_count="219">
+		<name>Cysteine proteases inhibitor</name>
+		<abstract>
+Members of this family are inhibitors of cysteine  proteases <cite idref="PUB00005324"/>, <cite idref="PUB00003412"/>, <cite idref="PUB00001614"/>, which are found in the tissues and body fluids of <taxon tax_id="33208">animals</taxon>, as well as in <taxon tax_id="3193">plants</taxon>. They can be grouped into three distinct but related families. These are the type 1 cystatins (or stefins), type 2 cystatins, and the kininogens.
+&lt;p&gt;Kininogen is the precursor  of  the  active  peptide  bradykinin that  plays a role in blood coagulation by  helping  to  position optimally prekallikrein and factor XI next  to factor XII. They  are  also  inhibitors  of  cysteine  proteases. Structurally,  kininogens  are  made  of  three  contiguous type-2 cystatin domains, followed  by an  additional  domain  (of  variable  length)  which contains  the  sequence of bradykinin.  The  first  of  the  three cystatin domains seems to have lost its inhibitory activity.&lt;/p&gt;
+</abstract>
+		<class_list>
+			<classification id="GO:0004869" class_type="GO">
+				<category>Molecular Function</category>
+				<description>cysteine protease inhibitor</description>
+			</classification>
+		</class_list>
+		<example_list>
+			<example>
+				<db_xref dbkey="P09229" db="SWISS"/>Cysteine proteinase inhibitor of rice
+      </example>
+			<example>
+				<db_xref dbkey="P29701" db="SWISS"/>Mammalian fetuin
+      </example>
+			<example>
+				<db_xref dbkey="P28325" db="SWISS"/>Type 2 cystatin
+      </example>
+			<example>
+				<db_xref dbkey="P37842" db="SWISS"/>Potato  multicystatin, an eight-domain cysteine proteinase inhibitor
+      </example>
+			<example>
+				<db_xref dbkey="P01045" db="SWISS"/>Kininogen
+      </example>
+			<example>
+				<db_xref dbkey="Q28986" db="SWISS"/>Type 1 cystatin
+      </example>
+			<example>
+				<db_xref dbkey="P31727" db="SWISS"/>Sarcocystatin A from the flesh fly
+      </example>
+		</example_list>
+		<pub_list>
+			<publication id="PUB00005324">
+				<author_list>Barrett A.J.</author_list>
+				<journal>Trends Biochem. Sci.</journal>
+				<location firstpage="193" lastpage="196" volume="12"/>
+				<year>1987</year>
+			</publication>
+			<publication id="PUB00003412">
+				<author_list>Rawlings N.D., Barrett A.J.</author_list>
+				<title>Evolution of proteins of the cystatin superfamily.</title>
+				<db_xref db="MEDLINE" dbkey="90189177"/>
+				<journal>J. Mol. Evol.</journal>
+				<location firstpage="60" lastpage="71" volume="30"/>
+				<year>1990</year>
+			</publication>
+			<publication id="PUB00001614">
+				<author_list>Bode W., Turk V.</author_list>
+				<title>The cystatins: protein inhibitors of cysteine proteinases.</title>
+				<db_xref db="MEDLINE" dbkey="91309737"/>
+				<journal>FEBS Lett.</journal>
+				<location firstpage="213" lastpage="219" volume="285"/>
+				<year>1991</year>
+			</publication>
+		</pub_list>
+		<child_list>
+			<rel_ref ipr_ref="IPR001363"/>
+			<rel_ref ipr_ref="IPR001713"/>
+			<rel_ref ipr_ref="IPR003243"/>
+			<rel_ref ipr_ref="IPR003244"/>
+		</child_list>
+		<member_list>
+			<db_xref protein_count="199" db="PROSITE" dbkey="PS00287" name="CYSTATIN"/>
+			<db_xref protein_count="209" db="PFAM" dbkey="PF00031" name="cystatin"/>
+			<db_xref protein_count="205" db="SMART" dbkey="SM00043" name="CY"/>
+		</member_list>
+		<external_doc_list>
+			<db_xref db="BLOCKS" dbkey="IPB000010"/>
+			<db_xref db="PDOC" dbkey="PDOC00259"/>
+		</external_doc_list>
+		<sec_list>
+			<sec_ac acc="IPR000078"/>
+		</sec_list>
+	</interpro>
+	<interpro id="IPR000413" type="Family" short_name="Integrin_alpha" protein_count="126">
+		<name>Integrins alpha chain</name>
+		<abstract>
+Integrins <cite idref="PUB00000811"/>, <cite idref="PUB00001505"/> are a large family of cell surface receptors that mediate cell
+to cell as well as cell to matrix adhesion. Some integrins recognize the R-G-D
+sequence in their extracellular matrix protein ligand. Structurally, integrins
+consist of a dimer of an alpha and a beta chain.  Each  subunit  has  a  large
+N-terminal extracellular domain followed by a transmembrane domain and a short
+C-terminal cytoplasmic  region.    Some    alpha  subunits   are cleaved post-
+translationally to produce  a  heavy  and a  light chain linked by a disulfide
+bond. Integrin alpha chains  share a  conserved sequence which is found at
+the  beginning  of  the  cytoplasmic  domain,  just   after  the  end  of  the
+transmembrane region. The exact pairing of alpha- and beta-subunits determines
+ligand specificity, localisation and function. Within the N-terminal domain of alpha subunits, seven sequence repeats, each
+of approximately 60 amino acids, have been found <cite idref="PUB00006166"/>. It has been predicted 
+that these repeats assume a beta-propeller fold. The domains contain seven 
+four-stranded beta-sheets arranged in a torus around a pseudosymmetry axis
+<cite idref="PUB00005772"/>. Integrin ligands and a putative Mg&lt;sup&gt;2+&lt;/sup&gt; ion are predicted to bind to the
+upper face of the propeller, in a manner analogous to the way in which the
+trimeric G-protein beta subunit (G beta) (which also has a beta-propeller
+fold) binds the G protein alpha subunit <cite idref="PUB00005772"/>.
+</abstract>
+		<class_list>
+			<classification id="GO:0004895" class_type="GO">
+				<category>Molecular Function</category>
+				<description>cell adhesion receptor</description>
+			</classification>
+			<classification id="GO:0007155" class_type="GO">
+				<category>Biological Process</category>
+				<description>cell adhesion</description>
+			</classification>
+			<classification id="GO:0007160" class_type="GO">
+				<category>Biological Process</category>
+				<description>cell-matrix adhesion</description>
+			</classification>
+			<classification id="GO:0008305" class_type="GO">
+				<category>Cellular Component</category>
+				<description>integrin</description>
+			</classification>
+		</class_list>
+		<example_list>
+			<example>
+				<db_xref dbkey="P23229" db="SWISS"/>The alpha-6 chain (VLA-6) which,  with  the beta-1  chain, forms a platelet laminin receptor.
+      </example>
+			<example>
+				<db_xref dbkey="P53708" db="SWISS"/>The alpha-8 chain which, with the  beta-1 chain  plays  a  possible role in cell-cell interactions during axon-growth and fasciculation.
+      </example>
+			<example>
+				<db_xref dbkey="P08648" db="SWISS"/>The alpha-5 chain (VLA-5) (CD49e) which,  with  the beta-1 chain,  forms  a receptor specific to fibronectin.
+      </example>
+			<example>
+				<db_xref dbkey="P12080" db="SWISS"/>The Drosophila position-specific antigen 2 alpha chain (PS2).
+      </example>
+			<example>
+				<db_xref dbkey="P26006" db="SWISS"/>The alpha-3 chain (VLA-3) (Galactoprotein B3).
+      </example>
+			<example>
+				<db_xref dbkey="P56199" db="SWISS"/>The alpha-1 chain (VLA-1) (CD49a)  which,  with the beta-1 chain, acts as a receptor for laminin and collagen.
+      </example>
+			<example>
+				<db_xref dbkey="P13612" db="SWISS"/>The alpha-4 chain (VLA-4) (CD49d)  which,  with the beta-1 chain, interacts with vascular cell adhesion protein 1 (VCAM-1).
+      </example>
+			<example>
+				<db_xref dbkey="P17301" db="SWISS"/>The alpha-2 chain (VLA-2) (CD49b)  which,  with the beta-1 chain, acts as a receptor that binds collagen.
+      </example>
+		</example_list>
+		<pub_list>
+			<publication id="PUB00000811">
+				<author_list>Hynes R.O.</author_list>
+				<title>Integrins: a family of cell surface receptors.</title>
+				<db_xref db="MEDLINE" dbkey="87131067"/>
+				<journal>Cell</journal>
+				<location firstpage="549" lastpage="554" volume="48"/>
+				<year>1987</year>
+			</publication>
+			<publication id="PUB00001505">
+				<author_list>Albelda S.M., Buck C.A.</author_list>
+				<title>Integrins and other cell adhesion molecules.</title>
+				<db_xref db="MEDLINE" dbkey="90337122"/>
+				<journal>FASEB J.</journal>
+				<location firstpage="2868" lastpage="2880" volume="4"/>
+				<year>1990</year>
+			</publication>
+			<publication id="PUB00006166">
+				<author_list>Springer T.A., Corbi A.L., Miller L.J., O'Connor K., Larson R.S.</author_list>
+				<title>cDNA cloning and complete primary structure of the alpha subunit of a leukocyte adhesion glycoprotein, p150,95.</title>
+				<db_xref db="MEDLINE" dbkey="88166645"/>
+				<journal>EMBO J.</journal>
+				<location firstpage="4023" lastpage="4028" volume="6" issue="13"/>
+				<year>1987</year>
+			</publication>
+			<publication id="PUB00005772">
+				<author_list>Springer T.A.</author_list>
+				<title>Folding of the N-terminal, ligand-binding region of integrin alpha-subunits into a beta-propeller domain.</title>
+				<db_xref db="MEDLINE" dbkey="97144395"/>
+				<journal>Proc. Natl. Acad. Sci. U.S.A.</journal>
+				<location firstpage="65" lastpage="72" volume="94" issue="1"/>
+				<year>1997</year>
+			</publication>
+		</pub_list>
+		<member_list>
+			<db_xref protein_count="79" db="PRINTS" dbkey="PR01185" name="INTEGRINA"/>
+			<db_xref protein_count="112" db="PREFILE" dbkey="PS50107" name="INTEGRIN_ALPHA_2"/>
+			<db_xref protein_count="104" db="PROSITE" dbkey="PS00242" name="INTEGRIN_ALPHA"/>
+			<db_xref protein_count="75" db="PFAM" dbkey="PF00357" name="integrin_A"/>
+			<db_xref protein_count="117" db="PFAM" dbkey="PF01839" name="FG-GAP"/>
+			<db_xref protein_count="110" db="SMART" dbkey="SM00191" name="Int_alpha"/>
+		</member_list>
+		<external_doc_list>
+			<db_xref db="BLOCKS" dbkey="IPB000413"/>
+			<db_xref db="PDOC" dbkey="PDOC00215"/>
+			<db_xref db="PROPRO" dbkey="integrins_alpha"/>
+		</external_doc_list>
+		<sec_list>
+			<sec_ac acc="IPR002458"/>
+			<sec_ac acc="IPR002476"/>
+		</sec_list>
+	</interpro>
+  <deleted_entries>
+    <del_ref id="IPR000001"/>
+    <del_ref id="IPR000005"/>
+    <del_ref id="IPR000019"/>
+    <del_ref id="IPR000027"/>
+    <del_ref id="IPR000078"/>
+  </deleted_entries>
+</interprodb>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/intrablock-comment.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/intrablock-comment.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/intrablock-comment.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,30 @@
+#NEXUS
+
+BEGIN TAXA;
+[block-level comment in the TAXA block]
+      dimensions ntax=8;
+      taxlabels A B C D E F G H;  
+END;
+
+BEGIN CHARACTERS;
+[block-level comment in the CHARACTERS block]
+
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels 1 2 3 4 5;
+      matrix
+A     --ONE
+B     --ONE
+C     TWO--
+D     THREE
+E     F-OUR
+F     FIVE-
+G     SIX--
+H     SEVEN;
+END;
+
+BEGIN TREES;
+[block-level comment in the TREES block]
+
+       tree ladder = (((((((A:1,B:1):1,C:2):1,D:3):1,E:4):1,F:5):1,G:6):1,H:7);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/kinases.tsv
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/kinases.tsv	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/kinases.tsv	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,11 @@
+Name	Accession	Group	Family	Subfamily	Pseudogene?	cDNA	Protein	Kinase Domain	Novelty
+A6	SK001	Atypical	A6		N	CCGCCGGCCGGGGCCGCTGGCTGCACTCAGCGCCGGAGCCGGGAGCTAGCGGCCGCCGCCATGTCCCACCAGACCGGCATCCAAGCAAGTGAAGATGTTAAAGAGATCTTTGCCAGAGCCAGAAATGGAAAGTACAGACTTCTGAAAATATCTATTGAAAATGAGCAACTTGTGATTGGATCATATAGTCAGCCTTCAGATTCCTGGGATAAGGATTATGATTCCTTTGTTTTACCCCTGTTGGAGGACAAACAACCATGCTATATATTATTCAGGTTAGATTCTCAGAATGCCCAGGGATATGAATGGATATTCATTGCATGGTCTCCAGATCATTCTCATGTTCGTCAAAAAATGTTGTATGCAGCAACAAGAGCAACTCTGAAGAAGGAATTTGGAGGTGGCCACATTAAAGATGAAGTATTTGGAACAGTAAAGGAAGATGTATCATTACATGGATATAAAAAATACTTGCTGTCACAATCTTCCCCTGCCCCACTGACTGCAGCTGAGGAAGAACTACGACAGATTAAAATCAATGAGGTACAGACTGACGTGGGTGTGGACACTAAGCATCAAACACTACAAGGAGTAGCATTTCCCATTTCTCGAGAAGCCTTTCAGGCTTTGGAAAAATTGAATAATAGACAGCTCAACTATGTGCAGTTGGAAATAGATATAAAAAATGAAATTATAATTTTGGCCAACACAACAAATACAGAACTGAAAGATTTGCCAAAGAGGATTCCCAAGGATTCAGCTCGTTACCATTTCTTTCTGTATAAACATTCCCATGAAGGAGACTATTTAGAGTCCATAGTTTTTATTTATTCAATGCCTGGATACACATGCAGTATAAGAGAGCGGATGCTGTATTCTAGCTGCAAGAGCCGTCTGCTAGAAATTGTAGAAAGACAACTACAAATGGATGTAATTAGAAAGATCGAGATAGACAATGGGGATGAGTTGACTGCAGACTTCCTTTATGAAGAAGTACATCCCAAGCAGCATGCACACAAGCAAAGTTTTGCAAAACCAAAAGGTCCTGCAGGAAAAAGAGGAATTCGAAGACTAATTAGGGGCCCAGCGGAAACTGAAGCTACTACTGATTAAAGTCATCACATTAAACATTGTAATACTAGTTTTTTAAAAGTCCAGCTTTTAGTACAGGAGAACTGAAATCATTCCATGTTGATATAAAGTAGGGAAAAAAATTGTACTTTTTGGAAAATAGCACTTTTCACTTCTGTGTGTTTTTAAAATTAATGTTATAGAAGACTCATGATTTCTATTTTTGAGTTAAAGCTAGAAAAGGGTTCAACATAATGTTTAATTTTGTCACACTGTTTTCATAGCGTTGATTCCACACTTCAAATACTTCTTAAAATTTTATACAGTTGGGCCAGTTCTAGAAAGTCTGATGTCTCAAAGGGTAAACTTACTACTTTCTTGTGGGACAGAAAGACCTTAAAATATTCATATTACTTAATGAATATGTTAAGGACCAGGCTAGAGTATTTTCTAAGCTGGAAACTTAGTGTGCCTTGGAAAAGGCCGCAAGTTGCTTACTCCGAGTAGCTGTGCTAGCTCTGTCAGACTGTAGGATCATGTCTGCAACTTTTAGAAATAGTGCTTTATATTGCAGCAGTCTTTTATATTTGACTTTTTTTTAATAGCATTAAAATTGCAGATCAGCTCACTCTGAAACTTTAAGGGTACCAGATATTTTCTATACTGCAGGATTTCTGATGACATTGAAAGACTTTAAACAGCCTTAGTAAATTATCTTTCTAATGCTCTGTGAGGCCAAACATTTATGTTCAGATTGAAATTTAAATTAATATCATTCAAAAGGAAACAAAAAATGTTGAGTTTTAAAAATCAGGATTGACTTTTTTCTCCAAAACCATACATTTATGGGCAAATTGTGTTCTTTATCACTTCCGAGCAAATACTCAGATTTAAAATTACTTTA
AAGTCCTGGTACTTAACAGGCTAACGTAGATAAACACCTTAATAATCTCAGTTAATACTGTATTTCAAAACACATTTAACTGTTTTCTAATGCTTTGCATTATCAGTTACAACCTAGAGAGATTTTGAGCCTCATATTTCTTTGATACTTGAAATAGAGGGAGCTAGAACACTTAATGTTTAATCTGTTAAACCTGCTGCAAGAGCCATAACTTTGAGGCATTTTCTAAATGAACTGTGGGGATCCAGGATTTGTAATTTCTTGATCTAAACTTTATGCTGCATAAATCACTTATCGGAAATGCACATTTCATAGTGTGAAGCACTCATTTCTAAACCTTATTATCTAAGGTAATATATGCACCTTTCAGAAATTTGTGTTCGAGTAAGTAAAGCATATTAGAATAATTGTGGGTTGACAGATTTTTAAAATAGAATTTAGAGTATTTGGGGTTTTGTTTGTTTACAAATAATCAGACTATAATATTTAAACATGCAAAATAACTGACAATAATGTTGCACTTGTTTACTAAAGATATAAGTTGTTCCATGGGTGTACACGTAGACAGACACACATACACCCAAATTATTGCATTAAGAATCCTGGAGCAGACCATAGCTGAAGCTGTTATTTTCAGTCAGGAAGACTACCTGTCATGAAGGTATAAAATAATTTAGAAGTGAATGTTTTTCTGTACCATCTATGTGCAATTATACTCTAAATTCCACTACACTACATTAAAGTAAATGGACATTCCAGAATATAGATGTGATTATAGTCTTAAACTAATTATTATTAAACCAATGATTGCTGAAAATCAGTGATGCATTTGTTATAGAGTATAACTCATCGTTTACAGTATGTTTTAGTTGGCAGTATCATACCTAGATGGTGAATAACATATTCCCAGTAAATTTATATAGCAGTGAAGAATTACATGCCTTCTGGTGGACATTTTATAAGTGCATTTTATATCACAATAAAAATTTTTTCTCTTTAAAAAAAAAAAACAAGAAAAAAAAAAAA	MSHQTGIQASEDVKEIFARARNGKYRLLKISIENEQLVIGSYSQPSDSWDKDYDSFVLPLLEDKQPCYILFRLDSQNAQGYEWIFIAWSPDHSHVRQKMLYAATRATLKKEFGGGHIKDEVFGTVKEDVSLHGYKKYLLSQSSPAPLTAAEEELRQIKINEVQTDVGVDTKHQTLQGVAFPISREAFQALEKLNNRQLNYVQLEIDIKNEIIILANTTNTELKDLPKRIPKDSARYHFFLYKHSHEGDYLESIVFIYSMPGYTCSIRERMLYSSCKSRLLEIVERQLQMDVIRKIEIDNGDELTADFLYEEVHPKQHAHKQSFAKPKGPAGKRGIRRLIRGPAETEATTD	 	Known - Literature
+A6r	SK512	Atypical	A6		N	GACAGCCTCCGCCACATCCTCCACCTCTCTTGGTCCAGCGAGCGTTGCCGGGCCAGGGTCAAGCGGAGGGCTCCGACGGCGCGGACGGAGCGAAGCGCCGAGCCATGGCGCACCAAACGGGCATCCACGCCACGGAAGAGCTGAAGGAATTCTTTGCCAAGGCACGGGCTGGCTCTGTGCGGCTCATCAAGGTTGTGATTGAGGACGAGCAGCTCGTGCTGGGTGCCTCGCAGGAGCCAGTAGGCCGCTGGGATCAGGACTATGACAGGGCCGTGCTGCCACTGCTGGACGCCCAGCAGCCCTGCTACCTGCTCTACCGCCTCGACTCACAGAATGCTCAGGGCTTCGAATGGCTCTTCCTCGCCTGGTCGCCTGATAACTCCCCCGTGCGGCTGAAGATGCTGTACGCGGCCACGCGGGCCACAGTGAAAAAGGAGTTTGGAGGTGGCCACATCAAGGATGAGCTCTTCGGGACTGTGAAGGATGACCTCTCTTTTGCTGGGTACCAGAAACACCTGTCGTCCTGTGCGGCACCTGCCCCGCTGACCTCGGCTGAGAGAGAGCTCCAGCAGATCCGCATTAACGAGGTGAAGACAGAGATCAGTGTGGAAAGCAAGCACCAGACCCTGCAGGGCCTCGCCTTCCCCCTGCAGCCTGAGGCCCAGCGGGCACTCCAGCAGCTCAAGCAGAAAATGGTCAACTACATCCAGATGAAGCTGGACCTAGAGCGGGAAACCATTGAGCTGGTGCACACAGAGCCCACGGATGTGGCCCAGCTGCCCTCCCGGGTGCCCCGAGATGCTGCCCGCTACCACTTCTTCCTCTACAAGCACACCCATGAGGGCGACCCCCTTGAGTCTGTAGTGTTCATCTACTCCATGCCGGGGTACAAGTGCAGCATCAAGGAGCGAATGCTCTACTCCAGCTGCAAGAGCCGCCTCCTCGACTCCGTGGAGCAGGACTTCCATCTGGAGATCGCCAAGAAAATTGAGATTGGCGATGGGGCAGAGCTGACGGCAGAGTTCCTCTACGACGAGGTGCACCCCAAGCAACACGCCTTCAAGCAGGCCTTCGCCAAGCCCAAGGGCCCAGGGGGCAAGCGGGGCCATAAGCGCCTCATCCGCGGCCCGGGTGAAAATGGGGATGACAGCTAGGAGGCTGGAGCAGGGCCGGCCACGTGTGGACTGTGGGGCTGCCCACCTTCCGCTCCCTGCCACCATCCTCCTTCCTGGGCTCCAGGAAAGTGTTTCTGGGAGGTCAGGAGGGCTGGCAGCTGAACGCACTTGCAGCGTCCGAGGGCCACCGGGCTGGCATTTTGTGACCCTTCCCTGTTGCTGTCCCTGCATCTCGTCTGTGTGCCCAGGGTGTCCGGGGACCCTGCCTGGCTGGCTTAAGGGGGCTGGGTCAGGGGCCTGGCATGAACCTGGCCTCCCGGGGAGCTGAGACTAGGGTCCCAGCACAGCCCAGAAACCTTTGGCCACAAGAAGTGGGGTCAGTCAGGGCTGGGGCAGGGGTCACTGCAGTTTGGGATGGTTGAATGCTGTATTTTCTAAAGAATAAAATATTTTTAAATC	MAHQTGIHATEELKEFFAKARAGSVRLIKVVIEDEQLVLGASQEPVGRWDQDYDRAVLPLLDAQQPCYLLYRLDSQNAQGFEWLFLAWSPDNSPVRLKMLYAATRATVKKEFGGGHIKDELFGTVKDDLSFAGYQKHLSSCAAPAPLTSAERELQQIRINEVKTEISVESKHQTLQGLAFPLQPEAQRALQQLKQKMVNYIQMKLDLERETIELVHTEPTDVAQLPSRVPRDAARYHFFLYKHTHEGDPLESVVFIYSMPGYKCSIKERMLYSSCKSRLLDSVEQDFHLEIAKKIEIGDGAELTAEFLYDEVHPKQHAFKQAFAKPKGPGGKRGHKRLIRGPGENGDDS	 	Known - Literature
+A6ps1	SK752	Atypical	A6		Y	CCGCCGGCCGGGGCCGCTGGCTGCACTCAGCGCCGGAGCCGGGAGCTAGCGGCCGCCGCCATGTCCCACCAGACCGGCATCCAAGCAAGTGAAGATGTTAAAGAGATCTTTGCCAGAGCCAGAAATGGAAAGTACAGACTTCTGAAAATATCTATTGAAAATGAGCAACTTGTGATTGGATCATATAGTCAGCCTTCAGATTCCTGGGATAAGGATTATGATTCCTTTGTTTTACCCCTGTTGGAGGACAAACAACCATGCTATATATTATTCAGGTTAGATTCTCAGAATGCCCAGGGATATGAATGGATATTCATTGCATGGTCTCCAGATCATTCTCATGTTCGTCAAAAAATGTTGTATGCAGCAACAAGAGCAACTCTGAAGAAGGAATTTGGAGGTGGCCACATTAAAGATGAAATATTTGGAACAGTAAAGGAAGATGTATCATTACATGGATATAAAAAATACTTGCTGTCACAATCTTCCCCTGCCCCACTGACTGCAGCTGAGGAAGAATTACGACAGATTAAAATCAATGAGGTACAGACTGACGTGGGTGTGGACACTAAGCATCAAACACTACAAGGAGTAGCATTTCCCATTTCTCGAGAAGCCTTTCAGGCTTTGGAAAAATTGAATAACAGACAGCTCAACTATGTGCAGTTGGAAATAGATATAAAAAATGAAATTATAATTTTGGCCAACACAACAAATACAGAACTGAAAGATTTGCCAAAGAGGATTCCCAAGGATTCAGCTCGTTACCATTTCTTTCTGTATAAACATTCCCATGAAGGAGACTATTTAGAGTCCATAGTTNTTATTTATTCAATGCCTGGATACACATGCAGTATAAGAGAGCGGATGCTGTATTCTAGCTGCAAGAGCCCTCTGCTAGAAATTGTAGAAAGACAACTACAAATGGATGTAATTAGAAAGATCGAGATAGACAATGGGGATGAGTTGACTGCAGACTTCCTTTATGAAGAAGTACATCCCAAGCAGCAGGCACACAAGCAAAGTTTTGCAAAACCAAAAGGTCCTGCAGGAAAAAGAGGAATTCGAAGACTAATTAGGGGCCCAGCGGAAACTGAAGCTACTACTGATTAAAGTCGTCACATTAAACATTGCAATACTAGTTTTTTAAAAGTCCAGCTTTTAGTACAGGAGAACTGAAATCATTCCATGTTGATATAAAGTAGGGAAAAACATTGTACTTTTTGGAAAATAGCACTTTTCACTTCTGTGTGTTTTTAAAATTAATGTTATAGAAGACTCATGATTTCTATTTTTGAGTTAAAGCTAGAAAAGGGTTCAACATAATGTTTAATTTTGTCACACTGTTTTCATAGTGTTGATTCCACACTTCAAATACTTCTTAAAATTTTATACAGTTGGGCCAGCTCTAGAAAGTCTGATGTCTCAAAGGGTAAACTTACTACTTTCTTGTGGGACAGAAGGACCTTAAAATATTCATATTACTTAATGAATATGTTAAGGACCAGGCTAGAGTATTTTCTAAGCTGGAAACTTAGTGTGCCTCGGAAAAGGCCGCAAGTTGCTTACTCCGAGTAGCTGTGCTAGCTCTGTCAGACTGTAGGATCATGTCTGCAACTTTTAGAAATAGTGCTTTATATTGCAGCAGTCTTTTATATTTGACTTTTTTTTTAATAGCATTAAAATTGCAGATCAGCTCACTCTGAAACTTTAAGGGTACCAGATATTTTCTATACTGCAGGATTTCTAATGACATTGAAAGACTTTTAAACAGCCTTAGTAAATTATCTTTCTAATGCTCTGTGAGGCCAAACATTTATGTTCAGATTGAAATTTAAATTAATATCATTCAAAAGGAAACAAAAAATGTTGAGTTTTAAAAATCAGGATTGACTTTTTTCTCCAAAACCATACATTTATAGGCAAATTGTGTTCTTTATCACTTCTGAGCAAATACTCAGATTTAAAATTA
CTTTAAAGTCCTGGTACTTAACAGGCTAACGTAGATAAACACCTTAATAATCTCAGTTAATACTGTATTTCAAAACACATTTAACTGTTTTCTAATGCTTTGCATTATCAGTTACAACCTAGAGAGATTTTGAGCCTCATATTTCTTTGATACTTGAAATAGAGGGAGCTAGAACACTTAATGTTTAATCTGTTAAACCTGCTGCAAGAGCCATAACTTTGAGGCATTTTCTAAATGAACTGTGGGGATCCAGGATTTGTAATTTCTTGATCTAAACTTTATGCTGCATAAATCACTTATCGGAAATGCACATTTCATAGTGTGAAGCACTCATTTCTAAACCTTATTATCTAAGGTAATATATGCACCTTTCAGAAATTTGTGTTCGAGTAAGTAAAGCATATTAGAATAATTATGGGTTGACAGATTTTTAAAATAGAATTTAGAGTATTTGTGTGGGGTTTTGTTTGTTTACAAATAATCAGACTATAATATTTAAACATGCAAAATAACTGAGAATAATGTTGCACTTGTTTACTAAAGATATAAGTTGTTCCATGGGTGTACACGTAGACAGACACACATACACCCAAATTATTGCATTAAGAATCCTGGAGCAGACCATAGCTGAAGCTGTTATTTTCAGTCAGGAAGACTACCTGTCATGAAGGTATACAATAATTTAGAAGTGAATGTTTTTCTGTACCATCTATGTGCAATTATACTCTAAATTCCACTACACTACATTAAAGTAAATGGACATTCCAGAATATAGATGTGATTATAGTCTTAAACTAATTATTAAACCAATGATTGCTGAAAATCAGTGATGCATTTGTTATAGAGTATAACTCATCGTTTACAGTATGTTTTAGTTGGCAGTATCATACCTAGATGGTGAATAACATATTCCCAGTAAATTTATATAGCAGTGAAGAATTACATGCCTTCTGGTGGACATTTTATAAGTGCATTTTATATCACAATAAAATTTTTTCTCTTTAA 	MSHQTGIQASEDVKEIFARARNGKYRLLKISIENEQLVIGSYSQPSDSWDKDYDSFVLPLLEDKQPCYILFRLDSQNAQGYEWIFIAWSPDHSHVRQKMLYAATRATLKKEFGGGHIKDEIFGTVKEDVSLHGYKKYLLSQSSPAPLTAAEEELRQIKINEVQTDVGVDTKHQTLQGVAFPISREAFQALEKLNNRQLNYVQLEIDIKNEIIILANTTNTELKDLPKRIPKDSARYHFFLYKHSHEGDYLESIVXIYSMPGYTCSIRERMLYSSCKSPLLEIVERQLQMDVIRKIEIDNGDELTADFLYEEVHPKQQAHKQSFAKPKGPAGKRGIRRLIRGPAETEATTD*	 	Pseudogene
+A6ps2	SK766	Atypical	A6		Y	TCCCACCAGACCTGCATCCAAGCAAGTGAAGATGTTAAAGAGATCNNNGCCAGAGCCAGAAATGGAAAGTACAGACCTCTGAAAATATCTATTGAAAATGGGCAACTTATGATTGGATCATATNNNCAGCCTTCAGATTCCTGGGATAACGATTATGATTCCTTTGTTTTACCCCTGTTGGAGGACAAACAACTGTGCTATATATTATTCAGGTTAGATTCTCAGAATGCCCAGGGATATGAATGGATATTCATTGCATGGTTTCCAGATCATTCTCATGTCCGTCAAAAAAGGTTATATGCAGCAACAAGAGCAACTCTGGAAAAGGAATCTGGAGGTGGCCACGTTAAAGATGAAGTATTTGGAACAGTAAAGGAAGATGTATCATTACATGGATATAAAAAATGTTTGCTCTCACAATCTTCCCCTGCCCCACTGACTGCAGCTGAGGAAGAATTANNNGACATTAAAATCAATGAGGTACAGACTGACGTGGGTGTGGACGCTAAGCATCAAACACTACAAGGAGTAGCATTTCCTATTTCTCGAGAAGCTTTTCAGGCTTTGGAAAAAATAAATAACNNNCTGAACTATGTGCAGTTGGAAATAAACATAAAAAATGAAATTATAATTTTGGCCAACACAACAAATACAGAACTAAAAGATTTGCCAAAGAGGATTCCCAAGGATTCAGCTCGTTACCATTTCTTTCTGTATAAACATTCCCATGAAGGAGACTATTTAGAGTCCATAGTTTTTATCTATTCAATGCCCAGATACACATGCAGTATAAGAGAACGGATGCTGTATTCTAGCTGCAAGAGCCCTCTGCTAGAAATTGTAGAAAGACAACTANNNGTTGTAATGGATGTAATTAGAAAGATTGAGATAGACAATGAGGATTAGTTGACTTCAGACTTCCTTTGTGAAGAAGAAGTACATCCCAAGCAGCATGCAGGAAAAAGAAGAATTCGAAGACTAATTAGGGGCCCAGCGGAAAATGAAGCTACTACTGAT	SHQTCIQASEDVKEIXARARNGKYRPLKISIENGQLMIGSYXQPSDSWDNDYDSFVLPLLEDKQLCYILFRLDSQNAQGYEWIFIAWFPDHSHVRQKRLYAATRATLEKESGGGHVKDEVFGTVKEDVSLHGYKKCLLSQSSPAPLTAAEEELXDIKINEVQTDVGVDAKHQTLQGVAFPISREAFQALEKINNXLNYVQLEINIKNEIIILANTTNTELKDLPKRIPKDSARYHFFLYKHSHEGDYLESIVFIYSMPRYTCSIRERMLYSSCKSPLLEIVERQLXVVMDVIRKIEIDNED*LTSDFLCEEEVHPKQHAGKRRIRRLIRGPAENEATTD	 	Pseudogene
+CaMK2d	SK703	CAMK	CAMK2		N	ATGGCTTCGACCACCACCTGCACCAGGTTCACGGACGAGTATCAGCTTTTCGAGGAGCTTGGAAAGGGGGCATTCTCAGTGGTGAGAAGATGTATGAAAATTCCTACTGGACAAGGATATGCTGCCAAAATTATCAACACCAAAAAGCTTTCTGCTAGGGATCATCAGAAACTAGAAAGAGAAGCTAGAATCTGCCGTCTTTTGAAGCACCCTAATATTGTGCGACTTCATGATAGCATATCAGAAGAGGGCTTTCACTACTTGGTGTTTGATTTAGTTACTGGAGGTGAACTGTTTGAAGACATAGTGGCAAGAGAATACTACAGTGAAGCTGATGCCAGTCATTGTATACAGCAGATTCTAGAAAGTGTTAATCATTGTCACCTAAATGGCATAGTTCACAGGGACCTGAAGCCTGAGAATTTGCTTTTAGCTAGCAAATCCAAGGGAGCAGCTGTGAAATTGGCAGACTTTGGCTTAGCCATAGAAGTTCAAGGGGACCAGCAGGCGTGGTTTGGTTTTGCTGGCACACCTGGATATCTTTCTCCAGAAGTTTTACGTAAAGATCCTTATGGAAAGCCAGTGGATATGTGGGCATGTGGTGTCATTCTCTATATTCTACTTGTGGGGTATCCACCCTTCTGGGATGAAGACCAACACAGACTCTATCAGCAGATCAAGGCTGGAGCTTATGATTTTCCATCACCAGAATGGGACACGGTGACTCCTGAAGCCAAAGACCTCATCAATAAAATGCTTACTATCAACCCTGCCAAACGCATCACAGCCTCAGAGGCACTGAAGCACCCATGGATCTGTCAACGTTCTACTGTTGCTTCCATGATGCACAGACAGGAGACTGTAGACTGCTTGAAGAAATTTAATGCTAGAAGAAAACTAAAGGGTGCCATCTTGACAACTATGCTGGCTACAAGGAATTTCTCAGCAGCCAAGAGTTTGTTGAAGAAACCAGATGGAGTAAAGGAGTCAACTGAGAGTTCAAATACAACAATTGAGGATGAAGATGTGAAAGCACGAAAGCAAGAGATTATCAAAGTCACTGAACAACTGATCGAAGCTATCAACAATGGGGACTTTGAAGCCTACACAAAAATCTGTGACCCAGGCCTTACTGCTTTTGAACCTGAAGCTTTGGGTAATTTAGTGGAAGGGATGGATTTTCACCGATTCTACTTTGAAAATGCTTTGTCCAAAAGCAATAAACCAATCCACACTATTATTCTAAACCCTCATGTACATCTGGTAGGGGATGATGCCGCCTGCATAGCATATATTAGGCTCACACAGTACATGGATGGCAGTGGAATGCCAAAGACAATGCAGTCAGAAGAGACTCGTGTGTGGCACCGCCGGGATGGAAAGTGGCAGAATGTTCATTTTCATCGCTCGGGGTCACCAACAGTACCCATCAAGCCACCCTGTATTCCAAATGGGAAAGAAAACTTCTCAGGAGGCACCTCTTTGTGGCAAAACATCTGA	
MASTTTCTRFTDEYQLFEELGKGAFSVVRRCMKIPTGQGYAAKIINTKKLSARDHQKLEREARICRLLKHPNIVRLHDSISEEGFHYLVFDLVTGGELFEDIVAREYYSEADASHCIQQILESVNHCHLNGIVHRDLKPENLLLASKSKGAAVKLADFGLAIEVQGDQQAWFGFAGTPGYLSPEVLRKDPYGKPVDMWACGVILYILLVGYPPFWDEDQHRLYQQIKAGAYDFPSPEWDTVTPEAKDLINKMLTINPAKRITASEALKHPWICQRSTVASMMHRQETVDCLKKFNARRKLKGAILTTMLATRNFSAAKSLLKKPDGVKESTESSNTTIEDEDVKARKQEIIKVTEQLIEAINNGDFEAYTKICDPGLTAFEPEALGNLVEGMDFHRFYFENALSKSNKPIHTIILNPHVHLVGDDAACIAYIRLTQYMDGSGMPKTMQSEETRVWHRRDGKWQNVHFHRSGSPTVPIKPPCIPNGKENFSGGTSLWQNI	YQLFEELGKGAFSVVRRCMKIPTGQGYAAKIINTKKLSARDHQKLEREARICRLLKHPNIVRLHDSISEEGFHYLVFDLVTGGELFEDIVAREYYSEADASHCIQQILESVNHCHLNGIVHRDLKPENLLLASKSKGAAVKLADFGLAIEVQGDQQAWFGFAGTPGYLSPEVLRKDPYGKPVDMWACGVILYILLVGYPPFWDEDQHRLYQQIKAGAYDFPSPEWDTVTPEAKDLINKMLTINPAKRITASEALKHPWI	Known - Refseq
+CaMKK2	SK482	Other	CAMKK	Meta	N	GAGCCGAGCCGAGCTGGGGGCGCAGACGCGGAGGCGGCGGCGGCGCGGAGCCCAGGTGGCTCCGTGCCGGATGGGAGTGCCCCAGTGTGCTGGATGAAGCTGGCGCATGCACCATGTCATCATGTGTCTCTAGCCAGCCCAGCAGCAACCGGGCCGCCCCCCAGGATGAGCTGGGGGGCAGGGGCAGCAGCAGCAGCGAAAGCCAGAAGCCCTGTGAGGCCCTGCGGGGCCTCTCATCCTTGAGCATCCACCTGGGCATGGAGTCCTTCATTGTGGTCACCGAGTGTGAGCCGGGCTGTGCTGTGGACCTCGGCTTGGCGCGGGACCGGCCCCTGGAGGCCGATGGCCAAGAGGTCCCCCTTGACACCTCCGGGTCCCAGGCCCGGCCCCACCTCTCCGGTCGCAAGCTGTCTCTGCAAGAGCGGTCCCAGGGTGGGCTGGCAGCCGGTGGCAGCCTGGACATGAACGGACGCTGCATCTGCCCGTCCCTGCCCTACTCACCCGTCAGCTCCCCGCAGTCCTCGCCTCGGCTGCCCCGGCGGCCGACAGTGGAGTCTCACCACGTCTCCATCACGGGTATGCAGGACTGTGTGCAGCTGAATCAGTATACCCTGAAGGATGAAATTGGAAAGGGCTCCTATGGTGTCGTCAAGTTGGCCTACAATGAAAATGACAATACCTACTATGCAATGAAGGTGCTGTCCAAAAAGAAGCTGATCCGGCAGGCCGCTTTTCCACGTCGCCCTCCACCCCGAGGCACCCGGCCAGCTCCTGGAGGCTGCATCCAGCCCAGGGGCCCCATTGAGCAGGTGTACCAGGAAATTGCCATCCTCAAGAAGCTGGACCACCCCAATGTGGTGAAGCTGGTGGAGGTCCTGGATGACCCCAATGAGGACCATCTGTACATGGTGTTCGAACTGGTCAACCAAGGGCCCGTGATGGAAGTGCCCACCCTCAAACCACTCTCTGAAGACCAGGCCCGTTTCTACTTCCAGGATCTGATCAAAGGCATCGAGTACTTACACTACCAGAAGATCATCCACCGTGACATCAAACCTTCCAACCTCCTGGTCGGAGAAGATGGGCACATCAAGATCGCTGACTTTGGTGTGAGCAATGAATTCAAGGGCAGTGACGCGCTCCTCTCCAACTACGTGGGCACGCCCGCCTTCATGGCTCCCGAGTCGCTCTCTGAGACCCGCAAGATCTTCTCTGGGAAGGCCAAGGATGTTTGGGCCATGGGTGTGACACTATACTGCTTTGTCTTTGGCCAGTGCCCATTCATGGACGAGCGGATCATGTGTTTACACAGTAAGATCAAGAGTCAGGCCCTGGAATTTCCAGACCAGCCCGACATAGCTGAGGACTTGAAGGACCTGATCACCCGTATGCTGGACAAGAACCCCGAGTCGAGGATCGTGGTGCCGGAAATCAAGCTGCACCCCTGGGTCACGAGGCATGGGGCGGAGCCGTTGCCGTCGGAGGATGAGAACTGCACGCTGGTCGAAGTGACTGAAGAGGAGGTCGAGAACTCAGTCAAACACATTCCCAGCTTGGCAACCGTGATCCTGGTGAAGACCATGATACGTAAACGCTCCTTTGGGAACCCATTCGAGGGCAGCCGGCGGGAGGAACGCTCACTGTCAGCGCCTGGAAACTTGCTCACCAAAAAACCAACCAGGGAATGTGAGTCCCTGTCTGAGCTCAAGGAAGCAAGGCAGCGAAGACAACCTCCAGGGCACCGACCCGCCCCCCGTGGGGGAGGAGGAAGTGCTCTTGTGAGAGGCAGTCCCTGCGTGGAAAGTTGCTGGGCCCCCGCCCCCGGCTCCCCCGCACGCATGCATCCACTGCGGCCGGAGGAGGCCATGGAGCCCGAGTAGCTGCCTGGATCGCTCGACCTCGCATGCGCGCCGCGTCGCCTCTGGGGGGCTGCTGCACCGCGTTTCCATAGCAGCATGTCCTACGGA
AACCCAGCACGTGTGTTTCCATAGCAGCATGTCTGCAACCCAGACGCGGTGTGTAGAGCCTCGATCGTCATCTCTGGTTATTTGTTTTTTCCTTTGTTGTTTTAAAGGGACAAAAAAAAAAAAGGACTGACTCCATAGCTCGACCGTGGCCGCTGGCTGGCTGGACAGGCGGGTGTGAGGAGTTGCAGACCCAAACCCACGTGCATTTTGGGACAATTGCTTTTTAAAACGTTTTTATGCCAAAAATCCTTCATTGTGATTTTCAGAACCACGTCAGATATACCAAGTGACTGTGTGTGGGGTTTGACAACTGTGGAAAGGCGAGCAGAAAACTCCGGCGGTCTGAGGCCATGGAGGTGGTTGCTGCATTTGAGAGGGAGTAGGGGGCTAGATGTGGCTCCTAGTGCAAACCGGAAACCATGGCACCTTCCAGAGCCGTGGTCTCAAGGAGTCAGAGCAGGGAGCTTTGATGCAACTTATTTGTAAGAAGGATTTTTAAATTTTTTATGGGTAGAATTGTAGTCAGGAAAACAGAAAGGGCTTGAAATTTAATAAGTGCTGCTGGAGGGGATTTTCCAAGCCTGGAAGGGTATTCAGCAGCTGTGGTGGGGAAAGATTTCTCCTGAAAGACTGAACGTGTTTCTTCATGACAGCTGCTCAAAGCAGGTTTCTGAGATAGCTGACCGAGCTCTGGTAAATCTCTTTGTCAAATTACGAAAACTTCAGGGCGAAATCCTATGCTTCCATGTACATTACATGGCTTAAGTTAAACAAAAACATTTTTCAAGTCTCTAACTAGAGTGAA	MSSCVSSQPSSNRAAPQDELGGRGSSSSESQKPCEALRGLSSLSIHLGMESFIVVTECEPGCAVDLGLARDRPLEADGQEVPLDTSGSQARPHLSGRKLSLQERSQGGLAAGGSLDMNGRCICPSLPYSPVSSPQSSPRLPRRPTVESHHVSITGMQDCVQLNQYTLKDEIGKGSYGVVKLAYNENDNTYYAMKVLSKKKLIRQAAFPRRPPPRGTRPAPGGCIQPRGPIEQVYQEIAILKKLDHPNVVKLVEVLDDPNEDHLYMVFELVNQGPVMEVPTLKPLSEDQARFYFQDLIKGIEYLHYQKIIHRDIKPSNLLVGEDGHIKIADFGVSNEFKGSDALLSNYVGTPAFMAPESLSETRKIFSGKAKDVWAMGVTLYCFVFGQCPFMDERIMCLHSKIKSQALEFPDQPDIAEDLKDLITRMLDKNPESRIVVPEIKLHPWVTRHGAEPLPSEDENCTLVEVTEEEVENSVKHIPSLATVILVKTMIRKRSFGNPFEGSRREERSLSAPGNLLTKKPTRECESLSELKEARQRRQPPGHRPAPRGGGGSALVRGSPCVESCWAPAPGSPARMHPLRPEEAMEPE	YTLKDEIGKGSYGVVKLAYNENDNTYYAMKVLSKKKLIRQAAFPRRPPPRGTRPAPGGCIQPRGPIEQVYQEIAILKKLDHPNVVKLVEVLDDPNEDHLYMVFELVNQGPVMEVPTLKPLSEDQARFYFQDLIKGIEYLHYQKIIHRDIKPSNLLVGEDGHIKIADFGVSNEFKGSDALLSNYVGTPAFMAPESLSETRKIFSGKAKDVWAMGVTLYCFVFGQCPFMDERIMCLHSKIKSQALEFPDQPDIAEDLKDLITRMLDKNPESRIVVPEIKLHPWV	Known - Refseq
+AMPKa1	SK032	CAMK	CAMKL	AMPK	N	GCAGACTCAGTTCCTGGAGAAAGATGGCGACAGCCGAGAAGCAGAAACACGACGGGCGGGTGAAGATCGGCCACTACATTCTGGGTGACACGCTGGGGGTCGGCACCTTCGGCAAAGTGAAGGTTGGCAAACATGAATTGACTGGGCATAAAGTAGCTGTGAAGATACTCAATCGACAGAAGATTCGGAGCCTTGATGTGGTAGGAAAAATCCGCAGAGAAATTCAGAACCTCAAGCTTTTCAGGCATCCTCATATAATTAAACTGTACCAGGTCATCAGTACACCATCTGATATTTTCATGGTGATGGAATATGTCTCAGGAGGAGAGCTATTTGATTATATCTGTAAGAATGGAAGGCTGGATGAAAAAGAAAGTCGGCGTCTGTTCCAACAGATCCTTTCTGGTGTGGATTATTGTCACAGGCATATGGTGGTCCATAGAGATTTGAAACCTGAAAATGTCCTGCTTGATGCACACATGAATGCAAAGATAGCTGATTTTGGTCTTTCAAACATGATGTCAGATGGTGAATTTTTAAGAACAAGTTGTGGCTCACCCAACTATGCTGCACCAGAAGTAATTTCAGGAAGATTGTATGCAGGCCCAGAGGTAGATATATGGAGCAGTGGGGTTATTCTCTATGCTTTATTATGTGGAACCCTTCCATTTGATGATGACCATGTGCCAACTCTTTTTAAGAAGATATGTGATGGGATCTTCTATACCCCTCAATATTTAAATCCTTCTGTGATTAGCCTTTTGAAACATATGCTGCAGGTGGATCCCATGAAGAGGGCCTCAATCAAAGATATCAGGGAACATGAATGGTTTAAACAGGACCTTCCAAAATATCTCTTTCCTGAGGATCCATCATATAGTTCAACCATGATTGATGATGAAGCCTTAAAAGAAGTATGTGAAAAGTTTGAGTGCTCAGAAGAGGAAGTTCTCAGCTGTCTTTACAACAGAAATCACCAGGATCCTTTGGCAGTTGCCTACCATCTCATAATAGATAACAGGAGAATAATGAATGAAGCCAAAGATTTCTATTTGGCGACAAGCCCACCTGATTCTTTTCTTGATGATCATCACCTGACTCGGCCCCATCCTGAAAGAGTACCATTCTTGGTTGCTGAAACACCAAGGGCACGCCATACCCTTGATGAATTAAATCCACAGAAATCCAAACACCAAGGTGTAAGGAAAGCAAAATGGCATTTAGGAATTAGAAGTCAAAGTCGACCAAATGATATTATGGCAGAAGTATGTAGAGCAATCAAACAATTGGATTATGAATGGAAGGTTGTAAACCCATATTATTTGCGTGTACGAAGGAAGAATCCTGTGACAAGCACTTACTCCAAAATGAGTCTACAGTTATACCAAGTGGATAGTAGAACTTATCTACTGGATTTCCGTAGTATTGATGATGAAATTACAGAAGCCAAATCAGGGACTGCTACTCCACAGAGATCGGGATCAGTTAGCAACTATCGATCTTGCCAAAGGAGTGATTCAGATGCTGAGGCTCAAGGAAAATCCTCAGAAGTTTCTCTTACCTCATCTGTGACCTCACTTGACTCTTCTCCTGTTGACCTAACTCCAAGACCTGGAAGTCACACAATAGAATTTTTTGAGATGTGTGCAAATCTAATTAAAATTCTTGCACAATAAACAGAAAACTTTGCTTATTTCTTTTGCAGCAATAAGCATGCATAATAAGTCACAGCCAAATGCTTCCATTTGTAATCAAGTTATACATAATTATAACCGAGGGCTGGCGTTTTGGAATCGAATTTCGACAGGGATTGGAACATGATTTATAGTTAAAAGCCTAATATCGAGAAATGAATTAAGATCA	
MATAEKQKHDGRVKIGHYILGDTLGVGTFGKVKVGKHELTGHKVAVKILNRQKIRSLDVVGKIRREIQNLKLFRHPHIIKLYQVISTPSDIFMVMEYVSGGELFDYICKNGRLDEKESRRLFQQILSGVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYAAPEVISGRLYAGPEVDIWSSGVILYALLCGTLPFDDDHVPTLFKKICDGIFYTPQYLNPSVISLLKHMLQVDPMKRASIKDIREHEWFKQDLPKYLFPEDPSYSSTMIDDEALKEVCEKFECSEEEVLSCLYNRNHQDPLAVAYHLIIDNRRIMNEAKDFYLATSPPDSFLDDHHLTRPHPERVPFLVAETPRARHTLDELNPQKSKHQGVRKAKWHLGIRSQSRPNDIMAEVCRAIKQLDYEWKVVNPYYLRVRRKNPVTSTYSKMSLQLYQVDSRTYLLDFRSIDDEITEAKSGTATPQRSGSVSNYRSCQRSDSDAEAQGKSSEVSLTSSVTSLDSSPVDLTPRPGSHTIEFFEMCANLIKILAQ	YILGDTLGVGTFGKVKVGKHELTGHKVAVKILNRQKIRSLDVVGKIRREIQNLKLFRHPHIIKLYQVISTPSDIFMVMEYVSGGELFDYICKNGRLDEKESRRLFQQILSGVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYAAPEVISGRLYAGPEVDIWSSGVILYALLCGTLPFDDDHVPTLFKKICDGIFYTPQYLNPSVISLLKHMLQVDPMKRASIKDIREHEWF	Known - Refseq
+AMPKa2	SK033	CAMK	CAMKL	AMPK	N	GGTAGCGGCGGCGGCGGCGGCTAGCGGAGCGGCAGGCGGTGGAGCGAGGCCGCGCGCGCCGAAGATGGCTGAGAAGCAGAAGCACGACGGGCGGGTGAAGATCGGACACTACGTGCTGGGCGACACGCTGGGCGTCGGCACCTTCGGCAAAGTGAAGATTGGAGAACATCAATTAACAGGCCATAAAGTGGCAGTTAAAATCTTAAATAGACAGAAGATTCGCAGTTTAGATGTTGTTGGAAAAATAAAACGAGAAATTCAAAATCTAAAACTCTTTCGTCATCCTCATATTATCAAACTATACCAGGTGATCAGCACTCCAACAGATTTTTTTATGGTAATGGAATATGTGTCTGGAGGTGAATTATTTGACTACATCTGTAAGCATGGACGGGTTGAAGAGATGGAAGCCAGGCGGCTCTTTCAGCAGATTCTGTCTGCTGTGGATTACTGTCATAGGCATATGGTTGTTCATCGAGACCTGAAACCAGAGAATGTCCTGTTGGATGCACACATGAATGCCAAGATAGCCGATTTCGGATTATCTAATATGATGTCAGATGGTGAATTTCTGAGAACTAGTTGCGGATCTCCAAATTATACAGCACCTGAAGTCATCTCAGGCAGATTGTATGCAGGTCCTGAAGTTGATATCTGGAGCTGTGGTGTTATCTTGTATGCTCTTCTTTGTGGCACCCTCCCATTTGATGATGAGCATGTACCTACGTTATTTAAGAAGATCCGAGGGGGTGTCTTTTATATCCCAGAATATCTCAATCGTTCTGTCGCCACTCTCCTGATGCATATGCTGCAGGTTGACCCACTGAAACGAGCAACTATCAAAGACATAAGAGAGCATGAATGGTTTAAACAAGGTTTGCCCAGTTACTTATTTCCTGAAGACCCTTCCTATGATGCTAACGTCATTGATGATGAGGCTGTGAAAGAAGTGTGTGAAAAATTTGAATGTACAGAATCAGAAGTAATGAACAGTTTATATAGTGGTGACCCTCAAGACCAGCTTGCAGTGGCTTATCATCTTATCATTGACAATCGGAGAATAATGAACCAAGCCAGTGAGTTCTACCTCGCCTCTAGTCCTCCATCTGGTTCTTTTATGGATGATAGTGCCATGCATATTCCCCCAGGCCTGAAACCTCATCCAGAAAGGATGCCACCTCTTATAGCAGACAGCCCCAAAGCAAGATGTCCATTGGATGCACTGAATACGACTAAGCCCAAATCTTTAGCTGTGAAAAAAGCCAAGTGGCGTCAAGGAATCCGAAGTCAGAGCAAACCGTATGACATTATGGCTGAAGTTTACCGAGCTATGAAGCAGCTGGATTTTGAATGGAAGGTAGTGAATGCATACCATCTTCGTGTAAGAAGAAAAAATCCAGTGACTGGCAATTACGTGAAAATGAGCTTACAACTTTACCTGGTTGATAACAGGAGCTATCTTTTGGACTTTAAAAGCATTGATGATGAAGTAGTGGAGCAGAGATCTGGTTCCTCAACACCTCAGCGTTCCTGTTCTGCTGCTGGCTTACACAGACCAAGATCAAGTTTTGATTCCACAACTGCAGAGAGCCATTCACTTTCTGGCTCTCTCACTGGCTCTTTGACCGGAAGCACATTGTCTTCAGTTTCACCTCGCCTGGGCAGTCACACCATGGATTTTTTTGAAATGTGTGCCAGTCTGATTACTACTTTAGCCCGTTGATCTGTCTCTAGTTTCTTTCTGTTATTGCACTATGAAAATCAGTTATATTCTTTAAATTTTTATCTTACTTTTGGATAATATCCACTGCAATACTAATTGAGAAACATGAATTATTTCCAGGGGCACACAATGCTATTGAAATTACTGAAAACAAAATATCTGACATCTTATTTACTTGTAGAAATCTGTAATTCTATTGTGCCTATGATAAATTCACATAGGCAATATCTTTAATAGGTTAATAT
CAATGAAGATTTTTAATTACAATAATGAGTTCACTACAGACGATTAACACACCACACTGGCGAACCATCTCAATGTAAGGGTGGTTTGGCAACACCTCCTTGCTTTGCTGTTTGGTGTAGTAAATCTAGTTTACTTCCTAAATTTCAGTAGGCTTTATGCTGTGTTTATCGCCCAATTTATTTTAACAAAAGAAGATTAAAAAGTAAAGAACCACGAGTAAGATATTATTTAAATGTTGAAATCTTAAAACCTGCCTCCAAGATTTCAGAAGCCAAGTTTTTCTAACAGTATTTGTACAAATACTGCCTAGTGTATTCAACAGAAGACTGTGGTCATGTAACAGGTAACCACAATTTTCAGGTTTCTTAAAAACAGCTGTAACTAACTCAGGA	MAEKQKHDGRVKIGHYVLGDTLGVGTFGKVKIGEHQLTGHKVAVKILNRQKIRSLDVVGKIKREIQNLKLFRHPHIIKLYQVISTPTDFFMVMEYVSGGELFDYICKHGRVEEMEARRLFQQILSAVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYTAPEVISGRLYAGPEVDIWSCGVILYALLCGTLPFDDEHVPTLFKKIRGGVFYIPEYLNRSVATLLMHMLQVDPLKRATIKDIREHEWFKQGLPSYLFPEDPSYDANVIDDEAVKEVCEKFECTESEVMNSLYSGDPQDQLAVAYHLIIDNRRIMNQASEFYLASSPPSGSFMDDSAMHIPPGLKPHPERMPPLIADSPKARCPLDALNTTKPKSLAVKKAKWRQGIRSQSKPYDIMAEVYRAMKQLDFEWKVVNAYHLRVRRKNPVTGNYVKMSLQLYLVDNRSYLLDFKSIDDEVVEQRSGSSTPQRSCSAAGLHRPRSSFDSTTAESHSLSGSLTGSLTGSTLSSVSPRLGSHTMDFFEMCASLITTLAR	YVLGDTLGVGTFGKVKIGEHQLTGHKVAVKILNRQKIRSLDVVGKIKREIQNLKLFRHPHIIKLYQVISTPTDFFMVMEYVSGGELFDYICKHGRVEEMEARRLFQQILSAVDYCHRHMVVHRDLKPENVLLDAHMNAKIADFGLSNMMSDGEFLRTSCGSPNYTAPEVISGRLYAGPEVDIWSCGVILYALLCGTLPFDDEHVPTLFKKIRGGVFYIPEYLNRSVATLLMHMLQVDPLKRATIKDIREHEWF	Known - Refseq
+MARK3	SK096	CAMK	CAMKL	MARK	N	GACGGCCCGGGCCAGGCCCGGGATCTAGAACGGCCGTAGGGGGAAGGGAGCCGCCCTCCCCACGGCGCCTTTTCGGAACTGCCGTGGACTCGAGGACGCTGGTCGCCGGCCTCCTAGGGCTGTGCTGTTTTGTTTTGACCCTCGCATTGTGCAGAATTAAAGTGCAGTAAAATGTCCACTAGGACCCCATTGCCAACGGTGAATGAACGAGACACTGAAAACCACACGTCACATGGAGATGGGCGTCAAGAAGTTACCTCTCGTACCAGCCGCTCAGGAGCTCGGTGTAGAAACTCTATAGCCTCCTGTGCAGATGAACAACCTCACATCGGAAACTACAGACTGTTGAAAACAATCGGCAAGGGGAATTTTGCAAAAGTAAAATTGGCAAGACATATCCTTACAGGCAGAGAGGTTGCAATAAAAATAATTGACAAAACTCAGTTGAATCCAACAAGTCTACAAAAGCTCTTCAGAGAAGTAAGAATAATGAAGATTTTAAATCATCCCAATATAGTGAAGTTATTCGAAGTCATTGAAACTCAAAAAACACTCTACCTAATCATGGAATATGCAAGTGGAGGTAAAGTATTTGACTATTTGGTTGCACATGGCAGGATGAAGGAAAAAGAAGCAAGATCTAAATTTAGACAGATTGTGTCTGCAGTTCAATACTGCCATCAGAAACGGATCGTACATCGAGACCTCAAGGCTGAAAATCTATTGTTAGATGCCGATATGAACATTAAAATAGCAGATTTCGGTTTTAGCAATGAATTTACTGTTGGCGGTAAACTCGACACGTTTTGTGGCAGTCCTCCATACGCAGCACCTGAGCTCTTCCAGGGCAAGAAATATGACGGGCCAGAAGTGGATGTGTGGAGTCTGGGGGTCATTTTATACACACTAGTCAGTGGCTCACTTCCCTTTGATGGGCAAAACCTAAAGGAACTGAGAGAGAGAGTATTAAGAGGGAAATACAGAATTCCCTTCTACATGTCTACAGACTGTGAAAACCTTCTCAAACGTTTCCTGGTGCTAAATCCAATTAAACGCGGCACTCTAGAGCAAATCATGAAGGACAGGTGGATCAATGCAGGGCATGAAGAAGATGAACTCAAACCATTTGTTGAACCAGAGCTAGACATCTCAGACCAAAAAAGAATAGATATTATGGTGGGAATGGGATATTCACAAGAAGAAATTCAAGAATCTCTTAGTAAGATGAAATACGATGAAATCACAGCTACATATTTGTTATTGGGGAGAAAATCTTCAGAGGTTAGGCCGAGCAGTGATCTCAACAACAGTACTGGCCAGTCTCCTCACCACAAAGTGCAGAGAAGTGTTTCTTCAAGCCAAAAGCAAAGACGCTACAGTGACCATGCTGGACCAGGTATTCCTTCTGTTGTGGCGTATCCGAAAAGGAGTCAGACCAGCACTGCAGATAGTGACCTCAAAGAAGATGGAATTTCCTCCCGGAAATCAACTGGCAGTGCTGTTGGAGGAAAGGGAATTGCTCCAGCCAGTCCCATGCTTGGGAATGCAAGTAATCCTAATAAGGCGGATATTCCTGAACGCAAGAAAAGCTCCACTGTCCCTAGTAGTAACACAGCATCTGGTGGAATGACACGACGAAATACTTATGTTTGCAGTGAGAGAACTACAGATGATAGACACTCAGTGATTCAGAATGGCAAAGAAAACAGCACTATTCCTGATCAGAGAACTCCAGTTGCTTCAACACACAGTATCAGTAGTGCAGCCACCCCAGATCGAATCCGCTTCCCAAGAGGCACTGCCAGTCGTAGCACTTTCCACGGCCAGCCCCGGGAACGGCGAACCGCAACATATAATGGCCCTCCTGCCTCTCCCAGCCTGTCCCATGAAGCCACACCATTGTCCCAGACTCGAAGCCGAGGCTCCACTACTCTCTTTAGTAAATTAACTTCAAAACTCACAAGGAGTC
GCAATGTATCTGCTAAGCAAAAAGATGAAAACAAAGAAGCAAAGCCTCGATCCCTACGCTTCACCTGGAGCATGAAAACCACTAGTTCAATGGATCCCGGGGACATGATGCGGGAAATCCGCAAAGTGTTGGACGCCAATAACTGCGACTATGAGCAGAGGGAGCGCTTCTTGCTCTTCTGCGTCCACGGAGATGGGCACGCGGAGAACCTCGTGCAGTGGGAAATGGAAGTGTGCAAGCTGCCAAGACTGTCTCTGAACGGGGTCCGGTTTAAGCGGATATCGGGGACATCCATAGCCTTCAAAAATATTGCTTCCAAAATTGCCAATGAGCTAAAGCTGTAACCCAGTGATTATGATGTAAATTAAGTAGCAAGTAAAGTGTTTTCCTGAACACTGATGGAAATGTATAGAATAATATTTAGGCAATAACGTCTGCATCTTCTAAATCATGAAATTAAAGTCTGAGGACGAGAGCACGCCTGGGAGCGAAAGCTGGCCTTTTTTCTACGAATGCACTACATTAAAGATGTGCAACCTATGCGCCCCCTGCCCTACTTCCGTTACCCTGAGAGTCGGCGTGTGGCCCCATCTCCATGTGCCTCCCGTCTGGGTGGGTGTGAGAGTGGACGGTATGTGTGTGAAGTGGTGTATATGGAAGCATCTCCCTACACTGGCAGCCAGTCATTACTAGTACCTCTGCGGGAGATCATCCGGTGCTAAAACATTACAGTTGCCAAGGAGGAAAATACTGAATGACTGCTAAGAATTAACCTTAAGACCAGTTCATAGTTAATACAGGTTTACAGTTCATGCCTGTGGTTTTGTGTTTGTTGTTTTGTGTTTTTTTAGTGCAAAAGGTTTAAATTTATAGTTGTGAACATTGCTTGTGTGTGTTTTTCTAAGTAGATTCACAAGATAATTAAAAATTCACTTTTTCTCAGGT	MSTRTPLPTVNERDTENHTSHGDGRQEVTSRTSRSGARCRNSIASCADEQPHIGNYRLLKTIGKGNFAKVKLARHILTGREVAIKIIDKTQLNPTSLQKLFREVRIMKILNHPNIVKLFEVIETEKTLYLIMEYASGGEVFDYLVAHGRMKEKEARSKFRQIVSAVQYCHQKRIVHRDLKAENLLLDADMNIKIADFGFSNEFTVGGKLDTFCGSPPYAAPELFQGKKYDGPEVDVWSLGVILYTLVSGSLPFDGQNLKELRERVLRGKYRIPFYMSTDCENLLKRFLVLNPIKRGTLEQIMKDRWINAGHEEDELKPFVEPELDISDQKRIDIMVGMGYSQEEIQESLSKMKYDEITATYLLLGRKSSELDASDSSSSSNLSLAKVRPSSDLNNSTGQSPHHKVQRSVSSSQKQRRYSDHAGPAIPSVVAYPKRSQTSTADGDLKEDGISSRKSSGSAVGGKGIAPASPMLGNASNPNKADIPERKKSSTVPSSNTASGGMTRRNTYVCSERTTADRHSVIQNGKENSTIPDQRTPVASTHSISSAATPDRIRFPRGTASRSTFHGQPRERRTATYNGPPASPSLSHEATPLSQTRSRGSTNLFSKLTSKLTRSRNVSAEQKDENKEAKPRSLRFTWSMKTTSSMDPGDMMREIRKVLDANNCDYEQRERFLLFCVHGDGHAENLVQWEMEVCKLPRLSLNGVRFKRISGTSIAFKNIASKIANELKL	YRLLKTIGKGNFAKVKLARHILTGREVAIKIIDKTQLNPTSLQKLFREVRIMKILNHPNIVKLFEVIETQKTLYLIMEYASGGKVFDYLVAHGRMKEKEARSKFRQIVSAVQYCHQKRIVHRDLKAENLLLDADMNIKIADFGFSNEFTVGGKLDTFCGSPPYAAPELFQGKKYDGPEVDVWSLGVILYTLVSGSLPFDGQNLKELRERVLRGKYRIPFYMSTDCENLLKRFLVLNPIKRGTLEQIMKDRWI	Known - Refseq
+MARK2	SK120	CAMK	CAMKL	MARK	N	CGGCGGTGGTGGCGGCCATGTTGGGAGCAGCAGGTCCGGCGGCGGCTGCCTGTGTGCCGGGCGCGGAGCAGTGCCGCTGAGGGCAGGGGAGGAGCGAGGCAGGCGGCCGGCTGCGGCGGCAGAGAGTAGGCGGAGCGGCGCGGCCCGGCCGAAAGGCGGCACAGCCCAGCCGGGGGTCGGGGGGGTGCGGTCCGGAGCCGCTCGGAGCCGGCGCGGCCTAGCCCGAGCGGCGCATCCCCGGGCTGGCGTGAGCGGCTGCCCGGCCTCCCCGCACCCCCGGCCGGGGCCCATGCGGCGGGTGCTCCTGCTGTGAGAAGCCCCGCCCGGCCGGGCTCCGCGCCTTCCCTTCCCTCCCTTCCTCCAAGCTTCTCGGTTCCCTCCCCCGAGATACCGGCGCCATGTCCAGCGCTCGGACCCCCCTACCCACGCTGAACGAGAGGGACACGGAGCAGCCCACCTTGGGACACCTTGACTCCAAGCCCAGCAGTAAGTCCAACATGATTCGGGGCCGCAACTCAGCCACCTCTGCTGATGAGCAGCCCCACATTGGAAACTACCGGCTCCTCAAGACCATTGGCAAGGGTAATTTTGCCAAGGTGAAGTTGGCCCGACACATCCTGACTGGGAAAGAGGTAGCTGTGAAGATCATTGACAAGACTCAACTGAACTCCTCCAGCCTCCAGAAACTATTCCGCGAAGTAAGAATAATGAAGGTTTTGAATCATCCCAACATAGTTAAATTATTTGAAGTGATTGAGACTGAGAAAACGCTCTACCTTGTCATGGAGTACGCTAGTGGCGGAGAGGTATTTGATTACCTAGTGGCTCATGGCAGGATGAAAGAAAAAGAGGCTCGAGCCAAATTCCGCCAGATAGTGTCTGCTGTGCAGTACTGTCACCAGAAGTTTATTGTCCATAGAGACTTAAAGGCAGAAAACCTGCTCTTGGATGCTGATATGAACATCAAGATTGCAGACTTTGGCTTCAGCAATGAATTCACCTTTGGGAACAAGCTGGACACCTTCTGTGGCAGTCCCCCTTATGCTGCCCCAGAACTCTTCCAGGGCAAAAAATATGATGGACCCGAGGTGGATGTGTGGAGCCTAGGAGTTATCCTCTATACACTGGTCAGCGGATCCCTGCCTTTTGATGGACAGAACCTCAAGGAGCTGCGGGAACGGGTACTGAGGGGAAAATACCGTATTCCATTCTACATGTCCACGGACTGTGAAAACCTGCTTAAGAAATTTCTCATTCTTAATCCCAGCAAGAGAGGCACTTTAGAGCAAATCATGAAAGATCGATGGATGAATGTGGGTCACGAAGATGATGAACTAAAGCCTTACGTGGAGCCACTCCCTGACTACAAGGACCCCCGGCGGACAGAGCTGATGGTGTCCATGGGTTATACACGGGAAGAGATCCAGGACTCGCTGGTGGGCCAGAGATACAACGAGGTGATGGCCACCTATCTGCTCCTGGGCTACAAGAGCTCCGAGCTGGAAGGCGACACCATCACCCTGAAACCCCGGCCTTCAGCTGATCTAACCAATAGCAGCGCCCAATTCCCATCCCACAAGGTACAGCGAAGCGTGTCGGCCAATCCCAAGCAGCGGCGCTTCAGCGACCAGGCTGGTCCTGCCATTCCCACCTCTAATTCTTACTCTAAGAAGACTCAGAGTAACAACGCAGAAAATAAGCGGCCTGAGGAGGACCGGGAGTCAGGGCGGAAAGCCAGCAGCACAGCCAAGGTGCCTGCCAGCCCCCTGCCCGGTCTGGAGAGGAAGAAGACCACCCCAACCCCCTCCACGAACAGCGTCCTCTCCACCAGCACAAATCGAAGCAGGAATTCCCCACTTTTGGAGCGGGCCAGCCTCGGCCAGGCCTCCATCCAGAATGGCAAAGACAGCCTAACCATGCCAGGGTCCCGGGCCTCCACGGCTTCTGCTTCTGCCGCAGTCTCTGCGGC
CCGGCCCCGCCAGCACCAGAAATCCATGTCGGCCTCCGTGCACCCCAACAAGGCCTCTGGGCTGCCCCCCACGGAGAGTAACTGTGAGGTGCCGCGGCCCAGCACAGCCCCCCAGCGTGTCCCTGTTGCCTCCCCATCCGCCCACAACATCAGCAGCAGTGGTGGAGCCCCAGACCGAACTAACTTCCCCCGGGGTGTGTCCAGCCGAAGCACCTTCCATGCTGGGCAGCTCCGACAGGTGCGGGACCAGCAGAATTTGCCCTACGGTGTGACCCCAGCCTCTCCCTCTGGCCACAGCCAGGGCCGGCGGGGGGCCTCTGGGAGCATCTTCAGCAAGTTCACCTCCAAGTTTGTACGCAGAAATCTGTCTTTCAGGTTTGCCAGAAGGAACCTGAATGAACCTGAAAGCAAAGACCGAGTGGAGACGCTCAGACCTCACGTGGTGGGCAGTGGCGGCAACGACAAAGAAAAGGAAGAATTTCGGGAGGCCAAGCCCCGCTCCCTCCGCTTCACGTGGAGTATGAAGACCACGAGCTCCATGGAGCCCAACGAGATGATGCGGGAGATCCGCAAGGTGCTGGACGCGAACAGCTGCCAGAGCGAGCTGCATGAGAAGTACATGCTGCTGTGCATGCACGGCACGCCGGGCCACGAGGACTTCGTGCAGTGGGAGATGGAGGTGTGCAAACTGCCGCGGCTCTCTCTCAACGGGGTTCGATTTAAGCGGATATCGGGCACCTCCATGGCCTTCAAAAACATTGCCTCCAAAATAGCCAACGAGCTGAAGCTTTAACAGGCTGCCAGGAGCGGGGGCGGCGGGGGCGGGCCAGCTGGACGGGCTGCCGGCCGTGCGCCGCCCCACCTGGGCGAGACTGCAGCGATGGATTGGTGTGTCTCCCTGCTGGCACTTCTCCCCTCCCTGGCCCTTCTCAGTTTTCTCCCACATTCACCCCTGCCCAGAGATTCCCCCTTCTCCTCTCCCCTACTGGAGGCAAAGGAAGGGGAGGGTGGATGGGGGGGCAGGGCTCCCCCTCGGTACTGCGGTTGCACAGAGTATTTCGCCTAAACCAAGAAATTTTTTATTACCAAAAAGA	MSSARTPLPTLNERDTEQPTLGHLDSKPSSKSNMIRGRNSATSADEQPHIGNYRLLKTIGKGNFAKVKLARHILTGKEVAVKIIDKTQLNSSSLQKLFREVRIMKVLNHPNIVKLFEVIETEKTLYLVMEYASGGEVFDYLVAHGRMKEKEARAKFRQIVSAVQYCHQKFIVHRDLKAENLLLDADMNIKIADFGFSNEFTFGNKLDTFCGSPPYAAPELFQGKKYDGPEVDVWSLGVILYTLVSGSLPFDGQNLKELRERVLRGKYRIPFYMSTDCENLLKKFLILNPSKRGTLEQIMKDRWMNVGHEDDELKPYVEPLPDYKDPRRTELMVSMGYTREEIQDSLVGQRYNEVMATYLLLGYKSSELEGDTITLKPRPSADLTNSSAQFPSHKVQRSVSANPKQRRFSDQAGPAIPTSNSYSKKTQSNNAENKRPEEDRESGRKASSTAKVPASPLPGLERKKTTPTPSTNSVLSTSTNRSRNSPLLERASLGQASIQNGKDSLTMPGSRASTASASAAVSAARPRQHQKSMSASVHPNKASGLPPTESNCEVPRPSTAPQRVPVASPSAHNISSSGGAPDRTNFPRGVSSRSTFHAGQLRQVRDQQNLPYGVTPASPSGHSQGRRGASGSIFSKFTSKFVRRNLSFRFARRNLNEPESKDRVETLRPHVVGSGGNDKEKEEFREAKPRSLRFTWSMKTTSSMEPNEMMREIRKVLDANSCQSELHEKYMLLCMHGTPGHEDFVQWEMEVCKLPRLSLNGVRFKRISGTSMAFKNIASKIANELKL	
YRLLKTIGKGNFAKVKLARHILTGKEVAVKIIDKTQLNSSSLQKLFREVRIMKVLNHPNIVKLFEVIETEKTLYLVMEYASGGEVFDYLVAHGRMKEKEARAKFRQIVSAVQYCHQKFIVHRDLKAENLLLDADMNIKIADFGFSNEFTFGNKLDTFCGSPPYAAPELFQGKKYDGPEVDVWSLGVILYTLVSGSLPFDGQNLKELRERVLRGKYRIPFYMSTDCENLLKKFLILNPSKRGTLEQIMKDRWM	Known - Refseq
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/kinases.xls
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/kinases.xls
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/knownGene.gff3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/knownGene.gff3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/knownGene.gff3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,17 @@
+##gff-version 3
+chr17	UCSC	mRNA	62467934	62469545	.	-	.	ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1
+chr17	UCSC	three_prime_UTR	62467934	62468038	.	-	.	Parent=A00469
+chr17	UCSC	CDS	62468039	62468236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62468490	62468654	.	-	2	Parent=A00469
+chr17	UCSC	CDS	62468747	62468866	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469076	62469236	.	-	1	Parent=A00469
+chr17	UCSC	CDS	62469497	62469506	.	-	0	Parent=A00469
+chr17	UCSC	five_prime_UTR	62469507	62469545	.	-	.	Parent=A00469
+chr9	UCSC	mRNA	90517946	90527968	.	-	.	ID=AB000114;Ontology_term=GO:0007155,GO:0005194;Ontology_term=GO:0005578;Dbxref=AFFX-U95:41031_at,Genbank-protein:BAA19055,Unigene:Hs.94070,AFFX-U133:205907_s_at,Genbank-mRNA:AB000114,Locuslink:4958,Swissprot:Q99983,Swissprot:OMD_HUMAN,Refseq-mRNA:NM_005014,Refseq-protein:NP_005005,PFAM:PF01462,PFAM:00560;Note=osteomodulin;Alias=OMD;
+#.	UCSC	protein	.	.	.	.	.	ID=BAA19055;Parent=AB000114
+chr9	UCSC	three_prime_UTR	90517946	90518841	.	-	.	Parent=AB000114
+chr9	UCSC	CDS	90518842	90519167	.	-	1	Parent=AB000114
+chr9	UCSC	CDS	90520309	90521248	.	-	0	Parent=AB000114
+chr9	UCSC	five_prime_UTR	90521249	90521264	.	-	.	Parent=AB000114
+chr9	UCSC	five_prime_UTR	90527892	90527968	.	-	.	Parent=AB000114
+chr9	BLASTN	match	90518850	90521248	0.0	+	.	ID=blastresult.1;Target=BC046356.1 178 1828;Gap=M78 I759 M1561

Added: trunk/packages/bioperl/branches/upstream/current/t/data/little.largemultifasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/little.largemultifasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/little.largemultifasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+>Human: 72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;
+CATCTCTGACTGTGGTCACACACCTGTGCATGCATGCTCCTGACGTCATGCACTGCATGCTGCAACTGCTGCATGCATGCA
+>Mouse: 72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;
+TATCTCTGACTGTGGTCACACACCTGTGCATGCATGCTCCTGACGTCATGCACTGCATGCTGCAACTGCTGCATGCATGCA
+>Rat: 72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;
+TTTCTCTGACTGTGGTCACACACCTGTGCATGCATGCTCCTGACGTCATGCACTGCATGCTGCAACTGCTGCATGCATGCA
+>Whatever: 72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;
+CATCTCTGACTGTGGTCACACACCTGTGCATGCATGCTCCTGACGTCATGCACTGCATGCTGCAACTGCTGCATGCATGCA
+>Extraterrestrial: 72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;
+CCCCCCTGACTGTGGTCACACACCTGTGCATGCATGCTCCTGACGTCATGCACTGCATGCTGCAACTGCTGCATGCATGCA
+>GeorgeWBush: 72.0:1018606-3386002; 73.0:0-14850845; 74.0:0-83355922; SPECIAL_hsApr2003_3.0:0-414023;
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/long-names.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/long-names.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/long-names.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,26 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B C D E F SupercalifragilisticexpialidociousOTU H;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=5;
+      charlabels SupercalifragilisticexpialidociousLabel Two Three
+      Four Five;
+      format datatype=protein missing=? gap=-;
+      matrix
+A     --ONE
+B     --ONE
+C     TWO--
+D     THREE
+E     F-OUR
+F     FIVE-
+SupercalifragilisticexpialidociousOTU     SIX--
+H     SEVEN;
+END;
+
+BEGIN TREES;
+       tree SupercalifragilisticexpialidociousTree = (((((((A:1,B:1):1,C:2):1,D:3):1,E:4):1,F:5):1,SupercalifragilisticexpialidociousOTU:6):1,H:7);
+END;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/lucy.info
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/lucy.info	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/lucy.info	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+TaLr1010B10R	CLR	86	505	CLB	11	577	CLN	11	577	CLZ	0	0	CLV	86	523

Added: trunk/packages/bioperl/branches/upstream/current/t/data/lucy.qual
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/lucy.qual	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/lucy.qual	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,55 @@
+>TaLr1010B10R
+0 0 0 0 0 0 0 0 0 0 15 16 25 32 39 39 34 34 34 32 
+32 29 34 26 26 29 29 32 34 39 35 35 35 39 39 35 35 
+35 35 35 35 39 40 40 40 40 40 45 45 45 40 40 51 51 
+51 45 51 51 51 40 40 42 46 45 45 56 45 45 40 40 40 
+40 40 40 40 40 40 40 40 40 40 40 40 45 45 51 51 51 
+51 42 43 43 56 46 51 40 35 35 35 35 35 35 35 37 40 
+40 40 42 56 44 56 50 56 56 56 46 46 42 42 42 46 37 
+37 34 30 30 42 50 42 40 40 45 45 45 45 40 40 40 40 
+42 50 42 43 43 43 42 42 42 56 50 43 56 40 40 37 37 
+37 43 40 40 40 41 41 41 41 41 41 41 50 56 56 50 50 
+50 56 50 50 50 56 56 56 42 46 46 42 42 42 43 43 56 
+56 56 43 43 41 41 41 41 41 41 51 51 56 56 56 50 50 
+50 46 45 45 45 41 41 41 41 44 45 41 43 43 46 41 56 
+56 56 51 45 45 45 45 41 41 42 41 41 41 41 41 43 43 
+42 42 56 51 43 43 41 38 38 38 42 43 43 56 56 46 42 
+42 42 42 56 50 50 42 42 42 50 50 41 41 42 42 43 42 
+42 41 37 35 38 40 40 38 38 38 40 40 40 41 42 42 42 
+42 42 42 41 41 41 41 41 43 38 40 38 40 40 43 50 50 
+35 35 30 33 32 35 41 41 35 35 32 35 40 41 44 45 45 
+41 40 37 37 35 37 41 42 42 42 42 41 40 40 40 40 40 
+42 43 56 42 40 35 35 35 37 40 40 40 42 42 42 42 42 
+42 36 35 35 35 36 39 42 42 42 42 37 35 35 35 32 32 
+32 40 40 40 40 42 56 50 56 50 50 50 56 43 40 40 40 
+40 36 42 37 48 33 33 33 35 35 32 32 32 32 37 42 34 
+31 31 35 33 33 33 39 35 37 35 32 32 33 33 37 35 35 
+36 35 35 37 44 42 48 48 44 44 42 33 31 24 24 30 24 
+31 28 29 31 31 36 30 33 33 42 30 34 34 36 36 34 34 
+34 30 36 30 30 28 25 25 29 29 29 35 32 35 35 35 35 
+35 33 40 40 32 29 25 23 23 23 19 19 19 20 17 17 17 
+25 25 33 33 35 40 33 35 35 37 37 35 42 42 42 40 40 
+56 56 56 56 47 47 42 40 40 30 30 21 22 22 27 27 28 
+28 26 26 24 16 12 9 14 13 14 16 22 22 19 12 10 12 
+9 9 11 13 12 21 25 25 29 24 25 28 28 28 27 24 11 11 
+10 17 20 29 26 25 25 18 18 18 24 19 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/lucy.seq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/lucy.seq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/lucy.seq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,23 @@
+>TaLr1010B10R 0 0 0 86 505
+ATACGCCAAGCTCGAAATTAACCCTCACTAAAGGGAACAAAAGCTGGAGC
+TCGCGCGCCTGCAGGTCGACACTAGTGGATCCAAAGAATTCGGCACGAGA
+CCTACCCGTTCGTCAAGGAGGGTGTGTCCAACGGAACCCTCAAGCTCGTT
+GGCGGCCACTACGACTTCGTCTCCGGCAAGTTCGAGACATGGGAGCAGTA
+AATCTTCCCCACCGGTTAACTCCTATATATACATGCGAATACAAACGCGT
+ACATATATCAAGATATCGTCCGACCGTCCGACCGACCGATGTGAATCCAA
+TGCCATGGAGTGCGTACTCGTTATTTTCCAGTACTGGATGCCGGGATGGC
+CCGATGTGAATTCGCCATAAGCAATAGAACCTTTTTTCTTTTACCATTTT
+CTGACGAGGAATTGTACTACCATGTGATGCATAATTTGATCGTCCTGTGA
+TCAAAAGACATCATATATAAGTTTAATAATATTTTCATGAACAGTTTACC
+CTTTTAAAAAAAAAAAAAAAAAACTCGAGAGTACTTTTAGAGCGGCCGCG
+GGCCCATCGATTTTCCACCCGGGTGGGGTACCAGGTAAGTGTACCCAATT
+CGCCCTATAGTGAGTCGTATTACAATTCACTGGCCGGCGTTTTACAACGG
+CGGGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTTGAGCACA
+TTCCCCTTTTGCCAGGTTGGCGTAATAACAAAAGAGAGCCCGAACCGATC
+GCCCTTTCCAACAAGTTGGGCAACTTGAATGGGTGAATGGAGAATCCAAA
+TTTTTAAGTGGTGTAAAGGGTGTTAAACTCACTGGGATCCTTAAAGGGTT
+GGGGTGATTATAAAAAACACAAACGCCCAAAGCGTTATATTCCAGGCCCC
+CTCAGGTCCCTTAACAGGGCTGTGTGATGGGTCATTAAACCGCCCCCTAC
+CCACCATTTCGTAGAAGAGGGTTTAACTTGCGTTTTAAAAAGAAAACTCT
+CACACCCCTCTCCTCCTCGGACCCTGGAGAACAATAAATAGAAAAGGCGA
+CGTTGTTTCG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/lucy.stderr
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/lucy.stderr	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/lucy.stderr	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+lucy: ==> dropping TaLr1011A07R
+TaLr1010B10R has PolyA (right).

Added: trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.protml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.protml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.protml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,118 @@
+
+Relative Substitution Rate Matrix
+  Ala  157  163  241  172  165  314  519   65  108   92  110  135   41  574 1135 1343   26   42  869
+  157  Arg  132   45  306  897   94  407  951   72  110 1951  125   20  209  293  191  382   70   51
+  163  132  Asn 1602   93  225  169  229 1178  145   40  747   94   22   35 1464  683    8  207   48
+  241   45 1602  Asp   31  152 2260  375  301   34   18   83   54   10   37  170  123   17  134   93
+  172  306   93   31  Cys   27   16  166  217   46   49   15  120  206   37  639  140  339  641  188
+  165  897  225  152   27  Gln 1010   69 1683   23  210  887  132   14  472  161  154   52   76   54
+  314   94  169 2260   16 1010  Glu  332   72   33   29  515   50   13   56   91   97   35   19  138
+  519  407  229  375  166   69  332  Gly   60   16   21   81   38   15   62  554   94  167   16  141
+   65  951 1178  301  217 1683   72   60  His   54  160  157   95  136  335  217  140   39 1746   36
+  108   72  145   34   46   23   33   16   54  Ile  699   61 1410  236   29  120  758   41   92 2869
+   92  110   40   18   49  210   29   21  160  699  Leu   44 1115  751  311  174   80  161   72  525
+  110 1951  747   83   15  887  515   81  157   61   44  Lys  182    7   64  140  286   27   26   37
+  135  125   94   54  120  132   50   38   95 1410 1115  182  Met  128   47   82  606   60   55  882
+   41   20   22   10  206   14   13   15  136  236  751    7  128  Phe   44  280   41  165 1660  179
+  574  209   35   37   37  472   56   62  335   29  311   64   47   44  Pro  808  342   21   34   62
+ 1135  293 1464  170  639  161   91  554  217  120  174  140   82  280  808  Ser 1387   93  186  120
+ 1343  191  683  123  140  154   97   94  140  758   80  286  606   41  342 1387  Thr   24   60  338
+   26  382    8   17  339   52   35  167   39   41  161   27   60  165   21   93   24  Trp  229   73
+   42   70  207  134  641   76   19   16 1746   92   72   26   55 1660   34  186   60  229  Tyr   50
+  869   51   48   93  188   54  138  141   36 2869  525   37  882  179   62  120  338   73   50  Val
+
+Transition Probability Matrix (x1.0e7)  1PAM
+-124462    2710    2370    4235    1163    2282    6582   12978     505    1898
+   4091 -103605    1921     795    2070   12423    1971   10170    7387    1259
+   4243    2279 -128302   28151     629    3124    3536    5717    9155    2554
+   6271     780   23279 -105722     211    2112   47338    9373    2339     601
+   4477    5280    1352     549  -55626     380     338    4139    1689     803
+   4285   15453    3276    2678     185 -104892   21160    1731   13083     412
+   8175    1621    2452   39703     109   13993  -95728    8311     559     586
+  13504    7009    3322    6587    1119     959    6963  -62944     468     285
+   1689   16381   17115    5289    1469   23322    1506    1506 -113378     955
+   2810    1235    2112     601     309     325     699     406     422 -129830
+   2386    1903     585     316     334    2915     603     520    1244   12290
+   2864   33618   10853    1460     100   12284   10795    2033    1217    1074
+   3520    2147    1373     950     810    1830    1056     950     739   24779
+   1077     338     317     169    1394     190     275     380    1056    4139
+  14924    3594     513     646     248    6543    1176    1540    2600     513
+  29541    5056   21278    2987    4322    2228    1910   13846    1689    2106
+  34935    3293    9922    2162     945    2133    2033    2348    1088   13315
+    664    6577     121     302    2293     724     724    4163     302     724
+   1082    1214    3009    2349    4329    1056     396     396   13569    1610
+  22603     883     704    1625    1267     742    2893    3533     282   50403
+
+   2819    2194    1097     560    9885   26472   26768     121     450   19374
+   3396   38891    1010     265    3594    6841    3810    1805     762    1143
+   1238   14891     766     295     609   34143   13614      39    2240    1080
+    552    1657     439     130     634    3964    2453      81    1446    2063
+   1521     296     971    2788     634   14910    2788    1605    6927    4181
+   6469   17678    1071     185    8138    3750    3070     247     824    1195
+    886   10273     409     177     967    2125    1935     163     204    3079
+    639    1621     308     205    1062   12911    1872     788     171    3151
+   4922    3122     771    1836    5766    5068    2791     184   18878     808
+  21508    1218   11436    3184     504    2794   15108     195     991   63973
+ -67412     873    9041   10146    5365    4047    1597     761     780   11706
+   1346  -90530    1475     100    1102    3264    5698     129     286     830
+  34282    3625 -113124    1725     810    1901   12073     282     598   19675
+  23082     148    1035  -64432     760    6526     824     781   17951    3991
+   9574    1275     381     596  -71654   18849    6824      99     364    1391
+   5338    2791     661    3783   13932 -144242   27644     441    2008    2681
+   2463    5698    4911     558    5899   32329 -132323     115     644    7531
+   4948     543     483    2233     362    2172     483  -31919    2474    1629
+   2217     528     449   22438     581    4329    1188    1082  -62933    1109
+  16140     742    7155    2419    1075    2803    6732     346     538 -122884
+
+Transition Probability Matrix (x1.0e5)  1PAM
+      Ala   Arg   Asn   Asp   Cys   Gln   Glu   Gly   His   Ile   Leu   Lys   Met   Phe   Pro   Ser   Thr   Trp   Tyr   Val
+Ala 98755    27    24    42    12    23    66   130     5    19    28    22    11     6    99   265   268     1     4   194
+Arg    41 98964    19     8    21   124    20   102    74    13    34   389    10     3    36    68    38    18     8    11
+Asn    42    23 98717   282     6    31    35    57    92    26    12   149     8     3     6   341   136     0    22    11
+Asp    63     8   233 98943     2    21   473    94    23     6     6    17     4     1     6    40    25     1    14    21
+Cys    45    53    14     5 99444     4     3    41    17     8    15     3    10    28     6   149    28    16    69    42
+Gln    43   155    33    27     2 98951   212    17   131     4    65   177    11     2    81    37    31     2     8    12
+Glu    82    16    25   397     1   140 99043    83     6     6     9   103     4     2    10    21    19     2     2    31
+Gly   135    70    33    66    11    10    70 99371     5     3     6    16     3     2    11   129    19     8     2    32
+His    17   164   171    53    15   233    15    15 98866    10    49    31     8    18    58    51    28     2   189     8
+Ile    28    12    21     6     3     3     7     4     4 98702   215    12   114    32     5    28   151     2    10   640
+Leu    24    19     6     3     3    29     6     5    12   123 99326     9    90   101    54    40    16     8     8   117
+Lys    29   336   109    15     1   123   108    20    12    11    13 99095    15     1    11    33    57     1     3     8
+Met    35    21    14    10     8    18    11    10     7   248   343    36 98869    17     8    19   121     3     6   197
+Phe    11     3     3     2    14     2     3     4    11    41   231     1    10 99356     8    65     8     8   180    40
+Pro   149    36     5     6     2    65    12    15    26     5    96    13     4     6 99283   188    68     1     4    14
+Ser   295    51   213    30    43    22    19   138    17    21    53    28     7    38   139 98558   276     4    20    27
+Thr   349    33    99    22     9    21    20    23    11   133    25    57    49     6    59   323 98677     1     6    75
+Trp     7    66     1     3    23     7     7    42     3     7    49     5     5    22     4    22     5 99681    25    16
+Tyr    11    12    30    23    43    11     4     4   136    16    22     5     4   224     6    43    12    11 99371    11
+Val   226     9     7    16    13     7    29    35     3   504   161     7    72    24    11    28    67     3     5 98771
+Pai 0.077 0.051 0.043 0.052 0.020 0.041 0.062 0.074 0.023 0.052 0.091 0.059 0.024 0.040 0.051 0.069 0.059 0.014 0.032 0.066
+
+Amino Acid Frequencies
+         Model    Data       
+  1  A   0.077   0.091
+  2  R   0.051   0.062
+  3  N   0.043   0.081
+  4  D   0.052   0.063
+  5  C   0.020   0.062
+  6  Q   0.041   0.042
+  7  E   0.062   0.035
+  8  G   0.074   0.073
+  9  H   0.023   0.018
+ 10  I   0.052   0.044
+ 11  L   0.091   0.063
+ 12  K   0.059   0.066
+ 13  M   0.024   0.010
+ 14  F   0.040   0.019
+ 15  P   0.051   0.019
+ 16  S   0.069   0.068
+ 17  T   0.059   0.039
+ 18  W   0.014   0.039
+ 19  Y   0.032   0.045
+ 20  V   0.066   0.062
+5 / 50  JTT model  approx ln L -1047.8 ... -1056.8  diff 9.0
+29	-1047.8 ((((Bosta2,Preen),Homsa),Papan),Equca,Ratno1);
+7	-1048.1 ((((Bosta2,Preen),Papan),Homsa),Equca,Ratno1);
+3	-1053.3 ((((Bosta2,Preen),Equca),Homsa),Papan,Ratno1);
+2	-1056.8 (((Bosta2,(Preen,Papan)),Homsa),Equca,Ratno1);
+9	-1056.8 ((Bosta2,(Preen,(Papan,Homsa))),Equca,Ratno1);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.simple.protml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.simple.protml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/lysozyme6.simple.protml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+5 / 50  JTT model  approx ln L -1047.8 ... -1056.8  diff 9.0
+((((Bosta2,Preen),Homsa),Papan),Equca,Ratno1);
+((((Bosta2,Preen),Papan),Homsa),Equca,Ratno1);
+((((Bosta2,Preen),Equca),Homsa),Papan,Ratno1);
+(((Bosta2,(Preen,Papan)),Homsa),Equca,Ratno1);
+((Bosta2,(Preen,(Papan,Homsa))),Equca,Ratno1);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+************************************************************************
+* Output from:                                Tue Aug 29 19:27:07 2001 *
+*                                                                      *
+*                             MAPMAKER/EXP                             *
+*                            (version 3.0b)                            *
+*                                                                      *
+************************************************************************
+
+data from 'P:\Chad1.TXT' are loaded
+  F2 backcross data  (30 individuals, 18 loci)
+'photo' is on: file is 'P:\Chad1.OUT'
+
+12> cent kos
+centimorgan function: Kosambi
+
+13> map
+===============================================================================
+Map:                        
+  Markers          Distance 
+    1  chad1         0.0 cM
+    2  chad2         0.0 cM
+    3  chad3         3.3 cM
+    4  chad4        10.1 cM
+    5  chad5         6.7 cM
+    6  chad6        13.7 cM
+    7  chad7        10.1 cM
+    8  chad8         3.4 cM
+    9  chad9        21.7 cM
+   10  chad10       21.7 cM
+   11  chad11        0.0 cM
+   12  chad12        3.3 cM
+   13  chad13        3.3 cM
+   14  chad14        0.0 cM
+   15  chad15       10.1 cM
+   16  chad16        0.0 cM
+   17  chad17        0.0 cM
+   18  chad18     ----------
+                   107.8 cM   18 markers   log-likelihood= -50.67
+===============================================================================
+
+14> photo off

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mapmaker.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+Map:
+  mapname=THE_NAME_OF_MAP
+  Markers          Distance 
+  1  bnl5.62a         35.3 cM coremarker
+  2  npi406           15.6 cM EST
+  3  npi439a          27.2 cM
+  4  dpg11            30.7 cM ssr
+  5  bnl7.21a           33 cM
+  6  bnl23b            8.6 cM coremarker
+  7  umc58           107.9 cM
+  8  npi605a          26.5 cM EST
+  9  npi120             19 cM ssr
+  10  npi615           19.5 cM coremarker
+  11  npi407             14 cM
+  12  npi241a          45.2 cM
+  13  bnl6.32       ---------- RflP

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mast.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mast.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mast.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3990 @@
+********************************************************************************
+MAST - Motif Alignment and Search Tool
+********************************************************************************
+	MAST version 3.0 (Release date: 2002/04/02 00:11:59)
+
+	For further information on how to interpret these results or to get
+	a copy of the MAST software please access http://meme.sdsc.edu.
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+	If you use this program in your research, please cite:
+
+	Timothy L. Bailey and Michael Gribskov,
+	"Combining evidence using p-values: application to sequence homology
+	searches", Bioinformatics, 14(48-54), 1998.
+********************************************************************************
+
+
+********************************************************************************
+DATABASE AND MOTIFS
+********************************************************************************
+	DATABASE /home/sao/applications/mask_db/upstream.masked (nucleotide)
+	Last updated on Mon Jul 28 14:24:33 2003
+	Database contains 35386 sequences, 70745108 residues
+
+	Reverse complement strands are not scored.
+
+	MOTIFS acro1.set.fasta.conserved.25.meme (nucleotide)
+	MOTIF WIDTH BEST POSSIBLE MATCH
+	----- ----- -------------------
+	  2    21   ATGAAAAGCAGTGATTCACAG
+	  3    21   AGTGCCCTGGGCCTTTAAGAG
+	  4    23   AGGAGAGAGCTCACACCTCTCCC
+	  8    21   AACTTCAAAATGTCTCTTTGA
+
+	PAIRWISE MOTIF CORRELATIONS:
+	MOTIF     2     3     4
+	----- ----- ----- -----
+	   3   0.20
+	   4   0.32  0.17
+	   8   0.20  0.23  0.23
+	No overly similar pairs (correlation > 0.60) found.
+
+	Random model letter frequencies (from /home/sao/proj_motif/model.bckg):
+	A 0.264 C 0.241 G 0.237 T 0.258 
+********************************************************************************
+
+
+********************************************************************************
+SECTION I: HIGH-SCORING SEQUENCES
+********************************************************************************
+SEQUENCE NAME                      DESCRIPTION                   E-VALUE  LENGTH
+-------------                      -----------                   -------- ------
+NM_148937                          ENSMUSG00000026173               1e-19   2000
+NM_080688                          ENSRNOG00000016361             9.5e-18   2000
+NM_032726                          ENSG00000115556                7.2e-17   2000
+NM_006422                          ENSG00000111254                1.5e-06   2000
+NM_011775                          ENSMUSG00000030911             7.7e-06   2000
+NM_013455                          ENSMUSG00000022622              0.0015   2000
+NM_009581                          ENSMUSG00000042554              0.0045   2000
+NM_022915                          ENSG00000135900                  0.034   2000
+NM_012490                          ENSRNOG00000013584               0.044   2000
+NM_146726                          ENSMUSG00000047041               0.075   2000
+NM_003460                          ENSG00000103310                   0.11   2000
+NM_029236                          ENSMUSG00000037525                0.14   2000
+NM_012069                          ENSG00000101892                    0.2   2000
+NM_007868                          ENSMUSG00000025065                 0.2   2000
+NM_006805                          ENSG00000177733                   0.21   2000
+NM_012038                          ENSMUSG00000019005                0.27   2000
+NM_176924                          ENSMUSG00000047730                0.27   2000
+NM_053967                          ENSRNOG00000007231                0.28   2000
+NM_022484                          ENSG00000146802                   0.31   2000
+NM_016106                          ENSG00000092108                    0.4   2000
+NM_016163                          ENSG00000092108                    0.4   2000
+NM_021475                          ENSMUSG00000022057                0.57   2000
+NM_013462                          ENSMUSG00000031489                0.74   2000
+NM_002227                          ENSG00000162434                   0.83   2000
+NM_000717                          ENSG00000167434                   0.97   2000
+NM_147080                          ENSMUSG00000044506                   1   2000
+NM_001097                          ENSG00000100312                    1.1   2000
+NM_023662                          ENSMUSG00000031592                 1.1   2000
+NM_026576                          ENSMUSG00000016984                 1.4   2000
+NM_021757                          ENSRNOG00000006655                 1.6   2000
+NM_019794                          ENSMUSG00000031701                 1.7   2000
+NM_019343                          ENSRNOG00000003618                 1.7   2000
+NM_144715                          ENSG00000163576                    1.9   2000
+NM_010510                          ENSMUSG00000048806                   2   2000
+NM_009241                          ENSMUSG00000029682                 2.1   2000
+NM_014783                          ENSG00000175306                    2.1   2000
+NM_006056                          ENSG00000171596                    2.5   2000
+NM_175619                          ENSG00000182223                    2.7   2000
+NM_000994                          ENSG00000144713                    2.7   2000
+NM_031922                          ENSG00000135597                    2.7   2000
+NM_007260                          ENSG00000011009                    2.8   2000
+NM_011839                          ENSMUSG00000046212                 2.9   2000
+NM_009531                          ENSMUSG00000030094                 3.5   2000
+NM_011717                          ENSMUSG00000024050                 3.5   2000
+NM_011291                          ENSMUSG00000025922                 3.5   2000
+NM_000324                          ENSG00000112077                    3.6   2000
+NM_026408                          ENSMUSG00000024534                 3.7   2000
+NM_026065                          ENSMUSG00000009550                 3.7   2000
+NM_010324                          ENSMUSG00000025190                 3.7   2000
+NM_028381                          ENSMUSG00000003208                 3.8   2000
+NM_033030                          ENSG00000152430                    3.8   2000
+NM_003215                          ENSG00000135605                    3.9   2000
+NM_172893                          ENSMUSG00000038507                   4   2000
+NM_133867                          ENSMUSG00000040600                 4.1   2000
+NM_027877                          ENSMUSG00000024906                 4.1   2000
+NM_011937                          ENSMUSG00000024437                 4.2   2000
+NM_024993                          ENSG00000176204                    4.2   2000
+NM_002578                          ENSG00000077264                    4.3   2000
+NM_021763                          ENSRNOG00000010533                 4.3   2000
+NM_016957                          ENSMUSG00000051601                 4.4   2000
+NM_172499                          ENSMUSG00000041945                 4.4   2000
+NM_021470                          ENSMUSG00000029130                 4.9   2000
+NM_177001                          ENSMUSG00000047433                 4.9   2000
+NM_021836                          ENSRNOG00000003678                 4.9   2000
+NM_011167                          ENSMUSG00000021345                 5.1   2000
+NM_020610                          ENSMUSG00000031019                 5.2   2000
+NM_026838                          ENSMUSG00000031253                 5.3   2000
+NM_153270                          ENSG00000185915                    5.3   2000
+NM_013255                          ENSG00000128585                    5.3   2000
+NM_031817                          ENSRNOG00000015326                 5.4   2000
+NM_172801                          ENSMUSG00000050201                 5.5   2000
+NM_033072                          ENSMUSG00000025409                 5.6   2000
+NM_177229                          ENSMUSG00000047113                 5.7   2000
+NM_026776                          ENSMUSG00000035120                 5.7   2000
+NM_027279                          ENSMUSG00000041396                 5.8   2000
+NM_052828                          ENSG00000137394                    6.1   2000
+NM_153189                          ENSG00000106304                    6.2   2000
+NM_003117                          ENSG00000106304                    6.2   2000
+NM_004490                          ENSG00000115290                    6.2   2000
+NM_153784                          ENSMUSG00000043782                 6.3   2000
+NM_011741                          ENSMUSG00000037327                 6.4   2000
+NM_002192                          ENSG00000122641                    6.6   2000
+NM_031249                          ENSMUSG00000050614                 6.6   2000
+NM_016704                          ENSMUSG00000022181                 6.6   2000
+NM_146140                          ENSMUSG00000044528                 6.7   2000
+NM_026856                          ENSMUSG00000049606                 6.8   2000
+NM_153572                          ENSMUSG00000041298                 6.9   2000
+NM_024251                          ENSMUSG00000030051                 7.6   2000
+NM_176831                          ENSMUSG00000009768                 7.6   2000
+NM_172599                          ENSMUSG00000037526                 7.9   2000
+NM_028539                          ENSMUSG00000048409                 8.1   2000
+NM_025781                          ENSMUSG00000031953                 8.3   2000
+NM_174924                          ENSG00000169340                    8.4   2000
+NM_007003                          ENSG00000101951                    8.4   2000
+NM_028604                          ENSMUSG00000019792                 8.5   2000
+NM_013166                          ENSRNOG00000012460                 8.5   2000
+NM_153399                          ENSMUSG00000040566                 8.5   2000
+NM_173084                          ENSG00000175447                    8.6   2000
+NM_009412                          ENSMUSG00000027506                 8.6   2000
+NM_005565                          ENSG00000043462                    9.3   2000
+NM_145514                          ENSMUSG00000038733                 9.4   2000
+NM_000319                          ENSG00000139197                    9.5   2000
+NM_139063                          ENSMUSG00000038982                 9.6   2000
+NM_009980                          ENSMUSG00000030970                 9.8   2000
+
+********************************************************************************
+
+
+
+********************************************************************************
+SECTION II: MOTIF DIAGRAMS
+********************************************************************************
+SEQUENCE NAME                      E-VALUE   MOTIF DIAGRAM
+-------------                      --------  -------------
+NM_148937                             1e-19  46_[2]_195_<3>_17_[4]_967_[4]_687
+NM_080688                           9.5e-18  681_[3]_998_[2]_195_[3]_17_[4]_23
+NM_032726                           7.2e-17  73_[4]_1628_[2]_163_[3]_15_[4]_33
+NM_006422                           1.5e-06  94_[4]_71_[4]_105_[8]_1062_[2]_
+                                             361_[8]_198
+NM_011775                           7.7e-06  624_[8]_512_[3]_141_[4]_86_[2]_
+                                             74_[2]_456
+NM_013455                            0.0015  993_[2]_532_[8]_433
+NM_009581                            0.0045  117_[4]_262_[2]_1577
+NM_022915                             0.034  1378_[4]_250_[8]_71_[2]_236
+NM_012490                             0.044  147_[2]_694_[3]_618_[2]_40_[8]_417
+NM_146726                             0.075  1067_[3]_139_[2]_267_[8]_98_[4]_
+                                             41_[2]_281
+NM_003460                              0.11  592_[3]_571_[8]_71_[4]_701
+NM_029236                              0.14  263_[8]_1004_[2]_661_[4]_7
+NM_012069                               0.2  141_[8]_153_[8]_732_[4]_243_[2]_
+                                             117_[2]_108_[2]_378
+NM_007868                               0.2  493_[4]_136_[8]_522_[8]_89_[3]_
+                                             4_[8]_362_[4]_39_[8]_45_[8]_138
+NM_006805                              0.21  281_[8]_1075_[2]_78_[3]_3_[8]_
+                                             431_[4]_25
+NM_012038                              0.27  740_[8]_146_[2]_221_[8]_418_[3]_
+                                             50_[8]_239_[8]_60
+NM_176924                              0.27  396_[3]_1539_[4]_21
+NM_053967                              0.28  148_[8]_19_[4]_799_[8]_969
+NM_022484                              0.31  111_[8]_178_[4]_108_[8]_1293_
+                                             [8]_224
+NM_016106                               0.4  673_[2]_899_[8]_47_[3]_275_[4]_20
+NM_016163                               0.4  673_[2]_899_[8]_47_[3]_275_[4]_20
+NM_021475                              0.57  634_[8]_527_[8]_364_[2]_102_[8]_289
+NM_013462                              0.74  333_[3]_255_[3]_593_[8]_146_[2]_589
+NM_002227                              0.83  582_[8]_356_[8]_911_[4]_86
+NM_000717                              0.97  167_[3]_131_[8]_772_[2]_251_[4]_593
+NM_147080                                 1  355_[2]_292_[3]_909_[3]_5_[8]_
+                                             243_[4]_89
+NM_001097                               1.1  1490_[8]_489
+NM_023662                               1.1  705_[8]_239_[3]_630_[8]_47_[8]_
+                                             124_[4]_148
+NM_026576                               1.4  435_[4]_299_[2]_565_[8]_367_[8]_248
+NM_021757                               1.6  436_[3]_117_[2]_304_[4]_1041_[8]_16
+NM_019794                               1.7  682_[8]_57_[3]_237_[8]_102_[4]_836
+NM_019343                               1.7  228_[8]_40_[3]_58_[2]_655_[8]_
+                                             70_[4]_241_[3]_580
+NM_144715                               1.9  5_[3]_464_[4]_57_[3]_606_[8]_782
+NM_010510                                 2  86_[3]_1326_[2]_153_[2]_277_[8]_74
+NM_009241                               2.1  208_[2]_1224_[3]_294_[4]_209
+NM_014783                               2.1  419_[3]_585_[8]_774_[2]_159
+NM_006056                               2.5  724_[2]_641_[8]_593
+NM_175619                               2.7  563_[2]_38_[4]_167_[8]_36_[8]_1110
+NM_000994                               2.7  327_[4]_87_[8]_302_[8]_944_[8]_254
+NM_031922                               2.7  396_[4]_58_[8]_92_[8]_253_[8]_
+                                             418_[2]_676
+NM_007260                               2.8  711_[8]_915_[2]_332
+NM_011839                               2.9  111_[8]_61_[8]_414_[8]_491_[4]_
+                                             211_[8]_213_[3]_64_[8]_286
+NM_009531                               3.5  90_[8]_729_[2]_1002_[3]_87_[2]_8
+NM_011717                               3.5  146_[8]_277_[8]_589_[4]_152_[2]_750
+NM_011291                               3.5  678_[3]_299_[2]_435_[8]_161_[4]_
+                                             317_[4]_1
+NM_000324                               3.6  1220_[8]_55_[3]_105_[2]_557
+NM_026408                               3.7  28_[2]_151_[4]_27_[4]_227_[4]_
+                                             311_[8]_220_[2]_904
+NM_026065                               3.7  107_[8]_176_[3]_286_[4]_735_[8]_610
+NM_010324                               3.7  605_[2]_42_[2]_1105_[3]_185
+NM_028381                               3.8  34_[8]_1159_[3]_6_[2]_433_[2]_284
+NM_033030                               3.8  189_[3]_218_[8]_1213_[8]_53_[2]_
+                                             3_[4]_170_[3]_26
+NM_003215                               3.9  97_[8]_88_[3]_758_[4]_369_[2]_602
+NM_172893                                 4  44_[3]_340_[2]_74_[3]_791_[3]_
+                                             133_[3]_130_[8]_362
+NM_133867                               4.1  769_[3]_282_[2]_761_[4]_123
+NM_027877                               4.1  114_[8]_370_[2]_57_[8]_1295_[4]_78
+NM_011937                               4.2  418_[4]_623_[4]_913
+NM_024993                               4.2  98_[3]_552_[8]_192_[3]_111_[4]_961
+NM_002578                               4.3  434_[2]_359_[2]_704_[3]_440
+NM_021763                               4.3  788_[2]_130_[2]_361_[2]_173_[4]_462
+NM_016957                               4.4  421_[8]_591_[3]_430_[8]_473_[8]_1
+NM_172499                               4.4  686_[3]_176_[3]_34_[8]_806_[4]_212
+NM_021470                               4.9  621_[4]_380_[8]_256_[2]_244_[4]_411
+NM_177001                               4.9  548_[2]_1093_[3]_317
+NM_021836                               4.9  1433_[4]_61_[8]_462
+NM_011167                               5.1  175_[2]_314_[3]_577_[4]_584_[3]_
+                                             160_[3]_83
+NM_020610                               5.2  1245_[4]_285_[2]_29_[3]_376
+NM_026838                               5.3  8_[2]_55_[3]_1012_[3]_538_[2]_
+                                             45_[2]_106_[8]_110
+NM_153270                               5.3  38_[8]_66_[4]_175_[3]_1026_[8]_609
+NM_013255                               5.3  141_[3]_111_[8]_1249_[4]_122_
+                                             [8]_81_[4]_187
+NM_031817                               5.4  72_[8]_1324_[8]_97_[2]_444
+NM_172801                               5.5  499_[3]_72_[2]_367_[8]_486_[2]_492
+NM_033072                               5.6  460_[2]_458_[3]_190_[8]_399_[3]_409
+NM_177229                               5.7  412_[2]_472_[8]_74_[4]_977
+NM_026776                               5.7  519_[4]_464_[2]_158_[3]_794
+NM_027279                               5.8  60_[2]_237_[8]_1023_[4]_615
+NM_052828                               6.1  814_[2]_215_[8]_137_[4]_737_[2]_11
+NM_153189                               6.2  352_[3]_1004_[8]_602
+NM_003117                               6.2  352_[3]_1004_[8]_602
+NM_004490                               6.2  677_[3]_153_[3]_257_[2]_576_[8]_253
+NM_153784                               6.3  120_[8]_1007_[2]_521_[8]_134_
+                                             [3]_134
+NM_011741                               6.4  1899_[2]_80
+NM_002192                               6.6  28_[4]_183_[3]_96_[8]_1046_[4]_
+                                             202_[8]_126_[8]_147_[2]_21
+NM_031249                               6.6  502_[8]_158_[4]_247_[2]_97_[3]_
+                                             473_[8]_72_[4]_321
+NM_016704                               6.6  587_[8]_93_[3]_219_[2]_432_[4]_583
+NM_146140                               6.7  443_[2]_103_[3]_254_[8]_99_[3]_
+                                             23_[8]_386_[4]_98_[4]_443
+NM_026856                               6.8  397_[3]_368_[8]_269_[2]_170_[4]_
+                                             199_[2]_457_[2]_12
+NM_153572                               6.9  97_[2]_458_[4]_196_[3]_861_[8]_
+                                             102_[4]_156_[8]
+NM_024251                               7.6  73_[2]_64_[4]_508_[8]_658_[4]_609
+NM_176831                               7.6  44_[8]_733_[3]_368_[2]_99_[8]_672
+NM_172599                               7.9  1166_[2]_279_[8]_391_[8]_101
+NM_028539                               8.1  226_[4]_1039_[2]_60_[8]_182_[8]_407
+NM_025781                               8.3  55_[2]_61_[8]_123_[8]_23_[4]_
+                                             823_[2]_624_[3]_163
+NM_174924                               8.4  779_[3]_64_[4]_957_[2]_135
+NM_007003                               8.4  311_[4]_68_[2]_1483_[4]_71
+NM_028604                               8.5  577_[8]_511_[3]_310_[4]_356_[4]_158
+NM_013166                               8.5  605_[8]_749_[2]_604
+NM_153399                               8.5  39_[2]_895_[4]_1022
+NM_173084                               8.6  201_[2]_643_[8]_634_[3]_459
+NM_009412                               8.6  125_[8]_526_[2]_87_[2]_483_[8]_
+                                             83_[4]_70_[8]_498
+NM_005565                               9.3  878_[8]_563_[2]_409_[8]_87
+NM_145514                               9.4  314_[3]_436_[8]_635_[8]_521_[8]_10
+NM_000319                               9.5  118_[2]_227_[3]_160_[8]_1250_
+                                             [4]_159
+NM_139063                               9.6  622_[4]_876_[4]_203_[2]_232
+NM_009980                               9.8  437_[3]_1109_[8]_62_[2]_308_[8]
+
+********************************************************************************
+
+
+
+********************************************************************************
+SECTION III: ANNOTATED SEQUENCES
+********************************************************************************
+
+
+NM_148937
+  ENSMUSG00000026173
+  LENGTH = 2000  COMBINED P-VALUE = 2.87e-24  E-VALUE =    1e-19
+  DIAGRAM: 46_[2]_195_[3]_17_[4]_967_[4]_687
+
+                                                   [2]
+                                                   2.9e-13
+                                                   ATGAAAAGCAGTGATTCACAG
+                                                   +++++++++++++++++++++
+1    GAATTGAGGGCAAAAACAGCGGTATTCCACACAGACTCTGAGCAGAATGAAAAGCAGTGATTCACAGCACACTGC
+
+                                          [3]
+                                          1.9e-13
+                                          AGTGCCCTGGGCCTTTAAGAG
+                                          +++++++++++++++++++++
+226  TTCTGCAACCCCACCCCCACAATTCCTCCTGTTGACCAGTGCCCTGGGCCTTTAAGAGTTGGATAGGCTGGACCT
+
+     [4]
+     2.6e-13
+     AGGAGAGAGCTCACACCTCTCCC
+     +++++++++++++++++++++++
+301  AGGAGAGCGCTCACACCTCTCCCCACCCCACTCCTCCTCTCTGGCTGTAACTCTGCCAGTTTACAGCAGCAAACT
+
+                    [4]
+                    3.6e-05
+                    AGGAGAGAGCTCACACCTCTCCC
+                    +++  +++ +++++ ++++++++
+1276 CACTTTATATGAAACAGGTTAGCCATAACCCTTCTCAGAAGTGAGAACATAGACAAAATAGCTCTCTTCCTTCCT
+
+
+NM_080688
+  ENSRNOG00000016361
+  LENGTH = 2000  COMBINED P-VALUE = 2.67e-22  E-VALUE =  9.5e-18
+  DIAGRAM: 681_[3]_998_[2]_195_[3]_17_[4]_23
+
+
+           [3]
+           9.1e-05
+           AGTGCCCTGGGCCTTTAAGAG
+           ++ + + +  +++++++++++
+676  TGTTACAAAGTTGTAATTCATTAAAAGATGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+                                                       [2]
+                                                       2.9e-13
+                                                       ATGAAAAGCAGTGATTCACAG
+                                                       +++++++++++++++++++++
+1651 AGATGAAATGAGGGCAAAAACAGTGGTATTCCACACAGACTCCGACCAGAATGAAAAGCAGTGATTCACAGAGCA
+
+                                              [3]
+                                              4.9e-12
+                                              AGTGCCCTGGGCCTTTAAGAG
+                                              +++++++++++++++++++++
+1876 CCTTTTCTGTCACTCCACCCCCACAACTCCTCCTGTTGACCAGTGCTCTGGGCCTTTAAGAGTTAGATAGGCTGG
+
+         [4]
+         7.7e-13
+         AGGAGAGAGCTCACACCTCTCCC
+         +++++++++++++++++++++++
+1951 ACCTAGGAAAGCGCTCACACCTCTCCCCACTCCACTCCTCTCTGGCTGTA
+
+
+NM_032726
+  ENSG00000115556
+  LENGTH = 2000  COMBINED P-VALUE = 2.04e-21  E-VALUE =  7.2e-17
+  DIAGRAM: 73_[4]_1628_[2]_163_[3]_15_[4]_33
+
+                                                                              [4
+                                                                              6.
+                                                                              AG
+                                                                              +
+1    GAAGAATACTGTTGTATTCATGGGAACATTGAAACAGAATATTTGAGATCTAGACTCTCCCAGAAAATCCACTAT
+
+     ]
+     7e-06
+     GAGAGAGCTCACACCTCTCCC
+     +++ ++ +++++ ++++++++
+76   GAGTGATAACACCCCTCACAGATTACCCAAGAGGGAAGGAGGTTGGGCAGCATTAAATTGATGCAAATAGTGACT
+
+                                                                               [
+                                                                               2
+                                                                               A
+                                                                               +
+1651 GTGGAAACAGACACTATGGAAAGGAGCCAGATTAAATAAGGGCAAAAACAGCCGTATTACATGCAGGAACCAGAA
+
+     2]
+     .9e-13
+     TGAAAAGCAGTGATTCACAG
+     ++++++++++++++++++++
+1726 TGAAAAGCAGTGATTCACAGAGCACTGCAACTGAGGACACTCCCTTCACTTTGAGAGCTACTGCGGGATTGGGGT
+
+                                      [3]                                 [4]
+                                      4.2e-11                             2.1e-1
+                                      AGTGCCCTGGGCCTTTAAGAG               AGGAGA
+                                      ++++ ++++++++++++++++                +++++
+1876 CCACCCCACCCCCACTACTCCCTCACATTGACCAGTGTCCTGGGCCTTTAAGAGTTGGATGGGCTGGACTGGAGA
+
+     
+     2
+     GAGCTCACACCTCTCCC
+     +++++++++++++++++
+1951 GAGCTCACACCTCTCCCCTTCTTACTGCTTCCCTCCGGCTATAACTTGCC
+
+
+NM_006422
+  ENSG00000111254
+  LENGTH = 2000  COMBINED P-VALUE = 4.16e-11  E-VALUE =  1.5e-06
+  DIAGRAM: 94_[4]_71_[4]_105_[8]_1062_[2]_361_[8]_198
+
+
+                        [4]
+                        3.6e-05
+                        AGGAGAGAGCTCACACCTCTCCC
+                        ++ ++ +++++ +++++  ++++
+76   GATCAGCAGGACTGTCTCCAGCACGGAGCTGACACTGTTCAGCTGCCTCAACTGGTAGACGCTCCCAAGAAGTCA
+
+                                           [4]
+                                           1.0e-11
+                                           AGGAGAGAGCTCACACCTCTCCC
+                                           +++++++++++++++++++++++
+151  GAGGCAGCTGTCGGGGCGGAGGTGTCCATGACCTCCCCAGGACAGAGCAAAAACTTCTCCCTCAAGAACACAAAC
+
+                     [8]
+                     2.3e-09
+                     AACTTCAAAATGTCTCTTTGA
+                      ++++++ +++++++++++++
+301  CACTTATATTGTACAATACTTCAGACTGTTTTTTTTAAATACATAAAACTTTATGTTAAAAAACTCTATTAACAT
+
+                                                      [2]
+                                                      3.7e-05
+                                                      ATGAAAAGCAGTGATTCACAG
+                                                      + ++++ ++++ +++ ++ +
+1351 CCAGGAAAGACAAATATATAAGGAGTTGCCAGAGTTGAGGAGGTCCTCCAAGAGACCCAGAGATACAGAAAGAAA
+
+                                                             [8]
+                                                             4.9e-06
+                                                             AACTTCAAAATGTCTCTTT
+                                                             ++  + ++++++  ++ ++
+1726 TCAGGATGATTTTGATTGAATAATTCAAAAGTATCTTCTGATTTTCATGGTTATAAAAGGTTAAAATGAATTCTT
+
+     
+     
+     GA
+     ++
+1801 GATAAACTGTGGCTTATTTTAAGAGAATAGTTTAAATAAGGAAGCTGCAAATTGTGACACAGACAGGGGACAGGC
+
+
+NM_011775
+  ENSMUSG00000030911
+  LENGTH = 2000  COMBINED P-VALUE = 2.18e-10  E-VALUE =  7.7e-06
+  DIAGRAM: 624_[8]_512_[3]_141_[4]_86_[2]_74_[2]_456
+
+
+                             [8]
+                             3.2e-09
+                             AACTTCAAAATGTCTCTTTGA
+                             +++++ +++++++++++++++
+601  ATTTGAACCTCTTCCCAATTACAGAGATTAAAAATGTTTTTTGGAGGGGGGAGATCATATTTTCTAGTCAAAGGT
+
+                                     [3]
+                                     4.0e-05
+                                     AGTGCCCTGGGCCTTTAAGAG
+                                      +++++ +++ + +++++++
+1126 AAAAATACACATGTCAAGCTTTGGCATCGAGAGGTGCTGTGGACATTTAAGAAGTTATTAGGCCATGGAGGATGA
+
+                                                 [4]
+                                                 2.9e-05
+                                                 AGGAGAGAGCTCACACCTCTCCC
+                                                  +++++ + +++ ++  +++ ++
+1276 ATGATGGGACCATCCAGGCTTGGACTTGAGCATCAACAGGAAGGTGGAAAACAATCCCAAATCATCCCCAAAGCC
+
+        [2]
+        4.0e-05
+        ATGAAAAGCAGTGATTCACAG
+         +  +++ ++++++++++ ++
+1426 TGTGTCCAAGTAAGGGACTCAAACACAGAGGACTCTTAAGTGTACTGTCAGGTGGGCTGCAGCTAAACCCTATTG
+
+                            [2]
+                            7.5e-10
+                            ATGAAAAGCAGTGATTCACAG
+                            ++++++  +++++++++++++
+1501 AACATACATGACATGTGTCGGGTATAAGACTAAGGGATTCACAGAATCTCAGATAGGAGATTTACCTTCTCTTGT
+
+
+NM_013455
+  ENSMUSG00000022622
+  LENGTH = 2000  COMBINED P-VALUE = 4.27e-08  E-VALUE =   0.0015
+  DIAGRAM: 993_[2]_532_[8]_433
+
+
+                       [2]
+                       1.4e-05
+                       ATGAAAAGCAGTGATTCACAG
+                       + +++++ ++++ ++++ +++
+976  CGGGCCTTGATGTGGAAAAAAAAAAACAGTTATTCTCACATATGGAGAAAGGAAGTCTGCTTTCTGATGCTTAGG
+
+                                                   [8]
+                                                   1.7e-12
+                                                   AACTTCAAAATGTCTCTTTGA
+                                                   +++++++++++++++++++++
+1501 ACTCAGCAGTGGGCAAGAAGGTAGGCTTTGAAGTCATAAGTCAAGCAACTTCAAAATGGCTCTTTGAAAGTGTTC
+
+
+NM_009581
+  ENSMUSG00000042554
+  LENGTH = 2000  COMBINED P-VALUE = 1.26e-07  E-VALUE =   0.0045
+  DIAGRAM: 117_[4]_262_[2]_1577
+
+
+                                               [4]
+                                               2.6e-09
+                                               AGGAGAGAGCTCACACCTCTCCC
+                                               ++++++++ ++ ++++++++ ++
+76   CACACTTTCGTTCTGAATTTACGTTTATAGGGATGAAACTTCAGGAAAGAAAAGACACTTCATAGTGTAGAAACA
+
+                                [2]
+                                5.9e-09
+                                ATGAAAAGCAGTGATTCACAG
+                                + ++++++++++++++++ ++
+376  NNNNNNNNNNNNNTCCCAGAAAGCATAAAAGGAGCAAGTGATTCATAGACTGCTTGCTTTCTGCAGGTGCAAACA
+
+
+NM_022915
+  ENSG00000135900
+  LENGTH = 2000  COMBINED P-VALUE = 9.57e-07  E-VALUE =    0.034
+  DIAGRAM: 1378_[4]_250_[8]_71_[2]_236
+
+
+                                 [4]
+                                 5.2e-06
+                                 AGGAGAGAGCTCACACCTCTCCC
+                                 ++++++++ +++ ++ + ++  +
+1351 AAAAATTGTACTGTATTAACAGTTTGGTAGGAAAGCAATAGAATTACATTGTTAAAGAGTGTTAACATAAAGAAG
+
+      [8]
+      1.1e-08
+      AACTTCAAAATGTCTCTTTGA
+      +++++ ++++++++++ +++
+1651 TAAATTAAAAATGTTTTCTTTTTTTGAGGATTTAAATGTTTCCTTAACCTGAAAGCGAAACAGATTTTTGTTTGT
+
+                       [2]
+                       5.3e-06
+                       ATGAAAAGCAGTGATTCACAG
+                         +++++++++ ++ + ++++
+1726 TTTGCTGTAACGAGAGAGGAAGGAACAAGAGAGTGACAGGGTGAAGCAACTTCTCCCAGAAACTTTTGGGCGCAA
+
+
+NM_012490
+  ENSRNOG00000013584
+  LENGTH = 2000  COMBINED P-VALUE = 1.25e-06  E-VALUE =    0.044
+  DIAGRAM: 147_[2]_694_[3]_618_[2]_40_[8]_417
+
+
+                                                                             [2]
+                                                                             9.4
+                                                                             ATG
+                                                                              ++
+76   TTACTGGAGATATGCTTATAGCCCAAGAGGGAGGATGAGACCTTCTAGTCACTTACAGAGGAAAGACAAAACCTA
+
+     
+     e-05
+     AAAAGCAGTGATTCACAG
+     ++++++++++ +++ ++
+151  GAAAGCAGTGCTTCTCAACCTTCCTTTTTTTTTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+                                          [3]
+                                          4.2e-05
+                                          AGTGCCCTGGGCCTTTAAGAG
+                                          ++ +++ ++++++++ + + +
+826  AGAACGTGTGACAAGGTGCCACGCAGAAGAAAGAAATAAGACCTTGGGCCCTGATGTGGAAAAGAACAGTTCTCA
+
+      [2]                                                          [8]
+      6.0e-05                                                      7.2e-11
+      ATGAAAAGCAGTGATTCACAG                                        AACTTCAAAATGT
+        +++++ + +++++ + +++                                        +++++++++++++
+1501 TCAAGAAGTATGTGATACTCAGAAATGGGCAAGAAGGTAGGCTTTGAAATCATAAGTCAAGAAACTTCAAAATGG
+
+     
+     
+     CTCTTTGA
+     +++ ++++
+1576 CTCCTTGAAAGTGTTCTGGTAAACCCCCCCCCCCCCACGCCCCTGTACTGATGATCCCATCAGTCAGCAGAACCC
+
+
+NM_146726
+  ENSMUSG00000047041
+  LENGTH = 2000  COMBINED P-VALUE = 2.12e-06  E-VALUE =    0.075
+  DIAGRAM: 1067_[3]_139_[2]_267_[8]_98_[4]_41_[2]_281
+
+
+                      [3]
+                      5.6e-05
+                      AGTGCCCTGGGCCTTTAAGAG
+                      + +   ++++++ +++++++
+1051 AGTTGTCAACAATTACTATTTAGATGGGCTCTTAAAATAATCTTTTATGGAACTCATGAGTTGGTGCCAAACAAC
+
+                                [2]
+                                8.0e-05
+                                ATGAAAAGCAGTGATTCACAG
+                                + ++++++++ +  +  ++++
+1201 TGGGACTAGTGAGATCATGGACCTTAGAAAAGAAGCAATTTCCGACACTTTGCTAGGCCAGCATTAATTCCTTAC
+
+                    [8]
+                    5.0e-05
+                    AACTTCAAAATGTCTCTTTGA
+                     + ++++++++ + +++ ++
+1501 AGGCTTTTGTAATCATGTTTCAAAATTTATTTCTTTAATGGTTATTCAAATTTTTGAACAATGGGCTTAGGTAGA
+
+                                                                [4]
+                                                                2.3e-07
+                                                                AGGAGAGAGCTCACAC
+                                                                 ++++++++++  ++
+1576 AGCATAGGGACTGAGCAGCTGCTTAATTAGAGAATAGCAGTGACTTGAACTCTCAGAAATGGACAGAGAAGCAAG
+
+                                                     [2]
+                                                     1.4e-07
+     CTCTCCC                                         ATGAAAAGCAGTGATTCACAG
+     + +++++                                         + +++++++ +++++ ++ ++
+1651 CACACACACAATATAAAACTGAACAGAATGGTAAGTGGCAGAACAGAGAGAAAAAGAGGTGATACATAGAAAGAC
+
+
+NM_003460
+  ENSG00000103310
+  LENGTH = 2000  COMBINED P-VALUE = 3.14e-06  E-VALUE =     0.11
+  DIAGRAM: 592_[3]_571_[8]_71_[4]_701
+
+
+                                                                        [3]
+                                                                        1.5e-09
+                                                                        AGTGCCCT
+                                                                        ++++++ +
+526  TTAGTTATGATATACAATGCAACTTGATAAAGTCACACAGAATTTTGAATCTAAAACTATTTAGCAGAATGCCTT
+
+     
+     
+     GGGCCTTTAAGAG
+     ++ ++++++++++
+601  GGATCCTTAAAAGAAGGTCCAAAGAAGGCTTTTGATGAGACAATTTGAGCTTTTGACCCTATTACTCTAGAAATG
+
+                                                                [8]
+                                                                7.4e-06
+                                                                AACTTCAAAATGTCTC
+                                                                +++++  ++++ ++++
+1126 AGCTGACATTTAAATCCAGCCACTAGAAGCTTTCATCACTTATTCTTCCTGCCATACGTAGATTTGAAATATTTC
+
+     
+     
+     TTTGA
+      +++
+1201 CTTGCAAAAATTTTAGACTCAAGTTTTCTAGAATACAAGTGAATTTCCATATATCTTGAAGGTCTCTAAGAGCTG
+
+      [4]
+      6.3e-05
+      AGGAGAGAGCTCACACCTCTCCC
+      ++ + + ++++ +++++  +++
+1276 CAGTATAAAGCAGACACTAGTCATGTTGATGACATAATTACTTAGAATAAGACTGGCTACCCTCAGGACGAAGGC
+
+
+NM_029236
+  ENSMUSG00000037525
+  LENGTH = 2000  COMBINED P-VALUE = 3.92e-06  E-VALUE =     0.14
+  DIAGRAM: 263_[8]_1004_[2]_661_[4]_7
+
+
+                                           [8]
+                                           7.8e-06
+                                           AACTTCAAAATGTCTCTTTGA
+                                            ++++ +++++++++  +++
+226  TCTTTTCCCATCCTTCCATTCCCACTGTCTCCTAACCCTGATTTAAAATGTTTGGTGGCCCTCAAACTGAACATG
+
+                  [2]
+                  1.0e-05
+                  ATGAAAAGCAGTGATTCACAG
+                  + ++ +++++ ++ + ++ ++
+1276 AAATATATAAACTAAGACAGCCAAGGCTACATAGATACCTTGTCTTAAAAGACAAACAAATAAACAAAAAGTCCC
+
+                         [4]
+                         6.6e-09
+                         AGGAGAGAGCTCACACCTCTCCC
+                         ++++++++ ++ ++++  +++++
+1951 AGAAACCAGATCAGATTTTAAGGAGAGCAAAGAAACGACACAGCCTTTTT
+
+
+NM_012069
+  ENSG00000101892
+  LENGTH = 2000  COMBINED P-VALUE = 5.75e-06  E-VALUE =      0.2
+  DIAGRAM: 141_[8]_153_[8]_732_[4]_243_[2]_117_[2]_108_[2]_378
+
+
+                                                                       [8]
+                                                                       3.3e-06
+                                                                       AACTTCAAA
+                                                                       + +++++++
+76   ACCTGTGCCCAAGAAATGAGCATGTAGTTTTCTAGACTTTAATAATGTGCCCCCGAATCTGCTGTTATATTCAAA
+
+     
+     
+     ATGTCTCTTTGA
+      + ++++ +++
+151  GTCTCTCCTTTCTCCCTCCCCTTGTCCCTTTGTAGCCACTGCTTCATTTGCATCCGCATTAGCCACTTGTGGCTC
+
+                    [8]
+                    7.5e-07
+                    AACTTCAAAATGTCTCTTTGA
+                    ++++++++++ + +++ +++
+301  TAGTGTTTTGAGTGTAAATTCATACAGATTTCTGTCTTCAACATCCTATGGGATTTGGTTATTTTACCCCACCCA
+
+                       [4]
+                       7.1e-07
+                       AGGAGAGAGCTCACACCTCTCCC
+                       +++++ ++ ++++++++ ++ ++
+1051 ATGGGGTTTGATTTGTGAAGGAAGGATCACAAACTCCATCCAGGCCTTTTAGCCACCCAGGCTCCAGTAGGGTGG
+
+                                                                [2]
+                                                                3.7e-06
+                                                                ATGAAAAGCAGTGATT
+                                                                +++++ ++++++++++
+1276 GGATAAATATAGATCTCAACCACTACAGAAATTTGGCCAGGATTTGGAGGGGTGTGTGTATGAGGGGCAGTGACT
+
+     
+     
+     CACAG
+      +  +
+1351 GATGGGGCCACTCTGACATGTGGGCTAGAAAAAGGCTTCCTTATGTCCAAGACCCTCTTTGGGGCAGGGAGGAAT
+
+                                                    [2]
+                                                    7.7e-06
+                                                    ATGAAAAGCAGTGATTCACAG
+                                                    + +++   ++ +++++ ++++
+1426 GAAGTAGGAGCCTGGCTTCTAAAATCAGAAAGAATAATCCACATTTTAAGAAGCTCATGGATTGACACAAATAAG
+
+                               [2]
+                               3.7e-05
+                               ATGAAAAGCAGTGATTCACAG
+                               + ++++++++++ ++++   +
+1576 TCCTTTTGCATGAGATCAGCCTTATGAAAAAAGCAAGTTATTCTTGGCCATCTCCCCTCACCCCCAACTCCCACC
+
+
+NM_007868
+  ENSMUSG00000025065
+  LENGTH = 2000  COMBINED P-VALUE = 5.78e-06  E-VALUE =      0.2
+  DIAGRAM: 493_[4]_136_[8]_522_[8]_89_[3]_4_[8]_362_[4]_39_[8]_45_[8]_138
+
+
+                                                [4]
+                                                2.2e-05
+                                                AGGAGAGAGCTCACACCTCTCCC
+                                                 +++++ +  +  +++++++ +
+451  TTCTACCTTGCACACAAACTTCTGCATTTAAAGTGACTGTGGCTGGAAACATGAGCCACTTCTTCAAGATTACTA
+
+                                                         [8]
+                                                         1.0e-07
+                                                         AACTTCAAAATGTCTCTTTGA
+                                                         +++++++ ++++  +++++ +
+601  AGGGGAGGGAAGGAATTTTTTTTCAGTCACACAGATCAATTCAATTTGTCGGAGCTTCAGAATGAATTTTTAAAT
+
+                                                                           [8]
+                                                                           5.5e-
+                                                                           AACTT
+                                                                           +++
+1126 CTATGCAACAGGATCAGTGCTGTAGTCCCCGGTTCAAGCTGAAAATGTTGCACAGGAAGACATATCATGTAAAGG
+
+     
+     05
+     CAAAATGTCTCTTTGA
+      ++ +++ ++++++++
+1201 TATTATGATTTTTTTATCTGTGTCTATACAAACAGAACTTAATGTCATAACTCTTGTCATAACCCTTACCAAGGT
+
+                                   [3]                      [8]
+                                   1.1e-06                  6.3e-05
+                                   AGTGCCCTGGGCCTTTAAGAG    AACTTCAAAATGTCTCTTTG
+                                   ++++  +++++  ++++++ +     ++++  ++++  ++ ++++
+1276 CCTATGGACTCTTTACCTGTCAAGTCTGAAAATATGATGGTATTTTAAATGGGAATGCTTAGTAATCATTGTTTG
+
+     
+     
+     A
+     +
+1351 ATGTTGATGTATCTATGATGCCATAGTACCAAGGTGAACTGCTGTCCGTTTTCAAGAAGATGCTCTTGTCTCATT
+
+                                                                    [4]
+                                                                    1.9e-05
+                                                                    AGGAGAGAGCTC
+                                                                    ++++++ + +++
+1651 AAATTATAAAATGTGCTATCTGTATATTAATTCTATTATATAATTATAATAACTTGGCAAGGCAGGACACCAAAA
+
+                                                       [8]
+                                                       5.6e-05
+     ACACCTCTCCC                                       AACTTCAAAATGTCTCTTTGA
+     +++ +  ++++                                        ++ + +++++ ++++++++
+1726 ACATTATTCAGGACATATTTTCTTGTTAGTTGCCAAGATAGAATTTCTTTGAACTAATAATTTTTTTTTTTTTTT
+
+                                              [8]
+                                              3.9e-05
+                                              AACTTCAAAATGTCTCTTTGA
+                                               +++++  +++++ ++++ +
+1801 AGTTCTTCAAATTCACATTCCCAGCAAGTCTCTTTGTTTGGTAATTCTGAATGTGTTTTATTGCTTTCTGGTAGG
+
+
+NM_006805
+  ENSG00000177733
+  LENGTH = 2000  COMBINED P-VALUE = 5.92e-06  E-VALUE =     0.21
+  DIAGRAM: 281_[8]_1075_[2]_78_[3]_3_[8]_431_[4]_25
+
+
+                                                             [8]
+                                                             2.5e-05
+                                                             AACTTCAAAATGTCTCTTT
+                                                             ++++++ ++ + +  ++++
+226  TAAAAAATGGTTCNNNNNNNNNNNNNNNNNNNNNNNNNNNGCCCAATCTTTCAGAAAGCTTCTTAGTATAGCTTT
+
+     
+     
+     GA
+     ++
+301  GAGCCTTCAACGTCAACATGTAAGTCAAATATTTAAGTTAAATATTAACATCAAGTTAAGCATACTAGATTAAAA
+
+                                [2]
+                                6.2e-05
+                                ATGAAAAGCAGTGATTCACAG
+                                  +++++ ++ + +++ ++++
+1351 GACCTCTTAAGTTTCTCTAAGTACACATCAAGAATAAAGAATTAACACGTAAAGTCTTAAACAAATATCCCTTAA
+
+                                                        [3]
+                                                        2.0e-06
+                                                        AGTGCCCTGGGCCTTTAAGAG
+                                                        ++ ++ +++ ++ +++++++
+1426 AAAACGGTACGGAATGGATCCTAGAAAAAAAATGTTAGACATGTACGGTCAAACACAATGATTTATTAAAAATAA
+
+     [8]
+     1.6e-07
+     AACTTCAAAATGTCTCTTTGA
+     +++ + ++++++ ++++++++
+1501 AACGTAAAAATGATTTTTGTACATATGCTTCCAAATTTCAGGCATGGGATCCAAGTAGATTTCATAGAAAACGCT
+
+       [4]
+       1.5e-05
+       AGGAGAGAGCTCACACCTCTCCC
+        +++++++++++ + ++  + ++
+1951 GTTGGAAAGAGCTACCCCTATAGCCACTCCCAGGCATTTTAAATTTTCTT
+
+
+NM_012038
+  ENSMUSG00000019005
+  LENGTH = 2000  COMBINED P-VALUE = 7.63e-06  E-VALUE =     0.27
+  DIAGRAM: 740_[8]_146_[2]_221_[8]_418_[3]_50_[8]_239_[8]_60
+
+
+                                                                      [8]
+                                                                      3.4e-06
+                                                                      AACTTCAAAA
+                                                                      +++  +++++
+676  GAGGACACACATGGCTTTTCTTCAGTGCTGAGCGGGACAGTCTACACTAATAAAAAAGAGGACTAAAAAGCAAAC
+
+     
+     
+     TGTCTCTTTGA
+     +++ ++++++
+751  TGGATTTTTGTTCACACAGCATCTACATCTTCTAAAAAGAACGAGTGGCATGGAGGAGCCGAGCAGTGGTGGCAC
+
+            [2]
+            1.0e-07
+            ATGAAAAGCAGTGATTCACAG
+            + ++ ++++++++++ +++++
+901  GGGTTCCAGGATAGCCAGGGATACACAGAAAATCCTGTCTCANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+                             [8]
+                             6.7e-05
+                             AACTTCAAAATGTCTCTTTGA
+                              + ++  + ++++++++++++
+1126 CTATTCCCTGCTCTTATCTTGCTGTGTTTTCTTCTGTTTCTTGGAGTTTGACATTCATGAGTGTGTCGTCTGTCT
+
+                  [3]
+                  1.0e-05
+                  AGTGCCCTGGGCCTTTAAGAG
+                  ++++++ +++ +++   ++++
+1576 GCACTCCATTAAAAGTGCTTTGGATCAAGGAAAGAATGCTGCAGTGTTTGATAAAAGGAAGCATGGGCTCTACAC
+
+              [8]
+              1.3e-05
+              AACTTCAAAATGTCTCTTTGA
+               ++++ +++  + ++++ ++
+1651 AAGTTAGTTTAATTAAAAGGGATTTTGTGTTTTTGTTTTGTGGTCACAGCGCCTGCCCCTGAGAGGCTTAATAGC
+
+                                                 [8]
+                                                 6.1e-05
+                                                 AACTTCAAAATGTCTCTTTGA
+                                                  ++++  ++ +   +++++++
+1876 GTCAGTTTTATTCCATCCTGAAAAATAAAAAGTAGCATGCCTTTTACTTTGAATTTAATCTTGGATGACCCCACT
+
+
+NM_176924
+  ENSMUSG00000047730
+  LENGTH = 2000  COMBINED P-VALUE = 7.76e-06  E-VALUE =     0.27
+  DIAGRAM: 396_[3]_1539_[4]_21
+
+
+                          [3]
+                          1.2e-05
+                          AGTGCCCTGGGCCTTTAAGAG
+                          ++++ +++++ + +++ +++
+376  GAAATTTTATATTTAATTTTTAATAGCATGGATTTTTTAAAAATGTCTGTTGCAACTAGTTCAGAGACTTCAAGC
+
+           [4]
+           2.0e-09
+           AGGAGAGAGCTCACACCTCTCCC
+           ++++++++ +++++++   ++ +
+1951 GTCTGAAGGAGAGATCACACACGAGTCTGACCACTGTCACCTCTCTGCAG
+
+
+NM_053967
+  ENSRNOG00000007231
+  LENGTH = 2000  COMBINED P-VALUE = 7.90e-06  E-VALUE =     0.28
+  DIAGRAM: 148_[8]_19_[4]_799_[8]_969
+
+
+                                                                              [8
+                                                                              2.
+                                                                              AA
+                                                                              ++
+76   NNNGTTCTTTTTACATAGTGTATTCTGCTGTCACTTCCCCCTCCAACTCCTCCCAGATCTTTCTCACATACCTAA
+
+     ]                                     [4]
+     7e-10                                 5.6e-05
+     CTTCAAAATGTCTCTTTGA                   AGGAGAGAGCTCACACCTCTCCC
+     ++++++++++++++++++                    ++ ++++ ++++++ +++++ ++
+151  CTTCATACTGTCTCTTTTTCTTCCTCTCTTTAAAAAACAGAAGAGTGAAAAACCTTCAAACTNNNNNNNNNNNNN
+
+                                        [8]
+                                        5.3e-05
+                                        AACTTCAAAATGTCTCTTTGA
+                                        +++   ++++++ +++ +++
+976  TATAGAAGATGGTAGGGGCAAGGGACACTCTTACTAACAATATAATGATTTATTGTTTTATTAGATTTATTATTG
+
+
+NM_022484
+  ENSG00000146802
+  LENGTH = 2000  COMBINED P-VALUE = 8.65e-06  E-VALUE =     0.31
+  DIAGRAM: 111_[8]_178_[4]_108_[8]_1293_[8]_224
+
+
+                                         [8]
+                                         1.4e-05
+                                         AACTTCAAAATGTCTCTTTGA
+                                          ++++ + +++ +  +++++
+76   AATCTACCCACTAATGCATTAAATATAAGTGCATTTTAATTAAGACTTTAATTTTTTTAAAGCATAACTGGTCAT
+
+               [4]
+               1.7e-05
+               AGGAGAGAGCTCACACCTCTCCC
+                ++ ++++++++ +++++  + +
+301  TGAAAAGTCATGGGGAGAGAAAGAACTTGGCTCCTTATTTATTTTGCAACAGTGTGTATGCTATTNNNNNNNNNN
+
+                                                                       [8]
+                                                                       6.6e-10
+                                                                       AACTTCAAA
+                                                                       +++++++++
+376  NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTAAAATAAATAAATAATGGTTAAAAAAAGAAAATAAATTCATA
+
+     
+     
+     ATGTCTCTTTGA
+     ++++++++++++
+451  CTGTTTTTTTTATTCTATTGAAAAAAAAGATGATGTTTTTAGAGCTAGTGGTTTTGTGGTTTTAGCAAGGTATCA
+
+                                   [8]
+                                   5.0e-05
+                                   AACTTCAAAATGTCTCTTTGA
+                                    ++++ + + +  +++++++
+1726 CAAATAGCTGAGAAATTTAAAAGGAGATTTTACTTAACATTAACTTTTTTTAAGCTAAAAGCTCATAATTTTAAC
+
+
+NM_016106
+  ENSG00000092108
+  LENGTH = 2000  COMBINED P-VALUE = 1.12e-05  E-VALUE =      0.4
+  DIAGRAM: 673_[2]_899_[8]_47_[3]_275_[4]_20
+
+
+                                                                              [2
+                                                                              7.
+                                                                              AT
+                                                                              +
+601  NNNNNNNNNNNNNNAGAGATTATGTAATAAGATAACCTGTTTTAAATGTCACAAAAATAAATTTTTATTAATGAA
+
+     ]
+     3e-08
+     GAAAAGCAGTGATTCACAG
+     +++++++++++ +++++++
+676  AAAAGCCAGTGTTTCACACTATAAAACAATATATAATCGTTAGGCTAACCATTTCATTTATATAATTGTGAAAGT
+
+                       [8]
+                       3.2e-05
+                       AACTTCAAAATGTCTCTTTGA
+                       +++ + ++++++++ + +++
+1576 CACTGAGAAACGAGAGCTAACGTTAAAATGTTGCCTGTGTTTCAGGAGCTTCATTTACTTCAGTCCTGTGGTCCT
+
+                [3]
+                1.9e-05
+                AGTGCCCTGGGCCTTTAAGAG
+                ++ + +++   ++++++++ +
+1651 TATACTCCTAGAGCATTATCACCCTTTAAGTGCTCTGTAAGTCACCAAACCAACAGTTCAGTAATTAGGTTCTTA
+
+            [4]
+            1.4e-05
+            AGGAGAGAGCTCACACCTCTCCC
+             ++ +++ +++  +++ +++ ++
+1951 TCCGGGGCGGTAAGGGCAGCCACGTCATCCCCCCGCTCCGCTCCCCAGCC
+
+
+NM_016163
+  ENSG00000092108
+  LENGTH = 2000  COMBINED P-VALUE = 1.12e-05  E-VALUE =      0.4
+  DIAGRAM: 673_[2]_899_[8]_47_[3]_275_[4]_20
+
+
+                                                                              [2
+                                                                              7.
+                                                                              AT
+                                                                              +
+601  NNNNNNNNNNNNNNAGAGATTATGTAATAAGATAACCTGTTTTAAATGTCACAAAAATAAATTTTTATTAATGAA
+
+     ]
+     3e-08
+     GAAAAGCAGTGATTCACAG
+     +++++++++++ +++++++
+676  AAAAGCCAGTGTTTCACACTATAAAACAATATATAATCGTTAGGCTAACCATTTCATTTATATAATTGTGAAAGT
+
+                       [8]
+                       3.2e-05
+                       AACTTCAAAATGTCTCTTTGA
+                       +++ + ++++++++ + +++
+1576 CACTGAGAAACGAGAGCTAACGTTAAAATGTTGCCTGTGTTTCAGGAGCTTCATTTACTTCAGTCCTGTGGTCCT
+
+                [3]
+                1.9e-05
+                AGTGCCCTGGGCCTTTAAGAG
+                ++ + +++   ++++++++ +
+1651 TATACTCCTAGAGCATTATCACCCTTTAAGTGCTCTGTAAGTCACCAAACCAACAGTTCAGTAATTAGGTTCTTA
+
+            [4]
+            1.4e-05
+            AGGAGAGAGCTCACACCTCTCCC
+             ++ +++ +++  +++ +++ ++
+1951 TCCGGGGCGGTAAGGGCAGCCACGTCATCCCCCCGCTCCGCTCCCCAGCC
+
+
+NM_021475
+  ENSMUSG00000022057
+  LENGTH = 2000  COMBINED P-VALUE = 1.62e-05  E-VALUE =     0.57
+  DIAGRAM: 634_[8]_527_[8]_364_[2]_102_[8]_289
+
+
+                                       [8]
+                                       3.1e-08
+                                       AACTTCAAAATGTCTCTTTGA
+                                       +++++ +++ +++ ++ +++
+601  TCAGTGTGTGTGTGTAATCTATCCAATAATTCATAAATTAATAGTGGATTCTTGCCTTTCCGGTTATGCACATCC
+
+                                                              [8]
+                                                              1.9e-05
+                                                              AACTTCAAAATGTCTCTT
+                                                               +++  + +++++++++
+1126 ACTGATAGATCAAGAACGAATTGAGGAACAAGACCTTAGTATCAGACCCACCCTCTGTACTCAACAATGGCTCTC
+
+     
+     
+     TGA
+     +++
+1201 TGAACTATTTTATGACTCTAGTAATAACTACCTACCCTAAATAATTTCATAGAAGACATCACATTGTTGATTTGG
+
+                                                                        [2]
+                                                                        2.0e-06
+                                                                        ATGAAAAG
+                                                                        +++++++
+1501 GGACTCAACAGATTATATATAGTAATGTATAAGCATAAAAATGTACATATGTGCGCAATAATAATTGATAAAAGA
+
+     
+     
+     CAGTGATTCACAG
+     ++ +++  +++++
+1576 AAAGGAGACACAGTTTTGAAGAAGAGTGGGGAAGGATATATAGGAAAGTTTGAAGGGAGAGAAGGGAAGGTAGAG
+
+                                             [8]
+                                             3.1e-05
+                                             AACTTCAAAATGTCTCTTTGA
+                                             +++ +++++ ++   +++++
+1651 ATATTGTAATTAAAATATAATTTCAAAAATAAGCAACAACAACATCAAAGTGAGCCTTGTCATGATAAAATCTAT
+
+
+NM_013462
+  ENSMUSG00000031489
+  LENGTH = 2000  COMBINED P-VALUE = 2.08e-05  E-VALUE =     0.74
+  DIAGRAM: 333_[3]_255_[3]_593_[8]_146_[2]_589
+
+
+                                      [3]
+                                      8.3e-05
+                                      AGTGCCCTGGGCCTTTAAGAG
+                                      ++++++++++++ +++  +
+301  TCTTTGCTCTTGGAGTTAAAAAAGAGCCAGTGCAATACTCTGGTCACTTCCATAGAACCTCCCCACACCCCACCC
+
+              [3]
+              5.6e-07
+              AGTGCCCTGGGCCTTTAAGAG
+              +++++++++  ++++++ +
+601  TCTCAAACCAGTGCCCTGAACCTTTAGATACACAGGCACACAAAACTGAAGCAAAGTGTTTGCTGATGTCAATGA
+
+                            [8]
+                            1.4e-06
+                            AACTTCAAAATGTCTCTTTGA
+                            +++ +++++++ ++++++++
+1201 CCATAGTGAGGAGACAGGGATGAAACCTCAAACTTTTTTTTTTTCCTGGATTCCTGCTCTCAAACTAACCAGAGA
+
+                                             [2]
+                                             2.7e-05
+                                             ATGAAAAGCAGTGATTCACAG
+                                             +  + +++++++++++  +++
+1351 GGAGGGAGGGGTGCTCTCTTTTCCCTAAGCATACAGCACAAACATAGGCAGGGACTGGCAGGCCTCCAAGGCCAT
+
+
+NM_002227
+  ENSG00000162434
+  LENGTH = 2000  COMBINED P-VALUE = 2.34e-05  E-VALUE =     0.83
+  DIAGRAM: 582_[8]_356_[8]_911_[4]_86
+
+
+                                                              [8]
+                                                              7.0e-05
+                                                              AACTTCAAAATGTCTCTT
+                                                              +++   ++++++++ +++
+526  AGTGGTGTTTGTTTGTTTGTTAGTAGTAAGGACCGTAACCCACCCCTTCCTCCCCCCAAAACTATACTGTTATTT
+
+     
+     
+     TGA
+     ++
+601  TTCTATGTAAGCAAATTGTTTGAAATGCTTTGAAAATGAAATCTTAATATTAAAAGGCATATGCATGTAAAGTCT
+
+                                                                [8]
+                                                                1.5e-05
+                                                                AACTTCAAAATGTCTC
+                                                                ++++  +++++++++
+901  TAATCGTTCACATTGAATCAATGACTAAACATTTTTGATTACCCAGCTACCTCCAAGCAAACTGAAAACTGTCTA
+
+     
+     
+     TTTGA
+      ++++
+976  GTGGATCCTGAAGTCCATAGTGCCTCTAGCCGGGTCTTTCAAGTGTTGCACCACAGGGTGATGATTGATGGTAAA
+
+                     [4]
+                     2.3e-09
+                     AGGAGAGAGCTCACACCTCTCCC
+                      +++++++ +++++++++++  +
+1876 GAGTTCAGTGACTTTTTGGACAGAACACAAACTTCTTTCATTAGAAATTTTTAAAAAATGACTTGGGCATTTGTT
+
+
+NM_000717
+  ENSG00000167434
+  LENGTH = 2000  COMBINED P-VALUE = 2.75e-05  E-VALUE =     0.97
+  DIAGRAM: 167_[3]_131_[8]_772_[2]_251_[4]_593
+
+
+                      [3]
+                      1.1e-05
+                      AGTGCCCTGGGCCTTTAAGAG
+                      + ++  +++  +++ ++++++
+151  GCTCTGTGGTGCCCATAATTAAGATGCACCCATAAGAGAGTGTATAGCTTTACCCCTCCCGCATTCCTCAGAGGG
+
+                        [8]
+                        1.0e-07
+                        AACTTCAAAATGTCTCTTTGA
+                        +++++ +++ +++ ++ ++++
+301  GGGGGGCCTGAATGCAATGAACTTTAAAGTGGATCATTGATCAACAGTCCCTTAACCCCACCAAAGTTTCCCAAG
+
+                                                                   [2]
+                                                                   8.0e-05
+                                                                   ATGAAAAGCAGTG
+                                                                     +++++++++++
+1051 CCAAATGTTGGAGCCGCCCTTAAGGGAACTTCCCCTGGACTCCTCTGGAGGTCTTGCCAGATGGGAGAACCAGGG
+
+     
+     
+     ATTCACAG
+      + ++  +
+1126 GTCCATTCAGACCCCAGTCTCTTCTCTTCCCATCTCATCTCTCCACCTGGGGGGCTCAAAGAACCTAGCTCCTCC
+
+                                       [4]
+                                       2.2e-05
+                                       AGGAGAGAGCTCACACCTCTCCC
+                                       ++++++ ++++ + +    ++++
+1351 GTCTGCTTAGAAATTAAGTTTGGAGCAGCCTGGGAGGAAATAGAAGAGATGAGACCCAGAGAATCAATGGCCACT
+
+
+NM_147080
+  ENSMUSG00000044506
+  LENGTH = 2000  COMBINED P-VALUE = 2.87e-05  E-VALUE =        1
+  DIAGRAM: 355_[2]_292_[3]_909_[3]_5_[8]_243_[4]_89
+
+
+                                                            [2]
+                                                            6.4e-06
+                                                            ATGAAAAGCAGTGATTCACA
+                                                              ++++++++ +++++ + +
+301  ATGAAAATAAGTATTTAAAAGCATCCAGAATCAAAGAATAAAAATAATTAGTAATGGAAGAGGAATTGATTGAAA
+
+     
+     
+     G
+     +
+376  CTGAAACTAGTCATTGCCCTCTAATGTCCATTGAACAAAAGACATTTATAAGTATAGAAGAATAATTCATTAAAA
+
+                                                                         [3]
+                                                                         5.6e-07
+                                                                         AGTGCCC
+                                                                         +++++++
+601  TGTAACCTTCTAATTCTTTATCAATAATTATTAGATATTATTATGGAATTAATGGAATTAACATAAAGAATGCTA
+
+     
+     
+     TGGGCCTTTAAGAG
+     ++ ++ ++++++ +
+676  TGATTTTTTAAAGGTTTTATTTATTTATTGTATATAAATATACTGTAGCTGTCTTCAGACACTCCAGAAGAGGGC
+
+                            [3]                       [8]
+                            2.7e-05                   2.9e-05
+                            AGTGCCCTGGGCCTTTAAGAG     AACTTCAAAATGTCTCTTTGA
+                            + ++ + +  ++ +++++++      ++++   + ++++++++++++
+1576 ATGTTTGCTTGTTTGTTTGTTTGATTGTTTTAATCTATTAAGACTTTGAAACTGTCTGATGTTTTTTGTATGCAC
+
+                  [4]
+                  1.8e-05
+                  AGGAGAGAGCTCACACCTCTCCC
+                  +++ ++ + ++++++ ++ + ++
+1876 CACTATCCTTCACAGGTAATATCTAACATTTGTTAGAACATCCTGAATGTGACATTTAACGTTTGCTTCTTTCCA
+
+
+NM_001097
+  ENSG00000100312
+  LENGTH = 2000  COMBINED P-VALUE = 2.98e-05  E-VALUE =      1.1
+  DIAGRAM: 1490_[8]_489
+
+
+                                                                      [8]
+                                                                      3.0e-10
+                                                                      AACTTCAAAA
+                                                                      ++++++++++
+1426 TAATAGGAGACCTTGTGATATTCAGCAACGGACAGGAAGGTGGGCTTTGCAGTTGTAAATTAGGAAAATTCAAAA
+
+     
+     
+     TGTCTCTTTGA
+     ++ ++++++++
+1501 TGACTCTTGGAAAAGTGTGTTGATAGCATTCACTTGGAAGAGGAAAAGAAAACTTCCCCAACAACAATTAAGGAT
+
+
+NM_023662
+  ENSMUSG00000031592
+  LENGTH = 2000  COMBINED P-VALUE = 3.09e-05  E-VALUE =      1.1
+  DIAGRAM: 705_[8]_239_[3]_630_[8]_47_[8]_124_[4]_148
+
+
+                                   [8]
+                                   3.0e-05
+                                   AACTTCAAAATGTCTCTTTGA
+                                   ++ ++ ++ +++ ++++ +++
+676  GTTGAATTTTATGTCACTATTTTTAGATAAAATTTTAATATGATTCTAGTAAAATTATCTGCCTGTTAAAGTTTA
+
+                                                                      [3]
+                                                                      1.0e-05
+                                                                      AGTGCCCTGG
+                                                                      ++++++++
+901  CGGCCTCTGAGCCTCTGCTTCCGTGTCAGCTTCTATTTCTCCTCTCTGATCCAGGCATCATATTTAGTACCCTTA
+
+     
+     
+     GCCTTTAAGAG
+     ++ ++++ + +
+976  GTTTTTATATGTATATGTACTCCATGGGGTGTATTGCTAGGGTTGTCCGCACTTCTAATTCTAGTGCTTGGGGCT
+
+                                              [8]
+                                              1.2e-05
+                                              AACTTCAAAATGTCTCTTTGA
+                                               + ++ +++ +++ ++ +++
+1576 TATAATGTGTATATCATGTAGATCAATTTATCTGTGATACGTGTTTGATAGTGTATTCTTTTATATTTTTGGTTG
+
+                                       [8]
+                                       5.0e-06
+                                       AACTTCAAAATGTCTCTTTGA
+                                       ++++  +++ +++ ++ ++++
+1651 TGAGCCTAGCCTTTAACAGCTGAGCCATCTCTCCAGCTCGATAGTGTATTCTTTAAGATAAGTGTTTGAAAGATT
+
+                                  [4]
+                                  1.9e-07
+                                  AGGAGAGAGCTCACACCTCTCCC
+                                  ++++++++ ++ ++ ++  + ++
+1801 AACCTGATGACCTCAGATCACACAGTGGCAGGAGAGAACTGACTCCAGATAGTTGCTCTGACCTCTGCACACATG
+
+
+NM_026576
+  ENSMUSG00000016984
+  LENGTH = 2000  COMBINED P-VALUE = 3.89e-05  E-VALUE =      1.4
+  DIAGRAM: 435_[4]_299_[2]_565_[8]_367_[8]_248
+
+
+                                                                 [4]
+                                                                 2.0e-05
+                                                                 AGGAGAGAGCTCACA
+                                                                   ++++++ ++++++
+376  AGAGGGGTAGCCTGCTGGACCAGCACATCTCCACTATCCAGAAGTGTTTTTATGTTTCCTTAGAAAGATAACACA
+
+     
+     
+     CCTCTCCC
+       ++++
+451  AGTCACTTAGTATCAATTTCAGTGAGGATGGTCAGGTATAAATTTCAAAAAAAAAAACCAAAAAACAAAAAAACA
+
+            [2]
+            2.4e-06
+            ATGAAAAGCAGTGATTCACAG
+            + ++ ++++++++ + +++++
+751  GAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTCGAAAAACCNNNNNNNNNNNNNNNNNNNNNNNNN
+
+                                                                         [8]
+                                                                         3.2e-07
+                                                                         AACTTCA
+                                                                          ++++ +
+1276 TGGAAAGATTAGTTAGTTCTTTAAGATAAAGAATGTATGTAAGTACTGCTAACAAGCAGCCTTTAATGTACTTTA
+
+     
+     
+     AAATGTCTCTTTGA
+     ++ +++++++ ++
+1351 AAGTGTTTCTGTGCCTTCCAGTTATTTTAAGTACGAACAGACTGAAGCAAGTCATGCAACAGTTAAGATTGTCTC
+
+           [8]
+           6.1e-05
+           AACTTCAAAATGTCTCTTTGA
+            ++ + ++++ + +++++++
+1726 ATTACTTACATAATAAAGCTTTTTTTTTCTTTTTGCTAAACTAGACTTAATGCCTAGTCTGTGGGTTTCCTAACA
+
+
+NM_021757
+  ENSRNOG00000006655
+  LENGTH = 2000  COMBINED P-VALUE = 4.60e-05  E-VALUE =      1.6
+  DIAGRAM: 436_[3]_117_[2]_304_[4]_1041_[8]_16
+
+
+                                                                  [3]
+                                                                  2.4e-05
+                                                                  AGTGCCCTGGGCCT
+                                                                  ++++ ++   ++++
+376  ATTTGGTAACCTTGCCCTAGTAATCTAGTGTTTCCTTTCAGAGAGTACTAGGAAATGTTGAAATGTCACAATCCA
+
+     
+     
+     TTAAGAG
+     +++++
+451  TTAAATAAATGAGCCCTGTATGGACAGGACATTTGGGTTAGTTTTCAGGATTAAAGAGTAAATATGCCGTGCACT
+
+                                                      [2]
+                                                      2.0e-06
+                                                      ATGAAAAGCAGTGATTCACAG
+                                                      +  ++ ++++ ++++++++++
+526  GTGACTGTATGTGTCAGGTTACTCCCGGAAAGCTGTTTCCTAGAAATGTAGCAGGACAAAGGATTCACAGAGCTT
+
+                                                                               [
+                                                                               6
+                                                                               A
+                                                                               +
+826  CTTTGTGACAAGTTCCAACAGCACTAATCCTATCAGTTGTCTTCTCTGCTTCTTTACTAGTATCTAACTATTGCA
+
+     4]
+     .6e-05
+     GGAGAGAGCTCACACCTCTCCC
+     + +++ + + +++++  +++ +
+901  GAACAAAACCCACACAACACTCAGCCTTCCCTGAAGACGTATCAGCATCTGCTGGAGATTCCTGGAAAACACATC
+
+                  [8]
+                  1.1e-06
+                  AACTTCAAAATGTCTCTTTGA
+                  ++++  +++++++++ +++++
+1951 ATTTTACTACTAAAACTATAAACTGTTTATTTTAGAAATCAAACACAGGT
+
+
+NM_019794
+  ENSMUSG00000031701
+  LENGTH = 2000  COMBINED P-VALUE = 4.85e-05  E-VALUE =      1.7
+  DIAGRAM: 682_[8]_57_[3]_237_[8]_102_[4]_836
+
+
+            [8]
+            4.2e-06
+            AACTTCAAAATGTCTCTTTGA
+             ++++++ ++ +  ++ ++++
+676  AAGTATATACTTCAGAAGGAATTGTTGAAATGAAGGGTAAGAAACTGGAATCAGGGCTGGTGCATTTCTAGGCTG
+
+               [3]
+               7.5e-07
+               AGTGCCCTGGGCCTTTAAGAG
+               + +++++++  + +++ ++++
+751  AGCTAATTGAATTGCCCTGAATTCTTCAAAGGCAGCCTAGAATACAACAGCAAATCTTTTGTCTCAAAACAAGTA
+
+                                                [8]
+                                                7.5e-05
+                                                AACTTCAAAATGTCTCTTTGA
+                                                ++++  ++++++ +++ + +
+976  AACATATGAAAGGATACATGAACATTAGTCATCAGGGAAACACAAATCAAAACTGCTTTCTATCCATGGGAGAGG
+
+                     [4]
+                     1.0e-05
+                     AGGAGAGAGCTCACACCTCTCCC
+                     ++  ++++++++ + ++ +++++
+1126 AGAGAGATGGTTCAGTAGTGAAGAGCACCAGCTACTCACCCAAAGGACAGGAGTTCAAATCCCAGCACCCACATG
+
+
+NM_019343
+  ENSRNOG00000003618
+  LENGTH = 2000  COMBINED P-VALUE = 4.94e-05  E-VALUE =      1.7
+  DIAGRAM: 228_[8]_40_[3]_58_[2]_655_[8]_70_[4]_241_[3]_580
+
+
+        [8]                                                          [3]
+        1.3e-05                                                      3.3e-05
+        AACTTCAAAATGTCTCTTTGA                                        AGTGCCCTGGG
+         ++++ ++++++ ++ +++++                                        + ++ + +  +
+226  CTTGGCTTTAAAATGCTTGTTTGATTATTTTCTCCTTTCAAAGGTTGAAAAGCACAGAACTTTCATTGTCTTAAT
+
+                                                                         [2]
+                                                                         7.8e-05
+     CCTTTAAGAG                                                          ATGAAAA
+     + ++++++                                                            + +++++
+301  CTTTTAAACATAAGAAAGAGTTGAGGGTAAGGGCAAAAAAAATGCATGCAACTATTGTTAATAAGGAAAAGGAAA
+
+     
+     
+     GCAGTGATTCACAG
+     ++ ++ ++ ++ ++
+376  CATGGAACACAGACACAAAGAGGAACTTATCCTTCATTGTCTACACCATGCTATACACTTCTTTCCAAACCTTAG
+
+                                                                          [8]
+                                                                          7.0e-0
+                                                                          AACTTC
+                                                                          ++++
+976  CAAGAGAAGCAGAAATGTTGTAAACGCGGCAAGCTTTCGTGCTGACACTTGCAAAGCCTAAGTAGCTGAAAATCT
+
+     
+     5
+     AAAATGTCTCTTTGA
+     +  + +++++ ++++
+1051 AGCAGGGTTCCTTGAGAGTTGATTCCTAATCTTCTCTAATATCATCGATTTGATAAAGAGTTGCTGCGGGTTAGA
+
+               [4]
+               1.1e-07
+               AGGAGAGAGCTCACACCTCTCCC
+               ++++++++ +++ +++++ +  +
+1126 CCTTGCTCAAAGGACAGAAATCTCACTTGATTCTATCTCTTGTTAAAGCCTGTAGGGGTGGTGAGTGAGCCCTTG
+
+                                                      [3]
+                                                      6.2e-05
+                                                      AGTGCCCTGGGCCTTTAAGAG
+                                                      + +++  +++++ + ++++ +
+1351 AAAAAATAATGAATGAGGTTGATTTTTCTCTACAGATTTTCTATTTGCTATTACATTGGTCACATAAGCGATTTA
+
+
+NM_144715
+  ENSG00000163576
+  LENGTH = 2000  COMBINED P-VALUE = 5.47e-05  E-VALUE =      1.9
+  DIAGRAM: 5_[3]_464_[4]_57_[3]_606_[8]_782
+
+          [3]
+          3.6e-06
+          AGTGCCCTGGGCCTTTAAGAG
+          ++++ + ++ ++++ ++++++
+1    TTTCAAATGTCGTGCTCCAATAAGAGAAGACAGTATACATAAAGTAGATTTTTAGGCCAGATATAGTAAATGCAT
+
+                                             [4]
+                                             3.9e-05
+                                             AGGAGAGAGCTCACACCTCTCCC
+                                                ++++  ++++++++++++++
+451  CTATGGTCTTCCCAGTGAGAAGCAGCTCAAGACGCAGCTGCTTAAAGGAAACACACCTCTCCCCAGGTCTTCCAA
+
+                                                  [3]
+                                                  2.1e-05
+                                                  AGTGCCCTGGGCCTTTAAGAG
+                                                  + + ++ +++ + +++++
+526  ACACCTTGGCTGAAACTAGGCAGAAGCAGAAAAAAGGGAGAGAGAATTTCCTTGGATTTTTAACTCACTGGACAC
+
+                                                                             [8]
+                                                                             3.6
+                                                                             AAC
+                                                                             +++
+1126 TGATGACTTAAGCCAAACGTGTTAAGAAAGCATGAGTTTAGAGATCATTTGGAAAAGATTGCTTATTCCCCAAAC
+
+     
+     e-07
+     TTCAAAATGTCTCTTTGA
+      + ++++++++++ +++
+1201 CTTATAATGTTTTCTTTTGTCTTGTTTTTGCTGTCTTAAAAGCTTTCAATTTGTGGTTTTTAATATATGGAAAGA
+
+
+NM_010510
+  ENSMUSG00000048806
+  LENGTH = 2000  COMBINED P-VALUE = 5.54e-05  E-VALUE =        2
+  DIAGRAM: 86_[3]_1326_[2]_153_[2]_277_[8]_74
+
+
+                [3]
+                3.5e-07
+                AGTGCCCTGGGCCTTTAAGAG
+                ++++  +++++++++ ++ ++
+76   ATGAAGACACAAATGTGCTGGTTCTTAAACAGCACCACACAGATGCCTTTCTGTCTTTTATGGTGCTGTACTTAA
+
+             [2]
+             4.3e-07
+             ATGAAAAGCAGTGATTCACAG
+             + + +++++++++++  ++++
+1426 CAGATGTGAAGTGAAGAAGGGACAGACACCCTCAGCTTCCCCAGGGGATCATGAGGAGCAAGATAAAGGCATTTA
+
+                                     [2]
+                                     4.4e-05
+                                     ATGAAAAGCAGTGATTCACAG
+                                     +  ++++++ +++++ ++  +
+1576 ATCATTTCCCTGATGGCAAAGGTTTTGGGGTAAATAAAAGAGGTGATACATTCCTCTGTCTGGCCACCCTAAATG
+
+                                   [8]
+                                   8.8e-05
+                                   AACTTCAAAATGTCTCTTTGA
+                                    ++++ + ++ +   + ++++
+1876 CCAGAAACTATACTACTTTTTTTAAAGGAGTACTTTACAAAGAACCCTTTATAAGAAGTGTCCAGCAATTATTAA
+
+
+NM_009241
+  ENSMUSG00000029682
+  LENGTH = 2000  COMBINED P-VALUE = 5.89e-05  E-VALUE =      2.1
+  DIAGRAM: 208_[2]_1224_[3]_294_[4]_209
+
+
+                                                               [2]
+                                                               2.5e-05
+                                                               ATGAAAAGCAGTGATTC
+                                                               + ++++  ++++ ++++
+151  CATAGTTATTATCAATGCTTCCACCATGCCTATGCAATAAAGTCACTATAAAACCTTGAAAGAATTAAGGAATTC
+
+     
+     
+     ACAG
+      +++
+226  TCAGGTCATAGATCTTGTGAAGATTTCTGGATCGAGGTTCACCTGGGGAAGGTTTGAAGCTTTAACTAGCTTCTC
+
+                                 [3]
+                                 2.1e-08
+                                 AGTGCCCTGGGCCTTTAAGAG
+                                 +++++++++ ++++ ++++ +
+1426 AAGTTCCAGGACACCCAGGGAATTTAACAATACCATGATCCAATAAATGAGTCAACGAACAATCAAACAAACAAA
+
+                                                [4]
+                                                4.3e-05
+                                                AGGAGAGAGCTCACACCTCTCCC
+                                                + + ++++ +++ ++++ ++  +
+1726 TCTAAAATACTGATAGATACAGGACCCTATATATCTGGGAATGAAGTAAGAAAAATAACTACATTCAGTTTTCTG
+
+
+NM_014783
+  ENSG00000175306
+  LENGTH = 2000  COMBINED P-VALUE = 5.94e-05  E-VALUE =      2.1
+  DIAGRAM: 419_[3]_585_[8]_774_[2]_159
+
+
+                                                 [3]
+                                                 9.5e-09
+                                                 AGTGCCCTGGGCCTTTAAGAG
+                                                 + ++ +++++++++++++++
+376  CTTTAAAAAAGGATCACGACGTAGAAAAAGACACAACACTGAAGATTGTCATGGGTCTTTAAGAAAAGGAAATTT
+
+                                                       [8]
+                                                       3.9e-05
+                                                       AACTTCAAAATGTCTCTTTGA
+                                                       +++  +++++++ ++++ ++
+976  CTTTTCTTTGGATTGGGAAGGGTAAAATATCCGAAGATTTGAACTCCAAAAGAAACAAAATGATTCTATGCAAAC
+
+                         [2]
+                         6.5e-05
+                         ATGAAAAGCAGTGATTCACAG
+                           ++++++++++++  + ++
+1801 CCGCCCCAAAAACTGGCCTTGAAGGAACAAGTGAAACTCATCCTGCTTTTCATGTTTGCTGGGTTTGCCCGTTAC
+
+
+NM_006056
+  ENSG00000171596
+  LENGTH = 2000  COMBINED P-VALUE = 7.19e-05  E-VALUE =      2.5
+  DIAGRAM: 724_[2]_641_[8]_593
+
+
+                                                      [2]
+                                                      1.8e-09
+                                                      ATGAAAAGCAGTGATTCACAG
+                                                      ++++++++++++++++ ++++
+676  CATCCCTGTTTCTCAAGGGGTGAGTTCAGATCTGCTATTCCCGTTTTCCATGGGAACCAGTGACTGACAGGACCT
+
+                                         [8]
+                                         6.3e-05
+                                         AACTTCAAAATGTCTCTTTGA
+                                         +++++   ++ +++++ +++
+1351 CTCAGGGGCAGCACTCAGGTGCTCACGGCTCTGCACAGCTTTTCACAGGCTCCTTTCTCTGCAGAATCTCCCTGT
+
+
+NM_175619
+  ENSG00000182223
+  LENGTH = 2000  COMBINED P-VALUE = 7.51e-05  E-VALUE =      2.7
+  DIAGRAM: 563_[2]_38_[4]_167_[8]_36_[8]_1110
+
+
+                                           [2]
+                                           1.0e-06
+                                           ATGAAAAGCAGTGATTCACAG
+                                           + ++++++++ ++ +  ++++
+526  GAAGATGAAGATACAGATATTTCTTATAAAAAACTAAAAGAAGAGGAAATGGCAGACACTTCCTATGGCACAGTG
+
+                           [4]
+                           3.3e-07
+                           AGGAGAGAGCTCACACCTCTCCC
+                            +++++ ++++++ +++++++ +
+601  AAAGCAGAAAATATAATAATGATGGAAACCGCTCAGACTTCTCTCTAAATGTGGAGATACACAGGAGCTTCTATC
+
+                                                                   [8]
+                                                                   9.7e-05
+                                                                   AACTTCAAAATGT
+                                                                   +++++  ++++
+751  TCATACATGTAACAATTCTGATCTTTTTAAGGTTCACTGGTGTATTAACCAAACGTTGTCACAAATTACAAATCA
+
+                                                 [8]
+                                                 6.5e-05
+     CTCTTTGA                                    AACTTCAAAATGTCTCTTTGA
+      +  ++++                                     ++++ ++  ++++++++ +
+826  ATGCTGTAATATAATTTGCACCTGGAATGGCTAACGTGAAGCCTGAATTAAATGTGGTTTTTAGTTTTTACCATC
+
+
+NM_000994
+  ENSG00000144713
+  LENGTH = 2000  COMBINED P-VALUE = 7.57e-05  E-VALUE =      2.7
+  DIAGRAM: 327_[4]_87_[8]_302_[8]_944_[8]_254
+
+
+                                [4]
+                                4.8e-07
+                                AGGAGAGAGCTCACACCTCTCCC
+                                +++++  ++++ +++++ +++ +
+301  AGGCTGTGCTGTCAGCTGTAAAAGATCAGGAGGCAGCAGACACCACTCTGGTTTCTTCACTGCATTCAGCAATGC
+
+                                                                   [8]
+                                                                   1.7e-06
+                                                                   AACTTCAAAATGT
+                                                                   ++++++++++ ++
+376  CTGAAGTTAGTGCTCAGGCCGGGCATCTCAAAAGAAAAGATACTTGAGTTATTCACATTTTAAAATTCAAAACGG
+
+     
+     
+     CTCTTTGA
+     +++ +++
+451  TTCATTTTTAAGTGGCAGTGATGAATCAGAAATTTGGAAGATGATACGGGTTTCTTTTTTCCAGGGAGGAGGAAT
+
+               [8]
+               1.0e-05
+               AACTTCAAAATGTCTCTTTGA
+               ++++++ ++ +  +++ +++
+751  AATAAAGATTAGATTCTAAGTTACTTCTTTCCTCTGCACGACTCGTCTCCAATTGTTAATCNNNNNNNNNNNNNN
+
+     [8]
+     8.4e-05
+     AACTTCAAAATGTCTCTTTGA
+     ++  + + ++++ ++  +++
+1726 AGGCTAAGAATGACTACTTGTGGCTTGGGAGCCACAAGCTTCTTCAAGTGTCTCAGAACCTACCTGGTGTGAGGG
+
+
+NM_031922
+  ENSG00000135597
+  LENGTH = 2000  COMBINED P-VALUE = 7.61e-05  E-VALUE =      2.7
+  DIAGRAM: 396_[4]_58_[8]_92_[8]_253_[8]_418_[2]_676
+
+
+                          [4]
+                          3.0e-05
+                          AGGAGAGAGCTCACACCTCTCCC
+                          +  +++++ +++ ++++ ++ ++
+376  GAATGTACTCCTCTAGAGAGCATTAAAGATATACCACTACAAAGTACTAAAAAGTTTTCTATAATTTTTAAGTGT
+
+                                [8]
+                                2.5e-05
+                                AACTTCAAAATGTCTCTTTGA
+                                +++++ ++ +  + +++++++
+451  TATTTATTGGAAGGCTAAATTCTAAGAAGCTTTAACAAATATTTTTGACATTGTTAAATTAGAAGGCTATTTTTA
+
+                                                                      [8]
+                                                                      2.0e-07
+                                                                      AACTTCAAAA
+                                                                       ++ + ++++
+526  AAAAGCATATTTTAAAAAACAAAACTAAATGGAAACAGCAAAACTTGCTTGTAATGAAATACAGCTGAGTAAAAA
+
+     
+     
+     TGTCTCTTTGA
+     +++++++++++
+601  TGTTTCTTTGAAAACAAAGACACTCAGAAAAACAGGTGTTGAGTAGTTTCCTGGGCCTTTAATGCTTTAAATTTA
+
+                                            [8]
+                                            9.3e-05
+                                            AACTTCAAAATGTCTCTTTGA
+                                             + ++ ++ ++ ++++ +++
+826  CAAAGTCTATCGGTGCAGTTTAGGACTGTGAATCTATAGTATTTAAACATTTTTTCTTTCAGAAATGAAAATACA
+
+                                 [2]
+                                 8.7e-06
+                                 ATGAAAAGCAGTGATTCACAG
+                                 + +++++++  + +++++ ++
+1276 AAGTTTTAAAACAATACTTAAAAAAATTAAGAAAGGCCATTACTCATACCTATTTCATAAGTTTACATGACTGCT
+
+
+NM_007260
+  ENSG00000011009
+  LENGTH = 2000  COMBINED P-VALUE = 7.96e-05  E-VALUE =      2.8
+  DIAGRAM: 711_[8]_915_[2]_332
+
+
+                                         [8]
+                                         4.6e-07
+                                         AACTTCAAAATGTCTCTTTGA
+                                         +++++  +++++++++ +++
+676  CTCTGGGCCTGGGGGAAAGTGCTTTGTTGAGCTTCCAACTTTGTACTGTTTTCTTTTTCTNNNNNNNNNNNNNNN
+
+                                                                             [2]
+                                                                             3.5
+                                                                             ATG
+                                                                             +++
+1576 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGGTTCAGAGTGTGTCATTCCATTCATGATCCTGGAATAGGGCATG
+
+     
+     e-07
+     AAAAGCAGTGATTCACAG
+     ++++++++++ +  ++++
+1651 AAAGCCAGTGGCAGACACCCGAGGCCTTAAGAGGTGTGTCCCATGAGGATGGGAAGGTGGTAAAGTTTATGCAAA
+
+
+NM_011839
+  ENSMUSG00000046212
+  LENGTH = 2000  COMBINED P-VALUE = 8.15e-05  E-VALUE =      2.9
+  DIAGRAM: 111_[8]_61_[8]_414_[8]_491_[4]_211_[8]_213_[3]_64_[8]_286
+
+
+                                         [8]
+                                         7.8e-06
+                                         AACTTCAAAATGTCTCTTTGA
+                                         +++++  ++ + ++++++++
+76   TTCTTTGAAGCTATTTGTTCTTTTATAGGAAGCTGTAGCTTTTAAGTTTTTTTTGTCTTTCTGTAGCATAATTAC
+
+                                                [8]
+                                                9.3e-05
+                                                AACTTCAAAATGTCTCTTTGA
+                                                + +++ ++ +++   +++++
+151  ATTAGTTTTCTTTCTCTGTTGCTTGTAAAACATAAGATATTTTACCTTAATCATGAAATTTGTCTTGTACTGAAA
+
+                                 [8]
+                                 2.4e-05
+                                 AACTTCAAAATGTCTCTTTGA
+                                 +++++++ ++  ++++ ++ +
+601  CAAACATACTCCTTTGAAAATTTTCTTAAAATTCAGACCTTCTCCTTAAGAGGATAAATTGTTTTATTGTTCAAA
+
+                    [4]
+                    4.7e-07
+                    AGGAGAGAGCTCACACCTCTCCC
+                     ++ ++++ ++++++++  +  +
+1126 TCAAATCTTGTCCTTTGGCCAGAAAACAAACCAGTTTCTTTAGGATATTTTCAGTTTTCCTAATTATTTTTTATT
+
+                             [8]
+                             2.3e-06
+                             AACTTCAAAATGTCTCTTTGA
+                             +++++++++++  ++++++ +
+1351 GTTTTAAGACCTTTATATATAGGTAGATTCAAAATAATTTTTGAAATTTAATTAAAATGATACTGTTTATTGCAT
+
+                                      [3]
+                                      6.5e-05
+                                      AGTGCCCTGGGCCTTTAAGAG
+                                       +++ + +  +++++++++++
+1576 AACTTAAAAAGAATAGTTGAAGAAAGAGCATGACATGACTTCATTCTTTAAAAGCAATTTTAAAAACATTTAGAA
+
+                                                [8]
+                                                1.4e-05
+                                                AACTTCAAAATGTCTCTTTGA
+                                                ++ ++ +++  ++++++++++
+1651 CTAGATTTTAAAAAACCTTACAGCATTAAAAAAATCCTGATATAATTTTAAATAGGCTTTTTTATTATTTTAAAT
+
+
+NM_009531
+  ENSMUSG00000030094
+  LENGTH = 2000  COMBINED P-VALUE = 9.75e-05  E-VALUE =      3.5
+  DIAGRAM: 90_[8]_729_[2]_1002_[3]_87_[2]_8
+
+
+                    [8]
+                    5.5e-07
+                    AACTTCAAAATGTCTCTTTGA
+                     +++ ++ + ++++++++++
+76   CCGGCCTCAAGCAGTTGCTGCAGAGTGGTTCTTTTTGCATTTAATGCCCATCACTATCCTCATCCCAAATAAAAT
+
+                    [2]
+                    7.6e-05
+                    ATGAAAAGCAGTGATTCACAG
+                     + ++++ ++ +++++++  +
+826  CTACTTTGTCTTGGCTTTAAAGTAAATGACTCATGCTTCATGAACATTGAGAATGTGTCAGAAGCATGATGCACA
+
+                                                                    [3]
+                                                                    1.9e-05
+                                                                    AGTGCCCTGGGC
+                                                                    ++++++++++++
+1801 CTCTGACAAAAGAGCAGCAGGCAGTACTGGAAACCTGACTCAGGAAGGCAGGCCTGAAAAGCAAATGCCCTGGGT
+
+     
+     
+     CTTTAAGAG
+     ++  + +++
+1876 CTGAAGGAGAGGAAAGCAAGCGTTGTGAACGGCACTAAAGGAAGATTTGTCTCAGTGCTCCAGTACCCATAATTT
+
+                          [2]
+                          2.7e-06
+                          ATGAAAAGCAGTGATTCACAG
+                          + +++++ ++ ++++  + +
+1951 CCCAGTGAGTTCTATTTAAAAACAGAAATAAAGGACAGATAAGGCATTGT
+
+
+NM_011717
+  ENSMUSG00000024050
+  LENGTH = 2000  COMBINED P-VALUE = 9.76e-05  E-VALUE =      3.5
+  DIAGRAM: 146_[8]_277_[8]_589_[4]_152_[2]_750
+
+
+                                                                            [8]
+                                                                            9.5e
+                                                                            AACT
+                                                                             +++
+76   ATAGCCAAAACTGTTCTTCACAATTTAGTTGGGGCAGGGGAGGTAGAGAAGTATAGATTAGGCTGTTGCTGTGAT
+
+     
+     -05
+     TCAAAATGTCTCTTTGA
+     + +++  + ++ ++++
+151  TAAAAGGGCTTGTTTTTACCATTTGTCCAGATACAATGTCCTGACGATAATTTCTCCCCACTTTTGCAAAAGAAA
+
+                                                                          [8]
+                                                                          1.1e-0
+                                                                          AACTTC
+                                                                          ++ ++
+376  GCACTTGGTGTGATTTCACCCAAGGTCTTGTTTAATTTCTGATGTCCTCACAGGTTGAGCTTAGCCTAAAAGTTA
+
+     
+     6
+     AAAATGTCTCTTTGA
+     + + +++++++++++
+451  AGATTGTTTCTTTGAGACCATGGAGTGGTTTAAGCAAAGACTTAAGCTATGCTCAGGAGTTTTAGGAAAAGGTCT
+
+         [4]
+         1.4e-05
+         AGGAGAGAGCTCACACCTCTCCC
+         ++++++ + ++ ++ +++ + ++
+1051 AAGTAGGAGATAACAGAATCTTATTACCTTAGAAATGTTCAGTAAACATTAGCTGAGCAAGAAAATAGCATTGTT
+
+                                  [2]
+                                  2.4e-06
+                                  ATGAAAAGCAGTGATTCACAG
+                                  + ++ ++++++++ + +++++
+1201 TTTGAGGCCAGCCTAGTTTATAGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTTGAAAAGCAAAA
+
+
+NM_011291
+  ENSMUSG00000025922
+  LENGTH = 2000  COMBINED P-VALUE = 9.78e-05  E-VALUE =      3.5
+  DIAGRAM: 678_[3]_299_[2]_435_[8]_161_[4]_317_[4]_1
+
+
+        [3]
+        2.5e-05
+        AGTGCCCTGGGCCTTTAAGAG
+        + ++ +  ++ ++++ ++ +
+676  GATATTATCTAGGACCATCAACAAGAGAAATTACTTCATAACCGGACAAGAACTTTCCACCAACAGTACCCAAGG
+
+                            [2]
+                            2.4e-06
+                            ATGAAAAGCAGTGATTCACAG
+                            + ++ ++++++++ + +++++
+976  AGCCTGGTCTATAAGTAAGTTCCAGGACAGCCAGGGTTACACAGAGAAACCCCCAAAAGAAAAACAAAAACCCCA
+
+                                  [8]
+                                  3.8e-06
+                                  AACTTCAAAATGTCTCTTTGA
+                                  ++++  +++++++  +++++
+1426 GAAAAGCCTTTAACAAGTTGTCTTTCAAAAACTATAAACTGGAACTTTGCACATCATAGGCTGTTGATCTGACTG
+
+                                                                  [4]
+                                                                  3.7e-05
+                                                                  AGGAGAGAGCTCAC
+                                                                  + + ++ + +++++
+1576 TGCCAAGCTGTGACTGTTTATTAACTGACCAGATTACAAAAATACAATGGGTGACACCTGAAAGGGAAAAAACAC
+
+     
+     
+     ACCTCTCCC
+     ++   + ++
+1651 ACGAGTTAGAGACTGTCTTCCAAGCCGCACATCAGCACCCCCCCGTAAAGGAGCACACTTCCATTAAAGACACAT
+
+                               [4]
+                               9.6e-05
+                               AGGAGAGAGCTCACACCTCTCCC
+                               +  +++ + +++ +++++++ ++
+1951 CGATTGCCAGAAATAGATTCTGTCTCAAAAAACAACACTAACTTCTTACC
+
+
+NM_000324
+  ENSG00000112077
+  LENGTH = 2000  COMBINED P-VALUE = 1.01e-04  E-VALUE =      3.6
+  DIAGRAM: 1220_[8]_55_[3]_105_[2]_557
+
+
+                         [8]
+                         1.6e-07
+                         AACTTCAAAATGTCTCTTTGA
+                           +++ ++++++ ++++++++
+1201 NNNNNNNNNNNNNTTTGATTTTCTTAAAAATGATTTTTTGACGATGTCAGTTATCTTTTCTAAAATTTAACTTTT
+
+                          [3]
+                          2.0e-06
+                          AGTGCCCTGGGCCTTTAAGAG
+                          + ++ + ++ ++ +++++++
+1276 TACAACATTCTATAATGAGACATTAACTTGATTTTTTAAAAATAGCAAATACATGATAAATATTTAAATTGAGTG
+
+                                                                             [2]
+                                                                             6.3
+                                                                             ATG
+                                                                             + +
+1351 TGGACTAAATATGAAGTGTGTGTGTGTGCACCTGTGTGCTTGTTACTGACATGAATTCCAACTACCAAAGAAAGG
+
+     
+     e-05
+     AAAAGCAGTGATTCACAG
+     ++++ ++  ++++ +
+1426 AGAGTCAAAGACTGATGNNNNNNNNNNNNNNNNNNNNNNNNNNNNGGACATTTTATGCAAAATAGACTTTGATCA
+
+
+NM_026408
+  ENSMUSG00000024534
+  LENGTH = 2000  COMBINED P-VALUE = 1.05e-04  E-VALUE =      3.7
+  DIAGRAM: 28_[2]_151_[4]_27_[4]_227_[4]_311_[8]_220_[2]_904
+
+                                 [2]
+                                 3.6e-05
+                                 ATGAAAAGCAGTGATTCACAG
+                                 +  +++++++   ++ +++++
+1    TAGGAGAAGCAAACCAGCAAACAGTCCAAACAAAAGCAAACACACACAGTCTATGTGGGAAAGTGTATACAAGCC
+
+                                                       [4]
+                                                       5.6e-06
+                                                       AGGAGAGAGCTCACACCTCTCCC
+                                                       ++++++ + ++++++  ++++ +
+151  GAAAAAAAAAGTAACAAAAGGACTATAAAAAATAGTAGGGGAAATAACTGAGGAAACATCACAAAGATCTCTGCC
+
+                              [4]
+                              2.0e-06
+                              AGGAGAGAGCTCACACCTCTCCC
+                               ++ ++++ +++ + ++++++++
+226  TTACACTGTGGCTTGCTGGTTGCTGTGGTCAGAAAAACCTCTTCACCCCCAGGCAATCTATTTTCTGCACATACA
+
+                                                       [4]
+                                                       9.1e-05
+                                                       AGGAGAGAGCTCACACCTCTCCC
+                                                       + + ++++++++++ ++  + ++
+451  CAATGAAGTCAGGAGGGCAGGAACTGACATTTCTACTCTTGTCTAGCTCCACGTCAGAGAAAACGCCAATTCCAT
+
+              [8]
+              5.8e-07
+              AACTTCAAAATGTCTCTTTGA
+               ++++ ++++++++++ +++
+826  TAGGGAGGAGGATTAAAACTGTCTTCTGGTGACATTCAAGGCTGTCATTTCTAGGGGTGCCATTTACTTATGGTA
+
+                              [2]
+                              7.3e-05
+                              ATGAAAAGCAGTGATTCACAG
+                               +++ +++++++ ++ ++  +
+1051 CTTTCTTACTGGCAGTGTATTTTCTGTGACAGGCAGGCATACATCCCTCTGCCAGCACCCAGGTAGTATTGGATT
+
+
+NM_026065
+  ENSMUSG00000009550
+  LENGTH = 2000  COMBINED P-VALUE = 1.05e-04  E-VALUE =      3.7
+  DIAGRAM: 107_[8]_176_[3]_286_[4]_735_[8]_610
+
+
+                                     [8]
+                                     1.7e-06
+                                     AACTTCAAAATGTCTCTTTGA
+                                      +++  ++++++++++++ ++
+76   GAATGGGGGATGAGCCAAAAGACACAGACAAGTGCTATAAAATGTCTTTTAGACTTTAGTGACCAATTCCACTCA
+
+         [3]
+         8.7e-05
+         AGTGCCCTGGGCCTTTAAGAG
+          + + + +++++ +  ++++
+301  TTTTGGCATTTTGGTTTAAAAAAAAAAACCCAGAGATATTAGAATACTATTTTCATGTTTTGGTTTTACTCTCAG
+
+                [4]
+                3.0e-07
+                AGGAGAGAGCTCACACCTCTCCC
+                  ++++++++++++++++ ++++
+601  ATTAAGACTTCCTGACAGAGATCAAACCTGTCCCAAAGAACTGGACACCCCAAGCAGCAAGACCTAGTCTACCCA
+
+                        [8]
+                        5.2e-05
+                        AACTTCAAAATGTCTCTTTGA
+                          + +++++++++ +++ +++
+1351 ACTGATGATGTAAAAATAGTTAATCATACTGGATTTATGACAATAAGTAACTGGGCTGTGCAGGGTGGATCACTC
+
+
+NM_010324
+  ENSMUSG00000025190
+  LENGTH = 2000  COMBINED P-VALUE = 1.06e-04  E-VALUE =      3.7
+  DIAGRAM: 605_[2]_42_[2]_1105_[3]_185
+
+
+          [2]                                                            [2]
+          8.1e-05                                                        1.6e-07
+          ATGAAAAGCAGTGATTCACAG                                          ATGAAAA
+          + +++++++++ + +   +++                                          + ++ ++
+601  CCAGCACGGGAGGCAGAGGCAGGCAGATTTCTGAGTTTAAGGCCAGCCTGGTTTACAGAGTGAGTTCCAGGACAG
+
+     
+     
+     GCAGTGATTCACAG
+      +++++++ +++++
+676  TCAGGGATACACAGAGAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGGTGGGGGGTGGTTACTGGGTAG
+
+                                                                          [3]
+                                                                          2.4e-0
+                                                                          AGTGCC
+                                                                          +  + +
+1726 ATACACTAGGGACATAGCAGCTCCAGCGGTGACGGAAAGAGCTGGTATTCTAGAATCCCTCTAAATGATATCATC
+
+     
+     6
+     CTGGGCCTTTAAGAG
+     ++ +++ +++++++
+1801 CTAGTTTCTTAAGAAAATGGCTGGTGTCTGAACGATAAACCTACAAAAGCAACATGGACTTACAGCCTGGATCCT
+
+
+NM_028381
+  ENSMUSG00000003208
+  LENGTH = 2000  COMBINED P-VALUE = 1.06e-04  E-VALUE =      3.8
+  DIAGRAM: 34_[8]_1159_[3]_6_[2]_433_[2]_284
+
+                                       [8]
+                                       2.5e-06
+                                       AACTTCAAAATGTCTCTTTGA
+                                       +++++ + +++  +++++++
+1    GACGTGCACCACCACCCAATAGGAATTACTATTAAAATTAAGAATTCCTCTTTGCCCATTTTAAATTTTTAAATA
+
+                   [3]                        [2]
+                   8.3e-05                    4.6e-05
+                   AGTGCCCTGGGCCTTTAAGAG      ATGAAAAGCAGTGATTCACAG
+                   +  + +++++++ ++ +++ +      + ++ +++++++  + +++++
+1201 TTTCTGAGTTCGAGACCAGCCTGGTCTATAAAGTGAGTTCCAGAACAGCCAGGACCACACAGAGAAACCCTGTCT
+
+                                                  [2]
+                                                  1.7e-07
+                                                  ATGAAAAGCAGTGATTCACAG
+                                                  + ++ ++++++++++ +++++
+1651 GATCTCCTTTAGTTTGTGACCACCCTGAGCTACATAGTGAGTTACAGGACAGCCAGGGACACACAGTGAGACTCT
+
+
+NM_033030
+  ENSG00000152430
+  LENGTH = 2000  COMBINED P-VALUE = 1.07e-04  E-VALUE =      3.8
+  DIAGRAM: 189_[3]_218_[8]_1213_[8]_53_[2]_3_[4]_170_[3]_26
+
+
+                                            [3]
+                                            6.2e-05
+                                            AGTGCCCTGGGCCTTTAAGAG
+                                            + ++  ++ ++++++ ++++
+151  NNNNNNNNNNNNNNNNNNNNNNNGTAACATGTTTGGGCTATTAAGCTAGGCCATGAAGAAGGCTGTATGATATCC
+
+                                                          [8]
+                                                          8.8e-05
+                                                          AACTTCAAAATGTCTCTTTGA
+                                                           +++ ++++++   +++++
+376  GTTGTGGAAGAAACACACTTGGGTATCAATTATCATCAAAATTCCTTATCCAATAATACATAATTAATTTTTATT
+
+                 [8]
+                 5.4e-07
+                 AACTTCAAAATGTCTCTTTGA
+                  ++++ ++++++ +++++ +
+1651 TTAAAAGTATTATAATTAAAAATGATTCTTATCCTAAATATGAAAATAAATATCAAATAATATGAAACATAACAT
+
+                [2]                     [4]
+                3.1e-05                 1.3e-05
+                ATGAAAAGCAGTGATTCACAG   AGGAGAGAGCTCACACCTCTCCC
+                +++++++ ++ + ++ ++ ++   ++ +++ + ++ +++++++++
+1726 CTAATTGTTTGATAAGAAAAAATAACACAAAGCAGAGAAAATAAAAGACACCTCACTATTCCCAATGTGGTTATT
+
+        [3]
+        4.3e-05
+        AGTGCCCTGGGCCTTTAAGAG
+        ++++   ++ +  +++++
+1951 CTGAGTATGGTGAGGTATTAACTAACACTAAGTTTCACAACGGGCAGCTT
+
+
+NM_003215
+  ENSG00000135605
+  LENGTH = 2000  COMBINED P-VALUE = 1.11e-04  E-VALUE =      3.9
+  DIAGRAM: 97_[8]_88_[3]_758_[4]_369_[2]_602
+
+
+                           [8]
+                           9.6e-06
+                           AACTTCAAAATGTCTCTTTGA
+                            +++++++++++ ++   ++
+76   ACAGTCTCATTCTAAAAACAAGTACTTCATACTGATTGCATTTCTACACACACATATGCAGATTTCTGGACACTC
+
+                                                             [3]
+                                                             1.3e-06
+                                                             AGTGCCCTGGGCCTTTAAG
+                                                             +  +++ ++++++++++++
+151  TTTCTGTTTTTCTTTCATTTATGTTCTCAATGCTGAACCTTTATTAGAATTACATCATCACTGTGGTTCTTTAAA
+
+     
+     
+     AG
+     
+226  TAAGAAACAAAACTCTGTCACAACATCTGAGGAAATTATCTTTTAATGTCCCCAAGCTCTATGCCTTGGAAAAAA
+
+               [4]
+               3.5e-05
+               AGGAGAGAGCTCACACCTCTCCC
+                +++++++  ++ ++  + +++
+976  TTCATCCCACTGGAAAGCATAACCATGTGTCAACTAATTAACAGAGCAACTTTAATGAAGACACAGCATATTCTA
+
+                                [2]
+                                8.8e-05
+                                ATGAAAAGCAGTGATTCACAG
+                                  +++ ++++  + ++ ++++
+1351 TGGGATGGGAGGAGGGAAGGTCAGGAAGGAAGGAGAAAAGGCTGACAGTGATAGTAAGGTAGAGTGTGCCAGGAC
+
+
+NM_172893
+  ENSMUSG00000038507
+  LENGTH = 2000  COMBINED P-VALUE = 1.14e-04  E-VALUE =        4
+  DIAGRAM: 44_[3]_340_[2]_74_[3]_791_[3]_133_[3]_130_[8]_362
+
+                                                 [3]
+                                                 4.0e-07
+                                                 AGTGCCCTGGGCCTTTAAGAG
+                                                  +++++ +++++ ++++++++
+1    ATGTATATAGACACTCAGCCTCCAAAGTAGCTCCAAAGCCTCATTATACTTTGGTTTTTTAAAAGGACAGAGAAA
+
+                                   [2]
+                                   9.7e-05
+                                   ATGAAAAGCAGTGATTCACAG
+                                     +++++    +++++++ ++
+376  CAAAGAACAGAGGGGGTTGTAAGTGCAAGAGGGGAAATTGAGGATTCAAACTGCCATCAAGGGACAGGAGTCCAG
+
+                                                       [3]
+                                                       7.3e-06
+                                                       AGTGCCCTGGGCCTTTAAGAG
+                                                        +++++  ++++++ ++++ +
+451  TGATTCTAATGCAGTCCTGTCCCTATTCACAACAGACAAGGCAAGGCAGGTATGCCTGGGGTCTATAAATGAAGA
+
+                                          [3]
+                                          1.5e-05
+                                          AGTGCCCTGGGCCTTTAAGAG
+                                          ++ +++++ +++ + ++++ +
+1276 ATTGGGTCCAAAACAGGAATCCCTCATCAGTGAGATTAACACCCTTGTTTAATAAAGGGTCTGGGATAGTCTCCT
+
+                                              [3]
+                                              6.1e-05
+                                              AGTGCCCTGGGCCTTTAAGAG
+                                              ++++++  + ++++ + ++++
+1426 CTTGATTTTGAACTTCCTAGTCTCCAAAACCTTGAACAATAAATACCTAGTGTCCATTAAAGCCACTCCGTTTAT
+
+                                               [8]
+                                               1.4e-06
+                                               AACTTCAAAATGTCTCTTTGA
+                                                ++++++ + +++++++++
+1576 TCTTAAAAACAGACATCAAAAAAATAAACTCCAGTCTTCATCTACTTCAGATTGTCTTTTTATTTCCATACTCTC
+
+
+NM_133867
+  ENSMUSG00000040600
+  LENGTH = 2000  COMBINED P-VALUE = 1.16e-04  E-VALUE =      4.1
+  DIAGRAM: 769_[3]_282_[2]_761_[4]_123
+
+
+                        [3]
+                        4.0e-07
+                        AGTGCCCTGGGCCTTTAAGAG
+                        ++++  ++ +++++++++++
+751  AAACCCAGCCTGAGAATGGAATATGATTGGCCTTTAAGAATAAATAAACAAATTAAGGCTATTCAAACCTGAAAG
+
+                           [2]
+                           4.4e-06
+                           ATGAAAAGCAGTGATTCACAG
+                             ++++++++ ++++ ++  +
+1051 ACATCATCTCTCACTTGGTCCTGAGGAAGCCAAGGACACATTCCTGACTGCAGGCTATAGTTATATGATCTGTGG
+
+                                                           [4]
+                                                           1.3e-05
+                                                           AGGAGAGAGCTCACACCTCTC
+                                                            + +++++ ++  +++++ ++
+1801 GCCACACCTGAGGTCAGAGGCCAACAAGGAAGAAGCTGGAGAGCAGGGGTCTCGGGTAAAGCTCTGGCACCTGTC
+
+     
+     
+     CC
+     ++
+1876 AGCACAGACACTTCCTCAGGTCCACTACCGTGCTGTGCGGTTGGTGGCCTGAAAGGCCTCTCCACCCTCTAGAGT
+
+
+NM_027877
+  ENSMUSG00000024906
+  LENGTH = 2000  COMBINED P-VALUE = 1.17e-04  E-VALUE =      4.1
+  DIAGRAM: 114_[8]_370_[2]_57_[8]_1295_[4]_78
+
+
+                                            [8]
+                                            9.1e-05
+                                            AACTTCAAAATGTCTCTTTGA
+                                            +++    ++ ++++++++++
+76   AAAAATGACCTGAATATAATTCTCTACTTTCTTTATCCCAGAAATTAAGTGGTTTTTTGTTCTTTATGGTTCCTG
+
+                                                            [2]
+                                                            2.4e-06
+                                                            ATGAAAAGCAGTGATTCACA
+                                                            + ++ ++++++++ + ++++
+451  CAGAGGCAGGCGGATTTCTGAGTGCGAGGCAAGCCTGGTCTACAAAGTGAGTTCCAGGACAGCCAGGGCTACACA
+
+                                                               [8]
+                                                               7.9e-07
+     G                                                         AACTTCAAAATGTCTCT
+     +                                                         ++++++ + ++++++++
+526  GAGAAACCCTGTCTTGAAAAATCAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAACTTCTTTATGTTTCT
+
+     
+     
+     TTGA
+     ++++
+601  TTTACGATACAGGAAAAGGAATACTTCAATTTAATTCCAGAGTTGATAGGTTTATGATCATTTTAGCACAAAGAC
+
+                             [4]
+                             4.9e-05
+                             AGGAGAGAGCTCACACCTCTCCC
+                             ++ +++ + ++++++    ++ +
+1876 GCGCTGTCCTCCTTTTTAACAGTCAGTACACAAAAAAAAAGAGTCTGCTCTCGAATTTTTGGGTGAGAGGAAAGG
+
+
+NM_011937
+  ENSMUSG00000024437
+  LENGTH = 2000  COMBINED P-VALUE = 1.19e-04  E-VALUE =      4.2
+  DIAGRAM: 418_[4]_623_[4]_913
+
+
+                                                [4]
+                                                8.3e-05
+                                                AGGAGAGAGCTCACACCTCTCCC
+                                                ++  ++++++++ + +++ +++
+376  CATGATGGGCACTATGGTGACATGCAGCCTGGCACTGGGCAGTAGTTGAGAGCTACAGCCTGACCTGCNNNNNNN
+
+                   [4]
+                   1.1e-09
+                   AGGAGAGAGCTCACACCTCTCCC
+                   ++++++++++++ ++++++++++
+1051 AAGGTAGGTGAGAAAGGACAGAGATACAACTTCACAGTGTACCCCAACATATGGGGCCATTCTTAATCAATGGGA
+
+
+NM_024993
+  ENSG00000176204
+  LENGTH = 2000  COMBINED P-VALUE = 1.19e-04  E-VALUE =      4.2
+  DIAGRAM: 98_[3]_552_[8]_192_[3]_111_[4]_961
+
+
+                            [3]
+                            6.4e-06
+                            AGTGCCCTGGGCCTTTAAGAG
+                             + + +++++++++++++++
+76   AAAAGTCTTAGAACCCCAGTTCTCAAATCCTGGTTCTTTAAGAATGTTTCTTTCTTCTTGGTGATTCTTAAACAT
+
+                                                                            [8]
+                                                                            3.1e
+                                                                            AACT
+                                                                              ++
+601  ATATGCTATCAAAGAGCAATTATATTTTCCAACTGTTAACTAGCATTAGAAAGGGCAACATCTTTAATTACTCAT
+
+     
+     -06
+     TCAAAATGTCTCTTTGA
+     + +++++++++++++ +
+676  TAAAACTGTCTTTTGAAAGAGTGATCATCTAAAATCAGATACTAAAACATTATATTGCCAGAATTTAACCTCTCT
+
+                                                                [3]
+                                                                5.3e-05
+                                                                AGTGCCCTGGGCCTTT
+                                                                + ++ ++++ ++++ +
+826  CAATTTACATCCAGCATTACTTATTGACAAGTTTGAAATTTGTGACTATTTGTCTGCTCACTAACATGATCCTGT
+
+     
+     
+     AAGAG
+     +++ +
+901  AAGTGATCTTTTAAATTAAATAATCAGAAACCGGGTCCAAATTTATTCTTATAAAAACAAAAATAATATAACATG
+
+                                              [4]
+                                              1.1e-05
+                                              AGGAGAGAGCTCACACCTCTCCC
+                                                  ++++ ++++++++++++ +
+976  GAAGGTACACCTTTATTTGAGGCTGTTACAGAAAGGGAGGATACCCAGAACTCAAACCTCTCTCCTTTTATTTAT
+
+
+NM_002578
+  ENSG00000077264
+  LENGTH = 2000  COMBINED P-VALUE = 1.20e-04  E-VALUE =      4.3
+  DIAGRAM: 434_[2]_359_[2]_704_[3]_440
+
+
+                                                                [2]
+                                                                7.8e-05
+                                                                ATGAAAAGCAGTGATT
+                                                                  ++++++++ ++++
+376  CATGATGTAACTTAGAGTGCATTACTGAGAGTAATAAACATACTATAACCACTAAAGGGCCGGAAAGCATTGATA
+
+     
+     
+     CACAG
+      + +
+451  GATAATTTTTCAGCACAACAGTTAATGAGTAGACCCCTGTTTTTGTTTTTATTGTTCTTAGGTTTGTTCATAGTA
+
+                                                                     [2]
+                                                                     2.5e-05
+                                                                     ATGAAAAGCAG
+                                                                     ++++++++++
+751  TGGAGACAGAAACATTCCCTTGGGGAGTTCATGCTTTTCCAGTTTACTTTATCCTCCCAGGGGAATGAAAACCAA
+
+     
+     
+     TGATTCACAG
+       ++ + +++
+826  ATATACTCAGATGTAGACACAAGATACTAAAGAATAATGAACTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+                                            [3]
+                                            1.1e-08
+                                            AGTGCCCTGGGCCTTTAAGAG
+                                            + ++ +++++++++++++++
+1501 ACACCAAGGACATCTCGATGTTAAAGCAGCTAACAAGTCATTGTTATGGGCCATTAAAAATCAACAGCCAAACCT
+
+
+NM_021763
+  ENSRNOG00000010533
+  LENGTH = 2000  COMBINED P-VALUE = 1.21e-04  E-VALUE =      4.3
+  DIAGRAM: 788_[2]_130_[2]_361_[2]_173_[4]_462
+
+
+                                           [2]
+                                           2.5e-05
+                                           ATGAAAAGCAGTGATTCACAG
+                                           + +++++ ++ +++++ + ++
+751  GAGACTGCATGTGTTCAGCGCCTGAGAGATTCAGACTGAGAAGAAACAATGACTTAGAGGATGGCAGAGTAAGAG
+
+                                            [2]
+                                            5.9e-08
+                                            ATGAAAAGCAGTGATTCACAG
+                                            +++++++++  ++++++++++
+901  TTGAGGCCAGGGCAAGTGGATACTGCAGCAGCCACTTTAATAAGAAGACAGGATTCACACAGAGGCACACTCGGA
+
+                                                   [2]
+                                                   1.5e-05
+                                                   ATGAAAAGCAGTGATTCACAG
+                                                   ++++++ ++++++ + ++++
+1276 AAGATTACATCACTGAGGAACCACAGTAGACACGCCAACCACATACATAAGACCAAGTGCTGCACAAGTAACCCA
+
+                    [4]
+                    8.0e-06
+                    AGGAGAGAGCTCACACCTCTCCC
+                    ++++++ + ++++++ + +++++
+1501 AGGTGTGGTTTTATGAGGACAAAAAAAAAATCCCTCCGTTACAGAGGAATTGCGTCATCTGTAGGAGTTTCTTCA
+
+
+NM_016957
+  ENSMUSG00000051601
+  LENGTH = 2000  COMBINED P-VALUE = 1.23e-04  E-VALUE =      4.4
+  DIAGRAM: 421_[8]_591_[3]_430_[8]_473_[8]_1
+
+
+                                                   [8]
+                                                   2.4e-06
+                                                   AACTTCAAAATGTCTCTTTGA
+                                                   ++++++++ ++++ ++ +++
+376  TATCTCACTTAGGGTGATATTCTCAAGATCCATCCATTTGCCTGCAAAATTCATGATGTATTGTTGTTTTGTTTT
+
+                                                               [3]
+                                                               3.1e-08
+                                                               AGTGCCCTGGGCCTTTA
+                                                               ++++++ ++  ++++++
+976  TGACCACAGTGTAAGACCTATCTCAGAGTTGTTTTGATTTGCTTTTCCCTGATGACTAAGTACTTTGAATCTTTA
+
+     
+     
+     AGAG
+     ++ +
+1051 AGTGCTTCTTGGCCATTCAAGATTCCTATCTTGTGAATTCTCTGTTTAGCTCTCTGCCCCATTTTTAAAATTGGG
+
+                                                                [8]
+                                                                3.8e-05
+                                                                AACTTCAAAATGTCTC
+                                                                 ++++ + ++++ +++
+1426 ATTTATTAATTTGAGACCTCTCAAATTTTTGGATGTAGACACTTAACCATTAATCTTGCCACTTAAGACTGCCTT
+
+     
+     
+     TTTGA
+       +++
+1501 CATTATGTCCCATAGATTTTGGTATGTTCTGGTTTTATTTTCATTCATTTCTAGGACTTTTAACATTTCATTCTT
+
+                                 [8]
+                                 1.3e-05
+                                 AACTTCAAAATGTCTCTTTGA
+                                  ++++ +   + ++++++++
+1951 TATGGCTGATTTTGGAGAAAAATGTGTGTGCTTTAGTGTTTTTTTTTTTT
+
+
+NM_172499
+  ENSMUSG00000041945
+  LENGTH = 2000  COMBINED P-VALUE = 1.24e-04  E-VALUE =      4.4
+  DIAGRAM: 686_[3]_176_[3]_34_[8]_806_[4]_212
+
+
+                [3]
+                3.3e-05
+                AGTGCCCTGGGCCTTTAAGAG
+                ++++++++++++ +   ++
+676  TTAAAAAATAAAGTGCTATGGTTTTAAGAATACAAACATGGCTTTGATAAGCATTTGAAAGAAAAGTCCACTGTC
+
+                                                               [3]
+                                                               7.7e-08
+                                                               AGTGCCCTGGGCCTTTA
+                                                                +++++ ++++++++++
+826  CTCTCTGAGGCTCATCTCCGTGACATTCAGATGCTGTGTGAGGACATGGCACTGGTGCCATGCTTTGGTCCTTTA
+
+                                           [8]
+                                           8.6e-06
+     AGAG                                  AACTTCAAAATGTCTCTTTGA
+     ++++                                  +++++ +++ ++  ++  ++
+901  AGAGTTCTTAGCGCCTGTGTTAAATTCACAGTGGAAACAACTTAAAAGTGAATTGGTTTTCTCACAGCCAGAGAG
+
+                                             [4]
+                                             6.9e-05
+                                             AGGAGAGAGCTCACACCTCTCCC
+                                             +++ + ++  + ++ ++++++++
+1726 CTGCCTTTCCCTAAGAGCAAATGTTAACTCTACTGTGCCCAGGTGGGAAGAGAAGCCTCTCAGGCAAGCTTCTGA
+
+
+NM_021470
+  ENSMUSG00000029130
+  LENGTH = 2000  COMBINED P-VALUE = 1.38e-04  E-VALUE =      4.9
+  DIAGRAM: 621_[4]_380_[8]_256_[2]_244_[4]_411
+
+
+                          [4]
+                          4.2e-05
+                          AGGAGAGAGCTCACACCTCTCCC
+                          ++++++   ++  +++++ + +
+601  AAAATAAGAAGCCCATAACTCAGGACAATAAAGCAACTTGTTAAAATTCAGACATCCAAAGAGTTAAATGTAGGT
+
+                                                      [8]
+                                                      2.5e-07
+                                                      AACTTCAAAATGTCTCTTTGA
+                                                      +++++ ++++++ +++++++
+976  TTAAGTGGAGTTTAAAATTATATTGTTCCAGAGACTTATCATGTGTACCAGATTTATAATGACTTTTGGGACTTC
+
+                               [2]
+                               5.3e-06
+                               ATGAAAAGCAGTGATTCACAG
+                                +++++++++ ++ ++++ ++
+1276 AGCGCGGGGATGCAGACTTAAGAGGGGTAGGAGGCAATGTCTCAGAGTGCAGAAGGCCAGGAGCCGCAAGGACAC
+
+                                                                       [4]
+                                                                       5.5e-05
+                                                                       AGGAGAGAG
+                                                                       +++ ++ ++
+1501 AGGGCGGATGGCGACGCCACACAGCTCGCAAAACAGTCGAGGGGCCGTTATCAAGTCTTTCGACTGAGGGGACAG
+
+     
+     
+     CTCACACCTCTCCC
+     ++++++ + +++ +
+1576 CAAAAAGCGCACTCCAAAGAGCCGCACGGGTCAGCGCATCCTCGGGACGCATCACGCAGCCTGGCTGGAGGGCAG
+
+
+NM_177001
+  ENSMUSG00000047433
+  LENGTH = 2000  COMBINED P-VALUE = 1.39e-04  E-VALUE =      4.9
+  DIAGRAM: 548_[2]_1093_[3]_317
+
+
+                            [2]
+                            7.3e-08
+                            ATGAAAAGCAGTGATTCACAG
+                            + ++++++++ +++ ++++++
+526  CACAGATGAAGAAACTGAGAGCCAGAAAAAGAAATGAGTCACACAACTAGAGACAAACCCAGTCACTTAGAAACT
+
+                 [3]
+                 4.7e-06
+                 AGTGCCCTGGGCCTTTAAGAG
+                 ++ +++++++++++++ +  +
+1651 AAAAAAACTAACAACACTCTGGTCCCTTCATTGAAGCCAGGGATAACTGCAGAAACTCTCATCTTGGAACACCCA
+
+
+NM_021836
+  ENSRNOG00000003678
+  LENGTH = 2000  COMBINED P-VALUE = 1.39e-04  E-VALUE =      4.9
+  DIAGRAM: 1433_[4]_61_[8]_462
+
+
+             [4]
+             9.4e-07
+             AGGAGAGAGCTCACACCTCTCCC
+             ++++++++ +++ ++ ++  +++
+1426 TACCCGAAAGGAGAGAACTACAAACTGCCCCGGGCTTGGGGCAGCCCAATCTTCTCCCTCCATCCGCCCAGCTTG
+
+                      [8]
+                      3.6e-07
+                      AACTTCAAAATGTCTCTTTGA
+                      +++++ ++++ + +++++++
+1501 GAAGAAAGAGACCACAGAAATTAATAAAGACTTTTTTTTTACTTAAATAGATTCAATAAAAAGTCAAACACACAC
+
+
+NM_011167
+  ENSMUSG00000021345
+  LENGTH = 2000  COMBINED P-VALUE = 1.43e-04  E-VALUE =      5.1
+  DIAGRAM: 175_[2]_314_[3]_577_[4]_584_[3]_160_[3]_83
+
+
+                              [2]
+                              4.1e-05
+                              ATGAAAAGCAGTGATTCACAG
+                                +++++ ++++++  ++++
+151  AAGGGACAGCACATGCTGGTAAGGATGGGAAATAAGGGAAACACATCTCCATTAATGGTGGAAGTACAAACCTGT
+
+                                                                 [3]
+                                                                 1.5e-05
+                                                                 AGTGCCCTGGGCCTT
+                                                                 +  ++++++ ++++
+451  ATTCTAATATTAAAAACAGACATCCTGAATTTTGCAGGCAAATGGATGGAACTTGATAATATCACCCTGAGTCAG
+
+     
+     
+     TAAGAG
+     + ++ +
+526  TTAGTGCAGACACAAAAATAAATGCATTGTATGCAATCACTTTTAATTGGACATTATACATAACGTGTAGGATAA
+
+                                                               [4]
+                                                               4.8e-06
+                                                               AGGAGAGAGCTCACACC
+                                                                + +++++++ ++++++
+1051 ACAAAAGATTGAGGGAATATCCAACCAATGCTTTGCCCAACTTGAAACCTATCCTATTGGAAAAGAGCCAACACC
+
+     
+     
+     TCTCCC
+     + ++++
+1126 TGACACTATTAATGATATTCTGCTATGCTTTCAGACAGGAGCCTGAGAAACTGCATCCAGCAGATGACAGAAATA
+
+                                                                      [3]
+                                                                      2.9e-05
+                                                                      AGTGCCCTGG
+                                                                      ++++ + +
+1651 AAGGTTATAGTTGGACTTATGACACATACTGAAGAGATCATCACTAACTGATATTTGCATCCTAAAATAATTTTA
+
+     
+     
+     GCCTTTAAGAG
+     ++ +++++++
+1726 TTTTTTAAAAATCAAGTTTTGGGGTATCTCAGACATGAATACAGTGTATTTTTATCATCTCTAGTCTTTGCCTCC
+
+                          [3]
+                          5.4e-07
+                          AGTGCCCTGGGCCTTTAAGAG
+                          ++++ +++++   ++++++++
+1876 TGGTTTCCAAAGGACAGCCTGAATGTTATGGAGTATTAAAAGTTTTATAATTCTGATTAGGATACATGTATCAGC
+
+
+NM_020610
+  ENSMUSG00000031019
+  LENGTH = 2000  COMBINED P-VALUE = 1.48e-04  E-VALUE =      5.2
+  DIAGRAM: 1245_[4]_285_[2]_29_[3]_376
+
+
+                                                  [4]
+                                                  9.6e-05
+                                                  AGGAGAGAGCTCACACCTCTCCC
+                                                  +++ ++ + ++++ ++++  +++
+1201 TGGATTGATTAATAAAAAGCAAATTCTTGGAAGAAGCAGAGAAATAGGCCACAAATAAGACCTGGCAGTGTCAAA
+
+                                                          [2]
+                                                          7.3e-07
+                                                          ATGAAAAGCAGTGATTCACAG
+                                                          + ++++++++ + +++ + ++
+1501 CACATGCGCACACGCTTCAGGCTGCAATTATGCTTCTGAAGTGCTGAAATGAGAAAGAAACAAATAATTGATAGC
+
+                                 [3]
+                                 1.2e-05
+                                 AGTGCCCTGGGCCTTTAAGAG
+                                 + ++ + +++++ ++ +++ +
+1576 AAATTCAAAGGAGTTAATCTGAAGGCAAATTAACTTGGGTTTTAAAATGTCTCATGTATACATCTATAATCCTAG
+
+
+NM_026838
+  ENSMUSG00000031253
+  LENGTH = 2000  COMBINED P-VALUE = 1.49e-04  E-VALUE =      5.3
+  DIAGRAM: 8_[2]_55_[3]_1012_[3]_538_[2]_45_[2]_106_[8]_110
+
+             [2]
+             2.5e-06
+             ATGAAAAGCAGTGATTCACAG
+             + ++ ++++++++ + +++++
+1    TGAGTTCCAGGACAGCAAGGGCTACACAGAGAAACCGTGCCTTGGAAAAAACAAACAAACAAACAAGCAGCAACA
+
+              [3]
+              1.0e-05
+              AGTGCCCTGGGCCTTTAAGAG
+              ++ ++++++ ++++++ + +
+76   ACAACAATAAAAACCCTGATTCTTTCACAATTGTATAGAGATCTTAAAATTTTTCCTGTTACACACATATATTAT
+
+                                                                        [3]
+                                                                        4.1e-05
+                                                                        AGTGCCCT
+                                                                        + ++ +
+1051 TGAATAGCCCTCCATCCAAGACAGCTGAAAAACAGCTTAGCAATAAATATCACCTTGCCATGGATAGATTATTTA
+
+     
+     
+     GGGCCTTTAAGAG
+     + ++ +++ ++++
+1126 GATTTATTTAAAGAACCTTTCAGAAAAGCCTCCCTAGGCATGCTTTCCTTCCATTCATAGTATGGGAAATTAATC
+
+                               [2]
+                               7.2e-06
+                               ATGAAAAGCAGTGATTCACAG
+                               +++++++++++++ + + +++
+1651 TTTCAATCCCGATGTCATGCTGAGTCATAAAAAGCAGTGTTCCTCAGAAGTCCTCTCTGGAGGTCCTATGCTAGT
+
+                      [2]
+                      5.9e-05
+                      ATGAAAAGCAGTGATTCACAG
+                       + ++++++++++++   +++
+1726 CATCACTCCACCCAGAGCTTGAAACCAGTGACAGTCACACTTCCCCTCTTCTGCAGCAGACAGCACTAGCTCCTC
+
+                                                                          [8]
+                                                                          5.2e-0
+                                                                          AACTTC
+                                                                          +++++
+1801 TAATCCTCTTGCTTCCCCCTCCCCCAACCATTTCTTGGGGAATAACAAATATAGCTTTGGGGATAATATAGCTTT
+
+     
+     6
+     AAAATGTCTCTTTGA
+     ++ + + +++++++
+1876 AAGACGACTTTTGGCAAATGTAAATGTCCTAACATCTGGGCAGTGTTACCAGAATCCCGGAGGCCCTGACAGACC
+
+
+NM_153270
+  ENSG00000185915
+  LENGTH = 2000  COMBINED P-VALUE = 1.50e-04  E-VALUE =      5.3
+  DIAGRAM: 38_[8]_66_[4]_175_[3]_1026_[8]_609
+
+                                           [8]
+                                           4.2e-07
+                                           AACTTCAAAATGTCTCTTTGA
+                                           +++++ ++++++++ + ++++
+1    TCCTTTACTATTCTATATCCTAAAATCTACTTCTAATCAGCTTTATACTGTTGCCTGTACAGCTCAGTGAATGTA
+
+                                                       [4]
+                                                       4.6e-06
+                                                       AGGAGAGAGCTCACACCTCTCCC
+                                                       ++++++ ++ +++++++ +++++
+76   CTTTCATCTTTAAGAGTTCAGATATATGCCAGTGAATATTTTTGCTGTAGAGGAGAAAGTAAAAACTCCACAGCG
+
+                            [3]
+                            2.1e-05
+                            AGTGCCCTGGGCCTTTAAGAG
+                            ++++ + +  ++ +++++++
+301  GTATGGAATTTTGGGTCCAGTGTAATATTTTTATCATTTAAAAAGAACTCTATTTGTAAAAACATTTATTTACTG
+
+                         [8]
+                         7.7e-05
+                         AACTTCAAAATGTCTCTTTGA
+                          ++++  + ++ ++++++++
+1351 CTTTCTAGATTAAAAAAAAGTGCTTAGAGATTTTTTTTTTCCTTCTCAGTTCCATATTTATATCAAACCATCAGA
+
+
+NM_013255
+  ENSG00000128585
+  LENGTH = 2000  COMBINED P-VALUE = 1.51e-04  E-VALUE =      5.3
+  DIAGRAM: 141_[3]_111_[8]_1249_[4]_122_[8]_81_[4]_187
+
+
+                                                                       [3]
+                                                                       1.0e-05
+                                                                       AGTGCCCTG
+                                                                       + ++ ++++
+76   GCCTCTGTTAAACAGTTTAGAAGGGTATTGAAAGTCACCAAAAGGATTTAAGCTTAAGAAGAACTAATTAACATG
+
+     
+     
+     GGCCTTTAAGAG
+      ++ +++++++
+151  TGTTTTTAAAAAAGATTGCTCAGGAACTATGTCTGGAAAAGAACAAGGACTTGACTTACATTAAGGTAAGAAGTG
+
+                                                     [8]
+                                                     2.0e-05
+                                                     AACTTCAAAATGTCTCTTTGA
+                                                       +++ +++ +++++ ++++
+226  GAATTATAAAATCTGGGGCCCAAAGGAATTGGAGTGAGTTAATTCCTCCCCTTTAAAGTGTCTATTTTCTTCAAA
+
+                                                [4]
+                                                7.3e-05
+                                                AGGAGAGAGCTCACACCTCTCCC
+                                                ++++++++ ++  + ++ ++ ++
+1501 TGGAAGGCGGGGACAAGACATTTAAAACCAGGACCACTCCAGGAGGACAGCACATTAGCTACTGCCCTCCAGCTC
+
+                                           [8]
+                                           2.1e-05
+                                           AACTTCAAAATGTCTCTTTGA
+                                           ++++ +++  +++++++ ++
+1651 TTCCAACTTGGTCTTCTTATTTTCCCTTCCTGCTCCATAACTACAACGTGTTTCTGTTCGTAATCATCTCGGCTT
+
+                                                                      [4]
+                                                                      3.1e-07
+                                                                      AGGAGAGAGC
+                                                                       +++++++++
+1726 TTCCTTACCTCTGCTTGTAAGAAAAGTCCTTTGCGCCATTCCCCTCCAAGCCCCAGGGAAGGGCCTGGAAAGCGC
+
+     
+     
+     TCACACCTCTCCC
+     + ++++++++ ++
+1801 TTACACCTCAGACCCAGTGTCGGTCAGCCGTCAAGGAGCGAGCGACGTGGGAAACCCTGAGATAGGACATTGGCC
+
+
+NM_031817
+  ENSRNOG00000015326
+  LENGTH = 2000  COMBINED P-VALUE = 1.52e-04  E-VALUE =      5.4
+  DIAGRAM: 72_[8]_1324_[8]_97_[2]_444
+
+                                                                             [8]
+                                                                             9.0
+                                                                             AAC
+                                                                             +++
+1    TAATACCAATTTTTTTCTTTTTTTTCAGTTGCCAGATGTATACTTTGGGTTTTTGTTAAGGATTATTTTTTAAAA
+
+     
+     e-09
+     TTCAAAATGTCTCTTTGA
+     ++ +++ +++ +++++++
+76   TTAAAAGTGTATTTTGTAATTTAGAAGGAATTTAACAATTTTTAGACTTCAAAATTGAGAATTTTCACTTTTTAG
+
+                                                                        [8]
+                                                                        7.9e-05
+                                                                        AACTTCAA
+                                                                        +++++ +
+1351 CATCTGCTAATTGTACCTTGTCTGAAACTTTACTGTGTGTTTTTGGATTCCTTGTACTGGCATACACAGATTTAC
+
+     
+     
+     AATGTCTCTTTGA
+     + + ++++ +++
+1426 ATTTGTTCCTTTTTTTAATCACGAGTCCTGTGAAGCATAAGTAAAAGAATTCACACAACAGTTTGAATATTCACC
+
+                                        [2]
+                                        3.1e-05
+                                        ATGAAAAGCAGTGATTCACAG
+                                        + +++ + +++++++ ++ +
+1501 TTAAAGCTTACTTAAAATACTTTTAAAATGAACTGAAGAACATAAGTGATACAGATGGGAGGAGGCTACAGCTCA
+
+
+NM_172801
+  ENSMUSG00000050201
+  LENGTH = 2000  COMBINED P-VALUE = 1.56e-04  E-VALUE =      5.5
+  DIAGRAM: 499_[3]_72_[2]_367_[8]_486_[2]_492
+
+
+                                                      [3]
+                                                      4.6e-05
+                                                      AGTGCCCTGGGCCTTTAAGAG
+                                                       +++ ++++++++++ ++++
+451  AGTTGGAGATGATAATGTTTAAGTAAACATGCTCAAGGCAGCCCTTCAAGGTGGCATGGGTCATGAAAAAACAAC
+
+                                                                        [2]
+                                                                        1.0e-05
+                                                                        ATGAAAAG
+                                                                        + ++ +++
+526  ACTCAGGGAGTAGGGACAGGCAGGTCTCTGTGAGTTCAAGACCAGTCTGGTCTACAGTATGAGTTCCAGGATAGC
+
+     
+     
+     CAGTGATTCACAG
+     ++ ++ + ++ ++
+601  CAAGGCTACATAGTGAGATTCTGTCTAGTAAGTAGATGAGAATAAAATAATGTACCATAATAGAGAGAGAAAAGA
+
+          [8]
+          9.3e-07
+          AACTTCAAAATGTCTCTTTGA
+           ++++ ++++++   + +++
+976  NNNNGTACTTTAAAATGAAATCTTTCAAAATGAAAGAAAGAAAACTGCATTTCCTGTCTTTCCCAATAGCCAGCC
+
+                                                                   [2]
+                                                                   2.4e-06
+                                                                   ATGAAAAGCAGTG
+                                                                   + ++ ++++++++
+1426 CTCGGGCGACAGGGGTGGGCAGATCTGTGTTCGAGGCCAGCCTGGCCTACCAAGCAAGTTCTAGGACAGCCAGGG
+
+     
+     
+     ATTCACAG
+      + +++++
+1501 CTACACAGAGAAACCCTGTCTCAAAAAAACAATAACAAAAGTCAGATGATGGAGAAAATGTCCTCTAAGTAAGTC
+
+
+NM_033072
+  ENSMUSG00000025409
+  LENGTH = 2000  COMBINED P-VALUE = 1.58e-04  E-VALUE =      5.6
+  DIAGRAM: 460_[2]_458_[3]_190_[8]_399_[3]_409
+
+
+               [2]
+               2.4e-06
+               ATGAAAAGCAGTGATTCACAG
+               + ++ ++++++++ + +++++
+451  ACAGAGTTCCAGGACAGCCAGGGCTACACAGATAAAGTCTCGAAAAAAGAGAGAGAGTTTCTTCTGGGTGTTCCT
+
+                                            [3]
+                                            8.9e-05
+                                            AGTGCCCTGGGCCTTTAAGAG
+                                            + ++ +   + ++++++++ +
+901  CCTCTAACGTCTGTCTCTGGCCGAGTCTAACACTGTACAACTGTCTCTGACCATTAAATGCTGTTGTACCGTGGA
+
+                              [8]
+                              6.3e-06
+                              AACTTCAAAATGTCTCTTTGA
+                              +++++ + + ++++++   ++
+1126 GAGTTTCAAAATATTCTCATCTGTTAAATTAAGAGTGTCTCCCATAGAAAAGCAGTGGAGGCCCCACAGGGCAAG
+
+                                                                           [3]
+                                                                           7.3e-
+                                                                           AGTGC
+                                                                           + +++
+1501 TCTGTGCTTTGAGCAACCTGAGCCAGAGGCAGAGGGGTGCTAGTGGGAATCGGGGAGATGCATGATGCTTATTGC
+
+     
+     06
+     CCTGGGCCTTTAAGAG
+     ++++  +++++ +  +
+1576 TCTGTACCTTTCACTGGGAAGGAGGGCAGCAGCCAACAGTAGCTCACAGGTTTGTAAACTGAGCCTGCTAGCTTT
+
+
+NM_177229
+  ENSMUSG00000047113
+  LENGTH = 2000  COMBINED P-VALUE = 1.60e-04  E-VALUE =      5.7
+  DIAGRAM: 412_[2]_472_[8]_74_[4]_977
+
+
+                                          [2]
+                                          2.5e-06
+                                          ATGAAAAGCAGTGATTCACAG
+                                          + ++ ++++++++ + +++++
+376  TGAGTTCGAGGCCAGCCTGGTCTACAAAGTGAGTTCCAGAACAGCCAGGGCTACACAGAGAAACCCTGTCTCGAA
+
+          [8]
+          8.9e-07
+          AACTTCAAAATGTCTCTTTGA
+           ++++ +++++++ ++  ++
+901  ACGTCTGCTTTAAAATGTATTCAGGTAAGAAAATTTCTTAAGCCTCTTATCAAAGATATTAGCAATTAATACCTT
+
+                              [4]
+                              3.0e-05
+                              AGGAGAGAGCTCACACCTCTCCC
+                                 ++++++++++++ +  ++ +
+976  TATGGTAACTAAATTTCAGACTAACTTTACAGAGAACAAATCAGTCTCAGCATTGTCCTCTGAGGATTCTGGCAG
+
+
+NM_026776
+  ENSMUSG00000035120
+  LENGTH = 2000  COMBINED P-VALUE = 1.61e-04  E-VALUE =      5.7
+  DIAGRAM: 519_[4]_464_[2]_158_[3]_794
+
+
+                                                                          [4]
+                                                                          4.5e-0
+                                                                          AGGAGA
+                                                                           +++++
+451  GTGTGTCTGAAGACAACTACAGTGTACTCACATATCTAAAATAGATATATCTTTAAAAAAAACAGACCATGGACA
+
+     
+     5
+     GAGCTCACACCTCTCCC
+      +++++++++ + + +
+526  CAGCACACACATGTAATCTCAGTTCTCAGAAGGTTGAGGCAGTGGGATTAAAGGCTGAGCTCTGGGATAAAGGTG
+
+                                    [2]
+                                    4.5e-06
+                                    ATGAAAAGCAGTGATTCACAG
+                                      ++++  +++++++++ +++
+976  TTTGGAGCTATGAATTAAACATTGTGACAGAGAGAGATTAAGGGATTCTCACAATAATTCTTCAGGGAACAGAAA
+
+                                                                 [3]
+                                                                 2.7e-07
+                                                                 AGTGCCCTGGGCCTT
+                                                                 ++++++++  +++++
+1126 TAAAGGTGAGCATCTCCTAGCAGCCCAAGGTCAGCCTTCTTTTCAATACACCTTTTGTTTAGTGCTCTTATCCCT
+
+     
+     
+     TAAGAG
+     ++++ +
+1201 TAAATGGGTTCATATTGCATGTCAGGTCAGAAGACTCCCAAACTTGTATTTCTAGATCCAACAAGGCTCCCCTGA
+
+
+NM_027279
+  ENSMUSG00000041396
+  LENGTH = 2000  COMBINED P-VALUE = 1.63e-04  E-VALUE =      5.8
+  DIAGRAM: 60_[2]_237_[8]_1023_[4]_615
+
+                                                                 [2]
+                                                                 2.4e-06
+                                                                 ATGAAAAGCAGTGAT
+                                                                 + ++ ++++++++ +
+1    GGAGTCAGAGGCAGGCGGATTTCTGAGTTCGAGGGCAGCCTGGTCTACAGAGTGAGTTCCAGGACAGCCAGGGCT
+
+     
+     
+     TCACAG
+      +++++
+76   ACACAGAGAAACCCTGTCTTGAAAAAACAAAACAAACAAACAAACAAAAAAAGATTGATAAAAAGGTTCCTGTAA
+
+                       [8]
+                       1.1e-05
+                       AACTTCAAAATGTCTCTTTGA
+                       ++ ++++ +++++ +++++ +
+301  ATAGGGAACCTTGGAGACAAGTTCAGACTGTGTTTTTAAAATTAAGCCACATATAAAAGAGGGAGGTTTGGAGTT
+
+                 [4]
+                 3.2e-06
+                 AGGAGAGAGCTCACACCTCTCCC
+                 + + ++++ +++ ++++ ++ ++
+1351 AGATGTGGTACCACGGAAGAACTACAACTACATCCTTCAGCTCTCACGCTTCATCCAGATACTGAGCACCAGCGC
+
+
+NM_052828
+  ENSG00000137394
+  LENGTH = 2000  COMBINED P-VALUE = 1.72e-04  E-VALUE =      6.1
+  DIAGRAM: 814_[2]_215_[8]_137_[4]_737_[2]_11
+
+
+                                                                     [2]
+                                                                     6.5e-05
+                                                                     ATGAAAAGCAG
+                                                                       ++ ++++ +
+751  AAAGTGACAGCTAGCAATTTCTGCATCCCAGATGGAGTTAATGTCACCAAGAGAACTTGTACTAGGAGTAGGAGG
+
+     
+     
+     TGATTCACAG
+      ++++ ++++
+826  AGACTGACAGCCCCCAGGGTCTCTCCTCAGGAGAGAATTCAGTTATACTGAAGATGCCTTCCAGGCCCCCCTTGG
+
+     [8]
+     5.9e-06
+     AACTTCAAAATGTCTCTTTGA
+     +++++++ ++ ++ ++  +++
+1051 AGATTCAGAAGGGATCCATGATGAATGTGTCATTAGTTGGCAAGAAGAGCAGACACAGAGAGAATCAGAGATGCA
+
+             [4]
+             1.8e-05
+             AGGAGAGAGCTCACACCTCTCCC
+              ++++  +++ + +++++++ ++
+1201 CGCTTCTTTGGACTTCGAGCCCACTTCTTACCGGTAGGTCCTGGGCATACAACATACCACTNNNNNNNNNNNNNN
+
+                       [2]
+                       3.0e-07
+                       ATGAAAAGCAGTGATTCACAG
+                         ++++++++++ +++ ++++
+1951 CAAGTAAAATTACCCTAGGGGAGAAGCAGGTACTGACAGACCAACATGAG
+
+
+NM_153189
+  ENSG00000106304
+  LENGTH = 2000  COMBINED P-VALUE = 1.74e-04  E-VALUE =      6.2
+  DIAGRAM: 352_[3]_1004_[8]_602
+
+
+                                                         [3]
+                                                         5.3e-09
+                                                         AGTGCCCTGGGCCTTTAAGAG
+                                                         + +++ ++++++ ++++++++
+301  AAAATACTTGTAAATATTTTTGCTTTAGGAGAAAATATGTGCTTTCAGGGATATTACGCTGGTCTCTTAAAAGTT
+
+                                [8]
+                                4.0e-05
+                                AACTTCAAAATGTCTCTTTGA
+                                + +++ +++ +++ +++ ++
+1351 AAGTTTGTTCTGGCAAACTATTTTCTAACATTAAAATTGTATTTGTTTCCACTTTTTATTTGAGAAACAGTATGA
+
+
+NM_003117
+  ENSG00000106304
+  LENGTH = 2000  COMBINED P-VALUE = 1.74e-04  E-VALUE =      6.2
+  DIAGRAM: 352_[3]_1004_[8]_602
+
+
+                                                         [3]
+                                                         5.3e-09
+                                                         AGTGCCCTGGGCCTTTAAGAG
+                                                         + +++ ++++++ ++++++++
+301  AAAATACTTGTAAATATTTTTGCTTTAGGAGAAAATATGTGCTTTCAGGGATATTACGCTGGTCTCTTAAAAGTT
+
+                                [8]
+                                4.0e-05
+                                AACTTCAAAATGTCTCTTTGA
+                                + +++ +++ +++ +++ ++
+1351 AAGTTTGTTCTGGCAAACTATTTTCTAACATTAAAATTGTATTTGTTTCCACTTTTTATTTGAGAAACAGTATGA
+
+
+NM_004490
+  ENSG00000115290
+  LENGTH = 2000  COMBINED P-VALUE = 1.74e-04  E-VALUE =      6.2
+  DIAGRAM: 677_[3]_153_[3]_257_[2]_576_[8]_253
+
+
+       [3]
+       2.3e-05
+       AGTGCCCTGGGCCTTTAAGAG
+       +++++++ ++++++++  +++
+676  GAAATACTCAGGGCCATTCTGAGAGGTTGAAAAAGGCAAGTGTGTTTACTCATGCTGTTCTACTTAGAAGTCCTG
+
+                               [3]
+                               2.4e-05
+                               AGTGCCCTGGGCCTTTAAGAG
+                               ++++   +   +++++++++
+826  TAGCAATTAGGCTTGCATTAGCTAAAAGTAAGTTAAATCTTTAAAAATTCAATAAATAATTCTTTTTCAGTTATT
+
+         [2]
+         1.3e-06
+         ATGAAAAGCAGTGATTCACAG
+         ++++++++++  ++ ++++++
+1126 AAAAATGGGAACAAAAGAATCACAGAATCTAAAAACTATTCCCAGATGGTCTCGAAGAAGTGATAAGGAAGGTTA
+
+      [8]
+      3.6e-06
+      AACTTCAAAATGTCTCTTTGA
+       ++++++++++ ++++++ +
+1726 ATAATTCAAAATAGTTTTTAGTTTCATTAACAGGTTGTGGATAGTTTCTTTTAATTGATAAGTTATTATCATATT
+
+
+NM_153784
+  ENSMUSG00000043782
+  LENGTH = 2000  COMBINED P-VALUE = 1.78e-04  E-VALUE =      6.3
+  DIAGRAM: 120_[8]_1007_[2]_521_[8]_134_[3]_134
+
+
+                                                  [8]
+                                                  3.0e-05
+                                                  AACTTCAAAATGTCTCTTTGA
+                                                   +++++ ++++  +++++++
+76   CTCCCAAATGCTGGGATTAAAGGTGTACGCTGCCACCACCTGGCCTACTTCCTACTTCCTTTTTTTTTTTTTTTT
+
+                            [2]
+                            5.0e-06
+                            ATGAAAAGCAGTGATTCACAG
+                            ++++++ ++ ++ ++ +++++
+1126 AATTTGAGGCCATCCTGTGACACATGAGACCCTGTCACACACACATCCCAAAAATGGCACCGCACACATTCACTG
+
+                                             [8]
+                                             2.9e-05
+                                             AACTTCAAAATGTCTCTTTGA
+                                              +++  ++++ ++++++ +++
+1651 TAGATGCTGCTTCCTTAGGGTGTTACAAAAGAAAAAAAATTACTATAAAAGGGCTTTCTTAGTGGAGGGCTTACC
+
+                                                  [3]
+                                                  4.6e-07
+                                                  AGTGCCCTGGGCCTTTAAGAG
+                                                  ++ +  ++++ ++++++++++
+1801 GTAGATCTGCACATAGTAGGTGTTCAATTAATGTTCCCAGTAAGAAGCAAGATGGATCTTTAAGAGGATGGATGT
+
+
+NM_011741
+  ENSMUSG00000037327
+  LENGTH = 2000  COMBINED P-VALUE = 1.81e-04  E-VALUE =      6.4
+  DIAGRAM: 1899_[2]_80
+
+
+                             [2]
+                             9.6e-10
+                             ATGAAAAGCAGTGATTCACAG
+                             + +++++++++++++++++++
+1876 GAGTTCAATCCCCACGTCCCACAAAGGAAAGGAAGTGACTCACACAAACTGTCTTCTCAGGACACGCAAGTGGCA
+
+
+NM_002192
+  ENSG00000122641
+  LENGTH = 2000  COMBINED P-VALUE = 1.86e-04  E-VALUE =      6.6
+  DIAGRAM: 28_[4]_183_[3]_96_[8]_1046_[4]_202_[8]_126_[8]_147_[2]_21
+
+                                 [4]
+                                 1.4e-05
+                                 AGGAGAGAGCTCACACCTCTCCC
+                                 ++++++++ +++ +++++ +
+1    ATAGTATACCAACTTAGTATATTTTTCAAGGAGAGCTAAACCACCTTTTGTAATGTTTGGTTTCTCACTGTTATC
+
+              [3]
+              4.7e-06
+              AGTGCCCTGGGCCTTTAAGAG
+              ++ +++++++ + ++ +++++
+226  TGTAAAAATAGGGCTCTGGATTTTCAAAAGCACATACATGAATAATTTATTAGCTATTCCAGGCAAGCTAAGTAC
+
+                                                        [8]
+                                                        7.0e-05
+                                                        AACTTCAAAATGTCTCTTTGA
+                                                        +++   ++  ++++++++++
+301  TAGAATAAACTAGATAAAAACTTGGCTTTAAGCATGTACTTTGATATTTATAAAACAAAGGTGTTTTTTTTTCAT
+
+                                                                         [4]
+                                                                         5.2e-05
+                                                                         AGGAGAG
+                                                                         ++  +++
+1351 NNNNNNNNNNNNNNNNNNNNNNNNNNGCTATTTTGTGTGCTGTAGCAGTTCTTTTATAGCTCACATTAAGTGCAG
+
+     
+     
+     AGCTCACACCTCTCCC
+     + ++ +++++ +++++
+1426 CTCTTAAACCCCACCCCCACCCCCAACCAAAGAAAATACTTGTTAAATAAGGATTAGACAGGTCAAACACCATTG
+
+                                                                         [8]
+                                                                         6.1e-05
+                                                                         AACTTCA
+                                                                          ++   +
+1576 ATGGACAAACATATTCCTTTCAAGTATCTCTCCTTGAAGAAAATAAAAATTAATCAGATTACTTCCAATACAAAA
+
+     
+     
+     AAATGTCTCTTTGA
+     ++ + ++++++++
+1651 AAGTCTCTCTTTTTGTTCTCTCTCAGGTAAACAGTTTCAAACCTATTAGGTTGCATAGTTCTAAGATCATAAGCA
+
+                                                                      [8]
+                                                                      4.9e-05
+                                                                      AACTTCAAAA
+                                                                      ++ ++  +++
+1726 CCTTAACGAAATGTAACTTGGTATTCTTTTTTCCTTGATCTTTCATTCTTTCCATACTTGTTCTCAATTTTTTAA
+
+     
+     
+     TGTCTCTTTGA
+     ++++ +++++
+1801 TGTTGTTTGTTTTGTTTTGTTGCAAAAGTGGTACAGAAAAAGGAAGTGTGACCACCTGCACACGATTGTTCTTTT
+
+             [2]
+             5.7e-06
+             ATGAAAAGCAGTGATTCACAG
+             + +++ + +++++ + +++++
+1951 TGATATACACAGAGATAAGTGTCACACAGACATACCTTATGACCTGGGTA
+
+
+NM_031249
+  ENSMUSG00000050614
+  LENGTH = 2000  COMBINED P-VALUE = 1.86e-04  E-VALUE =      6.6
+  DIAGRAM: 502_[8]_158_[4]_247_[2]_97_[3]_473_[8]_72_[4]_321
+
+
+                                                         [8]
+                                                         1.2e-05
+                                                         AACTTCAAAATGTCTCTTTGA
+                                                          ++++ ++  ++  + +++++
+451  GGATGTCCTGGAAGTATCCCAAGTGCTAGGATTAAAGATGAGCCGCCATGCCTAATTTATGGTGAATGTTTTATT
+
+           [4]
+           1.8e-05
+           AGGAGAGAGCTCACACCTCTCCC
+            + +++ ++++ +++++ ++ ++
+676  CAAATTTGAAGACAGCTTAAACTACATACACAGTGAATTTTTCAGGCCAACCTGGGCTGTAGAAGGGCCCTGTAT
+
+                                                        [2]
+                                                        3.6e-06
+                                                        ATGAAAAGCAGTGATTCACAG
+                                                        + ++ ++++++++ + +++++
+901  GTAGGTGGATCTCTTGAGTTCAACGCCAGCCTCATCTACAGACTAAGTTCTAGGGCAGCCAGGGCTACACAGACA
+
+                        [3]
+                        2.4e-05
+                        AGTGCCCTGGGCCTTTAAGAG
+                        + ++   + +++++++ +++
+1051 CCAAGTTAAAACAGTGAAAATTATGTTTGGTCATTTAAAATAAAATGGCAGTATTTAAAGTACAAAACTGGCAAG
+
+                                                                    [8]
+                                                                    4.2e-05
+                                                                    AACTTCAAAATG
+                                                                     ++++  ++++
+1501 TTTAATGTTGTTAAGCTATTTTAAGGGATAAATAACAATATTTTTACCTCATAATATTGGATATAATTTTAAATA
+
+     
+     
+     TCTCTTTGA
+     +++++ +++
+1576 TTTCTATTATCCTACACTAAACGAAAGCTGTGAAAACACATTAAGCCTTAAATCAGCTTCCGGCTTGCGGTTTCC
+
+           [4]
+           4.4e-05
+           AGGAGAGAGCTCACACCTCTCCC
+            ++++ ++ ++ +++ +   +++
+1651 CGCCGGTGGAACGCAATGAAAACAGCCCCGAGTGTGTGGAAACACAGACGCTGCCGGTTACCCCATGGAAGCCCG
+
+
+NM_016704
+  ENSMUSG00000022181
+  LENGTH = 2000  COMBINED P-VALUE = 1.87e-04  E-VALUE =      6.6
+  DIAGRAM: 587_[8]_93_[3]_219_[2]_432_[4]_583
+
+
+                                                                   [8]
+                                                                   8.2e-05
+                                                                   AACTTCAAAATGT
+                                                                   +++++++ + +++
+526  AGATTCTGCTGAAAGAATACCCTGATATAGCTGTCTCTTGTGAGGCTATGCCAGTTCCTGGCAAATTCAGAGTGG
+
+     
+     
+     CTCTTTGA
+      +  +
+601  ATGCTCACAGTCGTCTATAGGATGGAACACAGGGTCCCCAATGGAGGAGCTAGAGAAAGCACCCAAGGAGCTGAA
+
+                               [3]
+                               1.0e-05
+                               AGTGCCCTGGGCCTTTAAGAG
+                               ++ ++++++  + + + ++
+676  GGGGTCTGCAACCCTATAGGTGAAACAACACTATGAACTAATCAGTACCCCCAGAGCTCGTGTCTCTAGCTGCAT
+
+                                              [2]
+                                              3.4e-07
+                                              ATGAAAAGCAGTGATTCACAG
+                                              + ++++++++ ++ + +++++
+901  TTGGAATAGCATTTGAAATGTAAATGAAGAAAATATCTAATAAAAAAGGAAATGGCACACAGGCTGATAGTGTCA
+
+                                                 [4]
+                                                 6.9e-05
+                                                 AGGAGAGAGCTCACACCTCTCCC
+                                                 ++++++++ ++++++    +++
+1351 GGATTTTCTTGGCAGAATCACTTGGTTTTGAATCTATTGTCACTAGGAGAGAAAAAAAATGGTTCCTAACTGAAT
+
+
+NM_146140
+  ENSMUSG00000044528
+  LENGTH = 2000  COMBINED P-VALUE = 1.88e-04  E-VALUE =      6.7
+  DIAGRAM: 443_[2]_103_[3]_254_[8]_99_[3]_23_[8]_386_[4]_98_[4]_443
+
+
+                                                                         [2]
+                                                                         1.3e-05
+                                                                         ATGAAAA
+                                                                         + + +++
+376  AACCCCCATACCAGGCTGGAGGAATCACCCCAGTAAGAGGAGGAAAAAAGGCCCCAGAACTGAACATCAGATGAG
+
+     
+     
+     GCAGTGATTCACAG
+      ++ ++++ +++ +
+451  TCAATGACACACCCACTCCCATTGTTAGGAGTCCCACAAAAACCCCAAGCCAAACCACCATAGCAAGTATGGGGA
+
+                                               [3]
+                                               9.4e-05
+                                               AGTGCCCTGGGCCTTTAAGAG
+                                                   +++++ +++++++ +++
+526  TGAGGGAATGCAGATCCATGTATTCCCTGTGATTGGCGCTCATTCTCTCTGAGTCCTTAGGAGCCCAGCTTAGTT
+
+                      [8]
+                      2.3e-05
+                      AACTTCAAAATGTCTCTTTGA
+                      ++++++++ +++ +++ + +
+826  CCGTATTTTCCTCTGGAAAATTCAAGATGCCTTCTATTGATACAATCTCCAAGCTAAATAAACAAGGTTCTATTT
+
+                                                                   [3]
+                                                                   1.3e-05
+                                                                   AGTGCCCTGGGCC
+                                                                   ++ +++ ++ +++
+901  ATACTGCGCTATGCTATGTAGCGAACTGAAGGATGACGATAAATCTCAGCACTCTAACGATAAACGCTTTGTTTC
+
+                                    [8]
+                                    4.9e-05
+     TTTAAGAG                       AACTTCAAAATGTCTCTTTGA
+     +++ +++                        ++++ ++++++ + +  +++
+976  TTTTAAAAGGGCCTGTAGCGTTATTCGTCTGAACTGCATACTTTATACTTTCACCCATCATTCCTCTCCGTAAGG
+
+                                                                    [4]
+                                                                    9.1e-05
+                                                                    AGGAGAGAGCTC
+                                                                    +  ++++++++
+1351 TTCTGAAACTGCTTCTTTGCATTGGTGTTGCCAGGTCTGACTGATTTAACATAGAGACAGCACAATACAGCGCTG
+
+     
+     
+     ACACCTCTCCC
+     + + + +++ +
+1426 AGAATACTCTGGTTCCACTCAGGATTTAAATCGTGGCTACTGATATTTATTAGACCTGTGGTGATAATAATGGCA
+
+                                       [4]
+                                       4.4e-06
+                                       AGGAGAGAGCTCACACCTCTCCC
+                                       ++++++++++++ +     ++++
+1501 GGCATGGTGTGTCACAAAGAGGTGGGTGGGGAAAAGGAGAGAGAAACAGAGAGACAGAGGCAAAGACAAAAAACA
+
+
+NM_026856
+  ENSMUSG00000049606
+  LENGTH = 2000  COMBINED P-VALUE = 1.93e-04  E-VALUE =      6.8
+  DIAGRAM: 397_[3]_368_[8]_269_[2]_170_[4]_199_[2]_457_[2]_12
+
+
+                           [3]
+                           6.5e-05
+                           AGTGCCCTGGGCCTTTAAGAG
+                           ++++  +++++++++  ++
+376  GTTTAGACATAGTACTACCCAGAGTGGGCTGGGCCCTCTAATATTAATCAATAAGACGCCCCCAGACCAATCTGA
+
+                                         [8]
+                                         4.9e-07
+                                         AACTTCAAAATGTCTCTTTGA
+                                         +++ +++ + ++++++ +++
+751  TCAACACAAGCCTATACCCAACATTATAAATACTGAAAACTCAGAGTGTTTCCTTTTTGAACCAGGAGCGAGGCA
+
+                               [2]
+                               4.0e-05
+                               ATGAAAAGCAGTGATTCACAG
+                               ++++++++++ +++   ++ +
+1051 AAGAGATGGTTCCTAAAAAGTAAATAATGGAAGGAAAGGAGATACGGAGGAAAAGAGGGGAGGAGAGGTGGGGGT
+
+                                                                        [4]
+                                                                        2.9e-05
+                                                                        AGGAGAGA
+                                                                        ++++++ +
+1201 TTAATTCTGAACTATGTGGCTATACTAATTCTTCCTGCATTCCTATACATTGATAGCTGTCATGATTAGGAAAAA
+
+     
+     
+     GCTCACACCTCTCCC
+      ++++++  +++ +
+1276 AAAAAAAAGTCATAATTAAATCTAAGAGGCTCTAAAAATTCCAAGTCTTCCAACTAGGAATCCTGTCAATTTTTT
+
+                                                                     [2]
+                                                                     5.7e-05
+                                                                     ATGAAAAGCAG
+                                                                     + +++++ +++
+1426 GCACACTGGTGTAGTCAGTCTCAGAAGGGCTTGAAATGGAGGATTCTGAGTTCAAAGCCAGCCCAGGGAAATCAG
+
+     
+     
+     TGATTCACAG
+     +++  + + +
+1501 TGAGACCCTCCCCAAACAAACAGAGCGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCCA
+
+                      [2]
+                      2.2e-05
+                      ATGAAAAGCAGTGATTCACAG
+                      + ++ +++++ ++ ++++++
+1951 TTTCCCATGAAATAAAAAAAATAACAAATGTTTCACATTAGAGGCACTGT
+
+
+NM_153572
+  ENSMUSG00000041298
+  LENGTH = 2000  COMBINED P-VALUE = 1.94e-04  E-VALUE =      6.9
+  DIAGRAM: 97_[2]_458_[4]_196_[3]_861_[8]_102_[4]_156_[8]
+
+
+                           [2]
+                           1.4e-05
+                           ATGAAAAGCAGTGATTCACAG
+                           +++++++++++ + +++ +++
+76   ATAAATAAAAGCAAGTTTGTTCATGGAAGGAAGAGTCTCTCAGTCTAATTTTAAAATTAGTCCCATTACAAATGT
+
+                                                        [4]
+                                                        9.6e-05
+                                                        AGGAGAGAGCTCACACCTCTCCC
+                                                         ++ ++ +++ +++++++ ++++
+526  AGACCTTGGTAGTGAGCTTCAGGATCAAGCTAACAGCTCAGTGGGGGAGAGGGGTAAACGACAAAACCTGTCAGT
+
+                                                  [3]
+                                                  9.7e-06
+                                                  AGTGCCCTGGGCCTTTAAGAG
+                                                   ++++++ + ++ ++++++++
+751  ATTTATAGGTTTTAGTAAGAAAAAGGGGTCAAATATCTTAAGCAACATACTAGGAGTTTTTAAAAGTGCCTCTAA
+
+                                [8]
+                                7.2e-05
+                                AACTTCAAAATGTCTCTTTGA
+                                +++++ ++++  + ++  +++
+1651 ACTCTAACAGGAAGTATTAGATTCTAGAGATTTAAAAATTATTCCTGAAGAACTTGTGATCATTCCACTCAAAGT
+
+     [4]
+     8.2e-06
+     AGGAGAGAGCTCACACCTCTCCC
+      + +++++++++ + +++++ ++
+1801 TGCACAGAGCTACCCCTTCAGCCCGGGCCACTGGCTCTCTTATTCTGTACACAAGTATGAGGTTTGGTAGCCTGG
+
+                                  [8]
+                                  1.7e-05
+                                  AACTTCAAAATGTCTCTTTGA
+                                  ++++++ + ++ ++++++++
+1951 GGCCTTCAAGTGAAGATTCCTCTGATTAAAACTTCCAGATTTTTTTTTTT
+
+
+NM_024251
+  ENSMUSG00000030051
+  LENGTH = 2000  COMBINED P-VALUE = 2.14e-04  E-VALUE =      7.6
+  DIAGRAM: 73_[2]_64_[4]_508_[8]_658_[4]_609
+
+                                                                              [2
+                                                                              2.
+                                                                              AT
+     
+1    TTTCAACATCCTTGAATTTATACTGAGTGGAAGGGTTAAAGGACCCCTTAAAGATTGAGTCAGTGCAGGATGGGA
+
+     ]
+     8e-08
+     GAAAAGCAGTGATTCACAG
+     +++++ ++ ++++++++++
+76   AGAAGTCAAGGACTCACAGGTTCACCAACCCCTCCACAATAGAAGGTGTGAGTGTTAATCTTGCTTTGTCACTTG
+
+             [4]
+             6.9e-05
+             AGGAGAGAGCTCACACCTCTCCC
+              ++ +  +++++++++++ + ++
+151  ACTGGACCTGGGGTTAGCTAAAACCTGTGCCTCTGGGCAGGGCTGTGAGGGTGTTTCCCTGAAGGACGGACTTGG
+
+                   [8]
+                   4.6e-05
+                   AACTTCAAAATGTCTCTTTGA
+                   + +++ ++ ++++++++ ++
+676  CTGATAATAGAGAAATATTTATTCTGTCTTTCTTCAGTTTAAGTAAATTTAACCCTGTGCAATTACATGGAGAGA
+
+                       [4]
+                       3.7e-05
+                       AGGAGAGAGCTCACACCTCTCCC
+                       +   ++++++++ ++++++++++
+1351 GAGGCCAGAACCCTATCCACAGCAGAGAACGCACCTCTCAGGTGACCCTCTGAGAAGACAGTGGCTTGAAGCTTC
+
+
+NM_176831
+  ENSMUSG00000009768
+  LENGTH = 2000  COMBINED P-VALUE = 2.15e-04  E-VALUE =      7.6
+  DIAGRAM: 44_[8]_733_[3]_368_[2]_99_[8]_672
+
+                                                 [8]
+                                                 7.9e-07
+                                                 AACTTCAAAATGTCTCTTTGA
+                                                 ++++++++++++ ++++ +++
+1    CTCTATGCAAAAAAACCCTCGCTTGAGAGCTGCAAAATACACTCAGATTCAAACTGCCTCTGTGATTTGTTTCTG
+
+                                                     [3]
+                                                     3.9e-05
+                                                     AGTGCCCTGGGCCTTTAAGAG
+                                                     +++++++  +++++++++++
+751  CACTTACGTGCAATGTTGCTCTCTGTTACCTGGTCAACCGTAGCCTGAAATACCAAAGTTCATTAAAACGATCCA
+
+                                                                   [2]
+                                                                   2.3e-06
+                                                                   ATGAAAAGCAGTG
+                                                                   + ++ ++++++++
+1126 TCGAGAAGCAGAGACGGGAGGCTCTCCGAGTTCCAAGCTACTCTGGTATAAAGAGGAAGTTCAGGACAGCCAGGG
+
+     
+     
+     ATTCACAG
+     ++ ++ ++
+1201 ATACAAAGAGAAACCTTGTCTCGAGACAACAAAACAAAACAACAGAAAGTCTAAGTTCTTGACTTGATACAATTT
+
+                                     [8]
+                                     7.7e-05
+                                     AACTTCAAAATGTCTCTTTGA
+                                     ++++   ++++++ +++ ++
+1276 TAAAGTTGTATGTTGAGTTCACACAAGTTTTTAAATATTTACTGTATCTGTTTTGTATGTGTGTACACATGCAGA
+
+
+NM_172599
+  ENSMUSG00000037526
+  LENGTH = 2000  COMBINED P-VALUE = 2.22e-04  E-VALUE =      7.9
+  DIAGRAM: 1166_[2]_279_[8]_391_[8]_101
+
+
+                                              [2]
+                                              2.4e-06
+                                              ATGAAAAGCAGTGATTCACAG
+                                              + ++ ++++++++ + +++++
+1126 TTTCTGAGTTCGAGGCCAGCCTGGTCTACAAAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCT
+
+                                              [8]
+                                              2.9e-05
+                                              AACTTCAAAATGTCTCTTTGA
+                                              ++ + +++++ ++ ++ ++++
+1426 TCCATATCACCTGGAAGCTACTGCTGCCTGTCGGAGAGATTAGTTACAAAAAGGATTCTTGAGAGCTTGCCGACT
+
+        [8]
+        2.7e-07
+        AACTTCAAAATGTCTCTTTGA
+          +++ +++++++ ++ ++++
+1876 ACATTCTTTAAAATGTATCCTTTAATTTCTATGAAGTACTCCAGGTTAGCTTGCACTTTTGCTGCTACCATCAAT
+
+
+NM_028539
+  ENSMUSG00000048409
+  LENGTH = 2000  COMBINED P-VALUE = 2.30e-04  E-VALUE =      8.1
+  DIAGRAM: 226_[4]_1039_[2]_60_[8]_182_[8]_407
+
+
+      [4]
+      5.2e-06
+      AGGAGAGAGCTCACACCTCTCCC
+       +++++ + +++ + ++  ++ +
+226  GTGGAGAAAACTACAGCTAGTCTCGAGTATGACCAGGGGATGTGTGATTCTCTGACTTTAACATGAATTTGGAAA
+
+                  [2]
+                  2.4e-06
+                  ATGAAAAGCAGTGATTCACAG
+                  + ++ ++++++++ + +++++
+1276 CTCTATGAGTTTCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTTGACCCCCCATCTTCCCCCCCAAAAAGG
+
+                        [8]
+                        7.9e-05
+                        AACTTCAAAATGTCTCTTTGA
+                         +  +  +++++++++ ++++
+1351 TGAAACTAAAAATTTCATCTGTCTTCAAATGGTTCCTTTAAAACTGCACTCAGGAAACAGGCATCAGGGCATATG
+
+                                                                             [8]
+                                                                             7.0
+                                                                             AAC
+                                                                              ++
+1501 CTCTGAAAACAGATAAAGGTGAGTCTGTGTGCTTGGAATGTAGTTCAGTTGGTAGACTGCTTGCCTAGCTTATAA
+
+     
+     e-06
+     TTCAAAATGTCTCTTTGA
+     ++ ++ ++ +++++++++
+1576 TTAATCCTAGCTCTTGGAAAGCAGGAGCAGGAGGACCAGAAGTTTGACAGTATCTTCAGCCAGTGGGAGATCTAG
+
+
+NM_025781
+  ENSMUSG00000031953
+  LENGTH = 2000  COMBINED P-VALUE = 2.35e-04  E-VALUE =      8.3
+  DIAGRAM: 55_[2]_61_[8]_123_[8]_23_[4]_823_[2]_624_[3]_163
+
+                                                            [2]
+                                                            9.4e-06
+                                                            ATGAAAAGCAGTGATTCACA
+                                                            + +  +++++ ++++ ++ +
+1    GAGAAAGGAAGACTGCCACAAATTCAAGGCCAGCCTGGGCTTCATGAAGAGATGGAGGCTAGCCAAGGATACATA
+
+                                                                   [8]
+                                                                   7.0e-05
+     G                                                             AACTTCAAAATGT
+     +                                                              ++++ +++ +
+76   GCAAGACATTGCATCAAATGAAATCTAAAAACAAACAAACAAATCAAGGTTCCCAACGCAAAGAATTTAAAGTAA
+
+     
+     
+     CTCTTTGA
+     ++++++
+151  TTCTTTATTGATAATAAACATTTATACTAGCATTAGAAATGTCAGTAGAAAAATAATATTTAAATAAATCTCTAA
+
+                                                             [8]
+                                                             1.0e-05
+                                                             AACTTCAAAATGTCTCTTT
+                                                             +++ + ++++++ ++++ +
+226  TGTACAAAGATTTGATCATTTTGACTATAGCAGTTAAGTGCAATAACCTAATATAAAAAGTTAAACTGATTTTGG
+
+                              [4]
+                              2.0e-05
+     GA                       AGGAGAGAGCTCACACCTCTCCC
+     +                        + ++++++ +++ ++ ++++++
+301  TCATAACCAGCCCTACACAGTACACAAGAAAGAAATACAAGTTCACAAAACCTTCTGCGCGTGGACCATTAATGG
+
+                                                   [2]
+                                                   4.3e-05
+                                                   ATGAAAAGCAGTGATTCACAG
+                                                   + ++++++++   ++ +++++
+1126 CAGAGAAACCTGTCTCCNNNNNNNNNNNNNNNNNNNNNNNNNNNCCAGAAAAACCACACACACACACCCCCAAAA
+
+                     [3]
+                     1.2e-05
+                     AGTGCCCTGGGCCTTTAAGAG
+                     ++ + + +++ + +++++++
+1801 GTGCAAGTGCCCACAGAGAGTCTTGGATATTTAAAAAGAAATTTTATTTATGTGTGACGTGAACAGCATGTAGAG
+
+
+NM_174924
+  ENSG00000169340
+  LENGTH = 2000  COMBINED P-VALUE = 2.36e-04  E-VALUE =      8.4
+  DIAGRAM: 779_[3]_64_[4]_957_[2]_135
+
+
+                                  [3]
+                                  1.4e-07
+                                  AGTGCCCTGGGCCTTTAAGAG
+                                  ++ +++++++++ ++++++++
+751  AATTAGACAAGGTGATGTTGCAAGTGCTTAAGGCTATGGGTTTTTAAAAGATAGCCTCAGTGATTGCATGGAATA
+
+                                            [4]
+                                            4.1e-05
+                                            AGGAGAGAGCTCACACCTCTCCC
+                                             ++ ++++ ++++ +++  + ++
+826  TGGAGGGGTTTGGGATGGGGGCTGGGAGGGAGGCACAGAGGGGCAGCAAAAAGACCAGTTAGGAGACTTCAGCTG
+
+                                                 [2]
+                                                 8.3e-06
+                                                 ATGAAAAGCAGTGATTCACAG
+                                                 +  +++++++ ++++ + +++
+1801 TTCTTCTAAGACTGCACCAACCAGGAAAAAGAGGTCATGAGTAAAACAGAGCCAAGGACACTCAGAGGCTTTATT
+
+
+NM_007003
+  ENSG00000101951
+  LENGTH = 2000  COMBINED P-VALUE = 2.37e-04  E-VALUE =      8.4
+  DIAGRAM: 311_[4]_68_[2]_1483_[4]_71
+
+
+                [4]
+                6.9e-05
+                AGGAGAGAGCTCACACCTCTCCC
+                ++++++ + ++ ++ +++  +++
+301  GCACCAGGTGCAGGAGACCTATGACCCCTAGCACTGCCTTTTATCTGCTGGGTTCCCCTTGGAGAACCATATATC
+
+                                [2]
+                                1.9e-05
+                                ATGAAAAGCAGTGATTCACAG
+                                + ++++++++ + ++ +  ++
+376  TTTGGAGCCCCAGAACATGCTGGTTGCACAAGAACAAAGCACACTTACTGTCTGAGAGGAAGGCTGGTGGTGTGG
+
+                                    [4]
+                                    1.4e-07
+                                    AGGAGAGAGCTCACACCTCTCCC
+                                     +++++++ +++ +++ + ++++
+1876 GGCACAGAGCCTCTAGGAGGCGGGAGAGGGCTGGAAAGACAACCCACGTGACACACCCCCGAGGTTTGGGATTGG
+
+
+NM_028604
+  ENSMUSG00000019792
+  LENGTH = 2000  COMBINED P-VALUE = 2.39e-04  E-VALUE =      8.5
+  DIAGRAM: 577_[8]_511_[3]_310_[4]_356_[4]_158
+
+
+                                                         [8]
+                                                         1.1e-06
+                                                         AACTTCAAAATGTCTCTTTGA
+                                                         +++ ++++++++++++ +++
+526  GAGTTTTCTCAAAGCGGAAGAAGTAGAAGTCCTGAGTTACCTCCCAGAGCTGAACCTCAAACTGTTTTATTGCTC
+
+                                                                [3]
+                                                                1.8e-06
+                                                                AGTGCCCTGGGCCTTT
+                                                                +  + + +++++ +++
+1051 GCTCTGTAACTTCTGTAGGAAGTAAAGGCTCGTTTTGTAGGTAACAAAGTTTGAGGGCAATCATTTTGGGTTTTT
+
+     
+     
+     AAGAG
+      ++++
+1126 GAGAGTGGGTTTCTCTTCAGGTAGCTCTGTCTGTACTGGATATCTCTCTGTAGAGCAGGCTGGCTTCAAAGTCAA
+
+                    [4]
+                    9.6e-05
+                    AGGAGAGAGCTCACACCTCTCCC
+                    +   ++++ ++ +++++++++++
+1426 GAACTTACTAATAAAAAATAAGAAAATAAACCTCACAGCTGATGCTCAAAAGATTTCAATTAATAATAATAAAAG
+
+                        [4]
+                        8.3e-05
+                        AGGAGAGAGCTCACACCTCTCCC
+                        ++++++++++++  +  +  + +
+1801 AAATCAGGAGCACATGACAAGGAGAGCGCAACTAAATGCCTGACCTCTGCACAGCATTGGTCATTTGTGGCTCCT
+
+
+NM_013166
+  ENSRNOG00000012460
+  LENGTH = 2000  COMBINED P-VALUE = 2.40e-04  E-VALUE =      8.5
+  DIAGRAM: 605_[8]_749_[2]_604
+
+
+          [8]
+          2.0e-07
+          AACTTCAAAATGTCTCTTTGA
+            +++ + +++++++++++++
+601  TGAGATTCTTAAGAATGTTTCTTGGATCAATCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+                              [2]
+                              7.0e-06
+                              ATGAAAAGCAGTGATTCACAG
+                               +++++++++ ++++ +  ++
+1351 AAGGTCTCTTGGGTTTTAAAGATAAGTAAAAACAAATGATACTGACAAATTCTGGATTCACAGTGTACAGGCCTC
+
+
+NM_153399
+  ENSMUSG00000040566
+  LENGTH = 2000  COMBINED P-VALUE = 2.41e-04  E-VALUE =      8.5
+  DIAGRAM: 39_[2]_895_[4]_1022
+
+                                            [2]
+                                            1.5e-06
+                                            ATGAAAAGCAGTGATTCACAG
+                                            + ++++++++ ++++  +  +
+1    NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAGGGAAAGAAAGGATAGATTCTCTACCTCCCCCTTC
+
+                                                            [4]
+                                                            7.6e-07
+                                                            AGGAGAGAGCTCACACCTCT
+                                                            ++++++++ +++++++++ +
+901  CCAGGTGAAAGGGTTTGAGGATATTTGATTCCTTCCAAGCGTGAACCATTGGAGCAGGAAAGCACACACACTTGA
+
+     
+     
+     CCC
+      +
+976  GAAGAGTTGTCTATCTGACCCCAGCCCCCCTGTCTCTTCTGGGAATGTGCTCAGGGGAAGGCAGTGCAAGTGCTA
+
+
+NM_173084
+  ENSG00000175447
+  LENGTH = 2000  COMBINED P-VALUE = 2.43e-04  E-VALUE =      8.6
+  DIAGRAM: 201_[2]_643_[8]_634_[3]_459
+
+
+                                                        [2]
+                                                        1.7e-06
+                                                        ATGAAAAGCAGTGATTCACAG
+                                                        ++++++++++++ +++++ +
+151  CATACATCAACTTTTGCAATCTTCCTGCTGGTAAAAATACAAAACAGCAGCATAAGAACCAGTTACTCATATACT
+
+                                             [8]
+                                             2.7e-06
+                                             AACTTCAAAATGTCTCTTTGA
+                                              ++  ++ +++++++++++++
+826  GTTTATGTGCAATCTGTAGGGCCAGGAGCATAGTCTAATCTGACCCAGAATGTCTTTTTGATTTGATATAGGTAA
+
+                         [3]
+                         3.2e-05
+                         AGTGCCCTGGGCCTTTAAGAG
+                         +  + ++++ ++ + +++++
+1501 TTGCAGACCAAGTTACCAACATGATTCTGTTCTAATAAGAATGAGTTTTTAATCCAAGAACTGATTTGACTGACG
+
+
+NM_009412
+  ENSMUSG00000027506
+  LENGTH = 2000  COMBINED P-VALUE = 2.44e-04  E-VALUE =      8.6
+  DIAGRAM: 125_[8]_526_[2]_87_[2]_483_[8]_83_[4]_70_[8]_498
+
+
+                                                       [8]
+                                                       9.7e-05
+                                                       AACTTCAAAATGTCTCTTTGA
+                                                       +++++ ++ ++ ++++ ++++
+76   TTCTCCTTCTCTCCACTTAATCATTTTCCTCATATGAAATCTCTATACAAAACTTGAAGATTTCTTATTTAAAAA
+
+                                                                             [2]
+                                                                             9.8
+                                                                             ATG
+                                                                             + +
+601  CTCTGAAAAAGGCTTTAGGTCACTCCAAGCTTGGCAGTTAACATTTGGCATGGACACTGGTAAAACCACAATAGA
+
+     
+     e-07
+     AAAAGCAGTGATTCACAG
+     +++++++++ ++  + ++
+676  GAAAGAAGTAACAGATACAACCTCATAGGCCATAAACATACACAGCAAAGGAAGCAGGAGTGAGTCAGGAGTGAC
+
+                                   [2]
+                                   1.3e-05
+                                   ATGAAAAGCAGTGATTCACAG
+                                    ++++++++  ++++++++++
+751  CACAGTTAACAGAGGGGCTTAGCAGAGCCACTGGGAAGCTTTGACTCACAGCCCCGACAGTTCTTAGAGTGGGAA
+
+              [8]
+              5.6e-05
+              AACTTCAAAATGTCTCTTTGA
+              +++++ ++  +   +++++++
+1276 ATTTACCTGAAATTAAATTTTAATCTTGTAAAAAAATGTGTGAGAATGGTATAATAAATAACTGATAAGCATATT
+
+                                           [4]
+                                           1.4e-05
+                                           AGGAGAGAGCTCACACCTCTCCC
+                                           +++ +++++++  + ++ +++ +
+1351 TGAGCTCAAATAGATGTAGCTACAAATGACAGTTACCAAGGCAAGAGCAGGACCCACACTCTCCCCACTGGAAGT
+
+                                                             [8]
+                                                             1.2e-05
+                                                             AACTTCAAAATGTCTCTTT
+                                                              ++++ +  +++++ + ++
+1426 TTGAGTGTCTGCATGTTTTTAATTTACAGAACTATAGAACTGCATACACACCTGTCTACTTTAGTCTGGTGTCTT
+
+     
+     
+     GA
+     ++
+1501 TAGAAACCCAAGATATGGTCCTCACAGAACTGTTTATCCGAGAAAAAAAATGTAGCTGGTAGTTTCTCCTTTGAA
+
+
+NM_005565
+  ENSG00000043462
+  LENGTH = 2000  COMBINED P-VALUE = 2.62e-04  E-VALUE =      9.3
+  DIAGRAM: 878_[8]_563_[2]_409_[8]_87
+
+
+                                                          [8]
+                                                          7.2e-05
+                                                          AACTTCAAAATGTCTCTTTGA
+                                                          +++ + ++++++   +++++
+826  CAGTCATAGCCATGGAACAGAACATCTTAATCCTGATGGCACTTTCCACTAATAGACTTAAAATGAGCTTTGGTT
+
+                                          [2]
+                                          3.9e-05
+                                          ATGAAAAGCAGTGATTCACAG
+                                            ++++++++ +++++  +++
+1426 TTGTGCAATAACAACCAAGGTCCCAAAGGGAAAAACTTGAAGAACAACTGATTGTCACTATTTGGCTGAATACCA
+
+                      [8]
+                      1.1e-08
+                      AACTTCAAAATGTCTCTTTGA
+                      +++++ ++++++++++ +++
+1876 ACCAAAGTCTCATCCCTAAATTTAAAATGTTTTCTTTTGGTATTTGTAGTTTAAATTTTCTTTAAGACAAAAATA
+
+
+NM_145514
+  ENSMUSG00000038733
+  LENGTH = 2000  COMBINED P-VALUE = 2.67e-04  E-VALUE =      9.4
+  DIAGRAM: 314_[3]_436_[8]_635_[8]_521_[8]_10
+
+
+                   [3]
+                   1.9e-05
+                   AGTGCCCTGGGCCTTTAAGAG
+                   ++ +  ++++++ ++ ++++
+301  TTCTCTGATTTTCTAAAGTGATGGTCTCTGAAAAATGAAGCTTAAAGGAATCTTTTCCATTACCCATTTGAATTA
+
+                          [8]
+                          8.6e-05
+                          AACTTCAAAATGTCTCTTTGA
+                          ++++   ++ +  +++++++
+751  ATGTTAGATATAGGGTCTTCAAAATGACAAGTCACTTTTTTTCCATAAAGGAAACATTCCCGTAAACTAAAAGGG
+
+       [8]
+       2.1e-05
+       AACTTCAAAATGTCTCTTTGA
+        +++++ ++++ + +++++ +
+1426 AATACTTCTTAATTTATTTTTAAAAAATTGTGCTGTTAACCCTTTTACGGGGCAACAACTATGTGAAAAGTACAA
+
+                        [8]
+                        5.6e-08
+                        AACTTCAAAATGTCTCTTTGA
+                         +++++++++++++++++++
+1951 AAATGTTTGGCCCCAATCGGGCTTCAAAATGGTTCTTTTTTTTTTTTTTT
+
+
+NM_000319
+  ENSG00000139197
+  LENGTH = 2000  COMBINED P-VALUE = 2.68e-04  E-VALUE =      9.5
+  DIAGRAM: 118_[2]_227_[3]_160_[8]_1250_[4]_159
+
+
+                                                [2]
+                                                2.2e-05
+                                                ATGAAAAGCAGTGATTCACAG
+                                                + +++ ++++ ++ + +++++
+76   GGGTGTAGAAGTGGGGCCTCTGAAAGAGGGCGAAAAGGCAAGGAAAAGGAGAAATGTTGCACAGTCTTATGGTCT
+
+                                                                       [3]
+                                                                       3.9e-05
+                                                                       AGTGCCCTG
+                                                                       + ++++++
+301  CCTGCTGCCTGCCTGCCTCCGCTCATGCACCCTGGGCTGGGATGGTACTTCTGTTCGTCTGGCATTATTGCCCTT
+
+     
+     
+     GGCCTTTAAGAG
+     +++++++ +  +
+376  GGTCATTTACCGGCAGCCCTGGGCCCCTCCTTGCTCTTCTCCATGACACTAGGACTCCCTTGGTCTTGAAGCTGG
+
+                           [8]
+                           1.0e-05
+                           AACTTCAAAATGTCTCTTTGA
+                            ++++ +++   +++++++++
+526  CTTGAGGCTTAGTCTTATTCCGTGATTAAAAGCATTTCTTGTATGTTTGTCCTTTCCCAGCCTGGAGTGATGGGA
+
+                       [4]
+                       3.2e-06
+                       AGGAGAGAGCTCACACCTCTCCC
+                       +++++++++ ++  +++++ + +
+1801 GCCTTTGAGGGGGGCGGCAGGAGAGAGTACCGACCTCCCTCGAACTCCTGGCAGAGGTGGGGGTCGCAGCAAAAG
+
+
+NM_139063
+  ENSMUSG00000038982
+  LENGTH = 2000  COMBINED P-VALUE = 2.71e-04  E-VALUE =      9.6
+  DIAGRAM: 622_[4]_876_[4]_203_[2]_232
+
+
+                           [4]
+                           2.6e-08
+                           AGGAGAGAGCTCACACCTCTCCC
+                           ++++++++ ++ ++ +  +++++
+601  GTCCCCAGAGTCCACATGGCAGAGGAGAGAACTGACTCGACTCCCACAAATTATCCTCTGATCTCCACACGTGTG
+
+                          [4]
+                          1.9e-06
+                          AGGAGAGAGCTCACACCTCTCCC
+                           +++++++  + +++++++  ++
+1501 ACAAAACTGAGGACATTAACATGGAAAGCTTAGAAACTTCGTCCTGGTCCACACCCTTAGAAAGGCGTATGCTGT
+
+                           [2]
+                           3.8e-05
+                           ATGAAAAGCAGTGATTCACAG
+                           + +++ +++ +++++ ++ ++
+1726 AGTGGTCACTAACGACATTCCAAAAAGTGCCTGTGATACAAAGGGGATTTTCACTTGCGGCTCCCTCCCCTCCCT
+
+
+NM_009980
+  ENSMUSG00000030970
+  LENGTH = 2000  COMBINED P-VALUE = 2.78e-04  E-VALUE =      9.8
+  DIAGRAM: 437_[3]_1109_[8]_62_[2]_308_[8]
+
+
+                                                                   [3]
+                                                                   9.4e-05
+                                                                   AGTGCCCTGGGCC
+                                                                    ++  + +++ +
+376  CTGACCACACATAATTTGATAGGTACCCATTCTTCCCTGTGGGAAAGAATTAAAGAAGGCTCCATTTCTTGGACT
+
+     
+     
+     TTTAAGAG
+     +++++++
+451  CTTAAGATAGGAATGGGGCTTTCATTAGTTTCTCTAGCAGACTGCTGGCTGTGAAATGGCCAACCCCTTTGGGAA
+
+                                                                        [8]
+                                                                        4.2e-05
+                                                                        AACTTCAA
+                                                                         + ++ ++
+1501 AAATACAGTAACGATAACTCACACTAAAACAAAACATTTCTGATAGCCATTATTTTTCTGTTTGGGACAGTTTAA
+
+     
+     
+     AATGTCTCTTTGA
+     + + ++++++++
+1576 AGTTTTTTTTTTCTTTTGTGTCACAAAAACAGGAATGTACCTATACAAAGGCTCAAAATAGGCCATCTTTAAAAA
+
+     [2]
+     1.6e-06
+     ATGAAAAGCAGTGATTCACAG
+       ++++++++ +++++++++
+1651 CAAAAAGGCAATGATTCACAAAAGACTATGAATAGAACATGTAACTAGCTGATACAAATCTAATAGGATTTGTTA
+
+                                  [8]
+                                  4.9e-07
+                                  AACTTCAAAATGTCTCTTTGA
+                                  +++++ +++++ ++++ +++
+1951 AAATAGATTTGAGTACAAACAGCTTGTGAAACTTAATACTTTTTTCTTTT
+
+********************************************************************************
+
+
+CPU: crick
+Time 67.670000 secs.
+
+mast acro1.set.fasta.conserved.25.meme -m 2 -m 3 -m 4 -m 8 -text -brief -remcorr -norc -stdout -d /home/sao/applications/mask_db/upstream.masked -bfile /home/sao/proj_motif/model.bckg

Added: trunk/packages/bioperl/branches/upstream/current/t/data/masta.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/masta.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/masta.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,62 @@
+>m1seq                     pos 1311
+CAGAAAAATTGAATTCCCACCCCCC
+CAGAAAAATTGAATTCCCACCCCCC
+CCTTAAAATAAAATCCCCACCACCA
+CAGACAAAGACATTCCACAGCTCCC
+CAGAAAAATTGAATTCCCACCCCCC
+CAGAAAAATTGAATTCCCACCCCCC
+CCTTAAAATAAAATCCCCACCACCA
+CAGACAAAGACATTCCACAGCTCCC
+>m1logs
+  -865    208   -865   -865 
+   156      8   -865   -865 
+  -865   -865    157     -4 
+   156   -865   -865     -4 
+   156      8   -865   -865 
+   197   -865   -865   -865 
+   197   -865   -865   -865 
+   197   -865   -865   -865 
+  -865   -865     -1    154 
+    97   -865   -865     96 
+    -3      8     99   -865 
+   197   -865   -865   -865 
+   156   -865   -865     -4 
+  -865   -865   -865    196 
+  -865    108   -865     96 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+  -865    208   -865   -865 
+   197   -865   -865   -865 
+  -865    166     -1   -865 
+  -865    208   -865   -865 
+    -3    108   -865     -4 
+  -865    208   -865   -865 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+>m1freq
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.000635  0.000589  0.748759  0.250017 
+ 0.748765  0.000589  0.000629  0.250017 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.000589  0.250006  0.748770 
+ 0.499388  0.000589  0.000629  0.499393 
+ 0.250012  0.249966  0.499382  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.748765  0.000589  0.000629  0.250017 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.499343  0.000629  0.499393 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.499343  0.000629  0.250017 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/masta.dat
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/megablast_output.paracel_btk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/megablast_output.paracel_btk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/megablast_output.paracel_btk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,150 @@
+MEGABLAST 1.4.13-Paracel [2002-12-12]
+
+
+Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), 
+"A greedy algorithm for aligning DNA sequences", 
+J Comput Biol 2000; 7(1-2):203-14.
+
+Database: bwb/tmp/vISbyVPvvB 
+           2 sequences; 80 total letters
+
+Searching. done
+Query= DNA sequence #1
+         (40 letters)
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+reverse-complement of sequence #1                                      74   1e-19
+DNA sequence #1                                                        74   1e-19
+
+>reverse-complement of sequence #1
+          Length = 40
+
+ Score = 73.8 bits (37), Expect = 1e-19
+ Identities = 37/37 (100%)
+ Strand = Plus / Minus
+
+                                               
+Query: 2  ccccccccccccccccccccccccccccccccccccc 38
+          |||||||||||||||||||||||||||||||||||||
+Sbjct: 40 ccccccccccccccccccccccccccccccccccccc 4
+
+
+
+ Score = 67.9 bits (34), Expect = 8e-18
+ Identities = 34/34 (100%)
+ Strand = Plus / Minus
+
+                                            
+Query: 1  cccccccccccccccccccccccccccccccccc 34
+          ||||||||||||||||||||||||||||||||||
+Sbjct: 36 cccccccccccccccccccccccccccccccccc 3
+
+
+>DNA sequence #1
+          Length = 40
+
+ Score = 73.8 bits (37), Expect = 1e-19
+ Identities = 37/37 (100%)
+ Strand = Plus / Plus
+
+                                               
+Query: 2  ccccccccccccccccccccccccccccccccccccc 38
+          |||||||||||||||||||||||||||||||||||||
+Sbjct: 1  ccccccccccccccccccccccccccccccccccccc 37
+
+
+
+ Score = 71.9 bits (36), Expect = 5e-19
+ Identities = 36/36 (100%)
+ Strand = Plus / Plus
+
+                                              
+Query: 1  cccccccccccccccccccccccccccccccccccc 36
+          ||||||||||||||||||||||||||||||||||||
+Sbjct: 3  cccccccccccccccccccccccccccccccccccc 38
+
+
+Searching. done
+Query= reverse-complement of sequence #1
+         (40 letters)
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+reverse-complement of sequence #1                                      74   1e-19
+DNA sequence #1                                                        74   1e-19
+
+>reverse-complement of sequence #1
+          Length = 40
+
+ Score = 73.8 bits (37), Expect = 1e-19
+ Identities = 37/37 (100%)
+ Strand = Plus / Plus
+
+                                               
+Query: 3  ggggggggggggggggggggggggggggggggggggg 39
+          |||||||||||||||||||||||||||||||||||||
+Sbjct: 4  ggggggggggggggggggggggggggggggggggggg 40
+
+
+
+ Score = 67.9 bits (34), Expect = 8e-18
+ Identities = 34/34 (100%)
+ Strand = Plus / Plus
+
+                                            
+Query: 7  gggggggggggggggggggggggggggggggggg 40
+          ||||||||||||||||||||||||||||||||||
+Sbjct: 3  gggggggggggggggggggggggggggggggggg 36
+
+
+>DNA sequence #1
+          Length = 40
+
+ Score = 73.8 bits (37), Expect = 1e-19
+ Identities = 37/37 (100%)
+ Strand = Plus / Minus
+
+                                               
+Query: 3  ggggggggggggggggggggggggggggggggggggg 39
+          |||||||||||||||||||||||||||||||||||||
+Sbjct: 37 ggggggggggggggggggggggggggggggggggggg 1
+
+
+
+ Score = 71.9 bits (36), Expect = 5e-19
+ Identities = 36/36 (100%)
+ Strand = Plus / Minus
+
+                                              
+Query: 5  gggggggggggggggggggggggggggggggggggg 40
+          ||||||||||||||||||||||||||||||||||||
+Sbjct: 38 gggggggggggggggggggggggggggggggggggg 3
+
+
+  Database: bwb/tmp/vISbyVPvvB
+    Posted date:  Jan 23, 2003  9:25 AM
+  Number of letters in database: 80
+  Number of sequences in database:  2
+  
+Lambda     K      H
+    1.37    0.711     1.31 
+
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+
+
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 0, Extension: 0
+Number of Hits to DB: 0
+Number of Sequences: 2
+length of database: 80
+effective search space used:        0
+S2: 4 ( 8.4 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/meme.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/meme.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/meme.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,486 @@
+********************************************************************************
+MEME - Motif discovery tool
+********************************************************************************
+MEME version 3.0 (Release date: 2001/03/05 14:24:28)
+
+For further information on how to interpret these results or to get
+a copy of the MEME software please access http://meme.sdsc.edu.
+
+This file may be used as input to the MAST algorithm for searching
+sequence databases for matches to groups of motifs.  MAST is available
+for interactive use and downloading at http://meme.sdsc.edu.
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+If you use this program in your research, please cite:
+
+Timothy L. Bailey and Charles Elkan,
+"Fitting a mixture model by expectation maximization to discover
+motifs in biopolymers", Proceedings of the Second International
+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,
+AAAI Press, Menlo Park, California, 1994.
+********************************************************************************
+
+
+********************************************************************************
+TRAINING SET
+********************************************************************************
+DATAFILE= D10Mit194.set.genbank.fasta.nref
+ALPHABET= ACGT
+Sequence name           Weight Length  Sequence name           Weight Length  
+-------------           ------ ------  -------------           ------ ------  
+20218                   1.0000   2000  10657                   1.0000   2000  
+83796                   1.0000   2000  6603                    1.0000   2000  
+********************************************************************************
+
+********************************************************************************
+COMMAND LINE SUMMARY
+********************************************************************************
+This information can also be useful in the event you wish to report a
+problem with the MEME software.
+
+command: meme D10Mit194.set.genbank.fasta.nref -dna -print_fasta -nmotifs 3 -maxw 25 
+
+model:  mod=         zoops    nmotifs=         3    evt=           inf
+object function=  E-value of product of p-values
+width:  minw=            8    maxw=           25    minic=        0.00
+width:  wg=             11    ws=              1    endgaps=       yes
+nsites: minsites=        2    maxsites=        4    wnsites=       0.8
+theta:  prob=            1    spmap=         uni    spfuzz=        0.5
+em:     prior=   dirichlet    b=            0.01    maxiter=        50
+        distance=    1e-05
+data:   n=            8000    N=               4
+strands: +
+sample: seed=            0    seqfrac=         1
+Letter frequencies in dataset:
+A 0.255 C 0.236 G 0.252 T 0.257 
+Background letter frequencies (from dataset with add-one prior applied):
+A 0.255 C 0.236 G 0.252 T 0.257 
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  1	width =   25   sites =   4   llr = 106   E-value = 1.2e-002
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 1 Description
+--------------------------------------------------------------------------------
+Simplified        A  :8:88aaa:53a8:::3:a::3::3
+pos.-specific     C  a3::3:::::3:::5a8a:8a5aa8
+probability       G  ::8:::::3:5::::::::3:::::
+matrix            T  ::33::::85::3a5::::::3:::
+
+         bits    2.1 *              * *  * ** 
+                 1.9 *    ***   * * * ** * ** 
+                 1.7 *    ***   * * * ** * ** 
+                 1.5 *    ***   * * * ** * ** 
+Information      1.2 *********  *** ****** ***
+content          1.0 ********** ********** ***
+(38.1 bits)      0.8 ********** ********** ***
+                 0.6 ********** **************
+                 0.4 *************************
+                 0.2 *************************
+                 0.0 -------------------------
+
+Multilevel           CAGAAAAATAGAATCCCCACCCCCC
+consensus             CTTC   GTA T T A  G A  A
+sequence                       C          T   
+                                              
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name             Start   P-value                      Site         
+-------------             ----- ---------            -------------------------
+6603                       1311  2.59e-15 GGCGCATTGA CAGAAAAATTGAATTCCCACCCCCC AATGAGGAGG
+83796                      1284  2.59e-15 GGAGGATTGA CAGAAAAATTGAATTCCCACCCCCC AACGAGGAGG
+20218                       938  6.34e-12 TTTTTGGTAA CCTTAAAATAAAATCCCCACCACCA CTTTTAAAAA
+10657                      1685  8.70e-12 GGCCCGCGCG CAGACAAAGACATTCCACAGCTCCC GCCCCCTCCA
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+6603                              2.6e-15  1310_[1]_665
+83796                             2.6e-15  1283_[1]_692
+20218                             6.3e-12  937_[1]_1038
+10657                             8.7e-12  1684_[1]_291
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 in FASTA format
+--------------------------------------------------------------------------------
+>6603                     pos 1311
+CAGAAAAATTGAATTCCCACCCCCC
+>83796                    pos 1284
+CAGAAAAATTGAATTCCCACCCCCC
+>20218                    pos  938
+CCTTAAAATAAAATCCCCACCACCA
+>10657                    pos 1685
+CAGACAAAGACATTCCACAGCTCCC
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 25 n= 7904 bayes= 11.6849 E= 1.2e-002 
+  -865    208   -865   -865 
+   156      8   -865   -865 
+  -865   -865    157     -4 
+   156   -865   -865     -4 
+   156      8   -865   -865 
+   197   -865   -865   -865 
+   197   -865   -865   -865 
+   197   -865   -865   -865 
+  -865   -865     -1    154 
+    97   -865   -865     96 
+    -3      8     99   -865 
+   197   -865   -865   -865 
+   156   -865   -865     -4 
+  -865   -865   -865    196 
+  -865    108   -865     96 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+  -865    208   -865   -865 
+   197   -865   -865   -865 
+  -865    166     -1   -865 
+  -865    208   -865   -865 
+    -3    108   -865     -4 
+  -865    208   -865   -865 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 25 n= 7904 E= 1.2e-002 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.000635  0.000589  0.748759  0.250017 
+ 0.748765  0.000589  0.000629  0.250017 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.000589  0.250006  0.748770 
+ 0.499388  0.000589  0.000629  0.499393 
+ 0.250012  0.249966  0.499382  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.748765  0.000589  0.000629  0.250017 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.499343  0.000629  0.499393 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.499343  0.000629  0.250017 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 38.44 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  2	width =   25   sites =   4   llr = 101   E-value = 4.5e-001
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 2 Description
+--------------------------------------------------------------------------------
+Simplified        A  ::::a::383:5:3:::a33:83:a
+pos.-specific     C  a3a8:3a33:a::5::8::::383:
+probability       G  ::::::::::::::::3:38a::8:
+matrix            T  :8:3:8:5:8:5a3aa::5::::::
+
+         bits    2.1 * *   *   *         *    
+                 1.9 * * * *   * * ** *  *   *
+                 1.7 * * * *   * * ** *  *   *
+                 1.5 * * * *   * * ** *  *   *
+Information      1.2 ******* *** * **** ******
+content          1.0 ******* ***** **** ******
+(36.6 bits)      0.8 ******* ***** **** ******
+                 0.6 ******* ********** ******
+                 0.4 *************************
+                 0.2 *************************
+                 0.0 -------------------------
+
+Multilevel           CTCCATCTATCATCTTCATGGACGA
+consensus             C T C ACA T A  G AA CAC 
+sequence                    C     T    G      
+                                              
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name             Start   P-value                      Site         
+-------------             ----- ---------            -------------------------
+6603                       1000  1.62e-15 CGGGAACATG CTCCATCTATCATCTTCATGGACGA AATCGACTCC
+83796                       978  4.69e-15 CGAGAACATG CTCCATCCATCATCTTCATGGACGA GATTGACTCT
+20218                      1545  1.69e-11 TAGCTTCTCT CCCCATCAATCTTATTCAGAGCCCA CCCCTCCCCC
+10657                      1075  3.40e-11 AGGATCTGGT CTCTACCTCACTTTTTGAAGGAAGA AACACTTAAT
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+6603                              1.6e-15  999_[2]_976
+83796                             4.7e-15  977_[2]_998
+20218                             1.7e-11  1544_[2]_431
+10657                             3.4e-11  1074_[2]_901
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 in FASTA format
+--------------------------------------------------------------------------------
+>6603                     pos 1000
+CTCCATCTATCATCTTCATGGACGA
+>83796                    pos  978
+CTCCATCCATCATCTTCATGGACGA
+>20218                    pos 1545
+CCCCATCAATCTTATTCAGAGCCCA
+>10657                    pos 1075
+CTCTACCTCACTTTTTGAAGGAAGA
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 25 n= 7904 bayes= 10.9476 E= 4.5e-001 
+  -865    208   -865   -865 
+  -865      8   -865    154 
+  -865    208   -865   -865 
+  -865    166   -865     -4 
+   197   -865   -865   -865 
+  -865      8   -865    154 
+  -865    208   -865   -865 
+    -3      8   -865     96 
+   156      8   -865   -865 
+    -3   -865   -865    154 
+  -865    208   -865   -865 
+    97   -865   -865     96 
+  -865   -865   -865    196 
+    -3    108   -865     -4 
+  -865   -865   -865    196 
+  -865   -865   -865    196 
+  -865    166     -1   -865 
+   197   -865   -865   -865 
+    -3   -865     -1     96 
+    -3   -865    157   -865 
+  -865   -865    198   -865 
+   156      8   -865   -865 
+    -3    166   -865   -865 
+  -865      8    157   -865 
+   197   -865   -865   -865 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 25 n= 7904 E= 4.5e-001 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.249966  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.748719  0.000629  0.250017 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.249966  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.249966  0.000629  0.499393 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.250012  0.000589  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.499388  0.000589  0.000629  0.499393 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.250012  0.499343  0.000629  0.250017 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.250012  0.000589  0.250006  0.499393 
+ 0.250012  0.000589  0.748759  0.000640 
+ 0.000635  0.000589  0.998135  0.000640 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.000635  0.249966  0.748759  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 78.29 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  3	width =   21   sites =   4   llr = 88   E-value = 4.8e-001
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 3 Description
+--------------------------------------------------------------------------------
+Simplified        A  :::58:::::3::::33::::
+pos.-specific     C  8:833::8:8:a:8a8:a5a8
+probability       G  3::::::3:::::3:::::::
+matrix            T  :a33:aa:a38:a:::8:5:3
+
+         bits    2.1            *  *  * * 
+                 1.9  *   ** *  ** *  * * 
+                 1.7  *   ** *  ** *  * * 
+                 1.5  *   ** *  ** *  * * 
+Information      1.2 *** ************** **
+content          1.0 *** *****************
+(31.8 bits)      0.8 *** *****************
+                 0.6 *** *****************
+                 0.4 *********************
+                 0.2 *********************
+                 0.0 ---------------------
+
+Multilevel           CTCAATTCTCTCTCCCTCCCC
+consensus            G TCC  G TA  G AA T T
+sequence                T                 
+                                          
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name             Start   P-value                    Site       
+-------------             ----- ---------            ---------------------
+10657                      1511  1.45e-13 CCCAGGCGGT CTCAATTCTCTCTCCCTCCCC TTTCCGTGAC
+83796                      1801  7.40e-12 TGTATATGCA CTCTCTTCTCTCTCCCTCTCC AGGTCATGCA
+6603                       1811  1.22e-10 GTAACTTAAT GTTCATTCTCTCTCCCACCCC TAGGTCATGC
+20218                       606  7.57e-10 CCCAGGCCAG CTCAATTGTTACTGCATCTCT AGGATTGGAA
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+10657                             1.5e-13  1510_[3]_469
+83796                             7.4e-12  1800_[3]_179
+6603                              1.2e-10  1810_[3]_169
+20218                             7.6e-10  605_[3]_1374
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 in FASTA format
+--------------------------------------------------------------------------------
+>10657                    pos 1511
+CTCAATTCTCTCTCCCTCCCC
+>83796                    pos 1801
+CTCTCTTCTCTCTCCCTCTCC
+>6603                     pos 1811
+GTTCATTCTCTCTCCCACCCC
+>20218                    pos  606
+CTCAATTGTTACTGCATCTCT
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 21 n= 7920 bayes= 10.9506 E= 4.8e-001 
+  -865    166     -1   -865 
+  -865   -865   -865    196 
+  -865    166   -865     -4 
+    97      8   -865     -4 
+   156      8   -865   -865 
+  -865   -865   -865    196 
+  -865   -865   -865    196 
+  -865    166     -1   -865 
+  -865   -865   -865    196 
+  -865    166   -865     -4 
+    -3   -865   -865    154 
+  -865    208   -865   -865 
+  -865   -865   -865    196 
+  -865    166     -1   -865 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+    -3   -865   -865    154 
+  -865    208   -865   -865 
+  -865    108   -865     96 
+  -865    208   -865   -865 
+  -865    166   -865     -4 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 21 n= 7920 E= 4.8e-001 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.000629  0.250017 
+ 0.499388  0.249966  0.000629  0.250017 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.000629  0.250017 
+ 0.250012  0.000589  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.250012  0.000589  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.499343  0.000629  0.499393 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.748719  0.000629  0.250017 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 117.82 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+SUMMARY OF MOTIFS
+********************************************************************************
+
+--------------------------------------------------------------------------------
+	Combined block diagrams: non-overlapping sites with p-value < 0.0001
+--------------------------------------------------------------------------------
+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+20218                            7.77e-19  605_[3(7.57e-10)]_311_[1(6.34e-12)]_582_[2(1.69e-11)]_431
+10657                            5.46e-22  1_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_7_[1(6.00e-08)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_5_[1(1.18e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_4_[1(1.29e-07)]_332_[2(3.40e-11)]_383_[3(7.75e-07)]_7_[3(1.45e-13)]_56_[3(3.47e-05)]_76_[1(8.70e-12)]_291
+83796                            1.73e-27  977_[2(4.69e-15)]_281_[1(2.59e-15)]_492_[3(7.40e-12)]_179
+6603                             9.32e-27  597_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_327_[2(1.62e-15)]_286_[1(2.59e-15)]_475_[3(1.22e-10)]_169
+--------------------------------------------------------------------------------
+
+********************************************************************************
+
+
+********************************************************************************
+Stopped because nmotifs = 3 reached.
+********************************************************************************
+
+CPU: hydra-1.lsd.ornl.gov
+
+********************************************************************************

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mini-AE001405.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mini-AE001405.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mini-AE001405.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,61 @@
+LOCUS       AE001405               11389 bp    DNA     linear   INV 04-OCT-2002
+DEFINITION  Plasmodium falciparum 3D7 chromosome 2 section 42 of 73 of the
+            complete sequence.
+ACCESSION   AE001405 AE001362
+VERSION     AE001405.2  GI:23503395
+KEYWORDS    .
+SOURCE      Plasmodium falciparum 3D7
+  ORGANISM  Plasmodium falciparum 3D7
+            Eukaryota; Alveolata; Apicomplexa; Haemosporida; Plasmodium.
+REFERENCE   1  (bases 1 to 11389)
+  AUTHORS   Gardner,M.J., Tettelin,H., Carucci,D.J., Cummings,L.M., Aravind,L.,
+            Koonin,E.V., Shallom,S., Mason,T., Yu,K., Fujii,C., Pederson,J.,
+            Shen,K., Jing,J., Aston,C., Lai,Z., Schwartz,D.C., Pertea,M.,
+            Salzberg,S., Zhou,L., Sutton,G.G., Clayton,R., White,O.,
+            Smith,H.O., Fraser,C.M., Adams,M.D., Venter,J.C. and Hoffman,S.L.
+  TITLE     Chromosome 2 sequence of the human malaria parasite Plasmodium
+            falciparum
+  JOURNAL   Science 282 (5391), 1126-1132 (1998)
+   PUBMED   9804551
+REFERENCE   2  (bases 1 to 11389)
+  AUTHORS   Gardner,M.J., Hall,N., Fung,E., White,O., Berriman,M., Hyman,R.W.,
+            Carlton,J.M., Pain,A., Nelson,K.E., Bowman,S., Paulsen,I.T.,
+            James,K., Eisen,J.A., Rutherford,K., Salzberg,S.L., Craig,A.,
+            Kyes,S., Chan,M.S., Nene,V., Shallom,S.J., Suh,B., Peterson,J.,
+            Angiuoli,S., Pertea,M., Allen,J., Selengut,J., Haft,D.,
+            Mather,M.W., Vaidya,A.B., Martin,D.M., Fairlamb,A.H.,
+            Fraunholz,M.J., Roos,D.S., Ralph,S.A., McFadden,G.I.,
+            Cummings,L.M., Subramanian,G.M., Mungall,C., Venter,J.C.,
+            Carucci,D.J., Hoffman,S.L., Newbold,C., Davis,R.W., Fraser,C.M. and
+            Barrell,B.
+  TITLE     Genome sequence of the human malaria parasite Plasmodium falciparum
+  JOURNAL   Nature 419 (6906), 498-511 (2002)
+   PUBMED   12368864
+REFERENCE   3  (bases 1 to 11389)
+  AUTHORS   Gardner,M.J.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (02-OCT-2002) The Institute for Genomic Research, 9712
+            Medical Center Dr, Rockville, MD 20850, USA
+COMMENT     On Oct 4, 2002 this sequence version replaced gi:3845224.
+FEATURES             Location/Qualifiers
+     source          1..180
+                     /organism="Plasmodium falciparum 3D7"
+                     /mol_type="genomic DNA"
+                     /isolate="3D7"
+                     /db_xref="taxon:36329"
+                     /chromosome="2"
+     repeat_region   complement(<1..55)
+                     /rpt_type=tandem
+                     /rpt_unit=(TG)10;A;(TG)7
+     repeat_region   112..117
+                     /rpt_type=tandem
+                     /rpt_unit_seq="(TAAAA)n"
+     repeat_region   complement(116..119)
+                     /rpt_type=tandem
+                     /rpt_unit_seq="(TA)n"
+ORIGIN      
+        1 tattatattt ttattaaatg caaatatgtt gatatgtaca tatatatata tatatgatca
+       61 taatatattt catttttatc atttttaaag tgtttgttta tttatattta ttatttttaa
+      121 tttttttatt ttatttaatt attttttttt attttttaaa tttttgtgac aagtttatat
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mini-align.aln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mini-align.aln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mini-align.aln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+CLUSTAL W (1.74) multiple sequence alignment
+
+
+P84139                MNEGEHQIKLDELFEKLLRARLIFKNKDVLRRC
+P814153               MNEGMHQIKLDVLFEKLLRARLIFKNKDVLRRC
+BAB68554              -------------------AMLIFKDKQLLQQC
+gb|443893|124775      -MRFRFQIKVPPAVEGARPALLIFKSRPELGGC
+                                                                                  
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mixedmast.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mixedmast.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mixedmast.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2369 @@
+********************************************************************************
+MAST - Motif Alignment and Search Tool
+********************************************************************************
+	MAST version 3.5.3 (Release date: 2006-04-30 01:41:24 -0700 (Sun, 30 Apr 2006))
+
+	For further information on how to interpret these results or to get
+	a copy of the MAST software please access http://meme.nbcr.net.
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+	If you use this program in your research, please cite:
+
+	Timothy L. Bailey and Michael Gribskov,
+	"Combining evidence using p-values: application to sequence homology
+	searches", Bioinformatics, 14(48-54), 1998.
+********************************************************************************
+
+
+********************************************************************************
+DATABASE AND MOTIFS
+********************************************************************************
+	DATABASE /home/struckma/XXXXX/xxxxx.seq.fasta (nucleotide)
+	Last updated on Tue Jul 11 18:30:00 2006
+	Database contains 4390 sequences, 173499994 residues
+
+	Scores for positive and reverse complement strands are combined.
+
+	MOTIFS test2.fasta.meme.out (peptide)
+	MOTIF WIDTH BEST POSSIBLE MATCH
+	----- ----- -------------------
+	  1    15   SQTTICRFEALQLSF
+	  2    15   LEKEVVRVWFCNRRQ
+	  3    15   FKQRRIKLGFTQADV
+	  4    15   KNMCKLKPLLQKWLE
+
+	PAIRWISE MOTIF CORRELATIONS:
+	MOTIF     1     2     3
+	----- ----- ----- -----
+	   2   0.12
+	   3   0.15  0.14
+	   4   0.21  0.19  0.17
+	No overly similar pairs (correlation > 0.60) found.
+
+	Random model letter frequencies (from non-redundant database):
+	A 0.070 C 0.024 D 0.040 E 0.052 F 0.040 G 0.074 H 0.029 I 0.041 K 0.052 
+	L 0.096 M 0.017 N 0.032 P 0.065 Q 0.042 R 0.067 S 0.084 T 0.052 V 0.059 
+	W 0.016 Y 0.022 
+********************************************************************************
+
+
+********************************************************************************
+SECTION I: HIGH-SCORING SEQUENCES
+********************************************************************************
+SEQUENCE NAME                      DESCRIPTION           FRAME   E-VALUE  LENGTH
+-------------                      -----------           -----   -------- ------
+chr04q                                                       c    3.3e-26 6776397
+chr02q                                                       b    9.1e-20 8059593
+
+********************************************************************************
+
+
+
+********************************************************************************
+SECTION II: MOTIF DIAGRAMS
+********************************************************************************
+SEQUENCE NAME                      E-VALUE   MOTIF DIAGRAM
+-------------                      --------  -------------
+chr04q                              3.3e-26  170163_[-3a]_45546_[+4a]_316730_
+                                             [+2c]_6917_[-1b]_46576_[+2c]_
+                                             112087_[-4a]_106122_[-3a]_54413_
+                                             [-3c]_2646_[-3c]_55292_[-1b]_
+                                             229973_[-1a]_168811_[+1b]_320302_
+                                             [+3c]_36_[+1c]_192_[+4c]_879_
+                                             [+2c]_15041_[+4b]_12578_[+2a]_
+                                             15085_[-2b]_34466_[-2a]_3572_
+                                             [-3c]_106732_[-4a]_100884_[-1a]_
+                                             150814_[+2b]_50394_[-2b]_2709_
+                                             [+1b]_80737_[-2c]_9981_[+4c]_
+                                             134258_[+2b]_5742_[+2b]_9723_
+                                             [-1b]_120512_[-2a]_180_[+2a]_
+                                             53270_[-4c]_17749_[-2a]_105178_
+                                             [-1b]_47045_[-2a]_86702_[+2c]_
+                                             6448_[-4a]_10586_[-2c]_30988_
+                                             [-2a]_125990_[-2c]_69247_[-4a]_
+                                             250534_[-2b]_143429_[-2a]_41053_
+                                             [-4b]_167542_[-1c]_126920_[-2b]_
+                                             79648_[-2c]_41344_[-1a]_34611_
+                                             [-1a]_32643_[-2a]_69929_[+1c]_
+                                             20774_[+1b]_62113_[+2c]_23189_
+                                             [-4b]_9299_[-4a]_39496_[+1b]_
+                                             43277_[-2a]_116690_[+4c]_6861_
+                                             [+4c]_71133_[+2c]_143200_[-2a]_
+                                             382714_[+1b]_15009_[+3b]_167995_
+                                             [-3c]_11445_[+3c]_85905_[-2c]_
+                                             116003_[+2b]_62888_[-3a]_64899_
+                                             [+2a]_49005_[+2a]_56736_[+3a]_
+                                             14883_[+2a]_6123_[-2a]_24729_
+                                             [-2a]_75491_[-2c]_33025_[+2a]_
+                                             223953_[+2a]_3536_[+2c]_57658_
+                                             [+1a]_321010_[-2b]_99474_[+4b]_
+                                             14568_[+4b]_14260_[-2c]_151646_
+                                             [-4b]_91790_[-2a]_60756
+chr02q                              9.1e-20  181586_[+1c]_84138_[-3c]_15167_
+                                             [-2b]_135571_[-4c]_325471_[+4a]_
+                                             12798_[-4a]_4700_[-2c]_182142_
+                                             [-1c]_7912_[-1a]_80644_[-2b]_
+                                             143591_[-2a]_5286_[-1a]_17554_
+                                             [-4b]_104512_[+2c]_75992_[-2b]_
+                                             1994_[+2a]_7759_[-2b]_11678_[+4a]_
+                                             544_[+2b]_15348_[+2b]_53558_[+2a]_
+                                             756_[-2a]_132170_[-3c]_6820_[+1a]_
+                                             226852_[+2b]_107962_[+1c]_5949_
+                                             [-2c]_138458_[-4b]_133762_[+1c]_
+                                             126753_[-2c]_69234_[+1c]_33_[+1c]_
+                                             72_[+1c]_150_[+1c]_33_[+1c]_72_
+                                             [+1c]_267_[+1c]_33_[+1c]_15489_
+                                             [-1c]_21704_[+1b]_72_[+1b]_346_
+                                             [+1c]_96059_[-4b]_36498_[-2b]_
+                                             335_[-4a]_[-1a]_155_[-3c]_18189_
+                                             [-1c]_174870_[-2c]_38785_[+1a]_
+                                             40123_[+2b]_147684_[-1b]_122397_
+                                             [+2b]_45479_[+4a]_74846_[+1c]_
+                                             23231_[+3b]_38245_[-4c]_152196_
+                                             [-2c]_304660_[+2a]_165655_[-4b]_
+                                             38440_[-4c]_21816_[+1c]_25500_
+                                             [-2c]_336004_[+3a]_323232_[-4a]_
+                                             7268_[+2c]_47044_[+4a]_41451_
+                                             [-2a]_66303_[+2a]_30342_[+1a]_
+                                             40657_[-4b]_180301_[-2c]_35205_
+                                             [-2c]_59610_[-2c]_2222_[+1b]_
+                                             1400_[-2a]_53171_[-2c]_175866_
+                                             [-2c]_162426_[-2c]_70915_[+2a]_
+                                             33010_[-1b]_139023_[+1b]_248403_
+                                             [-2b]_102258_[+2b]_13294_[-4c]_
+                                             85097_[+2b]_13395_[-4b]_217923_
+                                             [-2b]_252830_[-2a]_40346_[-3c]_
+                                             9779_[+1b]_15798_[-1b]_16835_
+                                             [+1a]_31854_[-2a]_3867_[+2a]_
+                                             28000_[+1b]_14934_[+2b]_18632_
+                                             [+4a]_54520_[+2b]_53902_[+1c]_
+                                             198948_[-2c]_10079_[+1b]_162971_
+                                             [-2a]_106411_[+2b]_74994_[-4b]_
+                                             11202_[-4b]_165552_[+2b]_7010_
+                                             [-2a]_12481_[-2b]_23349_[+1b]_
+                                             102412_[+2c]_100606_[-4a]_31326
+
+********************************************************************************
+
+
+
+********************************************************************************
+SECTION III: ANNOTATED SEQUENCES
+********************************************************************************
+
+
+chr04q
+  
+  LENGTH = 6776397  COMBINED P-VALUE = 7.48e-30  E-VALUE =  3.3e-26
+  DIAGRAM: 170163_[-3a]_45546_[+4a]_316730_[+2c]_6917_[-1b]_46576_[+2c]_112087_
+           [-4a]_106122_[-3a]_54413_[-3c]_2646_[-3c]_55292_[-1b]_229973_[-1a]_
+           168811_[+1b]_320302_[+3c]_36_[+1c]_192_[+4c]_879_[+2c]_15041_[+4b]_
+           12578_[+2a]_15085_[-2b]_34466_[-2a]_3572_[-3c]_106732_[-4a]_100884_
+           [-1a]_150814_[+2b]_50394_[-2b]_2709_[+1b]_80737_[-2c]_9981_[+4c]_
+           134258_[+2b]_5742_[+2b]_9723_[-1b]_120512_[-2a]_180_[+2a]_53270_
+           [-4c]_17749_[-2a]_105178_[-1b]_47045_[-2a]_86702_[+2c]_6448_[-4a]_
+           10586_[-2c]_30988_[-2a]_125990_[-2c]_69247_[-4a]_250534_[-2b]_143429_
+           [-2a]_41053_[-4b]_167542_[-1c]_126920_[-2b]_79648_[-2c]_41344_[-1a]_
+           34611_[-1a]_32643_[-2a]_69929_[+1c]_20774_[+1b]_62113_[+2c]_23189_
+           [-4b]_9299_[-4a]_39496_[+1b]_43277_[-2a]_116690_[+4c]_6861_[+4c]_
+           71133_[+2c]_143200_[-2a]_382714_[+1b]_15009_[+3b]_167995_[-3c]_
+           11445_[+3c]_85905_[-2c]_116003_[+2b]_62888_[-3a]_64899_[+2a]_49005_
+           [+2a]_56736_[+3a]_14883_[+2a]_6123_[-2a]_24729_[-2a]_75491_[-2c]_
+           33025_[+2a]_223953_[+2a]_3536_[+2c]_57658_[+1a]_321010_[-2b]_99474_
+           [+4b]_14568_[+4b]_14260_[-2c]_151646_[-4b]_91790_[-2a]_60756
+
+
+                                   [-3a]
+                                   3.3e-07
+                                   ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                                        +  +  +  +  +     +     +     +        +
+                                   ..T..D..T..Q..T..V..N..L..*..I..P..R..A..Q..F
+170137  CAGAAAAATCGAAGACGAGTATAAAATTGTGTCAGTCTGAGTAACATTCAGCTAAATAGGTCTAGCTTGAAA
+
+                                                  [+4a]
+                                                  5.6e-07
+                                                  K..N..M..C..K..L..K..P..L..L..
+                                                  +        +     +  +     +  +
+                                                  N..*..L..I..T..L..K..S..I..L..
+215713  TGTGTGAAAGGATATGGCCTATGGCATCAATAGGACTGAATCAACTAGCTAATCACATTAAAAAGTATTCTA
+
+        
+        
+        Q..K..W..L..E..
+        +        +
+        N..*..T..V..H..
+215785  AATTAAACAGTACACAACACTGACAAATGCAAGCGAATTTATAGAATGTGTGAGCACTGAGCAGGATGTATA
+
+                         [+2c]
+                         6.2e-07
+                         L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                               +     +  +        +  +     +  +
+                         V..H..K..K..V..V..K..C..W..F..S..N..R..N..N..
+532513  AATCCTATTTATCGCATGTCCACAAAAAAGTTGTAAAATGTTGGTTTAGTAATAGAAACAACAAAGCTCTTT
+
+                                                                           [-1b]
+                                                                           8.7e-
+                                                                           ..F..
+        
+                                                                           ..Y..
+539425  TTTATTTTCTTTCAACTTGAGACGCTAATAGGAACACCGAGTGCTTATTGAAGAAACGTTCGAGTATGTATG
+
+        
+        07
+        S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+        +  +        +     +     +  +  +  +     +
+        T..I..K..T..S..S..F..N..S..I..A..T..Y..S
+539497  TTATTTTAGTTGATGAGAAATTACTGATTGCAGTATATGACCTACTTCACATGTTATGCACCTATTATGTCA
+
+                                        [+2c]
+                                        9.6e-07
+                                        L..E..K..E..V..V..R..V..W..F..C..N..R..R
+                                           +  +     +  +  +        +  +     +
+                                        R..K..K..K..V..V..R..R..D..F..C..G..R..E
+586081  ACACACAAAGATATACGTCATAATCGTGAGAACGTAAGAAAAAGGTAGTAAGACGCGATTTCTGTGGTCGCG
+
+        
+        
+        ..Q..
+        
+        ..R..
+586153  AACGTTTAGTTTCTATTCCATTTTGGCAGAAGTTTAGAATTGTGATCTTACTAAGTATAACGGCAATCTTGT
+
+                                                                    [-4a]
+                                                                    8.5e-07
+                                                                    ..E..L..W..K
+                                                                      +        +
+                                                                    ..N..T..L..K
+698185  AGACGGAATGTCTCAGAATGCATCTTCGCACTAGTGCATACTTTTATATATATATATAGAGTTAGTCAGTTT
+
+        
+        
+        ..Q..L..L..P..K..L..K..C..M..N..K
+          +  +  +        +  +  +  +     +
+        ..Q..L..I..V..M..L..K..I..A..Q..K
+698257  TTGTAGTATCACCATCAGCTTTATGGCTTGTTTTTATTGTATGTACATTATACATGATATTTTATTGTAATG
+
+                                   [-3a]
+                                   3.8e-07
+                                   ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                                        +  +  +     +  +     +     +  +     +
+                                   ..L..N..T..Q..K..V..G..*..K..G..R..R..L..K..G
+804385  AATAAAATTATTAAATTACAACAAAAATAAATTTGTTTGTTTTACACCTCACTTACCTCTGCGTAACTTTCC
+
+                                                             [-3c]
+                                                             8.2e-08
+                                                             ..V..D..A..Q..T..F.
+                                                               +  +  +     +  +
+                                                             ..V..Q..G..V..T..Y.
+858817  CAACACACCTAAAAAAGTTATTTCTAGACATCTAGTGGTTTGTATAACCTATGTACCTGTCCTACAGTATAT
+
+        
+        
+        .G..L..K..I..R..R..Q..K..F
+         +     +     +           +
+        .G..S..T..L..R..Y..L..*..F
+858889  CCACTGGTAAGCCTGTATAACTAAAATAGAATGCGGAACAATTTTCATGGATGTACTTACTACATAGCTACC
+
+                [-3c]
+                3.6e-07
+                ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                     +     +     +  +     +  +  +  +        +
+                ..Y..N..P..Q..Q..Y..G..F..T..I..R..K..R..H..F
+861553  GTAAGGGTATAGTTTGGTTGCTGATAGCCGAAAGTAATTCTTTTGCGGTGGAAGTCGTAGTTTAGAAGCAAC
+
+                                                         [-1b]
+                                                         1.1e-07
+                                                         ..F..S..L..Q..L..A..E..
+                                                              +  +  +  +  +  +
+                                                         ..L..S..L..D..L..A..E..
+916849  ATTCCCATTGGCAGGAGCCAAGTATATCGAGTGTGTCGAGTTTAGATGATAAACTAAGGTCAAGTGCTTCTG
+
+        
+        
+        F..R..C..I..T..T..Q..S
+              +        +     +
+        A..S..S..S..N..T..R..S
+916921  CTGAAGAAGAATTAGTGCGTGATGTGTTATATTCTACAAGGCAAAATATCAACATTGTCCATGAGATGTTTC
+
+                                   [-1a]
+                                   5.7e-07
+                                   ..F..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+                                     +  +  +           +  +        +     +     +
+                                   ..P..S..I..K..R..T..E..F..S..P..I..*..T..R..S
+1146889 CTTAATGAGCTTTGAATGACCGAAACTAGGAGATATTTTGCGGGTTTCGAAACTCGGAATTCACGTGCGTGA
+
+                                                   [+1b]
+                                                   3.7e-07
+                                                   S..Q..T..T..I..C..R..F..E..A.
+                                                   +     +     +     +  +     +
+                                                   S..F..T..S..I..V..R..F..T..N.
+1315729 GAACGATTAAACTAGATAAACAACAGCTGACGTGAAATTCTGTTCTTTCACTTCGATCGTAAGATTCACTAA
+
+        
+        
+        .L..Q..L..S..F..
+            +     +  +
+        .Y..D..A..T..F..
+1315801 CTATGACGCAACTTTCTTCTTCGGTTTTGATTGGCTTGATTTCATGAAACGGAAATTACATCAAAAAATGGC
+
+                                                                      [+3c]
+                                                                      1.1e-16
+                                                                      F..K..Q..R
+                                                                      +  +  +  +
+                                                                      F..K..Q..K
+1636057 TTGATGTGGCTTTTTAGGAAGAAGCAAAGCAGTTGGAGCAGCTGGAACTTTTCGCGAAGGATTTCAAGCAGA
+
+                                                                               [
+                                                                               6
+        ..R..I..K..L..G..F..T..Q..A..D..V..                                    S
+          +  +  +     +  +  +  +  +  +  +                                      +
+        ..R..I..K..M..G..F..T..Q..G..D..V..                                    S
+1636129 AGCGAATTAAGATGGGGTTTACTCAGGGTGATGTTGGACAAGCAATGGGTTGTCTGTATGGCAATGATTTCA
+
+        +1c]
+        .3e-15
+        ..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+          +  +  +  +  +  +  +  +  +  +  +  +     +
+        ..Q..T..T..I..S..R..F..E..A..L..N..L..R..F..
+1636201 GTCAAACCACCATTTCAAGATTTGAAGCACTGAACTTGAGGTTTGTTTGTTCAGTTGGAGTCTTAGTTTTGT
+
+                            [+4c]
+                            1.8e-13
+                            K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                            +  +  +  +  +  +  +  +     +  +  +  +  +
+                            K..N..M..I..K..L..K..P..M..L..E..R..W..L..L..
+1636417 ATTGATCATTACACAGCTTTAAAAATATGATCAAGCTGAAGCCCATGCTAGAAAGATGGTTGCTGGATGCTA
+
+                [+2c]
+                2.1e-20
+                L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                +  +  +  +  +  +  +  +  +  +  +  +  +  +  +
+                M..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+1637353 AATGTAAAATGGAGAAGGAGGTAGTTCGTGTTTGGTTTTGCAACCGACGGCAAAAGCAAAAGAGAATATCGC
+
+                                                      [+4b]
+                                                      6.1e-07
+                                                      K..N..M..C..K..L..K..P..L.
+                                                      +  +  +     +           +
+                                                      N..S..A..*..K..I..I..C..L.
+1652401 TATAAATGGCACATTTTAAAAGTTAACAGATTATCATAAAAAATAGAATTCTGCTTGAAAAATAATATGTTT
+
+        
+        
+        .L..Q..K..W..L..E..
+            +  +  +  +  +
+        .T..S..K..W..V..E..
+1652473 GACATCAAAGTGGGTTGAGTTTTTAGGCACACTTTTGTTTGTATTAGAGGATTAACTTAATGATATTTAAAC
+
+                                                                             [+2
+                                                                             6.2
+                                                                             L..
+                                                                             +
+                                                                             Y..
+1665001 CAATTCGCCCCGCACCCTATATATAAATATAACGCATTAACTTACGTGCATGAAATAAATTTCTTATGCTAT
+
+        a]
+        e-07
+        E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+           +     +  +     +     +     +  +
+        *..R..L..V..I..A..V..C..F..F..N..R..A..V..
+1665073 TGAAGGCTCGTTATAGCAGTATGTTTTTTCAATCGAGCTGTGGCGGTTTTCGACAAAACTATATATACTGCA
+
+               [-2b]
+               2.7e-08
+               ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                          +     +  +  +  +  +  +  +
+               ..N..S..S..N..Y..F..W..V..R..V..V..E..M..F..R
+1680193 AAAGAACATTCGAAGAATTATAAAACCATACCCTCACGACTTCCATAAACCGTTGTTTATTGTTTAAAACTT
+
+                                      [-2a]
+                                      1.4e-07
+                                      ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E
+                                                 +     +  +     +  +  +  +
+                                      ..N..S..S..N..Y..F..W..I..R..V..V..E..M..F
+1714681 TCACGTTCCATTTGGTAGTAAACAAAGAACATTCGAAGAATTATAAAACCATATCCTCACGACTTCCATAAA
+
+        
+        
+        ..L
+        
+        ..R
+1714753 CCGTTGTTTATTGTTTAAAACTTGATTAGGATATTTGAATATTAGGTTTCCCATCTTCCCCCACAGTACTAT
+
+                                                       [-3c]
+                                                       3.4e-07
+                                                       ..V..D..A..Q..T..F..G..L.
+                                                            +     +  +  +     +
+                                                       ..I..D..R..Q..T..V..N..L.
+1718281 GATGACCTTGAAAATGAAACCTTGACGTGACAGATTTATTCTTATTGTATGTCTCGTTGTGTAACATTAAGT
+
+        
+        
+        .K..I..R..R..Q..K..F
+         +  +     +
+        .K..I..*..K..V..R..I
+1718353 TTTATTTATTTAACCCTAATTTTAGCATTGTTGTACTCAAGGATTTTTACATTTTATTATTACTTAGTTGCG
+
+                                                        [-4a]
+                                                        4.4e-07
+                                                        ..E..L..W..K..Q..L..L..P
+                                                             +     +  +  +  +  +
+                                                        ..R..V..R..K..S..L..V..P
+1825057 AGACGTACAATGGGAAGCTTATAGAAACATTTAATTGGATGACCGTGTTCTCACACGTTTGCTTAAGACGGG
+
+        
+        
+        ..K..L..K..C..M..N..K
+             +     +  +     +
+        ..D..L..T..V..M..R..N
+1825129 ATCTAAGGTTACCATTCTATTCGAACGAAATTATTTATCAATTGAATCAGCCCTCGGAACTAAACAAACTGA
+
+                                         [-1a]
+                                         1.6e-07
+                                         ..F..S..L..Q..L..A..E..F..R..C..I..T..T
+                                              +  +  +        +  +     +  +     +
+                                         ..S..S..L..N..Q..K..E..F..L..C..I..N..S
+1926001 ACACTGCGTGTATGTCAAACAGATCTAAGTTTATGAACTCAAATTTTGTTTTTCAAATAGGCAAATATTTGA
+
+        
+        
+        ..Q..S
+        
+        ..L..W
+1926073 TAACCAAAATTTCTCTTTTTAGGGTGGCAAAGGAGAGAAAGACAAAGGAAAAAAGAAGATTGGTGGAAGAGA
+
+                                                            [+2b]
+                                                            8.8e-07
+                                                            L..E..K..E..V..V..R.
+                                                                        +  +  +
+                                                            R..S..M..G..V..V..R.
+2076841 TTGAGCACAACATAATATTAAATTATCCTGATTGTGCTTAAACCATTAATAACGGTCTATGGGAGTCGTGAG
+
+        
+        
+        .V..W..F..C..N..R..R..Q..
+            +  +     +
+        .I..W..F..Y..N..S..L..N..
+2076913 AATATGGTTTTATAATTCATTAAATGTTTTTTGTTTACTACCAAATGGGACGAAAAAATAGGATGAAAAGGT
+
+                           [-2b]
+                           6.8e-07
+                           ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                      +     +     +  +  +  +           +
+                           ..F..L..S..N..Y..F..*..V..R..V..V..G..M..S..L
+2127313 TTAGTAGTATACAAAGAACAAACAAAGAATTATAAAACTATACCCTCACTACTCCCATAGACAGTAGTTAAT
+
+                                             [+1b]
+                                             2.9e-07
+                                             S..Q..T..T..I..C..R..F..E..A..L..Q.
+                                                   +     +  +     +     +  +  +
+                                             F..F..T..F..I..S..C..F..T..S..L..T.
+2130049 TACAGCTTCAACTTTGATCTCCGGTTGATTTTCTTCTTTTTTCACCTTTATCTCTTGTTTTACTTCTTTAAC
+
+        
+        
+        .L..S..F..
+         +  +
+        .L..S..L..
+2130121 TTTATCTTTGGTTTGTTCATTTTCCACTTTTTTGGCAGTTTCTATATCAGGTTTACTTGTTTGTTGTTCTTC
+
+                                           [-2c]
+                                           3.7e-07
+                                           ..Q..R..R..N..C..F..W..V..R..V..V..E.
+                                                   +  +        +     +  +
+                                           ..T..L..R..N..K..K..W..N..R..V..L..V.
+2210833 AGAAGAGCGCAAGAAATTGCAAGATCTCAAACTAAAGTAAGCCGATTTTTTTTCCAATTCCGTACTAGAACC
+
+        
+        
+        .K..E..L
+         +  +  +
+        .K..K..Y
+2210905 TTCTTGTAGATATAGCTATTGCTGTCGAAATATGTACAAAACTCTTATTTAAACGGTACATTTTCCAGCAGA
+
+                                                             [+4c]
+                                                             5.1e-07
+                                                             K..N..M..C..K..L..K
+                                                             +        +     +  +
+                                                             N..D..T..I..T..L..R
+2220841 ATTATGTTTCTTTCCCTTTAAACTTCTTCAATATACAGTGTACGATACGCACGAATGACACAATAACACTGC
+
+        
+        
+        ..P..L..L..Q..K..W..L..E..
+             +  +  +     +
+        ..L..L..L..A..H..W..S..K..
+2220913 GACTATTATTGGCTCACTGGTCAAAATATTTTAGAATCTAAAACTTACATACAGAAACAATACAATAGCAGC
+
+            [+2b]
+            1.7e-08
+            L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                  +  +  +  +  +        +  +  +
+            R..S..K..E..V..V..R..I..R..F..C..N..S..L..Y..
+2355193 ACAACGGTCTAAGGAAGTCGTGAGGATACGATTTTGTAATTCTTTGTATGTTCTTTGTTTACTACCAAATGG
+
+                                       [+2b]
+                                       6.8e-07
+                                       L..E..K..E..V..V..R..V..W..F..C..N..R..R.
+                                       +  +     +  +        +  +  +
+                                       L..K..G..E..V..L..S..V..W..F..A..R..S..I.
+2360953 ACCAAAACCGCAAGCGGAATTCGCCTTCGGCTTGAAAGGGGAGGTTTTGTCGGTTTGGTTCGCAAGATCTAT
+
+        
+        
+        .Q..
+        
+        .S..
+2361025 TAGCGCGCTTTTAGGCTTTAGTTAATTCGATCAGCAGTTTTAATAAAATAAGCCCATTGCGAATTATACGCC
+
+               [-1b]
+               9.9e-07
+               ..F..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+                 +  +        +  +     +     +  +  +  +
+               ..H..T..Q..E..L..S..N..F..V..C..I..A..S..S..A
+2370745 CAACGTCGTGTGTTTGCTCAAGGGAATTGAATACGCATATCGCAGACGAAGCTAAACAATGTCTGCCCTATA
+
+                                            [-2a]
+                                            8.7e-08
+                                            ..Q..R..R..N..C..F..W..V..R..V..V..E
+                                                       +     +  +  +  +  +  +
+                                            ..N..S..S..N..Y..F..W..V..R..V..V..*
+2491273 CTATTTTTTCGCTCCATTTGGTAGTAAACAGAGAGCATTCGAAGAATTATAAAACCATACTCTCACGACTCA
+
+        
+        
+        ..K..E..L
+        
+        ..G..V..S
+2491345 TCCCACAGACCGTTGTTAATTGTTTAAAACACAATCAGGATATTTCAACACTATGTGCTATAAAGGCGTCTC
+
+                                                     [+2a]
+                                                     8.7e-08
+                                                     L..E..K..E..V..V..R..V..W..
+                                                                 +  +  +  +  +
+                                                     S..V..G..*..V..V..R..V..W..
+2491489 ATAGTGTTGAAATATCCTGATTGTGTTTTAAACAATTAACAACGGTCTGTGGGATGAGTCGTGAGAGTATGG
+
+        
+        
+        F..C..N..R..R..Q..
+        +     +
+        F..Y..N..S..S..N..
+2491561 TTTTATAATTCTTCGAATGCTCTCTGTTTACTACCAAATGGAGCGAAAAAATAGAATAAAGGGGTGTCCGAT
+
+                [-4c]
+                3.8e-07
+                ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+                  +        +     +  +           +     +  +
+                ..E..M..K..K..*..L..I..S..V..I..K..N..M..N..*
+2544841 AATTGCAATTCCATCTTCTTTTACAAAATTGACACGATTTTGTTCATGTTTTAGACATGAAAACTTTAATAT
+
+                          [-2a]
+                          9.6e-07
+                          ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                           +  +  +     +  +        +  +
+                          ..N..S..I..S..I..F..W..V..Y..V..V..K..D..K..L
+2562625 ACAGCTATACAAAAGATGGTTGCTTATTGATATAAACCATACATAAACAACTTTGTCTTTCAAATTAAAGCG
+
+                                                         [-1b]
+                                                         4.4e-07
+                                                         ..F..S..L..Q..L..A..E..
+                                                              +     +  +     +
+                                                         ..Y..S..T..Q..L..C..E..
+2667817 ACTTCAAATGTGGGCGATTTATTGCCTGTTTAGTCTTGTGTTGGATAACATACGAAGTCTGCAAGCACTCAA
+
+        
+        
+        F..R..C..I..T..T..Q..S
+        +  +           +
+        F..R..H..F..C..T..M..N
+2667889 AGCGATGGAAACAAGTCATATTTTCATGGCAGTGGAAAATAACTGGTCTGGCTTTCGCCATAACTACGCTGA
+
+                                                           [-2a]
+                                                           2.1e-07
+                                                           ..Q..R..R..N..C..F..W
+                                                                      +  +  +
+                                                           ..L..S..S..N..C..F..G
+2714905 CACCTTTTTCTTTTAATTTCTAGTCACATTTAGAAGTAAACAGAGAACATTCAATGAAGAATTACAAAACCC
+
+        
+        
+        ..V..R..V..V..E..K..E..L
+             +  +  +           +
+        ..I..R..V..V..G..M..P..L
+2714977 TATCCTCACGACTCCCATAGGCAGTTGTTAATTGTTTAAAACACGATCAGGATATTTCGATATTATGTGTTA
+
+                                              [+2c]
+                                              9.6e-07
+                                              L..E..K..E..V..V..R..V..W..F..C..N
+                                                       +  +  +  +  +     +     +
+                                              R..S..M..E..V..V..R..V..R..F..Y..N
+2801665 TATTCAAATATCATGATCGTGTTTTAATCAATTACCAACGGTCTATGGAAGTCGTAAGGGTTCGGTTTTATA
+
+        
+        
+        ..R..R..Q..
+        
+        ..S..L..N..
+2801737 ATTCTTTGAACGTTTTTTGTTTACTAGCAAATGGGACGAGAAAATGTAACGAAAATGTGTCCCATCTTTACT
+
+                                                           [-4a]
+                                                           4.6e-07
+                                                           ..E..L..W..K..Q..L..L
+                                                                   +     +  +  +
+                                                           ..L..S..W..L..S..L..V
+2808145 GAAGATCAAATGCGAACTGCTTTGCTTTATGGTGAAATCTACAACGCTTGTTAAACTCCACAAACTTAACAC
+
+        
+        
+        ..P..K..L..K..C..M..N..K
+          +  +  +        +     +
+        ..P..K..L..S..S..A..L..K
+2808217 CGGCTTGAGCGAGCTTGCTAACTTTGGTGCCGCAGTTGTAATAGTCTCGATGTGACGGGCGCCACGGCTGCA
+
+                                  [-2c]
+                                  7.4e-07
+                                  ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                             +     +  +     +  +  +
+                                  ..N..L..S..N..Y..F..W..I..R..V..V..G..L..S..R
+2818801 CCAATTTGTTCGTGAACAAAAAAAACATTCAAAGAATTATAAAACCATATTCTCACAACTCCCAAAGACCGT
+
+                                   [-2a]
+                                   4.3e-08
+                                   ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                              +     +  +     +  +  +  +
+                                   ..N..L..S..N..Y..F..W..I..R..V..V..E..M..Y..F
+2849833 CATGTTATTTGGTAGTAAACAAAGAACATTCAAAGAATTATAAAACCATATCCTCACGACTTCCATATAGAA
+
+                                                                      [-2c]
+                                                                      6.0e-07
+                                                                      ..Q..R..R.
+        
+                                                                      ..E..E..K.
+2975833 TCCAGCACAACACACATGCCCAGGACAAGTTATTCACCAGAATTGCTGATAGTTACATTGCTCTCTTCTTTA
+
+        
+        
+        .N..C..F..W..V..R..V..V..E..K..E..L
+            +     +  +        +     +  +  +
+        .T..C..M..W..V..Y..S..V..F..K..E..L
+2975905 GTGCACATCCAGACATAAGAGACAAATTTTTCAAGGTAAATTTTTGGCTTGAAACTGTCAAATAAAAGTACA
+
+                          [-4a]
+                          6.1e-07
+                          ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+                               +  +  +     +  +  +  +        +        +
+                          ..V..V..W..K..R..L..I..A..R..T..S..C..C..E..N
+3045169 TTCTATTCTTACACAACCTACCACCCATTTTCTTAATATAGCCCGAGTAGAACAACATTCGTTCTGTTGTTG
+
+                                             [-2b]
+                                             3.7e-07
+                                             ..Q..R..R..N..C..F..W..V..R..V..V..
+                                                        +     +  +     +  +
+                                             ..N..S..Y..N..Y..F..W..I..R..V..L..
+3295729 TTCCTTTTCTTGTCCCAATGGGTAGTAAACAAAGAGCATTCGAATAATTATAAAACCATATCCTCACGAGTC
+
+        
+        
+        E..K..E..L
+                 +
+        G..M..Y..L
+3295801 CCATATACAGTTGTTAATTGTTTATATCATGATCAGCATATTTGGATATTATGTACTAAAGGTGTCCCATCT
+
+                       [-2a]
+                       2.8e-07
+                       ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                     +  +  +  +  +  +           +  +
+                       ..C..G..*..C..C..F..W..V..R..V..C..Q..S..D..Y
+3439225 TTGTTTTAGCCAAAAACAACCTTAACAACAAAACCATACCCTAACACACTGTGAATCATACATAATAATGCA
+
+         [-4b]
+         9.8e-07
+         ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+              +  +     +     +  +           +  +  +  +
+         ..K..L..W..L..Q..G..V..A..G..V..T..C..M..S..N
+3480337 TTTTAAGCCACAGTTGACCTACTGCCCCCACTGTACACATGCTATTTATGGCGGATAGTTATTTGAGCCATT
+
+                                                    [-1c]
+                                                    5.5e-07
+                                                    ..F..S..L..Q..L..A..E..F..R.
+                                                      +  +        +        +
+                                                    ..H..T..N..L..L..K..A..F..G.
+3647881 CGCCACCGTTAGATTCCGATCTATATACCTAGGATTTGCCATAAATGTGTGTTTAGAAGTTTCGCGAAACCG
+
+        
+        
+        .C..I..T..T..Q..S
+         +  +  +        +
+        .C..I..T..H..T..S
+3647953 CAAATCGTGTGTGTAGATGCGTATCCCAATCCCCTGCTCTTTATCTCAAGATCCAGCGCTCGCTTGTCAATC
+
+         [-2b]
+         1.9e-07
+         ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                    +     +  +     +  +  +  +
+         ..N..L..S..N..Y..F..W..I..R..V..V..E..M..F..R
+3774889 CATTTAAAGAATTATAAAACCATATCCTTACGACTTCCATAAACCGTTGTTTATTGTTTAAAACTTGATCAG
+
+                                                                      [-2c]
+                                                                      2.6e-07
+                                                                      ..Q..R..R.
+        
+                                                                      ..N..L..S.
+3854521 GGGAGAAGATGGGACACATTTTCATTCCATTTTCTTTGTACGTTTTGTTGTAAACAAAAGACATTCAAAGAA
+
+        
+        
+        .N..C..F..W..V..R..V..V..E..K..E..L
+         +     +        +  +  +  +  +
+        .N..Y..F..Q..I..R..V..V..E..R..S..R
+3854593 TTATAAAACTGTATCCTCACGACTTCCCTAGACCGTTGTTAATTGTTTAGAAACCAATCAGGAAGTTTAAAT
+
+                                                           [-1a]
+                                                           4.3e-07
+                                                           ..F..S..L..Q..L..A..E
+                                                                +  +     +     +
+                                                           ..S..S..I..K..L..D..E
+3895921 TACGGTATGGCTGACAACTTAAACAATCCATATGGAAGGCTACTGTGTTGGAGAACTTATTTTTAAGTCTTC
+
+        
+        
+        ..F..R..C..I..T..T..Q..S
+          +  +     +           +
+        ..F..R..T..I..C..I..*..S
+3895993 AAATCTTGTAATGCAGATCTACGACACCGAACTTGAGAATGCGAACGAGTTTTCAGGTTTTAACGACCTTTT
+
+           [-1a]
+           6.4e-08
+           ..F..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+                +  +  +  +  +     +  +  +     +
+           ..V..S..L..N..L..N..G..F..R..C..V..T..N..*..*
+3930625 ACGAACCGATAAGTTCAAGTTTCCGAAACGACAAACGGTATTTTATCAACAATAGATAAACGACTAAAAACA
+
+           [-2a]
+           8.4e-07
+           ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                      +  +  +              +     +  +  +
+           ..I..L..A..N..C..F..N..G..S..Y..V..Q..K..E..L
+3963313 ACTGATTAATGCATTGCAGAAATTTCCAGAGTATACCTGTTTCTCTAAAATGAGGACTTGTGATTGTCATAA
+
+                                                                         [+1c]
+                                                                         1.2e-08
+                                                                         S..Q..T
+                                                                         +     +
+                                                                         S..H..S
+4033225 AAACCCAAAAAGAAATGCACTTGTAGTACTGTGGGGTAAGATGGATGTCGATATAGCACATTTTATCCCATT
+
+        
+        
+        ..T..I..C..R..F..E..A..L..Q..L..S..F..
+                +  +  +     +     +  +  +  +
+        ..S..*..S..R..F..N..N..*..Q..L..S..F..
+4033297 CTTCATAATCGCGTTTTAACAATTAACAACTCTCTTTTACAATCGTGAGGTAACGGTTATATAATTCTGTAA
+
+            [+1b]
+            1.2e-08
+            S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+            +     +        +  +  +     +     +  +  +  +
+            S..H..S..S..*..S..R..F..N..N..*..Q..L..S..F..
+4054105 TTTATCCCATTCTTCATAATCGCGTTTTAACAATTAACAACTCTCTTTTACAATCGTGAGGTAACGGTTATA
+
+                                  [+2c]
+                                  8.4e-07
+                                  L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                                  +     +        +        +  +     +
+                                  L..Y..R..S..L..V..K..I..W..F..Y..N..S..L..V..
+4116241 TTATGTTTTAAACAATTAACAACGGTCTATATAGGAGTCTCGTGAAAATATGGTTTTATAATTCTTTGGTTG
+
+            [-4b]
+            1.7e-07
+            ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+                 +        +  +  +     +  +  +  +  +     +
+            ..L..L..G..L..S..L..V..L..K..L..K..I..A..T..K
+4139497 TGGACAATAATCCCAGACTGAGGACTAACTTCAATTTGATTGCGGTTTTCATCTTTCTAATAAGAACAAAGA
+
+                                                                    [-4a]
+                                                                    9.3e-07
+                                                                    ..E..L..W..K
+                                                                      +     +  +
+                                                                    ..E..R..W..A
+4148785 TTTGGTTATTAGGAGAATGAAGGTATAATAAAAATAGTATTCAAAACTAAAAAAATGATATTCACGCCATGC
+
+        
+        
+        ..Q..L..L..P..K..L..K..C..M..N..K
+             +     +        +        +  +
+        ..*..L..*..A..Q..*..K..N..I..N..K
+4148857 TTATAACTATGCTTGTCATTTATTGATGTTTTTTATTTGTATTAGGTAAATGAACAAAATCCTCAAGTGTTT
+
+         [+1b]
+         8.0e-07
+         S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+         +           +  +     +        +  +  +  +  +
+         S..*..P..G..I..S..T..F..Q..*..L..D..I..S..H..
+4188385 ATCCTGACCTGGAATCTCAACTTTTCAATAACTTGACATCAGCCATACAATTACTACAAGGTAAGATAATGT
+
+                                                           [-2a]
+                                                           8.0e-08
+                                                           ..Q..R..R..N..C..F..W
+                                                                      +  +  +  +
+                                                           ..H..L..F..N..C..F..W
+4231657 GGACATCATTTCATTTTATTTACTCGTCTCACTTGGTAGTAAAAAAAGAAAATGCAAAAAATTACAAAACCA
+
+        
+        
+        ..V..R..V..V..E..K..E..L
+             +  +  +
+        ..I..R..V..V..G..T..S..R
+4231729 TATCCTCACAACTCCCGTAGACCGTTGTTAATTGTTCAAATCACAATCAAGATGTTTGGAGATTATGAGCTA
+
+          [+4c]
+          3.3e-07
+          K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+          +           +     +  +  +  +  +  +     +
+          N..L..L..K..K..I..K..P..I..L..Q..K..F..V..H..
+4348441 AAAACCTTCTTAAAAAGATCAAACCAATCTTGCAGAAGTTTGTCCACGATGACACCAACTTACAACTACATG
+
+                                                                            [+4c
+                                                                            9.3e
+                                                                            K..N
+                                                                            +  +
+                                                                            N..S
+4355281 TAAATTTTGAACGTATATATACTGTGTTTTGTAAATTTTGAACATATTCAGTTTTTAAACATCTACAAAATT
+
+        ]
+        -07
+        ..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                +     +  +  +  +     +
+        ..Q..N..A..I..K..P..I..L..I..K..Y..*..L..
+4355353 CCCAAAATGCAATTAAACCCATACTTATAAAATATTAATTACAAGTCGCATTAATTAATTTGGCAAATACAC
+
+                                              [+2c]
+                                              1.9e-07
+                                              L..E..K..E..V..V..R..V..W..F..C..N
+                                                          +  +  +  +  +  +     +
+                                              R..S..M..G..V..V..R..V..W..F..Y..N
+4426489 AATCCAAATATTCTGATCATGTTTTAAACAATTCACAACGGTCTATGGGAGTTGTAAGGGTATGGTTTTATA
+
+        
+        
+        ..R..R..Q..
+        
+        ..T..L..N..
+4426561 ATACTTTGAATGTTCTTTGTTTACTACCAAATGGGACAAAAATAATTAAAGGGTGTCCCATCTTTCCCTACC
+
+           [-2a]
+           4.6e-07
+           ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                +           +           +  +     +  +  +
+           ..M..R..*..V..Y..F..F..I..S..V..V..S..R..K..L
+4569769 TCCCATTCGCTAAACATAAAAGAAAATTGAAACTACGCTGCGTTTTAATAGTGTATATACGGCCATAGATAC
+
+                  [+1b]
+                  7.1e-07
+                  S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                  +     +        +     +  +        +  +  +
+                  S..L..T..S..L..S..P..F..E..L..G..T..I..S..L..
+4952521 TAGCGATCAAAGTTTGACGAGCCTCTCTCCTTTTGAACTTGGCACAATTTCTCTTACTTGTTGGTTTAATAT
+
+                        [+3b]
+                        7.8e-07
+                        F..K..Q..R..R..I..K..L..G..F..T..Q..A..D..V..
+                              +  +        +  +        +  +  +  +  +
+                        E..R..I..R..N..C..K..L..S..K..T..Q..G..N..V..
+4967569 GGAAATTGTGGTTTCTGAAAGGATAAGAAACTGCAAATTATCTAAAACTCAGGGAAACGTTTAAAATCATGT
+
+                [-3c]
+                3.8e-07
+                ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                  +     +     +  +  +        +     +        +
+                ..V..S..A..T..T..V..G..I..I..I..N..R..R..R..F
+5135617 CAGATGGTTACAGAAGCAGTGGTAACACCAATGATAATGTTGCGCCTTCTAAACAGAGCAAAGAATCTTGTA
+
+                                                          [+3c]
+                                                          7.8e-07
+                                                          F..K..Q..R..R..I..K..L
+                                                          +     +     +     +  +
+                                                          F..I..Q..C..R..E..S..L
+5147065 ACTTAATTGCCCCCCAAAAAATAATTAATTACATTTTCCGAAATGGTTGCTTTATACAGTGTAGAGAAAGTC
+
+        
+        
+        ..G..F..T..Q..A..D..V..
+          +           +     +
+        ..G..L..S..L..A..Y..V..
+5147137 TTGGCCTAAGTTTAGCTTATGTGTCACAATTTGACATTACAATTATCATTATAATACAATTTAAATAGTTTG
+
+                                        [-2c]
+                                        5.7e-07
+                                        ..Q..R..R..N..C..F..W..V..R..V..V..E..K.
+                                             +     +     +     +     +        +
+                                        ..F..R..I..N..Y..F..F..V..Q..V..C..Q..K.
+5233033 ATTATATGATGCAAAAATTATATGATGCTTAAAAACCTAATGTTGTAGAAAAATACCTGAACACATTGTTTT
+
+        
+        
+        .E..L
+            +
+        .R..Y
+5233105 CTATAGAGTTTGTGAGTTCAGTAAGACTGAGTGCTTTTGCGCATGATTTGACTACAAATTTCACGGCTTCAA
+
+                        [+2b]
+                        4.0e-07
+                        L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                        +     +  +  +  +  +        +     +
+                        L..F..K..E..V..V..R..P..L..F..D..N..L..I..I..
+5349097 GGAACCTGTAAGACCTTTGTTTAAGGAAGTTGTAAGACCTTTGTTTGATAACTTAATAATAGTTGTCACATG
+
+                             [-3a]
+                             4.7e-07
+                             ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                                  +  +  +     +     +  +  +  +  +     +
+                             ..R..N..T..Q..W..F..S..L..S..I..R..R..L..K..G
+5412025 TATGCTTTGCTAAATCAACTGTCTGTTTGTTTGCCAAAACGACAAACTAATGCGGCGCAATTTACCCTGCGA
+
+                             [+2a]
+                             2.6e-07
+                             L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                             +     +  +  +        +           +  +     +
+                             L..Q..R..N..V..C..S..V..S..R..*..N..R..T..Q..
+5476969 TTTATATCAACTTGTATTTTTTTACAAAGAAATGTGTGTTCTGTAAGTCGTTAAAACCGAACACAAAACGTG
+
+                                               [+2a]
+                                               6.8e-07
+                                               L..E..K..E..V..V..R..V..W..F..C..
+                                                        +  +  +  +        +  +
+                                               Q..S..M..E..V..V..R..I..Q..F..C..
+5526001 GACACTATCTTGATCGTTGTTTTTAAAAACAATTAGCATCAGTCTATGGAAGTTGTAAGGATACAGTTTTGC
+
+        
+        
+        N..R..R..Q..
+        +
+        N..S..L..N..
+5526073 AATTCTTTGAATGTCTTTGTTTACCACCAAATACGACGAGAAAATAGAATGAAAAGGTGTCCTATCTTTCCC
+
+                    [+3a]
+                    8.2e-07
+                    F..K..Q..R..R..I..K..L..G..F..T..Q..A..D..V..
+                    +  +        +     +     +  +  +     +     +
+                    L..K..E..S..R..N..K..R..G..V..T..F..T..T..V..
+5582809 AGGAAGTACTCGCTAAAAGAGTCGCGAAATAAACGTGGTGTCACATTTACTACCGTTGCGCGGGTGGTAAAG
+
+                                            [+2a]
+                                            4.1e-08
+                                            L..E..K..E..V..V..R..V..W..F..C..N..
+                                            +           +  +  +     +  +     +
+                                            L..S..M..G..V..V..R..I..W..F..Y..N..
+5597713 TACAAATATCCTGATCTTGTTTTAAACAGTTAACAACTGTCTATGGGAGTCGTGAGGATATGGTTTTATAAT
+
+        
+        
+        R..R..Q..
+        
+        S..L..N..
+5597785 TCTTTAAATGTTCTTTATTTACTACCAAGTGGGACAAGAAAATAGAACTAAAAGGTGTCCCATCTTCTCGCA
+
+                    [-2a]
+                    6.5e-07
+                    ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                               +  +  +     +     +     +  +
+                    ..Y..K..K..N..C..F..S..V..S..V..K..D..K..T..S
+5603905 AGTATAATTTCTATACTTTTTATTGCAGAAAGAAACAGAAACTTTATCTTTTGTTGAAAAACTTCTGACACA
+
+                          [-2a]
+                          8.8e-07
+                          ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                     +     +  +     +  +  +
+                          ..I..L..S..N..Y..F..W..I..R..V..V..G..I..S..R
+5628673 TTTTAGTAAACAAAGAACAATCAAAGAATTATAAAACCATATCCTCACGACTCCTATAGACCTTTGTTGATT
+
+                                  [-2c]
+                                  7.7e-07
+                                  ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                          +        +  +     +  +        +  +
+                                  ..P..Y..R..T..A..F..W..L..R..V..K..G..K..E..R
+5704201 GTTCCAGTCTATTATGGTGTGGTGTATGGATAGCGAGTAGCGAACCAGAGTCTCACTTTCCCTTTTTCTCGC
+
+                                                        [+2a]
+                                                        2.1e-07
+                                                        L..E..K..E..V..V..R..V..
+                                                        +        +  +     +
+                                                        Y..Q..V..N..V..K..R..I..
+5737249 CGACAAGAGAATGTTCGGAACTTGTGGTTGAGGCCACTTACAAGACGATACCAAGTTAATGTGAAACGAATT
+
+        
+        
+        W..F..C..N..R..R..Q..
+           +     +  +     +
+        R..F..R..N..R..A..Q..
+5737321 CGTTTTCGAAACCGAGCACAGAACTTTACTAATGACCAAACACAGGTTAATACAGTTTAAATATTTCGTTTT
+
+                                                              [+2a]
+                                                              4.4e-07
+                                                              L..E..K..E..V..V..
+                                                                       +  +  +
+                                                              R..F..V..E..V..V..
+5961241 NNCCTTTAACAGATAGTTTTCAAATAGCCTGATCGTGTTTTAAAGAATTAACAACGGTTTGTGGAAGTCGTA
+
+        
+        
+        R..V..W..F..C..N..R..R..Q..
+        +     +  +     +
+        R..I..W..F..H..N..S..L..D..
+5961313 AGGATATGGTTTCATAATTCTTTGGATGTTCTTTGTTTACTACCAAATGGAAAGAGAAAACTAAATGAAAAG
+
+                                           [+2c]
+                                           1.9e-07
+                                           L..E..K..E..V..V..R..V..W..F..C..N..R
+                                                    +  +  +  +     +  +     +
+                                           R..S..V..E..V..V..R..I..W..F..H..N..S
+5964841 CCAAATAGCCTGATCGTGTTTTAAAGAATTAACAACGGTCTGTGGAAGTCGTAAGGATATGGTTTCATAACT
+
+        
+        
+        ..R..Q..
+        
+        ..L..D..
+5964913 CTTTGGATGTTCTTTGTTTACTTCCAAATGGAAAGAGAAAACAAAATGAAAAGGTGTCCCATCTTCCCCCAT
+
+                                                                          [+1a]
+                                                                          1.5e-0
+                                                                          S..Q..
+        
+                                                                          H..L..
+6022513 ATCCAAAATAACACTTTTGCAAATTGAAAGAACTTTTTTTTTCAAATTTAAACATTTTTAAAATTGCACCTT
+
+        
+        7
+        T..T..I..C..R..F..E..A..L..Q..L..S..F..
+              +  +     +     +  +  +  +  +  +
+        F..*..I..C..T..F..L..S..L..N..L..S..F..
+6022585 TTTTAAATTTGCACCTTTTTAAGTTTAAACCTGTCTTTTAATGCTGGCTATTTTTAGTCACCACTGTGGCCT
+
+         [-2b]
+         7.1e-07
+         ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                    +     +  +     +  +  +
+         ..N..L..S..N..Y..F..W..I..R..V..V..G..L..S..Q
+6343633 CATTCAAAGAATTATAAAACCATATCCTTACGACTCCCAAAGACTGTTGTTAATTGTTTAAAACATGATCAG
+
+                        [+4b]
+                        1.9e-07
+                        K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                        +  +  +  +  +                 +  +  +  +  +
+                        N..N..A..C..K..Y..S..L..F..T..N..K..W..V..E..
+6443137 TCGTGTTTCAACAATTAACAACGCTTGTAAATATTCTTTGTTTACTAATAAATGGGTCGAATAAAAAATTGG
+
+                     [+4b]
+                     5.9e-07
+                     K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                     +  +  +  +  +           +  +  +        +  +
+                     N..S..A..V..K..I..V..K..I..L..N..M..K..L..E..
+6457753 CTCGCGACTCTGAAATAGCGCTGTTAAAATTGTTAAAATCTTGAATATGAAACTCGAGTATTATCTGTAACC
+
+                                                                      [-2c]
+                                                                      4.6e-07
+                                                                      ..Q..R..R.
+        
+                                                                      ..C..L..V.
+6472009 GAGTGGGTTAAAGATGCGACGAAGTCACATTCATTCTATTTTCTTGTCCAATTTGGTAGTAAACAAAGAACA
+
+        
+        
+        .N..C..F..W..V..R..V..V..E..K..E..L
+         +     +     +  +  +  +     +
+        .N..F..F..Q..V..R..V..V..G..K..S..W
+6472081 TTAAAAAACTGTACCCTCACGACTCCCTTAGACCATTGTTAATTGTTTAAACACAATCAGGATATTTTGGAT
+
+                                                         [-4b]
+                                                         3.8e-07
+                                                         ..E..L..W..K..Q..L..L..
+                                                              +     +     +  +
+                                                         ..K..L..E..R..H..L..L..
+6623713 GGAGCAGAATAATGCTTTGATTGTGAAAGAAATGACTTTCCAGGTCGCTTTTTAGTTCTCGGTGTAAAAGGT
+
+        
+        
+        P..K..L..K..C..M..N..K
+           +  +  +  +        +
+        N..R..L..K..I..S..D..K
+6623785 TTCTCAGTTTTATTGAATCTTTCTTCATGTCTTTCCAGATAATCCCCGTTATAAGTTACATCCATAACTTGT
+
+                    [-2a]
+                    2.2e-08
+                    ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                               +     +  +     +  +  +     +
+                    ..L..*..S..N..Y..F..W..I..R..V..V..K..K..S..R
+6715585 TTAGTAAACGAAAAGCTAAGAATTATAAAACCATATCCTCACGACTTTCTTAGACCGTTTAATAATTGTTTA
+
+
+chr02q
+  
+  LENGTH = 8059593  COMBINED P-VALUE = 2.06e-23  E-VALUE =  9.1e-20
+  DIAGRAM: 181586_[+1c]_84138_[-3c]_15167_[-2b]_135571_[-4c]_325471_[+4a]_
+           12798_[-4a]_4700_[-2c]_182142_[-1c]_7912_[-1a]_80644_[-2b]_143591_
+           [-2a]_5286_[-1a]_17554_[-4b]_104512_[+2c]_75992_[-2b]_1994_[+2a]_
+           7759_[-2b]_11678_[+4a]_544_[+2b]_15348_[+2b]_53558_[+2a]_756_[-2a]_
+           132170_[-3c]_6820_[+1a]_226852_[+2b]_107962_[+1c]_5949_[-2c]_138458_
+           [-4b]_133762_[+1c]_126753_[-2c]_69234_[+1c]_33_[+1c]_72_[+1c]_150_
+           [+1c]_33_[+1c]_72_[+1c]_267_[+1c]_33_[+1c]_15489_[-1c]_21704_[+1b]_
+           72_[+1b]_346_[+1c]_96059_[-4b]_36498_[-2b]_335_[-4a]_[-1a]_155_
+           [-3c]_18189_[-1c]_174870_[-2c]_38785_[+1a]_40123_[+2b]_147684_[-1b]_
+           122397_[+2b]_45479_[+4a]_74846_[+1c]_23231_[+3b]_38245_[-4c]_152196_
+           [-2c]_304660_[+2a]_165655_[-4b]_38440_[-4c]_21816_[+1c]_25500_[-2c]_
+           336004_[+3a]_323232_[-4a]_7268_[+2c]_47044_[+4a]_41451_[-2a]_66303_
+           [+2a]_30342_[+1a]_40657_[-4b]_180301_[-2c]_35205_[-2c]_59610_[-2c]_
+           2222_[+1b]_1400_[-2a]_53171_[-2c]_175866_[-2c]_162426_[-2c]_70915_
+           [+2a]_33010_[-1b]_139023_[+1b]_248403_[-2b]_102258_[+2b]_13294_
+           [-4c]_85097_[+2b]_13395_[-4b]_217923_[-2b]_252830_[-2a]_40346_[-3c]_
+           9779_[+1b]_15798_[-1b]_16835_[+1a]_31854_[-2a]_3867_[+2a]_28000_
+           [+1b]_14934_[+2b]_18632_[+4a]_54520_[+2b]_53902_[+1c]_198948_[-2c]_
+           10079_[+1b]_162971_[-2a]_106411_[+2b]_74994_[-4b]_11202_[-4b]_165552_
+           [+2b]_7010_[-2a]_12481_[-2b]_23349_[+1b]_102412_[+2c]_100606_[-4a]_
+           31326
+
+
+          [+1c]
+          5.5e-07
+          S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+          +              +  +     +  +  +     +  +  +
+          S..S..P..S..L..S..R..N..E..S..L..R..I..T..F..
+181585  CATCAAGTCCGTCATTATCGAGAAACGAATCGTTGAGAATTACCTTCTTCTCTTCTTCTCTCAAGCGAGTTT
+
+                         [-3c]
+                         1.9e-07
+                         ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                           +        +  +  +  +  +     +           +  +
+                         ..V..L..S..Q..T..V..G..L..L..I..N..V..F..K..L
+265753  AGCCAAATAAAACACAGGACCAAACTCTGGGTAACACCTAAAAGTATGTTAACAAACTTCAAGGGGAAATTG
+
+                                             [-2b]
+                                             8.4e-07
+                                             ..Q..R..R..N..C..F..W..V..R..V..V..
+                                                        +     +  +        +
+                                             ..Y..*..N..N..Y..F..W..Q..*..I..F..
+280945  AAAAAAACATGACATATACAGTGTGTTAAGGCTGGTAATACTAGTTGTTGTAAAACCATTGCTAAATAAACT
+
+        
+        
+        E..K..E..L
+        +  +     +
+        E..K..M..L
+281017  CTTTCATTAAAACTAGAACTAAAATAATGTGACAGAAAATATACTTACTGCATTGATAATATATGATGAATA
+
+             [-4c]
+             2.3e-07
+             ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+               +  +  +     +  +  +  +  +              +  +
+             ..N..L..W..S..Q..L..V..P..K..G..I..A..E..S..N
+416593  ATGGAATTTAGCCAAGATTGCAGAACAGGTTTGCCTATTGCTTCAGAATTATGATTGTTACGCCTAGATGAA
+
+                 [+4a]
+                 8.5e-07
+                 K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                 +  +        +  +        +  +        +  +
+                 N..S..F..Y..K..L..V..*..L..L..I..S..W..L..*..
+742105  TGGCAAAGTAACAGTTTTTATAAATTAGTTTAGCTTCTTATATCTTGGCTATAAACTTAAAAATTCATTATA
+
+                                            [-4a]
+                                            5.3e-07
+                                            ..E..L..W..K..Q..L..L..P..K..L..K..C
+                                                       +     +     +  +     +  +
+                                            ..I..G..Q..K..F..L..*..P..K..I..K..V
+754921  TTCACTGTGAGTTAACCAAAAACTCCAGACATGCAGTATACCTTGTTTAAATAGTTACGGCTTTATTTTGAC
+
+        
+        
+        ..M..N..K
+          +  +  +
+        ..A..N..N
+754993  AGCATTGTTTTATCGTTAAACCCATATGTTTGCCAACAAATTACTTTTATTTGTTTAACTTTTTGCACTTTT
+
+                                     [-2c]
+                                     4.8e-07
+                                     ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E.
+                                       +        +     +           +  +  +  +
+                                     ..Q..W..V..N..G..F..D..F..Y..I..V..N..K..Q.
+759673  TGGGGGAAGATGGGACACTTAAACATATATTGCCAAACATTTCCAAAATCAAAATAGATGACGTTTTTTTGG
+
+        
+        
+        .L
+         +
+        .Y
+759745  TAATACCACGAAGTAGTACTACTATTCCTTTATTAATTGACAACGATTTAATAATATAAATATAATAAAACA
+
+                                                                [-1c]
+                                                                6.2e-07
+                                                                ..F..S..L..Q..L.
+                                                                     +  +
+                                                                ..L..S..L..S..Y.
+941833  TATGTTTAATATTCAAAATTGATTAGCAACAAGTTAGGGTATCTGTTTGCGATTTTGAGGCTTAAAGAATAT
+
+        
+        
+        .A..E..F..R..C..I..T..T..Q..S
+            +  +  +     +
+        .K..E..F..R..N..I..S..K..H..T
+941905  TTTTCAAATCTATTTATTGATTTGTGTGTTTGAGGGAAATATTAGTTCTCTGTTTTGAATTAAAATATATTA
+
+                             [-1a]
+                             6.2e-07
+                             ..F..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+                                  +  +           +  +  +     +
+                             ..L..S..L..S..Y..K..E..F..R..N..I..S..K..H..T
+949825  AGGGTATCCATTTGCGATTTTGAGGCTTAAAGAATATTTTTCAAATCTATTTATTGATTTGTGTGTTTGAGG
+
+                                                                              [-
+                                                                              8.
+                                                                              ..
+        
+                                                                              ..
+1030465 GTATTGCATTGGTGCGATGTATAGTAGAGTGGGGGAATTTTCTCGACTCATTTGATAGTAAATATAAAACAT
+
+        2b]
+        0e-08
+        Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                 +     +  +     +  +  +  +
+        N..L..S..N..Y..F..W..I..R..V..V..E..M..S..R
+1030537 TCAAAGAATTATAAAACCATATCCTCACGACTTCCATAGACCGTTGGTAAATATTTAAAACACGATCAGAGT
+
+                                                                          [-2a]
+                                                                          5.5e-0
+                                                                          ..Q..R
+        
+                                                                          ..V..*
+1174105 TTGGTGGGGGAAGATGGGACACCTTTTTATTCTATTTTCTTGTGTCATTCGGTAGTAAACAAAGAACACTCA
+
+        
+        7
+        ..R..N..C..F..W..V..R..V..V..E..K..E..L
+             +     +     +  +  +  +           +
+        ..F..N..Y..F..R..V..R..V..V..G..A..T..L
+1174177 AAAATTATAAAACCGTACCCTCACGACTCCCGCCGTTAGTAGTTGTTAAAAACAAGATCAAGATGTTTGGAC
+
+                                                                             [-1
+                                                                             2.4
+                                                                             ..F
+                                                                               +
+                                                                             ..F
+1179433 TATCAATTTATTATACAATCATATACTGTGGGTAACATGTTAAAAACCATAGCAGTAGGGCAGGACAAAAAA
+
+        a]
+        e-08
+        ..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+          +  +     +  +  +  +  +     +        +
+        ..T..L..I..L..N..E..F..R..K..I..N..N..Q..*
+1179505 TGTTAAAATTAGATTTTCAAACCGTTTAATATTATTTTGTTAAGTTTACGTGTATTTCTAATAATAAATTCA
+
+                                    [-4b]
+                                    4.4e-07
+                                    ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..
+                                      +        +     +  +        +     +  +  +
+                                    ..N..K..V..K..K..L..V..S..F..L..Y..C..M..N..
+1197073 TGTCGCAAATCAACCGCTTCTAATTCTAATTTTTGACTTTTTTTAACACAGAAAACAAATAACACATGTTTT
+
+        
+        
+        K
+        +
+        K
+1197145 TTACATTTTTCAACCTTTAAATTTGTTTTTAGGCCCAAGTTTTTCTGTAATTTACCGAAAAACATACATTTT
+
+                                                 [+2c]
+                                                 1.4e-07
+                                                 L..E..K..E..V..V..R..V..W..F..C
+                                                             +  +  +  +  +  +
+                                                 R..S..M..*..V..V..R..V..W..F..Y
+1301617 TAATATCCAAATATCCTGATCGTGTTTTATACAGCCACCAACGGTCTATGTGAGTCGTGAGGGTGTGGTTTT
+
+        
+        
+        ..N..R..R..Q..
+          +
+        ..N..S..S..N..
+1301689 ATAATTCCTCGAATGCTCTTTGTTTACTACCAAATTGGACGAGAAAATAGAATGAAAAGATGTCCCATCTTC
+
+                                                      [-2b]
+                                                      6.2e-07
+                                                      ..Q..R..R..N..C..F..W..V..
+                                                        +        +     +
+                                                      ..Q..L..I..N..*..F..R..L..
+1377649 ATTATTCTATCGTTGAGTGGAGTTAGAGGGGACATTTATGCACATATTGAAGAATATTTTAAAATCTTAAAT
+
+        
+        
+        R..V..V..E..K..E..L
+           +  +     +  +  +
+        D..I..V..S..R..E..L
+1377721 CTATAACGCTTCGTTCGAGCATCTCTAGCTACAAAATATTTTGAATTCCTGTTAACTTTTAATGCTAAATAT
+
+                                                                             [+2
+                                                                             2.5
+                                                                             L..
+        
+                                                                             Q..
+1379665 TTTCGTACTGTAGGACACCTTTAGCACATAATATCGAAATATTCTGATCGTATTTTAAACAATTAACAGCAG
+
+        a]
+        e-07
+        E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                 +  +  +  +  +  +     +
+        S..M..A..V..V..R..V..W..F..Y..N..F..L..N..
+1379737 TCTATGGCAGTCGTCAGGGTGTGGTTTTATAATTTTTTGAATGTTCTTTGTTTACTAATAAATAGGACGAGA
+
+                                 [-2b]
+                                 1.8e-07
+                                 ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                            +     +        +  +  +  +  +
+                                 ..H..L..S..N..Y..F..R..I..R..V..V..E..R..P..R
+1387513 CCCAATTTGGTAGTAAACAAATAAAATGCAAAGAATTATAAAAGCGTATCCTAACGACTTCCCTAGGCCGTT
+
+                    [+4a]
+                    4.6e-07
+                    K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                    +  +  +  +        +  +  +  +
+                    N..N..A..C..C..I..K..A..I..L..G..T..F..M..I..
+1399249 CGGGTAAACAATAATAACGCGTGCTGTATCAAAGCTATTCTTGGAACATTCATGATCCATCTATTTCTAAGT
+
+                                 [+2b]
+                                 8.8e-07
+                                 L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                                 +  +  +  +     +           +           +
+                                 L..E..K..N..L..V..F..I..S..F..K..*..N..R..*..
+1399825 CTGGTAAGCATATGCAAGATTCCTCTTGGAAAAAAATCTTGTGTTTATCTCATTTAAGTGAAACCGATAATG
+
+                  [+2b]
+                  4.3e-08
+                  L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                           +  +  +  +     +  +     +
+                  V..S..M..E..V..V..R..I..W..F..Y..N..S..S..N..
+1415233 AAATTAACAAGTGTCTATGGAAGTCGTGAGAATATGGTTTTATAATTCTTCGAATTTTCTTTGTTTACTACT
+
+                                                     [+2a]
+                                                     2.4e-07
+                                                     L..E..K..E..V..V..R..V..W..
+                                                     +        +  +  +  +
+                                                     L..S..M..E..V..V..R..I..L..
+1468801 GCACATAATTTCCAAATATCCTGATCGTCTTTTAAACAATTAAAACTGTCTATGGAAGTCGTAAGGATACTG
+
+        
+        
+        F..C..N..R..R..Q..
+        +     +
+        F..Y..N..S..L..N..
+1468873 TTTTATAATTCCTTGAATGTTCTTTGTTTACTACCAAATGCAATGAAAAAGTAGAATAAAAGGGTGTCCCAT
+
+                                                              [-2a]
+                                                              5.7e-07
+                                                              ..Q..R..R..N..C..F
+                                                                         +     +
+                                                              ..V..S..N..N..T..F
+1469593 ACTTTGCATCATGTTGAGGCTCTGGTGCTTGGAAATTAACCTCTTCCACTAACAAACGCTGTTGTTCGTAAA
+
+        
+        
+        ..W..V..R..V..V..E..K..E..L
+             +        +  +  +     +
+        ..S..V..I..*..V..E..K..S..L
+1469665 ACTAACAATTCACACTTCTTTGCTTAAGCATGGGCAGGAAATGTCAGAATGTCATTTTGAGATCGCTCTCAC
+
+             [-3c]
+             9.0e-07
+             ..V..D..A..Q..T..F..G..L..K..I..R..R..Q..K..F
+                     +     +     +  +  +  +  +  +     +  +
+             ..C..K..A..T..T..A..G..L..K..I..R..K..N..K..L
+1601857 GTAAAACATTTCGCCGTAGTGGCACCGAGTTTAATGCGCTTGTTTTTTAACCAGAGAACATGTGGTGTAAAT
+
+                                      [+1a]
+                                      7.4e-07
+                                      S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..
+                                      +        +     +     +  +     +     +  +
+                                      S..R..L..T..P..S..V..F..E..K..L..H..L..S..
+1608697 TTAATCCGAATTAACAAGAATTTATAACTGTCTCGACTTACCCCGTCTGTTTTCGAAAAATTACACCTGTCA
+
+        
+        
+        F..
+        
+        S..
+1608769 AGCGAACCGCTAAATAACTCGTGTGTTTTATTATATTTTTTGACCAATAGAGTTATCAACTAATAAAGGTAT
+
+                                                               [+2b]
+                                                               9.1e-08
+                                                               L..E..K..E..V..V.
+                                                                     +        +
+                                                               D..R..K..G..H..I.
+1835569 AGGGAACGCGGAAGTTGGTGGGGCATGTTTTGCTTGGGAGGAGTTCCCATACAAAGATCGTAAAGGCCACAT
+
+        
+        
+        .R..V..W..F..C..N..R..R..Q..
+            +  +  +     +  +  +
+        .Q..V..W..F..R..N..R..R..G..
+1835641 TCAAGTGTGGTTTCGAAACAGACGCGGTGAAGGCGTCATTTTCTCCTGCGTATATCGTCAGGATTGTATGAA
+
+                                                                      [+1c]
+                                                                      5.1e-07
+                                                                      S..Q..T..T
+                                                                            +
+                                                                      K..L..T..S
+1943569 GTACGACCTGGGACCAAACCGGCCACTTCAAACCTTTAACCCTGATCATATGAGTGTAGAAAAAGCTTACGT
+
+        
+        
+        ..I..C..R..F..E..A..L..Q..L..S..F..
+          +  +  +     +  +  +     +  +
+        ..I..C..R..S..E..A..L..W..L..T..Q..
+1943641 CAATCTGCAGAAGCGAGGCTCTATGGTTAACCCAGCAGATGAACCCAAACTTGGATATGCCGCACATCCCCA
+
+                [-2c]
+                8.8e-07
+                ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                           +     +  +     +  +  +
+                ..N..L..S..N..Y..F..W..I..R..V..V..G..M..S..R
+1949617 CAAAGAGTGTTCAAAGAATTATAAAACCATATCCTCACGACTCCCATAGACCGTTGTTAATTGTTTAAAACA
+
+                                                               [-4b]
+                                                               6.7e-07
+                                                               ..E..L..W..K..Q..
+                                                                    +     +
+                                                               ..L..L..E..K..*..
+2088073 CTGGTGGCTGACTCTGATGAAGACGGAACAATATCTTTTCGTGAATTTCTGGACATAATAGTTCTTTTTACA
+
+        
+        
+        L..L..P..K..L..K..C..M..N..K
+        +  +     +  +  +        +  +
+        L..L..H..K..L..K..K..N..N..K
+2088145 AAAGGTGCTTCAACTTTTTGTTATTTTTTTTTCCTATTTAATTATTTTTACAATTTGTGACATATTGCAATT
+
+                      [+1c]
+                      6.0e-07
+                      S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                      +           +  +        +     +     +  +
+                      S..T..A..N..I..C..A..Q..E..V..L..*..L..S..L..
+2221921 TATAATGGCGTCTTTCCACAGCGAACATATGCGCACAGGAGGTCTTATAATTAAGCTTAATTGAGCTTTTGT
+
+                            [-2c]
+                            3.4e-07
+                            ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                             +  +     +  +  +  +        +
+                            ..N..L..S..I..Y..F..W..I..R..I..V..E..V..S..L
+2348713 TTTGGCAGTAAACAAAGGATATTCAAAGAAATATAAAACCATATTCTTATAACTTCCACTGACAGCTGTTAG
+
+                                           [+1c]
+                                           5.7e-07
+                                           S..Q..T..T..I..C..R..F..E..A..L..Q..L
+                                           +        +     +     +  +  +  +
+                                           S..L..D..A..A..S..S..F..E..S..L..P..V
+2417977 TAGATGCAGCATCCTCTTTTGGATTATTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGCTTCCAG
+
+                                                 [+1c]
+                                                 5.7e-07
+        ..S..F..                                 S..Q..T..T..I..C..R..F..E..A..L
+          +                                      +        +     +     +  +  +  +
+        ..S..L..                                 S..L..D..A..A..S..S..F..E..S..L
+2418049 TTTCACTAGATGCAGCATCCTCTTTTGGATCGTTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGC
+
+        
+        
+        ..Q..L..S..F..
+                +
+        ..P..V..S..L..
+2418121 TTCCAGTTTCACTAGACGCAGCATCCTCTTTTGGATTATTTCCAGTTTCACTAGATGCAGCATCCCCCTCTG
+
+                      [+1c]
+                      5.7e-07
+                      S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                      +        +     +     +  +  +  +        +
+                      S..L..D..A..A..S..S..F..E..S..L..P..V..S..L..
+2418193 AATCGTTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGCTTCCAGTTTCACTAGATGCAGCATCCT
+
+                                                                         [+1c]
+                                                                         5.7e-07
+                                                                         S..Q..T
+                                                                         +
+                                                                         S..L..D
+2418337 CATCCCCCTCTGAATCGCTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGGATTATTTCTAGTTTCACTAG
+
+                                                                               [
+                                                                               5
+        ..T..I..C..R..F..E..A..L..Q..L..S..F..                                 S
+          +     +     +  +  +  +        +                                      +
+        ..A..A..S..S..F..E..S..L..P..V..S..L..                                 S
+2418409 ATGCAGCATCCTCTTTTGAATCGCTTCCAGTTTCACTATACGCAGCATCCTCTTTTGGATTATTTCCAGTTT
+
+        +1c]
+        .7e-07
+        ..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                +     +     +  +  +  +        +
+        ..L..D..A..A..S..S..F..E..S..L..P..V..S..L..
+2418481 CACTAGATGCAGCATCCTCTTTTGAATCGCTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGGATTATTTC
+
+                                                    [+1c]
+                                                    5.7e-07
+                                                    S..Q..T..T..I..C..R..F..E..A
+                                                    +        +     +     +  +  +
+                                                    S..L..D..A..A..S..S..F..E..S
+2418553 CAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGTTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAAT
+
+        
+        
+        ..L..Q..L..S..F..
+          +        +
+        ..L..P..V..S..L..
+2418625 CGCTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGTTTCCAGTTTCACTAGATGCAGCATCCTCTT
+
+                                                                            [+1c
+                                                                            5.7e
+                                                                            S..Q
+                                                                            +
+                                                                            S..L
+2418841 CAGCATCCTCTTTTGAATTATTTCCAGTTTCACTAGATGCAGCATCCCCCTCTGAATCGCTTCCAGTTTCAC
+
+        ]
+        -07
+        ..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+             +     +     +  +  +  +        +
+        ..D..A..A..S..S..F..E..S..L..P..V..S..L..
+2418913 TAGATGCAGCATCCTCTTTTGAATCGCTTCCAGTTTCACTAGATGCAGCATCCCCCTCTGAATCGTTTCCAG
+
+          [+1c]
+          5.7e-07
+          S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+          +        +     +     +  +  +  +        +
+          S..L..D..A..A..S..S..F..E..S..L..P..V..S..L..
+2418985 TTTCACTAGATGCAGCATCCTCTTTTGAATCGCTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGC
+
+                                                                [-1c]
+                                                                2.0e-07
+                                                                ..F..S..L..Q..L.
+                                                                  +  +  +  +
+                                                                ..H..S..L..Q..M.
+2434465 AGTTTTAGATGTAAAGTTGATTTTTACGATAATAGTTTTTACCACATCCATGTTAAATGGCTTAATTGCATT
+
+        
+        
+        .A..E..F..R..C..I..T..T..Q..S
+         +     +  +     +  +
+        .S..I..F..R..F..I..T..V..T..Y
+2434537 GAAATAAATCTGAAAATTGTAACTGTATATTATTAAATACAGTATTAGAAACCATATTCTGTATAATTTAGA
+
+                                                                     [+1b]
+                                                                     5.7e-07
+                                                                     S..Q..T..T.
+                                                                     +        +
+                                                                     S..L..D..A.
+2456209 TNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATTTCTAGTTTCACTAGATGC
+
+        
+        
+        .I..C..R..F..E..A..L..Q..L..S..F..
+            +     +  +  +  +        +
+        .A..S..S..F..E..S..L..P..V..S..L..
+2456281 AGCATCCTCTTTTGAATCGCTTCCAGTTTCACTAGACGCAGCATCCTCTTTTGGATTATTTCCAGTTTCACT
+
+                                          [+1b]
+                                          5.7e-07
+                                          S..Q..T..T..I..C..R..F..E..A..L..Q..L.
+                                          +        +     +     +  +  +  +
+                                          S..L..D..A..A..S..S..F..E..S..L..P..V.
+2456353 AGATGCAGCATCCCCCTCTGAATCGTTTCCAGTTTCACTAGATGCAGCATCCTCTTTTGAATCGCTTCCAGT
+
+        
+        
+        .S..F..
+         +
+        .S..L..
+2456425 TTCACTAGATGCAGCATTCTCTCTTGGATTATTTCCAGTTTCACTAGATGCAGCATTCTCTTTTGAATTATT
+
+                                                                         [+1c]
+                                                                         4.2e-08
+                                                                         S..Q..T
+                                                                         +
+                                                                         S..L..D
+2456713 ATCCTCTTTTGAATCGCTTCCAGATTTCACTAGATGCAGCATCCTCTTTTGAATCGCTTCCAGTTTCACTAG
+
+        
+        
+        ..T..I..C..R..F..E..A..L..Q..L..S..F..
+          +     +     +  +  +  +     +  +
+        ..A..A..S..S..F..E..S..L..S..L..S..L..
+2456785 ATGCAGCATCCTCTTTTGAATCGCTTTCACTTTCACTAGTTGCAGCATCCCCTTTTGAATCGTTTCCAGTTT
+
+                                                         [-4b]
+                                                         4.6e-07
+                                                         ..E..L..W..K..Q..L..L..
+                                                           +        +     +  +
+                                                         ..D..P..I..K..I..L..I..
+2552833 ATTTTTATTAAGAATTAATTTGTTTTGGATAATAAAATTAAAATTGTTAGTCTGGAATCTTAATCAATATAC
+
+        
+        
+        P..K..L..K..C..M..N..K
+           +  +  +        +  +
+        G..K..L..K..W..F..N..K
+2552905 CTTTCAACTTCCAAAAGTTTTTTTTCTAAATTCTCTTTTTCTGCAGAAATCGCCTACAATGTAAAATAATGT
+
+                        [-2b]
+                        4.7e-19
+                        ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                          +  +  +  +  +  +  +  +  +  +  +  +  +  +  +
+                        ..Q..R..Q..N..C..F..W..V..R..V..V..N..K..K..L
+2589409 TCATTCTCTTTTGTTTCTGCCTTTGATTGCAAAACCACACACGCACGACGTTTTTCTTTAAGTCGAGCTTTT
+
+                                            [-4a]
+                                            5.3e-14
+                                            ..E..L..W..K..Q..L..L..P..K..L..K..C
+                                              +  +  +        +  +  +  +  +  +  +
+                                            ..E..L..W..T..I..L..I..P..K..L..A..V
+2589769 ATTCTCCATCTTCCTCCTGTGCTCTTCCTCCGCTTTTTCTAACCACGTGATCAAGATTGGTTTCAAGGCGAC
+
+                 [-1a]
+                 9.3e-09
+        ..M..N..K..F..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+          +  +  +  +  +  +  +  +  +  +  +  +           +
+        ..M..N..N..H..S..L..T..L..S..E..F..R..H..V..C..S..*..I
+2589841 CATGTTGTTGTGACTGAGTGTCAAGGACTCAAACCTGTGCACACAACTTTATATTAAGCACATTATACTTCA
+
+                                                                         [-3c]
+                                                                         3.3e-17
+                                                                         ..V..D.
+                                                                           +  +
+                                                                         ..V..D.
+2589985 CTGCAAATTGTGCTTTGGCTCAGCGATCCAACACCCGGTAGCTTGAGGTTGGCAAGAGCTTGCCCAACATCA
+
+        
+        
+        .A..Q..T..F..G..L..K..I..R..R..Q..K..F
+            +  +  +  +  +  +  +  +  +  +  +  +
+        .N..Q..T..V..G..L..K..I..R..R..Q..K..F
+2590057 TTCTGCGTCACTCCAAGTTTAATGCGTCGCTGCTTGAATCTGAAACAATATTGATCCTGGGTAAGTGGCTGT
+
+                   [-1c]
+                   5.6e-08
+                   ..F..S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+                        +  +     +  +     +  +  +  +           +
+                   ..Y..T..L..*..L..A..V..F..R..S..I..P..I..T..S
+2608273 AATCAACCACAGTACGTTAACTACAAAGCCACAAACCTCGAAATCGGAATCGTTGATAAGGTCAGTGATACC
+
+                                              [-2c]
+                                              6.2e-07
+                                              ..Q..R..R..N..C..F..W..V..R..V..V.
+                                                   +           +     +  +  +
+                                              ..R..R..K..P..T..F..R..V..R..I..P.
+2783161 TCTGTCTCTTCATGCTCACTGGAAGAGTATGTCCTCACTCTCCTCTTTGGAGTAAATCTCACTCTTATAGGC
+
+        
+        
+        .E..K..E..L
+            +  +  +
+        .M..K..K..L
+2783233 ATCTTTTTTAACCGTTCTTTCTTGATTGAAGAAACATCTGTAGAACCTCCAGCAGAGACATTTAAACTGGCC
+
+                                                                    [+1a]
+                                                                    4.3e-07
+                                                                    S..Q..T..T..
+                                                                    +     +
+                                                                    S..N..T..Y..
+2821969 CGTAATATTTGATAAGCCCTGAGGTACAAATTATTGAATTAAAATCAGACCCTTTTTTTTTCAAACACATAT
+
+        
+        
+        I..C..R..F..E..A..L..Q..L..S..F..
+        +  +     +     +     +  +  +
+        I..S..L..F..F..S..Q..N..L..S..S..
+2822041 ATATCTCTGTTTTTTTCACAAAACTTAAGCTCATTTCATCAGAAAAAATCCAACTCTGCGAAATGTTTCAAC
+
+                                                            [+2b]
+                                                            5.7e-07
+                                                            L..E..K..E..V..V..R.
+                                                            +     +        +  +
+                                                            M..*..R..F..F..I..R.
+2862145 CCTAAATCGAGCTGTCTGAACGTTACAGCAACAGACACAATGTCTTAAAGGCATGTGACGTTTCTTTATACG
+
+        
+        
+        .V..W..F..C..N..R..R..Q..
+               +  +  +        +
+        .L..S..F..C..N..N..K..Q..
+2862217 GTTGTCTTTCTGCAACAACAAGCAAGTCGAGCGTGGTGCCGGTGTTAGCTGGGATGTCCTACACCAAATATG
+
+                                             [-1b]
+                                             1.5e-07
+                                             ..F..S..L..Q..L..A..E..F..R..C..I..
+                                               +  +  +  +        +  +  +
+                                             ..F..S..L..N..Y..Y..E..F..R..*..R..
+3009889 TATTCATTGCTGTATGACCTATGCACCTATGGAATTTAAAAGAAAGGTTGTAATATTCAAACCTTCATCTTG
+
+        
+        
+        T..T..Q..S
+           +  +
+        S..T..Q..L
+3009961 AGGTCTGCAACATCGGCCATAAATTCTCCAGCGACAGTCTCAAGTGCCATTGCATTTGCGAGTCCATTCATT
+
+               [+2b]
+               6.0e-07
+               L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+               +           +     +     +  +     +
+               M..V..Q..G..V..L..R..I..W..F..Y..N..S..L..N..
+3132361 ATTAACAATGGTTCAGGGAGTTCTGAGGATTTGGTTTTATAATTCTTTGAATGTTCTCTGTTTACTATCGAA
+
+                                   [+4a]
+                                   9.3e-07
+                                   K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                                   +     +  +        +     +  +     +     +  +
+                                   K..V..M..I..N..I..K..I..L..L..M..K..I..L..N..
+3177865 AATTGAAAACCTTAAGGTGTTAACATAAAGGTCATGATAAACATAAAAATCCTATTAATGAAAATATTAAAT
+
+                                              [+1c]
+                                              8.0e-07
+                                              S..Q..T..T..I..C..R..F..E..A..L..Q
+                                                    +  +  +  +  +  +
+                                              V..L..T..T..I..C..R..F..D..D..I..*
+3252745 TTATAGCAACACTATAGCAACGTTGTTATTGCTTTGTTGTGCTGACTACTATATGTCGTTTTGATGACATTT
+
+        
+        
+        ..L..S..F..
+          +     +
+        ..L..E..F..
+3252817 AATTGGAATTTTGTAAATTCTAAGAAATCGCGTAATCTGTAAAGTGTTTTGATAAATCAGTTTTGCACCCGT
+
+                                                                  [+3b]
+                                                                  6.8e-07
+                                                                  F..K..Q..R..R.
+                                                                  +  +  +  +
+                                                                  F..K..Q..R..K.
+3276001 CCTGAACGCGAAACTACAGTTTCGTATTTTGTCTGGCTTTTGAACACGTCCACGAGGTTTTAAACAACGTAA
+
+        
+        
+        .I..K..L..G..F..T..Q..A..D..V..
+         +  +        +     +     +
+        .I..K..R..F..F..*..Q..*..D..S..
+3276073 GATTAAAAGATTTTTTTAACAGTAGGATTCTGCAACTTGCAGTTAATTATCTACCGAAATTAATCGACTGTG
+
+                                                    [-4c]
+                                                    1.7e-07
+                                                    ..E..L..W..K..Q..L..L..P..K.
+                                                      +     +     +  +  +
+                                                    ..N..*..W..T..E..L..V..I..T.
+3314305 TTAAGTAAACTATATATGCCACATACACATACAACAACATATTTATTCTACCATGTTTCTAACACAATAGTT
+
+        
+        
+        .L..K..C..M..N..K
+         +  +     +  +
+        .L..K..Y..M..N..D
+3314377 AACTTATACATGTTATCAACTAACAACCTATAATATACCTTAACCTATCATATCCAGGTCAAAAAGTAGGAT
+
+             [-2c]
+             8.8e-07
+             ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                     +  +  +        +  +  +     +  +
+             ..W..K..R..N..C..V..L..V..R..V..F..E..K..C..H
+3466585 AGCTCCCACTTACGGTTACAAACTAAAACACGAACGAATTCCTTACAGTGAGAGAGAGAGAGAGAGAGCGAT
+
+              [+2a]
+              1.4e-07
+              L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                          +  +  +     +  +  +  +
+              R..P..I..G..V..V..R..I..W..F..C..N..S..L..I..
+3771289 TAACAACGGCCTATAGGAGTTGTAAGGATATGGTTTTGTAATTCTTTGATTTTTTGTTTACCACAAAATGGG
+
+                                          [-4b]
+                                          3.5e-07
+                                          ..E..L..W..K..Q..L..L..P..K..L..K..C..
+                                            +  +  +     +  +  +        +  +  +
+                                          ..N..V..W..S..S..L..L..N..S..L..K..I..
+3936961 TGACAACGCCGACGAGCTGCACAAAATTAAACGAATTCACCCAACTGGAAAGTAAATTACTCAGTTTAATTT
+
+        
+        
+        M..N..K
+              +
+        E..Q..K
+3937033 CTTGCTTCCGTCATTTTGAAATGAATTCTTTGTATGTGGCAGGTTACTCCTGCGATTACGGGTAGAATACTG
+
+                                                                               [
+                                                                               8
+                                                                               .
+        
+                                                                               .
+3975409 TTAATACAATATCTAGTTAACCACTTGTATGTGTTTCATTATTGTCACCTAGTTTTCTTATTTCACTCCCCC
+
+        -4c]
+        .5e-07
+        .E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+            +        +  +  +  +  +  +     +
+        .V..L..P..S..A..L..L..P..K..L..T..Q..S..G..S
+3975481 ACCAGTGGCGAAGCCAGAAGGGGCTTTAAGGTTTGCGACCCACTTTAAAAAAAAGTTGTTGTTTTTTTGTAA
+
+                                                    [+1c]
+                                                    9.9e-07
+                                                    S..Q..T..T..I..C..R..F..E..A
+                                                    +        +  +  +     +     +
+                                                    S..V..K..A..I..C..E..F..T..S
+3997297 AATGGGCATGGTAACTAATTTGGACAATAATATAGAACAAGGGTTCTGTAAAAGCAATATGTGAATTCACCT
+
+        
+        
+        ..L..Q..L..S..F..
+          +  +        +
+        ..L..N..R..A..F..
+3997369 CATTAAATCGAGCATTCAAACGACTTCTTGACCCACCTGAAACATTCGTTCCTTTTTGTACTTCTTGCCTGT
+
+                                     [-2c]
+                                     6.8e-07
+                                     ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E.
+                                                +     +  +     +  +  +
+                                     ..R..K..F..N..S..F..W..I..R..V..V..G..I..S.
+4022857 TTTTCGTCCCATTTGATAGTAAGCAAAAAACGTTTAAAATTAGAAAACCATATCCTCACGACTCCAATAGAC
+
+        
+        
+        .L
+        
+        .R
+4022929 CGCTAATAACTGTTTACAACACGATCAGAATATTTGGATATCATGTTCCAAAGGTGTTCCGTCTCCCCCCAT
+
+                                                              [+3a]
+                                                              7.1e-07
+                                                              F..K..Q..R..R..I..
+                                                                    +  +
+                                                              *..L..I..K..K..M..
+4358881 TCAGTCATGCCAAAAAAGACGCAAAACAAACAATCACCATCAAAGTTCCATGAGTAACTTATAAAAAAGATG
+
+        
+        
+        K..L..G..F..T..Q..A..D..V..
+        +  +     +  +     +  +  +
+        K..L..C..F..T..T..T..D..V..
+4358953 AAATTGTGTTTTACAACTACTGACGTTGCCCAGTAACCCAAGGTTATATAAATGTCATTCATTTATGCAATC
+
+                                                           [-4a]
+                                                           9.8e-07
+                                                           ..E..L..W..K..Q..L..L
+                                                             +     +  +  +     +
+                                                           ..E..K..W..K..A..R..L
+4682161 CATTTGAAAAAAGTCTACTTTAAACCGCTTGTAATGTTTTCATTCTCTTTCCTCTTTCCATTTGGCCCGGAG
+
+        
+        
+        ..P..K..L..K..C..M..N..K
+                +     +     +  +
+        ..G..F..L..T..I..L..N..N
+4682233 TCCAAAAAGCGTTATTAAATTGTTTAAAACATGATCAGCAGGAAATAGCAGATTTAGGTGCTAAGGGAATAT
+
+                            [+2c]
+                            2.2e-07
+                            L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                            +        +  +  +  +        +     +
+                            L..N..M..E..V..V..R..I..R..F..Y..N..S..L..N..
+4689505 TATCTAAATATCTTCGAATTTTAAACATGGAGGTTGTGAGGATCCGGTTTTACAACTCTTTGAATGTTCTTT
+
+                             [+4a]
+                             9.3e-07
+                             K..N..M..C..K..L..K..P..L..L..Q..K..W..L..E..
+                             +  +     +  +           +  +     +  +  +
+                             N..S..T..V..A..M..N..Q..L..L..K..R..W..L..H..
+4736593 CACTAGAAGCTTTGGTGCGTCAACTCAACTGTTGCCATGAACCAACTCTTAAAGCGATGGTTACACAGCTCA
+
+                                                     [-2a]
+                                                     3.0e-07
+                                                     ..Q..R..R..N..C..F..W..V..R
+                                                             +  +  +  +  +
+                                                     ..S..Y..R..N..C..F..W..I..S
+4778065 AAAGCTAACAGGCAAACATATCATTGCCTACATAGTTTTATAAATGGAGTAACGATTACAGAACCAGATGCT
+
+        
+        
+        ..V..V..E..K..E..L
+             +  +
+        ..S..V..D..I..Y..H
+4778137 GCTAACATCGATATAATGTAGATGTTTTTGGGTCCAAACACTTCAACGGCTCCAAATGAAATTAGATAAGCA
+
+                 [+2a]
+                 7.4e-07
+                 L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                 +     +  +  +              +  +        +
+                 Y..Q..K..N..V..K..N..I..K..F..C..S..K..R..K..
+4844449 ATATGAAAATACCAAAAAAATGTGAAAAATATAAAATTCTGTTCTAAACGAAAATACAAAAAAATATTAAAA
+
+                    [+1a]
+                    8.0e-07
+                    S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                    +           +  +     +     +     +  +  +  +
+                    S..L..F..S..I..S..F..F..L..A..E..N..I..S..F..
+4874833 TATCTCAAAATCTCTCTCTTTAGCATTAGCTTTTTCCTTGCTGAAAATATCTCATTTTCTTTTGCAGGTTAA
+
+                                          [-4b]
+                                          4.6e-07
+                                          ..E..L..W..K..Q..L..L..P..K..L..K..C..
+                                            +  +  +     +  +           +  +
+                                          ..D..L..W..L..E..L..A..Q..A..L..A..D..
+4915513 GTGACTTCTGGTTTAAAAGTTGGGGTGACATGCGATCAAGCCAGAGCTCTAAAGCTTGTGCCAGAGCGTCTG
+
+        
+        
+        M..N..K
+        +  +  +
+        A..N..K
+4915585 CATTTTTGACTCCTGTCATTTGGCGTAAAGTAAAGACAGCGTCGAGTCATATGCAATTGCTATAATGTTGCA
+
+                            [-2c]
+                            3.4e-07
+                            ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                 +     +     +  +     +  +  +
+                            ..M..R..S..N..Y..F..W..I..R..I..V..R..N..I..T
+5095873 TGGTAGTAAACAATAAAGAACATACGAGAATTGTAAAACCATATCCTAATGACCCTGTTAATTGTTTAAAAC
+
+                                                                      [-2c]
+                                                                      2.6e-07
+                                                                      ..Q..R..R.
+        
+                                                                      ..L..S..S.
+5131081 TAGGGTGGGGGAAGGTGGGACACATTTTCCTTCTATATTATCTCCACCCATTCGGTAGTAAACAAAGAAGAA
+
+        
+        
+        .N..C..F..W..V..R..V..V..E..K..E..L
+         +     +  +     +  +  +           +
+        .N..Y..F..W..S..R..V..V..K..M..S..L
+5131153 TTATAAAACCATGATCTCACGACTTTCATAGACAGTTGTTAATGATTTAAAACACGATCAGGATATTTAGAC
+
+                                     [-2c]
+                                     1.6e-07
+                                     ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E.
+                                                +  +  +  +     +  +     +
+                                     ..N..L..S..N..C..F..W..I..R..V..A..E..M..S.
+5190769 TCATTTGGTGAACACAGTCAAAACAGAACATTCAAAGAATTGCAAAACCATATCCTCACGGCTTCCATAGAC
+
+        
+        
+        .L
+        
+        .R
+5190841 CGTTGTACTACAATCAGGATATTTGCATATTATTTGCTAAAGGTGCTTGTTTTACCTCAACCTACTGTATAT
+
+                                                                        [+1b]
+                                                                        8.4e-07
+                                                                        S..Q..T.
+                                                                              +
+                                                                        Y..N..T.
+5193001 TGGATGTTGTGTTGACTGCTTGCATGATAACTGTGTTTGTAACCCGACACAACACTGAGCAAAATACAATAC
+
+        
+        
+        .T..I..C..R..F..E..A..L..Q..L..S..F..
+         +  +  +     +  +  +  +           +
+        .T..I..S..L..F..E..N..L..L..K..I..F..
+5193073 AACAATCTCATTATTTGAAAATCTATTAAAAATATTTTTTGTTAATTCTTTCCAAGTACACTGACTGAAAAC
+
+                                                                             [-2
+                                                                             5.5
+                                                                             ..Q
+        
+                                                                             ..T
+5194441 TTTGTGAAGTATTGCTGTAATTTTTTATCAGTCACGTTTCTGTACTAGTCAAAGTTTAGGAATAGGAATTGT
+
+        a]
+        e-07
+        ..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                +     +        +  +        +     +
+        ..E..P..N..T..F..I..I..R..V..T..K..K..Q..M
+5194513 TTCTGGGTTTGTAAAAATAATTCTGACTGTTTTTTTTTGCATTTGATGGTGCAGCTATTATTAGGTTTGCAT
+
+             [-2c]
+             6.3e-08
+             ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                              +  +  +  +  +  +  +        +
+             ..N..F..Y..E..Y..F..W..V..R..V..V..E..T..T..L
+5247721 AGAACATTAAAATATTCATAAAACCACACCCTTACGACTTCGGTTGTTAACTTTTTTAAACACGACTAGGCT
+
+                            [-2c]
+                            8.8e-07
+                            ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                       +     +  +     +  +  +
+                            ..N..L..S..N..Y..F..W..I..R..V..V..G..M..S..R
+5423617 TTTTGTAGTAAACAAAGAACATTCAACGAATTATAAAACCATATTCTCACGACTCCCATAGACCGTTGTTAA
+
+                                                                   [-2c]
+                                                                   4.0e-07
+                                                                   ..Q..R..R..N.
+                                                                              +
+                                                                   ..Y..L..S..N.
+5586049 AGGGCGGGGGAAGATGGGACACCTTTTTATTCTATTCCATTTGGCCGTACACAAAGAATGTACAAAGAATTA
+
+        
+        
+        .C..F..W..V..R..V..V..E..K..E..L
+            +  +  +  +  +  +
+        .Y..F..W..V..R..V..V..R..M..P..R
+5586121 TAAAACCAAACCCTCACAACTCTCATAGGCCGTTGTCAATTGTTTAAAACATGATCAGGGTATGTGAACTTT
+
+                                   [+2a]
+                                   1.6e-08
+                                   L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                                   +  +           +  +     +  +     +
+                                   L..E..L..*..L..V..R..I..W..F..Y..N..S..L..N..
+5657041 GTGTTTTAAACAAATAACAACAGTCTATTAGAGTTGTGACTTGTGAGGATATGGTTTTATAATTCTTTGAAT
+
+                                          [-1b]
+                                          4.1e-07
+                                          ..F..S..L..Q..L..A..E..F..R..C..I..T..
+                                            +  +  +  +     +  +     +  +  +
+                                          ..F..S..I..D..A..N..E..T..R..C..I..S..
+5690089 AAATAATCCACAAAAACAAGTTGAACTTTAGCACGAATGAAATATCCGCGTTTTCAGTTCGACATATACTAC
+
+        
+        
+        T..Q..S
+        
+        R..P..C
+5690161 GTGGACAGCGGTGACAAATACATTTTGCTAATTTGTTGCGTAAACAAAAATATCTCTATAGTAGATATCGGT
+
+                                                                              [+
+                                                                              2.
+                                                                              S.
+        
+                                                                              I.
+5829121 CCTATATATACCCTTTAATTCAATAAGTGTAGTCAATATAAAATATATAAAAACTTTGTAATTTGAACAGAT
+
+        1b]
+        9e-08
+        .Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+            +  +  +  +     +     +  +  +     +  +
+        .A..T..A..I..S..S..F..S..S..L..N..P..S..F..
+5829193 CGCGACGGCAATTTCGTCATTTAGTTCCCTGAATCCATCGTTTCGAAAAATTAAAAATCAAAGGAAACTACC
+
+                                                      [-2b]
+                                                      4.8e-08
+                                                      ..Q..R..R..N..C..F..W..V..
+                                                                 +     +  +
+                                                      ..K..Q..S..N..Y..F..W..I..
+6077593 AAAATGTGACACCTTTTTGTTCGATTTTCTTGTCCCGTTTTTAATTTTTTTGTGAATTATAAAACCATATTC
+
+        
+        
+        R..V..V..E..K..E..L
+        +  +  +           +
+        R..V..V..G..I..S..Y
+6077665 TAACGACTCCTATGGAGTAGTATTAATTGTTTGAAACACGATTAATACATTTGAATATTATACAATAAATGT
+
+                                             [+2b]
+                                             4.8e-07
+                                             L..E..K..E..V..V..R..V..W..F..C..N.
+                                                +  +     +  +  +        +     +
+                                             *..E..K..L..V..V..R..L..*..F..T..N.
+6179905 AACAAATATATCAAACCTTTTTGCAACAACTTAATATTAAGAAAAATTAGTAGTAAGGCTTTAGTTTACCAA
+
+        
+        
+        .R..R..Q..
+        
+        .K..N..L..
+6179977 CAAAAACTTATTATTTTAAAGTTGAAATAAGATTACTTACTGTAAACAATTTAAAAAAAGGAACGAATAATT
+
+                                                                [-4c]
+                                                                2.9e-07
+                                                                ..E..L..W..K..Q.
+                                                                     +  +  +  +
+                                                                ..L..L..W..A..E.
+6193225 CAGTATATGGTTAAACTTGAACAAACATAAATGGCTCGTTGTTTGTCTTCTAGGTGTAGCAACCACGCTTCT
+
+        
+        
+        .L..L..P..K..L..K..C..M..N..K
+                  +  +        +  +  +
+        .K..K..L..K..L..Q..F..M..N..K
+6193297 TTTTTTAACTTTAATTGAAACATGTTTTTACCTGTAAGCGCTTTAACAACCGTTATTTTAGTGCTACATTCT
+
+                              [+2b]
+                              4.6e-08
+                              L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                              +     +  +           +  +  +        +  +
+                              L..T..R..N..F..A..T..V..W..F..T..T..Q..R..H..
+6278401 AATTAATGCGCACGTATTTGCTTTGACAAGAAATTTCGCAACCGTGTGGTTCACTACCCAGCGTCACTTCCC
+
+                                                                              [-
+                                                                              2.
+                                                                              ..
+        
+                                                                              ..
+6291793 ATAATTGTTTTCCAGGCTGCTTGCAAACTTGTGAAAACTGTAGACACCAAATCTTTGACATCAATCTTCTAT
+
+        4b]
+        3e-07
+        E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+              +     +  +  +        +  +  +     +
+        H..Q..W..*..E..L..L..S..G..L..K..C..L..S..H
+6291865 GCTGCCACTACTCAAGCAGAGATCCCAGCTTGCAAAGTGAGTGCCTTTTTCTTGAGTCACTTAATCCCCACA
+
+                              [-2b]
+                              1.3e-07
+                              ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                         +        +     +  +     +  +  +  +
+                              ..N..V..S..N..*..L..W..I..R..I..A..N..R..K..L
+6509809 TATTCGGTAGTGGACATAGAACATTTACAGAGTTTTACAACCATATCCTTATGGCATTTCTTTTCAAATCAC
+
+                                         [-2a]
+                                         9.6e-07
+                                         ..Q..R..R..N..C..F..W..V..R..V..V..E..K
+                                           +  +     +  +  +                 +  +
+                                         ..Q..R..F..N..C..F..F..L..*..Y..*..N..K
+6762673 AACAAAAAGAAAAAAAAGAGGGAACACTAACTACTGTCTAAAGTTACAAAAAAACAACTAATATTAATTTTT
+
+        
+        
+        ..E..L
+          +
+        ..K..F
+6762745 TTTAAAAGGAAACACTTTCCGCTTGACCACACACTGAGGTAATATGATATCCCATTGTGATGGAATAAACCT
+
+                                        [-3c]
+                                        7.1e-07
+                                        ..V..D..A..Q..T..F..G..L..K..I..R..R..Q.
+                                          +        +  +  +     +  +        +
+                                        ..V..R..*..Q..T..V..I..L..K..F..L..K..L.
+6803065 AGTATTTTGATTAACCCTTAGAAGTTGTGCTTAACTCTTCACTGTGTAACAATCAATTTAAATAATTTAAGT
+
+        
+        
+        .K..F
+         +  +
+        .K..L
+6803137 TTTAGACCTCTTTGTCAATCGCTTAAATAATGCTATTTGTTTATTTATAATGTAATATATATAATATGTTAT
+
+                                                                        [+1b]
+                                                                        9.5e-07
+                                                                        S..Q..T.
+                                                                           +
+                                                                        Y..Q..L.
+6812857 GGTCAAATCCTGATAGTGATACATTATAAAGTAAGTTTCCTGTTTCTTTCATTAATATTTATGGTATCAATT
+
+        
+        
+        .T..I..C..R..F..E..A..L..Q..L..S..F..
+            +     +  +        +  +  +     +
+        .*..I..N..R..F..D..K..L..Q..I..*..F..
+6812929 ATGAATAAATCGATTTGATAAGCTTCAAATATAATTCAACTATGGCAGATAAATTTAATTATGTACTTGTGA
+
+                                                                           [-1b]
+                                                                           9.9e-
+                                                                           ..F..
+                                                                             +
+                                                                           ..F..
+6828697 CTAAGGATGCATTATTACTTTAATTTCAATTATGTCTTACTTAAAGCTATAACACGTCGTATACCCTAAAAA
+
+        
+        07
+        S..L..Q..L..A..E..F..R..C..I..T..T..Q..S
+              +     +     +  +  +  +     +
+        F..C..N..*..A..L..F..R..C..I..Q..T..N..N
+6828769 AACAATTCTACGCCAAAAATCGACATATCTGCGTGTTATTTTATCCAATGCTTATTTGTAAACAAAACTATC
+
+                                   [+1a]
+                                   2.0e-07
+                                   S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                                               +  +     +     +     +  +  +  +
+                                   N..L..A..R..I..C..N..F..C..S..Q..Q..L..S..F..
+6845617 TGCACGCATGGCCTGTAATGTAAACCAAATTTAGCTCGAATTTGTAATTTTTGTTCACAACAGCTTAGTTTT
+
+                                      [-2a]
+                                      6.2e-07
+                                      ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E
+                                                 +     +        +  +  +  +
+                                      ..N..L..F..N..Y..F..Q..I..R..V..V..E..V..F
+6877513 TCTAGTCCCATTTGGTGGTACACAAACAACATTCAAAAAATTATAAAACTGTATTCTCACGACTTCAACAAA
+
+        
+        
+        ..L
+          +
+        ..L
+6877585 CAGTTGTTAATTGTTTAAAACTTGATCACAATATTTGAATATGATGTGCTAACGGTGTCCCATCTTACCCCA
+
+                                                              [+2a]
+                                                              8.8e-07
+                                                              L..E..K..E..V..V..
+                                                                          +  +
+                                                              R..S..M..G..V..V..
+6881401 ATTAAAAAAAATTCTTGCCAAAGAGCCAAAATGCATCCTGATAACATTTACCAAAGGTCTATGGGAGTAGTA
+
+        
+        
+        R..V..W..F..C..N..R..R..Q..
+        +     +  +     +
+        R..I..W..F..Y..N..S..L..N..
+6881473 AGGATATGGTTTTATAATTCTTTAAATATTCTTTGTTTACTTCCAAATGGGAGGAGAAAATACAATGAAAAG
+
+                           [+1b]
+                           6.5e-07
+                           S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                           +     +  +     +  +  +        +        +
+                           S..*..S..T..D..C..R..F..V..K..L..*..Y..T..V..
+6909481 AACTAGATATTAGATTAGGTCATGAAGTACTGACTGTAGATTTGTGAAGCTGTGATACACGGTGAAAGTTGT
+
+                              [+2b]
+                              9.2e-07
+                              L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                              +        +     +  +        +     +
+                              M..A..M..E..L..V..R..I..R..F..Y..N..S..L..K..
+6924457 ACATCGTTTTTTACAATTAACAATGGCTATGGAACTCGTGAGGATACGGTTTTATAATTCTTTGAAGTTTCT
+
+                                                           [+4a]
+                                                           6.1e-07
+                                                           K..N..M..C..K..L..K..
+                                                           +     +        +
+                                                           K..L..A..S..*..L..A..
+6943105 TTTTTTTAATTAGTACCCTTTTTTTACTTTTTAAAACTGTTTTATTCCTAAAAACTAGCTTCCTAGCTGGCA
+
+        
+        
+        P..L..L..Q..K..W..L..E..
+           +  +        +  +  +
+        S..L..L..H..I..W..L..E..
+6943177 TCACTGCTACATATCTGGTTGGAAACACTGTATTTTTCCTTTTTAATCTTCCTAGGAACAACATGTTTTTCA
+
+                                                [+2b]
+                                                8.0e-08
+                                                L..E..K..E..V..V..R..V..W..F..C.
+                                                +        +     +     +  +
+                                                L..*..M..N..I..V..S..V..W..V..Y.
+6997681 AACTTCTAAACCAGTGAGGAAAACAGTTGAAAGGATTAAATTATAGATGAATATTGTGTCTGTTTGGGTATA
+
+        
+        
+        .N..R..R..Q..
+         +  +  +
+        .N..R..R..K..
+6997753 TAATCGAAGAAAAGCAATGTTTTACGGTTTATTATTGTATATTTTTACAAAGAGAGCGTTTTTTACTAAGAC
+
+                                                                   [+1c]
+                                                                   2.3e-07
+                                                                   S..Q..T..T..I
+                                                                   +        +  +
+                                                                   S..A..A..A..I
+7051609 TTTTAACGTATCAACTTCAATGCAACCCCAAAGGATCGTTTATTCAATAAAACTAGTTCAGCGCGGCCGCCA
+
+        
+        
+        ..C..R..F..E..A..L..Q..L..S..F..
+          +     +        +  +     +
+        ..C..S..F..*..K..L..Q..F..T..M..
+7051681 TATGTTCGTTTTAAAAACTTCAATTTACAATGAAATGGTTTTCCCTTCATTACCACAATACCCTTGTGCCAA
+
+                                                    [-2c]
+                                                    2.2e-07
+                                                    ..Q..R..R..N..C..F..W..V..R.
+                                                               +     +  +  +
+                                                    ..N..L..S..N..*..F..W..V..S.
+7250617 TTTTTATTCTGTTTTCTCGTCCTATTTAATAGTAAACAAAGAACATTCAAAGAATTTTAAAACCATACGCTC
+
+        
+        
+        .V..V..E..K..E..L
+         +  +  +
+        .V..V..E..M..S..R
+7250689 ACGACTTCCATAGACCGTTGTTAATTGTTTAAACACGATCAGATTATTCAGATATTATGTGCCAAAAGTGTC
+
+                        [+1b]
+                        6.0e-07
+                        S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                                    +  +  +  +     +     +     +  +
+                        D..*..A..*..I..C..R..F..T..S..T..N..D..S..H..
+7260769 GGAAACTGAAACCGCAGATTAGGCTTAAATTTGTCGTTTTACTTCAACAAACGACTCGCATGGCGATAAAAA
+
+                                [-2a]
+                                6.2e-07
+                                ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                           +  +  +        +  +  +  +
+                                ..N..L..S..N..C..F..R..I..R..V..V..E..M..S..R
+7423777 CCCATTTGGTGGTAAACACGAAACATTCAAAGAATTACAAAACCGTATCCTCACGACTTCCATAGACCGTTG
+
+                                                                        [+2b]
+                                                                        1.4e-07
+                                                                        L..E..K.
+                                                                        +     +
+                                                                        Y..Q..K.
+7530193 AAGAAGTCCTTGCCAACTCCCTCCATTGCTCAGTCTCCGGAACAAAATGAAAACGAAAATCCGATATCAAAA
+
+        
+        
+        .E..V..V..R..V..W..F..C..N..R..R..Q..
+               +  +     +  +        +  +
+        .K..M..I..R..*..W..F..S..K..Q..R..K..
+7530265 GAAGATGATCAGATGATGGTTCTCGAAACAAAGAAAGCTGTTCAGAAAAGTCCAAGGCCCAACATCAAGGCT
+
+               [-4b]
+               6.4e-07
+               ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+                    +           +  +  +  +     +  +     +  +
+               ..Y..L..T..*..K..L..V..P..K..Y..A..I..L..N..N
+7605289 CACCACTGTATAGTGTTTATTTTAAGACAGGTTTGTAAGCTATTAAATTGTTTTTATTTATACTGAGTAGTT
+
+                              [-4b]
+                              6.4e-07
+                              ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+                                   +           +  +  +  +     +  +     +  +
+                              ..Y..L..T..*..K..L..V..P..K..Y..A..I..L..N..N
+7616521 TGATCCTTGCACTATCACCACTGTATAGTGTTTATTTTAAGACAGGTTTGTAAGCTATTAAATTGTTTTCAT
+
+                           [+2b]
+                           8.8e-07
+                           L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                                       +  +  +     +  +     +
+                           R..S..M..G..V..V..R..I..W..F..Y..N..S..X..X..
+7782121 TGTTTTAAACAATTAACAACGGTCTATGGGGGTCGTGAGGATATGGTTTTATAATTCTNNNNNNNNNNNNNN
+
+                          [-2a]
+                          8.1e-07
+                          ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                     +     +  +     +  +  +
+                          ..N..L..Y..N..Y..F..W..I..R..V..V..G..I..S..R
+7789177 TGGTAGTAAACAAAGAACATTCAAATAATTATAAAACCATATCCTCACGACGCCTATAGACCGTTGTTAATT
+
+                        [-2b]
+                        8.3e-08
+                        ..Q..R..R..N..C..F..W..V..R..V..V..E..K..E..L
+                                   +     +  +     +  +  +  +
+                        ..N..*..L..N..Y..F..W..I..R..V..V..E..M..S..Q
+7801705 GTAGTAAGCAAAGAACATTTTATAAATTATAAAACCATATCCTCACGACTTCCATAGACTGTTGTTAATTAT
+
+                  [+1b]
+                  1.8e-07
+                  S..Q..T..T..I..C..R..F..E..A..L..Q..L..S..F..
+                  +        +  +        +  +  +     +  +  +
+                  S..L..A..T..I..Y..Y..F..E..S..H..Q..L..S..A..
+7825105 GGGGGATTCTTCACTAGCAACCATATATTATTTTGAATCACACCAATTGTCTGCAATATTCTAGCAAAAACA
+
+                   [+2c]
+                   8.8e-07
+                   L..E..K..E..V..V..R..V..W..F..C..N..R..R..Q..
+                               +  +  +     +  +     +
+                   R..S..I..G..V..V..R..I..W..F..Y..N..S..L..I..
+7927561 ACAATTGACAACGGTCTATAGGAGTCGTGAGGATATGGTTTTATAATTCTTTAATTGTCCTTTTTTTACTCT
+
+              [-4a]
+              8.1e-07
+              ..E..L..W..K..Q..L..L..P..K..L..K..C..M..N..K
+                         +  +  +           +  +     +  +  +
+              ..H..K..F..K..N..L..F..L..N..L..K..S..A..N..K
+8028217 CGTTTTGTGTTTAAACTTGTTTAAAAATAAATTTAATTTTGATGCATTTTTTAGTTTCTTTTTTTATATAAA
+
+********************************************************************************
+
+
+CPU: ubuntu
+Time 470.725418 secs.
+
+mast.bin test2.fasta.meme.out -nostatus -mt 1e-06 -ev 2e-02 -dna -brief -d /home/struckma/XXXXX/xxxxx.seq.fasta -text -stdout


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/mixedmast.dat
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mpath.ontology.test
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mpath.ontology.test	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mpath.ontology.test	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,100 @@
+!autogenerated-by:     Pathbase version 2.0
+!saved-by:             	mgruenb
+!date:                 	Mon Oct 20 20:15:35 BST 2003
+!version: $Revision: 1.1 $
+!The Pathbase mouse pathology ontology provides a description of mutant and transgenic mouse pathology phenotypes and incorporates 425 known mouse pathologies hierarchically organised as "instances of" pathological processes.
+!Questions, comments and suggestions for amendment should be sent to the Co-ordinator, Dr. Paul Schofield, Dept. of Anatomy, University of Cambridge (PS at mole.bio.cam.ac.uk).
+!The ontology was developed by the Pathbase consortium (http://www.pathbase.net) funded under the European Commission's Fifth Framework programme, Contract number QLRI-CT-1999-00320.
+$pathology_ontology ; MPATH:0
+ %cell and tissue damage ; MPATH:1
+  %cell death ; MPATH:2
+   %apoptosis ; MPATH:3 ; synonym:programmed cell death
+   %necrosis ; MPATH:4
+    %bridging necrosis ; MPATH:5
+    %caseous necrosis ; MPATH:6
+    %coagulation necrosis ; MPATH:7
+    %fat necrosis ; MPATH:8
+    %fibrinoid necrosis ; MPATH:9
+    %gangrene ; MPATH:10 ; synonym:mortification
+    %hemorrhagic necrosis ; MPATH:11
+    %liquefactive necrosis ; MPATH:12
+    %piecemeal necrosis ; MPATH:13
+  %degenerative change ; MPATH:14
+   %general degenerative change ; MPATH:15
+    %myxoid/myxomatous degeneration ; MPATH:16 ; synonym:mucoid degeneration ; synonym:myxomatosis
+   %subcellular defect ; MPATH:17
+    %cytoskeletal defect ; MPATH:18
+    %endoplasmic reticulum defect ; MPATH:19
+    %golgi defect ; MPATH:20
+    %lysosomal defect ; MPATH:21
+    %mitochondrial defect ; MPATH:22
+    %peroxysomal defect ; MPATH:23
+    %plasma membrane defect ; MPATH:24
+    %nuclear defect ; MPATH:460
+   %tissue specific degenerative process ; MPATH:25
+    %alopecia ; MPATH:26
+    %arthrosis ; MPATH:27 ; synonym:osteoarthritis ; synonym:degenerative joint disease ; synonym:osteoarthrosis
+    %atherosclerosis ; MPATH:28 ; synonym:nodular sclerosis
+    %cataract ; MPATH:29
+     %cataract- capsular-epithelial ; MPATH:461
+     %cataract- nuclear and cortical ; MPATH:462
+     %cataract- cortical liquefactive ; MPATH:463
+     %cataract- lens extrusion ; MPATH:464
+    %cystic medial necrosis ; MPATH:30 ; synonym:erdheim disease ; synonym:mucoid medial degeneration ; synonym:media necrosis aortae idiopathica cystica ; synonym:media necrosis of the aorta
+    %emphysema ; MPATH:31
+    %glaucoma ; MPATH:32
+     %glaucoma developmental ; MPATH:465
+     %glaucoma- open angle ; MPATH:466
+     %glaucoma- angle closure ; MPATH:467
+   %dystrophy ; MPATH:554
+  %intracellular and extracellular accumulation ; MPATH:33
+   %amyloid deposition ; MPATH:34
+   %bile deposition ; MPATH:35
+   %ceroid deposition ; MPATH:37
+   %glycogen deposition ; MPATH:39
+   %hyalinosis ; MPATH:40 ; synonym:hyaline degeneration, eosinophilic cytoplasmic change
+   %lipid deposition ; MPATH:42
+   %lipofuscin deposition ; MPATH:43
+   %melanin deposition ; MPATH:44
+   %protein deposition ; MPATH:45
+    %copper deposition ; MPATH:38  % mineralisation ; MPATH:555
+    %iron deposition ; MPATH:41  % mineralisation ; MPATH:555
+   %uric acid deposition ; MPATH:46
+   %pseudocyst ; MPATH:468
+   %mucous secretions ; MPATH:547
+   %mineralisation ; MPATH:555
+    %calcium deposition ; MPATH:36
+    %copper deposition ; MPATH:38  % protein deposition ; MPATH:45
+    %iron deposition ; MPATH:41  % protein deposition ; MPATH:45
+  %intracellular and extracellular depletion ; MPATH:47
+   %decalcification ; MPATH:48
+   %demyelination ; MPATH:49
+   %glycogen depletion ; MPATH:50
+   %hypocalcification ; MPATH:51
+   %lipid depletion ; MPATH:52
+   %osteopenia ; MPATH:53
+   %osteoporosis ; MPATH:54
+ %developmental and structural abnormality ; MPATH:55
+  %general developmental defect ; MPATH:56
+   %agenesis ; MPATH:57 ; synonym:agenesia
+   %aplasia ; MPATH:58
+   %branching morphogenesis defect ; MPATH:59
+   %communication defect ; MPATH:60
+   %curvature defect ; MPATH:61
+   %cyst ; MPATH:62
+   %depletion ; MPATH:63
+   %developmental dysplasia ; MPATH:64
+    %developmental cystic dysplasia ; MPATH:473
+   %developmental hypoplasia ; MPATH:65
+   %dilatation ; MPATH:66
+    %ectasia ; MPATH:474
+     %lymphangiectasis ; MPATH:98  % cardiovascular developmental defect ; MPATH:89
+     %ductal ectasia ; MPATH:475
+     %telangiectasia ; MPATH:476
+   %displacement and deformity ; MPATH:67
+   %diverticulum ; MPATH:68
+   %fistula ; MPATH:70
+   %fusion defect ; MPATH:71
+   %growth acceleration ; MPATH:72
+   %growth arrest ; MPATH:73
+   %hamartoma ; MPATH:74

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multi_1.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multi_1.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multi_1.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,14 @@
+>gi|239758|bbs|68379 glucocorticoid receptor,  GR [human, Peptide Partial, 394 aa]
+MDSKESLTPGREENPSSVLAQERGDVMDFYKTLRGGATVKVSASSPSLAVASQSDSKQRRLLVDFPKGSV
+>gi|239752|bbs|68871 PML-3=putative zinc finger protein [human, Peptide, 802 aa]
+MPPPETPSEGRQPSPSPSPTERAPASEEEFQFLRCQQCQAEAKCPKLLPCLHTLCSGCLEASGMQCPICQ
+>gi|238775|bbs|65126 putative tyrosine kinase receptor=UFO [human, NIH3T3, Peptide, 894 aa]
+MAWRCPRMGRVPLAWCLALCGWACMAPRGTQAEESPFVGNPGNITGARGLTGTLRCQLQVQGEPPEVHWL
+>gi|239006|bbs|65162 alpha(1,3)-fucosyltransferase, ELFT [human, Peptide, 400 aa]
+MGAPWGSPTAAAGGRRGWRRGRGLPWTVCVLAAAGLTCTALITYACWGQLPPLPWASPTPSRPVGVLLWW
+>gi|237597|bbs|60089 putative adhesion molecule=ADMLX [human, Peptide, 679 aa]
+MVPGVPGAVLTLCLWLAASSGSWRPAPARLCAAAGRVAVCRERPARSCASRCLSLQITRISAFFQHFQNN
+>gi|237995|bbs|62046 NK-1 receptor [human, lung, Peptide, 407 aa]
+MDNVLPVDSDLSPNISTNTSEPNQFVQPAWEIVLWAAAYTVIVVTSVVGNVVVMWIILAHKRMRTVTNYF
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multi_2.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multi_2.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multi_2.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,14 @@
+>gi|239758|bbs|68379 glucocorticoid receptor,  GR [human, Peptide Partial, 394 aa]
+MDSKESLTPGREENPSSVLAQERGDVMDFYKTLRGGATVKVSASSPSLAVASQSDSKQRRLLVDFPKGSV
+>gi|239752|bbs|68871 PML-3=putative zinc finger protein [human, Peptide, 802 aa]
+MPPPETPSEGRQPSPSPSPTERAPASEEEFQFLRCQQCQAEAKCPKLLPCLHTLCSGCLEASGMQCPICQ
+>gi|238775|bbs|65126 putative tyrosine kinase receptor=UFO [human, NIH3T3, Peptide, 894 aa]
+MAWRCPRMGRVPLAWCLALCGWACMAPRGTQAEESPFVGNPGNITGARGLTGTLRCQLQVQGEPPEVHWL
+>gi|239006|bbs|65162 alpha(1,3)-fucosyltransferase, ELFT [human, Peptide, 400 aa]
+MGAPWGSPTAAAGGRRGWRRGRGLPWTVCVLAAAGLTCTALITYACWGQLPPLPWASPTPSRPVGVLLWW
+>gi|237597|bbs|60089 putative adhesion molecule=ADMLX [human, Peptide, 679 aa]
+MVPGVPGAVLTLCLWLAASSGSWRPAPARLCAAAGRVAVCRERPARSCASRCLSLQITRISAFFQHFQNN
+>gi|237995|bbs|62046 NK-1 receptor [human, lung, Peptide, 407 aa]
+MDNVLPVDSDLSPNISTNTSEPNQFVQPAWEIVLWAAAYTVIVVTSVVGNVVVMWIILAHKRMRTVTNYF
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multi_blast.bls
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multi_blast.bls	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multi_blast.bls	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2428 @@
+BLASTP 2.1.3 [Apr-11-2001]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATH_RAT
+         (333 letters)
+
+Database: /data_2/jason/blastdb/wormpep62
+           20,085 sequences; 8,813,425 total letters
+
+Searching..................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O4573...   196  2e-50
+F41E6.6 CE10254   cysteine protease and a protease inhibitor...   166  2e-41
+R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id...   162  3e-40
+R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810 pr...   126  2e-29
+Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4 pr...   123  1e-28
+
+>T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O45734
+           protein_id:CAB07275.1
+          Length = 337
+
+ Score =  196 bits (498), Expect = 2e-50
+ Identities = 122/318 (38%), Positives = 174/318 (54%), Gaps = 21/318 (6%)
+
+Query: 26  NAIEKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRNH-----TFKMGLNQF 80
+           +AIEK+    + +   K YS  E    ++ F  N   I+ HN R+H     TF+MGLN  
+Sbjct: 27  SAIEKWD--DYKEDFDKEYSESEEQTYMEAFVKNMIHIENHN-RDHRLGRKTFEMGLNHI 83
+
+Query: 81  SDMSFAEIK----HKYLWSEPQNCSATKSNYLRGTG-PYPSSMDWRKKGNVVSPVKNQGA 135
+           +D+ F++ +    ++ L+ + +      S++L       P  +DWR   ++V+ VKNQG 
+Sbjct: 84  ADLPFSQYRKLNGYRRLFGDSR--IKNSSSFLAPFNVQVPDEVDWRDT-HLVTDVKNQGM 140
+
+Query: 136 CGSCWTFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNK 195
+           CGSCW FS TGALE   A   G++++L+EQ LVDC+  + NHGC GGL  QAFEYI  N 
+Sbjct: 141 CGSCWAFSATGALEGQHARKLGQLVSLSEQNLVDCSTKYGNHGCNGGLMDQAFEYIRDNH 200
+
+Query: 196 GIMGEDSYPYIGKNGQCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEV-T 254
+           G+  E+SYPY G++ +C FN +   A  K  V+    DE  +  AVA   P+S A +   
+Sbjct: 201 GVDTEESYPYKGRDMKCHFNKKTVGADDKGYVDTPEGDEEQLKIAVATQGPISIAIDAGH 260
+
+Query: 255 EDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYG-EQNGLLYWIVKXXXXXXXXXXXYFLI 313
+             F +YK GVY    C  + ++++H VL VGYG +     YWIVK           Y  I
+Sbjct: 261 RSFQLYKKGVYYDEEC--SSEELDHGVLLVGYGTDPEHGDYWIVKNSWGAGWGEKGYIRI 318
+
+Query: 314 ERGK-NMCGLAACASYPI 330
+            R + N CG+A  ASYP+
+Sbjct: 319 ARNRNNHCGVATKASYPL 336
+
+
+>F41E6.6 CE10254   cysteine protease and a protease inhibitor (ST.LOUIS)
+           TR:O16454 protein_id:AAB65956.1
+          Length = 498
+
+ Score =  166 bits (419), Expect = 2e-41
+ Identities = 108/325 (33%), Positives = 155/325 (47%), Gaps = 35/325 (10%)
+
+Query: 33  FTSWMKQHQKTYSS-REYSHRLQVFANNWRKI-QAHNQRNHTFKMGLNQFSDMSFAEIKH 90
+           F  ++ +H+K Y++ RE   R +VF  N + I +       T   G  +FSDM+  E K 
+Sbjct: 174 FLDFVDRHEKKYTNKREVLKRFRVFKKNAKVIRELQKNEQGTAVYGFTKFSDMTTMEFKK 233
+
+Query: 91  ---KYLWSEP----QNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCWTFS 143
+               Y W +P    +  +  K +        P S DWR+KG  V+ VKNQG CGSCW FS
+Sbjct: 234 IMLPYQWEQPVYPMEQANFEKHDVTINEEDLPESFDWREKG-AVTQVKNQGNCGSCWAFS 292
+
+Query: 144 TTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFE------------YI 191
+           TTG +E A  IA  K+++L+EQ+LVDC  +  + GC GGLPS A++             +
+Sbjct: 293 TTGNVEGAWFIAKNKLVSLSEQELVDC--DSMDQGCNGGLPSNAYKIGKFVVSDNYCFLV 350
+
+Query: 192 LYNK---------GIMGEDSYPYIGKNGQCKFNPEKAVAFVKNVVNITLNDEAAMVEAVA 242
+            Y+K         G+  ED+YPY G+   C    +    ++   V +  +DE  M + + 
+Sbjct: 351 FYHKTTKEIIRMGGLEPEDAYPYDGRGETCHLVRKDIAVYINGSVELP-HDEVEMQKWLV 409
+
+Query: 243 LYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLLYWIVKXXXX 302
+              P+S           Y+ GV         P  +NH VL VGYG+     YWIVK    
+Sbjct: 410 TKGPISIGLN-ANTLQFYRHGVVHPFKIFCEPFMLNHGVLIVGYGKDGRKPYWIVKNSWG 468
+
+Query: 303 XXXXXXXYFLIERGKNMCGLAACAS 327
+                  YF + RGKN+CG+   A+
+Sbjct: 469 PNWGEAGYFKLYRGKNVCGVQEMAT 493
+
+
+>R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id:AAC69091.2
+          Length = 383
+
+ Score =  162 bits (410), Expect = 3e-40
+ Identities = 97/304 (31%), Positives = 157/304 (50%), Gaps = 19/304 (6%)
+
+Query: 37  MKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRNHTFKMGLNQFSDMSFAEIKH------ 90
+           +K  +K  S  E+ +R Q+F  N  + +A  +RN    + +N+F+D +  E++       
+Sbjct: 87  LKFDRKYTSVEEFEYRYQIFLRNVIEFEAEEERNLGLDLDVNEFTDWTDEELQKMVQENK 146
+
+Query: 91  --KYLWSEPQNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCWTFSTTGAL 148
+             KY +  P+     + +YL      P+S+DWR++G + +P+KNQG CGSCW F+T  ++
+Sbjct: 147 YTKYDFDTPK----FEGSYLETGVIRPASIDWREQGKL-TPIKNQGQCGSCWAFATVASV 201
+
+Query: 149 ESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIG- 207
+           E+  AI  GK+++L+EQ++VDC  +  N+GC GG    A +++  N G+  E  YPY   
+Sbjct: 202 EAQNAIKKGKLVSLSEQEMVDC--DGRNNGCSGGYRPYAMKFVKEN-GLESEKEYPYSAL 258
+
+Query: 208 KNGQCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYSS 267
+           K+ QC         F+ +   +  N+E  +   V    PV+F   V +    Y+SG+++ 
+Sbjct: 259 KHDQCFLKENDTRVFIDD-FRMLSNNEEDIANWVGTKGPVTFGMNVVKAMYSYRSGIFNP 317
+
+Query: 268 NSCHKTPDKVN-HAVLAVGYGEQNGLLYWIVKXXXXXXXXXXXYFLIERGKNMCGLAACA 326
+           +    T   +  HA+  +GYG +    YWIVK           YF + RG N CGLA   
+Sbjct: 318 SVEDCTEKSMGAHALTIIGYGGEGESAYWIVKNSWGTSWGASGYFRLARGVNSCGLANTV 377
+
+Query: 327 SYPI 330
+             PI
+Sbjct: 378 VAPI 381
+
+
+>R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810
+           protein_id:CAA89070.1
+          Length = 402
+
+ Score =  126 bits (316), Expect = 2e-29
+ Identities = 97/325 (29%), Positives = 152/325 (45%), Gaps = 31/325 (9%)
+
+Query: 20  TAELTVNAIEKFHFTSWMKQHQKTYS-SREYSHRLQVFANNWRKIQAHNQRNH--TFKMG 76
+           T E  +  I K  + ++ ++  K+Y+ S+E   RL  + N    I   N +N   + + G
+Sbjct: 78  TNERGIQNIAK-EYIAYTEKFDKSYATSQESLKRLNAYYNTDENIANWNIQNEHGSAEYG 136
+
+Query: 77  LNQFSDMSFAEIK--------HKYLWSE-------PQNCSATKSNYLRGTGPYPSSMDWR 121
+            N  SD +  E +        +K L  E       P++ +A K      + P+P   DWR
+Sbjct: 137 HNDMSDWTDEEFEKTLLPKSFYKRLHKEAEFIEPIPESLTAKKGE---SSSPFPDFFDWR 193
+
+Query: 122 KKGNVVSPVKNQGACGSCWTFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQG 181
+            K NV++PVK QG CGSCW F++T  +E+A AIA G+   L+EQ L+DC  +  ++ C G
+Sbjct: 194 DK-NVITPVKAQGQCGSCWAFASTATVEAAWAIAHGEKRNLSEQTLLDC--DLVDNACDG 250
+
+Query: 182 GLPSQAFEYILYNKGIMGEDSYPYIG-KNGQCKFNPEKAVAFVKNVVNITLNDEAAMVEA 240
+           G   +AF YI +  G+      PY+  +   C  N       +K       +DE +++  
+Sbjct: 251 GDEDKAFRYI-HRNGLANAVDLPYVAHRQNGCAVNDHWNTTRIK-AAYFLHHDEDSIINW 308
+
+Query: 241 VALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVN-HAVLAVGYG-EQNGLLYWIVK 298
+           +  + PV+    V +    YK GV++ +      + +  HA+L  GYG  + G  YWIVK
+Sbjct: 309 LVNFGPVNIGMAVIQPMRAYKGGVFTPSEYACKNEVIGLHALLITGYGTSKTGEKYWIVK 368
+
+Query: 299 XX-XXXXXXXXXYFLIERGKNMCGL 322
+                       Y    RG N CG+
+Sbjct: 369 NSWGNTWGVEHGYIYFARGINACGI 393
+
+
+>Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4
+           protein_id:CAA22062.1
+          Length = 343
+
+ Score =  123 bits (309), Expect = 1e-28
+ Identities = 92/304 (30%), Positives = 145/304 (47%), Gaps = 26/304 (8%)
+
+Query: 26  NAIEKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRNH---TFKMGLNQFSD 82
+           NA + F    +++++   Y   E   R  +F+ N   ++ +N+ +    T++  LN FSD
+Sbjct: 49  NAFQNF-LVKYLREYPNEY---EIVKRFTIFSRNLDLVERYNKEDAGKVTYE--LNDFSD 102
+
+Query: 83  MSFAEIKHKYLWSEPQNCSAT-KSNYLRGTGPYPSSMDWRKKG--NVVSPVKNQGACGSC 139
+           ++  E K   +  +P +   + K   L      P+S+DWR     N V+ +K QG CGSC
+Sbjct: 103 LTEEEWKKYLMTPKPDHSEKSLKPKTLIDKKNLPNSVDWRNVNGTNHVTGIKYQGPCGSC 162
+
+Query: 140 WTFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMG 199
+           W F+T  A+ESAV+I+ G + +L+ QQL+DC     +  C GG P +A +Y   + GI  
+Sbjct: 163 WAFATAAAIESAVSISGGGLQSLSSQQLLDC--TVVSDKCGGGEPVEALKY-AQSHGITT 219
+
+Query: 200 EDSYPYIGKNGQCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNP-VSFAFEVTEDFM 258
+             +YPY     +C+      VA + + +     DE  M + VAL  P +  A   T    
+Sbjct: 220 AHNYPYYFWTTKCR-ETVPTVARISSWMKAESEDE--MAQIVALNGPMIVCANFATNKNR 276
+
+Query: 259 MYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLLYWIVKXXXXXXXXXXXYFLIERGKN 318
+            Y SG+     C   P    HA++ +GYG      YWI+K           Y  ++R  N
+Sbjct: 277 FYHSGIAEDPDCGTEP---THALIVIGYGPD----YWILKNTYSKVWGEKGYMRVKRDVN 329
+
+Query: 319 MCGL 322
+            CG+
+Sbjct: 330 WCGI 333
+
+
+>Y113G7B.15 CE23295    (HINXTON) TR:Q9U2X1 protein_id:CAB54334.1
+          Length = 328
+
+ Score =  120 bits (302), Expect = 9e-28
+ Identities = 94/317 (29%), Positives = 140/317 (43%), Gaps = 40/317 (12%)
+
+Query: 40  HQKTYSS-REYSHRLQVFANNWRKIQAHNQ------RNHTFKMGLNQFSDMSFAEIKHKY 92
+           H+K Y +  E   RL  FA N +KIQ  N       RN TF  G N+F+D +  E+  + 
+Sbjct: 3   HKKHYRTPAEKDRRLAHFAKNHQKIQELNAKARREGRNVTF--GWNKFADKNRQELSARN 60
+
+Query: 93  LWSEPQNCSAT---KSNYLRGTGPYPSSMDWRKKGN----------------VVSPVKNQ 133
+               P+N +     K  + RG+  + +    R+ G+                VV PVK+Q
+Sbjct: 61  SKIHPKNHTDLPIYKPRHPRGSRNHHNKRSKRQSGDIPDYFDLRDIYVDGSPVVGPVKDQ 120
+
+Query: 134 GACGSCWTFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILY 193
+             CG CW F+TT   E+A  + S    +L++Q++ DCA + +  GC GG P    + +++
+Sbjct: 121 EQCGCCWAFATTAITEAANTLYSKSFTSLSDQEICDCADSGDTPGCVGGDPRNGLK-MVH 179
+
+Query: 194 NKGIMGEDSYPY----IGKNGQCKFNPEKAVAFVKNVVNITLND-----EAAMVEAVALY 244
+            +G   +  YPY        G C    EK+       +N+   D     E  M      +
+Sbjct: 180 LRGQSSDGDYPYEEYRANTTGNC-VGDEKSTVIQPETLNVYRFDQDYAEEDIMENLYLNH 238
+
+Query: 245 NPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYG-EQNGLLYWIVKXXXXX 303
+            P +  F V E+F  Y SGV  S  C++      H+V  VGYG   +G+ YW+V+     
+Sbjct: 239 IPTAVYFRVGENFEWYTSGVLQSEDCYQMTPAEWHSVAIVGYGTSDDGVPYWLVRNSWNS 298
+
+Query: 304 XXXXXXYFLIERGKNMC 320
+                 Y  I RG N C
+Sbjct: 299 DWGLHGYVKIRRGVNWC 315
+
+
+>K02E7.10 CE11640   protease (ST.LOUIS) TR:O17255 protein_id:AAB71030.1
+          Length = 299
+
+ Score =  114 bits (284), Expect = 1e-25
+ Identities = 70/216 (32%), Positives = 107/216 (49%), Gaps = 8/216 (3%)
+
+Query: 118 MDWRKKGNVVSPVKNQGACGSCWTFSTTGALESAVAIAS-GKMMTLAEQQLVDCAQNFNN 176
+           +DWR+KG +V PVK+QG C + + F+   A+ES  A A+ GK+++ +EQQ++DCA NF N
+Sbjct: 84  LDWREKG-IVGPVKDQGKCNASYAFAAIAAIESMYAKANNGKLLSFSEQQIIDCA-NFTN 141
+
+Query: 177 HGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKN--GQCKFNPEKAVAFVKNVVNITLNDE 234
+             CQ  L +      L   G+  E  YPY+GK   G+C+++  K +      +++  N+E
+Sbjct: 142 P-CQENLENVLSNRFLKENGVGTEADYPYVGKENVGKCEYDSSK-MKLRPTYIDVYPNEE 199
+
+Query: 235 AAMVEAVALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLLY 294
+            A    +  +    F       F  YK+G+Y+             ++  VGYG+     Y
+Sbjct: 200 WARAH-ITTFGTGYFRMRSPPSFFHYKTGIYNPTKEECGNANEARSLAIVGYGKDGAEKY 258
+
+Query: 295 WIVKXXXXXXXXXXXYFLIERGKNMCGLAACASYPI 330
+           WIVK           Y  + R  N CG+A   S PI
+Sbjct: 259 WIVKGSFGTSWGEHGYMKLARNVNACGMAESISIPI 294
+
+
+>C32B5.7 CE08515   cathepsin-like peptidase (ST.LOUIS) TR:P91111
+           protein_id:AAB37963.1
+          Length = 250
+
+ Score =  108 bits (270), Expect = 5e-24
+ Identities = 69/197 (35%), Positives = 104/197 (52%), Gaps = 18/197 (9%)
+
+Query: 106 NYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCWTFSTTGALESAVAIASGKMMTLAEQ 165
+           NY     P+   +DWR +G VV PVK+QG C + + F+   A+ES  AIA+G++++ +EQ
+Sbjct: 63  NYKNAKKPF---LDWRDEG-VVGPVKDQGNCNASYAFAAISAIESMYAIANGQLLSFSEQ 118
+
+Query: 166 QLVDCAQNFNNHGCQ-GGLPSQAFEYILYNKGIMGEDSYPYIG-KNGQCKFNPEKAVAFV 223
+           Q++DC       GC     P  A  Y L  KGI     YP++G KN +C+++ +KA   +
+Sbjct: 119 QIIDCL-----GGCAIESDPMMAMTY-LERKGIETYTDYPFVGKKNEKCEYDSKKAYLIL 172
+
+Query: 224 KNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVY--SSNSCHKTPDKVNHAV 281
+            +  +  ++DE+  +  +    P  F       F  YKSG+Y  +   C  T +K   A+
+Sbjct: 173 DDTYD--MSDESLALVFIDERGPGLFTMNTPPSFFNYKSGIYNPTEEECKSTNEK--RAL 228
+
+Query: 282 LAVGYGEQNGLLYWIVK 298
+             VGYG   G  YWIVK
+Sbjct: 229 TIVGYGNDKGQNYWIVK 245
+
+
+>Y51A2D.8 CE19204   Cysteine proteases (2 domains) (HINXTON) TR:Q9XXQ7
+           protein_id:CAA16407.1
+          Length = 386
+
+ Score =  106 bits (265), Expect = 2e-23
+ Identities = 82/330 (24%), Positives = 137/330 (40%), Gaps = 44/330 (13%)
+
+Query: 33  FTSWMKQHQKTYSSR-EYSHRLQVFANNWRKIQAHNQRN----HTFKMGLNQFSDMSFAE 87
+           F  + K++ + Y    E   R   F  ++  +   N ++    +  + G+N+FSD+S AE
+Sbjct: 43  FEDFKKKYNRKYKDESENQQRFNNFVKSYNNVDKLNAKSKAAGYDTQFGINKFSDLSTAE 102
+
+Query: 88  IKHKYLWSEPQN------------------CSATKSNYLRGTGPYPSSMDWRKKG----N 125
+              +     P N                      K+ + R +  YP   D R +      
+Sbjct: 103 FHGRLSNVVPSNNTGLPMLNFDKKKPDFRAADMNKTRHKRRSTRYPDYFDLRNEKINGRY 162
+
+Query: 126 VVSPVKNQGACGSCWTFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPS 185
+           +V P+K+QG C  CW F+ T  +E+  A  SGK  +L++Q++ DC       GC+GG  +
+Sbjct: 163 IVGPIKDQGQCACCWGFAVTALVETVYAAHSGKFKSLSDQEVCDCGTE-GTPGCKGGSLT 221
+
+Query: 186 QAFEYILYNKGIMGEDSYPY----IGKNGQCKFNPEKAV----AFVKNVVNITLNDEAAM 237
+              +Y+    G+ G++ YPY      +  +C+      +    AF   V+N    +E  +
+Sbjct: 222 LGVQYV-KKYGLSGDEDYPYDQNRANQGRRCRLRETDRIVPARAFNFAVINPRRAEEQII 280
+
+Query: 238 VEAVALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYG---EQNGLL- 293
+                   PV+  F+V + F  YK GV   + C +      HA   VGY    +  G   
+Sbjct: 281 QVLTEWKVPVAVYFKVGDQFKEYKEGVIIEDDCRRATQW--HAGAIVGYDTVEDSRGRSH 338
+
+Query: 294 -YWIVKXXXXXXXXXXXYFLIERGKNMCGL 322
+            YWI+K           Y  + RG++ C +
+Sbjct: 339 DYWIIKNSWGGDWAESGYVRVVRGRDWCSI 368
+
+
+>Y71H2AR.2 CE22930    (ST.LOUIS) protein_id:AAK29985.1
+          Length = 345
+
+ Score =  103 bits (257), Expect = 1e-22
+ Identities = 72/235 (30%), Positives = 105/235 (44%), Gaps = 16/235 (6%)
+
+Query: 91  KYLWSEPQNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCWTFSTTGALES 150
+           ++ W  P +   T   +L          DWR+KG +V PVK+QG C +   F+ T ++ES
+Sbjct: 69  RFQWETPIHMDRTTEEFL----------DWREKG-IVGPVKDQGKCNASHAFAITSSIES 117
+
+Query: 151 AVAIAS-GKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGK- 208
+             A A+ G +++ +EQQL+DC       GC+      A  Y L   GI  E  YPY+ K 
+Sbjct: 118 MYAKATNGTLLSFSEQQLIDCNDQ-GYKGCEEQFAMNAIGY-LATHGIETEADYPYVDKT 175
+
+Query: 209 NGQCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYSSN 268
+           N +C F+  K+   +K  V    N+    V  V  Y P  F          YK G+Y+ +
+Sbjct: 176 NEKCTFDSTKSKIHLKKGVVAEGNEVLGKVY-VTNYGPAFFTMRAPPSLYDYKIGIYNPS 234
+
+Query: 269 SCHKTPDKVNHAVLAVGYGEQNGLLYWIVKXXXXXXXXXXXYFLIERGKNMCGLA 323
+               T      +++ VGYG +    YWIVK           Y  + R  N C +A
+Sbjct: 235 IEECTSTHEIRSMVIVGYGIEGEQKYWIVKGSFGTSWGEQGYMKLARDVNACAMA 289
+
+
+>C50F4.3 CE05468   thiol protease (HINXTON) TR:Q18740 protein_id:CAA94738.1
+          Length = 374
+
+ Score =  102 bits (254), Expect = 3e-22
+ Identities = 86/319 (26%), Positives = 129/319 (39%), Gaps = 31/319 (9%)
+
+Query: 33  FTSWMKQHQKTYSSR-EYSHRLQVFANNWRKI----QAHNQRNHTFKMGLNQFSDMSFAE 87
+           F  ++ ++++ Y    E   R Q F     ++    +A  +  H  K G+N+FSD+S  E
+Sbjct: 47  FEDFIVKYKRNYKDEIEKKFRFQQFVATHNRVGKMNKAAKKAGHDTKYGINKFSDLSKKE 106
+
+Query: 88  IKHKYLWSEP--QNCSATKSNYL-----RGTGPYPSSMDWRKKG----NVVSPVKNQGAC 136
+           I   Y    P   N +  K N       R     P + D R K      ++ P+K Q +C
+Sbjct: 107 IHGMYSKFGPPKNNTNVPKFNLKNLRVKRQMEGLPKTFDLRNKKVGGHYIIGPIKTQDSC 166
+
+Query: 137 GSCWTFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKG 196
+             CW F+ T   E+A+ +   K M L+EQ++ DCA   +  GC GG P    EYI    G
+Sbjct: 167 ACCWGFAATAVAEAALTVHLKKAMNLSEQEVCDCAPK-HGPGCNGGDPVDGLEYI-KEMG 224
+
+Query: 197 IMGEDSYPY-------IGKNGQCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYN-PVS 248
+           + G   YP+       +G+    K++ E     +        N E  M   + L N P+S
+Sbjct: 225 LTGGKEYPFNVNRSTQLGRCESEKYDRELNPLELDYYAIDPFNAEYQMTHHLYLLNLPIS 284
+
+Query: 249 FAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNG-----LLYWIVKXXXXX 303
+            AF        Y SG+     C        H+   VGYG         + YWI +     
+Sbjct: 285 VAFRTGASLSSYLSGILELADCDDEKGGHWHSGAIVGYGTTKNSAGRTVDYWIFRNSWWT 344
+
+Query: 304 XXXXXXYFLIERGKNMCGL 322
+                 Y  I RG++ C +
+Sbjct: 345 DWGDDGYARIVRGEDWCSI 363
+
+
+>F26E4.3 CE17714   cysteine protease (HINXTON) TR:P90850
+           protein_id:CAB03007.1
+          Length = 491
+
+ Score = 87.4 bits (215), Expect = 1e-17
+ Identities = 70/237 (29%), Positives = 102/237 (42%), Gaps = 33/237 (13%)
+
+Query: 115 PSSMDWRKK-GNVVSPVKNQGACGSCWTFSTTGALESAVAIAS-GKM-MTLAEQQLVDCA 171
+           P   D R K G ++ PV +QG CGS W+ STT      +AI S G++  TL+ QQL+ C 
+Sbjct: 224 PEHFDARDKWGPLIHPVADQGDCGSSWSVSTTAISSDRLAIISEGRINSTLSSQQLLSCN 283
+
+Query: 172 QNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKNG-------------------QC 212
+           Q+    GC+GG   +A+ YI    G++G+  YPY+                       +C
+Sbjct: 284 QH-RQKGCEGGYLDRAWWYI-RKLGVVGDHCYPYVSGQSREPGHCLIPKRDYTNRQGLRC 341
+
+Query: 213 KFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVY--SSNSC 270
+               + + AF         + E  +   +    PV   F V EDF MY  GVY  S  + 
+Sbjct: 342 PSGSQDSTAFKMTPPYKVSSREEDIQTELMTNGPVQATFVVHEDFFMYAGGVYQHSDLAA 401
+
+Query: 271 HKTPDKV---NHAVLAVGYGEQNG----LLYWIVKXXXXXXXXXXXYFLIERGKNMC 320
+            K    V    H+V  +G+G  +     + YW+             YF + RG+N C
+Sbjct: 402 QKGASSVAEGYHSVRVLGWGVDHSTGKPIKYWLCANSWGTQWGEDGYFKVLRGENHC 458
+
+
+>C52E4.1 CE08943  locus:cpr-1 cathepsin-like cysteine protease (HINXTON)
+           TR:Q18783 protein_id:CAB01410.1
+          Length = 340
+
+ Score = 85.1 bits (209), Expect = 5e-17
+ Identities = 67/269 (24%), Positives = 111/269 (40%), Gaps = 33/269 (12%)
+
+Query: 82  DMSFAEIKHKYLWSEPQNCSATKSNYLRGTGP--YPSSMDWRKKGNVVSPVKNQGACGSC 139
+           +M F  +  KY  +      AT+   +  + P  + S   W +  ++   +++Q  CGSC
+Sbjct: 66  EMKFKLMDGKYAAAHSDEIRATEQEVVLASVPATFDSRTQWSECKSI-KLIRDQATCGSC 124
+
+Query: 140 WTFSTTGALESAVAIAS--GKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGI 197
+           W F     +     I +   +   ++   L+ C  +   +GC+GG P QA  +   +KG+
+Sbjct: 125 WAFGAAEMISDRTCIETKGAQQPIISPDDLLSCCGSSCGNGCEGGYPIQALRW-WDSKGV 183
+
+Query: 198 MGEDSYPYIG-----------------KNGQCKFNPEK--AVAFVKN----VVNITLNDE 234
+           +    Y   G                 K   C  + +   + A+ K+    V    +   
+Sbjct: 184 VTGGDYHGAGCKPYPIAPCTSGNCPESKTPSCSMSCQSGYSTAYAKDKHFGVSAYAVPKN 243
+
+Query: 235 AAMVEAVALYN-PVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLL 293
+           AA ++A    N PV  AF V EDF  YKSGVY   +         HA+  +G+G ++G  
+Sbjct: 244 AASIQAEIYANGPVEAAFSVYEDFYKYKSGVYKHTAGKYLG---GHAIKIIGWGTESGSP 300
+
+Query: 294 YWIVKXXXXXXXXXXXYFLIERGKNMCGL 322
+           YW+V            +F I RG + CG+
+Sbjct: 301 YWLVANSWGVNWGESGFFKIYRGDDQCGI 329
+
+
+>M04G12.2 CE12424   cysteine protease (HINXTON) TR:P92005
+           protein_id:CAB03209.1
+          Length = 467
+
+ Score = 75.1 bits (183), Expect = 6e-14
+ Identities = 68/224 (30%), Positives = 100/224 (44%), Gaps = 44/224 (19%)
+
+Query: 101 SATKSNYLRGTGPYPSSMDWRKKG--NVVSPVKNQGA---CGSCWTFSTTGALESAVAIA 155
+           S+ KSN L      P+  DWR     N  SP +NQ     CGSCW F TTGAL     +A
+Sbjct: 214 SSFKSNDL------PTGWDWRNVSGVNYCSPTRNQHIPVYCGSCWVFGTTGALNDRFNVA 267
+
+Query: 156 -SGK--MMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKNGQC 212
+             G+  M  L+ Q+++DC    N   CQGG      E+    +G++ E    Y   NG+C
+Sbjct: 268 RKGRWPMTQLSPQEIIDCNGKGN---CQGGEIGNVLEHAKI-QGLVEEGCNVYRATNGEC 323
+
+Query: 213 KFNPEKAVA----------------FVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTED 256
+             NP                     +VK+   +   D+  ++  +    P++ A   T+ 
+Sbjct: 324 --NPYHRCGSCWPNECFSLTNYTRYYVKDYGQVQGRDK--IMSEIKKGGPIACAIGATKK 379
+
+Query: 257 F-MMYKSGVYSSNSCHKTPDKVNHAVLAVGYG-EQNGLLYWIVK 298
+           F   Y  GVYS     K+  + NH +   G+G ++NG+ YWI +
+Sbjct: 380 FEYEYVKGVYS----EKSDLESNHIISLTGWGVDENGVEYWIAR 419
+
+
+>F15D4.4 CE28917   cysteine protease (HINXTON) TR:Q93512
+           protein_id:CAB02487.1
+          Length = 622
+
+ Score = 75.1 bits (183), Expect = 6e-14
+ Identities = 85/332 (25%), Positives = 130/332 (38%), Gaps = 51/332 (15%)
+
+Query: 25  VNAIEKFH--------FTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRNH----T 72
+           ++ +EKF+        F S M       +++E   R  V++   +++  HN        +
+Sbjct: 119 LSPLEKFNEAMNNDGAFKSLMDVINFNSTAKEGLKRFNVYSKVKKEVDEHNIMYELGMSS 178
+
+Query: 73  FKMGLNQFSDMSFAEIKHKYLWSEPQNCSAT------KSNYLRGTGPYPSSMDWRKKGNV 126
+           +KM  NQFS     E+    L  +    +AT       S   R T P   ++DWR     
+Sbjct: 179 YKMSTNQFSVALDGEVAPLTLNLDALTPTATVIPATISSRKKRDTEP---TVDWRP---F 232
+
+Query: 127 VSPVKNQGACGSCWTFSTTGALESAVAIASGKMMTLAEQQL------VDCAQNFNNHGCQ 180
+           + P+ +Q  CG CW FS    +ES  AI      +L+ QQL      VD      N GC+
+Sbjct: 233 LKPILDQSTCGGCWAFSMISMIESFFAIQGYNTSSLSVQQLLTCDTKVDSTYGLANVGCK 292
+
+Query: 181 GGLPSQAFEYILYNKGIMGEDSYPYIGKNGQCK---FNPEKAVAFVKNVVNITLNDEAAM 237
+           GG    A  Y L           P+  ++  C    F P      + +   I+ N  AA 
+Sbjct: 293 GGYFQIAGSY-LEVSAARDASLIPFDLEDTSCDSSFFPPVVPTILLFDDGYISGNFTAAQ 351
+
+Query: 238 -------VEAVALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQN 290
+                  +E      P++       D   Y  GVY  + C      +NHAV+ VG+ +  
+Sbjct: 352 LITMEQNIEDKVRKGPIAVGMAAGPDIYKYSEGVYDGD-CGTI---INHAVVIVGFTDD- 406
+
+Query: 291 GLLYWIVKXXXXXXXXXXXYFLIER--GKNMC 320
+              YWI++           YF ++R  GK+ C
+Sbjct: 407 ---YWIIRNSWGASWGEAGYFRVKRTPGKDPC 435
+
+
+>Y71H2AM.3 CE26272    (ST.LOUIS) protein_id:AAK29976.1
+          Length = 716
+
+ Score = 73.9 bits (180), Expect = 1e-13
+ Identities = 52/176 (29%), Positives = 77/176 (43%), Gaps = 32/176 (18%)
+
+Query: 92  YLWSEPQNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCWTFSTTGALESA 151
+           + W  P+    T   +L          DWR KG +V PVK+QG C +   F+ + ++ES 
+Sbjct: 70  FQWKTPKYTIQTTEEFL----------DWRDKG-IVGPVKDQGKCNASHAFAISSSIESM 118
+
+Query: 152 VAIA-SGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKNG 210
+            A A +G +++ +EQQL+DC  +    GC+      A  Y +++ GI  E  YPY GK  
+Sbjct: 119 YAKATNGSLLSFSEQQLIDC-DDHGFKGCEEQPAINAVSYFIFH-GIETEADYPYAGKE- 175
+
+Query: 211 QCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYS 266
+                            N  L++E    E V  Y P  F          YK G+Y+
+Sbjct: 176 -----------------NGKLSNETQGKELVTNYGPAFFTMRAPPSLYDYKIGIYN 214
+
+
+>C32B5.13 CE08521    (ST.LOUIS) TR:P91110 protein_id:AAB37968.1
+          Length = 150
+
+ Score = 71.6 bits (174), Expect = 6e-13
+ Identities = 45/143 (31%), Positives = 73/143 (50%), Gaps = 10/143 (6%)
+
+Query: 159 MMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGK-NGQCKFNPE 217
+           +++ +EQQ++DC  NF +  CQ  + S  F   +   G++ E  YPY+GK N +CK++  
+Sbjct: 10  VLSFSEQQIIDCG-NFTSP-CQENILSHEF---IKKNGVVTEADYPYVGKENEKCKYDEN 64
+
+Query: 218 KAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYS--SNSCHKTPD 275
+           K   +  N++ +    E  +   +  + P  F  +    F  YK+G+YS     C K  D
+Sbjct: 65  KIKLWPTNMLLVGNLPETLLKLFIKEHGPGYFRMKAPPSFFNYKTGIYSPTQEECGKATD 124
+
+Query: 276 KVNHAVLAVGYGEQNGLLYWIVK 298
+               ++  VGYG + G  YWIVK
+Sbjct: 125 A--RSLTIVGYGIEGGQNYWIVK 145
+
+
+  Database: /data_2/jason/blastdb/wormpep62
+    Posted date:  Sep 3, 2001  2:17 PM
+  Number of letters in database: 8,813,425
+  Number of sequences in database:  20,085
+  
+Lambda     K      H
+   0.319    0.131    0.412 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 5933049
+Number of Sequences: 20085
+Number of extensions: 243404
+Number of successful extensions: 614
+Number of sequences better than 1.0e-10: 17
+Number of HSP's better than  0.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 16
+Number of HSP's that attempted gapping in prelim test: 568
+Number of HSP's gapped (non-prelim): 17
+length of query: 333
+length of database: 8,813,425
+effective HSP length: 46
+effective length of query: 287
+effective length of database: 7,889,515
+effective search space: 2264290805
+effective search space used: 2264290805
+T: 11
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 41 (21.8 bits)
+S2: 155 (64.3 bits)
+BLASTP 2.1.3 [Apr-11-2001]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATL_HUMAN
+         (333 letters)
+
+Database: /data_2/jason/blastdb/wormpep62
+           20,085 sequences; 8,813,425 total letters
+
+Searching..................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O4573...   334  4e-92
+F41E6.6 CE10254   cysteine protease and a protease inhibitor...   194  6e-50
+R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id...   176  2e-44
+Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4 pr...   133  1e-31
+R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810 pr...   130  1e-30
+
+>T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O45734
+           protein_id:CAB07275.1
+          Length = 337
+
+ Score =  334 bits (857), Expect = 4e-92
+ Identities = 164/341 (48%), Positives = 228/341 (66%), Gaps = 12/341 (3%)
+
+Query: 1   MNPTLILAAFCLGIASATLTFDHSLEA---QWTKWKAMHNRLYGMNEEGWRRAVWEKNMK 57
+           MN  ++LA     +A  +      +E+   +W  +K   ++ Y  +EE      + KNM 
+Sbjct: 1   MNRFILLALVAAVVAVNSAKLSRQIESAIEKWDDYKEDFDKEYSESEEQTYMEAFVKNMI 60
+
+Query: 58  MIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQ----NRKPRKGKVFQEPLFYE 113
+            IE HN+++R G+ +F M +N   D+   ++R+ +NG++    + + +    F  P   +
+Sbjct: 61  HIENHNRDHRLGRKTFEMGLNHIADLPFSQYRK-LNGYRRLFGDSRIKNSSSFLAPFNVQ 119
+
+Query: 114 APRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQ 173
+            P  VDWR+   VT VKNQG CGSCWAFSATGALEGQ  RK G+L+SLSEQNLVDCS   
+Sbjct: 120 VPDEVDWRDTHLVTDVKNQGMCGSCWAFSATGALEGQHARKLGQLVSLSEQNLVDCSTKY 179
+
+Query: 174 GNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPK-QE 232
+           GN GCNGGLMD AF+Y++DN G+D+EESYPY+  +  C +N K   A+D G+VD P+  E
+Sbjct: 180 GNHGCNGGLMDQAFEYIRDNHGVDTEESYPYKGRDMKCHFNKKTVGADDKGYVDTPEGDE 239
+
+Query: 233 KALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDN 292
+           + L  AVAT GPIS+AIDAGH SF  YK+G+Y++ +CSSE++DHGVL+VGYG   T+ ++
+Sbjct: 240 EQLKIAVATQGPISIAIDAGHRSFQLYKKGVYYDEECSSEELDHGVLLVGYG---TDPEH 296
+
+Query: 293 NKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTV 333
+             YW+VKNSWG  WG  GY+++A++R NHCG+A+ ASYP V
+Sbjct: 297 GDYWIVKNSWGAGWGEKGYIRIARNRNNHCGVATKASYPLV 337
+
+
+>F41E6.6 CE10254   cysteine protease and a protease inhibitor (ST.LOUIS)
+           TR:O16454 protein_id:AAB65956.1
+          Length = 498
+
+ Score =  194 bits (493), Expect = 6e-50
+ Identities = 124/330 (37%), Positives = 171/330 (51%), Gaps = 53/330 (16%)
+
+Query: 36  HNRLYGMNEEGWRR-AVWEKNMKMI-ELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMN 93
+           H + Y    E  +R  V++KN K+I EL   E     + FT     F DMT+ EF+++M 
+Sbjct: 181 HEKKYTNKREVLKRFRVFKKNAKVIRELQKNEQGTAVYGFTK----FSDMTTMEFKKIML 236
+
+Query: 94  GFQNRKP-----------RKGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWAFS 142
+            +Q  +P               + +E L    P S DWREKG VT VKNQG CGSCWAFS
+Sbjct: 237 PYQWEQPVYPMEQANFEKHDVTINEEDL----PESFDWREKGAVTQVKNQGNCGSCWAFS 292
+
+Query: 143 ATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQ----YVQDN----- 193
+            TG +EG  F    +L+SLSEQ LVDC     ++GCNGGL   A++     V DN     
+Sbjct: 293 TTGNVEGAWFIAKNKLVSLSEQELVDCDSM--DQGCNGGLPSNAYKIGKFVVSDNYCFLV 350
+
+Query: 194 ------------GGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPKQEKALMKAVAT 241
+                       GGL+ E++YPY+   E+C    K       G V++P  E  + K + T
+Sbjct: 351 FYHKTTKEIIRMGGLEPEDAYPYDGRGETCHLVRKDIAVYINGSVELPHDEVEMQKWLVT 410
+
+Query: 242 VGPISVAIDAGHESFLFYKEGIY--FEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVK 299
+            GPIS+ ++A   +  FY+ G+   F+  C    ++HGVL+VGYG    +     YW+VK
+Sbjct: 411 KGPISIGLNA--NTLQFYRHGVVHPFKIFCEPFMLNHGVLIVGYG----KDGRKPYWIVK 464
+
+Query: 300 NSWGEEWGMGGYVKMAKDRRNHCGIASAAS 329
+           NSWG  WG  GY K+ +  +N CG+   A+
+Sbjct: 465 NSWGPNWGEAGYFKLYRG-KNVCGVQEMAT 493
+
+
+>R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id:AAC69091.2
+          Length = 383
+
+ Score =  176 bits (446), Expect = 2e-44
+ Identities = 113/309 (36%), Positives = 171/309 (54%), Gaps = 39/309 (12%)
+
+Query: 42  MNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPR 101
+           + E  +R  ++ +N+  IE   +E R       + +N F D T EE ++++   Q  K  
+Sbjct: 96  VEEFEYRYQIFLRNV--IEFEAEEERN--LGLDLDVNEFTDWTDEELQKMV---QENKYT 148
+
+Query: 102 KGKVFQEPLFYEA--------PRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFR 153
+           K   F  P F  +        P S+DWRE+G +TP+KNQGQCGSCWAF+   ++E Q   
+Sbjct: 149 KYD-FDTPKFEGSYLETGVIRPASIDWREQGKLTPIKNQGQCGSCWAFATVASVEAQNAI 207
+
+Query: 154 KTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKY 213
+           K G+L+SLSEQ +VDC G   N GC+GG   YA ++V++N GL+SE+ YPY A     K+
+Sbjct: 208 KKGKLVSLSEQEMVDCDG--RNNGCSGGYRPYAMKFVKEN-GLESEKEYPYSA----LKH 260
+
+Query: 214 NPKYSVANDTG-FVD----IPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEP- 267
+           +  +   NDT  F+D    +   E+ +   V T GP++  ++   ++   Y+ GI F P 
+Sbjct: 261 DQCFLKENDTRVFIDDFRMLSNNEEDIANWVGTKGPVTFGMNV-VKAMYSYRSGI-FNPS 318
+
+Query: 268 --DCSSEDMD-HGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGI 324
+             DC+ + M  H + ++GYG E      + YW+VKNSWG  WG  GY ++A+   N CG+
+Sbjct: 319 VEDCTEKSMGAHALTIIGYGGEG----ESAYWIVKNSWGTSWGASGYFRLARG-VNSCGL 373
+
+Query: 325 ASAASYPTV 333
+           A+    P +
+Sbjct: 374 ANTVVAPII 382
+
+
+>Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4
+           protein_id:CAA22062.1
+          Length = 343
+
+ Score =  133 bits (335), Expect = 1e-31
+ Identities = 91/284 (32%), Positives = 146/284 (51%), Gaps = 28/284 (9%)
+
+Query: 48  RRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQ 107
+           R  ++ +N+ ++E +N+E   GK   T  +N F D+T EE+++ +      KP   +   
+Sbjct: 71  RFTIFSRNLDLVERYNKE-DAGK--VTYELNDFSDLTEEEWKKYL---MTPKPDHSEKSL 124
+
+Query: 108 EPLFY----EAPRSVDWRE---KGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLIS 160
+           +P         P SVDWR      +VT +K QG CGSCWAF+   A+E  +    G L S
+Sbjct: 125 KPKTLIDKKNLPNSVDWRNVNGTNHVTGIKYQGPCGSCWAFATAAAIESAVSISGGGLQS 184
+
+Query: 161 LSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVA 220
+           LS Q L+DC+    ++ C GG    A +Y Q + G+ +  +YPY      C+     +VA
+Sbjct: 185 LSSQQLLDCT--VVSDKCGGGEPVEALKYAQSH-GITTAHNYPYYFWTTKCRETVP-TVA 240
+
+Query: 221 NDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLV 280
+             + ++   + E  + + VA  GP+ V  +       FY  GI  +PDC +E   H ++V
+Sbjct: 241 RISSWMK-AESEDEMAQIVALNGPMIVCANFATNKNRFYHSGIAEDPDCGTEP-THALIV 298
+
+Query: 281 VGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGI 324
+           +GYG +        YW++KN++ + WG  GY+++ +D  N CGI
+Sbjct: 299 IGYGPD--------YWILKNTYSKVWGEKGYMRVKRD-VNWCGI 333
+
+
+>R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810
+           protein_id:CAA89070.1
+          Length = 402
+
+ Score =  130 bits (327), Expect = 1e-30
+ Identities = 89/265 (33%), Positives = 130/265 (48%), Gaps = 27/265 (10%)
+
+Query: 78  NAFGDMTSEEFRQVM--NGFQNRKPRKGKVFQEPLFYEA-----------PRSVDWREKG 124
+           N   D T EEF + +    F  R  ++ + F EP+               P   DWR+K 
+Sbjct: 138 NDMSDWTDEEFEKTLLPKSFYKRLHKEAE-FIEPIPESLTAKKGESSSPFPDFFDWRDKN 196
+
+Query: 125 YVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMD 184
+            +TPVK QGQCGSCWAF++T  +E       G   +LSEQ L+DC     +  C+GG  D
+Sbjct: 197 VITPVKAQGQCGSCWAFASTATVEAAWAIAHGEKRNLSEQTLLDCD--LVDNACDGGDED 254
+
+Query: 185 YAFQYVQDNGGLDSEESYPYEA-TEESCKYNPKYSVANDTGFVDIPKQEKALMKAVATVG 243
+            AF+Y+  N GL +    PY A  +  C  N  ++         +   E +++  +   G
+Sbjct: 255 KAFRYIHRN-GLANAVDLPYVAHRQNGCAVNDHWNTTRIKAAYFLHHDEDSIINWLVNFG 313
+
+Query: 244 PISVAIDAGHESFLFYKEGIY--FEPDCSSEDMD-HGVLVVGYGFESTESDNNKYWLVKN 300
+           P+++ + A  +    YK G++   E  C +E +  H +L+ GYG   T     KYW+VKN
+Sbjct: 314 PVNIGM-AVIQPMRAYKGGVFTPSEYACKNEVIGLHALLITGYG---TSKTGEKYWIVKN 369
+
+Query: 301 SWGEEWGM-GGYVKMAKDRRNHCGI 324
+           SWG  WG+  GY+  A+   N CGI
+Sbjct: 370 SWGNTWGVEHGYIYFARG-INACGI 393
+
+
+>Y51A2D.8 CE19204   Cysteine proteases (2 domains) (HINXTON) TR:Q9XXQ7
+           protein_id:CAA16407.1
+          Length = 386
+
+ Score =  123 bits (308), Expect = 2e-28
+ Identities = 87/322 (27%), Positives = 145/322 (45%), Gaps = 39/322 (12%)
+
+Query: 32  WKAMHNRLYGMNEEGWRRAV-WEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQ 90
+           +K  +NR Y    E  +R   + K+   ++  N + +   +     +N F D+++ EF  
+Sbjct: 46  FKKKYNRKYKDESENQQRFNNFVKSYNNVDKLNAKSKAAGYDTQFGINKFSDLSTAEFHG 105
+
+Query: 91  VMNG-------------FQNRKP-----------RKGKVFQEPLFYEAPRSVDWREKGYV 126
+            ++              F  +KP            K +  + P +++  R+     +  V
+Sbjct: 106 RLSNVVPSNNTGLPMLNFDKKKPDFRAADMNKTRHKRRSTRYPDYFDL-RNEKINGRYIV 164
+
+Query: 127 TPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYA 186
+            P+K+QGQC  CW F+ T  +E      +G+  SLS+Q + DC G +G  GC GG +   
+Sbjct: 165 GPIKDQGQCACCWGFAVTALVETVYAAHSGKFKSLSDQEVCDC-GTEGTPGCKGGSLTLG 223
+
+Query: 187 FQYVQDNGGLDSEESYPYEATEES----CKYNPKYSVANDTGF---VDIPKQEKALMKAV 239
+            QYV+   GL  +E YPY+    +    C+      +     F   V  P++ +  +  V
+Sbjct: 224 VQYVK-KYGLSGDEDYPYDQNRANQGRRCRLRETDRIVPARAFNFAVINPRRAEEQIIQV 282
+
+Query: 240 ATVG--PISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYG-FESTESDNNKYW 296
+            T    P++V    G + F  YKEG+  E DC      H   +VGY   E +   ++ YW
+Sbjct: 283 LTEWKVPVAVYFKVG-DQFKEYKEGVIIEDDCRRATQWHAGAIVGYDTVEDSRGRSHDYW 341
+
+Query: 297 LVKNSWGEEWGMGGYVKMAKDR 318
+           ++KNSWG +W   GYV++ + R
+Sbjct: 342 IIKNSWGGDWAESGYVRVVRGR 363
+
+
+>K02E7.10 CE11640   protease (ST.LOUIS) TR:O17255 protein_id:AAB71030.1
+          Length = 299
+
+ Score =  119 bits (298), Expect = 3e-27
+ Identities = 73/219 (33%), Positives = 112/219 (50%), Gaps = 14/219 (6%)
+
+Query: 118 VDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFR-KTGRLISLSEQNLVDCSGPQGNE 176
+           +DWREKG V PVK+QG+C + +AF+A  A+E    +   G+L+S SEQ ++DC+      
+Sbjct: 84  LDWREKGIVGPVKDQGKCNASYAFAAIAAIESMYAKANNGKLLSFSEQQIIDCA--NFTN 141
+
+Query: 177 GCNGGLMDYAFQYVQDNGGLDSEESYPYEATEE--SCKYNPKYSVANDTGFVDIPKQEKA 234
+            C   L +          G+ +E  YPY   E    C+Y+        T ++D+   E+ 
+Sbjct: 142 PCQENLENVLSNRFLKENGVGTEADYPYVGKENVGKCEYDSSKMKLRPT-YIDVYPNEEW 200
+
+Query: 235 LMKAVATVGPISVAIDAGHESFLFYKEGIY--FEPDCSSEDMDHGVLVVGYGFESTESDN 292
+               + T G     +     SF  YK GIY   + +C + +    + +VGYG +  E   
+Sbjct: 201 ARAHITTFGTGYFRM-RSPPSFFHYKTGIYNPTKEECGNANEARSLAIVGYGKDGAE--- 256
+
+Query: 293 NKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYP 331
+            KYW+VK S+G  WG  GY+K+A++  N CG+A + S P
+Sbjct: 257 -KYWIVKGSFGTSWGEHGYMKLARN-VNACGMAESISIP 293
+
+
+>Y71H2AR.2 CE22930    (ST.LOUIS) protein_id:AAK29985.1
+          Length = 345
+
+ Score =  119 bits (297), Expect = 3e-27
+ Identities = 79/219 (36%), Positives = 115/219 (52%), Gaps = 12/219 (5%)
+
+Query: 118 VDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKT-GRLISLSEQNLVDCSGPQGNE 176
+           +DWREKG V PVK+QG+C +  AF+ T ++E    + T G L+S SEQ L+DC+  QG +
+Sbjct: 86  LDWREKGIVGPVKDQGKCNASHAFAITSSIESMYAKATNGTLLSFSEQQLIDCN-DQGYK 144
+
+Query: 177 GCNGGLMDYAFQYVQDNGGLDSEESYPY-EATEESCKYNPKYSVANDTGFVDIPKQEKAL 235
+           GC       A  Y+  + G+++E  YPY + T E C ++   S  +    V     E   
+Sbjct: 145 GCEEQFAMNAIGYLATH-GIETEADYPYVDKTNEKCTFDSTKSKIHLKKGVVAEGNEVLG 203
+
+Query: 236 MKAVATVGPISVAIDAGHESFLFYKEGIYFE--PDCSSEDMDHGVLVVGYGFESTESDNN 293
+              V   GP    + A   S   YK GIY     +C+S      +++VGYG E  +    
+Sbjct: 204 KVYVTNYGPAFFTMRA-PPSLYDYKIGIYNPSIEECTSTHEIRSMVIVGYGIEGEQ---- 258
+
+Query: 294 KYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPT 332
+           KYW+VK S+G  WG  GY+K+A+D  N C +A+  +  T
+Sbjct: 259 KYWIVKGSFGTSWGEQGYMKLARD-VNACAMATTIAVLT 296
+
+
+>Y51A2D.1 CE18411   Cysteine proteases (2 domains) (HINXTON) TR:O62484
+           protein_id:CAA16404.1
+          Length = 382
+
+ Score =  105 bits (262), Expect = 4e-23
+ Identities = 95/353 (26%), Positives = 148/353 (41%), Gaps = 76/353 (21%)
+
+Query: 28  QWTKWKAMHNRLYGMNEEGWRRA---VWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMT 84
+           ++ ++K   +R Y    E   R    V  +N  ++ L+    + G++S   A+N F D+T
+Sbjct: 43  EFVEFKKKFSRTYKSEAENQLRLQNFVKSRN-NVVRLNKNAQKAGRNS-NFAVNQFSDLT 100
+
+Query: 85  SEEFRQVMNGF-----------QNRKPRKGKVFQEPLFYEAPRSVDWREKGY-----VTP 128
+           + E  Q ++ F           +N K   GK   +    E  R+ D R +       V P
+Sbjct: 101 TSELHQRLSRFPPNLTENSVFHKNFKKLLGKTRTKRQNSEFARNFDLRSQKVNGRYIVGP 160
+
+Query: 129 VKNQGQCGSCWAFSATGALEG------------------------------QMFRKTGRL 158
+           +KNQGQC  CW F+ T  LE                               +   K    
+Sbjct: 161 IKNQGQCACCWGFAVTAMLETIYAVNVGRFKLMSHIPALAPNFSDFDFFFFEFLAKLNMF 220
+
+Query: 159 ISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYS 218
+           +S S+Q + DC+      GC GG + +  +Y  +N GL SE  YP      + +     +
+Sbjct: 221 LSFSDQEMCDCATDGTKAGCAGGGLMWGVEYAINN-GLASEFDYPEFDQNRATRPGTCEA 279
+
+Query: 219 VANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCS-SEDMDHG 277
+           + +D                  T  P++ A  AG  +FL YK G+    DC  +  + H 
+Sbjct: 280 MDDD-----------------KTFPPVNFA--AG-TAFLQYKSGVLVTEDCDLAGTVWHA 319
+
+Query: 278 VLVVGYGFES-TESDNNKYWLVKNSWG-EEWGMGGYVKMAKDRRNHCGIASAA 328
+             +VGYG E+     + ++W++KNSWG   WG GGYVK+ +  +N CGI   A
+Sbjct: 320 GAIVGYGEENDLRGRSQRFWIMKNSWGVSGWGTGGYVKLIRG-KNWCGIERGA 371
+
+
+>F26E4.3 CE17714   cysteine protease (HINXTON) TR:P90850
+           protein_id:CAB03007.1
+          Length = 491
+
+ Score =  100 bits (250), Expect = 1e-21
+ Identities = 73/245 (29%), Positives = 110/245 (44%), Gaps = 35/245 (14%)
+
+Query: 113 EAPRSVDWREKG--YVTPVKNQGQCGSCWAFSATGALEGQM-FRKTGRLIS-LSEQNLVD 168
+           E P   D R+K    + PV +QG CGS W+ S T     ++     GR+ S LS Q L+ 
+Sbjct: 222 ELPEHFDARDKWGPLIHPVADQGDCGSSWSVSTTAISSDRLAIISEGRINSTLSSQQLLS 281
+
+Query: 169 CSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPY---EATEESCKYNPKYSVANDTGF 225
+           C+  +  +GC GG +D A+ Y++  G +  +  YPY   ++ E      PK    N  G 
+Sbjct: 282 CNQHR-QKGCEGGYLDRAWWYIRKLGVV-GDHCYPYVSGQSREPGHCLIPKRDYTNRQGL 339
+
+Query: 226 -----------------VDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPD 268
+                              +  +E+ +   + T GP+       HE F  Y  G+Y   D
+Sbjct: 340 RCPSGSQDSTAFKMTPPYKVSSREEDIQTELMTNGPVQATFVV-HEDFFMYAGGVYQHSD 398
+
+Query: 269 CSSE-------DMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNH 321
+            +++       +  H V V+G+G + +     KYWL  NSWG +WG  GY K+ +   NH
+Sbjct: 399 LAAQKGASSVAEGYHSVRVLGWGVDHSTGKPIKYWLCANSWGTQWGEDGYFKVLRG-ENH 457
+
+Query: 322 CGIAS 326
+           C I S
+Sbjct: 458 CEIES 462
+
+
+>Y113G7B.15 CE23295    (HINXTON) TR:Q9U2X1 protein_id:CAB54334.1
+          Length = 328
+
+ Score =  100 bits (248), Expect = 2e-21
+ Identities = 92/317 (29%), Positives = 130/317 (40%), Gaps = 37/317 (11%)
+
+Query: 44  EEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNG--------- 94
+           E+  R A + KN + I+  N + R    + T   N F D   +E     +          
+Sbjct: 12  EKDRRLAHFAKNHQKIQELNAKARREGRNVTFGWNKFADKNRQELSARNSKIHPKNHTDL 71
+
+Query: 95  --FQNRKPRKGKVFQEPLFY----EAPRSVDWRE-----KGYVTPVKNQGQCGSCWAFSA 143
+             ++ R PR  +            + P   D R+        V PVK+Q QCG CWAF+ 
+Sbjct: 72  PIYKPRHPRGSRNHHNKRSKRQSGDIPDYFDLRDIYVDGSPVVGPVKDQEQCGCCWAFAT 131
+
+Query: 144 TGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYP 203
+           T   E      +    SLS+Q + DC+      GC GG      + V    G  S+  YP
+Sbjct: 132 TAITEAANTLYSKSFTSLSDQEICDCADSGDTPGCVGGDPRNGLKMVHLR-GQSSDGDYP 190
+
+Query: 204 YEA----TEESCKYNPKYSV-----ANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHE 254
+           YE     T  +C  + K +V      N   F     +E  +        P +V    G E
+Sbjct: 191 YEEYRANTTGNCVGDEKSTVIQPETLNVYRFDQDYAEEDIMENLYLNHIPTAVYFRVG-E 249
+
+Query: 255 SFLFYKEGIYFEPDC--SSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYV 312
+           +F +Y  G+    DC   +    H V +VGYG   T  D   YWLV+NSW  +WG+ GYV
+Sbjct: 250 NFEWYTSGVLQSEDCYQMTPAEWHSVAIVGYG---TSDDGVPYWLVRNSWNSDWGLHGYV 306
+
+Query: 313 KMAKDRRNHCGIASAAS 329
+           K+ +   N C I S A+
+Sbjct: 307 KIRRG-VNWCLIESHAA 322
+
+
+>F15D4.4 CE28917   cysteine protease (HINXTON) TR:Q93512
+           protein_id:CAB02487.1
+          Length = 622
+
+ Score = 97.8 bits (242), Expect = 8e-21
+ Identities = 77/296 (26%), Positives = 127/296 (42%), Gaps = 39/296 (13%)
+
+Query: 44  EEGWRRA-VWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRK 102
+           +EG +R  V+ K  K ++ HN  Y  G  S+ M+ N F      E   +        P  
+Sbjct: 149 KEGLKRFNVYSKVKKEVDEHNIMYELGMSSYKMSTNQFSVALDGEVAPLTLNLDALTPTA 208
+
+Query: 103 GKV---FQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLI 159
+             +          +   +VDWR   ++ P+ +Q  CG CWAFS    +E     +     
+Sbjct: 209 TVIPATISSRKKRDTEPTVDWRP--FLKPILDQSTCGGCWAFSMISMIESFFAIQGYNTS 266
+
+Query: 160 SLSEQNLVDCSGP------QGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCK- 212
+           SLS Q L+ C           N GC GG    A  Y++ +   D+    P++  + SC  
+Sbjct: 267 SLSVQQLLTCDTKVDSTYGLANVGCKGGYFQIAGSYLEVSAARDA-SLIPFDLEDTSCDS 325
+
+Query: 213 ------------YNPKYSVANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYK 260
+                       ++  Y   N T    I  ++   ++     GPI+V + AG + +  Y 
+Sbjct: 326 SFFPPVVPTILLFDDGYISGNFTAAQLITMEQN--IEDKVRKGPIAVGMAAGPDIYK-YS 382
+
+Query: 261 EGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAK 316
+           EG+Y + DC +  ++H V++VG+         + YW+++NSWG  WG  GY ++ +
+Sbjct: 383 EGVY-DGDCGT-IINHAVVIVGF--------TDDYWIIRNSWGASWGEAGYFRVKR 428
+
+
+>C50F4.3 CE05468   thiol protease (HINXTON) TR:Q18740 protein_id:CAA94738.1
+          Length = 374
+
+ Score = 94.4 bits (233), Expect = 9e-20
+ Identities = 80/292 (27%), Positives = 125/292 (42%), Gaps = 36/292 (12%)
+
+Query: 63  NQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQ-----------NRKPRKGKVFQEPLF 111
+           N+  ++  H     +N F D++ +E   + + F            N K  + K   E L 
+Sbjct: 82  NKAAKKAGHDTKYGINKFSDLSKKEIHGMYSKFGPPKNNTNVPKFNLKNLRVKRQMEGL- 140
+
+Query: 112 YEAPRSVDWREKGY-----VTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNL 166
+              P++ D R K       + P+K Q  C  CW F+AT   E  +     + ++LSEQ +
+Sbjct: 141 ---PKTFDLRNKKVGGHYIIGPIKTQDSCACCWGFAATAVAEAALTVHLKKAMNLSEQEV 197
+
+Query: 167 VDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATE-------ESCKYNPKYS- 218
+            DC+ P+   GCNGG      +Y+++  GL   + YP+           ES KY+ + + 
+Sbjct: 198 CDCA-PKHGPGCNGGDPVDGLEYIKEM-GLTGGKEYPFNVNRSTQLGRCESEKYDRELNP 255
+
+Query: 219 VANDTGFVDIPKQEKALMKAVATVG-PISVAIDAGHESFLFYKEGIYFEPDCSSEDMD-- 275
+           +  D   +D    E  +   +  +  PISVA   G  S   Y  GI    DC  E     
+Sbjct: 256 LELDYYAIDPFNAEYQMTHHLYLLNLPISVAFRTG-ASLSSYLSGILELADCDDEKGGHW 314
+
+Query: 276 HGVLVVGYGFESTESDNN-KYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIAS 326
+           H   +VGYG     +     YW+ +NSW  +WG  GY ++ +   + C I S
+Sbjct: 315 HSGAIVGYGTTKNSAGRTVDYWIFRNSWWTDWGDDGYARIVRG-EDWCSIES 365
+
+
+>C32B5.7 CE08515   cathepsin-like peptidase (ST.LOUIS) TR:P91111
+           protein_id:AAB37963.1
+          Length = 250
+
+ Score = 94.0 bits (232), Expect = 1e-19
+ Identities = 63/191 (32%), Positives = 100/191 (51%), Gaps = 18/191 (9%)
+
+Query: 118 VDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEG 177
+           +DWR++G V PVK+QG C + +AF+A  A+E       G+L+S SEQ ++DC G    E 
+Sbjct: 72  LDWRDEGVVGPVKDQGNCNASYAFAAISAIESMYAIANGQLLSFSEQQIIDCLGGCAIES 131
+
+Query: 178 CNGGLMDYAFQYVQDNGGLDSEESYPYEATE-ESCKYNPK--YSVANDTGFVDIPKQEKA 234
+                M Y      +  G+++   YP+   + E C+Y+ K  Y + +DT   D+  +  A
+Sbjct: 132 DPMMAMTYL-----ERKGIETYTDYPFVGKKNEKCEYDSKKAYLILDDT--YDMSDESLA 184
+
+Query: 235 LMKAVATVGPISVAIDAGHESFLFYKEGIY--FEPDCSSEDMDHGVLVVGYGFESTESDN 292
+           L+  +   GP    ++    SF  YK GIY   E +C S +    + +VGYG +  ++  
+Sbjct: 185 LV-FIDERGPGLFTMNT-PPSFFNYKSGIYNPTEEECKSTNEKRALTIVGYGNDKGQN-- 240
+
+Query: 293 NKYWLVKNSWG 303
+             YW+VK S+G
+Sbjct: 241 --YWIVKGSFG 249
+
+
+>M04G12.2 CE12424   cysteine protease (HINXTON) TR:P92005
+           protein_id:CAB03209.1
+          Length = 467
+
+ Score = 92.0 bits (227), Expect = 4e-19
+ Identities = 82/288 (28%), Positives = 133/288 (45%), Gaps = 45/288 (15%)
+
+Query: 66  YREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRK-GKVFQE---PLFYEA------- 114
+           Y E      + M++  + +SEE+ +     +    +K GKVF+    P  +E+       
+Sbjct: 161 YYEPNDEALVDMSSESEESSEEWEEARPYLKCGCLKKSGKVFESKTAPREWESSSFKSND 220
+
+Query: 115 -PRSVDWREKG---YVTPVKNQG---QCGSCWAFSATGALEGQM-FRKTGR--LISLSEQ 164
+            P   DWR      Y +P +NQ     CGSCW F  TGAL  +    + GR  +  LS Q
+Sbjct: 221 LPTGWDWRNVSGVNYCSPTRNQHIPVYCGSCWVFGTTGALNDRFNVARKGRWPMTQLSPQ 280
+
+Query: 165 NLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEE---------SCKYNP 215
+            ++DC+G +GN  C GG +    ++ +  G L  E    Y AT           SC  N 
+Sbjct: 281 EIIDCNG-KGN--CQGGEIGNVLEHAKIQG-LVEEGCNVYRATNGECNPYHRCGSCWPNE 336
+
+Query: 216 KYSVANDTGFV-----DIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCS 270
+            +S+ N T +       +  ++K +M  +   GPI+ AI A  +    Y +G+Y E   S
+Sbjct: 337 CFSLTNYTRYYVKDYGQVQGRDK-IMSEIKKGGPIACAIGATKKFEYEYVKGVYSEK--S 393
+
+Query: 271 SEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDR 318
+             + +H + + G+G    + +  +YW+ +NSWGE WG  G+ ++   +
+Sbjct: 394 DLESNHIISLTGWG---VDENGVEYWIARNSWGEAWGELGWFRVVTSK 438
+
+
+>C52E4.1 CE08943  locus:cpr-1 cathepsin-like cysteine protease (HINXTON)
+           TR:Q18783 protein_id:CAB01410.1
+          Length = 340
+
+ Score = 88.6 bits (218), Expect = 5e-18
+ Identities = 66/251 (26%), Positives = 104/251 (41%), Gaps = 37/251 (14%)
+
+Query: 107 QEPLFYEAPRSVD----WREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKT--GRLIS 160
+           QE +    P + D    W E   +  +++Q  CGSCWAF A   +  +   +T   +   
+Sbjct: 89  QEVVLASVPATFDSRTQWSECKSIKLIRDQATCGSCWAFGAAEMISDRTCIETKGAQQPI 148
+
+Query: 161 LSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESY-----PYE---------- 205
+           +S  +L+ C G     GC GG    A ++    G +   + +     PY           
+Sbjct: 149 ISPDDLLSCCGSSCGNGCEGGYPIQALRWWDSKGVVTGGDYHGAGCKPYPIAPCTSGNCP 208
+
+Query: 206 -----ATEESCKYNPKYSVANDTGF----VDIPKQEKALMKAVATVGPISVAIDAGHESF 256
+                +   SC+     + A D  F      +PK   ++   +   GP+  A    +E F
+Sbjct: 209 ESKTPSCSMSCQSGYSTAYAKDKHFGVSAYAVPKNAASIQAEIYANGPVEAAFSV-YEDF 267
+
+Query: 257 LFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAK 316
+             YK G+Y +         H + ++G+G ES     + YWLV NSWG  WG  G+ K+ +
+Sbjct: 268 YKYKSGVY-KHTAGKYLGGHAIKIIGWGTES----GSPYWLVANSWGVNWGESGFFKIYR 322
+
+Query: 317 DRRNHCGIASA 327
+              + CGI SA
+Sbjct: 323 G-DDQCGIESA 332
+
+
+>F32B5.8 CE09855   cysteine proteinase (ST.LOUIS) TR:O01850
+           protein_id:AAB54210.1
+          Length = 427
+
+ Score = 88.2 bits (217), Expect = 6e-18
+ Identities = 85/288 (29%), Positives = 130/288 (44%), Gaps = 54/288 (18%)
+
+Query: 75  MAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLF---YEA--------PRSVDWREK 123
+           +A +A+G +     R   N  +    + G+VF+   +   YE         P++ DWR+ 
+Sbjct: 137 LASSAYGKVRKYSNRNRYN-LKGCYKQTGRVFEHKRYDRIYETEDFDSEDLPKTWDWRDA 195
+
+Query: 124 G---YVTPVKNQG---QCGSCWAFSATGALEGQMFRKTGRL---ISLSEQNLVDCSGP-- 172
+               Y +  +NQ     CGSCWAF AT AL  ++  K         LS Q ++DCSG   
+Sbjct: 196 NGINYASADRNQHIPQYCGSCWAFGATSALADRINIKRKNAWPQAYLSVQEVIDCSGAGT 255
+
+Query: 173 --QGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCK-YN-------------PK 216
+              G E   GG+  YA ++     G+  E    Y+A +  C  YN               
+Sbjct: 256 CVMGGEP--GGVYKYAHEH-----GIPHETCNNYQARDGKCDPYNRCGSCWPGECFSIKN 308
+
+Query: 217 YSVANDTGFVDIPKQEKALMKA-VATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMD 275
+           Y++   + +  +   EK  MKA +   GPI+  I A  ++F  Y  GIY E   + ED+D
+Sbjct: 309 YTLYKVSEYGTVHGYEK--MKAEIYHKGPIACGI-AATKAFETYAGGIYKE--VTDEDID 363
+
+Query: 276 HGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCG 323
+           H + V G+G +       +YW+ +NSWGE WG  G+ K+   +  + G
+Sbjct: 364 HIISVHGWGVD--HESGVEYWIGRNSWGEPWGEHGWFKIVTSQYKNAG 409
+
+
+>F57F5.1 CE05999   cysteine protease (HINXTON) TR:Q20950
+           protein_id:CAB00098.1
+          Length = 400
+
+ Score = 85.9 bits (211), Expect = 3e-17
+ Identities = 72/280 (25%), Positives = 114/280 (40%), Gaps = 51/280 (18%)
+
+Query: 89  RQVMNGFQNRKPRKGKVFQ--EPLFYEA--PRSVD----WREKGYVTPVKNQGQCGSCWA 140
+           +Q+M       P + +VF+   P   +A  P S D    W     ++ +++Q  CGSCWA
+Sbjct: 117 KQLMGAKMVEIPEEYRVFEMTHPEVEDAAVPDSFDSRTAWPNCPSISKIRDQSSCGSCWA 176
+
+Query: 141 FSATGALEGQ--MFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQY--------- 189
+            SA   +  +  +      ++S+S  ++  C G     GCNGG    A+++         
+Sbjct: 177 VSAAETISDRICIASNAKTILSISADDINACCGMVCGNGCNGGYPIEAWRHYVKKGYVTG 236
+
+Query: 190 --VQDNGGLD-------------------SEESYPYEATEESCKYNPKYSVANDTGF--- 225
+              QD  G                         YP +  E SC+     +   D  F   
+Sbjct: 237 GSYQDKTGCKPYPYPPCEHHVNGTHYKPCPSNMYPTDKCERSCQAGYALTYQQDLHFGQS 296
+
+Query: 226 -VDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYG 284
+              + K+   + K + T GP+ VA    +E F  Y  G+Y     +S    H V ++G+G
+Sbjct: 297 AYAVSKKAAEIQKEIMTHGPVEVAFTV-YEDFEHYSGGVYVHTAGASLG-GHAVKMLGWG 354
+
+Query: 285 FESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGI 324
+            +    +   YWL  NSW E+WG  GY ++ +   N CGI
+Sbjct: 355 VD----NGTPYWLCANSWNEDWGENGYFRIIRG-VNECGI 389
+
+
+>T10H4.12 CE27590  locus:cpr-3 protease (HINXTON) TR:Q9TW93
+           protein_id:CAB61024.2
+          Length = 370
+
+ Score = 77.4 bits (189), Expect = 1e-14
+ Identities = 80/345 (23%), Positives = 131/345 (37%), Gaps = 76/345 (22%)
+
+Query: 12  LGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGKH 71
+           +G +   +  DH    Q T W A HN +          + +E   K++++          
+Sbjct: 27  IGQSPQKVLVDHVNTVQ-TSWVAEHNEI----------SEFEMKFKVMDV---------- 65
+
+Query: 72  SFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYEAPRSVDWREK----GYVT 127
+            F   +    D+ SE F             +G++  EPL    P + D REK      + 
+Sbjct: 66  KFAEPLEKDSDVASELFV------------RGEIVPEPL----PDTFDAREKWPDCNTIK 109
+
+Query: 128 PVKNQGQCGSCWAFSATGALEGQMFRKTG--RLISLSEQNLVDCSGPQGNEGCNGGLMDY 185
+            ++NQ  CGSCWAF A   +  ++  ++   +   +S ++++ C G     GC GG    
+Sbjct: 110 LIRNQATCGSCWAFGAAEVISDRVCIQSNGTQQPVISVEDILSCCGTTCGYGCKGGYSIE 169
+
+Query: 186 AFQYVQDNGGLDSEE-----SYPY----------EATEESCK-----------YNPKYSV 219
+           A ++   +G +   +       PY          E+T  SCK           Y      
+Sbjct: 170 ALRFWASSGAVTGGDYGGHGCMPYSFAPCTKNCPESTTPSCKTTCQSSYKTEEYKKDKHY 229
+
+Query: 220 ANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVL 279
+                 V   K    +   +   GP+  +    +E F  YK G+Y           H V 
+Sbjct: 230 GASAYKVTTTKSVTEIQTEIYHYGPVEASYKV-YEDFYHYKSGVYHYTSGKLVG-GHAVK 287
+
+Query: 280 VVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGI 324
+           ++G+G E    +   YWL+ NSWG  +G  G+ K+ +   N C I
+Sbjct: 288 IIGWGVE----NGVDYWLIANSWGTSFGEKGFFKIRRG-TNECQI 327
+
+
+>F36D3.9 CE15973   cysteine protease (HINXTON) TR:O45466
+           protein_id:CAB04322.1
+          Length = 345
+
+ Score = 77.0 bits (188), Expect = 1e-14
+ Identities = 65/245 (26%), Positives = 100/245 (40%), Gaps = 36/245 (14%)
+
+Query: 109 PLFYEAPRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLIS--LSEQNL 166
+           PL ++A     W +   +  ++ Q  CGSCWAFS    +  +    +       +S  +L
+Sbjct: 102 PLNFDA--RTRWPQCKSMKLIREQSNCGSCWAFSTAEVISDRTCIASNGTQQPIISPTDL 159
+
+Query: 167 VDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEE-------SYPYEATEE---------- 209
+           + C G    EGC+GG    AFQ+    G +   +        YP                
+Sbjct: 160 LTCCGMSCGEGCDGGFPYRAFQWWARRGVVTGGDYLGTGCKPYPIRPCNSDNCVNLQTPP 219
+
+Query: 210 ---SCKYNPKYSVANDTGF-----VDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKE 261
+              SC+   + +  ND  +       +P+   A+   +   GP+ VA    +E F  YK 
+Sbjct: 220 CRLSCQPGYRTTYTNDKNYGSNSAYPVPRTVAAIQADIYYNGPV-VAAFIVYEDFEKYKS 278
+
+Query: 262 GIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNH 321
+           GIY      S+   H V ++G+G E        YWL  NSWG +WG  G  ++ +   + 
+Sbjct: 279 GIYRHIAGRSKG-GHAVKLIGWGTER----GTPYWLAVNSWGSQWGESGTFRILRG-VDE 332
+
+Query: 322 CGIAS 326
+           CGI S
+Sbjct: 333 CGIES 337
+
+
+>C25B8.3 CE04078  locus:cpr-6  (ST.LOUIS) protein_id:AAK39189.1
+          Length = 379
+
+ Score = 77.0 bits (188), Expect = 1e-14
+ Identities = 67/255 (26%), Positives = 106/255 (41%), Gaps = 49/255 (19%)
+
+Query: 113 EAPRSVD----WREKGYVTPVKNQGQCGSCWAFSATGALEGQM-FRKTGRL-ISLSEQNL 166
+           + P S D    W +   +  +++Q  CGSCWAF A  A+  ++     G L ++LS  +L
+Sbjct: 104 DIPESFDSRDNWPKCDSIKVIRDQSSCGSCWAFGAVEAMSDRICIASHGELQVTLSADDL 163
+
+Query: 167 VDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSE--------ESYPYEATEESCKYN---- 214
+           + C    G  GCNGG    A++Y   +G +           + YP+   E   K      
+Sbjct: 164 LSCCKSCGF-GCNGGDPLAAWRYWVKDGIVTGSNYTANNGCKPYPFPPCEHHSKKTHFDP 222
+
+Query: 215 --------PKYSVANDTGFVDIPKQE---------------KALMKAVATVGPISVAIDA 251
+                   PK      + + D    E               +A+ K + T GP+ +A + 
+Sbjct: 223 CPHDLYPTPKCEKKCVSDYTDKTYSEDKFFGASAYGVKDDVEAIQKELMTHGPLEIAFEV 282
+
+Query: 252 GHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGY 311
+            +E FL Y  G+Y           H V ++G+G +    D   YW V NSW  +WG  G+
+Sbjct: 283 -YEDFLNYDGGVYVHTG-GKLGGGHAVKLIGWGID----DGIPYWTVANSWNTDWGEDGF 336
+
+Query: 312 VKMAKDRRNHCGIAS 326
+            ++ +   + CGI S
+Sbjct: 337 FRILRG-VDECGIES 350
+
+
+>W07B8.4 CE14680   thiol protease (ST.LOUIS) TR:O16288 protein_id:AAB65345.1
+          Length = 335
+
+ Score = 75.9 bits (185), Expect = 3e-14
+ Identities = 66/249 (26%), Positives = 99/249 (39%), Gaps = 47/249 (18%)
+
+Query: 120 WREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLIS--LSEQNLVDCSGPQGN-- 175
+           W +   V  +++Q  CGSCWA +A  A+  +    +   ++  LS ++++ C   + N  
+Sbjct: 83  WPQCISVNNIRDQSHCGSCWAVAAAEAISDRTCIASNGDVNTLLSAEDILTCCTGKFNCG 142
+
+Query: 176 EGCNGGLMDYAFQYVQDNG---GLDSEESY---PYEAT---------------------- 207
+           +GC GG    A++Y   NG   G   E  Y   PY                         
+Sbjct: 143 DGCEGGYPIQAWRYWVKNGLVTGGSFESQYGCKPYSIAPCGETIDGVTWPECPMKISDTP 202
+
+Query: 208 --EESCKYNPKYSVAND------TGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFY 259
+             E  C  N  Y +  D           I +  K +   +   GP+ V     +E F  Y
+Sbjct: 203 KCEHHCTGNNSYPIPYDQDKHFGASAYAIGRSAKQIQTEILAHGPVEVGFIV-YEDFYLY 261
+
+Query: 260 KEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRR 319
+           K GIY       E   H V ++G+G ++       YWL  NSW   WG  GY ++ +   
+Sbjct: 262 KTGIYTHV-AGGELGGHAVKMLGWGVDN----GTPYWLAANSWNTVWGEKGYFRILRG-V 315
+
+Query: 320 NHCGIASAA 328
+           + CGI SAA
+Sbjct: 316 DECGIESAA 324
+
+
+>Y71H2AM.3 CE26272    (ST.LOUIS) protein_id:AAK29976.1
+          Length = 716
+
+ Score = 68.6 bits (166), Expect = 5e-12
+ Identities = 55/168 (32%), Positives = 81/168 (47%), Gaps = 23/168 (13%)
+
+Query: 118 VDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKT-GRLISLSEQNLVDCSGPQGNE 176
+           +DWR+KG V PVK+QG+C +  AF+ + ++E    + T G L+S SEQ L+DC    G +
+Sbjct: 86  LDWRDKGIVGPVKDQGKCNASHAFAISSSIESMYAKATNGSLLSFSEQQLIDCD-DHGFK 144
+
+Query: 177 GCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPKQEKALM 236
+           GC       A  Y   + G+++E  YPY   E          ++N+T       Q K L 
+Sbjct: 145 GCEEQPAINAVSYFIFH-GIETEADYPYAGKENG-------KLSNET-------QGKEL- 188
+
+Query: 237 KAVATVGPISVAIDAGHESFLFYKEGIYFE--PDCSSEDMDHGVLVVG 282
+             V   GP    + A   S   YK GIY     +C+S      +++VG
+Sbjct: 189 --VTNYGPAFFTMRA-PPSLYDYKIGIYNPSIEECTSTHEIRSMVIVG 233
+
+
+  Database: /data_2/jason/blastdb/wormpep62
+    Posted date:  Sep 3, 2001  2:17 PM
+  Number of letters in database: 8,813,425
+  Number of sequences in database:  20,085
+  
+Lambda     K      H
+   0.317    0.133    0.417 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 6230268
+Number of Sequences: 20085
+Number of extensions: 270881
+Number of successful extensions: 651
+Number of sequences better than 1.0e-10: 23
+Number of HSP's better than  0.0 without gapping: 4
+Number of HSP's successfully gapped in prelim test: 19
+Number of HSP's that attempted gapping in prelim test: 588
+Number of HSP's gapped (non-prelim): 27
+length of query: 333
+length of database: 8,813,425
+effective HSP length: 45
+effective length of query: 288
+effective length of database: 7,909,600
+effective search space: 2277964800
+effective search space used: 2277964800
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 41 (21.6 bits)
+S2: 155 (64.3 bits)
+BLASTP 2.1.3 [Apr-11-2001]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CATL_RAT
+         (334 letters)
+
+Database: /data_2/jason/blastdb/wormpep62
+           20,085 sequences; 8,813,425 total letters
+
+Searching..................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O4573...   325  2e-89
+F41E6.6 CE10254   cysteine protease and a protease inhibitor...   203  1e-52
+R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id...   192  2e-49
+R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810 pr...   139  2e-33
+Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4 pr...   131  5e-31
+
+>T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O45734
+           protein_id:CAB07275.1
+          Length = 337
+
+ Score =  325 bits (834), Expect = 2e-89
+ Identities = 159/311 (51%), Positives = 208/311 (66%), Gaps = 9/311 (2%)
+
+Query: 28  QWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEE 87
+           +W  +K    + Y  +EE+     + KNM  I+ HN ++  G+  F M +N   D+   +
+Sbjct: 31  KWDDYKEDFDKEYSESEEQTYMEAFVKNMIHIENHNRDHRLGRKTFEMGLNHIADLPFSQ 90
+
+Query: 88  FRQIVNGYRH----QKHKKGRLFQEPLMLQIPKTVDWREKGCVTPVKNQGQCGSCWAFSA 143
+           +R++ NGYR      + K    F  P  +Q+P  VDWR+   VT VKNQG CGSCWAFSA
+Sbjct: 91  YRKL-NGYRRLFGDSRIKNSSSFLAPFNVQVPDEVDWRDTHLVTDVKNQGMCGSCWAFSA 149
+
+Query: 144 SGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYP 203
+           +G LEGQ   K G+L+SLSEQNLVDCS   GN GCNGGLMD AF+YI++N G+D+EESYP
+Sbjct: 150 TGALEGQHARKLGQLVSLSEQNLVDCSTKYGNHGCNGGLMDQAFEYIRDNHGVDTEESYP 209
+
+Query: 204 YEAKDGSCKYRAEYAVANDTGFVDIPQ-QEKALMKAVATVGPISVAMDASHPSLQFYSSG 262
+           Y+ +D  C +  +   A+D G+VD P+  E+ L  AVAT GPIS+A+DA H S Q Y  G
+Sbjct: 210 YKGRDMKCHFNKKTVGADDKGYVDTPEGDEEQLKIAVATQGPISIAIDAGHRSFQLYKKG 269
+
+Query: 263 IYYEPNCSSKDLDHGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHC 322
+           +YY+  CSS++LDHGVL+VGY   GTD     YW+VKNSWG  WG  GYI+IA++RNNHC
+Sbjct: 270 VYYDEECSSEELDHGVLLVGY---GTDPEHGDYWIVKNSWGAGWGEKGYIRIARNRNNHC 326
+
+Query: 323 GLATAASYPIV 333
+           G+AT ASYP+V
+Sbjct: 327 GVATKASYPLV 337
+
+
+>F41E6.6 CE10254   cysteine protease and a protease inhibitor (ST.LOUIS)
+           TR:O16454 protein_id:AAB65956.1
+          Length = 498
+
+ Score =  203 bits (516), Expect = 1e-52
+ Identities = 122/331 (36%), Positives = 183/331 (54%), Gaps = 45/331 (13%)
+
+Query: 36  HRRLYGTNEEEWRR-AVWEKNMRMI-QLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQIVN 93
+           H + Y    E  +R  V++KN ++I +L   E     +GFT     F DMT  EF++I+ 
+Sbjct: 181 HEKKYTNKREVLKRFRVFKKNAKVIRELQKNEQGTAVYGFTK----FSDMTTMEFKKIML 236
+
+Query: 94  GYRHQKH----KKGRLFQEPLMLQ---IPKTVDWREKGCVTPVKNQGQCGSCWAFSASGC 146
+            Y+ ++     ++    +  + +    +P++ DWREKG VT VKNQG CGSCWAFS +G 
+Sbjct: 237 PYQWEQPVYPMEQANFEKHDVTINEEDLPESFDWREKGAVTQVKNQGNCGSCWAFSTTGN 296
+
+Query: 147 LEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQ----YIKEN--------- 193
+           +EG  F+   KL+SLSEQ LVDC  D  +QGCNGGL   A++     + +N         
+Sbjct: 297 VEGAWFIAKNKLVSLSEQELVDC--DSMDQGCNGGLPSNAYKIGKFVVSDNYCFLVFYHK 354
+
+Query: 194 --------GGLDSEESYPYEAKDGSCKYRAEYAVANDTGFVDIPQQEKALMKAVATVGPI 245
+                   GGL+ E++YPY+ +  +C    +       G V++P  E  + K + T GPI
+Sbjct: 355 TTKEIIRMGGLEPEDAYPYDGRGETCHLVRKDIAVYINGSVELPHDEVEMQKWLVTKGPI 414
+
+Query: 246 SVAMDASHPSLQFYSSGIY--YEPNCSSKDLDHGVLVVGYGYEGTDSNKDKYWLVKNSWG 303
+           S+ ++A+  +LQFY  G+   ++  C    L+HGVL+VGYG +G    +  YW+VKNSWG
+Sbjct: 415 SIGLNAN--TLQFYRHGVVHPFKIFCEPFMLNHGVLIVGYGKDG----RKPYWIVKNSWG 468
+
+Query: 304 KEWGMDGYIKIAKDRNNHCGLATAASYPIVN 334
+             WG  GY K+ + + N CG+   A+  +VN
+Sbjct: 469 PNWGEAGYFKLYRGK-NVCGVQEMATSALVN 498
+
+
+>R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id:AAC69091.2
+          Length = 383
+
+ Score =  192 bits (488), Expect = 2e-49
+ Identities = 116/310 (37%), Positives = 176/310 (56%), Gaps = 29/310 (9%)
+
+Query: 37  RRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNGYR 96
+           R+     E E+R  ++ +N+  I+    E  N   G  +++N F D T+EE +++V   +
+Sbjct: 91  RKYTSVEEFEYRYQIFLRNV--IEFEAEEERN--LGLDLDVNEFTDWTDEELQKMVQENK 146
+
+Query: 97  HQKHKKGRLFQEPLMLQI----PKTVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMF 152
+           + K+       E   L+     P ++DWRE+G +TP+KNQGQCGSCWAF+    +E Q  
+Sbjct: 147 YTKYDFDTPKFEGSYLETGVIRPASIDWREQGKLTPIKNQGQCGSCWAFATVASVEAQNA 206
+
+Query: 153 LKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGSCK 212
+           +K GKL+SLSEQ +VDC  D  N GC+GG   +A +++KEN GL+SE+ YPY A     K
+Sbjct: 207 IKKGKLVSLSEQEMVDC--DGRNNGCSGGYRPYAMKFVKEN-GLESEKEYPYSA----LK 259
+
+Query: 213 YRAEYAVANDTG-FVD----IPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYE- 266
+           +   +   NDT  F+D    +   E+ +   V T GP++  M+    ++  Y SGI+   
+Sbjct: 260 HDQCFLKENDTRVFIDDFRMLSNNEEDIANWVGTKGPVTFGMNVV-KAMYSYRSGIFNPS 318
+
+Query: 267 -PNCSSKDLD-HGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGL 324
+             +C+ K +  H + ++GYG EG    +  YW+VKNSWG  WG  GY ++A+  N+ CGL
+Sbjct: 319 VEDCTEKSMGAHALTIIGYGGEG----ESAYWIVKNSWGTSWGASGYFRLARGVNS-CGL 373
+
+Query: 325 ATAASYPIVN 334
+           A     PI+N
+Sbjct: 374 ANTVVAPIIN 383
+
+
+>R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810
+           protein_id:CAA89070.1
+          Length = 402
+
+ Score =  139 bits (351), Expect = 2e-33
+ Identities = 96/307 (31%), Positives = 154/307 (49%), Gaps = 36/307 (11%)
+
+Query: 40  YGTNEEEWRRAV----WEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQIV--N 93
+           Y T++E  +R       ++N+    + N E+ + ++G     N   D T+EEF + +   
+Sbjct: 101 YATSQESLKRLNAYYNTDENIANWNIQN-EHGSAEYGH----NDMSDWTDEEFEKTLLPK 155
+
+Query: 94  GYRHQKHKKGRLFQEPLMLQI-----------PKTVDWREKGCVTPVKNQGQCGSCWAFS 142
+            +  + HK+   F EP+   +           P   DWR+K  +TPVK QGQCGSCWAF+
+Sbjct: 156 SFYKRLHKEAE-FIEPIPESLTAKKGESSSPFPDFFDWRDKNVITPVKAQGQCGSCWAFA 214
+
+Query: 143 ASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESY 202
+           ++  +E    +  G+  +LSEQ L+DC  D  +  C+GG  D AF+YI  N GL +    
+Sbjct: 215 STATVEAAWAIAHGEKRNLSEQTLLDC--DLVDNACDGGDEDKAFRYIHRN-GLANAVDL 271
+
+Query: 203 PYEA-KDGSCKYRAEYAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSS 261
+           PY A +   C     +          +   E +++  +   GP+++ M    P ++ Y  
+Sbjct: 272 PYVAHRQNGCAVNDHWNTTRIKAAYFLHHDEDSIINWLVNFGPVNIGMAVIQP-MRAYKG 330
+
+Query: 262 GIY--YEPNCSSKDLD-HGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMD-GYIKIAKD 317
+           G++   E  C ++ +  H +L+ GY   GT    +KYW+VKNSWG  WG++ GYI  A+ 
+Sbjct: 331 GVFTPSEYACKNEVIGLHALLITGY---GTSKTGEKYWIVKNSWGNTWGVEHGYIYFARG 387
+
+Query: 318 RNNHCGL 324
+             N CG+
+Sbjct: 388 -INACGI 393
+
+
+>Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4
+           protein_id:CAA22062.1
+          Length = 343
+
+ Score =  131 bits (330), Expect = 5e-31
+ Identities = 88/284 (30%), Positives = 152/284 (52%), Gaps = 24/284 (8%)
+
+Query: 48  RRAVWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKGRLFQ 107
+           R  ++ +N+ +++ +N E + GK   T E+N F D+T EE+++ +   +   H +  L  
+Sbjct: 71  RFTIFSRNLDLVERYNKEDA-GK--VTYELNDFSDLTEEEWKKYLMTPKPD-HSEKSLKP 126
+
+Query: 108 EPLM--LQIPKTVDWRE---KGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLS 162
+           + L+    +P +VDWR       VT +K QG CGSCWAF+ +  +E  + +  G L SLS
+Sbjct: 127 KTLIDKKNLPNSVDWRNVNGTNHVTGIKYQGPCGSCWAFATAAAIESAVSISGGGLQSLS 186
+
+Query: 163 EQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGSCKYRAEYAVAND 222
+            Q L+DC+    +  C GG    A +Y + + G+ +  +YPY      C+      VA  
+Sbjct: 187 SQQLLDCT--VVSDKCGGGEPVEALKYAQSH-GITTAHNYPYYFWTTKCRETVP-TVARI 242
+
+Query: 223 TGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLVVG 282
+           + ++   + E  + + VA  GP+ V  + +    +FY SGI  +P+C ++   H ++V+G
+Sbjct: 243 SSWMK-AESEDEMAQIVALNGPMIVCANFATNKNRFYHSGIAEDPDCGTEP-THALIVIG 300
+
+Query: 283 YGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLAT 326
+           YG +        YW++KN++ K WG  GY+++ +D  N CG+ T
+Sbjct: 301 YGPD--------YWILKNTYSKVWGEKGYMRVKRD-VNWCGINT 335
+
+
+>K02E7.10 CE11640   protease (ST.LOUIS) TR:O17255 protein_id:AAB71030.1
+          Length = 299
+
+ Score =  128 bits (321), Expect = 6e-30
+ Identities = 81/222 (36%), Positives = 125/222 (55%), Gaps = 18/222 (8%)
+
+Query: 118 VDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLK--TGKLISLSEQNLVDCSHDQGN 175
+           +DWREKG V PVK+QG+C + +AF+A   +E  M+ K   GKL+S SEQ ++DC++    
+Sbjct: 84  LDWREKGIVGPVKDQGKCNASYAFAAIAAIE-SMYAKANNGKLLSFSEQQIIDCAN--FT 140
+
+Query: 176 QGCNGGLMD-FAFQYIKENGGLDSEESYPYEAKD--GSCKYRAEYAVANDTGFVDIPQQE 232
+             C   L +  + +++KEN G+ +E  YPY  K+  G C+Y +       T ++D+   E
+Sbjct: 141 NPCQENLENVLSNRFLKEN-GVGTEADYPYVGKENVGKCEYDSSKMKLRPT-YIDVYPNE 198
+
+Query: 233 KALMKAVATVGPISVAMDASHPSLQFYSSGIY--YEPNCSSKDLDHGVLVVGYGYEGTDS 290
+           +     + T G     M  S PS   Y +GIY   +  C + +    + +VGYG +G   
+Sbjct: 199 EWARAHITTFGTGYFRM-RSPPSFFHYKTGIYNPTKEECGNANEARSLAIVGYGKDGA-- 255
+
+Query: 291 NKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPI 332
+             +KYW+VK S+G  WG  GY+K+A++  N CG+A + S PI
+Sbjct: 256 --EKYWIVKGSFGTSWGEHGYMKLARN-VNACGMAESISIPI 294
+
+
+>Y71H2AR.2 CE22930    (ST.LOUIS) protein_id:AAK29985.1
+          Length = 345
+
+ Score =  120 bits (301), Expect = 1e-27
+ Identities = 81/214 (37%), Positives = 114/214 (52%), Gaps = 14/214 (6%)
+
+Query: 118 VDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLK--TGKLISLSEQNLVDCSHDQGN 175
+           +DWREKG V PVK+QG+C +  AF+ +  +E  M+ K   G L+S SEQ L+DC +DQG 
+Sbjct: 86  LDWREKGIVGPVKDQGKCNASHAFAITSSIE-SMYAKATNGTLLSFSEQQLIDC-NDQGY 143
+
+Query: 176 QGCNGGLMDFAFQYIKENGGLDSEESYPYEAK-DGSCKYRAEYAVANDTGFVDIPQQEKA 234
+           +GC       A  Y+  + G+++E  YPY  K +  C + +  +  +    V     E  
+Sbjct: 144 KGCEEQFAMNAIGYLATH-GIETEADYPYVDKTNEKCTFDSTKSKIHLKKGVVAEGNEVL 202
+
+Query: 235 LMKAVATVGPISVAMDASHPSLQFYSSGIYYE--PNCSSKDLDHGVLVVGYGYEGTDSNK 292
+               V   GP    M A  PSL  Y  GIY      C+S      +++VGYG EG    +
+Sbjct: 203 GKVYVTNYGPAFFTMRAP-PSLYDYKIGIYNPSIEECTSTHEIRSMVIVGYGIEG----E 257
+
+Query: 293 DKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLAT 326
+            KYW+VK S+G  WG  GY+K+A+D  N C +AT
+Sbjct: 258 QKYWIVKGSFGTSWGEQGYMKLARD-VNACAMAT 290
+
+
+>Y51A2D.8 CE19204   Cysteine proteases (2 domains) (HINXTON) TR:Q9XXQ7
+           protein_id:CAA16407.1
+          Length = 386
+
+ Score =  108 bits (271), Expect = 4e-24
+ Identities = 64/203 (31%), Positives = 99/203 (48%), Gaps = 11/203 (5%)
+
+Query: 126 VTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDF 185
+           V P+K+QGQC  CW F+ +  +E      +GK  SLS+Q + DC   +G  GC GG +  
+Sbjct: 164 VGPIKDQGQCACCWGFAVTALVETVYAAHSGKFKSLSDQEVCDCG-TEGTPGCKGGSLTL 222
+
+Query: 186 AFQYIKENGGLDSEESYPYE---AKDG-SCKYRAEYAVANDTGF---VDIPQQEKALMKA 238
+             QY+K+  GL  +E YPY+   A  G  C+ R    +     F   V  P++ +  +  
+Sbjct: 223 GVQYVKKY-GLSGDEDYPYDQNRANQGRRCRLRETDRIVPARAFNFAVINPRRAEEQIIQ 281
+
+Query: 239 VATVGPISVAMDAS-HPSLQFYSSGIYYEPNCSSKDLDHGVLVVGYG-YEGTDSNKDKYW 296
+           V T   + VA+        + Y  G+  E +C      H   +VGY   E +      YW
+Sbjct: 282 VLTEWKVPVAVYFKVGDQFKEYKEGVIIEDDCRRATQWHAGAIVGYDTVEDSRGRSHDYW 341
+
+Query: 297 LVKNSWGKEWGMDGYIKIAKDRN 319
+           ++KNSWG +W   GY+++ + R+
+Sbjct: 342 IIKNSWGGDWAESGYVRVVRGRD 364
+
+
+>Y113G7B.15 CE23295    (HINXTON) TR:Q9U2X1 protein_id:CAB54334.1
+          Length = 328
+
+ Score = 99.4 bits (246), Expect = 3e-21
+ Identities = 87/321 (27%), Positives = 127/321 (39%), Gaps = 47/321 (14%)
+
+Query: 36  HRRLYGTNEEEWRR-AVWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNG 94
+           H++ Y T  E+ RR A + KN + IQ  N +        T   N F D   +E     N 
+Sbjct: 3   HKKHYRTPAEKDRRLAHFAKNHQKIQELNAKARREGRNVTFGWNKFADKNRQEL-SARNS 61
+
+Query: 95  YRHQKHKKGRLFQEPLMLQ----------------IPKTVDWRE-----KGCVTPVKNQG 133
+             H K+       +P   +                IP   D R+        V PVK+Q 
+Sbjct: 62  KIHPKNHTDLPIYKPRHPRGSRNHHNKRSKRQSGDIPDYFDLRDIYVDGSPVVGPVKDQE 121
+
+Query: 134 QCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKEN 193
+           QCG CWAF+ +   E    L +    SLS+Q + DC+      GC GG      + +   
+Sbjct: 122 QCGCCWAFATTAITEAANTLYSKSFTSLSDQEICDCADSGDTPGCVGGDPRNGLKMVHLR 181
+
+Query: 194 GGLDSEESYPYEA----KDGSCKYRAEYAVAN---------DTGFVDIPQQEKALMKAVA 240
+            G  S+  YPYE       G+C    +  V           D  + +    E   +  + 
+Sbjct: 182 -GQSSDGDYPYEEYRANTTGNCVGDEKSTVIQPETLNVYRFDQDYAEEDIMENLYLNHIP 240
+
+Query: 241 TVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLD--HGVLVVGYGYEGTDSNKDKYWLV 298
+           T     V       + ++Y+SG+    +C        H V +VGY   GT  +   YWLV
+Sbjct: 241 TAVYFRVG-----ENFEWYTSGVLQSEDCYQMTPAEWHSVAIVGY---GTSDDGVPYWLV 292
+
+Query: 299 KNSWGKEWGMDGYIKIAKDRN 319
+           +NSW  +WG+ GY+KI +  N
+Sbjct: 293 RNSWNSDWGLHGYVKIRRGVN 313
+
+
+>C50F4.3 CE05468   thiol protease (HINXTON) TR:Q18740 protein_id:CAA94738.1
+          Length = 374
+
+ Score = 98.2 bits (243), Expect = 6e-21
+ Identities = 80/270 (29%), Positives = 119/270 (43%), Gaps = 27/270 (10%)
+
+Query: 71  HGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKG-------RLFQEPLMLQIPKTVDWREK 123
+           H     +N F D++ +E   + + +   K+           L  +  M  +PKT D R K
+Sbjct: 90  HDTKYGINKFSDLSKKEIHGMYSKFGPPKNNTNVPKFNLKNLRVKRQMEGLPKTFDLRNK 149
+
+Query: 124 GC-----VTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGC 178
+                  + P+K Q  C  CW F+A+   E  + +   K ++LSEQ + DC+   G  GC
+Sbjct: 150 KVGGHYIIGPIKTQDSCACCWGFAATAVAEAALTVHLKKAMNLSEQEVCDCAPKHG-PGC 208
+
+Query: 179 NGGLMDFAFQYIKENGGLDSEESYPYEAKD----GSC---KYRAEY-AVANDTGFVDIPQ 230
+           NGG      +YIKE  GL   + YP+        G C   KY  E   +  D   +D   
+Sbjct: 209 NGGDPVDGLEYIKEM-GLTGGKEYPFNVNRSTQLGRCESEKYDRELNPLELDYYAIDPFN 267
+
+Query: 231 QEKALMKAVATVG-PISVAMDASHPSLQFYSSGIYYEPNCSSKDLD--HGVLVVGYGYEG 287
+            E  +   +  +  PISVA   +  SL  Y SGI    +C  +     H   +VGYG   
+Sbjct: 268 AEYQMTHHLYLLNLPISVAF-RTGASLSSYLSGILELADCDDEKGGHWHSGAIVGYGTTK 326
+
+Query: 288 TDSNKD-KYWLVKNSWGKEWGMDGYIKIAK 316
+             + +   YW+ +NSW  +WG DGY +I +
+Sbjct: 327 NSAGRTVDYWIFRNSWWTDWGDDGYARIVR 356
+
+
+>F26E4.3 CE17714   cysteine protease (HINXTON) TR:P90850
+           protein_id:CAB03007.1
+          Length = 491
+
+ Score = 97.8 bits (242), Expect = 8e-21
+ Identities = 68/241 (28%), Positives = 113/241 (46%), Gaps = 35/241 (14%)
+
+Query: 113 QIPKTVDWREKG--CVTPVKNQGQCGSCWAFSASGCLEGQM-FLKTGKLIS-LSEQNLVD 168
+           ++P+  D R+K    + PV +QG CGS W+ S +     ++  +  G++ S LS Q L+ 
+Sbjct: 222 ELPEHFDARDKWGPLIHPVADQGDCGSSWSVSTTAISSDRLAIISEGRINSTLSSQQLLS 281
+
+Query: 169 CSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEA----KDGSCKY----------- 213
+           C+  +  +GC GG +D A+ YI++ G +  +  YPY +    + G C             
+Sbjct: 282 CNQHR-QKGCEGGYLDRAWWYIRKLGVV-GDHCYPYVSGQSREPGHCLIPKRDYTNRQGL 339
+
+Query: 214 RAEYAVANDTGFVDIP-----QQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPN 268
+           R      + T F   P      +E+ +   + T GP+       H     Y+ G+Y   +
+Sbjct: 340 RCPSGSQDSTAFKMTPPYKVSSREEDIQTELMTNGPVQATF-VVHEDFFMYAGGVYQHSD 398
+
+Query: 269 CSSK-------DLDHGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNH 321
+            +++       +  H V V+G+G + +     KYWL  NSWG +WG DGY K+ +   NH
+Sbjct: 399 LAAQKGASSVAEGYHSVRVLGWGVDHSTGKPIKYWLCANSWGTQWGEDGYFKVLRG-ENH 457
+
+Query: 322 C 322
+           C
+Sbjct: 458 C 458
+
+
+>F15D4.4 CE28917   cysteine protease (HINXTON) TR:Q93512
+           protein_id:CAB02487.1
+          Length = 622
+
+ Score = 96.7 bits (239), Expect = 2e-20
+ Identities = 65/219 (29%), Positives = 102/219 (45%), Gaps = 35/219 (15%)
+
+Query: 117 TVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDC------S 170
+           TVDWR    + P+ +Q  CG CWAFS    +E    ++     SLS Q L+ C      +
+Sbjct: 226 TVDWRP--FLKPILDQSTCGGCWAFSMISMIESFFAIQGYNTSSLSVQQLLTCDTKVDST 283
+
+Query: 171 HDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGSCK-------------YRAEY 217
+           +   N GC GG    A  Y++ +   D+    P++ +D SC              +   Y
+Sbjct: 284 YGLANVGCKGGYFQIAGSYLEVSAARDAS-LIPFDLEDTSCDSSFFPPVVPTILLFDDGY 342
+
+Query: 218 AVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHG 277
+              N T    I  ++    K     GPI+V M A+ P +  YS G+Y + +C +  ++H 
+Sbjct: 343 ISGNFTAAQLITMEQNIEDKV--RKGPIAVGM-AAGPDIYKYSEGVY-DGDCGTI-INHA 397
+
+Query: 278 VLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAK 316
+           V++VG+         D YW+++NSWG  WG  GY ++ +
+Sbjct: 398 VVIVGF--------TDDYWIIRNSWGASWGEAGYFRVKR 428
+
+
+>C32B5.7 CE08515   cathepsin-like peptidase (ST.LOUIS) TR:P91111
+           protein_id:AAB37963.1
+          Length = 250
+
+ Score = 90.1 bits (222), Expect = 2e-18
+ Identities = 63/191 (32%), Positives = 98/191 (50%), Gaps = 18/191 (9%)
+
+Query: 118 VDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQG 177
+           +DWR++G V PVK+QG C + +AF+A   +E    +  G+L+S SEQ ++DC    G   
+Sbjct: 72  LDWRDEGVVGPVKDQGNCNASYAFAAISAIESMYAIANGQLLSFSEQQIIDC---LGGCA 128
+
+Query: 178 CNGGLMDFAFQYIKENGGLDSEESYPYEA-KDGSCKY--RAEYAVANDTGFVDIPQQEKA 234
+                M  A  Y+ E  G+++   YP+   K+  C+Y  +  Y + +DT   D+  +  A
+Sbjct: 129 IESDPM-MAMTYL-ERKGIETYTDYPFVGKKNEKCEYDSKKAYLILDDT--YDMSDESLA 184
+
+Query: 235 LMKAVATVGPISVAMDASHPSLQFYSSGIY--YEPNCSSKDLDHGVLVVGYGYEGTDSNK 292
+           L+  +   GP    M+ + PS   Y SGIY   E  C S +    + +VGYG    +   
+Sbjct: 185 LV-FIDERGPGLFTMN-TPPSFFNYKSGIYNPTEEECKSTNEKRALTIVGYG----NDKG 238
+
+Query: 293 DKYWLVKNSWG 303
+             YW+VK S+G
+Sbjct: 239 QNYWIVKGSFG 249
+
+
+>Y51A2D.1 CE18411   Cysteine proteases (2 domains) (HINXTON) TR:O62484
+           protein_id:CAA16404.1
+          Length = 382
+
+ Score = 87.8 bits (216), Expect = 8e-18
+ Identities = 87/350 (24%), Positives = 139/350 (38%), Gaps = 76/350 (21%)
+
+Query: 31  QWKSTHRRLYGTNEEEWRRA---VWEKNMRMIQLHNGEYSNGKHGFTMEMNAFGDMTNEE 87
+           ++K    R Y +  E   R    V  +N  +++L+      G++     +N F D+T  E
+Sbjct: 46  EFKKKFSRTYKSEAENQLRLQNFVKSRN-NVVRLNKNAQKAGRNS-NFAVNQFSDLTTSE 103
+
+Query: 88  FRQIV---------NGYRHQKHKK--GRLFQEPLMLQIPKTVDWREKGC-----VTPVKN 131
+             Q +         N   H+  KK  G+   +    +  +  D R +       V P+KN
+Sbjct: 104 LHQRLSRFPPNLTENSVFHKNFKKLLGKTRTKRQNSEFARNFDLRSQKVNGRYIVGPIKN 163
+
+Query: 132 QGQCGSCWAFSASGCLEG------------------------------QMFLKTGKLISL 161
+           QGQC  CW F+ +  LE                               +   K    +S 
+Sbjct: 164 QGQCACCWGFAVTAMLETIYAVNVGRFKLMSHIPALAPNFSDFDFFFFEFLAKLNMFLSF 223
+
+Query: 162 SEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGSCKYRAEYAVAN 221
+           S+Q + DC+ D    GC GG + +  +Y   N GL SE  YP   ++ + +     A+ +
+Sbjct: 224 SDQEMCDCATDGTKAGCAGGGLMWGVEY-AINNGLASEFDYPEFDQNRATRPGTCEAMDD 282
+
+Query: 222 DTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCS-SKDLDHGVLV 280
+           D                  T  P++ A  A    LQ Y SG+    +C  +  + H   +
+Sbjct: 283 D-----------------KTFPPVNFA--AGTAFLQ-YKSGVLVTEDCDLAGTVWHAGAI 322
+
+Query: 281 VGYGYEG-TDSNKDKYWLVKNSWG-KEWGMDGYIKIAKDRNNHCGLATAA 328
+           VGYG E        ++W++KNSWG   WG  GY+K+ + + N CG+   A
+Sbjct: 323 VGYGEENDLRGRSQRFWIMKNSWGVSGWGTGGYVKLIRGK-NWCGIERGA 371
+
+
+>C52E4.1 CE08943  locus:cpr-1 cathepsin-like cysteine protease (HINXTON)
+           TR:Q18783 protein_id:CAB01410.1
+          Length = 340
+
+ Score = 87.4 bits (215), Expect = 1e-17
+ Identities = 66/252 (26%), Positives = 110/252 (43%), Gaps = 39/252 (15%)
+
+Query: 107 QEPLMLQIPKTVD----WREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKT--GKLIS 160
+           QE ++  +P T D    W E   +  +++Q  CGSCWAF A+  +  +  ++T   +   
+Sbjct: 89  QEVVLASVPATFDSRTQWSECKSIKLIRDQATCGSCWAFGAAEMISDRTCIETKGAQQPI 148
+
+Query: 161 LSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESY-----PY----------- 204
+           +S  +L+ C       GC GG    A ++    G +   + +     PY           
+Sbjct: 149 ISPDDLLSCCGSSCGNGCEGGYPIQALRWWDSKGVVTGGDYHGAGCKPYPIAPCTSGNCP 208
+
+Query: 205 EAKDGSCKYRAEY----AVANDTGF----VDIPQQEKALMKAVATVGPISVAMDASHPSL 256
+           E+K  SC    +     A A D  F      +P+   ++   +   GP+  A    +   
+Sbjct: 209 ESKTPSCSMSCQSGYSTAYAKDKHFGVSAYAVPKNAASIQAEIYANGPVEAAFSV-YEDF 267
+
+Query: 257 QFYSSGIYYEPNCSSKDLD-HGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIA 315
+             Y SG+Y   + + K L  H + ++G+G E    +   YWLV NSWG  WG  G+ KI 
+Sbjct: 268 YKYKSGVY--KHTAGKYLGGHAIKIIGWGTE----SGSPYWLVANSWGVNWGESGFFKIY 321
+
+Query: 316 KDRNNHCGLATA 327
+           +  ++ CG+ +A
+Sbjct: 322 RG-DDQCGIESA 332
+
+
+>F32B5.8 CE09855   cysteine proteinase (ST.LOUIS) TR:O01850
+           protein_id:AAB54210.1
+          Length = 427
+
+ Score = 85.9 bits (211), Expect = 3e-17
+ Identities = 73/261 (27%), Positives = 123/261 (46%), Gaps = 36/261 (13%)
+
+Query: 88  FRQIVNGYRHQKHKKGRLFQEPLMLQIPKTVDWREKGCVTPV---KNQG---QCGSCWAF 141
+           ++Q    + H+++ +    ++     +PKT DWR+   +      +NQ     CGSCWAF
+Sbjct: 160 YKQTGRVFEHKRYDRIYETEDFDSEDLPKTWDWRDANGINYASADRNQHIPQYCGSCWAF 219
+
+Query: 142 SASGCLEGQMFLKTGKL---ISLSEQNLVDCSHDQGNQGC-NGGLMDFAFQYIKENGGLD 197
+            A+  L  ++ +K         LS Q ++DCS   G   C  GG     ++Y  E+G + 
+Sbjct: 220 GATSALADRINIKRKNAWPQAYLSVQEVIDCS---GAGTCVMGGEPGGVYKYAHEHG-IP 275
+
+Query: 198 SEESYPYEAKDGSCK-YRA-------------EYAVANDTGFVDIPQQEKALMKA-VATV 242
+            E    Y+A+DG C  Y                Y +   + +  +   EK  MKA +   
+Sbjct: 276 HETCNNYQARDGKCDPYNRCGSCWPGECFSIKNYTLYKVSEYGTVHGYEK--MKAEIYHK 333
+
+Query: 243 GPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLVVGYGYEGTDSNKDKYWLVKNSW 302
+           GPI+  + A+  + + Y+ GIY E   + +D+DH + V G+G +    +  +YW+ +NSW
+Sbjct: 334 GPIACGIAATK-AFETYAGGIYKE--VTDEDIDHIISVHGWGVD--HESGVEYWIGRNSW 388
+
+Query: 303 GKEWGMDGYIKIAKDRNNHCG 323
+           G+ WG  G+ KI   +  + G
+Sbjct: 389 GEPWGEHGWFKIVTSQYKNAG 409
+
+
+>M04G12.2 CE12424   cysteine protease (HINXTON) TR:P92005
+           protein_id:CAB03209.1
+          Length = 467
+
+ Score = 83.2 bits (204), Expect = 2e-16
+ Identities = 62/228 (27%), Positives = 107/228 (46%), Gaps = 33/228 (14%)
+
+Query: 114 IPKTVDWREKGCV---TPVKNQG---QCGSCWAFSASGCLEGQMFL-KTGK--LISLSEQ 164
+           +P   DWR    V   +P +NQ     CGSCW F  +G L  +  + + G+  +  LS Q
+Sbjct: 221 LPTGWDWRNVSGVNYCSPTRNQHIPVYCGSCWVFGTTGALNDRFNVARKGRWPMTQLSPQ 280
+
+Query: 165 NLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLD---------SEESYPYEAKDGSCKYRA 215
+            ++DC+   G   C GG +    ++ K  G ++         + E  PY  + GSC    
+Sbjct: 281 EIIDCN---GKGNCQGGEIGNVLEHAKIQGLVEEGCNVYRATNGECNPYH-RCGSCWPNE 336
+
+Query: 216 EYAVANDTGFV-----DIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCS 270
+            +++ N T +       +  ++K +M  +   GPI+ A+ A+      Y  G+Y E   S
+Sbjct: 337 CFSLTNYTRYYVKDYGQVQGRDK-IMSEIKKGGPIACAIGATKKFEYEYVKGVYSEK--S 393
+
+Query: 271 SKDLDHGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDR 318
+             + +H + + G+G    D N  +YW+ +NSWG+ WG  G+ ++   +
+Sbjct: 394 DLESNHIISLTGWG---VDENGVEYWIARNSWGEAWGELGWFRVVTSK 438
+
+
+>Y71H2AM.3 CE26272    (ST.LOUIS) protein_id:AAK29976.1
+          Length = 716
+
+ Score = 75.5 bits (184), Expect = 4e-14
+ Identities = 60/169 (35%), Positives = 83/169 (48%), Gaps = 25/169 (14%)
+
+Query: 118 VDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLK--TGKLISLSEQNLVDCSHDQGN 175
+           +DWR+KG V PVK+QG+C +  AF+ S  +E  M+ K   G L+S SEQ L+DC  D G 
+Sbjct: 86  LDWRDKGIVGPVKDQGKCNASHAFAISSSIE-SMYAKATNGSLLSFSEQQLIDCD-DHGF 143
+
+Query: 176 QGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGSCKYRAEYAVANDTGFVDIPQQEKAL 235
+           +GC       A  Y   + G+++E  YPY  K+          ++N+T       Q K L
+Sbjct: 144 KGCEEQPAINAVSYFIFH-GIETEADYPYAGKENG-------KLSNET-------QGKEL 188
+
+Query: 236 MKAVATVGPISVAMDASHPSLQFYSSGIYYE--PNCSSKDLDHGVLVVG 282
+              V   GP    M A  PSL  Y  GIY      C+S      +++VG
+Sbjct: 189 ---VTNYGPAFFTMRAP-PSLYDYKIGIYNPSIEECTSTHEIRSMVIVG 233
+
+
+>T10H4.12 CE27590  locus:cpr-3 protease (HINXTON) TR:Q9TW93
+           protein_id:CAB61024.2
+          Length = 370
+
+ Score = 74.7 bits (182), Expect = 7e-14
+ Identities = 60/250 (24%), Positives = 102/250 (40%), Gaps = 42/250 (16%)
+
+Query: 102 KGRLFQEPLMLQIPKTVDWREK----GCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTG- 156
+           +G +  EPL    P T D REK      +  ++NQ  CGSCWAF A+  +  ++ +++  
+Sbjct: 84  RGEIVPEPL----PDTFDAREKWPDCNTIKLIRNQATCGSCWAFGAAEVISDRVCIQSNG 139
+
+Query: 157 -KLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEE-----SYPY------ 204
+            +   +S ++++ C       GC GG    A ++   +G +   +       PY      
+Sbjct: 140 TQQPVISVEDILSCCGTTCGYGCKGGYSIEALRFWASSGAVTGGDYGGHGCMPYSFAPCT 199
+
+Query: 205 ----EAKDGSCK-----------YRAEYAVANDTGFVDIPQQEKALMKAVATVGPISVAM 249
+               E+   SCK           Y+ +         V   +    +   +   GP+  + 
+Sbjct: 200 KNCPESTTPSCKTTCQSSYKTEEYKKDKHYGASAYKVTTTKSVTEIQTEIYHYGPVEASY 259
+
+Query: 250 DASHPSLQFYSSGIYYEPNCSSKDLDHGVLVVGYGYEGTDSNKDKYWLVKNSWGKEWGMD 309
+              +     Y SG+Y+  +       H V ++G+G E    N   YWL+ NSWG  +G  
+Sbjct: 260 KV-YEDFYHYKSGVYHYTSGKLVG-GHAVKIIGWGVE----NGVDYWLIANSWGTSFGEK 313
+
+Query: 310 GYIKIAKDRN 319
+           G+ KI +  N
+Sbjct: 314 GFFKIRRGTN 323
+
+
+>F36D3.9 CE15973   cysteine protease (HINXTON) TR:O45466
+           protein_id:CAB04322.1
+          Length = 345
+
+ Score = 71.6 bits (174), Expect = 6e-13
+ Identities = 63/235 (26%), Positives = 98/235 (40%), Gaps = 40/235 (17%)
+
+Query: 120 WREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLIS--LSEQNLVDCSHDQGNQG 177
+           W +   +  ++ Q  CGSCWAFS +  +  +  + +       +S  +L+ C      +G
+Sbjct: 111 WPQCKSMKLIREQSNCGSCWAFSTAEVISDRTCIASNGTQQPIISPTDLLTCCGMSCGEG 170
+
+Query: 178 CNGGLMDFAFQYIKENGGLDSEE-------SYPYEAKDG-------------SCK--YRA 215
+           C+GG    AFQ+    G +   +        YP    +              SC+  YR 
+Sbjct: 171 CDGGFPYRAFQWWARRGVVTGGDYLGTGCKPYPIRPCNSDNCVNLQTPPCRLSCQPGYRT 230
+
+Query: 216 EYAVANDTGF-----VDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCS 270
+            Y   ND  +       +P+   A+   +   GP+ VA    +   + Y SGIY      
+Sbjct: 231 TYT--NDKNYGSNSAYPVPRTVAAIQADIYYNGPV-VAAFIVYEDFEKYKSGIYRHIAGR 287
+
+Query: 271 SKDLDHGVLVVGYGYE-GTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGL 324
+           SK   H V ++G+G E GT      YWL  NSWG +WG  G  +I +   + CG+
+Sbjct: 288 SKG-GHAVKLIGWGTERGTP-----YWLAVNSWGSQWGESGTFRILRG-VDECGI 335
+
+
+  Database: /data_2/jason/blastdb/wormpep62
+    Posted date:  Sep 3, 2001  2:17 PM
+  Number of letters in database: 8,813,425
+  Number of sequences in database:  20,085
+  
+Lambda     K      H
+   0.317    0.134    0.426 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 6241552
+Number of Sequences: 20085
+Number of extensions: 276768
+Number of successful extensions: 629
+Number of sequences better than 1.0e-10: 20
+Number of HSP's better than  0.0 without gapping: 4
+Number of HSP's successfully gapped in prelim test: 16
+Number of HSP's that attempted gapping in prelim test: 578
+Number of HSP's gapped (non-prelim): 20
+length of query: 334
+length of database: 8,813,425
+effective HSP length: 44
+effective length of query: 290
+effective length of database: 7,929,685
+effective search space: 2299608650
+effective search space used: 2299608650
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 41 (21.6 bits)
+S2: 156 (64.7 bits)
+BLASTP 2.1.3 [Apr-11-2001]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= PAPA_CARPA
+         (345 letters)
+
+Database: /data_2/jason/blastdb/wormpep62
+           20,085 sequences; 8,813,425 total letters
+
+Searching..................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id...   174  7e-44
+T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O4573...   171  5e-43
+Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4 pr...   160  8e-40
+F41E6.6 CE10254   cysteine protease and a protease inhibitor...   156  2e-38
+Y51A2D.8 CE19204   Cysteine proteases (2 domains) (HINXTON) ...   127  1e-29
+
+>R09F10.1 CE28755   peptidase (ST.LOUIS) TR:Q23030 protein_id:AAC69091.2
+          Length = 383
+
+ Score =  174 bits (441), Expect = 7e-44
+ Identities = 107/348 (30%), Positives = 173/348 (48%), Gaps = 18/348 (5%)
+
+Query: 7   ISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLIQLFESWMLKHNKIYKNIDEKI 66
+           +++L    + L + + LSF  F  + +   +L       Q+F  ++LK ++ Y +++E  
+Sbjct: 45  LTQLFSGLVLLTMLILLSFFVFQRLNHKMENLKHE----QMFNDFILKFDRKYTSVEEFE 100
+
+Query: 67  YRFEIFKDNLKYIDETNKKNNSYWLGLNVFADMSNDEFKEKYTGSIAGNYTTTELSYEEV 126
+           YR++IF  N+   +   ++N    L +N F D +++E ++    +    Y      +E  
+Sbjct: 101 YRYQIFLRNVIEFEAEEERNLGLDLDVNEFTDWTDEELQKMVQENKYTKYDFDTPKFEGS 160
+
+Query: 127 LNDGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQEL 186
+             +  V  P  +DWR++G +TP+KNQG CGSCWAF+ V ++E    I+ G L   SEQE+
+Sbjct: 161 YLETGVIRPASIDWREQGKLTPIKNQGQCGSCWAFATVASVEAQNAIKKGKLVSLSEQEM 220
+
+Query: 187 LDCDRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQV 246
+           +DCD R+ GC+GGY   A++ V + G+     YPY  ++      ++       D  R +
+Sbjct: 221 VDCDGRNNGCSGGYRPYAMKFVKENGLESEKEYPYSALKHDQCFLKENDTRVFIDDFRML 280
+
+Query: 247 QPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGGIF---VGPCGNKV--DHAVAAVGYGP 301
+               E    +     PV+  +    K    YR GIF   V  C  K    HA+  +GYG 
+Sbjct: 281 SNNEEDIANWVGTKGPVTFGMNVV-KAMYSYRSGIFNPSVEDCTEKSMGAHALTIIGYGG 339
+
+Query: 302 N----YILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTSSFYPVKN 345
+                Y ++KNSWGT WG +GY R+ RG  +    CGL  +   P+ N
+Sbjct: 340 EGESAYWIVKNSWGTSWGASGYFRLARGVNS----CGLANTVVAPIIN 383
+
+
+>T03E6.7 CE16333   cathepsin-like protease (HINXTON) TR:O45734
+           protein_id:CAB07275.1
+          Length = 337
+
+ Score =  171 bits (434), Expect = 5e-43
+ Identities = 107/319 (33%), Positives = 163/319 (50%), Gaps = 25/319 (7%)
+
+Query: 42  ERLIQLFESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKNN----SYWLGLNVFA 97
+           E  I+ ++ +    +K Y   +E+ Y  E F  N+ +I+  N+ +     ++ +GLN  A
+Sbjct: 26  ESAIEKWDDYKEDFDKEYSESEEQTY-MEAFVKNMIHIENHNRDHRLGRKTFEMGLNHIA 84
+
+Query: 98  DMSNDEFKEK--YTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDWRQKGAVTPVKNQGSC 155
+           D+   ++++   Y      +      S+    N   V +P+ VDWR    VT VKNQG C
+Sbjct: 85  DLPFSQYRKLNGYRRLFGDSRIKNSSSFLAPFN---VQVPDEVDWRDTHLVTDVKNQGMC 141
+
+Query: 156 GSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRR--SYGCNGGYPWSALQLVA-QYG 212
+           GSCWAFSA   +EG    + G L   SEQ L+DC  +  ++GCNGG    A + +   +G
+Sbjct: 142 GSCWAFSATGALEGQHARKLGQLVSLSEQNLVDCSTKYGNHGCNGGLMDQAFEYIRDNHG 201
+
+Query: 213 IHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQ-PVSVVLEAAG 271
+           +    +YPY+G    C   +K    A   G       +E  L  ++A Q P+S+ ++A  
+Sbjct: 202 VDTEESYPYKGRDMKCHFNKK-TVGADDKGYVDTPEGDEEQLKIAVATQGPISIAIDAGH 260
+
+Query: 272 KDFQLYRGGIFVGP--CGNKVDHAVAAVGYGP-----NYILIKNSWGTGWGENGYIRIKR 324
+           + FQLY+ G++        ++DH V  VGYG      +Y ++KNSWG GWGE GYIRI R
+Sbjct: 261 RSFQLYKKGVYYDEECSSEELDHGVLLVGYGTDPEHGDYWIVKNSWGAGWGEKGYIRIAR 320
+
+Query: 325 GTGNSYGVCGLYTSSFYPV 343
+              N    CG+ T + YP+
+Sbjct: 321 NRNNH---CGVATKASYPL 336
+
+
+>Y40H7A.10 CE21821   Cysteine protease (HINXTON) TR:Q9XWA4
+           protein_id:CAA22062.1
+          Length = 343
+
+ Score =  160 bits (406), Expect = 8e-40
+ Identities = 100/295 (33%), Positives = 153/295 (50%), Gaps = 15/295 (5%)
+
+Query: 48  FESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKN-NSYWLGLNVFADMSNDEFKE 106
+           F+++++K+ + Y N  E + RF IF  NL  ++  NK++       LN F+D++ +E+K 
+Sbjct: 51  FQNFLVKYLREYPNEYEIVKRFTIFSRNLDLVERYNKEDAGKVTYELNDFSDLTEEEWK- 109
+
+Query: 107 KYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDWRQKGA---VTPVKNQGSCGSCWAFSA 163
+           KY  +   +++   L  + +++    N+P  VDWR       VT +K QG CGSCWAF+ 
+Sbjct: 110 KYLMTPKPDHSEKSLKPKTLIDKK--NLPNSVDWRNVNGTNHVTGIKYQGPCGSCWAFAT 167
+
+Query: 164 VVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEG 223
+              IE  + I  G L   S Q+LLDC   S  C GG P  AL+    +GI   + YPY  
+Sbjct: 168 AAAIESAVSISGGGLQSLSSQQLLDCTVVSDKCGGGEPVEALKYAQSHGITTAHNYPYYF 227
+
+Query: 224 VQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGGIFV 283
+               C  RE  P  A+     + +  +E A + ++ N P+ V    A    + Y  GI  
+Sbjct: 228 WTTKC--RETVPTVARISSWMKAESEDEMAQIVAL-NGPMIVCANFATNKNRFYHSGIAE 284
+
+Query: 284 GP-CGNKVDHAVAAVGYGPNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYT 337
+            P CG +  HA+  +GYGP+Y ++KN++   WGE GY+R+KR        CG+ T
+Sbjct: 285 DPDCGTEPTHALIVIGYGPDYWILKNTYSKVWGEKGYMRVKR----DVNWCGINT 335
+
+
+>F41E6.6 CE10254   cysteine protease and a protease inhibitor (ST.LOUIS)
+           TR:O16454 protein_id:AAB65956.1
+          Length = 498
+
+ Score =  156 bits (395), Expect = 2e-38
+ Identities = 110/327 (33%), Positives = 156/327 (47%), Gaps = 51/327 (15%)
+
+Query: 48  FESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKNNSYWL-GLNVFADMSNDEFKE 106
+           F  ++ +H K Y N  E + RF +FK N K I E  K      + G   F+DM+  EFK+
+Sbjct: 174 FLDFVDRHEKKYTNKREVLKRFRVFKKNAKVIRELQKNEQGTAVYGFTKFSDMTTMEFKK 233
+
+Query: 107 -----KYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAF 161
+                ++   +          ++  +N+ D  +PE  DWR+KGAVT VKNQG+CGSCWAF
+Sbjct: 234 IMLPYQWEQPVYPMEQANFEKHDVTINEED--LPESFDWREKGAVTQVKNQGNCGSCWAF 291
+
+Query: 162 SAVVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCNGGYPWSAL---------------- 205
+           S    +EG   I    L   SEQEL+DCD    GCNGG P +A                 
+Sbjct: 292 STTGNVEGAWFIAKNKLVSLSEQELVDCDSMDQGCNGGLPSNAYKIGKFVVSDNYCFLVF 351
+
+Query: 206 ------QLVAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGAL-LYSI 258
+                 +++   G+   + YPY+G    C    K   A   +G  ++ P++E  +  + +
+Sbjct: 352 YHKTTKEIIRMGGLEPEDAYPYDGRGETCHLVRK-DIAVYINGSVEL-PHDEVEMQKWLV 409
+
+Query: 259 ANQPVSVVLEAAGKDFQLYRGG------IFVGPCGNKVDHAVAAVGYGPN----YILIKN 308
+              P+S+ L A     Q YR G      IF  P    ++H V  VGYG +    Y ++KN
+Sbjct: 410 TKGPISIGLNA--NTLQFYRHGVVHPFKIFCEPF--MLNHGVLIVGYGKDGRKPYWIVKN 465
+
+Query: 309 SWGTGWGENGYIRIKRGTGNSYGVCGL 335
+           SWG  WGE GY ++ RG      VCG+
+Sbjct: 466 SWGPNWGEAGYFKLYRGK----NVCGV 488
+
+
+>Y51A2D.8 CE19204   Cysteine proteases (2 domains) (HINXTON) TR:Q9XXQ7
+           protein_id:CAA16407.1
+          Length = 386
+
+ Score =  127 bits (318), Expect = 1e-29
+ Identities = 95/332 (28%), Positives = 148/332 (43%), Gaps = 44/332 (13%)
+
+Query: 37  DLTSTERLIQLFESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKNNSYW----LG 92
+           D    E+L + FE +  K+N+ YK+  E   RF  F  +   +D+ N K+ +       G
+Sbjct: 32  DRDHPEKLYKAFEDFKKKYNRKYKDESENQQRFNNFVKSYNNVDKLNAKSKAAGYDTQFG 91
+
+Query: 93  LNVFADMSNDEFKEKYTGSIAGNYTTTE-LSYEEVLND---GDVN----------IPEYV 138
+           +N F+D+S  EF  + +  +  N T    L++++   D    D+N           P+Y 
+Sbjct: 92  INKFSDLSTAEFHGRLSNVVPSNNTGLPMLNFDKKKPDFRAADMNKTRHKRRSTRYPDYF 151
+
+Query: 139 DWRQKGA-----VTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRR- 192
+           D R +       V P+K+QG C  CW F+    +E +    +G     S+QE+ DC    
+Sbjct: 152 DLRNEKINGRYIVGPIKDQGQCACCWGFAVTALVETVYAAHSGKFKSLSDQEVCDCGTEG 211
+
+Query: 193 SYGCNGGYPWSALQLVAQYGIHYRNTYPYE----GVQRYCRSREKGPYA-AKTDGVRQVQ 247
+           + GC GG     +Q V +YG+     YPY+       R CR RE      A+      + 
+Sbjct: 212 TPGCKGGSLTLGVQYVKKYGLSGDEDYPYDQNRANQGRRCRLRETDRIVPARAFNFAVIN 271
+
+Query: 248 PYNEGALLYSIANQ---PVSVVLEAAGKDFQLYRGGIFV-GPCGNKVD-HAVAAVGY--- 299
+           P      +  +  +   PV+V  +  G  F+ Y+ G+ +   C      HA A VGY   
+Sbjct: 272 PRRAEEQIIQVLTEWKVPVAVYFK-VGDQFKEYKEGVIIEDDCRRATQWHAGAIVGYDTV 330
+
+Query: 300 ------GPNYILIKNSWGTGWGENGYIRIKRG 325
+                   +Y +IKNSWG  W E+GY+R+ RG
+Sbjct: 331 EDSRGRSHDYWIIKNSWGGDWAESGYVRVVRG 362
+
+
+>R07E3.1 CE02295   cysteine proteinase (HINXTON) TR:Q21810
+           protein_id:CAA89070.1
+          Length = 402
+
+ Score =  114 bits (286), Expect = 7e-26
+ Identities = 90/309 (29%), Positives = 139/309 (44%), Gaps = 41/309 (13%)
+
+Query: 54  KHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKNN--SYWLGLNVFADMSNDEFKEKYTGS 111
+           K +K Y    E + R   + +  + I   N +N   S   G N  +D +++EF++     
+Sbjct: 96  KFDKSYATSQESLKRLNAYYNTDENIANWNIQNEHGSAEYGHNDMSDWTDEEFEKTLLPK 155
+
+Query: 112 IAGNYTTTELSYEEVL--------NDGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSA 163
+                   E  + E +         +     P++ DWR K  +TPVK QG CGSCWAF++
+Sbjct: 156 SFYKRLHKEAEFIEPIPESLTAKKGESSSPFPDFFDWRDKNVITPVKAQGQCGSCWAFAS 215
+
+Query: 164 VVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEG 223
+             T+E    I  G     SEQ LLDCD     C+GG    A + + + G+      PY  
+Sbjct: 216 TATVEAAWAIAHGEKRNLSEQTLLDCDLVDNACDGGDEDKAFRYIHRNGLANAVDLPYVA 275
+
+Query: 224 VQR--------YCRSREKGPYAAKTDGVRQVQPYNEGALLYSIAN-QPVSVVLEAAGKDF 274
+            ++        +  +R K  Y    D         E +++  + N  PV++ + A  +  
+Sbjct: 276 HRQNGCAVNDHWNTTRIKAAYFLHHD---------EDSIINWLVNFGPVNIGM-AVIQPM 325
+
+Query: 275 QLYRGGIFVG---PCGNKVD--HAVAAVGYGPN-----YILIKNSWGTGWG-ENGYIRIK 323
+           + Y+GG+F      C N+V   HA+   GYG +     Y ++KNSWG  WG E+GYI   
+Sbjct: 326 RAYKGGVFTPSEYACKNEVIGLHALLITGYGTSKTGEKYWIVKNSWGNTWGVEHGYIYFA 385
+
+Query: 324 RGTGNSYGV 332
+           RG  N+ G+
+Sbjct: 386 RGI-NACGI 393
+
+
+>C50F4.3 CE05468   thiol protease (HINXTON) TR:Q18740 protein_id:CAA94738.1
+          Length = 374
+
+ Score =  114 bits (286), Expect = 7e-26
+ Identities = 97/357 (27%), Positives = 152/357 (42%), Gaps = 39/357 (10%)
+
+Query: 6   SISKLLFVAICLFVYMGLSFG-DFSIVGYSQN-DLTSTERLIQLFESWMLKHNKIYKNID 63
+           S+  L F+ I +F       G +F    +  N D  + E+L + FE +++K+ + YK+  
+Sbjct: 3   SLLALFFIQIFIFTVTSFDVGANFEDSFFEINIDRNNPEKLYKEFEDFIVKYKRNYKDEI 62
+
+Query: 64  EKIYRFEIFKDNLKYIDETNKK----NNSYWLGLNVFADMSNDEFKEKYT--GSIAGNYT 117
+           EK +RF+ F      + + NK      +    G+N F+D+S  E    Y+  G    N  
+Sbjct: 63  EKKFRFQQFVATHNRVGKMNKAAKKAGHDTKYGINKFSDLSKKEIHGMYSKFGPPKNNTN 122
+
+Query: 118 TTELSYEEVLNDGDVN-IPEYVDWRQKGA-----VTPVKNQGSCGSCWAFSAVVTIEGII 171
+             + + + +     +  +P+  D R K       + P+K Q SC  CW F+A    E  +
+Sbjct: 123 VPKFNLKNLRVKRQMEGLPKTFDLRNKKVGGHYIIGPIKTQDSCACCWGFAATAVAEAAL 182
+
+Query: 172 KIRTGNLNEYSEQELLDC-DRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQR---- 226
+            +        SEQE+ DC  +   GCNGG P   L+ + + G+     YP+  V R    
+Sbjct: 183 TVHLKKAMNLSEQEVCDCAPKHGPGCNGGDPVDGLEYIKEMGLTGGKEYPF-NVNRSTQL 241
+
+Query: 227 -YCRS----REKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGGI 281
+             C S    RE  P       +       +      + N P+SV     G     Y  GI
+Sbjct: 242 GRCESEKYDRELNPLELDYYAIDPFNAEYQMTHHLYLLNLPISVAFR-TGASLSSYLSGI 300
+
+Query: 282 F-VGPCGNKVD---HAVAAVGYGP---------NYILIKNSWGTGWGENGYIRIKRG 325
+             +  C ++     H+ A VGYG          +Y + +NSW T WG++GY RI RG
+Sbjct: 301 LELADCDDEKGGHWHSGAIVGYGTTKNSAGRTVDYWIFRNSWWTDWGDDGYARIVRG 357
+
+
+>F15D4.4 CE28917   cysteine protease (HINXTON) TR:Q93512
+           protein_id:CAB02487.1
+          Length = 622
+
+ Score =  110 bits (276), Expect = 1e-24
+ Identities = 84/290 (28%), Positives = 127/290 (42%), Gaps = 34/290 (11%)
+
+Query: 64  EKIYRFEIFKDNLKYIDETNKKNNSYWLGLNVFADMSNDEFKEKYTGSIA------GNYT 117
+           E + RF ++    K +DE    N  Y LG++ +  MS ++F     G +A         T
+Sbjct: 150 EGLKRFNVYSKVKKEVDE---HNIMYELGMSSYK-MSTNQFSVALDGEVAPLTLNLDALT 205
+
+Query: 118 TTELSYEEVLNDGDVNIPE-YVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTG 176
+            T       ++       E  VDWR    + P+ +Q +CG CWAFS +  IE    I+  
+Sbjct: 206 PTATVIPATISSRKKRDTEPTVDWRP--FLKPILDQSTCGGCWAFSMISMIESFFAIQGY 263
+
+Query: 177 NLNEYSEQELLDCDRR--------SYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYC 228
+           N +  S Q+LL CD +        + GC GGY   A   +        +  P++     C
+Sbjct: 264 NTSSLSVQQLLTCDTKVDSTYGLANVGCKGGYFQIAGSYLEVSAARDASLIPFDLEDTSC 323
+
+Query: 229 RSREKGP-----------YAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLY 277
+            S    P           Y +      Q+    +  +   +   P++V + AAG D   Y
+Sbjct: 324 DSSFFPPVVPTILLFDDGYISGNFTAAQLITMEQN-IEDKVRKGPIAVGM-AAGPDIYKY 381
+
+Query: 278 RGGIFVGPCGNKVDHAVAAVGYGPNYILIKNSWGTGWGENGYIRIKRGTG 327
+             G++ G CG  ++HAV  VG+  +Y +I+NSWG  WGE GY R+KR  G
+Sbjct: 382 SEGVYDGDCGTIINHAVVIVGFTDDYWIIRNSWGASWGEAGYFRVKRTPG 431
+
+
+>Y113G7B.15 CE23295    (HINXTON) TR:Q9U2X1 protein_id:CAB54334.1
+          Length = 328
+
+ Score =  103 bits (256), Expect = 2e-22
+ Identities = 89/312 (28%), Positives = 132/312 (41%), Gaps = 40/312 (12%)
+
+Query: 53  LKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKK----NNSYWLGLNVFADMSNDEFKEKY 108
+           + H K Y+   EK  R   F  N + I E N K      +   G N FAD +  E   + 
+Sbjct: 1   MHHKKHYRTPAEKDRRLAHFAKNHQKIQELNAKARREGRNVTFGWNKFADKNRQELSARN 60
+
+Query: 109 TGSIAGNYTTTELSYEEVLNDGDVN------------IPEYVDWRQ-----KGAVTPVKN 151
+           +     N+T   + Y+     G  N            IP+Y D R         V PVK+
+Sbjct: 61  SKIHPKNHTDLPI-YKPRHPRGSRNHHNKRSKRQSGDIPDYFDLRDIYVDGSPVVGPVKD 119
+
+Query: 152 QGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDC--DRRSYGCNGGYPWSALQLVA 209
+           Q  CG CWAF+     E    + + +    S+QE+ DC     + GC GG P + L++V 
+Sbjct: 120 QEQCGCCWAFATTAITEAANTLYSKSFTSLSDQEICDCADSGDTPGCVGGDPRNGLKMVH 179
+
+Query: 210 QYGIHYRNTYPYE----GVQRYCRSREKGP--YAAKTDGVRQVQPYNEGALLYSI-ANQP 262
+             G      YPYE         C   EK         +  R  Q Y E  ++ ++  N  
+Sbjct: 180 LRGQSSDGDYPYEEYRANTTGNCVGDEKSTVIQPETLNVYRFDQDYAEEDIMENLYLNHI 239
+
+Query: 263 VSVVLEAAGKDFQLYRGGIFVGPCGNKVD----HAVAAVGYGPN-----YILIKNSWGTG 313
+            + V    G++F+ Y  G+       ++     H+VA VGYG +     Y L++NSW + 
+Sbjct: 240 PTAVYFRVGENFEWYTSGVLQSEDCYQMTPAEWHSVAIVGYGTSDDGVPYWLVRNSWNSD 299
+
+Query: 314 WGENGYIRIKRG 325
+           WG +GY++I+RG
+Sbjct: 300 WGLHGYVKIRRG 311
+
+
+>Y71H2AR.2 CE22930    (ST.LOUIS) protein_id:AAK29985.1
+          Length = 345
+
+ Score = 93.6 bits (231), Expect = 2e-19
+ Identities = 65/213 (30%), Positives = 106/213 (49%), Gaps = 29/213 (13%)
+
+Query: 131 DVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGI-IKIRTGNLNEYSEQELLDC 189
+           D    E++DWR+KG V PVK+QG C +  AF+   +IE +  K   G L  +SEQ+L+DC
+Sbjct: 79  DRTTEEFLDWREKGIVGPVKDQGKCNASHAFAITSSIESMYAKATNGTLLSFSEQQLIDC 138
+
+Query: 190 DRRSY-GCNGGYPWSALQLVAQYGIHYRNTYPY-EGVQRYC-----RSR---EKGPYAAK 239
+           + + Y GC   +  +A+  +A +GI     YPY +     C     +S+   +KG  A  
+Sbjct: 139 NDQGYKGCEEQFAMNAIGYLATHGIETEADYPYVDKTNEKCTFDSTKSKIHLKKGVVAEG 198
+
+Query: 240 TDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGGIF---VGPCGNKVD-HAVA 295
+            + + +V   N G   +++   P              Y+ GI+   +  C +  +  ++ 
+Sbjct: 199 NEVLGKVYVTNYGPAFFTMRAPP----------SLYDYKIGIYNPSIEECTSTHEIRSMV 248
+
+Query: 296 AVGYG----PNYILIKNSWGTGWGENGYIRIKR 324
+            VGYG      Y ++K S+GT WGE GY+++ R
+Sbjct: 249 IVGYGIEGEQKYWIVKGSFGTSWGEQGYMKLAR 281
+
+
+>K02E7.10 CE11640   protease (ST.LOUIS) TR:O17255 protein_id:AAB71030.1
+          Length = 299
+
+ Score = 93.6 bits (231), Expect = 2e-19
+ Identities = 64/219 (29%), Positives = 104/219 (47%), Gaps = 15/219 (6%)
+
+Query: 136 EYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGI-IKIRTGNLNEYSEQELLDCDRRSY 194
+           +++DWR+KG V PVK+QG C + +AF+A+  IE +  K   G L  +SEQ+++DC   + 
+Sbjct: 82  DFLDWREKGIVGPVKDQGKCNASYAFAAIAAIESMYAKANNGKLLSFSEQQIIDCANFTN 141
+
+Query: 195 GCNGGYP-WSALQLVAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGA 253
+            C        + + + + G+     YPY G +   +                V P  E A
+Sbjct: 142 PCQENLENVLSNRFLKENGVGTEADYPYVGKENVGKCEYDSSKMKLRPTYIDVYPNEEWA 201
+
+Query: 254 LLYSIANQPVSVVLEAAGKDFQLYRGGIF---VGPCGNKVD-HAVAAVGYGPN----YIL 305
+             + I           +   F  Y+ GI+      CGN  +  ++A VGYG +    Y +
+Sbjct: 202 RAH-ITTFGTGYFRMRSPPSFFHYKTGIYNPTKEECGNANEARSLAIVGYGKDGAEKYWI 260
+
+Query: 306 IKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTSSFYPVK 344
+           +K S+GT WGE+GY+++ R    +   CG+  S   P+K
+Sbjct: 261 VKGSFGTSWGEHGYMKLAR----NVNACGMAESISIPIK 295
+
+
+>F57F5.1 CE05999   cysteine protease (HINXTON) TR:Q20950
+           protein_id:CAB00098.1
+          Length = 400
+
+ Score = 91.3 bits (225), Expect = 8e-19
+ Identities = 84/315 (26%), Positives = 127/315 (39%), Gaps = 72/315 (22%)
+
+Query: 79  IDETNKKNNSYWLGLNVFADMSNDEFKEKYTGS----IAGNYTTTELSYEEVLNDGDVNI 134
+           +D  NK   S+   L  +     D  K++  G+    I   Y   E+++ EV    D  +
+Sbjct: 90  VDYVNKVQTSFKAELGSYFSSYPDTIKKQLMGAKMVEIPEEYRVFEMTHPEV---EDAAV 146
+
+Query: 135 PEYVD----WRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTG--NLNEYSEQELLD 188
+           P+  D    W    +++ +++Q SCGSCWA SA  TI   I I +    +   S  ++  
+Sbjct: 147 PDSFDSRTAWPNCPSISKIRDQSSCGSCWAVSAAETISDRICIASNAKTILSISADDINA 206
+
+Query: 189 CDRR--SYGCNGGYPWSALQLVAQ---------------------------YGIHYR--- 216
+           C       GCNGGYP  A +   +                            G HY+   
+Sbjct: 207 CCGMVCGNGCNGGYPIEAWRHYVKKGYVTGGSYQDKTGCKPYPYPPCEHHVNGTHYKPCP 266
+
+Query: 217 -NTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLE------- 268
+            N YP +  +R C++     Y          Q  + G   Y+++ +   +  E       
+Sbjct: 267 SNMYPTDKCERSCQAGYALTYQ---------QDLHFGQSAYAVSKKAAEIQKEIMTHGPV 317
+
+Query: 269 ----AAGKDFQLYRGGIFVGPCGNKV-DHAVAAVGYGPN----YILIKNSWGTGWGENGY 319
+                  +DF+ Y GG++V   G  +  HAV  +G+G +    Y L  NSW   WGENGY
+Sbjct: 318 EVAFTVYEDFEHYSGGVYVHTAGASLGGHAVKMLGWGVDNGTPYWLCANSWNEDWGENGY 377
+
+Query: 320 IRIKRGTGNSYGVCG 334
+            RI RG  N  G+ G
+Sbjct: 378 FRIIRGV-NECGIEG 391
+
+
+>T10H4.12 CE27590  locus:cpr-3 protease (HINXTON) TR:Q9TW93
+           protein_id:CAB61024.2
+          Length = 370
+
+ Score = 85.9 bits (211), Expect = 3e-17
+ Identities = 67/233 (28%), Positives = 104/233 (43%), Gaps = 42/233 (18%)
+
+Query: 134 IPEYVDWRQK----GAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNE--YSEQELL 187
+           +P+  D R+K      +  ++NQ +CGSCWAF A   I   + I++    +   S +++L
+Sbjct: 92  LPDTFDAREKWPDCNTIKLIRNQATCGSCWAFGAAEVISDRVCIQSNGTQQPVISVEDIL 151
+
+Query: 188 DCDRRS--YGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRY----------------CR 229
+            C   +  YGC GGY   AL+  A  G      Y   G   Y                C+
+Sbjct: 152 SCCGTTCGYGCKGGYSIEALRFWASSGAVTGGDYGGHGCMPYSFAPCTKNCPESTTPSCK 211
+
+Query: 230 SREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVV--------LEAAGK---DFQLYR 278
+           +  +  Y  KT+  ++ + Y   A   +       +         +EA+ K   DF  Y+
+Sbjct: 212 TTCQSSY--KTEEYKKDKHYGASAYKVTTTKSVTEIQTEIYHYGPVEASYKVYEDFYHYK 269
+
+Query: 279 GGIFVGPCGNKVD-HAVAAVGYGP----NYILIKNSWGTGWGENGYIRIKRGT 326
+            G++    G  V  HAV  +G+G     +Y LI NSWGT +GE G+ +I+RGT
+Sbjct: 270 SGVYHYTSGKLVGGHAVKIIGWGVENGVDYWLIANSWGTSFGEKGFFKIRRGT 322
+
+
+>C52E4.1 CE08943  locus:cpr-1 cathepsin-like cysteine protease (HINXTON)
+           TR:Q18783 protein_id:CAB01410.1
+          Length = 340
+
+ Score = 85.5 bits (210), Expect = 4e-17
+ Identities = 73/252 (28%), Positives = 102/252 (39%), Gaps = 36/252 (14%)
+
+Query: 107 KYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVT 166
+           KY  + +     TE   E VL            W +  ++  +++Q +CGSCWAF A   
+Sbjct: 75  KYAAAHSDEIRATE--QEVVLASVPATFDSRTQWSECKSIKLIRDQATCGSCWAFGAAEM 132
+
+Query: 167 IEGIIKIRTGNLNE--YSEQELLDCDRRSYG--CNGGYPWSALQLVAQYGIHYRNTYPYE 222
+           I     I T    +   S  +LL C   S G  C GGYP  AL+     G+     Y   
+Sbjct: 133 ISDRTCIETKGAQQPIISPDDLLSCCGSSCGNGCEGGYPIQALRWWDSKGVVTGGDYHGA 192
+
+Query: 223 GVQRY---------------------CRSREKGPYAA-KTDGVRQVQ-PYNEGALLYSI- 258
+           G + Y                     C+S     YA  K  GV     P N  ++   I 
+Sbjct: 193 GCKPYPIAPCTSGNCPESKTPSCSMSCQSGYSTAYAKDKHFGVSAYAVPKNAASIQAEIY 252
+
+Query: 259 ANQPVSVVLEAAGKDFQLYRGGIFVGPCGNKVD-HAVAAVGYGPN----YILIKNSWGTG 313
+           AN PV        +DF  Y+ G++    G  +  HA+  +G+G      Y L+ NSWG  
+Sbjct: 253 ANGPVEAAFSVY-EDFYKYKSGVYKHTAGKYLGGHAIKIIGWGTESGSPYWLVANSWGVN 311
+
+Query: 314 WGENGYIRIKRG 325
+           WGE+G+ +I RG
+Sbjct: 312 WGESGFFKIYRG 323
+
+
+>F26E4.3 CE17714   cysteine protease (HINXTON) TR:P90850
+           protein_id:CAB03007.1
+          Length = 491
+
+ Score = 80.5 bits (197), Expect = 1e-15
+ Identities = 66/233 (28%), Positives = 98/233 (41%), Gaps = 42/233 (18%)
+
+Query: 134 IPEYVDWRQKGA--VTPVKNQGSCGSCWAFSAV-VTIEGIIKIRTGNLNE-YSEQELLDC 189
+           +PE+ D R K    + PV +QG CGS W+ S   ++ + +  I  G +N   S Q+LL C
+Sbjct: 223 LPEHFDARDKWGPLIHPVADQGDCGSSWSVSTTAISSDRLAIISEGRINSTLSSQQLLSC 282
+
+Query: 190 DR-RSYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRY-------------------CR 229
+           ++ R  GC GGY   A   + + G+   + YPY   Q                     C 
+Sbjct: 283 NQHRQKGCEGGYLDRAWWYIRKLGVVGDHCYPYVSGQSREPGHCLIPKRDYTNRQGLRCP 342
+
+Query: 230 SREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGGIFV------ 283
+           S  +   A K     +V    E      + N PV        +DF +Y GG++       
+Sbjct: 343 SGSQDSTAFKMTPPYKVSSREEDIQTELMTNGPVQATF-VVHEDFFMYAGGVYQHSDLAA 401
+
+Query: 284 ---GPCGNKVDHAVAAVGYGPN--------YILIKNSWGTGWGENGYIRIKRG 325
+                   +  H+V  +G+G +        Y L  NSWGT WGE+GY ++ RG
+Sbjct: 402 QKGASSVAEGYHSVRVLGWGVDHSTGKPIKYWLCANSWGTQWGEDGYFKVLRG 454
+
+
+>W07B8.4 CE14680   thiol protease (ST.LOUIS) TR:O16288 protein_id:AAB65345.1
+          Length = 335
+
+ Score = 78.2 bits (191), Expect = 7e-15
+ Identities = 71/260 (27%), Positives = 108/260 (41%), Gaps = 60/260 (23%)
+
+Query: 133 NIPEYVD----WRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRT-GNLNE-YSEQEL 186
+           +IP+  D    W Q  +V  +++Q  CGSCWA +A   I     I + G++N   S +++
+Sbjct: 72  SIPDSYDVRDHWPQCISVNNIRDQSHCGSCWAVAAAEAISDRTCIASNGDVNTLLSAEDI 131
+
+Query: 187 LDCDRRSY----GCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDG 242
+           L C    +    GC GGYP  A +   + G+    ++     Q  C+     P     DG
+Sbjct: 132 LTCCTGKFNCGDGCEGGYPIQAWRYWVKNGLVTGGSFE---SQYGCKPYSIAPCGETIDG 188
+
+Query: 243 VRQVQ-----------------------PYNE----GALLYSIANQPVSVVLEAAG---- 271
+           V   +                       PY++    GA  Y+I      +  E       
+Sbjct: 189 VTWPECPMKISDTPKCEHHCTGNNSYPIPYDQDKHFGASAYAIGRSAKQIQTEILAHGPV 248
+
+Query: 272 -------KDFQLYRGGIFVGPCGNKV-DHAVAAVGYGPN----YILIKNSWGTGWGENGY 319
+                  +DF LY+ GI+    G ++  HAV  +G+G +    Y L  NSW T WGE GY
+Sbjct: 249 EVGFIVYEDFYLYKTGIYTHVAGGELGGHAVKMLGWGVDNGTPYWLAANSWNTVWGEKGY 308
+
+Query: 320 IRIKRGTGNSYGVCGLYTSS 339
+            RI RG       CG+ +++
+Sbjct: 309 FRILRGVDE----CGIESAA 324
+
+
+>C32B5.7 CE08515   cathepsin-like peptidase (ST.LOUIS) TR:P91111
+           protein_id:AAB37963.1
+          Length = 250
+
+ Score = 73.9 bits (180), Expect = 1e-13
+ Identities = 57/189 (30%), Positives = 87/189 (45%), Gaps = 22/189 (11%)
+
+Query: 137 YVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGC 196
+           ++DWR +G V PVK+QG+C + +AF+A+  IE +  I  G L  +SEQ+++DC     GC
+Sbjct: 71  FLDWRDEGVVGPVKDQGNCNASYAFAAISAIESMYAIANGQLLSFSEQQIIDC---LGGC 127
+
+Query: 197 N-GGYPWSALQLVAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPY---NEG 252
+                P  A+  + + GI     YP+ G     +  EK  Y +K   +     Y   +E 
+Sbjct: 128 AIESDPMMAMTYLERKGIETYTDYPFVG-----KKNEKCEYDSKKAYLILDDTYDMSDES 182
+
+Query: 253 ALLYSIANQPVSVVLEAAGKDFQLYRGGIFVGPC-----GNKVDHAVAAVGY----GPNY 303
+             L  I  +   +        F  Y+ GI+  P            A+  VGY    G NY
+Sbjct: 183 LALVFIDERGPGLFTMNTPPSFFNYKSGIY-NPTEEECKSTNEKRALTIVGYGNDKGQNY 241
+
+Query: 304 ILIKNSWGT 312
+            ++K S+GT
+Sbjct: 242 WIVKGSFGT 250
+
+
+>Y71H2AM.3 CE26272    (ST.LOUIS) protein_id:AAK29976.1
+          Length = 716
+
+ Score = 71.2 bits (173), Expect = 9e-13
+ Identities = 42/116 (36%), Positives = 60/116 (51%), Gaps = 9/116 (7%)
+
+Query: 136 EYVDWRQKGAVTPVKNQGSCGSCWAFSAVVTIEGI-IKIRTGNLNEYSEQELLDCDRRSY 194
+           E++DWR KG V PVK+QG C +  AF+   +IE +  K   G+L  +SEQ+L+DCD   +
+Sbjct: 84  EFLDWRDKGIVGPVKDQGKCNASHAFAISSSIESMYAKATNGSLLSFSEQQLIDCDDHGF 143
+
+Query: 195 -GCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPY 249
+            GC      +A+     +GI     YPY G       +E G  + +T G   V  Y
+Sbjct: 144 KGCEEQPAINAVSYFIFHGIETEADYPYAG-------KENGKLSNETQGKELVTNY 192
+
+
+>F32B5.8 CE09855   cysteine proteinase (ST.LOUIS) TR:O01850
+           protein_id:AAB54210.1
+          Length = 427
+
+ Score = 69.7 bits (169), Expect = 2e-12
+ Identities = 58/217 (26%), Positives = 91/217 (41%), Gaps = 28/217 (12%)
+
+Query: 133 NIPEYVDWRQKGAVTPV---KNQGS---CGSCWAFSAVVTIEGIIKIRTGNL---NEYSE 183
+           ++P+  DWR    +      +NQ     CGSCWAF A   +   I I+  N       S 
+Sbjct: 185 DLPKTWDWRDANGINYASADRNQHIPQYCGSCWAFGATSALADRINIKRKNAWPQAYLSV 244
+
+Query: 184 QELLDCDRRSYGCNGGYPWSALQLVAQYGIHYRNTYPYEGVQRYC----RSREKGP---Y 236
+           QE++DC        GG P    +   ++GI +     Y+     C    R     P   +
+Sbjct: 245 QEVIDCSGAGTCVMGGEPGGVYKYAHEHGIPHETCNNYQARDGKCDPYNRCGSCWPGECF 304
+
+Query: 237 AAKTDGVRQVQPYN-----EGALLYSIANQPVSVVLEAAGKDFQLYRGGIFVGPCGNKVD 291
+           + K   + +V  Y      E          P++  + AA K F+ Y GGI+       +D
+Sbjct: 305 SIKNYTLYKVSEYGTVHGYEKMKAEIYHKGPIACGI-AATKAFETYAGGIYKEVTDEDID 363
+
+Query: 292 HAVAAVGYGPN------YILIKNSWGTGWGENGYIRI 322
+           H ++  G+G +      Y + +NSWG  WGE+G+ +I
+Sbjct: 364 HIISVHGWGVDHESGVEYWIGRNSWGEPWGEHGWFKI 400
+
+
+  Database: /data_2/jason/blastdb/wormpep62
+    Posted date:  Sep 3, 2001  2:17 PM
+  Number of letters in database: 8,813,425
+  Number of sequences in database:  20,085
+  
+Lambda     K      H
+   0.318    0.138    0.428 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 6611257
+Number of Sequences: 20085
+Number of extensions: 311359
+Number of successful extensions: 788
+Number of sequences better than 1.0e-10: 19
+Number of HSP's better than  0.0 without gapping: 6
+Number of HSP's successfully gapped in prelim test: 13
+Number of HSP's that attempted gapping in prelim test: 741
+Number of HSP's gapped (non-prelim): 23
+length of query: 345
+length of database: 8,813,425
+effective HSP length: 44
+effective length of query: 301
+effective length of database: 7,929,685
+effective search space: 2386835185
+effective search space used: 2386835185
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 41 (21.7 bits)
+S2: 156 (64.7 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,18 @@
+>HSEARLOBE 321bp
+gctcggactctatctagcagaaacctcgttcagctagtcttgcttcatggaggtttgatc
+tagactgcaaacgtcggtgctaaaagaccatacttccgtatgtgcctatcgggagcagtc
+gctgagaagtgcggaatgatccttcaatgaccgccgttaaagcctgggagtccgcgccac
+aatcattccatatacagcaacacgcgctacgcggacctctcggtgggtgacgattctatt
+gaggcgttgaagcgagaaagatattccgattcttttcgagtctatagttaaatcggactg
+catcatccattttagggcata
+>HSMETOO 134bp
+ggccgggatggccggacctgttctgaacatcttatatccacccgaacaagttataaacaa
+tttaaatctgggcggccatctataagcgtgtcttcagtatgagagtcttcggatatcacg
+acccattaggaaag
+>empty1 an empty seq well-formatted
+
+>empty2 an empty seq without subsequent empty line
+>MMWHISK 62bp
+gctgcctctatcaagggtgaaaattgcctgcccggctgggtagacactcgcaccactccc
+cg
+>empty3 empty seq at the end of the file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq.qual
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq.qual	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq.qual	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,19 @@
+>HSEARLOBE 321bp 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+>HSMETOO 134bp
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+>empty1 an empty seq well-formatted
+
+>empty2 an empty seq without subsequent empty line
+>MMWHISK 62bp
+50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 
+50 50 50 50 50 50 50 50 50 50 50 50 
+>empty3 empty seq at the end of the file


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/multifa.seq.qual
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multiline-intrablock-comment.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multiline-intrablock-comment.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multiline-intrablock-comment.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,39 @@
+#NEXUS
+
+BEGIN TAXA;
+[block-level 
+comment 
+in 
+the 
+TAXA 
+block]
+      dimensions ntax=8;
+      taxlabels A B C D E F G H;  
+END;
+
+BEGIN CHARACTERS;
+[block-level comment 
+in the 
+CHARACTERS block]
+
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels 1 2 3 4 5;
+      matrix
+A     --ONE
+B     --ONE
+C     TWO--
+D     THREE
+E     F-OUR
+F     FIVE-
+G     SIX--
+H     SEVEN;
+END;
+
+BEGIN TREES;
+[block-level 
+comment 
+                                  in the TREES block]
+
+       tree ladder = (((((((A:1,B:1):1,C:2):1,D:3):1,E:4):1,F:5):1,G:6):1,H:7);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/multiseq.bls
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/multiseq.bls	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/multiseq.bls	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,499 @@
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10038724|dbj|BAB12759.1| DNA-binding protein hu-alpha
+[Buchnera sp. APS]
+         (92 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.310    0.127    0.323
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 99
+Number of Sequences: 1
+Number of extensions: 4
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 92
+length of database: 607
+effective HSP length: 24
+effective length of query: 68
+effective length of database: 583
+effective search space:    39644
+effective search space used:    39644
+T: 11
+A: 40
+X1: 16 ( 7.2 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 42 (21.8 bits)
+S2: 156 (65.2 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10038744|dbj|BAB12779.1| DNA primase [Buchnera sp. APS]
+         (577 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits) 
+Value
+
+gi|12045104|ref|NP_072915.1| DNA primase (dnaE) [Mycoplasma gen...   125  7e-33
+
+>gi|12045104|ref|NP_072915.1| DNA primase (dnaE) [Mycoplasma
+           genitalium]
+          Length = 607
+
+ Score =  125 bits (310), Expect = 7e-33
+ Identities = 118/461 (25%), Positives = 222/461 (47%), Gaps = 57/461 (12%)
+
+Query: 10  ITELLSRTNIIELI-NTRLELKKYGKNYQTNCPFHHDKTPSFTVSNEKQFYYCFGCNAHG 68
+           + ELL +  I E+I +  ++++  G +    CPFH DK PS ++S+ K  + C+ CNA G
+Sbjct: 8   LDELLKQIKITEIIQHYGVKIQTKGNSLLALCPFHDDKNPSMSISSSKNIFKCWACNAAG 67
+
+Query: 69  NAIDFLIQYEHLSFIESIEELALIHGVKIPFENTVQNSIYVKKQKLYLLMEKICKLY--- 125
+           N I F+ +++ L +  ++++   I G+K+   N+   +    KQK Y  +      Y
+Sbjct: 68  NGIAFIQKHDQLDWKTALKKAIEICGIKLENWNSNLLTKVDPKQKRYWEINNALITYYQT 127
+
+Query: 126 --KKNINVTHLANKYLARRGINQNMIDFFLIGFSSLKWNEFYKKINISKEFEQELLINNI 183
+             K+  N   + N  + +R +N+ +I+ F +G +    +++     + +  E+   IN
+Sbjct: 128 RLKRETNPNGM-NYLVEKRKLNKTLIEQFQLGLAFHNEDKY-----LCESMERYPFINPK 181
+
+Query: 184 I---------ATDKNGY-IYD------RFQGRIIFPIQDNHGRIIGFGGRSLNDMSP-KY 226
+           I          T++ G   +D       FQ +I+ PI D +G  +GF  RS+++++  KY
+Sbjct: 182 IKPSELYLFSKTNQQGLGFFDFNTKKATFQNQIMIPIHDFNGNPVGFSARSVDNINKLKY 241
+
+Query: 227 LNSPETDIFYKRKQIYGLYQVIKKCSKPVYLLVVEGYIDVITLTQYNIDYAVSILGTSTT 286
+            NS + + F K + ++  +++ K  ++   L +VEGY DV TLT    + AV+++G +
+Sbjct: 242 KNSADHEFFKKGELLFNFHRLNKNLNQ---LFIVEGYFDVFTLTNSKFE-AVALMGLALN 297
+
+Query: 287 TEHIQLL---FKNTDIIICCYDGDDAGKNAAWKTLKKALPYISDKKTLKFILL--PNQED 341
+              I+ +   FK    ++   D D +G+NA +  ++K    +++   +  I+    N +D
+Sbjct: 298 DVQIKAIKAHFKELQTLVLALDNDASGQNAVFSLIEK----LNNNNFIVEIVQWEHNYKD 353
+
+Query: 342 PDTIIRKEGREKF----QKRIDNAITMSKFFFKNILKNINLSSDDDKFHLSVHALPLINT 397
+            D +   +G E+      KR +    +  FF K  L    +++    F      L    T
+Sbjct: 354 WDELYLNKGSEQVILQANKRQNLIEYLVSFFKKQQLDQRVITNKIIAF------LTKNQT 407
+
+Query: 398 ISSD-TIRIYLRQILARMIGILDDNQFEKFLYEKETKNTQK 437
+           I +D +  I+L + L +++   D    EK LYE   K+ +K
+Sbjct: 408 ILNDHSFLIFLIKNLVKLLEYSD----EKTLYETVLKHKEK 444
+
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.322    0.140    0.406
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 932
+Number of Sequences: 1
+Number of extensions: 63
+Number of successful extensions: 4
+Number of sequences better than 1.0e-15: 1
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 1
+Number of HSP's that attempted gapping in prelim test: 1
+Number of HSP's gapped (non-prelim): 1
+length of query: 577
+length of database: 607
+effective HSP length: 24
+effective length of query: 553
+effective length of database: 583
+effective search space:   322399
+effective search space used:   322399
+T: 11
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.9 bits)
+S2: 164 (68.3 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10038814|dbj|BAB12849.1| integration host factor
+alpha-subunit [Buchnera sp. APS]
+         (102 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.321    0.138    0.372
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 101
+Number of Sequences: 1
+Number of extensions: 4
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 102
+length of database: 607
+effective HSP length: 21
+effective length of query: 81
+effective length of database: 586
+effective search space:    47466
+effective search space used:    47466
+T: 11
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.9 bits)
+S2: 157 (65.6 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10038947|dbj|BAB12982.1| DNA-binding protein H-ns [Buchnera
+sp. APS]
+         (135 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.319    0.137    0.390
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 132
+Number of Sequences: 1
+Number of extensions: 3
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 135
+length of database: 607
+effective HSP length: 23
+effective length of query: 112
+effective length of database: 584
+effective search space:    65408
+effective search space used:    65408
+T: 11
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.7 bits)
+S2: 158 (66.0 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10038982|dbj|BAB13017.1| integration host factor
+beta-subunit [Buchnera sp. APS]
+         (94 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.319    0.136    0.372
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 114
+Number of Sequences: 1
+Number of extensions: 6
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 94
+length of database: 607
+effective HSP length: 21
+effective length of query: 73
+effective length of database: 586
+effective search space:    42778
+effective search space used:    42778
+T: 11
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.7 bits)
+S2: 157 (65.6 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10038996|dbj|BAB13030.1| cold shock-like protein cspC
+[Buchnera sp. APS]
+         (69 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.316    0.136    0.399
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 100
+Number of Sequences: 1
+Number of extensions: 5
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 69
+length of database: 607
+effective HSP length: 20
+effective length of query: 49
+effective length of database: 587
+effective search space:    28763
+effective search space used:    28763
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.6 bits)
+S2: 155 (64.8 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10039073|dbj|BAB13107.1| carbon storage regulator [Buchnera
+sp. APS]
+         (57 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.318    0.138    0.362
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 64
+Number of Sequences: 1
+Number of extensions: 2
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 57
+length of database: 607
+effective HSP length: 22
+effective length of query: 35
+effective length of database: 585
+effective search space:    20475
+effective search space used:    20475
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.7 bits)
+S2: 154 (64.4 bits)
+BLASTP 2.1.2 [Oct-19-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= gi|10039151|dbj|BAB13185.1| cold shock-like protein cspE
+[Buchnera sp. APS]
+         (69 letters)
+
+Database: mycge
+           1 sequences; 607 total letters
+
+Searchingdone
+
+ ***** No hits found ******
+
+  Database: mycge
+    Posted date:  May 8, 2001  3:12 PM
+  Number of letters in database: 607
+  Number of sequences in database:  1
+
+Lambda     K      H
+   0.313    0.132    0.375
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 80
+Number of Sequences: 1
+Number of extensions: 2
+Number of successful extensions: 0
+Number of sequences better than 1.0e-15: 0
+Number of HSP's better than  0.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 69
+length of database: 607
+effective HSP length: 21
+effective length of query: 48
+effective length of database: 586
+effective search space:    28128
+effective search space used:    28128
+T: 11
+A: 40
+X1: 16 ( 7.2 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 42 (21.9 bits)
+S2: 155 (64.8 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mus.bls.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mus.bls.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mus.bls.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,660 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastn</BlastOutput_program>
+  <BlastOutput_version>blastn 2.2.6 [Apr-09-2003]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>Hs15_up1000</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>NM_011441_up_1000_chr1_4505586_r chr1:4505586-4506585</BlastOutput_query-def>
+  <BlastOutput_query-len>1000</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_expect>10</Parameters_expect>
+      <Parameters_sc-match>1</Parameters_sc-match>
+      <Parameters_sc-mismatch>-3</Parameters_sc-mismatch>
+      <Parameters_gap-open>5</Parameters_gap-open>
+      <Parameters_gap-extend>2</Parameters_gap-extend>
+      <Parameters_filter>D</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|3153</Hit_id>
+          <Hit_def>NM_001938_up_1000_chr1_93161154_f chr1:93161154-93162153</Hit_def>
+          <Hit_accession>3153</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>36.1753</Hsp_bit-score>
+              <Hsp_score>18</Hsp_score>
+              <Hsp_evalue>0.218116</Hsp_evalue>
+              <Hsp_query-from>881</Hsp_query-from>
+              <Hsp_query-to>860</Hsp_query-to>
+              <Hsp_hit-from>881</Hsp_hit-from>
+              <Hsp_hit-to>902</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>21</Hsp_identity>
+              <Hsp_positive>21</Hsp_positive>
+              <Hsp_align-len>22</Hsp_align-len>
+              <Hsp_qseq>GGAGCGGCTTCCTGCAAGCCTT</Hsp_qseq>
+              <Hsp_hseq>GGAGCGGCTTCCTGCAAACCTT</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||| ||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>2</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|17120</Hit_id>
+          <Hit_def>NM_152652_up_1000_chr16_30403002_f chr16:30403002-30404001</Hit_def>
+          <Hit_accession>17120</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>817</Hsp_query-from>
+              <Hsp_query-to>833</Hsp_query-to>
+              <Hsp_hit-from>219</Hsp_hit-from>
+              <Hsp_hit-to>235</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>ATACCCACCCCCACCCC</Hsp_qseq>
+              <Hsp_hseq>ATACCCACCCCCACCCC</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>3</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|17093</Hit_id>
+          <Hit_def>NM_145239_up_1000_chr16_29821041_f chr16:29821041-29822040</Hit_def>
+          <Hit_accession>17093</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>440</Hsp_query-from>
+              <Hsp_query-to>424</Hsp_query-to>
+              <Hsp_hit-from>157</Hsp_hit-from>
+              <Hsp_hit-to>173</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>CTGTGGAGAGCAAGTTC</Hsp_qseq>
+              <Hsp_hseq>CTGTGGAGAGCAAGTTC</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>4</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|14075</Hit_id>
+          <Hit_def>NM_013442_up_1000_chr9_35093156_r chr9:35093156-35094155</Hit_def>
+          <Hit_accession>14075</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>158</Hsp_query-from>
+              <Hsp_query-to>142</Hsp_query-to>
+              <Hsp_hit-from>573</Hsp_hit-from>
+              <Hsp_hit-to>589</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>GGTCAGTCCCCAGTGGG</Hsp_qseq>
+              <Hsp_hseq>GGTCAGTCCCCAGTGGG</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>5</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|13691</Hit_id>
+          <Hit_def>NM_153360_up_1000_chr20_57728371_r chr20:57728371-57729370</Hit_def>
+          <Hit_accession>13691</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>372</Hsp_query-from>
+              <Hsp_query-to>388</Hsp_query-to>
+              <Hsp_hit-from>941</Hsp_hit-from>
+              <Hsp_hit-to>957</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>CGCCGGGGGCTGTCTCC</Hsp_qseq>
+              <Hsp_hseq>CGCCGGGGGCTGTCTCC</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>6</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12570</Hit_id>
+          <Hit_def>NM_018099_up_1000_chr12_29201400_f chr12:29201400-29202399</Hit_def>
+          <Hit_accession>12570</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>818</Hsp_query-from>
+              <Hsp_query-to>834</Hsp_query-to>
+              <Hsp_hit-from>261</Hsp_hit-from>
+              <Hsp_hit-to>277</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>TACCCACCCCCACCCCA</Hsp_qseq>
+              <Hsp_hseq>TACCCACCCCCACCCCA</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>7</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|10324</Hit_id>
+          <Hit_def>NM_002556_up_1000_chr11_59634501_r chr11:59634501-59635500</Hit_def>
+          <Hit_accession>10324</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>424</Hsp_query-from>
+              <Hsp_query-to>408</Hsp_query-to>
+              <Hsp_hit-from>978</Hsp_hit-from>
+              <Hsp_hit-to>994</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>CATGAGGGCGGAAAAGG</Hsp_qseq>
+              <Hsp_hseq>CATGAGGGCGGAAAAGG</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>8</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|7243</Hit_id>
+          <Hit_def>NM_003687_up_1000_chr5_131623250_f chr5:131623250-131624249</Hit_def>
+          <Hit_accession>7243</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>34.193</Hsp_bit-score>
+              <Hsp_score>17</Hsp_score>
+              <Hsp_evalue>0.861861</Hsp_evalue>
+              <Hsp_query-from>46</Hsp_query-from>
+              <Hsp_query-to>62</Hsp_query-to>
+              <Hsp_hit-from>338</Hsp_hit-from>
+              <Hsp_hit-to>354</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>17</Hsp_identity>
+              <Hsp_positive>17</Hsp_positive>
+              <Hsp_align-len>17</Hsp_align-len>
+              <Hsp_qseq>GGCCTGAATGTTCTGGG</Hsp_qseq>
+              <Hsp_hseq>GGCCTGAATGTTCTGGG</Hsp_hseq>
+              <Hsp_midline>|||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>9</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|13146</Hit_id>
+          <Hit_def>NM_002442_up_1000_chr12_120589812_r chr12:120589812-120590811</Hit_def>
+          <Hit_accession>13146</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>835</Hsp_query-from>
+              <Hsp_query-to>820</Hsp_query-to>
+              <Hsp_hit-from>414</Hsp_hit-from>
+              <Hsp_hit-to>429</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CTGGGGTGGGGGTGGG</Hsp_qseq>
+              <Hsp_hseq>CTGGGGTGGGGGTGGG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>10</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12936</Hit_id>
+          <Hit_def>NM_018057_up_1000_chr12_85239491_r chr12:85239491-85240490</Hit_def>
+          <Hit_accession>12936</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>265</Hsp_query-from>
+              <Hsp_query-to>280</Hsp_query-to>
+              <Hsp_hit-from>793</Hsp_hit-from>
+              <Hsp_hit-to>808</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>GGGAGGTGCGGAGCCC</Hsp_qseq>
+              <Hsp_hseq>GGGAGGTGCGGAGCCC</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>11</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12754</Hit_id>
+          <Hit_def>NM_031157_up_1000_chr12_54390235_f chr12:54390235-54391234</Hit_def>
+          <Hit_accession>12754</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>296</Hsp_query-from>
+              <Hsp_query-to>311</Hsp_query-to>
+              <Hsp_hit-from>561</Hsp_hit-from>
+              <Hsp_hit-to>576</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>GTGTTCTCAGAGGCAG</Hsp_qseq>
+              <Hsp_hseq>GTGTTCTCAGAGGCAG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>12</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12753</Hit_id>
+          <Hit_def>NM_002136_up_1000_chr12_54390235_f chr12:54390235-54391234</Hit_def>
+          <Hit_accession>12753</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>296</Hsp_query-from>
+              <Hsp_query-to>311</Hsp_query-to>
+              <Hsp_hit-from>561</Hsp_hit-from>
+              <Hsp_hit-to>576</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>GTGTTCTCAGAGGCAG</Hsp_qseq>
+              <Hsp_hseq>GTGTTCTCAGAGGCAG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>13</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12752</Hit_id>
+          <Hit_def>NM_012117_up_1000_chr12_54390632_r chr12:54390632-54391631</Hit_def>
+          <Hit_accession>12752</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>311</Hsp_query-from>
+              <Hsp_query-to>296</Hsp_query-to>
+              <Hsp_hit-from>822</Hsp_hit-from>
+              <Hsp_hit-to>837</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CTGCCTCTGAGAACAC</Hsp_qseq>
+              <Hsp_hseq>CTGCCTCTGAGAACAC</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>14</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12701</Hit_id>
+          <Hit_def>NM_058242_up_1000_chr12_52603721_r chr12:52603721-52604720</Hit_def>
+          <Hit_accession>12701</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>721</Hsp_query-from>
+              <Hsp_query-to>706</Hsp_query-to>
+              <Hsp_hit-from>390</Hsp_hit-from>
+              <Hsp_hit-to>405</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>ATCCCCAAAGCAGACC</Hsp_qseq>
+              <Hsp_hseq>ATCCCCAAAGCAGACC</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>15</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12700</Hit_id>
+          <Hit_def>NM_005554_up_1000_chr12_52603768_r chr12:52603768-52604767</Hit_def>
+          <Hit_accession>12700</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>721</Hsp_query-from>
+              <Hsp_query-to>706</Hsp_query-to>
+              <Hsp_hit-from>437</Hsp_hit-from>
+              <Hsp_hit-to>452</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>ATCCCCAAAGCAGACC</Hsp_qseq>
+              <Hsp_hseq>ATCCCCAAAGCAGACC</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>16</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|12343</Hit_id>
+          <Hit_def>NM_152441_up_1000_chr12_1582593_r chr12:1582593-1583592</Hit_def>
+          <Hit_accession>12343</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>270</Hsp_query-from>
+              <Hsp_query-to>285</Hsp_query-to>
+              <Hsp_hit-from>850</Hsp_hit-from>
+              <Hsp_hit-to>865</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>GTGCGGAGCCCGGGGC</Hsp_qseq>
+              <Hsp_hseq>GTGCGGAGCCCGGGGC</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>17</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|10984</Hit_id>
+          <Hit_def>NM_173696_up_1000_chrX_8347800_r chrX:8347800-8348799</Hit_def>
+          <Hit_accession>10984</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>821</Hsp_query-from>
+              <Hsp_query-to>836</Hsp_query-to>
+              <Hsp_hit-from>51</Hsp_hit-from>
+              <Hsp_hit-to>66</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CCACCCCCACCCCAGT</Hsp_qseq>
+              <Hsp_hseq>CCACCCCCACCCCAGT</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>18</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|10496</Hit_id>
+          <Hit_def>NM_004561_up_1000_chr11_65810324_f chr11:65810324-65811323</Hit_def>
+          <Hit_accession>10496</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>835</Hsp_query-from>
+              <Hsp_query-to>820</Hsp_query-to>
+              <Hsp_hit-from>815</Hsp_hit-from>
+              <Hsp_hit-to>830</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CTGGGGTGGGGGTGGG</Hsp_qseq>
+              <Hsp_hseq>CTGGGGTGGGGGTGGG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>19</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|7650</Hit_id>
+          <Hit_def>NM_014123_up_1000_chr6_4801623_r chr6:4801623-4802622</Hit_def>
+          <Hit_accession>7650</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>323</Hsp_query-from>
+              <Hsp_query-to>308</Hsp_query-to>
+              <Hsp_hit-from>345</Hsp_hit-from>
+              <Hsp_hit-to>360</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>-1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>GTGCCTCAGAGTCTGC</Hsp_qseq>
+              <Hsp_hseq>GTGCCTCAGAGTCTGC</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>20</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|7303</Hit_id>
+          <Hit_def>NM_004730_up_1000_chr5_137909767_r chr5:137909767-137910766</Hit_def>
+          <Hit_accession>7303</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>733</Hsp_query-from>
+              <Hsp_query-to>752</Hsp_query-to>
+              <Hsp_hit-from>163</Hsp_hit-from>
+              <Hsp_hit-to>182</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>19</Hsp_identity>
+              <Hsp_positive>19</Hsp_positive>
+              <Hsp_align-len>20</Hsp_align-len>
+              <Hsp_qseq>AGTAGCCACCTAGAAATCCT</Hsp_qseq>
+              <Hsp_hseq>AGTAGCCACCTAAAAATCCT</Hsp_hseq>
+              <Hsp_midline>|||||||||||| |||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>21</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|6269</Hit_id>
+          <Hit_def>NM_005750_up_1000_chr4_5518610_f chr4:5518610-5519609</Hit_def>
+          <Hit_accession>6269</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>820</Hsp_query-from>
+              <Hsp_query-to>835</Hsp_query-to>
+              <Hsp_hit-from>277</Hsp_hit-from>
+              <Hsp_hit-to>292</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CCCACCCCCACCCCAG</Hsp_qseq>
+              <Hsp_hseq>CCCACCCCCACCCCAG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>22</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|5123</Hit_id>
+          <Hit_def>NM_022915_up_1000_chr2_224785153_f chr2:224785153-224786152</Hit_def>
+          <Hit_accession>5123</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>374</Hsp_query-from>
+              <Hsp_query-to>389</Hsp_query-to>
+              <Hsp_hit-from>934</Hsp_hit-from>
+              <Hsp_hit-to>949</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CCGGGGGCTGTCTCCG</Hsp_qseq>
+              <Hsp_hseq>CCGGGGGCTGTCTCCG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+        <Hit>
+          <Hit_num>23</Hit_num>
+          <Hit_id>gnl|BL_ORD_ID|3532</Hit_id>
+          <Hit_def>NM_021948_up_1000_chr1_153388826_f chr1:153388826-153389825</Hit_def>
+          <Hit_accession>3532</Hit_accession>
+          <Hit_len>1000</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>32.2106</Hsp_bit-score>
+              <Hsp_score>16</Hsp_score>
+              <Hsp_evalue>3.40554</Hsp_evalue>
+              <Hsp_query-from>820</Hsp_query-from>
+              <Hsp_query-to>835</Hsp_query-to>
+              <Hsp_hit-from>305</Hsp_hit-from>
+              <Hsp_hit-to>320</Hsp_hit-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>16</Hsp_identity>
+              <Hsp_positive>16</Hsp_positive>
+              <Hsp_align-len>16</Hsp_align-len>
+              <Hsp_qseq>CCCACCCCCACCCCAG</Hsp_qseq>
+              <Hsp_hseq>CCCACCCCCACCCCAG</Hsp_hseq>
+              <Hsp_midline>||||||||||||||||</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>17516</Statistics_db-num>
+          <Statistics_db-len>17516000</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>1.69255e+10</Statistics_eff-space>
+          <Statistics_kappa>0.710605</Statistics_kappa>
+          <Statistics_lambda>1.37407</Statistics_lambda>
+          <Statistics_entropy>1.30725</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mutations.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mutations.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mutations.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,350 @@
+ID           M20132:(362)c.+4G>A; E2K
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 4
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4 (M20132::366)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature        /codon_table: 1
+Feature        /codon: gaa>aaa; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, conservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: E>K
+//
+ID           M20132:(362)c.+14T>A; L5X
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 14
+Feature        /upflank: ccaagctcaaggatggaagtgcagt
+Feature        /change: t>a
+Feature        /dnflank: agggctgggaagggtctaccctcgg
+Feature      RNA; 1
+Feature        /label: nonsense
+Feature        /proof: experimental
+Feature        /location: 14 (M20132::376)
+Feature        /upflank: ccaagctcaaggatggaagtgcagt
+Feature        /change: t>a
+Feature        /dnflank: agggctgggaagggtctaccctcgg
+Feature        /codon_table: 1
+Feature        /codon: tta>taa; 2
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: truncation
+Feature        /proof: computed
+Feature        /location: 5
+Feature        /change: L>*
+//
+ID           M20132:(362)c.+4G>A; E2K
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 4
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4 (M20132::366)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature        /codon_table: 1
+Feature        /codon: gaa>aaa; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, conservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: E>K
+//
+ID           M20132:(362)c.+100delATCCAG; I34del-2
+Feature      DNA; 1
+Feature        /label: deletion
+Feature        /proof: computed
+Feature        /location: 100..105
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: atccag>
+Feature        /dnflank: aacccgggccccaggcacccagagg
+Feature        /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV
+Feature      RNA; 1
+Feature        /label: inframe, deletion
+Feature        /proof: experimental
+Feature        /location: 100..105 (M20132::462..467)
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: atccag>
+Feature        /dnflank: aacccgggccccaggcacccagagg
+Feature        /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: deletion
+Feature        /proof: computed
+Feature        /location: 34..35
+Feature        /change: IQ>
+//
+ID           M20132:(362)c.+101delT; I34delX172
+Feature      DNA; 1
+Feature        /label: deletion
+Feature        /proof: computed
+Feature        /location: 101
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: t>
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I
+Feature      RNA; 1
+Feature        /label: frameshift, deletion
+Feature        /proof: experimental
+Feature        /location: 101 (M20132::463)
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: t>
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 2
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: out-of-frame translation, truncation
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAP
+Feature         GSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPAR
+Feature         GCRSSCQHLRTRMTQLPHPRCPCWAPLSPA*
+//
+ID           M20132:(362)c.+101insGGGCCC; I34ins+2
+Feature      DNA; 1
+Feature        /label: insertion
+Feature        /proof: computed
+Feature        /location: 100^101
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: >gggccc
+Feature        /dnflank: tccagaacccgggccccaggcaccc
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV,
+Feature         +SduI
+Feature      RNA; 1
+Feature        /label: inframe, insertion
+Feature        /proof: experimental
+Feature        /location: 100^101 (M20132::462^463)
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: >gggccc
+Feature        /dnflank: tccagaacccgggccccaggcaccc
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV,
+Feature         +SduI
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 2
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: insertion, complex
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>RAL
+//
+ID           M20132:(362)c.+100insG; I34ins81X
+Feature      DNA; 1
+Feature        /label: insertion
+Feature        /proof: computed
+Feature        /location: 99^100
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: >g
+Feature        /dnflank: atccagaacccgggccccaggcacc
+Feature        /re_site: +BamHI, +BinI, +NlaIV, +XhoII
+Feature      RNA; 1
+Feature        /label: frameshift, insertion
+Feature        /proof: experimental
+Feature        /location: 99^100 (M20132::461^462)
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: >g
+Feature        /dnflank: atccagaacccgggccccaggcacc
+Feature        /re_site: +BamHI, +BinI, +NlaIV, +XhoII
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: out-of-frame translation, truncation
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*
+//
+ID           M20132:(362)c.+100AT>GGGCCC; I34ins82X
+Feature      DNA; 1
+Feature        /label: complex
+Feature        /proof: computed
+Feature        /location: 100..101
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: at>gggccc
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI
+Feature      RNA; 1
+Feature        /label: frameshift, complex
+Feature        /proof: experimental
+Feature        /location: 100..101 (M20132::462..463)
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: at>gggccc
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: out-of-frame translation, truncation
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*
+//
+ID           M20132:(362+1)c.-1G>A
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: -1
+Feature        /upflank: ggtggaagattcagccaagctcaag
+Feature        /change: g>a
+Feature        /dnflank: atggaagtgcagttagggctgggaa
+Feature        /re_site: -BccI, -FokI, +Hpy178III
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: experimental
+Feature        /location: -1 (M20132::361)
+Feature        /upflank: ggtggaagattcagccaagctcaag
+Feature        /change: g>a
+Feature        /dnflank: atggaagtgcagttagggctgggaa
+Feature        /re_site: -BccI, -FokI, +Hpy178III
+Feature        /region: 5'UTR
+//
+ID           M20132:(362)c.+2766T>C
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 2766
+Feature        /upflank: tctatttccacacccagtgaagcat
+Feature        /change: t>c
+Feature        /dnflank: ggaaaccctatttccccaccccagc
+Feature        /re_site: +Hpy188I, +SfaNI, -XcmI
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: experimental
+Feature        /location: 2766 (M20132::3128)
+Feature        /upflank: tctatttccacacccagtgaagcat
+Feature        /change: t>c
+Feature        /dnflank: ggaaaccctatttccccaccccagc
+Feature        /re_site: +Hpy188I, +SfaNI, -XcmI
+Feature        /region: 3'UTR
+//
+ID           J02933:(521)g.+12165A>G
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: experimental
+Feature        /location: 12165 (J02933::12686)
+Feature        /upflank: cgcacacctgtggtgcctgccaccc
+Feature        /change: a>g
+Feature        /dnflank: ctgggttgcccatgattcatttttg
+Feature        /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I,
+Feature         -TspRI
+Feature        /region: 3'UTR; (+1027)
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: computed
+Feature        /location: 2428
+Feature        /upflank: cgcacacctgtggtgcctgccaccc
+Feature        /change: a>g
+Feature        /dnflank: ctgggttgcccatgattcatttttg
+Feature        /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I,
+Feature         -TspRI
+Feature        /region: 3'UTR; (-1)
+//
+ID           J02933:(521)g.+4G>T; V2F
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 4 (J02933::525)
+Feature        /upflank: gcagcactgcagagatttcatcatg
+Feature        /change: g>t
+Feature        /dnflank: tctcccaggccctcaggctcctctg
+Feature        /re_site: -BsmAI, -Eco31I
+Feature        /region: exon; 1 (+4)
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4
+Feature        /upflank: gcagcactgcagagatttcatcatg
+Feature        /change: g>t
+Feature        /dnflank: tctcccaggccctcaggctcctctg
+Feature        /re_site: -BsmAI, -Eco31I
+Feature        /codon_table: 1
+Feature        /codon: gtc>ttc; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, nonconservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: V>F
+//
+ID           J02933:(521)g.+1168G>T; D34Y
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 1168 (J02933::1689)
+Feature        /upflank: taaggcctcaggaggagaaacacgg
+Feature        /change: g>t
+Feature        /dnflank: acatgccgtggaagccggggcctca
+Feature        /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I,
+Feature         +Tsp4CI
+Feature        /region: exon; 1 (-29)
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 100
+Feature        /upflank: taaggcctcaggaggagaaacacgg
+Feature        /change: g>t
+Feature        /dnflank: acatgccgtggaagccggggcctca
+Feature        /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I,
+Feature         +Tsp4CI
+Feature        /codon_table: 1
+Feature        /codon: gac>tac; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, nonconservative
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: D>Y
+//
+ID           J02933:(521+1)g.-4C>G
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: -4 (J02933::518)
+Feature        /upflank: ggcaggggcagcactgcagagattt
+Feature        /change: c>g
+Feature        /dnflank: atcatggtctcccaggccctcaggc
+Feature        /re_site: +BclI, +DpnI, +MboI
+Feature        /region: 5'UTR; (-4)
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: experimental
+Feature        /location: -4
+Feature        /upflank: ggcaggggcagcactgcagagattt
+Feature        /change: c>g
+Feature        /dnflank: atcatggtctcccaggccctcaggc
+Feature        /re_site: +BclI, +DpnI, +MboI
+Feature        /region: 5'UTR; (+31)
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,350 @@
+ID           M20132:(362)c.+4G>A; E2K
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 4
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4 (M20132::366)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature        /codon_table: 1
+Feature        /codon: gaa>aaa; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, conservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: E>K
+//
+ID           M20132:(362)c.+14T>A; L5X
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 14
+Feature        /upflank: ccaagctcaaggatggaagtgcagt
+Feature        /change: t>a
+Feature        /dnflank: agggctgggaagggtctaccctcgg
+Feature      RNA; 1
+Feature        /label: nonsense
+Feature        /proof: experimental
+Feature        /location: 14 (M20132::376)
+Feature        /upflank: ccaagctcaaggatggaagtgcagt
+Feature        /change: t>a
+Feature        /dnflank: agggctgggaagggtctaccctcgg
+Feature        /codon_table: 1
+Feature        /codon: tta>taa; 2
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: truncation
+Feature        /proof: computed
+Feature        /location: 5
+Feature        /change: L>*
+//
+ID           M20132:(362)c.+4G>A; E2K
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 4
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4 (M20132::366)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g>a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature        /codon_table: 1
+Feature        /codon: gaa>aaa; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, conservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: E>K
+//
+ID           M20132:(362)c.+100delATCCAG; I34del-2
+Feature      DNA; 1
+Feature        /label: deletion
+Feature        /proof: computed
+Feature        /location: 100..105
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: atccag>
+Feature        /dnflank: aacccgggccccaggcacccagagg
+Feature        /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV
+Feature      RNA; 1
+Feature        /label: inframe, deletion
+Feature        /proof: experimental
+Feature        /location: 100..105 (M20132::462..467)
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: atccag>
+Feature        /dnflank: aacccgggccccaggcacccagagg
+Feature        /re_site: -BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: deletion
+Feature        /proof: computed
+Feature        /location: 34..35
+Feature        /change: IQ>
+//
+ID           M20132:(362)c.+101delT; I34delX172
+Feature      DNA; 1
+Feature        /label: deletion
+Feature        /proof: computed
+Feature        /location: 101
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: t>
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I
+Feature      RNA; 1
+Feature        /label: frameshift, deletion
+Feature        /proof: experimental
+Feature        /location: 101 (M20132::463)
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: t>
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: -BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 2
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: out-of-frame translation, truncation
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAP
+Feature         GSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPAR
+Feature         GCRSSCQHLRTRMTQLPHPRCPCWAPLSPA*
+//
+ID           M20132:(362)c.+101insGGGCCC; I34ins+2
+Feature      DNA; 1
+Feature        /label: insertion
+Feature        /proof: computed
+Feature        /location: 100^101
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: >gggccc
+Feature        /dnflank: tccagaacccgggccccaggcaccc
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV,
+Feature         +SduI
+Feature      RNA; 1
+Feature        /label: inframe, insertion
+Feature        /proof: experimental
+Feature        /location: 100^101 (M20132::462^463)
+Feature        /upflank: ctgttccagagcgtgcgcgaagtga
+Feature        /change: >gggccc
+Feature        /dnflank: tccagaacccgggccccaggcaccc
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV,
+Feature         +SduI
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 2
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: insertion, complex
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>RAL
+//
+ID           M20132:(362)c.+100insG; I34ins81X
+Feature      DNA; 1
+Feature        /label: insertion
+Feature        /proof: computed
+Feature        /location: 99^100
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: >g
+Feature        /dnflank: atccagaacccgggccccaggcacc
+Feature        /re_site: +BamHI, +BinI, +NlaIV, +XhoII
+Feature      RNA; 1
+Feature        /label: frameshift, insertion
+Feature        /proof: experimental
+Feature        /location: 99^100 (M20132::461^462)
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: >g
+Feature        /dnflank: atccagaacccgggccccaggcacc
+Feature        /re_site: +BamHI, +BinI, +NlaIV, +XhoII
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: out-of-frame translation, truncation
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*
+//
+ID           M20132:(362)c.+100AT>GGGCCC; I34ins82X
+Feature      DNA; 1
+Feature        /label: complex
+Feature        /proof: computed
+Feature        /location: 100..101
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: at>gggccc
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI
+Feature      RNA; 1
+Feature        /label: frameshift, complex
+Feature        /proof: experimental
+Feature        /location: 100..101 (M20132::462..463)
+Feature        /upflank: tctgttccagagcgtgcgcgaagtg
+Feature        /change: at>gggccc
+Feature        /dnflank: ccagaacccgggccccaggcaccca
+Feature        /re_site: +ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI,
+Feature         +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI
+Feature        /codon_table: 1
+Feature        /codon: atc>-; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: out-of-frame translation, truncation
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: I>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*
+//
+ID           M20132:(362+1)c.-1G>A
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: -1
+Feature        /upflank: ggtggaagattcagccaagctcaag
+Feature        /change: g>a
+Feature        /dnflank: atggaagtgcagttagggctgggaa
+Feature        /re_site: -BccI, -FokI, +Hpy178III
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: experimental
+Feature        /location: -1 (M20132::361)
+Feature        /upflank: ggtggaagattcagccaagctcaag
+Feature        /change: g>a
+Feature        /dnflank: atggaagtgcagttagggctgggaa
+Feature        /re_site: -BccI, -FokI, +Hpy178III
+Feature        /region: 5'UTR
+//
+ID           M20132:(362)c.+2766T>C
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 2766
+Feature        /upflank: tctatttccacacccagtgaagcat
+Feature        /change: t>c
+Feature        /dnflank: ggaaaccctatttccccaccccagc
+Feature        /re_site: +Hpy188I, +SfaNI, -XcmI
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: experimental
+Feature        /location: 2766 (M20132::3128)
+Feature        /upflank: tctatttccacacccagtgaagcat
+Feature        /change: t>c
+Feature        /dnflank: ggaaaccctatttccccaccccagc
+Feature        /re_site: +Hpy188I, +SfaNI, -XcmI
+Feature        /region: 3'UTR
+//
+ID           J02933:(521)g.+12165A>G
+Feature      DNA; 1
+Feature        /label: point, transition
+Feature        /proof: experimental
+Feature        /location: 12165 (J02933::12686)
+Feature        /upflank: cgcacacctgtggtgcctgccaccc
+Feature        /change: a>g
+Feature        /dnflank: ctgggttgcccatgattcatttttg
+Feature        /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I,
+Feature         -TspRI
+Feature        /region: 3'UTR; (+1027)
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: computed
+Feature        /location: 2428
+Feature        /upflank: cgcacacctgtggtgcctgccaccc
+Feature        /change: a>g
+Feature        /dnflank: ctgggttgcccatgattcatttttg
+Feature        /re_site: +AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I,
+Feature         -TspRI
+Feature        /region: 3'UTR; (-1)
+//
+ID           J02933:(521)g.+4G>T; V2F
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 4 (J02933::525)
+Feature        /upflank: gcagcactgcagagatttcatcatg
+Feature        /change: g>t
+Feature        /dnflank: tctcccaggccctcaggctcctctg
+Feature        /re_site: -BsmAI, -Eco31I
+Feature        /region: exon; 1 (+4)
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4
+Feature        /upflank: gcagcactgcagagatttcatcatg
+Feature        /change: g>t
+Feature        /dnflank: tctcccaggccctcaggctcctctg
+Feature        /re_site: -BsmAI, -Eco31I
+Feature        /codon_table: 1
+Feature        /codon: gtc>ttc; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, nonconservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: V>F
+//
+ID           J02933:(521)g.+1168G>T; D34Y
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 1168 (J02933::1689)
+Feature        /upflank: taaggcctcaggaggagaaacacgg
+Feature        /change: g>t
+Feature        /dnflank: acatgccgtggaagccggggcctca
+Feature        /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I,
+Feature         +Tsp4CI
+Feature        /region: exon; 1 (-29)
+Feature      RNA; 1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 100
+Feature        /upflank: taaggcctcaggaggagaaacacgg
+Feature        /change: g>t
+Feature        /dnflank: acatgccgtggaagccggggcctca
+Feature        /re_site: -BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I,
+Feature         +Tsp4CI
+Feature        /codon_table: 1
+Feature        /codon: gac>tac; 1
+Feature        /region: coding
+Feature      AA; 1
+Feature        /label: substitution, nonconservative
+Feature        /proof: computed
+Feature        /location: 34
+Feature        /change: D>Y
+//
+ID           J02933:(521+1)g.-4C>G
+Feature      DNA; 1
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: -4 (J02933::518)
+Feature        /upflank: ggcaggggcagcactgcagagattt
+Feature        /change: c>g
+Feature        /dnflank: atcatggtctcccaggccctcaggc
+Feature        /re_site: +BclI, +DpnI, +MboI
+Feature        /region: 5'UTR; (-4)
+Feature      RNA; 1
+Feature        /label: unknown
+Feature        /proof: experimental
+Feature        /location: -4
+Feature        /upflank: ggcaggggcagcactgcagagattt
+Feature        /change: c>g
+Feature        /dnflank: atcatggtctcccaggccctcaggc
+Feature        /re_site: +BclI, +DpnI, +MboI
+Feature        /region: 5'UTR; (+31)
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mutations.old.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,402 @@
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+4G&gt;A" trivname="E2K">
+    <DNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="2" end="2" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+14T&gt;A" trivname="L5X">
+    <DNA number="1" start="14" end="14" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>ccaagctcaaggatggaagtgcagt</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>agggctgggaagggtctaccctcgg</dnFlank>
+    </DNA>
+    <RNA number="1" start="14" end="14" length="1" isMutation="1">
+        <label>nonsense</label>
+        <proof>experimental</proof>
+        <upFlank>ccaagctcaaggatggaagtgcagt</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>agggctgggaagggtctaccctcgg</dnFlank>
+        <codon codon_ori="tta" codon_mut="taa" codon_pos="2"></codon>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="5" end="5" length="1" isMutation="1">
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>L</allele_ori>
+        <allele_mut>*</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+4G&gt;A" trivname="E2K">
+    <DNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="2" end="2" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+100delATCCAG" trivname="I34del-2">
+    <DNA number="1" start="100" end="105" length="6" isMutation="1">
+        <label>deletion</label>
+        <proof>computed</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>atccag</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>aacccgggccccaggcacccagagg</dnFlank>
+        <restriction_changes>-BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV</restriction_changes>
+    </DNA>
+    <RNA number="1" start="100" end="105" length="6" isMutation="1">
+        <label>inframe</label>
+        <label>deletion</label>
+        <proof>experimental</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>atccag</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>aacccgggccccaggcacccagagg</dnFlank>
+        <codon codon_ori="atc" codon_pos="1"></codon>
+        <restriction_changes>-BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="35" length="2" isMutation="1">
+        <label>deletion</label>
+        <proof>computed</proof>
+        <allele_ori>IQ</allele_ori>
+        <allele_mut></allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+101delT" trivname="I34delX172">
+    <DNA number="1" start="101" end="101" length="1" isMutation="1">
+        <label>deletion</label>
+        <proof>computed</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <restriction_changes>-BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I</restriction_changes>
+    </DNA>
+    <RNA number="1" start="101" end="101" length="1" isMutation="1">
+        <label>frameshift</label>
+        <label>deletion</label>
+        <proof>experimental</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <codon codon_ori="atc" codon_pos="2"></codon>
+        <restriction_changes>-BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>out-of-frame translation</label>
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAPGSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPARGCRSSCQHLRTRMTQLPHPRCPCWAPLSPA*</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+101insGGGCCC" trivname="I34ins+2">
+    <DNA number="1" start="101" end="101" length="0" isMutation="1">
+        <label>insertion</label>
+        <proof>computed</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>tccagaacccgggccccaggcaccc</dnFlank>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="101" end="101" length="0" isMutation="1">
+        <label>inframe</label>
+        <label>insertion</label>
+        <proof>experimental</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>tccagaacccgggccccaggcaccc</dnFlank>
+        <codon codon_ori="atc" codon_pos="2"></codon>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>insertion</label>
+        <label>complex</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>RAL</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+100insG" trivname="I34ins81X">
+    <DNA number="1" start="100" end="100" length="0" isMutation="1">
+        <label>insertion</label>
+        <proof>computed</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atccagaacccgggccccaggcacc</dnFlank>
+        <restriction_changes>+BamHI, +BinI, +NlaIV, +XhoII</restriction_changes>
+    </DNA>
+    <RNA number="1" start="100" end="100" length="0" isMutation="1">
+        <label>frameshift</label>
+        <label>insertion</label>
+        <proof>experimental</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atccagaacccgggccccaggcacc</dnFlank>
+        <codon codon_ori="atc" codon_pos="1"></codon>
+        <restriction_changes>+BamHI, +BinI, +NlaIV, +XhoII</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>out-of-frame translation</label>
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+100AT&gt;GGGCCC" trivname="I34ins82X">
+    <DNA number="1" start="100" end="101" length="2" isMutation="1">
+        <label>complex</label>
+        <proof>computed</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>at</allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="100" end="101" length="2" isMutation="1">
+        <label>frameshift</label>
+        <label>complex</label>
+        <proof>experimental</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>at</allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <codon codon_ori="atc" codon_pos="1"></codon>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>out-of-frame translation</label>
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.-1G&gt;A">
+    <DNA number="1" start="-1" end="-1" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>ggtggaagattcagccaagctcaag</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>atggaagtgcagttagggctgggaa</dnFlank>
+        <restriction_changes>-BccI, -FokI, +Hpy178III</restriction_changes>
+    </DNA>
+    <RNA number="1" start="-1" end="-1" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>experimental</proof>
+        <upFlank>ggtggaagattcagccaagctcaag</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>atggaagtgcagttagggctgggaa</dnFlank>
+        <restriction_changes>-BccI, -FokI, +Hpy178III</restriction_changes>
+        <region>5'UTR</region>
+    </RNA>
+</seqDiff>
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+2766T&gt;C">
+    <DNA number="1" start="2766" end="2766" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>tctatttccacacccagtgaagcat</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>c</allele_mut>
+        <dnFlank>ggaaaccctatttccccaccccagc</dnFlank>
+        <restriction_changes>+Hpy188I, +SfaNI, -XcmI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="2766" end="2766" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>experimental</proof>
+        <upFlank>tctatttccacacccagtgaagcat</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>c</allele_mut>
+        <dnFlank>ggaaaccctatttccccaccccagc</dnFlank>
+        <restriction_changes>+Hpy188I, +SfaNI, -XcmI</restriction_changes>
+        <region>3'UTR</region>
+    </RNA>
+</seqDiff>
+
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.+12165A&gt;G">
+    <DNA number="1" start="12165" end="12165" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>experimental</proof>
+        <upFlank>cgcacacctgtggtgcctgccaccc</upFlank>
+        <allele_ori>a</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>ctgggttgcccatgattcatttttg</dnFlank>
+        <restriction_changes>+AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI</restriction_changes>
+        <region dist="1027">3'UTR</region>
+    </DNA>
+    <RNA number="1" start="2428" end="2428" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>computed</proof>
+        <upFlank>cgcacacctgtggtgcctgccaccc</upFlank>
+        <allele_ori>a</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>ctgggttgcccatgattcatttttg</dnFlank>
+        <restriction_changes>+AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI</restriction_changes>
+        <region dist="-1">3'UTR</region>
+    </RNA>
+</seqDiff>
+
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.+4G&gt;T" trivname="V2F">
+    <DNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>gcagcactgcagagatttcatcatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>tctcccaggccctcaggctcctctg</dnFlank>
+        <restriction_changes>-BsmAI, -Eco31I</restriction_changes>
+        <region value="1" dist="4">exon</region>
+    </DNA>
+    <RNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gcagcactgcagagatttcatcatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>tctcccaggccctcaggctcctctg</dnFlank>
+        <codon codon_ori="gtc" codon_mut="ttc" codon_pos="1"></codon>
+        <restriction_changes>-BsmAI, -Eco31I</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="2" end="2" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>nonconservative</label>
+        <proof>computed</proof>
+        <allele_ori>V</allele_ori>
+        <allele_mut>F</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.+1168G&gt;T" trivname="D34Y">
+    <DNA number="1" start="1168" end="1168" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>taaggcctcaggaggagaaacacgg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>acatgccgtggaagccggggcctca</dnFlank>
+        <restriction_changes>-BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI</restriction_changes>
+        <region value="1" dist="-29">exon</region>
+    </DNA>
+    <RNA number="1" start="100" end="100" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>taaggcctcaggaggagaaacacgg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>acatgccgtggaagccggggcctca</dnFlank>
+        <codon codon_ori="gac" codon_mut="tac" codon_pos="1"></codon>
+        <restriction_changes>-BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>nonconservative</label>
+        <proof>computed</proof>
+        <allele_ori>D</allele_ori>
+        <allele_mut>Y</allele_mut>
+    </AA>
+</seqDiff>
+
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.-4C&gt;G">
+    <DNA number="1" start="-4" end="-4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>ggcaggggcagcactgcagagattt</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atcatggtctcccaggccctcaggc</dnFlank>
+        <restriction_changes>+BclI, +DpnI, +MboI</restriction_changes>
+        <region dist="-4">5'UTR</region>
+    </DNA>
+    <RNA number="1" start="-4" end="-4" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>experimental</proof>
+        <upFlank>ggcaggggcagcactgcagagattt</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atcatggtctcccaggccctcaggc</dnFlank>
+        <restriction_changes>+BclI, +DpnI, +MboI</restriction_changes>
+        <region dist="31">5'UTR</region>
+    </RNA>
+</seqDiff>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/mutations.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/mutations.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/mutations.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,388 @@
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+4G&gt;A" trivname="E2K">
+    <DNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="2" end="2" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+14T&gt;A" trivname="L5X">
+    <DNA number="1" start="14" end="14" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>ccaagctcaaggatggaagtgcagt</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>agggctgggaagggtctaccctcgg</dnFlank>
+    </DNA>
+    <RNA number="1" start="14" end="14" length="1" isMutation="1">
+        <label>nonsense</label>
+        <proof>experimental</proof>
+        <upFlank>ccaagctcaaggatggaagtgcagt</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>agggctgggaagggtctaccctcgg</dnFlank>
+        <codon codon_ori="tta" codon_mut="taa" codon_pos="2"></codon>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="5" end="5" length="1" isMutation="1">
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>L</allele_ori>
+        <allele_mut>*</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+4G&gt;A" trivname="E2K">
+    <DNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="2" end="2" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+100delATCCAG" trivname="I34del-2">
+    <DNA number="1" start="100" end="105" length="6" isMutation="1">
+        <label>deletion</label>
+        <proof>computed</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>atccag</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>aacccgggccccaggcacccagagg</dnFlank>
+        <restriction_changes>-BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV</restriction_changes>
+    </DNA>
+    <RNA number="1" start="100" end="105" length="6" isMutation="1">
+        <label>inframe</label>
+        <label>deletion</label>
+        <proof>experimental</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>atccag</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>aacccgggccccaggcacccagagg</dnFlank>
+        <codon codon_ori="atc" codon_pos="1"></codon>
+        <restriction_changes>-BinI, -BsiYI, -DpnI, -Hpy178III, -MboI, +MjaIV</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="35" length="2" isMutation="1">
+        <label>deletion</label>
+        <proof>computed</proof>
+        <allele_ori>IQ</allele_ori>
+        <allele_mut></allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+101delT" trivname="I34delX172">
+    <DNA number="1" start="101" end="101" length="1" isMutation="1">
+        <label>deletion</label>
+        <proof>computed</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <restriction_changes>-BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I</restriction_changes>
+    </DNA>
+    <RNA number="1" start="101" end="101" length="1" isMutation="1">
+        <label>frameshift</label>
+        <label>deletion</label>
+        <proof>experimental</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut></allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <codon codon_ori="atc" codon_pos="2"></codon>
+        <restriction_changes>-BinI, -DpnI, -Hpy178III, +MaeIII, -MboI, +Tsp45I</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>out-of-frame translation</label>
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>TRTRAPGTQRPRAQHLPAPVCCCCSSSSSSSSSSSSSSSSSSSSKRLAPGSSSSSRVRMVLPKPIVEAPQATWSWMRNSNLHSRSRPWSATPREVASQSLEPPWPPARGCRSSCQHLRTRMTQLPHPRCPCWAPLSPA*</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+101insGGGCCC" trivname="I34ins+2">
+    <DNA number="1" start="101" end="101" length="0" isMutation="1">
+        <label>insertion</label>
+        <proof>computed</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>tccagaacccgggccccaggcaccc</dnFlank>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="101" end="101" length="0" isMutation="1">
+        <label>inframe</label>
+        <label>insertion</label>
+        <proof>experimental</proof>
+        <upFlank>ctgttccagagcgtgcgcgaagtga</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>tccagaacccgggccccaggcaccc</dnFlank>
+        <codon codon_ori="atc" codon_pos="2"></codon>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +GsuI, +HaeIII, +HgiJII, -MboI, +MnlI, +NlaIV, +SduI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>insertion</label>
+        <label>complex</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>RAL</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+100insG" trivname="I34ins81X">
+    <DNA number="1" start="100" end="100" length="0" isMutation="1">
+        <label>insertion</label>
+        <proof>computed</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atccagaacccgggccccaggcacc</dnFlank>
+        <restriction_changes>+BamHI, +BinI, +NlaIV, +XhoII</restriction_changes>
+    </DNA>
+    <RNA number="1" start="100" end="100" length="0" isMutation="1">
+        <label>frameshift</label>
+        <label>insertion</label>
+        <proof>experimental</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori></allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atccagaacccgggccccaggcacc</dnFlank>
+        <codon codon_ori="atc" codon_pos="1"></codon>
+        <restriction_changes>+BamHI, +BinI, +NlaIV, +XhoII</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>out-of-frame translation</label>
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>DPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+100AT&gt;GGGCCC" trivname="I34ins82X">
+    <DNA number="1" start="100" end="101" length="2" isMutation="1">
+        <label>complex</label>
+        <proof>computed</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>at</allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="100" end="101" length="2" isMutation="1">
+        <label>frameshift</label>
+        <label>complex</label>
+        <proof>experimental</proof>
+        <upFlank>tctgttccagagcgtgcgcgaagtg</upFlank>
+        <allele_ori>at</allele_ori>
+        <allele_mut>gggccc</allele_mut>
+        <dnFlank>ccagaacccgggccccaggcaccca</dnFlank>
+        <codon codon_ori="atc" codon_pos="1"></codon>
+        <restriction_changes>+ApaI, +AsuI, -BinI, +BmgI, +BseSI, +CviJI, -DpnI, +DraII, +HaeIII, +HgiJII, -Hpy178III, -MboI, +NlaIV, +SduI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>out-of-frame translation</label>
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>I</allele_ori>
+        <allele_mut>GPPEPGPQAPRGRERSTSRRQFAAAAAAAAAAAAAAAAAAAAAAAARD*</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.-1G&gt;A">
+    <DNA number="1" start="-1" end="-1" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>ggtggaagattcagccaagctcaag</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>atggaagtgcagttagggctgggaa</dnFlank>
+        <restriction_changes>-BccI, -FokI, +Hpy178III</restriction_changes>
+    </DNA>
+    <RNA number="1" start="-1" end="-1" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>experimental</proof>
+        <upFlank>ggtggaagattcagccaagctcaag</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>atggaagtgcagttagggctgggaa</dnFlank>
+        <restriction_changes>-BccI, -FokI, +Hpy178III</restriction_changes>
+        <region>5'UTR</region>
+    </RNA>
+</seqDiff>
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="c.+2766T&gt;C">
+    <DNA number="1" start="2766" end="2766" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>tctatttccacacccagtgaagcat</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>c</allele_mut>
+        <dnFlank>ggaaaccctatttccccaccccagc</dnFlank>
+        <restriction_changes>+Hpy188I, +SfaNI, -XcmI</restriction_changes>
+    </DNA>
+    <RNA number="1" start="2766" end="2766" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>experimental</proof>
+        <upFlank>tctatttccacacccagtgaagcat</upFlank>
+        <allele_ori>t</allele_ori>
+        <allele_mut>c</allele_mut>
+        <dnFlank>ggaaaccctatttccccaccccagc</dnFlank>
+        <restriction_changes>+Hpy188I, +SfaNI, -XcmI</restriction_changes>
+        <region>3'UTR</region>
+    </RNA>
+</seqDiff>
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.+12165A&gt;G">
+    <DNA number="1" start="12165" end="12165" length="1" isMutation="1">
+        <label>point</label>
+        <label>transition</label>
+        <proof>experimental</proof>
+        <upFlank>cgcacacctgtggtgcctgccaccc</upFlank>
+        <allele_ori>a</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>ctgggttgcccatgattcatttttg</dnFlank>
+        <restriction_changes>+AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI</restriction_changes>
+        <region dist="1027">3'UTR</region>
+    </DNA>
+    <RNA number="1" start="2428" end="2428" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>computed</proof>
+        <upFlank>cgcacacctgtggtgcctgccaccc</upFlank>
+        <allele_ori>a</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>ctgggttgcccatgattcatttttg</dnFlank>
+        <restriction_changes>+AciI, -BfiI, -BsrI, +FauI, +NspBII, +Sth132I, -TspRI</restriction_changes>
+        <region dist="-1">3'UTR</region>
+    </RNA>
+</seqDiff>
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.+4G&gt;T" trivname="V2F">
+    <DNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>gcagcactgcagagatttcatcatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>tctcccaggccctcaggctcctctg</dnFlank>
+        <restriction_changes>-BsmAI, -Eco31I</restriction_changes>
+        <region value="1" dist="4">exon</region>
+    </DNA>
+    <RNA number="1" start="4" end="4" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gcagcactgcagagatttcatcatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>tctcccaggccctcaggctcctctg</dnFlank>
+        <codon codon_ori="gtc" codon_mut="ttc" codon_pos="1"></codon>
+        <restriction_changes>-BsmAI, -Eco31I</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="2" end="2" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>nonconservative</label>
+        <proof>computed</proof>
+        <allele_ori>V</allele_ori>
+        <allele_mut>F</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.+1168G&gt;T" trivname="D34Y">
+    <DNA number="1" start="1168" end="1168" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>taaggcctcaggaggagaaacacgg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>acatgccgtggaagccggggcctca</dnFlank>
+        <restriction_changes>-BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI</restriction_changes>
+        <region value="1" dist="-29">exon</region>
+    </DNA>
+    <RNA number="1" start="100" end="100" length="1" isMutation="1">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>taaggcctcaggaggagaaacacgg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>acatgccgtggaagccggggcctca</dnFlank>
+        <codon codon_ori="gac" codon_mut="tac" codon_pos="1"></codon>
+        <restriction_changes>-BscGI, -Bsp24I, -CjePI, -FinI, +RsaI, -Sth132I, +Tsp4CI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1" start="34" end="34" length="1" isMutation="1">
+        <label>substitution</label>
+        <label>nonconservative</label>
+        <proof>computed</proof>
+        <allele_ori>D</allele_ori>
+        <allele_mut>Y</allele_mut>
+    </AA>
+</seqDiff>
+<seqDiff id="J02933" moltype="dna" offset="521" sysname="g.-4C&gt;G">
+    <DNA number="1" start="-4" end="-4" length="1" isMutation="1">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>ggcaggggcagcactgcagagattt</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atcatggtctcccaggccctcaggc</dnFlank>
+        <restriction_changes>+BclI, +DpnI, +MboI</restriction_changes>
+        <region dist="-4">5'UTR</region>
+    </DNA>
+    <RNA number="1" start="-4" end="-4" length="1" isMutation="1">
+        <label>unknown</label>
+        <proof>experimental</proof>
+        <upFlank>ggcaggggcagcactgcagagattt</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>g</allele_mut>
+        <dnFlank>atcatggtctcccaggccctcaggc</dnFlank>
+        <restriction_changes>+BclI, +DpnI, +MboI</restriction_changes>
+        <region dist="31">5'UTR</region>
+    </RNA>
+</seqDiff>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/myco_sites.gff
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/myco_sites.gff	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/myco_sites.gff	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,412 @@
+L43967	TFBS	TF binding site	845	850	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	2294	2299	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	4869	4874	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	5114	5119	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	5381	5386	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	6116	6121	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	8532	8537	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	9753	9758	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	11057	11062	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	11778	11783	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	12031	12036	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	13228	13233	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	14076	14081	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	16100	16105	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	16856	16861	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	17811	17816	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	18498	18503	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	22239	22244	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	22668	22673	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	25371	25376	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	27174	27179	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	28699	28704	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	31536	31541	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	32280	32285	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	34908	34913	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	35342	35347	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	36013	36018	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	36249	36254	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	37296	37301	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	40336	40341	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	41696	41701	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	42937	42942	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	43383	43388	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	43663	43668	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	44794	44799	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	45682	45687	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	45689	45694	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	45933	45938	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	45961	45966	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	49079	49084	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	50046	50051	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	50673	50678	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	54403	54408	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	56322	56327	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	56604	56609	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	57762	57767	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	62005	62010	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	64485	64490	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	67266	67271	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	67778	67783	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	68301	68306	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	68783	68788	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	70794	70799	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	70850	70855	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	71267	71272	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	76218	76223	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	76491	76496	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	76755	76760	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	77661	77666	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	78567	78572	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	80048	80053	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	85510	85515	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	89992	89997	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	92257	92262	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	96234	96239	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	96297	96302	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	97614	97619	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	99414	99419	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	99819	99824	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	105921	105926	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	107636	107641	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	110308	110313	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	111015	111020	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	111073	111078	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	111804	111809	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	113602	113607	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	114253	114258	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	114809	114814	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	115046	115051	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	116069	116074	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	119248	119253	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	119816	119821	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	121917	121922	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	123025	123030	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	123270	123275	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	123923	123928	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	124335	124340	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	128198	128203	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	129032	129037	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	129236	129241	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	131425	131430	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	132247	132252	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	132405	132410	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	133735	133740	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	138310	138315	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	139126	139131	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	139442	139447	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	141227	141232	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	141243	141248	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	143687	143692	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	145015	145020	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	147561	147566	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	147913	147918	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	149139	149144	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	149664	149669	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	153814	153819	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	157201	157206	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	157667	157672	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	158641	158646	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	162903	162908	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	163175	163180	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	163819	163824	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	164743	164748	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	167447	167452	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	169414	169419	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	169421	169426	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	169751	169756	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	170006	170011	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	170998	171003	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	171452	171457	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	172732	172737	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	179757	179762	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	180069	180074	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	182094	182099	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	183142	183147	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	183269	183274	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	186054	186059	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	186931	186936	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	189325	189330	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	192962	192967	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	194690	194695	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	195004	195009	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	195562	195567	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	195724	195729	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	196909	196914	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	197612	197617	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	198208	198213	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	198704	198709	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	201352	201357	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	201957	201962	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	202165	202170	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	202841	202846	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	204241	204246	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	211108	211113	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	211423	211428	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	213816	213821	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	215780	215785	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	216829	216834	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	218083	218088	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	218198	218203	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	218893	218898	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	219001	219006	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	219782	219787	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	223559	223564	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	225993	225998	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	227069	227074	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	227076	227081	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	227391	227396	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	228047	228052	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	231632	231637	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	231899	231904	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	232782	232787	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	232802	232807	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	233418	233423	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	234390	234395	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	235811	235816	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	235889	235894	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	237044	237049	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	237126	237131	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	238244	238249	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	238591	238596	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	239219	239224	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	239284	239289	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	241552	241557	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	243029	243034	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	243055	243060	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	243412	243417	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	244800	244805	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	245364	245369	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	247846	247851	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	250269	250274	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	255006	255011	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	256084	256089	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	256757	256762	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	260434	260439	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	262015	262020	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	262765	262770	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	265170	265175	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	267453	267458	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	270147	270152	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	272537	272542	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	272756	272761	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	273921	273926	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	275355	275360	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	275431	275436	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	277976	277981	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	280297	280302	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	282577	282582	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	283777	283782	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	284089	284094	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	284594	284599	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	285254	285259	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	288191	288196	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	289168	289173	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	289365	289370	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	290063	290068	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	290312	290317	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	290933	290938	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	291190	291195	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	297466	297471	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	302074	302079	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	302262	302267	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	302827	302832	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	304031	304036	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	307621	307626	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	307741	307746	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	307935	307940	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	309939	309944	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	311141	311146	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	316583	316588	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	317137	317142	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	318511	318516	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	320015	320020	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	321001	321006	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	321214	321219	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	322838	322843	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	323180	323185	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	323197	323202	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	324701	324706	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	324863	324868	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	326911	326916	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	327902	327907	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	327971	327976	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	328658	328663	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	328724	328729	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	332208	332213	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	333136	333141	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	334039	334044	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	335067	335072	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	337948	337953	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	337957	337962	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	338991	338996	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	339252	339257	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	340352	340357	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	342514	342519	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	346001	346006	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	346641	346646	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	347444	347449	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	347860	347865	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	347891	347896	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	348013	348018	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	349913	349918	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	351391	351396	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	351398	351403	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	351713	351718	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	352043	352048	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	352140	352145	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	355393	355398	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	357131	357136	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	360824	360829	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	364016	364021	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	364142	364147	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	365403	365408	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	372018	372023	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	373754	373759	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	374344	374349	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	375437	375442	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	376138	376143	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	377142	377147	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	382337	382342	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	384202	384207	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	384425	384430	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	385502	385507	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	386494	386499	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	387502	387507	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	388064	388069	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	388879	388884	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	389669	389674	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	390581	390586	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	390917	390922	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	393588	393593	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	394387	394392	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	394904	394909	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	395493	395498	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	399504	399509	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	399825	399830	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	402061	402066	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	404493	404498	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	405229	405234	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	406073	406078	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	406177	406182	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	406556	406561	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	408978	408983	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	409263	409268	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	409434	409439	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	414153	414158	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	414393	414398	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	414965	414970	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	417932	417937	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	418307	418312	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	418580	418585	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	418682	418687	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	419628	419633	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	421619	421624	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	424418	424423	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	427596	427601	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	428041	428046	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	428257	428262	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	429247	429252	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	429254	429259	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	431369	431374	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	431449	431454	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	431474	431479	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	431650	431655	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	434107	434112	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	434482	434487	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	434615	434620	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	436479	436484	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	437344	437349	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	437598	437603	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	439230	439235	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	439592	439597	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	440655	440660	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	442011	442016	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	442102	442107	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	442868	442873	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	446506	446511	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	452302	452307	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	453058	453063	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	453880	453885	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	454279	454284	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	456596	456601	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	458508	458513	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	463490	463495	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	464267	464272	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	467140	467145	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	467569	467574	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	468724	468729	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	468820	468825	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	471485	471490	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	471526	471531	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	471807	471812	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	472752	472757	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	472762	472767	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	472962	472967	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	474627	474632	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	476038	476043	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	476194	476199	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	477451	477456	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	479116	479121	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	479150	479155	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	480805	480810	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	488811	488816	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	489777	489782	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	492141	492146	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	494766	494771	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	496378	496383	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	497406	497411	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	499139	499144	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	503505	503510	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	504674	504679	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	506112	506117	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	506989	506994	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	509237	509242	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	510134	510139	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	511768	511773	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	512727	512732	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	513329	513334	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	513462	513467	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	515629	515634	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	516164	516169	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	517465	517470	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	519394	519399	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	523635	523640	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	524706	524711	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	525703	525708	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	526026	526031	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	527753	527758	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	531141	531146	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	533969	533974	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	535504	535509	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	535933	535938	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	537846	537851	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	539424	539429	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	544688	544693	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	545686	545691	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	546394	546399	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	547730	547735	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	548041	548046	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	551431	551436	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	553155	553160	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	553709	553714	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	555572	555577	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	556008	556013	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	556135	556140	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	556897	556902	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	558828	558833	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	559903	559908	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	560315	560320	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	560742	560747	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	563254	563259	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	565681	565686	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	566779	566784	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	567667	567672	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	567940	567945	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	568458	568463	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	569632	569637	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	575394	575399	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	578596	578601	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	579055	579060	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	579079	579084	7.932	+	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  
+L43967	TFBS	TF binding site	579845	579850	7.932	-	0	TF "-35 Consensus"   ; class unknown  ; sequence TTGACA  

Added: trunk/packages/bioperl/branches/upstream/current/t/data/nei_gojobori_test.aln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/nei_gojobori_test.aln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/nei_gojobori_test.aln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7 @@
+>seq1 
+ATGTATTCCTTCGCCGGCCCGGAGTATGACGAGGAGACAGGGAAAGTGAAGGCCCACTCACAGACTGACCGAGAGAACATCGCGCTCTACCTCATGTTTCACTACACCCGCAAGTGGGCGGCCCGTGTGGAGCAGTTGAGAGCCTACGAGGGCACGGTGGACGGGCGCTAC
+>seq2
+ATGTATTACTTCGCCGGCCCGGAGTATGACCGGAACACACGGAATGTGAAGGCCCACTCACAGACTGACCGAGAGAGCATCGCGCTCTACATCAGGTATCAGGACACCCGCAAGTGGACGGCCCATGAGGAGCAGTGGAGAGCCTACGAGGGCCGGGTGGAGTGGCGCTAC
+>seq3 
+ATGTATACCTTCGCCGGCCCGGAGTATGACCGGAACACACGGAATGTGAAGGCCCACTCACAGATTGACCGAGTGGACACCCTGCGCTACATCATGTATCAGGACACCCGCAAGTGGGCGGCCCGTGTGGAGCAGTTGAGAGCCTACGAGGGCACGGTGGAGTGGCGCTAC
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/neighbor.dist
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/neighbor.dist	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/neighbor.dist	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4 @@
+    3
+SINFRUP001     0.00000  0.16316  0.28603
+SINFRUP002     0.16316  0.00000  0.27995
+ENSP000002     0.28603  0.27995  0.00000

Added: trunk/packages/bioperl/branches/upstream/current/t/data/new_blastn.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/new_blastn.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/new_blastn.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,134 @@
+BLASTN 2.2.13 [Nov-27-2005]
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman 
+(1997), "Gapped BLAST and PSI-BLAST: a new generation of 
+protein database search programs", Nucleic Acids Res. 25:3389-3402.
+
+RID: 1141079027-8324-8848328247.BLASTQ4
+
+
+Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS,
+GSS,environmental samples or phase 0, 1 or 2 HTGS sequences)
+           3,742,891 sequences; 16,670,205,594 total letters
+Query=  pyrR, 558 bases, FB21E40C checksum.
+Length=558
+
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (Bits)  Value
+
+gi|41400296|gb|AE016958.1|  Mycobacterium avium subsp. paratub...   236    6e-59
+gi|54013472|dbj|AP006618.1|  Nocardia farcinica IFM 10152 DNA, co   127    4e-26
+gi|57546753|dbj|BA000030.2|  Streptomyces avermitilis MA-4680 gen   119    1e-23
+
+ALIGNMENTS
+>gi|41400296|gb|AE016958.1| Mycobacterium avium subsp. paratuberculosis str. k10, complete 
+genome
+Length=4829781
+
+ Features in this part of subject sequence:
+   PyrR
+
+ Score =  236 bits (119),  Expect = 6e-59
+ Identities = 248/291 (85%), Gaps = 0/291 (0%)
+ Strand=Plus/Plus
+
+Query  262      CCGCCGCGTCCGCTGGAGGCCACCTCCATTCCCGCGGGCGGTGTCGACGACGCGATCGTC  321
+                |||||||| ||||||||||||||||| || || || |||||  ||||||||||| | || 
+Sbjct  1166897  CCGCCGCGGCCGCTGGAGGCCACCTCGATCCCGGCCGGCGGCATCGACGACGCGCTGGTG  1166956
+
+Query  322      ATCCTCGTCGACGACGTCCTGTACTCGGGCCGCTCGGTCCGCTCGGCGCTGGACGCGCTG  381
+                ||||| ||||||||||| || ||||| ||||||||||| |||||||| ||||||||||||
+Sbjct  1166957  ATCCTGGTCGACGACGTGCTCTACTCCGGCCGCTCGGTGCGCTCGGCACTGGACGCGCTG  1167016
+
+Query  382      CGCGACATCGGTCGCCCCCGCATCGTGCAGCTGGCGGTGCTGGTCGACCGCGGGCACCGC  441
+                |||||| | || ||||| ||  | ||||| ||||||||||||||||||||||| ||||||
+Sbjct  1167017  CGCGACGTGGGCCGCCCGCGGGTGGTGCAACTGGCGGTGCTGGTCGACCGCGGTCACCGC  1167076
+
+Query  442      GAGCTGCCCATCCGGGCCGACTACGTGGGCAAGAACGTCCCGACCTCACGCAGCGAAAGC  501
+                ||||||||  | || ||||||||||| |||||||||||||| ||||| ||||| || |||
+Sbjct  1167077  GAGCTGCCGCTGCGCGCCGACTACGTCGGCAAGAACGTCCCCACCTCGCGCAGTGAGAGC  1167136
+
+Query  502      GTCCACGTGCTGCTCAGCGAACACGACGACCGCGACGGAGTGGTGATCTCG  552
+                || |||||||||||   ||| ||||||| |  |||||| ||||||||||||
+Sbjct  1167137  GTGCACGTGCTGCTGGCCGAGCACGACGGCGCCGACGGGGTGGTGATCTCG  1167187
+
+
+>gi|54013472|dbj|AP006618.1| Nocardia farcinica IFM 10152 DNA, complete genome
+Length=6021225
+
+ Features in this part of subject sequence:
+   putative pyrimidine operon regulator
+
+ Score =  127 bits (64),  Expect = 4e-26
+ Identities = 85/92 (92%), Gaps = 0/92 (0%)
+ Strand=Plus/Minus
+
+Query  406      GTGCAGCTGGCGGTGCTGGTCGACCGCGGGCACCGCGAGCTGCCCATCCGGGCCGACTAC  465
+                ||||||||||| ||||||||||||||||| |||||||||||||| ||||| |||||||||
+Sbjct  3837536  GTGCAGCTGGCCGTGCTGGTCGACCGCGGCCACCGCGAGCTGCCGATCCGCGCCGACTAC  3837477
+
+Query  466      GTGGGCAAGAACGTCCCGACCTCACGCAGCGA  497
+                |||||||||||||| || ||||| ||||||||
+Sbjct  3837476  GTGGGCAAGAACGTGCCCACCTCCCGCAGCGA  3837445
+
+
+>gi|57546753|dbj|BA000030.2| Streptomyces avermitilis MA-4680 genomic DNA, complete genome
+Length=9025608
+
+ Features in this part of subject sequence:
+   putative pyrimidine operon regulatory protein
+
+ Score =  119 bits (60),  Expect = 1e-23
+ Identities = 135/160 (84%), Gaps = 0/160 (0%)
+ Strand=Plus/Plus
+
+Query  323      TCCTCGTCGACGACGTCCTGTACTCGGGCCGCTCGGTCCGCTCGGCGCTGGACGCGCTGC  382
+                ||||||||||||||||||| | ||| |||||| |  ||||| | || || ||||||||| 
+Sbjct  8189890  TCCTCGTCGACGACGTCCTCTTCTCCGGCCGCACCATCCGCGCCGCCCTCGACGCGCTGA  8189949
+
+Query  383      GCGACATCGGTCGCCCCCGCATCGTGCAGCTGGCGGTGCTGGTCGACCGCGGGCACCGCG  442
+                 ||||||||| ||||| |||  ||| ||||| ||||| || ||||||||||| |||||||
+Sbjct  8189950  ACGACATCGGCCGCCCGCGCGCCGTACAGCTCGCGGTCCTCGTCGACCGCGGTCACCGCG  8190009
+
+Query  443      AGCTGCCCATCCGGGCCGACTACGTGGGCAAGAACGTCCC  482
+                | ||||||||||| ||||||||||| ||||||||| ||||
+Sbjct  8190010  AACTGCCCATCCGCGCCGACTACGTCGGCAAGAACCTCCC  8190049
+
+
+
+  Database: All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,environmental 
+samples or phase 0, 1 or 2 HTGS sequences)
+    Posted date:  Feb 23, 2006  9:00 AM
+  Number of letters in database: -509,663,586
+  Number of sequences in database:  3,742,891
+Lambda     K      H
+    1.37    0.711     1.31 
+Gapped
+Lambda     K      H
+    1.37    0.711     1.31 
+Matrix: blastn matrix:1 -3
+Gap Penalties: Existence: 5, Extension: 2
+Number of Sequences: 3742891
+Number of Hits to DB: 2465616
+Number of extensions: 117843
+Number of successful extensions: 1771
+Number of sequences better than 1e-23: 0
+Number of HSP's better than 1e-23 without gapping: 0
+Number of HSP's gapped: 1771
+Number of HSP's successfully gapped: 0
+Length of query: 558
+Length of database: 16670205594
+Length adjustment: 22
+Effective length of query: 536
+Effective length of database: 16670205594
+Effective search space: 8935230198384
+Effective search space used: 8891094027712
+A: 0
+X1: 11 (21.8 bits)
+X2: 15 (29.7 bits)
+X3: 25 (49.6 bits)
+S1: 14 (28.2 bits)
+S2: 60 (119.4 bits)
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/newformat.swiss
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/newformat.swiss	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/newformat.swiss	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,196 @@
+ID   GCDH_CAEEL     STANDARD;      PRT;   409 AA.
+AC   Q20772;
+DT   01-NOV-1997, integrated into UniProtKB/Swiss-Prot.
+DT   01-NOV-1996, sequence version 1.
+DT   30-MAY-2006, entry version 44.
+DE   Probable glutaryl-CoA dehydrogenase, mitochondrial precursor
+DE   (EC 1.3.99.7) (GCD).
+GN   ORFNames=F54D5.7;
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
+RC   STRAIN=Bristol N2;
+RX   MEDLINE=99069613; PubMed=9851916; DOI=10.1126/science.282.5396.2012;
+RG   The C. elegans sequencing consortium;
+RT   "Genome sequence of the nematode C. elegans: a platform for
+RT   investigating biology.";
+RL   Science 282:2012-2018(1998).
+CC   -!- CATALYTIC ACTIVITY: Glutaryl-CoA + acceptor = crotonoyl-CoA +
+CC       CO(2) + reduced acceptor.
+CC   -!- COFACTOR: FAD (By similarity).
+CC   -!- PATHWAY: Degradative pathway of L-lysine, L-hydroxylysine, and L-
+CC       tryptophan metabolism.
+CC   -!- INTERACTION:
+CC       P39745:mpk-1; NbExp=1; IntAct=EBI-313068, EBI-321013;
+CC       Q17446:pmk-1; NbExp=1; IntAct=EBI-313068, EBI-312987;
+CC   -!- SUBCELLULAR LOCATION: Mitochondrion; mitochondrial matrix
+CC       (Potential).
+CC   -!- SIMILARITY: Belongs to the acyl-CoA dehydrogenase family.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; Z66513; CAA91333.1; -; Genomic_DNA.
+DR   PIR; T22647; T22647.
+DR   UniGene; Cel.30446; -.
+DR   HSSP; Q06319; 1BUC.
+DR   IntAct; Q20772; -.
+DR   Ensembl; F54D5.7; Caenorhabditis elegans.
+DR   WormBase; WBGene00010052; F54D5.7.
+DR   WormPep; F54D5.7; CE03411.
+DR   GO; GO:0005515; F:protein binding; IPI.
+DR   InterPro; IPR006089; Acyl_CoA_DH.
+DR   InterPro; IPR006091; Acyl_CoA_DH/ox_M.
+DR   InterPro; IPR006090; Acyl_CoA_DH_1.
+DR   InterPro; IPR006092; Acyl_CoA_DH_N.
+DR   InterPro; IPR009075; AcylCo_DH/ox_C.
+DR   InterPro; IPR009100; AcylCoA_DH/ox_NM.
+DR   InterPro; IPR013764; AcylCoA_DH_1/2_C.
+DR   Pfam; PF00441; Acyl-CoA_dh_1; 1.
+DR   Pfam; PF02770; Acyl-CoA_dh_M; 1.
+DR   Pfam; PF02771; Acyl-CoA_dh_N; 1.
+DR   PROSITE; PS00072; ACYL_COA_DH_1; FALSE_NEG.
+DR   PROSITE; PS00073; ACYL_COA_DH_2; 1.
+KW   Complete proteome; FAD; Flavoprotein; Hypothetical protein;
+KW   Mitochondrion; Oxidoreductase; Transit peptide.
+FT   TRANSIT       1      ?       Mitochondrion (Potential).
+FT   CHAIN         ?    409       Probable glutaryl-CoA dehydrogenase.
+FT                                /FTId=PRO_0000000530.
+FT   ACT_SITE    388    388       Proton acceptor (Potential).
+SQ   SEQUENCE   409 AA;  44964 MW;  4D06241FB6768069 CRC64;
+     MLTRGFTSIG KIASRGLSST FYQDAFQLSD QLTEDERSLM LSAREYCQER LLPRVTEAYR
+     TEKFDPSLIP EMGSMGLLGA PYQGYGCAGT STVGYGLIAR EVERVDSGYR STMSVQTSLV
+     IGPIYNYGSE DQKQKYIPDL ASGKKIGCFG LTEPNHGSNP GGMETKATWD ETTKTYKLNG
+     SKTWISNSPV SDVMVVWARS ARHNNKIKGF ILERGMKGLT TPKIEGKLSL RASITGQIAM
+     DDVPVPEENL LPNAEGLQGP FGCLNNARLG IAWGALGAAE ECFHLARQYT LDRQQFGRPL
+     AQNQLMQLKM ADMLTEISLG LQGCLRVSRL KDEGKVQSEQ ISIIKRNSCG KALEVARKAR
+     DMLGGNGIVD EYHIMRHMVN LETVNTYEGT HDVHALILGR AITGLNGFC
+//
+ID   Q41V66_FERAC   PRELIMINARY;   PRT;   607 AA.
+AC   Q41V66;
+DT   27-SEP-2005, integrated into UniProtKB/TrEMBL.
+DT   27-SEP-2005, sequence version 1.
+DT   30-MAY-2006, entry version 5.
+DE   Glycoside hydrolase, family 15.
+GN   ORFNames=FaciDRAFT_1685;
+OS   Ferroplasma acidarmanus Fer1.
+OC   Archaea; Euryarchaeota; Thermoplasmata; Thermoplasmatales;
+OC   Ferroplasmaceae; Ferroplasma.
+OX   NCBI_TaxID=333146;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RT   "Sequencing of the draft genome and assembly of Ferroplasma
+RT   acidarmanus fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [2]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-ORNL);
+RA   Larimer F., Land M.;
+RT   "Annotation of the draft genome assembly of Ferroplasma acidarmanus
+RT   fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CAUTION: The sequence shown here is derived from an
+CC       EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is
+CC       preliminary data.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; AABC04000001; EAM94575.1; -; Genomic_DNA.
+DR   GO; GO:0004339; F:glucan 1,4-alpha-glucosidase activity; IEA.
+DR   GO; GO:0016787; F:hydrolase activity; IEA.
+DR   GO; GO:0005976; P:polysaccharide metabolism; IEA.
+DR   InterPro; IPR008928; 6hp_glycosidase.
+DR   InterPro; IPR011613; Glyco_hydro_15_rel.
+DR   InterPro; IPR012343; Glyco_trans_sub.
+DR   Pfam; PF00723; Glyco_hydro_15; 1.
+KW   Hydrolase.
+SQ   SEQUENCE   607 AA;  69495 MW;  8AC6297BA16ED500 CRC64;
+     MGTYRGLYDL HDAYRSDYLK IANHGFIANN RTAALVGIDG TIDWACLPNF NSNPVFDSIL
+     DARNGGYFKT SPVMESNVNQ YYEESTNILI TEFVNNNQVI LRLTDFLPTS SYSTITFPEI
+     HRLIEAPYSD VEVSIDIKSH FNFGSGKTNI TRDRNGYIFS CTDDTLGIST NLKLKKGNGN
+     VYSRIKVEKG SHEWIVVLSG VRQIGNVRQY ESYTRLEETR NYWSAWAGKI NYSGLYYDHV
+     IRSALTLRGL FYDPTGMMVA APTTSLPEII GGERNWDYRY TWIRDTAYVV EALSLIGLND
+     VATKFLYDIM SIVQKDKKVK TIYPVNGDSK LEEKKVNLSG YMDSIPVRIG NEASEQLQID
+     QYGSIVNAVF RFHEAGGLVT TYLWDFLIEI LDTLKDIWKL PDSSIWEFRS EPKHYLYSKL
+     ISWSAFNRAI KMGRELGYSA PYRTWHKIRE EIKNEIMEKG YNPDVKAFTQ YYGSDQMDAS
+     VLRMPLTGII SAKDPRFVST LARVEAELKN PCGMFIRYHS DDGLKGHDNA FLLLSFWYVE
+     DLILSGRIME AKETFENILD HSNHLMLFSE EINFNDCREM LGNFPQAITH LGVIRAAIKL
+     DEALRGK
+//
+ID   Q41US7_FERAC   PRELIMINARY;   PRT;   270 AA.
+AC   Q41US7;
+DT   27-SEP-2005, integrated into UniProtKB/TrEMBL.
+DT   27-SEP-2005, sequence version 1.
+DT   30-MAY-2006, entry version 4.
+DE   Potassium channel protein.
+GN   ORFNames=FaciDRAFT_1443;
+OS   Ferroplasma acidarmanus Fer1.
+OC   Archaea; Euryarchaeota; Thermoplasmata; Thermoplasmatales;
+OC   Ferroplasmaceae; Ferroplasma.
+OX   NCBI_TaxID=333146;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RT   "Sequencing of the draft genome and assembly of Ferroplasma
+RT   acidarmanus fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [2]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-ORNL);
+RA   Larimer F., Land M.;
+RT   "Annotation of the draft genome assembly of Ferroplasma acidarmanus
+RT   fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CAUTION: The sequence shown here is derived from an
+CC       EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is
+CC       preliminary data.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; AABC04000002; EAM94333.1; -; Genomic_DNA.
+DR   GO; GO:0016020; C:membrane; IEA.
+DR   GO; GO:0005216; F:ion channel activity; IEA.
+DR   GO; GO:0005267; F:potassium channel activity; IEA.
+DR   GO; GO:0006813; P:potassium ion transport; IEA.
+DR   InterPro; IPR013099; Ion_trans_2_bac.
+DR   InterPro; IPR001622; K+channel_pore.
+DR   InterPro; IPR003148; TrkA_N.
+DR   Pfam; PF07885; Ion_trans_2; 1.
+DR   Pfam; PF02254; TrkA_N; 1.
+KW   Ionic channel.
+SQ   SEQUENCE   270 AA;  30497 MW;  528C4EA75C41DF75 CRC64;
+     MQTITTVGYG DTPVYGLAGR ANGMLIMVIG IGSLGYLMAG LTSMLIDIRL SSKLGERMAA
+     EKKHIVLCNY NESTKKVLDK IKYDGIDIVI LNENEVKGDN EYTYIKGSFL RENDLIRAGI
+     KKASSVIIFS RSEDKEQMAM DAESILSAMI IRKLNPEIRI IGEILNPDSR EHASSFMDDI
+     IIKGDVSSML IYSSIMIPGI PEFINDLLMS NSISEEDIDK KYASNTYREF ISNMEKENRI
+     VLAFRKQDKI YLRENSDKKI DVDSYIFIKN
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/no-genes.genscan
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/no-genes.genscan	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/no-genes.genscan	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,20 @@
+GENSCAN 1.0	Date run: 29-Nov-101	Time: 16:19:58
+
+Sequence 5922693.fa : 780 bp : 52.82% C+G : Isochore 3 (51 - 57 C+G%)
+
+Parameter matrix: HumanIso.smat
+
+Predicted genes/exons:
+
+Gn.Ex Type S .Begin ...End .Len Fr Ph I/Ac Do/T CodRg P.... Tscr..
+----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------
+
+ 1.00 Prom -    305    266   40                               2.29
+
+
+Predicted peptide sequence(s):
+
+Predicted coding sequence(s):
+
+
+NO PEPTIDES PREDICTED


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/no-genes.genscan
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/no_FH.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/no_FH.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/no_FH.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,192 @@
+ID   AE000001;
+FT   CDS             202..1557
+FT                   /M1_GAS_orthologue="SPy0002"
+FT                   /MGAS10394_orthologue="M6_Spy0001"
+FT                   /MGAS315_orthologue="dnaA"
+FT                   /MGAS8232_orthologue="dnaA"
+FT                   /SSI_1_orthologue="SPs0001"
+FT                   /cds_id="subject0"
+FT                   /cds_id="subject0"
+FT                   /class="1.2.1"
+FT                   /colour=2
+FT                   /fasta_file="fasta/SP_new.tab.seq.00001.out"
+FT                   /gene="dnaA"
+FT                   /gene="dnaH"
+FT                   /product="chromosomal replication initiator protein"
+FT                   /systematic_id="SpyM50001"
+FT   misc_feature    535..1488
+FT                   /colour=9
+FT                   /domain="HMMPfam:PF00308;Bacterial  dnaA
+FT                   protein;1.7e-156;codon 112-429"
+FT                   /id="SpyM50001"
+FT                   /label=HMMPfam
+FT                   /note="HMMPfam hit to PF00308, Bacterial  dnaA  protein,
+FT                   score 1.7e-156"
+FT   misc_feature    655..678
+FT                   /colour=8
+FT                   /note="PS00017 ATP/GTP-binding site motif A (P-loop)."
+FT   misc_feature    1429..1488
+FT                   /colour=8
+FT                   /note="PS01008 DnaA protein signature."
+SQ   Sequence 1841271 BP; 564546 A; 354645 C; 356610 G; 565470 T; 0 other;
+     ttgttgatat tctgtttttt cttttttagt tttccacata aaaaatagtt gaaaacaata        60
+     gcggtgtcac cttaaaatgg cttttccaca ggttgtggag aacccaaatt aacagtgtta       120
+     atttattttc cacaggttgt ggaaaactag aatagtttat ggtagaatag ttctagaatt       180
+     atccacaaga aggaacctag tatgactgaa aatgaacaaa ttttttggaa cagggtcttg       240
+     gaattagctc agagtcaatt aaaacaggca acttatgaat tttttgttca tgatgcccgt       300
+     ctattaaagg tcgataagca tattgcaact atttacttag atcaaatgaa agaactcttt       360
+     tgggaaaaaa atcttaaaga tgttattctt actgctggtt ttgaagttta taacgctcaa       420
+     atttctgttg actatgtttt cgaagaagac ctaatgattg agcaaaatca gaccaaaatc       480
+     aatcaaaaac ctaagcagca agccttaaat tctttgccta ctgttacttc agatttaaac       540
+     tcgaaatata gttttgaaaa ctttattcaa ggagatgaaa atcgttgggc tgttgctgct       600
+     tcaatagcag tagctaatac tcctggaact acctataatc ctttgtttat ttggggtggc       660
+     cctgggcttg gaaaaaccca tttattaaat gctattggta attctgtact attagaaaat       720
+     ccaaatgctc gaattaaata tatcacagct gaaaacttta ttaatgagtt tgttatccat       780
+     attcgccttg ataccatgga tgaattgaaa gaaaaatttc gtaatttaga tttactcctt       840
+     attgatgata tccaatcttt agctaaaaaa acgctctctg gaacacaaga agagttcttt       900
+     aatactttta atgcacttca taataataac aaacaaattg tcctaacaag cgaccgtaca       960
+     ccagatcatc tcaatgattt agaagatcga ttagttactc gttttaaatg gggattaaca      1020
+     gtcaatatca cacctcctga ttttgaaaca cgagtggcta ttttgacaaa taaaattcaa      1080
+     gaatataact ttatttttcc tcaagatacc attgagtatt tggctggtca atttgattct      1140
+     aatgtcagag atttagaagg tgccttaaaa gatattagtc tggttgctaa tttcaaacaa      1200
+     attgacacga ttactgttga cattgctgcc gaagctattc gcgccagaaa gcaagatgga      1260
+     cctaaaatga cagttattcc catcgaagaa attcaagcgc aagttggaaa attttacggt      1320
+     gttaccgtca aagaaattaa agctactaaa cgaacacaaa atattgtttt agcaagacaa      1380
+     gtagctatgt ttttagcacg tgaaatgaca gataacagtc ttcctaaaat tggaaaagaa      1440
+     tttggtggca gagaccattc aacagtactc catgcctata ataaaatcaa aaacatgatc      1500
+     agccaggacg aaagccttag gatcgaaatt gaaaccataa aaaacaaaat taaataacat      1560
+     gtggaaaaga atatctttta tgaaatagtt atccacaagt tgtgaacaac catttagtct      1620
+     tggattctct cgtttattta gagttatcca ctatatacac aagacctact actactactt      1680
+     attattatac ttattaaata aaggagttct catgattcaa ttttcaatta atcgcacatt      1740
+     atttattcat gctttaaatg caactaaacg tgctattagc actaaaaatg ccattcctat      1800
+     tctttcatca ataaaaattg aagtcactcc tacaggagta actttaacag ggtctaacgg      1860
+     tcaaatatca attgaaaaca ctattcctgt aagtaatgaa aatgctggtt tgctaattac      1920
+     ctctccagga gctattttat tagaagctag tttttttatt aatattattt caagtttgcc      1980
+     agatattagt ataaatgtta aagaaattga acaacaccaa gttgttttaa ccagtggtaa      2040
+     atcagagatt accttaaaag gaaaagatgt tgaccagtat cctcgtctac aagaagtatc      2100
+     aacagaaaat cctttgattt taaaaacaaa attattgaag tctattattg ctgaaacagc      2160
+     ttttgcagcc agtttacaag aaagtcgtcc tattttaaca ggagttcata ttgtattaag      2220
+     taatcataaa gattttaaag cagtagcgac tgactctcat cgtatgagcc aacgtttaat      2280
+     cactttggac aatacttcag cagattttga tgtggttatt ccaagtaaat ctttgagaga      2340
+     attttcagca gtatttacag atgatattga gaccgttgag gtatttttct caccaagcca      2400
+     aatcttgttc agaagtgaac acatttcttt ttatacacgc ctcttagaag gaaattatcc      2460
+     cgatacagac cgtttattaa tgacagaatt tgagacggag gttgttttca atacccaatc      2520
+     ccttcgccac gctatggaac gtgccttctt gatttctaat gctactcaaa atggtactgt      2580
+     taagcttgag attactcaaa atcatatttc agctcatgtt aactcacctg aggttggtaa      2640
+     ggtaaacgag gatttagata ttgttagtca gtctggtagt gatttaacta tcagcttcaa      2700
+     tccaacttac cttattgagt ctttaaaagc tattaaaagt gaaacagtaa aaattcattt      2760
+     cttatcacca gttcgaccat tcaccctaac accaggcgat gaggaagaaa gttttatcca      2820
+     attaattaca ccagtacgaa caaactaagt aagaaaaagc tcccttttag gagttttttt      2880
+     gttattataa atattaacga taatataagt ggagaaaagt gaatgtatca aattggatca      2940
+     tttgttgaaa tgaaaaaacc tcatgcttgt gtaattaaag aaactggtaa aaaggctaat      3000
+     caatggaagg tactgagagt aggagctgat attaaaattc agtgtactaa ctgtcagcat      3060
+     gtgattatga tgagtcgtta tgattttgag cgaaaattaa aaaaagtact gcaaccatga      3120
+     aagtcctttt aaataaataa ttctagctat atttgcaacc aatactttcc taaaaaattg      3180
+     ttagtatgcc gttggaaaat tagctattct aacgttatcg aaagaagaaa ggtggctatt      3240
+     gacaatgacg aaagttgtag cacagttact tagtttctcg gcaaactata tcaaaatgag      3300
+     aataatacac caaactttga taatttaaat gaaggtgaaa gagtggcata atgataagca      3360
+     aatctgaaat tttcttctta ttcataccgc tttttcttct attttttggt ataataatct      3420
+     tgattgaaat ttgaatggag attcgctaaa tggctttaac agcaggtatt gtaggcttac      3480
+     caaatgttgg taaatcaacc ttatttaacg caattacaaa agcaggagca gaagctgcta      3540
+     attatccttt tgcgactatt gatcccaatg ttggtatggt agaggttcca gatgagcgtc      3600
+     tgcaaaaatt gacagaattg attacaccta aaaaaacagt tccgacaacc tttgaattta      3660
+     ctgatattgc aggtatcgtt aaaggtgctt ctagagggga aggtctagga aataaatttt      3720
+     tagctaatat tcgtgaagtc gatgctattg ttcatgtggt acgtgctttt gatgatgaaa      3780
+     atgtcatgcg tgaacaaggt cgtgaggatg ctttcgttga tcctatagca gatattgaca      3840
+     ctattaatct tgaattaatc ttagctgatt tagagtcaat caataaacgt tatgcgcgtg      3900
+     ttgaaaaaat ggcacgaact caaaaagata aagaatcagt agcagagttc aatgttttac      3960
+     aaaagattaa acctgttttg gaagatggga aatcagctag gacaattgag tttacagaag      4020
+     aggaagcaaa agttgttaaa ggtctctttt tattaacaac taaacctgtt ttatatgtag      4080
+     ctaatgtcga tgaagataaa gttgctaatc cagatggtat tgattatgtc aaacaaattc      4140
+     gtgactttgc agctactgaa aatgctgaag tagttgttat ctcagcgcgt gcagaagaag      4200
+     aaatttcaga gcttgacgat gaggataaag aagaattttt ggaagctatc ggtcttactg      4260
+     aatcaggcgt tgataaatta accagagcag cttatcatct cttaggcctt ggaacctatt      4320
+     ttacagcagg tgaaaaagag gttcgtgctt ggacgtttaa gcgtggtata aaagctccac      4380
+     aagctgctgg tattatccat tcagattttg aaagaggttt tattcgtgca gtaaccatgt      4440
+     cttatgatga tctaatgatc tacggttcag aaaaagccgt caaagaagct ggacgcttgc      4500
+     gtgaagaagg aaaagaatac gttgttcaag atggggacat catggaattc agatttaatg      4560
+     tgtaattata ttaaaacaat atcagaaggt tggaagaaca ttccagccct tttggcattt      4620
+     tagaaagaga aaatatggta aaaatgattg ttggtctggg aaatccaggc tctaaatatg      4680
+     aaaaaacaaa gcacaatatt ggttttatgg ctattgacaa tattgtcaag aaccttgacg      4740
+     ttacctttac agatgataaa aattttaaag cacagatagg aagtactttt attaatcatg      4800
+     aaaaagttta ctttgtgaaa cctactactt ttatgaataa tagcggcata gcagtaaaag      4860
+     cattactaac ctactataat attgacataa cagatttaat tgttatctat gatgatttag      4920
+     acatggaagt cagtaaatta cgtttacgta gtaagggttc agcaggagga cataatggca      4980
+     ttaagtcaat cattgcccac attggaactc aggaatttaa ccgaatcaaa gttggtattg      5040
+     gacgaccttt aaaaggtatg actgttatta gccatgtgat gggccaattc aataccgaag      5100
+     ataatattgc tatttcgtta actcttgaca gagttgtcaa tgctgtcaag ttttatttac      5160
+     aagaaaatga ttttgaaaaa acaatgcaga aatttaatgg ataatcatgg atattttaga      5220
+     attatttagt cagaataaga aagtccaatc ctggcactct ggattaacca ccttaggaag      5280
+     acaactggta atggggttat cgggttcaag taaagcattg gctatagctt ccgcttattt      5340
+     agatgatcaa aaaaaaatag ttgtggttac atcaactcaa aatgaggttg aaaaattagc      5400
+     cagcgattta tctagtttac ttgatgaaga acttgttttc caattttttg cagacgatgt      5460
+     ggctgcagcg gaatttatct ttgcgtcaat ggataaagct ctatcaagaa tagaaaccct      5520
+     gcaattttta aggaatccta aatctcaggg cgttttaatt gttagtttat caagcttaag      5580
+     aactttattg ccaaacccag atgtttttac aaagagtcag attcaactaa cagttggaga      5640
+     agattatgat agtgatactc ttactaaaca actgatgaca attggctatc agaaggtctc      5700
+     acaggtcatt agtccgggag aatttagccg tcgaggggat attttagata tctatgagat      5760
+     tacacaagaa ttgccttatc gattggaatt ttttggcgat gatattgata gtattaggca      5820
+     attttatcca gaaactcaaa aatcttttga acaactagaa ggtattttta ttaatccagc      5880
+     aagtgatctt atttttgagg ctagtgattt tcaacgtggc attgagcaat tagaaaaggc      5940
+     tctacaaaca gcacaagatg ataaaaaatc ttatttagaa gatgtattag ctgtttcaaa      6000
+     aaacggtttt aaacataagg atatccgtaa atttcaatca ttattttacg aaaaagagtg      6060
+     gtcattatta gattatattc ctaagggaac gccaatcttt tttgatgatt ttcaaaaact      6120
+     agttgataaa aatgcaagat ttgatttaga gattgctaat ctcttgacag aagatttaca      6180
+     gcaaggaaag gctctttcca atcttaacta ctttgcagat aattatcgag agcttaggca      6240
+     ctataagcca gcgaccttct tttcaaattt tcataaggga cttggaaata tcaaatttga      6300
+     tcagatgcat cagctaactc agtatgccat gcaggaattt tttaatcaat ttcctttgtt      6360
+     gattgatgag attaaacggt atcaaaaaaa tcaaacaacg gttattgtac aggtagagtc      6420
+     tcagtatgct tacgaacgac ttgaaaaatc ttttcaagat taccaatttc gccttccttt      6480
+     agtgagtgct aatcaaattg tttcacgtga atcacaaatt gtaattggag ctatctccag      6540
+     tggtttttat tttgctgatg aaaagttagc gttaatcaca gagcatgaaa tttatcataa      6600
+     aaagatcaaa cgacgcgcta gacgatctaa tattagcaat gctgagcgtt tgaaagatta      6660
+     caatgagtta gcagtaggtg attacgtggt tcataatgtc catggtatcg gtcgctttct      6720
+     tggaattgaa acgattcagg ttcagggaat ccatcgagat tacgttacta ttcaatatca      6780
+     aaattcagac cgtatttctc ttccaattga ccaaattggt agcttatcaa agtacgtttc      6840
+     tgctgatgga aaagaaccta aaattaataa actcaatgat ggtcgttttc aaaagacaaa      6900
+     gcaaaaggtc gctagacaag tagaagatat tgctgatgac cttctaaaat tatatgctga      6960
+     aagaagtcag caaaaaggat tttcattttc accagatgat gacttgcagc gcgcttttga      7020
+     tgatgatttt gcttttgtag aaacagaaga tcaacttagg tctataaagg aaattaaagc      7080
+     tgatatggag agcatgcaac ctatggatcg tcttttagta ggcgatgtag gatttggtaa      7140
+     gacagaagta gctatgaggg cagcctttaa agcggtgaat gatcacaaac aagtagctgt      7200
+     cttagttcca accacagtct tggcccagca gcattatgaa aacttcaaag cacgctttga      7260
+     aaattacctt gttgaggttg acgtcttaag tcgtttccgt agtaaaaaag agcaagctga      7320
+     aacactagaa catgtacaaa aaggtcaaat tgacattatt atcggaaccc atcgactcct      7380
+     atcaaaagat gtggtctttt ctgatttagg attaattgtg attgatgagg aacaacggtt      7440
+     tggtgttaag cataaagaaa ctttaaagga attaaaaact aaggttgacg tcttaacctt      7500
+     aacagctact ccgattccta gaaccttaca catgtctatg ttaggtatcc gagatttatc      7560
+     ggttattgag accccaccaa ccaatcgtta tcctgttcaa acctatgttt tggaaaataa      7620
+     tccaggtctc gttagagaag ctatcattcg tgaaatggat cgtggaggac aaatttttta      7680
+     cgtttacaat aaagttgaca ctattgaaaa gaaagttgca gagctacaag aattagtccc      7740
+     agaagcttct atcggttttg ttcatgggca aatgagtgaa attcaacttg aaaatacctt      7800
+     gattgacttt ataaatggtg attatgatgt ccttgtggct acaacaatca tcgaaacagg      7860
+     agttgacatt tctaatgtaa acactttgtt tattgagaat gctgatcata tgggattgtc      7920
+     aactttatat caactgaggg ggcgcgtcgg aagaagtaat cgtattgcct atgcttacct      7980
+     gatgtatcgt cctgataaga tcctaacaga agtctctgaa aaacgtttag aggctattaa      8040
+     aggctttact gaattaggtt caggcttcaa gattgctatg cgagatttgt ctatacgagg      8100
+     agcaggtaat attttaggag cttctcaaag tggctttatt gattcggtcg gttttgaaat      8160
+     gtattctcag ttattggaac aggctattgc tagcaagcaa ggaaaaacga ctgttcgcca      8220
+     aaaaggtaat actgaaatca atcttcagat tgatgcttat ttaccagatg attatattgc      8280
+     agatgagcgc caaaaaattg acatttacaa gcgtattcga gaaattcaat caagagaaga      8340
+     ttatctcaat ttgcaagatg agctgataga tcgttttgga gagtatcctg atcaagttgc      8400
+     ctatttgtta gagatagctc tgctaaaaca ttatatggac aatgcctttg cggaattagt      8460
+     tgaacgcaaa aataatcagg tcattgttcg atttgaagta acttctttaa gttatttctt      8520
+     gactcaggat tatttcgaag ccttatctaa aacccatctt aaagctaaaa ttagtgaaca      8580
+     tcaggggaaa atcgatatcg tctttgacgt tcgccatcaa aaagattata gaattttaga      8640
+     agaattgatg ttatttggag aaaggcttag tgagataaaa atcagaaaaa acaattcagt      8700
+     ttttaaataa taatttgaaa caacttttag actgaatagt attaatgaca atggaacgaa      8760
+     ctagctcttt atacttttgt caaaaatcta agatctttgc tgcgaaatta ggatcttttc      8820
+     taccatttaa aaaataaaaa tgataaaatg aagaggattg taggagaaat tatgagacta      8880
+     gataaatatc taaaggtatc gcgccttatt aaacgtcgtt cagtagcaaa agaagttgcg      8940
+     gataagggac gaattaaagt taatgggata cttgctaaaa gttcaacgaa tgtaaaacta      9000
+     aatgatcaca ttgaaattag ttttggaaat aaattactga cagtaagagt cattgaaata      9060
+     aaagatagta caaaaaaaga agatgctctt aagatgtatg agataatcag tgaaacaagg      9120
+     ataacattaa atgaagaagc ctagtattgt tcaattaaat aatcattata ttaagaaaga      9180
+     gaatctcaaa aaaaaatttg aagaagaaga atctcaaaaa agaaatcgtt ttatgggatg      9240
+     gatccttgta agtatgatgt ttttatttat tttgccaact tataatcttg tcaaaagtta      9300
+     tgttgatttt gaaaagcaaa atcaacaggt ggttaaatta aaaaaagagt ataatgaatt      9360
+     gtcaaagagt acaaaaaaag aaaaacaatt agcagaacga ctaaaagatg ataattttgt      9420
+     caaaaaatat gctagggcaa aatactattt atcgcgtgaa ggagaaatga tttatcctat      9480
+     tccaggacta ttaccaaaat gatgatggac aatattataa aaaaaataga agcatttctt      9540
+     gctttttctg ataaaaaatt agcagagctg caacaggaaa atcaaaaagt taaagaagaa      9600
+     

Added: trunk/packages/bioperl/branches/upstream/current/t/data/no_cds_example.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/no_cds_example.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/no_cds_example.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,258 @@
+LOCUS       AAEL01000195           13065 bp    DNA     linear   INV 27-OCT-2004
+DEFINITION  Cryptosporidium hominis strain TU502 chromosome 2 CHRO012040, whole
+            genome shotgun sequence.
+ACCESSION   AAEL01000195 AAEL01000000
+VERSION     AAEL01000195.1  GI:54657549
+KEYWORDS    WGS.
+SOURCE      Cryptosporidium hominis
+  ORGANISM  Cryptosporidium hominis
+            Eukaryota; Alveolata; Apicomplexa; Coccidia; Eimeriida;
+            Cryptosporidiidae; Cryptosporidium.
+REFERENCE   1  (bases 1 to 13065)
+  AUTHORS   Xu,P., Widmer,G., Wang,Y., Ozaki,L.S., Alves,J.M., Serrano,M.G.,
+            Puiu,D., Manque,P., Akiyoshi,D., Mackey,A.J., Pearson,W.R.,
+            Dear,P.H., Bankier,A.T., Peterson,D.L., Abrahamsen,M.S., Kapur,V.,
+            Tzipori,S. and Buck,G.A.
+  TITLE     The genome of Cryptosporidium hominis
+  JOURNAL   Nature 431, 1107-1112 (2004)
+REFERENCE   2  (bases 1 to 13065)
+  AUTHORS   Xu,P., Widmer,G., Wang,Y., Ozaki,L.S., Alves,J.M., Serrano,M.G.,
+            Puiu,D., Manque,P., Akiyoshi,D., Mackey,A.J., Pearson,W.R.,
+            Dear,P.H., Bankier,A.T., Peterson,D.L., Abrahamsen,M.S., Kapur,V.,
+            Tzipori,S. and Buck,G.A.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (08-JUN-2004) Center for the Study of Biological
+            Complexity, Virginia Commonwealth University, Trani Center for Life
+            Sciences, 1000 W Cary St, Richmond, VA 23298, USA
+FEATURES             Location/Qualifiers
+     source          1..13065
+                     /organism="Cryptosporidium hominis"
+                     /mol_type="genomic DNA"
+                     /strain="TU502"
+                     /db_xref="taxon:237895"
+                     /chromosome="2"
+     gene            complement(334..2082)
+                     /locus_tag="Chro.rrn021"
+     rRNA            complement(join(334..1392,1456..2082))
+                     /locus_tag="Chro.rrn021"
+                     /product="18S ribosomal RNA"
+ORIGIN      
+        1 attatgttgt tagagggcgc agaagcacac ancccgacgg accgcgcacg aagggccacc
+       61 ccggacggcg aaccagccga cgcgcgccac agagcaaaga gaggggacca gaggggagcg
+      121 gatggagaca tcccgaagac cgcaatagga gaccacactg agagagcggg ggcaatccgc
+      181 atgcgcgacc tataggtaca cgacgtaaat cttatattat aaataacaaa attaaatttt
+      241 ctttttgttc atgttcattt aatttttaaa tgacatatat atataatata atataatata
+      301 atataatata tcaaaaactc atttctcaaa taagaatgat ccttccgcag gttcacctac
+      361 ggaaaccttg ttacgacttc tccttcctct aaatgataag gtttacgaaa ctttccttac
+      421 atgtattgct acaaagtatg gtccgaataa ttcaccggat cattcaatcg gtaggagcga
+      481 cgggcggtgt gtacaaaggg cagggacgta atcagcgcaa gctgatgact tgcgcttact
+      541 aggaattcct cgttcaagat caataattgc aatgatctat ccccatcacg atgcatattc
+      601 aaaagattac ccatttcctt cgaaacagga atatatactt gatggatgca tcagtgtagc
+      661 gcgcgcgcgg cccaggacat ctaagggcat cacagacctg ttattgccta aaacttccct
+      721 gtattaaaca tacaaagtcc ctctaagaag acagataaaa aatataatat ttcttatgtc
+      781 tatttagcag gttaaggtct cgttcgttaa cggaattaac cagacaaatc actccaccaa
+      841 ctaagaacgg ccatgcacca ccacccatag aatcaagaaa gagctatcaa tctgtcaatc
+      901 cttcctatgt ctggacctgg tgagttttcc cgtgttgagt caaattaagc cgcaggctcc
+      961 actcctggtg gtgcccttcc gtcaattcct ttaagtttca gccttgcgac catactcccc
+     1021 ccagaaccca aagactttga tttctcataa ggtgctgaag gagtaaggaa caacctccaa
+     1081 tctctagttg gcatagttta tggttaagac tacgacggta tctgatcgtc ttcgatcccc
+     1141 taactttcgt tcttgattaa tgaaaacatc cttggcaaat gctttcgcat tagtctgtct
+     1201 ttaacaaatc taagaatttc acctctgact gttaaataca aatgccccca actgtcccta
+     1261 ttaatcatta ttcttatctt agaaccaata agaaagataa aaatctttaa tattattcca
+     1321 tgctggagta ttcaaggcat atgcctgctt taagcactct aattttctca aagtaaaatt
+     1381 tcatatacta aaaaaaatag taatatgaat tatgttaata ttatataaat attcatcaaa
+     1441 atattttata taaattatta acagaaatcc aactacgagc tttttaactg caacaacttt
+     1501 aatatacgct attggagctg gaattaccgc ggctgctggc accagacttg ccctccaatt
+     1561 gatacttgta aaggggttta tacttaactc attccaatta caaaaccaaa aagtcctgta
+     1621 ttgttatttc ttgtcactac ctccctgtat taggattggg taatttgcgc gcctgctgcc
+     1681 ttccttagat gtggtagccg tttctcaggc tccctctccg gaatcgaacc ctaattcccc
+     1741 gttacccgtc attgccacgg taggccaata ccctaccgtc taaagctgat aggtcagaaa
+     1801 cttgaatgat atgtcacatt aattgtgatc cgtaaagtta ttatgagtca ccaatcatat
+     1861 tggttcttta tctaataaat acaacccttc cataaagtcg agttttttcg catgtattag
+     1921 ctctagaatt accacggtta tccatgtaag taaagattat caagtaaact ataactgtta
+     1981 taatgagcca ttcgcagttt aaccgtataa aagtttatac ttagacatgc atggcttaat
+     2041 ctttgagaca agcatatgac tactggcagg atcaaccagg ttactatcat taatattaat
+     2101 aatattttct ctttttattt tgtcctcatg aagtaaaaaa caaaaaaaaa aaatattatc
+     2161 aacattgaat acatattttg tctatcattt ataataaaca aaagaatcat caagaaatta
+     2221 ttaatgatta ttatacatac atcacataca tatatagacc atatatatat aagactgtat
+     2281 aaatatcaca agaacttctc atccagttaa attaataaga aatatcattg acttaactgg
+     2341 caatttaaga tagtattgaa accacattgt aaaatacccc acaaccttga atccaatagt
+     2401 actaataagt actctttaaa caacagtggt tacggaatat tccaacaccg tacgctccaa
+     2461 aacaccatta tatatactct caaattgact tagttatttt tacttcccaa tataaataac
+     2521 atataattaa tcataataaa accaattata cactatccat tgaaagtaat caacacatca
+     2581 aagaaaatat attaacaatg tagagtcata cattgatact tatcacatgt aactctttgg
+     2641 aaaccataat aatacaacaa cctcttgtac ttaacattcg tgtatctgtc aatccgacaa
+     2701 catgctatca aattgaccac actaaataca ccgcgcatac aatcgaaaca caatttcaat
+     2761 cgtaagacgt gcgcatttac ccgcaactca tatatacatt ttaccagtac taatcaattc
+     2821 acagcctttt cagatcgaaa tagactacga gttcttaata ctcaatgtca gcccattact
+     2881 aacacgccaa ttcccacatc cccaggttga atactcttca ccccggaaaa aaaatttacc
+     2941 gctatttttg aataaatttt ctattacaga actgtttcag agctgttcta aagaagctag
+     3001 gtaatacaaa taaataaaag ctaaacttaa tatctctatt atttatttct aatagttctc
+     3061 aataacgcta atttatttac atgaaaaata tctaattcta ccccttatta atttatttca
+     3121 ttccagatgt aatatttcta gttgtccttc taatatgcct taatatgcta gtaatatcac
+     3181 tgatgatcga cccatccgtg aagcatgcag gtacaatata gttcatgctt ttacttacat
+     3241 cgctaaccga atttataatg ctaatgcatt tgttcatgct ttctgaaatt tcatttagta
+     3301 tttgcattac gtcactaggt gtaagagtat cagtaaatgg ttcgaaacag tgaatggttt
+     3361 cttttatcca taatattctc aattttaaat ctgaaataca gttaatttga gaaaaattac
+     3421 cgctggtctt taaagactca attaaatgag gaagagtatc actcaaaatt tttgaaattc
+     3481 ctaatattac tggatggcta ataggaagtg gatcatccaa ccaaacgcat gggtcaaact
+     3541 tatatgcaag gctcaaaaca aaacattctc cacttgggat ttccctctta tctgactgag
+     3601 atagagtaga ttgatcggca gaaattgcta tcgcaaatgc gcgatcgtaa tgtttttgtt
+     3661 ttaaggcatc atcaatttgc aaagctatgt tttgggcgct cttatttgca attgagttgt
+     3721 ctggtgtaag agcttttagt ggtattccat tactgatctg taattccaaa cgattaacag
+     3781 ttgtttgcaa ttgagtaact gcactagtta atgtcccaaa tgaatctgct actacttgag
+     3841 taaagttgtc tattggattc gagctaatgt tatcgcattg tttatttaat ttagtcactt
+     3901 tttcacgaac gcttctagat tcgccaacaa gcctcgaaat gcaagtgtca atacttgtta
+     3961 atttctcatt tgtacttgta aaaagttcct taacatttgt ttcaatttta gaaatttcat
+     4021 tgatattgac ataattatca acagacttct cgattcctga caaatgcgac ttaatagatt
+     4081 gaatatcacg ttcgatcggt tctaaatact tgctaattgc tgaggaaacc gaatcattta
+     4141 actctttgga caagctgaag gtcaattttt ttaattcatc tgtaatcatt tcaaatattt
+     4201 tatcatttaa accaaaatct ttgtggttgg tattgcttgt attcgtattt ttcgattgtg
+     4261 gattgctcat tgactgtttt tcctccatat ttaatgtact aaaaatagat gtaagtaaat
+     4321 ttgatctatc agatggtaat ttattctgag gtgtattatt gtattgaaaa tcgcttttct
+     4381 cagttttatg tgatgcattt gccatcaaaa cagcataata atcatcagca atctcgtcat
+     4441 tcgtgtcaat attagttttt ttctgattag acaagtttat agtaaatgtg tcactaggta
+     4501 tactgatatt ttcggatgaa taatgggctt tattcgcact caaatctaaa acaggtggga
+     4561 taatgattga ttcgagccct ccaaaaatat ttgaaacaaa aacttcgatg ggttgttttg
+     4621 ataaattttc acttgattta gtttttccag catcaagcca ggtaacattt tttggaggtc
+     4681 tgaaagcagc cgtctctaat agtctagagg ggaagaaaaa caaatcttta atgggtagct
+     4741 cttcgccaac agagcaaacg attaattggt taaaaatctg tattgcgaaa tactcacccc
+     4801 aatttggtcc aataatatga gcattaaatt ggttattttt attatcatcc ataatatgaa
+     4861 tatttatgct ctgtaaaagt gtaatttcgc caaagttatt gtttgaatct atcggtaagt
+     4921 tatatatgcg aacagtatag atatcttttt ctactgataa taatgtagct gttacattca
+     4981 tgtggttaat ggtaggatct ctgattctta tattcggata tactgcagta tcaagtagta
+     5041 atgaatttga atcgataatt tgccttgaca ttatcgatct atagtactct ccagggctta
+     5101 aatttgagga aaaagtatca taatagccat ttgaagtaaa aagaattatt agttctccaa
+     5161 taaattggca actagaaatt ttaacatcat ccgggtagtt aatcagtacg caacatgata
+     5221 aattagcagc tgaatgctgg ggagcattgg acttgttgga gttagaacat tcttgtagaa
+     5281 tgtggtattc atatgaagaa cgattccata gacacactga gcatttattc caagcaacaa
+     5341 aacaacgatc atctaaatgg ttaaatacaa cagatttcca atgggtattt ccccttacca
+     5401 aagtattaat agtgtctgat tgaatattta ctattgaaag ttccttccct ttaatataaa
+     5461 caatattatt cgacaccttt gaaagcgaaa aactttccat tccaagccct gataatttac
+     5521 tgctgcattt agatccattg tttagttctt cttggctatt aataccagaa tctaacatga
+     5581 taactcttgg aacatcagaa aacatatgga caacatattc tctattacat tgtagtatag
+     5641 aggctgcttg agatgtagaa gcattgtgag acgatggtat cttaatatct aaagatgaac
+     5701 cagaaacatg gtaccactgc cttttcggta gttcaatcaa ctccatgatg attattaatc
+     5761 tgcaaccaaa taaatagtaa attttttttt taccaaattg cgccataaat ttctcggaat
+     5821 aattaaacta agagggaaaa atattcgttt gaaaaataaa atcttatcat tcacctaatc
+     5881 atgttcagaa actaataagt agaatctggc tttcctagac tatcaagact acctttcata
+     5941 taaaacattg tctctgggag gttatccatt tcaccgctga caatagcatt aaaaccttta
+     6001 attgtatcaa ttaagttgac aaaaacccct ggctttccgg taaaagcttc cgacataaaa
+     6061 aacggttgag taagaaactt ttggactttt cttgccctgg agacaatcaa cctttcctct
+     6121 tcagaaagtg catgagaacc ttgagtagaa attttgctct gtaactgctt atacttttgt
+     6181 aaaatcgaaa gaacttcaat agaaactttg tagtgctctt cgcctataat atttcgatct
+     6241 aaaaccttag acctactttc caatggatca atggatggga atatcccaac ctcggacatc
+     6301 tttctagaga gaactattgt agaatcgagg tgggtgaaag ctgcaacagg tgctggatca
+     6361 ttaatgtcgt cagatggaat ataaagtgcc tgaatagatg ttattgaacc atttctcgtg
+     6421 gttgtgattc tttcttgaag tttaccaaga tcagtagcca gagtaggttg atacccgatt
+     6481 tcagttggta atagccctag aaaaagtcat taaaaaaaaa aataaaaaat aacccatcag
+     6541 tcggtaattc cagctatttt ccagttgttt cagcattgaa actcagataa atatggaaag
+     6601 tttgcatcat ttgggaaaat tacactaaat atttacttac ctaaaagtgc agaaacttca
+     6661 ctccctgcct gagtaaatcg gtaaatatta tctacgaaaa ataatacgtc ttgcatcatt
+     6721 gaatctctaa aatattctgc cattgttaga cctgtaagtg caactcgcgc tcgcgctcct
+     6781 ggagtttcat tcatttgacc atataccagc gctgtcttag aacctaaaaa atcgtatatg
+     6841 ggttgtctct tatgcttgct gtcaactcgt gatccaacaa tcgacttttt attgacgcca
+     6901 tttgctagca tttcattata taaatcgtac ccttccctga tgcgctctcc aacccctgta
+     6961 tagacagagt aaccaccata cttttttgca atattgttta ttagttccat gataagaaca
+     7021 gtcttgccaa cccccgctcc accaaatagt cctattttcc caccttttat gaatggtgta
+     7081 agtagatcta ttgccttaat cccagtaaca attaatgaag gctccattac ttgttccatg
+     7141 tattcagggg ccgttctatg aatcggtttt ttaactttag cgtcgatctc tccacatccg
+     7201 tcaattgcat tccccataac attcaccatt ctaccaagtg tggccttgcc tactggaacg
+     7261 catattgggc ttccagtgtc tataactttc tctcctctag agagtccttc tgtaacatcc
+     7321 attgcaattg ccctaactga attatcactt aagtgttgcg ccacttcaag tactagtttg
+     7381 ttttgatgcc ctttaacttc caaagcgttt aacaattctg gtagtttacc ctcaaatttt
+     7441 acatctacaa ctgaacccat gatttgtgat atatagcctt ctgagagatt tttgctagtt
+     7501 ttactataag acttccatct tttgtttaag ttataatatt ctaggttgcg gatcgctttt
+     7561 ttctggaaaa ataccgaaaa tagtggtaaa aattctcttc gtgtgaattt gaagcatgga
+     7621 agagattgac ttaacaattt tcctcgaatc ataaacattg cctccactct ttaattaaaa
+     7681 ttaaaaagta attgtgcaaa attgcattaa agccaaattc tatgcaattt aatttttgcg
+     7741 tataaacaac ataacaaaaa taggaaatat attctgaaaa tcgatatcca atgtttacta
+     7801 aggaactgag tgtatttaat agcaggaagg caaaatagca gaagttaaat ataaaaaaaa
+     7861 tacaaaaata gagaaatttt caaagcagga ataagtagtg aacaattttg atgagtgaga
+     7921 tgaaagtgtt aagagaatta taattaccat tcaaaataga atataattat cttaagtact
+     7981 aaaacagatt tagagcgggt gttaaacaaa atcaaattgc tgtgggtggg gggaaatgtt
+     8041 attgcaagag taattttcca aatagttgag ttaatttacg ttgtaatacc aatgtcatct
+     8101 gaggaaggca ccgataattg gtctgcagag gaggctgtag agagcgtaaa gactctgagt
+     8161 gttagtgagc ttcagtctag gattcgcctt ttagatggtg aaattcgact tatgaaaagc
+     8221 gaaagtaatc gtcttaagca tgaattaaac cagatgaatg aaagaatcag aagtaatact
+     8281 gaaaaaataa aacttaacaa acagcttcct tatctggttg ccaacattgt cgaatcactg
+     8341 gatttttcag atgaacagga aaatgagggt gaagggatgg aattcgatgg agacaaaaac
+     8401 gataaatgca tggtaataaa aacatcatca aggcagactg tctttctccc tgtcattggg
+     8461 ttggttccag agaatgaatt aaagcctgga gatcttgttg gagtgaataa ggatagttat
+     8521 ttgattcttg ataaattgcc accggagtat gactctagag ttaaggctat ggaagttgat
+     8581 gaaagaccaa tggaagagta ttcagatatt ggaggattag ataagcagat ccaggaatta
+     8641 gtggaggcaa tcgtccttcc gatgacccat aaagaaaggt ttgaaaaaat tggtataaaa
+     8701 cccccaaagg gtgtactaat gtatggacct ccaggcacag ggaaaactct tttggcaagg
+     8761 gcatgtgcag cacaaactaa agccacattt ttgaaattag caggcccgca actcgtacaa
+     8821 atgtttattg gtgatggtgc aaagatggtt agagatgcat ttgaaattgc acgtgaaaaa
+     8881 gctccttcaa taatatttat tgacgagctt gacgctatag gtatgaaaag gtttgatagt
+     8941 gagcatagtg gggatagaga ggtacaaagg actatgcttg agttacttaa tcaattagat
+     9001 ggatttagct cagacgacag agttaaagtt attgctgcaa cgaataggcc ggatactctc
+     9061 gatccagccc ttttaagatc aggtaggctt gaccgtaagg tcgagttacc gcatccaaac
+     9121 gaagaagcaa ggtccagaat acttcaaata catagcagaa aaatgaatgt tgatttaaat
+     9181 gacgtgaatt tccaagagct atcgagatca acagatgatt ttaacggtgc tcagcttaag
+     9241 gcagtttgtg tcgaagcggg gatgacagca ctcagaaggg gagcaacgat actttgccat
+     9301 gaagattatg tcgaagggat tgcagcagta ctagcaaaga agaaatcccc acttagctat
+     9361 ttttcataaa gcaagtgaaa tacaaaaaaa aacttaatca actaattgtt agaattctta
+     9421 aatatatata tatttatacc attagcaata atctttttct ttagccgaaa tatccttatt
+     9481 cttgccttcg tcaaatacct cttctccaaa atatgggtac tcttcccatt tgggatgttt
+     9541 atcaatgatt aaatgtttca tctcctgctc taatacttca aaaaatacat gttcgaagtg
+     9601 tttgtcgccc cattgaattg gaatgatgtt attttctctg tctagcaaca tccatgtggc
+     9661 gttttgagga gcaagcccga gtaaaagctg agttcgatag gcagtacttc tagacgctcc
+     9721 atttaaacag ctcttttgga agcttgaagg tgtaagttgt tcatttggac acgaatgaat
+     9781 atctgatttc tggctttgcg aagatttttg ctttctctta cgcgattccc accacttact
+     9841 tttaatagtt gaattatctg atgaaaattc agattcagaa tctgagattt gtgaattatc
+     9901 tgataattcg caatcatcaa taaaattgtc taccgtattt gtattctgac cctttttagg
+     9961 tgatttatta aaaactctag ctctgttagg tgcggaagaa gttgttgcta agtcaatatg
+    10021 aatattgcct aacctgtttg atgaaattaa tgcagttgca tcaaatgcgc gatagcaatg
+    10081 ttgaaggcta cgctgaatta gtagctcaat tgaagatgga atttccagat cttctctaca
+    10141 aagaaaactg gcaatttgat acaagttgag atgcattgat gatgcaagcc tcaaccatct
+    10201 gactgaaact ctcatgcttc ttaaacactc gcttctaatc aataactttc tttttaacct
+    10261 ctctgcatcc gcatctggat ccatatactt tattactctc aattcagacc tgctaaaggg
+    10321 tatcttcgaa tgtggccaat caaaccatac ccaatcaaat tgtgcaacat cgagaacatc
+    10381 tggtaaacaa agcccatggt cgattggaat taacttatac ttggtttttt tgccgtcagg
+    10441 agtgctcaga gggtgttcat atggtgaaat agctggactt ggattatttg aattactaat
+    10501 gttgaatttg attgaataat tgggctggtt cgcgacaaca agaatattac tatcatttct
+    10561 atctaaatta aagagacaaa tatccaaaat tccaatgcgg tgtacatctc taatgcagaa
+    10621 tacagagggg ttgaaattac caactgtttc tgtagttgaa ataaactctt gaaatgcccc
+    10681 caacttccaa tcaacggtta tttcactatt tgccttatta ttatcttttt gaaaaatatc
+    10741 gccccattca agagtaatct tattccagct atcataatta aaagcctgat ggcatgcctc
+    10801 aagtagtgta gtatcaggaa cgccggcaaa attatggtag tatgcatccc aaattgctgt
+    10861 tgcaacctct ctgctagctc cttcacctga caatactcct gacctaaaac cttgttggcc
+    10921 taatttacct tgataacccc ttggattatt tggagagaat gcctcctcat ctagtggttt
+    10981 aaacattgca acaacttggc cttttgcatt gtacattcta taggtagcgc ccgttccatc
+    11041 caatgtaagt ttgggatgga catttctctg cattgcaagt ttaacttcaa ctacaagttt
+    11101 ctgaatttta ctcgaccatt taagtccagt acttctaatt ccaaaatctg acttgttcat
+    11161 cctgagagaa tagtgcaatg ggtgtctttg tttttcaaat gtatgcataa acctgctatt
+    11221 cggaatttct acgcctctat aaaatagctg aatgtcacga atgcttgtgc cttctgggag
+    11281 atccagatac ttaattacta gtcttttaac catttggcaa tcatagaatg ggtatattga
+    11341 caaaagtatg cgctttgtcc catccagctc gtcaatatat aagacaaatt tctggatttc
+    11401 attttcacct gcacgcaaag cattatgagt tgaaaaatat acctgttgag caacagggaa
+    11461 gttgttaatt tgattcattt acaataaaca tgacaacata ttattcttta ctcaattttt
+    11521 atcattccat taaagtgacc cgtctaaaaa tataaccgct ctatcataat taaaaattaa
+    11581 aaccaaattt cggggaaaaa aaatttatca atgcttaata atctgacatt acttccatca
+    11641 attggatcta gatatgaatc cacaacctaa aattttttat taaattaatt agaatcacaa
+    11701 tttattgcta tttggattgc tggtaaattt caacttacat gcaacattcc atcttctact
+    11761 gccttggaaa tcattaaatt atattcttct atagttagtt catcgccatt ttttaataaa
+    11821 ctcctaatct gagctttaat ttgtatgata ctaaatttag tctataacct acctttttag
+    11881 ttttatttgc cataaaccaa cttataagtg agatatcgct ttgcggttgt ttccattcaa
+    11941 tttcaagctt cttaagttgg ttgcaaactt gaatacaaaa atttaaataa gttttccaag
+    12001 gtatcgatct tttctatatt ggaaatattg agaagataat tagtgtttat tttattatcc
+    12061 ttgcaaatct actcaccttc atgcttaact attaagaaag cgtgaaaatt atagaaaaat
+    12121 cagtatgtca ggtgtgggtg ggggatctac catgtaaaat aatattcagt tagcgcaaat
+    12181 aaacaattaa aaaatggtaa tataataaat tatatatatt caagattaaa gaatttacta
+    12241 attcgtgtaa aaattgacta tgattgtgat ttactaataa tttaaaaata gtaagctaac
+    12301 agaaaaatgc atctatattc aagacatttg taaaaatgct ttgttctact tattctttgc
+    12361 tagctttttt tgaaaccttt ttgttcaatt cacttaatat actgagtagt ttatctttaa
+    12421 ttccaataga ctccttgtag tttttaaata aatctggatg agaatttcga gtagaaaagt
+    12481 gaatgatata gtcgccgaat gtatctttgc caaaaatagg tggcccttta ccctttaata
+    12541 tatatggaac cctcggatta gttttaggag gtaccttaac tcgcatgctt gtacctttaa
+    12601 ttaaactagg cacaacaatt tctccgccaa aaatacatgt attaattgaa attggaatat
+    12661 ctacatgaat attatccttt atccatttga gtttcgtatt tggcttaata ttcaccttaa
+    12721 tgaataaatc tccataattt cctgaaacaa agttaccttc tgagctcagc tttagttgcg
+    12781 ttccatcctt cgttcctctt gggattctta gtaaaacgtt cttttcttta ataatatggc
+    12841 cgctaccatt acactttatg caaagcatta aatttgaata tccagtacca ctgcacttca
+    12901 tgcacaatga cttgattaat aaaggaccat tttggtaaac attcaggcca gaaccaccac
+    12961 aatttgaaca tttagctatt tttagtcctt taatgatgcc aacgccattg caggcatcac
+    13021 atttacaact agaattagtt ttaagtgttc tgcttgtccc attaa
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/no_hsps.blastp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/no_hsps.blastp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/no_hsps.blastp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,474 @@
+BLASTP 2.0MP-WashU [16-Feb-2003] [macosx-10.2-ppc-ILP32F64 2003-02-18T10:00:41]
+
+Copyright (C) 1996-2003 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference:  Gish, W. (1996-2003) http://blast.wustl.edu
+
+Query=  mgri:MG00189.3 hypothetical protein 6892 8867 +
+        (601 letters; record 1)
+
+Database:  /nfs1/jason_input/all_euk/metazoans;
+           /nfs1/jason_input/fungi/ascopeps; /nfs1/jason_input/fungi/basido
+           351,440 sequences; 145,589,111 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                              High  Probability
+Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
+
+mgri:MG00189.3 hypothetical protein 6892 8867 +               3098  0.        1
+fgram:FG01141.1 hypothetical protein 47007 48803 -            2182  4.2e-226  1
+ncra:NCU03076.1 hypothetical protein (28850 - 30771)          2148  1.7e-222  1
+anid:AN1733.1 AF252630_1 delta-1-pyrroline-5-carboxylate ...  2001  6.4e-207  1
+spom:SPBC24C6.04 |||putative delta-1-pyrroline-5-carboxyl...  1718  6.2e-177  1
+calb:orf6.5650  orf6-2420:8022-6235:e  1788 bp, 595 aa, c...  1706  1.2e-175  1
+duh99:cneo_DUH99_Contig425.Gene2 Start=40771 End=43016 St...  1659  1.1e-170  1
+wih99:cneo_WIH99_107.Gene1 Start=41629 End=44150 Strand=1...  1613  8.3e-166  1
+jec21:cneo_TIGRJEC21_chr11a.pseudo.Gene521 Start=371763 E...  1582  1.6e-162  1
+pchr:pchr_Scaffold_43.Gene3 Start=38145 End=40814 Strand=...  1577  5.4e-162  1
+fgram:FG03073.1 hypothetical protein 68551 70406 -            1576  6.9e-162  1
+ccin:ccin_Contig257.Gene8 Start=55703 End=53090 Strand=-1...  1176  2.7e-161  2
+agos:AAL072C location=AgChr1:complement(223170..224891)       1540  4.5e-158  1
+umay:umay_Contig44.Gene2 Start=5123 End=2772 Strand=-1 Le...  1535  1.5e-157  1
+scer:YHR037W PUT2 SGDID:S0001079, Chr VIII from 181968-18...  1533  2.5e-157  1
+hsap:ENSP00000290597 Database:core Gene:ENSG00000159423 C...  1298  2.0e-132  1
+hsap:ENSP00000328453 Database:core Gene:ENSG00000159423 C...  1298  2.0e-132  1
+mmus:ENSMUSP00000043821 Database:core Gene:ENSMUSG0000002...  1295  4.2e-132  1
+afum:4563_glimmerf_85                                         1279  2.1e-130  1
+dmel:CG7145-PA translation from_gene[CG7145 CG7145 FBgn00...  1256  5.6e-128  1
+dmel:CG7145-PD translation from_gene[CG7145 CG7145 FBgn00...  1256  5.6e-128  1
+dmel:CG7145-PB translation from_gene[CG7145 CG7145 FBgn00...  1256  5.6e-128  1
+rnor:ENSRNOP00000025090 Database:core Gene:ENSRNOG0000001...  1235  9.5e-126  1
+agam:ENSANGP00000013138 Database:core Gene:ENSANGG0000001...  1217  7.7e-124  1
+cbri:CBG07110 CBP01802 (cb25.fpc0305.en1909a/cb25.fpc0305...  1216  9.8e-124  1
+cele:F56D12.1 CE29047   aldehyde dehydrogenase status:Par...  1201  3.8e-122  1
+anid:AN9278.1 hypothetical protein 133790 135518 +            1194  2.1e-121  1
+drer:ENSDARP00000025931 Database:core Gene:ENSDARG0000000...   614  5.1e-115  2
+drer:ENSDARP00000004693 Database:core Gene:ENSDARG0000000...   617  1.8e-108  2
+drer:ENSDARP00000006345 Database:core Gene:ENSDARG0000000...   614  2.3e-108  2
+anid:AN6022.1 hypothetical protein 49043 51304 -               625  3.3e-107  2
+dmel:CG33092-PA translation from_gene[CG33092 CG33092 FBg...   994  3.3e-100  1
+dmel:CG33092-PB translation from_gene[CG33092 CG33092 FBg...   994  3.3e-100  1
+dmel:CG33092-PC translation from_gene[CG33092 CG33092 FBg...   994  3.3e-100  1
+dmel:CG33092-PF translation from_gene[CG33092 CG33092 FBg...   994  3.3e-100  1
+drer:ENSDARP00000009693 Database:core Gene:ENSDARG0000000...   535  3.9e-100  2
+dmel:CG33092-PD translation from_gene[CG33092 CG33092 FBg...   989  1.1e-99   1
+dmel:CG33092-PE translation from_gene[CG33092 CG33092 FBg...   989  1.1e-99   1
+dmel:CG33092-PG translation from_gene[CG33092 CG33092 FBg...   984  3.8e-99   1
+dmel:CG6661-PA translation from_gene[CG6661 CG6661 FBgn00...   753  1.1e-74   1
+afum:4562_glimmerf_84                                          590  2.1e-57   1
+drer:ENSDARP00000025173 Database:core Gene:ENSDARG0000000...   571  2.2e-55   1
+duh99:cneo_DUH99_Contig338.Gene3 Start=10629 End=8428 Str...   498  1.2e-47   1
+wih99:cneo_WIH99_138.Gene135 Start=318644 End=316438 Stra...   498  1.2e-47   1
+jec21:cneo_TIGRJEC21_chr8.pseudo.Gene367 Start=297286 End...   488  2.5e-45   1
+pchr:pchr_Scaffold_257.Gene2 Start=4723 End=1965 Strand=-...   470  1.1e-44   1
+afum:3004_glimmerf_56                                          462  7.8e-44   1
+umay:umay_Contig84.Gene5 Start=5778 End=8094 Strand=1 Len...   454  5.5e-43   1
+afum:9560_glimmerf_188                                         449  1.9e-42   1
+hsap:ENSP00000261733 Database:core Gene:ENSG00000111275 C...   448  2.4e-42   1
+mmus:ENSMUSP00000031411 Database:core Gene:ENSMUSG0000002...   444  6.3e-42   1
+fgram:FG02273.1 hypothetical protein 136532 138142 +           437  3.5e-41   1
+hsap:ENSP00000332256 Database:core Gene:ENSG00000184254 C...   436  4.4e-41   1
+anid:AN9034.1 hypothetical protein 161667 163308 -             434  7.2e-41   1
+hsap:ENSP00000297785 Database:core Gene:ENSG00000165092 C...   433  9.2e-41   1
+cele:K04F1.15 CE23852   aldehyde dehydrogenase status:Pre...   431  1.5e-40   1
+mgri:MG03900.3 hypothetical protein (  (AL669991) probabl...   430  1.9e-40   1
+agam:ENSANGP00000020207 Database:core Gene:ENSANGG0000001...   429  2.4e-40   1
+agam:ENSANGP00000011393 Database:core Gene:ENSANGG0000000...   428  3.1e-40   1
+ncra:NCU00378.1 hypothetical protein (49878 - 51422)           427  4.0e-40   1
+anid:AN0554.1 AF260123_1 aldehyde dehydrogenase ALDH 5455...   426  5.1e-40   1
+fgram:FG02160.1 hypothetical protein 41708 43408 +             426  5.1e-40   1
+ncra:NCU07053.1 hypothetical protein (11322 - 12871)           422  1.3e-39   1
+hsap:ENSP00000331360 Database:core Gene:ENSG00000111275 C...   417  4.8e-39   1
+mmus:ENSMUSP00000034723 Database:core Gene:ENSMUSG0000001...   417  4.8e-39   1
+mmus:ENSMUSP00000015278 Database:core Gene:ENSMUSG0000001...   415  8.5e-39   1
+fgram:FG00979.1 conserved hypothetical protein 134502 136...   415  8.5e-39   1
+scer:YER073W ALD5 SGDID:S0000875, Chr V from 304027-305589     414  1.1e-38   1
+scer:YMR169C ALD3 SGDID:S0004779, Chr XIII from 600871-59...   414  1.1e-38   1
+hsap:ENSP00000249750 Database:core Gene:ENSG00000128918 C...   413  1.5e-38   1
+scer:YMR170C ALD2 SGDID:S0004780, Chr XIII from 603081-60...   413  1.5e-38   1
+spom:SPAC922.07c |||putative aldehyde dehydrogenase            412  1.9e-38   1
+ncra:NCU03415.1 hypothetical protein (100613 - 98979)          412  1.9e-38   1
+calb:orf6.5499  orf6-2412:4933-6438:e  1506 bp, 501 aa, c...   410  3.4e-38   1
+fgram:FG04194.1 hypothetical protein 53878 55487 -             409  4.4e-38   1
+rnor:ENSRNOP00000024000 Database:core Gene:ENSRNOG0000001...   407  7.7e-38   1
+anid:AN1689.1 hypothetical protein 308010 309618 -             405  1.3e-37   1
+mmus:ENSMUSP00000025656 Database:core Gene:ENSMUSG0000002...   404  1.7e-37   1
+rnor:ENSRNOP00000015282 Database:core Gene:ENSRNOG0000001...   404  1.7e-37   1
+dmel:CG3752-PA translation from_gene[CG3752 CG3752 FBgn00...   402  3.0e-37   1
+anid:AN4126.1 hypothetical protein 112110 113903 +             402  3.0e-37   1
+duh99:cneo_DUH99_Contig581.Gene1 Start=122995 End=125826 ...   413  4.0e-37   1
+hsap:ENSP00000259658 Database:core Gene:ENSG00000137124 C...   399  6.6e-37   1
+fgram:FG00139.1 hypothetical protein 120379 121938 +           399  6.6e-37   1
+mgri:MG05008.3 hypothetical protein 911 2404 +                 399  6.6e-37   1
+mmus:ENSMUSP00000041260 Database:core Gene:ENSMUSG0000003...   397  1.1e-36   1
+rnor:ENSRNOP00000024027 Database:core Gene:ENSRNOG0000001...   396  1.5e-36   1
+scer:YOR374W ALD4 SGDID:S0005901, Chr XV from 1039834-104...   396  1.5e-36   1
+agos:AFL044W location=AgChr3:286029..287570                    394  2.5e-36   1
+mmus:ENSMUSP00000020497 Database:core Gene:ENSMUSG0000002...   413  5.4e-36   1
+hsap:ENSP00000328288 Database:core Gene:ENSG00000111275 C...   391  5.4e-36   1
+spom:SPCC550.10 |meu8||putative betaine aldehyde dehydrog...   388  1.2e-35   1
+drer:ENSDARP00000022715 Database:core Gene:ENSDARG0000000...   387  1.6e-35   1
+anid:AN9198.1 hypothetical protein 77039 78964 -               387  1.6e-35   1
+ccin:ccin_Contig143.Gene21 Start=155829 End=153396 Strand...   397  2.2e-35   1
+umay:umay_Contig117.Gene40 Start=118398 End=115663 Strand...   395  2.9e-35   1
+agam:ENSANGP00000013314 Database:core Gene:ENSANGG0000001...   384  3.4e-35   1
+agos:AFL201W location=AgChr6:58424..59917                      383  4.4e-35   1
+fgram:FG02220.1 hypothetical protein 57928 59634 +             383  4.4e-35   1
+hsap:ENSP00000271359 Database:core Gene:ENSG00000143149 C...   382  5.8e-35   1
+calb:orf6.6640  orf6-2467:32772-34271:e  1500 bp, 499 aa,...   382  5.8e-35   1
+spom:SPAC9E9.09c |||putative aldehyde dehydrogenase            382  5.8e-35   1
+fgram:FG02392.1 hypothetical protein 112635 114218 -           381  7.5e-35   1
+scer:YPL061W ALD6 SGDID:S0005982, Chr XVI from 432583-434085   380  9.7e-35   1
+mgri:MG01991.3 hypothetical protein 16355 18108 +              387  1.3e-34   1
+rnor:ENSRNOP00000021799 Database:core Gene:ENSRNOG0000001...   379  1.3e-34   1
+drer:ENSDARP00000015715 Database:core Gene:ENSDARG0000002...   379  1.3e-34   1
+fgram:FG05831.1 hypothetical protein 397480 399069 -           379  1.3e-34   1
+drer:ENSDARP00000011744 Database:core Gene:ENSDARG0000000...   377  2.1e-34   1
+cbri:CBG05984 CBP15436 (cb25.fpc0143.gc246)                    397  4.1e-34   1
+rnor:ENSRNOP00000005611 Database:core Gene:ENSRNOG0000000...   372  7.7e-34   1
+frub:SINFRUP00000157742 Database:core Gene:SINFRUG0000014...   372  7.7e-34   1
+mgri:MG09456.3 hypothetical protein 830 2335 -                 372  7.7e-34   1
+fgram:FG11034.1 hypothetical protein 7087 10802 -              393  1.3e-33   1
+drer:ENSDARP00000024587 Database:core Gene:ENSDARG0000000...   370  1.3e-33   1
+mgri:MG00652.3 hypothetical protein 20025 21464 -              339  1.9e-33   2
+umay:umay_Contig94.Gene5 Start=7986 End=9677 Strand=1 Len...   380  2.3e-33   1
+hsap:ENSP00000308809 Database:core Gene:ENSG00000144908 C...   390  2.7e-33   1
+anid:AN4054.1 hypothetical protein 274503 276139 +             366  3.6e-33   1
+anid:AN1430.1 hypothetical protein 75595 77158 +               366  3.6e-33   1
+rnor:ENSRNOP00000018010 Database:core Gene:ENSRNOG0000001...   339  6.9e-33   2
+cele:F54D8.3 CE29809   Aldehyde dehydrogenase 2 status:Co...   371  7.0e-33   1
+pchr:pchr_Scaffold_36.Gene34 Start=136323 End=131305 Stra...   388  7.2e-33   1
+frub:SINFRUP00000146331 Database:core Gene:SINFRUG0000013...   385  8.5e-33   1
+drer:ENSDARP00000023429 Database:core Gene:ENSDARG0000000...   362  1.0e-32   1
+agam:ENSANGP00000021005 Database:core Gene:ENSANGG0000001...   385  1.2e-32   1
+fgram:FG11542.1 hypothetical protein 85606 87343 +             361  1.3e-32   1
+cbri:CBG23008 CBP05486 (cb25.fpc4426.en2389a/cb25.fpc4426...   369  1.7e-32   1
+pchr:pchr_Scaffold_42.Gene29 Start=118985 End=116126 Stra...   377  2.1e-32   1
+cbri:CBG15207 CBP03688 (cb25.fpc4010.en576a)                   379  2.1e-32   1
+dmel:CG4685-PA translation from_gene[CG4685 CG4685 FBgn00...   368  2.2e-32   1
+agam:ENSANGP00000014625 Database:core Gene:ENSANGG0000001...   359  2.2e-32   1
+frub:SINFRUP00000156880 Database:core Gene:SINFRUG0000014...   358  2.8e-32   1
+duh99:MATCHRContig45.Gene17 Start=28159 End=25771 Strand=...   371  3.4e-32   1
+cele:F42G9.5 CE07233   betaine-aldehyde dehydrogenase sta...   376  4.9e-32   1
+umay:umay_Contig192.Gene3 Start=244542 End=247243 Strand=...   375  7.1e-32   1
+rnor:ENSRNOP00000011483 Database:core Gene:ENSRNOG0000000...   377  8.6e-32   1
+jec21:cneo_TIGRJEC21_chr11b.pseudo.Gene216 Start=332496 E...   371  1.2e-31   1
+hsap:ENSP00000265605 Database:core Gene:ENSG00000118514 C...   355  3.0e-31   1
+wih99:cneo_WIH99_20.Gene3 Start=298791 End=301339 Strand=...   363  3.6e-31   1
+dmel:CG31075-PA translation from_gene[CG31075 CG31075 FBg...   352  9.1e-31   1
+jec21:cneo_TIGRJEC21_CH9.Gene24 Start=553523 End=556049 S...   356  2.4e-30   1
+anid:AN3573.1 hypothetical protein 96168 97759 +               306  2.5e-30   2
+anid:AN4847.1 hypothetical protein 21287 24043 +               363  2.5e-30   1
+drer:ENSDARP00000008179 Database:core Gene:ENSDARG0000000...   340  2.8e-30   1
+agos:ADR418W location=AgChr4:1453729..1455252                  352  3.9e-30   1
+ncra:NCU00715.1 hypothetical protein (54297 - 55937)           351  8.4e-30   1
+ncra:NCU03348.1 hypothetical protein (46178 - 44665)           345  9.3e-30   1
+frub:SINFRUP00000146332 Database:core Gene:SINFRUG0000013...   356  9.6e-30   1
+agam:ENSANGP00000014662 Database:core Gene:ENSANGG0000001...   343  1.4e-29   1
+umay:umay_Contig115.Gene5 Start=29009 End=31339 Strand=1 ...   350  2.1e-29   1
+fgram:FG05375.1 hypothetical protein 97901 99430 +             346  2.4e-29   1
+ncra:NCU02056.1 hypothetical protein (21232 - 19740)           341  3.0e-29   1
+cele:C54D1.4 CE06980   aldehyde dehydrogenase status:Part...   343  5.3e-29   1
+wih99:cneo_WIH99_80.Gene78 Start=108689 End=105572 Strand...   346  5.8e-29   1
+cele:Y69F12A.2 CE24573   aldehyde dehydrogenase status:Pa...   345  8.2e-29   1
+pchr:pchr_Scaffold_68.Gene2 Start=118512 End=121055 Stran...   342  1.5e-28   1
+anid:AN0740.1 hypothetical protein 38806 40367 -               335  2.4e-28   1
+drer:ENSDARP00000025035 Database:core Gene:ENSDARG0000000...   322  2.6e-28   1
+anid:AN3205.1 hypothetical protein 19806 21394 +               333  3.6e-28   1
+frub:SINFRUP00000135525 Database:core Gene:SINFRUG0000012...   335  3.7e-28   1
+anid:AN7315.1 hypothetical protein 94182 95712 -               334  5.0e-28   1
+dmel:CG8665-PA translation from_gene[CG8665 CG8665 FBgn00...   343  5.8e-28   1
+ncra:NCU09648.1 hypothetical protein (17442 - 19120)           331  6.8e-28   2
+afum:1839_glimmerf_24                                          333  7.1e-28   1
+rnor:ENSRNOP00000001816 Database:core Gene:ENSRNOG0000000...   290  7.4e-28   2
+mgri:MG01230.3 hypothetical protein 11881 13666 +              335  8.0e-28   1
+fgram:FG04196.1 hypothetical protein 60704 62429 +             331  1.2e-27   1
+afum:3202_glimmerf_68                                          337  1.5e-27   1
+umay:umay_Contig222.Gene6 Start=19170 End=17353 Strand=-1...   332  2.0e-27   1
+rnor:ENSRNOP00000020015 Database:core Gene:ENSRNOG0000001...   314  2.0e-27   1
+fgram:FG06752.1 hypothetical protein 57054 58751 +             328  2.6e-27   1
+hsap:ENSP00000309623 Database:core Gene:ENSG00000128918 C...   277  3.2e-27   2
+ccin:ccin_Contig206.Gene14 Start=13195 End=10953 Strand=-...   330  3.4e-27   1
+duh99:cneo_DUH99_Contig548.Gene28 Start=81130 End=78906 S...   331  3.4e-27   1
+rnor:ENSRNOP00000023774 Database:core Gene:ENSRNOG0000001...   311  4.2e-27   1
+ncra:NCU00936.1 hypothetical protein (87196 - 85536)           325  6.6e-27   1
+anid:AN4050.1 hypothetical protein 262048 263673 -             324  6.8e-27   1
+jec21:cneo_TIGRJEC21_CH2.Gene1 Start=223094 End=225269 St...   328  6.9e-27   1
+cbri:CBG09190 CBP08192 (cb25.fpc2193.fg55/cb25.fpc2193.gc...   325  7.1e-27   1
+cele:F01F1.6 CE01232   Aldehyde dehydrogenase status:Conf...   322  2.0e-26   1
+drer:ENSDARP00000010714 Database:core Gene:ENSDARG0000000...   328  2.5e-26   1
+drer:ENSDARP00000023106 Database:core Gene:ENSDARG0000000...   328  2.6e-26   1
+drer:ENSDARP00000025029 Database:core Gene:ENSDARG0000000...   328  2.6e-26   1
+fgram:FG01759.1 hypothetical protein 58734 60362 +             320  2.6e-26   1
+anid:AN1541.1 hypothetical protein 30562 32195 +               319  2.9e-26   1
+anid:AN5435.1 hypothetical protein 161833 164046 -             318  3.7e-26   1
+mgri:MG02766.3 hypothetical protein 6094 8116 -                323  4.0e-26   1
+frub:SINFRUP00000163952 Database:core Gene:SINFRUG0000015...   272  4.7e-26   2
+mgri:MG03263.3 hypothetical protein 3369 5047 +                317  5.2e-26   1
+agam:ENSANGP00000016555 Database:core Gene:ENSANGG0000001...   319  5.6e-26   1
+fgram:FG00854.1 hypothetical protein 138293 142157 -           324  6.6e-26   1
+pchr:pchr_Scaffold_15.Gene44 Start=10646 End=8127 Strand=...   318  9.4e-26   1
+cbri:CBG09070 CBP02216 (cb25.fpc2187.en7002a)                  316  1.0e-25   1
+spom:SPAC139.05 |||putative succinate semialdehyde dehydr...   314  1.3e-25   1
+mmus:ENSMUSP00000028004 Database:core Gene:ENSMUSG0000002...   312  1.8e-25   1
+cbri:CBG10981 CBP02675 (cb25.fpc2397.en1670b/cb25.fpc2397...   312  2.7e-25   1
+ncra:NCU07442.1 hypothetical protein (58745 - 56640)           314  3.8e-25   1
+mmus:ENSMUSP00000001749 Database:core Gene:ENSMUSG0000002...   310  4.8e-25   1
+afum:3310_glimmerf_183                                         307  6.6e-25   1
+fgram:FG11482.1 hypothetical protein 39753 41214 -             289  1.0e-24   1
+spom:SPAC1002.12c |||putative succinate-semialdehyde dehy...   306  1.2e-24   1
+ccin:ccin_Contig143.Gene26 Start=93392 End=90406 Strand=-...   308  1.5e-24   1
+fgram:FG02296.1 hypothetical protein 42345 43942 -             305  1.5e-24   1
+agam:ENSANGP00000017723 Database:core Gene:ENSANGG0000001...   305  1.7e-24   1
+drer:ENSDARP00000005458 Database:core Gene:ENSDARG0000000...   234  2.0e-24   2
+anid:AN3829.1 hypothetical protein 18018 19747 +               305  2.1e-24   1
+calb:orf6.6689  orf6-2469:28972-30501:e  1530 bp, 509 aa,...   303  3.1e-24   1
+fgram:FG00718.1 hypothetical protein 48603 50089 -             301  3.5e-24   1
+rnor:ENSRNOP00000024093 Database:core Gene:ENSRNOG0000001...   185  4.6e-24   3
+calb:orf6.7782  orf6-2501:55143-53707:e  1437 bp, 478 aa,...   299  6.2e-24   1
+frub:SINFRUP00000152737 Database:core Gene:SINFRUG0000014...   300  6.4e-24   1
+drer:ENSDARP00000015309 Database:core Gene:ENSDARG0000001...   280  9.9e-24   1
+mmus:ENSMUSP00000040591 Database:core Gene:ENSMUSG0000003...   299  1.0e-23   1
+mmus:ENSMUSP00000025657 Database:core Gene:ENSMUSG0000002...   279  1.3e-23   1
+hsap:ENSP00000297542 Database:core Gene:ENSG00000164904 C...   297  1.5e-23   1
+frub:SINFRUP00000142953 Database:core Gene:SINFRUG0000013...   278  1.6e-23   1
+ncra:NCU08669.1 hypothetical protein (11260 - 12786)           294  2.1e-23   1
+fgram:FG02356.1 hypothetical protein 25485 28604 +             194  2.2e-23   2
+anid:AN3591.1 hypothetical protein ( (MG01606.1) hypothet...   296  2.4e-23   1
+frub:SINFRUP00000148708 Database:core Gene:SINFRUG0000014...   295  2.6e-23   1
+cele:F36H1.6 CE05817   formyltetrahydrofolate dehydrogena...   298  5.3e-23   1
+calb:orf6.7055  orf6-2481:60364-58520:e  1845 bp, 614 aa,...   294  6.4e-23   1
+agam:ENSANGP00000022164 Database:core Gene:ENSANGG0000001...   292  6.6e-23   1
+umay:umay_Contig141.Gene14 Start=5134 End=3036 Strand=-1 ...   292  7.7e-23   1
+dmel:CG17896-PA translation from_gene[CG17896 EG:171D11.1...   289  1.2e-22   1
+dmel:CG17896-PB translation from_gene[CG17896 EG:171D11.1...   289  1.3e-22   1
+umay:umay_Contig124.Gene6 Start=37930 End=40228 Strand=1 ...   292  1.4e-22   1
+hsap:ENSP00000238696 Database:core Gene:ENSG00000119711 C...   289  1.6e-22   1
+cele:F45H10.1 CE?????     succinate semi-aldehyde dehydro...   287  1.7e-22   1
+ncra:NCU09266.1 hypothetical protein (94540 - 96177)           288  2.1e-22   1
+anid:AN4820.1 hypothetical protein 149242 150844 -             283  5.4e-22   1
+wih99:cneo_WIH99_14.Gene173 Start=142339 End=139677 Stran...   285  6.0e-22   1
+afum:9439_glimmerf_146                                         278  6.9e-22   1
+duh99:cneo_DUH99_Contig585.Gene10 Start=82555 End=84824 S...   283  8.4e-22   1
+fgram:FG03936.1 hypothetical protein 156552 158199 +           281  9.7e-22   1
+rnor:ENSRNOP00000015545 Database:core Gene:ENSRNOG0000001...   281  1.1e-21   1
+dmel:CG9629-PA translation from_gene[CG9629 CG9629 FBgn00...   281  1.2e-21   1
+dmel:CG9629-PB translation from_gene[CG9629 CG9629 FBgn00...   281  1.2e-21   1
+rnor:ENSRNOP00000021757 Database:core Gene:ENSRNOG0000001...   279  1.2e-21   1
+drer:ENSDARP00000012423 Database:core Gene:ENSDARG0000000...   279  1.3e-21   1
+pchr:pchr_Scaffold_2.Gene87 Start=346907 End=343843 Stran...   282  1.3e-21   1
+fgram:FG08596.1 hypothetical protein 57232 59131 +             282  1.4e-21   1
+afum:7129_glimmerf_213                                         279  1.5e-21   1
+cbri:CBG20774 CBP11572 (cb25.fpc4206.fg128/cb25.fpc4206.g...   279  1.5e-21   1
+pchr:pchr_Scaffold_70.Gene18 Start=126124 End=123573 Stra...   280  1.9e-21   1
+mmus:ENSMUSP00000021664 Database:core Gene:ENSMUSG0000002...   279  2.0e-21   1
+frub:SINFRUP00000144622 Database:core Gene:SINFRUG0000013...   277  2.0e-21   1
+umay:umay_Contig203.Gene28 Start=130123 End=127586 Strand...   279  2.1e-21   1
+mmus:ENSMUSP00000049810 Database:core Gene:ENSMUSG0000002...   278  2.3e-21   1
+anid:AN0079.1 hypothetical protein 72529 76110 -               284  2.4e-21   1
+fgram:FG00490.1 conserved hypothetical protein 3891 5622 -     278  3.0e-21   1
+drer:ENSDARP00000027544 Database:core Gene:ENSDARG0000000...   269  6.4e-21   1
+umay:umay_Contig3.Gene2 Start=12357 End=13920 Strand=1 Le...   273  8.6e-21   1
+agos:AgYBR006W location=AgChr5:642837..644324                  272  9.3e-21   1
+anid:AN7141.1 hypothetical protein 28075 29616 +               271  9.7e-21   1
+mgri:MG05814.3 hypothetical protein 33559 35087 -              271  9.7e-21   1
+umay:umay_Contig203.Gene4 Start=102418 End=104786 Strand=...   271  1.3e-20   1
+mmus:ENSMUSP00000038878 Database:core Gene:ENSMUSG0000003...   251  1.4e-20   1
+duh99:cneo_DUH99_Contig560.Gene45 Start=69535 End=67388 S...   271  1.5e-20   1
+calb:orf6.7229  orf6-2487:29860-31335:e  1476 bp, 491 aa,...   270  1.5e-20   1
+ccin:ccin_Contig199.Gene11 Start=493217 End=496163 Strand...   273  1.6e-20   1
+duh99:cneo_DUH99_Contig575.Gene5 Start=73052 End=75270 St...   272  1.7e-20   1
+ccin:ccin_Contig198.Gene198 Start=520002 End=516613 Stran...   272  1.7e-20   1
+drer:ENSDARP00000011728 Database:core Gene:ENSDARG0000000...   274  1.9e-20   1
+wih99:cneo_WIH99_48.Gene135 Start=262989 End=260742 Stran...   269  2.6e-20   1
+anid:AN6636.1 hypothetical protein 188016 190076 -             271  2.6e-20   1
+fgram:FG10673.1 hypothetical protein 110458 112011 +           267  3.5e-20   1
+cele:F13D12.4 CE02183   methylmalonate-semialdehyde dehyd...   266  5.3e-20   1
+jec21:cneo_TIGRJEC21_chr5.pseudo.Gene416 Start=965498 End...   270  6.6e-20   1
+drer:ENSDARP00000024566 Database:core Gene:ENSDARG0000000...   262  1.1e-19   1
+anid:AN1585.1 hypothetical protein 180201 181769 -             262  1.3e-19   1
+duh99:MATCHRContig46.Gene18 Start=5071 End=7234 Strand=1 ...   263  1.4e-19   1
+wih99:cneo_WIH99_20.Gene17 Start=160223 End=162565 Strand...   263  1.6e-19   1
+jec21:cneo_TIGRJEC21_chr4.pseudo.Gene688 Start=1937332 En...   262  2.0e-19   1
+jec21:cneo_TIGRJEC21_CH2.Gene354 Start=550124 End=547388 ...   261  2.5e-19   1
+ccin:ccin_Contig308.Gene1 Start=4191 End=6444 Strand=1 Le...   259  2.7e-19   1
+fgram:FG09762.1 hypothetical protein 5035 6633 +               258  2.9e-19   1
+drer:ENSDARP00000020121 Database:core Gene:ENSDARG0000000...   252  2.9e-19   1
+afum:8240_glimmerf_104                                         259  3.3e-19   1
+pchr:pchr_Scaffold_25.Gene48 Start=210869 End=208531 Stra...   260  3.6e-19   1
+fgram:FG04670.1 hypothetical protein 154840 156421 -           257  3.9e-19   1
+cbri:CBG03134 CBP06431 (cb25.fpc0058.fg640/cb25.fpc0058.g...   258  4.2e-19   1
+frub:SINFRUP00000132841 Database:core Gene:SINFRUG0000012...   257  4.2e-19   1
+hsap:ENSP00000259696 Database:core Gene:ENSG00000112294 C...   257  5.8e-19   1
+frub:SINFRUP00000155714 Database:core Gene:SINFRUG0000014...   236  5.8e-19   1
+calb:orf6.4131  orf6-2316:7542-5869:e  1674 bp, 557 aa, c...   257  6.5e-19   1
+afum:5674_glimmerf_172                                         234  9.5e-19   1
+wih99:cneo_WIH99_77.Gene9 Start=240235 End=242652 Strand=...   252  2.3e-18   1
+fgram:FG07803.1 hypothetical protein 81767 83572 +             250  2.4e-18   1
+duh99:MATCHRContig45.Gene1 Start=32346 End=34255 Strand=1...   250  3.1e-18   1
+wih99:cneo_WIH99_20.Gene108 Start=294712 End=292322 Stran...   250  3.6e-18   1
+jec21:cneo_TIGRJEC21_chr11b.pseudo.Gene24 Start=539782 En...   251  4.1e-18   1
+frub:SINFRUP00000138575 Database:core Gene:SINFRUG0000013...   228  4.2e-18   1
+pchr:pchr_Scaffold_41.Gene38 Start=135527 End=133723 Stra...   246  7.7e-18   1
+scer:YHR039C MSC7 SGDID:S0001081, Chr VIII from 186800-18...   247  1.2e-17   1
+jec21:cneo_TIGRJEC21_CH9.Gene516 Start=549767 End=547581 ...   245  1.3e-17   1
+pchr:pchr_Scaffold_4.Gene75 Start=259710 End=257130 Stran...   187  1.8e-17   2
+hsap:ENSP00000325473 Database:core Gene:ENSG00000118514 C...   200  1.9e-17   2
+fgram:FG01826.1 hypothetical protein 3352 4961 +               243  1.9e-17   1
+scer:YBR006W UGA2 SGDID:S0000210, Chr II from 246971-248464    242  2.2e-17   1
+mgri:MG01606.3 hypothetical protein ( (NCU09266.1) hypoth...   241  3.6e-17   1
+agos:AAL076W location=AgChr1:213684..215597                    242  4.0e-17   1
+hsap:ENSP00000314649 Database:core Gene:ENSG00000112294 C...   235  1.7e-16   1
+ccin:ccin_Contig26.Gene76 Start=185685 End=182747 Strand=...   198  1.8e-16   2
+drer:ENSDARP00000009551 Database:core Gene:ENSDARG0000000...   212  2.3e-16   1
+wih99:cneo_WIH99_84.Gene109 Start=254919 End=252452 Stran...   234  3.0e-16   1
+drer:ENSDARP00000003517 Database:core Gene:ENSDARG0000001...   228  3.0e-16   1
+scer:YMR110C YMR110C SGDID:S0004716, Chr XIII from 491991...   232  3.4e-16   1
+duh99:cneo_DUH99_Contig88.Gene1 Start=947 End=2 Strand=-1...   209  4.8e-16   1
+afum:6343_glimmerf_397                                         222  1.2e-15   1
+umay:umay_Contig126.Gene6 Start=44007 End=46488 Strand=1 ...   228  1.3e-15   1
+duh99:cneo_DUH99_Contig572.Gene12 Start=60677 End=63143 S...   226  2.2e-15   1
+rnor:ENSRNOP00000003182 Database:core Gene:ENSRNOG0000000...   224  2.4e-15   1
+drer:ENSDARP00000016103 Database:core Gene:ENSDARG0000001...   206  2.8e-15   2
+spom:SPBC21C3.15c |||putative aldehyde-dehydrogenase-like...   222  4.2e-15   1
+ccin:ccin_Contig86.Gene6 Start=60023 End=62098 Strand=1 L...   217  1.4e-14   1
+jec21:cneo_TIGRJEC21_chr3.pseudo.Gene44 Start=853936 End=...   216  2.2e-14   1
+drer:ENSDARP00000025142 Database:core Gene:ENSDARG0000001...   198  2.4e-14   2
+ccin:ccin_Contig112.Gene67 Start=228881 End=226835 Strand...   215  2.9e-14   1
+jec21:cneo_TIGRJEC21_CH9.Gene40 Start=432690 End=434826 S...   215  3.8e-14   1
+umay:umay_Contig154.Gene37 Start=150500 End=148105 Strand...   211  8.9e-14   1
+pchr:pchr_Scaffold_1.Gene119 Start=403089 End=405051 Stra...   208  1.4e-13   1
+wih99:cneo_WIH99_63.Gene197 Start=28565 End=25737 Strand=...   208  1.6e-13   1
+frub:SINFRUP00000138576 Database:core Gene:SINFRUG0000013...   181  4.9e-13   1
+duh99:cneo_DUH99_Contig562.Gene45 Start=26299 End=23976 S...   182  1.2e-12   2
+pchr:pchr_Scaffold_1.Gene91 Start=392557 End=390664 Stran...   198  1.4e-12   1
+calb:orf6.7642  orf6-2498:45415-43430:e  1986 bp, 661 aa,...   198  2.9e-12   1
+calb:orf6.7610  orf6-2497:61254-62882:e  1629 bp, 542 aa,...   192  9.1e-12   1
+cele:T08B1.3 CE13415   aldehyde dehydrogenase status:Conf...   190  9.5e-12   1
+afum:4684_glimmerf_1                                           169  9.7e-12   1
+pchr:pchr_Scaffold_201.Gene1 Start=14631 End=17178 Strand...   199  1.4e-11   2
+drer:ENSDARP00000005081 Database:core Gene:ENSDARG0000000...   188  1.9e-11   1
+mgri:MG00719.3 hypothetical protein 80568 82139 +              188  2.3e-11   1
+hsap:ENSP00000258494 Database:core Gene:ENSG00000136010 C...   188  4.2e-11   1
+frub:SINFRUP00000134540 Database:core Gene:SINFRUG0000012...   184  4.3e-11   1
+drer:ENSDARP00000019543 Database:core Gene:ENSDARG0000001...   170  4.3e-10   1
+rnor:ENSRNOP00000020325 Database:core Gene:ENSRNOG0000001...   160  9.9e-10   1
+agam:ENSANGP00000009992 Database:core Gene:ENSANGG0000000...   167  2.0e-09   2
+rnor:ENSRNOP00000017506 Database:core Gene:ENSRNOG0000001...   157  2.8e-09   1
+afum:5076_glimmerf_157                                         160  3.2e-09   1
+anid:AN5644.1 hypothetical protein 119417 121388 -             166  5.9e-09   1
+mgri:MG07270.3 hypothetical protein 24200 27609 -              165  9.7e-09   1
+mmus:ENSMUSP00000019246 Database:core Gene:ENSMUSG0000001...   163  9.9e-09   1
+umay:umay_Contig248.Gene23 Start=64732 End=62912 Strand=-...   162  1.6e-08   1
+ccin:ccin_Contig197.Gene16 Start=193203 End=195572 Strand...   160  2.5e-08   1
+hsap:ENSP00000225740 Database:core Gene:ENSG00000108602 C...   159  2.7e-08   1
+cele:T05H4.13 CE27441   aldehyde dehydrogenase status:Con...   158  4.1e-08   1
+dmel:CG11140-PI translation from_gene[CG11140 Aldh-III FB...   156  6.9e-08   1
+mmus:ENSMUSP00000062524 Database:core Gene:ENSMUSG0000002...   127  7.5e-08   3
+dmel:CG11140-PF translation from_gene[CG11140 Aldh-III FB...   156  8.4e-08   1
+dmel:CG11140-PG translation from_gene[CG11140 Aldh-III FB...   156  8.4e-08   1
+dmel:CG11140-PC translation from_gene[CG11140 Aldh-III FB...   156  8.4e-08   1
+dmel:CG11140-PB translation from_gene[CG11140 Aldh-III FB...   156  8.4e-08   1
+dmel:CG11140-PA translation from_gene[CG11140 Aldh-III FB...   156  8.4e-08   1
+dmel:CG11140-PD translation from_gene[CG11140 Aldh-III FB...   156  8.4e-08   1
+cbri:CBG18932 CBP19504 (cb25.fpc4126.gc41)                     153  1.4e-07   1
+drer:ENSDARP00000014385 Database:core Gene:ENSDARG0000001...   138  1.6e-07   2
+mgri:MG06551.3 hypothetical protein 62216 63824 +              105  2.4e-07   2
+frub:SINFRUP00000148431 Database:core Gene:SINFRUG0000013...   147  6.2e-07   1
+rnor:ENSRNOP00000017655 Database:core Gene:ENSRNOG0000001...   132  1.1e-06   1
+drer:ENSDARP00000013496 Database:core Gene:ENSDARG0000000...   134  1.1e-06   1
+pchr:pchr_Scaffold_12.Gene51 Start=268702 End=266658 Stra...   145  1.4e-06   1
+rnor:ENSRNOP00000023789 Database:core Gene:ENSRNOG0000001...   141  2.5e-06   1
+mmus:ENSMUSP00000025795 Database:core Gene:ENSMUSG0000003...   139  2.6e-06   1
+mmus:ENSMUSP00000050692 Database:core Gene:ENSMUSG0000001...   141  2.9e-06   1
+mmus:ENSMUSP00000010169 Database:core Gene:ENSMUSG0000001...   141  2.9e-06   1
+hsap:ENSP00000007633 Database:core Gene:ENSG00000006534 C...   139  3.9e-06   1
+mmus:ENSMUSP00000039838 Database:core Gene:ENSMUSG0000003...   138  5.6e-06   1
+hsap:ENSP00000316241 Database:core Gene:ENSG00000006534 C...   138  5.7e-06   1
+rnor:ENSRNOP00000003200 Database:core Gene:ENSRNOG0000000...   138  6.6e-06   1
+frub:SINFRUP00000134534 Database:core Gene:SINFRUG0000012...   134  1.3e-05   1
+pchr:pchr_Scaffold_19.Gene52 Start=124215 End=122095 Stra...   135  1.4e-05   1
+afum:10580_glimmerf_8                                          131  1.6e-05   1
+ncra:NCU04013.1 hypothetical protein (59219 - 57534)           131  4.2e-05   1
+anid:AN8985.1 hypothetical protein 22123 23804 -               131  5.4e-05   2
+frub:SINFRUP00000129119 Database:core Gene:SINFRUG0000012...   129  5.5e-05   1
+frub:SINFRUP00000160622 Database:core Gene:SINFRUG0000015...   116  5.7e-05   1
+hsap:ENSP00000314367 Database:core Gene:ENSG00000072210 C...   128  8.2e-05   1
+drer:ENSDARP00000025640 Database:core Gene:ENSDARG0000001...   120  8.5e-05   2
+hsap:ENSP00000176643 Database:core Gene:ENSG00000072210 C...   128  9.3e-05   1
+dmel:CG11140-PH translation from_gene[CG11140 Aldh-III FB...   125  0.00014   1
+mmus:ENSMUSP00000056276 Database:core Gene:ENSMUSG0000002...   127  0.00014   3
+fgram:FG09960.1 hypothetical protein 40904 42572 +             126  0.00015   1
+mmus:ENSMUSP00000057475 Database:core Gene:ENSMUSG0000003...   130  0.00015   1
+frub:SINFRUP00000138573 Database:core Gene:SINFRUG0000013...   102  0.00015   1
+drer:ENSDARP00000027868 Database:core Gene:ENSDARG0000001...   121  0.00035   1
+drer:ENSDARP00000004886 Database:core Gene:ENSDARG0000001...   122  0.00037   1
+drer:ENSDARP00000002746 Database:core Gene:ENSDARG0000001...   121  0.00043   1
+umay:umay_Contig153.Gene4 Start=24926 End=23503 Strand=-1...   116  0.00044   1
+mmus:ENSMUSP00000056922 Database:core Gene:ENSMUSG0000003...   118  0.00047   1
+mmus:ENSMUSP00000025816 Database:core Gene:ENSMUSG0000002...   118  0.00078   1
+hsap:ENSP00000255084 Database:core Gene:ENSG00000132746 C...   116  0.0011    1
+drer:ENSDARP00000012767 Database:core Gene:ENSDARG0000000...    93  0.0014    1
+mgri:MG07890.3 hypothetical protein 2215 4003 +                111  0.0062    1
+afum:9099_glimmerf_102                                         102  0.023     1
+drer:ENSDARP00000008677 Database:core Gene:ENSDARG0000000...   101  0.043     1
+drer:ENSDARP00000020199 Database:core Gene:ENSDARG0000000...   101  0.050     1
+drer:ENSDARP00000003039 Database:core Gene:ENSDARG0000000...   101  0.052     1
+drer:ENSDARP00000015570 Database:core Gene:ENSDARG0000000...   101  0.053     1
+drer:ENSDARP00000021159 Database:core Gene:ENSDARG0000001...   100  0.054     1
+mmus:ENSMUSP00000050448 Database:core Gene:ENSMUSG0000004...    78  0.055     1
+drer:ENSDARP00000010518 Database:core Gene:ENSDARG0000001...   100  0.062     1
+drer:ENSDARP00000027140 Database:core Gene:ENSDARG0000002...    98  0.086     1
+drer:ENSDARP00000023944 Database:core Gene:ENSDARG0000000...    88  0.15      1
+drer:ENSDARP00000017270 Database:core Gene:ENSDARG0000001...    94  0.17      1
+hsap:ENSP00000318252 Database:core Gene:ENSG00000006534 C...    92  0.17      1
+drer:ENSDARP00000024731 Database:core Gene:ENSDARG0000001...    92  0.42      1
+hsap:ENSP00000293350 Database:core Gene:ENSG00000161618 C...    95  0.44      1
+rnor:ENSRNOP00000024064 Database:core Gene:ENSRNOG0000001...    86  0.47      1
+afum:3599_glimmerf_174                                          88  0.58      1
+umay:umay_Contig153.Gene5 Start=23500 End=22157 Strand=-1...    82  0.93      1
+agos:AGL192C location=AgChr7:complement(361641..363485)         87  0.95      1
+jec21:cneo_TIGRJEC21_chr3.pseudo.Gene262 Start=1259945 En...    59  0.998     1
+dmel:CG8358-PA translation from_gene[CG8358 CG8358 FBgn00...    84  0.9995    1
+
+
+
+Parameters:
+  -i /nfs1/jason_input/euk_split/eukall.597
+  -d /nfs1/jason_input/all_euk/metazoans /nfs1/jason_input/fungi/ascopeps /nfs1/jason_input/fungi/basido
+  W=3
+  T=1000
+  B=0
+
+  ctxfactor=1.00
+  E=10
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +0      0   BLOSUM62        0.317   0.134   0.392    same    same    same
+               Q=9,R=2         0.244   0.0300  0.180     n/a     n/a     n/a
+
+  Query
+  Frame  MatID  Length  Eff.Length     E    S W    T  X   E2     S2
+   +0      0      601       601       10.  82 3 1000 22  0.21    34
+                                                     36  0.22    37
+
+
+Statistics:
+
+  Database:  /nfs1/jason_input/all_euk/metazoans; /nfs1/jason_input/fungi/ascopeps; /nfs1/jason_input/fungi/basido
+   Title:  /nfs1/jason_input/all_euk/metazoans; /nfs1/jason_input/fungi/ascopeps; /nfs1/jason_input/fungi/basido
+   Format:  VirtualDB
+   # of letters in database:  145,589,111
+   # of sequences in database:  351,440
+   # of database sequences satisfying E:  415
+  No. of states in DFA:  281 (30 KB)
+  Total size of DFA:  44 KB (2055 KB)
+  Time to generate neighborhood:  0.01u 0.00s 0.01t  Elapsed: 00:00:00
+  No. of threads or processors used:  2
+  Search cpu time:  28.63u 0.00s 28.63t  Elapsed: 00:00:15
+  Total cpu time:  28.68u 0.00s 28.68t  Elapsed: 00:00:15
+  Start:  Wed Nov  5 04:46:09 2003   End:  Wed Nov  5 04:46:24 2003

Added: trunk/packages/bioperl/branches/upstream/current/t/data/noninterleaved.phy
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/noninterleaved.phy	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/noninterleaved.phy	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+ 4 297
+SBP-5.05   CCTCAGATCA CTCTTTGGCA ACGACCCCTT GTCACAATAA AGATAGGGGG 
+           GCAACTAAAG GAAGCTCTAT TAGATACAGG AGCAGATGAT ACAGTATTAG 
+           AAGAAATGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT 
+           GGAGGGTTTA TCAAAGTAAA ACAGTATGAT CAGATACCCA TAGAGATCTG 
+           TGGACATAAA GCTATAGGTA CAGTATTAGT AGGACCCACA CCTGTCAATA 
+           TAATTGGAAG AAATCTGTTG ACTCAGATTG GTTGCACTTT AAATTTT
+
+SBP-5.21_S CCTCAGATCA CTCTTTGGCA ACGACCCCTC GTCACAATAA AGGTAGGGGG 
+           GCAACTAAAG GAAGCTCTAT TAGATACAGG AGCAGATGAT ACAGTATTAG 
+           AAGACATGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT 
+           GGAGGGTTTA TCAAAGTAAG ACAGTATGAT CAGATACCCA TAGAGATCTG 
+           TGGACATAAA GCTATAGGTA CAGTATTAGT AGGACCCACA CCTGTCAATA 
+           TAATTGGAAG AAATCTGTTG ACTCAGATTG GTTGCACTTT AAATTTT
+
+SBP-5.04_2 CCTCAGATCA CTCTTTGGCA ACGACCCCTC GTCACAATAA AGATAGGGGG 
+           GCAACTAAAG GAAGCTCTAT TAGATACAGG AGCAGATGAT ACAGTATTAG 
+           AAGAAATGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT 
+           GGAGGGTTTA TCAAAGTAAG ACAGTATGAT CAGATACCCA TAGAGATCTG 
+           TGGACATAAA GCTATAGGTA CAGTATTAGT AGGACCCACA CCTGTCAATA 
+           TAATTGGAAG AAATCTGTTG ACTCAGATTG GTTGCACTTT AAATTTT
+
+REC_SBP0.2 CCTCAGATCA CTCTTTGGCA ACGACCCCTC GTCACAATAA AGATAGGGGG 
+           GCAACTAAAG GAAGCTCTAT TAGATACAGG AGCAGATGAT ACAGTATTAG 
+           AGGAAACGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT 
+           GGAGGGTTTA TCAAAGTAAG ACAGTATGAT CAGATACCCA TAGAGATCTG 
+           TGGACATAAA GCTATAGGTA CAGTATTAGT AGGACCCACA CCTGTCAATA 
+           TAATTGGAAG AAATCTGTTG ACTCAGATTG GTTGCACTTT AAATTTT

Added: trunk/packages/bioperl/branches/upstream/current/t/data/omim_genemap_test
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/omim_genemap_test	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/omim_genemap_test	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,137 @@
+1.1|9|11|95|1pter-p36.13|CCV|P|Cataract, congenital, Volkmann type||115665|Fd|linked to Rh in Scottish family||Cataract, congenital, Volkmann type (2)| | ||
+1.2|9|25|01|1pter-p36.13|ENO1, PPH, MPB1|C|Enolase-1, alpha||172430|S, F, R, REa|||Enolase deficiency (1)| | |4(Eno1)|
+1.3|10|23|87|1pter-p36.13|GDH|C|Glucose dehydrogenase||138090|S, F|||| | ||
+1.3|10|23|87|1pter-p36.14|gene-symbol1|C|XXX||100500|M method 1|comment1||| | |mousecorrelate1|
+1.3|10|23|87|1pter-p36.15|gene-symbol2|C|XXX||100501|M method 2|comment2||| | |mousecorrelate2|
+1.4|6|7|99|1pter-p36.1|CTPP, CPP, CTPA|P|Cataract, posterior polar||116600|Fd|?relation to Volkmann cataract||Cataract, posterior polar (2)| | ||
+1.5|12|22|87|1pter-p36|ERPL1, HLM2|C|Endogenous retroviral pol gene-like sequence 1 (oncogene HLM2)||131190|REa, F|||| | ||
+1.6|1|18|95|1pter-p33|HMGCL|P|3-hydroxy-3-methylglutaryl-Coenzyme A lyase||246450|REa, A|||HMG-CoA lyase deficiency (3)| | |4(Hmgcl)|
+1.7|12|16|93|1pter-p32|AGRN|P|Agrin||103320|REa|||| | |4(Agrn)|
+1.8|3|15|92|1pter-p31.2|GNB1|C|Guanine nucleotide-binding protein, beta polypeptide-1||139380|REa, A|||| | |4(Gnb1)|
+1.9|8|28|98|1pter-p22.1|SAI1, MTS1, TFS1|C|Suppression of anchorage independence-1 (malignant transformation|suppression-1)|154280|S, H|||| | |4(Tfs1)|
+1.10|4|24|01|1p36.33|TAS1R3, T1R3|P|Taste receptor type 1, member 3||605865|REn|||| | |4(Tas1r3)|
+1.11|10|27|89|1p36.33-p36.22|CA6|C|Carbonic anhydrase VI||114780|REa, A, REc, R|||| | ||
+1.12|8|9|99|1p36.3|C1orf1|P|Chromosome 1, open reading frame 1||604006|A|||| | ||
+1.13|12|1|98|1p36.3|CDC2L1|C|Cell division cycle 2-like 1||176873|REa, A, Pcm, A|||| | ||
+1.14|12|1|98|1p36.3|CDC2L2|P|Cell division cycle 2-like 2||116951|A|||| | ||
+1.15|8|4|99|1p36.3|DFFB, CAD, DFF2|C|DNA fragmentation factor, 40kD, beta subunit||601883|A|||| | ||
+1.16|11|5|99|1p36.3|EGFL3, MEGF6|P|Epidermal growth factor-like 3||604266|R|||| | ||
+1.17|1|24|01|1p36.3|GABRD|C|Gamma-aminobutyric acid (GABA) A receptor, delta||137163|REa, R|||| | ||
+1.18|9|12|96|1p36.3|HKR3|C|GLI-Kruppel family member HKR3||165270|REa, A|||| | ||
+1.19|6|15|99|1p36.3|TNFRSF18, AITR, GITR|P|Tumor necrosis factor receptor superfamily, member 18||603905|R|||| | ||
+1.20|3|30|99|1p36.3|KCNAB2, KCNA2B|P|Potassium voltage-gated channel, shaker-related subfamily, beta|member 2|601142|A|||| | ||
+1.21|11|1|99|1p36.3|MMP23A, MMP21, MIFR|P|Matrix metalloproteinase 23A||603320|REc|||| | ||
+1.22|11|1|99|1p36.3|MMP23B, MMP22|P|Matrix metalloproteinase 23B||603321|REc|||| | ||
+1.23|10|4|93|1p36.3|MTHFR|P|Methylenetetrahydrofolate reductase||236250|A|||Homocystinuria due to MTHFR deficiency (3)| | |4(Mthfr)|
+1.24|1|17|01|1p36.3|PRDM16, MEL1|P|PR domain-containing protein 16||605557|REc, Ch|||| | ||
+1.25|8|9|99|1p36.3|PTPRZ2|P|Protein-tyrosine phosphatase, receptor-type, zeta-2||604008|A|||| | ||
+1.26|1|20|98|1p36.3|RNU1A, RNU1|C|RNA, U1A small nuclear||180680|REa, A|?same as A12M2||| | |3(Rnu1b1)|
+1.27|12|21|98|1p36.3|TNFRSF12, DR3, LARD|C|Tumor necrosis factor receptor superfamily, member 12||603366|A, Ch|||| | ||
+1.28|6|20|01|1p36.3|WDR8|P|WD repeat-containing protein 8||606040|A|||| | ||
+1.29|8|9|99|1p36.3|XBX1|P|Xylan 1,4-beta-xylosidase 1||604007|A|||| | ||
+1.30|2|6|94|1p36.3-p34.3|HTR1D|C|5-hydroxytryptamine (serotonin) receptor-1D||182133|REa, A|||| | |4(Htr1d)|
+1.31|6|10|98|1p36.3-p36.2|DFFA, DFF1|C|DNA fragmentation factor, 45kD, alpha subunit||601882|A|||| | ||
+1.32|1|30|01|1p36.3-p36.2|MASP2|P|Mannan-binding lectin serine protease 2||605102|A, R|||| | ||
+1.33|7|4|95|1p36.3-p36.2|NB, NBS|C|Neuroblastoma (neuroblastoma suppressor)||256700|Ch, D|?role of DAN||Neuroblastoma (2)| | ||
+1.34|1|1|95|1p36.3-p36.2|PLOD, PLOD1|P|Procollagen-lysine, 2-oxoglutarate 5-dioxygenase (lysine hydroxylase)||153454|REa, A|||Ehlers-Danlos syndrome, type VI, 225400 (3)| | |4(Plod)|Hautala (1992)
+1.35|9|15|96|1p36.3-p36.2|SCNN1D|P|Sodium channel, voltage-gated, type I, delta polypeptide||601328|A|||| | ||
+1.36|12|18|98|1p36.3-p36.2|TNFRSF1B, TNFR2, TNFBR|C|Tumor necrosis factor receptor superfamily, member 1B||191191|REa, A, Fd|||| | |4(Tnfr1)|
+1.37|12|14|98|1p36.3-p36.2|TNFRSF14, HVEM, TR2|P|Tumor necrosis factor receptor superfamily, member 14 (herpesvirus|entry mediator)|602746|A|||| | ||
+1.38|11|18|96|1p36.3-p34.1|C1QA|C|Complement component-1, q subcomponent, alpha polypeptide||120550|REa, REb|||C1q deficiency, type A (3)| | ||
+1.39|11|18|96|1p36.3-p34.1|C1QB|C|Complement component-1, q subcomponent, beta polypeptide||120570|REa, REb|||C1q deficiency, type B (3)| | |(C1qb)|
+1.40|11|18|96|1p36.3-p34.1|C1QG|P|Complement component-1, q subcomponent, gamma polypeptide||120575|REn, REb|||C1q deficiency, type C (3)| | ||
+1.41|6|18|97|1p36.2|FRAP1|P|FK506 binding protein 12-rapamycin associated protein-1||601231|R, A|||| | ||
+1.42|12|11|01|1p36.2|KIF1B, CMT2A|C|Kinesin family member 1B||605995|Fd, REc, D|||Charcot-Marie-Tooth neuropathy, type 2A, 118210 (3)| | |4(Kif1b)|
+1.43|6|2|94|1p36.2|NPPA, PND, ANP|C|Pronatriodilatin (atrial natriuretic peptide)||108780|REa, A, H|||| | |4(Pnd)|
+1.44|5|9|95|1p36.2|NPPB, BNP|C|Natriuretic peptide precursor B||600295|H, REa, A, REn|||| | |4(Nppb)|
+1.45|8|4|99|1p36.2|SLC2A5, GLUT5|C|Solute carrier family 2 (facilitated glucose transporter), member 5||138230|REa, A, REc, R|||| | ||
+1.46|4|10|90|1p36.2-p36.13|PGD|C|6-phosphogluconate dehydrogenase||172200|F, S|||| | |4(Pgd)|
+1.47|2|19|96|1p36.2-p36.12|PAX7|C|Paired box homeotic gene-7||167410|Psh, H, REa, A|fused with FKHR in rhabdomyosarcoma||Rhabdomyosarcoma, alveolar, 268220 (3)| | |4(Pax7)|
+1.48|4|19|01|1p36.2-p36.1|DNB5|P|Deleted in neuroblastoma 5||605763|REc|||| | ||
+1.49|3|24|88|1p36.2-p36.1|FGR, SRC2|P|Oncogene FGR||164940|A, REb, REa, Fd|same as SRC2||| | |4(Fgr)|
+1.50|9|22|97|1p36.2-p36.1|GLC3B|P|Glaucoma 3, primary infantile, B||600975|Fd|||Glaucoma 3, primary infantile, B (2)| | ||
+1.51|9|7|00|1p36.2-p36.1|RERE|P|RE repeats-encoding gene||605226|REc, Ch|||| | ||
+1.52|6|19|00|1p36.2-p36.1|ZNF151|P|Zinc finger protein-151||604084|A|||| | ||
+1.53|1|1|95|1p36.2-p35|CDA|C|Cytidine deaminase||123920|A, Psh|||| | ||
+1.54|10|22|92|1p36.2-p34|EPB41, EL1|C|Erythrocyte surface protein band 4.1||130500|F, REb|||Elliptocytosis-1 (3)| | |4(Elp1)|
+1.55|10|23|87|1p36.2-p34|RD|C|Radin blood group||111620|F|||| | ||
+1.56|8|4|97|1p36.2-p34|RHCE|C|Rhesus system C and E polypeptides||111700|F, D, Fd|?order: C-E-D||| | ||
+1.57|1|10|89|1p36.2-p34|RHD|C|Rhesus system D polypeptide||111680|F, D, Fd|||| | ||
+1.58|10|23|87|1p36.2-p34|SC|C|Scianna blood group||111750|F|||| | ||
+1.59|10|18|00|1p36.2-p34|SMP1|P|Small membrane protein 1||605348|REc|between RHD and RHCE||| | ||
+1.60|1|11|95|1p36.13-p36.12|ID3|C|Inhibitor of DNA binding 3, dominant negative, helix-loop-helix|protein|600277|RE, A|||| | ||
+1.61|10|24|97|1p36.13-p36.11|D1S1733E, DAN|P|Differential-screening-selected gene aberrant in neuroblastoma||600613|A|aberrant in some neuroblastomas||| | |4(D4H1S1733E)|
+1.62|3|1|01|1p36.12|PLA2G2D, SPLASH|C|Phospholipase A2, group IID||605630|R|||| | |4(Pla2g2d)|
+1.63|4|24|01|1p36.11-p34.3|WASF2, WAVE2, SCAR2|P|Wiskott-Aldrich syndrome protein family, member 2||605875|REc|?pseudogene on Xp11.21||| | ||
+1.64|8|18|99|1p36.1-p35|C1orf4, B120|P|Brain protein 120||603024|A|||| | ||
+1.65|8|28|97|1p36.1|CAPZB, CAPPB|P|Capping protein (actin filament) muscle Z-line, beta||601572|A|||| | ||
+1.66|10|20|99|1p36.1|CDC42|P|Cell division cycle 42 (GTP-binding protein, 25kD)||116952|H, R, REc|||| | |4(cdc42)|
+1.67|1|1|95|1p36.1|CHC1, RCC1|C|Regulator of chromosome condensation||179710|REb, A|||| | |4(Chc1)|
+1.68|5|28|98|1p36.1|DDOST, OST, OST48|P|Dolichyl-diphosphooligosaccharide-protein glycosyltransferase||602202|A|||| | ||
+1.69|6|16|99|1p36.1|ECE1|C|Endothelin converting enzyme 1||600423|A, R, Psh|||Hirschsprung disease, cardiac defects, and autonomic dysfunction (3)| | ||
+1.70|5|5|98|1p36.1|EPHA2, ECK|P|Ephrin receptor EphA2||176946|A, REa|||| | |4(Epha2)|
+1.71|4|11|97|1p36.1|EXTL1|P|Exostosin-like 1||601738|R, A|||| | ||
+1.72|10|12|90|1p36.1|HMG17|C|Nonhistone chromosomal protein HMG-17||163910|REa, A|||| | ||
+1.73|4|1|01|1p36.1|HSPG2, PLC, SJS, SJA, SJS1|C|Heparan sulfate proteoglycan of basement membrane (perlecan)||142461|A, REa, LD|||Schwartz-Jampel syndrome, type 1, 255800 (3); Dyssegmental dysplasia,|Silverman-Handmaker type, 224410 (3) | |4(Plc)|
+1.74|3|6|01|1p36.1|NR0B2, SHP|P|Nuclear receptor subfamily 0, group B, member 2||604630|REa, A|||Obesity, mild, early-onset, 601665 (3)| | ||
+1.75|8|21|91|1p36.1|PRKACB|P|Protein kinase, cAMP-dependent, catalytic, beta||176892|REa, A|||| | ||
+1.76|11|3|98|1p36.1|RNU17A, RNE1|C|RNA, U17a small nucleolar||180645|REn|in IVS1 of CHC1||| | ||
+1.77|11|3|98|1p36.1|RNU17B|P|RNA, U17b small nucleolar||603239|REn|||| | ||
+1.78|12|15|99|1p36.1|RNU17D, U17HG|P|RNA, U17d small nucleolar||603238|REn|||| | ||
+1.79|2|20|98|1p36.1|RSC1A1, RS1|P|Regulatory solute carrier protein, family 1, member 1||601966|A|||| | ||
+1.80|2|15|96|1p36.1|TCEB3|P|Transcription elongation factor B (SIII), polypeptide 3 (110kD,|elongin A)|600786|A|||| | ||
+1.81|9|13|89|1p36.1|TRN|P|tRNA asparagine||189880|REa, RE|||| | ||
+1.82|11|5|97|1p36.1-p35|EPHB2, EPHT3, DRT, ERK|P|eph tyrosine kinase 3 (ephrin receptor EphB2)||600997|Psh, A|||| | ||
+1.83|4|23|96|1p36.1-p35|MEMO1|P|Methylation modifier for class I HLA||601201|D|||| | ||
+1.84|10|22|95|1p36.1-p35|MFAP2, MAGP, MAGP1|P|Microfibrillar-associated protein-2||156790|REa, A|||| | |4(Mfap2)|
+1.85|2|26|95|1p36.1-p35|RAP1GA1|P|RAP1, GTPase activating protein 1||600278|A, REa|||| | ||
+1.86|10|20|99|1p36.1-p35|RPL11|C|Ribosomal protein L11||604175|REa, R, A|||| | ||
+1.87|11|28|01|1p36.1-p35|SDHB, SDH1, SDHIP|C|Succinate dehydrogenase complex, subunit B, iron sulfur (Ip)||185470|S, REa, A|1 of 2 polypeptides||Pheochromocytoma, extraadrenal, and cervical paraganglioma, 115310|(3) | ||
+1.88|3|28|95|1p36.1-p35|SLC9A1, NHE1, APNH|C|Solute carrier family 9 (sodium/hydrogen exhanger), isoform 1|(antiporter, Na+/H+, amiloride sensitive)|107310|A, F, Fd|3cM proximal to RH||| | |4(Apnh, Nhe1)|
+1.89|7|17|01|1p36.1-p35|STMN1, LAP18, SMN|P|Stathmin||151442|REa, A|||| | |4(Lap18)|
+1.90|1|1|95|1p36.1-p34.3|GPR3|C|G protein-coupled receptor-3||600241|A|||| | ||
+1.91|3|17|94|1p36.1-p34.3|OPRD1|P|Opioid receptor, delta-1||165195|H, A|||| | |4(Nbor, Oprd1)|
+1.92|6|15|99|1p36.1-p34|ALPL, HOPS|C|Alkaline phosphatase, liver/bone/kidney||171760|S, H, Fd, F, A|||Hypophosphatasia, infantile, 241500 (3); Hypophosphatasia, childhood,|241510 (3); ?Hypophosphatasia, adult, 146300 (1) | |4(Akp2)|
+1.93|1|25|99|1p36|ALDH4A1, ALDH4, P5CDH|P|Aldehyde dehydrogenase 4 family, member A1|(delta-1-pyrroline 5-carboxylate dehydrogenase)|606811|A|||Hyperprolinemia, type II, 239510 (3)| | ||
+1.94|5|14|02|1p36|BMND3|P|Bone mineral density variability 3||606928|Fd|||[Bone mineral density variability 3], 601884 (2)| | ||
+1.95|10|2|89|1p36|BRCD2|P|Breast cancer, ductal||211420|Ch, F, D|||Breast cancer, ductal (2)| | ||
+1.96|6|19|98|1p36|CLCN6|P|Chloride channel 6||602726|REa, A|||| | ||
+1.97|10|15|97|1p36|CLCNKA|C|Chloride channel, kidney, A||602024|Fd, REc|11kb from CLCNKB||| | ||
+1.98|10|15|97|1p36|CLCNKB|C|Chloride channel, kidney, B||602023|Fd, REc|unequal crossingover with CLCNKA||Bartter syndrome, antenatal, 601678 (3); Bartter syndrome, 241200 (3)| | ||
+1.99|1|4|93|1p36|CMM, MLM, DNS|P|Cutaneous malignant melanoma/dysplastic nevus||155600|F, Fd, D|some linkage studies negative; see 9p||Malignant melanoma, cutaneous (2)| | ||Greene (1983); Goldstein (1992)
+1.100|8|15|96|1p36|DVL1|C|Dishevelled 1 (homologous to Drosophila dsh)||601365|Psh, A|||| | ||
+1.101|2|01|01|1p36|CORT|C|Cortistatin||602784|H, R, REc|||| | |4(Cort)|
+1.102|2|23|95|1p36|E2F2|P|E2F transcription factor 2||600426|A|||| | ||
+1.103|4|18|97|1p36|EYA3|P|Eyes absent, Drosophila, homolog of, 3||601655|A, H|||| | |4(Eya3)|
+1.104|9|7|00|1p36|IBD7|P|Inflammatory bowel disease-7||605225|Fd|||{Inflammatory bowel disease-7}, 266600 (2)| | ||
+1.105|4|17|01|1p36|ICMT|P|Isoprenylcysteine carboxylmethyltransferase||605851|R|||| | ||
+1.106|2|15|02|1p36|KRPPD|P|Kufor-Rakeb pallidopyramidal degeneration with supranuclear upgaze|paresis and dementia|606693|Fd|||Kufor-Rakeb syndrome (2)| | ||
+1.107|4|2|01|1p36|LUZP1|P|Leucine zipper protein 1||601422|H, R|||| | |4(Luzp)|
+1.108|3|9|00|1p36|MAD2L2, MAD2B|P|Mitotic arrest-deficient 2, S. cerevisiae, homolog-like 2||604094|R|pseudogene on 14q21-q23||| | ||
+1.109|6|3|02|1p36|PARK7|P|Parkinson disease 7, autosomal recessive early-onset||606324|Fd|||Parkinson disease, 168600 (2)| | ||
+1.110|4|21|99|1p36|PCBC, CAPB|P|Prostate cancer-brain cancer susceptibility||603688|Fd|||{Prostate cancer-brain cancer susceptibility} (2)| | ||
+1.111|9|28|96|1p36|PRDM2, RIZ|P|PR domain-containing protein 2 (retinoblastoma protein-binding|zinc-finger protein RIZ)|601196|REc, A|||| | |4(Riz)|
+1.112|11|29|99|1p36|RUNX3, CBFA3, PEBP2A3, AML2|C|Runt-related transcription factor 3||600210|A, REa|||| | |4(Aml2, Cbfa3)|
+1.113|12|18|98|1p36|TNFRSF4, TXGP1L, OX40, ACT35|P|Tumor necrosis factor receptor superfamily, member 4||600315|A|||| | |4(Ox40)|
+1.114|12|18|98|1p36|TNFRSF8, CD30, D1S166E|P|Tumor necrosis factor receptor superfamily, member 8|(CD30 antigen; Ki-1 antigen)|153243|A|||| | |4(Cd30)|
+1.115|12|18|98|1p36|TNFRSF9, ILA, CD137|P|Tumor necrosis factor receptor superfamily, member 9|(interleukin-activated receptor; Ly63, mouse, homolog of)|602250|REa|||| | |(Cd157)|
+1.116|10|15|97|1p36|TP73|P|p53-related protein||601990|D|imprinted||?Neuroblastoma (1)| | ||
+1.117|9|13|89|1p36|TRE|P|tRNA glutamic acid||180640|A|||| | ||
+1.118|6|12|01|1p36-p35|ARH, FHCB2, FHCB1|P|Autosomal recessive hypercholesterolemia gene||605747|Fd|||Hypercholesterolemia, familial, autosomal recessive, 603813 (3)| | ||
+1.119|5|29|02|1p36-p35|GALE|C|UDP galactose-4-epimerase||606953|S, LD|||Galactose epimerase deficiency, 230350 (3)| | ||
+1.120|3|12|96|1p36-p35|HTR6|P|5-hydroxytryptamine (serotonin) receptor-6||601109|REa|||| | ||
+1.121|6|18|01|1p36-p35|PARK6|P|Parkinson disease 6, autosomal recessive early-onset||605909|Fd|||Parkinson disease, 168600 (2)| | ||
+1.122|8|23|01|1p36-p35|SEPN1, SELN, RSMD1|C|Selenoprotein N||606210|REc, Fd|||Muscular dystrophy, rigid spine, 1, 602771 (3)| | ||
+1.123|1|26|97|1p36-p34.1|SCCD|P|Schnyder crystalline corneal dystrophy||121800|Fd|||Corneal dystrophy, crystalline, Schnyder (2)| | ||
+1.124|3|9|98|1p36-p34|PLA2G5|P|Phospholipase A2, group V||601192|A|||| | |4(Pla2g5)|
+1.125|5|4|00|1p36-p32|PABPC4, PABP4, IPABP, APP1|P|Polyadenylate-binding protein, cytoplasmic, 4||603407|Psh, R|||| | ||
+1.126|12|3|91|1p36-p22|SRM, SPS1|C|Spermidine synthase-1||182891|REa, A, Psh|?pseudogene on 3p14-q21||| | ||
+1.127|8|9|01|1p35.2-p33|DSCR1L2, MCIP3|P|Down syndrome critical region gene 1-like 2 (myocyte-enriched|calcineurin-interacting protein 3)|605860|REc|||| | |4(Dscr1l2)|
+1.128|2|18|96|1p35.1|GJA4, CX37|C|Gap junction protein, alpha-4, 37kD (connexin 37)||121012|REa, A, REc|||| | |(Gja4)|
+1.129|12|28|01|1p35.1|GJB3, CX31, DFNA2|C|Gap junction protein, beta-3||603324|REn, REc, Psh, A|same YAC as GJA4||Erythrokeratodermia variabilis, 133200 (3); Deafness, autosomal|dominant 2, 600101 (3); Deafness, autosomal recessive (3); Deafness, autosomal dominant, with peripheral neuropathy (3)| ||
+1.130|11|29|00|1p35.1|GJB4, CX30.3|P|Gap junction protein, beta-4||605425|REn, Fd|||Erythrokeratodermia variabilis with erythema gyratum repens,|133200 (3) | ||
+1.131|11|29|00|1p35.1|GJB5, CX31.1|P|Gap junction protein, beta-5||604493|R, REc|||| | ||
+1.132|12|2|97|1p35.1|P28|P|Inner dynein arm, clamydomonas, homolog of||602135|REa, R|||| | ||
+1.133|6|10|98|1p35|BAI2|P|Brain-specific angiongenesis inhibitor-2||602683|A|||| | ||
+1.134|8|21|91|1p35|EBVS1|P|Epstein-Barr virus integration site||132850|A|||| | ||
+1.135|3|21|93|1p35|G1P3, IFI616|C|Interferon, alpha-inducible protein (clone IFI-6-16)||147572|A, T|||| | ||

Added: trunk/packages/bioperl/branches/upstream/current/t/data/omim_text_test
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/omim_text_test	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/omim_text_test	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,227 @@
+*RECORD*
+*FIELD* NO
+100500
+*FIELD* TI
+*100500 title
+;;title1;;
+title2;;
+title3
+
+*FIELD* MN
+Mini MIM text
+
+*FIELD* CN
+Mini MIM - cn
+*FIELD* CD
+Mini MIM - cd
+*FIELD* ED
+Mini MIM - ed
+
+*FIELD* TX
+DESCRIPTION1
+
+*FIELD* TX
+DESCRIPTION2
+ 
+*FIELD* AV
+.0001
+ALCOHOL INTOLERANCE, ACUTE
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX!
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX!
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX! 
+ALDH2, GLU487LYS
+
+AV1-text
+
+.0002
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX!
+CHRNA1, VAL156MET
+
+AV2-text
+
+.0003
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, ARG147LEU
+
+AV2-text a
+AV2-text b
+
+.0004
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, 1-BP DEL, 911T
+
+Sieb et al. (2000) found that a brother and sister with congenital
+myasthenic syndrome (601462) were compound heterozygotes for a deletion
+of 911T and a splicing mutation (IVS4+1G-A; 100725.0007).
+
+.0005
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, IVS4DS, G-A, +1
+
+See 100725.0006 and Sieb et al. (2000).
+
+.0006
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, 1-BP DEL, 1030C
+
+AV6-text
+
+
+
+
+*FIELD* SA
+sa
+
+*FIELD* RF
+1. Author11, A. A.; Author12, A. A.: Title 1. Am. J. Med. Genet1. 11: 11-111, 1981.
+
+2. Author21, A. A.; Author22, A. A.: Title 2. Am. J. Med. Genet2. 12: 22-222, 1982.
+
+3. Author31, A. A.; Author32, A. A.: Title 3. Am. J. Med. Genet3. 13: 33-333, 1983.
+
+4. other reference undef format
+
+*FIELD* CS
+
+clinical symptoms
+
+*FIELD* CN
+cn1
+
+*FIELD* CD
+cd1
+
+*FIELD* ED
+ed1
+
+*FIELD* CN
+cn2
+cn3
+
+*FIELD* ED
+ed2
+ed3
+
+
+*FIELD* CD
+cd2
+cd3
+
+
+
+
+
+*RECORD*
+*FIELD* NO
+100501
+*FIELD* TI
+#100501 second entry
+;;title1;;
+title2;;
+title3
+
+*FIELD* MN
+Mini MIM text
+
+*FIELD* CN
+Mini MIM - cn
+*FIELD* CD
+Mini MIM - cd
+*FIELD* ED
+Mini MIM - ed
+
+*FIELD* TX
+DESCRIPTION1
+
+*FIELD* TX
+DESCRIPTION2
+ 
+*FIELD* AV
+.0001
+ALCOHOL INTOLERANCE, ACUTE
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX!
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX!
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX! 
+ALDH2, GLU487LYS
+
+AV1-text
+
+.0002
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+ACETALDEHYDE DEHYDROGENASE 2, ALLELE 2, INCLUDED; ALDH2*2, INCLUDED XXX!
+CHRNA1, VAL156MET
+
+AV2-text
+
+.0003
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, ARG147LEU
+
+AV2-text a
+AV2-text b
+
+.0004
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, 1-BP DEL, 911T
+
+Sieb et al. (2000) found that a brother and sister with congenital
+myasthenic syndrome (601462) were compound heterozygotes for a deletion
+of 911T and a splicing mutation (IVS4+1G-A; 100725.0007).
+
+.0005
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, IVS4DS, G-A, +1
+
+See 100725.0006 and Sieb et al. (2000).
+
+.0006
+MYASTHENIC SYNDROME, SLOW-CHANNEL CONGENITAL
+CHRNE, 1-BP DEL, 1030C
+
+AV6-text
+
+
+
+
+*FIELD* SA
+sa
+
+*FIELD* RF
+1. Author11, A. A.; Author12, A. A.: Title 1. Am. J. Med. Genet1. 11: 11-111, 1981.
+
+2. Author21, A. A.; Author22, A. A.: Title 2. Am. J. Med. Genet2. 12: 22-222, 1982.
+
+3. Author31, A. A.; Author32, A. A.: Title 3. Am. J. Med. Genet3. 13: 33-333, 1983.
+
+4. other reference undef format
+
+*FIELD* CS
+
+clinical symptoms
+
+*FIELD* CN
+cn1
+
+*FIELD* CD
+cd1
+
+*FIELD* ED
+ed1
+
+*FIELD* CN
+cn2
+cn3
+
+*FIELD* ED
+ed2
+ed3
+
+
+*FIELD* CD
+cd2
+cd3
+
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/pep-266.aln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/pep-266.aln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/pep-266.aln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,114 @@
+CLUSTAL W (1.82) multiple sequence alignment
+
+
+Spar_21273             -------------MPFAQIVIGPPGSGKSTYCN------GCSQFFNAIGRHSQVVNMDPA
+YOR262W                -------------MPFAQIVIGPPGSGKSTYCN------GCSQFFNAIGRHSQVVNMDPA
+Smik_Contig1103.1      -------------MPFAQIVIGPPGSGKSTYCN------GCSQFFNAIGRHSQVVNMDPA
+Skud_Contig1703.7      -------------MPFAQIVIGPPGSGKSTYCN------GCSQFFNAIGRHSQVVNMDPA
+Sbay_Contig635.43      -------------MPFAQIVIGPPGSGKSTYCN------GCSQFFNAIGRHSQVVNMDPA
+Scas_Contig692.20      -------------MPFAQIVIGPPGSGKSTYCN------GCSQFFNAIGRHGQIVNMDPA
+Sklu_Contig2277.4      -------------MPFGQIVIGPPGSGKSTYCN------GCSQFFNAVGRHAQVINMDPA
+AAL117C                -------------MAYGQIVIGPPGSGKSTYCN------GCSQFFNAIGRHARIVNMDPA
+SPAC144.07c            -------------MPFCQVVVGPPGSGKSTYCF------GMYQLLSAIGRSSIIVNLDPA
+MG06110.4              -----------------------------------------MGFLGAIGRACSVVNLDPA
+NCU09745.1             ----------------------------------MTSPLPVQQFMGAIGRQCSVVNLDPA
+FG05298.1              -------------MPFAQLVLGSPGCGKSTYCDGIQLTGQVHQFLGAIGRACSVVNLDPA
+AN2438.1               ------------------------------------------------------------
+183.m01790             MDDKELEIPVEHSTAFGQLVTGPPGAGKSTYCH------GLHQFLTAIGRPVHIINLDPA
+                                                                                   
+
+Spar_21273             NDALPYPCAVDIRDFITLEEIMQEQQLGPNGGLVYAVESLDKSIDLFILQIKSLV--EEE
+YOR262W                NDALPYPCAVDIRDFITLEEIMQEQQLGPNGGLMYAVESLDNSIDLFILQIKSLV--EEE
+Smik_Contig1103.1      NDALPYPCAVDIRDFITLEEIMQEQQLGPNGGLMYAVESLDKSIDLFLLQIKSLV--EEE
+Skud_Contig1703.7      NDALPYPCAVDIRDFITLEEVMQEQQLGPNGGLMYAVESLDNSIDLFILQIKSLV--EEE
+Sbay_Contig635.43      NDALPYPCAVDIRDFITLEEIMKEQHLGPNGGLMYAVESLDKSIDLFILQIKSLV--EEE
+Scas_Contig692.20      NDALPYPCAVDIRDFVTLEEIMQEQQLGPNGGLMYAVESLDESIDLFILQIKSLV--QEE
+Sklu_Contig2277.4      NDSLPYPCAVDIRDFITLEEIMTEQQLGPNGGLMYALESLDKSIDLFVLQIKSLV--QDE
+AAL117C                NDSLPYQCDVDIRDFITLEEIMNEQHLGPNGGLVYAFESVEHSLSLFALQIKTLV--KDE
+SPAC144.07c            NDFIKYPCAIDIRKVLDVEMIQKDYDLGPNGALIYAMEAIEYHVEWLLKELK-----KHR
+MG06110.4              NDHTSYPCALDIRNLVTLEEIMGDDNLGPNGGILYAIEELEHNFEWLEDGLK-----ELG
+NCU09745.1             NDHTNYPCALDIRDLVTLEEIMADDKLGPNGGILYALEELENNMEWLENGLK-----ELG
+FG05298.1              NDHTNYPAALDIRSLIKLEEIMKDDKLGPNGGILYALEELEHNFEWLEEGLK-----EFS
+AN2438.1               ---------------------MSEDQLGPNGGVLYALEELEENFDFLEEGLK-----ELG
+183.m01790             VPNPPYPCSINITELITLESVMEEYNLGPNGAMLYCIEFLEANFDWLVERLDEVLAEEGG
+                                              : .*****.::*..* ::  .. :   :.     :  
+
+Spar_21273             KAYLVFDCPGQVELFTHHSSLFSIFKKMEKELDMRFCVVNLIDCFYMTSPSQYISILLLA
+YOR262W                KAYLVFDCPGQVELFTHHSSLFNIFKKMEKELDIRFCVVNLIDCFYMTSPSQYISILLLA
+Smik_Contig1103.1      KAYLVFDCPGQVELFTHHSSLFNIFKKMEKELDMRFCVINLIDCFYMTSPSQYVSILLLA
+Skud_Contig1703.7      KAYLVFDCPGQVELFTHHSSLFSIFKKMEKELDMRFCVVNLIDCFYMTSPSQYVSILLLA
+Sbay_Contig635.43      KAYVVFDCPGQVELFTHHSSLFSIFKKLEKELDMRFCVVNLIDCFYMTSPSQYVSILLLA
+Scas_Contig692.20      KAYLVFDCPGQVELFTHHSSLFKIFKKLEKELDMRFCVVNLIDSFYITSPSQYVSILLLA
+Sklu_Contig2277.4      HAYVVFDCPGQVELFTHHSSLFRIFKKLERELDMRLCVVNLIDCFYITSPSQYVSILLLA
+AAL117C                NAYLVFDCPGQVELFTHHSALSKIFQQLVRDLDLRVCVVNLMDSIYITSPSQYVSVLLLA
+SPAC144.07c            DSYVIFDCPGQVELFTNHNSLQKIIKTLEKELDYRPVSVQLVDAYCCTNPSAYVSALLVC
+MG06110.4              DDYILFDCPGQVELYTHHNSLRNIFFKLQK-LGYRLVVVHLSDSICLTQPSLYISNLLLA
+NCU09745.1             EDYVLFDCPGQVELYTHHNSLRNIFYRLQK-LGYRLVVVHLSDCFCLTQPSLYISNVLLS
+FG05298.1              EDYILFDCPGQVELYTHHNSLRNIFYKLQK-IGFRLVSVHLSDSFCLTQPSLYVSNVLLS
+AN2438.1               EDYIIFDCPGQVEIFTHHSSLRNIFFKIQK-MGYRLIVLHLIDSYNLTLPSMYISSLILC
+183.m01790             NGYVVFDTPGQAELWTNHDSLKNVVEKLVK-MDYRLAAVHLSDAHYITDASKFISVVLLA
+                       . *::** ***.*::*:*.:*  :.  : : :. *   ::* *.   * .* ::* :::.
+
+Spar_21273             LRSMLMMDLPHINVFSKIDMLKSYGELPFRLDYYTEVQDLDYLEPYIEKEGSSVLGKKYS
+YOR262W                LRSMLMMDLPHINVFSKIDMLKSYGELPFRLDYYTEVQDLDYLEPYIEKEGSSVLGKKYS
+Smik_Contig1103.1      LRSMLMMDLPHINVFSKIDMLKSYGELPFRLDYYTEVQDLDYLEPYIEKEGSSVLGKKYN
+Skud_Contig1703.7      LRSMLMMDLPHINVFSKIDMLKSYGELPFRLDYYTEVQELDHLEPYIEKEGSSVLGKKYS
+Sbay_Contig635.43      LRSMLMMDLPHINVFSKIDKLKSYGELPFRLDYYTEVQDLDYLEPYIEKEGSGALGKRYS
+Scas_Contig692.20      LRSMLMMDLPQINVFSKIDMLKSYGELPFRLDYYTEVQDLDYLQPFIEKESSSVLGRRYS
+Sklu_Contig2277.4      LRSMLMMDLPHINVFSKIDLLKSYGELPFRLDYYTEVQELDYLKPHIDKEGSSVLGRKYS
+AAL117C                LRSMLMMDLPHINVLSKIDMLSSYGDLPFRLDYYTEVQDLEYLQPHIEREHKGAKALRYR
+SPAC144.07c            LKGMLQLDMPHVNILSKADLLCTYGTLPMKLDFFTEVQDLSYLAPLLDRDKR---LQRYS
+MG06110.4              LRAMLQMDLSHVNVLTKIDKVSSYDRLAFNLDFYTEVHDLSYLLPELEAENPSLRSEKFA
+NCU09745.1             LRAMLQMDLPHINVLTKIDKISSYDPLPFNLDYYTEVQDLRYLMPSLDAESPALKKGKFT
+FG05298.1              LRAMIQMDMPHINILSKIDKVADYDELPFNLDYYTDVDDLTYLTPHLETESPALRSEKFG
+AN2438.1               LRAMLQMDLPHLNVLTKIDNLSNYTSLPFNLDFYTEVQDLTYLLPHLEAESSRLSHEKFG
+183.m01790             LRAMLQMEMPHLNVLSKIDLISTYGELPFDLSYYTEVQDLSYLLGSLDSDPR---TAKYH
+                       *:.*: :::.::*:::* * :  *  *.: *.::*:*.:* :*   :: :       :: 
+
+Spar_21273             KLTETIKELVSDFNLVSFEVLSVDDKESMINLQGVIDKANGYIFGAS---EVGG------
+YOR262W                KLTETIKELVSDFNLVSFEVLSVDDKESMINLQGVIDKANGYIFGAS---EVGG------
+Smik_Contig1103.1      KLTDAIKELVSDFNLVSFEVLSVDDKESMINLQGVIDKANGYIFGAS---EVGG------
+Skud_Contig1703.7      KLTETIKELVSDFNLVSFEVLSVDDKESMINLQGVIDKANGYIFGAS---EVGG------
+Sbay_Contig635.43      KLTETISELVSDFNLVSFEVLAVDDKESMINLQGVIDKANGYIFGAS---EVGG------
+Scas_Contig692.20      KLTETISELVSDFNLVSFEVLAVDDKQSMINLQSVVDKANGYIFGAS---EVGG------
+Sklu_Contig2277.4      RLTETISELVSDFNLVSFEVLCVDDKQSMINLQSIVDKANGYIFGVS---EIGG------
+AAL117C                RLTEAIGEVVSDFNLVAFEVLCVDDKQSMINLQSAIDKANGYIFGAS---EVGG------
+SPAC144.07c            DLNKAICELVEDFNLVSFEVVAVENKASMLRVLRKIDQAGGYAYGST---EIGG------
+MG06110.4              KLNRAVANLIEDFGLVRFEVLAVENKKSMMHLLRVLDRANGYVFGGA---EGAN------
+NCU09745.1             KLNEAVANMVEQFGLVSFEVLAVENKKSMMHLLRVIDRASGYVFGGA---EGTN------
+FG05298.1              KLNEAIANLIESYGLVRYEVLAVENKKSMMHILRVIDRAGGYVFGSA---EGAN------
+AN2438.1               ALNNAIITLIEEFGLVGFETLAVEDKKSMMNLLRAIDRASGYVFGPA---EGAN------
+183.m01790             KLNKALVELIEGFSLVGFQTLAVEDKESMLNIVRLVDKMTGYIFIPSGDLEGTNAINTQA
+                        *. ::  ::. :.** ::.:.*::* **:.:   :*:  ** :  :   *  .      
+
+Spar_21273             --DTVWAEASREGA-LLANYDIQDRWIDNKEKYDKEEEEKRAALLKEQELQNKAVDVNEE
+YOR262W                --DTVWAEASREGA-LIANYDIQDRWIDNKEKYDKEEEEKRTALLKEQELQNKAVDVNEE
+Smik_Contig1103.1      --DTVWAEASREGA-LLTSYDIQDRWIDNKEKYDKEEEEKRVILLKEQELQNKAVDVNED
+Skud_Contig1703.7      --DTVWAEASREGA-LLENYDIQDRWIDNKETYDKEEQEKRASLLKEQELQNKTVDVKEE
+Sbay_Contig635.43      --DTVWAEASREGA-LLASYDIQDRWIDNKEKYDKEEQEKRAAMVKEQELQNKEVNVDEE
+Scas_Contig692.20      --DTVWAEATREGA-MMVNYDIQDRWIDNKEKYDEEERKRQEEQAKEQNMQEKEVDVDNE
+Sklu_Contig2277.4      --DTVWAEATRQGS-AIANYDIQERWIDNKDMYDREEQEKREQLLKEEELQNKEVDVDKG
+AAL117C                --DTVWAEATRQGT-AAIEYDIQDRWIDNKDFYDKEEEARRKKLLEEHELLEKEVDVNQD
+SPAC144.07c            --DAVWVNAVRQGGDPLQGISPQERWIDKKEEYDK----------YEWELEQK--STMDE
+MG06110.4              --DTVWQVAMRNEGSLMGVQDIQERWIDNKEAYDEMEQREWEEQVKAQEAMAEADAAAAE
+NCU09745.1             --DTVWQVAMRNESSLPDALDIQERWIDSKEEYDEMERKEEEEQEKLRAEQARAAEEAGL
+FG05298.1              --DTVWSVAMRNESSMLGVQDIQERWIDQKVEYDQMEREAEEEQARIQEEQAMEMEQSQP
+AN2438.1               --DSVWQVAVREGMGSMDIRDIQERWIDAKDEYDELERRQREEEIKNHQQAATYQAGNED
+183.m01790             LFGSAMSSAKLTGRAGGDVRDVQERWMDNKEAWDEWEKKEWKREAEIRAQMGTGIPEGMK
+                         .:.   *           . *:**:* *  :*.                         
+
+Spar_21273             DEWENALKEWEEKQGTDFVR--------------
+YOR262W                DEWENALKEWEEKQGMDFVR--------------
+Smik_Contig1103.1      DEWESALKEWEEKQGMDFVR--------------
+Skud_Contig1703.7      DEWENALKEWEEKQDTEFVR--------------
+Sbay_Contig635.43      DEWENALNDWEEKQGTDFVR--------------
+Scas_Contig692.20      DEWEKALKDWEEKQGTGYVR--------------
+Sklu_Contig2277.4      DEWENALKEWEEKQGMSYVK--------------
+AAL117C                DEWERAVKEWESQHSVNFVK--------------
+SPAC144.07c            DENEG-----------------------------
+MG06110.4              -EG-----DDDLMGGPGAR---------------
+NCU09745.1             GDGSVPGVAPQFTSGSGIRVTLSLVAAFTKYSDL
+FG05298.1              PPAPTGGMDPDFGDMTVPKDSGIKVVRK------
+AN2438.1               DDDDN---DYEFGRRMPVPDSGVKVMRK------
+183.m01790             GGED--------AESTGI----------------
+                                                         

Added: trunk/packages/bioperl/branches/upstream/current/t/data/pfam_tests.stk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/pfam_tests.stk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/pfam_tests.stk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,217 @@
+# STOCKHOLM 1.0
+#=GF ID   14-3-3
+#=GF AC   PF00244.9
+#=GF DE   14-3-3 protein
+#=GF AU   Finn RD
+#=GF SE   Prosite
+#=GF GA   25.00 25.00; 25.00 25.00;
+#=GF TC   28.50 28.50; 30.20 29.80;
+#=GF NC   22.70 22.70; 20.60 20.60;
+#=GF TP   Domain
+#=GF BM   hmmbuild -F HMM_ls SEED
+#=GF BM   hmmcalibrate --seed 0 HMM_ls
+#=GF BM   hmmbuild -f -F HMM_fs SEED
+#=GF BM   hmmcalibrate --seed 0 HMM_fs
+#=GF AM   globalfirst
+#=GF RN   [1]
+#=GF RM   95327195
+#=GF RT   Structure of a 14-3-3 protein and implications for
+#=GF RT   coordination of multiple signalling pathways. 
+#=GF RA   Xiao B, Smerdon SJ, Jones DH, Dodson GG, Soneji Y, Aitken
+#=GF RA   A, Gamblin SJ; 
+#=GF RL   Nature 1995;376:188-191.
+#=GF RN   [2]
+#=GF RM   95327196
+#=GF RT   Crystal structure of the zeta isoform of the 14-3-3
+#=GF RT   protein. 
+#=GF RA   Liu D, Bienkowska J, Petosa C, Collier RJ, Fu H, Liddington
+#=GF RA   R; 
+#=GF RL   Nature 1995;376:191-194.
+#=GF RN   [3]
+#=GF RM   96182649
+#=GF RT   Interaction of 14-3-3 with signaling proteins is mediated
+#=GF RT   by the recognition of phosphoserine. 
+#=GF RA   Muslin AJ, Tanner JW, Allen PM, Shaw AS; 
+#=GF RL   Cell 1996;84:889-897.
+#=GF RN   [4]
+#=GF RM   97424374
+#=GF RT   The 14-3-3 protein binds its target proteins with a common
+#=GF RT   site located towards the C-terminus. 
+#=GF RA   Ichimura T, Ito M, Itagaki C, Takahashi M, Horigome T,
+#=GF RA   Omata S, Ohno S, Isobe T 
+#=GF RL   FEBS Lett 1997;413:273-276.
+#=GF RN   [5]
+#=GF RM   96394689
+#=GF RT   Molecular evolution of the 14-3-3 protein family. 
+#=GF RA   Wang W, Shakes DC 
+#=GF RL   J Mol Evol 1996;43:384-398.
+#=GF RN   [6]
+#=GF RM   96300316
+#=GF RT   Function of 14-3-3 proteins. 
+#=GF RA   Jin DY, Lyu MS, Kozak CA, Jeang KT 
+#=GF RL   Nature 1996;382:308-308.
+#=GF RN   [7]
+#=GF RM   12184815
+#=GF RT   The 14-3-3s. 
+#=GF RA   Ferl RJ, Manak MS, Reyes MF; 
+#=GF RL   Genome Biol 2002;3:REVIEWS3010.
+#=GF DR   PROSITE; PDOC00633;
+#=GF DR   SMART; 14_3_3;
+#=GF DR   PRINTS; PR00305;
+#=GF DR   SCOP; 1a4o; fa;
+#=GF DR   INTERPRO; IPR000308;
+#=GF SQ   16
+#=GS RAD25_SCHPO/5-240  AC P42657
+#=GS RAD24_SCHPO/6-241  AC P42656
+#=GS BMH1_YEAST/4-240   AC P29311
+#=GS 1433E_SHEEP/4-239  AC P62262
+#=GS 1433B_VICFA/7-242  AC P42654
+#=GS 14334_LYCES/6-243  AC P42652
+#=GS 14333_LYCES/9-246  AC P93209
+#=GS 14336_ARATH/7-240  AC P48349
+#=GS 14332_ENTHI/4-238  AC P42649
+#=GS 14331_ENTHI/4-239  AC P42648
+#=GS 1433T_HUMAN/3-236  AC P27348
+#=GS 1433_XENLA/1-227   AC P29309
+#=GS 1433Z_DROME/6-239  AC P29310
+#=GS 14331_CAEEL/5-237  AC P41932
+#=GS 1433F_RAT/3-240    AC P68511
+#=GS 1433S_HUMAN/3-238  AC P31947
+RAD25_SCHPO/5-240            RENSVYLAKLAEQAERYEEMVENMKKVACSND...KLSVEERNLLSVAYKNIIGARRASWRIISSIEQKEESRG.NTRQAALIKEYRKKIEDELSDICHDVLSVLEKHLIPAA..TTGESKVFYYKMKGDYYRYLAEFTVGEVCKEAADSSLEAYKAASDIAVAELPPTDPMRLGLALNFSVFYYEILDSPESACHLAKQVFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYNQ
+RAD24_SCHPO/6-241            REDAVYLAKLAEQAERYEGMVENMKSVASTDQ...ELTVEERNLLSVAYKNVIGARRASWRIVSSIEQKEESKG.NTAQVELIKEYRQKIEQELDTICQDILTVLEKHLIPNA..ASAESKVFYYKMKGDYYRYLAEFAVGEKRQHSADQSLEGYKAASEIATAELAPTHPIRLGLALNFSVFYYEILNSPDRACYLAKQAFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYSA
+BMH1_YEAST/4-240             REDSVYLAKLAEQAERYEEMVENMKTVASSGQ...ELSVEERNLLSVAYKNVIGARRASWRIVSSIEQKEESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSA..TTGESKVFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELPPTHPIRLGLALNFSVFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSESG
+1433E_SHEEP/4-239            REDLVYQAKLAEQAERYDEMVESMKKVAGMDV...ELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEENKG.GEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAA..NTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELPPTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDG
+1433B_VICFA/7-242            RENFVYIAKLAEQAERYEEMVDSMKNVANLDV...ELTIEERNLLSVGYKNVIGARRASWRILSSIEQKEESKG.NDVNAKRIKEYRHKVETELSNICIDVMRVIDEHLIPSA..AAGESTVFYYKMKGDYYRYLAEFKTGNEKKEAGDQSMKAYESATTAAEAELPPTHPIRLGLALNFSVFYYEILNSPERACHLAKQAFDEAISELDTLNEESYKDSTLIMQLLRDNLTLWTSDIPEDG
+14334_LYCES/6-243            REENVYLAKLAEQAERYEEMIEFMEKVAKTADV.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRG.NEDHVNTIKEYRSKIEADLSKICDGILSLLESNLIPSA..STAESKVFHLKMKGDYHRYLAEFKTGTERKEAAENTLLAYKSAQDIALAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAISELDTLGEESYKDSTLIMQLLRDNLTLWTSDNADDV
+14333_LYCES/9-246            REENVYMAKLADRAESDEEMVEFMEKVSNSLGS.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRG.NEEHVNSIREYRSKIENELSKICDGILKLLDSKLIPSA..TSGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLTAYKAAQDIASAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDG
+14336_ARATH/7-240            RDQYVYMAKLAEQAERYEEMVQFMEQLVTGATPAEELTVEERNLLSVAYKNVIGSLRAAWRIVSSIEQKEESRK.NDEHVSLVKDYRSKVESELSSVCSGILKLLDSHLIPSA..GASESKVFYLKMKGDYHRYMAEFKSGDERKTAAEDTMLAYKAAQDIAAADMAPTHPIRLGLALNFSVFYYEILNSSDKACNMAKQAFEEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSD.....
+14332_ENTHI/4-238            REDLVYLSKLAEQSERYEEMVQYMKQVAEMGT...ELSVEERNLISVAYKNVVGSRRASWRIISSLEQKEQAKG.NTQRVELIKTYRAKIEQELSQKCDDVLKIITEFLLKNS..TSIESKVFFKKMEGDYYRYYAEFTVDEKRKEVADKSLAAYQEATDTA.ASLVPTHPIRLGLALNFSVFYYQIMNDADKACQLAKEAFDEAIQKLDEVPEESYKESTLIMQLLRDNLTLWTSDMGDDE
+14331_ENTHI/4-239            REDCVYTAKLAEQSERYDEMVQCMKQVAEMEA...ELSIEERNLLSVAYKNVIGAKRASWRIISSLEQKEQAKG.NDKHVEIIKGYRAKIEKELSTCCDDVLKVIQENLLPKA..STSESKVFFKKMEGDYYRYFAEFTVDEKRKEVADKSLAAYTEATEISNAELAPTHPIRLGLALNFSVFYFEIMNDADKACQLAKQAFDDAIAKLDEVPENMYKDSTLIMQLLRDNLTLWTSDACDEE
+1433T_HUMAN/3-236            KTELIQKAKLAEQAERYDDMATCMKAVTEQGA...ELSNEERNLLSVAYKNVVGGRRSAWRVISSIEQKTDT...SDKKLQLIKDYREKVESELRSICTTVLELLDKYLIANA..TNPESKVFYLKMKGDYFRYLAEVACGDDRKQTIDNSQGAYQEAFDISKKEMQPTHPIRLGLALNFSVFYYEILNNPELACTLAKTAFDEAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDSAGEE
+1433_XENLA/1-227             .......AKLSEQAERYDDMAASMKAVTELGA...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEG...NDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNA..TPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQPTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEE
+1433Z_DROME/6-239            KEELVQKAKLAEQSERYDDMAQAMKSVTETGV...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEA...SARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKA..SNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQPTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDE
+14331_CAEEL/5-237            VEELVQRAKLAEQAERYDDMAAAMKKVTEQGQ...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEG...SEKKQQLAKEYRVKVEQELNDICQDVLKLLDEFLIVKA..GAAESKAFYLKMKGDYYRYLAEVAS.EDRAAVVEKSQKAYQEALDIAKDKMQPTHPIRLGLALNFSVFYYEILNTPEHACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDVGAED
+1433F_RAT/3-240              REQLLQRARLAEQAERYDDMASAMKAVTELNE...PLSNEDRNLLSVAYKNVVGARRSSWRVISSIEQKTMADG.NEKKLEKVKAYREKIEKELETVCNDVLALLDKFLIKNCNDFQYESKVFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEHMQPTHPIRLGLALNFSVFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDEE
+1433S_HUMAN/3-238            RASLIQKAKLAEQAERYEDMAAFMKGAVEKGE...ELSCEERNLLSVAYKNVVGGQRAAWRVLSSIEQKSNEEG.SEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEA..GDAESRVFYLKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMPPTNPIRLGLALNFSVFHYEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEE
+#=GC seq_cons                RE-hVYhAKLAEQAERY--MlpsMKpVsptss...ELSlEERNLLSVAYKNVlGARRASWRIISSIEQKEEu+G.N-c+lphIKEYRpKlEsELssICsDVLplLDcaLIPsA..ssuESKVFYhKMKGDYYRYLAEFsoG-cRKcss-sShtAYppApDIApuEhsPTHPIRLGLALNFSVFYYEILNSP-+ACpLAKQAFDEAIAELDTLsEESYKDSTLIMQLLRDNLTLWTSDhts-t
+//
+# STOCKHOLM 1.0
+#=GF ID   2-ph_phosp
+#=GF AC   PF04029.4
+#=GF DE   2-phosphosulpholactate phosphatase
+#=GF AU   Kerrison ND, Finn RD
+#=GF SE   COG2045
+#=GF GA   25.00 25.00; 25.00 25.00;
+#=GF TC   43.30 43.30; 28.00 28.00;
+#=GF NC   3.00 3.00; 22.60 22.00;
+#=GF TP   Family
+#=GF BM   hmmbuild -F HMM_ls SEED
+#=GF BM   hmmcalibrate --seed 0 HMM_ls
+#=GF BM   hmmbuild -f -F HMM_fs SEED
+#=GF BM   hmmcalibrate --seed 0 HMM_fs
+#=GF AM   globalfirst
+#=GF RN   [1]
+#=GF RM   21474017
+#=GF RT   Identification of coenzyme M biosynthetic
+#=GF RT   2-phosphosulfolactate phosphatase. A member of a new class
+#=GF RT   of Mg(2+)-dependent acid phosphatases. 
+#=GF RA   Graham DE, Graupner M, Xu H, White RH; 
+#=GF RL   Eur J Biochem 2001;268:5176-5188.
+#=GF DR   INTERPRO; IPR005238;
+#=GF CC   Thought to catalyse 2-phosphosulpholactate = sulpholactate + phosphate.
+#=GF CC   Probable magnesium cofactor.  Involved in the second step of coenzyme M
+#=GF CC   biosynthesis.  Inhibited by vanadate in Methanococcus jannaschii.  Also
+#=GF CC   known as the ComB family [1].
+#=GF SQ   10
+#=GS COMB_CLOAB/6-235   AC Q97E82
+#=GS COMB_THEMA/1-224   AC Q9WZQ4
+#=GS COMB_METTH/2-219   AC O27250
+#=GS COMB_METJA/1-219   AC Q58540
+#=GS COMB_DEIRA/2-232   AC Q9RUI6
+#=GS COMB_SYNY3/2-241   AC P73849
+#=GS COMB_THEVO/1-211   AC Q97CK6
+#=GS COMB_THEAC/1-211   AC Q9HIA9
+#=GS COMB_BACSU/2-223   AC O06738
+#=GS COMB_STRCO/12-227  AC Q9F3E6
+COMB_CLOAB/6-235             IISADDIKE.EKVKN..KTAVVIDMLRATSVITTALNNGCKRVVPVLTVEEALKKVKEY.GKDAILGGERKGLKIEGFDFSNSPMEY.TEDVV......KGKTLIMTTTNGTRAIKGSET.ARDILIGSVLNGEAVAEKIVELN.NDVVIVNAGTYGEFSIDDFICSGYIINCVMDRMKKLELT.DAATTA..QYVYKTNEDIKGFVKYAK.HYKRIMELGLKKDFEYCCKKDIVKLVPQYTN.GEIL..
+COMB_THEMA/1-224             MVDVVMAPC.SPVEC..RTAVVIDVLRATSTIVTALSNGASGVIPVKTIEEALEKKK....EGVLICGERNAQKPKGFNLGNSPLEY.RKEKI......SGKTIVLTTTNGTQVIEKIRS..EEIIAASFLNLSAVVEYLKSKE..DILLVCAGTNGRFSLEDFLLAGAIV..KRLKRNDLG...DGAHAA..ERYFESVENTREEIKKHSSHAKRLISLGFENDIEFCTTEDLFKTVPALVN.GVFILK
+COMB_METTH/2-219             AMRIRLSFE.RPEGS..GLCIMVDLLRASATITAALDR.FREVIPVADIEEAMEYSR....KGYLVAGERGGETLPGF.IANSPLEV.KNYR........GDVLVLTTSNGTRILESVKS...DALVGCLNNLDAVAEAARELS.DEVEVVMAGVNGRFAIEDFLCAGEIIAAIDGEMDEYA...EASVLA..VQDRSLVDDAIRRSRSAER....LGGLGFMDDVEYCIKRNITGNVPVYRD.GRIELM
+COMB_METJA/1-219             MITLCNRFT..EYKC.GNVAIVVDVLRASTTITTLLSF.IDEVYITTST.....SKK....ENAIYIGERKGRKIEGFDFGNSPTEILANKDIIKERYENGEKVILTTTNGTRVLKSLDA..EHIFIGAIVNAKYVAKAVEDFE..DVSLVPCHRENNFAIDDFIGCGVIAKYLNGEFDEF.....IKAAL..ELTKHDWMSLILNSSSAEN....LKNLGYEKDVTFAILENSIDAVGIYKK...DKSK
+COMB_DEIRA/2-232             RLRVDLLPD..SHYP..DVALVIDVLRATTTAVTLLEQGAAELLLTRTTEAALAVRETV..PDVLLAGERGGLTIPGFDLGNSPVEV.SGGAV......AGRRVVMTTTNGTIAAHRAAQTARHVVLAALVNAHAVARHALAVASEEIAIVCAGTDGRVGLDDVYAAGVIA.EYLLALGDFQVD.DGARIA..LTMRRGGGDPGEALRSSG.HGATLARLGLSSDVDYAAQLSTSRLIPTLVP.GDDVPA
+COMB_SYNY3/2-241             EIFVYHTPELTPDQSLPDCAVVIDVLRATTTIATALKVGAEAVQVFSSLDDLMATSESWPGEKRLRAGERGGAAVAGYDLGNSPLDC.TSELM......AGKRLFLSTTNGTRALQRVKDC.PQLVTASLVNRGAAVDYLAQTQPKTVWLLGSGWEGGYSLEDTVCAGAIASLLREKGIDFTVGNDEVVAA..QSLYRQWRSDLLNLFKQASHGQRLLKLDRLEDLRYCATEDLIDILPKQVSPGVLTAA
+COMB_THEVO/1-211             MIRIADGRK.EENWS..SINIIIDIFRSTTTIPVILSRGARYIVPFKDVTSALNFKRKN..KNVVLIGEKYGIKPPFFDYDNSPAQI.INADL......EGKIIAFTSTNGMYVLSRIKR..GRILFSSLVNMSATIKKVKGKD..DILVVPSNRPIGKAVEDNIFAEMLK..LALEGKNY....DREILV..NRIRETKENTVVSISTQ..............DLEICLKLDLLDCVPEYIE.GKIVND
+COMB_THEAC/1-211             MIRIGDGRK.SDSWA..EINVVVDIFRSTTTIPMILFRGAKYILPFRDVRKAIEFKRRN..PGTILVGEKYGIKPPYFDYDNSPAEI.AEADL......SGKVVAFTSTNGTYVLSRIRS..GRIIFSSYVNLSATVAMIRSQR..DVLILPSNRPIGKAPEDILFANLLK..LMAEGHEV....DVSEYT..RKTEEINRNIIAGVGER..............DLEFCLRVDHTNIVPEYID.GRIVQS
+COMB_BACSU/2-223             PIAIYQGHH..HSLAPADINIVIDVIRAFTVAHYAFIGGAKEILLVRTADEAFALKDTY..PDYVLTGEEKGVGISGFDLDNSPKRM.AGQNM......TDKSLIQKTTNGVTAALGALN.AKHLFVTGFSNAKTTAQHVKKLVANDCVINIVASHP.SGDDDMACAEYIK..GIIEGTNV.....VTAAE..AIERIKGSSVAEKFFDCR......QPLFDSEDIVYCTKELTGDFVMKVKQDGEVPTI
+COMB_STRCO/12-227            DTRFVGIPE..VGEA.PAVAVVVDVMRAFTVAAWAFARGAEKIVLAGSLDEALALKERD..PARVAL..KDGPLTPGFDLVNSPGLL.RSADL......AGRTVVQKTTAGTVGALAVRD.ASLVLCAGFVVAEATARVLRARAPEHVTFVVTGEDG.RADEDLACARYIA..RRAAGHDA....DAAGFLGRAAESRAATELVQGVRQGVH..........PDDVALCLELDRFPFAMVAAP..EDSLM
+#=GC seq_cons                hIclspshc.psstu..clAlVIDVLRATTTIssALspGAccllsspol--AlthK+p...cssllsGERtGhplsGFDluNSPhEl.sstcl......sGKsllhTTTNGTpslppl+s..pcllhuulVNtcAsActl+shs.pDVhlVsuGpsGthulEDhlsAGhIt..lttctc-h....Dsushs..pphccsscssltslppst.....lhtLsh.cDl-aChccDhhchVPphhs.Gcls.t
+//
+# STOCKHOLM 1.0
+#=GF ID   3-alpha
+#=GF AC   PF03475.3
+#=GF DE   3-alpha domain
+#=GF AU   Aravind L, Anantharaman V
+#=GF SE   Aravind L, Anantharaman V
+#=GF GA   25.00 25.00; 25.00 25.00;
+#=GF TC   25.40 25.40; 25.00 25.00;
+#=GF NC   24.40 24.40; 24.60 24.60;
+#=GF TP   Domain
+#=GF BM   hmmbuild -F HMM_ls SEED
+#=GF BM   hmmcalibrate --seed 0 HMM_ls
+#=GF BM   hmmbuild -f -F HMM_fs SEED
+#=GF BM   hmmcalibrate --seed 0 HMM_fs
+#=GF AM   globalfirst
+#=GF RC   See figure 2.
+#=GF RN   [1]
+#=GF RM   11886751
+#=GF RT   MOSC domains: ancient, predicted sulfur-carrier domains,
+#=GF RT   present in diverse metal--sulfur cluster biosynthesis
+#=GF RT   proteins including Molybdenum cofactor sulfurases. 
+#=GF RA   Anantharaman V, Aravind L; 
+#=GF RL   FEMS Microbiol Lett 2002;207:55-61.
+#=GF DR   SCOP; 1o67; fa;
+#=GF DR   INTERPRO; IPR005163;
+#=GF DR   PDB; 1o65 A; 178; 224;
+#=GF DR   PDB; 1o65 B; 178; 224;
+#=GF DR   PDB; 1o65 C; 178; 224;
+#=GF DR   PDB; 1o67 A; 178; 224;
+#=GF DR   PDB; 1o67 B; 178; 224;
+#=GF DR   PDB; 1o67 C; 178; 224;
+#=GF CC   This small triple helical domain has been predicted
+#=GF CC   to assume a topology similar to helix-turn-helix
+#=GF CC   domains. These domains are found at the C-terminus
+#=GF CC   of proteins related to Swiss:P32157.
+#=GF SQ   11
+#=GS Y278_HAEIN/174-219    AC P43976
+#=GS Q99RT6_STAAM/171-217  AC Q99RT6
+#=GS Q9PHR4_CAMJE/164-215  AC Q9PHR4
+#=GS Q9RT82_DEIRA/181-226  AC Q9RT82
+#=GS Q9KF70_BACHD/170-214  AC Q9KF70
+#=GS Q9I1P0_PSEAE/174-219  AC Q9I1P0
+#=GS Q9I607_PSEAE/169-215  AC Q9I607
+#=GS O86804_STRCO/189-233  AC O86804
+#=GS P95151_MYCTU/201-244  AC P95151
+#=GS YIIM_ECOLI/168-214    AC P32157
+#=GS O34542_BACSU/173-219  AC O34542
+#=GS YIIM_ECOLI/168-214 DR PDB; 1o65 A; 178; 224;
+#=GS YIIM_ECOLI/168-214 DR PDB; 1o65 B; 178; 224;
+#=GS YIIM_ECOLI/168-214 DR PDB; 1o65 C; 178; 224;
+#=GS YIIM_ECOLI/168-214 DR PDB; 1o67 A; 178; 224;
+#=GS YIIM_ECOLI/168-214 DR PDB; 1o67 B; 178; 224;
+#=GS YIIM_ECOLI/168-214 DR PDB; 1o67 C; 178; 224;
+Y278_HAEIN/174-219           QITIRHLNRLLSTP.....KNEAELDSALEIEV.LAEAFKRSIRSQISKFKQ
+Q99RT6_STAAM/171-217         RLSVQQLNDLYYNDRQ....NQDMLRYALNNPF.LSPTRRDKLQKMYNRTLK
+Q9PHR4_CAMJE/164-215         SLSVFELNQLFYSPHQILKQNPNLLDKLEKLNSLISQNWHETIHKRLKNTYD
+Q9RT82_DEIRA/181-226         APTIGELFDADFAKSH....DPAELRAWLTFP..LGKRQRKEVEKWLAKAEG
+Q9KF70_BACHD/170-214         HPTVLEVNQLYYPKDI....NKEQLRRMSQLPE.LADAWKKAFSKKLANA..
+Q9I1P0_PSEAE/174-219         DWSLLRLSEVLFDRRA....DAELLRQCLPLP..LTPSWRRTLERRLEKGQV
+Q9I607_PSEAE/169-215         ELTVARLLQWYFGDPL....EPLGLRQMMACDA.LSQRWRKTAAKRLSSGVV
+O86804_STRCO/189-233         EVTVALQFRAVTTQ.......RELLPRLLAAGGALHPEALATARKYVAEYGA
+P95151_MYCTU/201-244         NVTVGLVFRARTSE.......SELLPQLLAADA.LAAELKAYARERTPSPPP
+YIIM_ECOLI/168-214           DVTVQEAAAIAWHMPF....DDDQYHRLLSAAG.LSKSWTRTMQKRRLSGKI
+#=GR YIIM_ECOLI/168-214 SS   SCBHHHHHHHHHTSCC....CHHHHHHHHTSTT.CCHHHHHHHHHHHHHSSC
+#=GR YIIM_ECOLI/168-214 SA   6000320010013274....3372052026033.108303630350385563
+O34542_BACSU/173-219         GISVQFANRINYHDAK....NLTAIERILSEAA.LSESWRASFMKKKDRLLP
+#=GC SS_cons                 SCBHHHHHHHHHTSCC....CHHHHHHHHTSTT.CCHHHHHHHHHHHHHSSC
+#=GC SA_cons                 6000320010013274....3372052026033.108303630350385563
+#=GC seq_cons                plTVtclsclhasc......stphLcphLshss.Lupsa+cohpK+lspshs
+//
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/phi.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/phi.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/phi.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,397 @@
+BLASTP 2.0.14 [Jun-29-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CYS1_DICDI
+         (351 letters)
+
+Database: /home/peter/blast/data/swissprot
+           88,780 sequences; 31,984,247 total letters
+
+Searching......................................................................................................................................................
+3 occurrence(s) of pattern in query
+  CYS1_DICDI; PATTERN.
+ pattern P-E-E-Q at position 23 of query sequence
+effective database length=3.2e+07
+ pattern probability=8.9e-06
+lengthXprobability=2.8e+02
+
+Number of occurrences of pattern in the database is 349
+  CYS1_DICDI; PATTERN.
+ pattern P-E-E-Q at position 120 of query sequence
+effective database length=3.2e+07
+ pattern probability=8.9e-06
+lengthXprobability=2.8e+02
+
+Number of occurrences of pattern in the database is 349
+  CYS1_DICDI; PATTERN.
+ pattern P-E-E-Q at position 237 of query sequence
+effective database length=3.2e+07
+ pattern probability=8.9e-06
+lengthXprobability=2.8e+02
+
+Number of occurrences of pattern in the database is 349
+done
+
+
+                                                                   Score     E
+                                                                   (bits)  Value
+
+Significant matches for pattern occurrence 1 at position 23
+
+
+sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR                  688  0.0
+sp|P30957|RYNC_RABIT RYANODINE RECEPTOR, CARDIAC MUSCLE                 8  4.8
+sp|Q08862|GTC_RABIT GLUTATHIONE S-TRANSFERASE YC (ALPHA II) (GST...     7  6.0
+sp|O95801|TTC4_HUMAN TETRATRICOPEPTIDE REPEAT PROTEIN 4                 7  7.6
+sp|P36114|YKZ8_YEAST HYPOTHETICAL 81.8 KDA PROTEIN IN YPT52-DBP7...     7  9.6
+
+
+Significant matches for pattern occurrence 2 at position 120
+
+
+sp|P11559|MCRA_METVO METHYL-COENZYME M REDUCTASE ALPHA SUBUNIT         13  0.13
+sp|Q49605|MCRA_METKA METHYL-COENZYME M REDUCTASE I ALPHA SUBUNIT...    11  0.43
+sp|P81901|FER_PYRIS FERREDOXIN (SEVEN-IRON FERREDOXIN)                 11  0.55
+sp|Q58256|MCRX_METJA METHYL-COENZYME M REDUCTASE II ALPHA SUBUNI...    10  1.1
+sp|P53203|YG14_YEAST HYPOTHETICAL 52.9 KD PROTEIN IN ERP6-TFG2 I...     8  3.0
+sp|P55002|MGP1_MOUSE MICROFIBRIL-ASSOCIATED GLYCOPROTEIN PRECURS...     7  6.0
+sp|Q06234|ASH1_XENLA ACHAETE-SCUTE HOMOLOG 1                            7  7.6
+sp|P20918|PLMN_MOUSE PLASMINOGEN PRECURSOR [CONTAINS: ANGIOSTATIN]      7  7.6
+
+
+Significant matches for pattern occurrence 3 at position 237
+
+
+sp|P49362|GCSB_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] B, ...     9  1.4
+sp|P49361|GCSA_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] A, ...     9  1.4
+sp|O49852|GCSP_FLATR GLYCINE DEHYDROGENASE [DECARBOXYLATING], MI...     8  4.8
+sp|P32767|PDR6_YEAST PLEIOTROPIC DRUG RESISTANCE REGULATORY PROT...     7  6.0
+sp|O49850|GCSP_FLAAN GLYCINE DEHYDROGENASE [DECARBOXYLATING], MI...     7  9.6
+
+
+Significant alignments for pattern occurrence 1 at position 23
+
+>sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 343
+
+ Score =  688 bits (1789), Expect = 0.0
+ Identities = 343/351 (97%), Positives = 343/351 (97%), Gaps = 8/351 (2%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+pattern 23                        ****
+            MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE
+Sbjct:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+
+Query:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPP 120
+pattern 120                                                            *
+            ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 
+Sbjct:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP- 119
+
+Query:  121 EEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 180
+pattern 121 ***
+               TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE
+Sbjct:  120 ---TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+
+Query:  181 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ 240
+pattern 237                                                         ****
+            CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG    
+Sbjct:  177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG---- 232
+
+Query:  241 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG 300
+            AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG
+Sbjct:  233 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG 292
+
+Query:  301 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+Sbjct:  293 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+
+
+>sp|P30957|RYNC_RABIT RYANODINE RECEPTOR, CARDIAC MUSCLE
+          Length = 4969
+
+ Score =  7.8 bits (25), Expect = 4.8
+ Identities = 14/39 (35%), Positives = 19/39 (47%)
+
+Query:  23   PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEE 61
+pattern 23   ****
+             PEEQ +F E + K  +K   EE     E  +   G+ EE
+Sbjct:  4414 PEEQEKFQEQKTKEEEKEEKEETKSEPEKAEGEDGEKEE 4452
+
+
+>sp|Q08862|GTC_RABIT GLUTATHIONE S-TRANSFERASE YC (ALPHA II) (GST CLASS-ALPHA)
+          Length = 221
+
+ Score =  7.4 bits (24), Expect = 6.0
+ Identities = 19/67 (28%), Positives = 35/67 (51%), Gaps = 12/67 (17%)
+
+Query:  21  IPPEEQ-SQFLEFQDKFNKKY---------SH-EEYLERFEIFKSNLGKIEEL-NLIAIN 68
+pattern 23    ****
+            +PPEEQ ++  + +DK   +Y         SH ++YL   ++ K+++  +E L N+  +N
+Sbjct:  112 LPPEEQEAKLAQIKDKAKNRYFPAFEKVLKSHGQDYLVGNKLSKADILLVELLYNVEELN 171
+
+Query:  69  HKADTKF 75
+              A   F
+Sbjct:  172 PGATASF 178
+
+
+>sp|O95801|TTC4_HUMAN TETRATRICOPEPTIDE REPEAT PROTEIN 4
+          Length = 356
+
+ Score =  7.1 bits (23), Expect = 7.6
+ Identities = 14/67 (20%), Positives = 32/67 (46%), Gaps = 5/67 (7%)
+
+Query:  23  PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGK---IEELNLIAINHKADTKFGVNK 79
+pattern 23  ****
+            PEEQ++   ++D+ N  +  ++Y +    +   L K     +LN +   ++A  ++ +  
+Sbjct:  75  PEEQAK--TYKDEGNDYFKEKDYKKAVISYTEGLKKKCADPDLNAVLYTNRAAAQYYLGN 132
+
+Query:  80  FADLSSD 86
+            F    +D
+Sbjct:  133 FRSALND 139
+
+
+>sp|P36114|YKZ8_YEAST HYPOTHETICAL 81.8 KDA PROTEIN IN YPT52-DBP7 INTERGENIC REGION
+          Length = 725
+
+ Score =  6.8 bits (22), Expect = 9.6
+ Identities = 21/99 (21%), Positives = 43/99 (43%), Gaps = 21/99 (21%)
+
+Query:  21  IPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN 78
+pattern 23    ****
+            + PEEQ     L+F ++      H    ER  +  +++G    +N      +   + G+ 
+Sbjct:  213 LTPEEQKDKDLLQFAEQI-----HSMRTER--LSGAHIGNSPAIN------RLRGELGLQ 259
+
+Query:  79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS 117
+               DL  +E  ++       + +DD+ ++    DEF++S
+Sbjct:  260 AMEDLPEEEITDH------KVLSDDIDLSQATIDEFVHS 292
+
+
+
+Significant alignments for pattern occurrence 2 at position 120
+
+>sp|P11559|MCRA_METVO METHYL-COENZYME M REDUCTASE ALPHA SUBUNIT
+          Length = 555
+
+ Score = 13.0 bits (40), Expect = 0.13
+ Identities = 16/28 (57%), Positives = 18/28 (64%), Gaps = 3/28 (10%)
+
+Query:  99  IFTDDLPVADYLDDEF---INSIPPEEQ 123
+pattern 120                         ****
+            IFT D  +AD LDD F   IN + PEEQ
+Sbjct:  170 IFTGDDELADELDDRFVIDINKLFPEEQ 197
+
+
+>sp|Q49605|MCRA_METKA METHYL-COENZYME M REDUCTASE I ALPHA SUBUNIT (MCR I ALPHA)
+          Length = 553
+
+ Score = 11.2 bits (35), Expect = 0.43
+ Identities = 14/28 (50%), Positives = 18/28 (64%), Gaps = 3/28 (10%)
+
+Query:  99  IFTDDLPVADYLDDEFINSIP---PEEQ 123
+pattern 120                         ****
+            I T DL +AD +DD+F+  I    PEEQ
+Sbjct:  168 IITGDLELADEIDDKFLIDIEKLFPEEQ 195
+
+
+>sp|P81901|FER_PYRIS FERREDOXIN (SEVEN-IRON FERREDOXIN)
+          Length = 101
+
+ Score = 10.9 bits (34), Expect = 0.55
+ Identities = 12/23 (52%), Positives = 16/23 (69%), Gaps = 1/23 (4%)
+
+Query:  114 FINSIPPEEQTAF-DWRTRGAVT 135
+pattern 120       ****
+            F  S+ PEEQ AF +W+TR  +T
+Sbjct:  78  FGKSLTPEEQRAFEEWKTRYGIT 100
+
+
+>sp|Q58256|MCRX_METJA METHYL-COENZYME M REDUCTASE II ALPHA SUBUNIT (MCR II ALPHA)
+          Length = 553
+
+ Score =  9.8 bits (31), Expect = 1.1
+ Identities = 14/28 (50%), Positives = 17/28 (60%), Gaps = 3/28 (10%)
+
+Query:  99  IFTDDLPVADYLDDEF---INSIPPEEQ 123
+pattern 120                         ****
+            IFT D  +AD +D  F   IN + PEEQ
+Sbjct:  168 IFTGDDELADEIDKRFLIDINKLFPEEQ 195
+
+
+>sp|P53203|YG14_YEAST HYPOTHETICAL 52.9 KD PROTEIN IN ERP6-TFG2 INTERGENIC REGION
+          Length = 462
+
+ Score =  8.5 bits (27), Expect = 3.0
+ Identities = 13/39 (33%), Positives = 21/39 (53%), Gaps = 9/39 (23%)
+
+Query:  112 DEFINSIP-------PEEQT--AFDWRTRGAVTPVKNQG 141
+pattern 120                ****
+            DEF+N+ P       PEEQ+  A++W  +  +  + N G
+Sbjct:  308 DEFLNTSPSPEVFTLPEEQSGMAWEWHDKDWMLDLTNDG 346
+
+
+>sp|P55002|MGP1_MOUSE MICROFIBRIL-ASSOCIATED GLYCOPROTEIN PRECURSOR (MAGP) (MAGP-1)
+          Length = 183
+
+ Score =  7.4 bits (24), Expect = 6.0
+ Identities = 11/37 (29%), Positives = 18/37 (47%)
+
+Query:  100 FTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTP 136
+pattern 120                     ****
+            + D +  ADY D + ++   PEEQ     + +  V P
+Sbjct:  37  YGDQIDNADYYDYQEVSPRTPEEQFQSQQQVQQEVIP 73
+
+
+>sp|Q06234|ASH1_XENLA ACHAETE-SCUTE HOMOLOG 1
+          Length = 199
+
+ Score =  7.1 bits (23), Expect = 7.6
+ Identities = 11/27 (40%), Positives = 15/27 (54%), Gaps = 1/27 (3%)
+
+Query:  105 PVADYLDDE-FINSIPPEEQTAFDWRT 130
+pattern 120                 ****
+            PV+ Y  DE   + + PEEQ   D+ T
+Sbjct:  171 PVSSYSSDEGSYDPLSPEEQELLDFTT 197
+
+
+>sp|P20918|PLMN_MOUSE PLASMINOGEN PRECURSOR [CONTAINS: ANGIOSTATIN]
+          Length = 812
+
+ Score =  7.1 bits (23), Expect = 7.6
+ Identities = 8/13 (61%), Positives = 11/13 (84%)
+
+Query:  112 DEFINSIPPEEQT 124
+pattern 120         ****
+            D+  +S+PPEEQT
+Sbjct:  359 DQSDSSVPPEEQT 371
+
+
+
+Significant alignments for pattern occurrence 3 at position 237
+
+>sp|P49362|GCSB_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] B, MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE B) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN B)
+          Length = 1034
+
+ Score =  9.5 bits (30), Expect = 1.4
+ Identities = 21/79 (26%), Positives = 39/79 (48%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F   P  +++    I +T P +I  D++++  +  G+ +     +
+Sbjct:  80  NSAT--PEEQTKMAEFVGFPNLDSL----IDATVPKSIRLDSMKYSKFDEGLTESQMIAH 133
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  134 MQD-------LASKNKIFK 145
+
+
+>sp|P49361|GCSA_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] A, MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE A) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN A)
+          Length = 1037
+
+ Score =  9.5 bits (30), Expect = 1.4
+ Identities = 21/79 (26%), Positives = 39/79 (48%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F   P  +++    I +T P +I  D++++  +  G+ +     +
+Sbjct:  83  NSAT--PEEQTKMAEFVGFPNLDSL----IDATVPKSIRLDSMKYSKFDEGLTESQMIAH 136
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  137 MQD-------LASKNKIFK 148
+
+
+>sp|O49852|GCSP_FLATR GLYCINE DEHYDROGENASE [DECARBOXYLATING], MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN)
+          Length = 1034
+
+ Score =  7.8 bits (25), Expect = 4.8
+ Identities = 21/79 (26%), Positives = 38/79 (47%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F      +++    I +T P AI  D++++  +  G+ +     +
+Sbjct:  80  NSAT--PEEQTKMAEFVGFSNLDSL----IDATVPKAIRLDSMKYSKFDEGLTESQMIAH 133
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  134 MQD-------LASKNKIFK 145
+
+
+>sp|P32767|PDR6_YEAST PLEIOTROPIC DRUG RESISTANCE REGULATORY PROTEIN 6
+          Length = 1081
+
+ Score =  7.4 bits (24), Expect = 6.0
+ Identities = 25/93 (26%), Positives = 37/93 (38%), Gaps = 17/93 (18%)
+
+Query:  159 HFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI-IKNGGIQTESS 217
+            +F S+N+   +S   L     E M  +      E C   L P   ++I   N  I  +S+
+Sbjct:  642 NFTSKNEQEKISNDKL-----EVMVIKTVSTLCETCREELTPYLMHFISFLNTVIMPDSN 696
+
+Query:  218 YPYTAETG--------TQCNFNSANIGPEEQAK 242
+pattern 237                            ****
+              +   T          QC  ++   GPEEQAK
+Sbjct:  697 VSHFTRTKLVRSIGYVVQCQVSN---GPEEQAK 726
+
+
+>sp|O49850|GCSP_FLAAN GLYCINE DEHYDROGENASE [DECARBOXYLATING], MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN)
+          Length = 1034
+
+ Score =  6.8 bits (22), Expect = 9.6
+ Identities = 20/79 (25%), Positives = 38/79 (47%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F      +++    I +T P +I  D++++  +  G+ +     +
+Sbjct:  80  NSAT--PEEQTKMAEFVGFSNLDSL----IDATVPKSIRLDSMKYSKFDEGLTESQMIAH 133
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  134 MQD-------LASKNKIFK 145
+
+
+  Database: /home/peter/blast/data/swissprot
+    Posted date:  Oct 10, 2000 10:43 AM
+  Number of letters in database: 31,984,247
+  Number of sequences in database:  88,780
+  
+Lambda     K      H      C
+   0.270   0.0470    0.230    0.500 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 1047
+Number of Sequences: 88780
+Number of extensions: 1047
+Number of successful extensions: 36
+Number of sequences better than 10.0: 36
+Number of HSP's better than 10.0 without gapping: 0
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 0
+length of query: 351
+length of database: 31,984,247
+effective HSP length: 50
+effective length of query: 301
+effective length of database: 27,545,247
+effective search space: 8291119347
+effective search space used: 8291119347
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.6 bits)
+S2: 65 (29.7 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/phipsi.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/phipsi.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/phipsi.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3992 @@
+BLASTP 2.0.14 [Jun-29-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CYS1_DICDI
+         (351 letters)
+
+Database: /home/peter/blast/data/swissprot
+           88,780 sequences; 31,984,247 total letters
+
+Searching......................................................................................................................................................
+3 occurrence(s) of pattern in query
+  CYS1_DICDI; PATTERN.
+ pattern P-E-E-Q at position 23 of query sequence
+effective database length=3.2e+07
+ pattern probability=8.9e-06
+lengthXprobability=2.8e+02
+
+Number of occurrences of pattern in the database is 349
+  CYS1_DICDI; PATTERN.
+ pattern P-E-E-Q at position 120 of query sequence
+effective database length=3.2e+07
+ pattern probability=8.9e-06
+lengthXprobability=2.8e+02
+
+Number of occurrences of pattern in the database is 349
+  CYS1_DICDI; PATTERN.
+ pattern P-E-E-Q at position 237 of query sequence
+effective database length=3.2e+07
+ pattern probability=8.9e-06
+lengthXprobability=2.8e+02
+
+Number of occurrences of pattern in the database is 349
+done
+
+
+Results from round 1
+
+                                                                   Score     E
+                                                                   (bits)  Value
+
+Significant matches for pattern occurrence 1 at position 23
+
+
+sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR                  688  0.0
+sp|P30957|RYNC_RABIT RYANODINE RECEPTOR, CARDIAC MUSCLE                 8  4.8
+sp|Q08862|GTC_RABIT GLUTATHIONE S-TRANSFERASE YC (ALPHA II) (GST...     7  6.0
+sp|O95801|TTC4_HUMAN TETRATRICOPEPTIDE REPEAT PROTEIN 4                 7  7.6
+sp|P36114|YKZ8_YEAST HYPOTHETICAL 81.8 KDA PROTEIN IN YPT52-DBP7...     7  9.6
+
+
+Significant matches for pattern occurrence 2 at position 120
+
+
+sp|P11559|MCRA_METVO METHYL-COENZYME M REDUCTASE ALPHA SUBUNIT         13  0.13
+sp|Q49605|MCRA_METKA METHYL-COENZYME M REDUCTASE I ALPHA SUBUNIT...    11  0.43
+sp|P81901|FER_PYRIS FERREDOXIN (SEVEN-IRON FERREDOXIN)                 11  0.55
+sp|Q58256|MCRX_METJA METHYL-COENZYME M REDUCTASE II ALPHA SUBUNI...    10  1.1
+sp|P53203|YG14_YEAST HYPOTHETICAL 52.9 KD PROTEIN IN ERP6-TFG2 I...     8  3.0
+sp|P55002|MGP1_MOUSE MICROFIBRIL-ASSOCIATED GLYCOPROTEIN PRECURS...     7  6.0
+sp|Q06234|ASH1_XENLA ACHAETE-SCUTE HOMOLOG 1                            7  7.6
+sp|P20918|PLMN_MOUSE PLASMINOGEN PRECURSOR [CONTAINS: ANGIOSTATIN]      7  7.6
+
+
+Significant matches for pattern occurrence 3 at position 237
+
+
+sp|P49362|GCSB_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] B, ...     9  1.4
+sp|P49361|GCSA_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] A, ...     9  1.4
+sp|O49852|GCSP_FLATR GLYCINE DEHYDROGENASE [DECARBOXYLATING], MI...     8  4.8
+sp|P32767|PDR6_YEAST PLEIOTROPIC DRUG RESISTANCE REGULATORY PROT...     7  6.0
+sp|O49850|GCSP_FLAAN GLYCINE DEHYDROGENASE [DECARBOXYLATING], MI...     7  9.6
+
+
+Significant alignments for pattern occurrence 1 at position 23
+
+>sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 343
+
+ Score =  688 bits (1789), Expect = 0.0
+ Identities = 343/351 (97%), Positives = 343/351 (97%), Gaps = 8/351 (2%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+pattern 23                        ****
+            MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE
+Sbjct:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+
+Query:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPP 120
+pattern 120                                                            *
+            ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 
+Sbjct:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP- 119
+
+Query:  121 EEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 180
+pattern 121 ***
+               TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE
+Sbjct:  120 ---TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+
+Query:  181 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ 240
+pattern 237                                                         ****
+            CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG    
+Sbjct:  177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG---- 232
+
+Query:  241 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG 300
+            AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG
+Sbjct:  233 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG 292
+
+Query:  301 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+Sbjct:  293 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+
+
+>sp|P30957|RYNC_RABIT RYANODINE RECEPTOR, CARDIAC MUSCLE
+          Length = 4969
+
+ Score =  7.8 bits (25), Expect = 4.8
+ Identities = 14/39 (35%), Positives = 19/39 (47%)
+
+Query:  23   PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEE 61
+pattern 23   ****
+             PEEQ +F E + K  +K   EE     E  +   G+ EE
+Sbjct:  4414 PEEQEKFQEQKTKEEEKEEKEETKSEPEKAEGEDGEKEE 4452
+
+
+>sp|Q08862|GTC_RABIT GLUTATHIONE S-TRANSFERASE YC (ALPHA II) (GST CLASS-ALPHA)
+          Length = 221
+
+ Score =  7.4 bits (24), Expect = 6.0
+ Identities = 19/67 (28%), Positives = 35/67 (51%), Gaps = 12/67 (17%)
+
+Query:  21  IPPEEQ-SQFLEFQDKFNKKY---------SH-EEYLERFEIFKSNLGKIEEL-NLIAIN 68
+pattern 23    ****
+            +PPEEQ ++  + +DK   +Y         SH ++YL   ++ K+++  +E L N+  +N
+Sbjct:  112 LPPEEQEAKLAQIKDKAKNRYFPAFEKVLKSHGQDYLVGNKLSKADILLVELLYNVEELN 171
+
+Query:  69  HKADTKF 75
+              A   F
+Sbjct:  172 PGATASF 178
+
+
+>sp|O95801|TTC4_HUMAN TETRATRICOPEPTIDE REPEAT PROTEIN 4
+          Length = 356
+
+ Score =  7.1 bits (23), Expect = 7.6
+ Identities = 14/67 (20%), Positives = 32/67 (46%), Gaps = 5/67 (7%)
+
+Query:  23  PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGK---IEELNLIAINHKADTKFGVNK 79
+pattern 23  ****
+            PEEQ++   ++D+ N  +  ++Y +    +   L K     +LN +   ++A  ++ +  
+Sbjct:  75  PEEQAK--TYKDEGNDYFKEKDYKKAVISYTEGLKKKCADPDLNAVLYTNRAAAQYYLGN 132
+
+Query:  80  FADLSSD 86
+            F    +D
+Sbjct:  133 FRSALND 139
+
+
+>sp|P36114|YKZ8_YEAST HYPOTHETICAL 81.8 KDA PROTEIN IN YPT52-DBP7 INTERGENIC REGION
+          Length = 725
+
+ Score =  6.8 bits (22), Expect = 9.6
+ Identities = 21/99 (21%), Positives = 43/99 (43%), Gaps = 21/99 (21%)
+
+Query:  21  IPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN 78
+pattern 23    ****
+            + PEEQ     L+F ++      H    ER  +  +++G    +N      +   + G+ 
+Sbjct:  213 LTPEEQKDKDLLQFAEQI-----HSMRTER--LSGAHIGNSPAIN------RLRGELGLQ 259
+
+Query:  79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS 117
+               DL  +E  ++       + +DD+ ++    DEF++S
+Sbjct:  260 AMEDLPEEEITDH------KVLSDDIDLSQATIDEFVHS 292
+
+
+
+Significant alignments for pattern occurrence 2 at position 120
+
+>sp|P11559|MCRA_METVO METHYL-COENZYME M REDUCTASE ALPHA SUBUNIT
+          Length = 555
+
+ Score = 13.0 bits (40), Expect = 0.13
+ Identities = 16/28 (57%), Positives = 18/28 (64%), Gaps = 3/28 (10%)
+
+Query:  99  IFTDDLPVADYLDDEF---INSIPPEEQ 123
+pattern 120                         ****
+            IFT D  +AD LDD F   IN + PEEQ
+Sbjct:  170 IFTGDDELADELDDRFVIDINKLFPEEQ 197
+
+
+>sp|Q49605|MCRA_METKA METHYL-COENZYME M REDUCTASE I ALPHA SUBUNIT (MCR I ALPHA)
+          Length = 553
+
+ Score = 11.2 bits (35), Expect = 0.43
+ Identities = 14/28 (50%), Positives = 18/28 (64%), Gaps = 3/28 (10%)
+
+Query:  99  IFTDDLPVADYLDDEFINSIP---PEEQ 123
+pattern 120                         ****
+            I T DL +AD +DD+F+  I    PEEQ
+Sbjct:  168 IITGDLELADEIDDKFLIDIEKLFPEEQ 195
+
+
+>sp|P81901|FER_PYRIS FERREDOXIN (SEVEN-IRON FERREDOXIN)
+          Length = 101
+
+ Score = 10.9 bits (34), Expect = 0.55
+ Identities = 12/23 (52%), Positives = 16/23 (69%), Gaps = 1/23 (4%)
+
+Query:  114 FINSIPPEEQTAF-DWRTRGAVT 135
+pattern 120       ****
+            F  S+ PEEQ AF +W+TR  +T
+Sbjct:  78  FGKSLTPEEQRAFEEWKTRYGIT 100
+
+
+>sp|Q58256|MCRX_METJA METHYL-COENZYME M REDUCTASE II ALPHA SUBUNIT (MCR II ALPHA)
+          Length = 553
+
+ Score =  9.8 bits (31), Expect = 1.1
+ Identities = 14/28 (50%), Positives = 17/28 (60%), Gaps = 3/28 (10%)
+
+Query:  99  IFTDDLPVADYLDDEF---INSIPPEEQ 123
+pattern 120                         ****
+            IFT D  +AD +D  F   IN + PEEQ
+Sbjct:  168 IFTGDDELADEIDKRFLIDINKLFPEEQ 195
+
+
+>sp|P53203|YG14_YEAST HYPOTHETICAL 52.9 KD PROTEIN IN ERP6-TFG2 INTERGENIC REGION
+          Length = 462
+
+ Score =  8.5 bits (27), Expect = 3.0
+ Identities = 13/39 (33%), Positives = 21/39 (53%), Gaps = 9/39 (23%)
+
+Query:  112 DEFINSIP-------PEEQT--AFDWRTRGAVTPVKNQG 141
+pattern 120                ****
+            DEF+N+ P       PEEQ+  A++W  +  +  + N G
+Sbjct:  308 DEFLNTSPSPEVFTLPEEQSGMAWEWHDKDWMLDLTNDG 346
+
+
+>sp|P55002|MGP1_MOUSE MICROFIBRIL-ASSOCIATED GLYCOPROTEIN PRECURSOR (MAGP) (MAGP-1)
+          Length = 183
+
+ Score =  7.4 bits (24), Expect = 6.0
+ Identities = 11/37 (29%), Positives = 18/37 (47%)
+
+Query:  100 FTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTP 136
+pattern 120                     ****
+            + D +  ADY D + ++   PEEQ     + +  V P
+Sbjct:  37  YGDQIDNADYYDYQEVSPRTPEEQFQSQQQVQQEVIP 73
+
+
+>sp|Q06234|ASH1_XENLA ACHAETE-SCUTE HOMOLOG 1
+          Length = 199
+
+ Score =  7.1 bits (23), Expect = 7.6
+ Identities = 11/27 (40%), Positives = 15/27 (54%), Gaps = 1/27 (3%)
+
+Query:  105 PVADYLDDE-FINSIPPEEQTAFDWRT 130
+pattern 120                 ****
+            PV+ Y  DE   + + PEEQ   D+ T
+Sbjct:  171 PVSSYSSDEGSYDPLSPEEQELLDFTT 197
+
+
+>sp|P20918|PLMN_MOUSE PLASMINOGEN PRECURSOR [CONTAINS: ANGIOSTATIN]
+          Length = 812
+
+ Score =  7.1 bits (23), Expect = 7.6
+ Identities = 8/13 (61%), Positives = 11/13 (84%)
+
+Query:  112 DEFINSIPPEEQT 124
+pattern 120         ****
+            D+  +S+PPEEQT
+Sbjct:  359 DQSDSSVPPEEQT 371
+
+
+
+Significant alignments for pattern occurrence 3 at position 237
+
+>sp|P49362|GCSB_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] B, MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE B) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN B)
+          Length = 1034
+
+ Score =  9.5 bits (30), Expect = 1.4
+ Identities = 21/79 (26%), Positives = 39/79 (48%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F   P  +++    I +T P +I  D++++  +  G+ +     +
+Sbjct:  80  NSAT--PEEQTKMAEFVGFPNLDSL----IDATVPKSIRLDSMKYSKFDEGLTESQMIAH 133
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  134 MQD-------LASKNKIFK 145
+
+
+>sp|P49361|GCSA_FLAPR GLYCINE DEHYDROGENASE [DECARBOXYLATING] A, MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE A) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN A)
+          Length = 1037
+
+ Score =  9.5 bits (30), Expect = 1.4
+ Identities = 21/79 (26%), Positives = 39/79 (48%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F   P  +++    I +T P +I  D++++  +  G+ +     +
+Sbjct:  83  NSAT--PEEQTKMAEFVGFPNLDSL----IDATVPKSIRLDSMKYSKFDEGLTESQMIAH 136
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  137 MQD-------LASKNKIFK 148
+
+
+>sp|O49852|GCSP_FLATR GLYCINE DEHYDROGENASE [DECARBOXYLATING], MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN)
+          Length = 1034
+
+ Score =  7.8 bits (25), Expect = 4.8
+ Identities = 21/79 (26%), Positives = 38/79 (47%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F      +++    I +T P AI  D++++  +  G+ +     +
+Sbjct:  80  NSAT--PEEQTKMAEFVGFSNLDSL----IDATVPKAIRLDSMKYSKFDEGLTESQMIAH 133
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  134 MQD-------LASKNKIFK 145
+
+
+>sp|P32767|PDR6_YEAST PLEIOTROPIC DRUG RESISTANCE REGULATORY PROTEIN 6
+          Length = 1081
+
+ Score =  7.4 bits (24), Expect = 6.0
+ Identities = 25/93 (26%), Positives = 37/93 (38%), Gaps = 17/93 (18%)
+
+Query:  159 HFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI-IKNGGIQTESS 217
+            +F S+N+   +S   L     E M  +      E C   L P   ++I   N  I  +S+
+Sbjct:  642 NFTSKNEQEKISNDKL-----EVMVIKTVSTLCETCREELTPYLMHFISFLNTVIMPDSN 696
+
+Query:  218 YPYTAETG--------TQCNFNSANIGPEEQAK 242
+pattern 237                            ****
+              +   T          QC  ++   GPEEQAK
+Sbjct:  697 VSHFTRTKLVRSIGYVVQCQVSN---GPEEQAK 726
+
+
+>sp|O49850|GCSP_FLAAN GLYCINE DEHYDROGENASE [DECARBOXYLATING], MITOCHONDRIAL PRECURSOR
+            (GLYCINE DECARBOXYLASE) (GLYCINE CLEAVAGE SYSTEM
+            P-PROTEIN)
+          Length = 1034
+
+ Score =  6.8 bits (22), Expect = 9.6
+ Identities = 20/79 (25%), Positives = 38/79 (47%), Gaps = 13/79 (16%)
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            NSA   PEEQ K++ F      +++    I +T P +I  D++++  +  G+ +     +
+Sbjct:  80  NSAT--PEEQTKMAEFVGFSNLDSL----IDATVPKSIRLDSMKYSKFDEGLTESQMIAH 133
+
+Query:  291 SLDHGILIVGYSAKNTIFR 309
+              D        ++KN IF+
+Sbjct:  134 MQD-------LASKNKIFK 145
+
+
+Searching..................................................done
+
+
+Results from round 2
+
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+Sequences used in model and found again:
+
+Sequences not found previously or not previously below threshold:
+
+sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR                  709  0.0
+sp|P43295|A494_ARATH PROBABLE CYSTEINE PROTEINASE A494 PRECURSOR      273  4e-73
+sp|P25804|CYSP_PEA CYSTEINE PROTEINASE 15A PRECURSOR (TURGOR-RES...   270  2e-72
+sp|P43296|RD19_ARATH CYSTEINE PROTEINASE RD19A PRECURSOR              266  6e-71
+sp|Q10716|CYS1_MAIZE CYSTEINE PROTEINASE 1 PRECURSOR                  252  6e-67
+sp|P04989|CYS2_DICDI CYSTEINE PROTEINASE 2 PRECURSOR (PRESTALK C...   250  2e-66
+sp|P54640|CYS5_DICDI CYSTEINE PROTEINASE 5 PRECURSOR                  238  1e-62
+sp|P14658|CYSP_TRYBB CYSTEINE PROTEINASE PRECURSOR                    236  4e-62
+sp|Q26534|CATL_SCHMA CATHEPSIN L PRECURSOR (SMCL1)                    233  3e-61
+sp|P35591|CYS1_LEIPI CYSTEINE PROTEINASE 1 PRECURSOR (AMASTIGOTE...   233  3e-61
+sp|P25775|LCPA_LEIME CYSTEINE PROTEINASE A PRECURSOR                  231  1e-60
+sp|P13277|CYS1_HOMAM DIGESTIVE CYSTEINE PROTEINASE 1 PRECURSOR        221  1e-57
+sp|P25779|CYSP_TRYCR CRUZIPAIN PRECURSOR (MAJOR CYSTEINE PROTEIN...   221  2e-57
+sp|P41721|CATV_NPVBM VIRAL CATHEPSIN (V-CATH)                         216  5e-56
+sp|P25782|CYS2_HOMAM DIGESTIVE CYSTEINE PROTEINASE 2 PRECURSOR        215  1e-55
+sp|P41715|CATV_NPVCF VIRAL CATHEPSIN (V-CATH)                         214  2e-55
+sp|P25784|CYS3_HOMAM DIGESTIVE CYSTEINE PROTEINASE 3 PRECURSOR        214  2e-55
+sp|P07154|CATL_RAT CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN...   212  7e-55
+sp|P06797|CATL_MOUSE CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTE...   212  1e-54
+sp|P12412|CYSP_VIGMU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYS...   209  8e-54
+sp|P25783|CATV_NPVAC VIRAL CATHEPSIN (V-CATH)                         209  8e-54
+sp|P25975|CATL_BOVIN CATHEPSIN L PRECURSOR                            208  1e-53
+sp|Q40143|CYS3_LYCES CYSTEINE PROTEINASE 3 PRECURSOR                  207  2e-53
+sp|Q05094|CYS2_LEIPI CYSTEINE PROTEINASE 2 PRECURSOR (AMASTIGOTE...   207  3e-53
+sp|P36400|LCPB_LEIME CYSTEINE PROTEINASE B PRECURSOR                  206  4e-53
+sp|P07711|CATL_HUMAN CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTE...   206  4e-53
+sp|Q28944|CATL_PIG CATHEPSIN L PRECURSOR                              206  5e-53
+sp|P00785|ACTN_ACTCH ACTINIDAIN PRECURSOR (ACTINIDIN)                 204  3e-52
+sp|P25803|CYSP_PHAVU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYS...   203  6e-52
+sp|Q10991|CATL_SHEEP CATHEPSIN L                                      201  1e-51
+sp|P43156|CYSP_HEMSP THIOL PROTEASE SEN102 PRECURSOR                  201  2e-51
+sp|P54639|CYS4_DICDI CYSTEINE PROTEINASE 4 PRECURSOR                  200  3e-51
+sp|O60911|CATM_HUMAN CATHEPSIN L2 PRECURSOR (CATHEPSIN V)             199  7e-51
+sp|O10364|CATV_NPVOP VIRAL CATHEPSIN (V-CATH)                         196  5e-50
+sp|P25777|ORYB_ORYSA ORYZAIN BETA CHAIN PRECURSOR                     196  5e-50
+sp|P25776|ORYA_ORYSA ORYZAIN ALPHA CHAIN PRECURSOR                    194  2e-49
+sp|P43297|RD21_ARATH CYSTEINE PROTEINASE RD21A PRECURSOR              193  4e-49
+sp|Q10717|CYS2_MAIZE CYSTEINE PROTEINASE 2 PRECURSOR                  193  5e-49
+sp|P14080|PAP2_CARPA CHYMOPAPAIN PRECURSOR (PAPAYA PROTEINASE II...   192  1e-48
+sp|P00786|CATH_RAT CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPS...   192  1e-48
+sp|P25251|CYS4_BRANA CYSTEINE PROTEINASE COT44 PRECURSOR              190  5e-48
+sp|P09668|CATH_HUMAN CATHEPSIN H PRECURSOR                            188  2e-47
+sp|P10056|PAP3_CARPA CARICAIN PRECURSOR (PAPAYA PROTEINASE OMEGA...   187  2e-47
+sp|P25778|ORYC_ORYSA ORYZAIN GAMMA CHAIN PRECURSOR                    187  2e-47
+sp|P15242|TES1_RAT TESTIN 1/2 PRECURSOR (CMB-22/CMB-23)               187  4e-47
+sp|O46427|CATH_PIG CATHEPSIN H PRECURSOR                              186  5e-47
+sp|P05167|ALEU_HORVU THIOL PROTEASE ALEURAIN PRECURSOR                185  9e-47
+sp|P43235|CATK_HUMAN CATHEPSIN K PRECURSOR (CATHEPSIN O) (CATHEP...   185  1e-46
+sp|P05994|PAP4_CARPA PAPAYA PROTEINASE IV PRECURSOR (PPIV) (PAPA...   184  3e-46
+sp|P25250|CYS2_HORVU CYSTEINE PROTEINASE EP-B 2 PRECURSOR             183  3e-46
+sp|P25249|CYS1_HORVU CYSTEINE PROTEINASE EP-B 1 PRECURSOR             183  5e-46
+sp|P43236|CATK_RABIT CATHEPSIN K PRECURSOR (OC-2 PROTEIN)             183  6e-46
+sp|P22895|P34_SOYBN P34 PROBABLE THIOL PROTEASE PRECURSOR             182  8e-46
+sp|P49935|CATH_MOUSE CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHE...   180  5e-45
+sp|P55097|CATK_MOUSE CATHEPSIN K PRECURSOR                            178  2e-44
+sp|P56202|CATW_HUMAN CATHEPSIN W PRECURSOR (LYMPHOPAIN)               177  3e-44
+sp|P56203|CATW_MOUSE CATHEPSIN W PRECURSOR (LYMPHOPAIN)               176  6e-44
+sp|P43234|CATO_HUMAN CATHEPSIN O PRECURSOR                            173  4e-43
+sp|P00784|PAPA_CARPA PAPAIN PRECURSOR (PAPAYA PROTEINASE I) (PPI)     173  7e-43
+sp|P25774|CATS_HUMAN CATHEPSIN S PRECURSOR                            171  3e-42
+sp||CATL_CHICK_1 [Segment 1 of 2] CATHEPSIN L                         167  2e-41
+sp|P25326|CATS_BOVIN CATHEPSIN S                                      165  1e-40
+sp|P80884|ANAN_ANACO ANANAIN                                          161  2e-39
+sp|Q02765|CATS_RAT CATHEPSIN S PRECURSOR                              158  1e-38
+sp|P20721|CYSL_LYCES LOW-TEMPERATURE-INDUCED CYSTEINE PROTEINASE...   158  2e-38
+sp|P36184|ACP1_ENTHI CYSTEINE PROTEINASE ACP1 PRECURSOR               152  1e-36
+sp|Q01957|CPP1_ENTHI CYSTEINE PROTEINASE 1 PRECURSOR                  150  4e-36
+sp|O17473|CATL_BRUPA CATHEPSIN L-LIKE PRECURSOR                       150  6e-36
+sp|P46102|CYSP_PLAVN CYSTEINE PROTEINASE PRECURSOR                    150  6e-36
+sp|Q06964|CPP3_ENTHI CYSTEINE PROTEINASE 3 PRECURSOR (CYSTEINE P...   149  9e-36
+sp|Q01958|CPP2_ENTHI CYSTEINE PROTEINASE 2 PRECURSOR                  149  9e-36
+sp|P36185|ACP2_ENTHI CYSTEINE PROTEINASE ACP2 PRECURSOR               145  1e-34
+sp|P25781|CYSP_THEAN CYSTEINE PROTEINASE PRECURSOR                    145  1e-34
+sp|P22497|CYSP_THEPA CYSTEINE PROTEINASE PRECURSOR                    143  5e-34
+sp|P25805|CYSP_PLAFA THROPHOZOITE CYSTEINE PROTEINASE PRECURSOR ...   141  3e-33
+sp|P14518|BROM_ANACO BROMELAIN, STEM                                  139  6e-33
+sp|P16311|MMAL_DERFA MAJOR MITE FECAL ALLERGEN DER F 1 PRECURSOR...   138  1e-32
+sp|P42666|CYSP_PLAVI CYSTEINE PROTEINASE PRECURSOR                    129  1e-29
+sp|P08176|MMAL_DERPT MAJOR MITE FECAL ALLERGEN DER P 1 PRECURSOR...   121  3e-27
+sp|P80067|CATC_RAT DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPP...   111  3e-24
+sp|P97821|CATC_MOUSE DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (D...   109  9e-24
+sp|P25773|CATL_FELCA CATHEPSIN L (PROGESTERONE-DEPENDENT PROTEIN...   108  2e-23
+sp|Q26563|CATC_SCHMA CATHEPSIN C PRECURSOR                            108  3e-23
+sp|P53634|CATC_HUMAN DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (D...   107  3e-23
+sp|P25780|EUM1_EURMA MITE GROUP I ALLERGEN EUR M 1 (EUR M I)          100  7e-21
+sp|Q23894|CYS3_DICDI CYSTEINE PROTEINASE 3 (CYSTEINE PROTEINASE II)    95  2e-19
+sp|P43509|CPR5_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 5 PREC...    91  4e-18
+sp|P43508|CPR4_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 4 PREC...    90  5e-18
+sp|P05993|PAP5_CARPA CYSTEINE PROTEINASE (CLONE PLBPC13)               90  5e-18
+sp|P07688|CATB_BOVIN CATHEPSIN B PRECURSOR                             89  2e-17
+sp|P00787|CATB_RAT CATHEPSIN B PRECURSOR (CATHEPSIN B1) (RSG-2)        87  4e-17
+sp|P25807|CYS1_CAEEL GUT-SPECIFIC CYSTEINE PROTEINASE PRECURSOR        87  5e-17
+sp|P07858|CATB_HUMAN CATHEPSIN B PRECURSOR (CATHEPSIN B1) (APP S...    86  9e-17
+sp|P43157|CYSP_SCHJA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECUR...    85  2e-16
+sp|P43233|CATB_CHICK CATHEPSIN B PRECURSOR (CATHEPSIN B1)              85  2e-16
+sp|P43510|CPR6_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 6 PREC...    85  2e-16
+sp|P25792|CYSP_SCHMA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECUR...    85  3e-16
+sp|P10605|CATB_MOUSE CATHEPSIN B PRECURSOR (CATHEPSIN B1)              85  3e-16
+sp|P25802|CYS1_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PREC...    80  9e-15
+sp|P25793|CYS2_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 2 PREC...    78  2e-14
+sp|P19092|CYS1_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PREC...    78  4e-14
+sp|P43507|CPR3_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3 PREC...    73  7e-13
+sp|P13823|SERA_PLAFG SERINE-REPEAT ANTIGEN PROTEIN PRECURSOR (P1...    70  6e-12
+sp|P32956|CC3_CARCN CYSTEINE PROTEINASE III (CC-III)                   61  4e-09
+sp|P32957|CC4_CARCN CYSTEINE PROTEINASE IV (CC-IV)                     60  9e-09
+sp|Q06544|CYS3_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3            59  1e-08
+sp|P32954|CC1_CARCN CYSTEINE PROTEINASE I (CC-I)                       58  3e-08
+sp|P32955|CC2_CARCN CYSTEINE PROTEINASE II (CC-II)                     56  1e-07
+sp||CATL_CHICK_2 [Segment 2 of 2] CATHEPSIN L                          52  2e-06
+sp|P12399|CT2A_MOUSE CTLA-2-ALPHA PROTEIN PRECURSOR                    42  0.002
+sp|P05689|CATX_BOVIN CATHEPSIN                                         40  0.006
+sp|P12400|CT2B_MOUSE CTLA-2-BETA PROTEIN PRECURSOR                     39  0.019
+sp|P23897|HSER_RAT HEAT-STABLE ENTEROTOXIN RECEPTOR PRECURSOR (G...    36  0.16
+sp|P20736|BM86_BOOMI GLYCOPROTEIN ANTIGEN BM86 PRECURSOR (PROTEC...    35  0.22
+sp|P46992|YJR1_YEAST HYPOTHETICAL 43.0 KD PROTEIN IN CPS1-FPP1 I...    32  1.9
+sp|P28493|PR5_ARATH PATHOGENESIS-RELATED PROTEIN 5 PRECURSOR (PR-5)    32  1.9
+sp|P54634|POLN_LORDV NON-STRUCTURAL POLYPROTEIN [CONTAINS: RNA-D...    31  3.2
+sp|Q02521|SPP2_YEAST SPLICEOSOME MATURATION PROTEIN SPP2               31  4.2
+sp|P41901|SPR3_YEAST SPORULATION-SPECIFIC SEPTIN                       31  4.2
+sp|Q01532|BLH1_YEAST CYSTEINE PROTEINASE 1 (Y3) (BLEOMYCIN HYDRO...    30  5.5
+sp|P24896|NU5M_CAEEL NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 5            30  5.5
+sp|P25648|SRB8_YEAST SUPPRESSOR OF RNA POLYMERASE B SRB8               30  7.2
+sp|Q04723|PEPC_LACLC AMINOPEPTIDASE C                                  30  7.2
+sp|Q13867|BLMH_HUMAN BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)         30  9.4
+sp|P87362|BLMH_CHICK BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH) (...    30  9.4
+sp|P70645|BLMH_RAT BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)           30  9.4
+
+>sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 343
+
+ Score =  709 bits (1811), Expect = 0.0
+ Identities = 343/351 (97%), Positives = 343/351 (97%), Gaps = 8/351 (2%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+            MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE
+Sbjct:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+
+Query:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPP 120
+            ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 
+Sbjct:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP- 119
+
+Query:  121 EEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 180
+               TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE
+Sbjct:  120 ---TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+
+Query:  181 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ 240
+pattern 237                                                         ****
+            CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG    
+Sbjct:  177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG---- 232
+
+Query:  241 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG 300
+            AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG
+Sbjct:  233 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVG 292
+
+Query:  301 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+Sbjct:  293 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+
+
+>sp|P43295|A494_ARATH PROBABLE CYSTEINE PROTEINASE A494 PRECURSOR
+          Length = 313
+
+ Score =  273 bits (691), Expect = 4e-73
+ Identities = 149/324 (45%), Positives = 194/324 (58%), Gaps = 26/324 (8%)
+
+Query:  32  FQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKA---DTKFGVNKFADLSSDE 87
+            F+ KF K Y S EE+  RF +FK+NL       L A+ H+      + GV +F+DL+  E
+Sbjct:  3   FKKKFGKVYGSIEEHYYRFSVFKANL-------LRAMRHQKMDPSARHGVTQFSDLTRSE 55
+
+Query:  88  FKNYYLNNKEAI-FTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSC 146
+            F+  +L  K       D   A  L  + +    PEE   FDWR RGAVTPVKNQG CGSC
+Sbjct:  56  FRRKHLGVKGGFKLPKDANQAPILPTQNL----PEE---FDWRDRGAVTPVKNQGSCGSC 108
+
+Query:  147 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 206
+            WSFSTTG +EG HF++  KLVSLSEQ LVDCDHEC + E E +CD GCNGGL  +A+ Y 
+Sbjct:  109 WSFSTTGALEGAHFLATGKLVSLSEQQLVDCDHEC-DPEEEGSCDSGCNGGLMNSAFEYT 167
+
+Query:  207 IKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+            +K GG+  E  YPYT   G  C  + + I     A +SNF+++  NE  +A  ++  GPL
+Sbjct:  168 LKTGGLMREKDYPYTGTDGGSCKLDRSKI----VASVSNFSVVSINEDQIAANLIKNGPL 223
+
+Query:  267 AIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAK--NTIFRKNMPYWIVKNSWGAD 324
+            A+A +A   Q YIGGV         L+HG+L+VGY +   +    K  PYWI+KNSWG  
+Sbjct:  224 AVAINAAYMQTYIGGVSCPYICSRRLNHGVLLVGYGSAGFSQARLKEKPYWIIKNSWGES 283
+
+Query:  325 WGEQGYIYLRRGKNTCGVSNFVST 348
+            WGE G+  + +G+N CGV + VST
+Sbjct:  284 WGENGFYKICKGRNICGVDSLVST 307
+
+
+>sp|P25804|CYSP_PEA CYSTEINE PROTEINASE 15A PRECURSOR (TURGOR-RESPONSIVE PROTEIN 15A)
+          Length = 363
+
+ Score =  270 bits (684), Expect = 2e-72
+ Identities = 144/327 (44%), Positives = 201/327 (61%), Gaps = 20/327 (6%)
+
+Query:  26  QSQFLEFQDKFNKKYS-HEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+            +  F  F+ KF+K Y+  EE+  RF +FKSNL K +    +  N     + G+ KF+DL+
+Sbjct:  45  EHHFTSFKSKFSKSYATKEEHDYRFGVFKSNLIKAK----LHQNRDPTAEHGITKFSDLT 100
+
+Query:  85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCG 144
+            + EF+  +L  K+ +    LP           +  PE+   FDWR +GAVTPVK+QG CG
+Sbjct:  101 ASEFRRQFLGLKKRL---RLPAHAQKAPILPTTNLPED---FDWREKGAVTPVKDQGSCG 154
+
+Query:  145 SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN 204
+            SCW+FSTTG +EG H+++  KLVSLSEQ LVDCDH C + E   +CD GCNGGL  NA+ 
+Sbjct:  155 SCWAFSTTGALEGAHYLATGKLVSLSEQQLVDCDHVC-DPEQAGSCDSGCNGGLMNNAFE 213
+
+Query:  205 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTG 264
+pattern 237                                 ****
+            Y++++GG+  E  Y YT   G+ C F+ + +     A +SNF+++  +E  +A  +V  G
+Sbjct:  214 YLLESGGVVQEKDYAYTGRDGS-CKFDKSKV----VASVSNFSVVTLDEDQIAANLVKNG 268
+
+Query:  265 PLAIAADAVEWQFYIGGV-FDIPCNPNSLDHGILIVGY--SAKNTIFRKNMPYWIVKNSW 321
+            PLA+A +A   Q Y+ GV     C  + LDHG+L+VG+   A   I  K  PYWI+KNSW
+Sbjct:  269 PLAVAINAAWMQTYMSGVSCPYVCAKSRLDHGVLLVGFGKGAYAPIRLKEKPYWIIKNSW 328
+
+Query:  322 GADWGEQGYIYLRRGKNTCGVSNFVST 348
+            G +WGEQGY  + RG+N CGV + VST
+Sbjct:  329 GQNWGEQGYYKICRGRNVCGVDSMVST 355
+
+
+>sp|P43296|RD19_ARATH CYSTEINE PROTEINASE RD19A PRECURSOR
+          Length = 368
+
+ Score =  266 bits (672), Expect = 6e-71
+ Identities = 156/367 (42%), Positives = 206/367 (55%), Gaps = 42/367 (11%)
+
+Query:  6   LFVLAVFTVFVSSR---------------GIPPE---EQSQFLEFQDKFNKKY-SHEEYL 46
+            +FVL+ F V VSS                G  P+    +  F  F+ KF K Y S+EE+ 
+Sbjct:  10  VFVLSFFIVSVSSSDVNDGDDLVIRQVVGGAEPQVLTSEDHFSLFKRKFGKVYASNEEHD 69
+
+Query:  47  ERFEIFKSNLGKIEELNLIAINHKADTK--FGVNKFADLSSDEFKNYYLNNKEAI-FTDD 103
+             RF +FK+NL +         + K D     GV +F+DL+  EF+  +L  +       D
+Sbjct:  70  YRFSVFKANLRRARR------HQKLDPSATHGVTQFSDLTRSEFRKKHLGVRSGFKLPKD 123
+
+Query:  104 LPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 163
+               A  L  E +    PE+   FDWR  GAVTPVKNQG CGSCWSFS TG +EG +F++ 
+Sbjct:  124 ANKAPILPTENL----PED---FDWRDHGAVTPVKNQGSCGSCWSFSATGALEGANFLAT 176
+
+Query:  164 NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAE 223
+             KLVSLSEQ LVDCDHEC + E  ++CD GCNGGL  +A+ Y +K GG+  E  YPYT +
+Sbjct:  177 GKLVSLSEQQLVDCDHEC-DPEEADSCDSGCNGGLMNSAFEYTLKTGGLMKEEDYPYTGK 235
+
+Query:  224 TGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVF 283
+pattern 237              ****
+             G  C  + + I     A +SNF++I  +E  +A  +V  GPLA+A +A   Q YIGGV 
+Sbjct:  236 DGKTCKLDKSKI----VASVSNFSVISIDEEQIAANLVKNGPLAVAINAGYMQTYIGGVS 291
+
+Query:  284 DIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCG 341
+                    L+HG+L+VGY A        K  PYWI+KNSWG  WGE G+  + +G+N CG
+Sbjct:  292 CPYICTRRLNHGVLLVGYGAAGYAPARFKEKPYWIIKNSWGETWGENGFYKICKGRNICG 351
+
+Query:  342 VSNFVST 348
+            V + VST
+Sbjct:  352 VDSMVST 358
+
+
+>sp|Q10716|CYS1_MAIZE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 371
+
+ Score =  252 bits (638), Expect = 6e-67
+ Identities = 138/332 (41%), Positives = 190/332 (56%), Gaps = 23/332 (6%)
+
+Query:  26  QSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+            +S FL F  +F K Y   +E+  R  +FK NL +     L+        + GV KF+DL+
+Sbjct:  45  ESHFLSFVQRFGKSYKDADEHAYRLSVFKDNLRRARRHQLL----DPSAEHGVTKFSDLT 100
+
+Query:  85  SDEFKNYYLN---NKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQG 141
+              EF+  YL    ++ A+  +    A        + +P +    FDWR  GAV PVKNQG
+Sbjct:  101 PAEFRRTYLGLRKSRRALLRELGESAHEAPVLPTDGLPDD----FDWRDHGAVGPVKNQG 156
+
+Query:  142 QCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPN 201
+             CGSCWSFS +G +EG H+++  KL  LSEQ  VDCDHEC   E  ++CD GCNGGL   
+Sbjct:  157 SCGSCWSFSASGALEGAHYLATGKLEVLSEQQFVDCDHECDSSE-PDSCDSGCNGGLMTT 215
+
+Query:  202 AYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIV 261
+pattern 237                                    ****
+            A++Y+ K GG+++E  YPYT   G +C F+ + I     A + NF+++  +E  ++  ++
+Sbjct:  216 AFSYLQKAGGLESEKDYPYTGSDG-KCKFDKSKI----VASVQNFSVVSVDEAQISANLI 270
+
+Query:  262 STGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKN 319
+              GPLAI  +A   Q YIGGV         LDHG+L+VGY A     I  K+ PYWI+KN
+Sbjct:  271 KHGPLAIGINAAYMQTYIGGVSCPYICGRHLDHGVLLVGYGASGFAPIRLKDKPYWIIKN 330
+
+Query:  320 SWGADWGEQGYIYLRRG---KNTCGVSNFVST 348
+            SWG +WGE GY  + RG   +N CGV + VST
+Sbjct:  331 SWGENWGENGYYKICRGSNVRNKCGVDSMVST 362
+
+
+>sp|P04989|CYS2_DICDI CYSTEINE PROTEINASE 2 PRECURSOR (PRESTALK CATHEPSIN)
+          Length = 376
+
+ Score =  250 bits (633), Expect = 2e-66
+ Identities = 147/391 (37%), Positives = 213/391 (53%), Gaps = 63/391 (16%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIP-------PEEQSQFLEFQDKFNKKYSHEEYLERFEIFK 53
+            M++++  +L +F  F  +   P        + ++ F E+  KFN++YS  E+  R+ IFK
+Sbjct:  1   MRLLVFLILLIFVNFSFANVRPNGRRFSESQYRTAFTEWTLKFNRQYSSSEFSNRYSIFK 60
+
+Query:  54  SNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK-EAIFTDDLPVADYLDD 112
+            SN+  ++  N       + T  G+N FAD++++E++  YL  +  A   +     + L+ 
+Sbjct:  61  SNMDYVDNWNS---KGDSQTVLGLNNFADITNEEYRKTYLGTRVNAHSYNGYDGREVLNV 117
+
+Query:  113 EFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQ 172
+            E + + P     + DWRT+ AVTP+K+QGQCGSCWSFSTTG+ EG H +   KLVSLSEQ
+Sbjct:  118 EDLQTNPK----SIDWRTKNAVTPIKDQGQCGSCWSFSTTGSTEGAHALKTKKLVSLSEQ 173
+
+Query:  173 NLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNS 232
+            NLVDC        G E  + GC+GGL  NA++YIIKN GI TESSYPYTAETG+ C FN 
+Sbjct:  174 NLVDC-------SGPEE-NFGCDGGLMNNAFDYIIKNKGIDTESSYPYTAETGSTCLFNK 225
+
+Query:  233 ANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNP 289
+pattern 237     ****
+            ++IG    A I  +  I     +        GP+++A DA    +Q Y  G++  P C+P
+Sbjct:  226 SDIG----ATIKGYVNITAGSEISLENGAQHGPVSVAIDASHNSFQLYTSGIYYEPKCSP 281
+
+Query:  290 NSLDHGILIVGY--------------------------------SAKNTIFRKNMPYWIV 317
+              LDHG+L+VGY                                 + +++  K   YWIV
+Sbjct:  282 TELDHGVLVVGYGVQGKDDEGPVLNRKQTIVIHKNEDNKVESSDDSSDSVRPKANNYWIV 341
+
+Query:  318 KNSWGADWGEQGYIYLRRG-KNTCGVSNFVS 347
+            KNSWG  WG +GYI + +  KN CG+++  S
+Sbjct:  342 KNSWGTSWGIKGYILMSKDRKNNCGIASVSS 372
+
+
+>sp|P54640|CYS5_DICDI CYSTEINE PROTEINASE 5 PRECURSOR
+          Length = 344
+
+ Score =  238 bits (601), Expect = 1e-62
+ Identities = 139/370 (37%), Positives = 201/370 (53%), Gaps = 45/370 (12%)
+
+Query:  1   MKVI-LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKI 59
+            MKV+  L VL V       +    + ++ F ++     K Y+ EE+  R+ IF +N+  +
+Sbjct:  1   MKVLSFLCVLLVSVATAKQQFSELQYRNAFTDWMITHQKSYTSEEFGARYNIFTANMDYV 60
+
+Query:  60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+            ++ N    +  ++T  G+N FAD++++E++N YL  K   F     +    +    NS  
+Sbjct:  61  QQWN----SKGSETVLGLNNFADITNEEYRNTYLGTK---FDASSLIGTQEEKVHTNSSA 113
+
+Query:  120 PEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 179
+              +    DWR+ GAVTPVKNQGQCG CWSFSTTG+ EG HF S+ +LVSLSEQNL+DC  
+Sbjct:  114 ASK----DWRSEGAVTPVKNQGQCGGCWSFSTTGSTEGAHFQSKGELVSLSEQNLIDCST 169
+
+Query:  180 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEE 239
+pattern 237                                                          ***
+            E          + GC+GGL   A+ YII N GI TESSYPY AE G +C + S N G   
+Sbjct:  170 E----------NSGCDGGLMTYAFEYIINNNGIDTESSYPYKAENG-KCEYKSENSG--- 215
+
+Query:  240 QAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGI 296
+pattern 240 *
+             A +S++  +           V+  P+++A DA    +Q Y  G++  P C+  +LDHG+
+Sbjct:  216 -ATLSSYKTVTAGSESSLESAVNVNPVSVAIDASHQSFQLYTSGIYYEPECSSENLDHGV 274
+
+Query:  297 LIVGY--------------SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCG 341
+            L VGY              S+ N     +  YWIVKNSWG  WG +GYI + R + N CG
+Sbjct:  275 LAVGYGSGSGSSSGQSSGQSSGNLSASSSNEYWIVKNSWGTSWGIEGYILMSRNRDNNCG 334
+
+Query:  342 VSNFVSTSII 351
+            +++  S  ++
+Sbjct:  335 IASSASFPVV 344
+
+
+>sp|P14658|CYSP_TRYBB CYSTEINE PROTEINASE PRECURSOR
+          Length = 450
+
+ Score =  236 bits (597), Expect = 4e-62
+ Identities = 137/354 (38%), Positives = 193/354 (53%), Gaps = 34/354 (9%)
+
+Query:  3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEE 61
+            V+L     + +V + S  +    + +F  F+ K+ K Y   +E   RF  F+ N+   E+
+Sbjct:  15  VLLAMAACLASVALGSLHVEESLEMRFAAFKKKYGKVYKDAKEEAFRFRAFEENM---EQ 71
+
+Query:  62  LNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPE 121
+              + A  +   T FGV  F+D++ +EF+  Y N            A     + +N     
+Sbjct:  72  AKIQAAANPYAT-FGVTPFSDMTREEFRARYRNGASYF-----AAAQKRLRKTVNVTTGR 125
+
+Query:  122 EQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 181
+               A DWR +GAVTPVK QGQCGSCW+FST GN+EGQ  ++ N LVSLSEQ LV CD   
+Sbjct:  126 APAAVDWREKGAVTPVKVQGQCGSCWAFSTIGNIEGQWQVAGNPLVSLSEQMLVSCD--- 182
+
+Query:  182 MEYEGEEACDEGCNGGLQPNAYNYIIKN--GGIQTESSYPYTAETG--TQCNFNSANIGP 237
+pattern 237                                                            *
+                     D GCNGGL  NA+N+I+ +  G + TE+SYPY +  G   QC  N   IG 
+Sbjct:  183 -------TIDSGCNGGLMDNAFNWIVNSNGGNVFTEASYPYVSGNGEQPQCQMNGHEIG- 234
+
+Query:  238 EEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGIL 297
+pattern 238 ***
+               A I++   +P++E  +A Y+   GPLAIA DA  +  Y GG+    C    LDHG+L
+Sbjct:  235 ---AAITDHVDLPQDEDAIAAYLAENGPLAIAVDAESFMDYNGGIL-TSCTSKQLDHGVL 290
+
+Query:  298 IVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            +VGY+  +     N PYWI+KNSW   WGE GYI + +G N C ++  VS++++
+Sbjct:  291 LVGYNDNS-----NPPYWIIKNSWSNMWGEDGYIRIEKGTNQCLMNQAVSSAVV 339
+
+
+>sp|Q26534|CATL_SCHMA CATHEPSIN L PRECURSOR (SMCL1)
+          Length = 319
+
+ Score =  233 bits (589), Expect = 3e-61
+ Identities = 128/334 (38%), Positives = 190/334 (56%), Gaps = 30/334 (8%)
+
+Query:  21  IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+            +P     ++++F+ K+ K+Y   E   RF IFKSN+ K +   L  +  +    +GV  +
+Sbjct:  12  LPGNVDEKYVQFKLKYRKQYHETEDEIRFNIFKSNILKAQ---LYQVFVRGSAIYGVTPY 68
+
+Query:  81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQ 140
+            +DL++DEF   +L     + +        L  E +N+IP      FDWR +GAVT VKNQ
+Sbjct:  69  SDLTTDEFARTHLTASWVVPSSRSNTPTSLGKE-VNNIPKN----FDWREKGAVTEVKNQ 123
+
+Query:  141 GQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQP 200
+            G CGSCW+FSTTGNVE Q F    KL+SLSEQ LVDCD            D+GCNGGL  
+Sbjct:  124 GMCGSCWAFSTTGNVESQWFRKTGKLLSLSEQQLVDCD----------GLDDGCNGGLPS 173
+
+Query:  201 NAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYI 260
+pattern 237                                     ****
+            NAY  IIK GG+  E +YPY A+   +C+  +  +       I++   + ++ET +A ++
+Sbjct:  174 NAYESIIKMGGLMLEDNYPYDAK-NEKCHLKTDGVA----VYINSSVNLTQDETELAAWL 228
+
+Query:  261 VSTGPLAIAADAVEWQFYIGGV---FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIV 317
+                 +++  +A+  QFY  G+   + I C+   LDH +L+VGY     +  KN P+WIV
+Sbjct:  229 YHNSTISVGMNALLLQFYQHGISHPWWIFCSKYLLDHAVLLVGYG----VSEKNEPFWIV 284
+
+Query:  318 KNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            KNSWG +WGE GY  + RG  +CG++   ++++I
+Sbjct:  285 KNSWGVEWGENGYFRMYRGDGSCGINTVATSAMI 318
+
+
+>sp|P35591|CYS1_LEIPI CYSTEINE PROTEINASE 1 PRECURSOR (AMASTIGOTE CYSTEINE PROTEINASE A-1)
+          Length = 354
+
+ Score =  233 bits (589), Expect = 3e-61
+ Identities = 144/355 (40%), Positives = 192/355 (53%), Gaps = 40/355 (11%)
+
+Query:  5   LLFVLAVFTVFVSSRGI-------PPEEQ----SQFLEFQDKFNKKYSHE-EYLERFEIF 52
+            LLF + V  +FV   G        PP +     + +  F+ +  K +  + E   RF  F
+Sbjct:  7   LLFAIVVTILFVVCYGSALIAQTPPPVDNFVASAHYGSFKKRHGKAFGGDAEEGHRFNAF 66
+
+Query:  53  KSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDD 112
+            K N+     LN    +   D      KFADL+  EF   YLN           + D+ +D
+Sbjct:  67  KQNMQTAYFLNTQNPHAHYDVS---GKFADLTPQEFAKLYLNPDYYA----RHLKDHKED 119
+
+Query:  113 EFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQ 172
+              ++   P    + DWR +GAVTPVKNQG CGSCW+FS  GN+EGQ   S + LVSLSEQ
+Sbjct:  120 VHVDDSAPSGVMSVDWRDKGAVTPVKNQGLCGSCWAFSAIGNIEGQWAASGHSLVSLSEQ 179
+
+Query:  173 NLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQCNF 230
+             LV CD+           DEGCNGGL   A N+I++  NG + TE+SYPYT+  GT+   
+Sbjct:  180 MLVSCDN----------IDEGCNGGLMDQAMNWIMQSHNGSVFTEASYPYTSGGGTRPPC 229
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            +      E  AKI+ F  +P +E  +A ++   GP+A+A DA  WQ Y GGV  + C   
+Sbjct:  230 HDEG---EVGAKITGFLSLPHDEERIAEWVEKRGPVAVAVDATTWQLYFGGVVSL-CLAW 285
+
+Query:  291 SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNF 345
+            SL+HG+LIVG++ KN       PYWIVKNSWG+ WGE+GYI L  G N C + N+
+Sbjct:  286 SLNHGVLIVGFN-KNA----KPPYWIVKNSWGSSWGEKGYIRLAMGSNQCMLKNY 335
+
+
+>sp|P25775|LCPA_LEIME CYSTEINE PROTEINASE A PRECURSOR
+          Length = 354
+
+ Score =  231 bits (584), Expect = 1e-60
+ Identities = 143/355 (40%), Positives = 192/355 (53%), Gaps = 40/355 (11%)
+
+Query:  5   LLFVLAVFTVFVSSRGI-------PPEEQ----SQFLEFQDKFNKKYSHE-EYLERFEIF 52
+            LLF + V  +FV   G        PP +     + +  F+ +  K +  + E   RF  F
+Sbjct:  7   LLFAIVVTILFVVCYGSALIAQTPPPVDNFVASAHYGSFKKRHGKAFGGDAEEGHRFNAF 66
+
+Query:  53  KSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDD 112
+            K N+     LN    +   D      KFADL+  EF   YLN           + ++ +D
+Sbjct:  67  KQNMQTAYFLNTQNPHAHYDVS---GKFADLTPQEFAKLYLNPDYYA----RHLKNHKED 119
+
+Query:  113 EFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQ 172
+              ++   P    + DWR +GAVTPVKNQG CGSCW+FS  GN+EGQ   S + LVSLSEQ
+Sbjct:  120 VHVDDSAPSGVMSVDWRDKGAVTPVKNQGLCGSCWAFSAIGNIEGQWAASGHSLVSLSEQ 179
+
+Query:  173 NLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQCNF 230
+             LV CD+           DEGCNGGL   A N+I++  NG + TE+SYPYT+  GT+   
+Sbjct:  180 MLVSCDN----------IDEGCNGGLMDQAMNWIMQSHNGSVFTEASYPYTSGGGTRPPC 229
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 290
+pattern 237       ****
+            +      E  AKI+ F  +P +E  +A ++   GP+A+A DA  WQ Y GGV  + C   
+Sbjct:  230 HDEG---EVGAKITGFLSLPHDEERIAEWVEKRGPVAVAVDATTWQLYFGGVVSL-CLAW 285
+
+Query:  291 SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNF 345
+            SL+HG+LIVG++ KN       PYWIVKNSWG+ WGE+GYI L  G N C + N+
+Sbjct:  286 SLNHGVLIVGFN-KNA----KPPYWIVKNSWGSSWGEKGYIRLAMGSNQCMLKNY 335
+
+
+>sp|P13277|CYS1_HOMAM DIGESTIVE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 322
+
+ Score =  221 bits (558), Expect = 1e-57
+ Identities = 132/349 (37%), Positives = 184/349 (51%), Gaps = 41/349 (11%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKI 59
+            MKV+ LF+  +     +           + EF+ KF +KY   EE   R  +F  NL  I
+Sbjct:  1   MKVVALFLFGLALAAANP---------SWEEFKGKFGRKYVDLEEERYRLNVFLDNLQYI 51
+
+Query:  60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+            EE N      +      +N+F+D+++++F       K+       P A      F ++  
+Sbjct:  52  EEFNKKYERGEVTYNLAINQFSDMTNEKFNAVMKGYKKG----PRPAA-----VFTSTDA 102
+
+Query:  120 PEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 179
+              E T  DWRT+GAVTPVK+QGQCGSCW+FSTTG +EGQHF+   +LVSLSEQ LVDC  
+Sbjct:  103 APESTEVDWRTKGAVTPVKDQGQCGSCWAFSTTGGIEGQHFLKTGRLVSLSEQQLVDC-- 160
+
+Query:  180 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEE 239
+pattern 237                                                          ***
+                  G    ++GCNGG    A  Y+  NGG+ TESSYPY A   T C FNS  IG   
+Sbjct:  161 -----AGGSYYNQGCNGGWVERAIMYVRDNGGVDTESSYPYEARDNT-CRFNSNTIG--- 211
+
+Query:  240 QAKISNFTMIPK-NETVMAGYIVSTGPLAIAADAVEWQF---YIGGVFDIPCNPNSLDHG 295
+pattern 240 *
+             A  + +  I + +E+ +       GP+++A DA    F   Y G  ++  C+ + LDH 
+Sbjct:  212 -ATCTGYVGIAQGSESALKTATRDIGPISVAIDASHRSFQSYYTGVYYEPSCSSSQLDHA 270
+
+Query:  296 ILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVS 343
+            +L VGY ++         +W+VKNSW   WGE GYI + R + N CG++
+Sbjct:  271 VLAVGYGSEG-----GQDFWLVKNSWATSWGESGYIKMARNRNNNCGIA 314
+
+
+>sp|P25779|CYSP_TRYCR CRUZIPAIN PRECURSOR (MAJOR CYSTEINE PROTEINASE) (CRUZAINE)
+          Length = 467
+
+ Score =  221 bits (557), Expect = 2e-57
+ Identities = 134/358 (37%), Positives = 189/358 (52%), Gaps = 38/358 (10%)
+
+Query:  3   VILLFVLAVFTVFV--SSRGIPPEEQ--SQFLEFQDKFNKKY-SHEEYLERFEIFKSNLG 57
+            ++L  VL V    V  ++  +  EE   SQF EF+ K  + Y S  E   R  +F+ NL 
+Sbjct:  8   LLLAAVLVVMACLVPAATASLHAEETLTSQFAEFKQKHGRVYESAAEEAFRLSVFRENLF 67
+
+Query:  58  KIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS 117
+             +  L+  A  H     FGV  F+DL+ +EF++ Y N               +  E + +
+Sbjct:  68  -LARLHAAANPHAT---FGVTPFSDLTREEFRSRYHNGAAHFAAAQERARVPVKVEVVGA 123
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDC 177
+                   A DWR RGAVT VK+QGQCGSCW+FS  GNVE Q F++ + L +LSEQ LV C
+Sbjct:  124 -----PAAVDWRARGAVTAVKDQGQCGSCWAFSAIGNVECQWFLAGHPLTNLSEQMLVSC 178
+
+Query:  178 DHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQ--CNFNSA 233
+            D            D GC+GGL  NA+ +I++  NG + TE SYPY +  G    C  +  
+Sbjct:  179 D----------KTDSGCSGGLMNNAFEWIVQENNGAVYTEDSYPYASGEGISPPCTTSGH 228
+
+Query:  234 NIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLD 293
+pattern 237    ****
+             +G    A I+    +P++E  +A ++   GP+A+A DA  W  Y GGV    C    LD
+Sbjct:  229 TVG----ATITGHVELPQDEAQIAAWLAVNGPVAVAVDASSWMTYTGGVM-TSCVSEQLD 283
+
+Query:  294 HGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            HG+L+VGY+    +     PYWI+KNSW   WGE+GYI + +G N C V    S++++
+Sbjct:  284 HGVLLVGYNDSAAV-----PYWIIKNSWTTQWGEEGYIRIAKGSNQCLVKEEASSAVV 336
+
+
+>sp|P41721|CATV_NPVBM VIRAL CATHEPSIN (V-CATH)
+          Length = 323
+
+ Score =  216 bits (545), Expect = 5e-56
+ Identities = 131/349 (37%), Positives = 181/349 (51%), Gaps = 32/349 (9%)
+
+Query:  5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELN 63
+            +LF L V+ V  S+   P +  + F EF  +FNK YS E E L RF+IF+ NL +I    
+Sbjct:  4   ILFYLFVYAVVKSAAYDPLKAPNYFEEFVHRFNKNYSSEVEKLRRFKIFQHNLNEI---- 59
+
+Query:  64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQ 123
+             I  N     K+ +NKF+DLS DE    Y        T +      LD       P +  
+Sbjct:  60  -INKNQNDSAKYEINKFSDLSKDETIAKYTGLSLPTQTQNFCKVILLDQP-----PGKGP 113
+
+Query:  124 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 183
+              FDWR    VT VKNQG CG+CW+F+T G++E Q  I  N+L++LSEQ ++DCD     
+Sbjct:  114 LEFDWRRLNKVTSVKNQGMCGACWAFATLGSLESQFAIKHNELINLSEQQMIDCDF---- 169
+
+Query:  184 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKI 243
+pattern 237                                                      ****
+                   D GCNGGL   A+  IIK GG+Q ES YPY A+    C  NS     + +   
+Sbjct:  170 ------VDAGCNGGLLHTAFEAIIKMGGVQLESDYPYEAD-NNNCRMNSNKFLVQVK--- 219
+
+Query:  244 SNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 303
+              +  I   E  +   +   GP+ +A DA +   Y  G+    C  + L+H +L+VGY  
+Sbjct:  220 DCYRYIIVYEEKLKDLLPLVGPIPMAIDAADIVNYKQGIIKY-CFDSGLNHAVLLVGYGV 278
+
+Query:  304 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN-FVSTSII 351
+            +N     N+PYW  KN+WG DWGE G+  +++  N CG+ N   ST++I
+Sbjct:  279 EN-----NIPYWTFKNTWGTDWGEDGFFRVQQNINACGMRNELASTAVI 322
+
+
+>sp|P25782|CYS2_HOMAM DIGESTIVE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 323
+
+ Score =  215 bits (541), Expect = 1e-55
+ Identities = 132/357 (36%), Positives = 189/357 (51%), Gaps = 40/357 (11%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKI 59
+            MKV +LF+  V     S           +  F+ K+ ++Y   EE   R  IF+ N   I
+Sbjct:  1   MKVAVLFLCGVALAAASP---------SWEHFKGKYGRQYVDAEEDSYRRVIFEQNQKYI 51
+
+Query:  60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+            EE N    N +      +NKF D++ +EF      N   I     PV+ +   +      
+Sbjct:  52  EEFNKKYENGEVTFNLAMNKFGDMTLEEFNAVMKGN---IPRRSAPVSVFYPKKETGP-- 106
+
+Query:  120 PEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 179
+              + T  DWRT+GAVTPVK+QGQCGSCW+FSTTG++EGQHF+    L+SL+EQ LVDC  
+Sbjct:  107 --QATEVDWRTKGAVTPVKDQGQCGSCWAFSTTGSLEGQHFLKTGSLISLAEQQLVDC-- 162
+
+Query:  180 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEE 239
+pattern 237                                                          ***
+                        +GCNGG   +A++YI  N GI TE++YPY A  G+ C F+S ++    
+Sbjct:  163 ------SRPYGPQGCNGGWMNDAFDYIKANNGIDTEAAYPYEARDGS-CRFDSNSVA--- 212
+
+Query:  240 QAKISNFTMIPK-NETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHG 295
+pattern 240 *
+             A  S  T I   +ET +   +   GP+++  DA    +QFY  GV+  P C+P+ LDH 
+Sbjct:  213 -ATCSGHTNIASGSETGLQQAVRDIGPISVTIDAAHSSFQFYSSGVYYEPSCSPSYLDHA 271
+
+Query:  296 ILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 351
+            +L VGY ++         +W+VKNSW   WG+ GYI + R + N CG++   S  ++
+Sbjct:  272 VLAVGYGSEG-----GQDFWLVKNSWATSWGDAGYIKMSRNRNNNCGIATVASYPLV 323
+
+
+>sp|P41715|CATV_NPVCF VIRAL CATHEPSIN (V-CATH)
+          Length = 324
+
+ Score =  214 bits (540), Expect = 2e-55
+ Identities = 130/351 (37%), Positives = 188/351 (53%), Gaps = 33/351 (9%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+            M  I+L++L    V  ++  +  +  + F +F  KFNK YS E E L RF+IF+ NL +I
+Sbjct:  1   MNKIVLYLLVYGAVQCAAYDVL-KAPNYFEDFLHKFNKSYSSESEKLRRFQIFRHNLEEI 59
+
+Query:  60  EELNLIAINHKADT-KFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSI 118
+                 I  NH   T ++ +NKFADLS DE  + Y      + T +      LD       
+Sbjct:  60  -----INKNHNDSTAQYEINKFADLSKDETISKYTGLSLPLQTQNFCEVVVLDRP----- 109
+
+Query:  119 PPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCD 178
+            P +    FDWR    VT VKNQG CG+CW+F+T G++E Q  I  N+ ++LSEQ L+DCD
+Sbjct:  110 PDKGPLEFDWRRLNKVTSVKNQGMCGACWAFATLGSLESQFAIKHNQFINLSEQQLIDCD 169
+
+Query:  179 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPE 238
+pattern 237                                                           **
+                        D GC+GGL   A+  ++  GGIQ ES YPY A  G  C  N+A    +
+Sbjct:  170 F----------VDAGCDGGLLHTAFEAVMNMGGIQAESDYPYEANNG-DCRANAAKFVVK 218
+
+Query:  239 EQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILI 298
+pattern 239 **
+             +      T+    E  +   + S GP+ +A DA +   Y  G+    C  + L+H +L+
+Sbjct:  219 VKKCYRYITVF---EEKLKDLLRSVGPIPVAIDASDIVNYKRGIMKY-CANHGLNHAVLL 274
+
+Query:  299 VGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 349
+            VGY+ +N      +P+WI+KN+WGADWGEQGY  +++  N CG+ N + +S
+Sbjct:  275 VGYAVEN-----GVPFWILKNTWGADWGEQGYFRVQQNINACGIQNELPSS 320
+
+
+>sp|P25784|CYS3_HOMAM DIGESTIVE CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 321
+
+ Score =  214 bits (539), Expect = 2e-55
+ Identities = 125/326 (38%), Positives = 184/326 (56%), Gaps = 47/326 (14%)
+
+Query:  32  FQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEF-- 88
+            F+ ++ +KY   +E L R  +F+ N   IE+ N    N +   K  +N+F D++++EF  
+Sbjct:  23  FKTQYGRKYGDAKEELYRQRVFQQNEQLIEDFNKKFENGEVTFKVAMNQFGDMTNEEFNA 82
+
+Query:  89  --KNYYLNNK---EAIFTDDL-PVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQ 142
+              K Y   ++   +A+FT +  P+A                   DWRT+  VTPVK+Q Q
+Sbjct:  83  VMKGYKKGSRGEPKAVFTAEAGPMA----------------ADVDWRTKALVTPVKDQEQ 126
+
+Query:  143 CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNA 202
+            CGSCW+FS TG +EGQHF+  ++LVSLSEQ LVDC          +  ++GC GG   +A
+Sbjct:  127 CGSCWAFSATGALEGQHFLKNDELVSLSEQQLVDC--------STDYGNDGCGGGWMTSA 178
+
+Query:  203 YNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVS 262
+pattern 237                                   ****
+            ++YI  NGGI TESSYPY AE    C F++ +IG    A  +    +   E  +   +  
+Sbjct:  179 FDYIKDNGGIDTESSYPYEAE-DRSCRFDANSIG----AICTGSVEVQHTEEALQEAVSG 233
+
+Query:  263 TGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKN 319
+             GP+++A DA    +QFY  GV ++  C+P  LDHG+L VGY  ++T       YW+VKN
+Sbjct:  234 VGPISVAIDASHFSFQFYSSGVYYEQNCSPTFLDHGVLAVGYGTEST-----KDYWLVKN 288
+
+Query:  320 SWGADWGEQGYIYLRRGK-NTCGVSN 344
+            SWG+ WG+ GYI + R + N CG+++
+Sbjct:  289 SWGSSWGDAGYIKMSRNRDNNCGIAS 314
+
+
+>sp|P07154|CATL_RAT CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP) (CYCLIC
+            PROTEIN-2) (CP-2)
+          Length = 334
+
+ Score =  212 bits (535), Expect = 7e-55
+ Identities = 127/359 (35%), Positives = 195/359 (53%), Gaps = 39/359 (10%)
+
+Query:  3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEEL 62
+            ++LL VL + T   + +       +Q+ +++    + Y   E   R  +++ N+  I+  
+Sbjct:  4   LLLLAVLCLGTALATPK-FDQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLH 62
+
+Query:  63  NLIAINHKADTKFGVNKFADLSSDEFKN------YYLNNKEAIFTDDLPVADYLDDEFIN 116
+            N    N K      +N F D++++EF+       +  + K  +F + L +          
+Sbjct:  63  NGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKGRLFQEPLML---------- 112
+
+Query:  117 SIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 176
+             IP       DWR +G VTPVKNQGQCGSCW+FS +G +EGQ F+   KL+SLSEQNLVD
+Sbjct:  113 QIPK----TVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVD 168
+
+Query:  177 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 236
+            C H+    +G    ++GCNGGL   A+ YI +NGG+ +E SYPY A+ G+ C + +    
+Sbjct:  169 CSHD----QG----NQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS-CKYRA---- 215
+
+Query:  237 PEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLD 293
+pattern 237 ****
+                A  + F  IP+ E  +   + + GP+++A DA     QFY  G++  P C+   LD
+Sbjct:  216 EYAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLD 275
+
+Query:  294 HGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVSTSII 351
+            HG+L+VGY  + T   K+  YW+VKNSWG +WG  GYI + + +N  CG++   S  I+
+Sbjct:  276 HGVLVVGYGYEGTDSNKD-KYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIV 333
+
+
+>sp|P06797|CATL_MOUSE CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP)
+          Length = 334
+
+ Score =  212 bits (533), Expect = 1e-54
+ Identities = 126/359 (35%), Positives = 198/359 (55%), Gaps = 39/359 (10%)
+
+Query:  3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEEL 62
+            ++LL VL + T   + +       +++ +++    + Y   E   R  I++ N+  I+  
+Sbjct:  4   LLLLAVLCLGTALATPK-FDQTFSAEWHQWKSTHRRLYGTNEEEWRRAIWEKNMRMIQLH 62
+
+Query:  63  NLIAINHKADTKFGVNKFADLSSDEFKN------YYLNNKEAIFTDDLPVADYLDDEFIN 116
+            N    N +      +N F D++++EF+       +  + K  +F + L +          
+Sbjct:  63  NGEYSNGQHGFSMEMNAFGDMTNEEFRQVVNGYRHQKHKKGRLFQEPLML---------- 112
+
+Query:  117 SIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 176
+             IP     + DWR +G VTPVKNQGQCGSCW+FS +G +EGQ F+   KL+SLSEQNLVD
+Sbjct:  113 KIPK----SVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVD 168
+
+Query:  177 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 236
+            C H     +G    ++GCNGGL   A+ YI +NGG+ +E SYPY A+ G+ C + +    
+Sbjct:  169 CSHA----QG----NQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS-CKYRA---- 215
+
+Query:  237 PEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLD 293
+pattern 237 ****
+                A  + F  IP+ E  +   + + GP+++A DA     QFY  G++  P C+  +LD
+Sbjct:  216 EFAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKNLD 275
+
+Query:  294 HGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 351
+            HG+L+VGY  + T   KN  YW+VKNSWG++WG +GYI + + + N CG++   S  ++
+Sbjct:  276 HGVLLVGYGYEGTDSNKN-KYWLVKNSWGSEWGMEGYIKIAKDRDNHCGLATAASYPVV 333
+
+
+>sp|P12412|CYSP_VIGMU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYSTEINE PROTEINASE)
+            (SULFHYDRYL-ENDOPEPTIDASE) (SH-EP)
+          Length = 362
+
+ Score =  209 bits (526), Expect = 8e-54
+ Identities = 127/313 (40%), Positives = 179/313 (56%), Gaps = 35/313 (11%)
+
+Query:  47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK---EAIFTDD 103
+            +RF +FK+N+  +   N +   +K      +NKFAD+++ EF++ Y  +K     +F   
+Sbjct:  58  KRFNVFKANVMHVHNTNKMDKPYKLK----LNKFADMTNHEFRSTYAGSKVNHHKMFRGS 113
+
+Query:  104 LPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 163
+               +     E + S+P     + DWR +GAVT VK+QGQCGSCW+FST   VEG + I  
+Sbjct:  114 QHGSGTFMYEKVGSVP----ASVDWRKKGAVTDVKDQGQCGSCWAFSTIVAVEGINQIKT 169
+
+Query:  164 NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAE 223
+            NKLVSLSEQ LVDCD E          ++GCNGGL  +A+ +I + GGI TES+YPYTA+
+Sbjct:  170 NKLVSLSEQELVDCDKE---------ENQGCNGGLMESAFEFIKQKGGITTESNYPYTAQ 220
+
+Query:  224 TGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGG 281
+pattern 237              ****
+             GT C+ +  N   +    I     +P N+       V+  P+++A DA   ++QFY  G
+Sbjct:  221 EGT-CDESKVN---DLAVSIDGHENVPVNDENALLKAVANQPVSVAIDAGGSDFQFYSEG 276
+
+Query:  282 VFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----K 337
+            VF   CN   L+HG+ IVGY    T+   N  YWIV+NSWG +WGEQGYI ++R     +
+Sbjct:  277 VFTGDCN-TDLNHGVAIVGYG--TTVDGTN--YWIVRNSWGPEWGEQGYIRMQRNISKKE 331
+
+Query:  338 NTCGVSNFVSTSI 350
+              CG++   S  I
+Sbjct:  332 GLCGIAMMASYPI 344
+
+
+>sp|P25783|CATV_NPVAC VIRAL CATHEPSIN (V-CATH)
+          Length = 323
+
+ Score =  209 bits (526), Expect = 8e-54
+ Identities = 129/349 (36%), Positives = 179/349 (50%), Gaps = 32/349 (9%)
+
+Query:  5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELN 63
+            +LF L V+ V  S+     +  + F EF  +FNK Y  E E L RF+IF+ NL +I    
+Sbjct:  4   ILFYLFVYGVVNSAAYDLLKAPNYFEEFVHRFNKDYGSEVEKLRRFKIFQHNLNEI---- 59
+
+Query:  64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQ 123
+             I  N     K+ +NKF+DLS DE    Y      I T +      LD       P +  
+Sbjct:  60  -INKNQNDSAKYEINKFSDLSKDETIAKYTGLSLPIQTQNFCKVIVLDQP-----PGKGP 113
+
+Query:  124 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 183
+              FDWR    VT VKNQG CG+CW+F+T  ++E Q  I  N+L++LSEQ ++DCD     
+Sbjct:  114 LEFDWRRLNKVTSVKNQGMCGACWAFATLASLESQFAIKHNQLINLSEQQMIDCDF---- 169
+
+Query:  184 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKI 243
+pattern 237                                                      ****
+                   D GCNGGL   A+  IIK GG+Q ES YPY A+    C  NS     + +   
+Sbjct:  170 ------VDAGCNGGLLHTAFEAIIKMGGVQLESDYPYEAD-NNNCRMNSNKFLVQVK--- 219
+
+Query:  244 SNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 303
+              +  I   E  +   +   GP+ +A DA +   Y  G+    C  + L+H +L+VGY  
+Sbjct:  220 DCYRYITVYEEKLKDLLRLVGPIPMAIDAADIVNYKQGIIKY-CFNSGLNHAVLLVGYGV 278
+
+Query:  304 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN-FVSTSII 351
+            +N     N+PYW  KN+WG DWGE G+  +++  N CG+ N   ST++I
+Sbjct:  279 EN-----NIPYWTFKNTWGTDWGEDGFFRVQQNINACGMRNELASTAVI 322
+
+
+>sp|P25975|CATL_BOVIN CATHEPSIN L PRECURSOR
+          Length = 334
+
+ Score =  208 bits (525), Expect = 1e-53
+ Identities = 126/351 (35%), Positives = 184/351 (51%), Gaps = 35/351 (9%)
+
+Query:  7   FVLAVFTVFVSSRG--IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+            F L V  + V+S    + P   + + +++    + Y   E   R  +++ N   I+  N 
+Sbjct:  5   FFLTVLCLGVASAAPKLDPNLDAHWHQWKATHRRLYGMNEEEWRRAVWEKNKKIIDLHNQ 64
+
+Query:  65  IAINHKADTKFGVNKFADLSSDEFK---NYYLNNKEAIFTDDLPVADYLDDEFINSIPPE 121
+                 K   +  +N F D++++EF+   N + N K               +  +  +P  
+Sbjct:  65  EYSEGKHAFRMAMNAFGDMTNEEFRQVMNGFQNQKHK-------KGKLFHEPLLVDVPK- 116
+
+Query:  122 EQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 181
+               + DW  +G VTPVKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVDC    
+Sbjct:  117 ---SVDWTKKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVDCSRA- 172
+
+Query:  182 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPE-EQ 240
+pattern 237                                                        ** **
+               +G    ++GCNGGL  NA+ YI  NGG+ +E SYPY A     CN+      PE   
+Sbjct:  173 ---QG----NQGCNGGLMDNAFQYIKDNGGLDSEESYPYLATDTNSCNYK-----PECSA 220
+
+Query:  241 AKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGIL 297
+            A  + F  IP+ E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L
+Sbjct:  221 ANDTGFVDIPQREKALMKAVATVGPISVAIDAGHTSFQFYKSGIYYDPDCSCKDLDHGVL 280
+
+Query:  298 IVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVS 347
+            +VGY  + T    N  +WIVKNSWG +WG  GY+ + + +N  CG++   S
+Sbjct:  281 VVGYGFEGTDSNNN-KFWIVKNSWGPEWGWNGYVKMAKDQNNHCGIATAAS 330
+
+
+>sp|Q40143|CYS3_LYCES CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 356
+
+ Score =  207 bits (522), Expect = 2e-53
+ Identities = 129/331 (38%), Positives = 181/331 (53%), Gaps = 40/331 (12%)
+
+Query:  29  FLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+            F  F  +  K+Y S EE  +RFEIF  NL  I   N   +++K     G+N+F DL+ DE
+Sbjct:  57  FARFAIRHRKRYDSVEEIKQRFEIFLDNLKMIRSHNRKGLSYK----LGINEFTDLTWDE 112
+
+Query:  88  FKNYYLN---NKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCG 144
+            F+ + L    N  A    +L +         N + PE +   DWR  G V+PVK QG+CG
+Sbjct:  113 FRKHKLGASQNCSATTKGNLKLT--------NVVLPETK---DWRKDGIVSPVKAQGKCG 161
+
+Query:  145 SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN 204
+            SCW+FSTTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ 
+Sbjct:  162 SCWTFSTTGALEAAYAQAFGKGISLSEQQLVDCAGAFNNF--------GCNGGLPSQAFE 213
+
+Query:  205 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTG 264
+pattern 237                                 ****
+            YI  NGG+ TE +YPYT + G  C F+ ANIG +  + + N T+  + E   A  +V   
+Sbjct:  214 YIKFNGGLDTEEAYPYTGKNGI-CKFSQANIGVKVISSV-NITLGAEYELKYAVALVR-- 269
+
+Query:  265 PLAIAADAVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNS 320
+            P+++A + V+ ++ Y  GV+   +    P  ++H +L VGY  +N       PYW++KNS
+Sbjct:  270 PVSVAFEVVKGFKQYKSGVYASTECGDTPMDVNHAVLAVGYGVEN-----GTPYWLIKNS 324
+
+Query:  321 WGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            WGADWGE GY  +  GKN CGV+   S  I+
+Sbjct:  325 WGADWGEDGYFKMEMGKNMCGVATCASYPIV 355
+
+
+>sp|Q05094|CYS2_LEIPI CYSTEINE PROTEINASE 2 PRECURSOR (AMASTIGOTE CYSTEINE PROTEINASE A-2)
+          Length = 444
+
+ Score =  207 bits (521), Expect = 3e-53
+ Identities = 122/327 (37%), Positives = 177/327 (53%), Gaps = 39/327 (11%)
+
+Query:  29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKA---DTKFGVNKFADLS 84
+            F EF+  + + Y    E  +R   F+ NL  + E       H+A     +FG+ KF DLS
+Sbjct:  38  FEEFKRTYGRAYETLAEEQQRLANFERNLELMRE-------HQARNPHAQFGITKFFDLS 90
+
+Query:  85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEF--INSIPPEEQTAFDWRTRGAVTPVKNQGQ 142
+              EF   YLN            A +       ++++P     A DWR +GAVTPVK+QG 
+Sbjct:  91  EAEFAARYLNGAAYFAAAKRHAAQHYRKARADLSAVPD----AVDWREKGAVTPVKDQGA 146
+
+Query:  143 CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNA 202
+            CGSCW+FS  GN+EGQ +++ ++LVSLSEQ LV CD            ++GC+GGL   A
+Sbjct:  147 CGSCWAFSAVGNIEGQWYLAGHELVSLSEQQLVSCDD----------MNDGCDGGLMLQA 196
+
+Query:  203 YNYIIK--NGGIQTESSYPYTAETG--TQCNFNSANIGPEEQAKISNFTMIPKNETVMAG 258
+pattern 237                                       ****
+            ++++++  NG + TE SYPY +  G   +C+ +S  +     A+I    +I  +E  MA 
+Sbjct:  197 FDWLLQNTNGHLHTEDSYPYVSGNGYVPECSNSSEEL--VVGAQIDGHVLIGSSEKAMAA 254
+
+Query:  259 YIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVK 318
+            ++   GP+AIA DA  +  Y  GV    C    L+HG+L+VGY     +     PYW++K
+Sbjct:  255 WLAKNGPIAIALDASSFMSYKSGVL-TACIGKQLNHGVLLVGYDMTGEV-----PYWVIK 308
+
+Query:  319 NSWGADWGEQGYIYLRRGKNTCGVSNF 345
+            NSWG DWGEQGY+ +  G N C +S +
+Sbjct:  309 NSWGGDWGEQGYVRVVMGVNACLLSEY 335
+
+
+>sp|P36400|LCPB_LEIME CYSTEINE PROTEINASE B PRECURSOR
+          Length = 443
+
+ Score =  206 bits (520), Expect = 4e-53
+ Identities = 122/327 (37%), Positives = 177/327 (53%), Gaps = 40/327 (12%)
+
+Query:  29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKA---DTKFGVNKFADLS 84
+            F EF+  + + Y    E  +R   F+ NL  + E       H+A     +FG+ KF DLS
+Sbjct:  38  FEEFKRTYGRAYETLAEEQQRLANFERNLELMRE-------HQARNPHAQFGITKFFDLS 90
+
+Query:  85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEF--INSIPPEEQTAFDWRTRGAVTPVKNQGQ 142
+              EF   YLN            A +       ++++P     A DWR +GAVTPVK+QG 
+Sbjct:  91  EAEFAARYLNGAAYFAAAKRHAAQHYRKARADLSAVPD----AVDWREKGAVTPVKDQGA 146
+
+Query:  143 CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNA 202
+            CGSCW+FS  GN+EGQ +++ ++LVSLSEQ LV CD            ++GC+GGL   A
+Sbjct:  147 CGSCWAFSAVGNIEGQWYLAGHELVSLSEQQLVSCDD----------MNDGCDGGLMLQA 196
+
+Query:  203 YNYIIK--NGGIQTESSYPYTAETG--TQCNFNSANIGPEEQAKISNFTMIPKNETVMAG 258
+pattern 237                                       ****
+            ++++++  NG + TE SYPY +  G   +C+ +S  +     A+I    +I  +E  MA 
+Sbjct:  197 FDWLLQNTNGHLHTEDSYPYVSGNGYVPECSNSSELV---VGAQIDGHVLIGSSEKAMAA 253
+
+Query:  259 YIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVK 318
+            ++   GP+AIA DA  +  Y  GV    C    L+HG+L+VGY     +     PYW++K
+Sbjct:  254 WLAKNGPIAIALDASSFMSYKSGVL-TACIGKQLNHGVLLVGYDMTGEV-----PYWVIK 307
+
+Query:  319 NSWGADWGEQGYIYLRRGKNTCGVSNF 345
+            NSWG DWGEQGY+ +  G N C +S +
+Sbjct:  308 NSWGGDWGEQGYVRVVMGVNACLLSEY 334
+
+
+>sp|P07711|CATL_HUMAN CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP)
+          Length = 333
+
+ Score =  206 bits (520), Expect = 4e-53
+ Identities = 125/349 (35%), Positives = 187/349 (52%), Gaps = 34/349 (9%)
+
+Query:  8   VLAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLI 65
+            +LA F + ++S  +  +   ++Q+ +++   N+ Y   E   R  +++ N+  IE  N  
+Sbjct:  6   ILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQE 65
+
+Query:  66  AINHKADTKFGVNKFADLSSDEFK---NYYLNNKEAIFTDDLPVADYLDDEFINSIPPEE 122
+                K      +N F D++S+EF+   N + N K                 F   +  E 
+Sbjct:  66  YREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPR-----------KGKVFQEPLFYEA 114
+
+Query:  123 QTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECM 182
+              + DWR +G VTPVKNQGQCGSCW+FS TG +EGQ F    +L+SLSEQNLVDC     
+Sbjct:  115 PRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDC----- 169
+
+Query:  183 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAK 242
+pattern 237                                                       ****
+               G +  +EGCNGGL   A+ Y+  NGG+ +E SYPY A T   C +N         A 
+Sbjct:  170 --SGPQG-NEGCNGGLMDYAFQYVQDNGGLDSEESYPYEA-TEESCKYNP----KYSVAN 221
+
+Query:  243 ISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIV 299
+             + F  IPK E  +   + + GP+++A DA    + FY  G+ F+  C+   +DHG+L+V
+Sbjct:  222 DTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVV 281
+
+Query:  300 GYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVS 347
+            GY  ++T    N  YW+VKNSWG +WG  GY+ + +  +N CG+++  S
+Sbjct:  282 GYGFEST-ESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAAS 329
+
+
+>sp|Q28944|CATL_PIG CATHEPSIN L PRECURSOR
+          Length = 334
+
+ Score =  206 bits (519), Expect = 5e-53
+ Identities = 121/316 (38%), Positives = 167/316 (52%), Gaps = 33/316 (10%)
+
+Query:  40  YSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFK---NYYLNNK 96
+            Y   E   R  +++ N+  IE  N      K      +N F D++++EF+   N + N K
+Sbjct:  40  YGMNEEGWRRAVWEKNMKMIELHNQEYSQGKHGFSMAMNAFGDMTNEEFRQVMNGFQNQK 99
+
+Query:  97  EAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVE 156
+                             F  S+  E   + DWR +G VT VKNQGQCGSCW+FS TG +E
+Sbjct:  100 HK-----------KGKVFHESLVLEVPKSVDWREKGYVTAVKNQGQCGSCWAFSATGALE 148
+
+Query:  157 GQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTES 216
+            GQ F    KLVSLSEQNLVDC       +G    ++GCNGGL  NA+ Y+  NGG+ TE 
+Sbjct:  149 GQMFRKTGKLVSLSEQNLVDCSRP----QG----NQGCNGGLMDNAFQYVKDNGGLDTEE 200
+
+Query:  217 SYPYTAETGTQCNFNSANIGPE-EQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--V 273
+pattern 237                     ** **
+            SYPY       C +      PE   A  + F  IP+ E  +   + + GP+++A DA   
+Sbjct:  201 SYPYLGRETNSCTYK-----PECSAANDTGFVDIPQREKALMKAVATVGPISVAIDAGHS 255
+
+Query:  274 EWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIY 332
+             +QFY  G+ +D  C+   LDHG+L+VGY  + T    +  +WIVKNSWG +WG  GY+ 
+Sbjct:  256 SFQFYKSGIYYDPDCSSKDLDHGVLVVGYGFEGT-DSNSSKFWIVKNSWGPEWGWNGYVK 314
+
+Query:  333 LRRGKNT-CGVSNFVS 347
+            + + +N  CG+S   S
+Sbjct:  315 MAKDQNNHCGISTAAS 330
+
+
+>sp|P00785|ACTN_ACTCH ACTINIDAIN PRECURSOR (ACTINIDIN)
+          Length = 380
+
+ Score =  204 bits (513), Expect = 3e-52
+ Identities = 124/334 (37%), Positives = 178/334 (53%), Gaps = 41/334 (12%)
+
+Query:  24  EEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADT----KFGVN 78
+            E ++ +  +  K+ K Y S  E+  RFEIFK  L  I+E       H ADT    K G+N
+Sbjct:  37  EVKAMYESWLIKYGKSYNSLGEWERRFEIFKETLRFIDE-------HNADTNRSYKVGLN 89
+
+Query:  79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVK 138
+            +FADL+ +EF++ YL       ++   V++  +  F   +P    +  DWR+ GAV  +K
+Sbjct:  90  QFADLTDEEFRSTYLGFTSG--SNKTKVSNRYEPRFGQVLP----SYVDWRSAGAVVDIK 143
+
+Query:  139 NQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGL 198
+            +QG+CG CW+FS    VEG + I    L+SLSEQ L+DC        G      GCNGG 
+Sbjct:  144 SQGECGGCWAFSAIATVEGINKIVTGVLISLSEQELIDC--------GRTQNTRGCNGGY 195
+
+Query:  199 QPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAG 258
+pattern 237                                       ****
+              + + +II NGGI TE +YPYTA+ G +CN +  N   E+   I  +  +P N      
+Sbjct:  196 ITDGFQFIINNGGINTEENYPYTAQDG-ECNLDLQN---EKYVTIDTYENVPYNNEWALQ 251
+
+Query:  259 YIVSTGPLAIAADAV--EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWI 316
+              V+  P+++A DA    ++ Y  G+F  PC   ++DH + IVGY  +  I      YWI
+Sbjct:  252 TAVTYQPVSVALDAAGDAFKHYSSGIFTGPCG-TAIDHAVTIVGYGTEGGI-----DYWI 305
+
+Query:  317 VKNSWGADWGEQGYIYLRR---GKNTCGVSNFVS 347
+            VKNSW   WGE+GY+ + R   G  TCG++   S
+Sbjct:  306 VKNSWDTTWGEEGYMRILRNVGGAGTCGIATMPS 339
+
+
+>sp|P25803|CYSP_PHAVU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYSTEINE PROTEINASE EP-C1)
+          Length = 362
+
+ Score =  203 bits (510), Expect = 6e-52
+ Identities = 125/313 (39%), Positives = 177/313 (55%), Gaps = 35/313 (11%)
+
+Query:  47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK---EAIFTDD 103
+            +RF +FK+NL  +   N +   +K      +NKFAD+++ EF++ Y  +K     +F   
+Sbjct:  58  KRFNVFKANLMHVHNTNKMDKPYKLK----LNKFADMTNHEFRSTYAGSKVNHPRMFRGT 113
+
+Query:  104 LPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 163
+                     E + S+PP    + DWR +GAVT VK+QGQCGSCW+FST   VEG + I  
+Sbjct:  114 PHENGAFMYEKVVSVPP----SVDWRKKGAVTDVKDQGQCGSCWAFSTVVAVEGINQIKT 169
+
+Query:  164 NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAE 223
+            NKLV+LSEQ LVDCD E          ++GCNGGL  +A+ +I + GGI TES+YPY A+
+Sbjct:  170 NKLVALSEQELVDCDKE---------ENQGCNGGLMESAFEFIKQKGGITTESNYPYKAQ 220
+
+Query:  224 TGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGG 281
+pattern 237              ****
+             GT C+ +  N   +    I     +P N+       V+  P+++A DA   ++QFY  G
+Sbjct:  221 EGT-CDASKVN---DLAVSIDGHENVPANDEDALLKAVANQPVSVAIDAGGSDFQFYSEG 276
+
+Query:  282 VFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----K 337
+            VF   C+   L+HG+ IVGY    T+   N  YWIV+NSWG +WGE GYI ++R     +
+Sbjct:  277 VFTGDCS-TDLNHGVAIVGYG--TTVDGTN--YWIVRNSWGPEWGEHGYIRMQRNISKKE 331
+
+Query:  338 NTCGVSNFVSTSI 350
+              CG++   S  I
+Sbjct:  332 GLCGIAMLPSYPI 344
+
+
+>sp|Q10991|CATL_SHEEP CATHEPSIN L
+          Length = 217
+
+ Score =  201 bits (507), Expect = 1e-51
+ Identities = 105/226 (46%), Positives = 139/226 (61%), Gaps = 23/226 (10%)
+
+Query:  127 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 186
+            DW  +G VTPVKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVD          
+Sbjct:  6   DWTKKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVD--------SS 57
+
+Query:  187 EEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPE-EQAKISN 245
+pattern 237                                                   ** **
+                ++GCNGGL  NA+ YI +NGG+ +E SYPY A T T CN+      PE   AK + 
+Sbjct:  58  RPQGNQGCNGGLMDNAFQYIKENGGLDSEESYPYEA-TDTSCNYK-----PEYSAAKDTG 111
+
+Query:  246 FTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYS 302
+            F  IP+ E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L+VGY 
+Sbjct:  112 FVDIPQREKALMKAVATVGPISVAIDAGHSSFQFYKSGIYYDPDCSSKDLDHGVLVVGYG 171
+
+Query:  303 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVS 347
+             + T    N  +WIVKNSWG +WG +GY+ + + +N  CG++   S
+Sbjct:  172 FEGT----NNKFWIVKNSWGPEWGNKGYVKMAKDQNNHCGIATAAS 213
+
+
+>sp|P43156|CYSP_HEMSP THIOL PROTEASE SEN102 PRECURSOR
+          Length = 360
+
+ Score =  201 bits (506), Expect = 2e-51
+ Identities = 121/307 (39%), Positives = 161/307 (52%), Gaps = 28/307 (9%)
+
+Query:  43  EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTD 102
+            +E   RF +FK N+  I E N       A  K  +NKF D+++ EF++ Y  +K      
+Sbjct:  54  DEKNRRFNVFKENVKFIHEFNQ---KKDAPYKLALNKFGDMTNQEFRSKYAGSKIQHHRS 110
+
+Query:  103 DLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFIS 162
+               +          ++      + DWR +GAVT VK+QGQCGSCW+FST  +VEG + I 
+Sbjct:  111 QRGIQKNTGSFMYENVGSLPAASIDWRAKGAVTGVKDQGQCGSCWAFSTIASVEGINQIK 170
+
+Query:  163 QNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTA 222
+              +LVSLSEQ LVDCD          + +EGCNGGL   A+ +I KN GI TE SYPY  
+Sbjct:  171 TGELVSLSEQELVDCD---------TSYNEGCNGGLMDYAFEFIQKN-GITTEDSYPYAE 220
+
+Query:  223 ETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIG 280
+pattern 237               ****
+            + GT C  N  N        I     +P N        V+  P++++ +A    +QFY  
+Sbjct:  221 QDGT-CASNLLN---SPVVSIDGHQDVPANNENALMQAVANQPISVSIEASGYGFQFYSE 276
+
+Query:  281 GVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG---- 336
+            GVF   C    LDHG+ IVGY A     R    YWIVKNSWG +WGE GYI ++RG    
+Sbjct:  277 GVFTGRCG-TELDHGVAIVGYGAT----RDGTKYWIVKNSWGEEWGESGYIRMQRGISDK 331
+
+Query:  337 KNTCGVS 343
+            +  CG++
+Sbjct:  332 RGKCGIA 338
+
+
+>sp|P54639|CYS4_DICDI CYSTEINE PROTEINASE 4 PRECURSOR
+          Length = 442
+
+ Score =  200 bits (504), Expect = 3e-51
+ Identities = 117/308 (37%), Positives = 169/308 (53%), Gaps = 32/308 (10%)
+
+Query:  4   ILLFVLAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEE 61
+            +L F+  +   + S++    E Q  + F  +     + YS EE+  R++IFKSN+  + +
+Sbjct:  3   VLSFLCLLLVSYASAKQQFSELQYRNAFTNWMQAHQRTYSSEEFNARYQIFKSNMDYVHQ 62
+
+Query:  62  LNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPE 121
+             N    +   +T  G+N FAD+++ E++  YL      F     +    ++E I S P  
+Sbjct:  63  WN----SKGGETVLGLNVFADITNQEYRTTYLGTP---FDGSALIGT--EEEKIFSTPAP 113
+
+Query:  122 EQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFI---SQNKLVSLSEQNLVDCD 178
+                 DWR +GAVTP+KNQGQCG CWSFSTTG+ EG HFI   ++  LVSLSEQNL+DC 
+Sbjct:  114 ---TVDWRAQGAVTPIKNQGQCGGCWSFSTTGSTEGAHFIASGTKKDLVSLSEQNLIDC- 169
+
+Query:  179 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPE 238
+pattern 237                                                           **
+                    +   + GC GGL    + YII N GI TESSYPYTAE G +C F ++NIG  
+Sbjct:  170 -------SKSYGNNGCEGGLMTLGFEYIINNKGIDTESSYPYTAEDGKECKFKTSNIG-- 220
+
+Query:  239 EQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHG 295
+pattern 239 **
+              A+I ++  +            +  P+++A DA    +Q Y  G++  P C P  LDHG
+Sbjct:  221 --AQIVSYQNVTSGSEASLQSASNNAPVSVAIDASNESFQLYESGIYYEPACTPTQLDHG 278
+
+Query:  296 ILIVGYSA 303
+            +L+VGY +
+Sbjct:  279 VLVVGYGS 286
+
+
+ Score = 48.8 bits (114), Expect = 2e-05
+ Identities = 18/35 (51%), Positives = 24/35 (68%), Gaps = 1/35 (2%)
+
+Query: 314 YWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 347
+           YWIVKNSWG  WG  GYI++ + + N CG++   S
+Sbjct: 401 YWIVKNSWGTSWGMDGYIFMSKDRNNNCGIATMAS 435
+
+
+>sp|O60911|CATM_HUMAN CATHEPSIN L2 PRECURSOR (CATHEPSIN V)
+          Length = 334
+
+ Score =  199 bits (501), Expect = 7e-51
+ Identities = 127/357 (35%), Positives = 191/357 (52%), Gaps = 43/357 (12%)
+
+Query:  5   LLFVLAVFTVFVSSRGIPPEEQS---QFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEE 61
+            L  VLA F + ++S  +P  +Q+   ++ +++    + Y   E   R  +++ N+  IE 
+Sbjct:  3   LSLVLAAFCLGIAS-AVPKFDQNLDTKWYQWKATHRRLYGANEEGWRRAVWEKNMKMIEL 61
+
+Query:  62  LNLIAINHKADTKFGVNKFADLSSDEFKNY---YLNNK---EAIFTDDLPVADYLDDEFI 115
+             N      K      +N F D++++EF+     + N K     +F + L    +LD    
+Sbjct:  62  HNGEYSQGKHGFTMAMNAFPDMTNEEFRQMMGCFRNQKFRKGKVFREPL----FLD---- 113
+
+Query:  116 NSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLV 175
+              +P     + DWR +G VTPVKNQ QCGSCW+FS TG +EGQ F    KLVSLSEQNLV
+Sbjct:  114 --LPK----SVDWRKKGYVTPVKNQKQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLV 167
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANI 235
+            DC       +G    ++GCNGG    A+ Y+ +NGG+ +E SYPY A     C +   N 
+Sbjct:  168 DCSRP----QG----NQGCNGGFMARAFQYVKENGGLDSEESYPYVA-VDEICKYRPEN- 217
+
+Query:  236 GPEEQAKISNFTMI-PKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNS 291
+pattern 237  ****
+                 A  + FT++ P  E  +   + + GP+++A DA    +QFY  G+ F+  C+  +
+Sbjct:  218 ---SVANDTGFTVVAPGKEKALMKAVATVGPISVAMDAGHSSFQFYKSGIYFEPDCSSKN 274
+
+Query:  292 LDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVS 347
+            LDHG+L+VGY  +      N  YW+VKNSWG +WG  GY+ + + KN  CG++   S
+Sbjct:  275 LDHGVLVVGYGFEGA-NSNNSKYWLVKNSWGPEWGSNGYVKIAKDKNNHCGIATAAS 330
+
+
+>sp|O10364|CATV_NPVOP VIRAL CATHEPSIN (V-CATH)
+          Length = 324
+
+ Score =  196 bits (494), Expect = 5e-50
+ Identities = 116/322 (36%), Positives = 168/322 (52%), Gaps = 30/322 (9%)
+
+Query:  29  FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+            F +F  KFNK YS E E L RF+IF+ NL +I   N     + +  ++ +NKF+DLS +E
+Sbjct:  28  FEDFLHKFNKNYSSESEKLHRFKIFQHNLEEIINKN----QNDSTAQYEINKFSDLSKEE 83
+
+Query:  88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCW 147
+              + Y        T +      LD       P      FDWR    VT VKNQG CG+CW
+Sbjct:  84  AISKYTGLSLPHQTQNFCEVVILDRP-----PDRGPLEFDWRQFNKVTSVKNQGVCGACW 138
+
+Query:  148 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 207
+            +F+T G++E Q  I  N+L++LSEQ  +DCD            + GC+GGL   A+   +
+Sbjct:  139 AFATLGSLESQFAIKYNRLINLSEQQFIDCDR----------VNAGCDGGLLHTAFESAM 188
+
+Query:  208 KNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLA 267
+pattern 237                              ****
+            + GG+Q ES YPY    G QC  N        ++      M    E  +   + + GP+ 
+Sbjct:  189 EMGGVQMESDYPYETANG-QCRINPNRFVVGVRSCRRYIVMF---EEKLKDLLRAVGPIP 244
+
+Query:  268 IAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 327
+            +A DA +   Y  G+    C  + L+H +L+VGY+ +N     N+PYWI+KN+WG DWGE
+Sbjct:  245 VAIDASDIVNYRRGIMR-QCANHGLNHAVLLVGYAVEN-----NIPYWILKNTWGTDWGE 298
+
+Query:  328 QGYIYLRRGKNTCGVSNFVSTS 349
+             GY  +++  N CG+ N + +S
+Sbjct:  299 DGYFRVQQNINACGIRNELVSS 320
+
+
+>sp|P25777|ORYB_ORYSA ORYZAIN BETA CHAIN PRECURSOR
+          Length = 471
+
+ Score =  196 bits (494), Expect = 5e-50
+ Identities = 115/310 (37%), Positives = 166/310 (53%), Gaps = 31/310 (10%)
+
+Query:  44  EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDD 103
+            E+  RF +F  NL  ++  N  A +     + G+N+FADL+++EF+  +L  K A     
+Sbjct:  69  EHERRFLVFWDNLKFVDAHNARA-DEGGGFRLGMNRFADLTNEEFRATFLGAKVA--ERS 125
+
+Query:  104 LPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 163
+                +    + +  +P     + DWR +GAV PVKNQGQCGSCW+FS    VE  + +  
+Sbjct:  126 RAAGERYRHDGVEELPE----SVDWREKGAVAPVKNQGQCGSCWAFSAVSTVESINQLVT 181
+
+Query:  164 NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAE 223
+             ++++LSEQ LV+C             + GCNGGL  +A+++IIKNGGI TE  YPY A 
+Sbjct:  182 GEMITLSEQELVEC--------STNGQNSGCNGGLMADAFDFIIKNGGIDTEDDYPYKAV 233
+
+Query:  224 TGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGG 281
+pattern 237              ****
+             G +C+ N  N    +   I  F  +P+N+       V+  P+++A +A   E+Q Y  G
+Sbjct:  234 DG-KCDINREN---AKVVSIDGFEDVPQNDEKSLQKAVAHQPVSVAIEAGGREFQLYHSG 289
+
+Query:  282 VFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-- 339
+            VF   C   SLDHG++ VGY   N        YWIV+NSWG  WGE GY+ + R  N   
+Sbjct:  290 VFSGRCG-TSLDHGVVAVGYGTDN-----GKDYWIVRNSWGPKWGESGYVRMERNINVTT 343
+
+Query:  340 --CGVSNFVS 347
+              CG++   S
+Sbjct:  344 GKCGIAMMAS 353
+
+
+>sp|P25776|ORYA_ORYSA ORYZAIN ALPHA CHAIN PRECURSOR
+          Length = 458
+
+ Score =  194 bits (488), Expect = 2e-49
+ Identities = 124/355 (34%), Positives = 183/355 (50%), Gaps = 43/355 (12%)
+
+Query:  3   VILLFVLAVFTVFVSSRGIPPEEQSQ--FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+            ++LL  LA   + + S G   EE+++  + E++ +  K Y+   E   R+  F+ NL  I
+Sbjct:  12  LLLLLSLAAADMSIVSYGERSEEEARRLYAEWKAEHGKSYNAVGEEERRYAAFRDNLRYI 71
+
+Query:  60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN-----NKEAIFTDDLPVADYLDDEF 114
+            +E N  A       + G+N+FADL+++E+++ YL       +E   +D    AD      
+Sbjct:  72  DEHNAAADAGVHSFRLGLNRFADLTNEEYRDTYLGLRNKPRRERKVSDRYLAAD------ 125
+
+Query:  115 INSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNL 174
+             N   PE   + DWRT+GAV  +K+QG CGSCW+FS    VE  + I    L+SLSEQ L
+Sbjct:  126 -NEALPE---SVDWRTKGAVAEIKDQGGCGSCWAFSAIAAVEDINQIVTGDLISLSEQEL 181
+
+Query:  175 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSAN 234
+            VDCD          + +EGCNGGL   A+++II NGGI TE  YPY  +   +C+ N  N
+Sbjct:  182 VDCD---------TSYNEGCNGGLMDYAFDFIINNGGIDTEDDYPYKGK-DERCDVNRKN 231
+
+Query:  235 IGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSL 292
+pattern 237   ****
+                +   I ++  +  N        V   P+++A +A    +Q Y  G+F   C   +L
+Sbjct:  232 ---AKVVTIDSYEDVTPNSETSLQKAVRNQPVSVAIEAGGRAFQLYSSGIFTGKCG-TAL 287
+
+Query:  293 DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR----GKNTCGVS 343
+            DHG+  VGY  +N        YWIV+NSWG  WGE GY+ + R        CG++
+Sbjct:  288 DHGVAAVGYGTEN-----GKDYWIVRNSWGKSWGESGYVRMERNIKASSGKCGIA 337
+
+
+>sp|P43297|RD21_ARATH CYSTEINE PROTEINASE RD21A PRECURSOR
+          Length = 462
+
+ Score =  193 bits (486), Expect = 4e-49
+ Identities = 122/321 (38%), Positives = 168/321 (52%), Gaps = 43/321 (13%)
+
+Query:  35  KFNKKYSHEEYLE---RFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNY 91
+            K  K  S    +E   RFEIFK NL  ++E N   ++++     G+ +FADL++DE+++ 
+Sbjct:  56  KHGKAQSQNSLVEKDRRFEIFKDNLRFVDEHNEKNLSYR----LGLTRFADLTNDEYRSK 111
+
+Query:  92  YLN---NKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWS 148
+            YL     K+      L     + DE   SI        DWR +GAV  VK+QG CGSCW+
+Sbjct:  112 YLGAKMEKKGERRTSLRYEARVGDELPESI--------DWRKKGAVAEVKDQGGCGSCWA 163
+
+Query:  149 FSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK 208
+            FST G VEG + I    L++LSEQ LVDCD          + +EGCNGGL   A+ +IIK
+Sbjct:  164 FSTIGAVEGINQIVTGDLITLSEQELVDCD---------TSYNEGCNGGLMDYAFEFIIK 214
+
+Query:  209 NGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAI 268
+pattern 237                             ****
+            NGGI T+  YPY    GT C+    N    +   I ++  +P          V+  P++I
+Sbjct:  215 NGGIDTDKDYPYKGVDGT-CDQIRKN---AKVVTIDSYEDVPTYSEESLKKAVAHQPISI 270
+
+Query:  269 AADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 326
+            A +A    +Q Y  G+FD  C    LDHG++ VGY  +N        YWIV+NSWG  WG
+Sbjct:  271 AIEAGGRAFQLYDSGIFDGSCG-TQLDHGVVAVGYGTEN-----GKDYWIVRNSWGKSWG 324
+
+Query:  327 EQGYIYLRR----GKNTCGVS 343
+            E GY+ + R        CG++
+Sbjct:  325 ESGYLRMARNIASSSGKCGIA 345
+
+
+>sp|Q10717|CYS2_MAIZE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 360
+
+ Score =  193 bits (485), Expect = 5e-49
+ Identities = 115/329 (34%), Positives = 172/329 (51%), Gaps = 32/329 (9%)
+
+Query:  28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+            +F  F  ++ K Y S  E  +RF IF  +L  +   N   ++++     G+N+FAD+S +
+Sbjct:  58  RFARFAVRYGKSYESAAEVHKRFRIFSESLQLVRSTNRKGLSYR----LGINRFADMSWE 113
+
+Query:  87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSC 146
+            EF+   L   +         A    +  + +         DWR  G V+PVKNQG CGSC
+Sbjct:  114 EFRATRLGAAQNCS------ATLTGNHRMRAAAVALPETKDWREDGIVSPVKNQGHCGSC 167
+
+Query:  147 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 206
+            W+FSTTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI
+Sbjct:  168 WTFSTTGALEAAYTQATGKPISLSEQQLVDCGFAFNNF--------GCNGGLPSQAFEYI 219
+
+Query:  207 IKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+              NGG+ TE SYPY    G  C F + N+G +    + N T+  ++E   A  +V   P+
+Sbjct:  220 KYNGGLDTEESYPYQGVNGI-CKFKNENVGVKVLDSV-NITLGAEDELKDAVGLVR--PV 275
+
+Query:  267 AIAADAVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWG 322
+            ++A + +  ++ Y  GV+        P  ++H +L VGY  ++      +PYW++KNSWG
+Sbjct:  276 SVAFEVITGFRLYKSGVYTSDHCGTTPMDVNHAVLAVGYGVED-----GVPYWLIKNSWG 330
+
+Query:  323 ADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            ADWG++GY  +  GKN CGV+   S  I+
+Sbjct:  331 ADWGDEGYFKMEMGKNMCGVATCASYPIV 359
+
+
+>sp|P14080|PAP2_CARPA CHYMOPAPAIN PRECURSOR (PAPAYA PROTEINASE II) (PPII)
+          Length = 352
+
+ Score =  192 bits (482), Expect = 1e-48
+ Identities = 128/319 (40%), Positives = 169/319 (52%), Gaps = 43/319 (13%)
+
+Query:  35  KFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKFADLSSDEFKNY 91
+            K NK Y S +E + RFEIF+ NL  I+E N      K +  +  G+N FADLS+DEFK  
+Sbjct:  54  KHNKIYESIDEKIYRFEIFRDNLMYIDETN------KKNNSYWLGLNGFADLSNDEFKKK 107
+
+Query:  92  YLNNKEAIFTDDLPVADYLDDE-FINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFS 150
+            Y+        +D    ++ D+E F          + DWR +GAVTPVKNQG CGSCW+FS
+Sbjct:  108 YVG----FVAEDFTGLEHFDNEDFTYKHVTNYPQSIDWRAKGAVTPVKNQGACGSCWAFS 163
+
+Query:  151 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 210
+            T   VEG + I    L+ LSEQ LVDCD              GC GG Q  +  Y + N 
+Sbjct:  164 TIATVEGINKIVTGNLLELSEQELVDCDKH----------SYGCKGGYQTTSLQY-VANN 212
+
+Query:  211 GIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKN-ETVMAGYIVSTGPLAIA 269
+pattern 237                           ****
+            G+ T   YPY A+   +C    A   P  + KI+ +  +P N ET   G + +  PL++ 
+Sbjct:  213 GVHTSKVYPYQAKQ-YKCR---ATDKPGPKVKITGYKRVPSNCETSFLGALANQ-PLSVL 267
+
+Query:  270 ADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 327
+             +A    +Q Y  GVFD PC    LDH +  VGY   +    KN  Y I+KNSWG +WGE
+Sbjct:  268 VEAGGKPFQLYKSGVFDGPCG-TKLDHAVTAVGYGTSD---GKN--YIIIKNSWGPNWGE 321
+
+Query:  328 QGYIYLRR----GKNTCGV 342
+            +GY+ L+R     + TCGV
+Sbjct:  322 KGYMRLKRQSGNSQGTCGV 340
+
+
+>sp|P00786|CATH_RAT CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPSIN BA)
+          Length = 333
+
+ Score =  192 bits (482), Expect = 1e-48
+ Identities = 121/333 (36%), Positives = 173/333 (51%), Gaps = 38/333 (11%)
+
+Query:  25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+            E+  F  +  +  K YS  EY  R ++F +N  KI+  N    NH    K G+N+F+D+S
+Sbjct:  29  EKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHN--QRNHTF--KMGLNQFSDMS 84
+
+Query:  85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRG-AVTPVKNQGQC 143
+              E K+ YL ++                 ++    P   ++ DWR +G  V+PVKNQG C
+Sbjct:  85  FAEIKHKYLWSEPQN-------CSATKSNYLRGTGPYP-SSMDWRKKGNVVSPVKNQGAC 136
+
+Query:  144 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 203
+            GSCW+FSTTG +E    I+  K+++L+EQ LVDC         +   + GC GGL   A+
+Sbjct:  137 GSCWTFSTTGALESAVAIASGKMMTLAEQQLVDC--------AQNFNNHGCQGGLPSQAF 188
+
+Query:  204 NYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ-AKISNFTMIPKN-ETVMAGYIV 261
+pattern 237                                  ****
+             YI+ N GI  E SYPY  + G QC FN     PE+  A + N   I  N E  M   + 
+Sbjct:  189 EYILYNKGIMGEDSYPYIGKNG-QCKFN-----PEKAVAFVKNVVNITLNDEAAMVEAVA 242
+
+Query:  262 STGPLAIAADAVE-WQFYIGGVFDI-PCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIV 317
+               P++ A +  E +  Y  GV+    C+  P+ ++H +L VGY  +N +      YWIV
+Sbjct:  243 LYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLL-----YWIV 297
+
+Query:  318 KNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 350
+            KNSWG++WG  GY  + RGKN CG++   S  I
+Sbjct:  298 KNSWGSNWGNNGYFLIERGKNMCGLAACASYPI 330
+
+
+>sp|P25251|CYS4_BRANA CYSTEINE PROTEINASE COT44 PRECURSOR
+          Length = 328
+
+ Score =  190 bits (477), Expect = 5e-48
+ Identities = 114/304 (37%), Positives = 164/304 (53%), Gaps = 29/304 (9%)
+
+Query:  47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPV 106
+            ERF IFK NL  I+  N    N  A  K G+  FA+L++DE+++ YL  +       +  
+Sbjct:  27  ERFNIFKDNLRFIDLHN--ENNKNATYKLGLTIFANLTNDEYRSLYLGARTEPVRR-ITK 83
+
+Query:  107 ADYLDDEFINSIPPEE-QTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK 165
+            A  ++ ++  ++  +E     DWR +GAV  +K+QG CGSCW+FST   VEG + I   +
+Sbjct:  84  AKNVNMKYSAAVNVDEVPVTVDWRQKGAVNAIKDQGTCGSCWAFSTAAAVEGINKIVTGE 143
+
+Query:  166 LVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETG 225
+            LVSLSEQ LVDCD         ++ ++GCNGGL   A+ +I+KNGG+ TE  YPY    G
+Sbjct:  144 LVSLSEQELVDCD---------KSYNQGCNGGLMDYAFQFIMKNGGLNTEKDYPYHGTNG 194
+
+Query:  226 TQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVF 283
+pattern 237            ****
+             +CN    N        I  +  +P  +       VS  P+++A DA    +Q Y  G+F
+Sbjct:  195 -KCNSLLKN---SRVVTIDGYEDVPSKDETALKRAVSYQPVSVAIDAGGRAFQHYQSGIF 250
+
+Query:  284 DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNT 339
+               C  N +DH ++ VGY ++N      + YWIV+NSWG  WGE GYI + R        
+Sbjct:  251 TGKCGTN-MDHAVVAVGYGSEN-----GVDYWIVRNSWGTRWGEDGYIRMERNVASKSGK 304
+
+Query:  340 CGVS 343
+            CG++
+Sbjct:  305 CGIA 308
+
+
+>sp|P09668|CATH_HUMAN CATHEPSIN H PRECURSOR
+          Length = 335
+
+ Score =  188 bits (472), Expect = 2e-47
+ Identities = 123/332 (37%), Positives = 170/332 (51%), Gaps = 36/332 (10%)
+
+Query:  25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+            E+  F  +  K  K YS EEY  R + F SN  KI   N    N     K  +N+F+D+S
+Sbjct:  31  EKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHN----NGNHTFKMALNQFSDMS 86
+
+Query:  85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGA-VTPVKNQGQC 143
+              E K+ YL ++          ++YL        PP    + DWR +G  V+PVKNQG C
+Sbjct:  87  FAEIKHKYLWSEPQ--NCSATKSNYLRGT--GPYPP----SVDWRKKGNFVSPVKNQGAC 138
+
+Query:  144 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 203
+            GSCW+FSTTG +E    I+  K++SL+EQ LVDC  +   Y        GC GGL   A+
+Sbjct:  139 GSCWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNY--------GCQGGLPSQAF 190
+
+Query:  204 NYIIKNGGIQTESSYPYTAETGTQCNFNSAN-IGPEEQAKISNFTMIPKNETVMAGYIVS 262
+pattern 237                                   ****
+             YI+ N GI  E +YPY  + G  C F     IG  +   ++N T+   +E  M   +  
+Sbjct:  191 EYILYNKGIMGEDTYPYQGKDG-YCKFQPGKAIGFVKD--VANITIY--DEEAMVEAVAL 245
+
+Query:  263 TGPLAIAADAV-EWQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVK 318
+              P++ A +   ++  Y  G++    C+  P+ ++H +L VGY  KN I     PYWIVK
+Sbjct:  246 YNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVNHAVLAVGYGEKNGI-----PYWIVK 300
+
+Query:  319 NSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 350
+            NSWG  WG  GY  + RGKN CG++   S  I
+Sbjct:  301 NSWGPQWGMNGYFLIERGKNMCGLAACASYPI 332
+
+
+>sp|P10056|PAP3_CARPA CARICAIN PRECURSOR (PAPAYA PROTEINASE OMEGA) (PAPAYA PROTEINASE III)
+            (PPIII) (PAPAYA PEPTIDASE A)
+          Length = 348
+
+ Score =  187 bits (471), Expect = 2e-47
+ Identities = 121/319 (37%), Positives = 161/319 (49%), Gaps = 38/319 (11%)
+
+Query:  37  NKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKFADLSSDEFKNYYL 93
+            NK Y + +E L RFEIFK NL  I+E N      K +  +  G+N+FADLS+DEF   Y+
+Sbjct:  56  NKFYENVDEKLYRFEIFKDNLNYIDETN------KKNNSYWLGLNEFADLSNDEFNEKYV 109
+
+Query:  94  NNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTG 153
+             +       D  +    D+EFIN          DWR +GAVTPV++QG CGSCW+FS   
+Sbjct:  110 GS-----LIDATIEQSYDEEFINEDTVNLPENVDWRKKGAVTPVRHQGSCGSCWAFSAVA 164
+
+Query:  154 NVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ 213
+             VEG + I   KLV LSEQ LVDC+              GC GG  P A  Y+ KN GI 
+Sbjct:  165 TVEGINKIRTGKLVELSEQELVDCERR----------SHGCKGGYPPYALEYVAKN-GIH 213
+
+Query:  214 TESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV 273
+pattern 237                        ****
+              S YPY A+ GT C       GP    K S    +  N        ++  P+++  ++ 
+Sbjct:  214 LRSKYPYKAKQGT-CRAKQVG-GP--IVKTSGVGRVQPNNEGNLLNAIAKQPVSVVVESK 269
+
+Query:  274 --EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 331
+               +Q Y GG+F+ PC    +DH +  VGY            Y ++KNSWG  WGE+GYI
+Sbjct:  270 GRPFQLYKGGIFEGPCG-TKVDHAVTAVGYGKSG-----GKGYILIKNSWGTAWGEKGYI 323
+
+Query:  332 YLRRGK-NTCGVSNFVSTS 349
+             ++R   N+ GV     +S
+Sbjct:  324 RIKRAPGNSPGVCGLYKSS 342
+
+
+>sp|P25778|ORYC_ORYSA ORYZAIN GAMMA CHAIN PRECURSOR
+          Length = 362
+
+ Score =  187 bits (471), Expect = 2e-47
+ Identities = 112/329 (34%), Positives = 170/329 (51%), Gaps = 33/329 (10%)
+
+Query:  28  QFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+            +F  F  +  K+Y    E   RF IF  +L  +   N   + ++     G+N+FAD+S +
+Sbjct:  61  RFARFAVRHGKRYGDAAEVQRRFRIFSESLELVRSTNRRGLPYR----LGINRFADMSWE 116
+
+Query:  87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSC 146
+            EF+   L   +         A    +  +   P   +T  DWR  G V+PVK+QG CGSC
+Sbjct:  117 EFQASRLGAAQNCS------ATLAGNHRMRDAPALPETK-DWREDGIVSPVKDQGHCGSC 169
+
+Query:  147 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 206
+            W FSTTG++E ++  +    VSLSEQ L DC      +        GC+GGL   A+ YI
+Sbjct:  170 WPFSTTGSLEARYTQATGPPVSLSEQQLADCATRYNNF--------GCSGGLPSQAFEYI 221
+
+Query:  207 IKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+              NGG+ TE +YPYT   G  C++   N G +    + N T++ ++E   A  +V   P+
+Sbjct:  222 KYNGGLDTEEAYPYTGVNGI-CHYKPENAGVKVLDSV-NITLVAEDELKNAVGLVR--PV 277
+
+Query:  267 AIAADAVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWG 322
+            ++A   +  ++ Y  GV+       +P  ++H +L VGY  +N      +PYW++KNSWG
+Sbjct:  278 SVAFQVINGFRMYKSGVYTSDHCGTSPMDVNHAVLAVGYGVEN-----GVPYWLIKNSWG 332
+
+Query:  323 ADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            ADWG+ GY  +  GKN CG++   S  I+
+Sbjct:  333 ADWGDNGYFTMEMGKNMCGIATCASYPIV 361
+
+
+>sp|P15242|TES1_RAT TESTIN 1/2 PRECURSOR (CMB-22/CMB-23)
+          Length = 333
+
+ Score =  187 bits (469), Expect = 4e-47
+ Identities = 115/356 (32%), Positives = 184/356 (51%), Gaps = 30/356 (8%)
+
+Query:  3   VILLFVLAVFTVFVSSRGIPPEEQS--QFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+            +I +  LA+  + V S    P+     ++ E++ K  K Y+  E   +  +++ N   IE
+Sbjct:  1   MIAVLFLAILCLEVDSTAPTPDPSLDVEWNEWRTKHGKTYNMNEERLKRAVWEKNFKMIE 60
+
+Query:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN-NKEAIFTDDLPVADYLDDEFINSIP 119
+              N   +  + D    +N F DL++ EF        ++ I    +    + D +F+  +P
+Sbjct:  61  LHNWEYLEGRHDFTMAMNAFGDLTNIEFVKMMTGFQRQKIKKTHI----FQDHQFLY-VP 115
+
+Query:  120 PEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 179
+                   DWR  G VTPVKNQG C S W+FS TG++EGQ F    +L+ LSEQNL+DC  
+Sbjct:  116 KR----VDWRQLGYVTPVKNQGHCASSWAFSATGSLEGQMFRKTERLIPLSEQNLLDCMG 171
+
+Query:  180 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEE 239
+pattern 237                                                          ***
+              + +        GC+GG    A+ Y+  NGG+ TE SYPY  + G +C +++ N     
+Sbjct:  172 SNVTH--------GCSGGFMQYAFQYVKDNGGLATEESYPYRGQ-GRECRYHAEN----S 218
+
+Query:  240 QAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGI 296
+pattern 240 *
+             A + +F  IP +E  +   +   GP+++A DA    +QFY  G++  P C    L+H +
+Sbjct:  219 AANVRDFVQIPGSEEALMKAVAKVGPISVAVDASHGSFQFYGSGIYYEPQCKRVHLNHAV 278
+
+Query:  297 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 351
+            L+VGY  +      N  +W+VKNSWG +WG +GY+ L +   N CG++ + +  I+
+Sbjct:  279 LVVGYGFEGEESDGN-SFWLVKNSWGEEWGMKGYMKLAKDWSNHCGIATYSTYPIV 333
+
+
+>sp|O46427|CATH_PIG CATHEPSIN H PRECURSOR
+          Length = 335
+
+ Score =  186 bits (468), Expect = 5e-47
+ Identities = 124/343 (36%), Positives = 176/343 (51%), Gaps = 42/343 (12%)
+
+Query:  17  SSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFG 76
+            S+  +   E+  F  +  +  KKYS EEY  R ++F SN  KI   N  A NH    K G
+Sbjct:  23  SNLAVSSFEKLHFKSWMVQHQKKYSLEEYHHRLQVFVSNWRKINAHN--AGNHTF--KLG 78
+
+Query:  77  VNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGA-VT 135
+            +N+F+D+S DE ++ YL ++           +YL        PP    + DWR +G  V+
+Sbjct:  79  LNQFSDMSFDEIRHKYLWSEPQ--NCSATKGNYLRGT--GPYPP----SMDWRKKGNFVS 130
+
+Query:  136 PVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCN 195
+            PVKNQG CGSCW+FSTTG +E    I+  K++SL+EQ LVDC         +   + GC 
+Sbjct:  131 PVKNQGSCGSCWTFSTTGALESAVAIATGKMLSLAEQQLVDC--------AQNFNNHGCQ 182
+
+Query:  196 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ----AKISNFTMIPK 251
+pattern 237                                          ****
+            GGL   A+ YI  N GI  E +YPY  +    C F      P++       ++N TM   
+Sbjct:  183 GGLPSQAFEYIRYNKGIMGEDTYPYKGQ-DDHCKFQ-----PDKAIAFVKDVANITM--N 234
+
+Query:  252 NETVMAGYIVSTGPLAIAADAV-EWQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTI 307
+            +E  M   +    P++ A +   ++  Y  G++    C+  P+ ++H +L VGY  +N I
+Sbjct:  235 DEEAMVEAVALYNPVSFAFEVTNDFLMYRKGIYSSTSCHKTPDKVNHAVLAVGYGEENGI 294
+
+Query:  308 FRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 350
+                 PYWIVKNSWG  WG  GY  + RGKN CG++   S  I
+Sbjct:  295 -----PYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPI 332
+
+
+>sp|P05167|ALEU_HORVU THIOL PROTEASE ALEURAIN PRECURSOR
+          Length = 362
+
+ Score =  185 bits (466), Expect = 9e-47
+ Identities = 111/329 (33%), Positives = 169/329 (50%), Gaps = 33/329 (10%)
+
+Query:  28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+            +F  F  ++ K Y S  E   RF IF  +L ++   N   + ++     G+N+F+D+S +
+Sbjct:  60  RFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPYR----LGINRFSDMSWE 115
+
+Query:  87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSC 146
+            EF+   L   +         A    +  +       +T  DWR  G V+PVKNQ  CGSC
+Sbjct:  116 EFQATRLGAAQTCS------ATLAGNHLMRDAAALPETK-DWREDGIVSPVKNQAHCGSC 168
+
+Query:  147 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 206
+            W+FSTTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI
+Sbjct:  169 WTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYI 220
+
+Query:  207 IKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+              NGGI TE SYPY    G  C++ + N   +    + N T+  ++E   A  +V   P+
+Sbjct:  221 KYNGGIDTEESYPYKGVNGV-CHYKAENAAVQVLDSV-NITLNAEDELKNAVGLVR--PV 276
+
+Query:  267 AIAADAVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWG 322
+            ++A   ++ ++ Y  GV+        P+ ++H +L VGY  +N      +PYW++KNSWG
+Sbjct:  277 SVAFQVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVEN-----GVPYWLIKNSWG 331
+
+Query:  323 ADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            ADWG+ GY  +  GKN C ++   S  ++
+Sbjct:  332 ADWGDNGYFKMEMGKNMCAIATCASYPVV 360
+
+
+>sp|P43235|CATK_HUMAN CATHEPSIN K PRECURSOR (CATHEPSIN O) (CATHEPSIN X) (CATHEPSIN O2)
+          Length = 329
+
+ Score =  185 bits (465), Expect = 1e-46
+ Identities = 123/350 (35%), Positives = 185/350 (52%), Gaps = 39/350 (11%)
+
+Query:  9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+            L V  + V S  + PEE   + +  ++    K+Y+++ + + R  I++ NL  I   NL 
+Sbjct:  4   LKVLLLPVVSFALYPEEILDTHWELWKKTHRKQYNNKVDEISRRLIWEKNLKYISIHNLE 63
+
+Query:  66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTA 125
+            A       +  +N   D++S+E        K       +P++    ++ +  IP  E  A
+Sbjct:  64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLK-------VPLSHSRSNDTLY-IPEWEGRA 115
+
+Query:  126 ---FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECM 182
+                D+R +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E  
+Sbjct:  116 PDSVDYRKKGYVTPVKNQGQCGSCWAFSSVGALEGQLKKKTGKLLNLSPQNLVDCVSE-- 173
+
+Query:  183 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAK 242
+pattern 237                                                       ****
+                    ++GC GG   NA+ Y+ KN GI +E +YPY  +    C +N       + AK
+Sbjct:  174 --------NDGCGGGYMTNAFQYVQKNRGIDSEDAYPYVGQE-ESCMYNPTG----KAAK 220
+
+Query:  243 ISNFTMIPK-NETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILI 298
+               +  IP+ NE  +   +   GP+++A DA    +QFY  GV +D  CN ++L+H +L 
+Sbjct:  221 CRGYREIPEGNEKALKRAVARVGPVSVAIDASLTSFQFYSKGVYYDESCNSDNLNHAVLA 280
+
+Query:  299 VGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 347
+            VGY       +K   +WI+KNSWG +WG +GYI + R K N CG++N  S
+Sbjct:  281 VGYG-----IQKGNKHWIIKNSWGENWGNKGYILMARNKNNACGIANLAS 325
+
+
+>sp|P05994|PAP4_CARPA PAPAYA PROTEINASE IV PRECURSOR (PPIV) (PAPAYA PEPTIDASE B) (GLYCYL
+            ENDOPEPTIDASE)
+          Length = 348
+
+ Score =  184 bits (462), Expect = 3e-46
+ Identities = 116/315 (36%), Positives = 162/315 (50%), Gaps = 37/315 (11%)
+
+Query:  35  KFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYL 93
+            K NK Y + +E L RFEIFK NL  I+E N +   +      G+N+F+DLS+DEFK  Y+
+Sbjct:  54  KHNKNYKNVDEKLYRFEIFKDNLKYIDERNKMINGYW----LGLNEFSDLSNDEFKEKYV 109
+
+Query:  94  NNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTG 153
+             +    +T+        D+EF+N    +   + DWR +GAVTPVK+QG C SCW+FST  
+Sbjct:  110 GSLPEDYTNQP-----YDEEFVNEDIVDLPESVDWRAKGAVTPVKHQGYCESCWAFSTVA 164
+
+Query:  154 NVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ 213
+             VEG + I    LV LSEQ LVDCD +            GCN G Q  +  Y+ +N GI 
+Sbjct:  165 TVEGINKIKTGNLVELSEQELVDCDKQ----------SYGCNRGYQSTSLQYVAQN-GIH 213
+
+Query:  214 TESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV 273
+pattern 237                        ****
+              + YPY A+  T C  N    GP  + K +    +  N        ++  P+++  ++ 
+Sbjct:  214 LRAKYPYIAKQQT-CRANQVG-GP--KVKTNGVGRVQSNNEGSLLNAIAHQPVSVVVESA 269
+
+Query:  274 --EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 331
+              ++Q Y GG+F+  C    +DH +  VGY            Y ++KNSWG  WGE GYI
+Sbjct:  270 GRDFQNYKGGIFEGSCG-TKVDHAVTAVGYGKSG-----GKGYILIKNSWGPGWGENGYI 323
+
+Query:  332 YLRRGK----NTCGV 342
+             +RR        CGV
+Sbjct:  324 RIRRASGNSPGVCGV 338
+
+
+>sp|P25250|CYS2_HORVU CYSTEINE PROTEINASE EP-B 2 PRECURSOR
+          Length = 373
+
+ Score =  183 bits (461), Expect = 3e-46
+ Identities = 125/349 (35%), Positives = 171/349 (48%), Gaps = 40/349 (11%)
+
+Query:  8   VLAVFTVFVSSRGIPPEEQSQFLE---------FQDKFNKKYSHEEYLERFEIFKSNLGK 58
+            VLAV  V + S  IP E++    E         +Q     +  H E   RF  FKSN   
+Sbjct:  17  VLAVAAVELCS-AIPMEDKDLESEEALWDLYERWQSAHRVRRHHAEKHRRFGTFKSNAHF 75
+
+Query:  59  IEELNLIAINHKADTKFGV--NKFADLSSDEFKNYYLNNKEAIFTDDLP-VADYLDDEF- 114
+            I      + N + D  + +  N+F D+   EF+  ++ +         P V  ++     
+Sbjct:  76  IH-----SHNKRGDHPYRLHLNRFGDMDQAEFRATFVGDLRRDTPSKPPSVPGFMYAALN 130
+
+Query:  115 INSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNL 174
+            ++ +PP    + DWR +GAVT VK+QG+CGSCW+FST  +VEG + I    LVSLSEQ L
+Sbjct:  131 VSDLPP----SVDWRQKGAVTGVKDQGKCGSCWAFSTVVSVEGINAIRTGSLVSLSEQEL 186
+
+Query:  175 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSAN 234
+            +DCD          A ++GC GGL  NA+ YI  NGG+ TE++YPY A  GT CN   A 
+Sbjct:  187 IDCD---------TADNDGCQGGLMDNAFEYIKNNGGLITEAAYPYRAARGT-CNVARAA 236
+
+Query:  235 IGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSL 292
+pattern 237   ****
+                    I     +P N        V+  P+++A +A    + FY  GVF   C    L
+Sbjct:  237 QNSPVVVHIDGHQDVPANSEEDLARAVANQPVSVAVEASGKAFMFYSEGVFTGECG-TEL 295
+
+Query:  293 DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCG 341
+            DHG+ +VGY     +      YW VKNSWG  WGEQGYI + +     G
+Sbjct:  296 DHGVAVVGYG----VAEDGKAYWTVKNSWGPSWGEQGYIRVEKDSGASG 340
+
+
+>sp|P25249|CYS1_HORVU CYSTEINE PROTEINASE EP-B 1 PRECURSOR
+          Length = 371
+
+ Score =  183 bits (460), Expect = 5e-46
+ Identities = 126/353 (35%), Positives = 170/353 (47%), Gaps = 48/353 (13%)
+
+Query:  8   VLAVFTVFVSSRGIPPEEQSQFLE---------FQDKFNKKYSHEEYLERFEIFKSNLGK 58
+            VLAV  V + S  IP E++    E         +Q     +  H E   RF  FKSN   
+Sbjct:  17  VLAVAAVELCS-AIPMEDKDLESEEALWDLYERWQSAHRVRRHHAEKHRRFGTFKSNAHF 75
+
+Query:  59  IEELNLIAINHKADTKFGV--NKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF-- 114
+            I      + N + D  + +  N+F D+   EF+  ++ +       D P        F  
+Sbjct:  76  IH-----SHNKRGDHPYRLHLNRFGDMDQAEFRATFVGDLRR----DTPAKPPSVPGFMY 126
+
+Query:  115 ----INSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLS 170
+                ++ +PP    + DWR +GAVT VK+QG+CGSCW+FST  +VEG + I    LVSLS
+Sbjct:  127 AALNVSDLPP----SVDWRQKGAVTGVKDQGKCGSCWAFSTVVSVEGINAIRTGSLVSLS 182
+
+Query:  171 EQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNF 230
+            EQ L+DCD          A ++GC GGL  NA+ YI  NGG+ TE++YPY A  GT CN 
+Sbjct:  183 EQELIDCD---------TADNDGCQGGLMDNAFEYIKNNGGLITEAAYPYRAARGT-CNV 232
+
+Query:  231 NSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCN 288
+pattern 237       ****
+              A         I     +P N        V+  P+++A +A    + FY  GVF   C 
+Sbjct:  233 ARAAQNSPVVVHIDGHQDVPANSEEDLARAVANQPVSVAVEASGKAFMFYSEGVFTGDCG 292
+
+Query:  289 PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCG 341
+               LDHG+ +VGY     +      YW VKNSWG  WGEQGYI + +     G
+Sbjct:  293 -TELDHGVAVVGYG----VAEDGKAYWTVKNSWGPSWGEQGYIRVEKDSGASG 340
+
+
+>sp|P43236|CATK_RABIT CATHEPSIN K PRECURSOR (OC-2 PROTEIN)
+          Length = 329
+
+ Score =  183 bits (459), Expect = 6e-46
+ Identities = 119/348 (34%), Positives = 181/348 (51%), Gaps = 35/348 (10%)
+
+Query:  9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+            L V  + V S  + PEE   +Q+  ++  ++K+Y+ + + + R  I++ NL  I   NL 
+Sbjct:  4   LKVLLLPVVSFALHPEEILDTQWELWKKTYSKQYNSKVDEISRRLIWEKNLKHISIHNLE 63
+
+Query:  66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDE-FINSIPPEEQT 124
+            A       +  +N   D++S+E        K        P   + +D  +I         
+Sbjct:  64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLKVP------PSRSHSNDTLYIPDWEGRTPD 117
+
+Query:  125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 184
+            + D+R +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E    
+Sbjct:  118 SIDYRKKGYVTPVKNQGQCGSCWAFSSVGALEGQLKKKTGKLLNLSPQNLVDCVSE---- 173
+
+Query:  185 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKIS 244
+pattern 237                                                     ****
+                  + GC GG   NA+ Y+ +N GI +E +YPY  +    C +N       + AK  
+Sbjct:  174 ------NYGCGGGYMTNAFQYVQRNRGIDSEDAYPYVGQ-DESCMYNPTG----KAAKCR 222
+
+Query:  245 NFTMIPK-NETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVG 300
+             +  IP+ NE  +   +   GP+++A DA    +QFY  GV +D  C+ ++++H +L VG
+Sbjct:  223 GYREIPEGNEKALKRAVARVGPVSVAIDASLTSFQFYSKGVYYDENCSSDNVNHAVLAVG 282
+
+Query:  301 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 347
+            Y       +K   +WI+KNSWG  WG +GYI + R K N CG++N  S
+Sbjct:  283 YG-----IQKGNKHWIIKNSWGESWGNKGYILMARNKNNACGIANLAS 325
+
+
+>sp|P22895|P34_SOYBN P34 PROBABLE THIOL PROTEASE PRECURSOR
+          Length = 379
+
+ Score =  182 bits (458), Expect = 8e-46
+ Identities = 110/322 (34%), Positives = 173/322 (53%), Gaps = 38/322 (11%)
+
+Query:  40  YSHEEYLERFEIFKSNLGKIEELNLIAINHKA--DTKFGVNKFADLSSDEFKNYYLNNKE 97
+            ++HEE  +R EIFK+N   I ++N    N K+    + G+NKFAD++  EF   YL   +
+Sbjct:  56  HNHEEEAKRLEIFKNNSNYIRDMNA---NRKSPHSHRLGLNKFADITPQEFSKKYLQAPK 112
+
+Query:  98  AIFTDDLPVAD--YLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNV 155
+             + +  + +A+     +++    PP    ++DWR +G +T VK QG CG  W+FS TG +
+Sbjct:  113 DV-SQQIKMANKKMKKEQYSCDHPP---ASWDWRKKGVITQVKYQGGCGRGWAFSATGAI 168
+
+Query:  156 EGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTE 215
+            E  H I+   LVSLSEQ LVDC  E           EG   G Q  ++ +++++GGI T+
+Sbjct:  169 EAAHAIATGDLVSLSEQELVDCVEE----------SEGSYNGWQYQSFEWVLEHGGIATD 218
+
+Query:  216 SSYPYTAETGTQCNFN----SANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAAD 271
+pattern 237                          ****
+              YPY A+ G +C  N       I   E   +S+ +   + E      I+   P++++ D
+Sbjct:  219 DDYPYRAKEG-RCKANKIQDKVTIDGYETLIMSDESTESETEQAFLSAILEQ-PISVSID 276
+
+Query:  272 AVEWQFYIGGVFDIP--CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQG 329
+            A ++  Y GG++D     +P  ++H +L+VGY + +      + YWI KNSWG DWGE G
+Sbjct:  277 AKDFHLYTGGIYDGENCTSPYGINHFVLLVGYGSAD-----GVDYWIAKNSWGFDWGEDG 331
+
+Query:  330 YIYLRRGK----NTCGVSNFVS 347
+            YI+++R        CG++ F S
+Sbjct:  332 YIWIQRNTGNLLGVCGMNYFAS 353
+
+
+>sp|P49935|CATH_MOUSE CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPSIN BA)
+          Length = 333
+
+ Score =  180 bits (451), Expect = 5e-45
+ Identities = 115/332 (34%), Positives = 166/332 (49%), Gaps = 36/332 (10%)
+
+Query:  25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+            E+  F  +  +  K YS  EY  R ++F +N  KI+  N    NH    K  +N+F+D+S
+Sbjct:  29  EKFHFKSWMKQHQKTYSSVEYNHRLQMFANNWRKIQAHN--QRNHTF--KMALNQFSDMS 84
+
+Query:  85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRG-AVTPVKNQGQC 143
+              E K+ +L ++                 ++    P   ++ DWR +G  V+PVKNQG C
+Sbjct:  85  FAEIKHKFLWSEPQN-------CSATKSNYLRGTGPYP-SSMDWRKKGNVVSPVKNQGAC 136
+
+Query:  144 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 203
+             SCW+FSTTG +E    I+  K++SL+EQ LVDC         +   + GC GGL   A+
+Sbjct:  137 ASCWTFSTTGALESAVAIASGKMLSLAEQQLVDC--------AQAFNNHGCKGGLPSQAF 188
+
+Query:  204 NYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKN-ETVMAGYIVS 262
+pattern 237                                  ****
+             YI+ N GI  E SYPY  +  + C FN      +  A + N   I  N E  M   +  
+Sbjct:  189 EYILYNKGIMEEDSYPYIGK-DSSCRFNP----QKAVAFVKNVVNITLNDEAAMVEAVAL 243
+
+Query:  263 TGPLAIAADAVE-WQFYIGGVFDIPC---NPNSLDHGILIVGYSAKNTIFRKNMPYWIVK 318
+              P++ A +  E +  Y  GV+        P+ ++H +L VGY  +N +      YWIVK
+Sbjct:  244 YNPVSFAFEVTEDFLMYKSGVYSSKSCHKTPDKVNHAVLAVGYGEQNGLL-----YWIVK 298
+
+Query:  319 NSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 350
+            NSWG+ WGE GY  + RGKN CG++   S  I
+Sbjct:  299 NSWGSQWGENGYFLIERGKNMCGLAACASYPI 330
+
+
+>sp|P55097|CATK_MOUSE CATHEPSIN K PRECURSOR
+          Length = 329
+
+ Score =  178 bits (447), Expect = 2e-44
+ Identities = 117/352 (33%), Positives = 182/352 (51%), Gaps = 43/352 (12%)
+
+Query:  9   LAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+            L V  + + S  + PEE   +Q+  ++    K+Y+ + + + R  I++ NL +I   NL 
+Sbjct:  4   LKVLLLPMVSFALSPEEMLDTQWELWKKTHQKQYNSKVDEISRRLIWEKNLKQISAHNLE 63
+
+Query:  66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDD-----EFINSIPP 120
+            A       +  +N   D++S+E        +        P   Y +D     E+   +P 
+Sbjct:  64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLRIP------PSRSYSNDTLYTPEWEGRVPD 117
+
+Query:  121 EEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 180
+                + D+R +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E
+Sbjct:  118 ----SIDYRKKGYVTPVKNQGQCGSCWAFSSAGALEGQLKKKTGKLLALSPQNLVDCVTE 173
+
+Query:  181 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ 240
+pattern 237                                                         ****
+                      + GC GG    A+ Y+ +NGGI +E ++PY  +    C +N+      + 
+Sbjct:  174 ----------NYGCGGGYMTTAFQYVQQNGGIDSEDAFPYVGQ-DESCMYNAT----AKA 218
+
+Query:  241 AKISNFTMIP-KNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGI 296
+            AK   +  IP  NE  +   +   GP++++ DA    +QFY  GV +D  C+ ++++H +
+Sbjct:  219 AKCRGYREIPVGNEKALKRAVARVGPISVSIDASLASFQFYSRGVYYDENCDRDNVNHAV 278
+
+Query:  297 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 347
+            L+VGY       +K   +WI+KNSWG  WG +GY  L R K N CG++N  S
+Sbjct:  279 LVVGYGT-----QKGSKHWIIKNSWGESWGNKGYALLARNKNNACGITNMAS 325
+
+
+>sp|P56202|CATW_HUMAN CATHEPSIN W PRECURSOR (LYMPHOPAIN)
+          Length = 376
+
+ Score =  177 bits (445), Expect = 3e-44
+ Identities = 112/351 (31%), Positives = 171/351 (47%), Gaps = 47/351 (13%)
+
+Query:  22  PPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+            P E +  F  FQ +FN+ Y S EE+  R +IF  NL + + L    +      +FGV  F
+Sbjct:  35  PLELKEAFKLFQIQFNRSYLSPEEHAHRLDIFAHNLAQAQRLQEEDLG---TAEFGVTPF 91
+
+Query:  81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAF--DWR-TRGAVTPV 137
+            +DL+ +EF   Y   + A     +          I S  PEE   F  DWR   GA++P+
+Sbjct:  92  SDLTEEEFGQLYGYRRAAGGVPSM-------GREIRSEEPEESVPFSCDWRKVAGAISPI 144
+
+Query:  138 KNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGG 197
+            K+Q  C  CW+ +  GN+E    IS    V +S   L+DC            C +GC+GG
+Sbjct:  145 KDQKNCNCCWAMAAAGNIETLWRISFWDFVDVSVHELLDCGR----------CGDGCHGG 194
+
+Query:  198 LQPNAYNYIIKNGGIQTESSYPYTAETGT-QCNFNSANIGPEEQAKISNFTMIPKNETVM 256
+pattern 237                                         ****
+               +A+  ++ N G+ +E  YP+  +    +C+        ++ A I +F M+  NE  +
+Sbjct:  195 FVWDAFITVLNNSGLASEKDYPFQGKVRAHRCHPKKY----QKVAWIQDFIMLQNNEHRI 250
+
+Query:  257 AGYIVSTGPLAIAADAVEWQFYIGGVFDIP---CNPNSLDHGILIVGYSA--------KN 305
+            A Y+ + GP+ +  +    Q Y  GV       C+P  +DH +L+VG+ +          
+Sbjct:  251 AQYLATYGPITVTINMKPLQLYRKGVIKATPTTCDPQLVDHSVLLVGFGSVKSEEGIWAE 310
+
+Query:  306 TIFRKNMP-------YWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 349
+            T+  ++ P       YWI+KNSWGA WGE+GY  L RG NTCG++ F  T+
+Sbjct:  311 TVSSQSQPQPPHPTPYWILKNSWGAQWGEKGYFRLHRGSNTCGITKFPLTA 361
+
+
+>sp|P56203|CATW_MOUSE CATHEPSIN W PRECURSOR (LYMPHOPAIN)
+          Length = 371
+
+ Score =  176 bits (442), Expect = 6e-44
+ Identities = 110/346 (31%), Positives = 166/346 (47%), Gaps = 40/346 (11%)
+
+Query:  22  PPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+            P E +  F  FQ +FN+ Y +  EY  R  IF  NL + + L    +      +FG   F
+Sbjct:  33  PLELKEVFKLFQIRFNRSYWNPAEYTRRLSIFAHNLAQAQRLQQEDLG---TAEFGETPF 89
+
+Query:  81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWR-TRGAVTPVKN 139
+            +DL+ +EF   Y   +    T ++       + +  S+P       DWR  +  ++ VKN
+Sbjct:  90  SDLTEEEFGQLYGQERSPERTPNM-TKKVESNTWGESVP----RTCDWRKAKNIISSVKN 144
+
+Query:  140 QGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQ 199
+            QG C  CW+ +   N++    I   + V +S Q L+DC          E C  GCNGG  
+Sbjct:  145 QGSCKCCWAMAAADNIQALWRIKHQQFVDVSVQELLDC----------ERCGNGCNGGFV 194
+
+Query:  200 PNAYNYIIKNGGIQTESSYPYTAETGT-QCNFNSANIGPEEQAKISNFTMIPKNETVMAG 258
+pattern 237                                       ****
+             +AY  ++ N G+ +E  YP+  +    +C         ++ A I +FTM+  NE  +A 
+Sbjct:  195 WDAYLTVLNNSGLASEKDYPFQGDRKPHRCLAKKY----KKVAWIQDFTMLSNNEQAIAH 250
+
+Query:  259 YIVSTGPLAIAADAVEWQFYIGGVFDIP---CNPNSLDHGILIVGYSAKN------TIF- 308
+            Y+   GP+ +  +    Q Y  GV       C+P  +DH +L+VG+  K       T+  
+Sbjct:  251 YLAVHGPITVTINMKLLQHYQKGVIKATPSSCDPRQVDHSVLLVGFGKKKEGMQTGTVLS 310
+
+Query:  309 -----RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 349
+                 R + PYWI+KNSWGA WGE+GY  L RG NTCGV+ +  T+
+Sbjct:  311 HSRKRRHSSPYWILKNSWGAHWGEKGYFRLYRGNNTCGVTKYPFTA 356
+
+
+>sp|P43234|CATO_HUMAN CATHEPSIN O PRECURSOR
+          Length = 321
+
+ Score =  173 bits (435), Expect = 4e-43
+ Identities = 100/304 (32%), Positives = 152/304 (49%), Gaps = 30/304 (9%)
+
+Query:  52  FKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLD 111
+            F+ +L +   LN +  +  +   +G+N+F+ L  +EFK  YL +K + F           
+Sbjct:  44  FRESLNRHRYLNSLFPSENSTAFYGINQFSYLFPEEFKAIYLRSKPSKFPR-------YS 96
+
+Query:  112 DEFINSIPPEE-QTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLS 170
+             E   SIP       FDWR +  VT V+NQ  CG CW+FS  G VE  + I    L  LS
+Sbjct:  97  AEVHMSIPNVSLPLRFDWRDKQVVTQVRNQQMCGGCWAFSVVGAVESAYAIKGKPLEDLS 156
+
+Query:  171 EQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK-NGGIQTESSYPYTAETGTQCN 229
+             Q ++DC +           + GCNGG   NA N++ K    +  +S YP+ A+ G  C+
+Sbjct:  157 VQQVIDCSYN----------NYGCNGGSTLNALNWLNKMQVKLVKDSEYPFKAQNGL-CH 205
+
+Query:  230 FNSANIGPEEQAKISNFTM--IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPC 287
+pattern 237        ****
+            + S   G      I  ++       E  MA  +++ GPL +  DAV WQ Y+GG+    C
+Sbjct:  206 YFS---GSHSGFSIKGYSAYDFSDQEDEMAKALLTFGPLVVIVDAVSWQDYLGGIIQHHC 262
+
+Query:  288 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVS 347
+            +    +H +LI G+         + PYWIV+NSWG+ WG  GY +++ G N CG+++ VS
+Sbjct:  263 SSGEANHAVLITGFDKTG-----STPYWIVRNSWGSSWGVDGYAHVKMGSNVCGIADSVS 317
+
+Query:  348 TSII 351
+            +  +
+Sbjct:  318 SIFV 321
+
+
+>sp|P00784|PAPA_CARPA PAPAIN PRECURSOR (PAPAYA PROTEINASE I) (PPI)
+          Length = 345
+
+ Score =  173 bits (433), Expect = 7e-43
+ Identities = 119/322 (36%), Positives = 163/322 (49%), Gaps = 43/322 (13%)
+
+Query:  35  KFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKFADLSSDEFKNY 91
+            K NK Y + +E + RFEIFK NL  I+E N      K +  +  G+N FAD+S+DEFK  
+Sbjct:  54  KHNKIYKNIDEKIYRFEIFKDNLKYIDETN------KKNNSYWLGLNVFADMSNDEFKEK 107
+
+Query:  92  YLNNKEAIFTD-DLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFS 150
+            Y  +    +T  +L   + L+D  +N   PE     DWR +GAVTPVKNQG CGSCW+FS
+Sbjct:  108 YTGSIAGNYTTTELSYEEVLNDGDVNI--PEY---VDWRQKGAVTPVKNQGSCGSCWAFS 162
+
+Query:  151 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 210
+                +EG   I    L   SEQ L+DCD              GCNGG   +A   ++   
+Sbjct:  163 AVVTIEGIIKIRTGNLNEYSEQELLDCDRR----------SYGCNGGYPWSALQ-LVAQY 211
+
+Query:  211 GIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAA 270
+pattern 237                           ****
+            GI   ++YPY    G Q    S   GP          + P NE  +  Y ++  P+++  
+Sbjct:  212 GIHYRNTYPY---EGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALL-YSIANQPVSVVL 267
+
+Query:  271 DAV--EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQ 328
+            +A   ++Q Y GG+F  PC  N +DH +  VGY            Y ++KNSWG  WGE 
+Sbjct:  268 EAAGKDFQLYRGGIFVGPCG-NKVDHAVAAVGYGPN---------YILIKNSWGTGWGEN 317
+
+Query:  329 GYIYLRRGK-NTCGVSNFVSTS 349
+            GYI ++RG  N+ GV    ++S
+Sbjct:  318 GYIRIKRGTGNSYGVCGLYTSS 339
+
+
+>sp|P25774|CATS_HUMAN CATHEPSIN S PRECURSOR
+          Length = 331
+
+ Score =  171 bits (428), Expect = 3e-42
+ Identities = 116/351 (33%), Positives = 175/351 (49%), Gaps = 35/351 (9%)
+
+Query:  5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYS--HEEYLERFEIFKSNLGKIEEL 62
+            L+ VL V +  V+     P     +  ++  + K+Y   +EE + R  I++ NL  +   
+Sbjct:  4   LVCVLLVCSSAVAQLHKDPTLDHHWHLWKKTYGKQYKEKNEEAVRRL-IWEKNLKFVMLH 62
+
+Query:  63  NLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEE 122
+            NL           G+N   D++S+E  +          T  L V             P  
+Sbjct:  63  NLEHSMGMHSYDLGMNHLGDMTSEEVMS---------LTSSLRVPSQWQRNITYKSNPNR 113
+
+Query:  123 --QTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 180
+                + DWR +G VT VK QG CG+CW+FS  G +E Q  +   KLV+LS QNLVDC   
+Sbjct:  114 ILPDSVDWREKGCVTEVKYQGSCGACWAFSAVGALEAQLKLKTGKLVTLSAQNLVDC--- 170
+
+Query:  181 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQ 240
+pattern 237                                                         ****
+                  E+  ++GCNGG    A+ YII N GI +++SYPY A    +C ++S        
+Sbjct:  171 ----STEKYGNKGCNGGFMTTAFQYIIDNKGIDSDASYPYKA-MDQKCQYDS----KYRA 221
+
+Query:  241 AKISNFTMIP-KNETVMAGYIVSTGPLAIAADAVEWQFYI--GGVFDIPCNPNSLDHGIL 297
+            A  S +T +P   E V+   + + GP+++  DA    F++   GV+  P    +++HG+L
+Sbjct:  222 ATCSKYTELPYGREDVLKEAVANKGPVSVGVDARHPSFFLYRSGVYYEPSCTQNVNHGVL 281
+
+Query:  298 IVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 347
+            +VGY   N        YW+VKNSWG ++GE+GYI + R K N CG+++F S
+Sbjct:  282 VVGYGDLN-----GKEYWLVKNSWGHNFGEEGYIRMARNKGNHCGIASFPS 327
+
+
+>sp||CATL_CHICK_1 [Segment 1 of 2] CATHEPSIN L
+          Length = 176
+
+ Score =  167 bits (420), Expect = 2e-41
+ Identities = 87/179 (48%), Positives = 115/179 (63%), Gaps = 16/179 (8%)
+
+Query:  127 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 186
+            DWR +G VTPVK+QGQCGSCW+FSTTG +EGQHF ++ KLVSLSEQNLVDC       EG
+Sbjct:  6   DWREKGYVTPVKDQGQCGSCWAFSTTGALEGQHFRTKGKLVSLSEQNLVDCSRP----EG 61
+
+Query:  187 EEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNF 246
+pattern 237                                                   ****
+                ++GCNGGL   A+ Y+  NGGI +E SYPYTA+    C + +        A  + F
+Sbjct:  62  ----NQGCNGGLMDQAFQYVQDNGGIDSEESYPYTAKDDEDCRYKA----EYNAANDTGF 113
+
+Query:  247 TMIPK-NETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGY 301
+              IP+ +E  +   + S GP+++A DA    +QFY  G++  P C+   LDHG+L+VGY
+Sbjct:  114 VDIPQGHERALMKAVASVGPVSVAIDAGHSSFQFYQSGIYYEPDCSSEDLDHGVLVVGY 172
+
+
+>sp|P25326|CATS_BOVIN CATHEPSIN S
+          Length = 217
+
+ Score =  165 bits (413), Expect = 1e-40
+ Identities = 90/227 (39%), Positives = 129/227 (56%), Gaps = 21/227 (9%)
+
+Query:  125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 184
+            + DWR +G VT VK QG CGSCW+FS  G +E Q  +   KLVSLS QNLVDC       
+Sbjct:  4   SMDWREKGCVTEVKYQGACGSCWAFSAVGALEAQVKLKTGKLVSLSAQNLVDC------- 56
+
+Query:  185 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKIS 244
+pattern 237                                                     ****
+               +  ++GCNGG    A+ YII N GI +E+SYPY A  G +C ++  N      A  S
+Sbjct:  57  STAKYGNKGCNGGFMTEAFQYIIDNNGIDSEASYPYKAMDG-KCQYDVKN----RAATCS 111
+
+Query:  245 NFTMIP-KNETVMAGYIVSTGPLAIAADAVEWQFYI--GGVFDIPCNPNSLDHGILIVGY 301
+             +  +P  +E  +   + + GP+++  DA    F++   GV+  P    +++HG+L+VGY
+Sbjct:  112 RYIELPFGSEEALKEAVANKGPVSVGIDASHSSFFLYKTGVYYDPSCTQNVNHGVLVVGY 171
+
+Query:  302 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 347
+               +        YW+VKNSWG  +G+QGYI + R   N CG++N+ S
+Sbjct:  172 GNLD-----GKDYWLVKNSWGLHFGDQGYIRMARNSGNHCGIANYPS 213
+
+
+>sp|P80884|ANAN_ANACO ANANAIN
+          Length = 216
+
+ Score =  161 bits (403), Expect = 2e-39
+ Identities = 93/224 (41%), Positives = 123/224 (54%), Gaps = 26/224 (11%)
+
+Query:  125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 184
+            + DWR  GAVT VKNQG+CGSCW+F++   VE  + I +  LVSLSEQ ++DC       
+Sbjct:  4   SIDWRDSGAVTSVKNQGRCGSCWAFASIATVESIYKIKRGNLVSLSEQQVLDC------- 56
+
+Query:  185 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKIS 244
+pattern 237                                                     ****
+                A   GC GG    AY++II N G+ + + YPY A  GT C  N    G    A I+
+Sbjct:  57  ----AVSYGCKGGWINKAYSFIISNKGVASAAIYPYKAAKGT-CKTN----GVPNSAYIT 107
+
+Query:  245 NFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSA 303
+             +T + +N      Y VS  P+A A DA   +Q Y  GVF  PC    L+H I+I+GY  
+Sbjct:  108 RYTYVQRNNERNMMYAVSNQPIAAALDASGNFQHYKRGVFTGPCG-TRLNHAIVIIGYGQ 166
+
+Query:  304 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT----CGVS 343
+             +        +WIV+NSWGA WGE GYI L R  ++    CG++
+Sbjct:  167 DSA----GKKFWIVRNSWGAGWGEGGYIRLARDVSSSFGICGIA 206
+
+
+>sp|Q02765|CATS_RAT CATHEPSIN S PRECURSOR
+          Length = 330
+
+ Score =  158 bits (396), Expect = 1e-38
+ Identities = 89/226 (39%), Positives = 128/226 (56%), Gaps = 22/226 (9%)
+
+Query:  127 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 186
+            DWR +G VT VK QG CGSCW+FS  G +EGQ  +   KLVSLS QNLVDC  E      
+Sbjct:  118 DWREKGCVTNVKYQGSCGSCWAFSAEGALEGQLKLKTGKLVSLSAQNLVDCSTE------ 171
+
+Query:  187 EEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNF 246
+pattern 237                                                   ****
+            E+  ++GC GG    A+ YII +  I +E+SYPY A    +C ++  N      A  S +
+Sbjct:  172 EKYGNKGCGGGFMTEAFQYII-DTSIDSEASYPYKA-MDEKCLYDPKN----RAATCSRY 225
+
+Query:  247 TMIP-KNETVMAGYIVSTGPLAIAADAV---EWQFYIGGVFDIPCNPNSLDHGILIVGYS 302
+              +P  +E  +   + + GP+++  D      +  Y  GV+D P    +++HG+L+VGY 
+Sbjct:  226 IELPFGDEEALKEAVATKGPVSVGIDDASHSSFFLYQSGVYDDPSCTENMNHGVLVVGYG 285
+
+Query:  303 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYL-RRGKNTCGVSNFVS 347
+              +        YW+VKNSWG  +G+QGYI + R  KN CG++++ S
+Sbjct:  286 TLD-----GKDYWLVKNSWGLHFGDQGYIRMARNNKNHCGIASYCS 326
+
+
+>sp|P20721|CYSL_LYCES LOW-TEMPERATURE-INDUCED CYSTEINE PROTEINASE PRECURSOR
+          Length = 346
+
+ Score =  158 bits (395), Expect = 2e-38
+ Identities = 87/238 (36%), Positives = 130/238 (54%), Gaps = 25/238 (10%)
+
+Query:  112 DEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSE 171
+            D ++  +      + DWR +G +  VK+QG CGSCW+FS    +E  + I    L+SLSE
+Sbjct:  8   DRYLPKVGDSLPESIDWREKGVLVGVKDQGSCGSCWAFSAVAAMESINAIVTGNLISLSE 67
+
+Query:  172 QNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFN 231
+            Q LVDCD          + +EGC+GGL   A+ ++IKNGGI TE  YPY    G  C+  
+Sbjct:  68  QELVDCD---------RSYNEGCDGGLMDYAFEFVIKNGGIDTEEDYPYKERNGV-CDQY 117
+
+Query:  232 SANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNP 289
+pattern 237      ****
+              N    +  KI ++  +P N        V+  P++IA +A   ++Q Y  G+F   C  
+Sbjct:  118 RKN---AKVVKIDSYEDVPVNNEKALQKAVAHQPVSIALEAGGRDFQHYKSGIFTGKCG- 173
+
+Query:  290 NSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT----CGVS 343
+             ++DHG++I GY  +N      M YWIV+NSWGA+  E GY+ ++R  ++    CG++
+Sbjct:  174 TAVDHGVVIAGYGTEN-----GMDYWIVRNSWGANCRENGYLRVQRNVSSSSGLCGLA 226
+
+
+>sp|P36184|ACP1_ENTHI CYSTEINE PROTEINASE ACP1 PRECURSOR
+          Length = 308
+
+ Score =  152 bits (379), Expect = 1e-36
+ Identities = 105/320 (32%), Positives = 151/320 (46%), Gaps = 48/320 (15%)
+
+Query:  29  FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+            F ++    NK +++  EYL RF +F  N   +E          A+    +N FAD++ +E
+Sbjct:  18  FKQWAATHNKVFANRAEYLYRFAVFLDNKKFVE----------ANANTELNVFADMTHEE 67
+
+Query:  88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCW 147
+            F   +L       T ++P         + + P     + DWR+   + P K+QGQCGSCW
+Sbjct:  68  FIQTHLG-----MTYEVPETTSNVKAAVKAAPE----SVDWRS--IMNPAKDQGQCGSCW 116
+
+Query:  148 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 207
+            +F TT  +EG+      KL S SEQ LVDCD          A D GC GG   N+  +I 
+Sbjct:  117 TFCTTAVLEGRVNKDLGKLYSFSEQQLVDCD----------ASDNGCEGGHPSNSLKFIQ 166
+
+Query:  208 KNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLA 267
+pattern 237                              ****
+            +N G+  ES YPY A  GT C     N+     ++     +   +ET +   I   GP+A
+Sbjct:  167 ENNGLGLESDYPYKAVAGT-CK-KVKNVATVTGSR----RVTDGSETGLQTIIAENGPVA 220
+
+Query:  268 IAADA--VEWQFYIGGVF--DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGA 323
+            +  DA    +Q Y  G    D  C    ++H +  VGY + +     N  YWI++NSWG 
+Sbjct:  221 VGMDASRPSFQLYKKGTIYSDTKCRSRMMNHCVTAVGYGSNS-----NGKYWIIRNSWGT 275
+
+Query:  324 DWGEQGYIYLRR-GKNTCGV 342
+             WG+ GY  L R   N CG+
+Sbjct:  276 SWGDAGYFLLARDSNNMCGI 295
+
+
+>sp|Q01957|CPP1_ENTHI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 315
+
+ Score =  150 bits (375), Expect = 4e-36
+ Identities = 103/317 (32%), Positives = 163/317 (50%), Gaps = 47/317 (14%)
+
+Query:  37  NKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADT-KFGVN-KFADLSSDEFKNYYLN 94
+            NK ++  E L R  IF  N        ++A N++ +T K  V+  FA ++++E+ +    
+Sbjct:  24  NKHFTAVESLRRRAIFNMNA------RIVAENNRKETFKLSVDGPFAAMTNEEYNSLLKL 77
+
+Query:  95  NKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGN 154
+             +      ++         ++N   P+   A DWR +G VTP+++QG CGSC++F +   
+Sbjct:  78  KRSGEEKGEV--------RYLNIQAPK---AVDWRKKGKVTPIRDQGNCGSCYTFGSIAA 126
+
+Query:  155 VEGQHFISQ---NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 211
+            +EG+  I +   ++ + LSE+++V C  E    +G    + GCNGGL  N YNYI++N G
+Sbjct:  127 LEGRLLIEKGGDSETLDLSEEHMVQCTRE----DG----NNGCNGGLGSNVYNYIMEN-G 177
+
+Query:  212 IQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAAD 271
+pattern 237                          ****
+            I  ES YPYT    T           +  AKI ++  + +N  V     +S G + ++ D
+Sbjct:  178 IAKESDYPYTGSDST------CRSDVKAFAKIKSYNRVARNNEVELKAAISQGLVDVSID 231
+
+Query:  272 A--VEWQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 326
+            A  V++Q Y  G + D  C  N  +L+H +  VGY   +         WIV+NSWG  WG
+Sbjct:  232 ASSVQFQLYKSGAYTDTQCKNNYFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWG 286
+
+Query:  327 EQGYIYLRRGKNTCGVS 343
+            E+GYI +    NTCGV+
+Sbjct:  287 EKGYINMVIEGNTCGVA 303
+
+
+>sp|O17473|CATL_BRUPA CATHEPSIN L-LIKE PRECURSOR
+          Length = 395
+
+ Score =  150 bits (374), Expect = 6e-36
+ Identities = 101/331 (30%), Positives = 157/331 (46%), Gaps = 29/331 (8%)
+
+Query:  26  QSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+            ++++ ++     K Y  +E   R  IF+SN    E +N             +N  ADL+ 
+Sbjct:  88  ETEWKDYVTALGKHYDQKENNFRMAIFESNELMTERINKKYEQGLVSYTTALNDLADLTD 147
+
+Query:  86  DEF--KNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQC 143
+            +EF  +N      +         +++   +    +P +     DWRT+GAVTPV+NQG+C
+Sbjct:  148 EEFMVRNGLRLPNQTDLRGKRQTSEFYRYDKSERLPDQ----VDWRTKGAVTPVRNQGEC 203
+
+Query:  144 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 203
+            GSC++F+T   +E  H     +L+ LS QN+VDC             + GC+GG  P A+
+Sbjct:  204 GSCYAFATAAALEAYHKQMTGRLLDLSPQNIVDCT--------RNLGNNGCSGGYMPTAF 255
+
+Query:  204 NYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMI-PKNETVMAGYIVS 262
+pattern 237                                  ****
+             Y  +  GI  ES YPY   T  +C +  +     +    + F  I P +E  +   +  
+Sbjct:  256 QYASRY-GIAMESRYPYVG-TEQRCRWQQSIAVVTD----NGFNEIQPGDELALKHAVAK 309
+
+Query:  263 TGP--LAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNS 320
+             GP  + I+     ++FY  GV+    N    DH +L VGY    +       YWIVKNS
+Sbjct:  310 RGPVVVGISGSKRSFRFYKDGVYS-EGNCGRPDHAVLAVGYGTHPSY----GDYWIVKNS 364
+
+Query:  321 WGADWGEQGYIYLRRGK-NTCGVSNFVSTSI 350
+            WG DWG+ GY+Y+ R + N C +++  S  I
+Sbjct:  365 WGTDWGKDGYVYMARNRGNMCHIASAASFPI 395
+
+
+>sp|P46102|CYSP_PLAVN CYSTEINE PROTEINASE PRECURSOR
+          Length = 506
+
+ Score =  150 bits (374), Expect = 6e-36
+ Identities = 116/363 (31%), Positives = 180/363 (48%), Gaps = 64/363 (17%)
+
+Query:  27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+            S+F ++  + NKKY + +E L+RFE FK    K ++ N +   +       VN+++D S 
+Sbjct:  160 SKFFKYMKENNKKYENMDEQLQRFENFKIRYMKTQKHNEMVGKNGLTYVQKVNQYSDFSK 219
+
+Query:  86  DEFKNYYLNNKEAIFTDDL------PVADYLDDEFINSIPPEEQT---AFDWRTRGAVTP 136
+            +EF NY+   K      DL      P+  +L +  + S+  + +    + D+R++    P
+Sbjct:  220 EEFDNYF--KKLLSVPMDLKSKYIVPLKKHLANTNLISVDNKSKDFPDSRDYRSKFNFLP 277
+
+Query:  137 VKNQGQCGSCWSFSTTGNVEGQHFISQNKL-VSLSEQNLVDCDHECMEYEGEEACDEGCN 195
+             K+QG CGSCW+F+  GN E  +  +++++ +S SEQ +VDC  E          + GC+
+Sbjct:  278 PKDQGNCGSCWAFAAIGNFEYLYVHTRHEMPISFSEQQMVDCSTE----------NYGCD 327
+
+Query:  196 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQC-NFNSANIGPEEQAKISNFTMIPKNET 254
+pattern 237                                           ****
+            GG    A+ Y+I NG +     YPY       C N+  + +G     ++     +  NE 
+Sbjct:  328 GGNPFYAFLYMINNG-VCLGDEYPYKGHEDFFCLNYRCSLLG-----RVHFIGDVKPNEL 381
+
+Query:  255 VMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSA---------- 303
+            +MA   V  GP+ IA  A E +  Y GGVFD  CNP  L+H +L+VGY            
+Sbjct:  382 IMALNYV--GPVTIAVGASEDFVLYSGGVFDGECNPE-LNHSVLLVGYGQVKKSLAFEDS 438
+
+Query:  304 -----KNTI--FRKNMP---------YWIVKNSWGADWGEQGYIYLRRGK----NTCGVS 343
+                  N I  +++N+          YWIV+NSWG +WGE GYI ++R K      CGV 
+Sbjct:  439 HSNVDSNLIKKYKENIKGDDDDDIIYYWIVRNSWGPNWGEGGYIRIKRNKAGDDGFCGVG 498
+
+Query:  344 NFV 346
+            + V
+Sbjct:  499 SDV 501
+
+
+>sp|Q06964|CPP3_ENTHI CYSTEINE PROTEINASE 3 PRECURSOR (CYSTEINE PROTEINASE ACP3)
+          Length = 308
+
+ Score =  149 bits (372), Expect = 9e-36
+ Identities = 103/316 (32%), Positives = 159/316 (49%), Gaps = 45/316 (14%)
+
+Query:  37  NKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-KFADLSSDEFKNYYLNN 95
+            NK ++  E L R  IF  N   + E N      K   K  V+  FA ++++E++   L +
+Sbjct:  17  NKHFTAVEALRRRAIFNMNARFVAEFN-----KKGSFKLSVDGPFAAMTNEEYRTL-LKS 70
+
+Query:  96  KEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNV 155
+            K  +  +           ++N   PE   + DWR +G VTP+++Q QCGSC++F +   +
+Sbjct:  71  KRTVEENGKVT-------YLNIQAPE---SVDWRAQGKVTPIRDQAQCGSCYTFGSLAAL 120
+
+Query:  156 EGQHFISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGI 212
+            EG+  I +      + LSE++LV C          +  + GCNGGL  N Y+YII+N G+
+Sbjct:  121 EGRLLIEKGGNANTLDLSEEHLVQCT--------RDNGNNGCNGGLGSNVYDYIIQN-GV 171
+
+Query:  213 QTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADA 272
+pattern 237                         ****
+              ES YPYT  T + C  N      +  AKI+ +  +P+N        +S G + ++ DA
+Sbjct:  172 AKESDYPYTG-TDSTCKTN-----VKAFAKITGYNKVPRNNEAELKAALSQGLVDVSIDA 225
+
+Query:  273 --VEWQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 327
+               ++Q Y  G + D  C  N  +L+H +  VGY   +         WIV+NSWG  WG+
+Sbjct:  226 SSAKFQLYKSGAYSDTKCKNNFFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWGD 280
+
+Query:  328 QGYIYLRRGKNTCGVS 343
+            +GYI +    NTCGV+
+Sbjct:  281 KGYINMVIEGNTCGVA 296
+
+
+>sp|Q01958|CPP2_ENTHI CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 315
+
+ Score =  149 bits (372), Expect = 9e-36
+ Identities = 102/324 (31%), Positives = 161/324 (49%), Gaps = 45/324 (13%)
+
+Query:  29  FLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-KFADLSSDE 87
+            F  +  K NK ++  E L R  IF  N   ++  N I        K  V+  FA ++++E
+Sbjct:  16  FNTWASKNNKHFTAIEKLRRRAIFNMNAKFVDSFNKIG-----SFKLSVDGPFAAMTNEE 70
+
+Query:  88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCW 147
+            ++    + +    T++     YL+ +   S+        DWR  G VTP+++Q QCGSC+
+Sbjct:  71  YRTLLKSKRT---TEENGQVKYLNIQAPESV--------DWRKEGKVTPIRDQAQCGSCY 119
+
+Query:  148 SFSTTGNVEGQHFISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN 204
+            +F +   +EG+  I +      + LSE+++V C          +  + GCNGGL  N Y+
+Sbjct:  120 TFGSLAALEGRLLIEKGGDANTLDLSEEHMVQCT--------RDNGNNGCNGGLGSNVYD 171
+
+Query:  205 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTG 264
+pattern 237                                 ****
+            YII++ G+  ES YPYT    T C  N  +      AKI+ +T +P+N        +S G
+Sbjct:  172 YIIEH-GVAKESDYPYTGSDST-CKTNVKSF-----AKITGYTKVPRNNEAELKAALSQG 224
+
+Query:  265 PLAIAADA--VEWQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKN 319
+             + ++ DA   ++Q Y  G + D  C  N  +L+H +  VGY   +         WIV+N
+Sbjct:  225 LVDVSIDASSAKFQLYKSGAYTDTKCKNNYFALNHEVCAVGYGVVD-----GKECWIVRN 279
+
+Query:  320 SWGADWGEQGYIYLRRGKNTCGVS 343
+            SWG  WG++GYI +    NTCGV+
+Sbjct:  280 SWGTGWGDKGYINMVIEGNTCGVA 303
+
+
+>sp|P36185|ACP2_ENTHI CYSTEINE PROTEINASE ACP2 PRECURSOR
+          Length = 310
+
+ Score =  145 bits (363), Expect = 1e-34
+ Identities = 102/330 (30%), Positives = 160/330 (47%), Gaps = 40/330 (12%)
+
+Query:  20  GIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN- 78
+            GI       F  +  K NK ++  E L R  IF  N   ++  N I        K  V+ 
+Sbjct:  3   GIRIASAIDFNTWASKNNKHFTAIEKLRRRAIFNMNAKFVDSFNKIG-----SFKLSVDG 57
+
+Query:  79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVK 138
+             FA ++++E++    + +    T++     YL+ +   S+        DWR  G VTP++
+Sbjct:  58  PFAAMTNEEYRTLLKSKRT---TEENGQVKYLNIQAPESV--------DWRKEGKVTPLR 106
+
+Query:  139 NQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGL 198
+            +Q QCGSC++F +   +EG+  I +       + N +D   E M+   +   + GCNGGL
+Sbjct:  107 DQAQCGSCYTFGSLAALEGRLLIEKG-----GDANTLDLSEEHMQCTRDNG-NNGCNGGL 160
+
+Query:  199 QPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAG 258
+pattern 237                                       ****
+              N Y+YII++G +  ES YPYT    T C  N  +       KI+ +T +P+N      
+Sbjct:  161 GSNVYDYIIEHG-VAKESDYPYTGSDST-CKTNVKSF-----RKITGYTKVPRNNEAELK 213
+
+Query:  259 YIVSTGPLAIAAD--AVEWQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMP 313
+              +S G L ++ D  + ++Q Y  G + D  C  N  +L+H +  VGY   +        
+Sbjct:  214 AALSQGLLDVSIDVSSAKFQLYKSGAYTDTKCKNNYFALNHEVCAVGYGVVD-----GKE 268
+
+Query:  314 YWIVKNSWGADWGEQGYIYLRRGKNTCGVS 343
+             WIV+NSWG  WG++GYI +    NTCGV+
+Sbjct:  269 CWIVRNSWGTSWGDKGYINMVIEGNTCGVA 298
+
+
+>sp|P25781|CYSP_THEAN CYSTEINE PROTEINASE PRECURSOR
+          Length = 441
+
+ Score =  145 bits (362), Expect = 1e-34
+ Identities = 107/345 (31%), Positives = 165/345 (47%), Gaps = 58/345 (16%)
+
+Query:  28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGV--NKFADLS 84
+            +F  F +K+ K + S ++ ++RF  F+ N   ++        HK    + +  NKF+DLS
+Sbjct:  119 EFDAFVEKYKKVHRSFDQRVQRFLTFRKNYHIVK-------THKPTEPYSLDLNKFSDLS 171
+
+Query:  85  SDEFKNYY--------------------LNNKEAIFTDDLPVADYLDDEFINSIPPEEQT 124
+             +EFK  Y                    +++K  I+   L  A  +++    S+   E  
+Sbjct:  172 DEEFKALYPVITPPKTYTSLSKHLEFKKMSHKNPIYISKLKKAKGIEEIKDLSLITGEN- 230
+
+Query:  125 AFDWRTRGAVTPVKNQGQ-CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 183
+              +W    AV+P K+QG  CGSCW+FS+  +VE  + + +NK   LSEQ LV+CD   M 
+Sbjct:  231 -LNWARTDAVSPTKDQGDHCGSCWAFSSIASVESLYRLYKNKSYFLSEQELVNCDKSSM- 288
+
+Query:  184 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKI 243
+pattern 237                                                      ****
+                     GC GGL   A  Y I + G+  ES  PYT    + C  +  N     +  I
+Sbjct:  289 ---------GCAGGLPITALEY-IHSKGVSFESEVPYTGIV-SPCKPSIKN-----KVFI 332
+
+Query:  244 SNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 303
+             + +++  N+ V    ++S   + IA    E + Y GG+F   C    L+H +L+VG   
+Sbjct:  333 DSISILKGNDVVNKSLVISPTVVGIAV-TKELKLYSGGIFTGKCG-GELNHAVLLVGEGV 390
+
+Query:  304 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR---GKNTCGVSNF 345
+             +      M YWI+KNSWG DWGE G++ L+R   G + CG+  F
+Sbjct:  391 DH---ETGMRYWIIKNSWGEDWGENGFLRLQRTKKGLDKCGILTF 432
+
+
+>sp|P22497|CYSP_THEPA CYSTEINE PROTEINASE PRECURSOR
+          Length = 439
+
+ Score =  143 bits (357), Expect = 5e-34
+ Identities = 105/351 (29%), Positives = 163/351 (45%), Gaps = 72/351 (20%)
+
+Query:  24  EEQSQFLEFQDKFNKKYS-HEEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKF 80
+            E   +F EF  K+N++++  +E L R   F+SN  +++E        K D  +  G+N+F
+Sbjct:  119 EVYREFEEFNSKYNRRHATQQERLNRLVTFRSNYLEVKE-------QKGDEPYVKGINRF 171
+
+Query:  81  ADLSSDEF--------------------------KNYYLNNKEAIFTDDLPVADYLDDEF 114
+            +DL+  EF                          K Y  N K+A+ TD+        D  
+Sbjct:  172 SDLTEREFYKLFPVMKPPKATYSNGYYLLSHMANKTYLKNLKKALNTDE--------DVD 223
+
+Query:  115 INSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNL 174
+            +  +  E     DWR   +VT VK+Q  CG CW+FST G+VEG +    +K   LS Q L
+Sbjct:  224 LAKLTGEN---LDWRRSSSVTSVKDQSNCGGCWAFSTVGSVEGYYMSHFDKSYELSVQEL 280
+
+Query:  175 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSAN 234
+            +DCD          +   GC GGL  +AY Y+ K  G+ +    P+  +   +C+   A 
+Sbjct:  281 LDCD----------SFSNGCQGGLLESAYEYVRKY-GLVSAKDLPF-VDKARRCSVPKA- 327
+
+Query:  235 IGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDH 294
+pattern 237   ****
+                ++  + ++ +  K + VM   + S+      + + E   Y  GVF   C   SL+H
+Sbjct:  328 ----KKVSVPSYHVF-KGKEVMTRSLTSSPCSVYLSVSPELAKYKSGVFTGECG-KSLNH 381
+
+Query:  295 GILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR---GKNTCGV 342
+             +++VG        ++   YW+V+NSWG DWGE GY+ L R   G + CGV
+Sbjct:  382 AVVLVGEGYDEVTKKR---YWVVQNSWGTDWGENGYMRLERTNMGTDKCGV 429
+
+
+>sp|P25805|CYSP_PLAFA THROPHOZOITE CYSTEINE PROTEINASE PRECURSOR (TCP)
+          Length = 569
+
+ Score =  141 bits (351), Expect = 3e-33
+ Identities = 107/367 (29%), Positives = 169/367 (45%), Gaps = 62/367 (16%)
+
+Query:  27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+            S+F +F  + NK Y + +E + +FEIFK N   I+  N   +N  A  K  VN+F+D S 
+Sbjct:  223 SKFFKFMKEHNKVYKNIDEQMRKFEIFKINYISIKNHN--KLNKNAMYKKKVNQFSDYSE 280
+
+Query:  86  DEFKNYYLN----NKEAIFTDDLPVADYLDD-----EFINSIPPEEQTAF-------DWR 129
+            +E K Y+          I     P  ++L D     EF  +    E+  F       D+R
+Sbjct:  281 EELKEYFKTLLHVPNHMIEKYSKPFENHLKDNILISEFYTNGKRNEKDIFSKVPEILDYR 340
+
+Query:  130 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 189
+             +G V   K+QG CGSCW+F++ GN+E         ++S SEQ +VDC  +         
+Sbjct:  341 EKGIVHEPKDQGLCGSCWAFASVGNIESVFAKKNKNILSFSEQEVVDCSKD--------- 391
+
+Query:  190 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMI 249
+pattern 237                                                ****
+             + GC+GG    ++ Y+++N  +     Y Y A+    C     N   + +  +S+   +
+Sbjct:  392 -NFGCDGGHPFYSFLYVLQN-ELCLGDEYKYKAKDDMFC----LNYRCKRKVSLSSIGAV 445
+
+Query:  250 PKNETVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGY------- 301
+             +N+ ++A  +   GPL++      ++  Y  GV++  C+   L+H +L+VGY       
+Sbjct:  446 KENQLILA--LNEVGPLSVNVGVNNDFVAYSEGVYNGTCS-EELNHSVLLVGYGQVEKTK 502
+
+Query:  302 -------SAKNTIFRKNMP------YWIVKNSWGADWGEQGYIYLRRGKN----TCGVSN 344
+                      NT    N P      YWI+KNSW   WGE G++ L R KN     CG+  
+Sbjct:  503 LNYNNKIQTYNTKENSNQPDDNIIYYWIIKNSWSKKWGENGFMRLSRNKNGDNVFCGIGE 562
+
+Query:  345 FVSTSII 351
+             V   I+
+Sbjct:  563 EVFYPIL 569
+
+
+>sp|P14518|BROM_ANACO BROMELAIN, STEM
+          Length = 212
+
+ Score =  139 bits (348), Expect = 6e-33
+ Identities = 81/224 (36%), Positives = 113/224 (50%), Gaps = 31/224 (13%)
+
+Query:  125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 184
+            + DWR  GAVT VKNQ  CG+CW+F+    VE  + I +  L  LSEQ ++DC       
+Sbjct:  5   SIDWRDYGAVTSVKNQNPCGACWAFAAIATVESIYKIKKGILEPLSEQQVLDC------- 57
+
+Query:  185 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKIS 244
+pattern 237                                                     ****
+                A   GC GG +  A+ +II N G+ + + YPY A  GT C  +    G    A I+
+Sbjct:  58  ----AKGYGCKGGWEFRAFEFIISNKGVASGAIYPYKAAKGT-CKTD----GVPNSAYIT 108
+
+Query:  245 NFTMIPKNETVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 303
+             +  +P+N      Y VS  P+ +A DA   +Q+Y  GVF+ PC   SL+H +  +GY  
+Sbjct:  109 GYARVPRNNESSMMYAVSKQPITVAVDANANFQYYKSGVFNGPCG-TSLNHAVTAIGYGQ 167
+
+Query:  304 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR----GKNTCGVS 343
+             + I+ K          WGA WGE GYI + R        CG++
+Sbjct:  168 DSIIYPK---------KWGAKWGEAGYIRMARDVSSSSGICGIA 202
+
+
+>sp|P16311|MMAL_DERFA MAJOR MITE FECAL ALLERGEN DER F 1 PRECURSOR (DER F I)
+          Length = 321
+
+ Score =  138 bits (345), Expect = 1e-32
+ Identities = 115/352 (32%), Positives = 157/352 (43%), Gaps = 52/352 (14%)
+
+Query:  7   FVLAVFTVFVSSRGIP-PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLI 65
+            FVLA+ ++ V S     P     F EF+  FNK Y+    +E  E+ + N   +E L  +
+Sbjct:  3   FVLAIASLLVLSTVYARPASIKTFEEFKKAFNKNYAT---VEEEEVARKNF--LESLKYV 57
+
+Query:  66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF----INSIPPE 121
+              N     K  +N  +DLS DEFKN YL + EA   + L     L+ E     INS+   
+Sbjct:  58  EAN-----KGAINHLSDLSLDEFKNRYLMSAEAF--EQLKTQFDLNAETSACRINSVNVP 110
+
+Query:  122 EQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 181
+             +   D R+   VTP++ QG CGSCW+FS     E  +   +N  + LSEQ LVDC    
+Sbjct:  111 SE--LDLRSLRTVTPIRMQGGCGSCWAFSGVAATESAYLAYRNTSLDLSEQELVDC---- 164
+
+Query:  182 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQA 241
+pattern 237                                                        ****
+                   A   GC+G   P    YI +NG ++ E SYPY A        NS + G     
+Sbjct:  165 -------ASQHGCHGDTIPRGIEYIQQNGVVE-ERSYPYVAREQRCRRPNSQHYG----- 211
+
+Query:  242 KISNFTMIPKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGGVF---DIPCNPNSLD 293
+             ISN+  I   +       ++    AIA      D   +Q Y G      D    PN   
+Sbjct:  212 -ISNYCQIYPPDVKQIREALTQTHTAIAVIIGIKDLRAFQHYDGRTIIQHDNGYQPNY-- 268
+
+Query:  294 HGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNF 345
+            H + IVGY +      +   YWIV+NSW   WG+ GY Y + G N   +  +
+Sbjct:  269 HAVNIVGYGS-----TQGDDYWIVRNSWDTTWGDSGYGYFQAGNNLMMIEQY 315
+
+
+>sp|P42666|CYSP_PLAVI CYSTEINE PROTEINASE PRECURSOR
+          Length = 583
+
+ Score =  129 bits (320), Expect = 1e-29
+ Identities = 100/370 (27%), Positives = 166/370 (44%), Gaps = 84/370 (22%)
+
+Query:  27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+            S+F  F +K+ + Y    E +E+++ FK N  KI++ N          K  VN+F+D S 
+Sbjct:  235 SKFFNFMNKYKRSYKDINEQMEKYKNFKMNYLKIKKHN----ETNQMYKMKVNQFSDYSK 290
+
+Query:  86  DEFKNYYLNNKEAIFTDDLPVADYLDDEFI--------------------NSIPPEEQTA 125
+             +F++Y        F   +P+ D+L  +++                     ++  +    
+Sbjct:  291 KDFESY--------FRKLVPIPDHLKKKYVVPFSSMNNGKGKNVVTSSSGANLLADVPEI 342
+
+Query:  126 FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK-LVSLSEQNLVDCDHECMEY 184
+             D+R +G V   K+QG CGSCW+F++ GNVE  +    NK +++LSEQ +VDC       
+Sbjct:  343 LDYREKGIVHEPKDQGLCGSCWAFASVGNVECMYAKEHNKTILTLSEQEVVDC------- 395
+
+Query:  185 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKIS 244
+pattern 237                                                     ****
+                  + GC+GG    ++ Y I+N GI     Y Y A     C     N   + +  +S
+Sbjct:  396 ---SKLNFGCDGGHPFYSFIYAIEN-GICMGDDYKYKAMDNLFC----LNYRCKNKVTLS 447
+
+Query:  245 NFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYS- 302
+            +   + +NE + A  +   GP+++      ++ FY GG+F+  C    L+H +L+VGY  
+Sbjct:  448 SVGGVKENELIRA--LNEVGPVSVNVGVTDDFSFYGGGIFNGTCT-EELNHSVLLVGYGQ 504
+
+Query:  303 -AKNTIFRKN-------------------------MPYWIVKNSWGADWGEQGYIYLRRG 336
+               + IF++                            YWI+KNSW   WGE G++ + R 
+Sbjct:  505 VQSSKIFQEKNAYDDASGVTKKGALSYPSKADDGIQYYWIIKNSWSKFWGENGFMRISRN 564
+
+Query:  337 KN----TCGV 342
+            K      CG+
+Sbjct:  565 KEGDNVFCGI 574
+
+
+>sp|P08176|MMAL_DERPT MAJOR MITE FECAL ALLERGEN DER P 1 PRECURSOR (DER P I)
+          Length = 320
+
+ Score =  121 bits (300), Expect = 3e-27
+ Identities = 111/345 (32%), Positives = 151/345 (43%), Gaps = 57/345 (16%)
+
+Query:  1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+            MK++L     +    V +R   P     F E++  FNK Y+     E  E  + N   +E
+Sbjct:  1   MKIVLAIASLLALSAVYAR---PSSIKTFEEYKKAFNKSYAT---FEDEEAARKNF--LE 52
+
+Query:  61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF----IN 116
+             +  +  N  A     +N  +DLS DEFKN +L + EA   + L     L+ E     IN
+Sbjct:  53  SVKYVQSNGGA-----INHLSDLSLDEFKNRFLMSAEAF--EHLKTQFDLNAETNACSIN 105
+
+Query:  117 SIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 176
+               P E    D R    VTP++ QG CGSCW+FS     E  +   +N+ + L+EQ LVD
+Sbjct:  106 GNAPAE---IDLRQMRTVTPIRMQGGCGSCWAFSGVAATESAYLAYRNQSLDLAEQELVD 162
+
+Query:  177 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 236
+            C           A   GC+G   P    YI  NG +Q ES Y Y A   +    N+   G
+Sbjct:  163 C-----------ASQHGCHGDTIPRGIEYIQHNGVVQ-ESYYRYVAREQSCRRPNAQRFG 210
+
+Query:  237 PEEQAKISNFTMI-PKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGGVF---DIPC 287
+pattern 237 ****
+                  ISN+  I P N   +   +  T   AIA      D   ++ Y G      D   
+Sbjct:  211 ------ISNYCQIYPPNVNKIREALAQTHS-AIAVIIGIKDLDAFRHYDGRTIIQRDNGY 263
+
+Query:  288 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIY 332
+             PN   H + IVGYS       + + YWIV+NSW  +WG+ GY Y
+Sbjct:  264 QPNY--HAVNIVGYSN-----AQGVDYWIVRNSWDTNWGDNGYGY 301
+
+
+>sp|P80067|CATC_RAT DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+            (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 462
+
+ Score =  111 bits (274), Expect = 3e-24
+ Identities = 83/260 (31%), Positives = 128/260 (48%), Gaps = 34/260 (13%)
+
+Query:  105 PVADYLDDEFINSIPPEEQTAFDWRT-RGA--VTPVKNQGQCGSCWSFSTTGNVEGQHFI 161
+            P+ D +  + + S+P     ++DWR  RG   V+PV+NQ  CGSC+SF++ G +E +  I
+Sbjct:  218 PITDEIQQQIL-SLPE----SWDWRNVRGINFVSPVRNQESCGSCYSFASIGMLEARIRI 272
+
+Query:  162 SQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYP 219
+              N   +  LS Q +V C              +GC+GG          ++ G+  E+ +P
+Sbjct:  273 LTNNSQTPILSPQEVVSCSPYA----------QGCDGGFPYLIAGKYAQDFGVVEENCFP 322
+
+Query:  220 YTAETGTQCN--FNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVE-WQ 276
+pattern 237                    ****
+            YTA T   C    N       E   +  F     NE +M   +V  GP+A+A +  + + 
+Sbjct:  323 YTA-TDAPCKPKENCLRYYSSEYYYVGGFYG-GCNEALMKLELVKHGPMAVAFEVHDDFL 380
+
+Query:  277 FYIGGVF-----DIPCNPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGY 330
+             Y  G++       P NP  L +H +L+VGY  K+ +    + YWIVKNSWG+ WGE GY
+Sbjct:  381 HYHSGIYHHTGLSDPFNPFELTNHAVLLVGYG-KDPV--TGLDYWIVKNSWGSQWGESGY 437
+
+Query:  331 IYLRRGKNTCGVSNFVSTSI 350
+              +RRG + C + +    +I
+Sbjct:  438 FRIRRGTDECAIESIAMAAI 457
+
+
+>sp|P97821|CATC_MOUSE DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+            (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 462
+
+ Score =  109 bits (270), Expect = 9e-24
+ Identities = 91/335 (27%), Positives = 155/335 (46%), Gaps = 42/335 (12%)
+
+Query:  34  DKFNKKYSH-----EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEF 88
+            +K N   +H     E Y ER  ++  N   ++ +N +    K+ T     ++  +S  + 
+Sbjct:  147 EKVNMNAAHLGGLQERYSER--LYTHNHNFVKAINTV---QKSWTATAYKEYEKMSLRDL 201
+
+Query:  89  KNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRT-RGA--VTPVKNQGQCGS 145
+                 +++        P+ D +  + +N   PE   ++DWR  +G   V+PV+NQ  CGS
+Sbjct:  202 IRRSGHSQRIPRPKPAPMTDEIQQQILNL--PE---SWDWRNVQGVNYVSPVRNQESCGS 256
+
+Query:  146 CWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 203
+            C+SF++ G +E +  I  N   +  LS Q +V C              +GC+GG      
+Sbjct:  257 CYSFASMGMLEARIRILTNNSQTPILSPQEVVSCSPYA----------QGCDGGFPYLIA 306
+
+Query:  204 NYIIKNGGIQTESSYPYTA-ETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVS 262
+pattern 237                                   ****
+                ++ G+  ES +PYTA ++  +   N       +   +  F     NE +M   +V 
+Sbjct:  307 GKYAQDFGVVEESCFPYTAKDSPCKPRENCLRYYSSDYYYVGGFYG-GCNEALMKLELVK 365
+
+Query:  263 TGPLAIAADAVE-WQFYIGGVF-----DIPCNPNSL-DHGILIVGYSAKNTIFRKNMPYW 315
+             GP+A+A +  + +  Y  G++       P NP  L +H +L+VGY          + YW
+Sbjct:  366 HGPMAVAFEVHDDFLHYHSGIYHHTGLSDPFNPFELTNHAVLLVGYGRDPVT---GIEYW 422
+
+Query:  316 IVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 350
+            I+KNSWG++WGE GY  +RRG + C + +    +I
+Sbjct:  423 IIKNSWGSNWGESGYFRIRRGTDECAIESIAVAAI 457
+
+
+>sp|P25773|CATL_FELCA CATHEPSIN L (PROGESTERONE-DEPENDENT PROTEIN) (PDP)
+          Length = 139
+
+ Score =  108 bits (267), Expect = 2e-23
+ Identities = 55/145 (37%), Positives = 84/145 (57%), Gaps = 9/145 (6%)
+
+Query:  196 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETV 255
+pattern 237                                          ****
+            GGL  +A+ Y+  NGG+ +E SYPY A+ G  C +   N      A ++++  IP  E  
+Sbjct:  1   GGLIDDAFQYVKDNGGLDSEESYPYHAQ-GDSCKYRPEN----SVANVTDYWDIPSKENE 55
+
+Query:  256 MAGYIVSTGPLAIAADAV--EWQFYIGGVF-DIPCNPNSLDHGILIVGYSAKNTIFRKNM 312
+            +   + + GP++ A DA    ++FY  G++ D  C+   +DHG+L+VGY A  T   +N 
+Sbjct:  56  LMITLAAVGPISAAIDASLDTFRFYKEGIYYDPSCSSEDVDHGVLVVGYGADGTE-TENK 114
+
+Query:  313 PYWIVKNSWGADWGEQGYIYLRRGK 337
+             YWI+KNSWG DWG  GYI + + +
+Sbjct:  115 KYWIIKNSWGTDWGMDGYIKMAKDR 139
+
+
+>sp|Q26563|CATC_SCHMA CATHEPSIN C PRECURSOR
+          Length = 454
+
+ Score =  108 bits (266), Expect = 3e-23
+ Identities = 75/238 (31%), Positives = 109/238 (45%), Gaps = 33/238 (13%)
+
+Query:  126 FDWRT-----RGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCD 178
+            FDW +     R  VTP++NQG CGSC++  +   +E +  +  N  +   LS Q +VDC 
+Sbjct:  222 FDWTSPPDGSRSPVTPIRNQGICGSCYASPSAAALEARIRLVSNFSEQPILSPQTVVDCS 281
+
+Query:  179 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNF--NSANIG 236
+                         EGCNGG          ++ G+  +   PYT E   +C    N     
+Sbjct:  282 ----------PYSEGCNGGFPFLIAGKYGEDFGLPQKIVIPYTGEDTGKCTVSKNCTRYY 331
+
+Query:  237 PEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPC-------- 287
+pattern 237 ****
+              + + I  +     NE +M   ++S GP  +  +  E +QFY  G++            
+Sbjct:  332 TTDYSYIGGYYGAT-NEKLMQLELISNGPFPVGFEVYEDFQFYKEGIYHHTTVQTDHYNF 390
+
+Query:  288 NPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 344
+            NP  L +H +L+VGY           PYW VKNSWG +WGEQGY  + RG + CGV +
+Sbjct:  391 NPFELTNHAVLLVGYGVDKL---SGEPYWKVKNSWGVEWGEQGYFRILRGTDECGVES 445
+
+
+>sp|P53634|CATC_HUMAN DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+            (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 463
+
+ Score =  107 bits (265), Expect = 3e-23
+ Identities = 75/235 (31%), Positives = 111/235 (46%), Gaps = 29/235 (12%)
+
+Query:  124 TAFDWRTRGA---VTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCD 178
+            T++DWR       V+PV+NQ  CGSC+SF++ G +E +  I  N   +  LS Q +V C 
+Sbjct:  233 TSWDWRNVHGINFVSPVRNQASCGSCYSFASMGMLEARIRILTNNSQTPILSPQEVVSCS 292
+
+Query:  179 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSA--NIG 236
+                         +GC GG          ++ G+  E+ +PYT  T + C          
+Sbjct:  293 QYA----------QGCEGGFPYLIAGKYAQDFGLVEEACFPYTG-TDSPCKMKEDCFRYY 341
+
+Query:  237 PEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDI-----PCNPN 290
+pattern 237 ****
+              E   +  F     NE +M   +V  GP+A+A +  + +  Y  G++       P NP 
+Sbjct:  342 SSEYHYVGGFYG-GCNEALMKLELVHHGPMAVAFEVYDDFLHYKKGIYHHTGLRDPFNPF 400
+
+Query:  291 SL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 344
+             L +H +L+VGY   +      M YWIVKNSWG  WGE GY  +RRG + C + +
+Sbjct:  401 ELTNHAVLLVGYGTDSA---SGMDYWIVKNSWGTGWGENGYFRIRRGTDECAIES 452
+
+
+>sp|P25780|EUM1_EURMA MITE GROUP I ALLERGEN EUR M 1 (EUR M I)
+          Length = 211
+
+ Score = 99.8 bits (245), Expect = 7e-21
+ Identities = 73/228 (32%), Positives = 102/228 (44%), Gaps = 33/228 (14%)
+
+Query:  117 SIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 176
+            S+P E     D R+   VTP++ QG CGSCW+FS   + E  +   +N  + L+EQ LVD
+Sbjct:  10  SLPSE----LDLRSLRTVTPIRMQGGCGSCWAFSGVASTESAYLAYRNMSLDLAEQELVD 65
+
+Query:  177 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 236
+            C           A   GC+G   P    YI +NG +Q E  YPY A   +    N+   G
+Sbjct:  66  C-----------ASQNGCHGDTIPRGIEYIQQNGVVQ-EHYYPYVAREQSCHRPNAQRYG 113
+
+Query:  237 PEEQAKISNFTMIPKNETVMAGYIVSTGPLAI---AADAVEWQFYIGGVF---DIPCNPN 290
+pattern 237 ****
+             +   +IS     P +  +      +   +A+     D   ++ Y G      D    PN
+Sbjct:  114 LKNYCQISP----PDSNKIRQALTQTHTAVAVIIGIKDLNAFRHYDGRTIMQHDNGYQPN 169
+
+Query:  291 SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKN 338
+               H + IVGY   NT   + + YWIV+NSW   WG+ GY Y     N
+Sbjct:  170 Y--HAVNIVGYG--NT---QGVDYWIVRNSWDTTWGDNGYGYFAANIN 210
+
+
+>sp|Q23894|CYS3_DICDI CYSTEINE PROTEINASE 3 (CYSTEINE PROTEINASE II)
+          Length = 151
+
+ Score = 94.8 bits (232), Expect = 2e-19
+ Identities = 60/158 (37%), Positives = 87/158 (54%), Gaps = 15/158 (9%)
+
+Query: 41  SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF 100
+           +H+E++ R+E FK N+  +   N    +  + T  G+N+ ADLS++E++  YL  +  I 
+Sbjct: 1   THKEFMPRYEEFKKNMDYVHNWN----SKGSKTVLGLNQHADLSNEEYRLNYLGTRAHIK 56
+
+Query: 101 TDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHF 160
+            +     +      +N    ++    DWR + AVTPVK+QGQCGSC   STTG+VEG   
+Sbjct: 57  LNGYHKRNL--GLRLNRPHFKQPLNVDWREKDAVTPVKDQGQCGSC-IISTTGSVEGVTA 113
+
+Query: 161 ISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGL 198
+           I   KLVSLSEQN++               +EGCNGGL
+Sbjct: 114 IKTGKLVSLSEQNILRL--------SSSFGNEGCNGGL 143
+
+
+>sp|P43509|CPR5_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 5 PRECURSOR
+          Length = 344
+
+ Score = 90.9 bits (222), Expect = 4e-18
+ Identities = 69/272 (25%), Positives = 111/272 (40%), Gaps = 47/272 (17%)
+
+Query:  108 DYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 167
+            D +  E  ++IP        W    ++  +++Q  CGSCW+F+    +  +  I+ N  V
+Sbjct:  72  DIVATEVSDAIPDHFDARDQWPNCMSINNIRDQSDCGSCWAFAAAEAISDRTCIASNGAV 131
+
+Query:  168 S--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSY------- 218
+            +  LS ++L+ C        G  +C  GC GG    A+ + +K+G + T  SY       
+Sbjct:  132 NTLLSSEDLLSC------CTGMFSCGNGCEGGYPIQAWKWWVKHG-LVTGGSYETQFGCK 184
+
+Query:  219 PY-----------------------TAETGTQCNFNSANIGPEEQAKISNFTM--IPKNE 253
+pattern 237                                          ****
+            PY                       T +    C   +    P  Q K    T   + K  
+Sbjct:  185 PYSIAPCGETVNGVKWPACPEDTEPTPKCVDSCTSKNNYATPYLQDKHFGSTAYAVGKKV 244
+
+Query:  254 TVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNM 312
+              +   I++ GP+ +A    E +  Y  GV+      +   H + I+G+   N       
+Sbjct:  245 EQIQTEILTNGPIEVAFTVYEDFYQYTTGVYVHTAGASLGGHAVKILGWGVDN-----GT 299
+
+Query:  313 PYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 344
+            PYW+V NSW   WGE+GY  + RG N CG+ +
+Sbjct:  300 PYWLVANSWNVAWGEKGYFRIIRGLNECGIEH 331
+
+
+>sp|P43508|CPR4_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 4 PRECURSOR
+          Length = 335
+
+ Score = 90.5 bits (221), Expect = 5e-18
+ Identities = 73/299 (24%), Positives = 124/299 (41%), Gaps = 50/299 (16%)
+
+Query:  82  DLSSDEFKNYYLNNK-EAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQ 140
+            D++ ++ K   +  +  A  T D+ V  +  +E  ++IP        W    ++  +++Q
+Sbjct:  46  DITIEQVKKRLMRTEFVAPHTPDVEVVKHDINE--DTIPATFDARTQWPNCMSINNIRDQ 103
+
+Query:  141 GQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGL 198
+              CGSCW+F+       +  I+ N  V+  LS ++++ C   C        C  GC GG 
+Sbjct:  104 SDCGSCWAFAAAEAASDRFCIASNGAVNTLLSAEDVLSC---CSN------CGYGCEGGY 154
+
+Query:  199 QPNAYNYIIKNG---GIQTESSYPYTAETGTQCNFNSANI--------GPEEQAKISNFT 247
+pattern 237                                                  ****
+              NA+ Y++K+G   G   E+ +     +   C     N+        G +  A ++  T
+Sbjct:  155 PINAWKYLVKSGFCTGGSYEAQFGCKPYSLAPCGETVGNVTWPSCPDDGYDTPACVNKCT 214
+
+Query:  248 -------------------MIPKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPC 287
+                                + K  + +   I++ GP+  A    E +  Y  GV+    
+Sbjct:  215 NKNYNVAYTADKHFGSTAYAVGKKVSQIQAEIIAHGPVEAAFTVYEDFYQYKTGVYVHTT 274
+
+Query:  288 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFV 346
+                  H I I+G+   N       PYW+V NSW  +WGE GY  + RG N CG+ + V
+Sbjct:  275 GQELGGHAIRILGWGTDN-----GTPYWLVANSWNVNWGENGYFRIIRGTNECGIEHAV 328
+
+
+>sp|P05993|PAP5_CARPA CYSTEINE PROTEINASE (CLONE PLBPC13)
+          Length = 96
+
+ Score = 90.5 bits (221), Expect = 5e-18
+ Identities = 43/87 (49%), Positives = 55/87 (62%), Gaps = 2/87 (2%)
+
+Query: 264 GPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSW 321
+           GPLA+A +A   Q YIGGV         L+HG+L+VGY +     I  K  PYW++KNSW
+Sbjct: 1   GPLAVAINAAYMQTYIGGVSCPYICSRRLNHGVLLVGYGSAGYAPIRLKEKPYWVIKNSW 60
+
+Query: 322 GADWGEQGYIYLRRGKNTCGVSNFVST 348
+           G +WGE GY  + RG+N CGV + VST
+Sbjct: 61  GENWGENGYYKICRGRNICGVDSMVST 87
+
+
+>sp|P07688|CATB_BOVIN CATHEPSIN B PRECURSOR
+          Length = 335
+
+ Score = 88.5 bits (216), Expect = 2e-17
+ Identities = 65/259 (25%), Positives = 105/259 (40%), Gaps = 47/259 (18%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL---SEQNL 174
+            +P        W     +  +++QG CGSCW+F     +  +  I  N  V++   +E  L
+Sbjct:  80  LPESFDAREQWPNCPTIKEIRDQGSCGSCWAFGAVEAISDRICIHSNGRVNVEVSAEDML 139
+
+Query:  175 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ--------------------- 213
+              C  EC          +GCNGG    A+N+  K G +                      
+Sbjct:  140 TCCGGEC---------GDGCNGGFPSGAWNFWTKKGLVSGGLYNSHVGCRPYSIPPCEHH 190
+
+Query:  214 -TESSYPYTAETGT-QCNFN-----SANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+               S  P T E  T +CN       S +   ++    S++++    + +MA  I   GP+
+Sbjct:  191 VNGSRPPCTGEGDTPKCNKTCEPGYSPSYKEDKHFGCSSYSVANNEKEIMAE-IYKNGPV 249
+
+Query:  267 AIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 325
+              A     ++  Y  GV+          H I I+G+  +N       PYW+V NSW  DW
+Sbjct:  250 EGAFSVYSDFLLYKSGVYQHVSGEIMGGHAIRILGWGVEN-----GTPYWLVGNSWNTDW 304
+
+Query:  326 GEQGYIYLRRGKNTCGVSN 344
+            G+ G+  + RG++ CG+ +
+Sbjct:  305 GDNGFFKILRGQDHCGIES 323
+
+
+>sp|P00787|CATB_RAT CATHEPSIN B PRECURSOR (CATHEPSIN B1) (RSG-2)
+          Length = 339
+
+ Score = 87.4 bits (213), Expect = 4e-17
+ Identities = 66/265 (24%), Positives = 113/265 (41%), Gaps = 45/265 (16%)
+
+Query:  117 SIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNL 174
+            ++P        W     +  +++QG CGSCW+F     +  +  I  N  V++  S ++L
+Sbjct:  79  NLPESFDAREQWSNCPTIAQIRDQGSCGSCWAFGAVEAMSDRICIHTNGRVNVEVSAEDL 138
+
+Query:  175 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK----NGGIQTE--------------- 215
+            + C   C        C +GCNGG    A+N+  +    +GG+                  
+Sbjct:  139 LTC---C-----GIQCGDGCNGGYPSGAWNFWTRKGLVSGGVYNSHIGCLPYTIPPCEHH 190
+
+Query:  216 ---SSYPYTAETGT-QCNFN-----SANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+               S  P T E  T +CN       S +   ++    +++++    + +MA  I   GP+
+Sbjct:  191 VNGSRPPCTGEGDTPKCNKMCEAGYSTSYKEDKHYGYTSYSVSDSEKEIMAE-IYKNGPV 249
+
+Query:  267 AIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 325
+              A     ++  Y  GV+          H I I+G+  +N +     PYW+V NSW  DW
+Sbjct:  250 EGAFTVFSDFLTYKSGVYKHEAGDVMGGHAIRILGWGIENGV-----PYWLVANSWNVDW 304
+
+Query:  326 GEQGYIYLRRGKNTCGVSNFVSTSI 350
+            G+ G+  + RG+N CG+ + +   I
+Sbjct:  305 GDNGFFKILRGENHCGIESEIVAGI 329
+
+
+>sp|P25807|CYS1_CAEEL GUT-SPECIFIC CYSTEINE PROTEINASE PRECURSOR
+          Length = 329
+
+ Score = 87.0 bits (212), Expect = 5e-17
+ Identities = 66/288 (22%), Positives = 117/288 (39%), Gaps = 38/288 (13%)
+
+Query:  82  DLSSDEFKNYYLNNK-EAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQ 140
+            +++ +E K   ++ K  A  +D++   +   +  + S+P    +   W    ++  +++Q
+Sbjct:  50  EITEEEMKFKLMDGKYAAAHSDEIRATE--QEVVLASVPATFDSRTQWSECKSIKLIRDQ 107
+
+Query:  141 GQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGL 198
+              CGSCW+F     +  +  I         +S  +L+ C   C       +C  GC GG 
+Sbjct:  108 ATCGSCWAFGAAEMISDRTCIETKGAQQPIISPDDLLSC---C-----GSSCGNGCEGGY 159
+
+Query:  199 QPNAYNY-----IIKNGGIQTESSYPYTAETGTQ----------CNFNSANIGPEEQAKI 243
+pattern 237                                                      ****
+               A  +     ++  G        PY     T           C+ +  +      AK 
+Sbjct:  160 PIQALRWWDSKGVVTGGDYHGAGCKPYPIAPCTSGNCPESKTPSCSMSCQSGYSTAYAKD 219
+
+Query:  244 SNFTM----IPKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILI 298
+             +F +    +PKN   +   I + GP+  A    E +  Y  GV+          H I I
+Sbjct:  220 KHFGVSAYAVPKNAASIQAEIYANGPVEAAFSVYEDFYKYKSGVYKHTAGKYLGGHAIKI 279
+
+Query:  299 VGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFV 346
+            +G+  ++       PYW+V NSWG +WGE G+  + RG + CG+ + V
+Sbjct:  280 IGWGTES-----GSPYWLVANSWGVNWGESGFFKIYRGDDQCGIESAV 322
+
+
+>sp|P07858|CATB_HUMAN CATHEPSIN B PRECURSOR (CATHEPSIN B1) (APP SECRETASE)
+          Length = 339
+
+ Score = 86.2 bits (210), Expect = 9e-17
+ Identities = 68/285 (23%), Positives = 110/285 (37%), Gaps = 55/285 (19%)
+
+Query:  96  KEAIFTDDLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNV 155
+            +  +FT+DL             +P        W     +  +++QG CGSCW+F     +
+Sbjct:  70  QRVMFTEDL------------KLPASFDAREQWPQCPTIKEIRDQGSCGSCWAFGAVEAI 117
+
+Query:  156 EGQHFISQNKLVSL--SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ 213
+              +  I  N  VS+  S ++L+ C   C        C +GCNGG    A+N+  + G + 
+Sbjct:  118 SDRICIHTNAHVSVEVSAEDLLTC---C-----GSMCGDGCNGGYPAEAWNFWTRKGLVS 169
+
+Query:  214 ----------------------TESSYPYTAETGTQ-----CNFNSANIGPEEQAKISNF 246
+pattern 237                                                   ****
+                                    S  P T E  T      C    +    +++    N 
+Sbjct:  170 GGLYESHVGCRPYSIPPCEHHVNGSRPPCTGEGDTPKCSKICEPGYSPTYKQDKHYGYNS 229
+
+Query:  247 TMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN 305
+              +  +E  +   I   GP+  A     ++  Y  GV+          H I I+G+  +N
+Sbjct:  230 YSVSNSEKDIMAEIYKNGPVEGAFSVYSDFLLYKSGVYQHVTGEMMGGHAIRILGWGVEN 289
+
+Query:  306 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 350
+                   PYW+V NSW  DWG+ G+  + RG++ CG+ + V   I
+Sbjct:  290 -----GTPYWLVANSWNTDWGDNGFFKILRGQDHCGIESEVVAGI 329
+
+
+>sp|P43157|CYSP_SCHJA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECURSOR (ANTIGEN SJ31)
+          Length = 342
+
+ Score = 85.4 bits (208), Expect = 2e-16
+ Identities = 64/271 (23%), Positives = 109/271 (39%), Gaps = 57/271 (21%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ--NKLVSLSEQNLV 175
+            IP +  +   W    +++ +++Q +CGSCW+F     +  +  I     +   LS  +L+
+Sbjct:  90  IPSQFDSRKKWPHCKSISQIRDQSRCGSCWAFGAVEAMTDRICIQSGGGQSAELSALDLI 149
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGI---------------------QT 214
+             C   C +      C +GC GG    A++Y +K G +                      T
+Sbjct:  150 SC---CKD------CGDGCQGGFPGVAWDYWVKRGIVTGGSKENHTGCQPYPFPKCEHHT 200
+
+Query:  215 ESSYP-------------YTAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIV 261
+pattern 237                                    ****
+            +  YP              T + G +  +       +E   + N      NE V+   I+
+Sbjct:  201 KGKYPACGTKIYKTPQCKQTCQKGYKTPYEQDKHYGDESYNVQN------NEKVIQRDIM 254
+
+Query:  262 STGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNS 320
+              GP+  A D  E +  Y  G++          H I I+G+  +     K  PYW++ NS
+Sbjct:  255 MYGPVEAAFDVYEDFLNYKSGIYRHVTGSIVGGHAIRIIGWGVE-----KRTPYWLIANS 309
+
+Query:  321 WGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            W  DWGE+G   + RG++ C + + V   +I
+Sbjct:  310 WNEDWGEKGLFRMVRGRDECSIESDVVAGLI 340
+
+
+>sp|P43233|CATB_CHICK CATHEPSIN B PRECURSOR (CATHEPSIN B1)
+          Length = 340
+
+ Score = 85.4 bits (208), Expect = 2e-16
+ Identities = 66/265 (24%), Positives = 111/265 (40%), Gaps = 46/265 (17%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNLV 175
+            +P    T   W     ++ +++QG CGSCW+F     +  +  +  N  VS+  S ++L+
+Sbjct:  80  LPDTFDTRKQWPNCPTISEIRDQGSCGSCWAFGAVEAISDRICVHTNAKVSVEVSAEDLL 139
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ---------------------- 213
+             C   C    G E C  GCNGG    A+ Y  + G +                       
+Sbjct:  140 SC---C----GFE-CGMGCNGGYPSGAWRYWTERGLVSGGLYDSHVGCRAYTIPPCEHHV 191
+
+Query:  214 TESSYPYTAETGT--QCNFN-----SANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPL 266
+pattern 237                               ****
+              S  P T E G   +C+ +     S +   ++   I+++  +P++E  +   I   GP+
+Sbjct:  192 NGSRPPCTGEGGETPRCSRHCEPGYSPSYKEDKHYGITSYG-VPRSEKEIMAEIYKNGPV 250
+
+Query:  267 AIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 325
+              A    E +  Y  GV+          H I I+G+  +N       PYW+  NSW  DW
+Sbjct:  251 EGAFIVYEDFLMYKSGVYQHVSGEQVGGHAIRILGWGVEN-----GTPYWLAANSWNTDW 305
+
+Query:  326 GEQGYIYLRRGKNTCGVSNFVSTSI 350
+            G  G+  + RG++ CG+ + +   +
+Sbjct:  306 GITGFFKILRGEDHCGIESEIVAGV 330
+
+
+>sp|P43510|CPR6_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 6 PRECURSOR
+          Length = 379
+
+ Score = 85.0 bits (207), Expect = 2e-16
+ Identities = 71/265 (26%), Positives = 116/265 (42%), Gaps = 53/265 (20%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK--LVSLSEQNLV 175
+            IP    +  +W    ++  +++Q  CGSCW+F     +  +  I+ +    V+LS  +L+
+Sbjct:  105 IPESFDSRDNWPKCDSIKVIRDQSSCGSCWAFGAVEAMSDRICIASHGELQVTLSADDLL 164
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ------CN 229
+             C   C      ++C  GCNGG    A+ Y +K+G I T S+Y  TA  G +      C 
+Sbjct:  165 SC---C------KSCGFGCNGGDPLAAWRYWVKDG-IVTGSNY--TANNGCKPYPFPPCE 212
+
+Query:  230 FNSANIGPE------------EQAKISNFTMIPKNETVMAGY---------------IVS 262
+pattern 237        **            **
+             +S     +            E+  +S++T    +E    G                +++
+Sbjct:  213 HHSKKTHFDPCPHDLYPTPKCEKKCVSDYTDKTYSEDKFFGASAYGVKDDVEAIQKELMT 272
+
+Query:  263 TGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSW 321
+             GPL IA +  E +  Y GGV+          H + ++G+   + I     PYW V NSW
+Sbjct:  273 HGPLEIAFEVYEDFLNYDGGVYVHTGGKLGGGHAVKLIGWGIDDGI-----PYWTVANSW 327
+
+Query:  322 GADWGEQGYIYLRRGKNTCGVSNFV 346
+              DWGE G+  + RG + CG+ + V
+Sbjct:  328 NTDWGEDGFFRILRGVDECGIESGV 352
+
+
+>sp|P25792|CYSP_SCHMA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECURSOR (ANTIGEN SM31)
+          Length = 340
+
+ Score = 84.6 bits (206), Expect = 3e-16
+ Identities = 64/260 (24%), Positives = 107/260 (40%), Gaps = 45/260 (17%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLV 175
+            IP    +   W    ++  +++Q +CGSCWSF     +  +  I     + V LS  +L+
+Sbjct:  89  IPSNFDSRKKWPGCKSIATIRDQSRCGSCWSFGAVEAMSDRSCIQSGGKQNVELSAVDLL 148
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTA---ETGTQCNFNS 232
+             C   C      E+C  GC GG+   A++Y +K G +   S   +T        +C  ++
+Sbjct:  149 TC---C------ESCGLGCEGGILGPAWDYWVKEGIVTASSKENHTGCEPYPFPKCEHHT 199
+
+Query:  233 ANIGPEEQAKISN---------------FTM----------IPKNETVMAGYIVSTGPLA 267
+pattern 237     ****
+                P   +KI N               +T           +  +E  +   I+  GP+ 
+Sbjct:  200 KGKYPPCGSKIYNTPRCKQTCQRKYKTPYTQDKHRGKSSYNVKNDEKAIQKEIMKYGPVE 259
+
+Query:  268 IAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 326
+             +    E +  Y  G++          H I I+G+  +N       PYW++ NSW  DWG
+Sbjct:  260 ASFTVYEDFLNYKSGIYKHITGEALGGHAIRIIGWGVEN-----KTPYWLIANSWNEDWG 314
+
+Query:  327 EQGYIYLRRGKNTCGVSNFV 346
+            E GY  + RG++ C + + V
+Sbjct:  315 ENGYFRIVRGRDECSIESEV 334
+
+
+>sp|P10605|CATB_MOUSE CATHEPSIN B PRECURSOR (CATHEPSIN B1)
+          Length = 339
+
+ Score = 84.6 bits (206), Expect = 3e-16
+ Identities = 66/253 (26%), Positives = 108/253 (42%), Gaps = 43/253 (16%)
+
+Query:  128 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNLVDCDHECMEYE 185
+            W     +  +++QG CGSCW+F     +  +  I  N  V++  S ++L+ C   C    
+Sbjct:  90  WSNCPTIGQIRDQGSCGSCWAFGAVEAISDRTCIHTNGRVNVEVSAEDLLTC---C---- 142
+
+Query:  186 GEEACDEGCNGGLQPNAYNYIIK----NGGIQTE------------------SSYPYTAE 223
+                C +GCNGG    A+++  K    +GG+                     S  P T E
+Sbjct:  143 -GIQCGDGCNGGYPSGAWSFWTKKGLVSGGVYNSHVGCLPYTIPPCEHHVNGSRPPCTGE 201
+
+Query:  224 TGT-QCNFN-SANIGPE-EQAKISNFTMIPKNETV--MAGYIVSTGPLAIAADAV-EWQF 277
+pattern 237                ** **
+              T +CN +  A   P  ++ K   +T    + +V  +   I   GP+  A     ++  
+Sbjct:  202 GDTPRCNKSCEAGYSPSYKEDKHFGYTSYSVSNSVKEIMAEIYKNGPVEGAFTVFSDFLT 261
+
+Query:  278 YIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK 337
+            Y  GV+          H I I+G+  +N +     PYW+  NSW  DWG+ G+  + RG+
+Sbjct:  262 YKSGVYKHEAGDMMGGHAIRILGWGVENGV-----PYWLAANSWNLDWGDNGFFKILRGE 316
+
+Query:  338 NTCGVSNFVSTSI 350
+            N CG+ + +   I
+Sbjct:  317 NHCGIESEIVAGI 329
+
+
+>sp|P25802|CYS1_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 341
+
+ Score = 79.6 bits (193), Expect = 9e-15
+ Identities = 63/270 (23%), Positives = 106/270 (38%), Gaps = 46/270 (17%)
+
+Query:  103 DLPVADYLDDEFINSIPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFIS 162
+            D  V D   +E  + IP        W    ++  + +Q  CGSCW+ S+   +  +  I+
+Sbjct:  76  DEEVEDEELEENNDDIPESYDPRIQWANCSSLFHIPDQANCGSCWAVSSAAAMSDRICIA 135
+
+Query:  163 QN--KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNY-----IIKNGGIQTE 215
+                K V +S Q++V C   C        C +GC GG   +A+ +     ++  G   T+
+Sbjct:  136 SKGAKQVLISAQDVVSC---CTW------CGDGCEGGWPISAFRFHADEGVVTGGDYNTK 186
+
+Query:  216 SSY-PYTAET----GTQCNFNSANIGPEEQAKISNFTMI------PKNETVMAGYIVSTG 264
+pattern 237                           ****
+             S  PY        G +  +    +G  +  +     ++      P +      Y +   
+Sbjct:  187 GSCRPYEIHPCGHHGNETYYGEC-VGMADTPRCKRRCLLGYPKSYPSDRYYKKAYQLKNS 245
+
+Query:  265 PLAIAADAV-------------EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKN 311
+              AI  D +             ++  Y  G++       +  H + ++G+  +     K 
+Sbjct:  246 VKAIQKDIMKNGPVVATYTVYEDFAHYRSGIYKHKAGRKTGLHAVKVIGWGEE-----KG 300
+
+Query:  312 MPYWIVKNSWGADWGEQGYIYLRRGKNTCG 341
+             PYWIV NSW  DWGE G+  + RG N CG
+Sbjct:  301 TPYWIVANSWHDDWGENGFFRMHRGSNDCG 330
+
+
+>sp|P25793|CYS2_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 342
+
+ Score = 78.4 bits (190), Expect = 2e-14
+ Identities = 59/266 (22%), Positives = 110/266 (41%), Gaps = 47/266 (17%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLV 175
+            IPP       W+       +++Q  CGSCW+ ST   +  +  I+    K V++S  +++
+Sbjct:  87  IPPSYDPRDVWKNCTTFY-IRDQANCGSCWAVSTAAAISDRICIASKAEKQVNISATDIM 145
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ------TESSYPY--------- 220
+             C   C        C +GC GG    A+ Y I +G +        +   PY         
+Sbjct:  146 TC---C-----RPQCGDGCEGGWPIEAWKYFIYDGVVSGGEYLTKDVCRPYPIHPCGHHG 197
+
+Query:  221 -------------TAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLA 267
+pattern 237                              ****
+                         T     +C      +   ++    +  ++ ++   +   I+  GP+ 
+Sbjct:  198 NDTYYGECRGTAPTPPCKRKCRPGVRKMYRIDKRYGKDAYIVKQSVKAIQSEILKNGPV- 256
+
+Query:  268 IAADAV--EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 325
+            +A+ AV  +++ Y  G++          H + ++G+  +N     N  +W++ NSW  DW
+Sbjct:  257 VASFAVYEDFRHYKSGIYKHTAGELRGYHAVKMIGWGNEN-----NTDFWLIANSWHNDW 311
+
+Query:  326 GEQGYIYLRRGKNTCGVSNFVSTSII 351
+            GE+GY  + RG N CG+   ++  I+
+Sbjct:  312 GEKGYFRIVRGSNDCGIEGTIAAGIV 337
+
+
+>sp|P19092|CYS1_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 342
+
+ Score = 77.6 bits (188), Expect = 4e-14
+ Identities = 59/266 (22%), Positives = 110/266 (41%), Gaps = 47/266 (17%)
+
+Query:  118 IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLV 175
+            IPP       W+       +++Q  CGSCW+ ST   +  +  I+    K V++S  +++
+Sbjct:  87  IPPSYDPRDVWKNCTTFY-IRDQANCGSCWAVSTAAAISDRICIASKAEKQVNISATDIM 145
+
+Query:  176 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ------TESSYPY--------- 220
+             C   C        C +GC GG    A+ Y I +G +        +   PY         
+Sbjct:  146 TC---C-----RPQCGDGCEGGWPIEAWKYFIYDGVVSGGEYLTKDVCRPYPIHPCGHHG 197
+
+Query:  221 -------------TAETGTQCNFNSANIGPEEQAKISNFTMIPKNETVMAGYIVSTGPLA 267
+pattern 237                              ****
+                         T     +C      +   ++    +  ++ ++   +   I+  GP+ 
+Sbjct:  198 NDTYYGECRGTAPTPPCKRKCRPGVRKMYRIDKRYGKDAYIVKQSVKAIQSEILRNGPV- 256
+
+Query:  268 IAADAV--EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 325
+            +A+ AV  +++ Y  G++          H + ++G+  +N     N  +W++ NSW  DW
+Sbjct:  257 VASFAVYEDFRHYKSGIYKHTAGELRGYHAVKMIGWGNEN-----NTDFWLIANSWHNDW 311
+
+Query:  326 GEQGYIYLRRGKNTCGVSNFVSTSII 351
+            GE+GY  + RG N CG+   ++  I+
+Sbjct:  312 GEKGYFRIIRGTNDCGIEGTIAAGIV 337
+
+
+>sp|P43507|CPR3_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 370
+
+ Score = 73.3 bits (177), Expect = 7e-13
+ Identities = 56/248 (22%), Positives = 98/248 (38%), Gaps = 39/248 (15%)
+
+Query:  128 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYE 185
+            W     +  ++NQ  CGSCW+F     +  +  I  N      +S ++++ C   C    
+Sbjct:  102 WPDCNTIKLIRNQATCGSCWAFGAAEVISDRVCIQSNGTQQPVISVEDILSC---C---- 154
+
+Query:  186 GEEACDEGCNGGLQPNAYNYIIKNGGIQ---------------------TESSYPYTAET 224
+                C  GC GG    A  +   +G +                       ES+ P + +T
+Sbjct:  155 -GTTCGYGCKGGYSIEALRFWASSGAVTGGDYGGHGCMPYSFAPCTKNCPESTTP-SCKT 212
+
+Query:  225 GTQCNFNSANIGPEEQAKISNFTMIP-KNETVMAGYIVSTGPLAIAADAVE-WQFYIGGV 282
+pattern 237             ****
+              Q ++ +     ++    S + +   K+ T +   I   GP+  +    E +  Y  GV
+Sbjct:  213 TCQSSYKTEEYKKDKHYGASAYKVTTTKSVTEIQTEIYHYGPVEASYKVYEDFYHYKSGV 272
+
+Query:  283 FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGV 342
+            +          H + I+G+  +N +      YW++ NSWG  +GE+G+  +RRG N C +
+Sbjct:  273 YHYTSGKLVGGHAVKIIGWGVENGV-----DYWLIANSWGTSFGEKGFFKIRRGTNECQI 327
+
+Query:  343 SNFVSTSI 350
+               V   I
+Sbjct:  328 EGNVVAGI 335
+
+
+>sp|P13823|SERA_PLAFG SERINE-REPEAT ANTIGEN PROTEIN PRECURSOR (P126) (111 KD ANTIGEN)
+          Length = 989
+
+ Score = 70.2 bits (169), Expect = 6e-12
+ Identities = 63/247 (25%), Positives = 102/247 (40%), Gaps = 46/247 (18%)
+
+Query:  137 VKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE--EACDEGC 194
+            V++QG C + W F++  ++E    +   +   +S   + +C      Y+GE  + CDEG 
+Sbjct:  579 VEDQGNCDTSWIFASKYHLETIRCMKGYEPTKISALYVANC------YKGEHKDRCDEGS 632
+
+Query:  195 NGGLQPNAYNYIIKNGG-IQTESSYPYT-AETGTQC------------------NFNSAN 234
+            +    P  +  II++ G +  ES+YPY   + G QC                  N N  N
+Sbjct:  633 S----PMEFLQIIEDYGFLPAESNYPYNYVKVGEQCPKVEDHWMNLWDNGKILHNKNEPN 688
+
+Query:  235 I----------GPEEQAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFD 284
+pattern 237             ****
+                              +  F  I K E +  G +++     I A+ V    + G    
+Sbjct:  689 SLDGKGYTAYESERFHDNMDAFVKIIKTEVMNKGSVIAY----IKAENVMGYEFSGKKVQ 744
+
+Query:  285 IPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 344
+              C  ++ DH + IVGY        +   YWIV+NSWG  WG++GY  +     T    N
+Sbjct:  745 NLCGDDTADHAVNIVGYGNYVNSEGEKKSYWIVRNSWGPYWGDEGYFKVDMYGPTHCHFN 804
+
+Query:  345 FVSTSII 351
+            F+ + +I
+Sbjct:  805 FIHSVVI 811
+
+
+>sp|P32956|CC3_CARCN CYSTEINE PROTEINASE III (CC-III)
+          Length = 43
+
+ Score = 60.9 bits (145), Expect = 4e-09
+ Identities = 24/33 (72%), Positives = 27/33 (81%)
+
+Query: 125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 157
+           + DWR +GAVTPVKNQG CGSCW+FST   VEG
+Sbjct: 4   SIDWRKKGAVTPVKNQGSCGSCWAFSTIATVEG 36
+
+
+>sp|P32957|CC4_CARCN CYSTEINE PROTEINASE IV (CC-IV)
+          Length = 43
+
+ Score = 59.7 bits (142), Expect = 9e-09
+ Identities = 24/33 (72%), Positives = 27/33 (81%)
+
+Query: 125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 157
+           + DWR +GAVTPVKNQG CGSCW+FST   VEG
+Sbjct: 4   SIDWRKKGAVTPVKNQGSCGSCWAFSTIVTVEG 36
+
+
+>sp|Q06544|CYS3_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3
+          Length = 174
+
+ Score = 59.3 bits (141), Expect = 1e-08
+ Identities = 31/103 (30%), Positives = 49/103 (47%), Gaps = 15/103 (14%)
+
+Query: 249 IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 308
+           I KN  V+AG+IV            ++  Y  G++       +  H + I+G+  +    
+Sbjct: 87  IMKNGPVVAGFIVYE----------DFAHYKSGIYKHTAGRMTGGHAVKIIGWGKE---- 132
+
+Query: 309 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 351
+            K  PYW++ NSW  DWGE+G+  + RG N C +   V   I+
+Sbjct: 133 -KGTPYWLIANSWHDDWGEKGFYRMIRGINNCRIEEMVFAGIV 174
+
+
+>sp|P32954|CC1_CARCN CYSTEINE PROTEINASE I (CC-I)
+          Length = 43
+
+ Score = 57.8 bits (137), Expect = 3e-08
+ Identities = 22/33 (66%), Positives = 27/33 (81%)
+
+Query: 125 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 157
+           + DWR +GAVTPV+NQG CGSCW+FS+   VEG
+Sbjct: 4   SIDWRQKGAVTPVRNQGSCGSCWTFSSVAAVEG 36
+
+
+>sp|P32955|CC2_CARCN CYSTEINE PROTEINASE II (CC-II)
+          Length = 43
+
+ Score = 56.2 bits (133), Expect = 1e-07
+ Identities = 22/31 (70%), Positives = 25/31 (79%)
+
+Query: 127 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 157
+           DWR +GAVTPVK+Q  CGSCW+FST   VEG
+Sbjct: 6   DWRQKGAVTPVKDQNPCGSCWAFSTVATVEG 36
+
+
+>sp||CATL_CHICK_2 [Segment 2 of 2] CATHEPSIN L
+          Length = 42
+
+ Score = 51.9 bits (122), Expect = 2e-06
+ Identities = 20/39 (51%), Positives = 28/39 (71%), Gaps = 1/39 (2%)
+
+Query: 314 YWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 351
+           YWIVKNSWG  WG++GYIY+ +  KN CG++   S  ++
+Sbjct: 4   YWIVKNSWGEKWGDKGYIYMAKDRKNHCGIATAASYPLV 42
+
+
+>sp|P12399|CT2A_MOUSE CTLA-2-ALPHA PROTEIN PRECURSOR
+          Length = 136
+
+ Score = 41.8 bits (96), Expect = 0.002
+ Identities = 31/101 (30%), Positives = 50/101 (48%), Gaps = 4/101 (3%)
+
+Query: 9   LAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIA 66
+           L +  + + S   PP+    +++ E++ KF K Y+  E   R  +++ N  KIE  N   
+Sbjct: 17  LLILCLGMMSAAPPPDPSLDNEWKEWKTKFAKAYNLNEERHRRLVWEENKKKIEAHNADY 76
+
+Query: 67  INHKADTKFGVNKFADLSSDEFK-NYYLNN-KEAIFTDDLP 105
+              K     G+N+F+DL+ +EFK N Y N+        DLP
+Sbjct: 77  EQGKTSFYMGLNQFSDLTPEEFKTNCYGNSLNRGEMAPDLP 117
+
+
+>sp|P05689|CATX_BOVIN CATHEPSIN
+          Length = 73
+
+ Score = 40.2 bits (92), Expect = 0.006
+ Identities = 15/40 (37%), Positives = 24/40 (59%), Gaps = 5/40 (12%)
+
+Query: 292 LDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 331
+           ++H + + G+   +      M YWIV+NSWG  WGE G++
+Sbjct: 9   INHIVSVAGWGVSD-----GMEYWIVRNSWGEPWGEHGWM 43
+
+
+>sp|P12400|CT2B_MOUSE CTLA-2-BETA PROTEIN PRECURSOR
+          Length = 141
+
+ Score = 38.7 bits (88), Expect = 0.019
+ Identities = 25/85 (29%), Positives = 45/85 (52%), Gaps = 1/85 (1%)
+
+Query: 6   LFVLAVFTVFVSSRGIP-PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+           +F+L +    +S+   P P   +++ E++  F K YS +E   R  +++ N  KIE  N 
+Sbjct: 20  VFLLILCLGMMSAAPSPDPSLDNEWKEWKTTFAKAYSLDEERHRRLMWEENKKKIEAHNA 79
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFK 89
+                K     G+N+F+DL+ +EF+
+Sbjct: 80  DYERGKTSFYMGLNQFSDLTPEEFR 104
+
+
+>sp|P23897|HSER_RAT HEAT-STABLE ENTEROTOXIN RECEPTOR PRECURSOR (GC-C) (INTESTINAL
+           GUANYLATE CYCLASE) (STA RECEPTOR)
+          Length = 1072
+
+ Score = 35.6 bits (80), Expect = 0.16
+ Identities = 32/120 (26%), Positives = 56/120 (46%), Gaps = 19/120 (15%)
+
+Query: 15  FVSSRGIPPEEQSQFLEFQDK----FNKKYSHEEYLERFEIFKSNL-GKIEELNLIAINH 69
+           +V   G  PE+   +L   +     F++  S ++ L R E F+  L G+  + N+I +  
+Sbjct: 190 YVYKNGSEPEDCFWYLNALEAGVSYFSEVLSFKDVLRRSEQFQEILMGRNRKSNVIVMCG 249
+
+Query: 70  KADTKFGVN---KFAD----LSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEE 122
+             +T + V    K AD    +  D F N+Y       F DD    +Y+D+  + ++PPE+
+Sbjct: 250 TPETFYNVKGDLKVADDTVVILVDLFSNHY-------FEDDTRAPEYMDNVLVLTLPPEK 302
+
+
+>sp|P20736|BM86_BOOMI GLYCOPROTEIN ANTIGEN BM86 PRECURSOR (PROTECTIVE ANTIGEN)
+          Length = 650
+
+ Score = 35.2 bits (79), Expect = 0.22
+ Identities = 24/81 (29%), Positives = 36/81 (43%), Gaps = 5/81 (6%)
+
+Query: 151 TTGNVEGQHFISQNKLVSLSEQNLVDC----DHECMEYEGEEACDEGCNGGLQPNAYNYI 206
+           TT N +        KL  + + +  +C    DHEC     +++C E  NG  Q +    +
+Sbjct: 533 TTCNPKEIQECQDKKLECVYKNHKAECECPDDHECYREPAKDSCSEEDNGKCQSSGQRCV 592
+
+Query: 207 IKNG-GIQTESSYPYTAETGT 226
+           I+NG  +  E S   TA T T
+Sbjct: 593 IENGKAVCKEKSEATTAATTT 613
+
+
+>sp|P46992|YJR1_YEAST HYPOTHETICAL 43.0 KD PROTEIN IN CPS1-FPP1 INTERGENIC REGION
+          Length = 396
+
+ Score = 32.0 bits (71), Expect = 1.9
+ Identities = 39/191 (20%), Positives = 77/191 (39%), Gaps = 39/191 (20%)
+
+Query: 77  VNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPPEE-------------- 122
+           VNKF D++++E     + ++      + P+ADYL   F   +  ++              
+Sbjct: 42  VNKFKDITNNESCTCEVGDRVWFSGKNAPLADYLSVHFRGPLKLKQFAFYTSPGFTVNNS 101
+
+Query: 123 QTAFDW----------RTRGAVTPVKNQGQCGSCW-------SFSTTGNVEGQHFISQNK 165
+           +++ DW          +T   VT + + G+   C        S + TG+      ++   
+Sbjct: 102 RSSSDWNRLAYYESSSKTADNVTFLNHGGEASPCLGNALSYASSNGTGSASEATVLADGT 161
+
+Query: 166 LVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETG 225
+           L+S  ++ ++  +  C +   ++ C    +G   P  Y Y    GG  T   + +  E  
+Sbjct: 162 LISSDQEYIIYSNVSCPKSGYDKGCGVYRSG--IPAYYGY----GG--TTKMFLFEFEMP 213
+
+Query: 226 TQCNFNSANIG 236
+           T+   NS++IG
+Sbjct: 214 TETEKNSSSIG 224
+
+
+>sp|P28493|PR5_ARATH PATHOGENESIS-RELATED PROTEIN 5 PRECURSOR (PR-5)
+          Length = 239
+
+ Score = 32.0 bits (71), Expect = 1.9
+ Identities = 24/93 (25%), Positives = 36/93 (37%), Gaps = 7/93 (7%)
+
+Query: 137 VKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNG 196
+           ++  G  G C      G V   +    + L  + + N+V C   C  +  ++ C  G N 
+Sbjct: 137 IRPSGGSGDC---KYAGCVSDLNAACPDMLKVMDQNNVVACKSACERFNTDQYCCRGAND 193
+
+Query: 197 GLQ---PNAYNYIIKNGGIQTESSYPYTAETGT 226
+             +   P  Y+ I KN       SY Y  ET T
+Sbjct: 194 KPETCPPTDYSRIFKN-ACPDAYSYAYDDETST 225
+
+
+>sp|P54634|POLN_LORDV NON-STRUCTURAL POLYPROTEIN [CONTAINS: RNA-DIRECTED RNA POLYMERASE ;
+           THIOL PROTEASE 3C ; HELICASE (2C LIKE PROTEIN)]
+          Length = 1699
+
+ Score = 31.3 bits (69), Expect = 3.2
+ Identities = 13/31 (41%), Positives = 21/31 (66%)
+
+Query: 17  SSRGIPPEEQSQFLEFQDKFNKKYSHEEYLE 47
+           SS+G+  EE  ++   +++ N KYS EEYL+
+Sbjct: 893 SSKGLSDEEYDEYKRIREERNGKYSIEEYLQ 923
+
+
+>sp|Q02521|SPP2_YEAST SPLICEOSOME MATURATION PROTEIN SPP2
+          Length = 185
+
+ Score = 30.9 bits (68), Expect = 4.2
+ Identities = 24/99 (24%), Positives = 47/99 (47%), Gaps = 6/99 (6%)
+
+Query: 30  LEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKF---GVNKF-ADLSS 85
+           L+   K  KK   ++  ++  + K+NL   ++    +++HK  +K     ++KF  D  S
+Sbjct: 6   LKLGSKTLKKNISKKTKKKNSLQKANLFDWDDAETASLSHKPQSKIKIQSIDKFDLDEES 65
+
+Query: 86  DEFKNYYLNNKEAIFT--DDLPVADYLDDEFINSIPPEE 122
+              K   +   E   T  +D P+ +Y+ ++  N +P EE
+Sbjct: 66  SSKKKLVIKLSENADTKKNDAPLVEYVTEKEYNEVPVEE 104
+
+
+>sp|P41901|SPR3_YEAST SPORULATION-SPECIFIC SEPTIN
+          Length = 512
+
+ Score = 30.9 bits (68), Expect = 4.2
+ Identities = 17/58 (29%), Positives = 29/58 (49%), Gaps = 9/58 (15%)
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS 117
+           + +NLI +  K+D          L+ +E KN+    +E I   D+PV  +  DE +N+
+Sbjct: 237 KRVNLIPVIAKSDL---------LTKEELKNFKTQVREIIRVQDIPVCFFFGDEVLNA 285
+
+
+>sp|Q01532|BLH1_YEAST CYSTEINE PROTEINASE 1 (Y3) (BLEOMYCIN HYDROLASE) (BLM HYDROLASE)
+          Length = 454
+
+ Score = 30.5 bits (67), Expect = 5.5
+ Identities = 21/66 (31%), Positives = 29/66 (43%), Gaps = 11/66 (16%)
+
+Query: 111 DDEFINS--IPPEEQTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS 168
+           DD  +N   +  ++   F+       TPV NQ   G CW F+ T         +Q +L  
+Sbjct: 36  DDALLNKTRLQKQDNRVFNTVVSTDSTPVTNQKSSGRCWLFAAT---------NQLRLNV 86
+
+Query: 169 LSEQNL 174
+           LSE NL
+Sbjct: 87  LSELNL 92
+
+
+>sp|P24896|NU5M_CAEEL NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 5
+          Length = 527
+
+ Score = 30.5 bits (67), Expect = 5.5
+ Identities = 21/52 (40%), Positives = 26/52 (49%), Gaps = 7/52 (13%)
+
+Query: 44  EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNN 95
+           +YL +  I+K    K  +L L  IN K  T F       LSS  FKNYYL +
+Sbjct: 466 DYLAKNSIYKMKNLKFMDLFLNNINSKGYTLF-------LSSGMFKNYYLKS 510
+
+
+>sp|P25648|SRB8_YEAST SUPPRESSOR OF RNA POLYMERASE B SRB8
+          Length = 1427
+
+ Score = 30.1 bits (66), Expect = 7.2
+ Identities = 22/89 (24%), Positives = 44/89 (48%), Gaps = 10/89 (11%)
+
+Query: 21   IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGV--- 77
+            +PP + S F++     +  Y  EE  ++ E F  NLG    + ++ I H+ + K+ +   
+Sbjct: 1314 LPPFQVSSFVKETKLHSGDYGEEEDADQEESFSLNLG----IGIVEIAHENEQKWLIYDK 1369
+
+Query: 78   --NKFADLSSDEFKNYYLNNKEAIFTDDL 104
+              +K+    S E   ++++N    +TDD+
+Sbjct: 1370 KDHKYVCTFSME-PYHFISNYNTKYTDDM 1397
+
+
+>sp|Q04723|PEPC_LACLC AMINOPEPTIDASE C
+          Length = 436
+
+ Score = 30.1 bits (66), Expect = 7.2
+ Identities = 11/20 (55%), Positives = 14/20 (70%)
+
+Query: 311 NMPYWIVKNSWGADWGEQGY 330
+           N   W V+NSWG D G++GY
+Sbjct: 370 NSTKWKVENSWGKDAGQKGY 389
+
+
+>sp|Q13867|BLMH_HUMAN BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)
+          Length = 455
+
+ Score = 29.7 bits (65), Expect = 9.4
+ Identities = 10/17 (58%), Positives = 13/17 (75%)
+
+Query: 315 WIVKNSWGADWGEQGYI 331
+           W V+NSWG D G +GY+
+Sbjct: 392 WRVENSWGEDHGHKGYL 408
+
+
+>sp|P87362|BLMH_CHICK BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH) (AMINOPEPTIDASE H)
+          Length = 455
+
+ Score = 29.7 bits (65), Expect = 9.4
+ Identities = 10/19 (52%), Positives = 14/19 (73%)
+
+Query: 315 WIVKNSWGADWGEQGYIYL 333
+           W V+NSWG D G +GY+ +
+Sbjct: 392 WRVENSWGEDRGNKGYLIM 410
+
+
+>sp|P70645|BLMH_RAT BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)
+          Length = 454
+
+ Score = 29.7 bits (65), Expect = 9.4
+ Identities = 10/17 (58%), Positives = 13/17 (75%)
+
+Query: 315 WIVKNSWGADWGEQGYI 331
+           W V+NSWG D G +GY+
+Sbjct: 392 WRVENSWGEDHGHKGYL 408
+
+
+  Database: /home/peter/blast/data/swissprot
+    Posted date:  Oct 10, 2000 10:43 AM
+  Number of letters in database: 31,984,247
+  Number of sequences in database:  88,780
+  
+Lambda     K      H
+   0.317    0.136    0.414 
+
+Lambda     K      H
+   0.270   0.0477    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 23348054
+Number of Sequences: 88780
+Number of extensions: 1039466
+Number of successful extensions: 3135
+Number of sequences better than 10.0: 162
+Number of HSP's better than 10.0 without gapping: 118
+Number of HSP's successfully gapped in prelim test: 8
+Number of HSP's that attempted gapping in prelim test: 2557
+Number of HSP's gapped (non-prelim): 148
+length of query: 351
+length of database: 31,984,247
+effective HSP length: 50
+effective length of query: 301
+effective length of database: 27,545,247
+effective search space: 8291119347
+effective search space used: 8291119347
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.6 bits)
+S2: 65 (29.7 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/phredfile.phd
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/phredfile.phd	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/phredfile.phd	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,784 @@
+BEGIN_SEQUENCE ML4924R
+
+BEGIN_COMMENT
+
+CHROMAT_FILE: ML4924R
+ABI_THUMBPRINT: 0
+PHRED_VERSION: 0.980904.e
+CALL_METHOD: phred
+QUALITY_LEVELS: 99
+TIME: Thu Jun 21 11:27:03 2001
+TRACE_ARRAY_MIN_INDEX: 0
+TRACE_ARRAY_MAX_INDEX: 8792
+CHEM: term
+DYE: big
+
+END_COMMENT
+
+BEGIN_DNA
+a 6 1
+c 6 20
+t 6 17
+t 8 26
+t 8 35
+g 12 48
+g 18 58
+t 16 66
+c 14 83
+g 11 98
+c 9 104
+c 6 122
+t 6 128
+g 9 140
+c 19 147
+a 20 159
+g 32 167
+g 34 178
+t 34 190
+a 39 200
+c 39 208
+c 39 221
+g 29 233
+g 40 243
+t 28 255
+c 21 267
+c 13 277
+g 4 288
+n 4 297
+g 4 298
+a 8 314
+t 8 328
+t 23 339
+c 29 349
+c 39 359
+c 40 369
+g 40 381
+g 40 390
+g 40 401
+t 46 413
+c 34 425
+g 33 436
+a 29 447
+c 29 453
+c 32 466
+c 33 476
+a 34 485
+c 34 492
+g 34 505
+c 34 514
+g 40 527
+t 40 537
+c 34 548
+c 34 559
+g 34 572
+c 34 579
+t 34 592
+t 34 602
+t 39 613
+g 40 625
+c 40 633
+t 40 645
+c 40 656
+a 40 664
+t 40 673
+c 40 685
+t 46 697
+c 51 707
+c 51 718
+c 56 728
+t 51 738
+c 51 749
+t 35 760
+g 35 772
+a 35 783
+c 35 790
+t 35 803
+t 35 813
+t 39 824
+g 40 836
+c 40 844
+a 40 855
+t 40 865
+c 56 876
+c 42 887
+c 51 898
+t 46 909
+t 46 921
+a 46 931
+a 46 940
+t 46 951
+c 46 962
+c 46 974
+t 46 985
+c 40 995
+a 40 1004
+g 40 1015
+a 40 1028
+a 40 1037
+t 40 1048
+g 35 1060
+g 35 1071
+c 35 1080
+g 35 1096
+t 35 1106
+c 35 1116
+a 40 1125
+g 40 1138
+a 40 1150
+g 51 1160
+a 51 1172
+a 51 1182
+g 51 1193
+a 40 1204
+a 35 1214
+g 35 1226
+c 39 1235
+t 39 1250
+c 39 1259
+t 39 1272
+c 40 1282
+g 51 1295
+a 51 1305
+a 51 1315
+c 51 1325
+c 51 1337
+c 51 1348
+a 51 1358
+a 51 1368
+t 51 1381
+g 51 1393
+a 51 1403
+g 51 1415
+g 51 1427
+g 39 1440
+a 39 1450
+t 39 1462
+a 39 1470
+t 39 1483
+c 35 1494
+a 40 1503
+a 40 1514
+g 40 1528
+g 40 1540
+t 40 1552
+t 39 1565
+c 39 1574
+a 39 1583
+g 39 1598
+a 38 1609
+a 40 1619
+g 39 1632
+c 39 1642
+t 39 1657
+c 39 1666
+g 39 1680
+t 40 1691
+t 45 1703
+c 51 1713
+t 51 1726
+c 51 1736
+a 51 1746
+a 51 1757
+c 51 1767
+a 45 1778
+t 45 1791
+c 45 1802
+t 45 1816
+c 45 1826
+c 45 1838
+g 45 1852
+t 51 1863
+t 51 1875
+g 56 1887
+g 43 1899
+t 43 1911
+g 43 1924
+a 43 1933
+g 43 1946
+a 43 1958
+g 56 1970
+t 56 1982
+g 56 1994
+g 56 2006
+t 56 2017
+g 51 2030
+a 43 2041
+t 43 2053
+c 43 2063
+g 43 2079
+t 43 2089
+c 43 2100
+t 43 2114
+c 43 2124
+a 43 2135
+c 43 2144
+c 43 2157
+c 43 2169
+g 43 2184
+t 43 2195
+g 43 2207
+c 43 2216
+c 43 2229
+t 43 2244
+c 43 2253
+c 43 2265
+a 46 2277
+a 56 2288
+g 56 2299
+g 56 2311
+t 56 2324
+t 56 2336
+t 56 2348
+t 56 2360
+g 56 2373
+g 51 2385
+a 51 2397
+a 51 2408
+c 51 2418
+a 43 2430
+g 43 2443
+c 45 2454
+t 45 2469
+c 45 2480
+a 45 2491
+g 56 2503
+t 56 2516
+g 56 2528
+g 51 2540
+t 51 2552
+c 51 2564
+a 51 2574
+a 51 2585
+a 51 2598
+c 45 2608
+a 45 2620
+c 45 2630
+c 45 2644
+t 45 2658
+g 51 2671
+t 51 2682
+c 51 2693
+t 51 2707
+t 45 2719
+c 45 2730
+t 51 2743
+c 51 2753
+c 51 2765
+a 51 2775
+a 51 2787
+g 43 2798
+g 43 2810
+c 43 2822
+g 43 2837
+a 43 2847
+g 43 2859
+g 51 2872
+t 51 2884
+a 51 2894
+c 43 2904
+a 43 2917
+c 43 2927
+t 43 2943
+g 43 2956
+t 43 2967
+g 51 2980
+a 51 2992
+g 45 3003
+g 45 3017
+t 45 3029
+c 45 3039
+t 45 3053
+t 45 3065
+t 51 3077
+c 51 3088
+g 51 3102
+g 51 3113
+t 51 3125
+a 43 3136
+t 43 3148
+t 43 3161
+a 43 3170
+g 43 3183
+a 43 3196
+c 43 3205
+g 43 3220
+t 43 3231
+a 45 3243
+a 56 3254
+t 56 3266
+g 56 3279
+a 56 3290
+a 56 3302
+a 56 3314
+a 51 3325
+g 51 3337
+a 51 3350
+t 43 3360
+t 43 3373
+g 43 3385
+c 43 3394
+g 43 3410
+t 43 3421
+g 43 3434
+c 43 3443
+t 43 3458
+a 43 3468
+c 43 3477
+g 43 3493
+t 43 3505
+c 43 3515
+a 43 3527
+c 43 3536
+c 43 3550
+g 43 3565
+t 43 3576
+g 43 3588
+a 45 3601
+g 45 3611
+a 45 3625
+g 45 3635
+g 45 3648
+a 43 3660
+g 43 3672
+a 38 3684
+c 38 3692
+a 38 3705
+a 43 3717
+g 56 3729
+g 56 3741
+c 56 3752
+a 56 3765
+a 51 3776
+t 51 3789
+g 51 3801
+c 51 3811
+a 43 3824
+g 43 3836
+c 43 3846
+t 43 3862
+t 43 3874
+c 43 3884
+t 51 3897
+t 51 3909
+g 56 3922
+a 56 3933
+g 56 3944
+a 56 3957
+g 56 3968
+t 43 3980
+g 43 3992
+g 43 4004
+c 43 4013
+t 43 4029
+t 43 4041
+g 43 4053
+a 56 4064
+a 56 4075
+a 51 4087
+g 51 4098
+t 51 4111
+c 51 4121
+a 51 4132
+a 51 4144
+g 56 4155
+g 56 4168
+a 51 4181
+g 51 4192
+t 43 4205
+a 43 4215
+c 43 4225
+g 43 4240
+a 43 4250
+g 43 4263
+c 43 4274
+t 45 4289
+g 45 4301
+t 45 4312
+t 56 4324
+g 56 4336
+a 56 4347
+g 56 4358
+g 56 4371
+a 56 4383
+g 56 4394
+g 51 4407
+a 43 4419
+a 43 4430
+c 43 4440
+t 43 4455
+t 43 4467
+c 43 4477
+a 51 4488
+g 51 4500
+t 51 4513
+g 51 4525
+a 51 4536
+c 45 4546
+a 45 4558
+c 45 4569
+t 51 4583
+g 45 4595
+g 45 4607
+c 45 4618
+t 45 4633
+g 45 4645
+t 45 4656
+t 56 4668
+t 56 4680
+t 56 4692
+g 56 4704
+g 56 4716
+a 56 4727
+t 56 4738
+t 56 4751
+c 56 4762
+g 56 4775
+g 56 4787
+t 51 4798
+a 51 4809
+t 51 4821
+c 51 4831
+c 51 4844
+a 51 4854
+g 51 4866
+g 51 4879
+a 56 4892
+g 51 4903
+c 51 4914
+a 51 4926
+c 51 4936
+a 51 4948
+t 51 4960
+t 56 4973
+g 56 4985
+a 56 4997
+t 56 5008
+c 56 5020
+t 56 5033
+t 56 5045
+g 56 5057
+g 56 5069
+a 56 5080
+a 40 5091
+t 40 5104
+c 40 5115
+a 40 5124
+a 40 5136
+g 40 5149
+t 40 5162
+a 40 5172
+c 40 5182
+g 40 5197
+a 40 5207
+t 40 5220
+c 51 5231
+c 56 5243
+t 56 5256
+t 51 5268
+c 51 5279
+t 51 5292
+a 51 5302
+c 51 5312
+g 51 5325
+g 51 5338
+g 51 5350
+t 51 5362
+a 45 5372
+t 45 5384
+c 45 5395
+t 45 5409
+a 45 5419
+t 45 5431
+g 51 5444
+g 56 5456
+a 56 5468
+a 56 5479
+t 56 5492
+g 40 5503
+g 39 5516
+a 39 5528
+c 39 5537
+t 39 5552
+t 39 5564
+c 39 5575
+t 40 5588
+a 40 5597
+t 40 5609
+g 40 5622
+t 56 5634
+t 56 5646
+g 51 5658
+t 51 5669
+t 51 5682
+c 51 5692
+t 51 5705
+t 51 5718
+g 51 5730
+a 45 5741
+a 40 5752
+c 40 5762
+g 37 5776
+c 37 5785
+c 40 5798
+c 40 5810
+g 56 5823
+g 56 5834
+g 51 5846
+a 51 5859
+t 40 5870
+a 40 5880
+c 40 5890
+c 40 5904
+g 40 5918
+t 51 5929
+g 51 5941
+t 37 5953
+t 37 5966
+g 37 5978
+c 37 5987
+a 37 5999
+c 40 6009
+g 40 6023
+t 34 6035
+c 34 6047
+g 29 6060
+c 29 6068
+c 29 6083
+g 29 6096
+t 32 6107
+a 29 6118
+g 25 6128
+g 19 6142
+t 27 6154
+g 27 6167
+c 32 6176
+a 32 6187
+a 34 6199
+g 40 6212
+a 34 6224
+c 40 6234
+t 34 6248
+c 35 6258
+g 35 6272
+c 35 6281
+g 35 6296
+t 39 6307
+t 40 6320
+g 56 6331
+g 56 6342
+g 56 6355
+a 51 6367
+t 51 6378
+t 46 6391
+c 46 6401
+a 42 6411
+g 42 6424
+c 42 6435
+a 46 6447
+t 40 6459
+a 37 6469
+g 37 6482
+a 40 6494
+g 29 6506
+t 29 6518
+t 29 6531
+a 29 6539
+c 29 6549
+c 27 6563
+a 32 6573
+a 29 6585
+g 29 6596
+g 34 6609
+a 48 6622
+t 48 6633
+g 48 6645
+a 32 6656
+t 34 6668
+g 34 6681
+c 34 6690
+c 31 6704
+a 31 6714
+t 37 6726
+g 46 6739
+a 48 6750
+a 48 6762
+g 40 6774
+t 40 6786
+g 40 6797
+g 27 6810
+t 25 6821
+t 21 6834
+c 16 6842
+c 16 6856
+a 18 6865
+a 19 6877
+g 23 6889
+t 27 6901
+t 25 6915
+a 29 6923
+a 24 6936
+g 29 6947
+t 32 6960
+a 29 6970
+c 29 6980
+g 25 6995
+a 25 7004
+a 21 7018
+g 15 7026
+g 15 7041
+t 23 7053
+g 27 7065
+t 27 7075
+t 27 7089
+a 27 7098
+t 25 7110
+c 22 7120
+c 24 7133
+t 29 7147
+c 32 7157
+a 32 7168
+a 40 7179
+c 29 7189
+a 29 7200
+a 29 7211
+g 48 7224
+t 48 7237
+c 40 7248
+t 40 7261
+c 40 7272
+a 29 7282
+g 29 7294
+a 25 7306
+a 29 7317
+c 29 7327
+a 40 7338
+t 40 7350
+c 32 7362
+a 32 7372
+c 32 7383
+t 32 7397
+g 32 7409
+g 25 7422
+t 25 7432
+t 25 7445
+g 29 7457
+a 40 7467
+a 36 7479
+g 40 7491
+a 36 7503
+g 36 7514
+t 37 7525
+t 33 7537
+t 46 7549
+g 24 7561
+t 24 7571
+t 24 7584
+t 32 7596
+c 32 7606
+t 40 7619
+t 34 7630
+t 22 7642
+t 25 7654
+c 25 7665
+a 27 7674
+t 25 7685
+t 22 7698
+t 29 7709
+t 29 7721
+g 25 7734
+t 21 7744
+t 21 7757
+t 22 7767
+t 29 7779
+g 24 7791
+t 24 7802
+g 24 7814
+g 31 7827
+t 31 7838
+g 28 7850
+t 28 7861
+t 24 7873
+g 19 7884
+g 19 7897
+c 23 7906
+t 27 7921
+c 25 7931
+t 22 7943
+t 29 7954
+t 34 7965
+t 32 7976
+t 32 7988
+t 32 8000
+a 25 8009
+t 18 8020
+a 18 8031
+a 18 8039
+t 18 8053
+t 23 8066
+t 32 8077
+g 26 8089
+c 25 8099
+g 19 8112
+g 15 8124
+t 15 8134
+t 15 8148
+c 23 8157
+t 23 8170
+g 22 8182
+g 34 8193
+a 37 8205
+t 40 8216
+g 32 8227
+a 32 8238
+t 29 8250
+a 22 8259
+a 18 8269
+g 18 8281
+g 13 8294
+c 22 8305
+t 16 8318
+t 20 8328
+t 25 8341
+g 29 8353
+t 24 8364
+t 24 8376
+g 25 8388
+t 19 8398
+t 19 8412
+g 25 8422
+t 24 8433
+g 21 8444
+g 21 8455
+t 27 8467
+g 21 8479
+g 20 8491
+t 26 8502
+a 25 8512
+g 18 8524
+a 15 8536
+a 15 8545
+t 10 8558
+c 13 8566
+c 13 8581
+t 17 8593
+c 21 8603
+a 22 8613
+a 25 8624
+g 29 8635
+t 25 8647
+a 25 8657
+c 25 8666
+c 24 8678
+c 19 8691
+t 10 8703
+c 10 8712
+g 11 8726
+a 17 8736
+g 17 8747
+a 12 8759
+t 19 8768
+a 16 8778
+t 11 8787
+END_DNA
+
+END_SEQUENCE


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/phredfile.phd
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist-36.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist-36.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist-36.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,196 @@
+   39
+CBG01299    0.0000  0.0817  0.0592  0.3233  1.1054  1.8888  2.1754  1.8934
+  2.1527  1.7524  3.0956  2.1666  2.5412  1.7686  2.7063  4.1339  4.2758
+  2.0880  2.7562  2.5353  2.8402  2.6241  2.5270  3.2007  2.5240  2.7543
+  2.8516  2.6939  3.2467  3.5679  3.6727  4.0515  4.1651  5.2267  4.8451
+  4.3745  3.2268  5.0186  4.7793
+CBG01300    0.0817  0.0000  0.0950  0.3111  1.1311  1.9558  2.2197  1.9320
+  2.0299  1.8101  3.0581  2.2421  2.4764  1.7550  2.6971  4.5407  4.6271
+  2.1786  2.8859  2.4726  2.8045  2.6099  2.4506  3.1862  2.5982  2.7305
+  2.7926  2.8251  3.1864  3.5483  3.6954  4.0547  3.9877  5.3975  4.5982
+  4.3584  3.0069  4.7190  4.7592
+CBG01310    0.0592  0.0950  0.0000  0.3044  1.0783  1.7794  2.0800  1.7604
+  2.1519  1.6822  3.0648  2.2776  2.3625  1.7116  2.5222  4.0405  4.0662
+  1.8746  2.6138  2.4393  2.6637  2.3819  2.3785  2.9213  2.4552  2.5751
+  2.7080  2.5817  3.0913  3.3989  3.5036  3.8942  4.1556  4.7610  4.0620
+  3.9918  3.1087  4.5302  4.1661
+CBG01309    0.3233  0.3111  0.3044  0.0000  1.2678  2.0095  2.2611  1.7987
+  2.2973  2.5338  3.3318  2.2343  2.4923  2.0327  2.8308  4.2220  4.5559
+  2.1512  3.0813  2.4151  2.6066  2.6837  2.7222  3.2280  3.2690  2.9720
+  2.7759  2.9316  3.0455  4.3733  3.9787  4.8184  4.8889  4.9857  5.9673
+  4.5837  4.1999  5.7211  4.7700
+CBG22544    1.1054  1.1311  1.0783  1.2678  0.0000  2.3005  2.2678  2.0929
+  2.6664  2.3400  3.7396  2.8023  2.5696  2.4446  2.8767  4.5479  4.1948
+  2.1784  3.4408  2.5884  2.8710  2.8704  2.8186  2.9751  3.2856  3.2412
+  3.3431  3.0718  3.6222  4.8202  4.4527  4.5427  4.2937  4.9768  4.6224
+  4.3548  3.4335  5.1190  4.5442
+CBG20585    1.8888  1.9558  1.7794  2.0095  2.3005  0.0000  1.6243  1.7037
+  2.5358  2.9125  2.9857  3.8051  2.8146  2.1396  2.9517  3.6259  3.3024
+  2.3407  2.5640  2.9741  2.9219  2.8086  2.7746  2.4331  2.4465  3.5125
+  2.5244  3.2094  3.1828  4.5101  4.5919  4.4979  3.8859  5.1380  5.1993
+  5.9836  4.3432  6.4117  4.8479
+CBG23176    2.1754  2.2197  2.0800  2.2611  2.2678  1.6243  0.0000  1.3465
+  2.4525  2.9269  3.1125  3.2487  2.3650  1.9584  3.1241  3.3811  3.2762
+  2.1610  1.9983  2.8666  3.3917  2.4625  2.5294  2.8067  2.3930  3.3267
+  2.4879  3.3610  3.3407  4.2896  4.8651  4.7867  3.7278  4.2166  5.4427
+  5.7439  5.0511  5.4130  4.2919
+CBG23612    1.8934  1.9320  1.7604  1.7987  2.0929  1.7037  1.3465  0.0000
+  2.7833  2.7880  2.7217  3.0016  1.9889  1.8519  2.6404  3.5699  3.1966
+  2.0200  2.1845  2.2500  2.5397  2.1483  2.0645  2.8851  2.3342  2.9882
+  2.6273  2.9374  2.6007  4.2752  3.5955  3.2871  3.2864  4.3964  4.2783
+  5.1700  4.4989  4.7197  3.4620
+CBG10552    2.1527  2.0299  2.1519  2.2973  2.6664  2.5358  2.4525  2.7833
+  0.0000  1.2087  3.1408  2.2870  3.5466  2.7371  4.3457  3.9308  3.6277
+  2.4312  3.0315  2.6773  3.5620  3.1399  3.6217  3.0567  3.2404  3.3961
+  3.1915  3.4462  3.8474  4.9926  4.9183  4.3491  4.5937  5.6950  5.0554
+  7.6270  3.8392  5.2635  5.0165
+CBG23530    1.7524  1.8101  1.6822  2.5338  2.3400  2.9125  2.9269  2.7880
+  1.2087  0.0000  4.0816  2.7703  3.6054  2.9679  4.3304  4.8338  4.6208
+  2.6064  3.3095  3.0407  4.5735  3.9027  3.4701  3.8061  3.7082  3.5302
+  3.4530  4.6425  3.7924  4.9159  5.1012  4.8005  5.4006  5.4801  5.8932
+  9.8750  5.1466  6.8839  6.2445
+CBG01466    3.0956  3.0581  3.0648  3.3318  3.7396  2.9857  3.1125  2.7217
+  3.1408  4.0816  0.0000  1.8592  2.3738  1.8216  2.3118  5.5897  4.6319
+  2.8710  2.6876  4.1532  3.7306  3.1374  2.7316  3.1479  2.2777  3.9899
+  2.7702  3.2043  3.3944  5.5110  4.6346  4.3744  4.1109  4.6211  4.4749
+  5.8343  5.5416  5.8238  5.8422
+CBG22548    2.1666  2.2421  2.2776  2.2343  2.8023  3.8051  3.2487  3.0016
+  2.2870  2.7703  1.8592  0.0000  2.8299  2.4083  2.9370  3.5815  3.5466
+  2.8880  3.0227  3.5764  3.0695  3.2547  3.5399  3.3406  2.9640  4.4963
+  3.0453  3.7199  3.9175  8.1129  5.2160  5.6628  4.6900  6.2934  5.3828
+  8.8144  5.2604  6.1903  8.6700
+CBG05066    2.5412  2.4764  2.3625  2.4923  2.5696  2.8146  2.3650  1.9889
+  3.5466  3.6054  2.3738  2.8299  0.0000  1.0051  1.9109  3.8239  3.8704
+  2.4515  2.7870  2.7949  2.7684  2.4166  2.5454  2.5808  2.4503  3.1563
+  2.8138  2.6766  3.0600  4.5917  4.2574  3.1549  3.9069  4.6103  4.4913
+  4.4304  4.7055  4.9398  5.0936
+CBG05832    1.7686  1.7550  1.7116  2.0327  2.4446  2.1396  1.9584  1.8519
+  2.7371  2.9679  1.8216  2.4083  1.0051  0.0000  1.5844  3.1844  3.3444
+  2.1097  2.3244  2.5432  2.1652  2.0448  2.0314  2.3851  1.9003  2.5341
+  2.3383  2.5572  2.4384  3.7584  3.8479  2.5321  3.7703  3.7069  4.3240
+  4.1994  3.9430  4.4252  3.6651
+CBG09376    2.7063  2.6971  2.5222  2.8308  2.8767  2.9517  3.1241  2.6404
+  4.3457  4.3304  2.3118  2.9370  1.9109  1.5844  0.0000  4.5978  4.3393
+  2.5303  2.3488  3.8541  3.9204  3.0260  2.7213  2.8089  2.3821  4.2228
+  2.5335  3.4108  2.7139  5.0363  4.6480  3.5726  4.1650  4.9869  5.1834
+  4.9909  4.8281  5.5340  4.0523
+CBG11598    4.1339  4.5407  4.0405  4.2220  4.5479  3.6259  3.3811  3.5699
+  3.9308  4.8338  5.5897  3.5815  3.8239  3.1844  4.5978  0.0000  0.4235
+  4.0731  4.2514  5.0939  3.5672  4.0178  4.0286  3.8919  3.8081  6.1309
+  5.2024  5.0333  6.3027  4.9386  7.1985  4.2140  5.1831  6.5824  8.8471
+ 33.9888  5.4456 35.2800 11.7598
+CBG11599    4.2758  4.6271  4.0662  4.5559  4.1948  3.3024  3.2762  3.1966
+  3.6277  4.6208  4.6319  3.5466  3.8704  3.3444  4.3393  0.4235  0.0000
+  3.9215  4.4659  5.1513  3.8787  3.8038  3.9034  3.7433  3.8979  5.5936
+  4.5541  4.6220  6.3167  4.0992  7.2295  3.7229  4.7145  6.2658  7.1633
+ 18.8916  5.1039  7.1689 13.6941
+CBG17648    2.0880  2.1786  1.8746  2.1512  2.1784  2.3407  2.1610  2.0200
+  2.4312  2.6064  2.8710  2.8880  2.4515  2.1097  2.5303  4.0731  3.9215
+  0.0000  2.8196  2.5896  2.7113  2.4208  2.4304  2.6635  2.7791  2.6486
+  3.5471  2.6170  4.4278  3.8149  3.8812  3.2632  3.6267  4.5500  3.5307
+  4.3285  3.5710  4.5875  4.1427
+CBG24285    2.7562  2.8859  2.6138  3.0813  3.4408  2.5640  1.9983  2.1845
+  3.0315  3.3095  2.6876  3.0227  2.7870  2.3244  2.3488  4.2514  4.4659
+  2.8196  0.0000  2.6854  2.8606  2.5016  2.5539  2.5206  2.3332  3.3910
+  2.4810  2.6546  2.9475  3.4766  3.8721  4.2607  4.1390  4.3110  3.8713
+  4.8306  3.7968  4.2638  4.8254
+CBG11540    2.5353  2.4726  2.4393  2.4151  2.5884  2.9741  2.8666  2.2500
+  2.6773  3.0407  4.1532  3.5764  2.7949  2.5432  3.8541  5.0939  5.1513
+  2.5896  2.6854  0.0000  3.7728  3.0711  2.9263  3.0104  3.1937  3.2678
+  2.5709  4.1542  3.2024  4.6574  4.1274  5.1118  4.7469  4.7083  6.1301
+  8.2226  5.4888  6.7308  6.5432
+CBG09384    2.8402  2.8045  2.6637  2.6066  2.8710  2.9219  3.3917  2.5397
+  3.5620  4.5735  3.7306  3.0695  2.7684  2.1652  3.9204  3.5672  3.8787
+  2.7113  2.8606  3.7728  0.0000  2.8881  2.8447  3.0452  2.9702  3.7626
+  2.9891  4.3174  3.5895  4.9629  4.7565  3.8767  4.1603  5.2401  9.4937
+  4.7434  4.7223  5.4665  6.3838
+CBG09498    2.6241  2.6099  2.3819  2.6837  2.8704  2.8086  2.4625  2.1483
+  3.1399  3.9027  3.1374  3.2547  2.4166  2.0448  3.0260  4.0178  3.8038
+  2.4208  2.5016  3.0711  2.8881  0.0000  2.3759  2.8964  2.3451  3.4168
+  2.3817  3.8947  3.3078  4.1428  4.1551  3.5709  3.4280  4.6755  5.0728
+  5.2081  5.4351  6.3079  4.8578
+CBG19005    2.5270  2.4506  2.3785  2.7222  2.8186  2.7746  2.5294  2.0645
+  3.6217  3.4701  2.7316  3.5399  2.5454  2.0314  2.7213  4.0286  3.9034
+  2.4304  2.5539  2.9263  2.8447  2.3759  0.0000  2.9129  2.1658  3.9315
+  2.3019  3.1963  2.8577  3.9306  3.9931  2.8974  3.3150  3.9219  5.4877
+  5.5611  4.3223  4.4717  4.4160
+CBG23622    3.2007  3.1862  2.9213  3.2280  2.9751  2.4331  2.8067  2.8851
+  3.0567  3.8061  3.1479  3.3406  2.5808  2.3851  2.8089  3.8919  3.7433
+  2.6635  2.5206  3.0104  3.0452  2.8964  2.9129  0.0000  2.2713  4.1687
+  2.8117  3.8018  2.8588  4.4209  3.9024  3.2947  4.4194  4.2776  4.0199
+  4.8392  4.3750  5.5841  5.4572
+CBG19006    2.5240  2.5982  2.4552  3.2690  3.2856  2.4465  2.3930  2.3342
+  3.2404  3.7082  2.2777  2.9640  2.4503  1.9003  2.3821  3.8081  3.8979
+  2.7791  2.3332  3.1937  2.9702  2.3451  2.1658  2.2713  0.0000  3.1393
+  2.4880  3.0211  2.9315  3.9284  3.5686  3.8554  3.6731  4.7036  4.0803
+  4.3514  4.1644  4.2497  4.3599
+CBG05117    2.7543  2.7305  2.5751  2.9720  3.2412  3.5125  3.3267  2.9882
+  3.3961  3.5302  3.9899  4.4963  3.1563  2.5341  4.2228  6.1309  5.5936
+  2.6486  3.3910  3.2678  3.7626  3.4168  3.9315  4.1687  3.1393  0.0000
+  4.0873  4.8328  3.1335  4.9343  4.1052  3.7843  4.0682  4.2055  5.1586
+  6.8573  4.2813  5.7905  4.5807
+CBG06938    2.8516  2.7926  2.7080  2.7759  3.3431  2.5244  2.4879  2.6273
+  3.1915  3.4530  2.7702  3.0453  2.8138  2.3383  2.5335  5.2024  4.5541
+  3.5471  2.4810  2.5709  2.9891  2.3817  2.3019  2.8117  2.4880  4.0873
+  0.0000  2.9802  3.2309  3.9309  4.7128  4.4982  4.4156  5.4946  4.9963
+  6.0390  4.3920  5.3974  5.2162
+CBG05569    2.6939  2.8251  2.5817  2.9316  3.0718  3.2094  3.3610  2.9374
+  3.4462  4.6425  3.2043  3.7199  2.6766  2.5572  3.4108  5.0333  4.6220
+  2.6170  2.6546  4.1542  4.3174  3.8947  3.1963  3.8018  3.0211  4.8328
+  2.9802  0.0000  3.7514  4.7160  4.5549  4.3316  3.8211  5.9466  5.4133
+  5.8945  5.1835  4.8958  5.1716
+CBG01473    3.2467  3.1864  3.0913  3.0455  3.6222  3.1828  3.3407  2.6007
+  3.8474  3.7924  3.3944  3.9175  3.0600  2.4384  2.7139  6.3027  6.3167
+  4.4278  2.9475  3.2024  3.5895  3.3078  2.8577  2.8588  2.9315  3.1335
+  3.2309  3.7514  0.0000  2.8729  3.6229  3.3751  3.2855  3.8873  3.2016
+  4.4501  4.1784  5.5009  6.1935
+CBG20565    3.5679  3.5483  3.3989  4.3733  4.8202  4.5101  4.2896  4.2752
+  4.9926  4.9159  5.5110  8.1129  4.5917  3.7584  5.0363  4.9386  4.0992
+  3.8149  3.4766  4.6574  4.9629  4.1428  3.9306  4.4209  3.9284  4.9343
+  3.9309  4.7160  2.8729  0.0000  2.8421  2.9834  3.0900  3.1468  4.3755
+  5.5408  4.0756  4.7912  4.9496
+CBG08040    3.6727  3.6954  3.5036  3.9787  4.4527  4.5919  4.8651  3.5955
+  4.9183  5.1012  4.6346  5.2160  4.2574  3.8479  4.6480  7.1985  7.2295
+  3.8812  3.8721  4.1274  4.7565  4.1551  3.9931  3.9024  3.5686  4.1052
+  4.7128  4.5549  3.6229  2.8421  0.0000  2.5113  3.0329  2.6063  3.7315
+  6.1202  4.1715  4.5099  6.1951
+CBG11008    4.0515  4.0547  3.8942  4.8184  4.5427  4.4979  4.7867  3.2871
+  4.3491  4.8005  4.3744  5.6628  3.1549  2.5321  3.5726  4.2140  3.7229
+  3.2632  4.2607  5.1118  3.8767  3.5709  2.8974  3.2947  3.8554  3.7843
+  4.4982  4.3316  3.3751  2.9834  2.5113  0.0000  3.0012  3.5271  5.2530
+  7.9480  3.9136  4.1362  4.1935
+CBG19232    4.1651  3.9877  4.1556  4.8889  4.2937  3.8859  3.7278  3.2864
+  4.5937  5.4006  4.1109  4.6900  3.9069  3.7703  4.1650  5.1831  4.7145
+  3.6267  4.1390  4.7469  4.1603  3.4280  3.3150  4.4194  3.6731  4.0682
+  4.4156  3.8211  3.2855  3.0900  3.0329  3.0012  0.0000  3.0649  5.7627
+  7.8215  4.0200  4.3420  6.2253
+CBG09297    5.2267  5.3975  4.7610  4.9857  4.9768  5.1380  4.2166  4.3964
+  5.6950  5.4801  4.6211  6.2934  4.6103  3.7069  4.9869  6.5824  6.2658
+  4.5500  4.3110  4.7083  5.2401  4.6755  3.9219  4.2776  4.7036  4.2055
+  5.4946  5.9466  3.8873  3.1468  2.6063  3.5271  3.0649  0.0000  4.4712
+  7.1902  4.4573  4.4089  5.8707
+CBG17433    4.8451  4.5982  4.0620  5.9673  4.6224  5.1993  5.4427  4.2783
+  5.0554  5.8932  4.4749  5.3828  4.4913  4.3240  5.1834  8.8471  7.1633
+  3.5307  3.8713  6.1301  9.4937  5.0728  5.4877  4.0199  4.0803  5.1586
+  4.9963  5.4133  3.2016  4.3755  3.7315  5.2530  5.7627  4.4712  0.0000
+  8.9961  4.6166  4.4816  5.4284
+CBG23193    4.3745  4.3584  3.9918  4.5837  4.3548  5.9836  5.7439  5.1700
+  7.6270  9.8750  5.8343  8.8144  4.4304  4.1994  4.9909 33.9888 18.8916
+  4.3285  4.8306  8.2226  4.7434  5.2081  5.5611  4.8392  4.3514  6.8573
+  6.0390  5.8945  4.4501  5.5408  6.1202  7.9480  7.8215  7.1902  8.9961
+  0.0000  7.4108  6.0423  6.4791
+CBG22788    3.2268  3.0069  3.1087  4.1999  3.4335  4.3432  5.0511  4.4989
+  3.8392  5.1466  5.5416  5.2604  4.7055  3.9430  4.8281  5.4456  5.1039
+  3.5710  3.7968  5.4888  4.7223  5.4351  4.3223  4.3750  4.1644  4.2813
+  4.3920  5.1835  4.1784  4.0756  4.1715  3.9136  4.0200  4.4573  4.6166
+  7.4108  0.0000  5.3195  4.4669
+CBG22521    5.0186  4.7190  4.5302  5.7211  5.1190  6.4117  5.4130  4.7197
+  5.2635  6.8839  5.8238  6.1903  4.9398  4.4252  5.5340 35.2800  7.1689
+  4.5875  4.2638  6.7308  5.4665  6.3079  4.4717  5.5841  4.2497  5.7905
+  5.3974  4.8958  5.5009  4.7912  4.5099  4.1362  4.3420  4.4089  4.4816
+  6.0423  5.3195  0.0000  5.3441
+CBG00435    4.7793  4.7592  4.1661  4.7700  4.5442  4.8479  4.2919  3.4620
+  5.0165  6.2445  5.8422  8.6700  5.0936  3.6651  4.0523 11.7598 13.6941
+  4.1427  4.8254  6.5432  6.3838  4.8578  4.4160  5.4572  4.3599  4.5807
+  5.2162  5.1716  6.1935  4.9496  6.1951  4.1935  6.2253  5.8707  5.4284
+  6.4791  4.4669  5.3441  0.0000

Added: trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/phylipdist.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+    5
+Alpha          0.00000  4.23419  3.63330  6.20865  3.45431
+Beta           4.23419  0.00000  3.49289  3.36540  4.29179
+Gamma          3.63330  3.49289  0.00000  3.68733  5.84929
+Delta          6.20865  3.36540  3.68733  0.00000  4.43345
+Epsilon        3.45431  4.29179  5.84929  4.43345  0.00000

Added: trunk/packages/bioperl/branches/upstream/current/t/data/pictogram.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/pictogram.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/pictogram.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+>seq1
+AGCGTCACA
+>seq2
+ATCGTACTC
+>seq3
+GTGTAATTG
+>seq3
+GTGTAATTG
+>seq3
+GTGTAATTG
+>seq3
+GTGTAATTG
+>seq3
+GTGTAATTG
+>seq3
+GTGTAATTG
+>seq4
+CTCGTACTG
+>seq4
+CTCGTACTG
+>seq4
+CTCGTACTG
+>seq4
+CTCGTACTG
+>seq4
+CTCGTACTG
+>seq4
+CTCGTACTG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/plague_yeast.bls.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/plague_yeast.bls.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/plague_yeast.bls.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,383 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763811|emb|CAB53164.1| putative transposase [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>340</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>8.07438e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763812|emb|CAB53165.1| putative ATP-binding protein [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>260</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>5.66408e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763813|emb|CAB53166.1| putative replication regulatory protein [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>64</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>6.53308e+07</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763814|emb|CAB53167.1| pesticin immunity protein [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>141</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>2.68243e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763815|emb|CAB53168.1| pesticin [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>357</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_hits>
+        <Hit>
+          <Hit_num>1</Hit_num>
+          <Hit_id>gi|6320063|ref|NP_010143.1|</Hit_id>
+          <Hit_def>glucose permease; Rgt2p</Hit_def>
+          <Hit_accession>NP_010143</Hit_accession>
+          <Hit_len>763</Hit_len>
+          <Hit_hsps>
+            <Hsp>
+              <Hsp_num>1</Hsp_num>
+              <Hsp_bit-score>33.113</Hsp_bit-score>
+              <Hsp_score>74</Hsp_score>
+              <Hsp_evalue>0.0893657</Hsp_evalue>
+              <Hsp_query-from>106</Hsp_query-from>
+              <Hsp_query-to>233</Hsp_query-to>
+              <Hsp_hit-from>573</Hsp_hit-from>
+              <Hsp_hit-to>694</Hsp_hit-to>
+              <Hsp_pattern-from>0</Hsp_pattern-from>
+              <Hsp_pattern-to>0</Hsp_pattern-to>
+              <Hsp_query-frame>1</Hsp_query-frame>
+              <Hsp_hit-frame>1</Hsp_hit-frame>
+              <Hsp_identity>37</Hsp_identity>
+              <Hsp_positive>49</Hsp_positive>
+              <Hsp_gaps>34</Hsp_gaps>
+              <Hsp_align-len>142</Hsp_align-len>
+              <Hsp_density>0</Hsp_density>
+              <Hsp_qseq>KVYRVMVLEGTIAESIEHLDKKENEDILNNNRNRIVLADNTVINFDNISQLKEFLRRSVNIVDHDIFSSNGFEG--------------FNPTSHFPSNPSSDYFNSTGVTFGSGVDLGQRSKQDLLNDGVPQYIADRLDGYY</Hsp_qseq>
+              <Hsp_hseq>KKIRKRCLAFPISQQIEMKTNIKNAGKLDNNNSPIVQDDS-----HNIIDVDGFLENQIQSNDHMIAADKGSGSLVNIIDTAPLTSTEFKPVEHPPVNY---------------VDLGNGLGLNTYNRGPPSIISDSTDEFY</Hsp_hseq>
+              <Hsp_midline>K  R   L   I++ IE     +N   L+NN + IV  D+      NI  +  FL   +   DH I +  G                 F P  H P N                VDLG     +  N G P  I+D  D +Y</Hsp_midline>
+            </Hsp>
+          </Hit_hsps>
+        </Hit>
+      </Iteration_hits>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>8.3019e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763816|emb|CAB53169.1| hypothetical protein [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>138</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>2.46958e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763817|emb|CAB53170.1| coagulase/fibrinolysin precursor [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>312</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>7.18398e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763818|emb|CAB53171.1| putative transcriptional regulator [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>99</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>1.39258e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd"><BlastOutput>
+  <BlastOutput_program>blastp</BlastOutput_program>
+  <BlastOutput_version>blastp 2.1.3 [Apr-11-2001]</BlastOutput_version>
+  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;,  Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
+  <BlastOutput_db>yeast.aa</BlastOutput_db>
+  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
+  <BlastOutput_query-def>gi|5763819|emb|CAB53172.1| hypothetical protein [Yersinia pestis]</BlastOutput_query-def>
+  <BlastOutput_query-len>115</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_matrix>BLOSUM62</Parameters_matrix>
+      <Parameters_expect>0.1</Parameters_expect>
+      <Parameters_include>0</Parameters_include>
+      <Parameters_sc-match>0</Parameters_sc-match>
+      <Parameters_sc-mismatch>0</Parameters_sc-mismatch>
+      <Parameters_gap-open>11</Parameters_gap-open>
+      <Parameters_gap-extend>1</Parameters_gap-extend>
+      <Parameters_filter>S</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+  <BlastOutput_iterations>
+    <Iteration>
+      <Iteration_iter-num>1</Iteration_iter-num>
+      <Iteration_stat>
+        <Statistics>
+          <Statistics_db-num>6298</Statistics_db-num>
+          <Statistics_db-len>2974038</Statistics_db-len>
+          <Statistics_hsp-len>0</Statistics_hsp-len>
+          <Statistics_eff-space>2.00971e+08</Statistics_eff-space>
+          <Statistics_kappa>0.041</Statistics_kappa>
+          <Statistics_lambda>0.267</Statistics_lambda>
+          <Statistics_entropy>0.14</Statistics_entropy>
+        </Statistics>
+      </Iteration_stat>
+      <Iteration_message>No hits found</Iteration_message>
+    </Iteration>
+  </BlastOutput_iterations>
+</BlastOutput>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,74 @@
+ID           M20132:(362)[c.+4G|A|T;c.+31C|A]; [E2|K|X;Q11|K]
+Feature      DNA; 1.1
+Feature        /label: point, transition
+Feature        /proof: computed
+Feature        /location: 4
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g|a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature      DNA; 1.2
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 4
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g|t
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature      RNA; 1.1
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 4 (M20132::366)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g|a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature        /codon_table: 1
+Feature        /codon: gaa|aaa; 1
+Feature        /region: coding
+Feature      RNA; 1.2
+Feature        /label: nonsense
+Feature        /proof: experimental
+Feature        /location: 4 (M20132::366)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: g|t
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -BccI
+Feature        /codon_table: 1
+Feature        /codon: gaa|taa; 1
+Feature        /region: coding
+Feature      AA; 1.1
+Feature        /label: substitution, conservative
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: E|K
+Feature      AA; 1.2
+Feature        /label: truncation
+Feature        /proof: computed
+Feature        /location: 2
+Feature        /change: E|*
+Feature      DNA; 2
+Feature        /label: point, transversion
+Feature        /proof: computed
+Feature        /location: 31
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: c|a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -CviRI, -SfaNI
+Feature      RNA; 2
+Feature        /label: missense
+Feature        /proof: experimental
+Feature        /location: 31 (M20132::393)
+Feature        /upflank: gaagattcagccaagctcaaggatg
+Feature        /change: c|a
+Feature        /dnflank: aagtgcagttagggctgggaagggt
+Feature        /re_site: -CviRI, -SfaNI
+Feature        /codon_table: 1
+Feature        /codon: caa|aaa; 1
+Feature        /region: coding
+Feature      AA; 2
+Feature        /label: substitution, conservative
+Feature        /proof: computed
+Feature        /location: 11
+Feature        /change: Q|K
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.old.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.old.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.old.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,86 @@
+
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="[c.+4G|A|T;c.+31C|A]" trivname="[E2|K|X;Q11|K]">
+    <DNA number="1.1" start="4" end="4" length="1" isMutation="0">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <DNA number="1.2" start="4" end="4" length="1" isMutation="0">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <RNA number="1.1" start="4" end="4" length="1" isMutation="0">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <RNA number="1.2" start="4" end="4" length="1" isMutation="0">
+        <label>nonsense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="taa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1.1" start="2" end="2" length="1" isMutation="0">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+    <AA number="1.2" start="2" end="2" length="1" isMutation="0">
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>*</allele_mut>
+    </AA>
+    <DNA number="2" start="31" end="31" length="1" isMutation="0">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-CviRI, -SfaNI</restriction_changes>
+    </DNA>
+    <RNA number="2" start="31" end="31" length="1" isMutation="0">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="caa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-CviRI, -SfaNI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="2" start="11" end="11" length="1" isMutation="0">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>Q</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+</seqDiff>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/polymorphism.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,85 @@
+<seqDiff id="M20132" moltype="rna" offset="362" sysname="[c.+4G|A|T;c.+31C|A]" trivname="[E2|K|X;Q11|K]">
+    <DNA number="1.1" start="4" end="4" length="1" isMutation="0">
+        <label>point</label>
+        <label>transition</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <DNA number="1.2" start="4" end="4" length="1" isMutation="0">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-BccI</restriction_changes>
+    </DNA>
+    <RNA number="1.1" start="4" end="4" length="1" isMutation="0">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <RNA number="1.2" start="4" end="4" length="1" isMutation="0">
+        <label>nonsense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>g</allele_ori>
+        <allele_mut>t</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="gaa" codon_mut="taa" codon_pos="1"></codon>
+        <restriction_changes>-BccI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="1.1" start="2" end="2" length="1" isMutation="0">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+    <AA number="1.2" start="2" end="2" length="1" isMutation="0">
+        <label>truncation</label>
+        <proof>computed</proof>
+        <allele_ori>E</allele_ori>
+        <allele_mut>*</allele_mut>
+    </AA>
+    <DNA number="2" start="31" end="31" length="1" isMutation="0">
+        <label>point</label>
+        <label>transversion</label>
+        <proof>computed</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <restriction_changes>-CviRI, -SfaNI</restriction_changes>
+    </DNA>
+    <RNA number="2" start="31" end="31" length="1" isMutation="0">
+        <label>missense</label>
+        <proof>experimental</proof>
+        <upFlank>gaagattcagccaagctcaaggatg</upFlank>
+        <allele_ori>c</allele_ori>
+        <allele_mut>a</allele_mut>
+        <dnFlank>aagtgcagttagggctgggaagggt</dnFlank>
+        <codon codon_ori="caa" codon_mut="aaa" codon_pos="1"></codon>
+        <restriction_changes>-CviRI, -SfaNI</restriction_changes>
+        <region>coding</region>
+    </RNA>
+    <AA number="2" start="11" end="11" length="1" isMutation="0">
+        <label>substitution</label>
+        <label>conservative</label>
+        <proof>computed</proof>
+        <allele_ori>Q</allele_ori>
+        <allele_mut>K</allele_mut>
+    </AA>
+</seqDiff>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+# J.Stajich unpublished results, S.aureus clinical and environmental
+# isolates AFLP data 2 Markers where alleles are assigned based on unique
+# banding pattern
+SAMPLE,AFLP1,AFLP2
+MRSA-1748-7,6,15
+MRSA-1826-24,6,15
+MRSA-1902-18,6,15
+MRSA-1905-9,6,15
+MRSA-1933-10,8,15
+MRSA-1964-26,12,21
+MRSA-1969-1,1,13
+MRSA-1980-6,6,15
+MRSA-1983-12,6,15
+MSSA-1741-22,10,15
+MSSA-1755-13,9,15
+MSSA-1763-3,3,15
+MSSA-1819-21,6,19
+MSSA-1868-19,2,15
+MSSA-1894-14,10,15
+MSSA-1915-17,6,15
+MSSA-1925-2,2,14
+MSSA-1940-15,6,15
+MSSA-1948-5,5,15
+NC-17A-8,7,16
+NC-2-16,6,18
+NC-21A-25,11,15
+NC-23A-4,4,14
+NC-4-20,6,15

Added: trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.multidat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.multidat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/popgen_saureus.multidat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,27 @@
+# J.Stajich unpublished results, S.aureus clinical and environmental
+# isolates AFLP data 2 Markers where alleles are assigned based on unique
+# banding pattern
+SAMPLE,B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16,B17,B18,B19,B20
+NC23A,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0
+MSSA1940,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MRSA1983,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+NC21A,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0
+NC4,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MSSA1915,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+NC2,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,0,1,0,1
+NC17A,0,0,0,1,1,0,1,1,0,1,0,1,0,0,1,0,0,1,1,0
+MRSA1748,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MRSA1905,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MSSA1925,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0
+MSSA1763,0,0,0,1,0,0,1,1,0,1,1,1,0,0,1,0,1,1,1,0
+MRSA1980,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MSSA1948,0,0,0,1,0,1,0,1,1,1,0,1,0,0,1,0,1,1,1,0
+MSSA1741,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0
+MSSA1755,0,1,1,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MSSA1819,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,0,1
+MSSA1868,0,0,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,1,0
+MSSA1894,0,0,0,1,0,0,1,1,1,1,0,0,0,0,1,0,1,1,1,0
+MRSA1826,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MRSA1902,0,0,0,1,0,0,1,1,1,1,0,1,0,0,1,0,1,1,1,0
+MRSA1933,0,0,0,1,0,0,1,1,1,1,1,1,0,0,1,0,1,1,1,0
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/popstats.prettybase
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/popstats.prettybase	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/popstats.prettybase	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+01	A01	A
+01	A02	A
+01	A03	A
+01	A04	A
+01	A05	A
+02	A01	A
+02	A02	T
+02	A03	T
+02	A04	T
+02	A05	T
+04	A01	G
+04	A02	G
+04	A03	C
+04	A04	C
+04	A05	G
+05	A01	T
+05	A02	C
+05	A03	T
+05	A04	T
+05	A05	T
+11	A01	G
+11	A02	G
+11	A03	G
+11	A04	A
+11	A05	A
+
+01	out	G
+02	out	A
+04	out	G
+05	out	T
+11	out	G
+
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/pre_rel9.swiss
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/pre_rel9.swiss	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/pre_rel9.swiss	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,196 @@
+ID   GCDH_CAEEL     STANDARD;      PRT;   409 AA.
+AC   Q20772;
+DT   01-NOV-1997, integrated into UniProtKB/Swiss-Prot.
+DT   01-NOV-1996, sequence version 1.
+DT   30-MAY-2006, entry version 44.
+DE   Probable glutaryl-CoA dehydrogenase, mitochondrial precursor
+DE   (EC 1.3.99.7) (GCD).
+GN   ORFNames=F54D5.7;
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
+RC   STRAIN=Bristol N2;
+RX   MEDLINE=99069613; PubMed=9851916; DOI=10.1126/science.282.5396.2012;
+RG   The C. elegans sequencing consortium;
+RT   "Genome sequence of the nematode C. elegans: a platform for
+RT   investigating biology.";
+RL   Science 282:2012-2018(1998).
+CC   -!- CATALYTIC ACTIVITY: Glutaryl-CoA + acceptor = crotonoyl-CoA +
+CC       CO(2) + reduced acceptor.
+CC   -!- COFACTOR: FAD (By similarity).
+CC   -!- PATHWAY: Degradative pathway of L-lysine, L-hydroxylysine, and L-
+CC       tryptophan metabolism.
+CC   -!- INTERACTION:
+CC       P39745:mpk-1; NbExp=1; IntAct=EBI-313068, EBI-321013;
+CC       Q17446:pmk-1; NbExp=1; IntAct=EBI-313068, EBI-312987;
+CC   -!- SUBCELLULAR LOCATION: Mitochondrion; mitochondrial matrix
+CC       (Potential).
+CC   -!- SIMILARITY: Belongs to the acyl-CoA dehydrogenase family.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; Z66513; CAA91333.1; -; Genomic_DNA.
+DR   PIR; T22647; T22647.
+DR   UniGene; Cel.30446; -.
+DR   HSSP; Q06319; 1BUC.
+DR   IntAct; Q20772; -.
+DR   Ensembl; F54D5.7; Caenorhabditis elegans.
+DR   WormBase; WBGene00010052; F54D5.7.
+DR   WormPep; F54D5.7; CE03411.
+DR   GO; GO:0005515; F:protein binding; IPI.
+DR   InterPro; IPR006089; Acyl_CoA_DH.
+DR   InterPro; IPR006091; Acyl_CoA_DH/ox_M.
+DR   InterPro; IPR006090; Acyl_CoA_DH_1.
+DR   InterPro; IPR006092; Acyl_CoA_DH_N.
+DR   InterPro; IPR009075; AcylCo_DH/ox_C.
+DR   InterPro; IPR009100; AcylCoA_DH/ox_NM.
+DR   InterPro; IPR013764; AcylCoA_DH_1/2_C.
+DR   Pfam; PF00441; Acyl-CoA_dh_1; 1.
+DR   Pfam; PF02770; Acyl-CoA_dh_M; 1.
+DR   Pfam; PF02771; Acyl-CoA_dh_N; 1.
+DR   PROSITE; PS00072; ACYL_COA_DH_1; FALSE_NEG.
+DR   PROSITE; PS00073; ACYL_COA_DH_2; 1.
+KW   Complete proteome; FAD; Flavoprotein; Hypothetical protein;
+KW   Mitochondrion; Oxidoreductase; Transit peptide.
+FT   TRANSIT       1      ?       Mitochondrion (Potential).
+FT   CHAIN         ?    409       Probable glutaryl-CoA dehydrogenase.
+FT                                /FTId=PRO_0000000530.
+FT   ACT_SITE    388    388       Proton acceptor (Potential).
+SQ   SEQUENCE   409 AA;  44964 MW;  4D06241FB6768069 CRC64;
+     MLTRGFTSIG KIASRGLSST FYQDAFQLSD QLTEDERSLM LSAREYCQER LLPRVTEAYR
+     TEKFDPSLIP EMGSMGLLGA PYQGYGCAGT STVGYGLIAR EVERVDSGYR STMSVQTSLV
+     IGPIYNYGSE DQKQKYIPDL ASGKKIGCFG LTEPNHGSNP GGMETKATWD ETTKTYKLNG
+     SKTWISNSPV SDVMVVWARS ARHNNKIKGF ILERGMKGLT TPKIEGKLSL RASITGQIAM
+     DDVPVPEENL LPNAEGLQGP FGCLNNARLG IAWGALGAAE ECFHLARQYT LDRQQFGRPL
+     AQNQLMQLKM ADMLTEISLG LQGCLRVSRL KDEGKVQSEQ ISIIKRNSCG KALEVARKAR
+     DMLGGNGIVD EYHIMRHMVN LETVNTYEGT HDVHALILGR AITGLNGFC
+//
+ID   Q41V66_FERAC   PRELIMINARY;   PRT;   607 AA.
+AC   Q41V66;
+DT   27-SEP-2005, integrated into UniProtKB/TrEMBL.
+DT   27-SEP-2005, sequence version 1.
+DT   30-MAY-2006, entry version 5.
+DE   Glycoside hydrolase, family 15.
+GN   ORFNames=FaciDRAFT_1685;
+OS   Ferroplasma acidarmanus Fer1.
+OC   Archaea; Euryarchaeota; Thermoplasmata; Thermoplasmatales;
+OC   Ferroplasmaceae; Ferroplasma.
+OX   NCBI_TaxID=333146;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RT   "Sequencing of the draft genome and assembly of Ferroplasma
+RT   acidarmanus fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [2]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-ORNL);
+RA   Larimer F., Land M.;
+RT   "Annotation of the draft genome assembly of Ferroplasma acidarmanus
+RT   fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CAUTION: The sequence shown here is derived from an
+CC       EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is
+CC       preliminary data.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; AABC04000001; EAM94575.1; -; Genomic_DNA.
+DR   GO; GO:0004339; F:glucan 1,4-alpha-glucosidase activity; IEA.
+DR   GO; GO:0016787; F:hydrolase activity; IEA.
+DR   GO; GO:0005976; P:polysaccharide metabolism; IEA.
+DR   InterPro; IPR008928; 6hp_glycosidase.
+DR   InterPro; IPR011613; Glyco_hydro_15_rel.
+DR   InterPro; IPR012343; Glyco_trans_sub.
+DR   Pfam; PF00723; Glyco_hydro_15; 1.
+KW   Hydrolase.
+SQ   SEQUENCE   607 AA;  69495 MW;  8AC6297BA16ED500 CRC64;
+     MGTYRGLYDL HDAYRSDYLK IANHGFIANN RTAALVGIDG TIDWACLPNF NSNPVFDSIL
+     DARNGGYFKT SPVMESNVNQ YYEESTNILI TEFVNNNQVI LRLTDFLPTS SYSTITFPEI
+     HRLIEAPYSD VEVSIDIKSH FNFGSGKTNI TRDRNGYIFS CTDDTLGIST NLKLKKGNGN
+     VYSRIKVEKG SHEWIVVLSG VRQIGNVRQY ESYTRLEETR NYWSAWAGKI NYSGLYYDHV
+     IRSALTLRGL FYDPTGMMVA APTTSLPEII GGERNWDYRY TWIRDTAYVV EALSLIGLND
+     VATKFLYDIM SIVQKDKKVK TIYPVNGDSK LEEKKVNLSG YMDSIPVRIG NEASEQLQID
+     QYGSIVNAVF RFHEAGGLVT TYLWDFLIEI LDTLKDIWKL PDSSIWEFRS EPKHYLYSKL
+     ISWSAFNRAI KMGRELGYSA PYRTWHKIRE EIKNEIMEKG YNPDVKAFTQ YYGSDQMDAS
+     VLRMPLTGII SAKDPRFVST LARVEAELKN PCGMFIRYHS DDGLKGHDNA FLLLSFWYVE
+     DLILSGRIME AKETFENILD HSNHLMLFSE EINFNDCREM LGNFPQAITH LGVIRAAIKL
+     DEALRGK
+//
+ID   Q41US7_FERAC   PRELIMINARY;   PRT;   270 AA.
+AC   Q41US7;
+DT   27-SEP-2005, integrated into UniProtKB/TrEMBL.
+DT   27-SEP-2005, sequence version 1.
+DT   30-MAY-2006, entry version 4.
+DE   Potassium channel protein.
+GN   ORFNames=FaciDRAFT_1443;
+OS   Ferroplasma acidarmanus Fer1.
+OC   Archaea; Euryarchaeota; Thermoplasmata; Thermoplasmatales;
+OC   Ferroplasmaceae; Ferroplasma.
+OX   NCBI_TaxID=333146;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RT   "Sequencing of the draft genome and assembly of Ferroplasma
+RT   acidarmanus fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [2]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-ORNL);
+RA   Larimer F., Land M.;
+RT   "Annotation of the draft genome assembly of Ferroplasma acidarmanus
+RT   fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CAUTION: The sequence shown here is derived from an
+CC       EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is
+CC       preliminary data.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; AABC04000002; EAM94333.1; -; Genomic_DNA.
+DR   GO; GO:0016020; C:membrane; IEA.
+DR   GO; GO:0005216; F:ion channel activity; IEA.
+DR   GO; GO:0005267; F:potassium channel activity; IEA.
+DR   GO; GO:0006813; P:potassium ion transport; IEA.
+DR   InterPro; IPR013099; Ion_trans_2_bac.
+DR   InterPro; IPR001622; K+channel_pore.
+DR   InterPro; IPR003148; TrkA_N.
+DR   Pfam; PF07885; Ion_trans_2; 1.
+DR   Pfam; PF02254; TrkA_N; 1.
+KW   Ionic channel.
+SQ   SEQUENCE   270 AA;  30497 MW;  528C4EA75C41DF75 CRC64;
+     MQTITTVGYG DTPVYGLAGR ANGMLIMVIG IGSLGYLMAG LTSMLIDIRL SSKLGERMAA
+     EKKHIVLCNY NESTKKVLDK IKYDGIDIVI LNENEVKGDN EYTYIKGSFL RENDLIRAGI
+     KKASSVIIFS RSEDKEQMAM DAESILSAMI IRKLNPEIRI IGEILNPDSR EHASSFMDDI
+     IIKGDVSSML IYSSIMIPGI PEFINDLLMS NSISEEDIDK KYASNTYREF ISNMEKENRI
+     VLAFRKQDKI YLRENSDKKI DVDSYIFIKN
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/primedseq.fa
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/primedseq.fa	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/primedseq.fa	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,4 @@
+>Test1
+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
+TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
+TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC

Added: trunk/packages/bioperl/branches/upstream/current/t/data/primer3_infile.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/primer3_infile.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/primer3_infile.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+PRIMER_SEQUENCE_ID=sN11902
+PRIMER_COMMENT=3831
+SEQUENCE=ACAGCAACATCGAACTCGTCGTCTTCGAAAGCCTCTTTGGCCTTCGCTGCAAGCTTCTTAGCCATGTCGATTGTGGTTTTTGGTTAGGTTGAGAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGATGATGATTCGATTCAAGAAAGGGAATGAAGTAGTAGACGAGGGCCAAAGAGTATAAAGATGGATGCTATCTTTTTCTCTTCTATTGTTCTGTTATTGTTTTGTGAGGGAAGTTTTGGCGCTTCAATACAAAAAAATAAATAAAGAATAAACAAATACAGCTTAGTTAAATGGGTATAACATGGGCCGGAATATATAATCACGAATTAAGGCCCATCATTGTCTCAAAATTAAACTCGGCCTCTATAATACTTCTCCCACACCGGTAATCTTGAGATGAACACTCCTCTTTCGTGTAGTATAAAATAAGACGAAGGTCCAACGTTGAAAGATACTTACCTGGACGGGGTCTAGTCCACGCGTGGACTAAACTTGAGGACCAACGAAGAAGAAGAAGAAGAAGAAGAAGAAGAGAGTCGTAAAGTGGAAGCAGCGATGAAGAGTAACGATAGTAGTAAGAACAGAGTGAGTTGGTTTATAGATGAAGATCCGTCTCCACGTCCTCCGAAATGCACAGTTCTTTGGAAAGAGCTTTTGCGTTTGAAGAAACAGAGGAGCTCTTCGTCGGTGAAGTCCTCTCTGTCTCCGTCGTCGTCCACGTCATCATCTAGCTCGCTTGAGGATAAGAGAGAAGGGAAGAGAGGTAAGAAAGGATTAGAGAGGACGAGATCGTC
+TARGET=513,26
+PRIMER_PRODUCT_SIZE_RANGE=100-500
+PRIMER_FILE_FLAG=0
+PRIMER_LIBERAL_BASE=1
+PRIMER_NUM_RETURN=1
+PRIMER_FIRST_BASE_INDEX=1
+PRIMER_EXPLAIN_FLAG=1
+=
+PRIMER_SEQUENCE_ID=sN11926
+PRIMER_COMMENT=3854
+SEQUENCE=ACGCTGCTCGGAGAAGCGGCGCCGATATCAATCCGTCAGAAAATGTTTGTTTTCTTCTTCTTCTTCTTCTTCTTCTTCTCGATCTTTGTGTGTGTGTTTCTTTTAGATCTGACTTTGTAGTTTTAGGTTTGATTGGGCGATTGAATCGAGATGAGTTTTGTTCATTTAAGTTTCAGTTGCCGACACTGACGTAGAGATTTGTGTTAGGGTCCTCGTTTGTTGTGAATTTCAATTGACAAATCTGTTTTGGACATCTAACAAAGGGCTTTGTTTTTGTTTTTGTTGCATATGTAGTCAATGCTGGAACCAACAAAGCGGCTTCGAGCAGCACCTCCTTGAACACAAAGAGGCTCGATGATGATACTGAGACCTTAGCTCGTGAGTATATATAATTCATCTATTCTCTCATTCACAGCGAATTAGTATCGACTTCTATATCTCTCTGTGGTTCAAATAAACTTTAGTTGTATCTTCAAAGCTTTATATACTGTTTTTCTTTCCCGTGTTTAAGAGATTGATTAGTATATTGCTTGTTCGAAGAGTCTTTGTT
+TARGET=54,23
+PRIMER_PRODUCT_SIZE_RANGE=100-500
+PRIMER_FILE_FLAG=0
+PRIMER_LIBERAL_BASE=1
+PRIMER_NUM_RETURN=1
+PRIMER_FIRST_BASE_INDEX=1
+PRIMER_EXPLAIN_FLAG=1
+=
+PRIMER_SEQUENCE_ID=sN1922
+PRIMER_COMMENT=3922
+SEQUENCE=ACAGCAACATCGAACTCGTCGTCTTCGAAAGCCTCTTTGGCCTTCGCTGCAAGCTTCTTAGCCATGTCGATTGTGGTTTTTGGTTAGGTTGAGAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGATGATGATTCGATTCAAGAAAGGGAATGAAGTAGTAGACGAGGGCCAAAGAGTATAAAGATGGATGCGATCTTTTTCTCTTCTATTGTTCTGTTATTGTTTTGTGAGGGAAGTTTTGGCGCTTCAATACAAAAAAATAAATAAAGAATAAACAAATACAGCTTAGTTAAATGGGTATAACATGGGCCGGAATATATAATCACGGATTAAGGCCCATCATTGTCTCAAAATTAAACTCGGCCTCTATAATACTTCTCCCACACCGGTAATCTTGAGATGAACACTCCTCTTTCGTGTAGTATAAAATAAGACGAAGGTCCAACGTTGAAAGATACTTACCTGGACGGGGTC
+TARGET=96,32
+PRIMER_PRODUCT_SIZE_RANGE=100-500
+PRIMER_FILE_FLAG=0
+PRIMER_LIBERAL_BASE=1
+PRIMER_NUM_RETURN=1
+PRIMER_FIRST_BASE_INDEX=1
+PRIMER_EXPLAIN_FLAG=1
+=
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/primer3_infile.txt
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/primer3_outfile.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/primer3_outfile.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/primer3_outfile.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,103 @@
+PRIMER_SEQUENCE_ID=sN11902
+PRIMER_COMMENT=3831
+SEQUENCE=ACAGCAACATCGAACTCGTCGTCTTCGAAAGCCTCTTTGGCCTTCGCTGCAAGCTTCTTAGCCATGTCGATTGTGGTTTTTGGTTAGGTTGAGAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGATGATGATTCGATTCAAGAAAGGGAATGAAGTAGTAGACGAGGGCCAAAGAGTATAAAGATGGATGCTATCTTTTTCTCTTCTATTGTTCTGTTATTGTTTTGTGAGGGAAGTTTTGGCGCTTCAATACAAAAAAATAAATAAAGAATAAACAAATACAGCTTAGTTAAATGGGTATAACATGGGCCGGAATATATAATCACGAATTAAGGCCCATCATTGTCTCAAAATTAAACTCGGCCTCTATAATACTTCTCCCACACCGGTAATCTTGAGATGAACACTCCTCTTTCGTGTAGTATAAAATAAGACGAAGGTCCAACGTTGAAAGATACTTACCTGGACGGGGTCTAGTCCACGCGTGGACTAAACTTGAGGACCAACGAAGAAGAAGAAGAAGAAGAAGAAGAAGAGAGTCGTAAAGTGGAAGCAGCGATGAAGAGTAACGATAGTAGTAAGAACAGAGTGAGTTGGTTTATAGATGAAGATCCGTCTCCACGTCCTCCGAAATGCACAGTTCTTTGGAAAGAGCTTTTGCGTTTGAAGAAACAGAGGAGCTCTTCGTCGGTGAAGTCCTCTCTGTCTCCGTCGTCGTCCACGTCATCATCTAGCTCGCTTGAGGATAAGAGAGAAGGGAAGAGAGGTAAGAAAGGATTAGAGAGGACGAGATCGTC
+TARGET=513,26
+PRIMER_PRODUCT_SIZE_RANGE=100-500
+PRIMER_FILE_FLAG=0
+PRIMER_LIBERAL_BASE=1
+PRIMER_NUM_RETURN=1
+PRIMER_FIRST_BASE_INDEX=1
+PRIMER_EXPLAIN_FLAG=1
+PRIMER_LEFT_EXPLAIN=considered 4849, GC content failed 207, low tm 2062, high tm 1173, long poly-x seq 11, ok 1396
+PRIMER_RIGHT_EXPLAIN=considered 2415, low tm 892, high tm 844, high end compl 3, ok 676
+PRIMER_PAIR_EXPLAIN=considered 2, high end compl 1, ok 1
+PRIMER_PAIR_PENALTY=0.0991
+PRIMER_LEFT_PENALTY=0.095506
+PRIMER_RIGHT_PENALTY=0.003589
+PRIMER_LEFT_SEQUENCE=CACGCGTGGACTAAACTTGA
+PRIMER_RIGHT_SEQUENCE=AACGCAAAAGCTCTTTCCAA
+PRIMER_LEFT=484,20
+PRIMER_RIGHT=668,20
+PRIMER_LEFT_TM=59.904
+PRIMER_RIGHT_TM=59.996
+PRIMER_LEFT_GC_PERCENT=50.000
+PRIMER_RIGHT_GC_PERCENT=40.000
+PRIMER_LEFT_SELF_ANY=8.00
+PRIMER_RIGHT_SELF_ANY=6.00
+PRIMER_LEFT_SELF_END=3.00
+PRIMER_RIGHT_SELF_END=2.00
+PRIMER_LEFT_END_STABILITY=7.0000
+PRIMER_RIGHT_END_STABILITY=8.5000
+PRIMER_PAIR_COMPL_ANY=4.00
+PRIMER_PAIR_COMPL_END=2.00
+PRIMER_PRODUCT_SIZE=185
+=
+PRIMER_SEQUENCE_ID=sN11926
+PRIMER_COMMENT=3854
+SEQUENCE=ACGCTGCTCGGAGAAGCGGCGCCGATATCAATCCGTCAGAAAATGTTTGTTTTCTTCTTCTTCTTCTTCTTCTTCTTCTCGATCTTTGTGTGTGTGTTTCTTTTAGATCTGACTTTGTAGTTTTAGGTTTGATTGGGCGATTGAATCGAGATGAGTTTTGTTCATTTAAGTTTCAGTTGCCGACACTGACGTAGAGATTTGTGTTAGGGTCCTCGTTTGTTGTGAATTTCAATTGACAAATCTGTTTTGGACATCTAACAAAGGGCTTTGTTTTTGTTTTTGTTGCATATGTAGTCAATGCTGGAACCAACAAAGCGGCTTCGAGCAGCACCTCCTTGAACACAAAGAGGCTCGATGATGATACTGAGACCTTAGCTCGTGAGTATATATAATTCATCTATTCTCTCATTCACAGCGAATTAGTATCGACTTCTATATCTCTCTGTGGTTCAAATAAACTTTAGTTGTATCTTCAAAGCTTTATATACTGTTTTTCTTTCCCGTGTTTAAGAGATTGATTAGTATATTGCTTGTTCGAAGAGTCTTTGTT
+TARGET=54,23
+PRIMER_PRODUCT_SIZE_RANGE=100-500
+PRIMER_FILE_FLAG=0
+PRIMER_LIBERAL_BASE=1
+PRIMER_NUM_RETURN=1
+PRIMER_FIRST_BASE_INDEX=1
+PRIMER_EXPLAIN_FLAG=1
+PRIMER_LEFT_EXPLAIN=considered 315, low tm 52, high tm 212, ok 51
+PRIMER_RIGHT_EXPLAIN=considered 4504, GC content failed 63, low tm 2423, high tm 832, ok 1186
+PRIMER_PAIR_EXPLAIN=considered 1, ok 1
+PRIMER_PAIR_PENALTY=1.0219
+PRIMER_LEFT_PENALTY=0.973324
+PRIMER_RIGHT_PENALTY=0.048588
+PRIMER_LEFT_SEQUENCE=GCCGATATCAATCCGTCAGA
+PRIMER_RIGHT_SEQUENCE=CTCTACGTCAGTGTCGGCAA
+PRIMER_LEFT=21,20
+PRIMER_RIGHT=196,20
+PRIMER_LEFT_TM=60.973
+PRIMER_RIGHT_TM=60.049
+PRIMER_LEFT_GC_PERCENT=50.000
+PRIMER_RIGHT_GC_PERCENT=55.000
+PRIMER_LEFT_SELF_ANY=6.00
+PRIMER_RIGHT_SELF_ANY=5.00
+PRIMER_LEFT_SELF_END=3.00
+PRIMER_RIGHT_SELF_END=2.00
+PRIMER_LEFT_END_STABILITY=6.7000
+PRIMER_RIGHT_END_STABILITY=10.0000
+PRIMER_PAIR_COMPL_ANY=5.00
+PRIMER_PAIR_COMPL_END=3.00
+PRIMER_PRODUCT_SIZE=176
+=
+PRIMER_SEQUENCE_ID=sN1922
+PRIMER_COMMENT=3922
+SEQUENCE=ACAGCAACATCGAACTCGTCGTCTTCGAAAGCCTCTTTGGCCTTCGCTGCAAGCTTCTTAGCCATGTCGATTGTGGTTTTTGGTTAGGTTGAGAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGATGATGATTCGATTCAAGAAAGGGAATGAAGTAGTAGACGAGGGCCAAAGAGTATAAAGATGGATGCGATCTTTTTCTCTTCTATTGTTCTGTTATTGTTTTGTGAGGGAAGTTTTGGCGCTTCAATACAAAAAAATAAATAAAGAATAAACAAATACAGCTTAGTTAAATGGGTATAACATGGGCCGGAATATATAATCACGGATTAAGGCCCATCATTGTCTCAAAATTAAACTCGGCCTCTATAATACTTCTCCCACACCGGTAATCTTGAGATGAACACTCCTCTTTCGTGTAGTATAAAATAAGACGAAGGTCCAACGTTGAAAGATACTTACCTGGACGGGGTC
+TARGET=96,32
+PRIMER_PRODUCT_SIZE_RANGE=100-500
+PRIMER_FILE_FLAG=0
+PRIMER_LIBERAL_BASE=1
+PRIMER_NUM_RETURN=1
+PRIMER_FIRST_BASE_INDEX=1
+PRIMER_EXPLAIN_FLAG=1
+PRIMER_LEFT_EXPLAIN=considered 735, low tm 100, high tm 441, ok 194
+PRIMER_RIGHT_EXPLAIN=considered 3232, GC content failed 191, low tm 1479, high tm 573, long poly-x seq 12, ok 977
+PRIMER_PAIR_EXPLAIN=considered 3, high end compl 1, ok 2
+PRIMER_PAIR_PENALTY=0.2733
+PRIMER_LEFT_PENALTY=0.124828
+PRIMER_RIGHT_PENALTY=0.148509
+PRIMER_LEFT_SEQUENCE=CGTCTTCGAAAGCCTCTTTG
+PRIMER_RIGHT_SEQUENCE=CAACGTTGGACCTTCGTCTT
+PRIMER_LEFT=20,20
+PRIMER_RIGHT=454,20
+PRIMER_LEFT_TM=60.125
+PRIMER_RIGHT_TM=60.149
+PRIMER_LEFT_GC_PERCENT=50.000
+PRIMER_RIGHT_GC_PERCENT=50.000
+PRIMER_LEFT_SELF_ANY=6.00
+PRIMER_RIGHT_SELF_ANY=8.00
+PRIMER_LEFT_SELF_END=3.00
+PRIMER_RIGHT_SELF_END=1.00
+PRIMER_LEFT_END_STABILITY=7.3000
+PRIMER_RIGHT_END_STABILITY=6.4000
+PRIMER_PAIR_COMPL_ANY=4.00
+PRIMER_PAIR_COMPL_END=0.00
+PRIMER_PRODUCT_SIZE=435
+=
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/primer3_outfile.txt
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/primer3_output.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/primer3_output.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/primer3_output.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,103 @@
+PRIMER_SEQUENCE_ID=XXU25548
+SEQUENCE=CTGACTCTTATACACAAGTAGCGTCCTGGACGGAACCTTTCCCGTTTTGCCCTGTTCTGGAAAACCGGGCTGCTCAGGGCGATATTACTGCACCCGGCGGTGCTCGCCGTTTAACGGGTGATCAGACTGCCGCTCTGCGTGATTCTCTTAGCGATAAACCTGCAAAAAATATTATTTTGCTGATTGGCGATGGGATGGGGGACTCGGAAATTACTGCCGCACGTAATTATGCCGAAGGTGCGGGCGGCTTTTTTAAAGGTATAGATGCCTTACCGCTTACCGGGCAATACACTCACTATGCGCTGAATAAAAAAACCGGCAAACCGGACTACGTCACCGACTCGGCTGCATCAGCAACCGCCTGGTCAACCGGTGTCAAAACCTATAACGGCGCGCTGGGCGTCGATATTCACGAAAAAGATCACCCAACGATTCTGGAAATGGCAAAAGCCGCAGGTCTGGCGACCGGTAACGTTTCTACCGCAGAGTTGCAGGATGCCACGCCCGCTGCGCTGGTGGCACATGTGACCTCGCGCAAATGCTACGGTCCGAGCGCGACCAGTGAAAAATGTCCGGGTAACGCTCTGGAAAAAGGCGGAAAAGGATCGATTACCGAACAGCTGCTTAACGCTCGTGCCGACGTTACGCTTGGCGGCGGCGCAAAAACCTTTGCTGAAACGGCAACCGCTGGTGAATGGCAGGGAAAAACGCTGCGTGAACAGGCACAGGCGCGTGGTTATCAGTTGGTGAGCGATGCTGCCTCACTGAATTCGGTGACGGAAGCGAATCAGCAAAAACCCCTGCTTGGCCTGTTTGCTGACGGCAATATGCCAGTGCGCTGGCTAGGACCGAAAGCAACGTACCATGGCAATATCGATAAGCCCGCAGTCACCTGTACGCCAAATCCGCAACGTAATGACAGTGTACCAACCCTGGCGCAGATGACCGACAAAGCCATTGAATTGTTGAGTAAAAATGAGAAAGGCTTTTTCCTGCAAGTTGAAGGTGCGTCAATCGATAAACAGGATCATGCTGCGAATCCTTGTGGGCAAATTGGCGAGACGGTCGATCTCGATGAAGCCGTACAACGGGCGCTGGAATTCGCTAAAAAGGAGGGTAACACGCTGGTCATAGTCACCGCTGATCACGCCCACGCCAGCCAGATTGTTGCGCCGGATACCAAAGCTCCGGGCCTCACCCAGGCGCTAAATACCAAAGATGGCGCAGTGATGGTGATGAGTTACGGGAACTCCGAAGAGGATTCACAAGAACATACCGGCAGTCAGTTGCGTATTGCGGCGTATGGCCCGCATGCCGCCAATGTTGTTGGACTGACCGACCAGACCGATCTCTTCTACACCATGAAAGCCGCTCTGGGGCTGAAATAAAACCGCGCCCGGCAGTGAATTTTCGCTGCCGGGTGGTTTTTTTGCTGTTAGCAACCAGACTTAATGGCAGATCACGGGCGCATACGCTCATGGTTAAAACATGAAGAGGGATGGTGCTATGAAAATAACATTACTGGTTACCTTGCTTTTCGGTCTGGTTTTTTTAACCACCGTCGGCGCTGCCGAGAGAACTTTAACCCCACAACAACAGCGTATGACCTCCTGTAATCAGCAGGCGACGGCGCAGGCGTTGAAAGGGGATGCTCGTAAGACCTACATGAGTGATTGCCTGAAGAACAGCAAGTCTGCGCCTGGCGAAAAAAGTTTGACGCCACAGCAGCAAAAGATGCGCGAATGCAATAATCAAGCAACACAACAATCTCTGAAAGGTGATGATCGTAATAAGTTTATGAGTGCCTGCCTCAAGAAAGCCGCCTGATACCTGATAGTGCTAACGGGTGAGCTACGAAAATGGCTCACCCGAAATATCATACTTCTGCCTTTAGCTCCGTCTCTATAATTTGGGAAAATTGTTTCTGAATGTTCCCAAAAATAATGAATGATGAAAACTTTTTCAAAAAAGCGGCGGCGCACGGGGAGGA
ACCTCCTTTAACTCCTCAAAACGAACATCAGCGGTCCGGGCTGCGCTTCGCCCGTCGCGTCAGACTACCCCGTGCGGTTGGCCTGGCTGGCATGTTCTTACCGATTGCTTCAACGCTGGTTTCACACCCGCCGCCGGGCTGGTGGTGGCTGGTGTTGGTCGGCTGGGCGTTCGTCTGGCCGCATTTAGCCTGGCAGATAGCGAGCAGGGCCGTCGATCCGCTTAGCCGGGAAATTTACAACTTAAAAACCGATGCAGTATTAGCGGGAATGTGGGTAGGCGTAATGGGCGTAAACGTGCTGCCTTCCACCGCGATGTTGATGATTATGTGTCTGAATTTGATGGGGGCAGGCGGCCCCCGTCTGTTTGTCGCGGGTCTGGTGTTGATGGTGGTTTCCTGCCTTGTCACCCTCGAGGCCACCACATTCCGCACCGTAGGATTACTGCATCAGGAGTGGTGGATGCGCCCGGATGACCCTGCCGATGCGGATGAAAAGGAGAGTGGCAAATGGCTGGCAGCGGCCGCAACTAGCCGGTTACGCATGGGCAGCATGATGAGCAACGTGATTGCGGTCTGTGACCGCGAAGCCGATATTCATGCTTATCTGCAGGACAGGCTGGCGCATAACGAGCGCTTCGTGGTGCGCTCCAAGCACCCACGCAAGGACGTAGAGTCTGGGTTGTATCTGATCGACCATCTGAAGAACCAACCGGAGTTGGGTGGCTATCAGATCAGCATTCCGCAAAAGGGCGTGGTGGATAAACGCGGTAAACGTAAAAATCGACCAGCCCGCAAGGCGAGCTTGAGCCTGCGCAGTGGGCGCATCACGCTAAAACAGGGGAATATCACGCTCAACGCGGTGCTGGCCGAGGAGATTAACCCGCCCAAGGGTGAGACCCCGTTGAAATGGTTGTTGCTGACCGGCGAACCGGTCGAGTCGCTAGCCCAAGCCTTGCGCGTCATCGACATTTATACCCATCGCTGGCGGATCGAGGAGTTCCATAAGGCATGGAAAACCGGAGCAGGAGCCGAGAGGCAACGCATGGAGGAGCCGGATAATCTGGAGCGGATGGTCTCGATCCTCTCGTTTGTTGCGGTCAGGCTGTTACAGCTCAGAGAAAGCTTCACGCTGCCGCAAGCACTCAGGGCGCAAGGGCTGCTAAAGGAAGCGGAACACGTAGAAAGCCAGTCCGCAGAAACGGTGCTGACCCCGGATGAATGTCAGCTACTGGGCTATCTGGACAAGGGAAAACGCAAGCGCAAAGAGAAAGCAGGTAGCTTGCAGTGGGCTTACATGGCGATAGCTAGACTGGGCGGTTTTATGGACAGCAAGCGAACCGGAATTGCCAGCTGGGGCGCCCTCTGGTAAGGTTGGGAAGCCCTGCAAAGTAAACTGGATGGCTTTCTTGCCGCCAAGGATCTGATGGCGCAGGGGATCAAGATCTGATCAAGAGACAGGATGAGGATCGTTTCGCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAGGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAG
GCGCGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGAGCGGGACTCTGGGGTTCGAAATGACCGACCAAGCGACGCCCAACCTGCCATCACGAGATTTCGATTCCACCGCCGCCTTCTATGAAAGGTTGGGCTTCGGAATCGTTTTCCGGGACGCCGGCTGGATGATCCTCCAGCGCGGGGATCTCATGCTGGAGTTCTTCGCCCACCCCGGGCTCGATCCCCTCGCGAGTTGGTTCAGCTGCTGCCTGAGGCTGGACGACCTCGCGGAGTTCTACCGGCAGTGCAAATCCGTCGGCATCCAGGAAACCAGCAGCGGCTATCCGCGCATCCATGCCCCCGAACTGCAGGAGTGGGGAGGCACGATGGCCGCTTTGGTCGACCCGGACGGGACGCTCCTGCGCCTGATACAGAACGAATTGCTTGCAGGCATCTCATGAGTGTGTCTTCCCGTTTTCCGCCTGAGGTCACTGCGTGGATGGAGCGCTGGCGCCTGCTGCGCGACGGCGAGCTGCTCACCACCCACTCGAGCTGGATACTTCCCGTCCGCCAGGGGGACATGCCGGCGATGCTGAAGGTCGCGCGCATTCCCGATGAAGAGGCCGGTTACCGCCTGTTGACCTGGTGGGACGGGCAGGGCGCCGCCCGAGTCTTCGCCTCGGCGGCGGGCGCTCTGCTCATGGAGCGCGCGTCCGGGGCCGGGGACCTTGCACAGATAGCGTGGTCCGGCCAGGACGACGAGGCTTGCAGGATCCTCTGCGACACCGCCGCTCGTCTGCACGCGCCGCGGTCCGGACCGCCGCCCGATCTCCATCCGCTACAGGAATGGTTCCAGCCGCTTTTCCGGTTGGCCGCTGAGCACGCGGCACTTGCGCCCGCCGCCAGCGTAGCGCGCCAACTTCTGGCGGCGCCGCGCGAGGTGTGCCCGCTCCACGGCGACCTGCACCACGAGAACGTGCTCGACTTCGGCGACCGCGGCTGGCTGGCCATCGACCCGCACGGACTGCTCGGCGAGCGCACCTTCGACTATGCCAACATCTTCACGAATCCCGATCTCAGCGACCCCGGTCGCCCGCTTGCGATCCTGCCGGGCAGGCTGGAGGCTCGACTCAGCATTGTGGTCGCGACGACCGGGTTTGAGCCCGAACGGCTTCTTCGCTGGATCATTGCATGGACGGGCTTGTCGGCAGCCTGGTTCATCGGCGACGGCGACGGCGAGGGCGAGGGCGCTGCGATTGATCTGGCCGTAAACGCCATGGCACGCCGGTTGCTTGACTAGCGCGGTCACCGATCTCACCTGGTCGTCGAGCTAGGTCAGGCCGTGTCGGGCGTGATCCGCTGGAAGTCGTTGCGGGCCACACCCGCCGCCTCGAAGCCCTGCACCAGGCCGGCATCGTGGTGTGCGTGGCCGAGGGACTATGGAAGGTGCCGGACGATCTGCCCGAGCAGGGCCGCCGCTATGACGCCCAGCGTCTTGGTGGCGTGACGGTGGAGCTGAAATCGCACCTGCCCATCGAGCGGCAGGCCCGCGTGATCGGTGCCACCTGGCTTGACCAGCAGTTGATCGACGGTGGCTCGGGCTTGGGCGACCTGGGCTTTAGCAGTGAGGCCAAGTAGGCGATACAGCAGCGCGCGGACTTCCTGGCCGAACAGGGACTGGCCGAGCGGCGCGGGCAGCGCGTGATCCTCACCGGAATCTGCTCGGCAGCAGCGGGCTCGGGAACTGGCGCAGGCCGCGAAGGACATTGCCGCCGATACCGGCCTGGAGC
ATCGCCCCGTGGCCGACGGCCAGCGCGTTGCCGGCGTCTACCGGCGCCCCGTCATGCTCGCCAGCGGGCGAAATGGGATGCTTGATGACGCCAAGGGGTCCAGCCTCGTGCGGTGGAAGCCCATCGAACAGCGGCTTGGGGAGCAGCTCGCCGCGACGGTGCGCGGTGGCGGCGTGTCTTGGGAGATTGGACGACAGCGTGGGCCGGCCCCTGTCTCTTGATCAGATCTTGATCCCCTGCGCCATCAGATCCTTGGCGGCAAGAAAGCCATCCAGTTTACTTTGCAGGGCTTCCCAACCTTCCCAGAGGGCGCCCCAGCTGGCAATTCCGGTTCGCTTGCTGTCCATAAAACCGCCCAGTCTAGCTATCGCCATGTAAGCCCACTGCAAGCTACCTGCTTTCTCTTTGCGCTTGCGTTTTCCCTTGTCCAGATAGCCCAGTAGCTGACATTCATCCGGGGTCAGCACCGTTTCTGCGGACTGGCTTTCTACGTGTTCCGCTTCCTTTAGCAGCCCTTGCGCCCTGAGTGCTTGCGGCAGCGTGAAGCTTTCTCTGAGCTGTAACAGCCTGACCGCAACAAACGAGAGGATCGAGACCATCCGCTCCAGATTATCCGGCTCCTCCATGCGTTGCCTCTCGGCTCCTGCTCCGGTTTTCCATGCCTTATGGAACTCCTCGATCCGCCAGCGATGGGTATAAATGTCGATGACGCGCAAGGCTTGGGCTAGCGACTCGACCGGTTCGCCGGTCAGCAACAACCATTTCAACGGGGTCTCACCCTTGGGCGGGTTAATCTCCTCGGCCAGCACCGCGTTGAGCGTGATATTCCCCTGTTTTAGCGTGATGCGCCCACTGCGCAGGCTCAAGCTCGCCTTGCGGGCTGGTCGATTTTTACGTTTACCGCGTTTATCCACCACGCCCTTTTGCGGAATGCTGATCTGATAGCCACCCAACTCCGGTTGGTTCTTCAGATGGTCGATCAGATACAACCCAGACTCTACGTCCTTGCGTGGGTGCTTGGAGCGCACCACGAAGCGCTCGTTATGCGCCAGCCTGTCCTGCAGATAAGCATGAATATCGGCTTCGCGGTCACAGACCGCAATCACGTTGCTCATCATGCTGCCCATGCGTAACCGGCTAGTTGCGGCCGCTGCCAGCCATTTGCCACTCTCCTTTTCATCCGCATCGGCAGGGTCATCCGGGCGCATCCACCACTCCTGATGCAGTAATCCTACGGTGCGGAATGTGGTGGCCTCGAGCAAGAGAACGGAGTGAACCCACCATCCGCGGGATTTATCCTGAATAGAGCCCAGCTTGCCAAGCTCTTCGGCGACCTGGTGGCGATAACTCAAAGAGGTGGTGTCCTCAATGGCCAGCAGTTCGGGAAACTCCTGAGCCAACTTGACTGTTTGCATGGCGCCAGCCTTTCTGATCGCCTCGGCAGAAACGTTGGGATTGCGGTAAAATCGGTAAGCGCCTTCCTGCATGGCTTCACTACCCTCTGATGAGATGGTTATTGATTTACCAGAATATTTTGCCAATTGGGCGGCGACGTTAACCAAGCGGGCAGTACGGCGAGGATCACCCAGCGCCGCCGAAGAGAACACAGATTTAGCCCAGTCGGCCGCACGATGAAGAGCAGAAGTTATCATGAACGTTACCATGTTAGGAGGTCACATGGAAGTCAGATCCTGGAAAACGGGAAAGGTTCCGTTCAGGACGCTACTTGTGTATAAGAGTCAG
+PRIMER_PAIR_PENALTY=0.0034
+PRIMER_LEFT_PENALTY=0.000037
+PRIMER_RIGHT_PENALTY=0.003322
+PRIMER_LEFT_SEQUENCE=GACCGAAAGCAACGTACCAT
+PRIMER_RIGHT_SEQUENCE=ATGACCAGCGTGTTACCCTC
+PRIMER_LEFT=846,20
+PRIMER_RIGHT=1131,20
+PRIMER_LEFT_TM=60.000
+PRIMER_RIGHT_TM=59.997
+PRIMER_LEFT_GC_PERCENT=50.000
+PRIMER_RIGHT_GC_PERCENT=55.000
+PRIMER_LEFT_SELF_ANY=4.00
+PRIMER_RIGHT_SELF_ANY=3.00
+PRIMER_LEFT_SELF_END=2.00
+PRIMER_RIGHT_SELF_END=3.00
+PRIMER_LEFT_END_STABILITY=7.8000
+PRIMER_RIGHT_END_STABILITY=9.4000
+PRIMER_PAIR_COMPL_ANY=4.00
+PRIMER_PAIR_COMPL_END=1.00
+PRIMER_PRODUCT_SIZE=286
+PRIMER_PAIR_PENALTY_1=0.0180
+PRIMER_LEFT_1_PENALTY=0.003322
+PRIMER_RIGHT_1_PENALTY=0.014712
+PRIMER_LEFT_1_SEQUENCE=GAGGGTAACACGCTGGTCAT
+PRIMER_RIGHT_1_SEQUENCE=GAATCCTCTTCGGAGTTCCC
+PRIMER_LEFT_1=1112,20
+PRIMER_RIGHT_1=1263,20
+PRIMER_LEFT_1_TM=59.997
+PRIMER_RIGHT_1_TM=60.015
+PRIMER_LEFT_1_GC_PERCENT=55.000
+PRIMER_RIGHT_1_GC_PERCENT=55.000
+PRIMER_LEFT_1_SELF_ANY=3.00
+PRIMER_RIGHT_1_SELF_ANY=4.00
+PRIMER_LEFT_1_SELF_END=2.00
+PRIMER_RIGHT_1_SELF_END=2.00
+PRIMER_LEFT_1_END_STABILITY=6.3000
+PRIMER_RIGHT_1_END_STABILITY=9.7000
+PRIMER_PAIR_1_COMPL_ANY=4.00
+PRIMER_PAIR_1_COMPL_END=1.00
+PRIMER_PRODUCT_SIZE_1=152
+PRIMER_PAIR_PENALTY_2=0.0213
+PRIMER_LEFT_2_PENALTY=0.000037
+PRIMER_RIGHT_2_PENALTY=0.021275
+PRIMER_LEFT_2_SEQUENCE=GACCGAAAGCAACGTACCAT
+PRIMER_RIGHT_2_SEQUENCE=CTTTTTAGCGAATTCCAGCG
+PRIMER_LEFT_2=846,20
+PRIMER_RIGHT_2=1111,20
+PRIMER_LEFT_2_TM=60.000
+PRIMER_RIGHT_2_TM=59.979
+PRIMER_LEFT_2_GC_PERCENT=50.000
+PRIMER_RIGHT_2_GC_PERCENT=45.000
+PRIMER_LEFT_2_SELF_ANY=4.00
+PRIMER_RIGHT_2_SELF_ANY=6.00
+PRIMER_LEFT_2_SELF_END=2.00
+PRIMER_RIGHT_2_SELF_END=2.00
+PRIMER_LEFT_2_END_STABILITY=7.8000
+PRIMER_RIGHT_2_END_STABILITY=10.2000
+PRIMER_PAIR_2_COMPL_ANY=4.00
+PRIMER_PAIR_2_COMPL_END=0.00
+PRIMER_PRODUCT_SIZE_2=266
+PRIMER_PAIR_PENALTY_3=0.0354
+PRIMER_LEFT_3_PENALTY=0.019325
+PRIMER_RIGHT_3_PENALTY=0.016052
+PRIMER_LEFT_3_SEQUENCE=AGACAATCGGCTGCTCTGAT
+PRIMER_RIGHT_3_SEQUENCE=CTCGTCCTGCAGTTCATTCA
+PRIMER_LEFT_3=3544,20
+PRIMER_RIGHT_3=3653,20
+PRIMER_LEFT_3_TM=59.981
+PRIMER_RIGHT_3_TM=59.984
+PRIMER_LEFT_3_GC_PERCENT=50.000
+PRIMER_RIGHT_3_GC_PERCENT=50.000
+PRIMER_LEFT_3_SELF_ANY=4.00
+PRIMER_RIGHT_3_SELF_ANY=6.00
+PRIMER_LEFT_3_SELF_END=3.00
+PRIMER_RIGHT_3_SELF_END=1.00
+PRIMER_LEFT_3_END_STABILITY=6.6000
+PRIMER_RIGHT_3_END_STABILITY=6.9000
+PRIMER_PAIR_3_COMPL_ANY=4.00
+PRIMER_PAIR_3_COMPL_END=2.00
+PRIMER_PRODUCT_SIZE_3=110
+PRIMER_PAIR_PENALTY_4=0.0354
+PRIMER_LEFT_4_PENALTY=0.019325
+PRIMER_RIGHT_4_PENALTY=0.016052
+PRIMER_LEFT_4_SEQUENCE=AGACAATCGGCTGCTCTGAT
+PRIMER_RIGHT_4_SEQUENCE=TCGTCCTGCAGTTCATTCAG
+PRIMER_LEFT_4=3544,20
+PRIMER_RIGHT_4=3652,20
+PRIMER_LEFT_4_TM=59.981
+PRIMER_RIGHT_4_TM=59.984
+PRIMER_LEFT_4_GC_PERCENT=50.000
+PRIMER_RIGHT_4_GC_PERCENT=50.000
+PRIMER_LEFT_4_SELF_ANY=4.00
+PRIMER_RIGHT_4_SELF_ANY=6.00
+PRIMER_LEFT_4_SELF_END=3.00
+PRIMER_RIGHT_4_SELF_END=2.00
+PRIMER_LEFT_4_END_STABILITY=6.6000
+PRIMER_RIGHT_4_END_STABILITY=7.0000
+PRIMER_PAIR_4_COMPL_ANY=4.00
+PRIMER_PAIR_4_COMPL_END=3.00
+PRIMER_PRODUCT_SIZE_4=109
+=

Added: trunk/packages/bioperl/branches/upstream/current/t/data/prints.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/prints.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/prints.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,457 @@
+Sn; BIOPERL_TEST_SEQUENCE_ID
+Si; Fasta sequence
+1TBS
+1TBH DNAGYRASEB      2.035573e-70
+1TBH TPI2FAMILY      3.830756e-70
+1TBF
+2TBS
+2TBT FingerPrint     No.Motifs SumId    AveId    ProfScore  Ppvalue     Evalue      GraphScan               
+2TBH DNAGYRASEB      10 of  11 5.3e+02  53       4363       2.5e-75     2e-70       IIII.IIIIII     
+2TBH TPI2FAMILY      9  of  9  539.06   59.90    4433       4.8e-75     3.8e-70     IIIIIIIII       
+2TBN BCTRLSENSOR     2  of  4  64.27    32.14    391        2.4e-06     0.19        I.i.            
+2TBN TROPOMYOSIN     2  of  5  58.53    29.26    483        4.4e-05     3.5         I.i..           
+2TBN MEROZOITESA     2  of  15 52.70    26.35    488        8e-05       6.4         .i...........i. 
+2TBN FADG3PDH        2  of  6  72.65    36.32    400        0.00012     9.8         I....I          
+2TBN PROXISOMPAGR    2  of  6  97.06    48.53    481        0.00046     37          .I..I.          
+2TBN CYANASE         2  of  5  60.96    30.48    322        0.00068     54          i...I           
+2TBN ALARACEMASE     2  of  9  49.23    24.61    344        0.00068     54          ..I...i..       
+2TBN FMOXYGENASE     2  of  13 44.28    22.14    327        0.00073     58          i........i...   
+2TBF
+3TBS
+3TBT MotifName       No.Mots   IdScore PfScore Pvalue    Sequence                                                Len  low  pos  high 
+3TBH DNAGYRASEB      1  of  11 56.97   334     4.14e-05  YSAGQIKILEG                                             11   4    8    33   
+3TBH DNAGYRASEB      2  of  11 48.33   419     1.44e-07  DFQFDILSARFRELAF                                        16   178  182  207  
+3TBH DNAGYRASEB      3  of  11 48.81   352     4.94e-06  FLNKGLVLIVEDRR                                          14   193  197  222  
+3TBH DNAGYRASEB      4  of  11 40.58   487     2.12e-10  GGIVSFVEHINENKHPMHKVIHF                                 23   217  227  277  
+3TBH DNAGYRASEB      6  of  11 65.69   637     1.20e-11  GEDVKEGLTAVISIKIP                                       17   310  319  369  
+3TBH DNAGYRASEB      7  of  11 45.33   358     7.99e-07  FFEENPNITKKILEK                                         15   359  367  417  
+3TBH DNAGYRASEB      8  of  11 60.16   604     3.60e-13  KCILSAKAREAARKARDLTRR                                   21   373  381  431  
+3TBH DNAGYRASEB      9  of  11 60.33   332     1.39e-06  LVSALGTGIG                                              10   459  477  911  
+3TBH DNAGYRASEB      10 of  11 44.62   312     2.07e-05  PSNRVVLKVKLDD                                           13   588  594  1029 
+3TBH DNAGYRASEB      11 of  11 54.51   528     4.10e-09  AEETFNILMGDEVQPRK                                       17   604  610  1045 
+3TBB
+3TBH TPI2FAMILY      1  of  9  56.63   501     7.92e-09  GLHKMVYEVVDNSVDE                                        16   31   37   172  
+3TBH TPI2FAMILY      2  of  9  62.28   483     2.55e-08  EVKDNGRGIPVDIH                                          14   66   72   209  
+3TBH TPI2FAMILY      3  of  9  73.17   547     2.56e-10  GGLHGVGVSVVNALS                                         15   109  115  252  
+3TBH TPI2FAMILY      4  of  9  50.35   371     8.26e-06  CFTNNINNNLGGTH                                          14   251  274  414  
+3TBH TPI2FAMILY      5  of  9  70.89   498     6.45e-10  LYLVEGDSAGGSAKQ                                         15   399  427  561  
+3TBH TPI2FAMILY      6  of  9  66.21   629     3.61e-10  KIRYHKIMIMTDADIDG                                       17   461  494  928  
+3TBH TPI2FAMILY      7  of  9  52.17   503     4.41e-10  HIRTLLLTFFFRYMRPVI                                      18   479  512  946  
+3TBH TPI2FAMILY      8  of  9  43.15   321     1.38e-06  GYLYVAQPPLYLI                                           13   499  532  966  
+3TBH TPI2FAMILY      9  of  9  64.20   580     7.91e-11  IQRYKGLGEMNPEQLWE                                       17   539  573  1008 
+3TBB
+3TBN BCTRLSENSOR     1  of  4  40.72   272     2.48e-05  NIIEVKDNGRGIPVD                                         15   219  69   1911 
+3TBN BCTRLSENSOR     3  of  4  23.55   119     9.78e-02  GLHGVGVSVVNALSEYLEV                                     19   252  116  1946 
+3TBB
+3TBN TROPOMYOSIN     1  of  5  35.76   286     9.84e-04  ELAFLNKGLVLIVEDRRR                                      18   9    194  357  
+3TBN TROPOMYOSIN     3  of  5  22.77   197     4.43e-02  ILEKCILSAKAREAARKARDLTRRKTVLE                           29   70   378  560  
+3TBB
+3TBN MEROZOITESA     2  of  15 28.21   293     1.55e-03  NGRGIPVDIHPDKKISTIEV                                    20   68   76   123  
+3TBN MEROZOITESA     14 of  15 24.49   195     5.17e-02  LIKHGKNSTYAYSDKEKEELL                                   21   510  543  576  
+3TBB
+3TBN FADG3PDH        1  of  6  32.91   209     6.51e-03  NDAYKVSGGLHGV                                           13   4    108  75   
+3TBN FADG3PDH        6  of  6  39.74   191     1.88e-02  LYLIKHGKNSTYA                                           13   347  541  454  
+3TBB
+3TBN PROXISOMPAGR    2  of  6  50.00   241     1.57e-02  PDATIFTTVDFQFD                                          14   30   173  60   
+3TBN PROXISOMPAGR    5  of  6  47.06   240     2.93e-02  EDVKEGLTAVISIKIPQ                                       17   185  320  215  
+3TBB
+3TBN CYANASE         1  of  5  25.89   131     4.96e-02  KHPTGLSGEDVKEGLT                                        16   13   312  77   
+3TBN CYANASE         5  of  5  35.06   191     1.37e-02  QAILPLKGKIL                                             11   131  448  297  
+3TBB
+3TBN ALARACEMASE     3  of  9  32.91   191     1.08e-02  AYKVSGGLHGVGV                                           13   120  110  155  
+3TBN ALARACEMASE     7  of  9  16.32   153     6.29e-02  LAEISIQYSETYTENI                                        16   265  257  307  
+3TBB
+3TBN FMOXYGENASE     1  of  13 22.06   151     7.01e-02  RKTVLEGGGLPGKLADC                                       17   2    401  7    
+3TBN FMOXYGENASE     10 of  13 22.22   176     1.04e-02  RVVLKVKLDDFVEAEETF                                      18   225  597  296  
+3TBB
+3TBF
+4TBT --------------------------------------------------------------------------
+4TBN DNAGYRASEB
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL        YDASsIqVLEG                                                        
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                           
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                  eFdFetLakRLRELAFFLNkGltItLtDeR           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL      GGIvsFVeYLNrnKtplhdepiY                                              
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                        GeDiREGLTAiISVKvP                               fLe
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL ENPqeAKkIveKKiidAArAReAARkARElTRR                                         
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                   LITALGtGIG                              
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL    PevRtLlqVTleD   ADeiFstLMGDdVEPRR             
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN TPI2FAMILY
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                     GLHHlvdEIvDNAaDE                   sVe
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL DNGRGIPVdiH                             GGLHGVGASVVNALS                   
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                    SFVNnIaTteGGTH         
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                         LiLVEGDSAGGSAKs   
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                  kLRYgKIiIMTDADvDG HIktLLL
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL TFFyRymPpLi  GfvyiAqPPLYKv                            IQRYKGLGEMnakqLwE   
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN BCTRLSENSOR
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                     vlieVs
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL DtGiGIppe                                gGTGLGLaIVkriveahGG              
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN TROPOMYOSIN
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                           
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                              EVASLNRRIQLVEEELDR           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL        EiQLKEAKHIAEEADRKYEEVARKLVIiE                                      
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN MEROZOITESA
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL  HGSGIRVDLGEDAEVaNtqY                                                     
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                         YDKMDQAddYGKStSRkDEML                             
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN FADG3PDH
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                  fDVlVIGGGaTGa                            
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                       LITIAGGKlTTYR                                       
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN PROXISOMPAGR
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                           
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                         FDIKPFTTVDFSSI                                    
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                        QAEKEKLLAEISSDIDQ                                  
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN CYANASE
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                           
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                KaakGlTFadLadelG                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL    RVVVTlDGKYL                                                            
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN ALARACEMASE
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                    hLKvDTGMnRLGv                          
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                   iGvVaiGYADGypRal                        
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN FMOXYGENASE
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                           
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                               KRVAiIGAGVSGLASIK                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL     kVkiKpsVKEfTETSAIF                           
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN CD44
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                            DLNITCRYAGVFHVE
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL KNGRY                                                                     
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL             DQFMTADETRNLQNVDMKIG                 
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN JANUSKINASE3
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                      aETFh
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL VGLPGA                                                                    
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                     GLhVDGvAlnLTS         
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                                                           
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                           
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL                                                  
+4TBL 
+4TBS --------------------------------------------------------------------------
+4TBT --------------------------------------------------------------------------
+4TBN KUPTAKETRKA
+4TBS MSQKETSYSAGQIKILEGLEAVRKRPGMYIGTQDETGLHKMVYEVVDNSVDEAMAGHCTEIRISILPNNIIEVK
+4TBL                                                                           
+4TBL 
+4TBS DNGRGIPVDIHPDKKISTIEVVMTILHAGGKFENDAYKVSGGLHGVGVSVVNALSEYLEVEVHQKGKIYTQKYE
+4TBL                                                                           
+4TBL 
+4TBS KGIPVSPVEIKGDSSERGTIVRFKPDATIFTTVDFQFDILSARFRELAFLNKGLVLIVEDRRRGAEGENLLRNE
+4TBL                                                                           
+4TBL 
+4TBS FQFSGGIVSFVEHINENKHPMHKVIHFERNKDDVLAEISIQYSETYTENIFCFTNNINNNLGGTHLEGFRAALT
+4TBL                                                                           
+4TBL 
+4TBS RTLNDFLKKDTTLSKKHPTGLSGEDVKEGLTAVISIKIPQPQFNSQTKEKLVNAEIKGIMQTLSSEGLTLFFEE
+4TBL                                                                           
+4TBL 
+4TBS NPNITKKILEKCILSAKAREAARKARDLTRRKTVLEGGGLPGKLADCSEKDPAFSELYLVEGDSAGGSAKQGRD
+4TBL                                KIIILGAGQVGgTLA                            
+4TBL 
+4TBS RNTQAILPLKGKILNVEKARLDKILSSEEIRILVSALGTGIGEDEFNINKIRYHKIMIMTDADIDGSHIRTLLL
+4TBL                                                                           
+4TBL 
+4TBS TFFFRYMRPVIERGYLYVAQPPLYLIKHGKNSTYAYSDKEKEELLKNVGTEKVVIQRYKGLGEMNPEQLWETTM
+4TBL                                                                        TaS
+4TBL 
+4TBS DPSNRVVLKVKLDDFVEAEETFNILMGDEVQPRKQFIEINAAKVANLDL
+4TBL DEtNMvACQvAy                                     
+4TBL 
+4TBS --------------------------------------------------------------------------

Added: trunk/packages/bioperl/branches/upstream/current/t/data/promoterwise.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/promoterwise.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/promoterwise.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,11 @@
+1596.49	MUSSPSYN	4	18	1	4.143962167-143965267	29	43	1	group_0_0
+1596.49	MUSSPSYN	19	242	1	4.143962167-143965267	45	268	1	group_0_0
+1596.49	MUSSPSYN	244	365	1	4.143962167-143965267	527	648	1	group_0_0
+1596.49	MUSSPSYN	367	457	1	4.143962167-143965267	967	1057	1	group_0_0
+1596.49	MUSSPSYN	459	611	1	4.143962167-143965267	1772	1924	1	group_0_0
+1596.49	MUSSPSYN	613	840	1	4.143962167-143965267	2252	2479	1	group_0_0
+1596.49	MUSSPSYN	842	964	1	4.143962167-143965267	2566	2688	1	group_0_0
+1596.49	MUSSPSYN	1146	1152	1	4.143962167-143965267	2693	2699	1	group_0_0
+1596.49	MUSSPSYN	1154	1159	1	4.143962167-143965267	2700	2705	1	group_0_0
+1596.49	MUSSPSYN	1161	1161	1	4.143962167-143965267	2706	2706	1	group_0_0
+1596.49	MUSSPSYN	1163	1172	1	4.143962167-143965267	2707	2716	1	group_0_0

Added: trunk/packages/bioperl/branches/upstream/current/t/data/protpars.phy
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/protpars.phy	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/protpars.phy	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,413 @@
+ 3 5127
+SINFRUP001   .......... ......DDQV VLQCTASVLK EQIKLCLSCE GFGNRLCFLE 
+SINFRUP002   .......... ......DDQV VLQCTASVLK EQIKLCLSCE GFGNRLCFLE 
+ENSP000003   .MGDAEGEDE VQFLRTDDEV VLQCSATVLK EQLKLCLAAE GFGNRLCFLE 
+
+             TTSNAQNVPP DLAICTFILE QSLSVRALQE MLANTVEMTE AVDLDKWSSQ 
+             TTSNAQNVPP DLAICTFILE QSLSVRALQE MLANTVEMTE AVDLDKWSSQ 
+             PTSNAQNVPP DLAICCFVLE QSLSVRALQE MLANT..... .VEAGVESSQ 
+
+             GGGHRTLLYG HAILLRHNHS GMYLSCLTTS RSLTDKLAFD VGLQEDSTGE 
+             GGGHRTLLYG HAILLRHNHS GMYLSCLTTS RSLTDKLAFD VGLQEDSTGE 
+             GGGHRTLLYG HAILLRHAHS RMYLSCLTTS RSMTDKLAFD VGLQEDATGE 
+
+             ACWWTIHPAS KQRSEGEKVR VGDDLILVSV SSERYLHLSY ASGDLMVDAS 
+             ACWWTIHPAS KQRSEGEKVR VGDDLILVSV SSERYLHLSY ASGDLMVDAS 
+             ACWWTMHPAS KQRSEGEKVR VGDDIILVSV SSERYLHLST ASGELQVDAS 
+
+             FMQTLWNMNP ISSGCELAEG FLTGGHVLRL FHGHMDECLA IATPEEGEEK 
+             FMQTLWNMNP ISSGCELAEG FLTGGHVLRL FHGHMDECLA IATPEEGEEK 
+             FMQTLWNMNP ICSRCE..EG FVTGGHVLRL FHGHMDECLT ISPADS.DDQ 
+
+             RRMAHYEGGS VCSQARSLWR LEPLRISWSG SHMKWGQSFR IRHITTGRYL 
+             RRMAHYEGGS VCSQARSLWR LEPLRISWSG SHMKWGQSFR IRHITTGRYL 
+             RRLVYYEGGA VCTHARSLWR LEPLRISWSG SHLRWGQPLR VRHVTTGQYL 
+
+             CLDEEKGLLV VDPERANTKL SAFCFRASKE KVDVAQKRDV EGMGIPEIKY 
+             CLDEEKGLLV VDPERANTKL SAFCFRASKE KVDVAQKRDV EGMGIPEIKY 
+             ALTEDQGLVV VDASKAHTKA TSFCFRISKE KLDVAPKRDV EGMGPPEIKY 
+
+             GESMCFVQHV STGLWLTYAS LDAKAARLGM MKRKVILHQE GHMDDALTVS 
+             GESMCFVQHV STGLWLTYAS LDAKAARLGM MKRKVILHQE GHMDDALTVS 
+             GESLCFVQHV ASGLWLTYAA PDPKALRLGV LKKKAMLHQE GHMDDALSLT 
+
+             RSQTEESQAA RMIYSTVGLF RQFIKGLDTL TGKNKSPGAL S...LPLEGV 
+             RSQTEESQAA RMIYSTVGLF RQFIKGLDTL TGKNKSPGAL S...LPLEGV 
+             RCQQEESQAA RMIHSTNGLY NQFIKSLDSF SGKPRGSGPP AGTALPIEGV 
+
+             ILSLQDLIFY FRPPDEELEH EEKQTKLRSL RNRQNLFQEE GMITIVLECI 
+             ILSLQDLIFY FRPPDEELEH EEKQTKLRSL RNRQNLFQEE GMITIVLECI 
+             ILSLQDLIIY FEPPSEDLQH EEKQSKLRSL RNRQSLFQEE GMLSMVLNCI 
+
+             DRLNVYNTAA HFSEFAGEEA AESWKEIVNL LYELLASLIR GNRSNCALFC 
+             DRLNVYNTAA HFSEFAGEEA AESWKEIVNL LYELLASLIR GNRSNCALFC 
+             DRLNVYTTAA HFAEFAGEEA AESWKEIVNL LYELLASLIR GNRSNCALFS 
+
+             DNLDWLVSKL DRLEASSGIL EVLYCVLIES PEVLNIIQEN HIKSIISLLD 
+             DNLDWLVSKL DRLEASSGIL EVLYCVLIES PEVLNIIQEN HIKSIISLLD 
+             TNLDWLVSKL DRLEASSGIL EVLYCVLIES PEVLNIIQEN HIKSIISLLD 
+
+             KHGRNHKVLD VLRSLCVCNG VAVRSNQNLI TENLLPGRDL LLQTNIVNYV 
+             KHGRNHKVLD VLRSLCVCNG VAVRSNQNLI TENLLPGRDL LLQTNIVNYV 
+             KHGRNHKVLD VLCSLCVCNG VAVRSNQDLI TENLLPGREL LLQTNLINYV 
+
+             TSVRPNIFLG TCEGSTQYKK WYYEVMVDHV EAFVTAQATH LRVGWAMTEG 
+             TSVRPNIFLG TCEGSTQYKK WYYEVMVDHV EAFVTAQATH LRVGWAMTEG 
+             TSIRPNIFVG RAEGTTQYSK WYFEVMVDEV TPFLTAQATH LRVGWALTEG 
+
+             YSPYPGGGEG WGGNGVGDDL YSYSFDGLHL WSGTVPRQVA SPNAHTLAAD 
+             YSPYPGGGEG WGGNGVGDDL YSYSFDGLHL WSGTVPRQVA SPNAHTLAAD 
+             YTPYPGAGEG WGGNGVGDDL YSYGFDGLHL WTGHVARPVT SPGQHLLAPE 
+
+             DVVSCCLDLS VPSISFRING HPVQGMFENF NVDSLFFPVI SFSAGVKARF 
+             DVVSCCLDLS VPSISFRING HPVQGMFENF NVDSLFFPVI SFSAGVKARF 
+             DVISCCLDLS VPSISFRING CPVQGVFESF NLDGLFFPVV SFSAGVKVRF 
+
+             LLGGRHGDFK FMPPPGYAPC YEALLPRERM RIEPIKEYKH DFNGVRNLLG 
+             LLGGRHGDFK FMPPPGYAPC YEALLPRERM RIEPIKEYKH DFNGVRNLLG 
+             LLGGRHGEFK FLPPPGYAPC HEAVLPRERL HLEPIKEYRR EGPRGPHLVG 
+
+             PTLSLTHTSF TPCPVDTVQI VLPPHLERIR EKLAENIHEL WAVTRIEQGW 
+             PTLSLTHTSF TPCPVDTVQI VLPPHLERIR EKLAENIHEL WAVTRIEQGW 
+             PSRCLSHTDF VPCPVDTVQI VLPPHLERIR EKLAENIHEL WALTRIEQGW 
+
+             TYGSFRDDNK KLHPCLVDFQ SLPEPERNYN LQMSAETLKC VCAV...A.. 
+             TYGSFRDDNK KLHPCLVDFQ SLPEPERNYN LQMSAETLKC VCAV...A.. 
+             TYGPVRDDNK RLHPCLVDFH SLPEPERNYN LQMSGETLKT LLALGCHVGM 
+
+             ......ETLH DCVSSR.YVM SNAYKPAPLD LSHVKLTPNQ NQLVEKLAEN 
+             ......ETLH DCVSSR.YVM SNAYKPAPLD LSHVKLTPNQ NQLVEKLAEN 
+             ADEKAEDNLK KTKLPKTYMM SNGYKPAPLD LSHVRLTPAQ TTLVDRLAEN 
+
+             GHNVWARDRV RQGWTYSIVQ DILNKRNPRL VPYILLDERT KKTNRDSVNN 
+             GHNVWARDRV RQGWTYSIVQ DILNKRNPRL VPYILLDERT KKTNRDSVNN 
+             GHNVWARDRV GQGWSYSAVQ DIPARRNPRL VPYRLLDEAT KRSNRDSLCQ 
+
+             AVRTLIGYGY NIEPPDQEST GHGLENTRGD KVRIFRAEKS YAVTQGKWYF 
+             AVRTLIGYGY NIEPPDQEST GHGLENTRGD KVRIFRAEKS YAVTQGKWYF 
+             AVRTLLGYGY NIEPPDQEPS Q.VENQSRCD RVRIFRAEKS YTVQSGRWYF 
+
+             EFEAVTTGEM RVGWARPNVH SDTELGADEL AYVFNGNKA. ........QR 
+             EFEAVTTGEM RVGWARPNVH SDTELGADEL AYVFNGNKA. ........QR 
+             EFEAVTTGEM RVGWARPELR PDVELGADEL AYVFNGHRG. ........QR 
+
+             WHIGNEPFGR QWQSGDVVGC MIDLTEMNIM FTLNGEMLIS DSGSEMAFKD 
+             WHIGNEPFGR QWQSGDVVGC MIDLTEMNIM FTLNGEMLIS DSGSEMAFKD 
+             WHLGSEPFGR PWQPGDVVGC MIDLTENTII FTLNGEVLMS DSGSETAFRE 
+
+             IEIGEGFIPV CTLGLSQVGR INLGQNVSSL RYFAICGLQE GFEPFAINMK 
+             IEIGEGFIPV CTLGLSQVGR INLGQNVSSL RYFAICGLQE GFEPFAINMK 
+             IEIGDGFLPV CSLGPGQVGH LNLGQDVSSL RFFAICGLQE GFEPFAINMQ 
+
+             RDTTMWFSKS LPQFVPVPAD HNHIEVSRVD GTVDSAPCLK LTHKTYGSQN 
+             RDTTMWFSKS LPQFVPVPAD HNHIEVSRVD GTVDSAPCLK LTHKTYGSQN 
+             RPVTTWFSKG LPQFEPVPLE HPHYEVSRVD GTVDTPPCLR LTHRTWGSQN 
+
+             ANTDMLFLRL SMPIQFHATF KVPAGTTPLT RALTIP...E DVAVVEPDSE 
+             ANTDMLFLRL SMPIQFHATF KVPAGTTPLT RALTIP...E DVAVVEPDSE 
+             SLVEMLFLRL SLPVQFHQHF RCTAGATPLA PPGLQPPAED EARAAEPDPD 
+
+             FEVLKKSASR KEQEEDKKEP SVPKEI.... ........L. .AENEKDTMS 
+             FEVLKKSASR KEQEEDKKEP SVPKEI.... ........L. .AENEKDTMS 
+             YENLRRSAGG WSEAENGKEG TAKEGAPGGT PQAGGEAQPA RAENEKDATT 
+
+             EKGKKRGFFS KAKKAAMTPL A.....PPPP PTVPRLVEDV VPDD.RDDPE 
+             EKGKKRGFFS KAKKAAMTPL A.....PPPP PTVPRLVEDV VPDD.RDDPE 
+             EKNKKRGFLF KAKKVAMMTQ P......PAT PTLPRLPHDV VPADNRDDPE 
+
+             IILSTTTYYY SVRIFAGQEP SGVWVGWVTP DYHQYDQTFD LSKVRSVTVT 
+             IILSTTTYYY SVRIFAGQEP SGVWVGWVTP DYHQYDQTFD LSKVRSVTVT 
+             IILNTTTYYY SVRVFAGQEP SCVWAGWVTP DYHQHDMSFD LSKVRVVTVT 
+
+             VGDDKGNIYN SMKRSNCYMV WGDDLVS.NH QTRFSQEDMV IGCLVDLATG 
+             VGDDKGNIYN SMKRSNCYMV WGDDLVS.NH QTRFSQEDMV IGCLVDLATG 
+             MGDEQGNVHS SLKCSNCYMV WGGDFVSPGQ QGRISHTDLV IGCLVDLATG 
+
+             LMTFTANGKE INTFYQVEPN TKLFPAVFVQ PLSQNMVQLE LGKLKNIMPI 
+             LMTFTANGKE INTFYQVEPN TKLFPAVFVQ PLSQNMVQLE LGKLKNIMPI 
+             LMTFTANGKE SNTFFQVEPN TKLFPAVFVL PTHQNVIQFE LGKQKNIMPL 
+
+             SAAMFRSERN NPVPQCPPRL DVQMLTPVIW SRMPNRFLNP DVGRVSERLG 
+             SAAMFRSERN NPVPQCPPRL DVQMLTPVIW SRMPNRFLNP DVGRVSERLG 
+             SAAMFQSERK NPAPQCPPRL EMQMLMPVSW SRMPNHFLQV ETRRAGERLG 
+
+             WVVECTEPLI MMALHIPEEN RCIDILELSE RQDLMKFHYH TLMLYCAVCA 
+             WVVECTEPLI MMALHIPEEN RCIDILELSE RQDLMKFHYH TLMLYCAVCA 
+             WAVQCQEPLT MMALHIPEEN RCMDILELSE RLDLQRFHSH TLRLYRAVCA 
+
+             LGNNRVAHAL CSHVDESQLF YATENTYLPG PLRSGYYDLL ISIHLESAKR 
+             LGNNRVAHAL CSHVDESQLF YATENTYLPG PLRSGYYDLL ISIHLESAKR 
+             LGNNRVAHAL CSHVDQAQLL HALEDAHLPG PLRAGYYDLL ISIHLESACR 
+
+             ARLGTNREFI VPMTEETLSI KLYPDAV... ...KAHSLPG VGLTTCLRPK 
+             ARLGTNREFI VPMTEETLSI KLYPDAV... ...KAHSLPG VGLTTCLRPK 
+             SRRSMLSEYI VPLTPETRAI TLFPPGRSTE NGHPRHGLPG VGVTTSLRPP 
+
+             LHFS...... SINFVGTDLD LYTLSPVFPL QELKNRAISM LTEAVLDGSQ 
+             LHFS...... SINFVGTDLD LYTLSPVFPL QELKNRAISM LTEAVLDGSQ 
+             HHFSPPCFVA ALPAAGAAEA PARLSPAIPL EALRDKALRM LGEAVRDGGQ 
+
+             AMRDPVGGSV EFHFVPILKL ISTLLIMGIF NDDDTKHILK MIDPNVFSGK 
+             AMRDPVGGSV EFHFVPILKL ISTLLIMGIF NDDDTKHILK MIDPNVFSGK 
+             HARDPVGGSV EFQFVPVLKL VSTLLVMGIF GDEDVKQILK MIEPEVFTEE 
+
+             DDEE...... ETDKPVEGGP AEGEGDKAKG EESEEAAELE D...EGVGKV 
+             DDEE...... ETDKPVEGGP AEGEGDKAKG EESEEAAELE D...EGVGKV 
+             EEEE...... ..DEEEEGEE EDEEE..... .........K E...EDEEET 
+
+             DGEKMEEEKE AEVVAVDLKD EEEGLEEGLL QMKLPESVKL QMCTLLQFFC 
+             DGEKMEEEKE AEVVAVDLKD EEEGLEEGLL QMKLPESVKL QMCTLLQFFC 
+             AQEKEDEEKE EEEAAE..GE KEEGLEEGLL QMKLPESVKL QMCHLLEYFC 
+
+             DCELRHRVEA IVAYSDKFVH NIQDNQRIRY NQLMRAFTMS AAETARKTRE 
+             DCELRHRVEA IVAYSDKFVH NIQDNQRIRY NQLMRAFTMS AAETARKTRE 
+             DQELQHRVES LAAFAERYVD KLQANQRSRY GLLIKAFSMT AAETARRTRE 
+
+             FRSPPQDQVL LLTNFKHSLE EEECPVPDNV RETLKEFHND LLLHCGIHIE 
+             FRSPPQDQVL LLTNFKHSLE EEECPVPDNV RETLKEFHND LLLHCGIHIE 
+             FRSPPQEQIN MLLQFKDGTD EEDCPLPEEI RQDLLDFHQD LLAHCGIQLD 
+
+             EEPVEEEVDT SLRGRLLSLV DKIKSIRGKK TEEKPE.VEE ETKPSTLQEL 
+             EEPVEEEVDT SLRGRLLSLV DKIKSIRGKK TEEKPE.VEE ETKPSTLQEL 
+             GEEEEPEEET TLGSRLMSLL EKVRLVKKKE EKPEEERSAE ESKPRSLQEL 
+
+             ISHTMIHWAQ ESFIQNPELV RLMFSLLHRQ YDGLGELIRA LPKAYAINAV 
+             ISHTMIHWAQ ESFIQNPELV RLMFSLLHRQ YDGLGELIRA LPKAYAINAV 
+             VSHMVVRWAQ EDFVQSPELV RAMFSLLHRQ YDGLGELLRA LPRAYTISPS 
+
+             SVQDTMDLLE CLGQIRSLLI VQMGPEEERL MIQSIGNIMN NKVFYQHPNL 
+             SVQDTMDLLE CLGQIRSLLI VQMGPEEERL MIQSIGNIMN NKVFYQHPNL 
+             SVEDTMSLLE CLGQIRSLLI VQMGPQEENL MIQSIGNIMN NKVFYQHPNL 
+
+             MRALGMHETV MEVMVNVLGG GGDSKEIRFP QMVTNCCRFL CYFCRISRQN 
+             MRALGMHETV MEVMVNVLGG GGDSKEIRFP QMVTNCCRFL CYFCRISRQN 
+             MRALGMHETV MEVMVNVLGG G.ESKEIRFP KMVTSCCRFL CYFCRISRQN 
+
+             QRSMFDHLSY LLQNSSIGLG MRGSTPLDVA AASCIDNNEL ALALQEQDLE 
+             QRSMFDHLSY LLQNSSIGLG MRGSTPLDVA AASCIDNNEL ALALQEQDLE 
+             QRSMFDHLSY LLENSGIGLG MQGSTPLDVA AASVIDNNEL ALALQEQDLE 
+
+             MVVTYLAGCG LQMCPMLLSK CYPDIGWNPC GGERYLDFLR FAVFVNGESV 
+             MVVTYLAGCG LQMCPMLLSK CYPDIGWNPC GGERYLDFLR FAVFVNGESV 
+             KVVSYLAGCG LQSCPMLVAK GYPDIGWNPC GGERYLDFLR FAVFVNGESV 
+
+             EENANVVVRL LIRRPECFGP ALRGEGGNGL LAAMEEAIKI SEDPARDGPT 
+             EENANVVVRL LIRRPECFGP ALRGEGGNGL LAAMEEAIKI SEDPARDGPT 
+             EENANVVVRL LIRKPECFGP ALRGEGGSGL LAAIEEAIRI SEDPARDGPG 
+
+             VKKDRRF.MF GGEEQQEENR VHLGNAIMSF YSALIDLLGR CAPEMHLIQA 
+             VKKDRRF.MF GGEEQQEENR VHLGNAIMSF YSALIDLLGR CAPEMHLIQA 
+             IRRDRRR.EH FGEEPPEENR VHLGHAIMSF YAALIDLLGR CAPEMHLIQA 
+
+             GKGEALRIRA ILRSLVPIED LVGVISLPVQ IPSYGKDSQI VEPKMSASFV 
+             GKGEALRIRA ILRSLVPIED LVGVISLPVQ IPSYGKDSQI VEPKMSASFV 
+             GKGEALRIRA ILRSLVPLED LVGIISLPLQ IPTLGKDGAL VQPKMSASFV 
+
+             PDHKASMVLF LDRVYGIDNQ DFLLHVLEVG FLPDMRAAAS LDTVAFSTTE 
+             PDHKASMVLF LDRVYGIDNQ DFLLHVLEVG FLPDMRAAAS LDTVAFSTTE 
+             PDHKASMVLF LDRVYGIENQ DFLLHVLDVG FLPDMRAAAS LDTATFSTTE 
+
+             MALALNRYLC SAVLPLLTKC APLFAGTDHR AIMIDSMLHT IYRLSRGRAL 
+             MALALNRYLC SAVLPLLTKC APLFAGTDHR AIMIDSMLHT IYRLSRGRAL 
+             MALALNRYLC LAVLPLITKC APLFAGTEHR AIMVDSMLHT VYRLSRGRSL 
+
+             TKAQRDVIEE CLMSLCKYLR PSMLQHLLRR LVFDVPILNE YAKMPLKLLT 
+             TKAQRDVIEE CLMSLCKYLR PSMLQHLLRR LVFDVPILNE YAKMPLKLLT 
+             TKAQRDVIED CLMSLCRYIR PSMLQHLLRR LVFDVPILNE FAKMPLKLLT 
+
+             NHYERCWKYY CLPNGWANFG VTSEEELHLS RKLFWGIFES LAHKKFDAEL 
+             NHYERCWKYY CLPNGWANFG VTSEEELHLS RKLFWGIFES LAHKKFDAEL 
+             NHYERCWKYY CLPTGWANFG VTSEEELHLT RKLFWGIFDS LAHKKYDPEL 
+
+             FKIAMPCLCA IAGAIPPDYV DASYSSHTEK KASVDAEGNF DPKPVETTNT 
+             FKIAMPCLCA IAGAIPPDYV DASYSSHTEK KASVDAEGNF DPKPVETTNT 
+             YRMAMPCLCA IAGALPPDYV DASYSSKAEK KATVDAEGNF DPRPVETLNV 
+
+             IIPERLDAFI NKYAEHTHDK WAFEKIQNNW TYGEVLDEDA KTHPMLRPYK 
+             IIPERLDAFI NKYAEHTHDK WAFEKIQNNW TYGEVLDEDA KTHPMLRPYK 
+             IIPEKLDSFI NKFAEYTHEK WAFDKIQNNW SYGENIDEEL KTHPMLRPYK 
+
+             TFSEKDKEIY RWPIKESIKA MLAWEWTLEK ARDGEGEVEK KAATRKISQT 
+             TFSEKDKEIY RWPIKESIKA MLAWEWTLEK ARDGEGEVEK KAATRKISQT 
+             TFSEKDKEIY RWPIKESLKA MIAWEWTIEK AREGEEEKTE KKKTRKISQS 
+
+             AQATYDPSHG YSPQPIDISG MTLSRELQSM AEQLAENYHN TWGRKKKVEL 
+             AQATYDPSHG YSPQPIDISG MTLSRELQSM AEQLAENYHN TWGRKKKVEL 
+             AQ.TYDPREG YNPQPPDLSA VTLSRELQAM AEQLAENYHN TWGRKKKQEL 
+
+             QSKGGGTHPL LVPYDTLTAK EKARDREKAQ DLLKFLQLNG YAVTR..GMK 
+             QSKGGGTHPL LVPYDTLTAK EKARDREKAQ DLLKFLQLNG YAVTR..GMK 
+             EAKGGGTHPL LVPYDTLTAK EKARDREKAQ ELLKFLQMNG YAVTRHAGLK 
+
+             DMEQDISSIE KRFAYGFLQK LLKWMDIAQE FIAHLEAVVS SGRVEKSPHE 
+             DMEQDISSIE KRFAYGFLQK LLKWMDIAQE FIAHLEAVVS SGRVEKSPHE 
+             DMELDSSSIE KRFAFGFLQQ LLRWMDISQE FIAHLEAVVS SGRVEKSPHE 
+
+             QEIKFFAKIL LPLVNQYFKN HCLYFLSTPA KVLGSGGHSS NKEKEMIASI 
+             QEIKFFAKIL LPLVNQYFKN HCLYFLSTPA KVLGSGGHSS NKEKEMIASI 
+             QEIKFFAKIL LPLINQYFTN HCLYFLSTPA KVLGSGGHAS NKEKEMITSL 
+
+             FCKLAALVRH RVSLFGTDAS AVVNCLHILS RSLDARTVMK SGPEIVKAGL 
+             FCKLAALVRH RVSLFGTDAS AVVNCLHILS RSLDARTVMK SGPEIVKAGL 
+             FCKLAALVRH RVSLFGTDAP AVVNCLHILA RSLDARTVMK SGPEIVKAGL 
+
+             RQFFESAADD IEKMVENLKL GKVSSRNQ.V KGVSQNINYT TIALLPVLTS 
+             RQFFESAADD IEKMVENLKL GKVSSRNQ.V KGVSQNINYT TIALLPVLTS 
+             RSFFESASED IEKMVENLRL GKVSQARTQV KGVGQNLTYT TVALLPVLTT 
+
+             LFDHIAQHQF GDDVILDDLQ ISCYRIMCSI YSLGTVKTPH AEKQRPALGE 
+             LFDHIAQHQF GDDVILDDLQ ISCYRIMCSI YSLGTVKTPH AEKQRPALGE 
+             LFQHIAQHQF GDDVILDDVQ VSCYRTLCSI YSLGTTKNTY VEKLRPALGE 
+
+             CLAHLAAAMP VAFLEPTLNE FNTFSVYTTK TPRERSILGL PSQVEELCPD 
+             CLAHLAAAMP VAFLEPTLNE FNTFSVYTTK TPRERSILGL PSQVEELCPD 
+             CLARLAAAMP VAFLEPQLNE YNACSVYTTK SPRERAILGL PNSVEEMCPD 
+
+             IPELEVLMKD IHDLAESGAR YTEMPHVIEI TLPMLCNYLP RWWERGLEN. 
+             IPELEVLMKD IHDLAESGAR YTEMPHVIEI TLPMLCNYLP RWWERGLEN. 
+             IPVLERLMAD IGGLAESGAR YTEMPHVIEI TLPMLCSYLP RWWERGPEAP 
+
+             ...FPEQEGQ ICTSVTSEQL NQLLGSIMKI VVNNLGIDEA SWMKRLAVFA 
+             ...FPEQEGQ ICTSVTSEQL NQLLGSIMKI VVNNLGIDEA SWMKRLAVFA 
+             PSALPAGAPP PCTAVTSDHL NSLLGNILRI IVNNLGIDEA SWMKRLAVFA 
+
+             QPIVSRAKPE MLKSHFIPTM EKLKKRCGKV VAEEDHLRME GKTEVDSENG 
+             QPIVSRAKPE MLKSHFIPTM EKLKKRCGKV VAEEDHLRME GKTEVDSENG 
+             QPIVSRARPE LLQSHFIPTI GRLRKRAGKV VSEEEQLRLE AKAEAQEGEL 
+
+             TIRDEFAVLC RDLYALYPLL IRYVDNSRAR WLTNPDPDAE ELFRMVGEVF 
+             TIRDEFAVLC RDLYALYPLL IRYVDNSRAR WLTNPDPDAE ELFRMVGEVF 
+             LVRDEFSVLC RDLYALYPLL IRYVDNNRAQ WLTEPNPSAE ELFRMVGEIF 
+
+             IFWSKSHNFK REEQNFVVMN EINNMSFLTA DSKSKMSKS. ........GG 
+             IFWSKSHNFK REEQNFVVMN EINNMSFLTA DSKSKMSKS. ........GG 
+             IYWSKSHNFK REEQNFVVQN EINNMSFLTA DNKSKMAKVG ACPVSPQSGG 
+
+             SEQERTKKKR RGDRYSVQTS LIVAALKKLL PIGLNMCSPA DQELINLAKI 
+             SEQERTKKKR RGDRYSVQTS LIVAALKKLL PIGLNMCSPA DQELINLAKI 
+             SDQERTKKKR RGDRYSVQTS LIVATLKKML PIGLNMCAPT DQDLITLAKT 
+
+             RYSLKDTDEE VREFLHNNLH LQGKVE.DPA MRWQMSLYKE MAGKAEDAED 
+             RYSLKDTDEE VREFLHNNLH LQGKVE.DPA MRWQMSLYKE MAGKAEDAED 
+             RYALKDTDEE VREFLHNNLH LQGKVEGSPS LRWQMALYRG VPGREEDADD 
+
+             PEKVVKRVQE VSAVLYHIEV TEHPFKSKKM VWHKLLSKQR RRAVVACFRM 
+             PEKVVKRVQE VSAVLYHIEV TEHPFKSKKM VWHKLLSKQR RRAVVACFRM 
+             PEKIVRRVQE VSAVLYYLDQ TEHPYKSKKA VWHKLLSKQR RRAVVACFRM 
+
+             TPLYNIITHR ATNMFLDAYK RNWLETEGYS FEDKMIDDLS VSLDHIRSE. 
+             TPLYNIITHR ATNMFLDAYK RNWLETEGYS FEDKMIDDLS VSLDHIRSE. 
+             TPLYNLPTHR ACNMFLESYK AAWILTEDHS FEDRMIDDLS KAGEQEEEEE 
+
+             ....KKPDPL HQLILHFSRT ALTEKMKLDV DHLYMSYADI MAKGFSVSPP 
+             ....KKPDPL HQLILHFSRT ALTEKMKLDV DHLYMSYADI MAKGFSVSPP 
+             EVEEKKPDPL HQLVLHFSRT ALTEKSKLDE DYLYMAYADI MAKSCHLEEG 
+
+             CSASQ..... ........EK EMEKQRLLYQ QSRLHNRGAA EMVLQMISAC 
+             CSASQ..... ........EK EMEKQRLLYQ QSRLHNRGAA EMVLQMISAC 
+             GENGE...AE EEVEVSFEEK QMEKQRLLYQ QARLHTRGAA EMVLQMISAC 
+
+             KGEPGAMVSS TLKLGISILN GGNSDVQQKM LDYLKDKKDV GFFLSIQSLM 
+             KGEPGAMVSS TLKLGISILN GGNSDVQQKM LDYLKDKKDV GFFLSIQSLM 
+             KGETGAMVSS TLKLGISILN GGNAEVQQKM LDYLKDKKEV GFFQSIQALM 
+
+             QTCSVLDLNA FERQNKAEGL GMVSEEGTNE KVMADDEFTC DLFRFLQLLC 
+             QTCSVLDLNA FERQNKAEGL GMVSEEGTNE KVMADDEFTC DLFRFLQLLC 
+             QTCSVLDLNA FERQNKAEGL GMVNEDGTGE KVMADDEFTQ DLFRFLQLLC 
+
+             EGHNNDFQNY LRTQTGSTTT INVIICTVDY LLRLQESISD FYWYYSGKDI 
+             EGHNNDFQNY LRTQTGSTTT INVIICTVDY LLRLQESISD FYWYYSGKDI 
+             EGHNNDFQNY LRTQTGNTTT INIIICTVDY LLRLQESISD FYWYYSGKDV 
+
+             IDEPGKRNFS KAMNVAKQVF NSLTEYIQGP CTGNQQSLAH SRLWDAVVGF 
+             IDEPGKRNFS KAMNVAKQVF NSLTEYIQGP CTGNQQSLAH SRLWDAVVGF 
+             IEEQGKRNFS KAMSVAKQVF NSLTEYIQGP CTGNQQSLAH SRLWDAVVGF 
+
+             LHVFAHMMMK LAQ....... ..DSSQIGLL KELLDLQKDM VVMLLSLLEG 
+             LHVFAHMMMK LAQ....... ..DSSQIGLL KELLDLQKDM VVMLLSLLEG 
+             LHVFAHMMMK LAQ....... ..DSSQIELL KELLDLQKDM VVMLLSLLEG 
+
+             NVVNGTIAKQ MVDMLVESSS NVEMILKFFD MFLKLKDIVA SDAFRDYVTD 
+             NVVNGTIAKQ MVDMLVESSS NVEMILKFFD MFLKLKDIVA SDAFRDYVTD 
+             NVVNGMIARQ MVDMLVESSS NVEMILKFFD MFLKLKDIVG SEAFQDYVTD 
+
+             PRGLISKKDF SKAMDSQKQY TPAEIQFLLS CSEADENEMI NFEEFADRFQ 
+             PRGLISKKDF SKAMDSQKQY TPAEIQFLLS CSEADENEMI NFEEFADRFQ 
+             PRGLISKKDF QKAMDSQKQF SGPEIQFLLS CSEADENEMI NCEEFANRFQ 
+
+             EPAKDIGFNI AVLLTNLSEH VPHDTRLQNF LEQAESVLNY FRPFLGRIEI 
+             EPAKDIGFNI AVLLTNLSEH VPHDTRLQNF LEQAESVLNY FRPFLGRIEI 
+             EPARDIGFNV AVLLTNLSEH VPHDPRLHNF LELAESILEY FRPYLGRIEI 
+
+             MGASRKIERI YFEISEANRN QWEMPQVRES KRQFIFDVVN EGGESEKMEM 
+             MGASRKIERI YFEISEANRN QWEMPQVRES KRQFIFDVVN EGGESEKMEM 
+             MGASRRIERI YFEISETNRA QWEMPQVKES KRQFIFDVVN EGGEAEKMEL 
+
+             FVNFCEDTIF EMNIA...AH A......... .......... .......... 
+             FVNFCEDTIF EMNIA...AH A......... .......... .......... 
+             FVSFCEDTIF EMQIAAQISE PEGEPETDED EGAGAAEAGA EGAEEGAAGL 
+
+             .....PESTS AFADFLKSVV NFFNMFTFRN LRRRYRRFRK MTVKEMVIGL 
+             .....PESTS AFADFLKSVV NFFNMFTFRN LRRRYRRFRK MTVKEMVIGL 
+             EGTAATAAAG ATARVVAAAG RALRGLSYRS LRRRVRRLRR LTAREAATAV 
+
+             ATFVYTVVMG ILMFVYSICK GFFTLIWKVL FGGGLVESAK KMTVTDILAS 
+             ATFVYTVVMG ILMFVYSICK GFFTLIWKVL FGGGLVESAK KMTVTDILAS 
+             AALLWAAVTR AGAAGAGAAA GALGLLWGSL FGGGLVEGAK KVTVTELLAG 
+
+             MPDPTQDEVH GELPPEPGSR EDQD..TEGG ADLLDPVGGE EEEEDSEERE 
+             MPDPTQDEVH GELPPEPGSR EDQD..TEGG ADLLDPVGGE EEEEDSEERE 
+             MPDPTSDEVH GEQPAGPGGD ADGEGASEGA GDAAEG.AGD EEEAVHEAGP 
+
+             GGRLPGFNTP .......... GGLGDFGETT PEEPPTPEGT PLLKRKLVSR 
+             GGRLPGFNTP .......... GGLGDFGETT PEEPPTPEGT PLLKRKLVSR 
+             GGADGAVAVT DGGPFRPEGA GGLGDMGDTT PAEPPTPEGS PILKRKLGVD 
+
+             HNQIGGQGEE ENAEHEEPPQ ETEKADTENG EKAKKPEAEP EVKEEEPVEE 
+             HNQIGGQGEE ENAEHEEPPQ ETEKADTENG EKAKKPEAEP EVKEEEPVEE 
+             GVEEE..LPP EPEPEPEPEL EPEKADAENG EKEEV....P EPTPEP.... 
+
+             EEITVKAKAK KSKKPVEEGF ELWNELEIQR VKFMNYLSRN FYNLRYLALF 
+             EEITVKAKAK KSKKPVEEGF ELWNELEIQR VKFMNYLSRN FYNLRYLALF 
+             PKKQAPPSPP PKKE..EAGG EFWGELEVQR VKFLNYLSRN FYTLRFLALF 
+
+             IAFALNFILL FYKVSDSPP. GEED.....F EGSGLFEGSG LFEGSGVQED 
+             IAFALNFILL FYKVSDSPP. GEED.....F EGSGLFEGSG LFEGSGVQED 
+             LAFAINFILL FYKVSDSPP. GEDD.....M EGSAAGDVSG AGSG.GSSGW 
+
+             GSGLDDGGED DDEEGPLYYF LEESTGYMEP AMAFLSIVHT IISFLCIIGY 
+             GSGLDDGGED DDEEGPLYYF LEESTGYMEP AMAFLSIVHT IISFLCIIGY 
+             GLGAGEEAEG DEDENMVYYF LEESTGYMEP ALRCLSLLHT LVAFLCIIGY 
+
+             NCLKVPLVIF KREKELARKL EFDGVYVTEQ PEDDDIKGQW DRLVLNTPSF 
+             NCLKVPLVIF KREKELARKL EFDGVYVTEQ PEDDDIKGQW DRLVLNTPSF 
+             NCLKVPLVIF KREKELARKL EFDGLYITEQ PEDDDVKGQW DRLVLNTPSF 
+
+             PNNYWDKFVK RKVLDKYGDI YGRERIAELL GMDLASLDVS AMTHEKKPEP 
+             PNNYWDKFVK RKVLDKYGDI YGRERIAELL GMDLASLDVS AMTHEKKPEP 
+             PSNYWDKFVK RKVLDKHGDI YGRERIAELL GMDLATLEIT AHNERK.PNP 
+
+             DTSMFSWITS IDIKYQIWKF GVVFTDNTFL YLVWYFLMSI LGHYNNFFFA 
+             DTSMFSWITS IDIKYQIWKF GVVFTDNTFL YLVWYFLMSI LGHYNNFFFA 
+             PPGLLTWLMS IDVKYQIWKF GVIFTDNSFL YLGWYMVMSL LGHYNNFFFA 
+
+             AHLLDIAMGV KTLRTILSSV THNGKQLMMT VGLLAVVVYL YTVVAFNFFR 
+             AHLLDIAMGV KTLRTILSSV THNGKQLMMT VGLLAVVVYL YTVVAFNFFR 
+             AHLLDIAMGV KTLRTILSSV THNGKQLVMT VGLLAVVVYL YTVVAFNFFR 
+
+             KFYNKSEDED EPDMKCDDMM TCYLFHMYVG VRAGGGIGDE IEDPAGDEYE 
+             KFYNKSEDED EPDMKCDDMM TCYLFHMYVG VRAGGGIGDE IEDPAGDEYE 
+             KFYNKSEDED EPDMKCDDMM TCYLFHMYVG VRAGGGIGDE IEDPAGDEYE 
+
+             LYRVVFDITF FFFVIVILLA IIQGLIIDAF GELRDQQEQV REDMETKCFI 
+             LYRVVFDITF FFFVIVILLA IIQGLIIDAF GELRDQQEQV REDMETKCFI 
+             LYRVVFDITF FFFVIVILLA IIQGLIIDAF GELRDQQEQV KEDMETKCFI 
+
+             CGIGSDYFDT TPHGFETHTL EEHNLANYMF FLMYLINKDE TEHTGQESYV 
+             CGIGSDYFDT TPHGFETHTL EEHNLANYMF FLMYLINKDE TEHTGQESYV 
+             CGIGSDYFDT TPHGFETHTL EEHNLANYMF FLMYLINKDE TEHTGQESYV 
+
+             WKMYQERCWD FFPAGDCFRK QYEDQL. 
+             WKMYQERCWD FFPAGDCFRK QYEDQL. 
+             WKMYQERCWD FFPAGDCFRK QYEDQLS 
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/pseudowise.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/pseudowise.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/pseudowise.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+Synonymous     : 7
+Nonsynonymous  : 18
+Ka/Ks          : 2.57
+Unlikely       : 0
+Identical      : 5
+Stop           : 0
+Total codons   : 30
+Frameshift     : 0
+Intron         : 1
+//
+Breaking with no 5'SS on END with 625
+Gene 1
+Gene 163 626 
+  Exon 163 213 phase -1
+  Exon 585 626 phase -1
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/psi_xml.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/psi_xml.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/psi_xml.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,179 @@
+<entrySet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:psi="net:sf:psidev:mi" xmlns="net:sf:psidev:mi" xmlns:xin="http://dip.doe-mbi.ucla.edu/xml/XIN" xsi:schemaLocation="net:sf:psidev:mi MIF.xsd" level="1" version="1">
+ <entry>
+   <interactorList>
+     <proteinInteractor id="G_1">
+       <names>
+         <shortLabel/>
+         <fullName>adenine specific DNA methyltransferase (dpnA) HP0050</fullName>
+       </names>
+       <xref>
+         <primaryRef db="DIP" id="3048N"/>
+<secondaryRef db="PIR" id="B64526"/>
+<secondaryRef db="GI" id="2313123"/>
+<secondaryRef db="RefSeq" id="NP_206851"/>
+</xref>
+<organism ncbiTaxId="85962">
+<names>
+<shortLabel/>
+<fullName>Helicobacter pylori 26695</fullName>
+</names>
+</organism>
+</proteinInteractor>
+<proteinInteractor id="G_2">
+<names>
+<shortLabel/>
+<fullName>hypothetical HP0001</fullName>
+</names>
+<xref>
+<primaryRef db="DIP" id="3047N"/>
+<secondaryRef db="SWP" id="O24853"/>
+<secondaryRef db="PIR" id="A64520"/>
+<secondaryRef db="GI" id="2313078"/>
+<secondaryRef db="RefSeq" id="NP_206803"/>
+</xref>
+<organism ncbiTaxId="85962">
+<names>
+<shortLabel/>
+<fullName>Helicobacter pylori 26695</fullName>
+</names>
+</organism>
+</proteinInteractor>
+<proteinInteractor id="G_4">
+<names>
+<shortLabel/>
+<fullName>GTP-binding membrane protein (lepA) HP0355</fullName>
+</names>
+<xref>
+<primaryRef db="DIP" id="3049N"/>
+<secondaryRef db="SWP" id="O25122"/>
+<secondaryRef db="PIR" id="C64564"/>
+<secondaryRef db="GI" id="2313456"/>
+<secondaryRef db="RefSeq" id="NP_207153"/>
+</xref>
+<organism ncbiTaxId="85962">
+<names>
+<shortLabel/>
+<fullName>Helicobacter pylori 26695</fullName>
+</names>
+</organism>
+</proteinInteractor>
+<proteinInteractor id="G_X">
+<names>
+<shortLabel/>
+<fullName>bogus-binding membrane protein (lepA) HP0355</fullName>
+</names>
+<xref>
+<primaryRef db="DIP" id="N"/>
+<secondaryRef db="SWP" id="O2"/>
+<secondaryRef db="PIR" id="C4"/>
+<secondaryRef db="GI" id="2"/>
+<secondaryRef db="RefSeq" id="207153"/>
+</xref>
+<organism ncbiTaxId="85962">
+<names>
+<shortLabel/>
+<fullName>Helicobacter pylori 26695</fullName>
+</names>
+</organism>
+</proteinInteractor>
+
+</interactorList>
+<interactionList>
+<interaction>
+<experimentList>
+<experimentDescription id="DIP_5317X">
+<bibref>
+<xref>
+<primaryRef db="pubmed" id="11196647"/>
+</xref>
+</bibref>
+<interactionDetection>
+<names>
+<shortLabel>Two hybrid test</shortLabel>
+</names>
+<xref>
+<primaryRef db="DO" id="DO:00018"/>
+<secondaryRef db="PSI" id="MI:0018"/>
+</xref>
+</interactionDetection>
+</experimentDescription>
+</experimentList>
+<participantList>
+<proteinParticipant>
+<proteinInteractorRef ref="G_1"/>
+</proteinParticipant>
+<proteinParticipant>
+<proteinInteractorRef ref="G_2"/>
+</proteinParticipant>
+</participantList>
+<xref>
+<primaryRef db="DIP" id="4305E"/>
+</xref>
+</interaction>
+<interaction>
+<experimentList>
+<experimentDescription id="DIP_5318X">
+<bibref>
+<xref>
+<primaryRef db="pubmed" id="11196647"/>
+</xref>
+</bibref>
+<interactionDetection>
+<names>
+<shortLabel>Two hybrid test</shortLabel>
+</names>
+<xref>
+<primaryRef db="DO" id="DO:00018"/>
+<secondaryRef db="PSI" id="MI:0018"/>
+</xref>
+</interactionDetection>
+</experimentDescription>
+</experimentList>
+<participantList>
+<proteinParticipant>
+<proteinInteractorRef ref="G_4"/>
+</proteinParticipant>
+<proteinParticipant>
+<proteinInteractorRef ref="G_2"/>
+</proteinParticipant>
+</participantList>
+<xref>
+<primaryRef db="DIP" id="4306E"/>
+</xref>
+</interaction>
+<interaction>
+<experimentList>
+<experimentDescription id="DIP_5318X">
+<bibref>
+<xref>
+<primaryRef db="pubmed" id="11196647"/>
+</xref>
+</bibref>
+<interactionDetection>
+<names>
+<shortLabel>Two hybrid test</shortLabel>
+</names>
+<xref>
+<primaryRef db="DO" id="DO:00018"/>
+<secondaryRef db="PSI" id="MI:0018"/>
+</xref>
+</interactionDetection>
+</experimentDescription>
+</experimentList>
+<participantList>
+<proteinParticipant>
+<proteinInteractorRef ref="G_X"/>
+</proteinParticipant>
+<proteinParticipant>
+<proteinInteractorRef ref="G_2"/>
+</proteinParticipant>
+</participantList>
+<xref>
+<primaryRef db="DIP" id="1E"/>
+</xref>
+</interaction>
+
+</interactionList>
+</entry>
+</entrySet>
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/psiblastreport.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/psiblastreport.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/psiblastreport.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,7510 @@
+BLASTP 2.0.14 [Jun-29-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= CYS1_DICDI
+         (343 letters)
+
+Database: /home/peter/blast/data/swissprot.pr
+           88,780 sequences; 31,984,247 total letters
+
+Searching..................................................done
+
+
+Results from round 1
+
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR                  721  0.0
+sp|P43295|A494_ARATH PROBABLE CYSTEINE PROTEINASE A494 PRECURSOR      281  1e-75
+sp|P25804|CYSP_PEA CYSTEINE PROTEINASE 15A PRECURSOR (TURGOR-RES...   278  1e-74
+sp|P43296|RD19_ARATH CYSTEINE PROTEINASE RD19A PRECURSOR              275  1e-73
+sp|Q10716|CYS1_MAIZE CYSTEINE PROTEINASE 1 PRECURSOR                  262  7e-70
+sp|P04989|CYS2_DICDI CYSTEINE PROTEINASE 2 PRECURSOR (PRESTALK C...   262  7e-70
+sp|P54640|CYS5_DICDI CYSTEINE PROTEINASE 5 PRECURSOR                  249  7e-66
+sp|Q26534|CATL_SCHMA CATHEPSIN L PRECURSOR (SMCL1)                    244  2e-64
+sp|P14658|CYSP_TRYBB CYSTEINE PROTEINASE PRECURSOR                    243  4e-64
+sp|P35591|CYS1_LEIPI CYSTEINE PROTEINASE 1 PRECURSOR (AMASTIGOTE...   236  5e-62
+sp|P25775|LCPA_LEIME CYSTEINE PROTEINASE A PRECURSOR                  235  9e-62
+sp|P25779|CYSP_TRYCR CRUZIPAIN PRECURSOR (MAJOR CYSTEINE PROTEIN...   232  1e-60
+sp|P13277|CYS1_HOMAM DIGESTIVE CYSTEINE PROTEINASE 1 PRECURSOR        225  1e-58
+sp|P25782|CYS2_HOMAM DIGESTIVE CYSTEINE PROTEINASE 2 PRECURSOR        224  2e-58
+sp|P07154|CATL_RAT CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN...   224  3e-58
+sp|P06797|CATL_MOUSE CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTE...   223  5e-58
+sp|P25784|CYS3_HOMAM DIGESTIVE CYSTEINE PROTEINASE 3 PRECURSOR        221  1e-57
+sp|P41721|CATV_NPVBM VIRAL CATHEPSIN (V-CATH)                         221  2e-57
+sp|P41715|CATV_NPVCF VIRAL CATHEPSIN (V-CATH)                         220  4e-57
+sp|P25975|CATL_BOVIN CATHEPSIN L PRECURSOR                            218  1e-56
+sp|P36400|LCPB_LEIME CYSTEINE PROTEINASE B PRECURSOR                  218  2e-56
+sp|Q05094|CYS2_LEIPI CYSTEINE PROTEINASE 2 PRECURSOR (AMASTIGOTE...   218  2e-56
+sp|P12412|CYSP_VIGMU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYS...   217  3e-56
+sp|P07711|CATL_HUMAN CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTE...   215  8e-56
+sp|Q28944|CATL_PIG CATHEPSIN L PRECURSOR                              214  2e-55
+sp|P25783|CATV_NPVAC VIRAL CATHEPSIN (V-CATH)                         213  3e-55
+sp|Q40143|CYS3_LYCES CYSTEINE PROTEINASE 3 PRECURSOR                  212  7e-55
+sp|O60911|CATM_HUMAN CATHEPSIN L2 PRECURSOR (CATHEPSIN V)             210  5e-54
+sp|P54639|CYS4_DICDI CYSTEINE PROTEINASE 4 PRECURSOR                  209  6e-54
+sp|Q10991|CATL_SHEEP CATHEPSIN L                                      209  6e-54
+sp|P25803|CYSP_PHAVU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYS...   208  2e-53
+sp|P00785|ACTN_ACTCH ACTINIDAIN PRECURSOR (ACTINIDIN)                 206  4e-53
+sp|P43156|CYSP_HEMSP THIOL PROTEASE SEN102 PRECURSOR                  203  6e-52
+sp|P25777|ORYB_ORYSA ORYZAIN BETA CHAIN PRECURSOR                     203  6e-52
+sp|Q10717|CYS2_MAIZE CYSTEINE PROTEINASE 2 PRECURSOR                  201  2e-51
+sp|P00786|CATH_RAT CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPS...   200  3e-51
+sp|O10364|CATV_NPVOP VIRAL CATHEPSIN (V-CATH)                         199  9e-51
+sp|P43297|RD21_ARATH CYSTEINE PROTEINASE RD21A PRECURSOR              198  2e-50
+sp|P15242|TES1_RAT TESTIN 1/2 PRECURSOR (CMB-22/CMB-23)               198  2e-50
+sp|P25776|ORYA_ORYSA ORYZAIN ALPHA CHAIN PRECURSOR                    198  2e-50
+sp|P14080|PAP2_CARPA CHYMOPAPAIN PRECURSOR (PAPAYA PROTEINASE II...   194  2e-49
+sp|P43235|CATK_HUMAN CATHEPSIN K PRECURSOR (CATHEPSIN O) (CATHEP...   194  2e-49
+sp|P25778|ORYC_ORYSA ORYZAIN GAMMA CHAIN PRECURSOR                    194  3e-49
+sp|P25251|CYS4_BRANA CYSTEINE PROTEINASE COT44 PRECURSOR              194  3e-49
+sp|P09668|CATH_HUMAN CATHEPSIN H PRECURSOR                            192  1e-48
+sp|O46427|CATH_PIG CATHEPSIN H PRECURSOR                              191  2e-48
+sp|P05167|ALEU_HORVU THIOL PROTEASE ALEURAIN PRECURSOR                191  2e-48
+sp|P43236|CATK_RABIT CATHEPSIN K PRECURSOR (OC-2 PROTEIN)             191  2e-48
+sp|P10056|PAP3_CARPA CARICAIN PRECURSOR (PAPAYA PROTEINASE OMEGA...   190  3e-48
+sp|P49935|CATH_MOUSE CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHE...   189  6e-48
+sp|P55097|CATK_MOUSE CATHEPSIN K PRECURSOR                            188  1e-47
+sp|P56203|CATW_MOUSE CATHEPSIN W PRECURSOR (LYMPHOPAIN)               185  9e-47
+sp|P25250|CYS2_HORVU CYSTEINE PROTEINASE EP-B 2 PRECURSOR             184  2e-46
+sp|P25249|CYS1_HORVU CYSTEINE PROTEINASE EP-B 1 PRECURSOR             184  3e-46
+sp|P05994|PAP4_CARPA PAPAYA PROTEINASE IV PRECURSOR (PPIV) (PAPA...   183  3e-46
+sp|P22895|P34_SOYBN P34 PROBABLE THIOL PROTEASE PRECURSOR             183  3e-46
+sp|P43234|CATO_HUMAN CATHEPSIN O PRECURSOR                            182  8e-46
+sp|P56202|CATW_HUMAN CATHEPSIN W PRECURSOR (LYMPHOPAIN)               182  1e-45
+sp|P25774|CATS_HUMAN CATHEPSIN S PRECURSOR                            177  3e-44
+sp|P00784|PAPA_CARPA PAPAIN PRECURSOR (PAPAYA PROTEINASE I) (PPI)     176  6e-44
+sp||CATL_CHICK_1 [Segment 1 of 2] CATHEPSIN L                         175  1e-43
+sp|P25326|CATS_BOVIN CATHEPSIN S                                      173  5e-43
+sp|P80884|ANAN_ANACO ANANAIN                                          167  3e-41
+sp|Q02765|CATS_RAT CATHEPSIN S PRECURSOR                              167  4e-41
+sp|P20721|CYSL_LYCES LOW-TEMPERATURE-INDUCED CYSTEINE PROTEINASE...   164  3e-40
+sp|P36184|ACP1_ENTHI CYSTEINE PROTEINASE ACP1 PRECURSOR               162  7e-40
+sp|O17473|CATL_BRUPA CATHEPSIN L-LIKE PRECURSOR                       159  8e-39
+sp|Q01957|CPP1_ENTHI CYSTEINE PROTEINASE 1 PRECURSOR                  156  5e-38
+sp|Q06964|CPP3_ENTHI CYSTEINE PROTEINASE 3 PRECURSOR (CYSTEINE P...   153  4e-37
+sp|Q01958|CPP2_ENTHI CYSTEINE PROTEINASE 2 PRECURSOR                  153  5e-37
+sp|P46102|CYSP_PLAVN CYSTEINE PROTEINASE PRECURSOR                    153  6e-37
+sp|P36185|ACP2_ENTHI CYSTEINE PROTEINASE ACP2 PRECURSOR               150  5e-36
+sp|P25781|CYSP_THEAN CYSTEINE PROTEINASE PRECURSOR                    146  5e-35
+sp|P14518|BROM_ANACO BROMELAIN, STEM                                  146  6e-35
+sp|P22497|CYSP_THEPA CYSTEINE PROTEINASE PRECURSOR                    145  1e-34
+sp|P16311|MMAL_DERFA MAJOR MITE FECAL ALLERGEN DER F 1 PRECURSOR...   144  3e-34
+sp|P25805|CYSP_PLAFA THROPHOZOITE CYSTEINE PROTEINASE PRECURSOR ...   144  3e-34
+sp|P42666|CYSP_PLAVI CYSTEINE PROTEINASE PRECURSOR                    132  1e-30
+sp|P08176|MMAL_DERPT MAJOR MITE FECAL ALLERGEN DER P 1 PRECURSOR...   123  7e-28
+sp|P80067|CATC_RAT DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPP...   117  3e-26
+sp|P97821|CATC_MOUSE DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (D...   116  8e-26
+sp|P53634|CATC_HUMAN DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (D...   113  4e-25
+sp|Q26563|CATC_SCHMA CATHEPSIN C PRECURSOR                            113  7e-25
+sp|P25773|CATL_FELCA CATHEPSIN L (PROGESTERONE-DEPENDENT PROTEIN...   113  7e-25
+sp|P25780|EUM1_EURMA MITE GROUP I ALLERGEN EUR M 1 (EUR M I)          105  1e-22
+sp|Q23894|CYS3_DICDI CYSTEINE PROTEINASE 3 (CYSTEINE PROTEINASE II)    96  1e-19
+sp|P43508|CPR4_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 4 PREC...    94  3e-19
+sp|P05993|PAP5_CARPA CYSTEINE PROTEINASE (CLONE PLBPC13)               90  5e-18
+sp|P25807|CYS1_CAEEL GUT-SPECIFIC CYSTEINE PROTEINASE PRECURSOR        90  5e-18
+sp|P43509|CPR5_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 5 PREC...    89  1e-17
+sp|P00787|CATB_RAT CATHEPSIN B PRECURSOR (CATHEPSIN B1) (RSG-2)        88  3e-17
+sp|P07688|CATB_BOVIN CATHEPSIN B PRECURSOR                             87  5e-17
+sp|P07858|CATB_HUMAN CATHEPSIN B PRECURSOR (CATHEPSIN B1) (APP S...    86  1e-16
+sp|P43157|CYSP_SCHJA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECUR...    85  3e-16
+sp|P25792|CYSP_SCHMA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECUR...    85  3e-16
+sp|P10605|CATB_MOUSE CATHEPSIN B PRECURSOR (CATHEPSIN B1)              84  5e-16
+sp|P43510|CPR6_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 6 PREC...    83  6e-16
+sp|P43233|CATB_CHICK CATHEPSIN B PRECURSOR (CATHEPSIN B1)              83  6e-16
+sp|P25802|CYS1_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PREC...    76  1e-13
+sp|P25793|CYS2_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 2 PREC...    75  2e-13
+sp|P19092|CYS1_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PREC...    75  3e-13
+sp|P43507|CPR3_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3 PREC...    73  7e-13
+sp|P13823|SERA_PLAFG SERINE-REPEAT ANTIGEN PROTEIN PRECURSOR (P1...    71  4e-12
+sp|P32956|CC3_CARCN CYSTEINE PROTEINASE III (CC-III)                   63  8e-10
+sp|P32957|CC4_CARCN CYSTEINE PROTEINASE IV (CC-IV)                     62  2e-09
+sp|Q06544|CYS3_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3            59  1e-08
+sp|P32954|CC1_CARCN CYSTEINE PROTEINASE I (CC-I)                       59  2e-08
+sp|P32955|CC2_CARCN CYSTEINE PROTEINASE II (CC-II)                     58  3e-08
+sp||CATL_CHICK_2 [Segment 2 of 2] CATHEPSIN L                          52  2e-06
+sp|P12399|CT2A_MOUSE CTLA-2-ALPHA PROTEIN PRECURSOR                    42  0.002
+sp|P21381|THPA_THADA THAUMATOPAIN                                      41  0.005
+sp|P05689|CATX_BOVIN CATHEPSIN                                         40  0.006
+sp|P12400|CT2B_MOUSE CTLA-2-BETA PROTEIN PRECURSOR                     39  0.018
+sp|P20736|BM86_BOOMI GLYCOPROTEIN ANTIGEN BM86 PRECURSOR (PROTEC...    35  0.21
+sp|Q11121|PLB1_TORDE LYSOPHOSPHOLIPASE PRECURSOR (PHOSPHOLIPASE B)     35  0.27
+sp|P46992|YJR1_YEAST HYPOTHETICAL 43.0 KD PROTEIN IN CPS1-FPP1 I...    34  0.61
+sp|P28493|PR5_ARATH PATHOGENESIS-RELATED PROTEIN 5 PRECURSOR (PR-5)    32  1.8
+sp|P41901|SPR3_YEAST SPORULATION-SPECIFIC SEPTIN                       32  2.4
+sp|P54634|POLN_LORDV NON-STRUCTURAL POLYPROTEIN [CONTAINS: RNA-D...    31  3.1
+sp|P21173|DNAA_MICLU CHROMOSOMAL REPLICATION INITIATOR PROTEIN DNAA    31  3.1
+sp|P89263|Y022_GVXN HYPOTHETICAL ORF22 HOMOLOG                         31  5.3
+sp|P24896|NU5M_CAEEL NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 5            31  5.3
+sp|P25648|SRB8_YEAST SUPPRESSOR OF RNA POLYMERASE B SRB8               30  7.0
+sp|Q04723|PEPC_LACLC AMINOPEPTIDASE C                                  30  7.0
+sp|Q13867|BLMH_HUMAN BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)         30  9.1
+sp|P87362|BLMH_CHICK BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH) (...    30  9.1
+sp|P70645|BLMH_RAT BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)           30  9.1
+
+>sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 343
+
+ Score =  721 bits (1841), Expect = 0.0
+ Identities = 343/343 (100%), Positives = 343/343 (100%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE
+Sbjct: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT 120
+           ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT
+Sbjct: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT 120
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+           AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY
+Sbjct: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+           EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM
+Sbjct: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+
+Query: 241 IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 300
+           IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF
+Sbjct: 241 IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 300
+
+Query: 301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+Sbjct: 301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+
+
+>sp|P43295|A494_ARATH PROBABLE CYSTEINE PROTEINASE A494 PRECURSOR
+          Length = 313
+
+ Score =  281 bits (712), Expect = 1e-75
+ Identities = 147/316 (46%), Positives = 193/316 (60%), Gaps = 18/316 (5%)
+
+Query: 32  FQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKA---DTKFGVNKFADLSSDE 87
+           F+ KF K Y S EE+  RF +FK+NL       L A+ H+      + GV +F+DL+  E
+Sbjct: 3   FKKKFGKVYGSIEEHYYRFSVFKANL-------LRAMRHQKMDPSARHGVTQFSDLTRSE 55
+
+Query: 88  FKNYYLNNKEAI-FTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           F+  +L  K       D   A  L  +   ++P  FDWR RGAVTPVKNQG CGSCWSFS
+Sbjct: 56  FRRKHLGVKGGFKLPKDANQAPILPTQ---NLPEEFDWRDRGAVTPVKNQGSCGSCWSFS 112
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           TTG +EG HF++  KLVSLSEQ LVDCDHEC + E E +CD GCNGGL  +A+ Y +K G
+Sbjct: 113 TTGALEGAHFLATGKLVSLSEQQLVDCDHEC-DPEEEGSCDSGCNGGLMNSAFEYTLKTG 171
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVE 266
+           G+  E  YPYT   G  C  + + I A +SNF+++  NE  +A  ++  GPLA+A +A  
+Sbjct: 172 GLMREKDYPYTGTDGGSCKLDRSKIVASVSNFSVVSINEDQIAANLIKNGPLAVAINAAY 231
+
+Query: 267 WQFYIGGVFDIPCNPNSLDHGILIVGYSAK--NTIFRKNMPYWIVKNSWGADWGEQGYIY 324
+            Q YIGGV         L+HG+L+VGY +   +    K  PYWI+KNSWG  WGE G+  
+Sbjct: 232 MQTYIGGVSCPYICSRRLNHGVLLVGYGSAGFSQARLKEKPYWIIKNSWGESWGENGFYK 291
+
+Query: 325 LRRGKNTCGVSNFVST 340
+           + +G+N CGV + VST
+Sbjct: 292 ICKGRNICGVDSLVST 307
+
+
+>sp|P25804|CYSP_PEA CYSTEINE PROTEINASE 15A PRECURSOR (TURGOR-RESPONSIVE PROTEIN 15A)
+          Length = 363
+
+ Score =  278 bits (703), Expect = 1e-74
+ Identities = 144/320 (45%), Positives = 201/320 (62%), Gaps = 14/320 (4%)
+
+Query: 26  QSQFLEFQDKFNKKYS-HEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           +  F  F+ KF+K Y+  EE+  RF +FKSNL K +    +  N     + G+ KF+DL+
+Sbjct: 45  EHHFTSFKSKFSKSYATKEEHDYRFGVFKSNLIKAK----LHQNRDPTAEHGITKFSDLT 100
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPV-ADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCW 143
+           + EF+  +L  K+ +    LP  A         ++P  FDWR +GAVTPVK+QG CGSCW
+Sbjct: 101 ASEFRRQFLGLKKRL---RLPAHAQKAPILPTTNLPEDFDWREKGAVTPVKDQGSCGSCW 157
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +EG H+++  KLVSLSEQ LVDCDH C + E   +CD GCNGGL  NA+ Y++
+Sbjct: 158 AFSTTGALEGAHYLATGKLVSLSEQQLVDCDHVC-DPEQAGSCDSGCNGGLMNNAFEYLL 216
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAAD 263
+           ++GG+  E  Y YT   G+ C F+ + + A +SNF+++  +E  +A  +V  GPLA+A +
+Sbjct: 217 ESGGVVQEKDYAYTGRDGS-CKFDKSKVVASVSNFSVVTLDEDQIAANLVKNGPLAVAIN 275
+
+Query: 264 AVEWQFYIGGV-FDIPCNPNSLDHGILIVGY--SAKNTIFRKNMPYWIVKNSWGADWGEQ 320
+           A   Q Y+ GV     C  + LDHG+L+VG+   A   I  K  PYWI+KNSWG +WGEQ
+Sbjct: 276 AAWMQTYMSGVSCPYVCAKSRLDHGVLLVGFGKGAYAPIRLKEKPYWIIKNSWGQNWGEQ 335
+
+Query: 321 GYIYLRRGKNTCGVSNFVST 340
+           GY  + RG+N CGV + VST
+Sbjct: 336 GYYKICRGRNVCGVDSMVST 355
+
+
+>sp|P43296|RD19_ARATH CYSTEINE PROTEINASE RD19A PRECURSOR
+          Length = 368
+
+ Score =  275 bits (695), Expect = 1e-73
+ Identities = 155/359 (43%), Positives = 205/359 (56%), Gaps = 34/359 (9%)
+
+Query: 6   LFVLAVFTVFVSSR---------------GIPPE---EQSQFLEFQDKFNKKY-SHEEYL 46
+           +FVL+ F V VSS                G  P+    +  F  F+ KF K Y S+EE+ 
+Sbjct: 10  VFVLSFFIVSVSSSDVNDGDDLVIRQVVGGAEPQVLTSEDHFSLFKRKFGKVYASNEEHD 69
+
+Query: 47  ERFEIFKSNLGKIEELNLIAINHKADTK--FGVNKFADLSSDEFKNYYLNNKEAI-FTDD 103
+            RF +FK+NL +         + K D     GV +F+DL+  EF+  +L  +       D
+Sbjct: 70  YRFSVFKANLRRARR------HQKLDPSATHGVTQFSDLTRSEFRKKHLGVRSGFKLPKD 123
+
+Query: 104 LPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 163
+              A  L  E   ++P  FDWR  GAVTPVKNQG CGSCWSFS TG +EG +F++  KLV
+Sbjct: 124 ANKAPILPTE---NLPEDFDWRDHGAVTPVKNQGSCGSCWSFSATGALEGANFLATGKLV 180
+
+Query: 164 SLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ 223
+           SLSEQ LVDCDHEC + E  ++CD GCNGGL  +A+ Y +K GG+  E  YPYT + G  
+Sbjct: 181 SLSEQQLVDCDHEC-DPEEADSCDSGCNGGLMNSAFEYTLKTGGLMKEEDYPYTGKDGKT 239
+
+Query: 224 CNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNS 283
+           C  + + I A +SNF++I  +E  +A  +V  GPLA+A +A   Q YIGGV         
+Sbjct: 240 CKLDKSKIVASVSNFSVISIDEEQIAANLVKNGPLAVAINAGYMQTYIGGVSCPYICTRR 299
+
+Query: 284 LDHGILIVGYSAKN--TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVST 340
+           L+HG+L+VGY A        K  PYWI+KNSWG  WGE G+  + +G+N CGV + VST
+Sbjct: 300 LNHGVLLVGYGAAGYAPARFKEKPYWIIKNSWGETWGENGFYKICKGRNICGVDSMVST 358
+
+
+>sp|Q10716|CYS1_MAIZE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 371
+
+ Score =  262 bits (663), Expect = 7e-70
+ Identities = 138/324 (42%), Positives = 189/324 (57%), Gaps = 15/324 (4%)
+
+Query: 26  QSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           +S FL F  +F K Y   +E+  R  +FK NL +     L+        + GV KF+DL+
+Sbjct: 45  ESHFLSFVQRFGKSYKDADEHAYRLSVFKDNLRRARRHQLL----DPSAEHGVTKFSDLT 100
+
+Query: 85  SDEFKNYYLN---NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGS 141
+             EF+  YL    ++ A+  +    A        + +P  FDWR  GAV PVKNQG CGS
+Sbjct: 101 PAEFRRTYLGLRKSRRALLRELGESAHEAPVLPTDGLPDDFDWRDHGAVGPVKNQGSCGS 160
+
+Query: 142 CWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNY 201
+           CWSFS +G +EG H+++  KL  LSEQ  VDCDHEC   E  ++CD GCNGGL   A++Y
+Sbjct: 161 CWSFSASGALEGAHYLATGKLEVLSEQQFVDCDHECDSSE-PDSCDSGCNGGLMTTAFSY 219
+
+Query: 202 IIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIA 261
+           + K GG+++E  YPYT   G +C F+ + I A + NF+++  +E  ++  ++  GPLAI 
+Sbjct: 220 LQKAGGLESEKDYPYTGSDG-KCKFDKSKIVASVQNFSVVSVDEAQISANLIKHGPLAIG 278
+
+Query: 262 ADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSWGADWGE 319
+            +A   Q YIGGV         LDHG+L+VGY A     I  K+ PYWI+KNSWG +WGE
+Sbjct: 279 INAAYMQTYIGGVSCPYICGRHLDHGVLLVGYGASGFAPIRLKDKPYWIIKNSWGENWGE 338
+
+Query: 320 QGYIYLRRG---KNTCGVSNFVST 340
+            GY  + RG   +N CGV + VST
+Sbjct: 339 NGYYKICRGSNVRNKCGVDSMVST 362
+
+
+>sp|P04989|CYS2_DICDI CYSTEINE PROTEINASE 2 PRECURSOR (PRESTALK CATHEPSIN)
+          Length = 376
+
+ Score =  262 bits (663), Expect = 7e-70
+ Identities = 147/383 (38%), Positives = 213/383 (55%), Gaps = 55/383 (14%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIP-------PEEQSQFLEFQDKFNKKYSHEEYLERFEIFK 53
+           M++++  +L +F  F  +   P        + ++ F E+  KFN++YS  E+  R+ IFK
+Sbjct: 1   MRLLVFLILLIFVNFSFANVRPNGRRFSESQYRTAFTEWTLKFNRQYSSSEFSNRYSIFK 60
+
+Query: 54  SNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK-EAIFTDDLPVADYLDD 112
+           SN+  ++  N       + T  G+N FAD++++E++  YL  +  A   +     + L+ 
+Sbjct: 61  SNMDYVDNWNS---KGDSQTVLGLNNFADITNEEYRKTYLGTRVNAHSYNGYDGREVLNV 117
+
+Query: 113 EFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 172
+           E + + P + DWRT+ AVTP+K+QGQCGSCWSFSTTG+ EG H +   KLVSLSEQNLVD
+Sbjct: 118 EDLQTNPKSIDWRTKNAVTPIKDQGQCGSCWSFSTTGSTEGAHALKTKKLVSLSEQNLVD 177
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 232
+           C        G E  + GC+GGL  NA++YIIKN GI TESSYPYTAETG+ C FN ++IG
+Sbjct: 178 C-------SGPEE-NFGCDGGLMNNAFDYIIKNKGIDTESSYPYTAETGSTCLFNKSDIG 229
+
+Query: 233 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGIL 289
+           A I  +  I     +        GP+++A DA    +Q Y  G++  P C+P  LDHG+L
+Sbjct: 230 ATIKGYVNITAGSEISLENGAQHGPVSVAIDASHNSFQLYTSGIYYEPKCSPTELDHGVL 289
+
+Query: 290 IVGY--------------------------------SAKNTIFRKNMPYWIVKNSWGADW 317
+           +VGY                                 + +++  K   YWIVKNSWG  W
+Sbjct: 290 VVGYGVQGKDDEGPVLNRKQTIVIHKNEDNKVESSDDSSDSVRPKANNYWIVKNSWGTSW 349
+
+Query: 318 GEQGYIYLRRG-KNTCGVSNFVS 339
+           G +GYI + +  KN CG+++  S
+Sbjct: 350 GIKGYILMSKDRKNNCGIASVSS 372
+
+
+>sp|P54640|CYS5_DICDI CYSTEINE PROTEINASE 5 PRECURSOR
+          Length = 344
+
+ Score =  249 bits (629), Expect = 7e-66
+ Identities = 139/362 (38%), Positives = 201/362 (55%), Gaps = 37/362 (10%)
+
+Query: 1   MKVI-LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKI 59
+           MKV+  L VL V       +    + ++ F ++     K Y+ EE+  R+ IF +N+  +
+Sbjct: 1   MKVLSFLCVLLVSVATAKQQFSELQYRNAFTDWMITHQKSYTSEEFGARYNIFTANMDYV 60
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           ++ N    +  ++T  G+N FAD++++E++N YL  K   F     +    +    NS  
+Sbjct: 61  QQWN----SKGSETVLGLNNFADITNEEYRNTYLGTK---FDASSLIGTQEEKVHTNSSA 113
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+            + DWR+ GAVTPVKNQGQCG CWSFSTTG+ EG HF S+ +LVSLSEQNL+DC  E   
+Sbjct: 114 ASKDWRSEGAVTPVKNQGQCGGCWSFSTTGSTEGAHFQSKGELVSLSEQNLIDCSTE--- 170
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+                  + GC+GGL   A+ YII N GI TESSYPY AE G +C + S N GA +S++ 
+Sbjct: 171 -------NSGCDGGLMTYAFEYIINNNGIDTESSYPYKAENG-KCEYKSENSGATLSSYK 222
+
+Query: 240 MIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGY--- 293
+            +           V+  P+++A DA    +Q Y  G++  P C+  +LDHG+L VGY   
+Sbjct: 223 TVTAGSESSLESAVNVNPVSVAIDASHQSFQLYTSGIYYEPECSSENLDHGVLAVGYGSG 282
+
+Query: 294 -----------SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTS 341
+                      S+ N     +  YWIVKNSWG  WG +GYI + R + N CG+++  S  
+Sbjct: 283 SGSSSGQSSGQSSGNLSASSSNEYWIVKNSWGTSWGIEGYILMSRNRDNNCGIASSASFP 342
+
+Query: 342 II 343
+           ++
+Sbjct: 343 VV 344
+
+
+>sp|Q26534|CATL_SCHMA CATHEPSIN L PRECURSOR (SMCL1)
+          Length = 319
+
+ Score =  244 bits (617), Expect = 2e-64
+ Identities = 128/326 (39%), Positives = 190/326 (58%), Gaps = 22/326 (6%)
+
+Query: 21  IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+           +P     ++++F+ K+ K+Y   E   RF IFKSN+ K +   L  +  +    +GV  +
+Sbjct: 12  LPGNVDEKYVQFKLKYRKQYHETEDEIRFNIFKSNILKAQ---LYQVFVRGSAIYGVTPY 68
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG 140
+           +DL++DEF   +L     + +        L  E +N+IP  FDWR +GAVT VKNQG CG
+Sbjct: 69  SDLTTDEFARTHLTASWVVPSSRSNTPTSLGKE-VNNIPKNFDWREKGAVTEVKNQGMCG 127
+
+Query: 141 SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN 200
+           SCW+FSTTGNVE Q F    KL+SLSEQ LVDCD            D+GCNGGL  NAY 
+Sbjct: 128 SCWAFSTTGNVESQWFRKTGKLLSLSEQQLVDCD----------GLDDGCNGGLPSNAYE 177
+
+Query: 201 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAI 260
+            IIK GG+  E +YPY A+   +C+  +  +   I++   + ++ET +A ++     +++
+Sbjct: 178 SIIKMGGLMLEDNYPYDAK-NEKCHLKTDGVAVYINSSVNLTQDETELAAWLYHNSTISV 236
+
+Query: 261 AADAVEWQFYIGGV---FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+             +A+  QFY  G+   + I C+   LDH +L+VGY     +  KN P+WIVKNSWG +W
+Sbjct: 237 GMNALLLQFYQHGISHPWWIFCSKYLLDHAVLLVGYG----VSEKNEPFWIVKNSWGVEW 292
+
+Query: 318 GEQGYIYLRRGKNTCGVSNFVSTSII 343
+           GE GY  + RG  +CG++   ++++I
+Sbjct: 293 GENGYFRMYRGDGSCGINTVATSAMI 318
+
+
+>sp|P14658|CYSP_TRYBB CYSTEINE PROTEINASE PRECURSOR
+          Length = 450
+
+ Score =  243 bits (614), Expect = 4e-64
+ Identities = 136/346 (39%), Positives = 193/346 (55%), Gaps = 26/346 (7%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEE 61
+           V+L     + +V + S  +    + +F  F+ K+ K Y   +E   RF  F+ N+   E+
+Sbjct: 15  VLLAMAACLASVALGSLHVEESLEMRFAAFKKKYGKVYKDAKEEAFRFRAFEENM---EQ 71
+
+Query: 62  LNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTA 121
+             + A  +   T FGV  F+D++ +EF+  Y N           +   ++       P A
+Sbjct: 72  AKIQAAANPYAT-FGVTPFSDMTREEFRARYRNGASYFAAAQKRLRKTVNVT-TGRAPAA 129
+
+Query: 122 FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYE 181
+            DWR +GAVTPVK QGQCGSCW+FST GN+EGQ  ++ N LVSLSEQ LV CD       
+Sbjct: 130 VDWREKGAVTPVKVQGQCGSCWAFSTIGNIEGQWQVAGNPLVSLSEQMLVSCD------- 182
+
+Query: 182 GEEACDEGCNGGLQPNAYNYIIKN--GGIQTESSYPYTAETG--TQCNFNSANIGAKISN 237
+                D GCNGGL  NA+N+I+ +  G + TE+SYPY +  G   QC  N   IGA I++
+Sbjct: 183 ---TIDSGCNGGLMDNAFNWIVNSNGGNVFTEASYPYVSGNGEQPQCQMNGHEIGAAITD 239
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN 297
+              +P++E  +A Y+   GPLAIA DA  +  Y GG+    C    LDHG+L+VGY+  +
+Sbjct: 240 HVDLPQDEDAIAAYLAENGPLAIAVDAESFMDYNGGIL-TSCTSKQLDHGVLLVGYNDNS 298
+
+Query: 298 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+                N PYWI+KNSW   WGE GYI + +G N C ++  VS++++
+Sbjct: 299 -----NPPYWIIKNSWSNMWGEDGYIRIEKGTNQCLMNQAVSSAVV 339
+
+
+>sp|P35591|CYS1_LEIPI CYSTEINE PROTEINASE 1 PRECURSOR (AMASTIGOTE CYSTEINE PROTEINASE
+           A-1)
+          Length = 354
+
+ Score =  236 bits (596), Expect = 5e-62
+ Identities = 146/350 (41%), Positives = 196/350 (55%), Gaps = 38/350 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGI-------PPEEQ----SQFLEFQDKFNKKYSHE-EYLERFEIF 52
+           LLF + V  +FV   G        PP +     + +  F+ +  K +  + E   RF  F
+Sbjct: 7   LLFAIVVTILFVVCYGSALIAQTPPPVDNFVASAHYGSFKKRHGKAFGGDAEEGHRFNAF 66
+
+Query: 53  KSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN-NKEAIFTDDLPVADYLD 111
+           K N+     LN    +   D      KFADL+  EF   YLN +  A    D     ++D
+Sbjct: 67  KQNMQTAYFLNTQNPHAHYDVS---GKFADLTPQEFAKLYLNPDYYARHLKDHKEDVHVD 123
+
+Query: 112 DEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLV 171
+           D   + + +  DWR +GAVTPVKNQG CGSCW+FS  GN+EGQ   S + LVSLSEQ LV
+Sbjct: 124 DSAPSGVMSV-DWRDKGAVTPVKNQGLCGSCWAFSAIGNIEGQWAASGHSLVSLSEQMLV 182
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQ--CNFN 227
+            CD+           DEGCNGGL   A N+I++  NG + TE+SYPYT+  GT+  C+ +
+Sbjct: 183 SCDN----------IDEGCNGGLMDQAMNWIMQSHNGSVFTEASYPYTSGGGTRPPCH-D 231
+
+Query: 228 SANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHG 287
+              +GAKI+ F  +P +E  +A ++   GP+A+A DA  WQ Y GGV  + C   SL+HG
+Sbjct: 232 EGEVGAKITGFLSLPHDEERIAEWVEKRGPVAVAVDATTWQLYFGGVVSL-CLAWSLNHG 290
+
+Query: 288 ILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNF 337
+           +LIVG++ KN       PYWIVKNSWG+ WGE+GYI L  G N C + N+
+Sbjct: 291 VLIVGFN-KNA----KPPYWIVKNSWGSSWGEKGYIRLAMGSNQCMLKNY 335
+
+
+>sp|P25775|LCPA_LEIME CYSTEINE PROTEINASE A PRECURSOR
+          Length = 354
+
+ Score =  235 bits (594), Expect = 9e-62
+ Identities = 146/359 (40%), Positives = 195/359 (53%), Gaps = 56/359 (15%)
+
+Query: 5   LLFVLAVFTVFVSSRGI-------PPEEQ----SQFLEFQDKFNKKYSHE-EYLERFEIF 52
+           LLF + V  +FV   G        PP +     + +  F+ +  K +  + E   RF  F
+Sbjct: 7   LLFAIVVTILFVVCYGSALIAQTPPPVDNFVASAHYGSFKKRHGKAFGGDAEEGHRFNAF 66
+
+Query: 53  KSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN----------NKEAIFTD 102
+           K N+     LN    +   D      KFADL+  EF   YLN          +KE +  D
+Sbjct: 67  KQNMQTAYFLNTQNPHAHYDVS---GKFADLTPQEFAKLYLNPDYYARHLKNHKEDVHVD 123
+
+Query: 103 DLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKL 162
+           D   +  +          + DWR +GAVTPVKNQG CGSCW+FS  GN+EGQ   S + L
+Sbjct: 124 DSAPSGVM----------SVDWRDKGAVTPVKNQGLCGSCWAFSAIGNIEGQWAASGHSL 173
+
+Query: 163 VSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAET 220
+           VSLSEQ LV CD+           DEGCNGGL   A N+I++  NG + TE+SYPYT+  
+Sbjct: 174 VSLSEQMLVSCDN----------IDEGCNGGLMDQAMNWIMQSHNGSVFTEASYPYTSGG 223
+
+Query: 221 GTQ--CNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIP 278
+           GT+  C+ +   +GAKI+ F  +P +E  +A ++   GP+A+A DA  WQ Y GGV  + 
+Sbjct: 224 GTRPPCH-DEGEVGAKITGFLSLPHDEERIAEWVEKRGPVAVAVDATTWQLYFGGVVSL- 281
+
+Query: 279 CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNF 337
+           C   SL+HG+LIVG++ KN       PYWIVKNSWG+ WGE+GYI L  G N C + N+
+Sbjct: 282 CLAWSLNHGVLIVGFN-KNA----KPPYWIVKNSWGSSWGEKGYIRLAMGSNQCMLKNY 335
+
+
+>sp|P25779|CYSP_TRYCR CRUZIPAIN PRECURSOR (MAJOR CYSTEINE PROTEINASE) (CRUZAINE)
+          Length = 467
+
+ Score =  232 bits (585), Expect = 1e-60
+ Identities = 137/350 (39%), Positives = 191/350 (54%), Gaps = 30/350 (8%)
+
+Query: 3   VILLFVLAVFTVFV--SSRGIPPEEQ--SQFLEFQDKFNKKY-SHEEYLERFEIFKSNLG 57
+           ++L  VL V    V  ++  +  EE   SQF EF+ K  + Y S  E   R  +F+ NL 
+Sbjct: 8   LLLAAVLVVMACLVPAATASLHAEETLTSQFAEFKQKHGRVYESAAEEAFRLSVFRENLF 67
+
+Query: 58  KIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS 117
+            +  L+  A  H     FGV  F+DL+ +EF++ Y +N  A F      A       +  
+Sbjct: 68  -LARLHAAANPHAT---FGVTPFSDLTREEFRSRY-HNGAAHFAAAQERARVPVKVEVVG 122
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+            P A DWR RGAVT VK+QGQCGSCW+FS  GNVE Q F++ + L +LSEQ LV CD   
+Sbjct: 123 APAAVDWRARGAVTAVKDQGQCGSCWAFSAIGNVECQWFLAGHPLTNLSEQMLVSCD--- 179
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQ--CNFNSANIGA 233
+                    D GC+GGL  NA+ +I++  NG + TE SYPY +  G    C  +   +GA
+Sbjct: 180 -------KTDSGCSGGLMNNAFEWIVQENNGAVYTEDSYPYASGEGISPPCTTSGHTVGA 232
+
+Query: 234 KISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGY 293
+            I+    +P++E  +A ++   GP+A+A DA  W  Y GGV    C    LDHG+L+VGY
+Sbjct: 233 TITGHVELPQDEAQIAAWLAVNGPVAVAVDASSWMTYTGGVM-TSCVSEQLDHGVLLVGY 291
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           +    +     PYWI+KNSW   WGE+GYI + +G N C V    S++++
+Sbjct: 292 NDSAAV-----PYWIIKNSWTTQWGEEGYIRIAKGSNQCLVKEEASSAVV 336
+
+
+>sp|P13277|CYS1_HOMAM DIGESTIVE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 322
+
+ Score =  225 bits (567), Expect = 1e-58
+ Identities = 130/341 (38%), Positives = 182/341 (53%), Gaps = 33/341 (9%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKI 59
+           MKV+ LF+  +     +           + EF+ KF +KY   EE   R  +F  NL  I
+Sbjct: 1   MKVVALFLFGLALAAANP---------SWEEFKGKFGRKYVDLEEERYRLNVFLDNLQYI 51
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           EE N      +      +N+F+D+++++F       K+       P A +   +      
+Sbjct: 52  EEFNKKYERGEVTYNLAINQFSDMTNEKFNAVMKGYKKG----PRPAAVFTSTDAAPE-S 106
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+           T  DWRT+GAVTPVK+QGQCGSCW+FSTTG +EGQHF+   +LVSLSEQ LVDC      
+Sbjct: 107 TEVDWRTKGAVTPVKDQGQCGSCWAFSTTGGIEGQHFLKTGRLVSLSEQQLVDC------ 160
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+             G    ++GCNGG    A  Y+  NGG+ TESSYPY A   T C FNS  IGA  + + 
+Sbjct: 161 -AGGSYYNQGCNGGWVERAIMYVRDNGGVDTESSYPYEARDNT-CRFNSNTIGATCTGYV 218
+
+Query: 240 MIPK-NETVMAGYIVSTGPLAIAADAVEWQF---YIGGVFDIPCNPNSLDHGILIVGYSA 295
+            I + +E+ +       GP+++A DA    F   Y G  ++  C+ + LDH +L VGY +
+Sbjct: 219 GIAQGSESALKTATRDIGPISVAIDASHRSFQSYYTGVYYEPSCSSSQLDHAVLAVGYGS 278
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVS 335
+           +         +W+VKNSW   WGE GYI + R + N CG++
+Sbjct: 279 EG-----GQDFWLVKNSWATSWGESGYIKMARNRNNNCGIA 314
+
+
+>sp|P25782|CYS2_HOMAM DIGESTIVE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 323
+
+ Score =  224 bits (566), Expect = 2e-58
+ Identities = 132/349 (37%), Positives = 188/349 (53%), Gaps = 32/349 (9%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKI 59
+           MKV +LF+  V     S           +  F+ K+ ++Y   EE   R  IF+ N   I
+Sbjct: 1   MKVAVLFLCGVALAAASP---------SWEHFKGKYGRQYVDAEEDSYRRVIFEQNQKYI 51
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           EE N    N +      +NKF D++ +EF      N   I     PV+ +   +      
+Sbjct: 52  EEFNKKYENGEVTFNLAMNKFGDMTLEEFNAVMKGN---IPRRSAPVSVFYPKKETGPQA 108
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+           T  DWRT+GAVTPVK+QGQCGSCW+FSTTG++EGQHF+    L+SL+EQ LVDC      
+Sbjct: 109 TEVDWRTKGAVTPVKDQGQCGSCWAFSTTGSLEGQHFLKTGSLISLAEQQLVDC------ 162
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+                   +GCNGG   +A++YI  N GI TE++YPY A  G+ C F+S ++ A  S  T
+Sbjct: 163 --SRPYGPQGCNGGWMNDAFDYIKANNGIDTEAAYPYEARDGS-CRFDSNSVAATCSGHT 219
+
+Query: 240 MIPK-NETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYSA 295
+            I   +ET +   +   GP+++  DA    +QFY  GV+  P C+P+ LDH +L VGY +
+Sbjct: 220 NIASGSETGLQQAVRDIGPISVTIDAAHSSFQFYSSGVYYEPSCSPSYLDHAVLAVGYGS 279
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+           +         +W+VKNSW   WG+ GYI + R + N CG++   S  ++
+Sbjct: 280 EG-----GQDFWLVKNSWATSWGDAGYIKMSRNRNNNCGIATVASYPLV 323
+
+
+>sp|P07154|CATL_RAT CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP) (CYCLIC
+           PROTEIN-2) (CP-2)
+          Length = 334
+
+ Score =  224 bits (564), Expect = 3e-58
+ Identities = 127/351 (36%), Positives = 195/351 (55%), Gaps = 31/351 (8%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEEL 62
+           ++LL VL + T   + +       +Q+ +++    + Y   E   R  +++ N+  I+  
+Sbjct: 4   LLLLAVLCLGTALATPK-FDQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLH 62
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKN------YYLNNKEAIFTDDLPVADYLDDEFIN 116
+           N    N K      +N F D++++EF+       +  + K  +F + L +          
+Sbjct: 63  NGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKGRLFQEPLML---------- 112
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+            IP   DWR +G VTPVKNQGQCGSCW+FS +G +EGQ F+   KL+SLSEQNLVDC H+
+Sbjct: 113 QIPKTVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHD 172
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+               +G    ++GCNGGL   A+ YI +NGG+ +E SYPY A+ G+ C + +    A  +
+Sbjct: 173 ----QG----NQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS-CKYRAEYAVANDT 223
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGY 293
+            F  IP+ E  +   + + GP+++A DA     QFY  G++  P C+   LDHG+L+VGY
+Sbjct: 224 GFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLVVGY 283
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVSTSII 343
+             + T   K+  YW+VKNSWG +WG  GYI + + +N  CG++   S  I+
+Sbjct: 284 GYEGTDSNKD-KYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIV 333
+
+
+>sp|P06797|CATL_MOUSE CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP)
+          Length = 334
+
+ Score =  223 bits (562), Expect = 5e-58
+ Identities = 126/351 (35%), Positives = 198/351 (55%), Gaps = 31/351 (8%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEEL 62
+           ++LL VL + T   + +       +++ +++    + Y   E   R  I++ N+  I+  
+Sbjct: 4   LLLLAVLCLGTALATPK-FDQTFSAEWHQWKSTHRRLYGTNEEEWRRAIWEKNMRMIQLH 62
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKN------YYLNNKEAIFTDDLPVADYLDDEFIN 116
+           N    N +      +N F D++++EF+       +  + K  +F + L +          
+Sbjct: 63  NGEYSNGQHGFSMEMNAFGDMTNEEFRQVVNGYRHQKHKKGRLFQEPLML---------- 112
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+            IP + DWR +G VTPVKNQGQCGSCW+FS +G +EGQ F+   KL+SLSEQNLVDC H 
+Sbjct: 113 KIPKSVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHA 172
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+               +G    ++GCNGGL   A+ YI +NGG+ +E SYPY A+ G+ C + +    A  +
+Sbjct: 173 ----QG----NQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS-CKYRAEFAVANDT 223
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGY 293
+            F  IP+ E  +   + + GP+++A DA     QFY  G++  P C+  +LDHG+L+VGY
+Sbjct: 224 GFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKNLDHGVLLVGY 283
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+             + T   KN  YW+VKNSWG++WG +GYI + + + N CG++   S  ++
+Sbjct: 284 GYEGTDSNKN-KYWLVKNSWGSEWGMEGYIKIAKDRDNHCGLATAASYPVV 333
+
+
+>sp|P25784|CYS3_HOMAM DIGESTIVE CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 321
+
+ Score =  221 bits (558), Expect = 1e-57
+ Identities = 123/317 (38%), Positives = 182/317 (56%), Gaps = 37/317 (11%)
+
+Query: 32  FQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEF-- 88
+           F+ ++ +KY   +E L R  +F+ N   IE+ N    N +   K  +N+F D++++EF  
+Sbjct: 23  FKTQYGRKYGDAKEELYRQRVFQQNEQLIEDFNKKFENGEVTFKVAMNQFGDMTNEEFNA 82
+
+Query: 89  --KNYYLNNK---EAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCW 143
+             K Y   ++   +A+FT +              +    DWRT+  VTPVK+Q QCGSCW
+Sbjct: 83  VMKGYKKGSRGEPKAVFTAEA-----------GPMAADVDWRTKALVTPVKDQEQCGSCW 131
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FS TG +EGQHF+  ++LVSLSEQ LVDC          +  ++GC GG   +A++YI 
+Sbjct: 132 AFSATGALEGQHFLKNDELVSLSEQQLVDC--------STDYGNDGCGGGWMTSAFDYIK 183
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAAD 263
+            NGGI TESSYPY AE    C F++ +IGA  +    +   E  +   +   GP+++A D
+Sbjct: 184 DNGGIDTESSYPYEAE-DRSCRFDANSIGAICTGSVEVQHTEEALQEAVSGVGPISVAID 242
+
+Query: 264 A--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQ 320
+           A    +QFY  GV ++  C+P  LDHG+L VGY  ++T       YW+VKNSWG+ WG+ 
+Sbjct: 243 ASHFSFQFYSSGVYYEQNCSPTFLDHGVLAVGYGTEST-----KDYWLVKNSWGSSWGDA 297
+
+Query: 321 GYIYLRRGK-NTCGVSN 336
+           GYI + R + N CG+++
+Sbjct: 298 GYIKMSRNRDNNCGIAS 314
+
+
+>sp|P41721|CATV_NPVBM VIRAL CATHEPSIN (V-CATH)
+          Length = 323
+
+ Score =  221 bits (557), Expect = 2e-57
+ Identities = 131/342 (38%), Positives = 181/342 (52%), Gaps = 26/342 (7%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELN 63
+           +LF L V+ V  S+   P +  + F EF  +FNK YS E E L RF+IF+ NL +I    
+Sbjct: 4   ILFYLFVYAVVKSAAYDPLKAPNYFEEFVHRFNKNYSSEVEKLRRFKIFQHNLNEI---- 59
+
+Query: 64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD 123
+            I  N     K+ +NKF+DLS DE    Y        T +      LD       P  FD
+Sbjct: 60  -INKNQNDSAKYEINKFSDLSKDETIAKYTGLSLPTQTQNFCKVILLDQP-PGKGPLEFD 117
+
+Query: 124 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+           WR    VT VKNQG CG+CW+F+T G++E Q  I  N+L++LSEQ ++DCD         
+Sbjct: 118 WRRLNKVTSVKNQGMCGACWAFATLGSLESQFAIKHNELINLSEQQMIDCDF-------- 169
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN-FTMIP 242
+              D GCNGGL   A+  IIK GG+Q ES YPY A+    C  NS     ++ + +  I 
+Sbjct: 170 --VDAGCNGGLLHTAFEAIIKMGGVQLESDYPYEAD-NNNCRMNSNKFLVQVKDCYRYII 226
+
+Query: 243 KNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRK 302
+             E  +   +   GP+ +A DA +   Y  G+    C  + L+H +L+VGY  +N     
+Sbjct: 227 VYEEKLKDLLPLVGPIPMAIDAADIVNYKQGIIKY-CFDSGLNHAVLLVGYGVEN----- 280
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN-FVSTSII 343
+           N+PYW  KN+WG DWGE G+  +++  N CG+ N   ST++I
+Sbjct: 281 NIPYWTFKNTWGTDWGEDGFFRVQQNINACGMRNELASTAVI 322
+
+
+>sp|P41715|CATV_NPVCF VIRAL CATHEPSIN (V-CATH)
+          Length = 324
+
+ Score =  220 bits (554), Expect = 4e-57
+ Identities = 131/344 (38%), Positives = 188/344 (54%), Gaps = 27/344 (7%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+           M  I+L++L    V  ++  +  +  + F +F  KFNK YS E E L RF+IF+ NL +I
+Sbjct: 1   MNKIVLYLLVYGAVQCAAYDVL-KAPNYFEDFLHKFNKSYSSESEKLRRFQIFRHNLEEI 59
+
+Query: 60  EELNLIAINHKADT-KFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSI 118
+                I  NH   T ++ +NKFADLS DE  + Y      + T +      LD    +  
+Sbjct: 60  -----INKNHNDSTAQYEINKFADLSKDETISKYTGLSLPLQTQNFCEVVVLDRP-PDKG 113
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECM 178
+           P  FDWR    VT VKNQG CG+CW+F+T G++E Q  I  N+ ++LSEQ L+DCD    
+Sbjct: 114 PLEFDWRRLNKVTSVKNQGMCGACWAFATLGSLESQFAIKHNQFINLSEQQLIDCDF--- 170
+
+Query: 179 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN- 237
+                   D GC+GGL   A+  ++  GGIQ ES YPY A  G  C  N+A    K+   
+Sbjct: 171 -------VDAGCDGGLLHTAFEAVMNMGGIQAESDYPYEANNG-DCRANAAKFVVKVKKC 222
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN 297
+           +  I   E  +   + S GP+ +A DA +   Y  G+    C  + L+H +L+VGY+ +N
+Sbjct: 223 YRYITVFEEKLKDLLRSVGPIPVAIDASDIVNYKRGIMKY-CANHGLNHAVLLVGYAVEN 281
+
+Query: 298 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+                 +P+WI+KN+WGADWGEQGY  +++  N CG+ N + +S
+Sbjct: 282 -----GVPFWILKNTWGADWGEQGYFRVQQNINACGIQNELPSS 320
+
+
+>sp|P25975|CATL_BOVIN CATHEPSIN L PRECURSOR
+          Length = 334
+
+ Score =  218 bits (550), Expect = 1e-56
+ Identities = 124/342 (36%), Positives = 182/342 (52%), Gaps = 25/342 (7%)
+
+Query: 7   FVLAVFTVFVSSRG--IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+           F L V  + V+S    + P   + + +++    + Y   E   R  +++ N   I+  N 
+Sbjct: 5   FFLTVLCLGVASAAPKLDPNLDAHWHQWKATHRRLYGMNEEEWRRAVWEKNKKIIDLHNQ 64
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFK---NYYLNNKEAIFTDDLPVADYLDDEFINSIPTA 121
+                K   +  +N F D++++EF+   N + N K               +  +  +P +
+Sbjct: 65  EYSEGKHAFRMAMNAFGDMTNEEFRQVMNGFQNQKHK-------KGKLFHEPLLVDVPKS 117
+
+Query: 122 FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYE 181
+            DW  +G VTPVKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVDC       +
+Sbjct: 118 VDWTKKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVDCSRA----Q 173
+
+Query: 182 GEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMI 241
+           G    ++GCNGGL  NA+ YI  NGG+ +E SYPY A     CN+      A  + F  I
+Sbjct: 174 G----NQGCNGGLMDNAFQYIKDNGGLDSEESYPYLATDTNSCNYKPECSAANDTGFVDI 229
+
+Query: 242 PKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNT 298
+           P+ E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L+VGY  + T
+Sbjct: 230 PQREKALMKAVATVGPISVAIDAGHTSFQFYKSGIYYDPDCSCKDLDHGVLVVGYGFEGT 289
+
+Query: 299 IFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVS 339
+               N  +WIVKNSWG +WG  GY+ + + +N  CG++   S
+Sbjct: 290 DSNNN-KFWIVKNSWGPEWGWNGYVKMAKDQNNHCGIATAAS 330
+
+
+>sp|P36400|LCPB_LEIME CYSTEINE PROTEINASE B PRECURSOR
+          Length = 443
+
+ Score =  218 bits (549), Expect = 2e-56
+ Identities = 123/320 (38%), Positives = 177/320 (54%), Gaps = 34/320 (10%)
+
+Query: 29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKA---DTKFGVNKFADLS 84
+           F EF+  + + Y    E  +R   F+ NL  + E       H+A     +FG+ KF DLS
+Sbjct: 38  FEEFKRTYGRAYETLAEEQQRLANFERNLELMRE-------HQARNPHAQFGITKFFDLS 90
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEF--INSIPTAFDWRTRGAVTPVKNQGQCGSC 142
+             EF   YLN            A +       ++++P A DWR +GAVTPVK+QG CGSC
+Sbjct: 91  EAEFAARYLNGAAYFAAAKRHAAQHYRKARADLSAVPDAVDWREKGAVTPVKDQGACGSC 150
+
+Query: 143 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 202
+           W+FS  GN+EGQ +++ ++LVSLSEQ LV CD            ++GC+GGL   A++++
+Sbjct: 151 WAFSAVGNIEGQWYLAGHELVSLSEQQLVSCDD----------MNDGCDGGLMLQAFDWL 200
+
+Query: 203 IK--NGGIQTESSYPYTAETG--TQC-NFNSANIGAKISNFTMIPKNETVMAGYIVSTGP 257
+           ++  NG + TE SYPY +  G   +C N +   +GA+I    +I  +E  MA ++   GP
+Sbjct: 201 LQNTNGHLHTEDSYPYVSGNGYVPECSNSSELVVGAQIDGHVLIGSSEKAMAAWLAKNGP 260
+
+Query: 258 LAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+           +AIA DA  +  Y  GV    C    L+HG+L+VGY     +     PYW++KNSWG DW
+Sbjct: 261 IAIALDASSFMSYKSGVL-TACIGKQLNHGVLLVGYDMTGEV-----PYWVIKNSWGGDW 314
+
+Query: 318 GEQGYIYLRRGKNTCGVSNF 337
+           GEQGY+ +  G N C +S +
+Sbjct: 315 GEQGYVRVVMGVNACLLSEY 334
+
+
+>sp|Q05094|CYS2_LEIPI CYSTEINE PROTEINASE 2 PRECURSOR (AMASTIGOTE CYSTEINE PROTEINASE
+           A-2)
+          Length = 444
+
+ Score =  218 bits (549), Expect = 2e-56
+ Identities = 123/321 (38%), Positives = 178/321 (55%), Gaps = 35/321 (10%)
+
+Query: 29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKA---DTKFGVNKFADLS 84
+           F EF+  + + Y    E  +R   F+ NL  + E       H+A     +FG+ KF DLS
+Sbjct: 38  FEEFKRTYGRAYETLAEEQQRLANFERNLELMRE-------HQARNPHAQFGITKFFDLS 90
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEF--INSIPTAFDWRTRGAVTPVKNQGQCGSC 142
+             EF   YLN            A +       ++++P A DWR +GAVTPVK+QG CGSC
+Sbjct: 91  EAEFAARYLNGAAYFAAAKRHAAQHYRKARADLSAVPDAVDWREKGAVTPVKDQGACGSC 150
+
+Query: 143 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 202
+           W+FS  GN+EGQ +++ ++LVSLSEQ LV CD            ++GC+GGL   A++++
+Sbjct: 151 WAFSAVGNIEGQWYLAGHELVSLSEQQLVSCDD----------MNDGCDGGLMLQAFDWL 200
+
+Query: 203 IK--NGGIQTESSYPYTAETG--TQCNFNSAN--IGAKISNFTMIPKNETVMAGYIVSTG 256
+           ++  NG + TE SYPY +  G   +C+ +S    +GA+I    +I  +E  MA ++   G
+Sbjct: 201 LQNTNGHLHTEDSYPYVSGNGYVPECSNSSEELVVGAQIDGHVLIGSSEKAMAAWLAKNG 260
+
+Query: 257 PLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGAD 316
+           P+AIA DA  +  Y  GV    C    L+HG+L+VGY     +     PYW++KNSWG D
+Sbjct: 261 PIAIALDASSFMSYKSGVL-TACIGKQLNHGVLLVGYDMTGEV-----PYWVIKNSWGGD 314
+
+Query: 317 WGEQGYIYLRRGKNTCGVSNF 337
+           WGEQGY+ +  G N C +S +
+Sbjct: 315 WGEQGYVRVVMGVNACLLSEY 335
+
+
+>sp|P12412|CYSP_VIGMU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYSTEINE PROTEINASE)
+           (SULFHYDRYL-ENDOPEPTIDASE) (SH-EP)
+          Length = 362
+
+ Score =  217 bits (547), Expect = 3e-56
+ Identities = 127/306 (41%), Positives = 179/306 (57%), Gaps = 29/306 (9%)
+
+Query: 47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK---EAIFTDD 103
+           +RF +FK+N+  +   N +   +K      +NKFAD+++ EF++ Y  +K     +F   
+Sbjct: 58  KRFNVFKANVMHVHNTNKMDKPYKLK----LNKFADMTNHEFRSTYAGSKVNHHKMFRGS 113
+
+Query: 104 LPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 163
+              +     E + S+P + DWR +GAVT VK+QGQCGSCW+FST   VEG + I  NKLV
+Sbjct: 114 QHGSGTFMYEKVGSVPASVDWRKKGAVTDVKDQGQCGSCWAFSTIVAVEGINQIKTNKLV 173
+
+Query: 164 SLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ 223
+           SLSEQ LVDCD E          ++GCNGGL  +A+ +I + GGI TES+YPYTA+ GT 
+Sbjct: 174 SLSEQELVDCDKE---------ENQGCNGGLMESAFEFIKQKGGITTESNYPYTAQEGT- 223
+
+Query: 224 CNFNSAN-IGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCN 280
+           C+ +  N +   I     +P N+       V+  P+++A DA   ++QFY  GVF   CN
+Sbjct: 224 CDESKVNDLAVSIDGHENVPVNDENALLKAVANQPVSVAIDAGGSDFQFYSEGVFTGDCN 283
+
+Query: 281 PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSN 336
+              L+HG+ IVGY    T+   N  YWIV+NSWG +WGEQGYI ++R     +  CG++ 
+Sbjct: 284 -TDLNHGVAIVGYG--TTVDGTN--YWIVRNSWGPEWGEQGYIRMQRNISKKEGLCGIAM 338
+
+Query: 337 FVSTSI 342
+             S  I
+Sbjct: 339 MASYPI 344
+
+
+>sp|P07711|CATL_HUMAN CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP)
+          Length = 333
+
+ Score =  215 bits (543), Expect = 8e-56
+ Identities = 124/341 (36%), Positives = 186/341 (54%), Gaps = 26/341 (7%)
+
+Query: 8   VLAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLI 65
+           +LA F + ++S  +  +   ++Q+ +++   N+ Y   E   R  +++ N+  IE  N  
+Sbjct: 6   ILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQE 65
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFK---NYYLNNKEAIFTDDLPVADYLDDEFINSIPTAF 122
+               K      +N F D++S+EF+   N + N K               +      P + 
+Sbjct: 66  YREGKHSFTMAMNAFGDMTSEEFRQVMNGFQNRKPR-------KGKVFQEPLFYEAPRSV 118
+
+Query: 123 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 182
+           DWR +G VTPVKNQGQCGSCW+FS TG +EGQ F    +L+SLSEQNLVDC        G
+Sbjct: 119 DWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDC-------SG 171
+
+Query: 183 EEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP 242
+            +  +EGCNGGL   A+ Y+  NGG+ +E SYPY A T   C +N     A  + F  IP
+Sbjct: 172 PQG-NEGCNGGLMDYAFQYVQDNGGLDSEESYPYEA-TEESCKYNPKYSVANDTGFVDIP 229
+
+Query: 243 KNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTI 299
+           K E  +   + + GP+++A DA    + FY  G+ F+  C+   +DHG+L+VGY  ++T 
+Sbjct: 230 KQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGFEST- 288
+
+Query: 300 FRKNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVS 339
+              N  YW+VKNSWG +WG  GY+ + +  +N CG+++  S
+Sbjct: 289 ESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAAS 329
+
+
+>sp|Q28944|CATL_PIG CATHEPSIN L PRECURSOR
+          Length = 334
+
+ Score =  214 bits (540), Expect = 2e-55
+ Identities = 117/307 (38%), Positives = 165/307 (53%), Gaps = 23/307 (7%)
+
+Query: 40  YSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFK---NYYLNNK 96
+           Y   E   R  +++ N+  IE  N      K      +N F D++++EF+   N + N K
+Sbjct: 40  YGMNEEGWRRAVWEKNMKMIELHNQEYSQGKHGFSMAMNAFGDMTNEEFRQVMNGFQNQK 99
+
+Query: 97  EAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHF 156
+                          +  +  +P + DWR +G VT VKNQGQCGSCW+FS TG +EGQ F
+Sbjct: 100 HK-------KGKVFHESLVLEVPKSVDWREKGYVTAVKNQGQCGSCWAFSATGALEGQMF 152
+
+Query: 157 ISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPY 216
+               KLVSLSEQNLVDC       +G    ++GCNGGL  NA+ Y+  NGG+ TE SYPY
+Sbjct: 153 RKTGKLVSLSEQNLVDCSRP----QG----NQGCNGGLMDNAFQYVKDNGGLDTEESYPY 204
+
+Query: 217 TAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV 274
+                  C +      A  + F  IP+ E  +   + + GP+++A DA    +QFY  G+
+Sbjct: 205 LGRETNSCTYKPECSAANDTGFVDIPQREKALMKAVATVGPISVAIDAGHSSFQFYKSGI 264
+
+Query: 275 -FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-C 332
+            +D  C+   LDHG+L+VGY  + T    +  +WIVKNSWG +WG  GY+ + + +N  C
+Sbjct: 265 YYDPDCSSKDLDHGVLVVGYGFEGT-DSNSSKFWIVKNSWGPEWGWNGYVKMAKDQNNHC 323
+
+Query: 333 GVSNFVS 339
+           G+S   S
+Sbjct: 324 GISTAAS 330
+
+
+>sp|P25783|CATV_NPVAC VIRAL CATHEPSIN (V-CATH)
+          Length = 323
+
+ Score =  213 bits (538), Expect = 3e-55
+ Identities = 129/342 (37%), Positives = 179/342 (51%), Gaps = 26/342 (7%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELN 63
+           +LF L V+ V  S+     +  + F EF  +FNK Y  E E L RF+IF+ NL +I    
+Sbjct: 4   ILFYLFVYGVVNSAAYDLLKAPNYFEEFVHRFNKDYGSEVEKLRRFKIFQHNLNEI---- 59
+
+Query: 64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD 123
+            I  N     K+ +NKF+DLS DE    Y      I T +      LD       P  FD
+Sbjct: 60  -INKNQNDSAKYEINKFSDLSKDETIAKYTGLSLPIQTQNFCKVIVLDQP-PGKGPLEFD 117
+
+Query: 124 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+           WR    VT VKNQG CG+CW+F+T  ++E Q  I  N+L++LSEQ ++DCD         
+Sbjct: 118 WRRLNKVTSVKNQGMCGACWAFATLASLESQFAIKHNQLINLSEQQMIDCDF-------- 169
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN-FTMIP 242
+              D GCNGGL   A+  IIK GG+Q ES YPY A+    C  NS     ++ + +  I 
+Sbjct: 170 --VDAGCNGGLLHTAFEAIIKMGGVQLESDYPYEAD-NNNCRMNSNKFLVQVKDCYRYIT 226
+
+Query: 243 KNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRK 302
+             E  +   +   GP+ +A DA +   Y  G+    C  + L+H +L+VGY  +N     
+Sbjct: 227 VYEEKLKDLLRLVGPIPMAIDAADIVNYKQGIIKY-CFNSGLNHAVLLVGYGVEN----- 280
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN-FVSTSII 343
+           N+PYW  KN+WG DWGE G+  +++  N CG+ N   ST++I
+Sbjct: 281 NIPYWTFKNTWGTDWGEDGFFRVQQNINACGMRNELASTAVI 322
+
+
+>sp|Q40143|CYS3_LYCES CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 356
+
+ Score =  212 bits (535), Expect = 7e-55
+ Identities = 126/324 (38%), Positives = 176/324 (53%), Gaps = 34/324 (10%)
+
+Query: 29  FLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F  F  +  K+Y S EE  +RFEIF  NL  I   N   +++K     G+N+F DL+ DE
+Sbjct: 57  FARFAIRHRKRYDSVEEIKQRFEIFLDNLKMIRSHNRKGLSYK----LGINEFTDLTWDE 112
+
+Query: 88  FKNYYLN---NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWS 144
+           F+ + L    N  A    +L + + +       +P   DWR  G V+PVK QG+CGSCW+
+Sbjct: 113 FRKHKLGASQNCSATTKGNLKLTNVV-------LPETKDWRKDGIVSPVKAQGKCGSCWT 165
+
+Query: 145 FSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK 204
+           FSTTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI  
+Sbjct: 166 FSTTGALEAAYAQAFGKGISLSEQQLVDCAGAFNNF--------GCNGGLPSQAFEYIKF 217
+
+Query: 205 NGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVS-TGPLAIAAD 263
+           NGG+ TE +YPYT + G  C F+ ANIG K+ +   I         Y V+   P+++A +
+Sbjct: 218 NGGLDTEEAYPYTGKNGI-CKFSQANIGVKVISSVNITLGAEYELKYAVALVRPVSVAFE 276
+
+Query: 264 AVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 319
+            V+ ++ Y  GV+   +    P  ++H +L VGY  +N       PYW++KNSWGADWGE
+Sbjct: 277 VVKGFKQYKSGVYASTECGDTPMDVNHAVLAVGYGVEN-----GTPYWLIKNSWGADWGE 331
+
+Query: 320 QGYIYLRRGKNTCGVSNFVSTSII 343
+            GY  +  GKN CGV+   S  I+
+Sbjct: 332 DGYFKMEMGKNMCGVATCASYPIV 355
+
+
+>sp|O60911|CATM_HUMAN CATHEPSIN L2 PRECURSOR (CATHEPSIN V)
+          Length = 334
+
+ Score =  210 bits (528), Expect = 5e-54
+ Identities = 127/349 (36%), Positives = 191/349 (54%), Gaps = 35/349 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQS---QFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEE 61
+           L  VLA F + ++S  +P  +Q+   ++ +++    + Y   E   R  +++ N+  IE 
+Sbjct: 3   LSLVLAAFCLGIAS-AVPKFDQNLDTKWYQWKATHRRLYGANEEGWRRAVWEKNMKMIEL 61
+
+Query: 62  LNLIAINHKADTKFGVNKFADLSSDEFKNY---YLNNK---EAIFTDDLPVADYLDDEFI 115
+            N      K      +N F D++++EF+     + N K     +F + L    +LD    
+Sbjct: 62  HNGEYSQGKHGFTMAMNAFPDMTNEEFRQMMGCFRNQKFRKGKVFREPL----FLD---- 113
+
+Query: 116 NSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 175
+             +P + DWR +G VTPVKNQ QCGSCW+FS TG +EGQ F    KLVSLSEQNLVDC  
+Sbjct: 114 --LPKSVDWRKKGYVTPVKNQKQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVDCSR 171
+
+Query: 176 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKI 235
+                +G    ++GCNGG    A+ Y+ +NGG+ +E SYPY A     C +   N  A  
+Sbjct: 172 P----QG----NQGCNGGFMARAFQYVKENGGLDSEESYPYVA-VDEICKYRPENSVAND 222
+
+Query: 236 SNFTMI-PKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIV 291
+           + FT++ P  E  +   + + GP+++A DA    +QFY  G+ F+  C+  +LDHG+L+V
+Sbjct: 223 TGFTVVAPGKEKALMKAVATVGPISVAMDAGHSSFQFYKSGIYFEPDCSSKNLDHGVLVV 282
+
+Query: 292 GYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVS 339
+           GY  +      N  YW+VKNSWG +WG  GY+ + + KN  CG++   S
+Sbjct: 283 GYGFEGA-NSNNSKYWLVKNSWGPEWGSNGYVKIAKDKNNHCGIATAAS 330
+
+
+>sp|P54639|CYS4_DICDI CYSTEINE PROTEINASE 4 PRECURSOR
+          Length = 442
+
+ Score =  209 bits (527), Expect = 6e-54
+ Identities = 116/300 (38%), Positives = 167/300 (55%), Gaps = 24/300 (8%)
+
+Query: 4   ILLFVLAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEE 61
+           +L F+  +   + S++    E Q  + F  +     + YS EE+  R++IFKSN+  + +
+Sbjct: 3   VLSFLCLLLVSYASAKQQFSELQYRNAFTNWMQAHQRTYSSEEFNARYQIFKSNMDYVHQ 62
+
+Query: 62  LNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTA 121
+            N    +   +T  G+N FAD+++ E++  YL      F     +    +  F    PT 
+Sbjct: 63  WN----SKGGETVLGLNVFADITNQEYRTTYLGTP---FDGSALIGTEEEKIFSTPAPTV 115
+
+Query: 122 FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFIS---QNKLVSLSEQNLVDCDHECM 178
+            DWR +GAVTP+KNQGQCG CWSFSTTG+ EG HFI+   +  LVSLSEQNL+DC     
+Sbjct: 116 -DWRAQGAVTPIKNQGQCGGCWSFSTTGSTEGAHFIASGTKKDLVSLSEQNLIDCS---- 170
+
+Query: 179 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNF 238
+               +   + GC GGL    + YII N GI TESSYPYTAE G +C F ++NIGA+I ++
+Sbjct: 171 ----KSYGNNGCEGGLMTLGFEYIINNKGIDTESSYPYTAEDGKECKFKTSNIGAQIVSY 226
+
+Query: 239 TMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGYSA 295
+             +            +  P+++A DA    +Q Y  G++  P C P  LDHG+L+VGY +
+Sbjct: 227 QNVTSGSEASLQSASNNAPVSVAIDASNESFQLYESGIYYEPACTPTQLDHGVLVVGYGS 286
+
+
+ Score = 48.8 bits (114), Expect = 2e-05
+ Identities = 18/35 (51%), Positives = 24/35 (68%), Gaps = 1/35 (2%)
+
+Query: 306 YWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 339
+           YWIVKNSWG  WG  GYI++ + + N CG++   S
+Sbjct: 401 YWIVKNSWGTSWGMDGYIFMSKDRNNNCGIATMAS 435
+
+
+>sp|Q10991|CATL_SHEEP CATHEPSIN L
+          Length = 217
+
+ Score =  209 bits (527), Expect = 6e-54
+ Identities = 104/226 (46%), Positives = 140/226 (61%), Gaps = 17/226 (7%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+           +P + DW  +G VTPVKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVD     
+Sbjct: 1   VPKSVDWTKKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVD----- 55
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+                    ++GCNGGL  NA+ YI +NGG+ +E SYPY A T T CN+      AK + 
+Sbjct: 56  ---SSRPQGNQGCNGGLMDNAFQYIKENGGLDSEESYPYEA-TDTSCNYKPEYSAAKDTG 111
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYS 294
+           F  IP+ E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L+VGY 
+Sbjct: 112 FVDIPQREKALMKAVATVGPISVAIDAGHSSFQFYKSGIYYDPDCSSKDLDHGVLVVGYG 171
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVS 339
+            + T    N  +WIVKNSWG +WG +GY+ + + +N  CG++   S
+Sbjct: 172 FEGT----NNKFWIVKNSWGPEWGNKGYVKMAKDQNNHCGIATAAS 213
+
+
+>sp|P25803|CYSP_PHAVU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYSTEINE PROTEINASE EP-C1)
+          Length = 362
+
+ Score =  208 bits (523), Expect = 2e-53
+ Identities = 124/306 (40%), Positives = 176/306 (56%), Gaps = 29/306 (9%)
+
+Query: 47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK---EAIFTDD 103
+           +RF +FK+NL  +   N +   +K      +NKFAD+++ EF++ Y  +K     +F   
+Sbjct: 58  KRFNVFKANLMHVHNTNKMDKPYKLK----LNKFADMTNHEFRSTYAGSKVNHPRMFRGT 113
+
+Query: 104 LPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 163
+                    E + S+P + DWR +GAVT VK+QGQCGSCW+FST   VEG + I  NKLV
+Sbjct: 114 PHENGAFMYEKVVSVPPSVDWRKKGAVTDVKDQGQCGSCWAFSTVVAVEGINQIKTNKLV 173
+
+Query: 164 SLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ 223
+           +LSEQ LVDCD E          ++GCNGGL  +A+ +I + GGI TES+YPY A+ GT 
+Sbjct: 174 ALSEQELVDCDKE---------ENQGCNGGLMESAFEFIKQKGGITTESNYPYKAQEGT- 223
+
+Query: 224 CNFNSAN-IGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCN 280
+           C+ +  N +   I     +P N+       V+  P+++A DA   ++QFY  GVF   C+
+Sbjct: 224 CDASKVNDLAVSIDGHENVPANDEDALLKAVANQPVSVAIDAGGSDFQFYSEGVFTGDCS 283
+
+Query: 281 PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSN 336
+              L+HG+ IVGY    T+   N  YWIV+NSWG +WGE GYI ++R     +  CG++ 
+Sbjct: 284 -TDLNHGVAIVGYG--TTVDGTN--YWIVRNSWGPEWGEHGYIRMQRNISKKEGLCGIAM 338
+
+Query: 337 FVSTSI 342
+             S  I
+Sbjct: 339 LPSYPI 344
+
+
+>sp|P00785|ACTN_ACTCH ACTINIDAIN PRECURSOR (ACTINIDIN)
+          Length = 380
+
+ Score =  206 bits (520), Expect = 4e-53
+ Identities = 123/327 (37%), Positives = 176/327 (53%), Gaps = 35/327 (10%)
+
+Query: 24  EEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADT----KFGVN 78
+           E ++ +  +  K+ K Y S  E+  RFEIFK  L  I+E       H ADT    K G+N
+Sbjct: 37  EVKAMYESWLIKYGKSYNSLGEWERRFEIFKETLRFIDE-------HNADTNRSYKVGLN 89
+
+Query: 79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQ 138
+           +FADL+ +EF++ YL       ++   V++  +  F   +P+  DWR+ GAV  +K+QG+
+Sbjct: 90  QFADLTDEEFRSTYLGFTSG--SNKTKVSNRYEPRFGQVLPSYVDWRSAGAVVDIKSQGE 147
+
+Query: 139 CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNA 198
+           CG CW+FS    VEG + I    L+SLSEQ L+DC        G      GCNGG   + 
+Sbjct: 148 CGGCWAFSAIATVEGINKIVTGVLISLSEQELIDC--------GRTQNTRGCNGGYITDG 199
+
+Query: 199 YNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG-AKISNFTMIPKNETVMAGYIVSTGP 257
+           + +II NGGI TE +YPYTA+ G +CN +  N     I  +  +P N        V+  P
+Sbjct: 200 FQFIINNGGINTEENYPYTAQDG-ECNLDLQNEKYVTIDTYENVPYNNEWALQTAVTYQP 258
+
+Query: 258 LAIAADAV--EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGA 315
+           +++A DA    ++ Y  G+F  PC   ++DH + IVGY  +  I      YWIVKNSW  
+Sbjct: 259 VSVALDAAGDAFKHYSSGIFTGPCG-TAIDHAVTIVGYGTEGGI-----DYWIVKNSWDT 312
+
+Query: 316 DWGEQGYIYLRR---GKNTCGVSNFVS 339
+            WGE+GY+ + R   G  TCG++   S
+Sbjct: 313 TWGEEGYMRILRNVGGAGTCGIATMPS 339
+
+
+>sp|P43156|CYSP_HEMSP THIOL PROTEASE SEN102 PRECURSOR
+          Length = 360
+
+ Score =  203 bits (510), Expect = 6e-52
+ Identities = 125/304 (41%), Positives = 164/304 (53%), Gaps = 30/304 (9%)
+
+Query: 43  EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTD 102
+           +E   RF +FK N+  I E N       A  K  +NKF D+++ EF++ Y  +K      
+Sbjct: 54  DEKNRRFNVFKENVKFIHEFNQ---KKDAPYKLALNKFGDMTNQEFRSKYAGSKIQHHRS 110
+
+Query: 103 DLPVADYLDD---EFINSIPTA-FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFIS 158
+              +         E + S+P A  DWR +GAVT VK+QGQCGSCW+FST  +VEG + I 
+Sbjct: 111 QRGIQKNTGSFMYENVGSLPAASIDWRAKGAVTGVKDQGQCGSCWAFSTIASVEGINQIK 170
+
+Query: 159 QNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTA 218
+             +LVSLSEQ LVDCD          + +EGCNGGL   A+ +I KN GI TE SYPY  
+Sbjct: 171 TGELVSLSEQELVDCD---------TSYNEGCNGGLMDYAFEFIQKN-GITTEDSYPYAE 220
+
+Query: 219 ETGTQCNFNSANIG-AKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVF 275
+           + GT C  N  N     I     +P N        V+  P++++ +A    +QFY  GVF
+Sbjct: 221 QDGT-CASNLLNSPVVSIDGHQDVPANNENALMQAVANQPISVSIEASGYGFQFYSEGVF 279
+
+Query: 276 DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNT 331
+              C    LDHG+ IVGY A     R    YWIVKNSWG +WGE GYI ++RG    +  
+Sbjct: 280 TGRCG-TELDHGVAIVGYGAT----RDGTKYWIVKNSWGEEWGESGYIRMQRGISDKRGK 334
+
+Query: 332 CGVS 335
+           CG++
+Sbjct: 335 CGIA 338
+
+
+>sp|P25777|ORYB_ORYSA ORYZAIN BETA CHAIN PRECURSOR
+          Length = 471
+
+ Score =  203 bits (510), Expect = 6e-52
+ Identities = 115/303 (37%), Positives = 165/303 (53%), Gaps = 25/303 (8%)
+
+Query: 44  EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDD 103
+           E+  RF +F  NL  ++  N  A +     + G+N+FADL+++EF+  +L  K A     
+Sbjct: 69  EHERRFLVFWDNLKFVDAHNARA-DEGGGFRLGMNRFADLTNEEFRATFLGAKVA--ERS 125
+
+Query: 104 LPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 163
+               +    + +  +P + DWR +GAV PVKNQGQCGSCW+FS    VE  + +   +++
+Sbjct: 126 RAAGERYRHDGVEELPESVDWREKGAVAPVKNQGQCGSCWAFSAVSTVESINQLVTGEMI 185
+
+Query: 164 SLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ 223
+           +LSEQ LV+C             + GCNGGL  +A+++IIKNGGI TE  YPY A  G +
+Sbjct: 186 TLSEQELVEC--------STNGQNSGCNGGLMADAFDFIIKNGGIDTEDDYPYKAVDG-K 236
+
+Query: 224 CNFNSANIG-AKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCN 280
+           C+ N  N     I  F  +P+N+       V+  P+++A +A   E+Q Y  GVF   C 
+Sbjct: 237 CDINRENAKVVSIDGFEDVPQNDEKSLQKAVAHQPVSVAIEAGGREFQLYHSGVFSGRCG 296
+
+Query: 281 PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT----CGVSN 336
+             SLDHG++ VGY   N        YWIV+NSWG  WGE GY+ + R  N     CG++ 
+Sbjct: 297 -TSLDHGVVAVGYGTDN-----GKDYWIVRNSWGPKWGESGYVRMERNINVTTGKCGIAM 350
+
+Query: 337 FVS 339
+             S
+Sbjct: 351 MAS 353
+
+
+>sp|Q10717|CYS2_MAIZE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 360
+
+ Score =  201 bits (505), Expect = 2e-51
+ Identities = 119/327 (36%), Positives = 175/327 (53%), Gaps = 36/327 (11%)
+
+Query: 28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F  ++ K Y S  E  +RF IF  +L  +   N   ++++     G+N+FAD+S +
+Sbjct: 58  RFARFAVRYGKSYESAAEVHKRFRIFSESLQLVRSTNRKGLSYR----LGINRFADMSWE 113
+
+Query: 87  EFKNYYLN---NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCW 143
+           EF+   L    N  A  T      ++       ++P   DWR  G V+PVKNQG CGSCW
+Sbjct: 114 EFRATRLGAAQNCSATLT-----GNHRMRAAAVALPETKDWREDGIVSPVKNQGHCGSCW 168
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI 
+Sbjct: 169 TFSTTGALEAAYTQATGKPISLSEQQLVDCGFAFNNF--------GCNGGLPSQAFEYIK 220
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKI---SNFTMIPKNETVMAGYIVSTGPLAI 260
+            NGG+ TE SYPY    G  C F + N+G K+    N T+  ++E   A  +V   P+++
+Sbjct: 221 YNGGLDTEESYPYQGVNGI-CKFKNENVGVKVLDSVNITLGAEDELKDAVGLVR--PVSV 277
+
+Query: 261 AADAVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGAD 316
+           A + +  ++ Y  GV+        P  ++H +L VGY  ++      +PYW++KNSWGAD
+Sbjct: 278 AFEVITGFRLYKSGVYTSDHCGTTPMDVNHAVLAVGYGVED-----GVPYWLIKNSWGAD 332
+
+Query: 317 WGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           WG++GY  +  GKN CGV+   S  I+
+Sbjct: 333 WGDEGYFKMEMGKNMCGVATCASYPIV 359
+
+
+>sp|P00786|CATH_RAT CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPSIN BA)
+          Length = 333
+
+ Score =  200 bits (504), Expect = 3e-51
+ Identities = 121/324 (37%), Positives = 172/324 (52%), Gaps = 28/324 (8%)
+
+Query: 25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           E+  F  +  +  K YS  EY  R ++F +N  KI+  N    NH    K G+N+F+D+S
+Sbjct: 29  EKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHN--QRNHTF--KMGLNQFSDMS 84
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG-AVTPVKNQGQCGSCW 143
+             E K+ YL ++          ++YL        P++ DWR +G  V+PVKNQG CGSCW
+Sbjct: 85  FAEIKHKYLWSEPQ--NCSATKSNYLRGT--GPYPSSMDWRKKGNVVSPVKNQGACGSCW 140
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +E    I+  K+++L+EQ LVDC         +   + GC GGL   A+ YI+
+Sbjct: 141 TFSTTGALESAVAIASGKMMTLAEQQLVDC--------AQNFNNHGCQGGLPSQAFEYIL 192
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAA 262
+            N GI  E SYPY  + G QC FN     A + N   I  N E  M   +    P++ A 
+Sbjct: 193 YNKGIMGEDSYPYIGKNG-QCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAF 251
+
+Query: 263 DAVE-WQFYIGGVFDI-PCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+           +  E +  Y  GV+    C+  P+ ++H +L VGY  +N +      YWIVKNSWG++WG
+Sbjct: 252 EVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLL-----YWIVKNSWGSNWG 306
+
+Query: 319 EQGYIYLRRGKNTCGVSNFVSTSI 342
+             GY  + RGKN CG++   S  I
+Sbjct: 307 NNGYFLIERGKNMCGLAACASYPI 330
+
+
+>sp|O10364|CATV_NPVOP VIRAL CATHEPSIN (V-CATH)
+          Length = 324
+
+ Score =  199 bits (500), Expect = 9e-51
+ Identities = 118/316 (37%), Positives = 170/316 (53%), Gaps = 26/316 (8%)
+
+Query: 29  FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F +F  KFNK YS E E L RF+IF+ NL +I   N     + +  ++ +NKF+DLS +E
+Sbjct: 28  FEDFLHKFNKNYSSESEKLHRFKIFQHNLEEIINKN----QNDSTAQYEINKFSDLSKEE 83
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+             + Y        T +      LD       P  FDWR    VT VKNQG CG+CW+F+T
+Sbjct: 84  AISKYTGLSLPHQTQNFCEVVILDRPPDRG-PLEFDWRQFNKVTSVKNQGVCGACWAFAT 142
+
+Query: 148 TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 207
+            G++E Q  I  N+L++LSEQ  +DCD            + GC+GGL   A+   ++ GG
+Sbjct: 143 LGSLESQFAIKYNRLINLSEQQFIDCDR----------VNAGCDGGLLHTAFESAMEMGG 192
+
+Query: 208 IQTESSYPYTAETGTQCNFNSAN--IGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV 265
+           +Q ES YPY    G QC  N     +G + S    I   E  +   + + GP+ +A DA 
+Sbjct: 193 VQMESDYPYETANG-QCRINPNRFVVGVR-SCRRYIVMFEEKLKDLLRAVGPIPVAIDAS 250
+
+Query: 266 EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+           +   Y  G+    C  + L+H +L+VGY+ +N     N+PYWI+KN+WG DWGE GY  +
+Sbjct: 251 DIVNYRRGIMR-QCANHGLNHAVLLVGYAVEN-----NIPYWILKNTWGTDWGEDGYFRV 304
+
+Query: 326 RRGKNTCGVSNFVSTS 341
+           ++  N CG+ N + +S
+Sbjct: 305 QQNINACGIRNELVSS 320
+
+
+>sp|P43297|RD21_ARATH CYSTEINE PROTEINASE RD21A PRECURSOR
+          Length = 462
+
+ Score =  198 bits (498), Expect = 2e-50
+ Identities = 119/313 (38%), Positives = 165/313 (52%), Gaps = 35/313 (11%)
+
+Query: 35  KFNKKYSHEEYLE---RFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNY 91
+           K  K  S    +E   RFEIFK NL  ++E N   ++++     G+ +FADL++DE+++ 
+Sbjct: 56  KHGKAQSQNSLVEKDRRFEIFKDNLRFVDEHNEKNLSYR----LGLTRFADLTNDEYRSK 111
+
+Query: 92  YLN---NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTT 148
+           YL     K+      L     + DE    +P + DWR +GAV  VK+QG CGSCW+FST 
+Sbjct: 112 YLGAKMEKKGERRTSLRYEARVGDE----LPESIDWRKKGAVAEVKDQGGCGSCWAFSTI 167
+
+Query: 149 GNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGI 208
+           G VEG + I    L++LSEQ LVDCD          + +EGCNGGL   A+ +IIKNGGI
+Sbjct: 168 GAVEGINQIVTGDLITLSEQELVDCD---------TSYNEGCNGGLMDYAFEFIIKNGGI 218
+
+Query: 209 QTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VE 266
+            T+  YPY    GT            I ++  +P          V+  P++IA +A    
+Sbjct: 219 DTDKDYPYKGVDGTCDQIRKNAKVVTIDSYEDVPTYSEESLKKAVAHQPISIAIEAGGRA 278
+
+Query: 267 WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLR 326
+           +Q Y  G+FD  C    LDHG++ VGY  +N        YWIV+NSWG  WGE GY+ + 
+Sbjct: 279 FQLYDSGIFDGSCG-TQLDHGVVAVGYGTEN-----GKDYWIVRNSWGKSWGESGYLRMA 332
+
+Query: 327 R----GKNTCGVS 335
+           R        CG++
+Sbjct: 333 RNIASSSGKCGIA 345
+
+
+>sp|P15242|TES1_RAT TESTIN 1/2 PRECURSOR (CMB-22/CMB-23)
+          Length = 333
+
+ Score =  198 bits (497), Expect = 2e-50
+ Identities = 115/348 (33%), Positives = 184/348 (52%), Gaps = 22/348 (6%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQS--QFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           +I +  LA+  + V S    P+     ++ E++ K  K Y+  E   +  +++ N   IE
+Sbjct: 1   MIAVLFLAILCLEVDSTAPTPDPSLDVEWNEWRTKHGKTYNMNEERLKRAVWEKNFKMIE 60
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN-NKEAIFTDDLPVADYLDDEFINSIP 119
+             N   +  + D    +N F DL++ EF        ++ I    +    + D +F+  +P
+Sbjct: 61  LHNWEYLEGRHDFTMAMNAFGDLTNIEFVKMMTGFQRQKIKKTHI----FQDHQFLY-VP 115
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+              DWR  G VTPVKNQG C S W+FS TG++EGQ F    +L+ LSEQNL+DC    + 
+Sbjct: 116 KRVDWRQLGYVTPVKNQGHCASSWAFSATGSLEGQMFRKTERLIPLSEQNLLDCMGSNVT 175
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+           +        GC+GG    A+ Y+  NGG+ TE SYPY  + G +C +++ N  A + +F 
+Sbjct: 176 H--------GCSGGFMQYAFQYVKDNGGLATEESYPYRGQ-GRECRYHAENSAANVRDFV 226
+
+Query: 240 MIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYSAK 296
+            IP +E  +   +   GP+++A DA    +QFY  G++  P C    L+H +L+VGY  +
+Sbjct: 227 QIPGSEEALMKAVAKVGPISVAVDASHGSFQFYGSGIYYEPQCKRVHLNHAVLVVGYGFE 286
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+                 N  +W+VKNSWG +WG +GY+ L +   N CG++ + +  I+
+Sbjct: 287 GEESDGN-SFWLVKNSWGEEWGMKGYMKLAKDWSNHCGIATYSTYPIV 333
+
+
+>sp|P25776|ORYA_ORYSA ORYZAIN ALPHA CHAIN PRECURSOR
+          Length = 458
+
+ Score =  198 bits (497), Expect = 2e-50
+ Identities = 122/348 (35%), Positives = 182/348 (52%), Gaps = 37/348 (10%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQ--FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+           ++LL  LA   + + S G   EE+++  + E++ +  K Y+   E   R+  F+ NL  I
+Sbjct: 12  LLLLLSLAAADMSIVSYGERSEEEARRLYAEWKAEHGKSYNAVGEEERRYAAFRDNLRYI 71
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN-----NKEAIFTDDLPVADYLDDEF 114
+           +E N  A       + G+N+FADL+++E+++ YL       +E   +D    AD      
+Sbjct: 72  DEHNAAADAGVHSFRLGLNRFADLTNEEYRDTYLGLRNKPRRERKVSDRYLAADN----- 126
+
+Query: 115 INSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCD 174
+             ++P + DWRT+GAV  +K+QG CGSCW+FS    VE  + I    L+SLSEQ LVDCD
+Sbjct: 127 -EALPESVDWRTKGAVAEIKDQGGCGSCWAFSAIAAVEDINQIVTGDLISLSEQELVDCD 185
+
+Query: 175 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG-A 233
+                     + +EGCNGGL   A+++II NGGI TE  YPY  +   +C+ N  N    
+Sbjct: 186 ---------TSYNEGCNGGLMDYAFDFIINNGGIDTEDDYPYKGK-DERCDVNRKNAKVV 235
+
+Query: 234 KISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIV 291
+            I ++  +  N        V   P+++A +A    +Q Y  G+F   C   +LDHG+  V
+Sbjct: 236 TIDSYEDVTPNSETSLQKAVRNQPVSVAIEAGGRAFQLYSSGIFTGKCG-TALDHGVAAV 294
+
+Query: 292 GYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR----GKNTCGVS 335
+           GY  +N        YWIV+NSWG  WGE GY+ + R        CG++
+Sbjct: 295 GYGTEN-----GKDYWIVRNSWGKSWGESGYVRMERNIKASSGKCGIA 337
+
+
+>sp|P14080|PAP2_CARPA CHYMOPAPAIN PRECURSOR (PAPAYA PROTEINASE II) (PPII)
+          Length = 352
+
+ Score =  194 bits (488), Expect = 2e-49
+ Identities = 125/315 (39%), Positives = 167/315 (52%), Gaps = 43/315 (13%)
+
+Query: 35  KFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKFADLSSDEFKNY 91
+           K NK Y S +E + RFEIF+ NL  I+E N      K +  +  G+N FADLS+DEFK  
+Sbjct: 54  KHNKIYESIDEKIYRFEIFRDNLMYIDETN------KKNNSYWLGLNGFADLSNDEFKKK 107
+
+Query: 92  YLNNKEAIFTDDLPVADYLDDE-----FINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           Y+        +D    ++ D+E      + + P + DWR +GAVTPVKNQG CGSCW+FS
+Sbjct: 108 YVG----FVAEDFTGLEHFDNEDFTYKHVTNYPQSIDWRAKGAVTPVKNQGACGSCWAFS 163
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           T   VEG + I    L+ LSEQ LVDCD              GC GG Q  +  Y + N 
+Sbjct: 164 TIATVEGINKIVTGNLLELSEQELVDCDKH----------SYGCKGGYQTTSLQY-VANN 212
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAADA- 264
+           G+ T   YPY A+       +      KI+ +  +P N ET   G + +  PL++  +A 
+Sbjct: 213 GVHTSKVYPYQAKQYKCRATDKPGPKVKITGYKRVPSNCETSFLGALANQ-PLSVLVEAG 271
+
+Query: 265 -VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 323
+              +Q Y  GVFD PC    LDH +  VGY   +    KN  Y I+KNSWG +WGE+GY+
+Sbjct: 272 GKPFQLYKSGVFDGPCG-TKLDHAVTAVGYGTSD---GKN--YIIIKNSWGPNWGEKGYM 325
+
+Query: 324 YLRR----GKNTCGV 334
+            L+R     + TCGV
+Sbjct: 326 RLKRQSGNSQGTCGV 340
+
+
+>sp|P43235|CATK_HUMAN CATHEPSIN K PRECURSOR (CATHEPSIN O) (CATHEPSIN X) (CATHEPSIN O2)
+          Length = 329
+
+ Score =  194 bits (488), Expect = 2e-49
+ Identities = 121/339 (35%), Positives = 180/339 (52%), Gaps = 25/339 (7%)
+
+Query: 9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+           L V  + V S  + PEE   + +  ++    K+Y+++ + + R  I++ NL  I   NL 
+Sbjct: 4   LKVLLLPVVSFALYPEEILDTHWELWKKTHRKQYNNKVDEISRRLIWEKNLKYISIHNLE 63
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR 125
+           A       +  +N   D++S+E        K  +         Y+  E+    P + D+R
+Sbjct: 64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLKVPLSHSRSNDTLYIP-EWEGRAPDSVDYR 122
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E         
+Sbjct: 123 KKGYVTPVKNQGQCGSCWAFSSVGALEGQLKKKTGKLLNLSPQNLVDCVSE--------- 173
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPK-N 244
+            ++GC GG   NA+ Y+ KN GI +E +YPY  +    C +N     AK   +  IP+ N
+Sbjct: 174 -NDGCGGGYMTNAFQYVQKNRGIDSEDAYPYVGQE-ESCMYNPTGKAAKCRGYREIPEGN 231
+
+Query: 245 ETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   +   GP+++A DA    +QFY  GV +D  CN ++L+H +L VGY       +
+Sbjct: 232 EKALKRAVARVGPVSVAIDASLTSFQFYSKGVYYDESCNSDNLNHAVLAVGYG-----IQ 286
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 339
+           K   +WI+KNSWG +WG +GYI + R K N CG++N  S
+Sbjct: 287 KGNKHWIIKNSWGENWGNKGYILMARNKNNACGIANLAS 325
+
+
+>sp|P25778|ORYC_ORYSA ORYZAIN GAMMA CHAIN PRECURSOR
+          Length = 362
+
+ Score =  194 bits (487), Expect = 3e-49
+ Identities = 114/327 (34%), Positives = 171/327 (51%), Gaps = 37/327 (11%)
+
+Query: 28  QFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F  +  K+Y    E   RF IF  +L  +   N   + ++     G+N+FAD+S +
+Sbjct: 61  RFARFAVRHGKRYGDAAEVQRRFRIFSESLELVRSTNRRGLPYR----LGINRFADMSWE 116
+
+Query: 87  EFKNYYLN---NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCW 143
+           EF+   L    N  A    +  + D        ++P   DWR  G V+PVK+QG CGSCW
+Sbjct: 117 EFQASRLGAAQNCSATLAGNHRMRD------APALPETKDWREDGIVSPVKDQGHCGSCW 170
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+            FSTTG++E ++  +    VSLSEQ L DC      +        GC+GGL   A+ YI 
+Sbjct: 171 PFSTTGSLEARYTQATGPPVSLSEQQLADCATRYNNF--------GCSGGLPSQAFEYIK 222
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKI---SNFTMIPKNETVMAGYIVSTGPLAI 260
+            NGG+ TE +YPYT   G  C++   N G K+    N T++ ++E   A  +V   P+++
+Sbjct: 223 YNGGLDTEEAYPYTGVNGI-CHYKPENAGVKVLDSVNITLVAEDELKNAVGLVR--PVSV 279
+
+Query: 261 AADAVE-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGAD 316
+           A   +  ++ Y  GV+       +P  ++H +L VGY  +N      +PYW++KNSWGAD
+Sbjct: 280 AFQVINGFRMYKSGVYTSDHCGTSPMDVNHAVLAVGYGVEN-----GVPYWLIKNSWGAD 334
+
+Query: 317 WGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           WG+ GY  +  GKN CG++   S  I+
+Sbjct: 335 WGDNGYFTMEMGKNMCGIATCASYPIV 361
+
+
+>sp|P25251|CYS4_BRANA CYSTEINE PROTEINASE COT44 PRECURSOR
+          Length = 328
+
+ Score =  194 bits (487), Expect = 3e-49
+ Identities = 115/300 (38%), Positives = 160/300 (53%), Gaps = 29/300 (9%)
+
+Query: 47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK----EAIFTD 102
+           ERF IFK NL  I+  N    N  A  K G+  FA+L++DE+++ YL  +      I   
+Sbjct: 27  ERFNIFKDNLRFIDLHN--ENNKNATYKLGLTIFANLTNDEYRSLYLGARTEPVRRITKA 84
+
+Query: 103 DLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKL 162
+                 Y     ++ +P   DWR +GAV  +K+QG CGSCW+FST   VEG + I   +L
+Sbjct: 85  KNVNMKYSAAVNVDEVPVTVDWRQKGAVNAIKDQGTCGSCWAFSTAAAVEGINKIVTGEL 144
+
+Query: 163 VSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGT 222
+           VSLSEQ LVDCD         ++ ++GCNGGL   A+ +I+KNGG+ TE  YPY    G 
+Sbjct: 145 VSLSEQELVDCD---------KSYNQGCNGGLMDYAFQFIMKNGGLNTEKDYPYHGTNG- 194
+
+Query: 223 QCNFNSANIG-AKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPC 279
+           +CN    N     I  +  +P  +       VS  P+++A DA    +Q Y  G+F   C
+Sbjct: 195 KCNSLLKNSRVVTIDGYEDVPSKDETALKRAVSYQPVSVAIDAGGRAFQHYQSGIFTGKC 254
+
+Query: 280 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVS 335
+             N +DH ++ VGY ++N      + YWIV+NSWG  WGE GYI + R        CG++
+Sbjct: 255 GTN-MDHAVVAVGYGSEN-----GVDYWIVRNSWGTRWGEDGYIRMERNVASKSGKCGIA 308
+
+
+>sp|P09668|CATH_HUMAN CATHEPSIN H PRECURSOR
+          Length = 335
+
+ Score =  192 bits (482), Expect = 1e-48
+ Identities = 122/326 (37%), Positives = 168/326 (51%), Gaps = 32/326 (9%)
+
+Query: 25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           E+  F  +  K  K YS EEY  R + F SN  KI   N    N     K  +N+F+D+S
+Sbjct: 31  EKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHN----NGNHTFKMALNQFSDMS 86
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGA-VTPVKNQGQCGSCW 143
+             E K+ YL ++          ++YL        P + DWR +G  V+PVKNQG CGSCW
+Sbjct: 87  FAEIKHKYLWSEPQ--NCSATKSNYLRGT--GPYPPSVDWRKKGNFVSPVKNQGACGSCW 142
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +E    I+  K++SL+EQ LVDC  +   Y        GC GGL   A+ YI+
+Sbjct: 143 TFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNY--------GCQGGLPSQAFEYIL 194
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSAN-IG--AKISNFTMIPKNETVMAGYIVSTGPLAI 260
+            N GI  E +YPY  + G  C F     IG    ++N T+   +E  M   +    P++ 
+Sbjct: 195 YNKGIMGEDTYPYQGKDG-YCKFQPGKAIGFVKDVANITIY--DEEAMVEAVALYNPVSF 251
+
+Query: 261 AADAV-EWQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGAD 316
+           A +   ++  Y  G++    C+  P+ ++H +L VGY  KN I     PYWIVKNSWG  
+Sbjct: 252 AFEVTQDFMMYRTGIYSSTSCHKTPDKVNHAVLAVGYGEKNGI-----PYWIVKNSWGPQ 306
+
+Query: 317 WGEQGYIYLRRGKNTCGVSNFVSTSI 342
+           WG  GY  + RGKN CG++   S  I
+Sbjct: 307 WGMNGYFLIERGKNMCGLAACASYPI 332
+
+
+>sp|O46427|CATH_PIG CATHEPSIN H PRECURSOR
+          Length = 335
+
+ Score =  191 bits (481), Expect = 2e-48
+ Identities = 122/332 (36%), Positives = 171/332 (50%), Gaps = 28/332 (8%)
+
+Query: 17  SSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFG 76
+           S+  +   E+  F  +  +  KKYS EEY  R ++F SN  KI   N  A NH    K G
+Sbjct: 23  SNLAVSSFEKLHFKSWMVQHQKKYSLEEYHHRLQVFVSNWRKINAHN--AGNHTF--KLG 78
+
+Query: 77  VNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGA-VTPVKN 135
+           +N+F+D+S DE ++ YL ++           +YL        P + DWR +G  V+PVKN
+Sbjct: 79  LNQFSDMSFDEIRHKYLWSEPQ--NCSATKGNYLRGT--GPYPPSMDWRKKGNFVSPVKN 134
+
+Query: 136 QGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQ 195
+           QG CGSCW+FSTTG +E    I+  K++SL+EQ LVDC         +   + GC GGL 
+Sbjct: 135 QGSCGSCWTFSTTGALESAVAIATGKMLSLAEQQLVDC--------AQNFNNHGCQGGLP 186
+
+Query: 196 PNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVS 254
+             A+ YI  N GI  E +YPY  +    C F      A + +   I  N E  M   +  
+Sbjct: 187 SQAFEYIRYNKGIMGEDTYPYKGQ-DDHCKFQPDKAIAFVKDVANITMNDEEAMVEAVAL 245
+
+Query: 255 TGPLAIAADAV-EWQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVK 310
+             P++ A +   ++  Y  G++    C+  P+ ++H +L VGY  +N I     PYWIVK
+Sbjct: 246 YNPVSFAFEVTNDFLMYRKGIYSSTSCHKTPDKVNHAVLAVGYGEENGI-----PYWIVK 300
+
+Query: 311 NSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+           NSWG  WG  GY  + RGKN CG++   S  I
+Sbjct: 301 NSWGPQWGMNGYFLIERGKNMCGLAACASYPI 332
+
+
+>sp|P05167|ALEU_HORVU THIOL PROTEASE ALEURAIN PRECURSOR
+          Length = 362
+
+ Score =  191 bits (481), Expect = 2e-48
+ Identities = 111/322 (34%), Positives = 167/322 (51%), Gaps = 27/322 (8%)
+
+Query: 28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F  ++ K Y S  E   RF IF  +L ++   N   + ++     G+N+F+D+S +
+Sbjct: 60  RFARFAVRYGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPYR----LGINRFSDMSWE 115
+
+Query: 87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           EF+   L    A  T    +A         ++P   DWR  G V+PVKNQ  CGSCW+FS
+Sbjct: 116 EFQATRLG---AAQTCSATLAGNHLMRDAAALPETKDWREDGIVSPVKNQAHCGSCWTFS 172
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           TTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI  NG
+Sbjct: 173 TTGALEAAYTQATGKNISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYIKYNG 224
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAADAV 265
+           GI TE SYPY    G  C++ + N   ++ +   I  N E  +   +    P+++A   +
+Sbjct: 225 GIDTEESYPYKGVNGV-CHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAFQVI 283
+
+Query: 266 E-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQG 321
+           + ++ Y  GV+        P+ ++H +L VGY  +N      +PYW++KNSWGADWG+ G
+Sbjct: 284 DGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVEN-----GVPYWLIKNSWGADWGDNG 338
+
+Query: 322 YIYLRRGKNTCGVSNFVSTSII 343
+           Y  +  GKN C ++   S  ++
+Sbjct: 339 YFKMEMGKNMCAIATCASYPVV 360
+
+
+>sp|P43236|CATK_RABIT CATHEPSIN K PRECURSOR (OC-2 PROTEIN)
+          Length = 329
+
+ Score =  191 bits (480), Expect = 2e-48
+ Identities = 119/339 (35%), Positives = 179/339 (52%), Gaps = 25/339 (7%)
+
+Query: 9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+           L V  + V S  + PEE   +Q+  ++  ++K+Y+ + + + R  I++ NL  I   NL 
+Sbjct: 4   LKVLLLPVVSFALHPEEILDTQWELWKKTYSKQYNSKVDEISRRLIWEKNLKHISIHNLE 63
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR 125
+           A       +  +N   D++S+E        K            Y+ D +    P + D+R
+Sbjct: 64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLKVPPSRSHSNDTLYIPD-WEGRTPDSIDYR 122
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E         
+Sbjct: 123 KKGYVTPVKNQGQCGSCWAFSSVGALEGQLKKKTGKLLNLSPQNLVDCVSE--------- 173
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPK-N 244
+            + GC GG   NA+ Y+ +N GI +E +YPY  +    C +N     AK   +  IP+ N
+Sbjct: 174 -NYGCGGGYMTNAFQYVQRNRGIDSEDAYPYVGQ-DESCMYNPTGKAAKCRGYREIPEGN 231
+
+Query: 245 ETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   +   GP+++A DA    +QFY  GV +D  C+ ++++H +L VGY       +
+Sbjct: 232 EKALKRAVARVGPVSVAIDASLTSFQFYSKGVYYDENCSSDNVNHAVLAVGYG-----IQ 286
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 339
+           K   +WI+KNSWG  WG +GYI + R K N CG++N  S
+Sbjct: 287 KGNKHWIIKNSWGESWGNKGYILMARNKNNACGIANLAS 325
+
+
+>sp|P10056|PAP3_CARPA CARICAIN PRECURSOR (PAPAYA PROTEINASE OMEGA) (PAPAYA PROTEINASE
+           III) (PPIII) (PAPAYA PEPTIDASE A)
+          Length = 348
+
+ Score =  190 bits (479), Expect = 3e-48
+ Identities = 122/319 (38%), Positives = 166/319 (51%), Gaps = 46/319 (14%)
+
+Query: 37  NKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKFADLSSDEFKNYYL 93
+           NK Y + +E L RFEIFK NL  I+E N      K +  +  G+N+FADLS+DEF   Y+
+Sbjct: 56  NKFYENVDEKLYRFEIFKDNLNYIDETN------KKNNSYWLGLNEFADLSNDEFNEKYV 109
+
+Query: 94  NNKEAIFTDDLPVADYLDDEFIN----SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTG 149
+            +       D  +    D+EFIN    ++P   DWR +GAVTPV++QG CGSCW+FS   
+Sbjct: 110 GS-----LIDATIEQSYDEEFINEDTVNLPENVDWRKKGAVTPVRHQGSCGSCWAFSAVA 164
+
+Query: 150 NVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ 209
+            VEG + I   KLV LSEQ LVDC+              GC GG  P A  Y+ KN GI 
+Sbjct: 165 TVEGINKIRTGKLVELSEQELVDCERR----------SHGCKGGYPPYALEYVAKN-GIH 213
+
+Query: 210 TESSYPYTAETGTQCNFNSANIGAKISNFTMI----PKNETVMAGYIVSTGPLAIAADAV 265
+             S YPY A+ GT     +  +G  I   + +    P NE  +   I    P+++  ++ 
+Sbjct: 214 LRSKYPYKAKQGT---CRAKQVGGPIVKTSGVGRVQPNNEGNLLNAIAKQ-PVSVVVESK 269
+
+Query: 266 --EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 323
+              +Q Y GG+F+ PC    +DH +  VGY            Y ++KNSWG  WGE+GYI
+Sbjct: 270 GRPFQLYKGGIFEGPCG-TKVDHAVTAVGYGKSG-----GKGYILIKNSWGTAWGEKGYI 323
+
+Query: 324 YLRRGK-NTCGVSNFVSTS 341
+            ++R   N+ GV     +S
+Sbjct: 324 RIKRAPGNSPGVCGLYKSS 342
+
+
+>sp|P49935|CATH_MOUSE CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPSIN BA)
+          Length = 333
+
+ Score =  189 bits (476), Expect = 6e-48
+ Identities = 117/324 (36%), Positives = 167/324 (51%), Gaps = 28/324 (8%)
+
+Query: 25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           E+  F  +  +  K YS  EY  R ++F +N  KI+  N    NH    K  +N+F+D+S
+Sbjct: 29  EKFHFKSWMKQHQKTYSSVEYNHRLQMFANNWRKIQAHN--QRNHTF--KMALNQFSDMS 84
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRG-AVTPVKNQGQCGSCW 143
+             E K+ +L ++          ++YL        P++ DWR +G  V+PVKNQG C SCW
+Sbjct: 85  FAEIKHKFLWSEPQ--NCSATKSNYLRGT--GPYPSSMDWRKKGNVVSPVKNQGACASCW 140
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +E    I+  K++SL+EQ LVDC         +   + GC GGL   A+ YI+
+Sbjct: 141 TFSTTGALESAVAIASGKMLSLAEQQLVDC--------AQAFNNHGCKGGLPSQAFEYIL 192
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAA 262
+            N GI  E SYPY  +  + C FN     A + N   I  N E  M   +    P++ A 
+Sbjct: 193 YNKGIMEEDSYPYIGK-DSSCRFNPQKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAF 251
+
+Query: 263 DAVE-WQFYIGGVFDIPC---NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+           +  E +  Y  GV+        P+ ++H +L VGY  +N +      YWIVKNSWG+ WG
+Sbjct: 252 EVTEDFLMYKSGVYSSKSCHKTPDKVNHAVLAVGYGEQNGLL-----YWIVKNSWGSQWG 306
+
+Query: 319 EQGYIYLRRGKNTCGVSNFVSTSI 342
+           E GY  + RGKN CG++   S  I
+Sbjct: 307 ENGYFLIERGKNMCGLAACASYPI 330
+
+
+>sp|P55097|CATK_MOUSE CATHEPSIN K PRECURSOR
+          Length = 329
+
+ Score =  188 bits (473), Expect = 1e-47
+ Identities = 117/344 (34%), Positives = 181/344 (52%), Gaps = 35/344 (10%)
+
+Query: 9   LAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+           L V  + + S  + PEE   +Q+  ++    K+Y+ + + + R  I++ NL +I   NL 
+Sbjct: 4   LKVLLLPMVSFALSPEEMLDTQWELWKKTHQKQYNSKVDEISRRLIWEKNLKQISAHNLE 63
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDD-----EFINSIPT 120
+           A       +  +N   D++S+E        +        P   Y +D     E+   +P 
+Sbjct: 64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLRIP------PSRSYSNDTLYTPEWEGRVPD 117
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+           + D+R +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E    
+Sbjct: 118 SIDYRKKGYVTPVKNQGQCGSCWAFSSAGALEGQLKKKTGKLLALSPQNLVDCVTE---- 173
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+                 + GC GG    A+ Y+ +NGGI +E ++PY  +    C +N+    AK   +  
+Sbjct: 174 ------NYGCGGGYMTTAFQYVQQNGGIDSEDAFPYVGQ-DESCMYNATAKAAKCRGYRE 226
+
+Query: 241 IP-KNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAK 296
+           IP  NE  +   +   GP++++ DA    +QFY  GV +D  C+ ++++H +L+VGY   
+Sbjct: 227 IPVGNEKALKRAVARVGPISVSIDASLASFQFYSRGVYYDENCDRDNVNHAVLVVGYGT- 285
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 339
+               +K   +WI+KNSWG  WG +GY  L R K N CG++N  S
+Sbjct: 286 ----QKGSKHWIIKNSWGESWGNKGYALLARNKNNACGITNMAS 325
+
+
+>sp|P56203|CATW_MOUSE CATHEPSIN W PRECURSOR (LYMPHOPAIN)
+          Length = 371
+
+ Score =  185 bits (466), Expect = 9e-47
+ Identities = 110/338 (32%), Positives = 164/338 (47%), Gaps = 32/338 (9%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+           P E +  F  FQ +FN+ Y +  EY  R  IF  NL + + L    +      +FG   F
+Sbjct: 33  PLELKEVFKLFQIRFNRSYWNPAEYTRRLSIFAHNLAQAQRLQQEDLG---TAEFGETPF 89
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR-TRGAVTPVKNQGQC 139
+           +DL+ +EF   Y   +    T ++       + +  S+P   DWR  +  ++ VKNQG C
+Sbjct: 90  SDLTEEEFGQLYGQERSPERTPNM-TKKVESNTWGESVPRTCDWRKAKNIISSVKNQGSC 148
+
+Query: 140 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 199
+             CW+ +   N++    I   + V +S Q L+DC          E C  GCNGG   +AY
+Sbjct: 149 KCCWAMAAADNIQALWRIKHQQFVDVSVQELLDC----------ERCGNGCNGGFVWDAY 198
+
+Query: 200 NYIIKNGGIQTESSYPYTAETGT-QCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPL 258
+             ++ N G+ +E  YP+  +    +C        A I +FTM+  NE  +A Y+   GP+
+Sbjct: 199 LTVLNNSGLASEKDYPFQGDRKPHRCLAKKYKKVAWIQDFTMLSNNEQAIAHYLAVHGPI 258
+
+Query: 259 AIAADAVEWQFYIGGVFDIP---CNPNSLDHGILIVGYSAKN------TIF------RKN 303
+            +  +    Q Y  GV       C+P  +DH +L+VG+  K       T+       R +
+Sbjct: 259 TVTINMKLLQHYQKGVIKATPSSCDPRQVDHSVLLVGFGKKKEGMQTGTVLSHSRKRRHS 318
+
+Query: 304 MPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+            PYWI+KNSWGA WGE+GY  L RG NTCGV+ +  T+
+Sbjct: 319 SPYWILKNSWGAHWGEKGYFRLYRGNNTCGVTKYPFTA 356
+
+
+>sp|P25250|CYS2_HORVU CYSTEINE PROTEINASE EP-B 2 PRECURSOR
+          Length = 373
+
+ Score =  184 bits (463), Expect = 2e-46
+ Identities = 123/345 (35%), Positives = 172/345 (49%), Gaps = 40/345 (11%)
+
+Query: 8   VLAVFTVFVSSRGIPPEEQSQFLE---------FQDKFNKKYSHEEYLERFEIFKSNLGK 58
+           VLAV  V + S  IP E++    E         +Q     +  H E   RF  FKSN   
+Sbjct: 17  VLAVAAVELCS-AIPMEDKDLESEEALWDLYERWQSAHRVRRHHAEKHRRFGTFKSNAHF 75
+
+Query: 59  IEELNLIAINHKADTKFGV--NKFADLSSDEFKNYYLNNKEAIFTDDLP-VADYLDDEF- 114
+           I      + N + D  + +  N+F D+   EF+  ++ +         P V  ++     
+Sbjct: 76  IH-----SHNKRGDHPYRLHLNRFGDMDQAEFRATFVGDLRRDTPSKPPSVPGFMYAALN 130
+
+Query: 115 INSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCD 174
+           ++ +P + DWR +GAVT VK+QG+CGSCW+FST  +VEG + I    LVSLSEQ L+DCD
+Sbjct: 131 VSDLPPSVDWRQKGAVTGVKDQGKCGSCWAFSTVVSVEGINAIRTGSLVSLSEQELIDCD 190
+
+Query: 175 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNF----NSAN 230
+                     A ++GC GGL  NA+ YI  NGG+ TE++YPY A  GT CN      ++ 
+Sbjct: 191 ---------TADNDGCQGGLMDNAFEYIKNNGGLITEAAYPYRAARGT-CNVARAAQNSP 240
+
+Query: 231 IGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGI 288
+           +   I     +P N        V+  P+++A +A    + FY  GVF   C    LDHG+
+Sbjct: 241 VVVHIDGHQDVPANSEEDLARAVANQPVSVAVEASGKAFMFYSEGVFTGECG-TELDHGV 299
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCG 333
+            +VGY     +      YW VKNSWG  WGEQGYI + +     G
+Sbjct: 300 AVVGYG----VAEDGKAYWTVKNSWGPSWGEQGYIRVEKDSGASG 340
+
+
+>sp|P25249|CYS1_HORVU CYSTEINE PROTEINASE EP-B 1 PRECURSOR
+          Length = 371
+
+ Score =  184 bits (462), Expect = 3e-46
+ Identities = 124/349 (35%), Positives = 171/349 (48%), Gaps = 48/349 (13%)
+
+Query: 8   VLAVFTVFVSSRGIPPEEQSQFLE---------FQDKFNKKYSHEEYLERFEIFKSNLGK 58
+           VLAV  V + S  IP E++    E         +Q     +  H E   RF  FKSN   
+Sbjct: 17  VLAVAAVELCS-AIPMEDKDLESEEALWDLYERWQSAHRVRRHHAEKHRRFGTFKSNAHF 75
+
+Query: 59  IEELNLIAINHKADTKFGV--NKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF-- 114
+           I      + N + D  + +  N+F D+   EF+  ++ +       D P        F  
+Sbjct: 76  IH-----SHNKRGDHPYRLHLNRFGDMDQAEFRATFVGDLRR----DTPAKPPSVPGFMY 126
+
+Query: 115 ----INSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNL 170
+               ++ +P + DWR +GAVT VK+QG+CGSCW+FST  +VEG + I    LVSLSEQ L
+Sbjct: 127 AALNVSDLPPSVDWRQKGAVTGVKDQGKCGSCWAFSTVVSVEGINAIRTGSLVSLSEQEL 186
+
+Query: 171 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNF---- 226
+           +DCD          A ++GC GGL  NA+ YI  NGG+ TE++YPY A  GT CN     
+Sbjct: 187 IDCD---------TADNDGCQGGLMDNAFEYIKNNGGLITEAAYPYRAARGT-CNVARAA 236
+
+Query: 227 NSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSL 284
+            ++ +   I     +P N        V+  P+++A +A    + FY  GVF   C    L
+Sbjct: 237 QNSPVVVHIDGHQDVPANSEEDLARAVANQPVSVAVEASGKAFMFYSEGVFTGDCG-TEL 295
+
+Query: 285 DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCG 333
+           DHG+ +VGY     +      YW VKNSWG  WGEQGYI + +     G
+Sbjct: 296 DHGVAVVGYG----VAEDGKAYWTVKNSWGPSWGEQGYIRVEKDSGASG 340
+
+
+>sp|P05994|PAP4_CARPA PAPAYA PROTEINASE IV PRECURSOR (PPIV) (PAPAYA PEPTIDASE B) (GLYCYL
+           ENDOPEPTIDASE)
+          Length = 348
+
+ Score =  183 bits (461), Expect = 3e-46
+ Identities = 112/309 (36%), Positives = 164/309 (52%), Gaps = 33/309 (10%)
+
+Query: 35  KFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYL 93
+           K NK Y + +E L RFEIFK NL  I+E N +   +      G+N+F+DLS+DEFK  Y+
+Sbjct: 54  KHNKNYKNVDEKLYRFEIFKDNLKYIDERNKMINGYW----LGLNEFSDLSNDEFKEKYV 109
+
+Query: 94  NNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 153
+            +    +T+     ++++++ ++ +P + DWR +GAVTPVK+QG C SCW+FST   VEG
+Sbjct: 110 GSLPEDYTNQPYDEEFVNEDIVD-LPESVDWRAKGAVTPVKHQGYCESCWAFSTVATVEG 168
+
+Query: 154 QHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESS 213
+            + I    LV LSEQ LVDCD +            GCN G Q  +  Y+ +N GI   + 
+Sbjct: 169 INKIKTGNLVELSEQELVDCDKQ----------SYGCNRGYQSTSLQYVAQN-GIHLRAK 217
+
+Query: 214 YPYTAETGTQCNFNSANIGAKI--SNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQF 269
+           YPY A+  T C  N    G K+  +    +  N        ++  P+++  ++   ++Q 
+Sbjct: 218 YPYIAKQQT-CRANQVG-GPKVKTNGVGRVQSNNEGSLLNAIAHQPVSVVVESAGRDFQN 275
+
+Query: 270 YIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK 329
+           Y GG+F+  C    +DH +  VGY            Y ++KNSWG  WGE GYI +RR  
+Sbjct: 276 YKGGIFEGSCG-TKVDHAVTAVGYGKSG-----GKGYILIKNSWGPGWGENGYIRIRRAS 329
+
+Query: 330 ----NTCGV 334
+                 CGV
+Sbjct: 330 GNSPGVCGV 338
+
+
+>sp|P22895|P34_SOYBN P34 PROBABLE THIOL PROTEASE PRECURSOR
+          Length = 379
+
+ Score =  183 bits (461), Expect = 3e-46
+ Identities = 108/318 (33%), Positives = 171/318 (52%), Gaps = 38/318 (11%)
+
+Query: 40  YSHEEYLERFEIFKSNLGKIEELNLIAINHKA--DTKFGVNKFADLSSDEFKNYYLNNKE 97
+           ++HEE  +R EIFK+N   I ++N    N K+    + G+NKFAD++  EF   YL   +
+Sbjct: 56  HNHEEEAKRLEIFKNNSNYIRDMNA---NRKSPHSHRLGLNKFADITPQEFSKKYLQAPK 112
+
+Query: 98  AIFTDDLPVADYL---DDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQ 154
+            + +  + +A+     +    +  P ++DWR +G +T VK QG CG  W+FS TG +E  
+Sbjct: 113 DV-SQQIKMANKKMKKEQYSCDHPPASWDWRKKGVITQVKYQGGCGRGWAFSATGAIEAA 171
+
+Query: 155 HFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSY 214
+           H I+   LVSLSEQ LVDC  E           EG   G Q  ++ +++++GGI T+  Y
+Sbjct: 172 HAIATGDLVSLSEQELVDCVEE----------SEGSYNGWQYQSFEWVLEHGGIATDDDY 221
+
+Query: 215 PYTAETGTQCNFNSANIGAKISNF-TMIPKNETVMAG------YIVSTGPLAIAADAVEW 267
+           PY A+ G +C  N       I  + T+I  +E+  +         +   P++++ DA ++
+Sbjct: 222 PYRAKEG-RCKANKIQDKVTIDGYETLIMSDESTESETEQAFLSAILEQPISVSIDAKDF 280
+
+Query: 268 QFYIGGVFDIP--CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+             Y GG++D     +P  ++H +L+VGY + +      + YWI KNSWG DWGE GYI++
+Sbjct: 281 HLYTGGIYDGENCTSPYGINHFVLLVGYGSAD-----GVDYWIAKNSWGFDWGEDGYIWI 335
+
+Query: 326 RRGK----NTCGVSNFVS 339
+           +R        CG++ F S
+Sbjct: 336 QRNTGNLLGVCGMNYFAS 353
+
+
+>sp|P43234|CATO_HUMAN CATHEPSIN O PRECURSOR
+          Length = 321
+
+ Score =  182 bits (458), Expect = 8e-46
+ Identities = 98/299 (32%), Positives = 155/299 (51%), Gaps = 28/299 (9%)
+
+Query: 52  FKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLD 111
+           F+ +L +   LN +  +  +   +G+N+F+ L  +EFK  YL +K + F        Y  
+Sbjct: 44  FRESLNRHRYLNSLFPSENSTAFYGINQFSYLFPEEFKAIYLRSKPSKFPR------YSA 97
+
+Query: 112 DEFIN----SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSE 167
+           +  ++    S+P  FDWR +  VT V+NQ  CG CW+FS  G VE  + I    L  LS 
+Sbjct: 98  EVHMSIPNVSLPLRFDWRDKQVVTQVRNQQMCGGCWAFSVVGAVESAYAIKGKPLEDLSV 157
+
+Query: 168 QNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK-NGGIQTESSYPYTAETGTQCNF 226
+           Q ++DC +           + GCNGG   NA N++ K    +  +S YP+ A+ G    F
+Sbjct: 158 QQVIDCSYN----------NYGCNGGSTLNALNWLNKMQVKLVKDSEYPFKAQNGLCHYF 207
+
+Query: 227 NSANIGAKISNFTM--IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSL 284
+           + ++ G  I  ++       E  MA  +++ GPL +  DAV WQ Y+GG+    C+    
+Sbjct: 208 SGSHSGFSIKGYSAYDFSDQEDEMAKALLTFGPLVVIVDAVSWQDYLGGIIQHHCSSGEA 267
+
+Query: 285 DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           +H +LI G+         + PYWIV+NSWG+ WG  GY +++ G N CG+++ VS+  +
+Sbjct: 268 NHAVLITGFDKTG-----STPYWIVRNSWGSSWGVDGYAHVKMGSNVCGIADSVSSIFV 321
+
+
+>sp|P56202|CATW_HUMAN CATHEPSIN W PRECURSOR (LYMPHOPAIN)
+          Length = 376
+
+ Score =  182 bits (457), Expect = 1e-45
+ Identities = 109/341 (31%), Positives = 170/341 (48%), Gaps = 35/341 (10%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+           P E +  F  FQ +FN+ Y S EE+  R +IF  NL + + L    +      +FGV  F
+Sbjct: 35  PLELKEAFKLFQIQFNRSYLSPEEHAHRLDIFAHNLAQAQRLQEEDLG---TAEFGVTPF 91
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR-TRGAVTPVKNQGQC 139
+           +DL+ +EF   Y   + A     +   +   +E   S+P + DWR   GA++P+K+Q  C
+Sbjct: 92  SDLTEEEFGQLYGYRRAAGGVPSMG-REIRSEEPEESVPFSCDWRKVAGAISPIKDQKNC 150
+
+Query: 140 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 199
+             CW+ +  GN+E    IS    V +S   L+DC            C +GC+GG   +A+
+Sbjct: 151 NCCWAMAAAGNIETLWRISFWDFVDVSVHELLDCGR----------CGDGCHGGFVWDAF 200
+
+Query: 200 NYIIKNGGIQTESSYPYTAETGT-QCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPL 258
+             ++ N G+ +E  YP+  +    +C+       A I +F M+  NE  +A Y+ + GP+
+Sbjct: 201 ITVLNNSGLASEKDYPFQGKVRAHRCHPKKYQKVAWIQDFIMLQNNEHRIAQYLATYGPI 260
+
+Query: 259 AIAADAVEWQFYIGGVFDIP---CNPNSLDHGILIVGYSA--------KNTIFRKNMP-- 305
+            +  +    Q Y  GV       C+P  +DH +L+VG+ +          T+  ++ P  
+Sbjct: 261 TVTINMKPLQLYRKGVIKATPTTCDPQLVDHSVLLVGFGSVKSEEGIWAETVSSQSQPQP 320
+
+Query: 306 -----YWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+                YWI+KNSWGA WGE+GY  L RG NTCG++ F  T+
+Sbjct: 321 PHPTPYWILKNSWGAQWGEKGYFRLHRGSNTCGITKFPLTA 361
+
+
+>sp|P25774|CATS_HUMAN CATHEPSIN S PRECURSOR
+          Length = 331
+
+ Score =  177 bits (444), Expect = 3e-44
+ Identities = 116/347 (33%), Positives = 176/347 (50%), Gaps = 35/347 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYS--HEEYLERFEIFKSNLGKIEEL 62
+           L+ VL V +  V+     P     +  ++  + K+Y   +EE + R  I++ NL  +   
+Sbjct: 4   LVCVLLVCSSAVAQLHKDPTLDHHWHLWKKTYGKQYKEKNEEAVRRL-IWEKNLKFVMLH 62
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS----- 117
+           NL           G+N   D++S+E  +          T  L V                
+Sbjct: 63  NLEHSMGMHSYDLGMNHLGDMTSEEVMS---------LTSSLRVPSQWQRNITYKSNPNR 113
+
+Query: 118 -IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+            +P + DWR +G VT VK QG CG+CW+FS  G +E Q  +   KLV+LS QNLVDC   
+Sbjct: 114 ILPDSVDWREKGCVTEVKYQGSCGACWAFSAVGALEAQLKLKTGKLVTLSAQNLVDC--- 170
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                 E+  ++GCNGG    A+ YII N GI +++SYPY A    +C ++S    A  S
+Sbjct: 171 ----STEKYGNKGCNGGFMTTAFQYIIDNKGIDSDASYPYKA-MDQKCQYDSKYRAATCS 225
+
+Query: 237 NFTMIP-KNETVMAGYIVSTGPLAIAADAVEWQFYI--GGVFDIPCNPNSLDHGILIVGY 293
+            +T +P   E V+   + + GP+++  DA    F++   GV+  P    +++HG+L+VGY
+Sbjct: 226 KYTELPYGREDVLKEAVANKGPVSVGVDARHPSFFLYRSGVYYEPSCTQNVNHGVLVVGY 285
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 339
+              N        YW+VKNSWG ++GE+GYI + R K N CG+++F S
+Sbjct: 286 GDLN-----GKEYWLVKNSWGHNFGEEGYIRMARNKGNHCGIASFPS 327
+
+
+>sp|P00784|PAPA_CARPA PAPAIN PRECURSOR (PAPAYA PROTEINASE I) (PPI)
+          Length = 345
+
+ Score =  176 bits (442), Expect = 6e-44
+ Identities = 116/315 (36%), Positives = 161/315 (50%), Gaps = 37/315 (11%)
+
+Query: 35  KFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKFADLSSDEFKNY 91
+           K NK Y + +E + RFEIFK NL  I+E N      K +  +  G+N FAD+S+DEFK  
+Sbjct: 54  KHNKIYKNIDEKIYRFEIFKDNLKYIDETN------KKNNSYWLGLNVFADMSNDEFKEK 107
+
+Query: 92  YLNNKEAIFTD-DLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGN 150
+           Y  +    +T  +L   + L+D  +N IP   DWR +GAVTPVKNQG CGSCW+FS    
+Sbjct: 108 YTGSIAGNYTTTELSYEEVLNDGDVN-IPEYVDWRQKGAVTPVKNQGSCGSCWAFSAVVT 166
+
+Query: 151 VEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT 210
+           +EG   I    L   SEQ L+DCD              GCNGG   +A   ++   GI  
+Sbjct: 167 IEGIIKIRTGNLNEYSEQELLDCDRR----------SYGCNGGYPWSALQ-LVAQYGIHY 215
+
+Query: 211 ESSYPYTAETGTQCNFNSANIGAKISNFTMI-PKNETVMAGYIVSTGPLAIAADAV--EW 267
+            ++YPY        +       AK      + P NE  +  Y ++  P+++  +A   ++
+Sbjct: 216 RNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALL-YSIANQPVSVVLEAAGKDF 274
+
+Query: 268 QFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR 327
+           Q Y GG+F  PC  N +DH +  VGY            Y ++KNSWG  WGE GYI ++R
+Sbjct: 275 QLYRGGIFVGPCG-NKVDHAVAAVGYGPN---------YILIKNSWGTGWGENGYIRIKR 324
+
+Query: 328 GK-NTCGVSNFVSTS 341
+           G  N+ GV    ++S
+Sbjct: 325 GTGNSYGVCGLYTSS 339
+
+
+>sp||CATL_CHICK_1 [Segment 1 of 2] CATHEPSIN L
+          Length = 176
+
+ Score =  175 bits (439), Expect = 1e-43
+ Identities = 88/179 (49%), Positives = 117/179 (65%), Gaps = 12/179 (6%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECM 178
+           P + DWR +G VTPVK+QGQCGSCW+FSTTG +EGQHF ++ KLVSLSEQNLVDC     
+Sbjct: 2   PRSVDWREKGYVTPVKDQGQCGSCWAFSTTGALEGQHFRTKGKLVSLSEQNLVDCSRP-- 59
+
+Query: 179 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNF 238
+             EG    ++GCNGGL   A+ Y+  NGGI +E SYPYTA+    C + +    A  + F
+Sbjct: 60  --EG----NQGCNGGLMDQAFQYVQDNGGIDSEESYPYTAKDDEDCRYKAEYNAANDTGF 113
+
+Query: 239 TMIPK-NETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGY 293
+             IP+ +E  +   + S GP+++A DA    +QFY  G++  P C+   LDHG+L+VGY
+Sbjct: 114 VDIPQGHERALMKAVASVGPVSVAIDAGHSSFQFYQSGIYYEPDCSSEDLDHGVLVVGY 172
+
+
+>sp|P25326|CATS_BOVIN CATHEPSIN S
+          Length = 217
+
+ Score =  173 bits (434), Expect = 5e-43
+ Identities = 91/226 (40%), Positives = 131/226 (57%), Gaps = 17/226 (7%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+           +P + DWR +G VT VK QG CGSCW+FS  G +E Q  +   KLVSLS QNLVDC    
+Sbjct: 1   LPDSMDWREKGCVTEVKYQGACGSCWAFSAVGALEAQVKLKTGKLVSLSAQNLVDC---- 56
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+                 +  ++GCNGG    A+ YII N GI +E+SYPY A  G +C ++  N  A  S 
+Sbjct: 57  ---STAKYGNKGCNGGFMTEAFQYIIDNNGIDSEASYPYKAMDG-KCQYDVKNRAATCSR 112
+
+Query: 238 FTMIP-KNETVMAGYIVSTGPLAIAADAVEWQFYI--GGVFDIPCNPNSLDHGILIVGYS 294
+           +  +P  +E  +   + + GP+++  DA    F++   GV+  P    +++HG+L+VGY 
+Sbjct: 113 YIELPFGSEEALKEAVANKGPVSVGIDASHSSFFLYKTGVYYDPSCTQNVNHGVLVVGYG 172
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVS 339
+             +        YW+VKNSWG  +G+QGYI + R   N CG++N+ S
+Sbjct: 173 NLD-----GKDYWLVKNSWGLHFGDQGYIRMARNSGNHCGIANYPS 213
+
+
+>sp|P80884|ANAN_ANACO ANANAIN
+          Length = 216
+
+ Score =  167 bits (419), Expect = 3e-41
+ Identities = 93/223 (41%), Positives = 124/223 (54%), Gaps = 22/223 (9%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+           +P + DWR  GAVT VKNQG+CGSCW+F++   VE  + I +  LVSLSEQ ++DC    
+Sbjct: 1   VPQSIDWRDSGAVTSVKNQGRCGSCWAFASIATVESIYKIKRGNLVSLSEQQVLDC---- 56
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+                  A   GC GG    AY++II N G+ + + YPY A  GT C  N     A I+ 
+Sbjct: 57  -------AVSYGCKGGWINKAYSFIISNKGVASAAIYPYKAAKGT-CKTNGVPNSAYITR 108
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAK 296
+           +T + +N      Y VS  P+A A DA   +Q Y  GVF  PC    L+H I+I+GY   
+Sbjct: 109 YTYVQRNNERNMMYAVSNQPIAAALDASGNFQHYKRGVFTGPCG-TRLNHAIVIIGYGQD 167
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT----CGVS 335
+           +        +WIV+NSWGA WGE GYI L R  ++    CG++
+Sbjct: 168 SA----GKKFWIVRNSWGAGWGEGGYIRLARDVSSSFGICGIA 206
+
+
+>sp|Q02765|CATS_RAT CATHEPSIN S PRECURSOR
+          Length = 330
+
+ Score =  167 bits (418), Expect = 4e-41
+ Identities = 90/228 (39%), Positives = 132/228 (57%), Gaps = 18/228 (7%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+           ++P + DWR +G VT VK QG CGSCW+FS  G +EGQ  +   KLVSLS QNLVDC  E
+Sbjct: 112 TLPDSVDWREKGCVTNVKYQGSCGSCWAFSAEGALEGQLKLKTGKLVSLSAQNLVDCSTE 171
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                 E+  ++GC GG    A+ YII +  I +E+SYPY A    +C ++  N  A  S
+Sbjct: 172 ------EKYGNKGCGGGFMTEAFQYII-DTSIDSEASYPYKA-MDEKCLYDPKNRAATCS 223
+
+Query: 237 NFTMIP-KNETVMAGYIVSTGPLAIAADAV---EWQFYIGGVFDIPCNPNSLDHGILIVG 292
+            +  +P  +E  +   + + GP+++  D      +  Y  GV+D P    +++HG+L+VG
+Sbjct: 224 RYIELPFGDEEALKEAVATKGPVSVGIDDASHSSFFLYQSGVYDDPSCTENMNHGVLVVG 283
+
+Query: 293 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL-RRGKNTCGVSNFVS 339
+           Y   +        YW+VKNSWG  +G+QGYI + R  KN CG++++ S
+Sbjct: 284 YGTLD-----GKDYWLVKNSWGLHFGDQGYIRMARNNKNHCGIASYCS 326
+
+
+>sp|P20721|CYSL_LYCES LOW-TEMPERATURE-INDUCED CYSTEINE PROTEINASE PRECURSOR
+          Length = 346
+
+ Score =  164 bits (410), Expect = 3e-40
+ Identities = 86/226 (38%), Positives = 127/226 (56%), Gaps = 21/226 (9%)
+
+Query: 116 NSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 175
+           +S+P + DWR +G +  VK+QG CGSCW+FS    +E  + I    L+SLSEQ LVDCD 
+Sbjct: 16  DSLPESIDWREKGVLVGVKDQGSCGSCWAFSAVAAMESINAIVTGNLISLSEQELVDCD- 74
+
+Query: 176 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKI 235
+                    + +EGC+GGL   A+ ++IKNGGI TE  YPY    G    +       KI
+Sbjct: 75  --------RSYNEGCDGGLMDYAFEFVIKNGGIDTEEDYPYKERNGVCDQYRKNAKVVKI 126
+
+Query: 236 SNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGY 293
+            ++  +P N        V+  P++IA +A   ++Q Y  G+F   C   ++DHG++I GY
+Sbjct: 127 DSYEDVPVNNEKALQKAVAHQPVSIALEAGGRDFQHYKSGIFTGKCG-TAVDHGVVIAGY 185
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNT----CGVS 335
+             +N      M YWIV+NSWGA+  E GY+ ++R  ++    CG++
+Sbjct: 186 GTEN-----GMDYWIVRNSWGANCRENGYLRVQRNVSSSSGLCGLA 226
+
+
+>sp|P36184|ACP1_ENTHI CYSTEINE PROTEINASE ACP1 PRECURSOR
+          Length = 308
+
+ Score =  162 bits (407), Expect = 7e-40
+ Identities = 105/312 (33%), Positives = 150/312 (47%), Gaps = 40/312 (12%)
+
+Query: 29  FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F ++    NK +++  EYL RF +F  N   +E          A+    +N FAD++ +E
+Sbjct: 18  FKQWAATHNKVFANRAEYLYRFAVFLDNKKFVE----------ANANTELNVFADMTHEE 67
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           F   +L       T ++P         + + P + DWR+   + P K+QGQCGSCW+F T
+Sbjct: 68  FIQTHLG-----MTYEVPETTSNVKAAVKAAPESVDWRS--IMNPAKDQGQCGSCWTFCT 120
+
+Query: 148 TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 207
+           T  +EG+      KL S SEQ LVDCD          A D GC GG   N+  +I +N G
+Sbjct: 121 TAVLEGRVNKDLGKLYSFSEQQLVDCD----------ASDNGCEGGHPSNSLKFIQENNG 170
+
+Query: 208 IQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--V 265
+           +  ES YPY A  GT C     N+     +  +   +ET +   I   GP+A+  DA   
+Sbjct: 171 LGLESDYPYKAVAGT-CK-KVKNVATVTGSRRVTDGSETGLQTIIAENGPVAVGMDASRP 228
+
+Query: 266 EWQFYIGGVF--DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 323
+            +Q Y  G    D  C    ++H +  VGY + +     N  YWI++NSWG  WG+ GY 
+Sbjct: 229 SFQLYKKGTIYSDTKCRSRMMNHCVTAVGYGSNS-----NGKYWIIRNSWGTSWGDAGYF 283
+
+Query: 324 YLRR-GKNTCGV 334
+            L R   N CG+
+Sbjct: 284 LLARDSNNMCGI 295
+
+
+>sp|O17473|CATL_BRUPA CATHEPSIN L-LIKE PRECURSOR
+          Length = 395
+
+ Score =  159 bits (398), Expect = 8e-39
+ Identities = 101/323 (31%), Positives = 155/323 (47%), Gaps = 21/323 (6%)
+
+Query: 26  QSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+           ++++ ++     K Y  +E   R  IF+SN    E +N             +N  ADL+ 
+Sbjct: 88  ETEWKDYVTALGKHYDQKENNFRMAIFESNELMTERINKKYEQGLVSYTTALNDLADLTD 147
+
+Query: 86  DEF--KNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCW 143
+           +EF  +N      +         +++   +    +P   DWRT+GAVTPV+NQG+CGSC+
+Sbjct: 148 EEFMVRNGLRLPNQTDLRGKRQTSEFYRYDKSERLPDQVDWRTKGAVTPVRNQGECGSCY 207
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +F+T   +E  H     +L+ LS QN+VDC             + GC+GG  P A+ Y  
+Sbjct: 208 AFATAAALEAYHKQMTGRLLDLSPQNIVDCT--------RNLGNNGCSGGYMPTAFQYAS 259
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMI-PKNETVMAGYIVSTGP--LAI 260
+           +  GI  ES YPY   T  +C +  +      + F  I P +E  +   +   GP  + I
+Sbjct: 260 RY-GIAMESRYPYVG-TEQRCRWQQSIAVVTDNGFNEIQPGDELALKHAVAKRGPVVVGI 317
+
+Query: 261 AADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQ 320
+           +     ++FY  GV+    N    DH +L VGY    +       YWIVKNSWG DWG+ 
+Sbjct: 318 SGSKRSFRFYKDGVYS-EGNCGRPDHAVLAVGYGTHPSY----GDYWIVKNSWGTDWGKD 372
+
+Query: 321 GYIYLRRGK-NTCGVSNFVSTSI 342
+           GY+Y+ R + N C +++  S  I
+Sbjct: 373 GYVYMARNRGNMCHIASAASFPI 395
+
+
+>sp|Q01957|CPP1_ENTHI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 315
+
+ Score =  156 bits (391), Expect = 5e-38
+ Identities = 108/309 (34%), Positives = 166/309 (52%), Gaps = 39/309 (12%)
+
+Query: 37  NKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADT-KFGVN-KFADLSSDEFKNYYLN 94
+           NK ++  E L R  IF  N        ++A N++ +T K  V+  FA ++++E+ N  L 
+Sbjct: 24  NKHFTAVESLRRRAIFNMNA------RIVAENNRKETFKLSVDGPFAAMTNEEY-NSLLK 76
+
+Query: 95  NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQ 154
+            K +   ++     YL+ +     P A DWR +G VTP+++QG CGSC++F +   +EG+
+Sbjct: 77  LKRS--GEEKGEVRYLNIQ----APKAVDWRKKGKVTPIRDQGNCGSCYTFGSIAALEGR 130
+
+Query: 155 HFISQ---NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTE 211
+             I +   ++ + LSE+++V C  E    +G    + GCNGGL  N YNYI++N GI  E
+Sbjct: 131 LLIEKGGDSETLDLSEEHMVQCTRE----DG----NNGCNGGLGSNVYNYIMEN-GIAKE 181
+
+Query: 212 SSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQF 269
+           S YPYT    T C  +     AKI ++  + +N  V     +S G + ++ DA  V++Q 
+Sbjct: 182 SDYPYTGSDST-CR-SDVKAFAKIKSYNRVARNNEVELKAAISQGLVDVSIDASSVQFQL 239
+
+Query: 270 YIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLR 326
+           Y  G + D  C  N  +L+H +  VGY   +         WIV+NSWG  WGE+GYI + 
+Sbjct: 240 YKSGAYTDTQCKNNYFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWGEKGYINMV 294
+
+Query: 327 RGKNTCGVS 335
+              NTCGV+
+Sbjct: 295 IEGNTCGVA 303
+
+
+>sp|Q06964|CPP3_ENTHI CYSTEINE PROTEINASE 3 PRECURSOR (CYSTEINE PROTEINASE ACP3)
+          Length = 308
+
+ Score =  153 bits (384), Expect = 4e-37
+ Identities = 103/308 (33%), Positives = 159/308 (51%), Gaps = 37/308 (12%)
+
+Query: 37  NKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-KFADLSSDEFKNYYLNN 95
+           NK ++  E L R  IF  N   + E N      K   K  V+  FA ++++E++   L +
+Sbjct: 17  NKHFTAVEALRRRAIFNMNARFVAEFN-----KKGSFKLSVDGPFAAMTNEEYRTL-LKS 70
+
+Query: 96  KEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQH 155
+           K  +  ++     YL+ +     P + DWR +G VTP+++Q QCGSC++F +   +EG+ 
+Sbjct: 71  KRTV--EENGKVTYLNIQ----APESVDWRAQGKVTPIRDQAQCGSCYTFGSLAALEGRL 124
+
+Query: 156 FISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTES 212
+            I +      + LSE++LV C          +  + GCNGGL  N Y+YII+N G+  ES
+Sbjct: 125 LIEKGGNANTLDLSEEHLVQCT--------RDNGNNGCNGGLGSNVYDYIIQN-GVAKES 175
+
+Query: 213 SYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFY 270
+            YPYT  T + C  N     AKI+ +  +P+N        +S G + ++ DA   ++Q Y
+Sbjct: 176 DYPYTG-TDSTCKTN-VKAFAKITGYNKVPRNNEAELKAALSQGLVDVSIDASSAKFQLY 233
+
+Query: 271 IGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR 327
+             G + D  C  N  +L+H +  VGY   +         WIV+NSWG  WG++GYI +  
+Sbjct: 234 KSGAYSDTKCKNNFFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWGDKGYINMVI 288
+
+Query: 328 GKNTCGVS 335
+             NTCGV+
+Sbjct: 289 EGNTCGVA 296
+
+
+>sp|Q01958|CPP2_ENTHI CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 315
+
+ Score =  153 bits (383), Expect = 5e-37
+ Identities = 102/316 (32%), Positives = 161/316 (50%), Gaps = 37/316 (11%)
+
+Query: 29  FLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-KFADLSSDE 87
+           F  +  K NK ++  E L R  IF  N   ++  N I        K  V+  FA ++++E
+Sbjct: 16  FNTWASKNNKHFTAIEKLRRRAIFNMNAKFVDSFNKIG-----SFKLSVDGPFAAMTNEE 70
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           ++    + +    T++     YL+ +     P + DWR  G VTP+++Q QCGSC++F +
+Sbjct: 71  YRTLLKSKRT---TEENGQVKYLNIQ----APESVDWRKEGKVTPIRDQAQCGSCYTFGS 123
+
+Query: 148 TGNVEGQHFISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK 204
+              +EG+  I +      + LSE+++V C          +  + GCNGGL  N Y+YII+
+Sbjct: 124 LAALEGRLLIEKGGDANTLDLSEEHMVQCT--------RDNGNNGCNGGLGSNVYDYIIE 175
+
+Query: 205 NGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA 264
+           + G+  ES YPYT    T C  N  +  AKI+ +T +P+N        +S G + ++ DA
+Sbjct: 176 H-GVAKESDYPYTGSDST-CKTNVKSF-AKITGYTKVPRNNEAELKAALSQGLVDVSIDA 232
+
+Query: 265 --VEWQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 319
+              ++Q Y  G + D  C  N  +L+H +  VGY   +         WIV+NSWG  WG+
+Sbjct: 233 SSAKFQLYKSGAYTDTKCKNNYFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWGD 287
+
+Query: 320 QGYIYLRRGKNTCGVS 335
+           +GYI +    NTCGV+
+Sbjct: 288 KGYINMVIEGNTCGVA 303
+
+
+>sp|P46102|CYSP_PLAVN CYSTEINE PROTEINASE PRECURSOR
+          Length = 506
+
+ Score =  153 bits (382), Expect = 6e-37
+ Identities = 115/358 (32%), Positives = 176/358 (49%), Gaps = 62/358 (17%)
+
+Query: 27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+           S+F ++  + NKKY + +E L+RFE FK    K ++ N +   +       VN+++D S 
+Sbjct: 160 SKFFKYMKENNKKYENMDEQLQRFENFKIRYMKTQKHNEMVGKNGLTYVQKVNQYSDFSK 219
+
+Query: 86  DEFKNYYLNNKEAIFTDDL------PVADYLDDEFINSI-------PTAFDWRTRGAVTP 132
+           +EF NY+   K      DL      P+  +L +  + S+       P + D+R++    P
+Sbjct: 220 EEFDNYF--KKLLSVPMDLKSKYIVPLKKHLANTNLISVDNKSKDFPDSRDYRSKFNFLP 277
+
+Query: 133 VKNQGQCGSCWSFSTTGNVEGQHFISQNKL-VSLSEQNLVDCDHECMEYEGEEACDEGCN 191
+            K+QG CGSCW+F+  GN E  +  +++++ +S SEQ +VDC  E          + GC+
+Sbjct: 278 PKDQGNCGSCWAFAAIGNFEYLYVHTRHEMPISFSEQQMVDCSTE----------NYGCD 327
+
+Query: 192 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGY 251
+           GG    A+ Y+I NG +     YPY       C     ++  ++     +  NE +MA  
+Sbjct: 328 GGNPFYAFLYMINNG-VCLGDEYPYKGHEDFFCLNYRCSLLGRVHFIGDVKPNELIMALN 386
+
+Query: 252 IVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSA--------------- 295
+            V  GP+ IA  A E +  Y GGVFD  CNP  L+H +L+VGY                 
+Sbjct: 387 YV--GPVTIAVGASEDFVLYSGGVFDGECNPE-LNHSVLLVGYGQVKKSLAFEDSHSNVD 443
+
+Query: 296 KNTI--FRKNMP---------YWIVKNSWGADWGEQGYIYLRRGK----NTCGVSNFV 338
+            N I  +++N+          YWIV+NSWG +WGE GYI ++R K      CGV + V
+Sbjct: 444 SNLIKKYKENIKGDDDDDIIYYWIVRNSWGPNWGEGGYIRIKRNKAGDDGFCGVGSDV 501
+
+
+>sp|P36185|ACP2_ENTHI CYSTEINE PROTEINASE ACP2 PRECURSOR
+          Length = 310
+
+ Score =  150 bits (374), Expect = 5e-36
+ Identities = 102/322 (31%), Positives = 160/322 (49%), Gaps = 32/322 (9%)
+
+Query: 20  GIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN- 78
+           GI       F  +  K NK ++  E L R  IF  N   ++  N I        K  V+ 
+Sbjct: 3   GIRIASAIDFNTWASKNNKHFTAIEKLRRRAIFNMNAKFVDSFNKIG-----SFKLSVDG 57
+
+Query: 79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQ 138
+            FA ++++E++    + +    T++     YL+ +     P + DWR  G VTP+++Q Q
+Sbjct: 58  PFAAMTNEEYRTLLKSKRT---TEENGQVKYLNIQ----APESVDWRKEGKVTPLRDQAQ 110
+
+Query: 139 CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNA 198
+           CGSC++F +   +EG+  I +       + N +D   E M+   +   + GCNGGL  N 
+Sbjct: 111 CGSCYTFGSLAALEGRLLIEKG-----GDANTLDLSEEHMQCTRDNG-NNGCNGGLGSNV 164
+
+Query: 199 YNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPL 258
+           Y+YII++ G+  ES YPYT    T C  N  +   KI+ +T +P+N        +S G L
+Sbjct: 165 YDYIIEH-GVAKESDYPYTGSDST-CKTNVKSF-RKITGYTKVPRNNEAELKAALSQGLL 221
+
+Query: 259 AIAAD--AVEWQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSW 313
+            ++ D  + ++Q Y  G + D  C  N  +L+H +  VGY   +         WIV+NSW
+Sbjct: 222 DVSIDVSSAKFQLYKSGAYTDTKCKNNYFALNHEVCAVGYGVVD-----GKECWIVRNSW 276
+
+Query: 314 GADWGEQGYIYLRRGKNTCGVS 335
+           G  WG++GYI +    NTCGV+
+Sbjct: 277 GTSWGDKGYINMVIEGNTCGVA 298
+
+
+>sp|P25781|CYSP_THEAN CYSTEINE PROTEINASE PRECURSOR
+          Length = 441
+
+ Score =  146 bits (366), Expect = 5e-35
+ Identities = 107/339 (31%), Positives = 164/339 (47%), Gaps = 54/339 (15%)
+
+Query: 28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGV--NKFADLS 84
+           +F  F +K+ K + S ++ ++RF  F+ N   ++        HK    + +  NKF+DLS
+Sbjct: 119 EFDAFVEKYKKVHRSFDQRVQRFLTFRKNYHIVK-------THKPTEPYSLDLNKFSDLS 171
+
+Query: 85  SDEFKNYY--------------------LNNKEAIFTDDLPVADYLDDEFINSIPTA--F 122
+            +EFK  Y                    +++K  I+   L  A  +++    S+ T    
+Sbjct: 172 DEEFKALYPVITPPKTYTSLSKHLEFKKMSHKNPIYISKLKKAKGIEEIKDLSLITGENL 231
+
+Query: 123 DWRTRGAVTPVKNQGQ-CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYE 181
+           +W    AV+P K+QG  CGSCW+FS+  +VE  + + +NK   LSEQ LV+CD   M   
+Sbjct: 232 NWARTDAVSPTKDQGDHCGSCWAFSSIASVESLYRLYKNKSYFLSEQELVNCDKSSM--- 288
+
+Query: 182 GEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMI 241
+                  GC GGL   A  Y I + G+  ES  PYT    + C  +  N    I + +++
+Sbjct: 289 -------GCAGGLPITALEY-IHSKGVSFESEVPYTGIV-SPCKPSIKN-KVFIDSISIL 338
+
+Query: 242 PKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFR 301
+             N+ V    ++S   + IA    E + Y GG+F   C    L+H +L+VG    +    
+Sbjct: 339 KGNDVVNKSLVISPTVVGIAV-TKELKLYSGGIFTGKCG-GELNHAVLLVGEGVDH---E 393
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRR---GKNTCGVSNF 337
+             M YWI+KNSWG DWGE G++ L+R   G + CG+  F
+Sbjct: 394 TGMRYWIIKNSWGEDWGENGFLRLQRTKKGLDKCGILTF 432
+
+
+>sp|P14518|BROM_ANACO BROMELAIN, STEM
+          Length = 212
+
+ Score =  146 bits (365), Expect = 6e-35
+ Identities = 81/224 (36%), Positives = 115/224 (51%), Gaps = 27/224 (12%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+           ++P + DWR  GAVT VKNQ  CG+CW+F+    VE  + I +  L  LSEQ ++DC   
+Sbjct: 1   AVPQSIDWRDYGAVTSVKNQNPCGACWAFAAIATVESIYKIKKGILEPLSEQQVLDC--- 57
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                   A   GC GG +  A+ +II N G+ + + YPY A  GT C  +     A I+
+Sbjct: 58  --------AKGYGCKGGWEFRAFEFIISNKGVASGAIYPYKAAKGT-CKTDGVPNSAYIT 108
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 295
+            +  +P+N      Y VS  P+ +A DA   +Q+Y  GVF+ PC   SL+H +  +GY  
+Sbjct: 109 GYARVPRNNESSMMYAVSKQPITVAVDANANFQYYKSGVFNGPCG-TSLNHAVTAIGYGQ 167
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR----GKNTCGVS 335
+            + I+ K          WGA WGE GYI + R        CG++
+Sbjct: 168 DSIIYPK---------KWGAKWGEAGYIRMARDVSSSSGICGIA 202
+
+
+>sp|P22497|CYSP_THEPA CYSTEINE PROTEINASE PRECURSOR
+          Length = 439
+
+ Score =  145 bits (363), Expect = 1e-34
+ Identities = 104/343 (30%), Positives = 159/343 (46%), Gaps = 64/343 (18%)
+
+Query: 24  EEQSQFLEFQDKFNKKYS-HEEYLERFEIFKSNLGKIEELNLIAINHKADTKF--GVNKF 80
+           E   +F EF  K+N++++  +E L R   F+SN  +++E        K D  +  G+N+F
+Sbjct: 119 EVYREFEEFNSKYNRRHATQQERLNRLVTFRSNYLEVKE-------QKGDEPYVKGINRF 171
+
+Query: 81  ADLSSDEF--------------------------KNYYLNNKEAIFTDDLPVADYLDDEF 114
+           +DL+  EF                          K Y  N K+A+ TD+       D + 
+Sbjct: 172 SDLTEREFYKLFPVMKPPKATYSNGYYLLSHMANKTYLKNLKKALNTDE-------DVDL 224
+
+Query: 115 INSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCD 174
+                   DWR   +VT VK+Q  CG CW+FST G+VEG +    +K   LS Q L+DCD
+Sbjct: 225 AKLTGENLDWRRSSSVTSVKDQSNCGGCWAFSTVGSVEGYYMSHFDKSYELSVQELLDCD 284
+
+Query: 175 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAK 234
+                     +   GC GGL  +AY Y+ K  G+ +    P+  +   +C+   A     
+Sbjct: 285 ----------SFSNGCQGGLLESAYEYVRKY-GLVSAKDLPF-VDKARRCSVPKAK-KVS 331
+
+Query: 235 ISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYS 294
+           + ++ +  K + VM   + S+      + + E   Y  GVF   C   SL+H +++VG  
+Sbjct: 332 VPSYHVF-KGKEVMTRSLTSSPCSVYLSVSPELAKYKSGVFTGECG-KSLNHAVVLVGEG 389
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR---GKNTCGV 334
+                 ++   YW+V+NSWG DWGE GY+ L R   G + CGV
+Sbjct: 390 YDEVTKKR---YWVVQNSWGTDWGENGYMRLERTNMGTDKCGV 429
+
+
+>sp|P16311|MMAL_DERFA MAJOR MITE FECAL ALLERGEN DER F 1 PRECURSOR (DER F I)
+          Length = 321
+
+ Score =  144 bits (359), Expect = 3e-34
+ Identities = 116/346 (33%), Positives = 158/346 (45%), Gaps = 48/346 (13%)
+
+Query: 7   FVLAVFTVFVSSRGIP-PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLI 65
+           FVLA+ ++ V S     P     F EF+  FNK Y+    +E  E+ + N   +E L  +
+Sbjct: 3   FVLAIASLLVLSTVYARPASIKTFEEFKKAFNKNYAT---VEEEEVARKNF--LESLKYV 57
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF----INSI--P 119
+             N     K  +N  +DLS DEFKN YL + EA   + L     L+ E     INS+  P
+Sbjct: 58  EAN-----KGAINHLSDLSLDEFKNRYLMSAEAF--EQLKTQFDLNAETSACRINSVNVP 110
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+           +  D R+   VTP++ QG CGSCW+FS     E  +   +N  + LSEQ LVDC      
+Sbjct: 111 SELDLRSLRTVTPIRMQGGCGSCWAFSGVAATESAYLAYRNTSLDLSEQELVDC------ 164
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+                A   GC+G   P    YI +NG ++ E SYPY A        NS + G  ISN+ 
+Sbjct: 165 -----ASQHGCHGDTIPRGIEYIQQNGVVE-ERSYPYVAREQRCRRPNSQHYG--ISNYC 216
+
+Query: 240 MIPKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGGVF---DIPCNPNSLDHGILIV 291
+            I   +       ++    AIA      D   +Q Y G      D    PN   H + IV
+Sbjct: 217 QIYPPDVKQIREALTQTHTAIAVIIGIKDLRAFQHYDGRTIIQHDNGYQPNY--HAVNIV 274
+
+Query: 292 GYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNF 337
+           GY +      +   YWIV+NSW   WG+ GY Y + G N   +  +
+Sbjct: 275 GYGS-----TQGDDYWIVRNSWDTTWGDSGYGYFQAGNNLMMIEQY 315
+
+
+>sp|P25805|CYSP_PLAFA THROPHOZOITE CYSTEINE PROTEINASE PRECURSOR (TCP)
+          Length = 569
+
+ Score =  144 bits (359), Expect = 3e-34
+ Identities = 102/363 (28%), Positives = 167/363 (45%), Gaps = 62/363 (17%)
+
+Query: 27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+           S+F +F  + NK Y + +E + +FEIFK N   I+  N   +N  A  K  VN+F+D S 
+Sbjct: 223 SKFFKFMKEHNKVYKNIDEQMRKFEIFKINYISIKNHN--KLNKNAMYKKKVNQFSDYSE 280
+
+Query: 86  DEFKNY--------------YLNNKEAIFTDDLPVADYL------DDEFINSIPTAFDWR 125
+           +E K Y              Y    E    D++ ++++       + +  + +P   D+R
+Sbjct: 281 EELKEYFKTLLHVPNHMIEKYSKPFENHLKDNILISEFYTNGKRNEKDIFSKVPEILDYR 340
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G V   K+QG CGSCW+F++ GN+E         ++S SEQ +VDC  +         
+Sbjct: 341 EKGIVHEPKDQGLCGSCWAFASVGNIESVFAKKNKNILSFSEQEVVDCSKD--------- 391
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNE 245
+            + GC+GG    ++ Y+++N  +     Y Y A+    C          +S+   + +N+
+Sbjct: 392 -NFGCDGGHPFYSFLYVLQN-ELCLGDEYKYKAKDDMFCLNYRCKRKVSLSSIGAVKENQ 449
+
+Query: 246 TVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGY----------- 293
+            ++A  +   GPL++      ++  Y  GV++  C+   L+H +L+VGY           
+Sbjct: 450 LILA--LNEVGPLSVNVGVNNDFVAYSEGVYNGTCS-EELNHSVLLVGYGQVEKTKLNYN 506
+
+Query: 294 ---SAKNTIFRKNMP------YWIVKNSWGADWGEQGYIYLRRGKN----TCGVSNFVST 340
+                 NT    N P      YWI+KNSW   WGE G++ L R KN     CG+   V  
+Sbjct: 507 NKIQTYNTKENSNQPDDNIIYYWIIKNSWSKKWGENGFMRLSRNKNGDNVFCGIGEEVFY 566
+
+Query: 341 SII 343
+            I+
+Sbjct: 567 PIL 569
+
+
+>sp|P42666|CYSP_PLAVI CYSTEINE PROTEINASE PRECURSOR
+          Length = 583
+
+ Score =  132 bits (329), Expect = 1e-30
+ Identities = 102/367 (27%), Positives = 165/367 (44%), Gaps = 86/367 (23%)
+
+Query: 27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+           S+F  F +K+ + Y    E +E+++ FK N  KI++ N          K  VN+F+D S 
+Sbjct: 235 SKFFNFMNKYKRSYKDINEQMEKYKNFKMNYLKIKKHN----ETNQMYKMKVNQFSDYSK 290
+
+Query: 86  DEFKNYYLNNKEAIFTDDLPVADYLDDEFI------------------------NSIPTA 121
+            +F++Y        F   +P+ D+L  +++                          +P  
+Sbjct: 291 KDFESY--------FRKLVPIPDHLKKKYVVPFSSMNNGKGKNVVTSSSGANLLADVPEI 342
+
+Query: 122 FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK-LVSLSEQNLVDCDHECMEY 180
+            D+R +G V   K+QG CGSCW+F++ GNVE  +    NK +++LSEQ +VDC       
+Sbjct: 343 LDYREKGIVHEPKDQGLCGSCWAFASVGNVECMYAKEHNKTILTLSEQEVVDC------- 395
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQC-NFNSANIGAKISNFT 239
+                 + GC+GG    ++ Y I+N GI     Y Y A     C N+   N    +S+  
+Sbjct: 396 ---SKLNFGCDGGHPFYSFIYAIEN-GICMGDDYKYKAMDNLFCLNYRCKN-KVTLSSVG 450
+
+Query: 240 MIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYS--AK 296
+            + +NE + A  +   GP+++      ++ FY GG+F+  C    L+H +L+VGY     
+Sbjct: 451 GVKENELIRA--LNEVGPVSVNVGVTDDFSFYGGGIFNGTCT-EELNHSVLLVGYGQVQS 507
+
+Query: 297 NTIFRKN-------------------------MPYWIVKNSWGADWGEQGYIYLRRGKN- 330
+           + IF++                            YWI+KNSW   WGE G++ + R K  
+Sbjct: 508 SKIFQEKNAYDDASGVTKKGALSYPSKADDGIQYYWIIKNSWSKFWGENGFMRISRNKEG 567
+
+Query: 331 ---TCGV 334
+               CG+
+Sbjct: 568 DNVFCGI 574
+
+
+>sp|P08176|MMAL_DERPT MAJOR MITE FECAL ALLERGEN DER P 1 PRECURSOR (DER P I)
+          Length = 320
+
+ Score =  123 bits (305), Expect = 7e-28
+ Identities = 110/338 (32%), Positives = 151/338 (44%), Gaps = 51/338 (15%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           MK++L     +    V +R   P     F E++  FNK Y+     E  E  + N   +E
+Sbjct: 1   MKIVLAIASLLALSAVYAR---PSSIKTFEEYKKAFNKSYAT---FEDEEAARKNF--LE 52
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF----IN 116
+            +  +  N  A     +N  +DLS DEFKN +L + EA   + L     L+ E     IN
+Sbjct: 53  SVKYVQSNGGA-----INHLSDLSLDEFKNRFLMSAEAF--EHLKTQFDLNAETNACSIN 105
+
+Query: 117 -SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 175
+            + P   D R    VTP++ QG CGSCW+FS     E  +   +N+ + L+EQ LVDC  
+Sbjct: 106 GNAPAEIDLRQMRTVTPIRMQGGCGSCWAFSGVAATESAYLAYRNQSLDLAEQELVDC-- 163
+
+Query: 176 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKI 235
+                    A   GC+G   P    YI  NG +Q ES Y Y A   +    N+   G  I
+Sbjct: 164 ---------ASQHGCHGDTIPRGIEYIQHNGVVQ-ESYYRYVAREQSCRRPNAQRFG--I 211
+
+Query: 236 SNFTMI-PKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGGVF---DIPCNPNSLDH 286
+           SN+  I P N   +   +  T   AIA      D   ++ Y G      D    PN   H
+Sbjct: 212 SNYCQIYPPNVNKIREALAQTHS-AIAVIIGIKDLDAFRHYDGRTIIQRDNGYQPNY--H 268
+
+Query: 287 GILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIY 324
+            + IVGYS       + + YWIV+NSW  +WG+ GY Y
+Sbjct: 269 AVNIVGYSN-----AQGVDYWIVRNSWDTNWGDNGYGY 301
+
+
+>sp|P80067|CATC_RAT DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+           (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 462
+
+ Score =  117 bits (291), Expect = 3e-26
+ Identities = 81/255 (31%), Positives = 128/255 (49%), Gaps = 32/255 (12%)
+
+Query: 105 PVADYLDDEFINSIPTAFDWRT-RGA--VTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK 161
+           P+ D +  + + S+P ++DWR  RG   V+PV+NQ  CGSC+SF++ G +E +  I  N 
+Sbjct: 218 PITDEIQQQIL-SLPESWDWRNVRGINFVSPVRNQESCGSCYSFASIGMLEARIRILTNN 276
+
+Query: 162 LVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAE 219
+             +  LS Q +V C              +GC+GG          ++ G+  E+ +PYTA 
+Sbjct: 277 SQTPILSPQEVVSCSPYA----------QGCDGGFPYLIAGKYAQDFGVVEENCFPYTA- 325
+
+Query: 220 TGTQCNFNSANIGAKISNFTMIPK-----NETVMAGYIVSTGPLAIAADAVE-WQFYIGG 273
+           T   C      +    S +  +       NE +M   +V  GP+A+A +  + +  Y  G
+Sbjct: 326 TDAPCKPKENCLRYYSSEYYYVGGFYGGCNEALMKLELVKHGPMAVAFEVHDDFLHYHSG 385
+
+Query: 274 VF-----DIPCNPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRR 327
+           ++       P NP  L +H +L+VGY  K+ +    + YWIVKNSWG+ WGE GY  +RR
+Sbjct: 386 IYHHTGLSDPFNPFELTNHAVLLVGYG-KDPV--TGLDYWIVKNSWGSQWGESGYFRIRR 442
+
+Query: 328 GKNTCGVSNFVSTSI 342
+           G + C + +    +I
+Sbjct: 443 GTDECAIESIAMAAI 457
+
+
+>sp|P97821|CATC_MOUSE DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+           (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 462
+
+ Score =  116 bits (287), Expect = 8e-26
+ Identities = 90/331 (27%), Positives = 156/331 (46%), Gaps = 42/331 (12%)
+
+Query: 34  DKFNKKYSH-----EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEF 88
+           +K N   +H     E Y ER  ++  N   ++ +N +    K+ T     ++  +S  + 
+Sbjct: 147 EKVNMNAAHLGGLQERYSER--LYTHNHNFVKAINTV---QKSWTATAYKEYEKMSLRDL 201
+
+Query: 89  KNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRT-RGA--VTPVKNQGQCGSCWSF 145
+                +++        P+ D +  + +N +P ++DWR  +G   V+PV+NQ  CGSC+SF
+Sbjct: 202 IRRSGHSQRIPRPKPAPMTDEIQQQILN-LPESWDWRNVQGVNYVSPVRNQESCGSCYSF 260
+
+Query: 146 STTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           ++ G +E +  I  N   +  LS Q +V C              +GC+GG          
+Sbjct: 261 ASMGMLEARIRILTNNSQTPILSPQEVVSCSPYA----------QGCDGGFPYLIAGKYA 310
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPK-----NETVMAGYIVSTGPL 258
+           ++ G+  ES +PYTA+  + C      +    S++  +       NE +M   +V  GP+
+Sbjct: 311 QDFGVVEESCFPYTAKD-SPCKPRENCLRYYSSDYYYVGGFYGGCNEALMKLELVKHGPM 369
+
+Query: 259 AIAADAVE-WQFYIGGVF-----DIPCNPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKN 311
+           A+A +  + +  Y  G++       P NP  L +H +L+VGY          + YWI+KN
+Sbjct: 370 AVAFEVHDDFLHYHSGIYHHTGLSDPFNPFELTNHAVLLVGYGRDPVT---GIEYWIIKN 426
+
+Query: 312 SWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+           SWG++WGE GY  +RRG + C + +    +I
+Sbjct: 427 SWGSNWGESGYFRIRRGTDECAIESIAVAAI 457
+
+
+>sp|P53634|CATC_HUMAN DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+           (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 463
+
+ Score =  113 bits (281), Expect = 4e-25
+ Identities = 75/236 (31%), Positives = 113/236 (47%), Gaps = 31/236 (13%)
+
+Query: 118 IPTAFDWRTRGA---VTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVD 172
+           +PT++DWR       V+PV+NQ  CGSC+SF++ G +E +  I  N   +  LS Q +V 
+Sbjct: 231 LPTSWDWRNVHGINFVSPVRNQASCGSCYSFASMGMLEARIRILTNNSQTPILSPQEVVS 290
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 232
+           C              +GC GG          ++ G+  E+ +PYT  T + C        
+Sbjct: 291 CSQYA----------QGCEGGFPYLIAGKYAQDFGLVEEACFPYTG-TDSPCKMKEDCFR 339
+
+Query: 233 AKISNFTMIPK-----NETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDI-----PCNP 281
+              S +  +       NE +M   +V  GP+A+A +  + +  Y  G++       P NP
+Sbjct: 340 YYSSEYHYVGGFYGGCNEALMKLELVHHGPMAVAFEVYDDFLHYKKGIYHHTGLRDPFNP 399
+
+Query: 282 NSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 336
+             L +H +L+VGY   +      M YWIVKNSWG  WGE GY  +RRG + C + +
+Sbjct: 400 FELTNHAVLLVGYGTDSA---SGMDYWIVKNSWGTGWGENGYFRIRRGTDECAIES 452
+
+
+>sp|Q26563|CATC_SCHMA CATHEPSIN C PRECURSOR
+          Length = 454
+
+ Score =  113 bits (279), Expect = 7e-25
+ Identities = 75/242 (30%), Positives = 113/242 (45%), Gaps = 35/242 (14%)
+
+Query: 117 SIPTAFDWRT-----RGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQN 169
+           ++P  FDW +     R  VTP++NQG CGSC++  +   +E +  +  N  +   LS Q 
+Sbjct: 217 NLPLEFDWTSPPDGSRSPVTPIRNQGICGSCYASPSAAALEARIRLVSNFSEQPILSPQT 276
+
+Query: 170 LVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSA 229
+           +VDC              EGCNGG          ++ G+  +   PYT E   +C  +  
+Sbjct: 277 VVDCS----------PYSEGCNGGFPFLIAGKYGEDFGLPQKIVIPYTGEDTGKCTVSKN 326
+
+Query: 230 NIGAKISNFTMI-----PKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPC---- 279
+                 ++++ I       NE +M   ++S GP  +  +  E +QFY  G++        
+Sbjct: 327 CTRYYTTDYSYIGGYYGATNEKLMQLELISNGPFPVGFEVYEDFQFYKEGIYHHTTVQTD 386
+
+Query: 280 ----NPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGV 334
+               NP  L +H +L+VGY           PYW VKNSWG +WGEQGY  + RG + CGV
+Sbjct: 387 HYNFNPFELTNHAVLLVGYGVDKL---SGEPYWKVKNSWGVEWGEQGYFRILRGTDECGV 443
+
+Query: 335 SN 336
+            +
+Sbjct: 444 ES 445
+
+
+>sp|P25773|CATL_FELCA CATHEPSIN L (PROGESTERONE-DEPENDENT PROTEIN) (PDP)
+          Length = 139
+
+ Score =  113 bits (279), Expect = 7e-25
+ Identities = 55/141 (39%), Positives = 84/141 (59%), Gaps = 5/141 (3%)
+
+Query: 192 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGY 251
+           GGL  +A+ Y+  NGG+ +E SYPY A+ G  C +   N  A ++++  IP  E  +   
+Sbjct: 1   GGLIDDAFQYVKDNGGLDSEESYPYHAQ-GDSCKYRPENSVANVTDYWDIPSKENELMIT 59
+
+Query: 252 IVSTGPLAIAADAV--EWQFYIGGVF-DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWI 308
+           + + GP++ A DA    ++FY  G++ D  C+   +DHG+L+VGY A  T   +N  YWI
+Sbjct: 60  LAAVGPISAAIDASLDTFRFYKEGIYYDPSCSSEDVDHGVLVVGYGADGTE-TENKKYWI 118
+
+Query: 309 VKNSWGADWGEQGYIYLRRGK 329
+           +KNSWG DWG  GYI + + +
+Sbjct: 119 IKNSWGTDWGMDGYIKMAKDR 139
+
+
+>sp|P25780|EUM1_EURMA MITE GROUP I ALLERGEN EUR M 1 (EUR M I)
+          Length = 211
+
+ Score =  105 bits (260), Expect = 1e-22
+ Identities = 73/222 (32%), Positives = 102/222 (45%), Gaps = 29/222 (13%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+           S+P+  D R+   VTP++ QG CGSCW+FS   + E  +   +N  + L+EQ LVDC   
+Sbjct: 10  SLPSELDLRSLRTVTPIRMQGGCGSCWAFSGVASTESAYLAYRNMSLDLAEQELVDC--- 66
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                   A   GC+G   P    YI +NG +Q E  YPY A   +    N+   G K  
+Sbjct: 67  --------ASQNGCHGDTIPRGIEYIQQNGVVQ-EHYYPYVAREQSCHRPNAQRYGLK-- 115
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGGVF---DIPCNPNSLDHGI 288
+           N+  I   ++      ++    A+A      D   ++ Y G      D    PN   H +
+Sbjct: 116 NYCQISPPDSNKIRQALTQTHTAVAVIIGIKDLNAFRHYDGRTIMQHDNGYQPNY--HAV 173
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKN 330
+            IVGY   NT   + + YWIV+NSW   WG+ GY Y     N
+Sbjct: 174 NIVGYG--NT---QGVDYWIVRNSWDTTWGDNGYGYFAANIN 210
+
+
+>sp|Q23894|CYS3_DICDI CYSTEINE PROTEINASE 3 (CYSTEINE PROTEINASE II)
+          Length = 151
+
+ Score = 95.6 bits (234), Expect = 1e-19
+ Identities = 61/157 (38%), Positives = 86/157 (53%), Gaps = 17/157 (10%)
+
+Query: 41  SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF 100
+           +H+E++ R+E FK N+  +   N    +  + T  G+N+ ADLS++E++  YL  +  I 
+Sbjct: 1   THKEFMPRYEEFKKNMDYVHNWN----SKGSKTVLGLNQHADLSNEEYRLNYLGTRAHIK 56
+
+Query: 101 TDDLPVADY---LDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFI 157
+            +     +    L+       P   DWR + AVTPVK+QGQCGSC   STTG+VEG   I
+Sbjct: 57  LNGYHKRNLGLRLNRPHFKQ-PLNVDWREKDAVTPVKDQGQCGSC-IISTTGSVEGVTAI 114
+
+Query: 158 SQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGL 194
+              KLVSLSEQN++               +EGCNGGL
+Sbjct: 115 KTGKLVSLSEQNILRL--------SSSFGNEGCNGGL 143
+
+
+>sp|P43508|CPR4_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 4 PRECURSOR
+          Length = 335
+
+ Score = 94.4 bits (231), Expect = 3e-19
+ Identities = 82/300 (27%), Positives = 129/300 (42%), Gaps = 60/300 (20%)
+
+Query: 82  DLSSDEFKNYYLNNK-EAIFTDDLPVADYLDDEFINSIPTAFDWRTRG----AVTPVKNQ 136
+           D++ ++ K   +  +  A  T D+ V  +  +E  ++IP  FD RT+     ++  +++Q
+Sbjct: 46  DITIEQVKKRLMRTEFVAPHTPDVEVVKHDINE--DTIPATFDARTQWPNCMSINNIRDQ 103
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGL 194
+             CGSCW+F+       +  I+ N  V+  LS ++++ C   C        C  GC GG 
+Sbjct: 104 SDCGSCWAFAAAEAASDRFCIASNGAVNTLLSAEDVLSC---CSN------CGYGCEGGY 154
+
+Query: 195 QPNAYNYIIKNGGIQTESSY-------PYT-AETG------------------------- 221
+             NA+ Y++K+G   T  SY       PY+ A  G                         
+Sbjct: 155 PINAWKYLVKSG-FCTGGSYEAQFGCKPYSLAPCGETVGNVTWPSCPDDGYDTPACVNKC 213
+
+Query: 222 TQCNFNSANIGAKISNFTM--IPKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIP 278
+           T  N+N A    K    T   + K  + +   I++ GP+  A    E +  Y  GV+   
+Sbjct: 214 TNKNYNVAYTADKHFGSTAYAVGKKVSQIQAEIIAHGPVEAAFTVYEDFYQYKTGVYVHT 273
+
+Query: 279 CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFV 338
+                  H I I+G+   N       PYW+V NSW  +WGE GY  + RG N CG+ + V
+Sbjct: 274 TGQELGGHAIRILGWGTDN-----GTPYWLVANSWNVNWGENGYFRIIRGTNECGIEHAV 328
+
+
+>sp|P05993|PAP5_CARPA CYSTEINE PROTEINASE (CLONE PLBPC13)
+          Length = 96
+
+ Score = 90.5 bits (221), Expect = 5e-18
+ Identities = 43/87 (49%), Positives = 55/87 (62%), Gaps = 2/87 (2%)
+
+Query: 256 GPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSW 313
+           GPLA+A +A   Q YIGGV         L+HG+L+VGY +     I  K  PYW++KNSW
+Sbjct: 1   GPLAVAINAAYMQTYIGGVSCPYICSRRLNHGVLLVGYGSAGYAPIRLKEKPYWVIKNSW 60
+
+Query: 314 GADWGEQGYIYLRRGKNTCGVSNFVST 340
+           G +WGE GY  + RG+N CGV + VST
+Sbjct: 61  GENWGENGYYKICRGRNICGVDSMVST 87
+
+
+>sp|P25807|CYS1_CAEEL GUT-SPECIFIC CYSTEINE PROTEINASE PRECURSOR
+          Length = 329
+
+ Score = 90.5 bits (221), Expect = 5e-18
+ Identities = 69/288 (23%), Positives = 118/288 (40%), Gaps = 46/288 (15%)
+
+Query: 82  DLSSDEFKNYYLNNK-EAIFTDDLPVADYLDDEFINSIPTAFDWRTRGA----VTPVKNQ 136
+           +++ +E K   ++ K  A  +D++   +   +  + S+P  FD RT+ +    +  +++Q
+Sbjct: 50  EITEEEMKFKLMDGKYAAAHSDEIRATE--QEVVLASVPATFDSRTQWSECKSIKLIRDQ 107
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGL 194
+             CGSCW+F     +  +  I         +S  +L+ C   C       +C  GC GG 
+Sbjct: 108 ATCGSCWAFGAAEMISDRTCIETKGAQQPIISPDDLLSC---C-----GSSCGNGCEGGY 159
+
+Query: 195 QPNAYNY-----IIKNGGIQTESSYPY--------------TAETGTQCNFNSANIGAKI 235
+              A  +     ++  G        PY              T      C    +   AK 
+Sbjct: 160 PIQALRWWDSKGVVTGGDYHGAGCKPYPIAPCTSGNCPESKTPSCSMSCQSGYSTAYAKD 219
+
+Query: 236 SNFTM----IPKNETVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILI 290
+            +F +    +PKN   +   I + GP+  A    E +  Y  GV+          H I I
+Sbjct: 220 KHFGVSAYAVPKNAASIQAEIYANGPVEAAFSVYEDFYKYKSGVYKHTAGKYLGGHAIKI 279
+
+Query: 291 VGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFV 338
+           +G+  ++       PYW+V NSWG +WGE G+  + RG + CG+ + V
+Sbjct: 280 IGWGTES-----GSPYWLVANSWGVNWGESGFFKIYRGDDQCGIESAV 322
+
+
+>sp|P43509|CPR5_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 5 PRECURSOR
+          Length = 344
+
+ Score = 89.3 bits (218), Expect = 1e-17
+ Identities = 69/272 (25%), Positives = 113/272 (41%), Gaps = 55/272 (20%)
+
+Query: 108 DYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 163
+           D +  E  ++IP  FD    W    ++  +++Q  CGSCW+F+    +  +  I+ N  V
+Sbjct: 72  DIVATEVSDAIPDHFDARDQWPNCMSINNIRDQSDCGSCWAFAAAEAISDRTCIASNGAV 131
+
+Query: 164 S--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSY------- 214
+           +  LS ++L+ C        G  +C  GC GG    A+ + +K+G + T  SY       
+Sbjct: 132 NTLLSSEDLLSC------CTGMFSCGNGCEGGYPIQAWKWWVKHG-LVTGGSYETQFGCK 184
+
+Query: 215 -------------------PYTAETGTQC--------NFNSANIGAKISNFTM--IPKNE 245
+                              P   E   +C        N+ +  +  K    T   + K  
+Sbjct: 185 PYSIAPCGETVNGVKWPACPEDTEPTPKCVDSCTSKNNYATPYLQDKHFGSTAYAVGKKV 244
+
+Query: 246 TVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNM 304
+             +   I++ GP+ +A    E +  Y  GV+      +   H + I+G+   N       
+Sbjct: 245 EQIQTEILTNGPIEVAFTVYEDFYQYTTGVYVHTAGASLGGHAVKILGWGVDN-----GT 299
+
+Query: 305 PYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 336
+           PYW+V NSW   WGE+GY  + RG N CG+ +
+Sbjct: 300 PYWLVANSWNVAWGEKGYFRIIRGLNECGIEH 331
+
+
+>sp|P00787|CATB_RAT CATHEPSIN B PRECURSOR (CATHEPSIN B1) (RSG-2)
+          Length = 339
+
+ Score = 87.8 bits (214), Expect = 3e-17
+ Identities = 68/264 (25%), Positives = 114/264 (42%), Gaps = 51/264 (19%)
+
+Query: 117 SIPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNL 170
+           ++P +FD    W     +  +++QG CGSCW+F     +  +  I  N  V++  S ++L
+Sbjct: 79  NLPESFDAREQWSNCPTIAQIRDQGSCGSCWAFGAVEAMSDRICIHTNGRVNVEVSAEDL 138
+
+Query: 171 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK----NGGIQTE--------------- 211
+           + C   C        C +GCNGG    A+N+  +    +GG+                  
+Sbjct: 139 LTC---C-----GIQCGDGCNGGYPSGAWNFWTRKGLVSGGVYNSHIGCLPYTIPPCEHH 190
+
+Query: 212 ---SSYPYTAETGT-QCN------FNSANIGAKISNFTM--IPKNETVMAGYIVSTGPLA 259
+              S  P T E  T +CN      ++++    K   +T   +  +E  +   I   GP+ 
+Sbjct: 191 VNGSRPPCTGEGDTPKCNKMCEAGYSTSYKEDKHYGYTSYSVSDSEKEIMAEIYKNGPVE 250
+
+Query: 260 IAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+            A     ++  Y  GV+          H I I+G+  +N +     PYW+V NSW  DWG
+Sbjct: 251 GAFTVFSDFLTYKSGVYKHEAGDVMGGHAIRILGWGIENGV-----PYWLVANSWNVDWG 305
+
+Query: 319 EQGYIYLRRGKNTCGVSNFVSTSI 342
+           + G+  + RG+N CG+ + +   I
+Sbjct: 306 DNGFFKILRGENHCGIESEIVAGI 329
+
+
+>sp|P07688|CATB_BOVIN CATHEPSIN B PRECURSOR
+          Length = 335
+
+ Score = 87.0 bits (212), Expect = 5e-17
+ Identities = 67/259 (25%), Positives = 103/259 (38%), Gaps = 55/259 (21%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL---SEQNL 170
+           +P +FD    W     +  +++QG CGSCW+F     +  +  I  N  V++   +E  L
+Sbjct: 80  LPESFDAREQWPNCPTIKEIRDQGSCGSCWAFGAVEAISDRICIHSNGRVNVEVSAEDML 139
+
+Query: 171 VDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ--------------------- 209
+             C  EC          +GCNGG    A+N+  K G +                      
+Sbjct: 140 TCCGGEC---------GDGCNGGFPSGAWNFWTKKGLVSGGLYNSHVGCRPYSIPPCEHH 190
+
+Query: 210 -TESSYPYTAETGT-QCNFNSANIGAKIS---------NFTMIPKNETVMAGYIVSTGPL 258
+              S  P T E  T +CN  +   G   S         +   +  NE  +   I   GP+
+Sbjct: 191 VNGSRPPCTGEGDTPKCN-KTCEPGYSPSYKEDKHFGCSSYSVANNEKEIMAEIYKNGPV 249
+
+Query: 259 AIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+             A     ++  Y  GV+          H I I+G+  +N       PYW+V NSW  DW
+Sbjct: 250 EGAFSVYSDFLLYKSGVYQHVSGEIMGGHAIRILGWGVEN-----GTPYWLVGNSWNTDW 304
+
+Query: 318 GEQGYIYLRRGKNTCGVSN 336
+           G+ G+  + RG++ CG+ +
+Sbjct: 305 GDNGFFKILRGQDHCGIES 323
+
+
+>sp|P07858|CATB_HUMAN CATHEPSIN B PRECURSOR (CATHEPSIN B1) (APP SECRETASE)
+          Length = 339
+
+ Score = 85.8 bits (209), Expect = 1e-16
+ Identities = 70/285 (24%), Positives = 110/285 (38%), Gaps = 63/285 (22%)
+
+Query: 96  KEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNV 151
+           +  +FT+DL             +P +FD    W     +  +++QG CGSCW+F     +
+Sbjct: 70  QRVMFTEDL------------KLPASFDAREQWPQCPTIKEIRDQGSCGSCWAFGAVEAI 117
+
+Query: 152 EGQHFISQNKLVSL--SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ 209
+             +  I  N  VS+  S ++L+ C   C        C +GCNGG    A+N+  + G + 
+Sbjct: 118 SDRICIHTNAHVSVEVSAEDLLTC---C-----GSMCGDGCNGGYPAEAWNFWTRKGLVS 169
+
+Query: 210 ----------------------TESSYPYTAETGTQCNFNSANIGAKIS---------NF 238
+                                   S  P T E  T         G   +         N 
+Sbjct: 170 GGLYESHVGCRPYSIPPCEHHVNGSRPPCTGEGDTPKCSKICEPGYSPTYKQDKHYGYNS 229
+
+Query: 239 TMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN 297
+             +  +E  +   I   GP+  A     ++  Y  GV+          H I I+G+  +N
+Sbjct: 230 YSVSNSEKDIMAEIYKNGPVEGAFSVYSDFLLYKSGVYQHVTGEMMGGHAIRILGWGVEN 289
+
+Query: 298 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+                  PYW+V NSW  DWG+ G+  + RG++ CG+ + V   I
+Sbjct: 290 -----GTPYWLVANSWNTDWGDNGFFKILRGQDHCGIESEVVAGI 329
+
+
+>sp|P43157|CYSP_SCHJA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECURSOR (ANTIGEN SJ31)
+          Length = 342
+
+ Score = 84.7 bits (206), Expect = 3e-16
+ Identities = 66/268 (24%), Positives = 111/268 (40%), Gaps = 59/268 (22%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLV 171
+           IP+ FD    W    +++ +++Q +CGSCW+F     +  +  I     +   LS  +L+
+Sbjct: 90  IPSQFDSRKKWPHCKSISQIRDQSRCGSCWAFGAVEAMTDRICIQSGGGQSAELSALDLI 149
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGI---------------------QT 210
+            C   C +      C +GC GG    A++Y +K G +                      T
+Sbjct: 150 SC---CKD------CGDGCQGGFPGVAWDYWVKRGIVTGGSKENHTGCQPYPFPKCEHHT 200
+
+Query: 211 ESSYP-------------YTAETGTQCNFNS-ANIGAKISNFTMIPKNETVMAGYIVSTG 256
+           +  YP              T + G +  +    + G +  N   +  NE V+   I+  G
+Sbjct: 201 KGKYPACGTKIYKTPQCKQTCQKGYKTPYEQDKHYGDESYN---VQNNEKVIQRDIMMYG 257
+
+Query: 257 PLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGA 315
+           P+  A D  E +  Y  G++          H I I+G+  +     K  PYW++ NSW  
+Sbjct: 258 PVEAAFDVYEDFLNYKSGIYRHVTGSIVGGHAIRIIGWGVE-----KRTPYWLIANSWNE 312
+
+Query: 316 DWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           DWGE+G   + RG++ C + + V   +I
+Sbjct: 313 DWGEKGLFRMVRGRDECSIESDVVAGLI 340
+
+
+>sp|P25792|CYSP_SCHMA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECURSOR (ANTIGEN SM31)
+          Length = 340
+
+ Score = 84.7 bits (206), Expect = 3e-16
+ Identities = 66/260 (25%), Positives = 109/260 (41%), Gaps = 53/260 (20%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLV 171
+           IP+ FD    W    ++  +++Q +CGSCWSF     +  +  I     + V LS  +L+
+Sbjct: 89  IPSNFDSRKKWPGCKSIATIRDQSRCGSCWSFGAVEAMSDRSCIQSGGKQNVELSAVDLL 148
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTA---ETGTQCNFNS 228
+            C   C      E+C  GC GG+   A++Y +K G +   S   +T        +C  ++
+Sbjct: 149 TC---C------ESCGLGCEGGILGPAWDYWVKEGIVTASSKENHTGCEPYPFPKCEHHT 199
+
+Query: 229 AN----IGAKISN---------------FTM----------IPKNETVMAGYIVSTGPLA 259
+                  G+KI N               +T           +  +E  +   I+  GP+ 
+Sbjct: 200 KGKYPPCGSKIYNTPRCKQTCQRKYKTPYTQDKHRGKSSYNVKNDEKAIQKEIMKYGPVE 259
+
+Query: 260 IAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+            +    E +  Y  G++          H I I+G+  +N       PYW++ NSW  DWG
+Sbjct: 260 ASFTVYEDFLNYKSGIYKHITGEALGGHAIRIIGWGVEN-----KTPYWLIANSWNEDWG 314
+
+Query: 319 EQGYIYLRRGKNTCGVSNFV 338
+           E GY  + RG++ C + + V
+Sbjct: 315 ENGYFRIVRGRDECSIESEV 334
+
+
+>sp|P10605|CATB_MOUSE CATHEPSIN B PRECURSOR (CATHEPSIN B1)
+          Length = 339
+
+ Score = 83.9 bits (204), Expect = 5e-16
+ Identities = 69/265 (26%), Positives = 111/265 (41%), Gaps = 55/265 (20%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNLV 171
+           +P  FD    W     +  +++QG CGSCW+F     +  +  I  N  V++  S ++L+
+Sbjct: 80  LPETFDAREQWSNCPTIGQIRDQGSCGSCWAFGAVEAISDRTCIHTNGRVNVEVSAEDLL 139
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK----NGGIQTE---------------- 211
+            C   C        C +GCNGG    A+++  K    +GG+                   
+Sbjct: 140 TC---C-----GIQCGDGCNGGYPSGAWSFWTKKGLVSGGVYNSHVGCLPYTIPPCEHHV 191
+
+Query: 212 --SSYPYTAETGT-QCNFNSANIGAKIS----------NFTMIPKNETVMAGYIVSTGPL 258
+             S  P T E  T +CN  S   G   S          ++++    + +MA  I   GP+
+Sbjct: 192 NGSRPPCTGEGDTPRCN-KSCEAGYSPSYKEDKHFGYTSYSVSNSVKEIMAE-IYKNGPV 249
+
+Query: 259 AIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+             A     ++  Y  GV+          H I I+G+  +N +     PYW+  NSW  DW
+Sbjct: 250 EGAFTVFSDFLTYKSGVYKHEAGDMMGGHAIRILGWGVENGV-----PYWLAANSWNLDW 304
+
+Query: 318 GEQGYIYLRRGKNTCGVSNFVSTSI 342
+           G+ G+  + RG+N CG+ + +   I
+Sbjct: 305 GDNGFFKILRGENHCGIESEIVAGI 329
+
+
+>sp|P43510|CPR6_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 6 PRECURSOR
+          Length = 379
+
+ Score = 83.5 bits (203), Expect = 6e-16
+ Identities = 72/265 (27%), Positives = 114/265 (42%), Gaps = 61/265 (23%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK--LVSLSEQNLV 171
+           IP +FD    W    ++  +++Q  CGSCW+F     +  +  I+ +    V+LS  +L+
+Sbjct: 105 IPESFDSRDNWPKCDSIKVIRDQSSCGSCWAFGAVEAMSDRICIASHGELQVTLSADDLL 164
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ------CN 225
+            C   C      ++C  GCNGG    A+ Y +K+G I T S+Y  TA  G +      C 
+Sbjct: 165 SC---C------KSCGFGCNGGDPLAAWRYWVKDG-IVTGSNY--TANNGCKPYPFPPCE 212
+
+Query: 226 FNSANIGAK----------------ISNFTMIPKNETVMAGY---------------IVS 254
+            +S                      +S++T    +E    G                +++
+Sbjct: 213 HHSKKTHFDPCPHDLYPTPKCEKKCVSDYTDKTYSEDKFFGASAYGVKDDVEAIQKELMT 272
+
+Query: 255 TGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSW 313
+            GPL IA +  E +  Y GGV+          H + ++G+   + I     PYW V NSW
+Sbjct: 273 HGPLEIAFEVYEDFLNYDGGVYVHTGGKLGGGHAVKLIGWGIDDGI-----PYWTVANSW 327
+
+Query: 314 GADWGEQGYIYLRRGKNTCGVSNFV 338
+             DWGE G+  + RG + CG+ + V
+Sbjct: 328 NTDWGEDGFFRILRGVDECGIESGV 352
+
+
+>sp|P43233|CATB_CHICK CATHEPSIN B PRECURSOR (CATHEPSIN B1)
+          Length = 340
+
+ Score = 83.5 bits (203), Expect = 6e-16
+ Identities = 69/278 (24%), Positives = 112/278 (39%), Gaps = 56/278 (20%)
+
+Query: 100 FTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 159
+           F +D+ + D  D        T   W     ++ +++QG CGSCW+F     +  +  +  
+Sbjct: 74  FAEDMDLPDTFD--------TRKQWPNCPTISEIRDQGSCGSCWAFGAVEAISDRICVHT 125
+
+Query: 160 NKLVSL--SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ-------- 209
+           N  VS+  S ++L+ C   C    G E C  GCNGG    A+ Y  + G +         
+Sbjct: 126 NAKVSVEVSAEDLLSC---C----GFE-CGMGCNGGYPSGAWRYWTERGLVSGGLYDSHV 177
+
+Query: 210 --------------TESSYPYTAETGT--QCN------FNSANIGAKISNFTM--IPKNE 245
+                           S  P T E G   +C+      ++ +    K    T   +P++E
+Sbjct: 178 GCRAYTIPPCEHHVNGSRPPCTGEGGETPRCSRHCEPGYSPSYKEDKHYGITSYGVPRSE 237
+
+Query: 246 TVMAGYIVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNM 304
+             +   I   GP+  A    E +  Y  GV+          H I I+G+  +N       
+Sbjct: 238 KEIMAEIYKNGPVEGAFIVYEDFLMYKSGVYQHVSGEQVGGHAIRILGWGVEN-----GT 292
+
+Query: 305 PYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+           PYW+  NSW  DWG  G+  + RG++ CG+ + +   +
+Sbjct: 293 PYWLAANSWNTDWGITGFFKILRGEDHCGIESEIVAGV 330
+
+
+>sp|P25802|CYS1_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 341
+
+ Score = 76.1 bits (184), Expect = 1e-13
+ Identities = 65/270 (24%), Positives = 104/270 (38%), Gaps = 54/270 (20%)
+
+Query: 103 DLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFIS 158
+           D  V D   +E  + IP ++D    W    ++  + +Q  CGSCW+ S+   +  +  I+
+Sbjct: 76  DEEVEDEELEENNDDIPESYDPRIQWANCSSLFHIPDQANCGSCWAVSSAAAMSDRICIA 135
+
+Query: 159 QN--KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNY-----IIKNGGIQTE 211
+               K V +S Q++V C   C        C +GC GG   +A+ +     ++  G   T+
+Sbjct: 136 SKGAKQVLISAQDVVSC---CTW------CGDGCEGGWPISAFRFHADEGVVTGGDYNTK 186
+
+Query: 212 SSY-PYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGY------------------- 251
+            S  PY        + N    G  +            + GY                   
+Sbjct: 187 GSCRPYEIHPCGH-HGNETYYGECVGMADTPRCKRRCLLGYPKSYPSDRYYKKAYQLKNS 245
+
+Query: 252 -------IVSTGPLAIAADAVE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKN 303
+                  I+  GP+       E +  Y  G++       +  H + ++G+  +     K 
+Sbjct: 246 VKAIQKDIMKNGPVVATYTVYEDFAHYRSGIYKHKAGRKTGLHAVKVIGWGEE-----KG 300
+
+Query: 304 MPYWIVKNSWGADWGEQGYIYLRRGKNTCG 333
+            PYWIV NSW  DWGE G+  + RG N CG
+Sbjct: 301 TPYWIVANSWHDDWGENGFFRMHRGSNDCG 330
+
+
+>sp|P25793|CYS2_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 342
+
+ Score = 75.3 bits (182), Expect = 2e-13
+ Identities = 55/247 (22%), Positives = 102/247 (41%), Gaps = 50/247 (20%)
+
+Query: 133 VKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGC 190
+           +++Q  CGSCW+ ST   +  +  I+    K V++S  +++ C   C        C +GC
+Sbjct: 105 IRDQANCGSCWAVSTAAAISDRICIASKAEKQVNISATDIMTC---C-----RPQCGDGC 156
+
+Query: 191 NGGLQPNAYNYIIKNGGIQ------TESSYPYTAET----GTQCNFNSANIGAKI----- 235
+            GG    A+ Y I +G +        +   PY        G    +      A       
+Sbjct: 157 EGGWPIEAWKYFIYDGVVSGGEYLTKDVCRPYPIHPCGHHGNDTYYGECRGTAPTPPCKR 216
+
+Query: 236 -----------------SNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFD 276
+                             +  ++ ++   +   I+  GP+ +A+ AV  +++ Y  G++ 
+Sbjct: 217 KCRPGVRKMYRIDKRYGKDAYIVKQSVKAIQSEILKNGPV-VASFAVYEDFRHYKSGIYK 275
+
+Query: 277 IPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 336
+                    H + ++G+  +N     N  +W++ NSW  DWGE+GY  + RG N CG+  
+Sbjct: 276 HTAGELRGYHAVKMIGWGNEN-----NTDFWLIANSWHNDWGEKGYFRIVRGSNDCGIEG 330
+
+Query: 337 FVSTSII 343
+            ++  I+
+Sbjct: 331 TIAAGIV 337
+
+
+>sp|P19092|CYS1_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 342
+
+ Score = 74.5 bits (180), Expect = 3e-13
+ Identities = 55/247 (22%), Positives = 102/247 (41%), Gaps = 50/247 (20%)
+
+Query: 133 VKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGC 190
+           +++Q  CGSCW+ ST   +  +  I+    K V++S  +++ C   C        C +GC
+Sbjct: 105 IRDQANCGSCWAVSTAAAISDRICIASKAEKQVNISATDIMTC---C-----RPQCGDGC 156
+
+Query: 191 NGGLQPNAYNYIIKNGGIQ------TESSYPYTAET----GTQCNFNSANIGAKI----- 235
+            GG    A+ Y I +G +        +   PY        G    +      A       
+Sbjct: 157 EGGWPIEAWKYFIYDGVVSGGEYLTKDVCRPYPIHPCGHHGNDTYYGECRGTAPTPPCKR 216
+
+Query: 236 -----------------SNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFD 276
+                             +  ++ ++   +   I+  GP+ +A+ AV  +++ Y  G++ 
+Sbjct: 217 KCRPGVRKMYRIDKRYGKDAYIVKQSVKAIQSEILRNGPV-VASFAVYEDFRHYKSGIYK 275
+
+Query: 277 IPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 336
+                    H + ++G+  +N     N  +W++ NSW  DWGE+GY  + RG N CG+  
+Sbjct: 276 HTAGELRGYHAVKMIGWGNEN-----NTDFWLIANSWHNDWGEKGYFRIIRGTNDCGIEG 330
+
+Query: 337 FVSTSII 343
+            ++  I+
+Sbjct: 331 TIAAGIV 337
+
+
+>sp|P43507|CPR3_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 370
+
+ Score = 73.4 bits (177), Expect = 7e-13
+ Identities = 61/259 (23%), Positives = 101/259 (38%), Gaps = 49/259 (18%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLV 171
+           +P  FD    W     +  ++NQ  CGSCW+F     +  +  I  N      +S ++++
+Sbjct: 92  LPDTFDAREKWPDCNTIKLIRNQATCGSCWAFGAAEVISDRVCIQSNGTQQPVISVEDIL 151
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ---------------------T 210
+            C   C        C  GC GG    A  +   +G +                       
+Sbjct: 152 SC---C-----GTTCGYGCKGGYSIEALRFWASSGAVTGGDYGGHGCMPYSFAPCTKNCP 203
+
+Query: 211 ESSYPYTAETGTQCNFNSA------NIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA 264
+           ES+ P + +T  Q ++ +       + GA     T   K+ T +   I   GP+  +   
+Sbjct: 204 ESTTP-SCKTTCQSSYKTEEYKKDKHYGASAYKVTTT-KSVTEIQTEIYHYGPVEASYKV 261
+
+Query: 265 VE-WQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 323
+            E +  Y  GV+          H + I+G+  +N +      YW++ NSWG  +GE+G+ 
+Sbjct: 262 YEDFYHYKSGVYHYTSGKLVGGHAVKIIGWGVENGV-----DYWLIANSWGTSFGEKGFF 316
+
+Query: 324 YLRRGKNTCGVSNFVSTSI 342
+            +RRG N C +   V   I
+Sbjct: 317 KIRRGTNECQIEGNVVAGI 335
+
+
+>sp|P13823|SERA_PLAFG SERINE-REPEAT ANTIGEN PROTEIN PRECURSOR (P126) (111 KD ANTIGEN)
+          Length = 989
+
+ Score = 70.6 bits (170), Expect = 4e-12
+ Identities = 61/247 (24%), Positives = 101/247 (40%), Gaps = 50/247 (20%)
+
+Query: 133 VKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE--EACDEGC 190
+           V++QG C + W F++  ++E    +   +   +S   + +C      Y+GE  + CDEG 
+Sbjct: 579 VEDQGNCDTSWIFASKYHLETIRCMKGYEPTKISALYVANC------YKGEHKDRCDEGS 632
+
+Query: 191 NGGLQPNAYNYIIKNGG-IQTESSYPYT-AETGTQCN----------------------- 225
+           +    P  +  II++ G +  ES+YPY   + G QC                        
+Sbjct: 633 S----PMEFLQIIEDYGFLPAESNYPYNYVKVGEQCPKVEDHWMNLWDNGKILHNKNEPN 688
+
+Query: 226 ---------FNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFD 276
+                    + S      +  F  I K E +  G +++     I A+ V    + G    
+Sbjct: 689 SLDGKGYTAYESERFHDNMDAFVKIIKTEVMNKGSVIAY----IKAENVMGYEFSGKKVQ 744
+
+Query: 277 IPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 336
+             C  ++ DH + IVGY        +   YWIV+NSWG  WG++GY  +     T    N
+Sbjct: 745 NLCGDDTADHAVNIVGYGNYVNSEGEKKSYWIVRNSWGPYWGDEGYFKVDMYGPTHCHFN 804
+
+Query: 337 FVSTSII 343
+           F+ + +I
+Sbjct: 805 FIHSVVI 811
+
+
+>sp|P32956|CC3_CARCN CYSTEINE PROTEINASE III (CC-III)
+          Length = 43
+
+ Score = 63.2 bits (151), Expect = 8e-10
+ Identities = 25/35 (71%), Positives = 28/35 (79%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 153
+           P + DWR +GAVTPVKNQG CGSCW+FST   VEG
+Sbjct: 2   PESIDWRKKGAVTPVKNQGSCGSCWAFSTIATVEG 36
+
+
+>sp|P32957|CC4_CARCN CYSTEINE PROTEINASE IV (CC-IV)
+          Length = 43
+
+ Score = 62.1 bits (148), Expect = 2e-09
+ Identities = 25/35 (71%), Positives = 28/35 (79%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 153
+           P + DWR +GAVTPVKNQG CGSCW+FST   VEG
+Sbjct: 2   PESIDWRKKGAVTPVKNQGSCGSCWAFSTIVTVEG 36
+
+
+>sp|Q06544|CYS3_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3
+          Length = 174
+
+ Score = 59.3 bits (141), Expect = 1e-08
+ Identities = 31/103 (30%), Positives = 49/103 (47%), Gaps = 15/103 (14%)
+
+Query: 241 IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 300
+           I KN  V+AG+IV            ++  Y  G++       +  H + I+G+  +    
+Sbjct: 87  IMKNGPVVAGFIVYE----------DFAHYKSGIYKHTAGRMTGGHAVKIIGWGKE---- 132
+
+Query: 301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+            K  PYW++ NSW  DWGE+G+  + RG N C +   V   I+
+Sbjct: 133 -KGTPYWLIANSWHDDWGEKGFYRMIRGINNCRIEEMVFAGIV 174
+
+
+>sp|P32954|CC1_CARCN CYSTEINE PROTEINASE I (CC-I)
+          Length = 43
+
+ Score = 58.6 bits (139), Expect = 2e-08
+ Identities = 23/36 (63%), Positives = 28/36 (76%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 153
+           I  + DWR +GAVTPV+NQG CGSCW+FS+   VEG
+Sbjct: 1   IVASIDWRQKGAVTPVRNQGSCGSCWTFSSVAAVEG 36
+
+
+>sp|P32955|CC2_CARCN CYSTEINE PROTEINASE II (CC-II)
+          Length = 43
+
+ Score = 58.2 bits (138), Expect = 3e-08
+ Identities = 23/35 (65%), Positives = 27/35 (76%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEG 153
+           P + DWR +GAVTPVK+Q  CGSCW+FST   VEG
+Sbjct: 2   PGSVDWRQKGAVTPVKDQNPCGSCWAFSTVATVEG 36
+
+
+>sp||CATL_CHICK_2 [Segment 2 of 2] CATHEPSIN L
+          Length = 42
+
+ Score = 51.9 bits (122), Expect = 2e-06
+ Identities = 20/39 (51%), Positives = 28/39 (71%), Gaps = 1/39 (2%)
+
+Query: 306 YWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+           YWIVKNSWG  WG++GYIY+ +  KN CG++   S  ++
+Sbjct: 4   YWIVKNSWGEKWGDKGYIYMAKDRKNHCGIATAASYPLV 42
+
+
+>sp|P12399|CT2A_MOUSE CTLA-2-ALPHA PROTEIN PRECURSOR
+          Length = 136
+
+ Score = 41.8 bits (96), Expect = 0.002
+ Identities = 31/101 (30%), Positives = 50/101 (48%), Gaps = 4/101 (3%)
+
+Query: 9   LAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIA 66
+           L +  + + S   PP+    +++ E++ KF K Y+  E   R  +++ N  KIE  N   
+Sbjct: 17  LLILCLGMMSAAPPPDPSLDNEWKEWKTKFAKAYNLNEERHRRLVWEENKKKIEAHNADY 76
+
+Query: 67  INHKADTKFGVNKFADLSSDEFK-NYYLNN-KEAIFTDDLP 105
+              K     G+N+F+DL+ +EFK N Y N+        DLP
+Sbjct: 77  EQGKTSFYMGLNQFSDLTPEEFKTNCYGNSLNRGEMAPDLP 117
+
+
+>sp|P21381|THPA_THADA THAUMATOPAIN
+          Length = 35
+
+ Score = 40.6 bits (93), Expect = 0.005
+ Identities = 16/31 (51%), Positives = 22/31 (70%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           ++P + DW  +GAV  VKNQ  CGSC +FS+
+Sbjct: 1   NLPNSVDWWKKGAVAAVKNQRXCGSCXAFSS 31
+
+
+>sp|P05689|CATX_BOVIN CATHEPSIN
+          Length = 73
+
+ Score = 40.2 bits (92), Expect = 0.006
+ Identities = 15/40 (37%), Positives = 24/40 (59%), Gaps = 5/40 (12%)
+
+Query: 284 LDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 323
+           ++H + + G+   +      M YWIV+NSWG  WGE G++
+Sbjct: 9   INHIVSVAGWGVSD-----GMEYWIVRNSWGEPWGEHGWM 43
+
+
+>sp|P12400|CT2B_MOUSE CTLA-2-BETA PROTEIN PRECURSOR
+          Length = 141
+
+ Score = 38.7 bits (88), Expect = 0.018
+ Identities = 25/85 (29%), Positives = 45/85 (52%), Gaps = 1/85 (1%)
+
+Query: 6   LFVLAVFTVFVSSRGIP-PEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+           +F+L +    +S+   P P   +++ E++  F K YS +E   R  +++ N  KIE  N 
+Sbjct: 20  VFLLILCLGMMSAAPSPDPSLDNEWKEWKTTFAKAYSLDEERHRRLMWEENKKKIEAHNA 79
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFK 89
+                K     G+N+F+DL+ +EF+
+Sbjct: 80  DYERGKTSFYMGLNQFSDLTPEEFR 104
+
+
+>sp|P20736|BM86_BOOMI GLYCOPROTEIN ANTIGEN BM86 PRECURSOR (PROTECTIVE ANTIGEN)
+          Length = 650
+
+ Score = 35.2 bits (79), Expect = 0.21
+ Identities = 24/81 (29%), Positives = 36/81 (43%), Gaps = 5/81 (6%)
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDC----DHECMEYEGEEACDEGCNGGLQPNAYNYI 202
+           TT N +        KL  + + +  +C    DHEC     +++C E  NG  Q +    +
+Sbjct: 533 TTCNPKEIQECQDKKLECVYKNHKAECECPDDHECYREPAKDSCSEEDNGKCQSSGQRCV 592
+
+Query: 203 IKNG-GIQTESSYPYTAETGT 222
+           I+NG  +  E S   TA T T
+Sbjct: 593 IENGKAVCKEKSEATTAATTT 613
+
+
+>sp|Q11121|PLB1_TORDE LYSOPHOSPHOLIPASE PRECURSOR (PHOSPHOLIPASE B)
+          Length = 649
+
+ Score = 34.8 bits (78), Expect = 0.27
+ Identities = 31/144 (21%), Positives = 56/144 (38%), Gaps = 13/144 (9%)
+
+Query: 109 YLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQ 168
+           Y DDE +  I   F+  TRG +T   +   C +C              + + K  SL+  
+Sbjct: 519 YTDDERLKMIKNGFEAATRGNLTDDSSFMGCVAC-------------AVMRRKQQSLNAT 565
+
+Query: 169 NLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNS 228
+              +C      Y      D+    GL  + ++    +      ++  Y++ + T    N 
+Sbjct: 566 LPEECSTCFTNYCWNGTIDDTPVSGLDNSDFDPTAASSAYSAYNTESYSSSSATGSKKNG 625
+
+Query: 229 ANIGAKISNFTMIPKNETVMAGYI 252
+           A + A  ++FT I    T +AG++
+Sbjct: 626 AGLPATPTSFTSILTLLTAIAGFL 649
+
+
+>sp|P46992|YJR1_YEAST HYPOTHETICAL 43.0 KD PROTEIN IN CPS1-FPP1 INTERGENIC REGION
+          Length = 396
+
+ Score = 33.6 bits (75), Expect = 0.61
+ Identities = 40/191 (20%), Positives = 76/191 (38%), Gaps = 43/191 (22%)
+
+Query: 77  VNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEF------------------INSI 118
+           VNKF D++++E     + ++      + P+ADYL   F                  +N+ 
+Sbjct: 42  VNKFKDITNNESCTCEVGDRVWFSGKNAPLADYLSVHFRGPLKLKQFAFYTSPGFTVNNS 101
+
+Query: 119 PTAFDW----------RTRGAVTPVKNQGQCGSCW-------SFSTTGNVEGQHFISQNK 161
+            ++ DW          +T   VT + + G+   C        S + TG+      ++   
+Sbjct: 102 RSSSDWNRLAYYESSSKTADNVTFLNHGGEASPCLGNALSYASSNGTGSASEATVLADGT 161
+
+Query: 162 LVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETG 221
+           L+S  ++ ++  +  C +   ++ C    +G   P  Y Y    GG  T   + +  E  
+Sbjct: 162 LISSDQEYIIYSNVSCPKSGYDKGCGVYRSG--IPAYYGY----GG--TTKMFLFEFEMP 213
+
+Query: 222 TQCNFNSANIG 232
+           T+   NS++IG
+Sbjct: 214 TETEKNSSSIG 224
+
+
+>sp|P28493|PR5_ARATH PATHOGENESIS-RELATED PROTEIN 5 PRECURSOR (PR-5)
+          Length = 239
+
+ Score = 32.1 bits (71), Expect = 1.8
+ Identities = 24/93 (25%), Positives = 36/93 (37%), Gaps = 7/93 (7%)
+
+Query: 133 VKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNG 192
+           ++  G  G C      G V   +    + L  + + N+V C   C  +  ++ C  G N 
+Sbjct: 137 IRPSGGSGDC---KYAGCVSDLNAACPDMLKVMDQNNVVACKSACERFNTDQYCCRGAND 193
+
+Query: 193 GLQ---PNAYNYIIKNGGIQTESSYPYTAETGT 222
+             +   P  Y+ I KN       SY Y  ET T
+Sbjct: 194 KPETCPPTDYSRIFKN-ACPDAYSYAYDDETST 225
+
+
+>sp|P41901|SPR3_YEAST SPORULATION-SPECIFIC SEPTIN
+          Length = 512
+
+ Score = 31.7 bits (70), Expect = 2.4
+ Identities = 18/63 (28%), Positives = 30/63 (47%), Gaps = 9/63 (14%)
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           + +NLI +  K+D          L+ +E KN+    +E I   D+PV  +  DE +N+  
+Sbjct: 237 KRVNLIPVIAKSDL---------LTKEELKNFKTQVREIIRVQDIPVCFFFGDEVLNATQ 287
+
+Query: 120 TAF 122
+             F
+Sbjct: 288 DIF 290
+
+
+>sp|P54634|POLN_LORDV NON-STRUCTURAL POLYPROTEIN [CONTAINS: RNA-DIRECTED RNA POLYMERASE ;
+           THIOL PROTEASE 3C ; HELICASE (2C LIKE PROTEIN)]
+          Length = 1699
+
+ Score = 31.3 bits (69), Expect = 3.1
+ Identities = 13/31 (41%), Positives = 21/31 (66%)
+
+Query: 17  SSRGIPPEEQSQFLEFQDKFNKKYSHEEYLE 47
+           SS+G+  EE  ++   +++ N KYS EEYL+
+Sbjct: 893 SSKGLSDEEYDEYKRIREERNGKYSIEEYLQ 923
+
+
+>sp|P21173|DNAA_MICLU CHROMOSOMAL REPLICATION INITIATOR PROTEIN DNAA
+          Length = 515
+
+ Score = 31.3 bits (69), Expect = 3.1
+ Identities = 25/104 (24%), Positives = 46/104 (44%), Gaps = 13/104 (12%)
+
+Query: 31  EFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKN 90
+           EF + F     H+E     +++++    ++ L +  I   AD +  V +F       F  
+Sbjct: 247 EFTNDFINSIRHDEGASFKQVYRN----VDILLIDDIQFLADKEATVEEFFHT----FNT 298
+
+Query: 91  YYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVK 134
+            Y NNK+ + T DLP        F + + + F+W   G +T ++
+Sbjct: 299 LYNNNKQVVITSDLPPKQL--SGFEDRLRSRFEW---GLITDIQ 337
+
+
+>sp|P89263|Y022_GVXN HYPOTHETICAL ORF22 HOMOLOG
+          Length = 166
+
+ Score = 30.5 bits (67), Expect = 5.3
+ Identities = 27/107 (25%), Positives = 43/107 (39%), Gaps = 9/107 (8%)
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           SF T     G H  SQ   V +S Q+LV        Y+    CD+          Y+ ++
+Sbjct: 66  SFDTLEGPGGGHCFSQP--VRVSRQDLVT-------YDCASLCDDVRAAYFYVGPYDRLV 116
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAG 250
+            +G    E  Y  T      CN  ++ +   I+++T I ++    AG
+Sbjct: 117 VDGNELQEGGYCTTNSVPRNCNRETSILLHSINHWTCIAEDPRYYAG 163
+
+
+>sp|P24896|NU5M_CAEEL NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 5
+          Length = 527
+
+ Score = 30.5 bits (67), Expect = 5.3
+ Identities = 21/52 (40%), Positives = 26/52 (49%), Gaps = 7/52 (13%)
+
+Query: 44  EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNN 95
+           +YL +  I+K    K  +L L  IN K  T F       LSS  FKNYYL +
+Sbjct: 466 DYLAKNSIYKMKNLKFMDLFLNNINSKGYTLF-------LSSGMFKNYYLKS 510
+
+
+>sp|P25648|SRB8_YEAST SUPPRESSOR OF RNA POLYMERASE B SRB8
+          Length = 1427
+
+ Score = 30.1 bits (66), Expect = 7.0
+ Identities = 22/89 (24%), Positives = 44/89 (48%), Gaps = 10/89 (11%)
+
+Query: 21   IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGV--- 77
+            +PP + S F++     +  Y  EE  ++ E F  NLG    + ++ I H+ + K+ +   
+Sbjct: 1314 LPPFQVSSFVKETKLHSGDYGEEEDADQEESFSLNLG----IGIVEIAHENEQKWLIYDK 1369
+
+Query: 78   --NKFADLSSDEFKNYYLNNKEAIFTDDL 104
+              +K+    S E   ++++N    +TDD+
+Sbjct: 1370 KDHKYVCTFSME-PYHFISNYNTKYTDDM 1397
+
+
+>sp|Q04723|PEPC_LACLC AMINOPEPTIDASE C
+          Length = 436
+
+ Score = 30.1 bits (66), Expect = 7.0
+ Identities = 11/20 (55%), Positives = 14/20 (70%)
+
+Query: 303 NMPYWIVKNSWGADWGEQGY 322
+           N   W V+NSWG D G++GY
+Sbjct: 370 NSTKWKVENSWGKDAGQKGY 389
+
+
+>sp|Q13867|BLMH_HUMAN BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)
+          Length = 455
+
+ Score = 29.7 bits (65), Expect = 9.1
+ Identities = 10/17 (58%), Positives = 13/17 (75%)
+
+Query: 307 WIVKNSWGADWGEQGYI 323
+           W V+NSWG D G +GY+
+Sbjct: 392 WRVENSWGEDHGHKGYL 408
+
+
+>sp|P87362|BLMH_CHICK BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH) (AMINOPEPTIDASE H)
+          Length = 455
+
+ Score = 29.7 bits (65), Expect = 9.1
+ Identities = 10/19 (52%), Positives = 14/19 (73%)
+
+Query: 307 WIVKNSWGADWGEQGYIYL 325
+           W V+NSWG D G +GY+ +
+Sbjct: 392 WRVENSWGEDRGNKGYLIM 410
+
+
+>sp|P70645|BLMH_RAT BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)
+          Length = 454
+
+ Score = 29.7 bits (65), Expect = 9.1
+ Identities = 10/17 (58%), Positives = 13/17 (75%)
+
+Query: 307 WIVKNSWGADWGEQGYI 323
+           W V+NSWG D G +GY+
+Sbjct: 392 WRVENSWGEDHGHKGYL 408
+
+
+Searching..................................................done
+
+
+Results from round 2
+
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+Sequences used in model and found again:
+
+sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR                  527  e-149
+sp|P25975|CATL_BOVIN CATHEPSIN L PRECURSOR                            440  e-123
+sp|P07154|CATL_RAT CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN...   431  e-121
+sp|P06797|CATL_MOUSE CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTE...   431  e-120
+sp|Q28944|CATL_PIG CATHEPSIN L PRECURSOR                              430  e-120
+sp|P07711|CATL_HUMAN CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTE...   421  e-118
+sp|O60911|CATM_HUMAN CATHEPSIN L2 PRECURSOR (CATHEPSIN V)             417  e-116
+sp|P43296|RD19_ARATH CYSTEINE PROTEINASE RD19A PRECURSOR              412  e-115
+sp|P25776|ORYA_ORYSA ORYZAIN ALPHA CHAIN PRECURSOR                    407  e-113
+sp|P25782|CYS2_HOMAM DIGESTIVE CYSTEINE PROTEINASE 2 PRECURSOR        403  e-112
+sp|P43295|A494_ARATH PROBABLE CYSTEINE PROTEINASE A494 PRECURSOR      402  e-112
+sp|P43235|CATK_HUMAN CATHEPSIN K PRECURSOR (CATHEPSIN O) (CATHEP...   401  e-111
+sp|P25804|CYSP_PEA CYSTEINE PROTEINASE 15A PRECURSOR (TURGOR-RES...   399  e-111
+sp|P04989|CYS2_DICDI CYSTEINE PROTEINASE 2 PRECURSOR (PRESTALK C...   398  e-111
+sp|P43236|CATK_RABIT CATHEPSIN K PRECURSOR (OC-2 PROTEIN)             395  e-110
+sp|P54640|CYS5_DICDI CYSTEINE PROTEINASE 5 PRECURSOR                  394  e-109
+sp|P13277|CYS1_HOMAM DIGESTIVE CYSTEINE PROTEINASE 1 PRECURSOR        393  e-109
+sp|P25784|CYS3_HOMAM DIGESTIVE CYSTEINE PROTEINASE 3 PRECURSOR        392  e-109
+sp|P25774|CATS_HUMAN CATHEPSIN S PRECURSOR                            392  e-109
+sp|P15242|TES1_RAT TESTIN 1/2 PRECURSOR (CMB-22/CMB-23)               391  e-109
+sp|Q10716|CYS1_MAIZE CYSTEINE PROTEINASE 1 PRECURSOR                  391  e-108
+sp|P12412|CYSP_VIGMU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYS...   389  e-108
+sp|P43297|RD21_ARATH CYSTEINE PROTEINASE RD21A PRECURSOR              387  e-107
+sp|P55097|CATK_MOUSE CATHEPSIN K PRECURSOR                            384  e-106
+sp|P09668|CATH_HUMAN CATHEPSIN H PRECURSOR                            383  e-106
+sp|O46427|CATH_PIG CATHEPSIN H PRECURSOR                              382  e-106
+sp|P25803|CYSP_PHAVU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYS...   380  e-105
+sp|P00786|CATH_RAT CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPS...   377  e-104
+sp|P25251|CYS4_BRANA CYSTEINE PROTEINASE COT44 PRECURSOR              376  e-104
+sp|P05167|ALEU_HORVU THIOL PROTEASE ALEURAIN PRECURSOR                375  e-104
+sp|Q10717|CYS2_MAIZE CYSTEINE PROTEINASE 2 PRECURSOR                  374  e-103
+sp|P25777|ORYB_ORYSA ORYZAIN BETA CHAIN PRECURSOR                     374  e-103
+sp|Q40143|CYS3_LYCES CYSTEINE PROTEINASE 3 PRECURSOR                  373  e-103
+sp|P43156|CYSP_HEMSP THIOL PROTEASE SEN102 PRECURSOR                  373  e-103
+sp|P49935|CATH_MOUSE CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHE...   372  e-103
+sp|P25778|ORYC_ORYSA ORYZAIN GAMMA CHAIN PRECURSOR                    370  e-102
+sp|P00785|ACTN_ACTCH ACTINIDAIN PRECURSOR (ACTINIDIN)                 368  e-102
+sp|P41721|CATV_NPVBM VIRAL CATHEPSIN (V-CATH)                         367  e-101
+sp|Q02765|CATS_RAT CATHEPSIN S PRECURSOR                              365  e-100
+sp|P14658|CYSP_TRYBB CYSTEINE PROTEINASE PRECURSOR                    365  e-100
+sp|P25783|CATV_NPVAC VIRAL CATHEPSIN (V-CATH)                         363  e-100
+sp|P41715|CATV_NPVCF VIRAL CATHEPSIN (V-CATH)                         363  e-100
+sp|P25250|CYS2_HORVU CYSTEINE PROTEINASE EP-B 2 PRECURSOR             361  e-100
+sp|P25249|CYS1_HORVU CYSTEINE PROTEINASE EP-B 1 PRECURSOR             361  1e-99
+sp|Q26534|CATL_SCHMA CATHEPSIN L PRECURSOR (SMCL1)                    361  2e-99
+sp|P25779|CYSP_TRYCR CRUZIPAIN PRECURSOR (MAJOR CYSTEINE PROTEIN...   356  4e-98
+sp|O10364|CATV_NPVOP VIRAL CATHEPSIN (V-CATH)                         351  2e-96
+sp|P36400|LCPB_LEIME CYSTEINE PROTEINASE B PRECURSOR                  348  1e-95
+sp|P25775|LCPA_LEIME CYSTEINE PROTEINASE A PRECURSOR                  348  1e-95
+sp|Q05094|CYS2_LEIPI CYSTEINE PROTEINASE 2 PRECURSOR (AMASTIGOTE...   347  2e-95
+sp|P35591|CYS1_LEIPI CYSTEINE PROTEINASE 1 PRECURSOR (AMASTIGOTE...   347  3e-95
+sp|P05994|PAP4_CARPA PAPAYA PROTEINASE IV PRECURSOR (PPIV) (PAPA...   346  3e-95
+sp|P10056|PAP3_CARPA CARICAIN PRECURSOR (PAPAYA PROTEINASE OMEGA...   341  1e-93
+sp|P14080|PAP2_CARPA CHYMOPAPAIN PRECURSOR (PAPAYA PROTEINASE II...   339  6e-93
+sp|P00784|PAPA_CARPA PAPAIN PRECURSOR (PAPAYA PROTEINASE I) (PPI)     334  1e-91
+sp|P22895|P34_SOYBN P34 PROBABLE THIOL PROTEASE PRECURSOR             331  1e-90
+sp|O17473|CATL_BRUPA CATHEPSIN L-LIKE PRECURSOR                       327  2e-89
+sp|Q10991|CATL_SHEEP CATHEPSIN L                                      325  1e-88
+sp|P54639|CYS4_DICDI CYSTEINE PROTEINASE 4 PRECURSOR                  323  4e-88
+sp|P56203|CATW_MOUSE CATHEPSIN W PRECURSOR (LYMPHOPAIN)               319  4e-87
+sp|P56202|CATW_HUMAN CATHEPSIN W PRECURSOR (LYMPHOPAIN)               318  9e-87
+sp|Q01958|CPP2_ENTHI CYSTEINE PROTEINASE 2 PRECURSOR                  318  9e-87
+sp|P36185|ACP2_ENTHI CYSTEINE PROTEINASE ACP2 PRECURSOR               315  8e-86
+sp|Q06964|CPP3_ENTHI CYSTEINE PROTEINASE 3 PRECURSOR (CYSTEINE P...   312  9e-85
+sp|P36184|ACP1_ENTHI CYSTEINE PROTEINASE ACP1 PRECURSOR               309  5e-84
+sp|P25326|CATS_BOVIN CATHEPSIN S                                      307  2e-83
+sp|Q01957|CPP1_ENTHI CYSTEINE PROTEINASE 1 PRECURSOR                  301  1e-81
+sp|P25805|CYSP_PLAFA THROPHOZOITE CYSTEINE PROTEINASE PRECURSOR ...   299  5e-81
+sp|P20721|CYSL_LYCES LOW-TEMPERATURE-INDUCED CYSTEINE PROTEINASE...   298  9e-81
+sp|P43234|CATO_HUMAN CATHEPSIN O PRECURSOR                            296  5e-80
+sp|P46102|CYSP_PLAVN CYSTEINE PROTEINASE PRECURSOR                    291  1e-78
+sp|P42666|CYSP_PLAVI CYSTEINE PROTEINASE PRECURSOR                    278  1e-74
+sp|P16311|MMAL_DERFA MAJOR MITE FECAL ALLERGEN DER F 1 PRECURSOR...   272  8e-73
+sp|P80884|ANAN_ANACO ANANAIN                                          271  1e-72
+sp||CATL_CHICK_1 [Segment 1 of 2] CATHEPSIN L                         264  1e-70
+sp|P97821|CATC_MOUSE DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (D...   262  9e-70
+sp|P25781|CYSP_THEAN CYSTEINE PROTEINASE PRECURSOR                    259  6e-69
+sp|P08176|MMAL_DERPT MAJOR MITE FECAL ALLERGEN DER P 1 PRECURSOR...   255  1e-67
+sp|P14518|BROM_ANACO BROMELAIN, STEM                                  253  4e-67
+sp|P53634|CATC_HUMAN DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (D...   251  2e-66
+sp|P80067|CATC_RAT DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPP...   250  4e-66
+sp|P22497|CYSP_THEPA CYSTEINE PROTEINASE PRECURSOR                    243  5e-64
+sp|P07858|CATB_HUMAN CATHEPSIN B PRECURSOR (CATHEPSIN B1) (APP S...   238  1e-62
+sp|P00787|CATB_RAT CATHEPSIN B PRECURSOR (CATHEPSIN B1) (RSG-2)       236  4e-62
+sp|P10605|CATB_MOUSE CATHEPSIN B PRECURSOR (CATHEPSIN B1)             233  4e-61
+sp|P43509|CPR5_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 5 PREC...   232  8e-61
+sp|P07688|CATB_BOVIN CATHEPSIN B PRECURSOR                            232  8e-61
+sp|P43233|CATB_CHICK CATHEPSIN B PRECURSOR (CATHEPSIN B1)             232  1e-60
+sp|Q26563|CATC_SCHMA CATHEPSIN C PRECURSOR                            231  1e-60
+sp|P25807|CYS1_CAEEL GUT-SPECIFIC CYSTEINE PROTEINASE PRECURSOR       231  1e-60
+sp|P43508|CPR4_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 4 PREC...   231  2e-60
+sp|P43510|CPR6_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 6 PREC...   225  8e-59
+sp|P43157|CYSP_SCHJA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECUR...   222  1e-57
+sp|P25792|CYSP_SCHMA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECUR...   216  5e-56
+sp|P25802|CYS1_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PREC...   213  4e-55
+sp|P25793|CYS2_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 2 PREC...   212  1e-54
+sp|P19092|CYS1_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PREC...   208  2e-53
+sp|P43507|CPR3_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3 PREC...   204  3e-52
+sp|P25780|EUM1_EURMA MITE GROUP I ALLERGEN EUR M 1 (EUR M I)          201  1e-51
+sp|P25773|CATL_FELCA CATHEPSIN L (PROGESTERONE-DEPENDENT PROTEIN...   185  2e-46
+sp|Q23894|CYS3_DICDI CYSTEINE PROTEINASE 3 (CYSTEINE PROTEINASE II)   158  2e-38
+sp|P13823|SERA_PLAFG SERINE-REPEAT ANTIGEN PROTEIN PRECURSOR (P1...   157  3e-38
+sp|Q06544|CYS3_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3           139  6e-33
+sp|P05993|PAP5_CARPA CYSTEINE PROTEINASE (CLONE PLBPC13)              131  2e-30
+sp|P32957|CC4_CARCN CYSTEINE PROTEINASE IV (CC-IV)                     85  3e-16
+sp|P32956|CC3_CARCN CYSTEINE PROTEINASE III (CC-III)                   84  4e-16
+sp|P32955|CC2_CARCN CYSTEINE PROTEINASE II (CC-II)                     81  2e-15
+sp||CATL_CHICK_2 [Segment 2 of 2] CATHEPSIN L                          77  5e-14
+sp|P32954|CC1_CARCN CYSTEINE PROTEINASE I (CC-I)                       76  1e-13
+
+Sequences not found previously or not previously below threshold:
+
+sp|P12399|CT2A_MOUSE CTLA-2-ALPHA PROTEIN PRECURSOR                    98  2e-20
+sp|P12400|CT2B_MOUSE CTLA-2-BETA PROTEIN PRECURSOR                     96  1e-19
+sp|P05689|CATX_BOVIN CATHEPSIN                                         60  9e-09
+sp|P94869|PEPG_LACDL AMINOPEPTIDASE G                                  46  1e-04
+sp|P94870|PEPE_LACHE AMINOPEPTIDASE E                                  43  0.001
+sp|P94868|PEPW_LACDL AMINOPEPTIDASE W                                  42  0.002
+sp|Q10744|PEPC_LACHE AMINOPEPTIDASE C                                  41  0.003
+sp|Q04723|PEPC_LACLC AMINOPEPTIDASE C                                  40  0.009
+sp|Q48543|PEPC_LACDL AMINOPEPTIDASE C                                  38  0.025
+sp|P21381|THPA_THADA THAUMATOPAIN                                      38  0.025
+sp|Q56115|PEPC_STRTR AMINOPEPTIDASE C                                  36  0.13
+sp|P09983|HLY1_ECOLI HEMOLYSIN, CHROMOSOMAL                            36  0.17
+sp|P33403|CYSP_TRIFO CYSTEINE PROTEINASE                               35  0.28
+sp|P08715|HLYA_ECOLI HEMOLYSIN, PLASMID                                35  0.28
+sp|P87362|BLMH_CHICK BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH) (...    34  0.37
+sp|P54704|PSPB_DICDI PRESPORE PROTEIN B PRECURSOR                      34  0.37
+sp|Q13867|BLMH_HUMAN BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)         34  0.49
+sp|P70645|BLMH_RAT BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)           34  0.49
+sp|P80532|CAT3_FASHE PUTATIVE CATHEPSIN L3 (NEWLY EXCYSTED JUVEN...    34  0.49
+sp|P16462|LKTA_ACTAC LEUKOTOXIN                                        34  0.49
+sp|P13438|TSP_MOUSE TROPHOBLAST-SPECIFIC PROTEIN PRECURSOR             32  1.4
+sp|Q00951|HLYA_ACTSU HEMOLYSIN (CYTOLYSIN II) (CLY-IIA) (HLY-IIA...    32  1.4
+sp|P15377|RT2A_ACTPL RTX-II TOXIN DETERMINANT A (APX-IIA) (HEMOL...    32  1.4
+sp|P52181|TGLC_PAGMA PROTEIN-GLUTAMINE GAMMA-GLUTAMYLTRANSFERASE...    32  1.9
+sp|P35681|TCTP_ORYSA TRANSLATIONALLY CONTROLLED TUMOR PROTEIN HO...    32  1.9
+sp|Q01532|BLH1_YEAST CYSTEINE PROTEINASE 1 (Y3) (BLEOMYCIN HYDRO...    32  2.5
+sp|P16312|MMAL_DERMI MAJOR MITE FECAL ALLERGEN DER M 1 (DER M I)       31  3.2
+sp|O03992|TCTP_FRAAN TRANSLATIONALLY CONTROLLED TUMOR PROTEIN HO...    31  4.2
+sp|Q04489|YMJ6_YEAST HYPOTHETICAL 59.5 KD PROTEIN IN VPS9-RAD10 ...    31  4.2
+sp|Q03164|HRX_HUMAN ZINC FINGER PROTEIN HRX (ALL-1) (TRITHORAX-L...    31  4.2
+sp||CATB_COTJA_1 [Segment 1 of 2] CATHEPSIN B (CATHEPSIN B1)           30  5.6
+sp|Q62703|RCN2_RAT RETICULOCALBIN 2 PRECURSOR (CALCIUM-BINDING P...    30  5.6
+sp|P48651|PSS1_HUMAN PHOSPHATIDYLSERINE SYNTHASE I (SERINE-EXCHA...    30  5.6
+sp|P33404|CYSP_TRIVA CYSTEINE PROTEINASE                               30  5.6
+sp|Q00576|PSS1_CRILO PHOSPHATIDYLSERINE SYNTHASE I (SERINE-EXCHA...    30  5.6
+sp|Q9ZRX0|TCTP_PSEMZ TRANSLATIONALLY CONTROLLED TUMOR PROTEIN HO...    30  7.3
+sp|Q94480|V136_DICDI VEG136 PROTEIN                                    30  7.3
+sp|P55131|RT32_ACTPL RTX-III TOXIN DETERMINANT A FROM SEROTYPE 8...    30  7.3
+sp|P55130|RT31_ACTPL RTX-III TOXIN DETERMINANT A FROM SEROTYPE 2...    30  7.3
+sp|P40101|YE16_YEAST HYPOTHETICAL 35.9 KD PROTEIN IN ISC10 3'REGION    30  7.3
+sp|P13388|XMRK_XIPMA MELANOMA RECEPTOR PROTEIN-TYROSINE KINASE P...    30  7.3
+sp|Q9ZL75|MOAA_HELPJ MOLYBDENUM COFACTOR BIOSYNTHESIS PROTEIN A        30  9.5
+sp|P55129|RT12_ACTPL RTX-I TOXIN DETERMINANT A FROM SEROTYPES 5/...    30  9.5
+sp|P55128|RT11_ACTPL RTX-I TOXIN DETERMINANT A FROM SEROTYPES 1/...    30  9.5
+sp|P35669|GSHB_SCHPO GLUTATHIONE SYNTHETASE LARGE CHAIN (GLUTATH...    30  9.5
+sp|P11140|ABRA_ABRPR ABRIN-A PRECURSOR (RRNA N-GLYCOSIDASE)            30  9.5
+sp|Q00690|LEM2_MOUSE E-SELECTIN PRECURSOR (ENDOTHELIAL LEUKOCYTE...    30  9.5
+sp|P06620|ICEN_PSESY ICE NUCLEATION PROTEIN                            30  9.5
+
+>sp|P04988|CYS1_DICDI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 343
+
+ Score =  527 bits (1343), Expect = e-149
+ Identities = 343/343 (100%), Positives = 343/343 (100%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE
+Sbjct: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT 120
+           ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT
+Sbjct: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT 120
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+           AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY
+Sbjct: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+           EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM
+Sbjct: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+
+Query: 241 IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 300
+           IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF
+Sbjct: 241 IPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 300
+
+Query: 301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
+Sbjct: 301 RKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+
+
+>sp|P25975|CATL_BOVIN CATHEPSIN L PRECURSOR
+          Length = 334
+
+ Score =  440 bits (1119), Expect = e-123
+ Identities = 120/343 (34%), Positives = 176/343 (50%), Gaps = 19/343 (5%)
+
+Query: 7   FVLAVFTVFVSSRG--IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+           F L V  + V+S    + P   + + +++    + Y   E   R  +++ N   I+  N 
+Sbjct: 5   FFLTVLCLGVASAAPKLDPNLDAHWHQWKATHRRLYGMNEEEWRRAVWEKNKKIIDLHNQ 64
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDW 124
+                K   +  +N F D++++EF+                      +  +  +P + DW
+Sbjct: 65  EYSEGKHAFRMAMNAFGDMTNEEFRQVMNG----FQNQKHKKGKLFHEPLLVDVPKSVDW 120
+
+Query: 125 RTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEE 184
+             +G VTPVKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVDC           
+Sbjct: 121 TKKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVDCSRA-------- 172
+
+Query: 185 ACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN 244
+             ++GCNGGL  NA+ YI  NGG+ +E SYPY A     CN+      A  + F  IP+ 
+Sbjct: 173 QGNQGCNGGLMDNAFQYIKDNGGLDSEESYPYLATDTNSCNYKPECSAANDTGFVDIPQR 232
+
+Query: 245 ETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L+VGY  + T   
+Sbjct: 233 EKALMKAVATVGPISVAIDAGHTSFQFYKSGIYYDPDCSCKDLDHGVLVVGYGFEGTDSN 292
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+            N  +WIVKNSWG +WG  GY+ + +   N CG++   S   +
+Sbjct: 293 NNK-FWIVKNSWGPEWGWNGYVKMAKDQNNHCGIATAASYPTV 334
+
+
+>sp|P07154|CATL_RAT CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP) (CYCLIC
+           PROTEIN-2) (CP-2)
+          Length = 334
+
+ Score =  431 bits (1097), Expect = e-121
+ Identities = 123/345 (35%), Positives = 187/345 (53%), Gaps = 19/345 (5%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEEL 62
+           ++LL VL + T   + +       +Q+ +++    + Y   E   R  +++ N+  I+  
+Sbjct: 4   LLLLAVLCLGTALATPKF-DQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLH 62
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAF 122
+           N    N K      +N F D++++EF+      +               +  +  IP   
+Sbjct: 63  NGEYSNGKHGFTMEMNAFGDMTNEEFRQIVNGYRHQKH----KKGRLFQEPLMLQIPKTV 118
+
+Query: 123 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 182
+           DWR +G VTPVKNQGQCGSCW+FS +G +EGQ F+   KL+SLSEQNLVDC H       
+Sbjct: 119 DWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSH------- 171
+
+Query: 183 EEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP 242
+            +  ++GCNGGL   A+ YI +NGG+ +E SYPY A+ G  C + +    A  + F  IP
+Sbjct: 172 -DQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDG-SCKYRAEYAVANDTGFVDIP 229
+
+Query: 243 KNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYSAKNTI 299
+           + E  +   + + GP+++A DA     QFY  G++  P C+   LDHG+L+VGY  + T 
+Sbjct: 230 QQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLVVGYGYEGTD 289
+
+Query: 300 FRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+             K+  YW+VKNSWG +WG  GYI + + + N CG++   S  I+
+Sbjct: 290 SNKDK-YWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIV 333
+
+
+>sp|P06797|CATL_MOUSE CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP)
+          Length = 334
+
+ Score =  431 bits (1096), Expect = e-120
+ Identities = 120/347 (34%), Positives = 189/347 (53%), Gaps = 18/347 (5%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           M ++LL  +      +++        +++ +++    + Y   E   R  I++ N+  I+
+Sbjct: 1   MNLLLLLAVLCLGTALATPKFDQTFSAEWHQWKSTHRRLYGTNEEEWRRAIWEKNMRMIQ 60
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT 120
+             N    N +      +N F D++++EF+      +               +  +  IP 
+Sbjct: 61  LHNGEYSNGQHGFSMEMNAFGDMTNEEFRQVVNGYRHQKH----KKGRLFQEPLMLKIPK 116
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+           + DWR +G VTPVKNQGQCGSCW+FS +G +EGQ F+   KL+SLSEQNLVDC H     
+Sbjct: 117 SVDWREKGCVTPVKNQGQCGSCWAFSASGCLEGQMFLKTGKLISLSEQNLVDCSHA---- 172
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+                 ++GCNGGL   A+ YI +NGG+ +E SYPY A+ G  C + +    A  + F  
+Sbjct: 173 ----QGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDG-SCKYRAEFAVANDTGFVD 227
+
+Query: 241 IPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYSAKN 297
+           IP+ E  +   + + GP+++A DA     QFY  G++  P C+  +LDHG+L+VGY  + 
+Sbjct: 228 IPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKNLDHGVLLVGYGYEG 287
+
+Query: 298 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+           T   KN  YW+VKNSWG++WG +GYI + + + N CG++   S  ++
+Sbjct: 288 TDSNKNK-YWLVKNSWGSEWGMEGYIKIAKDRDNHCGLATAASYPVV 333
+
+
+>sp|Q28944|CATL_PIG CATHEPSIN L PRECURSOR
+          Length = 334
+
+ Score =  430 bits (1094), Expect = e-120
+ Identities = 115/343 (33%), Positives = 171/343 (49%), Gaps = 19/343 (5%)
+
+Query: 7   FVLAVFTVFVSSRG--IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+             L    + ++S    +     + + +++    + Y   E   R  +++ N+  IE  N 
+Sbjct: 5   LFLTALCLGIASAAPKLDQNLDADWYKWKATHGRLYGMNEEGWRRAVWEKNMKMIELHNQ 64
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDW 124
+                K      +N F D++++EF+                      +  +  +P + DW
+Sbjct: 65  EYSQGKHGFSMAMNAFGDMTNEEFRQVMNG----FQNQKHKKGKVFHESLVLEVPKSVDW 120
+
+Query: 125 RTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEE 184
+           R +G VT VKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVDC           
+Sbjct: 121 REKGYVTAVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVDCS--------RP 172
+
+Query: 185 ACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN 244
+             ++GCNGGL  NA+ Y+  NGG+ TE SYPY       C +      A  + F  IP+ 
+Sbjct: 173 QGNQGCNGGLMDNAFQYVKDNGGLDTEESYPYLGRETNSCTYKPECSAANDTGFVDIPQR 232
+
+Query: 245 ETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L+VGY  + T   
+Sbjct: 233 EKALMKAVATVGPISVAIDAGHSSFQFYKSGIYYDPDCSSKDLDHGVLVVGYGFEGTDSN 292
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+            +  +WIVKNSWG +WG  GY+ + +   N CG+S   S   +
+Sbjct: 293 SSK-FWIVKNSWGPEWGWNGYVKMAKDQNNHCGISTAASYPTV 334
+
+
+>sp|P07711|CATL_HUMAN CATHEPSIN L PRECURSOR (MAJOR EXCRETED PROTEIN) (MEP)
+          Length = 333
+
+ Score =  421 bits (1072), Expect = e-118
+ Identities = 118/343 (34%), Positives = 180/343 (52%), Gaps = 20/343 (5%)
+
+Query: 7   FVLAVFTVFVSSRGIPPE--EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNL 64
+            +LA F + ++S  +  +   ++Q+ +++   N+ Y   E   R  +++ N+  IE  N 
+Sbjct: 5   LILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQ 64
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDW 124
+                K      +N F D++S+EF+                      +      P + DW
+Sbjct: 65  EYREGKHSFTMAMNAFGDMTSEEFRQVMNG----FQNRKPRKGKVFQEPLFYEAPRSVDW 120
+
+Query: 125 RTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEE 184
+           R +G VTPVKNQGQCGSCW+FS TG +EGQ F    +L+SLSEQNLVDC           
+Sbjct: 121 REKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCS--------GP 172
+
+Query: 185 ACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN 244
+             +EGCNGGL   A+ Y+  NGG+ +E SYPY A     C +N     A  + F  IPK 
+Sbjct: 173 QGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATE-ESCKYNPKYSVANDTGFVDIPKQ 231
+
+Query: 245 ETVMAGYIVSTGPLAIAADA--VEWQFYIGGVF-DIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   + + GP+++A DA    + FY  G++ +  C+   +DHG+L+VGY  ++T   
+Sbjct: 232 EKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESD 291
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+            N  YW+VKNSWG +WG  GY+ + +  +N CG+++  S   +
+Sbjct: 292 NNK-YWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTV 333
+
+
+>sp|O60911|CATM_HUMAN CATHEPSIN L2 PRECURSOR (CATHEPSIN V)
+          Length = 334
+
+ Score =  417 bits (1061), Expect = e-116
+ Identities = 118/346 (34%), Positives = 179/346 (51%), Gaps = 21/346 (6%)
+
+Query: 5   LLFVLAVFTVFVSSRGI--PPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEEL 62
+           L  VLA F + ++S          +++ +++    + Y   E   R  +++ N+  IE  
+Sbjct: 3   LSLVLAAFCLGIASAVPKFDQNLDTKWYQWKATHRRLYGANEEGWRRAVWEKNMKMIELH 62
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAF 122
+           N      K      +N F D++++EF+      +   F           +     +P + 
+Sbjct: 63  NGEYSQGKHGFTMAMNAFPDMTNEEFRQMMGCFRNQKFR----KGKVFREPLFLDLPKSV 118
+
+Query: 123 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 182
+           DWR +G VTPVKNQ QCGSCW+FS TG +EGQ F    KLVSLSEQNLVDC         
+Sbjct: 119 DWRKKGYVTPVKNQKQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVDCS-------- 170
+
+Query: 183 EEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMI- 241
+               ++GCNGG    A+ Y+ +NGG+ +E SYPY A     C +   N  A  + FT++ 
+Sbjct: 171 RPQGNQGCNGGFMARAFQYVKENGGLDSEESYPYVAVD-EICKYRPENSVANDTGFTVVA 229
+
+Query: 242 PKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVF-DIPCNPNSLDHGILIVGYSAKNT 298
+           P  E  +   + + GP+++A DA    +QFY  G++ +  C+  +LDHG+L+VGY  +  
+Sbjct: 230 PGKEKALMKAVATVGPISVAMDAGHSSFQFYKSGIYFEPDCSSKNLDHGVLVVGYGFE-G 288
+
+Query: 299 IFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+               N  YW+VKNSWG +WG  GY+ + + K N CG++   S   +
+Sbjct: 289 ANSNNSKYWLVKNSWGPEWGSNGYVKIAKDKNNHCGIATAASYPNV 334
+
+
+>sp|P43296|RD19_ARATH CYSTEINE PROTEINASE RD19A PRECURSOR
+          Length = 368
+
+ Score =  412 bits (1048), Expect = e-115
+ Identities = 148/356 (41%), Positives = 199/356 (55%), Gaps = 28/356 (7%)
+
+Query: 6   LFVLAVFTVFVSSRGIP------------------PEEQSQFLEFQDKFNKKY-SHEEYL 46
+           +FVL+ F V VSS  +                      +  F  F+ KF K Y S+EE+ 
+Sbjct: 10  VFVLSFFIVSVSSSDVNDGDDLVIRQVVGGAEPQVLTSEDHFSLFKRKFGKVYASNEEHD 69
+
+Query: 47  ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPV 106
+            RF +FK+NL +      +          GV +F+DL+  EF+  +L  +          
+Sbjct: 70  YRFSVFKANLRRARRHQKLDP----SATHGVTQFSDLTRSEFRKKHLGVRSGFKLP--KD 123
+
+Query: 107 ADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLS 166
+           A+        ++P  FDWR  GAVTPVKNQG CGSCWSFS TG +EG +F++  KLVSLS
+Sbjct: 124 ANKAPILPTENLPEDFDWRDHGAVTPVKNQGSCGSCWSFSATGALEGANFLATGKLVSLS 183
+
+Query: 167 EQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNF 226
+           EQ LVDCDHEC + E  ++CD GCNGGL  +A+ Y +K GG+  E  YPYT + G  C  
+Sbjct: 184 EQQLVDCDHEC-DPEEADSCDSGCNGGLMNSAFEYTLKTGGLMKEEDYPYTGKDGKTCKL 242
+
+Query: 227 NSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDH 286
+           + + I A +SNF++I  +E  +A  +V  GPLA+A +A   Q YIGGV         L+H
+Sbjct: 243 DKSKIVASVSNFSVISIDEEQIAANLVKNGPLAVAINAGYMQTYIGGVSCPYICTRRLNH 302
+
+Query: 287 GILIVGYSAKN--TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVST 340
+           G+L+VGY A        K  PYWI+KNSWG  WGE G+  + +G+N CGV + VST
+Sbjct: 303 GVLLVGYGAAGYAPARFKEKPYWIIKNSWGETWGENGFYKICKGRNICGVDSMVST 358
+
+
+>sp|P25776|ORYA_ORYSA ORYZAIN ALPHA CHAIN PRECURSOR
+          Length = 458
+
+ Score =  407 bits (1036), Expect = e-113
+ Identities = 121/350 (34%), Positives = 183/350 (51%), Gaps = 27/350 (7%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQ--FLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+           ++LL  LA   + + S G   EE+++  + E++ +  K Y+   E   R+  F+ NL  I
+Sbjct: 12  LLLLLSLAAADMSIVSYGERSEEEARRLYAEWKAEHGKSYNAVGEEERRYAAFRDNLRYI 71
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           +E N  A       + G+N+FADL+++E+++ YL  +     +   V+D        ++P
+Sbjct: 72  DEHNAAADAGVHSFRLGLNRFADLTNEEYRDTYLGLRNKPRRER-KVSDRYLAADNEALP 130
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+            + DWRT+GAV  +K+QG CGSCW+FS    VE  + I    L+SLSEQ LVDCD     
+Sbjct: 131 ESVDWRTKGAVAEIKDQGGCGSCWAFSAIAAVEDINQIVTGDLISLSEQELVDCD----- 185
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANI-GAKISNF 238
+                + +EGCNGGL   A+++II NGGI TE  YPY  +   +C+ N  N     I ++
+Sbjct: 186 ----TSYNEGCNGGLMDYAFDFIINNGGIDTEDDYPYKGKD-ERCDVNRKNAKVVTIDSY 240
+
+Query: 239 TMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAK 296
+             +  N        V   P+++A +A    +Q Y  G+F   C   +LDHG+  VGY  +
+Sbjct: 241 EDVTPNSETSLQKAVRNQPVSVAIEAGGRAFQLYSSGIFTGKCG-TALDHGVAAVGYGTE 299
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSNFVSTSI 342
+           N        YWIV+NSWG  WGE GY+ + R        CG++   S  +
+Sbjct: 300 N-----GKDYWIVRNSWGKSWGESGYVRMERNIKASSGKCGIAVEPSYPL 344
+
+
+>sp|P25782|CYS2_HOMAM DIGESTIVE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 323
+
+ Score =  403 bits (1024), Expect = e-112
+ Identities = 132/349 (37%), Positives = 187/349 (52%), Gaps = 32/349 (9%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKI 59
+           MKV +LF+  V     S           +  F+ K+ ++Y   EE   R  IF+ N   I
+Sbjct: 1   MKVAVLFLCGVALAAASPS---------WEHFKGKYGRQYVDAEEDSYRRVIFEQNQKYI 51
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           EE N    N +      +NKF D++ +EF      N   I     PV+ +   +      
+Sbjct: 52  EEFNKKYENGEVTFNLAMNKFGDMTLEEFNAVMKGN---IPRRSAPVSVFYPKKETGPQA 108
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+           T  DWRT+GAVTPVK+QGQCGSCW+FSTTG++EGQHF+    L+SL+EQ LVDC      
+Sbjct: 109 TEVDWRTKGAVTPVKDQGQCGSCWAFSTTGSLEGQHFLKTGSLISLAEQQLVDCS----- 163
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+                   +GCNGG   +A++YI  N GI TE++YPY A  G  C F+S ++ A  S  T
+Sbjct: 164 ---RPYGPQGCNGGWMNDAFDYIKANNGIDTEAAYPYEARDG-SCRFDSNSVAATCSGHT 219
+
+Query: 240 MI-PKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGYSA 295
+            I   +ET +   +   GP+++  DA    +QFY  GV+  P C+P+ LDH +L VGY +
+Sbjct: 220 NIASGSETGLQQAVRDIGPISVTIDAAHSSFQFYSSGVYYEPSCSPSYLDHAVLAVGYGS 279
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+           +         +W+VKNSW   WG+ GYI + R + N CG++   S  ++
+Sbjct: 280 EG-----GQDFWLVKNSWATSWGDAGYIKMSRNRNNNCGIATVASYPLV 323
+
+
+>sp|P43295|A494_ARATH PROBABLE CYSTEINE PROTEINASE A494 PRECURSOR
+          Length = 313
+
+ Score =  402 bits (1021), Expect = e-112
+ Identities = 141/312 (45%), Positives = 187/312 (59%), Gaps = 10/312 (3%)
+
+Query: 32  FQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKN 90
+           F+ KF K Y   EE+  RF +FK+NL +      +  + +     GV +F+DL+  EF+ 
+Sbjct: 3   FKKKFGKVYGSIEEHYYRFSVFKANLLRAMRHQKMDPSARH----GVTQFSDLTRSEFRR 58
+
+Query: 91  YYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGN 150
+            +L  K          A+        ++P  FDWR RGAVTPVKNQG CGSCWSFSTTG 
+Sbjct: 59  KHLGVKGGFKLP--KDANQAPILPTQNLPEEFDWRDRGAVTPVKNQGSCGSCWSFSTTGA 116
+
+Query: 151 VEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT 210
+           +EG HF++  KLVSLSEQ LVDCDHEC + E E +CD GCNGGL  +A+ Y +K GG+  
+Sbjct: 117 LEGAHFLATGKLVSLSEQQLVDCDHEC-DPEEEGSCDSGCNGGLMNSAFEYTLKTGGLMR 175
+
+Query: 211 ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFY 270
+           E  YPYT   G  C  + + I A +SNF+++  NE  +A  ++  GPLA+A +A   Q Y
+Sbjct: 176 EKDYPYTGTDGGSCKLDRSKIVASVSNFSVVSINEDQIAANLIKNGPLAVAINAAYMQTY 235
+
+Query: 271 IGGVFDIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSWGADWGEQGYIYLRRG 328
+           IGGV         L+HG+L+VGY +        K  PYWI+KNSWG  WGE G+  + +G
+Sbjct: 236 IGGVSCPYICSRRLNHGVLLVGYGSAGFSQARLKEKPYWIIKNSWGESWGENGFYKICKG 295
+
+Query: 329 KNTCGVSNFVST 340
+           +N CGV + VST
+Sbjct: 296 RNICGVDSLVST 307
+
+
+>sp|P43235|CATK_HUMAN CATHEPSIN K PRECURSOR (CATHEPSIN O) (CATHEPSIN X) (CATHEPSIN O2)
+          Length = 329
+
+ Score =  401 bits (1020), Expect = e-111
+ Identities = 121/341 (35%), Positives = 179/341 (52%), Gaps = 25/341 (7%)
+
+Query: 9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+           L V  + V S  + PEE   + +  ++    K+Y+++ + + R  I++ NL  I   NL 
+Sbjct: 4   LKVLLLPVVSFALYPEEILDTHWELWKKTHRKQYNNKVDEISRRLIWEKNLKYISIHNLE 63
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR 125
+           A       +  +N   D++S+E        K  +         Y+  E+    P + D+R
+Sbjct: 64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLKVPLSHSRSNDTLYIP-EWEGRAPDSVDYR 122
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E         
+Sbjct: 123 KKGYVTPVKNQGQCGSCWAFSSVGALEGQLKKKTGKLLNLSPQNLVDCVSE--------- 173
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KN 244
+            ++GC GG   NA+ Y+ KN GI +E +YPY  +    C +N     AK   +  IP  N
+Sbjct: 174 -NDGCGGGYMTNAFQYVQKNRGIDSEDAYPYVGQE-ESCMYNPTGKAAKCRGYREIPEGN 231
+
+Query: 245 ETVMAGYIVSTGPLAIAADAV--EWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   +   GP+++A DA    +QFY  GV +D  CN ++L+H +L VGY       +
+Sbjct: 232 EKALKRAVARVGPVSVAIDASLTSFQFYSKGVYYDESCNSDNLNHAVLAVGYG-----IQ 286
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVSTS 341
+           K   +WI+KNSWG +WG +GYI + R KN  CG++N  S  
+Sbjct: 287 KGNKHWIIKNSWGENWGNKGYILMARNKNNACGIANLASFP 327
+
+
+>sp|P25804|CYSP_PEA CYSTEINE PROTEINASE 15A PRECURSOR (TURGOR-RESPONSIVE PROTEIN 15A)
+          Length = 363
+
+ Score =  399 bits (1015), Expect = e-111
+ Identities = 141/322 (43%), Positives = 196/322 (60%), Gaps = 12/322 (3%)
+
+Query: 23  PEEQSQFLEFQDKFNKKYSHEEYL-ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFA 81
+              +  F  F+ KF+K Y+ +E    RF +FKSNL K +       N     + G+ KF+
+Sbjct: 42  LNAEHHFTSFKSKFSKSYATKEEHDYRFGVFKSNLIKAKLHQ----NRDPTAEHGITKFS 97
+
+Query: 82  DLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGS 141
+           DL++ EF+  +L  K+ +       A         ++P  FDWR +GAVTPVK+QG CGS
+Sbjct: 98  DLTASEFRRQFLGLKKRLRLPAH--AQKAPILPTTNLPEDFDWREKGAVTPVKDQGSCGS 155
+
+Query: 142 CWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNY 201
+           CW+FSTTG +EG H+++  KLVSLSEQ LVDCDH C + E   +CD GCNGGL  NA+ Y
+Sbjct: 156 CWAFSTTGALEGAHYLATGKLVSLSEQQLVDCDHVC-DPEQAGSCDSGCNGGLMNNAFEY 214
+
+Query: 202 IIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIA 261
+           ++++GG+  E  Y YT   G  C F+ + + A +SNF+++  +E  +A  +V  GPLA+A
+Sbjct: 215 LLESGGVVQEKDYAYTGRDG-SCKFDKSKVVASVSNFSVVTLDEDQIAANLVKNGPLAVA 273
+
+Query: 262 ADAVEWQFYIGGVFD-IPCNPNSLDHGILIVGY--SAKNTIFRKNMPYWIVKNSWGADWG 318
+            +A   Q Y+ GV     C  + LDHG+L+VG+   A   I  K  PYWI+KNSWG +WG
+Sbjct: 274 INAAWMQTYMSGVSCPYVCAKSRLDHGVLLVGFGKGAYAPIRLKEKPYWIIKNSWGQNWG 333
+
+Query: 319 EQGYIYLRRGKNTCGVSNFVST 340
+           EQGY  + RG+N CGV + VST
+Sbjct: 334 EQGYYKICRGRNVCGVDSMVST 355
+
+
+>sp|P04989|CYS2_DICDI CYSTEINE PROTEINASE 2 PRECURSOR (PRESTALK CATHEPSIN)
+          Length = 376
+
+ Score =  398 bits (1012), Expect = e-111
+ Identities = 145/386 (37%), Positives = 211/386 (54%), Gaps = 55/386 (14%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIP-------PEEQSQFLEFQDKFNKKYSHEEYLERFEIFK 53
+           M++++  +L +F  F  +   P        + ++ F E+  KFN++YS  E+  R+ IFK
+Sbjct: 1   MRLLVFLILLIFVNFSFANVRPNGRRFSESQYRTAFTEWTLKFNRQYSSSEFSNRYSIFK 60
+
+Query: 54  SNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKE-AIFTDDLPVADYLDD 112
+           SN+  ++  N       + T  G+N FAD++++E++  YL  +  A   +     + L+ 
+Sbjct: 61  SNMDYVDNWNSK---GDSQTVLGLNNFADITNEEYRKTYLGTRVNAHSYNGYDGREVLNV 117
+
+Query: 113 EFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 172
+           E + + P + DWRT+ AVTP+K+QGQCGSCWSFSTTG+ EG H +   KLVSLSEQNLVD
+Sbjct: 118 EDLQTNPKSIDWRTKNAVTPIKDQGQCGSCWSFSTTGSTEGAHALKTKKLVSLSEQNLVD 177
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 232
+           C      +        GC+GGL  NA++YIIKN GI TESSYPYTAETG+ C FN ++IG
+Sbjct: 178 CSGPEENF--------GCDGGLMNNAFDYIIKNKGIDTESSYPYTAETGSTCLFNKSDIG 229
+
+Query: 233 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGIL 289
+           A I  +  I     +        GP+++A DA    +Q Y  G++  P C+P  LDHG+L
+Sbjct: 230 ATIKGYVNITAGSEISLENGAQHGPVSVAIDASHNSFQLYTSGIYYEPKCSPTELDHGVL 289
+
+Query: 290 IVGYSA--------------------------------KNTIFRKNMPYWIVKNSWGADW 317
+           +VGY                                   +++  K   YWIVKNSWG  W
+Sbjct: 290 VVGYGVQGKDDEGPVLNRKQTIVIHKNEDNKVESSDDSSDSVRPKANNYWIVKNSWGTSW 349
+
+Query: 318 GEQGYIYLRRG-KNTCGVSNFVSTSI 342
+           G +GYI + +  KN CG+++  S  +
+Sbjct: 350 GIKGYILMSKDRKNNCGIASVSSYPL 375
+
+
+>sp|P43236|CATK_RABIT CATHEPSIN K PRECURSOR (OC-2 PROTEIN)
+          Length = 329
+
+ Score =  395 bits (1005), Expect = e-110
+ Identities = 118/341 (34%), Positives = 178/341 (51%), Gaps = 25/341 (7%)
+
+Query: 9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+           L V  + V S  + PEE   +Q+  ++  ++K+Y+ + + + R  I++ NL  I   NL 
+Sbjct: 4   LKVLLLPVVSFALHPEEILDTQWELWKKTYSKQYNSKVDEISRRLIWEKNLKHISIHNLE 63
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR 125
+           A       +  +N   D++S+E        K            Y+  ++    P + D+R
+Sbjct: 64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLKVPPSRSHSNDTLYIP-DWEGRTPDSIDYR 122
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E         
+Sbjct: 123 KKGYVTPVKNQGQCGSCWAFSSVGALEGQLKKKTGKLLNLSPQNLVDCVSE--------- 173
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KN 244
+            + GC GG   NA+ Y+ +N GI +E +YPY  +    C +N     AK   +  IP  N
+Sbjct: 174 -NYGCGGGYMTNAFQYVQRNRGIDSEDAYPYVGQD-ESCMYNPTGKAAKCRGYREIPEGN 231
+
+Query: 245 ETVMAGYIVSTGPLAIAADAV--EWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   +   GP+++A DA    +QFY  GV +D  C+ ++++H +L VGY       +
+Sbjct: 232 EKALKRAVARVGPVSVAIDASLTSFQFYSKGVYYDENCSSDNVNHAVLAVGYG-----IQ 286
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVSTS 341
+           K   +WI+KNSWG  WG +GYI + R KN  CG++N  S  
+Sbjct: 287 KGNKHWIIKNSWGESWGNKGYILMARNKNNACGIANLASFP 327
+
+
+>sp|P54640|CYS5_DICDI CYSTEINE PROTEINASE 5 PRECURSOR
+          Length = 344
+
+ Score =  394 bits (1001), Expect = e-109
+ Identities = 138/362 (38%), Positives = 200/362 (55%), Gaps = 37/362 (10%)
+
+Query: 1   MKVI-LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKI 59
+           MKV+  L VL V       +    + ++ F ++     K Y+ EE+  R+ IF +N+  +
+Sbjct: 1   MKVLSFLCVLLVSVATAKQQFSELQYRNAFTDWMITHQKSYTSEEFGARYNIFTANMDYV 60
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           ++ N    +  ++T  G+N FAD++++E++N YL  K   F     +    +    NS  
+Sbjct: 61  QQWN----SKGSETVLGLNNFADITNEEYRNTYLGTK---FDASSLIGTQEEKVHTNSSA 113
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+            + DWR+ GAVTPVKNQGQCG CWSFSTTG+ EG HF S+ +LVSLSEQNL+DC  E   
+Sbjct: 114 ASKDWRSEGAVTPVKNQGQCGGCWSFSTTGSTEGAHFQSKGELVSLSEQNLIDCSTE--- 170
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+                  + GC+GGL   A+ YII N GI TESSYPY AE G +C + S N GA +S++ 
+Sbjct: 171 -------NSGCDGGLMTYAFEYIINNNGIDTESSYPYKAENG-KCEYKSENSGATLSSYK 222
+
+Query: 240 MIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYS-- 294
+            +           V+  P+++A DA    +Q Y  G++  P C+  +LDHG+L VGY   
+Sbjct: 223 TVTAGSESSLESAVNVNPVSVAIDASHQSFQLYTSGIYYEPECSSENLDHGVLAVGYGSG 282
+
+Query: 295 ------------AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTS 341
+                       + N     +  YWIVKNSWG  WG +GYI + R + N CG+++  S  
+Sbjct: 283 SGSSSGQSSGQSSGNLSASSSNEYWIVKNSWGTSWGIEGYILMSRNRDNNCGIASSASFP 342
+
+Query: 342 II 343
+           ++
+Sbjct: 343 VV 344
+
+
+>sp|P13277|CYS1_HOMAM DIGESTIVE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 322
+
+ Score =  393 bits (998), Expect = e-109
+ Identities = 130/351 (37%), Positives = 186/351 (52%), Gaps = 37/351 (10%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKI 59
+           MKV+ LF+  +     +           + EF+ KF +KY   EE   R  +F  NL  I
+Sbjct: 1   MKVVALFLFGLALAAANPS---------WEEFKGKFGRKYVDLEEERYRLNVFLDNLQYI 51
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           EE N      +      +N+F+D+++++F       K+      +  +        ++ P
+Sbjct: 52  EEFNKKYERGEVTYNLAINQFSDMTNEKFNAVMKGYKKGPRPAAVFTS-------TDAAP 104
+
+Query: 120 TA--FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+            +   DWRT+GAVTPVK+QGQCGSCW+FSTTG +EGQHF+   +LVSLSEQ LVDC    
+Sbjct: 105 ESTEVDWRTKGAVTPVKDQGQCGSCWAFSTTGGIEGQHFLKTGRLVSLSEQQLVDC---- 160
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+               G    ++GCNGG    A  Y+  NGG+ TESSYPY A   T C FNS  IGA  + 
+Sbjct: 161 ---AGGSYYNQGCNGGWVERAIMYVRDNGGVDTESSYPYEARDNT-CRFNSNTIGATCTG 216
+
+Query: 238 FTMIPK-NETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGY 293
+           +  I + +E+ +       GP+++A DA    +Q Y  GV+  P C+ + LDH +L VGY
+Sbjct: 217 YVGIAQGSESALKTATRDIGPISVAIDASHRSFQSYYTGVYYEPSCSSSQLDHAVLAVGY 276
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+            ++         +W+VKNSW   WGE GYI + R + N CG++       +
+Sbjct: 277 GSEG-----GQDFWLVKNSWATSWGESGYIKMARNRNNNCGIATDACYPTV 322
+
+
+>sp|P25784|CYS3_HOMAM DIGESTIVE CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 321
+
+ Score =  392 bits (997), Expect = e-109
+ Identities = 126/348 (36%), Positives = 185/348 (52%), Gaps = 32/348 (9%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKI 59
+           MKV  LF+  +     S           +  F+ ++ +KY   +E L R  +F+ N   I
+Sbjct: 1   MKVAALFLCGLALATASPS---------WDHFKTQYGRKYGDAKEELYRQRVFQQNEQLI 51
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           E+ N    N +   K  +N+F D++++EF       K+            +       + 
+Sbjct: 52  EDFNKKFENGEVTFKVAMNQFGDMTNEEFNAVMKGYKKG----SRGEPKAVFTAEAGPMA 107
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+              DWRT+  VTPVK+Q QCGSCW+FS TG +EGQHF+  ++LVSLSEQ LVDC      
+Sbjct: 108 ADVDWRTKALVTPVKDQEQCGSCWAFSATGALEGQHFLKNDELVSLSEQQLVDCST---- 163
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+               +  ++GC GG   +A++YI  NGGI TESSYPY AE    C F++ +IGA  +   
+Sbjct: 164 ----DYGNDGCGGGWMTSAFDYIKDNGGIDTESSYPYEAED-RSCRFDANSIGAICTGSV 218
+
+Query: 240 MIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDI-PCNPNSLDHGILIVGYSAK 296
+            +   E  +   +   GP+++A DA    +QFY  GV+    C+P  LDHG+L VGY  +
+Sbjct: 219 EVQHTEEALQEAVSGVGPISVAIDASHFSFQFYSSGVYYEQNCSPTFLDHGVLAVGYGTE 278
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTSII 343
+           +T       YW+VKNSWG+ WG+ GYI + R + N CG+++  S   +
+Sbjct: 279 ST-----KDYWLVKNSWGSSWGDAGYIKMSRNRDNNCGIASEPSYPTV 321
+
+
+>sp|P25774|CATS_HUMAN CATHEPSIN S PRECURSOR
+          Length = 331
+
+ Score =  392 bits (996), Expect = e-109
+ Identities = 112/342 (32%), Positives = 172/342 (49%), Gaps = 21/342 (6%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELN 63
+           L+ VL V +  V+     P     +  ++  + K+Y    E   R  I++ NL  +   N
+Sbjct: 4   LVCVLLVCSSAVAQLHKDPTLDHHWHLWKKTYGKQYKEKNEEAVRRLIWEKNLKFVMLHN 63
+
+Query: 64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD 123
+           L           G+N   D++S+E  +   + +               +     +P + D
+Sbjct: 64  LEHSMGMHSYDLGMNHLGDMTSEEVMSLTSSLRVPSQWQRNITYKSNPNRI---LPDSVD 120
+
+Query: 124 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+           WR +G VT VK QG CG+CW+FS  G +E Q  +   KLV+LS QNLVDC  E       
+Sbjct: 121 WREKGCVTEVKYQGSCGACWAFSAVGALEAQLKLKTGKLVTLSAQNLVDCSTE------- 173
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP- 242
+           +  ++GCNGG    A+ YII N GI +++SYPY A    +C ++S    A  S +T +P 
+Sbjct: 174 KYGNKGCNGGFMTTAFQYIIDNKGIDSDASYPYKAMD-QKCQYDSKYRAATCSKYTELPY 232
+
+Query: 243 KNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIF 300
+             E V+   + + GP+++  DA    +  Y  GV+  P    +++HG+L+VGY   N   
+Sbjct: 233 GREDVLKEAVANKGPVSVGVDARHPSFFLYRSGVYYEPSCTQNVNHGVLVVGYGDLN--- 289
+
+Query: 301 RKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTS 341
+                YW+VKNSWG ++GE+GYI + R K N CG+++F S  
+Sbjct: 290 --GKEYWLVKNSWGHNFGEEGYIRMARNKGNHCGIASFPSYP 329
+
+
+>sp|P15242|TES1_RAT TESTIN 1/2 PRECURSOR (CMB-22/CMB-23)
+          Length = 333
+
+ Score =  391 bits (995), Expect = e-109
+ Identities = 112/347 (32%), Positives = 175/347 (50%), Gaps = 20/347 (5%)
+
+Query: 3   VILLFVLAVFTVFVSSRGI--PPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           +I +  LA+  + V S      P    ++ E++ K  K Y+  E   +  +++ N   IE
+Sbjct: 1   MIAVLFLAILCLEVDSTAPTPDPSLDVEWNEWRTKHGKTYNMNEERLKRAVWEKNFKMIE 60
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPT 120
+             N   +  + D    +N F DL++ EF               +       D     +P 
+Sbjct: 61  LHNWEYLEGRHDFTMAMNAFGDLTNIEFVKMMTG----FQRQKIKKTHIFQDHQFLYVPK 116
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+             DWR  G VTPVKNQG C S W+FS TG++EGQ F    +L+ LSEQNL+DC    + +
+Sbjct: 117 RVDWRQLGYVTPVKNQGHCASSWAFSATGSLEGQMFRKTERLIPLSEQNLLDCMGSNVTH 176
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTM 240
+                   GC+GG    A+ Y+  NGG+ TE SYPY  + G +C +++ N  A + +F  
+Sbjct: 177 --------GCSGGFMQYAFQYVKDNGGLATEESYPYRGQ-GRECRYHAENSAANVRDFVQ 227
+
+Query: 241 IPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYSAKN 297
+           IP +E  +   +   GP+++A DA    +QFY  G++  P C    L+H +L+VGY  + 
+Sbjct: 228 IPGSEEALMKAVAKVGPISVAVDASHGSFQFYGSGIYYEPQCKRVHLNHAVLVVGYGFE- 286
+
+Query: 298 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+                   +W+VKNSWG +WG +GY+ L +   N CG++ + +  I+
+Sbjct: 287 GEESDGNSFWLVKNSWGEEWGMKGYMKLAKDWSNHCGIATYSTYPIV 333
+
+
+>sp|Q10716|CYS1_MAIZE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 371
+
+ Score =  391 bits (993), Expect = e-108
+ Identities = 141/368 (38%), Positives = 201/368 (54%), Gaps = 34/368 (9%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEE-------------------QSQFLEFQDKFNKKYS 41
+           M   +L +L++ +    +  +  E+                   +S FL F  +F K Y 
+Sbjct: 1   MAHRVLLLLSLASAAAVAAAVDAEDPLIRQVVPGGDDNDLELNAESHFLSFVQRFGKSYK 60
+
+Query: 42  H-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN---NKE 97
+             +E+  R  +FK NL +     L+        + GV KF+DL+  EF+  YL    ++ 
+Sbjct: 61  DADEHAYRLSVFKDNLRRARRHQLLDP----SAEHGVTKFSDLTPAEFRRTYLGLRKSRR 116
+
+Query: 98  AIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFI 157
+           A+  +    A        + +P  FDWR  GAV PVKNQG CGSCWSFS +G +EG H++
+Sbjct: 117 ALLRELGESAHEAPVLPTDGLPDDFDWRDHGAVGPVKNQGSCGSCWSFSASGALEGAHYL 176
+
+Query: 158 SQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYT 217
+           +  KL  LSEQ  VDCDHEC +    ++CD GCNGGL   A++Y+ K GG+++E  YPYT
+Sbjct: 177 ATGKLEVLSEQQFVDCDHEC-DSSEPDSCDSGCNGGLMTTAFSYLQKAGGLESEKDYPYT 235
+
+Query: 218 AETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDI 277
+              G +C F+ + I A + NF+++  +E  ++  ++  GPLAI  +A   Q YIGGV   
+Sbjct: 236 GSDG-KCKFDKSKIVASVQNFSVVSVDEAQISANLIKHGPLAIGINAAYMQTYIGGVSCP 294
+
+Query: 278 PCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSWGADWGEQGYIYLRRG---KNTC 332
+                 LDHG+L+VGY A     I  K+ PYWI+KNSWG +WGE GY  + RG   +N C
+Sbjct: 295 YICGRHLDHGVLLVGYGASGFAPIRLKDKPYWIIKNSWGENWGENGYYKICRGSNVRNKC 354
+
+Query: 333 GVSNFVST 340
+           GV + VST
+Sbjct: 355 GVDSMVST 362
+
+
+>sp|P12412|CYSP_VIGMU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYSTEINE PROTEINASE)
+           (SULFHYDRYL-ENDOPEPTIDASE) (SH-EP)
+          Length = 362
+
+ Score =  389 bits (989), Expect = e-108
+ Identities = 131/360 (36%), Positives = 187/360 (51%), Gaps = 36/360 (10%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQ---------FLEFQDKFNKKYSHEEYLERFEI 51
+           MK +L  VL++  V   +      E+           +  ++       S  E  +RF +
+Sbjct: 3   MKKLLWVVLSLSLVLGVANSFDFHEKDLESEESLWDLYERWRSHHTVSRSLGEKHKRFNV 62
+
+Query: 52  FKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAI---FTDDLPVAD 108
+           FK+N+  +   N +        K  +NKFAD+++ EF++ Y  +K      F      + 
+Sbjct: 63  FKANVMHVHNTNKMDKP----YKLKLNKFADMTNHEFRSTYAGSKVNHHKMFRGSQHGSG 118
+
+Query: 109 YLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQ 168
+               E + S+P + DWR +GAVT VK+QGQCGSCW+FST   VEG + I  NKLVSLSEQ
+Sbjct: 119 TFMYEKVGSVPASVDWRKKGAVTDVKDQGQCGSCWAFSTIVAVEGINQIKTNKLVSLSEQ 178
+
+Query: 169 NLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNS 228
+            LVDCD E          ++GCNGGL  +A+ +I + GGI TES+YPYTA+ GT      
+Sbjct: 179 ELVDCDKE---------ENQGCNGGLMESAFEFIKQKGGITTESNYPYTAQEGTCDESKV 229
+
+Query: 229 ANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDH 286
+            ++   I     +P N+       V+  P+++A DA   ++QFY  GVF   C    L+H
+Sbjct: 230 NDLAVSIDGHENVPVNDENALLKAVANQPVSVAIDAGGSDFQFYSEGVFTGDC-NTDLNH 288
+
+Query: 287 GILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSNFVSTSI 342
+           G+ IVGY            YWIV+NSWG +WGEQGYI ++R     +  CG++   S  I
+Sbjct: 289 GVAIVGYGTTV----DGTNYWIVRNSWGPEWGEQGYIRMQRNISKKEGLCGIAMMASYPI 344
+
+
+>sp|P43297|RD21_ARATH CYSTEINE PROTEINASE RD21A PRECURSOR
+          Length = 462
+
+ Score =  387 bits (984), Expect = e-107
+ Identities = 120/330 (36%), Positives = 167/330 (50%), Gaps = 29/330 (8%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKYSHE---EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN 78
+             E  S +  +  K  K  S     E   RFEIFK NL  ++E N   +      + G+ 
+Sbjct: 43  EAEVMSIYEAWLVKHGKAQSQNSLVEKDRRFEIFKDNLRFVDEHNEKNL----SYRLGLT 98
+
+Query: 79  KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQ 138
+           +FADL++DE+++ YL  K      +   +   +    + +P + DWR +GAV  VK+QG 
+Sbjct: 99  RFADLTNDEYRSKYLGAKME-KKGERRTSLRYEARVGDELPESIDWRKKGAVAEVKDQGG 157
+
+Query: 139 CGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNA 198
+           CGSCW+FST G VEG + I    L++LSEQ LVDCD          + +EGCNGGL   A
+Sbjct: 158 CGSCWAFSTIGAVEGINQIVTGDLITLSEQELVDCD---------TSYNEGCNGGLMDYA 208
+
+Query: 199 YNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPL 258
+           + +IIKNGGI T+  YPY    GT            I ++  +P          V+  P+
+Sbjct: 209 FEFIIKNGGIDTDKDYPYKGVDGTCDQIRKNAKVVTIDSYEDVPTYSEESLKKAVAHQPI 268
+
+Query: 259 AIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGAD 316
+           +IA +A    +Q Y  G+FD  C    LDHG++ VGY  +N        YWIV+NSWG  
+Sbjct: 269 SIAIEAGGRAFQLYDSGIFDGSCG-TQLDHGVVAVGYGTEN-----GKDYWIVRNSWGKS 322
+
+Query: 317 WGEQGYIYLRRG----KNTCGVSNFVSTSI 342
+           WGE GY+ + R        CG++   S  I
+Sbjct: 323 WGESGYLRMARNIASSSGKCGIAIEPSYPI 352
+
+
+>sp|P55097|CATK_MOUSE CATHEPSIN K PRECURSOR
+          Length = 329
+
+ Score =  384 bits (976), Expect = e-106
+ Identities = 115/341 (33%), Positives = 178/341 (51%), Gaps = 25/341 (7%)
+
+Query: 9   LAVFTVFVSSRGIPPEE--QSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLI 65
+           L V  + + S  + PEE   +Q+  ++    K+Y+ + + + R  I++ NL +I   NL 
+Sbjct: 4   LKVLLLPMVSFALSPEEMLDTQWELWKKTHQKQYNSKVDEISRRLIWEKNLKQISAHNLE 63
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR 125
+           A       +  +N   D++S+E        +            Y   E+   +P + D+R
+Sbjct: 64  ASLGVHTYELAMNHLGDMTSEEVVQKMTGLRIPPSRSYSNDTLYTP-EWEGRVPDSIDYR 122
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G VTPVKNQGQCGSCW+FS+ G +EGQ      KL++LS QNLVDC  E         
+Sbjct: 123 KKGYVTPVKNQGQCGSCWAFSSAGALEGQLKKKTGKLLALSPQNLVDCVTE--------- 173
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KN 244
+            + GC GG    A+ Y+ +NGGI +E ++PY  +    C +N+    AK   +  IP  N
+Sbjct: 174 -NYGCGGGYMTTAFQYVQQNGGIDSEDAFPYVGQD-ESCMYNATAKAAKCRGYREIPVGN 231
+
+Query: 245 ETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYSAKNTIFR 301
+           E  +   +   GP++++ DA    +QFY  GV +D  C+ ++++H +L+VGY       +
+Sbjct: 232 EKALKRAVARVGPISVSIDASLASFQFYSRGVYYDENCDRDNVNHAVLVVGYGT-----Q 286
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRGKNT-CGVSNFVSTS 341
+           K   +WI+KNSWG  WG +GY  L R KN  CG++N  S  
+Sbjct: 287 KGSKHWIIKNSWGESWGNKGYALLARNKNNACGITNMASFP 327
+
+
+>sp|P09668|CATH_HUMAN CATHEPSIN H PRECURSOR
+          Length = 335
+
+ Score =  383 bits (974), Expect = e-106
+ Identities = 119/341 (34%), Positives = 167/341 (48%), Gaps = 28/341 (8%)
+
+Query: 8   VLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAI 67
+           +L V     +   +   E+  F  +  K  K YS EEY  R + F SN  KI   N    
+Sbjct: 14  LLGVPVCGAAELSVNSLEKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHN---- 69
+
+Query: 68  NHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTR 127
+           N     K  +N+F+D+S  E K+ YL ++          ++YL        P + DWR +
+Sbjct: 70  NGNHTFKMALNQFSDMSFAEIKHKYLWSEPQ--NCSATKSNYLRGT--GPYPPSVDWRKK 125
+
+Query: 128 GA-VTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEAC 186
+           G  V+PVKNQG CGSCW+FSTTG +E    I+  K++SL+EQ LVDC  +   Y      
+Sbjct: 126 GNFVSPVKNQGACGSCWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNY------ 179
+
+Query: 187 DEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KNE 245
+             GC GGL   A+ YI+ N GI  E +YPY  + G  C F        + +   I   +E
+Sbjct: 180 --GCQGGLPSQAFEYILYNKGIMGEDTYPYQGKDGY-CKFQPGKAIGFVKDVANITIYDE 236
+
+Query: 246 TVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCN---PNSLDHGILIVGYSAKNTIFR 301
+             M   +    P++ A +   ++  Y  G++        P+ ++H +L VGY  KN    
+Sbjct: 237 EAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVNHAVLAVGYGEKN---- 292
+
+Query: 302 KNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+             +PYWIVKNSWG  WG  GY  + RGKN CG++   S  I
+Sbjct: 293 -GIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPI 332
+
+
+>sp|O46427|CATH_PIG CATHEPSIN H PRECURSOR
+          Length = 335
+
+ Score =  382 bits (971), Expect = e-106
+ Identities = 117/336 (34%), Positives = 165/336 (48%), Gaps = 28/336 (8%)
+
+Query: 13  TVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKAD 72
+               S+  +   E+  F  +  +  KKYS EEY  R ++F SN  KI   N         
+Sbjct: 19  ACGASNLAVSSFEKLHFKSWMVQHQKKYSLEEYHHRLQVFVSNWRKINAHNA----GNHT 74
+
+Query: 73  TKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGA-VT 131
+            K G+N+F+D+S DE ++ YL ++           +YL        P + DWR +G  V+
+Sbjct: 75  FKLGLNQFSDMSFDEIRHKYLWSEPQ--NCSATKGNYLRGT--GPYPPSMDWRKKGNFVS 130
+
+Query: 132 PVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCN 191
+           PVKNQG CGSCW+FSTTG +E    I+  K++SL+EQ LVDC             + GC 
+Sbjct: 131 PVKNQGSCGSCWTFSTTGALESAVAIATGKMLSLAEQQLVDCAQNFN--------NHGCQ 182
+
+Query: 192 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAG 250
+           GGL   A+ YI  N GI  E +YPY  +    C F      A + +   I  N E  M  
+Sbjct: 183 GGLPSQAFEYIRYNKGIMGEDTYPYKGQDD-HCKFQPDKAIAFVKDVANITMNDEEAMVE 241
+
+Query: 251 YIVSTGPLAIAADA-VEWQFYIGGVFDIPCN---PNSLDHGILIVGYSAKNTIFRKNMPY 306
+            +    P++ A +   ++  Y  G++        P+ ++H +L VGY  +N      +PY
+Sbjct: 242 AVALYNPVSFAFEVTNDFLMYRKGIYSSTSCHKTPDKVNHAVLAVGYGEEN-----GIPY 296
+
+Query: 307 WIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+           WIVKNSWG  WG  GY  + RGKN CG++   S  I
+Sbjct: 297 WIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPI 332
+
+
+>sp|P25803|CYSP_PHAVU VIGNAIN PRECURSOR (BEAN ENDOPEPTIDASE) (CYSTEINE PROTEINASE EP-C1)
+          Length = 362
+
+ Score =  380 bits (966), Expect = e-105
+ Identities = 128/354 (36%), Positives = 182/354 (51%), Gaps = 32/354 (9%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIP--PEEQSQF---LEFQDKFNKKYSHEEYLERFEIFKSNLG 57
+           V+L F L +               E+S +     ++       S  E  +RF +FK+NL 
+Sbjct: 9   VVLSFSLVLGVANSFDFHDKDLASEESLWDLYERWRSHHTVSRSLGEKHKRFNVFKANLM 68
+
+Query: 58  KIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNK---EAIFTDDLPVADYLDDEF 114
+            +   N +        K  +NKFAD+++ EF++ Y  +K     +F            E 
+Sbjct: 69  HVHNTNKMDKP----YKLKLNKFADMTNHEFRSTYAGSKVNHPRMFRGTPHENGAFMYEK 124
+
+Query: 115 INSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCD 174
+           + S+P + DWR +GAVT VK+QGQCGSCW+FST   VEG + I  NKLV+LSEQ LVDCD
+Sbjct: 125 VVSVPPSVDWRKKGAVTDVKDQGQCGSCWAFSTVVAVEGINQIKTNKLVALSEQELVDCD 184
+
+Query: 175 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAK 234
+            E          ++GCNGGL  +A+ +I + GGI TES+YPY A+ GT       ++   
+Sbjct: 185 KE---------ENQGCNGGLMESAFEFIKQKGGITTESNYPYKAQEGTCDASKVNDLAVS 235
+
+Query: 235 ISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVG 292
+           I     +P N+       V+  P+++A DA   ++QFY  GVF   C    L+HG+ IVG
+Sbjct: 236 IDGHENVPANDEDALLKAVANQPVSVAIDAGGSDFQFYSEGVFTGDC-STDLNHGVAIVG 294
+
+Query: 293 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSNFVSTSI 342
+           Y            YWIV+NSWG +WGE GYI ++R     +  CG++   S  I
+Sbjct: 295 YGTTV----DGTNYWIVRNSWGPEWGEHGYIRMQRNISKKEGLCGIAMLPSYPI 344
+
+
+>sp|P00786|CATH_RAT CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPSIN BA)
+          Length = 333
+
+ Score =  377 bits (958), Expect = e-104
+ Identities = 117/324 (36%), Positives = 167/324 (51%), Gaps = 28/324 (8%)
+
+Query: 25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           E+  F  +  +  K YS  EY  R ++F +N  KI+  N          K G+N+F+D+S
+Sbjct: 29  EKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRN----HTFKMGLNQFSDMS 84
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAV-TPVKNQGQCGSCW 143
+             E K+ YL ++          ++YL        P++ DWR +G V +PVKNQG CGSCW
+Sbjct: 85  FAEIKHKYLWSEPQ--NCSATKSNYLRGT--GPYPSSMDWRKKGNVVSPVKNQGACGSCW 140
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +E    I+  K+++L+EQ LVDC             + GC GGL   A+ YI+
+Sbjct: 141 TFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFN--------NHGCQGGLPSQAFEYIL 192
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAA 262
+            N GI  E SYPY  + G QC FN     A + N   I  N E  M   +    P++ A 
+Sbjct: 193 YNKGIMGEDSYPYIGKNG-QCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAF 251
+
+Query: 263 DAV-EWQFYIGGVFDIPCN---PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+           +   ++  Y  GV+        P+ ++H +L VGY  +N +      YWIVKNSWG++WG
+Sbjct: 252 EVTEDFMMYKSGVYSSNSCHKTPDKVNHAVLAVGYGEQNGLL-----YWIVKNSWGSNWG 306
+
+Query: 319 EQGYIYLRRGKNTCGVSNFVSTSI 342
+             GY  + RGKN CG++   S  I
+Sbjct: 307 NNGYFLIERGKNMCGLAACASYPI 330
+
+
+>sp|P25251|CYS4_BRANA CYSTEINE PROTEINASE COT44 PRECURSOR
+          Length = 328
+
+ Score =  376 bits (955), Expect = e-104
+ Identities = 115/329 (34%), Positives = 167/329 (49%), Gaps = 32/329 (9%)
+
+Query: 29  FLEFQDKFNKKYSH-----EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADL 83
+           +L +  +  K  S+      +  ERF IFK NL  I+  N    N  A  K G+  FA+L
+Sbjct: 4   YLRWSLEHGKSNSNSNGIINQQDERFNIFKDNLRFIDLHNE--NNKNATYKLGLTIFANL 61
+
+Query: 84  SSDEFKNYYLNNK----EAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQC 139
+           ++DE+++ YL  +      I         Y     ++ +P   DWR +GAV  +K+QG C
+Sbjct: 62  TNDEYRSLYLGARTEPVRRITKAKNVNMKYSAAVNVDEVPVTVDWRQKGAVNAIKDQGTC 121
+
+Query: 140 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 199
+           GSCW+FST   VEG + I   +LVSLSEQ LVDCD          + ++GCNGGL   A+
+Sbjct: 122 GSCWAFSTAAAVEGINKIVTGELVSLSEQELVDCDK---------SYNQGCNGGLMDYAF 172
+
+Query: 200 NYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLA 259
+            +I+KNGG+ TE  YPY    G   +    +    I  +  +P  +       VS  P++
+Sbjct: 173 QFIMKNGGLNTEKDYPYHGTNGKCNSLLKNSRVVTIDGYEDVPSKDETALKRAVSYQPVS 232
+
+Query: 260 IAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+           +A DA    +Q Y  G+F   C   ++DH ++ VGY ++N +      YWIV+NSWG  W
+Sbjct: 233 VAIDAGGRAFQHYQSGIFTGKCG-TNMDHAVVAVGYGSENGV-----DYWIVRNSWGTRW 286
+
+Query: 318 GEQGYIYLRRG----KNTCGVSNFVSTSI 342
+           GE GYI + R        CG++   S  +
+Sbjct: 287 GEDGYIRMERNVASKSGKCGIAIEASYPV 315
+
+
+>sp|P05167|ALEU_HORVU THIOL PROTEASE ALEURAIN PRECURSOR
+          Length = 362
+
+ Score =  375 bits (954), Expect = e-104
+ Identities = 119/374 (31%), Positives = 179/374 (47%), Gaps = 54/374 (14%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQ---------------------------SQFLEFQDK 35
+           ++ L VLA   V V+S     +                              +F  F  +
+Sbjct: 8   LLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVR 67
+
+Query: 36  FNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLN 94
+           + K Y S  E   RF IF  +L ++   N   + ++     G+N+F+D+S +EF+   L 
+Sbjct: 68  YGKSYESAAEVRRRFRIFSESLEEVRSTNRKGLPYR----LGINRFSDMSWEEFQATRLG 123
+
+Query: 95  NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQ 154
+              A  T    +A         ++P   DWR  G V+PVKNQ  CGSCW+FSTTG +E  
+Sbjct: 124 ---AAQTCSATLAGNHLMRDAAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAA 180
+
+Query: 155 HFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSY 214
+           +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI  NGGI TE SY
+Sbjct: 181 YTQATGKNISLSEQQLVDCAGGFNNF--------GCNGGLPSQAFEYIKYNGGIDTEESY 232
+
+Query: 215 PYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAADAVE-WQFYIG 272
+           PY    G  C++ + N   ++ +   I  N E  +   +    P+++A   ++ ++ Y  
+Sbjct: 233 PYKGVNGV-CHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAFQVIDGFRQYKS 291
+
+Query: 273 GVFD-IPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK 329
+           GV+    C   P+ ++H +L VGY  +N +     PYW++KNSWGADWG+ GY  +  GK
+Sbjct: 292 GVYTSDHCGTTPDDVNHAVLAVGYGVENGV-----PYWLIKNSWGADWGDNGYFKMEMGK 346
+
+Query: 330 NTCGVSNFVSTSII 343
+           N C ++   S  ++
+Sbjct: 347 NMCAIATCASYPVV 360
+
+
+>sp|Q10717|CYS2_MAIZE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 360
+
+ Score =  374 bits (951), Expect = e-103
+ Identities = 114/322 (35%), Positives = 168/322 (51%), Gaps = 26/322 (8%)
+
+Query: 28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F  ++ K Y S  E  +RF IF  +L  +   N   +      + G+N+FAD+S +
+Sbjct: 58  RFARFAVRYGKSYESAAEVHKRFRIFSESLQLVRSTNRKGL----SYRLGINRFADMSWE 113
+
+Query: 87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           EF+   L   +          ++       ++P   DWR  G V+PVKNQG CGSCW+FS
+Sbjct: 114 EFRATRLGAAQNCSAT--LTGNHRMRAAAVALPETKDWREDGIVSPVKNQGHCGSCWTFS 171
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           TTG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI  NG
+Sbjct: 172 TTGALEAAYTQATGKPISLSEQQLVDCGFAFNNF--------GCNGGLPSQAFEYIKYNG 223
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAADAV 265
+           G+ TE SYPY    G  C F + N+G K+ +   I    E  +   +    P+++A + +
+Sbjct: 224 GLDTEESYPYQGVNGI-CKFKNENVGVKVLDSVNITLGAEDELKDAVGLVRPVSVAFEVI 282
+
+Query: 266 E-WQFYIGGVFD-IPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQG 321
+             ++ Y  GV+    C   P  ++H +L VGY  ++ +     PYW++KNSWGADWG++G
+Sbjct: 283 TGFRLYKSGVYTSDHCGTTPMDVNHAVLAVGYGVEDGV-----PYWLIKNSWGADWGDEG 337
+
+Query: 322 YIYLRRGKNTCGVSNFVSTSII 343
+           Y  +  GKN CGV+   S  I+
+Sbjct: 338 YFKMEMGKNMCGVATCASYPIV 359
+
+
+>sp|P25777|ORYB_ORYSA ORYZAIN BETA CHAIN PRECURSOR
+          Length = 471
+
+ Score =  374 bits (951), Expect = e-103
+ Identities = 112/304 (36%), Positives = 159/304 (51%), Gaps = 23/304 (7%)
+
+Query: 44  EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDD 103
+           E+  RF +F  NL  ++  N  A       + G+N+FADL+++EF+  +L  K A     
+Sbjct: 69  EHERRFLVFWDNLKFVDAHNARADEGGG-FRLGMNRFADLTNEEFRATFLGAKVAER--S 125
+
+Query: 104 LPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV 163
+               +    + +  +P + DWR +GAV PVKNQGQCGSCW+FS    VE  + +   +++
+Sbjct: 126 RAAGERYRHDGVEELPESVDWREKGAVAPVKNQGQCGSCWAFSAVSTVESINQLVTGEMI 185
+
+Query: 164 SLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ 223
+           +LSEQ LV+C             + GCNGGL  +A+++IIKNGGI TE  YPY A  G  
+Sbjct: 186 TLSEQELVECST--------NGQNSGCNGGLMADAFDFIIKNGGIDTEDDYPYKAVDGKC 237
+
+Query: 224 CNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNP 281
+                      I  F  +P+N+       V+  P+++A +A   E+Q Y  GVF   C  
+Sbjct: 238 DINRENAKVVSIDGFEDVPQNDEKSLQKAVAHQPVSVAIEAGGREFQLYHSGVFSGRCG- 296
+
+Query: 282 NSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKN----TCGVSNF 337
+            SLDHG++ VGY   N        YWIV+NSWG  WGE GY+ + R  N     CG++  
+Sbjct: 297 TSLDHGVVAVGYGTDN-----GKDYWIVRNSWGPKWGESGYVRMERNINVTTGKCGIAMM 351
+
+Query: 338 VSTS 341
+            S  
+Sbjct: 352 ASYP 355
+
+
+>sp|Q40143|CYS3_LYCES CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 356
+
+ Score =  373 bits (948), Expect = e-103
+ Identities = 123/321 (38%), Positives = 169/321 (52%), Gaps = 28/321 (8%)
+
+Query: 29  FLEFQDKFNKKYSHEEYL-ERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F  F  +  K+Y   E + +RFEIF  NL  I   N   +      K G+N+F DL+ DE
+Sbjct: 57  FARFAIRHRKRYDSVEEIKQRFEIFLDNLKMIRSHNRKGL----SYKLGINEFTDLTWDE 112
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           F+ + L    A           L    +  +P   DWR  G V+PVK QG+CGSCW+FST
+Sbjct: 113 FRKHKLG---ASQNCSATTKGNLKLTNVV-LPETKDWRKDGIVSPVKAQGKCGSCWTFST 168
+
+Query: 148 TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 207
+           TG +E  +  +  K +SLSEQ LVDC      +        GCNGGL   A+ YI  NGG
+Sbjct: 169 TGALEAAYAQAFGKGISLSEQQLVDCAGAFNNF--------GCNGGLPSQAFEYIKFNGG 220
+
+Query: 208 IQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAADAVE 266
+           + TE +YPYT + G  C F+ ANIG K+ +   I    E  +   +    P+++A + V+
+Sbjct: 221 LDTEEAYPYTGKNGI-CKFSQANIGVKVISSVNITLGAEYELKYAVALVRPVSVAFEVVK 279
+
+Query: 267 -WQFYIGGVF-DIPCN--PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGY 322
+            ++ Y  GV+    C   P  ++H +L VGY  +N       PYW++KNSWGADWGE GY
+Sbjct: 280 GFKQYKSGVYASTECGDTPMDVNHAVLAVGYGVEN-----GTPYWLIKNSWGADWGEDGY 334
+
+Query: 323 IYLRRGKNTCGVSNFVSTSII 343
+             +  GKN CGV+   S  I+
+Sbjct: 335 FKMEMGKNMCGVATCASYPIV 355
+
+
+>sp|P43156|CYSP_HEMSP THIOL PROTEASE SEN102 PRECURSOR
+          Length = 360
+
+ Score =  373 bits (947), Expect = e-103
+ Identities = 127/352 (36%), Positives = 181/352 (51%), Gaps = 30/352 (8%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQ--SQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           V L F+    ++  + + +  E+   + + +++         +E   RF +FK N+  I 
+Sbjct: 12  VALSFLSIAQSIPFTEKDLASEDSLWNLYEKWRTHHTVARDLDEKNRRFNVFKENVKFIH 71
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADY---LDDEFINS 117
+           E N       A  K  +NKF D+++ EF++ Y  +K         +         E + S
+Sbjct: 72  EFNQK---KDAPYKLALNKFGDMTNQEFRSKYAGSKIQHHRSQRGIQKNTGSFMYENVGS 128
+
+Query: 118 IPT-AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+           +P  + DWR +GAVT VK+QGQCGSCW+FST  +VEG + I   +LVSLSEQ LVDCD  
+Sbjct: 129 LPAASIDWRAKGAVTGVKDQGQCGSCWAFSTIASVEGINQIKTGELVSLSEQELVDCD-- 186
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                   + +EGCNGGL   A+ +I KN GI TE SYPY  + GT  +    +    I 
+Sbjct: 187 -------TSYNEGCNGGLMDYAFEFIQKN-GITTEDSYPYAEQDGTCASNLLNSPVVSID 238
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYS 294
+               +P N        V+  P++++ +A    +QFY  GVF   C    LDHG+ IVGY 
+Sbjct: 239 GHQDVPANNENALMQAVANQPISVSIEASGYGFQFYSEGVFTGRCG-TELDHGVAIVGYG 297
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSNFVSTSI 342
+           A     R    YWIVKNSWG +WGE GYI ++RG    +  CG++   S  I
+Sbjct: 298 AT----RDGTKYWIVKNSWGEEWGESGYIRMQRGISDKRGKCGIAMEASYPI 345
+
+
+>sp|P49935|CATH_MOUSE CATHEPSIN H PRECURSOR (CATHEPSIN B3) (CATHEPSIN BA)
+          Length = 333
+
+ Score =  372 bits (944), Expect = e-103
+ Identities = 114/324 (35%), Positives = 164/324 (50%), Gaps = 28/324 (8%)
+
+Query: 25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+           E+  F  +  +  K YS  EY  R ++F +N  KI+  N          K  +N+F+D+S
+Sbjct: 29  EKFHFKSWMKQHQKTYSSVEYNHRLQMFANNWRKIQAHNQRN----HTFKMALNQFSDMS 84
+
+Query: 85  SDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAV-TPVKNQGQCGSCW 143
+             E K+ +L ++          ++YL        P++ DWR +G V +PVKNQG C SCW
+Sbjct: 85  FAEIKHKFLWSEPQ--NCSATKSNYLRGT--GPYPSSMDWRKKGNVVSPVKNQGACASCW 140
+
+Query: 144 SFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYII 203
+           +FSTTG +E    I+  K++SL+EQ LVDC             + GC GGL   A+ YI+
+Sbjct: 141 TFSTTGALESAVAIASGKMLSLAEQQLVDCAQAFN--------NHGCKGGLPSQAFEYIL 192
+
+Query: 204 KNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKN-ETVMAGYIVSTGPLAIAA 262
+            N GI  E SYPY  +  + C FN     A + N   I  N E  M   +    P++ A 
+Sbjct: 193 YNKGIMEEDSYPYIGKD-SSCRFNPQKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAF 251
+
+Query: 263 DAV-EWQFYIGGVFDIPCN---PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+           +   ++  Y  GV+        P+ ++H +L VGY  +N +      YWIVKNSWG+ WG
+Sbjct: 252 EVTEDFLMYKSGVYSSKSCHKTPDKVNHAVLAVGYGEQNGLL-----YWIVKNSWGSQWG 306
+
+Query: 319 EQGYIYLRRGKNTCGVSNFVSTSI 342
+           E GY  + RGKN CG++   S  I
+Sbjct: 307 ENGYFLIERGKNMCGLAACASYPI 330
+
+
+>sp|P25778|ORYC_ORYSA ORYZAIN GAMMA CHAIN PRECURSOR
+          Length = 362
+
+ Score =  370 bits (940), Expect = e-102
+ Identities = 110/322 (34%), Positives = 164/322 (50%), Gaps = 27/322 (8%)
+
+Query: 28  QFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F  +  K+Y    E   RF IF  +L  +   N   + ++     G+N+FAD+S +
+Sbjct: 61  RFARFAVRHGKRYGDAAEVQRRFRIFSESLELVRSTNRRGLPYR----LGINRFADMSWE 116
+
+Query: 87  EFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           EF+   L    A       +A         ++P   DWR  G V+PVK+QG CGSCW FS
+Sbjct: 117 EFQASRLG---AAQNCSATLAGNHRMRDAPALPETKDWREDGIVSPVKDQGHCGSCWPFS 173
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           TTG++E ++  +    VSLSEQ L DC      +        GC+GGL   A+ YI  NG
+Sbjct: 174 TTGSLEARYTQATGPPVSLSEQQLADCATRYNNF--------GCSGGLPSQAFEYIKYNG 225
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KNETVMAGYIVSTGPLAIAADAV 265
+           G+ TE +YPYT   G  C++   N G K+ +   I    E  +   +    P+++A   +
+Sbjct: 226 GLDTEEAYPYTGVNGI-CHYKPENAGVKVLDSVNITLVAEDELKNAVGLVRPVSVAFQVI 284
+
+Query: 266 E-WQFYIGGVF---DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQG 321
+             ++ Y  GV+       +P  ++H +L VGY  +N +     PYW++KNSWGADWG+ G
+Sbjct: 285 NGFRMYKSGVYTSDHCGTSPMDVNHAVLAVGYGVENGV-----PYWLIKNSWGADWGDNG 339
+
+Query: 322 YIYLRRGKNTCGVSNFVSTSII 343
+           Y  +  GKN CG++   S  I+
+Sbjct: 340 YFTMEMGKNMCGIATCASYPIV 361
+
+
+>sp|P00785|ACTN_ACTCH ACTINIDAIN PRECURSOR (ACTINIDIN)
+          Length = 380
+
+ Score =  368 bits (936), Expect = e-102
+ Identities = 116/352 (32%), Positives = 180/352 (50%), Gaps = 29/352 (8%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPP----EEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSN 55
+           M ++    L + ++  +++ +      E ++ +  +  K+ K Y+   E+  RFEIFK  
+Sbjct: 10  MSLLFFSTLLILSLAFNAKNLTQRTNDEVKAMYESWLIKYGKSYNSLGEWERRFEIFKET 69
+
+Query: 56  LGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFI 115
+           L  I+E N    +     K G+N+FADL+ +EF++ YL       ++   V++  +  F 
+Sbjct: 70  LRFIDEHNA---DTNRSYKVGLNQFADLTDEEFRSTYLGFTSG--SNKTKVSNRYEPRFG 124
+
+Query: 116 NSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 175
+             +P+  DWR+ GAV  +K+QG+CG CW+FS    VEG + I    L+SLSEQ L+DC  
+Sbjct: 125 QVLPSYVDWRSAGAVVDIKSQGECGGCWAFSAIATVEGINKIVTGVLISLSEQELIDC-- 182
+
+Query: 176 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKI 235
+                 G      GCNGG   + + +II NGGI TE +YPYTA+ G             I
+Sbjct: 183 ------GRTQNTRGCNGGYITDGFQFIINNGGINTEENYPYTAQDGECNLDLQNEKYVTI 236
+
+Query: 236 SNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIPCNPNSLDHGILIVGY 293
+             +  +P N        V+  P+++A DA    ++ Y  G+F  PC   ++DH + IVGY
+Sbjct: 237 DTYENVPYNNEWALQTAVTYQPVSVALDAAGDAFKHYSSGIFTGPCG-TAIDHAVTIVGY 295
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK---NTCGVSNFVSTSI 342
+             +       + YWIVKNSW   WGE+GY+ + R      TCG++   S  +
+Sbjct: 296 GTEG-----GIDYWIVKNSWDTTWGEEGYMRILRNVGGAGTCGIATMPSYPV 342
+
+
+>sp|P41721|CATV_NPVBM VIRAL CATHEPSIN (V-CATH)
+          Length = 323
+
+ Score =  367 bits (932), Expect = e-101
+ Identities = 131/342 (38%), Positives = 180/342 (52%), Gaps = 26/342 (7%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELN 63
+           +LF L V+ V  S+   P +  + F EF  +FNK YS E E L RF+IF+ NL +I    
+Sbjct: 4   ILFYLFVYAVVKSAAYDPLKAPNYFEEFVHRFNKNYSSEVEKLRRFKIFQHNLNEI---- 59
+
+Query: 64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD 123
+            I  N     K+ +NKF+DLS DE    Y        T +      LD       P  FD
+Sbjct: 60  -INKNQNDSAKYEINKFSDLSKDETIAKYTGLSLPTQTQNFCKVILLDQPPGKG-PLEFD 117
+
+Query: 124 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+           WR    VT VKNQG CG+CW+F+T G++E Q  I  N+L++LSEQ ++DCD         
+Sbjct: 118 WRRLNKVTSVKNQGMCGACWAFATLGSLESQFAIKHNELINLSEQQMIDCD--------- 168
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN-FTMIP 242
+              D GCNGGL   A+  IIK GG+Q ES YPY A     C  NS     ++ + +  I 
+Sbjct: 169 -FVDAGCNGGLLHTAFEAIIKMGGVQLESDYPYEA-DNNNCRMNSNKFLVQVKDCYRYII 226
+
+Query: 243 KNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRK 302
+             E  +   +   GP+ +A DA +   Y  G+    C  + L+H +L+VGY  +N     
+Sbjct: 227 VYEEKLKDLLPLVGPIPMAIDAADIVNYKQGIIKY-CFDSGLNHAVLLVGYGVEN----- 280
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN-FVSTSII 343
+           N+PYW  KN+WG DWGE G+  +++  N CG+ N   ST++I
+Sbjct: 281 NIPYWTFKNTWGTDWGEDGFFRVQQNINACGMRNELASTAVI 322
+
+
+>sp|Q02765|CATS_RAT CATHEPSIN S PRECURSOR
+          Length = 330
+
+ Score =  365 bits (926), Expect = e-100
+ Identities = 104/330 (31%), Positives = 161/330 (48%), Gaps = 22/330 (6%)
+
+Query: 18  SRGIPPEEQSQFLEFQD-KFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFG 76
+           +    P     +  ++  +  +     E   R  I++ NL  I   NL           G
+Sbjct: 15  ATAERPTLDHHWDLWKKTRMRRNTDQNEEDVRRLIWEKNLKFIMLHNLEHSMGMHSYSVG 74
+
+Query: 77  VNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQ 136
+           +N   D++ +E   Y  + +     +         ++    +P + DWR +G VT VK Q
+Sbjct: 75  MNHMGDMTPEEVIGYMGSLRIPRPWNRSGTLKSSSNQT---LPDSVDWREKGCVTNVKYQ 131
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQP 196
+           G CGSCW+FS  G +EGQ  +   KLVSLS QNLVDC  E      E+  ++GC GG   
+Sbjct: 132 GSCGSCWAFSAEGALEGQLKLKTGKLVSLSAQNLVDCSTE------EKYGNKGCGGGFMT 185
+
+Query: 197 NAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIP-KNETVMAGYIVST 255
+            A+ YII +  I +E+SYPY A    +C ++  N  A  S +  +P  +E  +   + + 
+Sbjct: 186 EAFQYII-DTSIDSEASYPYKAMD-EKCLYDPKNRAATCSRYIELPFGDEEALKEAVATK 243
+
+Query: 256 GPLAIAADA---VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNS 312
+           GP+++  D      +  Y  GV+D P    +++HG+L+VGY            YW+VKNS
+Sbjct: 244 GPVSVGIDDASHSSFFLYQSGVYDDPSCTENMNHGVLVVGYGT-----LDGKDYWLVKNS 298
+
+Query: 313 WGADWGEQGYIYLRR-GKNTCGVSNFVSTS 341
+           WG  +G+QGYI + R  KN CG++++ S  
+Sbjct: 299 WGLHFGDQGYIRMARNNKNHCGIASYCSYP 328
+
+
+>sp|P14658|CYSP_TRYBB CYSTEINE PROTEINASE PRECURSOR
+          Length = 450
+
+ Score =  365 bits (926), Expect = e-100
+ Identities = 136/346 (39%), Positives = 190/346 (54%), Gaps = 26/346 (7%)
+
+Query: 3   VILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEE 61
+           V+L     + +V + S  +    + +F  F+ K+ K Y   +E   RF  F+ N+ + + 
+Sbjct: 15  VLLAMAACLASVALGSLHVEESLEMRFAAFKKKYGKVYKDAKEEAFRFRAFEENMEQAK- 73
+
+Query: 62  LNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTA 121
+              I         FGV  F+D++ +EF+  Y N           +   ++       P A
+Sbjct: 74  ---IQAAANPYATFGVTPFSDMTREEFRARYRNGASYFAAAQKRLRKTVNVT-TGRAPAA 129
+
+Query: 122 FDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYE 181
+            DWR +GAVTPVK QGQCGSCW+FST GN+EGQ  ++ N LVSLSEQ LV CD       
+Sbjct: 130 VDWREKGAVTPVKVQGQCGSCWAFSTIGNIEGQWQVAGNPLVSLSEQMLVSCDT------ 183
+
+Query: 182 GEEACDEGCNGGLQPNAYNYIIK-NGG-IQTESSYPYTAETGT--QCNFNSANIGAKISN 237
+                D GCNGGL  NA+N+I+  NGG + TE+SYPY +  G   QC  N   IGA I++
+Sbjct: 184 ----IDSGCNGGLMDNAFNWIVNSNGGNVFTEASYPYVSGNGEQPQCQMNGHEIGAAITD 239
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN 297
+              +P++E  +A Y+   GPLAIA DA  +  Y GG+    C    LDHG+L+VGY+   
+Sbjct: 240 HVDLPQDEDAIAAYLAENGPLAIAVDAESFMDYNGGIL-TSCTSKQLDHGVLLVGYN--- 295
+
+Query: 298 TIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+                N PYWI+KNSW   WGE GYI + +G N C ++  VS++++
+Sbjct: 296 --DNSNPPYWIIKNSWSNMWGEDGYIRIEKGTNQCLMNQAVSSAVV 339
+
+
+>sp|P25783|CATV_NPVAC VIRAL CATHEPSIN (V-CATH)
+          Length = 323
+
+ Score =  363 bits (923), Expect = e-100
+ Identities = 129/342 (37%), Positives = 178/342 (51%), Gaps = 26/342 (7%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELN 63
+           +LF L V+ V  S+     +  + F EF  +FNK Y  E E L RF+IF+ NL +I    
+Sbjct: 4   ILFYLFVYGVVNSAAYDLLKAPNYFEEFVHRFNKDYGSEVEKLRRFKIFQHNLNEI---- 59
+
+Query: 64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD 123
+            I  N     K+ +NKF+DLS DE    Y      I T +      LD       P  FD
+Sbjct: 60  -INKNQNDSAKYEINKFSDLSKDETIAKYTGLSLPIQTQNFCKVIVLDQPPGKG-PLEFD 117
+
+Query: 124 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+           WR    VT VKNQG CG+CW+F+T  ++E Q  I  N+L++LSEQ ++DCD         
+Sbjct: 118 WRRLNKVTSVKNQGMCGACWAFATLASLESQFAIKHNQLINLSEQQMIDCD--------- 168
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN-FTMIP 242
+              D GCNGGL   A+  IIK GG+Q ES YPY A     C  NS     ++ + +  I 
+Sbjct: 169 -FVDAGCNGGLLHTAFEAIIKMGGVQLESDYPYEA-DNNNCRMNSNKFLVQVKDCYRYIT 226
+
+Query: 243 KNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRK 302
+             E  +   +   GP+ +A DA +   Y  G+    C  + L+H +L+VGY  +N     
+Sbjct: 227 VYEEKLKDLLRLVGPIPMAIDAADIVNYKQGIIKY-CFNSGLNHAVLLVGYGVEN----- 280
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN-FVSTSII 343
+           N+PYW  KN+WG DWGE G+  +++  N CG+ N   ST++I
+Sbjct: 281 NIPYWTFKNTWGTDWGEDGFFRVQQNINACGMRNELASTAVI 322
+
+
+>sp|P41715|CATV_NPVCF VIRAL CATHEPSIN (V-CATH)
+          Length = 324
+
+ Score =  363 bits (922), Expect = e-100
+ Identities = 128/343 (37%), Positives = 187/343 (54%), Gaps = 25/343 (7%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+           M  I+L++L    V  ++  +  +  + F +F  KFNK YS E E L RF+IF+ NL +I
+Sbjct: 1   MNKIVLYLLVYGAVQCAAYDV-LKAPNYFEDFLHKFNKSYSSESEKLRRFQIFRHNLEEI 59
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+              N    ++ +  ++ +NKFADLS DE  + Y      + T +      LD       P
+Sbjct: 60  INKN----HNDSTAQYEINKFADLSKDETISKYTGLSLPLQTQNFCEVVVLDRPPDKG-P 114
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+             FDWR    VT VKNQG CG+CW+F+T G++E Q  I  N+ ++LSEQ L+DCD     
+Sbjct: 115 LEFDWRRLNKVTSVKNQGMCGACWAFATLGSLESQFAIKHNQFINLSEQQLIDCD----- 169
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN-F 238
+                  D GC+GGL   A+  ++  GGIQ ES YPY A  G  C  N+A    K+   +
+Sbjct: 170 -----FVDAGCDGGLLHTAFEAVMNMGGIQAESDYPYEANNG-DCRANAAKFVVKVKKCY 223
+
+Query: 239 TMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNT 298
+             I   E  +   + S GP+ +A DA +   Y  G+    C  + L+H +L+VGY+ +N 
+Sbjct: 224 RYITVFEEKLKDLLRSVGPIPVAIDASDIVNYKRGIMKY-CANHGLNHAVLLVGYAVENG 282
+
+Query: 299 IFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+           +     P+WI+KN+WGADWGEQGY  +++  N CG+ N + +S
+Sbjct: 283 V-----PFWILKNTWGADWGEQGYFRVQQNINACGIQNELPSS 320
+
+
+>sp|P25250|CYS2_HORVU CYSTEINE PROTEINASE EP-B 2 PRECURSOR
+          Length = 373
+
+ Score =  361 bits (918), Expect = e-100
+ Identities = 121/358 (33%), Positives = 170/358 (46%), Gaps = 38/358 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQ---------FLEFQDKFNKKYSHEEYLERFEIFKSN 55
+           +  VLAV  V + S  IP E++           +  +Q     +  H E   RF  FKSN
+Sbjct: 14  VAAVLAVAAVELCS-AIPMEDKDLESEEALWDLYERWQSAHRVRRHHAEKHRRFGTFKSN 72
+
+Query: 56  LGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFI 115
+              I   N          +  +N+F D+   EF+  ++ +         P         +
+Sbjct: 73  AHFIHSHNKR---GDHPYRLHLNRFGDMDQAEFRATFVGDLRRDTPSKPPSVPGFMYAAL 129
+
+Query: 116 N--SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDC 173
+           N   +P + DWR +GAVT VK+QG+CGSCW+FST  +VEG + I    LVSLSEQ L+DC
+Sbjct: 130 NVSDLPPSVDWRQKGAVTGVKDQGKCGSCWAFSTVVSVEGINAIRTGSLVSLSEQELIDC 189
+
+Query: 174 DHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ---CNFNSAN 230
+           D          A ++GC GGL  NA+ YI  NGG+ TE++YPY A  GT        ++ 
+Sbjct: 190 D---------TADNDGCQGGLMDNAFEYIKNNGGLITEAAYPYRAARGTCNVARAAQNSP 240
+
+Query: 231 IGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIPCNPNSLDHGI 288
+           +   I     +P N        V+  P+++A +A    + FY  GVF   C    LDHG+
+Sbjct: 241 VVVHIDGHQDVPANSEEDLARAVANQPVSVAVEASGKAFMFYSEGVFTGECG-TELDHGV 299
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK----NTCGVSNFVSTSI 342
+            +VGY     +      YW VKNSWG  WGEQGYI + +        CG++   S  +
+Sbjct: 300 AVVGYG----VAEDGKAYWTVKNSWGPSWGEQGYIRVEKDSGASGGLCGIAMEASYPV 353
+
+
+>sp|P25249|CYS1_HORVU CYSTEINE PROTEINASE EP-B 1 PRECURSOR
+          Length = 371
+
+ Score =  361 bits (917), Expect = 1e-99
+ Identities = 121/358 (33%), Positives = 170/358 (46%), Gaps = 38/358 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGIPPEEQSQ---------FLEFQDKFNKKYSHEEYLERFEIFKSN 55
+           +  VLAV  V + S  IP E++           +  +Q     +  H E   RF  FKSN
+Sbjct: 14  VAAVLAVAAVELCS-AIPMEDKDLESEEALWDLYERWQSAHRVRRHHAEKHRRFGTFKSN 72
+
+Query: 56  LGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFI 115
+              I   N          +  +N+F D+   EF+  ++ +         P         +
+Sbjct: 73  AHFIHSHNKR---GDHPYRLHLNRFGDMDQAEFRATFVGDLRRDTPAKPPSVPGFMYAAL 129
+
+Query: 116 N--SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDC 173
+           N   +P + DWR +GAVT VK+QG+CGSCW+FST  +VEG + I    LVSLSEQ L+DC
+Sbjct: 130 NVSDLPPSVDWRQKGAVTGVKDQGKCGSCWAFSTVVSVEGINAIRTGSLVSLSEQELIDC 189
+
+Query: 174 DHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ---CNFNSAN 230
+           D          A ++GC GGL  NA+ YI  NGG+ TE++YPY A  GT        ++ 
+Sbjct: 190 D---------TADNDGCQGGLMDNAFEYIKNNGGLITEAAYPYRAARGTCNVARAAQNSP 240
+
+Query: 231 IGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIPCNPNSLDHGI 288
+           +   I     +P N        V+  P+++A +A    + FY  GVF   C    LDHG+
+Sbjct: 241 VVVHIDGHQDVPANSEEDLARAVANQPVSVAVEASGKAFMFYSEGVFTGDCG-TELDHGV 299
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK----NTCGVSNFVSTSI 342
+            +VGY     +      YW VKNSWG  WGEQGYI + +        CG++   S  +
+Sbjct: 300 AVVGYG----VAEDGKAYWTVKNSWGPSWGEQGYIRVEKDSGASGGLCGIAMEASYPV 353
+
+
+>sp|Q26534|CATL_SCHMA CATHEPSIN L PRECURSOR (SMCL1)
+          Length = 319
+
+ Score =  361 bits (916), Expect = 2e-99
+ Identities = 128/326 (39%), Positives = 190/326 (58%), Gaps = 22/326 (6%)
+
+Query: 21  IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+           +P     ++++F+ K+ K+Y   E   RF IFKSN+ K +   L  +  +    +GV  +
+Sbjct: 12  LPGNVDEKYVQFKLKYRKQYHETEDEIRFNIFKSNILKAQ---LYQVFVRGSAIYGVTPY 68
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG 140
+           +DL++DEF   +L     + +        L  E +N+IP  FDWR +GAVT VKNQG CG
+Sbjct: 69  SDLTTDEFARTHLTASWVVPSSRSNTPTSLGKE-VNNIPKNFDWREKGAVTEVKNQGMCG 127
+
+Query: 141 SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN 200
+           SCW+FSTTGNVE Q F    KL+SLSEQ LVDCD            D+GCNGGL  NAY 
+Sbjct: 128 SCWAFSTTGNVESQWFRKTGKLLSLSEQQLVDCDG----------LDDGCNGGLPSNAYE 177
+
+Query: 201 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAI 260
+            IIK GG+  E +YPY A+   +C+  +  +   I++   + ++ET +A ++     +++
+Sbjct: 178 SIIKMGGLMLEDNYPYDAKN-EKCHLKTDGVAVYINSSVNLTQDETELAAWLYHNSTISV 236
+
+Query: 261 AADAVEWQFYIGGV---FDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+             +A+  QFY  G+   + I C+   LDH +L+VGY     +  KN P+WIVKNSWG +W
+Sbjct: 237 GMNALLLQFYQHGISHPWWIFCSKYLLDHAVLLVGYG----VSEKNEPFWIVKNSWGVEW 292
+
+Query: 318 GEQGYIYLRRGKNTCGVSNFVSTSII 343
+           GE GY  + RG  +CG++   ++++I
+Sbjct: 293 GENGYFRMYRGDGSCGINTVATSAMI 318
+
+
+>sp|P25779|CYSP_TRYCR CRUZIPAIN PRECURSOR (MAJOR CYSTEINE PROTEINASE) (CRUZAINE)
+          Length = 467
+
+ Score =  356 bits (904), Expect = 4e-98
+ Identities = 133/350 (38%), Positives = 184/350 (52%), Gaps = 30/350 (8%)
+
+Query: 3   VILLFVLAVFTVFV----SSRGIPPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLG 57
+           ++L  VL V    V    +S        SQF EF+ K  + Y S  E   R  +F+ NL 
+Sbjct: 8   LLLAAVLVVMACLVPAATASLHAEETLTSQFAEFKQKHGRVYESAAEEAFRLSVFRENLF 67
+
+Query: 58  KIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINS 117
+                     +      FGV  F+DL+ +EF++ Y +N  A F      A       +  
+Sbjct: 68  LARLHAAANPH----ATFGVTPFSDLTREEFRSRY-HNGAAHFAAAQERARVPVKVEVVG 122
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+            P A DWR RGAVT VK+QGQCGSCW+FS  GNVE Q F++ + L +LSEQ LV CD   
+Sbjct: 123 APAAVDWRARGAVTAVKDQGQCGSCWAFSAIGNVECQWFLAGHPLTNLSEQMLVSCDK-- 180
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETG--TQCNFNSANIGA 233
+                    D GC+GGL  NA+ +I++  NG + TE SYPY +  G    C  +   +GA
+Sbjct: 181 --------TDSGCSGGLMNNAFEWIVQENNGAVYTEDSYPYASGEGISPPCTTSGHTVGA 232
+
+Query: 234 KISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGY 293
+            I+    +P++E  +A ++   GP+A+A DA  W  Y GGV    C    LDHG+L+VGY
+Sbjct: 233 TITGHVELPQDEAQIAAWLAVNGPVAVAVDASSWMTYTGGVM-TSCVSEQLDHGVLLVGY 291
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           +    +     PYWI+KNSW   WGE+GYI + +G N C V    S++++
+Sbjct: 292 NDSAAV-----PYWIIKNSWTTQWGEEGYIRIAKGSNQCLVKEEASSAVV 336
+
+
+>sp|O10364|CATV_NPVOP VIRAL CATHEPSIN (V-CATH)
+          Length = 324
+
+ Score =  351 bits (890), Expect = 2e-96
+ Identities = 122/343 (35%), Positives = 181/343 (52%), Gaps = 25/343 (7%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+           M  I+L +L    V  ++  +  +  + F +F  KFNK YS E E L RF+IF+ NL +I
+Sbjct: 1   MNKIMLCLLVCGVVHAATYDL-LKAPNYFEDFLHKFNKNYSSESEKLHRFKIFQHNLEEI 59
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+              N     + +  ++ +NKF+DLS +E  + Y        T +      LD    +  P
+Sbjct: 60  INKNQ----NDSTAQYEINKFSDLSKEEAISKYTGLSLPHQTQNFCEVVILDRPP-DRGP 114
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+             FDWR    VT VKNQG CG+CW+F+T G++E Q  I  N+L++LSEQ  +DCD     
+Sbjct: 115 LEFDWRQFNKVTSVKNQGVCGACWAFATLGSLESQFAIKYNRLINLSEQQFIDCDR---- 170
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKI-SNF 238
+                  + GC+GGL   A+   ++ GG+Q ES YPY    G QC  N       + S  
+Sbjct: 171 ------VNAGCDGGLLHTAFESAMEMGGVQMESDYPYETANG-QCRINPNRFVVGVRSCR 223
+
+Query: 239 TMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNT 298
+             I   E  +   + + GP+ +A DA +   Y  G+    C  + L+H +L+VGY+ +N 
+Sbjct: 224 RYIVMFEEKLKDLLRAVGPIPVAIDASDIVNYRRGIMR-QCANHGLNHAVLLVGYAVEN- 281
+
+Query: 299 IFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+               N+PYWI+KN+WG DWGE GY  +++  N CG+ N + +S
+Sbjct: 282 ----NIPYWILKNTWGTDWGEDGYFRVQQNINACGIRNELVSS 320
+
+
+>sp|P36400|LCPB_LEIME CYSTEINE PROTEINASE B PRECURSOR
+          Length = 443
+
+ Score =  348 bits (883), Expect = 1e-95
+ Identities = 123/348 (35%), Positives = 184/348 (52%), Gaps = 28/348 (8%)
+
+Query: 4   ILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEEL 62
+           ++  VLA       +  +     + F EF+  + + Y    E  +R   F+ NL  + E 
+Sbjct: 13  VVCVVLAAACAPARAIHVGTPAAALFEEFKRTYGRAYETLAEEQQRLANFERNLELMREH 72
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDE--FINSIPT 120
+                +     +FG+ KF DLS  EF   YLN            A +       ++++P 
+Sbjct: 73  QARNPH----AQFGITKFFDLSEAEFAARYLNGAAYFAAAKRHAAQHYRKARADLSAVPD 128
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+           A DWR +GAVTPVK+QG CGSCW+FS  GN+EGQ +++ ++LVSLSEQ LV CD      
+Sbjct: 129 AVDWREKGAVTPVKDQGACGSCWAFSAVGNIEGQWYLAGHELVSLSEQQLVSCDD----- 183
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETG---TQCNFNSANIGAKI 235
+                 ++GC+GGL   A++++++  NG + TE SYPY +  G      N +   +GA+I
+Sbjct: 184 -----MNDGCDGGLMLQAFDWLLQNTNGHLHTEDSYPYVSGNGYVPECSNSSELVVGAQI 238
+
+Query: 236 SNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 295
+               +I  +E  MA ++   GP+AIA DA  +  Y  GV    C    L+HG+L+VGY  
+Sbjct: 239 DGHVLIGSSEKAMAAWLAKNGPIAIALDASSFMSYKSGVL-TACIGKQLNHGVLLVGYDM 297
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+              +     PYW++KNSWG DWGEQGY+ +  G N C +S +  ++ +
+Sbjct: 298 TGEV-----PYWVIKNSWGGDWGEQGYVRVVMGVNACLLSEYPVSAHV 340
+
+
+>sp|P25775|LCPA_LEIME CYSTEINE PROTEINASE A PRECURSOR
+          Length = 354
+
+ Score =  348 bits (883), Expect = 1e-95
+ Identities = 143/355 (40%), Positives = 190/355 (53%), Gaps = 36/355 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGI-------PPEEQ----SQFLEFQDKFNKKYSHE-EYLERFEIF 52
+           LLF + V  +FV   G        PP +     + +  F+ +  K +  + E   RF  F
+Sbjct: 7   LLFAIVVTILFVVCYGSALIAQTPPPVDNFVASAHYGSFKKRHGKAFGGDAEEGHRFNAF 66
+
+Query: 53  KSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDD 112
+           K N+     LN    +   D      KFADL+  EF   YLN             D   D
+Sbjct: 67  KQNMQTAYFLNTQNPHAHYDVS---GKFADLTPQEFAKLYLNPDYYARHLKNHKEDVHVD 123
+
+Query: 113 EFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 172
+           +   S   + DWR +GAVTPVKNQG CGSCW+FS  GN+EGQ   S + LVSLSEQ LV 
+Sbjct: 124 DSAPSGVMSVDWRDKGAVTPVKNQGLCGSCWAFSAIGNIEGQWAASGHSLVSLSEQMLVS 183
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQ--CNFNS 228
+           CD            DEGCNGGL   A N+I++  NG + TE+SYPYT+  GT+  C+ + 
+Sbjct: 184 CD----------NIDEGCNGGLMDQAMNWIMQSHNGSVFTEASYPYTSGGGTRPPCH-DE 232
+
+Query: 229 ANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGI 288
+             +GAKI+ F  +P +E  +A ++   GP+A+A DA  WQ Y GGV    C   SL+HG+
+Sbjct: 233 GEVGAKITGFLSLPHDEERIAEWVEKRGPVAVAVDATTWQLYFGGVVS-LCLAWSLNHGV 291
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           LIVG++          PYWIVKNSWG+ WGE+GYI L  G N C + N+  ++ +
+Sbjct: 292 LIVGFN-----KNAKPPYWIVKNSWGSSWGEKGYIRLAMGSNQCMLKNYPVSATV 341
+
+
+>sp|Q05094|CYS2_LEIPI CYSTEINE PROTEINASE 2 PRECURSOR (AMASTIGOTE CYSTEINE PROTEINASE
+           A-2)
+          Length = 444
+
+ Score =  347 bits (882), Expect = 2e-95
+ Identities = 124/349 (35%), Positives = 187/349 (53%), Gaps = 29/349 (8%)
+
+Query: 4   ILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEEL 62
+           ++  VLA       +  +     + F EF+  + + Y    E  +R   F+ NL  + E 
+Sbjct: 13  VVCVVLAAACAPARAIHVGTPAAALFEEFKRTYGRAYETLAEEQQRLANFERNLELMREH 72
+
+Query: 63  NLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDE--FINSIPT 120
+                +     +FG+ KF DLS  EF   YLN            A +       ++++P 
+Sbjct: 73  QARNPH----AQFGITKFFDLSEAEFAARYLNGAAYFAAAKRHAAQHYRKARADLSAVPD 128
+
+Query: 121 AFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEY 180
+           A DWR +GAVTPVK+QG CGSCW+FS  GN+EGQ +++ ++LVSLSEQ LV CD      
+Sbjct: 129 AVDWREKGAVTPVKDQGACGSCWAFSAVGNIEGQWYLAGHELVSLSEQQLVSCDD----- 183
+
+Query: 181 EGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETG--TQCNFNSAN--IGAK 234
+                 ++GC+GGL   A++++++  NG + TE SYPY +  G   +C+ +S    +GA+
+Sbjct: 184 -----MNDGCDGGLMLQAFDWLLQNTNGHLHTEDSYPYVSGNGYVPECSNSSEELVVGAQ 238
+
+Query: 235 ISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYS 294
+           I    +I  +E  MA ++   GP+AIA DA  +  Y  GV    C    L+HG+L+VGY 
+Sbjct: 239 IDGHVLIGSSEKAMAAWLAKNGPIAIALDASSFMSYKSGVL-TACIGKQLNHGVLLVGYD 297
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+               +     PYW++KNSWG DWGEQGY+ +  G N C +S +  ++ +
+Sbjct: 298 MTGEV-----PYWVIKNSWGGDWGEQGYVRVVMGVNACLLSEYPVSAHV 341
+
+
+>sp|P35591|CYS1_LEIPI CYSTEINE PROTEINASE 1 PRECURSOR (AMASTIGOTE CYSTEINE PROTEINASE
+           A-1)
+          Length = 354
+
+ Score =  347 bits (880), Expect = 3e-95
+ Identities = 143/355 (40%), Positives = 190/355 (53%), Gaps = 36/355 (10%)
+
+Query: 5   LLFVLAVFTVFVSSRGI-------PPEEQ----SQFLEFQDKFNKKYSHE-EYLERFEIF 52
+           LLF + V  +FV   G        PP +     + +  F+ +  K +  + E   RF  F
+Sbjct: 7   LLFAIVVTILFVVCYGSALIAQTPPPVDNFVASAHYGSFKKRHGKAFGGDAEEGHRFNAF 66
+
+Query: 53  KSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDD 112
+           K N+     LN    +   D      KFADL+  EF   YLN             D   D
+Sbjct: 67  KQNMQTAYFLNTQNPHAHYDVS---GKFADLTPQEFAKLYLNPDYYARHLKDHKEDVHVD 123
+
+Query: 113 EFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVD 172
+           +   S   + DWR +GAVTPVKNQG CGSCW+FS  GN+EGQ   S + LVSLSEQ LV 
+Sbjct: 124 DSAPSGVMSVDWRDKGAVTPVKNQGLCGSCWAFSAIGNIEGQWAASGHSLVSLSEQMLVS 183
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIK--NGGIQTESSYPYTAETGTQ--CNFNS 228
+           CD            DEGCNGGL   A N+I++  NG + TE+SYPYT+  GT+  C+ + 
+Sbjct: 184 CD----------NIDEGCNGGLMDQAMNWIMQSHNGSVFTEASYPYTSGGGTRPPCH-DE 232
+
+Query: 229 ANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGI 288
+             +GAKI+ F  +P +E  +A ++   GP+A+A DA  WQ Y GGV    C   SL+HG+
+Sbjct: 233 GEVGAKITGFLSLPHDEERIAEWVEKRGPVAVAVDATTWQLYFGGVVS-LCLAWSLNHGV 291
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           LIVG++          PYWIVKNSWG+ WGE+GYI L  G N C + N+  ++ +
+Sbjct: 292 LIVGFN-----KNAKPPYWIVKNSWGSSWGEKGYIRLAMGSNQCMLKNYPVSATV 341
+
+
+>sp|P05994|PAP4_CARPA PAPAYA PROTEINASE IV PRECURSOR (PPIV) (PAPAYA PEPTIDASE B) (GLYCYL
+           ENDOPEPTIDASE)
+          Length = 348
+
+ Score =  346 bits (879), Expect = 3e-95
+ Identities = 111/321 (34%), Positives = 163/321 (50%), Gaps = 29/321 (9%)
+
+Query: 29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F  +  K NK Y + +E L RFEIFK NL  I+E N +   +      G+N+F+DLS+DE
+Sbjct: 48  FNSWMLKHNKNYKNVDEKLYRFEIFKDNLKYIDERNKMINGYW----LGLNEFSDLSNDE 103
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           FK  Y+ +    +T+     ++++++ ++ +P + DWR +GAVTPVK+QG C SCW+FST
+Sbjct: 104 FKEKYVGSLPEDYTNQPYDEEFVNEDIVD-LPESVDWRAKGAVTPVKHQGYCESCWAFST 162
+
+Query: 148 TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 207
+              VEG + I    LV LSEQ LVDCD +            GCN G Q  +  Y+ +N G
+Sbjct: 163 VATVEGINKIKTGNLVELSEQELVDCDKQ----------SYGCNRGYQSTSLQYVAQN-G 211
+
+Query: 208 IQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV-- 265
+           I   + YPY A+  T           K +    +  N        ++  P+++  ++   
+Sbjct: 212 IHLRAKYPYIAKQQTCRANQVGGPKVKTNGVGRVQSNNEGSLLNAIAHQPVSVVVESAGR 271
+
+Query: 266 EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+           ++Q Y GG+F+  C    +DH +  VGY            Y ++KNSWG  WGE GYI +
+Sbjct: 272 DFQNYKGGIFEGSCG-TKVDHAVTAVGYGKSG-----GKGYILIKNSWGPGWGENGYIRI 325
+
+Query: 326 RRGKNT----CGVSNFVSTSI 342
+           RR        CGV       I
+Sbjct: 326 RRASGNSPGVCGVYRSSYYPI 346
+
+
+>sp|P10056|PAP3_CARPA CARICAIN PRECURSOR (PAPAYA PROTEINASE OMEGA) (PAPAYA PROTEINASE
+           III) (PPIII) (PAPAYA PEPTIDASE A)
+          Length = 348
+
+ Score =  341 bits (865), Expect = 1e-93
+ Identities = 114/317 (35%), Positives = 162/317 (50%), Gaps = 26/317 (8%)
+
+Query: 29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F  +    NK Y + +E L RFEIFK NL  I+E N    ++      G+N+FADLS+DE
+Sbjct: 48  FNSWMLNHNKFYENVDEKLYRFEIFKDNLNYIDETNKKNNSYW----LGLNEFADLSNDE 103
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           F   Y+ +      +     ++++++ +N +P   DWR +GAVTPV++QG CGSCW+FS 
+Sbjct: 104 FNEKYVGSLIDATIEQSYDEEFINEDTVN-LPENVDWRKKGAVTPVRHQGSCGSCWAFSA 162
+
+Query: 148 TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 207
+              VEG + I   KLV LSEQ LVDC+              GC GG  P A  Y+ KN G
+Sbjct: 163 VATVEGINKIRTGKLVELSEQELVDCERR----------SHGCKGGYPPYALEYVAKN-G 211
+
+Query: 208 IQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--V 265
+           I   S YPY A+ GT           K S    +  N        ++  P+++  ++   
+Sbjct: 212 IHLRSKYPYKAKQGTCRAKQVGGPIVKTSGVGRVQPNNEGNLLNAIAKQPVSVVVESKGR 271
+
+Query: 266 EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+            +Q Y GG+F+ PC    +DH +  VGY            Y ++KNSWG  WGE+GYI +
+Sbjct: 272 PFQLYKGGIFEGPCG-TKVDHAVTAVGYGKSG-----GKGYILIKNSWGTAWGEKGYIRI 325
+
+Query: 326 RRG-KNTCGVSNFVSTS 341
+           +R   N+ GV     +S
+Sbjct: 326 KRAPGNSPGVCGLYKSS 342
+
+
+>sp|P14080|PAP2_CARPA CHYMOPAPAIN PRECURSOR (PAPAYA PROTEINASE II) (PPII)
+          Length = 352
+
+ Score =  339 bits (860), Expect = 6e-93
+ Identities = 119/321 (37%), Positives = 160/321 (49%), Gaps = 29/321 (9%)
+
+Query: 29  FLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F  +  K NK Y S +E + RFEIF+ NL  I+E N    ++      G+N FADLS+DE
+Sbjct: 48  FDSWMLKHNKIYESIDEKIYRFEIFRDNLMYIDETNKKNNSYW----LGLNGFADLSNDE 103
+
+Query: 88  FKNYYLNNKEAIFTDDLP-VADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFS 146
+           FK  Y+      FT       +    + + + P + DWR +GAVTPVKNQG CGSCW+FS
+Sbjct: 104 FKKKYVGFVAEDFTGLEHFDNEDFTYKHVTNYPQSIDWRAKGAVTPVKNQGACGSCWAFS 163
+
+Query: 147 TTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNG 206
+           T   VEG + I    L+ LSEQ LVDCD              GC GG Q  +  Y+  N 
+Sbjct: 164 TIATVEGINKIVTGNLLELSEQELVDCDKH----------SYGCKGGYQTTSLQYVANN- 212
+
+Query: 207 GIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA-- 264
+           G+ T   YPY A+       +      KI+ +  +P N        ++  PL++  +A  
+Sbjct: 213 GVHTSKVYPYQAKQYKCRATDKPGPKVKITGYKRVPSNCETSFLGALANQPLSVLVEAGG 272
+
+Query: 265 VEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIY 324
+             +Q Y  GVFD PC    LDH +  VGY   +        Y I+KNSWG +WGE+GY+ 
+Sbjct: 273 KPFQLYKSGVFDGPCG-TKLDHAVTAVGYGTSD-----GKNYIIIKNSWGPNWGEKGYMR 326
+
+Query: 325 LRR----GKNTCGVSNFVSTS 341
+           L+R     + TCGV       
+Sbjct: 327 LKRQSGNSQGTCGVYKSSYYP 347
+
+
+>sp|P00784|PAPA_CARPA PAPAIN PRECURSOR (PAPAYA PROTEINASE I) (PPI)
+          Length = 345
+
+ Score =  334 bits (848), Expect = 1e-91
+ Identities = 107/321 (33%), Positives = 152/321 (47%), Gaps = 32/321 (9%)
+
+Query: 29  FLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDE 87
+           F  +  K NK Y + +E + RFEIFK NL  I+E N    ++      G+N FAD+S+DE
+Sbjct: 48  FESWMLKHNKIYKNIDEKIYRFEIFKDNLKYIDETNKKNNSYW----LGLNVFADMSNDE 103
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           FK  Y  +    +T      + + ++   +IP   DWR +GAVTPVKNQG CGSCW+FS 
+Sbjct: 104 FKEKYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDWRQKGAVTPVKNQGSCGSCWAFSA 163
+
+Query: 148 TGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG 207
+              +EG   I    L   SEQ L+DCD              GCNGG   +A   + +  G
+Sbjct: 164 VVTIEGIIKIRTGNLNEYSEQELLDCDRR----------SYGCNGGYPWSALQLVAQ-YG 212
+
+Query: 208 IQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAV-- 265
+           I   ++YPY        +       AK      +         Y ++  P+++  +A   
+Sbjct: 213 IHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGK 272
+
+Query: 266 EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+           ++Q Y GG+F  PC  N +DH +  VGY            Y ++KNSWG  WGE GYI +
+Sbjct: 273 DFQLYRGGIFVGPCG-NKVDHAVAAVGYG---------PNYILIKNSWGTGWGENGYIRI 322
+
+Query: 326 RRGKNT----CGVSNFVSTSI 342
+           +RG       CG+       +
+Sbjct: 323 KRGTGNSYGVCGLYTSSFYPV 343
+
+
+>sp|P22895|P34_SOYBN P34 PROBABLE THIOL PROTEASE PRECURSOR
+          Length = 379
+
+ Score =  331 bits (840), Expect = 1e-90
+ Identities = 109/334 (32%), Positives = 169/334 (49%), Gaps = 33/334 (9%)
+
+Query: 24  EEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFAD 82
+           +  S F  ++ +  + Y +HEE  +R EIFK+N   I ++N          + G+NKFAD
+Sbjct: 39  QVSSLFQLWKSEHGRVYHNHEEEAKRLEIFKNNSNYIRDMNA-NRKSPHSHRLGLNKFAD 97
+
+Query: 83  LSSDEFKNYYLNNKEAIFTDDLPVADYLDDE--FINSIPTAFDWRTRGAVTPVKNQGQCG 140
+           ++  EF   YL   + +          +  E    +  P ++DWR +G +T VK QG CG
+Sbjct: 98  ITPQEFSKKYLQAPKDVSQQIKMANKKMKKEQYSCDHPPASWDWRKKGVITQVKYQGGCG 157
+
+Query: 141 SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYN 200
+             W+FS TG +E  H I+   LVSLSEQ LVDC  E           EG   G Q  ++ 
+Sbjct: 158 RGWAFSATGAIEAAHAIATGDLVSLSEQELVDCVEE----------SEGSYNGWQYQSFE 207
+
+Query: 201 YIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNF-TMIPKNE------TVMAGYIV 253
+           +++++GGI T+  YPY A+ G +C  N       I  + T+I  +E             +
+Sbjct: 208 WVLEHGGIATDDDYPYRAKEG-RCKANKIQDKVTIDGYETLIMSDESTESETEQAFLSAI 266
+
+Query: 254 STGPLAIAADAVEWQFYIGGVFDIP--CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKN 311
+              P++++ DA ++  Y GG++D     +P  ++H +L+VGY + + +      YWI KN
+Sbjct: 267 LEQPISVSIDAKDFHLYTGGIYDGENCTSPYGINHFVLLVGYGSADGV-----DYWIAKN 321
+
+Query: 312 SWGADWGEQGYIYLRRGKNT----CGVSNFVSTS 341
+           SWG DWGE GYI+++R        CG++ F S  
+Sbjct: 322 SWGFDWGEDGYIWIQRNTGNLLGVCGMNYFASYP 355
+
+
+>sp|O17473|CATL_BRUPA CATHEPSIN L-LIKE PRECURSOR
+          Length = 395
+
+ Score =  327 bits (829), Expect = 2e-89
+ Identities = 99/325 (30%), Positives = 153/325 (46%), Gaps = 23/325 (7%)
+
+Query: 25  EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLS 84
+            ++++ ++     K Y  +E   R  IF+SN    E +N             +N  ADL+
+Sbjct: 87  LETEWKDYVTALGKHYDQKENNFRMAIFESNELMTERINKKYEQGLVSYTTALNDLADLT 146
+
+Query: 85  SDEF--KNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSC 142
+            +EF  +N      +         +++   +    +P   DWRT+GAVTPV+NQG+CGSC
+Sbjct: 147 DEEFMVRNGLRLPNQTDLRGKRQTSEFYRYDKSERLPDQVDWRTKGAVTPVRNQGECGSC 206
+
+Query: 143 WSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYI 202
+           ++F+T   +E  H     +L+ LS QN+VDC             + GC+GG  P A+ Y 
+Sbjct: 207 YAFATAAALEAYHKQMTGRLLDLSPQNIVDC--------TRNLGNNGCSGGYMPTAFQY- 257
+
+Query: 203 IKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMI-PKNETVMAGYIVSTGPLAIA 261
+               GI  ES YPY      +C +  +      + F  I P +E  +   +   GP+ + 
+Sbjct: 258 ASRYGIAMESRYPYVGTE-QRCRWQQSIAVVTDNGFNEIQPGDELALKHAVAKRGPVVVG 316
+
+Query: 262 A--DAVEWQFYIGGVF-DIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWG 318
+                  ++FY  GV+ +  C     DH +L VGY    +       YWIVKNSWG DWG
+Sbjct: 317 ISGSKRSFRFYKDGVYSEGNCG--RPDHAVLAVGYGTHPSY----GDYWIVKNSWGTDWG 370
+
+Query: 319 EQGYIYLRRGK-NTCGVSNFVSTSI 342
+           + GY+Y+ R + N C +++  S  I
+Sbjct: 371 KDGYVYMARNRGNMCHIASAASFPI 395
+
+
+>sp|Q10991|CATL_SHEEP CATHEPSIN L
+          Length = 217
+
+ Score =  325 bits (824), Expect = 1e-88
+ Identities = 104/230 (45%), Positives = 140/230 (60%), Gaps = 17/230 (7%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+           +P + DW  +G VTPVKNQGQCGSCW+FS TG +EGQ F    KLVSLSEQNLVD     
+Sbjct: 1   VPKSVDWTKKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGKLVSLSEQNLVD----- 55
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+                    ++GCNGGL  NA+ YI +NGG+ +E SYPY A T T CN+      AK + 
+Sbjct: 56  ---SSRPQGNQGCNGGLMDNAFQYIKENGGLDSEESYPYEA-TDTSCNYKPEYSAAKDTG 111
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGV-FDIPCNPNSLDHGILIVGYS 294
+           F  IP+ E  +   + + GP+++A DA    +QFY  G+ +D  C+   LDHG+L+VGY 
+Sbjct: 112 FVDIPQREKALMKAVATVGPISVAIDAGHSSFQFYKSGIYYDPDCSSKDLDHGVLVVGYG 171
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+            + T    N  +WIVKNSWG +WG +GY+ + +   N CG++   S   +
+Sbjct: 172 FEGT----NNKFWIVKNSWGPEWGNKGYVKMAKDQNNHCGIATAASYPTV 217
+
+
+>sp|P54639|CYS4_DICDI CYSTEINE PROTEINASE 4 PRECURSOR
+          Length = 442
+
+ Score =  323 bits (819), Expect = 4e-88
+ Identities = 115/308 (37%), Positives = 166/308 (53%), Gaps = 23/308 (7%)
+
+Query: 1   MKVI-LLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKI 59
+           M+V+  L +L V       +    + ++ F  +     + YS EE+  R++IFKSN+  +
+Sbjct: 1   MRVLSFLCLLLVSYASAKQQFSELQYRNAFTNWMQAHQRTYSSEEFNARYQIFKSNMDYV 60
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+            + N    +   +T  G+N FAD+++ E++  YL      F     +    +  F    P
+Sbjct: 61  HQWN----SKGGETVLGLNVFADITNQEYRTTYLGTP---FDGSALIGTEEEKIFSTPAP 113
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN---KLVSLSEQNLVDCDHE 176
+              DWR +GAVTP+KNQGQCG CWSFSTTG+ EG HFI+      LVSLSEQNL+DC   
+Sbjct: 114 -TVDWRAQGAVTPIKNQGQCGGCWSFSTTGSTEGAHFIASGTKKDLVSLSEQNLIDCSK- 171
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                     + GC GGL    + YII N GI TESSYPYTAE G +C F ++NIGA+I 
+Sbjct: 172 -------SYGNNGCEGGLMTLGFEYIINNKGIDTESSYPYTAEDGKECKFKTSNIGAQIV 224
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGY 293
+           ++  +            +  P+++A DA    +Q Y  G++  P C P  LDHG+L+VGY
+Sbjct: 225 SYQNVTSGSEASLQSASNNAPVSVAIDASNESFQLYESGIYYEPACTPTQLDHGVLVVGY 284
+
+Query: 294 SAKNTIFR 301
+            + ++   
+Sbjct: 285 GSGSSSSS 292
+
+
+ Score = 70.2 bits (169), Expect = 6e-12
+ Identities = 18/46 (39%), Positives = 26/46 (56%), Gaps = 1/46 (2%)
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTS 341
+             +   +  YWIVKNSWG  WG  GYI++ + + N CG++   S  
+Sbjct: 392 GAVEASSGNYWIVKNSWGTSWGMDGYIFMSKDRNNNCGIATMASFP 437
+
+
+>sp|P56203|CATW_MOUSE CATHEPSIN W PRECURSOR (LYMPHOPAIN)
+          Length = 371
+
+ Score =  319 bits (810), Expect = 4e-87
+ Identities = 108/340 (31%), Positives = 162/340 (46%), Gaps = 32/340 (9%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKF 80
+           P E +  F  FQ +FN+ Y +  EY  R  IF  NL + + L    +      +FG   F
+Sbjct: 33  PLELKEVFKLFQIRFNRSYWNPAEYTRRLSIFAHNLAQAQRLQQEDL---GTAEFGETPF 89
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRT-RGAVTPVKNQGQC 139
+           +DL+ +EF   Y   +    T ++       + +  S+P   DWR  +  ++ VKNQG C
+Sbjct: 90  SDLTEEEFGQLYGQERSPERTPNMTK-KVESNTWGESVPRTCDWRKAKNIISSVKNQGSC 148
+
+Query: 140 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 199
+             CW+ +   N++    I   + V +S Q L+DC          E C  GCNGG   +AY
+Sbjct: 149 KCCWAMAAADNIQALWRIKHQQFVDVSVQELLDC----------ERCGNGCNGGFVWDAY 198
+
+Query: 200 NYIIKNGGIQTESSYPYTAETGT-QCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPL 258
+             ++ N G+ +E  YP+  +    +C        A I +FTM+  NE  +A Y+   GP+
+Sbjct: 199 LTVLNNSGLASEKDYPFQGDRKPHRCLAKKYKKVAWIQDFTMLSNNEQAIAHYLAVHGPI 258
+
+Query: 259 AIAADAVEWQFYIGGVFDIP---CNPNSLDHGILIVGYSAKNTIFRKNM----------- 304
+            +  +    Q Y  GV       C+P  +DH +L+VG+  K    +              
+Sbjct: 259 TVTINMKLLQHYQKGVIKATPSSCDPRQVDHSVLLVGFGKKKEGMQTGTVLSHSRKRRHS 318
+
+Query: 305 -PYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+            PYWI+KNSWGA WGE+GY  L RG NTCGV+ +  T+ +
+Sbjct: 319 SPYWILKNSWGAHWGEKGYFRLYRGNNTCGVTKYPFTAQV 358
+
+
+>sp|P56202|CATW_HUMAN CATHEPSIN W PRECURSOR (LYMPHOPAIN)
+          Length = 376
+
+ Score =  318 bits (807), Expect = 9e-87
+ Identities = 114/368 (30%), Positives = 177/368 (47%), Gaps = 44/368 (11%)
+
+Query: 6   LFVLAVFTVFVSSRGI---------PPEEQSQFLEFQDKFNKKY-SHEEYLERFEIFKSN 55
+           L  L V  +    RG          P E +  F  FQ +FN+ Y S EE+  R +IF  N
+Sbjct: 10  LLALLVAGLAQGIRGPLRAQDLGPQPLELKEAFKLFQIQFNRSYLSPEEHAHRLDIFAHN 69
+
+Query: 56  LGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFI 115
+           L + + L    +      +FGV  F+DL+ +EF   Y   + A     +   +   +E  
+Sbjct: 70  LAQAQRLQEEDL---GTAEFGVTPFSDLTEEEFGQLYGYRRAAGGVPSMG-REIRSEEPE 125
+
+Query: 116 NSIPTAFDWRT-RGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCD 174
+            S+P + DWR   GA++P+K+Q  C  CW+ +  GN+E    IS    V +S   L+DC 
+Sbjct: 126 ESVPFSCDWRKVAGAISPIKDQKNCNCCWAMAAAGNIETLWRISFWDFVDVSVHELLDCG 185
+
+Query: 175 HECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGT-QCNFNSANIGA 233
+                      C +GC+GG   +A+  ++ N G+ +E  YP+  +    +C+       A
+Sbjct: 186 R----------CGDGCHGGFVWDAFITVLNNSGLASEKDYPFQGKVRAHRCHPKKYQKVA 235
+
+Query: 234 KISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFD---IPCNPNSLDHGILI 290
+            I +F M+  NE  +A Y+ + GP+ +  +    Q Y  GV       C+P  +DH +L+
+Sbjct: 236 WIQDFIMLQNNEHRIAQYLATYGPITVTINMKPLQLYRKGVIKATPTTCDPQLVDHSVLL 295
+
+Query: 291 VGYSA--------KNTIFRKN-------MPYWIVKNSWGADWGEQGYIYLRRGKNTCGVS 335
+           VG+ +          T+  ++        PYWI+KNSWGA WGE+GY  L RG NTCG++
+Sbjct: 296 VGFGSVKSEEGIWAETVSSQSQPQPPHPTPYWILKNSWGAQWGEKGYFRLHRGSNTCGIT 355
+
+Query: 336 NFVSTSII 343
+            F  T+ +
+Sbjct: 356 KFPLTARV 363
+
+
+>sp|Q01958|CPP2_ENTHI CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 315
+
+ Score =  318 bits (807), Expect = 9e-87
+ Identities = 102/330 (30%), Positives = 160/330 (47%), Gaps = 37/330 (11%)
+
+Query: 21  IPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-K 79
+           +       F  +  K NK ++  E L R  IF  N   ++  N I        K  V+  
+Sbjct: 8   LAIASAIDFNTWASKNNKHFTAIEKLRRRAIFNMNAKFVDSFNKIG-----SFKLSVDGP 62
+
+Query: 80  FADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQC 139
+           FA ++++E++    + +    T++     YL+ +     P + DWR  G VTP+++Q QC
+Sbjct: 63  FAAMTNEEYRTLLKSKRT---TEENGQVKYLNIQA----PESVDWRKEGKVTPIRDQAQC 115
+
+Query: 140 GSCWSFSTTGNVEGQHFISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQP 196
+           GSC++F +   +EG+  I +      + LSE+++V C          +  + GCNGGL  
+Sbjct: 116 GSCYTFGSLAALEGRLLIEKGGDANTLDLSEEHMVQC--------TRDNGNNGCNGGLGS 167
+
+Query: 197 NAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTG 256
+           N Y+YII++ G+  ES YPYT    T C  N  +  AKI+ +T +P+N        +S G
+Sbjct: 168 NVYDYIIEH-GVAKESDYPYTGSDST-CKTNVKSF-AKITGYTKVPRNNEAELKAALSQG 224
+
+Query: 257 PLAIAADAVE--WQFYIGGVF-DIPCNPNS--LDHGILIVGYSAKNTIFRKNMPYWIVKN 311
+            + ++ DA    +Q Y  G + D  C  N   L+H +  VGY   +         WIV+N
+Sbjct: 225 LVDVSIDASSAKFQLYKSGAYTDTKCKNNYFALNHEVCAVGYGVVD-----GKECWIVRN 279
+
+Query: 312 SWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+           SWG  WG++GYI +    NTCGV+      
+Sbjct: 280 SWGTGWGDKGYINMVIEGNTCGVATDPLYP 309
+
+
+>sp|P36185|ACP2_ENTHI CYSTEINE PROTEINASE ACP2 PRECURSOR
+          Length = 310
+
+ Score =  315 bits (799), Expect = 8e-86
+ Identities = 102/330 (30%), Positives = 158/330 (46%), Gaps = 32/330 (9%)
+
+Query: 18  SRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGV 77
+           + GI       F  +  K NK ++  E L R  IF  N   ++  N I        K  V
+Sbjct: 1   AAGIRIASAIDFNTWASKNNKHFTAIEKLRRRAIFNMNAKFVDSFNKIG-----SFKLSV 55
+
+Query: 78  N-KFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQ 136
+           +  FA ++++E++    + +    T++     YL+ +     P + DWR  G VTP+++Q
+Sbjct: 56  DGPFAAMTNEEYRTLLKSKRT---TEENGQVKYLNIQA----PESVDWRKEGKVTPLRDQ 108
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQP 196
+            QCGSC++F +   +EG+  I +       + N +D   E M+    +  + GCNGGL  
+Sbjct: 109 AQCGSCYTFGSLAALEGRLLIEKG-----GDANTLDLSEEHMQCT-RDNGNNGCNGGLGS 162
+
+Query: 197 NAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTG 256
+           N Y+YII++ G+  ES YPYT    T C  N  +   KI+ +T +P+N        +S G
+Sbjct: 163 NVYDYIIEH-GVAKESDYPYTGSDST-CKTNVKSFR-KITGYTKVPRNNEAELKAALSQG 219
+
+Query: 257 PLAIAADAVE--WQFYIGGVF-DIPCNPNS--LDHGILIVGYSAKNTIFRKNMPYWIVKN 311
+            L ++ D     +Q Y  G + D  C  N   L+H +  VGY   +         WIV+N
+Sbjct: 220 LLDVSIDVSSAKFQLYKSGAYTDTKCKNNYFALNHEVCAVGYGVVD-----GKECWIVRN 274
+
+Query: 312 SWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+           SWG  WG++GYI +    NTCGV+      
+Sbjct: 275 SWGTSWGDKGYINMVIEGNTCGVATDPLYP 304
+
+
+>sp|Q06964|CPP3_ENTHI CYSTEINE PROTEINASE 3 PRECURSOR (CYSTEINE PROTEINASE ACP3)
+          Length = 308
+
+ Score =  312 bits (790), Expect = 9e-85
+ Identities = 102/322 (31%), Positives = 157/322 (48%), Gaps = 37/322 (11%)
+
+Query: 29  FLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-KFADLSSDE 87
+           F  +    NK ++  E L R  IF  N   + E N      K   K  V+  FA ++++E
+Sbjct: 9   FNTWAANNNKHFTAVEALRRRAIFNMNARFVAEFNK-----KGSFKLSVDGPFAAMTNEE 63
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           ++    + +     ++     YL+ +     P + DWR +G VTP+++Q QCGSC++F +
+Sbjct: 64  YRTLLKSKRTV---EENGKVTYLNIQA----PESVDWRAQGKVTPIRDQAQCGSCYTFGS 116
+
+Query: 148 TGNVEGQHFISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK 204
+              +EG+  I +      + LSE++LV C          +  + GCNGGL  N Y+YII+
+Sbjct: 117 LAALEGRLLIEKGGNANTLDLSEEHLVQC--------TRDNGNNGCNGGLGSNVYDYIIQ 168
+
+Query: 205 NGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA 264
+           N G+  ES YPYT    T C  N     AKI+ +  +P+N        +S G + ++ DA
+Sbjct: 169 N-GVAKESDYPYTGTDST-CKTN-VKAFAKITGYNKVPRNNEAELKAALSQGLVDVSIDA 225
+
+Query: 265 VE--WQFYIGGVF-DIPCNPN--SLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 319
+               +Q Y  G + D  C  N  +L+H +  VGY   +         WIV+NSWG  WG+
+Sbjct: 226 SSAKFQLYKSGAYSDTKCKNNFFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWGD 280
+
+Query: 320 QGYIYLRRGKNTCGVSNFVSTS 341
+           +GYI +    NTCGV+      
+Sbjct: 281 KGYINMVIEGNTCGVATDPLYP 302
+
+
+>sp|P36184|ACP1_ENTHI CYSTEINE PROTEINASE ACP1 PRECURSOR
+          Length = 308
+
+ Score =  309 bits (784), Expect = 5e-84
+ Identities = 109/348 (31%), Positives = 156/348 (44%), Gaps = 53/348 (15%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKI 59
+           M  ++LFV       V+           F ++    NK +++  EYL RF +F  N   +
+Sbjct: 1   MFALILFVSLACANEVA-----------FKQWAATHNKVFANRAEYLYRFAVFLDNKKFV 49
+
+Query: 60  EELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIP 119
+           E          A+    +N FAD++ +EF   +L       T ++P         + + P
+Sbjct: 50  E----------ANANTELNVFADMTHEEFIQTHLG-----MTYEVPETTSNVKAAVKAAP 94
+
+Query: 120 TAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECME 179
+            + DWR+   + P K+QGQCGSCW+F TT  +EG+      KL S SEQ LVDCD     
+Sbjct: 95  ESVDWRS--IMNPAKDQGQCGSCWTFCTTAVLEGRVNKDLGKLYSFSEQQLVDCD----- 147
+
+Query: 180 YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFT 239
+                A D GC GG   N+  +I +N G+  ES YPY A  GT          A ++   
+Sbjct: 148 -----ASDNGCEGGHPSNSLKFIQENNGLGLESDYPYKAVAGTC---KKVKNVATVTGSR 199
+
+Query: 240 MIP-KNETVMAGYIVSTGPLAIAADAV--EWQFYIGGVF--DIPCNPNSLDHGILIVGYS 294
+            +   +ET +   I   GP+A+  DA    +Q Y  G    D  C    ++H +  VGY 
+Sbjct: 200 RVTDGSETGLQTIIAENGPVAVGMDASRPSFQLYKKGTIYSDTKCRSRMMNHCVTAVGYG 259
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTS 341
+                   N  YWI++NSWG  WG+ GY  L R   N CG+    +  
+Sbjct: 260 -----SNSNGKYWIIRNSWGTSWGDAGYFLLARDSNNMCGIGRDSNYP 302
+
+
+>sp|P25326|CATS_BOVIN CATHEPSIN S
+          Length = 217
+
+ Score =  307 bits (778), Expect = 2e-83
+ Identities = 91/228 (39%), Positives = 129/228 (55%), Gaps = 17/228 (7%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+           +P + DWR +G VT VK QG CGSCW+FS  G +E Q  +   KLVSLS QNLVDC    
+Sbjct: 1   LPDSMDWREKGCVTEVKYQGACGSCWAFSAVGALEAQVKLKTGKLVSLSAQNLVDCST-- 58
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+                 +  ++GCNGG    A+ YII N GI +E+SYPY A  G +C ++  N  A  S 
+Sbjct: 59  -----AKYGNKGCNGGFMTEAFQYIIDNNGIDSEASYPYKAMDG-KCQYDVKNRAATCSR 112
+
+Query: 238 FTMIP-KNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNSLDHGILIVGYS 294
+           +  +P  +E  +   + + GP+++  DA    +  Y  GV+  P    +++HG+L+VGY 
+Sbjct: 113 YIELPFGSEEALKEAVANKGPVSVGIDASHSSFFLYKTGVYYDPSCTQNVNHGVLVVGYG 172
+
+Query: 295 AKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK-NTCGVSNFVSTS 341
+                      YW+VKNSWG  +G+QGYI + R   N CG++N+ S  
+Sbjct: 173 N-----LDGKDYWLVKNSWGLHFGDQGYIRMARNSGNHCGIANYPSYP 215
+
+
+>sp|Q01957|CPP1_ENTHI CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 315
+
+ Score =  301 bits (763), Expect = 1e-81
+ Identities = 103/322 (31%), Positives = 155/322 (47%), Gaps = 37/322 (11%)
+
+Query: 29  FLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVN-KFADLSSDE 87
+           F  +    NK ++  E L R  IF  N   + E N          K  V+  FA ++++E
+Sbjct: 16  FNTWVANNNKHFTAVESLRRRAIFNMNARIVAENNRKE-----TFKLSVDGPFAAMTNEE 70
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFST 147
+           + +     +     ++     YL+ +     P A DWR +G VTP+++QG CGSC++F +
+Sbjct: 71  YNSLLKLKRSG---EEKGEVRYLNIQA----PKAVDWRKKGKVTPIRDQGNCGSCYTFGS 123
+
+Query: 148 TGNVEGQHFISQN---KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIK 204
+              +EG+  I +    + + LSE+++V C  E          + GCNGGL  N YNYI++
+Sbjct: 124 IAALEGRLLIEKGGDSETLDLSEEHMVQCTRE--------DGNNGCNGGLGSNVYNYIME 175
+
+Query: 205 NGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA 264
+           N GI  ES YPYT    T C  +     AKI ++  + +N  V     +S G + ++ DA
+Sbjct: 176 N-GIAKESDYPYTGSDST-CRSD-VKAFAKIKSYNRVARNNEVELKAAISQGLVDVSIDA 232
+
+Query: 265 VE--WQFYIGGVF-DIPCNPNS--LDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 319
+               +Q Y  G + D  C  N   L+H +  VGY   +         WIV+NSWG  WGE
+Sbjct: 233 SSVQFQLYKSGAYTDTQCKNNYFALNHEVCAVGYGVVD-----GKECWIVRNSWGTGWGE 287
+
+Query: 320 QGYIYLRRGKNTCGVSNFVSTS 341
+           +GYI +    NTCGV+      
+Sbjct: 288 KGYINMVIEGNTCGVATDPLYP 309
+
+
+>sp|P25805|CYSP_PLAFA THROPHOZOITE CYSTEINE PROTEINASE PRECURSOR (TCP)
+          Length = 569
+
+ Score =  299 bits (758), Expect = 5e-81
+ Identities = 100/363 (27%), Positives = 161/363 (43%), Gaps = 62/363 (17%)
+
+Query: 27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+           S+F +F  + NK Y + +E + +FEIFK N   I+  N   +N  A  K  VN+F+D S 
+Sbjct: 223 SKFFKFMKEHNKVYKNIDEQMRKFEIFKINYISIKNHNK--LNKNAMYKKKVNQFSDYSE 280
+
+Query: 86  DEFKN--------------YYLNNKEAIFTDDLPVADYL------DDEFINSIPTAFDWR 125
+           +E K                Y    E    D++ ++++       + +  + +P   D+R
+Sbjct: 281 EELKEYFKTLLHVPNHMIEKYSKPFENHLKDNILISEFYTNGKRNEKDIFSKVPEILDYR 340
+
+Query: 126 TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEA 185
+            +G V   K+QG CGSCW+F++ GN+E         ++S SEQ +VDC  +         
+Sbjct: 341 EKGIVHEPKDQGLCGSCWAFASVGNIESVFAKKNKNILSFSEQEVVDCSKD--------- 391
+
+Query: 186 CDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNE 245
+            + GC+GG    ++ Y+++N  +     Y Y A+    C          +S+   +   E
+Sbjct: 392 -NFGCDGGHPFYSFLYVLQN-ELCLGDEYKYKAKDDMFCLNYRCKRKVSLSSIGAV--KE 447
+
+Query: 246 TVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGYS---------- 294
+             +   +   GPL++      ++  Y  GV++  C    L+H +L+VGY           
+Sbjct: 448 NQLILALNEVGPLSVNVGVNNDFVAYSEGVYNGTC-SEELNHSVLLVGYGQVEKTKLNYN 506
+
+Query: 295 ----AKNTIFRKNMP------YWIVKNSWGADWGEQGYIYLRRGKN----TCGVSNFVST 340
+                 NT    N P      YWI+KNSW   WGE G++ L R KN     CG+   V  
+Sbjct: 507 NKIQTYNTKENSNQPDDNIIYYWIIKNSWSKKWGENGFMRLSRNKNGDNVFCGIGEEVFY 566
+
+Query: 341 SII 343
+            I+
+Sbjct: 567 PIL 569
+
+
+>sp|P20721|CYSL_LYCES LOW-TEMPERATURE-INDUCED CYSTEINE PROTEINASE PRECURSOR
+          Length = 346
+
+ Score =  298 bits (756), Expect = 9e-81
+ Identities = 88/243 (36%), Positives = 130/243 (53%), Gaps = 21/243 (8%)
+
+Query: 106 VADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL 165
+            +D    +  +S+P + DWR +G +  VK+QG CGSCW+FS    +E  + I    L+SL
+Sbjct: 6   KSDRYLPKVGDSLPESIDWREKGVLVGVKDQGSCGSCWAFSAVAAMESINAIVTGNLISL 65
+
+Query: 166 SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCN 225
+           SEQ LVDCD          + +EGC+GGL   A+ ++IKNGGI TE  YPY    G    
+Sbjct: 66  SEQELVDCDR---------SYNEGCDGGLMDYAFEFVIKNGGIDTEEDYPYKERNGVCDQ 116
+
+Query: 226 FNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIPCNPNS 283
+           +       KI ++  +P N        V+  P++IA +A   ++Q Y  G+F   C   +
+Sbjct: 117 YRKNAKVVKIDSYEDVPVNNEKALQKAVAHQPVSIALEAGGRDFQHYKSGIFTGKCG-TA 175
+
+Query: 284 LDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSNFVS 339
+           +DHG++I GY  +N      M YWIV+NSWGA+  E GY+ ++R        CG++   S
+Sbjct: 176 VDHGVVIAGYGTEN-----GMDYWIVRNSWGANCRENGYLRVQRNVSSSSGLCGLAIEPS 230
+
+Query: 340 TSI 342
+             +
+Sbjct: 231 YPV 233
+
+
+>sp|P43234|CATO_HUMAN CATHEPSIN O PRECURSOR
+          Length = 321
+
+ Score =  296 bits (750), Expect = 5e-80
+ Identities = 98/297 (32%), Positives = 151/297 (49%), Gaps = 20/297 (6%)
+
+Query: 50  EIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADY 109
+             F+ +L +   LN +  +  +   +G+N+F+ L  +EFK  YL +K + F      A+ 
+Sbjct: 42  AAFRESLNRHRYLNSLFPSENSTAFYGINQFSYLFPEEFKAIYLRSKPSKFPR--YSAEV 99
+
+Query: 110 LDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQN 169
+                  S+P  FDWR +  VT V+NQ  CG CW+FS  G VE  + I    L  LS Q 
+Sbjct: 100 HMSIPNVSLPLRFDWRDKQVVTQVRNQQMCGGCWAFSVVGAVESAYAIKGKPLEDLSVQQ 159
+
+Query: 170 LVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGG-IQTESSYPYTAETGTQCNFNS 228
+           ++DC             + GCNGG   NA N++ K    +  +S YP+ A+ G    F+ 
+Sbjct: 160 VIDCS----------YNNYGCNGGSTLNALNWLNKMQVKLVKDSEYPFKAQNGLCHYFSG 209
+
+Query: 229 ANIGAKISNF--TMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDH 286
+           ++ G  I  +        E  MA  +++ GPL +  DAV WQ Y+GG+    C+    +H
+Sbjct: 210 SHSGFSIKGYSAYDFSDQEDEMAKALLTFGPLVVIVDAVSWQDYLGGIIQHHCSSGEANH 269
+
+Query: 287 GILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+            +LI G+         + PYWIV+NSWG+ WG  GY +++ G N CG+++ VS+  +
+Sbjct: 270 AVLITGFDKTG-----STPYWIVRNSWGSSWGVDGYAHVKMGSNVCGIADSVSSIFV 321
+
+
+>sp|P46102|CYSP_PLAVN CYSTEINE PROTEINASE PRECURSOR
+          Length = 506
+
+ Score =  291 bits (738), Expect = 1e-78
+ Identities = 108/360 (30%), Positives = 167/360 (46%), Gaps = 58/360 (16%)
+
+Query: 27  SQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSS 85
+           S+F ++  + NKKY + +E L+RFE FK    K ++ N +   +       VN+++D S 
+Sbjct: 160 SKFFKYMKENNKKYENMDEQLQRFENFKIRYMKTQKHNEMVGKNGLTYVQKVNQYSDFSK 219
+
+Query: 86  DEFKNYYLNNKEAIFTDDL----PVADYLDDEFINSI-------PTAFDWRTRGAVTPVK 134
+           +EF NY+                P+  +L +  + S+       P + D+R++    P K
+Sbjct: 220 EEFDNYFKKLLSVPMDLKSKYIVPLKKHLANTNLISVDNKSKDFPDSRDYRSKFNFLPPK 279
+
+Query: 135 NQGQCGSCWSFSTTGNVEGQHFISQNKL-VSLSEQNLVDCDHECMEYEGEEACDEGCNGG 193
+           +QG CGSCW+F+  GN E  +  +++++ +S SEQ +VDC  E          + GC+GG
+Sbjct: 280 DQGNCGSCWAFAAIGNFEYLYVHTRHEMPISFSEQQMVDCSTE----------NYGCDGG 329
+
+Query: 194 LQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIV 253
+               A+ Y+I N G+     YPY       C     ++  ++     +  NE +M   + 
+Sbjct: 330 NPFYAFLYMINN-GVCLGDEYPYKGHEDFFCLNYRCSLLGRVHFIGDVKPNELIM--ALN 386
+
+Query: 254 STGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFR----------- 301
+             GP+ IA  A  ++  Y GGVFD  CNP  L+H +L+VGY                   
+Sbjct: 387 YVGPVTIAVGASEDFVLYSGGVFDGECNP-ELNHSVLLVGYGQVKKSLAFEDSHSNVDSN 445
+
+Query: 302 ---------KNMP------YWIVKNSWGADWGEQGYIYLRRGK----NTCGVSNFVSTSI 342
+                    K         YWIV+NSWG +WGE GYI ++R K      CGV + V   I
+Sbjct: 446 LIKKYKENIKGDDDDDIIYYWIVRNSWGPNWGEGGYIRIKRNKAGDDGFCGVGSDVFFPI 505
+
+
+>sp|P42666|CYSP_PLAVI CYSTEINE PROTEINASE PRECURSOR
+          Length = 583
+
+ Score =  278 bits (704), Expect = 1e-74
+ Identities = 100/383 (26%), Positives = 164/383 (42%), Gaps = 68/383 (17%)
+
+Query: 11  VFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSH-EEYLERFEIFKSNLGKIEELNLIAINH 69
+           V    +    +  +  S+F  F +K+ + Y    E +E+++ FK N  KI++ N      
+Sbjct: 219 VSVAQIEGLFVNLKYASKFFNFMNKYKRSYKDINEQMEKYKNFKMNYLKIKKHNETNQM- 277
+
+Query: 70  KADTKFGVNKFADLSSDEFKNYY---------LNNKEAIFTDDLPVADYLDDEFINS--- 117
+               K  VN+F+D S  +F++Y+         L  K  +    +      +    +S   
+Sbjct: 278 ---YKMKVNQFSDYSKKDFESYFRKLVPIPDHLKKKYVVPFSSMNNGKGKNVVTSSSGAN 334
+
+Query: 118 ----IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLV-SLSEQNLVD 172
+               +P   D+R +G V   K+QG CGSCW+F++ GNVE  +    NK + +LSEQ +VD
+Sbjct: 335 LLADVPEILDYREKGIVHEPKDQGLCGSCWAFASVGNVECMYAKEHNKTILTLSEQEVVD 394
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIG 232
+           C             + GC+GG    ++ Y I+N GI     Y Y A     C        
+Sbjct: 395 CSK----------LNFGCDGGHPFYSFIYAIEN-GICMGDDYKYKAMDNLFCLNYRCKNK 443
+
+Query: 233 AKISNFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIV 291
+             +S+   + +NE  +   +   GP+++      ++ FY GG+F+  C    L+H +L+V
+Sbjct: 444 VTLSSVGGVKENE--LIRALNEVGPVSVNVGVTDDFSFYGGGIFNGTC-TEELNHSVLLV 500
+
+Query: 292 GYS---------------AKNTIFRKN------------MPYWIVKNSWGADWGEQGYIY 324
+           GY                  + + +K               YWI+KNSW   WGE G++ 
+Sbjct: 501 GYGQVQSSKIFQEKNAYDDASGVTKKGALSYPSKADDGIQYYWIIKNSWSKFWGENGFMR 560
+
+Query: 325 LRRGKN----TCGVSNFVSTSII 343
+           + R K      CG+   V   I+
+Sbjct: 561 ISRNKEGDNVFCGIGVEVFYPIL 583
+
+
+>sp|P16311|MMAL_DERFA MAJOR MITE FECAL ALLERGEN DER F 1 PRECURSOR (DER F I)
+          Length = 321
+
+ Score =  272 bits (688), Expect = 8e-73
+ Identities = 110/350 (31%), Positives = 150/350 (42%), Gaps = 46/350 (13%)
+
+Query: 7   FVLAVFTVFVSSRGIP-PEEQSQFLEFQDKFNKKYSHEEYLE-RFEIFKSNLGKIEELNL 64
+           FVLA+ ++ V S     P     F EF+  FNK Y+  E  E   + F  +L  +E    
+Sbjct: 3   FVLAIASLLVLSTVYARPASIKTFEEFKKAFNKNYATVEEEEVARKNFLESLKYVEA--- 59
+
+Query: 65  IAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFIN------SI 118
+                    K  +N  +DLS DEFKN YL + EA   + L     L+ E         ++
+Sbjct: 60  --------NKGAINHLSDLSLDEFKNRYLMSAEAF--EQLKTQFDLNAETSACRINSVNV 109
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECM 178
+           P+  D R+   VTP++ QG CGSCW+FS     E  +   +N  + LSEQ LVDC     
+Sbjct: 110 PSELDLRSLRTVTPIRMQGGCGSCWAFSGVAATESAYLAYRNTSLDLSEQELVDC----- 164
+
+Query: 179 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNF 238
+                 A   GC+G   P    YI +N G+  E SYPY A        NS + G  ISN+
+Sbjct: 165 ------ASQHGCHGDTIPRGIEYIQQN-GVVEERSYPYVAREQRCRRPNSQHYG--ISNY 215
+
+Query: 239 TMIPKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGG-VFDIPCNPNSLDHGILIVG 292
+             I   +       ++    AIA      D   +Q Y G  +           H + IVG
+Sbjct: 216 CQIYPPDVKQIREALTQTHTAIAVIIGIKDLRAFQHYDGRTIIQHDNGYQPNYHAVNIVG 275
+
+Query: 293 YSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+           Y        +   YWIV+NSW   WG+ GY Y + G N   +  +    I
+Sbjct: 276 YG-----STQGDDYWIVRNSWDTTWGDSGYGYFQAGNNLMMIEQYPYVVI 320
+
+
+>sp|P80884|ANAN_ANACO ANANAIN
+          Length = 216
+
+ Score =  271 bits (687), Expect = 1e-72
+ Identities = 93/229 (40%), Positives = 122/229 (52%), Gaps = 22/229 (9%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHEC 177
+           +P + DWR  GAVT VKNQG+CGSCW+F++   VE  + I +  LVSLSEQ ++DC    
+Sbjct: 1   VPQSIDWRDSGAVTSVKNQGRCGSCWAFASIATVESIYKIKRGNLVSLSEQQVLDC---- 56
+
+Query: 178 MEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISN 237
+                  A   GC GG    AY++II N G+ + + YPY A  GT C  N     A I+ 
+Sbjct: 57  -------AVSYGCKGGWINKAYSFIISNKGVASAAIYPYKAAKGT-CKTNGVPNSAYITR 108
+
+Query: 238 FTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAK 296
+           +T + +N      Y VS  P+A A DA   +Q Y  GVF  PC    L+H I+I+GY   
+Sbjct: 109 YTYVQRNNERNMMYAVSNQPIAAALDASGNFQHYKRGVFTGPCG-TRLNHAIVIIGYGQD 167
+
+Query: 297 NTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGK----NTCGVSNFVSTS 341
+           +        +WIV+NSWGA WGE GYI L R        CG++      
+Sbjct: 168 SA----GKKFWIVRNSWGAGWGEGGYIRLARDVSSSFGICGIAMDPLYP 212
+
+
+>sp||CATL_CHICK_1 [Segment 1 of 2] CATHEPSIN L
+          Length = 176
+
+ Score =  264 bits (669), Expect = 1e-70
+ Identities = 86/183 (46%), Positives = 115/183 (61%), Gaps = 12/183 (6%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECM 178
+           P + DWR +G VTPVK+QGQCGSCW+FSTTG +EGQHF ++ KLVSLSEQNLVDC     
+Sbjct: 2   PRSVDWREKGYVTPVKDQGQCGSCWAFSTTGALEGQHFRTKGKLVSLSEQNLVDCS---- 57
+
+Query: 179 EYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNF 238
+                   ++GCNGGL   A+ Y+  NGGI +E SYPYTA+    C + +    A  + F
+Sbjct: 58  ----RPEGNQGCNGGLMDQAFQYVQDNGGIDSEESYPYTAKDDEDCRYKAEYNAANDTGF 113
+
+Query: 239 TMIPKN-ETVMAGYIVSTGPLAIAADA--VEWQFYIGGVFDIP-CNPNSLDHGILIVGYS 294
+             IP+  E  +   + S GP+++A DA    +QFY  G++  P C+   LDHG+L+VGY 
+Sbjct: 114 VDIPQGHERALMKAVASVGPVSVAIDAGHSSFQFYQSGIYYEPDCSSEDLDHGVLVVGYG 173
+
+Query: 295 AKN 297
+            + 
+Sbjct: 174 FEG 176
+
+
+>sp|P97821|CATC_MOUSE DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+           (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 462
+
+ Score =  262 bits (662), Expect = 9e-70
+ Identities = 86/317 (27%), Positives = 148/317 (46%), Gaps = 37/317 (11%)
+
+Query: 43  EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTD 102
+           E Y ER   +  N   ++ +N +    K+ T     ++  +S  +      +++      
+Sbjct: 161 ERYSERL--YTHNHNFVKAINTVQ---KSWTATAYKEYEKMSLRDLIRRSGHSQRIPRPK 215
+
+Query: 103 DLPVADYLDDEFINSIPTAFDWRTR---GAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 159
+             P+ D +  + +N +P ++DWR       V+PV+NQ  CGSC+SF++ G +E +  I  
+Sbjct: 216 PAPMTDEIQQQILN-LPESWDWRNVQGVNYVSPVRNQESCGSCYSFASMGMLEARIRILT 274
+
+Query: 160 NKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYT 217
+           N   +  LS Q +V C              +GC+GG          ++ G+  ES +PYT
+Sbjct: 275 NNSQTPILSPQEVVSCSPYA----------QGCDGGFPYLIAGKYAQDFGVVEESCFPYT 324
+
+Query: 218 AETGTQCNFNSANIGAKISNFTMIPK-----NETVMAGYIVSTGPLAIAADAVE-WQFYI 271
+           A+  + C      +    S++  +       NE +M   +V  GP+A+A +  + +  Y 
+Sbjct: 325 AKD-SPCKPRENCLRYYSSDYYYVGGFYGGCNEALMKLELVKHGPMAVAFEVHDDFLHYH 383
+
+Query: 272 GGVFDI-----PCNPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+            G++       P NP  L +H +L+VGY          + YWI+KNSWG++WGE GY  +
+Sbjct: 384 SGIYHHTGLSDPFNPFELTNHAVLLVGYGRDPVT---GIEYWIIKNSWGSNWGESGYFRI 440
+
+Query: 326 RRGKNTCGVSNFVSTSI 342
+           RRG + C + +    +I
+Sbjct: 441 RRGTDECAIESIAVAAI 457
+
+
+>sp|P25781|CYSP_THEAN CYSTEINE PROTEINASE PRECURSOR
+          Length = 441
+
+ Score =  259 bits (655), Expect = 6e-69
+ Identities = 105/343 (30%), Positives = 162/343 (46%), Gaps = 50/343 (14%)
+
+Query: 28  QFLEFQDKFNKKY-SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           +F  F +K+ K + S ++ ++RF  F+ N   ++                +NKF+DLS +
+Sbjct: 119 EFDAFVEKYKKVHRSFDQRVQRFLTFRKNYHIVKTHKPTEP-----YSLDLNKFSDLSDE 173
+
+Query: 87  EFKNYY--------------------LNNKEAIFTDDLPVADYLDDEFINSIP--TAFDW 124
+           EFK  Y                    +++K  I+   L  A  +++    S+      +W
+Sbjct: 174 EFKALYPVITPPKTYTSLSKHLEFKKMSHKNPIYISKLKKAKGIEEIKDLSLITGENLNW 233
+
+Query: 125 RTRGAVTPVKNQG-QCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+               AV+P K+QG  CGSCW+FS+  +VE  + + +NK   LSEQ LV+CD   M     
+Sbjct: 234 ARTDAVSPTKDQGDHCGSCWAFSSIASVESLYRLYKNKSYFLSEQELVNCDKSSM----- 288
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPK 243
+                GC GGL   A  YI  + G+  ES  PYT    + C  +  N    I + +++  
+Sbjct: 289 -----GCAGGLPITALEYI-HSKGVSFESEVPYTGIV-SPCKPSIKN-KVFIDSISILKG 340
+
+Query: 244 NETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKN 303
+           N+ V    ++S   + IA    E + Y GG+F   C    L+H +L+VG    +      
+Sbjct: 341 NDVVNKSLVISPTVVGIAVT-KELKLYSGGIFTGKCG-GELNHAVLLVGEGVDH---ETG 395
+
+Query: 304 MPYWIVKNSWGADWGEQGYIYLRR---GKNTCGVSNFVSTSII 343
+           M YWI+KNSWG DWGE G++ L+R   G + CG+  F    I+
+Sbjct: 396 MRYWIIKNSWGEDWGENGFLRLQRTKKGLDKCGILTFGLNPIL 438
+
+
+>sp|P08176|MMAL_DERPT MAJOR MITE FECAL ALLERGEN DER P 1 PRECURSOR (DER P I)
+          Length = 320
+
+ Score =  255 bits (644), Expect = 1e-67
+ Identities = 105/356 (29%), Positives = 153/356 (42%), Gaps = 49/356 (13%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           MK++L     +    V +R   P     F E++  FNK Y+  E     E  + N   +E
+Sbjct: 1   MKIVLAIASLLALSAVYAR---PSSIKTFEEYKKAFNKSYATFEDE---EAARKN--FLE 52
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFI----- 115
+            +  +  N        +N  +DLS DEFKN +L + EA   + L     L+ E       
+Sbjct: 53  SVKYVQSNGG-----AINHLSDLSLDEFKNRFLMSAEAF--EHLKTQFDLNAETNACSIN 105
+
+Query: 116 NSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDH 175
+            + P   D R    VTP++ QG CGSCW+FS     E  +   +N+ + L+EQ LVDC  
+Sbjct: 106 GNAPAEIDLRQMRTVTPIRMQGGCGSCWAFSGVAATESAYLAYRNQSLDLAEQELVDC-- 163
+
+Query: 176 ECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKI 235
+                    A   GC+G   P    YI  NG +Q ES Y Y A   +    N+   G  I
+Sbjct: 164 ---------ASQHGCHGDTIPRGIEYIQHNGVVQ-ESYYRYVAREQSCRRPNAQRFG--I 211
+
+Query: 236 SNFTMI-PKNETVMAGYIV-STGPLAIAA---DAVEWQFYIGGVF---DIPCNPNSLDHG 287
+           SN+  I P N   +   +  +   +A+     D   ++ Y G      D    PN   H 
+Sbjct: 212 SNYCQIYPPNVNKIREALAQTHSAIAVIIGIKDLDAFRHYDGRTIIQRDNGYQPNY--HA 269
+
+Query: 288 ILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+           + IVGYS       + + YWIV+NSW  +WG+ GY Y     +   +  +    I+
+Sbjct: 270 VNIVGYSN-----AQGVDYWIVRNSWDTNWGDNGYGYFAANIDLMMIEEYPYVVIL 320
+
+
+>sp|P14518|BROM_ANACO BROMELAIN, STEM
+          Length = 212
+
+ Score =  253 bits (640), Expect = 4e-67
+ Identities = 80/230 (34%), Positives = 114/230 (48%), Gaps = 27/230 (11%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+           ++P + DWR  GAVT VKNQ  CG+CW+F+    VE  + I +  L  LSEQ ++DC   
+Sbjct: 1   AVPQSIDWRDYGAVTSVKNQNPCGACWAFAAIATVESIYKIKKGILEPLSEQQVLDCAK- 59
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                       GC GG +  A+ +II N G+ + + YPY A  GT C  +     A I+
+Sbjct: 60  ----------GYGCKGGWEFRAFEFIISNKGVASGAIYPYKAAKGT-CKTDGVPNSAYIT 108
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGYSA 295
+            +  +P+N      Y VS  P+ +A DA   +Q+Y  GVF+ PC   SL+H +  +GY  
+Sbjct: 109 GYARVPRNNESSMMYAVSKQPITVAVDANANFQYYKSGVFNGPCG-TSLNHAVTAIGYGQ 167
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG----KNTCGVSNFVSTS 341
+            + I+ K          WGA WGE GYI + R        CG++      
+Sbjct: 168 DSIIYPK---------KWGAKWGEAGYIRMARDVSSSSGICGIAIDPLYP 208
+
+
+>sp|P53634|CATC_HUMAN DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+           (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 463
+
+ Score =  251 bits (634), Expect = 2e-66
+ Identities = 86/318 (27%), Positives = 137/318 (43%), Gaps = 36/318 (11%)
+
+Query: 41  SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF 100
+           S E+Y  R   +K +   ++ +N I  +  A T      +  L+  +       +   I 
+Sbjct: 159 SQEKYSNRL--YKYDHNFVKAINAIQKSWTATTYME---YETLTLGDMIRRSGGHSRKIP 213
+
+Query: 101 TDDLPVADYLDDEFINSIPTAFDWRTRGA---VTPVKNQGQCGSCWSFSTTGNVEGQHFI 157
+                       + I  +PT++DWR       V+PV+NQ  CGSC+SF++ G +E +  I
+Sbjct: 214 RPKPAPLTAEIQQKILHLPTSWDWRNVHGINFVSPVRNQASCGSCYSFASMGMLEARIRI 273
+
+Query: 158 SQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYP 215
+             N   +  LS Q +V C              +GC GG          ++ G+  E+ +P
+Sbjct: 274 LTNNSQTPILSPQEVVSCSQYA----------QGCEGGFPYLIAGKYAQDFGLVEEACFP 323
+
+Query: 216 YTAETGTQCNFNSANIGAKISNFTMIPK-----NETVMAGYIVSTGPLAIAADAVE-WQF 269
+           YT    + C           S +  +       NE +M   +V  GP+A+A +  + +  
+Sbjct: 324 YTGTD-SPCKMKEDCFRYYSSEYHYVGGFYGGCNEALMKLELVHHGPMAVAFEVYDDFLH 382
+
+Query: 270 YIGGVFDI-----PCNPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYI 323
+           Y  G++       P NP  L +H +L+VGY   +      M YWIVKNSWG  WGE GY 
+Sbjct: 383 YKKGIYHHTGLRDPFNPFELTNHAVLLVGYGTDSAS---GMDYWIVKNSWGTGWGENGYF 439
+
+Query: 324 YLRRGKNTCGVSNFVSTS 341
+            +RRG + C + +    +
+Sbjct: 440 RIRRGTDECAIESIAVAA 457
+
+
+>sp|P80067|CATC_RAT DIPEPTIDYL-PEPTIDASE I PRECURSOR (DPP-I) (DPPI) (CATHEPSIN C)
+           (CATHEPSIN J) (DIPEPTIDYL TRANSFERASE)
+          Length = 462
+
+ Score =  250 bits (631), Expect = 4e-66
+ Identities = 86/317 (27%), Positives = 145/317 (45%), Gaps = 37/317 (11%)
+
+Query: 43  EEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTD 102
+           E+Y ER   +  +   ++ +N +  +  A T     ++  LS  +      ++   +   
+Sbjct: 161 EKYSERL--YSHHHNFVKAINSVQKSWTATT---YRRYEKLSIRDLIRRSGHSGRILRPK 215
+
+Query: 103 DLPVADYLDDEFINSIPTAFDWRTRGA---VTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 159
+             P+ D +  + + S+P ++DWR       V+PV+NQ  CGSC+SF++ G +E +  I  
+Sbjct: 216 PAPITDEIQQQIL-SLPESWDWRNVRGINFVSPVRNQESCGSCYSFASIGMLEARIRILT 274
+
+Query: 160 NKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYT 217
+           N   +  LS Q +V C              +GC+GG          ++ G+  E+ +PYT
+Sbjct: 275 NNSQTPILSPQEVVSCSPYA----------QGCDGGFPYLIAGKYAQDFGVVEENCFPYT 324
+
+Query: 218 AETGTQCNFNSANIGAKISNFTMIPK-----NETVMAGYIVSTGPLAIAADAVE-WQFYI 271
+           A     C      +    S +  +       NE +M   +V  GP+A+A +  + +  Y 
+Sbjct: 325 ATDAP-CKPKENCLRYYSSEYYYVGGFYGGCNEALMKLELVKHGPMAVAFEVHDDFLHYH 383
+
+Query: 272 GGVFDI-----PCNPNSL-DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+            G++       P NP  L +H +L+VGY          + YWIVKNSWG+ WGE GY  +
+Sbjct: 384 SGIYHHTGLSDPFNPFELTNHAVLLVGYGKDPVT---GLDYWIVKNSWGSQWGESGYFRI 440
+
+Query: 326 RRGKNTCGVSNFVSTSI 342
+           RRG + C + +    +I
+Sbjct: 441 RRGTDECAIESIAMAAI 457
+
+
+>sp|P22497|CYSP_THEPA CYSTEINE PROTEINASE PRECURSOR
+          Length = 439
+
+ Score =  243 bits (613), Expect = 5e-64
+ Identities = 98/342 (28%), Positives = 155/342 (44%), Gaps = 48/342 (14%)
+
+Query: 24  EEQSQFLEFQDKFNKKYSHE-EYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFAD 82
+           E   +F EF  K+N++++ + E L R   F+SN  +++E              G+N+F+D
+Sbjct: 119 EVYREFEEFNSKYNRRHATQQERLNRLVTFRSNYLEVKE-----QKGDEPYVKGINRFSD 173
+
+Query: 83  LSSDEFKN---------------YYLNNKEAIFTDDLPVADYLDDEFINSIP----TAFD 123
+           L+  EF                 YYL +  A  T    +   L+ +    +        D
+Sbjct: 174 LTEREFYKLFPVMKPPKATYSNGYYLLSHMANKTYLKNLKKALNTDEDVDLAKLTGENLD 233
+
+Query: 124 WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE 183
+           WR   +VT VK+Q  CG CW+FST G+VEG +    +K   LS Q L+DCD         
+Sbjct: 234 WRRSSSVTSVKDQSNCGGCWAFSTVGSVEGYYMSHFDKSYELSVQELLDCD--------- 284
+
+Query: 184 EACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPK 243
+            +   GC GGL  +AY Y+ K  G+ +    P+  +   +C+   A     + ++ +   
+Sbjct: 285 -SFSNGCQGGLLESAYEYVRK-YGLVSAKDLPFV-DKARRCSVPKAK-KVSVPSYHVFKG 340
+
+Query: 244 NETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRK 302
+            E  +    +++ P ++      E   Y  GVF   C   SL+H +++VG          
+Sbjct: 341 KE--VMTRSLTSSPCSVYLSVSPELAKYKSGVFTGECG-KSLNHAVVLVGEGYDEV---T 394
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRR---GKNTCGVSNFVSTS 341
+              YW+V+NSWG DWGE GY+ L R   G + CGV +   ++
+Sbjct: 395 KKRYWVVQNSWGTDWGENGYMRLERTNMGTDKCGVLDTSMSA 436
+
+
+>sp|P07858|CATB_HUMAN CATHEPSIN B PRECURSOR (CATHEPSIN B1) (APP SECRETASE)
+          Length = 339
+
+ Score =  238 bits (602), Expect = 1e-62
+ Identities = 71/299 (23%), Positives = 114/299 (37%), Gaps = 58/299 (19%)
+
+Query: 82  DLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQG 137
+           D+S       YL      F         +       +P +FD    W     +  +++QG
+Sbjct: 51  DMS-------YLKRLCGTFLGGPKPPQRVMFTEDLKLPASFDAREQWPQCPTIKEIRDQG 103
+
+Query: 138 QCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNLVDCDHECMEYEGEEACDEGCNGGLQ 195
+            CGSCW+F     +  +  I  N  VS+  S ++L+ C   C        C +GCNGG  
+Sbjct: 104 SCGSCWAFGAVEAISDRICIHTNAHVSVEVSAEDLLTC---C-----GSMCGDGCNGGYP 155
+
+Query: 196 PNAYNYIIKNGGIQ----------------------TESSYPYTAETGT-QCN------F 226
+             A+N+  + G +                         S  P T E  T +C+      +
+Sbjct: 156 AEAWNFWTRKGLVSGGLYESHVGCRPYSIPPCEHHVNGSRPPCTGEGDTPKCSKICEPGY 215
+
+Query: 227 NSANIGAKISNF--TMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNS 283
+           +      K   +    +  +E  +   I   GP+  A     ++  Y  GV+        
+Sbjct: 216 SPTYKQDKHYGYNSYSVSNSEKDIMAEIYKNGPVEGAFSVYSDFLLYKSGVYQHVTGEMM 275
+
+Query: 284 LDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+             H I I+G+  +N       PYW+V NSW  DWG+ G+  + RG++ CG+ + V   I
+Sbjct: 276 GGHAIRILGWGVEN-----GTPYWLVANSWNTDWGDNGFFKILRGQDHCGIESEVVAGI 329
+
+
+>sp|P00787|CATB_RAT CATHEPSIN B PRECURSOR (CATHEPSIN B1) (RSG-2)
+          Length = 339
+
+ Score =  236 bits (597), Expect = 4e-62
+ Identities = 69/289 (23%), Positives = 117/289 (39%), Gaps = 51/289 (17%)
+
+Query: 92  YLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFST 147
+           YL            + + +      ++P +FD    W     +  +++QG CGSCW+F  
+Sbjct: 54  YLKKLCGTVLGGPNLPERVGFSEDINLPESFDAREQWSNCPTIAQIRDQGSCGSCWAFGA 113
+
+Query: 148 TGNVEGQHFISQNKLVSL--SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKN 205
+              +  +  I  N  V++  S ++L+ C   C        C +GCNGG    A+N+  + 
+Sbjct: 114 VEAMSDRICIHTNGRVNVEVSAEDLLTC---C-----GIQCGDGCNGGYPSGAWNFWTRK 165
+
+Query: 206 GGIQ----------------------TESSYPYTAETGT-QCN------FNSANIGAKIS 236
+           G +                         S  P T E  T +CN      ++++    K  
+Sbjct: 166 GLVSGGVYNSHIGCLPYTIPPCEHHVNGSRPPCTGEGDTPKCNKMCEAGYSTSYKEDKHY 225
+
+Query: 237 NFTM--IPKNETVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGY 293
+            +T   +  +E  +   I   GP+  A     ++  Y  GV+          H I I+G+
+Sbjct: 226 GYTSYSVSDSEKEIMAEIYKNGPVEGAFTVFSDFLTYKSGVYKHEAGDVMGGHAIRILGW 285
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+             +N +     PYW+V NSW  DWG+ G+  + RG+N CG+ + +   I
+Sbjct: 286 GIENGV-----PYWLVANSWNVDWGDNGFFKILRGENHCGIESEIVAGI 329
+
+
+>sp|P10605|CATB_MOUSE CATHEPSIN B PRECURSOR (CATHEPSIN B1)
+          Length = 339
+
+ Score =  233 bits (588), Expect = 4e-61
+ Identities = 67/289 (23%), Positives = 111/289 (38%), Gaps = 51/289 (17%)
+
+Query: 92  YLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFST 147
+           YL            +   +       +P  FD    W     +  +++QG CGSCW+F  
+Sbjct: 54  YLKKLCGTVLGGPKLPGRVAFGEDIDLPETFDAREQWSNCPTIGQIRDQGSCGSCWAFGA 113
+
+Query: 148 TGNVEGQHFISQNKLVSL--SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKN 205
+              +  +  I  N  V++  S ++L+ C   C        C +GCNGG    A+++  K 
+Sbjct: 114 VEAISDRTCIHTNGRVNVEVSAEDLLTC---C-----GIQCGDGCNGGYPSGAWSFWTKK 165
+
+Query: 206 GGIQ----------------------TESSYPYTAETGT-QCN------FNSANIGAKIS 236
+           G +                         S  P T E  T +CN      ++ +    K  
+Sbjct: 166 GLVSGGVYNSHVGCLPYTIPPCEHHVNGSRPPCTGEGDTPRCNKSCEAGYSPSYKEDKHF 225
+
+Query: 237 NFTM--IPKNETVMAGYIVSTGPLAIAADA-VEWQFYIGGVFDIPCNPNSLDHGILIVGY 293
+            +T   +  +   +   I   GP+  A     ++  Y  GV+          H I I+G+
+Sbjct: 226 GYTSYSVSNSVKEIMAEIYKNGPVEGAFTVFSDFLTYKSGVYKHEAGDMMGGHAIRILGW 285
+
+Query: 294 SAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+             +N +     PYW+  NSW  DWG+ G+  + RG+N CG+ + +   I
+Sbjct: 286 GVENGV-----PYWLAANSWNLDWGDNGFFKILRGENHCGIESEIVAGI 329
+
+
+>sp|P43509|CPR5_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 5 PRECURSOR
+          Length = 344
+
+ Score =  232 bits (586), Expect = 8e-61
+ Identities = 68/280 (24%), Positives = 114/280 (40%), Gaps = 53/280 (18%)
+
+Query: 105 PVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN 160
+              D +  E  ++IP  FD    W    ++  +++Q  CGSCW+F+    +  +  I+ N
+Sbjct: 69  KDEDIVATEVSDAIPDHFDARDQWPNCMSINNIRDQSDCGSCWAFAAAEAISDRTCIASN 128
+
+Query: 161 KLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTES------ 212
+             V+  LS ++L+ C        G  +C  GC GG    A+ + +K+G +   S      
+Sbjct: 129 GAVNTLLSSEDLLSC------CTGMFSCGNGCEGGYPIQAWKWWVKHGLVTGGSYETQFG 182
+
+Query: 213 SYPYTA-------------------ETGTQC--------NFNSANIGAKISNFTM--IPK 243
+             PY+                    E   +C        N+ +  +  K    T   + K
+Sbjct: 183 CKPYSIAPCGETVNGVKWPACPEDTEPTPKCVDSCTSKNNYATPYLQDKHFGSTAYAVGK 242
+
+Query: 244 NETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRK 302
+               +   I++ GP+ +A     ++  Y  GV+      +   H + I+G+   N     
+Sbjct: 243 KVEQIQTEILTNGPIEVAFTVYEDFYQYTTGVYVHTAGASLGGHAVKILGWGVDN----- 297
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+             PYW+V NSW   WGE+GY  + RG N CG+ +     I
+Sbjct: 298 GTPYWLVANSWNVAWGEKGYFRIIRGLNECGIEHSAVAGI 337
+
+
+>sp|P07688|CATB_BOVIN CATHEPSIN B PRECURSOR
+          Length = 335
+
+ Score =  232 bits (586), Expect = 8e-61
+ Identities = 66/263 (25%), Positives = 108/263 (40%), Gaps = 51/263 (19%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSL--SEQNLV 171
+           +P +FD    W     +  +++QG CGSCW+F     +  +  I  N  V++  S ++++
+Sbjct: 80  LPESFDAREQWPNCPTIKEIRDQGSCGSCWAFGAVEAISDRICIHSNGRVNVEVSAEDML 139
+
+Query: 172 DCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQ---------------------- 209
+            C        GE  C +GCNGG    A+N+  K G +                       
+Sbjct: 140 TC------CGGE--CGDGCNGGFPSGAWNFWTKKGLVSGGLYNSHVGCRPYSIPPCEHHV 191
+
+Query: 210 TESSYPYTAETGT-QCN------FNSANIGAKISN--FTMIPKNETVMAGYIVSTGPLAI 260
+             S  P T E  T +CN      ++ +    K        +  NE  +   I   GP+  
+Sbjct: 192 NGSRPPCTGEGDTPKCNKTCEPGYSPSYKEDKHFGCSSYSVANNEKEIMAEIYKNGPVEG 251
+
+Query: 261 AADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 319
+           A     ++  Y  GV+          H I I+G+  +N       PYW+V NSW  DWG+
+Sbjct: 252 AFSVYSDFLLYKSGVYQHVSGEIMGGHAIRILGWGVEN-----GTPYWLVGNSWNTDWGD 306
+
+Query: 320 QGYIYLRRGKNTCGVSNFVSTSI 342
+            G+  + RG++ CG+ + +   +
+Sbjct: 307 NGFFKILRGQDHCGIESEIVAGM 329
+
+
+>sp|P43233|CATB_CHICK CATHEPSIN B PRECURSOR (CATHEPSIN B1)
+          Length = 340
+
+ Score =  232 bits (585), Expect = 1e-60
+ Identities = 72/323 (22%), Positives = 122/323 (37%), Gaps = 62/323 (19%)
+
+Query: 59  IEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSI 118
+           +  +N +    +A   F      D+S       Y+      F       + +D      +
+Sbjct: 31  VNHINKLNTTGRAGHNF---HNTDMS-------YVKKLCGTFLGGPKAPERVDFAEDMDL 80
+
+Query: 119 PTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVD 172
+           P  FD    W     ++ +++QG CGSCW+F     +  +  +  N  VS  +S ++L+ 
+Sbjct: 81  PDTFDTRKQWPNCPTISEIRDQGSCGSCWAFGAVEAISDRICVHTNAKVSVEVSAEDLLS 140
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGI----------------------QT 210
+           C   C        C  GCNGG    A+ Y  + G +                        
+Sbjct: 141 C---C-----GFECGMGCNGGYPSGAWRYWTERGLVSGGLYDSHVGCRAYTIPPCEHHVN 192
+
+Query: 211 ESSYPYTAETGT--QCN------FNSANIGAKISNFTM--IPKNETVMAGYIVSTGPLAI 260
+            S  P T E G   +C+      ++ +    K    T   +P++E  +   I   GP+  
+Sbjct: 193 GSRPPCTGEGGETPRCSRHCEPGYSPSYKEDKHYGITSYGVPRSEKEIMAEIYKNGPVEG 252
+
+Query: 261 AADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGE 319
+           A     ++  Y  GV+          H I I+G+  +N       PYW+  NSW  DWG 
+Sbjct: 253 AFIVYEDFLMYKSGVYQHVSGEQVGGHAIRILGWGVEN-----GTPYWLAANSWNTDWGI 307
+
+Query: 320 QGYIYLRRGKNTCGVSNFVSTSI 342
+            G+  + RG++ CG+ + +   +
+Sbjct: 308 TGFFKILRGEDHCGIESEIVAGV 330
+
+
+>sp|Q26563|CATC_SCHMA CATHEPSIN C PRECURSOR
+          Length = 454
+
+ Score =  231 bits (584), Expect = 1e-60
+ Identities = 80/281 (28%), Positives = 124/281 (43%), Gaps = 38/281 (13%)
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFI---NSIPTAFDWRT-----RGAVTP 132
+           +  + DE +N     K  +    +        E I    ++P  FDW +     R  VTP
+Sbjct: 178 SKYTIDELRNRAGGVKSMVTRPSVLNRKTPSKELISLTGNLPLEFDWTSPPDGSRSPVTP 237
+
+Query: 133 VKNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGC 190
+           ++NQG CGSC++  +   +E +  +  N  +   LS Q +VDC              EGC
+Sbjct: 238 IRNQGICGSCYASPSAAALEARIRLVSNFSEQPILSPQTVVDCSPY----------SEGC 287
+
+Query: 191 NGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPK-----NE 245
+           NGG          ++ G+  +   PYT E   +C  +        ++++ I       NE
+Sbjct: 288 NGGFPFLIAGKYGEDFGLPQKIVIPYTGEDTGKCTVSKNCTRYYTTDYSYIGGYYGATNE 347
+
+Query: 246 TVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPC--------NPNSL-DHGILIVGYSA 295
+            +M   ++S GP  +  +   ++QFY  G++            NP  L +H +L+VGY  
+Sbjct: 348 KLMQLELISNGPFPVGFEVYEDFQFYKEGIYHHTTVQTDHYNFNPFELTNHAVLLVGYGV 407
+
+Query: 296 KNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSN 336
+                    PYW VKNSWG +WGEQGY  + RG + CGV +
+Sbjct: 408 DKLS---GEPYWKVKNSWGVEWGEQGYFRILRGTDECGVES 445
+
+
+>sp|P25807|CYS1_CAEEL GUT-SPECIFIC CYSTEINE PROTEINASE PRECURSOR
+          Length = 329
+
+ Score =  231 bits (584), Expect = 1e-60
+ Identities = 69/290 (23%), Positives = 114/290 (38%), Gaps = 46/290 (15%)
+
+Query: 83  LSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQ 138
+           ++ +E K   ++ K A    D  +     +  + S+P  FD    W    ++  +++Q  
+Sbjct: 51  ITEEEMKFKLMDGKYAAAHSD-EIRATEQEVVLASVPATFDSRTQWSECKSIKLIRDQAT 109
+
+Query: 139 CGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQP 196
+           CGSCW+F     +  +  I         +S  +L+ C   C       +C  GC GG   
+Sbjct: 110 CGSCWAFGAAEMISDRTCIETKGAQQPIISPDDLLSC---C-----GSSCGNGCEGGYPI 161
+
+Query: 197 NAYNYIIKNGGIQTESSY------PYTAETGTQ--------------CNFNSANIGAKIS 236
+            A  +   + G+ T   Y      PY     T               C    +   AK  
+Sbjct: 162 QALRWW-DSKGVVTGGDYHGAGCKPYPIAPCTSGNCPESKTPSCSMSCQSGYSTAYAKDK 220
+
+Query: 237 NFTM----IPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIV 291
+           +F +    +PKN   +   I + GP+  A     ++  Y  GV+          H I I+
+Sbjct: 221 HFGVSAYAVPKNAASIQAEIYANGPVEAAFSVYEDFYKYKSGVYKHTAGKYLGGHAIKII 280
+
+Query: 292 GYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+           G+  ++       PYW+V NSWG +WGE G+  + RG + CG+ + V   
+Sbjct: 281 GWGTES-----GSPYWLVANSWGVNWGESGFFKIYRGDDQCGIESAVVAG 325
+
+
+>sp|P43508|CPR4_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 4 PRECURSOR
+          Length = 335
+
+ Score =  231 bits (583), Expect = 2e-60
+ Identities = 78/304 (25%), Positives = 126/304 (40%), Gaps = 60/304 (19%)
+
+Query: 82  DLSSDEFKNYYLNNK-EAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQ 136
+           D++ ++ K   +  +  A  T D+ V  +  +E  ++IP  FD    W    ++  +++Q
+Sbjct: 46  DITIEQVKKRLMRTEFVAPHTPDVEVVKHDINE--DTIPATFDARTQWPNCMSINNIRDQ 103
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGL 194
+             CGSCW+F+       +  I+ N  V+  LS ++++ C   C        C  GC GG 
+Sbjct: 104 SDCGSCWAFAAAEAASDRFCIASNGAVNTLLSAEDVLSC---CSN------CGYGCEGGY 154
+
+Query: 195 QPNAYNYIIKNGGIQTESSY-------PYT-------------------AETGTQC---- 224
+             NA+ Y++K+G   T  SY       PY+                         C    
+Sbjct: 155 PINAWKYLVKSG-FCTGGSYEAQFGCKPYSLAPCGETVGNVTWPSCPDDGYDTPACVNKC 213
+
+Query: 225 ---NFNSANIGAKISNFTM--IPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIP 278
+              N+N A    K    T   + K  + +   I++ GP+  A     ++  Y  GV+   
+Sbjct: 214 TNKNYNVAYTADKHFGSTAYAVGKKVSQIQAEIIAHGPVEAAFTVYEDFYQYKTGVYVHT 273
+
+Query: 279 CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFV 338
+                  H I I+G+   N       PYW+V NSW  +WGE GY  + RG N CG+ + V
+Sbjct: 274 TGQELGGHAIRILGWGTDN-----GTPYWLVANSWNVNWGENGYFRIIRGTNECGIEHAV 328
+
+Query: 339 STSI 342
+              +
+Sbjct: 329 VGGV 332
+
+
+>sp|P43510|CPR6_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 6 PRECURSOR
+          Length = 379
+
+ Score =  225 bits (569), Expect = 8e-59
+ Identities = 83/388 (21%), Positives = 143/388 (36%), Gaps = 78/388 (20%)
+
+Query: 1   MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIE 60
+           MK +L     V   + +                DK+  +    E  E        L   +
+Sbjct: 1   MKTLLFLSCIVVAAYCAC-------NDNLESVLDKYRNREIDSEAAE--------LDGDD 45
+
+Query: 61  ELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSI-- 118
+            ++ +  N    T     +F+ +  +  K  +           +    +L       +  
+Sbjct: 46  LIDYVNENQNLWTAKKQRRFSSVYGENDKAKWGLMGVNHVRLSVKGKQHLSKTKDLDLDI 105
+
+Query: 119 PTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNK--LVSLSEQNLVD 172
+           P +FD    W    ++  +++Q  CGSCW+F     +  +  I+ +    V+LS  +L+ 
+Sbjct: 106 PESFDSRDNWPKCDSIKVIRDQSSCGSCWAFGAVEAMSDRICIASHGELQVTLSADDLLS 165
+
+Query: 173 CDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQ------CNF 226
+           C           +C  GCNGG    A+ Y +K+G I T S+Y  TA  G +      C  
+Sbjct: 166 CCK---------SCGFGCNGGDPLAAWRYWVKDG-IVTGSNY--TANNGCKPYPFPPCEH 213
+
+Query: 227 NSANIGA----------------KISNFTMIPKNE---------------TVMAGYIVST 255
+           +S                      +S++T    +E                 +   +++ 
+Sbjct: 214 HSKKTHFDPCPHDLYPTPKCEKKCVSDYTDKTYSEDKFFGASAYGVKDDVEAIQKELMTH 273
+
+Query: 256 GPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWG 314
+           GPL IA +   ++  Y GGV+          H + ++G+   + I     PYW V NSW 
+Sbjct: 274 GPLEIAFEVYEDFLNYDGGVYVHTGGKLGGGHAVKLIGWGIDDGI-----PYWTVANSWN 328
+
+Query: 315 ADWGEQGYIYLRRGKNTCGVSNFVSTSI 342
+            DWGE G+  + RG + CG+ + V   I
+Sbjct: 329 TDWGEDGFFRILRGVDECGIESGVVGGI 356
+
+
+>sp|P43157|CYSP_SCHJA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECURSOR (ANTIGEN SJ31)
+          Length = 342
+
+ Score =  222 bits (559), Expect = 1e-57
+ Identities = 69/299 (23%), Positives = 114/299 (38%), Gaps = 53/299 (17%)
+
+Query: 84  SSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQC 139
+           S D+ +      KE               +    IP+ FD    W    +++ +++Q +C
+Sbjct: 56  SLDDARILMGARKEDAEMKRNRRPTVDHHDLNVEIPSQFDSRKKWPHCKSISQIRDQSRC 115
+
+Query: 140 GSCWSFSTTGNVEGQHFISQNKLVS--LSEQNLVDCDHECMEYEGEEACDEGCNGGLQPN 197
+           GSCW+F     +  +  I      S  LS  +L+ C   C +      C +GC GG    
+Sbjct: 116 GSCWAFGAVEAMTDRICIQSGGGQSAELSALDLISC---CKD------CGDGCQGGFPGV 166
+
+Query: 198 AYNYIIKNGGIQTESS--------YPY----------------TAETGTQCN------FN 227
+           A++Y +K G +   S         YP+                      QC       + 
+Sbjct: 167 AWDYWVKRGIVTGGSKENHTGCQPYPFPKCEHHTKGKYPACGTKIYKTPQCKQTCQKGYK 226
+
+Query: 228 SANIGAKISN--FTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSL 284
+           +     K        +  NE V+   I+  GP+  A D   ++  Y  G++         
+Sbjct: 227 TPYEQDKHYGDESYNVQNNEKVIQRDIMMYGPVEAAFDVYEDFLNYKSGIYRHVTGSIVG 286
+
+Query: 285 DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+            H I I+G+  +     K  PYW++ NSW  DWGE+G   + RG++ C + + V   +I
+Sbjct: 287 GHAIRIIGWGVE-----KRTPYWLIANSWNEDWGEKGLFRMVRGRDECSIESDVVAGLI 340
+
+
+>sp|P25792|CYSP_SCHMA CATHEPSIN B-LIKE CYSTEINE PROTEINASE PRECURSOR (ANTIGEN SM31)
+          Length = 340
+
+ Score =  216 bits (545), Expect = 5e-56
+ Identities = 67/303 (22%), Positives = 117/303 (38%), Gaps = 55/303 (18%)
+
+Query: 78  NKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPV 133
+           N+F  L     +      +  +     P  D+  +++   IP+ FD    W    ++  +
+Sbjct: 51  NRFHSLDDARIQMGARREEPDLRRKRRPTVDH--NDWNVEIPSNFDSRKKWPGCKSIATI 108
+
+Query: 134 KNQGQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGCN 191
+           ++Q +CGSCWSF     +  +  I     + V LS  +L+ C   C      E+C  GC 
+Sbjct: 109 RDQSRCGSCWSFGAVEAMSDRSCIQSGGKQNVELSAVDLLTC---C------ESCGLGCE 159
+
+Query: 192 GGLQPNAYNYIIKNGGIQTESS--------YPY----------------------TAETG 221
+           GG+   A++Y +K G +   S         YP+                        +  
+Sbjct: 160 GGILGPAWDYWVKEGIVTASSKENHTGCEPYPFPKCEHHTKGKYPPCGSKIYNTPRCKQT 219
+
+Query: 222 TQCNFNSANIGAKISN--FTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIP 278
+            Q  + +     K        +  +E  +   I+  GP+  +     ++  Y  G++   
+Sbjct: 220 CQRKYKTPYTQDKHRGKSSYNVKNDEKAIQKEIMKYGPVEASFTVYEDFLNYKSGIYKHI 279
+
+Query: 279 CNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFV 338
+                  H I I+G+  +N       PYW++ NSW  DWGE GY  + RG++ C + + V
+Sbjct: 280 TGEALGGHAIRIIGWGVEN-----KTPYWLIANSWNEDWGENGYFRIVRGRDECSIESEV 334
+
+Query: 339 STS 341
+              
+Sbjct: 335 IAG 337
+
+
+>sp|P25802|CYS1_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 341
+
+ Score =  213 bits (537), Expect = 4e-55
+ Identities = 64/297 (21%), Positives = 110/297 (36%), Gaps = 56/297 (18%)
+
+Query: 88  FKNYYLNNKEAIFTD--DLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGS 141
+           FK   ++ K     +  D  V D   +E  + IP ++D    W    ++  + +Q  CGS
+Sbjct: 59  FKQRLMDLKYIDQNNIPDEEVEDEELEENNDDIPESYDPRIQWANCSSLFHIPDQANCGS 118
+
+Query: 142 CWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAY 199
+           CW+ S+   +  +  I+    K V +S Q++V C   C        C +GC GG   +A+
+Sbjct: 119 CWAVSSAAAMSDRICIASKGAKQVLISAQDVVSC---CTW------CGDGCEGGWPISAF 169
+
+Query: 200 NYIIKNGGIQ------TESSYPYTAETGTQCNFNSANIGAKI------------------ 235
+            +    G +         S  PY        + N    G  +                  
+Sbjct: 170 RFHADEGVVTGGDYNTKGSCRPYEIHPCGH-HGNETYYGECVGMADTPRCKRRCLLGYPK 228
+
+Query: 236 --------SNFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDH 286
+                        +  +   +   I+  GP+        ++  Y  G++       +  H
+Sbjct: 229 SYPSDRYYKKAYQLKNSVKAIQKDIMKNGPVVATYTVYEDFAHYRSGIYKHKAGRKTGLH 288
+
+Query: 287 GILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+            + ++G+  +     K  PYWIV NSW  DWGE G+  + RG N CG    ++   +
+Sbjct: 289 AVKVIGWGEE-----KGTPYWIVANSWHDDWGENGFFRMHRGSNDCGFEERMAAGSV 340
+
+
+>sp|P25793|CYS2_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 2 PRECURSOR
+          Length = 342
+
+ Score =  212 bits (534), Expect = 1e-54
+ Identities = 64/302 (21%), Positives = 119/302 (39%), Gaps = 56/302 (18%)
+
+Query: 81  ADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQ 136
+           +D + D F+   ++ K      +L V +  D E    IP ++D    W+       +++Q
+Sbjct: 53  SDPTPD-FEQKIMSIKYKHQKLNLMVKEDPDPEVD--IPPSYDPRDVWKNCTTFY-IRDQ 108
+
+Query: 137 GQCGSCWSFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGL 194
+             CGSCW+ ST   +  +  I+    K V++S  +++ C   C        C +GC GG 
+Sbjct: 109 ANCGSCWAVSTAAAISDRICIASKAEKQVNISATDIMTC---C-----RPQCGDGCEGGW 160
+
+Query: 195 QPNAYNYIIKNGGIQTES------SYPYTAET----GTQCNFNSANIGAKI--------- 235
+              A+ Y I +G +            PY        G    +      A           
+Sbjct: 161 PIEAWKYFIYDGVVSGGEYLTKDVCRPYPIHPCGHHGNDTYYGECRGTAPTPPCKRKCRP 220
+
+Query: 236 -------------SNFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNP 281
+                         +  ++ ++   +   I+  GP+  +     +++ Y  G++      
+Sbjct: 221 GVRKMYRIDKRYGKDAYIVKQSVKAIQSEILKNGPVVASFAVYEDFRHYKSGIYKHTAGE 280
+
+Query: 282 NSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTS 341
+               H + ++G+  +N     N  +W++ NSW  DWGE+GY  + RG N CG+   ++  
+Sbjct: 281 LRGYHAVKMIGWGNEN-----NTDFWLIANSWHNDWGEKGYFRIVRGSNDCGIEGTIAAG 335
+
+Query: 342 II 343
+           I+
+Sbjct: 336 IV 337
+
+
+>sp|P19092|CYS1_HAECO CATHEPSIN B-LIKE CYSTEINE PROTEINASE 1 PRECURSOR
+          Length = 342
+
+ Score =  208 bits (523), Expect = 2e-53
+ Identities = 62/295 (21%), Positives = 115/295 (38%), Gaps = 55/295 (18%)
+
+Query: 88  FKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCW 143
+           F+   ++ K      +L V +  D E    IP ++D    W+       +++Q  CGSCW
+Sbjct: 59  FEQKIMDIKYKHQKLNLMVKEDPDPEVD--IPPSYDPRDVWKNCTTFY-IRDQANCGSCW 115
+
+Query: 144 SFSTTGNVEGQHFISQN--KLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNY 201
+           + ST   +  +  I+    K V++S  +++ C   C        C +GC GG    A+ Y
+Sbjct: 116 AVSTAAAISDRICIASKAEKQVNISATDIMTC---C-----RPQCGDGCEGGWPIEAWKY 167
+
+Query: 202 IIKNGGIQTES------SYPYTAET----GTQCNFNSANIGAKI---------------- 235
+            I +G +            PY        G    +      A                  
+Sbjct: 168 FIYDGVVSGGEYLTKDVCRPYPIHPCGHHGNDTYYGECRGTAPTPPCKRKCRPGVRKMYR 227
+
+Query: 236 ------SNFTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGGVFDIPCNPNSLDHGI 288
+                  +  ++ ++   +   I+  GP+  +     +++ Y  G++          H +
+Sbjct: 228 IDKRYGKDAYIVKQSVKAIQSEILRNGPVVASFAVYEDFRHYKSGIYKHTAGELRGYHAV 287
+
+Query: 289 LIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII 343
+            ++G+  +N     N  +W++ NSW  DWGE+GY  + RG N CG+   ++  I+
+Sbjct: 288 KMIGWGNEN-----NTDFWLIANSWHNDWGEKGYFRIIRGTNDCGIEGTIAAGIV 337
+
+
+>sp|P43507|CPR3_CAEEL CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3 PRECURSOR
+          Length = 370
+
+ Score =  204 bits (513), Expect = 3e-52
+ Identities = 61/265 (23%), Positives = 99/265 (37%), Gaps = 49/265 (18%)
+
+Query: 112 DEFINSIPTAFD----WRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS--L 165
+           +     +P  FD    W     +  ++NQ  CGSCW+F     +  +  I  N      +
+Sbjct: 86  EIVPEPLPDTFDAREKWPDCNTIKLIRNQATCGSCWAFGAAEVISDRVCIQSNGTQQPVI 145
+
+Query: 166 SEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSY------PYTAE 219
+           S ++++ C   C        C  GC GG    A  +   +G + T   Y      PY+  
+Sbjct: 146 SVEDILSC---C-----GTTCGYGCKGGYSIEALRFWASSGAV-TGGDYGGHGCMPYSFA 196
+
+Query: 220 TGTQ---------CNF------------NSANIGAKISNFTMIPKNETVMAGYIVSTGPL 258
+             T+         C                 + GA     T   K+ T +   I   GP+
+Sbjct: 197 PCTKNCPESTTPSCKTTCQSSYKTEEYKKDKHYGASAYKVTT-TKSVTEIQTEIYHYGPV 255
+
+Query: 259 AIAADAV-EWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADW 317
+             +     ++  Y  GV+          H + I+G+  +N +      YW++ NSWG  +
+Sbjct: 256 EASYKVYEDFYHYKSGVYHYTSGKLVGGHAVKIIGWGVENGV-----DYWLIANSWGTSF 310
+
+Query: 318 GEQGYIYLRRGKNTCGVSNFVSTSI 342
+           GE+G+  +RRG N C +   V   I
+Sbjct: 311 GEKGFFKIRRGTNECQIEGNVVAGI 335
+
+
+>sp|P25780|EUM1_EURMA MITE GROUP I ALLERGEN EUR M 1 (EUR M I)
+          Length = 211
+
+ Score =  201 bits (507), Expect = 1e-51
+ Identities = 69/220 (31%), Positives = 99/220 (44%), Gaps = 25/220 (11%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHE 176
+           S+P+  D R+   VTP++ QG CGSCW+FS   + E  +   +N  + L+EQ LVDC   
+Sbjct: 10  SLPSELDLRSLRTVTPIRMQGGCGSCWAFSGVASTESAYLAYRNMSLDLAEQELVDC--- 66
+
+Query: 177 CMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKIS 236
+                   A   GC+G   P    YI +NG +Q E  YPY A   +    N+   G K  
+Sbjct: 67  --------ASQNGCHGDTIPRGIEYIQQNGVVQ-EHYYPYVAREQSCHRPNAQRYGLK-- 115
+
+Query: 237 NFTMIPKNETVMAGYIVSTGPLAIAA-----DAVEWQFYIGGVFDIPCNPNSLD-HGILI 290
+           N+  I   ++      ++    A+A      D   ++ Y G       N    + H + I
+Sbjct: 116 NYCQISPPDSNKIRQALTQTHTAVAVIIGIKDLNAFRHYDGRTIMQHDNGYQPNYHAVNI 175
+
+Query: 291 VGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKN 330
+           VGY        + + YWIV+NSW   WG+ GY Y     N
+Sbjct: 176 VGYGN-----TQGVDYWIVRNSWDTTWGDNGYGYFAANIN 210
+
+
+>sp|P25773|CATL_FELCA CATHEPSIN L (PROGESTERONE-DEPENDENT PROTEIN) (PDP)
+          Length = 139
+
+ Score =  185 bits (464), Expect = 2e-46
+ Identities = 55/141 (39%), Positives = 84/141 (59%), Gaps = 5/141 (3%)
+
+Query: 192 GGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGY 251
+           GGL  +A+ Y+  NGG+ +E SYPY A+ G  C +   N  A ++++  IP  E  +   
+Sbjct: 1   GGLIDDAFQYVKDNGGLDSEESYPYHAQ-GDSCKYRPENSVANVTDYWDIPSKENELMIT 59
+
+Query: 252 IVSTGPLAIAADAV--EWQFYIGGVFDIP-CNPNSLDHGILIVGYSAKNTIFRKNMPYWI 308
+           + + GP++ A DA    ++FY  G++  P C+   +DHG+L+VGY A  T   +N  YWI
+Sbjct: 60  LAAVGPISAAIDASLDTFRFYKEGIYYDPSCSSEDVDHGVLVVGYGADGT-ETENKKYWI 118
+
+Query: 309 VKNSWGADWGEQGYIYLRRGK 329
+           +KNSWG DWG  GYI + + +
+Sbjct: 119 IKNSWGTDWGMDGYIKMAKDR 139
+
+
+>sp|Q23894|CYS3_DICDI CYSTEINE PROTEINASE 3 (CYSTEINE PROTEINASE II)
+          Length = 151
+
+ Score =  158 bits (395), Expect = 2e-38
+ Identities = 61/158 (38%), Positives = 86/158 (53%), Gaps = 17/158 (10%)
+
+Query: 41  SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF 100
+           +H+E++ R+E FK N+  +   N    +  + T  G+N+ ADLS++E++  YL  +  I 
+Sbjct: 1   THKEFMPRYEEFKKNMDYVHNWN----SKGSKTVLGLNQHADLSNEEYRLNYLGTRAHIK 56
+
+Query: 101 TDDLPVAD---YLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFI 157
+            +     +    L+       P   DWR + AVTPVK+QGQCGSC   STTG+VEG   I
+Sbjct: 57  LNGYHKRNLGLRLNRPHFKQ-PLNVDWREKDAVTPVKDQGQCGSC-IISTTGSVEGVTAI 114
+
+Query: 158 SQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQ 195
+              KLVSLSEQN++               +EGCNGGL 
+Sbjct: 115 KTGKLVSLSEQNILRLSSSF--------GNEGCNGGLM 144
+
+
+>sp|P13823|SERA_PLAFG SERINE-REPEAT ANTIGEN PROTEIN PRECURSOR (P126) (111 KD ANTIGEN)
+          Length = 989
+
+ Score =  157 bits (393), Expect = 3e-38
+ Identities = 60/255 (23%), Positives = 101/255 (39%), Gaps = 46/255 (18%)
+
+Query: 123 DWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEG 182
+           D     +   V++QG C + W F++  ++E    +   +   +S   + +C      Y+G
+Sbjct: 569 DENNCISNLQVEDQGNCDTSWIFASKYHLETIRCMKGYEPTKISALYVANC------YKG 622
+
+Query: 183 EEACDEGCNGGLQPNAYNYIIKNGG-IQTESSYPYT-AETGTQCN--------------- 225
+           E    + C+ G  P  +  II++ G +  ES+YPY   + G QC                
+Sbjct: 623 EHK--DRCDEGSSPMEFLQIIEDYGFLPAESNYPYNYVKVGEQCPKVEDHWMNLWDNGKI 680
+
+Query: 226 -----------------FNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQ 268
+                            + S      +  F  I K E +  G +++     I A+ V   
+Sbjct: 681 LHNKNEPNSLDGKGYTAYESERFHDNMDAFVKIIKTEVMNKGSVIAY----IKAENVMGY 736
+
+Query: 269 FYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG 328
+            + G      C  ++ DH + IVGY        +   YWIV+NSWG  WG++GY  +   
+Sbjct: 737 EFSGKKVQNLCGDDTADHAVNIVGYGNYVNSEGEKKSYWIVRNSWGPYWGDEGYFKVDMY 796
+
+Query: 329 KNTCGVSNFVSTSII 343
+             T    NF+ + +I
+Sbjct: 797 GPTHCHFNFIHSVVI 811
+
+
+>sp|Q06544|CYS3_OSTOS CATHEPSIN B-LIKE CYSTEINE PROTEINASE 3
+          Length = 174
+
+ Score =  139 bits (348), Expect = 6e-33
+ Identities = 31/130 (23%), Positives = 53/130 (39%), Gaps = 8/130 (6%)
+
+Query: 217 TAETGTQCNFNSANIGAKISN--FTMIPKNETVMAGYIVSTGPLAIAADAV-EWQFYIGG 273
+             +   Q  +  A    K        +P N   +   I+  GP+        ++  Y  G
+Sbjct: 50  KCQKTCQRGYLKAYKEDKHFGKSAYRLPNNVKAIQRDIMKNGPVVAGFIVYEDFAHYKSG 109
+
+Query: 274 VFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCG 333
+           ++       +  H + I+G+  +     K  PYW++ NSW  DWGE+G+  + RG N C 
+Sbjct: 110 IYKHTAGRMTGGHAVKIIGWGKE-----KGTPYWLIANSWHDDWGEKGFYRMIRGINNCR 164
+
+Query: 334 VSNFVSTSII 343
+           +   V   I+
+Sbjct: 165 IEEMVFAGIV 174
+
+
+>sp|P05993|PAP5_CARPA CYSTEINE PROTEINASE (CLONE PLBPC13)
+          Length = 96
+
+ Score =  131 bits (327), Expect = 2e-30
+ Identities = 43/87 (49%), Positives = 55/87 (62%), Gaps = 2/87 (2%)
+
+Query: 256 GPLAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKN--TIFRKNMPYWIVKNSW 313
+           GPLA+A +A   Q YIGGV         L+HG+L+VGY +     I  K  PYW++KNSW
+Sbjct: 1   GPLAVAINAAYMQTYIGGVSCPYICSRRLNHGVLLVGYGSAGYAPIRLKEKPYWVIKNSW 60
+
+Query: 314 GADWGEQGYIYLRRGKNTCGVSNFVST 340
+           G +WGE GY  + RG+N CGV + VST
+Sbjct: 61  GENWGENGYYKICRGRNICGVDSMVST 87
+
+
+>sp|P12399|CT2A_MOUSE CTLA-2-ALPHA PROTEIN PRECURSOR
+          Length = 136
+
+ Score = 98.2 bits (241), Expect = 2e-20
+ Identities = 32/131 (24%), Positives = 53/131 (40%), Gaps = 14/131 (10%)
+
+Query: 8   VLAVFTVFVSSRGIPPE--EQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLI 65
+            L +  + + S   PP+    +++ E++ KF K Y+  E   R  +++ N  KIE  N  
+Sbjct: 16  FLLILCLGMMSAAPPPDPSLDNEWKEWKTKFAKAYNLNEERHRRLVWEENKKKIEAHNAD 75
+
+Query: 66  AINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWR 125
+               K     G+N+F+DL+ +EFK     N                 E    +P   D  
+Sbjct: 76  YEQGKTSFYMGLNQFSDLTPEEFKTNCYGNSL------------NRGEMAPDLPEYEDLG 123
+
+Query: 126 TRGAVTPVKNQ 136
+               +TP + Q
+Sbjct: 124 KNSYLTPGRAQ 134
+
+
+>sp|P12400|CT2B_MOUSE CTLA-2-BETA PROTEIN PRECURSOR
+          Length = 141
+
+ Score = 95.9 bits (235), Expect = 1e-19
+ Identities = 29/133 (21%), Positives = 54/133 (39%), Gaps = 13/133 (9%)
+
+Query: 4   ILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELN 63
+           + L +L +  +  +     P   +++ E++  F K YS +E   R  +++ N  KIE  N
+Sbjct: 20  VFLLILCLGMMSAAPS-PDPSLDNEWKEWKTTFAKAYSLDEERHRRLMWEENKKKIEAHN 78
+
+Query: 64  LIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFD 123
+                 K     G+N+F+DL+ +EF+             +   +     E    +P   D
+Sbjct: 79  ADYERGKTSFYMGLNQFSDLTPEEFR------------TNCCGSSMCRGEMAPDLPEYED 126
+
+Query: 124 WRTRGAVTPVKNQ 136
+                 +TP + Q
+Sbjct: 127 LGKNSYLTPGRAQ 139
+
+
+>sp|P32957|CC4_CARCN CYSTEINE PROTEINASE IV (CC-IV)
+          Length = 43
+
+ Score = 84.6 bits (206), Expect = 3e-16
+ Identities = 26/42 (61%), Positives = 30/42 (70%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN 160
+           P + DWR +GAVTPVKNQG CGSCW+FST   VEG + I   
+Sbjct: 2   PESIDWRKKGAVTPVKNQGSCGSCWAFSTIVTVEGINKIRTG 43
+
+
+>sp|P32956|CC3_CARCN CYSTEINE PROTEINASE III (CC-III)
+          Length = 43
+
+ Score = 84.2 bits (205), Expect = 4e-16
+ Identities = 26/42 (61%), Positives = 30/42 (70%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN 160
+           P + DWR +GAVTPVKNQG CGSCW+FST   VEG + I   
+Sbjct: 2   PESIDWRKKGAVTPVKNQGSCGSCWAFSTIATVEGINKIVHG 43
+
+
+>sp|P32955|CC2_CARCN CYSTEINE PROTEINASE II (CC-II)
+          Length = 43
+
+ Score = 81.5 bits (198), Expect = 2e-15
+ Identities = 24/42 (57%), Positives = 29/42 (68%)
+
+Query: 119 PTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQN 160
+           P + DWR +GAVTPVK+Q  CGSCW+FST   VEG + I   
+Sbjct: 2   PGSVDWRQKGAVTPVKDQNPCGSCWAFSTVATVEGINKIVTG 43
+
+
+>sp||CATL_CHICK_2 [Segment 2 of 2] CATHEPSIN L
+          Length = 42
+
+ Score = 77.2 bits (187), Expect = 5e-14
+ Identities = 20/42 (47%), Positives = 28/42 (66%), Gaps = 1/42 (2%)
+
+Query: 303 NMPYWIVKNSWGADWGEQGYIYLRRG-KNTCGVSNFVSTSII 343
+              YWIVKNSWG  WG++GYIY+ +  KN CG++   S  ++
+Sbjct: 1   GKKYWIVKNSWGEKWGDKGYIYMAKDRKNHCGIATAASYPLV 42
+
+
+>sp|P32954|CC1_CARCN CYSTEINE PROTEINASE I (CC-I)
+          Length = 43
+
+ Score = 76.0 bits (184), Expect = 1e-13
+ Identities = 24/40 (60%), Positives = 29/40 (72%)
+
+Query: 118 IPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFI 157
+           I  + DWR +GAVTPV+NQG CGSCW+FS+   VEG   I
+Sbjct: 1   IVASIDWRQKGAVTPVRNQGSCGSCWTFSSVAAVEGIIKI 40
+
+
+>sp|P05689|CATX_BOVIN CATHEPSIN
+          Length = 73
+
+ Score = 59.7 bits (142), Expect = 9e-09
+ Identities = 15/41 (36%), Positives = 24/41 (57%), Gaps = 5/41 (12%)
+
+Query: 285 DHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+           +H + + G+   +      M YWIV+NSWG  WGE G++ +
+Sbjct: 10  NHIVSVAGWGVSD-----GMEYWIVRNSWGEPWGEHGWMRI 45
+
+
+>sp|P94869|PEPG_LACDL AMINOPEPTIDASE G
+          Length = 437
+
+ Score = 46.0 bits (107), Expect = 1e-04
+ Identities = 13/49 (26%), Positives = 21/49 (42%), Gaps = 4/49 (8%)
+
+Query: 280 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG 328
+               + H + +VG        R+    W V+NSWG   GE+G+  +   
+Sbjct: 355 GAGEVSHAMTLVGVDEDKGDIRQ----WKVENSWGDKSGEKGFFVMSHN 399
+
+
+>sp|P94870|PEPE_LACHE AMINOPEPTIDASE E
+          Length = 438
+
+ Score = 42.9 bits (99), Expect = 0.001
+ Identities = 14/48 (29%), Positives = 21/48 (43%), Gaps = 4/48 (8%)
+
+Query: 278 PCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+                 + H + +VG    N   R+    W V+NSWG   G +GY  +
+Sbjct: 354 KTGVGEVSHAMTLVGVDEDNGEVRQ----WKVENSWGDKSGAKGYYVM 397
+
+
+>sp|P94868|PEPW_LACDL AMINOPEPTIDASE W
+          Length = 437
+
+ Score = 42.1 bits (97), Expect = 0.002
+ Identities = 14/43 (32%), Positives = 20/43 (45%), Gaps = 4/43 (9%)
+
+Query: 286 HGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRG 328
+           H + +VG        R+    W V+NSWG   GE+GY  +   
+Sbjct: 361 HDMALVGVDVDGGQVRQ----WKVENSWGDKSGEKGYFTMSAD 399
+
+
+>sp|Q10744|PEPC_LACHE AMINOPEPTIDASE C
+          Length = 449
+
+ Score = 41.4 bits (95), Expect = 0.003
+ Identities = 13/46 (28%), Positives = 22/46 (47%), Gaps = 4/46 (8%)
+
+Query: 280 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+             + +DH ++I G    +    K    W ++NSWG   G +GY  +
+Sbjct: 358 GESMMDHAMVITGVDIVDGKPTK----WKIENSWGEKPGFKGYFVM 399
+
+
+>sp|Q04723|PEPC_LACLC AMINOPEPTIDASE C
+          Length = 436
+
+ Score = 39.8 bits (91), Expect = 0.009
+ Identities = 15/68 (22%), Positives = 29/68 (42%), Gaps = 9/68 (13%)
+
+Query: 262 ADAVEWQ------FYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGA 315
+            D+ +++      F       +    + + H +++ G    +     N   W V+NSWG 
+Sbjct: 326 MDSYDFKSSLDIEFTQSKAGRLDYGESLMTHAMVLAG---VDLDADGNSTKWKVENSWGK 382
+
+Query: 316 DWGEQGYI 323
+           D G++GY 
+Sbjct: 383 DAGQKGYF 390
+
+
+ Score = 31.6 bits (70), Expect = 2.5
+ Identities = 15/77 (19%), Positives = 28/77 (35%), Gaps = 10/77 (12%)
+
+Query: 77  VNKFADLSS---DEFKN--YYLNNKEAIFTDDLPVADYLDDEFINSIPT-AFDWRTRGAV 130
+           +   +D +    + F         + A+  + L  +  +      ++P  + D       
+Sbjct: 1   MTVTSDFTQKLYENFAENTKLRAVENAVTKNGLLSSLEVRGSHAANLPEFSLDLTKD--- 57
+
+Query: 131 TPVKNQGQCGSCWSFST 147
+            PV NQ Q G CW F+ 
+Sbjct: 58  -PVTNQKQSGRCWMFAA 73
+
+
+>sp|Q48543|PEPC_LACDL AMINOPEPTIDASE C
+          Length = 449
+
+ Score = 38.2 bits (87), Expect = 0.025
+ Identities = 12/46 (26%), Positives = 23/46 (49%), Gaps = 4/46 (8%)
+
+Query: 280 NPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYL 325
+             + ++H ++I    A + +  K    W ++NSWG   G +GY  +
+Sbjct: 358 GESMMNHAMVIT---AVDLVDDKPTK-WKIENSWGDKSGFKGYFVM 399
+
+
+>sp|P21381|THPA_THADA THAUMATOPAIN
+          Length = 35
+
+ Score = 38.2 bits (87), Expect = 0.025
+ Identities = 14/27 (51%), Positives = 19/27 (69%), Gaps = 2/27 (7%)
+
+Query: 117 SIPTAFDWRTRGAVTPVKNQGQ-CGSC 142
+           ++P + DW  +GAV  VKNQ + CGSC
+Sbjct: 1   NLPNSVDWWKKGAVAAVKNQ-RXCGSC 26
+
+
+>sp|Q56115|PEPC_STRTR AMINOPEPTIDASE C
+          Length = 445
+
+ Score = 35.9 bits (81), Expect = 0.13
+ Identities = 11/46 (23%), Positives = 21/46 (44%), Gaps = 5/46 (10%)
+
+Query: 279 CNPNSLDHGILIVGYSAKNTIFRKNMPY-WIVKNSWGADWGEQGYI 323
+            + + + H +++ G            P  W ++NSWG   G++GY 
+Sbjct: 356 YSESLMTHAMVLTGVDLD----ADGKPIKWKIENSWGDKVGQKGYF 397
+
+
+>sp|P09983|HLY1_ECOLI HEMOLYSIN, CHROMOSOMAL
+          Length = 1023
+
+ Score = 35.5 bits (80), Expect = 0.17
+ Identities = 21/119 (17%), Positives = 35/119 (28%), Gaps = 20/119 (16%)
+
+Query: 31  EFQDKFNKKYSHEEYLERFEIF-KSNLGKIEELNLIAI-------NHKADTKF-----GV 77
+           E++ K  K Y    Y  R   F + N   + + N             +          GV
+Sbjct: 430 EWEKKHGKNYFENGYDARHAAFLEDNFKILSQYNKEYSVERSVLITQQHWDTLIGELAGV 489
+
+Query: 78  NKFAD--LSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVK 134
+            +  D  LS   + +YY   K           D    +  + +    D     + T +K
+Sbjct: 490 TRNGDKTLSGKSYIDYYEEGKR-----LEKKPDEFQKQVFDPLKGNIDLSDSKSSTLLK 543
+
+
+>sp|P33403|CYSP_TRIFO CYSTEINE PROTEINASE
+          Length = 23
+
+ Score = 34.7 bits (78), Expect = 0.28
+ Identities = 8/19 (42%), Positives = 12/19 (63%)
+
+Query: 120 TAFDWRTRGAVTPVKNQGQ 138
+            + DWR +G V  +K+Q Q
+Sbjct: 2   DSLDWREKGVVNSIKDQAQ 20
+
+
+>sp|P08715|HLYA_ECOLI HEMOLYSIN, PLASMID
+          Length = 1024
+
+ Score = 34.7 bits (78), Expect = 0.28
+ Identities = 9/38 (23%), Positives = 14/38 (36%), Gaps = 1/38 (2%)
+
+Query: 31  EFQDKFNKKYSHEEYLERFEIF-KSNLGKIEELNLIAI 67
+           E++ K  K Y    Y  R   F + N   + + N    
+Sbjct: 431 EWEKKHGKNYFENGYDARHAAFLEDNFKILSQYNKEYS 468
+
+
+>sp|P87362|BLMH_CHICK BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH) (AMINOPEPTIDASE H)
+          Length = 455
+
+ Score = 34.3 bits (77), Expect = 0.37
+ Identities = 10/19 (52%), Positives = 14/19 (73%)
+
+Query: 307 WIVKNSWGADWGEQGYIYL 325
+           W V+NSWG D G +GY+ +
+Sbjct: 392 WRVENSWGEDRGNKGYLIM 410
+
+
+>sp|P54704|PSPB_DICDI PRESPORE PROTEIN B PRECURSOR
+          Length = 379
+
+ Score = 34.3 bits (77), Expect = 0.37
+ Identities = 24/109 (22%), Positives = 41/109 (37%), Gaps = 15/109 (13%)
+
+Query: 210 TESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQF 269
+           T   YP T   G    +N   +  K  +  ++PK E   + Y+   GP     D   W++
+Sbjct: 78  TGKIYP-TVNMGDCHRYNVDLVFKKDKSGNVMPKKELRESAYVP-HGP----IDPATWKY 131
+
+Query: 270 YI--GGVFDI-PCNPNSL------DHGILIVGYSAKNTIFRKNMPYWIV 309
+           Y    G +    C+P ++          L +GY A        +  W++
+Sbjct: 132 YTFVQGKWTGFGCDPQNVVFSGAEGGMPLQLGYGANGKNGDNGISVWLI 180
+
+
+>sp|Q13867|BLMH_HUMAN BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)
+          Length = 455
+
+ Score = 34.0 bits (76), Expect = 0.49
+ Identities = 10/19 (52%), Positives = 14/19 (73%)
+
+Query: 307 WIVKNSWGADWGEQGYIYL 325
+           W V+NSWG D G +GY+ +
+Sbjct: 392 WRVENSWGEDHGHKGYLCM 410
+
+
+>sp|P70645|BLMH_RAT BLEOMYCIN HYDROLASE (BLM HYDROLASE) (BMH)
+          Length = 454
+
+ Score = 34.0 bits (76), Expect = 0.49
+ Identities = 10/19 (52%), Positives = 14/19 (73%)
+
+Query: 307 WIVKNSWGADWGEQGYIYL 325
+           W V+NSWG D G +GY+ +
+Sbjct: 392 WRVENSWGEDHGHKGYLCM 410
+
+
+>sp|P80532|CAT3_FASHE PUTATIVE CATHEPSIN L3 (NEWLY EXCYSTED JUVENILE PROTEIN 8)
+          Length = 19
+
+ Score = 34.0 bits (76), Expect = 0.49
+ Identities = 9/18 (50%), Positives = 12/18 (66%)
+
+Query: 118 IPTAFDWRTRGAVTPVKN 135
+           +P + DWR  G VT VK+
+Sbjct: 2   VPASIDWREYGYVTEVKD 19
+
+
+>sp|P16462|LKTA_ACTAC LEUKOTOXIN
+          Length = 1050
+
+ Score = 34.0 bits (76), Expect = 0.49
+ Identities = 11/68 (16%), Positives = 25/68 (36%), Gaps = 2/68 (2%)
+
+Query: 12  FTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKA 71
+            +    S  I  +   +   +++K+ K YS   Y  R   F      ++  N +   +K 
+Sbjct: 410 ASKQAVSEHIANQLADKIKAWENKYGKNYSENGYDARHSAFLE--DSLKLFNELREKYKT 467
+
+Query: 72  DTKFGVNK 79
+           +    + +
+Sbjct: 468 ENILSITQ 475
+
+
+>sp|P13438|TSP_MOUSE TROPHOBLAST-SPECIFIC PROTEIN PRECURSOR
+          Length = 124
+
+ Score = 32.4 bits (72), Expect = 1.4
+ Identities = 19/129 (14%), Positives = 41/129 (31%), Gaps = 12/129 (9%)
+
+Query: 8   VLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAI 67
+            L +  + V+S  I PE Q      + K       +E L +  ++   +   +  +    
+Sbjct: 6   FLVILCLGVASAVIVPEAQLDAELQEQK------DKEVLIK-AVWSKFMKTNKLHSSEND 58
+
+Query: 68  NHKADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTR 127
+                +   ++    L+ +E           +F ++         + +   P   D+   
+Sbjct: 59  QETEGSNIEMSASGQLTDEELMKIMTTVLHPMFEEEENKP-----QPVVDDPEFEDYTES 113
+
+Query: 128 GAVTPVKNQ 136
+           G    V NQ
+Sbjct: 114 GDGFFVPNQ 122
+
+
+>sp|Q00951|HLYA_ACTSU HEMOLYSIN (CYTOLYSIN II) (CLY-IIA) (HLY-IIA) (CYTC) (APPA)
+          Length = 956
+
+ Score = 32.4 bits (72), Expect = 1.4
+ Identities = 10/36 (27%), Positives = 16/36 (43%), Gaps = 1/36 (2%)
+
+Query: 31  EFQDKFNKKYSHEEYLER-FEIFKSNLGKIEELNLI 65
+           E++ K NK Y  + Y  R     + N+  +  LN  
+Sbjct: 427 EWEKKHNKNYFEQGYDSRHLADLQDNMKFLINLNKE 462
+
+
+>sp|P15377|RT2A_ACTPL RTX-II TOXIN DETERMINANT A (APX-IIA) (HEMOLYSIN IIA) (HLY-IIA)
+           (CYTOLYSIN IIA) (CLY-IIA)
+          Length = 956
+
+ Score = 32.4 bits (72), Expect = 1.4
+ Identities = 10/36 (27%), Positives = 16/36 (43%), Gaps = 1/36 (2%)
+
+Query: 31  EFQDKFNKKYSHEEYLER-FEIFKSNLGKIEELNLI 65
+           E++ K NK Y  + Y  R     + N+  +  LN  
+Sbjct: 427 EWEKKHNKNYFEQGYDSRHLADLQDNMKFLINLNKE 462
+
+
+>sp|P52181|TGLC_PAGMA PROTEIN-GLUTAMINE GAMMA-GLUTAMYLTRANSFERASE (TISSUE
+           TRANSGLUTAMINASE) (TGASE C) (TGC)
+          Length = 695
+
+ Score = 32.0 bits (71), Expect = 1.9
+ Identities = 12/48 (25%), Positives = 17/48 (35%), Gaps = 5/48 (10%)
+
+Query: 102 DDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTTG 149
+           ++          +  S+P    W   G V PVK     G CW F+   
+Sbjct: 237 EEPYTDGVAPYRWTGSVPILQQWSKAG-VRPVKY----GQCWVFAAVA 279
+
+
+>sp|P35681|TCTP_ORYSA TRANSLATIONALLY CONTROLLED TUMOR PROTEIN HOMOLOG (TCTP)
+          Length = 168
+
+ Score = 32.0 bits (71), Expect = 1.9
+ Identities = 12/34 (35%), Positives = 19/34 (55%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSN 55
+           PP ++ QF+ F  ++ K  S +   E+ E FK N
+Sbjct: 80  PPFDKKQFVTFMKRYIKNLSAKLDAEKQEEFKKN 113
+
+
+>sp|Q01532|BLH1_YEAST CYSTEINE PROTEINASE 1 (Y3) (BLEOMYCIN HYDROLASE) (BLM HYDROLASE)
+          Length = 454
+
+ Score = 31.6 bits (70), Expect = 2.5
+ Identities = 16/40 (40%), Positives = 19/40 (47%), Gaps = 9/40 (22%)
+
+Query: 131 TPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNL 170
+           TPV NQ   G CW F+ T  +         +L  LSE NL
+Sbjct: 62  TPVTNQKSSGRCWLFAATNQL---------RLNVLSELNL 92
+
+
+>sp|P16312|MMAL_DERMI MAJOR MITE FECAL ALLERGEN DER M 1 (DER M I)
+          Length = 30
+
+ Score = 31.2 bits (69), Expect = 3.2
+ Identities = 8/24 (33%), Positives = 14/24 (58%)
+
+Query: 114 FINSIPTAFDWRTRGAVTPVKNQG 137
+              ++P+  D R+   VTP++ QG
+Sbjct: 7   NSGNVPSELDLRSLRTVTPIRMQG 30
+
+
+>sp|O03992|TCTP_FRAAN TRANSLATIONALLY CONTROLLED TUMOR PROTEIN HOMOLOG (TCTP)
+          Length = 170
+
+ Score = 30.8 bits (68), Expect = 4.2
+ Identities = 16/62 (25%), Positives = 30/62 (47%), Gaps = 13/62 (20%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFA 81
+           PP ++ QF+ +  ++ K  + +   E+ E FK N+             +  TKF ++K +
+Sbjct: 82  PPFDKKQFVTWVKRYIKLLTPKLEGEQQETFKKNI-------------EGATKFLLSKLS 128
+
+Query: 82  DL 83
+           DL
+Sbjct: 129 DL 130
+
+
+>sp|Q04489|YMJ6_YEAST HYPOTHETICAL 59.5 KD PROTEIN IN VPS9-RAD10 INTERGENIC REGION
+          Length = 525
+
+ Score = 30.8 bits (68), Expect = 4.2
+ Identities = 26/122 (21%), Positives = 43/122 (34%), Gaps = 20/122 (16%)
+
+Query: 174 DHECMEYEGEEACDEGCNGGLQPNAYNYIIKN-----GGIQ----TESSYPYTAET--GT 222
+           D   +++ GE    E   G         +++N     G I      E  Y YT      +
+Sbjct: 87  DRYFLQFNGELYNKEISQGDNDSLYIASMLQNLKEGMGVIDVIKSLEGEYAYTIYDVNSS 146
+
+Query: 223 QCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCNPN 282
+           +  F    IG +  ++++ P NE  +A         ++   A  +Q  IGGV        
+Sbjct: 147 KLYFGRDPIGRRSLSYSVTPDNELYVA---------SVTGSAGSFQDCIGGVIYEYDTRT 197
+
+Query: 283 SL 284
+            L
+Sbjct: 198 KL 199
+
+
+>sp|Q03164|HRX_HUMAN ZINC FINGER PROTEIN HRX (ALL-1) (TRITHORAX-LIKE PROTEIN)
+          Length = 3969
+
+ Score = 30.8 bits (68), Expect = 4.2
+ Identities = 17/81 (20%), Positives = 38/81 (45%), Gaps = 7/81 (8%)
+
+Query: 126  TRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDC-DHECMEYEGEE 184
+               +V+ VK QGQ  S  +     ++E    +  +     S++NL+D  + E ++ + + 
+Sbjct: 2782 NCHSVSRVKTQGQ-DSLEA--QLSSLESSRRVHTSTP---SDKNLLDTYNTELLKSDSDN 2835
+
+Query: 185  ACDEGCNGGLQPNAYNYIIKN 205
+               + C   L  +  ++++KN
+Sbjct: 2836 NNSDDCGNILPSDIMDFVLKN 2856
+
+
+>sp||CATB_COTJA_1 [Segment 1 of 2] CATHEPSIN B (CATHEPSIN B1)
+          Length = 25
+
+ Score = 30.4 bits (67), Expect = 5.6
+ Identities = 6/25 (24%), Positives = 12/25 (48%), Gaps = 4/25 (16%)
+
+Query: 118 IPTAFD----WRTRGAVTPVKNQGQ 138
+           +P  FD    W     ++ +++QG 
+Sbjct: 1   LPDTFDSRKQWPNCPTISEIRDQGS 25
+
+
+>sp|Q62703|RCN2_RAT RETICULOCALBIN 2 PRECURSOR (CALCIUM-BINDING PROTEIN ERC-55)
+           (TAIPOXIN-ASSOCIATED CALCIUM-BINDING PROTEIN-49)
+           (TCBP-49)
+          Length = 318
+
+ Score = 30.4 bits (67), Expect = 5.6
+ Identities = 17/109 (15%), Positives = 40/109 (36%), Gaps = 4/109 (3%)
+
+Query: 26  QSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIA-INHKADTKFGVNKFADLS 84
+           +++  ++     K Y+ +E  ++F  +  N       +      +     F  N   D +
+Sbjct: 84  ENELSQWIQMSFKHYAMQEAKQQFVEYDKNSDGTVTWDEYNVQMYDRVIDFDENTALDDT 143
+
+Query: 85  SDE-FKNYYLNNKEAIFTDDLPVADYLDDEFINSI--PTAFDWRTRGAV 130
+            +E F+  +L +K+     +      L+ E   +   P   D+ T   +
+Sbjct: 144 EEESFRQLHLKDKKRFEKANQDSGPGLNLEEFIAFEHPEEVDYMTEFVI 192
+
+
+>sp|P48651|PSS1_HUMAN PHOSPHATIDYLSERINE SYNTHASE I (SERINE-EXCHANGE ENZYME I) (KIAA0024)
+          Length = 473
+
+ Score = 30.4 bits (67), Expect = 5.6
+ Identities = 6/20 (30%), Positives = 8/20 (40%)
+
+Query: 142 CWSFSTTGNVEGQHFISQNK 161
+           CW F   G +E    I   +
+Sbjct: 354 CWVFGVIGFLEAIVCIKFGQ 373
+
+
+>sp|P33404|CYSP_TRIVA CYSTEINE PROTEINASE
+          Length = 22
+
+ Score = 30.4 bits (67), Expect = 5.6
+ Identities = 10/17 (58%), Positives = 13/17 (75%), Gaps = 1/17 (5%)
+
+Query: 123 DWRTRGAVTPV-KNQGQ 138
+           DWR +GAV  + K+QGQ
+Sbjct: 6   DWRKKGAVNVIXKDQGQ 22
+
+
+>sp|Q00576|PSS1_CRILO PHOSPHATIDYLSERINE SYNTHASE I (SERINE-EXCHANGE ENZYME I)
+          Length = 471
+
+ Score = 30.4 bits (67), Expect = 5.6
+ Identities = 6/20 (30%), Positives = 8/20 (40%)
+
+Query: 142 CWSFSTTGNVEGQHFISQNK 161
+           CW F   G +E    I   +
+Sbjct: 354 CWVFGVIGFLEAIVCIKFGQ 373
+
+
+>sp|Q9ZRX0|TCTP_PSEMZ TRANSLATIONALLY CONTROLLED TUMOR PROTEIN HOMOLOG (TCTP)
+          Length = 167
+
+ Score = 30.1 bits (66), Expect = 7.3
+ Identities = 17/62 (27%), Positives = 27/62 (43%), Gaps = 13/62 (20%)
+
+Query: 22  PPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFA 81
+           PP ++ QFL F  ++ K  + +   ER   FK N+             +   K  V+K +
+Sbjct: 79  PPFDKKQFLGFIKRYIKNLATKLSEERQAEFKKNV-------------EGAAKMLVSKLS 125
+
+Query: 82  DL 83
+           DL
+Sbjct: 126 DL 127
+
+
+>sp|Q94480|V136_DICDI VEG136 PROTEIN
+          Length = 357
+
+ Score = 30.1 bits (66), Expect = 7.3
+ Identities = 21/95 (22%), Positives = 34/95 (35%), Gaps = 3/95 (3%)
+
+Query: 70  KADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGA 129
+                +G++ F  LS DE    Y ++       +     Y     I S    ++W     
+Sbjct: 79  NKKYDYGLDLFL-LSIDEGITGYRDDSLETVKRNQEQ--YPIPSQILSYKELYNWTMDDI 135
+
+Query: 130 VTPVKNQGQCGSCWSFSTTGNVEGQHFISQNKLVS 164
+           V  +  +G C SC  F       G   +  NK+V+
+Sbjct: 136 VKEIGLKGNCTSCGVFRRQALDRGAVMLKANKIVT 170
+
+
+>sp|P55131|RT32_ACTPL RTX-III TOXIN DETERMINANT A FROM SEROTYPE 8 (APX-IIIA) (CYTOLYSIN
+           IIIA) (CLY-IIIA)
+          Length = 1052
+
+ Score = 30.1 bits (66), Expect = 7.3
+ Identities = 9/53 (16%), Positives = 21/53 (38%), Gaps = 1/53 (1%)
+
+Query: 20  GIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIF-KSNLGKIEELNLIAINHKA 71
+            +  +  ++  E++ K+ K Y    Y  R + F + +   +   N      +A
+Sbjct: 427 HVASKIGNKIDEWEKKYGKNYFENGYDARHKAFLEDSFSLLSSFNKQYETERA 479
+
+
+>sp|P55130|RT31_ACTPL RTX-III TOXIN DETERMINANT A FROM SEROTYPE 2 (APX-IIIA) (CYTOLYSIN
+           IIIA) (CLY-IIIA)
+          Length = 1049
+
+ Score = 30.1 bits (66), Expect = 7.3
+ Identities = 9/53 (16%), Positives = 21/53 (38%), Gaps = 1/53 (1%)
+
+Query: 20  GIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIF-KSNLGKIEELNLIAINHKA 71
+            +  +  ++  E++ K+ K Y    Y  R + F + +   +   N      +A
+Sbjct: 427 HVASKIGNKIDEWEKKYGKNYFENGYDARHKAFLEDSFSLLSSFNKQYETERA 479
+
+
+>sp|P40101|YE16_YEAST HYPOTHETICAL 35.9 KD PROTEIN IN ISC10 3'REGION
+          Length = 306
+
+ Score = 30.1 bits (66), Expect = 7.3
+ Identities = 13/48 (27%), Positives = 16/48 (33%), Gaps = 1/48 (2%)
+
+Query: 199 YNY-IIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNE 245
+           Y Y I+KNG    ES Y    E      F       K+   +     E
+Sbjct: 103 YQYEILKNGDFPEESDYEVKGECDGFTLFKVLFCTVKVKKTSYYRNKE 150
+
+
+>sp|P13388|XMRK_XIPMA MELANOMA RECEPTOR PROTEIN-TYROSINE KINASE PRECURSOR
+          Length = 1166
+
+ Score = 30.1 bits (66), Expect = 7.3
+ Identities = 16/54 (29%), Positives = 23/54 (41%), Gaps = 11/54 (20%)
+
+Query: 140 GSCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGG 193
+           GSCW+          H     KL+  +EQ    C+  C   +  + C+E C GG
+Sbjct: 202 GSCWAPGPG------HCQKFTKLL-CAEQ----CNRRCRGPKPIDCCNEHCAGG 244
+
+
+>sp|Q9ZL75|MOAA_HELPJ MOLYBDENUM COFACTOR BIOSYNTHESIS PROTEIN A
+          Length = 321
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 23/162 (14%), Positives = 51/162 (31%), Gaps = 9/162 (5%)
+
+Query: 41  SHEEYLERFEIFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSDEFKNYYLNNKEAIF 100
+           + +E LE  E  K+   +I  +  +   H      G+ +   L     K   +  ++   
+Sbjct: 165 NDDEILELLEYAKNRSIQIRYIEFMENTHAKSLVKGLKEKEILDLIAQKYKIMGMEKPKQ 224
+
+Query: 101 TDDLPVADYLDDEFINSIPTAFDW-RTRGAVTPVKNQGQCGSCWSFSTTGNVEGQHFISQ 159
+                       +F    P + D+ ++   +    +   C   +        E       
+Sbjct: 225 GSSKIYTLENGYQFGIIAPHSDDFCQSCNRIRLASDGKICPCLYYQDAIDAKEAIINKDT 284
+
+Query: 160 NKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNY 201
+             +  L +Q++++   + M  +         NGG    A+ Y
+Sbjct: 285 KMMKRLLKQSIINKPEKNMWNDK--------NGGTPTRAFYY 318
+
+
+>sp|P55129|RT12_ACTPL RTX-I TOXIN DETERMINANT A FROM SEROTYPES 5/10 (APX-IA) (HEMOLYSIN
+           IA) (HLY-IA) (CYTOLYSIN IA) (CLY-IA)
+          Length = 1023
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 8/42 (19%), Positives = 15/42 (35%), Gaps = 1/42 (2%)
+
+Query: 27  SQFLEFQDKFNKKYSHEEYLERFEIF-KSNLGKIEELNLIAI 67
+           ++  E++ K  K Y    Y  R   F +     + + N    
+Sbjct: 423 NKIDEWEKKHGKNYFENGYDARHSAFLEDTFELLSQYNKEYS 464
+
+
+>sp|P55128|RT11_ACTPL RTX-I TOXIN DETERMINANT A FROM SEROTYPES 1/9 (APX-IA) (HEMOLYSIN
+           IA) (HLY-IA) (CYTOLYSIN IA) (CLY-IA)
+          Length = 1023
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 8/42 (19%), Positives = 15/42 (35%), Gaps = 1/42 (2%)
+
+Query: 27  SQFLEFQDKFNKKYSHEEYLERFEIF-KSNLGKIEELNLIAI 67
+           ++  E++ K  K Y    Y  R   F +     + + N    
+Sbjct: 423 NKIDEWEKKHGKNYFENGYDARHSAFLEDTFELLSQYNKEYS 464
+
+
+>sp|P35669|GSHB_SCHPO GLUTATHIONE SYNTHETASE LARGE CHAIN (GLUTATHIONE SYNTHASE LARGE
+           CHAIN) (GSH SYNTHETASE LARGE CHAIN) (GSH-S)
+           (PHYTOCHELATIN SYNTHETASE)
+          Length = 498
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 16/67 (23%), Positives = 26/67 (37%), Gaps = 6/67 (8%)
+
+Query: 33  QDKFNKKYSHEEYLERFE------IFKSNLGKIEELNLIAINHKADTKFGVNKFADLSSD 86
+           Q  +NK Y+       F       I K +    +  NL   + +A      N+F  LS  
+Sbjct: 66  QKAYNKLYAKIANDYEFLRLHLQSITKYDEFMNKLWNLYQKHREAVAHLKENQFQPLSLG 125
+
+Query: 87  EFKNYYL 93
+            F++ Y+
+Sbjct: 126 VFRSDYM 132
+
+
+>sp|P11140|ABRA_ABRPR ABRIN-A PRECURSOR (RRNA N-GLYCOSIDASE)
+          Length = 528
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 12/59 (20%), Positives = 22/59 (36%), Gaps = 2/59 (3%)
+
+Query: 152 EGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT 210
+           E Q  +  +  +  S QN  +C       +G      GC+ G     + +   +G I +
+Sbjct: 436 EQQWALYTDGSIR-SVQNTNNCLTSKDHKQGSTILLMGCSNGWASQRWVF-KNDGSIYS 492
+
+
+>sp|Q00690|LEM2_MOUSE E-SELECTIN PRECURSOR (ENDOTHELIAL LEUKOCYTE ADHESION MOLECULE 1)
+           (ELAM-1) (LEUKOCYTE-ENDOTHELIAL CELL ADHESION MOLECULE
+           2) (LECAM2) (CD62E)
+          Length = 612
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 7/30 (23%), Positives = 13/30 (43%)
+
+Query: 171 VDCDHECMEYEGEEACDEGCNGGLQPNAYN 200
+           ++C H    +    +C  GC  G  P++  
+Sbjct: 191 LNCSHPFGPFSYNSSCSFGCKRGYLPSSME 220
+
+
+>sp|P06620|ICEN_PSESY ICE NUCLEATION PROTEIN
+          Length = 1200
+
+ Score = 29.7 bits (65), Expect = 9.5
+ Identities = 9/33 (27%), Positives = 13/33 (39%), Gaps = 3/33 (9%)
+
+Query: 277 IPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIV 309
+              N  + +H  LI GY +  T        W+V
+Sbjct: 194 YGSNETAGNHSDLIAGYGSTGTA---GSDSWLV 223
+
+
+  Database: /home/peter/blast/data/swissprot.pr
+    Posted date:  Oct 10, 2000 10:43 AM
+  Number of letters in database: 31,984,247
+  Number of sequences in database:  88,780
+  
+Lambda     K      H
+   0.318    0.140    0.482 
+
+Lambda     K      H
+   0.270   0.0491    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 48087047
+Number of Sequences: 88780
+Number of extensions: 2165712
+Number of successful extensions: 7103
+Number of sequences better than 10.0: 284
+Number of HSP's better than 10.0 without gapping: 253
+Number of HSP's successfully gapped in prelim test: 31
+Number of HSP's that attempted gapping in prelim test: 5661
+Number of HSP's gapped (non-prelim): 339
+length of query: 343
+length of database: 31,984,247
+effective HSP length: 49
+effective length of query: 294
+effective length of database: 27,634,027
+effective search space: 8124403938
+effective search space used: 8124403938
+T: 11
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.6 bits)
+S2: 65 (29.7 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/puzzle.tre
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/puzzle.tre	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/puzzle.tre	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,5 @@
+[ lh=-2673.059726 ](anid:0.11568,(histo7:0.00692,hcap_NA:0.00698,histo6:0.00352)100:0.06375,
+((cpos:0.00001,cimm:0.00001)100:0.04597,uree:0.03765)100:0.05282);
+[ lh=-2674.559163 ](anid:0.09945,((histo7:0.00607,hcap_NA:0.00607,histo6:0.00607)100
+:0.07755,((cpos:0.00001,cimm:0.00001)100:0.03918,uree:0.03919)100:0.04443
+):0.01583);

Added: trunk/packages/bioperl/branches/upstream/current/t/data/qrna-relloc.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/qrna-relloc.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/qrna-relloc.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,70 @@
+#---------------------------------------------------------------------------------
+#      qrna 1.2b (Tue Dec 18 15:04:38 CST 2001) using squid 1.5m (Sept 1997)
+#---------------------------------------------------------------------------------
+#      PAM model =  BLOSUM62 scaled by 1.000
+#---------------------------------------------------------------------------------
+#      RNA model =  /mix_tied_linux.cfg
+#---------------------------------------------------------------------------------
+#      seq file  =  tst.out
+#                   #seqs: 2 (max_len = 290)
+#---------------------------------------------------------------------------------
+#      window version: window = 150   slide = 50 -- length range = [0,9999999]
+#---------------------------------------------------------------------------------
+# 1  [+ strand] 
+>Contig1/24732-25017 (290)
+>chr5.pseudo/527251-527533 (290)
+
+length of whole alignment after removing common gaps: 290 
+
+length alignment: 150 (id=62.67)
+posX: 0-149 [0-149](150) -- (0.19 0.25 0.31 0.25) 
+posY: 0-149 [0-145](146) -- (0.14 0.31 0.25 0.31) 
+ 34.327235 39.943369 26.661628
+ 28.676235 4.323732 17.984592
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = COD 
+OTH  ends = 0 149
+COD  ends = 0 149
+RNA  ends = 76 91
+              OTH =       33.356             COD =       38.943             RNA =       25.665 
+   logoddspostOTH =        0.000  logoddspostCOD =        5.588  logoddspostRNA =       -7.691 
+
+length alignment: 150 (id=64.00)
+posX: 50-199 [50-198](149) -- (0.19 0.26 0.29 0.26) 
+posY: 50-199 [47-195](149) -- (0.17 0.29 0.24 0.30) 
+ 39.662831 22.857014 31.781677
+ 35.882277 13.531348 24.975088
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 50 199
+COD  ends = 51 197
+RNA  ends = 76 91
+              OTH =       38.764             COD =       21.859             RNA =       30.795 
+   logoddspostOTH =        0.000  logoddspostCOD =      -16.905  logoddspostRNA =       -7.970 
+
+length alignment: 150 (id=61.33)
+posX: 100-249 [100-246](147) -- (0.24 0.24 0.28 0.23) 
+posY: 100-249 [97-243](147) -- (0.22 0.26 0.28 0.24) 
+ 29.980841 -9.710794 19.504746
+ 29.998914 -25.369993 19.522819
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 249 100
+COD  ends = 100 240
+RNA  ends = 250 250
+              OTH =       29.990             COD =      -10.711             RNA =       19.514 
+   logoddspostOTH =        0.000  logoddspostCOD =      -40.701  logoddspostRNA =      -10.476 
+
+length alignment: 140 (id=62.86)
+posX: 150-289 [150-285](136) -- (0.22 0.25 0.27 0.26) 
+posY: 150-289 [146-282](137) -- (0.27 0.23 0.26 0.24) 
+ 27.136986 -17.070736 17.286579
+ 28.477528 -24.525347 18.465446
+LOCAL_DIAG_VITERBI -- [Inside SCFG]
+winner = OTH 
+OTH  ends = 289 150
+COD  ends = 154 288
+RNA  ends = 190 290
+              OTH =       27.958             COD =      -18.063             RNA =       17.993 
+   logoddspostOTH =        0.000  logoddspostCOD =      -46.020  logoddspostRNA =       -9.964 
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/qualfile.qual
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/qualfile.qual	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/qualfile.qual	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig1 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 40 40 39 35 35 35 35 35 39 35 35 35 35 35 39 39 35 35 35 35 35 35 39 39 39 39 39
+39 39 40 46 46 46 46 51 51 51 51 51 51 45 45 45 51 38 38 39 39 39 39 45 45 45 45 40 40 40 38 38 38 38 38 38 40 40 43 43 43 43 43 43 43 45 45 51 45 45 
+45 51 51 51 56 56 51 51 43 43 43 43 43 43 43 35 35 35 35 35 35 43 36 36 43 43 43 36 43 43 43 43 43 43 45 45 45 51 51 51 45 45 45 45 45 45 45 43 43 43 
+43 43 43 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 56 56 56 56 56 56 56 56 56 56 56 56 51 51 45 45 45 45 45 45 43 40 43 43 43 43 43 43 43 43 45 45 
+51 51 51 51 45 45 45 51 51 51 51 51 51 51 51 51 43 43 43 43 43 45 56 56 51 45 45 45 43 43 43 43 43 43 43 51 51 51 51 51 51 51 51 56 56 56 51 51 51 51 
+43 43 43 43 43 43 51 56 56 56 56 56 56 56 56 56 56 56 51 45 45 45 45 45 51 56 56 56 51 51 43 36 36 36 36 36 43 56 51 51 51 51 51 43 43 43 43 43 43 51 
+51 51 51 51 51 56 56 51 51 51 51 51 51 51 51 51 56 56 56 56 56 56 56 56 45 45 40 40 40 40 40 40 40 40 40 45 56 40 40 40 40 40 40 39 39 40 40 40 40 51 
+51 56 56 56 56 56 51 40 35 35 35 35 35 35 45 51 56 56 51 45 45 45 45 40 40 40 40 40 40 51 51 51 56 56 51 51 51 45 45 45 45 45 45 51 51 40 40 40 40 40 
+40 56 56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 51 51 51 51 51 51 51 56 56 51 51 51 45 32 32 29 29 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig2 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42 46 33 26 26 33 35 36 38 38 40 46 37 40 37 37 37 40 37 37 35 35 35 35 35 35 37 
+47 47 47 37 37 35 35 35 35 35 35 35 35 35 35 35 35 35 35 42 32 29 29 23 23 23 37 33 35 35 26 26 26 26 29 32 32 32 32 32 35 39 35 42 35 35 35 35 35 35 
+56 56 42 35 28 21 20 20 28 35 40 40 37 35 35 35 35 35 35 40 40 35 35 35 35 32 32 32 32 32 35 35 40 41 40 35 35 32 29 35 35 35 32 32 35 43 43 36 56 42 
+46 44 50 50 44 44 44 44 44 44 44 44 50 50 42 42 42 42 42 42 42 42 41 42 38 38 38 38 38 40 45 46 43 42 41 41 40 38 38 40 35 35 38 40 35 35 32 33 33 38 
+42 42 42 40 43 43 36 36 35 35 32 36 38 42 42 42 41 40 38 35 35 32 32 32 36 36 40 41 41 44 37 37 27 27 27 45 45 45 45 43 43 42 41 41 41 42 38 40 41 41 
+41 32 38 38 38 38 43 42 42 42 35 35 35 40 40 41 41 41 41 41 41 42 46 42 38 36 36 32 32 36 36 38 38 40 38 38 50 42 42 42 42 42 42 42 46 46 35 32 33 35 
+37 40 43 45 46 42 42 42 44 42 41 45 45 45 45 41 42 42 46 35 32 32 35 35 43 43 56 56 44 42 42 46 42 42 42 50 42 42 40 45 40 40 37 42 42 46 44 50 44 50 
+50 50 42 42 40 40 40 45 40 40 29 29 33 46 42 43 42 56 56 44 56 44 40 35 35 35 35 35 37 42 40 40 42 42 44 48 42 40 36 39 35 39 42 44 47 47 42 40 40 40 
+40 40 42 56 56 56 37 37 37 35 35 37 42 42 42 37 42 46 40 40 40 42 56 42 44 44 42 37 37 37 37 37 26 28 28 35 35 37 42 42 47 56 42 42 34 34 30 40 33 42 
+42 42 42 35 40 40 33 33 26 26 21 29 29 34 29 30 40 40 34 29 25 34 40 40 29 32 27 27 21 15 16 24 29 32 32 32 34 29 31 31 31 31 31 32 35 37 37 40 46 40 
+32 32 25 24 24 23 19 20 29 31 27 32 29 29 28 27 30 29 29 31 33 35 35 35 41 39 45 45 52 52 52 52 66 66 52 50 61 61 50 50 47 47 47 47 50 56 50 50 50 50 
+50 50 50 36 32 24 29 40 37 34 34 34 40 40 40 40 40 40 40 37 33 33 32 39 29 29 29 28 26 36 24 16 16 22 22 37 40 40 40 37 29 29 29 40 27 24 22 29 29 25 
+25 32 25 25 24 28 28 30 32 32 29 21 26 25 25 25 25 15 18 18 18 29 29 29 26 26 26 42 29 29 29 29 32 42 50 35 29 32 35 32 33 29 29 29 29 27 40 29 22 18 
+23 25 34 33 32 25 19 19 21 15 10 15 25 31 31 25 21 15 16 19 25 23 29 25 19 19 24 19 22 22 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig3 
+34 34 49 55 55 66 75 79 80 84 77 77 77 84 84 77 77 72 66 66 55 55 55 54 54 39 39 39 39 51 66 66 61 61 61 61 66 56 56 66 66 84 84 89 77 89 84 86 90 90 
+90 90 90 90 90 90 90 71 71 40 35 35 35 35 50 55 50 68 68 56 56 56 84 84 86 78 73 73 66 66 73 81 81 75 75 70 57 57 52 47 47 44 50 56 56 53 53 59 56 61 
+66 72 68 70 79 77 78 79 63 63 60 53 50 50 50 50 50 50 35 35 35 35 35 43 40 40 40 35 35 35 52 52 79 82 82 73 64 54 52 50 67 67 78 80 82 82 87 90 90 86 
+78 75 71 71 62 66 56 56 60 76 81 84 84 75 75 71 80 75 79 71 71 71 72 77 80 71 71 71 76 71 71 71 71 71 71 56 56 51 51 51 66 66 66 71 79 75 73 74 74 75 
+80 74 70 67 62 59 66 70 72 78 88 88 83 86 80 80 81 75 73 70 62 60 60 73 73 76 72 86 80 90 90 88 82 87 81 82 82 84 76 76 70 72 72 82 86 90 85 75 63 61 
+59 58 57 61 48 48 45 60 60 70 85 85 72 87 90 90 90 90 85 85 90 90 71 65 71 56 56 51 43 43 43 43 43 45 51 56 56 50 50 50 43 43 35 43 43 43 43 43 43 45 
+56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 
+56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 
+56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 50 48 22 22 21 27 27 33 28 24 19 13 13 13 16 16 21 24 18 18 17 13 12 25 25 25 26 13 13 10 
+16 13 23 23 14 14 14 15 19 16 16 12 14 14 12 13 10 10 13 16 12 10 11 10 12 25 21 25 22 29 24 29 29 30 36 33 25 25 15 15 16 27 25 25 15 16 19 12 12 17 
+25 18 17 17 22 22 16 16 15 12 12 11 13 15 12 11 12 12 11 13 21 27 27 18 15 14 12 11 10 8 8 16 24 24 13 11 12 25 25 31 29 32 36 41 35 32 29 28 28 18 
+18 31 27 14 12 12 11 12 17 15 22 32 30 30 18 15 15 9 10 10 10 13 13 15 15 18 16 15 18 14 18 12 12 15 16 21 11 13 11 21 10 16 18 24 15 14 10 9 13 15 
+21 21 11 11 11 19 19 18 17 21 12 12 21 16 11 10 10 15 14 27 15 21 13 13 10 10 14 14 15 19 21 16 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig4 
+35 29 29 29 29 29 30 40 40 45 51 56 51 51 46 46 40 40 40 37 37 37 46 46 37 35 35 23 24 18 22 30 36 42 42 42 42 46 35 35 35 35 35 35 45 51 42 51 40 40 
+40 46 46 46 56 46 46 40 40 40 35 35 35 39 39 39 51 51 51 51 51 51 39 39 35 35 35 35 45 40 51 35 35 35 35 35 35 35 35 35 35 35 35 35 38 38 38 38 40 40 
+40 51 51 51 40 40 45 45 45 45 51 51 51 51 51 51 51 51 51 43 43 42 42 46 46 56 56 56 51 51 51 51 51 51 56 56 66 61 61 61 61 61 61 66 61 61 65 90 90 65 
+66 66 66 61 61 61 61 61 61 61 61 61 90 90 61 66 66 66 61 53 53 53 53 68 70 81 75 90 90 90 90 90 84 83 84 80 78 79 80 66 66 51 55 54 55 55 70 70 90 87 
+89 89 90 90 90 90 90 90 90 90 86 90 90 90 90 90 90 90 90 90 90 81 81 66 66 66 66 66 81 81 81 81 71 76 76 76 76 76 66 66 66 61 61 61 61 61 61 66 66 61 
+61 70 70 70 70 66 81 90 90 90 84 77 77 85 90 77 74 74 70 75 79 85 90 90 90 90 88 88 88 90 90 90 90 90 90 90 86 90 90 90 90 90 90 90 90 89 83 83 81 82 
+82 82 82 82 87 88 85 87 87 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 87 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 87 87 89 90 90 87 88 88 86 85 82 84 90 90 90 90 90 89 89 89 89 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 87 87 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 81 87 87 90 87 87 90 90 90 90 90 90 90 90 90 90 90 90 82 83 90 90 90 90 90 90 90 90 90 90 90 90 90 85 87 87 90 
+90 90 90 90 90 90 90 90 90 87 87 90 87 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89 89 90 90 89 90 90 87 90 85 87 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 89 82 82 82 86 90 85 87 90 89 75 75 78 78 84 82 80 80 77 77 83 87 87 87 89 90 90 90 90 90 90 90 90 90 
+90 90 90 81 81 74 84 84 80 84 74 80 80 85 82 90 90 90 87 87 87 87 89 89 88 85 73 69 67 67 71 71 76 84 90 90 90 88 85 90 86 90 77 85 85 90 90 90 90 90 
+90 90 82 82 80 80 80 82 85 85 90 90 90 90 90 90 81 79 79 79 73 83 79 84 84 82 72 70 68 59 50 50 50 44 39 44 39 44 50 50 50 54 58 58 50 50 54 54 43 43 
+34 38 34 47 47 47 50 45 52 52 48 52 56 52 52 52 52 52 52 43 51 51 51 56 56 56 56 48 48 40 40 40 48 40 40 29 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig5 
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 51 51 46 40 37 37 40 40 39 39 39 39 39 39 39 40 
+40 51 46 46 46 46 46 51 56 51 51 51 51 40 40 40 35 35 35 40 39 39 39 39 39 39 39 39 39 39 39 39 40 51 51 51 56 56 51 40 40 40 39 39 39 40 40 40 40 40 
+51 56 56 56 46 46 39 39 39 39 40 45 46 46 40 40 40 40 51 51 39 43 43 43 43 43 45 45 45 45 45 51 43 43 43 43 43 43 43 43 43 45 45 45 45 45 51 56 43 43 
+43 43 43 43 43 43 43 43 43 43 43 43 45 45 43 43 43 45 45 43 43 43 43 43 43 56 56 51 45 43 43 43 43 43 43 45 45 45 45 51 51 51 56 56 56 56 51 43 43 43 
+43 43 43 45 45 45 43 43 51 45 45 43 43 43 43 43 43 51 43 43 43 43 43 43 43 43 43 43 45 45 45 45 45 45 51 51 51 56 56 56 56 51 51 51 51 51 51 51 51 45 
+43 36 36 36 36 36 43 45 45 45 43 43 43 43 43 36 36 36 36 36 36 43 43 43 43 43 43 43 56 56 56 51 51 56 56 56 51 43 43 43 43 43 45 51 43 43 43 43 43 43 
+43 43 43 43 45 45 51 45 43 43 43 43 43 45 56 56 56 56 56 56 56 56 56 56 56 51 51 51 56 51 51 51 51 51 51 51 56 56 56 56 56 56 56 56 51 51 51 45 45 45 
+45 45 51 45 45 45 45 45 51 56 56 56 56 56 56 56 56 56 56 56 56 51 76 76 80 90 90 90 90 90 90 90 76 74 76 78 82 76 74 76 76 74 74 79 84 83 81 76 76 82 
+82 87 90 85 90 85 89 90 90 90 90 90 88 82 82 83 83 85 90 90 90 90 90 90 90 85 81 81 81 87 84 90 88 88 88 90 90 90 89 90 90 85 90 90 90 90 90 82 81 82 
+90 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 82 78 82 74 81 87 90 87 90 90 88 85 82 90 90 90 90 90 90 90 90 90 90 90 80 78 82 87 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 84 80 74 77 79 81 80 83 87 90 90 90 90 90 90 90 90 90 76 76 81 61 61 53 53 53 68 70 77 90 81 81 76 87 87 90 90 
+90 90 90 83 79 80 80 90 90 90 88 85 87 80 87 81 76 76 81 81 90 90 90 90 90 90 86 86 86 90 85 90 90 85 85 82 78 73 67 72 75 67 61 68 55 55 55 32 39 34 
+16 15 4 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig6 
+56 56 56 56 56 56 56 56 56 56 56 56 56 51 51 61 61 61 61 66 61 61 61 61 61 61 61 61 61 61 61 61 61 55 55 55 61 61 61 66 66 66 66 66 66 61 61 61 61 61 
+61 85 86 85 61 61 61 55 61 61 61 76 70 79 79 78 84 70 68 68 68 68 68 76 76 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 85 81 81 81 81 81 90 
+90 90 88 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 87 87 90 90 88 88 90 90 88 88 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 81 81 81 88 
+86 88 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 86 86 88 86 86 86 86 86 86 88 88 88 90 90 90 90 90 90 90 90 88 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 82 82 82 82 78 80 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 88 90 90 87 80 80 80 80 80 83 90 90 90 90 90 90 90 89 83 83 83 78 78 78 78 86 86 86 90 90 90 90 88 88 80 80 80 80 
+80 80 90 90 90 80 80 80 88 88 88 90 90 90 85 85 85 85 83 83 90 83 83 83 83 83 88 85 82 85 85 85 85 83 86 86 86 86 86 90 90 85 90 90 90 90 90 90 90 89 
+86 79 72 68 69 77 90 74 77 67 70 68 67 69 69 71 79 79 79 79 79 80 80 83 83 86 86 90 90 80 74 74 69 69 70 87 85 85 80 84 80 72 67 67 65 62 62 72 80 80 
+85 79 83 83 85 90 90 62 62 58 54 57 60 80 70 64 75 75 64 64 64 64 57 59 65 71 71 65 72 70 83 70 72 72 57 52 59 59 58 57 57 62 78 66 71 71 73 74 74 66 
+66 61 61 61 55 54 54 35 35 35 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig7 
+0 0 27 28 48 48 48 48 48 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 56 47 56 56 56 56 56 56 56 47 47 47 48 48 48 48 56 56 56 56 56 56 56 56 
+48 48 56 56 56 56 56 56 56 56 56 56 56 56 56 48 48 35 35 45 45 47 49 55 55 55 55 50 61 61 61 56 56 56 56 61 61 66 66 66 66 66 66 61 61 61 61 61 61 66 
+66 66 66 66 66 55 50 50 65 65 66 70 81 81 81 85 90 90 81 81 81 90 87 88 84 86 86 90 90 90 90 90 90 90 85 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 88 87 87 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 88 88 90 89 89 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 89 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 84 84 82 82 84 
+90 90 90 90 90 90 90 90 90 90 90 90 89 89 89 89 88 88 90 90 90 89 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 
+90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 84 89 89 90 
+90 90 90 90 77 77 75 75 86 90 90 90 90 90 90 90 90 90 90 90 90 81 81 76 88 83 76 70 77 73 63 66 66 75 76 82 66 72 69 73 61 67 52 52 52 39 39 40 40 40 
+55 66 55 68 75 79 86 86 90 90 81 81 54 50 50 50 55 50 52 35 35 35 
+>Run_SRC3700_2001-03-02_53+54.fasta.screen.Contig8 
+56 56 56 56 51 61 61 61 53 53 53 53 53 53 61 61 61 61 61 61 55 55 55 55 55 53 53 53 53 53 53 61 66 66 61 61 66 66 66 61 61 66 66 66 66 66 66 66 53 53 
+53 53 53 53 55 55 55 55 55 55 55 55 55 61 61 55 53 53 53 53 53 61 61 55 55 53 53 53 53 53 53 53 53 53 56 53 61 61 61 61 61 66 66 66 66 66 66 66 66 66 
+66 61 53 53 53 53 53 53 53 61 61 61 61 66 66 61 53 53 53 53 53 53 66 66 61 53 53 46 46 49 49 49 53 56 61 61 55 53 53 53 53 53 53 53 53 53 61 61 66 53 
+53 53 53 53 53 53 53 53 55 66 66 66 66 66 66 66 66 55 55 53 53 53 53 53 53 53 55 55 61 61 53 53 53 53 53 53 53 53 53 53 53 53 55 55 55 61 61 61 61 61 
+61 61 61 61 66 55 53 53 53 53 53 53 53 53 53 53 55 66 66 61 56 53 53 53 54 61 66 66 66 66 66 66 66 66 66 66 66 66 66 66 53 53 53 53 53 50 55 55 55 55 
+55 55 55 55 50 61 61 55 55 55 61 55 55 66 66 66 66 66 55 55 61 55 55 55 61 61 66 66 66 66 66 61 55 50 50 50 50 50 55 61 55 55 61 61 61 55 56 61 61 61 
+61 61 61 61 61 61 66 56 53 56 55 50 50 50 50 50 50 50 50 55 55 55 45 45 45 45 45 45 61 66 66 66 61 55 55 55 55 55 55 50 50 50 47 47 50 61 61 66 66 66 
+61 55 55 55 50 50 50 55 55 55 66 66 61 50 50 50 50 50 50 50 47 47 47 50 58 56 54 54 43 43 42 35 35 35 58 58 58 46 44 44 39 41 44 50 66 66 66 50 50 47 
+47 50 56 61 61 55 55 55 50 50 50 50 50 50 61 50 50 50 50 50 58 54 54 58 56 50 50 50 50 49 49 42 42 42 35 35 32 35 40 42 50 46 49 46 50 50 44 44 44 42 
+42 43 46 46 50 56 56 56 56 54 54 50 50 58 58 44 44 39 50 50 50 54 54 58 58 58 56 56 50 50 39 44 44 42 37 37 39 35 39 44 50 50 58 49 49 39 39 39 44 44 
+50 50 58 34 28 15 15 15 29 32 35 34 32 30 35 33 30 30 42 58 58 58 58 50 43 43 39 42 39 39 37 29 25 23 25 25 29 29 34 40 40 33 32 25 25 25 27 32 32 29 
+29 29 34 40 40 29 29 29 29 29 29 26 25 25 35 31 27 25 24 25 40 29 25 24 25 25 22 24 27 23 19 9 9 9 17 20 24 24 25 25 22 24 34 34 37 34 34 48 34 


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/qualfile.qual
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings1.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings1.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings1.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,26 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B "OTU C" D E F G H;
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels One Two "Char 3" Four Five;
+      matrix
+A     --ONE
+B     --ONE
+"OTU C"     TWO--
+D     THREE
+E     F-OUR
+F     FIVE-
+G     SIX--
+H     SEVEN;
+END;
+
+BEGIN TREES;
+       tree "the ladder tree" =
+       (((((((A:1,B:1):1,"OTU C":2):1,D:3):1,E:4):1,F:5):1,G:6):1,H:7);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings2.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings2.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/quoted-strings2.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,26 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=8;
+      taxlabels A B 'OTU C' D E F G H;
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels One Two 'Char 3' Four Five;
+      matrix
+A     --ONE
+B     --ONE
+'OTU C'     TWO--
+D     THREE
+E     F-OUR
+F     FIVE-
+G     SIX--
+H     SEVEN;
+END;
+
+BEGIN TREES;
+       tree 'the ladder tree' =
+       (((((((A:1,B:1):1,'OTU C':2):1,D:3):1,E:4):1,F:5):1,G:6):1,H:7);
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=7;
+      taxlabels A 
+
+B 
+C 
+             D      E     F      G;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels 1
+
+ 2
+
+ 3
+
+ 4
+
+5
+
+;
+      matrix
+A     -      -O      N E      
+B             -   -  ON          E
+C TWO        --               
+D             TH               RE              E
+E   F                         -OU R
+F     F           I             V      E-
+G            S             I         X-             -                 ;
+END
+;
+
+BEGIN TREES;
+       tree radical_whitespace = (
+(
+ (
+  (
+   (
+    (A:1,
+         B:1):1,
+                C:2):1,
+                       D:3):1,
+                              E:4):1,
+                                     F:5):1,
+                                            G:6)
+;
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace_02.nex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace_02.nex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/radical-whitespace_02.nex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,53 @@
+#NEXUS
+
+BEGIN TAXA;
+      dimensions ntax=7;
+      taxlabels A 
+
+B 
+C 
+             D      E     F      G;  
+END;
+
+BEGIN CHARACTERS;
+      dimensions nchar=5;
+      format datatype=protein missing=? gap=-;
+      charlabels 1
+
+ 2
+
+ 3
+
+ 4
+
+5
+
+;
+      matrix
+A     -      -O      N E      
+B             -   -  ON          E
+C TWO        --               
+D             TH               RE              E
+E   F                         -OU R
+F     F           I             V      E-
+G            S             I         X-             -
+                 ;   
+                 
+END
+;
+
+BEGIN TREES;
+       tree radical_whitespace = (
+(
+ (
+  (
+   (
+    (A:1,
+         B:1):1,
+                C:2):1,
+                       D:3):1,
+                              E:4):1,
+                                     F:5):1,
+                                            G:6)
+;
+END;
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.abi
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.abi
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.ctf
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.ctf
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.exp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/readtest.exp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/readtest.exp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,74 @@
+ID   (stdin)
+EN   (stdin)
+LN   (stdin)
+LT   SCF
+QR   1107
+AV   7 7 7 7 7 6 6 6 6 4 4 4 6 6 6 6 12 21 24 29 29 19 16 7 7 8 13 
+AV        13 22 22 25 29 29 34 32 35 35 35 35 40 40 51 51 51 45 45 
+AV        45 56 40 40 40 40 40 40 46 46 56 42 42 56 45 40 40 40 40 
+AV        40 42 42 35 33 33 35 35 35 46 56 56 40 40 40 40 34 33 33 
+AV        24 24 12 14 9 11 16 31 31 48 48 48 48 40 20 19 8 6 6 6 
+AV        8 21 25 30 32 32 33 37 40 40 56 56 56 56 42 33 27 15 15 
+AV        15 33 33 42 56 48 44 42 33 33 20 22 13 13 13 22 22 42 44 
+AV        42 42 42 42 42 42 42 38 38 45 45 45 40 37 37 36 40 35 37 
+AV        38 40 40 42 40 45 38 40 42 37 40 40 40 40 45 56 46 42 42 
+AV        42 43 56 56 43 51 45 40 40 40 40 40 44 44 56 39 40 35 36 
+AV        36 40 42 42 42 50 50 44 47 44 56 56 44 44 42 42 40 40 40 
+AV        37 37 40 40 40 45 37 40 35 35 35 35 40 40 40 40 44 44 36 
+AV        36 19 25 14 33 33 35 35 35 40 36 38 37 37 37 42 42 42 42 
+AV        35 35 35 42 37 42 36 36 36 35 35 35 42 42 42 42 42 42 42 
+AV        56 42 42 42 42 42 37 37 42 42 42 42 42 42 34 34 34 42 31 
+AV        32 28 28 28 31 29 33 29 25 17 24 21 17 9 8 10 13 12 17 
+AV        10 9 9 9 10 8 11 11 11 8 9 9 9 8 7 10 8 8 6 6 8 8 12 10 
+AV        9 9 9 9 9 9 9 9 9 9 11 10 9 9 9 13 9 11 9 9 7 7 10 8 8 
+AV        8 9 9 9 9 9 9 9 9 9 9 9 9 7 8 9 9 9 7 7 7 7 7 7 9 9 8 10 
+AV        8 8 8 11 10 11 10 11 9 8 6 8 8 8 8 8 9 11 20 20 16 13 6 
+AV        6 8 8 9 10 10 10 11 13 10 9 9 11 11 15 21 17 13 9 9 10 
+AV        9 9 8 8 8 9 9 9 6 6 8 8 9 13 9 9 9 9 9 9 9 9 9 9 9 9 11 
+AV        8 8 6 6 6 6 13 10 10 8 8 6 6 8 9 10 10 10 9 9 9 10 10 8 
+AV        8 6 6 6 6 11 9 8 12 9 9 9 12 10 9 9 9 8 8 6 6 6 8 9 9 9 
+AV        9 9 9 9 8 4 4 8 8 9 11 16 11 9 8 8 8 8 6 6 8 8 8 9 11 11 
+AV        11 10 6 6 6 6 6 6 6 6 6 8 6 7 9 9 9 9 9 11 10 14 10 13 
+AV        9 9 8 8 8 8 8 9 9 11 11 11 8 7 13 10 12 9 8 8 11 11 8 6 
+AV        8 6 6 9 6 6 9 13 10 8 8 6 6 8 9 7 7 12 12 6 6 8 8 9 6 8 
+AV        8 8 6 6 8 13 11 6 6 6 6 6 6 7 7 6 6 6 6 6 6 6 9 9 8 10 
+AV        6 6 6 6 6 9 6 6 8 6 6 8 8 10 4 4 4 8 9 9 12 12 10 9 8 8 
+AV        6 6 8 6 6 6 6 8 8 4 4 4 6 6 6 6 6 9 7 8 9 8 8 8 10 10 10 
+AV        8 6 6 8 8 8 9 9 6 6 12 6 6 8 9 9 7 7 9 9 9 9 7 7 9 9 9 
+AV        10 8 7 9 9 7 7 7 7 9 7 6 6 6 6 8 8 7 7 9 9 9 13 8 8 6 6 
+AV        9 6 7 7 8 9 8 8 6 6 6 6 8 6 6 6 6 6 8 6 7 6 6 6 7 7 7 9 
+AV        9 10 9 11 11 14 13 13 9 9 9 6 6 8 8 8 8 9 14 10 9 9 7 10 
+AV        9 11 8 8 8 8 10 10 10 9 9 8 8 6 7 7 9 6 6 9 10 11 11 7 
+AV        7 11 10 10 11 9 11 8 8 11 9 9 7 6 6 6 14 8 8 13 11 11 11 
+AV        9 9 9 8 11 6 6 8 8 9 9 8 8 8 8 8 6 9 19 21 15 13 11 9 13 
+AV        9 10 16 16 9 9 9 9 10 9 9 9 6 6 8 8 6 6 10 8 10 9 9 6 6 
+AV        8 8 8 8 8 6 6 8 8 6 6 8 8 8 8 6 6 8 6 6 8 8 8 8 9 12 12 
+AV        12 8 8 9 21 19 15 12 9 9 9 9 9 12 16 11 11 11 12 12 9 7 
+AV        9 9 10 9 8 8 11 13 12 9 9 9 9 10 9 8 8 6 6 9 8 9 9 11 10 
+AV        10 11 10 8 7 8 8 6 6 6 8 9 9 11 11 16 9 9 10 9 9 9 9 9 
+AV        9 9 8 8 6 6 8 8 8 6 6 8 8 9 9 9 9 9 9 9 9 10 9 10 10 11 
+AV        14 21 20 18 14 11 9 6 6 6 7 6 6 8 8 9 9 10 10 10 10 9 9 
+AV        9 11 11 9 10 6 6 8 8 9 9 6 6 6 6 8 8 9 9 9 9 9 7 7 9 9 
+AV        9 9 9 9 9 12 9 9 9 9 9 9 10 12 9 10 10 11 11 11 12 10 7 
+AV        7 9 7 7 9 13 9 9 9 6 6 6 8 8 8 6 6 
+SQ   
+     GATGATTCCG GCTTCGGACG ACTCTAGAGG ATCCCCATTT TTATAGTTTT TATCTTGTAA
+     TAGATGTTTA GATTTTTCGT TGTAATTATT TTCTTTATTG TTGAAATTAG TATCTCTGGG
+     TAATTTATCA TATTCTCTGG AAAATGATTT ACTATCACTA GATACTTCAT AAGATTTATA
+     ATCTTTATTA TGAAAATCAT CTCTATTTTT CAAATTATTA TTATATCTAT CAAAGTTTCT
+     GTCTTCATTA TATCTATTAG CATATCTATC TTTATCTTTA TCCCTATCAC TATATCTATC
+     ATATGGTTCA TCTTGTTCAA CCGATCAGAC TCGATTCGCC ATCGCCTCTA ACGGATGGCC
+     GCTCCCCCTC TCATACCTCG CTCCCCTCGA CATCCCCCGT CTCGCCACCC TATCCGCCCC
+     CTTCATCACC CCCCCTTATC CACACCCTCA CCCCCCGCAT CGCGCACCCA CGACCACCCG
+     AAGAACCGCC CTTACTCCCA AGTACGCCCC GACCTCCATC ACCCTATGCG GTACCACTCC
+     CACCACACCC AGTCCTACTT TCGCCCGCAC ATCGGCCCCG CTTCAGACAG CTCCCAACTA
+     CGCAACCCAC GCTTGTTCTT GTTCACACTC GAATACTCGA ATCTCTCATT ACTCCGCGGA
+     CTCCGCCGCA CCTGTGCACC ATTAACTGTG TAGCGCCTGA ACCGGCACCT CTGATTACCA
+     CTTCCTCCAC CAGCACAGTC CTATTACCGC ATGTCGCTCT GCTAAGACAG TGCAAGACTC
+     TGCGGTCGCT CTGACCCGCA TCCGCCAGGG CACCTCTCAC CCTCGCTGGC CACCCCGCCC
+     CCCTCTCCCT GCCCCTTCAT TCCCCCAAAC CGCTTTCAAC GGGACACACC CCTCCGCGGC
+     GGACCACAAC TCGCCGTCGG CCACCACTCA CACCTTCCCT CCTCCTTCCC CCACATCACG
+     CCAACCCCGT GGGACGGCTC TCCCGCGGCT ACGACGCGCA ACCCCCCCTC GCCGCTTCCC
+     CCCCAACTTC CCACGGGCTC CCCTCCGCCC CTTACCCGCG AGGAGCTTCA CCCGCGAACC
+     ACCTCCCCCC TTTCCCAACA GCACCG
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.pln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/readtest.pln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/readtest.pln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,19 @@
+GATGATTCCGGCTTCGGACGACTCTAGAGGATCCCCATTTTTATAGTTTTTATCTTGTAA
+TAGATGTTTAGATTTTTCGTTGTAATTATTTTCTTTATTGTTGAAATTAGTATCTCTGGG
+TAATTTATCATATTCTCTGGAAAATGATTTACTATCACTAGATACTTCATAAGATTTATA
+ATCTTTATTATGAAAATCATCTCTATTTTTCAAATTATTATTATATCTATCAAAGTTTCT
+GTCTTCATTATATCTATTAGCATATCTATCTTTATCTTTATCCCTATCACTATATCTATC
+ATATGGTTCATCTTGTTCAACCGATCAGACTCGATTCGCCATCGCCTCTAACGGATGGCC
+GCTCCCCCTCTCATACCTCGCTCCCCTCGACATCCCCCGTCTCGCCACCCTATCCGCCCC
+CTTCATCACCCCCCCTTATCCACACCCTCACCCCCCGCATCGCGCACCCACGACCACCCG
+AAGAACCGCCCTTACTCCCAAGTACGCCCCGACCTCCATCACCCTATGCGGTACCACTCC
+CACCACACCCAGTCCTACTTTCGCCCGCACATCGGCCCCGCTTCAGACAGCTCCCAACTA
+CGCAACCCACGCTTGTTCTTGTTCACACTCGAATACTCGAATCTCTCATTACTCCGCGGA
+CTCCGCCGCACCTGTGCACCATTAACTGTGTAGCGCCTGAACCGGCACCTCTGATTACCA
+CTTCCTCCACCAGCACAGTCCTATTACCGCATGTCGCTCTGCTAAGACAGTGCAAGACTC
+TGCGGTCGCTCTGACCCGCATCCGCCAGGGCACCTCTCACCCTCGCTGGCCACCCCGCCC
+CCCTCTCCCTGCCCCTTCATTCCCCCAAACCGCTTTCAACGGGACACACCCCTCCGCGGC
+GGACCACAACTCGCCGTCGGCCACCACTCACACCTTCCCTCCTCCTTCCCCCACATCACG
+CCAACCCCGTGGGACGGCTCTCCCGCGGCTACGACGCGCAACCCCCCCTCGCCGCTTCCC
+CCCCAACTTCCCACGGGCTCCCCTCCGCCCCTTACCCGCGAGGAGCTTCACCCGCGAACC
+ACCTCCCCCCTTTCCCAACAGCACCG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.ztr
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/readtest.ztr
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/rebase.itype2
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/rebase.itype2	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/rebase.itype2	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,26 @@
+ 
+REBASE version 307                                              itype2.307
+ 
+    =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+    REBASE, The Restriction Enzyme Database   http://rebase.neb.com
+    Copyright (c)  Dr. Richard J. Roberts, 2003.   All rights reserved.
+    =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+ 
+Rich Roberts                                                    Jun 29 2003
+ 
+AaaI	XmaIII	C^GGCCG			1204
+AagI	ClaI	AT^CGAT			1157
+AaqI	ApaLI	GTGCAC			973
+AarI		CACCTGC(4/8)		F	362,701
+AasI	DrdI	GACNNNN^NNGTC		F	536
+AatI	StuI	AGG^CCT		O	1046,1189
+AatII		GACGT^C		AFGIKMNOR	1189
+AbeI	BbvCI	CCTCAGC(-5/-2)			1296
+AbrI	XhoI	C^TCGAG			1068
+AccI		GT^MKAC	5(6)	AEGJKMNORSU	528,684,1413
+AccII	FnuDII	CG^CG		AJK	561,1413
+AccIII	BspMII	T^CCGGA		EGJKR	562,822
+AciI		CCGC(-3/-1)	?(5),-2(5)	N	926
+Bsp24I		(8/13)GACNNNNNNTGG(12/7)			261
+AloI		(7/12)GAACNNNNNNTCC(12/7)	3(6),-3(6)	F	154,1050
+TaqII		GACCGA(11/9),CACCCA(11/9)		X	67,822,1030

Added: trunk/packages/bioperl/branches/upstream/current/t/data/rebase.withrefm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/rebase.withrefm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/rebase.withrefm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,227 @@
+ 
+REBASE version 304                                              withrefm.304
+ 
+    =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+    REBASE, The Restriction Enzyme Database   http://rebase.neb.com
+    Copyright (c)  Dr. Richard J. Roberts, 2003.   All rights reserved.
+    =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+ 
+Rich Roberts                                                    Mar 24 2003
+ 
+
+<ENZYME NAME>   Restriction enzyme name.
+<ISOSCHIZOMERS> Other enzymes with this specificity.
+<RECOGNITION SEQUENCE> 
+                These are written from 5' to 3', only one strand being given.
+                If the point of cleavage has been determined, the precise site
+                is marked with ^.  For enzymes such as HgaI, MboII etc., which
+                cleave away from their recognition sequence the cleavage sites
+                are indicated in parentheses.  
+
+                For example HgaI GACGC (5/10) indicates cleavage as follows:
+                                5' GACGCNNNNN^      3'
+                                3' CTGCGNNNNNNNNNN^ 5'
+
+                In all cases the recognition sequences are oriented so that
+                the cleavage sites lie on their 3' side.
+
+                REBASE Recognition sequences representations use the standard 
+                abbreviations (Eur. J. Biochem. 150: 1-5, 1985) to represent 
+                ambiguity.
+                                R = G or A
+                                Y = C or T
+                                M = A or C
+                                K = G or T
+                                S = G or C
+                                W = A or T
+                                B = not A (C or G or T)
+                                D = not C (A or G or T)
+                                H = not G (A or C or T)
+                                V = not T (A or C or G)
+                                N = A or C or G or T
+
+
+
+                ENZYMES WITH UNUSUAL CLEAVAGE PROPERTIES:  
+
+                Enzymes that cut on both sides of their recognition sequences,
+                such as BcgI, Bsp24I, CjeI and CjePI, have 4 cleavage sites
+                each instead of 2.
+
+                Bsp24I
+                          5'      ^NNNNNNNNGACNNNNNNTGGNNNNNNNNNNNN^   3'
+                          3' ^NNNNNNNNNNNNNCTGNNNNNNACCNNNNNNN^        5'
+
+
+                This will be described in some REBASE reports as:
+
+                             Bsp24I (8/13)GACNNNNNNTGG(12/7)
+
+<METHYLATION SITE>
+                The site of methylation by the cognate methylase when known
+                is indicated X(Y) or X,X2(Y,Y2), where X is the base within
+                the recognition sequence that is modified.  A negative number
+                indicates the complementary strand, numbered from the 5' base 
+                of that strand, and Y is the specific type of methylation 
+                involved:
+                               (6) = N6-methyladenosine 
+                               (5) = 5-methylcytosine 
+                               (4) = N4-methylcytosine
+
+                If the methylation information is different for the 3' strand,
+                X2 and Y2 are given as well.
+
+<MICROORGANISM> Organism from which this enzyme had been isolated.
+<SOURCE>        Either an individual or a National Culture Collection.
+<COMMERCIAL AVAILABILITY>
+                Each commercial source of restriction enzymes and/or methylases
+                listed in REBASE is assigned a single character abbreviation 
+                code.  For example:
+
+                K        Takara (1/98)
+                M        Boehringer Mannheim (10/97)
+                N        New England Biolabs (4/98)
+ 
+                The date in parentheses indicates the most recent update of 
+                that organization's listings in REBASE.
+
+<REFERENCES>only the primary references for the isolation and/or purification
+of the restriction enzyme or methylase, the determination of the recognition
+sequence and cleavage site or the methylation specificity are given.
+
+
+REBASE codes for commercial sources of enzymes
+
+                A        Amersham Pharmacia Biotech (1/03)
+                C        Minotech Biotechnology (6/01)
+                E        Stratagene (1/03)
+                F        Fermentas AB (1/03)
+                G        Qbiogene (1/03)
+                H        American Allied Biochemical, Inc. (10/98)
+                I        SibEnzyme Ltd. (1/03)
+                J        Nippon Gene Co., Ltd. (6/00)
+                K        Takara Shuzo Co. Ltd. (1/03)
+                M        Roche Applied Science (1/03)
+                N        New England Biolabs (1/03)
+                O        Toyobo Biochemicals (11/98)
+                P        Megabase Research Products (5/99)
+                Q        CHIMERx (1/03)
+                R        Promega Corporation (1/03)
+                S        Sigma Chemical Corporation (1/03)
+                U        Bangalore Genei (1/03)
+                V        MRC-Holland (1/03)
+                X        EURx Ltd. (1/03)
+
+<1>AaaI
+<2>XmaIII,BseX3I,BsoDI,BstZI,EagI,EclXI,Eco52I,SenPT16I,TauII,Tsp504I
+<3>C^GGCCG
+<4>
+<5>Acetobacter aceti ss aceti
+<6>M. Fukaya
+<7>
+<8>Tagami, H., Tayama, K., Tohyama, T., Fukaya, M., Okumura, H., Kawamura, Y., Horinouchi, S., Beppu, T., (1988) FEMS Microbiol. Lett., vol. 56, pp. 161-166.
+
+<1>AacI
+<2>BamHI,AaeI,AcaII,AccEBI,AinII,AliI,Ali12257I,Ali12258I,ApaCI,AsiI,AspTII,Atu1II,BamFI,BamKI,BamNI,Bca1259I,Bce751I,Bco10278I,BnaI,BsaDI,Bsp30I,Bsp46I,Bsp90II,Bsp98I,Bsp130I,Bsp131I,Bsp144I,Bsp4009I,BspAAIII,BstI,Bst1126I,Bst2464I,Bst2902I,BstQI,Bsu90I,Bsu8565I,Bsu8646I,BsuB519I,BsuB763I,CelI,DdsI,GdoI,GinI,GoxI,GseIII,GstI,MleI,Mlu23I,NasBI,Nsp29132II,NspSAIV,OkrAI,Pac1110I,Pae177I,Pfl8I,Psp56I,RhsI,Rlu4I,RspLKII,SolI,SpvI,SurI,Uba19I,Uba31I,Uba38I,Uba51I,Uba88I,Uba1098I,Uba1163I,Uba1167I,Uba1172I,Uba1173I,Uba1205I,Uba1224I,Uba1242I,Uba1250I,Uba1258I,Uba1297I,Uba1302I,Uba1324I,Uba1325I,Uba1334I,Uba1339I,Uba1346I,Uba1383I,Uba1398I,Uba1402I,Uba1414I,Uba4009I
+<3>GGATCC
+<4>
+<5>Acetobacter aceti sub. liquefaciens
+<6>IFO 12388
+<7>
+<8>Seurinck, J., van Montagu, M., Unpublished observations.
+
+<1>AaeI
+<2>BamHI,AacI,AcaII,AccEBI,AinII,AliI,Ali12257I,Ali12258I,ApaCI,AsiI,AspTII,Atu1II,BamFI,BamKI,BamNI,Bca1259I,Bce751I,Bco10278I,BnaI,BsaDI,Bsp30I,Bsp46I,Bsp90II,Bsp98I,Bsp130I,Bsp131I,Bsp144I,Bsp4009I,BspAAIII,BstI,Bst1126I,Bst2464I,Bst2902I,BstQI,Bsu90I,Bsu8565I,Bsu8646I,BsuB519I,BsuB763I,CelI,DdsI,GdoI,GinI,GoxI,GseIII,GstI,MleI,Mlu23I,NasBI,Nsp29132II,NspSAIV,OkrAI,Pac1110I,Pae177I,Pfl8I,Psp56I,RhsI,Rlu4I,RspLKII,SolI,SpvI,SurI,Uba19I,Uba31I,Uba38I,Uba51I,Uba88I,Uba1098I,Uba1163I,Uba1167I,Uba1172I,Uba1173I,Uba1205I,Uba1224I,Uba1242I,Uba1250I,Uba1258I,Uba1297I,Uba1302I,Uba1324I,Uba1325I,Uba1334I,Uba1339I,Uba1346I,Uba1383I,Uba1398I,Uba1402I,Uba1414I,Uba4009I
+<3>GGATCC
+<4>
+<5>Acetobacter aceti sub. liquefaciens
+<6>M. Van Montagu
+<7>
+<8>Seurinck, J., van Montagu, M., Unpublished observations.
+
+<1>AagI
+<2>ClaI,Apu16I,Asp14I,Asp37I,Asp86I,Asp123I,Asp130I,Asp707I,BanIII,BavCI,BazI,BbvAII,Bci29I,BciBI,BcmI,Bco79I,BdiI,BfrAI,Bli41I,Bli86I,Bli576I,Bli585I,BliAI,BliRI,Bsa29I,BscI,BscVI,BseCI,Bsh108AI,BsiXI,Bsp2I,Bsp4I,Bsp84I,Bsp106I,Bsp125I,Bsp126I,Bsp127I,Bsp145I,BspDI,BspJII,BspOVII,BspXI,BspZEI,BsrCI,Bst28I,BstLVI,BstNZ169I,Bsu15I,BsuTUI,Bth1202I,Bth9415I,BtuI,Csp4I,LcaI,LplI,PgaI,Rme21I,Ssp27144I,Uba22I,Uba24I,Uba30I,Uba34I,Uba43I,Uba1096I,Uba1100I,Uba1133I,Uba1137I,Uba1138I,Uba1144I,Uba1145I,Uba1161I,Uba1168I,Uba1195I,Uba1196I,Uba1197I,Uba1198I,Uba1199I,Uba1200I,Uba1233I,Uba1238I,Uba1246I,Uba1257I,Uba1275I,Uba1286I,Uba1295I,Uba1315I,Uba1342I,Uba1366II,Uba1379I,Uba1380I,Uba1394I,Uba1412I,Uba1416I,Uba1427I,Uba1430I,Uba1451I,Uba1453I,ZhoI
+<3>AT^CGAT
+<4>
+<5>Achromobacter agile
+<6>N.N. Sokolov
+<7>
+<8>Sokolov, N.N., Maneliene, Z.P., Butkus, V.V., Fitzner, A.B., Khoroshutina, E.B., Kalugin, A.A., Janulaitis, A., (1990) Bioorg. Khim., vol. 16, pp. 1040-1044.
+
+<1>AamI
+<2>
+<3>?
+<4>
+<5>Azospirillum amazonense
+<6>G. Schwabe
+<7>
+<8>Schwabe, G., Posseckert, G., Klingmuller, W., (1985) Gene, vol. 39, pp. 113-116.
+
+<1>AaqI
+<2>ApaLI,Alw44I,AmeI,Bsp146I,DaqI,Pfl23I,Pfr12I,PliI,ScoNI,SnoI,Uba1203I,Uba1387I,VneI
+<3>GTGCAC
+<4>
+<5>Alcaligenes aquamarinus 559
+<6>V.E. Repin
+<7>
+<8>Repin, V.E., Unpublished observations.
+
+<1>AarI
+<2>
+<3>CACCTGC(4/8)
+<4>
+<5>Arthrobacter aurescens SS2-322
+<6>A. Janulaitis
+<7>F
+<8>Grigaite, R., Maneliene, Z., Janulaitis, A., (2002) Nucleic Acids Res., vol. 30.
+Maneliene, Z., Zakareviciene, L., Unpublished observations.
+
+<1>AasI
+<2>DrdI,DseDI
+<3>GACNNNN^NNGTC
+<4>
+<5>Arthrobacter aurescens RFL3
+<6>V. Butkus
+<7>F
+<8>Kazlauskiene, R., Vaitkevicius, D., Maneliene, Z., Trinkunaite, L., Kiuduliene, L., Petrusyte, M., Butkus, V., Janulaitis, A., Unpublished observations.
+
+<1>AatI
+<2>StuI,Asp78I,AspMI,ChyI,Eco147I,GdiI,GobAI,NtaSI,PceI,PluI,Pme55I,Ppu13I,SarI,Sru30DI,SseBI,SsvI,SteI,Uba40I,Uba1170I,Uba1180I,Uba1217I,Uba1239I,Uba1371I,Uba1403I,Uba1419I,VchO44I
+<3>AGG^CCT
+<4>
+<5>Acetobacter aceti
+<6>IFO 3281
+<7>O
+<8>Sato, H., Yamada, Y., (1990) J. Gen. Appl. Microbiol., vol. 36, pp. 273-277.
+Sugisaki, H., Maekawa, Y., Kanazawa, S., Takanami, M., (1982) Nucleic Acids Res., vol. 10, pp. 5747-5752.
+
+<1>TaqII
+<2>
+<3>GACCGA(11/9),CACCCA(11/9)
+<4>
+<5>Thermus aquaticus YTI
+<6>J.I. Harris
+<7>X
+<8>Barker, D., Hoff, M., Oliphant, A., White, R., (1984) Nucleic Acids Res., vol. 12, pp. 5567-5581.
+Myers, P.A., Roberts, R.J., Unpublished observations.
+Rutkowska, S.M., Jaworowska, I., Skowron, P.M., Unpublished observations.
+
+<1>M.PhiBssHII
+<2>MluI,ApeI,Bbi24I,BstZ9I,Uba6I
+<3>ACGCGT,CCGCGG,RGCGCY,RCCGGY,GCGCGC
+<4>2(5), 2(5), 3(5), 2(5), 2(5)
+<5>Bacillus stearothermophilus H3
+<6>ATCC 49820
+<7>
+<8>Schumann, J., Walter, J., Willert, J., Wild, C., Koch, D., Trautner, T.A., (1996) J. Mol. Biol., vol. 257, pp. 949-959.
+Schumann, J., Willert, J., Wild, C., Waler, J., Trautner, T.A., (1995) Gene, vol. 157, pp. 103-104.
+
+<1>AloI
+<2>
+<3>(7/12)GAACNNNNNNTCC(12/7)
+<4>3(6),-3(6)
+<5>Acinetobacter lwoffi Ks 4-8
+<6>V. Butkus
+<7>F
+<8>Cesnaviciene, E.E., Petrusyte, M.M., Kazlauskiene, R.R., Maneliene, Z., Timinskas, A., Lubys, A., Janulaitis, A., (2001) J. Mol. Biol., vol. 314, pp. 205-216.
+Savelskiene, A., Petrusyte, M., Padegimiene, E., Vonseviciene, E., Kiuduliene, E., Butkus, V., Janulaitis, A., Unpublished observations.

Added: trunk/packages/bioperl/branches/upstream/current/t/data/registry/bdb/seqdatabase.ini
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/registry/bdb/seqdatabase.ini	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/registry/bdb/seqdatabase.ini	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,17 @@
+VERSION=1.00
+
+[testbdb]
+protocol=flat
+location=t/tmp
+dbname=testbdb
+
+[embl]
+protocol=biofetch
+location=http://www.ebi.ac.uk/cgi-bin/dbfetch
+dbname=embl
+
+[swissprot]
+protocol=biofetch
+location=http://www.ebi.ac.uk/cgi-bin/dbfetch
+dbname=swall
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/registry/bdb/seqdatabase.ini
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/registry/flat/seqdatabase.ini
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/registry/flat/seqdatabase.ini	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/registry/flat/seqdatabase.ini	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,17 @@
+VERSION=1.00
+
+[testflat]
+protocol=flat
+location=t/tmp
+dbname=testflat
+
+[embl]
+protocol=biofetch
+location=http://www.ebi.ac.uk/cgi-bin/dbfetch
+dbname=embl
+
+[swissprot]
+protocol=biofetch
+location=http://www.ebi.ac.uk/cgi-bin/dbfetch
+dbname=swall
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/registry/flat/seqdatabase.ini
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/rel9.swiss
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/rel9.swiss	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/rel9.swiss	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,195 @@
+ID   GCDH_CAEEL              Reviewed;         409 AA.
+AC   Q20772;
+DT   01-NOV-1997, integrated into UniProtKB/Swiss-Prot.
+DT   01-NOV-1996, sequence version 1.
+DT   31-OCT-2006, entry version 47.
+DE   Probable glutaryl-CoA dehydrogenase, mitochondrial precursor
+DE   (EC 1.3.99.7) (GCD).
+GN   ORFNames=F54D5.7;
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA].
+RC   STRAIN=Bristol N2;
+RX   MEDLINE=99069613; PubMed=9851916; DOI=10.1126/science.282.5396.2012;
+RG   The C. elegans sequencing consortium;
+RT   "Genome sequence of the nematode C. elegans: a platform for
+RT   investigating biology.";
+RL   Science 282:2012-2018(1998).
+CC   -!- CATALYTIC ACTIVITY: Glutaryl-CoA + acceptor = crotonoyl-CoA +
+CC       CO(2) + reduced acceptor.
+CC   -!- COFACTOR: FAD (By similarity).
+CC   -!- PATHWAY: Degradative pathway of L-lysine, L-hydroxylysine, and L-
+CC       tryptophan metabolism.
+CC   -!- INTERACTION:
+CC       P39745:mpk-1; NbExp=1; IntAct=EBI-313068, EBI-321013;
+CC       Q17446:pmk-1; NbExp=1; IntAct=EBI-313068, EBI-312987;
+CC   -!- SUBCELLULAR LOCATION: Mitochondrion; mitochondrial matrix
+CC       (Potential).
+CC   -!- SIMILARITY: Belongs to the acyl-CoA dehydrogenase family.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; Z66513; CAA91333.1; -; Genomic_DNA.
+DR   PIR; T22647; T22647.
+DR   UniGene; Cel.30446; -.
+DR   HSSP; Q06319; 1BUC.
+DR   IntAct; Q20772; -.
+DR   Ensembl; F54D5.7; Caenorhabditis elegans.
+DR   KEGG; cel:F54D5.7; -.
+DR   WormBase; WBGene00010052; F54D5.7.
+DR   WormPep; F54D5.7; CE03411.
+DR   GO; GO:0005515; F:protein binding; IPI:IntAct.
+DR   InterPro; IPR006089; Acyl_CoA_DH.
+DR   InterPro; IPR006091; Acyl_CoA_DH/ox_M.
+DR   InterPro; IPR006090; Acyl_CoA_DH_1.
+DR   InterPro; IPR006092; Acyl_CoA_DH_N.
+DR   InterPro; IPR009075; AcylCo_DH/ox_C.
+DR   InterPro; IPR013786; AcylCoA_DH/ox_N.
+DR   InterPro; IPR009100; AcylCoA_DH/ox_NM.
+DR   InterPro; IPR013764; AcylCoA_DH_1/2_C.
+DR   Pfam; PF00441; Acyl-CoA_dh_1; 1.
+DR   Pfam; PF02770; Acyl-CoA_dh_M; 1.
+DR   Pfam; PF02771; Acyl-CoA_dh_N; 1.
+DR   PROSITE; PS00072; ACYL_COA_DH_1; FALSE_NEG.
+DR   PROSITE; PS00073; ACYL_COA_DH_2; 1.
+KW   Complete proteome; FAD; Flavoprotein; Hypothetical protein;
+KW   Mitochondrion; Oxidoreductase; Transit peptide.
+FT   TRANSIT       1      ?       Mitochondrion (Potential).
+FT   CHAIN         ?    409       Probable glutaryl-CoA dehydrogenase.
+FT                                /FTId=PRO_0000000530.
+FT   ACT_SITE    388    388       Proton acceptor (Potential).
+SQ   SEQUENCE   409 AA;  44964 MW;  4D06241FB6768069 CRC64;
+     MLTRGFTSIG KIASRGLSST FYQDAFQLSD QLTEDERSLM LSAREYCQER LLPRVTEAYR
+     TEKFDPSLIP EMGSMGLLGA PYQGYGCAGT STVGYGLIAR EVERVDSGYR STMSVQTSLV
+     IGPIYNYGSE DQKQKYIPDL ASGKKIGCFG LTEPNHGSNP GGMETKATWD ETTKTYKLNG
+     SKTWISNSPV SDVMVVWARS ARHNNKIKGF ILERGMKGLT TPKIEGKLSL RASITGQIAM
+     DDVPVPEENL LPNAEGLQGP FGCLNNARLG IAWGALGAAE ECFHLARQYT LDRQQFGRPL
+     AQNQLMQLKM ADMLTEISLG LQGCLRVSRL KDEGKVQSEQ ISIIKRNSCG KALEVARKAR
+     DMLGGNGIVD EYHIMRHMVN LETVNTYEGT HDVHALILGR AITGLNGFC
+//
+ID   Q41V66_FERAC            Unreviewed;       607 AA.
+AC   Q41V66;
+DT   27-SEP-2005, integrated into UniProtKB/TrEMBL.
+DT   27-SEP-2005, sequence version 1.
+DT   31-OCT-2006, entry version 6.
+DE   Glycoside hydrolase, family 15.
+GN   ORFNames=FaciDRAFT_1685;
+OS   Ferroplasma acidarmanus Fer1.
+OC   Archaea; Euryarchaeota; Thermoplasmata; Thermoplasmatales;
+OC   Ferroplasmaceae; Ferroplasma.
+OX   NCBI_TaxID=333146;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RT   "Sequencing of the draft genome and assembly of Ferroplasma
+RT   acidarmanus fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [2]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-ORNL);
+RA   Larimer F., Land M.;
+RT   "Annotation of the draft genome assembly of Ferroplasma acidarmanus
+RT   fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CAUTION: The sequence shown here is derived from an
+CC       EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is
+CC       preliminary data.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; AABC04000001; EAM94575.1; -; Genomic_DNA.
+DR   GO; GO:0004339; F:glucan 1,4-alpha-glucosidase activity; IEA:InterPro.
+DR   GO; GO:0016787; F:hydrolase activity; IEA:UniProtKB-KW.
+DR   GO; GO:0005976; P:polysaccharide metabolism; IEA:InterPro.
+DR   InterPro; IPR008928; 6hp_glycosidase.
+DR   InterPro; IPR011613; Glyco_hydro_15_rel.
+DR   InterPro; IPR012343; Glyco_trans_sub.
+DR   Pfam; PF00723; Glyco_hydro_15; 1.
+KW   Hydrolase.
+SQ   SEQUENCE   607 AA;  69495 MW;  8AC6297BA16ED500 CRC64;
+     MGTYRGLYDL HDAYRSDYLK IANHGFIANN RTAALVGIDG TIDWACLPNF NSNPVFDSIL
+     DARNGGYFKT SPVMESNVNQ YYEESTNILI TEFVNNNQVI LRLTDFLPTS SYSTITFPEI
+     HRLIEAPYSD VEVSIDIKSH FNFGSGKTNI TRDRNGYIFS CTDDTLGIST NLKLKKGNGN
+     VYSRIKVEKG SHEWIVVLSG VRQIGNVRQY ESYTRLEETR NYWSAWAGKI NYSGLYYDHV
+     IRSALTLRGL FYDPTGMMVA APTTSLPEII GGERNWDYRY TWIRDTAYVV EALSLIGLND
+     VATKFLYDIM SIVQKDKKVK TIYPVNGDSK LEEKKVNLSG YMDSIPVRIG NEASEQLQID
+     QYGSIVNAVF RFHEAGGLVT TYLWDFLIEI LDTLKDIWKL PDSSIWEFRS EPKHYLYSKL
+     ISWSAFNRAI KMGRELGYSA PYRTWHKIRE EIKNEIMEKG YNPDVKAFTQ YYGSDQMDAS
+     VLRMPLTGII SAKDPRFVST LARVEAELKN PCGMFIRYHS DDGLKGHDNA FLLLSFWYVE
+     DLILSGRIME AKETFENILD HSNHLMLFSE EINFNDCREM LGNFPQAITH LGVIRAAIKL
+     DEALRGK
+//
+ID   Q41US7_FERAC            Unreviewed;       270 AA.
+AC   Q41US7;
+DT   27-SEP-2005, integrated into UniProtKB/TrEMBL.
+DT   27-SEP-2005, sequence version 1.
+DT   31-OCT-2006, entry version 6.
+DE   Potassium channel protein.
+GN   ORFNames=FaciDRAFT_1443;
+OS   Ferroplasma acidarmanus Fer1.
+OC   Archaea; Euryarchaeota; Thermoplasmata; Thermoplasmatales;
+OC   Ferroplasmaceae; Ferroplasma.
+OX   NCBI_TaxID=333146;
+RN   [1]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RT   "Sequencing of the draft genome and assembly of Ferroplasma
+RT   acidarmanus fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [2]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-ORNL);
+RA   Larimer F., Land M.;
+RT   "Annotation of the draft genome assembly of Ferroplasma acidarmanus
+RT   fer1.";
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   NUCLEOTIDE SEQUENCE.
+RC   STRAIN=Fer1;
+RG   US DOE Joint Genome Institute (JGI-PGF);
+RA   Copeland A., Lucas S., Lapidus A., Barry K., Detter C., Glavina T.,
+RA   Hammon N., Israni S., Pitluck S., Richardson P.;
+RL   Submitted (JUN-2005) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CAUTION: The sequence shown here is derived from an
+CC       EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is
+CC       preliminary data.
+CC   -----------------------------------------------------------------------
+CC   Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
+CC   Distributed under the Creative Commons Attribution-NoDerivs License
+CC   -----------------------------------------------------------------------
+DR   EMBL; AABC04000002; EAM94333.1; -; Genomic_DNA.
+DR   GO; GO:0005216; F:ion channel activity; IEA:UniProtKB-KW.
+DR   GO; GO:0006813; P:potassium ion transport; IEA:InterPro.
+DR   InterPro; IPR013099; Ion_trans_2.
+DR   InterPro; IPR003148; TrkA_N.
+DR   Pfam; PF07885; Ion_trans_2; 1.
+DR   Pfam; PF02254; TrkA_N; 1.
+DR   PROSITE; PS51201; RCK_N; 1.
+KW   Ionic channel.
+SQ   SEQUENCE   270 AA;  30497 MW;  528C4EA75C41DF75 CRC64;
+     MQTITTVGYG DTPVYGLAGR ANGMLIMVIG IGSLGYLMAG LTSMLIDIRL SSKLGERMAA
+     EKKHIVLCNY NESTKKVLDK IKYDGIDIVI LNENEVKGDN EYTYIKGSFL RENDLIRAGI
+     KKASSVIIFS RSEDKEQMAM DAESILSAMI IRKLNPEIRI IGEILNPDSR EHASSFMDDI
+     IIKGDVSSML IYSSIMIPGI PEFINDLLMS NSISEEDIDK KYASNTYREF ISNMEKENRI
+     VLAFRKQDKI YLRENSDKKI DVDSYIFIKN
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/repeatmasker.fa.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/repeatmasker.fa.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/repeatmasker.fa.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,5 @@
+   SW  perc perc perc  query        position in query    matching  repeat        position in  repeat
+score  div. del. ins.  sequence     begin  end (left)   repeat    class/family  begin  end (left)  ID
+
+  504   0.0  5.6  0.0  contig11600   1337 1407  (994) +  (TTAGGG)n Simple_repeat     2   76    (0)   1  
+  890   5.3  5.9  0.0  contig11600   1712 2225  (176) +  (TTAGGG)n Simple_repeat     1  544    (0)   5  

Added: trunk/packages/bioperl/branches/upstream/current/t/data/revcomp_mrna.gb
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/revcomp_mrna.gb	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/revcomp_mrna.gb	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1420 @@
+LOCUS       AP000868   181589 bp  DNA   HTG   
+DEFINITION  Reannotated sequence via Ensembl
+ACCESSION   AP000868
+VERSION     NO_SV_NUMBER
+SOURCE      Human.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Vertebrata; Mammalia; Eutheria;
+            Primates; Catarrhini; Hominidae; Homo.
+COMMENT     This sequence was reannotated via the Ensembl system. Please visit
+            the Ensembl web site, http://www.ensembl.org/ for more
+            information.
+COMMENT     The /gene indicates a unique id for a gene, /cds a unique id for a
+            translation and a /exon a unique id for an exon. These ids are
+            maintained wherever possible between versions. For more
+            information on how to interpret the feature table, please visit
+            http://www.ensembl.org/Docs/embl.html.
+COMMENT     All the exons and transcripts in Ensembl are confirmed by
+            similarity to either protein or cDNA sequences.
+COMMENT     This entry represents a slice of genome sequence. The ID and
+            accession give the location of the slice. The underlying clone
+            information, where available, is detailed in the misc features of
+            the feature table.
+FEATURES             Location/Qualifiers
+     source          1..181589
+                     /organism="Homo sapiens"
+     CDS             join(complement(23956..24166),complement(23467..23555),
+                     complement(22474..22603),complement(21817..21936),
+                     complement(20341..21332))
+                     /gene="ENSG00000166261"
+                     /cds="ENSP00000299336"
+                     /transcript="ENST00000299336"
+                     /note="transcript split due to inability to predict a
+                     single translateable transcript"
+                     /db_xref="RefSeq:NM_003455"
+                     /db_xref="MIM:603430"
+                     /db_xref="LocusLink:7753"
+                     /db_xref="HUGO:12994"
+                     /db_xref="SWISSPROT:O95125"
+                     /db_xref="EMBL:AF027219"
+                     /db_xref="protein_id:AAC79941"
+                     /db_xref="EMBL:AF027218"
+                     /db_xref="protein_id:AAC79940"
+                     /db_xref="EMBL:AJ276177"
+                     /db_xref="protein_id:CAC21447"
+                     /db_xref="EMBL:AJ276178"
+                     /db_xref="EMBL:AJ276179"
+                     /db_xref="EMBL:AJ276180"
+                     /db_xref="EMBL:AJ276181"
+                     /db_xref="EMBL:AJ276182"
+                     /db_xref="GO:GO:0006355"
+                     /db_xref="GO:GO:0005634"
+                     /db_xref="GO:GO:0003700"
+                     /db_xref="GO:GO:0006629"
+                     /db_xref="GO:GO:0003704"
+                     /db_xref="GO:GO:0000122"
+                     /translation="VTVHVHGQEVLSEETVHLGVEPESPNELQDPVQSSTPEQSPEET
+                     TQSPDLGAPAEQRPHQEEELQTLQESEVPVPEDPDLPAERSSGDSEMVALLTALSQGL
+                     VTFKDVAVCFSQDQWSDLDPTQKEFYGEYVLEEDCGIVVSLSFPIPRPDEISQVREEE
+                     PWVPDIQEPQETQEPEILSFTYTGDRSKDEEECLEQEDLSLEDIHRPVLGEPEIHQTP
+                     DWEIVFEDNPGRLNERRFGTNISQVNSFVNLRETTPVHPLLGRHHDCSVCGKSFTCNS
+                     HLVRHLRTHTGEKPYKCMECGKSYTRSSHLARHQKVHKMNAPYKYPLNRKNLEETSPV
+                     TQAERTPSVEKPYRCDDCGKHFRWTSDLVRHQRTHTGEKPFFCTICGKSFSQKSVLTT
+                     HQRIHLGGKPYLCGECGEDFSEHRRYLAHRKTHAAEELYLCSECGRCFTHSAAFAKHL
+                     RGHASVRPCRCNECGKSFSRRDHLVRHQRTHTGEKPFTCPTCGKSFSRGYHLIRHQRT
+                     HSEKTS"
+     CDS             join(complement(24828..25229))
+                     /gene="ENSG00000166261"
+                     /cds="ENSP00000299336"
+                     /transcript="ENST00000299336"
+                     /note="transcript split due to inability to predict a
+                     single translateable transcript"
+                     /db_xref="RefSeq:NM_003455"
+                     /db_xref="MIM:603430"
+                     /db_xref="LocusLink:7753"
+                     /db_xref="HUGO:12994"
+                     /db_xref="SWISSPROT:O95125"
+                     /db_xref="EMBL:AF027219"
+                     /db_xref="protein_id:AAC79941"
+                     /db_xref="EMBL:AF027218"
+                     /db_xref="protein_id:AAC79940"
+                     /db_xref="EMBL:AJ276177"
+                     /db_xref="protein_id:CAC21447"
+                     /db_xref="EMBL:AJ276178"
+                     /db_xref="EMBL:AJ276179"
+                     /db_xref="EMBL:AJ276180"
+                     /db_xref="EMBL:AJ276181"
+                     /db_xref="EMBL:AJ276182"
+                     /db_xref="GO:GO:0006355"
+                     /db_xref="GO:GO:0005634"
+                     /db_xref="GO:GO:0003700"
+                     /db_xref="GO:GO:0006629"
+                     /db_xref="GO:GO:0003704"
+                     /db_xref="GO:GO:0000122"
+                     /translation="MATAVEPEDQDLWEEEGILMVKLEDDFTCRPESVLQRDDPVLET
+                     SHQNFRRFRYQEAASPREALIRLRELCHQWLRPERRTKEQILELLVLEQFLTVLPGEL
+                     QSWVRGQRPESGEEAVTLVEGLQKQPRRPRRW"
+     CDS             join(complement(47951..48856))
+                     /gene="ENSG00000150410"
+                     /cds="ENSP00000307279"
+                     /transcript="ENST00000307086"
+                     /translation="RNGTVITEFILLGFPVIQGLQTPLFIAIFLTYILTLAGNGLIIA
+                     TVWAEPRLQIPMYFFLCNLSFLEIWYTTTVIPKLLGTFVVARTVICMSCCLLQAFFHF
+                     FVGTTEFLILTIMSFDRYLTICNPLHHPTIMTSKLCLQLALSSWVVGFTIVFCQTMLL
+                     IQLPFCGNNVISHFYCDVGPSLKAACIDTSILELLGVIATILVIPGSLLFNMISYIYI
+                     LSAILRIPSATGHQKTFSTCASHLTVVSLLYGAVLFMYLRPTAHSSFKINKVVSVLNT
+                     ILTPLLNPFIYTIRNKEVKGALRKAM"
+     exon            complement(18268..21332)
+                     /exon_id="ENSE00001101518"
+                     /start_phase=1
+                     /end_phase=0
+     exon            complement(21817..21936)
+                     /exon_id="ENSE00001101510"
+                     /start_phase=1
+                     /end_phase=1
+     exon            complement(22474..22603)
+                     /exon_id="ENSE00001101512"
+                     /start_phase=0
+                     /end_phase=1
+     exon            complement(23467..23555)
+                     /exon_id="ENSE00001101515"
+                     /start_phase=1
+                     /end_phase=0
+     exon            complement(23956..24166)
+                     /exon_id="ENSE00001101523"
+                     /start_phase=0
+                     /end_phase=1
+     exon            complement(24828..25239)
+                     /exon_id="ENSE00001101520"
+                     /start_phase=0
+                     /end_phase=1
+     exon            complement(47951..48436)
+                     /exon_id="ENSE00001101532"
+                     /start_phase=0
+                     /end_phase=0
+     exon            complement(47951..48856)
+                     /exon_id="ENSE00001134026"
+                     /start_phase=0
+                     /end_phase=0
+     repeat_region   125..225
+                     /note="MIR3: matches 106 to 202 of consensus"
+     repeat_region   125..225
+                     /note="MIR3: matches 106 to 202 of consensus"
+     repeat_region   388..595
+                     /note="MIR: matches 28 to 247 of consensus"
+     repeat_region   388..595
+                     /note="MIR: matches 28 to 247 of consensus"
+     repeat_region   complement(637..860)
+                     /note="L1ME3: matches 5922 to 6147 of consensus"
+     repeat_region   complement(637..860)
+                     /note="L1ME3: matches 5922 to 6147 of consensus"
+     repeat_region   complement(1015..1295)
+                     /note="AluSx: matches 1 to 287 of consensus"
+     repeat_region   complement(1015..1295)
+                     /note="AluSx: matches 1 to 287 of consensus"
+     repeat_region   complement(1296..1913)
+                     /note="L1ME3: matches 5326 to 5931 of consensus"
+     repeat_region   complement(1296..1913)
+                     /note="L1ME3: matches 5326 to 5931 of consensus"
+     repeat_region   complement(2610..2670)
+                     /note="L2: matches 3251 to 3313 of consensus"
+     repeat_region   complement(2610..2670)
+                     /note="L2: matches 3251 to 3313 of consensus"
+     repeat_region   complement(2996..3298)
+                     /note="AluJb: matches 1 to 300 of consensus"
+     repeat_region   complement(2996..3298)
+                     /note="AluJb: matches 1 to 300 of consensus"
+     repeat_region   complement(3516..3635)
+                     /note="AluJb: matches 176 to 299 of consensus"
+     repeat_region   complement(3516..3635)
+                     /note="AluJb: matches 176 to 299 of consensus"
+     repeat_region   complement(3655..3703)
+                     /note="AluSg/x: matches 172 to 220 of consensus"
+     repeat_region   complement(3655..3703)
+                     /note="AluSp/q: matches 172 to 220 of consensus"
+     repeat_region   complement(3687..3735)
+                     /note="AluSp/q: matches 172 to 220 of consensus"
+     repeat_region   complement(3687..3735)
+                     /note="AluSg/x: matches 172 to 220 of consensus"
+     repeat_region   complement(3719..3767)
+                     /note="AluSp/q: matches 172 to 220 of consensus"
+     repeat_region   complement(3719..3767)
+                     /note="AluSg/x: matches 172 to 220 of consensus"
+     repeat_region   complement(3751..3799)
+                     /note="AluSg/x: matches 172 to 220 of consensus"
+     repeat_region   complement(3751..3799)
+                     /note="AluSp/q: matches 172 to 220 of consensus"
+     repeat_region   complement(3783..3831)
+                     /note="AluSp/q: matches 172 to 220 of consensus"
+     repeat_region   complement(3783..3831)
+                     /note="AluSg/x: matches 172 to 220 of consensus"
+     repeat_region   complement(3848..4069)
+                     /note="AluJb: matches 1 to 219 of consensus"
+     repeat_region   complement(3848..4069)
+                     /note="AluJb: matches 1 to 219 of consensus"
+     repeat_region   complement(4155..4454)
+                     /note="AluSc: matches 1 to 301 of consensus"
+     repeat_region   complement(4155..4454)
+                     /note="AluSc: matches 1 to 301 of consensus"
+     repeat_region   complement(4799..5101)
+                     /note="L2: matches 2430 to 2745 of consensus"
+     repeat_region   complement(4799..5101)
+                     /note="L2: matches 2430 to 2745 of consensus"
+     repeat_region   5244..5548
+                     /note="AluJb: matches 1 to 304 of consensus"
+     repeat_region   5244..5548
+                     /note="AluJb: matches 1 to 304 of consensus"
+     repeat_region   complement(5947..6101)
+                     /note="MIR: matches 12 to 155 of consensus"
+     repeat_region   complement(5947..6101)
+                     /note="MIR: matches 12 to 155 of consensus"
+     repeat_region   complement(6344..6647)
+                     /note="AluSx: matches 1 to 309 of consensus"
+     repeat_region   complement(6344..6647)
+                     /note="AluSx: matches 1 to 309 of consensus"
+     repeat_region   complement(6822..8048)
+                     /note="L1PA7: matches 4924 to 6148 of consensus"
+     repeat_region   complement(6822..8048)
+                     /note="L1PA7: matches 4924 to 6148 of consensus"
+     repeat_region   8054..8991
+                     /note="L2: matches 2302 to 3312 of consensus"
+     repeat_region   8054..8991
+                     /note="L2: matches 2302 to 3312 of consensus"
+     repeat_region   10131..10182
+                     /note="L2: matches 3209 to 3260 of consensus"
+     repeat_region   10131..10182
+                     /note="L2: matches 3209 to 3260 of consensus"
+     repeat_region   complement(10288..10590)
+                     /note="AluSx: matches 1 to 303 of consensus"
+     repeat_region   complement(10288..10590)
+                     /note="AluSx: matches 1 to 303 of consensus"
+     repeat_region   11254..11313
+                     /note="L2: matches 3254 to 3313 of consensus"
+     repeat_region   11254..11313
+                     /note="L2: matches 3254 to 3313 of consensus"
+     repeat_region   11615..11747
+                     /note="MLT1H: matches 23 to 158 of consensus"
+     repeat_region   11615..11747
+                     /note="MLT1H: matches 23 to 158 of consensus"
+     repeat_region   complement(12076..12357)
+                     /note="AluSq: matches 21 to 301 of consensus"
+     repeat_region   complement(12076..12357)
+                     /note="AluSq: matches 21 to 301 of consensus"
+     repeat_region   12517..12571
+                     /note="MIR: matches 91 to 145 of consensus"
+     repeat_region   12517..12571
+                     /note="MIR: matches 91 to 145 of consensus"
+     repeat_region   complement(12952..14120)
+                     /note="Tigger3b: matches 2 to 1231 of consensus"
+     repeat_region   complement(12952..14120)
+                     /note="Tigger3b: matches 2 to 1231 of consensus"
+     repeat_region   complement(14154..14506)
+                     /note="L1MEc: matches 1919 to 2247 of consensus"
+     repeat_region   complement(14154..14506)
+                     /note="L1MEc: matches 1919 to 2247 of consensus"
+     repeat_region   complement(14872..15052)
+                     /note="L1MEc: matches 2319 to 2525 of consensus"
+     repeat_region   complement(14872..15052)
+                     /note="L1M4: matches 2250 to 2456 of consensus"
+     repeat_region   15053..15352
+                     /note="AluSg: matches 1 to 299 of consensus"
+     repeat_region   15053..15352
+                     /note="AluSg: matches 1 to 299 of consensus"
+     repeat_region   complement(15353..15431)
+                     /note="L1MEc: matches 2247 to 2319 of consensus"
+     repeat_region   complement(15353..15431)
+                     /note="L1M4: matches 2178 to 2250 of consensus"
+     repeat_region   16944..17245
+                     /note="AluY: matches 1 to 301 of consensus"
+     repeat_region   16944..17245
+                     /note="AluY: matches 1 to 301 of consensus"
+     repeat_region   complement(21440..21588)
+                     /note="MIR: matches 53 to 228 of consensus"
+     repeat_region   25556..25701
+                     /note="L2: matches 2484 to 2642 of consensus"
+     repeat_region   complement(25702..25806)
+                     /note="MER5A: matches 86 to 187 of consensus"
+     repeat_region   25724..25866
+                     /note="MER5B: matches 22 to 176 of consensus"
+     repeat_region   25889..26077
+                     /note="MIR: matches 16 to 198 of consensus"
+     repeat_region   26460..26737
+                     /note="L2: matches 2564 to 2856 of consensus"
+     repeat_region   27188..27280
+                     /note="L2: matches 2503 to 2596 of consensus"
+     repeat_region   complement(27731..27883)
+                     /note="L1MEc: matches 679 to 837 of consensus"
+     repeat_region   complement(27909..28078)
+                     /note="AluJo: matches 124 to 293 of consensus"
+     repeat_region   28615..29091
+                     /note="L2: matches 2735 to 3265 of consensus"
+     repeat_region   31003..31232
+                     /note="MIR: matches 10 to 259 of consensus"
+     repeat_region   32097..32219
+                     /note="MIR3: matches 36 to 158 of consensus"
+     repeat_region   complement(32367..32650)
+                     /note="AluJb: matches 1 to 286 of consensus"
+     repeat_region   33309..33436
+                     /note="L2: matches 3134 to 3264 of consensus"
+     repeat_region   complement(33681..33786)
+                     /note="L2: matches 3199 to 3308 of consensus"
+     repeat_region   34120..34290
+                     /note="MER5A: matches 2 to 187 of consensus"
+     repeat_region   complement(36857..37015)
+                     /note="MER4A: matches 261 to 466 of consensus"
+     repeat_region   complement(37016..37108)
+                     /note="MER4A: matches 1 to 98 of consensus"
+     repeat_region   complement(37111..37459)
+                     /note="MLT2B2: matches 101 to 477 of consensus"
+     repeat_region   complement(37465..37675)
+                     /note="AluSg/x: matches 90 to 306 of consensus"
+     repeat_region   complement(37676..37767)
+                     /note="MLT2B2: matches 1 to 95 of consensus"
+     repeat_region   complement(37908..38394)
+                     /note="MER31A: matches 1 to 485 of consensus"
+     repeat_region   complement(38413..38707)
+                     /note="AluSc: matches 1 to 294 of consensus"
+     repeat_region   complement(38905..39443)
+                     /note="L1MC4a: matches 5297 to 5736 of consensus"
+     repeat_region   complement(39957..40059)
+                     /note="L2: matches 3167 to 3270 of consensus"
+     repeat_region   complement(40271..40366)
+                     /note="L2: matches 2957 to 3058 of consensus"
+     repeat_region   complement(40367..40884)
+                     /note="MER4C: matches 2 to 465 of consensus"
+     repeat_region   complement(40885..41109)
+                     /note="L2: matches 2784 to 2957 of consensus"
+     repeat_region   41110..41435
+                     /note="AluJb: matches 1 to 309 of consensus"
+     repeat_region   complement(41436..41656)
+                     /note="L2: matches 2550 to 2784 of consensus"
+     repeat_region   complement(42552..43939)
+                     /note="L1PA15: matches 4780 to 6165 of consensus"
+     repeat_region   complement(45781..46404)
+                     /note="MER4B: matches 1 to 779 of consensus"
+     repeat_region   46607..47082
+                     /note="LOR1a: matches 1 to 497 of consensus"
+     misc            1..19151
+                     /note="Component DNA fragment"
+                     /note="accession=AP002765.3"
+                     /note="start=159020"
+                     /note="end=178170"
+                     /note="orientation=1"
+     variation       complement(2261..2261)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:552820"
+                     /db_xref="HGBASE:SNP000375829"
+     variation       6896..6896
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:475710"
+                     /db_xref="HGBASE:SNP000368989"
+     variation       8597..8597
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:552467"
+                     /db_xref="HGBASE:SNP001202646"
+     variation       9185..9185
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:557862"
+                     /db_xref="HGBASE:SNP000376288"
+     variation       9242..9242
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:558021"
+                     /db_xref="HGBASE:SNP000843781"
+     variation       complement(9593..9593)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:655756"
+                     /db_xref="HGBASE:SNP000458091"
+     variation       10744..10744
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:503766"
+                     /db_xref="HGBASE:SNP000076194"
+     variation       complement(11324..11324)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:627134"
+                     /db_xref="HGBASE:SNP000501198"
+     variation       complement(11580..11580)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:625888"
+                     /db_xref="HGBASE:SNP000410419"
+     variation       complement(12055..12055)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:623707"
+     variation       complement(12298..12298)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1791700"
+                     /db_xref="HGBASE:SNP000404217"
+                     /db_xref="TSC-CSHL:TSC0947239"
+     variation       15872..15872
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2276410"
+     prediction      join(complement(15914..16090))
+     variation       16640..16640
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2282646"
+                     /db_xref="HGBASE:SNP001239633"
+     variation       17550..17550
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2126708"
+                     /db_xref="HGBASE:SNP001310527"
+                     /db_xref="TSC-CSHL:TSC1134267"
+     misc            19152..181589
+                     /note="Component DNA fragment"
+                     /note="accession=AP000868.4"
+                     /note="start=19153"
+                     /note="end=181590"
+                     /note="orientation=1"
+     variation       19560..19560
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2126709"
+                     /db_xref="HGBASE:SNP001310528"
+                     /db_xref="TSC-CSHL:TSC1134268"
+     variation       complement(19688..19688)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:10904"
+                     /db_xref="HGBASE:SNP000017373"
+     variation       complement(22574..22574)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2282644"
+                     /db_xref="HGBASE:SNP001241241"
+     variation       complement(23148..23148)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:663471"
+                     /db_xref="HGBASE:SNP000668983"
+     variation       complement(23159..23159)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1720331"
+                     /db_xref="HGBASE:SNP000606798"
+     variation       complement(23419..23419)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:671898"
+                     /db_xref="HGBASE:SNP000519992"
+     variation       complement(24698..24698)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2282643"
+                     /db_xref="HGBASE:SNP001357715"
+     prediction      join(complement(24827..25228),complement(23955..24249),
+                     complement(23394..23554),complement(22473..22662),
+                     complement(21816..21935),complement(20460..21331))
+     variation       complement(25549..25549)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2282642"
+                     /db_xref="HGBASE:SNP001241241"
+     variation       complement(25566..25566)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2282641"
+                     /db_xref="HGBASE:SNP001241241"
+     variation       complement(26578..26578)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1880058"
+                     /db_xref="HGBASE:SNP000858034"
+                     /db_xref="TSC-CSHL:TSC0891347"
+     variation       complement(28686..28686)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1791715"
+                     /db_xref="HGBASE:SNP000423743"
+     variation       complement(29269..29269)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:682246"
+     variation       complement(32214..32214)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1940185"
+                     /db_xref="HGBASE:SNP000860124"
+                     /db_xref="TSC-CSHL:TSC1009511"
+     variation       complement(33267..33267)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1557429"
+                     /db_xref="HGBASE:SNP000400840"
+                     /db_xref="TSC-CSHL:TSC0444634"
+     variation       complement(33395..33395)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1557428"
+                     /db_xref="HGBASE:SNP000400839"
+                     /db_xref="TSC-CSHL:TSC0444633"
+     variation       complement(33396..33396)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1557427"
+                     /db_xref="HGBASE:SNP000400838"
+                     /db_xref="TSC-CSHL:TSC0444632"
+     variation       complement(33424..33424)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1557426"
+                     /db_xref="HGBASE:SNP000400837"
+                     /db_xref="TSC-CSHL:TSC0444631"
+     variation       complement(33565..33565)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1557425"
+                     /db_xref="HGBASE:SNP000400836"
+                     /db_xref="TSC-CSHL:TSC0444630"
+     variation       34696..34696
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1939924"
+                     /db_xref="HGBASE:SNP000859985"
+                     /db_xref="TSC-CSHL:TSC1009047"
+     variation       34809..34809
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1939925"
+                     /db_xref="HGBASE:SNP000859986"
+                     /db_xref="TSC-CSHL:TSC1009048"
+     variation       complement(34928..34928)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1893061"
+                     /db_xref="HGBASE:SNP000858271"
+                     /db_xref="TSC-CSHL:TSC0919478"
+     variation       35738..35738
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2091068"
+                     /db_xref="HGBASE:SNP001309950"
+                     /db_xref="TSC-CSHL:TSC1312336"
+     variation       35970..35970
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:2272142"
+                     /db_xref="HGBASE:SNP001314474"
+     variation       37832..37832
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1940191"
+                     /db_xref="HGBASE:SNP000860129"
+                     /db_xref="TSC-CSHL:TSC1009519"
+     variation       complement(40576..40576)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:629647"
+                     /db_xref="HGBASE:SNP000763754"
+     variation       complement(40807..40807)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:628708"
+                     /db_xref="HGBASE:SNP000111700"
+     variation       complement(40856..40856)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:634148"
+                     /db_xref="HGBASE:SNP000528519"
+     variation       complement(40872..40872)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:607154"
+                     /db_xref="HGBASE:SNP000380635"
+     variation       complement(42902..42902)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1148082"
+     variation       complement(42996..42996)
+                     /replace=""
+                     /evidence="not_experimental"
+                     /db_xref="dbSNP:1291089"
+                     /db_xref="HGBASE:SNP000157115"
+BASE COUNT    52693 a  34921 c  36093 g  57882 t
+ORIGIN      
+        1 gaattcagtt gctggctctg ccacttactt ctgagtggcc tcaaattttg taacctctct
+       61 ggttaaaatc agctgttttc caattgacct tagccctggc ctttgctaac attaaatgag
+      121 tgttgtcacc actctaagct ttaattcctt cacatagaaa atgaagagat tggatagacc
+      181 agacaattgc cacatcccct tccaaatcca ggattctata gttctagtag gaagcaagga
+      241 aacaaaacat ttcagtttat tgtttggtat aaatgaatca tcttcttctt tgaagtcctt
+      301 gttaatcaac tttcccataa gccagaattc atcactccca ctgggccttg tgaaaatggg
+      361 aaaacatctg aataatggac tattgctggg gctttggaac cacactgacc tgtcttcttc
+      421 ctcggccgta tgcatatgat ggacacctgg acacggtact tagcacccta aacatcagct
+      481 tcccagtttg taaactggag ttagtaactg agcatccctc acagagttat tgtaatgact
+      541 aagtgagata atttatatgt agataaaatg ccttgcctct aaaaaatatt cagtgatgtt
+      601 aagctattgc tttcttttta tttacttgtt tgttgaaata atttatatgt ggtgaagtgt
+      661 ccaaattgta tacacctcaa tagatcttta catctgtata tacctctgtg acaaccccta
+      721 gaacaagata cagaacattt ctagcacaac agaaggctca cttctactcc ctttcagtaa
+      781 atatccaact cccaccaaag taaccaatat tctgaccttt atcaccatag attagttttt
+      841 cctgttctca aacttcatct atgtatgtat gtatgtatgt gtgtatctat ctatctatct
+      901 ctatcatcta tatctatctc tatcatctat atctatctat catctatacc tatcatctat
+      961 atctatcata tctatctatc tatctatcta tctatctatc tatctatcta tctaattttt
+     1021 gaaacagagt ctcagtcacc caggctggag tgcagtggta cgaccttggc tcactgcaac
+     1081 caccacctcc caagttcaag tgattctcat gcctcagcct cctgagtagc tgggaccata
+     1141 ggcctgtgcc accataccca gctcattttt gtatttttag tagacagggt tttgccacgt
+     1201 tggccaggct ggtcttgaac tcctgatgtt aagtgatccg cccaccttgg cctcccaaag
+     1261 tgctggcatt tcaggtgtga gccaccacac ctggccactt cacatgaata taatcataca
+     1321 ttatgtgatc tttagagtct ggcttctctc aatcaatatt atatttttag tgagattcat
+     1381 tcatgttgct gcaagtagca ataacatttt ggtcttcatt gctgtataac attccactta
+     1441 ttgcatacca caatttattt atatactgta ctatttatgg acatttgagt tgcttccttt
+     1501 ttttgctgtt aaaaaaataa ttttctatga gctacttacc catgtctttg gtttatgtaa
+     1561 gcactcattt ccgttgatat acacccagca gtgaaattgc tagggcatag agtatttgta
+     1621 aggttaactt tagtagatcc taccaagcag ttcttggtag tacaagcagt gcaagtgctt
+     1681 gtaacagttt acattcctcc cagcagtgtg aaaggtttcc agttgctcag ctgcttcacc
+     1741 atcactgggt tttctgaatt cttttcattt agtcattcta gtggcaatag ggtagtatct
+     1801 cattgagctt tcaatttgca tgttcctgat gactaaaaat gttgattact ttttcctgcg
+     1861 ctttttgtcc atttcgatat cctcacttgt gaagtgccta ttcaggtccc ttgcacctca
+     1921 aaaatgttag ctattatttt gtatcatact aattgtgctt ttaagttatc cgtatatcta
+     1981 tctactctcc aaatagatga taagcccttt gatatgcaag gtctgtgcgt cactcattta
+     2041 cgaattctta gtgcctaacg taagactcat gttatacatt cgttgaatac attgtcaaac
+     2101 gtctaggact cagcagatca tggcaaaatg tcgccatctg gtgtctgcag ggaatgagat
+     2161 gtggaggatg caaagccagc tggctggata attcattgtt ttccatgtga gctttctctt
+     2221 atgtacgatt taaaaaatat gtatatattg caaattgcct ctgcatactt gattttaaaa
+     2281 atattttcta tatgtacttt catttttaaa aactcaattt aaagtgcaaa cttgtggaac
+     2341 agaaaaagtt ccacaatgac taaattcttt actaaacaat tactaaacaa ttcactaaac
+     2401 taaattcttt agtaattgtt tacattttca agagttgggt gctgtattca tagatgaaac
+     2461 aataagtact tactatattc tagtcctgaa gtttattcaa gagatataag gtgctcatag
+     2521 gctagtaagg aagaaacaaa aaaatcagcc tggagtatta taataaatgc cttaaaatag
+     2581 gtgtttgttc gttcattcgt tcactcactc attcatcatt caacatatat tattgagcat
+     2641 ttattctgta ccaggcattg tcacaggtac tataaaccca aacgtgtgtg tcctaaagtg
+     2701 tgactgctct aagaaaacct atagtaaaaa tatatatata atttttcaag gatttggaac
+     2761 tttggtatat tttcctttga gtataagctg ctgtagacgg taggccatgt gagaccccag
+     2821 cttctattag ggttacacat agaacaagat ggccctagtt gaaaatactt ttccataaag
+     2881 tgtactgagg tggggaggag gtggcgaact tcactgctct tctcaactct ttcttccctc
+     2941 tactcactgc tcctcaatcc accaccaaga caaatggtaa caatggattg atcaggttta
+     3001 tttgtttgtt ttttgagacg gggtcttgct ctgtcaccca ggctggagtg cagtggtgtg
+     3061 atcacaactt actgcagcct caaccccagg ctcaagcaat cctcctacct cagcttcctc
+     3121 agtagctagg actacaggca tgtgccacca tgcccagcta attaaaacaa cattattttg
+     3181 tagatatggg gtccccacca tgttgcctag actggcctgg aactcctgag ctcaagcgat
+     3241 cctcccaccg cagcctctca aagtgcttgg attataggtg tgagccaccg tgcctggctt
+     3301 gatcaggttt agaaacattg tagtgcccat gggtaaagtc tctgtccagg ttacaggcta
+     3361 gagctcacat gggcgtgacc tcccagtcac tgtcacactg ttggaggaca taggctgggg
+     3421 tggcaatacg cttctgataa aggccagtag taggtgtctt aggctggcaa tattttaagc
+     3481 actgggagat atgatactgc acctttcctc ttatactttt tttttttttt tttgagacag
+     3541 ggtctctacc acccatgctg gggtgcagtg acactatcat agctcgttgc agcctcagcc
+     3601 tcccaggttc aggcaatcct cctacctcag cctcccagat tcaggcaatc ctcctacctc
+     3661 agcctcccag gttcaggcaa tcctcctacc tcagcctccc aggttcaggc aatcctccta
+     3721 cctcagcctc ccaggttcag gcaatcctcc tacctcagcc tcccaggttc aggcaatcct
+     3781 cctacctcag cctcccaggt tcaggcaatc ctcctacctc agcctcccag gttcaggcaa
+     3841 tcctcctacc tcagcctccc aggttcaggc aatcctccta cctcagcctc ctcagtagct
+     3901 ggagccacag ctgtgtgcca ccacatccag ctaatttgaa aaattatttt gtagagacgg
+     3961 gggtcccatt atgttgtcca ggctggtctg gaactcctgg gctcaaacga tcctcccgcc
+     4021 ttggcctccc aaagtgctgg gattacttgg gtgagccact gcgcctggcc tcctctgatg
+     4081 cttcttgtga gcagatgtat tcttgaattt cttcacagca gttcacaaaa agtgtctatg
+     4141 acctcatcac ccaagttttt tttttttttt tttttttgag atggtcttgc tctggcacca
+     4201 ggccagagtg cagtggtgcc atctgggctc actgcaacct ccgcctccct ggttcaagtg
+     4261 attttcctgc ctcagcctcc caagtagctg ggattaacag gcacgtgcca acacgcccaa
+     4321 ctaatttttg tatttttagt aaagatgggg tttcaccatg ttggccagga tggtctcgat
+     4381 ctcctgacct tgtgatccgc cagcctcggc ctcccaaagt gctgggatta caggcgtgag
+     4441 ccaccgcacc aagccatcac ccgactttta cacattctcc tagacttctt ctgacattac
+     4501 aatgcaacaa acatttattt atttcctaat ataaaccaaa aattctgcta gatgttggca
+     4561 aaaaacaatg aaagacagtg tttgccttcc aagagaaaag gtaacagatg attaacatat
+     4621 gtaaagggct atgacagaag tatatgaagg aaatatgaga gcttgcttct tggatgaagt
+     4681 cttaagaata aaaaggaatt atccagcaag agtgggtaag agtgggggat ggacaatagg
+     4741 agctgcatga gcaaagggca tgcatgcaga gctgtttgca ttccatcttc aacatgctgg
+     4801 gaagctttta acctgagaag tagcagatac aaataaatat tttaaaaatt tattggcaac
+     4861 catatgaagg atgaattatg gcatggtacc attggaagca ataattctgt tagcatgcca
+     4921 tggtaataat tcacatggtg tagcctcagt caatgcactg aaagtgaggg aggagagatg
+     4981 agagaggggc acagatgcag ttcgttaagg ttagtttaga attagtatca ctgatagata
+     5041 tgaagagcaa gggaaaaaga aaaatctggg acaaatcata ggtttctgat ttaggtcaat
+     5101 ggtggccagt cactttcagg aagaagaaca gatgctgggg tactggcttt agtgagggca
+     5161 ctaatccctt ccccctgccc attccaactt atattcactt ctagaaagga tgaatgggat
+     5221 tttggtgtaa taagaagaca ggttggccag gcgtggtggc tcatgcctgt aattccagca
+     5281 ctttgggagg ccaaggcagg cagatcattt gagcccatga gtttgagacc agcctgggca
+     5341 acatgatgaa accctgtctc tacaaataat acaaaaaatt agctaagtgt ggtggcacat
+     5401 gcctgtagtc ccagctattc aggaggttaa ggtgggagga tggcttgaac ccaggacatg
+     5461 gaggttgcag tgagccgtga tcataccgct gcactccagc ccagatgacg gaacgagact
+     5521 ccatctcaag taaaacaaca acaacaaaag gtcaagaaat tgagcatggt gttgtgtttt
+     5581 tcagcacctc aggcattctg atcactttta cttacttgtt tagattgctc atgtcagagt
+     5641 gccgcaagtt gtatacattg cctcacagct taattgccta tgaggagttt gggtccaaaa
+     5701 acctcactgg ttagcagtaa aaagctttca aaaatttata attccagaca tttccctatt
+     5761 tccaccgttt tgcagagtta gagtccctgt ggactgagcg agcttccagc actgggagtc
+     5821 gacctctagt ggtaattctt taaactgcaa gtcctgactt tggtgatttg gcttattttg
+     5881 ccagaaccag ggcagctgag ttcaggtttt ggttacaatt aataaaaaat gttggcttct
+     5941 aagagatcta ttaatatctc agttttactt atgagaggcc ttagagatat gaagtaagat
+     6001 gaactcacat gcccaaggtc atacagctag taagtgatga aggtaggatt caaatcctgg
+     6061 tccgtaggtt tattaatcca gagcctgtgc tcttaaccac tacctgttct gcctctgttg
+     6121 ttacttgaat catctcttta ggcaaagttg accaatcttt ttctttcttt ctttctttcc
+     6181 ttccttcttt ctttctttct ttctttcttt ctttctttct ttctttcttt ctttccttct
+     6241 ttctttcttt ctttttcctt ccttccttcc ttctctttct ttctctttct ttccttcttt
+     6301 ctttcttttt ctttcttctt tctctttctt tctttcttcc ttcctttttt tgtttctttt
+     6361 tttttttttt caagttctca ctctgtcacc gaggctgaag tgccactgtg gcctgatctt
+     6421 ggctcaccgc aacctcctcc caggttcaag aaatccttgc atctcagcct catcagtagc
+     6481 ttggactaca ggcatgtgac accacacctg tctaattttt gtatttttgg tagagacagg
+     6541 gttttgccat cttggccagg ctggtcttga actcctgagc tcaagcaatc cacctgtttc
+     6601 agcctcccaa agtgttggga ttacagacat ggggcactgt gcctggcctt gaccatccct
+     6661 tttatatgct ctctaatata tctactgtag cacatctcat acattagaga ccagtctttc
+     6721 ccaccagttc atgaattccc tggaagcaga gctcttagta agtacacaat gaatgtttaa
+     6781 tgaatgggca gatattataa tcctactctc agactctttt aattttttta ttagacttta
+     6841 agttctggga tacatgtgca gaaagtgcag gtttgttaca taggtataca cgtgccatag
+     6901 tggtttgctg cacccatcaa cctgtcatct gcattaggta tttctcctaa tgccctccct
+     6961 ccacttgccc ccgaccccct gacaggcccc agggtctgat cttcccctcc ctgtgcccat
+     7021 gtgatctcac tgttcaactc ccacttatga gtgagaatat gcagtgtttg gttttctgtc
+     7081 cctgttagtt tgctgagaat gatagttaca agcttcatcc atgtccctgc aaaggacatg
+     7141 aactcatcct tttttatggc tgcatagtat tccatggtgt atatgtgcca cactttcttt
+     7201 atccagtcta tcattaatgg gcatttgggt tggttacgag tccttgctat cgtgaatagt
+     7261 gctgcaataa acatacatgt gcatgtctct ttatagcaga atgatttata atgctttggg
+     7321 tatataccca gtaatgggat ggctgggtca aatggtattt ctggttttag attcttgagg
+     7381 aatcgccgcc ctgtcttcca caatggttga actagtttac acttgcacta acagtgtaaa
+     7441 agtgttccta tttctccaca tcctctccag catctgtttt ttcctgactt tttaatgatc
+     7501 gccattccaa ctggtgtgag atggtatctc attgtggttt tgacttgcat ttctctaatg
+     7561 accagtgatg atgagctgtt tttcatatgt ttgttgacca cataaaatgt cttcttttga
+     7621 caagtgtctg ttcatatcca tcgcccactt tttgatgggg ttgttttttt cttgtaaatg
+     7681 tgcttatgtt ccttgtagat tctggatatt agccctttgt cagatggata gattgcaaaa
+     7741 attttctccc attctgtagg ttgtctgttc actctgatga tagtttcttt tgctgtgcag
+     7801 aagctcttta gtttaattag atcccatttg tcaattttgg cttttgttgc cattgctttt
+     7861 ggtgttttag tcatgaagtc tttgcccatg cctatgtcct gaatggtaat gcctaggttt
+     7921 tcttctaggg tttttatggt gttaggtctt atgtttaagt ctttaatcca acttgactta
+     7981 atttttgtac aaggtgcaag gaaggggtcc agtttcagtt ttctcgcaga ctcttatagc
+     8041 tagctattta cataacatct ccactagaat gtcaaataga catctcaaac atcccatgtc
+     8101 caaactacac tcccgatgat gctaaagaga tgtattccac ctactatctt tccatcttaa
+     8161 tcaatggcgt ccacctttgc ccatttactc agtccaaaat catacaattc tctttaactc
+     8221 atcttcccca ccccccaaca aatctcacat ttactagcaa atcttgttaa ttctatttta
+     8281 attatatgat ttgaactcta ccttttctca tcatttccac tgctctgctc cttatccaga
+     8341 ctgtgcatgt gtttcgcttg gattactgtg ttagcctgcc aagtggtgtc ttagtggtct
+     8401 cttcctgaac tgccaagaat ctattctcag ctatcagcat atttatagtt gtgagatgat
+     8461 atgtgagttc tttgaacaat accctctagt ggctccccat cttaatcaga atcaaagaca
+     8521 caattgttcc aagcccatgt atctggctcc tcattgcccc tctaagagca tctcctattg
+     8581 ctctcccctc tctctccggg ttctaggctc accaagcaca cttctgtctc agggacttta
+     8641 cactagctat tcactctctg gaatgctttt tcatgaatag ctgtttaaat tgctcctcca
+     8701 actctttcac atctttgttt aaatgtcacc ttcccagtga agccttcctg gattatcccc
+     8761 tcacccatcc gtggaacttc ctctccactt tccttgatta atttttcttc ttggcagttt
+     8821 tttttccttt taatatgtga tataatttaa ataatgcttt attttgttga ctgtctttcc
+     8881 tcaccatact ataagctcca tgagtgtggg gatatttgtc tcttttgttt actgctactt
+     8941 ccctaacacc tccaacaata ggtgtttatt acatactttc tggatgcatg aaagaaaaac
+     9001 gaagaaactc atccctcagt acaggaactg tatgttctga atgattgcat taggtgtgct
+     9061 ctatacattt ttgcctctct taacagtcta caattccttg actctggggc cttctagttg
+     9121 gtatgccccc tattaaacta caggcagatt tttattaaga aaattcactg cagtaagtat
+     9181 ttatatagag caataattat ttgtaacttt ttgttggtca gtttagcaac aaagcttcct
+     9241 atgcttagca attacgtgca atcattttcc ctgttcatgt aacagtcttc tcatatgaga
+     9301 ataatggatg gagcagtatt tttatttaga ttgaactttc gctgtactct ttatgatagt
+     9361 tctgatcaca gaaaatgatt actcctccac catgtcctct caaaaaataa aaaagttgac
+     9421 tagttgaaac tcccaaccca ctcttcaaaa acaagaaaca caaaacaaaa acctaccagg
+     9481 aaataaacat gaatatctaa ggggatgtga gggtgttgtc agttcacttt attaatcagc
+     9541 ttaacacaat ccagcctaac aaaataaatt agagcttaag tcaaagccac aaggggttaa
+     9601 agtggttgta ggaggtttga gaagggccat tgagccagat gttgtgtcta gtcaatctca
+     9661 agcttgctgc acagttaaag ccgaccattc taccttccct aataacactt tagttgaacc
+     9721 ctcaaatcct cccagaagtc ctccctagtt attattaacc aaccccttct ctcaattcct
+     9781 aagtccaact cttctcttcc tgagcccatt tctttattcc acatccctca ttttcagatg
+     9841 atgaccacag catttaccac aggagaaaaa aagagaaacc atcagtcaca gattgcctta
+     9901 attttccttc cttccccctt ctatgagctg aacacatttt gtgttcacct gaacatcttt
+     9961 ttcttttgtc ccagaatgaa atatttttca atgctaatcc cttcacctgt gcttttgatc
+    10021 gtatcctctt aaagatttcc ttcctcaatt atctcctctg cctttttcag tgtttcagtc
+    10081 tccccttgtt ccttctcctc agcttatata atttggcaat atgtataaat acacctccta
+    10141 gcacagtgtc cggaaaatag caggtgctca ataaatgtta gtttgactcc cctaaagctt
+    10201 caagtgtccc ttagcctgag aaaaaccatc cattcttgac actgcatttc cccaaactgc
+    10261 cctgccacac tccttttcct tcccaagctt tttttttttt tttttttttg agacacagtc
+    10321 tcgctctgtc acccaggctg gagtgcaatg gtgtgatctc ggctcactgc aaactctgcc
+    10381 tcccaagttc aagcgattct cctgcctcag cctctccagt agctgggatt acaggtgtgc
+    10441 accaccaggc ccggctaatt tttgtacttt tagtagagac ggggcttcac catattttcc
+    10501 aggctggtct caacctcctg acctcaggtg atccgcccac ctcggcacct gaaagtgctg
+    10561 ggatatcagg tgtgagccac cacgcctggc ccttcccaag attcttacaa aagcattctc
+    10621 tatgcccaat ttctctactt ttacttgctc atttaagttt tgtctcactt tatataattg
+    10681 aattatttaa tcaaatactc tctttcaggt ttttatttaa cttggccgtt ttacggcatt
+    10741 tgataacact aaccagttcc tcctttccat tttgttaaaa aaaattcttg gacttttttc
+    10801 aacatttctc tctgattccc ctttactctc tgttgttctt caatgttctt tggtgggtac
+    10861 cgctctgcct ccgtcaatgg ttccttaaat gtttttctgt ccctagcaat tttcttttca
+    10921 atctattcaa cttccttctt taaaatgttc ccttccacac ccattctctg aactgtcacc
+    10981 tatactgcat attgctccca aatctgtatc ttctttccag attttttctc acaacagagc
+    11041 tacttctact tactggatat tctagcattg ccttacattc tatataacca aaactgagct
+    11101 caatatcttt cttttatgca tttttctttt tcctatgttt ccaaattgaa taataatgtc
+    11161 acttgtcacc cagatcccca cagccctctg tgcatgttgt gatttattca tctttgcatc
+    11221 ctccacctca tcccctagct tctcgttcat tcaagcccat tatagttccc agtacacaga
+    11281 agacactcaa aagctatttg ttgaatgagt gcataaatga aacgctttca gaggaataaa
+    11341 tctataagca atcaagggtg ctcttgattt ttccataaag ttacatagga cattgtaatc
+    11401 acccagttga tcattgctaa gaaagtgctt gatcctattc tctattatca gcttgcagga
+    11461 gaaaggagaa acaaacatgt tttatgtttc ttgtatagac aggatgacta atcttagaag
+    11521 acttataaaa acgagcatct tccgagtcaa cctgaatgta aaatgctgaa agaatgaact
+    11581 gtgaatcctg cttggggact ggtagagtga atttttgact cccattcttg acccctccta
+    11641 atactcaaac cctttgccat ataattcatc agtccctccc actaagggaa ttggtgaaaa
+    11701 ttctcacgct ttgactttgg gctctgccac gttgctttta ctaatggatg ttagtggact
+    11761 ggaggcatga ggcttgacat gcacttggtg gttgggtttt ctttctttac cttttgatca
+    11821 ccatgagagt aacatatgtg gctcacctca ctaagcccag gaggagaatg ggagatgtgt
+    11881 agagcacaac taccccagct gacccacaga ccatcaggga gaagcaaagc cacccagtcc
+    11941 aacccagcca agatcagttt ctgaaactct agctaatctg caaactgcat acgtccaata
+    12001 aatgcttatt attgtaaacc accaacattc tgtgttattg tgtagcagta tttgaataat
+    12061 acaaatacca acaggcttct tttttttttt ttttgagacg gagtttcgct cttgttgccc
+    12121 aggctggagt gcagtggcgc gatctcagct cactgcaacc tccacccccg gggttcaagc
+    12181 gattctccgg caggagaatc ctgagtagct gggattacag acatgcacca ccacactcag
+    12241 ctaatttttt gtatttttag tagagatggg gtttcatcat gttggccagg ctagtcttga
+    12301 actcctgacc tcaggtaatc cacctgcctc ggcctcccaa agtgcaggga ttacaggcac
+    12361 cggccccatc agccttcttt tactcccttt cttctgactc tggcattctg cctcccttct
+    12421 cctcagttct cttctcaaca gtctgctaat gtgaatacaa ttggctttag ggtcaaaaag
+    12481 ttgtgacttt gaactcattc tgtgaatgac tgggaggctt gaatatgtta cttttctcct
+    12541 ttgagccttg gtttcttaat ctgtgaagta agaaaaagat acctgtgatg actcaataag
+    12601 ctacattaag caaccagcat gcaataatca gcctacactc cagaactgag tgtaactgac
+    12661 cagaagggat cacaaactga gcaaaagcag cggttccttg ttcgttctgc actctcccca
+    12721 tacaaaattc cattcctttc ccccagattt gttcttcttc ttaggctcag gcagttttac
+    12781 catagtgtgt tctctctgcc tccagcactg tggtaaagat gatgtttctt attcctttaa
+    12841 tgcctcttgg ctacatccat tctccaggga caattcattt taaggagtgc cttttatgtt
+    12901 tggccttctt gtgtgtattc tttgaggtat gtctcgtgtg tgtgtgccta cacagtcatg
+    12961 catcgcttaa caatgggata tgttatgaga agtgtgtcat tagatgattt cgtcattgtg
+    13021 tgatcataga gtgtatgtac acaaacacag atgctatagc ctacttcaca cctaggctat
+    13081 atggtatagc ctattgctcc tacgctacaa acctctatag cattttactg tactgaatac
+    13141 agcaggtgat tgtaacacac tggtaagtat ttgtctatct aagcatatct aaacatagaa
+    13201 aaggtacagt aaaaatacaa tgtgaaagat aaaaaatggt ccacctgtat agggcactta
+    13261 ccatgaatgg agcttgcagg actggaagtt gctccaggtg agtcagtgag tgactggtga
+    13321 gtgaatgtga aggtctagga cattactgta cacaactgta gacttcatca acactgtata
+    13381 cttaggctac acgaaatttt tacaaatttt ttctttcttc aataataaat taacctaagc
+    13441 atggtgtaac ttttttgctt tataaacttt tagctttttt aactttttga ctcttttata
+    13501 ctaacactta gctaaaacac aaacacactg tacagctgta ccaaaatatt ttttttcctt
+    13561 attctttaag cttttttcaa ttttttcaat ttttattgtt ttacttttta aacttctttg
+    13621 ttttaaacca agacacaaac atagcttagc ctcggcctac tcagggtcag gatcatcagt
+    13681 atcactgtct tccacctcta cattttgtcc cactggaaca tctttcaggg gcaataaccc
+    13741 cagtggagct gtcatcttct ataataacaa tgccttcttc tggaatcctt cctgaaggat
+    13801 cttcctgagg ctgtcttcta gttaactgtt ttatttctta aataagtaag agtacactct
+    13861 aaaataataa tgaaaatata agtacataag cccatgacag tcatttctta tcaagtatta
+    13921 tatactgtac ataattatat gtgctagaat tttccatgac tagcagcaca gtaggtttcc
+    13981 agcagcatca tcatgaacat gagtaatgca ttgctatgac atcagtaggc tgtaggaatt
+    14041 tttcagctcc attacaatcc tacaggacca ctgttgtata tgtggtttgt cactgatgaa
+    14101 aacgttgtgt gccacatgac tctatttgtg tgcatataaa atgttaactt gatctttaat
+    14161 tgaagtattt aatctattta catttaatgt tactatttat atatttgagt tttagttacc
+    14221 attttactat ttgttatcta tttgactcac ttgttttata ttcctttttc tcttagcttc
+    14281 atttgcataa ttttttagta cattattttc cctctattaa ctcgtcagtt atacatttgc
+    14341 ttactattgt tttggtggtt actctagaaa tcataacata tattttttat tattactgtc
+    14401 taaaaaacaa aatagccctt ttccactttt tcaataatgc cagaaccttc aaatgtttta
+    14461 actccatcca ctctcttcat cttatgtgtt attgctgaca tatatctcat aagacatatt
+    14521 tactttatac aatcattatt gatttatact tatcaacata tttaccattt ccatagctta
+    14581 ttacttagtg tatttctata tttccatttc atcatcaata ttgccagaag tttttgtagt
+    14641 cttttcaaag aaccagctta cttactttgt taaacatatt atatatattt tatcattact
+    14701 ttctccaatt atctcctttc tatattcttt taatttattg tataaatatt tatccttttt
+    14761 ttatctgctt gatctatcaa aaactaggag aggtgcattt aaattgctca ctgaagtgat
+    14821 gaatttatca atttttcctg taattctact gatttttatt acaaagccag aagcatatag
+    14881 aaatttagac atgttatata ttcctggtag attaaacttt taattatgta gaggccttct
+    14941 cataatgctc tggcttaaaa cattttttat tcaatattaa aataattcct ttagctttct
+    15001 ttgtacagta ttttcctggt atatcctcac tttttaaaaa ttttccctat cctggccggg
+    15061 cccggtggct catgcctgta atcccagcac tttgggaggc cgaggtgagt ggatcacgag
+    15121 gtcaggagtt caagaccaac ctgaccaata tagtgaaacc cagtctctac taaaaaaaaa
+    15181 aaaaattagc cgggcatggt ggcaggcacc tgtaatccca cctacttggg aggctggggc
+    15241 aggagaatta cctgaaccca ggaggtggag gttgcagtga gccaacatga tgccattgca
+    15301 ctccagcctg ggcgactgag tgagattcca tctcaaaaaa aaaaaaaaaa aaatcactat
+    15361 ttttgtatat ctgttgtata tactttgtta taatttactt ttttttttac ccattctgac
+    15421 aatctctatc ttaaatggta gttactattt attctatttt attcatatat ttgggctgtg
+    15481 ttcttcgtag ttctattttt tcatcttttt atgctttttt tggcctttct ttagacaacc
+    15541 taagtcccac ttttgaaaaa aattaattcc tattttccct ccactattta gaaaattact
+    15601 ctctccattc ttatgagtat ctttgatact acagtgcaca cttaatttaa caatgttgag
+    15661 gttaaccaat atcttacagc ttttgtcctg aacagttcat gaatgttaaa acacttaatt
+    15721 cagatcaccc tatcccaact tacatactac ttctattctc tattttagta atttattaaa
+    15781 actgtagttg tattttagtg gagggccttt cagaatatca tgctaggggc actgaaggag
+    15841 gcaaggatct tctcccttta tttgctgacc cagtcttctg ctttttgctg gtttcttgac
+    15901 tcagggttta ctcctcattg tttggatgga ttattcaact gccccacagc tcttttgccg
+    15961 tggaactgct tctctgtaga cctgttgcca agattccgca aatgaaccca agcaccaagc
+    16021 aattgctgta ggcctcttcc tgaaatcttc tcgttagtag ttccttggac ttgtctagga
+    16081 gagatgtaca actaaaaaag aaaccacaaa cagaagagca gagaagtcgt cttacgggaa
+    16141 accctgagaa tctgcagttt tgattatatt cctgtgaaat ggcctttttg cagcgttgtt
+    16201 atattctgca gagaacgcta ctaagattta ttctctactt aaatggagaa ggtttggcta
+    16261 ttttcctgca tggcgtttta gatggttgaa tcttcttgct tctggaagtc tttacagctt
+    16321 ctggaagtct ttacataacc ctacaatgct ctcaacctcc tctgaagagt ccctatcttt
+    16381 atgaattgcc aaataaacta gtttgtgctg actgattcct tttttagaat ttgaattgca
+    16441 ttcctccttc tacctgaaac ctcggtgtac acacgtgcat gcgtgcactc acacacacta
+    16501 aagagaatag aaattgcaca ttaagaggta tctttgtgct ggtctgacaa tcactttgtt
+    16561 cctttttgtt attaaagata aaaaaggtgg tgtctaaaga aagctgggct cctccttgct
+    16621 tgatcctcag gacttccgca aggatgaatt aagttattcc acttattcca aggagatgtg
+    16681 cttacatttg aaacattgta tcagttactt tgccacaata aaggttagta atcacgtggg
+    16741 ttggccatcc actttgtggc tctttcagtc tttccctctc ccataaacta gattagtggg
+    16801 ttagtcagcc aacatcacta gaaatataag gagatggaaa taattgtagg tatatttctc
+    16861 tcagctattg tggctggact atttaggtca gctgagggaa gacgctagag gtaaaacaaa
+    16921 ctatttttaa aaatactggt gtggggccag gtgcggtggc tcacgcttgc aatcccagca
+    16981 ctttgggagg ccaaggcagg cggatcacaa ggtcaggaga tcgagaccat cctggctaac
+    17041 atggtgaaac cccgtctcta ccaaaattac agaaaatcag ccgggcgtgg tggtgggtgc
+    17101 ctgtagtccc agcttcctgg ggaggctgag gcaggagaat ggcgtgaacc cgggaggcgg
+    17161 agcttgcagt gagccgagat cacgccactg cactccagcc tgggcgacag agcgagactc
+    17221 catctcaaaa caaacaaaca aacaaatact ggtgtggaag ggtaccaaat tattaacctg
+    17281 ctcaacatgt catcagttac tatttctcag ttcagctttc attctagaaa actactatgg
+    17341 agccgttaac tctcatttgg attactgtcc tttcagccag tctggtgaat caaaattttt
+    17401 tatttattaa atatcccatt tttcgcctgt ggcagggtgg tggtccagag gccttgactg
+    17461 aaggggactg gaacaacttg tctgtgccat ctctgggctt ttctcctcta ctactctact
+    17521 gctcagtgca aacaatgtat cttctattag atccattact tcagttgtgc tttgaagggg
+    17581 gcaacaggtt tcttagtggg ttttgatttt agtaccttga tgttaatctg atttatgaaa
+    17641 ttctccatgg agttcatctg agtttaccgg attctgctcc ggaatatgag gcctaagtcc
+    17701 cagataataa aaatgtattt tggcttgatc tcttgggcat ttatttctcc tcggactttg
+    17761 tggtgaagag gttaaatatt ccttggttat tcctaaatta tctggtcaaa catccttata
+    17821 ttatcatttt taatgtgtca tacctcttta gcacatttcc aaccacctgc cacttctgtt
+    17881 aattaagtag cgtattaact tcctccaaat aatatcttta aaatcccacc gttttctccc
+    17941 tcagaaactt agtatggtat tcaaggatgc tagaattcta ctgctcacaa gatactagag
+    18001 gggacccctc cgaatacaca ccctgtgctc caacctccct gctcacgctc agctgcacta
+    18061 cttctttcct gccgctgcac actgtggttg tgtttgttca gatttgggag acaacatgtc
+    18121 cctcccagaa cctcagaaaa ttcagtaatt tacaaaaggg accagagatt tagtgtcagc
+    18181 atatgaaact ctgcagggag atcagtaatt aaggactatt tgatgttcca acaaaccggc
+    18241 ccagcctcac aagctttccc tcatgtagaa ctctgacaga ttttcattta ttggaataat
+    18301 gcaaataaga aaattaaaaa gtcaatttgt tgtcactgcc agtattttac caacctttat
+    18361 aaatggttaa gtctgagcta ggagaaacta tcttttgtgg cagacatttc acccaggaca
+    18421 agctgtgctt aggcttagat acttggttta gaaatgcctg cttattagga cagtatcttc
+    18481 tgcaaactcc aggcttggat cttatctcag agtgcatatg tttcaccgga ctctgctaat
+    18541 catcccaacc cttcctttcc tcaaggaaaa atactctgta ctcccagatc tatttgagta
+    18601 tgaaccaccc tcagaagtat caggacactg aagccctgga aaaggaatgg gtaagatggg
+    18661 aagtcccaga aaccagggaa cttggctgca agtgttacct ttatttattt catatatgta
+    18721 tatttccaga aggacaacgg ttgactggta ttaagaccaa accagtccat cagaatatca
+    18781 tcgaccaaaa tgagaatttc cctgttgggc cattagggtt ggagtcttat taaccctccc
+    18841 ctattcctcg ggcccataat gcattatgag atagaggaga agaaaccagg cacagaagac
+    18901 tctccctgtc atgctttaac cccataccct gtgctccact tatggaggac gtctccccag
+    18961 ttcttgccac tcccacaatc agagaaggtg aggttcataa ttattcagcc tcatctccaa
+    19021 ggaaccttta actggcaaca gagattgttg ggccatcatg agatgctact ttctagctct
+    19081 atgaacataa aatcacgtaa gaaaaagcaa acaacacaaa tacatgaaag ccatttactc
+    19141 ctggtgaatt cctcagggtc ccaggttcaa cactttccgt gatgtcagag tactcagtca
+    19201 gggatgatgg ggacagggtg tcagaacagt cttgatggtc ttgccagcaa cagctttttc
+    19261 ttattttcca taatttggtc ttagtcgttc tccagttgtc ttcatgtaaa taaagtggcc
+    19321 catggcaatc atgattctgt aattgttata gtgcctttgt aagttgacag tttccaaatc
+    19381 cccttactca tacgacccct gtgaaggggg gtgtgaaggg gttggtgggc ttgtgcatat
+    19441 gagggaatgt gaacgatttc attatgaccg aattatgctt tactcaataa gcactcaaac
+    19501 actaccatct cacttgtagt agaagtgcta gggatgcaac caagagactg gttgaataac
+    19561 gggaaggtta aatgcatgag tattttaata gaaaaaaata ttaaaaacaa aaccaaaact
+    19621 cgtatggaaa gaggcttctg actaggccct cctttctagc agtgtttggc acctgataac
+    19681 aatttgacgc tgggggaggg agcagcttga caaggtccac atgaagcagc cttgagattt
+    19741 ttcgcccact tccatttctc ctcagcatca aaggaaagtg catggtaaga agtgaatcag
+    19801 acagcgaggg atctatgcag ttatcctgtg aatgtgtggc aaagccatca ggacgcaaac
+    19861 ctcctgtctt gagtccagag ctgtgtgctt taagatttga ccccagtaaa gacagaatcc
+    19921 ttcaggctcg tcccttccca aacctaaggg gctgtccttg gccactttca acttgaagct
+    19981 gttgagcagt ctttaggata tgtaacagcc tattttttgg acgatatagg aaccacgacc
+    20041 tcttaagtct cttagttctc ctactttata cccatgaggt agaggcaggt gcactccagt
+    20101 gaaggagaag cctcaattca ggttgtactt tggatgaaac atcccctcaa ggcgtagagg
+    20161 aaggctgaga ggttctgccc aggctcgtgt ttagttgcag gaagaggtaa tgtcagatct
+    20221 gagcaggtca gggagactcc ctcgaaaagg tcttcccagg ctgacaagag ggcttttcct
+    20281 cttcctcacc tcccttaggt gagggctgaa agcagatctc ctcacatggg gacctagcta
+    20341 ggaggtcttt tctgagtggg tcctctgatg cctaattaag tgatatcctc tgctgaagct
+    20401 ttttccacag gtagggcacg tgaatggttt ctccccagtg tgtgttctct gatgcctgac
+    20461 gaggtggtcc ctgcgactga agctcttccc acattcgttg catcggcagg gcctcactga
+    20521 ggcgtgtcct ctcaagtgct tggcgaacgc tgcgctgtgg gtgaagcagc gcccgcactc
+    20581 gctgcagagg tagagttcct cagcagcgtg cgtcttccgg tgcgccaggt accgcctgtg
+    20641 ttcactgaag tcctcaccac actctccaca caagtagggt ttgcctccca ggtggattct
+    20701 ttggtgtgtt gttaacacag atttctggct gaagcttttg ccacaaatag tacaaaagaa
+    20761 gggtttttct ccagtatgtg tcctctgatg tctgacaagg tctgaagtcc agcggaagtg
+    20821 ctttccgcaa tcatcacatc tatagggttt ctccactgat ggagttctct cagcctgtgt
+    20881 cacaggggag gtctcttcca aattcttccg gtttagggga tatttgtaag gcgcgttcat
+    20941 cttgtgaacc ttttggtgcc tggcaagatg tgagcttcgt gtgtaacttt ttccacattc
+    21001 catacattta tagggtttct ctcctgtgtg agtcctcagg tgtctaacaa ggtgggagtt
+    21061 acaagtgaag ctctttccac acacagaaca gtcatgatgc ctccctaaca gggggtggac
+    21121 gggtgtagtt tcccgaaggt tcacaaaact attcacttga gaaatattag taccaaatct
+    21181 tctttcatta agtctacctg gattgtcctc aaagactatt tcccaatctg gagtctggtg
+    21241 aatttctggt tctcccaaaa caggcctgtg tatatcctcc aaactcagat cttcctgctc
+    21301 cagacactct tcctcatctt tactcctatc tcctgtagaa agggtgagag gaaaaagggg
+    21361 gtcactgtag cggcaacaat gccttctaca caatggggag atttttacaa agggttttga
+    21421 gatggcaaga ctaaatagaa tgcctgtcct gtgccaagca ctttctcata ttgttatgct
+    21481 atcaacaaac cttggtatgc acaaatataa tcttcatttt acaaatgcag agattgagaa
+    21541 tcgagggttt aagggaattg tacagtaaat ggtatagctg caattccaaa agttaacatt
+    21601 tttctttcac ccttttgtat tacattgcta tcagacaaat gatgggagga gaaaagagga
+    21661 gggcaatgac agaagaaatg gctgtcagag ggagaaagca gtactggctg acatgttaga
+    21721 agaactgatg agagcggggc cctacagaga gagagaagca ctgcctagag gaactgctgg
+    21781 ctcagggtgg gtaacaccgt ttttgtcatt cctcacctgt gtaggtaaaa ctcaggattt
+    21841 ctggctcttg agtctcctga ggctcttgga tatctgggac ccaaggctct tcctctctaa
+    21901 cctgggagat ctcatcaggt ctggggattg gaaatgctgc tcaagagagg gaaaatagga
+    21961 tatcacgatt ggctcaacaa ttccctgtgt taagagcggt ccttttctat ttggtaggtt
+    22021 gtggggcaaa tacatagcta gctcaggtga tgaaatcttt catctcttta gtttgtgtac
+    22081 ttttaaccaa ggactgcgta tctcttgcct ttcttgtggt tttcacctgc aacttaataa
+    22141 ttataccatt gtacacctta tcctcttttc ccatattcaa gaaagattat ctccaactct
+    22201 tactgtttta aaactcatgt gcttacttca aatatttctg cactaaagct gtcatattac
+    22261 aagtttgttt tcctattaga ggtttccatt tccatccatt tatcttttac ttgaaaggac
+    22321 accatatttt tcacctaatc cctcaatttt attggtgggg aaggcataag agaagttcac
+    22381 tgcatacaat ctaaggtcta agatccccca aaatttccag caggaaaaag atctctgggc
+    22441 ttagaacact ctagaatact tgaaattcct tacacagaga gacaacaatt ccacagtctt
+    22501 cttccaagac atattctcca tagaactctt tctgtgttgg gtccagatca ctccactggt
+    22561 cctgggaaaa gcatacggcc acatccttga acgttaccag tccctgaaac cacataagga
+    22621 tttcatcaaa acgtgatgct acgggtagcc tcgtattttg tccctgttga aggtcataca
+    22681 ataggtggac tcctctagga gatgaaagga gcttgagtgg cgggctgctt aaactcattc
+    22741 tcgggggagc catgtgtccc ctgtggcact ggccctgccc cctccaactc caccactttt
+    22801 cctgaggacc agcttatgtt cttcttcttt aaggtattgt ttaaaattta ggaaaggtct
+    22861 taaaaatcat cattcagctc cctacttgac actttaacgc tatatacatt tctgcaaaat
+    22921 tcatcttttc tagtacactt tccctaaaga ctctaatcac tttcaatagt ttctatactc
+    22981 tatagccaaa acaagacttt tttcctcagt tagattacaa tctaaaacca tacatgaatt
+    23041 ctatttcttt ttctccagcc acatggctta gtttcagagc tttgatcatg gtgaatactt
+    23101 cctaaatgtc tcagcagtac agtctgtcat acttacaaat gactatcaaa tttattcctt
+    23161 taaaaaaaaa aaaaactaaa aaaccctcat ttgacaactt cgcagttgta ctggagtgtt
+    23221 acaaaataat cccattttgt tagttcttct agtcaatggc aaataaactg gtgttggtgc
+    23281 ctgtggatcc aatataaggt atgataattt atctcattat ctcctcccct tccaggtacc
+    23341 tgcagaactg tgagcttaaa actggcaaga gtgtaaatgt gcatcctgct tcacctgttt
+    23401 agtatggctt atgcccagtg gttcttcaaa caaattctct gtggtacaga ggtaactagg
+    23461 gcacacctgt gacagagcag taagaagagc aaccatctct gagtctccag agctcctctc
+    23521 tgcaggaagg tctgggtcct cgggcactgg gacctctatg aagaaaagaa ggcaaagatc
+    23581 agtaatatgc agcatgcccc aataatgggc aaaaatctgg gcatccccca ccatctttgg
+    23641 taggaaggtg gccctatcct ttctattatg ggaaccaaga aactggccta agggtagctc
+    23701 ccatatccca atcccatcac ctatgagact atcagaatgc caagagaaca agcaacaatc
+    23761 tgctgtctaa gcttggccct gtttatgggg gactctagct ggcggtactg tctcctcttg
+    23821 acatcctgga ttctgtgatg tctttccata cagtctatct acccagcttc cttggtatag
+    23881 gagaaatgtc cgagaaatgt ccccctcctt gcctgccttc tgccacaatc ccccctttct
+    23941 gctgatgctt cccaccgctc tcctgcaggg tctggagctc ctcttcctgg tgtggacgct
+    24001 gctctgccgg tgcccccaga tctgggctct gtgtggtttc ctcaggagac tgctcggggg
+    24061 tcgagctttg cacaggatcc tgcagctcat taggtgactc aggctccact cctaaatgca
+    24121 ccgtctcctc tgacaggact tcctggccgt gaacatggac agtcacctgg gaataattcc
+    24181 aacttccagt caccatggag tcaggaggaa gttcttggtt gtcacgggag cttattttag
+    24241 gagaagatgc ctagagaaaa cactcccaga gggacccaga caaggagggg aaactccagt
+    24301 ttcaatggag caggcaagga ggagctgaag gttatcctcc ccaggacaag gcaggggatg
+    24361 cgcaggcctg cactgctctc tcaccacccc cagcctgggc ccctgtcacc caaggttcct
+    24421 tcccatctcc acctgtccca cctgcagctc tccttgctgc ctgcttccct agctgaccag
+    24481 ccaggaaggg ggatctgtga ctggggatct atgagaacct tcgaggaagt cacaaaattc
+    24541 tcctctccag gaacccacgg ctcaacattc aaagaaggga aaaccaaagc catgaccctt
+    24601 tggtggctga gacccctcag atggtcccta tacaggtgtt ccaatatcca gaaagtcaca
+    24661 ttcatacaca cacatccccc taatccatgg ggctcatggt atatgagcaa cccaagggca
+    24721 gagcccacta ataatttatg gggatcctgc aagctctccc cgcaaatcca gccttccagc
+    24781 aaatagtccc cctccacatc acacagatca ggactccccc tcctcaccca ccgccttggt
+    24841 ctcctgggtt gtttctgcaa accctccacc agcgtcactg cctcctcgcc actttctggc
+    24901 cgttggcccc gcacccagct ctgtagttct ccaggtagga cggtaagaaa ttgttccagc
+    24961 acaagcagct ctaggatctg ctcctttgtc cgcctctctg gtctcagcca ctggtgacaa
+    25021 agttctcgga gtctgatgag agcttctcta gggcttgctg cctcctggta gcggaagcgt
+    25081 cggaagttct ggtgggaggt ttccagcacc gggtcatccc tctgtaagac agactctggc
+    25141 cgacaggtga aatcatcttc cagtttcacc atcagaattc cctcttcttc ccaaagatcc
+    25201 tggtcctctg gttccacggc tgtagccatt tcttggcttt ggggtggtct cacaccatct
+    25261 agagctcaca ctgtcattag ccccccgcct cagcctggac acagcgagga ttttcttcca
+    25321 agggccctgg atagagaagt aaagacaaac aagagtataa gcatgaaact atacacaagg
+    25381 acaacagcct gagttcaatc ataatcctct acacaaactt cttcaaatgg ttaaatcctc
+    25441 ttttccccaa ccaccatagc cataaatcaa cagatccagt tgggagagga taatttacaa
+    25501 ctaaaggtat ggtcctctac tccctttcct aggcaaggtt catactctgc aagaaaatac
+    25561 tgcatttcca cttcacaaat actgccaatt cccccgctta agtctccaaa ccggttccct
+    25621 tctcttcctt cctacagact gctttagttt gagccttcat catctcctgc ctggactact
+    25681 gctctaactt cctaagatcc ctgcagtact caactataag caaagtacaa cagaattacc
+    25741 tgtggagatt ttgcaaaaca tagtatcaca catcattttt cagacactct gattcagtca
+    25801 attcaggtaa ctgtatgttt agagatgcta cttgcagttc gaatgtgtat ccatggttga
+    25861 gaaacaccaa ccatgaggga gatgtgtaag gttatgagcg aaagttctgg agtcaagctg
+    25921 cctgggtttg aatcctggct ctgtcgctta ctaggtattt ccagagtggg accataggca
+    25981 agttctcatt ctcctaatgc cagtttctct gtctatagaa gggagataat aatggttttt
+    26041 atctaatagg cttactgtaa ggattaaatg tattactaaa gcctatgagc attaaatgta
+    26101 ttactatctt tgcttagaag agtgatggca ccagctatta tctttctaaa ttatagttcc
+    26161 aactatatta ctcagtattt taaaacaaaa cctttttcat gttagtaaaa tcagttccaa
+    26221 aatcctggga atgacacgaa gctctttaca atcagccccc aatctatttt tcaagcttca
+    26281 gatcctacct tttccctgta tcacctggtt ataaaaaatg cattgctgat ctccctacaa
+    26341 cagggtgtct tttcagtctt ccatgtcttt ttgaaggttg ttccttctgc acaggctact
+    26401 ctccaccctt ctcctttcca cgctacatgg tgtgttctct ttctttttaa cgggtcttgt
+    26461 ctttattccc acaatttaca gcttaggtcc ctctcacttc tcatctggat gactgcaact
+    26521 gcctccaaat gtgagctatc tgcctccaga tttgtcacct tcacgtaccc actatgatca
+    26581 gaacgagctc tcttggcaaa cgtgctcatg tccctttcct atctaagatc cttccatcac
+    26641 tccccgctgt cttgttaagt gaaatacaaa cttcaaacat tctacattag gttgacgttc
+    26701 atcttgttat tctcttcgcc ctcctctcca atcctttcag acacacacgc acacacgtgt
+    26761 gtacatgcaa agcacacaag agtcccgtgt tctttagctt ttgaatatgc tgtttcttct
+    26821 gattctttat ccttggttag gttaaacttt aaatttttca agacataagc tcaagcttca
+    26881 tgtcttctga gatgttttct ttgatcccat ttcccctact ctcagacata gttagaaatc
+    26941 acctaagtcc tcccgtagtg acctgggcac ctctctatcc cagtattggt catattacat
+    27001 agccatcgct catctttcct gtgaaactct ttgaggtcag agactatttt ccatcaatct
+    27061 ccaataccta taataaagtg cacaataaat gcatttaaaa tgaataaaac tctactccaa
+    27121 agcttcttcc tatggtaaca ggaagaacca ttgtttcctc cattatgctt ttactatctc
+    27181 ttgattattc accatatcct gttaattcca acttccaaaa aatgtctttc gaatccatcc
+    27241 atctcttcat ctctactgcc atttcccggt tgagccatta tgacccacta gaaggtattc
+    27301 ataggaactt taaaaaaaat tacaatatcc tctgctccta aacggcactg gcacgtaaca
+    27361 ggcacggata cattggagag atgacttagt tcaacctcca tgactggttt tcttggtcct
+    27421 gtccccctcc agaccagtgt ccacaagcag cttaagggct ccatcatttc tcaacctttt
+    27481 tgtgctatac tacagttgcc tcacatccac attcaaggtt atcgatccac tcttaatagc
+    27541 tgtgtctcat ctattgttgc ccatcaaatg agtttacagt caatgatcac attatcttca
+    27601 ttctttctca aaacgcttct aattcatgct cttatgacat ttttttcatt accctcagtc
+    27661 acttgcaaca tactttcaca gtctatcttg gatagctccc taattcctag atcttcatgt
+    27721 gtaatttttc ctctgctatc tctacctcat ggtgatttat ttttttctca agtggcttgt
+    27781 aattttttta ttttatgtcc atctttttac tttgggagtc ctgagagccc tggatttaaa
+    27841 tgtgagcccc ttgaatgtaa tttcatttgt ttcttcctaa gtgctaatgg tttcaatgat
+    27901 cttggacagt ttgctttttt aagactctcc ctctgttgcc caggctggag tgcagtagta
+    27961 caatcatagc tcactacagc ctggatctcc tggcctcaag tgattttcca accttggcct
+    28021 cccaaagcac tgttaccaga ggctgagcca ccatgcctgg ccaaatcagt ttttatgtta
+    28081 atttctcagg gtgttcctga ataatgcttt tgtgtattat ttagatcgat gccttcaagt
+    28141 aatgtttggc ctacccaagg ttctggagaa cccacttcct tgcacttttc ctgggtcagt
+    28201 aggacgattt tcaaaggtcc aggtttgcag gctgcttaat tctgtaactt tcacacagcg
+    28261 tcagcttccc aagtcctatc tccctgttgg agtaccagta ttaaaagctc aatccctaga
+    28321 aaccttattt tggtttggaa tctcactgcg ccatggtaaa gtcagcttat acttattttt
+    28381 ctagtggtta tttctttttc cttctggaac ctatgcgctt tcctttagct tttgagagag
+    28441 agagacagag agagagagag agacagacac acagggagag acagacacac agggagagac
+    28501 agcatgggct gtcctgttcc tgacaggaac aggatttaat aatgggcagg aggcccatcc
+    28561 acattagctc cacctgcaat actccaagta ggtgagtggt gaacttccta acatacaaaa
+    28621 tctgccaatg gcttcccagt gaccttaggt taaaatttaa acttcttaaa atgtgataca
+    28681 caaagacctt caaaagctgg tccctgctac ctctccttcc ttccatgact gccactccca
+    28741 ttgaaagcca ctgcaagaat tactgttctt tcttttctat accctccctg ggctttcctg
+    28801 atgctgttca ccttgtctga agtacccttc tccactcttc atgtggttaa ctctatccat
+    28861 ctttcagagc tcagtttttc caagatgcct tccctggtcc tcccagatga tgcttccaca
+    28921 ggaacttgta tctcccccta tcacagcatg cacatgtgtc tattttttgt gtcatcaaaa
+    28981 ctgttcatct tctgttgaac tcactggact gtaaagctct gagggcaggg atatgctgtg
+    29041 cccagctcga tgcctggcac acaccaggca ctcacttcac atttgatgaa tgcagagaat
+    29101 gcttagtaag gttttaaaag tctgtttcca gaaggataga taccatgtta tattaaccca
+    29161 tattcatcca taagtctagc aaactgtgtg ctggataaag gcatacatag ggaaaaaaac
+    29221 atttattgtt tggattgtga gtaaaacaga attctcttag tggttcccaa gatgatggaa
+    29281 ccacaactca tcacaaggaa tcttcaggtt gcaccgggaa taatccaaat gcaagtcaca
+    29341 caacacttgg catttgaaaa aaaaaaaatc attttccatt gaggatttaa atcaatttta
+    29401 ataagcgcaa aatcagacat caaataaatg gaactacaat actaattatt tatccagaca
+    29461 aagctttcca aaccccaccc caactcctac ccctggaaga cagaattaat acatagggtc
+    29521 ttttggctta ataaaaggac ttctagtatc ttaaaagttt gagaaccgac gtccagtgaa
+    29581 gtagacttgg aatcctcatt gttttcacgc agcactaggt acaagagatt ccacagtatg
+    29641 gatcctacaa cgtatgcatt gacagaagag ttcctgctct ctcaaagggc tttgctctcc
+    29701 aaagaaaaaa gaacatgtaa cagtgccaca ccaaacttgc catttaaaaa tgcccttcag
+    29761 aaatcacaag ctgagatttc tggtgcttgt tttgcattta ctttggcagc ctaggaagat
+    29821 gaaggaaaac agatcagcaa tagaaggaat gaaggaagga gtctttgtgg ggtaagagat
+    29881 gggaaatgaa taggaggtac agcagtgctg tgcacagcac caaactgggt atccaaagac
+    29941 atatgccccg gccccagatc cccaagaaac tggagcttcc ctgagggttt cgtgaagtaa
+    30001 cgtacccaag agcccttgta aaggataaag ccctctgcag ataaccagtc tcctaactca
+    30061 gtgattatct gagatatgag agctttatga actaaaatgt gttgtgcaaa tgtcaagatt
+    30121 taatattaga ggcagaaaac agatgttgtg tttttattct ataattaatg ggatgtgcta
+    30181 tgccataagt ataccttgca atttgttgcc tttgctattc taataccttt cttcaatgtc
+    30241 tgtccttgga agagttccag agtaagacaa aaactataga gagaatacaa tagagagaat
+    30301 atgaaaccag gccaggatta agtaagagac ttggttccag aagacactaa aacagtatct
+    30361 gcaaagatac aacctagtgt cttgagtgtt cctaggatgc ataggattgg agaggagcta
+    30421 tcaaggtaag acctctgtac aatcctcagg ccagctgtct ctgcccctta tctgaaggtg
+    30481 tcacgtggca gtgaagaccc aagctcaatt ctcttacaag aacagaaaac cacaggcaat
+    30541 gaaacaatat ctagttctct accttaaaac gctactagct agggtgatct ttctggaccc
+    30601 taaatgacaa agtctgtagt catataccat caacctaaag gagagacagt gatctgccta
+    30661 tgtactggca aataatcttc agaagaaaat agtgattaga actgaaatta ttatatttag
+    30721 cagatattta tatcccttcc agtccccatt cttattttat accttaatgc aggcctatag
+    30781 tttggtaatt ttaatacaac tacttgctgg ggctaaccag ggcctgtcaa cagctcctct
+    30841 cagtgttcta gaatagaaat tggctaataa aaactgttaa ccgctctttt ccaatgctgc
+    30901 tgctgttctt taaaacgaaa gattagccca gtacaatcct tctaatggac atattctttt
+    30961 attcttgatt gtcctatata caagcgctta cctaagcttt ccacataatg gttagtggcc
+    31021 ttggcacgct gggtttgaat ccctgacctc gaatccctga cctcgcttat tagccttgta
+    31081 acacaatttc cctaagcctc agtcttatcc tcaaaaatgt ggctaatact acttatctca
+    31141 tgtggttgtt ttaaggagta aatgaaataa cctatagaaa cccttcagta tagagtctgg
+    31201 catagggtag gtgctcaata tattttattc attttccttc aagctatttg aaatcgtttt
+    31261 gggaagaagg caagatttaa atgataaaca agcaaacagt tctctattgc atcataatta
+    31321 caaaaaaaca gatggcataa ggaaaatctt ggtcggtcta ttgttatatc aaagaaacca
+    31381 tgctaaacca aaaaaaaaat ccttgctttg gaggcactta tgaagtagga ttaaatgaaa
+    31441 gtgttggaat acagatgaga aggaacctta gacacaggtt ctcccaccga agaccatatc
+    31501 agcaatgctt ctaattacca atatatcaaa gtcctctcca ctccctgtaa ctggcaggag
+    31561 acaatctgag gcaaactcag tgtaccaaga agtcctgtga aaacttaacc tagacagcac
+    31621 aatgaatgaa agggaatgaa ttccctggct tttggccttc taacacgtga ccagtatgct
+    31681 tcaaaacatg tttaaaactg tttctgtaaa atatttttgc cacctaagtt tcccccccgc
+    31741 cccttcataa aagtgatatt tgagtagtga ggggagaaat ggctcaaaat ttgttacaag
+    31801 aggttccaaa gaaagacaat aaggtataga gaaagggaag acaggaagaa agagaaagag
+    31861 gtcaagtatg tagtggatag tacagggaag tcacaagtat cttctgtctg gaaatgctgc
+    31921 agattttgtg gttagcctca agatattagg tgggaataaa catgtcattg agagtaaatg
+    31981 taaacacaaa aatgaaaaat aaaaattgta acaattcact ttattctaaa ctgtttttga
+    32041 tggttatata tggcaggcct aggaggcaat gttagacagt agatagttgc cttcggcgga
+    32101 gggtgacctg tgttctgtcc actgctctac attgtaaaga ggtgtcatct tggacaagtc
+    32161 acggcagctc atgttgagtc ttgggatgat tctttatcta aaggtagaga actgaaccag
+    32221 ctttttgatg gtgatgagga ggggttttga gaggaacaag gatatacagc atctctattg
+    32281 aatatttggg ccataatgta acagcctagg atgctgagat ttactcaaga gctgaggacg
+    32341 gctaggggcc aaacacagat ttttaagttt tgagataggg tttactctgt tgctaagact
+    32401 ggagtgcagt ggcaagatca caactcactg caatctctgc ctcccaggtt caggccatcc
+    32461 tcccacccca gcctcctaag tatctgggac tacagcacat gccaccatgc ccagctaatg
+    32521 tttaaatttt ttatagagaa ggggtcttgc catgttgccc aggctggtct caaatacctg
+    32581 ggctcaagtg atctgccagc tttggcctcc caaagtgtta ggattacagg tgtgagccac
+    32641 tgcacccagc ctcaaataca gatttttaag aggccagatt tatacacctg gccatcatgg
+    32701 tcatgttggt taacctccat catgaagcta ggctttttaa ccttcaggaa aactacccca
+    32761 gagcagttaa tggcctaaat aaaggaggtc aagctatatg tctcagttta aaacagaaca
+    32821 tgacaaatta acctgaagca ggaaaaggag gggacatcaa acctgacacc catcctgata
+    32881 agagtaactg ctctcatttg acagccagag ttaaactaga tgtatgcaat ctctacaaca
+    32941 aacatagtca atgccaatca gatagtcaga ggaaccatca tttaatggga aataacgcaa
+    33001 taacatatat aacagagctg accggaggtg ttgctataga taattcataa cagcagtaaa
+    33061 ttcaaaagtc atttactatg tttttagctt tgtcattaac acagcctgtg tggatactcc
+    33121 ctgtttcacg tctagatgac ttcctcctgg ctttgctcaa gggtccttta gcatttgagg
+    33181 agatgataag ggatgggatc cccgcctgtt gaaccagagg aatctgagga gtaccaggtt
+    33241 tccactgaag actaaatcat gatccagaga aggaatctca gaagtcattc ttgtcgtggg
+    33301 catggcttaa cgtctgtctc cccactgtgg gccctgccct cgagagcaag ggtctcctta
+    33361 ttcattttta tattcccagt gcctaccact ttgtggagcc cttagtagaa aaggtattca
+    33421 ataaacgcac attgagttta agctttctat ttctcctccc ccaactagta tatattacag
+    33481 aagcactgaa aattcagaaa tcagccagtg tcagggctcc tgggtaataa accattcaca
+    33541 gggatagctg taggaatttg gcataagcat ttattccaat ctcttggggg taatcaagtg
+    33601 gcataatcat gccctgcttt actgtgtagc ataaggccag tagagaaaaa catcaagagg
+    33661 ggaggaggac aagggatgga gttcaataca caaatatcta ctgagcgtca attacgtgtc
+    33721 ccacactatt caaggtgctg ggcaaagaaa gataaaaaaa gcagaagctc tgaccttgag
+    33781 gaacttaaca ggtagttaag aaattgcaga attttagcaa tactaggaat tcacagcaaa
+    33841 agcccagaat acaggtgtaa tattttgatc aaaacgtgaa gcccctatca agagaattaa
+    33901 gaaaatcaag gggaagaact tatcaatatt tcacctcaaa tttggcatat agcattcagt
+    33961 tgttttaagt acgctttctt aattttgcga acaaaatatt cagacctaag tgattatcta
+    34021 taaaagatca gattactagt ttccaacaca gaagtactca aacaaaggta ttctgaacct
+    34081 caacctactg caatacatat atcacatttg gccaaaaggt agttgttctc aaagtgaaga
+    34141 acccagatct gcagcatcag tacctgagcc ttgttagaag tacaaattct caagccccag
+    34201 cacagaccta ttaccagaaa ctctaggggt gggccccagc aatatgcgtt taactagccc
+    34261 ttcaggtgct tatagtaagt ttgagaacca ccacctagca aatgaagtaa atctgaagtt
+    34321 catgcagaaa gccattgctg gccaaactaa agggtttgct ggtgagagaa cagcaaaaac
+    34381 aaggggtata gaactgtttc acattttaaa tctgtgccaa atttactggg aaaaaaaaat
+    34441 gtaaacctgt aacttacctc atctgtacaa tttgcaatgg ctgagtccct cacattgtgg
+    34501 taaaccttgt tgtacccttg gaatgattgt ctactgcaat atagtatata cacagagtac
+    34561 aaaaagggta tagctggagg agaaagggtg aaagattatg aatcccaggc gtttggtcct
+    34621 aggaggaaaa attcctgtcc tctctccaac cctgcccacc ctcttatgct agatagtaac
+    34681 agcagaaagt tatcccagac tgctccgcgt tcacacgctt cccaccaaca aacctcctcc
+    34741 caaggctatc ccacaacctt tctgaagtag cttcctggat caagccccac ggggcagctt
+    34801 ggaagagaag ctgggaggga tgaggggctc cactagcatt ccttgtggct tctgagtgta
+    34861 gctgccctgg gtgtcacctg cagagcgtgg atagagaaaa agcaacagtt ctaagcccag
+    34921 cctctaattg agagcctggg atctagcgct gctccccagg accaaggttg ttcaagaaag
+    34981 gctgggtcta agagtctccc tctggggagg tcagtacaga ttccaaggca gacagggtta
+    35041 tcaaagcgcc tagaagatga gaacattcat tgttcagaaa aacaaaaatt agactgacac
+    35101 ctttctgaag gaacacagcc tatccatggc acaaaaacaa atttttaatc aagacactca
+    35161 agaagtaggt atactcctgc taaaagtaac gcaggtgctg agaagaggtg ccaaaactgg
+    35221 ctgttaggag tgggaggaga ggaaagttgc aagtggagac agcagctact ggcttgaggg
+    35281 gacctcaggg tctgtcacag tggcagctga aaagcctctg gccaaatgga cgccaaggac
+    35341 tactatcatc acaggtgaca agatcacggg tgagtgtatc tgggtgtgtg ctggggggct
+    35401 gaggggggct tggtggtgta aggagatggg gctagccggg gcaaggagcc cgcggagatg
+    35461 atcttaagca atctgagagg aggggtaaga aaaagctgcg gaaaataaag gggcctagac
+    35521 cactcctccc tcggttccgg ggggacagtt ggcgcggaag cgttaggagg tacgaggtac
+    35581 ggactaacta catttctgca ggccccagga gggggcggag ggcggtggct ggaaaggaaa
+    35641 ggaagccggg agcgccatcc gtgtccccgg agatactggc tccggccacc aggaaacaca
+    35701 gagaccacaa cgtctcggca ccctcagtac accgccgcca ccccttcccc aggacacggc
+    35761 gatttcccca gactctgggg ggcggggtct ccggtccctc cccctcctcc ccgccctgcc
+    35821 cggctctgcc cgatctggtc cggcccggtc cagcccggcc cggcccagcc cagcccactc
+    35881 aggactcacg gctccgacag ctccggcggc tcctcccctg ccccggcttc tctcccaccc
+    35941 actcccgacc gaaacagcgc cgccggatcc gagccgcgcg gggcctagcg ggtcggaaca
+    36001 cgtggggcct ttgcgcccgg aaccggaagc gatcgctcgc cccgccttct cgccgagtag
+    36061 gagcctttct aggctgtggg aggtttgatc ctctctatgc gccctccgcg ggttgcggct
+    36121 cagtgctccc caaggaccca gtgcgaagaa gcctcggacc ccacagctgc cgtccgcgac
+    36181 ccctaactcc actctgcagc gaataaatta acatcttggg cctttcattt acaggcatac
+    36241 cttctgcaag ggttttcttt ttcttttctt ttttcttttc tgtttttgat cttttgtgtg
+    36301 tttgttttca tttggatacc tctacgtgtt tttcttgatt tttttttctt tgcattgtcc
+    36361 gctgcaacat ggtagaaaag ggcagcgtta ggtttgaata ctggctcttg cagtcaccat
+    36421 cagcagcagg gcctcggccg aatctctcca tctttgaacc ctattgtgca tcatataaaa
+    36481 cgggcataat aataccgtta ctctgaggac taaatgatac attgcacatg aactgaatgg
+    36541 aacatcagaa gctcagaaag aaaatagtta tggtatcact gttctttctt ttcctttgag
+    36601 tgttgcttcg tatttcttta ctttcaggct gtcttccctg atctcatttg ttcatttttg
+    36661 tccgttcggt gacctttctg cccatacttg acattttcct ttgctctctg tcccttttac
+    36721 cctgtaagtc accactctcc acgtgccatg tgccacttcc ttttcatggg acacattgct
+    36781 gggatctgtt tactgggttt gaaccaaagg attagttact tttggtggag tggcagtgta
+    36841 ttagcgaaac agaactaata ggtgtgaacc cagaaggcag gacaactcaa agcggggagg
+    36901 gggcttccag gtcataggta gataagacac aaatggttgc attgagttgc tgattagcct
+    36961 ctccggaagg aggcaatcag atatgcattt atctcagtga gcagagaggt gactttgaat
+    37021 agaaggggag gcaggtttgc cctaagcagt tcccagcatg aattttccct ttagtgattt
+    37081 tgggggcccc aaatcacttt cctttcacac ggggtatgta taaacatata aaaggagatt
+    37141 tattataagg agttggctta cataattatg gacgttgaga agtcccacca tctgcttact
+    37201 gcaagctgga gaaataagga ggttggtggt ataattcagt ctaaagctga aggcctgaga
+    37261 actggaaagg gattgaggtg ctggtgtctg aaggccagag agcaggagga gatgatgtcc
+    37321 cagctcaaga agagagaggt gaatttactt ttcctctgcc tttttatact atttgggccc
+    37381 tcaatggatt ggatggtgcc tacgcatagt ggtgagagtg attttcttta ctcagtctac
+    37441 tgattcaaat gctaatctct ctctcttttt tttttttttt tttttttttg gacagagtct
+    37501 tgctctgtcg cccagcgatc tcggcttact gcaatgtctg cctcctgggt tcaagcaatt
+    37561 ctcctgcctc agcctcccga atagctggga ttacaggcat gtgccaccat gcccagctat
+    37621 tttttttttg tgtgtgtgta tttttagtag agagggggtt tcaccatatt ggccagaaac
+    37681 accctcacag acacaccgac taataatgtt ttctgttgtc tgggcatccc ttagctagtc
+    37741 aagttgacac ataaaattac ccaatacaaa gagaaatata catttcatga aagcctactg
+    37801 tgtaccaaat atttttctct gtaaatctca ttgaatattt taaaatattg gtattaagtg
+    37861 cataaataat acataaacat cttcttctaa acatttaatg cattacagtg ttaaaggaaa
+    37921 aattattcag tgatacttgt tagagcatgg taagggagac cttattcagg accaggaaag
+    37981 gtaaagaaac cactgcaatg aagtcttgct gtagggatga gagattgggc tgaactctga
+    38041 atacagaatg ggcaagtggg aatttatagt caaggaactt tttttggagt ttgggacgtc
+    38101 gatggattga aaattactaa ggggaaacat caggcttaag gcggattctg gctaagtcaa
+    38161 cctaatagga ttcttgctga agacaggcca ggttgatcag acatcacctg gggcgtgatg
+    38221 gaggatgagg acactgacca ggtattaagg atggtcagat atcaaggatg ggggcgttcc
+    38281 tgccaaagga cttagagttc tttgctaaca gtagatttta caaggaagta cgccgatagc
+    38341 cctaggagaa gtttcagaag cctgacaagt ttggcgaaga aaagagattt tgtcaatgga
+    38401 taaagctaat gacttttttt ttttttttgg gacggagtct cactctgcca ccaggctgga
+    38461 gtgcagtggc accatcccgg ctcactgcaa cctcgacctc ccgggttaaa gcgattctcc
+    38521 tgcctcagcc tcccgagtag ctgagactac aggcatgcac caccatgccc agctaatttt
+    38581 tgtattttta gtagagatgg ggtttcacca tgttggctag gatggtctca atctcttgac
+    38641 ctcccgatcc accagcctca gcctcccaca agtgctggga ttacaggcat gagccaccat
+    38701 gcctggccgc taatgacatt tttaccaacc ctggcttcct cctctctctc cagaagttag
+    38761 ctgttcttat gaccctaatg tatttatctt ccagatattt ttattttcat gtgctgtgta
+    38821 catatgtatc cacagaaagt acatagtatt attttctgtg tacttgtgtg tatacctgtg
+    38881 tgttcatgtt gaattcttcc atttgttttc tcttagcagt gggccttgga gatctgttga
+    38941 tagcggtaaa taaaaatctc ccttattcct tctgctttct gcataacatt ccatagtctg
+    39001 aatagcaatt tgcctgttga aaacataagt ttttataatt gttgggatta caaacaatgc
+    39061 tataatgatc accctctttg tgcgtacatg caatggtact ctgagacgga tactgagaaa
+    39121 gagtattgct gggtcaaagg ttgcacactt ttaatgaata ctgccaagtt ttcctcccaa
+    39181 atgaatgcac ctgtttacac ccttaacaac agtatacaag attttttccc tgtatccatg
+    39241 cccttacctc atgttgccgg aattgtaaat ctcagccagt ttaatgggca aaaataatat
+    39301 ctaattgttt tactttgcct tcccatcgtt aatggtgaga ctgaactttc tttcatacat
+    39361 ttgttggtaa tttgtagttt tcctgtgagt tttattttta tattggactt tttgtttttt
+    39421 taaattggtt gttagatgtt ctttatgtat cctagatcct aatgatttta cgtatatatt
+    39481 gcaagagata tattaatttt atgtgtcttg actaagttca tgaaagcctg ctgtgtataa
+    39541 gatgtttttc tctataaatc tcattgaata tttttaaata ttaatattaa attaatgttt
+    39601 tccttaactt tctagattat tgtctaactt tgtagtatta aagtttcaaa tctgatacac
+    39661 tctttccccc gcttcaaatt aggtgtttta tttaagaata tcttttctgc accaaagtca
+    39721 taaaggcttc tcctgtatgt tcttctacta ctttttgctt ttccttttta tggttagatt
+    39781 tggaatttat ttttgtgaat gatataaatt ggggattcta atctaatatc agtggtaatc
+    39841 aatttctact tggaaaacat ggagtatgag atatttacat aatagaggat atatttacat
+    39901 acaaaagata tattccatat gtgatgtgat cagaattctg tcagcgcagt tagctggtcc
+    39961 ttcatttgtc aatgattttt tgagtgactg ctcttacaag acactgtggt aggtatatgg
+    40021 aacagaatgg agaataagaa agacgtaata tctgttctcg atctaacttt caactggtgt
+    40081 gtgtgtgtat gatatgaagg agttggggaa gaagaaaaca gtttaaataa cttacgcaat
+    40141 tcactaactg tggttattta attatgatta attaaattaa ttataattat tgtaagttct
+    40201 ccagagaaat acagagtgct acaagaactt gttactggag gtcctaatcc tggtggtggt
+    40261 gctgttagag aggtagttag ggaaagcttt cttgaatagt gacatttgaa ctgagtctgt
+    40321 ggaatgcctg ggcaggatga gcctgggggt gaggttggga gatggggtat aaaccaaaaa
+    40381 gtgtctgaga cagacctcaa ttgatttaga ggtttatttt gccaaggttg aggatgcacc
+    40441 gtggaaaagt agacacaagt tgcagtagga tctgtggcct gtgctttttc caaagaggac
+    40501 ttcaatattt aaagcagaaa gagcaggcag gagaggaagg aggaaagaag aaaaagggga
+    40561 gggtaggcac ctagcaagaa gtcccattct tgtgaagctc tgaccagtgc tcagcaatcc
+    40621 acattttaca tctgaaaaga aagtagtggg gacaaagtca attatgcata gatctgcatt
+    40681 ttctatgagg taaagtaaat gtgaagtaaa gtaaatatgc ctagaaacaa aaggaatttc
+    40741 agaaacacat gtgtttagaa acaaaaagta tgtgtttaga aacaaaaaaa aaaggaaggc
+    40801 agtttttgcg tgatttagct cccaagcata gcttttccct ttggcatagt gagtttgggg
+    40861 tcccaagatt ttgttttcct tttactgagg tgagtaaaaa agaatacttg aagtaatggg
+    40921 aaatagtgta taccacagct ctgaaatggg aaggagcctt ggtgtactgg agaaactgag
+    40981 aagaggctgg tgtggctgcg gtctgacagc aatagaggaa tttgtgaggc ctgatattgt
+    41041 ggacagagat cagacctaat agactaatgg tcttataggc caaattaact gctgaaatta
+    41101 atgagaagct ggtcaggtat ggtggctcac acctataatc ccagctcttt gggaggttga
+    41161 ggtgggtgga acccttgagc ttaggagttc gagaccagcc taggaaacat ggtaaaaccc
+    41221 tgtctctaca aaaaaaatgc acacacacac tcacactcac ttagctaggt gtggtggcat
+    41281 gcatctgtca tcccagctac tcaggaggct gaggtgggag gatcacttga accccagagg
+    41341 tagaggttgc agtgagccga gatcgtgcca ctgcactcca gcctgggcaa cagagtgaga
+    41401 tgctgtctcc aaaaaataaa taaataaata aataaataaa taaataaata aataaaatta
+    41461 ataaaattaa tgggaagcca ttggagattt tttattaatt aagcaacatg attagatatg
+    41521 cacctttata agatctttct ggtcaccgag gagaatggag gggaccatga gtggatgagg
+    41581 agagataact taggaagcta ttactgtaga tcaggtactt ggactaaagc aataatagta
+    41641 gatatggagg gcattgggtt atttctttaa aactatttga tgcagagcaa agaactaatt
+    41701 gttagaaaaa tggttattgt tacaaaaatt gttaggaaaa taggatagag agaggttatt
+    41761 ttccaaaagt tgaagaaatg gcagggataa ttagtaatag tgaagtgagc tcagctgaaa
+    41821 gtgtgcatag gagttccact ctcctagtta ttcaatccaa gtctggctga acaccaagta
+    41881 tagaaagtga atcgttctgg aagagctatt tgtacatgac atttttataa ttagagtcat
+    41941 ttagacttgc aagaagaatt tgtaattcta agtgacctgg tggtgagcca cttggtcagg
+    42001 cactagatta gatacctgga taagtatata attatctgca ttcaaatgtg tctatattat
+    42061 actttatatt ttctcatatt tgatttttac actctgatcc ccaagttagc tggtcatttg
+    42121 tgaataccat ggagtcataa ggtcctgata aaaacagagg tggctttaga gatatacctt
+    42181 aaggaagaca gcaggagatc tagtctgaga cacgttgaga aaaatggtct gtagctggat
+    42241 aagcttggaa aagaccatat actctatcct ccttttggaa atttataatg catgtgaaga
+    42301 tccctagaaa tgaaaaacaa agtcgagttt attacttgac atgtaaggga gagctatatc
+    42361 attcaatgtg atgggagtga gtctttctga gttaaaagag gaaagatgaa atctaggata
+    42421 agaactggaa gttttgcttc aataaggcag gatcttgaag actatcactg attggttaag
+    42481 tagaggattg taaagtaaag tattattttg gaatgacaat tgctctggtt ttcaataatt
+    42541 atgggacagg tctttttttt cttccaactt ttattttggg tcgagggagt acacgtgcag
+    42601 gtttgttata ggggtaaatt gcttgtcagt gaggtttgat gcacaaatga tcccatcacc
+    42661 caggtagtga gcacagtacc cagtaggtag ttttttgatc ctcactcttc tccctccctc
+    42721 caccctcaag taggccccag tgtctgttgt ctccatggtg tccgtgtgta ctcaatgttt
+    42781 agctcccact tataagtgag aacatgtggt atcttgtttt ctgttcctgc gttaatttgc
+    42841 ttacaataat ggcttccagc tccatccatt ttgctgcaga agacatgatg tcattctttt
+    42901 ttgtggctgc atagtattcc atggtatata tgtaccacat tttctttatc cagtccacca
+    42961 ttgaggggta tttaggttga ttccatgtct ttgctattgt ggaatagtgc tgcaatgaac
+    43021 atatgtgtgc atgtgtcttt atggtagaag gatttatatt cttttgggtt taaacccagt
+    43081 aatgggattg ctgggtcaaa tagcaattct gttttacatt caagaaatct ccaaattgct
+    43141 ttccacagta gctgaactaa tttacattcc caccagcagt gtatattcat tcccctttct
+    43201 ctgcaacctt gccggcatct gttatttctc ttttactttt taataatagc cattctgact
+    43261 ggtatgagat ggtatcccgt tgtagttttg atttgtattt ctttaatgat tagtgatgtt
+    43321 gggcattttt ttccttgtat ttgttggcca catgtatgtc ttcttttgag aagtgtctgt
+    43381 tcatgtcctt tgcccatttt ttaatggaat tgtttgtttt ttgcttgttg aattgtttaa
+    43441 gttccttata gattctggat attagacttt cgtcaggtgc ataattttca aatgttttct
+    43501 cccattctgt aggttgtctg tgtaccctgt tgatagtttc ttttgctgtg cagaagctcc
+    43561 ttagtttaat taggtcccac ttgtcaattt ttgtttttgc tgcagttgcc tttggagtct
+    43621 tcatcatgaa atctttgcca ggtcctatgt ccagaatgct atttcctatg ttttcttcta
+    43681 gggtgtttta tagttttagg ttttatattt aaatctttac tccatcttga gctgattttt
+    43741 gtatatagtg aaagggaggg atccagtttc aatcttctgc atatggctag ccaattgtcc
+    43801 cagcatcatt tattgaatag ggagtccttt ccccattgct tgtttttgtc aactttgttg
+    43861 aagatcagat ggttgcaggt gtgtggcttt atttctgggt tttctaacct gttccattgg
+    43921 tctctgtgtc tgtttctgta tgggcacaag tcttaatgaa ttccaagata ggatgaataa
+    43981 agctatgctg ggttcaaggt catataggag gtactttcat acacatatgc ttgagatttg
+    44041 gaaatttttt tttctaattt agtgttatgg attcttagaa gttcaatagt gaaggaacct
+    44101 acttaatctt ttctaactct gcattccccc cacccagttt gaattctgag tcctgtttat
+    44161 gaatatcctg tggcatcatg aaaacactgc tatactgaag tgtgtgtatt ttcaaaaaca
+    44221 aattaactct aaagttcatt gttaaagatt tgtttcaggt ctatccaggt gctcaacttt
+    44281 gtcattaaag ttattgatac tagaaaaata atatccaggg atctgggtag gttgttcgtg
+    44341 atattgtttg aatggatctt tgtgtaattc cttactcttc tggagaaaag tagaatagga
+    44401 aagacatttt ggctttagag acctgagaaa cagtagggta caatgtgagg tctcaagttc
+    44461 tgtcttgtct ctgtggccac ggctcctttt ctttaaaaca gcactctacc ttctctggac
+    44521 actggaggct gcccacaatc tccttcatgt ctttgcctcc cctgtaggtg aaggaaagtc
+    44581 agattttagg catggttccc tgaaagtctt cttgtaattc atatcattct tgatgttcag
+    44641 gaggcttcac cgttggatag acttctgtaa ttctaggatt tccaactccg agaactaata
+    44701 cccctgcaga ctaataccca cacacacaca aatgcacaca cacacacaca caaacacaca
+    44761 cgttttttcc cggtagaatc agtaccagga acatagcgtg gtacatgata tcaatatatc
+    44821 agtaagacat aaacatgcat gaataaatgt tgtctatttg aagtttcatg aattgaagcc
+    44881 tggtggtatt ctcatacctg ttcttccccc cttcctcctt ttcttggatc tgccaaaatg
+    44941 tttttatcct accctctgtc tgccattaga catgaaaccc ttaaggtcat aggctaggcc
+    45001 cagagaaact aatatttatg agatttgaag gcaaattttt aaaaactttt ttaaaataat
+    45061 aatataaccc agatgttcca acacctctag atttaggtta ccttgagtac ttgcccaagc
+    45121 ccctgcaaat gtgaagtgct tcctaaaatt tctgcgtatt ctgtggccat ctctgtgaga
+    45181 ttctctatgg gctgtttcta atccaaatta tgtcattcca gacatgaaac gttattcctc
+    45241 cttgactcac acacaacgca cacatatatg aatgcatgtg acatgcatgt tctgttcata
+    45301 catgcatttg gtaatataga agactttact gaaggtttga gggggagaca gattaacatt
+    45361 gtagaaatgt gacttcaaac tgagcattag gcatccccat tcttaaactg ctttctaggt
+    45421 gccctgacgc tggggaattt ccactgactt cttcccacag actcctctgc tttgctccat
+    45481 tttggggttc cttttttctt cctttctctg tcattttctt ttttgagaaa tattaactat
+    45541 tacctttttt tcctcattca gggcactcta aaccattctt tgatgacaga gctcatgata
+    45601 atattgatgt ccttaggttc tctctcttcc tttatctcca aatgatctat ttctcctgca
+    45661 cagctccaag acagttccct ctctaagaga aaatcaggta cagcagttga gataataata
+    45721 tttttgcctc attacatttt gttttgaatt ttcaattcaa tattaaaagg aatgtgtgca
+    45781 gtgttgatga aaggagtcaa actctgcaaa atattggaag agatttattc tgagcccaat
+    45841 atgagtgacc atggcctgtg gcacagccct caggagatcc tgagaacatg tgcctaaggt
+    45901 ggttggggca cagcttggat ttatacactt tagagagaca tgagacatca gtcagataca
+    45961 tttaaggtat acattggttc tgtccagaaa ggcaggacaa cttgaagtgg ggagtggggg
+    46021 gcttccaggt tataggtaga tttaaaaatg ttctgattgg caattggttg aaagagttac
+    46081 tatcaataga aaggaatgtc tgggttatga taaggggttg tggagaccaa agttttatca
+    46141 tgcaaatgaa gcttccaggt agtaagcttc agagagaata gattgtacat ttgtaaatgt
+    46201 ttcttatcac acttcaggtc tgtgttgatg ttaaatgctg gttggctttt cctgaattcc
+    46261 aaaagggagg agggcataat gaggtgtgtc tgaccccttc ttcccgtcat gacccaaacc
+    46321 agtctttcag gttaactttg gagtgccctg gccaagagga ggagtccatt cagatggttg
+    46381 agaggccctt agaatttggt ttacagcagt aatttttttt tttagtcata gttttaatgg
+    46441 tcttttaatg ctagtttttc tttactgttg ggaagagaca tatctaatac tcattatcac
+    46501 aaactgaatc acctgcatca tcattcccta aaaagatatc ttgcctgaag acaagggtca
+    46561 gatcagtagg tttgtagcct actttgaggg actttgcaca ctaaccctga aaccagccca
+    46621 attgttccac acaaaagata tttatggatt tttaaatgaa cacagaaatt gacactctgt
+    46681 cttaaaacct gaaacttaca tttgtcttaa ctgagttcct tcctcaggaa actgaccctc
+    46741 aggcaagaga ctgaaaccca ctagatcacc aatccggaca acaagatgcc aggcccctca
+    46801 ttcatcatga tggcttcctt acccctccct aattcctatt ttctcacctt accagctctt
+    46861 cttccttacc cctctctaac tcctattctc cctcccttcc actatatata aacctcccaa
+    46921 atttagttgg ttggggagat ggatttgaga ctctacctcc catttttctt ggctgcagca
+    46981 ttcagttaaa gccttcttcc ctggcaatgc tcagtgactc agtgattgat gttctgtgca
+    47041 gcaaacacca ggacctgact gaaccctggg ggttttggtg acaatcctac atgtgagccc
+    47101 tgaacttcat gtaaccttgg ggctgcattt tccagtagga aagtgtttct agatatattg
+    47161 gctctgagac ttttcctagc ttcctaggta catggtctac ttatattgga gcccagaaac
+    47221 tgctgtaatc catgaactgc aaatggaaat tttttacaag atgttctcaa gttttggtat
+    47281 tatctgagga tgaacacctc taagtttatc taaaaatttg tattatttca agatggaagt
+    47341 ccccttattg agatattatc aaacaaaaag ctgattcatt tcaaatattt tggaaggacc
+    47401 attagaatta ttagaagtct gtagcgtgct tctctcatga taaagatgat aggaactaag
+    47461 aaaatgagat gtggaatgtt ttatgttttt gacaagaaga tgaggcaaag agggtggtca
+    47521 taaaaaaaaa gatggtggtc ataaataaag agggagacag gattccttat acatttttaa
+    47581 agttgttgac atttgtctct gaccttagtg gtttctactt tcagtttgca gttgacaagt
+    47641 ccagagttct acttcatgca tctgaggata tactgggact tccttgttga agaattacat
+    47701 tgtttcttct caagaaaaac tcctagttca tttacatgta acaagttctc atgggagacc
+    47761 ttaaccatca tggtttgctt ttgcatcttt atcttactat agcattgaat cagaacttct
+    47821 gggatttggg gggagcctgt tttaccaaac ttctccaaaa tcctctctca actttcattg
+    47881 tttttatctg cattaagctc actttgatgt gttgcatgtt ttactttgca tgaccagtct
+    47941 ttgggcaagt cattgccttt cttaaggctc ccttcacctc cttgtttcta atagtataaa
+    48001 taaagggatt cagaaggggg gtgaggatag tatttagcac agacaccacc ttattaatct
+    48061 taaaggagga gtgtgctgtg ggtcttaggt acatgaacag aacagccccg tagagcaggg
+    48121 agacaactgt caggtgcgag gcacaggtag agaaagtctt ttggtggcca gtggctgaag
+    48181 gaattcgtag gattgcggac agaatgtaga tataagaaat catattaaag agaagtgacc
+    48241 ctgggatcac aaggatggtt gctatgacgc ccaggagttc caaaatgctg gtgtctatgc
+    48301 aggcggcttt caaactgggc ccaacatcac agtagaaatg actgataaca ttattgccac
+    48361 agaatggcaa ctggatgagc agcatcgtct gacaaaagac aatggtgaag cccaccaccc
+    48421 aggagctcag ggccagctgc aggcagagtt tgctggtcat gatggtgggg tggtgaaggg
+    48481 gattgcagat ggtgaggtag cggtcaaaag acatgatagt gaggatcaag aactcggtgg
+    48541 tgcccacgaa gaagtggaag aaggcctgca gcaggcagca ggacatgcag attactgttc
+    48601 ttgccactac aaaggttcct agcagtttgg ggatgactgt ggtggtgtac cagatttcta
+    48661 agaaagacaa gttacaaagg aagaagtaca ttggaatttg tagcctgggc tcagcccaca
+    48721 cagtggcaat aataagccca ttgcctgcaa gggttaatat gtaggtgaga aagattgcaa
+    48781 taaagagagg tgtttgtagg ccttggataa caggaaagcc tagcaggatg aattctgtga
+    48841 ttactgtgcc atttctcatg tcccttgata ccctagaaag acaagcaaga aagctcctta
+    48901 gaatcttaga gcacagttga ttatagttgg gacaaaaaat aaggaggatt ctgactatgt
+    48961 aaggtccttc gggacctgac tctatttcag gtcctccctc acccacatac aaatacgcac
+    49021 tggccacccc ttctttactg atgatttggc tggttctttt gtcagcaaaa ccagcttcta
+    49081 atggactttg aatgtattaa aaacacttga tgaatgcaaa gtgttttttc acccttatac
+    49141 aactatgaga ataatcatgt gaaatgtggt tgcttcttgt caattattca tataactatt
+    49201 ttcaatgcat agtgtgtata ccaacttatg tacgtaaacg tggctgtgag ttcataagaa
+    49261 attaatccct attctcagag tctgaaaaag aagttactga ttacattaca gatacgttta
+    49321 aggaagccat tattgtgtgt gggtgtgagt gtgtgtctta aaataacccc aaatcattgc
+    49381 tattatatta gtggtgagat aaagttagat aacagaagag gtgtttttct gaagaatcca
+    49441 cttccacatt ctagttcaga ctcctttgct aaaggtatca aaaataacat ttaaaactaa
+    49501 atgaataaaa ctgtttagga agtagagaaa tattcaactc aaattcaaaa tttcagaaca
+    49561 agctgagtca aagcctgtta tatggaagca cctaataaat gtacatggga taaagatctg
+    49621 cagacactaa tctacaaaag tgaaaggtta gcaaatgcta aggataacaa aatgggaact
+    49681 ctgaaaagta catttttgtc aaaatttttt ttgtgactaa atgtcttttt ataccaagta
+    49741 aagtatttct atttgtatat gcaaattcat gggctgagaa catcctttct gttctaccaa
+    49801 aattcagtag aaaatagtaa aactacttcc tttcttattg aggctattct tccatgtggg
+    49861 tttcctttga aatgggtgag acaaattact gtgaaatgaa tttcacattt ttcaaagtac
+    49921 ataagaagct tagcatgaaa gaggcaactt acaggtcatt gataggttgt ttcattcttc
+    49981 cctcatgtga gataactgta attctctgag ggcttgctta gggaagaaaa acacttccat
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/rfam_tests.stk
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/rfam_tests.stk	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/rfam_tests.stk	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,362 @@
+# STOCKHOLM 1.0
+
+#=GF AC   RF00006
+#=GF ID   Vault
+#=GF DE   Vault RNA
+#=GF AU   Bateman A
+#=GF SE   Bateman A
+#=GF SS   Predicted; PFOLD; Bateman A
+#=GF GA   25.00
+#=GF TC   41.33
+#=GF NC   23.46
+#=GF TP   Gene;
+#=GF BM   cmbuild CM SEED
+#=GF BM   cmsearch --local -W 150 CM SEQDB
+#=GF DR   URL; http://vaults.arc.ucla.edu/sci/sci_home.htm;
+#=GF CC   This family of RNAs are found as part of the enigmatic vault
+#=GF CC   ribonucleoprotein complex. The complex consists of a major
+#=GF CC   vault protein (MVP), two minor vault proteins (VPARP and TEP1),
+#=GF CC   and several small untranslated RNA molecules. It has been
+#=GF CC   suggested that the vault complex is involved in drug
+#=GF CC   resistance.
+#=GF CC   We have identified a putative novel vault RNA on chromosome 5
+#=GF CC   EMBL:AC005219.
+#=GF SQ   11
+
+Z11765.1/1-89                        GGUCAGCAACAGCUC.AGCGGUUACUUCUCGA.....CACGGAAUUGUAA
+AF210457.1/105-240                   GGCCAGCUUUAGCUC.AGCGGUUAC..UUCGACAGUGGUUCAGUUCAU.U
+AY007237.1/1-136                     GGCCAGCUUUAGCUG.AGCGGUUAC..UUUGACAGUGUUUCAGUUCAU.U
+Z11771.1/1-137                       GGCCAGCUUUAGCUC.AGCGGUUAC..UUCGACGUGCUCCAGUUUGAGCA
+AC116353.2/181870-181951             GGCUGGCUUUAGCUC.AGCGGUUAC..UUCGCGUGU.............C
+AF058927.1/1-92                      GGYCAGCUUYAGCUC.AGCGGUUAC..UUCGACAGUUCUUUAAUUG...A
+AC005219.1/49915-50008               GGUCGGAGUUAGCUCAAGCGGUUAC..CUCCUCAUGCC...........G
+AF058926.1/1-82                      GGYCAGCWWYAGCUC.AGCGGUUAC..UUCGAGUAC.............A
+AC116353.2/174637-174718             GGCUGGCUUUAGCUC.AGCGGUUAC..UUCGAGUAC.............A
+AC116353.2/166987-167078             GGCUGGCUUUAGCUC.AGCGGUUAC..UUCGACAGUUCUUUAAUUG...A
+Z97054.1/58392-58486                 GGCUGGCUUUAGCUC.AGCGGUUAC..UUCGACAAUGCUUUCCAUGGUUA
+#=GC SS_cons                         .<<<<<.....<<<<..<<<<.......<<<<..................
+
+Z11765.1/1-89                        UUCUG........................AAAACCUUUC...........
+AF210457.1/105-240                   ACCAGCUAUUCGUAGCAGGUUCGAACAACACAACCAACCACUUACCUAAC
+AY007237.1/1-136                     ACCAGCUAUUCGUAGCAGGUUCAAAUAACACAACCAACCACUUGCCUAAC
+Z11771.1/1-137                       GGCUAUGUAACGUGGUCGGUUCGAGCAACACAACCAGCCGCUUGCCUAUC
+AC116353.2/181870-181951             AUCAA........................ACCACCUCUC...........
+AF058927.1/1-92                      AACAA........................GCAACCUGUC...........
+AC005219.1/49915-50008               GACUU........................UCUAUCUGUCCAUCUCUGU..
+AF058926.1/1-82                      UUGUA........................ACCACCUCUC...........
+AC116353.2/174637-174718             UUGUA........................ACCACCUCUC...........
+AC116353.2/166987-167078             AACAA........................GCAACCUGUC...........
+Z97054.1/58392-58486                 GGAAA........................CCAACCUCUC...........
+#=GC SS_cons                         ................................<<<...............
+
+Z11765.1/1-89                        ..........GGGGUUCGAAACCCGCGGGCGCCACCUGAC
+AF210457.1/105-240                   CCGUGAGUGUUUGGUUCGAGACCCGCGGGCGCUCCCUGGC
+AY007237.1/1-136                     CCAUGAGUGUUUGGUUCGAGACCGGCGGGCGCUCCCUGGC
+Z11771.1/1-137                       UGGUGAGUGGUUGGUUCGAGACCCGCGGGCGCUCUCUGGC
+AC116353.2/181870-181951             .......UGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGC
+AF058927.1/1-92                      .......UGGGUGGUUCGARACCCGCGGCCGCUMYCUGGC
+AC005219.1/49915-50008               .......GCUGGGGUUCGAGACCCGCGGGUGCUUACUGAC
+AF058926.1/1-82                      .......UGGGUGGUUCGARACCCGCGGSCGCYMYCUGRC
+AC116353.2/174637-174718             .......UGGGUGGUUCGAGACCCGCGGGUGCUUUCCAGC
+AC116353.2/166987-167078             .......UGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGU
+Z97054.1/58392-58486                 .......UGGGUGGUUUGAGACCCGUGGGCCCUCUCCAGU
+#=GC SS_cons                         ............>>>>>>>...>>>>>>>>.....>>>>>
+//
+# STOCKHOLM 1.0
+
+#=GF AC   RF00007
+#=GF ID   U12
+#=GF DE   U12 minor spliceosomal RNA
+#=GF AU   Griffiths-Jones SR, Mifsud W
+#=GF SE   Shukla GC and Padgett RA, PMID:10199569
+#=GF SS   Published; PMID:10199569
+#=GF GA   10.00
+#=GF TC   11.43
+#=GF NC   undefined
+#=GF TP   Gene; snRNA; splicing;
+#=GF BM   cmbuild CM SEED
+#=GF BM   cmsearch -W 160 CM SEQDB
+#=GF RN   [1]
+#=GF RM   10199569
+#=GF RT   Conservation of functional features of U6atac and U12 snRNAs between
+#=GF RT   vertebrates and higher plants.
+#=GF RA   Shukla GC, Padgett RA;
+#=GF RL   RNA 1999;5:525-538.
+#=GF RN   [2]
+#=GF RM   9149533
+#=GF RT   Pre-mRNA splicing: the discovery of a new spliceosome doubles the
+#=GF RT   challenge.
+#=GF RA   Tarn WY, Steitz JA;
+#=GF RL   Trends Biochem Sci 1997;22:132-137.
+#=GF RN   [3]
+#=GF RM   11864616
+#=GF RT   The divergent U12-type spliceosome is required for pre-mRNA splicing
+#=GF RT   and is essential for development in Drosophila.
+#=GF RA   Otake LR, Scamborova P, Hashimoto C, Steitz JA;
+#=GF RL   Mol Cell 2002;9:439-446.
+#=GF CC   The U12 small nuclear (snRNA), together with U4atac/U6atac, U5,
+#=GF CC   and U11 snRNAs and associated proteins, forms a spliceosome that
+#=GF CC   cleaves a divergent class of low-abundance pre-mRNA introns. 
+#=GF CC   Although the U12 sequence is very divergent from that of U2
+#=GF CC   (Rfam:RF00004), the two are functionally analogous [2].
+#=GF SQ   7
+
+L43844.1/2-149                      .UGCCUUAAACUUAUGAGUAAGGAAAAUAACAACU......CGGGGUGAC
+L43843.1/2-150                      .UGCCUUAAACUUAUGAGUAAGGAAAAUAACGAUU......CGGGGUGAC
+L43846.1/332-460                    .UGCCUUAAACUUAUGAGUAAGGAAAAUAACGAUU......CGGGGUGAC
+L43845.1/357-512                    AUGUCUUAAACUUAUGAGUAAGGAAAAUAACGAUUGUUAUUCGGGGUGAU
+J04119.1/2-130                      .UGCCUUAAACUUAUGAGUAAGGAAAAUAACGAUU......CGGGGUGAC
+Z93241.11/76641-76790               AUGCCUUAAACUUAUGAGUAAGGAAAAUAACGAUU......CGGGGUGAC
+AL513366.11/57716-57871             AUGUCUUAAACUUAUGAGUAAGGAAAAUAACGAUUGUUAUUCGGGGUGAU
+#=GC SS_cons                        ...<<<<<..........>>>>>........<<<<......<<<<.....
+
+L43844.1/2-149                      GCCCGAGUCCUCACUACUGAUGUGAGAGGAAUUUUUGUGCGGGUACAGGU
+L43843.1/2-150                      GCCCGAGUCCUCACUGCUUAUGUGAGAAGAAUUUUUGAGCGGGUAUAGGU
+L43846.1/332-460                    GCCCGAAUCCUCACUGCUAAUGUGAGACGAAUUUUUGAGCGGGUAAAGGU
+L43845.1/357-512                    GCCCGAAUCCUCACUGCUAAUGUGAGACGAAUUUUUGAGCUGGUAAAGGU
+J04119.1/2-130                      GCCCGAAUCCUCACUGCUAAUGUGAGACGAAUUUUUGAGCGGGUAAAGGU
+Z93241.11/76641-76790               GCCCGAAUCCUCACUGCUAAUGUGAGACGAAUUUUUGAGCGGGUAAAGGU
+AL513366.11/57716-57871             GCCCGAAUCCUCACUGCUAAUGUGAGACGAAUUUUUGAGCUGGUAAAGGU
+#=GC SS_cons                        .>>>>>>>><<<<<.......>>>>>...........<<<<<<<...<<<
+
+L43844.1/2-149                      CGUCCCC.GGGUGACCCGCUUACUUCGCGGGAUGCCCAGGUGCAAUGAUC
+L43843.1/2-150                      UGCAAUCUGAGCGACCCGCCUACUUUGCGGGAUGCCUGGGUGACGCGAUC
+L43846.1/332-460                    CGCCCUCAAGGUGACCCGCCUACUUUGCGGGAUGCC..............
+L43845.1/357-512                    CGCCCCUAAGGUGACCAGCCUACUUUGCGGGAUGCCUAGGAGUCGCGAUC
+J04119.1/2-130                      CGCCCUCAAGGUGACCCGCCUACUUUGCGGGAUGCC..............
+Z93241.11/76641-76790               CGCCCUCAAGGUGACCCGCCUACUUUGCGGGAUGCCUGGGAGUUGCGAUC
+AL513366.11/57716-57871             CGCCCCUAAGGUGACCAGCCUACUUUGCGGGAUGCCUAGGAGUCGCGAUC
+#=GC SS_cons                        <<<<.....>>>>>>>.>>>>>>>..<<<<<<<<<.........>>>>>>
+
+L43844.1/2-149                      UGCCCG
+L43843.1/2-150                      UGCCCG
+L43846.1/332-460                    ......
+L43845.1/357-512                    UGCCUG
+J04119.1/2-130                      ......
+Z93241.11/76641-76790               UGCCCG
+AL513366.11/57716-57871             UGCCUG
+#=GC SS_cons                        >>>...
+//
+# STOCKHOLM 1.0
+
+#=GF AC   RF00008
+#=GF ID   Hammerhead_3
+#=GF DE   Hammerhead ribozyme (type III)
+#=GF AU   Bateman A
+#=GF SE   Bateman A
+#=GF SS   Published; PMID:7969422
+#=GF GA   29.00
+#=GF TC   29.53
+#=GF NC   28.96
+#=GF TP   Gene; ribozyme;
+#=GF BM   cmbuild CM SEED
+#=GF BM   cmsearch -W 130 CM SEQDB
+#=GF RN   [1]
+#=GF RM   7969422
+#=GF RT   Three-dimensional structure of a hammerhead ribozyme.
+#=GF RA   Pley HW, Flaherty KM, McKay DB;
+#=GF RL   Nature 1994;372:68-74.
+#=GF RN   [2]
+#=GF RM   9506521
+#=GF RT   The structural basis of hammerhead ribozyme self-cleavage.
+#=GF RA   Murray JB, Terwey DP, Maloney L, Karpeisky A, Usman N, Beigelman L,
+#=GF RA   Scott WG;
+#=GF RL   Cell 1998;92:665-673.
+#=GF RN   [3]
+#=GF RM   10899150
+#=GF RT   Distribution of hammerhead and hammerhead-like RNA motifs through the
+#=GF RT   GenBank.
+#=GF RA   Ferbeyre G, Bourdeau V, Pageau M, Miramontes P, Cedergren R;
+#=GF RL   Genome Res 2000;10:1011-1019.
+#=GF CC   The hammerhead ribozyme is one of the smallest catalytic
+#=GF CC   RNAs. These RNAs have an endonuclease function, and most
+#=GF CC   often are autocatalytic. Structurally it is composed of
+#=GF CC   three base paired helices, separated by short linkers of
+#=GF CC   conserved sequence. These helices are called I, II and III.
+#=GF CC   We have classified hammerhead ribozymes into three types
+#=GF CC   based on which helix the 5' and 3' ends of the sequence
+#=GF CC   join. This family are the type III hammerheads.
+#=GF CC   The conserved uridine-turn links helix I 
+#=GF CC   to helix II and usually has the sequence CUGA.  Helix II and
+#=GF CC   III are linked by a sequence CGAAA.  The cleavage reaction 
+#=GF CC   occurs between helix III and I, and is usually a C.
+#=GF CC   Hammerhead ribozymes are found in plant viroids and other
+#=GF CC   small replicating sattelite RNA species.  Hammerhead
+#=GF CC   ribozymes have been found in animals as well as plants.
+#=GF CC   This family includes a couple of false matches currently
+#=GF CC   these are EMBL:AC078923 and EMBL:BC050488. These animal
+#=GF CC   sequences are not expected to be hammerhead ribozymes.
+#=GF SQ   84
+
+AJ295015.1/58-1                ACAGAGUC.UGACAAA......CCGUCACUGAAGACGUUCAACUU.....
+AJ295018.1/58-1                ACAGGGUC.UGACAAA......CCGUCACUGAAGACGUUCAACUU.....
+AJ536620.1/206-152             CCACCGUC.GGAAAGUG.UGCGCUUUCCCUGAUGAGCCCAA.........
+AJ536615.1/1-44                .........GGGUGGUG.UGUACCAUCCCUGAUGAGUCCAA.........
+AJ536612.1/206-152             UCACCGUC.GGAAAGUG.UGCGCUUUCCCUGAUGAGCCCGA.........
+AJ536617.1/1-40                .........GGGUGGUGUGUA.CCACCCCUGAUGAGUCCGA.........
+AJ536614.1/206-152             UCACCGUC.GGAAAGUG.UGCGCUUUCCCUGAUGAGCCCAA.........
+AJ536620.1/1-40                .........GGGUGGUGUGUG.CCACCCCUGAUGAGUCCGA.........
+AJ536619.1/206-152             CCACCGUC.GGAAAGUG.UGUACUUUCCCUGAUGAGUCCGA.........
+AJ550911.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGAUGAGUUGCUGA.......
+AF170503.1/280-333             GAAAGGUC.UGUGCUU......AGCACACUGACGAGUUCCUGA.......
+AF170504.1/284-337             GAUAAGUC.UGUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ550912.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGAUGAGUUGCUGA.......
+AJ247113.1/134-53              .UCCAGUC.GAGACCUGAAGUGGGUUUCCUGACGAGGCUGUGGAGAGAGC
+AJ241833.1/282-334             AAAGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ241841.1/57-3                CAAAAGUU.UGGGCUAA.....AGCCCACUGAUGAGCCGCUGA.......
+AJ241839.1/282-334             GAUGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ550901.1/282-334             GAAGAGUC..GCGCUA......AGCGCACUGAUGAGUCUUUGA.......
+AJ005298.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGUUGA.......
+AJ241819.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGUUGA.......
+AJ005320.1/281-333             GAAGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ005310.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGCUGA.......
+AF339740.1/56-3                CAUAAGUC.UGGGCUU......AGCCCACUGAUGAGCCGUUGA.......
+AJ241828.1/56-3                CAUAAGUC.UGGGUUA......AGCCCACUGAUGAGCCGUUGA.......
+AJ550906.1/56-3                CAUAAGUC.UGGGCUU......AGCCCACUGAUGAGUCGCUGC.......
+AJ550903.1/281-333             GAAGAGUC..GUGCUU......AGCACACUGAUGAGUCUCUGA.......
+AJ241831.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGUUGA.......
+AJ241840.1/56-3                CAGAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGCUGA.......
+AJ005303.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGUUGA.......
+AJ005312.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGCUGA.......
+AJ550906.1/282-334             GAAGAGUC..GUGCUU......AGCACACUGAUGAGUCUCUGA.......
+AJ550907.1/56-3                CAUAAGUC.UGGGCUU......AGCCUACUGAUGAGUCGCUGC.......
+AF170503.1/55-3                CAUAAGUC.UGGGCUU......AG.CCACUGACGAGUCGCUGG.......
+Y14700.1/133-53                .UCCAGUC.GAGACCUGAAGUGGGUUUCCUGAUGAGGCUGUGGAGAGAGC
+AJ005321.1/281-333             GAAGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ241845.1/282-335             GAUGAGUC.UGUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ005322.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGACGAGCCGUUGA.......
+M83545.1/56-3                  CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGUCGCUGA.......
+AJ005305.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGACGAGCCGCUGG.......
+AJ550908.1/281-334             GAAGAGUC.UGCGCUU......AGCGCACUCAUGAGUCUCUGA.......
+AJ550909.1/56-3                CAUAAGUC.UGGGCUU......AGCCCACUGAUGAGUUGCUGC.......
+AJ241843.1/56-3                CAUAAGUC.UGGGCUU......AGCCCACUGAUGAGCCACUGA.......
+AF170523.1/55-3                CAAAAGUC.UGGGCUU......AG.CCACUGAUGAGCCGUUGA.......
+AF170509.1/56-3                CAUAAGUC.UAGGCUU......AGCCCACUGAUGAGCCGUUGA.......
+AJ241847.1/281-334             GAAGAGUC.UGUGCUA......AGCACACUGAUGAGUUUCUGA.......
+AJ241823.1/282-335             AAAGAGUC.UGUGCUA......AGCACACUGAUGAGUCUCUAA.......
+AJ247122.1/132-52              .UCCAGUC.GAGACCUGAAGUGGGUUUCCUGACGAGGCUGUGGAGAGAGC
+AJ005300.1/282-335             UAAGAGUC.UGUGGUA......AGCACACUGAUGAGUCUCUGA.......
+AJ005302.1/281-334             AAAGAGUC.UGUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ005318.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGUUGA.......
+AF339739.1/56-3                CAUAAGUC.UGGGCUU......AGCCCACUGAUGAGGCGUUGA.......
+AJ241838.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGAUGAGCUGCUGA.......
+AJ247121.1/133-53              .UCCAGUC.GAGACCUGAAGUGGGUUUCCUGACGAGGCUGUGGAGAGAGC
+AJ550911.1/282-335             GAAGAGUC.UGUGCUA......AGCACACUGACGAGUCUCUGA.......
+AJ550898.1/282-335             GAAGAGUC.UGCGCUA......AGCGCACUGAUGAGUCUUUGA.......
+AF170516.1/283-335             GAAGAGUC..GUGCUU......AGCACACUGAUGAGUCUCUGA.......
+AJ005319.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGUUGA.......
+AJ550899.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCUUUGC.......
+AF170519.1/55-3                CAUAAGUC.UGGGCUU......AG.CCACUGAUGAGCCGUUGA.......
+AJ247116.1/133-53              .UCCAGUC.GAGACCUGAAGUGGGUUUACUGAUGAGGCUGUGGAGAGAGC
+AJ550907.1/281-333             AAAGAGUC..GCGCUU......AGCGCACUGAUGAGUCUCUGA.......
+AJ241850.1/282-334             UAAGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AF170499.1/56-3                CAAAAGUC.UGGGCUA......AGCCCACUGAUGAGCCGCUGA.......
+AF170520.1/282-335             GAUAAGUC.UGUGCUU......AGCACACUGAUGAGUCUCUGA.......
+AJ005299.1/282-335             UAAGAGUC.UGUGCUA......AGCACACUGAUGAAUCUCUGA.......
+AJ005312.1/282-335             GAUGAGUC.UGUGCUA......AGCACACUGAUGAGUCUAUGA.......
+AJ550909.1/282-333             GAAGAGUC..GCGCUU......AGCGCACUGAUGAGUCUCUGA.......
+AJ005322.1/281-334             GAAGAGUC.UGUGCUA......AGCACACUGACGAGUCUCUGA.......
+AJ005294.1/282-334             UAAGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ247123.1/132-52              .UCCAGUC.GAGACCUGAAGUGGGUUUCCUGAUGAGGCUGUGGAGAGAGC
+AJ550900.1/56-3                CAUAAGUC.UGGGCUA......AGCCCACUGAUGAGCCUUUGC.......
+AJ241830.1/282-334             AAAGAGUC..GUGCUA......AGCACACUGAUGAGUCUCUGA.......
+AJ241831.1/281-334             GAAGAGUC.UGUGCUA......AGCACACUGAUGAGUCUCUGA.......
+M83545.1/282-335               GAAGAGUC.UGUGCUA......AGCACACUGACGAGUCUCUGA.......
+AJ550910.1/282-336             GAAGAGUCCUGCGCUU......AGCGCACUGACGAGUCUCUGA.......
+AJ005314.1/281-334             AAAGAGUC.UGUGCUA......AGCACACUGACGAGUCUCUGA.......
+Y12833.1/339-285               CCGCUAUA.UGGGGAUGUGUG.UCCCUACUGACGAGUUCAA.........
+M63666.1/246-192               CCGGUGUC.UCAAGGUGCGUA.CCUUGACUGAUGAGUCCGA.........
+J02439.1/42-95                 UGUCCGUA..GUGGAUGUGUA.UCCACUCUGAUGAGUCCGA.........
+J02386.1/42-95                 UGUCCGUA..GUGGAUGUGUA.UCCACUCUGAUGAGUCCAA.........
+M33000.1/55-110                ACGCUGUC.UGUACUUGUAUC.AGUACACUGACGAGUCCCU.........
+M33001.1/56-111                ACGCUGUC.UGUACUUAUAUC.AGUACACUGACGAGUCCCU.........
+D00685.1/1-46                  .........GCCAGACGU.GGACCCGGCCUGAUGAGUCCGAAA.......
+M17439.1/1-48                  .........ACCGGAUGUGCUUUCCGGUCUGAUGAGUCCGU.........
+#=GC SS_cons                   .<<<<<<..<<<<<.........>>>>>.......<<<<...........
+
+AJ295015.1/58-1                ...............GCGUUGAACAGAAACUCUGC
+AJ295018.1/58-1                ...............GCGUUGAACAGAAACUCUGC
+AJ536620.1/206-152             ...................AAGGGCGAAACGGUAC
+AJ536615.1/1-44                ...................AAGGACGAAAUGG...
+AJ536612.1/206-152             ...................AAGGGCGAAACGGUAC
+AJ536617.1/1-40                ...................AAGGACGAA.......
+AJ536614.1/206-152             ...................AAGGGCGAAACGGUAC
+AJ536620.1/1-40                ...................AAGGACGAA.......
+AJ536619.1/206-152             ...................AAGGACGAAACGGUAC
+AJ550911.1/56-3                .................AACGCAACGAAACUUUUG
+AF170503.1/280-333             .................AAUGGAACGAAACCUUUU
+AF170504.1/284-337             .................AAUGAGACGAAACUUAUC
+AJ550912.1/56-3                .................GAUGCGACGAAACUUUUG
+AJ247113.1/134-53              AAAUUGCUUUACUCCCGCACAAGCCGAAACUGGA.
+AJ241833.1/282-334             .................AAUGAGACGAAACUCUUU
+AJ241841.1/57-3                .................AAUGCGGCGAAACUUUUG
+AJ241839.1/282-334             .................AAUGAGACGAAACUCAUA
+AJ550901.1/282-334             .................GAUAAGACGAAACUCUUC
+AJ005298.1/56-3                .................GAUACGGCGAAACUUUUG
+AJ241819.1/56-3                .................AAUACGGCGAAACUUUUG
+AJ005320.1/281-333             .................AAUGAGACGAAACUCUUU
+AJ005310.1/56-3                .................AAUGCGGCGAAACUUUUG
+AF339740.1/56-3                .................GAUACGGCGGAACUUAUG
+AJ241828.1/56-3                .................GAUACGGCGAAACUUAUG
+AJ550906.1/56-3                .................GAUGCGACGAAACUUAUG
+AJ550903.1/281-333             .................AAUGAGACGAAACUCUUU
+AJ241831.1/56-3                .................GAUACGGCGAAACUUCUG
+AJ241840.1/56-3                .................AAUGCGGCGAAACUUUUG
+AJ005303.1/56-3                .................GAUACGGCGAAACUUUUG
+AJ005312.1/56-3                .................AAUGCGGCUAAACUUUUG
+AJ550906.1/282-334             .................GAUGAGACGAAACUCUUC
+AJ550907.1/56-3                .................GAUGCGACGAAACUUAUG
+AF170503.1/55-3                .................GAUACGGCGAAACUUAUG
+Y14700.1/133-53                GAAAGCUUUACUCCCA.CACAAGCCGAAACUGGA.
+AJ005321.1/281-333             .................AAUGAGACGAAACUCUUG
+AJ241845.1/282-335             .................AAUGAGACGAAACUCAUG
+AJ005322.1/56-3                .................GAUACGGCGAAACUUAUG
+M83545.1/56-3                  .................AAUGCGACGAAACUUAUG
+AJ005305.1/56-3                .................GAUACGGCGAAACUUUUG
+AJ550908.1/281-334             .................GAUGAGACGAAACUCUUC
+AJ550909.1/56-3                .................GAUGCAACGAAACUUAUG
+AJ241843.1/56-3                .................AAUGCGGCGAAACUUUUG
+AF170523.1/55-3                .................GAUACGGCGAAACUUUUG
+AF170509.1/56-3                .................GAUACGGCGAAACUUAUG
+AJ241847.1/281-334             .................AAUGAGACGAAACUCUUG
+AJ241823.1/282-335             .................AAUGAGACGAAACUCUUU
+AJ247122.1/132-52              UAUUGCUUUACUCCCG.CACAAGCCGAAACUGGA.
+AJ005300.1/282-335             .................AAUGAGACGAAACUCUUG
+AJ005302.1/281-334             .................AUUGAGACGAAACUCUUG
+AJ005318.1/56-3                .................GAUACGGUGAAACUUAUG
+AF339739.1/56-3                .................GAUACGGCGAAACUUAUG
+AJ241838.1/56-3                .................AAUGCGGCAAAACUUUUG
+AJ247121.1/133-53              UUUCGCUUUACUCCCG.CACAAGCCGAAACUGGA.
+AJ550911.1/282-335             .................AAUGAGACGAAACUCUUC
+AJ550898.1/282-335             .................AAUAAGACGAAACUCUUC
+AF170516.1/283-335             .................AAUGAGACGAAACUCUUC
+AJ005319.1/56-3                .................GAUACGGCGAAACUUAUG
+AJ550899.1/56-3                .................GAUAAGGCGAAACUUAUG
+AF170519.1/55-3                .................GAUACGGCGAAACUUAUG
+AJ247116.1/133-53              GAAAGCUUUACUCCCA.CACAAGCCGAAACUGGA.
+AJ550907.1/281-333             .................GAUGAGACGAAACUCUUC
+AJ241850.1/282-334             .................AAAGAGACGAAACUCUUU
+AF170499.1/56-3                .................AAUGCGGCGAAACUUUUG
+AF170520.1/282-335             .................AAUGAGACGAAACUUAUC
+AJ005299.1/282-335             .................AAUGAGACGAAACUCUUG
+AJ005312.1/282-335             .................AAUGAGACGAAACUCAUA
+AJ550909.1/282-333             ..................AUGAGACGAAACUCUUC
+AJ005322.1/281-334             .................AAUGAGACGAAACUCUUU
+AJ005294.1/282-334             .................AAUGAGACGAAACUCUUG
+AJ247123.1/132-52              GAAAGCUUUACUCCCG.CACAAGCCGAAACUGGA.
+AJ550900.1/56-3                .................GACAAGGCGAAACUUAUG
+AJ241830.1/282-334             .................AAUGAGACGAAACUCUUG
+AJ241831.1/281-334             .................AAUGAGACGAAACUCUUA
+M83545.1/282-335               .................GAUGAGACGAAACUCUUC
+AJ550910.1/282-336             .................GAUGAGACGAAACUCUUC
+AJ005314.1/281-334             .................AAUGAGACGAAACUCUUU
+Y12833.1/339-285               ...................AAGAACGAAAUAGUUA
+M63666.1/246-192               ...................AAGGACGAAACACCAG
+J02439.1/42-95                 ...................AAGGACGAAACGGAUG
+J02386.1/42-95                 ...................AAGGACGAAACGGAUG
+M33000.1/55-110                ..................AAAGGACGAAACAGCGC
+M33001.1/56-111                ..................AAAGGACGAAACAGCGC
+D00685.1/1-46                  .....................GGACGAAACAGUA.
+M17439.1/1-48                  ...................GAGGACGAAACAGGAC
+#=GC SS_cons                   .....................>>>>...>>>>>>.
+//
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/roa1.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/roa1.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/roa1.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,448 @@
+ID   HSHNCPA1   standard; RNA; HUM; 1198 BP.
+XX
+AC   X79536;
+XX
+SV   X79536.1
+XX
+DT   03-JUN-1994 (Rel. 39, Created)
+DT   03-JUN-1994 (Rel. 39, Last updated, Version 1)
+XX
+DE   H.sapiens mRNA for hnRNPcore protein A1
+XX
+KW   core protein.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; Eutheria;
+OC   Primates; Catarrhini; Hominidae; Homo.
+XX
+RN   [2]
+RP   1-1198
+RA   Leffers H.;
+RT   ;
+RL   Submitted (01-JUN-1994) to the EMBL/GenBank/DDBJ databases.
+RL   H. Leffers, Inst. of Medical Research Biochemistry & Danish Centre for
+RL   Human Genome Research, Ole Worms Alle 170, Aarhus Univ., 8000 Aarhus C,
+RL   DENMARK
+XX
+RN   [3]
+RA   Knudsen S.M., Leffers H.;
+RT   "Cloning and sequencing of a splice variant of human hnRNP core protein
+RT   A1";
+RL   Unpublished.
+XX
+DR   SWISS-PROT; P09651; ROA1_HUMAN.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1198
+FT                   /db_xref="taxon:9606"
+FT                   /organism="Homo sapiens"
+FT                   /tissue_type="lung"
+FT                   /cell_type="fibroblast"
+FT                   /cell_line="MRC5"
+FT                   /clone_lib="lambda ZapII"
+FT                   /clone="HDP1"
+FT   CDS             27..989
+FT                   /db_xref="SWISS-PROT:P09651"
+FT                   /product="hnRNPcore protein A1"
+FT                   /protein_id="CAA56072.1"
+FT                   /translation="MSKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVV
+FT                   MRDPNTKRSRGFGFVTYATVEEVDAAMNARPHKVDGRVVEPKRAVSREDSQRPGAHLTV
+FT                   KKIFVGGIKEDTEEHHLRDYFEQYGKIEVIEIMTDRGSGKKRGFAFVTFDDHDSVDKIV
+FT                   IQKYHTVNGHNCEVRKALSKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNF
+FT                   SGRGGFGGSRGGGGYGGSGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNF
+FT                   GGRSSGPYGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF"
+FT   misc_feature    995
+FT                   /note="alternative splice site"
+FT   polyA_signal    1180..1185
+XX
+SQ   Sequence 1198 BP; 334 A; 216 C; 336 G; 312 T; 0 other;
+     ttaaagtctc tcttcaccct gccgtcatgt ctaagtcaga gtctcctaaa gagcccgaac        60
+     agctgaggaa gctcttcatt ggagggttga gctttgaaac aactgatgag agcctgagga       120
+     gccattttga gcaatgggga acgctcacgg actgtgtggt aatgagagat ccaaacacca       180
+     agcgctctag gggctttggg tttgtcacat atgccactgt ggaggaggtg gatgcagcta       240
+     tgaatgcaag gccacacaag gtggatggaa gagttgtgga accaaagaga gctgtctcca       300
+     gagaagattc tcaaagacca ggtgcccact taactgtgaa aaagatattt gttggtggca       360
+     ttaaagaaga cactgaagaa catcacctaa gagattattt tgaacagtat ggaaaaattg       420
+     aagtgattga aatcatgact gaccgaggca gtggcaagaa aaggggcttt gcctttgtaa       480
+     cctttgacga ccatgactcc gtggataaga ttgtcattca gaaataccat actgtgaatg       540
+     gccacaactg tgaagttaga aaagccctgt caaagcaaga gatggctagt gcttcatcca       600
+     gccaaagagg tcgaagtggt tctggaaact ttggtggtgg tcgtggaggt ggtttcggtg       660
+     ggaatgacaa cttcggtcgt ggaggaaact tcagtggtcg tggtggcttt ggtggcagcc       720
+     gtggtggtgg tggatatggt ggcagtgggg atggctataa tggatttggc aatgatggaa       780
+     gcaattttgg aggtggtgga agctacaatg attttgggaa ttacaacaat cagtcttcaa       840
+     attttggacc catgaaggga ggaaattttg gaggcagaag ctctggcccc tatggcggtg       900
+     gaggccaata ctttgcaaaa ccacgaaacc aaggtggcta tggcggttcc agcagcagca       960
+     gtagctatgg cagtggcaga agattttaat tagggaggag tctgctacta gtcttatcag      1020
+     ctcttaaaaa cagaaactca tctgtccaag ttcgtggcag aaaggaacgt ccttgtgaag      1080
+     acctttatct gagccactgt acttcgttat cacgccatgc agtttacatg agctgttctg      1140
+     cagctcgaaa ttccattttg tgaatgggtt ttttttttta ataaactgta tttaactt        1198
+//
+ID   HSHNRNPA   standard; DNA; HUM; 5368 BP.
+XX
+AC   X12671;
+XX
+SV   X12671.1
+XX
+DT   23-NOV-1989 (Rel. 21, Created)
+DT   24-APR-1993 (Rel. 35, Last updated, Version 3)
+XX
+DE   Human gene for heterogeneous nuclear ribonucleoprotein (hnRNP) core
+DE   protein A1
+XX
+KW   hnRNP A1 proten; ribonucleoprotein; RNA binding protein.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; Eutheria;
+OC   Primates; Catarrhini; Hominidae; Homo.
+XX
+RN   [1]
+RP   1-5368
+RA   Riva S.;
+RT   ;
+RL   Submitted (23-AUG-1988) to the EMBL/GenBank/DDBJ databases.
+RL   Riva S., Consiglio Nazionale Delle Ricerche, Istituto Di Genetica
+RL   Biochimica Ed Evoluzionistica CNR, Via Abbiategrasso 2D7, 27100 Pavia,
+RL   Italy.
+XX
+RN   [2]
+RP   1-5368
+RA   Biamonti G., Buvoli M., Bassi M.T., Morandi C., Cobianchi F., Riva S.;
+RT   "Isolation of an active gene encoding human hnRNP protein A1";
+RL   J. Mol. Biol. 207:491-503(1988).
+XX
+DR   SWISS-PROT; P09651; ROA1_HUMAN.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..5368
+FT                   /db_xref="taxon:9606"
+FT                   /organism="Homo sapiens"
+FT                   /clone="pES5"
+FT                   /tissue_type="liver"
+FT                   /clone_lib="lambdaCh4A."
+FT   mRNA            join(695..813,1377..1493,1789..1935,2084..2294,2388..2480,
+FT                   2567..2659,2794..2868,3806..3961,4252..4311,4543..5240)
+FT   CDS             join(799..813,1377..1493,1789..1935,2084..2294,2388..2480,
+FT                   2567..2659,2794..2868,3806..3961,4252..4307)
+FT                   /db_xref="SWISS-PROT:P09651"
+FT                   /product="hnrnp a1 protein"
+FT                   /protein_id="CAA31191.1"
+FT                   /translation="MSKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVV
+FT                   MRDPNTKRSRGFGFVTYATVEEVDAAMNARPHKVDGRVVEPKRAVSREDSQRPGAHLTV
+FT                   KKIFVGGIKEDTEEHHLRDYFEQYGKIEVIEIMTDRGSGKKRGFAFVTFDDHDSVDKIV
+FT                   IQKYHTVNGHNCEVRKALSKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNF
+FT                   SGRGGFGGSRGGGGYGGSGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNF
+FT                   GGRSSGPYGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF"
+FT   exon            695..813
+FT                   /number=1
+FT   misc_feature    695..695
+FT                   /note="mRNA initiation site"
+FT   misc_feature    715..715
+FT                   /note="mRNA initiation site"
+FT   misc_feature    735..735
+FT                   /note="mRNA initiation site"
+FT   intron          814..1376
+FT                   /number=1
+FT   exon            1377..1493
+FT                   /number=2
+FT   intron          1494..1788
+FT                   /number=2
+FT   exon            1789..1935
+FT                   /number=3
+FT   intron          1936..2083
+FT                   /number=3
+FT   exon            2084..2294
+FT                   /number=4
+FT   intron          2295..2387
+FT                   /number=4
+FT   exon            2388..2480
+FT                   /number=5
+FT   intron          2481..2566
+FT                   /number=5
+FT   exon            2567..2659
+FT                   /number=6
+FT   intron          2660..2793
+FT                   /number=6
+FT   exon            2794..2868
+FT                   /number=7
+FT   intron          2869..3805
+FT                   /number=7
+FT   exon            3806..3961
+FT                   /number=8
+FT   intron          3962..4251
+FT                   /number=8
+FT   exon            4252..4311
+FT                   /number=9
+FT   intron          4312..4542
+FT                   /number=9
+FT   exon            4543..5240
+FT                   /number=10
+XX
+SQ   Sequence 5368 BP; 1476 A; 1052 C; 1270 G; 1570 T; 0 other;
+     gggattgaga gtgatcactc acgctaacgt ctgccctgtt cctgtatggt gaggccgcac        60
+     cacaagccac caccgccgcc gccttctgcg caacgccaac cgcccgccaa aacggatcct       120
+     tccctgcgcc tgcgcaacca atcttgggac cggacctttt ttctccgccc actacgcatg       180
+     cgcaaagcta ggacaaactc ccgccaacac gcaggcgccg taggttcact gcctactcct       240
+     gcccgccatt tcacgtgttc tcagaggcag gtggaacttc ttaatgcgcc tgcgcaaaac       300
+     tcgccatttt actacacgtg cggtcaacaa gagttcattg caaaaaaatt gttacctcct       360
+     agctgcttgt ctaatacata gtgttaatca tgctttgcca agcgacttga ctgtaatatt       420
+     tgcgcgtgga agattaaaaa gatgttaaac acccaaggta gattcaaatg tgaatgattg       480
+     gtcggttggc caatcagact ggttaacaat aacattactc gggaaccaat ggactccaag       540
+     gggtggagac ggcgtagaac gaccgaagga atgacgttac acagcaatgt ggcaccacag       600
+     gccaatagca gggggaagcg atttcaagta tccaatcaga gctgttctag ggcggagtct       660
+     accaatgccg aaagcgagga ggcggggtaa aaaagagagg gcgaaggtag gctggcagat       720
+     acgttcgtca gcttgctcct ttctgcccgt ggacgccgcc gaagaagcat cgttaaagtc       780
+     tctcttcacc ctgccgtcat gtctaagtca gaggtgagtt aggcgcgctt tcccacttga       840
+     attttttcct ctccctttcc tgaatcggta agatgctgct gggtttcgtt ccttgcacca       900
+     gcccattcta cagttccttc ggtcgctgcc acggcctacc cctcccaaag ttcaagtcgc       960
+     cattttgtcc tcttgatcgc catgaggccg ctctccgcca accatgtgtt atcatgcggg      1020
+     actcgttact cgtagcaaaa ttcttaggca cacaggatct ttgtcttttt ttaaaccttg      1080
+     ccttggtgag cgagttttct aaagagcgat tagtcccatt gtggagatgc acccctaccg      1140
+     cccaagcctt tgttgcgcgt gcgtcggaag gcgactaggg acgcatgcgc ttgcgatttc      1200
+     ctagcactcc caactccagc atacggcctc ccttgatagg cagaagcacg tgtcttgttg      1260
+     cgacctgaac gaacaataag tgctaggtac acagttggtg tctagttttt cttttcctcg      1320
+     atggaaattg tttcgtgttg tagcccattt aacacttccc cctcccccca ctctagtctc      1380
+     ctaaagagcc cgaacagctg aggaagctct tcattggagg gttgagcttt gaaacaactg      1440
+     atgagagcct gaggagccat tttgagcaat ggggaacgct cacggactgt gtggtaagat      1500
+     ttggaaggga caaagcagta aaacagccga tttccttggc ttatcttggt gcagtcttct      1560
+     ccgaatgctt atgaaagtag ttaatagcat tatagttaga gctttgttgg caaaggaacg      1620
+     tcctgctttg attttaaaag ctaacctctt aaatctaagg gtagtgggaa actggacgaa      1680
+     ctttttataa aaggctggtg taaagtttcc tattgcccta ttcaaagtta aaataacaaa      1740
+     agcttttgcg gtcagacttt gtgttacata aattaacact gttctcaggt aatgagagat      1800
+     ccaaacacca agcgctctag gggctttggg tttgtcacat atgccactgt ggaggaggtg      1860
+     gatgcagcta tgaatgcaag gccacacaag gtggatggaa gagttgtgga accaaagaga      1920
+     gctgtctcca gagaagtgag tgggtttttt ttcttcttct tcttaaactt acttggatat      1980
+     gtgctgctat gaacttaaga ttcgggagtt ttctaaactt accaaaattt tttattcgag      2040
+     tataggcttt gctaatctaa acctatggtt tttctcctat taggattctc aaagaccagg      2100
+     tgcccactta actgtgaaaa agatatttgt tggtggcatt aaagaagaca ctgaagaaca      2160
+     tcacctaaga gattattttg aacagtatgg aaaaattgaa gtgattgaaa tcatgactga      2220
+     ccgaggcagt ggcaagaaaa ggggctttgc ctttgtaacc tttgacgacc atgactccgt      2280
+     ggataagatt gtcagtaagt atcagatagt ggcatttagt aagggttcca caatctgtat      2340
+     ggcattctaa accctgatac catgttgtat ctatgttttt tttttagttc agaaatacca      2400
+     tactgtgaat ggccacaact gtgaagttag aaaagccctg tcaaagcaag agatggctag      2460
+     tgcttcatcc agccaaagag gtatgcttgt tgcttaatta aaccttaaag gtaactttga      2520
+     gttactccag tatgaatgat ttaatgctta aacttcatgt cttaaggtcg aagtggttct      2580
+     ggaaactttg gtggtggtcg tggaggtggt ttcggtggga atgacaactt cggtcgtgga      2640
+     ggaaacttca gtggtcgtgg tatgtatggt ttatctacat gtagttctga cttctcacca      2700
+     tctttgctat gaagatttta cagtacggga actgcattca gaatgtcact ttaagtccaa      2760
+     gtcatactta aaacttgaaa ctttttctta caggtggctt tggtggcagc cgtggtggtg      2820
+     gtggatatgg tggcagtggg gatggctata atggatttgg caatgatggt aagtttttta      2880
+     ggaataagta gagaaaaatt cctggcaacc tggatcttta gaataggtta gtagagacta      2940
+     aaattctggt gcatgtcaaa ctcaactttg cccataacac gcatgctgtg agcaggcctt      3000
+     cagccgttac acttgcacaa gttttcattg tcaaatactt ttgtcttatt gagaagaatt      3060
+     gtattcttgt aggtggttat ggaggaggcg gccctggtta ctctggagga agcagaggct      3120
+     atggaagtgg tggacagggt tatggaaacc agggcagtgg ctatggcggg agtggcagct      3180
+     atgacagcta taacaacgga ggcggaggcg gctttggcgg tggtagtggt aggtatccag      3240
+     tgatccaagt acttggtgtg acagctagat tagcctttta gagcttgggt tctggtgctg      3300
+     ttgaagcatt gtgtggtaca ctgcatggta tattaaaaac aaatgggctt gctatgctac      3360
+     ctcctcctag ctttaagctg gggccgcctc actcccaaat agtagagata agtggatagt      3420
+     gttgtctttg agttagatta gtatcataga aggatttagt attttaactc ctttgggacc      3480
+     ttaggcgctt agttgatgta tccaagatac ttctgcttgc tgtggccctg gatccgtgaa      3540
+     ggccttcaag gctgaagggt atgcttgtgc cactctgaaa atctctttat tttatgtcat      3600
+     ggtgagttag gccagttttc tttgtattac tggattattc aactgaatgc ctttcccaga      3660
+     gaatgaaatg caaagattgg agtcaccata gtttgggaga aaggaaggct gataactcaa      3720
+     ccttatttta ttctgactgc taaacagaat tggaaactaa catcatcctc aggtaacaga      3780
+     taaaggccct ctttcccatt cataggaagc aattttggag gtggtggaag ctacaatgat      3840
+     tttgggaatt acaacaatca gtcttcaaat tttggaccca tgaagggagg aaattttgga      3900
+     ggcagaagct ctggccccta tggcggtgga ggccaatact ttgcaaaacc acgaaaccaa      3960
+     ggtatggtat ctatgtaatt ttggataatg tcaaaagagt gtctgtagct actgctggga      4020
+     agaaagccct ttaactgcta tgtctgggca gcaaaacgtt tatagtttag aaccttcaga      4080
+     aagtgataat ttgatcacaa attagaaaaa tcatgggacc tctttaccac ctcccttgta      4140
+     gtagggccat ttttaaatgg ccagacactt gaatttaact tttattatcc caaatatgaa      4200
+     aacattactg ttggcacttt gaaactttaa aagaaaaatt gtacttttca ggtggctatg      4260
+     gcggttccag cagcagcagt agctatggca gtggcagaag attttaatta ggtaagtaag      4320
+     cacctttttg tgtgttgaca taatttttta aattgctgat gaacccaata accctaatgt      4380
+     agctgagcag tgcaacatag ttaacattat aattgcagta attgtggata taaagttaat      4440
+     attcagatca gcaaaatttg tgggaaacaa acttgatatt ggattgtagc cttgagtctt      4500
+     aatatgttta gattaacaac tctattccat attgttcaac aggaaacaaa gcttagcagg      4560
+     agaggagagc cagagaagtg acagggaagc tacaggttac aacagatttg tgaactcagc      4620
+     caagcacagt ggtggcaggg cctagctgct acaaagaaga catgttttag acaaatactc      4680
+     atgtgtatgg gcaaaaaact cgaggactgt atttgtgact aattgtataa caggttattt      4740
+     tagtttctgt tctgtggaaa gtgtaaagca ttccaacaaa gggttttaat gtagattttt      4800
+     ttttttgcac cccatgctgt tgattgctaa atgtaacagt ctgatcgtga cgctgaataa      4860
+     atgtcttttt tttaatgtgc tgtgtaaagt tagtctactc ttaagccatc ttggtaaatt      4920
+     tccccaacag tgtgaagtta gaattccttc agggtgatgc caggttctat ttggaattta      4980
+     tatacaacct gcttgggtgg agaagccatt gtcttcggaa accttggtgt agttgaactg      5040
+     atagttactg ttgtgacctg aagttcacca ttaaaaggga ttacccaagc aaaatcatgg      5100
+     aatggttata aaagtgattg ttggcacatc ctatgcaata tatctaaatt gaataatggt      5160
+     accagataaa attatagatg ggaatgaagc ttgtgtatcc attatcatgt gtaatcaata      5220
+     aacgatttaa ttctcttgaa tgaaatgaca actgtatgga tttgggactg gcagagattt      5280
+     ggactttccc tacccactcc ccctgataat aatgttgaat gcttctatca caattcaagt      5340
+     tcaaagctct gctagggaat agaaacta                                         5368
+//
+ID   HSRNPA1    standard; RNA; HUM; 1747 BP.
+XX
+AC   X06747;
+XX
+SV   X06747.1
+XX
+DT   08-JUN-1988 (Rel. 16, Created)
+DT   12-SEP-1993 (Rel. 36, Last updated, Version 2)
+XX
+DE   Human hnRNP core protein A1
+XX
+KW   hnRNP A1 protein; ribonucleoprotein.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; Eutheria;
+OC   Primates; Catarrhini; Hominidae; Homo.
+XX
+RN   [1]
+RP   1-1747
+RA   Riva S.;
+RT   ;
+RL   Submitted (26-JAN-1988) to the EMBL/GenBank/DDBJ databases.
+RL   Riva S., Istituto Di Genetica Biochimica, Ed Evoluzionistica - CNR, Via
+RL   Abbiategrasso 207, 27100 Pavia, Italy.
+XX
+RN   [2]
+RP   1-1747
+RX   MEDLINE; 88233978.
+RA   Buvoli M., Biamonti G., Ghetti A., Riva S., Bassi M.T., Horandi C.;
+RT   "cDNA cloning of human hnRNP protein A1 reveals the existence of multiple
+RT   mRNA isoforms.";
+RL   Nucleic Acids Res. 16:3751-3770(1988).
+XX
+DR   SWISS-PROT; P09651; ROA1_HUMAN.
+XX
+CC   pRP15 sequence encodes the protein A1 type alpha isoform,
+CC   pRP12 (truncated at the 5' end, respectively) the beta isoform
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1747
+FT                   /db_xref="taxon:9606"
+FT                   /organism="Homo sapiens"
+FT                   /tissue_type="fibroblast"
+FT                   /clone_lib="lambdagt11"
+FT                   /clone="pRP15"
+FT   CDS             86..1048
+FT                   /db_xref="SWISS-PROT:P09651"
+FT                   /note="protein A1-alpha (AA 1-320)"
+FT                   /protein_id="CAA29922.1"
+FT                   /translation="MSKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVV
+FT                   MRDPNTKRSRGFGFVTYATVEEVDAAMNARPHKVDGRVVEPKRAVSREDSQRPGAHLTV
+FT                   KKIFVGGIKEDTEEHHLRDYFEQFGKIEVIEIMTDRGSGKKKGFAFVTFDDHDSVDKIV
+FT                   IQKYHTVNGHNCEVRKALSKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNF
+FT                   SGRGGFGGSRGGGGYGGSGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNF
+FT                   GGRSSGPYGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF"
+FT   misc_feature    209..209
+FT                   /note="5' end of clone pRP12"
+FT   variation       467..469
+FT                   /note="uau (Tyr) is uuu (Phe) in pRP12 (protein A1-beta)"
+FT   variation       521..523
+FT                   /note="agg (Arg) is aag (Lys) in pRP12 (protein A1-beta)"
+FT   misc_feature    1365..1370
+FT                   /note="polyA signal"
+FT   misc_feature    1726..1731
+FT                   /note="polyA signal"
+FT   polyA_site      1747..1747
+FT                   /note="polyA site"
+XX
+SQ   Sequence 1747 BP; 512 A; 300 C; 466 G; 469 T; 0 other;
+     cgaaaggtta caaaggcaaa gccccttttc tgcccgtgga cgccgccgaa gaagcatcgt        60
+     taaagtctct cttcaccctg ccgtcatgtc taagtcagag tctcctaaag agcccgaaca       120
+     gctgaggaag ctcttcattg gagggttgag ctttgaaaca actgatgaga gcctgaggag       180
+     ccattttgag caatggggaa cgctcacgga ctgtgtggta atgagagatc caaacaccaa       240
+     gcgctctagg ggctttgggt ttgtcacata tgccactgtg gaggaggtgg atgcagctat       300
+     gaatgcaagg ccacacaagg tggatggaag agttgtggaa ccaaagagag ctgtctccag       360
+     agaagattct caaagaccag gtgcccactt aactgtgaaa aagatatttg ttggtggcat       420
+     taaagaagac actgaagaac atcacctaag agattatttt gaacagtttg gaaaaattga       480
+     agtgattgaa atcatgactg accgaggcag tggcaagaaa aagggctttg cctttgtaac       540
+     ctttgacgac catgactccg tggataagat tgtcattcag aaataccata ctgtgaatgg       600
+     ccacaactgt gaagttagaa aagccctgtc aaagcaagag atggctagtg cttcatccag       660
+     ccaaagaggt cgaagtggtt ctggaaactt tggtggtggt cgtggaggtg gtttcggtgg       720
+     gaatgacaac ttcggtcgtg gaggaaactt cagtggtcgt ggtggctttg gtggcagccg       780
+     tggtggtggt ggatatggtg gcagtgggga tggctataat ggatttggca atgatggaag       840
+     caattttgga ggtggtggaa gctacaatga ttttgggaat tacaacaatc agtcttcaaa       900
+     ttttggaccc atgaagggag gaaattttgg aggcagaagc tctggcccct atggcggtgg       960
+     aggccaatac tttgcaaaac cacgaaacca aggtggctat ggcggttcca gcagcagcag      1020
+     tagctatggc agtggcagaa gattttaatt aggaaacaaa gcttagcagg agaggagagc      1080
+     cagagaagtg acagggaagc tacaggttac aacagatttg tgaactcagc caagcacagt      1140
+     ggtggcaggg cctagctgct acaaagaaga catgttttag acaaatactc atgtgtatgg      1200
+     gcaaaaaact cgaggactgt atttgtgact aattgtataa caggttattt tagtttctgt      1260
+     tctgtggaaa gtgtaaagca ttccaacaaa gggttttaat gtagattttt tttttgcacc      1320
+     ccatgctgtt gattgctaaa tgtaacagtc tgatcgtgac gctgaataaa tgtctttttt      1380
+     ttaatgtgct gtgtaaagtt agtctactct taagccatct tggtaaattt ccccaacagt      1440
+     gtgaagttag aattccttca gggtgatgcc aggttctatt tggaatttat atacaacctg      1500
+     cttgggtgga gaagccattg tcttcggaaa ccttggtgta gttgaactga tagttactgt      1560
+     tgtgacctga agttcaccat taaaagggat tacccaagca aaatcatgga atggttataa      1620
+     aagtgattgt tggcacatcc tatgcaatat atctaaattg aataatggta ccagataaaa      1680
+     ttatagatgg gaatgaagct tgtgtatcca ttatcatgtg taatcaataa acgatttaat      1740
+     tctcttg                                                                1747
+//
+ID   HSUPIR1    standard; RNA; HUM; 924 BP.
+XX
+AC   X04347;
+XX
+SV   X04347.1
+XX
+DT   18-NOV-1986 (Rel. 10, Created)
+DT   12-JUL-1995 (Rel. 44, Last updated, Version 3)
+XX
+DE   Human liver mRNA fragment DNA binding protein UPI homologue
+DE   (C-terminus)
+XX
+KW   DNA binding protein.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; Eutheria;
+OC   Primates; Catarrhini; Hominidae; Homo.
+XX
+RN   [1]
+RP   1-924
+RX   MEDLINE; 87053868.
+RA   Riva S., Morandi C., Tsoulfas P., Pandolfo M., Biamonti G., Merrill B.,
+RA   Williams K.R., Multhaup G., Beyreuther K., Werr H., Heinrich B.,
+RA   Schaefer K.P.;
+RT   "Mammalian single-stranded DNA binding protein UP I is derived from the
+RT   hnRNP cor protein A1";
+RL   EMBO J. 5:2267-2273(1986).
+XX
+DR   SWISS-PROT; P09651; ROA1_HUMAN.
+XX
+CC   Pos. 1-216 is homologous to UPI (aa 126-195); pos. 184-273 is
+CC   homologous to V8 protease peptide AI/II (aa 185-214)
+XX
+CC   Data kindly reviewed (08-NOV-1986) by K. Beyreuther
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..924
+FT                   /db_xref="taxon:9606"
+FT                   /organism="Homo sapiens"
+FT   CDS             <1..591
+FT                   /codon_start=1
+FT                   /db_xref="SWISS-PROT:P09651"
+FT                   /note="ORF protein; C-terminal (aa 125-319; 196aa)"
+FT                   /protein_id="CAA27874.1"
+FT                   /translation="FEQYGKIEVIEIMTDPGSGKKRGFAFVTFDDHDSVDKIVIQKYHT
+FT                   VNGHNCEVRKALSKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNFSGRGGF
+FT                   GGSRGGGGYGGSGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNFGGRSSG
+FT                   PYGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF"
+FT   misc_feature    208..210
+FT                   /note="dimethylated arginin"
+FT   polyA_signal    908..913
+FT                   /note="pot. polyA signal"
+FT   polyA_site      924..924
+FT                   /note="polyA site"
+XX
+SQ   Sequence 924 BP; 262 A; 148 C; 266 G; 248 T; 0 other;
+     tttgaacagt atggaaaaat tgaagtgatt gaaatcatga ctgacccagg cagtggcaag        60
+     aaaaggggct ttgcctttgt aacctttgac gaccatgact ccgtggataa gattgtcatt       120
+     cagaaatacc atactgtgaa tggccacaac tgtgaagtta gaaaagccct gtcaaagcaa       180
+     gagatggcta gtgcttcatc cagccaaaga ggtcgaagtg gttctggaaa ctttggtggt       240
+     ggtcgtggag gtggtttcgg tgggaatgac aacttcggtc gtggaggaaa cttcagtggt       300
+     cgtggtggct ttggtggcag ccgtggtggt ggtggatatg gtggcagtgg ggatggctat       360
+     aatggatttg gcaatgatgg aagcaatttt ggaggtggtg gaagctacaa tgattttggg       420
+     aattacaaca atcagtcttc aaattttgga cccatgaagg gaggaaattt tggaggcaga       480
+     agctctggcc cctatggcgg tggaggccaa tactttgcaa aaccacgaaa ccaaggtggc       540
+     tatggcggtt ccagcagcag cagtagctat ggcagtggca gaagatttta attaggaaac       600
+     aagcttggca ggagaggaga gccagagaag tgacagggaa gctacaggtt acaacagatt       660
+     tgtgaactca gccaagcaca gtggtggcag ggcctagctg ctacaaagaa gacatgtttt       720
+     agacaaatac tcatgtgtat gggcaaaaaa ctcgaggact gtatttgtga ctaattgtat       780
+     aacaggttat tttagtttct gttctgtgga aagtgaaagc attccaacaa agggttttaa       840
+     tgtagatttt ttttttttgc accccatgct gttgattgct aaatgtaaca gtctgatcgt       900
+     gacgctgaat aaatgtcttt tttt                                              924
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/roa1.genbank
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/roa1.genbank	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/roa1.genbank	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,91 @@
+LOCUS       AI129902       37 bp    mRNA            EST       27-OCT-1998
+DEFINITION  qc41b07.x1 Soares_pregnant_uterus_NbHPU Homo sapiens cDNA clone
+            IMAGE:1712149 3' similar to SW:ROA1_SCHAM P21522 HETEROGENEOUS
+            NUCLEAR RIBONUCLEOPROTEIN A1, A2/B1 HOMOLOG. ;contains MSR1.b2 MSR1
+            repetitive element ;, mRNA sequence.
+ACCESSION   AI129902
+NID         g3598416
+VERSION     AI129902.1  GI:3598416
+KEYWORDS    EST.
+SOURCE      human.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia;
+            Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 37)
+  AUTHORS   NCI-CGAP http://www.ncbi.nlm.nih.gov/ncicgap.
+  TITLE     National Cancer Institute, Cancer Genome Anatomy Project (CGAP),
+            Tumor Gene Index
+  JOURNAL   Unpublished (1997)
+COMMENT     On May 8, 1995 this sequence version replaced gi:800643
+
+            Contact: Robert Strausberg, Ph.D.
+            Tel: (301) 496-1550
+            Email: Robert_Strausberg at nih.gov
+            This clone is available royalty-free through LLNL ; contact the
+            IMAGE Consortium (info at image.llnl.gov) for further information. 
+            Trace considered overall poor quality
+            Insert Length: 525   Std Error: 0.00
+            Seq primer: -40m13 fwd. ET from Amersham
+            High quality sequence stop: 1.
+FEATURES             Location/Qualifiers
+     source          1..37
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /clone="IMAGE:1712149"
+                     /clone_lib="Soares_pregnant_uterus_NbHPU"
+                     /sex="female"
+                     /dev_stage="adult"
+                     /lab_host="DH10B"
+                     /note="Organ: uterus; Vector: pT7T3-Pac; Site_1: Not I;
+                     Site_2: Eco RI; 1st strand cDNA was primed with a Not I -
+                     oligo(dT) primer [5'
+                     AACTGGAAGAATTCGCGGCCGCCTTTTTTTTTTTTTTTTTT 3'],
+                     double-stranded cDNA was ligated to Eco RI adaptors
+                     (Pharmacia), digested with Not I and cloned into the Not I
+                     and Eco RI sites of the modified pT7T3 vector. Library
+                     went through one round of normalization.  Library
+                     constructed by M. Fatima Bonaldo."
+BASE COUNT        5 a     28 c      2 g      2 t
+ORIGIN      
+        1 ctccgcgcca actcccccca cccccccccc acacccc
+//
+LOCUS       BAB68554                 141 aa            linear   VRT 11-APR-2002
+DEFINITION  alpha D-globin [Aldabrachelys elephantina].
+ACCESSION   BAB68554
+PID         g15824047
+VERSION     BAB68554.1  GI:15824047
+DBSOURCE    accession AB072353.1
+KEYWORDS    .
+SOURCE      Aldabra giant tortoise.
+  ORGANISM  Aldabrachelys elephantina
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Testudines; Cryptodira; Testudinoidea; Testudinidae; Aldabrachelys.
+REFERENCE   1
+  AUTHORS   Shishikura,F.
+  TITLE     The primary structure of hemoglobin D from the Aldabra giant
+            tortoise, Geochelone gigantea
+  JOURNAL   Zoolog. Sci. 19, 197-206 (2002)
+REFERENCE   2  (residues 1 to 141)
+  AUTHORS   Shishikura,F.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (29-SEP-2001) Fumio Shishikura, Nihon University School
+            of Medicine, Biology; Oyaguchi-kamimachi, 30-1, Itabashi-ku, Tokyo
+            173-8610, Japan (E-mail:fshishi at med.nihon-u.ac.jp,
+            Tel:81-3-3972-8111(ex.2291), Fax:81-3-3972-0027)
+FEATURES             Location/Qualifiers
+     source          1..141
+                     /organism="Aldabrachelys elephantina"
+                     /db_xref="taxon:167804"
+                     /note="synonym:Dipsochelys dussumieri~synonym:Geochelone
+                     gigantea"
+     Protein         1..141
+                     /product="alpha D-globin"
+     CDS             1..141
+                     /coded_by="join(AB072353.1:1..92,AB072353.1:307..511,
+                     AB072353.1:739..>864)"
+                     /note="hemoglobin D"
+ORIGIN      
+        1 mlteddkqli qhvwekvleh qedfgaeale rmfivypstk tyfphfdlhh dseqirhhgk
+       61 kvvgalgdav khidnlsatl selsnlhayn lrvdpvnfkl lshcfqvvlg ahlgreytpq
+      121 vqvaydkfla avsavlaeky r
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/roa1.swiss
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/roa1.swiss	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/roa1.swiss	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,257 @@
+ID   ROA1_HUMAN     STANDARD;      PRT;   371 AA.
+AC   P09651;
+DT   01-MAR-1989 (Rel. 10, Created)
+DT   01-AUG-1990 (Rel. 15, Last sequence update)
+DT   01-NOV-1997 (Rel. 35, Last annotation update)
+DE   HETEROGENEOUS NUCLEAR RIBONUCLEOPROTEIN A1 (HELIX-DESTABILIZING
+DE   PROTEIN) (SINGLE-STRAND BINDING PROTEIN) (HNRNP CORE PROTEIN A1).
+GN   HNRPA1.
+OS   Homo sapiens (Human).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia;
+OC   Eutheria; Primates; Catarrhini; Hominidae; Homo.
+RN   [1]
+RP   SEQUENCE OF 1-250 AND 303-371 FROM N.A.
+RC   TISSUE=LIVER;
+RX   MEDLINE; 89342435.
+RA   BIAMONTI G., BUVOLI M., BASSI M.T., MORANDI C., COBIANCHI F., RIVA S.;
+RT   "Isolation of an active gene encoding human hnRNP protein A1.
+RT   Evidence for alternative splicing.";
+RL   J. Mol. Biol. 207:491-503(1989).
+RN   [2]
+RP   SEQUENCE OF 1-250 AND 303-371 FROM N.A.
+RC   TISSUE=FIBROBLAST;
+RX   MEDLINE; 88233978.
+RA   BUVOLI M., BIAMONTI G., GHETTI A., RIVA S., BASSI M.T., HORANDI C.;
+RT   "cDNA cloning of human hnRNP protein A1 reveals the existence of
+RT   multiple mRNA isoforms.";
+RL   Nucleic Acids Res. 16:3751-3770(1988).
+RN   [3]
+RP   SEQUENCE OF 1-250 AND 303-371 FROM N.A.
+RC   TISSUE=LUNG;
+RA   KNUDSEN S.M., LEFFERS H.;
+RL   Submitted (JUN-1994) to the EMBL/GenBank/DDBJ databases.
+RN   [4]
+RP   SEQUENCE OF 124-250 AND 303-371 FROM N.A.
+RC   TISSUE=LIVER;
+RX   MEDLINE; 87053868.
+RA   RIVA S., MORANDI C., TSOULFAS P., PANDOLFO M., BIAMONTI G.,
+RA   MERRILL B., WILLIAMS K.R., MULTHAUP G., BEYREUTHER K., WERR H.,
+RA   HEINRICH B., SCHAEFER K.P.;
+RT   "Mammalian single-stranded DNA binding protein UP I is derived from
+RT   the hnRNP core protein A1.";
+RL   EMBO J. 5:2267-2273(1986).
+RN   [5]
+RP   SEQUENCE OF 251-302 FROM N.A.
+RX   MEDLINE; 90214633.
+RA   BUVOLI M., COBIANCHI F., BESTAGNO M.G., MANGIAROTTI A., BASSI M.T.,
+RA   BIAMONTI G., RIVA S.;
+RT   "Alternative splicing in the human gene for the core protein A1
+RT   generates another hnRNP protein.";
+RL   EMBO J. 9:1229-1235(1990).
+RN   [6]
+RP   NUCLEAR LOCALIZATION DOMAIN.
+RX   MEDLINE; 95247808.
+RA   SIOMI H., DREYFUSS G.;
+RT   "A nuclear localization domain in the hnRNP A1 protein.";
+RL   J. Cell Biol. 129:551-560(1995).
+RN   [7]
+RP   NUCLEAR LOCALIZATION DOMAIN, AND NUCLEAR EXPORT.
+RX   MEDLINE; 96067639.
+RA   MICHAEL W.M., CHOI M., DREYFUSS G.;
+RT   "A nuclear export signal in hnRNP A1: a signal-mediated, temperature-
+RT   dependent nuclear protein export pathway.";
+RL   Cell 83:415-422(1995).
+RN   [8]
+RP   NUCLEAR LOCALIZATION DOMAIN.
+RX   MEDLINE; 95286702.
+RA   WEIGHARDT F., BIAMONTI G., RIVA S.;
+RT   "Nucleo-cytoplasmic distribution of human hnRNP proteins: a search
+RT   for the targeting domains in hnRNP A1.";
+RL   J. Cell Sci. 108:545-555(1995).
+RN   [9]
+RP   3D-STRUCTURE MODELING OF 106-189.
+RX   MEDLINE; 91099515.
+RA   GHETTI A., BOLOGNESI M., COBIANCHI F., MORANDI C.;
+RT   "Modeling by homology of RNA binding domain in A1 hnRNP protein.";
+RL   FEBS Lett. 277:272-276(1990).
+RN   [10]
+RP   X-RAY CRYSTALLOGRAPHY (1.75 ANGSTROMS) OF 8-180.
+RX   MEDLINE; 97307256.
+RA   SHAMOO Y., KRUEGER U., RICE L.M., WILLIAMS K.R., STEITZ T.A.;
+RT   "Crystal structure of the two RNA binding domains of human hnRNP A1
+RT   at 1.75-A resolution.";
+RL   Nat. Struct. Biol. 4:215-222(1997).
+RN   [11]
+RP   X-RAY CRYSTALLOGRAPHY (1.9 ANGSTROMS) OF 6-181.
+RX   MEDLINE; 97277240.
+RA   XU R.M., JOKHAN L., CHENG X., MAYEDA A., KRAINER A.R.;
+RT   "Crystal structure of human UP1, the domain of hnRNP A1 that contains
+RT   two RNA-recognition motifs.";
+RL   Structure 5:559-570(1997).
+CC   -!- FUNCTION: INVOLVED IN THE PACKAGING OF PRE-MRNA INTO HNRNP
+CC       PARTICLES, TRANSPORT OF POLY-A MRNA FROM THE NUCLEUS TO THE
+CC       CYTOPLASM AND MAY MODULATE SPLICE SITE SELECTION.
+CC   -!- SUBCELLULAR LOCATION: NUCLEAR. SHUTTLES CONTINUOUSLY BETWEEN THE
+CC       NUCLEUS AND THE CYTOPLASM ALONG WITH MRNA. COMPONENT OF
+CC       RIBONUCLEOSOMES.
+CC   -!- ALTERNATIVE PRODUCTS: A1-A (SHOWN HERE) AND A1-B ARE OBTAINED BY
+CC       ALTERNATIVE SPLICING OF THE SAME GENE. A1-A IS TWENTY TIMES MORE
+CC       ABUNDANT THEN A1-B.
+CC   -!- SIMILARITY: BELONGS TO THE A/B GROUP OF HNRNP, WHICH ARE BASIC AND
+CC       GLY-RICH PROTEINS.
+CC   -!- SIMILARITY: CONTAINS 2 RNA RECOGNITION MOTIFS (RNP).
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; X12671; CAA31191.1; -.
+DR   EMBL; X06747; CAA29922.1; ALT_SEQ.
+DR   EMBL; X04347; CAA27874.1; -.
+DR   EMBL; X79536; CAA56072.1; -.
+DR   PIR; S04617; S04617.
+DR   PIR; A24894; A24894.
+DR   PIR; S02061; S02061.
+DR   PDB; 1HA1; 15-MAY-97.
+DR   PDB; 1UP1; 17-SEP-97.
+DR   AARHUS/GHENT-2DPAGE; 207; NEPHGE.
+DR   AARHUS/GHENT-2DPAGE; 2114; NEPHGE.
+DR   AARHUS/GHENT-2DPAGE; 3612; NEPHGE.
+DR   MIM; 164017; -.
+DR   PFAM; PF00076; rrm; 2.
+DR   PROSITE; PS00030; RNP_1; 2.
+KW   Nuclear protein; RNA-binding; Repeat; Ribonucleoprotein;
+KW   Methylation; Alternative splicing; 3D-structure.
+FT   INIT_MET      0      0
+FT   DOMAIN        3     93       GLOBULAR A DOMAIN.
+FT   DOMAIN       94    184       GLOBULAR B DOMAIN.
+FT   DOMAIN      194    371       GLY-RICH.
+FT   DOMAIN       15     20       RNA-BINDING (RNP2) (BY SIMILARITY).
+FT   DOMAIN       54     61       RNA-BINDING (RNP1).
+FT   DOMAIN      106    111       RNA-BINDING (RNP2) (BY SIMILARITY).
+FT   DOMAIN      145    152       RNA-BINDING (RNP1).
+FT   DOMAIN      217    239       RNA-BINDING RGG-BOX.
+FT   DOMAIN      319    356       NUCLEAR TARGETING SEQUENCE (M9).
+FT   MOD_RES     193    193       METHYLATION (BY SIMILARITY).
+FT   VARSPLIC    251    302       MISSING (IN FORM A1-A).
+FT   MUTAGEN     325    325       G->A: NO NUCLEAR IMPORT NOR EXPORT.
+FT   MUTAGEN     326    326       P->A: NO NUCLEAR IMPORT NOR EXPORT.
+FT   MUTAGEN     333    334       GG->LL: NORMAL NUCLEAR IMPORT AND EXPORT.
+FT   CONFLICT    139    139       R -> P (IN REF. 4).
+SQ   SEQUENCE   371 AA;  38715 MW;  ECBA15FB CRC32;
+     SKSESPKEPE QLRKLFIGGL SFETTDESLR SHFEQWGTLT DCVVMRDPNT KRSRGFGFVT
+     YATVEEVDAA MNARPHKVDG RVVEPKRAVS REDSQRPGAH LTVKKIFVGG IKEDTEEHHL
+     RDYFEQYGKI EVIEIMTDRG SGKKRGFAFV TFDDHDSVDK IVIQKYHTVN GHNCEVRKAL
+     SKQEMASASS SQRGRSGSGN FGGGRGGGFG GNDNFGRGGN FSGRGGFGGS RGGGGYGGSG
+     DGYNGFGNDG GYGGGGPGYS GGSRGYGSGG QGYGNQGSGY GGSGSYDSYN NGGGRGFGGG
+     SGSNFGGGGS YNDFGNYNNQ SSNFGPMKGG NFGGRSSGPY GGGGQYFAKP RNQGGYGGSS
+     SSSSYGSGRR F
+//
+ID   A2S3_RAT       STANDARD;      PRT;   913 AA.
+AC   Q8R2H7; Q8R2H6; Q8R4G3;
+DT   28-FEB-2003 (Rel. 41, Created)
+DT   15-MAR-2004 (Rel. 43, Last sequence update)
+DT   15-MAR-2004 (Rel. 43, Last annotation update)
+DE   Amyotrophic lateral sclerosis 2 chromosomal region candidate gene
+DE   protein 3 homolog (GABA-A receptor interacting factor-1) (GRIF-1) (O-
+DE   GlcNAc transferase-interacting protein of 98 kDa).
+GN   ALS2CR3 OR GRIF1 OR OIP98.
+OS   Rattus norvegicus (Rat).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Rodentia; Sciurognathi; Muridae; Murinae; Rattus.
+OX   NCBI_TaxID=10116;
+RN   [1]
+RP   SEQUENCE FROM N.A. (ISOFORMS 1 AND 2), SUBCELLULAR LOCATION, AND
+RP   INTERACTION WITH GABA-A RECEPTOR.
+RC   TISSUE=Brain;
+RX   MEDLINE=22162448; PubMed=12034717;
+RA   Beck M., Brickley K., Wilkinson H.L., Sharma S., Smith M.,
+RA   Chazot P.L., Pollard S., Stephenson F.A.;
+RT   "Identification, molecular cloning, and characterization of a novel
+RT   GABAA receptor-associated protein, GRIF-1.";
+RL   J. Biol. Chem. 277:30079-30090(2002).
+RN   [2]
+RP   REVISIONS TO 579 AND 595-596, AND VARIANTS VAL-609 AND PRO-820.
+RA   Stephenson F.A.;
+RL   Submitted (FEB-2003) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   SEQUENCE FROM N.A. (ISOFORM 3), INTERACTION WITH O-GLCNAC TRANSFERASE,
+RP   AND O-GLYCOSYLATION.
+RC   STRAIN=Sprague-Dawley; TISSUE=Brain;
+RX   MEDLINE=22464403; PubMed=12435728;
+RA   Iyer S.P.N., Akimoto Y., Hart G.W.;
+RT   "Identification and cloning of a novel family of coiled-coil domain
+RT   proteins that interact with O-GlcNAc transferase.";
+RL   J. Biol. Chem. 278:5399-5409(2003).
+CC   -!- SUBUNIT: Interacts with GABA-A receptor and O-GlcNac transferase.
+CC   -!- SUBCELLULAR LOCATION: Cytoplasmic.
+CC   -!- ALTERNATIVE PRODUCTS:
+CC       Event=Alternative splicing; Named isoforms=3;
+CC       Name=1; Synonyms=GRIF-1a;
+CC         IsoId=Q8R2H7-1; Sequence=Displayed;
+CC       Name=2; Synonyms=GRIF-1b;
+CC         IsoId=Q8R2H7-2; Sequence=VSP_003786, VSP_003787;
+CC       Name=3;
+CC         IsoId=Q8R2H7-3; Sequence=VSP_003788;
+CC   -!- PTM: O-glycosylated.
+CC   -!- SIMILARITY: TO HUMAN OIP106.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; AJ288898; CAC81785.2; -.
+DR   EMBL; AJ288898; CAC81786.2; -.
+DR   EMBL; AF474163; AAL84588.1; -.
+DR   GO; GO:0005737; C:cytoplasm; IEP.
+DR   GO; GO:0005634; C:nucleus; IDA.
+DR   GO; GO:0005886; C:plasma membrane; IEP.
+DR   GO; GO:0005478; F:intracellular transporter activity; NAS.
+DR   GO; GO:0005515; F:protein binding; IPI.
+DR   GO; GO:0005102; F:receptor binding; IPI.
+DR   GO; GO:0006836; P:neurotransmitter transport; NAS.
+DR   GO; GO:0006493; P:O-linked glycosylation; IDA.
+DR   GO; GO:0006605; P:protein targeting; IDA.
+DR   GO; GO:0006357; P:regulation of transcription from Pol II pro...; IDA.
+DR   InterPro; IPR006933; HAP1_N.
+DR   Pfam; PF04849; HAP1_N; 1.
+KW   Coiled coil; Alternative splicing; Polymorphism.
+FT   DOMAIN      134    355       COILED COIL (POTENTIAL).
+FT   DOMAIN      502    519       COILED COIL (POTENTIAL).
+FT   VARSPLIC    653    672       VATSNPGKCLSFTNSTFTFT -> ALVSHHCPVEAVRAVHP
+FT                                TRL (in isoform 2).
+FT                                /FTId=VSP_003786.
+FT   VARSPLIC    673    913       Missing (in isoform 2).
+FT                                /FTId=VSP_003787.
+FT   VARSPLIC    620    687       VQQPLQLEQKPAPPPPVTGIFLPPMTSAGGPVSVATSNPGK
+FT                                CLSFTNSTFTFTTCRILHPSDITQVTP -> GSAASSTGAE
+FT                                ACTTPASNGYLPAAHDLSRGTSL (in isoform 3).
+FT                                /FTId=VSP_003788.
+FT   VARIANT     609    609       E -> V.
+FT   VARIANT     820    820       S -> P.
+SQ   SEQUENCE   913 AA;  101638 MW;  D0E135DBEC30C28C CRC64;
+     MSLSQNAIFK SQTGEENLMS SNHRDSESIT DVCSNEDLPE VELVNLLEEQ LPQYKLRVDS
+     LFLYENQDWS QSSHQQQDAS ETLSPVLAEE TFRYMILGTD RVEQMTKTYN DIDMVTHLLA
+     ERDRDLELAA RIGQALLKRN HVLSEQNESL EEQLGQAFDQ VNQLQHELSK KEELLRIVSI
+     ASEESETDSS CSTPLRFNES FSLSQGLLQL DMMHEKLKEL EEENMALRSK ACHIKTETFT
+     YEEKEQKLIN DCVNELRETN AQMSRMTEEL SGKSDELLRY QEEISSLLSQ IVDLQHKLKE
+     HVIEKEELRL HLQASKDAQR QLTMELHELQ DRNMECLGML HESQEEIKEL RNKAGPSAHL
+     CFSQAYGVFA GESLAAEIEG TMRKKLSLDE ESVFKQKAQQ KRVFDTVKVA NDTRGRSVTF
+     PVLLPIPGSN RSSVIMTAKP FESGVQQTED KTLPNQGSST EVPGNSHPRD PPGLPEDSDL
+     ATALHRLSLR RQNYLSEKQF FAEEWERKLQ ILAEQEEEVS SCEALTENLA SFCTDQSETT
+     ELGSAGCLRG FMPEKLQIVK PLEGSQTLHH WQQLAQPNLG TILDPRPGVI TKGFTQMPKD
+     AVYHISDLEE DEEVGITFQV QQPLQLEQKP APPPPVTGIF LPPMTSAGGP VSVATSNPGK
+     CLSFTNSTFT FTTCRILHPS DITQVTPSSG FPSLSCGSSA GSASNTAVNS PAASYRLSIG
+     ESITNRRDST ITFSSTRSLA KLLQERGISA KVYHSPASEN PLLQLRPKAL ATPSTPPNSP
+     SQSPCSSPVP FEPRVHVSEN FLASRPAETF LQEMYGLRPS RAPPDVGQLK MNLVDRLKRL
+     GIARVVKTPV PRENGKSREA EMGLQKPDSA VYLNSGGSLL GGLRRNQSLP VMMGSFGAPV
+     CTTSPKMGIL KED
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/roa1_v2.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/roa1_v2.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/roa1_v2.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,99 @@
+ID   X79536; SV 1; linear; mRNA; STD; HUM; 1198 BP.
+XX
+AC   X79536;
+XX
+DT   03-JUN-1994 (Rel. 39, Created)
+DT   18-APR-2005 (Rel. 83, Last updated, Version 2)
+XX
+DE   H.sapiens mRNA for hnRNPcore protein A1
+XX
+KW   core protein.
+XX
+OS   Homo sapiens (human)
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; Mammalia;
+OC   Eutheria; Euarchontoglires; Primates; Haplorrhini; Catarrhini; Hominidae;
+OC   Homo.
+XX
+RN   [2]
+RP   1-1198
+RA   Leffers H.;
+RT   ;
+RL   Submitted (01-JUN-1994) to the EMBL/GenBank/DDBJ databases.
+RL   H. Leffers, Inst. of Medical Research Biochemistry & Danish Centre for
+RL   Human Genome Research, Ole Worms Alle 170, Aarhus Univ., 8000 Aarhus C,
+RL   DENMARK
+XX
+RN   [3]
+RA   Knudsen S.M., Leffers H.;
+RT   "Cloning and sequencing of a splice variant of human hnRNP core protein
+RT   A1";
+RL   Unpublished.
+XX
+DR   H-InvDB; HIT000323756.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1198
+FT                   /organism="Homo sapiens"
+FT                   /mol_type="mRNA"
+FT                   /clone_lib="lambda ZapII"
+FT                   /clone="HDP1"
+FT                   /cell_line="MRC5"
+FT                   /cell_type="fibroblast"
+FT                   /tissue_type="lung"
+FT                   /db_xref="taxon:9606"
+FT   CDS             27..989
+FT                   /product="hnRNPcore protein A1"
+FT                   /db_xref="GDB:127388"
+FT                   /db_xref="GOA:P09651"
+FT                   /db_xref="HGNC:5031"
+FT                   /db_xref="InterPro:IPR000504"
+FT                   /db_xref="InterPro:IPR012677"
+FT                   /db_xref="PDB:1HA1"
+FT                   /db_xref="PDB:1L3K"
+FT                   /db_xref="PDB:1PGZ"
+FT                   /db_xref="PDB:1PO6"
+FT                   /db_xref="PDB:1U1K"
+FT                   /db_xref="PDB:1U1L"
+FT                   /db_xref="PDB:1U1M"
+FT                   /db_xref="PDB:1U1N"
+FT                   /db_xref="PDB:1U1O"
+FT                   /db_xref="PDB:1U1P"
+FT                   /db_xref="PDB:1U1Q"
+FT                   /db_xref="PDB:1U1R"
+FT                   /db_xref="PDB:1UP1"
+FT                   /db_xref="PDB:2UP1"
+FT                   /db_xref="UniProtKB/Swiss-Prot:P09651"
+FT                   /protein_id="CAA56072.1"
+FT                   /translation="MSKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVV
+FT                   MRDPNTKRSRGFGFVTYATVEEVDAAMNARPHKVDGRVVEPKRAVSREDSQRPGAHLTV
+FT                   KKIFVGGIKEDTEEHHLRDYFEQYGKIEVIEIMTDRGSGKKRGFAFVTFDDHDSVDKIV
+FT                   IQKYHTVNGHNCEVRKALSKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNF
+FT                   SGRGGFGGSRGGGGYGGSGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNF
+FT                   GGRSSGPYGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF"
+FT   misc_feature    995
+FT                   /note="alternative splice site"
+FT   polyA_signal    1180..1185
+XX
+SQ   Sequence 1198 BP; 334 A; 216 C; 336 G; 312 T; 0 other;
+     ttaaagtctc tcttcaccct gccgtcatgt ctaagtcaga gtctcctaaa gagcccgaac        60
+     agctgaggaa gctcttcatt ggagggttga gctttgaaac aactgatgag agcctgagga       120
+     gccattttga gcaatgggga acgctcacgg actgtgtggt aatgagagat ccaaacacca       180
+     agcgctctag gggctttggg tttgtcacat atgccactgt ggaggaggtg gatgcagcta       240
+     tgaatgcaag gccacacaag gtggatggaa gagttgtgga accaaagaga gctgtctcca       300
+     gagaagattc tcaaagacca ggtgcccact taactgtgaa aaagatattt gttggtggca       360
+     ttaaagaaga cactgaagaa catcacctaa gagattattt tgaacagtat ggaaaaattg       420
+     aagtgattga aatcatgact gaccgaggca gtggcaagaa aaggggcttt gcctttgtaa       480
+     cctttgacga ccatgactcc gtggataaga ttgtcattca gaaataccat actgtgaatg       540
+     gccacaactg tgaagttaga aaagccctgt caaagcaaga gatggctagt gcttcatcca       600
+     gccaaagagg tcgaagtggt tctggaaact ttggtggtgg tcgtggaggt ggtttcggtg       660
+     ggaatgacaa cttcggtcgt ggaggaaact tcagtggtcg tggtggcttt ggtggcagcc       720
+     gtggtggtgg tggatatggt ggcagtgggg atggctataa tggatttggc aatgatggaa       780
+     gcaattttgg aggtggtgga agctacaatg attttgggaa ttacaacaat cagtcttcaa       840
+     attttggacc catgaaggga ggaaattttg gaggcagaag ctctggcccc tatggcggtg       900
+     gaggccaata ctttgcaaaa ccacgaaacc aaggtggcta tggcggttcc agcagcagca       960
+     gtagctatgg cagtggcaga agattttaat tagggaggag tctgctacta gtcttatcag      1020
+     ctcttaaaaa cagaaactca tctgtccaag ttcgtggcag aaaggaacgt ccttgtgaag      1080
+     acctttatct gagccactgt acttcgttat cacgccatgc agtttacatg agctgttctg      1140
+     cagctcgaaa ttccattttg tgaatgggtt ttttttttta ataaactgta tttaactt        1198
+//
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sbay_c545-yeast.BLASTZ.PSL
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sbay_c545-yeast.BLASTZ.PSL	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sbay_c545-yeast.BLASTZ.PSL	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,39 @@
+916	617	0	0	13	69	10	86	+	I	230203	139870	141472	sbay_c545	28791	12394	14013	24	45,10,9,6,334,104,93,6,6,12,18,119,6,32,376,23,33,133,14,69,12,7,31,35,	139870,139920,139930,139939,139947,140293,140400,140493,140501,140507,140525,140544,140663,140681,140713,141089,141115,141154,141287,141304,141373,141385,141393,141437,	12394,12439,12451,12462,12468,12802,12906,13006,13012,13020,13032,13050,13170,13176,13231,13613,13636,13669,13832,13846,13927,13940,13947,13978,
+171	119	0	0	2	8	2	5	+	I	230203	174624	174922	sbay_c545	28791	27293	27588	5	125,24,26,5,110,	174624,174749,174776,174807,174812,	27293,27421,27445,27471,27478,
+102	56	0	0	1	1	2	9	-	I	230203	123813	123972	sbay_c545	28791	27301	27468	4	116,4,19,19,	106231,106347,106351,106371,	27301,27421,27430,27449,
+65	21	0	0	0	0	1	4	+	II	813140	68257	68343	sbay_c545	28791	10029	10119	2	7,79,	68257,68264,	10029,10040,
+922	582	0	0	15	73	14	100	+	II	813140	84860	86437	sbay_c545	28791	12373	13977	30	55,31,320,13,107,47,11,33,11,11,4,77,12,67,29,135,232,36,8,16,7,10,89,18,7,56,13,12,7,30,	84860,84915,84956,85286,85301,85411,85458,85472,85508,85521,85532,85538,85615,85631,85698,85727,85865,86097,86145,86165,86181,86191,86201,86290,86311,86318,86374,86387,86399,86407,	12373,12435,12466,12786,12799,12906,12956,12967,13000,13011,13029,13033,13114,13126,13202,13234,13369,13622,13658,13666,13683,13690,13702,13804,13822,13846,13908,13927,13940,13947,
+79	32	0	0	1	1	1	2	-	II	813140	166297	166409	sbay_c545	28791	10038	10151	3	86,14,11,	646731,646817,646832,	10038,10126,10140,
+423	272	0	0	0	0	2	9	-	III	315339	189542	190237	sbay_c545	28791	5293	5997	3	131,508,56,	125102,125233,125741,	5293,5427,5941,
+443	195	0	0	5	35	1	2	+	IV	1531929	366737	367410	sbay_c545	28791	902	1542	7	341,4,22,42,11,21,197,	366737,367090,367102,367124,367169,367185,367213,	902,1243,1247,1271,1313,1324,1345,
+82	31	0	0	0	0	2	13	+	IV	1531929	778726	778839	sbay_c545	28791	10019	10145	3	11,14,88,	778726,778737,778751,	10019,10031,10057,
+89	37	0	0	2	4	0	0	-	IV	1531929	1290928	1291058	sbay_c545	28791	10031	10157	3	84,13,29,	240871,240958,240972,	10031,10115,10128,
+918	588	0	0	18	108	16	114	-	IV	1531929	44092	45706	sbay_c545	28791	12351	13971	35	23,7,13,43,30,135,10,191,52,15,17,11,48,9,53,6,20,146,15,157,8,158,5,9,17,20,13,14,125,31,40,5,18,16,26,	1486223,1486247,1486259,1486284,1486333,1486363,1486498,1486510,1486716,1486768,1486786,1486804,1486817,1486865,1486877,1486932,1486941,1486961,1487107,1487122,1487279,1487297,1487455,1487460,1487469,1487486,1487515,1487538,1487571,1487696,1487727,1487767,1487772,1487792,1487811,	12351,12374,12381,12394,12437,12470,12607,12617,12808,12863,12878,12895,12906,12957,12966,13019,13025,13050,13205,13223,13384,13392,13555,13577,13592,13616,13636,13649,13663,13801,13846,13898,13911,13929,13945,
+81	44	0	0	1	1	0	0	-	V	576869	142910	143036	sbay_c545	28791	10033	10158	2	94,31,	433833,433928,	10033,10127,
+991	629	0	0	8	55	13	87	-	V	576869	94727	96402	sbay_c545	28791	12267	13974	22	34,42,37,84,5,371,9,198,12,108,22,21,123,16,221,27,11,145,13,65,9,47,	480467,480514,480570,480607,480698,480703,481074,481086,481285,481308,481416,481438,481464,481587,481603,481825,481852,481863,482008,482021,482086,482095,	12267,12301,12343,12394,12478,12490,12864,12873,13071,13083,13200,13230,13251,13376,13393,13614,13645,13659,13813,13841,13909,13927,
+66	15	0	0	1	2	1	8	-	VI	270148	186205	186288	sbay_c545	28791	10030	10119	3	15,7,59,	83860,83877,83884,	10030,10045,10060,
+1688	962	0	0	10	43	15	175	-	VII	1090936	628575	631268	sbay_c545	28791	6807	9632	26	219,20,125,11,15,104,51,21,212,16,24,6,13,84,47,10,336,8,959,24,11,22,20,6,22,264,	459668,459887,459908,460036,460059,460080,460187,460240,460261,460473,460489,460513,460519,460532,460625,460672,460686,461024,461033,461992,462016,462027,462049,462069,462075,462097,	6807,7027,7047,7172,7183,7198,7302,7353,7388,7602,7636,7663,7677,7692,7776,7827,7837,8173,8181,9154,9227,9241,9290,9322,9331,9368,
+298	102	0	0	3	10	0	0	-	VII	1090936	619156	619566	sbay_c545	28791	18118	18518	4	193,8,81,118,	471370,471564,471573,471662,	18118,18311,18319,18400,
+285	103	0	0	1	1	1	3	-	VII	1090936	618080	618469	sbay_c545	28791	24049	24440	3	188,172,28,	472467,472656,472828,	24049,24237,24412,
+104	62	0	0	2	10	1	7	+	VIII	562639	270904	271080	sbay_c545	28791	27336	27509	4	99,9,28,30,	270904,271005,271022,271050,	27336,27435,27444,27479,
+207	136	0	0	2	19	6	28	-	VIII	562639	316825	317187	sbay_c545	28791	27172	27543	9	20,11,30,20,7,13,155,4,83,	245452,245472,245487,245532,245552,245559,245572,245727,245731,	27172,27193,27204,27234,27257,27268,27298,27454,27460,
+84	23	0	0	0	0	2	20	-	IX	439885	105317	105424	sbay_c545	28791	10030	10157	3	15,62,30,	334461,334476,334538,	10030,10054,10127,
+462	268	0	0	4	35	8	25	+	X	745440	67814	68579	sbay_c545	28791	5244	5999	13	23,13,27,95,20,11,9,7,391,16,53,16,49,	67814,67837,67855,67891,67986,68006,68017,68026,68051,68445,68461,68514,68530,	5244,5268,5281,5308,5406,5431,5448,5458,5465,5856,5875,5933,5950,
+1108	665	0	0	5	21	15	78	+	X	745440	519403	521197	sbay_c545	28791	12123	13974	21	14,6,39,69,174,6,623,18,11,11,81,47,140,13,225,19,143,18,60,9,47,	519403,519423,519429,519468,519537,519713,519719,520344,520362,520374,520395,520476,520523,520663,520676,520901,520920,521063,521081,521141,521150,	12123,12137,12149,12197,12269,12443,12451,13074,13093,13104,13115,13199,13255,13397,13411,13639,13661,13814,13846,13909,13927,
+55	19	0	0	0	0	0	0	-	X	745440	469796	469870	sbay_c545	28791	10060	10134	1	74,	275570,	10060,
+909	581	0	0	9	54	15	90	-	X	745440	381175	382719	sbay_c545	28791	12394	13974	25	50,5,348,67,21,13,166,20,99,18,156,12,11,167,28,12,15,6,4,125,31,44,25,6,41,	362721,362773,362779,363133,363200,363229,363242,363408,363431,363530,363548,363704,363718,363733,363923,363951,363968,363983,363989,363993,364118,364149,364193,364218,364224,	12394,12444,12449,12797,12868,12889,12903,13072,13092,13200,13221,13383,13395,13406,13573,13619,13631,13647,13656,13663,13801,13846,13893,13920,13933,
+387	246	0	0	6	34	4	34	-	X	745440	250596	251263	sbay_c545	28791	26968	27635	11	95,23,19,3,6,75,15,43,16,321,17,	494177,494284,494310,494329,494336,494342,494419,494435,494478,494506,494827,	26968,27063,27086,27106,27109,27118,27193,27208,27269,27285,27618,
+69	33	0	0	0	0	0	0	-	X	745440	250471	250573	sbay_c545	28791	27802	27904	1	102,	494867,	27802,
+81	43	0	0	0	0	0	0	-	XI	666445	183812	183936	sbay_c545	28791	27301	27425	1	124,	482509,	27301,
+899	599	0	0	11	55	11	85	+	XII	1078172	95925	97478	sbay_c545	28791	12394	13977	23	45,10,9,6,334,104,93,6,6,12,18,119,6,32,376,23,33,119,16,12,57,11,51,	95925,95975,95985,95994,96002,96348,96455,96548,96556,96562,96580,96599,96718,96736,96768,97144,97170,97209,97328,97344,97359,97416,97427,	12394,12439,12451,12462,12468,12802,12906,13006,13012,13020,13032,13050,13170,13176,13231,13613,13636,13669,13801,13834,13846,13909,13926,
+14247	3493	0	0	57	258	76	314	-	XII	1078172	855634	873632	sbay_c545	28791	0	18054	134	45,9,762,6,16,9,12,14,9,426,253,98,327,24,68,18,43,57,190,32,275,796,729,668,22,72,9,31,20,39,18,1089,21,96,33,8,47,19,16,46,26,16,16,12,49,18,9,39,18,1398,1698,206,26,16,449,306,107,16,45,22,14,25,36,18,21,12,41,9,31,8,18,457,82,25,30,10,23,6,7,12,41,9,14,1994,38,30,26,9,18,13,28,1842,25,14,16,13,69,41,271,28,38,10,27,12,51,225,33,14,12,163,33,37,66,6,25,10,19,13,23,19,78,28,10,94,35,53,20,15,15,23,20,14,15,96,	204540,204594,204604,205366,205372,205388,205398,205410,205424,205434,205860,206113,206211,206539,206563,206633,206652,206696,206754,206944,206976,207251,208050,208851,209519,209541,209626,209650,209681,209701,209740,209764,210854,210876,210984,211017,211025,211072,211091,211109,211155,211181,211199,211220,211233,211286,211304,211313,211352,211370,212771,214478,214684,214710,214726,215175,215481,215588,215606,215651,215673,215687,215713,215753,215771,215792,215813,215854,215864,215895,215903,215921,216384,216470,216495,216525,216535,216563,216570,216578,216590,216631,216643,216658,218653,218691,218721,218747,218761,218780,218796,218824,220666,220691,220709,220728,220741,220810,220853,221124,221152,221191,221201,221228,221240,221294,221522,221555,221570,221593,221756,221789,221827,221893,221899,221924,221935,221956,221969,221993,222013,222094,222122,222132,222226,222261,222314,222336,222354,222369,222392,222412,222427,222442,	
0,45,54,817,824,841,850,866,881,890,1319,1590,1703,2030,2055,2123,2141,2184,2241,2449,2484,2765,3561,4290,4959,4982,5054,5063,5095,5116,5159,5177,6266,6287,6383,6417,6426,6482,6502,6518,6567,6596,6612,6628,6640,6689,6709,6719,6760,6782,8180,9878,10090,10119,10144,10599,10915,11023,11039,11085,11108,11123,11148,11184,11207,11231,11243,11286,11295,11339,11354,11374,11831,11913,11939,11971,11982,12005,12011,12018,12031,12073,12082,12096,14090,14129,14177,14204,14213,14231,14244,14283,16126,16152,16166,16182,16197,16268,16309,16587,16616,16654,16671,16702,16715,16766,16991,17025,17039,17051,17217,17251,17288,17362,17373,17400,17410,17429,17443,17466,17485,17563,17599,17625,17720,17756,17810,17830,17845,17866,17903,17924,17938,17958,
+3388	970	0	0	21	71	33	128	-	XII	1078172	850217	854646	sbay_c545	28791	24305	28791	55	91,8,19,78,38,351,19,8,93,52,38,22,26,17,13,25,261,55,19,21,55,25,13,34,7,19,15,29,668,9,10,11,6,68,20,53,7,13,22,11,16,28,14,49,20,29,26,11,8,746,24,12,938,56,32,	223526,223625,223633,223652,223741,223780,224131,224150,224158,224251,224312,224350,224372,224398,224416,224430,224456,224717,224772,224793,224815,224870,224895,224908,224942,224950,224969,224984,225013,225683,225694,225704,225716,225731,225799,225819,225872,225879,225892,225914,225926,225942,225971,225988,226041,226061,226090,226125,226136,226146,226892,226916,226928,227866,227923,	24305,24396,24406,24443,24521,24559,24912,24939,24949,25044,25096,25135,25161,25188,25205,25218,25243,25505,25562,25581,25602,25660,25686,25700,25737,25744,25764,25780,25812,26480,26489,26500,26511,26517,26587,26611,26665,26673,26689,26712,26723,26740,26768,26782,26831,26852,26884,26910,26926,26934,27707,27735,27764,28703,28759,
+66	23	0	0	0	0	0	0	+	XIII	924430	86939	87028	sbay_c545	28791	10030	10119	1	89,	86939,	10030,
+920	583	0	0	18	111	16	117	-	XIV	784328	252084	253698	sbay_c545	28791	12351	13971	35	29,14,43,30,135,8,105,15,71,52,15,17,11,48,9,53,6,20,146,15,157,8,158,5,9,17,20,13,14,125,31,50,11,17,26,	530630,530665,530691,530740,530770,530905,530915,531022,531037,531123,531175,531193,531211,531224,531272,531284,531339,531348,531368,531514,531529,531686,531704,531862,531867,531876,531893,531922,531945,531978,532103,532134,532187,532198,532218,	12351,12380,12394,12437,12470,12607,12615,12720,12737,12808,12863,12878,12895,12906,12957,12966,13019,13025,13050,13205,13223,13384,13392,13555,13577,13592,13616,13636,13649,13663,13801,13846,13896,13928,13945,
+162	102	0	0	1	1	2	10	-	XIV	784328	70957	71222	sbay_c545	28791	27120	27394	4	38,12,103,111,	713106,713145,713157,713260,	27120,27158,27171,27283,
+1536	428	0	0	5	29	5	19	+	XV	1091283	459446	461439	sbay_c545	28791	18756	20739	11	12,7,1816,9,13,18,34,21,12,13,9,	459446,459458,459465,461298,461307,461320,461340,461375,461396,461411,461430,	18756,18773,18782,20598,20611,20625,20643,20677,20705,20717,20730,
+1608	513	0	0	6	48	6	36	+	XV	1091283	698738	700907	sbay_c545	28791	21813	23970	13	25,13,1791,6,30,49,11,5,25,16,16,45,89,	698738,698763,698779,700570,700576,700607,700656,700667,700679,700713,700742,700773,700818,	21813,21842,21855,23660,23669,23699,23757,23770,23775,23800,23816,23832,23881,
+59	27	0	0	0	0	0	0	-	XV	1091283	110777	110863	sbay_c545	28791	10033	10119	1	86,	980420,	10033,
+147	87	0	0	1	1	1	10	-	XV	1091283	105814	106049	sbay_c545	28791	27147	27391	3	115,14,105,	985234,985350,985364,	27147,27262,27286,
+62	30	0	0	0	0	0	0	+	XVI	948061	156852	156944	sbay_c545	28791	27301	27393	1	92,	156852,	27301,
+65	26	0	0	1	1	0	0	-	XVI	948061	897071	897163	sbay_c545	28791	10043	10134	2	78,13,	50898,50977,	10043,10121,

Added: trunk/packages/bioperl/branches/upstream/current/t/data/seg.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/seg.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/seg.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+>LBL_0012(32-46) complexity=2.47 (12/2.20/2.50)
+gdggwtfegwggppe
+
+>LBL_0012(66-80) complexity=2.31 (12/2.20/2.50)
+kfssrasakavakks
+
+>LBL_0012(123-138) complexity=2.31 (12/2.20/2.50)
+svivsqsqgvvkgvgv
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/seqdatabase.ini
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/seqdatabase.ini	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/seqdatabase.ini	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,22 @@
+VERSION=1.00
+
+[testflat]
+protocol=flat
+location=t/tmp
+dbname=testflat
+
+[testbdb]
+protocol=flat
+location=t/tmp
+dbname=testbdb
+
+[embl]
+protocol=biofetch
+location=http://www.ebi.ac.uk/cgi-bin/dbfetch
+dbname=embl
+
+[swissprot]
+protocol=biofetch
+location=http://www.ebi.ac.uk/cgi-bin/dbfetch
+dbname=swall
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/seqdatabase.ini
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/seqfeaturedb/test.gff3
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/seqfeaturedb/test.gff3	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/seqfeaturedb/test.gff3	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1334 @@
+##gff-version 3
+##sequence-region Contig1 1 37450
+# #index-subfeatures 0
+Contig1	confirmed	transcript	1001	2000	42	+	.	ID=trans-1;Name=abc-1;Alias=xyz-2;Note=function+unknown
+Contig1	confirmed	exon	1001	1100	.	+	.	Parent=trans-1
+Contig1	confirmed	exon	1201	1300	.	+	.	Parent=trans-1
+Contig1	confirmed	exon	1401	1450	.	+	.	Parent=trans-1
+Contig1	confirmed	CDS	1051	1100	.	+	0	Parent=trans-1
+Contig1	confirmed	CDS	1201	1300	.	+	2	Parent=trans-1
+Contig1	confirmed	CDS	1401	1440	.	+	0	Parent=trans-1
+Contig1	est	match	1001	1100	96	.	.	Target=CEESC13F 1 100 +;Name=match1
+Contig1	est	match	1201	1300	99	.	.	Target=CEESC13F 101 200 +;Name=match2
+Contig1	est	match	1401	1450	99	.	.	Target=CEESC13F 201 250 +;Name=match3
+Contig1	tc1	transposable_element	5001	6000	.	+	.	ID=c128.1;Name=c128.1
+Contig1	tc1	transposable_element	8001	9000	.	-	.	ID=c128.2;Name=c128.2
+Contig1	confirmed	transcript	30001	31000	.	-	.	ID=trans-2;Name=trans-2;Alias=xyz-2;Note=Terribly+interesting
+Contig1	confirmed	exon	30001	30100	.	-	.	Parent=trans-2;Alias=abc-1;Note=function+unknown;index=1
+Contig1	confirmed	exon	30701	30800	.	-	.	Parent=trans-2
+Contig1	confirmed	exon	30801	31000	.	-	.	Parent=trans-2
+
+##sequence-region Contig2 1 37450
+Contig2	clone	assembly_component	1	2000	.	.	.	Target=AL12345.1 1 2000 +;Name=match4;Note=Terribly+interesting
+Contig2	clone	assembly_component	2001	5000	.	.	.	Target=AL11111.1 6000 3001 +;Name=match5
+Contig2	clone	assembly_component	5001	20000	.	.	.	Target=AC13221.2 1 15000 +;Name=match6
+Contig2	clone	assembly_component	2001	37450	.	.	.	Target=M7.3 1001 36450 +;Name=match7
+Contig2	predicted	transcript	2501	4500	.	+	.	ID=trans-3;Name=trans-3;Alias=trans-18
+Contig2	predicted	transcript	5001	8001	.	-	.	ID=trans-4;Name=trans-4
+
+# processed_transcript
+Contig3	clone	assembly_component	1	50000	.	.	.	ID=AL12345.2
+Contig3	confirmed	mRNA	32000	35000	.	+	.	ID=trans-8
+Contig3	confirmed	UTR	32000	32100	.	+	.	Parent=trans-8
+Contig3	confirmed	CDS	32101	33000	.	+	.	Parent=trans-8
+Contig3	confirmed	CDS	34000	34500	.	+	.	Parent=trans-8
+Contig3	confirmed	CDS	34600	34900	.	+	.	Parent=trans-8
+Contig3	confirmed	UTR	34901	35000	.	+	.	Parent=trans-8
+
+# associative attributes
+# these are not intended to have any implied parent/child relationship, but their attributes can be
+# used to group them arbitrarily.
+Contig4	clone	assembly_component	1	50000	.	.	.	ID=ABC123
+Contig4	confirmed	gene	32000	35000	.	+	.	ID=thing1;gene=gene-9
+Contig4	confirmed	mRNA	32000	35000	.	+	.	ID=thing2;mRNA=trans-9;gene=gene-9
+Contig4	confirmed	CDS	32000	35000	.	+	.	ID=thing3;mRNA=trans-9
+
+# three-tiered gene
+Contig1	confirmed	gene	2000	3000	.	.	.	ID=tier0;Name=gene3;expressed=yes;in_process=1
+Contig1	confirmed	mRNA	2000	3000	.	+	.	Parent=tier0;ID=tier0.1;expressed=yes;Name=gene3.a;index=1
+Contig1	confirmed	mRNA	2500	3000	.	+	.	Parent=tier0;ID=tier0.2;Name=gene3.b
+Contig1	confirmed	five_prime_UTR	2000	2100	.	+	.	Parent=tier0.1
+Contig1	confirmed	CDS	2101	2200	.	+	0	Parent=tier0.1
+Contig1	confirmed	CDS	2500	2800	.	+	0	Parent=tier0.1
+Contig1	confirmed	three_prime_UTR	2801	2900	.	+	.	Parent=tier0.1
+Contig1	confirmed	three_prime_UTR	2910	3000	.	+	.	Parent=tier0.1
+
+Contig1	confirmed	five_prime_UTR	2500	2510	.	+	.	Parent=tier0.2
+Contig1	confirmed	CDS	2511	2520	.	+	0	Parent=tier0.2
+Contig1	confirmed	CDS	2300	2400	.	+	0	Parent=tier0.2,tier0.1;Name=shared_exon
+Contig1	confirmed	CDS	2500	2800	.	+	0	Parent=tier0.2
+Contig1	confirmed	three_prime_UTR	2801	2900	.	+	.	Parent=tier0.2;ID=utr1
+Contig1	confirmed	three_prime_UTR	2910	3000	.	+	.	Parent=tier0.2;ID=utr1
+
+ctgA	est	EST_match	5410	5500	.	-	.	ID=Match2;Name=agt830.3;Target=agt830.3 505 595
+ctgA	est	EST_match	7000	7503	.	-	.	ID=Match2;Name=agt830.3;Target=agt830.3 1 504 
+ctgA	est	EST_match	1050	1500	.	+	.	ID=Match1;Name=agt830.5;Target=agt830.5 1 451
+ctgA	est	EST_match	3000	3202	.	+	.	ID=Match1;Name=agt830.5;Target=agt830.5 452 654
+
+# test out DNA loading
+>Contig1
+ttcttcgacactaagaaccatgccccatgtcacgacttcaccctcattgacgagcgttgt
+tcactcatctcgctgggccactagctacccatggggttaattggccagggatcggataca
+cgttaatcgatcggaaaactgcgcgtgtcatcaactcttcttacaatgctataagtaacg
+gaaactgaatagctaaacataagtgagcttcagagcagcgagagcacttaacaaaccttg
+ggttgggctaagtgctttagttccatcatctcaagagtgcatagaagtctgcacgcggcc
+gccggctggaagcatggtgaatactgactattagctgttattcctctggaacacaccggt
+acagaaggtacatcgacacttctttggtgctataagaccagttgcgtggccgataggggt
+gatgacgagtaaccctttcgcagctcaagtccagaacgagacctgtcaattcccgctctc
+ttgcattgattttcacgatcgttacaagctaaaaatagttgaaggagaatgagcaggatg
+tgctccccacggtctcttacccctgacactcttcgcgttctatatctccggaaagcccgg
+aaccttttgtgcttcgtgtgagcataccgaacttgtttatgccctgccgccggataagct
+tttcacgttagtaagaatgctcactgagacaggttgtcaatagccggcgttaactgccat
+tataggaatttttaggacgaccgaagattaaatggcggagatttgttggcggattaagtt
+ttccgcgcaccggggtaggagatcactcggctatcgtgctttaatgatgcggctatcgga
+gatgtttgcgcagccgaactccaacgggtattaaatctcggcagcgtcttaccgctttta
+tacagtacatgatttcgccgaaaggaaagtatatggtgttctcggttggttgcattatcg
+acttattcaactaacacaacgttcggggttattattcctcccgaagatgatgccaaatat
+acatttggctcataaccgaagtgtaatggatcttaggttggctagcggctgcgtacagac
+aatagatacagtattcccgactttggacgaccgaggccggtcctgagagttgaaagtcac
+gggacgtttggccctgtgtctttgttccaccgttaaatattgggggcgacgtatcggcct
+ggacgtatacttcatgacctttcgtccatggccatacggcggcggggattttaatctaga
+tccagtcgtaggcttggagctatcctctctttatcgcgcttgacaatagcatccccaaga
+tacgatctacccaagtgacgcgagctcaaaccctattttgtggtatacggctgatgtaga
+tggcaacaacggtcgagtatagagttcctacctacgcctacggagtctgtacgtctgtgt
+atatcgagcgcattcgaggatattgatcgttggcgagccggtttgcggaaaacagacagc
+aaatttgatgcagatggtattgccaactgtcccagcgcggcttaagggccgtggtatgac
+gaagggatctatgtgaattctcccgggccctaatagatgtggcataggaaatgcccgaag
+cccggaagccttcgtactgcacgtcgttgccatacggatcatgtagagagaggtccctat
+accctctaatgtggcctgaccttttaagcatttacgtacttgaatatttaggcgattcct
+ccttgataacttcgttgcaccggttcctctcccggatctgagagattaaagcgtgactca
+catttctaggtaaaccgtagttctggacgaacccgtcgtcggatgatggacgagctggaa
+aagacccatcatttccacgcagacaggaatacaagagtgtctggcaacaaggcctgccag
+attactttatctcatacatatgggatgagaacacgacctcatcgaaaaaagttagtgtat
+ggccgccgacagtcgtacaatatcaataatcctgctgtttgcaacttcgctggacatgta
+ctgaataatgcaagcttgaaaacgtataccctgcaagcccctgctatggtcgacccccgg
+tggagcatatcgcgatagctgcttttgaccatccacgggccaccgacactctgaaaatgg
+gtatcctctcggactgcatacggaccaacctctctagtagaattaacccagagtatttag
+ccgggatactctcgcgagttccaatctatatgccactcccgtaggcatgatttgatcttg
+cgctttgcaagcaactggcgttcagtgattacggaatcctcgagatcgccagggagatga
+cagggggcggttggcaccccggcgtatgagttactactggggccaggtcggcatccgttt
+attacgtggcgcctcgaatgggttcgtttacagacgccgcgagtatcaccacacgattag
+ttggctcggcacacgacgggccgcaactgaatattggtatgccctgacccacagcagatg
+gatcccctacgtgaaattactggaaacccatctctccagttaatgtggagcttactgtat
+tacgcgggcttaacctggtcgtggtacgggaggtccgcctaatacatcgatggtcctcac
+cacgaacgttatatgaagctctgtcagcacgacgttgcaaagacacataaacatttgctc
+ctcttcgtcatacatatccgtggtgccgcgaaagaatgggacgcatttgtgaatggacgc
+ttgggggacgcccgggttcacgagcctagccctccagtttattgctctataaccccggcg
+ttcgatcgatagacgtgctgggaagcgagcagtacctagctcggtcatttaacctccgag
+actctcgtccaccgcaagcgacctttccagacagtcgaggccagccgcaccaatgacatc
+caggcctcaggtacttgaaacgctcttgagagcctgaattcatgaaaagcccaagacggt
+tcactcacaccacgcaattattattgctgcctgtgtggtcccaatcacgtggggtcttac
+acggagagagggatatacagagccttctgcttttgtcggcggtgaccttcaggccggatc
+agcgttgaactctggtgtaaggatatatgaagtttactaccacaggagatttgcatttat
+cccatggcaggtcccggaaattggaggaaaagtggtagccttcatctactcaaaagatgc
+acctaaccggacactcggaccgatgtttgacgaagaaatttatattgcaacagggccatc
+ccgatgaaagagggcagtgacagagtagggaaaaaacgcaacaaaatttgattagtggga
+cactgaattagatgtgatgtacaggtccagtgaggctccgcatggggcttaggtgtcaac
+tagggtaagaccccccagctcaccgtgtcgaacaaacacgagtacccccgcctctcggca
+agaataacaccttattcgttctcatctatcagaccctccactgtaggtagctcggtactc
+ccgttaggtctactgcaatccggctgcccggcgaagcgggcttagcgaggggccgaaagt
+ggaatgacgaaatggtccggagctgcgcgggcagacgacggtcgtcgactcacctcgaat
+tcaagccgcctcgcccggattacaaccctcatacatgggccaattcagctcggtccgata
+cgacagcttagtatagcagcctggccggttttgtagccttctttctacctctaaaagaga
+caaagtccctgtgttattgtaaccgtgcatcgatctcgtcaccttcaatatgcaacgtga
+agactgaggccctagcctctttgagggtcacccgaatgagatatgggatgtcgaatacat
+gggccaagtttcaggacacgaggatacagggttaaccgtcgaatttcttcttgcaaggta
+gagatcccgaagtctctagaacgggttacgtacaattgggattagcgattaggaggagag
+gcacgttagacaaccgacaaagccgaacaccgccctgtcgtatccagagcgggtaagttt
+taattgcgcgtccctcttctcaaagtcccgatatgcccggctaaatatagttgcggatag
+gtttgccgtccggcggtaggagagggaggctccctgtgatgtcgacgcttatcgtcacca
+ctgacagcctcgacctcgatattgagtgccaaatcgtctggtcacccatcacgatccaga
+ccgtcgattaaaccccgtgtaccccgtgttatacctgacgtccgtacctccaataacctt
+aggtcaaataaatcttcttgagtctatctcattcggatacgctgcaaaggagaaggtccc
+atagttcggttcggtcattggatacaatgctgtacgctaacaggccttcactttagattc
+gcgtcgtgttgaagttctattatatagttccccagcaactttttggagatacgatggtct
+acatcttaaaataagcaaatttggcatgcgatggacattgtagaagcccgataccaggct
+aactacgacgccctgtgcgggattattcagtaacagactatatcgggggccgactctgtg
+tcgtacccccaccggagactctaggacgttgggcaccccaacccagtacgatacagactt
+ccttgatctaaacgtattgtcatttttcgttaacacatatcaagctgtttttcgataacc
+tacgggtactcgtttgacaggcaccaggtatgtctccccccggtcagactgaaggctaaa
+tagccgtacaatttcagctcgcgtaggtagttcgattcgcaggtgcatgatagcacagaa
+gaggtcaccgaactgcacgactaaaatgaccgctggttgccgtagtagcatgcgaagcag
+cttgccaacatcatcactgtcaccagccccctgtattcacacatagggctaagattaaac
+cctcatagcgattgttggatttgatcggacactttgccttgctagggggctatttactgt
+agcaaggcccgacatcgtggtccttgtcgagtctctgttccactaattcttcattgagat
+ctcattctcgaaatccaacgaatggggcggatatcgacaaaccattggcaagttgctata
+agccagcaacaatatccccgtgacatcatcgtgccgggttaaatacaagagggtgtttgg
+gtccgcccccggaatcgcttctatggcattatatggtcccccctcctttactgtgacatt
+gtgcagacccttcgttacgactatcaattccaccccggaaagttagttctccacatattt
+tacctgggtcggcgttctgccggctacgcaggtcttctgtacaagcgcgggaaggtacac
+tagtctatagacgccgagttccgctacggcccctaatccatgagatctttggtgttattt
+cgatcatctcctgggagggtatacttcaatgacccattctgggaaagataaccaaggcga
+acgctcgcaattaacgctactgtggaattgaacatttcctcagtgtacgagtgtctctac
+gcagcgtacgcctatggagtctttgcatgtgactgcgacccatatctacaaaccggcatt
+cccgggttctaccaactggttgtgccgcgcgaagggagatttccccgtacgccagtgtcc
+accggaagcgctccataacagctgtggaagcgaatagggtgccggggtctacacaattct
+cacgagccaagatccccaacaacgattacggccctccgaccaaaatacacatttgcactg
+attcctttgcggccctgaatcatctcagagccgatactcgctaaggcctggtcggggaat
+cccctgtacagtcataatccatccattggcagagtaacttttgcatctttgctacttgag
+atatcgtgtagctctgaaagacggccatgaatcttagctctgggaccgtatctaggggca
+ggctccgtctgtcatggttgggccgtaagcctgccttttacgttaatgccccggtaccac
+gatcagtacagcgtcacccagtaccccttgccattgtcacccctgcgttagacaaataac
+actagaacagcctgatcgctagctcatatctggtccctcgaaaaactgcgactcggtttc
+atcctagattgtgtgtattccctctatggacatgcttggacagtaccatggagcgccgca
+ccaagatcagaagtgccgtcagcagatacctatagtacgacatctagagtacattagacc
+atggatagacatccttgtacttgtgtgatctcgagtaatcgcagatgctctgggcggggt
+tttttcccgagcccttttagtgttaagtacactcaatctcaccggaatggatcaccccgc
+acttaaaggtccctacccaaatcccacgaggccaccgagggtcgaaagtcggttgacgtc
+cctgtagtccgcattgccaacattaccactactttacccgtctatccaggcttgtagtag
+tgcgcatacgaaaagacttataaaaatcatctaggaatttcatttccagggagctgtcgc
+caaaactgatgatccacagaagaccgatcttcaaggtcctcccagctatctgccaaaccc
+tgctaccatcacccccacgaaggtctggtctctctcaatgcagtatttcaattcactccg
+ctccaagtgtagcgggaagttaacttttcataaaatataagaaggtatcacagctcccgg
+ccagtgagtatccgctccgaggctgtgtcttactgaagatgtacgcaaaattaatggaga
+tcccggttagtctcacggaaaggtgtcggggagggtctcacgttcaccatgtgcatttgt
+aaaataatagctgctgtaacttattcacattcctaacaatagagcctagccgataggtat
+atcccgccgacggtgtcgcccgtccttgaaaaataaagcgagccattgagcgcgacgctg
+tgtagaactcgttcgcatacacgcatacgcccggcgtcgagctgcggtttcctctgcaca
+agaagttctgttatcaaaataaagctaaggattccaaacatgtgacaccagctaccccac
+caccccatataagtatagcatgctcccattggcacagtgtaccgggtgttgacggtcgcg
+ggctcggtcttcatggaagctcctgttggaagcgtgtactattaatgtccccctcattta
+agtactgcgaacccaaaattttgggttaggaaaaagacggcatcctcttatatgacgagt
+cacctttttttttacactcgagccttacggaagagaagtcgaacaaaagcacgtggtgac
+ggtccctcaaatactatcgacatggccaccttcctattattaatagccgcttcacgattg
+ctattagagtgcctatctcatacttcctatgatcgtacgtggatggagacgaagcgggtg
+tggtgagctttgtattccggacgagtgtgcggcttggtaggaaccggggcgtatgaaatc
+tctcatgggaggaggtagtagagttagacaccgtctaagttagttctcggactgtatcgc
+agtcagaaccggccgttcagagcgggaaccacagaaatgggcccccccgcagcttcacac
+gcatactcggacatgctggcttgcacgtcgtaagatgaggtgaatagggctatgcgaata
+tcctgaaatcgaggagctagaacggcaggaacctaatggatagtcagttatgttcagtcg
+gctcccttcaaaatgggtgctgcagatacttaggtcgaccgtcacggagaccccgtagac
+gcgaacgggtagtacaaacatagcactagccaccacggagaagagttggctcgttaggtc
+tcgtcactctgcattggttgacaagcttgggctaccaatgccctgtcctaacactcaggg
+cgcggaatatagtactctccgactggcattggattagcattactcgttctttttgatttc
+gtgagcgagctaggctaaactttaacgccatgattgacgtcgttaggggatcggctccca
+atagctaatgccgtgatgtctttcactaacgtgggacaagtccattcgctaaccgcgcaa
+cggttatctcaaaactaaaggacgtttagaagaatacgttgagctggcgatcggcgccta
+tccgttggtcacctaaggtcctagtgttaaatatcctcggacgggccactaccatagctg
+agagcccaagtgttacaggcttcgacacattactcatgccgaccccccggtaaccagggc
+ctggccgtgcccggcgaatccttatgagcagtttacggaaaagctatgtgaccttattcg
+cctcaccctatgacgatgcatgaactagacgacagtaacgtaaagtgtaaaaaaaaactg
+gcgctatacctttatcccataaacaccctttttcgccggctgcgcgcggcaagcgctgat
+tgttatactccgaatgtccgggctagacactgatgcacactccctaggaacagtcaggtc
+actcgctcctcagtcgtgggcaatcgatgtcacctccagcgagaatcgcccatatcggtc
+tggaattaattccgttataacgtgacatcgacctctccgctgggcgcataggcaactttg
+cctgctaatttttccccttcaacaacaggcccgacagagttacacctaccgtaacagtga
+gctgacttaaacgcttcaactgttaaacactgactagtgacggggctcggtacgcccagg
+tgatcatcgaaggatagcgtagatttcgagcataaatgacgtcgggccgatcaggcgttc
+taaaatggcggaattataagccagttctgccagcgccgcgtaaatggtcttcgtggactc
+actgcatatatcgaatttttagactcaggcgcatcactgttggcggacgattactgacag
+ggttttcggaatggtcccttctcattcccagcggtacttgagctgtgctgtgcgttttgg
+cccctcggacagctgcgtttacatcttcaagcctcatacccgcgagtacttgcgacgcct
+tcagctagttgtcgcaacattgggcagtaacccaagaggggtcgctaacataggtagaga
+tccgccctcgtccttaaatgcaagggcgttatcttggggcgctaatcaattggacgaagc
+tcccttaggttatcgaagcccgaactaggctgggttcgtgtgagtatgaaaactggttaa
+gctatagaaagcttcatttgcccaagaggacttcctttgagtagcaacatgaggtaaccc
+atagggtgctcccaatatttgtaatgactctggctgttcggttgtgccgtgtcctaccat
+atgcgcacctgtacctgcgactaagatacgcgtgcaatagaagggaccgtgggttccacg
+caatctcaacacgatagggttgttttgattttattgccgccccgtgagatctgtgtatcg
+ttaggttaggtagcctgaactgggtagcgattatctgcatgacggatcatacccgttggc
+gtcccccaattctctaaaaaccctgggtccacgagtagaccgggacccagggcttagcag
+cgactacagaagcctgcattatcctgttacaaccccgaagtgaagacctcggacgcagtc
+atccctagcagttcaagcggtccggaggtccggtaaggattgaatcttgtgcttcgtgaa
+tagactgtagttggttcaaacacaatctaggcatgggcccggaacccacagtgaacagga
+gatgtgtctccaaagaatccgttatctcgcacttgacggacctgatactcaataatttta
+gagaggacgcacttactgctatgcgtcatttgggcggccagcgaacccggattggatctt
+ctagccacaatgaggcggaaatcttcgcacgttctagtggccctctaaaaggaacgatgg
+ttaattcgacggcctgggctcggcgttggtctgattatagactattttccggatgtagaa
+ggaaccgaacttcctcgctggtcttaaaggctagcaacaaagagacaaaggttaagcgtg
+aacatgagctccacagcttcagttgagccaggacttttaccttccatggcacgcaggatc
+gatccagatatccgacgcactaagaaatcgtcccagggctgtacgtctgggtgctatgcc
+agcatcatccgtcgtctattgacgagaaactactcgtacacttcagccatggataaccaa
+cttcctaagggtcgtgcgtcatgcatacctatacagggcagcttaatggtgatgagtctt
+gtatataagggcgacttttttacagcgtgtttacctctggccgacaaatagcatccatta
+tgcttagaaagctctccctaattgaggcgttcctcgcttgcccgtcctagctatgacgag
+gcccctttacgtcccagaataagctccgcatctaaccggaatagcgctctcaggattatg
+actcctctcatctatgccggttcaagctaacatagtctctgtgcggcgaggggttgtcat
+gtgatttaaagagatagaagcctgcttgtatctcgtagctgaccggtcacgtacgcggtt
+gttgaggctcttgaccatctataaacgctgtttcctcttttatttggcccgcaagctcta
+ctcactggccgttgacggctacattcgttgaagatatacgacagtggtgataattgactt
+ctgacgaattcggtacccgcgatatcgcgcctcccctcttttgcatagctctcatatagt
+tactccgaaggggcttggccacatataaatagtaaccgaatctgtgatgtgatagctgcc
+gtagggctcacgagttgcctaacagtaatgcagggcgaaccgcggctaacgtcacagctt
+accggtgggcggctcccgccatgttgttaccgttaacgtagtgcactcacacggccgtta
+accgtcctaccaccccgaggcagagctcgcaatccctaccataaaccgatcttctcaacc
+tttctagaggacaaaacgcaccgtaactactagccgccgcttgttgatagatatccccga
+gtgccactgcaggcggttatgtatcgaattaattacaagtttgggacgagagcgacccga
+gaagatttgataatacttaaaggctcacctcgctcgcagtatttcgggtgaaaatccgct
+cactaacatcgaccgaggtttttggaggtctacaaacaagcggctagccccgtgaagaac
+ggggtcttaactattttagaaggtaatagatactctgcagcttacttccataagtgggta
+ctgagcaaggatttaaacgcaataaatgtgcttgccggtatggatgctatggaagctccg
+cttctccagagaacttggcgagattatctcaataacgacatagcttcggagtgagagtga
+tacccgaacatgctcagagtctggggatacccggacgaagacgtaacgccaatgtcgtcc
+ccccgaccggcgagtcaagacccccacagagggtcgccagtcagccggtgcgtctcgagt
+cagtaggagatattccagagcctcatctgaactcggcatgcaccctcatccggttgagtt
+ggtcatcaatatcccctgcttaactaagattaagatattaccgctgataatagcacgtga
+gccttaatgcggtaagagcagcacactcacgagagatcagttacgtgtctcatcagtaag
+aactatccgctgccttttctcctcagcggcatcaagcctacaccctctcgtagcgaaggc
+aaggtgaaggttatggatcacctatcagaatcgagaacgtccccctgaccgtgtagactt
+cgacagcctgggcgcgcctcccgttatcctgcccaaaggctatgtccaagggtcccgaca
+agcatatggacaaaaagaacagcagattgatacagattgcgtccaaattgtccttgcgtt
+ataagtaagtaatggcgccgtcatggaaaaaggggtcatacttgggtggtgaaaccccac
+agtctgagcggcgactgtggccccgtccgtagccctatcccaggattaggtaaccggaat
+aggctaatacacgggtgatgcactcttcggatcacgaaagacggagcttcagccagtagg
+aaacggaatagtgatgctggtggggccgacataattaggggttccgcagtatgggtcgac
+ttagaaaaccgagaaaatttttttcacgtcgtagggtttacatgcgcagagcggcaagaa
+gatgaggggaaaccttgcttataccggggtgccctttgccgtaagcaatgaacagacaac
+tacccgcaacacttagcatgtcaggcggagttcaacgtcattcgtaggttctagtatcag
+tgtgggacgttattgccccggcacgattctaagttaatacatgaactctggtgccatgga
+gggctacgctgaggtagctctacttgcaaccccgtggtgtgttccctactcgcgctgagg
+gcaatattagagattttgaccctgaaacagcgtaaaccccgggataccaaggaccgactg
+cgtcgggctttagatacttgagggcccataactccgggggtcggcttgtacgttatttcc
+agtaggccggcacgctgggtatcattagatctacaggccccaacaccactctgtcattag
+attagggtcggttggtcgtcacggcaattattcgatgacaattgttgataccagctgcgg
+aagaagccggtccgggaactcctccaaatgaaaggacagtcttttcatatcgcagctatg
+ttcagtggatccaatggatgcagatacgtttaggatcgtgtaaacatttgaattatgtga
+acagcgtaaatggggcggatatgccgggtatcgaatacactcgtggttctaacttggaag
+tccacccagaaaagcctccggcacgcgtcctgtaaccgactggatcgttaagacaggctg
+gagacaatatcaatgcgtgttctaaagacagcaggtcttacacatggcgattgccgtagg
+caagcttgtagctaagagtacgatgcttcgtagataatcacgctgaactgcagggtctct
+tggcatatgaaccctttcgcctgcaagctaatcgtcctctctccataatattgtaacaca
+gcggtgtgcgccacgtttacagatgtccgtgacgtacttagtagtttgacgtgctgaatc
+tagatatcattcagggattagaggaaccgagtttgggcgccatcacgctatatccattaa
+ggctgcgacattaaggtgccaaatctctaagccctaattggggcgtatggctagcttcta
+atgtaacaatagaaaaccaacatcgtaacggttgatacttataattcccctttgggttgt
+gaagagcgggaagggaaggagaattcagtactcaaacagcagtgtatggtagttgagcca
+cataaatgtagagcggatgcatgaccatggctcggtaatgaggcttgcctagtctagttg
+gtgaagtggtgaacactgggagcgttagaatataagtccgtttacccaaacgttctgatt
+ctcatcgcccccttttgccggcgactgcactgcacgtttagcaatgtgttatcgccatcc
+atatccccaattgtagtttaggaaccacggggatttcctgcgcccaaggcgttatgtggg
+atacgaaaaggtcccgccgtatagaaaacgtttcatctaggggacaacaactataacttt
+atcaacgcttttgactgacgcggggaaccaactacacagagcataaggaatgggaccagc
+tactagagacaggggctgaactgagggtgagtggttagctccggaaaacatgcttcggtc
+ccgagaactggaaccagcggcgaacgggcgctccttctaattaggatcagtggagaacgg
+cccgttgccgctttacactgtatgcaccgagctcacacgctgagccgttatttacaatta
+tgctacactagcgcgtggccccgagagcacggagataaggatacaaatcggtgatataat
+ccattaatgtgtgaacttgtcccctccgcccgtgcatacttaatcgatcgagctacaaat
+cacccgcagctggatacttaattcagccgagtctcgtagcagtacgtatgatggatactg
+acggtcccaggatttgcagataccagtgtatcaaagtatacattgatgtttgttcataat
+attgcgcctcaggctgagtcttatttaaacagcgagagtaccacacgcccctcctatggc
+tacatacgatgaacagtttgcctagggtcattccggctgaggtccgaatagttgtacttt
+aggatcaggaacgaatggacctatcattaggaagcgcactccctgcttgccctggtgccg
+ccctaagtgctgaatgtgcctcaaacacctccgtcgaatgcgacggcatctcgggtcggg
+cgacttcccgattaattattagcactaaaaaccatcgtggatgagttggggctctggaca
+gaagatcttattagacgtgctttcggtgcgctccgttcagtgtttaaggtacgtcaatac
+cggaatccctggtagtaacgcgtgactaagaaaatactggagctgccaatgatactatcg
+gggtattccgcgagagattcaggcctgccgatgggcgcgatggatgcactctactgagtt
+acttgtcgctgtgtgatggataatgttcataacagccacgtatatacaatgggggggcgt
+tccacataggcatgtcccaagatctacccgttgcgcgtcatcctgccggttcacgattaa
+aaagtctcgtttagctcatggctgctacaggttaaggccacgacaccgaggaggtccaag
+agtgcttcccactctatattgatctcttcctaacgccaaagctatgtcctacatgatatg
+agttatactgagacaacagaaaatcccatcgctaagaaagacgatgcgcctacgcttagc
+ttcgtccgtaagaggcagtccgatgtttgaggtggccgctgggccgtatgccgaccaaga
+tacgaccccgggtattatgctccattgaacttagctaagaagatccaacgggatgtgttt
+agagcgggattggagctcccccatgcagttggatccggagtaggtccctcactcgagcct
+gactgtatgccggcacagccgtggcactcatttcgcccgtggagggtgttcgtgaacttt
+aataccaatactctaagctgtccacgcacgggacaggtatgagtttggaggacaccaatt
+taacatgctcagagtcttgtaatgctgccagcggtctctaaggtgctaccaaacaagtaa
+gcggaattgagatgacgtggtttgccgaggctggaataatgaggtggttccctagccttc
+gattctacgtccattagggtaacagcacgattagaactgtggatcacgggccaggatctc
+ccattcaagtatccagactcatctccgctgacgctagcatgcgttggcggtaaggcagtg
+tgtggaaacaacggttccacccgtggggatcgcgaggctgtcatattccattgaagtgtc
+ggtatctaccaaacaagcgcaggtcggggaatcctgaaagcctacgtcagacttagcgac
+tttcatgggctgggtgtcgagaggtcacttcagtacgctatgtagtgatagcgacccgcg
+cgtagttcacgaactctagcagaccagtgcgcgtcatccgtttttattgcctcaacttag
+cgtcataaccctagttatcgcgaggacacccaatacgaaggactgcccctaagaggcgat
+aattttacgcaggtagacacgtcggcacacgctggaattccatactagataactcccaca
+cttttggcccgacagggagaccctgctgacggcgtgtcacggagctgctcatcgaagcga
+acgctatctgaatagtggaagtggcgttgtgaaaaatccatggtgagtgcggaggggaac
+tcgggtagcgcataaatcactgtttccctaccgtcacccgattcctccactcgtcggcgt
+gctaagccacgtgtgagcacccgctaccgttgtagggtcaacgggacttcttatgggcct
+cggtcggccaatttcatcccttacagagattagtgtgtttgcgtggagtccctcattgta
+gcgcactcttcggggcagggattatcgtggattatcctcctcaagagcgaggcgccgatc
+gattgccagggacctacagcagcaccttatcattcgaaatcggcacgaccctcttttcta
+cgagtgcctgggtgttaaagagattgataatcaccggatgcgctcagggagtcattactc
+gtggtgttggccccgtttcacgggcttccaaaagacaatttcatcggcatttagtatcta
+ggcccagttgtcagaacgggtccctgtgatctgtaccccggctccgagcctgaccacttt
+acacacgcgtaactgatgtacgtatccgcgtcgaccgttcgattctcactagtcagagcc
+cgtgctcaaggagagaggtcttctcgaacacttgagacctaggcttaagttccgagtact
+gacccgaatcaataagtttacgtgtctagcgcaatagcgtcatgggggcgtgcgtcatac
+tggtccggcgtaatctcgactagataaatccagcgcctgatcaggttacagtaaagcata
+gattcattaccatggcctagggtctcaaggccgatgattgaccgcgcactagtacttagg
+ttttgagtcttcgagtagatccatgacccgtggggcgtctatagggctgcgacttctcta
+gtatcgggatttaaactagccgtcttccaggtagcgagcgattggcattcgtaacagctg
+taaccgtttattccttgctaccataatgtgcccaaaaattcgccactagaaggttgatag
+aatcttaactgtggtatagtttcgggccgtgctggaacggtgattgtactttgcgtcgac
+aagaaaggtgttgcggtgaagagcaccccaccatttgacttgccttggcgacttttcctc
+tttgcagctgatatactctgcgctatgttattcgggcccgtacaaccgtgcacctctcat
+cacctgactattactacctccccactccctttatgtcagctttctctaaattttcacatg
+caattcacgccctccacgcgtaacaagaaagcgggactgtcagataatcctaccgactaa
+gaccacaagcgtgaagatggataaccctcgggtattcgtagaggcgcgatcgactttaaa
+tttgcgtacggccaagattttatcacataaacgggcaatgagttgccgcggcgtgaagac
+cccctatacgagaggatatcgccatctactatatcatgtatcagtgtgtaggaggccttt
+ctaacgtatacagttgttcgtccccaattgtccccatcgggtgatagaaatcatttcacc
+cttaatggattgatatgaccaattactatactgttagatacagttaactcgtccaagaaa
+agatgctggactggtcttgtaagcagcgcgccgctaaagatgcgaatcttactagtcagg
+agtctggacccttttttcttatgactagacagatgttgcgccgtttccgggctaacttat
+aagagaactaagctacagttacccctgcctgcctgtcgcgtccatccgcgtcgtaccaac
+ttcacatgttatagtagtagcactgttcggattttttgttctatctagattcgaattgtg
+tgagcttcaagaaatggtgctccaattgttctgcaatgagtcttctctgctgacaacccg
+acaggttatcagaccgaaggatgggctccgtgcacgttgcacttcatagccgcatatccg
+cagattatgaagaatgcgtattatggttactgttcctagccgaattataagcgctatcaa
+ttcccgtctaaagtacaacgtccgaacgacaacttacacaagtccgcctcgcattactgc
+cctcctggtcacactccatgaagatcaggcagttctaaacgacgaggttgttgctatcta
+ctcaacagccagaagtatacagacatcatatatactcagctacaacgctggatgataagc
+tatcagcctcagccgcacatgtgggatacgcgcctctctgaacctgagaaaggcttcccc
+catcaaggcgtgtatctgcccctgcctgacgcgataattaacgtgtgatcaattcgcccc
+gtgtctgcgctagtactcaaactggttgcccttgttctcatgttggcggcgtgtgcgaca
+ggtactcagcgggagtatgaatgtagttgacgcacggtatgtgcagaagtaggaatgcgg
+atgacccgttcgttcgatctatatttcggaccgcgtcacctgagcggtctttcagccgtt
+taagtggagagtaaaagatcaccacaaacaacgacgcacactctatgtgggcttttattg
+gtctgagctcgtctagccccctaacactttcgaaccttactcttgttgcatttgtaacaa
+gttatgccggtgaccagaacggttaggttagttgcgtcacagatatggactcgctgctgc
+aaagtaggggtaatacgaacacggtagggtcaataagcgcactaattgcggctgtatagg
+gagcgggcgcgagaaggacgactactggtcattcccaattgttctctccagcctaatggc
+ttcggctctacacgtacgctactattagcccaatagttaaccaacgacttaaccggcgat
+ggcagcgcgcgtgagctaggaacagactttcacaacgcacgatataggggcttaaacggc
+agtcctatggcattcgacggtagtgttccaaagctacgtgataccggttgcatcaataat
+ccccggttacgttcgctacttggtagctgcgcatgtctgccctgtgagctcctgcaattg
+actttttcagagtgcagagttataaacagcgggagagcaacatggcctcaatctcgtttc
+tcaaaccttggctgacttgtgttatgccggagctcccactggtatgtggaacacgagacg
+caaggataagctaacgtctcaatggtacggcgcaacaggtaggtatcgaaagcataagca
+tgttcaggcctcttgcggtgcgtacaaaaatctctagacgtacaacttagaaatgtcttc
+cggtatgacactattttgtgcttggtcccatgtaaatctgattttcggataagcccctat
+aatcaacctactgccggacgtctcgggttccgagacggtcccactatgattggtacaatt
+ggactgagagaacacaaaatacgtcgcaagaaccctgaagtggaagacactgattaacgc
+aacgaatataaataacgggcctgcattgggtccccttgtccatgggaaatgttcacgcac
+atgtgcgggcccggggcgcaattactggcatttagatgaggtgcacctaacctagacacg
+gtcctaggccactgcaccggtatcactcacttagcacttatcgtcatcatagtgcgagct
+tgttttagcagccttggcatcagacgggatcgtccgaagtcacgtgacggagatcccatt
+gcgcgattccgactgcaaggaaacaaagaaacgcgggacgattctctccaatgcatcctt
+tccctatcaaaagcagtttactacaagttgcggcaattttttcggtaggaggggtctatg
+cagatcttggcgccgactgtccttgaggggccgctggtagtctgggttgtgcttacgttc
+attgatgtacgctcacatcgtaaagtgaaatggatgaaatatttaagtcctgcgttggaa
+cattttcgaagtctttacaagacacaacgggaaaccctcgcaaagtttatgatagaataa
+ccttatcacatgcacctaatgagggcccaagctagttgtccaattctacaaaaccaggta
+cctcacctgttcaaggatttcacccagtagaacaaggcgcccggaacacaggggataata
+cgattcgtgactcgggatgttcgagcccattgcgagcgcagttcaatcctacccggacct
+tggtggtccggtcccaaatcccagtggaatagctcttgtcggaactgcgcaggactttct
+ttgccctggcacgctgttaatcccctcgtaacctgtggtatagcctgtctgaaggcctac
+acaacgaacgtagaacagaaggcgtggctacctccaggtctctctcggaccttcatgaaa
+taccgagtttccacaggggctatggtgcggacaagctccgctcggcctgattttgattcg
+cagctgacgtcaaacgatgcgaaccctcttcttgttatcggtagggcggttgggctttat
+agaccgtttagctatgcctttcgagccgcacatccctggacggcaacctcggttcgcacc
+atattccgttgatctagccagccatccatcatcggaatgttcatataacacgtacgaaaa
+aaatagttaggggcgactattaccggctggggatccttcagggcacgacctgaaccagaa
+cggaggttcgcgcttctgttacagcctcaacttgcaaggacttccgcctatgcacgcgag
+agatctgcgctatgcagacgaacttccacagaggtttaatactagatagtgtcgtattga
+ggcttcaaggcggaacctgagagtggttcccctgcatctatctcaccgactgggctcctg
+tcactcgactctcactgatcgtacactagagggtgggcctacctaaccttactggagctt
+ctactcttctgacacagtagctgtcaggatcaataactacgtgtagctccaagtcgctac
+gcttgcaacgagccattcaacaattcgtgcccacagattttagttaagtaacgttagtga
+atgctgtaagactccaattctagtacttgaccgacaatacaggggtcacgacaaatacat
+gtgcgtaagttcccatgccaggagtgtttgacctcccctcaaaaaggacgggcggtttac
+taatatggttctgggcgggtgctcattaccatcgttccgatgtttcgatcatgatgatcc
+atcctctctgatcacttaaaatcttagtgcaaaagacttgattactggtcgacgggaaac
+cgaatagcaaccgggggtcgcgcttccatggaagcgggttgacgtcttgctcgggatgct
+tcccgttatctacgctatacggagatccgctaggtgcactggcctattggagcactatac
+cgaggaccaacgcgtatgaagaagtgtagtcgtatctgaagtttcgacctggaaagcagg
+ggctcttacaggggtgccaacgacaaccgatgatttaaacaagttctccaatctcacgag
+caggcgaggccacctagaccttattacagggatcctgccgagccagtacggtgcctagtg
+cgaccactcattcttttatgcgcgcgactccctggggcagcgaagaactacttaagagca
+acgttgcaggcgagcttagatgatatccttatctccctcacttgcagactcactgactgg
+tagacctgtcgtgtttaatttgcttttgcggctcttccaatgcgcacagtcgtaggatga
+taatgatgccaacgacatgtgaccgttatcgggatgacttaactgtagaccaacgattta
+agtttgtaatgatgccgattagaccagcttcggatttgaggtctccgtgcgtcgttggaa
+ttacaggttagaatgttgagatccaaaatggattgtacgcagttcacgaggcgaccgttg
+taccgacaaggtcctgtcatgagtctggttgccgtcccttatgaaagggctgacaaaccg
+ggacacccagtcgaactgaaagcagcgccaaagaggcgagcttaaatttcgggatagctc
+gcggctccggaacaacgaggcgagcaacatgcaagaaatgcagacagtactcgggtccgg
+tgggaggcgaacacttctagcctatccgaacatatggggccaagaaaactagtgctttct
+tcgggcgaaaagtaggacataagttcgcctggttggatgtagtgtaggaatgatgcaagc
+attcccgtacgataactgaatcagtcacctggggatgaagaccggtgagccctgatccct
+tcataagatcggtcacatgcacttacacttggaacgtattgagaggcaatatgaggagcc
+gatcgccgtgattccgattaaggcttcagttaccctgtcagaggccccagctagtcttaa
+ctaacataaacgatggtctaccggtcgcaacacaagtctcctaacctggcatttacggtc
+tctcacatacccgacatagctcatcgtttgggatcgaattgcgatgcagacgttgtaggt
+tgcgcacacaggatgcagcgtgtccccggcgataggacaccgtttgagttggcccatgaa
+cccttctaatttgtgacttttttaatgtaatcttcgtttgtgtagttcattcatcagtct
+atatccgtccggaccccatcgcaaatataatagcgcccagatttatcttacaccgctgat
+ggcaataccaccaggctatgacgcagtctactgttagcttctcactctgacgtctaaatc
+attttagtattggtgacccgggtcagacttcgcggatgaaatcttaccggacaccaccaa
+ctatacaatcggcctttagataggagtaagagccagtcaccgcgtcagcttgccagatgg
+tgatgactgaggcctggtgcgttgtcgttcaccaaaggttattcctcaactgacggcgca
+acttccagcacaggcccgagttgctagcctcggccgatccctgaatgggcattcatcagt
+attcaagcgtgacacatgtgacgcagttttcagcgccatgccttttataaactaaaaaat
+gtcatgaaaaaacaagacacgctcgacaacgacaattagggcgcgattgtattagaagca
+cattgaaagctactccccgacgtccggcttgcaaggctcaatcgggttgtggtcgtctgc
+acatgcctaatgaaatccaggtcgtaatcaagtcgggcagatcggatacgcattgtactg
+gctgattaagcccatcatccgtttcgggcacacgtaagataagaccctggtggcgtaata
+acaggtatcactcgctggttacgtgtgcacgacatcgtaaactacgctgcctgcgatatc
+tagctaattgcaccgcataaaataatagcgaccgaaatgatgcggcccggaaccgatggc
+tctgataacggagcacggggtccaagaggtagaacctgcgaacagtcgccgttcaatggc
+ggcctagggcacatctggtgacaaattgcagcagcaaaggactgatcggcactctctaag
+ttggtatcgtgcatataagagcttcagccgatgtccgcattgcgtgttattcgagtcagc
+tgaccttcggtgccctccaaccagatacatgagggatttgaaccgttgtgcctgttagtg
+atggattttactccattttccagaacggtggacagttttccaggtactgcgtaacactgg
+acgaacatggaccaaacagcagctttcgaagtacggtcgcttggatgttaagagtctaac
+gatcacaagtagctaccatattcacaattttgtgtttcttaagccattcgtaaataaagg
+aatacgaagcgtccgaccagaggtctgatgtgtctcgtgtcattggtagagatgtttacc
+ttagaaccggtcaccaggatttctgacgttttcggttagcggctctgccccgtagggatg
+cttacgtgtcgaagttaggacttcttattatcacgtctactagttcatggacgatctgta
+atgttattccccgaggccgatgacgtgaattaacgaaggcgaatgctagccgtcaaccca
+gaacagcaggggcggtggcctactgtctgagtcgaatagtcacgtcctaggtacccagta
+caagacctacaccaggatatgttgggatgtattaccggcaaccctttaagtagggcaaca
+gggccacactgagtgcagaactatacgagtcccaacaaagaggtggttcagccaccagcc
+agtaagtttgcagttcaggcgggttaggccacacaagccgtagcccgatgcaactatggc
+cttgggtaaacccccgtccaaaattattgactacttgtactgacaggttgccgtgtgatg
+cttacggtacctcaggtctggtatgacctcattgagtccccagtaacgtagacattgttt
+tcttagataatccgctagagcggcggtcggcgaagaagtccacgtcactgataagtcaag
+gcgactctgacaagctctccgtgtcatgcacttaagcctagcaaatttaggatgaggaga
+aatattgcatcagggacggggaatccgaggataaagcactcataagcctgttgacacccg
+catgctgaatgctaagctagaggcgcaggctccaagccgtcctcgcaagtagatcttcgt
+gaggtagcgtatgtcagtagagtaccatcaggcaccctggcgctcatagcccagcgcctc
+tccgatgttggtctcccacgagagacccggtgtagcccctgtggaagagttaataagcat
+catcgacggatttggtgaatataattcctttagggaacatatctataatgtgaacaagcg
+ataacagccatgatattaattgaacaaattcgatgacttatgtcctcgtccaatgttttg
+gtcatggacagtacgccatatactcaactcatgggatgttgtctcccccctagcgggccc
+accgtatttaaagctaaccgttatttaaacctggcctgcatgtggtgtacgggagcagtc
+agtactatctcctagcgtaccacacaccacagaatgtttcgtttgaataccagttccagg
+gagtgggaatgttggtgaccagaggatacatcgaagttcaggcgcgttgagccagtggtt
+ggtgggccggtcgcgtaacgaggaagggccaaacggcccagcatctccgcagatataacg
+gtgcacgaagaagcgatctccatacaggtaggtcgcgtcgctaacgcaatcctcatagcg
+gtgccgatcaatgtgttcagttgatctggccactgcggtgcgcagtctaacctaacatga
+aaacccatgatccgaccagatgttatcggcaatgacggagcaaaatattgtggtgtctgc
+atgctattccgcatcggctttcctatccgcttaggaggtggaggacacgcgtaattcgac
+ttctcgacactacaaacgttcttatcagtggttgatccaatgcctcctgagtttccaaac
+caatcgatacctaccacggctagaacccagcttaagtcccggaccgcgcgctggagatgg
+cagggatgcttgcttcttcagtctcagggtagtcacgctcgttagagttacagtcaaact
+acaagtgccgaatcgcaagacatggccgtacatgttccaaatgtgcgcgaccgacgaaaa
+cgatgcatctggaacatccttcactttcggggattgttccgtgtgtggggacgaccctct
+ctgatagtagggaagcttacaccggatgaccagcggtacgggttttataaaattgaatgc
+cggaacacctggtgcatctgtgtctgtttacaaagtcaactgctaaagtccagtgcacct
+aagtgctagagccatctcagccaggtggagagataggaatggaactaatgagtgtccgac
+atataccgaaatagtgaatagcattatcggggtcacctacctcaccataatgttaaccag
+tacgtggaggtgagtagcattgatgttggttccacgactctagttaaaagtagggatgtt
+gcgtggtcaggaaactccagcacgcagcaattattcgatgataatggcgcggtcttgtgg
+aaccgcatgattcattaatcacgacctcaacacattcggttgaaagtaacgaaagtacca
+ggacggcaaattgtggatcgatgtcggcttgaaacagtctcttgtctgtcaatgattgca
+gctattgggctcctttttgagattactcatctcatacttgaatgtacggtcaattcccgc
+tcaggtataagggctaggaccaactacgggcttagagctaagtcaccggtgcagagcaag
+gacgtctctcccagatatataagggctttaccggtatcgaattaggcctttatccagtgg
+cctagttacggtcgatcgtttctcgtgaggttcctatacacgacgtggagggtcgcaatt
+gcgaggcacttctaggtctttccggacagaccatacggccccgccgcacgatgggatgaa
+ggggatagaggtcgtgacgctaagtatgacattaacggggtctatctgacgccagcatta
+acgcgttgtgaccggaggaagtcgaaaccggtgggagggcgttcttctagcggtccagag
+cccatattaaccgcaagcgtgatcggagtcgaccttacctctcagctgagacgaagtgta
+gtggcttgtctgagctccggttggtccctcgactatgctacacaggactacagtgtgctc
+ccgcattgacaaatgactcttggggcatggactaacgagtgatcaagtttcaccatttat
+tccctagcgtaagtcgcgtaaggatatcaggtcagcctaatttagaggatttcgtgacgc
+ctactggacgaagaggtgttactgcggaaggttccagaaaaggggcaatgatccaaaaag
+caagatagggacggacttatgatgcaactgttgagcggccggcagaccaaagcgcttatt
+gctgcgagaggagatgggcagactgtgtgcgaaataaaagtgtcctgtcgcggatgagtt
+ctagattgtcggacacctggtagaggcgcactagcaacaagaattcttggaatcggtagt
+tgcctacagtcctcgttgtaccacaaggccctcagaaagccagggtaagttccattacta
+tcaccttcgtttccttcctataaattttgcgtacgctcagtgacgtaatttcgtcgccgt
+atgtgtgttccaagaccttacgggttataggtgtcgcttccctagtcggaacttcgattg
+gagttcacgcccaattcaacagagggaagatgcgacccacaagctgcaaccagtcgaaat
+aagaaggagcattgtagcggacccttcacagtggggctcttagcgcactcgcgccaggga
+tatcgtgccccccgcactgtacccaaaagggtcagatcatttcaatgacagagcaatcga
+tctacgcaaagctcctcggtgtaatagacaggttagaggcaatttctggttagcgcggtg
+cgtagccctagcagataagacactagaactccgaggctatgggtaagacatcgccgccgt
+tggagttagtgagcagcgaaatcccccgactggtgctagggtaagaagacccgtttcgtg
+cacccgggagaagactgtagccgacattcggttgagcagatccatcatctaagtgttgaa
+taaacaagctttggtccggcagtcttcgcgcattccctaccttcaattcgcttccctcat
+atactaaatcaagagatcgataataaactgtattgccacctctgttttgctggtcaaagt
+cttgcgactaccacggcggaatctcgtctttggcatagaatgcccacttggccccgagta
+tgcaacgacttaagcagcgaaaaatacggataagcaccatgaggcgaacacgctcgcagg
+ggactcccagtgctcggaccgcgattgcgatccatgtacgaatgagtaggatctccaccg
+gtggatcgccgtcataccctcagggaggttccccccaagctctacgtccaacggaaaaat
+caggcgtgctcatcttcattcgtacagtgcccaagaccgctcgcacttgcgagtgctggg
+accatgacaggtcgcggcatgaatagtacgaagcgggaaccacggacgattcgtcacaac
+aggtcccgattcgtcttgaatactactgcaaagccagcgaatgacaccgactgctaacca
+cggaggaaataccatgcgaactgttaacatgcaatacattggtgctgggctcatccctgg
+cgcaaggccacatctggactgaccgtccagattaaaagtatgccgccggacgcgttcgaa
+ctggtcaaaaacctttcgataaggtgttcacgttactcttatacgaacaaatctaagcct
+agaggaactagacatagcagacctggttgaacttgcgcttaagcgtcgtcaaaaagcgct
+actagtttataacctgcaaccttctgccggctcgcatagcgaaacgcgagaacgcttggt
+tttagtcgacgcgctcaaatctatgcttttgaacttcgtggctttcgtgtgaaacatcgt
+atcgtagcatcatcacagatcacaattcataacttcatgccgcatcgcgatagccccccc
+tctttctagaccagacagatgtagacgatcaactgaatcggccgtacgccgtactggcat
+ggttatgctgcaattattttcttagggcagatatcgatctgacaggtaagactaagacca
+tctcggcatttccgagagcttataaagctccgtaatatgcgtgctacacctgcgatgaca
+agtgactcccgagaaaaaacaaagatcttgcacactggaagaggtgttttcactttcaat
+tgaggatatcactttgcgctcgctacgggacattagccacataacacacgtgaagcccaa
+tgtgctcaataagcggtggtttggacaatagggtccaaaattcctatcgctactcaaatt
+tttgccggtaaatggctctgcgtgcctagcagaatctctttttgcagacaagcggcgacg
+gcccgagaccggctggtcagtcctggtttgcactgatactctccataggacccttgcagg
+tatgggcgagaaatcctcgggatgttatccagcaacacgtgcgttcgcaaattctgtaga
+cttttggactaaaataagtgcactggctgttcacgttatcgagcgacgttcccgattcct
+tattgctctgcgcgaaccacggtccgattgtagaaagacggagtacggtaaaaacgccgt
+caagtataatgtcagtgactttctataaaaggttgggaagtacgttatgtaagttgcact
+cttagtccgcatcggttccatgtgccccggtactacagcgaaggtcgtccacactcagca
+aggagaaggcgagacgtacgtttagctcttaacgtaactggtccaacagcctccttcggt
+gaggcgttgagcgtagcagggtcactaatcatgtagggagagacagcctctggcagacaa
+tcgttatcgaaacacaccaatacaggcgacagccggcccaattacaaggatacagctgct
+ctgggacagcatcgtttcctgtgaaagctcgtcacgattttacactcatccttggccgtt
+tacaccacgatgcggcttggataagagaattaagaccaagtgatgccgacatcattatcc
+gctctatctaccacctattcgtctttcgcctacggcctagttctactagggtctttgtta
+ggtaatgtaatccgtcccgagtggcctctgcacgtcgcgctttgcaaaaaatagcgccca
+tgatcgaggaattctgtattatacgagatacctggcgtcaaaatacagggaatggggtgg
+cgcgcaacttgggactactcctcgcccaccagtgacgtctgaggatgatcagccgcgcag
+gtgcaattaccccgccgtctaagctaagtctaaaatcccgagacgtttcgctttgattag
+gggattgctgaagccaggcacccggggtctcagctgcacgcctgacactggacttgccct
+cggcgcagcagccttcctcctctgtgaggtcaaaaagtccttattaccatagtcttttcc
+gtgtgtcacacttctagatacgcgcagtgaacctagcggtgtctgagatagttcatactg
+gtatatcccgtttatttgtcaactgttacccctgagccgaatggacacgtgtgttacggc
+agtgtgacagacctccgcccattttggatgatggtatagcgctacatgcacggcgaaggc
+ctgccaaatacgctgtagcggaattaccattgatggcattcgatggactaaggcacctac
+cgccacgaacaaggtgtcagccttccattgaggcattgtgaatcaaagttctgcctaacg
+ctgtcctctatcagctggcgcagtgtttgtaaaccatcgtatagtccgtcataaccttct
+tccttatggtttcgcaatctcgcgccaactacatgggtctgatcataggcgctccaatgt
+acaacttagccggccaggtgaagattgaaatcaacactacactttccagggtcgaaggag
+tgaacacccaacgggcgtttccagagtgcgacgactgcacagttgcccaccgctgaggct
+ctgagattaatgcgctacatgtattgtatgcagcctttccttatggaaacgagtttcacg
+gcataacggtttcggttgtgcagggacctctcgggtttacagagcctaattggattcatg
+tgtgggatgcgtcaacgggcaacttccaatgtcgtcctaggcgccgatgaccaccattct
+agctcatagtacgtaggaaataggttgtgctgattgtacgctactagtataactcccgta
+tcctcgctgtgaggagatcggtgagattttcacttgagagagagaattacctcacgagca
+aggttaaaattactcaaaagcgattttcaggttaataggatgcttgaggctgcctacagt
+tcagatgaacgggcattgcgtcgcagaggttcggataacagtgaaatattcggtctaatg
+atattcgggtaaggagactataattacccggtgcagcataatattgtaaccgcggtcgtg
+cgttaggctgatctacgacggtaaagaaagccggtatgctcgaagactgctggtcccagg
+acctgtgaatcaaaactgaagccctgtgctcccacgggtattagagcaactgactagttc
+cggaccagtaatccgggggccctggagtgggcggaaacacgtcagattaacaccttcgag
+tgctattggctgattggccttgctacgaacctcccccagccagcagaaacacatctggcg
+acggtgagttcgcctcgcaagaagaaaccgttaatctaacgtgagtcacggccccagacg
+gtcataggcaggggtaggcgtagaggtactcgtcatgtacaaaccgtcccgtaaaattaa
+agattaccatgcaaggctctgaaggtttcgacgccgctttcaaaagcgagacatgaagac
+actcccttccgcacaaagaaagggaagtcttaggaagtcttgtaagtgccacttcacctc
+tgacatgagcaatttgatccttggactttcttaattcaagccgcataccggtcaaacaca
+tttactatttgctttcacgttcccgatagagtacctatggtggctctataaaatgacatt
+ttagggagataggatcgcttctagtgaggcgcggagcatggggtcatcgctttcgctgcg
+agatcaatcgttgggactcggctcctaattcatacctatgaataggtttgtggcatgctg
+tatttccagacaacaaattcgttgagatgtccgtagccgccttactttacccagatgggt
+tgttatttcagtcggcaagttctacctcgtgggctgtacctcagattgcaaatactccga
+agtggatcggattccgccgtgcgttacaggaacaatgggggttgctgccttgggcgttca
+aagttaccaacggtcaatgtccgggagcagttacgagcgggcgtccgtgtcaaaggtttc
+aataaggccatcacggctagttcattccgtctcgtcaactgggtgggttatgactgtgtg
+attacacaaaaagtcatttttcttttgatcctgatggccatgtagttctcctagaagagc
+acagcggatagtgatcgagccgcgatgtgcgagcaacccagcccgttgttttcaagttcg
+gctttgcctagatagaatcaggggctgtatcattgagttcgattctcccggtcagccagc
+ctgtcgccaaagaaagcttcaggccgcgttggtcagcgcggcgagtagcgaggagttcgg
+ctcgagtctgatctacttccctgtatttccacggtgtccacccccgtgtaccgctgggtt
+aagtaagccagctcgaaggttaaccagtttattagcgcgtagtcaacatgggtatctacg
+acggcccaccctgaatacacgcatcaaacacttggttcaggatgactctaccctgatacg
+tcagagggacttatttactattgtctcgcagggacttaaatcatcaggcggaactgtgta
+cctgtgatcggatagtgagagttgtgatgacggatacaagctagctcgctgcctactgga
+acatgtagtgtgtaagtgtcaggctactctgtactatagagtaatgcgggtcctaagagt
+tcccgctctaggttcaccaatggtcatataaactgccggtgtagggatccttggcctact
+atcgacgggcctcctgcgggtgcggtccaacgagtatccggttcgtcaaggagttgagac
+tgttgatctctcttgctcatatggagaagtgatagattatgctatcttcgttttctgcaa
+gaacagtagaacgaacgtgtacaataacctgggccactcgtgtcgtccaaagcctcaaga
+aatactcctgcacgagatgagagcatttctatatcgtcgaagttcatgccgaaaacgatt
+atacgttgcacgttacttagaggatcgcagcaagagtgtattagaacatgaaggagaata
+gaaaggaggtgtccgatggatacaggtgcgactcttggcgattaagataaactagggagg
+gcgaccttctcgtgtgttccggtaacgagcactcgtcctttgacagagtctgcattacat
+aaggccggtaccaggagaataactcaggcattacgaacaaatctttcgaacggagtcgat
+aggcatgccgctgatacccaggcggtggggtatactagtcaaccgccgtgaagccaagcg
+ggcagtctactagccaaacgtcgagcttctcttgcacttactaggactaaaccctcgggg
+ttttagatggtttcttccgccgggcgccatttgactccgacctcaccgcgtgtatggtcc
+ggtggaatccaacacggagaagcaaacgggactctgcacagtttgagcgctgtccgagaa
+gcgtgctgccgacttccaggattttgtagattcaccctatgtgggtgctaacggcgtacc
+ccgcgcctaccgcagacgaccgggttggaggaggaattgcgtgcactaatctcctgccag
+ctgcaccgggtcggtgccacgttctccttggacggaggacacttccactagccggtcctg
+tgagtgatctgctaccacgcaacagcaagtagtgaactcggcgattgcagctcgcgccac
+ctaactcgttaaaggcgccgtaggcgcatgccaggtcgcaaaaccggtcatattccccaa
+gatgacgcaagttatctttgctcgcagtcgatcctacgacgtataccaaagaaggtaccc
+aatcattagtctcaacactaaagatagtccctcaagtagtggagcaaggttcgcacttgc
+acatacaatcgtatgcagagtttgagatgctgttcgtactcaaggctattaacgctatat
+gttagaagagctgtaatccgacatttatctgacgctgcttctaccccgtgcaatcgttgg
+tgaagggtattcgtgctgcctcttttccttcgagagtactgcgcgctccagttatgaact
+acccctgttaggaggccaatttaggggcatactgcaacgtttgcgactcatttttcgcgc
+gtaactccgtggagaatatacaagaattgcccacttcagtttatacgccgatatggtgga
+aagccggagagttgatgtagtaacgaggctccagcgaaaatgagtgcgactgcgatagag
+tggagatctatccaacggcatggcgactcagctaggggtggggtaggatcctgttggtaa
+acctagaccggaggctgcctccggggataatcatctggcagtgaatccggagaattaatg
+acctgaccgatatttcaagaaagtgcaggggctgagatgcatcactgattccattgtggt
+ctcgatgttaagattagaataatggattagaggcacttgttattcgtaagtgttaattac
+ataacctctcttggatagtcctttgtcatctcgggttgtgcaatagcggggcttcagggg
+tcacaaatagacttacccacccaacaatctacagtcttgaatgacggggagcacacaacg
+accacatctgcaccgttcattattggagcatatggccataaaagccgtacctacctctct
+ccggagcaatggcgaaagcggtaatgttacgtaactacaaaccgctaaagacgaagaaat
+ggcaaccactctgtcccggaccgggcagctaacggctgaaatcttgtgaactatattaac
+tgctgtacagcgctgtagtacggtttctggtttcggctagtagtaccttgcagaagcacc
+gattaccaaccaccgtacgattcgccaacgagttagcctcatgcgttcaaggcgttcccc
+gcaccgtctctcgctcgattggacaccatgctattgacggcatgattacgagggcagcta
+gtaatgacaactactggcccgcgttgccaattcttttgcatcacgtggctagtacgtcag
+cagcgtccctttctgcagaacgcagggtctctgtaaccgtcgctgcgcacaacgtttagc
+atcaggaattctcgtttctactctgttaaagggtaacgtggcaaacatagccatcgcttt
+tggcggactcgctgtggtccaggtcgacgaattgggggtccagctcttcgtgagctgcag
+ctggcgtgaagcaagaactgagtggcctgaatgtgagcagagtattgggactggtgatcc
+acgcaacggtttgagagcgtatgggctgcaccggctagtttatgggagtaaacttgaaca
+gtagtgaacttgcaggggccgattttatccgggaggactgaaagcgagaatagcacgtca
+cgcacgcggtgctactgttgctgatcggtatgagtcctacgatattcgccgtctatggcc
+accacacaaaaaaacggggagggcagctgattatcagtcggtgtaatgtattcggcatct
+gcccgccctcatccgcctctgtcccgtgtcaactgtactccacgtcagtgacattttcgc
+atcacgtcggtgtacacaaatctgaataccgcaatcacgggccaagcagttatcctgcac
+tgatcactccaactggaggcatcgggttacacgtctccagacctggtctcggagccgatc
+ggcgattgggtaaaaaaactgatttttgtgcgcggtggcaggtggggcacagaccatgtc
+agtccgagtgtatctttggtgaattccggactggagtacattcgcctaattccgtctggt
+tccgtatcgctggggactccatgacatgtctgttggctccgagggtcgtagtaagaggga
+ctcatgctatactcgtgttgcacgattcctttagtatatagtaccatcggaggtctatca
+tggcacacacaacttgagtcgcaccggtaacgcatacattttaaattcggggaggaagtt
+tcgagcgtacatgatcgcgatagagggaccaagaatagggggtggagtggacttgacgag
+ctcgacctgccctgtcgctgccagaatgccatccatctagacacaaatcttacggacccc
+ccctggctcgtagcaaccgacggcgatgactcgacggcgcacgctacgcatgggggccgg
+gccacggaacatatagttaatgctccctaaacggcctcgagcaggaccaaaaggccgccg
+ttaatccgtagcgactgctagacgcactagactctgcctgaatttatagcggggtgtgtc
+ttattacatcaaggtcctgaggccgtaaacggatgctcggtaaggcagagatttgacttt
+gacaagctgaatccattgcacccagatttaaggggatccctgtatcccgaaacttccctg
+actactcaagcagtaaccaagcatgcgatacaaccgatctgtatgctggtttcggggcgt
+agtcgaggagcctagcgacttggagctatcgaaccaggcccgaaaccgtgttaggactcg
+gatatagattcggaggaacttggcatagatcgagtaaattgggtaacactttactggacc
+ttgatttgcgttgcctatcgcaaccggactactagccgacagcgtaagacccagcaggca
+ttctaaggcaaccgacctgatagatctaaagttcttctcgaaaaaaccgcgtcgacaggt
+ttttaacggaccgcccctcagatcaaacagtctgagatagcagcgcttagaaggcagagg
+tggtggtccaccaaccgattcagggccaccagcattgctggagtaaaggccaaataagca
+aaatgaggaacgatttccggaatactcgatacgtcccagctctaagggtgctcgcggcgg
+agtcgcgaagtgaagtccgtgcgatgcataggctataacgggacgatctgctgacgtatt
+gttgttcgccctgtattcacacattttatgttggccgtagggcggcggcctggtgcaccc
+aaccgtgcaacacggcacgcgttctggggcaggcggataccctcattggtgtggagggtt
+agctgcgtgtaggtttgctctatccataaaaaggaaaactacagttccagagcttgccgg
+tgctttcacactactatagcggcccttcaacggcttagccgcatgaatattcttgagcct
+cgtattgtgc
+>Contig2
+gattaatgtctggacccacggtccgtgctatcacagtgtccaccgtaggccacgacaatg
+gcacagaagatacccaattccaaaacgctgtcgggtggtctgattagggcgtccgatctt
+gtaaataggaccaagcttaacctggaccactaattcctggaagctctgccagagtatgtt
+tttccgggcgttcgatcgttgtctgttccgccatcaaaagcacagttttgagacgccata
+ctctccttctccgataacataagaggagctacgtgaactgttctcggacccgcgagtgga
+gtacaactatattctcgtcccataaatgaagtggcttatggggcacgaacatcgtgtcat
+actctagagtccgattccatttcgacctttccctccaggcgccctgggttatccgacaca
+aatacgggtaccagacgacgcattaccaagaatgatgtaccacactacgattacattgcc
+cgatattccgtgagacaaagccacattaaattttacaagggtggtatttcgcttctgatc
+ctaactaggaagatgcaagacatcgactcgagttgcacgggagagtgggagcattgttca
+gccgaaagccaatgagcgacaacggcatgggaagacagcggtaaaagcacaggctctcgc
+gttaccttcagtcttatatgcgttcatacttttggaaatcggctgcccgtacattaaagc
+ggcgccgcgggccgaagctcagcgcagcatggtatctcaaggcatatccgctgaaatgaa
+attgataacgcattctgatatccaacgcaggaaagatgaacgaacttgtaagttgaactg
+gagtttttagacagccgccatgtcgataccagaaagacctcacctaggtagtaagcgact
+tacatacgaccgaatatctatggtgaacgtaaggagctatttatcagaccctaaaaggct
+tttattgtgctcaatatcgcatatgggtgcataggcttagctggagttggtacctgcggg
+atggttcctggtccacgttatttgagtaactggtgtgccatctccgactcaattgaaaaa
+cttacctctacgcattccaacgcgtcatggatgaggtatcacctcacaatacggctttat
+ctggatctgaacgggtgaggaacttcgaaacactaatattaggcatttggggagtaggcc
+tatccacgacgactggagatggtccaaacgtcttacgtagacgggttctatcagtgttgc
+atggctattactattagtcttaacttggtacctgtaaaactcatttgccccagaaaaatt
+cttgccctgaaaactgccctaaaccatcgacctcgactactagagacgcacgcgcggata
+ttagtgccggactaagcgccataatatgtttagcatgtcaacaggtgcgtgctgacgcct
+gtgcctcattactgaggtaagcgggtgcttgatgtactactaattggtgtacaattgtgt
+tttgtgacatgcttgtctcccgctccgatagttggaatgtggatcatgtggagctgtact
+gacacggtacgatggctgtgtggtagtcccgggtcctgacacctaataactccagaaaca
+gcgacagggggggtatgcaattacggaatcagcgagtagcaacataagcggaggacgtac
+accgttagttgtcagttgtattggattgcagatgcaggaatcggtgcctagaaaagttat
+ggatggcaagggttccgatccggtgcctacaaaattacacgtatcaatctcgtcctagtt
+ggtaacatacacgacgcctagatcctgatacatgcggcaccatgtggcaccaggtggcaa
+cataaggttctaccactagacacttaagcaagggaattaagccggtgttagcaactgcct
+acccgcgaccttgaacggaccttcgaagaccttagaaaagtagctgtaggtgcaggccta
+ccgtccagtacgaaaagtggtccatgcgctcgggctagttattgaaagttgactggtaca
+cgctgcaagctacattatatgttgcgccgaagtacttggtacgggtgtgtctccttccgg
+gtctatggtcacatcttcaccttactgcccctttacgcgagaatcatgtgtgacattttg
+gacgccgaggactcgccagaaccttgcggagactggtcaacggcccgttatgtaggcagc
+cgagttccgtttgatgatcgaacattgcatatattagtcctagtaatgcaatgatagcag
+gccccgctcaagtccactagaaacgagcatcaccggctcagtcaatctttcagctaagga
+gtccatgaatgtgggactccataaacttttgcgggccccaagttggcggctggctgagga
+ttctgtcatagactgctccagcagcccgcacggtcgcgaagtgcgtggggtcccgtcggc
+gccctgtacgggcagtgcaccatccgatgccccttaccgacgtgatagtccgtgatgctt
+cacggcacatgtgagctaatgcgtgatagctttctggggctatgtttcagtggtccaatg
+acatagcacgccactatccgagtgaatgggaaggctcgatatcgaagattcaaaatgcgg
+gagtcttgggttggattctgcctgggggtcttaaagatcaagaacggcccaaagcacact
+cgggcgggcccctagcagacgcgccttgtcctcagccgtttagtttaaaatgttttaagc
+tgtcgggccagctatgaaagaactccggagttgtgatggacgctcatgcgtccggacggg
+atcgtactgattggggtaggcccaaccgacactcttgcagacgccgcgtccacctgcagg
+aacgcccccttttcgaatagtatgcctacccgtacatagggaacttgtccacaacgcgga
+ggttgatctcgatgaagatagaggtttggcctgaccttaaagctggaggctctgaacagc
+tgggtcatacagcaggttcacggcgccgggagctagctgaagagtaaggacgacgagaga
+cataagcttcgccctttattaaaacacaatactcttttattggcacctaccgcaattacg
+cttccttgttttcaccccgggcagtgtctttgatgggaccattttgtaaggggactgtca
+ataaaccaacgcgtagcttccgtatcatctggtcgctatgcttgctacggctcgtacgct
+tcgcaggtagagctcccggggggtccagaacgcgctagcaaattcaggactaatctgaac
+tctgttttttgaggtacgcttgctccgatgcactctagcatcgtcactagattctcggca
+gtgcgtggtgccagtgattggatctaggtgccggtcgagatcgcgctcaccgaaatgggt
+ggcgacacccgatggcgggcagttgcgtctcattctagtacagtttcgaactgtgtcctt
+ccggtcacaacgaatgattgtctgcgcggcctgggaaaccttaccgagttccccagatct
+ctaatggttgagcttatcactgtcatagacgagcggggggccacaacctaggattattgc
+ggaagcacatttgccggctaagggtcagaaataaaagagggcgtcccttagcgcctacgt
+taccaaaccctgggctgttgatcaacgcttgggataacttaagacgaattcagcgtctgc
+acgatacggacgcaactactcaacggtatagtaaaattagaagaccggtagcgttcagtg
+gctggcaagataatccaccctcgagccgctaacttaacctagcgtcctcatcttctcctg
+ctggtcacggagtgggttgttactacggacgctatctctagaacactattttaactcatt
+cctggcgttcagcccttcgctaaagtaccagataagaggggccgcatatacatgaaagat
+cgttgtcagccatgggtggactgtatattccatagatacactcgcaccgagatgaggcat
+ttttcttttcaagatcgcacacctattgcggtgaaacggcgcatctgcgaacaatacgtc
+tggggagtcagaaacgtcctgtggtgtgcatctcaagctggtggtgttcggtaggcggtc
+ccgttctcacactaaacttcggtccgtcactataagcaccacataacctatcgttggcat
+acgggacgacgctttcgggctggttttctccgttgagagcatgattgaagcttctgggca
+tctgggcctctgttgcctgactctctaccatccgaaccttagggctcggtcgcgggtgcg
+attctggactagtcagatgaaagacgcctagcgattggcctttgacccaaaagctgcatc
+acctaggcgattggacttgacgattttgagtaccgactgctccctaatacggcgcaactc
+tatgtgcgccccgcatgacacgagatcgtctaggaaaatagaagcagccgacagattgac
+cgccttactccaattgataacagggaagcggcccagttcgtggttagagccttgtttaga
+attacggcaccgccaagactgtggtggtgcgcgcgtgcttacaattgtccctatggaata
+tgccgaagatttgcacctgccaggttaggactctcaaaaatgatagcgtaaacgtaggtg
+aaccgcattctcgttaccatgaccaaacgtgcatacgccatgagaaattacttctatctc
+aacaacaacgtacaatccggcacatacgttaggatggtaagctattctcgtctagtagca
+gattacggcgagcctgggctacttcatctgtctagtagtcagaaggtattcctagattgt
+gcggtagtacaccgtatcaagcgaaagtgatacttcgagtgagattagaaatgaagcgga
+agcatgggataatctacccccccggtaggtctcggcctcctaccttaagacttgggccgt
+aattgggaggagggtgtcgaaaaacggtaccgtatagaaataaacctaacccaaagattc
+aattttctgatgaagcacaccaccgggagggcacttgataaaattttgtcatttccatcg
+cctttaatgaagtcctgcataacctaatcactaggaacacacgaaaaacccgttgtatca
+cggaggtagcgcgatccctctttcactctcttccatatgcgccccttggagggtagggga
+tatgcttcaatcagactctgatggacatatgaacgatgtgggttagaagtgtggggcaca
+ggcacgcatccgatttcagtgctcgaatccgaaactaatatttacccttccccgcacttc
+aatgtagagacaggagaatgttaatctgactgagcagataaacaactcgtccatgccgat
+agattacgcaatatcacgagcgttaccatttagtttggcgattaagatggcgcaccagat
+ttatgcgcttcctcaagtctagatcggcctttgctttaaaacttaggaattgcgccgtga
+gccagacagcggatttatgatcgcacgtcttttgggaggcgtgcgataaaattatagcat
+gtgtgcccgaaatggtcgctgcacgtcaaatacctcgcgtacgaacgtgaacctgagttc
+cggagagacccgcctactccactatggggaaagagatccgtataaagcgggaagcccttg
+ggtattgagcggatttagacgatccgacgagtcgcccttacatcgtgctgacactgaagt
+cttaccgtatttacgcgagctggctcggactttgccgcacagaggttgccatattttacg
+aggatataacttttgatcagccgtcgattacatcgtcgagatcacgacactctatccgct
+cactgcgtcagtctaactacgaaacacgaccccccaccaacgcgcggggtgtcgttccat
+aataaacactcgcattttacaatgacgatatgatcatgtacaagaaagttgacgtgaaaa
+atctagtgctccgtgtttcccagggcttatatcggctgacagtcacaactacgcgaattt
+ttatactctcagtgttctcagcaaaatacgtctccgcaccgaaccccttcccattgaacc
+aactggtgttagttagagttttataacccgtcgcgactgtgagctaatccgacgcaactt
+ggaaagatcccctcgttggcagagcgccgacctacacacctgctagatcgccttgagcac
+ctttttaacacctcagcgctgcgcgctgtataattcgatgagacctaaaatggaccctaa
+cctcaggacttagcttccctaaagtaagcctatgcatatggaaaccgtcgttgaggctcg
+cgactggtgtatccccggctggccataacgaaatcatggtgtcgggtcgcgaatagccgc
+catcattgtcacaacctgcttttattattggcatagtgcaaccgactcttgagcaagctg
+caatgaggcagtgcggaccggaggccaagagacaacaatcatgtcagaagatcaggcccg
+tcaggcttgtgttctagggtatcagaccgggtggaatcttcccttgtgctgcgagggaac
+aggtcgagaagaacccgtatcgacacgccgtgctcacgatctgttgccactggaccaaac
+gcactagtttggtttcgcaggttgtaccggcccggggacttattaacgaactggcctcgt
+ggatgccgtcgggtaagtagcaatatacgggacagctttatcgagcatcaaactagtcga
+tccacggaacacactgccctccttaattttacgcaggagaagacttgcaaatctggcccg
+ggtacttttgatgacgcgtcagctagtagcacgtgctagtagtattgctcgagagaactg
+tgtcatactaaatagtgaaactttcgcgggaacgagacgtcttccgccgggctttgcact
+tcgcgacgtagaacaggtaagttaaatgacaggtcctaccagtgtgttgacggaccccac
+caggccacacggcccggagtgagtaacacctcaagctggaaccaagctcctggcctgacg
+gccaccgatgggcaggccgagccaagaatactacatcctctacgagggtagtgcggcacc
+agccaggccgccacttgagtcaggtttgatgaatggaaaatcaagtactcccctgcttgg
+tagccatgccagcatcatgaaggcatccttaatagcacgagtatggaggtatccggtatg
+acaaaaggcttctacttcagcttaccagcggacttcccgaagaccgagcacgttcgcgta
+attagttcaaagcccctttcgacttttcatggtacactgactttcacccacaagtggctg
+cctctgtttcatacccagggcagcaatctaagtggatactattgcaagccggtatatcat
+ttactcgtaacggcgaggcggtgaaccagcacgatcttaccctcaaattgatagaaatgt
+cggcaattgaatgaagtgaaagaggtttaaacgccctttcttattatgaaggaaatgttt
+cacgagtggctccattcatcgcggctttcctgcgtcaagagtgtcccgtgcggagcgtag
+tagaccccaagaccccaaagtcacccgttacacggctcggatgccctcgaaacagcgggt
+tatacgtcaaactgtacaaggcttatgattagtaatttgcttcgagatatacagagttcg
+gactcgccctcgcccctgcctaggcttagctcgcatatcgcggtgatgtaattatggcct
+attggggggaacacaaaccacgccagaattgatggcccgacgtggggccctgacgtatgc
+tactgtagcgatgttgagtgcacgggatcacgcttcacctgcgaacgtgcgaataagggt
+ggccatactagccatgttgtctggcagtacccagatgatctccgttctaggcaaaccaca
+aggggcaagcacctcgacacgaaggccaaagtcaggacctgaacgctgcgaggcacgaat
+gagacaggaggcctgcgactgcaggtgcttactgtctaataaactgttgttactcacccc
+cccaaaacttttcccattcatatgaattcgtaggttgaatatacctcagacatcccaaat
+cccgaaccgcgccctgaccttgtgtggaatgactatctagaaacgaatcagtagaaactc
+ttatgacggtatctaatcggtttgcacatgagctgaaggcttttaaggagattaaggccc
+tagtttgaaggccgaccctgacgtagcgaggtactctgcaccaggccctaagcgaattat
+tgaataactaggccgaatgaaatacttgtctagacactccgggcattggaagcttagggg
+gtgttacctcgctctttgcggcttcggtctaaaggcttggacctcgtcatcctctattct
+acgcgcccgacatcgctcgcatacgatcgtacatcttcttcggtagatcgcacagagaac
+aggtggatttttagcagacctggctgaaggtactatcatgtcacatcgaccgcataatgg
+agcccctggccgaaacccggagactaattgcgacaactagccgagcacatatatttctat
+catggtactcaaatgccataagggattggagcgcgcgcaagcagtccaagcgcaaggctg
+aggttctcttgacacattcggtatcgagcgagagctagcacagaacccacgcatggctta
+aaaacaaggcggccgtagggcataaactaaccgacgcgaatagtacccgctttttgctac
+cgccaatgggacccaccctgcgccgcaatacgtctgttgcatgcctgccccgctgtcacg
+ccttcaacgatctgttccaccgcatccatggtctattactctttggccttactcgatcgg
+gtgtgcataaaggaaacctcttctgtggtgaaaagggcgagacgcgcttttcgtagtgaa
+gacttactccttatcaccaacgcccctggaggcatggtgattactgcgcgcttatccgta
+tcacgccctgccaatcgtgcgttattcagcacgtccgttgactgtcatctgtgtgtggga
+ggcccgaggacgaaaatgggaacaatcacgaggcatcgatctctgagctcacagcggctg
+gtcccgtgcctataatttaaatcctgactcagagccgtgtggctcgcgacggttacataa
+agagccaattctttgcctttcggttatcgaaatatctctggtgggaagctcatctcgtgg
+aatctccgcgtaagtgcagtatcggtcgtatctcatattgtctagttgcctaggtggtcg
+ttgactccttggacaagcactggtcctacaggtggtacgttggagcgaacggatgtgctg
+aaatatagccaggtacgattgccctgccaggtgaacatgacgagttgaaatggattaggg
+cgcgcatgagtggagatccgctccacagtagcgccacttatactttgctttataagggtc
+agggtcctccacggctaatgtacactaacgaagccaaactaagggtctactcgacgaaat
+agtgcatgatcgacgagttcaggtaagaaaggagtttttactggtaaagatgaatatatt
+tcctactgaaagggataggcaggtggcaatttaatattcccttagacgactcttacgcgg
+aggccgacgcttgagcagggcatcgcggtccttagtcgagtatgtcttcggaagaatcct
+tcgaaagcaaaagccgcttaatttgtcgacccgtctaaattaatcatgtgcaagaaattc
+ttccggttcaatccaagatattattcatcgtcgagaatcgacgaggctaccgaaagaggc
+cctgttcatttatccgccaacacaccacgtaaattcgtatgcttagaaaggcgccgtgta
+gcccctaccataagtgtcgctctgggcttactctaagtaggcttccctgttacgccgact
+agcacccatctggatacgagtttcgtcgaacctttgattgtgaccgatccccccattaat
+atcgttcgttgagacctgatattggtaatcaaggtagatgattaaccgcttttgagggaa
+aataccgatagcaaggccaaggtctcacgtctccgcgaatcatacgaaattgcccatagt
+aaacagcatcccagtccatcatgaatgtgtccgtcgtagcctcgggaaaaaatccaacga
+ttacagtggaccgtacctgaagactgtcactgttcgttcaaagagcgcgcgtagttttac
+cctggtcgaaaagtcaagtgtgagatcggtcatagtattcgatgccggcatagcgcgagt
+acactcccaccttcattcgatctatcatgggcaagaccgtgttgccaagtgcccgttcag
+gttgcgccgaaggtaaagtcgcgggtgagggtagaccagtatttagatctcaatggagtc
+tgccagcatacggcaggccacgtaggtccttcgtgtaaacacagataccactaagcattc
+ccagtgcacagggggcgaaaaggtcacgatatctgcggagttcatccgagtcaaggaggc
+ggaaatgacccgcgcgagcaaggggccaggttaactaaacagcaaattctgtcgactgac
+tcaaaggcagcgtacatgataagcacgttggtcctctgggctccctccccgttgtaggct
+aacttgtgaacgcttacggacacttttctgctgggaacagcaagctacagcattacaaaa
+actagactcatgccgggtaggcaacgcgtagcgacctgttataatgcggcgcaatctcga
+tccacagtgtctagcgaacactacaaaataggcaaggctgagacatccctcatctggggc
+tcgaagagaacataatgctttaattcgtgcacattgtcaaaccccagacgaacattaagc
+ataatttccgcttggggtgatgtctacacctcgccaatccatcccgcagggtatctattt
+gaagggaccctcgaacacctgctccctgtttcacacgcctagcatgatggcaagacgaac
+atttcaatggccggcatgagaaggcaaaggattcgtacttttccgaggggggaaagggat
+gagtctgagcgtcgcatggggttgccaatttattgggcgcccatggtatccccgtatcta
+gggattgagttgatgccagcagtaaattactactaatttgatcagaacgtaataccggta
+gccgactagcccgtcacgacgtcgtttatcaactatagacagctcacttgaggttaaaac
+gaaggacaaaaggcctggcgttgtccgtgttttaagtctgagccgggttgtgggtgtgag
+tcggcaaatatcttttaatagggtaaacaggccagatgccatgcagctattcggaatctg
+aggacagggctgccgttgttgcgctggttttaagttcagtcattactgacgcgccaaaac
+aagagtaacctaatggatctcgtaatctccgagacgatcagcgtgacagaaatgtctgct
+gcggcacgtcttggcggagctaagcctaccatattttctatatccaatcgtattcatccc
+aggcgaccccaacagaatcaactcggccttggaaaggaagtgacgaactcgatgggtcca
+ctgtcacacggagtcaccagtcgtcctctgtgatttagagttctaatggagccggtcagc
+cacgcagagccagaccaggactcccgacgcttggcaacgtagtccttcaggtacgggcga
+catcggtttggtcgtatgcatgcagcacatacaagcgatcctcgtggaatcacaccggtc
+tattcgctatctgtcttaacgcgcacggagcacgtcattccgaagataaagtggattcga
+ctattgaagcatacaggatccttatgactggctgcggatggtctgggagttgatcgattt
+tacaggaggacattacgggtggagggtccccataccgcttgactcaaaactgggtagggc
+gcacctatatacgagccatacggttttcaatcaacatagcgttcaaccactccagcgcat
+accacttcaagtactctgtaccaaagatcgcagtggacatccctcatactgtttatcacc
+cttcgagcagagtcttatagttccttgggatttcgatttgcgaagttaatcgagcatgct
+ttccttggatcccgtcgggagggcgtcgcgtgtataggcctcacaattttcccgtcgcca
+tgtgacttggaacttatcagagaatctcgtttccctaggtgaacgtagctaggcaatcgt
+cttgggtaagcctcgttggtaggtgacttttcaattgaatcgagctgctttgatagggct
+tgcgccctcaaataacagacagcttctattgccgccaccggccctatgccagtttcaaag
+cgaacggcatccagccagctgccccgtgatgtactaccctgcgtatcaagcgtatgcgtc
+gcggccacttggaacacgtcgtcatctcaccgtatgtggcatatgccgccgaacgcgagt
+cagacgcagcgtactacatcataaaggccagcggtgagcatgactttgaggcgtatcggc
+actccgtcttaacttggtcgtggtaaagctcgccggtccaaggcactatacgtagcaatg
+gaataatacggggagtttacgatggagcgccaaaattggtgttcgcccaccttcgtagag
+gcagtgatatctctccctgtcctaacaggtaaaaacctaggtttgacgtgttcgcgatgt
+gagtgcccgtgatttaagcaactcgagacactgcaaaccagcgacggtctcactattgac
+tgtcgggtgctgtattagttatacataggtgagtcccaaaacctaaaagttaagaggtta
+caagatttagggaggagatattccgctttccacctggtcgcgttggcgatccagtcttca
+aggccggagaactcgacagtcagaaccccaagtggaccaaatatgacacgtagtggacag
+tggggctcttcacagattactctctcagcaatccgcgcttgccagggagcagcgtcaatg
+cataaaccggcgactaggcattgcgagattaggaggggaattaaggtactgcaagtgaag
+actcgatgctttacgtgggccatccaagatgactcaacggtccccttcgtataaactcgc
+gaattgcaaccaagacagtgtattctacgccgttgtgaatcccgcatcggttcgttgaga
+ctgcttaattttccggaggagccaataccctcgccgttcagtacaagtcgtagatccgcg
+tgtcgtgtcttgggcaccggaccggtattaacggaccgttgcagcgcaggtgaaacaccc
+tttgtaggtgctcggggtgggggacgtacctatgatgggactactatccagggcagagac
+gtttactacttaaggtcaagggtgttagggtgcgtatcgggtcacggttaaggcgacgta
+ccaaccgagtccaacagtagtaaatgctcactgggagtagccatattcgaggctcgggtc
+cggctggttattaccagctggtaaggccttttaatctatcacgcgcctaatagcttggag
+cgatttgattccgtcacttatgccatcactagcaccgggtgcggaaccctcttcatggca
+accgcaggtctcatttatggatgagtatttagttaactattaacatacttaacggggcct
+accaaagtcggtgactaagggccccgggcacgtctcaccctgctgtacatactgccttac
+tcatggtagttcgtcggactccactcgtcggggctgaacttctagaccgcgtgggagggc
+gatatattaccgttcgcttcattcgatatcgcctaactaagggggggggatcccatggcc
+ccgcaacacgcaaaaacttgaacagtgatcgggatttctaaccacatatccaacaagctg
+gtactttccgaaataggatactgctcggttttcatcgggacataggatagagacggatac
+agggactcaatatagtgaaaactccgccacgcctgcttacacgtcccaacgcgttacacc
+ggaggcacgtggtctgtcttatacgactggggtaaccatggcaacaaaatagttcctctg
+gctgggtcggactctggtgtttagggatgcgatataagcttttggaagccggacgcctaa
+agcttgggtgagaacatgaggttacataccagggagaaatctgttcgtgcattggtttct
+gcttcgtacagattcgcgtaatgggggctgattagcttctggcaagtaaggtatagaatc
+aaccaccaaatgaatgtcttacgaaatggtgtgacggtcacccagaggaccgcgagcatt
+tcaatcagaccgtgaatctagacattcttgaataacgagcactcaatgtagtttaggagc
+ggatcttctccgcaaactgtgtaccaggacgacttcctgtggtggaaatcgctgtactag
+gggtagaactctgggctggaattccagcgcggcgtgatagcactgtctacccttcacgtc
+actggcagtcgttcgtgtcagttagcgctagaccttcgagctctagatatcataggccgc
+tagttactgggatttatatgaccataagcatccattcgtgttacgacagattgcctctgg
+caccctggcccaccgagacatgacagtcacaagcttgtatcccccatggtgtccgcagag
+gtttggtatgttgtaatttactgagtttaagagatgcaatatatagattttagccgaaat
+ctgtgaagatcactagtcaaggcgcgcccaattctataatctcacccaagtaaccccttc
+agttcgccgactccgcccaacccctcttgccctgttctgtctgccttcgagagagaccca
+gtttcaattcgagctgctacggataaaggattcgaggctccgccccgtgcatggcgtgaa
+ctgtcagcgaaaaacgtcctgtggtaaccctgtcaagacaggcaagggtgttctttctat
+gcatggtcctctacgtactttactcactaatgaactctgagctgctcagaaccatcacca
+agaattacgcggtctagaccccgggcagggaacacgcatacatcatatacgctagggaac
+tgacgaacagattacatgcctcctatgataccggaaggcgtgcttctacttttcccaaca
+tgagatggatattgtcccagtatcccctcaaacgatcgggagagtcgagcaggtgtctca
+agctaatttaacgtaatcggggctgctgcggtggtagcggtcgtgaaaacccggcggtct
+cgaagtcggtagtaatagtagtgtcccgaagatggagatggacgttggcatgtgcgtttg
+atactgctgttgcgcgcggcggaatgatcttttcgcgaccgccgagcacggtcgaaaagg
+ccaggaggtggtcattgtagatgatatttcgatttacaaaatggtgttctggaggagctg
+atttgtttgggtttatggacagatggggaggccgtacccccgaacgtgatagtatgatag
+tggtggcgttcagttgactcagagatatcacctggccgacgagttgagtaaactaacctg
+gcaacgtcgtaccgtatttcgttacattcgattggagaagagcaatttaaatattaaaaa
+accacatgcggagtctaatcctatgaccccactatataggtaccgacagttacgtccaga
+ttacagtttatccctcgggcagttcgcctgctgattctcaggcacaccagctccgctatt
+actgggcatggctggacgagatctagcatcgactgagaacatggccgagcagagtccacc
+accttcttctggaagcgctgaatgtgccactggtgttcctgctgggagtaaacaccgttg
+ttgtcaagagtccgccgcttatcacctcaggggcgtgtgaactgaattagtcactattcg
+ttacagcacccggtttacttaattgaatctatccaccggacggcaggtagtcaaccactg
+ccatgctagctccgatcacccaaagacggtccaggatgcgtgcacgttcctatgattatg
+cgcacacgatctcaatcctatcttaccagactgattatactatgcggtaggacgccatgt
+ttcctgtcagttccttgactcagcatctcgactagtctgaaaattctaactctccacagc
+ggctttcacgtgggattgtgagtatttcctgtcatcagatgtattcgagagaaatcgtgc
+agtgacttcctatcaattttgttgcgatcagccgcttacgtggcaccaagtagcggtgac
+acgaccgcgccccacattccgccacaacatggaccttcaatttaagctccacaccagcgg
+ccgtatcatcgtcatgagcttccccctaccccacatccgaacacgttgctctacactggc
+atggcgagcgtggcctgaggccaattcggatagtattccgtagagtcgtaagggaatcgt
+ggtaaatagcgtgaggagcttgtcaccggctgcacgacgagtttgagactctagttccat
+aaagcaggatcgctacatagtgagtaattatcagtcaaccacagcaagcaccccttactc
+ataatgacggtacacaaggtgtaacattgtagaactcaaacaccaggtgtggaaccgacc
+gacaaaaagcgagccctatttttcatacaaatcaccgaatcttagggccagcagtacatt
+taaataaaatgacttccccacaggaatcgcgatcgcgtcatagcaccgaaaatgtgtggt
+gtcgattagaccttactgcagtgagaacaatgtaggcagtgcgccccaacgcccgtgacg
+gccgaaagaagccttggtccgccaacaagtactgtcgtccctgcatacgtatctctaccg
+tattgctggttgcggactagaagtccgatcgcctagttttacaaacccgatgcattcacg
+tctcaaacaacacaaagaatgttacactgactgggtattcaactcgcccatctagctact
+catcttcggtcgagactaaaaggtgccccgcagtatgctgtgttatgggagttcatagtt
+tagcaaatccgggattaagagggtcaagctgtaaccgtgcggtgtctgctgacagaataa
+atcagaggactagcgaaaccgtagctataaaccgccccagaaattaatctgcgttggtac
+tcagtgatgtttcgccatgccactaaccagcaagtgtcaaccgtgcggagttccaagaca
+cggcaacaaaagcagctctacgcgggtttcaagaaataatctaaagctacaattcatgga
+agtgagataccatggtgacccacaatcagacttaaccagtaccgcaaaagcttgctcgcg
+ctgtcaaaagcctgttgtacacccaataatcttggtatatttaaaagttcactaaagggg
+cccaagggagtcattaattgcggtgcgcgaatggtcacttcatccccttgcccctccgcc
+cgtcgataacgcctgcttgacgtccggatccctgggcagtcggagttcgtctgagctggg
+ggttacatttacttatatttcaacagaggggcaataaaacggagggaggcactaaaacac
+actcatagcgactctattagttgtgcgatgtccgggcaggtactgtacaatcaacatcgt
+cttacccgcccctcctcagaccaagagtgtctttgtttacgcacgaatggggaaaagacc
+ttgtccgaagtgtatcagttggtacagtcctagtctttgatcggtcctatcaacaatcgc
+acgttcatgctgcgtttgcacgatggtcccctctttgagtctcctcaaaggcgccaaggt
+tcctaaacacatgactcgagtcatgggggaacgttagcagaacctaatgcacgggaggca
+cggcagggcgttcctcaggcgattagctccctttcaagtaggggtgaagcacgcccagag
+tctcgtgtgctccgtttgagcacgatgtggagagcgttacaacggtcccaataaacgcct
+agccactcgttagccatctcctatcaggtatggcgttgaaaatatcccgcgacaacccat
+cataagtggtattagatggcatttctaacaccattaatggttgaggactgttgtcccgtg
+ggataatctgaaaatacattattgagtctaccaattattaagggaaaccgtccccgggga
+agtcgacctgcctggtccgtataacgggtccactgtcccgcgtcttgattagagatcttg
+agatatgcaggccatataatactcggaccacttagccgcctgcaatctttagtgcttcag
+ccgcaggcaaataaaagataacttaggtacctaagctacccgtagcgcaaagcaggtata
+tgtaccgtcattcgattgcccgtccgcaccgagagattacgacaggcaactgtcctgggt
+gatcttaccgactcagcctagtcccgatcttcacttctttacgccctaattcctcgtggg
+tgggcggcgcatgatttctgctcagtaaacacaaaggtgcttcaagcccctatcacgtta
+ggttcgagtcaggttagttcggacagcacgccaatcctcttaatacaggcctgtacgaca
+tgtttttggaagacccttctggtcgggatgtcgtaattgcaaggggcacctcaggggcag
+cggggtaagagaaatacgccgtattgtagggcgtataggtttgcacgccctaatcgatac
+ataatcctccaataacacgaagagtgtttgtcgagcaggcactgtcgatcggatagtggt
+caagcaggttctttcgcacagcctactcaagaattggtggtacgctgttgtgtgacgtat
+ccggggaatacaatcctgtatcatgatgacaaatcagatggggcgaataggtgattagca
+aagctgtatgattcaacagtagtatgtaagtagaattagggcaccgaagtattgcgtcgt
+tcattcagcccgtagcttaccgccatttatattgataggttaaagcacaagtgaagattt
+aggctacagcgaagcggttccgctgaccgagtccgggtccatcccttcgcccatggtgag
+aggcagtcaggcagtgcagcctcagaggatgtcttctagtgttgaaatttctgatcggtc
+gagatatctccactactgcagcctgggggccattatcagtcgagtgaacttggctcacct
+cagatcttaacatgaacaatgatcgtaaaaagttgattacgccgtaacctttcgcactta
+accgttgggaattgcgcagagacatgagaattagtagtctgcgtaaaagagagtaccgca
+agggcgagctatataattattcactcaaaaccagttcacacaaatagatgtcctcttgca
+tttgcacatatcaaagatatttttacttaacctctaagcaatggacagtgccccgtctag
+cgagttgaaaggcgccacgactacgcacgtatgatctatatgtgaacgcaagcacttact
+acgccacaacagaactcaatgacggtgccgggtctacgcaaaccgcagggatagatccgg
+acgaacgggaacagcataatgcaccgtcgcgcttctcatggctaggtacttgggcggacg
+gttccccaccgccgtcatatcgtcaccagctagttgtggttagagcgattgtctctagcg
+aatcgtcgcttctataatgcccctccgctctacccgctctgggatcgtcttttgtatttg
+tatctcccggagggttgtgctgctatcgcccacgatcctggtcgtcaagtggttacagat
+ggagtaatccttcaataacatcaggtacagcaacctctgctagggtgtaacacggggtgt
+agctgactgccaggatcgctgcctccaaggtcagtgcgatcgcgagaaaggtaatctaac
+tcggccgggaccactcagaatggaggaaaagcagggcgcttgcatacggcttagtaggaa
+aattgtataactggcatgtttctgccccccggtttcacggacagttcgagagggcggagt
+ttgcagatactgactgtcgtggagggcatgtatagctgcaaagtgtatcaacatcgcctt
+cttttctctaagtgctctagattcaccgcttcttgccggcgagccgctttacttgcaatc
+tgtcgtccgacatgactttcgcaactacaaagtagcgttcgtctattttgcccaccagca
+attttgggtaccacattacggaaatctatattgccgtggagataatgaattagacagcac
+tatacagctcaggtaaattgctgagtttgctggtctgctgagtgacaacggaacctgagt
+tgcctcgccgcgtgaaacaggggagggagctgtaatattaaatctagtctcgggtatgtc
+ggccgatagtgcttgtcggcgcagttatcgcgacaaagaaatccatattcggtgacctgc
+gttctaagttatgtatcgtcagatcctacgaccgggaagaaaggtgcctctataccggta
+tttattgctggagctgtcgaactacttaggactccgatgctagtgcaccggataaaaagc
+ttaaaatcaaacatccggagacggacacgcgcatactctatgccccgagtttagttatct
+aatggtcttggcgtgtgttagcccctacggaagcgcgccggtcttctcggtcttgggcag
+ttgtgatcgggcgaactataaggaccttcccgtcctcccctcagggagctcgttagtggt
+aggcaaccagcttcgaacggatggctgccatccggtaggggcgccgattggtaccagctc
+tcttatatgactacaccggggtaatgacataggagctctgataagaacggccacctataa
+ctcagacatagcgttagatgtccgcactggaactcaccgacatcgcgagggccgaaacag
+cgaatgaattagcctccagtttttcagtagcagcccctatcctcgcctctcgctgcagac
+gtttaaggggccgaccatataggaatgcgcctcctcgttgggcgttgattagccccgtga
+ggtgacaagatctctccaagtgattagggcggtggtggctaaatatacatttacaatatg
+tgtgcccctcagcctggagaactctgtctcctcattcgaggcgccctcacgccgtgggaa
+ctcagctactctagtcgcagtggtcaggagtcctattaaactatgccggaagatactcag
+agatcagtgctatagacgatccgtagccgtattgctgcgcattgccttcctgatgtagga
+ccacatcgtcgtccagcacctgacgtcaagtcggtcaaaaatttaattttgacctaaccc
+tgtatacccttcgagccggtaactcctccgtgatacggttggtctttcctgggtaaccga
+agatttggccgaggatttgtgtcctccaccacatataaagctgtccaaaccgcctactcg
+aatgatccagctcgggcttcgacctgttctaggaagtacacataataacctgagggggca
+agtcttggacaacttatattcttcttctccgcgcgtggaaaatctacctattaattataa
+atgaaccacgttattcacaactacttgcgggtttctccgcagctccacaggcgacttacg
+ggacaacgagcaaagtgctgacagcgctcccatagctgtattgaatactacgattcgcac
+cccttgtatcagtaataacctatgccgttccgagtatccctgacctaggatctattgtcc
+tacctggagggcacatggccagaagaggatagataatacgcgcaatgcccgagcaatcgg
+ggagaccgagtgtgtggaaccccaaatcagatctaaacgcgagcgcgagcgcacaatcgc
+gaacgaaattaccaaacaatcattggcgaacaaacgtcaagggctcgcctgtcacagggt
+atcaaatccggcgttaaccaataatgcatgagtttaagacagtcgaccccctatatgcca
+ccatcgagaaacggcagttgatgaccaagtacgtaatggttcttggctatatgcgcgctg
+agaaagaagactagcattagctgacggctattacggaatggtaagagctcattcgttctc
+ctcttccgcggagagcaagtaagcatagtagtcgtgggtaattaccccctcgagtgaaga
+ccgagaagaacgacgacacattgtttcatcctaaaagcagttagatagtgaataatcccc
+gaggccaaacttgtgaaacaagaattgagggcacaacactctcactctgggagtgcgacg
+tcaagtccttgacagctagttgtctgtgtagggggcagcgctatgagcctttccgattct
+cgatctacgccaggcatgtcagacggcctatatttgtcgctgggggagagactactaagg
+gaagctcactaggacgcttgactatcgtttttggtgagagaaccctaaatcggctgcctt
+gatcttctcaatcattcctaacgtgtgtctcgccgtcgcagacgctcagcgatgagccat
+cttacacccctattaactgtacttctggtgtacgcatcatcacaatctcgagatttcgga
+cgttatgagggcagtactcccggacgccaaccgtgagtacgtccagtatacgggatctca
+gattgccaaatcaacatcctagcgggaaggatgtgtcgtgcctccctgaaaccgggtgcg
+gcgagcctgcgtcaaaacacggtcgcagtagctaagttcgtcaaataagcagtttggcat
+cccctcccaatagggctgacctattgggtcctcgttttttgccctagagaaacgaggttg
+ccaattcgtcacaggttggcgatgcaacccgacccttgttaatatattagttgcaccgta
+gaggatttacagaaagctggcgcagcaggtaggataataccgagcatgggcaggtatggt
+gggcgtataccaacccattatgcaagttgccgtatttagtaaactagcggtaatctaaag
+aaacatcttacccaatttgaggtcgccgacttgattgtgggaggttaaggtatgatctgt
+cattactattttggaaagaccctggcaagtgcaccgcgtggactggcaaaagtatcattg
+ccccgacttagccagccgcgggtctaaacggaacagaaactttatcctggcccgacggga
+cgtttcccttctggggttggatcacagcgctgcctgataggacggaggatcactcgctat
+ggtccggacacacgcctcggtatgatacacgtagactagactgccgccccgggggcgagt
+gagcggtccttcgatttgtgcgaggagggtgcgcaattgtttcctacctgctactaccac
+gtaacgaagtggctcgttgagcttcggtatatcaccggcatggatgaatacggagcgtat
+gagcgcgagttccatgactaaacagtgcgtcatcgatccaatgaccggatcccaaaatgt
+ggcggaggagctaaaccgcatgctcactaatcgatttgggtatcgtcagctgcctccggg
+ccagcccaggtaaagcgccgtaagacctcagcacagtgagggctgatggtatatcagcga
+agttgagggttgggttccagtagcgaacgcatatcagtccttaaatggagcacatggggc
+tatcgacagtccgttaaccgtgagagggaggagtttttacggtgcattgtggggacggcg
+cgaacctaaggtaagagttatctgatcggtgcggtggtagacccatgaggctaatgaaag
+tacacaactagattgctggaacccggcgagcgagctaggcatcattcttagcaaaggcgg
+caacgccaactagcagcacataatccactcccaacctacaaaagtctgcctgttccgtgt
+gatcggcgacgaggcgtcttaccgacttgcaaagtcgtttccgtgattgccgtcaattca
+tcgagccttgtccactccttgtggcacggatcactgtcccccagctttatcactttcgat
+tgacgtcaggtatgaacctaaagcaatggcgggggcaccaacacttactataacttaaaa
+accaagaccttctctgcctaagcaagtaacctcgcacgaataaatcttagtcacacattg
+tattatatgcttggtagcgcggcaccaaatatctacgacaaagtaactcatttgaaatta
+gaagttcgcaaccagagtcgtaagtgcagttcgtgatggcgcgccgctccctacccataa
+ttctcgatattctatgtcgtggcccatcctcaacaacgatcagtgccccgtcggcggtca
+catacgccaacactgatgttactcgcaatccacgatggcgaggagccatgctcctcgtgc
+gagtaaaaactgcccgtagcatgtgtcgagctgtgatgttgcacaagaaacgacttacat
+aggcactaccactcccgcccgtcctataactacgaaaatacatctaggtcggcaatgggg
+cgcttgcaggccgattctgccatcgtccaaacgcccacctagtacgacatgtggccgcga
+gggggcatgacagctccgtcgcattctgaacatcctgtgatagcgacgctagcagtagag
+cagtaatcaatggagtgatcgctctagtgcgattatgtcgggtttcaacgagtgcctcta
+cctctattgtgaattggtccggactacacagacactgagtacaaccccgctggtccttgg
+tacgctccaacgcaagtaaccgccacatgcatttttctgtgagcactagcagctcccttg
+aagattacgctagctagtgctggggacatgtgatcctttatccctcctaattcacctcct
+ggactttggaaactagtgtccccatcggcttggataaaagaaaatcgcgttgtcacgtgg
+acgtgtaattgcgaactgtccctggacatctttccgtgtcctgcgctacgtcgcctcgtc
+tgtagttataaccgcactccgggcacgagatgaacctgatcaaaagcctcggaacgtaat
+ggaattacctgctctctctttaaccaagcctaatcaaacactcccactgtttaggcagat
+agggacgcccactcctttctgattatactcaattccagtagataagttgcccggaaccgg
+tatgcgtataacacccgggagttcattgtcgagcccgcctcatgcttagccgtgacactt
+gatacgttatggcagcagcgacggatacttcgttccacccgggtaggtagcggttatcag
+gaccgccgccttcccgtgtcaggttatcctggaacgcccgccgggaacaccggcaggtat
+gatgtaggctataacatctggtaacaaccaagagtggtcgccgtaagtcggggggacaac
+gtatataaattcgacaagtcgatgtcgcgacgccattctggtcacgatatacttcgataa
+acgatctgtgttagatggtcacctggttgttatcctgtatctctccgccgcatgactcgc
+gcctgatccttctgtggccaggggacgaatgtacgggttctaatactgtatccaacataa
+ctgttggtttagtgctcttcgggtcctcgactattgtgcagaacatggacacacgctgcg
+gaattaaatggaacagcatcaattgcctctctaggagctccgcttaaatcacgggattac
+aatagcttgggtagtatgcacacgacgcactggattcgctatatgactatcagctgttac
+ctgcacaggcttgtctcccgtgagcatgaaacttatgcccggccaaaacctcaactcaga
+tgtgcaaagctggcgagtttcctcatgccccgctactccttgtatcgacggggttcactt
+gtccggcctccctttcatatgttcgccacttagagcagggagaattgagcggacgattgt
+tgtaacgggtgcttgcatctcatgcttttaggctctgtgggtaagaggaccgtacatggt
+ttgtagttttcgctgtgaacaccgaggtaatctcgacggtacgctatcatgagtgtccac
+gcgagaaggcccccaccggctcgttgtgccgtatttgcctctttctagcgaataacttaa
+cccggacctagccatgtggtctagcggaaaaataattgtcctcttattacctaaattgct
+tccaaggtttccgacggttgctgcaattgcgcatagaggttggtattagctatagcccca
+cggttaaatgttttataaaggcgtacaacatgaacgcgcggagcgttaatgctgccacga
+tgtgctaagaaccatagcacttcaagacgattactacaggcaaaaaacttatcagggcgc
+ccgaggtccaggggacccgaatcagtatgtccatgatagcgctgacgttgccagctcgct
+cgctcaaggtatcttcgggtaaggggtcccttctaaataggtcgtcaccatgcttaacca
+tctgcctcgtagttatatactctcgtcatttccggacgggaaaaccaagatgcatcaaca
+cgaacgctaacttaatcatatgtcatctgcttcagacagtctgccccgtaatcgtttagg
+ttatgctattttcggtttgatacagtacgcattggattcttgtctgtaatcttgttaaat
+ggaatatggggccacgtagggtccagaccgttgttggtggtactaaagcctgagtagagc
+agaaatagcaccggctatgtcgaccaagcaagaacagtgcggacactccctccgtgtcaa
+cacttgcgctacgttttatcgccgttgtgagcaaacctcagtttacaaaggcaattcccg
+gttgtttacacggtcattcgctcggtattgcctttgttggatcctatatcggatccgacc
+gttctctccatgatattcggtacgctccctcggttgtagactgcgaatgattggcaacgc
+gtgatctgactactaggattaagaaccgcgtttagaagtctagggtggacaatagtttgt
+gttcggaagtagatcccctatgtgatctgatcatagcatgaggcgcgttcatacactcat
+actgttcgcctgctaatttcggacataagcataccgacgtccgacgtatggacagccatg
+aaccgtttaggcctggtgagatttgcacaccagacgacccattacagtacaccagccgag
+taagttaggtacgggtgctacgctcatacatcaccagccacgggggagcggtctgatatt
+tgacgggaggtccgcgccttgtcttctttcgttaaatatgcaatatcgcttatagaccta
+ttaaccgcgaccgactggtattattctccccgtataagcaattgaccgttgccgctctcc
+cttgtgtctttgctgacttgatggcagcctgtggcatgttagtgacttacactactaggc
+gatgtgggaacgcgcttttccttttttctaagtgttgtaagccacatcgccgtatattta
+gatacttgcccgacagccaggcgtaggcttgcgcggccccacacctgaactgtctacaac
+cagcgcccttagtcagtcagaagaacagtaacatatggcatttcacaaacactgagccag
+ttgagacaaatccactgagtatgggagggtgatacatttatagtatggcatactcgacgg
+ggtgatatgacatggtttcttgggttcgggtgttctcgttttgaacttggccccgcggcg
+gcgttacttggggatctaccttataacgggccctcctgggcataaaggtacgacgtaacg
+aagcaagcccactattcttttgaggggggtccatacttcttcaatttttagatcagagtg
+ttcaggcacgccaccgttcagataagagccccataaaagactactaagattatgaccctt
+aaaataaaaggattggagcgaagtcagcacactaaatcatcttctagctctgtacttcgg
+ccgcgtgcttcgtgcgaattcaaaaaccgagtggggcgcgtcggcgcacttggctcggat
+tagaatcttgtgagacatcttgtcacttagatcttggggcagagctctagcatgttcgat
+caaaactcacgtagactcactaatataaaagtctcctcttctgtatcagtgatgtgagcc
+aattggaagttcacgcgcgtctgctctacttcttcaatgagcacggtgacgcaaaacacc
+ccggtaatatgtcgatcagagacactctccgtccttggtagtgctttacacttcaatccc
+tgacacaataactcgctattatagcaaacgcaccggtacatccacatatatatgatacac
+gctcgacggatggactacccgtagagccgatgtacgtgctaagacttcacatccaagaga
+cgaccaaaattgtttcacatgagcactaaggtcactgataacggtatggcgaatattatt
+ggaaatctgtgaagtgttgggtagtgggacttgagacctaatcttatatagccttccgac
+cggataaattggaagtctggctcgtgatcggatatcgataggagacccagcacgccaccg
+gacggcgcgtagaaccttgtggaagccacgttaccccagctcgacagcccacaggtcctc
+tgagcaagagaccataggtaaacaccgacgtcaggaggagctgggtccatacctgaggtc
+gcctactaacgccaacacgagggttgggaaaactaataggtgtacggtatagacaatccg
+ctggcatttagagggcctggtatgagtgagggggcacctctgttctcaataactcgctca
+agccgggatgtacgacgaccgtaaatggtgataccattgcaccacagcttgtcacaggca
+cgtacgtagggtgtacgatcttgaattgatgcatcctccccttttctttcacgttagaga
+aggtatcagctgaatgtccgggatggagccacggctacgctcgaactcagccagatccgt
+aagtccgcatatccctgcaggtcttgggttcgtcctaatcatggataacgtgtagaaggc
+actcctctgtggccggtagtcacgacccacgtacgcggtgttcaccgttcagccgtcatc
+cctttgtgccgagttataattggcagtttcctgctgatcccggcgcggtcgagtcgcaac
+ttcgaccatagggagtctcgttagctgaaccgccagcaaacctggcttaggaatgtccgc
+cttctgcaacgctggttgacggcacagaggactgtaggcctattcctggccagacggatg
+gtctgggctagtttacactgggacaagtaaggacaactacacagcgagcacctccactgg
+taatgggcctagccactgtgccactcattcccgctcaagttggggggtgcggtgagggta
+tggctatgggatatctgctttacctttgtgactcgctgtggatgtacgaggcgcgcagct
+ctttctagtttccctgcgctgtacagcagtgactcagaaaccaggaacctccacaagatt
+agagacgtacacgcgatgccacccgaactatctgttacttttgtcctcatcagccttacg
+atgatcaggatggccacgaacttaagtagatcaagttcgtcagtgctgtccaggtggtcg
+ttgcaaacgacgattagtcacggacaagtgccactcaataggcatcgttagattcaatcg
+tacgttaccatcgcactgtttccggaggtgtgagctctcagcggttcagcatgacgtttt
+cagacaccaggcccctacggatgagcgccgactgaccacccacgaggcgggctaggcatg
+acttgatgggcttcgcattggcccacatgccaatctcttaactagtagcatctacgacca
+ggcccatgggtcagtccctgactctgtgggcacctggcctcaatcaatgcgggccattga
+ggggctcccctatcttctactttggtagagcatgtgcgaggatggggtccatgaagtcta
+gcagtagtttataatagattatttaggccctgaccagtgtattgagacgcataatactgt
+cccttcagggggcacgctcttactcgtctacaattctttatgatcatggacgtcagatgg
+ttcttagataacctaattatatcttcccacggtcgtgaatgccagtgtcatccggtcatc
+atgtatgcggcggagaaagcctttgctctcagaagcccgttcctaaaattcgaaccgtat
+agtatactttactcgcttcgagggcggtttctcagttgagtcctctttagcgtgatcagg
+tactgtaatagtggtctttaacgtagcacctcgaatctcgaagggtctgcgcttcagtcc
+tttcggtcgcacagagcatctcacacttcgctcatggcctacgggaatgcgcccaagtga
+ggtctgttgggtggccggataaggatatgcctatccttgatacccttgactatgcccagt
+gtgcacggaaagcgcccggggaacgccgataatgcacaataggcgcaacaactatgggca
+tactgccaatagctgaagcgtcatcatagatgatcagtgattgtttgctctgctcaggtt
+ccatatgagctcacgcgggatctctcagacctattggcttggaataatggcgctacataa
+aggatagctgacgtgcactccgagcgtgtcgaaggctgcaacttcaccgtggtaacgact
+ccctcaacagtgcccaataagtgatccatttacgttctagtgctaagcaactcatggccc
+tagtaagcagctagacacacagatcaggcaacttaaatggaacctacttcgtcactttca
+tgggacacgaagctcaaggagcggtgaccagcaaacagccgcagcttcacaattgaattt
+cgaccgaccggatagaggacaagcatactttgtcaacaactaatttgctcgcgaggaaca
+ttgttggttacctaggcttaagtcgggatagacgttatcggctctaatcatctttccgac
+aaatacttgacgtgtacacggattctgcatttagggatcactgtggtgtgctacaagtac
+accgtctacgatggacccatagatgaccctcgaggcttctcactaagtacagctctcggg
+ccggttgttttgccgggcggttacccttctctacgagctcgtccatgagtcttaggtcgt
+gacaaattcggtctcattctagcagtacgaatcgacggtccgcgctaggcgaatctttta
+gctatattcaaatactgggaaaagacaaacagcaagataagagcgttcaacggagccgaa
+aaatgtacagagtgctacgaaccgaaagccacgagcgacttgactgtactatgggcgcaa
+accgttgctctatacacggagtcatcttagtctagttaaatcttttcagggttagaattc
+gacatggatcgactcattagggaaaactactctacacctctattttcttgtcccatccag
+gtaaatgggcctcgcgtggtgtaggcggcgcattctcctagaggcgactggacggaaagt
+agggggtctcgagtactggatttcgatgcctgttgaacgaggccatcaaagcgaagttgg
+gggttgatgagatgcgcagccaatccggcgacaaccgtggtctcccatatcgtctacgac
+gatggcattttccatcatgtagactgaaaataatgaagaaatccgtgacacagtggactc
+agcgttaaagtcttttctcgcgtgctcggctatcccctttaccgtcccgggggccacccc
+gggtacagtgctttacccggctcgcctcaacaaaacacacctccccataaatggtgagcg
+gggacttagaagggcacgttagctacagaatgggtcaatacaatcgcctgcctatctcaa
+tggtgctccccgagccgcatgctagagtgcgaagtcgactactagtagtacacacgtctc
+cggttgaaagagctgggaggttgagtgcggtgtcgtagaagaggtagggccagcatgtgc
+tactagtcaccctacaaagcaattcaaacaaacaactccgccacacccggtcacagagca
+ttgttgtataccatgggtcatatcgctagcaacgatttaatcatccctaaccctacacta
+ccatgacctagcgatgggttcccggacgaaacagattgacgaaacgtgtagcggactcgc
+agtactctttctctggtctagaagcaggcgcggctgtagaattaatctgtcgttgattca
+gctgataatggacctgttggcttcattacttcacgagccagtgacgctccagaaggcagc
+atgttcgagaacctgccaccgtcatctaacccaaaggctggctgttaaatacatctttgc
+ggcatcccaggacaaggtaatgaatgctcagcagtctctcacaacctccgcaccgacgcg
+tttctatgcaagtactagtacctgaacgtatatgttcctaaaagcctcatcccgttggag
+tctgtcagacgtgaactagttaagcgcaaatgtggacaccgtgagttgcctgactggtaa
+agtagcagttaagaaaaacttaatgtatatcgtcccagactgaatgggtccacacttagg
+tggcgtaccgtcttcgtgtttccacacgggacgctcgacactcaccaggaggagcggccg
+gtagacagaggacggggtattgaacctgtacttgcattatagaaattagaatccggaata
+ggcgcatgaagtctgccctctgtccgctcacgaagccccacgtcttacacgcatgtcgaa
+tatgagtcagatgtaccagaggcctgttcccaccacagtaactggccatcagtctccagt
+aggaggagcgagtgacagacgacgacaagaccggtttcctcccccttcctgtcccagccc
+atagacctgaagggacacaagtcaccgttggcacaactagatatcactacatactatcgc
+tccccacccccttcggaccacaacgagaaatggtcccccttctaagctactcctcgacgg
+ccccacgggtcaccggccttttgaccagcttccacaggagcacgtagcagtatattctcg
+tgtttaaaggtcgcggctgtggtggggctggttagtgcgtattgtcgatctacatcggga
+cttagaataactctacctcaatcgatccggatgtcgcaagatatctcaccgtctcgtctt
+cctttcgagggtgcatggcattggtaaccgctggtcttcagcacgtttgaaatttgtcga
+gtgcctaagagatagtgctactgaccggcgtaatcacggtctacaaacgtagatattgac
+catttcgtaggcgagtttgtgggctaactcccactcgcgttggtaatttggaggtcgccc
+gcaagaacccgaaccagcgctggagcgtggctaacactcgtcgaggttttaggcattgct
+cggtgtgtcccaaatttctattaccttatggcgcttctagggaggggaagtgctgcgagt
+aactgctaacatgtcggcctgaatagccctgcaaccggtccctgtgcgttcgggtgtcac
+atcgcccctaaatgaacttggaccgcctagtacaacacccggcgacaatacacgcatcat
+cggaaacaactgtatagcttcatacagttcagacttagaacattaattcttcctcgagag
+ggacccgtacgccctacctccgtcaagccccaagctgaaaggccacaataccgcgttcac
+agtctcagggagggaaaatcacatcaagtatcgcgagtggtgttaagtttaccgtcaggg
+tatactcctaactagacgtagctactcgactattgcacgcaccatgagacatgttagccg
+cgtgtttgcaatagattccccattcagagtatgatgaggatagttcgtgcactccatttg
+acctctacgcattgcaaccggtccacgtcattcgacggatgtccagataaggggtccaga
+tgcgtacacgactgtgttctctctacgcagtgtccccgctgaagttagtagggcctccgg
+acccacggatctactaccccgagtcgggctatcaagggaatgggacctagccggggcatg
+tacggtaccgatagatctgtgtatgttcggcttttgaaagtaggaccttgcggggttaga
+aggtagcgttatgggtccatgccgcgattcgctcgaaccagtaccagtgcttaattgtcc
+gcgtcgcaggcgcacgtgcaggattaaacgtctcttacaactataactgacgcctacgga
+tttgaagaccacaaccaaccgcgagaatgcctagtgttggcgaagacctatctccttgat
+gcgcgcagtgcatcttctaaccgtggcaagatcatgaacctgtgagaactttcttttaga
+tatcagacccccgtggcacccagaaactgccgctgttttgggcaagcgccgttgatacaa
+tacaggtgcatggacattcatccggacgagtagttctttaggcgggcccaccctaccacc
+gggcgcttacatccatccttcgctctcaatatcaattttatgtccagaggctccaaaatt
+tgcaaggtgactaagactggtagatctattacttcaccgttggggggacgtcctccacac
+caactaagtcattttgatatgaacattgaatcgatgctataaacggacatattagaaatt
+ttcagaggttttacgtgagtactccaaaggctccacactgaaagttattgctcagcgaca
+atgtctacgtcgacggactaaacgcagccgccttccataggacgattaccgaccgaacgt
+agcgagtacgggtctaaccttcggtcttgcaatcaagtagacgtcgtttctaagagtttg
+tagcagagaaacgactgagagtgtatacatattggcacatcctggagtaccaattcatgg
+aagcggaacgtccgaccgtaccgtccacccggatgaagttcaccatactatcggcgggct
+gtcccaccaaccctacgccttacgagaacgcacggcggtgtggacaccctctgattgtcc
+tgtagccgagcctaggaccgtctcgtgttggacagatcgcattgtatgcaaactcgtgat
+ccaatgaggtcccttggctatacagacaccgcgaggactctagggtcctggggttaaatg
+attcgtcttttcacttagatcgtgggccctccgtgtttatgttgggccaggctactagta
+tattcgatacatccgctggcctttgataacgtttgttaaatgttatctcggaggcttttt
+ttgaatcatacagctatgtcagacgagagttcctcagtgtcacccttcgttatcaagaaa
+tggtgaatttaatgttcgtagttttcagttattcagcagtctgtacccaaaacgtcaagt
+tcgttatcttcagcacaaacgggtcgggccctgaaaaacaaatcacgaaactgctacact
+ctttattttggcctagattattagctattgtttggcttttacttacggactcgtaggacg
+atgtacgtgcacttggtgcacatcatccttaattgggccggacggttaatcttatcaact
+ttgcgttcggctaaagattgcgacccttgtatctgttttgtgtcatctcgcttggtgcta
+gggtgatgtctctactctggagtgtatacgacgcaagcccttgcaggctaactacggact
+gctgcacactctaattcgaccgaactcgagtacgactgcgattataaatgcaacaagtcc
+ggggaagcctccgacaccaagacgcgacgctcgcaccatctttcccgtcgctatcccagc
+tcggtaagtctcgctcccgcccggcttaccaaaacaacacactcctcggcaatcgaatga
+gcgtcttgtgccgataatactttccaaacccgagttttttcctaattgtttggggaacgc
+cagtgaggtcagaacgcctacatcagaaggcggacgagagcctcacgattactcgacctg
+cttttgtggcacattgagtgccccctggagtcagaacactccgtggctggtacaaaaatg
+gtgttagttcttcgacaattagatcatatcaggcggttttagccactattaccgaccatc
+cacgcacacctagcaactcaactttttatcacgctggcaagtaagtggaaggccctgctg
+ccgttaaacggcaggctcatcgcggtatccacatcaattgcggtgaccggggtctcgctt
+tagaaaagccgatatggtccaggacgtcgtggtgcgcaggaacccttagaatttagtatg
+cggcgcgcgacgcgctgagagtcatcggcgccttagacggagttactccgcgaagcgctc
+aaagcctaccaagttctatactggtaagggaggataaggccacgtttcaaatatatacgt
+attgcattcgcgtatccatcccgagaaagaagtcgcggccgcaccgatggctgggcagct
+atagggattcaggccaaacgattgcgggacgatcctaaattgttgcccctgttaagagtt
+tcataaccattagattccagtctactacttctgaggacttctctgatcatattaatgttc
+atgggacactgctttcaaccgtatcaatgtgcgaaacgtaccttatgatccctttcgctg
+acacacacgatcctgtggttcagggtgaaaacactcaaatccaataacgcggggtaacag
+gccggttctcaagttggagaagcatacagcgagtcccgtatagtccgagcagaggcttca
+tcacgagtacaagcttttcttcagtgccgaaccataggaaggtctacataaaccggccgc
+tctaatcttggcgccgcgagagcaaagagagggctcacccacatcaaattcgactgcggg
+tcggcctccggtccccgtagtagagtacacgcgatacgtctttactctacctagaagact
+tgtcttcaagcacaccagcttactccggatcagcgttacataaagcccataaattagcat
+tgggcgagccgaccaggggtgaatattacacgaccggcgtcggtgcacgacccaggcatc
+agtcatccttggaggtttcagccacgcgggtgtgcatgaggcacccgctgcagtcacctc
+tgacggtctg
+>CEESC13F
+ttgcgttcggctaaagattgcgacccttgtatctgttttgtgtcatctcgcttggtgcta
+gggtgatgtctctactctggagtgtatacgacgcaagcccttgcaggctaactacggact
+gctgcacactctaattcgaccgaactcgagtacgactgcgattataaatgcaacaagtcc
+ggggaagcctccgacaccaagacgcgacgctcgcaccatctttcccgtcgctatcccagc
+tcggtaagtctcgctcccgcccggcttaccaaaacaacacactcctcggcaatcgaatga
+gcgtcttgtgccgataatactttccaaacccgagttttttcctaattgtttggggaacgc
+cagtgaggtcagaacgcctacatcagaaggcggacgagagcctcacgattactcgacctg
+cttttgtggcacattgagtgccccctggagtcagaacactccgtggctggtacaaaaatg
+gtgttagttcttcgacaattagatcatatcaggcggttttagccactattaccgaccatc
+cacgcacacctagcaactcaactttttatcacgctggcaagtaagtggaaggccctgctg
+ccgttaaacggcaggctcatcgcggtatccacatcaattgcggtgaccggggtctcgctt
+tagaaaagccgatatggtccaggacgtcgtggtgcgcaggaacccttagaatttagtatg
+cggcgcgcgacgcgctgagagtcatcggcgccttagacggagttactccgcgaagcgctc

Added: trunk/packages/bioperl/branches/upstream/current/t/data/seqfile.pir
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/seqfile.pir	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/seqfile.pir	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,18 @@
+>P1;CCHU
+cytochrome c [validated] - human
+MGDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE*
+>P1;CCCZ
+cytochrome c - chimpanzee (tentative sequence)
+GDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKKATNE*
+>P1;CCST
+cytochrome c - snapping turtle (tentative sequence)
+GDVEK.GKKIF.VQKCAQCHTVEKGGKH.KTGPNLNGL.IGRKTGQAEGF.SYTEANKN.KGITWG.EETLM.EY.LENPKKY.IPGTKM.IF.AGIKKKAERADL.IAY.LKDATSK*
+>P1;CCFG
+cytochrome c - bullfrog (tentative sequence)
+GDVEKGKKIF(V,Q.K.C.A.Q.C.H.T.C,E.K.G.G.K.H)KVGPNLYGLIGRKTGQAAGFSYTDANKNKGITW(G.E,D,T.L.M.E.Y)LENPKKYIPGTKMIFAGI(K.K.K.G.E.R.Q)DLIAY(L.K.S,A,C,S,K)*
+>P1;CCND
+cytochrome c - love-in-a-mist (tentative sequence)
+AS.F.BZAPAGBSAS(G.E.K)I.F.KTKCAZCHTVBZGAGH.KZGP(N.L)H.G.L.F.GRQSGT.VAG.Y.SY.SAANKN.KAVN.W.EEKT.L.Y.DYLLNPKK.Y.IP(G.T.K.M)VFPGL.KKPZZRABL.LA.Y.LKESTA*
+>F1;C44264
+ALL-1/AF-4 clone 25 mutant fusion protein - human (fragment)
+/EKPPPVNKQENAGTLNIFSTLSNGNSSKQKIPADGVHRIRVDFKTYSNEVHCVEEILKEMTHSWPPPLTAIHTPSTAEPSKFPFPTKDSQHVSSVTQNQKQYDTSSKTHSNSQQGTSSMLEDDLQLSDSEDSDS/*

Added: trunk/packages/bioperl/branches/upstream/current/t/data/seqs.fas
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/seqs.fas	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/seqs.fas	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,14 @@
+>gi|239758|bbs|68379 glucocorticoid receptor,  GR [human, Peptide Partial, 394 aa]
+MDSKESLTPGREENPSSVLAQERGDVMDFYKTLRGGATVKVSASSPSLAVASQSDSKQRRLLVDFPKGSV
+>gi|239752|bbs|68871 PML-3=putative zinc finger protein [human, Peptide, 802 aa]
+MPPPETPSEGRQPSPSPSPTERAPASEEEFQFLRCQQCQAEAKCPKLLPCLHTLCSGCLEASGMQCPICQ
+>gi|238775|bbs|65126 putative tyrosine kinase receptor=UFO [human, NIH3T3, Peptide, 894 aa]
+MAWRCPRMGRVPLAWCLALCGWACMAPRGTQAEESPFVGNPGNITGARGLTGTLRCQLQVQGEPPEVHWL
+>gi|239006|bbs|65162 alpha(1,3)-fucosyltransferase, ELFT [human, Peptide, 400 aa]
+MGAPWGSPTAAAGGRRGWRRGRGLPWTVCVLAAAGLTCTALITYACWGQLPPLPWASPTPSRPVGVLLWW
+>gi|237597|bbs|60089 putative adhesion molecule=ADMLX [human, Peptide, 679 aa]
+MVPGVPGAVLTLCLWLAASSGSWRPAPARLCAAAGRVAVCRERPARSCASRCLSLQITRISAFFQHFQNN
+>gi|237995|bbs|62046 NK-1 receptor [human, lung, Peptide, 407 aa]
+MDNVLPVDSDLSPNISTNTSEPNQFVQPAWEIVLWAAAYTVIVVTSVVGNVVVMWIILAHKRMRTVTNYF
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sequencefamily.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sequencefamily.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sequencefamily.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,900 @@
+ID   MA32_HUMAN     STANDARD;      PRT;   282 AA.
+AC   Q07021;
+DT   01-FEB-1995 (Rel. 31, Created)
+DT   01-FEB-1995 (Rel. 31, Last sequence update)
+DT   01-OCT-2000 (Rel. 40, Last annotation update)
+DE   COMPLEMENT COMPONENT 1, Q SUBCOMPONENT BINDING PROTEIN, MITOCHONDRIAL
+DE   PRECURSOR (GLYCOPROTEIN GC1QBP) (GC1Q-R PROTEIN) (HYALURONAN-BINDING
+DE   PROTEIN 1) (PRE-MRNA SPLICING FACTOR SF2, P32 SUBUNIT) (P33).
+GN   GC1QBP OR HABP1 OR SF2P32 OR C1QBP.
+OS   Homo sapiens (Human).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606;
+RN   [1]
+RP   SEQUENCE FROM N.A., AND SEQUENCE OF 74; 76-93 AND 208-216.
+RC   TISSUE=FIBROBLAST;
+RX   MEDLINE=94085792; PubMed=8262387;
+RA   Honore B., Madsen P., Rasmussen H.H., Vandekerckhove J., Celis J.E.,
+RA   Leffers H.;
+RT   "Cloning and expression of a cDNA covering the complete coding region
+RT   of the P32 subunit of human pre-mRNA splicing factor SF2.";
+RL   Gene 134:283-287(1993).
+RN   [2]
+RP   SEQUENCE OF 5-282 FROM N.A., AND SEQUENCE OF 74-114.
+RX   MEDLINE=91309150; PubMed=1830244;
+RA   Krainer A.R., Mayeda A., Kozak D., Binns G.;
+RT   "Functional expression of cloned human splicing factor SF2: homology
+RT   to RNA-binding proteins, U1 70K, and Drosophila splicing regulators.";
+RL   Cell 66:383-394(1991).
+RN   [3]
+RP   SEQUENCE FROM N.A., AND PARTIAL SEQUENCE.
+RX   MEDLINE=94253723; PubMed=8195709;
+RA   Ghebrehiwet B., Lim B.L., Peerschke E.I., Willis A.C., Reid K.B.;
+RT   "Isolation, cDNA cloning, and overexpression of a 33-kD cell surface
+RT   glycoprotein that binds to the globular 'heads' of C1q.";
+RL   J. Exp. Med. 179:1809-1821(1994).
+RN   [4]
+RP   X-RAY CRYSTALLOGRAPHY (2.25 ANGSTROMS).
+RX   MEDLINE=99199225; PubMed=10097078;
+RA   Jiang J., Zhang Y., Krainer A.R., Xu R.-M.;
+RT   "Crystal structure of human p32, a doughnut-shaped acidic
+RT   mitochondrial matrix protein.";
+RL   Proc. Natl. Acad. Sci. U.S.A. 96:3572-3577(1999).
+CC   -!- FUNCTION: NOT KNOWN. BINDS TO THE GLOBULAR "HEADS" OF C1Q THUS
+CC       INHIBITING C1 ACTIVATION.
+CC   -!- SUBCELLULAR LOCATION: MITOCHONDRIAL MATRIX.
+CC   -!- SIMILARITY: BELONGS TO THE MAM33 FAMILY.
+CC   -!- CAUTION: WAS ORIGINALLY (REF.1 AND REF.2) THOUGHT TO BE A PRE-MRNA
+CC       SPLICING FACTOR THAT PLAYS A ROLE IN PREVENTING EXON SKIPPING,
+CC       ENSURING THE ACCURACY OF SPLICING AND REGULATING ALTERNATIVE
+CC       SPLICING.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; L04636; AAA16315.1; -.
+DR   EMBL; M69039; AAA73055.1; -.
+DR   EMBL; X75913; CAA53512.1; -.
+DR   PIR; JT0762; JT0762.
+DR   PIR; S44104; S44104.
+DR   PDB; 1P32; 06-APR-99.
+DR   MIM; 601269; -.
+KW   Mitochondrion; Transit peptide; 3D-structure.
+FT   TRANSIT       1     73       MITOCHONDRION.
+FT   CHAIN        74    282       COMPLEMENT COMPONENT 1, Q SUBCOMPONENT
+FT                                BINDING PROTEIN.
+SQ   SEQUENCE   282 AA;  31362 MW;  2F747FA73BB1314B CRC64;
+     MLPLLRCVPR VLGSSVAGLR AAAPASPFRQ LLQPAPRLCT RPFGLLSVRA GSERRPGLLR
+     PRGPCACGCG CGSLHTDGDK AFVDFLSDEI KEERKIQKHK TLPKMSGGWE LELNGTEAKL
+     VRKVAGEKIT VTFNINNSIP PTFDGEEEPS QGQKVEEQEP ELTSTPNFVV EVIKNDDGKK
+     ALVLDCHYPE DEVGQEDEAE SDIFSIREVS FQSTGESEWK DTNYTLNTDS LDWALYDHLM
+     DFLADRGVDN TFADELVELS TALEHQEYIT FLEDLKSFVK SQ
+//
+ID   ACON_CAEEL     STANDARD;      PRT;   788 AA.
+AC   P34455;
+DT   01-FEB-1994 (Rel. 28, Created)
+DT   01-FEB-1994 (Rel. 28, Last sequence update)
+DT   15-JUL-1999 (Rel. 38, Last annotation update)
+DE   Probable aconitate hydratase, mitochondrial precursor (EC 4.2.1.3)
+DE   (Citrate hydro-lyase) (Aconitase).
+GN   F54H12.1.
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   STRAIN=BRISTOL N2;
+RX   MEDLINE=94150718; PubMed=7906398;
+RA   Wilson R., Ainscough R., Anderson K., Baynes C., Berks M.,
+RA   Bonfield J., Burton J., Connell M., Copsey T., Cooper J., Coulson A.,
+RA   Craxton M., Dear S., Du Z., Durbin R., Favello A., Fraser A.,
+RA   Fulton L., Gardner A., Green P., Hawkins T., Hillier L., Jier M.,
+RA   Johnston L., Jones M., Kershaw J., Kirsten J., Laisster N.,
+RA   Latreille P., Lightning J., Lloyd C., Mortimore B., O'Callaghan M.,
+RA   Parsons J., Percy C., Rifken L., Roopra A., Saunders D., Shownkeen R.,
+RA   Sims M., Smaldon N., Smith A., Smith M., Sonnhammer E., Staden R.,
+RA   Sulston J., Thierry-Mieg J., Thomas K., Vaudin M., Vaughan K.,
+RA   Waterson R., Watson A., Weinstock L., Wilkinson-Sproat J.,
+RA   Wohldman P.;
+RT   "2.2 Mb of contiguous nucleotide sequence from chromosome III of C.
+RT   elegans.";
+RL   Nature 368:32-38(1994).
+CC   -!- CATALYTIC ACTIVITY: Citrate = cis-aconitate + H(2)O.
+CC   -!- COFACTOR: ACONITASE HAS AN ACTIVE (4FE-4S) AND AN INACTIVE (3FE-
+CC       4S) FORMS. THE ACTIVE (4FE-4S) CLUSTER IS PART OF THE CATALYTIC
+CC       SITE THAT INTERCONVERTS CITRATE, CIS-ACONITASE, AND ISOCITRATE (BY
+CC       SIMILARITY).
+CC   -!- PATHWAY: TRICARBOXYLIC ACID CYCLE.
+CC   -!- SUBUNIT: MONOMER (BY SIMILARITY).
+CC   -!- SUBCELLULAR LOCATION: Mitochondrial (By similarity).
+CC   -!- SIMILARITY: BELONGS TO THE ACONITASE/IPM ISOMERASE FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; L25599; AAA28050.1; -.
+DR   PIR; S44831; S44831.
+DR   HSSP; P20004; 1AMJ.
+DR   WormPep; F54H12.1; CE00516.
+DR   InterPro; IPR001030; Aconitase.
+DR   InterPro; IPR000573; Aconitase_C.
+DR   Pfam; PF00330; aconitase; 1.
+DR   Pfam; PF00694; Aconitase_C; 1.
+DR   PRINTS; PR00415; ACONITASE.
+DR   ProDom; PD000511; Aconitase; 1.
+DR   PROSITE; PS00450; ACONITASE_1; 1.
+DR   PROSITE; PS01244; ACONITASE_2; 1.
+KW   Hypothetical protein; Lyase; Tricarboxylic acid cycle; Iron-sulfur;
+KW   Mitochondrion; Transit peptide; 4Fe-4S.
+FT   TRANSIT       1      ?       MITOCHONDRION (POTENTIAL).
+FT   CHAIN         ?    788       PROBABLE ACONITATE HYDRATASE.
+FT   METAL       393    393       IRON-SULFUR (4FE-4S) (BY SIMILARITY).
+FT   METAL       456    456       IRON-SULFUR (4FE-4S) (BY SIMILARITY).
+FT   METAL       459    459       IRON-SULFUR (4FE-4S) (BY SIMILARITY).
+SQ   SEQUENCE   788 AA;  85712 MW;  8861E6FC198B70D9 CRC64;
+     MRYHFLFGSL RNHLFSFRGV IYCREKLFNC SKLSFRPSKV AISKFEPKSY LPYEKLSQTV
+     KIVKDRLKRP LTLSEKILYG HLDQPKTQDI ERGVSYLRLR PDRVAMQDAT AQMAMLQFIS
+     SGLPKTAVPS TIHCDHLIEA QKGGAQDLAR AKDLNKEVFN FLATAGSKYG VGFWKPGSGI
+     IHQIILENYA FPGLLLIGTD SHTPNGGGLG GLCIGVGGAD AVDVMADIPW ELKCPKVIGI
+     KLTGKLNGWT SAKDVILKVA DILTVKGGTG AIVEYFGPGV DSISATGMGT ICNMGAEIGA
+     TTSVFPYNES MYKYLEATGR KEIAEEARKY KDLLTADDGA NYDQIIEINL DTLTPHVNGP
+     FTPDLASSID KLGENAKKNG WPLDVKVSLI GSCTNSSYED MTRAASIAKQ ALDKGLKAKT
+     IFTITPGSEQ VRATIERDGL SKIFADFGGM VLANACGPCI GQWDRQDVKK GEKNTIVTSY
+     NRNFTGRNDA NPATHGFVTS PDITTAMAIS GRLDFNPLTD ELTAADGSKF KLQAPTGLDL
+     PPKGYDPGED TFQAPSGSGQ VDVSPSSDRL QLLSPFDKWD GKDLEDMKIL IKVTGKCTTD
+     HISAAGPWLK YRGHLDNISN NLFLTAINAD NGEMNKVKNQ VTGEYGAVPA TARKYKADGV
+     RWVAIGDENY GEGSSREHAA LEPRHLGGRA IIVKSFARIH ETNLKKQGML PLTFANPADY
+     DKIDPSDNVS IVGLSSFAPG KPLTAIFKKT NGSKVEVTLN HTFNEQQIEW FKAGSALNRM
+     KEVFAKSK
+//
+ID   143E_HUMAN     STANDARD;      PRT;   255 AA.
+AC   P42655; P29360; Q63631;
+DT   01-NOV-1995 (Rel. 32, Created)
+DT   01-NOV-1995 (Rel. 32, Last sequence update)
+DT   15-JUL-1999 (Rel. 38, Last annotation update)
+DE   14-3-3 protein epsilon (Mitochondrial import stimulation factor L
+DE   subunit) (Protein kinase C inhibitor protein-1) (KCIP-1) (14-3-3E).
+GN   YWHAE.
+OS   Homo sapiens (Human),
+OS   Mus musculus (Mouse),
+OS   Rattus norvegicus (Rat),
+OS   Bos taurus (Bovine), and
+OS   Ovis aries (Sheep).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606, 10090, 10116, 9913, 9940;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=95372385; PubMed=7644510;
+RA   Conklin D.S., Galaktionov K., Beach D.;
+RT   "14-3-3 proteins associate with cdc25 phosphatases.";
+RL   Proc. Natl. Acad. Sci. U.S.A. 92:7892-7896(1995).
+RN   [2]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Heart;
+RA   Luk S.C.W., Lee C.Y., Waye M.M.Y.;
+RL   Submitted (JUN-1995) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=96300316; PubMed=8684458;
+RA   Jin D.-Y., Lyu M.S., Kozak C.A., Jeang K.-T.;
+RT   "Function of 14-3-3 proteins.";
+RL   Nature 382:308-308(1996).
+RN   [4]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Liver;
+RX   MEDLINE=97011338; PubMed=8858348;
+RA   Chong S.S., Tanigami A., Roschke A.V., Ledbetter D.H.;
+RT   "14-3-3 epsilon has no homology to LIS1 and lies telomeric to it on
+RT   chromosome 17p13.3 outside the Miller-Dieker syndrome chromosome
+RT   region.";
+RL   Genome Res. 6:735-741(1996).
+RN   [5]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RA   Tanigami A., Chong S.S., Ledbetter D.H.;
+RT   "14-3-3 epsilon genomic sequence.";
+RL   Submitted (AUG-1998) to the EMBL/GenBank/DDBJ databases.
+RN   [6]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Placenta;
+RA   Strausberg R.;
+RL   Submitted (DEC-2000) to the EMBL/GenBank/DDBJ databases.
+RN   [7]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat, and Sheep; TISSUE=Pineal gland;
+RX   MEDLINE=94296566; PubMed=8024705;
+RA   Roseboom P.H., Weller J.L., Babila T., Aitken A., Sellers L.A.,
+RA   Moffet J.R., Namboodiri M.A., Klein D.C.;
+RT   "Cloning and characterization of the epsilon and zeta isoforms of the
+RT   14-3-3 proteins.";
+RL   DNA Cell Biol. 13:629-640(1994).
+RN   [8]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Liver;
+RX   MEDLINE=95122474; PubMed=7822263;
+RA   Alam R., Hachiya N., Sakaguchi M., Shun-Ichiro K., Iwanaga S.,
+RA   Kitajima M., Mihara K., Omura T.;
+RT   "cDNA cloning and characterization of mitochondrial import
+RT   stimulation factor (MSF) purified from rat liver cytosol.";
+RL   J. Biochem. 116:416-425(1994).
+RN   [9]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Brain;
+RX   MEDLINE=96280718; PubMed=8694795;
+RA   Gao L., Gu X.B., Yu D.S., Yu R.K., Zeng G.;
+RT   "Association of a 14-3-3 protein with CMP-NeuAc:GM1 alpha 2,3-
+RT   sialyltransferase.";
+RL   Biochem. Biophys. Res. Commun. 224:103-107(1996).
+RN   [10]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse; STRAIN=SWISS; TISSUE=Kidney;
+RX   MEDLINE=95269876; PubMed=7750640;
+RA   McConnell J.E., Armstrong J.F., Bard J.B.;
+RT   "The mouse 14-3-3 epsilon isoform, a kinase regulator whose
+RT   expression pattern is modulated in mesenchyme and neuronal
+RT   differentiation.";
+RL   Dev. Biol. 169:218-228(1995).
+RN   [11]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse; STRAIN=129/SV;
+RA   Takihara Y., Irie K., Nomura M., Motaleb M., Matsumoto K.,
+RA   Shimada K.;
+RL   Submitted (SEP-1996) to the EMBL/GenBank/DDBJ databases.
+RN   [12]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Bovine;
+RA   Jones J.M., Niikura T., Pinke R.M., Guo W., Molday L., Leykam J.,
+RA   McConnell D.G.;
+RT   "Expression of 14-3-3 proteins in bovine retinal photoreceptors.";
+RL   Submitted (JAN-1998) to the EMBL/GenBank/DDBJ databases.
+RN   [13]
+RP   SEQUENCE OF 1-152; 165-184 AND 216-255.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=92283271; PubMed=1317796;
+RA   Toker A., Sellers L.A., Amess B., Patel Y., Harris A., Aitken A.;
+RT   "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3)
+RT   from sheep brain. Amino acid sequence of phosphorylated forms.";
+RL   Eur. J. Biochem. 206:453-461(1992).
+RN   [14]
+RP   SEQUENCE OF 1-23 AND 125-140.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=90345949; PubMed=2143472;
+RA   Toker A., Ellis C.A., Sellers L.A., Aitken A.;
+RT   "Protein kinase C inhibitor proteins. Purification from sheep brain
+RT   and sequence similarity to lipocortins and 14-3-3 protein.";
+RL   Eur. J. Biochem. 191:421-429(1990).
+CC   -!- FUNCTION: ACTIVATES TYROSINE AND TRYPTOPHAN HYDROXYLASES IN THE
+CC       PRESENCE OF CA(2+)/CALMODULIN-DEPENDENT PROTEIN KINASE II, AND
+CC       STRONGLY ACTIVATES PROTEIN KINASE C. IS PROBABLY A MULTIFUNCTIONAL
+CC       REGULATOR OF THE CELL SIGNALING PROCESSES MEDIATED BY BOTH
+CC       KINASES.
+CC   -!- SUBUNIT: HOMODIMER.
+CC   -!- SUBCELLULAR LOCATION: CYTOPLASMIC.
+CC   -!- TISSUE SPECIFICITY: 14-3-3 PROTEINS ARE LOCALIZED IN NEURONS, AND
+CC       ARE AXONALLY TRANSPORTED TO THE NERVE TERMINALS. THEY MAY BE ALSO
+CC       PRESENT, AT LOWER LEVELS, IN VARIOUS OTHER EUKARYOTIC TISSUES.
+CC   -!- SIMILARITY: BELONGS TO THE 14-3-3 FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; U28936; AAA75301.1; -.
+DR   EMBL; U20972; AAC50175.1; -.
+DR   EMBL; U43399; AAC50625.1; -.
+DR   EMBL; U43430; AAD00026.1; -.
+DR   EMBL; U54778; AAC50710.1; -.
+DR   EMBL; AB017103; BAA32538.1; -.
+DR   EMBL; AB017098; BAA32538.1; JOINED.
+DR   EMBL; AB017099; BAA32538.1; JOINED.
+DR   EMBL; AB017100; BAA32538.1; JOINED.
+DR   EMBL; AB017101; BAA32538.1; JOINED.
+DR   EMBL; AB017102; BAA32538.1; JOINED.
+DR   EMBL; BC000179; AAH00179.1; -.
+DR   EMBL; BC001440; AAH01440.1; -.
+DR   EMBL; M84416; AAC37659.1; -.
+DR   EMBL; D30739; BAA06401.1; -.
+DR   EMBL; Z19599; CAA79659.1; -.
+DR   EMBL; U53882; AAC52676.1; -.
+DR   EMBL; L07914; AAC37321.1; -.
+DR   EMBL; D87663; BAA13424.1; -.
+DR   EMBL; AF043735; AAC61927.1; -.
+DR   PIR; S10806; S10806.
+DR   PIR; S10807; S10807.
+DR   HSSP; P29312; 1A38.
+DR   MIM; 605066; -.
+DR   MGD; MGI:894689; Ywhae.
+DR   InterPro; IPR000308; 14-3-3.
+DR   Pfam; PF00244; 14-3-3; 1.
+DR   PRINTS; PR00305; 1433ZETA.
+DR   ProDom; PD000600; 14-3-3; 1.
+DR   SMART; SM00101; 14_3_3; 1.
+DR   PROSITE; PS00796; 1433_1; 1.
+DR   PROSITE; PS00797; 1433_2; 1.
+KW   Brain; Neurone; Acetylation; Multigene family.
+FT   MOD_RES       1      1       ACETYLATION.
+FT   CONFLICT     73     73       K -> T (IN REF. 9).
+FT   CONFLICT    120    120       F -> S (IN REF. 9).
+FT   CONFLICT    123    123       K -> Y (IN REF. 9).
+FT   CONFLICT    129    129       H -> Y (IN REF. 14).
+SQ   SEQUENCE   255 AA;  29174 MW;  07817CCBD1F75B26 CRC64;
+     MDDREDLVYQ AKLAEQAERY DEMVESMKKV AGMDVELTVE ERNLLSVAYK NVIGARRASW
+     RIISSIEQKE ENKGGEDKLK MIREYRQMVE TELKLICCDI LDVLDKHLIP AANTGESKVF
+     YYKMKGDYHR YLAEFATGND RKEAAENSLV AYKAASDIAM TELPPTHPIR LGLALNFSVF
+     YYEILNSPDR ACRLAKAAFD DAIAELDTLS EESYKDSTLI MQLLRDNLTL WTSDMQGDGE
+     EQNKEALQDV EDENQ
+//
+ID   143B_BOVIN     STANDARD;      PRT;   245 AA.
+AC   P29358;
+DT   01-DEC-1992 (Rel. 24, Created)
+DT   01-FEB-1996 (Rel. 33, Last sequence update)
+DT   16-OCT-2001 (Rel. 40, Last annotation update)
+DE   14-3-3 protein beta/alpha (Protein kinase C inhibitor protein-1)
+DE   (KCIP-1).
+GN   YWHAB.
+OS   Bos taurus (Bovine), and
+OS   Ovis aries (Sheep).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Cetartiodactyla; Ruminantia; Pecora; Bovoidea;
+OC   Bovidae; Bovinae; Bos.
+OX   NCBI_TaxID=9913, 9940;
+RN   [1]
+RP   SEQUENCE.
+RC   SPECIES=Bovine;
+RX   MEDLINE=91108808; PubMed=1671102;
+RA   Isobe T., Ichimura T., Sunaya T., Okuyama T., Takahashi N., Kuwano R.,
+RA   Takahashi Y.;
+RT   "Distinct forms of the protein kinase-dependent activator of tyrosine
+RT   and tryptophan hydroxylases.";
+RL   J. Mol. Biol. 217:125-132(1991).
+RN   [2]
+RP   SEQUENCE OF 2-145 FROM N.A.
+RC   SPECIES=Bovine; TISSUE=Retina;
+RA   Jones J.M., Niikura T., Pinke R.M., Guo W., Molday L., Leykam J.,
+RA   McConnell D.G.;
+RT   "Expression of 14-3-3 proteins in bovine retinal photoreceptors.";
+RL   Submitted (JAN-1998) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   SEQUENCE OF 2-83; 121-186 AND 199-241.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=92283271; PubMed=1317796;
+RA   Toker A., Sellers L.A., Amess B., Patel Y., Harris A., Aitken A.;
+RT   "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3)
+RT   from sheep brain. Amino acid sequence of phosphorylated forms.";
+RL   Eur. J. Biochem. 206:453-461(1992).
+RN   [4]
+RP   SEQUENCE OF 2-23.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=90345949; PubMed=2143472;
+RA   Toker A., Ellis C.A., Sellers L.A., Aitken A.;
+RT   "Protein kinase C inhibitor proteins. Purification from sheep brain
+RT   and sequence similarity to lipocortins and 14-3-3 protein.";
+RL   Eur. J. Biochem. 191:421-429(1990).
+RN   [5]
+RP   PHOSPHORYLATION.
+RC   SPECIES=Sheep;
+RX   MEDLINE=95197587; PubMed=7890696;
+RA   Aitken A., Howell S., Jones D., Madrazo J., Patel Y.;
+RT   "14-3-3 alpha and delta are the phosphorylated forms of
+RT   raf-activating 14-3-3 beta and zeta. In vivo stoichiometric
+RT   phosphorylation in brain at a Ser-Pro-Glu-Lys motif.";
+RL   J. Biol. Chem. 270:5706-5709(1995).
+RN   [6]
+RP   POST-TRANSLATIONAL MODIFICATIONS.
+RC   SPECIES=Sheep;
+RA   Aitken A., Patel Y., Martin H., Jones D., Robinson K., Madrazo J.,
+RA   Howell S.;
+RT   "Electrospray mass spectroscopy analysis with online trapping of
+RT   posttranslationally modified mammalian and avian brain 14-3-3
+RT   isoforms.";
+RL   J. Protein Chem. 13:463-465(1994).
+CC   -!- FUNCTION: ACTIVATES TYROSINE AND TRYPTOPHAN HYDROXYLASES IN THE
+CC       PRESENCE OF CA(2+)/CALMODULIN-DEPENDENT PROTEIN KINASE II, AND
+CC       STRONGLY ACTIVATES PROTEIN KINASE C. IS PROBABLY A MULTIFUNCTIONAL
+CC       REGULATOR OF THE CELL SIGNALING PROCESSES MEDIATED BY BOTH
+CC       KINASES.
+CC   -!- SUBUNIT: HOMODIMER.
+CC   -!- SUBCELLULAR LOCATION: CYTOPLASMIC.
+CC   -!- ALTERNATIVE PRODUCTS: TWO FORMS ARE PRODUCED BY ALTERNATIVE
+CC       INITIATION.
+CC   -!- TISSUE SPECIFICITY: 14-3-3 PROTEINS ARE LOCALIZED IN NEURONS, AND
+CC       ARE AXONALLY TRANSPORTED TO THE NERVE TERMINALS. THEY MAY BE ALSO
+CC       PRESENT, AT LOWER LEVELS, IN VARIOUS OTHER EUKARYOTIC TISSUES.
+CC   -!- PTM: ISOFORM ALPHA DIFFERS FROM ISOFORM BETA IN BEING
+CC       PHOSPHORYLATED.
+CC   -!- SIMILARITY: BELONGS TO THE 14-3-3 FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; AF043736; AAC02090.1; -.
+DR   PIR; S13467; S13467.
+DR   PIR; S10804; S10804.
+DR   PIR; S23179; S23179.
+DR   HSSP; P29312; 1A38.
+DR   InterPro; IPR000308; 14-3-3.
+DR   Pfam; PF00244; 14-3-3; 1.
+DR   PRINTS; PR00305; 1433ZETA.
+DR   ProDom; PD000600; 14-3-3; 1.
+DR   SMART; SM00101; 14_3_3; 1.
+DR   PROSITE; PS00796; 1433_1; 1.
+DR   PROSITE; PS00797; 1433_2; 1.
+KW   Brain; Neurone; Phosphorylation; Acetylation; Multigene family;
+KW   Alternative initiation.
+FT   INIT_MET      0      0
+FT   CHAIN         1    245       14-3-3 PROTEIN BETA/ALPHA, LONG ISOFORM.
+FT   CHAIN         2    245       14-3-3 PROTEIN BETA/ALPHA, SHORT ISOFORM.
+FT   INIT_MET      2      2       FOR SHORT ISOFORM.
+FT   MOD_RES       1      1       ACETYLATION.
+FT   MOD_RES       2      2       ACETYLATION (IN SHORT ISOFORM).
+FT   MOD_RES     185    185       PHOSPHORYLATION.
+SQ   SEQUENCE   245 AA;  27950 MW;  AA91C2314D99549F CRC64;
+     TMDKSELVQK AKLAEQAERY DDMAAAMKAV TEQGHELSNE ERNLLSVAYK NVVGARRSSW
+     RVISSIEQKT ERNEKKQQMG KEYREKIEAE LQDICNDVLQ LLDKYLIPNA TQPESKVFYL
+     KMKGDYFRYL SEVASGDNKQ TTVSNSQQAY QEAFEISKKE MQPTHPIRLG LALNFSVFYY
+     EILNSPEKAC SLAKTAFDEA IAELDTLNEE SYKDSTLIMQ LLRDNLTLWT SENQGDEGDA
+     GEGEN
+//
+ID   CALM_HUMAN     STANDARD;      PRT;   148 AA.
+AC   P02593; P99014; P70667; Q61379; Q61380;
+DT   21-JUL-1986 (Rel. 01, Created)
+DT   21-JUL-1986 (Rel. 01, Last sequence update)
+DT   16-OCT-2001 (Rel. 40, Last annotation update)
+DE   Calmodulin.
+GN   (CALM1 OR CAM1 OR CALM OR CAM) AND (CALM2 OR CAM2 OR CAMB) AND
+GN   (CALM3 OR CAM3 OR CAMC).
+OS   Homo sapiens (Human),
+OS   Mus musculus (Mouse),
+OS   Rattus norvegicus (Rat),
+OS   Oryctolagus cuniculus (Rabbit),
+OS   Bos taurus (Bovine),
+OS   Gallus gallus (Chicken),
+OS   Anas platyrhynchos (Domestic duck),
+OS   Xenopus laevis (African clawed frog),
+OS   Arbacia punctulata (Punctuate sea urchin),
+OS   Oncorhynchus sp. (Salmon), and
+OS   Oryzias latipes (Medaka fish).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606, 10090, 10116, 9986, 9913, 9031, 8839, 8355, 7641,
+OX   8025, 8090;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=89034207; PubMed=3182832;
+RA   Fischer R., Koller M., Flura M., Mathews S., Strehler-Page M.A.,
+RA   Krebs J., Penniston J.T., Carafoli E., Strehler E.E.;
+RT   "Multiple divergent mRNAs code for a single human calmodulin.";
+RL   J. Biol. Chem. 263:17055-17062(1988).
+RN   [2]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=88059053; PubMed=2445749;
+RA   Sengupta B., Friedberg F., Detera-Wadleigh S.D.;
+RT   "Molecular analysis of human and rat calmodulin complementary DNA
+RT   clones. Evidence for additional active genes in these species.";
+RL   J. Biol. Chem. 262:16663-16670(1987).
+RN   [3]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=85022688; PubMed=6385987;
+RA   Wawrzynczak E.J., Perham R.N.;
+RT   "Isolation and nucleotide sequence of a cDNA encoding human
+RT   calmodulin.";
+RL   Biochem. Int. 9:177-185(1984).
+RN   [4]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Blood;
+RX   MEDLINE=95010144; PubMed=7925473;
+RA   Rhyner J.A., Ottiger M., Wicki R., Greenwood T.M., Strehler E.E.;
+RT   "Structure of the human CALM1 calmodulin gene and identification of
+RT   two CALM1-related pseudogenes CALM1P1 and CALM1P2.";
+RL   Eur. J. Biochem. 225:71-82(1994).
+RN   [5]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Lymphoma;
+RA   Kato S.;
+RL   Submitted (FEB-1995) to the EMBL/GenBank/DDBJ databases.
+RN   [6]
+RP   SEQUENCE.
+RC   SPECIES=Human; TISSUE=Brain;
+RX   MEDLINE=82231946; PubMed=7093203;
+RA   Sasagawa T., Ericsson L.H., Walsh K.A., Schreiber W.E., Fischer E.H.,
+RA   Titani K.;
+RT   "Complete amino acid sequence of human brain calmodulin.";
+RL   Biochemistry 21:2565-2569(1982).
+RN   [7]
+RP   SEQUENCE.
+RC   SPECIES=Rabbit; TISSUE=Skeletal muscle;
+RX   MEDLINE=81138220; PubMed=7202416;
+RA   Grand R.J.A., Shenolikar S., Cohen P.;
+RT   "The amino acid sequence of the delta subunit (calmodulin) of rabbit
+RT   skeletal muscle phosphorylase kinase.";
+RL   Eur. J. Biochem. 113:359-367(1981).
+RN   [8]
+RP   SEQUENCE.
+RC   SPECIES=Bovine; TISSUE=Brain;
+RA   Kasai H., Kato Y., Isobe T., Kawasaki H., Okuyama T.;
+RT   "Determination of the complete amino acid sequence of calmodulin
+RT   (phenylalanine-rich acidic protein II) from bovine brain.";
+RL   Biomed. Res. 1:248-264(1980).
+RN   [9]
+RP   SEQUENCE.
+RC   SPECIES=Bovine; TISSUE=Brain;
+RX   MEDLINE=80094551; PubMed=7356670;
+RA   Watterson D.M., Sharief F., Vanaman T.C.;
+RT   "The complete amino acid sequence of the Ca2+-dependent modulator
+RT   protein (calmodulin) of bovine brain.";
+RL   J. Biol. Chem. 255:962-975(1980).
+RN   [10]
+RP   SEQUENCE.
+RC   SPECIES=Bovine; TISSUE=Uterus;
+RA   Grand R.J.A., Perry S.V.;
+RT   "The amino acid sequence of the troponin C-like protein (modulator
+RT   protein) from bovine uterus.";
+RL   FEBS Lett. 92:137-142(1978).
+RN   [11]
+RP   SEQUENCE OF 38-60.
+RC   SPECIES=Bovine;
+RX   MEDLINE=89064822; PubMed=3058479;
+RA   Pribilla I., Krueger H., Buchner K., Otto H., Schiebler W.,
+RA   Tripier D., Hucho F.;
+RT   "Heat-resistant inhibitors of protein kinase C from bovine brain.";
+RL   Eur. J. Biochem. 177:657-664(1988).
+RN   [12]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse;
+RX   MEDLINE=88257100; PubMed=3384819;
+RA   Bender P.K., Dedman J.R., Emerson C.P.;
+RT   "The abundance of calmodulin mRNAs is regulated in phosphorylase
+RT   kinase-deficient skeletal muscle.";
+RL   J. Biol. Chem. 263:9733-9737(1988).
+RN   [13]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse;
+RX   MEDLINE=90006775; PubMed=2551780;
+RA   Danchin A., Sezer O., Glaser P., Chalon P., Caput D.;
+RT   "Cloning and expression of mouse-brain calmodulin as an activator of
+RT   Bordetella pertussis adenylate cyclase in Escherichia coli.";
+RL   Gene 80:145-149(1989).
+RN   [14]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse; STRAIN=BALB/C; TISSUE=Brain;
+RA   Kato K.;
+RT   "A collection of cDNA clones with specific expression patterns in
+RT   mouse brain.";
+RL   Eur. J. Neurosci. 2:704-711(1991).
+RN   [15]
+RP   SEQUENCE.
+RC   SPECIES=Rat; TISSUE=Testis;
+RX   MEDLINE=78066877; PubMed=201628;
+RA   Dedman J.R., Jackson R.L., Schreiber W.E., Means A.R.;
+RT   "Sequence homology of the Ca2+-dependent regulator of cyclic
+RT   nucleotide phosphodiesterase from rat testis with other Ca2+-binding
+RT   proteins.";
+RL   J. Biol. Chem. 253:343-346(1978).
+RN   [16]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Brain;
+RX   MEDLINE=87246077; PubMed=2885164;
+RA   Sherbany A.A., Parent A.S., Brosius J.;
+RT   "Rat calmodulin cDNA.";
+RL   DNA 6:267-272(1987).
+RN   [17]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Brain;
+RX   MEDLINE=87226204; PubMed=3035194;
+RA   Nojima H., Hirofumi S.;
+RT   "Structure of a gene for rat calmodulin.";
+RL   J. Mol. Biol. 193:439-445(1987).
+RN   [18]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat;
+RX   MEDLINE=87257889; PubMed=3037336;
+RA   Nojima H., Kishi K., Sokabe H.;
+RT   "Multiple calmodulin mRNA species are derived from two distinct
+RT   genes.";
+RL   Mol. Cell. Biol. 7:1873-1880(1987).
+RN   [19]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; STRAIN=SHR;
+RX   MEDLINE=89362474; PubMed=2527998;
+RA   Nojima H.;
+RT   "Structural organization of multiple rat calmodulin genes.";
+RL   J. Mol. Biol. 208:269-282(1989).
+RN   [20]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Chicken;
+RX   MEDLINE=84008199; PubMed=6137485;
+RA   Putkey J.A., Ts'Ui K.F., Tanaka T., Lagace L., Stein J.P., Lai E.C.,
+RA   Means A.R.;
+RT   "Chicken calmodulin genes. A species comparison of cDNA sequences and
+RT   isolation of a genomic clone.";
+RL   J. Biol. Chem. 258:11864-11870(1983).
+RN   [21]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Chicken;
+RX   MEDLINE=85104969; PubMed=2981850;
+RA   Simmen R.C.M., Tanaka T., Ts'Ui K.F., Putkey J.A., Scott M.J.,
+RA   Lai E.C., Means A.R.;
+RT   "The structural organization of the chicken calmodulin gene.";
+RL   J. Biol. Chem. 260:907-912(1985).
+RN   [22]
+RP   ERRATUM.
+RC   SPECIES=Chicken;
+RA   Simmen R.C.M., Tanaka T., Ts'Ui K.F., Putkey J.A., Scott M.J.,
+RA   Lai E.C., Means A.R.;
+RL   J. Biol. Chem. 262:4928-4929(1987).
+RN   [23]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Chicken;
+RA   Iida Y.;
+RT   "cDNA sequences and molecular evolution of calmodulin genes of
+RT   chicken and eel.";
+RL   Bull. Chem. Soc. Jpn. 57:2667-2668(1984).
+RN   [24]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=A.platyrhynchos;
+RX   MEDLINE=93287810; PubMed=8389959;
+RA   Kimura N., Kurosawa N., Kondo K., Tsukada Y.;
+RT   "Molecular cloning of the kainate-binding protein and calmodulin
+RT   genes which are induced by an imprinting stimulus in ducklings.";
+RL   Brain Res. Mol. Brain Res. 17:351-355(1993).
+RN   [25]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=X.laevis;
+RX   MEDLINE=84191128; PubMed=6325880;
+RA   Chien Y.-H., Dawid I.B.;
+RT   "Isolation and characterization of calmodulin genes from Xenopus
+RT   laevis.";
+RL   Mol. Cell. Biol. 4:507-513(1984).
+RN   [26]
+RP   SEQUENCE OF 1-141 FROM N.A.
+RC   SPECIES=A.punctulata;
+RX   MEDLINE=88172463; PubMed=3351921;
+RA   Hardy D.O., Bender P.K., Kretsinger R.H.;
+RT   "Two calmodulin genes are expressed in Arbacia punctulata. An ancient
+RT   gene duplication is indicated.";
+RL   J. Mol. Biol. 199:223-227(1988).
+RN   [27]
+RP   SEQUENCE.
+RC   SPECIES=Salmon;
+RA   Yazawa M., Toda H., Yagi Y.;
+RT   "Amino acid sequence of salmon calmodulin.";
+RL   Seikagaku 57:1037-1037(1985).
+RN   [28]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=O.latipes;
+RX   MEDLINE=93012998; PubMed=1398109;
+RA   Matsuo K., Sato K., Ikeshima H., Shimoda K., Takano T.;
+RT   "Four synonymous genes encode calmodulin in the teleost fish, medaka
+RT   (Oryzias latipes): conservation of the multigene one-protein
+RT   principle.";
+RL   Gene 119:279-281(1992).
+RN   [29]
+RP   SEQUENCE OF 1-27, AND UBIQUITYLATION OF LYS-21.
+RC   SPECIES=Bovine;
+RX   MEDLINE=98380241; PubMed=9716384;
+RA   Laub M., Steppuhn J.A., Blueggel M., Immler D., Meyer H.E.,
+RA   Jennissen H.P.;
+RT   "Modulation of calmodulin function by ubiquitin-calmodulin ligase and
+RT   identification of the responsible ubiquitylation site in vertebrate
+RT   calmodulin.";
+RL   Eur. J. Biochem. 255:422-431(1998).
+RN   [30]
+RP   X-RAY CRYSTALLOGRAPHY (3.0 ANGSTROMS).
+RC   SPECIES=Rat;
+RX   MEDLINE=85188323; PubMed=3990807;
+RA   Babu Y.S., Sack J.S., Greenhough T.J., Bugg C.E., Means A.R.,
+RA   Cook W.J.;
+RT   "Three-dimensional structure of calmodulin.";
+RL   Nature 315:37-40(1985).
+RN   [31]
+RP   X-RAY CRYSTALLOGRAPHY (2.2 ANGSTROMS).
+RC   SPECIES=Rat;
+RX   MEDLINE=89110997; PubMed=3145979;
+RA   Babu Y.S., Bugg C.E., Cook W.J.;
+RT   "Structure of calmodulin refined at 2.2-A resolution.";
+RL   J. Mol. Biol. 204:191-204(1988).
+RN   [32]
+RP   X-RAY CRYSTALLOGRAPHY (2 ANGSTROMS).
+RC   SPECIES=Bovine;
+RX   MEDLINE=98104088; PubMed=9438860;
+RA   Wall M.E., Clarage J.B., Phillips G.N.;
+RT   "Motions of calmodulin characterized using both Bragg and diffuse
+RT   X-ray scattering.";
+RL   Structure 5:1599-1612(1997).
+RN   [33]
+RP   STRUCTURE BY NMR OF 76-148.
+RX   MEDLINE=94085641; PubMed=8262263;
+RA   Finn B.E., Drakenberg T., Forsen S.;
+RT   "The structure of apo-calmodulin. A 1H NMR examination of the
+RT   carboxy-terminal domain.";
+RL   FEBS Lett. 336:368-374(1993).
+RN   [34]
+RP   STRUCTURE BY NMR OF 76-148.
+RX   MEDLINE=96018615; PubMed=7552749;
+RA   Finn B.E., Evenas J., Drakenberg T., Waltho J.P., Thulin E.,
+RA   Forsen S.;
+RT   "Calcium-induced structural changes and domain autonomy in
+RT   calmodulin.";
+RL   Nat. Struct. Biol. 2:777-783(1995).
+RN   [35]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=96018613; PubMed=7552747;
+RA   Zhang M., Tanaka T., Ikura M.;
+RT   "Calcium-induced conformational transition revealed by the solution
+RT   structure of apo calmodulin.";
+RL   Nat. Struct. Biol. 2:758-767(1995).
+RN   [36]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=96018614; PubMed=7552748;
+RA   Kuboniwa H., Tjandra N., Grzesiek S., Ren H., Klee C.B., Bax A.;
+RT   "Solution structure of calcium-free calmodulin.";
+RL   Nat. Struct. Biol. 2:768-776(1995).
+RN   [37]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=98179557; PubMed=9514729;
+RA   Osawa M., Swindells M.B., Tanikawa J., Tanaka T., Mase T., Furuya T.,
+RA   Ikura M.;
+RT   "Solution structure of calmodulin-W-7 complex: the basis of diversity
+RT   in molecular recognition.";
+RL   J. Mol. Biol. 276:165-176(1998).
+RN   [38]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=99425120; PubMed=10493800;
+RA   Elshorst B., Hennig M., Foersterling H., Diener A., Maurer M.,
+RA   Schulte P., Schwalbe H., Griesinger C., Krebs J., Schmid H.,
+RA   Vorherr T., Carafoli E.;
+RT   "NMR solution structure of a complex of calmodulin with a binding
+RT   peptide of the Ca(2+) pump.";
+RL   Biochemistry 38:12320-12332(1999).
+CC   -!- FUNCTION: CALMODULIN MEDIATES THE CONTROL OF A LARGE NUMBER OF
+CC       ENZYMES BY CA(++). AMONG THE ENZYMES TO BE STIMULATED BY THE
+CC       CALMODULIN-CA(++) COMPLEX ARE A NUMBER OF PROTEIN KINASES AND
+CC       PHOSPHATASES.
+CC   -!- PTM: UBIQUITYLATION STRONGLY DECREASES THE ACTIVITY.
+CC   -!- MISCELLANEOUS: THIS PROTEIN HAS FOUR FUNCTIONAL CALCIUM-BINDING
+CC       SITES.
+CC   -!- SIMILARITY: TO OTHER EF-HAND CALCIUM BINDING PROTEINS.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; L00101; AAA48653.1; -.
+DR   EMBL; L00096; AAA48653.1; JOINED.
+DR   EMBL; L00097; AAA48653.1; JOINED.
+DR   EMBL; L00098; AAA48653.1; JOINED.
+DR   EMBL; L00099; AAA48653.1; JOINED.
+DR   EMBL; L00100; AAA48653.1; JOINED.
+DR   EMBL; M16659; AAA40864.1; -.
+DR   EMBL; M27319; AAA35635.1; -.
+DR   EMBL; U12022; AAB60644.1; -.
+DR   EMBL; U11886; AAB60644.1; JOINED.
+DR   EMBL; D45887; BAA08302.1; -.
+DR   EMBL; X13817; CAA32050.1; -.
+DR   EMBL; J04046; AAA51918.1; -.
+DR   EMBL; M19311; AAA35641.1; -.
+DR   EMBL; M19312; AAA40862.1; -.
+DR   EMBL; M17069; AAA40863.1; -.
+DR   EMBL; X13933; CAA32120.1; -.
+DR   EMBL; X13931; CAA32119.1; -.
+DR   EMBL; X13932; CAA32119.1; JOINED.
+DR   EMBL; X05117; CAA32119.1; JOINED.
+DR   EMBL; X13833; CAA32062.1; -.
+DR   EMBL; X13834; CAA32062.1; JOINED.
+DR   EMBL; X13835; CAA32062.1; JOINED.
+DR   EMBL; X14265; CAA32478.1; -.
+DR   EMBL; D83350; BAA11896.1; -.
+DR   EMBL; M36167; AAA48650.1; -.
+DR   EMBL; K01944; AAA49668.1; -.
+DR   EMBL; K01945; AAA49669.1; -.
+DR   EMBL; D10363; BAA01195.1; -.
+DR   EMBL; M19380; AAA66181.1; -.
+DR   EMBL; M19381; AAA66182.1; -.
+DR   EMBL; L31642; AAA65934.1; -.
+DR   EMBL; M27844; AAA37365.1; -.
+DR   EMBL; X61432; CAA43674.1; -.
+DR   PIR; S13159; MCHU.
+DR   PIR; JK0013; MCON.
+DR   PIR; A90719; MCBO.
+DR   PIR; A91104; MCRB.
+DR   PIR; S03206; MCRT.
+DR   PIR; A92394; MCCH.
+DR   PIR; S02690; S02690.
+DR   PIR; A60781; A60781.
+DR   PIR; JC1305; JC1305.
+DR   PDB; 2CLN; 15-OCT-94.
+DR   PDB; 3CLN; 09-JAN-89.
+DR   PDB; 1TRC; 15-OCT-91.
+DR   PDB; 1AK8; 17-SEP-97.
+DR   PDB; 1CDL; 31-AUG-94.
+DR   PDB; 1CDM; 31-AUG-94.
+DR   PDB; 1CFC; 07-DEC-95.
+DR   PDB; 1CFD; 07-DEC-95.
+DR   PDB; 1CLL; 31-OCT-93.
+DR   PDB; 1CM1; 04-MAR-98.
+DR   PDB; 1CM4; 04-MAR-98.
+DR   PDB; 1CMF; 07-DEC-95.
+DR   PDB; 1CMG; 07-DEC-95.
+DR   PDB; 1CTR; 20-DEC-94.
+DR   PDB; 1DEG; 31-MAY-94.
+DR   PDB; 1DMO; 01-AUG-96.
+DR   PDB; 1LIN; 08-MAR-96.
+DR   PDB; 1AJI; 17-SEP-97.
+DR   PDB; 1A29; 16-SEP-98.
+DR   PDB; 1MUX; 25-NOV-98.
+DR   PDB; 1CFF; 24-SEP-91.
+DR   SWISS-2DPAGE; P99014; MOUSE.
+DR   Aarhus/Ghent-2DPAGE; 9048; IEF.
+DR   MIM; 114180; -.
+DR   MIM; 114182; -.
+DR   MIM; 114183; -.
+DR   MGD; MGI:88251; Calm.
+DR   MGD; MGI:103250; Calm2.
+DR   MGD; MGI:103249; Calm3.
+DR   InterPro; IPR002048; EF-hand.
+DR   Pfam; PF00036; efhand; 4.
+DR   SMART; SM00054; EFh; 4.
+DR   PROSITE; PS00018; EF_HAND; 4.
+KW   Calcium-binding; Duplication; Methylation; Acetylation;
+KW   3D-structure.
+FT   INIT_MET      0      0
+FT   MOD_RES       1      1       ACETYLATION.
+FT   MOD_RES     115    115       METHYLATION (TRI-) (IN CHICKEN).
+FT   CA_BIND      20     31       EF-HAND 1.
+FT   CA_BIND      56     67       EF-HAND 2.
+FT   CA_BIND      93    104       EF-HAND 3.
+FT   CA_BIND     129    140       EF-HAND 4.
+FT   BINDING      21     21       UBIQUITIN (MULTI-).
+FT   CONFLICT     25     25       G -> N (IN REF. 12; AAA66182).
+FT   HELIX         5     19
+FT   TURN         21     22
+FT   STRAND       26     27
+FT   HELIX        29     37
+FT   TURN         38     40
+FT   HELIX        45     55
+FT   TURN         57     58
+FT   STRAND       63     64
+FT   HELIX        65     92
+FT   TURN         94     95
+FT   STRAND      100    100
+FT   HELIX       102    111
+FT   TURN        112    113
+FT   HELIX       118    128
+FT   STRAND      136    136
+FT   HELIX       138    146
+SQ   SEQUENCE   148 AA;  16706 MW;  464B8A287475A1CA CRC64;
+     ADQLTEEQIA EFKEAFSLFD KDGDGTITTK ELGTVMRSLG QNPTEAELQD MINEVDADGN
+     GTIDFPEFLT MMARKMKDTD SEEEIREAFR VFDKDGNGYI SAAELRHVMT NLGEKLTDEE
+     VDEMIREADI DGDGQVNYEE FVQMMTAK
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/short.blx
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/short.blx	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/short.blx	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,451 @@
+BLASTX 2.0.13 [May-26-2000]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= 18.ctg12393
+         (6959 letters)
+
+Database: h_nrNov10_2000
+           57,234 sequences; 18,303,128 total letters
+
+Searching..................................................done
+
+                                                                   Score     E
+Sequences producing significant alignments:                        (bits)  Value
+
+gi|728837|sp|P39194|ALU7_HUMAN ALU SUBFAMILY SQ SEQUENCE CONTAM...   161  8e-39
+gi|728836|sp|P39193|ALU6_HUMAN ALU SUBFAMILY SP SEQUENCE CONTAM...   156  3e-37
+
+>gi|728837|sp|P39194|ALU7_HUMAN ALU SUBFAMILY SQ SEQUENCE
+            CONTAMINATION WARNING ENTRY
+          Length = 593
+
+ Score =  161 bits (404), Expect = 8e-39
+ Identities = 73/97 (75%), Positives = 76/97 (78%)
+ Frame = -1
+
+Query: 2996 FFLRQSFTLVTQAGVQWHDLSSLQPLPPRFKGFSSLSLPISWDYRRLPPCLANFCIFHKD 2817
+            FFLR+SF LV QAGVQW DL SLQP PP FK FS LSLP SWDYRR PP  ANFCIF +D
+Sbjct: 299  FFLRRSFALVAQAGVQWRDLGSLQPPPPGFKRFSCLSLPSSWDYRRPPPRPANFCIFSRD 358
+
+Query: 2816 GVLPCWPGWS*TPDLR*YAHFGIPKCWDYRREPPCPA 2706
+            GV PCWPGWS TPDLR     G+PKCWDYRREPP PA
+Sbjct: 359  GVSPCWPGWSRTPDLRXSTRLGLPKCWDYRREPPRPA 395
+
+
+ Score =  156 bits (391), Expect = 3e-37
+ Identities = 76/97 (78%), Positives = 83/97 (85%)
+ Frame = +3
+
+Query: 2706 GRARWLTPVIPALWDAKVGISSEVRSSRPAWPTWQNSVFMKNTKISQAWWQAPVIPANWE 2885
+            GRARWLTPVIPALW+A+ G S EVRSSRPAWPTW N V  KNTKIS+AWW+APVIPA  E
+Sbjct: 1    GRARWLTPVIPALWEAEAGGSPEVRSSRPAWPTWXNPVSTKNTKISRAWWRAPVIPATRE 60
+
+Query: 2886 AEAGESLESRRQRLQ*AEIVPLHSSLGDKSKTLSQKK 2996
+            AEAGESLE  R+RLQ AEI PLHSSLG+KS+T SQKK
+Sbjct: 61   AEAGESLEPGRRRLQXAEIAPLHSSLGNKSETPSQKK 97
+
+
+ Score =  140 bits (349), Expect = 2e-32
+ Identities = 74/101 (73%), Positives = 79/101 (77%)
+ Frame = -2
+
+Query: 2995 FF*DRVLLLSPRLECNGTISAHCNLCLLDSRDSPASASQLAGITGACHHAWLIFVFFIKT 2816
+            FF D V LL PRLEC+G ISAHCNL L  S DSPASAS++AGITGA HHA LIFVF ++T
+Sbjct: 399  FFXDGVSLLLPRLECSGAISAHCNLRLPGSSDSPASASRVAGITGARHHARLIFVFLVET 458
+
+Query: 2815 EFCHVGQAGLELLTSDDMPTLASQSAGITGVSHRARPGIMF 2693
+             F HVGQAGLELLTS D P  ASQSAGITGVSHRAR    F
+Sbjct: 459  GFHHVGQAGLELLTSGDPPASASQSAGITGVSHRARXXXFF 499
+
+
+ Score =  134 bits (334), Expect = 1e-30
+ Identities = 67/89 (75%), Positives = 71/89 (79%)
+ Frame = +1
+
+Query: 2707 AGHGGSRL*SQHFGMPKWAYHLRSGVQDQPGQHGKTPSL*KIQKLARHGGRRL*SQLIGR 2886
+            AG GGSRL SQHFG P+   HLRSGV+DQPGQHG+TPSL KIQKLA  GG RL SQL+GR
+Sbjct: 101  AGRGGSRLXSQHFGRPRRVDHLRSGVRDQPGQHGETPSLLKIQKLAGRGGGRLXSQLLGR 160
+
+Query: 2887 LRLENPLNLGGRGCSELRSCHCTPAWVTR 2973
+            LR EN LN GG GCSE RS HCTPAW TR
+Sbjct: 161  LRQENRLNPGGGGCSEPRSRHCTPAWATR 189
+
+
+ Score =  129 bits (321), Expect = 4e-29
+ Identities = 72/101 (71%), Positives = 75/101 (73%)
+ Frame = +2
+
+Query: 2693 KHYTWPGTVAHACNPSTLGCQSGHII*GQEFKTSLANMAKLRLYEKYKN*PGMVAGACNP 2872
+            K    PG VAHACNPSTLG + G I  GQEF+TSLANM K RLY KYKN PG+VAGACNP
+Sbjct: 195  KKXXXPGAVAHACNPSTLGGRGGWITXGQEFETSLANMVKPRLYXKYKNXPGVVAGACNP 254
+
+Query: 2873 S*LGG*GWRIP*I*EAEVAVS*DRAIALQPG*QE*NSVSKK 2995
+            S  GG G RI    EAEVAVS DRA ALQPG QE NSVSKK
+Sbjct: 255  SYSGGXGRRIAXTREAEVAVSRDRATALQPGQQERNSVSKK 295
+
+
+ Score =  119 bits (295), Expect = 5e-26
+ Identities = 74/101 (73%), Positives = 79/101 (77%), Gaps = 54/101 (53%)
+ Frame = +1
+
+Query: 5179 FFLRWSFALSPRLECSGAISAHCNLRLPGSSNSPASASRVAGTTGVRHHTRLIFVFFVKT 5358
+            FF      L PRLECSGAISAHCNLRLPGSS+SPASASRVAG TG RHH RLIFVF V+T
+Sbjct: 399  FFXDGVSLLLPRLECSGAISAHCNLRLPGSSDSPASASRVAGITGARHHARLIFVFLVET 458
+
+Query: 5359 GFHHVGQ------------------AGLN------------------------------- 5391
+            GFHHVGQ                  AG+                                
+Sbjct: 459  GFHHVGQAGLELLTSGDPPASASQSAGITGVSHRARXXXFFETEFRSCCPGWSAVARSRL 518
+
+Query: 5392 -----SRPQVIHLSQPPKVLGLQV*ATVPGYFMDF 5481
+                 SR Q I L QPP+ LGLQ  AT PG F+ F
+Sbjct: 519  TATSASRVQAILLPQPPEXLGLQAPATTPGXFLYF 553
+
+
+ Score =  113 bits (281), Expect = 2e-24
+ Identities = 65/94 (69%), Positives = 73/94 (77%), Gaps = 55/94 (58%)
+ Frame = +3
+
+Query: 5205 VTQAGMQWCNLSSLQPPPPRFKQFSCLSLPSSWDYRCAPPHQANFCIFCKDRFSPRWPGW 5384
+            V QAG+QW +L SLQPPPP FK+FSCLSLPSSWDYR  PP  ANFCIF +D  SP WPGW
+Sbjct: 308  VAQAGVQWRDLGSLQPPPPGFKRFSCLSLPSSWDYRRPPPRPANFCIFSRDGVSPCWPGW 367
+
+Query: 5385 S-----------------------------------------ELQTSG------------ 5405
+            S                                          L+ SG            
+Sbjct: 368  SRTPDLRXSTRLGLPKCWDYRREPPRPAXXXFFXDGVSLLLPRLECSGAISAHCNLRLPG 427
+
+Query: 5406 --DSPVSASQSAGITGVSHRARLLHGFLI 5486
+              DSP SAS+ AGITG  H ARL+  FL+
+Sbjct: 428  SSDSPASASRVAGITGARHHARLIFVFLV 456
+
+
+ Score =  111 bits (276), Expect(2) = 1e-32
+ Identities = 51/63 (80%), Positives = 55/63 (86%)
+ Frame = -1
+
+Query: 5387 RPAWPTW*KPVFTKNTKISLVWWRTPVVPATREAEAGELLEPGRRRLQ*AEIAPLHSSLG 5208
+            RPAWPTW  PV TKNTKIS  WWR PV+PATREAEAGE LEPGRRRLQ AEIAPLHSSLG
+Sbjct: 28   RPAWPTWXNPVSTKNTKISRAWWRAPVIPATREAEAGESLEPGRRRLQXAEIAPLHSSLG 87
+
+Query: 5207 DRA 5199
+            +++
+Sbjct: 88   NKS 90
+
+
+ Score = 89.7 bits (219), Expect(2) = 6e-24
+ Identities = 45/61 (73%), Positives = 45/61 (73%)
+ Frame = -2
+
+Query: 5386 DQPGQRGENLSLQKIQKLAWCGGAHL*SQLLGRLRQENCLNLGGGGCSELRLHHCIPAWV 5207
+            DQPGQ GE  SL KIQKLA  GG  L SQLLGRLRQEN LN GGGGCSE R  HC PAW 
+Sbjct: 128  DQPGQHGETPSLLKIQKLAGRGGGRLXSQLLGRLRQENRLNPGGGGCSEPRSRHCTPAWA 187
+
+Query: 5206 T 5204
+            T
+Sbjct: 188  T 188
+
+
+ Score = 87.8 bits (214), Expect = 1e-16
+ Identities = 53/89 (59%), Positives = 56/89 (62%)
+ Frame = -3
+
+Query: 2973 SCHPGWSAMARSQLTATSASXXXXXXXXXXXXXXXXXXXATMPG*FLYFS*RRSFAMLAR 2794
+            SC PGWSA+ARS+LTATSAS                   AT PG FLYF  RR F MLAR
+Sbjct: 505  SCCPGWSAVARSRLTATSASRVQAILLPQPPEXLGLQAPATTPGXFLYFXXRRGFTMLAR 564
+
+Query: 2793 LVLNS*PQMICPLWHPKVLGLQA*ATVPG 2707
+            LV NS PQ+I P   PKVLGLQA AT PG
+Sbjct: 565  LVSNSXPQVIHPPRPPKVLGLQAXATAPG 593
+
+
+ Score = 83.1 bits (202), Expect = 4e-15
+ Identities = 45/62 (72%), Positives = 48/62 (76%)
+ Frame = -3
+
+Query: 5388 QTSLANVVKTCLYKKYKN*PGVVAHTCSPSYSGG*GRRIA*TWEAEVAVS*DCTTAFQPG 5209
+            +TSLAN+VK  LY KYKN PGVVA  C+PSYSGG GRRIA T EAEVAVS D  TA QPG
+Sbjct: 226  ETSLANMVKPRLYXKYKNXPGVVAGACNPSYSGGXGRRIAXTREAEVAVSRDRATALQPG 285
+
+Query: 5208 *Q 5203
+             Q
+Sbjct: 286  QQ 287
+
+
+ Score = 62.8 bits (150), Expect = 5e-09
+ Identities = 36/61 (59%), Positives = 41/61 (67%)
+ Frame = +2
+
+Query: 5204 CHPGWNAVVQSQLTATSASQVQAIXXXXXXXXXXXXVCATTPG*FLYFL*RQVFTTLARL 5383
+            C PGW+AV +S+LTATSAS+VQAI              ATTPG FLYF  R+ FT LARL
+Sbjct: 506  CCPGWSAVARSRLTATSASRVQAILLPQPPEXLGLQAPATTPGXFLYFXXRRGFTMLARL 565
+
+Query: 5384 V 5386
+            V
+Sbjct: 566  V 566
+
+
+ Score = 50.4 bits (118), Expect(2) = 1e-32
+ Identities = 23/26 (88%), Positives = 23/26 (88%)
+ Frame = -2
+
+Query: 5464 RARWLTPVIPALWEAETGESPEVWSS 5387
+            RARWLTPVIPALWEAE G SPEV SS
+Sbjct: 2    RARWLTPVIPALWEAEAGGSPEVRSS 27
+
+
+ Score = 47.6 bits (111), Expect = 2e-04
+ Identities = 23/38 (60%), Positives = 24/38 (62%)
+ Frame = +2
+
+Query: 5390 TPDLR*FTCLSLPKCWDYRCEPPCPATSWIFDYRLCLL 5503
+            TPDLR  T L LPKCWDYR EPP PA    F   + LL
+Sbjct: 370  TPDLRXSTRLGLPKCWDYRREPPRPAXXXFFXDGVSLL 407
+
+
+ Score = 43.4 bits (100), Expect(2) = 6e-24
+ Identities = 20/27 (74%), Positives = 23/27 (85%)
+ Frame = -3
+
+Query: 5466 AGHGGSHL*SQHFGRLRQVNHLRSGVQ 5386
+            AG GGS L SQHFGR R+V+HLRSGV+
+Sbjct: 101  AGRGGSRLXSQHFGRPRRVDHLRSGVR 127
+
+
+>gi|728836|sp|P39193|ALU6_HUMAN ALU SUBFAMILY SP SEQUENCE
+            CONTAMINATION WARNING ENTRY
+          Length = 593
+
+ Score =  156 bits (390), Expect = 3e-37
+ Identities = 71/97 (73%), Positives = 74/97 (76%)
+ Frame = -1
+
+Query: 2996 FFLRQSFTLVTQAGVQWHDLSSLQPLPPRFKGFSSLSLPISWDYRRLPPCLANFCIFHKD 2817
+            FFLR+SF LV QAGVQW DL S QP PP FK FS LSLP SWDYR  PP  ANFCIF +D
+Sbjct: 299  FFLRRSFALVAQAGVQWRDLGSPQPPPPGFKRFSCLSLPSSWDYRHAPPRPANFCIFSRD 358
+
+Query: 2816 GVLPCWPGWS*TPDLR*YAHFGIPKCWDYRREPPCPA 2706
+            GV PCW GWS TPDLR  A  G+PKCWDYRREPP PA
+Sbjct: 359  GVSPCWSGWSRTPDLRXSARLGLPKCWDYRREPPRPA 395
+
+
+ Score =  146 bits (364), Expect = 4e-34
+ Identities = 72/97 (74%), Positives = 81/97 (83%)
+ Frame = +3
+
+Query: 2706 GRARWLTPVIPALWDAKVGISSEVRSSRPAWPTWQNSVFMKNTKISQAWWQAPVIPANWE 2885
+            GRARWLTPVIPALW+A+ G S EV SSRPA PTW+N V  KNTKIS+AWW+ PVIPA  E
+Sbjct: 1    GRARWLTPVIPALWEAEAGGSPEVGSSRPAXPTWRNPVSTKNTKISRAWWRMPVIPATRE 60
+
+Query: 2886 AEAGESLESRRQRLQ*AEIVPLHSSLGDKSKTLSQKK 2996
+            AEAGESLE  R+RL+ AEI PLHSSLG+KS+T SQKK
+Sbjct: 61   AEAGESLEPGRRRLRXAEIAPLHSSLGNKSETPSQKK 97
+
+
+ Score =  133 bits (332), Expect = 2e-30
+ Identities = 72/101 (71%), Positives = 76/101 (74%)
+ Frame = -2
+
+Query: 2995 FF*DRVLLLSPRLECNGTISAHCNLCLLDSRDSPASASQLAGITGACHHAWLIFVFFIKT 2816
+            FF D V LL PRLECNG ISAH NL L  S DSPASAS++AGITG  HHA LIFVF ++T
+Sbjct: 399  FFXDGVSLLLPRLECNGAISAHRNLRLPGSSDSPASASRVAGITGMRHHARLIFVFLVET 458
+
+Query: 2815 EFCHVGQAGLELLTSDDMPTLASQSAGITGVSHRARPGIMF 2693
+             F HVGQAGLEL TS D P  ASQSAGITGVSHRAR    F
+Sbjct: 459  GFLHVGQAGLELPTSGDPPASASQSAGITGVSHRARXXXFF 499
+
+
+ Score =  129 bits (320), Expect = 5e-29
+ Identities = 65/89 (73%), Positives = 69/89 (77%)
+ Frame = +1
+
+Query: 2707 AGHGGSRL*SQHFGMPKWAYHLRSGVQDQPGQHGKTPSL*KIQKLARHGGRRL*SQLIGR 2886
+            AG GGSRL SQHFG P+ A HLRSGV+DQP QHG+TPSL KIQKLA  GG  L SQL+GR
+Sbjct: 101  AGRGGSRLXSQHFGRPRRADHLRSGVRDQPDQHGETPSLLKIQKLAGRGGACLXSQLLGR 160
+
+Query: 2887 LRLENPLNLGGRGCSELRSCHCTPAWVTR 2973
+            LR EN LN GG GC E RS HCTPAW TR
+Sbjct: 161  LRQENRLNPGGGGCGEPRSRHCTPAWATR 189
+
+
+ Score =  125 bits (311), Expect = 6e-28
+ Identities = 70/101 (69%), Positives = 74/101 (72%)
+ Frame = +2
+
+Query: 2693 KHYTWPGTVAHACNPSTLGCQSGHII*GQEFKTSLANMAKLRLYEKYKN*PGMVAGACNP 2872
+            K    PG VAHACNPSTLG + G I  G+EF+TSL NM K RLY KYKN PG+VA ACNP
+Sbjct: 195  KKXXXPGAVAHACNPSTLGGRGGRITXGREFETSLTNMEKPRLYXKYKNXPGVVAHACNP 254
+
+Query: 2873 S*LGG*GWRIP*I*EAEVAVS*DRAIALQPG*QE*NSVSKK 2995
+            S  GG G RI    EAEVAVS DRAIALQPG QE NSVSKK
+Sbjct: 255  SYSGGXGRRIAXTREAEVAVSRDRAIALQPGQQERNSVSKK 295
+
+
+ Score =  109 bits (270), Expect = 4e-23
+ Identities = 71/101 (70%), Positives = 78/101 (76%), Gaps = 54/101 (53%)
+ Frame = +1
+
+Query: 5179 FFLRWSFALSPRLECSGAISAHCNLRLPGSSNSPASASRVAGTTGVRHHTRLIFVFFVKT 5358
+            FF      L PRLEC+GAISAH NLRLPGSS+SPASASRVAG TG+RHH RLIFVF V+T
+Sbjct: 399  FFXDGVSLLLPRLECNGAISAHRNLRLPGSSDSPASASRVAGITGMRHHARLIFVFLVET 458
+
+Query: 5359 GFHHVGQ------------------AGLN------------------------------- 5391
+            GF HVGQ                  AG+                                
+Sbjct: 459  GFLHVGQAGLELPTSGDPPASASQSAGITGVSHRARXXXFFETEFRSCCPGWSAMARSRL 518
+
+Query: 5392 -----SRPQVIHLSQPPKVLGLQV*ATVPGYFMDF 5481
+                 SR Q I L QPP+ LGLQ  AT PG F+ F
+Sbjct: 519  TATSASRVQAILLPQPPEXLGLQACATTPGXFLYF 553
+
+
+ Score =  105 bits (260), Expect = 6e-22
+ Identities = 62/94 (65%), Positives = 71/94 (74%), Gaps = 55/94 (58%)
+ Frame = +3
+
+Query: 5205 VTQAGMQWCNLSSLQPPPPRFKQFSCLSLPSSWDYRCAPPHQANFCIFCKDRFSPRWPGW 5384
+            V QAG+QW +L S QPPPP FK+FSCLSLPSSWDYR APP  ANFCIF +D  SP W GW
+Sbjct: 308  VAQAGVQWRDLGSPQPPPPGFKRFSCLSLPSSWDYRHAPPRPANFCIFSRDGVSPCWSGW 367
+
+Query: 5385 S-----------------------------------------------------ELQTSG 5405
+            S                                                      L+  G
+Sbjct: 368  SRTPDLRXSARLGLPKCWDYRREPPRPAXXXFFXDGVSLLLPRLECNGAISAHRNLRLPG 427
+
+Query: 5406 DS--PVSASQSAGITGVSHRARLLHGFLI 5486
+             S  P SAS+ AGITG+ H ARL+  FL+
+Sbjct: 428  SSDSPASASRVAGITGMRHHARLIFVFLV 456
+
+
+ Score =  104 bits (258), Expect(2) = 1e-30
+ Identities = 49/63 (77%), Positives = 54/63 (84%)
+ Frame = -1
+
+Query: 5387 RPAWPTW*KPVFTKNTKISLVWWRTPVVPATREAEAGELLEPGRRRLQ*AEIAPLHSSLG 5208
+            RPA PTW  PV TKNTKIS  WWR PV+PATREAEAGE LEPGRRRL+ AEIAPLHSSLG
+Sbjct: 28   RPAXPTWRNPVSTKNTKISRAWWRMPVIPATREAEAGESLEPGRRRLRXAEIAPLHSSLG 87
+
+Query: 5207 DRA 5199
+            +++
+Sbjct: 88   NKS 90
+
+
+ Score = 88.2 bits (215), Expect = 1e-16
+ Identities = 53/89 (59%), Positives = 56/89 (62%)
+ Frame = -3
+
+Query: 2973 SCHPGWSAMARSQLTATSASXXXXXXXXXXXXXXXXXXXATMPG*FLYFS*RRSFAMLAR 2794
+            SC PGWSAMARS+LTATSAS                   AT PG FLYF  RR F+ML R
+Sbjct: 505  SCCPGWSAMARSRLTATSASRVQAILLPQPPEXLGLQACATTPGXFLYFXXRRGFSMLVR 564
+
+Query: 2793 LVLNS*PQMICPLWHPKVLGLQA*ATVPG 2707
+            LV NS PQ+I P   PKVLGLQA AT PG
+Sbjct: 565  LVSNSRPQVIRPPRPPKVLGLQAXATAPG 593
+
+
+ Score = 85.8 bits (209), Expect(2) = 2e-22
+ Identities = 44/61 (72%), Positives = 44/61 (72%)
+ Frame = -2
+
+Query: 5386 DQPGQRGENLSLQKIQKLAWCGGAHL*SQLLGRLRQENCLNLGGGGCSELRLHHCIPAWV 5207
+            DQP Q GE  SL KIQKLA  GGA L SQLLGRLRQEN LN GGGGC E R  HC PAW 
+Sbjct: 128  DQPDQHGETPSLLKIQKLAGRGGACLXSQLLGRLRQENRLNPGGGGCGEPRSRHCTPAWA 187
+
+Query: 5206 T 5204
+            T
+Sbjct: 188  T 188
+
+
+ Score = 80.8 bits (196), Expect = 2e-14
+ Identities = 43/62 (69%), Positives = 46/62 (73%)
+ Frame = -3
+
+Query: 5388 QTSLANVVKTCLYKKYKN*PGVVAHTCSPSYSGG*GRRIA*TWEAEVAVS*DCTTAFQPG 5209
+            +TSL N+ K  LY KYKN PGVVAH C+PSYSGG GRRIA T EAEVAVS D   A QPG
+Sbjct: 226  ETSLTNMEKPRLYXKYKNXPGVVAHACNPSYSGGXGRRIAXTREAEVAVSRDRAIALQPG 285
+
+Query: 5208 *Q 5203
+             Q
+Sbjct: 286  QQ 287
+
+
+ Score = 63.2 bits (151), Expect = 4e-09
+ Identities = 34/61 (55%), Positives = 41/61 (66%)
+ Frame = +2
+
+Query: 5204 CHPGWNAVVQSQLTATSASQVQAIXXXXXXXXXXXXVCATTPG*FLYFL*RQVFTTLARL 5383
+            C PGW+A+ +S+LTATSAS+VQAI             CATTPG FLYF  R+ F+ L RL
+Sbjct: 506  CCPGWSAMARSRLTATSASRVQAILLPQPPEXLGLQACATTPGXFLYFXXRRGFSMLVRL 565
+
+Query: 5384 V 5386
+            V
+Sbjct: 566  V 566
+
+
+ Score = 50.8 bits (119), Expect(2) = 1e-30
+ Identities = 23/26 (88%), Positives = 23/26 (88%)
+ Frame = -2
+
+Query: 5464 RARWLTPVIPALWEAETGESPEVWSS 5387
+            RARWLTPVIPALWEAE G SPEV SS
+Sbjct: 2    RARWLTPVIPALWEAEAGGSPEVGSS 27
+
+
+ Score = 45.7 bits (106), Expect = 7e-04
+ Identities = 22/38 (57%), Positives = 23/38 (59%)
+ Frame = +2
+
+Query: 5390 TPDLR*FTCLSLPKCWDYRCEPPCPATSWIFDYRLCLL 5503
+            TPDLR    L LPKCWDYR EPP PA    F   + LL
+Sbjct: 370  TPDLRXSARLGLPKCWDYRREPPRPAXXXFFXDGVSLL 407
+
+
+ Score = 41.8 bits (96), Expect(2) = 2e-22
+ Identities = 19/27 (70%), Positives = 22/27 (81%)
+ Frame = -3
+
+Query: 5466 AGHGGSHL*SQHFGRLRQVNHLRSGVQ 5386
+            AG GGS L SQHFGR R+ +HLRSGV+
+Sbjct: 101  AGRGGSRLXSQHFGRPRRADHLRSGVR 127
+
+
+  Database: h_nrNov10_2000
+    Posted date:  Nov 10, 2000  9:40 AM
+  Number of letters in database: 18,303,128
+  Number of sequences in database:  57,234
+  
+Lambda     K      H
+   0.318    0.135    0.401 
+
+Gapped
+Lambda     K      H
+   0.270   0.0470    0.230 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 262249022
+Number of Sequences: 57234
+Number of extensions: 6465920
+Number of successful extensions: 16162
+Number of sequences better than 1.0e-03: 302
+Number of HSP's better than  0.0 without gapping: 147
+Number of HSP's successfully gapped in prelim test: 4
+Number of HSP's that attempted gapping in prelim test: 14866
+Number of HSP's gapped (non-prelim): 1104
+length of query: 2319
+length of database: 18,303,128
+effective HSP length: 55
+effective length of query: 2264
+effective length of database: 15,155,258
+effective search space: 34311504112
+effective search space used: 34311504112
+frameshift window, decay const: 50,  0.1
+T: 12
+A: 40
+X1: 16 ( 7.3 bits)
+X2: 38 (14.8 bits)
+X3: 64 (24.9 bits)
+S1: 41 (21.7 bits)
+S2: 105 (45.3 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/signalp.negative.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/signalp.negative.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/signalp.negative.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,179 @@
+SignalP 3.0 Server - prediction results
+Technical University of Denmark
+
+Using neural networks (NN) and hidden Markov models (HMM) trained on Gram-negative bacteria
+
+>my_fasta_id
+
+
+
+SignalP-NN result:
+
+
+>my_fasta_id  length = 70
+
+# pos  aa    C       S       Y
+    1   M   0.006   0.019   0.006
+    2   K   0.006   0.016   0.006
+    3   G   0.006   0.019   0.005
+    4   N   0.006   0.012   0.005
+    5   K   0.006   0.011   0.003
+    6   E   0.007   0.014   0.000
+    7   V   0.006   0.012   0.000
+    8   L   0.007   0.012   0.000
+    9   E   0.006   0.001   0.000
+   10   I   0.007   0.015   0.000
+   11   L   0.006   0.007   0.000
+   12   G   0.006   0.011   0.000
+   13   E   0.007   0.010   0.000
+   14   V   0.006   0.002   0.000
+   15   L   0.006   0.005   0.000
+   16   S   0.007   0.058   0.000
+   17   A   0.023   0.023   0.000
+   18   E   0.024   0.019   0.000
+   19   L   0.008   0.038   0.000
+   20   T   0.009   0.036   0.000
+   21   A   0.011   0.038   0.000
+   22   I   0.060   0.049   0.000
+   23   N   0.009   0.120   0.000
+   24   Q   0.028   0.058   0.000
+   25   Y   0.007   0.056   0.000
+   26   F   0.009   0.070   0.000
+   27   I   0.012   0.040   0.008
+   28   H   0.011   0.055   0.010
+   29   A   0.008   0.048   0.011
+   30   K   0.354   0.033   0.086
+   31   M   0.006   0.039   0.012
+   32   N   0.016   0.051   0.022
+   33   K   0.020   0.030   0.025
+   34   N   0.008   0.060   0.017
+   35   W   0.007   0.031   0.017
+   36   G   0.007   0.021   0.017
+   37   F   0.009   0.017   0.019
+   38   K   0.007   0.026   0.017
+   39   K   0.007   0.030   0.017
+   40   L   0.006   0.033   0.015
+   41   A   0.007   0.002   0.017
+   42   D   0.008   0.001   0.017
+   43   F   0.006   0.001   0.014
+   44   M   0.006   0.001   0.014
+   45   K   0.006   0.001   0.013
+   46   R   0.006   0.000   0.012
+   47   E   0.006   0.000   0.011
+   48   S   0.006   0.000   0.011
+   49   I   0.006   0.000   0.010
+   50   D   0.007   0.000   0.010
+   51   E   0.007   0.000   0.009
+   52   M   0.006   0.000   0.007
+   53   K   0.006   0.000   0.007
+   54   H   0.006   0.000   0.006
+   55   A   0.007   0.000   0.005
+   56   D   0.010   0.000   0.005
+   57   E   0.006   0.000   0.002
+   58   V   0.007   0.000   0.001
+   59   I   0.007   0.000   0.001
+   60   D   0.006   0.000   0.001
+   61   R   0.006   0.000   0.001
+   62   I   0.006   0.000   0.000
+   63   L   0.006   0.000   0.000
+   64   Y   0.006   0.000   0.000
+   65   L   0.006   0.000   0.000
+   66   D   0.006   0.000   0.000
+   67   G   0.006   0.000   0.000
+   68   V   0.006   0.000   0.000
+   69   P   0.006   0.000   0.000
+   70   D   0.006   0.000   0.000
+
+
+>my_fasta_id  length = 70
+# Measure  Position  Value  Cutoff  signal peptide?
+  max. C    30       0.354   0.52   NO
+  max. Y    30       0.086   0.33   NO
+  max. S    23       0.120   0.92   NO
+  mean S     1-29    0.030   0.49   NO
+       D     1-29    0.058   0.44   NO
+
+
+
+SignalP-HMM result:
+
+
+>my_fasta_id
+# pos  aa    C       S      n-reg   h-reg   c-reg
+    1   M   0.000   0.000   0.000   0.000   0.000
+    2   K   0.000   0.000   0.000   0.000   0.000
+    3   G   0.000   0.000   0.000   0.000   0.000
+    4   N   0.000   0.000   0.000   0.000   0.000
+    5   K   0.000   0.000   0.000   0.000   0.000
+    6   E   0.000   0.000   0.000   0.000   0.000
+    7   V   0.000   0.000   0.000   0.000   0.000
+    8   L   0.000   0.000   0.000   0.000   0.000
+    9   E   0.000   0.000   0.000   0.000   0.000
+   10   I   0.000   0.000   0.000   0.000   0.000
+   11   L   0.000   0.000   0.000   0.000   0.000
+   12   G   0.000   0.000   0.000   0.000   0.000
+   13   E   0.000   0.000   0.000   0.000   0.000
+   14   V   0.000   0.000   0.000   0.000   0.000
+   15   L   0.000   0.000   0.000   0.000   0.000
+   16   S   0.000   0.000   0.000   0.000   0.000
+   17   A   0.000   0.000   0.000   0.000   0.000
+   18   E   0.000   0.000   0.000   0.000   0.000
+   19   L   0.000   0.000   0.000   0.000   0.000
+   20   T   0.000   0.000   0.000   0.000   0.000
+   21   A   0.000   0.000   0.000   0.000   0.000
+   22   I   0.000   0.000   0.000   0.000   0.000
+   23   N   0.000   0.000   0.000   0.000   0.000
+   24   Q   0.000   0.000   0.000   0.000   0.000
+   25   Y   0.000   0.000   0.000   0.000   0.000
+   26   F   0.000   0.000   0.000   0.000   0.000
+   27   I   0.000   0.000   0.000   0.000   0.000
+   28   H   0.000   0.000   0.000   0.000   0.000
+   29   A   0.000   0.000   0.000   0.000   0.000
+   30   K   0.000   0.000   0.000   0.000   0.000
+   31   M   0.000   0.000   0.000   0.000   0.000
+   32   N   0.000   0.000   0.000   0.000   0.000
+   33   K   0.000   0.000   0.000   0.000   0.000
+   34   N   0.000   0.000   0.000   0.000   0.000
+   35   W   0.000   0.000   0.000   0.000   0.000
+   36   G   0.000   0.000   0.000   0.000   0.000
+   37   F   0.000   0.000   0.000   0.000   0.000
+   38   K   0.000   0.000   0.000   0.000   0.000
+   39   K   0.000   0.000   0.000   0.000   0.000
+   40   L   0.000   0.000   0.000   0.000   0.000
+   41   A   0.000   0.000   0.000   0.000   0.000
+   42   D   0.000   0.000   0.000   0.000   0.000
+   43   F   0.000   0.000   0.000   0.000   0.000
+   44   M   0.000   0.000   0.000   0.000   0.000
+   45   K   0.000   0.000   0.000   0.000   0.000
+   46   R   0.000   0.000   0.000   0.000   0.000
+   47   E   0.000   0.000   0.000   0.000   0.000
+   48   S   0.000   0.000   0.000   0.000   0.000
+   49   I   0.000   0.000   0.000   0.000   0.000
+   50   D   0.000   0.000   0.000   0.000   0.000
+   51   E   0.000   0.000   0.000   0.000   0.000
+   52   M   0.000   0.000   0.000   0.000   0.000
+   53   K   0.000   0.000   0.000   0.000   0.000
+   54   H   0.000   0.000   0.000   0.000   0.000
+   55   A   0.000   0.000   0.000   0.000   0.000
+   56   D   0.000   0.000   0.000   0.000   0.000
+   57   E   0.000   0.000   0.000   0.000   0.000
+   58   V   0.000   0.000   0.000   0.000   0.000
+   59   I   0.000   0.000   0.000   0.000   0.000
+   60   D   0.000   0.000   0.000   0.000   0.000
+   61   R   0.000   0.000   0.000   0.000   0.000
+   62   I   0.000   0.000   0.000   0.000   0.000
+   63   L   0.000   0.000   0.000   0.000   0.000
+   64   Y   0.000   0.000   0.000   0.000   0.000
+   65   L   0.000   0.000   0.000   0.000   0.000
+   66   D   0.000   0.000   0.000   0.000   0.000
+   67   G   0.000   0.000   0.000   0.000   0.000
+   68   V   0.000   0.000   0.000   0.000   0.000
+   69   P   0.000   0.000   0.000   0.000   0.000
+   70   D   0.000   0.000   0.000   0.000   0.000
+
+
+>my_fasta_id
+Prediction: Non-secretory protein
+Signal peptide probability: 0.000
+Max cleavage site probability: 0.000 between pos. -1 and  0
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/signalp.positive.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/signalp.positive.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/signalp.positive.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,187 @@
+     SignalP 3.0 Server - prediction results
+        Technical University of Denmark
+
+
+
+
+
+Using neural networks (NN) and hidden Markov models (HMM) trained on Gram-negative bacteria
+
+
+
+>my_fasta_id
+
+
+
+SignalP-NN result:
+
+
+>my_fasta_id  length = 70
+
+# pos  aa    C       S       Y
+    1   M   0.006   0.512   0.022
+    2   R   0.006   0.473   0.022
+    3   I   0.006   0.324   0.020
+    4   T   0.006   0.454   0.013
+    5   I   0.006   0.588   0.005
+    6   L   0.006   0.223   0.000
+    7   A   0.006   0.339   0.000
+    8   S   0.012   0.504   0.000
+    9   V   0.006   0.642   0.000
+   10   V   0.008   0.384   0.008
+   11   I   0.007   0.212   0.012
+   12   P   0.007   0.160   0.009
+   13   C   0.007   0.263   0.000
+   14   L   0.009   0.648   0.000
+   15   G   0.006   0.594   0.016
+   16   F   0.011   0.538   0.031
+   17   S   0.007   0.598   0.028
+   18   A   0.016   0.678   0.050
+   19   S   0.026   0.717   0.075
+   20   C   0.008   0.748   0.047
+   21   M   0.010   0.755   0.059
+   22   A   0.064   0.902   0.161
+   23   A   0.916   0.225   0.678
+   24   E   0.467   0.046   0.487
+   25   D   0.016   0.042   0.089
+   26   V   0.011   0.043   0.071
+   27   M   0.007   0.048   0.053
+   28   I   0.009   0.042   0.061
+   29   V   0.009   0.041   0.062
+   30   S   0.007   0.033   0.052
+   31   A   0.007   0.026   0.052
+   32   S   0.028   0.022   0.097
+   33   G   0.007   0.014   0.046
+   34   Y   0.043   0.010   0.107
+   35   E   0.008   0.018   0.042
+   36   K   0.011   0.031   0.046
+   37   K   0.009   0.008   0.036
+   38   L   0.009   0.015   0.029
+   39   T   0.006   0.017   0.015
+   40   N   0.006   0.012   0.013
+   41   A   0.006   0.013   0.013
+   42   A   0.008   0.004   0.014
+   43   A   0.007   0.001   0.013
+   44   S   0.007   0.000   0.012
+   45   V   0.006   0.001   0.010
+   46   S   0.006   0.000   0.009
+   47   V   0.006   0.000   0.008
+   48   I   0.006   0.000   0.008
+   49   N   0.008   0.000   0.008
+   50   Q   0.009   0.000   0.009
+   51   E   0.007   0.000   0.007
+   52   E   0.006   0.000   0.006
+   53   L   0.006   0.000   0.005
+   54   Q   0.007   0.000   0.005
+   55   S   0.006   0.000   0.004
+   56   S   0.007   0.000   0.003
+   57   Q   0.006   0.000   0.003
+   58   Y   0.006   0.000   0.001
+   59   H   0.007   0.000   0.001
+   60   D   0.007   0.000   0.001
+   61   L   0.007   0.000   0.001
+   62   A   0.007   0.000   0.001
+   63   E   0.008   0.000   0.001
+   64   A   0.006   0.000   0.001
+   65   L   0.008   0.000   0.001
+   66   R   0.006   0.000   0.001
+   67   S   0.006   0.000   0.001
+   68   V   0.006   0.000   0.001
+   69   E   0.006   0.000   0.001
+   70   G   0.007   0.000   0.001
+
+
+>my_fasta_id  length = 70
+# Measure  Position  Value  Cutoff  signal peptide?
+  max. C    23       0.916   0.52   YES
+  max. Y    23       0.678   0.33   YES
+  max. S    22       0.902   0.92   NO
+  mean S     1-22    0.512   0.49   YES
+       D     1-22    0.595   0.44   YES
+# Most likely cleavage site between pos. 22 and 23: CMA-AE
+
+
+
+SignalP-HMM result:
+
+
+>my_fasta_id
+# pos  aa    C       S      n-reg   h-reg   c-reg
+    1   M   0.000   1.000   1.000   0.000   0.000
+    2   R   0.000   1.000   1.000   0.000   0.000
+    3   I   0.000   1.000   1.000   0.000   0.000
+    4   T   0.000   1.000   0.910   0.090   0.000
+    5   I   0.000   1.000   0.572   0.428   0.000
+    6   L   0.000   1.000   0.224   0.776   0.000
+    7   A   0.000   1.000   0.011   0.989   0.000
+    8   S   0.000   1.000   0.001   0.999   0.000
+    9   V   0.000   1.000   0.000   1.000   0.000
+   10   V   0.000   1.000   0.000   1.000   0.000
+   11   I   0.000   1.000   0.000   1.000   0.000
+   12   P   0.000   1.000   0.000   0.999   0.001
+   13   C   0.000   1.000   0.000   0.997   0.003
+   14   L   0.000   1.000   0.000   0.977   0.023
+   15   G   0.000   1.000   0.000   0.849   0.151
+   16   F   0.000   1.000   0.000   0.224   0.776
+   17   S   0.000   1.000   0.000   0.000   1.000
+   18   A   0.000   1.000   0.000   0.000   1.000
+   19   S   0.000   1.000   0.000   0.000   1.000
+   20   C   0.000   1.000   0.000   0.000   1.000
+   21   M   0.000   1.000   0.000   0.000   1.000
+   22   A   0.000   1.000   0.000   0.000   1.000
+   23   A   1.000   0.000   0.000   0.000   0.000
+   24   E   0.000   0.000   0.000   0.000   0.000
+   25   D   0.000   0.000   0.000   0.000   0.000
+   26   V   0.000   0.000   0.000   0.000   0.000
+   27   M   0.000   0.000   0.000   0.000   0.000
+   28   I   0.000   0.000   0.000   0.000   0.000
+   29   V   0.000   0.000   0.000   0.000   0.000
+   30   S   0.000   0.000   0.000   0.000   0.000
+   31   A   0.000   0.000   0.000   0.000   0.000
+   32   S   0.000   0.000   0.000   0.000   0.000
+   33   G   0.000   0.000   0.000   0.000   0.000
+   34   Y   0.000   0.000   0.000   0.000   0.000
+   35   E   0.000   0.000   0.000   0.000   0.000
+   36   K   0.000   0.000   0.000   0.000   0.000
+   37   K   0.000   0.000   0.000   0.000   0.000
+   38   L   0.000   0.000   0.000   0.000   0.000
+   39   T   0.000   0.000   0.000   0.000   0.000
+   40   N   0.000   0.000   0.000   0.000   0.000
+   41   A   0.000   0.000   0.000   0.000   0.000
+   42   A   0.000   0.000   0.000   0.000   0.000
+   43   A   0.000   0.000   0.000   0.000   0.000
+   44   S   0.000   0.000   0.000   0.000   0.000
+   45   V   0.000   0.000   0.000   0.000   0.000
+   46   S   0.000   0.000   0.000   0.000   0.000
+   47   V   0.000   0.000   0.000   0.000   0.000
+   48   I   0.000   0.000   0.000   0.000   0.000
+   49   N   0.000   0.000   0.000   0.000   0.000
+   50   Q   0.000   0.000   0.000   0.000   0.000
+   51   E   0.000   0.000   0.000   0.000   0.000
+   52   E   0.000   0.000   0.000   0.000   0.000
+   53   L   0.000   0.000   0.000   0.000   0.000
+   54   Q   0.000   0.000   0.000   0.000   0.000
+   55   S   0.000   0.000   0.000   0.000   0.000
+   56   S   0.000   0.000   0.000   0.000   0.000
+   57   Q   0.000   0.000   0.000   0.000   0.000
+   58   Y   0.000   0.000   0.000   0.000   0.000
+   59   H   0.000   0.000   0.000   0.000   0.000
+   60   D   0.000   0.000   0.000   0.000   0.000
+   61   L   0.000   0.000   0.000   0.000   0.000
+   62   A   0.000   0.000   0.000   0.000   0.000
+   63   E   0.000   0.000   0.000   0.000   0.000
+   64   A   0.000   0.000   0.000   0.000   0.000
+   65   L   0.000   0.000   0.000   0.000   0.000
+   66   R   0.000   0.000   0.000   0.000   0.000
+   67   S   0.000   0.000   0.000   0.000   0.000
+   68   V   0.000   0.000   0.000   0.000   0.000
+   69   E   0.000   0.000   0.000   0.000   0.000
+   70   G   0.000   0.000   0.000   0.000   0.000
+
+
+>my_fasta_id
+Prediction: Signal peptide
+Signal peptide probability: 1.000
+Max cleavage site probability: 1.000 between pos. 22 and 23
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.for
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.for	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.for	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+
+seq1 = human.genomic, 5368 bp
+seq2 = est.for (>hs_est), 479 bp
+
+695-813  (1-119)   100% ->
+1377-1500  (120-243)   99% ->
+1797-1935  (244-382)   100% ->
+2084-2180  (383-479)   100%
+
+seq1 = human.genomic, 5368 bp
+seq2 = est.for (>hs_est), 479 bp
+
+695-813  (1-119)   100% ->
+1377-1500  (120-243)   99% ->
+1797-1935  (244-382)   100% ->
+2084-2180  (383-479)   100%

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.rev
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.rev	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sim4.for.rev	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+
+seq1 = human.genomic, 5368 bp
+seq2 = hn_est.rev (>REVCOMP), 479 bp
+
+(complement)
+695-813  (1-119)   100% ->
+1377-1500  (120-243)   99% ->
+1797-1935  (244-382)   100% ->
+2084-2180  (383-479)   100%

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sim4.rev
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sim4.rev	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sim4.rev	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,15 @@
+
+seq1 = /nfs/disk21/birney/prog/wise2/example/human.rev, 5368 bp
+seq2 = temp.cdna (>HSHNCPA1), 1198 bp
+
+(complement)
+486-503  (10-27)   83% ==
+1048-1117  (194-265)   93% <-
+1408-1563  (266-421)   100% <-
+2501-2575  (422-496)   100% <-
+2710-2802  (497-589)   100% <-
+2889-2981  (590-682)   100% <-
+3075-3285  (683-893)   100% <-
+3434-3580  (894-1040)   100% <-
+3876-3992  (1041-1157)   100% <-
+4556-4596  (1158-1198)   100%

Added: trunk/packages/bioperl/branches/upstream/current/t/data/singleNSsite.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/singleNSsite.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/singleNSsite.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,152 @@
+CODONML (in paml 3.15, November 2005)    test.phy   Model: One dN/dS ratio 
+Codon frequencies: F3x4
+Site-class models:  PositiveSelection
+ns =   3  ls =   6
+
+Codon usage in sequences
+--------------------------------------------------------------------------
+Phe TTT  1  1  1 | Ser TCT  1  1  0 | Tyr TAT  0  0  0 | Cys TGT  0  0  0
+    TTC  0  0  0 |     TCC  0  0  1 |     TAC  0  0  0 |     TGC  0  0  0
+Leu TTA  0  0  0 |     TCA  0  0  0 | *** TAA  0  0  0 | *** TGA  0  0  0
+    TTG  0  0  0 |     TCG  0  0  0 |     TAG  0  0  0 | Trp TGG  0  0  0
+--------------------------------------------------------------------------
+Leu CTT  0  0  0 | Pro CCT  0  0  0 | His CAT  1  1  1 | Arg CGT  0  0  0
+    CTC  0  0  0 |     CCC  0  0  1 |     CAC  0  0  0 |     CGC  0  0  0
+    CTA  0  0  0 |     CCA  1  1  0 | Gln CAA  0  0  0 |     CGA  0  0  0
+    CTG  0  0  0 |     CCG  0  0  0 |     CAG  0  0  0 |     CGG  0  0  0
+--------------------------------------------------------------------------
+Ile ATT  0  0  0 | Thr ACT  0  0  0 | Asn AAT  0  0  0 | Ser AGT  0  0  0
+    ATC  0  0  0 |     ACC  0  0  0 |     AAC  0  0  0 |     AGC  0  0  0
+    ATA  0  0  0 |     ACA  0  0  0 | Lys AAA  0  0  0 | Arg AGA  0  0  0
+Met ATG  2  1  1 |     ACG  0  1  1 |     AAG  0  0  0 |     AGG  0  0  0
+--------------------------------------------------------------------------
+Val GTT  0  0  0 | Ala GCT  0  0  0 | Asp GAT  0  0  0 | Gly GGT  0  0  0
+    GTC  0  0  0 |     GCC  0  0  0 |     GAC  0  0  0 |     GGC  0  0  0
+    GTA  0  0  0 |     GCA  0  0  0 | Glu GAA  0  0  0 |     GGA  0  0  0
+    GTG  0  0  0 |     GCG  0  0  0 |     GAG  0  0  0 |     GGG  0  0  0
+--------------------------------------------------------------------------
+
+Codon position x base (3x4) table for each sequence.
+
+#1: test0          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.50000    C:0.33333    A:0.16667    G:0.00000
+position  3:    T:0.50000    C:0.00000    A:0.16667    G:0.33333
+
+#2: test1          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.33333    C:0.50000    A:0.16667    G:0.00000
+position  3:    T:0.50000    C:0.00000    A:0.16667    G:0.33333
+
+#3: test2          
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.33333    C:0.50000    A:0.16667    G:0.00000
+position  3:    T:0.33333    C:0.33333    A:0.00000    G:0.33333
+
+Sums of codon usage counts
+------------------------------------------------------------------------------
+Phe F TTT       3 | Ser S TCT       2 | Tyr Y TAT       0 | Cys C TGT       0
+      TTC       0 |       TCC       1 |       TAC       0 |       TGC       0
+Leu L TTA       0 |       TCA       0 | *** * TAA       0 | *** * TGA       0
+      TTG       0 |       TCG       0 |       TAG       0 | Trp W TGG       0
+------------------------------------------------------------------------------
+Leu L CTT       0 | Pro P CCT       0 | His H CAT       3 | Arg R CGT       0
+      CTC       0 |       CCC       1 |       CAC       0 |       CGC       0
+      CTA       0 |       CCA       2 | Gln Q CAA       0 |       CGA       0
+      CTG       0 |       CCG       0 |       CAG       0 |       CGG       0
+------------------------------------------------------------------------------
+Ile I ATT       0 | Thr T ACT       0 | Asn N AAT       0 | Ser S AGT       0
+      ATC       0 |       ACC       0 |       AAC       0 |       AGC       0
+      ATA       0 |       ACA       0 | Lys K AAA       0 | Arg R AGA       0
+Met M ATG       4 |       ACG       2 |       AAG       0 |       AGG       0
+------------------------------------------------------------------------------
+Val V GTT       0 | Ala A GCT       0 | Asp D GAT       0 | Gly G GGT       0
+      GTC       0 |       GCC       0 |       GAC       0 |       GGC       0
+      GTA       0 |       GCA       0 | Glu E GAA       0 |       GGA       0
+      GTG       0 |       GCG       0 |       GAG       0 |       GGG       0
+------------------------------------------------------------------------------
+
+
+Codon position x base (3x4) table, overall
+
+position  1:    T:0.33333    C:0.33333    A:0.33333    G:0.00000
+position  2:    T:0.38889    C:0.44444    A:0.16667    G:0.00000
+position  3:    T:0.44444    C:0.11111    A:0.11111    G:0.33333
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+(Note: This matrix is not used in later m.l. analysis.
+Use runmode = -2 for ML pairwise comparison.)
+
+test0               
+test1               -1.0000 (0.0706 0.0000)
+test2                0.0510 (0.0706 1.3844) 0.0000 (0.0000 0.9745)
+
+
+TREE #  1:  (1, 2, 3);   MP score: 3
+check convergence..
+lnL(ntime:  3  np:  8):    -30.819156     +0.000000
+   4..1     4..2     4..3  
+  0.25573  0.00000  0.62424  5.28487  1.00000  0.00000  0.09213  1.00000
+
+Note: Branch length is defined as number of nucleotide substitutions per codon (not per neucleotide site).
+
+tree length =   0.87997
+
+(1: 0.255727, 2: 0.000004, 3: 0.624239);
+
+(test0: 0.255727, test1: 0.000004, test2: 0.624239);
+
+Detailed output identifying parameters
+
+kappa (ts/tv) =  5.28487
+
+
+dN/dS for site classes (K=3)
+
+p:   1.00000  0.00000  0.00000
+w:   0.09213  1.00000  1.00000
+
+dN & dS for each branch
+
+ branch           t        N        S    dN/dS       dN       dS   N*dN   S*dS
+
+   4..1       0.256     12.9      5.1   0.0921   0.0224   0.2429    0.3    1.2
+   4..2       0.000     12.9      5.1   0.0921   0.0000   0.0000    0.0    0.0
+   4..3       0.624     12.9      5.1   0.0921   0.0546   0.5930    0.7    3.0
+
+
+Naive Empirical Bayes (NEB) analysis
+Bayes Empirical Bayes (BEB) analysis (Yang, Wong & Nielsen 2005. Mol. Biol. Evol. 22:1107-1118)
+Positively selected sites (*: P>95%; **: P>99%)
+
+            Pr(w>1)     post mean +- SE for w
+
+
+
+
+The grid (see ternary graph for p0-p1)
+
+w0:   0.050  0.150  0.250  0.350  0.450  0.550  0.650  0.750  0.850  0.950
+w2:   1.500  2.500  3.500  4.500  5.500  6.500  7.500  8.500  9.500 10.500
+
+
+Posterior on the grid
+
+w0:   0.402  0.260  0.147  0.081  0.045  0.026  0.016  0.010  0.007  0.005
+w2:   0.138  0.114  0.104  0.098  0.095  0.093  0.091  0.090  0.089  0.088
+
+Posterior for p0-p1 (see the ternary graph)
+
+ 0.001
+ 0.001 0.002 0.003
+ 0.001 0.001 0.002 0.003 0.006
+ 0.000 0.001 0.001 0.002 0.004 0.006 0.010
+ 0.000 0.000 0.001 0.001 0.003 0.004 0.007 0.011 0.016
+ 0.000 0.000 0.001 0.001 0.002 0.003 0.005 0.008 0.012 0.017 0.025
+ 0.000 0.000 0.000 0.001 0.001 0.002 0.004 0.006 0.009 0.013 0.019 0.026 0.036
+ 0.000 0.000 0.000 0.000 0.001 0.001 0.003 0.004 0.006 0.009 0.014 0.019 0.027 0.037 0.049
+ 0.000 0.000 0.000 0.000 0.001 0.001 0.002 0.003 0.005 0.007 0.010 0.014 0.020 0.028 0.037 0.049 0.063
+ 0.000 0.000 0.000 0.000 0.000 0.001 0.001 0.002 0.003 0.005 0.007 0.010 0.015 0.020 0.027 0.036 0.046 0.061 0.075
+
+sum of density on p0-p1 =   1.000000

Added: trunk/packages/bioperl/branches/upstream/current/t/data/so.obo
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/so.obo	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/so.obo	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6424 @@
+format-version: 1.2
+date: 13:02:2006 15:53
+saved-by: kareneilbeck
+auto-generated-by: OBO-Edit 1.000-beta15
+subsetdef: SOFA "SO feature annotation"
+default-namespace: sequence
+remark: autogenerated-by\:   DAG-Edit version 1.417\nsaved-by\:       eilbeck\ndate\:         Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $
+
+[Term]
+id: SO:0000000
+name: Sequence_Ontology
+subset: SOFA
+
+[Term]
+id: SO:0000001
+name: region
+def: "Continuous sequence." [SO:ke]
+subset: SOFA
+synonym: "sequence" RELATED []
+is_a: SO:0000110 ! located_sequence_feature
+
+[Term]
+id: SO:0000002
+name: sequence_secondary_structure
+def: "A folded sequence." [SO:ke]
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000003
+name: G_quartet
+def: "G-quartets are unusual nucelic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/ ~ davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf]
+synonym: "G-quartet" RELATED []
+is_a: SO:0000002 ! sequence_secondary_structure
+
+[Term]
+id: SO:0000004
+name: interior_coding_exon
+is_a: SO:0000195 ! coding_exon
+
+[Term]
+id: SO:0000005
+name: satellite_DNA
+def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000006
+name: PCR_product
+def: "A region amplified by a PCR reaction." [SO:ke]
+subset: SOFA
+synonym: "amplicon" RELATED []
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000007
+name: read_pair
+def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls]
+subset: SOFA
+is_a: SO:0000143 ! assembly_component
+relationship: part_of SO:0000149 ! contig
+
+[Term]
+id: SO:0000008
+name: gene_sensu_your_favorite_organism
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000009
+name: gene_class
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000010
+name: protein_coding_gene
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000011
+name: non_protein_coding_gene
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000012
+name: scRNA_primary_transcript
+def: "The primary transcript of any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote." [http:www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html]
+synonym: "small_cytoplasmic_RNA" RELATED []
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000013
+name: scRNA
+def: "Any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote." [http:www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html]
+subset: SOFA
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000014
+name: INR_motif
+def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739]
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+
+[Term]
+id: SO:0000015
+name: DPE_motif
+def: "A sequence element characteristic of some RNA polymerase II promoters; always found with the INR_motif. Positioned from +28 to +32 with respect to the TSS (+1). Consensus sequence (A|G)G(A|T)(C|T)(G|A|C). Required for TFIID binding to TATA-less promoters." [PMID:12651739]
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+
+[Term]
+id: SO:0000016
+name: BRE_motif
+def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739]
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+
+[Term]
+id: SO:0000017
+name: PSE_motif
+def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739]
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+
+[Term]
+id: SO:0000018
+name: linkage_group
+def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046]
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000019
+name: RNA_hairpin_loop
+is_a: SO:0000715 ! RNA_motif
+
+[Term]
+id: SO:0000020
+name: RNA_internal_loop
+is_a: SO:0000715 ! RNA_motif
+
+[Term]
+id: SO:0000021
+name: asymmetric_RNA_internal_loop
+is_a: SO:0000020 ! RNA_internal_loop
+
+[Term]
+id: SO:0000022
+name: A_minor_RNA_motif
+is_a: SO:0000021 ! asymmetric_RNA_internal_loop
+
+[Term]
+id: SO:0000023
+name: K_turn_RNA_motif
+is_a: SO:0000021 ! asymmetric_RNA_internal_loop
+
+[Term]
+id: SO:0000024
+name: Sarcin_like_RNA_motif
+is_a: SO:0000021 ! asymmetric_RNA_internal_loop
+
+[Term]
+id: SO:0000025
+name: symmetric_RNA_internal_loop
+is_a: SO:0000020 ! RNA_internal_loop
+
+[Term]
+id: SO:0000026
+name: RNA_junction_loop
+is_a: SO:0000715 ! RNA_motif
+
+[Term]
+id: SO:0000027
+name: RNA_hook_turn
+is_a: SO:0000026 ! RNA_junction_loop
+
+[Term]
+id: SO:0000028
+name: base_pair
+is_a: SO:0000002 ! sequence_secondary_structure
+
+[Term]
+id: SO:0000029
+name: WC_base_pair
+def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293]
+synonym: "Watson_Crick_based_pair" RELATED []
+is_a: SO:0000028 ! base_pair
+
+[Term]
+id: SO:0000030
+name: sugar_edge_base_pair
+def: "A type of non-canonical base-pairing." [PMID:12177293]
+is_a: SO:0000028 ! base_pair
+
+[Term]
+id: SO:0000031
+name: aptamer
+def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:http://aptamer.icmb.utexas.edu]
+is_a: SO:0000351 ! synthetic_sequence
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000032
+name: DNA_aptamer
+def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu]
+is_a: SO:0000031 ! aptamer
+
+[Term]
+id: SO:0000033
+name: RNA_aptamer
+def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu]
+is_a: SO:0000031 ! aptamer
+
+[Term]
+id: SO:0000034
+name: morpholino
+def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http:www.gene-tools.com/Morpholinos/morpholinos.HTML]
+is_a: SO:0000351 ! synthetic_sequence
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000035
+name: riboswitch
+def: "Riboswitches are mRNAs that can act as direct sensors of small molecules to control their own expression. A riboswitch contains a cis element within mRNA, that can act as a direct sensor of metabolites without a protein intermediate." [PMID:2820954]
+is_a: SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000036
+name: matrix_attachment_site
+def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma]
+synonym: "MAR" RELATED []
+synonym: "SMAR" RELATED []
+synonym: "scaffold_attachment_site" RELATED []
+is_a: SO:0000626 ! chromosomal_regulatory_element
+
+[Term]
+id: SO:0000037
+name: locus_control_region
+def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma]
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000038
+name: match_set
+def: "A collection of match parts" [SO:ke]
+subset: SOFA
+is_a: SO:0000343 ! match
+
+[Term]
+id: SO:0000039
+name: match_part
+def: "A part of a match, for example an HSP from BLAST is a match_part." [SO:ke]
+subset: SOFA
+is_a: SO:0000343 ! match
+relationship: part_of SO:0000038 ! match_set
+
+[Term]
+id: SO:0000040
+name: genomic_clone
+def: "A clone of a DNA region of a genome." [SO:ma]
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000041
+name: variation_operation
+def: "An operation that can be applied to a sequence, that results in a change." [SO:ke]
+is_a: SO:0000000 ! Sequence_Ontology
+
+[Term]
+id: SO:0000042
+name: pseudogene_attribute
+def: "An attribute of a pseudogene (SO:0000336)." [SO:ma]
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000043
+name: processed_pseudogene
+synonym: "pseudogene_by_reverse_transcription" RELATED []
+is_a: SO:0000042 ! pseudogene_attribute
+
+[Term]
+id: SO:0000044
+name: pseudogene_by_unequal_crossing_over
+is_a: SO:0000042 ! pseudogene_attribute
+
+[Term]
+id: SO:0000045
+name: delete
+def: "To remove a subsection of sequence." [SO:ke]
+is_a: SO:0000041 ! variation_operation
+
+[Term]
+id: SO:0000046
+name: insert
+def: "To insert a subsection of sequence." [SO:ke]
+is_a: SO:0000041 ! variation_operation
+
+[Term]
+id: SO:0000047
+name: invert
+def: "To invert a subsection of sequence." [SO:ke]
+is_a: SO:0000041 ! variation_operation
+
+[Term]
+id: SO:0000048
+name: substitute
+def: "To substitute a subsection of sequence for another." [SO:ke]
+is_a: SO:0000041 ! variation_operation
+
+[Term]
+id: SO:0000049
+name: translocate
+def: "To translocate a subsection of sequence." [SO:ke]
+is_a: SO:0000041 ! variation_operation
+
+[Term]
+id: SO:0000050
+name: gene_part
+def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke]
+subset: SOFA
+is_obsolete: true
+
+[Term]
+id: SO:0000051
+name: probe
+def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma]
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000052
+name: assortment_derived_deficiency
+synonym: "assortment-derived_deficiency" RELATED []
+is_obsolete: true
+
+[Term]
+id: SO:0000053
+name: mutation_affecting_regulatory_region
+is_a: SO:1000132 ! consequences_of_mutation
+
+[Term]
+id: SO:0000054
+name: aneuploid
+is_a: SO:1000182 ! chromosome_number_variation
+
+[Term]
+id: SO:0000055
+name: hyperploid
+is_a: SO:0000054 ! aneuploid
+
+[Term]
+id: SO:0000056
+name: hypoploid
+is_a: SO:0000054 ! aneuploid
+
+[Term]
+id: SO:0000057
+name: operator
+def: "A regulatory element of an operon to which activators or repressors bind, thereby effecting translation of genes in that operon." [SO:ma]
+subset: SOFA
+is_a: SO:0000752 ! gene_group_regulatory_region
+
+[Term]
+id: SO:0000058
+name: assortment_derived_aneuploid
+synonym: "assortment-derived_aneuploid" RELATED []
+is_obsolete: true
+
+[Term]
+id: SO:0000059
+name: nuclease_binding_site
+is_a: SO:0000410 ! protein_binding_site
+
+[Term]
+id: SO:0000060
+name: compound_chromosome_arm
+is_a: SO:1000042 ! compound_chromosome
+
+[Term]
+id: SO:0000061
+name: restriction_enzyme_binding_site
+is_a: SO:0000059 ! nuclease_binding_site
+
+[Term]
+id: SO:0000062
+name: deficient_intrachromosomal_transposition
+is_a: SO:1000041 ! intrachromosomal_transposition
+
+[Term]
+id: SO:0000063
+name: deficient_interchromosomal_transposition
+is_a: SO:1000155 ! interchromosomal_transposition
+
+[Term]
+id: SO:0000064
+name: gene_by_transcript_attribute
+comment: This class of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004.
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000065
+name: free_chromosome_arm
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:0000066
+name: gene_by_polyadenylation_attribute
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000067
+name: gene_to_gene_feature
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000068
+name: overlapping_gene
+is_a: SO:0000067 ! gene_to_gene_feature
+
+[Term]
+id: SO:0000069
+name: gene_included_within_intron
+is_a: SO:0000068 ! overlapping_gene
+
+[Term]
+id: SO:0000070
+name: gene_included_within_intron_antiparallel
+is_a: SO:0000069 ! gene_included_within_intron
+
+[Term]
+id: SO:0000071
+name: gene_included_within_intron_parallel
+is_a: SO:0000069 ! gene_included_within_intron
+
+[Term]
+id: SO:0000072
+name: end_overlapping_gene
+is_a: SO:0000068 ! overlapping_gene
+
+[Term]
+id: SO:0000073
+name: end_overlapping_gene_five_primethree_prime_overlap
+is_a: SO:0000072 ! end_overlapping_gene
+
+[Term]
+id: SO:0000074
+name: end_overlapping_gene_five_primefive_prime_overlap
+is_a: SO:0000072 ! end_overlapping_gene
+
+[Term]
+id: SO:0000075
+name: end_overlapping_gene_three_primethree_prime_overlap
+is_a: SO:0000072 ! end_overlapping_gene
+
+[Term]
+id: SO:0000076
+name: end_overlapping_gene_three_primefive_prime_overlap
+is_a: SO:0000072 ! end_overlapping_gene
+
+[Term]
+id: SO:0000077
+name: antisense_gene
+is_a: SO:0000068 ! overlapping_gene
+
+[Term]
+id: SO:0000078
+name: polycistronic_transcript
+is_a: SO:0000115 ! transcript_feature
+
+[Term]
+id: SO:0000079
+name: dicistronic_transcript
+is_a: SO:0000078 ! polycistronic_transcript
+
+[Term]
+id: SO:0000080
+name: member_of_operon
+is_a: SO:0000081 ! member_gene_array
+
+[Term]
+id: SO:0000081
+name: member_gene_array
+is_a: SO:0000067 ! gene_to_gene_feature
+
+[Term]
+id: SO:0000082
+name: processed_transcript_attribute
+is_a: SO:0000237 ! transcript_attribute
+
+[Term]
+id: SO:0000083
+name: macronuclear_sequence_feature
+is_a: SO:0000735 ! sequence_location
+
+[Term]
+id: SO:0000084
+name: micronuclear_sequence_feature
+is_a: SO:0000735 ! sequence_location
+
+[Term]
+id: SO:0000085
+name: gene_by_genome_location
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000086
+name: gene_by_organelle_of_genome
+is_a: SO:0000085 ! gene_by_genome_location
+
+[Term]
+id: SO:0000087
+name: nuclear_gene
+is_a: SO:0000086 ! gene_by_organelle_of_genome
+
+[Term]
+id: SO:0000088
+name: mt_gene
+synonym: "mitochondrial_gene" RELATED []
+is_a: SO:0000086 ! gene_by_organelle_of_genome
+
+[Term]
+id: SO:0000089
+name: kinetoplast_gene
+is_a: SO:0000088 ! mt_gene
+
+[Term]
+id: SO:0000090
+name: plastid_gene
+is_a: SO:0000086 ! gene_by_organelle_of_genome
+
+[Term]
+id: SO:0000091
+name: apicoplast_gene
+is_a: SO:0000090 ! plastid_gene
+
+[Term]
+id: SO:0000092
+name: ct_gene
+synonym: "chloroplast_gene" RELATED []
+is_a: SO:0000090 ! plastid_gene
+
+[Term]
+id: SO:0000093
+name: chromoplast_gene
+is_a: SO:0000090 ! plastid_gene
+
+[Term]
+id: SO:0000094
+name: cyanelle_gene
+is_a: SO:0000090 ! plastid_gene
+
+[Term]
+id: SO:0000095
+name: leucoplast_gene
+is_a: SO:0000090 ! plastid_gene
+
+[Term]
+id: SO:0000096
+name: proplastid_gene
+is_a: SO:0000090 ! plastid_gene
+
+[Term]
+id: SO:0000097
+name: nucleomorph_gene
+is_a: SO:0000086 ! gene_by_organelle_of_genome
+
+[Term]
+id: SO:0000098
+name: plasmid_gene
+is_a: SO:0000085 ! gene_by_genome_location
+
+[Term]
+id: SO:0000099
+name: proviral_gene
+is_a: SO:0000085 ! gene_by_genome_location
+
+[Term]
+id: SO:0000100
+name: endogenous_retroviral_gene
+is_a: SO:0000099 ! proviral_gene
+
+[Term]
+id: SO:0000101
+name: transposable_element
+def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html]
+subset: SOFA
+is_a: SO:0000187 ! repeat_family
+is_a: SO:1000028 ! intrachromosomal_mutation
+
+[Term]
+id: SO:0000102
+name: expressed_sequence_match
+def: "A match to an EST or cDNA sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000347 ! nucleotide_match
+
+[Term]
+id: SO:0000103
+name: clone_insert_end
+def: "The end of the clone insert." [SO:ke]
+subset: SOFA
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000753 ! clone_insert
+
+[Term]
+id: SO:0000104
+name: polypeptide
+def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma]
+subset: SOFA
+relationship: derives_from SO:0000316 ! CDS
+
+[Term]
+id: SO:0000105
+name: chromosome_arm
+def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.exactsciences.com/cic/glossary/_index.htm]
+relationship: part_of SO:0000340 ! chromosome
+
+[Term]
+id: SO:0000106
+name: non_capped_primary_transcript
+is_a: SO:0000146 ! primary_transcript_by_cap_class
+
+[Term]
+id: SO:0000107
+name: sequencing_primer
+is_a: SO:0000112 ! primer
+
+[Term]
+id: SO:0000108
+name: mRNA_with_frameshift
+is_a: SO:0000082 ! processed_transcript_attribute
+
+[Term]
+id: SO:0000109
+name: sequence_variant
+def: "A region of sequence where variation has been observed." [SO:ke]
+subset: SOFA
+synonym: "mutation" RELATED []
+is_a: SO:0000110 ! located_sequence_feature
+
+[Term]
+id: SO:0000110
+name: located_sequence_feature
+def: "A biological feature that can be attributed to a region of biological sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000000 ! Sequence_Ontology
+
+[Term]
+id: SO:0000111
+name: transposable_element_gene
+def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke]
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000112
+name: primer
+def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html]
+subset: SOFA
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000113
+name: integrated_virus
+def: "A viral sequence which has integrated into the host genome." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000114
+name: methylated_C
+def: "A methylated deoxy-cytosine." [SO:ke]
+subset: SOFA
+is_a: SO:0000306 ! methylated_base_feature
+
+[Term]
+id: SO:0000115
+name: transcript_feature
+is_a: SO:0000237 ! transcript_attribute
+
+[Term]
+id: SO:0000116
+name: edited_transcript
+def: "A gene whose transcript is edited." [http://www.rna.ucla.edu/]
+is_a: SO:0000115 ! transcript_feature
+
+[Term]
+id: SO:0000117
+name: transcript_with_readthrough_stop_codon
+is_obsolete: true
+
+[Term]
+id: SO:0000118
+name: transcript_with_translational_frameshift
+is_a: SO:1001261 ! recoded_mRNA
+
+[Term]
+id: SO:0000119
+name: gene_by_class_of_regulation
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000120
+name: protein_coding_primary_transcript
+def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke]
+comment: May contain introns
+subset: SOFA
+synonym: "pre-mRNA" RELATED []
+is_a: SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000121
+name: forward_primer
+def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php]
+is_a: SO:0000112 ! primer
+
+[Term]
+id: SO:0000122
+name: RNA_sequence_secondary_structure
+def: "A folded RNA sequence." [SO:ke]
+is_a: SO:0000002 ! sequence_secondary_structure
+
+[Term]
+id: SO:0000123
+name: transcriptionally_regulated
+def: "." [SO:ma]
+comment: by\:<protein_id>
+is_a: SO:0000119 ! gene_by_class_of_regulation
+
+[Term]
+id: SO:0000124
+name: transcriptionally_constitutive
+is_a: SO:0000123 ! transcriptionally_regulated
+
+[Term]
+id: SO:0000125
+name: transcriptionally_induced
+is_a: SO:0000123 ! transcriptionally_regulated
+
+[Term]
+id: SO:0000126
+name: transcriptionally_repressed
+is_a: SO:0000123 ! transcriptionally_regulated
+
+[Term]
+id: SO:0000127
+name: silenced_gene
+is_a: SO:0000126 ! transcriptionally_repressed
+
+[Term]
+id: SO:0000128
+name: gene_silenced_by_DNA_modification
+is_a: SO:0000127 ! silenced_gene
+
+[Term]
+id: SO:0000129
+name: gene_silenced_by_DNA_methylation
+is_a: SO:0000128 ! gene_silenced_by_DNA_modification
+
+[Term]
+id: SO:0000130
+name: post_translationally_regulated
+synonym: "post-translationally_regulated" RELATED []
+is_a: SO:0000119 ! gene_by_class_of_regulation
+
+[Term]
+id: SO:0000131
+name: translationally_regulated
+is_a: SO:0000119 ! gene_by_class_of_regulation
+
+[Term]
+id: SO:0000132
+name: reverse_primer
+def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php]
+is_a: SO:0000112 ! primer
+
+[Term]
+id: SO:0000133
+name: gene_by_epigenetic_modification
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000134
+name: imprinted
+is_a: SO:0000119 ! gene_by_class_of_regulation
+is_a: SO:0000133 ! gene_by_epigenetic_modification
+
+[Term]
+id: SO:0000135
+name: maternally_imprinted
+is_a: SO:0000134 ! imprinted
+
+[Term]
+id: SO:0000136
+name: paternally_imprinted
+is_a: SO:0000134 ! imprinted
+
+[Term]
+id: SO:0000137
+name: allelically_excluded
+is_a: SO:0000133 ! gene_by_epigenetic_modification
+
+[Term]
+id: SO:0000138
+name: gene_rearranged_at_DNA_level
+is_a: SO:0000133 ! gene_by_epigenetic_modification
+
+[Term]
+id: SO:0000139
+name: ribosome_entry_site
+def: "Region in mRNA where ribosome assembles." [SO:ke]
+comment: gene\:<gene_id>
+subset: SOFA
+relationship: part_of SO:0000203 ! UTR
+
+[Term]
+id: SO:0000140
+name: attenuator
+def: "A sequence segment located between the promoter and a structural gene that causes partial termination of transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0000752 ! gene_group_regulatory_region
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000141
+name: terminator
+def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0000752 ! gene_group_regulatory_region
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000142
+name: DNA_sequence_secondary_structure
+def: "A folded DNA sequence." [SO:ke]
+is_a: SO:0000002 ! sequence_secondary_structure
+
+[Term]
+id: SO:0000143
+name: assembly_component
+def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000144
+name: primary_transcript_attribute
+is_a: SO:0000237 ! transcript_attribute
+
+[Term]
+id: SO:0000145
+name: recoded_codon
+is_a: SO:0000360 ! codon
+
+[Term]
+id: SO:0000146
+name: primary_transcript_by_cap_class
+is_a: SO:0000144 ! primary_transcript_attribute
+
+[Term]
+id: SO:0000147
+name: exon
+def: "A region of the genome that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+relationship: part_of SO:0000673 ! transcript
+
+[Term]
+id: SO:0000148
+name: supercontig
+def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls]
+subset: SOFA
+synonym: "scaffold" RELATED []
+is_a: SO:0000353 ! assembly
+relationship: part_of SO:0000719 ! ultracontig
+
+[Term]
+id: SO:0000149
+name: contig
+def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls]
+subset: SOFA
+is_a: SO:0000143 ! assembly_component
+is_a: SO:0000353 ! assembly
+relationship: part_of SO:0000148 ! supercontig
+
+[Term]
+id: SO:0000150
+name: read
+def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd]
+subset: SOFA
+is_a: SO:0000143 ! assembly_component
+relationship: part_of SO:0000149 ! contig
+
+[Term]
+id: SO:0000151
+name: clone
+def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/]
+subset: SOFA
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000152
+name: YAC
+def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma]
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000760 ! YAC_clone
+
+[Term]
+id: SO:0000153
+name: BAC
+def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma]
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000764 ! BAC_clone
+
+[Term]
+id: SO:0000154
+name: PAC
+def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm]
+synonym: "P1" RELATED []
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000762 ! PAC_clone
+
+[Term]
+id: SO:0000155
+name: plasmid
+def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma]
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000156
+name: cosmid
+def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as plasmids or packaged as a phage, since they retain the lambda cos sites." [SO:ma]
+comment: Evans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1)\:9-20.
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000765 ! cosmid_clone
+
+[Term]
+id: SO:0000157
+name: phagemid
+def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma]
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000761 ! phagemid_clone
+
+[Term]
+id: SO:0000158
+name: fosmid
+def: "A cloning vector that utilises the E. coli F factor." [SO:ma]
+comment: Birren BW et al. A human chromosome 22 fosmid resource\: mapping and analysis of 96 clones. Genomics 1996;
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000763 ! fosmid_clone
+
+[Term]
+id: SO:0000159
+name: deletion
+def: "The sequence that is deleted." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+is_a: SO:0000109 ! sequence_variant
+relationship: sequence_of SO:0000045 ! delete
+
+[Term]
+id: SO:0000160
+name: lambda_clone
+def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8]
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000161
+name: methylated_A
+def: "A methylated adenine." [SO:ke]
+subset: SOFA
+is_a: SO:0000250 ! modified_RNA_base_feature
+is_a: SO:0000306 ! methylated_base_feature
+
+[Term]
+id: SO:0000162
+name: splice_site
+def: "The position where intron is excised." [SO:ke]
+subset: SOFA
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000163
+name: splice_donor_site
+def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html]
+subset: SOFA
+synonym: "donor" RELATED []
+synonym: "donor_splice_site" RELATED []
+is_a: SO:0000162 ! splice_site
+
+[Term]
+id: SO:0000164
+name: splice_acceptor_site
+def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html]
+subset: SOFA
+synonym: "acceptor" RELATED []
+synonym: "acceptor_splice_site" RELATED []
+is_a: SO:0000162 ! splice_site
+
+[Term]
+id: SO:0000165
+name: enhancer
+def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000166
+name: enhancer_by_bound_factor
+is_a: SO:0000402 ! enhancer_attribute
+
+[Term]
+id: SO:0000167
+name: promoter
+def: "The region on a DNA molecule involved in RNA polymerase binding to initiate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000168
+name: restriction_enzyme_cut_site
+def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma]
+is_obsolete: true
+
+[Term]
+id: SO:0000169
+name: RNApol_I_promoter
+def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke]
+synonym: "RNA_polymerase_A_promoter" RELATED []
+is_a: SO:0000167 ! promoter
+
+[Term]
+id: SO:0000170
+name: RNApol_II_promoter
+def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke]
+synonym: "RNA_polymerase_B_promoter" RELATED []
+is_a: SO:0000167 ! promoter
+
+[Term]
+id: SO:0000171
+name: RNApol_III_promoter
+def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke]
+synonym: "RNA_polymerase_C_promoter" RELATED []
+is_a: SO:0000167 ! promoter
+
+[Term]
+id: SO:0000172
+name: CAAT_signal
+def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+
+[Term]
+id: SO:0000173
+name: GC_rich_region
+def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+synonym: "GC-rich_region" RELATED []
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+
+[Term]
+id: SO:0000174
+name: TATA_box
+def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+relationship: part_of SO:0000170 ! RNApol_II_promoter
+relationship: part_of SO:0000171 ! RNApol_III_promoter
+
+[Term]
+id: SO:0000175
+name: minus_10_signal
+def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+synonym: "-10_signal" RELATED []
+synonym: "Pribnow_box" RELATED []
+relationship: part_of SO:0000613 ! bacterial_RNApol_promoter
+
+[Term]
+id: SO:0000176
+name: minus_35_signal
+def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+synonym: "-35_signal" RELATED []
+relationship: part_of SO:0000613 ! bacterial_RNApol_promoter
+
+[Term]
+id: SO:0000177
+name: cross_genome_match
+def: "A nucleotide match against a sequence from another organism." [SO:ma]
+subset: SOFA
+is_a: SO:0000347 ! nucleotide_match
+
+[Term]
+id: SO:0000178
+name: operon
+def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma]
+subset: SOFA
+is_a: SO:0005855 ! gene_group
+
+[Term]
+id: SO:0000179
+name: clone_insert_start
+def: "The start of the clone insert." [SO:ke]
+subset: SOFA
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000753 ! clone_insert
+
+[Term]
+id: SO:0000180
+name: retrotransposon
+def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R]
+is_a: SO:0000101 ! transposable_element
+
+[Term]
+id: SO:0000181
+name: translated_nucleotide_match
+def: "A match against a translated sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000347 ! nucleotide_match
+
+[Term]
+id: SO:0000182
+name: DNA_transposon
+def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke]
+is_a: SO:0000101 ! transposable_element
+
+[Term]
+id: SO:0000183
+name: non_transcribed_region
+def: "A region of the gene which is not transcribed." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+relationship: part_of SO:0000704 ! gene
+
+[Term]
+id: SO:0000184
+name: U2_intron
+def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511]
+comment: May have either GT-AG or AT-AG 5' and 3' boundaries.
+is_a: SO:0000662 ! spliceosomal_intron
+
+[Term]
+id: SO:0000185
+name: primary_transcript
+def: "The primary (initial, unprocessed) transcript; includes five_prime_clip (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) and three_prime_untranslated_region (three_prime_UTR), and three_prime_clip (SO:0000557)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+synonym: "precursor_RNA" RELATED []
+is_a: SO:0000673 ! transcript
+
+[Term]
+id: SO:0000186
+name: LTR_retrotransposon
+def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke]
+is_a: SO:0000180 ! retrotransposon
+
+[Term]
+id: SO:0000187
+name: repeat_family
+def: "A group of characterized repeat sequences." [SO:ke]
+subset: SOFA
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000188
+name: intron
+def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+relationship: part_of SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000189
+name: non_LTR_retrotransposon
+def: "A retrotransposon without long terminal repeat sequences." [SO:ke]
+is_a: SO:0000180 ! retrotransposon
+
+[Term]
+id: SO:0000190
+name: five_prime_intron
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000191
+name: interior_intron
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000192
+name: three_prime_intron
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000193
+name: RFLP_fragment
+def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908]
+subset: SOFA
+synonym: "restriction_fragment_length_polymorphism" RELATED []
+is_a: SO:0000412 ! restriction_fragment
+
+[Term]
+id: SO:0000194
+name: LINE_element
+def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORFs, one of which is reverse transcriptase, and 3' and 5' direct repeats." [http:www.ucl.ac.uk/~ucbhjow/b241/glossary.html]
+synonym: "Long interspersed element" RELATED []
+synonym: "Long interspersed nuclear element" RELATED []
+is_a: SO:0000189 ! non_LTR_retrotransposon
+
+[Term]
+id: SO:0000195
+name: coding_exon
+def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke]
+is_a: SO:0000147 ! exon
+
+[Term]
+id: SO:0000196
+name: five_prime_exon_coding_region
+def: "The sequence of the 5' exon that encodes for protein." [SO:ke]
+is_a: SO:0000195 ! coding_exon
+relationship: part_of SO:0000200 ! five_prime_coding_exon
+
+[Term]
+id: SO:0000197
+name: three_prime_exon_coding_region
+def: "The sequence of the 3' exon that encodes for protein." [SO:ke]
+is_a: SO:0000195 ! coding_exon
+relationship: part_of SO:0000202 ! three_prime_coding_exon
+
+[Term]
+id: SO:0000198
+name: noncoding_exon
+def: "An exon that does not contain any codons." [SO:ke]
+synonym: "noncoding_exon" RELATED []
+is_a: SO:0000147 ! exon
+
+[Term]
+id: SO:0000199
+name: translocation
+def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke]
+relationship: sequence_of SO:0000049 ! translocate
+
+[Term]
+id: SO:0000200
+name: five_prime_coding_exon
+def: "The 5' most coding exon." [SO:ke]
+is_a: SO:0000147 ! exon
+
+[Term]
+id: SO:0000201
+name: interior_exon
+is_a: SO:0000147 ! exon
+
+[Term]
+id: SO:0000202
+name: three_prime_coding_exon
+def: "The exon that is most 3-prime on a given transcript." [SO:ma]
+is_a: SO:0000147 ! exon
+
+[Term]
+id: SO:0000203
+name: UTR
+def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke]
+subset: SOFA
+synonym: "untranslated_region" RELATED []
+relationship: part_of SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000204
+name: five_prime_UTR
+def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+synonym: "five_prime_untranslated_region" RELATED []
+is_a: SO:0000203 ! UTR
+
+[Term]
+id: SO:0000205
+name: three_prime_UTR
+def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+synonym: "three_prime_untranslated_region" RELATED []
+is_a: SO:0000203 ! UTR
+
+[Term]
+id: SO:0000206
+name: SINE_element
+def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke]
+synonym: "Short interspersed element" RELATED []
+synonym: "Short interspersed nuclear element" RELATED []
+is_a: SO:0000189 ! non_LTR_retrotransposon
+
+[Term]
+id: SO:0000207
+name: simple_sequence_length_polymorphism
+is_a: SO:0000248 ! sequence_length_variation
+
+[Term]
+id: SO:0000208
+name: terminal_inverted_repeat_element
+def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http:www.genetics.org/cgi/reprint/156/4/1983.pdf]
+is_a: SO:0000182 ! DNA_transposon
+
+[Term]
+id: SO:0000209
+name: rRNA_primary_transcript
+def: "A primary transcript encoding a ribosomal RNA." [SO:ke]
+synonym: "ribosomal_RNA_primary_transcript" RELATED []
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000210
+name: tRNA_primary_transcript
+def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke]
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000211
+name: alanine_tRNA_primary_transcript
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000212
+name: arginine_tRNA_primary_transcript
+def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000213
+name: asparagine_tRNA_primary_transcript
+def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000214
+name: aspartic_acid_tRNA_primary_transcript
+def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000215
+name: cysteine_tRNA_primary_transcript
+def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000216
+name: glutamic_acid_tRNA_primary_transcript
+def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000217
+name: glutamine_tRNA_primary_transcript
+def: "A primary transcript encoding glutaminyl tRNA (SO:0000259)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000218
+name: glycine_tRNA_primary_transcript
+def: "A primary transcript encoding glycyl tRNA (SO:0000261)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000219
+name: histidine_tRNA_primary_transcript
+def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000220
+name: isoleucine_tRNA_primary_transcript
+def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000221
+name: leucine_tRNA_primary_transcript
+def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000222
+name: lysine_tRNA_primary_transcript
+def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000223
+name: methionine_tRNA_primary_transcript
+def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000224
+name: phenylalanine_tRNA_primary_transcript
+def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000225
+name: proline_tRNA_primary_transcript
+def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000226
+name: serine_tRNA_primary_transcript
+def: "A primary transcript encoding seryl tRNA (SO:0000269)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000227
+name: threonine_tRNA_primary_transcript
+def: "A primary transcript encoding threonyl tRNA (SO:0000270)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000228
+name: tryptophan_tRNA_primary_transcript
+def: "A primary transcript encoding tryptophanyl tRNA (SO:0000271)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000229
+name: tyrosine_tRNA_primary_transcript
+def: "A primary transcript encoding tyrosyl tRNA (SO:0000272)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000230
+name: valine_tRNA_primary_transcript
+def: "A primary transcript encoding valyl tRNA (SO:0000273)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0000231
+name: snRNA_primary_transcript
+def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke]
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000232
+name: snoRNA_primary_transcript
+def: "A primary transcript encoding a small nucleolar RNA (SO:0000275)." [SO:ke]
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000233
+name: processed_transcript
+def: "A transcript which has undergone processing to remove parts such as introns and transcribed_spacer_regions." [SO:ke]
+comment: A processed transcript cannot contain introns.
+subset: SOFA
+is_a: SO:0000673 ! transcript
+relationship: derives_from SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000234
+name: mRNA
+def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma]
+comment: mRNA does not contain introns as it is a processed_transcript.\nThe equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns.
+subset: SOFA
+synonym: "messenger_RNA" RELATED []
+is_a: SO:0000233 ! processed_transcript
+
+[Term]
+id: SO:0000235
+name: TF_binding_site
+def: "A region of a molecule that binds to a transcription factor." [SO:ke]
+subset: SOFA
+synonym: "transcription_factor_binding_site" RELATED []
+is_a: SO:0000410 ! protein_binding_site
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000236
+name: ORF
+def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER" [SO:ma, SO:rb]
+comment: The definition was modified by Rama. This term is now basically the same as a CDS. This must be revised.
+subset: SOFA
+synonym: "open_reading_frame" RELATED []
+is_a: SO:0000717 ! reading_frame
+
+[Term]
+id: SO:0000237
+name: transcript_attribute
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000238
+name: foldback_element
+def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf]
+synonym: "LVR element" RELATED []
+synonym: "long inverted repeat element" RELATED []
+is_a: SO:0000182 ! DNA_transposon
+
+[Term]
+id: SO:0000239
+name: flanking_region
+def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000240
+name: chromosome_variation
+is_a: SO:0000000 ! Sequence_Ontology
+
+[Term]
+id: SO:0000241
+name: internal_UTR
+is_a: SO:0000203 ! UTR
+
+[Term]
+id: SO:0000242
+name: untranslated_region_polyicistronic_mRNA
+def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke]
+is_a: SO:0000203 ! UTR
+
+[Term]
+id: SO:0000243
+name: internal_ribosome_entry_site
+def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke]
+synonym: "IRES" RELATED []
+is_a: SO:0000139 ! ribosome_entry_site
+
+[Term]
+id: SO:0000244
+name: four_cutter_restriction_site
+synonym: "4-cutter_restriction_site" RELATED []
+synonym: "four-cutter_restriction_site" RELATED []
+is_obsolete: true
+
+[Term]
+id: SO:0000245
+name: mRNA_by_polyadenylation_status
+is_a: SO:0000082 ! processed_transcript_attribute
+
+[Term]
+id: SO:0000246
+name: mRNA_polyadenylated
+is_a: SO:0000245 ! mRNA_by_polyadenylation_status
+
+[Term]
+id: SO:0000247
+name: mRNA_not_polyadenylated
+is_a: SO:0000245 ! mRNA_by_polyadenylation_status
+
+[Term]
+id: SO:0000248
+name: sequence_length_variation
+is_a: SO:1000002 ! substitution
+
+[Term]
+id: SO:0000249
+name: six_cutter_restriction_site
+synonym: "6-cutter_restriction_site" RELATED []
+synonym: "six-cutter_restriction_site" RELATED []
+is_obsolete: true
+
+[Term]
+id: SO:0000250
+name: modified_RNA_base_feature
+def: "A post_transcriptionally modified base." [SO:ke]
+relationship: part_of SO:0000673 ! transcript
+
+[Term]
+id: SO:0000251
+name: eight_cutter_restriction_site
+synonym: "8-cutter_restriction_site" RELATED []
+synonym: "eight-cutter_restriction_site" RELATED []
+is_obsolete: true
+
+[Term]
+id: SO:0000252
+name: rRNA
+def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, ISBN:0198506732]
+subset: SOFA
+synonym: "ribosomal_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000253
+name: tRNA
+def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. tRNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). tRNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732]
+subset: SOFA
+synonym: "transfer_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000254
+name: alanyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000255
+name: rRNA_small_subunit_primary_transcript
+def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke]
+is_a: SO:0000209 ! rRNA_primary_transcript
+
+[Term]
+id: SO:0000256
+name: asparaginyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000257
+name: aspartyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000258
+name: cysteinyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000259
+name: glutaminyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000260
+name: glutamyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000261
+name: glycyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000262
+name: histidyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000263
+name: isoleucyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000264
+name: leucyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000265
+name: lysyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000266
+name: methionyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000267
+name: phenylalanyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000268
+name: prolyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000269
+name: seryl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000270
+name: threonyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000271
+name: tryptophanyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000272
+name: tyrosyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000273
+name: valyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000274
+name: snRNA
+def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing" [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, PMID:11733745]
+subset: SOFA
+synonym: "small_nuclear_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000275
+name: snoRNA
+def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012]
+subset: SOFA
+synonym: "small_nucleolar_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000276
+name: miRNA
+def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. miRNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. miRNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000]
+subset: SOFA
+synonym: "micro_RNA" RELATED []
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000277
+name: transcript_by_bound_factor
+is_a: SO:0000237 ! transcript_attribute
+
+[Term]
+id: SO:0000278
+name: transcript_by_bound_nucleic_acid
+is_a: SO:0000277 ! transcript_by_bound_factor
+
+[Term]
+id: SO:0000279
+name: transcript_by_bound_protein
+is_a: SO:0000277 ! transcript_by_bound_factor
+
+[Term]
+id: SO:0000280
+name: engineered_gene
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000281
+name: engineered_foreign_gene
+is_a: SO:0000280 ! engineered_gene
+is_a: SO:0000285 ! foreign_gene
+
+[Term]
+id: SO:0000282
+name: mRNA_with_minus_1_frameshift
+is_a: SO:0000108 ! mRNA_with_frameshift
+
+[Term]
+id: SO:0000283
+name: engineered_foreign_transposable_element_gene
+is_a: SO:0000111 ! transposable_element_gene
+is_a: SO:0000280 ! engineered_gene
+
+[Term]
+id: SO:0000284
+name: type_I_enzyme_restriction_site
+def: "The recognition site is bipartite and interrupted." [http://www.promega.com]
+is_obsolete: true
+
+[Term]
+id: SO:0000285
+name: foreign_gene
+is_a: SO:0000452 ! transgene
+
+[Term]
+id: SO:0000286
+name: long_terminal_repeat
+def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+synonym: "LTR" RELATED []
+synonym: "direct_terminal_repeat" RELATED []
+is_a: SO:0000657 ! repeat_region
+relationship: part_of SO:0000186 ! LTR_retrotransposon
+
+[Term]
+id: SO:0000287
+name: fusion_gene
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000288
+name: engineered_fusion_gene
+is_a: SO:0000280 ! engineered_gene
+is_a: SO:0000287 ! fusion_gene
+
+[Term]
+id: SO:0000289
+name: microsatellite
+def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml]
+subset: SOFA
+is_a: SO:0000705 ! tandem_repeat
+
+[Term]
+id: SO:0000290
+name: dinucleotide_repeat_microsatellite_feature
+is_a: SO:0000289 ! microsatellite
+
+[Term]
+id: SO:0000291
+name: trinucleotide_repeat_microsatellite_feature
+is_a: SO:0000289 ! microsatellite
+
+[Term]
+id: SO:0000292
+name: repetitive_element
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000293
+name: engineered_foreign_repetitive_element
+is_a: SO:0000280 ! engineered_gene
+is_a: SO:0000292 ! repetitive_element
+
+[Term]
+id: SO:0000294
+name: inverted_repeat
+def: "The sequence is complementarily repeated on the opposite strand. Example: GCTGA-----TCAGC." [SO:ke]
+subset: SOFA
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000295
+name: U12_intron
+def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511]
+comment: May have either GT-AC or AT-AC 5' and 3' boundaries.
+is_a: SO:0000662 ! spliceosomal_intron
+
+[Term]
+id: SO:0000296
+name: origin_of_replication
+def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000297
+name: D_loop
+def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+is_a: SO:0000296 ! origin_of_replication
+
+[Term]
+id: SO:0000298
+name: recombination_feature
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000299
+name: specific_recombination_site
+is_a: SO:0000669 ! sequence_rearrangement_feature
+
+[Term]
+id: SO:0000300
+name: recombination_feature_of_rearranged_gene
+is_a: SO:0000299 ! specific_recombination_site
+
+[Term]
+id: SO:0000301
+name: recombination_feature_of_vertebrate_immune_system_gene
+is_a: SO:0000300 ! recombination_feature_of_rearranged_gene
+
+[Term]
+id: SO:0000302
+name: J_gene_recombination_feature
+def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-RS]
+synonym: "J-RS" RELATED []
+is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene
+
+[Term]
+id: SO:0000303
+name: clip
+def: "Part of the primary transcript that is clipped off during processing." [SO:ke]
+subset: SOFA
+relationship: part_of SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000304
+name: type_II_enzyme_restriction_site
+def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com]
+is_obsolete: true
+
+[Term]
+id: SO:0000305
+name: modified_base_site
+def: "A modified nucleotide, i.e. a nucleotide other than A, T, C, G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+comment: modified base\:<modified_base>
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000306
+name: methylated_base_feature
+def: "A nucleotide modified by methylation." [SO:ke]
+subset: SOFA
+is_a: SO:0000305 ! modified_base_site
+
+[Term]
+id: SO:0000307
+name: CpG_island
+def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd]
+subset: SOFA
+synonym: "CG_island" RELATED []
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000308
+name: sequence_feature_locating_method
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000309
+name: computed_feature
+is_a: SO:0000308 ! sequence_feature_locating_method
+
+[Term]
+id: SO:0000310
+name: predicted_ab_initio_computation
+is_a: SO:0000309 ! computed_feature
+
+[Term]
+id: SO:0000311
+name: computed_feature_by_similarity
+def: "." [SO:ma]
+comment: similar to\:<sequence_id>
+is_a: SO:0000309 ! computed_feature
+
+[Term]
+id: SO:0000312
+name: experimentally_determined_feature
+is_a: SO:0000308 ! sequence_feature_locating_method
+
+[Term]
+id: SO:0000313
+name: stem_loop
+def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+is_a: SO:0000002 ! sequence_secondary_structure
+
+[Term]
+id: SO:0000314
+name: direct_repeat
+def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke]
+subset: SOFA
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000315
+name: transcription_start_site
+def: "The site where transcription begins." [SO:ke]
+subset: SOFA
+synonym: "TSS" RELATED []
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000316
+name: CDS
+def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma]
+subset: SOFA
+synonym: "coding_sequence" RELATED []
+relationship: part_of SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000317
+name: cDNA_clone
+def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html]
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000318
+name: start_codon
+def: "First codon to be translated by a ribosome." [SO:ke]
+synonym: "initiation codon" RELATED []
+is_a: SO:0000360 ! codon
+
+[Term]
+id: SO:0000319
+name: stop_codon
+def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke]
+is_a: SO:0000360 ! codon
+
+[Term]
+id: SO:0000320
+name: intronic_splice_enhancer
+def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke]
+is_a: SO:0000344 ! splice_enhancer
+relationship: part_of SO:0000662 ! spliceosomal_intron
+
+[Term]
+id: SO:0000321
+name: mRNA_with_plus_1_frameshift
+is_a: SO:0000108 ! mRNA_with_frameshift
+
+[Term]
+id: SO:0000322
+name: nuclease_hypersensitive_site
+is_a: SO:0000684 ! nuclease_sensitive_site
+
+[Term]
+id: SO:0000323
+name: coding_start
+def: "The first base to be translated into protein." [SO:ke]
+synonym: "translation_start" RELATED []
+relationship: part_of SO:0000316 ! CDS
+
+[Term]
+id: SO:0000324
+name: tag
+def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000325
+name: rRNA_large_subunit_primary_transcript
+def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke]
+is_a: SO:0000209 ! rRNA_primary_transcript
+
+[Term]
+id: SO:0000326
+name: SAGE_tag
+def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract]
+subset: SOFA
+is_a: SO:0000324 ! tag
+
+[Term]
+id: SO:0000327
+name: coding_end
+def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke]
+synonym: "translation_end" RELATED []
+relationship: part_of SO:0000316 ! CDS
+
+[Term]
+id: SO:0000328
+name: microarray_oligo
+synonym: "microarray_oligonucleotide" RELATED []
+is_a: SO:0000051 ! probe
+is_a: SO:0000324 ! tag
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000329
+name: mRNA_with_plus_2_frameshift
+is_a: SO:0000108 ! mRNA_with_frameshift
+
+[Term]
+id: SO:0000330
+name: conserved_region
+def: "Region of sequence similarity by descent from a common ancestor." [SO:ke]
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000331
+name: STS
+def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com]
+subset: SOFA
+synonym: "sequence_tag_site" RELATED []
+is_a: SO:0000324 ! tag
+
+[Term]
+id: SO:0000332
+name: coding_conserved_region
+def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke]
+is_a: SO:0000330 ! conserved_region
+
+[Term]
+id: SO:0000333
+name: exon_junction
+def: "The boundary between two exons in a processed transcript." [SO:ke]
+subset: SOFA
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000233 ! processed_transcript
+
+[Term]
+id: SO:0000334
+name: nc_conserved_region
+def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke]
+synonym: "noncoding_conserved_region" RELATED []
+is_a: SO:0000330 ! conserved_region
+
+[Term]
+id: SO:0000335
+name: mRNA_with_minus_2_frameshift
+is_a: SO:0000108 ! mRNA_with_frameshift
+
+[Term]
+id: SO:0000336
+name: pseudogene
+def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html]
+subset: SOFA
+is_a: SO:0000462 ! pseudogenic_region
+relationship: non_functional_homolog_of SO:0000704 ! gene
+
+[Term]
+id: SO:0000337
+name: RNAi_reagent
+def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd]
+subset: SOFA
+is_a: SO:0000695 ! reagent
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000338
+name: MITE
+def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083]
+is_a: SO:0000208 ! terminal_inverted_repeat_element
+
+[Term]
+id: SO:0000339
+name: recombination_hotspot
+def: "A region in a genome which promotes recombination." [SO:rd]
+is_a: SO:0000298 ! recombination_feature
+
+[Term]
+id: SO:0000340
+name: chromosome
+def: "Structural unit composed of long DNA molecule." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000341
+name: chromosome_band
+def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma]
+synonym: "cytological_band" RELATED []
+relationship: part_of SO:0000340 ! chromosome
+
+[Term]
+id: SO:0000342
+name: site_specific_recombination_target_region
+is_a: SO:0000299 ! specific_recombination_site
+
+[Term]
+id: SO:0000343
+name: match
+def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000344
+name: splice_enhancer
+def: "Region of a transcript that regulates splicing." [SO:ke]
+subset: SOFA
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000345
+name: EST
+def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html]
+subset: SOFA
+synonym: "expressed_sequence_tag" RELATED []
+is_a: SO:0000695 ! reagent
+relationship: derives_from SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000346
+name: Cre_recombination_target_region
+synonym: "lox_site" RELATED []
+is_a: SO:0000342 ! site_specific_recombination_target_region
+
+[Term]
+id: SO:0000347
+name: nucleotide_match
+def: "A match against a nucleotide sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000343 ! match
+
+[Term]
+id: SO:0000348
+name: nucleic_acid
+is_a: SO:0000443 ! polymer_type
+
+[Term]
+id: SO:0000349
+name: protein_match
+def: "A match against a protein sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000343 ! match
+
+[Term]
+id: SO:0000350
+name: FLP_recombination_target_region
+synonym: "FRT_site" RELATED []
+is_a: SO:0000342 ! site_specific_recombination_target_region
+
+[Term]
+id: SO:0000351
+name: synthetic_sequence
+def: "A sequence of nucleotides or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma]
+is_a: SO:0000443 ! polymer_type
+
+[Term]
+id: SO:0000352
+name: DNA
+is_a: SO:0000348 ! nucleic_acid
+
+[Term]
+id: SO:0000353
+name: assembly
+def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000354
+name: group_1_intron_homing_endonuclease_target_region
+is_a: SO:0000684 ! nuclease_sensitive_site
+
+[Term]
+id: SO:0000355
+name: haplotype_block
+def: "A region of the genome in which markers are co-inherited as the result of the lack of historic recombination between them due to their close proximity." [SO:ma]
+is_a: SO:0000298 ! recombination_feature
+
+[Term]
+id: SO:0000356
+name: RNA
+is_a: SO:0000348 ! nucleic_acid
+
+[Term]
+id: SO:0000357
+name: sequence_by_flanking_target_attribute
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000358
+name: protein
+def: "One or more polypeptides which may, or may not, be covalently bonded, and which assume a native secondary and tertiary structure." [SO:ma]
+comment: This definition no longer matches the meaning of the concept! Term should probably be proteinaceous or something... KEn
+is_a: SO:0000443 ! polymer_type
+
+[Term]
+id: SO:0000359
+name: floxed_sequence
+is_a: SO:0000357 ! sequence_by_flanking_target_attribute
+is_a: SO:0000452 ! transgene
+
+[Term]
+id: SO:0000360
+name: codon
+def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html]
+subset: SOFA
+relationship: part_of SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000361
+name: FRT_flanked_sequence
+is_a: SO:0000357 ! sequence_by_flanking_target_attribute
+
+[Term]
+id: SO:0000362
+name: chimeric_cDNA_clone
+def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma]
+is_a: SO:0000317 ! cDNA_clone
+
+[Term]
+id: SO:0000363
+name: floxed_gene
+is_a: SO:0000359 ! floxed_sequence
+
+[Term]
+id: SO:0000364
+name: transposable_element_flanking_region
+def: "The region of sequence surrounding a transposable element." [SO:ke]
+is_a: SO:0000239 ! flanking_region
+
+[Term]
+id: SO:0000365
+name: integron
+def: "DNA elements capable of mobilizing individual gene cassettes into bacterial chromosomes by site-specific recombination." [http://www.genomicglossaries.com/content/DNA.asp]
+is_a: SO:0000669 ! sequence_rearrangement_feature
+
+[Term]
+id: SO:0000366
+name: insertion_site
+def: "The junction where an insertion occurred." [SO:ke]
+subset: SOFA
+is_a: SO:0000109 ! sequence_variant
+is_a: SO:0000699 ! junction
+relationship: position_of SO:0000046 ! insert
+
+[Term]
+id: SO:0000367
+name: attI_site
+relationship: part_of SO:0000365 ! integron
+
+[Term]
+id: SO:0000368
+name: transposable_element_insertion_site
+def: "The junction in a genome where a transposable_element has inserted." [SO:ke]
+subset: SOFA
+is_a: SO:0000366 ! insertion_site
+
+[Term]
+id: SO:0000369
+name: integrase_coding_region
+relationship: part_of SO:0000365 ! integron
+
+[Term]
+id: SO:0000370
+name: small_regulatory_ncRNA
+def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma]
+subset: SOFA
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000371
+name: conjugative_transposon
+def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html]
+is_a: SO:0000182 ! DNA_transposon
+
+[Term]
+id: SO:0000372
+name: enzymatic_RNA
+def: "A non-coding RNA that has catalytic activity, with or without an associated ribonucleoprotein." [SO:ma]
+subset: SOFA
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000373
+name: recombinationally_inverted
+is_a: SO:0000456 ! recombinationally_rearranged_gene
+
+[Term]
+id: SO:0000374
+name: ribozyme
+def: "An RNA with catalytic activity." [SO:ma]
+subset: SOFA
+is_a: SO:0000372 ! enzymatic_RNA
+
+[Term]
+id: SO:0000375
+name: rRNA_5.8S
+def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002]
+subset: SOFA
+synonym: "5.8S_rRNA" RELATED []
+is_a: SO:0000252 ! rRNA
+
+[Term]
+id: SO:0000376
+name: RNA_6S
+def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013]
+synonym: "6S_RNA" RELATED []
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000377
+name: CsrB_RsmB_RNA
+def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia carotovora the RsmA protein has been shown to regulate the production of virulence determinants, such as extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018]
+synonym: "CsrB-RsmB_RNA" RELATED []
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000378
+name: DsrA_RNA
+def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014]
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000379
+name: GcvB_RNA
+def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022]
+is_a: SO:0000378 ! DsrA_RNA
+
+[Term]
+id: SO:0000380
+name: hammerhead_ribozyme
+def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html]
+subset: SOFA
+is_a: SO:0000374 ! ribozyme
+
+[Term]
+id: SO:0000381
+name: group_IIA_intron
+is_a: SO:0000603 ! group_II_intron
+
+[Term]
+id: SO:0000382
+name: group_IIB_intron
+is_a: SO:0000603 ! group_II_intron
+
+[Term]
+id: SO:0000383
+name: MicF_RNA
+def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033]
+is_a: SO:0000644 ! antisense_RNA
+
+[Term]
+id: SO:0000384
+name: OxyS_RNA
+def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035]
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000385
+name: RNase_MRP_RNA
+def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030]
+subset: SOFA
+is_a: SO:0000372 ! enzymatic_RNA
+
+[Term]
+id: SO:0000386
+name: RNase_P_RNA
+def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010]
+subset: SOFA
+is_a: SO:0000374 ! ribozyme
+
+[Term]
+id: SO:0000387
+name: RprA_RNA
+def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034]
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000388
+name: RRE_RNA
+def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036]
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000389
+name: spot_42_RNA
+def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021]
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000390
+name: telomerase_RNA
+def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025]
+subset: SOFA
+is_a: SO:0000372 ! enzymatic_RNA
+
+[Term]
+id: SO:0000391
+name: U1_snRNA
+def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000392
+name: U2_snRNA
+def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000393
+name: U4_snRNA
+def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000394
+name: U4atac_snRNA
+def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:12409455]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000395
+name: U5_snRNA
+def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000396
+name: U6_snRNA
+def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00026]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000397
+name: U6atac_snRNA
+def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000398
+name: U11_snRNA
+def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000399
+name: U12_snRNA
+def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000400
+name: sequence_attribute
+is_a: SO:0000000 ! Sequence_Ontology
+
+[Term]
+id: SO:0000401
+name: gene_attribute
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000402
+name: enhancer_attribute
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000403
+name: U14_snRNA
+def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016]
+subset: SOFA
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0000404
+name: vault_RNA
+def: "A family of RNAs found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006]
+subset: SOFA
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000405
+name: Y_RNA
+def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019]
+subset: SOFA
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000406
+name: twintron
+def: "An intron within an intron." [PMID:1899376]
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000407
+name: rRNA_18S
+def: "18S_rRNA -A large polynucleotide which functions as a part of the small subunit of the ribosome" [SO:ke]
+subset: SOFA
+synonym: "16S_rRNA" RELATED []
+is_a: SO:0000252 ! rRNA
+
+[Term]
+id: SO:0000408
+name: site
+def: "The interbase position where something (eg an aberration) occurred." [SO:ke]
+is_obsolete: true
+
+[Term]
+id: SO:0000409
+name: binding_site
+def: "A region on the surface of a molecule that may interact with another molecule." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000410
+name: protein_binding_site
+def: "A region of a molecule that binds to a protein." [SO:ke]
+is_a: SO:0000409 ! binding_site
+
+[Term]
+id: SO:0000411
+name: rescue_fragment
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000412
+name: restriction_fragment
+def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119]
+subset: SOFA
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000413
+name: sequence_difference
+def: "A region where the sequence differs from that of a specified sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000700 ! remark
+
+[Term]
+id: SO:0000414
+name: genomically_contaminated_cDNA_clone
+is_a: SO:0000317 ! cDNA_clone
+
+[Term]
+id: SO:0000415
+name: genomic_polyA_primed_cDNA_clone
+is_a: SO:0000317 ! cDNA_clone
+
+[Term]
+id: SO:0000416
+name: partially_unprocessed_cDNA_clone
+is_a: SO:0000317 ! cDNA_clone
+
+[Term]
+id: SO:0000417
+name: polypeptide_domain
+def: "A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains." [http://www.molbiol.bbsrc.ac.uk/new_protein/domains.html]
+relationship: part_of SO:0000104 ! polypeptide
+
+[Term]
+id: SO:0000418
+name: signal_peptide
+def: "The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html]
+subset: SOFA
+synonym: "signal peptide coding sequence" RELATED []
+relationship: part_of SO:0000104 ! polypeptide
+
+[Term]
+id: SO:0000419
+name: mature_peptide
+def: "The coding sequence for the mature or final peptide or protein product following post-translational modification." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html]
+subset: SOFA
+relationship: part_of SO:0000104 ! polypeptide
+
+[Term]
+id: SO:0000420
+name: five_prime_terminal_inverted_repeat
+is_a: SO:0000481 ! terminal_inverted_repeat
+
+[Term]
+id: SO:0000421
+name: three_prime_terminal_inverted_repeat
+is_a: SO:0000481 ! terminal_inverted_repeat
+
+[Term]
+id: SO:0000422
+name: U5_LTR_region
+relationship: part_of SO:0000286 ! long_terminal_repeat
+
+[Term]
+id: SO:0000423
+name: R_LTR_region
+relationship: part_of SO:0000286 ! long_terminal_repeat
+
+[Term]
+id: SO:0000424
+name: U3_LTR_region
+relationship: part_of SO:0000286 ! long_terminal_repeat
+
+[Term]
+id: SO:0000425
+name: five_prime_LTR
+is_a: SO:0000286 ! long_terminal_repeat
+
+[Term]
+id: SO:0000426
+name: three_prime_LTR
+is_a: SO:0000286 ! long_terminal_repeat
+
+[Term]
+id: SO:0000427
+name: R_five_prime_LTR_region
+is_a: SO:0000423 ! R_LTR_region
+relationship: part_of SO:0000425 ! five_prime_LTR
+
+[Term]
+id: SO:0000428
+name: U5_five_prime_LTR_region
+is_a: SO:0000422 ! U5_LTR_region
+relationship: part_of SO:0000425 ! five_prime_LTR
+
+[Term]
+id: SO:0000429
+name: U3_five_prime_LTR_region
+is_a: SO:0000424 ! U3_LTR_region
+relationship: part_of SO:0000425 ! five_prime_LTR
+
+[Term]
+id: SO:0000430
+name: R_three_prime_LTR_region
+relationship: part_of SO:0000426 ! three_prime_LTR
+
+[Term]
+id: SO:0000431
+name: U3_three_prime_LTR_region
+relationship: part_of SO:0000426 ! three_prime_LTR
+
+[Term]
+id: SO:0000432
+name: U5_three_prime_LTR_region
+relationship: part_of SO:0000426 ! three_prime_LTR
+
+[Term]
+id: SO:0000433
+name: non_LTR_retrotransposon_polymeric_tract
+def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke]
+is_a: SO:0000657 ! repeat_region
+relationship: part_of SO:0000189 ! non_LTR_retrotransposon
+
+[Term]
+id: SO:0000434
+name: transposable_element_target_site_duplication
+def: "A sequence of DNA that is duplicated when a transposable element inserts; usually found at each end of the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html]
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000435
+name: RR_tract
+def: "A polypurine tract within an LTR_retrotransposon." [SO:ke]
+synonym: "LTR_retrotransposon_poly_purine_tract" RELATED []
+is_a: SO:0000186 ! LTR_retrotransposon
+
+[Term]
+id: SO:0000436
+name: ARS
+def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma]
+subset: SOFA
+synonym: "autonomously_replicating_sequence" RELATED []
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000437
+name: assortment_derived_duplication
+is_obsolete: true
+
+[Term]
+id: SO:0000438
+name: gene_not_polyadenylated
+is_a: SO:0000066 ! gene_by_polyadenylation_attribute
+
+[Term]
+id: SO:0000439
+name: inverted_ring_chromosome
+is_a: SO:1000030 ! chromosomal_inversion
+is_a: SO:1000045 ! ring_chromosome
+
+[Term]
+id: SO:0000440
+name: vector
+def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma]
+is_a: SO:0000695 ! reagent
+relationship: part_of SO:0000151 ! clone
+
+[Term]
+id: SO:0000441
+name: ss_oligo
+def: "A single stranded oligonucleotide." [SO:ke]
+synonym: "single stranded oligonucleotide" RELATED []
+synonym: "ss_oligonucleotide" RELATED []
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000442
+name: ds_oligo
+def: "A double stranded oligonucleotide." [SO:ke]
+synonym: "double stranded oligonucleotide" RELATED []
+synonym: "ds_oligonucleotide" RELATED []
+is_a: SO:0000696 ! oligo
+
+[Term]
+id: SO:0000443
+name: polymer_type
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000444
+name: three_prime_noncoding_exon
+def: "Non-coding exon in the 3' UTR." [SO:ke]
+is_a: SO:0000198 ! noncoding_exon
+
+[Term]
+id: SO:0000445
+name: five_prime_noncoding_exon
+def: "Non-coding exon in the 5' UTR." [SO:ke]
+synonym: "five_prime_noncoding_exon" RELATED []
+is_a: SO:0000198 ! noncoding_exon
+
+[Term]
+id: SO:0000446
+name: UTR_intron
+def: "Intron located in the untranslated region." [SO:ke]
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000447
+name: five_prime_UTR_intron
+def: "An intron located in the 5' UTR." [SO:ke]
+is_a: SO:0000446 ! UTR_intron
+
+[Term]
+id: SO:0000448
+name: three_prime_UTR_intron
+def: "An intron located in the 3' UTR." [SO:ke]
+is_a: SO:0000446 ! UTR_intron
+
+[Term]
+id: SO:0000449
+name: random_sequence
+def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma]
+is_a: SO:0000351 ! synthetic_sequence
+
+[Term]
+id: SO:0000450
+name: interband
+def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma]
+is_a: SO:0000341 ! chromosome_band
+
+[Term]
+id: SO:0000451
+name: gene_polyadenylated
+is_a: SO:0000066 ! gene_by_polyadenylation_attribute
+
+[Term]
+id: SO:0000452
+name: transgene
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:0000453
+name: transposition
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:0000454
+name: rasiRNA
+def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284]
+subset: SOFA
+synonym: "repeat associated small interfering RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000455
+name: gene_with_mRNA_with_frameshift
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000456
+name: recombinationally_rearranged_gene
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000457
+name: interchromosomal_duplication
+is_a: SO:1000037 ! chromosomal_duplication
+
+[Term]
+id: SO:0000458
+name: D_gene
+def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-GENE]
+synonym: "D-GENE" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000504 ! D_DJ_C_cluster
+relationship: part_of SO:0000505 ! D_DJ_cluster
+relationship: part_of SO:0000506 ! D_DJ_J_C_cluster
+relationship: part_of SO:0000508 ! D_DJ_J_cluster
+relationship: part_of SO:0000509 ! D_J_C_cluster
+relationship: part_of SO:0000527 ! V_D_DJ_C_cluster
+relationship: part_of SO:0000528 ! V_D_DJ_cluster
+relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster
+relationship: part_of SO:0000530 ! V_D_DJ_J_cluster
+relationship: part_of SO:0000531 ! V_D_J_C_cluster
+relationship: part_of SO:0000532 ! V_D_J_cluster
+relationship: part_of SO:0000559 ! D_cluster
+relationship: part_of SO:0000560 ! D_J_cluster
+
+[Term]
+id: SO:0000459
+name: gene_with_trans_spliced_transcript
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000460
+name: vertebrate_immunoglobulin_T_cell_receptor_gene
+synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene" RELATED []
+is_a: SO:0000456 ! recombinationally_rearranged_gene
+
+[Term]
+id: SO:0000461
+name: inversion_derived_bipartite_deficiency
+def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km]
+is_a: SO:1000029 ! chromosomal_deletion
+
+[Term]
+id: SO:0000462
+name: pseudogenic_region
+def: "A non-functional descendent of a functional entity." [SO:cjm]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000463
+name: gene_with_alternately_spliced_transcript
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000464
+name: decayed_exon
+def: "A non-functional descendent of an exon." [SO:ke]
+subset: SOFA
+is_a: SO:0000462 ! pseudogenic_region
+relationship: non_functional_homolog_of SO:0000147 ! exon
+
+[Term]
+id: SO:0000465
+name: inversion_derived_deficiency_plus_duplication
+def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km]
+is_a: SO:1000029 ! chromosomal_deletion
+is_a: SO:1000038 ! intrachromosomal_duplication
+
+[Term]
+id: SO:0000466
+name: V_gene
+def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-GENE]
+synonym: "V_GENE" RELATED []
+synonym: "variable_gene" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000518 ! V_DJ_cluster
+relationship: part_of SO:0000519 ! V_DJ_J_cluster
+relationship: part_of SO:0000520 ! V_VDJ_C_cluster
+relationship: part_of SO:0000521 ! V_VDJ_cluster
+relationship: part_of SO:0000522 ! V_VDJ_J_cluster
+relationship: part_of SO:0000523 ! V_VJ_C_cluster
+relationship: part_of SO:0000524 ! V_VJ_cluster
+relationship: part_of SO:0000525 ! V_VJ_J_cluster
+relationship: part_of SO:0000526 ! V_cluster
+relationship: part_of SO:0000527 ! V_D_DJ_C_cluster
+relationship: part_of SO:0000528 ! V_D_DJ_cluster
+relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster
+relationship: part_of SO:0000530 ! V_D_DJ_J_cluster
+relationship: part_of SO:0000531 ! V_D_J_C_cluster
+relationship: part_of SO:0000532 ! V_D_J_cluster
+relationship: part_of SO:0000534 ! V_J_cluster
+relationship: part_of SO:0000535 ! V_J_C_cluster
+relationship: part_of SO:0000542 ! V_DJ_C_cluster
+relationship: part_of SO:0000564 ! V_DJ_J_C_cluster
+relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster
+relationship: part_of SO:0000566 ! V_VJ_J_C_cluster
+
+[Term]
+id: SO:0000467
+name: post_translationally_regulated_by_protein_stability
+synonym: "post-translationally_regulated_by_protein_stability" RELATED []
+is_a: SO:0000130 ! post_translationally_regulated
+
+[Term]
+id: SO:0000468
+name: golden_path_fragment
+def: "One of the pieces of sequence that make up a golden path." [SO:rd]
+subset: SOFA
+is_a: SO:0000143 ! assembly_component
+relationship: part_of SO:0000688 ! golden_path
+
+[Term]
+id: SO:0000469
+name: post_translationally_regulated_by_protein_modification
+synonym: "post-translationally_regulated_by_protein_modification" RELATED []
+is_a: SO:0000130 ! post_translationally_regulated
+
+[Term]
+id: SO:0000470
+name: J_gene
+def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-GENE]
+synonym: "J-GENE" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000485 ! DJ_J_cluster
+relationship: part_of SO:0000487 ! VDJ_J_C_cluster
+relationship: part_of SO:0000488 ! VDJ_J_cluster
+relationship: part_of SO:0000490 ! VJ_J_C_cluster
+relationship: part_of SO:0000491 ! VJ_J_cluster
+relationship: part_of SO:0000506 ! D_DJ_J_C_cluster
+relationship: part_of SO:0000508 ! D_DJ_J_cluster
+relationship: part_of SO:0000509 ! D_J_C_cluster
+relationship: part_of SO:0000511 ! J_C_cluster
+relationship: part_of SO:0000513 ! J_cluster
+relationship: part_of SO:0000519 ! V_DJ_J_cluster
+relationship: part_of SO:0000522 ! V_VDJ_J_cluster
+relationship: part_of SO:0000525 ! V_VJ_J_cluster
+relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster
+relationship: part_of SO:0000530 ! V_D_DJ_J_cluster
+relationship: part_of SO:0000531 ! V_D_J_C_cluster
+relationship: part_of SO:0000532 ! V_D_J_cluster
+relationship: part_of SO:0000534 ! V_J_cluster
+relationship: part_of SO:0000535 ! V_J_C_cluster
+relationship: part_of SO:0000540 ! DJ_J_C_cluster
+relationship: part_of SO:0000560 ! D_J_cluster
+relationship: part_of SO:0000564 ! V_DJ_J_C_cluster
+relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster
+relationship: part_of SO:0000566 ! V_VJ_J_C_cluster
+
+[Term]
+id: SO:0000471
+name: autoregulated
+is_a: SO:0000123 ! transcriptionally_regulated
+
+[Term]
+id: SO:0000472
+name: tiling_path
+def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO]
+subset: SOFA
+is_a: SO:0000353 ! assembly
+
+[Term]
+id: SO:0000473
+name: negatively_autoregulated
+is_a: SO:0000126 ! transcriptionally_repressed
+is_a: SO:0000471 ! autoregulated
+
+[Term]
+id: SO:0000474
+name: tiling_path_fragment
+def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke]
+subset: SOFA
+is_a: SO:0000143 ! assembly_component
+relationship: part_of SO:0000472 ! tiling_path
+
+[Term]
+id: SO:0000475
+name: positively_autoregulated
+is_a: SO:0000125 ! transcriptionally_induced
+is_a: SO:0000471 ! autoregulated
+
+[Term]
+id: SO:0000476
+name: contig_read
+def: "A DNA sequencer read which is part of a contig." [SO:ke]
+is_a: SO:0000150 ! read
+
+[Term]
+id: SO:0000477
+name: polycistronic_gene
+is_a: SO:0000081 ! member_gene_array
+
+[Term]
+id: SO:0000478
+name: C_gene
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-GENE]
+synonym: "C_GENE" RELATED []
+synonym: "constant_gene" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000487 ! VDJ_J_C_cluster
+relationship: part_of SO:0000489 ! VJ_C_cluster
+relationship: part_of SO:0000490 ! VJ_J_C_cluster
+relationship: part_of SO:0000504 ! D_DJ_C_cluster
+relationship: part_of SO:0000506 ! D_DJ_J_C_cluster
+relationship: part_of SO:0000509 ! D_J_C_cluster
+relationship: part_of SO:0000511 ! J_C_cluster
+relationship: part_of SO:0000520 ! V_VDJ_C_cluster
+relationship: part_of SO:0000523 ! V_VJ_C_cluster
+relationship: part_of SO:0000527 ! V_D_DJ_C_cluster
+relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster
+relationship: part_of SO:0000531 ! V_D_J_C_cluster
+relationship: part_of SO:0000535 ! V_J_C_cluster
+relationship: part_of SO:0000539 ! DJ_C_cluster
+relationship: part_of SO:0000540 ! DJ_J_C_cluster
+relationship: part_of SO:0000541 ! VDJ_C_cluster
+relationship: part_of SO:0000542 ! V_DJ_C_cluster
+relationship: part_of SO:0000558 ! C_cluster
+relationship: part_of SO:0000564 ! V_DJ_J_C_cluster
+relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster
+relationship: part_of SO:0000566 ! V_VJ_J_C_cluster
+
+[Term]
+id: SO:0000479
+name: trans_spliced_transcript
+synonym: "trans-spliced_transcript" RELATED []
+is_a: SO:0000082 ! processed_transcript_attribute
+
+[Term]
+id: SO:0000480
+name: tiling_path_clone
+def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly. A minimal_tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly attempting to minimize the overlap between adjacent clones. (LS)" [SO:ke]
+is_a: SO:0000151 ! clone
+is_a: SO:0000474 ! tiling_path_fragment
+
+[Term]
+id: SO:0000481
+name: terminal_inverted_repeat
+def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke]
+synonym: "TIR" RELATED []
+is_a: SO:0000657 ! repeat_region
+relationship: part_of SO:0000208 ! terminal_inverted_repeat_element
+
+[Term]
+id: SO:0000482
+name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene-cluster" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+
+[Term]
+id: SO:0000483
+name: nc_primary_transcript
+def: "A primary transcript that is never translated into a protein." [SO:ke]
+subset: SOFA
+synonym: "noncoding_primary_transcript" RELATED []
+is_a: SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000484
+name: three_prime_exon_noncoding_region
+def: "The sequence of the 3' exon that is not coding." [SO:ke]
+synonym: "three_prime_exon_noncoding_region" RELATED []
+relationship: part_of SO:0000202 ! three_prime_coding_exon
+
+[Term]
+id: SO:0000485
+name: DJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-CLUSTER]
+synonym: "(DJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000486
+name: five_prime_exon_noncoding_region
+def: "The sequence of the 5' exon preceding the start codon." [SO:ke]
+synonym: "five_prime_exon_noncoding_region" RELATED []
+relationship: part_of SO:0000200 ! five_prime_coding_exon
+
+[Term]
+id: SO:0000487
+name: VDJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-C-CLUSTER]
+synonym: "(VDJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000488
+name: VDJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-CLUSTER]
+synonym: "(VDJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000489
+name: VJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-C-CLUSTER]
+synonym: "(VJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000490
+name: VJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-C-CLUSTER]
+synonym: "(VJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000491
+name: VJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-CLUSTER]
+synonym: "(VJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000492
+name: D_gene_recombination_feature
+is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene
+
+[Term]
+id: SO:0000493
+name: three_prime_D_heptamer
+def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-HEPTAMER]
+synonym: "3'D-HEPTAMER" RELATED []
+is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence
+
+[Term]
+id: SO:0000494
+name: three_prime_D_nonamer
+def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-NONAMER]
+synonym: "3'D-NONAMER" RELATED []
+is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence
+
+[Term]
+id: SO:0000495
+name: three_prime_D_spacer
+def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-SPACER]
+synonym: "3'D-SPACER" RELATED []
+is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence
+
+[Term]
+id: SO:0000496
+name: five_prime_D_heptamer
+def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-HEPTAMER]
+synonym: "5'D-HEPTAMER" RELATED []
+is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence
+
+[Term]
+id: SO:0000497
+name: five_prime_D_nonamer
+def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-NONAMER]
+synonym: "5'D-NONAMER" RELATED []
+is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence
+
+[Term]
+id: SO:0000498
+name: five_prime_D_spacer
+def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-SPACER]
+synonym: "5'D-SPACER" RELATED []
+synonym: "five_prime_D-spacer" RELATED []
+is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence
+
+[Term]
+id: SO:0000499
+name: virtual_sequence
+def: "A continuous piece of sequence similar to the 'virtual contig' concept of ensembl." [SO:ke]
+subset: SOFA
+is_a: SO:0000353 ! assembly
+
+[Term]
+id: SO:0000500
+name: Hoogsteen_base_pair
+def: "A type of non-canonical base-pairing. This is less energetically favourable than Watson-Crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293]
+is_a: SO:0000028 ! base_pair
+
+[Term]
+id: SO:0000501
+name: reverse_Hoogsteen_base_pair
+def: "A type of non-canonical base-pairing." [SO:ke]
+is_a: SO:0000028 ! base_pair
+
+[Term]
+id: SO:0000502
+name: transcribed_region
+def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may encompass the sequence covered by all of the transcripts of an alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke]
+comment: This concept came about as a direct result of the SO meeting August 2004. The exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being.
+subset: SOFA
+is_obsolete: true
+
+[Term]
+id: SO:0000503
+name: alternately_spliced_gene_encodeing_one_transcript
+is_obsolete: true
+
+[Term]
+id: SO:0000504
+name: D_DJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-C-CLUSTER]
+synonym: "D-(DJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000505
+name: D_DJ_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-CLUSTER]
+synonym: "D-(DJ)-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000506
+name: D_DJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-C-CLUSTER]
+synonym: "D-(DJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000507
+name: pseudogenic_exon
+is_a: SO:0000462 ! pseudogenic_region
+relationship: non_functional_homolog_of SO:0000147 ! exon
+relationship: part_of SO:0000516 ! pseudogenic_transcript
+
+[Term]
+id: SO:0000508
+name: D_DJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-CLUSTER]
+synonym: "D-(DJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000509
+name: D_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-C-CLUSTER]
+synonym: "D-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000510
+name: VD_gene
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-GENE]
+synonym: "V_D_GENE" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+
+[Term]
+id: SO:0000511
+name: J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-C-CLUSTER]
+synonym: "J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000512
+name: inversion_derived_deficiency_plus_aneuploid
+def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km]
+is_a: SO:1000029 ! chromosomal_deletion
+
+[Term]
+id: SO:0000513
+name: J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-CLUSTER]
+synonym: "J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000514
+name: J_nonamer
+def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-NONAMER]
+synonym: "J-NONAMER" RELATED []
+is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000302 ! J_gene_recombination_feature
+
+[Term]
+id: SO:0000515
+name: J_heptamer
+def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-HEPTAMER]
+synonym: "J-HEPTAMER" RELATED []
+is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000302 ! J_gene_recombination_feature
+
+[Term]
+id: SO:0000516
+name: pseudogenic_transcript
+is_a: SO:0000462 ! pseudogenic_region
+relationship: non_functional_homolog_of SO:0000673 ! transcript
+relationship: part_of SO:0000336 ! pseudogene
+
+[Term]
+id: SO:0000517
+name: J_spacer
+def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-SPACER]
+synonym: "J-SPACER" RELATED []
+is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000302 ! J_gene_recombination_feature
+
+[Term]
+id: SO:0000518
+name: V_DJ_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-CLUSTER]
+synonym: "V-(DJ)-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000519
+name: V_DJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-CLUSTER]
+synonym: "V-(DJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000520
+name: V_VDJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-C-CLUSTER]
+synonym: "V-(VDJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000521
+name: V_VDJ_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-CLUSTER]
+synonym: "V-(VDJ)-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000522
+name: V_VDJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-CLUSTER]
+synonym: "V-(VDJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000523
+name: V_VJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-C-CLUSTER]
+synonym: "V-(VJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000524
+name: V_VJ_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-CLUSTER]
+synonym: "V-(VJ)-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000525
+name: V_VJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-CLUSTER]
+synonym: "V-(VJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000526
+name: V_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-CLUSTER]
+synonym: "V-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000527
+name: V_D_DJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-C-CLUSTER]
+synonym: "V-D-(DJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000528
+name: V_D_DJ_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-CLUSTER]
+synonym: "V-D-(DJ)-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000529
+name: V_D_DJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-C-CLUSTER]
+synonym: "V-D-(DJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000530
+name: V_D_DJ_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-CLUSTER]
+synonym: "V-D-(DJ)-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000531
+name: V_D_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-C-CLUSTER]
+synonym: "V-D-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000532
+name: V_D_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-CLUSTER]
+synonym: "V-D-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000533
+name: V_heptamer
+def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-HEPTAMER]
+synonym: "V-HEPTAMER" RELATED []
+is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000538 ! V_gene_recombination_feature
+
+[Term]
+id: SO:0000534
+name: V_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-CLUSTER]
+synonym: "V-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000535
+name: V_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-C-CLUSTER]
+synonym: "V-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000536
+name: V_nonamer
+def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-NONAMER]
+synonym: "V-NONAMER" RELATED []
+is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000538 ! V_gene_recombination_feature
+
+[Term]
+id: SO:0000537
+name: V_spacer
+def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-SPACER]
+synonym: "V-SPACER" RELATED []
+is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene
+relationship: part_of SO:0000538 ! V_gene_recombination_feature
+
+[Term]
+id: SO:0000538
+name: V_gene_recombination_feature
+def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-RS]
+synonym: "V-RS" RELATED []
+is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene
+
+[Term]
+id: SO:0000539
+name: DJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-C-CLUSTER]
+synonym: "(DJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000540
+name: DJ_J_C_cluster
+def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-C-CLUSTER]
+synonym: "(DJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000541
+name: VDJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-C-CLUSTER]
+synonym: "(VDJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000542
+name: V_DJ_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-C-CLUSTER]
+synonym: "V-(DJ)-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000543
+name: alternately_spliced_gene_encoding_greater_than_one_transcript
+is_obsolete: true
+
+[Term]
+id: SO:0000544
+name: helitron
+def: "A rolling circle transposon. Autonomous Helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569]
+is_a: SO:0000101 ! transposable_element
+
+[Term]
+id: SO:0000545
+name: recoding_pseudoknot
+def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937]
+is_a: SO:0000591 ! pseudoknot
+relationship: part_of SO:1001268 ! recoding_stimulatory_region
+
+[Term]
+id: SO:0000546
+name: designed_sequence
+is_a: SO:0000351 ! synthetic_sequence
+
+[Term]
+id: SO:0000547
+name: inversion_derived_bipartite_duplication
+def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km]
+is_a: SO:1000038 ! intrachromosomal_duplication
+
+[Term]
+id: SO:0000548
+name: gene_with_edited_transcript
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000549
+name: inversion_derived_duplication_plus_aneuploid
+def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km]
+is_a: SO:1000038 ! intrachromosomal_duplication
+
+[Term]
+id: SO:0000550
+name: aneuploid_chromosome
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:0000551
+name: polyA_signal_sequence
+def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000552
+name: Shine_Dalgarno_sequence
+def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke]
+synonym: "RBS" RELATED []
+synonym: "Shine-Dalgarno_sequence" RELATED []
+synonym: "five_prime_ribosome_binding_site" RELATED []
+is_a: SO:0000139 ! ribosome_entry_site
+
+[Term]
+id: SO:0000553
+name: polyA_site
+def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000233 ! processed_transcript
+
+[Term]
+id: SO:0000554
+name: assortment_derived_deficiency_plus_duplication
+is_obsolete: true
+
+[Term]
+id: SO:0000555
+name: five_prime_clip
+def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+synonym: "five_prime_-clip" RELATED []
+is_a: SO:0000303 ! clip
+
+[Term]
+id: SO:0000556
+name: five_prime_D_recombination_signal_sequence
+def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000496) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-RS]
+synonym: "5'RS" RELATED []
+synonym: "five_prime_D-recombination_signal_sequence" RELATED []
+is_a: SO:0000492 ! D_gene_recombination_feature
+
+[Term]
+id: SO:0000557
+name: three_prime_clip
+def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+synonym: "3'-clip" RELATED []
+is_a: SO:0000303 ! clip
+
+[Term]
+id: SO:0000558
+name: C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-CLUSTER]
+synonym: "C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000559
+name: D_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-CLUSTER]
+synonym: "D-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000560
+name: D_J_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-CLUSTER]
+synonym: "D-J-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000561
+name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene
+def: " 7 nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin/T-cell receptor gene" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#HEPTAMER]
+synonym: "HEPTAMER" RELATED []
+is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene
+
+[Term]
+id: SO:0000562
+name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene
+is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene
+
+[Term]
+id: SO:0000563
+name: spacer_of_recombination_feature_of_vertebrate_immune_system_gene
+is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene
+
+[Term]
+id: SO:0000564
+name: V_DJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-C-CLUSTER]
+synonym: "V-(DJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000565
+name: V_VDJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-C-CLUSTER]
+synonym: "V-(VDJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000566
+name: V_VJ_J_C_cluster
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-C-CLUSTER]
+synonym: "V-(VJ)-J-C-CLUSTER" RELATED []
+is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster
+
+[Term]
+id: SO:0000567
+name: inversion_derived_aneuploid_chromosome
+def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km]
+is_a: SO:0000550 ! aneuploid_chromosome
+
+[Term]
+id: SO:0000568
+name: bidirectional_promotor
+is_a: SO:0000167 ! promoter
+
+[Term]
+id: SO:0000569
+name: retrotransposed_protein_coding_gene
+alt_id: SO:0100042
+synonym: "captured_pseudogene" RELATED []
+is_a: SO:0000010 ! protein_coding_gene
+is_a: SO:0000042 ! pseudogene_attribute
+
+[Term]
+id: SO:0000570
+name: three_prime_D_recombination_signal_sequence
+def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-RS]
+synonym: "3'D-RS" RELATED []
+synonym: "three_prime_D-recombination_signal_sequence" RELATED []
+is_a: SO:0000492 ! D_gene_recombination_feature
+
+[Term]
+id: SO:0000571
+name: miRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000572
+name: DJ_gene
+def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-GENE]
+synonym: "D_J_GENE" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000485 ! DJ_J_cluster
+relationship: part_of SO:0000504 ! D_DJ_C_cluster
+relationship: part_of SO:0000505 ! D_DJ_cluster
+relationship: part_of SO:0000506 ! D_DJ_J_C_cluster
+relationship: part_of SO:0000508 ! D_DJ_J_cluster
+relationship: part_of SO:0000518 ! V_DJ_cluster
+relationship: part_of SO:0000519 ! V_DJ_J_cluster
+relationship: part_of SO:0000527 ! V_D_DJ_C_cluster
+relationship: part_of SO:0000528 ! V_D_DJ_cluster
+relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster
+relationship: part_of SO:0000530 ! V_D_DJ_J_cluster
+relationship: part_of SO:0000539 ! DJ_C_cluster
+relationship: part_of SO:0000540 ! DJ_J_C_cluster
+relationship: part_of SO:0000542 ! V_DJ_C_cluster
+relationship: part_of SO:0000564 ! V_DJ_J_C_cluster
+
+[Term]
+id: SO:0000573
+name: rRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000574
+name: VDJ_gene
+def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-GENE]
+synonym: "V-D-J-GENE" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000487 ! VDJ_J_C_cluster
+relationship: part_of SO:0000488 ! VDJ_J_cluster
+relationship: part_of SO:0000520 ! V_VDJ_C_cluster
+relationship: part_of SO:0000521 ! V_VDJ_cluster
+relationship: part_of SO:0000522 ! V_VDJ_J_cluster
+relationship: part_of SO:0000541 ! VDJ_C_cluster
+relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster
+
+[Term]
+id: SO:0000575
+name: scRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000576
+name: VJ_gene
+def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-GENE]
+synonym: "V-J-GENE" RELATED []
+is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene
+relationship: part_of SO:0000489 ! VJ_C_cluster
+relationship: part_of SO:0000490 ! VJ_J_C_cluster
+relationship: part_of SO:0000491 ! VJ_J_cluster
+relationship: part_of SO:0000523 ! V_VJ_C_cluster
+relationship: part_of SO:0000524 ! V_VJ_cluster
+relationship: part_of SO:0000525 ! V_VJ_J_cluster
+relationship: part_of SO:0000566 ! V_VJ_J_C_cluster
+
+[Term]
+id: SO:0000577
+name: centromere
+def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke]
+subset: SOFA
+is_a: SO:0000628 ! chromosomal_structural_element
+
+[Term]
+id: SO:0000578
+name: snoRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000579
+name: edited_transcript_feature
+def: "A locatable feature on a transcript that is edited." [SO:ma]
+relationship: part_of SO:0000673 ! transcript
+
+[Term]
+id: SO:0000580
+name: methylation_guide_snoRNA_primary_transcript
+def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke]
+is_a: SO:0000232 ! snoRNA_primary_transcript
+
+[Term]
+id: SO:0000581
+name: cap
+def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html]
+subset: SOFA
+relationship: adjacent_to SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000582
+name: rRNA_cleavage_snoRNA_primary_transcript
+def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke]
+is_a: SO:0000232 ! snoRNA_primary_transcript
+
+[Term]
+id: SO:0000583
+name: pre_edited_region
+def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu]
+synonym: "pre-edited_region" RELATED []
+is_a: SO:0000579 ! edited_transcript_feature
+
+[Term]
+id: SO:0000584
+name: tmRNA
+def: "tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNA genes have been found encoded in two parts. tmRNAs have been identified in eubacteria and some chloroplasts but are absent from archaeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023]
+synonym: "10Sa_RNA" RELATED []
+synonym: "ssrA" RELATED []
+is_a: SO:0000370 ! small_regulatory_ncRNA
+
+[Term]
+id: SO:0000585
+name: C_D_box_snoRNA_gene
+is_a: SO:0000578 ! snoRNA_gene
+
+[Term]
+id: SO:0000586
+name: tmRNA_primary_transcript
+def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke]
+synonym: "10Sa_RNA_primary_transcript" RELATED []
+synonym: "ssrA_RNA_primary_transcript" RELATED []
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000587
+name: group_I_intron
+def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028]
+subset: SOFA
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000588
+name: autocatalytically_spliced_intron
+def: "A self spliced intron." [SO:ke]
+subset: SOFA
+is_a: SO:0000188 ! intron
+is_a: SO:0000374 ! ribozyme
+
+[Term]
+id: SO:0000589
+name: SRP_RNA_primary_transcript
+def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke]
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000590
+name: SRP_RNA
+def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017]
+subset: SOFA
+synonym: "7S RNA" RELATED []
+synonym: "signal_recognition_particle_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000591
+name: pseudoknot
+def: "A stem-loop RNA structure where nucleotides in the loop participate in complementary interactions with a region of RNA downstream of the stem-loop." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract]
+is_a: SO:0000002 ! sequence_secondary_structure
+
+[Term]
+id: SO:0000592
+name: H_pseudoknot
+def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract]
+synonym: "H-pseudoknot" RELATED []
+is_a: SO:0000591 ! pseudoknot
+
+[Term]
+id: SO:0000593
+name: C_D_box_snoRNA
+def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html]
+synonym: "C/D_box_snoRNA" RELATED []
+is_a: SO:0000275 ! snoRNA
+
+[Term]
+id: SO:0000594
+name: H_ACA_box_snoRNA
+def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html]
+synonym: "H/ACA_box_snoRNA" RELATED []
+is_a: SO:0000275 ! snoRNA
+
+[Term]
+id: SO:0000595
+name: C_D_box_snoRNA_primary_transcript
+def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke]
+is_a: SO:0000232 ! snoRNA_primary_transcript
+
+[Term]
+id: SO:0000596
+name: H_ACA_box_snoRNA_primary_transcript
+def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke]
+is_a: SO:0000232 ! snoRNA_primary_transcript
+
+[Term]
+id: SO:0000597
+name: transcript_edited_by_U_insertion/deletion
+def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html]
+is_a: SO:0000116 ! edited_transcript
+
+[Term]
+id: SO:0000598
+name: transcript_edited_by_C_insertion_and_dinucleotide_insertion
+def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." [http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm]
+synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED []
+is_a: SO:0000116 ! edited_transcript
+
+[Term]
+id: SO:0000599
+name: transcript_edited_by_C_to_U_substitution
+is_a: SO:0000116 ! edited_transcript
+
+[Term]
+id: SO:0000600
+name: transcript_edited_by_A_to_I_substitution
+is_a: SO:0000116 ! edited_transcript
+
+[Term]
+id: SO:0000601
+name: transcript_edited_by_G_addition
+is_a: SO:0000116 ! edited_transcript
+
+[Term]
+id: SO:0000602
+name: guide_RNA
+def: "A short 3'-uridylated RNA that can form a perfect duplex (except for the oligoU tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html]
+subset: SOFA
+synonym: "gRNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000603
+name: group_II_intron
+def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml]
+subset: SOFA
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000604
+name: editing_block
+def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://www.rna.ucla/]
+is_a: SO:0000579 ! edited_transcript_feature
+
+[Term]
+id: SO:0000605
+name: intergenic_region
+def: "The region between two known genes." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000606
+name: editing_domain
+def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/]
+is_a: SO:0000579 ! edited_transcript_feature
+
+[Term]
+id: SO:0000607
+name: unedited_region
+def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/]
+is_a: SO:0000579 ! edited_transcript_feature
+
+[Term]
+id: SO:0000608
+name: H_ACA_box_snoRNA_gene
+is_a: SO:0000578 ! snoRNA_gene
+
+[Term]
+id: SO:0000609
+name: oligo_U_tail
+def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/]
+relationship: adjacent_to SO:0000602 ! guide_RNA
+
+[Term]
+id: SO:0000610
+name: polyA_sequence
+def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke]
+subset: SOFA
+relationship: adjacent_to SO:0000234 ! mRNA
+
+[Term]
+id: SO:0000611
+name: branch_site
+def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke]
+subset: SOFA
+synonym: "branch_point" RELATED []
+relationship: part_of SO:0000662 ! spliceosomal_intron
+
+[Term]
+id: SO:0000612
+name: polypyrimidine_tract
+def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888]
+subset: SOFA
+relationship: part_of SO:0000662 ! spliceosomal_intron
+
+[Term]
+id: SO:0000613
+name: bacterial_RNApol_promoter
+def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke]
+is_a: SO:0000167 ! promoter
+is_a: SO:0000752 ! gene_group_regulatory_region
+
+[Term]
+id: SO:0000614
+name: bacterial_terminator
+def: "A terminator signal for bacterial transcription." [SO:ke]
+is_a: SO:0000141 ! terminator
+
+[Term]
+id: SO:0000615
+name: terminator_of_type_2_RNApol_III_promoter
+def: "A terminator signal for RNA polymerase III transcription." [SO:ke]
+is_a: SO:0000141 ! terminator
+
+[Term]
+id: SO:0000616
+name: transcription_end_site
+def: "The site where transcription ends." [SO:ke]
+subset: SOFA
+is_a: SO:0000699 ! junction
+relationship: part_of SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000617
+name: RNApol_III_promoter_type_1
+is_a: SO:0000171 ! RNApol_III_promoter
+
+[Term]
+id: SO:0000618
+name: RNApol_III_promoter_type_2
+synonym: "tRNA_promoter" RELATED []
+is_a: SO:0000171 ! RNApol_III_promoter
+
+[Term]
+id: SO:0000619
+name: A_box
+relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2
+
+[Term]
+id: SO:0000620
+name: B_box
+relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2
+
+[Term]
+id: SO:0000621
+name: RNApol_III_promoter_type_3
+is_a: SO:0000171 ! RNApol_III_promoter
+
+[Term]
+id: SO:0000622
+name: C_box
+relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1
+
+[Term]
+id: SO:0000623
+name: snRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000624
+name: telomere
+def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma]
+subset: SOFA
+is_a: SO:0000628 ! chromosomal_structural_element
+
+[Term]
+id: SO:0000625
+name: silencer
+def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm#s]
+subset: SOFA
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000626
+name: chromosomal_regulatory_element
+relationship: part_of SO:0000340 ! chromosome
+
+[Term]
+id: SO:0000627
+name: insulator
+subset: SOFA
+synonym: "insulator_element" RELATED []
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000628
+name: chromosomal_structural_element
+subset: SOFA
+relationship: part_of SO:0000340 ! chromosome
+
+[Term]
+id: SO:0000629
+name: five_prime_open_reading_frame
+relationship: part_of SO:0000204 ! five_prime_UTR
+
+[Term]
+id: SO:0000630
+name: upstream_AUG_codon
+relationship: part_of SO:0000203 ! UTR
+
+[Term]
+id: SO:0000631
+name: polycistronic_primary_transcript
+def: "A primary transcript encoding for more than one protein product." [SO:ke]
+is_a: SO:0000078 ! polycistronic_transcript
+
+[Term]
+id: SO:0000632
+name: monocistronic_primary_transcript
+def: "A primary transcript encoding for only one protein product." [SO:ke]
+is_a: SO:0000665 ! monocistronic_transcript
+
+[Term]
+id: SO:0000633
+name: monocistronic_mRNA
+def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd]
+synonym: "monocistronic_processed_transcript" RELATED []
+is_a: SO:0000665 ! monocistronic_transcript
+
+[Term]
+id: SO:0000634
+name: polycistronic_mRNA
+def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd]
+synonym: "polycistronic_processed_transcript" RELATED []
+is_a: SO:0000078 ! polycistronic_transcript
+
+[Term]
+id: SO:0000635
+name: mini_exon_donor_RNA
+def: "The 3' site of a mini-exon which is trans-spliced on to the 5'end of a mature mRNA." [SO:ke]
+synonym: "mini-exon_donor_RNA" RELATED []
+is_a: SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000636
+name: spliced_leader_RNA
+synonym: "mini-exon" RELATED []
+relationship: part_of SO:0000635 ! mini_exon_donor_RNA
+
+[Term]
+id: SO:0000637
+name: engineered_plasmid
+synonym: "engineered_plasmid_gene" RELATED []
+is_a: SO:0000098 ! plasmid_gene
+is_a: SO:0000280 ! engineered_gene
+
+[Term]
+id: SO:0000638
+name: transcribed_spacer_region
+def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html]
+relationship: part_of SO:0000209 ! rRNA_primary_transcript
+
+[Term]
+id: SO:0000639
+name: internal_transcribed_spacer_region
+def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke]
+is_a: SO:0000638 ! transcribed_spacer_region
+
+[Term]
+id: SO:0000640
+name: external_transcribed_spacer_region
+def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke]
+is_a: SO:0000638 ! transcribed_spacer_region
+
+[Term]
+id: SO:0000641
+name: tetranucleotide_repeat_microsatellite_feature
+is_a: SO:0000289 ! microsatellite
+
+[Term]
+id: SO:0000642
+name: SRP_RNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000643
+name: minisatellite
+def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm]
+subset: SOFA
+is_a: SO:0000705 ! tandem_repeat
+
+[Term]
+id: SO:0000644
+name: antisense_RNA
+def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke]
+subset: SOFA
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000645
+name: antisense_primary_transcript
+def: "The reverse complement of the primary transcript." [SO:ke]
+subset: SOFA
+is_a: SO:0000185 ! primary_transcript
+
+[Term]
+id: SO:0000646
+name: siRNA
+def: "Small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. siRNAs trigger the cleavage of their target molecules." [PMID:12592000]
+subset: SOFA
+synonym: "small_interfering_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000647
+name: miRNA_primary_transcript
+def: "A primary transcript encoding a micro RNA." [SO:ke]
+synonym: "micro_RNA_primary_transcript" RELATED []
+is_a: SO:0000483 ! nc_primary_transcript
+
+[Term]
+id: SO:0000648
+name: stRNA_primary_transcript
+def: "A primary transcript encoding a small temporal RNA (SO:0000649)." [SO:ke]
+synonym: "small_temporal_RNA_primary_transcript" RELATED []
+is_a: SO:0000647 ! miRNA_primary_transcript
+
+[Term]
+id: SO:0000649
+name: stRNA
+def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512]
+subset: SOFA
+synonym: "small_temporal_RNA" RELATED []
+is_a: SO:0000655 ! ncRNA
+
+[Term]
+id: SO:0000650
+name: small_subunit_rRNA
+is_a: SO:0000252 ! rRNA
+
+[Term]
+id: SO:0000651
+name: large_subunit_rRNA
+is_a: SO:0000252 ! rRNA
+
+[Term]
+id: SO:0000652
+name: rRNA_5S
+def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001]
+subset: SOFA
+is_a: SO:0000252 ! rRNA
+
+[Term]
+id: SO:0000653
+name: rRNA_28S
+def: "A component of the large ribosomal subunit." [SO:ke]
+subset: SOFA
+synonym: "23S_rRNA" RELATED []
+synonym: "28S_rRNA" RELATED []
+is_a: SO:0000252 ! rRNA
+
+[Term]
+id: SO:0000654
+name: maxi_circle_gene
+synonym: "maxi-circle" RELATED []
+is_a: SO:0000088 ! mt_gene
+
+[Term]
+id: SO:0000655
+name: ncRNA
+def: "An RNA sequence that does not encode a protein; rather, the RNA molecule itself is the gene product." [SO:ke]
+comment: ncRNA is a processed_transcript so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript.
+subset: SOFA
+synonym: "noncoding_RNA" RELATED []
+is_a: SO:0000233 ! processed_transcript
+
+[Term]
+id: SO:0000656
+name: stRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000657
+name: repeat_region
+def: "A region of sequence containing one or more repeat units." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000658
+name: dispersed_repeat
+def: "A repeat that is located at dispersed sites in the genome." [SO:ke]
+subset: SOFA
+synonym: "interspersed_repeat" RELATED []
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000659
+name: tmRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000660
+name: DNA_invertase_target_sequence
+is_a: SO:0000342 ! site_specific_recombination_target_region
+
+[Term]
+id: SO:0000661
+name: intron_attribute
+is_a: SO:0000401 ! gene_attribute
+
+[Term]
+id: SO:0000662
+name: spliceosomal_intron
+def: "An intron which is spliced by the spliceosome." [SO:ke]
+subset: SOFA
+is_a: SO:0000188 ! intron
+
+[Term]
+id: SO:0000663
+name: tRNA_gene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:0000664
+name: introgressed_chromosome_region
+relationship: part_of SO:0000340 ! chromosome
+
+[Term]
+id: SO:0000665
+name: monocistronic_transcript
+is_a: SO:0000115 ! transcript_feature
+
+[Term]
+id: SO:0000666
+name: mobile_intron
+is_a: SO:0000661 ! intron_attribute
+
+[Term]
+id: SO:0000667
+name: insertion
+def: "A region of sequence identified as having been inserted." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+is_a: SO:0000109 ! sequence_variant
+relationship: sequence_of SO:0000046 ! insert
+
+[Term]
+id: SO:0000668
+name: EST_match
+def: "A match against an EST sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000102 ! expressed_sequence_match
+
+[Term]
+id: SO:0000669
+name: sequence_rearrangement_feature
+is_a: SO:0000298 ! recombination_feature
+
+[Term]
+id: SO:0000670
+name: chromosome_breakage_sequence
+def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma]
+is_a: SO:0000669 ! sequence_rearrangement_feature
+
+[Term]
+id: SO:0000671
+name: internal_eliminated_sequence
+def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma]
+is_a: SO:0000669 ! sequence_rearrangement_feature
+
+[Term]
+id: SO:0000672
+name: macronucleus_destined_segment
+def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma]
+is_a: SO:0000669 ! sequence_rearrangement_feature
+
+[Term]
+id: SO:0000673
+name: transcript
+def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma]
+subset: SOFA
+relationship: member_of SO:0000704 ! gene
+
+[Term]
+id: SO:0000674
+name: non_canonical_splice_site
+def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke]
+synonym: "non-canonical_splice_site" RELATED []
+is_a: SO:0000162 ! splice_site
+
+[Term]
+id: SO:0000675
+name: canonical_splice_site
+def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke]
+is_a: SO:0000162 ! splice_site
+
+[Term]
+id: SO:0000676
+name: canonical_three_prime_splice_site
+def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke]
+is_a: SO:0000164 ! splice_acceptor_site
+is_a: SO:0000675 ! canonical_splice_site
+
+[Term]
+id: SO:0000677
+name: canonical_five_prime_splice_site
+def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke]
+is_a: SO:0000163 ! splice_donor_site
+is_a: SO:0000675 ! canonical_splice_site
+
+[Term]
+id: SO:0000678
+name: non_canonical_three_prime_splice_site
+def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke]
+synonym: "non-canonical_three_prime_splice_site" RELATED []
+is_a: SO:0000164 ! splice_acceptor_site
+is_a: SO:0000674 ! non_canonical_splice_site
+
+[Term]
+id: SO:0000679
+name: non_canonical_five_prime_splice_site
+def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke]
+synonym: "non-canonical-five_prime_splice_site" RELATED []
+is_a: SO:0000163 ! splice_donor_site
+is_a: SO:0000674 ! non_canonical_splice_site
+
+[Term]
+id: SO:0000680
+name: non_canonical_start_codon
+def: "A start codon that is not the usual AUG sequence." [SO:ke]
+synonym: "non-canonical_start_codon" RELATED []
+synonym: "non_ATG_start_codon" RELATED []
+is_a: SO:0000318 ! start_codon
+
+[Term]
+id: SO:0000681
+name: aberrant_processed_transcript
+def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke]
+is_a: SO:0000233 ! processed_transcript
+
+[Term]
+id: SO:0000682
+name: splicing_feature
+is_obsolete: true
+
+[Term]
+id: SO:0000683
+name: exonic_splice_enhancer
+def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract]
+is_a: SO:0000344 ! splice_enhancer
+
+[Term]
+id: SO:0000684
+name: nuclease_sensitive_site
+def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000685
+name: DNAaseI_hypersensitive_site
+is_a: SO:0000322 ! nuclease_hypersensitive_site
+
+[Term]
+id: SO:0000686
+name: translocation_element
+def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma]
+relationship: part_of SO:1000044 ! chromosomal_translocation
+
+[Term]
+id: SO:0000687
+name: deletion_junction
+def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke]
+subset: SOFA
+is_a: SO:0000109 ! sequence_variant
+is_a: SO:0000699 ! junction
+relationship: position_of SO:0000045 ! delete
+
+[Term]
+id: SO:0000688
+name: golden_path
+def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls]
+subset: SOFA
+is_a: SO:0000353 ! assembly
+
+[Term]
+id: SO:0000689
+name: cDNA_match
+def: "A match against cDNA sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000102 ! expressed_sequence_match
+
+[Term]
+id: SO:0000690
+name: gene_with_polycistronic_transcript
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000691
+name: translocation_site
+def: "The space between two bases in a sequence which marks the position where a translocation has occurred." [SO:ke]
+relationship: position_of SO:0000049 ! translocate
+
+[Term]
+id: SO:0000692
+name: gene_with_dicistronic_transcript
+is_a: SO:0000690 ! gene_with_polycistronic_transcript
+
+[Term]
+id: SO:0000693
+name: gene_with_recoded_mRNA
+is_a: SO:0000064 ! gene_by_transcript_attribute
+
+[Term]
+id: SO:0000694
+name: SNP
+def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf]
+subset: SOFA
+synonym: "single_nucleotide_polymorphism" RELATED []
+is_a: SO:1000008 ! point_mutation
+
+[Term]
+id: SO:0000695
+name: reagent
+def: "A sequence used in experiment." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000696
+name: oligo
+def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma]
+subset: SOFA
+synonym: "oligonucleotide" RELATED []
+is_a: SO:0000695 ! reagent
+
+[Term]
+id: SO:0000697
+name: gene_with_stop_codon_read_through
+is_a: SO:0000693 ! gene_with_recoded_mRNA
+
+[Term]
+id: SO:0000698
+name: gene_with_stop_codon_redefined_as_pyrrolysine
+is_a: SO:0000697 ! gene_with_stop_codon_read_through
+
+[Term]
+id: SO:0000699
+name: junction
+def: "A junction refers to an interbase location of zero in a sequence." [SO:ke]
+subset: SOFA
+synonym: "boundary" RELATED []
+is_a: SO:0000110 ! located_sequence_feature
+
+[Term]
+id: SO:0000700
+name: remark
+def: "A comment about the sequence." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000701
+name: possible_base_call_error
+def: "A region of sequence where the validity of the base calling is questionable." [SO:ke]
+subset: SOFA
+is_a: SO:0000413 ! sequence_difference
+
+[Term]
+id: SO:0000702
+name: possible_assembly_error
+def: "A region of sequence where there may have been an error in the assembly." [SO:ke]
+subset: SOFA
+is_a: SO:0000413 ! sequence_difference
+
+[Term]
+id: SO:0000703
+name: experimental_result_region
+def: "A region of sequence implicated in an experimental result." [SO:ke]
+subset: SOFA
+is_a: SO:0000700 ! remark
+
+[Term]
+id: SO:0000704
+name: gene
+def: "A locatable region of genomic sequence, corresponding to a unit of inheritance, which is associated with regulatory regions, transcribed regions and/or other functional sequence regions" [SO:rd]
+subset: SOFA
+is_a: SO:0000001 ! region
+relationship: member_of SO:0005855 ! gene_group
+
+[Term]
+id: SO:0000705
+name: tandem_repeat
+def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html]
+subset: SOFA
+is_a: SO:0000657 ! repeat_region
+relationship: part_of SO:0000005 ! satellite_DNA
+
+[Term]
+id: SO:0000706
+name: trans_splice_acceptor_site
+def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke]
+subset: SOFA
+is_a: SO:0000164 ! splice_acceptor_site
+
+[Term]
+id: SO:0000707
+name: trans_splice_donor_site
+def: "The site at which trans-splicing occurs." [SO:ke]
+synonym: "trans-splice_donor_site" RELATED []
+is_a: SO:0000163 ! splice_donor_site
+
+[Term]
+id: SO:0000708
+name: SL1_acceptor_site
+is_a: SO:0000706 ! trans_splice_acceptor_site
+
+[Term]
+id: SO:0000709
+name: SL2_acceptor_site
+is_a: SO:0000706 ! trans_splice_acceptor_site
+
+[Term]
+id: SO:0000710
+name: gene_with_stop_codon_redefined_as_selenocysteine
+is_a: SO:0000697 ! gene_with_stop_codon_read_through
+
+[Term]
+id: SO:0000711
+name: gene_with_mRNA_recoded_by_translational_bypass
+is_a: SO:0000693 ! gene_with_recoded_mRNA
+
+[Term]
+id: SO:0000712
+name: gene_with_transcript_with_translational_frameshift
+is_a: SO:0000693 ! gene_with_recoded_mRNA
+
+[Term]
+id: SO:0000713
+name: DNA_motif
+is_a: SO:0000714 ! nucleotide_motif
+
+[Term]
+id: SO:0000714
+name: nucleotide_motif
+def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000715
+name: RNA_motif
+is_a: SO:0000714 ! nucleotide_motif
+
+[Term]
+id: SO:0000716
+name: dicistronic_mRNA
+synonym: "dicistronic_processed_transcript" RELATED []
+is_a: SO:0000079 ! dicistronic_transcript
+
+[Term]
+id: SO:0000717
+name: reading_frame
+def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It does not contain the start or stop codon." [SO:rb]
+comment: This term was added after a request by SGD, August 2004. Modified after SO meeting in Cambridge to not include start or stop.
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000718
+name: blocked_reading_frame
+def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb]
+comment: Term requested by Rama from SGD
+is_a: SO:0000717 ! reading_frame
+
+[Term]
+id: SO:0000719
+name: ultracontig
+def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG]
+subset: SOFA
+is_a: SO:0000353 ! assembly
+
+[Term]
+id: SO:0000720
+name: foreign_transposable_element
+comment: requested by Michael on 19 Nov 2004
+is_a: SO:0000101 ! transposable_element
+
+[Term]
+id: SO:0000721
+name: gene_with_dicistronic_primary_transcript
+comment: Requested by Michael, 19 nov 2004
+is_a: SO:0000692 ! gene_with_dicistronic_transcript
+
+[Term]
+id: SO:0000722
+name: gene_with_dicistronic_mRNA
+comment: Requested by MA nov 19 2004
+synonym: "gene_with_dicistronic_processed_transcript" RELATED []
+is_a: SO:0000692 ! gene_with_dicistronic_transcript
+
+[Term]
+id: SO:0000723
+name: iDNA
+def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma]
+synonym: "intervening DNA" RELATED []
+is_a: SO:0000298 ! recombination_feature
+
+[Term]
+id: SO:0000724
+name: origin_of_transfer
+def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http:http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+subset: SOFA
+synonym: "oriT" RELATED []
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000725
+name: transit_peptide
+def: "The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein: this domain is involved in post translational import of the protein into the organelle." [http:http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+comment: Added to bring SO inline with the embl ddbj genbank feature table.
+subset: SOFA
+relationship: part_of SO:0000104 ! polypeptide
+
+[Term]
+id: SO:0000726
+name: repeat_unit
+def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types]
+comment: Added to comply with the feature table.
+is_a: SO:0000657 ! repeat_region
+
+[Term]
+id: SO:0000727
+name: TF_module
+def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active" [SO:SG]
+comment: Requested by Stepen Grossmann Dec 2004.
+synonym: "CRM" RELATED []
+synonym: "cis_regulatory_module" RELATED []
+is_a: SO:0005836 ! regulatory_region
+
+[Term]
+id: SO:0000728
+name: intein
+relationship: part_of SO:0000104 ! polypeptide
+
+[Term]
+id: SO:0000729
+name: intein_containing_protein_coding_gene
+is_a: SO:0000010 ! protein_coding_gene
+
+[Term]
+id: SO:0000730
+name: gap
+def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke]
+subset: SOFA
+is_a: SO:0000143 ! assembly_component
+relationship: part_of SO:0000353 ! assembly
+
+[Term]
+id: SO:0000731
+name: fragment
+comment: added because of request by MO people.
+is_a: SO:0000733 ! feature_attribute
+
+[Term]
+id: SO:0000732
+name: predicted
+is_a: SO:0000733 ! feature_attribute
+
+[Term]
+id: SO:0000733
+name: feature_attribute
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000734
+name: exemplar_mRNA
+def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http:mged.sourceforge.net/ontologies/MGEDontology.php#exemplar_mRNA]
+comment: Added for the MO people.
+is_a: SO:0000082 ! processed_transcript_attribute
+
+[Term]
+id: SO:0000735
+name: sequence_location
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:0000736
+name: organelle_location
+is_a: SO:0000735 ! sequence_location
+
+[Term]
+id: SO:0000737
+name: mitochondrial_sequence
+is_a: SO:0000736 ! organelle_location
+
+[Term]
+id: SO:0000738
+name: nuclear_sequence
+is_a: SO:0000736 ! organelle_location
+
+[Term]
+id: SO:0000739
+name: nucleomorphic_sequence
+is_a: SO:0000736 ! organelle_location
+
+[Term]
+id: SO:0000740
+name: plastid_sequence
+is_a: SO:0000736 ! organelle_location
+
+[Term]
+id: SO:0000741
+name: kinetoplast_sequence
+is_a: SO:0000737 ! mitochondrial_sequence
+
+[Term]
+id: SO:0000742
+name: maxicircle_sequence
+is_a: SO:0000737 ! mitochondrial_sequence
+
+[Term]
+id: SO:0000743
+name: apicoplast_sequence
+is_a: SO:0000740 ! plastid_sequence
+
+[Term]
+id: SO:0000744
+name: chromoplast_sequence
+is_a: SO:0000740 ! plastid_sequence
+
+[Term]
+id: SO:0000745
+name: chloroplast_sequence
+is_a: SO:0000740 ! plastid_sequence
+
+[Term]
+id: SO:0000746
+name: cyanelle_sequence
+is_a: SO:0000740 ! plastid_sequence
+
+[Term]
+id: SO:0000747
+name: leucoplast_sequence
+is_a: SO:0000740 ! plastid_sequence
+
+[Term]
+id: SO:0000748
+name: proplastid_sequence
+is_a: SO:0000740 ! plastid_sequence
+
+[Term]
+id: SO:0000749
+name: plasmid_sequence
+is_a: SO:0000735 ! sequence_location
+
+[Term]
+id: SO:0000750
+name: amplification_origin
+def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma]
+is_a: SO:0000296 ! origin_of_replication
+
+[Term]
+id: SO:0000751
+name: proviral_sequence
+is_a: SO:0000735 ! sequence_location
+
+[Term]
+id: SO:0000752
+name: gene_group_regulatory_region
+is_a: SO:0005836 ! regulatory_region
+relationship: member_of SO:0005855 ! gene_group
+
+[Term]
+id: SO:0000753
+name: clone_insert
+relationship: part_of SO:0000151 ! clone
+
+[Term]
+id: SO:0000754
+name: lambda_vector
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000160 ! lambda_clone
+
+[Term]
+id: SO:0000755
+name: plasmid_vector
+is_a: SO:0000440 ! vector
+relationship: part_of SO:0000759 ! plasmid_clone
+
+[Term]
+id: SO:0000756
+name: cDNA
+def: "DNA synthesized by reverse transcriptase using RNA as a template" [SO:ma]
+is_a: SO:0000695 ! reagent
+relationship: part_of SO:0000317 ! cDNA_clone
+
+[Term]
+id: SO:0000757
+name: single_stranded_cDNA
+is_a: SO:0000756 ! cDNA
+
+[Term]
+id: SO:0000758
+name: double_stranded_cDNA
+is_a: SO:0000756 ! cDNA
+
+[Term]
+id: SO:0000759
+name: plasmid_clone
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000760
+name: YAC_clone
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000761
+name: phagemid_clone
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000762
+name: PAC_clone
+synonym: "P1_clone" RELATED []
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000763
+name: fosmid_clone
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000764
+name: BAC_clone
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000765
+name: cosmid_clone
+is_a: SO:0000151 ! clone
+
+[Term]
+id: SO:0000766
+name: pyrrolysyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0000767
+name: clone_insert_start
+is_obsolete: true
+
+[Term]
+id: SO:0000768
+name: episome
+def: "A plasmid that may integrate with a chromosome. " [SO:ma]
+is_a: SO:0000155 ! plasmid
+
+[Term]
+id: SO:0000769
+name: tmRNA_coding_piece
+def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw]
+comment: Added in response to comment from Kelly Williams from Indiana.\nhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531\n10 Nov, 2005
+relationship: part_of SO:0000584 ! tmRNA
+
+[Term]
+id: SO:0000770
+name: tmRNA_acceptor_piece
+def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw]
+comment: Added in response to Kelly Williams from Indiana\nhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531\n10 nov 2005
+relationship: part_of SO:0000584 ! tmRNA
+
+[Term]
+id: SO:0000771
+name: QTL
+def: "Quantitative Trait Locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http:rgd.cbi.pku.edu.cn/tu/qtls/]
+comment: Added in response to request by Simon Twigger November 14th 2005
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000772
+name: genomic_island
+comment: Genomic islands are transmissible elements characterized by large size (>10kb).
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0000773
+name: pathogenic_island
+def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke]
+comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038/nrmicro884 \nGENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS\nUlrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker
+is_a: SO:0000772 ! genomic_island
+
+[Term]
+id: SO:0000774
+name: metabolic_island
+def: "A transmissible_element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke]
+comment: genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands
+is_a: SO:0000772 ! genomic_island
+
+[Term]
+id: SO:0000775
+name: adaptive_island
+comment: The iron-uptake ability of many pathogens is conveyed by adaptive islands.\nNature Reviews Microbiology 2, 414-424 (2004); doi:10.1038/nrmicro884 \nGENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS\nUlrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker
+is_a: SO:0000772 ! genomic_island
+
+[Term]
+id: SO:0000776
+name: symbiosis_island
+def: "A transmissible_element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke]
+comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands.\n\nEvolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene\nJohn T. Sullivan and Clive W. Ronson\nPNAS 1998 Apr 28 95 (9) 5145-5149\n
+is_a: SO:0000772 ! genomic_island
+
+[Term]
+id: SO:0000777
+name: pseudogenic_rRNA
+comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase.
+subset: SOFA
+is_a: SO:0000462 ! pseudogenic_region
+
+[Term]
+id: SO:0000778
+name: pseudogenic_tRNA
+comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase.
+subset: SOFA
+is_a: SO:0000462 ! pseudogenic_region
+
+[Term]
+id: SO:0001044
+name: nuclear_mt_pseudogene
+synonym: "NUMT" RELATED []
+synonym: "nuclear_mitochondrial_pseudogene" RELATED []
+is_a: SO:0000042 ! pseudogene_attribute
+
+[Term]
+id: SO:0005836
+name: regulatory_region
+def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R]
+subset: SOFA
+is_a: SO:0000001 ! region
+relationship: member_of SO:0000704 ! gene
+
+[Term]
+id: SO:0005837
+name: snRNA_4.5S_primary_transcript
+def: "A primary transcript encoding a 4.5S snRNA." [SO:ke]
+synonym: "4.5S_snRNA_primary_transcript" RELATED []
+is_a: SO:0000231 ! snRNA_primary_transcript
+
+[Term]
+id: SO:0005839
+name: snRNA_4.5S
+synonym: "4.5S_snRNA" RELATED []
+is_a: SO:0000274 ! snRNA
+
+[Term]
+id: SO:0005841
+name: methylation_guide_snoRNA
+is_a: SO:0000275 ! snoRNA
+
+[Term]
+id: SO:0005843
+name: rRNA_cleavage_snoRNA
+is_a: SO:0000275 ! snoRNA
+
+[Term]
+id: SO:0005845
+name: single_exon
+is_a: SO:0000147 ! exon
+
+[Term]
+id: SO:0005847
+name: member_of_gene_cassette_array
+is_a: SO:0005848 ! member_of_gene_cassette
+
+[Term]
+id: SO:0005848
+name: member_of_gene_cassette
+is_a: SO:0000081 ! member_gene_array
+
+[Term]
+id: SO:0005849
+name: member_of_gene_subarray
+is_a: SO:0000081 ! member_gene_array
+
+[Term]
+id: SO:0005850
+name: primer_binding_site
+def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html]
+relationship: part_of SO:0000186 ! LTR_retrotransposon
+
+[Term]
+id: SO:0005851
+name: gene_array
+def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma]
+comment: This would include\, for example\, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays.
+is_a: SO:0005855 ! gene_group
+
+[Term]
+id: SO:0005852
+name: gene_subarray
+def: "A subarray is, by definition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma]
+comment: This would include\, for example\, a cluster of genes encoding different histones.
+is_a: SO:0005851 ! gene_array
+
+[Term]
+id: SO:0005853
+name: gene_cassette
+def: "A non-functional gene that, when captured by recombination forms a functional gene." [SO:ma]
+comment: This would include\, for example\, the mating type gene cassettes of S. cerevisiae.
+is_a: SO:0005855 ! gene_group
+
+[Term]
+id: SO:0005854
+name: gene_cassette_array
+def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma]
+comment: This would include\, for example\, the arrays of non-functional VSG genes of Trypanosomes.
+is_a: SO:0005853 ! gene_cassette
+
+[Term]
+id: SO:0005855
+name: gene_group
+def: "A collection of related genes." [SO:ma]
+subset: SOFA
+is_a: SO:0000001 ! region
+
+[Term]
+id: SO:0005856
+name: selenocysteine_tRNA_primary_transcript
+def: "A primary transcript encoding selenocysteinyl tRNA (SO:0005857)." [SO:ke]
+is_a: SO:0000210 ! tRNA_primary_transcript
+
+[Term]
+id: SO:0005857
+name: selenocysteinyl_tRNA
+is_a: SO:0000253 ! tRNA
+
+[Term]
+id: SO:0005858
+name: syntenic_region
+def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html]
+is_a: SO:0000330 ! conserved_region
+
+[Term]
+id: SO:1000002
+name: substitution
+def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+subset: SOFA
+is_a: SO:0000001 ! region
+is_a: SO:0000109 ! sequence_variant
+relationship: sequence_of SO:0000048 ! substitute
+
+[Term]
+id: SO:1000004
+name: partially_characterised_change_in_DNA_sequence
+def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence
+
+[Term]
+id: SO:1000005
+name: complex_substitution
+def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+subset: SOFA
+is_a: SO:1000002 ! substitution
+
+[Term]
+id: SO:1000007
+name: uncharacterised_change_in_nucleotide_sequence
+def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000002 ! substitution
+
+[Term]
+id: SO:1000008
+name: point_mutation
+def: "A mutation event where a single DNA nucleotide changes into another nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+subset: SOFA
+is_a: SO:1000002 ! substitution
+
+[Term]
+id: SO:1000009
+name: transition
+def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000008 ! point_mutation
+
+[Term]
+id: SO:1000010
+name: pyrimidine_transition
+def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke]
+is_a: SO:1000009 ! transition
+
+[Term]
+id: SO:1000011
+name: C_to_T_transition
+def: "A transition of a cytidine to a thymine." [SO:ke]
+is_a: SO:1000010 ! pyrimidine_transition
+
+[Term]
+id: SO:1000012
+name: C_to_T_transition_at_pCpG_site
+def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000011 ! C_to_T_transition
+
+[Term]
+id: SO:1000013
+name: T_to_C_transition
+is_a: SO:1000010 ! pyrimidine_transition
+
+[Term]
+id: SO:1000014
+name: purine_transition
+def: "A substitution of a purine, A or G, for another purine." [SO:ke]
+is_a: SO:1000009 ! transition
+
+[Term]
+id: SO:1000015
+name: A_to_G_transition
+def: "A transition of an adenine to a guanine." [SO:ke]
+is_a: SO:1000014 ! purine_transition
+
+[Term]
+id: SO:1000016
+name: G_to_A_transition
+def: "A transition of a guanine to an adenine." [SO:ke]
+is_a: SO:1000014 ! purine_transition
+
+[Term]
+id: SO:1000017
+name: transversion
+def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000008 ! point_mutation
+
+[Term]
+id: SO:1000018
+name: pyrimidine_to_purine_transversion
+def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke]
+is_a: SO:1000017 ! transversion
+
+[Term]
+id: SO:1000019
+name: C_to_A_transversion
+def: "A transversion from cytidine to adenine." [SO:ke]
+is_a: SO:1000018 ! pyrimidine_to_purine_transversion
+
+[Term]
+id: SO:1000020
+name: C_to_G_transversion
+is_a: SO:1000018 ! pyrimidine_to_purine_transversion
+
+[Term]
+id: SO:1000021
+name: T_to_A_transversion
+def: "A transversion from T to A." [SO:ke]
+is_a: SO:1000018 ! pyrimidine_to_purine_transversion
+
+[Term]
+id: SO:1000022
+name: T_to_G_transversion
+def: "A transversion from T to G." [SO:ke]
+is_a: SO:1000018 ! pyrimidine_to_purine_transversion
+
+[Term]
+id: SO:1000023
+name: purine_to_pyrimidine_transversion
+def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke]
+is_a: SO:1000017 ! transversion
+
+[Term]
+id: SO:1000024
+name: A_to_C_transversion
+def: "A transversion from adenine to cytidine." [SO:ke]
+is_a: SO:1000023 ! purine_to_pyrimidine_transversion
+
+[Term]
+id: SO:1000025
+name: A_to_T_transversion
+def: "A transversion from adenine to thymine." [SO:ke]
+is_a: SO:1000023 ! purine_to_pyrimidine_transversion
+
+[Term]
+id: SO:1000026
+name: G_to_C_transversion
+def: "A transversion from guanine to cytidine." [SO:ke]
+is_a: SO:1000023 ! purine_to_pyrimidine_transversion
+
+[Term]
+id: SO:1000027
+name: G_to_T_transversion
+def: "A transversion from guanine to thymine." [SO:ke]
+is_a: SO:1000023 ! purine_to_pyrimidine_transversion
+
+[Term]
+id: SO:1000028
+name: intrachromosomal_mutation
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000029
+name: chromosomal_deletion
+synonym: "(Drosophila)Df" RELATED []
+synonym: "(bacteria)&Dgr;" RELATED []
+synonym: "(fungi)D" RELATED []
+is_a: SO:0000550 ! aneuploid_chromosome
+is_a: SO:1000028 ! intrachromosomal_mutation
+
+[Term]
+id: SO:1000030
+name: chromosomal_inversion
+synonym: "(Drosophila)In" RELATED []
+synonym: "(bacteria)IN" RELATED []
+synonym: "(fungi)In" RELATED []
+is_a: SO:1000028 ! intrachromosomal_mutation
+
+[Term]
+id: SO:1000031
+name: interchromosomal_mutation
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000032
+name: indel
+def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:0000109 ! sequence_variant
+
+[Term]
+id: SO:1000033
+name: nucleotide_deletion
+def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000032 ! indel
+
+[Term]
+id: SO:1000034
+name: nucleotide_insertion
+def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000032 ! indel
+
+[Term]
+id: SO:1000035
+name: nucleotide_duplication
+def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000034 ! nucleotide_insertion
+
+[Term]
+id: SO:1000036
+name: inversion
+def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+subset: SOFA
+is_a: SO:0000001 ! region
+is_a: SO:0000109 ! sequence_variant
+relationship: sequence_of SO:0000047 ! invert
+
+[Term]
+id: SO:1000037
+name: chromosomal_duplication
+synonym: "(Drosophila)Dp" RELATED []
+synonym: "(fungi)Dp" RELATED []
+is_a: SO:0000550 ! aneuploid_chromosome
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000038
+name: intrachromosomal_duplication
+is_a: SO:1000028 ! intrachromosomal_mutation
+is_a: SO:1000037 ! chromosomal_duplication
+
+[Term]
+id: SO:1000039
+name: direct_tandem_duplication
+is_a: SO:1000173 ! tandem_duplication
+
+[Term]
+id: SO:1000040
+name: inverted_tandem_duplication
+is_a: SO:1000173 ! tandem_duplication
+
+[Term]
+id: SO:1000041
+name: intrachromosomal_transposition
+synonym: "(Drosophila)Tp" RELATED []
+is_a: SO:0000453 ! transposition
+is_a: SO:1000038 ! intrachromosomal_duplication
+
+[Term]
+id: SO:1000042
+name: compound_chromosome
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000043
+name: Robertsonian_fusion
+is_a: SO:1000044 ! chromosomal_translocation
+
+[Term]
+id: SO:1000044
+name: chromosomal_translocation
+synonym: "(Drosophila)T" RELATED []
+synonym: "(fungi)T" RELATED []
+is_a: SO:1000031 ! interchromosomal_mutation
+
+[Term]
+id: SO:1000045
+name: ring_chromosome
+synonym: "(Drosophila)R" RELATED []
+synonym: "(fungi)C" RELATED []
+is_a: SO:1000028 ! intrachromosomal_mutation
+
+[Term]
+id: SO:1000046
+name: pericentric_inversion
+is_a: SO:1000030 ! chromosomal_inversion
+
+[Term]
+id: SO:1000047
+name: paracentric_inversion
+is_a: SO:1000030 ! chromosomal_inversion
+
+[Term]
+id: SO:1000048
+name: reciprocal_chromosomal_translocation
+is_a: SO:1000044 ! chromosomal_translocation
+
+[Term]
+id: SO:1000049
+name: mutation_affecting_transcript
+def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000132 ! consequences_of_mutation
+
+[Term]
+id: SO:1000050
+name: no_change_in_transcript
+def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000049 ! mutation_affecting_transcript
+
+[Term]
+id: SO:1000052
+name: complex_change_in_transcript
+is_a: SO:1000049 ! mutation_affecting_transcript
+
+[Term]
+id: SO:1000054
+name: mutation_affecting_coding_sequence
+def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000079 ! mutation_affecting_transcript_sequence
+
+[Term]
+id: SO:1000055
+name: initiator_codon_change_in_transcript
+def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript
+
+[Term]
+id: SO:1000056
+name: amino_acid_coding_codon_change_in_transcript
+def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000054 ! mutation_affecting_coding_sequence
+
+[Term]
+id: SO:1000057
+name: synonymous_codon_change_in_transcript
+def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript
+
+[Term]
+id: SO:1000058
+name: non_synonymous_codon_change_in_transcript
+def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+synonym: "non-synonymous_codon_change_in_transcript" RELATED []
+is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript
+
+[Term]
+id: SO:1000059
+name: missense_codon_change_in_transcript
+def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000058 ! non_synonymous_codon_change_in_transcript
+
+[Term]
+id: SO:1000060
+name: conservative_missense_codon_change_in_transcript
+def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+comment: The exact rules need to be stated\, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix.
+is_a: SO:1000059 ! missense_codon_change_in_transcript
+
+[Term]
+id: SO:1000061
+name: nonconservative_missense_codon_change_in_transcript
+def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+comment: The exact rules need to be stated\, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix.
+is_a: SO:1000059 ! missense_codon_change_in_transcript
+
+[Term]
+id: SO:1000062
+name: nonsense_codon_change_in_transcript
+def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript
+
+[Term]
+id: SO:1000063
+name: terminator_codon_change_in_transcript
+is_a: SO:1000054 ! mutation_affecting_coding_sequence
+
+[Term]
+id: SO:1000064
+name: mutation_affecting_reading_frame
+def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000054 ! mutation_affecting_coding_sequence
+
+[Term]
+id: SO:1000065
+name: frameshift_mutation
+def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000064 ! mutation_affecting_reading_frame
+
+[Term]
+id: SO:1000066
+name: plus_1_frameshift_mutation
+is_a: SO:1000065 ! frameshift_mutation
+
+[Term]
+id: SO:1000067
+name: minus_1_frameshift_mutation
+is_a: SO:1000065 ! frameshift_mutation
+
+[Term]
+id: SO:1000068
+name: plus_2_frameshift_mutation
+is_a: SO:1000065 ! frameshift_mutation
+
+[Term]
+id: SO:1000069
+name: minus_2_frameshift_mutation
+is_a: SO:1000065 ! frameshift_mutation
+
+[Term]
+id: SO:1000070
+name: mutation_affecting_transcript_processing
+def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000079 ! mutation_affecting_transcript_sequence
+
+[Term]
+id: SO:1000071
+name: mutation_affecting_splicing
+def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000132 ! consequences_of_mutation
+
+[Term]
+id: SO:1000072
+name: splice_donor_mutation
+is_a: SO:1000071 ! mutation_affecting_splicing
+is_a: SO:1000074 ! cryptic_splice_activator_mutation
+
+[Term]
+id: SO:1000073
+name: splice_acceptor_mutation
+is_a: SO:1000071 ! mutation_affecting_splicing
+
+[Term]
+id: SO:1000074
+name: cryptic_splice_activator_mutation
+def: "Mutation creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000071 ! mutation_affecting_splicing
+
+[Term]
+id: SO:1000075
+name: mutation_affecting_editing
+def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000070 ! mutation_affecting_transcript_processing
+
+[Term]
+id: SO:1000076
+name: mutation_affecting_transcription
+def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000049 ! mutation_affecting_transcript
+
+[Term]
+id: SO:1000078
+name: mutation_decreasing_rate_of_transcription
+is_a: SO:1000081 ! mutation_affecting_rate_of_transcription
+
+[Term]
+id: SO:1000079
+name: mutation_affecting_transcript_sequence
+is_a: SO:1000049 ! mutation_affecting_transcript
+
+[Term]
+id: SO:1000080
+name: mutation_increasing_rate_of_transcription
+is_a: SO:1000081 ! mutation_affecting_rate_of_transcription
+
+[Term]
+id: SO:1000081
+name: mutation_affecting_rate_of_transcription
+is_a: SO:1000076 ! mutation_affecting_transcription
+
+[Term]
+id: SO:1000082
+name: mutation_affecting_transcript_stability
+def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000079 ! mutation_affecting_transcript_sequence
+
+[Term]
+id: SO:1000083
+name: mutation_increasing_transcript_stability
+def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000082 ! mutation_affecting_transcript_stability
+
+[Term]
+id: SO:1000084
+name: mutation_decreasing_transcript_stability
+def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000082 ! mutation_affecting_transcript_stability
+
+[Term]
+id: SO:1000085
+name: mutation_affecting_level_of_transcript
+is_a: SO:1000049 ! mutation_affecting_transcript
+
+[Term]
+id: SO:1000086
+name: mutation_decreasing_level_of_transcript
+is_a: SO:1000085 ! mutation_affecting_level_of_transcript
+
+[Term]
+id: SO:1000087
+name: mutation_increasing_level_of_transcript
+is_a: SO:1000085 ! mutation_affecting_level_of_transcript
+
+[Term]
+id: SO:1000088
+name: mutation_affecting_translational_product
+def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000132 ! consequences_of_mutation
+
+[Term]
+id: SO:1000089
+name: no_change_of_translational_product
+def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000090
+name: uncharacterised_change_of_translational_product
+def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000091
+name: partially_characterised_change_of_translational_product
+def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000090 ! uncharacterised_change_of_translational_product
+
+[Term]
+id: SO:1000092
+name: complex_change_of_translational_product
+def: "Any mutation effect that is known at nucleotide level but can not be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000093
+name: amino_acid_substitution
+def: "The replacement of a single amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence
+
+[Term]
+id: SO:1000094
+name: conservative_amino_acid_substitution
+is_a: SO:1000093 ! amino_acid_substitution
+
+[Term]
+id: SO:1000095
+name: nonconservative_amino_acid_substitution
+is_a: SO:1000093 ! amino_acid_substitution
+
+[Term]
+id: SO:1000096
+name: amino_acid_insertion
+def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence
+
+[Term]
+id: SO:1000097
+name: amino_acid_deletion
+def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence
+
+[Term]
+id: SO:1000098
+name: polypeptide_truncation
+def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence
+
+[Term]
+id: SO:1000099
+name: polypeptide_elongation
+def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence
+
+[Term]
+id: SO:1000100
+name: polypeptide_N_terminal_elongation
+def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+synonym: "polypeptide_N-terminal_elongation" RELATED []
+is_a: SO:1000099 ! polypeptide_elongation
+
+[Term]
+id: SO:1000101
+name: polypeptide_C_terminal_elongation
+def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+synonym: "polypeptide_C-terminal_elongation" RELATED []
+is_a: SO:1000099 ! polypeptide_elongation
+
+[Term]
+id: SO:1000102
+name: mutation_affecting_level_of_translational_product
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000103
+name: mutation_decreasing_level_of_translation_product
+is_a: SO:1000102 ! mutation_affecting_level_of_translational_product
+
+[Term]
+id: SO:1000104
+name: mutation_increasing_level_of_translation_product
+is_a: SO:1000102 ! mutation_affecting_level_of_translational_product
+
+[Term]
+id: SO:1000105
+name: mutation_affecting_polypeptide_amino_acid_sequence
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000106
+name: inframe_polypeptide_N_terminal_elongation
+synonym: "inframe_polypeptide_N-terminal_elongation" RELATED []
+is_a: SO:1000100 ! polypeptide_N_terminal_elongation
+
+[Term]
+id: SO:1000107
+name: out_of_frame_polypeptide_N_terminal_elongation
+synonym: "out_of_frame_polypeptide_N-terminal_elongation" RELATED []
+is_a: SO:1000100 ! polypeptide_N_terminal_elongation
+
+[Term]
+id: SO:1000108
+name: inframe_polypeptide_C_terminal_elongation
+synonym: "inframe_polypeptide_C-terminal_elongation" RELATED []
+is_a: SO:1000101 ! polypeptide_C_terminal_elongation
+
+[Term]
+id: SO:1000109
+name: out_of_frame_polypeptide_C_terminal_elongation
+synonym: "out_of_frame_polypeptide_C-terminal_elongation" RELATED []
+is_a: SO:1000101 ! polypeptide_C_terminal_elongation
+
+[Term]
+id: SO:1000110
+name: frame_restoring_mutation
+is_a: SO:1000065 ! frameshift_mutation
+
+[Term]
+id: SO:1000111
+name: mutation_affecting_3D_structure_of_polypeptide
+synonym: "mutation_affecting_3D-structure_of_polypeptide" RELATED []
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000112
+name: no_3D_structural_change
+is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide
+
+[Term]
+id: SO:1000113
+name: uncharacterised_3D_structural_change
+is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide
+
+[Term]
+id: SO:1000114
+name: partially_characterised_3D_structural_change
+is_a: SO:1000113 ! uncharacterised_3D_structural_change
+
+[Term]
+id: SO:1000115
+name: complex_3D_structural_change
+is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide
+
+[Term]
+id: SO:1000116
+name: conformational_change
+is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide
+
+[Term]
+id: SO:1000117
+name: mutation_affecting_polypeptide_function
+is_a: SO:1000088 ! mutation_affecting_translational_product
+
+[Term]
+id: SO:1000118
+name: loss_of_function_of_polypeptide
+synonym: "loss-of-function_of_polypeptide" RELATED []
+is_a: SO:1000117 ! mutation_affecting_polypeptide_function
+
+[Term]
+id: SO:1000119
+name: inactive_ligand_binding_site
+is_a: SO:1000118 ! loss_of_function_of_polypeptide
+
+[Term]
+id: SO:1000120
+name: inactive_catalytic_site
+is_a: SO:1000119 ! inactive_ligand_binding_site
+
+[Term]
+id: SO:1000121
+name: polypeptide_localization_affected
+is_a: SO:1000117 ! mutation_affecting_polypeptide_function
+
+[Term]
+id: SO:1000122
+name: polypeptide_post_translational_processing_affected
+synonym: "polypeptide_post-translational_processing_affected" RELATED []
+is_a: SO:1000117 ! mutation_affecting_polypeptide_function
+is_a: SO:1000118 ! loss_of_function_of_polypeptide
+
+[Term]
+id: SO:1000123
+name: polypeptide_post_translational_processing_affected
+synonym: "polypeptide_post-translational_processing_affected" RELATED []
+is_obsolete: true
+
+[Term]
+id: SO:1000124
+name: partial_loss_of_function_of_polypeptide
+synonym: "partial_loss-of-function_of_polypeptide" RELATED []
+is_a: SO:1000118 ! loss_of_function_of_polypeptide
+
+[Term]
+id: SO:1000125
+name: gain_of_function_of_polypeptide
+synonym: "gain-of-function_of_polypeptide" RELATED []
+is_a: SO:1000117 ! mutation_affecting_polypeptide_function
+
+[Term]
+id: SO:1000126
+name: mutation_affecting_transcript_secondary_structure
+is_a: SO:1000079 ! mutation_affecting_transcript_sequence
+
+[Term]
+id: SO:1000127
+name: compensatory_transcript_secondary_structure_mutation
+is_a: SO:1000126 ! mutation_affecting_transcript_secondary_structure
+
+[Term]
+id: SO:1000132
+name: consequences_of_mutation
+is_a: SO:0000000 ! Sequence_Ontology
+
+[Term]
+id: SO:1000134
+name: polypeptide_fusion
+is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence
+
+[Term]
+id: SO:1000136
+name: autosynaptic_chromosome
+synonym: "(Drosophila)A" RELATED []
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000138
+name: homo_compound_chromosome
+synonym: "homo-compound_chromosome" RELATED []
+is_a: SO:1000042 ! compound_chromosome
+
+[Term]
+id: SO:1000140
+name: hetero_compound_chromosome
+synonym: "hetero-compound_chromosome" RELATED []
+is_a: SO:1000042 ! compound_chromosome
+
+[Term]
+id: SO:1000141
+name: chromosome_fission
+is_a: SO:1000028 ! intrachromosomal_mutation
+
+[Term]
+id: SO:1000142
+name: dexstrosynaptic_chromosome
+is_a: SO:1000136 ! autosynaptic_chromosome
+
+[Term]
+id: SO:1000143
+name: laevosynaptic_chromosome
+is_a: SO:1000136 ! autosynaptic_chromosome
+
+[Term]
+id: SO:1000144
+name: free_duplication
+is_a: SO:1000037 ! chromosomal_duplication
+
+[Term]
+id: SO:1000145
+name: free_ring_duplication
+synonym: "(Drosophila)R" RELATED []
+is_a: SO:1000045 ! ring_chromosome
+is_a: SO:1000144 ! free_duplication
+
+[Term]
+id: SO:1000146
+name: complex_chromosomal_mutation
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000147
+name: deficient_translocation
+def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual]
+synonym: "(Drosophila)Df" RELATED []
+synonym: "(Drosophila)DfT" RELATED []
+is_a: SO:1000029 ! chromosomal_deletion
+is_a: SO:1000044 ! chromosomal_translocation
+
+[Term]
+id: SO:1000148
+name: inversion_cum_translocation
+def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual]
+synonym: "(Drosophila)InT" RELATED []
+synonym: "(Drosophila)T" RELATED []
+synonym: "inversion-cum-translocation" RELATED []
+is_a: SO:1000030 ! chromosomal_inversion
+is_a: SO:1000044 ! chromosomal_translocation
+
+[Term]
+id: SO:1000149
+name: bipartite_duplication
+def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual]
+synonym: "(Drosophila)bDp" RELATED []
+is_a: SO:1000031 ! interchromosomal_mutation
+
+[Term]
+id: SO:1000150
+name: cyclic_translocation
+def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual]
+is_a: SO:1000044 ! chromosomal_translocation
+
+[Term]
+id: SO:1000151
+name: bipartite_inversion
+def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual]
+synonym: "(Drosophila)bIn" RELATED []
+is_a: SO:1000030 ! chromosomal_inversion
+
+[Term]
+id: SO:1000152
+name: uninverted_insertional_duplication
+def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual]
+synonym: "(Drosophila)eDp" RELATED []
+is_a: SO:1000154 ! insertional_duplication
+
+[Term]
+id: SO:1000153
+name: inverted_insertional_duplication
+def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual]
+synonym: "(Drosophila)iDp" RELATED []
+is_a: SO:1000154 ! insertional_duplication
+
+[Term]
+id: SO:1000154
+name: insertional_duplication
+synonym: "(Drosophila)Dpp" RELATED []
+is_a: SO:1000037 ! chromosomal_duplication
+
+[Term]
+id: SO:1000155
+name: interchromosomal_transposition
+synonym: "(Drosophila)Tp" RELATED []
+is_a: SO:0000453 ! transposition
+is_a: SO:1000031 ! interchromosomal_mutation
+
+[Term]
+id: SO:1000156
+name: inverted_interchromosomal_transposition
+synonym: "(Drosophila)iTp" RELATED []
+is_a: SO:1000155 ! interchromosomal_transposition
+
+[Term]
+id: SO:1000157
+name: uninverted_interchromosomal_transposition
+synonym: "(Drosophila)eTp" RELATED []
+is_a: SO:1000155 ! interchromosomal_transposition
+
+[Term]
+id: SO:1000158
+name: inverted_intrachromosomal_transposition
+def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual]
+synonym: "(Drosophila)iTp" RELATED []
+is_a: SO:1000030 ! chromosomal_inversion
+is_a: SO:1000041 ! intrachromosomal_transposition
+
+[Term]
+id: SO:1000159
+name: uninverted_intrachromosomal_transposition
+def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual]
+synonym: "(Drosophila)eTp" RELATED []
+is_a: SO:1000041 ! intrachromosomal_transposition
+
+[Term]
+id: SO:1000160
+name: unoriented_insertional_duplication
+def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual]
+synonym: "(Drosophila)uDp" RELATED []
+is_a: SO:1000154 ! insertional_duplication
+
+[Term]
+id: SO:1000161
+name: unorientated_interchromosomal_transposition
+synonym: "(Drosophila)uTp" RELATED []
+is_a: SO:1000155 ! interchromosomal_transposition
+
+[Term]
+id: SO:1000162
+name: unorientated_intrachromosomal_transposition
+def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual]
+synonym: "(Drosophila)uTp" RELATED []
+is_a: SO:1000041 ! intrachromosomal_transposition
+
+[Term]
+id: SO:1000170
+name: uncharacterised_chromosomal_mutation
+is_a: SO:1000183 ! chromosome_structure_variation
+
+[Term]
+id: SO:1000171
+name: deficient_inversion
+def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual]
+synonym: "(Drosophila)Df" RELATED []
+synonym: "(Drosophila)DfIn" RELATED []
+is_a: SO:1000029 ! chromosomal_deletion
+is_a: SO:1000030 ! chromosomal_inversion
+
+[Term]
+id: SO:1000173
+name: tandem_duplication
+is_a: SO:1000038 ! intrachromosomal_duplication
+
+[Term]
+id: SO:1000175
+name: partially_characterised_chromosomal_mutation
+is_a: SO:1000170 ! uncharacterised_chromosomal_mutation
+
+[Term]
+id: SO:1000177
+name: uncharacterised_change_in_transcript
+def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000049 ! mutation_affecting_transcript
+
+[Term]
+id: SO:1000179
+name: partially_characterised_change_in_transcript
+def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000177 ! uncharacterised_change_in_transcript
+
+[Term]
+id: SO:1000180
+name: mutation_affecting_gene_structure
+is_a: SO:1000132 ! consequences_of_mutation
+
+[Term]
+id: SO:1000181
+name: gene_fusion
+is_a: SO:1000180 ! mutation_affecting_gene_structure
+
+[Term]
+id: SO:1000182
+name: chromosome_number_variation
+is_a: SO:0000240 ! chromosome_variation
+
+[Term]
+id: SO:1000183
+name: chromosome_structure_variation
+is_a: SO:0000240 ! chromosome_variation
+
+[Term]
+id: SO:1000184
+name: mutation_causes_exon_loss
+is_a: SO:1000071 ! mutation_affecting_splicing
+
+[Term]
+id: SO:1000185
+name: mutation_causes_intron_gain
+def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html]
+is_a: SO:1000071 ! mutation_affecting_splicing
+
+[Term]
+id: SO:1000186
+name: cryptic_splice_donor_activation
+is_a: SO:1000074 ! cryptic_splice_activator_mutation
+
+[Term]
+id: SO:1001186
+name: cryptic_splice_acceptor_activation
+is_a: SO:1000074 ! cryptic_splice_activator_mutation
+
+[Term]
+id: SO:1001187
+name: alternatively_spliced_transcript
+is_a: SO:0000115 ! transcript_feature
+
+[Term]
+id: SO:1001188
+name: alternatively_spliced_transcript_encoding_1_polypeptide
+is_a: SO:1001187 ! alternatively_spliced_transcript
+
+[Term]
+id: SO:1001189
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide
+is_a: SO:1001187 ! alternatively_spliced_transcript
+
+[Term]
+id: SO:1001190
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_same_start_codon_different_stop_codon
+is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide
+is_a: SO:1001194 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping
+
+[Term]
+id: SO:1001191
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_same_stop_codon
+is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide
+is_a: SO:1001194 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping
+
+[Term]
+id: SO:1001192
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon
+is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide
+
+[Term]
+id: SO:1001193
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_overlapping
+is_a: SO:1001192 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon
+is_a: SO:1001194 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping
+
+[Term]
+id: SO:1001194
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping
+is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide
+
+[Term]
+id: SO:1001195
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_not_overlapping
+is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide
+
+[Term]
+id: SO:1001196
+name: cryptogene
+is_a: SO:0000011 ! non_protein_coding_gene
+
+[Term]
+id: SO:1001197
+name: dicistronic_primary_transcript
+is_a: SO:0000079 ! dicistronic_transcript
+
+[Term]
+id: SO:1001217
+name: member_of_regulon
+is_a: SO:0000081 ! member_gene_array
+
+[Term]
+id: SO:1001244
+name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping
+synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED []
+is_a: SO:1001192 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon
+is_a: SO:1001195 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_not_overlapping
+
+[Term]
+id: SO:1001246
+name: CDS_independently_known
+is_a: SO:1001255 ! status_of_coding_sequence
+
+[Term]
+id: SO:1001247
+name: orphan_CDS
+def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [MA:SO]
+is_a: SO:1001254 ! CDS_predicted
+
+[Term]
+id: SO:1001249
+name: CDS_supported_by_domain_match_data
+is_a: SO:1001254 ! CDS_predicted
+
+[Term]
+id: SO:1001251
+name: CDS_supported_by_sequence_similarity_data
+is_a: SO:1001254 ! CDS_predicted
+
+[Term]
+id: SO:1001254
+name: CDS_predicted
+is_a: SO:1001255 ! status_of_coding_sequence
+
+[Term]
+id: SO:1001255
+name: status_of_coding_sequence
+is_a: SO:0000400 ! sequence_attribute
+
+[Term]
+id: SO:1001259
+name: CDS_supported_by_EST_or_cDNA_data
+is_a: SO:1001254 ! CDS_predicted
+
+[Term]
+id: SO:1001260
+name: internal_Shine_Dalgarno_sequence
+def: "A Shine Delgarno sequence that is upstream of a non-5' CDS in a polycistronic mRNA." [SO:ke]
+is_a: SO:0000243 ! internal_ribosome_entry_site
+is_a: SO:1001268 ! recoding_stimulatory_region
+
+[Term]
+id: SO:1001261
+name: recoded_mRNA
+def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract]
+is_a: SO:0000115 ! transcript_feature
+
+[Term]
+id: SO:1001262
+name: minus_1_translational_frameshift
+is_a: SO:0000118 ! transcript_with_translational_frameshift
+
+[Term]
+id: SO:1001263
+name: plus_1_translational_frameshift
+is_a: SO:0000118 ! transcript_with_translational_frameshift
+
+[Term]
+id: SO:1001264
+name: mRNA_recoded_by_translational_bypass
+def: "A gene whose mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract]
+is_a: SO:1001261 ! recoded_mRNA
+
+[Term]
+id: SO:1001265
+name: mRNA_recoded_by_codon_redefinition
+def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma]
+is_a: SO:1001261 ! recoded_mRNA
+
+[Term]
+id: SO:1001266
+name: stop_codon_redefinition_as_selenocysteine
+is_a: SO:1001267 ! stop_codon_readthrough
+
+[Term]
+id: SO:1001267
+name: stop_codon_readthrough
+is_a: SO:1001265 ! mRNA_recoded_by_codon_redefinition
+
+[Term]
+id: SO:1001268
+name: recoding_stimulatory_region
+def: "A site in an mRNA sequence that stimulates the recoding of the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract]
+synonym: "recoding_stimulatory_signal" RELATED []
+relationship: part_of SO:0000234 ! mRNA
+
+[Term]
+id: SO:1001269
+name: four_bp_start_codon
+def: "A non-canonical start codon with 4 pase pairs." [SO:ke]
+synonym: "4bp_start_codon" RELATED []
+is_a: SO:0000680 ! non_canonical_start_codon
+
+[Term]
+id: SO:1001270
+name: stop_codon_redefinition_as_pyrrolysine
+is_a: SO:1001267 ! stop_codon_readthrough
+
+[Term]
+id: SO:1001271
+name: archeal_intron
+def: "Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism." [SO:ma]
+is_a: SO:0000661 ! intron_attribute
+
+[Term]
+id: SO:1001272
+name: tRNA_intron
+is_a: SO:0000661 ! intron_attribute
+
+[Term]
+id: SO:1001273
+name: CTG_start_codon
+def: "A non-canonical start codon of sequence CTG." [SO:ke]
+is_a: SO:0000680 ! non_canonical_start_codon
+
+[Term]
+id: SO:1001274
+name: SECIS_element
+def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031]
+is_a: SO:1001268 ! recoding_stimulatory_region
+
+[Term]
+id: SO:1001275
+name: retron
+def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma]
+is_a: SO:0000009 ! gene_class
+
+[Term]
+id: SO:1001277
+name: three_prime_recoding_site
+is_a: SO:1001268 ! recoding_stimulatory_region
+
+[Term]
+id: SO:1001279
+name: three_prime_stem_loop_structure
+def: "The stem-loop secondary structural element downstream of the redefined region." [SO:ke]
+is_a: SO:1001277 ! three_prime_recoding_site
+
+[Term]
+id: SO:1001280
+name: five_prime_recoding_site
+def: "The recoding signal found 5' of the redefined codon." [SO:ke]
+is_a: SO:1001268 ! recoding_stimulatory_region
+
+[Term]
+id: SO:1001281
+name: flanking_three_prime_quadruplet_recoding_signal
+def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [SO:ke]
+is_a: SO:1001277 ! three_prime_recoding_site
+
+[Term]
+id: SO:1001282
+name: UAG_stop_codon_signal
+is_a: SO:1001288 ! stop_codon_signal
+
+[Term]
+id: SO:1001283
+name: UAA_stop_codon_signal
+is_a: SO:1001288 ! stop_codon_signal
+
+[Term]
+id: SO:1001284
+name: regulon
+def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732]
+subset: SOFA
+is_a: SO:0005855 ! gene_group
+
+[Term]
+id: SO:1001285
+name: UGA_stop_codon_signal
+is_a: SO:1001288 ! stop_codon_signal
+
+[Term]
+id: SO:1001286
+name: three_prime_repeat_recoding_signal
+def: "It is a downstream sequence important for recoding that contains repetitive elements." [SO:ke]
+is_a: SO:1001277 ! three_prime_recoding_site
+
+[Term]
+id: SO:1001287
+name: distant_three_prime_recoding_signal
+def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract]
+is_a: SO:1001277 ! three_prime_recoding_site
+
+[Term]
+id: SO:1001288
+name: stop_codon_signal
+is_a: SO:1001268 ! recoding_stimulatory_region
+
+[Term]
+id: SO:2000061
+name: databank_entry
+def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke]
+subset: SOFA
+synonym: "accession" RELATED []
+is_a: SO:0000695 ! reagent
+
+[Typedef]
+id: adjacent_to
+name: adjacent_to
+def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke]
+subset: SOFA
+domain: SO:0000110 ! located_sequence_feature
+range: SO:0000110 ! located_sequence_feature
+is_symmetric: true
+
+[Typedef]
+id: associated_with
+name: associated_with
+comment: This relationship is vague and up for discussion.
+is_symmetric: true
+
+[Typedef]
+id: derives_from
+name: derives_from
+subset: SOFA
+is_transitive: true
+
+[Typedef]
+id: genome_of
+name: genome_of
+
+[Typedef]
+id: has_genome_location
+name: has_genome_location
+domain: SO:0000085 ! gene_by_genome_location
+range: SO:0000704 ! gene
+is_obsolete: true
+
+[Typedef]
+id: homologous_to
+name: homologous_to
+is_symmetric: true
+is_a: similar_to ! similar_to
+
+[Typedef]
+id: member_of
+name: member_of
+comment: A subtype of part_of.ninverse is collection_of.nWinston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444.
+subset: SOFA
+is_a: part_of ! part_of
+
+[Typedef]
+id: non_functional_homolog_of
+name: non_functional_homolog_of
+def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke]
+is_a: homologous_to ! homologous_to
+
+[Typedef]
+id: orthologous_to
+name: orthologous_to
+is_symmetric: true
+is_a: homologous_to ! homologous_to
+
+[Typedef]
+id: paralogous_to
+name: paralogous_to
+is_symmetric: true
+is_a: homologous_to ! homologous_to
+
+[Typedef]
+id: part_of
+name: part_of
+subset: SOFA
+is_transitive: true
+
+[Typedef]
+id: position_of
+name: position_of
+
+[Typedef]
+id: regulated_by
+name: regulated_by
+is_obsolete: true
+
+[Typedef]
+id: sequence_of
+name: sequence_of
+
+[Typedef]
+id: similar_to
+name: similar_to
+is_symmetric: true
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sofa.ontology
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sofa.ontology	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sofa.ontology	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+!autogenerated-by:     DAG-Edit version 1.316
+!saved-by:             suzi
+!date:                 Wed Feb 19 16:38:05 SGT 2003
+!version: $Revision: 1.1 $
+!type: % ISA Is a
+!type: < PARTOF Part of
+!Sequence_ontology_Lite_Version
+!This is only for comment; not for implementation
+!Comments to: song-devel at sourceforge.net
+$Sequence_Feature_Ontology ; SO:0000000
+ %sofa ; GO:0000001
+  %feature ; SO:20000000
+   %chromosome ; GO:0000005
+    <centromere ; GO:0000004
+    <telomere ; GO:0000003
+   %gene ; SO:0000704
+    <regulatory_region ; SO:0005836
+     %enhancer ; SO:0000165
+     %TF_binding_site ; SO:0000235 ; synonym:transcription_factor_binding_site % nucleotide_motif ; SO:0000714
+    <transcript ; SO:0000673
+     %primary_transcript ; SO:0000185 ; synonym:precursor_RNA
+      <exon ; SO:0000147
+      <intron ; SO:0000188
+      %noncoding_primary_transcript ; SO:0000483
+       %micro_RNA_primary_transcript ; SO:0000647
+       %transfer_RNA_primary_transcript ; SO:0000210
+      <splice_site ; SO:0000162
+       %splice_acceptor ; SO:0000164 ; synonym:acceptor_splice_site
+        %transsplice_acceptor_site ; SO:0000706
+       %splice_donor ; SO:0000163 ; synonym:donor_splice_site
+      <transcription_start_site ; SO:0000315
+     %processed_transcript ; SO:0000233
+      <exon_junction ; SO:0000333
+      %mRNA ; SO:0000234 ; synonym:messenger_RNA
+       <coding_sequence ; SO:0000316
+        <coding_end ; SO:0000327
+        <coding_start ; SO:0000323
+       <untranslated_region ; SO:0000203 ; synonym:UTR
+        %five_prime_untranslated_region ; SO:0000204 ; synonym:5'-UTR
+        %three_prime_untranslated_region ; SO:0000205 ; synonym:3'-UTR
+      %ncRNA ; SO:0000655 ; synonym:noncoding_RNA
+       %miRNA ; SO:0000276 ; synonym:micro_RNA
+       %rRNA ; SO:0000252 ; synonym:ribosomal_RNA
+       %tRNA ; SO:0000253 ; synonym:transfer_RNA
+      <polyA_site ; SO:0000553
+   %match ; SO:0000343
+    %nucleotide_to_nucleotide_match ; SO:0000347
+     %cross_genome_match ; SO:0000177
+     %expressed_sequence_match ; GO:0000007
+      %cDNA_match ; SO:0000689 % RNAi_reagent ; GO:0000009
+      %EST_match ; SO:0000668
+     %translated_nucleotide_match ; SO:0000181
+    %nucleotide_to_protein_match ; SO:0000351
+   %nucleotide_motif ; SO:0000714
+    %CpG_island ; SO:0000307
+    %TF_binding_site ; SO:0000235 ; synonym:transcription_factor_binding_site % regulatory_region ; SO:0005836
+   %origin_of_replication ; SO:0000296
+   %pseudogene_region ; SO:0000336
+   %reagent ; SO:0000695
+    %assembly_component ; SO:0000143
+     %contig ; SO:0000149
+     %golden_path_region ; SO:0000688
+    %clone ; SO:0000151
+     %cDNA_clone ; GO:0000011
+     <clone_end ; SO:0000103
+     %genomic_clone ; GO:0000012
+    %databank_entry ; SO:2000061
+    %oligonucleotide ; SO:0000696 ; synonym:primer % RNAi_reagent ; GO:0000009
+    %pcr_product ; SO:0000006 ; synonym:amplicon
+     %STS ; GO:0000008 ; synonym:sequence_tag_site
+   %remark ; SO:0000700
+    %experimental_reagent_region ; SO:0000703
+     %RNAi_reagent ; GO:0000009
+      %cDNA_match ; SO:0000689 % expressed_sequence_match ; GO:0000007
+      %oligonucleotide ; SO:0000696 ; synonym:primer % reagent ; SO:0000695
+    %potential_sequencing_error ; SO:0000701
+   %repeat_region ; SO:0000657
+    %direct_repeat ; SO:0000314
+    %dispersed_repeat ; SO:0000658
+    %inverted_repeat ; SO:0000294
+    %repeat_family ; SO:0000187
+     %transposable_element ; SO:0000101
+      %DNA_transposon ; SO:0000182
+      %retrotransposon ; SO:0000180
+       %LTR_retrotransposon ; SO:0000186
+       %non_LTR_retrotransposon ; SO:0000189
+        %LINE_element ; SO:0000194
+        %SINE_element ; SO:0000206
+    %tandem_repeat ; SO:0000705
+     %microsatellite ; SO:0000289
+   %sequence_variant ; SO:0000109
+    %deletion ; SO:0000159
+     <deletion_junction ; SO:0000687
+    %insertion ; SO:0000667
+    %inversion ; SO:0000697
+     <inversion_junction ; SO:0000692
+    %substitution ; SO:1000002
+    %translocation_junction ; SO:0000691

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sparsealn.needle
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sparsealn.needle	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sparsealn.needle	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+########################################
+# Program:  needle
+# Rundate:  Wed Mar 13 14:01:53 2002
+# Report_file: out.water
+########################################
+#=======================================
+#
+# Aligned_sequences: 2
+# 1: KV1K_HUMAN
+# 2: IF1Y_HUMAN
+# Matrix: EBLOSUM62
+# Gap_penalty: 10.0
+# Extend_penalty: 0.5
+#
+# Length: 238
+# Identity:       5/238 ( 2.1%)
+# Similarity:     9/238 ( 3.8%)
+# Gaps:         225/238 (94.5%)
+# Score: 18.0
+# 
+#
+#=======================================
+
+KV1K_HUMAN         1 DIQMTQSPSTLSVSVGDRVTITCEASQTVLSYLNWYQQKPGKAPKLLIYA     50
+                                                                       
+IF1Y_HUMAN         1                                                         0
+
+KV1K_HUMAN        51 ASSLETGVPSRFSGQGSGTBFTFTISSVZPZBFATYYCQZYLDLPRTFGQ    100
+                                                                 |:..|:
+IF1Y_HUMAN         1                                             PKNKGK      6
+
+KV1K_HUMAN       101 GTKVDLKR                                              108
+                     |.| :.:|                                          
+IF1Y_HUMAN         7 GGK-NRRRGKNENESEKRELVFKEDGQEYAQVIKMLGNGRLEALCFDGVK     55
+
+KV1K_HUMAN       109                                                       108
+                                                                       
+IF1Y_HUMAN        56 RLCHIRGKLRKKVWINTSDIILVGLRDYQDNKADVILKYNADEARSLKAY    105
+
+KV1K_HUMAN       109                                           108
+                                                           
+IF1Y_HUMAN       106 GGLPEHAKINETDTFGPGDDDEIQFDDIGDDDEDIDDI    143

Added: trunk/packages/bioperl/branches/upstream/current/t/data/spidey.noalignment
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/spidey.noalignment	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/spidey.noalignment	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,5 @@
+--SPIDEY version 1.40--
+Genomic: lcl|chr1 No definition line found, 187448317 bp
+mRNA: lcl|tmpseq_0 No definition line found, 663 bp
+No alignment found.
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/spidey.test1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/spidey.test1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/spidey.test1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,170 @@
+--SPIDEY version 1.40--
+Genomic: lcl|chr2 No definition line found, 145732769 bp
+mRNA: lcl|tmpseq_0 No definition line found, 1110 bp
+Strand: minus
+Number of exons: 6
+Exon 1(-): 36375691-36375798 (gen)  1-108 (mRNA)  id 97.2% mismatches 3 gaps 0  splice site (d  a): 1  0
+Exon 2(-): 36369345-36369492 (gen)  109-256 (mRNA)  id 100.0% mismatches 0 gaps 0  splice site (d  a): 0  1
+Exon 3(-): 36367232-36367437 (gen)  257-462 (mRNA)  id 100.0% mismatches 0 gaps 0  splice site (d  a): 1  1
+Exon 4(-): 36364083-36364229 (gen)  463-609 (mRNA)  id 100.0% mismatches 0 gaps 0  splice site (d  a): 1  1
+Exon 5(-): 36358231-36358489 (gen)  610-868 (mRNA)  id 100.0% mismatches 0 gaps 0  splice site (d  a): 1  1
+Exon 6(-): 36356457-36356698 (gen)  869-1110 (mRNA)  id 100.0% mismatches 0 gaps 0  splice site (d  a): 0  1
+Number of splice sites: 4
+mRNA coverage: 100%
+overall percent identity: 99.7%
+Missing mRNA ends: neither 
+
+Genomic: lcl|chr2 No definition line found
+mRNA: lcl|tmpseq_0 No definition line found
+Exon 1: 36375798-36375691 (gen)  1-108 (mRNA)
+
+
+CCTCTTTTTCTTTGCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
+           |  ||||||||||||||||||||||||||||||||||||||||||||||
+          ATGTCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
+           M  S  G  Y  I  P  S  Y  L  D  K  D  E  L  C  V  V 
+
+
+ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAGGT
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAG
+  C  G  D  K  A  T  G  Y  H  Y  R  C  I  T  C  E  G  C  K 
+
+
+AAATGGCA
+
+Exon 2: 36369492-36369345 (gen)  109-256 (mRNA)
+
+
+TTGCACTTAGGGATTTTTCAGAAGAACCATTCAGAAAAACCTCCATCCAACCTATTCCTG
+          ||||||||||||||||||||||||||||||||||||||||||||||||||
+          GGATTTTTCAGAAGAACCATTCAGAAAAACCTCCATCCAACCTATTCCTG
+           G  F  F  R  R  T  I  Q  K  N  L  H  P  T  Y  S  C 
+
+
+TAAATATGAAGGAAAATGTGTGATAGACAAAGTAACAAGAAATCAGTGCCAGGAATGTCG
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+TAAATATGAAGGAAAATGTGTGATAGACAAAGTAACAAGAAATCAGTGCCAGGAATGTCG
+  K  Y  E  G  K  C  V  I  D  K  V  T  R  N  Q  C  Q  E  C  R 
+
+
+CTTCAAAAAATGTATCTTTGTTGGCATGGCAACAGATTGTGAGTATAT
+||||||||||||||||||||||||||||||||||||||
+CTTCAAAAAATGTATCTTTGTTGGCATGGCAACAGATT
+  F  K  K  C  I  F  V  G  M  A  T  D 
+
+
+Exon 3: 36367437-36367232 (gen)  257-462 (mRNA)
+
+
+TCCCTGCTAGTGGTGTTGGATGACAGCAAGAGGCTGGCAAAGAGGAAGCTGATAGAAGAA
+          ||||||||||||||||||||||||||||||||||||||||||||||||||
+          TGGTGTTGGATGACAGCAAGAGGCTGGCAAAGAGGAAGCTGATAGAAGAA
+          L  V  L  D  D  S  K  R  L  A  K  R  K  L  I  E  E 
+
+
+AATCGAGAGAAGAGGCGTCGGGAAGAGCTGCAGAAAACGATTGGTCACAAACCAGAACCA
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+AATCGAGAGAAGAGGCGTCGGGAAGAGCTGCAGAAAACGATTGGTCACAAACCAGAACCA
+ N  R  E  K  R  R  R  E  E  L  Q  K  T  I  G  H  K  P  E  P 
+
+
+ACAGATGAGGAATGGGAGCTGATCAAAATTGTCACTGAAGCACATGTGGCCACCAATGCA
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+ACAGATGAGGAATGGGAGCTGATCAAAATTGTCACTGAAGCACATGTGGCCACCAATGCA
+ T  D  E  E  W  E  L  I  K  I  V  T  E  A  H  V  A  T  N  A 
+
+
+CAAGGAAGCCACTGGAAGCAGAAAAGGAAATTTCTGGTAGGGACTA
+||||||||||||||||||||||||||||||||||||
+CAAGGAAGCCACTGGAAGCAGAAAAGGAAATTTCTG
+ Q  G  S  H  W  K  Q  K  R  K  F  L 
+
+
+Exon 4: 36364229-36364083 (gen)  463-609 (mRNA)
+
+
+ATATCCTTAGCCAGAAGACATTGGGCAAGCACCAATAGTTAATGCCCCAGAAGGGGGGAA
+          ||||||||||||||||||||||||||||||||||||||||||||||||||
+          CCAGAAGACATTGGGCAAGCACCAATAGTTAATGCCCCAGAAGGGGGGAA
+           P  E  D  I  G  Q  A  P  I  V  N  A  P  E  G  G  K 
+
+
+AGTGGATTTAGAAGCCTTCAGCCAGTTTACAAAAATTATCACACCAGCGATTACAAGAGT
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+AGTGGATTTAGAAGCCTTCAGCCAGTTTACAAAAATTATCACACCAGCGATTACAAGAGT
+  V  D  L  E  A  F  S  Q  F  T  K  I  I  T  P  A  I  T  R  V 
+
+
+GGTGGATTTTGCCAAAAAGTTGCCTATGTTTTGTGAGGTAAGACAAA
+|||||||||||||||||||||||||||||||||||||
+GGTGGATTTTGCCAAAAAGTTGCCTATGTTTTGTGAG
+  V  D  F  A  K  K  L  P  M  F  C  E 
+
+
+Exon 5: 36358489-36358231 (gen)  610-868 (mRNA)
+
+
+ATTTCTGCAGCTGCCATGTGAAGACCAGATCATCCTTCTGAAAGGCTGCTGTATGGAGAT
+          ||||||||||||||||||||||||||||||||||||||||||||||||||
+          CTGCCATGTGAAGACCAGATCATCCTTCTGAAAGGCTGCTGTATGGAGAT
+           L  P  C  E  D  Q  I  I  L  L  K  G  C  C  M  E  I 
+
+
+AATGTCCCTCCGAGCAGCAGTTCGCTATGACCCCGAGAGTGAGACTTTAACGCTAAATGG
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+AATGTCCCTCCGAGCAGCAGTTCGCTATGACCCCGAGAGTGAGACTTTAACGCTAAATGG
+  M  S  L  R  A  A  V  R  Y  D  P  E  S  E  T  L  T  L  N  G 
+
+
+GGAGATGGCGGTGACAAGGGGCCAGCTGAAAAATGGGGGTCTTGGCGTAGTTTCTGATGC
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+GGAGATGGCGGTGACAAGGGGCCAGCTGAAAAATGGGGGTCTTGGCGTAGTTTCTGATGC
+  E  M  A  V  T  R  G  Q  L  K  N  G  G  L  G  V  V  S  D  A 
+
+
+CATTTTTGACCTGGGCATGTCTCTTTCTTCATTTAACCTGGATGACACCGAGGTTGCCCT
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+CATTTTTGACCTGGGCATGTCTCTTTCTTCATTTAACCTGGATGACACCGAGGTTGCCCT
+  I  F  D  L  G  M  S  L  S  S  F  N  L  D  D  T  E  V  A  L 
+
+
+TCTCCAGGCTGTCCTGCTCATGTCATCAGGTGAGAACAG
+|||||||||||||||||||||||||||||
+TCTCCAGGCTGTCCTGCTCATGTCATCAG
+  L  Q  A  V  L  L  M  S  S 
+
+
+Exon 6: 36356698-36356457 (gen)  869-1110 (mRNA)
+
+
+GTATCTGCAGATCGCCCAGGCCTTGTTTGCGTCGAGAGAATAGAAAAGTGTCAAGAGGGT
+          ||||||||||||||||||||||||||||||||||||||||||||||||||
+          ATCGCCCAGGCCTTGTTTGCGTCGAGAGAATAGAAAAGTGTCAAGAGGGT
+          D  R  P  G  L  V  C  V  E  R  I  E  K  C  Q  E  G 
+
+
+TTCCTCCTGGCATTTGAACACTACATTAATTACAGAAAACACCATGTTGCACATTTTTGG
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+TTCCTCCTGGCATTTGAACACTACATTAATTACAGAAAACACCATGTTGCACATTTTTGG
+ F  L  L  A  F  E  H  Y  I  N  Y  R  K  H  H  V  A  H  F  W 
+
+
+CCAAAACTGCTGATGAAAGTGACAGATCTGCGAATGATTGGAGCCTGCCATGCCAGCCGC
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+CCAAAACTGCTGATGAAAGTGACAGATCTGCGAATGATTGGAGCCTGCCATGCCAGCCGC
+ P  K  L  L  M  K  V  T  D  L  R  M  I  G  A  C  H  A  S  R 
+
+
+TTCCTGCACATGAAGGTGGAGTGCCCCACAGAACTCTTCCCTCCATTGTTCCTGGAGGTG
+||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+TTCCTGCACATGAAGGTGGAGTGCCCCACAGAACTCTTCCCTCCATTGTTCCTGGAGGTG
+ F  L  H  M  K  V  E  C  P  T  E  L  F  P  P  L  F  L  E  V 
+
+
+TTTGAGGATTAGAGAGACTGGA
+||||||||||||
+TTTGAGGATTAG
+ F  E  D  * 
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sprintf.rnamotif
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sprintf.rnamotif	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sprintf.rnamotif	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,281 @@
+#RM scored
+#RM descr h5 ss h3
+#RM dfile sprintf.descr
+>gi|173609|gb|M28984|ACARRDX A.castellani 5S ribosomal RNA
+gi|173609|gb|M28984|ACARRDX -12.500,6,gcga 0      81   16 gggtgg gcga ccaccc
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO -15.400,6,gaaa 0     110   16 ccccgg gaaa ccgggg
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO -12.100,5,gaaa 0     111   14 cccgg gaaa ccggg
+>gi|173741|gb|M83548|AQF16SRRN Aquifex pyrophilus 16S ribosomal RNA (16S rRNA)
+gi|173741|gb|M83548|AQF16SRRN -15.400,6,gaaa 0     154   16 ccccgg gaaa ccgggg
+>gi|173741|gb|M83548|AQF16SRRN Aquifex pyrophilus 16S ribosomal RNA (16S rRNA)
+gi|173741|gb|M83548|AQF16SRRN -12.100,5,gaaa 0     155   14 cccgg gaaa ccggg
+>gi|173741|gb|M83548|AQF16SRRN Aquifex pyrophilus 16S ribosomal RNA (16S rRNA)
+gi|173741|gb|M83548|AQF16SRRN -14.900,7,gaaa 0    1028   18 actccgc gaaa gcggagt
+>gi|173741|gb|M83548|AQF16SRRN Aquifex pyrophilus 16S ribosomal RNA (16S rRNA)
+gi|173741|gb|M83548|AQF16SRRN -13.200,6,gaaa 0    1029   16 ctccgc gaaa gcggag
+>gi|173748|gb|M64487|ARFRRLSA A.fulgicus large subunit ribosomal RNA
+gi|173748|gb|M64487|ARFRRLSA -12.700,7,ttac 0     137   18 cgctccc ttac gggagcg
+>gi|173748|gb|M64487|ARFRRLSA A.fulgicus large subunit ribosomal RNA
+gi|173748|gb|M64487|ARFRRLSA -15.500,7,gtaa 1     154   18 cgctccc gtaa gggagcg
+>gi|173748|gb|M64487|ARFRRLSA A.fulgicus large subunit ribosomal RNA
+gi|173748|gb|M64487|ARFRRLSA -13.100,6,gtaa 1     153   16 gctccc gtaa gggagc
+>gi|470692|gb|M82686|ARIRRE05 Aristolochia gigantea 18S ribosomal RNA (18S rRNA), ca. bp 1203 to 1414 in mature rRNA
+gi|470692|gb|M82686|ARIRRE05 -12.200,7,gtca 1     174   18 gtggagg gtca cctccac
+>gi|470705|gb|M82700|ARURRE05 Arundinaria gigantea 18S ribosomal RNA (18S rRNA), ca. bp 1204 to 1414 in mature rRNA
+gi|470705|gb|M82700|ARURRE05 -12.000,7,catc 0     156   18 gcggagg catc cctccgc
+>gi|470705|gb|M82700|ARURRE05 Arundinaria gigantea 18S ribosomal RNA (18S rRNA), ca. bp 1204 to 1414 in mature rRNA
+gi|470705|gb|M82700|ARURRE05 -12.700,7,gatg 1     173   18 gcggagg gatg cctccgc
+>gi|470750|gb|M82449|AVORRE05 Persea borbonia 18S ribosomal RNA (18S rRNA), ca. bp 1205 to 1414 in mature rRNA
+gi|470750|gb|M82449|AVORRE05 -12.200,7,gata 1     172   18 gtggagg gata cctccac
+>gi|439580|gb|L15530|AZORR16SA Azoarcus sp. (strain BH72) 16S ribosomal RNA (16S rRNA)
+gi|439580|gb|L15530|AZORR16SA -12.500,5,ttcg 0      76   14 ggggc ttcg gcccc
+>gi|173823|gb|M79434|BAIRRDG02 Bacterial sp. (TH3) 16S rRNA sequence
+gi|173823|gb|M79434|BAIRRDG02 -13.100,6,gaaa 0      52   16 caccgg gaaa ccggtg
+>gi|603438|gb|L26511|BEDERRNAA Berndtia purpurea 18S ribosomal RNA (18S rRNA)
+gi|603438|gb|L26511|BEDERRNAA -13.100,6,gaaa 1     816   16 caggcg gaaa cgcctg
+>gi|304227|gb|L19406|BODRR24SA Bodo caudatus 24S ribosomal RNA large subunit
+gi|304227|gb|L19406|BODRR24SA -15.400,6,gaaa 0     512   16 cccggg gaaa cccggg
+>gi|304227|gb|L19406|BODRR24SA Bodo caudatus 24S ribosomal RNA large subunit
+gi|304227|gb|L19406|BODRR24SA -12.100,5,gaaa 0     513   14 ccggg gaaa cccgg
+>gi|173977|gb|M97571|BRARREA Branchiostoma floridae 18S ribosomal RNA (18S rRNA), 5' end
+gi|173977|gb|M97571|BRARREA -12.200,6,ttcg 0     225   16 ccgggg ttcg ccccgg
+>gi|470792|gb|M82728|BRERRE05 Brasenia schreberi 18S ribosomal RNA (18S rRNA), ca. bp 1238 to 1414 in mature rRNA
+gi|470792|gb|M82728|BRERRE05 -12.200,7,ggta 1     139   18 gtggagg ggta cctccac
+>gi|289579|gb|L14636|CCYRRDB Capnocytophaga sputigena 16S ribosomal RNA
+gi|289579|gb|L14636|CCYRRDB -12.400,7,ttcg 0      74   18 ggttacc ttcg ggtaacc
+>gi|174069|gb|M34116|CFXRRDA C.aurantiacus 16S ribosomal RNA
+gi|174069|gb|M34116|CFXRRDA -13.200,6,gaaa 0     141   16 cccgtc gaaa gacggg
+>gi|174076|gb|M62791|CHBRRDAA C.vibrioforme 16S ribosomal RNA
+gi|174076|gb|M62791|CHBRRDAA -12.200,6,gaaa 0     148   16 ccccga gaaa tcgggg
+>gi|174079|gb|M31769|CHBSSRNA C.limicola ribosomal RNA small subunit
+gi|174079|gb|M31769|CHBSSRNA -12.200,6,gaaa 0     146   16 ccccga gaaa tcgggg
+>gi|174102|gb|K03164|CHKUR5B Chicken U5B small nuclear RNA, complete
+gi|174102|gb|K03164|CHKUR5B -16.200,7,ttcg 0      97   18 gctccgc ttcg gcggagc
+>gi|174102|gb|K03164|CHKUR5B Chicken U5B small nuclear RNA, complete
+gi|174102|gb|K03164|CHKUR5B -12.800,6,ttcg 0      98   16 ctccgc ttcg gcggag
+>gi|174102|gb|K03164|CHKUR5B Chicken U5B small nuclear RNA, complete
+gi|174102|gb|K03164|CHKUR5B -12.400,7,cgaa 1     114   18 gctccgc cgaa gcggagc
+>gi|174103|gb|K00474|CHKURU4A Chicken U4a small nuclear RNA (snRNA)
+gi|174103|gb|K00474|CHKURU4A -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|174104|gb|K00475|CHKURU4B Chicken U4b small nuclear RNA (snRNA)
+gi|174104|gb|K00475|CHKURU4B -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|174144|gb|M59092|CLORR16SR Clostridium kluyveri 16S ribosomal RNA
+gi|174144|gb|M59092|CLORR16SR -13.700,7,atcg 0     809   18 ggggggt atcg acccccc
+>gi|174144|gb|M59092|CLORR16SR Clostridium kluyveri 16S ribosomal RNA
+gi|174144|gb|M59092|CLORR16SR -13.100,7,cgat 1     826   18 ggggggt cgat acccccc
+>gi|174209|gb|M20058|CRSRREA3 Chiton (C.stelleri) 18S rRNA, segment 3 of 3
+gi|174209|gb|M20058|CRSRREA3 -12.900,5,ggaa 0     118   14 ggccc ggaa gggcc
+>gi|174273|gb|M59057|DEOSSRNA Dermatophilus congolensis small subunit ribosomal RNA sequence
+gi|174273|gb|M59057|DEOSSRNA -12.100,5,gaaa 0     151   14 ccccg gaaa cgggg
+>gi|470912|gb|M82693|DRIRRE05 Drimys aromatica 18S ribosomal RNA (18S rRNA), ca. bp 1203 to 1414 in mature rRNA
+gi|470912|gb|M82693|DRIRRE05 -15.200,7,gtga 1     174   18 gtggagg gtga cctccac
+>gi|470912|gb|M82693|DRIRRE05 Drimys aromatica 18S ribosomal RNA (18S rRNA), ca. bp 1203 to 1414 in mature rRNA
+gi|470912|gb|M82693|DRIRRE05 -12.500,6,gtga 1     173   16 tggagg gtga cctcca
+>gi|174363|gb|K01260|ECOPRM1 Escherichia coli 10sb (M1) RNA; the RNA component of RNase P
+gi|174363|gb|K01260|ECOPRM1 -12.000,5,gcaa 0     152   14 cccgc gcaa gcggg
+>gi|174493|gb|M59388|ELERR18S E.cuneatus 18S ribosomal RNA
+gi|174493|gb|M59388|ELERR18S -13.300,6,gaaa 0     728   16 gtcgcc gaaa ggcgac
+>gi|174566|gb|M62683|FIBRR16SB Fibrobacter succinogenes succinogenes (strain A3C) 16S ribosomal RNA
+gi|174566|gb|M62683|FIBRR16SB -12.900,6,gttc 0     814   16 ccgggg gttc ccccgg
+>gi|174566|gb|M62683|FIBRR16SB Fibrobacter succinogenes succinogenes (strain A3C) 16S ribosomal RNA
+gi|174566|gb|M62683|FIBRR16SB -12.900,6,gaac 1     829   16 ccgggg gaac ccccgg
+>gi|174567|gb|M62684|FIBRR16SC Fibrobacter succinogenes succinogenes (strain B1) 16S ribosomal RNA
+gi|174567|gb|M62684|FIBRR16SC -12.900,6,gttc 0     810   16 ccgggg gttc ccccgg
+>gi|174567|gb|M62684|FIBRR16SC Fibrobacter succinogenes succinogenes (strain B1) 16S ribosomal RNA
+gi|174567|gb|M62684|FIBRR16SC -12.900,6,gaac 1     825   16 ccgggg gaac ccccgg
+>gi|174568|gb|M62685|FIBRR16SD Fibrobacter succinogenes succinogenes (strain BL2) 16S ribosomal RNA
+gi|174568|gb|M62685|FIBRR16SD -12.900,6,gttc 0     810   16 ccgggg gttc ccccgg
+>gi|174568|gb|M62685|FIBRR16SD Fibrobacter succinogenes succinogenes (strain BL2) 16S ribosomal RNA
+gi|174568|gb|M62685|FIBRR16SD -12.900,6,gaac 1     825   16 ccgggg gaac ccccgg
+>gi|174570|gb|M62687|FIBRR16SF Fibrobacter intestinalis (strain DR7) 16S ribosomal RNA
+gi|174570|gb|M62687|FIBRR16SF -13.900,6,gcga 0     816   16 cccggg gcga cccggg
+>gi|174572|gb|M62689|FIBRR16SH Fibrobacter succinogenes (strain HM2) 16S ribosomal RNA
+gi|174572|gb|M62689|FIBRR16SH -12.200,6,ttcg 0     809   16 ccgggg ttcg ccccgg
+>gi|174573|gb|M62690|FIBRR16SI Fibrobacter intestinalis (strain JG1) 16S ribosomal RNA
+gi|174573|gb|M62690|FIBRR16SI -13.900,6,gcga 0     816   16 cccggg gcga cccggg
+>gi|174574|gb|M62691|FIBRR16SJ Fibrobacter intestinalis (strain LH1) 16S ribosomal RNA
+gi|174574|gb|M62691|FIBRR16SJ -13.900,6,gcga 0     816   16 cccggg gcga cccggg
+>gi|174576|gb|M62693|FIBRR16SL Fibrobacter succinogenes (strain MC1) 16S ribosomal RNA
+gi|174576|gb|M62693|FIBRR16SL -12.100,6,acat 0     168   16 ccccgg acat ccgggg
+>gi|174576|gb|M62693|FIBRR16SL Fibrobacter succinogenes (strain MC1) 16S ribosomal RNA
+gi|174576|gb|M62693|FIBRR16SL -12.200,6,ttcg 0     808   16 ccgggg ttcg ccccgg
+>gi|174576|gb|M62693|FIBRR16SL Fibrobacter succinogenes (strain MC1) 16S ribosomal RNA
+gi|174576|gb|M62693|FIBRR16SL -12.100,6,atgt 1     183   16 ccccgg atgt ccgggg
+>gi|174579|gb|M62696|FIBRR16SO Fibrobacter succinogenes (strain S85) 16S ribosomal RNA
+gi|174579|gb|M62696|FIBRR16SO -12.900,6,gttc 0     811   16 ccgggg gttc ccccgg
+>gi|174579|gb|M62696|FIBRR16SO Fibrobacter succinogenes (strain S85) 16S ribosomal RNA
+gi|174579|gb|M62696|FIBRR16SO -12.900,6,gaac 1     826   16 ccgggg gaac ccccgg
+>gi|174591|gb|M59231|FLXRR16S Flexistipes sinusarabici small subunit rRNA
+gi|174591|gb|M59231|FLXRR16S -13.600,6,ttcg 0      76   16 cctccc ttcg gggagg
+>gi|174592|gb|L11306|FRARRDX Frankia sp. 16S ribosomal RNA
+gi|174592|gb|L11306|FRARRDX -12.100,5,gaaa 0     143   14 cccgg gaaa ccggg
+>gi|174596|gb|M97575|FSARREE Petromyzon marinus 18S ribosomal RNA (18S rRNA), 5' end
+gi|174596|gb|M97575|FSARREE -12.500,6,gcga 0    1714   16 gggcag gcga ctgccc
+>gi|174670|gb|M91611|GAORRITSA Gastrosuillus laricinus ribosomal RNA internal transcribed spacer
+gi|174670|gb|M91611|GAORRITSA -13.100,6,gcga 0      89   16 cgcggg gcga cccgcg
+>gi|174671|gb|M91612|GAORRITSB Gastrosuillus luricinus ribosomal RNA internal transcribed spacer
+gi|174671|gb|M91612|GAORRITSB -13.100,6,gcga 0     105   16 cgcggg gcga cccgcg
+>gi|695346|gb|L36199|HBMRRDM Heliobacillus mobilis 16S ribosomal RNA (16S rRNA)
+gi|695346|gb|L36199|HBMRRDM -13.800,6,gaaa 0     152   16 cagccc gaaa gggctg
+>gi|174809|gb|M37643|HECRR16SB Helicobacter felis 16S rRNA
+gi|174809|gb|M37643|HECRR16SB -12.700,7,gaaa 0     962   18 tccccta gaaa tagggga
+>gi|174838|gb|L04675|HLO16SRRNA Heliothrix oregonensis 16S ribosomal RNA (16S rRNA), partial rRNA
+gi|174838|gb|L04675|HLO16SRRNA -12.800,5,gaaa 1      38   14 ccccc gaaa ggggg
+>gi|174913|gb|M23820|HUMRRAUA Human U5-A ribosomal RNA
+gi|174913|gb|M23820|HUMRRAUA -12.500,7,cttg 0     101   18 gccttgc cttg gcaaggc
+>gi|174913|gb|M23820|HUMRRAUA Human U5-A ribosomal RNA
+gi|174913|gb|M23820|HUMRRAUA -12.500,7,caag 1     118   18 gccttgc caag gcaaggc
+>gi|174944|gb|K00472|HUMURU4B Human U4b small nuclear RNA (snRNA)
+gi|174944|gb|K00472|HUMURU4B -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|174959|gb|M32851|HYM28SRRNA Hymenochirus curtipes 28S ribosomal RNA, complete
+gi|174959|gb|M32851|HYM28SRRNA -13.300,7,cgcg 0     335   18 agccgcg cgcg cgcggct
+>gi|174959|gb|M32851|HYM28SRRNA Hymenochirus curtipes 28S ribosomal RNA, complete
+gi|174959|gb|M32851|HYM28SRRNA -13.300,7,cgcg 1     352   18 agccgcg cgcg cgcggct
+>gi|175002|gb|M97573|LARRREC Lampetra aepyptera 18S ribosomal RNA (18S rRNA), 5' end
+gi|175002|gb|M97573|LARRREC -12.500,6,gcga 0    1714   16 gggcag gcga ctgccc
+>gi|471872|gb|M82532|LAURRE05 Lactuca sativa 18S ribosomal RNA (18S rRNA), ca. bp 1205 to 1414 in mature rRNA
+gi|471872|gb|M82532|LAURRE05 -12.200,7,gata 1     172   18 gtggagg gata cctccac
+>gi|175039|gb|M23727|LBARRNAV L.vitulinus 16S ribosomal RNA small subunit
+gi|175039|gb|M23727|LBARRNAV -13.400,6,gaga 0    1000   16 gggacg gaga cgtccc
+>gi|175044|gb|M23729|LBARRNAZ L.catenaforme 16S ribosomal RNA small subunit
+gi|175044|gb|M23729|LBARRNAZ -15.700,7,gaga 0    1000   18 tcgcgcg gaga cgcgcga
+>gi|175044|gb|M23729|LBARRNAZ L.catenaforme 16S ribosomal RNA small subunit
+gi|175044|gb|M23729|LBARRNAZ -13.800,6,gaga 0    1001   16 cgcgcg gaga cgcgcg
+>gi|471885|gb|M82539|LIQRRE05 Liquidambar styraciflua 18S ribosomal RNA (18S rRNA), ca. bp 1205 to 1414 in mature rRNA
+gi|471885|gb|M82539|LIQRRE05 -12.200,7,gtca 1     172   18 gtggagg gtca cctccac
+>gi|175140|gb|M58822|LISRR16ST Listeria monocytogenes 16S ribosomal RNA
+gi|175140|gb|M58822|LISRR16ST -13.300,6,gaaa 0     153   16 ctccgg gaaa ccggag
+>gi|951043|gb|L33974|LTTRRD Leptothrix discophora (strain SP-6) 16S ribosomal RNA (16S rRNA)
+gi|951043|gb|L33974|LTTRRD -12.000,5,gaaa 0     135   14 ccggc gaaa gccgg
+>gi|504534|gb|L33979|LTTRRDB Leptothrix cholodnii (strain LMG 7171) 16S ribosomal RNA (16S rRNA)
+gi|504534|gb|L33979|LTTRRDB -12.000,5,gaaa 0     135   14 ccggc gaaa gccgg
+>gi|504535|gb|L33981|LTTRRDC Leptothrix sp. (strain NC-1) 16S ribosomal RNA (16S rRNA)
+gi|504535|gb|L33981|LTTRRDC -12.000,5,gaaa 0     135   14 ccggc gaaa gccgg
+>gi|308922|gb|L10943|MABRRI Marine Eubacterial sp. (aggregate agg27) PCR generated ribosomal RNA fragment
+gi|308922|gb|L10943|MABRRI -12.100,7,acat 0      81   18 gggagag acat ctctccc
+>gi|308922|gb|L10943|MABRRI Marine Eubacterial sp. (aggregate agg27) PCR generated ribosomal RNA fragment
+gi|308922|gb|L10943|MABRRI -12.100,7,atgt 1      98   18 gggagag atgt ctctccc
+>gi|175375|gb|M95654|MTBRRDB Methylobacterium sp. 16S ribosomal RNA
+gi|175375|gb|M95654|MTBRRDB -12.600,7,cttg 0     776   18 tggcctg cttg caggcca
+>gi|175375|gb|M95654|MTBRRDB Methylobacterium sp. 16S ribosomal RNA
+gi|175375|gb|M95654|MTBRRDB -12.600,7,caag 1     793   18 tggcctg caag caggcca
+>gi|175376|gb|M95655|MTBRRDC Methylobacterium sp. 16S ribosomal RNA
+gi|175376|gb|M95655|MTBRRDC -12.600,7,cttg 0     776   18 tggcctg cttg caggcca
+>gi|175376|gb|M95655|MTBRRDC Methylobacterium sp. 16S ribosomal RNA
+gi|175376|gb|M95655|MTBRRDC -12.600,7,caag 1     793   18 tggcctg caag caggcca
+>gi|175408|gb|M14414|MUSRRMA Mouse 5.4S ribosomal small nuclear RNA
+gi|175408|gb|M14414|MUSRRMA -14.000,6,gaaa 1      44   16 ccctcc gaaa ggaggg
+>gi|175433|gb|M10328|MUSUR57S Mouse 5.7S U4 small nuclear RNA type a
+gi|175433|gb|M10328|MUSUR57S -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|175434|gb|M18004|MUSUR57T Mouse 5.7S U4 small nuclear RNA type b
+gi|175434|gb|M18004|MUSUR57T -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|175446|gb|M59126|MVORR16SY Methanococcus jannaschii 16S ribosomal RNA
+gi|175446|gb|M59126|MVORR16SY -13.700,6,ttcg 0      65   16 gctccc ttcg gggagc
+>gi|175555|gb|L06168|NGORR16SB Neisseria flavescens 16S ribosomal RNA
+gi|175555|gb|L06168|NGORR16SB -13.300,6,gaga 0    1005   16 cctccg gaga cggagg
+>gi|294001|gb|L16523|NYSRR23S Nyssa sativa 26S ribosomal RNA
+gi|294001|gb|L16523|NYSRR23S -15.200,7,gaaa 0      88   18 gccagag gaaa ctctggc
+>gi|175717|gb|M59064|PSERR16SB Pseudomonas diminuta 16S ribosomal RNA
+gi|175717|gb|M59064|PSERR16SB -13.200,6,gaga 0     950   16 gccacg gaga cgtggc
+>gi|472129|gb|M82490|PTNRRE04 Platanus occidentalis 18S ribosomal RNA (18S rRNA), ca. bp 1207 to 1414 in mature rRNA
+gi|472129|gb|M82490|PTNRRE04 -12.200,7,gtca 1     170   18 gtggagg gtca cctccac
+>gi|294428|gb|L16474|PVORR16SJ Prevotella oris (ATCC 33573) 16S ribosomal RNA
+gi|294428|gb|L16474|PVORR16SJ -12.100,7,cttg 0      78   18 ggggaag cttg cttcccc
+>gi|294428|gb|L16474|PVORR16SJ Prevotella oris (ATCC 33573) 16S ribosomal RNA
+gi|294428|gb|L16474|PVORR16SJ -12.100,7,caag 1      95   18 ggggaag caag cttcccc
+>gi|408945|gb|L19921|PYWRRDX Pyrococcus abyssi 16S ribosomal RNA
+gi|408945|gb|L19921|PYWRRDX -13.400,7,ttct 0      48   18 gcgtccc ttct gggacgc
+>gi|408945|gb|L19921|PYWRRDX Pyrococcus abyssi 16S ribosomal RNA
+gi|408945|gb|L19921|PYWRRDX -12.900,6,gtac 0     603   16 ccgggg gtac ccccgg
+>gi|408945|gb|L19921|PYWRRDX Pyrococcus abyssi 16S ribosomal RNA
+gi|408945|gb|L19921|PYWRRDX -14.100,7,gctc 0    1066   18 ccctccc gctc gggaggg
+>gi|408945|gb|L19921|PYWRRDX Pyrococcus abyssi 16S ribosomal RNA
+gi|408945|gb|L19921|PYWRRDX -14.100,7,gagc 1    1083   18 ccctccc gagc gggaggg
+>gi|408945|gb|L19921|PYWRRDX Pyrococcus abyssi 16S ribosomal RNA
+gi|408945|gb|L19921|PYWRRDX -12.900,6,gtac 1     618   16 ccgggg gtac ccccgg
+>gi|408945|gb|L19921|PYWRRDX Pyrococcus abyssi 16S ribosomal RNA
+gi|408945|gb|L19921|PYWRRDX -12.900,7,agaa 1      65   18 gcgtccc agaa gggacgc
+>gi|472171|gb|M82511|RACRRE05 Ranunculus acris 18S ribosomal RNA (18S rRNA), ca. bp 1203 to 1414 in mature rRNA
+gi|472171|gb|M82511|RACRRE05 -12.200,7,ggta 1     174   18 gtggagg ggta cctccac
+>gi|175815|gb|M11057|RATUR8 Rat U8 small nuclear RNA from Novikoff hepatoma cells
+gi|175815|gb|M11057|RATUR8 -14.000,6,gaaa 1      43   16 ccctcc gaaa ggaggg
+>gi|175816|gb|K00477|RATURU4A Rat U4a small nuclear RNA (snRNA)
+gi|175816|gb|K00477|RATURU4A -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|175817|gb|K00478|RATURU4B Rat U4b small nuclear RNA (snRNA)
+gi|175817|gb|K00478|RATURU4B -12.000,7,tacg 0     128   18 cagtctc tacg gagactg
+>gi|175851|gb|M91613|RHSRRITSA Rhizopogon subcaerulescens ribosomal RNA internal transcribed spacer
+gi|175851|gb|M91613|RHSRRITSA -12.800,7,tcgc 0      40   18 gcctcgc tcgc gcgaggc
+>gi|175851|gb|M91613|RHSRRITSA Rhizopogon subcaerulescens ribosomal RNA internal transcribed spacer
+gi|175851|gb|M91613|RHSRRITSA -16.100,7,gcga 1      57   18 gcctcgc gcga gcgaggc
+>gi|175851|gb|M91613|RHSRRITSA Rhizopogon subcaerulescens ribosomal RNA internal transcribed spacer
+gi|175851|gb|M91613|RHSRRITSA -12.700,6,gcga 1      56   16 cctcgc gcga gcgagg
+>gi|294741|gb|L15196|ROLRR18S Roridula gorgonias 18S ribosomal RNA
+gi|294741|gb|L15196|ROLRR18S -12.200,7,gtca 1     628   18 gtggagg gtca cctccac
+>gi|175876|gb|M59069|RSPRR16SD Rhodospirillum salinarum 16S ribosomal RNA
+gi|175876|gb|M59069|RSPRR16SD -16.700,7,gaga 0     953   18 gcctcgg gaga ccgaggc
+>gi|175876|gb|M59069|RSPRR16SD Rhodospirillum salinarum 16S ribosomal RNA
+gi|175876|gb|M59069|RSPRR16SD -13.300,6,gaga 0     954   16 cctcgg gaga ccgagg
+>gi|175876|gb|M59069|RSPRR16SD Rhodospirillum salinarum 16S ribosomal RNA
+gi|175876|gb|M59069|RSPRR16SD -12.300,7,tctc 1     970   18 gcctcgg tctc ccgaggc
+>gi|457377|gb|L25709|SGBRRNA Solemya reidi gill symbiont ribosomal RNA
+gi|457377|gb|L25709|SGBRRNA -13.900,6,gaaa 0     140   16 cctggg gaaa cccagg
+>gi|175944|gb|M32848|SIL28SRRNA Silurana tropicalis 28S ribosomal RNA, complete
+gi|175944|gb|M32848|SIL28SRRNA -13.000,6,gcgc 0     341   16 gccggg gcgc cccggc
+>gi|175944|gb|M32848|SIL28SRRNA Silurana tropicalis 28S ribosomal RNA, complete
+gi|175944|gb|M32848|SIL28SRRNA -13.000,6,gcgc 1     356   16 gccggg gcgc cccggc
+>gi|175988|gb|M88722|SPARR16SB Spirochaeta halophila 16S ribosomal RNA
+gi|175988|gb|M88722|SPARR16SB -12.800,6,ttcg 0       2   16 cgtccc ttcg gggacg
+>gi|176013|gb|M62707|SPNRR18SA Sphaeromonas communis (strain FG) 18S ribosomal RNA
+gi|176013|gb|M62707|SPNRR18SA -12.700,7,gcaa 0     183   18 aacctgg gcaa ccaggtt
+>gi|176061|gb|M91614|SUIRRITSA Suillus grevillei ribosomal RNA internal transcribed spacer
+gi|176061|gb|M91614|SUIRRITSA -13.100,6,gcga 0     105   16 cgcggg gcga cccgcg
+>gi|176062|gb|M91615|SUIRRITSB Suillus grevillei ribosomal RNA internal transcribed spacer
+gi|176062|gb|M91615|SUIRRITSB -13.100,6,gcga 0     105   16 cgcggg gcga cccgcg
+>gi|176197|gb|M67497|THCLRRNA T.celer 23S ribosomal RNA gene sequence
+gi|176197|gb|M67497|THCLRRNA -16.600,7,gaaa 0     263   18 ctccggg gaaa cccggag
+>gi|176197|gb|M67497|THCLRRNA T.celer 23S ribosomal RNA gene sequence
+gi|176197|gb|M67497|THCLRRNA -14.000,6,gaaa 0     264   16 tccggg gaaa cccgga
+>gi|176197|gb|M67497|THCLRRNA T.celer 23S ribosomal RNA gene sequence
+gi|176197|gb|M67497|THCLRRNA -12.100,5,gaaa 0     265   14 ccggg gaaa cccgg
+>gi|176197|gb|M67497|THCLRRNA T.celer 23S ribosomal RNA gene sequence
+gi|176197|gb|M67497|THCLRRNA -13.100,6,gaaa 0    1004   16 ggggga gaaa tccccc
+>gi|176197|gb|M67497|THCLRRNA T.celer 23S ribosomal RNA gene sequence
+gi|176197|gb|M67497|THCLRRNA -12.200,7,tttc 1     280   18 ctccggg tttc cccggag
+>gi|176199|gb|M21529|THCRRD T.celer 16S ribosomal RNA
+gi|176199|gb|M21529|THCRRD -12.900,7,ggaa 0    1455   18 ccgtagg ggaa cctacgg
+>gi|176219|gb|M67498|TMORRNAA Thermotoga maritima large subunit ribosomal RNA, complete
+gi|176219|gb|M67498|TMORRNAA -12.000,5,gaaa 0     152   14 ccgcc gaaa ggcgg
+>gi|176219|gb|M67498|TMORRNAA Thermotoga maritima large subunit ribosomal RNA, complete
+gi|176219|gb|M67498|TMORRNAA -13.700,6,ttcg 0    1615   16 ggtccc ttcg gggacc
+>gi|176254|gb|M57738|TRPRRDSB Treponema succinifaciens 16S rRNA
+gi|176254|gb|M57738|TRPRRDSB -16.400,7,gaga 0    1036   18 ccgcctg gaga caggcgg
+>gi|176254|gb|M57738|TRPRRDSB Treponema succinifaciens 16S rRNA
+gi|176254|gb|M57738|TRPRRDSB -13.100,6,gaga 0    1037   16 cgcctg gaga caggcg
+>gi|176254|gb|M57738|TRPRRDSB Treponema succinifaciens 16S rRNA
+gi|176254|gb|M57738|TRPRRDSB -12.000,7,tctc 1    1053   18 ccgcctg tctc caggcgg
+>gi|176257|gb|M59294|TRPSSRNAA Treponema sp. small subunit ribosomal RNA
+gi|176257|gb|M59294|TRPSSRNAA -12.800,7,gaga 0    1026   18 tcgcatg gaga catgcga
+>gi|1220551|gb|L27800|TRRBRDNAA Trichoderma reesei 5.8S ribosomal DNA (5.8S rDNA)
+gi|1220551|gb|L27800|TRRBRDNAA -12.500,7,tatt 0     118   18 ccgccag tatt ctggcgg
+>gi|1220551|gb|L27800|TRRBRDNAA Trichoderma reesei 5.8S ribosomal DNA (5.8S rDNA)
+gi|1220551|gb|L27800|TRRBRDNAA -12.100,7,aata 1     135   18 ccgccag aata ctggcgg
+>gi|603469|gb|L26520|TRWERRNAA Trypetesa lampas 18S ribosomal RNA (18S rRNA)
+gi|603469|gb|L26520|TRWERRNAA -13.100,6,gaaa 1     818   16 caggcg gaaa cgcctg
+>gi|176260|gb|M35966|TTERRDA T.tenax 16S rRNA
+gi|176260|gb|M35966|TTERRDA -13.900,7,ggcg 0      63   18 cgcccgg ggcg ccgggcg
+>gi|176260|gb|M35966|TTERRDA T.tenax 16S rRNA
+gi|176260|gb|M35966|TTERRDA -13.200,7,cgcc 1      80   18 cgcccgg cgcc ccgggcg
+>gi|603470|gb|L26521|ULOERRNAA Ulophysema oeresundense 18S ribosomal RNA (18S rRNA)
+gi|603470|gb|L26521|ULOERRNAA -12.500,7,tcac 0     225   18 cgtgccc tcac gggcacg
+>gi|603470|gb|L26521|ULOERRNAA Ulophysema oeresundense 18S ribosomal RNA (18S rRNA)
+gi|603470|gb|L26521|ULOERRNAA -14.400,7,tgaa 0    1785   18 ccgcagg tgaa cctgcgg
+>gi|603470|gb|L26521|ULOERRNAA Ulophysema oeresundense 18S ribosomal RNA (18S rRNA)
+gi|603470|gb|L26521|ULOERRNAA -12.900,7,ttca 1    1802   18 ccgcagg ttca cctgcgg
+>gi|603470|gb|L26521|ULOERRNAA Ulophysema oeresundense 18S ribosomal RNA (18S rRNA)
+gi|603470|gb|L26521|ULOERRNAA -16.300,7,gtga 1     242   18 cgtgccc gtga gggcacg
+>gi|603470|gb|L26521|ULOERRNAA Ulophysema oeresundense 18S ribosomal RNA (18S rRNA)
+gi|603470|gb|L26521|ULOERRNAA -13.900,6,gtga 1     241   16 gtgccc gtga gggcac
+>gi|475134|gb|M82432|ZAMRREA02 Zamia ottonis 18S ribosomal RNA (18S rRNA), ca. bp 234 to 287 in mature rRNA
+gi|475134|gb|M82432|ZAMRREA02 -12.200,7,gata 1     172   18 gtggagg gata cctccac

Added: trunk/packages/bioperl/branches/upstream/current/t/data/ssp160.embl.1
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/ssp160.embl.1	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/ssp160.embl.1	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,158 @@
+ID   AF036895   standard; DNA; INV; 3693 BP.
+XX
+AC   AF036895;
+XX
+SV   AF036895.1
+XX
+DT   22-DEC-1997 (Rel. 54, Created)
+DT   03-MAR-2000 (Rel. 62, Last updated, Version 6)
+XX
+DE   Chironomus thummi special lobe-specific silk protein ssp160 gene, complete
+DE   cds.
+XX
+KW   .
+XX
+OS   Chironomus thummi
+OC   Eukaryota; Metazoa; Arthropoda; Hexapoda; Insecta; Pterygota; Neoptera;
+OC   Endopterygota; Diptera; Nematocera; Chironomoidea; Chironomidae;
+OC   Chironominae; Chironomus.
+XX
+RN   [1]
+RP   1-3644
+RX   MEDLINE; 96199249.
+RX   PUBMED; 8621663.
+RA   Hoffman R.T., Schmidt E.R., Case S.T.;
+RT   "A cell-specific glycosylated silk protein from Chironomus thummi salivary
+RT   glands. Cloning, chromosomal localization, and characterization of cDNA";
+RL   J. Biol. Chem. 271(16):9809-9815(1996).
+XX
+RN   [2]
+RP   1-3693
+RX   MEDLINE; 99077856.
+RX   PUBMED; 9858763.
+RA   Berezikov E., Blinov A.G., Scherbik S., Cox C.K., Case S.T.;
+RT   "Structure and polymorphism of the Chironomus thummi gene encoding special
+RT   lobe-specific silk protein, ssp160";
+RL   Gene 223(1-2):347-354(1998).
+XX
+RN   [3]
+RP   1-3693
+RA   Berezikov E., Blinov A.G., Scherbik S.S., Cox C.C., Case S.T.;
+RT   ;
+RL   Submitted (05-DEC-1997) to the EMBL/GenBank/DDBJ databases.
+RL   Biochemistry, University of Mississippi Medical Center, 2500 North State
+RL   Street, Jackson, MS 39216-4505, USA
+XX
+CC   On Mar 4, 1998 this sequence version replaced gi:2707296.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..3693
+FT                   /chromosome="IV"
+FT                   /db_xref="taxon:7154"
+FT                   /organism="Chironomus thummi"
+FT                   /strain="German"
+FT                   /map="A2b"
+FT                   /dev_stage="larval"
+FT                   /cell_type="lobe-specific secretory"
+FT                   /tissue_type="salivary gland"
+FT   CAAT_signal     84..87
+FT                   /gene="ssp160"
+FT   TATA_signal     169..172
+FT                   /gene="ssp160"
+FT   mRNA            join(198..288,604..759,825..2408,2874..3129,3188..3241,
+FT                   3299..3569)
+FT                   /gene="ssp160"
+FT                   /product="special lobe-specific silk protein ssp160"
+FT   5'UTR           198..234
+FT                   /gene="ssp160"
+FT   CDS             join(235..288,604..759,825..2408,2874..3129,3188..3241,
+FT                   3299..3441)
+FT                   /codon_start=1
+FT                   /db_xref="UniProt/TrEMBL:O44416"
+FT                   /note="160 kDa secretory protein"
+FT                   /gene="ssp160"
+FT                   /product="special lobe-specific silk protein ssp160"
+FT                   /protein_id="AAD11516.1"
+FT                   /translation="MNIKVILVCALVAIFFAQVAEGGPIANFVGFIISLLFSLFEVMLS
+FT                   VVFDVKSFTSLSNATANATMPGFASSVGGGRFTVIMKGTFNLIAMISANIQAIQSGSGS
+FT                   ASSNSSSSANSTTSSNSTTSSNSTTSSNSTTSSNSTSSGLTTGASVVSLIDTCAWVYQD
+FT                   SSVGIAYLMVSILALFYGQSVSAPPYADLGIPALPANTSGAGVPQSVQIKAAITYINVT
+FT                   INFITLTGQQFEDLQGPVTTDCGCPNTTSVAPLVAEWEAIMAALEAFASGSASSNSTSN
+FT                   STSTSNSTTTSNSTTTTNSTTSTNSTSSSNSTTIAGSIDIAANLTVALQNLQALLMQEA
+FT                   TCAPCLAANAKKSGVREFGPCKAAGSSCARSGQRKVKRKARLEKMRAKSRRAVGNRKGS
+FT                   MKKRVRSRAKKFGKAAKSGVRRYRKNIKFVYIPPVMASLNAYAALMASLSDSISYQSES
+FT                   ALNSTDSACNSTSNSTDEAVINATTAVTDMFVNFTAMVINNTVAHPNCTQYADMALSMV
+FT                   SQINEQIIACGSQSDSAQSSIYANVTISIVAMAQEYNNFASMSDKCTRSFANSWLWMYI
+FT                   KWVFYRMGMTSGVPNFLACQTKAQSSLTAFLASFNATVSATISAASANNSEVQSSEAAC
+FT                   IESSLAEAAVILEMFEAAYQNCTDPGSVTVPAETTTTTSTTTTTTTTTTTAAPTTTTTK
+FT                   AANAPFTYPLCTLIMSTTCSLGGAGCTYPLISSAGCCPSGKTLNTGLGGRGCCK"
+FT   3'UTR           3442..3569
+FT                   /gene="ssp160"
+FT   polyA_signal    order(3520..3523,3532..3537,3539..3544)
+FT                   /gene="ssp160"
+XX
+SQ   Sequence 3693 BP; 1171 A; 768 C; 633 G; 1121 T; 0 other;
+     aagctttcaa ctctcattta aacctaaata agcacatcat atccacaatt catcagccaa        60
+     taaaaattca ctttatttta atccaatcaa agaaagtttc acgatgtcaa aaacattcgt       120
+     ggatgtcccg tcaagtggaa ggagaaacga aaattcacga aatttcacaa taaaaagtga       180
+     gtaagttgtg tggtccatgt catatcgatc taatttatcg aatagtgata aaatatgaat       240
+     attaaagtga tcttagtgtg cgccttggtt gcgatcttct ttgcacaggt aagttggtaa       300
+     cttgggttgg gatcttcttt aaatttgact agctttgtac aagatatgag ttgtatagtc       360
+     tgtagttgta gtgtcatggg ttgtatactg tcttgtaggc taggttttgc ggctcttgca       420
+     caaatatatc ttaaaaagat cttaaaagtc aacgagagtg ttcaataacg tgatcaataa       480
+     caagcccatg atacgtctca gctccttaaa tccaccaaat atgactcata tacagacaca       540
+     accctaatct taaagtacta agcaatttta atcacaattt taaattttcc acatcttatc       600
+     caggttgcag aaggcggacc aatcgccaat tttgtaggct tcatcatctc cctcttgttc       660
+     tccttgtttg aagtcatgct gtcagttgtg ttcgatgtca agtcgttcac aagcttgtca       720
+     aatgccacag caaatgcaac gatgcctggt tttgcatctg tgagtatttt tagctatagc       780
+     ttgacctgca aaatccttga taaatatgtt gattccattt ctagagcgtt ggtggtggtc       840
+     gcttcacagt catcatgaaa ggaacattca atttgattgc catgatatcg gcaaatattc       900
+     aagccattca gtcaggatca ggatcagcat cgtctaattc ctcatcaagt gctaattcta       960
+     caacatcctc aaactcaaca acaagctcaa actcaacgac atcctcgaac tcaacaacaa      1020
+     gctcaaactc tacatccagt ggattaacaa ccggtgctag cgttgtaagc ttgattgata      1080
+     cctgtgcttg ggtctatcag gacagttcag ttggaattgc ctacttgatg gtctcaattt      1140
+     tggcactttt ctatggacaa tctgtctcag caccgccata tgctgatctt ggtataccag      1200
+     ctctaccagc aaatacctct ggtgctggag ttccacaatc tgtacaaatt aaagcagcaa      1260
+     ttacttacat caatgttact attaacttta ttaccttaac tggtcaacaa tttgaagatt      1320
+     tacaaggtcc agttaccaca gattgtggat gtccaaatac aactagtgtt gcgccacttg      1380
+     ttgctgaatg ggaagccata atggctgctc ttgaagcttt cgctagtgga tcagcatcat      1440
+     ctaattctac atctaattca acatcaacaa gtaactcaac aacgacaagc aactcaacaa      1500
+     ctacgacaaa ttcaacaact tcaactaatt caacatcctc gtcaaattcg acaacaattg      1560
+     ctggatctat tgacattgct gctaatctta cagttgccct ccagaacctc caagccttgc      1620
+     tcatgcaaga agctacttgt gctccatgcc tagctgcaaa tgccaaaaag agtggtgttc      1680
+     gtgaatttgg accatgtaaa gctgctggct catcatgtgc tagatcagga caaagaaaag      1740
+     tcaagcgtaa ggcacgattg gagaagatgc gtgccaagtc ccgtcgtgca gtcggtaaca      1800
+     gaaaaggatc aatgaagaag cgcgtccgta gtcgtgcaaa gaaattcgga aaggctgcaa      1860
+     agtcaggagt tagacgatac cgcaagaaca tcaagtttgt ctacattcca ccagtcatgg      1920
+     cttcacttaa tgcatatgca gcattgatgg catctttaag tgacagcatc tcatatcaat      1980
+     ctgaatcggc tttgaactca acagactctg catgtaactc aacatcaaac agcacagatg      2040
+     aggctgtaat taatgcaaca accgctgtta ctgatatgtt tgtcaacttt actgctatgg      2100
+     tcatcaataa tactgttgca catccaaatt gtactcaata tgctgacatg gctctttcaa      2160
+     tggtctctca aatcaacgag caaatcattg catgcggaag tcagtctgat tctgctcagt      2220
+     catcaatcta tgcaaatgtt acaatcagta ttgttgcaat ggctcaagaa tacaataact      2280
+     ttgcatctat gtctgataag tgtaccagat catttgccaa tagttggttg tggatgtaca      2340
+     ttaagtgggt cttctatcga atgggaatga cttctggagt tcctaacttc ctcgcttgtc      2400
+     aaactaaggt ttgtgattta cttaacaaat acttgaaact aaacgtatcc cttagaactg      2460
+     aacttatcat tagaaattga acttatcctt taaaaactga acttatcccg agaaaagaaa      2520
+     tgtatcctta gaaacgaaat cggttttaaa aactgagctt attcttcagg aactaaactc      2580
+     atccttagaa actgaccata tccttaaaaa ctgaacgtat tcttagaagc taactcatta      2640
+     ttcagaaact gaacttgacc ttagaaactg aacatattct tcggactaaa ctcatcctaa      2700
+     gaaactgacc gtatccttaa aaactgaact tagaagctga actcattatt cagaaactga      2760
+     agttcaccct tagaaactga acttattctt caggaactaa acttatcctt tgaaactgaa      2820
+     cttatcctta aaaactgatt ttactgagtc cttctttaat ccttcattaa taggctcaaa      2880
+     gcagtcttac agcattcttg gcatcattta atgccacagt cagtgccacg atttcagcag      2940
+     caagtgcaaa taattctgaa gttcaaagct ctgaagcagc ttgtattgaa agtagtcttg      3000
+     cggaagcagc cgttatttta gaaatgtttg aggcagcata tcaaaactgt acagatccag      3060
+     gatccgtaac agttccagca gagactacaa caacgaccag tacaaccacc actacaacta      3120
+     ccacaacaag taggttaaaa ttggaaaagt ttagaatttt gtgtgcaatt tttttaattt      3180
+     tttacagcaa ccgcagcacc aacaacaacc acaactaagg ctgctaatgc accattcaca      3240
+     tgtaagatat gaaatcaaat tgtgactttt tctaagaaat tgtttccttt ttttctagat      3300
+     ccattgtgta ctttgatcat gtcaacaaca tgctcactgg gtggagcagg atgtacatat      3360
+     ccacttatct catctgctgg ctgctgtcca tctggcaaaa cattaaacac tggtctcggt      3420
+     ggacgtggtt gttgcaaata aaatctacag cgacagttaa attgaagcaa ttattttttc      3480
+     ctatttcttg atgtagaaca ttttttgtac ttaattttga taaaatgtgg cattaaacat      3540
+     ataatctgaa atgtggacga gaaactttat tttaaactat ttatttattt tttaatcttt      3600
+     tcagaaactc aatttatccc ataggaactg aacttatcct taataaactg gtcttatctt      3660
+     aagcaacaaa acgttttctt ttcaactgaa ttc                                   3693
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_medline.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_medline.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_medline.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1336 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--Sample XML file generated by XML Spy v4.2 U (http://www.xmlspy.com)-->
+<!DOCTYPE MedlineCitationSet SYSTEM "nlmmedline.dtd">
+<MedlineCitationSet>
+	<MedlineCitation Owner="NLM" Status="In-Process">
+		<MedlineID>Text1</MedlineID>
+		<PMID>Text2</PMID>
+		<DateCreated>
+			<Year>1998</Year>
+			<Month>1</Month>
+			<Day>1</Day>
+			<Hour>0</Hour>
+			<Minute>0</Minute>
+			<Second>0</Second>
+		</DateCreated>
+		<DateCompleted>
+			<Year>1999</Year>
+			<Month>2</Month>
+			<Day>2</Day>
+			<Hour>1</Hour>
+			<Minute>1</Minute>
+			<Second>1</Second>
+		</DateCompleted>
+		<DateRevised>
+			<Year>2000</Year>
+			<Month>3</Month>
+			<Day>3</Day>
+			<Hour>2</Hour>
+			<Minute>2</Minute>
+			<Second>2</Second>
+		</DateRevised>
+		<Article>
+			<Journal>
+				<ISSN>Text3</ISSN>
+				<JournalIssue>
+					<Volume>Text4</Volume>
+					<Issue>Text5</Issue>
+					<PubDate>
+						<Year>2001</Year>
+						<Month>4</Month>
+						<Day>4</Day>
+					</PubDate>
+				</JournalIssue>
+				<Coden>Text6</Coden>
+				<Title>Text7</Title>
+				<ISOAbbreviation>Text8</ISOAbbreviation>
+			</Journal>
+			<ArticleTitle>Text9</ArticleTitle>
+			<Pagination>
+				<StartPage>Text10</StartPage>
+				<EndPage>Text11</EndPage>
+				<MedlinePgn>Text12</MedlinePgn>
+			</Pagination>
+			<Abstract>
+				<AbstractText>Text13</AbstractText>
+				<CopyrightInformation>Text14</CopyrightInformation>
+			</Abstract>
+			<Affiliation>Text15</Affiliation>
+			<AuthorList CompleteYN="Y">
+				<Author>
+					<LastName>Text16</LastName>
+					<ForeName>Text17</ForeName>
+					<Initials>Text18</Initials>
+					<Suffix>Text19</Suffix>
+					<Affiliation>Text20</Affiliation>
+				</Author>
+				<Author>
+					<LastName>Text21</LastName>
+					<ForeName>Text22</ForeName>
+					<Initials>Text23</Initials>
+					<Suffix>Text24</Suffix>
+					<Affiliation>Text25</Affiliation>
+				</Author>
+				<Author>
+					<LastName>Text26</LastName>
+					<ForeName>Text27</ForeName>
+					<Initials>Text28</Initials>
+					<Suffix>Text29</Suffix>
+					<Affiliation>Text30</Affiliation>
+				</Author>
+			</AuthorList>
+			<Language>Text31</Language>
+			<Language>Text32</Language>
+			<Language>Text33</Language>
+			<DataBankList CompleteYN="Y">
+				<DataBank>
+					<DataBankName>Text34</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text35</AccessionNumber>
+						<AccessionNumber>Text36</AccessionNumber>
+						<AccessionNumber>Text37</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+				<DataBank>
+					<DataBankName>Text38</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text39</AccessionNumber>
+						<AccessionNumber>Text40</AccessionNumber>
+						<AccessionNumber>Text41</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+				<DataBank>
+					<DataBankName>Text42</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text43</AccessionNumber>
+						<AccessionNumber>Text44</AccessionNumber>
+						<AccessionNumber>Text45</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+			</DataBankList>
+			<GrantList CompleteYN="Y">
+				<Grant>
+					<GrantID>Text46</GrantID>
+					<Acronym>Text47</Acronym>
+					<Agency>Text48</Agency>
+				</Grant>
+				<Grant>
+					<GrantID>Text49</GrantID>
+					<Acronym>Text50</Acronym>
+					<Agency>Text51</Agency>
+				</Grant>
+				<Grant>
+					<GrantID>Text52</GrantID>
+					<Acronym>Text53</Acronym>
+					<Agency>Text54</Agency>
+				</Grant>
+			</GrantList>
+			<PublicationTypeList>
+				<PublicationType>Text55</PublicationType>
+				<PublicationType>Text56</PublicationType>
+				<PublicationType>Text57</PublicationType>
+			</PublicationTypeList>
+			<VernacularTitle>Text58</VernacularTitle>
+			<DateOfElectronicPublication>Text59</DateOfElectronicPublication>
+		</Article>
+		<MedlineJournalInfo>
+			<Country>Text60</Country>
+			<MedlineTA>Text61</MedlineTA>
+			<MedlineCode>Text62</MedlineCode>
+			<NlmUniqueID>Text63</NlmUniqueID>
+		</MedlineJournalInfo>
+		<ChemicalList>
+			<Chemical>
+				<RegistryNumber>Text64</RegistryNumber>
+				<NameOfSubstance>Text65</NameOfSubstance>
+			</Chemical>
+			<Chemical>
+				<RegistryNumber>Text66</RegistryNumber>
+				<NameOfSubstance>Text67</NameOfSubstance>
+			</Chemical>
+			<Chemical>
+				<RegistryNumber>Text68</RegistryNumber>
+				<NameOfSubstance>Text69</NameOfSubstance>
+			</Chemical>
+		</ChemicalList>
+		<CitationSubset>Text70</CitationSubset>
+		<CitationSubset>Text71</CitationSubset>
+		<CitationSubset>Text72</CitationSubset>
+		<CommentsCorrections>
+			<CommentOn>
+				<RefSource>Text73</RefSource>
+				<PMID>Text74</PMID>
+				<Note>Text75</Note>
+			</CommentOn>
+			<CommentOn>
+				<RefSource>Text76</RefSource>
+				<PMID>Text77</PMID>
+				<Note>Text78</Note>
+			</CommentOn>
+			<CommentOn>
+				<RefSource>Text79</RefSource>
+				<PMID>Text80</PMID>
+				<Note>Text81</Note>
+			</CommentOn>
+			<CommentIn>
+				<RefSource>Text82</RefSource>
+				<PMID>Text83</PMID>
+				<Note>Text84</Note>
+			</CommentIn>
+			<CommentIn>
+				<RefSource>Text85</RefSource>
+				<PMID>Text86</PMID>
+				<Note>Text87</Note>
+			</CommentIn>
+			<CommentIn>
+				<RefSource>Text88</RefSource>
+				<PMID>Text89</PMID>
+				<Note>Text90</Note>
+			</CommentIn>
+			<ErratumIn>
+				<RefSource>Text91</RefSource>
+				<PMID>Text92</PMID>
+				<Note>Text93</Note>
+			</ErratumIn>
+			<ErratumIn>
+				<RefSource>Text94</RefSource>
+				<PMID>Text95</PMID>
+				<Note>Text96</Note>
+			</ErratumIn>
+			<ErratumIn>
+				<RefSource>Text97</RefSource>
+				<PMID>Text98</PMID>
+				<Note>Text99</Note>
+			</ErratumIn>
+			<ErratumFor>
+				<RefSource>Text100</RefSource>
+				<PMID>Text101</PMID>
+				<Note>Text102</Note>
+			</ErratumFor>
+			<ErratumFor>
+				<RefSource>Text103</RefSource>
+				<PMID>Text104</PMID>
+				<Note>Text105</Note>
+			</ErratumFor>
+			<ErratumFor>
+				<RefSource>Text106</RefSource>
+				<PMID>Text107</PMID>
+				<Note>Text108</Note>
+			</ErratumFor>
+			<RepublishedFrom>
+				<RefSource>Text109</RefSource>
+				<PMID>Text110</PMID>
+				<Note>Text111</Note>
+			</RepublishedFrom>
+			<RepublishedFrom>
+				<RefSource>Text112</RefSource>
+				<PMID>Text113</PMID>
+				<Note>Text114</Note>
+			</RepublishedFrom>
+			<RepublishedFrom>
+				<RefSource>Text115</RefSource>
+				<PMID>Text116</PMID>
+				<Note>Text117</Note>
+			</RepublishedFrom>
+			<RepublishedIn>
+				<RefSource>Text118</RefSource>
+				<PMID>Text119</PMID>
+				<Note>Text120</Note>
+			</RepublishedIn>
+			<RepublishedIn>
+				<RefSource>Text121</RefSource>
+				<PMID>Text122</PMID>
+				<Note>Text123</Note>
+			</RepublishedIn>
+			<RepublishedIn>
+				<RefSource>Text124</RefSource>
+				<PMID>Text125</PMID>
+				<Note>Text126</Note>
+			</RepublishedIn>
+			<RetractionOf>
+				<RefSource>Text127</RefSource>
+				<PMID>Text128</PMID>
+				<Note>Text129</Note>
+			</RetractionOf>
+			<RetractionOf>
+				<RefSource>Text130</RefSource>
+				<PMID>Text131</PMID>
+				<Note>Text132</Note>
+			</RetractionOf>
+			<RetractionOf>
+				<RefSource>Text133</RefSource>
+				<PMID>Text134</PMID>
+				<Note>Text135</Note>
+			</RetractionOf>
+			<RetractionIn>
+				<RefSource>Text136</RefSource>
+				<PMID>Text137</PMID>
+				<Note>Text138</Note>
+			</RetractionIn>
+			<RetractionIn>
+				<RefSource>Text139</RefSource>
+				<PMID>Text140</PMID>
+				<Note>Text141</Note>
+			</RetractionIn>
+			<RetractionIn>
+				<RefSource>Text142</RefSource>
+				<PMID>Text143</PMID>
+				<Note>Text144</Note>
+			</RetractionIn>
+			<UpdateIn>
+				<RefSource>Text145</RefSource>
+				<PMID>Text146</PMID>
+				<Note>Text147</Note>
+			</UpdateIn>
+			<UpdateIn>
+				<RefSource>Text148</RefSource>
+				<PMID>Text149</PMID>
+				<Note>Text150</Note>
+			</UpdateIn>
+			<UpdateIn>
+				<RefSource>Text151</RefSource>
+				<PMID>Text152</PMID>
+				<Note>Text153</Note>
+			</UpdateIn>
+			<UpdateOf>
+				<RefSource>Text154</RefSource>
+				<PMID>Text155</PMID>
+				<Note>Text156</Note>
+			</UpdateOf>
+			<UpdateOf>
+				<RefSource>Text157</RefSource>
+				<PMID>Text158</PMID>
+				<Note>Text159</Note>
+			</UpdateOf>
+			<UpdateOf>
+				<RefSource>Text160</RefSource>
+				<PMID>Text161</PMID>
+				<Note>Text162</Note>
+			</UpdateOf>
+			<SummaryForPatientsIn>
+				<RefSource>Text163</RefSource>
+				<PMID>Text164</PMID>
+				<Note>Text165</Note>
+			</SummaryForPatientsIn>
+			<SummaryForPatientsIn>
+				<RefSource>Text166</RefSource>
+				<PMID>Text167</PMID>
+				<Note>Text168</Note>
+			</SummaryForPatientsIn>
+			<SummaryForPatientsIn>
+				<RefSource>Text169</RefSource>
+				<PMID>Text170</PMID>
+				<Note>Text171</Note>
+			</SummaryForPatientsIn>
+			<OriginalReportIn>
+				<RefSource>Text172</RefSource>
+				<PMID>Text173</PMID>
+				<Note>Text174</Note>
+			</OriginalReportIn>
+			<OriginalReportIn>
+				<RefSource>Text175</RefSource>
+				<PMID>Text176</PMID>
+				<Note>Text177</Note>
+			</OriginalReportIn>
+			<OriginalReportIn>
+				<RefSource>Text178</RefSource>
+				<PMID>Text179</PMID>
+				<Note>Text180</Note>
+			</OriginalReportIn>
+		</CommentsCorrections>
+		<GeneSymbolList>
+			<GeneSymbol>Text181</GeneSymbol>
+			<GeneSymbol>Text182</GeneSymbol>
+			<GeneSymbol>Text183</GeneSymbol>
+		</GeneSymbolList>
+		<MeshHeadingList>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text184</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text185</QualifierName>
+				<QualifierName MajorTopicYN="N">Text186</QualifierName>
+				<QualifierName MajorTopicYN="N">Text187</QualifierName>
+			</MeshHeading>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text188</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text189</QualifierName>
+				<QualifierName MajorTopicYN="N">Text190</QualifierName>
+				<QualifierName MajorTopicYN="N">Text191</QualifierName>
+			</MeshHeading>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text192</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text193</QualifierName>
+				<QualifierName MajorTopicYN="N">Text194</QualifierName>
+				<QualifierName MajorTopicYN="N">Text195</QualifierName>
+			</MeshHeading>
+		</MeshHeadingList>
+		<NumberOfReferences>Text196</NumberOfReferences>
+		<PersonalNameSubjectList>
+			<PersonalNameSubject>
+				<LastName>Text197</LastName>
+				<ForeName>Text198</ForeName>
+				<Initials>Text199</Initials>
+				<Suffix>Text200</Suffix>
+			</PersonalNameSubject>
+			<PersonalNameSubject>
+				<LastName>Text201</LastName>
+				<ForeName>Text202</ForeName>
+				<Initials>Text203</Initials>
+				<Suffix>Text204</Suffix>
+			</PersonalNameSubject>
+			<PersonalNameSubject>
+				<LastName>Text205</LastName>
+				<ForeName>Text206</ForeName>
+				<Initials>Text207</Initials>
+				<Suffix>Text208</Suffix>
+			</PersonalNameSubject>
+		</PersonalNameSubjectList>
+		<OtherID Source="NASA">Text209</OtherID>
+		<OtherID Source="NASA">Text210</OtherID>
+		<OtherID Source="NASA">Text211</OtherID>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text212</AbstractText>
+			<CopyrightInformation>Text213</CopyrightInformation>
+		</OtherAbstract>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text214</AbstractText>
+			<CopyrightInformation>Text215</CopyrightInformation>
+		</OtherAbstract>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text216</AbstractText>
+			<CopyrightInformation>Text217</CopyrightInformation>
+		</OtherAbstract>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text218</Keyword>
+			<Keyword MajorTopicYN="N">Text219</Keyword>
+			<Keyword MajorTopicYN="N">Text220</Keyword>
+		</KeywordList>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text221</Keyword>
+			<Keyword MajorTopicYN="N">Text222</Keyword>
+			<Keyword MajorTopicYN="N">Text223</Keyword>
+		</KeywordList>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text224</Keyword>
+			<Keyword MajorTopicYN="N">Text225</Keyword>
+			<Keyword MajorTopicYN="N">Text226</Keyword>
+		</KeywordList>
+		<SpaceFlightMission>Text227</SpaceFlightMission>
+		<SpaceFlightMission>Text228</SpaceFlightMission>
+		<SpaceFlightMission>Text229</SpaceFlightMission>
+		<InvestigatorList>
+			<Investigator>
+				<LastName>Text230</LastName>
+				<ForeName>Text231</ForeName>
+				<Initials>Text232</Initials>
+				<Suffix>Text233</Suffix>
+				<Affiliation>Text234</Affiliation>
+			</Investigator>
+			<Investigator>
+				<LastName>Text235</LastName>
+				<ForeName>Text236</ForeName>
+				<Initials>Text237</Initials>
+				<Suffix>Text238</Suffix>
+				<Affiliation>Text239</Affiliation>
+			</Investigator>
+			<Investigator>
+				<LastName>Text240</LastName>
+				<ForeName>Text241</ForeName>
+				<Initials>Text242</Initials>
+				<Suffix>Text243</Suffix>
+				<Affiliation>Text244</Affiliation>
+			</Investigator>
+		</InvestigatorList>
+		<GeneralNote Owner="NLM">Text245</GeneralNote>
+		<GeneralNote Owner="NLM">Text246</GeneralNote>
+		<GeneralNote Owner="NLM">Text247</GeneralNote>
+	</MedlineCitation>
+	<MedlineCitation Owner="NLM" Status="In-Process">
+		<MedlineID>Text248</MedlineID>
+		<PMID>Text249</PMID>
+		<DateCreated>
+			<Year>1997</Year>
+			<Month>5</Month>
+			<Day>5</Day>
+			<Hour>3</Hour>
+			<Minute>3</Minute>
+			<Second>3</Second>
+		</DateCreated>
+		<DateCompleted>
+			<Year>1998</Year>
+			<Month>6</Month>
+			<Day>6</Day>
+			<Hour>4</Hour>
+			<Minute>4</Minute>
+			<Second>4</Second>
+		</DateCompleted>
+		<DateRevised>
+			<Year>1999</Year>
+			<Month>7</Month>
+			<Day>7</Day>
+			<Hour>5</Hour>
+			<Minute>5</Minute>
+			<Second>5</Second>
+		</DateRevised>
+		<Article>
+			<Journal>
+				<ISSN>Text250</ISSN>
+				<JournalIssue>
+					<Volume>Text251</Volume>
+					<Issue>Text252</Issue>
+					<PubDate>
+						<Year>2000</Year>
+						<Month>8</Month>
+						<Day>8</Day>
+					</PubDate>
+				</JournalIssue>
+				<Coden>Text253</Coden>
+				<Title>Text254</Title>
+				<ISOAbbreviation>Text255</ISOAbbreviation>
+			</Journal>
+			<ArticleTitle>Text256</ArticleTitle>
+			<Pagination>
+				<StartPage>Text257</StartPage>
+				<EndPage>Text258</EndPage>
+				<MedlinePgn>Text259</MedlinePgn>
+			</Pagination>
+			<Abstract>
+				<AbstractText>Text260</AbstractText>
+				<CopyrightInformation>Text261</CopyrightInformation>
+			</Abstract>
+			<Affiliation>Text262</Affiliation>
+			<AuthorList CompleteYN="Y">
+				<Author>
+					<LastName>Text263</LastName>
+					<ForeName>Text264</ForeName>
+					<Initials>Text265</Initials>
+					<Suffix>Text266</Suffix>
+					<Affiliation>Text267</Affiliation>
+				</Author>
+				<Author>
+					<LastName>Text268</LastName>
+					<ForeName>Text269</ForeName>
+					<Initials>Text270</Initials>
+					<Suffix>Text271</Suffix>
+					<Affiliation>Text272</Affiliation>
+				</Author>
+				<Author>
+					<LastName>Text273</LastName>
+					<ForeName>Text274</ForeName>
+					<Initials>Text275</Initials>
+					<Suffix>Text276</Suffix>
+					<Affiliation>Text277</Affiliation>
+				</Author>
+			</AuthorList>
+			<Language>Text278</Language>
+			<Language>Text279</Language>
+			<Language>Text280</Language>
+			<DataBankList CompleteYN="Y">
+				<DataBank>
+					<DataBankName>Text281</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text282</AccessionNumber>
+						<AccessionNumber>Text283</AccessionNumber>
+						<AccessionNumber>Text284</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+				<DataBank>
+					<DataBankName>Text285</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text286</AccessionNumber>
+						<AccessionNumber>Text287</AccessionNumber>
+						<AccessionNumber>Text288</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+				<DataBank>
+					<DataBankName>Text289</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text290</AccessionNumber>
+						<AccessionNumber>Text291</AccessionNumber>
+						<AccessionNumber>Text292</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+			</DataBankList>
+			<GrantList CompleteYN="Y">
+				<Grant>
+					<GrantID>Text293</GrantID>
+					<Acronym>Text294</Acronym>
+					<Agency>Text295</Agency>
+				</Grant>
+				<Grant>
+					<GrantID>Text296</GrantID>
+					<Acronym>Text297</Acronym>
+					<Agency>Text298</Agency>
+				</Grant>
+				<Grant>
+					<GrantID>Text299</GrantID>
+					<Acronym>Text300</Acronym>
+					<Agency>Text301</Agency>
+				</Grant>
+			</GrantList>
+			<PublicationTypeList>
+				<PublicationType>Text302</PublicationType>
+				<PublicationType>Text303</PublicationType>
+				<PublicationType>Text304</PublicationType>
+			</PublicationTypeList>
+			<VernacularTitle>Text305</VernacularTitle>
+			<DateOfElectronicPublication>Text306</DateOfElectronicPublication>
+		</Article>
+		<MedlineJournalInfo>
+			<Country>Text307</Country>
+			<MedlineTA>Text308</MedlineTA>
+			<MedlineCode>Text309</MedlineCode>
+			<NlmUniqueID>Text310</NlmUniqueID>
+		</MedlineJournalInfo>
+		<ChemicalList>
+			<Chemical>
+				<RegistryNumber>Text311</RegistryNumber>
+				<NameOfSubstance>Text312</NameOfSubstance>
+			</Chemical>
+			<Chemical>
+				<RegistryNumber>Text313</RegistryNumber>
+				<NameOfSubstance>Text314</NameOfSubstance>
+			</Chemical>
+			<Chemical>
+				<RegistryNumber>Text315</RegistryNumber>
+				<NameOfSubstance>Text316</NameOfSubstance>
+			</Chemical>
+		</ChemicalList>
+		<CitationSubset>Text317</CitationSubset>
+		<CitationSubset>Text318</CitationSubset>
+		<CitationSubset>Text319</CitationSubset>
+		<CommentsCorrections>
+			<CommentOn>
+				<RefSource>Text320</RefSource>
+				<PMID>Text321</PMID>
+				<Note>Text322</Note>
+			</CommentOn>
+			<CommentOn>
+				<RefSource>Text323</RefSource>
+				<PMID>Text324</PMID>
+				<Note>Text325</Note>
+			</CommentOn>
+			<CommentOn>
+				<RefSource>Text326</RefSource>
+				<PMID>Text327</PMID>
+				<Note>Text328</Note>
+			</CommentOn>
+			<CommentIn>
+				<RefSource>Text329</RefSource>
+				<PMID>Text330</PMID>
+				<Note>Text331</Note>
+			</CommentIn>
+			<CommentIn>
+				<RefSource>Text332</RefSource>
+				<PMID>Text333</PMID>
+				<Note>Text334</Note>
+			</CommentIn>
+			<CommentIn>
+				<RefSource>Text335</RefSource>
+				<PMID>Text336</PMID>
+				<Note>Text337</Note>
+			</CommentIn>
+			<ErratumIn>
+				<RefSource>Text338</RefSource>
+				<PMID>Text339</PMID>
+				<Note>Text340</Note>
+			</ErratumIn>
+			<ErratumIn>
+				<RefSource>Text341</RefSource>
+				<PMID>Text342</PMID>
+				<Note>Text343</Note>
+			</ErratumIn>
+			<ErratumIn>
+				<RefSource>Text344</RefSource>
+				<PMID>Text345</PMID>
+				<Note>Text346</Note>
+			</ErratumIn>
+			<ErratumFor>
+				<RefSource>Text347</RefSource>
+				<PMID>Text348</PMID>
+				<Note>Text349</Note>
+			</ErratumFor>
+			<ErratumFor>
+				<RefSource>Text350</RefSource>
+				<PMID>Text351</PMID>
+				<Note>Text352</Note>
+			</ErratumFor>
+			<ErratumFor>
+				<RefSource>Text353</RefSource>
+				<PMID>Text354</PMID>
+				<Note>Text355</Note>
+			</ErratumFor>
+			<RepublishedFrom>
+				<RefSource>Text356</RefSource>
+				<PMID>Text357</PMID>
+				<Note>Text358</Note>
+			</RepublishedFrom>
+			<RepublishedFrom>
+				<RefSource>Text359</RefSource>
+				<PMID>Text360</PMID>
+				<Note>Text361</Note>
+			</RepublishedFrom>
+			<RepublishedFrom>
+				<RefSource>Text362</RefSource>
+				<PMID>Text363</PMID>
+				<Note>Text364</Note>
+			</RepublishedFrom>
+			<RepublishedIn>
+				<RefSource>Text365</RefSource>
+				<PMID>Text366</PMID>
+				<Note>Text367</Note>
+			</RepublishedIn>
+			<RepublishedIn>
+				<RefSource>Text368</RefSource>
+				<PMID>Text369</PMID>
+				<Note>Text370</Note>
+			</RepublishedIn>
+			<RepublishedIn>
+				<RefSource>Text371</RefSource>
+				<PMID>Text372</PMID>
+				<Note>Text373</Note>
+			</RepublishedIn>
+			<RetractionOf>
+				<RefSource>Text374</RefSource>
+				<PMID>Text375</PMID>
+				<Note>Text376</Note>
+			</RetractionOf>
+			<RetractionOf>
+				<RefSource>Text377</RefSource>
+				<PMID>Text378</PMID>
+				<Note>Text379</Note>
+			</RetractionOf>
+			<RetractionOf>
+				<RefSource>Text380</RefSource>
+				<PMID>Text381</PMID>
+				<Note>Text382</Note>
+			</RetractionOf>
+			<RetractionIn>
+				<RefSource>Text383</RefSource>
+				<PMID>Text384</PMID>
+				<Note>Text385</Note>
+			</RetractionIn>
+			<RetractionIn>
+				<RefSource>Text386</RefSource>
+				<PMID>Text387</PMID>
+				<Note>Text388</Note>
+			</RetractionIn>
+			<RetractionIn>
+				<RefSource>Text389</RefSource>
+				<PMID>Text390</PMID>
+				<Note>Text391</Note>
+			</RetractionIn>
+			<UpdateIn>
+				<RefSource>Text392</RefSource>
+				<PMID>Text393</PMID>
+				<Note>Text394</Note>
+			</UpdateIn>
+			<UpdateIn>
+				<RefSource>Text395</RefSource>
+				<PMID>Text396</PMID>
+				<Note>Text397</Note>
+			</UpdateIn>
+			<UpdateIn>
+				<RefSource>Text398</RefSource>
+				<PMID>Text399</PMID>
+				<Note>Text400</Note>
+			</UpdateIn>
+			<UpdateOf>
+				<RefSource>Text401</RefSource>
+				<PMID>Text402</PMID>
+				<Note>Text403</Note>
+			</UpdateOf>
+			<UpdateOf>
+				<RefSource>Text404</RefSource>
+				<PMID>Text405</PMID>
+				<Note>Text406</Note>
+			</UpdateOf>
+			<UpdateOf>
+				<RefSource>Text407</RefSource>
+				<PMID>Text408</PMID>
+				<Note>Text409</Note>
+			</UpdateOf>
+			<SummaryForPatientsIn>
+				<RefSource>Text410</RefSource>
+				<PMID>Text411</PMID>
+				<Note>Text412</Note>
+			</SummaryForPatientsIn>
+			<SummaryForPatientsIn>
+				<RefSource>Text413</RefSource>
+				<PMID>Text414</PMID>
+				<Note>Text415</Note>
+			</SummaryForPatientsIn>
+			<SummaryForPatientsIn>
+				<RefSource>Text416</RefSource>
+				<PMID>Text417</PMID>
+				<Note>Text418</Note>
+			</SummaryForPatientsIn>
+			<OriginalReportIn>
+				<RefSource>Text419</RefSource>
+				<PMID>Text420</PMID>
+				<Note>Text421</Note>
+			</OriginalReportIn>
+			<OriginalReportIn>
+				<RefSource>Text422</RefSource>
+				<PMID>Text423</PMID>
+				<Note>Text424</Note>
+			</OriginalReportIn>
+			<OriginalReportIn>
+				<RefSource>Text425</RefSource>
+				<PMID>Text426</PMID>
+				<Note>Text427</Note>
+			</OriginalReportIn>
+		</CommentsCorrections>
+		<GeneSymbolList>
+			<GeneSymbol>Text428</GeneSymbol>
+			<GeneSymbol>Text429</GeneSymbol>
+			<GeneSymbol>Text430</GeneSymbol>
+		</GeneSymbolList>
+		<MeshHeadingList>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text431</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text432</QualifierName>
+				<QualifierName MajorTopicYN="N">Text433</QualifierName>
+				<QualifierName MajorTopicYN="N">Text434</QualifierName>
+			</MeshHeading>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text435</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text436</QualifierName>
+				<QualifierName MajorTopicYN="N">Text437</QualifierName>
+				<QualifierName MajorTopicYN="N">Text438</QualifierName>
+			</MeshHeading>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text439</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text440</QualifierName>
+				<QualifierName MajorTopicYN="N">Text441</QualifierName>
+				<QualifierName MajorTopicYN="N">Text442</QualifierName>
+			</MeshHeading>
+		</MeshHeadingList>
+		<NumberOfReferences>Text443</NumberOfReferences>
+		<PersonalNameSubjectList>
+			<PersonalNameSubject>
+				<LastName>Text444</LastName>
+				<ForeName>Text445</ForeName>
+				<Initials>Text446</Initials>
+				<Suffix>Text447</Suffix>
+			</PersonalNameSubject>
+			<PersonalNameSubject>
+				<LastName>Text448</LastName>
+				<ForeName>Text449</ForeName>
+				<Initials>Text450</Initials>
+				<Suffix>Text451</Suffix>
+			</PersonalNameSubject>
+			<PersonalNameSubject>
+				<LastName>Text452</LastName>
+				<ForeName>Text453</ForeName>
+				<Initials>Text454</Initials>
+				<Suffix>Text455</Suffix>
+			</PersonalNameSubject>
+		</PersonalNameSubjectList>
+		<OtherID Source="NASA">Text456</OtherID>
+		<OtherID Source="NASA">Text457</OtherID>
+		<OtherID Source="NASA">Text458</OtherID>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text459</AbstractText>
+			<CopyrightInformation>Text460</CopyrightInformation>
+		</OtherAbstract>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text461</AbstractText>
+			<CopyrightInformation>Text462</CopyrightInformation>
+		</OtherAbstract>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text463</AbstractText>
+			<CopyrightInformation>Text464</CopyrightInformation>
+		</OtherAbstract>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text465</Keyword>
+			<Keyword MajorTopicYN="N">Text466</Keyword>
+			<Keyword MajorTopicYN="N">Text467</Keyword>
+		</KeywordList>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text468</Keyword>
+			<Keyword MajorTopicYN="N">Text469</Keyword>
+			<Keyword MajorTopicYN="N">Text470</Keyword>
+		</KeywordList>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text471</Keyword>
+			<Keyword MajorTopicYN="N">Text472</Keyword>
+			<Keyword MajorTopicYN="N">Text473</Keyword>
+		</KeywordList>
+		<SpaceFlightMission>Text474</SpaceFlightMission>
+		<SpaceFlightMission>Text475</SpaceFlightMission>
+		<SpaceFlightMission>Text476</SpaceFlightMission>
+		<InvestigatorList>
+			<Investigator>
+				<LastName>Text477</LastName>
+				<ForeName>Text478</ForeName>
+				<Initials>Text479</Initials>
+				<Suffix>Text480</Suffix>
+				<Affiliation>Text481</Affiliation>
+			</Investigator>
+			<Investigator>
+				<LastName>Text482</LastName>
+				<ForeName>Text483</ForeName>
+				<Initials>Text484</Initials>
+				<Suffix>Text485</Suffix>
+				<Affiliation>Text486</Affiliation>
+			</Investigator>
+			<Investigator>
+				<LastName>Text487</LastName>
+				<ForeName>Text488</ForeName>
+				<Initials>Text489</Initials>
+				<Suffix>Text490</Suffix>
+				<Affiliation>Text491</Affiliation>
+			</Investigator>
+		</InvestigatorList>
+		<GeneralNote Owner="NLM">Text492</GeneralNote>
+		<GeneralNote Owner="NLM">Text493</GeneralNote>
+		<GeneralNote Owner="NLM">Text494</GeneralNote>
+	</MedlineCitation>
+	<MedlineCitation Owner="NLM" Status="In-Process">
+		<MedlineID>Text495</MedlineID>
+		<PMID>Text496</PMID>
+		<DateCreated>
+			<Year>2001</Year>
+			<Month>9</Month>
+			<Day>9</Day>
+			<Hour>6</Hour>
+			<Minute>6</Minute>
+			<Second>6</Second>
+		</DateCreated>
+		<DateCompleted>
+			<Year>1997</Year>
+			<Month>10</Month>
+			<Day>10</Day>
+			<Hour>7</Hour>
+			<Minute>7</Minute>
+			<Second>7</Second>
+		</DateCompleted>
+		<DateRevised>
+			<Year>1998</Year>
+			<Month>11</Month>
+			<Day>11</Day>
+			<Hour>8</Hour>
+			<Minute>8</Minute>
+			<Second>8</Second>
+		</DateRevised>
+		<Article>
+			<Journal>
+				<ISSN>Text497</ISSN>
+				<JournalIssue>
+					<Volume>Text498</Volume>
+					<Issue>Text499</Issue>
+					<PubDate>
+						<Year>1999</Year>
+						<Month>12</Month>
+						<Day>12</Day>
+					</PubDate>
+				</JournalIssue>
+				<Coden>Text500</Coden>
+				<Title>Text501</Title>
+				<ISOAbbreviation>Text502</ISOAbbreviation>
+			</Journal>
+			<ArticleTitle>Text503</ArticleTitle>
+			<Pagination>
+				<StartPage>Text504</StartPage>
+				<EndPage>Text505</EndPage>
+				<MedlinePgn>Text506</MedlinePgn>
+			</Pagination>
+			<Abstract>
+				<AbstractText>Text507</AbstractText>
+				<CopyrightInformation>Text508</CopyrightInformation>
+			</Abstract>
+			<Affiliation>Text509</Affiliation>
+			<AuthorList CompleteYN="Y">
+				<Author>
+					<LastName>Text510</LastName>
+					<ForeName>Text511</ForeName>
+					<Initials>Text512</Initials>
+					<Suffix>Text513</Suffix>
+					<Affiliation>Text514</Affiliation>
+				</Author>
+				<Author>
+					<LastName>Text515</LastName>
+					<ForeName>Text516</ForeName>
+					<Initials>Text517</Initials>
+					<Suffix>Text518</Suffix>
+					<Affiliation>Text519</Affiliation>
+				</Author>
+				<Author>
+					<LastName>Text520</LastName>
+					<ForeName>Text521</ForeName>
+					<Initials>Text522</Initials>
+					<Suffix>Text523</Suffix>
+					<Affiliation>Text524</Affiliation>
+				</Author>
+			</AuthorList>
+			<Language>Text525</Language>
+			<Language>Text526</Language>
+			<Language>Text527</Language>
+			<DataBankList CompleteYN="Y">
+				<DataBank>
+					<DataBankName>Text528</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text529</AccessionNumber>
+						<AccessionNumber>Text530</AccessionNumber>
+						<AccessionNumber>Text531</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+				<DataBank>
+					<DataBankName>Text532</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text533</AccessionNumber>
+						<AccessionNumber>Text534</AccessionNumber>
+						<AccessionNumber>Text535</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+				<DataBank>
+					<DataBankName>Text536</DataBankName>
+					<AccessionNumberList>
+						<AccessionNumber>Text537</AccessionNumber>
+						<AccessionNumber>Text538</AccessionNumber>
+						<AccessionNumber>Text539</AccessionNumber>
+					</AccessionNumberList>
+				</DataBank>
+			</DataBankList>
+			<GrantList CompleteYN="Y">
+				<Grant>
+					<GrantID>Text540</GrantID>
+					<Acronym>Text541</Acronym>
+					<Agency>Text542</Agency>
+				</Grant>
+				<Grant>
+					<GrantID>Text543</GrantID>
+					<Acronym>Text544</Acronym>
+					<Agency>Text545</Agency>
+				</Grant>
+				<Grant>
+					<GrantID>Text546</GrantID>
+					<Acronym>Text547</Acronym>
+					<Agency>Text548</Agency>
+				</Grant>
+			</GrantList>
+			<PublicationTypeList>
+				<PublicationType>Text549</PublicationType>
+				<PublicationType>Text550</PublicationType>
+				<PublicationType>Text551</PublicationType>
+			</PublicationTypeList>
+			<VernacularTitle>Text552</VernacularTitle>
+			<DateOfElectronicPublication>Text553</DateOfElectronicPublication>
+		</Article>
+		<MedlineJournalInfo>
+			<Country>Text554</Country>
+			<MedlineTA>Text555</MedlineTA>
+			<MedlineCode>Text556</MedlineCode>
+			<NlmUniqueID>Text557</NlmUniqueID>
+		</MedlineJournalInfo>
+		<ChemicalList>
+			<Chemical>
+				<RegistryNumber>Text558</RegistryNumber>
+				<NameOfSubstance>Text559</NameOfSubstance>
+			</Chemical>
+			<Chemical>
+				<RegistryNumber>Text560</RegistryNumber>
+				<NameOfSubstance>Text561</NameOfSubstance>
+			</Chemical>
+			<Chemical>
+				<RegistryNumber>Text562</RegistryNumber>
+				<NameOfSubstance>Text563</NameOfSubstance>
+			</Chemical>
+		</ChemicalList>
+		<CitationSubset>Text564</CitationSubset>
+		<CitationSubset>Text565</CitationSubset>
+		<CitationSubset>Text566</CitationSubset>
+		<CommentsCorrections>
+			<CommentOn>
+				<RefSource>Text567</RefSource>
+				<PMID>Text568</PMID>
+				<Note>Text569</Note>
+			</CommentOn>
+			<CommentOn>
+				<RefSource>Text570</RefSource>
+				<PMID>Text571</PMID>
+				<Note>Text572</Note>
+			</CommentOn>
+			<CommentOn>
+				<RefSource>Text573</RefSource>
+				<PMID>Text574</PMID>
+				<Note>Text575</Note>
+			</CommentOn>
+			<CommentIn>
+				<RefSource>Text576</RefSource>
+				<PMID>Text577</PMID>
+				<Note>Text578</Note>
+			</CommentIn>
+			<CommentIn>
+				<RefSource>Text579</RefSource>
+				<PMID>Text580</PMID>
+				<Note>Text581</Note>
+			</CommentIn>
+			<CommentIn>
+				<RefSource>Text582</RefSource>
+				<PMID>Text583</PMID>
+				<Note>Text584</Note>
+			</CommentIn>
+			<ErratumIn>
+				<RefSource>Text585</RefSource>
+				<PMID>Text586</PMID>
+				<Note>Text587</Note>
+			</ErratumIn>
+			<ErratumIn>
+				<RefSource>Text588</RefSource>
+				<PMID>Text589</PMID>
+				<Note>Text590</Note>
+			</ErratumIn>
+			<ErratumIn>
+				<RefSource>Text591</RefSource>
+				<PMID>Text592</PMID>
+				<Note>Text593</Note>
+			</ErratumIn>
+			<ErratumFor>
+				<RefSource>Text594</RefSource>
+				<PMID>Text595</PMID>
+				<Note>Text596</Note>
+			</ErratumFor>
+			<ErratumFor>
+				<RefSource>Text597</RefSource>
+				<PMID>Text598</PMID>
+				<Note>Text599</Note>
+			</ErratumFor>
+			<ErratumFor>
+				<RefSource>Text600</RefSource>
+				<PMID>Text601</PMID>
+				<Note>Text602</Note>
+			</ErratumFor>
+			<RepublishedFrom>
+				<RefSource>Text603</RefSource>
+				<PMID>Text604</PMID>
+				<Note>Text605</Note>
+			</RepublishedFrom>
+			<RepublishedFrom>
+				<RefSource>Text606</RefSource>
+				<PMID>Text607</PMID>
+				<Note>Text608</Note>
+			</RepublishedFrom>
+			<RepublishedFrom>
+				<RefSource>Text609</RefSource>
+				<PMID>Text610</PMID>
+				<Note>Text611</Note>
+			</RepublishedFrom>
+			<RepublishedIn>
+				<RefSource>Text612</RefSource>
+				<PMID>Text613</PMID>
+				<Note>Text614</Note>
+			</RepublishedIn>
+			<RepublishedIn>
+				<RefSource>Text615</RefSource>
+				<PMID>Text616</PMID>
+				<Note>Text617</Note>
+			</RepublishedIn>
+			<RepublishedIn>
+				<RefSource>Text618</RefSource>
+				<PMID>Text619</PMID>
+				<Note>Text620</Note>
+			</RepublishedIn>
+			<RetractionOf>
+				<RefSource>Text621</RefSource>
+				<PMID>Text622</PMID>
+				<Note>Text623</Note>
+			</RetractionOf>
+			<RetractionOf>
+				<RefSource>Text624</RefSource>
+				<PMID>Text625</PMID>
+				<Note>Text626</Note>
+			</RetractionOf>
+			<RetractionOf>
+				<RefSource>Text627</RefSource>
+				<PMID>Text628</PMID>
+				<Note>Text629</Note>
+			</RetractionOf>
+			<RetractionIn>
+				<RefSource>Text630</RefSource>
+				<PMID>Text631</PMID>
+				<Note>Text632</Note>
+			</RetractionIn>
+			<RetractionIn>
+				<RefSource>Text633</RefSource>
+				<PMID>Text634</PMID>
+				<Note>Text635</Note>
+			</RetractionIn>
+			<RetractionIn>
+				<RefSource>Text636</RefSource>
+				<PMID>Text637</PMID>
+				<Note>Text638</Note>
+			</RetractionIn>
+			<UpdateIn>
+				<RefSource>Text639</RefSource>
+				<PMID>Text640</PMID>
+				<Note>Text641</Note>
+			</UpdateIn>
+			<UpdateIn>
+				<RefSource>Text642</RefSource>
+				<PMID>Text643</PMID>
+				<Note>Text644</Note>
+			</UpdateIn>
+			<UpdateIn>
+				<RefSource>Text645</RefSource>
+				<PMID>Text646</PMID>
+				<Note>Text647</Note>
+			</UpdateIn>
+			<UpdateOf>
+				<RefSource>Text648</RefSource>
+				<PMID>Text649</PMID>
+				<Note>Text650</Note>
+			</UpdateOf>
+			<UpdateOf>
+				<RefSource>Text651</RefSource>
+				<PMID>Text652</PMID>
+				<Note>Text653</Note>
+			</UpdateOf>
+			<UpdateOf>
+				<RefSource>Text654</RefSource>
+				<PMID>Text655</PMID>
+				<Note>Text656</Note>
+			</UpdateOf>
+			<SummaryForPatientsIn>
+				<RefSource>Text657</RefSource>
+				<PMID>Text658</PMID>
+				<Note>Text659</Note>
+			</SummaryForPatientsIn>
+			<SummaryForPatientsIn>
+				<RefSource>Text660</RefSource>
+				<PMID>Text661</PMID>
+				<Note>Text662</Note>
+			</SummaryForPatientsIn>
+			<SummaryForPatientsIn>
+				<RefSource>Text663</RefSource>
+				<PMID>Text664</PMID>
+				<Note>Text665</Note>
+			</SummaryForPatientsIn>
+			<OriginalReportIn>
+				<RefSource>Text666</RefSource>
+				<PMID>Text667</PMID>
+				<Note>Text668</Note>
+			</OriginalReportIn>
+			<OriginalReportIn>
+				<RefSource>Text669</RefSource>
+				<PMID>Text670</PMID>
+				<Note>Text671</Note>
+			</OriginalReportIn>
+			<OriginalReportIn>
+				<RefSource>Text672</RefSource>
+				<PMID>Text673</PMID>
+				<Note>Text674</Note>
+			</OriginalReportIn>
+		</CommentsCorrections>
+		<GeneSymbolList>
+			<GeneSymbol>Text675</GeneSymbol>
+			<GeneSymbol>Text676</GeneSymbol>
+			<GeneSymbol>Text677</GeneSymbol>
+		</GeneSymbolList>
+		<MeshHeadingList>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text678</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text679</QualifierName>
+				<QualifierName MajorTopicYN="N">Text680</QualifierName>
+				<QualifierName MajorTopicYN="N">Text681</QualifierName>
+			</MeshHeading>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text682</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text683</QualifierName>
+				<QualifierName MajorTopicYN="N">Text684</QualifierName>
+				<QualifierName MajorTopicYN="N">Text685</QualifierName>
+			</MeshHeading>
+			<MeshHeading>
+				<DescriptorName MajorTopicYN="N">Text686</DescriptorName>
+				<QualifierName MajorTopicYN="N">Text687</QualifierName>
+				<QualifierName MajorTopicYN="N">Text688</QualifierName>
+				<QualifierName MajorTopicYN="N">Text689</QualifierName>
+			</MeshHeading>
+		</MeshHeadingList>
+		<NumberOfReferences>Text690</NumberOfReferences>
+		<PersonalNameSubjectList>
+			<PersonalNameSubject>
+				<LastName>Text691</LastName>
+				<ForeName>Text692</ForeName>
+				<Initials>Text693</Initials>
+				<Suffix>Text694</Suffix>
+			</PersonalNameSubject>
+			<PersonalNameSubject>
+				<LastName>Text695</LastName>
+				<ForeName>Text696</ForeName>
+				<Initials>Text697</Initials>
+				<Suffix>Text698</Suffix>
+			</PersonalNameSubject>
+			<PersonalNameSubject>
+				<LastName>Text699</LastName>
+				<ForeName>Text700</ForeName>
+				<Initials>Text701</Initials>
+				<Suffix>Text702</Suffix>
+			</PersonalNameSubject>
+		</PersonalNameSubjectList>
+		<OtherID Source="NASA">Text703</OtherID>
+		<OtherID Source="NASA">Text704</OtherID>
+		<OtherID Source="NASA">Text705</OtherID>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text706</AbstractText>
+			<CopyrightInformation>Text707</CopyrightInformation>
+		</OtherAbstract>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text708</AbstractText>
+			<CopyrightInformation>Text709</CopyrightInformation>
+		</OtherAbstract>
+		<OtherAbstract Type="AAMC">
+			<AbstractText>Text710</AbstractText>
+			<CopyrightInformation>Text711</CopyrightInformation>
+		</OtherAbstract>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text712</Keyword>
+			<Keyword MajorTopicYN="N">Text713</Keyword>
+			<Keyword MajorTopicYN="N">Text714</Keyword>
+		</KeywordList>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text715</Keyword>
+			<Keyword MajorTopicYN="N">Text716</Keyword>
+			<Keyword MajorTopicYN="N">Text717</Keyword>
+		</KeywordList>
+		<KeywordList Owner="NLM">
+			<Keyword MajorTopicYN="N">Text718</Keyword>
+			<Keyword MajorTopicYN="N">Text719</Keyword>
+			<Keyword MajorTopicYN="N">Text720</Keyword>
+		</KeywordList>
+		<SpaceFlightMission>Text721</SpaceFlightMission>
+		<SpaceFlightMission>Text722</SpaceFlightMission>
+		<SpaceFlightMission>Text723</SpaceFlightMission>
+		<InvestigatorList>
+			<Investigator>
+				<LastName>Text724</LastName>
+				<ForeName>Text725</ForeName>
+				<Initials>Text726</Initials>
+				<Suffix>Text727</Suffix>
+				<Affiliation>Text728</Affiliation>
+			</Investigator>
+			<Investigator>
+				<LastName>Text729</LastName>
+				<ForeName>Text730</ForeName>
+				<Initials>Text731</Initials>
+				<Suffix>Text732</Suffix>
+				<Affiliation>Text733</Affiliation>
+			</Investigator>
+			<Investigator>
+				<LastName>Text734</LastName>
+				<ForeName>Text735</ForeName>
+				<Initials>Text736</Initials>
+				<Suffix>Text737</Suffix>
+				<Affiliation>Text738</Affiliation>
+			</Investigator>
+		</InvestigatorList>
+		<GeneralNote Owner="NLM">Text739</GeneralNote>
+		<GeneralNote Owner="NLM">Text740</GeneralNote>
+		<GeneralNote Owner="NLM">Text741</GeneralNote>
+	</MedlineCitation>
+	<DeleteCitation>
+		<MedlineID>Text742</MedlineID>
+		<MedlineID>Text743</MedlineID>
+		<MedlineID>Text744</MedlineID>
+	</DeleteCitation>
+</MedlineCitationSet>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_pubmed.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_pubmed.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/stress_test_pubmed.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,483 @@
+<PubMedArticleSet>
+
+<PubMedArticle>
+<NCBIArticle Status="Completed">
+<MedlineID>11223344</MedlineID>
+<Article>
+<Journal></Journal>
+<ArticleTitle>TESTING ARTICLE</ArticleTitle>
+</Article>
+</NCBIArticle>
+<PubmedData>
+        <History>
+                <PubMedPubDate PubStatus="pubmed">
+                        <Year>2001</Year>
+                        <Month>12</Month>
+                        <Day>1</Day>
+                        <Hour>10</Hour>
+                        <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="medline">
+                        <Year>2002</Year>
+                        <Month>1</Month>
+                        <Day>5</Day>
+                        <Hour>10</Hour>
+                        <Minute>1</Minute>
+                </PubMedPubDate>
+        </History>
+        <History>
+                <PubMedPubDate PubStatus="retracted">
+                        <Year>2003</Year>
+                        <Month>12</Month>
+                        <Day>1</Day>
+                        <Hour>10</Hour>
+                        <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="received">
+                        <Year>2004</Year>
+                        <Month>1</Month>
+                        <Day>5</Day>
+                        <Hour>10</Hour>
+                        <Minute>1</Minute>
+                </PubMedPubDate>
+        </History>
+        <PublicationStatus>ppublish</PublicationStatus>
+        <ProviderId>Testing provider</ProviderId>
+        <ArticleIdList>
+                <ArticleId IdType="pubmed">11726920</ArticleId>
+                <ArticleId IdType="doi">10.1038/ng1201-365</ArticleId>
+                <ArticleId IdType="pii">ng1201-365</ArticleId>
+                <ArticleId IdType="medline">21583752</ArticleId>
+        </ArticleIdList>
+        <URL type="fulltext" lang="AF">http://myserver/mydata</URL>
+        <URL type="summary">http://yourserver/yourdata</URL>
+        <URL>http://anyserver/anydata</URL>
+</PubmedData>
+</PubMedArticle>
+
+
+<PubMedArticle>
+<MedlineCitation Status="Completed">
+<MedlineID>21583752</MedlineID>
+<PMID>11726920</PMID>
+<DateCreated>
+<Year>2001</Year>
+<Month>11</Month>
+<Day>29</Day>
+</DateCreated>
+<DateCompleted>
+<Year>2001</Year>
+<Month>12</Month>
+<Day>20</Day>
+</DateCompleted>
+<Article>
+<Journal>
+<ISSN>1061-4036</ISSN>
+<JournalIssue>
+<Volume>29</Volume>
+<Issue>4</Issue>
+<PubDate>
+<Year>2001</Year>
+<Month>Dec</Month>
+</PubDate>
+</JournalIssue>
+</Journal>
+<ArticleTitle>Minimum information about a microarray experiment (MIAME)-toward standards for microarray data.</ArticleTitle>
+<Pagination>
+<MedlinePgn>365-71</MedlinePgn>
+</Pagination>
+<Abstract>
+<AbstractText>Microarray analysis has become a widely used tool for the generation of gene expression data on a genomic scale. Although many significant results have been derived from microarray studies, one limitation has been the lack of standards for presenting and exchanging such data. Here we present a proposal, the Minimum Information About a Microarray Experiment (MIAME), that describes the minimum information required to ensure that microarray data can be easily interpreted and that results derived from its analysis can be independently verified. The ultimate goal of this work is to establish a standard for recording and reporting microarray-based gene expression data, which will in turn facilitate the establishment of databases and public repositories and enable the development of data analysis tools. With respect to MIAME, we concentrate on defining the content and structure of the necessary information rather than the technical format for capturing it.</AbstractText>
+</Abstract>
+<Affiliation>European Bioinformatics Institute, EMBL outstation, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. brazma at ebi.ac.uk</Affiliation>
+<AuthorList>
+<Author>
+<LastName>Brazma</LastName>
+<ForeName>A</ForeName>
+<Initials>A</Initials>
+</Author>
+<Author>
+<LastName>Hingamp</LastName>
+<ForeName>P</ForeName>
+<Initials>P</Initials>
+</Author>
+<Author>
+<LastName>Quackenbush</LastName>
+<ForeName>J</ForeName>
+<Initials>J</Initials>
+</Author>
+<Author>
+<LastName>Sherlock</LastName>
+<ForeName>G</ForeName>
+<Initials>G</Initials>
+</Author>
+<Author>
+<LastName>Spellman</LastName>
+<ForeName>P</ForeName>
+<Initials>P</Initials>
+</Author>
+<Author>
+<LastName>Stoeckert</LastName>
+<ForeName>C</ForeName>
+<Initials>C</Initials>
+</Author>
+<Author>
+<LastName>Aach</LastName>
+<ForeName>J</ForeName>
+<Initials>J</Initials>
+</Author>
+<Author>
+<LastName>Ansorge</LastName>
+<ForeName>W</ForeName>
+<Initials>W</Initials>
+</Author>
+<Author>
+<LastName>Ball</LastName>
+<ForeName>C A</ForeName>
+<Initials>CA</Initials>
+</Author>
+<Author>
+<LastName>Causton</LastName>
+<ForeName>H C</ForeName>
+<Initials>HC</Initials>
+</Author>
+<Author>
+<LastName>Gaasterland</LastName>
+<ForeName>T</ForeName>
+<Initials>T</Initials>
+</Author>
+<Author>
+<LastName>Glenisson</LastName>
+<ForeName>P</ForeName>
+<Initials>P</Initials>
+</Author>
+<Author>
+<LastName>Holstege</LastName>
+<ForeName>F C</ForeName>
+<Initials>FC</Initials>
+</Author>
+<Author>
+<LastName>Kim</LastName>
+<ForeName>I F</ForeName>
+<Initials>IF</Initials>
+</Author>
+<Author>
+<LastName>Markowitz</LastName>
+<ForeName>V</ForeName>
+<Initials>V</Initials>
+</Author>
+<Author>
+<LastName>Matese</LastName>
+<ForeName>J C</ForeName>
+<Initials>JC</Initials>
+</Author>
+<Author>
+<LastName>Parkinson</LastName>
+<ForeName>H</ForeName>
+<Initials>H</Initials>
+</Author>
+<Author>
+<LastName>Robinson</LastName>
+<ForeName>A</ForeName>
+<Initials>A</Initials>
+</Author>
+<Author>
+<LastName>Sarkans</LastName>
+<ForeName>U</ForeName>
+<Initials>U</Initials>
+</Author>
+<Author>
+<LastName>Schulze-Kremer</LastName>
+<ForeName>S</ForeName>
+<Initials>S</Initials>
+</Author>
+<Author>
+<LastName>Stewart</LastName>
+<ForeName>J</ForeName>
+<Initials>J</Initials>
+</Author>
+<Author>
+<LastName>Taylor</LastName>
+<ForeName>R</ForeName>
+<Initials>R</Initials>
+</Author>
+<Author>
+<LastName>Vilo</LastName>
+<ForeName>J</ForeName>
+<Initials>J</Initials>
+</Author>
+<Author>
+<LastName>Vingron</LastName>
+<ForeName>M</ForeName>
+<Initials>M</Initials>
+</Author>
+</AuthorList>
+<Language>eng</Language>
+<PublicationTypeList>
+<PublicationType>Journal Article</PublicationType>
+</PublicationTypeList>
+</Article>
+<MedlineJournalInfo>
+<Country>United States</Country>
+<MedlineTA>Nat Genet</MedlineTA>
+<NlmUniqueID>9216904</NlmUniqueID>
+</MedlineJournalInfo>
+<CitationSubset>IM</CitationSubset>
+<MeshHeadingList>
+<MeshHeading>
+<DescriptorName MajorTopicYN="Y">Computational Biology</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Gene Expression Profiling</DescriptorName>
+<QualifierName>methods</QualifierName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Oligonucleotide Array Sequence Analysis</DescriptorName>
+<QualifierName MajorTopicYN="Y">standards</QualifierName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Support, Non-U.S. Gov't</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Support, U.S. Gov't, P.H.S.</DescriptorName>
+</MeshHeading>
+</MeshHeadingList>
+</MedlineCitation>
+<PubmedData>
+        <History>
+                <PubMedPubDate PubStatus="pubmed">
+                        <Year>2001</Year>
+                        <Month>12</Month>
+                        <Day>1</Day>
+                        <Hour>10</Hour>
+                        <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="medline">
+                        <Year>2002</Year>
+                        <Month>1</Month>
+                        <Day>5</Day>
+                        <Hour>10</Hour>
+                        <Minute>1</Minute>
+                </PubMedPubDate>
+        </History>
+        <PublicationStatus>ppublish</PublicationStatus>
+        <ArticleIdList>
+                <ArticleId IdType="pubmed">11726920</ArticleId>
+                <ArticleId IdType="doi">10.1038/ng1201-365</ArticleId>
+                <ArticleId IdType="pii">ng1201-365</ArticleId>
+                <ArticleId IdType="medline">21583752</ArticleId>
+        </ArticleIdList>
+</PubmedData>
+</PubMedArticle>
+
+<PubMedArticle>
+<MedlineCitation Status="Completed">
+<MedlineID>21465135</MedlineID>
+<PMID>11580977</PMID>
+<DateCreated>
+<Year>2001</Year>
+<Month>10</Month>
+<Day>02</Day>
+</DateCreated>
+<DateCompleted>
+<Year>2001</Year>
+<Month>12</Month>
+<Day>04</Day>
+</DateCompleted>
+<Article>
+<Journal>
+<ISSN>1286-4579</ISSN>
+<JournalIssue>
+<Volume>3</Volume>
+<Issue>10</Issue>
+<PubDate>
+<Year>2001</Year>
+<Month>Aug</Month>
+</PubDate>
+</JournalIssue>
+</Journal>
+<ArticleTitle>Gene expression data analysis.</ArticleTitle>
+<Pagination>
+<MedlinePgn>823-9</MedlinePgn>
+</Pagination>
+<Abstract>
+<AbstractText>Microarrays are one of the latest breakthroughs in experimental molecular biology, which allow monitoring of gene expression for tens of thousands of genes in parallel and are already producing huge amounts of valuable data. Analysis and handling of such data is becoming one of the major bottlenecks in the utilization of the technology. The raw microarray data are images, which have to be transformed into gene expression matrices, tables where rows represent genes, columns represent various samples such as tissues or experimental conditions, and numbers in each cell characterize the expression level of the particular gene in the particular sample. These matrices have to be analyzed further if any knowledge about the underlying biological processes is to be extracted. In this paper we concentrate on discussing bioinformatics methods used for such analysis. We briefly discuss supervised and unsupervised data analysis and its applications, such as predicting gene function classes and cancer classification as well as some possible future directions.</AbstractText>
+</Abstract>
+<Affiliation>European Molecular Biology Laboratory, Outstation Hinxton--the European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, CB10 1SD, Cambridge, UK. brazma at ebi.ac.uk</Affiliation>
+<AuthorList>
+<Author>
+<LastName>Brazma</LastName>
+<ForeName>A</ForeName>
+<Initials>A</Initials>
+</Author>
+<Author>
+<LastName>Vilo</LastName>
+<ForeName>J</ForeName>
+<Initials>J</Initials>
+</Author>
+</AuthorList>
+<Language>eng</Language>
+<PublicationTypeList>
+<PublicationType>Journal Article</PublicationType>
+<PublicationType>Review</PublicationType>
+<PublicationType>Review, Tutorial</PublicationType>
+</PublicationTypeList>
+</Article>
+<MedlineJournalInfo>
+<Country>France</Country>
+<MedlineTA>Microbes Infect</MedlineTA>
+<NlmUniqueID>100883508</NlmUniqueID>
+</MedlineJournalInfo>
+<CitationSubset>IM</CitationSubset>
+<MeshHeadingList>
+<MeshHeading>
+<DescriptorName>Animal</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Computational Biology</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName MajorTopicYN="Y">Gene Expression Regulation</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Human</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Oligonucleotide Array Sequence Analysis</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Statistics</DescriptorName>
+<QualifierName MajorTopicYN="Y">methods</QualifierName>
+</MeshHeading>
+</MeshHeadingList>
+<NumberOfReferences>29</NumberOfReferences>
+</MedlineCitation>
+<PubmedData>
+        <History>
+                <PubMedPubDate PubStatus="pubmed">
+                        <Year>2001</Year>
+                        <Month>10</Month>
+                        <Day>3</Day>
+                        <Hour>10</Hour>
+                        <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="medline">
+                        <Year>2002</Year>
+                        <Month>1</Month>
+                        <Day>5</Day>
+                        <Hour>10</Hour>
+                        <Minute>1</Minute>
+                </PubMedPubDate>
+        </History>
+        <PublicationStatus>ppublish</PublicationStatus>
+        <ArticleIdList>
+                <ArticleId IdType="pubmed">11580977</ArticleId>
+                <ArticleId IdType="pii">S128645790101440X</ArticleId>
+                <ArticleId IdType="medline">21465135</ArticleId>
+        </ArticleIdList>
+</PubmedData>
+</PubMedArticle>
+
+<PubMedArticle>
+<MedlineCitation Status="Completed">
+<MedlineID>21138228</MedlineID>
+<PMID>11238066</PMID>
+<DateCreated>
+<Year>2001</Year>
+<Month>03</Month>
+<Day>12</Day>
+</DateCreated>
+<DateCompleted>
+<Year>2001</Year>
+<Month>05</Month>
+<Day>31</Day>
+</DateCompleted>
+<Article>
+<Journal>
+<ISSN>1367-4803</ISSN>
+<JournalIssue>
+<Volume>17</Volume>
+<Issue>2</Issue>
+<PubDate>
+<Year>2001</Year>
+<Month>Feb</Month>
+</PubDate>
+</JournalIssue>
+</Journal>
+<ArticleTitle>On the importance of standardisation in life sciences.</ArticleTitle>
+<Pagination>
+<MedlinePgn>113-4</MedlinePgn>
+</Pagination>
+<AuthorList>
+<Author>
+<LastName>Brazma</LastName>
+<ForeName>A</ForeName>
+<Initials>A</Initials>
+</Author>
+</AuthorList>
+<Language>eng</Language>
+<PublicationTypeList>
+<PublicationType>Editorial</PublicationType>
+</PublicationTypeList>
+</Article>
+<MedlineJournalInfo>
+<Country>England</Country>
+<MedlineTA>Bioinformatics</MedlineTA>
+<NlmUniqueID>9808944</NlmUniqueID>
+</MedlineJournalInfo>
+<CitationSubset>IM</CitationSubset>
+<MeshHeadingList>
+<MeshHeading>
+<DescriptorName>Biological Sciences</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Databases, Factual</DescriptorName>
+<QualifierName MajorTopicYN="Y">standards</QualifierName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Gene Expression Profiling</DescriptorName>
+<QualifierName>methods</QualifierName>
+<QualifierName>standards</QualifierName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Human</DescriptorName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Information Storage and Retrieval</DescriptorName>
+<QualifierName MajorTopicYN="Y">standards</QualifierName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Oligonucleotide Array Sequence Analysis</DescriptorName>
+<QualifierName>methods</QualifierName>
+<QualifierName>standards</QualifierName>
+</MeshHeading>
+<MeshHeading>
+<DescriptorName>Sequence Analysis, DNA</DescriptorName>
+</MeshHeading>
+</MeshHeadingList>
+</MedlineCitation>
+<PubmedData>
+        <History>
+                <PubMedPubDate PubStatus="pubmed">
+                        <Year>2001</Year>
+                        <Month>3</Month>
+                        <Day>10</Day>
+                        <Hour>10</Hour>
+                        <Minute>0</Minute>
+                </PubMedPubDate>
+                <PubMedPubDate PubStatus="medline">
+                        <Year>2001</Year>
+                        <Month>6</Month>
+                        <Day>2</Day>
+                        <Hour>10</Hour>
+                        <Minute>1</Minute>
+                </PubMedPubDate>
+        </History>
+        <PublicationStatus>ppublish</PublicationStatus>
+        <ArticleIdList>
+                <ArticleId IdType="pubmed">11238066</ArticleId>
+                <ArticleId IdType="medline">21138228</ArticleId>
+        </ArticleIdList>
+</PubmedData>
+</PubMedArticle>
+</PubMedArticleSet>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/sv40_small.xml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/sv40_small.xml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/sv40_small.xml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,348 @@
+<entrySet level="1" version="1" xmlns="net:sf:psidev:mi" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="net:sf:psidev:mi http://psidev.sourceforge.net/mi/xml/src/MIF.xsd">
+     <entry>
+       <source releaseDate="2005-12-05">
+         <names>
+           <shortLabel>European Bioinformatics Institute</shortLabel>
+         </names>
+         <bibref>
+           <xref>
+             <primaryRef db="pubmed" id="14681455"/>
+           </xref>
+         </bibref>
+         <xref>
+           <primaryRef db="psi-mi" id="MI:0469"/>
+         </xref>
+         <attributeList>
+           <attribute name="postalAddress">Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD, United Kingdom</attribute>
+           <attribute name="url">http://www.ebi.ac.uk</attribute>
+         </attributeList>
+       </source>
+       <experimentList>
+         <experimentDescription id="EBI-617684">
+           <names>
+             <shortLabel>eyckerman-2001-1</shortLabel>
+             <fullName>Design and application of a cytokine-receptor-based interaction trap.</fullName>
+           </names>
+           <bibref>
+             <xref>
+               <primaryRef db="pubmed" id="11781573"/>
+             </xref>
+           </bibref>
+           <xref>
+             <primaryRef db="intact" id="EBI-617684" secondary="eyckerman-2001-1"/>
+             <secondaryRef db="newt" id="10090" secondary="mouse"/>
+             <secondaryRef db="newt" id="10633" secondary="sv40"/>
+             <secondaryRef db="newt" id="9606" secondary="human"/>
+           </xref>
+           <hostOrganism ncbiTaxId="9606">
+             <names>
+               <shortLabel>human-293t</shortLabel>
+               <fullName>Homo sapiens 293 cells transformed with SV40 large T antigen</fullName>
+             </names>
+             <cellType>
+               <names>
+                 <shortLabel>293t</shortLabel>
+                 <fullName>293 cells expressing SV40 large T antigen.</fullName>
+               </names>
+               <xref>
+                 <primaryRef db="pubmed" id="3031469"/>
+                 <secondaryRef db="cabri" id="ICLC HTL04001"/>
+               </xref>
+             </cellType>
+           </hostOrganism>
+           <interactionDetection>
+             <names>
+               <shortLabel>mappit</shortLabel>
+               <fullName>mammalian protein protein interaction trap</fullName>
+             </names>
+             <xref>
+               <primaryRef db="psi-mi" id="MI:0231"/>
+               <secondaryRef db="pubmed" id="12853652"/>
+             </xref>
+           </interactionDetection>
+           <participantDetection>
+             <names>
+               <shortLabel>predetermined</shortLabel>
+               <fullName>predetermined participant</fullName>
+             </names>
+             <xref>
+               <primaryRef db="psi-mi" id="MI:0396"/>
+               <secondaryRef db="pubmed" id="7940758"/>
+               <secondaryRef db="pubmed" id="14755292"/>
+             </xref>
+           </participantDetection>
+           <attributeList>
+             <attribute name="exp-modification">MAPPIT bait construct - a chimeric cytokine receptor composed of the extracellular region of homodimeric EpoR, fused to the transmembrane and cytosolic domains of the leptin receptor wherein all the tyrosine residues are mutated to phenylalanines. A bait, consisting of a region of the bait protein is C-terminally fused to this signaling-deficient receptor. The interaction is detected via activation of STAT3 and a subsequent reporter gene activation.</attribute>
+             <attribute name="figure-legend">2b, 2c, 5</attribute>
+             <attribute name="contact-email">jan.tavernier at ugent.be</attribute>
+             <attribute name="author-list">Eyckerman S., Verhee A., der Heyden JV., Lemmens I., Ostade XV., Vandekerckhove J., Tavernier J.</attribute>
+           </attributeList>
+         </experimentDescription>
+       </experimentList>
+       <interactorList>
+         <proteinInteractor id="EBI-617698">
+           <names>
+             <shortLabel>tala_sv40</shortLabel>
+             <fullName>Large T antigen</fullName>
+           </names>
+           <xref>
+             <primaryRef db="uniprotkb" id="P03070" secondary="tala_sv40" version="SP_48"/>
+             <secondaryRef db="interpro" id="IPR001623" secondary="DnaJ_N"/>
+             <secondaryRef db="interpro" id="IPR010932" secondary="Polyoma_lg_T_C"/>
+             <secondaryRef db="interpro" id="IPR003133" secondary="T_Ag_DNA_bind"/>
+             <secondaryRef db="intact" id="EBI-617698" secondary="tala_sv40"/>
+           </xref>
+           <organism ncbiTaxId="10633">
+             <names>
+               <shortLabel>sv40</shortLabel>
+               <fullName>Simian virus 40</fullName>
+             </names>
+           </organism>
+           <sequence>MDKVLNREESLQLMDLLGLERSAWGNIPLMRKAYLKKCKEFHPDKGGDEEKMKKMNTLYKKMEDGVKYAHQPDFGGFWDATEIPTYGTDEWEQWWNAFNEENLFCSEEMPSSDDEATADSQHSTPPKKKRKVEDPKDFPSELLSFLSHAVFSNRTLACFAIYTTKEKAALLYKKIMEKYSVTFISRHNSYNHNILFFLTPHRHRVSAINNYAQKLCTFSFLICKGVNKEYLMYSALTRDPFSVIEESLPGGLKEHDFNPEEAEETKQVSWKLVTEYAMETKCDDVLLLLGMYLEFQYSFEMCLKCIKKEQPSHYKYHEKHYANAAIFADSKNQKTICQQAVDTVLAKKRVDSLQLTREQMLTNRFNDLLDRMDIMFGSTGSADIEEWMAGVAWLHCLLPKMDSVVYDFLKCMVYNIPKKRYWLFKGPIDSGKTTLAAALLELCGGKALNVNLPLDRLNFELGVAIDQFLVVFEDVKGTGGESRDLPSGQGINNLDNLRDYLDGSVKVNLEKKHLNKRTQIFPPGIVTMNEFSVPKTLQARFVKQIDFRAKDYLKHCLERSEFLLEKRIIQSGIALLLMLIWYRPVAEFAQSIQSRIVEWKERLDKEFSLSVYQKMKFNVAMGIGVLDWLRNSDDDDEDSQENADKNEDGGEKNMEDSGHETGIDSQSQGSFQAPQSSQSVHDHNQPYHICRGFTCFKKPPTPPPEPET</sequence>
+         </proteinInteractor>
+         <proteinInteractor id="EBI-474016">
+           <names>
+             <shortLabel>p53_mouse</shortLabel>
+             <fullName>Cellular tumor antigen p53</fullName>
+           </names>
+           <xref>
+             <primaryRef db="uniprotkb" id="P02340" secondary="p53_mouse" version="SP_48"/>
+             <secondaryRef db="go" id="GO:0005829" secondary="C:cytosol"/>
+             <secondaryRef db="go" id="GO:0005739" secondary="C:mitochondrion"/>
+             <secondaryRef db="go" id="GO:0005730" secondary="C:nucleolus"/>
+             <secondaryRef db="go" id="GO:0005524" secondary="F:ATP binding"/>
+             <secondaryRef db="go" id="GO:0005507" secondary="F:copper ion binding"/>
+             <secondaryRef db="go" id="GO:0000739" secondary="F:DNA strand annealing activit"/>
+             <secondaryRef db="go" id="GO:0005515" secondary="F:protein binding"/>
+             <secondaryRef db="go" id="GO:0003700" secondary="F:transcription factor activit"/>
+             <secondaryRef db="go" id="GO:0006915" secondary="P:apoptosis"/>
+             <secondaryRef db="go" id="GO:0006284" secondary="P:base-excision repair"/>
+             <secondaryRef db="go" id="GO:0008635" secondary="P:caspase activation via cytoc"/>
+             <secondaryRef db="go" id="GO:0007569" secondary="P:cell aging"/>
+             <secondaryRef db="go" id="GO:0007050" secondary="P:cell cycle arrest"/>
+             <secondaryRef db="go" id="GO:0030154" secondary="P:cell differentiation"/>
+             <secondaryRef db="go" id="GO:0042771" secondary="P:DNA damage response, signal"/>
+             <secondaryRef db="go" id="GO:0043066" secondary="P:negative regulation of apopt"/>
+             <secondaryRef db="go" id="GO:0030308" secondary="P:negative regulation of cell"/>
+             <secondaryRef db="go" id="GO:0008156" secondary="P:negative regulation of DNA r"/>
+             <secondaryRef db="go" id="GO:0048147" secondary="P:negative regulation of fibro"/>
+             <secondaryRef db="go" id="GO:0006289" secondary="P:nucleotide-excision repair"/>
+             <secondaryRef db="go" id="GO:0000060" secondary="P:protein-nucleus import, tran"/>
+             <secondaryRef db="go" id="GO:0042127" secondary="P:regulation of cell prolifera"/>
+             <secondaryRef db="go" id="GO:0006355" secondary="P:regulation of transcription,"/>
+             <secondaryRef db="go" id="GO:0006974" secondary="P:response to DNA damage stimu"/>
+             <secondaryRef db="go" id="GO:0009411" secondary="P:response to UV"/>
+             <secondaryRef db="go" id="GO:0010165" secondary="P:response to X-ray"/>
+             <secondaryRef db="interpro" id="IPR002117" secondary="P53"/>
+             <secondaryRef db="interpro" id="IPR011615" secondary="p53_DNA_bind"/>
+             <secondaryRef db="interpro" id="IPR010991" secondary="p53_tetrameristn"/>
+             <secondaryRef db="uniprotkb" id="Q9QUP3" secondary="p53_mouse" version="SP_48"/>
+             <secondaryRef db="go" id="GO:0005657" secondary="C:replication fork"/>
+             <secondaryRef db="go" id="GO:0045941" secondary="P:positive regulation of trans"/>
+             <secondaryRef db="interpro" id="IPR012346" secondary="P53_RUNT_DNA_bd"/>
+             <secondaryRef db="intact" id="EBI-474016" secondary="p53_mouse"/>
+           </xref>
+           <organism ncbiTaxId="10090">
+             <names>
+               <shortLabel>mouse</shortLabel>
+               <fullName>Mus musculus</fullName>
+             </names>
+           </organism>
+           <sequence>MTAMEESQSDISLELPLSQETFSGLWKLLPPEDILPSPHCMDDLLLPQDVEEFFEGPSEALRVSGAPAAQDPVTETPGPVAPAPATPWPLSSFVPSQKTYQGNYGFHLGFLQSGTAKSVMCTYSPPLNKLFCQLAKTCPVQLWVSATPPAGSRVRAMAIYKKSQHMTEVVRRCPHHERCSDGDGLAPPQHLIRVEGNLYPEYLEDRQTFRHSVVVPYEPPEAGSEYTTIHYKYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRDSFEVRVCACPGRDRRTEEENFRKKEVLCPELPPGSAKRALPTCTSASPPQKKKPLDGEYFTLKIRGRKRFEMFRELNEALELKDAHATEESGDSRAHSSYLKTKKGQSTSRHKKTMVKKVGPDSD</sequence>
+         </proteinInteractor>
+         <proteinInteractor id="EBI-617321">
+           <names>
+             <shortLabel>epor_human</shortLabel>
+             <fullName>Erythropoietin receptor precursor</fullName>
+           </names>
+           <xref>
+             <primaryRef db="uniprotkb" id="P19235" secondary="epor_human" version="SP_48"/>
+             <secondaryRef db="go" id="GO:0005887" secondary="C:integral to plasma membrane"/>
+             <secondaryRef db="go" id="GO:0004900" secondary="F:erythropoietin receptor acti"/>
+             <secondaryRef db="go" id="GO:0007165" secondary="P:signal transduction"/>
+             <secondaryRef db="interpro" id="IPR002996" secondary="Cytkn_recept_B/G"/>
+             <secondaryRef db="interpro" id="IPR009167" secondary="EPO_receptor"/>
+             <secondaryRef db="interpro" id="IPR003961" secondary="FN_III"/>
+             <secondaryRef db="interpro" id="IPR008957" secondary="FN_III-like"/>
+             <secondaryRef db="interpro" id="IPR003528" secondary="HemptreceptL_F1"/>
+             <secondaryRef db="uniprotkb" id="Q15443" secondary="epor_human" version="SP_48"/>
+             <secondaryRef db="intact" id="EBI-617321" secondary="epor_human"/>
+           </xref>
+           <organism ncbiTaxId="9606">
+             <names>
+               <shortLabel>human</shortLabel>
+               <fullName>Homo sapiens</fullName>
+             </names>
+           </organism>
+           <sequence>MDHLGASLWPQVGSLCLLLAGAAWAPPPNLPDPKFESKAALLAARGPEELLCFTERLEDLVCFWEEAASAGVGPGNYSFSYQLEDEPWKLCRLHQAPTARGAVRFWCSLPTADTSSFVPLELRVTAASGAPRYHRVIHINEVVLLDAPVGLVARLADESGHVVLRWLPPPETPMTSHIRYEVDVSAGNGAGSVQRVEILEGRTECVLSNLRGRTRYTFAVRARMAEPSFGGFWSAWSEPVSLLTPSDLDPLILTLSLILVVILVLLTVLALLSHRRALKQKIWPGIPSPESEFEGLFTTHKGNFQLWLYQNDGCLWWSPCTPFTEDPPASLEVLSERCWGTMQAVEPGTDDEGPLLEPVGSEHAQDTYLVLDKWLLPRNPPSEDLPGPGGSVDIVAMDEGSEASSCSSALASKPSPEGASAASFEYTILDPSSQLLRPWTLCPELPPTPPHLKYLYLVVSDSGISTDYSSGDSQGAQGGLSDGPYSNPYENSLIPAAEPLPPSYVACS</sequence>
+         </proteinInteractor>
+         <proteinInteractor id="EBI-617489">
+           <names>
+             <shortLabel>cish_mouse</shortLabel>
+             <fullName>Cytokine-inducible SH2-containing protein</fullName>
+           </names>
+           <xref>
+             <primaryRef db="uniprotkb" id="Q62225" secondary="cish_mouse" version="SP_48"/>
+             <secondaryRef db="go" id="GO:0005886" secondary="C:plasma membrane"/>
+             <secondaryRef db="go" id="GO:0005515" secondary="F:protein binding"/>
+             <secondaryRef db="go" id="GO:0007205" secondary="P:protein kinase C activation"/>
+             <secondaryRef db="interpro" id="IPR000980" secondary="SH2"/>
+             <secondaryRef db="interpro" id="IPR001496" secondary="SOCS_C"/>
+             <secondaryRef db="intact" id="EBI-617489" secondary="cish_mouse"/>
+           </xref>
+           <organism ncbiTaxId="10090">
+             <names>
+               <shortLabel>mouse</shortLabel>
+               <fullName>Mus musculus</fullName>
+             </names>
+           </organism>
+           <sequence>MVLCVQGSCPLLAVEQIGRRPLWAQSLELPGPAMQPLPTGAFPEEVTEETPVQAENEPKVLDPEGDLLCIAKTFSYLRESGWYWGSITASEARQHLQKMPEGTFLVRDSTHPSYLFTLSVKTTRGPTNVRIEYADSSFRLDSNCLSRPRILAFPDVVSLVQHYVASCAADTRSDSPDPAPTPALPMSKQDAPSDSVLPIPVATAVHLKLVQPFVRRSSARSLQHLCRLVINRLVADVDCLPLPRRMADYLRQYPFQL</sequence>
+         </proteinInteractor>
+         <proteinInteractor id="EBI-617737">
+           <names>
+             <shortLabel>socs2_human</shortLabel>
+             <fullName>Suppressor of cytokine signaling 2</fullName>
+           </names>
+           <xref>
+             <primaryRef db="uniprotkb" id="O14508" secondary="socs2_human" version="SP_49"/>
+             <secondaryRef db="go" id="GO:0005737" secondary="C:cytoplasm"/>
+             <secondaryRef db="go" id="GO:0005131" secondary="F:growth hormone receptor bind"/>
+             <secondaryRef db="go" id="GO:0005159" secondary="F:insulin-like growth factor r"/>
+             <secondaryRef db="go" id="GO:0008269" secondary="F:JAK pathway signal transduct"/>
+             <secondaryRef db="go" id="GO:0005148" secondary="F:prolactin receptor binding"/>
+             <secondaryRef db="go" id="GO:0005070" secondary="F:SH3/SH2 adaptor activity"/>
+             <secondaryRef db="go" id="GO:0006916" secondary="P:anti-apoptosis"/>
+             <secondaryRef db="go" id="GO:0007259" secondary="P:JAK-STAT cascade"/>
+             <secondaryRef db="go" id="GO:0045666" secondary="P:positive regulation of neuro"/>
+             <secondaryRef db="go" id="GO:0040014" secondary="P:regulation of body size"/>
+             <secondaryRef db="go" id="GO:0001558" secondary="P:regulation of cell growth"/>
+             <secondaryRef db="go" id="GO:0009966" secondary="P:regulation of signal transdu"/>
+             <secondaryRef db="interpro" id="IPR000980" secondary="SH2"/>
+             <secondaryRef db="interpro" id="IPR001496" secondary="SOCS_C"/>
+             <secondaryRef db="uniprotkb" id="O14542" secondary="socs2_human" version="SP_49"/>
+             <secondaryRef db="uniprotkb" id="O95102" secondary="socs2_human" version="SP_49"/>
+             <secondaryRef db="uniprotkb" id="Q9UKS5" secondary="socs2_human" version="SP_49"/>
+             <secondaryRef db="go" id="GO:0005515" secondary="F:protein binding"/>
+             <secondaryRef db="intact" id="EBI-617737" secondary="socs2_human"/>
+           </xref>
+           <organism ncbiTaxId="9606">
+             <names>
+               <shortLabel>human</shortLabel>
+               <fullName>Homo sapiens</fullName>
+             </names>
+           </organism>
+           <sequence>MTLRCLEPSGNGGEGTRSQWGTAGSAEEPSPQAARLAKALRELGQTGWYWGSMTVNEAKEKLKEAPEGTFLIRDSSHSDYLLTISVKTSAGPTNLRIEYQDGKFRLDSIICVKSKLKQFDSVVHLIDYYVQMCKDKRTGPEAPRNGTVHLYLTKPLYTSAPSLQHLCRLTINKCTGAIWGLPLPTRLKDYLEEYKFQV</sequence>
+         </proteinInteractor>
+       </interactorList>
+       <interactionList>
+         <interaction>
+           <names>
+             <shortLabel>sv40-tp53-1</shortLabel>
+             <fullName>Interactions between SV40 large T antigen and murine p53 demonstrated by MAPPIT</fullName>
+           </names>
+           <experimentList>
+             <experimentRef ref="EBI-617684"/>
+           </experimentList>
+           <participantList>
+             <proteinParticipant>
+               <proteinInteractorRef ref="EBI-617698"/>
+               <role>prey</role>
+             </proteinParticipant>
+             <proteinParticipant>
+               <proteinInteractorRef ref="EBI-474016"/>
+               <role>bait</role>
+             </proteinParticipant>
+           </participantList>
+           <interactionType>
+             <names>
+               <shortLabel>physical interaction</shortLabel>
+               <fullName>physical interaction</fullName>
+             </names>
+             <xref>
+               <primaryRef db="psi-mi" id="MI:0218"/>
+               <secondaryRef db="pubmed" id="14755292"/>
+             </xref>
+           </interactionType>
+           <xref>
+             <primaryRef db="intact" id="EBI-617704" secondary="sv40-tp53-1"/>
+           </xref>
+           <attributeList>
+             <attribute name="kd">1.0</attribute>
+           </attributeList>
+         </interaction>
+         <interaction>
+           <names>
+             <shortLabel>epor-cish-4</shortLabel>
+             <fullName>Interaction between human EpoR and Murine CIS-1 demonstrated by MAPPIT</fullName>
+           </names>
+           <experimentList>
+             <experimentRef ref="EBI-617684"/>
+           </experimentList>
+           <participantList>
+             <proteinParticipant>
+               <proteinInteractorRef ref="EBI-617321"/>
+               <role>bait</role>
+             </proteinParticipant>
+             <proteinParticipant>
+               <proteinInteractorRef ref="EBI-617489"/>
+               <role>prey</role>
+             </proteinParticipant>
+           </participantList>
+           <interactionType>
+             <names>
+               <shortLabel>physical interaction</shortLabel>
+               <fullName>physical interaction</fullName>
+             </names>
+             <xref>
+               <primaryRef db="psi-mi" id="MI:0218"/>
+               <secondaryRef db="pubmed" id="14755292"/>
+             </xref>
+           </interactionType>
+           <xref>
+             <primaryRef db="intact" id="EBI-617720" secondary="epor-cish-4"/>
+           </xref>
+           <attributeList>
+             <attribute name="kd">1.0</attribute>
+           </attributeList>
+         </interaction>
+         <interaction>
+           <names>
+             <shortLabel>epor-socs2-3</shortLabel>
+             <fullName>Interaction between human EpoR and SOCS2 demonstrated by MAPPIT</fullName>
+           </names>
+           <experimentList>
+             <experimentRef ref="EBI-617684"/>
+           </experimentList>
+           <participantList>
+             <proteinParticipant>
+               <proteinInteractorRef ref="EBI-617737"/>
+               <role>prey</role>
+             </proteinParticipant>
+             <proteinParticipant>
+               <proteinInteractorRef ref="EBI-617321"/>
+               <role>bait</role>
+             </proteinParticipant>
+           </participantList>
+           <interactionType>
+             <names>
+               <shortLabel>physical interaction</shortLabel>
+               <fullName>physical interaction</fullName>
+             </names>
+             <xref>
+               <primaryRef db="psi-mi" id="MI:0218"/>
+               <secondaryRef db="pubmed" id="14755292"/>
+             </xref>
+           </interactionType>
+           <xref>
+             <primaryRef db="intact" id="EBI-617778" secondary="epor-socs2-3"/>
+           </xref>
+           <attributeList>
+             <attribute name="caution">SOCS2 described as fragment but no detail given</attribute>
+             <attribute name="agonist">Erythropoietin</attribute>
+             <attribute name="resulting-ptm">Phosphorylation of SOCS2 - dependent on treatment of cells with erythropoietin</attribute>
+             <attribute name="kd">1.0</attribute>
+           </attributeList>
+         </interaction>
+       </interactionList>
+     </entry>
+   </entrySet>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/swiss.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/swiss.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/swiss.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,900 @@
+ID   MA32_HUMAN     STANDARD;      PRT;   282 AA.
+AC   Q07021;
+DT   01-FEB-1995 (Rel. 31, Created)
+DT   01-FEB-1995 (Rel. 31, Last sequence update)
+DT   01-OCT-2000 (Rel. 40, Last annotation update)
+DE   COMPLEMENT COMPONENT 1, Q SUBCOMPONENT BINDING PROTEIN, MITOCHONDRIAL
+DE   PRECURSOR (GLYCOPROTEIN GC1QBP) (GC1Q-R PROTEIN) (HYALURONAN-BINDING
+DE   PROTEIN 1) (PRE-MRNA SPLICING FACTOR SF2, P32 SUBUNIT) (P33).
+GN   GC1QBP OR HABP1 OR SF2P32 OR C1QBP.
+OS   Homo sapiens (Human).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606;
+RN   [1]
+RP   SEQUENCE FROM N.A., AND SEQUENCE OF 74; 76-93 AND 208-216.
+RC   TISSUE=FIBROBLAST;
+RX   MEDLINE=94085792; PubMed=8262387;
+RA   Honore B., Madsen P., Rasmussen H.H., Vandekerckhove J., Celis J.E.,
+RA   Leffers H.;
+RT   "Cloning and expression of a cDNA covering the complete coding region
+RT   of the P32 subunit of human pre-mRNA splicing factor SF2.";
+RL   Gene 134:283-287(1993).
+RN   [2]
+RP   SEQUENCE OF 5-282 FROM N.A., AND SEQUENCE OF 74-114.
+RX   MEDLINE=91309150; PubMed=1830244;
+RA   Krainer A.R., Mayeda A., Kozak D., Binns G.;
+RT   "Functional expression of cloned human splicing factor SF2: homology
+RT   to RNA-binding proteins, U1 70K, and Drosophila splicing regulators.";
+RL   Cell 66:383-394(1991).
+RN   [3]
+RP   SEQUENCE FROM N.A., AND PARTIAL SEQUENCE.
+RX   MEDLINE=94253723; PubMed=8195709;
+RA   Ghebrehiwet B., Lim B.L., Peerschke E.I., Willis A.C., Reid K.B.;
+RT   "Isolation, cDNA cloning, and overexpression of a 33-kD cell surface
+RT   glycoprotein that binds to the globular 'heads' of C1q.";
+RL   J. Exp. Med. 179:1809-1821(1994).
+RN   [4]
+RP   X-RAY CRYSTALLOGRAPHY (2.25 ANGSTROMS).
+RX   MEDLINE=99199225; PubMed=10097078;
+RA   Jiang J., Zhang Y., Krainer A.R., Xu R.-M.;
+RT   "Crystal structure of human p32, a doughnut-shaped acidic
+RT   mitochondrial matrix protein.";
+RL   Proc. Natl. Acad. Sci. U.S.A. 96:3572-3577(1999).
+CC   -!- FUNCTION: NOT KNOWN. BINDS TO THE GLOBULAR "HEADS" OF C1Q THUS
+CC       INHIBITING C1 ACTIVATION.
+CC   -!- SUBCELLULAR LOCATION: MITOCHONDRIAL MATRIX.
+CC   -!- SIMILARITY: BELONGS TO THE MAM33 FAMILY.
+CC   -!- CAUTION: WAS ORIGINALLY (REF.1 AND REF.2) THOUGHT TO BE A PRE-MRNA
+CC       SPLICING FACTOR THAT PLAYS A ROLE IN PREVENTING EXON SKIPPING,
+CC       ENSURING THE ACCURACY OF SPLICING AND REGULATING ALTERNATIVE
+CC       SPLICING.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; L04636; AAA16315.1; -.
+DR   EMBL; M69039; AAA73055.1; -.
+DR   EMBL; X75913; CAA53512.1; -.
+DR   PIR; JT0762; JT0762.
+DR   PIR; S44104; S44104.
+DR   PDB; 1P32; 06-APR-99.
+DR   MIM; 601269; -.
+KW   Mitochondrion; Transit peptide; 3D-structure.
+FT   TRANSIT       1     73       MITOCHONDRION.
+FT   CHAIN        74    282       COMPLEMENT COMPONENT 1, Q SUBCOMPONENT
+FT                                BINDING PROTEIN.
+SQ   SEQUENCE   282 AA;  31362 MW;  2F747FA73BB1314B CRC64;
+     MLPLLRCVPR VLGSSVAGLR AAAPASPFRQ LLQPAPRLCT RPFGLLSVRA GSERRPGLLR
+     PRGPCACGCG CGSLHTDGDK AFVDFLSDEI KEERKIQKHK TLPKMSGGWE LELNGTEAKL
+     VRKVAGEKIT VTFNINNSIP PTFDGEEEPS QGQKVEEQEP ELTSTPNFVV EVIKNDDGKK
+     ALVLDCHYPE DEVGQEDEAE SDIFSIREVS FQSTGESEWK DTNYTLNTDS LDWALYDHLM
+     DFLADRGVDN TFADELVELS TALEHQEYIT FLEDLKSFVK SQ
+//
+ID   ACON_CAEEL     STANDARD;      PRT;   788 AA.
+AC   P34455;
+DT   01-FEB-1994 (Rel. 28, Created)
+DT   01-FEB-1994 (Rel. 28, Last sequence update)
+DT   15-JUL-1999 (Rel. 38, Last annotation update)
+DE   Probable aconitate hydratase, mitochondrial precursor (EC 4.2.1.3)
+DE   (Citrate hydro-lyase) (Aconitase).
+GN   F54H12.1.
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   STRAIN=BRISTOL N2;
+RX   MEDLINE=94150718; PubMed=7906398;
+RA   Wilson R., Ainscough R., Anderson K., Baynes C., Berks M.,
+RA   Bonfield J., Burton J., Connell M., Copsey T., Cooper J., Coulson A.,
+RA   Craxton M., Dear S., Du Z., Durbin R., Favello A., Fraser A.,
+RA   Fulton L., Gardner A., Green P., Hawkins T., Hillier L., Jier M.,
+RA   Johnston L., Jones M., Kershaw J., Kirsten J., Laisster N.,
+RA   Latreille P., Lightning J., Lloyd C., Mortimore B., O'Callaghan M.,
+RA   Parsons J., Percy C., Rifken L., Roopra A., Saunders D., Shownkeen R.,
+RA   Sims M., Smaldon N., Smith A., Smith M., Sonnhammer E., Staden R.,
+RA   Sulston J., Thierry-Mieg J., Thomas K., Vaudin M., Vaughan K.,
+RA   Waterson R., Watson A., Weinstock L., Wilkinson-Sproat J.,
+RA   Wohldman P.;
+RT   "2.2 Mb of contiguous nucleotide sequence from chromosome III of C.
+RT   elegans.";
+RL   Nature 368:32-38(1994).
+CC   -!- CATALYTIC ACTIVITY: Citrate = cis-aconitate + H(2)O.
+CC   -!- COFACTOR: ACONITASE HAS AN ACTIVE (4FE-4S) AND AN INACTIVE (3FE-
+CC       4S) FORMS. THE ACTIVE (4FE-4S) CLUSTER IS PART OF THE CATALYTIC
+CC       SITE THAT INTERCONVERTS CITRATE, CIS-ACONITASE, AND ISOCITRATE (BY
+CC       SIMILARITY).
+CC   -!- PATHWAY: TRICARBOXYLIC ACID CYCLE.
+CC   -!- SUBUNIT: MONOMER (BY SIMILARITY).
+CC   -!- SUBCELLULAR LOCATION: Mitochondrial (By similarity).
+CC   -!- SIMILARITY: BELONGS TO THE ACONITASE/IPM ISOMERASE FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; L25599; AAA28050.1; -.
+DR   PIR; S44831; S44831.
+DR   HSSP; P20004; 1AMJ.
+DR   WormPep; F54H12.1; CE00516.
+DR   InterPro; IPR001030; Aconitase.
+DR   InterPro; IPR000573; Aconitase_C.
+DR   Pfam; PF00330; aconitase; 1.
+DR   Pfam; PF00694; Aconitase_C; 1.
+DR   PRINTS; PR00415; ACONITASE.
+DR   ProDom; PD000511; Aconitase; 1.
+DR   PROSITE; PS00450; ACONITASE_1; 1.
+DR   PROSITE; PS01244; ACONITASE_2; 1.
+KW   Hypothetical protein; Lyase; Tricarboxylic acid cycle; Iron-sulfur;
+KW   Mitochondrion; Transit peptide; 4Fe-4S.
+FT   TRANSIT       1      ?       MITOCHONDRION (POTENTIAL).
+FT   CHAIN         ?    788       PROBABLE ACONITATE HYDRATASE.
+FT   METAL       393    393       IRON-SULFUR (4FE-4S) (BY SIMILARITY).
+FT   METAL       456    456       IRON-SULFUR (4FE-4S) (BY SIMILARITY).
+FT   METAL       459    459       IRON-SULFUR (4FE-4S) (BY SIMILARITY).
+SQ   SEQUENCE   788 AA;  85712 MW;  8861E6FC198B70D9 CRC64;
+     MRYHFLFGSL RNHLFSFRGV IYCREKLFNC SKLSFRPSKV AISKFEPKSY LPYEKLSQTV
+     KIVKDRLKRP LTLSEKILYG HLDQPKTQDI ERGVSYLRLR PDRVAMQDAT AQMAMLQFIS
+     SGLPKTAVPS TIHCDHLIEA QKGGAQDLAR AKDLNKEVFN FLATAGSKYG VGFWKPGSGI
+     IHQIILENYA FPGLLLIGTD SHTPNGGGLG GLCIGVGGAD AVDVMADIPW ELKCPKVIGI
+     KLTGKLNGWT SAKDVILKVA DILTVKGGTG AIVEYFGPGV DSISATGMGT ICNMGAEIGA
+     TTSVFPYNES MYKYLEATGR KEIAEEARKY KDLLTADDGA NYDQIIEINL DTLTPHVNGP
+     FTPDLASSID KLGENAKKNG WPLDVKVSLI GSCTNSSYED MTRAASIAKQ ALDKGLKAKT
+     IFTITPGSEQ VRATIERDGL SKIFADFGGM VLANACGPCI GQWDRQDVKK GEKNTIVTSY
+     NRNFTGRNDA NPATHGFVTS PDITTAMAIS GRLDFNPLTD ELTAADGSKF KLQAPTGLDL
+     PPKGYDPGED TFQAPSGSGQ VDVSPSSDRL QLLSPFDKWD GKDLEDMKIL IKVTGKCTTD
+     HISAAGPWLK YRGHLDNISN NLFLTAINAD NGEMNKVKNQ VTGEYGAVPA TARKYKADGV
+     RWVAIGDENY GEGSSREHAA LEPRHLGGRA IIVKSFARIH ETNLKKQGML PLTFANPADY
+     DKIDPSDNVS IVGLSSFAPG KPLTAIFKKT NGSKVEVTLN HTFNEQQIEW FKAGSALNRM
+     KEVFAKSK
+//
+ID   143E_HUMAN     STANDARD;      PRT;   255 AA.
+AC   P42655; P29360; Q63631;
+DT   01-NOV-1995 (Rel. 32, Created)
+DT   01-NOV-1995 (Rel. 32, Last sequence update)
+DT   15-JUL-1999 (Rel. 38, Last annotation update)
+DE   14-3-3 protein epsilon (Mitochondrial import stimulation factor L
+DE   subunit) (Protein kinase C inhibitor protein-1) (KCIP-1) (14-3-3E).
+GN   YWHAE.
+OS   Homo sapiens (Human),
+OS   Mus musculus (Mouse),
+OS   Rattus norvegicus (Rat),
+OS   Bos taurus (Bovine), and
+OS   Ovis aries (Sheep).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606, 10090, 10116, 9913, 9940;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=95372385; PubMed=7644510;
+RA   Conklin D.S., Galaktionov K., Beach D.;
+RT   "14-3-3 proteins associate with cdc25 phosphatases.";
+RL   Proc. Natl. Acad. Sci. U.S.A. 92:7892-7896(1995).
+RN   [2]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Heart;
+RA   Luk S.C.W., Lee C.Y., Waye M.M.Y.;
+RL   Submitted (JUN-1995) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=96300316; PubMed=8684458;
+RA   Jin D.-Y., Lyu M.S., Kozak C.A., Jeang K.-T.;
+RT   "Function of 14-3-3 proteins.";
+RL   Nature 382:308-308(1996).
+RN   [4]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Liver;
+RX   MEDLINE=97011338; PubMed=8858348;
+RA   Chong S.S., Tanigami A., Roschke A.V., Ledbetter D.H.;
+RT   "14-3-3 epsilon has no homology to LIS1 and lies telomeric to it on
+RT   chromosome 17p13.3 outside the Miller-Dieker syndrome chromosome
+RT   region.";
+RL   Genome Res. 6:735-741(1996).
+RN   [5]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RA   Tanigami A., Chong S.S., Ledbetter D.H.;
+RT   "14-3-3 epsilon genomic sequence.";
+RL   Submitted (AUG-1998) to the EMBL/GenBank/DDBJ databases.
+RN   [6]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Placenta;
+RA   Strausberg R.;
+RL   Submitted (DEC-2000) to the EMBL/GenBank/DDBJ databases.
+RN   [7]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat, and Sheep; TISSUE=Pineal gland;
+RX   MEDLINE=94296566; PubMed=8024705;
+RA   Roseboom P.H., Weller J.L., Babila T., Aitken A., Sellers L.A.,
+RA   Moffet J.R., Namboodiri M.A., Klein D.C.;
+RT   "Cloning and characterization of the epsilon and zeta isoforms of the
+RT   14-3-3 proteins.";
+RL   DNA Cell Biol. 13:629-640(1994).
+RN   [8]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Liver;
+RX   MEDLINE=95122474; PubMed=7822263;
+RA   Alam R., Hachiya N., Sakaguchi M., Shun-Ichiro K., Iwanaga S.,
+RA   Kitajima M., Mihara K., Omura T.;
+RT   "cDNA cloning and characterization of mitochondrial import
+RT   stimulation factor (MSF) purified from rat liver cytosol.";
+RL   J. Biochem. 116:416-425(1994).
+RN   [9]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Brain;
+RX   MEDLINE=96280718; PubMed=8694795;
+RA   Gao L., Gu X.B., Yu D.S., Yu R.K., Zeng G.;
+RT   "Association of a 14-3-3 protein with CMP-NeuAc:GM1 alpha 2,3-
+RT   sialyltransferase.";
+RL   Biochem. Biophys. Res. Commun. 224:103-107(1996).
+RN   [10]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse; STRAIN=SWISS; TISSUE=Kidney;
+RX   MEDLINE=95269876; PubMed=7750640;
+RA   McConnell J.E., Armstrong J.F., Bard J.B.;
+RT   "The mouse 14-3-3 epsilon isoform, a kinase regulator whose
+RT   expression pattern is modulated in mesenchyme and neuronal
+RT   differentiation.";
+RL   Dev. Biol. 169:218-228(1995).
+RN   [11]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse; STRAIN=129/SV;
+RA   Takihara Y., Irie K., Nomura M., Motaleb M., Matsumoto K.,
+RA   Shimada K.;
+RL   Submitted (SEP-1996) to the EMBL/GenBank/DDBJ databases.
+RN   [12]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Bovine;
+RA   Jones J.M., Niikura T., Pinke R.M., Guo W., Molday L., Leykam J.,
+RA   McConnell D.G.;
+RT   "Expression of 14-3-3 proteins in bovine retinal photoreceptors.";
+RL   Submitted (JAN-1998) to the EMBL/GenBank/DDBJ databases.
+RN   [13]
+RP   SEQUENCE OF 1-152; 165-184 AND 216-255.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=92283271; PubMed=1317796;
+RA   Toker A., Sellers L.A., Amess B., Patel Y., Harris A., Aitken A.;
+RT   "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3)
+RT   from sheep brain. Amino acid sequence of phosphorylated forms.";
+RL   Eur. J. Biochem. 206:453-461(1992).
+RN   [14]
+RP   SEQUENCE OF 1-23 AND 125-140.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=90345949; PubMed=2143472;
+RA   Toker A., Ellis C.A., Sellers L.A., Aitken A.;
+RT   "Protein kinase C inhibitor proteins. Purification from sheep brain
+RT   and sequence similarity to lipocortins and 14-3-3 protein.";
+RL   Eur. J. Biochem. 191:421-429(1990).
+CC   -!- FUNCTION: ACTIVATES TYROSINE AND TRYPTOPHAN HYDROXYLASES IN THE
+CC       PRESENCE OF CA(2+)/CALMODULIN-DEPENDENT PROTEIN KINASE II, AND
+CC       STRONGLY ACTIVATES PROTEIN KINASE C. IS PROBABLY A MULTIFUNCTIONAL
+CC       REGULATOR OF THE CELL SIGNALING PROCESSES MEDIATED BY BOTH
+CC       KINASES.
+CC   -!- SUBUNIT: HOMODIMER.
+CC   -!- SUBCELLULAR LOCATION: CYTOPLASMIC.
+CC   -!- TISSUE SPECIFICITY: 14-3-3 PROTEINS ARE LOCALIZED IN NEURONS, AND
+CC       ARE AXONALLY TRANSPORTED TO THE NERVE TERMINALS. THEY MAY BE ALSO
+CC       PRESENT, AT LOWER LEVELS, IN VARIOUS OTHER EUKARYOTIC TISSUES.
+CC   -!- SIMILARITY: BELONGS TO THE 14-3-3 FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; U28936; AAA75301.1; -.
+DR   EMBL; U20972; AAC50175.1; -.
+DR   EMBL; U43399; AAC50625.1; -.
+DR   EMBL; U43430; AAD00026.1; -.
+DR   EMBL; U54778; AAC50710.1; -.
+DR   EMBL; AB017103; BAA32538.1; -.
+DR   EMBL; AB017098; BAA32538.1; JOINED.
+DR   EMBL; AB017099; BAA32538.1; JOINED.
+DR   EMBL; AB017100; BAA32538.1; JOINED.
+DR   EMBL; AB017101; BAA32538.1; JOINED.
+DR   EMBL; AB017102; BAA32538.1; JOINED.
+DR   EMBL; BC000179; AAH00179.1; -.
+DR   EMBL; BC001440; AAH01440.1; -.
+DR   EMBL; M84416; AAC37659.1; -.
+DR   EMBL; D30739; BAA06401.1; -.
+DR   EMBL; Z19599; CAA79659.1; -.
+DR   EMBL; U53882; AAC52676.1; -.
+DR   EMBL; L07914; AAC37321.1; -.
+DR   EMBL; D87663; BAA13424.1; -.
+DR   EMBL; AF043735; AAC61927.1; -.
+DR   PIR; S10806; S10806.
+DR   PIR; S10807; S10807.
+DR   HSSP; P29312; 1A38.
+DR   MIM; 605066; -.
+DR   MGD; MGI:894689; Ywhae.
+DR   InterPro; IPR000308; 14-3-3.
+DR   Pfam; PF00244; 14-3-3; 1.
+DR   PRINTS; PR00305; 1433ZETA.
+DR   ProDom; PD000600; 14-3-3; 1.
+DR   SMART; SM00101; 14_3_3; 1.
+DR   PROSITE; PS00796; 1433_1; 1.
+DR   PROSITE; PS00797; 1433_2; 1.
+KW   Brain; Neurone; Acetylation; Multigene family.
+FT   MOD_RES       1      1       ACETYLATION.
+FT   CONFLICT     73     73       K -> T (IN REF. 9).
+FT   CONFLICT    120    120       F -> S (IN REF. 9).
+FT   CONFLICT    123    123       K -> Y (IN REF. 9).
+FT   CONFLICT    129    129       H -> Y (IN REF. 14).
+SQ   SEQUENCE   255 AA;  29174 MW;  07817CCBD1F75B26 CRC64;
+     MDDREDLVYQ AKLAEQAERY DEMVESMKKV AGMDVELTVE ERNLLSVAYK NVIGARRASW
+     RIISSIEQKE ENKGGEDKLK MIREYRQMVE TELKLICCDI LDVLDKHLIP AANTGESKVF
+     YYKMKGDYHR YLAEFATGND RKEAAENSLV AYKAASDIAM TELPPTHPIR LGLALNFSVF
+     YYEILNSPDR ACRLAKAAFD DAIAELDTLS EESYKDSTLI MQLLRDNLTL WTSDMQGDGE
+     EQNKEALQDV EDENQ
+//
+ID   143B_BOVIN     STANDARD;      PRT;   245 AA.
+AC   P29358;
+DT   01-DEC-1992 (Rel. 24, Created)
+DT   01-FEB-1996 (Rel. 33, Last sequence update)
+DT   16-OCT-2001 (Rel. 40, Last annotation update)
+DE   14-3-3 protein beta/alpha (Protein kinase C inhibitor protein-1)
+DE   (KCIP-1).
+GN   YWHAB.
+OS   Bos taurus (Bovine), and
+OS   Ovis aries (Sheep).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Cetartiodactyla; Ruminantia; Pecora; Bovoidea;
+OC   Bovidae; Bovinae; Bos.
+OX   NCBI_TaxID=9913, 9940;
+RN   [1]
+RP   SEQUENCE.
+RC   SPECIES=Bovine;
+RX   MEDLINE=91108808; PubMed=1671102;
+RA   Isobe T., Ichimura T., Sunaya T., Okuyama T., Takahashi N., Kuwano R.,
+RA   Takahashi Y.;
+RT   "Distinct forms of the protein kinase-dependent activator of tyrosine
+RT   and tryptophan hydroxylases.";
+RL   J. Mol. Biol. 217:125-132(1991).
+RN   [2]
+RP   SEQUENCE OF 2-145 FROM N.A.
+RC   SPECIES=Bovine; TISSUE=Retina;
+RA   Jones J.M., Niikura T., Pinke R.M., Guo W., Molday L., Leykam J.,
+RA   McConnell D.G.;
+RT   "Expression of 14-3-3 proteins in bovine retinal photoreceptors.";
+RL   Submitted (JAN-1998) to the EMBL/GenBank/DDBJ databases.
+RN   [3]
+RP   SEQUENCE OF 2-83; 121-186 AND 199-241.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=92283271; PubMed=1317796;
+RA   Toker A., Sellers L.A., Amess B., Patel Y., Harris A., Aitken A.;
+RT   "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3)
+RT   from sheep brain. Amino acid sequence of phosphorylated forms.";
+RL   Eur. J. Biochem. 206:453-461(1992).
+RN   [4]
+RP   SEQUENCE OF 2-23.
+RC   SPECIES=Sheep; TISSUE=Brain;
+RX   MEDLINE=90345949; PubMed=2143472;
+RA   Toker A., Ellis C.A., Sellers L.A., Aitken A.;
+RT   "Protein kinase C inhibitor proteins. Purification from sheep brain
+RT   and sequence similarity to lipocortins and 14-3-3 protein.";
+RL   Eur. J. Biochem. 191:421-429(1990).
+RN   [5]
+RP   PHOSPHORYLATION.
+RC   SPECIES=Sheep;
+RX   MEDLINE=95197587; PubMed=7890696;
+RA   Aitken A., Howell S., Jones D., Madrazo J., Patel Y.;
+RT   "14-3-3 alpha and delta are the phosphorylated forms of
+RT   raf-activating 14-3-3 beta and zeta. In vivo stoichiometric
+RT   phosphorylation in brain at a Ser-Pro-Glu-Lys motif.";
+RL   J. Biol. Chem. 270:5706-5709(1995).
+RN   [6]
+RP   POST-TRANSLATIONAL MODIFICATIONS.
+RC   SPECIES=Sheep;
+RA   Aitken A., Patel Y., Martin H., Jones D., Robinson K., Madrazo J.,
+RA   Howell S.;
+RT   "Electrospray mass spectroscopy analysis with online trapping of
+RT   posttranslationally modified mammalian and avian brain 14-3-3
+RT   isoforms.";
+RL   J. Protein Chem. 13:463-465(1994).
+CC   -!- FUNCTION: ACTIVATES TYROSINE AND TRYPTOPHAN HYDROXYLASES IN THE
+CC       PRESENCE OF CA(2+)/CALMODULIN-DEPENDENT PROTEIN KINASE II, AND
+CC       STRONGLY ACTIVATES PROTEIN KINASE C. IS PROBABLY A MULTIFUNCTIONAL
+CC       REGULATOR OF THE CELL SIGNALING PROCESSES MEDIATED BY BOTH
+CC       KINASES.
+CC   -!- SUBUNIT: HOMODIMER.
+CC   -!- SUBCELLULAR LOCATION: CYTOPLASMIC.
+CC   -!- ALTERNATIVE PRODUCTS: TWO FORMS ARE PRODUCED BY ALTERNATIVE
+CC       INITIATION.
+CC   -!- TISSUE SPECIFICITY: 14-3-3 PROTEINS ARE LOCALIZED IN NEURONS, AND
+CC       ARE AXONALLY TRANSPORTED TO THE NERVE TERMINALS. THEY MAY BE ALSO
+CC       PRESENT, AT LOWER LEVELS, IN VARIOUS OTHER EUKARYOTIC TISSUES.
+CC   -!- PTM: ISOFORM ALPHA DIFFERS FROM ISOFORM BETA IN BEING
+CC       PHOSPHORYLATED.
+CC   -!- SIMILARITY: BELONGS TO THE 14-3-3 FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; AF043736; AAC02090.1; -.
+DR   PIR; S13467; S13467.
+DR   PIR; S10804; S10804.
+DR   PIR; S23179; S23179.
+DR   HSSP; P29312; 1A38.
+DR   InterPro; IPR000308; 14-3-3.
+DR   Pfam; PF00244; 14-3-3; 1.
+DR   PRINTS; PR00305; 1433ZETA.
+DR   ProDom; PD000600; 14-3-3; 1.
+DR   SMART; SM00101; 14_3_3; 1.
+DR   PROSITE; PS00796; 1433_1; 1.
+DR   PROSITE; PS00797; 1433_2; 1.
+KW   Brain; Neurone; Phosphorylation; Acetylation; Multigene family;
+KW   Alternative initiation.
+FT   INIT_MET      0      0
+FT   CHAIN         1    245       14-3-3 PROTEIN BETA/ALPHA, LONG ISOFORM.
+FT   CHAIN         2    245       14-3-3 PROTEIN BETA/ALPHA, SHORT ISOFORM.
+FT   INIT_MET      2      2       FOR SHORT ISOFORM.
+FT   MOD_RES       1      1       ACETYLATION.
+FT   MOD_RES       2      2       ACETYLATION (IN SHORT ISOFORM).
+FT   MOD_RES     185    185       PHOSPHORYLATION.
+SQ   SEQUENCE   245 AA;  27950 MW;  AA91C2314D99549F CRC64;
+     TMDKSELVQK AKLAEQAERY DDMAAAMKAV TEQGHELSNE ERNLLSVAYK NVVGARRSSW
+     RVISSIEQKT ERNEKKQQMG KEYREKIEAE LQDICNDVLQ LLDKYLIPNA TQPESKVFYL
+     KMKGDYFRYL SEVASGDNKQ TTVSNSQQAY QEAFEISKKE MQPTHPIRLG LALNFSVFYY
+     EILNSPEKAC SLAKTAFDEA IAELDTLNEE SYKDSTLIMQ LLRDNLTLWT SENQGDEGDA
+     GEGEN
+//
+ID   CALM_HUMAN     STANDARD;      PRT;   148 AA.
+AC   P02593; P99014; P70667; Q61379; Q61380;
+DT   21-JUL-1986 (Rel. 01, Created)
+DT   21-JUL-1986 (Rel. 01, Last sequence update)
+DT   16-OCT-2001 (Rel. 40, Last annotation update)
+DE   Calmodulin.
+GN   (CALM1 OR CAM1 OR CALM OR CAM) AND (CALM2 OR CAM2 OR CAMB) AND
+GN   (CALM3 OR CAM3 OR CAMC).
+OS   Homo sapiens (Human),
+OS   Mus musculus (Mouse),
+OS   Rattus norvegicus (Rat),
+OS   Oryctolagus cuniculus (Rabbit),
+OS   Bos taurus (Bovine),
+OS   Gallus gallus (Chicken),
+OS   Anas platyrhynchos (Domestic duck),
+OS   Xenopus laevis (African clawed frog),
+OS   Arbacia punctulata (Punctuate sea urchin),
+OS   Oncorhynchus sp. (Salmon), and
+OS   Oryzias latipes (Medaka fish).
+OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+OC   Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+OX   NCBI_TaxID=9606, 10090, 10116, 9986, 9913, 9031, 8839, 8355, 7641,
+OX   8025, 8090;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=89034207; PubMed=3182832;
+RA   Fischer R., Koller M., Flura M., Mathews S., Strehler-Page M.A.,
+RA   Krebs J., Penniston J.T., Carafoli E., Strehler E.E.;
+RT   "Multiple divergent mRNAs code for a single human calmodulin.";
+RL   J. Biol. Chem. 263:17055-17062(1988).
+RN   [2]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=88059053; PubMed=2445749;
+RA   Sengupta B., Friedberg F., Detera-Wadleigh S.D.;
+RT   "Molecular analysis of human and rat calmodulin complementary DNA
+RT   clones. Evidence for additional active genes in these species.";
+RL   J. Biol. Chem. 262:16663-16670(1987).
+RN   [3]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human;
+RX   MEDLINE=85022688; PubMed=6385987;
+RA   Wawrzynczak E.J., Perham R.N.;
+RT   "Isolation and nucleotide sequence of a cDNA encoding human
+RT   calmodulin.";
+RL   Biochem. Int. 9:177-185(1984).
+RN   [4]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Blood;
+RX   MEDLINE=95010144; PubMed=7925473;
+RA   Rhyner J.A., Ottiger M., Wicki R., Greenwood T.M., Strehler E.E.;
+RT   "Structure of the human CALM1 calmodulin gene and identification of
+RT   two CALM1-related pseudogenes CALM1P1 and CALM1P2.";
+RL   Eur. J. Biochem. 225:71-82(1994).
+RN   [5]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Human; TISSUE=Lymphoma;
+RA   Kato S.;
+RL   Submitted (FEB-1995) to the EMBL/GenBank/DDBJ databases.
+RN   [6]
+RP   SEQUENCE.
+RC   SPECIES=Human; TISSUE=Brain;
+RX   MEDLINE=82231946; PubMed=7093203;
+RA   Sasagawa T., Ericsson L.H., Walsh K.A., Schreiber W.E., Fischer E.H.,
+RA   Titani K.;
+RT   "Complete amino acid sequence of human brain calmodulin.";
+RL   Biochemistry 21:2565-2569(1982).
+RN   [7]
+RP   SEQUENCE.
+RC   SPECIES=Rabbit; TISSUE=Skeletal muscle;
+RX   MEDLINE=81138220; PubMed=7202416;
+RA   Grand R.J.A., Shenolikar S., Cohen P.;
+RT   "The amino acid sequence of the delta subunit (calmodulin) of rabbit
+RT   skeletal muscle phosphorylase kinase.";
+RL   Eur. J. Biochem. 113:359-367(1981).
+RN   [8]
+RP   SEQUENCE.
+RC   SPECIES=Bovine; TISSUE=Brain;
+RA   Kasai H., Kato Y., Isobe T., Kawasaki H., Okuyama T.;
+RT   "Determination of the complete amino acid sequence of calmodulin
+RT   (phenylalanine-rich acidic protein II) from bovine brain.";
+RL   Biomed. Res. 1:248-264(1980).
+RN   [9]
+RP   SEQUENCE.
+RC   SPECIES=Bovine; TISSUE=Brain;
+RX   MEDLINE=80094551; PubMed=7356670;
+RA   Watterson D.M., Sharief F., Vanaman T.C.;
+RT   "The complete amino acid sequence of the Ca2+-dependent modulator
+RT   protein (calmodulin) of bovine brain.";
+RL   J. Biol. Chem. 255:962-975(1980).
+RN   [10]
+RP   SEQUENCE.
+RC   SPECIES=Bovine; TISSUE=Uterus;
+RA   Grand R.J.A., Perry S.V.;
+RT   "The amino acid sequence of the troponin C-like protein (modulator
+RT   protein) from bovine uterus.";
+RL   FEBS Lett. 92:137-142(1978).
+RN   [11]
+RP   SEQUENCE OF 38-60.
+RC   SPECIES=Bovine;
+RX   MEDLINE=89064822; PubMed=3058479;
+RA   Pribilla I., Krueger H., Buchner K., Otto H., Schiebler W.,
+RA   Tripier D., Hucho F.;
+RT   "Heat-resistant inhibitors of protein kinase C from bovine brain.";
+RL   Eur. J. Biochem. 177:657-664(1988).
+RN   [12]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse;
+RX   MEDLINE=88257100; PubMed=3384819;
+RA   Bender P.K., Dedman J.R., Emerson C.P.;
+RT   "The abundance of calmodulin mRNAs is regulated in phosphorylase
+RT   kinase-deficient skeletal muscle.";
+RL   J. Biol. Chem. 263:9733-9737(1988).
+RN   [13]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse;
+RX   MEDLINE=90006775; PubMed=2551780;
+RA   Danchin A., Sezer O., Glaser P., Chalon P., Caput D.;
+RT   "Cloning and expression of mouse-brain calmodulin as an activator of
+RT   Bordetella pertussis adenylate cyclase in Escherichia coli.";
+RL   Gene 80:145-149(1989).
+RN   [14]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Mouse; STRAIN=BALB/C; TISSUE=Brain;
+RA   Kato K.;
+RT   "A collection of cDNA clones with specific expression patterns in
+RT   mouse brain.";
+RL   Eur. J. Neurosci. 2:704-711(1991).
+RN   [15]
+RP   SEQUENCE.
+RC   SPECIES=Rat; TISSUE=Testis;
+RX   MEDLINE=78066877; PubMed=201628;
+RA   Dedman J.R., Jackson R.L., Schreiber W.E., Means A.R.;
+RT   "Sequence homology of the Ca2+-dependent regulator of cyclic
+RT   nucleotide phosphodiesterase from rat testis with other Ca2+-binding
+RT   proteins.";
+RL   J. Biol. Chem. 253:343-346(1978).
+RN   [16]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Brain;
+RX   MEDLINE=87246077; PubMed=2885164;
+RA   Sherbany A.A., Parent A.S., Brosius J.;
+RT   "Rat calmodulin cDNA.";
+RL   DNA 6:267-272(1987).
+RN   [17]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; TISSUE=Brain;
+RX   MEDLINE=87226204; PubMed=3035194;
+RA   Nojima H., Hirofumi S.;
+RT   "Structure of a gene for rat calmodulin.";
+RL   J. Mol. Biol. 193:439-445(1987).
+RN   [18]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat;
+RX   MEDLINE=87257889; PubMed=3037336;
+RA   Nojima H., Kishi K., Sokabe H.;
+RT   "Multiple calmodulin mRNA species are derived from two distinct
+RT   genes.";
+RL   Mol. Cell. Biol. 7:1873-1880(1987).
+RN   [19]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Rat; STRAIN=SHR;
+RX   MEDLINE=89362474; PubMed=2527998;
+RA   Nojima H.;
+RT   "Structural organization of multiple rat calmodulin genes.";
+RL   J. Mol. Biol. 208:269-282(1989).
+RN   [20]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Chicken;
+RX   MEDLINE=84008199; PubMed=6137485;
+RA   Putkey J.A., Ts'Ui K.F., Tanaka T., Lagace L., Stein J.P., Lai E.C.,
+RA   Means A.R.;
+RT   "Chicken calmodulin genes. A species comparison of cDNA sequences and
+RT   isolation of a genomic clone.";
+RL   J. Biol. Chem. 258:11864-11870(1983).
+RN   [21]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Chicken;
+RX   MEDLINE=85104969; PubMed=2981850;
+RA   Simmen R.C.M., Tanaka T., Ts'Ui K.F., Putkey J.A., Scott M.J.,
+RA   Lai E.C., Means A.R.;
+RT   "The structural organization of the chicken calmodulin gene.";
+RL   J. Biol. Chem. 260:907-912(1985).
+RN   [22]
+RP   ERRATUM.
+RC   SPECIES=Chicken;
+RA   Simmen R.C.M., Tanaka T., Ts'Ui K.F., Putkey J.A., Scott M.J.,
+RA   Lai E.C., Means A.R.;
+RL   J. Biol. Chem. 262:4928-4929(1987).
+RN   [23]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=Chicken;
+RA   Iida Y.;
+RT   "cDNA sequences and molecular evolution of calmodulin genes of
+RT   chicken and eel.";
+RL   Bull. Chem. Soc. Jpn. 57:2667-2668(1984).
+RN   [24]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=A.platyrhynchos;
+RX   MEDLINE=93287810; PubMed=8389959;
+RA   Kimura N., Kurosawa N., Kondo K., Tsukada Y.;
+RT   "Molecular cloning of the kainate-binding protein and calmodulin
+RT   genes which are induced by an imprinting stimulus in ducklings.";
+RL   Brain Res. Mol. Brain Res. 17:351-355(1993).
+RN   [25]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=X.laevis;
+RX   MEDLINE=84191128; PubMed=6325880;
+RA   Chien Y.-H., Dawid I.B.;
+RT   "Isolation and characterization of calmodulin genes from Xenopus
+RT   laevis.";
+RL   Mol. Cell. Biol. 4:507-513(1984).
+RN   [26]
+RP   SEQUENCE OF 1-141 FROM N.A.
+RC   SPECIES=A.punctulata;
+RX   MEDLINE=88172463; PubMed=3351921;
+RA   Hardy D.O., Bender P.K., Kretsinger R.H.;
+RT   "Two calmodulin genes are expressed in Arbacia punctulata. An ancient
+RT   gene duplication is indicated.";
+RL   J. Mol. Biol. 199:223-227(1988).
+RN   [27]
+RP   SEQUENCE.
+RC   SPECIES=Salmon;
+RA   Yazawa M., Toda H., Yagi Y.;
+RT   "Amino acid sequence of salmon calmodulin.";
+RL   Seikagaku 57:1037-1037(1985).
+RN   [28]
+RP   SEQUENCE FROM N.A.
+RC   SPECIES=O.latipes;
+RX   MEDLINE=93012998; PubMed=1398109;
+RA   Matsuo K., Sato K., Ikeshima H., Shimoda K., Takano T.;
+RT   "Four synonymous genes encode calmodulin in the teleost fish, medaka
+RT   (Oryzias latipes): conservation of the multigene one-protein
+RT   principle.";
+RL   Gene 119:279-281(1992).
+RN   [29]
+RP   SEQUENCE OF 1-27, AND UBIQUITYLATION OF LYS-21.
+RC   SPECIES=Bovine;
+RX   MEDLINE=98380241; PubMed=9716384;
+RA   Laub M., Steppuhn J.A., Blueggel M., Immler D., Meyer H.E.,
+RA   Jennissen H.P.;
+RT   "Modulation of calmodulin function by ubiquitin-calmodulin ligase and
+RT   identification of the responsible ubiquitylation site in vertebrate
+RT   calmodulin.";
+RL   Eur. J. Biochem. 255:422-431(1998).
+RN   [30]
+RP   X-RAY CRYSTALLOGRAPHY (3.0 ANGSTROMS).
+RC   SPECIES=Rat;
+RX   MEDLINE=85188323; PubMed=3990807;
+RA   Babu Y.S., Sack J.S., Greenhough T.J., Bugg C.E., Means A.R.,
+RA   Cook W.J.;
+RT   "Three-dimensional structure of calmodulin.";
+RL   Nature 315:37-40(1985).
+RN   [31]
+RP   X-RAY CRYSTALLOGRAPHY (2.2 ANGSTROMS).
+RC   SPECIES=Rat;
+RX   MEDLINE=89110997; PubMed=3145979;
+RA   Babu Y.S., Bugg C.E., Cook W.J.;
+RT   "Structure of calmodulin refined at 2.2-A resolution.";
+RL   J. Mol. Biol. 204:191-204(1988).
+RN   [32]
+RP   X-RAY CRYSTALLOGRAPHY (2 ANGSTROMS).
+RC   SPECIES=Bovine;
+RX   MEDLINE=98104088; PubMed=9438860;
+RA   Wall M.E., Clarage J.B., Phillips G.N.;
+RT   "Motions of calmodulin characterized using both Bragg and diffuse
+RT   X-ray scattering.";
+RL   Structure 5:1599-1612(1997).
+RN   [33]
+RP   STRUCTURE BY NMR OF 76-148.
+RX   MEDLINE=94085641; PubMed=8262263;
+RA   Finn B.E., Drakenberg T., Forsen S.;
+RT   "The structure of apo-calmodulin. A 1H NMR examination of the
+RT   carboxy-terminal domain.";
+RL   FEBS Lett. 336:368-374(1993).
+RN   [34]
+RP   STRUCTURE BY NMR OF 76-148.
+RX   MEDLINE=96018615; PubMed=7552749;
+RA   Finn B.E., Evenas J., Drakenberg T., Waltho J.P., Thulin E.,
+RA   Forsen S.;
+RT   "Calcium-induced structural changes and domain autonomy in
+RT   calmodulin.";
+RL   Nat. Struct. Biol. 2:777-783(1995).
+RN   [35]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=96018613; PubMed=7552747;
+RA   Zhang M., Tanaka T., Ikura M.;
+RT   "Calcium-induced conformational transition revealed by the solution
+RT   structure of apo calmodulin.";
+RL   Nat. Struct. Biol. 2:758-767(1995).
+RN   [36]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=96018614; PubMed=7552748;
+RA   Kuboniwa H., Tjandra N., Grzesiek S., Ren H., Klee C.B., Bax A.;
+RT   "Solution structure of calcium-free calmodulin.";
+RL   Nat. Struct. Biol. 2:768-776(1995).
+RN   [37]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=98179557; PubMed=9514729;
+RA   Osawa M., Swindells M.B., Tanikawa J., Tanaka T., Mase T., Furuya T.,
+RA   Ikura M.;
+RT   "Solution structure of calmodulin-W-7 complex: the basis of diversity
+RT   in molecular recognition.";
+RL   J. Mol. Biol. 276:165-176(1998).
+RN   [38]
+RP   STRUCTURE BY NMR.
+RX   MEDLINE=99425120; PubMed=10493800;
+RA   Elshorst B., Hennig M., Foersterling H., Diener A., Maurer M.,
+RA   Schulte P., Schwalbe H., Griesinger C., Krebs J., Schmid H.,
+RA   Vorherr T., Carafoli E.;
+RT   "NMR solution structure of a complex of calmodulin with a binding
+RT   peptide of the Ca(2+) pump.";
+RL   Biochemistry 38:12320-12332(1999).
+CC   -!- FUNCTION: CALMODULIN MEDIATES THE CONTROL OF A LARGE NUMBER OF
+CC       ENZYMES BY CA(++). AMONG THE ENZYMES TO BE STIMULATED BY THE
+CC       CALMODULIN-CA(++) COMPLEX ARE A NUMBER OF PROTEIN KINASES AND
+CC       PHOSPHATASES.
+CC   -!- PTM: UBIQUITYLATION STRONGLY DECREASES THE ACTIVITY.
+CC   -!- MISCELLANEOUS: THIS PROTEIN HAS FOUR FUNCTIONAL CALCIUM-BINDING
+CC       SITES.
+CC   -!- SIMILARITY: TO OTHER EF-HAND CALCIUM BINDING PROTEINS.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; L00101; AAA48653.1; -.
+DR   EMBL; L00096; AAA48653.1; JOINED.
+DR   EMBL; L00097; AAA48653.1; JOINED.
+DR   EMBL; L00098; AAA48653.1; JOINED.
+DR   EMBL; L00099; AAA48653.1; JOINED.
+DR   EMBL; L00100; AAA48653.1; JOINED.
+DR   EMBL; M16659; AAA40864.1; -.
+DR   EMBL; M27319; AAA35635.1; -.
+DR   EMBL; U12022; AAB60644.1; -.
+DR   EMBL; U11886; AAB60644.1; JOINED.
+DR   EMBL; D45887; BAA08302.1; -.
+DR   EMBL; X13817; CAA32050.1; -.
+DR   EMBL; J04046; AAA51918.1; -.
+DR   EMBL; M19311; AAA35641.1; -.
+DR   EMBL; M19312; AAA40862.1; -.
+DR   EMBL; M17069; AAA40863.1; -.
+DR   EMBL; X13933; CAA32120.1; -.
+DR   EMBL; X13931; CAA32119.1; -.
+DR   EMBL; X13932; CAA32119.1; JOINED.
+DR   EMBL; X05117; CAA32119.1; JOINED.
+DR   EMBL; X13833; CAA32062.1; -.
+DR   EMBL; X13834; CAA32062.1; JOINED.
+DR   EMBL; X13835; CAA32062.1; JOINED.
+DR   EMBL; X14265; CAA32478.1; -.
+DR   EMBL; D83350; BAA11896.1; -.
+DR   EMBL; M36167; AAA48650.1; -.
+DR   EMBL; K01944; AAA49668.1; -.
+DR   EMBL; K01945; AAA49669.1; -.
+DR   EMBL; D10363; BAA01195.1; -.
+DR   EMBL; M19380; AAA66181.1; -.
+DR   EMBL; M19381; AAA66182.1; -.
+DR   EMBL; L31642; AAA65934.1; -.
+DR   EMBL; M27844; AAA37365.1; -.
+DR   EMBL; X61432; CAA43674.1; -.
+DR   PIR; S13159; MCHU.
+DR   PIR; JK0013; MCON.
+DR   PIR; A90719; MCBO.
+DR   PIR; A91104; MCRB.
+DR   PIR; S03206; MCRT.
+DR   PIR; A92394; MCCH.
+DR   PIR; S02690; S02690.
+DR   PIR; A60781; A60781.
+DR   PIR; JC1305; JC1305.
+DR   PDB; 2CLN; 15-OCT-94.
+DR   PDB; 3CLN; 09-JAN-89.
+DR   PDB; 1TRC; 15-OCT-91.
+DR   PDB; 1AK8; 17-SEP-97.
+DR   PDB; 1CDL; 31-AUG-94.
+DR   PDB; 1CDM; 31-AUG-94.
+DR   PDB; 1CFC; 07-DEC-95.
+DR   PDB; 1CFD; 07-DEC-95.
+DR   PDB; 1CLL; 31-OCT-93.
+DR   PDB; 1CM1; 04-MAR-98.
+DR   PDB; 1CM4; 04-MAR-98.
+DR   PDB; 1CMF; 07-DEC-95.
+DR   PDB; 1CMG; 07-DEC-95.
+DR   PDB; 1CTR; 20-DEC-94.
+DR   PDB; 1DEG; 31-MAY-94.
+DR   PDB; 1DMO; 01-AUG-96.
+DR   PDB; 1LIN; 08-MAR-96.
+DR   PDB; 1AJI; 17-SEP-97.
+DR   PDB; 1A29; 16-SEP-98.
+DR   PDB; 1MUX; 25-NOV-98.
+DR   PDB; 1CFF; 24-SEP-91.
+DR   SWISS-2DPAGE; P99014; MOUSE.
+DR   Aarhus/Ghent-2DPAGE; 9048; IEF.
+DR   MIM; 114180; -.
+DR   MIM; 114182; -.
+DR   MIM; 114183; -.
+DR   MGD; MGI:88251; Calm.
+DR   MGD; MGI:103250; Calm2.
+DR   MGD; MGI:103249; Calm3.
+DR   InterPro; IPR002048; EF-hand.
+DR   Pfam; PF00036; efhand; 4.
+DR   SMART; SM00054; EFh; 4.
+DR   PROSITE; PS00018; EF_HAND; 4.
+KW   Calcium-binding; Duplication; Methylation; Acetylation;
+KW   3D-structure.
+FT   INIT_MET      0      0
+FT   MOD_RES       1      1       ACETYLATION.
+FT   MOD_RES     115    115       METHYLATION (TRI-) (IN CHICKEN).
+FT   CA_BIND      20     31       EF-HAND 1.
+FT   CA_BIND      56     67       EF-HAND 2.
+FT   CA_BIND      93    104       EF-HAND 3.
+FT   CA_BIND     129    140       EF-HAND 4.
+FT   BINDING      21     21       UBIQUITIN (MULTI-).
+FT   CONFLICT     25     25       G -> N (IN REF. 12; AAA66182).
+FT   HELIX         5     19
+FT   TURN         21     22
+FT   STRAND       26     27
+FT   HELIX        29     37
+FT   TURN         38     40
+FT   HELIX        45     55
+FT   TURN         57     58
+FT   STRAND       63     64
+FT   HELIX        65     92
+FT   TURN         94     95
+FT   STRAND      100    100
+FT   HELIX       102    111
+FT   TURN        112    113
+FT   HELIX       118    128
+FT   STRAND      136    136
+FT   HELIX       138    146
+SQ   SEQUENCE   148 AA;  16706 MW;  464B8A287475A1CA CRC64;
+     ADQLTEEQIA EFKEAFSLFD KDGDGTITTK ELGTVMRSLG QNPTEAELQD MINEVDADGN
+     GTIDFPEFLT MMARKMKDTD SEEEIREAFR VFDKDGNGYI SAAELRHVMT NLGEKLTDEE
+     VDEMIREADI DGDGQVNYEE FVQMMTAK
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/swisspfam.data
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/swisspfam.data	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/swisspfam.data	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+>ROA1_MOUSE       |==============================================| P49312 319 a.a.
+Pfam-B_14464     1                          --------------------- (3) PD14464
+178-319
+rrm              2   ----------   ----------                      (1058) PF00076  RNA recognition motif. (aka RRM, RBD, or RNP domain)  15-85 106-176
+>ROA1_RAT         |==============================================| P04256 319 a.a.
+Pfam-B_14464     1                          --------------------- (3) PD14464
+178-319
+rrm              2   ----------   ----------                      (1058) PF00076  RNA recognition motif. (aka RRM, RBD, or RNP domain)  15-85 106-176
+>ROA1_SCHAM       |=================================================| P21522 342 a.a.
+rrm              2   ----------   ----------                         (1058) PF00076  RNA recognition motif. (aka RRM, RBD, or RNP domain)  19-89 110-180
+>ROA1_XENLA       |==============================================| P17130 365 a.a.
+rrm              2  ---------   ---------                         (1058) PF00076  RNA recognition motif. (aka RRM, RBD, or RNP domain)  16-86 107-177         

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tab1part.mif
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tab1part.mif	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tab1part.mif	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,82 @@
+DIP:4305E	DIP:3048N			PIR:B64526	GI:2313123	DIP:3047N		SWP:O24853	PIR:A64520	GI:2313078
+DIP:4307E	DIP:3050N			PIR:H64618	GI:2313921	DIP:3047N		SWP:O24853	PIR:A64520	GI:2313078
+DIP:4308E	DIP:3051N			PIR:B64520	GI:2313079	DIP:3051N			PIR:B64520	GI:2313079
+DIP:4309E	DIP:3052N		SWP:P56036	PIR:H64669	GI:2314362	DIP:3051N			PIR:B64520	GI:2313079
+DIP:4310E	DIP:3053N			PIR:A64696	GI:2314583	DIP:3051N			PIR:B64520	GI:2313079
+DIP:4311E	DIP:3054N			PIR:C64701	GI:2314631	DIP:3051N			PIR:B64520	GI:2313079
+DIP:4312E	DIP:3055N		SWP:P55993	PIR:H64530	GI:2313167	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4313E	DIP:3057N		SWP:P55991	PIR:D64534	GI:2313198	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4314E	DIP:3058N		SWP:P56112	PIR:G64541	GI:2313264	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4315E	DIP:3059N			PIR:F64554	GI:2313368	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4316E	DIP:3060N			PIR:E64559	GI:2313410	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4317E	DIP:3061N			PIR:G64592	GI:2313700	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4318E	DIP:3062N		SWP:O25336	PIR:G64596	GI:2313737	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4319E	DIP:3063N			PIR:H64599	GI:2313761	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4320E	DIP:3064N		SWP:P56143	PIR:B64609	GI:2313838	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4321E	DIP:3065N			PIR:A64642	GI:2314119	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4322E	DIP:3066N			PIR:B64679	GI:2314439	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4323E	DIP:3067N			PIR:F64688	GI:2314520	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4324E	DIP:3053N			PIR:A64696	GI:2314583	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4325E	DIP:3068N			PIR:C64696	GI:2314584	DIP:3056N		SWP:P56155	PIR:E64520	GI:2313082
+DIP:4326E	DIP:3070N			PIR:E64595	GI:2313728	DIP:3069N		SWP:P42383	PIR:S36237	GI:2313084
+DIP:4327E	DIP:3071N			PIR:B64648	GI:2314168	DIP:3069N		SWP:P42383	PIR:S36237	GI:2313084
+DIP:4328E	DIP:3072N			PIR:D64660	GI:2314290	DIP:3069N		SWP:P42383	PIR:S36237	GI:2313084
+DIP:4329E	DIP:3073N			PIR:H64671	GI:2314377	DIP:3069N		SWP:P42383	PIR:S36237	GI:2313084
+DIP:4330E	DIP:3074N		SWP:P48225	PIR:C71986	GI:2313085	DIP:3074N		SWP:P48225	PIR:C71986	GI:2313085
+DIP:4331E	DIP:3053N			PIR:A64696	GI:2314583	DIP:3074N		SWP:P48225	PIR:C71986	GI:2313085
+DIP:4332E	DIP:3075N			PIR:H64521	GI:2313097	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4333E	DIP:3076N			PIR:F64524	GI:2313125	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4334E	DIP:3077N			PIR:D64548	GI:2313317	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4335E	DIP:3078N			PIR:G64574	GI:2313548	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4336E	DIP:3079N			PIR:G64605	GI:2313813	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4337E	DIP:3080N			PIR:A64607	GI:2313821	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4338E	DIP:3081N			PIR:G64614	GI:2313885	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4339E	DIP:3082N		SWP:Q59465	PIR:G64618	GI:2313920	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4340E	DIP:3083N			PIR:A64641	GI:2314107	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4341E	DIP:3084N			PIR:A64647	GI:2314162	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4342E	DIP:3085N			PIR:G64667	GI:2314343	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4343E	DIP:3086N			PIR:C64678	GI:2314432	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4344E	DIP:3087N			PIR:B64701	GI:2314626	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4345E	DIP:3088N			PIR:B64706	GI:2314662	DIP:3075N			PIR:H64521	GI:2313097
+DIP:4346E	DIP:3090N			PIR:F64546	GI:2313304	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4347E	DIP:3091N			PIR:B64564	GI:2313455	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4348E	DIP:3092N			PIR:G64585	GI:2313642	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4349E	DIP:3093N		SWP:P55980	PIR:C64588	GI:2313664	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4350E	DIP:3094N			PIR:G64595	GI:2313726	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4351E	DIP:3082N		SWP:Q59465	PIR:G64618	GI:2313920	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4352E	DIP:3083N			PIR:A64641	GI:2314107	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4353E	DIP:3095N		SWP:O06758	PIR:A64650	GI:2314189	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4354E	DIP:3096N		SWP:P55989	PIR:H64653	GI:2314221	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4355E	DIP:3097N			PIR:F64672	GI:2314381	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4356E	DIP:3087N			PIR:B64701	GI:2314626	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4357E	DIP:3098N		SWP:P56105	PIR:F64703	GI:2314647	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4358E	DIP:3099N			PIR:G64707	GI:2314677	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4359E	DIP:3100N		SWP:O26074	PIR:F64713	GI:2314730	DIP:3089N			PIR:D64523	GI:2313105
+DIP:4360E	DIP:3102N		SWP:P56061	PIR:F64520	GI:2313083	DIP:3101N			PIR:G64523	GI:2313114
+DIP:4361E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3101N			PIR:G64523	GI:2313114
+DIP:4362E	DIP:3104N		SWP:O08314	PIR:A64555	GI:2313378	DIP:3101N			PIR:G64523	GI:2313114
+DIP:4363E	DIP:3095N		SWP:O06758	PIR:A64650	GI:2314189	DIP:3101N			PIR:G64523	GI:2313114
+DIP:4364E	DIP:3105N			PIR:A64709	GI:2314693	DIP:3101N			PIR:G64523	GI:2313114
+DIP:4365E	DIP:3106N		SWP:O26087	PIR:G64715	GI:2314750	DIP:3101N			PIR:G64523	GI:2313114
+DIP:4366E	DIP:3107N		SWP:P56066	PIR:H64523	GI:2313107	DIP:3107N		SWP:P56066	PIR:H64523	GI:2313107
+DIP:4367E	DIP:3108N			PIR:C64524	GI:2313110	DIP:3108N			PIR:C64524	GI:2313110
+DIP:4368E	DIP:3110N			PIR:A64522	GI:2313091	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4369E	DIP:3111N			PIR:C64526	GI:2313124	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4370E	DIP:3112N			PIR:F64526	GI:2313132	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4384E	DIP:3112N			PIR:F64526	GI:2313132	DIP:3111N			PIR:C64526	GI:2313124
+DIP:4371E	DIP:3059N			PIR:F64554	GI:2313368	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4372E	DIP:3113N			PIR:E64567	GI:2313486	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4373E	DIP:3114N			PIR:C64572	GI:2313522	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4374E	DIP:3115N		SWP:O25424	PIR:C64610	GI:2313847	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4375E	DIP:3116N		SWP:P56082	PIR:E64661	GI:2314284	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4376E	DIP:3117N		SWP:P56032	PIR:F64684	GI:2314480	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4377E	DIP:3118N			PIR:H64693	GI:2314561	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4378E	DIP:3119N			PIR:E64525	GI:2313120	DIP:3119N			PIR:E64525	GI:2313120
+DIP:4379E	DIP:3053N			PIR:A64696	GI:2314583	DIP:3119N			PIR:E64525	GI:2313120
+DIP:4380E	DIP:3120N			PIR:G64525	GI:2313121	DIP:3120N			PIR:G64525	GI:2313121
+DIP:4381E	DIP:3121N			PIR:H64525	GI:2313122	DIP:3120N			PIR:G64525	GI:2313121
+DIP:4382E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3120N			PIR:G64525	GI:2313121
+DIP:4383E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3123N			PIR:E64527	GI:2313147
+DIP:4389E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3123N			PIR:E64527	GI:2313147
+DIP:4390E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3123N			PIR:E64527	GI:2313147
+DIP:4391E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3120N			PIR:G64525	GI:2313121

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tab2part.mif
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tab2part.mif	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tab2part.mif	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,3 @@
+DIP:5000E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3054N			PIR:G64525	GI:2313121
+DIP:5001E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3054N			PIR:G64525	GI:2313121
+DIP:5002E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3054N			PIR:G64525	GI:2313121

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tab3part.mif
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tab3part.mif	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tab3part.mif	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,21 @@
+DIP:4369E	DIP:3111N			PIR:C64526	GI:2313124	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4370E	DIP:3112N			PIR:F64526	GI:2313132	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4384E	DIP:3112N			PIR:F64526	GI:2313132	DIP:3111N			PIR:C64526	GI:2313124
+DIP:4371E	DIP:3059N			PIR:F64554	GI:2313368	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4372E	DIP:3113N			PIR:E64567	GI:2313486	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4373E	DIP:3114N			PIR:C64572	GI:2313522	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4374E	DIP:3115N		SWP:O25424	PIR:C64610	GI:2313847	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4375E	DIP:3116N		SWP:P56082	PIR:E64661	GI:2314284	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4376E	DIP:3117N		SWP:P56032	PIR:F64684	GI:2314480	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4377E	DIP:3118N			PIR:H64693	GI:2314561	DIP:3109N			PIR:B64525	GI:2313117
+DIP:4378E	DIP:3119N			PIR:E64525	GI:2313120	DIP:3119N			PIR:E64525	GI:2313120
+DIP:4379E	DIP:3053N			PIR:A64696	GI:2314583	DIP:3119N			PIR:E64525	GI:2313120
+DIP:4380E	DIP:3120N			PIR:G64525	GI:2313121	DIP:3120N			PIR:G64525	GI:2313121
+DIP:4381E	DIP:3121N			PIR:H64525	GI:2313122	DIP:3120N			PIR:G64525	GI:2313121
+DIP:4382E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3120N			PIR:G64525	GI:2313121
+DIP:4383E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3123N			PIR:E64527	GI:2313147
+DIP:4389E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3123N			PIR:E64527	GI:2313147
+DIP:4390E	DIP:3103N			PIR:B64528	GI:2313138	DIP:3123N			PIR:E64527	GI:2313147
+DIP:4391E	DIP:3122N			PIR:G64606	GI:2313818	DIP:3120N			PIR:G64525	GI:2313121
+DIP:5000E	DIP:3122N			PIR:G64606	GI:2313818	DIP:1111N			PIR:G5	GI:4
+DIP:5001E	DIP:3122N			PIR:G64606	GI:2313818	DIP:2222N			PIR:G6	GI:2

Added: trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/names.dmp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/names.dmp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/names.dmp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,282 @@
+1760	|	Actinobacteria (class)	|		|	scientific name	|
+1760	|	Actinobacteria Stackebrandt et al. 1997	|		|	synonym	|
+1760	|	Actinomycetes	|		|	synonym	|
+1760	|	Actinomycetes Krasil'nikov 1949	|		|	synonym	|
+1760	|	High GC gram-positive bacteria	|		|	synonym	|
+1760	|	high G+C Gram-positive bacteria	|		|	genbank common name	|
+1760	|	high GC Gram+	|	high GC gram-positives<blast1760>	|	blast name	|
+5533	|	Rhodotorula	|	Rhodotorula <Sporidiobolaceae>	|	scientific name	|
+9605	|	Homo	|		|	scientific name	|
+9606	|	Homo sapiens	|		|	scientific name	|
+9606	|	human	|		|	genbank common name	|
+9606	|	man	|		|	common name	|
+32061	|	"Chloroflexi" Castenholz 2001	|		|	synonym	|
+32061	|	Chloroflecales	|		|	misnomer	|
+32061	|	Chloroflexaceae/Deinococcaceae group	|		|	in-part	|
+32061	|	Chloroflexi (class)	|		|	scientific name	|
+32061	|	Chloroflexus/Deinococcaceae group	|		|	in-part	|
+32061	|	Chloroflexus/Deinococcus group	|		|	in-part	|
+32061	|	not "Chlorobacteria" Cavalier-Smith 1992	|		|	synonym	|
+32061	|	not Chlorobacteria Cavalier-Smith 2002	|		|	synonym	|
+89593	|	Craniata	|	Craniata <chordata>	|	scientific name	|
+165724	|	Rhodotorula	|	Rhodotorula <Microstromatales>	|	scientific name	|
+165726	|	Rhodotorula	|	Rhodotorula <Melanotaeniaceae>	|	scientific name	|
+165791	|	Rhodotorula	|	Rhodotorula <Erythrobasidium clade>	|	scientific name	|
+192684	|	Rhodotorula	|	Rhodotorula <Microbotryum clade>	|	scientific name	|
+200204	|	Rhodotorula	|	Rhodotorula <Agaricostilbomycetidae>	|	scientific name	|
+200795	|	Chloroflexi	|	Chloroflexi <phylum>	|	scientific name	|
+200795	|	Chloroflexi Garrity and Holt 2001 emend. Hugenholtz and Stackebrandt 2004	|		|	synonym	|
+200795	|	GNS bacteria	|	GNS bacteria<blast200795>	|	blast name	|
+200795	|	Thermomicrobia	|	Thermomicrobia <phylum>	|	includes	|
+200795	|	Thermomicrobia Garrity and Holt 2002	|		|	includes	|
+200795	|	green non-sulfur bacteria	|		|	common name	|
+200795	|	green nonsulfur bacteria	|		|	common name	|
+231509	|	Rhodotorula	|	Rhodotorula <Microbotryomycetidae>	|	scientific name	|
+266791	|	Rhodotorula	|	Rhodotorula <Ustilaginales>	|	scientific name	|
+2759	|	Eucarya	|		|	synonym	|
+2759	|	Eucaryotae	|		|	synonym	|
+2759	|	Eukarya	|		|	synonym	|
+2759	|	Eukaryota	|		|	scientific name	|
+2759	|	Eukaryotae	|		|	synonym	|
+2759	|	eucaryotes	|		|	genbank common name	|
+2759	|	eukaryotes	|		|	common name	|
+2759	|	eukaryotes	|	eukaryotes<blast2759>	|	blast name	|
+6072	|	Eumetazoa	|		|	scientific name	|
+7711	|	Chordata	|		|	scientific name	|
+7711	|	chordates	|		|	genbank common name	|
+7711	|	chordates	|	chordates<blast7711>	|	blast name	|
+7742	|	Vertebrata	|		|	scientific name	|
+7742	|	vertebrates	|		|	genbank common name	|
+7742	|	vertebrates	|	vertebrates<blast7742>	|	blast name	|
+7776	|	Gnathostomata	|	Gnathostomata <vertebrate>	|	scientific name	|
+7776	|	jawed vertebrates	|		|	genbank common name	|
+8287	|	Sarcopterygii	|		|	scientific name	|
+9347	|	Eutheria	|		|	scientific name	|
+9347	|	Placentalia	|		|	synonym	|
+9347	|	eutherian mammals	|		|	common name	|
+9347	|	placental mammals	|		|	common name	|
+9347	|	placentals	|		|	genbank common name	|
+9443	|	Primata	|		|	synonym	|
+9443	|	Primates	|		|	scientific name	|
+9443	|	primate	|		|	equivalent name	|
+9526	|	Catarrhini	|		|	scientific name	|
+9604	|	Hominidae	|		|	scientific name	|
+9604	|	Pongidae	|		|	synonym	|
+27591	|	Gazella granti	|		|	scientific name	|
+27591	|	Grant's gazelle	|		|	genbank common name	|
+27592	|	Bovinae	|		|	scientific name	|
+27596	|	Anoa depressicornis	|		|	synonym	|
+27596	|	Bubalis depressicornis	|		|	misspelling	|
+27596	|	Bubalus depressicornis	|		|	scientific name	|
+27596	|	anoa	|		|	common name	|
+27596	|	lowland anoa	|		|	genbank common name	|
+27598	|	Cervus sp.	|		|	scientific name	|
+32523	|	Tetrapoda	|		|	scientific name	|
+32523	|	tetrapods	|		|	genbank common name	|
+32524	|	Amniota	|		|	scientific name	|
+32524	|	amniotes	|		|	genbank common name	|
+32525	|	Theria	|		|	scientific name	|
+33154	|	Fungi/Metazoa group	|		|	scientific name	|
+33208	|	Animalia	|		|	synonym	|
+33208	|	Metazoa	|		|	scientific name	|
+33208	|	animals	|		|	blast name	|
+33208	|	metazoans	|		|	genbank common name	|
+33208	|	multicellular animals	|		|	common name	|
+33213	|	Bilateria	|		|	scientific name	|
+33316	|	Coelomata	|		|	scientific name	|
+33511	|	Deuterostomia	|		|	scientific name	|
+40674	|	Mammalia	|		|	scientific name	|
+40674	|	mammals	|		|	genbank common name	|
+40674	|	mammals	|	mammals<blast40674>	|	blast name	|
+60720	|	Nesticus archeri	|		|	scientific name	|
+60721	|	Nesticus barri	|		|	scientific name	|
+60722	|	Nesticus barrowsi	|		|	scientific name	|
+60723	|	Nesticus bishopi	|		|	scientific name	|
+60724	|	Nesticus brimleyi	|		|	scientific name	|
+60725	|	Nesticus carteri	|		|	scientific name	|
+60726	|	Nesticus cooperi	|		|	scientific name	|
+60727	|	Nesticus crosbyi	|		|	scientific name	|
+60728	|	Nesticus dellingeri	|		|	scientific name	|
+60729	|	Nesticus furtivus	|		|	scientific name	|
+77110	|	Nuphar advena (Aiton) W.T.Aiton subsp. ulvacea (G.S.Mill. et Standl.) Padgett	|		|	synonym	|
+77110	|	Nuphar advena subsp. ulvacea	|		|	synonym	|
+77110	|	Nuphar ulvacea	|		|	scientific name	|
+77110	|	Nuphar ulvacea (G.S.Mill. et Standl.) Standl.	|		|	synonym	|
+77111	|	Nuphar advena (Aiton) W.T. Aiton subsp. orbiculata (Small) Padgett	|		|	synonym	|
+77111	|	Nuphar advena subsp. orbiculata	|		|	synonym	|
+77111	|	Nuphar orbiculata	|		|	scientific name	|
+77111	|	Nuphar orbiculata (Small) Standl.	|		|	synonym	|
+77112	|	Nuphar sagittifolia	|		|	scientific name	|
+77113	|	Nuphar lutea	|		|	scientific name	|
+77113	|	Nuphar luteum	|		|	misspelling	|
+77113	|	yellow water lily	|		|	common name	|
+77114	|	Nuphar microphylla	|		|	scientific name	|
+77115	|	Cyprinodon tularosa	|		|	scientific name	|
+77116	|	uncultured Acinetobacter NB1	|		|	scientific name	|
+77117	|	uncultured Acinetobacter NB2	|		|	scientific name	|
+77118	|	environmental samples	|	environmental samples <Burkholderia>	|	scientific name	|
+77119	|	uncultured Burkholderia NB3	|		|	scientific name	|
+77420	|	uncultured Treponema clone RFS34p	|		|	scientific name	|
+77421	|	uncultured Treponema clone RFS35p	|		|	scientific name	|
+77422	|	uncultured Treponema clone RFS37p	|		|	scientific name	|
+77423	|	uncultured Treponema clone RFS39p	|		|	scientific name	|
+77424	|	uncultured Treponema clone RFS41	|		|	scientific name	|
+77425	|	uncultured Treponema clone RFS42p	|		|	scientific name	|
+77426	|	uncultured Treponema clone RFS43p	|		|	scientific name	|
+77427	|	uncultured Treponema clone RFS44p	|		|	scientific name	|
+77428	|	uncultured Treponema clone RFS45p	|		|	scientific name	|
+77429	|	uncultured Treponema clone RFS46p	|		|	scientific name	|
+77760	|	Synechocystis PCC9413	|		|	equivalent name	|
+77760	|	Synechocystis sp. BO 9201	|		|	synonym	|
+77760	|	Synechocystis sp. BO9201	|		|	synonym	|
+77760	|	Synechocystis sp. PCC 9413	|		|	scientific name	|
+77763	|	Banna virus	|		|	scientific name	|
+77763	|	Coltivirus BANNA	|		|	synonym	|
+77763	|	Coltivirus JKT-6423	|		|	includes	|
+77763	|	Coltivirus JKT-6969	|		|	includes	|
+77763	|	Coltivirus JKT-7043	|		|	includes	|
+77764	|	Clitoria falcata mosaic virus	|		|	scientific name	|
+77764	|	Clitoria geminivirus	|		|	synonym	|
+77765	|	Malva geminivirus	|		|	scientific name	|
+77766	|	Sida geminivirus	|		|	scientific name	|
+77767	|	Tobacco geminivirus	|		|	scientific name	|
+77768	|	"Bacteroides (Prevotella) ruminicola subsp. ruminicola" biovar 7	|		|	synonym	|
+77768	|	Bacteroides (Prevotella) ruminicola subsp. ruminicola biovar 7	|		|	synonym	|
+77768	|	Prevotella albensis	|		|	scientific name	|
+77768	|	Prevotella albensis Avgustin et al. 1997	|		|	synonym	|
+77769	|	quadrivittata group	|		|	scientific name	|
+82870	|	Plectanocotyle	|		|	scientific name	|
+82871	|	Plectanocotyle gurnardi	|		|	scientific name	|
+82872	|	Zeuxapta	|		|	scientific name	|
+82873	|	Zeuxapta seriolae	|		|	scientific name	|
+82874	|	Neomicrocotyle	|		|	scientific name	|
+82875	|	Neomicrocotyle pacifica	|		|	scientific name	|
+82876	|	Tachyporinae	|		|	scientific name	|
+82877	|	Oxytelinae	|		|	scientific name	|
+82878	|	Dasycerinae	|		|	scientific name	|
+82879	|	Scaphidiinae	|		|	scientific name	|
+89593	|	Craniata	|	Craniata <chordata>	|	scientific name	|
+93472	|	Hormonema macrosporum	|		|	scientific name	|
+93473	|	Phaeotheca fissurella	|		|	scientific name	|
+93474	|	Trimmatostroma abietina	|		|	scientific name	|
+93475	|	Trimmatostroma cordae	|		|	scientific name	|
+93476	|	Trimmatostroma salicis	|		|	scientific name	|
+93477	|	Sarcinomyces petricola	|		|	scientific name	|
+93478	|	Kabatiella caulivora	|		|	scientific name	|
+93479	|	Kabatiella lini	|		|	scientific name	|
+94430	|	Voyriella	|		|	scientific name	|
+94431	|	Voyriella parviflora	|		|	scientific name	|
+94432	|	Human rotavirus MP409	|		|	scientific name	|
+94433	|	Camberwell virus	|		|	scientific name	|
+94434	|	Oryzorictes	|		|	scientific name	|
+94435	|	Oryzorictes talpoides	|		|	scientific name	|
+94435	|	mole tenrec	|		|	common name	|
+94435	|	molelike rice tenrec	|		|	genbank common name	|
+94436	|	Parascalops	|		|	scientific name	|
+94437	|	Brewer's mole	|		|	common name	|
+94437	|	Parascalops breweri	|		|	scientific name	|
+94437	|	hairy-tailed mole	|		|	genbank common name	|
+94438	|	Tenrec	|		|	scientific name	|
+94439	|	Tenrec ecaudatus	|		|	scientific name	|
+94439	|	tail-less tenrec	|		|	common name	|
+94439	|	tailess tenrec	|		|	genbank common name	|
+95260	|	Palthis angulalis	|		|	scientific name	|
+95261	|	Papaipema	|		|	scientific name	|
+95262	|	Papaipema sp.	|		|	scientific name	|
+95263	|	Triocnemis	|		|	scientific name	|
+95264	|	Triocnemis saporis	|		|	scientific name	|
+95265	|	Polybia occidentalis nigratella	|		|	scientific name	|
+95266	|	Ropalidia romandi cabeti	|		|	scientific name	|
+95267	|	Epyrinae gen. sp. JC106	|		|	scientific name	|
+95268	|	Ammonophila sp. JC134	|		|	misspelling	|
+95268	|	Ammophila sp. JC134	|		|	scientific name	|
+95269	|	Arge	|		|	scientific name	|
+96040	|	Blastocrithidia triatoma	|		|	scientific name	|
+96041	|	Leptomonas peterhoffi	|		|	scientific name	|
+96043	|	environmental samples	|	environmental samples <Asteridae>	|	scientific name	|
+96044	|	Bufo regularis gut clone FMA-1999	|		|	scientific name	|
+96045	|	'Nocardia uniformis subsp. tsuyamanensis'	|		|	synonym	|
+96045	|	Nocardia uniformis subsp. tsuyamanensis	|		|	scientific name	|
+96045	|	Nocardia uniformis subsp. tsuyamanesis	|		|	misspelling	|
+96046	|	Human rotavirus strain MP409	|		|	scientific name	|
+96047	|	Ammophila arenaria	|		|	scientific name	|
+96048	|	Elymus sp. Kellogg s.n.	|		|	scientific name	|
+96049	|	Ehrharta erecta	|		|	scientific name	|
+117570	|	Teleostomi	|		|	scientific name	|
+117571	|	Euteleostomi	|		|	scientific name	|
+117571	|	bony vertebrates	|		|	genbank common name	|
+131567	|	biota	|		|	synonym	|
+131567	|	cellular organisms	|		|	scientific name	|
+207598	|	Homo/Pan/Gorilla group	|		|	scientific name	|
+314146	|	Euarchontoglires	|		|	scientific name	|
+314293	|	Anthropoidea	|		|	synonym	|
+314293	|	Simiiformes	|		|	scientific name	|
+314295	|	Hominoidea	|		|	scientific name	|
+325230	|	Francisella sp. AF-01-23	|		|	scientific name	|
+325231	|	Francisella sp. AF-01-27	|		|	scientific name	|
+325232	|	Francisella sp. AF-01-28	|		|	scientific name	|
+325233	|	Francisella sp. AF-03-27	|		|	scientific name	|
+325234	|	Francisella sp. AF-03-28	|		|	scientific name	|
+325235	|	Francisella sp. AF-04-15	|		|	scientific name	|
+325236	|	Francisella sp. AF-04-405	|		|	scientific name	|
+325237	|	Francisella sp. AF-01-22	|		|	scientific name	|
+325240	|	Shewanella baltica OS155	|		|	scientific name	|
+325240	|	Shewanella baltica str. OS155	|		|	equivalent name	|
+325240	|	Shewanella baltica strain OS155	|		|	equivalent name	|
+325241	|	Influenza A virus (A/chicken/Beijing/ZH/02(H9N2))	|		|	scientific name	|
+325242	|	Influenza A virus (A/chicken/Hebei/11/02(H9N2))	|		|	scientific name	|
+325243	|	Influenza A virus (A/chicken/Jiangsu/W1/00(H9N2))	|		|	scientific name	|
+325244	|	Influenza A virus (A/chicken/Neimeng/ZH/02(H9N2))	|		|	scientific name	|
+325245	|	Influenza A virus (A/chicken/Yunan/3/01(H9N2))	|		|	scientific name	|
+325246	|	Influenza A virus (A/chicken/Beijing/8/99(H9N2))	|		|	scientific name	|
+325247	|	Influenza A virus (A/chicken/Henan/1/01(H9N2))	|		|	scientific name	|
+325248	|	Influenza A virus (A/chicken/Henan/1/98(H9N2))	|		|	scientific name	|
+325249	|	Lactococcus bacteriophage Rc13	|		|	scientific name	|
+325250	|	Lactococcus bacteriophage Rc14	|		|	scientific name	|
+325251	|	Lactococcus bacteriophage Rc15	|		|	scientific name	|
+325252	|	Lactococcus bacteriophage Rc16	|		|	scientific name	|
+325253	|	Lactococcus bacteriophage Rc6	|		|	scientific name	|
+325254	|	Stainforthia concava	|		|	synonym	|
+325254	|	Virgulina concava	|		|	scientific name	|
+325255	|	Stainforthia sp. isolate 2641	|		|	scientific name	|
+325256	|	Bolivina sp. JPM99	|		|	scientific name	|
+325257	|	Bolivina sp. isolate 170	|		|	scientific name	|
+325258	|	Cassidulinoides	|		|	scientific name	|
+325259	|	Cassidulinoides porrectus	|		|	scientific name	|
+331540	|	Pomponema	|		|	scientific name	|
+331541	|	Pomponema sp. 2P12K2	|		|	scientific name	|
+331542	|	Linhomoeus	|		|	scientific name	|
+331543	|	Linhomoeus sp. 7I12K2	|		|	scientific name	|
+331544	|	Latronema	|		|	scientific name	|
+331545	|	Latronema sp. 1P10K3	|		|	scientific name	|
+331546	|	Phanodermatidae	|		|	scientific name	|
+331547	|	Phanoderma	|		|	scientific name	|
+331548	|	Phanoderma sp. 3I23B4	|		|	scientific name	|
+331549	|	Latronema sp. 2P15K2	|		|	scientific name	|
+332089	|	Geoemyda silvatica	|		|	scientific name	|
+332089	|	Heosemys silvatica	|		|	synonym	|
+332089	|	Vijayachelys silvatica	|		|	synonym	|
+332130	|	Spirogyra sp. ARL 87 3	|		|	scientific name	|
+332131	|	Spirogyra sp. CFD JH0058	|		|	scientific name	|
+332132	|	Spirogyra sp. CFD JH0130	|		|	scientific name	|
+332136	|	Arthroderma cookiellum	|		|	scientific name	|
+332136	|	Arthroderma cookiellum (de Clercq) Weitzman et al.	|		|	synonym	|
+332136	|	Nannizzia cookiella	|		|	synonym	|
+332136	|	Nannizzia cookiella de Clercq	|		|	synonym	|
+332137	|	Mitsuokella sp. TM-10	|		|	scientific name	|
+332138	|	proteobacterium 4R11-401	|		|	scientific name	|
+332139	|	proteobacterium 8R12-301	|		|	scientific name	|
+333160	|	Influenza A virus (A/green-winged teal/TX/7/01(H8N4))	|		|	scientific name	|
+333161	|	Influenza A virus (A/mallard/MD/F123/98(H6N8))	|		|	scientific name	|
+333162	|	Influenza A virus (A/mallard/MN/107/98(H6N8))	|		|	scientific name	|
+333163	|	Influenza A virus (A/mallard/MN/133/98(H5N2))	|		|	scientific name	|
+333164	|	Influenza A virus (A/mallard/MN/145/99(H4N6))	|		|	scientific name	|
+333165	|	Influenza A virus (A/mallard/MN/153/98(H9N2))	|		|	scientific name	|
+333166	|	Influenza A virus (A/mallard/MN/17/99(H7N7))	|		|	scientific name	|
+333167	|	Influenza A virus (A/mallard/MN/2/98(H4N6))	|		|	scientific name	|
+333168	|	Influenza A virus (A/mallard/MN/220/98(H4N6))	|		|	scientific name	|
+333169	|	Influenza A virus (A/mallard/MN/253/99(H6N5))	|		|	scientific name	|
+335118	|	Machaerina	|		|	scientific name	|
+335118	|	Machaerina Vahl	|		|	synonym	|
+335119	|	Machaerina sp. Johns 9195	|		|	scientific name	|
+376913	|	Haplorrhini	|		|	scientific name	|

Added: trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/nodes.dmp
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/nodes.dmp	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/taxdump/nodes.dmp	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,189 @@
+1760	|	201174	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+5533	|	89013	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+9605	|	207598	|	genus	|		|	5	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+9606	|	9605	|	species	|	HS	|	5	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+32061	|	200795	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+89593	|	7711	|	subphylum	|		|	10	|	0	|	1	|	1	|	2	|	0	|	0	|	0	|		|
+165724	|	165723	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+165726	|	165725	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+165791	|	165790	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+192684	|	200376	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+200204	|	165794	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+200795	|	2	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+231509	|	231508	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+266791	|	266789	|	genus	|		|	4	|	1	|	1	|	1	|	4	|	1	|	0	|	0	|		|
+2759	|	131567	|	superkingdom	|		|	1	|	0	|	1	|	0	|	1	|	0	|	0	|	0	|		|
+6072	|	33208	|	no rank	|		|	1	|	1	|	1	|	1	|	5	|	0	|	1	|	0	|		|
+7711	|	33511	|	phylum	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+7742	|	89593	|	no rank	|		|	10	|	0	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+7776	|	7742	|	superclass	|		|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+8287	|	117571	|	no rank	|		|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+9347	|	32525	|	no rank	|		|	2	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+9443	|	314146	|	order	|		|	5	|	0	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+9526	|	314293	|	parvorder	|		|	5	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+9604	|	314295	|	family	|		|	5	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+27591	|	9933	|	species	|	GG	|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+27592	|	9895	|	subfamily	|		|	2	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+27596	|	9918	|	species	|	BD	|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+27598	|	9859	|	species	|	CS	|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+32523	|	8287	|	no rank	|		|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+32524	|	32523	|	no rank	|		|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+32525	|	40674	|	no rank	|		|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+33154	|	2759	|	no rank	|		|	4	|	0	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+33208	|	33154	|	kingdom	|		|	1	|	0	|	1	|	1	|	1	|	1	|	0	|	0	|		|
+33213	|	6072	|	no rank	|		|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+33316	|	33213	|	no rank	|		|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+33511	|	33316	|	no rank	|		|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+40674	|	32524	|	class	|		|	2	|	0	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+60720	|	44402	|	species	|	NA	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60721	|	44402	|	species	|	NB	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60722	|	44402	|	species	|	NB	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60723	|	44402	|	species	|	NB	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60724	|	44402	|	species	|	NB	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60725	|	44402	|	species	|	NC	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60726	|	44402	|	species	|	NC	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60727	|	44402	|	species	|	NC	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60728	|	44402	|	species	|	ND	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+60729	|	44402	|	species	|	NF	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+77110	|	4415	|	species	|	NU	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+77111	|	4415	|	species	|	NO	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+77112	|	4415	|	species	|	NS	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+77113	|	4415	|	species	|	NL	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+77114	|	4415	|	species	|	NM	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+77115	|	28741	|	species	|	CT	|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+77116	|	75661	|	species	|	UA	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77117	|	75661	|	species	|	UA	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77118	|	32008	|	no rank	|		|	11	|	0	|	11	|	1	|	0	|	1	|	0	|	0	|	uncultured	|
+77119	|	77118	|	species	|	UB	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77420	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77421	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77422	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77423	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77424	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77425	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77426	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77427	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77428	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77429	|	77353	|	species	|	UT	|	11	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|	uncultured	|
+77760	|	1142	|	species	|	SS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+77763	|	208294	|	species	|	BV	|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+77764	|	291027	|	species	|	CF	|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+77765	|	291027	|	species	|	MG	|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+77766	|	291027	|	species	|	SG	|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+77767	|	291027	|	species	|	TG	|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+77768	|	838	|	species	|	PA	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+77769	|	7324	|	species group	|		|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+82870	|	116927	|	genus	|		|	1	|	1	|	1	|	1	|	9	|	1	|	0	|	0	|		|
+82871	|	82870	|	species	|	PG	|	1	|	1	|	1	|	1	|	9	|	1	|	1	|	0	|		|
+82872	|	66915	|	genus	|		|	1	|	1	|	1	|	1	|	9	|	1	|	0	|	0	|		|
+82873	|	82872	|	species	|	ZS	|	1	|	1	|	1	|	1	|	9	|	1	|	1	|	0	|		|
+82874	|	54584	|	genus	|		|	1	|	1	|	1	|	1	|	9	|	1	|	0	|	0	|		|
+82875	|	82874	|	species	|	NP	|	1	|	1	|	1	|	1	|	9	|	1	|	1	|	0	|		|
+82876	|	351509	|	subfamily	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+82877	|	351512	|	subfamily	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+82878	|	144867	|	subfamily	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+82879	|	351512	|	subfamily	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+89593	|	7711	|	subphylum	|		|	10	|	0	|	1	|	1	|	2	|	0	|	0	|	0	|		|
+93472	|	46635	|	species	|	HM	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93473	|	92987	|	species	|	PF	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93474	|	92989	|	species	|	TA	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93475	|	92989	|	species	|	TC	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93476	|	92989	|	species	|	TS	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93477	|	62074	|	species	|	SP	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93478	|	5422	|	species	|	KC	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+93479	|	5422	|	species	|	KL	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+94430	|	303180	|	genus	|		|	4	|	1	|	1	|	1	|	1	|	1	|	0	|	0	|		|
+94431	|	94430	|	species	|	VP	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+94432	|	36428	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+94433	|	11983	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+94434	|	176110	|	genus	|		|	2	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+94435	|	94434	|	species	|	OT	|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+94436	|	9373	|	genus	|		|	2	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+94437	|	94436	|	species	|	PB	|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+94438	|	176113	|	genus	|		|	2	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+94439	|	94438	|	species	|	TE	|	2	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+95260	|	56403	|	species	|	PA	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95261	|	95182	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+95262	|	95261	|	species	|	PS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95263	|	319997	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+95264	|	95263	|	species	|	TS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95265	|	91432	|	subspecies	|	PO	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95266	|	91443	|	subspecies	|	RR	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95267	|	69814	|	species	|	EG	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95268	|	95167	|	species	|	AS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+95269	|	85773	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+96040	|	28004	|	species	|	BT	|	1	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+96041	|	5683	|	species	|	LP	|	1	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+96043	|	128925	|	no rank	|		|	11	|	0	|	1	|	1	|	1	|	1	|	0	|	0	|	uncultured	|
+96044	|	96043	|	species	|	BR	|	11	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|	uncultured	|
+96045	|	53432	|	subspecies	|	NU	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+96046	|	161240	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+96047	|	95188	|	species	|	AA	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+96048	|	15492	|	species	|	ES	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+96049	|	15488	|	species	|	EE	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+117570	|	7776	|	no rank	|		|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+117571	|	117570	|	no rank	|		|	10	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+131567	|	1	|	no rank	|		|	8	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+207598	|	9604	|	no rank	|		|	5	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+314146	|	9347	|	superorder	|		|	2	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|
+314293	|	376913	|	infraorder	|		|	5	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+314295	|	9526	|	superfamily	|		|	5	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+325230	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325231	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325232	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325233	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325234	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325235	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325236	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325237	|	262	|	species	|	FS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325240	|	62322	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325241	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325242	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325243	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325244	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325245	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325246	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325247	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325248	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+325249	|	374900	|	no rank	|		|	3	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325250	|	374900	|	no rank	|		|	3	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325251	|	374900	|	no rank	|		|	3	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325252	|	374900	|	no rank	|		|	3	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325253	|	374900	|	no rank	|		|	3	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+325254	|	349982	|	species	|	VC	|	1	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+325255	|	313617	|	species	|	SS	|	1	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+325256	|	46083	|	species	|	BS	|	1	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+325257	|	46083	|	species	|	BS	|	1	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+325258	|	203408	|	genus	|		|	1	|	1	|	1	|	1	|	1	|	1	|	0	|	0	|		|
+325259	|	325258	|	species	|	CP	|	1	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+331540	|	73920	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+331541	|	331540	|	species	|	PS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+331542	|	319958	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+331543	|	331542	|	species	|	LS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+331544	|	331538	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+331545	|	331544	|	species	|	LS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+331546	|	319996	|	family	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+331547	|	331546	|	genus	|		|	1	|	1	|	1	|	1	|	5	|	1	|	0	|	0	|		|
+331548	|	331547	|	species	|	PS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+331549	|	331544	|	species	|	LS	|	1	|	1	|	1	|	1	|	5	|	1	|	1	|	0	|		|
+332089	|	204959	|	species	|	GS	|	10	|	1	|	1	|	1	|	2	|	1	|	1	|	0	|		|
+332130	|	3179	|	species	|	SS	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+332131	|	3179	|	species	|	SS	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+332132	|	3179	|	species	|	SS	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+332136	|	63399	|	species	|	AC	|	4	|	1	|	1	|	1	|	4	|	1	|	1	|	0	|		|
+332137	|	52225	|	species	|	MS	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+332138	|	81684	|	species	|	PR	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+332139	|	81684	|	species	|	PR	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+333160	|	142943	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333161	|	184009	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333162	|	184009	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333163	|	119220	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333164	|	102800	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333165	|	102796	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333166	|	119218	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333167	|	102800	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333168	|	102800	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+333169	|	184006	|	no rank	|		|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+335118	|	4609	|	genus	|		|	4	|	1	|	1	|	1	|	1	|	1	|	0	|	0	|		|
+335119	|	335118	|	species	|	MS	|	4	|	1	|	1	|	1	|	1	|	1	|	1	|	0	|		|
+376913	|	9443	|	suborder	|		|	5	|	1	|	1	|	1	|	2	|	1	|	0	|	0	|		|

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tblastn.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tblastn.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tblastn.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,108 @@
+TBLASTN 2.2.10 [Oct-19-2004]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= HAHU | 1114 |  Hemoglobin alpha chain - Human, chimpanzee, and
+pygmy chimpanzee
+         (141 letters)
+
+Database: testnt.fa 
+           2 sequences; 13,260 total letters
+
+Searching..done
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+gi|10040111|emb|AL390796.6|AL390796 Homo sapiens chromosome 1 cl...    18   2.3  
+test6                                                                  16   6.7  
+
+>gi|10040111|emb|AL390796.6|AL390796 Homo sapiens chromosome 1 clone
+            RP11-562F3, *** SEQUENCING IN PROGRESS ***, 16 unordered
+            pieces /len=164198
+          Length = 8160
+
+ Score = 17.7 bits (34), Expect = 2.3
+ Identities = 8/23 (34%), Positives = 11/23 (47%)
+ Frame = +1
+
+Query: 56   KGHGKKVADALTNAVAHVDDMPN 78
+            KGH KK+     N  ++  D  N
+Sbjct: 7603 KGHLKKITSFFFNHTSNYRDEYN 7671
+
+
+
+ Score = 17.3 bits (33), Expect = 3.0
+ Identities = 9/28 (32%), Positives = 13/28 (46%)
+ Frame = +1
+
+Query: 31   RMFLSFPTTKTYFPHFDLSHGSAQVKGH 58
+            RM+  F    TY+ +F L      V G+
+Sbjct: 7069 RMYFCFLKVLTYYFNFFLFFNIFYVLGY 7152
+
+
+>test6
+          Length = 5100
+
+ Score = 16.2 bits (30), Expect = 6.7
+ Identities = 6/9 (66%), Positives = 7/9 (77%)
+ Frame = +3
+
+Query: 72   HVDDMPNAL 80
+            H+D M NAL
+Sbjct: 3822 HMDYMSNAL 3848
+
+
+
+ Score = 15.8 bits (29), Expect = 8.8
+ Identities = 5/7 (71%), Positives = 6/7 (85%)
+ Frame = -2
+
+Query: 93   VDPVNFK 99
+            +DP NFK
+Sbjct: 1814 IDP*NFK 1794
+
+
+  Database: testnt.fa
+    Posted date:  Feb 5, 2005  6:26 PM
+  Number of letters in database: 13,260
+  Number of sequences in database:  2
+  
+Lambda     K      H
+   0.319    0.130    0.385 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 1998
+Number of Sequences: 2
+Number of extensions: 23
+Number of successful extensions: 4
+Number of sequences better than 10.0: 2
+Number of HSP's better than 10.0 without gapping: 3
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 4
+length of query: 141
+length of database: 4420
+effective HSP length: 39
+effective length of query: 102
+effective length of database: 4342
+effective search space:   442884
+effective search space used:   442884
+frameshift window, decay const: 40,  0.1
+T: 13
+A: 40
+X1: 16 ( 7.4 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 29 (16.3 bits)
+S2: 29 (15.8 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.ace
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.ace	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.ace	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,33 @@
+
+Protein : "roa1_drome"
+Peptide "roa1_drome"
+
+Peptide : "roa1_drome"
+MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVV
+VMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVK
+KLFVGALKDDHDEQSIRDYFQHFGNIVDNIVIDKETGKKRGFAFVEFDDYDPVDKVVLQK
+QHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGN
+NWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGND
+FGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY
+
+
+Sequence : "Name\; 4\% strewn with \\ various \/ escaped characters"
+DNA "Name\; 4\% strewn with \\ various \/ escaped characters"
+
+DNA : "Name\; 4\% strewn with \\ various \/ escaped characters"
+gctcggactctatctagcagaaacctcgttcagctagtcttgcttcatggaggtttgatc
+tagactgcaaacgtcggtgctaaaagaccatacttccgtatgtgcctatcgggagcagtc
+gctgagaagtgcggaatgatccttcaatgaccgccgttaaagcctgggagtccgcgccac
+aatcattccatatacagcaacacgcgctacgcggacctctcggtgggtgacgattctatt
+gaggcgttgaagcgagaaagatattccgattcttttcgagtctatagttaaatcggactg
+catcatccattttagggcata
+
+
+Sequence : "Last"
+DNA "Last"
+
+DNA : "Last"
+ggccgggatggccggacctgttctgaacatcttatatccacccgaacaagttataaacaa
+tttaaatctgggcggccatctataagcgtgtcttcagtatgagagtcttcggatatcacg
+acccattaggaaag
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.embl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.embl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.embl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,235 @@
+ID   SC10H5 standard; DNA; PRO; 4870 BP.
+XX
+AC   AL031232;
+XX
+DE   Streptomyces coelicolor cosmid 10H5.
+XX
+KW   integral membrane protein.
+XX
+OS   Streptomyces coelicolor
+OC   Eubacteria; Firmicutes; Actinomycetes; Streptomycetes;
+OC   Streptomycetaceae; Streptomyces.
+XX
+RN   [1]
+RP   1-4870
+RA   Oliver K., Harris D.;
+RT   ;
+RL   Unpublished.
+XX
+RN   [2]
+RP   1-4870
+RA   Parkhill J., Barrell B.G., Rajandream M.A.;
+RT   ;
+RL   Submitted (10-AUG-1998) to the EMBL/GenBank/DDBJ databases.
+RL   Streptomyces coelicolor sequencing project,
+RL   Sanger Centre, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA
+RL   E-mail: barrell at sanger.ac.uk
+RL   Cosmids supplied by Prof. David A. Hopwood, [3]
+RL   John Innes Centre, Norwich Research Park, Colney,
+RL   Norwich, Norfolk NR4 7UH, UK.
+XX
+RN   [3]
+RP   1-4870
+RA   Redenbach M., Kieser H.M., Denapaite D., Eichner A.,
+RA   Cullum J., Kinashi H., Hopwood D.A.;
+RT   "A set of ordered cosmids and a detailed genetic and physical
+RT   map for the 8 Mb Streptomyces coelicolor A3(2) chromosome.";
+RL   Mol. Microbiol. 21(1):77-96(1996).
+XX
+CC   Notes:
+CC
+CC   Streptomyces coelicolor sequencing at The Sanger Centre is funded 
+CC   by the BBSRC.
+CC
+CC   Details of S. coelicolor sequencing at the Sanger Centre 
+CC   are available on the World Wide Web. 
+CC   (URL; http://www.sanger.ac.uk/Projects/S_coelicolor/)
+CC
+CC   CDS are numbered using the following system eg SC7B7.01c. 
+CC   SC (S. coelicolor), 7B7 (cosmid name), .01 (first CDS), 
+CC   c (complementary strand).
+CC
+CC   The more significant matches with motifs in the PROSITE
+CC   database are also included but some of these may be fortuitous.
+CC
+CC   The length in codons is given for each CDS.
+CC
+CC   Usually the highest scoring match found by fasta -o is given for
+CC   CDS which show significant similarity to other CDS in the database.
+CC   The position of possible ribosome binding site sequences are
+CC   given where these have been used to deduce the initiation codon.
+CC   
+CC   Gene prediction is based on positional base preference in codons 
+CC   using a specially developed Hidden Markov Model (Krogh et al., 
+CC   Nucleic Acids Research, 22(22):4768-4778(1994)) and the FramePlot 
+CC   program of Bibb et al., Gene 30:157-66(1984) as implemented at 
+CC   http://www.nih.go.jp/~jun/cgi-bin/frameplot.pl. CAUTION:  We may  
+CC   not have predicted the correct initiation codon.  Where possible 
+CC   we choose an initiation codon (atg, gtg, ttg or (att)) which is 
+CC   preceded by an upstream ribosome binding site sequence (optimally 
+CC   5-13bp before the initiation codon).  If this cannot be identified
+CC   we choose the most upstream initiation codon.
+CC     
+CC   IMPORTANT: This sequence MAY NOT be the entire insert of
+CC   the sequenced clone.  It may be shorter because we only
+CC   sequence overlapping sections once, or longer, because we
+CC   arrange for a small overlap between neighbouring submissions.
+CC
+CC   Cosmid 10H5 lies to the right of 3A7 on the AseI-B genomic restriction 
+CC   fragment.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..4870
+FT                   /organism="Streptomyces coelicolor"
+FT                   /strain="A3(2)"
+FT                   /clone="cosmid 10H5"
+FT   CDS             complement(<1..327)
+FT                   /note="SC10H5.01c, unknown, partial CDS, len >109 aa;
+FT                   possible integral membrane protein"
+FT                   /gene="SC10H5.01c"
+FT                   /product="hypothetical protein SC10H5.01c"
+FT   CDS             complement(350..805)
+FT                   /note="SC10H5.02c, probable integral membrane protein, len:
+FT                   151 aa; similar to S. coelicolor hypothetical protein
+FT                   TR:O54194 (EMBL:AL021411) SC7H1.35 (155 aa), fasta scores;
+FT                   opt: 431 z-score: 749.8 E(): 0, 53.5% identity in 114 aa
+FT                   overlap."
+FT                   /product="putative integral membrane protein"
+FT                   /gene="SC10H5.02c"
+FT   RBS             complement(812..815)
+FT                   /note="possible RBS upstream of SC10H5.02c"
+FT   CDS             complement(837..1301)
+FT                   /note="SC10H5.03c, probable integral membrane protein, len:
+FT                   154 aa"
+FT                   /product="putative integral membrane protein"
+FT                   /gene="SC10H5.03c"
+FT   RBS             complement(1308..1312)
+FT                   /note="possible RBS upstream of SC10H5.03c"
+FT   CDS             complement(1427..1735)
+FT                   /note="SC10H5.04c, unknown, len: 103 aa; possible membrane"
+FT                   /gene="SC10H5.04c"
+FT                   /product="hypothetical protein SC10H5.04c"
+FT   RBS             complement(1738..1741)
+FT                   /note="possible RBS upstream of SC10H5.05c"
+FT   misc_feature    1800^1801
+FT                   /note="Zero-length feature added to test Bioperl parsing"
+FT   CDS             1933..2022
+FT                   /note="SC10H5.05, questionable ORF, len: 29 aa"
+FT                   /gene="SC10H5.05"
+FT                   /product="hypothetical protein SC10H5.05"
+FT   CDS             2019..2642
+FT                   /note="SC10H5.06, probable membrane protein, len: 207 aa;
+FT                   similar to S. coelicolor TR:O54192 SC7H1.33c (191 aa),
+FT                   fasta scores; opt: 312 z-score: 355.2 E(): 1.6e-12, 36.8%
+FT                   identity in 182 aa overlap"
+FT                   /product="putative membrane protein"
+FT                   /gene="SC10H5.06"
+FT   RBS             2627..2631
+FT                   /note="possible RBS upstream of SC10H5.07"
+FT   CDS             2639..4048
+FT                   /note="SC10H5.07, unknown, len: 469 aa"
+FT                   /gene="SC10H5.07"
+FT                   /product="hypothetical protein SC10H5.07"
+FT   CDS             complement(4100..4297)
+FT                   /note="SC10H5.08c, unknown, len: 65 aa"
+FT                   /gene="SC10H5.08c"
+FT                   /product="hypothetical protein SC10H5.08c"
+FT   RBS             complement(4314..4319)
+FT                   /note="possible RBS upstream of SC10H5.08c"
+FT   CDS             complement(4439..>4870)
+FT                   /note="SC10H5.09c, probable integral membrane protein,
+FT                   partial CDS len: >143 aa; some similarity in C-terminus to
+FT                   S. coelicolor hypothetical protein TR:O54106
+FT                   (EMBL:AL021529) SC10A5.15 (114 aa), fasta scores; opt: 145
+FT                   z-score: 233.8 E(): 9.2e-06, 33.3% identity in 81 aa
+FT                   overlap. Overlaps and extends SC3A7.01c"
+FT                   /product="putative integral membrane protein"
+FT                   /gene="SC10H5.09c"
+FT   misc_feature    4769..4870
+FT                   /note="overlap with cosmid 3A7 from 1 to 102"
+XX
+SQ   Sequence 4870 BP; 769 A; 1717 C; 1693 G; 691 T; 0 other;
+     gatcagtaga cccagcgaca gcagggcggg gcccagcagg ccggccgtgg cgtagagcgc        60
+     gaggacggcg accggcgtgg ccaccgacag gatggctgcg gcgacgcgga cgacaccgga       120
+     gtgtgccagg gcccaccaca cgccgatggc cgcgagcgcg agtcccgcgc tgccgaacag       180
+     ggcccacagc acactgcgca gaccggcggc cacgagtggc gccaggacgg tgcccagcag       240
+     gagcagcagg gtgacgtggg cgcgcgctgc actgtggccg ccccgtccgc ccgacgcgcg       300
+     cggctcgtca tctcgcggtc ccaccaccgg tcggccccat tactcgtcct caaccctgtg       360
+     gcgactgacg ttccccggac aggtcgtacc gattgccgcc acgccccacc acgcacaggg       420
+     cccagacgac gaagcctgac atggtgatca tgacgacgga ccacaccggg tagtacggca       480
+     gcgagaggaa gttggcgatg atcaccagcc cggcgatggc gaccccggtg acacgtgccc       540
+     acatcgccgt tttgagcagc ccggcgctga cgaccatggc gagcgcgccg agcgcgagat       600
+     ggatccaccc ccacccggtg agatcgaact ggaaaacgta gttgggcgtg gtgacgaaga       660
+     cgtcgtcctc ggcgatggcc atgatgcccc ggaagaggct gagcagcccg gcgaggaaga       720
+     gcatcaccgc cgcgaaggcg gtaaggcccg tcgcccattc ctgcctcgcg gtgtgtgccg       780
+     ggtggtgggt atgtgacgtg gtcatctcgg acctcgtttc gtggaatgcg gatgcttcag       840
+     cgagcggagg cgccggtgcc cgccgcgccc gtgtgccctg ccgggccgtg accggacagg       900
+     accaattcct tcgccttgcg gaactcctcg tccgtgatgg caccccggtc tcggatctcg       960
+     gagagccggg ccagctcgtc gacgctgctg gacccgccgc ccacggtctt cctgatgtag      1020
+     gcgtcgaact cctcctgctg agcccgtgcc cgcgttgtct cccggctgcc catgttcttg      1080
+     ccgcgagcga tcacgtagac gaaaacgccc aggaagggca ggaggatgca gaacaccaac      1140
+     cagccggcct tcgcccagcc actcagtccg tcgtcccgga agatgtcggt gacgacgcgg      1200
+     aagagcagga cgaaccacat gatccacagg aagatcatca gcatcgtcca gaaggcaccc      1260
+     agcagtgggt agtcgtacgc caggtaggtc tgtgcactca tgtccgtcct ccgtcctccg      1320
+     gggcgcggcc cggcggccct cgttccgtac tgacatcagg gtggtcacgg gtcccaccgg      1380
+     tcggcatcac ccggcacggg tgagtggggc gccgaggccg tcgtggtcag gcccgggaca      1440
+     ccggtgtgac cctggtggaa ggacgcgtcc cgtggggcac gcaccgccgg ccgagggcga      1500
+     ccaccgcctc ggtcagtccg agcaggccca gccacaggcc gagaagtcgg gtcagggcac      1560
+     gggccgactc ggcgggcagc gcgaggacga cgattccggc gacgtcgacg gccagcgggt      1620
+     tgcgcaggcc cagcactccg gccggggcgc ccggcaccag cgtggcgagg gccgatgcca      1680
+     tgagccaggt ccaggaaccc ccaagcctgg cgaggacgtg cgccggatcg ctcaatgctc      1740
+     cggtgaccgc cccgcccgac ccgtctccct tgtcggcagg ttccgccgca tcacgcggaa      1800
+     cggagatggc tcccctgtgg atcgggcggc cgctgcgggg ccgcccggtt ggtcggtcgg      1860
+     tgagcgccgg actccccctt cagctcttcc agggtcgggg tcgacaccga ggtcctggat      1920
+     cacccgtcag gggtgatccg ggcatgccgt cgtggcggtg aggtgggata cgggaacgat      1980
+     cggcccacgg gggaccggac gagacgaaga gacgtgagat gagcgatacg aactcgggcg      2040
+     gcgggcgcca ggccgcttcc ggaccggccc cacgtggccg actccctttc cgccggcgcg      2100
+     tggccctggt cgctgtcgca cgtcccctga tcgtcacggt cggtctcgtc accgcctact      2160
+     acctgcttcc cctggacgag agactcagcg ccggcaccct ggtgtcgctg gtgtgcggac      2220
+     tgctcgcagt ccttctggtg ttctgctggg aggtgcgggc catcacgcgc tccccgcatc      2280
+     cgcgtctgag agcgatcgag ggcctggccg ccacgctggt gctgttcctg gtcctcttcg      2340
+     ccggctccta ctacctgctg ggtcgctccg cgcccggctc cttcagcgag ccgctgaaca      2400
+     ggacggacgc gctgtacttc actctgacca cgttcgccac cgtcggcttc ggggacatca      2460
+     ccgcacgctc cgagaccggg cggatcctca cgatggcgca gatgacggga gggctactgc      2520
+     tcgtcggagt cgccgcccgg gtgctggcga gcgcagtgca ggcggggctg caccgacagg      2580
+     gccggggacc ggcggcatcg ccacgctccg gtgctgcgga ggagccggag gccggaccat      2640
+     gaccgtaccc ggtggcttca ccgcctccct gccgccggcc gagcgagccg cgtacggcag      2700
+     gaaggcccgt aaaagggcct cacgttcgtg ccacggctgg tacgagccgg ggcagcggcg      2760
+     gcctgacccc gtcgacctgc tggagcgcca gtccggcgag cgtgtcccgg cactcgtgcc      2820
+     catccgctac ggtcgcatgc tggagtcgcc gttccgcttc taccgcggtg cggcagcgat      2880
+     catggcggcg gacctggcac ccctgcccag cagcggactc caggtgcaat tgtgcgggga      2940
+     cgcgcacccg ttgaacttcc ggctcctggc ctcaccggag cgccggctgg tcttcgacat      3000
+     caacgacttc gacgagacgc tgcccggccc cttcgagtgg gacgtcaaac ggctggcggc      3060
+     cggattcgtg atcgcggccc ggtcgaacgg cttctcgtcc aaggaacaga accgcaccgt      3120
+     tcgggcctgt gtgcgggcct accgggagcg catgagggag ttcgccgtca tgccgaccct      3180
+     ggacatctgg tacgcccagg acgacgccga ccacgtacgg caactgctgg ctacggaggc      3240
+     cagaggagaa gctgagcagc ggctcaggga cgcggctgcg aaggcccgca cacgcaccca      3300
+     catgagggcg ttcgcgaagc tcacccgcgt cacggccgag ggccggcgca tcacccccga      3360
+     cccgccgctg atcaccccac tcggcgatct gctcaccgac ccggccgaag ccggccggga      3420
+     ggaggaactg cggtccgtcg tgaacggcta cgcacggtcc ctgccgcccg agcgccggca      3480
+     cctgctgcgt cactaccggc ttgtggacat ggcgcgcaag gtggtcggcg tcggcagtgt      3540
+     cggcacccgc tgctgggtac tgcttctgct cggcagggac gacgacgatc ctctgctgct      3600
+     ccaggccaag gaagcctcgg aatcggtgct ggcggcccac acgggcggcg aacgctacga      3660
+     ccatcagggc cgcagggtcg tggccggcca gcgtctgatc cagaccaccg gtgacatctt      3720
+     tctcggctgg gcgcgcgtca ccggcttcga cggaaaggcc cgggacttct acgtgcgtca      3780
+     actgtgggac tggaagggcg tcgcgcggcc ggaaaccatg gggcccgacc tgctctccct      3840
+     cttcgcccgg ctgtgcggtg cctgcctggc gagggcccac gcccgttccg gtgaccccgt      3900
+     cgcgctcgcc gcgtacctgg gcggcagcga ccgcttcgac ggcgcgctca ccgagttcgc      3960
+     ccagtcctac gccgatcaga atgaacgcga ccacgaagct ctgctggcgg cctgccgctc      4020
+     cggcagggtc acggccgccc gtttgtgagg ccgacccggg aacggccggc gggctggcac      4080
+     acaccgccgc cggtcggcgt cattccggaa gctgccgcat ctccaggacg cgcaggccca      4140
+     gcgactggca gcgggtgagc aacccgtaca gatgggcctc gtcgatcacc gtgccgaaca      4200
+     gcacggtctg gccggacatg acgacgtgct ccagctccgg gaacgcgttg gccagcgtcc      4260
+     gtgacaggtg tccctcgacg cggatctcgt agcgcacgag cggtcctttc accgtaggag      4320
+     ctcgggacac cgcccggggc tccgggtcgg acggtgctct tggtgacgag cctgcgcctc      4380
+     gtcgccctcc ggtgccctca cccagcacag gtgactccaa ccgcagtgtc agtgcctttc      4440
+     agtgcgtcac tgtgatcttg acgacgacga tcaccaggcc gagcagtacg ttgaccgtcg      4500
+     cggtgacggc caccagtcgt cgcgaggcgc ccgcgcggtg cgccgcggcg acggaccagc      4560
+     ccacctgacc ggcgacggcg acggacagcg ccagccacag ggtgcccggg acgtccagcc      4620
+     ccagtacggg gctgacggcg atggccgcgg ccggaggcac ggcggccttg acgatcggcc      4680
+     actcctcgcg gcacacacgc agaatcaccc gccggtccgg agtgtgccgc gcgagacgcg      4740
+     ctccgaacag ttcggcgtgg acgtgagcga tccagaacac caagctggtg agcaacagca      4800
+     gaagaaccag ttcggcgcgg gggaacgagc ccagggtgcc ggcgccgatc acgacggagg      4860
+     ctgcgagcat                                                             4870
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.embl2sq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.embl2sq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.embl2sq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,237 @@
+ID   SC10H5 standard; DNA; PRO; 4870 BP.
+XX
+AC   AL031232;
+XX
+DE   Streptomyces coelicolor cosmid 10H5.
+XX
+KW   integral membrane protein.
+XX
+OS   Streptomyces coelicolor
+OC   Eubacteria; Firmicutes; Actinomycetes; Streptomycetes;
+OC   Streptomycetaceae; Streptomyces.
+XX
+RN   [1]
+RP   1-4870
+RA   Oliver K., Harris D.;
+RT   ;
+RL   Unpublished.
+XX
+RN   [2]
+RP   1-4870
+RA   Parkhill J., Barrell B.G., Rajandream M.A.;
+RT   ;
+RL   Submitted (10-AUG-1998) to the EMBL/GenBank/DDBJ databases.
+RL   Streptomyces coelicolor sequencing project,
+RL   Sanger Centre, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA
+RL   E-mail: barrell at sanger.ac.uk
+RL   Cosmids supplied by Prof. David A. Hopwood, [3]
+RL   John Innes Centre, Norwich Research Park, Colney,
+RL   Norwich, Norfolk NR4 7UH, UK.
+XX
+RN   [3]
+RP   1-4870
+RA   Redenbach M., Kieser H.M., Denapaite D., Eichner A.,
+RA   Cullum J., Kinashi H., Hopwood D.A.;
+RT   "A set of ordered cosmids and a detailed genetic and physical
+RT   map for the 8 Mb Streptomyces coelicolor A3(2) chromosome.";
+RL   Mol. Microbiol. 21(1):77-96(1996).
+XX
+CC   Notes:
+CC
+CC   Streptomyces coelicolor sequencing at The Sanger Centre is funded 
+CC   by the BBSRC.
+CC
+CC   Details of S. coelicolor sequencing at the Sanger Centre 
+CC   are available on the World Wide Web. 
+CC   (URL; http://www.sanger.ac.uk/Projects/S_coelicolor/)
+CC
+CC   CDS are numbered using the following system eg SC7B7.01c. 
+CC   SC (S. coelicolor), 7B7 (cosmid name), .01 (first CDS), 
+CC   c (complementary strand).
+CC
+CC   The more significant matches with motifs in the PROSITE
+CC   database are also included but some of these may be fortuitous.
+CC
+CC   The length in codons is given for each CDS.
+CC
+CC   Usually the highest scoring match found by fasta -o is given for
+CC   CDS which show significant similarity to other CDS in the database.
+CC   The position of possible ribosome binding site sequences are
+CC   given where these have been used to deduce the initiation codon.
+CC   
+CC   Gene prediction is based on positional base preference in codons 
+CC   using a specially developed Hidden Markov Model (Krogh et al., 
+CC   Nucleic Acids Research, 22(22):4768-4778(1994)) and the FramePlot 
+CC   program of Bibb et al., Gene 30:157-66(1984) as implemented at 
+CC   http://www.nih.go.jp/~jun/cgi-bin/frameplot.pl. CAUTION:  We may  
+CC   not have predicted the correct initiation codon.  Where possible 
+CC   we choose an initiation codon (atg, gtg, ttg or (att)) which is 
+CC   preceded by an upstream ribosome binding site sequence (optimally 
+CC   5-13bp before the initiation codon).  If this cannot be identified
+CC   we choose the most upstream initiation codon.
+CC     
+CC   IMPORTANT: This sequence MAY NOT be the entire insert of
+CC   the sequenced clone.  It may be shorter because we only
+CC   sequence overlapping sections once, or longer, because we
+CC   arrange for a small overlap between neighbouring submissions.
+CC
+CC   Cosmid 10H5 lies to the right of 3A7 on the AseI-B genomic restriction 
+CC   fragment.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..4870
+FT                   /organism="Streptomyces coelicolor"
+FT                   /strain="A3(2)"
+FT                   /clone="cosmid 10H5"
+FT   CDS             complement(<1..327)
+FT                   /note="SC10H5.01c, unknown, partial CDS, len >109 aa;
+FT                   possible integral membrane protein"
+FT                   /gene="SC10H5.01c"
+FT                   /product="hypothetical protein SC10H5.01c"
+FT   CDS             complement(350..805)
+FT                   /note="SC10H5.02c, probable integral membrane protein, len:
+FT                   151 aa; similar to S. coelicolor hypothetical protein
+FT                   TR:O54194 (EMBL:AL021411) SC7H1.35 (155 aa), fasta scores;
+FT                   opt: 431 z-score: 749.8 E(): 0, 53.5% identity in 114 aa
+FT                   overlap."
+FT                   /product="putative integral membrane protein"
+FT                   /gene="SC10H5.02c"
+FT   RBS             complement(812..815)
+FT                   /note="possible RBS upstream of SC10H5.02c"
+FT   CDS             complement(837..1301)
+FT                   /note="SC10H5.03c, probable integral membrane protein, len:
+FT                   154 aa"
+FT                   /product="putative integral membrane protein"
+FT                   /gene="SC10H5.03c"
+FT   RBS             complement(1308..1312)
+FT                   /note="possible RBS upstream of SC10H5.03c"
+FT   CDS             complement(1427..1735)
+FT                   /note="SC10H5.04c, unknown, len: 103 aa; possible membrane"
+FT                   /gene="SC10H5.04c"
+FT                   /product="hypothetical protein SC10H5.04c"
+FT   RBS             complement(1738..1741)
+FT                   /note="possible RBS upstream of SC10H5.05c"
+FT   misc_feature    1800^1801
+FT                   /note="Zero-length feature added to test Bioperl parsing"
+FT   CDS             1933..2022
+FT                   /note="SC10H5.05, questionable ORF, len: 29 aa"
+FT                   /gene="SC10H5.05"
+FT                   /product="hypothetical protein SC10H5.05"
+FT   CDS             2019..2642
+FT                   /note="SC10H5.06, probable membrane protein, len: 207 aa;
+FT                   similar to S. coelicolor TR:O54192 SC7H1.33c (191 aa),
+FT                   fasta scores; opt: 312 z-score: 355.2 E(): 1.6e-12, 36.8%
+FT                   identity in 182 aa overlap"
+FT                   /product="putative membrane protein"
+FT                   /gene="SC10H5.06"
+FT   RBS             2627..2631
+FT                   /note="possible RBS upstream of SC10H5.07"
+FT   CDS             2639..4048
+FT                   /note="SC10H5.07, unknown, len: 469 aa"
+FT                   /gene="SC10H5.07"
+FT                   /product="hypothetical protein SC10H5.07"
+FT   CDS             complement(4100..4297)
+FT                   /note="SC10H5.08c, unknown, len: 65 aa"
+FT                   /gene="SC10H5.08c"
+FT                   /product="hypothetical protein SC10H5.08c"
+FT   RBS             complement(4314..4319)
+FT                   /note="possible RBS upstream of SC10H5.08c"
+FT   CDS             complement(4439..>4870)
+FT                   /note="SC10H5.09c, probable integral membrane protein,
+FT                   partial CDS len: >143 aa; some similarity in C-terminus to
+FT                   S. coelicolor hypothetical protein TR:O54106
+FT                   (EMBL:AL021529) SC10A5.15 (114 aa), fasta scores; opt: 145
+FT                   z-score: 233.8 E(): 9.2e-06, 33.3% identity in 81 aa
+FT                   overlap. Overlaps and extends SC3A7.01c"
+FT                   /product="putative integral membrane protein"
+FT                   /gene="SC10H5.09c"
+FT   misc_feature    4769..4870
+FT                   /note="overlap with cosmid 3A7 from 1 to 102"
+XX
+SQ   Sequence 4870 BP; 769 A; 1717 C; 1693 G; 691 T; 0 
+SQ   other;
+     gatcagtaga cccagcgaca gcagggcggg gcccagcagg ccggccgtgg cgtagagcgc        60
+     gaggacggcg accggcgtgg ccaccgacag gatggctgcg gcgacgcgga cgacaccgga       120
+     gtgtgccagg gcccaccaca cgccgatggc cgcgagcgcg agtcccgcgc tgccgaacag       180
+     ggcccacagc acactgcgca gaccggcggc cacgagtggc gccaggacgg tgcccagcag       240
+     gagcagcagg gtgacgtggg cgcgcgctgc actgtggccg ccccgtccgc ccgacgcgcg       300
+     cggctcgtca tctcgcggtc ccaccaccgg tcggccccat tactcgtcct caaccctgtg       360
+     gcgactgacg ttccccggac aggtcgtacc gattgccgcc acgccccacc acgcacaggg       420
+     cccagacgac gaagcctgac atggtgatca tgacgacgga ccacaccggg tagtacggca       480
+     gcgagaggaa gttggcgatg atcaccagcc cggcgatggc gaccccggtg acacgtgccc       540
+     acatcgccgt tttgagcagc ccggcgctga cgaccatggc gagcgcgccg agcgcgagat       600
+     ggatccaccc ccacccggtg agatcgaact ggaaaacgta gttgggcgtg gtgacgaaga       660
+     cgtcgtcctc ggcgatggcc atgatgcccc ggaagaggct gagcagcccg gcgaggaaga       720
+     gcatcaccgc cgcgaaggcg gtaaggcccg tcgcccattc ctgcctcgcg gtgtgtgccg       780
+     ggtggtgggt atgtgacgtg gtcatctcgg acctcgtttc gtggaatgcg gatgcttcag       840
+     cgagcggagg cgccggtgcc cgccgcgccc gtgtgccctg ccgggccgtg accggacagg       900
+     accaattcct tcgccttgcg gaactcctcg tccgtgatgg caccccggtc tcggatctcg       960
+     gagagccggg ccagctcgtc gacgctgctg gacccgccgc ccacggtctt cctgatgtag      1020
+     gcgtcgaact cctcctgctg agcccgtgcc cgcgttgtct cccggctgcc catgttcttg      1080
+     ccgcgagcga tcacgtagac gaaaacgccc aggaagggca ggaggatgca gaacaccaac      1140
+     cagccggcct tcgcccagcc actcagtccg tcgtcccgga agatgtcggt gacgacgcgg      1200
+     aagagcagga cgaaccacat gatccacagg aagatcatca gcatcgtcca gaaggcaccc      1260
+     agcagtgggt agtcgtacgc caggtaggtc tgtgcactca tgtccgtcct ccgtcctccg      1320
+     gggcgcggcc cggcggccct cgttccgtac tgacatcagg gtggtcacgg gtcccaccgg      1380
+     tcggcatcac ccggcacggg tgagtggggc gccgaggccg tcgtggtcag gcccgggaca      1440
+     ccggtgtgac cctggtggaa ggacgcgtcc cgtggggcac gcaccgccgg ccgagggcga      1500
+     ccaccgcctc ggtcagtccg agcaggccca gccacaggcc gagaagtcgg gtcagggcac      1560
+     gggccgactc ggcgggcagc gcgaggacga cgattccggc gacgtcgacg gccagcgggt      1620
+     tgcgcaggcc cagcactccg gccggggcgc ccggcaccag cgtggcgagg gccgatgcca      1680
+     tgagccaggt ccaggaaccc ccaagcctgg cgaggacgtg cgccggatcg ctcaatgctc      1740
+     cggtgaccgc cccgcccgac ccgtctccct tgtcggcagg ttccgccgca tcacgcggaa      1800
+     cggagatggc tcccctgtgg atcgggcggc cgctgcgggg ccgcccggtt ggtcggtcgg      1860
+     tgagcgccgg actccccctt cagctcttcc agggtcgggg tcgacaccga ggtcctggat      1920
+     cacccgtcag gggtgatccg ggcatgccgt cgtggcggtg aggtgggata cgggaacgat      1980
+     cggcccacgg gggaccggac gagacgaaga gacgtgagat gagcgatacg aactcgggcg      2040
+     gcgggcgcca ggccgcttcc ggaccggccc cacgtggccg actccctttc cgccggcgcg      2100
+     tggccctggt cgctgtcgca cgtcccctga tcgtcacggt cggtctcgtc accgcctact      2160
+     acctgcttcc cctggacgag agactcagcg ccggcaccct ggtgtcgctg gtgtgcggac      2220
+     tgctcgcagt ccttctggtg ttctgctggg aggtgcgggc catcacgcgc tccccgcatc      2280
+     cgcgtctgag agcgatcgag ggcctggccg ccacgctggt gctgttcctg gtcctcttcg      2340
+     ccggctccta ctacctgctg ggtcgctccg cgcccggctc cttcagcgag ccgctgaaca      2400
+     ggacggacgc gctgtacttc actctgacca cgttcgccac cgtcggcttc ggggacatca      2460
+     ccgcacgctc cgagaccggg cggatcctca cgatggcgca gatgacggga gggctactgc      2520
+     tcgtcggagt cgccgcccgg gtgctggcga gcgcagtgca ggcggggctg caccgacagg      2580
+     gccggggacc ggcggcatcg ccacgctccg gtgctgcgga ggagccggag gccggaccat      2640
+     gaccgtaccc ggtggcttca ccgcctccct gccgccggcc gagcgagccg cgtacggcag      2700
+     gaaggcccgt aaaagggcct cacgttcgtg ccacggctgg tacgagccgg ggcagcggcg      2760
+     gcctgacccc gtcgacctgc tggagcgcca gtccggcgag cgtgtcccgg cactcgtgcc      2820
+     catccgctac ggtcgcatgc tggagtcgcc gttccgcttc taccgcggtg cggcagcgat      2880
+     catggcggcg gacctggcac ccctgcccag cagcggactc caggtgcaat tgtgcgggga      2940
+     cgcgcacccg ttgaacttcc ggctcctggc ctcaccggag cgccggctgg tcttcgacat      3000
+     caacgacttc gacgagacgc tgcccggccc cttcgagtgg gacgtcaaac ggctggcggc      3060
+     cggattcgtg atcgcggccc ggtcgaacgg cttctcgtcc aaggaacaga accgcaccgt      3120
+     tcgggcctgt gtgcgggcct accgggagcg catgagggag ttcgccgtca tgccgaccct      3180
+     ggacatctgg tacgcccagg acgacgccga ccacgtacgg caactgctgg ctacggaggc      3240
+     cagaggagaa gctgagcagc ggctcaggga cgcggctgcg aaggcccgca cacgcaccca      3300
+     catgagggcg ttcgcgaagc tcacccgcgt cacggccgag ggccggcgca tcacccccga      3360
+     cccgccgctg atcaccccac tcggcgatct gctcaccgac ccggccgaag ccggccggga      3420
+     ggaggaactg cggtccgtcg tgaacggcta cgcacggtcc ctgccgcccg agcgccggca      3480
+     cctgctgcgt cactaccggc ttgtggacat ggcgcgcaag gtggtcggcg tcggcagtgt      3540
+     cggcacccgc tgctgggtac tgcttctgct cggcagggac gacgacgatc ctctgctgct      3600
+     ccaggccaag gaagcctcgg aatcggtgct ggcggcccac acgggcggcg aacgctacga      3660
+     ccatcagggc cgcagggtcg tggccggcca gcgtctgatc cagaccaccg gtgacatctt      3720
+     tctcggctgg gcgcgcgtca ccggcttcga cggaaaggcc cgggacttct acgtgcgtca      3780
+     actgtgggac tggaagggcg tcgcgcggcc ggaaaccatg gggcccgacc tgctctccct      3840
+     cttcgcccgg ctgtgcggtg cctgcctggc gagggcccac gcccgttccg gtgaccccgt      3900
+     cgcgctcgcc gcgtacctgg gcggcagcga ccgcttcgac ggcgcgctca ccgagttcgc      3960
+     ccagtcctac gccgatcaga atgaacgcga ccacgaagct ctgctggcgg cctgccgctc      4020
+     cggcagggtc acggccgccc gtttgtgagg ccgacccggg aacggccggc gggctggcac      4080
+     acaccgccgc cggtcggcgt cattccggaa gctgccgcat ctccaggacg cgcaggccca      4140
+     gcgactggca gcgggtgagc aacccgtaca gatgggcctc gtcgatcacc gtgccgaaca      4200
+     gcacggtctg gccggacatg acgacgtgct ccagctccgg gaacgcgttg gccagcgtcc      4260
+     gtgacaggtg tccctcgacg cggatctcgt agcgcacgag cggtcctttc accgtaggag      4320
+     ctcgggacac cgcccggggc tccgggtcgg acggtgctct tggtgacgag cctgcgcctc      4380
+     gtcgccctcc ggtgccctca cccagcacag gtgactccaa ccgcagtgtc agtgcctttc      4440
+     agtgcgtcac tgtgatcttg acgacgacga tcaccaggcc gagcagtacg ttgaccgtcg      4500
+     cggtgacggc caccagtcgt cgcgaggcgc ccgcgcggtg cgccgcggcg acggaccagc      4560
+     ccacctgacc ggcgacggcg acggacagcg ccagccacag ggtgcccggg acgtccagcc      4620
+     ccagtacggg gctgacggcg atggccgcgg ccggaggcac ggcggccttg acgatcggcc      4680
+     actcctcgcg gcacacacgc agaatcaccc gccggtccgg agtgtgccgc gcgagacgcg      4740
+     ctccgaacag ttcggcgtgg acgtgagcga tccagaacac caagctggtg agcaacagca      4800
+     gaagaaccag ttcggcgcgg gggaacgagc ccagggtgcc ggcgccgatc acgacggagg      4860
+     ctgcgagcat                                                             4870
+//
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+
+
+>roa1_drome Rea guano receptor type III >> 0.1
+MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVV
+VMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVK
+KLFVGALKDDHDEQSIRDYFQHFGNIVDNIVIDKETGKKRGFAFVEFDDYDPVDKVVLQK
+QHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGN
+NWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGND
+FGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY
+>roa2_drome Rea guano ligand
+MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVV
+VMKDPTSTSTSTSTSTSTSTSTMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVK
+KLFVGALKDDHDEQSIRDYFQHLLLLLLLDLLLLDLLLLDLLLFVEFDDYDPVDKVVLQK
+QHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGN
+NWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGND
+FGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.game
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.game	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.game	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,934 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE game PUBLIC "game" "http://www.fruitfly.org/annot/gamexml.dtd.txt">
+<!-- GAME-XML generated by Bio::SeqIO::game::gameWriter -->
+<!-- Created Mon Dec 15 22:20:37 2003 -->
+<!-- Questions: smckay at bcgsc.bc.ca -->
+
+<game version="1.2">
+  <seq id="L16622" length="28735" type="dna" focus="true">
+    <organism>Caenorhabditis elegans</organism>
+    <name>L16622</name>
+    <db_xref>taxon:6239</db_xref>
+    <residues>
+      GTGAACAGAAAAAAACGTTGAAAGGGAGGAAGAAGTCGTCACACAGATTCTAAAGTGACT
+      TTGAGAGACGAAAATGTGTTCGTTAATCTTCTAGAGAAAACGAGAAAAAAAAACTGATGC
+      AGTAAAAACTAAAAGAAGAAACTAATTCCTTTGCTCTTCGTAATGAGATTGTTTGGGTGG
+      GCTTCGCTGTTTCAGAATTCAGTGGAACTCTTGAAATTTAAGGAAATGAAACTACTGTAG
+      TGTGCATATTCATTTAATTTAGATTTAAAACAAATCTGGACAATATGCTTTCTTATCAGA
+      ATATTCACTAGCAACTGTTATAAAAATAAAATTAGTACAATTTTTTATTTATTTTCAATT
+      GAAAGTTCCGAATAAGGGATTGAATTCTAGGATTCTTCAAACTCAGATATAGTGAAGAAA
+      TGACATTGTCAAGTTGATAGATTGATTGGTAAGCTTGATGCCATTGTAATTTGAGACTAG
+      CTCCTAAAATTAATAAATTTCATATTAAATCAGAAAACAGAAAGTTGTCAAATCTAGAAG
+      CATACTCATCGAATAGACAAGATTTGTTATTATTCTAAAATTTAAAAACTTGTGACTATG
+      ATTAAAGAAGAAAAAGTCACAAGGTAGTCAAGTCTCATGGGTGTTCAGTTTTTGTTCCTA
+      GAAACGGAATCAATGAAACACTCTTGGCTCACTACTTCCAACCGGTAGATTTAAATTATA
+      CCGATTCCCCGGTGTGTCCGCGCCTCCAACTCCGGAAACCGTCTCTAAAAATGGAGTTAG
+      GGTACATGAACATTCTACAGAGCGTTCATTAGACATTCTGCATATCTTTGAACTTTTTTT
+      TCCTTTTTTAATACTTATTAAACGGAAGAGTAAAGTTTCTAAAGTATCAATAAAACCTAA
+      CCAATTTAGAACATAAAACTTCAAGAGAGACCTAAAAATGTAAAATGTGCATTCTTAACC
+      AATTACTTTCCATATTTTCTGTTTTTTCTATCCAATTTTGCTTCTTTCGGAAATGGCAAA
+      AAAGACGTCGTTCTGTCTGGCAATGTTGAGCTCTATCAATCAGGCTGCCGTTTGGATCAG
+      AAAAAGGGGAAAATTGAAGGATGAAAAGAATGGGTACTTCTTGAACTTTTTTCATCAATT
+      CTTCTTTCTAACTTCATTCGTGCATTTTTCTGTTTTGGCTCTCCAGTGGCTTCTGCCGTT
+      TCGTCAGGAAAATCTCATTAGATGAGCTATTGTCGTGAACTTTTTTCTGCCGGTTTTAAA
+      TTTCTTTAGGGGCACGTGTATTAGATAAATATGGGTGGGAAGGAAAAAAAAACAAACACG
+      AAAAAAATGAAAATGAGACAAGCGAGATGGAAGATTTTGGAAGAACTTTGATAGTTCAAA
+      GAAAACAAGTAAAACAATAAACTATGTAAACGCAATTAATAAAATTTCGTAGTACAAAAA
+      ACCTTGAGTTAAAATCCAGGTAAAGTCGAACGTCAGGGATTCTTTTTAAAATAAATTCCG
+      CAAAATTTTTTTGGAGATATAAAACAAAAACTCGATAAATAGAGGAAAAAACTAACTGGC
+      TAACATAATAAGATACTAAAGTTTGTCTTTAATCATACATACAATCAAACCTGTCTGCCT
+      ACCTTTCAATCTGTATTTTACCATGTTTACAATTATTGGTAATAGGCACACCTGTAGGCA
+      CAAGGCAGGTAAGCATGTCATACACGGGAAAATTGGAATTTTAAGCAACTGCATACTGCA
+      ATATAATTCATTTACCCAATTTGAATATTAGAAGCTCAAAAATGTTTTGAAGTAAAAAAG
+      CTTTTAGACTTTAGATTTAAAAAAATATATGTCCATTCAAATATTCCGTCAATATTCCGA
+      AAACTTAAAAAATTAAATTAAATAATCAGAAGTTCTAATACCCTTTTCGATTCCCATGAC
+      ACTGAATTACAATGAAAAATTTTACGTTTTCAAAAAAATGAGCGCAACATTTTTAGCATG
+      TAGACCAAAAAATGAAGTCGGGGGAAATTGAAAATTGTCAGTCAACAACAAATAAGCATT
+      TCTAATAAATATTTATGCATTGGGCTTTTGAAAACCTGCATAACACTAACATTTCACGGT
+      TTGATTCATGACGATTTCCTTTTGCTTAAAATTTTAATTTTTTCATGAAAAGCTCATCAA
+      TTGTAAAACAGAATGAATTTTCAAAAGACGTTTTTGAATATAAAAAATGAGTTTTCATTT
+      TTCAACGAAAATTTACGTTGCTCAGTGACGTTTTATTACAGAAGAGGGTAAGAAGCAGTG
+      ATGCCTTTTTTTTTGCAAAGTTATGGATGTGCATTTTGACTTTTTCTCAAATTTTCGAAG
+      TGTATTCAATTTTTTAAATAGGAAAAAGTTATGACTTAATACCGGGACAATTTTTCCTCC
+      TATCTGCAGAACTTTCTCATAGTTCTGAAACTCGGTGTGATTTCCGACAGAGTTATAAAT
+      TCACCATGAACTTTTGCAAAATTTGATTAGATTAGAAAACTTGCACCAAAAAGACAAACT
+      TATCCAACATTTGAAGATATTAATTTCATACAAAAAATTTCCTACAGTGAAGGCTTAACT
+      CGGTTTGCAGCTAAATCAAATTTTCATTTATGGATCTTATTTTATCATCGTTGTAATAAA
+      ATTAAACATTTTTCATAAGCTTTCAATTTTCCAAAAAAAATGAACATTAATTAAAAATCC
+      AACAGAAAAAAAAACTTGAATCACTATTTGCAATGTGAAAATGGGGGGAAATCCAGTACA
+      CATTTTACATTTATAGACAAAGTATTTTTTCTACGCTGAAAACATTCTGAAGAGTCTCTC
+      GAAAGTTGGTTTGAGAACTAGACATACTTCGCAATAATCTGATAACATTTTCTGGTAGAT
+      GTACTGCTAGAACACTATTTTTTGAAAATAAAATTTCTTATTTTTCTGAAAACGTTTTGT
+      AAAATGAAAAATAAATTCTCTAAAAAATCCATAATTACATAAATGATAAATGGTAAACTA
+      TGCACTTGAAAAGTGGCATTCTTCTGACTTGAAATTGTCTAGACAGTTTGAACAATTCCC
+      AAGTCTGATGATTCTAAAAATAAAAAACAAATTTTGAAAGTATCAAAAATGTTTGAACAA
+      AAAAAGGTTCTAGCTAAAGTTCCAAGGAACTTGATTTTGCGAAAAATCTTCTCCAACTCT
+      CTGGTGACACCCTTCGATTATCTCTCCAACTCACAAGTTTTTCAGTCAACTCGCCGTTTA
+      TTTCCGAAAAAAATAGAAAGAGTGGAAGAGATGAACGAAAATCTATTCGATAGTGTTCTC
+      TTGTTTTCATTTAAAATTCTCTTTCAGATGTTTGAATTTTTCCAGTTGTTTTCAAGAAGC
+      TCGGCAAACTTATCTCGCATTCATTTAAAGTTTGAATTTATGTTCTTACTTGATTAGACT
+      GGAAAAGAAAAAGAAAAACACATGTTTAATTGAAACTTCAATAACTTGTTCTAGCTTCTG
+      TGACCTTTCCAATCAGAACAATTATCACGAATGATTACTTGTGCTTACTCAATTTCATGT
+      ACTTTTGCTTAGAATTGATGTAAATGTGTTCAGAACCATAGAAAAATTCCGAAGTTTTCG
+      AAATGCAGTTACCGTCCACTTCCATTTCAGAATTTCATGCTTCATATTATCAGCATTTGA
+      AGGCCTACTCCCATTATCCAGTCTGATGTGTAACCCACACTATGTGACGAATATTCTGAT
+      GAAATTTAGTAAGCACAAGTTATCATTTTTAACAGTTATTTTTATCGTGACTGTCACAAA
+      CTCCAATTTTCTTTTCGAACAATACCGACCTGGTGCATTTTTCTTGAAAGATATTAATCT
+      CTATATTGCAATACAATATTGTAAGATAGTTCAAAGTCTTAGAATTGGTCTACATAACTC
+      CGTAATTGGAATTAAAGTAAAGGCACCGGACTACCTACCGACCACTCATTGTCAGCATTT
+      GGCACTAGCTTCTTGCCAAAGTTGGCCAAACGTTTATAGTTTTCTGCAATTTTTTGTTAC
+      TTTTTAGTTGAGTGGTATAGTTGTTGGCAGTAGGTGCGCTTGTCGACACGAGGCAGGTAT
+      TTCTGTGCCTACAGGCCTTCTACTTCAGACATGATTTCTGCAACAATTTAGATGACTAAA
+      ATCTGATGAAATTGGGAAATCGCCAGACATCATTTTCTAGAAACATTTTGATCGCAATGA
+      TCTTGGCAGAAAGTCTATTTCATTATCATCTCATCTTTAAAAGAACATAATTTTTAAGAA
+      AAATCTATTGGCAAACTGTGTTTCTACTTTACTTCACAGGAAGAGCAACGTTTGTGTGTG
+      TCTGTATTTTGTGCCAATATGTGGTTTGTAGCACAGCTATGCTACAAAGTTCAAATTCGT
+      TTTGAAGGGGGAGAGAGGAACCCCGTGATTTCTGTGTTGTCTTATTCTCATCATTAAACT
+      CTGAACATTCAGCAATTTTGATCCATAATACTCAAACTTCTAGTTTCATCATCCTCATCT
+      TTTCTGGTGCCCCATTATCCTCTGATTCTAAAATATAGTTTTCCACCGTGAACTTCTCAA
+      TTTTTTCCAAATTCTTTAGCGTACTTTACTTCTGAAAAACTGTGCAATCTCATTTTCAGA
+      ATCTCCACTCAATTTTCATTCATTCAGTATGCAAACTATAAATTCGAGGTAATTTAAGGA
+      TCAAAAAGTCCTTGGGAAGGTCAATAACCAATGACATGCCGCGAAGAAAAACGCAGGCAA
+      TCCTTGAAAAGAGACGATACGCAGACTATTAAACAGAGATGTTTGTCATCATTTCTGTCT
+      TCTCGTTCTTCTCTATTTCACTCTCCTCCATAAAAAGACTATTTATACCAAACTATTCTT
+      CACTTTGAGCATCACTTTCTGAAGACTAGACTTAATTTTTGCCTTCTTTAAAAATCTGAA
+      GTATTGAAGATTTTATTTCATTTAAATTTGATTTAGAACTCTTGTCTTTTACTATTCCAT
+      CAAAATATAAATTAAATTTTCAGACTTATTCGCCGATTCATTTCTCAAAGAAACATGACA
+      GTGCTTGCTGGAGTAAACAGTAAAATCGTCAAAAATGGTAAGGCTTCATTTCCAATTTTA
+      GTTCAAGAATTCCCAAAAAAAGATAAAAAATTAATCACAGAGTAATATAATAATTGTATG
+      TCCATATTGCATATGCAATACTATTTTTCTCAATCAATAACTTCAAGTGCAACTCTGATA
+      GGGTTGCAATTCTTTTTCAGGGAAAATATGGTATTTAATTTGCTCAAATTGTAAAGAAGT
+      GTTCCACTTTATCAAAAAATAGTTGGATATGATTAAAGTTCAAGTTTAATATTATTTATT
+      AGAAAATCCTATCTTAGTTTTGCGGAAATTTAATATTATTTTTTCAGGTGACCCAGCGCC
+      AGCTCCTCCGGCTGCTGGAACCATTCGTATCTACAACATGCGATTCTGCCCGTGGCTCAA
+      CGTGCTCTAATCTATGCATCTGTTAAAAATATACCGAGCGATGTTATCAATGTTCACTTG
+      CAAGAAAAACCCGACTGGTACTTCTCAAAACATTACAAAGGACAAGTTCCGACATTGGAG
+      CACGACGAAGGAAAGAAGCATGTTATTGAATCAGCTGTGATTCCAGAATATTTGGATGAT
+      ATCTATCCAGAGACTCGTATTCTTCCAACTGATCCTTACGAGAAGGTTCAGCAAAAGTTA
+      TTGTTGGATAGAATCAGTGGACAAGTGTCACCTGCATTCTACGGAGTCGTTCAAGCTGTC
+      AAAAATCCAGATCTCCGCGAAGAAAAATTTGCGGATATCAAGAAGGCGTATGATAATGCT
+      GAGCAGTTATTGACTGGAGATTTCTATTCAGGTATTTTTTAAGAATTAAGATTGAGAATG
+      CGAAACTTATAAAGAACAGAAACTAAGTTGTAGTTTTAGTCTTGTGTATTTTAAAATTAT
+      TTGAATTAATTTCAGGAACCTCAAAACCTGGATTTGTTGACTATCTTCTCTACCCGAACA
+      TTCAACGCGCTTACTGGGCAGCACACATAGTTCCAGATTTCCCATTGGAAGCTGAATCGT
+      TCCCAGGACCAAACTATCCAAGACTATCCAAATGGTACAAGGCTCTGGAATCGATTCCAG
+      AAGTTGCCGCTGCCAGTCAGCCAACAGAGAATGGGGTTGGATTCTTCAAGGATTACCTCG
+      GTGGATCTCCAAACTATGACTATGGATTGTAAACATTTGTTATTATATTTTTAAACTTTG
+      TGTTGTGGATGTGAATATGTGGAATTTAATAAAACATTTCTCGATATAATAATGATTTTG
+      TTGAATTAGAAAAATTAGAAAAGTGGACGATTCTAAAAACAAAAGTTACAACGAAAATCA
+      TCGAAGGAAAAAACAACTGAATTCCAAAATAGTTTTCAGAGGTGATCACAAAATGTTCTC
+      AAACGATATATATTCTACCATCAATAATTTTATTGGCACTATATCACAGTCCATAATTCC
+      TGTGCTTTAATTATACTTTTCAGTATAGAACAATATGCTATATTATCAAGTTATGCGTCC
+      AATAAACACAATTTATTTTTCAGACTGAATTTAAGCCATATTGAGAATAGCGAAATAAAA
+      ACGTAGAGGAAATTTGTGATCGCCATTCACAATTAATTCTTAGATCGCAATGATAACAAA
+      CTTCGATTCAAAAGTCATCATGCAAATTCACCGTTCTCGTGTGTGTGTGTTTTTGGAGGA
+      AATAACACAATTTTGTGACTGATTTTTTTACAACATGTGGTTTGTAGCATAGTTCAAAGT
+      CATTCTAGAGGGGGCTCAGAGGGAGTTCTTTCGCTATGTCATCGTTTGTTTTTGCACACC
+      AAGAAAAATGAAAATAAATGCTCTAGGATGTCATGGATCGTTTCCATTCTTAATAAGTAG
+      AAGCTAGGATTTCCTATACAAAAATAAGTAATCTTCGTTTCTACGTCTATCAACTTAAAT
+      TTTTGTATACAATCCACTTTGGTAATATTCAAGGCCTTCCTGTAAAATGTTTTATGATCA
+      ATCCGTTACACCAAGAAAACAAGTGCAATTTGTCATCATGTAGGCTTCCGCCTGTGTTTA
+      CTTCCTTCCCCCAGCACAACACTGACTATTTATACCAAATTAATAATGCAGCATTCCTCA
+      TGTGATAACTCGTTTGACTTTTATATCTTTCTACGTGCATCTTTCAAGCTCGAAAATTAA
+      TTTTAAAAATTTACATTGCAGAACAATTGCGGAACGAAGAAGCATGTCAGTCCTATCAGG
+      ATTAAACACTAAAGTTGTCAAAAATGGTACGTTATTGAGATTGTTTTTGTCTGACAACTG
+      AATTACCAAGATTCTTTCAGGTGATCCAGCTCCAGCTCCACCAGCTTCAGGAACCATTCG
+      TATCTACAATATGCGATATTGCCCATGGGCTCAACGTGCTCTAATCTTTGCGTCTCTCAA
+      AAAGATCCCGACCGAAGTAATCAACATCCATTTGGATCAAAAACCAGATTGGTTTTTCAC
+      GAAACATTACAAAGGACAAGTTCCAGCACTAGAGCACGACGAGGGAAAGAAAATTGTGAT
+      TGAGTCAGCTGTGATTCCAGAATATTTGGATGATATCTATCCAGAACCTCGTATTATTCC
+      AACTGACCATTACGAGAAAGTTCAGCAAAAGTTATTGTTGGATAGAATCAGTGGACAACT
+      TTCTTCTGCATTCTACGGTGTTGTTCAAGCGGCCAAAATTTCTGATCTTCTCAAGGAAAA
+      ACTAGTTGAACTTGCAAAAGCATATGATACAGCAGAGGAACTCTTGACTGGAGATTTTTA
+      TTCAGGCACGAAATATTTTAATATTGAAACTAATCAGTTTAATTAAAATTAATTTAATAT
+      TTAAATCTTCAGGAACATCAAAACCAGGATTTGTCGATTATCTCATATATCCGAACATTC
+      AACGTGCTTTCTGGACTTCCCATATCATCAAGGATTTCCCATTAAAAGTAGAATCGTTCC
+      CAGGACCAAACTATCCGAAACTATCTAAATGGTATAAAAGATTGGATTCAATTCCAGAAG
+      TTATTGCTACCAGCCAACCAACTGAAACAGCAGTTGAGTTCTTCAAAAGCTGGATTATTG
+      GAGCACCAAACTTTGACTATGGATTGTAAATATCTTCTCTTCGTTACAAATTGCATATGA
+      GTATTTGAATGTAAATAAACTTTTTGGAGAGATTGAAGCAGACTTTTTTGTTTATCCAGA
+      AAGTAAGACAAAGAGGGAGATGCTATTAGTGACAGATGATGGTTGAGAATAAGAGAAAAA
+      GGATGAAAACGACGAGATCTGATTGTTGTCTCTCGTCCAGTCACTTTCCATTTCATTTTT
+      ATAGAAATACTTCGGCGACGCGGATGCGTGTCTGCTTCTTGCAAACTATTCTTCCTCTTA
+      TAAAGTTGGCTTTGAGGATTTATTTTTTTTTGAAAATTGTTTTCTTTTAACTGACACCGG
+      AGTTTTATTTCTATAGGTTTTTATGAAAAGTTTTATCATGAAGTGAACTTGTCACCTCTG
+      TTCGGTTTCTTGTGTTTTTGATATTAAGTCTAACTGTCCCTTTTTATGAAATGTAATTTT
+      TTTAATTTTGTATAAGCTTCATACAGTACGGACGTTTTGAACATTTGATGAGTCAGAGTT
+      AAAGAGAAAACTGATAATTTTTTTTCCATCTTTCTCCTCACTTGTGAATAAACTAAACGC
+      ATTTCTGTGGACGTTCCAAGTGTAATATGAGAGTTGTTTTCATTTGGAAATGCGGGAATA
+      TATTGAATCTTCCATTAGATGTTCAGGAATATATAAATACGTTGTCTGCTCTGAAAATTC
+      ACACGGAAAATCTAAAAATTGTCAAATTATAGATTTCATTCTCAAATGACTATATAACAT
+      TTTATTTTTGCAATTTCTTTTCAATTAGGAAACATTTCAAAAAGCTACGTTGTTTTTCAC
+      ATTCAAAATGATTACTGTCGGTGCGTTCATTTTCCGAGTTTTTCCAATTTCACGCTTGCT
+      CTTCTTCGTAAAAAACTCGTAATTTAGAAATTGTGTCTAGATCAAAAAAAAAATTTTCTG
+      AGCAATCCTGAATCAGGCATGCTCTCTAAACAACTCTCAGATATCTGAGATATGGGAAGC
+      AAATTTTGAGACCTTACTAGTTATAAAAATCATTAAAAATCAACGCCGACAGTTTCTCAC
+      AGAAACTTAAACCGAAAAATCCCAACGAAGACTTCAGCTCTTTTTTCTTTGAAATTTGAG
+      ACAAAGGCCCGTTCTATTGTCTTTCCGACTCACATCGTTCATTAATAAATCGTTCTTTCT
+      TCTACTTCATTCATCAATTTCCTCTTGACCAGAGAGAGTCCCTACTCTTGAAGCTCCTCT
+      TCTTTACTCTTTTCTTACTTACGCACAAAAAGTCTCTCTATCACTGCGTCTCTCTATCCA
+      TCTCTTCTACATGTCACTTGTCGTCTCTGCGCCTCTATAACACGTAACAATCTCTACCTT
+      CAAGTTCTCTAGTCACCTGTCTTCGTCTATACCTTTTGCCACGAAAATTACTACGTAGAA
+      GCTGTCCTATTGTAAAGATGAAACAGTTTGAAGAGAAATTGGATGATTGTGATCTATTGG
+      TCTCAGAATTTGATGGATTTCTTTTCCATGTTTTCAATTTTAACGTCTATATTCTTACCT
+      AGGTACTCATAATTTTAACTTTGTTTATATTTTTATAAACTTATAAGTTACAATTTTTAA
+      ATCAGTTAACAACTTCCTATAATCAAATTGTATTCTATTTTTTTTGGCACAAACACATAT
+      AAATGTCCAAATATTTGCGCACGAGTCACCCCTCTCCACTCATTTGCCGCCCAATTTTGA
+      CGTTTTCTTCCTTGCACATTTTGACAGCATTTCTAATTTCAGGAAATTCTTCATATATCA
+      ATTGGTCAGTCACAATTATCCTCCTCATTCTTGGACTTTACGCCTGTCATCGATTTTTGA
+      ACATGAAGAAGTTGACTCGAGATGCACAGGAACCACTTTTACGTGAGTTTTTAAAGATTT
+      TTTTTTTGAAAATTGATGTCTTGCATTTTATTTAGCTCACTGGATAGTAGAAAAAATATT
+      TTTTTTATCTATTTGAAAATCAAATGTGTTAAAAAAATATTTTTGGAGAAAAATAACTGA
+      AAGCTCCTTTCTGAATTATTGTTTTATTATTAAACATTTGTTTTCTTCTAACTTTATGTT
+      TTTTAATGTTTTTTTTTTACTTTTTAAATCCTGAATTATTTTGTGAAAATTCAAACAGTT
+      TCATTTTTAAAATTTCAAACCCTGATAAAAAGTTCAATATTTTTCACTGAACTTTAATTT
+      TTTTAAAAGTTTATGAAAATTTCCTATGAAATTAAGTTCAGAAGTTTTTTAGCTCATATC
+      CGCCCCTCCACAAGGAATAAAATTCGAAAATATATTTATGGAACTATTTTTATTTTATCA
+      ATTTTTCTCCTTTATCGATCACTGAACAGTCCAGACACATCAAAACACGGAATTGGCAGA
+      AATGGAGATGTAAGTTTTGAGATTTATTGCAACAAATAATTTACAAAATAATTTCAGTTT
+      ATTGAATATGAGCCAAAAGCAGGACCCACGATAAAAGAGCCTGTAGAGAATATAGTTAAA
+      TTGGACGTTTATATGGAAGCACAGTGTCCGGATACATCTAGGTGAGCAGTTAGTAATTAA
+      ATTAATTTAATATTTGATTTATTTTAAGATTTTTCCGTCAACAACTTAAAAAAGCGTGGG
+      ATATTCTAGGAAGGCTAAATCGAATCGAATTGAATGTAATTCCATTTGGAAAAGCGAGGT
+      GTACAGAGAAAGGAAACGATTTCGAGTGAGTTTTTTTTGTTAATTGATTTTAAATCTGAT
+      CATAAAATATTGCAGATGTCAATGTCAGCATGGTCCGACAGAATGTCAGATTAATCAATT
+      AATGAATTGTGTCATTGATCGATTTGGGTTTCCACATAGATATTTGCCAGGTGTTTTGTG
+      TATGCAGGGAAAATATTCATTAGATGAGGCAATGAAATGTGTTACTGAGAATTATCCATC
+      TGAATATGAAAGGTATGTATTTTGTGCCGTAAATGCATAGTTAGACCAACGAATACTTTT
+      TAAAATCATACGAAATATATTTTCATATATTATCACTGAATATAATAGTTAATGAAAGAG
+      TAATGCTCATTTTTCAGTTCAACTTTATTTTTCAAAGAATATTGAATTTTAGAATGCGTG
+      AATGTGCATCAGGAACTCGAGGTCGCCGCCTTCTTGCTCTTTCCGGACAGAAAACTGCAT
+      CACTAACTCCAGCAATTGACTTCATTCCCTGGATTGTTATTAATGGTTCACGTAACTCGG
+      ATGCTCTTTATGATCTAACACAGAATGTCTGTGAAGCAATGCAACCAATGCCATCTGCAT
+      GCAAAGATTACTTACGTTCATTACAATAATCACATCTTTTACGGGTTGACTTTTCGTCTT
+      ATAGTTTTTTTTAAAATACAATTGGTGTCTATCTATGAGTGCCTTTCACAACTCGGCGGG
+      TCCTAAAATTGTTTATTATATTTATTTAAATTTTTGTTGTAGTTTGTGTTAGTGTGACTA
+      ACTTATTGTGTTAATTTTCTTAAAAAGAACGTTTTTTATTAAAATAAAAAGTTGCAAATT
+      GTAAAAGTTTGTGTTTATCACATTATGATATTTTGGGCAATTGTGAGGATCTATTAAAAA
+      TTTATAAATCTCTTTGACAGTGTGTGGGAAAAATAAGTTATTTTTAGCTTCTGATATTTT
+      CTAGGATTAACAGAAAAAACAGCAAATTTCAGGTATACCCGCTTGCCAGTTCGTGATCAA
+      CTCCAGTGTTTTCCAAAAAAACAAATCTACCCTTCCCCAGCTTCAGATGTTACAAACTCG
+      ATAAAATTTGTTTCAGAAACATCTCTTCAGTGTGACCACAAACTAGTCTTTCGCTTCCTT
+      TTAACAACAAAAAATGGAAAAAGAAGGAGGGATTTACAAGAGGCTACGACGATACGAATG
+      AATGAAAACGATTTGATGCAATCAGCTGCTGCTTCTGCATTTGCCATTCAATTTGTCACC
+      TTTCTGCCAAATTTACACGATCTGTTTTGAGTGTGGACTTTTTGAAAGTTTAAACCACTT
+      TTCGTCAATTTTTAAATGATGTTTTTACTTCAGTTTTTATTATTTTGTTTTGCAAAAAAT
+      ATTTCAGTATGCCTGCATTTTTTAAATATTTAAAGTTTGATTTTTTTTAACATCCAAGTA
+      GAAATGATAGCTCACCTACTCCAACTAAATTTTGACCAACAACTGTCACTTCTATATTTG
+      AAGACATAATTAACATAAATCTTGAATTTTTGAAGTAATTTTAATGTCTGAACATCTTGT
+      TTTGAATCTTGTTTTTTTGCCGAAAAATTTGAAGAAAAAAGAAACTGAAATATTGCAAAC
+      ATCGCCAGAATGCAGACGGTAGGGTTGAATAAGATAGAGGGCATTGAACCCTTTCTAATT
+      TTCTGTTTTGCAAATTATTTTACAGTAGGTCTGAACTTCACAGTTTCATGGTACGCCCAA
+      TTTTTAACTTCTTTTTTGAATTCAAATTTTCTAAACTACATTATCGATTTCCATGAAAAC
+      AGTTGCATTAACTTCCTCTGACCATTCCAAGAATTTCTGGCTTACCAACCGACATCACTC
+      TTGCCCCCTCGTCATTAAGCCGTAATTGATAGCGACAAAAAAAAAGAAAAGCCGGCTATT
+      TTAATCGAATCTTCTTCATTTGAGAATGGAGGGTGCTACTTGAATGGGTGACAATTGACT
+      CGTGAAATTCTTCTTTATCTTTTCTCCTTATTTTTCTCAGAATTTCTTCATCATCCACTT
+      TTTTGGAGTTTCAAATGTTAATTGCAATCTGTCTCATTTTGGTAGTCATTTGGAAAACAC
+      GGGGGAGGCGATAACAGGAAGCTTAAGGGATAGACATACACTTGCAATTGTCGAAAAAGC
+      GATATCTTTAACGATTATTACGATTCTTTCAGTGTGACGTAATCCTAATCAGTTTATTTT
+      TATTTTTTCTGAAAGCTTCTTTTACGAATTGCGCAATTAATAGTGTCAGTAGAAAAGGCA
+      TAATTTTTGAAGAATATGCCAAAATATGTAAACCCTCTCCGTTAATAGCAGTAGCTAGTG
+      ATCTAGACTATATGCAATACACACTAGTTGTCCAATTGAAACAGGTATCCACAATATTCA
+      CGATTTTTGAAGTGTGATGTATTAGATAATCCTATCATTTTTTCCTCATCGGCCAGTACT
+      TTTTTTGTTGTTATTTTTGCAATATCCTCCGCTTTTTATTGTTTTCCTATTCACACCTGT
+      ATTTGATTCTGGTTTCCCAAAAAGAACAGGCATAGTTTTTGCGTTGGGAACTGGTTTTAT
+      TTCAGCATATCTTCTCATTTCTCAACCAGAATTAGAAACATTTTTAGAACAATCACATTT
+      ATAGCCTAAATTTTTACTAAAAATATCTGAAAAACATGATATACACTTTGTAGAATTTTT
+      GAAAATAATATCCGCCTATCCATGATTTAACCTTATTATTCGAAATCTGTGAGATTCCTC
+      AAAGTAGAAACATAAAATTTCAGGCACAACACAAAAGTCGGAACTCAATTAAAATCGAAT
+      ACCCTGTTTGAGATGGCGTTTCTGGCTCGTAAAACGTCTTCTCTCCTACCAGCCACCACA
+      TCCTCTACAGTCAAGCATATGATCTACGATGAACCACATTTTGCAATGCAGAACAGTTTG
+      GCAAAACTTATCAAAGAGAAAATAAACCCAAATGTTGCACAATGGGAAAAGAGTGGAAGA
+      TATCCAGCACATTTTGTGTTCAAAATGCTTGGACAACTTGGAGTATTTGCGGTGAATAAG
+      CCTGTAGGTGAGGATACTTATTTTAAAGAAAAAATTTTGGAAGTTGAAAATTATTGAAGA
+      CTATGGTGGGACTGGTCGAGATTTTGCAATGTCAATAGCAATAGCTGAACAAATTGGAGC
+      AGTTGATTGTGGATCGATTCCAATGTCAGTCATGGTTCAAAGTGACATGAGTACTCCTGC
+      TCTTGCACAATTTGGTGAGTTCTATAAAACTTATACTGTAACTTAATTGATATATCAGGC
+      TCCGATTCACTCCGCAATCGCTTTCTTCGTCCTTCAATCAATGGTGATCTAGTTAGTTCA
+      ATTGCAGTCTCCGAACCACATGCAGGATCAGATGTATCCGCAATTCGCACACATGCCCGT
+      CGGTACGGCAGCGACTTGATAATAAATGGCTCAAAAATGTGGATAACAAATGGAGATCAG
+      GCAGATTGGGCATGTGTTCTAGTAAATACTTCAAATGCGAAAAATTTGCACAAAAATAAG
+      TCGCTGGTGTGTATTCCACTGGACTCAATTGGTGTACATCGATCAACTCCGTTGGATAAA
+      TTAGGAATGAGAAGCTCCGATACAGTTCAACTATTTTTTGAAGATGTTAGGGTGAGTTTC
+      TTAAAATGATCTACGGCCCCTTTAACCAATTTTAATAAATAATTCAATGTTCATTTCAAT
+      CGAATCATTTTTCAGGTTCCCTCGTCATACATAATAGGCGAAGAAGGACGTGGATTTGCA
+      TATCAAATGAATCAATTCAATGATGAGCGCCTTGTAACAGTTGCTGTTGGGCTTCTCCCA
+      CTTCAAAAATGTATAAATGAGACGATTGAGTATGCAAGAGAACGATTAATATTTGGAAAG
+      ACACTTCTCGATCAGCAATATGTTCAATTTCGGTTAGCCGAGTTGGAGGCTGAACTGGAG
+      GCAACCCGTTCTTTGCTCTATCGAACAGTGCTGGCACGTTGCCAAGGCGAGGATGTGAGC
+      ATGTTGACTGCGATGGCGAAATTGAAAATTGGAAGACTGGCAAGAAAAGTTACTGATCAG
+      TGTCTACAGGTGAGGCGTTTTTGTTCTAAAATATACAAAAAATTCTCAAAATATGTATAT
+      AAATCACTTGTAATATTCTCCATATTAGACTTGAATATTCCTTGCTCTTCTTTGTCAGAT
+      TATATCTCGGTTGTATTTGTTTTTATGAAAACAAAATTGCCAACTAACAAAATTTGTGCA
+      AAATAATTTGCTTTATTTTGGATGTTGAACTTTTTTTGATGAAATTAAGACAACCGAGAT
+      ATAAACAGTCAAAGTATAGCAATGCAAGGATAATTCGGTATATGTTTTTGTGATCCCTCC
+      AGTGGCAGTTTTTCATAACTTGATGGTTTTTTTATAGAAATGAATTGGAATAACGCTAAA
+      GCTTCATTATTAATATTCTCTTAATTTCAGATCTGGGGAGGTGCTGGATATCTGAATGAC
+      AATGGAATATCGAGAGCCTTCAGAGATTTCCGTATATTTTCGATTGGCGCTGGTTGCGAT
+      GAAGTTATGATGCAGATTATTCATAAAACACAGTCCAAAAGGCAACAGAAAAGAATTTGA
+      GAACATTTTTAAATGTTATATTTGTAAATACGAAAATAAAATGCAATTGTACTGAAAACG
+      ATAAAAATAAAACAGCGAAAAAGTCATATTGTATAGAATTTGGCACGTATATCTACAACC
+      AGTTTCTAGTGACCCAGGTATCTTGAAGTAAGTATTCAATGAATCAATTCAAGTTATTAT
+      ATTTATATTTGTCCGCATCGGAAGGAAAGCGCAAAGAAGTTTCTCTCTCCGCCTCATCAA
+      ATTTTTTGTGTTTGCATTTCAAAAATGACTGCAATGAAACGCGAATTACTGCGAGTAAGT
+      AAAGTTAGTTTTGATAGAAACTACTGTATGAGAAAACCGGTTGAAAAGTAAAGATGAGCA
+      GCAGTATTTCATGGAAAAAAGAGGGAGACAACAAGAGACGGAGTATATAAGGTGTCATGG
+      ATGCTCCGAGAGTGTTTACTTCTTTGTTTCAATTTTCACACTTTTCATTCTTTTCATTCT
+      TTTTGTTTTTCACAATTATTTAGCAGATCGGTAACTTTTTGCTTTGATAATTTCATAGAT
+      ACTTTCGAATCGAAATTAATTTTCAAATTAGCCTACAGTAATTTTGCTCTCATCTCTGAG
+      TTCTAGATCATGTTTCAATTTACCGAAAGTGTTTACACAAGTTACCAAGAAAACAAAAAA
+      TTCAAGTTTCCGAAAATTATCAAATGTTTATCAAAAAGGTCCTATGATGTTTAAAACAAT
+      TTTTCAAACTTCCAGAAAAATTTTAACTTACTGTTTCTTGAGCGTTTACAGTAACTCCGG
+      TGTTTCCAGTAGGCATAGCTTACCTTGAAAGCAGGCAGGCGAAAATTTCTCTAGACCAAC
+      CAGAATAACTTACTTTATTGCTAAGTTGAATCAAACAATTTTGTAAAAAAAACGAATTTT
+      GGAATCATGATCCCTATTCAAGCTTCTAGTTGCTGGTCAGCTAGGTTTTGGGTTTTTTTT
+      TGGAAAAATATTCAAAAAACATTTATATAATAGTTAGAATTAACATTTTTTGATAAAACC
+      TCGACATTTTTGTTTTGTCTGAAAAAATAGGAAAATCTTACGTTTTTCGAAAAAACCCGT
+      GCTCGTGAAAAGTATGTCCTCTGAGAGAAGTAATGTTTCATCTGACCAGTTGCAACTTTC
+      TGTGTGCACATTCTTTTGATAAAATGGTATCACAGATCTATTCTAAAAGCCAACATCTAA
+      ATTCTTTGCTCTATCTTTATCAGTTGATACGGATCTTCTCATCTCATTCGCCCACAATCT
+      TCCCATACTAATTCATCAAACCCACTTGTAAATATACGCGCGGTTGATCAAAATTTGTGT
+      GTGTTATGGCACATTGTGCAAATAGTTTTACCACACTTACATACTTCAACTCAAACCTTT
+      GAGGAGTTTGACAGAGAGATGGAAAGATAGTCTGCAAACGGCAGATTTTTGAAGTTTCAC
+      CGCTGTCCATCTAATTTTAGGTATTTTTCGGAATCTTTTGCAGGACGTTATCATCTATCT
+      TTCCCGTTATCAATTAGTCATAATTATCCAATTAGTGGCAGTTGTTAGAAGAAATAGGTA
+      ATATGCATAATAGTGTCATTTGCCATTGGCCACCTCCACCAAACTTTCGATTATGCCGTT
+      TTCCGTTTTCTGTGTGTTTCTTCGTCCTTCCTCATCATTTCTCATTCGCTTTTTTTTCTT
+      CCCATCTTTTCCAACATGTCGCACTAAGAGTGACCAAAAAACCTTTCAAATTTTGCGTGT
+      TCTTTCGGTCTTTCCGGAAGGGACAAAAATCAAAACGACACTGGAATTATGAACTCATCC
+      ATTTTCCACTTTAAAAGTTGAAAAAAGTAAACAGCGGGGTTATTGTGGTTTGATCTCTTT
+      TAAAAATCAGTTAAATATAGGAGTCAAGACCTCAATGAGCACTCTTCAAGATATGGTTCT
+      ACTAGACTCAACTTGAAGATTTTCAAGAGTTCTGGAGACTTTTTCAAGGCTACTGCTTTC
+      AAGCTTCAGAATTTTAAACATTTTGGAAATAATCTTAAACTGGAGTTCAATAGCCAATTG
+      AGCAATTTGTTATAACGTTTTTTTCTTAATTTTTTAAATTAGAATCAGTGTAAATTTATA
+      AGTTTCAAAATTAGTTTTGCACTTATCTTTGGGCGTTACTGAATTTTTTACGTGGTGAAC
+      CTTGAGAAAAAATTCTAAGGCTTCTAATTGAGAAAACTAATTTAAATTCCGCTCCCAGGA
+      GTTACCAATTTTAATACGTTTCCAAAAAATTAAATATTCTTCGAATCTCATTTTTAAAGT
+      TTCCATTTGGCACAAACCACAATAATTTAAGTAAGACGTTTGATCTATGCCGTAGTTTGT
+      GTACTTCAACGTTTATCCTTAAGTACCTAGGCCCGTGTTTTTACAGCTCTGCTCTTTATC
+      GGTACATACTGTTCTCTGTCTTTATTGATAGAAATTTTGAAAAATGCAACAATATGGTAT
+      CTATCAGGTCGTCCCATAAGTTTTTGTACTTTTTTAAAACTTTTTGAACAAGTTCTAAAC
+      TGACAGAACAAAATCGAATCTTTTATAAATGCGCATGTATAGTATGTACTACTTGTCAAA
+      ATTTTTATGCGTTATTTCAATATCCTCCTGATAACAATCACGGAAACCAGAGCCACAAAT
+      AGCGACATACCCAAATAATGGGAGGTGTTTTCCTTCGTCCTGCTATTCACAGGGAATTTA
+      TCAATGAACATGAAAACATAGTATTAGTAAAGATAATGATTCAAAATACATGTTCAGTAT
+      GGTTAAAATTATCATTAGCACTTATTAGCCGTTTTGGACGTGGACTATTTGGCTCATGTT
+      TATCAAGCACTGAGTGAACATCTTCATGGAATAATTTCTCACTAAAAGTGATGGGATTAT
+      TTTGATTGTTGTTTCTAATTTTATATAACAATACTTGCATAGTACAAATACAAACTTCGT
+      TTTACTTGCTGATTTCTCAATCATAAATTAGAAGCCCAACACTATAAATGTCGGGTATCA
+      CATGAGGTTGGCCATGTAGATTGTTTGAACGAAGAGGCCACCAGTAAAGTTTGTTAATTT
+      ATTTATGATACATATATCCACTTCTAAATAACACTAGACTTAATTATCTATCTTTCATTC
+      CGAGGACTAAATGGACCAATATATGCTTCAATCACTCCTATAGGCAATTGTTAAAAGTAC
+      AAAATAGTGTGGTTACAATGTTCTCAATTATAACATCTCCCCATGACTGAAAAAATTAAA
+      TTTTTTTAAAATTTTGACTGCACATGATGTGCACTTATCGTAAACATACACGATGCACCC
+      GTTCCATTCCCAGCGGCTTCACAGGAATCAAAAACTCGGGCGCCATATTTAATTGGCCTC
+      AACAATTGTGTTTAGCTACAGTAGTTTTTCCGGAATAGTTATACTAAATTTAAAATTATT
+      TAAAACAAGAGTGTGGAAGCATCTACTTGACAGTATATATTAACCATTACTTTAAGCTCT
+      GGGTGGTGTAGAACAAACTCCAGAAGGAATGGTGTAAAAAGCTGATTCTATAGTTACTCG
+      TTTTTCTAAACAACCGCGGGGGCCTGGGATGCCAGAGTTATGTTGCAATAAGGTGACAAG
+      TTGGTGACATGCTACCACTAATATAAAATCTTAGAATTGTCCGAAAAAGTTTTGGGAATA
+      ATTCGAAAAAAAGTACAAAAACTTATGGGACGACCTGATAGGATATATGTTAAAAACTAT
+      TTTTGAAAAAATATTTTATTTTGAACAATGAAATAAGGTTCCTGCCTCAAGGTTTCTTTT
+      TGACGCGAACTCCGATGACATTTTAATTATCAAACGGTCTAAGTGAAAATTTATTGGACA
+      ACTCTTTAGTTGAAGTGCACTTTAGGAGCAGGCATACATGAAGGCGTGAGGCAGGCGTAG
+      GTCGCTTACGAGGCAGACAATTTTTAAAAAAATCACCATCCTTTTGTACTAATAAACACT
+      CTCTAAAAGTTTGCAATGTTGTCTCCCAACACGAAAAGTTCAATCAACTTCTGCACTCAA
+      TTTTTTTGCAAGATGACCCATTTGATTCAAGGGGGTTACCAGTAGACTTACCTGCAAAAA
+      AACAGTATTCGTGCATAAATCCATCAAAATGAAGTGTGCGTCTTCTTCTTAGTTTCCGTC
+      TCCCGTTGTTTCTTAATGTATACAGAAGATGTACGGGGCAGCAGCAGCAGAAAAAAGATT
+      TGCGTACACCAAACACATCAAAACGATATGCGTGAAATGAGCGAATCGTCCGCATTCTCC
+      CCTTTTTTCTTTCAATTTTCAAGGAGAGAGAAAACTCTGTGAGACAGTGAAGAAGTGGGG
+      TTTTGACTGGAAAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAACTGATTCTTA
+      TCTGAGTTCCGACGACGATTCCACCGTTTTTTGGTCTGGTCTTCTTTCCTCCGCTTCTTC
+      TTCTTCTACTTCTCTTTTCACGTCTTTCTCATATTTGGTTGTTTTTCAAGTTTTGAACTC
+      TTTCTACTACATACTTTTCACATGTACCTTTAAAAAACTCATAATTCATTTTCCAATGTG
+      TTGAAAACTACTGTAACTGCTTAAAAGTCAGAAACAGTAACGAAACTATTTTCATGATAA
+      AATCAAAAATTGTTTCGATTCGAAAATGTTTTTATATACTCGACATGTGTGTACATGTGT
+      AAACCAGTCGTTTCAAAAATTTTACAAAAAAATGTAAAGAAACTGTTCAGTGATCAGTAT
+      GCTCCAGCTTCTTAGTTTCTTAGTTTCTAGGACTTCACACACTGCCTGCCTTCAAACTAC
+      CGCCTATTAACATTTATTCCGGTCGCTCTTTTGTATTTATTGAGGAAATCAACTACTGTA
+      GTTTTTTAAAAATTAATTTATTGATTTGGCAATTTTTCTTTTTTTTTCAAGATTCAAAAA
+      TAAGAAATTGTATTTTACTCACCATTATTCAAAAAACTTGATGAAATGTTTAAATTTTAT
+      GGTAAATGATCAAAACTAATTTTTTGAAAAATCGACGCCTTCTCTTTTGTTTCTGAATAT
+      AGGCACTCCTAGCCGTGCAGGCAGGCATTTCTGTGCCTACACGAAAGCCCTAGAATACAT
+      ACTTTCAAAATGTTCACTGTTTGTTCTTTTTCCCTGAACATAGAACATCCGGAAATTGCT
+      TACATTGGAATTTCATTCAAGTACATTTGTAAAACTTACAAACACTTGAGAAAACAAAAC
+      AAATCTGAAAACGTGTTGTCTGAATGCATTGGGGAGACAAGTTGGGTTCTTCCTTCCATT
+      TACCTTGTTGTTGTTTTTTTTTCTGTTCAAAAATTTATTCGTTCTCTGGGTCAAATTGTT
+      CAATTATAGTTGTTGTTTGCATTCGGTTGTTCCATTTTTCACAGTTTTCTCTCTTGCAAA
+      AATTTTATAAAAGACTACAAGTAAATATAAAACCATGAACTCACATTTGGCACGGGCTAG
+      AATTCTGACTTCTGCCCTTCATTTTCTGTTGAAAATTGTCAAATTCTTTTCAAGGTCTTT
+      TATTTATTTTGTTCCTCGTGGAACATAACATTTTTGATATCTTTCAATATCAAACTTTAG
+      TCTACTCATAAACTGTATGTACCTACCTACAGTAACCTCTCAGAAACATTATGATCCTAA
+      GTATTTCTGATATTTGCTCCGAGGTAAAAGTAGATCCTACTTAATTTATAAATTAAGTGC
+      GTCAGTATCTATGTTGATTTAGTTTTTGTGATCTTCCAACTAATCGTATCTATCAGAACA
+      TCAGTAGCAAAAATCTAAAATCATGAGAGAGCATTTTCTAAAAAACACATAATTTTCAGA
+      AGGCTTCATGGCGAAGGCGCATGCCGAATTTTTGCTTTTTGACTCGACATTATACAATTT
+      TCCTTTAAAAAAATTAAGTTTTATTATGATATTTGGTGATTTAAAAATTATAAGATTAAT
+      TTTTAATCATTGCTCTACGGGCCGCACTACGCCTTTAAGCCTCAGTTCCAATATAATCAG
+      TTTGAATATTTAGTCACCGTTTTGACATTTACAGCTTAATCCAATTACTTTCAACTCAAT
+      TTATCAAACTGTCAAAATGCCCACCTAATTGTTCTTTGCCAGACTTTTGAAACCAAAAGC
+      CACACCCTCAATAGGTCATCTAAAAGTAGCTGTGGTCTCTCCAATGACCTCTATTGTAAA
+      TTGGGCGGCAATTCTTCTTCATCATCAATTTCAGATGCATCACAAGCATCAAAAGCATCA
+      TATATGAAGAACAGTAGAAAGAGGAAACTCATACTTTTCTAACCATTTTTTCTGATATAG
+      ACACTGGCATAGACCAATTCAAAAGTGTTGTAATATTGTACCGGCTACTCTCTTCTTTGT
+      GTTCTGCATTTTTCTGTCATTTTTCTTGAAGAAAAAGGCTTCTGACCTCTCCTCCAATGT
+      CACCTTTTTAGTGTTTCACAATAGGATTATTCCCATGCTTTTCACACAGTCTCTGAATTT
+      TTTGTTGTTCGAACCAAAAAAAAAAGAGAGTGTTTAGCTTTCTGTTTAGTTGGTTTTCGA
+      CTTTTTTATTGACTTTTTTATTAATATTTTCGTCATCTCATCAATCATACATAATTAAGC
+      GGACAAGGCATTAGGACCGGTGGGTGAGATGTGTTGTTGCGTTAGACGAATTCTAGATTA
+      CGTGATTTTAGTCGAGATTGTGTCGTCTGTGAATATCGAAATAGCGATTTAATTAGTCAT
+      GGGAAAAGTTGTCGTGGTAGAGATTCAAGTGACCTTTCTTTGCCGTTTTTATGGCTTAAT
+      CTATAAAAGGCTTAAAGGCTTGTAGTCTTCTAACCGGACGATCTATTACATAATATAATG
+      AAGGGGATATTTAACTAATTTTTTTAAAATTAAAATCCATAAGACTAGCTGAAATTTACC
+      TTAGGAATGCTATAGAAATATCAAATTTCGTCAGTGTTCTATTTTTTCTGATCACCAAAA
+      ATTATCAGTCGACAAAAAAAGTATATTCCATTTTTTATGCATCTACTGTGCCATGGGCAA
+      TGGATTTAATTTATATTCTTGGTGAGCCCTCCCATGTAGGCTCAAAACTGCCAAAAATGT
+      GCAGCAGACAGAAGCTCAGAGAGGCAGGCAGGTATGAAACAGACAAATATACCTAGATAC
+      GTGCCTGCCTACGTTTTCAAAATGCATCGATTTTGTCTTGGAAAAGCTGCTAAAATCTTA
+      CAGCTCCTGTGCCCTTCACATGCGCTGGAGTTCAACTCAAAATGGCAAAAATGTGTGGTA
+      GGCTAATTTTTTGCCCGCCTACGTTGTTGCATTGAGACAACTTCTGAACGGATTTCGTGA
+      ACAAATTTTCTTTCCAGATTAATTTTTTAAAACTTTGCTCCCAAAATTGCATCATTATCG
+      GTAAAGTTCGGGTATCCCCTCTTCATCTCTTAGACTTACTGTGTAACAATTTGCATAAAT
+      ATACACTGGTCTCTCTCAATCTTCTTTCATTTGATCACTTATACTTTTACACTTGGCACG
+      CACACATAGGCGAACCGCCAGGCTTCTCCTTTGCCCTTCTGGCGGCCAACCCCCCCCCCC
+      CCCCCCCCTCCCCCCGTTTTTTAAGTTTTCTTGTCATCTTGAAATGGTCGAAAAAATTAG
+      TGTTGTATTCTTGCACTGACGATGATGATGATGAAGTGGAAGAAAGAACCTGTTCTCTCG
+      GCCCACAAAAAATTCCAGAGGTGTTGGTCAATTTGACCTTCTAGACCATCACATTCCAGC
+      GCGGCTTCATAAAGAAATCGTCTCGAGAAAATTGAATTCAAGAATAGAAAAAATTATATT
+      TCAAATGAACTCGATTTTTGAATGCTGAATTCTTCTTCCTCACAATGACAGTCTCATTGA
+      ATTATAACTCAATTTTTAGAATTTTTGATTCCGAGTGAACCACATTTCAGAAAGGAAAAA
+      AGTTTTGATGATGTGTGTTCAATGTATAGGTTACTGCCACACCACACTACCAATCAACCG
+      GTTCTAAAAAGACAAGCTTCTCTGGAATTCAGTTTCAGAACACTGGGGCGGGGCTTCAGG
+      TTATGTTGGGAGTGGGAACCAAGAAGACACACGACTTCTTATTACTTTTTTAGTTTTACA
+      AAAATTTCATTTTCTTTGAAACTGTTTTACTCAACTCAAAAAAACGAGGCCAGGAGAGAG
+      TTCAATATTTATCGACTTTTATAAATTTTATATAATTTCACCATAAGTTTTATACTATTT
+      TATGCAAGTACATAGAGTGTGCTCTTGGTAAAGCGTCTAATTTTTCCGAAAGTTCACTCT
+      CTTTAATATTTTGGAATTTTATTTGTTTTAAAAGTTCGTTTTGAGGATAAACCTTGAAGA
+      TTGGAAAACGGTAATCTTTGCAGCGATTTTTATAACTGGAAACATGCTATCAGTAACTTA
+      ATTTTATTTGTTCTATAAAGTTAAAAAAACGTGAGTATAAAAATTTCAAAAAAATCCAAA
+      AAACAGACTCCGCCCAAATCTTGGCTTCTTTAAATTTTTGTATAAAATGTTCGTGTGCTT
+      ATAATTCAACATTTTGACTAAAATGGAATACTGTAAGAAAATACCCCCGAACTTGATATC
+      AACCCATTATAAACTCTTGTGATTTTATAATTTACAAAAAATGATAGTTGACAATTATTT
+      GAAAATTATGGAACCAATGTCCTAATCTACAGCCACATACAGTTATTCTTACAATTATAT
+      CAAAAAATCATATTCCCGTTCCGTTTTCCCTCTTTCAAAAGTCGTTCATATCTGCTTATG
+      GCACCTGTTGCGCCACCCCACGCCACCCAGATACAAGATTCTGAAAAAAAAACGAGTGAG
+      AGTAAATGTAAGACGAAGAGATTTAGAGAAAGTGTCAAAATCCCACTGGTTTTTTATTCG
+      ACATCCCCCTTTTTCTCACTCACTCGTTCGTCTCGCCATCGCCGTCGCAAAAAGTATCAT
+      AAAGTTGCCCTCACTGAGAGAACGACTTGCCTTCCGCTGAGGATAGATGGTTCCCGCGCG
+      GAAGGAGAAGGAATCCCCTCTCTTTCTCCGACACTTTCACCGCTTCTCATATGATGCCAT
+      TCTCGGGAAATACATTCGTAATTGGATCATCTGGGACTGGCAAAAATGAGAAAGAGAAGG
+      GGGAACGTTTTCTTTTCAACTGGAAAATAGTTACTGACCTTGAGCCAACTGGAAGCAGAA
+      AATGTTTAATTAAGGGGAAGAAACTAAATAAGGAAAATTAGTGTAACTACATTTACTTTT
+      ATGGTAGGAGATCACTTTGTAACTTACTAAGTTGATTTTGCTAAAAATAAAACTGATGGC
+      AGACCATTATCATTAAGTTTTGTAAAGCATCAATTTGAACAGCAAAGTTGAAGCATAAGC
+      CTATGCGGAAGCTTGAATCTAAGTCTATGCCTAAGCCTAAGACTAAGCCTACGCATAATT
+      GTAAGCACCTTTTTTTCACATTTTGGAACATTTGGAAATTTACCAAAAACTTCAATCGAA
+      ATCTTTGAAAACTCAAAGAAGTTGTGTTCTATTATGTTATTCGATCATTTTGGACTAATT
+      TAAATATATTCAGACAAAACCCCTCAGTGACTTTTGAATAGGTCCAAACATTTTTTAATT
+      TTACAGAAACGAATCCATCAATATTTTGTTATTTTGCTTTCATAGCCGAGGGCACAAGTG
+      TCTTTAGATGAGATGACATCCTCAATTCTAGACACGGATTCTAGACATGGGCTGTTGCAA
+      TTAGCATAGTTCTTGGTACTTTACGCATGTCACATTGAAAACTAAAGTGAGAATAGTTTC
+      CCGTTCCAATTAATACGTCGCGCCCTCTGTTTCTCCCAAAACGATACCAATGGCATCACG
+      ATTGAAAGCGGCAAAATATAGAGAGATTGAGAGAGAAACGGTTTCTATATACAGTGTAAA
+      TTTGCCCCCTGTTGCCATCATCATCACATAGACATACACACTCCATAATAGCTAAGGTTA
+      GCTTCCTGTCGCCCCCATCTCGGCAATAAAAATCGCTTTGATTTCAATCTCATTTGTTTT
+      TTAACCACCAGTTTGCTTATTTTTACTTTATAAATTGAGTTCTCTTGTTTCCAAATTTTT
+      TTGCAAATTACATACGTTGAGGACATTTAGTCATAATAACCATTCTTTGAATTTTTTAAA
+      AGTTTAGTGGCTAAAAGTCTTTCGTGCTTTTTTTCTAGACATTTTTGAAAAACAGCAACA
+      ACTTTCGAAAGTTTAATCAAATACTTTTAAATTGTAAAATGCCTATTTGTTTATGTATTT
+      CTGTTATGAACAATAGAGTTCCAGTTTGTGTATCTATGATCTTTGATGACTACTAAACAG
+      ACTTTGTTTTGATAGTTCCCGGTTTTTAGCAGTTTCAATTTGAGAAAGAGTCTAGGTATT
+      TCCTTATCACATTTGATAAAGTAGCTAATGGTGGCCGTGAACTGTGATTACTCAATGATG
+      ATGTCGAGGATTTTAGGGATTTGAACGGTTTTTCCAAATTTTAGTTAATTCAGTTTTTAT
+      AGCTGAGCTAACTATTATAACTTAGCACTAATTCCATTTATTTTGACTCTCCAATTGTCA
+      TCTGGCACAGTTAAGTATTTCTTAAAGTTTTCGGTTTCTACATTGCCTATAATTTGCTCA
+      ATTTTTGACCATGCCGATATTTGATACTTTTCACCTTACTTATGTAAAAAGTCAACTACC
+      TTTTTTTGCCGCATGGCTTTATTAGGCAAATAGCAAAAGTCCCCCTGAATCTTTTTGAAA
+      ACTCTTATTTTTAGTCCCTTGAACATTTAATTGAACTGCAAAATTGATGCTCGTTAATCT
+      ACATATATCCAGTAAATAGGCCGTAGGCAGGCAGGTGCAATATTTGATTATTTGTACCCA
+      TCTAACATGCTGGTCTGCTTCTGCAGCCACTAGATCTAGTTTTCATTGTTCAGCAAAAAA
+      TCCTAGGCAAGATTTACTAAAGTTGTTGCCCTTCCTGACCTAAATTACATGAATTTCGTT
+      ACTCTAAAATCTTTCAACGACTTTTGTAACGCATTAATCTGTTTTTTTTCACTTCCTTGA
+      TTATCTAATAGTTCTTCTTTTTCATCTTCAACATCCTTTTTCCCCTTTACTCTTTCCCTA
+      CATCTTACCATCTCATCAACAATCTTTCATAAATCTCTCTTTTAACTTTCGATTCTTTCA
+      GTATTCGGATTCATCTCAACATGCGTGTCACCCTTCATTACTTCCTTTTTATTACCATTT
+      CTGTTTAGTAGTTGAACATTTGTACATGTGTGTTCTTCCTGTCTACAGTTTTGAATGGGG
+      GCTCGAAACCTTTGATGATTCACGGTATTTTTTCTCTTGTTCTCAAGTTTTAGAAATATA
+      GAATAACACCACATATTTCTGATAAGATCATTATATATAGTTATGATGTATGCTCTAACC
+      AAATAAACCTTATCACTATGTTGTTTTTCCAAGTTTTCGGAAACCACAGCGTTCACTTCT
+      ATTTCCTCCAGCTGTATCATCAACCGTTACCGCAATGAACATTGCGCAACACTCATTTTC
+      CATCGGTTTCCTAGTTCTTCTTCATCTTCTGTCTCTCTTATATTTTATAGAAACATTGAA
+      AGAGACTAGGAGATATAGAGAGAAAAAGAGACAATTGGCATTCGTTGAAAGAGAGAAGGC
+      CTGTTGCGCAGAACCCTCTCTCTTGCATCCCTGCTTTTCCATCAGCTTCTAGCATCAGTA
+      ATCGATCCGATTAGGTTAAGCTCATTTTAAGGATCATCTCACTCTTAGTCTTTAAATTTC
+      ATTGAATACTGTAGATAGAACTCTAGACTTTGAACTGACGCTTTTCTCCAGGCTTTTAGG
+      ATTTTGGGCAGAGCCGATAGTCATGGTTTGTGACAACTGCCGGTATAAATTTCACGTTTT
+      TTCTGAAGAAAAAAAGCGTCCAGAATACCTCTGAGATTCAAATATCTCACATTAAATTTA
+      AAAAAAAACTATTTAAAAATTTTCGCGGCGTTCGACAACTTCGGCAATTGCCGGTGTCCG
+      AAAATATTTGCCTACGGCAACTAGGTTGCTGGTAGCCAAAAACATTTGGCGCAGTTCGGT
+      TGCAAGATTTTTAGAAGACAAATTGATATTTTGAGGAAAAAATACGACTCCTAAGGTAAT
+      TAAAAAATTATAGGCTCCAAAAATTTTGGATTGAAAAAAAGAATTAGAAAAATGTAATCA
+      CACTTCGAGCAAAATTTGAAAATTACAATTTCGAAATCCGATAGATTTCCACTATAAACG
+      ATTTCTAATACTTGTCTGATTTTTTTGAACCCAACCAGAACAAAAAACTTGAACCAGAAC
+      AAACCAGAACAAAAAAAAACAAAAGACCTAAAACCCACAGCAAAGGGCAAAAACTACAAA
+      AGGCGGAAACCATTTTTTCTAGACATTTTTCTTTATAGGACGTCAAAAAATGTAAGAAAA
+      GACATAGGGTAGAGTGAGAGAATAAGAAATGGATGCAAAAACCGCAAAAACTGTTTATAT
+      CTGACTATCGCTCACCCAAAATGCAATGTATCTGTCGTCTCTTCGTCTCTTTCTCTTTCA
+      TTTTCCCTCTCTCTCTCTCTCTTCCTTGATAATATCTTGTGAAGATAAATGAATGAATTG
+      GATGAGACGTTTGCTTTTTGGGTTCCCCTATTTCCTGAAAATCAGGAGGTTATTGGGTTA
+      CTGGATAACTTCCTTGTGTCCTTTTTGTCGGTGTCCTAGAATACTGTAGACATTTTCAAG
+      GTTATGATGTACCCATCTTTTTTCCCCATTTTGCATGTTTTACCTCTCCCTTTCATCTTT
+      ACTATCTTGAGGGTCAATCTGAACACTCTTTTTTTATTTTGGTTTGATGTCGTGTACTAT
+      TACCGCTTTCATCTTTCTTGCATGTGTAAGACTTTCAGTTAGCCTATAAACTAGAAACGG
+      CAGCCGACACGTCCCAGGTTGTTATAGAACCCGTGATTTGTCGGCTGCTCATCCCTTTGT
+      AGTTTGCATTTCGAGTAATTAATTAAAAATAAATGAACTTTTGCTAGCGATGTTTCCAAA
+      TTTGTTTTTTTTATTTGTTTCAAAAATTTGCCCTGTCTACCAAAAAAGTTTTAAAAGCTC
+      CGATGACTTTAGTTTGGAATTTTTCTAATTCAAATTCCAAGGCTATCCATACCTTTTTTT
+      ACTAAAATTCCCATGTTTTTTTGGGTGTTTTTACCCGAGAGACAAAACGGCGACAAATTT
+      CTCGAATTCCCCGCTCCTCTCCTAATGAGTTATGTGGTCGTTTGGAAAAAGGAAAACCGT
+      TTGGCCAGCAGTGGCCACTGCTAAAAATGAAAAAAGACGATGATGGGTGATGATC
+    </residues>
+  </seq>
+  <map_position seq="L16622" type="dna">
+    <span>
+      <start>1</start>
+      <end>28735</end>
+    </span>
+  </map_position>
+  <annotation id="source">
+    <name>source</name>
+    <type>source</type>
+    <property>
+      <type>chromosome</type>
+      <value>III</value>
+    </property>
+    <property>
+      <type>clone</type>
+      <value>C02D5</value>
+    </property>
+    <property>
+      <type>db_xref</type>
+      <value>taxon:6239</value>
+    </property>
+    <property>
+      <type>mol_type</type>
+      <value>genomic DNA</value>
+    </property>
+    <property>
+      <type>organism</type>
+      <value>Caenorhabditis elegans</value>
+    </property>
+    <property>
+      <type>strain</type>
+      <value>Bristol N2</value>
+    </property>
+    <feature_set id="source">
+      <name>source</name>
+      <type>source</type>
+      <property>
+        <type>chromosome</type>
+        <value>III</value>
+      </property>
+      <property>
+        <type>clone</type>
+        <value>C02D5</value>
+      </property>
+      <property>
+        <type>db_xref</type>
+        <value>taxon:6239</value>
+      </property>
+      <property>
+        <type>mol_type</type>
+        <value>genomic DNA</value>
+      </property>
+      <property>
+        <type>organism</type>
+        <value>Caenorhabditis elegans</value>
+      </property>
+      <property>
+        <type>strain</type>
+        <value>Bristol N2</value>
+      </property>
+      <feature_span id="source">
+        <name>source</name>
+        <type>source</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>1</start>
+            <end>28735</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+    </feature_set>
+  </annotation>
+  <annotation id="C02D5.3">
+    <name>C02D5.3</name>
+    <type>gene</type>
+    <property>
+      <type>gene</type>
+      <value>C02D5.3</value>
+    </property>
+    <gene association="IS">
+      <name>C02D5.3</name>
+    </gene>
+    <feature_set id="C02D5.3">
+      <name>C02D5.3</name>
+      <type>transcript</type>
+      <property>
+        <type>db_xref</type>
+        <value>GI:32453032</value>
+      </property>
+      <property>
+        <type>db_xref</type>
+        <value>WormBase:C02D5.3</value>
+      </property>
+      <property>
+        <type>gene</type>
+        <value>C02D5.3</value>
+      </property>
+      <property>
+        <type>note</type>
+        <value>
+          contains similarity to Pfam domain PF02798 
+          (Glutathione S-transferase, N-terminal 
+          domain)
+        </value>
+      </property>
+      <property>
+        <type>product</type>
+        <value>Hypothetical protein C02D5.3</value>
+      </property>
+      <property>
+        <type>protein_id</type>
+        <value>AAO12454.2</value>
+      </property>
+      <property>
+        <type>standard_name</type>
+        <value>C02D5.3</value>
+      </property>
+      <feature_span produces_seq="C02D5.3" id="C02D5.3">
+        <name>C02D5.3</name>
+        <type>start_codon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>5035</start>
+            <end>5037</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.3:1">
+        <name>C02D5.3:1</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>5035</start>
+            <end>5077</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.3:2">
+        <name>C02D5.3:2</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>5411</start>
+            <end>5851</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.3:3">
+        <name>C02D5.3:3</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>5956</start>
+            <end>6208</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.3:4">
+        <name>C02D5.3:4</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>7984</start>
+            <end>8008</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+    </feature_set>
+  </annotation>
+  <seq id="C02D5.3" length="253" type="aa">
+    <description>
+      translation from_gene[C02D5.3] cds_boundaries:(L16622:5035..8008)
+      transcript_info:[C02D5.3]
+    </description>
+    <residues>
+      MTVLAGVNSKIVKNGCWNHSYLQHAILPVAQRALIYASVKNIPSDVINVHLQEKPDWYFS
+      KHYKGQVPTLEHDEGKKHVIESAVIPEYLDDIYPETRILPTDPYEKVQQKLLLDRISGQV
+      SPAFYGVVQAVKNPDLREEKFADIKKAYDNAEQLLTGDFYSGTSKPGFVDYLLYPNIQRA
+      YWAAHIVPDFPLEAESFPGPNYPRLSKWYKALESIPEVAAASQPTENGVGFFKDYLGGSP
+      NYDYGLTKLSETI
+    </residues>
+  </seq>
+  <annotation id="C02D5.2">
+    <name>C02D5.2</name>
+    <type>gene</type>
+    <property>
+      <type>gene</type>
+      <value>C02D5.2</value>
+    </property>
+    <gene association="IS">
+      <name>C02D5.2</name>
+    </gene>
+    <feature_set id="C02D5.2">
+      <name>C02D5.2</name>
+      <type>transcript</type>
+      <property>
+        <type>db_xref</type>
+        <value>GI:289605</value>
+      </property>
+      <property>
+        <type>db_xref</type>
+        <value>WormBase:C02D5.2</value>
+      </property>
+      <property>
+        <type>gene</type>
+        <value>C02D5.2</value>
+      </property>
+      <property>
+        <type>note</type>
+        <value>
+          contains similarity to Pfam domain PF03227 
+          (Gamma interferon inducible lysosomal 
+          thiol reductase (GILT)); coded for 
+          by the following C. elegans cDNAs: 
+          CEESG21F, CEESG21R, CEESG21 
+        </value>
+      </property>
+      <property>
+        <type>product</type>
+        <value>Hypothetical protein C02D5.2</value>
+      </property>
+      <property>
+        <type>protein_id</type>
+        <value>AAA27914.1</value>
+      </property>
+      <property>
+        <type>standard_name</type>
+        <value>C02D5.2</value>
+      </property>
+      <feature_span produces_seq="C02D5.2" id="C02D5.2">
+        <name>C02D5.2</name>
+        <type>start_codon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>9783</start>
+            <end>9785</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.2:1">
+        <name>C02D5.2:1</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>9783</start>
+            <end>10002</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.2:2">
+        <name>C02D5.2:2</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>10373</start>
+            <end>10509</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.2:3">
+        <name>C02D5.2:3</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>10558</start>
+            <end>10661</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.2:4">
+        <name>C02D5.2:4</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>10709</start>
+            <end>10825</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.2:5">
+        <name>C02D5.2:5</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>10876</start>
+            <end>11052</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.2:6">
+        <name>C02D5.2:6</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>11213</start>
+            <end>11429</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+    </feature_set>
+  </annotation>
+  <seq id="C02D5.2" length="323" type="aa">
+    <description>
+      translation from_gene[C02D5.2] cds_boundaries:(L16622:9783..11429)
+      transcript_info:[C02D5.2]
+    </description>
+    <residues>
+      MSKYLRTSHPSPLICRPILTFSSLHILTAFLISGNSSYINWSVTIILLILGLYACHRFLN
+      MKKLTRDAQEPLLPHIRPSTRNKIRKYIYGTIFILSIFLLYRSLNSPDTSKHGIGRNGDF
+      IEYEPKAGPTIKEPVENIVKLDVYMEAQCPDTSRFFRQQLKKAWDILGRLNRIELNVIPF
+      GKARCTEKGNDFECQCQHGPTECQINQLMNCVIDRFGFPHRYLPGVLCMQGKYSLDEAMK
+      CVTENYPSEYERMRECASGTRGRRLLALSGQKTASLTPAIDFIPWIVINGSRNSDALYDL
+      TQNVCEAMQPMPSACKDYLRSLQ
+    </residues>
+  </seq>
+  <annotation id="C02D5.1">
+    <name>C02D5.1</name>
+    <type>gene</type>
+    <property>
+      <type>gene</type>
+      <value>C02D5.1</value>
+    </property>
+    <gene association="IS">
+      <name>C02D5.1</name>
+    </gene>
+    <feature_set id="C02D5.1">
+      <name>C02D5.1</name>
+      <type>transcript</type>
+      <property>
+        <type>db_xref</type>
+        <value>GI:15145275</value>
+      </property>
+      <property>
+        <type>db_xref</type>
+        <value>WormBase:C02D5.1</value>
+      </property>
+      <property>
+        <type>gene</type>
+        <value>C02D5.1</value>
+      </property>
+      <property>
+        <type>note</type>
+        <value>
+          contains similarity to Pfam domains PF02770 
+          (Acyl-CoA dehydrogenase, middle domain), 
+          PF02771 (Acyl-CoA dehydrogenase, N-terminal 
+          domain), PF00441 (Acyl-CoA dehydrogenases); 
+          coded for by the following C. elegans 
+          cDNAs: yk122b9.5 
+        </value>
+      </property>
+      <property>
+        <type>product</type>
+        <value>Hypothetical protein C02D5.1</value>
+      </property>
+      <property>
+        <type>protein_id</type>
+        <value>AAA27913.2</value>
+      </property>
+      <property>
+        <type>standard_name</type>
+        <value>C02D5.1</value>
+      </property>
+      <feature_span produces_seq="C02D5.1" id="C02D5.1">
+        <name>C02D5.1</name>
+        <type>start_codon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>13633</start>
+            <end>13635</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.1:1">
+        <name>C02D5.1:1</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>13633</start>
+            <end>13867</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.1:2">
+        <name>C02D5.1:2</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>13920</start>
+            <end>14054</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.1:3">
+        <name>C02D5.1:3</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>14099</start>
+            <end>14451</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.1:4">
+        <name>C02D5.1:4</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>14536</start>
+            <end>14889</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+      <feature_span id="C02D5.1:5">
+        <name>C02D5.1:5</name>
+        <type>exon</type>
+        <seq_relationship type="query" seq="L16622">
+          <span>
+            <start>15271</start>
+            <end>15420</end>
+          </span>
+        </seq_relationship>
+      </feature_span>
+    </feature_set>
+  </annotation>
+  <seq id="C02D5.1" length="408" type="aa">
+    <description>
+      translation from_gene[C02D5.1] cds_boundaries:(L16622:13633..15420)
+      transcript_info:[C02D5.1]
+    </description>
+    <residues>
+      MAFLARKTSSLLPATTSSTVKHMIYDEPHFAMQNSLAKLIKEKINPNVAQWEKSGRYPAH
+      FVFKMLGQLGVFAVNKPVDYGGTGRDFAMSIAIAEQIGAVDCGSIPMSVMVQSDMSTPAL
+      AQFGSDSLRNRFLRPSINGDLVSSIAVSEPHAGSDVSAIRTHARRYGSDLIINGSKMWIT
+      NGDQADWACVLVNTSNAKNLHKNKSLVCIPLDSIGVHRSTPLDKLGMRSSDTVQLFFEDV
+      RVPSSYIIGEEGRGFAYQMNQFNDERLVTVAVGLLPLQKCINETIEYARERLIFGKTLLD
+      QQYVQFRLAELEAELEATRSLLYRTVLARCQGEDVSMLTAMAKLKIGRLARKVTDQCLQI
+      WGGAGYLNDNGISRAFRDFRIFSIGAGCDEVMMQIIHKTQSKRQQKRI
+    </residues>
+  </seq>
+</game>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.gcg
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.gcg	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.gcg	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,18 @@
+Rea guano receptor type III >> 0.1
+roa1_drome  Length: 358  Thu Apr 17 12:09:52 2003  Type: P  Check: 8903  ..
+
+       1  MVNSNQNQNG NSNGHDDDFP QDSITEPEHM RKLFIGGLDY RTTDENLKAH
+
+      51  EKWGNIVDVV VMKDPRTKRS RGFGFITYSH SSMIDEAQKS RPHKIDGRVE
+
+     101  PKRAVPRQDI DSPNAGATVK KLFVGALKDD HDEQSIRDYF QHFGNIVDNI
+
+     151  VIDKETGKKR GFAFVEFDDY DPVDKVVLQK QHQLNGKMVD VKKALPKNDQ
+
+     201  QGGGGGRGGP GGRAGGNRGN MGGGNYGNQN GGGNWNNGGN NWGNNRGNDN
+
+     251  WGNNSFGGGG GGGGGYGGGN NSWGNNNPWD NGNGGGNFGG GGNNWNGGND
+
+     301  FGGYQQNYGG GPQRGGGNFN NNRMQPYQGG GGFKAGGGNQ GNYGNNQGFN
+
+     351  NGGNNRRY

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgblast
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgblast	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgblast	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,115 @@
+!!SEQUENCE_LIST 1.0
+BLASTP 2.2.1 [Apr-13-2001]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+
+Query= /v0/people/staji002/test.gcg
+         (146 letters)
+
+Database: pir
+           274,514 sequences; 93,460,074 total letters
+
+Searching. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .done
+
+                                                         Score    E
+ Sequences producing significant alignments:             (bits)  Value ..
+
+PIR2:S44629  Begin: 342 End: 470 
+!F22B7.10 protein - Caenorhabditis elegans                   57  2e-08
+PIR2:T21398  Begin: 40 End: 108 
+!hypothetical protein F26D2.1 - Caenorhabditis elegans       33  0.40
+PIR1:WMBELM  Begin: 307 End: 385 
+!membrane protein LMP-2A - human herpesvirus 4               32  0.53
+\\End of List
+
+>PIR2:S44629 F22B7.10 protein - Caenorhabditis elegans
+          Length = 628
+
+ Score = 57.0 bits (136), Expect = 2e-08
+ Identities = 38/135 (28%), Positives = 69/135 (50%), Gaps = 8/135 (5%)
+
+Query: 3   CAAEFDFMEKETPLRYTKTXXXXXXXXXXXXXXRKIISDMWGVLAKQQTHVRKHQFDHGE 62
+           C+AEFDF++  T  +   T                 + +   +L +    +     ++GE
+Sbjct: 342 CSAEFDFIQYSTIEKLCGTLLIPLALISLVTFVFNFVKNT-NLLWRNSEEIG----ENGE 396
+
+Query: 63  LVYHALQLLAYTALGILIMRLKLFLTPYMCVMASLICSRQLFGW--LFCKVHPGAIVFVI 120
+           ++Y+ +QL   T +  LIMRLKLF+TP++C++A+L  + +L G   +   +   A+V VI
+Sbjct: 397 ILYNVVQLCCSTVMAFLIMRLKLFMTPHLCIVAALFANSKLLGGDRISKTIRVSALVGVI 456
+
+Query: 121 LAAMSIQGSANLQTQ 135
+            A +  +G  N++ Q
+Sbjct: 457 -AILFYRGIPNIRQQ 470
+
+
+>PIR2:T21398 hypothetical protein F26D2.1 - Caenorhabditis elegans
+          Length = 346
+
+ Score = 32.7 bits (73), Expect = 0.40
+ Identities = 20/71 (28%), Positives = 41/71 (57%), Gaps = 11/71 (15%)
+
+Query: 66  HALQLLAYTALGILIMRLKLFLTPYMCV---------MASLICSRQLFGWLFCKVHPGAI 116
+           + + ++A+ +LGI+   L++F+ PY+ V         +++ I ++ L  WLF  +  G +
+Sbjct: 40  YRIMIVAFASLGIIYSGLEVFIKPYLHVYNNCILYFSLSTWISAKPLLPWLFA-IWSG-M 97
+
+Query: 117 VFVILAAMSIQ 127
+             V++A +SIQ
+Sbjct: 98  YLVVIAFISIQ 108
+
+
+>PIR1:WMBELM membrane protein LMP-2A - human herpesvirus 4
+          Length = 497
+
+ Score = 32.3 bits (72), Expect = 0.53
+ Identities = 26/79 (32%), Positives = 38/79 (47%), Gaps = 4/79 (5%)
+
+Query: 67  ALQLLAYTALGILIMRLKLFLTPYMCVMASLICSR----QLFGWLFCKVHPGAIVFVILA 122
+           AL LLA   LG L +     L     ++  LICS      L   L  ++   A+  ++LA
+Sbjct: 307 ALALLASLILGTLNLTTMFLLMLLWTLVVLLICSSCSSCPLSKILLARLFLYALALLLLA 366
+
+Query: 123 AMSIQGSANLQTQWKSTAS 141
+           +  I G + LQT +KS +S
+Sbjct: 367 SALIAGGSILQTNFKSLSS 385
+
+
+  Database: pir
+    Posted date:  Jan 3, 2002  1:13 PM
+  Number of letters in database: 93,460,074
+  Number of sequences in database:  274,514
+  
+Lambda     K      H
+   0.329    0.137    0.426 
+
+Gapped
+Lambda     K      H
+   0.267   0.0410    0.140 
+
+
+Matrix: BLOSUM62
+Gap Penalties: Existence: 11, Extension: 1
+Number of Hits to DB: 21,034,208
+Number of Sequences: 274514
+Number of extensions: 620007
+Number of successful extensions: 1381
+Number of sequences better than  1.0: 3
+Number of HSP's better than  1.0 without gapping: 1
+Number of HSP's successfully gapped in prelim test: 2
+Number of HSP's that attempted gapping in prelim test: 1379
+Number of HSP's gapped (non-prelim): 3
+length of query: 146
+length of database: 93,460,074
+effective HSP length: 102
+effective length of query: 44
+effective length of database: 65,459,646
+effective search space: 2880224424
+effective search space used: 2880224424
+T: 11
+A: 40
+X1: 15 ( 7.1 bits)
+X2: 38 (14.6 bits)
+X3: 64 (24.7 bits)
+S1: 40 (21.9 bits)
+S2: 70 (31.6 bits)

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgfasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgfasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.gcgfasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,261 @@
+!!SEQUENCE_LIST 1.0
+
+
+(Peptide) FASTA of: test.gcg  from: 1 to: 146  August 25, 2003 13:25
+
+ REFORMAT of: b124_sp.pep  check: -1  from: 1  to: 146  January 28, 1999 16:22
+ (No documentation)
+
+ TO: PIR:*  Sequences:    283,308  Symbols:    96,168,669  Word Size: 2
+
+ Databases searched:
+   NBRF, Release 76.0, Released on 31Mar2003, Formatted on 7Apr2003
+
+ Scoring matrix: GenRunData:blosum50.cmp
+ Variable pamfactor used
+ Gap creation penalty: 12  Gap extension penalty: 2
+
+
+
+Histogram Key:
+ Each histogram symbol represents 474 search set sequences
+ Each inset symbol represents 4 search set sequences
+ z-scores computed from opt scores
+
+z-score obs    exp
+        (=)    (*)
+
+< 20    789      0:==
+  22      0      0:
+  24      4      0:=
+  26      8      6:*
+  28      9     64:*
+  30    101    390:*
+  32    407   1509:=  *
+  34   2185   4092:=====   *
+  36   7555   8404:================ *
+  38  16600  13889:=============================*======
+  40  25000  19373:========================================*============
+  42  27813  23681:=================================================*=========
+  44  28394  26123:=======================================================*====
+  46  26152  26607:========================================================*
+  48  23191  25473:=================================================    *
+  50  20419  23244:============================================     *
+  52  18108  20435:=======================================    *
+  54  15701  17455:==================================  *
+  56  13874  14581:==============================*
+  58  11026  11970:======================== *
+  60   9392   9697:====================*
+  62   7678   7774:================*
+  64   6295   6183:=============*
+  66   4986   4887:==========*
+  68   3909   3844:========*
+  70   3131   3012:======*
+  72   2497   2354:====*=
+  74   1858   1835:===*
+  76   1469   1428:===*
+  78   1160   1110:==*
+  80    845    862:=*
+  82    665    659:=*
+  84    515    522:=*
+  86    376    404:*
+  88    261    313:*
+  90    225    242:*
+  92    157    187:*         :=======================================*
+  94    132    145:*         :=================================   *
+  96     93    112:*         :========================   *
+  98     63     87:*         :================     *
+ 100     73     67:*         :================*==
+ 102     44     52:*         :=========== *
+ 104     32     40:*         :======== *
+ 106     27     31:*         :=======*
+ 108     18     24:*         :=====*
+ 110     18     19:*         :====*
+ 112     11     14:*         :===*
+ 114     11     11:*         :==*
+ 116     10      9:*         :==*
+ 118      8      7:*         :=*
+>120     13      5:*         :=*==
+
+Joining threshold: 36, opt. threshold: 24, opt. width:  16, reg.-scaled
+
+
+The best scores are:                    init1 initn   opt    z-sc E(283250)..
+
+PIR2:S44629    Begin: 342  End: 470
+! F22B7.10 protein - Caenorhabditis e...  108   143   241   304.1  1.1e-09
+PIR1:WMBELM    Begin: 307  End: 385
+! membrane protein LMP-2A - human her...   59    91    99   130.6     5.1
+PIR2:AG0762    Begin: 63  End: 144
+! probable membrane protein STY2265 [...   65    65    96   128.9     6.4
+PIR2:B83179    Begin: 9  End: 86
+! hypothetical protein PA3730 [import...   40    40    92   127.0     8.2
+\\End of List
+
+
+test.gcg
+PIR2:S44629
+
+P1;S44629 - F22B7.10 protein - Caenorhabditis elegans
+C;Species: Caenorhabditis elegans
+C;Date: 20-Feb-1995 #sequence_revision 20-Feb-1995 #text_change 04-Mar-2000
+C;Accession: S44629
+R;Anderson, K.
+submitted to the EMBL Data Library, March 1993 . . . 
+
+
+SCORES   Init1: 108   Initn: 143   Opt: 241   z-score: 304.1 E(): 1.1e-09
+>>PIR2:S44629                                             (628 aa)
+ initn: 143 init1: 108 opt: 241 Z-score: 304.1 expect(): 1.1e-09
+Smith-Waterman score: 241;    32.6% identity in 135 aa overlap
+ (3-135:342-470)
+
+                                                 10        20        30  
+test.gcg                                 VXCAAEFDFMEKETPLRYTKTLLLPVVLVVFV
+                                           |:|||||::  |  :   |||:|::|: :|
+S44629       GLGIEDDAHIFDILRSKFTSFANFHTRLYTCSAEFDFIQYSTIEKLCGTLLIPLALISLV
+                   320       330       340       350       360       370 
+
+                   40        50        60        70        80        90  
+test.gcg     AIVRKIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYTALGILIMRLKLFLTPYMC
+             ::| :::::  ::| ::: ::     ::||::|:::||   |::::||||||||:||::|
+S44629       TFVFNFVKNT-NLLWRNSEEIG----ENGEILYNVVQLCCSTVMAFLIMRLKLFMTPHLC
+                   380        390           400       410       420      
+
+                  100         110       120       130       140          
+test.gcg     VMASLICSRQLFG--WLFCKVHPGAIVFVILAAMSIQGSANLQTQWKSTASLALET    
+             ::|:|: : :|:|   :   :: :|:| || | :  :|  |:: |               
+S44629       IVAALFANSKLLGGDRISKTIRVSALVGVI-AILFYRGIPNIRQQLNVKGEYSNPDQEML
+              430       440       450        460       470       480     
+
+S44629       FDWIQHNTKQDAVFAGTMPVMANVKLTTLRPIVNHPHYEHVGIRERTLKVYSMFSKKPIA
+               490       500       510       520       530       540     
+
+
+test.gcg
+PIR1:WMBELM
+
+P1;WMBELM - membrane protein LMP-2A - human herpesvirus 4
+N;Contains: membrane protein LMP-2B
+C;Species: human herpesvirus 4, Epstein-Barr virus
+A;Note: host Homo sapiens (man)
+C;Date: 31-Dec-1989 #sequence_revision 31-Dec-1989 #text_change 16-Jul-1999
+C;Accession: A30178; B30178; S00392 . . . 
+
+
+SCORES   Init1: 59    Initn: 91    Opt: 99    z-score: 130.6 E(): 5.1   
+>>PIR1:WMBELM                                             (497 aa)
+ initn:  91 init1:  59 opt:  99 Z-score: 130.6 expect():  5.1
+Smith-Waterman score: 99;    32.9% identity in 79 aa overlap
+ (67-141:307-385)
+
+               40        50        60        70        80        90      
+test.gcg     KIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYTALGILIMRLKLFLTPYMCVMAS
+                                           || |||   || | :   ::|     ::: 
+WMBELM       MTLLLLAFVLWLSSPGGLGTLGAALLTLAAALALLASLILGTLNLTTMFLLMLLWTLVVL
+              280       290       300       310       320       330      
+
+              100           110       120       130       140            
+test.gcg     LICSR----QLFGWLFCKVHPGAIVFVILAAMSIQGSANLQTQWKSTASLALET      
+             ||||      |   |: ::   |:::::||:  | |:: |||::|| :|           
+WMBELM       LICSSCSSCPLSKILLARLFLYALALLLLASALIAGGSILQTNFKSLSSTEFIPNLFCML
+              340       350       360       370       380       390      
+
+WMBELM       LLIVAGILFILAILTEWGSGNRTYGPVFMCLGGLLTMVAGAVWLTVMSNTLLSAWILTAG
+              400       410       420       430       440       450      
+
+
+test.gcg
+PIR2:AG0762
+
+P1;AG0762 - probable membrane protein STY2265 [imported] - Salmonella enterica 
+ subsp. enterica serovar Typhi (strain CT18)
+C;Species: Salmonella enterica subsp. enterica serovar Typhi
+A;Note: this species has also been called Salmonella typhi
+C;Date: 09-Nov-2001 #sequence_revision 09-Nov-2001 #text_change 18-Nov-2002
+C;Accession: AG0762
+R;Parkhill, J.; Dougan, G.; James, K.D.; Thomson, N.R.; Pickard, D.; Wain, J.; 
+ Churcher, C.; Mungall, K.L.; Bentley, S.D.; Holden, M.T.G.; Sebaihia, M.; 
+ Baker, S.; Basham, D.; Brooks, K.; Chillingworth, T.; Connerton, P.; Cronin, 
+ A.; Davis, P.; Davies, R.M.; Dowd, L.; White, N.; Farrar, J.; Feltwell, T.; 
+ Hamlin, N.; Haque, A.; Hien, T.T.; Holroyd, S.; Jagels, K.; Krogh, A.; Larsen, 
+ T.S.; Leather, S.; Moule, S.; O'Gaora, P
+
+
+SCORES   Init1: 65    Initn: 65    Opt: 96    z-score: 128.9 E(): 6.4   
+>>PIR2:AG0762                                             (352 aa)
+ initn:  65 init1:  65 opt:  96 Z-score: 128.9 expect():  6.4
+Smith-Waterman score: 96;    27.6% identity in 87 aa overlap
+ (61-137:63-144)
+
+                     40        50        60        70            80      
+test.gcg     FVAIVRKIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYT----ALGILIMRLKLF
+                                           |::| :|:: :: |    |||:: :||:||
+AG0762       TFLLVRLFSIPEGTWPLITLVVIMGPISFWGNVVPRAFERIGGTILGAALGLVALRLELF
+                   40        50        60        70        80        90  
+
+               90          100       110         120       130        140
+test.gcg     LTPYM---CVMASLICSRQLFGWLFCKVHP--GAIVFVILAAMSIQGSANLQTQ-WKSTA
+               | |   |::| ::|     |||    :|  : :: : ||::    :::::|  |::  
+AG0762       SLPLMLVWCAIAMFLC-----GWLALGKKPYQALLIGITLAVVVGAPAGDMNTALWRGGD
+                  100            110       120       130       140       
+
+                                                                         
+test.gcg     SLALET                                                      
+                                                                         
+AG0762       VILGALLAMLFTGIWPQRAFLHWRIQLAHCVTAYNRVYQAALSPNLLERPRLDKYLQRLL
+             150       160       170       180       190       200       
+
+
+test.gcg
+PIR2:B83179
+
+P1;B83179 - hypothetical protein PA3730 [imported] - Pseudomonas aeruginosa 
+ (strain PAO1)
+C;Species: Pseudomonas aeruginosa
+C;Date: 15-Sep-2000 #sequence_revision 15-Sep-2000 #text_change 31-Dec-2000
+C;Accession: B83179
+R;Stover, C.K.; Pham, X.Q.; Erwin, A.L.; Mizoguchi, S.D.; Warrener, P.; Hickey, 
+ M.J.; Brinkman, F.S.L.; Hufnagle, W.O.; Kowalik, D.J.; Lagrou, M.; Garber, 
+ R.L.; Goltry, L.; Tolentino, E.; Westbrook-Wadman, S.; Yuan, Y.; Brody, L.L.; 
+ Coulter, S.N.; Folger, K.R.; Kas, A.; Larbig, K.; Lim, R.M.; Smith, K.A.; 
+ Spencer, D.H.; Wong, G.K.S.; Wu, Z.; Paulsen, I.T.; Reizer, J.; Saier, M.H.; 
+ Hancock, R.E.W.; Lory, S.; Olson, M.V.
+Nature 406, 959-964, 2000 . . . 
+
+
+SCORES   Init1: 40    Initn: 40    Opt: 92    z-score: 127.0 E(): 8.2   
+>>PIR2:B83179                                             (213 aa)
+ initn:  40 init1:  40 opt:  92 Z-score: 127.0 expect():  8.2
+Smith-Waterman score: 92;    28.4% identity in 88 aa overlap
+ (22-109:9-86)
+
+                     10        20        30        40        50        60
+test.gcg     VXCAAEFDFMEKETPLRYTKTLLLPVVLVVFVAIVRKIISDMWGVLAKQQTHVRKHQFDH
+                                  | :|:||  |: |:  |   :||::|  ::::   ::| 
+B83179                    MEGFLQTALSFPTVLFSFLLILAII---YWGIVALGMVEIDVLDLDA
+                                  10        20           30        40    
+
+                     70        80        90       100       110       120
+test.gcg     GELVYHALQLLAYTALGILIMRLKLFLTPYMCVMASLICSRQLFGWLFCKVHPGAIVFVI
+               :|  | |     :|: |: :|||  :|   |:: |    ::|:|::|           
+B83179       ESVVDGAGQA---EGLAALLAKLKLNGVPVTLVLTLL----SFFAWFLCYFVQLWLLSAL
+                 50           60        70            80        90       
+
+                    130       140                                        
+test.gcg     LAAMSIQGSANLQTQWKSTASLALET                                  
+                                                                         
+B83179       PLGWLRYPLGAVVAVGALFLAAPLAATLCRPLRPLFRKLESTSSKSVLGQVAVVRSGRVT
+             100       110       120       130       140       150       
+
+
+
+! Distributed over 1 thread.
+!      Start time: Mon Aug 25 13:23:54 2003
+! Completion time: Mon Aug 25 13:25:12 2003
+
+! CPU time used:
+!        Database scan:  0:01:34.1
+! Post-scan processing:  0:00:00.6
+!       Total CPU time:  0:01:34.7
+! Output File: test.fasta

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,326 @@
+LOCUS       DDU63596      310 bp    DNA             INV       14-MAY-1999
+DEFINITION  Dictyostelium discoideum Tdd-4 transposable element flanking
+            sequence, clone p427/428 right end.
+ACCESSION   U63596
+NID         g2393749
+KEYWORDS    .
+SOURCE      Dictyostelium discoideum.
+  ORGANISM  Dictyostelium discoideum
+            Eukaryota; Dictyosteliida; Dictyostelium.
+REFERENCE   1  (bases 1 to 310)
+  AUTHORS   Wells,D.J.
+  TITLE     Tdd-4, a DNA transposon of Dictyostelium that encodes proteins
+            similar to LTR retroelement integrases
+  JOURNAL   Nucleic Acids Res. 27 (11), 2408-2415 (1999)
+REFERENCE   2  (bases 1 to 310)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Dictyostelium discoideum Tdd-4 transposable element, right end
+            flanking sequence from clone p427/428
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 310)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (11-JUL-1996) Biology, Utah State Univ., Logan, UT
+            84322-5305, USA
+FEATURES             Location/Qualifiers
+     source          1..310
+                     /organism="Dictyostelium discoideum"
+                     /strain="AX4"
+                     /db_xref="taxon:44689"
+                     /clone="p427/428"
+     misc_feature    5.12
+                     /note="Fuzzy location"
+     misc_feature    join(J00194:(100..202),1..245,256..258)
+                     /note="Location partly in another entry"
+BASE COUNT      118 a     46 c     67 g     79 t
+ORIGIN      
+        1 gtgacagttg gctgtcagac atacaatgat tgtttagaag aggagaagat tgatccggag
+       61 taccgtgata gtattttaaa aactatgaaa gcgggaatac ttaatggtaa actagttaga
+      121 ttatgtgacg tgccaagggg tgtagatgta gaaattgaaa caactggtct aaccgattca
+      181 gaaggagaaa gtgaatcaaa agaagaagag tgatgatgaa tagccaccat tactgcatac
+      241 tgtagccctt acccttgtcg caccattagc cattaataaa aataaaaaat tatataaaaa
+      301 ttacacccat 
+//
+LOCUS       DDU63595       83 bp    DNA             INV       14-MAY-1999
+DEFINITION  Dictyostelium discoideum Tdd-4 transposable element flanking
+            sequence, clone p427/428 left end.
+ACCESSION   U63595
+NID         g2393748
+KEYWORDS    .
+SOURCE      Dictyostelium discoideum.
+  ORGANISM  Dictyostelium discoideum
+            Eukaryota; Dictyosteliida; Dictyostelium.
+REFERENCE   1  (bases 1 to 83)
+  AUTHORS   Wells,D.J.
+  TITLE     Tdd-4, a DNA transposon of Dictyostelium that encodes proteins
+            similar to LTR retroelement integrases
+  JOURNAL   Nucleic Acids Res. 27 (11), 2408-2415 (1999)
+REFERENCE   2  (bases 1 to 83)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Dictyostelium discoideum Tdd-4 transposable element, left end
+            flanking sequence from clone p427/428
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 83)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (11-JUL-1996) Biology, Utah State Univ., Logan, UT
+            84322-5305, USA
+FEATURES             Location/Qualifiers
+     source          1..83
+                     /organism="Dictyostelium discoideum"
+                     /strain="AX4"
+                     /db_xref="taxon:44689"
+                     /clone="p427/428"
+BASE COUNT       31 a     16 c     12 g     24 t
+ORIGIN      
+        1 ttcgaaggat atctcaaggc agttaataat tactatgatg attgtaaaat attccaaagt
+       61 ttcccagacc caccaataat gac
+//
+LOCUS       HUMBDNF       918 bp    DNA             PRI       31-OCT-1994
+DEFINITION  Human brain-derived neurotrophic factor (BDNF) gene, complete cds.
+ACCESSION   M37762
+VERSION     M37762.1  GI:179402
+KEYWORDS    neurotrophic factor.
+SOURCE      Human DNA.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia;
+            Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 918)
+  AUTHORS   Jones,K.R. and Reichardt,L.F.
+  TITLE     Molecular cloning of a human gene that is a member of the nerve
+            growth factor family
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 87 (20), 8060-8064 (1990)
+  MEDLINE   91045937
+COMMENT     Draft entry and computer-readable sequence for [Proc. Natl. Acad.
+            Sci. U.S.A. (1990) In press] kindly submitted
+            by K.R.Jones, 13-AUG-1990.
+FEATURES             Location/Qualifiers
+     source          1..918
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /dev_stage="adult"
+     sig_peptide     76..123
+                     /gene="NTF3"
+                     /note="G00-125-917; putative"
+                     /product="brain-derived neurotrophic factor"
+     CDS             76..819
+                     /gene="BDNF"
+                     /note="putative"
+                     /codon_start=1
+                     /db_xref="GDB:G00-125-916"
+                     /product="brain-derived neurotrophic factor"
+                     /protein_id="AAA51820.1"
+                     /db_xref="GI:179403"
+                     /translation="MTILFLTMVISYFGCMKAAPMKEANIRGQGGLAYPGVRTHGTLE
+                     SVNGPKAGSRGLTSLADTFEHVIEELLDEDQKVRPNEENNKDADLYTSRVMLSSQVPL
+                     EPPLLFLLEEYKNYLDAANMSMRVRRHSDPARRGELSVCDSISEWVTAADKKTAVDMS
+                     GGTVTVLEKVPVSKGQLKQYFYETKCNPMGYTKEGCRGIDKRHWNSQCRTTQSYVRAL
+                     TMDSKKRIGWRFIRIDTSCVCTLTIKRGR"
+     gene            76..816
+                     /gene="NTF3"
+                     /map="12p13"
+     gene            76..819
+                     /gene="BDNF"
+                     /map="11p13"
+     mat_peptide     124..816
+                     /gene="NTF3"
+                     /note="G00-125-917; putative"
+                     /product="brain-derived neurotrophic factor"
+BASE COUNT      269 a    192 c    237 g    220 t
+ORIGIN
+        1 ggtgaaagaa agccctaacc agttttctgt cttgtttctg ctttctccct acagttccac
+       61 caggtgagaa gagtgatgac catccttttc cttactatgg ttatttcata ctttggttgc
+      121 atgaaggctg cccccatgaa agaagcaaac atccgaggac aaggtggctt ggcctaccca
+      181 ggtgtgcgga cccatgggac tctggagagc gtgaatgggc ccaaggcagg ttcaagaggc
+      241 ttgacatcat tggctgacac tttcgaacac gtgatagaag agctgttgga tgaggaccag
+      301 aaagttcggc ccaatgaaga aaacaataag gacgcagact tgtacacgtc cagggtgatg
+      361 ctcagtagtc aagtgccttt ggagcctcct cttctctttc tgctggagga atacaaaaat
+      421 tacctagatg ctgcaaacat gtccatgagg gtccggcgcc actctgaccc tgcccgccga
+      481 ggggagctga gcgtgtgtga cagtattagt gagtgggtaa cggcggcaga caaaaagact
+      541 gcagtggaca tgtcgggcgg gacggtcaca gtccttgaaa aggtccctgt atcaaaaggc
+      601 caactgaagc aatacttcta cgagaccaag tgcaatccca tgggttacac aaaagaaggc
+      661 tgcaggggca tagacaaaag gcattggaac tcccagtgcc gaactaccca gtcgtacgtg
+      721 cgggccctta ccatggatag caaaaagaga attggctggc gattcataag gatagacact
+      781 tcttgtgtat gtacattgac cattaaaagg ggaagatagt ggatttatgt tgtatagatt
+      841 agattatatt gagacaaaaa ttatctattt gtatatatac ataacagggt aaattattca
+      901 gttaagaaaa aaataatt
+//
+LOCUS       NT_010368  161485 bp    DNA             CON       16-NOV-2000
+DEFINITION  Homo sapiens chromosome 15 working draft sequence segment, complete
+            sequence.
+ACCESSION   NT_010368
+VERSION     NT_010368.1  GI:11433101
+KEYWORDS    HTG.
+SOURCE      human.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 161485)
+  AUTHORS   International Human Genome Project collaborators.
+  TITLE     Toward the complete sequence of the human genome
+  JOURNAL   Unpublished
+COMMENT     GENOME ANNOTATION REFSEQ:  NCBI contigs are derived from assembled
+            genomic sequence data. They may include both draft and finished
+            sequence.
+            COMPLETENESS: not full length.
+FEATURES             Location/Qualifiers
+     source          1..310
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /chromosome="15"
+     source          order(1..100,251..300,300..310)
+                     /note="Doctored from Accession AC011224 
+	             sequenced by Whitehead Institute
+                     for Biomedical Research"
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /clone="RP11-10K20"
+     variation       244
+                     /replace="T"
+                     /replace="A"
+                     /db_xref="dbSNP:140670"
+ORIGIN      
+        1 gtgacagttg gctgtcagac atacaatgat tgtttagaag aggagaagat tgatccggag
+       61 taccgtgata gtattttaaa aactatgaaa gcgggaatac ttaatggtaa actagttaga
+      121 ttatgtgacg tgccaagggg tgtagatgta gaaattgaaa caactggtct aaccgattca
+      181 gaaggagaaa gtgaatcaaa agaagaagag tgatgatgaa tagccaccat tactgcatac
+      241 tgtagccctt acccttgtcg caccattagc cattaataaa aataaaaaat tatataaaaa
+      301 ttacacccat 
+//
+LOCUS       HUMBETGLOA              3002 bp    DNA     linear   PRI 26-AUG-1994
+DEFINITION  Human haplotype C4 beta-globin gene, complete cds.
+ACCESSION   L26462
+VERSION     L26462.1  GI:432453
+KEYWORDS    beta-globin.
+SOURCE      Homo sapiens DNA.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 3002)
+  AUTHORS   Fullerton,S.M., Harding,R.M., Boyce,A.J. and Clegg,J.B.
+  TITLE     Molecular and population genetic analysis of allelic sequence
+            diversity at the human beta-globin locus
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 91 (5), 1805-1809 (1994)
+  MEDLINE   94173918
+   PUBMED   7907422
+FEATURES             Location/Qualifiers
+     source          1..3002
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /haplotype="C4"
+                     /note="sequence found in a Melanesian population"
+     variation       111
+                     /replace="t"
+     variation       263
+                     /note="Rsa I polymorphism"
+                     /replace="t"
+     variation       273
+                     /replace="c"
+     variation       286..287
+                     /note="2 bp insertion of AT"
+                     /replace=""
+     variation       288
+                     /replace="t"
+     variation       295..296
+                     /note="1 bp deletion of C or 2 bp deletion of CT"
+                     /replace=""
+     variation       347
+                     /replace="c"
+     variation       476
+                     /replace="t"
+     variation       500
+                     /replace="c"
+     CDS             join(866..957,1088..1310,2161..2289)
+                     /codon_start=1
+                     /product="beta-globin"
+                     /protein_id="AAA21100.1"
+                     /db_xref="GI:532506"
+                     /translation="MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFE
+                     SFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPE
+                     NFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH"
+     exon            <866..957
+                     /number=1
+     variation       874
+                     /replace="c"
+     intron          958..1087
+                     /number=1
+     exon            1088..1310
+                     /number=2
+     intron          1311..2160
+                     /number=2
+     variation       1326
+                     /note="Ava II polymorphism"
+                     /replace="g"
+     variation       1384
+                     /replace="g"
+     variation       1391
+                     /replace="t"
+     variation       1976
+                     /replace="t"
+     exon            2161..>2289
+                     /number=3
+     variation       2522
+                     /replace="c"
+     variation       2602
+                     /replace="a"
+     variation       2604
+                     /replace="c"
+     variation       2760
+                     /note="Hinf I polymorphism"
+                     /replace="t"
+     variation       2913
+                     /replace="g"
+BASE COUNT      810 a    601 c    599 g    992 t
+ORIGIN      
+        1 acctcctatt tgacaccact gattacccca ttgatagtca cactttgggt tgtaagtgac
+       61 tttttattta tttgtatttt tgactgcatt aagaggtctc tagtttttta cctcttgttt
+      121 cccaaaacct aataagtaac taatgcacag agcacattga tttgtattta ttctattttt
+      181 agacataatt tattagcatg catgagcaaa ttaagaaaaa caacaacaaa tgaatgcata
+      241 tatatgtata tgtatgtgtg tacatataca catatatata tatatatatt ttttcttttc
+      301 ttaccagaag gttttaatcc aaataaggag aagatatgct tagaactgag gtagagtttt
+      361 catccattct gtcctgtaag tattttgcat attctggaga cgcaggaaga gatccatcta
+      421 catatcccaa agctgaatta tggtagacaa aactcttcca cttttagtgc atcaacttct
+      481 tatttgtgta ataagaaaat tgggaaaacg atcttcaata tgcttaccaa gctgtgattc
+      541 caaatattac gtaaatacac ttgcaaagga ggatgttttt agtagcaatt tgtactgatg
+      601 gtatggggcc aagagatata tcttagaggg agggctgagg gtttgaagtc caactcctaa
+      661 gccagtgcca gaagagccaa ggacaggtac ggctgtcatc acttagacct caccctgtgg
+      721 agccacaccc tagggttggc caatctactc ccaggagcag ggagggcagg agccagggct
+      781 gggcataaaa gtcagggcag agccatctat tgcttacatt tgcttctgac acaactgtgt
+      841 tcactagcaa cctcaaacag acaccatggt gcatctgact cctgaggaga agtctgccgt
+      901 tactgccctg tggggcaagg tgaacgtgga tgaagttggt ggtgaggccc tgggcaggtt
+      961 ggtatcaagg ttacaagaca ggtttaagga gaccaataga aactgggcat gtggagacag
+     1021 agaagactct tgggtttctg ataggcactg actctctctg cctattggtc tattttccca
+     1081 cccttaggct gctggtggtc tacccttgga cccagaggtt ctttgagtcc tttggggatc
+     1141 tgtccactcc tgatgctgtt atgggcaacc ctaaggtgaa ggctcatggc aagaaagtgc
+     1201 tcggtgcctt tagtgatggc ctggctcacc tggacaacct caagggcacc tttgccacac
+     1261 tgagtgagct gcactgtgac aagctgcacg tggatcctga gaacttcagg gtgagtctat
+     1321 gggacccttg atgttttctt tccccttctt ttctatggtt aagttcatgt cataggaagg
+     1381 ggataagtaa cagggtacag tttagaatgg gaaacagacg aatgattgca tcagtgtgga
+     1441 agtctcagga tcgttttagt ttcttttatt tgctgttcat aacaattgtt ttcttttgtt
+     1501 taattcttgc tttctttttt tttcttctcc gcaattttta ctattatact taatgcctta
+     1561 acattgtgta taacaaaagg aaatatctct gagatacatt aagtaactta aaaaaaaact
+     1621 ttacacagtc tgcctagtac attactattt ggaatatatg tgtgcttatt tgcatattca
+     1681 taatctccct actttatttt cttttatttt taattgatac ataatcatta tacatattta
+     1741 tgggttaaag tgtaatgttt taatatgtgt acacatattg accaaatcag ggtaattttg
+     1801 catttgtaat tttaaaaaat gctttcttct tttaatatac ttttttgttt atcttatttc
+     1861 taatactttc cctaatctct ttctttcagg gcaataatga tacaatgtat catgcctctt
+     1921 tgcaccattc taaagaataa cagtgataat ttctgggtta aggcaatagc aatatctctg
+     1981 catataaata tttctgcata taaattgtaa ctgatgtaag aggtttcata ttgctaatag
+     2041 cagctacaat ccagctacca ttctgctttt attttatggt tgggataagg ctggattatt
+     2101 ctgagtccaa gctaggccct tttgctaatc atgttcatac ctcttatctt cctcccacag
+     2161 ctcctgggca acgtgctggt ctgtgtgctg gcccatcact ttggcaaaga attcacccca
+     2221 ccagtgcagg ctgcctatca gaaagtggtg gctggtgtgg ctaatgccct ggcccacaag
+     2281 tatcactaag ctcgctttct tgctgtccaa tttctattaa aggttccttt gttccctaag
+     2341 tccaactact aaactggggg atattatgaa gggccttgag catctggatt ctgcctaata
+     2401 aaaaacattt attttcattg caatgatgta tttaaattat ttctgaatat tttactaaaa
+     2461 agggaatgtg ggaggtcagt gcatttaaaa cataaagaaa tgaagagcta gttcaaacct
+     2521 tgggaaaata cactatatct taaactccat gaaagaaggt gaggctgcaa acagctaatg
+     2581 cacattggca acagccctga tgcatatgcc ttattcatcc ctcagaaaag gattcaagta
+     2641 gaggcttgat ttggaggtta aagttttgct atgctgtatt ttacattact tattgtttta
+     2701 gctgtcctca tgaatgtctt ttcactaccc atttgcttat cctgcatctc tcagccttga
+     2761 ctccactcag ttctcttgct tagagatacc acctttcccc tgaagtgttc cttccatgtt
+     2821 ttacggcgag atggtttctc ctcgcctggc cactcagcct tagttgtctc tgttgtctta
+     2881 tagaggtcta cttgaagaag gaaaaacagg ggtcatggtt tgactgtcct gtgagccctt
+     2941 cttccctgcc tcccccactc acagtgaccc ggaatctgca gtgctagtct cccggaacta
+     3001 tc
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank.noseq
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank.noseq	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.genbank.noseq	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,240 @@
+LOCUS       DDU63596      310 bp    DNA             INV       14-MAY-1999
+DEFINITION  Dictyostelium discoideum Tdd-4 transposable element flanking
+            sequence, clone p427/428 right end.
+ACCESSION   U63596
+NID         g2393749
+KEYWORDS    .
+SOURCE      Dictyostelium discoideum.
+  ORGANISM  Dictyostelium discoideum
+            Eukaryota; Dictyosteliida; Dictyostelium.
+REFERENCE   1  (bases 1 to 310)
+  AUTHORS   Wells,D.J.
+  TITLE     Tdd-4, a DNA transposon of Dictyostelium that encodes proteins
+            similar to LTR retroelement integrases
+  JOURNAL   Nucleic Acids Res. 27 (11), 2408-2415 (1999)
+REFERENCE   2  (bases 1 to 310)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Dictyostelium discoideum Tdd-4 transposable element, right end
+            flanking sequence from clone p427/428
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 310)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (11-JUL-1996) Biology, Utah State Univ., Logan, UT
+            84322-5305, USA
+FEATURES             Location/Qualifiers
+     source          1..310
+                     /organism="Dictyostelium discoideum"
+                     /strain="AX4"
+                     /db_xref="taxon:44689"
+                     /clone="p427/428"
+     misc_feature    5.12
+                     /note="Fuzzy location"
+     misc_feature    join(J00194:(100..202),1..245,256..258)
+                     /note="Location partly in another entry"
+BASE COUNT      118 a     46 c     67 g     79 t
+//
+LOCUS       DDU63595       83 bp    DNA             INV       14-MAY-1999
+DEFINITION  Dictyostelium discoideum Tdd-4 transposable element flanking
+            sequence, clone p427/428 left end.
+ACCESSION   U63595
+NID         g2393748
+KEYWORDS    .
+SOURCE      Dictyostelium discoideum.
+  ORGANISM  Dictyostelium discoideum
+            Eukaryota; Dictyosteliida; Dictyostelium.
+REFERENCE   1  (bases 1 to 83)
+  AUTHORS   Wells,D.J.
+  TITLE     Tdd-4, a DNA transposon of Dictyostelium that encodes proteins
+            similar to LTR retroelement integrases
+  JOURNAL   Nucleic Acids Res. 27 (11), 2408-2415 (1999)
+REFERENCE   2  (bases 1 to 83)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Dictyostelium discoideum Tdd-4 transposable element, left end
+            flanking sequence from clone p427/428
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 83)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (11-JUL-1996) Biology, Utah State Univ., Logan, UT
+            84322-5305, USA
+FEATURES             Location/Qualifiers
+     source          1..83
+                     /organism="Dictyostelium discoideum"
+                     /strain="AX4"
+                     /db_xref="taxon:44689"
+                     /clone="p427/428"
+BASE COUNT       31 a     16 c     12 g     24 t
+//
+LOCUS       HUMBDNF       918 bp    DNA             PRI       31-OCT-1994
+DEFINITION  Human brain-derived neurotrophic factor (BDNF) gene, complete cds.
+ACCESSION   M37762
+VERSION     M37762.1  GI:179402
+KEYWORDS    neurotrophic factor.
+SOURCE      Human DNA.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia;
+            Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 918)
+  AUTHORS   Jones,K.R. and Reichardt,L.F.
+  TITLE     Molecular cloning of a human gene that is a member of the nerve
+            growth factor family
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 87 (20), 8060-8064 (1990)
+  MEDLINE   91045937
+COMMENT     Draft entry and computer-readable sequence for [Proc. Natl. Acad.
+            Sci. U.S.A. (1990) In press] kindly submitted
+            by K.R.Jones, 13-AUG-1990.
+FEATURES             Location/Qualifiers
+     source          1..918
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /dev_stage="adult"
+     sig_peptide     76..123
+                     /gene="NTF3"
+                     /note="G00-125-917; putative"
+                     /product="brain-derived neurotrophic factor"
+     CDS             76..819
+                     /gene="BDNF"
+                     /note="putative"
+                     /codon_start=1
+                     /db_xref="GDB:G00-125-916"
+                     /product="brain-derived neurotrophic factor"
+                     /protein_id="AAA51820.1"
+                     /db_xref="GI:179403"
+                     /translation="MTILFLTMVISYFGCMKAAPMKEANIRGQGGLAYPGVRTHGTLE
+                     SVNGPKAGSRGLTSLADTFEHVIEELLDEDQKVRPNEENNKDADLYTSRVMLSSQVPL
+                     EPPLLFLLEEYKNYLDAANMSMRVRRHSDPARRGELSVCDSISEWVTAADKKTAVDMS
+                     GGTVTVLEKVPVSKGQLKQYFYETKCNPMGYTKEGCRGIDKRHWNSQCRTTQSYVRAL
+                     TMDSKKRIGWRFIRIDTSCVCTLTIKRGR"
+     gene            76..816
+                     /gene="NTF3"
+                     /map="12p13"
+     gene            76..819
+                     /gene="BDNF"
+                     /map="11p13"
+     mat_peptide     124..816
+                     /gene="NTF3"
+                     /note="G00-125-917; putative"
+                     /product="brain-derived neurotrophic factor"
+BASE COUNT      269 a    192 c    237 g    220 t
+//
+LOCUS       NT_010368  161485 bp    DNA             CON       16-NOV-2000
+DEFINITION  Homo sapiens chromosome 15 working draft sequence segment, complete
+            sequence.
+ACCESSION   NT_010368
+VERSION     NT_010368.1  GI:11433101
+KEYWORDS    HTG.
+SOURCE      human.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 161485)
+  AUTHORS   International Human Genome Project collaborators.
+  TITLE     Toward the complete sequence of the human genome
+  JOURNAL   Unpublished
+COMMENT     GENOME ANNOTATION REFSEQ:  NCBI contigs are derived from assembled
+            genomic sequence data. They may include both draft and finished
+            sequence.
+            COMPLETENESS: not full length.
+FEATURES             Location/Qualifiers
+     source          1..310
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /chromosome="15"
+     source          order(1..100,251..300,300..310)
+                     /note="Doctored from Accession AC011224 
+	             sequenced by Whitehead Institute
+                     for Biomedical Research"
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /clone="RP11-10K20"
+     variation       244
+                     /replace="T"
+                     /replace="A"
+                     /db_xref="dbSNP:140670"
+CONTIG      AC011224.27:1..116977
+//
+LOCUS       HUMBETGLOA              3002 bp    DNA     linear   PRI 26-AUG-1994
+DEFINITION  Human haplotype C4 beta-globin gene, complete cds.
+ACCESSION   L26462
+VERSION     L26462.1  GI:432453
+KEYWORDS    beta-globin.
+SOURCE      Homo sapiens DNA.
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Primates; Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 3002)
+  AUTHORS   Fullerton,S.M., Harding,R.M., Boyce,A.J. and Clegg,J.B.
+  TITLE     Molecular and population genetic analysis of allelic sequence
+            diversity at the human beta-globin locus
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 91 (5), 1805-1809 (1994)
+  MEDLINE   94173918
+   PUBMED   7907422
+FEATURES             Location/Qualifiers
+     source          1..3002
+                     /organism="Homo sapiens"
+                     /db_xref="taxon:9606"
+                     /haplotype="C4"
+                     /note="sequence found in a Melanesian population"
+     variation       111
+                     /replace="t"
+     variation       263
+                     /note="Rsa I polymorphism"
+                     /replace="t"
+     variation       273
+                     /replace="c"
+     variation       286..287
+                     /note="2 bp insertion of AT"
+                     /replace=""
+     variation       288
+                     /replace="t"
+     variation       295..296
+                     /note="1 bp deletion of C or 2 bp deletion of CT"
+                     /replace=""
+     variation       347
+                     /replace="c"
+     variation       476
+                     /replace="t"
+     variation       500
+                     /replace="c"
+     CDS             join(866..957,1088..1310,2161..2289)
+                     /codon_start=1
+                     /product="beta-globin"
+                     /protein_id="AAA21100.1"
+                     /db_xref="GI:532506"
+                     /translation="MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFE
+                     SFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPE
+                     NFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH"
+     exon            <866..957
+                     /number=1
+     variation       874
+                     /replace="c"
+     intron          958..1087
+                     /number=1
+     exon            1088..1310
+                     /number=2
+     intron          1311..2160
+                     /number=2
+     variation       1326
+                     /note="Ava II polymorphism"
+                     /replace="g"
+     variation       1384
+                     /replace="g"
+     variation       1391
+                     /replace="t"
+     variation       1976
+                     /replace="t"
+     exon            2161..>2289
+                     /number=3
+     variation       2522
+                     /replace="c"
+     variation       2602
+                     /replace="a"
+     variation       2604
+                     /replace="c"
+     variation       2760
+                     /note="Hinf I polymorphism"
+                     /replace="t"
+     variation       2913
+                     /replace="g"
+BASE COUNT      810 a    601 c    599 g    992 t

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.interpro
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.interpro	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.interpro	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,241 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<interpro_matches>
+
+   <protein id="AAA02489" length="1068" crc64="2BC49DB46BD50A75" >
+	<interpro id="IPR002719" name="Retinoblastoma-associated protein, B-box" type="Domain" parent_id="IPR011028">
+	  <match id="PF01857" name="RB_B" dbname="PFAM">
+	    <location start="786" end="944" score="2.2e-111" status="T" evidence="HMMPfam" />
+	  </match>
+	</interpro>
+	<interpro id="IPR002720" name="Retinoblastoma-associated protein, A-box" type="Domain" parent_id="IPR011028">
+	  <match id="PF01858" name="RB_A" dbname="PFAM">
+	    <location start="385" end="578" score="2e-120" status="T" evidence="HMMPfam" />
+	  </match>
+	</interpro>
+	<interpro id="IPR006670" name="Cyclin" type="Domain" parent_id="IPR011028">
+	  <found_in>
+	    <rel_ref ipr_ref="IPR005258"/>
+	    <rel_ref ipr_ref="IPR000812"/>
+	 </found_in>
+	  <contains>
+	    <rel_ref ipr_ref="IPR006671"/>
+	  </contains>
+	  <match id="SM00385" name="CYCLIN" dbname="SMART">
+	    <location start="801" end="910" score="8.1e-06" status="T" evidence="HMMSmart" />
+	  </match>
+	</interpro>
+	<interpro id="IPR011028" name="Cyclin-like" type="Domain">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR002719"/>
+	    <rel_ref ipr_ref="IPR002720"/>
+	    <rel_ref ipr_ref="IPR006670"/>
+	    <rel_ref ipr_ref="IPR006671"/>
+	    <rel_ref ipr_ref="IPR004367"/>
+	  </child_list>
+	  <found_in>
+	    <rel_ref ipr_ref="IPR005258"/>
+	    <rel_ref ipr_ref="IPR008721"/>
+	    <rel_ref ipr_ref="IPR004944"/>
+	    <rel_ref ipr_ref="IPR000812"/>
+	 </found_in>
+	  <match id="SSF47954" name="Cyclin-like" dbname="SUPERFAMILY">
+	    <location start="392" end="582" score="5.3e-69" status="T" evidence="superfamily" />
+	    <location start="786" end="969" score="1.2e-43" status="T" evidence="superfamily" />
+	    <location start="14" end="129" score="2.9e-05" status="T" evidence="superfamily" />
+	  </match>
+	</interpro>
+	  <match id="seg" name="seg" dbname="SEG">
+	    <location start="8" end="20" status="?" evidence="Seg" />
+	    <location start="609" end="618" status="?" evidence="Seg" />
+	  </match>
+   </protein>
+   <protein id="AAA02501" length="2221" crc64="2281A7271A5F1BBD" >
+	<interpro id="IPR001682" name="Ca2+/Na+ channel, pore region" type="Family" parent_id="IPR005820">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR001696"/>
+	    <rel_ref ipr_ref="IPR002077"/>
+	  </child_list>
+	  <contains>
+	    <rel_ref ipr_ref="IPR002111"/>
+	  </contains>
+	  <match id="PS50273" name="CHANNEL_PORE_CA_NA" dbname="PROFILE">
+	    <location start="234" end="408" score="16.410" status="T" evidence="ProfileScan" />
+	    <location start="617" end="756" score="17.689" status="T" evidence="ProfileScan" />
+	    <location start="1015" end="1189" score="14.354" status="T" evidence="ProfileScan" />
+	    <location start="1374" end="1527" score="52.450" status="T" evidence="ProfileScan" />
+	  </match>
+	</interpro>
+	<interpro id="IPR002077" name="Ca2+ channel, alpha subunit" type="Family" parent_id="IPR001682">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR005445"/>
+	    <rel_ref ipr_ref="IPR005446"/>
+	    <rel_ref ipr_ref="IPR005447"/>
+	    <rel_ref ipr_ref="IPR005448"/>
+	    <rel_ref ipr_ref="IPR005449"/>
+	  </child_list>
+	  <contains>
+	    <rel_ref ipr_ref="IPR002111"/>
+	  </contains>
+	  <match id="PR00167" name="CACHANNEL" dbname="PRINTS">
+	    <location start="255" end="270" score="9.3e-75" status="T" evidence="FPrintScan" />
+	    <location start="366" end="383" score="9.3e-75" status="T" evidence="FPrintScan" />
+	    <location start="405" end="429" score="9.3e-75" status="T" evidence="FPrintScan" />
+	    <location start="687" end="713" score="9.3e-75" status="T" evidence="FPrintScan" />
+	    <location start="1187" end="1207" score="9.3e-75" status="T" evidence="FPrintScan" />
+	    <location start="1397" end="1411" score="9.3e-75" status="T" evidence="FPrintScan" />
+	    <location start="1445" end="1457" score="9.3e-75" status="T" evidence="FPrintScan" />
+	  </match>
+	</interpro>
+	<interpro id="IPR002111" name="Cation (not K+) channel, TM region" type="Domain">
+	  <found_in>
+	    <rel_ref ipr_ref="IPR000434"/>
+	    <rel_ref ipr_ref="IPR002077"/>
+	    <rel_ref ipr_ref="IPR002153"/>
+	    <rel_ref ipr_ref="IPR005445"/>
+	    <rel_ref ipr_ref="IPR005446"/>
+	    <rel_ref ipr_ref="IPR005447"/>
+	    <rel_ref ipr_ref="IPR005448"/>
+	    <rel_ref ipr_ref="IPR005449"/>
+	    <rel_ref ipr_ref="IPR005450"/>
+	    <rel_ref ipr_ref="IPR005451"/>
+	    <rel_ref ipr_ref="IPR005452"/>
+	    <rel_ref ipr_ref="IPR005457"/>
+	    <rel_ref ipr_ref="IPR005458"/>
+	    <rel_ref ipr_ref="IPR005459"/>
+	    <rel_ref ipr_ref="IPR005461"/>
+	    <rel_ref ipr_ref="IPR005462"/>
+	    <rel_ref ipr_ref="IPR005463"/>
+	    <rel_ref ipr_ref="IPR008051"/>
+	    <rel_ref ipr_ref="IPR008052"/>
+	    <rel_ref ipr_ref="IPR008053"/>
+	    <rel_ref ipr_ref="IPR008054"/>
+	    <rel_ref ipr_ref="IPR005820"/>
+	    <rel_ref ipr_ref="IPR005821"/>
+	    <rel_ref ipr_ref="IPR003915"/>
+	    <rel_ref ipr_ref="IPR001682"/>
+	    <rel_ref ipr_ref="IPR001696"/>
+	    <rel_ref ipr_ref="IPR008344"/>
+	    <rel_ref ipr_ref="IPR008345"/>
+	    <rel_ref ipr_ref="IPR008346"/>
+	    <rel_ref ipr_ref="IPR008347"/>
+	 </found_in>
+	  <match id="PS50272" name="CATION_CHANNEL_TRPL" dbname="PROFILE">
+	    <location start="618" end="756" score="10.675" status="T" evidence="ProfileScan" />
+	    <location start="1310" end="1527" score="11.101" status="T" evidence="ProfileScan" />
+	  </match>
+	</interpro>
+	<interpro id="IPR005446" name="L-type voltage-dependent calcium channel alpha 1 subunit" type="Family" parent_id="IPR002077">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR005450"/>
+	    <rel_ref ipr_ref="IPR005451"/>
+	    <rel_ref ipr_ref="IPR005452"/>
+	  </child_list>
+	  <contains>
+	    <rel_ref ipr_ref="IPR002111"/>
+	  </contains>
+	  <match id="PR01630" name="LVDCCALPHA1" dbname="PRINTS">
+	    <location start="152" end="170" score="4.2e-49" status="T" evidence="FPrintScan" />
+	    <location start="203" end="214" score="4.2e-49" status="T" evidence="FPrintScan" />
+	    <location start="414" end="427" score="4.2e-49" status="T" evidence="FPrintScan" />
+	    <location start="501" end="519" score="4.2e-49" status="T" evidence="FPrintScan" />
+	    <location start="590" end="605" score="4.2e-49" status="T" evidence="FPrintScan" />
+	    <location start="678" end="691" score="4.2e-49" status="T" evidence="FPrintScan" />
+	  </match>
+	</interpro>
+	<interpro id="IPR005451" name="L-type voltage-dependent calcium channel alpha 1C subunit" type="Family" parent_id="IPR005446">
+	  <contains>
+	    <rel_ref ipr_ref="IPR002111"/>
+	    <rel_ref ipr_ref="IPR010983"/>
+	  </contains>
+	  <match id="PR01635" name="LVDCCALPHA1C" dbname="PRINTS">
+	    <location start="17" end="32" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="33" end="48" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="64" end="80" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="788" end="807" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="808" end="827" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="828" end="844" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="1480" end="1494" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="1727" end="1739" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="1788" end="1806" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="1807" end="1823" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="1929" end="1949" score="3.6e-116" status="T" evidence="FPrintScan" />
+	    <location start="1994" end="2004" score="3.6e-116" status="T" evidence="FPrintScan" />
+	  </match>
+	</interpro>
+	<interpro id="IPR005820" name="Cation channel, non-ligand gated" type="Family" parent_id="IPR005821">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR001682"/>
+	    <rel_ref ipr_ref="IPR003091"/>
+	  </child_list>
+	  <contains>
+	    <rel_ref ipr_ref="IPR001622"/>
+	    <rel_ref ipr_ref="IPR002111"/>
+	  </contains>
+	  <match id="PS50266" name="CATION_CHANNEL_TM" dbname="PROFILE">
+	    <location start="124" end="408" score="28.433" status="T" evidence="ProfileScan" />
+	    <location start="524" end="756" score="24.586" status="T" evidence="ProfileScan" />
+	    <location start="900" end="1189" score="26.324" status="T" evidence="ProfileScan" />
+	    <location start="1239" end="1527" score="30.008" status="T" evidence="ProfileScan" />
+	  </match>
+	</interpro>
+	<interpro id="IPR005821" name="Ion transport protein" type="Family">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR004729"/>
+	    <rel_ref ipr_ref="IPR005820"/>
+	    <rel_ref ipr_ref="IPR003280"/>
+	  </child_list>
+	  <contains>
+	    <rel_ref ipr_ref="IPR001622"/>
+	    <rel_ref ipr_ref="IPR002110"/>
+	    <rel_ref ipr_ref="IPR002111"/>
+	  </contains>
+	  <match id="PF00520" name="Ion_trans" dbname="PFAM">
+	    <location start="158" end="404" score="1.2e-71" status="T" evidence="HMMPfam" />
+	    <location start="555" end="752" score="4.8e-58" status="T" evidence="HMMPfam" />
+	    <location start="952" end="1185" score="5.2e-66" status="T" evidence="HMMPfam" />
+	    <location start="1270" end="1523" score="1.8e-70" status="T" evidence="HMMPfam" />
+	  </match>
+	</interpro>
+	<interpro id="IPR010983" name="EF-Hand-like" type="Domain">
+	  <child_list>
+	    <rel_ref ipr_ref="IPR000261"/>
+	    <rel_ref ipr_ref="IPR001751"/>
+	    <rel_ref ipr_ref="IPR002048"/>
+	  </child_list>
+	  <found_in>
+	    <rel_ref ipr_ref="IPR005359"/>
+	    <rel_ref ipr_ref="IPR005451"/>
+	    <rel_ref ipr_ref="IPR007736"/>
+	    <rel_ref ipr_ref="IPR008080"/>
+	    <rel_ref ipr_ref="IPR003299"/>
+	    <rel_ref ipr_ref="IPR001125"/>
+	    <rel_ref ipr_ref="IPR001192"/>
+	    <rel_ref ipr_ref="IPR005176"/>
+	    <rel_ref ipr_ref="IPR003153"/>
+	 </found_in>
+	  <match id="SSF47473" name="EF-hand" dbname="SUPERFAMILY">
+	    <location start="1526" end="1670" score="3.6e-05" status="T" evidence="superfamily" />
+	  </match>
+	</interpro>
+	  <match id="coil" name="coiled-coil" dbname="COIL">
+	    <location start="756" end="783" status="?" evidence="Coil" />
+	    <location start="1619" end="1640" status="?" evidence="Coil" />
+	  </match>
+	  <match id="seg" name="seg" dbname="SEG">
+	    <location start="26" end="39" status="?" evidence="Seg" />
+	    <location start="504" end="518" status="?" evidence="Seg" />
+	    <location start="635" end="648" status="?" evidence="Seg" />
+	    <location start="650" end="668" status="?" evidence="Seg" />
+	    <location start="767" end="777" status="?" evidence="Seg" />
+	    <location start="798" end="809" status="?" evidence="Seg" />
+	    <location start="838" end="857" status="?" evidence="Seg" />
+	    <location start="905" end="917" status="?" evidence="Seg" />
+	    <location start="994" end="1003" status="?" evidence="Seg" />
+	    <location start="1013" end="1027" status="?" evidence="Seg" />
+	    <location start="1162" end="1181" status="?" evidence="Seg" />
+	    <location start="1812" end="1822" status="?" evidence="Seg" />
+	    <location start="2027" end="2042" status="?" evidence="Seg" />
+	    <location start="2084" end="2093" status="?" evidence="Seg" />
+	  </match>
+   </protein>
+</interpro_matches>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.lasergene
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.lasergene	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.lasergene	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,24 @@
+Created: Jueves, 08 de Junio de 2006 01:56 p.m.
+
+This is a test sequence created with EditSeq (Lasergene's DNAStar)
+
+^^
+ATCGATCGATCG
+TCGATCGATCGA
+CGATCGATCGTT
+^^
+>gi|105633134|gb|DW246643.1|DW246643 JC3_E04 Forward JC Glycine max cDNA 5', mRNA sequence
+^^
+ATACACCTCTTTCTCATTGAACATTGGAATCTGGACAAGCACGACGGGGTAATTAGAGTTGCCGAGTTCCTCGTCGTCTTGGAGTGGCTCGAACTTGTAGCGCTGGTGTGGCTTCTTCCAGAAGAGCTTGACGAGGATGATGACAATGCCCATGTAGACCCTCTCCATGAAGAGCATGAGCGCCATGGCGAGGCTAATGC
+^^
+Sample protein sequence in Lasergene format
+ALEU_HORVU
+
+
+^^
+MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
+SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
+AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
+GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
+QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
+IATCASYPVVAA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.mase
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.mase	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.mase	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,298 @@
+;; saved by seaview on Wed Jan  7 11:14:33 1998
+;;# of segments=14 all seqs
+;; 25,25 27,27 30,30 96,96 98,98 117,117 149,149 160,160 164,164 173,173
+;; 258,258 282,282 289,289 291,291
+;no comment
+AK1H_ECOLI/114-431
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIVAIA
+;no comment
+AKH_HAEIN/114-431
+-----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQ
+LLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNG
+SDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLP
+TLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESL
+Q----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS
+---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQ
+AKGIAARF------FSALAQANISIVAIA
+;no comment
+AKH1_MAIZE/117-440
+-----------------ATESFSDFVVGHGELWSAQMLSYAIQKSGT-P--CSWMDTREV
+LVVNPSGANQVDPDYLESEKRLEKWFSRC-P-AETIIATGF---IASTPENIPTTLKRDG
+SDFSAAI-----------------IGSLVKARQVTIWTDVDGVFSADP--RKVSEAVILS
+TLSYQEAWEMSYFGANVLHPRTIIPVMKYNIPIVIRNIFNTSAPGTM-IC--QQPANENG
+DLEACVKAFATIDKLALVNVEGTGMAGV---PGTANAIFGAVKDVGANVIMISQASSEHS
+---VCFAVPEKEVALVSAALHARFR-----EALAAGRLSKVEVIHNCSILAT-VGLRMAS
+TPGVSATL------FDALAKANINVRAIA
+;no comment
+AK2H_ECOLI/112-431
+-----------------INDAVYAEVVGHGEVWSARLMSAVLNQQG-----LPAAWLDAR
+EFLRAERAAQPQVDEGLSYPLLQQLLVQH-P-GKRLVVTGF---ISRNNAGETVLLGRNG
+SDYSATQ-----------------IGALAGVSRVTIWSDVAGVYSADP--RKVKDACLLP
+LLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLASGTGARI
+VTSHDDVCLI-EFQVPASQDFKLAHKEI--DQILKRAQVRPLAVGVHNDRQLLQFCYTSE
+VADSALKILDEAG---------LPGELRLRQGLALVAMVGAGVTRNPLHCHRFWQQLKGQ
+PVEFTWQSDDGISLVAVL
+;no comment
+AK1_BACSU/66-374
+-----------------ISPREQDLLLSCGETISSVVFTSMLLDNGVKA--AALTGAQAG
+FLTNDQHTNAKIIEMKPER--LFSVLAN----HDAVVVAGF---QGATEKGDTTTIGRGG
+SDTSAAA-----------------LGAAVDAEYIDIFTDVEGVMTADP--RVVENAKPLP
+VVTYTEICNLAYQGAKVISPRAVEIAMQAKVPIRVRSTYS-NDKGTLVTSHHSSKVGSDV
+FERLITGIAH-VKDVTQFKVPAKIGQYN-----VQTEVFKAMANAGISVDFFNITPSEIV
+YTVAGNKTETAQR------------ILMDMGYDPMVTRNCAKVSAVGAGIMGVPGVTSKI
+------VSALSEKEIPILQSA
+;no comment
+AK2_BACST/63-370
+-----------------KRE--MDMLLSTGEQVSIALLAMSLHEKGYKA--VSLTGWQAG
+ITTEEMHGNARIMNIDTT--RIRRCLDE----GAIVIVAGF---QGVTETGEITTLGRGG
+SDTTAVA-----------------LAAALKAEKCDIYTDVTGVFTTDP--RYVKTARKIK
+EISYDEMLELANLGAGVLHPRAVEFAKNYEVPLEVRSSME-NERGTMVK--EEVSMEQHL
+IVRGIAFEDQ-VTRVTVVGIEKYLQSVA--------TIFTALANRGINVDIIIQNA----
+----------------TNSETAS--VSFSIRTEDLPETLQVLQ-------------ALEG
+ADVHYESGLAKVSI-VGSGMISNPGVAARV------FEVLADQGIEIKMVS
+;no comment
+AK2_BACSU/63-373
+-----------------KRE--MDMLLATGEQVTISLLSMALQEKGYDA--VSYTGWQAG
+IRTEAIHGNARITDIDTS--VLADQLEK----GKIVIVAGF---QGMTEDCEITTLGRGG
+SDTTAVA-----------------LAAALKVDKCDIYTDVPGVFTTDP--RYVKSARKLE
+GISYDEMLELANLGAGVLHPRAVEFAKNYQVPLEVRSSTE-TEAGTLIE--EESSMEQNL
+IVRGIAFEDQ-ITRVTIYGLTSGLTTLS--------TIFTTLAKRNINVDIIIQTQ----
+----------------AEDKTG---ISFSVKTEDADQTVAVLEEYK---------DALEF
+EKIETESKLAKVSI-VGSGMVSNPGVAAEM------FAVLAQKNILIKMVS
+;no comment
+AKAB_CORFL/63-379
+-----------------ARE--MDMLLTAGERISNALVAMAIESLGAEA--QSFTGSQAG
+VLTTERHGNARIVDVTPG--RVREALDE----GKICIVAGF--QGVNKETRDVTTLGRGG
+SDTTAVA-----------------LAAALNADVCEIYSDVDGVYTADP--RIVPNAQKLE
+KLSFEEMLELAAVGSKILVLRSVEYARAFNVPLRVRSSYS-NDPGTLIAGSMEDIPVEEA
+VLTGVATDKS-EAKVTVLGISDKPGEAA--------KVFRALADAEINIDMVLQNV----
+----------------SSVEDGTTDITFTCPRADGRRAMEILKKLQ---------VQGNW
+TNVLYDDQVDKVSL-VGAGMKSHPGVTAEF------MEALRDVNVNIELIS
+;no comment
+AKAB_MYCSM/63-379
+-----------------PRE--MDMLLTAGERISNALVAMAIESLGAQA--RSFTGSQAG
+VITTGTHGNAKIIDVTPG--RLRDALDE----GQIVLVAGF--QGVSQDSKDVTTLGRGG
+SDTTAVA-----------------VAAALDADVCEIYTDVDGIFTADP--RIVPNARHLD
+TVSFEEMLEMAACGAKVLMLRCVEYARRYNVPIHVRSSYS-DKPGTIVKGSIEDIPMEDA
+ILTGVAHDRS-EAKVTVVGLPDVPGYAA--------KVFRAVAEADVNIDMVLQNI----
+----------------SKIEDGKTDITFTCARDNGPRAVEKLSALK---------SEIGF
+SQVLYDDHIGKVSL-IGAGMRSHPGVTATF------CEALAEAGINIDLIS
+;no comment
+AK3_ECOLI/106-407
+-----------------TSPALTDELVSHGELMSTLLFVEILRERD--V--QAQWFDVRK
+VMRTNDRFGRAEPDIAALAELAALQLLPR-LNEGLVITQGF---IGSENKGRTTTLGRGG
+SDYTAAL-----------------LAEALHASRVDIWTDVPGIYTTDP--RVVSAAKRID
+EIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTLVCNKTENPPLFRA
+LAL--RRNQT-LLTLHSLNMLHSRGFLA--------EVFGILARHNISVDLITTSEVSVA
+LTLDTTGSTSTG----------DTLLTQSLLMELSALCRVEVEEGLALVALIG-------
+---NDLSKACGVGKEVF
+;no comment
+AK_YEAST/134-472
+-----------------VSSRTVDLVMSCGEKLSCLFMTALCNDRGCKAKYVDLSHIVPS
+DFSASALDNSFYTFLVQALKEKLAPFVSA-KERIVPVFTGF---FGLVPTGLLNGVGRGY
+TDLCAAL-----------------IAVAVNADELQVWKEVDGIFTADP--RKVPEARLLD
+SVTPEEASELTYYGSEVIHPFTMEQVIRAKIPIRIKNVQNPLGNGTIIYPDNVAKKGEST
+PPHPPENLSS----SFYEKRKRGATAITTKN----DIFVINIHSNKKTLSHGFLAQIFTI
+LDKYKLVVDLISTSEVHVSMALPIPDADS-LKSLRQAEEKLRILGSVDITKKLSIVSLVG
+KHMKQYIGIAG---TMFTTLAEEGINIEMIS
+;no comment
+PROB_BACSU/1-278
+-------------------MKKQRIVVKIGSSSLTNSKGS------------IDEQNQRA
+CSAISVLKKAG---------------------HEMILITS-----------GAVAAGFSS
+LGYPSRP---------VTIKGKQAAAAVGQTLLMQQYMNQFKQYSLTPGQILLTRNDFSK
+RERYRNAYA---TIMELLERGVIPIINENDSTSVEELTFGDNDMLSALVSGLIHADQLMI
+LTDINGLYDANPNEILSERFDYLPEITPELL-GYAGSAGSKVGTGGMKSKLLATQTALSL
+GVKVFIGTGSGEQKLADILDGRGDG-TYIGDKELSSVN-NT------RQWIQFHSPISGE
+II
+;no comment
+PROB_CORGL/1-288
+--------------MRERISNAKRVVVKIGSSSLTNDEDG------------HTVDPN-R
+INTIVNALQARMEAG-----------------SDLIVVSS-----------GAVAAGMAP
+LGLSTRP---------TELAVKQAAAAVGQVHLMHQWGRSFARYGRPIGQVLLTAADAGK
+RDRARNAQR---TIDKLRILGAVPIVNENDTVATTGVNFGDNDRLAAIVAHLVSADALVL
+LSDVD----------GLFDKNPTDPTAKFISEVRDGNDLKGVIAGDGGKVGTGGMASKVS
+AARLASRSG--VPVLLTSAANIGPALEDAQVGTVFHPKDNRLSAWKFWALYA------AD
+TAGKIRLDDGAVE
+;no comment
+PROB_ECOLI/1-253
+------------------MSDSQTLVVKLGTSVLTGGSRR-----LNRAHI----VEL--
+VRQCAQLHAAGHR---------------------IVIVTS-----------GAIAAGREH
+LGYPELP---------ATIASKQLLAAVGQSRLIQLWEQLFSIYGIHVGQMLLTRADMED
+RERFLNARD---TLRALLDNNIVPVINENDAVATAAIKVGDNDNLSALAAILAGADKLLL
+LTDQK----------GLYTADPRSNPQAE-----LIKDVYGIDDALRAIAGDSVSGLGTG
+----------GMSTKLQAADVACRAGID-TIIAAGSKP-------------GVIGDVMEG
+ISVGT
+;no comment
+PROB_HAEIN/1-243
+-------------------MNKKTIVVKFGTSTLTQGSPK-----LNSPHM----MEI--
+VRQIAQLHNDGFR---------------------IVIVTS-----------GAIAAGRHY
+LNHPQLP---------PTIASKQLLAAVGQSQLIQAWEKLFAIYDIHIGQLLLTRADIED
+RERFLNARD---TLYALLDNHIIPVINENDAVATAEIKVGDNDNLSALVAILVQAEQLYL
+LTDQQ----------GLFDSDPRKNPEAK-----LIPVVEQITDHIRSIAGGSGTNLGTG
+----------GMMTKIIAADVATRSGIE-TIIAPGNRP-------------NVIAD----
+-----
+;no comment
+PROB_YEAST/1-264
+-------------MKDANESKSYTIVIKLGSSSLVDEKTKE----PKLAIM----SLI--
+VETVVKLRRMGHK---------------------VIIVSS-----------GGIAVGLRT
+MRMNKRP---------KHLAEVQAIAAIGQGRLIGRWDLLFSQFDQRIAQILLTRNDILD
+WTQYKNAQN---TINELLNMGVIPIVNENDTLSVREIKFGDNDTLSAITSALIHADYLFL
+LTDVD----------CLYTDNPRTNPDAM-----PILVVPDLSKGLPGVNTAGGSGSDVG
+TG----------GMETKLVAADLATNAGVH-TLIMKSDTPAN----------IGRIVEYM
+QTLELDD
+;no comment
+YHJ3_YEAST/1-259
+------------------MTKAYTIVIKLGSSSLVDESTKE----PKLSIM----TLI--
+VETVTNLKRMGHK---------------------VIIVSS-----------GGIAVGLDA
+LNIPHKP---------KQLSEVQAIAAVGQGRLIARWNMLFSQYGEQTAQILLTRNDILR
+WNQYNNARN---TINELLAMGVIPIVNENDTLSISEIEFGDNDTLSAITAALVGADFLFL
+LTDVD----------CLYTDNPRTNPDAR-----PIVLVPELSEGLPGVNTSSGSGSEVG
+TG----------GMRTKLIAADLASNAGIE-TIVMKSDRPEY----------VPKIVDYI
+QHHFRPP
+;no comment
+PYRH_ECOLI/1-226
+-------------------PVYKRILLKLSGEALQGTEGFGIDASILDR----------M
+AQEIKE--------------------LVELGIQVGVVIGGG----NLFRGAGLAKAGMNR
+VVGDHMGMLATVMNGLAMRDALHRAYVNARLMSAIPLNGVCDSYSWAEAISLL-----RN
+NRVVI------------LSAGTGNPFFTTDSAACLRGIEIEADVVLKATKVDGVFTA-DP
+AKD---------PTATMYEQLTYSEVLEKELKVMDLAAFTLARDHKLPIRVF-NMNKPGA
+LRRVVMG
+;no comment
+PYRH_HAEIN/1-229
+----------------MSQPIYKRILLKLSGEALQGEDGLGIDPAILDR----------M
+AVEIKE--------------------LVEMGVEVSVVLGGG----NLFRGAKLAKAGMNR
+VVGDHMGMLATVMNGLAMRDSLFRADVNAKLMSAFQLNGICDTYNWSEAIKML-----RE
+KRVVI------------FSAGTGNPFFTTDSTACLRGIEIEADVVLKATKVDGVYDC-DP
+AKN---------PDAKLYKNLSYAEVIDKELKVMDLSAFTLARDHGMPIRVF-NMGKPGA
+LRQVVTG
+;no comment
+PYRH_MYCTU/1-237
+P-------ASTGAASAAQLSGYSRVLLKLGGEMFGGGQ-VGLDPDVVAQ----------V
+ARQIAD--------------------VVRGGVQIAVVIGGG----NFFRGAQLQQLGMER
+TRSDYMGMLGTVMNSLALQDFLEKEGIVTRVQTAITMGQVAEPYLPLRAVRHL-----EK
+GRVVI------------FGAGMGLPYFSTDTTAAQRALEIGADVVLMAKAVDGVFAE-DP
+RVN---------PEAELLTAVSHREVLDRGLRVADATAFSLCMDNGMPILVF-NLLTDGN
+IARAVRG
+;no comment
+P5C1_ARATH/320-639
+ILLDIADALEANVTTIKAENELDVASAQEAG--LEESMVARLVMTPG-KISSLAASVRKL
+AD-MEDPIGRVL-KKTEVADGLVLEK-TSSPLGVLLIVFESRPDALVQIASLAIRSGNGL
+LLKGGKE-----------------ARRSNAILHKVITDAIPETVG--GKLIGLVTS--RE
+EIPDL-----------------LKLDDVIDLVI-PRGSNKLVTQIKNTTK-IPVLGHADG
+I-------------CHVYVDKACDTDMAKRIVSDAKLDYPAACNAMETLLVHKDLEQNAV
+LNELIF--ALQSNGVTLYGGPRASKILNIP-------EARSFNHEYCAKACTVEVVED-V
+YGAIDHIHRHGSAH------TDCIVTEDHEVAELF
+;no comment
+P5CS_VIGAC/321-640
+ILLKIADALEANEKIIRIENEADVTAAQEAG--YEKSLVARLALKPG-KIASLANNMRII
+AN-MEDPIGRVL-KRTELSDGLILEK-TSSPLGVLLIVFESRPDALVQIASLAIRSGNGL
+LLKGGKE-----------------AKRSNAILHKVIIEAIPDNVG--GKLIGLVTS--RE
+EIPEL-----------------LKLDDVIDLVI-PRGSNKLVSQIKSSTK-IPVLGHADG
+I-------------CHVYVDKSANVEMAKRIVLDAKVDYPAACNAMETLLIHKDLIEKGW
+LKEIIL--DLRTEGVILYGGPVASSLLNIP-------QAHSFHHEYSSLACTAEIVDD-V
+YAAIDHINLYGSAH------TDSIVAEDNEVANVF
+;no comment
+PROA_SYNY3/1-318
+----IADGLTAAMPEILAANQEDCAAAEAMG--IAKPLYNRLLLGES-KLKSTIAGVKDV
+EH-LPDPLGQVT-LHRQLDEGLVLKR-VGCPLGVLGVIFEARPEALIQISSLAIKSGNAV
+ILKGGRE-----------------ATRSCQVLTEVIQTALAKTVV-SPEAINLLTT--RE
+EIREL-----------------LGLNQYVDLII-PRGSNEFVQYIQQNTQ-IPVLGHADG
+I-------------CHLYLDAQADLSKAIPITVDAKTQYPAACNAIETLLVHQAIAAEFL
+PPLAQ--ALGEKGVSLRGDSGTQKLIDCEP-----ATEADWCTEYSDLILSIKIVDS-LE
+AAIDHINQYGSKH------TDGIISEDLTAAEQF
+;no comment
+PROA_YEAST/1-343
+ILYKIHDALKANAHAIEEANKIDLAVAKETG--LADSLLKRLDLFKGDKFEVMLQGIKDV
+AE-LEDPVGKVK-MARELDDGLTLYQ-VTAPVGVLLVIFESRPEVIANITALSIKSGNAA
+ILKGGKE-------------SVNTFREMAKIVNDTIAQFQSETGV-PVGSVQLIET--RQ
+DVSDL-----------------LDQDEYIDLVV-PRGSNALVRKIKDTTK-IPVLGHADG
+I-------------CSIYLDEDADLIKAKRISLDAKTNYPAGCNAMETLLINPKFSKWWE
+VLENLTLEGGVTIHATKDLKT--AYFDKLNELGKLTEAIQCKTVDA-----DEEQDFDKE
+FLSLDLAAKFVTS-TESAIQHINTHSSRH------TDAIVTENKANAEKF
+;no comment
+P5CS_CAEEL/305-630
+MVRHLAALLVDKEKYIIEANQTDLANAKSAG--LDPQLLNRLKMTPE-KIQDLHAGLNTI
+ADSAETLVGRVL-KKVKISEGLFLEQ-VTVPIGSLMVIFESRPDCLPQVASLAMASGNAL
+LLKGGKE-----------------AEESNKALHALVQEALGTHGFEMRDAVTLVRS--RE
+DVADL-----------------LQLKDLIDLII-PRGSSDLVRSMQEKSKGIPVLGHAEG
+V-------------WHVYIDKDCDEQKAIQIVRDSKCDYPSACNAAETILIHKDLA----
+-----------TAPFFDSLCS--MFKAEGVKLHAGPKLAALLKFAP-----PPAESMSFE
+YGSLECTLEVVDN-VEEAVAHIIRYGSGH------TESIITENTNTAEHF
+;no comment
+P5CS_HUMAN/323-645
+IIHHLADLLTDQRDEILLANKKDLEEAEGR---LAAPLLKRLSLSTS-KLNSLAIGLRQI
+AASSQDSVGRVL-RRTRIAKNLELEQ-VTVPIGVLLVIFESRPDCPTPGGSFAIASGNGL
+LLKGGKE-----------------AAHSNRILHLLTQEALSIHGV--KEAVQLVNT--RE
+EVEDL-----------------CRLDKMIDLII-PRGSSQLVRDIQKAAKGIPVMGHSEG
+I-------------CHMYVDSEASVDKVTRLVRDSKCEYPAACNALETLLIHRDLL----
+-----------RTPLFDQIID--MLRVEQVKIHAGPKFASYLTFSP-----SEVKSLRTE
+YGDLELCIEVVDN-VQDAIDHIHKYGSSH------TDVIVTEDENTAEFF
+;no comment
+PROA_BACSU/1-308
+-----------------AENAKDIVNGKENG--LTPDIIDRLSLDEK-RIRDIADAVELL
+ID-LADPIGDSL-ETIEKENGLFIQK-IRVPLGVVGMIYEARPNVTVDAATLCLKTGNAV
+VLRGSSS-----------------AIHSNKALVSVIYRALEQSAL-PIHTVQLIEDTSRE
+TAKEL-----------------FTLNDGLDVLI-PRGGKKLIDLVVREST-VPVLETGAG
+N-------------CHIFIDETAKPQMAEKVVVNAKTQRPSVCNAIESLLIHKAWA----
+------------RQNGKELLD--QLENAGVEIRG--DELVCELHP--SSKQASKEDWETE
+FLAPVLSVKTVEN-VQEAVKHIQQYGTNH------SEAILTENDKNAVYF
+;no comment
+PROA_CORGL/6-332
+ILRAAADELVARSAEIIEANASDIEAGRANG--MEESMIDRLALDES-RIEGIAGGLRQV
+AG-LTDPVGEVL-RGHVMENGIQMKQ-VRVPLGVMGMVYEARPNVTVDAFALALKSGNVA
+FVRGSST-----------------AVHSNTKLVEILQDVLERFEL-PRETVQLLPCQTRG
+SVQDL-----------------ITARGLVDVVI-PRGGAGLINAVVTGAT-VPTIETGTG
+N-------------CHFYIDAEAKLGQAIAMVINGKTRRCSVCNATETALLDAALS----
+------------DSDKLAVVQ--ALQEAGVTIHGRVAELEAFGAT--DVVEATETDWDSE
+YLSFDIAVAVVDG-VDGALAHIAKYSTKH------TEAIATQNIETAQRF
+;no comment
+PROA_ECOLI/1-328
+-LEKIADELEAQSEIILNANAQDVADARANG--LSEAMLDRLALTPA-RLKGIADDVRQV
+CN-LADPVGQVI-DGGVLDSGLRLER-RRVPLGVIGVIYEARPNVTVDVASLCLKTGNAV
+ILRGGKE-----------------TCRTNAATVAVIQDALKSCGL-PAGAVQAIDNPDRA
+LVSEM-----------------LRMDKYIDMLI-PRGGAGLHKLCREQST-IPVITGGIG
+V-------------CHIYVDESVEIAEALKVIVNAKTQRPSTCNTVETLLVNKNIA----
+------------DSFLPALSK--QMAESGVTLHADAAALAQLQAGPAKVVAVKAEEYDDE
+FLSLDLNVKIVSD-LDDAIAHIREHGTQH------SDAILTRDMRNAQRF
+;no comment
+PROA_SERMA/1-325
+----MADRLEANSEAILLANEQDMAQARATG--MSEALLDRLLLTPA-RLAAIANDVRQV
+CR-LNDPVGHVL-DGNLLDSGLKLER-RRVPLGVIGVIYEARPNVTIDVASLCLKTGNAV
+ILRGGKE-----------------THNTNQATVKVIQQALEQCGL-PAAAVQAIDSPDRA
+LVNEL-----------------LRLDRYVDMLI-PRGGAGLHKLCREQST-IPVITGGIG
+V-------------CHTYVDADVDFDKALTVIENAKIQRPSACNSLETLLVNRSIA----
+------------AEFLPALSA--KMAAAGVTLHAAENALPLLQGGPATVVPVNAEDYDDE
+WLSLDLNVLLVDD-IDQAIDHIRTHGTNH------SDAILTRSLSSAEHF
+;no comment
+PROA_HAEIN/1-314
+---------------ILAENAKDIELAKQNG--LSDALIDRLLLTQE-RLQGIANDVRHV
+IS-LADPVGKII-DGGTLDSGLKIER-VRTPLGVIGTIYEARPNVTIDVASLCLKTGNAV
+ILRGGKE-----------------TQFSNKILIEVVQNALEQAGL-PKFAVQAITDPNRE
+LVMQL-----------------LKLDRYVDMII-PRGGSGLHELCKQHST-IPVIVGGVG
+V-------------CHTFVEKSADQNKAIFVIDNAKTQRPSTCNTLETLLVQHSIA----
+------------EEFLPKLVS--HLSAKNVKYHAKSTALNILKQAGANVCEVTEKELRKE
+WGSLDLNVVVVED-IHAAIEHIRQYGTQH------SESILTSSQSLARQF
+;no comment
+PROA_THETH/1-320
+----MADLLEARWEEVLRANREDLEEAERTG--LPKAKLDRLALKEK-DLKTLTEGLRQI
+AR-LPDPLGRIE-GLAKRPNGLRVGR-MRVPLGLIGFIYEARPGATVEAVSVALKAGNAM
+LLRGGKE-----------------AFRSNRALVALWHEALEEAGL-PEEAVTLVPTTDRE
+AVLEM-----------------CRLE-LLDLLI-PRGGEELIRLVQQEAR-VPVLAHAKG
+V-------------NHLYVDEKADLSMALRLALNGKTQRPAVCNALEAVLVHEKVA----
+------------EAFLPRLEK--AMREKGVELRACPRALPLLKEA----VPAREDEWDRE
+YLDLVLRVKVVSG-LEEALAHIARYGSRH------TEAICTEDPKAAWRF
+;no comment
+PROA_CAMJE/1-281
+------------RILALCEGLEKIAYIEDP--------------------IGKISKGWKN
+Y-------------------AGLNIQKISIPLGLICVIYEARPSLSAEIAALMIKSSNAC
+VFKGGSE-----------------AKFTNEAIFTLVNKVLKEFDL--QDCFAMFTQ--RD
+EILQI-----------------LAFDDLIDVII-PRGSSNMIQEIANNTK-IPLIKQNKG
+L-------------CHAFVDQSANLDMALKIILNAKCQRVSVCNALETLLIHEKIAKN--
+--------------FISLLIP--EFEKFKVKIHAHENTLAYFNNSNLEIFKANENTFDTE
+WLDFALSVKLVKD-CDEAIEHINKHSSLH------SETIISNDASNIAKF

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.meme
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.meme	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.meme	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,486 @@
+********************************************************************************
+MEME - Motif discovery tool
+********************************************************************************
+MEME version 3.0 (Release date: 2001/03/05 14:24:28)
+
+For further information on how to interpret these results or to get
+a copy of the MEME software please access http://meme.sdsc.edu.
+
+This file may be used as input to the MAST algorithm for searching
+sequence databases for matches to groups of motifs.  MAST is available
+for interactive use and downloading at http://meme.sdsc.edu.
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+If you use this program in your research, please cite:
+
+Timothy L. Bailey and Charles Elkan,
+"Fitting a mixture model by expectation maximization to discover
+motifs in biopolymers", Proceedings of the Second International
+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,
+AAAI Press, Menlo Park, California, 1994.
+********************************************************************************
+
+
+********************************************************************************
+TRAINING SET
+********************************************************************************
+DATAFILE= D10Mit194.set.genbank.fasta.nref
+ALPHABET= ACGT
+Sequence name           Weight Length  Sequence name           Weight Length  
+-------------           ------ ------  -------------           ------ ------  
+20218                   1.0000   2000  10657                   1.0000   2000  
+83796                   1.0000   2000  6603                    1.0000   2000  
+********************************************************************************
+
+********************************************************************************
+COMMAND LINE SUMMARY
+********************************************************************************
+This information can also be useful in the event you wish to report a
+problem with the MEME software.
+
+command: meme D10Mit194.set.genbank.fasta.nref -dna -print_fasta -nmotifs 3 -maxw 25 
+
+model:  mod=         zoops    nmotifs=         3    evt=           inf
+object function=  E-value of product of p-values
+width:  minw=            8    maxw=           25    minic=        0.00
+width:  wg=             11    ws=              1    endgaps=       yes
+nsites: minsites=        2    maxsites=        4    wnsites=       0.8
+theta:  prob=            1    spmap=         uni    spfuzz=        0.5
+em:     prior=   dirichlet    b=            0.01    maxiter=        50
+        distance=    1e-05
+data:   n=            8000    N=               4
+strands: +
+sample: seed=            0    seqfrac=         1
+Letter frequencies in dataset:
+A 0.255 C 0.236 G 0.252 T 0.257 
+Background letter frequencies (from dataset with add-one prior applied):
+A 0.255 C 0.236 G 0.252 T 0.257 
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  1	width =   25   sites =   4   llr = 106   E-value = 1.2e-002
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 1 Description
+--------------------------------------------------------------------------------
+Simplified        A  :8:88aaa:53a8:::3:a::3::3
+pos.-specific     C  a3::3:::::3:::5a8a:8a5aa8
+probability       G  ::8:::::3:5::::::::3:::::
+matrix            T  ::33::::85::3a5::::::3:::
+
+         bits    2.1 *              * *  * ** 
+                 1.9 *    ***   * * * ** * ** 
+                 1.7 *    ***   * * * ** * ** 
+                 1.5 *    ***   * * * ** * ** 
+Information      1.2 *********  *** ****** ***
+content          1.0 ********** ********** ***
+(38.1 bits)      0.8 ********** ********** ***
+                 0.6 ********** **************
+                 0.4 *************************
+                 0.2 *************************
+                 0.0 -------------------------
+
+Multilevel           CAGAAAAATAGAATCCCCACCCCCC
+consensus             CTTC   GTA T T A  G A  A
+sequence                       C          T   
+                                              
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name             Start   P-value                      Site         
+-------------             ----- ---------            -------------------------
+6603                       1311  2.59e-15 GGCGCATTGA CAGAAAAATTGAATTCCCACCCCCC AATGAGGAGG
+83796                      1284  2.59e-15 GGAGGATTGA CAGAAAAATTGAATTCCCACCCCCC AACGAGGAGG
+20218                       938  6.34e-12 TTTTTGGTAA CCTTAAAATAAAATCCCCACCACCA CTTTTAAAAA
+10657                      1685  8.70e-12 GGCCCGCGCG CAGACAAAGACATTCCACAGCTCCC GCCCCCTCCA
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+6603                              2.6e-15  1310_[1]_665
+83796                             2.6e-15  1283_[1]_692
+20218                             6.3e-12  937_[1]_1038
+10657                             8.7e-12  1684_[1]_291
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 in FASTA format
+--------------------------------------------------------------------------------
+>6603                     pos 1311
+CAGAAAAATTGAATTCCCACCCCCC
+>83796                    pos 1284
+CAGAAAAATTGAATTCCCACCCCCC
+>20218                    pos  938
+CCTTAAAATAAAATCCCCACCACCA
+>10657                    pos 1685
+CAGACAAAGACATTCCACAGCTCCC
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 25 n= 7904 bayes= 11.6849 E= 1.2e-002 
+  -865    208   -865   -865 
+   156      8   -865   -865 
+  -865   -865    157     -4 
+   156   -865   -865     -4 
+   156      8   -865   -865 
+   197   -865   -865   -865 
+   197   -865   -865   -865 
+   197   -865   -865   -865 
+  -865   -865     -1    154 
+    97   -865   -865     96 
+    -3      8     99   -865 
+   197   -865   -865   -865 
+   156   -865   -865     -4 
+  -865   -865   -865    196 
+  -865    108   -865     96 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+  -865    208   -865   -865 
+   197   -865   -865   -865 
+  -865    166     -1   -865 
+  -865    208   -865   -865 
+    -3    108   -865     -4 
+  -865    208   -865   -865 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 25 n= 7904 E= 1.2e-002 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.000635  0.000589  0.748759  0.250017 
+ 0.748765  0.000589  0.000629  0.250017 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.000589  0.250006  0.748770 
+ 0.499388  0.000589  0.000629  0.499393 
+ 0.250012  0.249966  0.499382  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.748765  0.000589  0.000629  0.250017 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.499343  0.000629  0.499393 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.499343  0.000629  0.250017 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 38.44 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  2	width =   25   sites =   4   llr = 101   E-value = 4.5e-001
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 2 Description
+--------------------------------------------------------------------------------
+Simplified        A  ::::a::383:5:3:::a33:83:a
+pos.-specific     C  a3a8:3a33:a::5::8::::383:
+probability       G  ::::::::::::::::3:38a::8:
+matrix            T  :8:3:8:5:8:5a3aa::5::::::
+
+         bits    2.1 * *   *   *         *    
+                 1.9 * * * *   * * ** *  *   *
+                 1.7 * * * *   * * ** *  *   *
+                 1.5 * * * *   * * ** *  *   *
+Information      1.2 ******* *** * **** ******
+content          1.0 ******* ***** **** ******
+(36.6 bits)      0.8 ******* ***** **** ******
+                 0.6 ******* ********** ******
+                 0.4 *************************
+                 0.2 *************************
+                 0.0 -------------------------
+
+Multilevel           CTCCATCTATCATCTTCATGGACGA
+consensus             C T C ACA T A  G AA CAC 
+sequence                    C     T    G      
+                                              
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name             Start   P-value                      Site         
+-------------             ----- ---------            -------------------------
+6603                       1000  1.62e-15 CGGGAACATG CTCCATCTATCATCTTCATGGACGA AATCGACTCC
+83796                       978  4.69e-15 CGAGAACATG CTCCATCCATCATCTTCATGGACGA GATTGACTCT
+20218                      1545  1.69e-11 TAGCTTCTCT CCCCATCAATCTTATTCAGAGCCCA CCCCTCCCCC
+10657                      1075  3.40e-11 AGGATCTGGT CTCTACCTCACTTTTTGAAGGAAGA AACACTTAAT
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+6603                              1.6e-15  999_[2]_976
+83796                             4.7e-15  977_[2]_998
+20218                             1.7e-11  1544_[2]_431
+10657                             3.4e-11  1074_[2]_901
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 in FASTA format
+--------------------------------------------------------------------------------
+>6603                     pos 1000
+CTCCATCTATCATCTTCATGGACGA
+>83796                    pos  978
+CTCCATCCATCATCTTCATGGACGA
+>20218                    pos 1545
+CCCCATCAATCTTATTCAGAGCCCA
+>10657                    pos 1075
+CTCTACCTCACTTTTTGAAGGAAGA
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 25 n= 7904 bayes= 10.9476 E= 4.5e-001 
+  -865    208   -865   -865 
+  -865      8   -865    154 
+  -865    208   -865   -865 
+  -865    166   -865     -4 
+   197   -865   -865   -865 
+  -865      8   -865    154 
+  -865    208   -865   -865 
+    -3      8   -865     96 
+   156      8   -865   -865 
+    -3   -865   -865    154 
+  -865    208   -865   -865 
+    97   -865   -865     96 
+  -865   -865   -865    196 
+    -3    108   -865     -4 
+  -865   -865   -865    196 
+  -865   -865   -865    196 
+  -865    166     -1   -865 
+   197   -865   -865   -865 
+    -3   -865     -1     96 
+    -3   -865    157   -865 
+  -865   -865    198   -865 
+   156      8   -865   -865 
+    -3    166   -865   -865 
+  -865      8    157   -865 
+   197   -865   -865   -865 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 25 n= 7904 E= 4.5e-001 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.249966  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.748719  0.000629  0.250017 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.000635  0.249966  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.249966  0.000629  0.499393 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.250012  0.000589  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.499388  0.000589  0.000629  0.499393 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.250012  0.499343  0.000629  0.250017 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+ 0.250012  0.000589  0.250006  0.499393 
+ 0.250012  0.000589  0.748759  0.000640 
+ 0.000635  0.000589  0.998135  0.000640 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.000635  0.249966  0.748759  0.000640 
+ 0.998141  0.000589  0.000629  0.000640 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 78.29 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  3	width =   21   sites =   4   llr = 88   E-value = 4.8e-001
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 3 Description
+--------------------------------------------------------------------------------
+Simplified        A  :::58:::::3::::33::::
+pos.-specific     C  8:833::8:8:a:8a8:a5a8
+probability       G  3::::::3:::::3:::::::
+matrix            T  :a33:aa:a38:a:::8:5:3
+
+         bits    2.1            *  *  * * 
+                 1.9  *   ** *  ** *  * * 
+                 1.7  *   ** *  ** *  * * 
+                 1.5  *   ** *  ** *  * * 
+Information      1.2 *** ************** **
+content          1.0 *** *****************
+(31.8 bits)      0.8 *** *****************
+                 0.6 *** *****************
+                 0.4 *********************
+                 0.2 *********************
+                 0.0 ---------------------
+
+Multilevel           CTCAATTCTCTCTCCCTCCCC
+consensus            G TCC  G TA  G AA T T
+sequence                T                 
+                                          
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name             Start   P-value                    Site       
+-------------             ----- ---------            ---------------------
+10657                      1511  1.45e-13 CCCAGGCGGT CTCAATTCTCTCTCCCTCCCC TTTCCGTGAC
+83796                      1801  7.40e-12 TGTATATGCA CTCTCTTCTCTCTCCCTCTCC AGGTCATGCA
+6603                       1811  1.22e-10 GTAACTTAAT GTTCATTCTCTCTCCCACCCC TAGGTCATGC
+20218                       606  7.57e-10 CCCAGGCCAG CTCAATTGTTACTGCATCTCT AGGATTGGAA
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+10657                             1.5e-13  1510_[3]_469
+83796                             7.4e-12  1800_[3]_179
+6603                              1.2e-10  1810_[3]_169
+20218                             7.6e-10  605_[3]_1374
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 in FASTA format
+--------------------------------------------------------------------------------
+>10657                    pos 1511
+CTCAATTCTCTCTCCCTCCCC
+>83796                    pos 1801
+CTCTCTTCTCTCTCCCTCTCC
+>6603                     pos 1811
+GTTCATTCTCTCTCCCACCCC
+>20218                    pos  606
+CTCAATTGTTACTGCATCTCT
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 21 n= 7920 bayes= 10.9506 E= 4.8e-001 
+  -865    166     -1   -865 
+  -865   -865   -865    196 
+  -865    166   -865     -4 
+    97      8   -865     -4 
+   156      8   -865   -865 
+  -865   -865   -865    196 
+  -865   -865   -865    196 
+  -865    166     -1   -865 
+  -865   -865   -865    196 
+  -865    166   -865     -4 
+    -3   -865   -865    154 
+  -865    208   -865   -865 
+  -865   -865   -865    196 
+  -865    166     -1   -865 
+  -865    208   -865   -865 
+    -3    166   -865   -865 
+    -3   -865   -865    154 
+  -865    208   -865   -865 
+  -865    108   -865     96 
+  -865    208   -865   -865 
+  -865    166   -865     -4 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 3 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 21 n= 7920 E= 4.8e-001 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.000629  0.250017 
+ 0.499388  0.249966  0.000629  0.250017 
+ 0.748765  0.249966  0.000629  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.000629  0.250017 
+ 0.250012  0.000589  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.000589  0.000629  0.998147 
+ 0.000635  0.748719  0.250006  0.000640 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.250012  0.748719  0.000629  0.000640 
+ 0.250012  0.000589  0.000629  0.748770 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.499343  0.000629  0.499393 
+ 0.000635  0.998096  0.000629  0.000640 
+ 0.000635  0.748719  0.000629  0.250017 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 117.82 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+SUMMARY OF MOTIFS
+********************************************************************************
+
+--------------------------------------------------------------------------------
+	Combined block diagrams: non-overlapping sites with p-value < 0.0001
+--------------------------------------------------------------------------------
+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+20218                            7.77e-19  605_[3(7.57e-10)]_311_[1(6.34e-12)]_582_[2(1.69e-11)]_431
+10657                            5.46e-22  1_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_7_[1(6.00e-08)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_5_[1(1.18e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_4_[1(1.29e-07)]_332_[2(3.40e-11)]_383_[3(7.75e-07)]_7_[3(1.45e-13)]_56_[3(3.47e-05)]_76_[1(8.70e-12)]_291
+83796                            1.73e-27  977_[2(4.69e-15)]_281_[1(2.59e-15)]_492_[3(7.40e-12)]_179
+6603                             9.32e-27  597_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_327_[2(1.62e-15)]_286_[1(2.59e-15)]_475_[3(1.22e-10)]_169
+--------------------------------------------------------------------------------
+
+********************************************************************************
+
+
+********************************************************************************
+Stopped because nmotifs = 3 reached.
+********************************************************************************
+
+CPU: hydra-1.lsd.ornl.gov
+
+********************************************************************************

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.meme2
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.meme2	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.meme2	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,359 @@
+********************************************************************************
+MEME - Motif discovery tool
+********************************************************************************
+MEME version 3.0 (Release date: 2002/04/02 00:11:59)
+
+For further information on how to interpret these results or to get
+a copy of the MEME software please access http://meme.sdsc.edu.
+
+This file may be used as input to the MAST algorithm for searching
+sequence databases for matches to groups of motifs.  MAST is available
+for interactive use and downloading at http://meme.sdsc.edu.
+********************************************************************************
+
+
+********************************************************************************
+REFERENCE
+********************************************************************************
+If you use this program in your research, please cite:
+
+Timothy L. Bailey and Charles Elkan,
+"Fitting a mixture model by expectation maximization to discover
+motifs in biopolymers", Proceedings of the Second International
+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,
+AAAI Press, Menlo Park, California, 1994.
+********************************************************************************
+
+
+********************************************************************************
+TRAINING SET
+********************************************************************************
+DATAFILE= test.fasta
+ALPHABET= ACGT
+Sequence name            Weight Length  Sequence name            Weight Length  
+-------------            ------ ------  -------------            ------ ------  
+68723                    1.0000   2000  16939                    1.0000   2001  
+20754                    1.0000   2001  6707                     1.0000   2000  
+20755                    1.0000   2000  6700                     1.0000   2002  
+20760                    1.0000   2000  20761                    1.0000   2000  
+20762                    1.0000   2000  
+********************************************************************************
+
+********************************************************************************
+COMMAND LINE SUMMARY
+********************************************************************************
+This information can also be useful in the event you wish to report a
+problem with the MEME software.
+
+command: meme test.fasta -dna -nostatus -nmotifs 2 -minsites 8 -maxw 20 -revcomp 
+
+model:  mod=         zoops    nmotifs=         2    evt=           inf
+object function=  E-value of product of p-values
+width:  minw=            8    maxw=           20    minic=        0.00
+width:  wg=             11    ws=              1    endgaps=       yes
+nsites: minsites=        8    maxsites=        9    wnsites=       0.8
+theta:  prob=            1    spmap=         uni    spfuzz=        0.5
+em:     prior=   dirichlet    b=            0.01    maxiter=        50
+        distance=    1e-05
+data:   n=           18004    N=               9
+strands: + -
+sample: seed=            0    seqfrac=         1
+Letter frequencies in dataset:
+A 0.295 C 0.205 G 0.205 T 0.295 
+Background letter frequencies (from dataset with add-one prior applied):
+A 0.295 C 0.205 G 0.205 T 0.295 
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  1	width =   20   sites =   8   llr = 147   E-value = 1.3e-002
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 1 Description
+--------------------------------------------------------------------------------
+Simplified        A  ::a1::931:6:348:1::1
+pos.-specific     C  aa:::8:11841:331:139
+probability       G  :::9::::63::84::3:8:
+matrix            T  ::::a3161::9:::969::
+
+         bits    2.3 **                  
+                 2.1 **                  
+                 1.8 *** *               
+                 1.6 *****              *
+Information      1.4 ******   * **  * ***
+content          1.1 *******  * ** ** ***
+(26.5 bits)      0.9 *******  **** ** ***
+                 0.7 ******* ***** ******
+                 0.5 ********************
+                 0.2 ********************
+                 0.0 --------------------
+
+Multilevel           CCAGTCATGCATGAATTTGC
+consensus                 T A GC AGC G C 
+sequence                          C      
+                                         
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name            Strand  Start   P-value                    Site      
+-------------            ------  ----- ---------            --------------------
+20761                        +   1879  6.50e-13 TCTGATTAAG CCAGTCATGCATGGATTTGC ATTTTGGTTG
+20760                        +   1875  6.50e-13 CCCAGTCACG CCAGTCATGCATGGATTTGC ATTTTGATTG
+6700                         +   1100  2.27e-10 CCTGCTCATG CCAGTCATGGATAAATTTGC ATCTGGCTTA
+20755                        +   1478  5.08e-10 CCCTGTCAGG CCAGTTATGGATGAATGTGC ACTTAANNNN
+6707                         +   1431  6.11e-09 TCACACAGAT CCAGTCAATCCTGCCTGTCC ATCTCAATGA
+20762                        +   1878  1.89e-08 CCTGGTTAGG CCAGTTAAACACAGATTTGC ATTTTGGTTA
+16939                        -    914  2.01e-08 ACTTTTCCTT CCAATCATGCCTGCCCTTGA ACCCTATTGG
+20754                        +   1175  6.73e-08 GCTCACCTTG CCAGTCTCCCCTGAATACCC TACATGCCCT
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+20761                             6.5e-13  1878_[+1]_102
+20760                             6.5e-13  1874_[+1]_106
+6700                              2.3e-10  1099_[+1]_883
+20755                             5.1e-10  1477_[+1]_503
+6707                              6.1e-09  1430_[+1]_550
+20762                             1.9e-08  1877_[+1]_103
+16939                               2e-08  913_[-1]_1068
+20754                             6.7e-08  1174_[+1]_807
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 in BLOCKS format
+--------------------------------------------------------------------------------
+BL   MOTIF 1 width=20 seqs=8
+20761                    ( 1879) CCAGTCATGCATGGATTTGC  1 
+20760                    ( 1875) CCAGTCATGCATGGATTTGC  1 
+6700                     ( 1100) CCAGTCATGGATAAATTTGC  1 
+20755                    ( 1478) CCAGTTATGGATGAATGTGC  1 
+6707                     ( 1431) CCAGTCAATCCTGCCTGTCC  1 
+20762                    ( 1878) CCAGTTAAACACAGATTTGC  1 
+16939                    (  914) CCAATCATGCCTGCCCTTGA  1 
+20754                    ( 1175) CCAGTCTCCCCTGAATACCC  1 
+//
+
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 20 n= 17833 bayes= 11.1216 E= 1.3e-002 
+  -965    229   -965   -965 
+  -965    229   -965   -965 
+   176   -965   -965   -965 
+  -124   -965    210   -965 
+  -965   -965   -965    176 
+  -965    187   -965    -24 
+   157   -965   -965   -124 
+   -24    -71   -965    108 
+  -124    -71    161   -124 
+  -965    187     29   -965 
+   108     87   -965   -965 
+  -965    -71   -965    157 
+   -24   -965    187   -965 
+    34     29     87   -965 
+   134     29   -965   -965 
+  -965    -71   -965    157 
+  -124   -965     29    108 
+  -965    -71   -965    157 
+  -965     29    187   -965 
+  -124    210   -965   -965 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 1 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 20 n= 17833 E= 1.3e-002 
+ 0.000369  0.999007  0.000255  0.000369 
+ 0.000369  0.999007  0.000255  0.000369 
+ 0.999120  0.000255  0.000255  0.000369 
+ 0.125213  0.000255  0.874163  0.000369 
+ 0.000369  0.000255  0.000255  0.999120 
+ 0.000369  0.749319  0.000255  0.250057 
+ 0.874276  0.000255  0.000255  0.125213 
+ 0.250057  0.125099  0.000255  0.624589 
+ 0.125213  0.125099  0.624475  0.125213 
+ 0.000369  0.749319  0.249943  0.000369 
+ 0.624589  0.374787  0.000255  0.000369 
+ 0.000369  0.125099  0.000255  0.874276 
+ 0.250057  0.000255  0.749319  0.000369 
+ 0.374901  0.249943  0.374787  0.000369 
+ 0.749432  0.249943  0.000255  0.000369 
+ 0.000369  0.125099  0.000255  0.874276 
+ 0.125213  0.000255  0.249943  0.624589 
+ 0.000369  0.125099  0.000255  0.874276 
+ 0.000369  0.249943  0.749319  0.000369 
+ 0.125213  0.874163  0.000255  0.000369 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 75.70 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+MOTIF  2	width =   15   sites =   8   llr = 117   E-value = 1.2e+003
+********************************************************************************
+--------------------------------------------------------------------------------
+	Motif 2 Description
+--------------------------------------------------------------------------------
+Simplified        A  :1a39:::::18:::
+pos.-specific     C  ::::1:::8a:::::
+probability       G  96:3::1:::::4::
+matrix            T  13:5:a9a3:936aa
+
+         bits    2.3          *     
+                 2.1          *     
+                 1.8   *  * * *   **
+                 1.6 * *  * * *   **
+Information      1.4 * * ******   **
+content          1.1 * * *******  **
+(21.0 bits)      0.9 * * ***********
+                 0.7 *** ***********
+                 0.5 ***************
+                 0.2 ***************
+                 0.0 ---------------
+
+Multilevel           GGATATTTCCTATTT
+consensus             T A    T  TG  
+sequence                G           
+                                    
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 sites sorted by position p-value
+--------------------------------------------------------------------------------
+Sequence name            Strand  Start   P-value                 Site    
+-------------            ------  ----- ---------            ---------------
+20762                        +   1845  2.62e-09 TCCAGGAACA GGATATTTCCTATTT TTGAGAGTCC
+6700                         +   1068  2.62e-09 TTTCAGAACA GGATATTTCCTATTT TGAGTATCCT
+20755                        +   1445  2.84e-08 GCCAAGGGTG GGATATTTTCTATTT TGTAGAGTCC
+20754                        -    664  5.62e-08 TTTCTTAGAA GGAAATTTCCTTGTT CTCTTTCTAT
+20761                        +    670  1.06e-07 GAAGAAAAAG GAAGATTTCCTAGTT AACAATTCAA
+68723                        -   1925  5.26e-07 TTGCTTTCTT TGAGATGTCCTAGTT CACTCCTAAA
+20760                        -    651  5.56e-07 TTTAAACTTG GTAAATTTTCTTTTT CTTCACATTT
+16939                        -   1616  6.78e-07 TAGTTCAGTT GTATCTTTCCAATTT TGATGTTTGG
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 block diagrams
+--------------------------------------------------------------------------------
+SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+20762                             2.6e-09  1844_[+2]_141
+6700                              2.6e-09  1067_[+2]_920
+20755                             2.8e-08  1444_[+2]_541
+20754                             5.6e-08  663_[-2]_1323
+20761                             1.1e-07  669_[+2]_1316
+68723                             5.3e-07  1924_[-2]_61
+20760                             5.6e-07  650_[-2]_1335
+16939                             6.8e-07  1615_[-2]_371
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 in BLOCKS format
+--------------------------------------------------------------------------------
+BL   MOTIF 2 width=15 seqs=8
+20762                    ( 1845) GGATATTTCCTATTT  1 
+6700                     ( 1068) GGATATTTCCTATTT  1 
+20755                    ( 1445) GGATATTTTCTATTT  1 
+20754                    (  664) GGAAATTTCCTTGTT  1 
+20761                    (  670) GAAGATTTCCTAGTT  1 
+68723                    ( 1925) TGAGATGTCCTAGTT  1 
+20760                    (  651) GTAAATTTTCTTTTT  1 
+16939                    ( 1616) GTATCTTTCCAATTT  1 
+//
+
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 position-specific scoring matrix
+--------------------------------------------------------------------------------
+log-odds matrix: alength= 4 w= 15 n= 17878 bayes= 11.1253 E= 1.2e+003 
+  -965   -965    210   -124 
+  -124   -965    161    -24 
+   176   -965   -965   -965 
+   -24   -965     29     76 
+   157    -71   -965   -965 
+  -965   -965   -965    176 
+  -965   -965    -71    157 
+  -965   -965   -965    176 
+  -965    187   -965    -24 
+  -965    229   -965   -965 
+  -124   -965   -965    157 
+   134   -965   -965    -24 
+  -965   -965     87    108 
+  -965   -965   -965    176 
+  -965   -965   -965    176 
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+	Motif 2 position-specific probability matrix
+--------------------------------------------------------------------------------
+letter-probability matrix: alength= 4 w= 15 n= 17878 E= 1.2e+003 
+ 0.000369  0.000255  0.874163  0.125213 
+ 0.125213  0.000255  0.624475  0.250057 
+ 0.999120  0.000255  0.000255  0.000369 
+ 0.250057  0.000255  0.249943  0.499745 
+ 0.874276  0.125099  0.000255  0.000369 
+ 0.000369  0.000255  0.000255  0.999120 
+ 0.000369  0.000255  0.125099  0.874276 
+ 0.000369  0.000255  0.000255  0.999120 
+ 0.000369  0.749319  0.000255  0.250057 
+ 0.000369  0.999007  0.000255  0.000369 
+ 0.125213  0.000255  0.000255  0.874276 
+ 0.749432  0.000255  0.000255  0.250057 
+ 0.000369  0.000255  0.374787  0.624589 
+ 0.000369  0.000255  0.000255  0.999120 
+ 0.000369  0.000255  0.000255  0.999120 
+--------------------------------------------------------------------------------
+
+
+
+
+
+Time 152.20 secs.
+
+********************************************************************************
+
+
+********************************************************************************
+SUMMARY OF MOTIFS
+********************************************************************************
+
+--------------------------------------------------------------------------------
+	Combined block diagrams: non-overlapping sites with p-value < 0.0001
+--------------------------------------------------------------------------------
+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM
+-------------            ----------------  -------------
+68723                            2.83e-04  473_[-1(8.43e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_35_[-1(3.15e-06)]_[+1(1.38e-05)]_9_[-1(7.89e-05)]_63_[-1(2.97e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_26_[+2(4.37e-06)]_46_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_6_[-1(7.97e-06)]_24_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_1_[-2(5.92e-05)]_279_[-1(8.43e-06)]_[+1(1.38e-05)]_[+2(5.60e-05)]_157_[-2(5.26e-07)]_61
+16939                            3.50e-06  913_[-1(2.01e-08)]_682_[-2(6.78e-07)]_205_[+1(5.80e-05)]_146
+20754                            1.05e-06  39_[-2(1.78e-05)]_4_[+1(4.78e-06)]_564_[-2(7.42e-05)]_6_[-2(5.62e-08)]_146_[+2(3.98e-05)]_335_[+1(6.73e-08)]_93_[+1(5.54e-05)]_694
+6707                             2.26e-05  173_[+1(4.46e-05)]_655_[-2(5.60e-05)]_3_[+1(3.06e-06)]_12_[+1(4.78e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.93e-05)]_181_[-1(3.26e-05)]_251_[+1(6.11e-09)]_329_[+1(6.34e-05)]_201
+20755                            5.26e-09  160_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_3_[+1(6.41e-06)]_219_[-1(4.80e-05)]_962_[+2(2.84e-08)]_18_[+1(5.08e-10)]_6_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_14_[+1(7.97e-06)]_83
+6700                             2.48e-10  48_[+1(7.97e-06)]_267_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_129_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_3_[+1(4.41e-06)]_280_[+2(2.62e-09)]_17_[+1(2.27e-10)]_101_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+2(7.29e-05)]_[+1(6.41e-06)]_604_[-1(8.43e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_3
+20760                            1.53e-10  259_[-1(6.20e-05)]_339_[-2(3.17e-05)]_17_[-2(5.56e-07)]_436_[-2(4.06e-05)]_7_[+1(2.95e-07)]_6_[-1(1.12e-05)]_705_[+1(6.50e-13)]_106
+20761                            3.10e-11  397_[-2(1.10e-06)]_257_[+2(1.06e-07)]_382_[-2(5.60e-05)]_[+2(7.29e-05)]_[-1(3.24e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_722_[+1(6.50e-13)]_102
+20762                            1.72e-08  134_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_92_[+2(9.22e-05)]_517_[+1(7.97e-06)]_7_[-1(4.78e-06)]_439_[+2(2.62e-09)]_18_[+1(1.89e-08)]_103
+--------------------------------------------------------------------------------
+
+********************************************************************************
+
+
+********************************************************************************
+Stopped because nmotifs = 2 reached.
+********************************************************************************
+
+CPU: crick
+
+********************************************************************************

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.metafasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.metafasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.metafasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+>test
+ABCDEFHIJKLMNOPQRSTUVWXYZ
+&charge
+NBNAANCNJCNNNONNCNNUNNXNZ
+&chemical
+LBSAARCLJCLSMOIMCHHULRXRZ
+&functional
+HBPAAHCHJCHHPOHPCPPUHHXPZ
+&hydrophobic
+I & OIOIJOIIOOIOOOOUIIXOZ
+&structural
+ABAEEIEIJEIIEOAEEAAUIAXAZ

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.nh
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.nh	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.nh	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+(((hADH2:0.1,hADH1:0.11):0.05,nADHY:0.1,iADHX:0.12):0.1,(yADH4:0.09,yADH3:0.13,yADH2:0.12,yADH1:0.11):0.1);
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.nhx
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.nhx	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.nhx	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1 @@
+(((ADH2[&&NHX:S=human:E=1.1.1.1]:0.1,ADH1:0.11[&&NHX:S=human:E=1.1.1.1]):0.05[&&NHX:S=Primates:E=1.1.1.1:D=Y:B=100],ADHY:0.1[&&NHX:S=nematode:E=1.1.1.1],ADHX:0.12[&&NHX:S=insect:E=1.1.1.1]):0.1[&&NHX:S=Metazoa:E=1.1.1.1:D=N],(ADH4:0.09[&&NHX:S=yeast:E=1.1.1.1],ADH3:0.13[&&NHX:S=yeast:E=1.1.1.1],ADH2:0.12[&&NHX:S=yeast:E=1.1.1.1],ADH1:0.11[&&NHX:S=yeast:E=1.1.1.1]):0.1[&&NHX:S=Fungi])[&&NHX:E=1.1.1.1:D=N];
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.pfam
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.pfam	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.pfam	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,38 @@
+TASM_BFDV/6-67            RLTELLCLPV.......TATAADIKTAYRRTALKYHPDKGGD.................EEKMKELNTLMEEFRETEGLRADETLE
+TASM_SV40/12-75           QLMDLLGLERS.....AWGNIPLMRKAYLKKCKEFHPDKGGD.................EEKMKKMNTLYKKMEDGVKYAHQPDFG
+TASM_POVLY/12-75          ELMDLLQITRA.....AWGNLSMMKKAYKNVSKLYHPDKGGD.................SAKMQRLNELFQRVQVTLMEIRSQCGS
+TASM_POVMA/12-75          RLLELLKLPRQ.....LWGDFGRMQQAYKQQSLLLHPDKGGS.................HALMQELNSLWGTFKTEVYNLRMNLGG
+TAMI_POVHA/12-75          ALISLLDLEPQ.....YWGDYGRMQKCYKKKCLQLHPDKGGN.................EELMQQLNTLWTKLKDGLYRVRLLLGP
+TASM_POVBO/10-71          ELRGLLGTPD.......IGNADTLKKAFLKACKVHHPDKGGN.................EEAMKRLLYLYNKAKIAASATTSQVWY
+DNJ1_HUMAN/4-68           DYYQTLGLAR.......GASDEEIKRAYRRQALRYHPDKNKE..............PGAEEKFKEIAEAYDVLSDPRKREIFDRYG
+DNAJ_HAEDU/5-70           DYYEVLGLQK.......GATEKDIKRAYKRLAAKYHPDKNQG.............SKDSEEKFKQITEAYEILTDDQKRAAYDQYG
+DNJ2_ALLPO/13-74          KYYEVLGVSK.......NATPEDLKKAYRKAAIKNHPDKGGD.................PEKFKEIGQAYEVLNDPEKREIYDQYG
+PSI_SCHPO/6-68            KLYDCLEVRP.......EASEAELKKAYRKLALKYHPDKNPN................GEKKFKEISLAYEVLSDPQRRKLYDQYG
+XDJ1_YEAST/9-77           RLYDVLGVTR.......DATVQEIKTAYRKLALKHHPDKYVDQD..........SKEVNEIKFKEITAAYEILSDPEKKSHYDLYG
+DNAJ_BORBU/4-69           DYYEILGLSK.......GASKDEIKKAYRKIAIKYHPDRNQG.............NEEAASIFKEATQAYEILIDDNKKAKYDRFG
+CSP_RAT/15-80             SLYHVLGLDK.......NATSDDIKKSYRKLALKYHPDKNPD.............NPEAADKFKEINNAHAILTDATKRNIYDKYG
+HLJ1_YEAST/21-85          EFYEILKVDR.......KATDSEIKKAYRKLAIKLHPDKNSH..............PKAGEAFKVINRAFEVLSNEEKRSIYDRIG
+CAJ1_YEAST/6-71           EYYDILGIKP.......EATPTEIKKAYRRKAMETHPDKHPD.............DPDAQAKFQAVGEAYQVLSDPGLRSKYDQFG
+YIS4_YEAST/6-71           EYYDLLGVST.......TASSIEIKKAYRKKSIQEHPDKNPN.............DPTATERFQAISEAYQVLGDDDLRAKYDKYG
+YNW7_YEAST/4-70           CYYELLGVET.......HASDLELKKAYRKKALQYHPDKNPDN............VEEATQKFAVIRAAYEVLSDPQERAWYDSHK
+DNAJ_ERYRH/6-70           DFYEILGVSK.......SATDAEIKKAYRQLAKKYHPDINKE..............DGAEAKFKEVQEAYEVLSDSQKRANYDQFG
+YLW5_CAEEL/531-595        DYYKTLGVDK.......KSDAKAIKKAYFQLAKKYHPDVNKT..............KEAQTKFQEISEAYEVLSDDTKRQEYDAYG
+CBPA_ECOLI/5-69           DYYAIMGVKP.......TDDLKTIKTAYRRLARKYHPDVSKE..............PDAEARFKEVAEAWEVLSDEQRRAEYDQMW
+DNAJ_CAUCR/3-68           DYYEILGVTR.......TIDEAGLKSRVRKLAMEHHPDRNGG.............CENAAGRFKEINEAYSVLSDSQKRAAYDRFG
+DNJM_MYCGE/7-71           DYYEVLGITP.......DADQSEIKKAFRKLAKKYHPDRNNA..............PDAAKIFAEINEANDVLSNPKKRANYDKYG
+DNAJ_SYNP7/6-71           DYYALLGIPQ.......SADQAAIKAAFRKLARQCHPDLNPG.............DRQAEERFKQISEAYEILSDPDRRAEYQRFS
+DNAJ_STRCO/10-75          DYYKVLGVPK.......DATEAEIKKAYRKLARENHPDANKG.............NVKAEERFKEISEANDILGDPKKRKEYDEAR
+YJH3_YEAST/585-655        DYYKILGVSP.......SASSKEIRKAYLNLTKKYHPDKIKANHN........DKQESIHETMSQINEAYETLSDDDKRKEYDLSR
+DNJL_MYCGE/2-64           NLYDLLELPT.......TASIKEIKIAYKRLAKRYHPDVNKL................GSQTFVEINNAYSILSDPNQKEKYDSML
+YFL1_YEAST/44-108         NFYKFLKLPKL.....QNSSTKEITKNLRKLSKKYHPDKNPK................YRKLYERLNLATQILSNSSNRKIYDYYL
+YGM8_YEAST/79-151         NLYDVLELPTPLDVHTIYDDLPQIKRKYRTLALKYHPDKHPD.............NPSIIHKFHLLSTATNILTNADVRPHYDRWL
+YD1J_SCHPO/32-110         TPYEILELPR.......TCTANDIKRKYIELVKKHHPDKMKNASQLAPTESPPEINKHNEEYFRLLLAANALLSDKRRREEYDRFG
+YJQ2_YEAST/13-77          TYYSILGLTS.......NATSSEVHKSYLKLARLLHPDKTKS..............DKSEELFKAVVHAHSILTDEDQKLRYDRDL
+NPL1_YEAST/125-196        DPYEILGIST.......SASDRDIKSAYRKLSVKFHPDKLAKGLT.......PDEKSVMEETYVQITKAYESLTDELVRQNYLKYG
+YJ67_YEAST/8-76           THYEILRIPS.......DATQDEIKKAYRNRLLNTHPDKLSKSI..........HDTVSNVTINKIQDAYKILSNIKTRREYDRLI
+ZUO1_YEAST/97-168         DLYAAMGLSKLR....FRATESQIIKAHRKQVVKYHPDKQSAAG..........GSLDQDGFFKIIQKAFETLTDSNKRAQYDSCD
+ZRF1_MOUSE/88-161         DHYAVLGLGHVR....YTATQRQIKAAHKAMVLKHHPDKRKAAGE........PIKEGDNDYFTCITKAYEMLSDPVKRRAFNSVD
+RESA_PLAFF/523-587        LYYDILGVGV.......NADMNEITERYFKLAENYYPYQRSG..............STVFHNFRKVNEAYQVLGDIDKKRWYNKYG
+YQ07_CAEEL/562-626        DAYSVFGLRS.......DCSDDDIKRNYKRLAALVSPDKCTI..............DAADQVYELVDVAFSAIGYKDSRSEYTLEN
+YFHE_ECOLI/2-74           DYFTLFGLPAR.....YQLDTQALSLRFQDLQRQYHPDKFASGSQ........AEQLAAVQQSATINQAWQTLRHPLMRAEYLLSL
+YGB8_YEAST/13-82          TFYELFPKTFPKKLPIWTIDQSRLRKEYRQLQAQHHPDMAQQ................GSEQSSTLNQAYHTLKDPLRRSQYMLKL


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/test.pfam
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.pir
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.pir	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.pir	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,6 @@
+>P1;roa1_drome
+Rea guano receptor type III >> 0.1
+MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHFGNIVDNIVIDKETGKKRGFAFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY*
+>P1;roa2_drome
+Rea guano ligand
+MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPTSTSTSTSTSTSTSTSTMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHLLLLLLLDLLLLDLLLLDLLLFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY*

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.ptt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.ptt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.ptt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,370 @@
+Leptospira interrogans serovar Lai str. 56601 chromosome II, complete sequence - 0..358943
+367 proteins
+Location	Strand	Length	PID	Gene	Synonym	Code	COG	Product
+15..2531	+	838	24217062	-	LB001	-	-	hypothetical protein
+2491..3423	+	310	24217063	metF	LB002	-	COG0685E	5,10-methylenetetrahydrofolate reductase
+3716..3823	-	35	24217064	-	LB004	-	-	hypothetical protein
+3779..3886	+	35	24217065	-	LB003	-	-	hypothetical protein
+4066..4890	-	274	24217066	-	LB005	-	COG2107R	hypothetical protein
+4975..5085	+	36	24217067	-	LB006	-	-	hypothetical protein
+5239..5754	+	171	24217068	-	LB007	-	COG0561R	hypothetical protein
+5762..6112	+	116	24217069	-	LB008	-	COG0561R	hypothetical protein
+6083..6373	-	96	24217070	-	LB009	-	-	hypothetical protein
+6410..7279	+	289	24217071	hemA	LB010	-	COG0373H	glutamyl-tRNA reductase
+7249..8892	+	547	24217072	hemC	LB011	-	-	porphobilinogen deaminase
+8861..9814	+	317	24217073	hemB	LB012	-	-	delta-aminolevulinic acid dehydratase
+9811..11142	+	443	24217074	hemL	LB013	-	COG0001H	glutamate-1-semialdehyde aminotransferase
+11142..12059	+	305	24217075	-	LB014	-	COG0642T	two-component hybrid sensor and regulator
+12056..12748	+	230	24217076	-	LB015	-	COG0745TK	two-component response regulator
+12708..13760	+	350	24217077	hemE	LB016	-	COG0407H	uroporphyrinogen decarboxylase
+13667..15106	+	479	24217078	hemF	LB017	-	COG0635H	coproporphyrinogen III oxidase
+15186..16169	+	327	24217079	-	LB018	-	-	hypothetical protein
+16244..16384	+	46	24217080	-	LB019	-	-	hypothetical protein
+16381..17664	+	427	24217081	hemG	LB020	-	COG1232H	protoporphyrinogen oxidase
+17941..18075	-	44	24217082	-	LB021	-	-	hypothetical protein
+18395..19468	+	357	24217083	-	LB022	-	COG3547L	transposase
+19839..20135	-	98	24217084	-	LB023	-	-	ferrochelatase
+20176..20748	-	190	24217085	-	LB024	-	-	ferrochelatase
+21008..22150	-	380	24217086	-	LB025	-	-	hypothetical protein
+22312..23229	+	305	24217087	parA3	LB026	-	COG1192D	ParA protein
+23222..23929	+	235	24217088	parB3	LB027	-	COG1475K	ParB protein
+24029..24631	+	200	24217089	-	LB028	-	COG0431R	putative reductase
+24684..24791	-	35	24217090	-	LB029	-	-	hypothetical protein
+25233..26396	-	387	24217091	-	LB030	-	COG3274S	probable intercellular adhesion protein C
+26476..26817	-	113	24217092	-	LB032	-	-	hypothetical protein
+26492..26851	+	119	24217093	-	LB031	-	COG0664T	cyclic nucleotide dependent protein kinase
+26903..27112	-	69	24217094	-	LB033	-	-	unknown protein confirmed by proteomics
+27247..28002	+	251	24217095	-	LB034	-	COG1028IQR	3-oxoacyl-(acyl carrier protein) reductase
+27978..28529	+	183	24217096	amsI	LB035	-	COG0394T	low molecular weight phosphotyrosine protein phosphatase
+28559..29827	+	422	24217097	ndh	LB036	-	COG1252C	NADH dehydrogenase
+29824..30282	+	152	24217098	-	LB037	-	-	putative sucrose/H+ symporter
+30314..30475	-	53	24217099	-	LB038	-	-	hypothetical protein
+30707..31315	-	202	24217100	-	LB039	-	-	hypothetical protein
+31361..31627	-	88	24217101	-	LB040	-	-	hypothetical protein
+31683..32507	+	274	24217102	-	LB041	-	COG1028IQR	3-oxoacyl-(acyl carrier protein) reductase
+32517..32939	+	140	24217103	-	LB042	-	COG3832S	hypothetical protein
+33001..33372	+	123	24217104	-	LB043	-	-	unknown protein confirmed by proteomics
+33697..34767	+	356	24217105	-	LB044	-	-	hypothetical protein
+35199..35384	-	61	24217106	-	LB045	-	-	hypothetical protein
+35388..35525	-	45	24217107	-	LB046	-	-	hypothetical protein
+36004..37326	+	440	24217108	-	LB047	-	COG2849S	hypothetical protein
+37916..38989	+	357	24217109	-	LB048	-	COG3547L	putative transposase
+39952..40101	+	49	24217110	-	LB049	-	-	hypothetical protein
+40098..40712	+	204	24217111	-	LB050	-	-	hypothetical protein
+40773..41846	+	357	24217112	-	LB051	-	COG0714R	magnesium chelatase, putative
+41853..42683	+	276	24217113	-	LB052	-	COG1721R	hypothetical protein
+42680..43549	+	289	24217114	-	LB053	-	-	hypothetical protein
+43522..44484	+	320	24217115	vwa1	LB054	-	-	von Willebrand factor type A domain containing protein
+44481..45524	+	347	24217116	vwa2	LB055	-	-	von Willebrand factor type A domain containing protein
+45467..46210	+	247	24217117	-	LB056	-	-	TPR-repeat-containing protein
+46207..47850	+	547	24217118	batD	LB057	-	-	BatD
+47847..49670	+	607	24217119	htpG2	LB058	-	COG0326O	heat shock protein 90
+49688..50257	+	189	24217120	-	LB059	-	-	hypothetical protein
+50238..50729	-	163	24217121	-	LB060	-	-	hypothetical protein
+50959..52557	+	532	24217122	-	LB061	-	COG3211R	hypothetical protein
+52565..52909	-	114	24217123	-	LB062	-	COG1366T	anti-anti-sigma factor
+52906..53925	-	339	24217124	-	LB063	-	COG0061G	hypothetical protein
+53937..54044	-	35	24217125	-	LB064	-	-	hypothetical protein
+54100..54918	+	272	24217126	-	LB065	-	-	hypothetical protein
+54915..55394	+	159	24217127	-	LB066	-	-	hypothetical protein
+55509..55622	+	37	24217128	-	LB067	-	-	hypothetical protein
+55772..56593	+	273	24217129	-	LB068	-	COG1398I	fatty acid desaturase
+57309..57893	+	194	24217130	-	LB069	-	-	hypothetical protein
+59172..59822	+	216	24217131	-	LB070	-	COG2085R	hypothetical protein
+60390..60638	+	82	24217132	-	LB071	-	-	hypothetical protein
+61294..61851	+	185	24217133	-	LB072	-	-	hypothetical protein
+61981..62949	-	322	24217134	-	LB073	-	COG1703E	lysine arginine ornithine transport system kinase
+62957..64969	-	670	24217135	mcm2	LB074	-	COG2185I	methylmalonyl-CoA mutase
+65000..65104	-	34	24217136	-	LB076	-	-	hypothetical protein
+65076..66413	+	445	24217137	-	LB075	-	-	hypothetical protein
+67553..68245	+	230	24217138	-	LB077	-	-	hypothetical protein
+68865..69938	-	357	24217139	-	LB078	-	COG4232OC	TPR-repeat-containing protein
+70067..70345	+	92	24217140	hisE	LB079	-	COG0140E	phosphoribosyl-ATP pyrophosphohydrolase
+70349..72280	+	643	24217141	-	LB080	-	-	hypothetical protein
+72258..72386	+	42	24217142	-	LB081	-	-	hypothetical protein
+72749..73513	+	254	24217143	-	LB082	-	COG1028IQR	3-ketoacyl-acyl carrier protein reductase
+74006..74239	+	77	24217144	acp	LB083	-	COG0236IQ	acyl carrier protein
+74293..75036	+	247	24217145	-	LB084	-	COG0571K	ribonuclease III
+75411..75998	+	195	24217146	-	LB085	-	COG1051F,COG0494LR	MutT/nudix family protein
+75995..77080	+	361	24217147	aroB	LB086	-	COG0337E	3-dehydroquinate synthase
+77080..77961	+	293	24217148	-	LB087	-	COG0668M	hypothetical protein
+78288..78662	+	124	24217149	-	LB088	-	-	hypothetical protein
+79179..80735	+	518	24217150	-	LB089	-	COG2509R	Uncharacterized FAD-dependent dehydrogenase
+80785..81324	+	179	24217151	-	LB090	-	-	hypothetical protein
+81632..81871	-	79	24217152	-	LB091	-	-	hypothetical protein
+81925..82335	-	136	24217153	-	LB092	-	-	hypothetical protein
+82392..84329	-	645	24217154	fadD	LB093	-	COG1022I	probable long-chain-fatty-acid--CoA ligase
+84491..87430	+	979	24217155	cyaA17	LB094	-	COG2114T,COG0840NT	adenylate cyclase
+87459..87566	-	35	24217156	-	LB095	-	-	hypothetical protein
+87988..88740	-	250	24217157	-	LB096	-	COG0500QR	hypothetical protein
+88780..90210	-	476	24217158	-	LB098	-	-	Predicted xylanase/chitin deacetilase
+90205..90318	+	37	24217159	-	LB097	-	-	hypothetical protein
+90403..90918	-	171	24217160	-	LB099	-	-	hypothetical protein
+91165..91323	+	52	24217161	-	LB100	-	-	hypothetical protein
+91526..91648	-	40	24217162	-	LB101	-	-	hypothetical protein
+91672..92481	+	269	24217163	-	LB102	-	COG0647G	phospholysine phosphohistidine inorganic pyrophosphate phosphatase
+92488..92916	-	142	24217164	-	LB103	-	COG0824R	hypothetical protein
+93181..93759	-	192	24217165	-	LB104	-	COG1309K	putative TetR-family transcriptional regulator
+94594..95610	+	338	24217166	-	LB105	-	COG0640K,COG0500QR	transcriptional regulator, ArsR family
+95612..96922	+	436	24217167	-	LB106	-	COG0499H	S-adenosyl-L-homocysteine hydrolase
+96995..97294	+	99	24217168	-	LB107	-	-	ferredoxin
+97341..101084	+	1247	24217169	metH	LB108	-	COG0646E,COG1410E	B12-dependent homocysteine-N5-methyltetrahydrofolate transmethylase
+101193..101531	+	112	24217170	-	LB109	-	-	hypothetical protein
+101979..102911	-	310	24217171	-	LB110	-	-	putative outermembrane protein
+103023..104318	+	431	24217172	-	LB111	-	COG0205G	diphosphate--fructose-6-phosphate 1-phosphotransferase
+104517..106262	-	581	24217173	-	LB112	-	COG2203T,COG2208TK	putative regulatory protein contains GAF domain
+106440..107324	-	294	24217174	argB	LB114	-	COG0548E	acetylglutamate kinase
+107290..108132	+	280	24217175	-	LB113	-	COG1028IQR	3-oxoacyl-(acyl carrier protein) reductase
+108927..109031	-	34	24217176	-	LB115	-	-	hypothetical protein
+109072..110814	-	580	24217177	-	LB116	-	COG3975R	hypothetical protein
+110807..111298	-	163	24217178	-	LB117	-	COG1225O	bacterioferritin comigratory protein
+111451..113616	+	721	24217179	cyaA18	LB118	-	COG2114T	adenylate cyclase
+114803..115147	+	114	24217180	-	LB119	-	-	hypothetical protein
+115194..116291	+	365	24217181	-	LB120	-	-	hypothetical protein
+116308..117120	+	270	24217182	-	LB121	-	COG1028IQR	3-oxoacyl-(acyl carrier protein) reductase
+117169..118068	-	299	24217183	-	LB122	-	COG0582L	site-specific integrase/recombinase XerD related protein
+118184..118723	-	179	24217184	aroK	LB123	-	COG0703E	shikimic acid kinase I
+118720..119571	-	283	24217185	-	LB124	-	COG1639T	HD-GYP domain (HD superfamily hydrolase)
+119577..120128	-	183	24217186	cheD2	LB125	-	COG1871NT	chemotaxis protein
+120255..121481	+	408	24217187	traB	LB126	-	COG1916S	pheromone shutdown protein
+121805..122368	+	187	24217188	-	LB127	-	-	hypothetical protein
+122598..122750	-	50	24217189	-	LB129	-	-	hypothetical protein
+122743..122871	+	42	24217190	-	LB128	-	-	hypothetical protein
+123146..124291	-	381	24217191	-	LB130	-	-	transcriptional regulator, AraC family
+124576..125685	-	369	24217192	-	LB131	-	-	putative transcriptional regulator, araC family protein
+126389..126511	+	40	24217193	-	LB132	-	-	hypothetical protein
+126928..128532	+	534	24217194	-	LB133	-	-	probable protein containing EAL domain
+129379..129828	+	149	24217195	-	LB134	-	-	hypothetical protein
+130629..130736	-	35	24217196	-	LB135	-	-	hypothetical protein
+130768..131166	-	132	24217197	-	LB136	-	COG1366T	anti-sigma factor antagonist
+131177..132175	-	332	24217198	-	LB137	-	-	hypothetical protein
+132172..133629	-	485	24217199	-	LB138	-	-	hypothetical protein
+133632..135125	-	497	24217200	-	LB139	-	COG2208TK	putative regulation protein contains HAMP domain
+136387..136539	+	50	24217201	-	LB140	-	-	hypothetical protein
+136739..137446	-	235	24217202	-	LB141	-	-	similar to putative lipoprotein qlp42
+137699..138250	-	183	24217203	-	LB142	-	-	hypothetical protein
+138314..139405	-	363	24217204	-	LB143	-	COG4254S	similar to putative lipoprotein qlp42
+139426..139947	-	173	24217205	-	LB144	-	COG1595K	RNA polymerase ECF-type sigma factor
+140082..140189	+	35	24217206	-	LB145	-	-	hypothetical protein
+141064..141711	+	215	24217207	-	LB146	-	-	hypothetical protein
+141831..142478	+	215	24217208	-	LB147	-	-	conserved hyperthetical protein
+142975..143316	-	113	24217209	-	LB148	-	-	hypothetical protein
+143870..144493	-	207	24217210	pmgA	LB149	-	COG0406G	phosphoglycerate mutase
+144510..145457	-	315	24217211	cbiB	LB150	-	COG1270H	cobalamin biosynthesis protein B
+145461..146933	-	490	24217212	cbiP	LB151	-	COG1492H	cobyric acid synthase
+148052..148672	-	206	24217213	cobP	LB152	-	COG2087H	cobinamide kinase
+148612..149349	-	245	24217214	-	LB153	-	COG1865S	hypothetical protein
+149292..150656	-	454	24217215	cbiA	LB154	-	COG1797H	cobyrinic acid-diamide synthase
+150656..151192	-	178	24217216	cobA	LB155	-	COG2109H	cob(I)alamin adenosyltransferase
+151189..151953	-	254	24217217	cbiF	LB156	-	COG2875H	precorrin-3 methylase
+151967..153460	-	497	24217218	cbiH	LB157	-	COG1010H	precorrin methylase
+153457..154614	-	385	24217219	cbiG	LB158	-	COG2073H	cobalamin biosynthesis protein
+154601..155365	-	254	24217220	cobI	LB159	-	COG2243H	precorrin-2 methyltransferase
+155362..156597	-	411	24217221	cbiE	LB160	-	COG2241H,COG2242H	precorrin-6Y methylase
+156594..157265	-	223	24217222	cbiC	LB161	-	-	cobalamin biosynthesis precorrin isomerase
+157262..158365	-	367	24217223	cbiD	LB162	-	-	cobalt-precorrin-6A synthase
+158369..159118	-	249	24217224	-	LB163	-	COG1018C	Oxidoreductase FAD-binding family protein
+159124..159495	-	123	24217225	-	LB164	-	-	unknown protein confirmed by proteomics
+159516..160685	-	389	24217226	cbiX	LB165	-	COG2138S,COG3411C	putative cbiX protein
+161652..161771	+	39	24217227	-	LB166	-	-	hypothetical protein
+162903..163043	-	46	24217228	-	LB168	-	-	hypothetical protein
+163042..165165	+	707	24217229	-	LB167	-	-	hypothetical protein
+165171..165743	-	190	24217230	-	LB169	-	-	hypothetical protein
+165785..167674	+	629	24217231	-	LB170	-	COG1086MG	similar to capsular polysaccharide biosynthesis protein
+167667..168137	+	156	24217232	-	LB171	-	-	hypothetical protein
+168134..169744	+	536	24217233	-	LB172	-	COG2252R	hypothetical protein
+169746..169901	+	51	24217234	-	LB173	-	-	hypothetical protein
+170186..171073	+	295	24217235	htpX	LB174	-	COG0501O	heat shock protein HtpX
+171180..171356	-	58	24217236	-	LB175	-	-	hypothetical protein
+171595..172128	+	177	24217237	-	LB176	-	COG0503F	adenine phosphoribosyltransferase
+172166..173680	+	504	24217238	-	LB177	-	COG0265O	putative serine protease
+173695..175278	+	527	24217239	-	LB178	-	COG0265O	putative serine protease
+175702..175815	-	37	24217240	-	LB179	-	-	hypothetical protein
+175960..176091	-	43	24217241	-	LB180	-	-	hypothetical protein
+176650..177882	-	410	24217242	-	LB181	-	-	hypothetical protein
+177879..179456	-	525	24217243	-	LB182	-	-	hypothetical protein
+179453..179845	-	130	24217244	-	LB183	-	COG0735P	transcriptional regulator (Fur family)
+180591..180731	-	46	24217245	-	LB184	-	-	hypothetical protein
+181088..181198	-	36	24217246	-	LB185	-	-	hypothetical protein
+181530..182207	+	225	24217247	-	LB186	-	COG5398P	heme oxygenase
+182182..183390	+	402	24217248	-	LB187	-	-	hypothetical protein
+183377..183514	-	45	24217249	-	LB188	-	-	hypothetical protein
+183575..184105	-	176	24217250	-	LB190	-	-	hypothetical protein
+184098..184205	+	35	24217251	-	LB189	-	-	hypothetical protein
+184996..187128	-	710	24217252	-	LB191	-	COG4771P,COG1629P	putative TonB-dependent outer membrane receptor protein
+187107..187517	-	136	24217253	-	LB192	-	-	hypothetical protein
+187719..187829	+	36	24217254	-	LB193	-	-	hypothetical protein
+187833..188411	-	192	24217255	-	LB194	-	-	unknown protein confirmed by proteomics
+188742..188870	+	42	24217256	-	LB195	-	-	hypothetical protein
+189081..189608	+	175	24217257	-	LB196	-	-	putative LRR repeat family protein
+189621..190394	-	257	24217258	-	LB197	-	-	hypothetical protein
+190812..191117	-	101	24217259	-	LB198	-	COG0718S	hypothetical protein
+191222..192577	-	451	24217260	-	LB199	-	-	putative outermembrane protein
+192685..192810	-	41	24217261	-	LB200	-	-	hypothetical protein
+192835..194457	-	540	24217262	lonA	LB201	-	COG1067O	putative ATP-dependent protease LA
+194483..195541	-	352	24217263	-	LB202	-	-	queuosine biosynthesis protein
+195737..197182	+	481	24217264	-	LB203	-	-	hypothetical protein
+197202..198344	+	380	24217265	mtfA	LB204	-	COG0438M	mannosyltransferase A
+198307..198921	+	204	24217266	-	LB205	-	-	hypothetical protein
+199150..199296	+	48	24217267	-	LB206	-	-	hypothetical protein
+199351..200739	+	462	24217268	-	LB207	-	-	hypothetical protein
+200811..202223	+	470	24217269	-	LB208	-	COG1696M	alginate o-acetyltransferase
+202238..203398	+	386	24217270	-	LB209	-	-	hypothetical protein
+203395..204792	+	465	24217271	-	LB210	-	COG1696M	alginate o-acetyltransferase
+204799..205869	+	356	24217272	-	LB211	-	-	hypothetical protein
+205866..206210	+	114	24217273	-	LB212	-	COG1694R	hypothetical protein
+206966..207379	-	137	24217274	exbD2	LB213	-	COG0848U	putative biopolymer transport protein
+207376..207903	-	175	24217275	-	LB214	-	COG0811U	MotA/ExbB proton channel family protein
+208518..209267	+	249	24217276	ubiE	LB215	-	COG2226H	ubiquinone/menaquinone biosynthesis methlytransferase
+209350..211110	+	586	24217277	-	LB216	-	-	hypothetical protein
+211107..211502	+	131	24217278	-	LB217	-	-	hypothetical protein
+211547..212938	+	463	24217279	-	LB218	-	COG2339S	hypothetical protein
+212921..213655	-	244	24217280	-	LB219	-	-	hypothetical protein
+213749..214984	-	411	24217281	-	LB220	-	-	hypothetical protein
+215512..215634	+	40	24217282	-	LB221	-	-	hypothetical protein
+216332..216784	-	150	24217283	-	LB222	-	COG1734T	hypothetical protein
+217221..218072	-	283	24217284	-	LB223	-	COG2801L	putative transposase
+218069..218374	-	101	24217285	-	LB224	-	COG2963L	putative transposase
+219779..222361	+	860	24217286	-	LB225	-	-	hypothetical protein
+222462..222995	+	177	24217287	-	LB226	-	-	Fimh-like protein
+224010..224129	+	39	24217288	-	LB227	-	-	hypothetical protein
+224104..225336	+	410	24217289	-	LB228	-	-	Putative AraC-type Regulator
+225796..225906	-	36	24217290	-	LB229	-	-	hypothetical protein
+226283..226582	+	99	24217291	-	LB230	-	COG2963L	putative transposase
+226579..227424	+	281	24217292	-	LB231	-	COG2801L	putative transposase
+228469..228888	-	139	24217293	-	LB232	-	-	hypothetical protein
+229573..229791	-	72	24217294	-	LB233	-	-	hypothetical protein
+229900..230004	-	34	24217295	-	LB234	-	-	hypothetical protein
+230358..232076	-	572	24217296	-	LB235	-	COG5001T	probable protein contain EAL family signaling protein
+232421..232525	-	34	24217297	-	LB236	-	-	hypothetical protein
+232542..233129	-	195	24217298	-	LB237	-	-	GGDEF family protein
+233172..234227	-	351	24217299	-	LB238	-	-	putative CACHE family protein
+234316..234507	+	63	24217300	-	LB239	-	-	hypothetical protein
+234521..236095	-	524	24217301	-	LB240	-	-	GGDEF family protein
+236543..239152	+	869	24217302	-	LB241	-	COG0642T	two-component hybrid sensor and regulator
+239627..240328	-	233	24217303	-	LB242	-	-	hypothetical protein
+240371..240778	-	135	24217304	-	LB243	-	-	hypothetical protein
+241061..242065	-	334	24217305	-	LB244	-	-	hypothetical protein
+242934..244322	-	462	24217306	-	LB245	-	COG0591ER	probable sodium:solute symporter
+244678..244803	-	41	24217307	-	LB246	-	-	hypothetical protein
+245062..245250	-	62	24217308	-	LB247	-	-	hypothetical protein
+245365..246822	+	485	24217309	-	LB248	-	-	hypothetical protein
+246884..246997	-	37	24217310	-	LB249	-	-	hypothetical protein
+247072..247353	-	93	24217311	-	LB250	-	-	hypothetical protein
+247350..249314	-	654	24217312	-	LB251	-	-	hypothetical protein
+249442..250320	-	292	24217313	-	LB252	-	COG1752R	hypothetical protein
+250333..250566	-	77	24217314	-	LB253	-	-	hypothetical protein
+250911..252125	-	404	24217315	-	LB254	-	COG2814G	putative sugar transport protein
+252638..253927	+	429	24217316	-	LB255	-	COG0427C	4-Hydroxybutyrate CoA-transferase
+253999..254133	+	44	24217317	-	LB256	-	-	hypothetical protein
+254196..254714	+	172	24217318	-	LB257	-	-	hypothetical protein
+254727..257126	+	799	24217319	-	LB258	-	COG4870O	Cysteine protease
+257127..257249	+	40	24217320	-	LB259	-	-	hypothetical protein
+257596..257706	-	36	24217321	-	LB260	-	-	hypothetical protein
+257787..259139	-	450	24217322	-	LB261	-	-	hypothetical protein
+259245..260663	-	472	24217323	galF	LB262	-	-	UDP-glucose pyrophosphorylase
+260651..261100	-	149	24217324	-	LB263	-	-	hypothetical protein
+261110..262027	+	305	24217325	-	LB264	-	-	hypothetical protein
+262747..263928	+	393	24217326	-	LB265	-	COG0596R	hypothetical protein
+264495..265445	-	316	24217327	-	LB266	-	COG0385R	hypothetical protein
+266337..267488	-	383	24217328	-	LB267	-	COG0520E	aminotransferase
+267499..268368	-	289	24217329	-	LB268	-	COG1633S	hypothetical protein
+268431..269057	-	208	24217330	-	LB269	-	COG3714S	hypothetical protein
+269054..269176	-	40	24217331	-	LB270	-	-	hypothetical protein
+269322..269507	+	61	24217332	-	LB271	-	COG0861P	hypothetical protein
+269486..270124	+	212	24217333	-	LB272	-	COG0861P	hypothetical protein
+270216..272096	+	626	24217334	mcm2	LB273	-	COG2185I	methylmalonyl-CoA mutase
+272093..274291	+	732	24217335	mcm3	LB274	-	COG2185I	methylmalonyl-CoA mutase
+274304..275359	+	351	24217336	argK	LB275	-	COG1703E	arginine/ornithine transport system ATPase
+276450..276827	+	125	24217337	-	LB276	-	-	hypothetical protein
+276984..277673	+	229	24217338	-	LB277	-	-	hypothetical protein
+277675..278124	+	149	24217339	-	LB278	-	-	hypothetical protein
+278133..280793	+	886	24217340	-	LB279	-	-	hypothetical protein
+280793..281308	+	171	24217341	-	LB280	-	-	hypothetical protein
+281387..282001	+	204	24217342	exbB	LB281	-	COG0811U	transport protein ExbB
+282091..282495	+	134	24217343	exbD3	LB282	-	COG0848U	transport protein ExbD
+282505..283155	+	216	24217344	-	LB283	-	COG0810M	TonB protein
+284176..284292	+	38	24217345	-	LB284	-	-	hypothetical protein
+284338..285132	+	264	24217346	-	LB285	-	COG2908S	hypothetical protein
+285144..286796	+	550	24217347	gltB2	LB286	-	COG0069E	glutamate synthase
+287132..287860	+	242	24217348	-	LB287	-	-	hypothetical protein
+287857..288297	+	146	24217349	-	LB288	-	COG0824R	hypothetical protein
+288431..288862	+	143	24217350	-	LB289	-	-	hypothetical protein
+289090..291633	+	847	24217351	-	LB290	-	COG0642T,COG2202T	two-component hybrid sensor and regulator
+291637..292335	-	232	24217352	-	LB291	-	COG1028IQR	3-oxoacyl-[acyl-carrier protein] reductase
+292348..293097	-	249	24217353	-	LB292	-	COG0235G	probable sugar aldolase
+293099..293632	-	177	24217354	-	LB293	-	COG1791S	probable ARD family methionine salvage pathway enzyme
+293655..294371	-	238	24217355	-	LB294	-	COG0231J	elongation factor P
+294557..295630	+	357	24217356	-	LB295	-	COG3547L	putative transposase
+296868..297005	+	45	24217357	-	LB296	-	-	hypothetical protein
+297444..298343	+	299	24217358	pstS	LB297	-	COG0226P	phosphate-binding protein PstS
+298418..299515	+	365	24217359	-	LB298	-	COG1509E	L-lysine 2,3-aminomutase
+299490..300128	+	212	24217360	-	LB299	-	-	receptor tyrosine kinase
+300168..300389	+	73	24217361	-	LB300	-	-	hypothetical protein
+300382..301155	+	257	24217362	-	LB301	-	-	hypothetical protein
+301358..301474	-	38	24217363	-	LB302	-	-	hypothetical protein
+301790..302635	-	281	24217364	-	LB303	-	COG2801L	putative transposase
+302632..302937	-	101	24217365	-	LB304	-	-	putative transposase
+303419..303913	-	164	24217366	-	LB305	-	COG3216S	hypothetical protein
+304190..304294	-	34	24217367	-	LB306	-	-	hypothetical protein
+304360..304818	-	152	24217368	-	LB307	-	COG1238S	hypothetical protein
+304824..304964	-	46	24217369	-	LB308	-	-	hypothetical protein
+305503..306762	+	419	24217370	-	LB309	-	-	similar to Fimh-like protein
+306995..307822	-	275	24217371	pyrF	LB310	-	COG0284F	orotidine-5'-monophosphate decarboxylase
+307834..308679	-	281	24217372	speE2	LB311	-	COG0421E	spermidine synthase
+308686..309552	-	288	24217373	-	LB312	-	-	hypothetical protein
+309558..309662	-	34	24217374	-	LB314	-	-	hypothetical protein
+309639..309782	+	47	24217375	-	LB313	-	-	hypothetical protein
+310349..310483	+	44	24217376	-	LB315	-	-	hypothetical protein
+310505..311047	-	180	24217377	-	LB316	-	-	unknown protein confirmed by proteomics
+311077..312246	+	389	24217378	-	LB317	-	COG0787M	alanine racemase
+312297..313442	+	381	24217379	-	LB318	-	-	hypothetical protein
+313463..314374	+	303	24217380	-	LB319	-	-	hypothetical protein
+314371..315162	+	263	24217381	-	LB320	-	-	hypothetical protein
+316118..316777	-	219	24217382	-	LB321	-	-	hypothetical protein
+316778..319093	-	771	24217383	-	LB322	-	COG0642T	two-component hybrid sensor and regulator
+319149..319766	+	205	24217384	-	LB323	-	COG0741M	lytic transglycosylase
+319842..319994	-	50	24217385	-	LB324	-	-	hypothetical protein
+320145..320873	+	242	24217386	-	LB325	-	COG1309K	transcriptional regulator, TetR family
+321371..321490	+	39	24217387	-	LB326	-	-	hypothetical protein
+321518..323791	-	757	24217388	acn	LB327	-	COG1048C	aconitate hydratase
+324060..325211	-	383	24217389	ompA	LB328	-	COG1360N,COG2885M	outer membrane protein OmpA
+325387..326403	-	338	24217390	lysS2	LB329	-	COG2269J	lysyl-tRNA synthetase
+326504..326608	-	34	24217391	-	LB331	-	-	hypothetical protein
+326607..326936	+	109	24217392	-	LB330	-	-	hypothetical protein
+327256..327666	-	136	24217393	-	LB332	-	-	hypothetical protein
+327874..328605	+	243	24217394	-	LB333	-	COG0745TK	two-component response regulator
+329173..329832	-	219	24217395	-	LB334	-	-	hypothetical protein
+329835..330245	-	136	24217396	phhB	LB335	-	-	pterin-4-alpha-carbinolamine dehydratase
+330224..330352	-	42	24217397	-	LB336	-	-	hypothetical protein
+330565..331380	+	271	24217398	-	LB337	-	-	hypothetical protein
+331365..331493	+	42	24217399	-	LB338	-	-	hypothetical protein
+331510..331692	+	60	24217400	-	LB339	-	-	hypothetical protein
+332599..333030	-	143	24217401	-	LB340	-	-	hypothetical protein
+333122..333505	+	127	24217402	-	LB341	-	-	hypothetical protein
+333502..333771	+	89	24217403	-	LB342	-	-	hypothetical protein
+333876..334343	+	155	24217404	-	LB343	-	-	cytochrome-c oxidase chain III
+334727..334846	-	39	24217405	-	LB344	-	-	hypothetical protein
+334894..335967	-	357	24217406	-	LB345	-	COG3547L	putative transposase
+336410..336715	+	101	24217407	-	LB346	-	COG2963L	putative transposase
+337384..337512	+	42	24217408	-	LB347	-	-	hypothetical protein
+337515..337955	+	146	24217409	-	LB348	-	-	hypothetical protein
+338085..338201	+	38	24217410	-	LB349	-	-	hypothetical protein
+339199..339609	-	136	24217411	-	LB350	-	-	hypothetical protein
+339811..340188	-	125	24217412	-	LB351	-	-	putative outermembrane protein
+340238..340336	-	32	24217413	-	LB352	-	-	unknown protein confirmed by proteomics
+340442..341902	-	486	24217414	pyk2	LB353	-	-	pyruvate kinase
+341941..343902	-	653	24217415	-	LB354	-	-	hypothetical protein
+343915..344964	-	349	24217416	asd	LB355	-	COG0136E	aspartate-semialdehyde dehydrogenase
+345016..346608	-	530	24217417	-	LB356	-	-	hypothetical protein
+346596..346715	-	39	24217418	-	LB357	-	-	hypothetical protein
+347158..348723	+	521	24217419	-	LB358	-	-	hypothetical protein
+348694..350157	-	487	24217420	-	LB359	-	COG1696M	alginate O-acetylation protein
+350989..351204	-	71	24217421	-	LB360	-	-	hypothetical protein
+351865..352251	-	128	24217422	-	LB361	-	-	hypothetical protein
+352294..353268	-	324	24217423	-	LB362	-	-	hypothetical protein
+353351..354448	-	365	24217424	-	LB363	-	-	hypothetical protein
+354363..355733	-	456	24217425	-	LB364	-	COG2204T	transcriptional regulator (FIS family)
+355978..356730	+	250	24217426	parA4	LB365	-	COG1192D	ParA protein
+356714..357559	+	281	24217427	parB4	LB366	-	COG1475K	ParB family protein
+357993..358931	+	312	24217428	-	LB367	-	-	hypothetical protein

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.raw
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.raw	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.raw	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHFGNIVDNIVIDKETGKKRGFAFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY
+MNKQRGTYSEVSLAQDPKRQQRKLKGNKISISGTKQEIFQVELNLQNASSDHQGNDKTYHCKGLLPPPEKLTAEVLGIICIVLMATVLKTIVLIPCIGVLEQNNFSLNRRMQKARHCGHCPEEWITYSNSCYYIGKERRTWEERVCWPVLRRTLICFL

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.swiss
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.swiss	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.swiss	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,56 @@
+ID   GCDH_CAEEL     STANDARD;      PRT;   409 AA.
+AC   Q20772;
+DT   01-NOV-1997 (Rel. 35, Created)
+DT   01-NOV-1997 (Rel. 35, Last sequence update)
+DT   16-OCT-2001 (Rel. 40, Last annotation update)
+DE   PROBABLE GLUTARYL-COA DEHYDROGENASE, MITOCHONDRIAL PRECURSOR
+DE   (EC 1.3.99.7) (GCD).
+GN   F54D5.7.
+OS   Caenorhabditis elegans.
+OC   Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida; Rhabditoidea;
+OC   Rhabditidae; Peloderinae; Caenorhabditis.
+OX   NCBI_TaxID=6239;
+RN   [1]
+RP   SEQUENCE FROM N.A.
+RC   STRAIN=BRISTOL N2;
+RA   Coles L.;
+RL   Submitted (OCT-1995) to the EMBL/GenBank/DDBJ databases.
+CC   -!- CATALYTIC ACTIVITY: GLUTARYL-COA + ACCEPTOR = CROTONOYL-COA +
+CC       CO(2) + REDUCED ACCEPTOR.
+CC   -!- COFACTOR: FAD (BY SIMILARITY).
+CC   -!- PATHWAY: DEGRADATIVE PATHWAY OF L-LYSINE, L-HYDROXYLYSINE,
+CC       AND L-TRYPTOPHAN METABOLISM.
+CC   -!- SUBCELLULAR LOCATION: MITOCHONDRIAL MATRIX (POTENTIAL).
+CC   -!- SIMILARITY: BELONGS TO THE ACYL-COA DEHYDROGENASES FAMILY.
+CC   --------------------------------------------------------------------------
+CC   This SWISS-PROT entry is copyright. It is produced through a collaboration
+CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation -
+CC   the European Bioinformatics Institute.  There are no  restrictions on  its
+CC   use  by  non-profit  institutions as long  as its content  is  in  no  way
+CC   modified and this statement is not removed.  Usage  by  and for commercial
+CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/
+CC   or send an email to license at isb-sib.ch).
+CC   --------------------------------------------------------------------------
+DR   EMBL; Z66513; CAA91333.1; -.
+DR   HSSP; Q06319; 1BUC.
+DR   WormPep; F54D5.7; CE03411.
+DR   InterPro; IPR001552; Acyl-CoA_dh.
+DR   Pfam; PF00441; Acyl-CoA_dh; 1.
+DR   Pfam; PF02770; Acyl-CoA_dh_M; 1.
+DR   Pfam; PF02771; Acyl-CoA_dh_N; 1.
+DR   PROSITE; PS00072; ACYL_COA_DH_1; FALSE_NEG.
+DR   PROSITE; PS00073; ACYL_COA_DH_2; 1.
+KW   Hypothetical protein; Oxidoreductase; Flavoprotein; FAD;
+KW   Mitochondrion; Transit peptide.
+FT   TRANSIT       1      ?       MITOCHONDRION (POTENTIAL).
+FT   CHAIN         ?    409       PROBABLE GLUTARYL-COA DEHYDROGENASE.
+FT   ACT_SITE    388    388       BASE (POTENTIAL).
+SQ   SEQUENCE   409 AA;  44964 MW;  4D06241FB6768069 CRC64;
+     MLTRGFTSIG KIASRGLSST FYQDAFQLSD QLTEDERSLM LSAREYCQER LLPRVTEAYR
+     TEKFDPSLIP EMGSMGLLGA PYQGYGCAGT STVGYGLIAR EVERVDSGYR STMSVQTSLV
+     IGPIYNYGSE DQKQKYIPDL ASGKKIGCFG LTEPNHGSNP GGMETKATWD ETTKTYKLNG
+     SKTWISNSPV SDVMVVWARS ARHNNKIKGF ILERGMKGLT TPKIEGKLSL RASITGQIAM
+     DDVPVPEENL LPNAEGLQGP FGCLNNARLG IAWGALGAAE ECFHLARQYT LDRQQFGRPL
+     AQNQLMQLKM ADMLTEISLG LQGCLRVSRL KDEGKVQSEQ ISIIKRNSCG KALEVARKAR
+     DMLGGNGIVD EYHIMRHMVN LETVNTYEGT HDVHALILGR AITGLNGFC
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.tab
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.tab	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.tab	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,2 @@
+roa1_drome	MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPRTKRSRGFGFITYSHSSMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHFGNIVDNIVIDKETGKKRGFAFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY
+roa2_drome	MVNSNQNQNGNSNGHDDDFPQDSITEPEHMRKLFIGGLDYRTTDENLKAHEKWGNIVDVVVMKDPTSTSTSTSTSTSTSTSTMIDEAQKSRPHKIDGRVEPKRAVPRQDIDSPNAGATVKKLFVGALKDDHDEQSIRDYFQHLLLLLLLDLLLLDLLLLDLLLFVEFDDYDPVDKVVLQKQHQLNGKMVDVKKALPKNDQQGGGGGRGGPGGRAGGNRGNMGGGNYGNQNGGGNWNNGGNNWGNNRGNDNWGNNSFGGGGGGGGGYGGGNNSWGNNNPWDNGNGGGNFGGGGNNWNGGNDFGGYQQNYGGGPQRGGGNFNNNRMQPYQGGGGFKAGGGNQGNYGNNQGFNNGGNNRRY

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.tigrxml
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.tigrxml	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.tigrxml	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,41 @@
+<ASSEMBLY ASMBL_ID = "162" COORDS = "1-1178688">
+        <HEADER>
+                <CLONE_NAME>chr9</CLONE_NAME>
+                <ORGANISM>Cryptococcus neoformans</ORGANISM>
+                <AUTHOR_LIST CONTACT = "">
+                </AUTHOR_LIST>
+        </HEADER>
+        <TU FEAT_NAME = "162.t00500" LOCUS = "" PUB_LOCUS = "" ALT_LOCUS = "" COM_NAME = "hypothetical protein" PUB_COMMENT = "" COORDS = "185408-187155">
+                <MODEL FEAT_NAME = "162.m02638" COMMENT = "" COORDS = "185794-187041">
+                        <PROTEIN_SEQ>MSAHSGSCIPSTSCPSSTVSINGTCVTCPLDCATCSTASTCSTCPSDRPILKNGRCIAYCATDTYYDTSTGTCQACDWTCKNCVGEGSAMCSSCSDGYMLKDGVCVDALCGDAGFANGFGMCFSSFVHKSQKRYLGLLALVGVAIIAGIASWWYVRRERRKTRQATKEFGKRLDERNVNDRLSALRLEKVFGFNRVTFGRGGDRSARTTQEDGGKKNKLRELLLPSKRRSGNEEMEMKKSNFAPDKERDCYDSWRTSNFGKDNWVAPPPYVPSQGVPTPVDVKHTFNKRDSLDSIPTPSHQTFAPSSSTSSFTITRPATPPRKLQNPYLGSTIIHSMSTPSPPPHSRSLMPPPRPGMGRRESGNSFSSGSLWTPMTGMTSITKITADKERDVRRYSGRQDRQMDVERRPTDYDLL*</PROTEIN_SEQ>
+                        <EXON FEAT_NAME = "162.e11999" COORDS = "185408-185433">
+                        </EXON>
+                        <EXON FEAT_NAME = "162.e12000" COORDS = "185487-187155">
+                                <CDS FEAT_NAME = "162.c02494" COORDS = "185794-187041"/>
+                        </EXON>
+                </MODEL>
+        </TU>
+        <TU FEAT_NAME = "162.t00448" LOCUS = "" PUB_LOCUS = "" ALT_LOCUS = "" COM_NAME = "eukaryotic translation initiation factor 2 alpha subunit (eif-2- alpha). (fission yeast" PUB_COMMENT = "" COORDS = "61061-59343">
+                <MODEL FEAT_NAME = "162.m02967" COMMENT = "" COORDS = "60833-59633">
+                        <PROTEIN_SEQ>MPRFYENKYPEVDQLVMVQVQSIEDMGAYVKLLEYDNIEGMILLSELSRRRIRSVQKLIRVGRNEVVVVMRVDPDKGYIDLSKRRVSAEEVVKCEEQYEKGKAVDSIITQVAKKRGVTPESLYEKIAWPLHRQYGHAYEAFKLSISEPEAVFGSLELDEETLADLRSGIARRLTPKPVKVRADIEVKCFSYAGIDAIKRALTAGEAVSTPDVPIKVRLVAPPLYVMSTTSTDKNAAIELMEKAVEVIGETVRKDKGDITIKMKPKVVSETEDAELKALMEQFEAANMDQAGDDESSEEDE*</PROTEIN_SEQ>
+                        <EXON FEAT_NAME = "162.e18653" COORDS = "61061-60801">
+                                <CDS FEAT_NAME = "162.c05241" COORDS = "60833-60801"/>
+                        </EXON>
+                        <EXON FEAT_NAME = "162.e18654" COORDS = "60736-60674">
+                                <CDS FEAT_NAME = "162.c05242" COORDS = "60736-60674"/>
+                        </EXON>
+                        <EXON FEAT_NAME = "162.e18655" COORDS = "60619-60487">
+                                <CDS FEAT_NAME = "162.c05243" COORDS = "60619-60487"/>
+                        </EXON>
+                        <EXON FEAT_NAME = "162.e18656" COORDS = "60431-60224">
+                                <CDS FEAT_NAME = "162.c05244" COORDS = "60431-60224"/>
+                        </EXON>
+                        <EXON FEAT_NAME = "162.e18657" COORDS = "60160-59809">
+                                <CDS FEAT_NAME = "162.c05245" COORDS = "60160-59809"/>
+                        </EXON>
+                        <EXON FEAT_NAME = "162.e18658" COORDS = "59746-59343">
+                                <CDS FEAT_NAME = "162.c05246" COORDS = "59746-59633"/>
+                        </EXON>
+                </MODEL>
+        </TU>
+</ASSEMBLY>

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.txt
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.txt	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.txt	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+>Test
+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
+TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
+TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC
+ATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG
+CCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAA
+GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
+AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTG
+AAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTT
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/test.waba
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test.waba	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test.waba	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+U57623.c1 align 40.9% of 3833 pair1_hs.fa U57623:33-2971 + pair1_mm.fa.U02884:242-3687 +
+agctatgatggcgtcacagtactccagcctgggagacacagcga-gagactttgtctctaaaa------------------------------------aataataataaaataaaaagttcaatgaaacaatacacccaaagccctcagcatgcaataaatagcaagacaaggcaggtcttattt-ttactgaaagtgcttagta-aactatacagtg-acaaaccaccgcacaacaggctctcgaaaggaggcagcaaattacccaaaagtg-caggcggcttgctagt-gtgcacaggccaaagaaagggcggcaggtggggaaggcagccat------------------gggccttgaagagctgaccgaattggcagaatttctgcaggaggggagctgggaacgacctgagctaaagctcggagctgtgcgaagaaaccggaaaagcccagagcacttgcaggggcgggtggggagctagatggtggggtggggtggggacggaggagggc-ca-gcaggagacattccgcagggaggggcaagcacgtgtgaggcgggc--ggggcgcgaagggtcaggcttttgctcaaaacaggcagaggacaaggtcagctcagccgcagaccgagccgctggtgactgtctccgccaccaggcagtgagagtgaagggagagcgcgacctctgaagcccgctagactaagcttgcaatctgagctccattcaccccctcctatttcttgagaccttgtcagttcccctgtgagcctcggactcacttgta-aa-acgaggacagatgcccgt----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gccagaa-g-tcaaccagagctttccccggcgtgggcaccagcccaagggcg-ttttgttttctagtctcatctctgctctgacgctaagctcaaagagggactgggggacgggaagatatccaccatggatgcgccctagatctcgggctggtgtcggctgttccttctcagattccagagtgtctagaggccaggaaagggagaaggtcctaccagcctggggtagggactcgggggccagg-cactg-gcgctgacgcagg-ctagcagggcgccactggctggtc-cccacccacctcggtgggt-tgggggatgggcgcaccagcccctcctgggtgagccctagcctggggcttcctatttcgggagccgggggcgtgggccacgtctcctcatgtgatgcgagggctatttaaagcggcagcccgggcagggagccgccg-tcggagcccttgcacgcctgctctcttgtagcttctctcagcctagcccagcatcactatggtggacgctttcctgggcacctggaagctagtggacagcaagaatttcgatgactacatgaagtcactcggtgagcaagccgcggggctcaggatgttgg-----------------------cttggggactggctggtggcgtgcctagccccacgcagcactcctgccgcatccctcctggttaagactggggaataggggagcgcggagatggcagcctggcctagagcaggt--------------------ggggcctgttcagagggggctttggtggtccaaatctggttagagaccacggtagggaggtggtggaaggaggcagctgtg-----------------tgggaggctctttccaggaag-agggatatgtgatttggaggtaggaggagggtttggataaagaacactgatcacaggaaagggagtgtagccaggggagaaaaagaacaggggcatg
ggtagtttagaaattggaggagactgaacccagaaagggaatggggcagccagggagtgtacaatgatgtaaacaagtaggaaatacctaggaggaaaaagattagtggggaaaaaactgtggatcagtgaatcagatatgagaaggacgtaagacaggaacctgcagtaagcagcaatccccatctctgc---tt-ggttagggaagagaattcttgctggagaatgccctttctcaccagccagtctgaccttgtcctgcagtctatgtatccaggccttcatcactgtctgtgagcctcgtggtagggtggggcaagaggcccatgatcagctgggcctttcctgcaac-ccaa-ggctcacc-tatctgtgcgaggggtaggcag-agaaagccattggacttctgatgtgcagtagagggtccca-aggcaaggtcaagacctgggagggaggatcactggtttaggaggatgtggagaactcctgtggtgttgggatggagaagaatcaggattcaaagaatctcac-aggtgaggaacttggagattccca-tacca-tctagttcaacagggaaactgaaaccaggagagtagaaatgtattataacaattccacagcagagccaatatgaaaatctaaggtttctagatctgta-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------acccagagctcttcccactaccctacaggccctgcgagtgggaagaaaagtagaaactgcttagctaatgattgacctcagcccttcttctactgctttgggcttagatggagag---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gtcaaagctctcaacggcctctaccctatcttg--------------ggcgctatgcccagtaattctaggcaggcagtcattct-tagaggagcagcccccagcccccacgaacacagcccagcagctattgggaagttggaatgcccagatttagttcctccttccaaagctgggccagagctgagtct-tgaattgagctgcaacaactttacc-attctt-gttcccttattctgccccg-agttggg-tcagcgggctg-gtctccctgaagtcctgttatctttcagcagcttatgttaaggcagccagcattctcatcgtaggaatggaaagcctgggaaaataccctcctcagctctcagtaagtagtgctggcttcatttctaagtagaacccagatctccctgagtctcctaaattctgtcagctcaatattcttagtttctcttggttcagaccctcactcatcccgcagtggtttccttttcaaacactccatacctctgggtagatcctaagtgaacag
+agccagagcagcggcacacagctgtaacccaggagacagaggcaggagaatctttagttcaaggtcaacccctgctacacattgagatgctgacttggtaataatta-aacagaaactgctgaactaaggaataggctccactgaggttccct-tactcacctgtaaaaaggggatgataccacctaccaacgaaaaagttgagtgtgaccatgccctgaagtaggctacaaccatcaatagtcgggtcttatttaataacgtactttaaggtgacaagcagtctagtggcagaagtcaggggaaaaaactgacttcagcagagggtcgcggctttccgggagttaaggtggccgaggccggaagaaccctctgaatagaca-aa-ttgt-c-ttcgcggagtgaagaacgaccctggcacaagctcagaggtcagtaaataaagc-ctgaagcgctttcaggcagcggcgacgggtgggactg-----------------------cggagaaaggcgcaggcgggagacattccgcagggaggggctagcacgtgtggggctagcatgagggaagcaaggtcacg-ttctccgc-cagcagg-tgaggcgctgggcagctcagccatccgcggtgtccaaggcaactcttttc---------cact-tgtctggtaggagcaagagggctc-aaaggccactagaccatgctctctgtccaggctcca---attcttttttacttacggcgaccgcgtca-ttcctctccgagcctctgagcctcttctacaagaagaggacataggaccgttgagatgggtttttgggtaaaggcccttgctgtcaagccttgacaaccccagtttgatacgtgggacccacacggtggaagcagagaagggactcccgcgagttacaacgaacgccccagtctcccaccccttccccataagtacgcctacacgagcatacacaatataagaataaaaccacagcgaattaaaaaacaaggcggcagaaggatcaagcggcg-tttctccagcgt-ggcaccagctcaagggcgagtttcctttc-agt-------------------------------------------------------------------------------------------------------------------------------atggccgggggatgctctacttgggttgcgggaagcgccccgcagccaggccagggatgggttagatggcaccaacaggaccgcgggcgccgctgacgtaggcgacgggagggctgtgggggatgggc-c-ccagccctt--tgcgggagtgcaagcc-ccggcttcctatttcgggagcgaggggtgtgggccac-tttcatcatgtgatgcgagggctatttaaagaggctgtcc-agccgggagctgcggttctcagtgcctgctcgcctcctcactcatcg-----------------------caccatggcggacgcctttgtcggtacctggaagctagtggacagcaagaattttgatgactacatgaagtcactcggtgagcga-acgaacggcgcaggatctagggtcaggagggccggcaaggcggtcttggcgctgagctcccagggggagtgcccccatg-tgc-ctcccgcaagctcctagccagtccagac-agggaatactgaggtgcggag--ggtggcctgggctgaagccactccactccaccccaccccaccccggcctcctgggaggggggtgtcgcggtccaagcttggcgagcctcgtagct--ggaggggaagggtagaggcagctgtggccgcagaggtccgggatgggagg--ctttctaggaagcagtg-taggtgatccggaggt-gga--------------aaggggaggga-aagaagggcgggaggctggccgcaggagaaggcaaagaggagcat-
ggtggtccagaa----------attgaattccgaaagggaatagagcagcta-ggagtgtacag-----------------------------------agcct-ggaggaagactaaagaaaatcagtgaattccatctgggaagaggtgaagatacagccaggcagtcagcaacaagccctacccctccatgttggggtagtgaagaggcctctctctggaagatgccctggttc-tca-ccagcctgaccttcacctacagtgtgtgcagcca-cccctgggatcagtcggagacgctgctgctagagcagggcaagacgaccacta-cacatagg-cttcccggccgcaccagtcggccaccggatcagtgctggggatagggtgaagaaagc----------ctgggatcgagcagagggtgtcagaagaaaggtgaagagctatgaagggaga-gtgtggctt-gg-ggctgggga-aa-ttgtgtggtgtgggcggtgacacaacgcctttaaccagcactctgggaagcagaggcaggtgaatctcccgagttctaggcctggtctatacagaga-gaattccaggacag-ccaggactacacagagaaaccatgtcttgaaaaaaaaaagaaaggaagagtcccatgatttacttaataggaagacagcttgggacacatgagctcatcgcctcataggaaagcccaggatttctttttgaagactgaactagagccttgtgcatgccccctactgctgagttatactccccactcacacacacacacacacacaccctctttttactctgtgtaacaggttctcactaacttccccaggttggctttgatcttgtaacctgccatctcggccttccaaatagttgagaacccagaactacctagagttcttcccatt------------tcaacagtggggaatgtcacatgaaccacttatccaa-ga-cggccccagcccttcctttcttgccttgagcttagataaagacctctacctgcggagtccctggctatatcatcctggtctaggaggctggggcagggaaaacaggactgtgtcatgcctgagctagcttccactccgtcttccccgggaaggagggctggaatcggacatgttgagggatgtgtgtagttgcctctcacctacttccagctcttctctgaaacaggcccacaaagcaatttgtccttttggtttggggaatggaacccaaggccctttgcccgtgctaagcaagcactgcgactgctgaaccacatctccagacagggctcccaccggcacctaccctaccctgaggctctccaggaaggcagctggtcttgtcttttaagacagggttttactgtgtatccctggctctcctggaactccctaa-ggagaccaggtcctgtt-gtctctgcctctcccgcactggggttagaggtatgagcccacacccagctagttggctgtcaagttagggagtactagattacctgggcttagttctgtttcactgagctgccgctgccttcctagacttctttttgcctcagggcatgttgtc-ttcaggccattgt---------------------------------------------------------------tctgtgggtgctgaacccagctttagtcag-gggactgaaattctatttagcctaaaaatatcgacaggctgaaggccagta--aagtctagatgcaccccagcttccagcagtaactggcttcactggcaccctacacctacctgtaggtgggtcctgggaaaacag

+U57623.c2 align 43.7% of 4211 pair1_hs.fa U57623:3021-6913 + pair1_mm.fa.U02884:3704-6847 +
+gcctgactcatcctgataccaaggggcaatgccaagttcctcactggccaagcaagggtgggctgacagcataacagcagaggcagcccctgcccctcctgctgtagacctagggctctcaaggggcaaagaggtcccgtctagtaccagtgaccacaggcacaactgctggcct---ggattgagtatgtgctggacagaatcgcccagtgaaaatagtcaacagttttggagccgaggttcaaatctatgtcagtagtttattctctttgaattt-----------------------------------------------------------------------------------------------------------------------------------------------------cgacaagacact-tcgcactcttcattgtaaactg--gggat-aatctacgcttcgaggctgttacaagcattaagtaaaacaacccatgtagggcatgtgcagagtacctagcttccagcaagcactatgtagccaggtacatttggagactttacac-acaccacctcactacactgggctgcctcctgcctcacctttg-------------------------------------------ccttggaaga-cagttcaatgttaagctgctggggggagagggggcagtcatgattagttctttgttctttacttggttgcaggacacttaggactttgcccagtacccaaggaagccatgcttgggtcaggaagagagtctctgtaaagccttagactgggagtcaggagacgggtttgagtctaactca-ttgttgctaccgctttaggctcctcctgaatctgcacaataggacaaatacttcctttgta-c-ctaactcctagatcatagataacaggctttgaaaatgatgggttgccatgtataagggacaagagcactaacacttcttag-tttcagggtaaaaacttccaaagttggaaaactcctatgcctaaggctttggaagggaaagtctatgtttctcttctttcctcagccttattcctaaggctttgagagcttttcaggtgccctggaaggcagccttatgctccagccttgggaggtagtatagctgagcacttaagcaagctctggactcagacaattctgggcttcaatc-tcagatttgtgaccctgggctttacctctgtttttgtatctgtaacgtggaaacagtcttcagaagaacaggaagaactaaatgagataacatgtacagttcttactacacaaaaagctcatagtacttaatagtagctttttttttttttttttgagatggaatctcactctgttggctaggctggagtgcagtggcacaatctcgactcactgcaacctccacctcctaggttcaagcaattctcagcctcagcctcctgagtagctgggattacaggcacataccaccacatctggctaatttttt-gtatttttagtagagacgggtttcaccatattggccag-gctggtcttaaactcctggcctcatgtgatccgcctgccttggcctcccaaagtgtgattacaggcgtgagccaccacacttggcccaatagtagcttattctaatcccagctctgccactgacttgctatggcactgctgttccttaagtatctctcatctaatgggatcagttatctgtgttcaccaaacagaactaagcgcaagactgaattttaaaattcccatgca-aaggctttgaaagatacagtcctccacttccccatacccaggcctgagagttattcattgagtttcttgtacactgcttctctaccccagctcatatactcataac-ct-tccccctaccctcaggtgtgggttttgctaccaggcaggtggccagcatgaccaagcctaccacaatcatcgaaaagaatggggaca
ttctcaccctaaaaacacacagcaccttcaagaacacagagatcagctttaagttgggggtggagttcgatgagacaacagcagatgacaggaaggtcaaggtaagtcagggaaacaggggtggggaatggagagtgctgagactctaaaagagaataggctggtagtcttggctccctgg-tattgcaccctgaggggcagactatcatggggaatttacatgaaacaagattcataaagcctgtgtagtgctggaatgccactgatgctaaatacatg-tcagttctgtcctcttgttttcttccctcccttcttgggattcatctattgtctgcctc-ggaatgggcagcacagagccaggatgttcttctgacctcagtatctactccagctccagctgggtgaccctgt---gcaaggtatgcagtagctct-aggtttctttccccttccatagatggagagttatgtggccatggctgtgacctgaagtgctttaggaatgatgcccagaagtcagggccctccactgagtgaggtcattgtgacctccagcagcaaaaaaggcagccaggaactagaagc-acctactcagatgccgcttca-act-tctaactcccagacatggccaatgaccctgacaaactatttccagtgttgccagctgacaggca-------ggaaagagctatgttccgtgatagggcattcaccttgtcatgaatgtgtttgcagtgtctcccaccaagccttagcccctcctcccagggttctatcaccctgcagtggctgtcttggcagcttgc-ctcagccttccaggccaggcatgggagcgagagaacttaagggctttgacctctatagggtgtccctatagcagtgttctat-catgacactatcattcagccccatcagctgtttcctcttcctcatagctgtccccagaaagaacagg---------------------------------------------------atcacacaggtggctggcagcagagctggggatggtgcccaaagatggcagtctaccttggataaaggtggctgccccaccacctgctcatacctccttggacttgcctactttctcaaggggcaagaaccccaattaaacacaata--gc-cctgtggaatgcctagggcaaaaatatctactctgagtaggcaaaaaaaactaggggaat-gagaacaaggagtaaggtaaggataaaaaagagcacactaaga-gacaggcctcataccccttatcacctaaacaatacacagaaccttctcagattctcctactgaaccaccttgctcatcaggatcccttagcctggccttgtggcccccaaactcct--aggaaagagagctggaagagctgccaaatgagaaccagctgatgtatgtatgctggcagcacccagagctgaggaaccacttcaagggcatcca------------------------------gtcacaggactttgtggttgctgccctcttgttggctaa-agaggtcacatgatgtggaccaagaaaaggtgtaggaatacagggcaggaagtctaattatccaatacttcctatcactaagggtcttttagacattatgtggactaaccacaaggctggataaagattctcaggactactcctcctcctcagtcagtctttcccagggatagactagtaaatcccacctgtatctgaggggaccaggctacgggaatcacctagagtacagataagtgtctgtcttgaaggcttgtggtacttctcagagccaggctctctggctccaccatactgcctgcctctccctccttgcctaatatctgaaggcctcttccccagaaaggcagtagtggagcagaggctggaggtgaactagatgtcttgcagggatagctgggaggcggattgcctgagctcttgtcctcacaccatcactagtttggg
tcaaaggctgtgtcctctgtggcccagtgtccagaccccaccctgcccctcaattcctgactaagatcacagctcaggcctctaccctctttccacagtccattgtgacactggatggagggaaacttgttcacctgcagaaatgggacgggcaagagaccacacttgtgcgggagctaattgatggaaaactcatcctggtaagatgggca
+gcccaactcacacagataccagtaggcaaagccaagcctctcact------------------------tcttgctgtgtagccaaagctcctgacttatcctatagaaccaaaggt-tcttaggacaaagcagcccagcctagtttaagtgacttcaagcacagatggtggcttcaagggtagagtatgttattccaagaatgatatagtgagactaaaagagagtttgggaatctatg-tcactaaactcggatta-tttatttacttagcctttttgagacagggtttcactatagcccatactctggaagctacgcagcccaagctggccttgaattctcggcatttcccctgctccagtcccctgcctcttgagtgagattctaggggagtcaccatgcttggcccgtttgactttggctaagacagcaccgtgtctcagcctcgtttgtaaatggaaactataaagcttaggtttagggctgtcccaagga-t-act-tggcca-ccacttagagcttgtgcagtgtgcacagcttgcagcag------------------------------atgcacaataccatagccttatattgggctgcctcctgccacatcgctgaggatggctcagagtgtgctggggccagacgacaggtagtcaaccatggaagattccaggaaagctactaacccaaagcaccaaaggcttgac-ccaaggggtctgtgaactttacctgcttgagggacacctgggaccttgcctaggactcag--------------------------------atccaatgattatgtcaggagtctccccagggacttccaagtcatgcagttgtcgctacttttttcagc-cct-ctacgtctg--tggta-gac-aagac-tcctttgtatctctaac--------------caggaggctttgaaactga--cgctgccatacagacggca-gagagcact-gc-tgtctcagttttctgggtggaaa---------tgggagacgacccttgtccaggggactctagaaggcagttgacg-atctcttggttcttcagtcctgtt-ctgtgtgttcaggag------------------------------------------------actagaagccagcgggt-acccagctctggagcgacacagtgcttagcagcttccatctgaattgtgaccctg-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ttcacatctacaaaatcttccaggggcaggactgcgtgaggggctggctatgtggctcacaccccatggtaccttccttgccgctgttgccgaccctgccct-gcatccacatgtctcatatttgacatgag-------------------------------------------------------------------------------------------------------tttaacagctgccgagctaagggaaagagcta-agggaatgagaaaatggccccacacaaagacgtgggccactgag-gatcgccttccccatgcctaggccacaaactt-ctctttgggtct-atata-ac-acgtctctaccctacctaatgtgtcctcaactatctgcccctgcccttaggtgtgggctttgccaccaggcaggtggctagcatgaccaagcctactaccatcatcgagaagaacggggata
ctatcaccataaagacacaaagtaccttcaagaacacagagatcaactttcagctgggaatagagttcgacgaggtgacagcagatgaccggaaggtcaaggtgagtcagag-aa-agggga-tggagggcactg-gatggaacaccacagggtaagaggctggccctcttagctccttggctttttaaccccaaggggcagg-----------------------------ttcataaagcctgctcagtgctgcgatggcccagggactaagtataagctctggcc--------tgttttcttctc-accttcctgggaaggatctatcagctgtcactggagtgggcag--cagagccaagatattcttccgacct-tgt-gc--ctccagctccagctgggtgaccttgtacaccaaggtacccagtggctctgagg-------------------------acatcagcagccatggctgtgacctgaagtgttttaggaatgatgcccagaagtcagtg--ctctac--agcgaggtcatcctg-gctctggcagcagaggaggcagctgggaacaacaagctgagttctatccacaggtttcctgctctgtagctcccaggctagaccactgaccggaacaaactgtttccagtgccaccagttgacaggcacctgcgtttaccaacaggagtgctgttgggtgtccatagtctcttcaagtatttattt-aagggtatgggagggagtgtctacctccagtgatgctcagttatctcatggtggttatcttcttagcagcttgctcaaaatctcccagcactgg---gccacgcacagaactaagtaacatctgtatctataggttgtccttata-cagtatcctctacatcacaacccta-aaattcatctcttcagctctttcctcctcacagttgcccccagaaagaacagggtaggtaccaaccagtcttgcagttacagaggcgctaaagcccagcccaggaccacacagcaagctggaagtgaagatccccaaggcgccctgctgcaccctgctccaccacat-------------gtcccacctac-c-tacatctttgaacttgccatcttccttgagagaccagatttcacattaaatataacagtggtccactgggatgtct---------------------------------------ggacaatggggaatgaagattccagaaggtggaatatagacaggaaagagatgagtgacagccatcacctt-tctagactcctaccgacaaaccctgcttag-ctctccttctga---------------------------------cctgcaactcccaaattcctaaagcagatagatttggggcagctgcccaacacgaactagctga-g-a--taggctggcagcaccaaggaccgaatggtcactggagctagagctagagaacactccaaggatgcctgggtccttggtccaaggaccttgcaattgccgctcttttgccagttaagggaggtcacatggtagaaacaa---------------------------aagtctaatttgccaata-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------tatcaaggctgagctgcctgccatgcccactgccagagggcc-ccgcccctgcagagcagga---------------------------------agcttgggcagctgggctggaggtgggctgtctgctgatcccagcaccatcactggttaggt
tcctggtttg--------------------------------------------tcctggctctggtcacagcttagg-ctct-gtctctttccacagtcactggtgacgctggacggaggcaaactcatccatgtgcagaagtggaacgggcaggagacaacactaactagggagctagttgacgggaaactcatcctggtaagatgggca

+U57623.c3 align 50.3% of 2218 pair1_hs.fa U57623:7003-9170 + pair1_mm.fa.U02884:6891-8711 +
+gctgggacaagagagtggttgtgggtcagggtggtatcaggtgggaatttttctgtgtagtggctttggactcacacaggccggaactcaaatcttaccttataggctacatgactgtgggcaaatcaccttttccaagtgcaactgtaaaacgggtattaataataccaaccttgtagggctgctgggaagcctgtaagagacagtgtatgcacagcacaaagca-tcact-gat-tgaggaacacagcaggtgctccatgtcctttgtttgctcttcctgtgtttctaccttgcctcacctcaggaagaagtagaaaacagggccaaatctgatcccaggccctctaggaggggctcccattgcctatctcagcattccctttcctctcctccctaggactgcattgtcacttgcagggacaggctcgtgactggtggggacactgaatga-cagtacagtcctttcttccccattctagtcctac-cccattttcatgctttctatgtctggcctactgaaactacttgactactgcttgggtaggaagtaccacagccaggctggcagatctgttcaagcttggggacttcacttggagaatctagccttgactgaattccccccagacccagggagagcagccaactgtggattctgcctaaccacagggcctcaggttttcacctaggcatcttcactgcacaccttcttgggtcagcataacctgttaactgcattcttgtactcatgtgggacaggggtccccttgaagtttggaatgaggtgcctagctttggtggggatgtgatatgcaggaccaaattctcagtggcagctgaactatggtgaggccatgggtctggctctatgatgccagaccggatagtgggaggtacagggctctggccctggcactactctaagttagggaaggattggagttagtacccaaacacagtcctttcctgagtctctggatatttttcctatttgtc-aac-tatatgccaggcaccatcttagacactaaggatgaagaagccaaatggtataagggaaggaaaaacactcaggtcttgaccaaattacttcctctctaaaggctc-gttttt--tccaaatctctaaaataagaattacaatgcctgtcttaaggatttgctgtgcatatcagaaaaaaaaaattatgtatgtatacacacacacacacacacacacacacatacatacttgccggcactggtaggtctcagtgaca-attatcaggaggaagggagggtagaatgctcgcaatg-gtgttcctggctc-ccac-cccccatctc-actctgtctttccttccagacactcacccacggcactgcagtttgcactcgcacttacgagaaagaggcatgacctgactgcactgttgctgac----tactactctgccaatcggctacccctcgactcagcaccacattgcctcat--ttct-tcctctg-cattttgtac-aa-atccacgaattcttctggggtcaggtgccactgaccgggatcc-agttccagttcccatggtgtatgtggttttttttttttttttttaactgcactcatagggtgctctgaggtcaataaagcagagccaaggccacccagttgccttttggcctttggtaacataactctgggagtcttggtttatcctgtgtgtcagagagtg-ggcagaaataacggcctgaaggttactgaggaagaagcactggatgggagactgaaatggacagtctcggagc-ctgttaatcagctgatcaccttacacatttaataataaaagagctgtacctacacgttgcctttacactgcccc-----------------ccctccatggtcaaatgacctagttcagtcagtgatggggcttccccaggtttggctattgaactgtcacttcaggcccatcctacactga-aa-gctc-ttgggt-ctggctgttctc
tgtgaaatgctgtagtctctccctttccagaattcaggttcagggca-cagaacccaggcttgtaccatggtggtgggaga-aaatgaccactggccaagaggactgctgacctgtgcaccaggctagtacttatgactacaaattcttactgcttctctaatcaactctgagggaagagggcatctgatcattacaaaagggagggcttataagtgat
+actaggacaagagag-----------------------------gcaggggt-ggggtcctggctgtggatttacacaggtcttggttcaagcatcagtctaaaggctatctgac-aacacataaccttcaagggccactgaaatgg-------------------------------gggctgctgggaggcc----------agtgtattcagagtccaaagcactggccaaatgggaagaacgcagtaggcacccacaaacacttcctcctgcttgctgt----------------tccctgggaagaggcagaaagcaggacc-agt-agatcccaggctggagaggagcagctgccat-----------------cctctcccctcctgagtagaaatgca-ggagtc-tgcaggaccaagtgtgtgcctggtgggcactgccagggagcagccccctccttactcacaattttgtctactgctcctggcttcctg-gaaatatctattgtcta------------------------gctaggaggtgatacaaccaggcctgcagatctgct---------------------------------ttgggacaagtttgaaagcatccctggaatagtccctacatcctcgtaggactgtac-ctgggccaggagcatggaccacaatgtcagcatcacagacctgggtcgctcagtggaacccactgattgca------tattcacatgagactggggttcccactgagttagaagtgaagggcctagctctggaagggaggtaacaaagaggatcatgttcac-taggtagctagagtaaggtgaggccatggct-gggctctacagtgccaga------------------------------------atactgcatgttaggggagg-gagga-aaagttggc-agcttagcagtttctcaaggctctgttcttatcacccgtttgtctgccacatacaccaggcacccccttaggca-----ggtgctgaaatgaacacaaaggaggaacaggaatgtact-ggatcttgaccagtttacctcctctctcaagggcctatttttcccccaaatctctaaaatgctaattataa--catct-taaaagatttg-------tatcagaaaaaaaa-----------------------------------------------------------gtaaagtgcctggcacacagtaggtgctcaagtgctggtcaggatgagggtggggagcactccctcctctgctctgccccatctgaaacctgtctttctttctagactctcactcatggcagtgtggtgagcactcggacttatgagaaggaggcgtgacctggctgctccgtcactgaccgcccgctcctctgccaactggccacccctcagctcagcaccatgctgcctcatggttttcccctctgacattttgtataaacattcttgggttgggat-ttttctggagatacggggcatcagcctggacccagttcctactatgtatgtggtttatttttt------aaaactgtatccaaagggtgctccaaggtcaataaagcagaaccaaggccacccagttgtct-------------------------------gtctttggtcctcctttcctgtgtgtcaggttgaaatgaaggcctataggtcacctgggaagcagcactgtcaaggag-ccg-agtggacaggctcaaggctcagttagg-----------------------------gaacagtagcacctatgtaatacccttacactgacctgccaaggctcagagaagctagctgtcattctagcatctatgcaagcccttacactggcctgcccatggcagagcagctggctgtcactgtgtggctatttcacattcatcctgcacagacattcctggatttgctg
tatggtgtgctgtggtcaccctctctctagagtacaggctcaggacatcaaggtccaggtgtgaacaactgtggtgggaggtgactgctaagagtcgcccactcatgcccagcaagtccccagggttacaaatacaagggaaagcggtc---------------------------------------atcactatggaagagaaggtttatgagtaat

+X57152.c1 align 46.8% of 7572 pair9_hs.fa X57152:3-5844 + pair9_mm.fa.X80685:631-7367 +
+ctgtcggttggggtcctacttttacataacgcccccacaatgcccttcgccttcctcaacgtggcccccgctccaagcccattttctggagccaggaatccactctgtgggttaggaaaggccctcaggaggc-ggagggaaacctgtggaatgccgagaagccgtgtaatgaaataacggtcacggcctggcccctcaccattactctgaccagggttcgaag-----------------------gtcacacttagagcctaaggggaaatggagaagtgcaaagggacgag------cagaatggctggcaccacctcaggttagcgcactgggacgttcca-gttctcacaccgcccaccccaccccacccaagtcctacgca-cggagccaag-ccg-cacctctcccctcatgaggcaggagccccggaggaaacagtacgcccgtcaagggtctctggcgggactgattcgcactaggggcccaacaggcaataaggacccagcggattggccgaggataggccagtcccctgggcagcagcgccgcgccgggactagaggggaacgtgaggaga-gctgcggaaagagatccagcctggctc-cctcctttccccgccctaagtcagcctcttcacccagtgagcacaaaactgtattgcccagactcccgggccccg-a-acgccatacctggcttccgcttccggtggcttctcgttgtgccccgcccgcaagcgccctcctccgggccttcgtgacagccag-gtcgtgcgcgggtcatcctgggattggtagttcg-ctttctctcatttagccagtttctttctctaccggggactccgtgtcccggcatccaccgcggcacctgac-c--cttggcgcttgcgtgttgccctcttccccaccctccctaatttccact------------ccccccaccccacttcgcctgccgcggtcgggtccgcggcctgcgctgtagcggtcgccgccgttccctggaagtagcaacttccctaccccaccccagtcctggtccccgtccagccggtgagtctgaagtcgtcgctgctccgagtcccttgtcgctgggagcggcacatggggtctccggactttgatgt-ggggcgggggaggaagcgaccaggtccg-gcacgaaggagggagaggtggcctgaggagcggaggggggatgtgtggattccggtgaaagggacctgacaat-c-gcc--c-ccaaccc-gtgagaaaaggaggagcccagttcttgcttgagaatgataaacttggaaacccttgggaaaggcgtgggggtcatgcagagacttgtattggtagggagcctgagtcgaggtccctgccggagttgacacagaggagagagggccctggccttcgggagctccagggatgtgggtcgggctggtgggtcaaagtatctg-ttggcttctttcaagtggtgg-gaccccaaagaatgtttaacttcaaagaaaaggggctgagatgtaaattagaggagctggagaggagtgcttcagagtttgggttgctttaagaaagggtggttccgaattctcccgtggttggagggccgaatgtgggaggagggaggataccagaggcagggaaggagaacttgagctttactgac--actgttctt-tttc-tagctgacgtgaagatgagcagctcagaggaggtgtcctggatttcctggttctgtgggctccgtggcaatgaattcttctgtgaagtgagttctcttcaacc-tccctacttgccagcttcacatatcttcccaccagacgttccttcacatattccacttctacactgttctcttacatgctatttgaaaacttcctatcagcaaagagtcccccctataaaccccgacgaacctgtgctaaagtggcaaaactggggcccaagtcctgagtctgccaccgtccagcaatataacgttgggctagtcaattt
gtgtctttttcttttttttgagactgggtctcactctgtcaccgaggctggagggtagtggtgcgatctcggcttactgccacctctgcctcccaggttcaagcgattctcctgctccagcctcccaagtagctgggattacaagtgcctgccaccatgcctggctaatttttgtatttttagtagagacagggtttcactatgttggcaaggctggtctcgaactccagacctcagggtgatctgcctgcctcgggcctcccaaagtgctgggattacaggcgtgagcattgcgcccggcctgtatcttttgttactaaagtggcactgctagtacttgtctcaggtggcctttaggaaaactgaaatgctacacattgaaatgttttg-------------------------------------------ttcagaaaccatgctgttcagcttccacc-ttccttagccagctgagaggacaaaactggttcctagagacgggatacaggagtggagtagggacaaagatcttggaaaagaatgtctaa-gaaaaag-a-ttgctgtatctacttatccttagaaaagaaaagccaaagcttttatgggagagagtgtaggtgaactagggagagacacaagtacttctgctgagttgggagtgagaaacaagcacaacagatgcagttgtgttgatgataaggcatcacttagagcattttgcccaggtcaaagatgaggattttgata-tgggttc--cctcttggcttccatgt-cctgacaggtggatgaagactacatccaggacaaatttaatcttactggactcaatgagcaggtccctcactatcgacaagctctagacatgatcttggacctggagcctggtgaggcaccctcagggttgtt---------------------------------------------ttgtgtgtgtgcgtgcactatttttctcttc-aaat-ctctattcacttgcctgaattttgccaaatttcctttggttctctgatttctttaaccccaaattcatgctttattttgatcctccacctgactcttgtctagt-tttgtgacgtatatcacttgttctcatgttttctaaatccgcaattcagacctattccaaaatgcgtttcctcag-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ggtctggtttgttgtctgtttctcctgctttgcaccttccagtctagagtttcatcttctgcattgacattgttgcagttatgtattgaggagggagttgggagggagagcaaggagcagaggctgaaaaggtgtgaggggaaggcagagctgtcttcgtttgatgcaagggtcagaagcccaggtttctgggtcccatgcccagatgttggatggggtaaggcccaaaagtaggt
gctaggcaaactgaatagcccgcagcccctggatatgggcagggcacctaggaaagctgaaaaacaagtagttgcatttggccgggctgtggttcagatgaagaactggaagacaaccccaaccagagtgacctgattgagcaggcagccgagatgctttatggattgatccacgcccgctacatccttaccaaccgtggcatcgcccagatggtgaggcctctctgctcctacctgcctccttctgagcagtaagagacacaggttcctgcagcaagaagtcatgtttaagccctgtttaaggaagctagctgagaagaggggaagaaccccagaacttgggcctgggaattgaattctgattgggggtcatcctgaagggattgttttcagggagggagac-agaccttgaatcagagagttgtgatagactgcctcttcctcaaggaacaaacaacaaatggctctgatggtttgtagccctgccctaatttggaagaaaggcaacacagaagtttgagagcccatctagtccagagaagggggcctctggacagagttggaaggagtgccgacagagttggtatgggttgggctgcgaagggagttgc-ctcttctttacat-ct-acctgccaaccccttccattgtattcacctcagttggaaaagtaccagcaaggagactttggttactgtcctcgtgtgtactgtgagaaccagccaatgcttcccattggtgagtgttgaagaagggaaaggaaagcaccgtgtggcagtcttatgggaaggagttggggctcaacacattggagcctgagtcctgaggggaggttaggtaggaatagggggatacctggcctgctgagtctggctg----tctcccaggcctttcagacatcccaggtgaagccatggtgaagctctactgccccaagtgcatggatgtgtacacacccaagtcatcaagacaccatcacacggatggcgcctacttcggcactggtttccctcacatgctcttcatggtgcatcccgagtaccggcccaagagacctgccaaccagtttgtgcccaggtagggagcagggagagtcattaagggtcaaaggaaaggcccaagatcccccagagaggggaggacagggcatggccctttcttgaggtctgcttctcccagaatcagggcatctccctgctgagtgactgtgggaaagttatttgattatctgtgc-ttgagttaccttattgtagaatgttcttgagctgagaagttgggaaccacgaggctttagctctgagcaggtccat-----------------------------------------------------------------------------------------------------------------------------agaggagctcaggtgg-ggaggtgggaatgcaggtgactggcagggcctggatggggctcatgctgctgcctctctgacctctgccctggcctaggctctacggtttcaagatccatccgatggcctaccagctgcagctccaagccgccagcaacttcaagagcccagtcaagacgattcgctgattccctcccccacctgtcctgcagtctttgtcttttccttt-cttttttgccaccctttcaggaaccctgtatggtttttagtttaaattaaaggagtcgttatcgtggtgggaatatgaaataaagtagaagaaaaggccatgagctagtctgctggtgcttgctgttggggaagggaaggtgatggtgtgttggactccaggggccctcatggcccagcccaccctccccagattgaaaaccaggacagatttgtgctcagtggat-tgggtggtgtttttagtatggagcagaacagaattcctaggactgcgtgtgatgaaatgcaaggtcaaaaggaaaagacaaagcatatttcaaagatgagaaatatt
tgtttggatatctatgactgtctgtttatactgtaaggggcttaatcagcagctccatcttttagttttagttctaaaggaaaagtag-cctaaagtcagtataactaaagggtggaacgaggtgggacaaggtccggaattgctgctcagtgatgtgtgtgtgcctgccgctggtggagctgagactgctcactctcagaaggatggggatgcttgatttcctggccaggttgtcccagcacagtggggattggccctgttgtatgacgaagacagcacatggtggcagagatagatactaacccatggactttccaagggagggaataggtctttggagggtatgcaagacaaaggtagacactggataaagaacccggtagtgcccaggtattaccccatctgggccattactcccacactcaggaaccagacgttgtgggtgaggacatgctgtccctcctgccaagta-ataacttccttccca-gccaggatcctgccccaagtaggaatatagctctgcatttacagcagctcctgctcagac--cttgtcaaaaccaccctgcagcttaggattaaggagcatggtcacaggaaggtggggtttcagggcatcccctcaggaactgcccatctccccagaattccaaaatgaaggtccatatgcttgtaggtgtgctggtcatggtgggctca-cagtaggaaagggtaagtggggcccaggggcagggaggg
+ctgtcggttggggtcctgcttttacataacgcccccacaatgcccttcgcc--------------------------------------------------accctccgcgttcagaaagactctgaggacacagtggggagacctgtgggatgccgaggagctgtgagatgaagtaac-atcacgg--ttacccctcaccactgctctgagcagggtttgaagaacagtcagggtcatcagcacaactctctaccccag-ctctgaggagatggagaagtacaaagggacaagcggcggcagaatggctggccccacctcaggttagctcgcaggg-cggtccaggttctcacgcagcgtgcacagctcagcgcatcccaagcacacccgacctaagtccctcaccacaactctgcgcatgccggagcccgagcgcactctgt----------------------------------------------------------------cgcgggcctggcga-gaga-accaggatcccgtgggttggcctggcacagggaagagggagaaaaaaaagcaaaggcaactgagggagcacc-gcagggatgacaatcagcccgaggacctacccggcagctt-agcgag-cctaacaagactatatttcccaggcttcctggtcgggaagaccccctgctccgcttccgcttccggtggcgcctcggttcgccccgccc-ccag-gccttcctctagggcctcgcggcttagagagtcatgcgcgggtcctcttgggattggtagtttgtcttccccccatttcgtcagtttctctctgccgacgggactacatctcccggcgtccacggaggtagggggcggttcttagcgcctgc-cgttgccct-tttctcaccc-aaatcctcaccagtttccagtggccccccccctccctacttcgtctgctgcggccgggtcggctttttgcgctgtagtggtctctgcggttccttggaagca-cagctccccttccccgccccagtcccagtccccgtccggccggtgagtgtggggttgtccccgcgtcgagtgcc-cgtccctgggcgcggcac-tctggttggagctcctcgatgtgggggcgacgaaggaagtgaccgggtcggtagccgaaggaggtcgggggccctgggcgagaggaggcggcttttggggacgctcgtgagacccacagaagtgtggtgccttcgccaaccctgcgagtgcagga-gaggccggctc-tg-ttggggaaaatgaacttcggaagccttgggacaggcgccaggaacatgcaggcacctgcggtggtggggagccggagccgagggccctgtcgcggctgacatggaggggagaggggcctggctttgaggagcgccagggacgagggtc-------------acactaacagatcaggacctgtcattttctggaaagtagcgaaacttattgccagcacactgcaagggggctgaggtgaaggagagaagcttg-ga-ga-tgcttcagaggccgggatccttcaagcctggctggactcagattcttccgttgtt-gtggagaatgtgtgggaggagggaagagacaaga----------aacagcttggtctccacttacttaacgctcttctttcacagcggacataaagatgagtagctctgaggaggtgtcctggatttcctggttctgtgggctccgtggtaatgaattcttctgtgaggtgagtttaggtcggccaactccacatgcc------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ctcgat
gttttattttatgttgtttgagagcagatttc----------------------------gacggaactcactatgcagcctagactggccttgaactcctggcagtcctacttcctcagtctcctgagt-gctaggatttcaaggttttgtcaatatgcatggctaacagaggtacctct-------------------------------------------------------------------------------------------------------------------------------------------------------------------gggtgaccactaggataattaaaatatttcatgttaaagttgggtaatgtgcaaagcccggtggcttcagtagaaaattataaatgcctttcagaaactaaactgtttagccttcatgattcccaagtcttcaactcagagagtttgctatccccaaga-agatggcaagtgcggtttctaaaataagcttgtgtcgaag-ctgaataatgtctatgtctttgctggagatggttagcctta-aagaaagggcctgcagtttttatgagacttc--------------------------agtacctctgcagaattggga--------ataggacacgagaggtgacttggctgaggataagacatctcttgg--------gcccaggtggaagatgggccttcctatattattttcaaccccttggcttccctgtgcccg-taggtggatgaagactacatccaggacaaatttaatcttactggactcaatgagcaggtgcctcactatcgacaagctctggacatgatcttagacctggaacctggtaatgtgcccttagggttgttgctgactgtggggttggtgataaagtgtgcactcccctccataggtgctttgttttcattttcttctggtctctgctcactggcctcttgaggtggttg-ttgtgggaggtggaaactgggttttcttttttcttt-ttcatatatatatattttatgtatataagcacactaaagctgtacaggtggttgtga-gcct-tcatgtggttgctgggaattgaatttaggaactctgctctcgagtcaaacccgctcactcagtccctgctcactctggcccaaagatttatttattattataaataaatacactgtaactgttttcagacacaccagaagagggcatcagatcccattacagatggttgtgagccaccatgtggttgctgggatttgaactcaggaccttaggaagagcagtcagtgctcttaaccgctgagccatctctctggcccgagacaggttttcttgggtagccttgtctgttctagaactcattctgtagaccagcctgtccttgaactcacagagatctgcttgtctctgcctcctgagtactggatcacagactggcttgcttcttgaatttctaaatttccttttaataatccagctctctctcctatccccagatcccttgctcctgtcttttctcccccacctcaaggcccaagcattcataacccaaactggactttaccccttggtctttctgccctgctttcgatggctgagtcgtggcacacacagcagtgcttggcttcttcactgttttgattccttatgactcttgctttgtctccaagctctctgaaagtagctcagttctggttcgtaacgatagtttcctctcgggtttggtgtgctgctgcctcttcccctctgctgctctagag-ggcctcctctgcgtttgtgtgtttacatttacttactgtgaaaagag-aggcaggcagagactaaag-------aggaacacagagaaggccctgccgagcagtcttgctttg-tagaagaatcaaaagcccaggtttctgggtcagatgtatagatgatgggtagggcaaggctcagaaacaggt
actagagaaactgga-ggcgc-caccttgcaggtaaggtggggttaccctggaaaactaaacaagcggccaccccacccacgtt--------ttcagatgaagagctggaagacaaccccaaccagagcgacttgatcgaacaggcagctgagatgctttatgggttgatccacgcccgctacatcctcaccaaccgaggcatcgcacaaatggtgaggc-------------tctg-ctccttct--gcaggaagggaaacaggtgcatg-ggtccg-aggcctggttcctacctg-gcaaggagcgtggcagaggagagagggagagacccagagc-tggg-ctgaggagtgaatcctcactcagggtcccattgaa-gg--tgttttaaggaagggagatgaaaccggaaaacgaaggagtttg-----------------cagggaacaaatagcaaatggcccctgggctctgcag-actgccc-cacaaag--gaaaggc-ac-ttg-agtttgaggacctatctcg-acagag-agggggcttctagtcagaagtgggagacatgggg-------------gggggggaccatgaaatgagttgagattgcctcttcatgctcacccaccagtcccttctgttttacacaccttagttggaaaagtaccagcagggagactttggctactgtcctcgtgtatactgtgagaaccagccaatgcttcctatcggtgagtgttgaa----------------------------------agccagaagctgccttactgtac-tcagtgcccgagtcct-ttgggaggttggggagcgcta--gggatacctggtctgc--agtctggctgtctatctcccaggcctttcagacatcccaggcgaggccatggtgaaactctactgccccaagtgcatggacgtgtacacacccaagtcctccagacaccaccacacggacggcgcatacttcggcactggtttccctcacatgctcttcatggtgcatccagagtaccggcccaagcgacctgccaaccagtttgtacccaggtagggagcaaagacagtcacaaaggatcaaag-----agccaaggt-ctagagagaattgaggaca-agcatagccctttcttgaggcttgcctatcccaaagtcagggctttttcctgctgagtggctg-ggg-aagttacttgcttgtgagtccattttcttatctatgaagcagggatgccaacagtacctgcctagcaacagttttaaatgtatggcacttgactatttaaacagtgactgttgctgttcccattttggatgcttagctttgtgatttagtgagacttatcatcctggtatctttttttttactgtataccaggggagaaagattctattttgaacaggtactaaatcttcttttttttttttttttttttttttccgagacagggtttctctgtgtagctctggctgtcctggaactcactttgtagatcaggctggcctcaaactcagaaatccgcctgcctctgcctcccaagtgctgggattaaaggcatgtgccaccatgcccggcagtactaaatcttaataaatttgaaatttcctgtgtgggttggaagtttcccctggcctcatgcctgccaaaccatcctctctctctgagctatactttcactgtgagtttttggttggttggttggtttggcacaggtgttgtgagaggcacagggaatgtttttttttttaagatttatttatttatttatttatttatttatttatttatttatttattatatgtaagtacactgtagctgtcctcagacactccagaagagggagtcagatcttgttacggatggttgtgagccaccatgtggttgctgggatttgaacttccgacctttggaagagcagtcgccatctcaccagcccccacagggaatgtttgacagggtcgttgttggc
taagttgatctcaagctataaataagactataaatgaggcacgcattacctggcctggaattttaattttagtggatgtttatggta-catttttttaagctgggga-tggttacacaaggggaggcagaggtaagcagatctctttattttgagccaaggccctgttagacacagaagccctgtcttgaaaaactagcccctccccaaataagagaaagagagagacatgtttcatgtattcatgtgcaaggagattggggtcctgtggagtcgatgtgaggaggctgggatggggttggccg-cactggcagagctagcagagagcttaccgtgctgcctctctga-c-ct-ccttcacccaggctctatggtttcaagatccatccaatggcttaccagctgcagctccaagccgccagcaacttcaagagcccagtcaagactattcgctgattgcc-cacccacctctccctctgtctgtgacaccaccattcctctgctgccaccctttcaggaagtc--tatggtttttagtttaaattaaaggaattgttactgtggtgggaatatgaaataaag--gaagaaaaggccatgagct-gtctcctactgtgtgttgctggggtcgggaagg-ggtgggctactgcactccagggaccctcttgctctagcccatcttccctaggttggaagc-agcccagattgcggcttcatgaatctgtaccatgtacagatagtgtgtcagagcagcact-ctagg-ttgggtgagcgcaagtgcaggattgaagggaaaggtttcagaggttggagaacatctgctctcttggggctg-tggctgtggctg-ctg--tagacttcatgggaagt--------------cccctcagtcttgctgttaaagggaatgtaaccctacagtaaataaaactgga---------gaggaggca-gagagccccaactgttgctcagtggtgtctgtgtgccccctgctgggagacctgaga--gc-c-gtctggaaaaggttggaaggcttg-tct-ctggccaacctctcccagcataatgggggccaagcctactgtgtg-ggtggacagcaggtagtggcagagacaggt--------------ccctgaagtgggggtacaggccttt-caaggcatg-tcta-ggatgaagacacctgataaagagccagctggtgctcttctgtggcct---------------------acacaggaaccagacaatgtgggtgaggacaggc-cttcttcctgccaa-tagatgacttcctcctctggtcaggatcttgcctcaggcaggagtgcacctctgcacccacagcagtttctgctcagactgcctgtcaaacccattctgcctctgaagatca--------------agaaaggtggggctt-ggggta-ctttgtgggagttgctcctctcccccaaattccataatgagggcccgcgtgcttgtaggtatgctgaccatggtgggctttgcgatggggaagggtaagt-aagaccagggtctgggagga


Added: trunk/packages/bioperl/branches/upstream/current/t/data/test_badlf.gcg
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/test_badlf.gcg	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/test_badlf.gcg	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,11 @@
+ REFORMAT of: b124_sp.pep  check: -1  from: 1  to: 146  January 28, 1999 16:22
+
+ (No documentation)
+
+b124_sp.pep  Length: 146  January 28, 1999 16:22  Type: P  Check: 5250  ..
+
+       1  VXCAAEFDFM EKETPLRYTK TLLLPVVLVV FVAIVRKIIS DMWGVLAKQQ 
+
+      51  THVRKHQFDH GELVYHALQL LAYTALGILI MRLKLFLTPY MCVMASLICS 
+
+     101  RQLFGWLFCK VHPGAIVFVI LAAMSIQGSA NLQTQWKSTA SLALET
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.aln
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.aln	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.aln	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+CLUSTAL W (1.74) multiple sequence alignment
+
+
+P84139                MNEGEHQIKLDELFEKLLRARKIFKNKDVLRHSYTPKDLPLRHEQIETLAQILVPVLRGE
+P814153               MNEGMHQIKLDVLFEKLLRARKIFKNKDVLRHSYTPKDLPHRHEQIETLAQILVPVLRGE
+P851414               ------------------------------------------------------------
+P841414               ------------------------------------------------------------
+BAB68554              --------------------MLTEDDKQLIQHVWEKVLEHQEDFGAEALERMFIVYPSTK
+gb|443893|124775      -MRFRFGVVVPPAVAGARPELLVVGSRPELG-RWEPRGAVRLRPAGTAAGDGALALQEPG
+                                                                                  
+
+P84139                TPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIV
+P814153               TPSNIFVYG-KTGTGKTVTVK-FVTEELKRISEKYNIPVDVIYINCEIVDTHYRVLANIV
+P851414               -MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILQTHEHMD-HYGKTPLIA
+P841414               -MKIVWCGH-ACFLVEDRGTK-ILIDPYPDVDEDRIGKVDYILVTHEHMD-HYGKTPLIA
+BAB68554              TYFPHFDLHHDSEQIRHHGKK-VVGALGDAVKHIDNLSATLSELSNLHCY-NLRVDPVNF
+gb|443893|124775      LWLGEVELA-AEEAAQDGAEPGRVDTFWYKFLKREPGGELSWEGNGPHHDRCCTYNENNL
+                                     .       :      . .           .               
+
+P84139                NYFKDETGIGVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTR
+P814153               NYFKDETGIEVPMVGWPTDEVYAKLKQVIDMKERFVIIVLDEIDKLVKKSGDEVLYSLTR
+P851414               KLSD--------------------------------------------------------
+P841414               KLSD--------------------------------------------------------
+BAB68554              KLLSHCFQVVLGAHLG--REYTPQVQVAYDKFLAAVSAVLAEKYR---------------
+gb|443893|124775      VDGVYCLPIG---HWGEATGHTNEMKHTTDFYFNIAGHQAMHYSRILPNIWLGSCPRQVE
+                                                                                  
+
+P84139                INTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAF
+P814153               INTELKRAKVSVIGISNDLKFKEYLDPRVLSSLSEEEVVFPPYDANQLRDILTQRAEEAF
+P851414               ------------------------------------------------------------
+P841414               ------------------------------------------------------------
+BAB68554              ------------------------------------------------------------
+gb|443893|124775      HVTIKLKHELGITAVMN-FQTEWDIVQNSSGCNRYPEPMTPDTMIKLYREEGLAYIWMP-
+                                                                                  
+
+P84139                YPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQ
+P814153               YPGVLDEGVIPLCAALAAREHGDARKALDLLRVAGEIAEREGASKVTEKHVWKAQEKIEQ
+P851414               ------------------------------------------------------------
+P841414               ------------------------------------------------------------
+BAB68554              ------------------------------------------------------------
+gb|443893|124775      TPDMSTEGRVQMLPQAVCLLHALLEKGHIVY-----VHCNAGVGRSTAAVCGWLQYVMGW
+                                                                                  
+
+P84139                DMMEEVIKTRPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISD
+P814153               DMMEEVIKTLPLQSKVLLYAIVLLDENGDLPANTGDVYAVYRELCEYIDLEPLTQRRISD
+P851414               ------------------------------------------------------------
+P841414               ------------------------------------------------------------
+BAB68554              ------------------------------------------------------------
+gb|443893|124775      NLRKVQYFLMAKRPAVYIDEEALARAQEDFFQKFGKVRSSVCSL----------------
+                                                                                  
+
+P84139                LINELDMLGIINAKVVSKGRYGRTKEIRLNVTSYKIRNVLRYDYSIQPLLTISLKSEQRR
+P814153               LINELDMLGIINAKVVSKGRYGRTKEIRLMVTSYKIRNVLRYDYSIQPLLTISLKSEQRR
+P851414               ------------------------------------------------------------
+P841414               ------------------------------------------------------------
+BAB68554              ------------------------------------------------------------
+gb|443893|124775      ------------------------------------------------------------
+                                                                                  
+
+P84139                LI
+P814153               LI
+P851414               --
+P841414               --
+BAB68554              --
+gb|443893|124775      --
+                        

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,88 @@
+>AK1H_ECOLI/114-431 DESCRIPTION HERE
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIVAIA
+>AKH_HAEIN 114-431
+-----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQ
+LLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNG
+SDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLP
+TLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESL
+Q----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS
+---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQ
+AKGIAARF------FSALAQANISIVAIA
+>AKH1_MAIZE/117-440
+-----------------ATESFSDFVVGHGELWSAQMLSYAIQKSGT-P--CSWMDTREV
+LVVNPSGANQVDPDYLESEKRLEKWFSRC-P-AETIIATGF---IASTPENIPTTLKRDG
+SDFSAAI-----------------IGSLVKARQVTIWTDVDGVFSADP--RKVSEAVILS
+TLSYQEAWEMSYFGANVLHPRTIIPVMKYNIPIVIRNIFNTSAPGTM-IC--QQPANENG
+DLEACVKAFATIDKLALVNVEGTGMAGV---PGTANAIFGAVKDVGANVIMISQASSEHS
+---VCFAVPEKEVALVSAALHARFR-----EALAAGRLSKVEVIHNCSILAT-VGLRMAS
+TPGVSATL------FDALAKANINVRAIA
+>AK2H_ECOLI/112-431
+-----------------INDAVYAEVVGHGEVWSARLMSAVLNQQG-----LPAAWLDAR
+EFLRAERAAQPQVDEGLSYPLLQQLLVQH-P-GKRLVVTGF---ISRNNAGETVLLGRNG
+SDYSATQ-----------------IGALAGVSRVTIWSDVAGVYSADP--RKVKDACLLP
+LLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLASGTGARI
+VTSHDDVCLI-EFQVPASQDFKLAHKEI--DQILKRAQVRPLAVGVHNDRQLLQFCYTSE
+VADSALKILDEAG---------LPGELRLRQGLALVAMVGAGVTRNPLHCHRFWQQLKGQ
+PVEFTWQSDDGISLVAVL
+>AK1_BACSU/66-374
+-----------------ISPREQDLLLSCGETISSVVFTSMLLDNGVKA--AALTGAQAG
+FLTNDQHTNAKIIEMKPER--LFSVLAN----HDAVVVAGF---QGATEKGDTTTIGRGG
+SDTSAAA-----------------LGAAVDAEYIDIFTDVEGVMTADP--RVVENAKPLP
+VVTYTEICNLAYQGAKVISPRAVEIAMQAKVPIRVRSTYS-NDKGTLVTSHHSSKVGSDV
+FERLITGIAH-VKDVTQFKVPAKIGQYN-----VQTEVFKAMANAGISVDFFNITPSEIV
+YTVAGNKTETAQR------------ILMDMGYDPMVTRNCAKVSAVGAGIMGVPGVTSKI
+------VSALSEKEIPILQSA
+>AK2_BACST/63-370
+-----------------KRE--MDMLLSTGEQVSIALLAMSLHEKGYKA--VSLTGWQAG
+ITTEEMHGNARIMNIDTT--RIRRCLDE----GAIVIVAGF---QGVTETGEITTLGRGG
+SDTTAVA-----------------LAAALKAEKCDIYTDVTGVFTTDP--RYVKTARKIK
+EISYDEMLELANLGAGVLHPRAVEFAKNYEVPLEVRSSME-NERGTMVK--EEVSMEQHL
+IVRGIAFEDQ-VTRVTVVGIEKYLQSVA--------TIFTALANRGINVDIIIQNA----
+----------------TNSETAS--VSFSIRTEDLPETLQVLQ-------------ALEG
+ADVHYESGLAKVSI-VGSGMISNPGVAARV------FEVLADQGIEIKMVS
+>AK2_BACSU/63-373
+-----------------KRE--MDMLLATGEQVTISLLSMALQEKGYDA--VSYTGWQAG
+IRTEAIHGNARITDIDTS--VLADQLEK----GKIVIVAGF---QGMTEDCEITTLGRGG
+SDTTAVA-----------------LAAALKVDKCDIYTDVPGVFTTDP--RYVKSARKLE
+GISYDEMLELANLGAGVLHPRAVEFAKNYQVPLEVRSSTE-TEAGTLIE--EESSMEQNL
+IVRGIAFEDQ-ITRVTIYGLTSGLTTLS--------TIFTTLAKRNINVDIIIQTQ----
+----------------AEDKTG---ISFSVKTEDADQTVAVLEEYK---------DALEF
+EKIETESKLAKVSI-VGSGMVSNPGVAAEM------FAVLAQKNILIKMVS
+>AKAB_CORFL/63-379
+-----------------ARE--MDMLLTAGERISNALVAMAIESLGAEA--QSFTGSQAG
+VLTTERHGNARIVDVTPG--RVREALDE----GKICIVAGF--QGVNKETRDVTTLGRGG
+SDTTAVA-----------------LAAALNADVCEIYSDVDGVYTADP--RIVPNAQKLE
+KLSFEEMLELAAVGSKILVLRSVEYARAFNVPLRVRSSYS-NDPGTLIAGSMEDIPVEEA
+VLTGVATDKS-EAKVTVLGISDKPGEAA--------KVFRALADAEINIDMVLQNV----
+----------------SSVEDGTTDITFTCPRADGRRAMEILKKLQ---------VQGNW
+TNVLYDDQVDKVSL-VGAGMKSHPGVTAEF------MEALRDVNVNIELIS
+>AKAB_MYCSM/63-379
+-----------------PRE--MDMLLTAGERISNALVAMAIESLGAQA--RSFTGSQAG
+VITTGTHGNAKIIDVTPG--RLRDALDE----GQIVLVAGF--QGVSQDSKDVTTLGRGG
+SDTTAVA-----------------VAAALDADVCEIYTDVDGIFTADP--RIVPNARHLD
+TVSFEEMLEMAACGAKVLMLRCVEYARRYNVPIHVRSSYS-DKPGTIVKGSIEDIPMEDA
+ILTGVAHDRS-EAKVTVVGLPDVPGYAA--------KVFRAVAEADVNIDMVLQNI----
+----------------SKIEDGKTDITFTCARDNGPRAVEKLSALK---------SEIGF
+SQVLYDDHIGKVSL-IGAGMRSHPGVTATF------CEALAEAGINIDLIS
+>AK3_ECOLI/106-407
+-----------------TSPALTDELVSHGELMSTLLFVEILRERD--V--QAQWFDVRK
+VMRTNDRFGRAEPDIAALAELAALQLLPR-LNEGLVITQGF---IGSENKGRTTTLGRGG
+SDYTAAL-----------------LAEALHASRVDIWTDVPGIYTTDP--RVVSAAKRID
+EIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTLVCNKTENPPLFRA
+LAL--RRNQT-LLTLHSLNMLHSRGFLA--------EVFGILARHNISVDLITTSEVSVA
+LTLDTTGSTSTG----------DTLLTQSLLMELSALCRVEVEEGLALVALIG-------
+---NDLSKACGVGKEVF
+>AK_YEAST/134-472 A COMMENT FOR YEAST
+-----------------VSSRTVDLVMSCGEKLSCLFMTALCNDRGCKAKYVDLSHIVPS
+DFSASALDNSFYTFLVQALKEKLAPFVSA-KERIVPVFTGF---FGLVPTGLLNGVGRGY
+TDLCAAL-----------------IAVAVNADELQVWKEVDGIFTADP--RKVPEARLLD
+SVTPEEASELTYYGSEVIHPFTMEQVIRAKIPIRIKNVQNPLGNGTIIYPDNVAKKGEST
+PPHPPENLSS----SFYEKRKRGATAITTKN----DIFVINIHSNKKTLSHGFLAQIFTI
+LDKYKLVVDLISTSEVHVSMALPIPDADS-LKSLRQAEEKLRILGSVDITKKLSIVSLVG
+KHMKQYIGIAG---TMFTTLAEEGINIEMIS

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.mase
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.mase	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.mase	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,22 @@
+;; saved by seaview on Wed Jan  7 11:14:33 1998
+;;# of segments=14 all seqs
+;; 25,25 27,27 30,30 96,96 98,98 117,117 149,149 160,160 164,164 173,173
+;; 258,258 282,282 289,289 291,291
+;no comment
+AK1H_ECOLI
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIVAIA
+;no comment
+AKH_HAEIN
+-----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQ
+LLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNG
+SDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLP
+TLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESL
+Q----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS
+---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQ
+AKGIAARF------FSALAQAN

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.metafasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.metafasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.metafasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,8 @@
+>test1/3-25
+CDEFHIJKLMNOPQRSTUVWXYZ
+&hydrophobic
+I & OIOIJOIIOOIOOOOUIIX
+&structural
+ABAEEIEIJEIIEOAEEAAUIAX
+>test1/3-23
+CDEFHIJKLMNOPQRSTUVWX--

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.msf
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.msf	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.msf	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,113 @@
+
+NoName   MSF: 16  Type: P  Fri May 25 11:28:34 2001  Check: 00 ..
+
+ Name: 1433_LYCES/9-246  Len:    242  Check:  2603  Weight:  1.00
+ Name: 1434_LYCES/6-243  Len:    242  Check:  2257  Weight:  1.00
+ Name: 143R_ARATH/7-245  Len:    242  Check:  3905  Weight:  1.00
+ Name: 143B_VICFA/7-242  Len:    242  Check:  2825  Weight:  1.00
+ Name: 143E_HUMAN/4-239  Len:    242  Check:  2034  Weight:  1.00
+ Name: BMH1_YEAST/4-240  Len:    242  Check:  7237  Weight:  1.00
+ Name: RA24_SCHPO/6-241  Len:    242  Check:  3762  Weight:  1.00
+ Name: RA25_SCHPO/5-240  Len:    242  Check:  2995  Weight:  1.00
+ Name: 1431_ENTHI/4-239  Len:    242  Check:  2213  Weight:  1.00
+ Name: 1432_ENTHI/4-238  Len:    242  Check:  8344  Weight:  1.00
+ Name: 1433_CAEEL/5-237  Len:    242  Check:  1241  Weight:  1.00
+ Name: 143Z_DROME/6-239  Len:    242  Check:  8864  Weight:  1.00
+ Name: 1433_XENLA/1-227  Len:    242  Check:  8793  Weight:  1.00
+ Name: 143T_HUMAN/3-236  Len:    242  Check:  6034  Weight:  1.00
+ Name: 143F_MOUSE/3-240  Len:    242  Check:  5185  Weight:  1.00
+ Name: 143S_HUMAN/3-238  Len:    242  Check:  1679  Weight:  1.00
+
+//
+
+
+1433_LYCES/9-246      REENVYMAKL ADRAESDEEM VEFMEKVSNS LGS.EELTVE ERNLLSVAYK 
+1434_LYCES/6-243      REENVYLAKL AEQAERYEEM IEFMEKVAKT ADV.EELTVE ERNLLSVAYK 
+143R_ARATH/7-245      RDQYVYMAKL AEQAERYEEM VQFMEQLVTG ATPAEELTVE ERNLLSVAYK 
+143B_VICFA/7-242      RENFVYIAKL AEQAERYEEM VDSMKNVANL DV...ELTIE ERNLLSVGYK 
+143E_HUMAN/4-239      REDLVYQAKL AEQAERYDEM VESMKKVAGM DV...ELTVE ERNLLSVAYK 
+BMH1_YEAST/4-240      REDSVYLAKL AEQAERYEEM VENMKTVASS GQ...ELSVE ERNLLSVAYK 
+RA24_SCHPO/6-241      REDAVYLAKL AEQAERYEGM VENMKSVAST DQ...ELTVE ERNLLSVAYK 
+RA25_SCHPO/5-240      RENSVYLAKL AEQAERYEEM VENMKKVACS ND...KLSVE ERNLLSVAYK 
+1431_ENTHI/4-239      REDCVYTAKL AEQSERYDEM VQCMKQVAEM EA...ELSIE ERNLLSVAYK 
+1432_ENTHI/4-238      REDLVYLSKL AEQSERYEEM VQYMKQVAEM GT...ELSVE ERNLISVAYK 
+1433_CAEEL/5-237      VEELVQRAKL AEQAERYDDM AAAMKKVTEQ GQ...ELSNE ERNLLSVAYK 
+143Z_DROME/6-239      KEELVQKAKL AEQSERYDDM AQAMKSVTET GV...ELSNE ERNLLSVAYK 
+1433_XENLA/1-227      .......AKL SEQAERYDDM AASMKAVTEL GA...ELSNE ERNLLSVAYK 
+143T_HUMAN/3-236      KTELIQKAKL AEQAERYDDM ATCMKAVTEQ GA...ELSNE ERNLLSVAYK 
+143F_MOUSE/3-240      REQLLQRARL AEQAERYDDM ASAMKAVTEL NE...PLSNE DRNLLSVAYK 
+143S_HUMAN/3-238      RASLIQKAKL AEQAERYEDM AAFMKGAVEK GE...ELSCE ERNLLSVAYK 
+
+
+1433_LYCES/9-246      NVIGARRASW RIISSIEQKE ESRG.NEEHV NSIREYRSKI ENELSKICDG 
+1434_LYCES/6-243      NVIGARRASW RIISSIEQKE ESRG.NEDHV NTIKEYRSKI EADLSKICDG 
+143R_ARATH/7-245      NVIGSLRAAW RIVSSIEQKE ESRK.NDEHV SLVKDYRSKV ESELSSVCSG 
+143B_VICFA/7-242      NVIGARRASW RILSSIEQKE ESKG.NDVNA KRIKEYRHKV ETELSNICID 
+143E_HUMAN/4-239      NVIGARRASW RIISSIEQKE ENKG.GEDKL KMIREYRQMV ETELKLICCD 
+BMH1_YEAST/4-240      NVIGARRASW RIVSSIEQKE ESKEKSEHQV ELICSYRSKI ETELTKISDD 
+RA24_SCHPO/6-241      NVIGARRASW RIVSSIEQKE ESKG.NTAQV ELIKEYRQKI EQELDTICQD 
+RA25_SCHPO/5-240      NIIGARRASW RIISSIEQKE ESRG.NTRQA ALIKEYRKKI EDELSDICHD 
+1431_ENTHI/4-239      NVIGAKRASW RIISSLEQKE QAKG.NDKHV EIIKGYRAKI EKELSTCCDD 
+1432_ENTHI/4-238      NVVGSRRASW RIISSLEQKE QAKG.NTQRV ELIKTYRAKI EQELSQKCDD 
+1433_CAEEL/5-237      NVVGARRSSW RVISSIEQKT EG...SEKKQ QLAKEYRVKV EQELNDICQD 
+143Z_DROME/6-239      NVVGARRSSW RVISSIEQKT EA...SARKQ QLAREYRERV EKELREICYE 
+1433_XENLA/1-227      NVVGARRSSW RVISSIEQKT EG...NDKRQ QMAREYREKV ETELQDICKD 
+143T_HUMAN/3-236      NVVGGRRSAW RVISSIEQKT DT...SDKKL QLIKDYREKV ESELRSICTT 
+143F_MOUSE/3-240      NVVGARRSSW RVISSIEQKT MADG.NEKKL EKVKAYREKI EKELETVCND 
+143S_HUMAN/3-238      NVVGGQRAAW RVLSSIEQKS NEEG.SEEKG PEVREYREKV ETELQGVCDT 
+
+
+1433_LYCES/9-246      ILKLLDSKLI PSA..TSGDS KVFYLKMKGD YHRYLAEFKT GAERKEAAES 
+1434_LYCES/6-243      ILSLLESNLI PSA..STAES KVFHLKMKGD YHRYLAEFKT GTERKEAAEN 
+143R_ARATH/7-245      ILKLLDSHLI PSA..GASES KVFYLKMKGD YHRYMAEFKS GDERKTAAED 
+143B_VICFA/7-242      VMRVIDEHLI PSA..AAGES TVFYYKMKGD YYRYLAEFKT GNEKKEAGDQ 
+143E_HUMAN/4-239      ILDVLDKHLI PAA..NTGES KVFYYKMKGD YHRYLAEFAT GNDRKEAAEN 
+BMH1_YEAST/4-240      ILSVLDSHLI PSA..TTGES KVFYYKMKGD YHRYLAEFSS GDAREKATNA 
+RA24_SCHPO/6-241      ILTVLEKHLI PNA..ASAES KVFYYKMKGD YYRYLAEFAV GEKRQHSADQ 
+RA25_SCHPO/5-240      VLSVLEKHLI PAA..TTGES KVFYYKMKGD YYRYLAEFTV GEVCKEAADS 
+1431_ENTHI/4-239      VLKVIQENLL PKA..STSES KVFFKKMEGD YYRYFAEFTV DEKRKEVADK 
+1432_ENTHI/4-238      VLKIITEFLL KNS..TSIES KVFFKKMEGD YYRYYAEFTV DEKRKEVADK 
+1433_CAEEL/5-237      VLKLLDEFLI VKA..GAAES KAFYLKMKGD YYRYLAEVAS .EDRAAVVEK 
+143Z_DROME/6-239      VLGLLDKYLI PKA..SNPES KVFYLKMKGD YYRYLAEVAT GDARNTVVDD 
+1433_XENLA/1-227      VLDLLDRFLV PNA..TPPES KVFYLKMKGD YYRYLSEVAS GDSKQETVAS 
+143T_HUMAN/3-236      VLELLDKYLI ANA..TNPES KVFYLKMKGD YFRYLAEVAC GDDRKQTIDN 
+143F_MOUSE/3-240      VLALLDKFLI KNCNDFQYES KVFYLKMKGD YYRYLAEVAS GEKKNSVVEA 
+143S_HUMAN/3-238      VLGLLDSHLI KEA..GDAES RVFYLKMKGD YYRYLAEVAT GDDKKRIIDS 
+
+
+1433_LYCES/9-246      TLTAYKAAQD IASAELAPTH PIRLGLALNF SVFYYEILNS PDRACNLAKQ 
+1434_LYCES/6-243      TLLAYKSAQD IALAELAPTH PIRLGLALNF SVFYYEILNS PDRACNLAKQ 
+143R_ARATH/7-245      TMLAYKAAQD IAAADMAPTH PIRLGLALNF SVFYYEILNS SDKACNMAKQ 
+143B_VICFA/7-242      SMKAYESATT AAEAELPPTH PIRLGLALNF SVFYYEILNS PERACHLAKQ 
+143E_HUMAN/4-239      SLVAYKAASD IAMTELPPTH PIRLGLALNF SVFYYEILNS PDRACRLAKA 
+BMH1_YEAST/4-240      SLEAYKTASE IATTELPPTH PIRLGLALNF SVFYYEIQNS PDKACHLAKQ 
+RA24_SCHPO/6-241      SLEGYKAASE IATAELAPTH PIRLGLALNF SVFYYEILNS PDRACYLAKQ 
+RA25_SCHPO/5-240      SLEAYKAASD IAVAELPPTD PMRLGLALNF SVFYYEILDS PESACHLAKQ 
+1431_ENTHI/4-239      SLAAYTEATE ISNAELAPTH PIRLGLALNF SVFYFEIMND ADKACQLAKQ 
+1432_ENTHI/4-238      SLAAYQEATD TA.ASLVPTH PIRLGLALNF SVFYYQIMND ADKACQLAKE 
+1433_CAEEL/5-237      SQKAYQEALD IAKDKMQPTH PIRLGLALNF SVFYYEILNT PEHACQLAKQ 
+143Z_DROME/6-239      SQTAYQDAFD ISKGKMQPTH PIRLGLALNF SVFYYEILNS PDKACQLAKQ 
+1433_XENLA/1-227      SQQAYQEAFE ISKSEMQPTH PIRLGLALNF SVFYYEILNS PEKACSLAKS 
+143T_HUMAN/3-236      SQGAYQEAFD ISKKEMQPTH PIRLGLALNF SVFYYEILNN PELACTLAKT 
+143F_MOUSE/3-240      SEAAYKEAFE ISKEHMQPTH PIRLGLALNF SVFYYEIQNA PEQACLLAKQ 
+143S_HUMAN/3-238      ARSAYQEAMD ISKKEMPPTN PIRLGLALNF SVFHYEIANS PEEAISLAKT 
+
+
+1433_LYCES/9-246      AFDEAIAELD TLGEESYKDS TLIMQLLRDN LTLWTSDMQD DG 
+1434_LYCES/6-243      AFDEAISELD TLGEESYKDS TLIMQLLRDN LTLWTSDNAD DV 
+143R_ARATH/7-245      AFEEAIAELD TLGEESYKDS TLIMQLLRDN LTLWTSDYAG AD 
+143B_VICFA/7-242      AFDEAISELD TLNEESYKDS TLIMQLLRDN LTLWTSDIPE DG 
+143E_HUMAN/4-239      AFDDAIAELD TLSEESYKDS TLIMQLLRDN LTLWTSDMQG DG 
+BMH1_YEAST/4-240      AFDDAIAELD TLSEESYKDS TLIMQLLRDN LTLWTSDMSE SG 
+RA24_SCHPO/6-241      AFDEAISELD SLSEESYKDS TLIMQLLRDN LTLWTSDAEY SA 
+RA25_SCHPO/5-240      VFDEAISELD SLSEESYKDS TLIMQLLRDN LTLWTSDAEY NQ 
+1431_ENTHI/4-239      AFDDAIAKLD EVPENMYKDS TLIMQLLRDN LTLWTSDACD EE 
+1432_ENTHI/4-238      AFDEAIQKLD EVPEESYKES TLIMQLLRDN LTLWTSDMGD DE 
+1433_CAEEL/5-237      AFDDAIAELD TLNEDSYKDS TLIMQLLRDN LTLWTSDVGA ED 
+143Z_DROME/6-239      AFDDAIAELD TLNEDSYKDS TLIMQLLRDN LTLWTSDTQG DE 
+1433_XENLA/1-227      AFDEAIRELD TLNEESYKDS TLIMQLLRDN LTLWTSENQG EE 
+143T_HUMAN/3-236      AFDEAIAELD TLNEDSYKDS TLIMQLLRDN LTLWTSDSAG EE 
+143F_MOUSE/3-240      AFDDAIAELD TLNEDSYKDS TLIMQLLRDN LTLWTSDQQD EE 
+143S_HUMAN/3-238      TFDEAMADLH TLSEDSYKDS TLIMQLLRDN LTLWTADNAG EE 
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.nexus
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.nexus	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.nexus	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,22 @@
+#NEXUS
+[TITLE: Four Anthropoidea]
+
+begin data;
+dimensions ntax=4 nchar=50;
+format interleave datatype=RNA missing=N gap=-;
+
+matrix
+'Homo sapiens'      AGUCGAGUC---GCAGAAACGCAUGAC-GAC
+Pan_paniscus        AGUCGCGUCG--GCAGAAACGCAUGACGGAC
+Gorilla_gorilla     AGUCGCGUCG--GCAGAUACGCAUCACGGAC
+Pongo_pigmaeus      AGUCGCGUCGAAGCAGA--CGCAUGACGGAC
+
+'Homo sapiens'      CACAUUUU-CCUUGCAAAG
+Pan_paniscus        CACAUCAU-CCUUGCAAAG
+Gorilla_gorilla     -ACAUCAUCCCUCGCAGAG
+Pongo_pigmaeus      CACAUCAUCCCUUGCAGAG
+;
+
+endblock;
+begin assumptions;
+options deftype=unord;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.pfam
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.pfam	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.pfam	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,16 @@
+1433_LYCES/9-246    REENVYMAKLADRAESDEEMVEFMEKVSNSLGS.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRG.NEEHVNSIREYRSKIENELSKICDGILKLLDSKLIPSA..TSGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLTAYKAAQDIASAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDG
+1434_LYCES/6-243    REENVYLAKLAEQAERYEEMIEFMEKVAKTADV.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEESRG.NEDHVNTIKEYRSKIEADLSKICDGILSLLESNLIPSA..STAESKVFHLKMKGDYHRYLAEFKTGTERKEAAENTLLAYKSAQDIALAELAPTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAISELDTLGEESYKDSTLIMQLLRDNLTLWTSDNADDV
+143R_ARATH/7-245    RDQYVYMAKLAEQAERYEEMVQFMEQLVTGATPAEELTVEERNLLSVAYKNVIGSLRAAWRIVSSIEQKEESRK.NDEHVSLVKDYRSKVESELSSVCSGILKLLDSHLIPSA..GASESKVFYLKMKGDYHRYMAEFKSGDERKTAAEDTMLAYKAAQDIAAADMAPTHPIRLGLALNFSVFYYEILNSSDKACNMAKQAFEEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDYAGAD
+143B_VICFA/7-242    RENFVYIAKLAEQAERYEEMVDSMKNVANLDV...ELTIEERNLLSVGYKNVIGARRASWRILSSIEQKEESKG.NDVNAKRIKEYRHKVETELSNICIDVMRVIDEHLIPSA..AAGESTVFYYKMKGDYYRYLAEFKTGNEKKEAGDQSMKAYESATTAAEAELPPTHPIRLGLALNFSVFYYEILNSPERACHLAKQAFDEAISELDTLNEESYKDSTLIMQLLRDNLTLWTSDIPEDG
+143E_HUMAN/4-239    REDLVYQAKLAEQAERYDEMVESMKKVAGMDV...ELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEENKG.GEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAA..NTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELPPTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDG
+BMH1_YEAST/4-240    REDSVYLAKLAEQAERYEEMVENMKTVASSGQ...ELSVEERNLLSVAYKNVIGARRASWRIVSSIEQKEESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSA..TTGESKVFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELPPTHPIRLGLALNFSVFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSESG
+RA24_SCHPO/6-241    REDAVYLAKLAEQAERYEGMVENMKSVASTDQ...ELTVEERNLLSVAYKNVIGARRASWRIVSSIEQKEESKG.NTAQVELIKEYRQKIEQELDTICQDILTVLEKHLIPNA..ASAESKVFYYKMKGDYYRYLAEFAVGEKRQHSADQSLEGYKAASEIATAELAPTHPIRLGLALNFSVFYYEILNSPDRACYLAKQAFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYSA
+RA25_SCHPO/5-240    RENSVYLAKLAEQAERYEEMVENMKKVACSND...KLSVEERNLLSVAYKNIIGARRASWRIISSIEQKEESRG.NTRQAALIKEYRKKIEDELSDICHDVLSVLEKHLIPAA..TTGESKVFYYKMKGDYYRYLAEFTVGEVCKEAADSSLEAYKAASDIAVAELPPTDPMRLGLALNFSVFYYEILDSPESACHLAKQVFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYNQ
+1431_ENTHI/4-239    REDCVYTAKLAEQSERYDEMVQCMKQVAEMEA...ELSIEERNLLSVAYKNVIGAKRASWRIISSLEQKEQAKG.NDKHVEIIKGYRAKIEKELSTCCDDVLKVIQENLLPKA..STSESKVFFKKMEGDYYRYFAEFTVDEKRKEVADKSLAAYTEATEISNAELAPTHPIRLGLALNFSVFYFEIMNDADKACQLAKQAFDDAIAKLDEVPENMYKDSTLIMQLLRDNLTLWTSDACDEE
+1432_ENTHI/4-238    REDLVYLSKLAEQSERYEEMVQYMKQVAEMGT...ELSVEERNLISVAYKNVVGSRRASWRIISSLEQKEQAKG.NTQRVELIKTYRAKIEQELSQKCDDVLKIITEFLLKNS..TSIESKVFFKKMEGDYYRYYAEFTVDEKRKEVADKSLAAYQEATDTA.ASLVPTHPIRLGLALNFSVFYYQIMNDADKACQLAKEAFDEAIQKLDEVPEESYKESTLIMQLLRDNLTLWTSDMGDDE
+1433_CAEEL/5-237    VEELVQRAKLAEQAERYDDMAAAMKKVTEQGQ...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEG...SEKKQQLAKEYRVKVEQELNDICQDVLKLLDEFLIVKA..GAAESKAFYLKMKGDYYRYLAEVAS.EDRAAVVEKSQKAYQEALDIAKDKMQPTHPIRLGLALNFSVFYYEILNTPEHACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDVGAED
+143Z_DROME/6-239    KEELVQKAKLAEQSERYDDMAQAMKSVTETGV...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEA...SARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKA..SNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQPTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDE
+1433_XENLA/1-227    .......AKLSEQAERYDDMAASMKAVTELGA...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTEG...NDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNA..TPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQPTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEE
+143T_HUMAN/3-236    KTELIQKAKLAEQAERYDDMATCMKAVTEQGA...ELSNEERNLLSVAYKNVVGGRRSAWRVISSIEQKTDT...SDKKLQLIKDYREKVESELRSICTTVLELLDKYLIANA..TNPESKVFYLKMKGDYFRYLAEVACGDDRKQTIDNSQGAYQEAFDISKKEMQPTHPIRLGLALNFSVFYYEILNNPELACTLAKTAFDEAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDSAGEE
+143F_MOUSE/3-240    REQLLQRARLAEQAERYDDMASAMKAVTELNE...PLSNEDRNLLSVAYKNVVGARRSSWRVISSIEQKTMADG.NEKKLEKVKAYREKIEKELETVCNDVLALLDKFLIKNCNDFQYESKVFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEHMQPTHPIRLGLALNFSVFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDEE
+143S_HUMAN/3-238    RASLIQKAKLAEQAERYEDMAAFMKGAVEKGE...ELSCEERNLLSVAYKNVVGGQRAAWRVLSSIEQKSNEEG.SEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEA..GDAESRVFYLKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMPPTNPIRLGLALNFSVFHYEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEE

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.phylip
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.phylip	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.phylip	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,10 @@
+4 50
+Homo sapie AGUCGAGUC---GCAGAAACGCAUGAC-GACC
+Pan panisc AGUCGCGUCG--GCAGAAACGCAUGACGGACC
+Gorilla go AGUCGCGUCG--GCAGAUACGCAUCACGGAC-
+Pongo pigm AGUCGCGUCGAAGCAGA--CGCAUGACGGACC
+ 
+ACAUUUU-CCUUGCAAAG
+ACAUCAU-CCUUGCAAAG
+ACAUCAUCCCUCGCAGAG
+ACAUCAUCCCUUGCAGAG

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.po
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.po	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.po	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,896 @@
+VERSION=clustalw
+NAME=NoName
+TITLE=NoName
+LENGTH=879
+SOURCECOUNT=6
+SOURCENAME=P84139
+SOURCEINFO=420 0 0 -1 
+SOURCENAME=P814153
+SOURCEINFO=420 0 0 -1 
+SOURCENAME=P851414
+SOURCEINFO=60 154 0 -1 
+SOURCENAME=P841414
+SOURCEINFO=60 154 0 -1 
+SOURCENAME=BAB68554
+SOURCEINFO=141 42 0 -1 
+SOURCENAME=gb|443893|124775
+SOURCEINFO=331 2 0 -1 
+M:S0S1
+N:L0S0S1A2
+M:S5A1
+E:L1S0S1A4
+R:L2S5A3
+G:L3S0S1A6
+F:L4S5A5
+E:L5S0A8
+M:L5S1A9
+R:L6S5A7
+H:L7L8S0S1A11
+F:L9S5A10
+Q:L10S0S1A13
+G:L11S5A12
+I:L12S0S1A15
+V:L13S5A14
+K:L14S0S1A17
+V:L15S5A16
+L:L16S0S1A19
+V:L17S5A18
+D:L18S0S1A21
+P:L19S5A20
+E:L20S0A23
+V:L20S1A24
+P:L21S5A22
+L:L22L23S0S1A26
+A:L24S5A25
+F:L25S0S1A28
+V:L26S5A27
+E:L27S0S1A30
+A:L28S5A29
+K:L29S0S1A32
+G:L30S5A31
+L:L31S0S1A34
+A:L32S5A33
+L:L33S0S1A36
+R:L34S5A35
+R:L35S0S1A38
+P:L36S5A37
+A:L37S0S1A40
+E:L38S5A39
+R:L39S0S1A42
+M:S4A43
+L:L40S5A41
+K:L41S0S1A45
+L:L42L43S4S5A44
+I:L44S0S1A47
+T:L45S4A48
+V:L45S5A46
+F:L46S0S1A50
+E:L47S4A51
+V:L48S5A49
+K:L49S0S1A53
+D:L50S4A54
+G:L51S5A52
+N:L52S0S1A56
+D:L53S4A57
+S:L54S5A55
+K:L55L56S0S1S4A59
+R:L57S5A58
+D:L58S0S1A61
+Q:L58S4A62
+P:L59S5A60
+V:L60S0S1A64
+L:L61S4A65
+E:L62S5A63
+L:L63L65S0S1S5A67
+I:L64S4A66
+R:L66S0S1A69
+Q:L67S4A70
+G:L66S5A68
+H:L68L69S0S1S4
+S:L71S0S1A73
+V:L71S4A74
+R:L70S5A72
+Y:L72S0S1A76
+W:L73L74S4S5A75
+T:L75S0S1A78
+E:L76S4S5A77
+P:L77L78S0S1S5A80
+K:L78S4A79
+K:L79S0S1A82
+V:L80S4A83
+R:L79S5A81
+D:L81S0S1A85
+L:L82S4A86
+G:L83S5A84
+L:L84S0S1A88
+E:L85S4A89
+A:L86S5A87
+P:L87S0S1A91
+H:L88S4A92
+V:L89S5A90
+L:L90S0A94
+H:L90S1A95
+Q:L91S4A96
+R:L92S5A93
+R:L93L94S0S1A98
+E:L95S4A99
+L:L96S5A97
+H:L97S0S1A101
+D:L98S4A102
+R:L99S5A100
+E:L100S0S1A104
+F:L101S4A105
+P:L102S5A103
+Q:L103S0S1A107
+G:L104S4A108
+A:L105S5A106
+I:L106S0S1A110
+A:L107S4A111
+G:L108S5A109
+E:L109L110S0S1S4A113
+T:L111S5A112
+T:L112S0S1A115
+A:L112L113S4S5A114
+L:L114L115S0S1S4A117
+A:L115S5A116
+A:L116S0S1A119
+E:L116S4A120
+G:L117S5A118
+Q:L118S0S1A122
+R:L119S4A123
+D:L120S5A121
+I:L121S0S1A125
+M:L122S4A126
+G:L123S5A124
+L:L124S0S1A128
+F:L125S4A129
+A:L126S5A127
+V:L127S0S1A131
+I:L128S4A132
+L:L129S5A130
+P:L130S0S1A134
+V:L131S4A135
+A:L132S5A133
+V:L133S0S1A137
+Y:L134S4A138
+L:L135S5A136
+L:L136S0S1A140
+P:L137S4A141
+Q:L138S5A139
+R:L139S0S1A143
+S:L140S4A144
+E:L141S5A142
+G:L142S0S1A146
+T:L143S4A147
+P:L144S5A145
+E:L145S0S1A149
+K:L146S4A150
+G:L147S5A148
+T:L148L149S0S1S4A152
+L:L150S5A151
+P:L151S0S1A154
+M:S2S3A155
+Y:L151S4A156
+W:L152S5A153
+S:L153S0S1A158
+K:L154S2S3A159
+F:L155S4A160
+L:L156S5A157
+N:L157S0S1A162
+I:L158S2S3A163
+P:L159S4A164
+G:L160S5A161
+I:L161S0S1A166
+V:L162S2S3A167
+H:L163S4A168
+E:L164S5A165
+F:L165L167S0S1S4A170
+W:L166S2S3A171
+V:L168S5A169
+V:L169S0S1A173
+C:L170S2S3A174
+D:L169S4A175
+E:L171S5A172
+Y:L172S0S1A177
+G:L173S2S3A178
+L:L174L175S4S5A176
+G:L176S0S1A180
+H:L177L178S2S3S4A181
+A:L178S5A179
+H:L180S4
+K:L179S0S1A184
+A:L180L181S2S3S5A185
+D:L182S4A183
+T:L183S0S1A187
+C:L184S2S3A188
+S:L185S4A189
+E:L184S5A186
+G:L186S0S1A191
+F:L187S2S3A192
+E:L188L189S4S5A190
+T:L190S0S1A194
+L:L191S2S3A195
+Q:L192S4A196
+A:L192S5A193
+G:L193S0S1A198
+V:L194S2S3A199
+I:L195S4A200
+A:L196S5A197
+K:L197S0S1A202
+E:L198S2S3A203
+R:L199S4A204
+Q:L200S5A201
+T:L201S0S1A206
+D:L202L204S2S3S5A207
+H:L203S4A205
+V:L205S0S1A209
+R:L206S2S3A210
+H:L207S4A211
+G:L206S5A208
+T:L208S0S1A213
+G:L209L210S2S3S4A214
+A:L211S5A212
+V:L212S0S1A216
+T:L213S2S3A217
+K:L213S4A218
+E:L214S5A215
+K:L215L216L217S0S1S2S3S4A220
+P:L218S5A219
+G:L220S5
+F:L219S0S1A223
+I:L219S2S3A224
+V:L219S4A225
+R:L221S5A222
+V:L222L224L225S0S1S4S5A227
+L:L223S2S3A226
+T:L226S0S1A229
+I:L227S2S3A230
+G:L226S4A231
+D:L226S5A228
+E:L228S0S1A233
+D:L229S2S3A234
+A:L230S4A235
+T:L231S5A232
+E:L232S0S1A237
+P:L233S2S3A238
+L:L234S4A239
+F:L235S5A236
+L:L236S0S1A241
+Y:L237S2S3A242
+G:L238S4A243
+W:L239S5A240
+K:L240S0S1A245
+P:L241S2S3A246
+D:L242S4A247
+Y:L243S5A244
+R:L244S0S1A249
+D:L245S2S3A250
+A:L246S4A251
+K:L247S5A248
+I:L248S0S1A253
+V:L249L250S2S3S4A254
+F:L251S5A252
+S:L252S0S1A256
+D:L253S2S3A257
+K:L253S4A258
+L:L254S5A255
+E:L255L256S0S1S2S3A260
+H:L257S4A261
+K:L258S5A259
+K:L259S0S1A263
+D:L259S2S3A264
+I:L260S4A265
+R:L261S5A262
+Y:L262S0S1A267
+R:L263S2S3A268
+D:L264S4A269
+E:L265S5A266
+N:L266L268S0S1S4A271
+I:L267S2S3A272
+P:L269S5A270
+I:L270S0S1A274
+G:L271L272S2S3S5A275
+L:L270S4A273
+P:L273S0S1A277
+K:L274S2S3A278
+S:L275S4A279
+G:L274S5A276
+V:L276L277S0S1S2S3A281
+A:L278S4A282
+E:L279S5A280
+D:L280S0S1S2S3A284
+T:L281S4A285
+L:L282S5A283
+V:L283S0S1A287
+Y:L283S2S3A288
+L:L284S4A289
+S:L285S5A286
+I:L286L287S0S1S2S3A291
+S:L288S4A292
+W:L289S5A290
+Y:L290S0S1A294
+L:L290S2S3A295
+E:L291L292S4S5A293
+I:L293S0S1A297
+Q:L294S2A298
+V:L294S3A299
+L:L295S4A300
+G:L295S5A296
+N:L296L300S0S1S5A302
+T:L297L298S2S3A303
+S:L299S4A301
+C:L301S0S1A305
+H:L302S2S3A306
+N:L303S4A307
+G:L301S5A304
+E:L304L305S0S1S2S3A309
+L:L306S4A310
+P:L307S5A308
+I:L308S0S1A312
+H:L308L309L310S2S3S4S5A311
+V:L311S0S1A314
+M:L312S2S3A315
+C:L312S4A316
+H:L312S5A313
+D:L313L314L316S0S1S2S3S5A318
+Y:L315S4A317
+T:L317S0S1A320
+R:L317S5A319
+H:L317L319S0S1S2S3A322
+N:L318S4A323
+C:L320S5A321
+Y:L321S0S1S2S3A325
+L:L322S4A326
+C:L323S5A324
+R:L324L325S0S1S4A328
+G:L324S2S3A329
+T:L326S5A327
+V:L327S0S1S4A331
+K:L328S2S3A332
+Y:L329S5A330
+L:L330S0S1A334
+T:L331S2S3A335
+D:L330S4A336
+N:L332S5A333
+A:L333S0S1A338
+P:L334L335S2S3S4A339
+E:L336S5A337
+N:L337L339S0S1S5A341
+L:L338S2S3A342
+V:L338S4A340
+I:L340L341S0S1S2S3A344
+N:L340L342S4S5A343
+V:L343S0S1A346
+A:L343S2S3A347
+F:L344S4A348
+L:L344S5A345
+N:L345S0S1A350
+K:L346L347S2S3S4A351
+V:L348S5A349
+Y:L349S0S1A353
+L:L350S2S3S4A354
+D:L351S5A352
+F:L352S0S1A356
+S:L353S2S3A357
+L:L353S4A358
+G:L354S5A355
+K:L355S0S1A360
+D:L356S2S3A361
+S:L357S4A362
+V:L358S5A359
+D:L359S0S1A364
+H:L361S4A365
+Y:L362S5A363
+E:L363S0S1A367
+C:L364L365S4S5A366
+T:L366S0S1A369
+F:L367S4A370
+L:L367S5A368
+G:L368S0S1A372
+Q:L369S4A373
+P:L370S5A371
+I:L371L373S0S1S5A375
+V:L372S4A374
+G:L374S0S5A377
+E:L374S1A378
+V:L375S4A376
+V:L376L377S0S1A380
+L:L378S4A379
+P:L379S0S1A382
+G:L380S4A381
+M:L381S0S1A384
+A:L382S4A383
+V:L383S0S1A386
+H:L376L384S4S5A385
+G:L385S0S1A388
+L:L386S4A389
+W:L386S5A387
+W:L387S0S1A391
+G:L388L389S4S5A390
+P:L390S0S1A393
+E:L391S5A392
+T:L392S0S1A395
+A:L393S5A394
+D:L394S0S1A397
+R:L391S4A398
+T:L395S5A396
+E:L396L397S0S1S4A400
+G:L398S5A399
+V:L399S0S1A402
+Y:L399S4A403
+H:L400S5A401
+Y:L401S0S1A405
+T:L402L403S4S5A404
+A:L404S0S1A407
+P:L405S4A408
+N:L405S5A406
+K:L406S0S1A410
+Q:L407S4A411
+E:L408S5A409
+L:L409S0S1A413
+V:L410S4A414
+M:L411S5A412
+K:L412L414S0S1S5A416
+Q:L413S4A415
+Q:L415S0S1A418
+V:L416S4A419
+H:L415S5A417
+V:L417S0S1A421
+A:L418S4A422
+T:L419S5A420
+I:L420S0S1A424
+Y:L421S4A425
+T:L422S5A423
+D:L423L424L425S0S1S4S5
+M:L426S0S1A428
+K:L426S4A429
+F:L426S5A427
+K:L427S0S1A431
+F:L428S4A432
+Y:L429S5A430
+E:L430S0S1A434
+L:L431S4A435
+F:L432S5A433
+R:L433S0S1A437
+A:L434S4A438
+N:L435S5A436
+F:L436S0S1A440
+A:L437S4A441
+I:L438S5A439
+V:L439L440S0S1S4A443
+A:L441S5A442
+I:L442S0S1A445
+S:L442S4A446
+G:L443S5A444
+I:L444S0S1A448
+A:L445S4A449
+H:L446S5A447
+V:L447L448S0S1S4A451
+Q:L449S5A450
+L:L450S0S1S4A453
+A:L451S5A452
+D:L452S0S1A455
+A:L452S4A456
+M:L453S5A454
+E:L454L455S0S1S4A458
+H:L456S5A457
+I:L457S0S1A460
+K:L457S4A461
+Y:L458S5A459
+D:L459S0S1A463
+Y:L460S4A464
+S:L461S5A462
+K:L462S0S1A466
+R:L463L464S4S5A465
+L:L465S0S1A468
+I:L466S5A467
+V:L467S0S1A470
+L:L468S5A469
+K:L469S0S1A472
+P:L470S5A471
+K:L471S0S1A474
+N:L472S5A473
+S:L473S0S1A476
+I:L474S5A475
+G:L475S0S1A478
+W:L476S5A477
+D:L477S0S1A480
+L:L478S5A479
+E:L479S0S1A482
+G:L480S5A481
+V:L481S0S1A484
+S:L482S5A483
+L:L483S0S1A486
+C:L484S5A485
+Y:L485S0S1A488
+P:L486S5A487
+S:L487S0S1A490
+R:L488S5A489
+L:L489S0S1A492
+Q:L490S5A491
+T:L491S0S1A494
+V:L492S5A493
+R:L493S0S1A496
+E:L494S5A495
+I:L495S0S1A498
+H:L496S5A497
+N:L497S0S1A500
+V:L498S5A499
+T:L499L500S0S1S5
+E:L501S0S1A503
+I:L501S5A502
+L:L502S0S1A505
+K:L503S5A504
+K:L504S0S1A507
+L:L505S5A506
+R:L506S0S1A509
+K:L507S5A508
+A:L508S0S1A511
+H:L509S5A510
+K:L510S0S1A513
+E:L511S5A512
+V:L512S0S1A515
+L:L513S5A514
+S:L514S0S1A517
+G:L515S5A516
+V:L516S0S1A519
+I:L517S5A518
+I:L518S0S1A521
+T:L519S5A520
+G:L520S0S1A523
+A:L521S5A522
+I:L522S0S1A525
+V:L523S5A524
+S:L524S0S1A527
+M:L525S5A526
+N:L526L527S0S1S5
+D:L528S0S1
+L:L529S0S1A531
+F:L528S5A530
+K:L530S0S1A533
+Q:L531S5A532
+F:L532S0S1A535
+T:L533S5A534
+K:L534S0S1A537
+E:L535S5A536
+E:L536S0S1A539
+W:L537S5A538
+Y:L538S0S1A541
+D:L539S5A540
+L:L540S0S1A543
+I:L541S5A542
+D:L542S0S1A545
+V:L543S5A544
+P:L544S0S1A547
+Q:L545S5A546
+R:L546S0S1A549
+N:L547S5A548
+V:L548S0S1A551
+S:L549S5A550
+L:L550S0S1A553
+S:L551S5A552
+S:L552S0S1A555
+G:L553S5A554
+S:L554S0S1A557
+C:L555S5A556
+L:L556S0S1A559
+N:L557S5A558
+S:L558S0S1A561
+R:L559S5A560
+E:L560S0S1A563
+Y:L561S5A562
+E:L562S0S1A565
+P:L563S5A564
+E:L564L565S0S1S5
+V:L566S0S1A568
+P:L566S5A567
+V:L567S0S1A570
+M:L568S5A569
+F:L569S0S1A572
+T:L570S5A571
+P:L571L572S0S1S5
+P:L573S0S1A575
+D:L573S5A574
+Y:L574S0S1A577
+T:L575S5A576
+D:L576S0S1A579
+M:L577S5A578
+A:L578S0S1A581
+I:L579S5A580
+N:L580S0S1A583
+K:L581S5A582
+Q:L582S0S1A585
+L:L583S5A584
+L:L584S0S1A587
+Y:L585S5A586
+R:L586L587S0S1S5
+D:L588S0S1A590
+E:L588S5A589
+I:L589S0S1A592
+E:L590S5A591
+L:L591S0S1A594
+G:L592S5A593
+T:L593S0S1A596
+L:L594S5A595
+Q:L595S0S1A598
+A:L596S5A597
+R:L597S0S1A600
+Y:L598S5A599
+A:L599S0S1A602
+I:L600S5A601
+E:L601S0S1A604
+W:L602S5A603
+E:L603S0S1A606
+M:L604S5A605
+A:L605S0S1A608
+P:L606S5A607
+F:L607S0S1
+Y:L609S0S1A611
+T:L608S5A610
+P:L610L611S0S1S5
+G:L612S0S1A614
+D:L612S5A613
+V:L613S0S1A616
+M:L614S5A615
+L:L615S0S1A618
+S:L616S5A617
+D:L617S0S1A620
+T:L618S5A619
+E:L619L620S0S1S5
+G:L621S0S1S5
+V:L622S0S1A624
+R:L622S5A623
+I:L623S0S1A626
+V:L624S5A625
+P:L625S0S1A628
+Q:L626S5A627
+L:L627S0S1A630
+M:L628S5A629
+C:L629S0S1A632
+L:L630S5A631
+A:L631S0S1A634
+P:L632S5A633
+A:L633S0S1A636
+Q:L634S5A635
+L:L635S0S1A638
+A:L636S5A637
+A:L637S0S1A640
+V:L638S5A639
+A:L639S0S1A642
+C:L640S5A641
+R:L641S0S1A644
+L:L642S5A643
+E:L643S0S1A646
+L:L644S5A645
+H:L645L646S0S1S5
+G:L647S0S1A649
+A:L647S5A648
+D:L648S0S1A651
+L:L649S5A650
+A:L650S0S1A653
+L:L651S5A652
+R:L652S0S1A655
+E:L653S5A654
+K:L654L655S0S1S5
+A:L656S0S1A658
+G:L656S5A657
+L:L657S0S1A660
+H:L658S5A659
+D:L659S0S1A662
+I:L660S5A661
+L:L661S0S1A664
+V:L662S5A663
+L:L663S0S1A666
+Y:L664S5A665
+R:L665S0S1
+V:L667S0S1
+A:L668S0S1
+G:L669S0S1
+E:L670S0S1
+I:L671S0S1A673
+V:L666S5A672
+A:L672S0S1A675
+H:L673S5A674
+E:L674S0S1A677
+C:L675S5A676
+R:L676S0S1A679
+N:L677S5A678
+E:L678S0S1A681
+A:L679S5A680
+G:L680L681S0S1S5
+A:L682S0S1A684
+V:L682S5A683
+S:L683S0S1A686
+G:L684S5A685
+K:L685S0S1A688
+R:L686S5A687
+V:L687S0S1A690
+S:L688S5A689
+T:L689L690S0S1S5
+E:L691S0S1A693
+A:L691S5A692
+K:L692S0S1A695
+A:L693S5A694
+H:L694S0S1A697
+V:L695S5A696
+V:L696S0S1A699
+C:L697S5A698
+W:L698S0S1A701
+G:L699S5A700
+K:L700S0S1A703
+W:L701S5A702
+A:L702S0S1A705
+L:L703S5A704
+Q:L704L705S0S1S5
+E:L706S0S1A708
+Y:L706S5A707
+K:L707S0S1A710
+V:L708S5A709
+I:L709S0S1A712
+M:L710S5A711
+E:L711S0S1A714
+G:L712S5A713
+Q:L713S0S1A716
+W:L714S5A715
+D:L715S0S1A718
+N:L716S5A717
+M:L717S0S1A720
+L:L718S5A719
+M:L719S0S1A722
+R:L720S5A721
+E:L721S0S1A724
+K:L722S5A723
+E:L723S0S1A726
+V:L724S5A725
+V:L725S0S1A728
+Q:L726S5A727
+I:L727S0S1A730
+Y:L728S5A729
+K:L729S0S1A732
+F:L730S5A731
+T:L731S0S1A734
+L:L732S5A733
+R:L733S0A736
+L:L733S1A737
+M:L734S5A735
+P:L735L736S0S1A739
+A:L737S5A738
+L:L738S0S1A741
+K:L739S5A740
+Q:L740S0S1A743
+R:L741S5A742
+S:L742S0S1A745
+P:L743S5A744
+K:L744S0S1A747
+A:L745S5A746
+V:L746L747S0S1S5
+L:L748S0S1A750
+Y:L748S5A749
+L:L749S0S1A752
+I:L750S5A751
+Y:L751S0S1A754
+D:L752S5A753
+A:L753S0S1A756
+E:L754S5A755
+I:L755S0S1A758
+E:L756S5A757
+V:L757S0S1A760
+A:L758S5A759
+L:L759L760S0S1S5
+L:L761S0S1A763
+A:L761S5A762
+D:L762S0S1A765
+R:L763S5A764
+E:L764S0S1A767
+A:L765S5A766
+N:L766S0S1A769
+Q:L767S5A768
+G:L768S0S1A771
+E:L769S5A770
+D:L770L771S0S1S5
+L:L772S0S1A774
+F:L772S5A773
+P:L773S0S1A776
+F:L774S5A775
+A:L775S0S1A778
+Q:L776S5A777
+N:L777S0S1A780
+K:L778S5A779
+T:L779S0S1A782
+F:L780S5A781
+G:L781L782S0S1S5
+D:L783S0S1A785
+K:L783S5A784
+V:L784L785S0S1S5
+Y:L786S0S1A788
+R:L786S5A787
+A:L787S0S1A790
+S:L788S5A789
+V:L789S0S1A792
+S:L790S5A791
+Y:L791S0S1A794
+V:L792S5A793
+R:L793S0S1A796
+C:L794S5A795
+E:L795S0S1A798
+S:L796S5A797
+L:L797L798S0S1S5
+C:L799S0S1
+E:L800S0S1
+Y:L801S0S1
+I:L802S0S1
+D:L803S0S1
+L:L804S0S1
+E:L805S0S1
+P:L806S0S1
+L:L807S0S1
+T:L808S0S1
+Q:L809S0S1
+R:L810S0S1
+R:L811S0S1
+I:L812S0S1
+S:L813S0S1
+D:L814S0S1
+L:L815S0S1
+I:L816S0S1
+N:L817S0S1
+E:L818S0S1
+L:L819S0S1
+D:L820S0S1
+M:L821S0S1
+L:L822S0S1
+G:L823S0S1
+I:L824S0S1
+I:L825S0S1
+N:L826S0S1
+A:L827S0S1
+K:L828S0S1
+V:L829S0S1
+V:L830S0S1
+S:L831S0S1
+K:L832S0S1
+G:L833S0S1
+R:L834S0S1
+Y:L835S0S1
+G:L836S0S1
+R:L837S0S1
+T:L838S0S1
+K:L839S0S1
+E:L840S0S1
+I:L841S0S1
+R:L842S0S1
+L:L843S0S1
+N:L844S0A846
+M:L844S1A845
+V:L845L846S0S1
+T:L847S0S1
+S:L848S0S1
+Y:L849S0S1
+K:L850S0S1
+I:L851S0S1
+R:L852S0S1
+N:L853S0S1
+V:L854S0S1
+L:L855S0S1
+R:L856S0S1
+Y:L857S0S1
+D:L858S0S1
+Y:L859S0S1
+S:L860S0S1
+I:L861S0S1
+Q:L862S0S1
+P:L863S0S1
+L:L864S0S1
+L:L865S0S1
+T:L866S0S1
+I:L867S0S1
+S:L868S0S1
+L:L869S0S1
+K:L870S0S1
+S:L871S0S1
+E:L872S0S1
+Q:L873S0S1
+R:L874S0S1
+R:L875S0S1
+L:L876S0S1
+I:L877S0S1

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.prodom
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.prodom	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.prodom	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,63 @@
+ID   1184 p2000.1                           57 seq.
+AC   PD001000
+KW   CB21(6) CB22(5) CB23(3)  // CHLOROPHYLL PROTEIN PRECURSOR BINDING PHOTOSYSTEM II TRANSIT PEPTIDE I TYPE 
+LA   36
+AL P04777|CB21_ARATH       1    33 0.59 MAASTMALSSPA-FAGKAVNLS--PAASEVLGSGRV
+AL P04778|CB22_ARATH       1    33 0.59 MAASTMALSSPA-FAGKAVKLS--PAASEVLGSGRV
+AL P13851|CB21_SINAL       1    33 0.77 MAASTMALSSPA-FAGKAVKLS--PGASEVFGTGRV
+AL Q39142|Q39142_ARATH     1    33 0.77 MAASTMALSSPA-LTGKAVKLS--PAASEVFGTGRI
+AL O22669|O22669_PANGI     1    33 0.91 MAASTMALSSPS-FAGMAVKVA--PSSSELFGSGRI
+AL P92919|P92919_APIGR     1    33 0.91 MAASTMALSSPA-LAGKAVKVA--PSSSELFGNGRV
+AL P04780|CB22_PETSP       1    32 0.70 MAAATMALSSST-FAGKVVKLS--PSSSEITGNGK.
+AL Q41447|Q41447_SOLTU     1    32 0.70 MAAATMALSSST-FAGKAVKLS--PSSSEITGNGR.
+AL P12469|CB23_NICPL       1    33 0.31 MAASTMALSSSS-FAGKAVKLS--PSSSEITGNGKV
+AL O64446|O64446_NICSY     1    33 0.31 MAASTMALSSSS-FAGKAVKLS--PSSSEITGNGKV
+AL P27496|CB25_TOBAC       1    33 0.38 MAAATMALSSSS-FAGKAVKLS--PSSSEITGNGKV
+AL O64450|O64450_NICSY     1    33 0.45 MAASTMALSSSS-FVGKAVKLS--PSSSEITGNGKV
+AL P27495|CB24_TOBAC       1    33 0.49 MAASTMALSSPS-FAGKAVKLS--PSSSEITGNGKV
+AL P04781|CB23_PETSP       1    33 0.63 MAATTMALSSSS-FAGKAVKLS--SSSSEITGNGKV
+AL P07369|CB2G_LYCES       1    33 0.66 MATSTMALSSST-FAGKAVKLS--PSSSEITGNGRV
+AL P04783|CB25_PETSP       1    32 0.80 MAAATMALSSPS-FAGKAVKFS--PSSSEITGNGK.
+AL P27493|CB22_TOBAC       1    33 0.24 MAAATMALSSPS-FAGQAVKLS--PSAPEITGNGRV
+AL O64444|O64444_NICSY     1    33 0.24 MAASTMALSSPS-FAGQAVKLS--PSAPEITGNGRV
+AL O64442|O64442_NICSY     1    33 0.21 MAAATMALSSPS-FAGQAVKLS--PSASEITGNGRV
+AL O64443|O64443_NICSY     1    33 0.21 MAAATMALSSPS-FAGQAVKLS--PSASEITGNGRV
+AL Q41423|Q41423_SOLTU     1    33 0.21 MAAATMALSSPS-FAGQAVKLS--PSASEITGNGRI
+AL O64445|O64445_NICSY     1    33 0.28 MAASTMALSSPS-FAGQAVKLS--PSASEITGNGRV
+AL P07370|CB2B_LYCES       1    33 0.24 MAAATMALSSPS-FAGQAVKLS--PSASEISGNGRI
+AL Q41421|Q41421_SOLTU     1    33 0.24 MAAATMALSSPS-FAGQAVKLS--PSASEISGNGRI
+AL Q41422|Q41422_SOLTU     1    33 0.24 MAAATMALSSPS-FAGQAVKLS--PSASEISGNGRI
+AL Q41425|Q41425_SOLTU     1    33 0.35 MAASTMALSSPS-FAGQAVKLS--PSASEISGNGRI
+AL Q41424|Q41424_SOLTU     1    33 0.45 MAASTMALSSPS-FAGQAVKLS--PSTSEITGNGRI
+AL P27491|CB27_TOBAC       1    32 0.59 MTASTMALSSPS-FAGNAVKLS--PSSSEITGNGK.
+AL O64449|O64449_NICSY     1    32 0.59 MTASTMALSSPS-FAGNAVKLS--PSSSEITGNGK.
+AL O64448|O64448_NICSY     1    33 0.98 MAAATMSLSSPS-FAGKAVKLS--PSSYEIIGNGKV
+AL Q32291|Q32291_GOSHI     1    33 1.29 MASTTMALSSPS-FAGKAVKFS--PSTPEIQGTGRV
+AL P12333|CB21_SPIOL       1    33 0.77 MASSTMALSSPS-LAGKAVKLG--PTASEIIGEGRI
+AL O04686|O04686_MESCR     1    33 0.77 MASSTMALSSPS-LAGKMVKLA--PTASEILGEGRI
+AL O04685|O04685_MESCR     1    33 0.84 MASSTMALSSPS-FAGKAVKLS--PTASETLGEGRI
+AL O04687|O04687_MESCR     1    33 0.94 MASSAMALSSPS-LAGKAVKLN--PTTSQILGEGRI
+AL Q40961|Q40961_PRUPE     1    33 1.29 MASSTMALSSPS-LAGQAVKLG--SSVSNIVGEGRI
+AL P27492|CB21_TOBAC       1    32 0.80 MAASTTALSSP--FAGKAVKLS--PSSSEVTGNGKV
+AL P12470|CB25_NICPL       1    32 0.80 MAASTTALSSP--FAGKAVKLS--PSSSEVTGNGKV
+AL O64447|O64447_NICSY     1    32 1.11 MASSTMALSSS--FAGKAVKLS--PSSSEITGNGKV
+AL Q40247|Q40247_LACSA     1    32 1.32 MAASTMALSSP--FAGQAVKTS--PSSSELFGNGRV
+AL Q40185|Q40185_LEMGI     1    32 1.95 .MAASMALSSPS-LVGKAVKLA--PAASEVFGEGRV
+AL P08963|CB22_HORVU       1    33 2.02 MAAATMALSSST-FAGKAVKNL--SSSSEVQGDARV
+AL P04779|CB21_PETSP       1    32 1.57 MAAATMALSSSS-FAGKAVNV---PSSSEITRNGKV
+AL P04782|CB24_PETSP       1    31 1.57 MAAATMAISSSS-FAGKAVNV---PSSSQITGNGK.
+AL Q39141|Q39141_ARATH     1    31 2.19 MASSTMALSSPA-FAGKAVK----PAASDVLGSGRV
+AL P09755|CB22_SOYBN       1    32 1.18 MAASTMALSSSS-LAGQAMKLA--PSTPEL-GVGRV
+AL Q43437|Q43437_SOYBN     1    32 1.18 MAASTMALSSSS-LAGQAIKLA--PSTPEL-GVGRV
+AL O48657|O48657_FAGCR     1    32 1.43 MAASTMALSSPS-LAGKAVKLA--PSTPEL-NVGRV
+AL O81391|O81391_MEDSA     1    31 1.53 MAASSMALSSPT-LAGKPVKLT--PSSQEL-GAAR.
+AL Q9ZP08|Q9ZP08_CICAR     1    31 1.53 MAASSMALSSPT-LAGKPVKLS--PSSQEL-GASR.
+AL P09756|CB23_SOYBN       3    31 1.81 .AASSMALSSPS-LAGKAVKLG--PSAPEV-GRV..
+AL Q39831|Q39831_SOYBN     3    31 1.81 .AASSMALSSPS-LAGKAVKLG--PSAPEV-GRV..
+AL P04784|CB21_WHEAT       1    32 1.74 MAATTMSLSSSS-FAGKAVKNL--PSSA-LIGDARV
+AL O24401|O24401_WHEAT     1    32 1.74 MAAATMSLSSST-FAGKAVKNV--PSLA-LLGEARV
+AL Q40065|Q40065_HORVU     1    32 1.78 MAATTMYLSSST-FAGKAVKNV--PLSA-LFGEARV
+AL P27490|CB28_PEA         1    31 3.34 MAASSMALSSPT-LTGKPVETSANPSSQELGG....
+AL O24226|O24226_ORYSA     1    30 3.62 MAASTMALSSPAALAGKAV------ANAKVFGEGRV
+CO                                      MAASTMALSSPS-FAGKAVKLS--PSSSEITGNGRV
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.psi
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.psi	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.psi	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,911 @@
+               QUERY  MAWDMCNQDSESVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+         PRGC1_HUMAN   MAWDMCNQDSESVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q4W5M7_HUMAN   MAWDMCNQDSESVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q5RBY0_PONPY   MAWDMCNQDSESVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+           PRGC1_PIG   MAWDMCNQD--SVWTDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+           PRGC1_RAT   MAWDMCSQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+         PRGC1_BOVIN   MAWDMCNQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q4L229_BOVIN   MAWDMCNQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+         PRGC1_MOUSE   MAWDMCSQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q3UP72_MOUSE   MAWDMCSQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q60GU0_CHICK   MAWDMCNQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDADS
+          Q5QHW4_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   MAWDMCNQDSESVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q3LIG2_MOUSE   MAWDMCSQD--SVWSDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   MAWDRCNQD--SVWRELECAALVGEDQPLCPDLPELDLSELDVSDLDADS
+          Q811R2_RAT   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q8TAL0_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE4_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q8VHJ7_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q86YN5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q8N1N9_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q86YN3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q8TDE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q86YN6_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q86YN4_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSAT
+        Q8C1C0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSPT
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   MAWDMCNQD--SVWTDIECAALVGEDQPLCPDLPELDLSELDVNDLDTDS
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXSDFDSVN
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+         PRGC1_HUMAN   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+        Q4W5M7_HUMAN   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+        Q5RBY0_PONPY   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+           PRGC1_PIG   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+           PRGC1_RAT   FLGGLKWCSDQSEIISNQYNNEPANIFEKIDEENEANLLAVLTETLDSLP
+         PRGC1_BOVIN   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+        Q4L229_BOVIN   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+         PRGC1_MOUSE   FLGGLKWCSDQSEIISNQYNNEPANIFEKIDEENEANLLAVLTETLDSLP
+        Q3UP72_MOUSE   FLGGLKWCSDQSEIISNQYNNEPANIFEKIDEENEANLLAVLTETLDSLP
+        Q60GU0_CHICK   FLGGLKWYSDQSEVISSQYSNEPANIFEKIDEENEANLLAVLTETLDSIP
+          Q5QHW4_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXDEENEANLLAVLTETLDSIP
+        Q3LIG1_HUMAN   FLGGLKWCSDQSEIISNQYNNEPSNIFEKIDEENEANLLAVLTETLDSLP
+        Q3LIG2_MOUSE   FLGGLKWCSDQSEIISNQYNNEPANIFEKIDEENEANLLAVLTETLDSLP
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   FLGGLKWYSDQSEIISAQYGNEASNLFEKIDEENEANLLAVLTETLDSIP
+          Q811R2_RAT   CFGELQWCPETSETEPSQYSPDDSEFFQ-IDSENEA-LLAALTKTLDDIP
+        Q8TAL0_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE4_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q8VHJ7_MOUSE   CFGELQWCPETSETEPSQYSPDDSELFQ-IDSENEA-LLAALTKTLDDIP
+        Q86YN5_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q8N1N9_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q86YN3_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q8TDE5_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q86YN6_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q86YN4_HUMAN   CFGELQWCPENSETEPNQYSPDDSELFQ-IDSENEA-LLAELTKTLDDIP
+        Q8C1C0_MOUSE   CFGELQWCPETSETEPSQYSPDDSELFQ-IDSENEA-LLAALTKTLDDIP
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   FLGGLKWCSDQSEIISNQYNNEPSNIFEXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   CLSELHWCNEQSDHSPAQYSAGDPELFE---EEN-AALLAALTDSLDGIV
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  VDEDGLPSFDALTDGDVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+         PRGC1_HUMAN   VDEDGLPSFDALTDGDVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q4W5M7_HUMAN   VDEDGLPSFDALTDGDVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q5RBY0_PONPY   VDEDGLPSFDALTDGDVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+           PRGC1_PIG   VDEDGLPSFDALTDGDVTTENEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+           PRGC1_RAT   VDEDGLPSFDALTDGDVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+         PRGC1_BOVIN   VDEDGLPSFDALTDGDVTTENEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q4L229_BOVIN   VDEDGLPSFDALTDGDVTTENEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+         PRGC1_MOUSE   VDEDGLPSFDALTDGAVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q3UP72_MOUSE   VDEDGLPSFDALTDGAVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q60GU0_CHICK   VDEDGLPSFDALTDGDVTNEHDASPSPMPDGTPPPQEAEEPSLLKKLLLA
+          Q5QHW4_PIG   XXXXXXXXXDALTDGDVTTENEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   VDEDGLPSFEALADGDVTNASDRSCPSSPDGSPRTPEPEEPSLLKKLLLA
+        Q3LIG1_HUMAN   VDEDGLPSFDALTDGDVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q3LIG2_MOUSE   VDEDGLPSFDALTDGAVTTDNEASPSSMPDGTPPPQEAEEPSLLKKLLLA
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   VDEDGLPSFEALADGDVTNASDRSCPSSPGGSPRTPEPEEPSLLKKLLLA
+          Q811R2_RAT   EDDVGLAAFPGLDEGDTPSCTPASPAPLSVPPSPASEVDELSLLQKLLLA
+        Q8TAL0_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE4_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSL-------
+        Q8VHJ7_MOUSE   EDDVGLAAFPELDEGDTPSCTAPPSPTLERLLSPASDVDELSLLQKLLLA
+        Q86YN5_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSLLQKLLLA
+        Q8N1N9_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSLLQKLLLA
+        Q86YN3_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSLLQKLLLA
+        Q8TDE5_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSLLQKLLLA
+        Q86YN6_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSLLQKLLLA
+        Q86YN4_HUMAN   EDDVGLAAFPALDGGDALSCTAPPSPAPEKPSAPAPEVDELSLLQKLLLA
+        Q8C1C0_MOUSE   EDDVGLAAFPELDEGDTPSCTAPPSPTLERLLSPASDVDELSLLQKLLLA
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXA
+        Q800H2_BRARE   EDGVGLSVFPSLGDEPEEGEEEEDDLPMESEPLPSPETEDPSLLKKLLLT
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  PANTQLSYNECSGLSTQNHANHNHRIRTNPAIVKTENSWSNKAKSICQQQ
+         PRGC1_HUMAN   PANTQLSYNECSGLSTQNHANHNHRIRTNPAIVKTENSWSNKAKSICQQQ
+        Q4W5M7_HUMAN   PANTQLSYNECSGLSTQNHANHNHRIRTNPAIVKTENSWSNKAKSICQQQ
+        Q5RBY0_PONPY   PANTQLSYNECSGLSTQNHANHNHRIRTNPAIVKTENSWSNKAKSICQQQ
+           PRGC1_PIG   PANTQLSYNECSGLSTQNHANHNHRIRTNPAVVKTENSWSNKAKSICQQQ
+           PRGC1_RAT   PANTQLSYNECSGLSTQNHANHTHRIRTNPAIVKTENSWSNKAKSICQQQ
+         PRGC1_BOVIN   PANTQLSYNECSGLSTQNHANHNHRIRTNPAVVKTENSWSNKAKSICQQQ
+        Q4L229_BOVIN   PANTQLSYNECSGLSTQNHANHNHRIRTNPAVVKTENSWSNKAKSICQQQ
+         PRGC1_MOUSE   PANTQLSYNECSGLSTQNHANHTHRIRTNPAIVKTENSWSNKAKSICQQQ
+        Q3UP72_MOUSE   PANTQLSYNECSGLSTQNHANHTHRIRTNPAIVKTENSWSNKAKSICQQQ
+        Q60GU0_CHICK   PANTQLNYNECSGLSTQNHANTNHRIRTSPVVVKTENSWSNKAKSICQQQ
+          Q5QHW4_PIG   PANTQLSYNECSGLSTQNHANHNHRIRTNPAVVKTENSWSNKAKSICQQQ
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   PANSQLSYNQYTGGQAQNHASSNHRIRPPPAVVKMESTWNGKARGSSQQN
+        Q3LIG1_HUMAN   PANTQLSYNECSGLSTQNHANHNHRIRTNPAIVKTENSWSNKAKSICQQQ
+        Q3LIG2_MOUSE   PANTQLSYNECSGLSTQNHANHTHRIRTNPAIVKTENSWSNKAKSICQQQ
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   PANSQLSYNQYTGGKAQNHASSNHRIRPPPAVVKMESPWNGKSRGCSQQN
+          Q811R2_RAT   TSSPTASSDALKDGATWSQTSLSS--RSQRPCVKVDGTQDKKTPML----
+        Q8TAL0_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE4_HUMAN   ----------------------------------ADSTQDKKAPMM----
+        Q8VHJ7_MOUSE   TSSPTASSDALKDGATWSQTSLSS--RSQRPCVKVDGTQDKKTPTL----
+        Q86YN5_HUMAN   T-----SYPTSSSDTQKEGTAWRQRSKSQRPCVKADSTQDKKAPMM----
+        Q8N1N9_HUMAN   T-----SYPTSSSDTQKEGTAWRQRSKSQRPCVKADSTQDKKAPMM----
+        Q86YN3_HUMAN   T-----SYPTSSSDTQKEGTAWRQRSKSQRPCVKADSTQDKKAPMM----
+        Q8TDE5_HUMAN   T-----SYPTSSSDTQKEGTAWRQRSKSQRPCVKADSTQDKKAPMM----
+        Q86YN6_HUMAN   T-----SYPTSSSDTQKEGTAWRQRSKSQRPCVKADSTQDKKAPMM----
+        Q86YN4_HUMAN   T-----SYPTSSSDTQKEGTAWRQRSKSQRPCVKADSTQDKKAPMM----
+        Q8C1C0_MOUSE   TSSPTASSDALKDGATWSQTSLSS--RSQRPCVKVDGTQDKKTPTL----
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXWSNKAKSICQQQ
+        Q800H3_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   PANSQLSYNQYPGGKAQNHAASNQRIRPAPSCIKTENPWNSKPRGAC-PN
+        Q800H2_BRARE   PPNVPVGLESHKDSGVHRHSSRNQHVKPVRPVLKXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKSHTQSQSQH
+         PRGC1_HUMAN   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKSHTQSQSQH
+        Q4W5M7_HUMAN   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKSHTQSQSQH
+        Q5RBY0_PONPY   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKSHTQSQSQH
+           PRGC1_PIG   KPQRRPCSELLKYLTTNDDPPHTKPTETRNSSRDKCTSKKKAHTQSQSQH
+           PRGC1_RAT   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCASKKKSHTQPQSQH
+         PRGC1_BOVIN   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKAHTQSQTQH
+        Q4L229_BOVIN   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKAHTQSQTQH
+         PRGC1_MOUSE   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCASKKKSHTQPQSQH
+        Q3UP72_MOUSE   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCASKKKSHTQPQSQH
+        Q60GU0_CHICK   KPQRRPCSELLKYLTTNDDPPQTKPAENRNSSKEKCTSKRKPHLQSQTNH
+          Q5QHW4_PIG   KPQRRPCSELLKYLTTNDDPPHTKPTETRNSSRDKCTSKKKAHTQSQSQH
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   RPVRRHCTELLKYLTATDDILHAKNNDAKGTSRDKSGLGL ----------
+        Q3LIG1_HUMAN   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKSHTQSQSQH
+        Q3LIG2_MOUSE   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCASKKKSHTQPQSQH
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   RPVRRHCTELLKYLTATDDILHPKASEAKGTSRDKS--------------
+          Q811R2_RAT   RSQSRPCTELHKHLTSVLPCPRGKA-------------------------
+        Q8TAL0_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE4_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q8VHJ7_MOUSE   RAQSRPCTELHKHLTSVLPCPRVKA-------------------------
+        Q86YN5_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q8N1N9_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q86YN3_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q8TDE5_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q86YN6_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q86YN4_HUMAN   QSQSRSCTELHKHLTSAQCCLQ----------------------------
+        Q8C1C0_MOUSE   RAQSRPCTELHKHLTSVLPCPRVKA-------------------------
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   KPQRRPCSELLKYLTTNDDPPHTKPTENRNSSRDKCTSKKKAHTQSQTQH
+        Q800H3_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   RSMRRPCTELLKYLTSSDEAFQTKAGEAKSTXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+         PRGC1_HUMAN   LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q4W5M7_HUMAN   LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q5RBY0_PONPY   LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+           PRGC1_PIG   LQAKPTSLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+           PRGC1_RAT   AQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+         PRGC1_BOVIN   LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q4L229_BOVIN   LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+         PRGC1_MOUSE   AQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q3UP72_MOUSE   AQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q60GU0_CHICK   LQAKPTSLSLPLTPESPNDPKGSPFENKTIEQTLSVELSGTAGLTPPTTP
+          Q5QHW4_PIG   LQAKPTSLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q4R5X5_MACFA   XXAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLTPPTTP
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   --AKPTTLPLPLTPESPNDHKGSPFENKTIERTLSVEIAGTPGLTPPTTP
+        Q3LIG1_HUMAN   LQAKPTTLSLPLTPESPNXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   AQAKPTTLSLPLTPESPNXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXLSGTAGLTPPTTP
+        Q4SAM8_TETNG   --AKPTTLPLPLTPESPNDHKGSPFENKAIERTLSVEIAGTP--DRRQPV
+          Q811R2_RAT   ----PRCLMLAL---SQSDPLG----KKSFEESLTVELCGTAGLTPPTTP
+        Q8TAL0_HUMAN   XXXXPRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q8TDE4_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q8VHJ7_MOUSE   -----RCLMLAL---SQSDSLG----KKSFEESLTVELCGTAGLTPPTTP
+        Q86YN5_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q8N1N9_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q86YN3_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q8TDE5_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q86YN6_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q86YN4_HUMAN   ----PRCLMLALSQSDPT------FGKKSFEQTLTVELCGTAGLTPPTTP
+        Q8C1C0_MOUSE   -----RCLMLAL---SQSDSLG----RKSFEESLTVELCGTAGLTPPTTP
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   LQAKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGTAGLXXXXXX
+        Q800H3_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXYANKPFEQTLCMELCGTAGLTPPTTP
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXKPTTLSLPLTPESPNDPKGSPFENKTIERTLSVELSGXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  PHKANQDNPFRASPKLKSSCKTVVPPPSKKPRYSESSGTQGNNSTKKGPE
+         PRGC1_HUMAN   PHKANQDNPFRASPKLKSSCKTVVPPPSKKPRYSESSGTQGNNSTKKGPE
+        Q4W5M7_HUMAN   PHKANQDNPFRASPKLKSSCKTVVPPPSKKPRYSESSGTQGNNSTKKGPE
+        Q5RBY0_PONPY   PHKANQDNPFRASPKLKSSCKTVVPPPSKKPRYSESSGTQGNNSTKKGPE
+           PRGC1_PIG   PHKANQDNPFRASPKLKPPCKTVVPPPSKKTRYSESSGTHGNNSTKKGPE
+           PRGC1_RAT   PHKANQDNPFKASPKLKPSCKTVVPPPTKRARYSECSGTQGSHSTKKGPE
+         PRGC1_BOVIN   PHKANQDNPFRASPKLKPSCKTVVPPPSKKARYSESSCTQGSNSTKKGPE
+        Q4L229_BOVIN   PHKANQDNPFRASPKLKPSCKTVVPPPSKKARYSESSCTQGSNSTKKGPE
+         PRGC1_MOUSE   PHKANQDNPFKASPKLKPSCKTVVPPPTKRARYSECSGTQGSHSTKKGPE
+        Q3UP72_MOUSE   PHKANQDNPFKASPKLKPSCKTVVPPPTKRARYSECSGTQGSHSTKKGPE
+        Q60GU0_CHICK   PHKANQDNPFRTSPKPKSSCKTV-APPSKKPRYSESSGSQGNNPVKKGPE
+          Q5QHW4_PIG   PHKANQDNPFRASPKLKPPCKTVVPPPSKKTRYSESSGTHGNNSTKKGPE
+        Q4R5X5_MACFA   PHKANQDNPFRASPKPKSSCKTVVPPPSKKPRYSESSGTQGNNSTKKGPE
+        Q58FA1_SHEEP   XHKANQDNPFRASPKLKPSCKTVVPPPAKKARYSESSGTQGSNSTKKGPE
+        Q3Y595_FUGRU   PHKASQENPFKASLKTKLSSCSSSAFVCKRARLSESGPGAGGGPIRKGPE
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   PHKANQDNPFRASPKPKSSCKTVVPPPSKKARYSESSGTQGNNSTKKGPE
+        Q4SAM8_TETNG   WNRFCGASRKRESQTFPKNQERVTAPACKRARLSEPGPAAPAPGARKGPE
+          Q811R2_RAT   PYKPMEEDPFKQDTKHSPGQDTAPSLPSPETLQLTATPGASHKLPKRHPE
+        Q8TAL0_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q8TDE4_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q8VHJ7_MOUSE   PYKPMEEDPFKPDTKLSPGQDTAPSLPSPEALPLTATPGASHKLPKRHPE
+        Q86YN5_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q8N1N9_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q86YN3_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q8TDE5_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q86YN6_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q86YN4_HUMAN   PYKPTEEDPFKPDIKHSLGKEIALSLPSPEGLSLKATPGAAHKLPKKHPE
+        Q8C1C0_MOUSE   PYKPMEEDPFKPDTKLSPGQDTAPSLPSPEALPLTATPGASHKLPKRHPE
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   PHKPVEDELFK---------------PDAKADLSTKSSCLMRANMRKLPE
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  QSELYAQLSKSSVLTGGHEERKTKRPSLRLFGDHDYCQSINSKTEILINI
+         PRGC1_HUMAN   QSELYAQLSKSSVLTGGHEERKTKRPSLRLFGDHDYCQSINSKTEILINI
+        Q4W5M7_HUMAN   QSELYAQLSKSSVLTGGHEERKTKRPSLRLFGDHDYCQSINSKTEILINI
+        Q5RBY0_PONPY   QSELYAQLSKSSVLTGGHEERKTKRPSLRLFGDHDYCQSINSKTEILINI
+           PRGC1_PIG   QSELYAQLSKTSALGGGHEERKARRPSLRLFGDHDYCQSINSKAEILINI
+           PRGC1_RAT   QSELYAQLSKSSVLSRGHEERKTKRPSLRLFGDHDYCQSVNSKTDILINI
+         PRGC1_BOVIN   QSELYAQLSKTSVLTSGHEERKAKRPSLRLFGDHDYCQSINSKTEILVST
+        Q4L229_BOVIN   QSELYAQLSKTSVLTSGHEERKAKRPSLRLFGDHDYCQSINSKTEILVST
+         PRGC1_MOUSE   QSELYAQLSKSSGLSRGHEERKTKRPSLRLFGDHDYCQSLNSKTDILINI
+        Q3UP72_MOUSE   QSELYAQLSKSSGLSRGHEERKTKRPSLRLFGDHDYCQSLNSKTDILINI
+        Q60GU0_CHICK   QTELYAQLSKTTALSSGHEERKTKRPSLRLFGDHDYCQSVNSKSEIHIKI
+          Q5QHW4_PIG   QSELYAQLSKTSALGGGHEERKARRPSLRLFGDHDYCQSINSKAEILINI
+        Q4R5X5_MACFA   QSELYAQLSKSTVLTGGHEERKTKRHSLRLFGDHDYCQSINSKTEILIHI
+        Q58FA1_SHEEP   QSESYAQLSKTSVLTSGHEERKAKRPSLRLFGDHDYCQSINSKTEILVSI
+        Q3Y595_FUGRU   QTELYAQLSKASTALGGPEERRSKRAAPRGYSDHDYCQASAKKDSAALVP
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   QSELYAQLSKSSVPSSGHEERKAKRPSPRLFGDHDYCQSINSKTEILLHV
+        Q4SAM8_TETNG   QTELYAQLSKASTARGHRSSGSSQRAASRRHSDHDYCQASAGKDSVTMTT
+          Q811R2_RAT   RSELLSHLQHAT--TQPVSQAGQKRPFSCSFGDHDYCQVIRPEAAL----
+        Q8TAL0_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q8TDE4_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q8VHJ7_MOUSE   RSELLSHLQHAT--TQPVSQAGQKRPFSCSFGDHDYCQVLRPEAAL----
+        Q86YN5_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q8N1N9_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q86YN3_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q8TDE5_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q86YN6_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q86YN4_HUMAN   RSELLSHLRHATAQPA--SQAGQKRPFSCSFGDHDYCQVLRPEGVL----
+        Q8C1C0_MOUSE   RSELLSHLQHAT--TQPVSQAGQKRPFSCSFGDHDYCQVLRPEAAL----
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   QTELYAQLRR ----MGQTGDIDSKGGTQRAYGDHDYCLGESRKTTAVLNG
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  SQELQDSRQLENKDVSSDWQGQICSSTDSDQCYLRETLEASKQVSPCSTR
+         PRGC1_HUMAN   SQELQDSRQLENKDVSSDWQGQICSSTDSDQCYLRETLEASKQVSPCSTR
+        Q4W5M7_HUMAN   SQELQDSRQLENKDVSSDWQGQICSSTDSDQCYLRETLEASKQVSPCSTR
+        Q5RBY0_PONPY   SQELQDSRQLENKDVSSDWQGQICSSTDSDQCYLRETLEASKQVSPCSTR
+           PRGC1_PIG   SQELHDSRQLDSKDAASDWQRQMCSSTDSDQCYLTETSEASRQVSPGSAR
+           PRGC1_RAT   SQELQDSRQLDFKDASCDWQGHICSSTDSSQCYLRETLEASKQVSPCSTR
+         PRGC1_BOVIN   SQELHDSRQLENKDASSNGPGQIHSSTDSDPCYLRETAEVSRQVSPGSTR
+        Q4L229_BOVIN   SQELHDSRQLENKDASSNGPGQIHSSTDSDPCYLRETAEVSRQVSPGSTR
+         PRGC1_MOUSE   SQELQDSRQLDFKDASCDWQGHICSSTDSGQCYLRETLEASKQVSPCSTR
+        Q3UP72_MOUSE   SQELQDSRQLDFKDASCDWQGHICSSTDSGQCYLRETLEASKQVSPCSTR
+        Q60GU0_CHICK   SQELQDSRQLEFKDSSPGWQCQICSSLEQDQYFKKETLQTSKQGSQGNNR
+          Q5QHW4_PIG   SQELHDSRQLDSKDAASDWQRQMCSSTDSDQCYLTETSEASRQVSPGSAR
+        Q4R5X5_MACFA   SQELQDSRQLENKDVSSDWQGQICSSTDSDQCYLRETSEASKQVSPCSTR
+        Q58FA1_SHEEP   SQELHDSRQLENKDASSNGPGQIHSSTDSDLCYLRETAEVSRQVSPGSTR
+        Q3Y595_FUGRU   TVGRVEDGHVECKDSAMPPEQQDLSSVDGKAAPARPSGDDEDQLQTCASR
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   SQELPDSRQLDYKDASSAWQGQICSSTDSDQLYLRETLEASKQVSPCGTR
+        Q4SAM8_TETNG   AAMTAEDGHVECKDLAMPPSSSSSSPPSSSPPPKASSGPSAEQQNPSSAR
+          Q811R2_RAT   --QRKVLRSWEPIKVHLEDLAHQGATLPVETKTPRREADQNCDPTP-KDS
+        Q8TAL0_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q8TDE4_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q8VHJ7_MOUSE   --QRKVLRSWEPIGVHLEDLAQQGAPLPTETKAPRR--EANQNCDPTKDS
+        Q86YN5_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q8N1N9_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q86YN3_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q8TDE5_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q86YN6_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q86YN4_HUMAN   --QRKVLRSWEPSGVHLDWPQQGAPWAEA- QAPGREEDRSCDAGAPPKDS
+        Q8C1C0_MOUSE   --QRKVLRSWEPIGVHLEDLAQQGAPLPTETKAPRR--EANQNCDPTKDS
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   SQ------------------------------------------------
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  KQLQDQEIRAELNKHFGHPSQAVFDDEADKTGELRDSDFSNEQFSKLPMF
+         PRGC1_HUMAN   KQLQDQEIRAELNKHFGHPSQAVFDDEADKTGELRDSDFSNEQFSKLPMF
+        Q4W5M7_HUMAN   KQLQDQEIRAELNKHFGHPSQAVFDDEADKTGELRDSDFSNEQFSKLPMF
+        Q5RBY0_PONPY   KQLQDQEIRAELDKHFGHPSQAVFDDEADKTSELRDSDFSNEQFSKLPMF
+           PRGC1_PIG   KQLQDQEIRAELNKHFGHPSQAVFDDEADKTSELRDSDFSNEQFSKLPMF
+           PRGC1_RAT   KQLQDQEIRAELNKHFGHPSQAVFDDKVDKTSELRDGNFSNEQFSKLPVF
+         PRGC1_BOVIN   KQLQDQEIRAELNKHFGHPSQAVFDDKADKTSELRDSDFSNEQFSKLPMF
+        Q4L229_BOVIN   KQLQDQEIRAELNKHFGHPSQAVFDDKADKTSELRDSDFSNEQFSKLPMF
+         PRGC1_MOUSE   KQLQDQEIRAELNKHFGHPCQAVFDDKSDKTSELRDGDFSNEQFSKLPVF
+        Q3UP72_MOUSE   KQLQDQEIRAELNKHFGHPCQAVFDDKSDKTSELRDGDFSNEQFSKLPVF
+        Q60GU0_CHICK   KQLQDQEIRAELNKHFGHPSQAVFDEEADKTGELRDSDYSNEQFSKLPMF
+          Q5QHW4_PIG   KQLQDQEIRAELNKHFGHPSQAVFDDEADKTSELRDSDFSNEQFSKLPMF
+        Q4R5X5_MACFA   KQLQDQEIRAELNKHFGHPSQAVFDDEADKTSELRDSDFSNEQFSKLPMF
+        Q58FA1_SHEEP   KQLQDQEIRAELNKHFGHPSQAVFDDKADRTSELRDSDFSNEQFSKLPMF
+        Q3Y595_FUGRU   KLLRDNQIRAELNKHFGHPLQALYSQXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   KQLQDQEIRAELNKHFGHPSQAVFDDEADKTSELRDSDFSNEQFSKLPMF
+        Q4SAM8_TETNG   KLLRDNEIRAELNKHFGPPLQALYSQGREPVGEGEDSYYPH----RLPSY
+          Q811R2_RAT   MQLRDHEIRASLTKHFG---------------------------------
+        Q8TAL0_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q8TDE4_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q8VHJ7_MOUSE   MQLRDHEIRASLTKHFG---------------------------------
+        Q86YN5_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q8N1N9_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q86YN3_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q8TDE5_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q86YN6_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q86YN4_HUMAN   TLLRDHEIRASLTKHFG---------------------------------
+        Q8C1C0_MOUSE   MQLRDHEIRASLTKHFG---------------------------------
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   --------------------------------------------------
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  INSGLAMDGLFDDSEDESDKLSYPWDGTQSYSLFNVSPSCSSFNSPCRDS
+         PRGC1_HUMAN   INSGLAMDGLFDDSEDESDKLSYPWDGTQSYSLFNVSPSCSSFNSPCRDS
+        Q4W5M7_HUMAN   INSGLAMDGLFDDSEDESDKLSYPWDGTQSYSLFNVSPSCSSFNSPCRDS
+        Q5RBY0_PONPY   INSGLAMDGLFDDSEDESDKLSYPWDGTQSYSLFNVSPSCSSFNSPCRDS
+           PRGC1_PIG   INSGLAMDGLFDDSEDESDKLNSPWDGTQSYSLFDVSPSCSSFNSPCRDS
+           PRGC1_RAT   INSGLAMDGLFDDSEDENDKLSYPWDGTQSYSLFDVSPSCSSFNSPCRDS
+         PRGC1_BOVIN   INSGLAMDGLFDDSEDESDKLNSPWDGTQSYSLFDVSPSCSSFNSPCRDS
+        Q4L229_BOVIN   INSGLAMDGLFDDSEDESDKLNSPWDGTQSYSLFDVSPSCSSFNSPCRDS
+         PRGC1_MOUSE   INSGLAMDGLFDDSEDESDKLSYPWDGTQPYSLFDVSPSCSSFNSPCRDS
+        Q3UP72_MOUSE   INSGLAMDGLFDDSEDESDKLSYPWDGTQPYSLFDVSPSCSSFNSPCRDS
+        Q60GU0_CHICK   INSGLAMDGLFDDSEDESDKLCYPWDGTQSYSLFDVSPSCSSFNSPCRDS
+          Q5QHW4_PIG   INSGLAMDGLFDDSEDESDKLNSPWDGTQSYSLFDVSPSCSSFNSPCRDS
+        Q4R5X5_MACFA   INSGLAMDGLFDDSEDESDKLSYPWDGTQSYSLFNVSPSCSSFNSPCRDS
+        Q58FA1_SHEEP   INSGLAMDGLFDDSEDESDKLNSPWDGTQSYSLFDVSPSCSSFNSPCRDS
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   INSGLAMDXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   LHPGLPFHEELELAQDREGRSLYPWEGTPLDLLFDRSPSCPSSCSPSRGS
+          Q811R2_RAT   --------------------------------------------------
+        Q8TAL0_HUMAN   --------------------------------------------------
+        Q8TDE4_HUMAN   --------------------------------------------------
+        Q8VHJ7_MOUSE   --------------------------------------------------
+        Q86YN5_HUMAN   --------------------------------------------------
+        Q8N1N9_HUMAN   --------------------------------------------------
+        Q86YN3_HUMAN   --------------------------------------------------
+        Q8TDE5_HUMAN   --------------------------------------------------
+        Q86YN6_HUMAN   --------------------------------------------------
+        Q86YN4_HUMAN   --------------------------------------------------
+        Q8C1C0_MOUSE   --------------------------------------------------
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   --------------------------------------------------
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  VSPPKSLFSQRPQRMRSRSRSFSRHRSCSRSPYSRSRSRSPGSRSSSRSC
+         PRGC1_HUMAN   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4W5M7_HUMAN   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5RBY0_PONPY   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_PIG   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_RAT   VSPPKSLF------------------------------------------
+         PRGC1_BOVIN   VSPPKSLF------------------------------------------
+        Q4L229_BOVIN   VSPPKSLF------------------------------------------
+         PRGC1_MOUSE   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3UP72_MOUSE   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q60GU0_CHICK   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW4_PIG   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   VSPPKSLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   I-------------------------------------------------
+          Q811R2_RAT   --------------------------------------------------
+        Q8TAL0_HUMAN   --------------------------------------------------
+        Q8TDE4_HUMAN   --------------------------------------------------
+        Q8VHJ7_MOUSE   --------------------------------------------------
+        Q86YN5_HUMAN   --------------------------------------------------
+        Q8N1N9_HUMAN   --------------------------------------------------
+        Q86YN3_HUMAN   --------------------------------------------------
+        Q8TDE5_HUMAN   --------------------------------------------------
+        Q86YN6_HUMAN   --------------------------------------------------
+        Q86YN4_HUMAN   --------------------------------------------------
+        Q8C1C0_MOUSE   --------------------------------------------------
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   --------------------------------------------------
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  YYYESSHYRHRTHRNSPLYVRSRSRSPYSRRPRYDSYEEYQHERLKREEY
+         PRGC1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4W5M7_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5RBY0_PONPY   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_RAT   --------------------------------------------------
+         PRGC1_BOVIN   -------------------------------------EEYQHERLKREEY
+        Q4L229_BOVIN   -------------------------------------EEYQHERLKREEY
+         PRGC1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3UP72_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q60GU0_CHICK   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW4_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   --------------------------------------------------
+          Q811R2_RAT   --------------------------------------------------
+        Q8TAL0_HUMAN   --------------------------------------------------
+        Q8TDE4_HUMAN   --------------------------------------------------
+        Q8VHJ7_MOUSE   --------------------------------------------------
+        Q86YN5_HUMAN   --------------------------------------------------
+        Q8N1N9_HUMAN   --------------------------------------------------
+        Q86YN3_HUMAN   --------------------------------------------------
+        Q8TDE5_HUMAN   --------------------------------------------------
+        Q86YN6_HUMAN   --------------------------------------------------
+        Q86YN4_HUMAN   --------------------------------------------------
+        Q8C1C0_MOUSE   --------------------------------------------------
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   --------------------------------------------------
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+               QUERY  RREYEKRESERAKQRERQRQKAIEERRVIYVGKIRPDTTRTELRDRFEVF
+         PRGC1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4W5M7_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5RBY0_PONPY   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_RAT   ---------------------------VIYVGKIRPDTTRTELRDRFEVF
+         PRGC1_BOVIN   RREYEKRESERAKQRERQRQKAIEERRVIYVGKIRPDTTRTELRDRFEVF
+        Q4L229_BOVIN   RREYEKRESERAKQRERQRQKAIEERRVIYVGKIRPDTTRTELRDRFEVF
+         PRGC1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3UP72_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q60GU0_CHICK   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW4_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   ---------------------------VVYVGRLRSDCTRTELKRRFEVF
+          Q811R2_RAT   ---------------------------VVYIRNLSGDMSSRELKKRFEVF
+        Q8TAL0_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q8TDE4_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q8VHJ7_MOUSE   ---------------------------VVYIRNLSSDMSSRELKKRFEVF
+        Q86YN5_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q8N1N9_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q86YN3_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q8TDE5_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q86YN6_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q86YN4_HUMAN   ---------------------------VVYIQNLSSDMSSRELKRRFEVF
+        Q8C1C0_MOUSE   ---------------------------VVYIRNLSSDMSSRELKKRFEVF
+        Q76N31_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q5VV66_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q5VV67_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q6P3U5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q9BZE5_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q91YW8_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q6P3W1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q6NZN1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q80TW6_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+          Q5QHW3_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXELRDRFEVF
+        Q4RQH2_TETNG   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVYVGRIRGTMTQKELGERFSLF
+        Q9BUJ3_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXVVFIGKIPGRMTRSELKQRFSVF
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   ---------------------------VIYIHNLPSSVTQTMLRKRFEAF
+        Q5U183_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRIEQETTKEILRRKFLPY
+        Q6NPA7_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRIEQETTKEILRRKFLPY
+        Q8IPM1_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRIEQETTKEILRRKFLPY
+        Q8IH74_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRIEQETTKEILRRKFLPY
+        Q8IPM0_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRIEQETTKEILRRKFLPY
+        Q9VN24_DROME   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRIEQETTKEILRRKFLPY
+        Q7QA91_ANOGA   XXXXXXXXXXXXXXXXXXXXXXXXXXXIVYVGRLESTTRKEDLQQKFQPY
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXYIKHLPDDITDDRLREIFEPF
+
+               QUERY  GEIEECTVNLRDDGDSYGFITYRYTCDAFAALENGYTLRRSNETDFELYF
+         PRGC1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4W5M7_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5RBY0_PONPY   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_RAT   GEIEECTVNLRDDGDSYGFITYRYTCDAFAALENGYTLRRSNETDFELYF
+         PRGC1_BOVIN   GEIEECTVNLRDDGDSYGFITYRYTCDAFAALENGYTLRRSNETDFELYF
+        Q4L229_BOVIN   GEIEECTVNLRDDGDSYGFITYRYTCDAFAALENGYTLRRSNETDFELYF
+         PRGC1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3UP72_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q60GU0_CHICK   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW4_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   GEIEECAVNLRDDGDNFGFITYRYTCDAFAALENGHTLRRSDEPQFELCF
+          Q811R2_RAT   GEIVECQVLRRKRGQKHGFITFRCSEHAALSVRNGATLRKRNEPSFHLSY
+        Q8TAL0_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q8TDE4_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q8VHJ7_MOUSE   GEIVECQVLTRKRGQKHGFITFRCSEHAALSVRNGATLRKRNEPSFHLSY
+        Q86YN5_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q8N1N9_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q86YN3_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q8TDE5_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q86YN6_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q86YN4_HUMAN   GEIEECEVLTRNRGEKYGFITYRCSEHAALSLTKGAALRKRNEPSFQLSY
+        Q8C1C0_MOUSE   GEIVECQVLTRKRGQKHGFITFRCSEHAALSVRNGATLRKRNEPSFHLSY
+        Q76N31_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q5VV66_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q5VV67_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q6P3U5_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q9BZE5_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q91YW8_MOUSE   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q6P3W1_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q6NZN1_MOUSE   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q80TW6_MOUSE   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+          Q5QHW3_PIG   GEIEECTVNLRDDGDSYGFITYRYTCDAFAALENGYTLRRSNETDFELYF
+        Q4RQH2_TETNG   GEIEDCTLHFRNHGDNYGFVTYYDTKDAFAAIENGSKLRKPDELPFDLCF
+        Q9BUJ3_HUMAN   GEIEECTIHFRVQGDNYGFVTYRYAEEAFAAIESGHKLRQADEQPFDLCF
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   GHPEDCKVVIKKE-ERCGVITLRHTQ-------NGQTSRHRWDLLGPSGG
+        Q5U183_DROME   GSIKQITIHYKENGMKYGFVTYERAQDAFTAIDTSH--RDSQISMYDISF
+        Q6NPA7_DROME   GSIKQITIHYKENGMKYGFVTYERAQDAFTAIDTSH--RDSQISMYDISF
+        Q8IPM1_DROME   GSIKQITIHYKENGMKYGFVTYERAQDAFTAIDTSH--RDSQISMYDISF
+        Q8IH74_DROME   GSIKQITIHYKENGMKYGFVTYERAQDAFTAIDTSH--RDSQISMYDISF
+        Q8IPM0_DROME   GSIKQITIHYKENGMKYGFVTYERAQDAFTAIDTSH--RDSQISMYDISF
+        Q9VN24_DROME   GSIKQITIHYKENGMKYGFVTYERAQDAFTAIDTSH--RDSQISMYDISF
+        Q7QA91_ANOGA   GKIVKITLHMKANGSRYGFVTFEKPQHAYDAIDARGT--DPNLRNYDVSF
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   GKITSCAIMKEPNGKGFAFVCFEDKQHASAALRNGHPLEHSAKPLYXXXX
+
+               QUERY  CGRKQFFKSNYADLDSNSDDFDPASTKSKYDSLDFDSLLKEAQRSLRR
+         PRGC1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4W5M7_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q5RBY0_PONPY   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+           PRGC1_RAT   CGRKQFFKSNYADLDSNSDDFDPASTKSKYDSLDFDSLLKEAQRSLRR
+         PRGC1_BOVIN   CGRKQFFKSNYADLDSNSDDFDPACIKSKYDSLDFDSLLKEAQRSLRR
+        Q4L229_BOVIN   CGRKQFFKSNYADLDSNSDDFDPACIKSKYDSLDFDSLLKEAQRSLRR
+         PRGC1_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3UP72_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q60GU0_CHICK   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW4_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4R5X5_MACFA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q58FA1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3Y595_FUGRU   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG1_HUMAN   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG2_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6R2I4_SPETR   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4SAM8_TETNG   GGQKQFCKSHYADLDSHSDDFDPASTKSKYGSLDFDSLLREAQRSLRX
+          Q811R2_RAT   GGLRHFRWPRYTDYDPTSEESLPSSGKSKYEAMDFDSLLKEAQQSLHX
+        Q8TAL0_HUMAN   GGLRHLGWPRYTDYXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE4_HUMAN   GGLRHFCWPRYTDYDSNSEEALPASGKSKYEAMDFDSLLKEAQQSLHX
+        Q8VHJ7_MOUSE   GGLRHFRWPRYTDYDPTSEESLPSSGKSKYEAMDFDSLLKEAQQSLHX
+        Q86YN5_HUMAN   GGLRHFCWPRYTDYXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8N1N9_HUMAN   GGLRHFCWPRYTDYDSNSEEALPASGKSKYEAMDFDSLLKEAQQSLHX
+        Q86YN3_HUMAN   GGLRHFCWPRYTDYXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8TDE5_HUMAN   GGLRHFCWPRYTDYDSNSEEALPASGKSKYEAMDFDSLLKEAQQSLHX
+        Q86YN6_HUMAN   GGLRHFCWPRYTDYDSNSEEALPASGKSKYEAMDFDSLLKEAQQSLHX
+        Q86YN4_HUMAN   GGLRHFCWPRYTDYDSNSEEALPASGKSKYEAMDFDSLLKEAQQSLHX
+        Q8C1C0_MOUSE   GGLRHFRWPRYTDYDPTSEESLPSSGKSKYEAMDFDSLLKEAQQSLHX
+        Q76N31_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q5VV66_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q5VV67_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q6P3U5_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q9BZE5_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q91YW8_MOUSE   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q6P3W1_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q6NZN1_MOUSE   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+        Q80TW6_MOUSE   GGRRQFCKRSYSDLDSNREDFDPAPVKSKFDSLDFDTLLKQAQKNLRR
+          Q5QHW3_PIG   CGRKQFFKSNYADLDSNSDDFDPASTKSKYDSLDFDSLLKEAQRSLRX
+        Q4RQH2_TETNG   GGRRQFCQTSYADLDS-STEYEPFPAKGKFHALDFDTLLKQAQQNLKR
+        Q9BUJ3_HUMAN   GGRRQFCKRSYSDLDSNREDFDPAPVRAXXXXXXXXXXXXXXXXXXXX
+        Q6VAC1_SHEEP   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H3_BRARE   NGSRRFGRKRYIDLDEAG----PGPVKSKYDALDFDALLKEAQRSLHR
+        Q5U183_DROME   GGRRAFCRSSYADLDNAGXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q6NPA7_DROME   GGRRAFCRSSYADLDNAGXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM1_DROME   GGRRAFCRSSYADLDNAGXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IH74_DROME   GGRRAFCRSSYADLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q8IPM0_DROME   GGRRAFCRSSYADLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q9VN24_DROME   GGRRAFCRSSYADLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q7QA91_ANOGA   GGRRAFCRTQYADLXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          Q5QHW5_PIG   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q52MY8_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q800H2_BRARE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q3LIG0_MOUSE   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+        Q4QA88_LEIMA   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.selex
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.selex	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.selex	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,11 @@
+AK1H_ECOLI/114-431    CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAESTRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQSDCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF------FAALARANINIVAIA
+AKH_HAEIN/114-431     -----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQLLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNGSDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLPTLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESLQ----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQAKGIAARF------FSALAQANISIVAIA
+AKH1_MAIZE/117-440    -----------------ATESFSDFVVGHGELWSAQMLSYAIQKSGT-P--CSWMDTREVLVVNPSGANQVDPDYLESEKRLEKWFSRC-P-AETIIATGF---IASTPENIPTTLKRDGSDFSAAI-----------------IGSLVKARQVTIWTDVDGVFSADP--RKVSEAVILSTLSYQEAWEMSYFGANVLHPRTIIPVMKYNIPIVIRNIFNTSAPGTM-IC--QQPANENGDLEACVKAFATIDKLALVNVEGTGMAGV---PGTANAIFGAVKDVGANVIMISQASSEHS---VCFAVPEKEVALVSAALHARFR-----EALAAGRLSKVEVIHNCSILAT-VGLRMASTPGVSATL------FDALAKANINVRAIA
+AK2H_ECOLI/112-431    -----------------INDAVYAEVVGHGEVWSARLMSAVLNQQG-----LPAAWLDAREFLRAERAAQPQVDEGLSYPLLQQLLVQH-P-GKRLVVTGF---ISRNNAGETVLLGRNGSDYSATQ-----------------IGALAGVSRVTIWSDVAGVYSADP--RKVKDACLLPLLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLASGTGARIVTSHDDVCLI-EFQVPASQDFKLAHKEI--DQILKRAQVRPLAVGVHNDRQLLQFCYTSEVADSALKILDEAG---------LPGELRLRQGLALVAMVGAGVTRNPLHCHRFWQQLKGQPVEFTWQSDDGISLVAVL
+AK1_BACSU/66-374      -----------------ISPREQDLLLSCGETISSVVFTSMLLDNGVKA--AALTGAQAGFLTNDQHTNAKIIEMKPER--LFSVLAN----HDAVVVAGF---QGATEKGDTTTIGRGGSDTSAAA-----------------LGAAVDAEYIDIFTDVEGVMTADP--RVVENAKPLPVVTYTEICNLAYQGAKVISPRAVEIAMQAKVPIRVRSTYS-NDKGTLVTSHHSSKVGSDVFERLITGIAH-VKDVTQFKVPAKIGQYN-----VQTEVFKAMANAGISVDFFNITPSEIVYTVAGNKTETAQR------------ILMDMGYDPMVTRNCAKVSAVGAGIMGVPGVTSKI------VSALSEKEIPILQSA
+AK2_BACST/63-370      -----------------KRE--MDMLLSTGEQVSIALLAMSLHEKGYKA--VSLTGWQAGITTEEMHGNARIMNIDTT--RIRRCLDE----GAIVIVAGF---QGVTETGEITTLGRGGSDTTAVA-----------------LAAALKAEKCDIYTDVTGVFTTDP--RYVKTARKIKEISYDEMLELANLGAGVLHPRAVEFAKNYEVPLEVRSSME-NERGTMVK--EEVSMEQHLIVRGIAFEDQ-VTRVTVVGIEKYLQSVA--------TIFTALANRGINVDIIIQNA--------------------TNSETAS--VSFSIRTEDLPETLQVLQ-------------ALEGADVHYESGLAKVSI-VGSGMISNPGVAARV------FEVLADQGIEIKMVS
+AK2_BACSU/63-373      -----------------KRE--MDMLLATGEQVTISLLSMALQEKGYDA--VSYTGWQAGIRTEAIHGNARITDIDTS--VLADQLEK----GKIVIVAGF---QGMTEDCEITTLGRGGSDTTAVA-----------------LAAALKVDKCDIYTDVPGVFTTDP--RYVKSARKLEGISYDEMLELANLGAGVLHPRAVEFAKNYQVPLEVRSSTE-TEAGTLIE--EESSMEQNLIVRGIAFEDQ-ITRVTIYGLTSGLTTLS--------TIFTTLAKRNINVDIIIQTQ--------------------AEDKTG---ISFSVKTEDADQTVAVLEEYK---------DALEFEKIETESKLAKVSI-VGSGMVSNPGVAAEM------FAVLAQKNILIKMVS
+AKAB_CORFL/63-379     -----------------ARE--MDMLLTAGERISNALVAMAIESLGAEA--QSFTGSQAGVLTTERHGNARIVDVTPG--RVREALDE----GKICIVAGF--QGVNKETRDVTTLGRGGSDTTAVA-----------------LAAALNADVCEIYSDVDGVYTADP--RIVPNAQKLEKLSFEEMLELAAVGSKILVLRSVEYARAFNVPLRVRSSYS-NDPGTLIAGSMEDIPVEEAVLTGVATDKS-EAKVTVLGISDKPGEAA--------KVFRALADAEINIDMVLQNV--------------------SSVEDGTTDITFTCPRADGRRAMEILKKLQ---------VQGNWTNVLYDDQVDKVSL-VGAGMKSHPGVTAEF------MEALRDVNVNIELIS
+AKAB_MYCSM/63-379     -----------------PRE--MDMLLTAGERISNALVAMAIESLGAQA--RSFTGSQAGVITTGTHGNAKIIDVTPG--RLRDALDE----GQIVLVAGF--QGVSQDSKDVTTLGRGGSDTTAVA-----------------VAAALDADVCEIYTDVDGIFTADP--RIVPNARHLDTVSFEEMLEMAACGAKVLMLRCVEYARRYNVPIHVRSSYS-DKPGTIVKGSIEDIPMEDAILTGVAHDRS-EAKVTVVGLPDVPGYAA--------KVFRAVAEADVNIDMVLQNI--------------------SKIEDGKTDITFTCARDNGPRAVEKLSALK---------SEIGFSQVLYDDHIGKVSL-IGAGMRSHPGVTATF------CEALAEAGINIDLIS
+AK3_ECOLI/106-407     -----------------TSPALTDELVSHGELMSTLLFVEILRERD--V--QAQWFDVRKVMRTNDRFGRAEPDIAALAELAALQLLPR-LNEGLVITQGF---IGSENKGRTTTLGRGGSDYTAAL-----------------LAEALHASRVDIWTDVPGIYTTDP--RVVSAAKRIDEIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTLVCNKTENPPLFRALAL--RRNQT-LLTLHSLNMLHSRGFLA--------EVFGILARHNISVDLITTSEVSVALTLDTTGSTSTG----------DTLLTQSLLMELSALCRVEVEEGLALVALIG----------NDLSKACGVGKEVF
+AK_YEAST/134-472      -----------------VSSRTVDLVMSCGEKLSCLFMTALCNDRGCKAKYVDLSHIVPSDFSASALDNSFYTFLVQALKEKLAPFVSA-KERIVPVFTGF---FGLVPTGLLNGVGRGYTDLCAAL-----------------IAVAVNADELQVWKEVDGIFTADP--RKVPEARLLDSVTPEEASELTYYGSEVIHPFTMEQVIRAKIPIRIKNVQNPLGNGTIIYPDNVAKKGESTPPHPPENLSS----SFYEKRKRGATAITTKN----DIFVINIHSNKKTLSHGFLAQIFTILDKYKLVVDLISTSEVHVSMALPIPDADS-LKSLRQAEEKLRILGSVDITKKLSIVSLVGKHMKQYIGIAG---TMFTTLAEEGINIEMIS

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln.stockholm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln.stockholm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln.stockholm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,360 @@
+# STOCKHOLM 1.0
+#=GF ID   14-3-3
+#=GF AC   PF00244
+#=GF DE   14-3-3 proteins
+#=GF AU   Finn RD
+#=GF AL   Clustalw
+#=GF SE   Prosite
+#=GF GA   25 25
+#=GF TC   35.40 35.40
+#=GF NC   8.80 8.80
+#=GF BM   hmmbuild -f HMM SEED
+#=GF BM   hmmcalibrate --seed 0 HMM
+#=GF RN   [1]
+#=GF RM   95327195
+#=GF RT   Structure of a 14-3-3 protein and implications for
+#=GF RT   coordination of multiple signalling pathways. 
+#=GF RA   Xiao B, Smerdon SJ, Jones DH, Dodson GG, Soneji Y, Aitken
+#=GF RA   A, Gamblin SJ; 
+#=GF RL   Nature 1995;376:188-191.
+#=GF RN   [2]
+#=GF RM   95327196
+#=GF RT   Crystal structure of the zeta isoform of the 14-3-3
+#=GF RT   protein. 
+#=GF RA   Liu D, Bienkowska J, Petosa C, Collier RJ, Fu H, Liddington
+#=GF RA   R; 
+#=GF RL   Nature 1995;376:191-194.
+#=GF RN   [3]
+#=GF RM   96182649
+#=GF RT   Interaction of 14-3-3 with signaling proteins is mediated
+#=GF RT   by the recognition of phosphoserine. 
+#=GF RA   Muslin AJ, Tanner JW, Allen PM, Shaw AS; 
+#=GF RL   Cell 1996;84:889-897.
+#=GF RN   [4]
+#=GF RM   97424374
+#=GF RT   The 14-3-3 protein binds its target proteins with a common
+#=GF RT   site located towards the C-terminus. 
+#=GF RA   Ichimura T, Ito M, Itagaki C, Takahashi M, Horigome T,
+#=GF RA   Omata S, Ohno S, Isobe T 
+#=GF RL   FEBS Lett 1997;413:273-276.
+#=GF RN   [5]
+#=GF RM   96394689
+#=GF RT   Molecular evolution of the 14-3-3 protein family. 
+#=GF RA   Wang W, Shakes DC 
+#=GF RL   J Mol Evol 1996;43:384-398.
+#=GF RN   [6]
+#=GF RM   96300316
+#=GF RT   Function of 14-3-3 proteins. 
+#=GF RA   Jin DY, Lyu MS, Kozak CA, Jeang KT 
+#=GF RL   Nature 1996;382:308-308.
+#=GF DR   PROSITE; PDOC00633;
+#=GF DR   SMART; 14_3_3;
+#=GF DR   PRINTS; PR00305;
+#=GF DR   SCOP; 1a4o; fa;
+#=GF DR   INTERPRO; IPR000308;
+#=GF SQ   16
+#=GS 1433_LYCES/9-246  AC P93209
+#=GS 1431_ENTHI/4-239  AC P42648
+#=GS 1432_ENTHI/4-238  AC P42649
+#=GS 1434_LYCES/6-243  AC P42652
+#=GS 143B_VICFA/7-242  AC P42654
+#=GS 1433_CAEEL/5-237  AC P41932
+#=GS 143Z_DROME/6-239  AC P29310
+#=GS 1433_XENLA/1-227  AC P29309
+#=GS 143E_HUMAN/4-239  AC P42655
+#=GS 143F_MOUSE/3-240  AC P11576
+#=GS 143R_ARATH/7-245  AC P42647
+#=GS 143S_HUMAN/3-238  AC P31947
+#=GS 143T_HUMAN/3-236  AC P27348
+#=GS BMH1_YEAST/4-240  AC P29311
+#=GS RA24_SCHPO/6-241  AC P42656
+#=GS RA25_SCHPO/5-240  AC P42657
+1433_LYCES/9-246             REENVYMAKLADRAESDEEMVEFMEKVSNSLGS.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EESRG.NEEHVNSIREYRSKIENELSKICDGILKLLDSKLIPSA..TSGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLTAYKAAQDIASAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDG
+1434_LYCES/6-243             REENVYLAKLAEQAERYEEMIEFMEKVAKTADV.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EESRG.NEDHVNTIKEYRSKIEADLSKICDGILSLLESNLIPSA..STAESKVFHLKMKGDYHRYLAEFKTGTERKEAAENTLLAYKSAQDIALAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAISELDTLGEESYKDSTLIMQLLRDNLTLWTSDNADDV
+143R_ARATH/7-245             RDQYVYMAKLAEQAERYEEMVQFMEQLVTGATPAEELTVEERNLLSVAYKNVIGSLRAAWRIVSSIEQK
+EESRK.NDEHVSLVKDYRSKVESELSSVCSGILKLLDSHLIPSA..GASESKVFYLKMKGDYHRYMAEFKSGDERKTAAEDTMLAYKAAQDIAAADMA
+PTHPIRLGLALNFSVFYYEILNSSDKACNMAKQAFEEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDYAGAD
+143B_VICFA/7-242             RENFVYIAKLAEQAERYEEMVDSMKNVANLDV...ELTIEERNLLSVGYKNVIGARRASWRILSSIEQK
+EESKG.NDVNAKRIKEYRHKVETELSNICIDVMRVIDEHLIPSA..AAGESTVFYYKMKGDYYRYLAEFKTGNEKKEAGDQSMKAYESATTAAEAELP
+PTHPIRLGLALNFSVFYYEILNSPERACHLAKQAFDEAISELDTLNEESYKDSTLIMQLLRDNLTLWTSDIPEDG
+143E_HUMAN/4-239             REDLVYQAKLAEQAERYDEMVESMKKVAGMDV...ELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EENKG.GEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAA..NTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELP
+PTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDG
+BMH1_YEAST/4-240             REDSVYLAKLAEQAERYEEMVENMKTVASSGQ...ELSVEERNLLSVAYKNVIGARRASWRIVSSIEQK
+EESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSA..TTGESKVFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELP
+PTHPIRLGLALNFSVFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSESG
+RA24_SCHPO/6-241             REDAVYLAKLAEQAERYEGMVENMKSVASTDQ...ELTVEERNLLSVAYKNVIGARRASWRIVSSIEQK
+EESKG.NTAQVELIKEYRQKIEQELDTICQDILTVLEKHLIPNA..ASAESKVFYYKMKGDYYRYLAEFAVGEKRQHSADQSLEGYKAASEIATAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACYLAKQAFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYSA
+RA25_SCHPO/5-240             RENSVYLAKLAEQAERYEEMVENMKKVACSND...KLSVEERNLLSVAYKNIIGARRASWRIISSIEQK
+EESRG.NTRQAALIKEYRKKIEDELSDICHDVLSVLEKHLIPAA..TTGESKVFYYKMKGDYYRYLAEFTVGEVCKEAADSSLEAYKAASDIAVAELP
+PTDPMRLGLALNFSVFYYEILDSPESACHLAKQVFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYNQ
+1431_ENTHI/4-239             REDCVYTAKLAEQSERYDEMVQCMKQVAEMEA...ELSIEERNLLSVAYKNVIGAKRASWRIISSLEQK
+EQAKG.NDKHVEIIKGYRAKIEKELSTCCDDVLKVIQENLLPKA..STSESKVFFKKMEGDYYRYFAEFTVDEKRKEVADKSLAAYTEATEISNAELA
+PTHPIRLGLALNFSVFYFEIMNDADKACQLAKQAFDDAIAKLDEVPENMYKDSTLIMQLLRDNLTLWTSDACDEE
+1432_ENTHI/4-238             REDLVYLSKLAEQSERYEEMVQYMKQVAEMGT...ELSVEERNLISVAYKNVVGSRRASWRIISSLEQK
+EQAKG.NTQRVELIKTYRAKIEQELSQKCDDVLKIITEFLLKNS..TSIESKVFFKKMEGDYYRYYAEFTVDEKRKEVADKSLAAYQEATDTA.ASLV
+PTHPIRLGLALNFSVFYYQIMNDADKACQLAKEAFDEAIQKLDEVPEESYKESTLIMQLLRDNLTLWTSDMGDDE
+1433_CAEEL/5-237             VEELVQRAKLAEQAERYDDMAAAMKKVTEQGQ...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEG...SEKKQQLAKEYRVKVEQELNDICQDVLKLLDEFLIVKA..GAAESKAFYLKMKGDYYRYLAEVAS.EDRAAVVEKSQKAYQEALDIAKDKMQ
+PTHPIRLGLALNFSVFYYEILNTPEHACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDVGAED
+143Z_DROME/6-239             KEELVQKAKLAEQSERYDDMAQAMKSVTETGV...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEA...SARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKA..SNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQ
+PTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDE
+1433_XENLA/1-227             .......AKLSEQAERYDDMAASMKAVTELGA...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEG...NDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNA..TPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQ
+PTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEE
+143T_HUMAN/3-236             KTELIQKAKLAEQAERYDDMATCMKAVTEQGA...ELSNEERNLLSVAYKNVVGGRRSAWRVISSIEQK
+TDT...SDKKLQLIKDYREKVESELRSICTTVLELLDKYLIANA..TNPESKVFYLKMKGDYFRYLAEVACGDDRKQTIDNSQGAYQEAFDISKKEMQ
+PTHPIRLGLALNFSVFYYEILNNPELACTLAKTAFDEAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDSAGEE
+143F_MOUSE/3-240             REQLLQRARLAEQAERYDDMASAMKAVTELNE...PLSNEDRNLLSVAYKNVVGARRSSWRVISSIEQK
+TMADG.NEKKLEKVKAYREKIEKELETVCNDVLALLDKFLIKNCNDFQYESKVFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEHMQ
+PTHPIRLGLALNFSVFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDEE
+143S_HUMAN/3-238             RASLIQKAKLAEQAERYEDMAAFMKGAVEKGE...ELSCEERNLLSVAYKNVVGGQRAAWRVLSSIEQK
+SNEEG.SEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEA..GDAESRVFYLKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMP
+PTNPIRLGLALNFSVFHYEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEE
+//
+# STOCKHOLM 1.0
+#=GF ID   14-3-3
+#=GF AC   PF00244
+#=GF DE   14-3-3 proteins
+#=GF AU   Finn RD
+#=GF AL   Clustalw
+#=GF SE   Prosite
+#=GF GA   25 25
+#=GF TC   35.40 35.40
+#=GF NC   8.80 8.80
+#=GF BM   hmmbuild -f HMM SEED
+#=GF BM   hmmcalibrate --seed 0 HMM
+#=GF RN   [1]
+#=GF RM   95327195
+#=GF RT   Structure of a 14-3-3 protein and implications for
+#=GF RT   coordination of multiple signalling pathways. 
+#=GF RA   Xiao B, Smerdon SJ, Jones DH, Dodson GG, Soneji Y, Aitken
+#=GF RA   A, Gamblin SJ; 
+#=GF RL   Nature 1995;376:188-191.
+#=GF RN   [2]
+#=GF RM   95327196
+#=GF RT   Crystal structure of the zeta isoform of the 14-3-3
+#=GF RT   protein. 
+#=GF RA   Liu D, Bienkowska J, Petosa C, Collier RJ, Fu H, Liddington
+#=GF RA   R; 
+#=GF RL   Nature 1995;376:191-194.
+#=GF RN   [3]
+#=GF RM   96182649
+#=GF RT   Interaction of 14-3-3 with signaling proteins is mediated
+#=GF RT   by the recognition of phosphoserine. 
+#=GF RA   Muslin AJ, Tanner JW, Allen PM, Shaw AS; 
+#=GF RL   Cell 1996;84:889-897.
+#=GF RN   [4]
+#=GF RM   97424374
+#=GF RT   The 14-3-3 protein binds its target proteins with a common
+#=GF RT   site located towards the C-terminus. 
+#=GF RA   Ichimura T, Ito M, Itagaki C, Takahashi M, Horigome T,
+#=GF RA   Omata S, Ohno S, Isobe T 
+#=GF RL   FEBS Lett 1997;413:273-276.
+#=GF RN   [5]
+#=GF RM   96394689
+#=GF RT   Molecular evolution of the 14-3-3 protein family. 
+#=GF RA   Wang W, Shakes DC 
+#=GF RL   J Mol Evol 1996;43:384-398.
+#=GF RN   [6]
+#=GF RM   96300316
+#=GF RT   Function of 14-3-3 proteins. 
+#=GF RA   Jin DY, Lyu MS, Kozak CA, Jeang KT 
+#=GF RL   Nature 1996;382:308-308.
+#=GF DR   PROSITE; PDOC00633;
+#=GF DR   SMART; 14_3_3;
+#=GF DR   PRINTS; PR00305;
+#=GF DR   SCOP; 1a4o; fa;
+#=GF DR   INTERPRO; IPR000308;
+#=GF SQ   16
+#=GS 1433_LYCES/9-246  AC P93209
+#=GS 1431_ENTHI/4-239  AC P42648
+#=GS 1432_ENTHI/4-238  AC P42649
+#=GS 1434_LYCES/6-243  AC P42652
+#=GS 143B_VICFA/7-242  AC P42654
+#=GS 1433_CAEEL/5-237  AC P41932
+#=GS 143Z_DROME/6-239  AC P29310
+#=GS 1433_XENLA/1-227  AC P29309
+#=GS 143E_HUMAN/4-239  AC P42655
+#=GS 143F_MOUSE/3-240  AC P11576
+#=GS 143R_ARATH/7-245  AC P42647
+#=GS 143S_HUMAN/3-238  AC P31947
+#=GS 143T_HUMAN/3-236  AC P27348
+#=GS BMH1_YEAST/4-240  AC P29311
+#=GS RA24_SCHPO/6-241  AC P42656
+#=GS RA25_SCHPO/5-240  AC P42657
+1433_LYCES/9-246             REENVYMAKLADRAESDEEMVEFMEKVSNSLGS.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EESRG.NEEHVNSIREYRSKIENELSKICDGILKLLDSKLIPSA..TSGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLTAYKAAQDIASAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDG
+1434_LYCES/6-243             REENVYLAKLAEQAERYEEMIEFMEKVAKTADV.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EESRG.NEDHVNTIKEYRSKIEADLSKICDGILSLLESNLIPSA..STAESKVFHLKMKGDYHRYLAEFKTGTERKEAAENTLLAYKSAQDIALAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAISELDTLGEESYKDSTLIMQLLRDNLTLWTSDNADDV
+143R_ARATH/7-245             RDQYVYMAKLAEQAERYEEMVQFMEQLVTGATPAEELTVEERNLLSVAYKNVIGSLRAAWRIVSSIEQK
+EESRK.NDEHVSLVKDYRSKVESELSSVCSGILKLLDSHLIPSA..GASESKVFYLKMKGDYHRYMAEFKSGDERKTAAEDTMLAYKAAQDIAAADMA
+PTHPIRLGLALNFSVFYYEILNSSDKACNMAKQAFEEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDYAGAD
+143B_VICFA/7-242             RENFVYIAKLAEQAERYEEMVDSMKNVANLDV...ELTIEERNLLSVGYKNVIGARRASWRILSSIEQK
+EESKG.NDVNAKRIKEYRHKVETELSNICIDVMRVIDEHLIPSA..AAGESTVFYYKMKGDYYRYLAEFKTGNEKKEAGDQSMKAYESATTAAEAELP
+PTHPIRLGLALNFSVFYYEILNSPERACHLAKQAFDEAISELDTLNEESYKDSTLIMQLLRDNLTLWTSDIPEDG
+143E_HUMAN/4-239             REDLVYQAKLAEQAERYDEMVESMKKVAGMDV...ELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EENKG.GEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAA..NTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELP
+PTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDG
+BMH1_YEAST/4-240             REDSVYLAKLAEQAERYEEMVENMKTVASSGQ...ELSVEERNLLSVAYKNVIGARRASWRIVSSIEQK
+EESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSA..TTGESKVFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELP
+PTHPIRLGLALNFSVFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSESG
+RA24_SCHPO/6-241             REDAVYLAKLAEQAERYEGMVENMKSVASTDQ...ELTVEERNLLSVAYKNVIGARRASWRIVSSIEQK
+EESKG.NTAQVELIKEYRQKIEQELDTICQDILTVLEKHLIPNA..ASAESKVFYYKMKGDYYRYLAEFAVGEKRQHSADQSLEGYKAASEIATAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACYLAKQAFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYSA
+RA25_SCHPO/5-240             RENSVYLAKLAEQAERYEEMVENMKKVACSND...KLSVEERNLLSVAYKNIIGARRASWRIISSIEQK
+EESRG.NTRQAALIKEYRKKIEDELSDICHDVLSVLEKHLIPAA..TTGESKVFYYKMKGDYYRYLAEFTVGEVCKEAADSSLEAYKAASDIAVAELP
+PTDPMRLGLALNFSVFYYEILDSPESACHLAKQVFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYNQ
+1431_ENTHI/4-239             REDCVYTAKLAEQSERYDEMVQCMKQVAEMEA...ELSIEERNLLSVAYKNVIGAKRASWRIISSLEQK
+EQAKG.NDKHVEIIKGYRAKIEKELSTCCDDVLKVIQENLLPKA..STSESKVFFKKMEGDYYRYFAEFTVDEKRKEVADKSLAAYTEATEISNAELA
+PTHPIRLGLALNFSVFYFEIMNDADKACQLAKQAFDDAIAKLDEVPENMYKDSTLIMQLLRDNLTLWTSDACDEE
+1432_ENTHI/4-238             REDLVYLSKLAEQSERYEEMVQYMKQVAEMGT...ELSVEERNLISVAYKNVVGSRRASWRIISSLEQK
+EQAKG.NTQRVELIKTYRAKIEQELSQKCDDVLKIITEFLLKNS..TSIESKVFFKKMEGDYYRYYAEFTVDEKRKEVADKSLAAYQEATDTA.ASLV
+PTHPIRLGLALNFSVFYYQIMNDADKACQLAKEAFDEAIQKLDEVPEESYKESTLIMQLLRDNLTLWTSDMGDDE
+1433_CAEEL/5-237             VEELVQRAKLAEQAERYDDMAAAMKKVTEQGQ...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEG...SEKKQQLAKEYRVKVEQELNDICQDVLKLLDEFLIVKA..GAAESKAFYLKMKGDYYRYLAEVAS.EDRAAVVEKSQKAYQEALDIAKDKMQ
+PTHPIRLGLALNFSVFYYEILNTPEHACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDVGAED
+143Z_DROME/6-239             KEELVQKAKLAEQSERYDDMAQAMKSVTETGV...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEA...SARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKA..SNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQ
+PTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDE
+1433_XENLA/1-227             .......AKLSEQAERYDDMAASMKAVTELGA...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEG...NDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNA..TPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQ
+PTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEE
+143T_HUMAN/3-236             KTELIQKAKLAEQAERYDDMATCMKAVTEQGA...ELSNEERNLLSVAYKNVVGGRRSAWRVISSIEQK
+TDT...SDKKLQLIKDYREKVESELRSICTTVLELLDKYLIANA..TNPESKVFYLKMKGDYFRYLAEVACGDDRKQTIDNSQGAYQEAFDISKKEMQ
+PTHPIRLGLALNFSVFYYEILNNPELACTLAKTAFDEAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDSAGEE
+143F_MOUSE/3-240             REQLLQRARLAEQAERYDDMASAMKAVTELNE...PLSNEDRNLLSVAYKNVVGARRSSWRVISSIEQK
+TMADG.NEKKLEKVKAYREKIEKELETVCNDVLALLDKFLIKNCNDFQYESKVFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEHMQ
+PTHPIRLGLALNFSVFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDEE
+143S_HUMAN/3-238             RASLIQKAKLAEQAERYEDMAAFMKGAVEKGE...ELSCEERNLLSVAYKNVVGGQRAAWRVLSSIEQK
+SNEEG.SEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEA..GDAESRVFYLKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMP
+PTNPIRLGLALNFSVFHYEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEE
+//
+# STOCKHOLM 1.0
+#=GF ID   14-3-3
+#=GF AC   PF00244
+#=GF DE   14-3-3 proteins
+#=GF AU   Finn RD
+#=GF AL   Clustalw
+#=GF SE   Prosite
+#=GF GA   25 25
+#=GF TC   35.40 35.40
+#=GF NC   8.80 8.80
+#=GF BM   hmmbuild -f HMM SEED
+#=GF BM   hmmcalibrate --seed 0 HMM
+#=GF RN   [1]
+#=GF RM   95327195
+#=GF RT   Structure of a 14-3-3 protein and implications for
+#=GF RT   coordination of multiple signalling pathways. 
+#=GF RA   Xiao B, Smerdon SJ, Jones DH, Dodson GG, Soneji Y, Aitken
+#=GF RA   A, Gamblin SJ; 
+#=GF RL   Nature 1995;376:188-191.
+#=GF RN   [2]
+#=GF RM   95327196
+#=GF RT   Crystal structure of the zeta isoform of the 14-3-3
+#=GF RT   protein. 
+#=GF RA   Liu D, Bienkowska J, Petosa C, Collier RJ, Fu H, Liddington
+#=GF RA   R; 
+#=GF RL   Nature 1995;376:191-194.
+#=GF RN   [3]
+#=GF RM   96182649
+#=GF RT   Interaction of 14-3-3 with signaling proteins is mediated
+#=GF RT   by the recognition of phosphoserine. 
+#=GF RA   Muslin AJ, Tanner JW, Allen PM, Shaw AS; 
+#=GF RL   Cell 1996;84:889-897.
+#=GF RN   [4]
+#=GF RM   97424374
+#=GF RT   The 14-3-3 protein binds its target proteins with a common
+#=GF RT   site located towards the C-terminus. 
+#=GF RA   Ichimura T, Ito M, Itagaki C, Takahashi M, Horigome T,
+#=GF RA   Omata S, Ohno S, Isobe T 
+#=GF RL   FEBS Lett 1997;413:273-276.
+#=GF RN   [5]
+#=GF RM   96394689
+#=GF RT   Molecular evolution of the 14-3-3 protein family. 
+#=GF RA   Wang W, Shakes DC 
+#=GF RL   J Mol Evol 1996;43:384-398.
+#=GF RN   [6]
+#=GF RM   96300316
+#=GF RT   Function of 14-3-3 proteins. 
+#=GF RA   Jin DY, Lyu MS, Kozak CA, Jeang KT 
+#=GF RL   Nature 1996;382:308-308.
+#=GF DR   PROSITE; PDOC00633;
+#=GF DR   SMART; 14_3_3;
+#=GF DR   PRINTS; PR00305;
+#=GF DR   SCOP; 1a4o; fa;
+#=GF DR   INTERPRO; IPR000308;
+#=GF SQ   16
+#=GS 1433_LYCES/9-246  AC P93209
+#=GS 1431_ENTHI/4-239  AC P42648
+#=GS 1432_ENTHI/4-238  AC P42649
+#=GS 1434_LYCES/6-243  AC P42652
+#=GS 143B_VICFA/7-242  AC P42654
+#=GS 1433_CAEEL/5-237  AC P41932
+#=GS 143Z_DROME/6-239  AC P29310
+#=GS 1433_XENLA/1-227  AC P29309
+#=GS 143E_HUMAN/4-239  AC P42655
+#=GS 143F_MOUSE/3-240  AC P11576
+#=GS 143R_ARATH/7-245  AC P42647
+#=GS 143S_HUMAN/3-238  AC P31947
+#=GS 143T_HUMAN/3-236  AC P27348
+#=GS BMH1_YEAST/4-240  AC P29311
+#=GS RA24_SCHPO/6-241  AC P42656
+#=GS RA25_SCHPO/5-240  AC P42657
+1433_LYCES/9-246             REENVYMAKLADRAESDEEMVEFMEKVSNSLGS.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EESRG.NEEHVNSIREYRSKIENELSKICDGILKLLDSKLIPSA..TSGDSKVFYLKMKGDYHRYLAEFKTGAERKEAAESTLTAYKAAQDIASAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDMQDDG
+1434_LYCES/6-243             REENVYLAKLAEQAERYEEMIEFMEKVAKTADV.EELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EESRG.NEDHVNTIKEYRSKIEADLSKICDGILSLLESNLIPSA..STAESKVFHLKMKGDYHRYLAEFKTGTERKEAAENTLLAYKSAQDIALAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACNLAKQAFDEAISELDTLGEESYKDSTLIMQLLRDNLTLWTSDNADDV
+143R_ARATH/7-245             RDQYVYMAKLAEQAERYEEMVQFMEQLVTGATPAEELTVEERNLLSVAYKNVIGSLRAAWRIVSSIEQK
+EESRK.NDEHVSLVKDYRSKVESELSSVCSGILKLLDSHLIPSA..GASESKVFYLKMKGDYHRYMAEFKSGDERKTAAEDTMLAYKAAQDIAAADMA
+PTHPIRLGLALNFSVFYYEILNSSDKACNMAKQAFEEAIAELDTLGEESYKDSTLIMQLLRDNLTLWTSDYAGAD
+143B_VICFA/7-242             RENFVYIAKLAEQAERYEEMVDSMKNVANLDV...ELTIEERNLLSVGYKNVIGARRASWRILSSIEQK
+EESKG.NDVNAKRIKEYRHKVETELSNICIDVMRVIDEHLIPSA..AAGESTVFYYKMKGDYYRYLAEFKTGNEKKEAGDQSMKAYESATTAAEAELP
+PTHPIRLGLALNFSVFYYEILNSPERACHLAKQAFDEAISELDTLNEESYKDSTLIMQLLRDNLTLWTSDIPEDG
+143E_HUMAN/4-239             REDLVYQAKLAEQAERYDEMVESMKKVAGMDV...ELTVEERNLLSVAYKNVIGARRASWRIISSIEQK
+EENKG.GEDKLKMIREYRQMVETELKLICCDILDVLDKHLIPAA..NTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAYKAASDIAMTELP
+PTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMQGDG
+BMH1_YEAST/4-240             REDSVYLAKLAEQAERYEEMVENMKTVASSGQ...ELSVEERNLLSVAYKNVIGARRASWRIVSSIEQK
+EESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSA..TTGESKVFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELP
+PTHPIRLGLALNFSVFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSESG
+RA24_SCHPO/6-241             REDAVYLAKLAEQAERYEGMVENMKSVASTDQ...ELTVEERNLLSVAYKNVIGARRASWRIVSSIEQK
+EESKG.NTAQVELIKEYRQKIEQELDTICQDILTVLEKHLIPNA..ASAESKVFYYKMKGDYYRYLAEFAVGEKRQHSADQSLEGYKAASEIATAELA
+PTHPIRLGLALNFSVFYYEILNSPDRACYLAKQAFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYSA
+RA25_SCHPO/5-240             RENSVYLAKLAEQAERYEEMVENMKKVACSND...KLSVEERNLLSVAYKNIIGARRASWRIISSIEQK
+EESRG.NTRQAALIKEYRKKIEDELSDICHDVLSVLEKHLIPAA..TTGESKVFYYKMKGDYYRYLAEFTVGEVCKEAADSSLEAYKAASDIAVAELP
+PTDPMRLGLALNFSVFYYEILDSPESACHLAKQVFDEAISELDSLSEESYKDSTLIMQLLRDNLTLWTSDAEYNQ
+1431_ENTHI/4-239             REDCVYTAKLAEQSERYDEMVQCMKQVAEMEA...ELSIEERNLLSVAYKNVIGAKRASWRIISSLEQK
+EQAKG.NDKHVEIIKGYRAKIEKELSTCCDDVLKVIQENLLPKA..STSESKVFFKKMEGDYYRYFAEFTVDEKRKEVADKSLAAYTEATEISNAELA
+PTHPIRLGLALNFSVFYFEIMNDADKACQLAKQAFDDAIAKLDEVPENMYKDSTLIMQLLRDNLTLWTSDACDEE
+1432_ENTHI/4-238             REDLVYLSKLAEQSERYEEMVQYMKQVAEMGT...ELSVEERNLISVAYKNVVGSRRASWRIISSLEQK
+EQAKG.NTQRVELIKTYRAKIEQELSQKCDDVLKIITEFLLKNS..TSIESKVFFKKMEGDYYRYYAEFTVDEKRKEVADKSLAAYQEATDTA.ASLV
+PTHPIRLGLALNFSVFYYQIMNDADKACQLAKEAFDEAIQKLDEVPEESYKESTLIMQLLRDNLTLWTSDMGDDE
+1433_CAEEL/5-237             VEELVQRAKLAEQAERYDDMAAAMKKVTEQGQ...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEG...SEKKQQLAKEYRVKVEQELNDICQDVLKLLDEFLIVKA..GAAESKAFYLKMKGDYYRYLAEVAS.EDRAAVVEKSQKAYQEALDIAKDKMQ
+PTHPIRLGLALNFSVFYYEILNTPEHACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDVGAED
+143Z_DROME/6-239             KEELVQKAKLAEQSERYDDMAQAMKSVTETGV...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEA...SARKQQLAREYRERVEKELREICYEVLGLLDKYLIPKA..SNPESKVFYLKMKGDYYRYLAEVATGDARNTVVDDSQTAYQDAFDISKGKMQ
+PTHPIRLGLALNFSVFYYEILNSPDKACQLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDTQGDE
+1433_XENLA/1-227             .......AKLSEQAERYDDMAASMKAVTELGA...ELSNEERNLLSVAYKNVVGARRSSWRVISSIEQK
+TEG...NDKRQQMAREYREKVETELQDICKDVLDLLDRFLVPNA..TPPESKVFYLKMKGDYYRYLSEVASGDSKQETVASSQQAYQEAFEISKSEMQ
+PTHPIRLGLALNFSVFYYEILNSPEKACSLAKSAFDEAIRELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGEE
+143T_HUMAN/3-236             KTELIQKAKLAEQAERYDDMATCMKAVTEQGA...ELSNEERNLLSVAYKNVVGGRRSAWRVISSIEQK
+TDT...SDKKLQLIKDYREKVESELRSICTTVLELLDKYLIANA..TNPESKVFYLKMKGDYFRYLAEVACGDDRKQTIDNSQGAYQEAFDISKKEMQ
+PTHPIRLGLALNFSVFYYEILNNPELACTLAKTAFDEAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDSAGEE
+143F_MOUSE/3-240             REQLLQRARLAEQAERYDDMASAMKAVTELNE...PLSNEDRNLLSVAYKNVVGARRSSWRVISSIEQK
+TMADG.NEKKLEKVKAYREKIEKELETVCNDVLALLDKFLIKNCNDFQYESKVFYLKMKGDYYRYLAEVASGEKKNSVVEASEAAYKEAFEISKEHMQ
+PTHPIRLGLALNFSVFYYEIQNAPEQACLLAKQAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDEE
+143S_HUMAN/3-238             RASLIQKAKLAEQAERYEDMAAFMKGAVEKGE...ELSCEERNLLSVAYKNVVGGQRAAWRVLSSIEQK
+SNEEG.SEEKGPEVREYREKVETELQGVCDTVLGLLDSHLIKEA..GDAESRVFYLKMKGDYYRYLAEVATGDDKKRIIDSARSAYQEAMDISKKEMP
+PTNPIRLGLALNFSVFHYEIANSPEEAISLAKTTFDEAMADLHTLSEDSYKDSTLIMQLLRDNLTLWTADNAGEE
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testaln2.fasta
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testaln2.fasta	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testaln2.fasta	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,120 @@
+>AK1H_ECOLI/114-431 DESCRIPTION HERE
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIVAIA
+>AK1H_ECOLI_dup1 DESCRIPTION HERE
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIVAIA
+>AK1H_ECOLI_dup2 DESCRIPTION HERE
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIVAIA
+>AK1H_ECOLI_dup3_head_gap DESCRIPTION HERE
+---SINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIV---
+>AK1H_ECOLI_dup4_end_gap DESCRIPTION HERE
+CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE
+STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD
+CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH
+PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------
+NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS
+DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF---
+---FAALARANINIV---
+>AKH_HAEIN 114-431
+-----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQ
+LLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNG
+SDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLP
+TLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESL
+Q----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS
+---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQ
+AKGIAARF------FSALAQANISIVAIA
+>AKH1_MAIZE/117-440
+-----------------ATESFSDFVVGHGELWSAQMLSYAIQKSGT-P--CSWMDTREV
+LVVNPSGANQVDPDYLESEKRLEKWFSRC-P-AETIIATGF---IASTPENIPTTLKRDG
+SDFSAAI-----------------IGSLVKARQVTIWTDVDGVFSADP--RKVSEAVILS
+TLSYQEAWEMSYFGANVLHPRTIIPVMKYNIPIVIRNIFNTSAPGTM-IC--QQPANENG
+DLEACVKAFATIDKLALVNVEGTGMAGV---PGTANAIFGAVKDVGANVIMISQASSEHS
+---VCFAVPEKEVALVSAALHARFR-----EALAAGRLSKVEVIHNCSILAT-VGLRMAS
+TPGVSATL------FDALAKANINVRAIA
+>AK2H_ECOLI/112-431
+-----------------INDAVYAEVVGHGEVWSARLMSAVLNQQG-----LPAAWLDAR
+EFLRAERAAQPQVDEGLSYPLLQQLLVQH-P-GKRLVVTGF---ISRNNAGETVLLGRNG
+SDYSATQ-----------------IGALAGVSRVTIWSDVAGVYSADP--RKVKDACLLP
+LLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLASGTGARI
+VTSHDDVCLI-EFQVPASQDFKLAHKEI--DQILKRAQVRPLAVGVHNDRQLLQFCYTSE
+VADSALKILDEAG---------LPGELRLRQGLALVAMVGAGVTRNPLHCHRFWQQLKGQ
+PVEFTWQSDDGISLVAVL
+>AK1_BACSU/66-374
+-----------------ISPREQDLLLSCGETISSVVFTSMLLDNGVKA--AALTGAQAG
+FLTNDQHTNAKIIEMKPER--LFSVLAN----HDAVVVAGF---QGATEKGDTTTIGRGG
+SDTSAAA-----------------LGAAVDAEYIDIFTDVEGVMTADP--RVVENAKPLP
+VVTYTEICNLAYQGAKVISPRAVEIAMQAKVPIRVRSTYS-NDKGTLVTSHHSSKVGSDV
+FERLITGIAH-VKDVTQFKVPAKIGQYN-----VQTEVFKAMANAGISVDFFNITPSEIV
+YTVAGNKTETAQR------------ILMDMGYDPMVTRNCAKVSAVGAGIMGVPGVTSKI
+------VSALSEKEIPILQSA
+>AK2_BACST/63-370
+-----------------KRE--MDMLLSTGEQVSIALLAMSLHEKGYKA--VSLTGWQAG
+ITTEEMHGNARIMNIDTT--RIRRCLDE----GAIVIVAGF---QGVTETGEITTLGRGG
+SDTTAVA-----------------LAAALKAEKCDIYTDVTGVFTTDP--RYVKTARKIK
+EISYDEMLELANLGAGVLHPRAVEFAKNYEVPLEVRSSME-NERGTMVK--EEVSMEQHL
+IVRGIAFEDQ-VTRVTVVGIEKYLQSVA--------TIFTALANRGINVDIIIQNA----
+----------------TNSETAS--VSFSIRTEDLPETLQVLQ-------------ALEG
+ADVHYESGLAKVSI-VGSGMISNPGVAARV------FEVLADQGIEIKMVS
+>AK2_BACSU/63-373
+-----------------KRE--MDMLLATGEQVTISLLSMALQEKGYDA--VSYTGWQAG
+IRTEAIHGNARITDIDTS--VLADQLEK----GKIVIVAGF---QGMTEDCEITTLGRGG
+SDTTAVA-----------------LAAALKVDKCDIYTDVPGVFTTDP--RYVKSARKLE
+GISYDEMLELANLGAGVLHPRAVEFAKNYQVPLEVRSSTE-TEAGTLIE--EESSMEQNL
+IVRGIAFEDQ-ITRVTIYGLTSGLTTLS--------TIFTTLAKRNINVDIIIQTQ----
+----------------AEDKTG---ISFSVKTEDADQTVAVLEEYK---------DALEF
+EKIETESKLAKVSI-VGSGMVSNPGVAAEM------FAVLAQKNILIKMVS
+>AKAB_CORFL/63-379
+-----------------ARE--MDMLLTAGERISNALVAMAIESLGAEA--QSFTGSQAG
+VLTTERHGNARIVDVTPG--RVREALDE----GKICIVAGF--QGVNKETRDVTTLGRGG
+SDTTAVA-----------------LAAALNADVCEIYSDVDGVYTADP--RIVPNAQKLE
+KLSFEEMLELAAVGSKILVLRSVEYARAFNVPLRVRSSYS-NDPGTLIAGSMEDIPVEEA
+VLTGVATDKS-EAKVTVLGISDKPGEAA--------KVFRALADAEINIDMVLQNV----
+----------------SSVEDGTTDITFTCPRADGRRAMEILKKLQ---------VQGNW
+TNVLYDDQVDKVSL-VGAGMKSHPGVTAEF------MEALRDVNVNIELIS
+>AKAB_MYCSM/63-379
+-----------------PRE--MDMLLTAGERISNALVAMAIESLGAQA--RSFTGSQAG
+VITTGTHGNAKIIDVTPG--RLRDALDE----GQIVLVAGF--QGVSQDSKDVTTLGRGG
+SDTTAVA-----------------VAAALDADVCEIYTDVDGIFTADP--RIVPNARHLD
+TVSFEEMLEMAACGAKVLMLRCVEYARRYNVPIHVRSSYS-DKPGTIVKGSIEDIPMEDA
+ILTGVAHDRS-EAKVTVVGLPDVPGYAA--------KVFRAVAEADVNIDMVLQNI----
+----------------SKIEDGKTDITFTCARDNGPRAVEKLSALK---------SEIGF
+SQVLYDDHIGKVSL-IGAGMRSHPGVTATF------CEALAEAGINIDLIS
+>AK3_ECOLI/106-407
+-----------------TSPALTDELVSHGELMSTLLFVEILRERD--V--QAQWFDVRK
+VMRTNDRFGRAEPDIAALAELAALQLLPR-LNEGLVITQGF---IGSENKGRTTTLGRGG
+SDYTAAL-----------------LAEALHASRVDIWTDVPGIYTTDP--RVVSAAKRID
+EIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTLVCNKTENPPLFRA
+LAL--RRNQT-LLTLHSLNMLHSRGFLA--------EVFGILARHNISVDLITTSEVSVA
+LTLDTTGSTSTG----------DTLLTQSLLMELSALCRVEVEEGLALVALIG-------
+---NDLSKACGVGKEVF
+>AK_YEAST/134-472 A COMMENT FOR YEAST
+-----------------VSSRTVDLVMSCGEKLSCLFMTALCNDRGCKAKYVDLSHIVPS
+DFSASALDNSFYTFLVQALKEKLAPFVSA-KERIVPVFTGF---FGLVPTGLLNGVGRGY
+TDLCAAL-----------------IAVAVNADELQVWKEVDGIFTADP--RKVPEARLLD
+SVTPEEASELTYYGSEVIHPFTMEQVIRAKIPIRIKNVQNPLGNGTIIYPDNVAKKGEST
+PPHPPENLSS----SFYEKRKRGATAITTKN----DIFVINIHSNKKTLSHGFLAQIFTI
+LDKYKLVVDLISTSEVHVSMALPIPDADS-LKSLRQAEEKLRILGSVDITKKLSIVSLVG
+KHMKQYIGIAG---TMFTTLAEEGINIEMIS

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testdat.exonerate
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testdat.exonerate	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testdat.exonerate	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,80 @@
+C4 Alignment display:
+  Model: est2genome
+  Raw score: 1615
+  Aligned positions 65->416 of query
+  Aligned positions 0->939 of target
+
+Query: ln27 
+Target: Contig124
+
+  66 : GTGATGTAGGAAAGAAGCCGGCTGGGACTGGCGCGGTGATATCGACGTGA : 115
+       |||||||||||||||| |||||||||||||||||||||||||||||||||
+   1 : GTGATGTAGGAAAGAAACCGGCTGGGACTGGCGCGGTGATATCGACGTGA :  50
+
+ 116 : TTGTTCATGCTGCCGCTAGGCTGGGAGTCTTCAAGGCGACAGGCATCGCT : 165
+       ||||||||||||||||||||||||||||||||||||||||||||||||||
+  51 : TTGTTCATGCTGCCGCTAGGCTGGGAGTCTTCAAGGCGACAGGCATCGCT : 100
+
+ 166 : GCAATAAGTGTCTCGTGGATTGTC  <<<< Intron 1 <<<<  CAAT : 193
+       ||||||||||||||||||||||||         53 bp        ||||
+ 101 : GCAATAAGTGTCTCGTGGATTGTCct..................acCAAT : 181
+
+ 194 : TGTTTAGAGCAAGCCAAACACCAAGCAGTATCCATACCCATATCAATCAT : 243
+       ||||||||||||||||||||||||||||||||||||||||||||||||||
+ 182 : TGTTTAGAGCAAGCCAAACACCAAGCAGTATCCATACCCATATCAATCAT : 231
+
+ 244 : AGTCATTTTGAGCGGTATTGTGTCGGGCCAAGAGTGATTGTAGATGTAGA : 293
+       ||||||||||||||||||||||||||||||||||||||||||||||||||
+ 232 : AGTCATTTTGAGCGGTATTGTGTCGGGCCAAGAGTGATTGTAGATGTAGA : 281
+
+ 294 : AGGGTAAAGAAAACGGGGCTGTTACAAGACAAAGGAGAAGAGAGTCGTCC : 343
+       ||||||||||||||||||||||||||||||||||||||||||||||||||
+ 282 : AGGGTAAAGAAAACGGGGCTGTTACAAGACAAAGGAGAAGAGAGTCGTCC : 331
+
+ 344 : GAGGGAGTTAGTGATGGGGAGAGTCGAAATGC  <<<< Intron 2 << : 375
+       ||||||||||||||||||||||||||||||||        535 bp    
+ 332 : GAGGGAGTTAGTGATGGGGAGAGTCGAAATGCct................ : 898
+
+ 376 : <<  CTACACCAGGCTGCAATATTCTCGCCTATAATTCTGCGCTT : 416
+           |||||||||||||||||||||||||||||||||||||||||
+ 899 : ..acCTACACCAGGCTGCAATATTCTCGCCTATAATTCTGCGCTT : 939
+
+cigar: ln27 65 416 - Contig124 0 939 + 1615 M 124 D 53 M 186 D 535 M 41
+C4 Alignment display:
+  Model: est2genome
+  Raw score: 1152
+  Aligned positions 385->644 of query
+  Aligned positions 900->1296 of target
+
+Query: ln74
+Target: Contig275
+
+  386 : CTTGGGGTCCTTCTCCGATTCACTGCTTCCACCGCTGCTGCCGC  <<<< :  429
+        ||||||||||||||||||||||||||||||||||||||||||||      
+  901 : CTTGGGGTCCTTCTCCGATTCACTGCTTCCACCGCTGCTGCCGCct.... :  997
+
+  430 :  Intron 1 <<<<  ACTTTTGCAAAATGAAGACATTTTCCCTCTTACC :  463
+           53 bp        ||||||||||||||||||||||||||||||||||
+  998 : ..............acACTTTTGCAAAATGAAGACATTTTCCCTCTTACC : 1031
+
+  464 : ACCCCACTTTTCCAAGGCGTCCTTGACGCTCTTACTGTTTTTGTACATTT :  513
+        ||||||||||||||||||||||||||||||||||||||||||||||||||
+ 1032 : ACCCCACTTTTCCAAGGCGTCCTTGACGCTCTTACTGTTTTTGTACATTT : 1081
+
+  514 : GAGCACAATCAATGTAGGTGTACCCGGTCTTCAAGGCTGAGACGAGATGC :  563
+        ||||||||||||||||||||||||||||||||||||||||||||||||||
+ 1082 : GAGCACAATCAATGTAGGTGTACCCGGTCTTCAAGGCTGAGACGAGATGC : 1131
+
+  564 : TCAACGCATTCCTTGCCGTAGTGGGTGGTGC  <<<< Intron 2 <<< :  594
+        |||||||||||||||||||||||||||||||         84 bp     
+ 1132 : TCAACGCATTCCTTGCCGTAGTGGGTGGTGCct................. : 1246
+
+  595 : <  CAAGACCAAACCCAATTTTGGGGATCTTGACACCATCATTGAGAGTT :  641
+           |||||||||||||||||||||||||||||||||||||||||||||||
+ 1247 : .acCAAGACCAAACCCAATTTTGGGGATCTTGACACCATCATTGAGAGTT : 1293
+
+  642 : ATG :  644
+        |||
+ 1294 : ATG : 1296
+
+cigar: ln74 385 644 - Contig275 900 1296 + 1152 M 44 D 53 M 165 D 84 M 50

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testdbaccnums.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testdbaccnums.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testdbaccnums.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,443 @@
+BLASTP 2.2.4 [Aug-26-2002]
+
+
+Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, 
+Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
+"Gapped BLAST and PSI-BLAST: a new generation of protein database search
+programs",  Nucleic Acids Res. 25:3389-3402.
+RID: 1036160600-011802-21377
+Query= test_result
+         (8 letters)
+
+Database: All non-redundant GenBank CDS
+translations+PDB+SwissProt+PIR+PRF 
+           1,220,597 sequences; 388,937,031 total letters
+
+
+
+                                                                 Score    E
+Sequences producing significant alignments:                      (bits) Value
+
+gb|NP_065733.1|CYT19 (NM_020682) Cyt19 protein; likely ortholog of r...    31   0.18 
+emb|XP_053690.4|Cyt19 (XM_053690) similar to Cyt19 protein; likely or...    31   0.18 
+dbj|NP_056277.2|DKFZP586L0724 (NM_015462) DKFZP586L0724 protein [Homo sapiens...    21   113  
+pir||T14789 hypothetical protein DKFZp586L0724.1 - human >gi|581...    21   113  
+prf||XP_064862.2 (XM_064862) similar to pheromone receptor [Homo...    21   152  
+pdb|BAB13968.1|1 (AK022138) unnamed protein product [Homo sapiens]      21   204  
+sp|Q16478|GLK5_HUMAN Glutamate receptor, ionotropic kainate 5 pr...    21   204  
+pat|US|NP_002079.2 (NM_002088) glutamate receptor, ionotropic, kai...    21   204  
+bbs|NP_079463.2| (NM_025187) hypothetical protein FLJ12076 [Homo...    21   204  
+gnl|db1|NP_002444.1 (NM_002453) mitochondrial translational initiat...    21   204  
+ref|XP_051877.1| (XM_051877) similar to NS1-binding protein-like...    20   274  
+lcl|AAH16829.1| (BC016829) Unknown (protein for IMAGE:413...    20   274  
+gi|1|gb|NP_065733.1|CYT19 (NM_020682) Cyt19 protein; likely ortholog of r...    31   0.18 
+gi|2|emb|XP_053690.4|Cyt19 (XM_053690) similar to Cyt19 protein; likely or...    31   0.18 
+gi|3|dbj|NP_056277.2|DKFZP586L0724 (NM_015462) DKFZP586L0724 protein [Homo sapiens...    21   113  
+gi|4|pir||T14789 hypothetical protein DKFZp586L0724.1 - human >gi|581...    21   113  
+gi|5|prf||XP_064862.2 (XM_064862) similar to pheromone receptor [Homo...    21   152  
+gi|6|pdb|BAB13968.1|1 (AK022138) unnamed protein product [Homo sapiens]      21   204  
+gi|7|sp|Q16478|GLK5_HUMAN Glutamate receptor, ionotropic kainate 5 pr...    21   204  
+gi|8|pat|US|NP_002079.2 (NM_002088) glutamate receptor, ionotropic, kai...    21   204  
+gi|9|bbs|NP_079463.2| (NM_025187) hypothetical protein FLJ12076 [Homo...    21   204  
+gi|10|gnl|db1|NP_002444.1 (NM_002453) mitochondrial translational initiat...    21   204  
+gi|11|ref|XP_051877.1| (XM_051877) similar to NS1-binding protein-like...    20   274  
+gi|12|lcl|AAH16829.1| (BC016829) Unknown (protein for IMAGE:413...    20   274  
+MY_test_ID (BC016829) Unknown (protein for IMAGE:413...    20   274  
+
+ALIGNMENTS
+>pir||T14789 hypothetical protein DKFZp586L0724.1 - human
+ emb|CAB53709.1| (AL110271) hypothetical protein [Homo sapiens]
+ gb|AAH01726.1|AAH01726 (BC001726) Similar to DKFZP586L0724 protein [Homo sapiens]
+          Length = 314
+
+ Score = 21.4 bits (43), Expect =   113
+ Identities = 6/8 (75%), Positives = 7/8 (87%)
+
+Query: 1   LMFDANFT 8
+           L+ DANFT
+Sbjct: 247 LLLDANFT 254
+
+
+>gb|NP_065733.1|CYT19 (NM_020682) Cyt19 protein; likely ortholog of rat methyltransferase
+           Cyt19; S-adenosylmethionine:arsenic (III)
+           methyltransferase [Homo sapiens]
+ gb|AAG09731.1|AF226730_1 (AF226730) Cyt19 [Homo sapiens]
+          Length = 338
+
+ Score = 30.8 bits (65), Expect = 0.18
+ Identities = 8/8 (100%), Positives = 8/8 (100%)
+
+Query: 1   LMFDANFT 8
+           LMFDANFT
+Sbjct: 286 LMFDANFT 293
+
+
+>emb|XP_053690.4|Cyt19 (XM_053690) similar to Cyt19 protein; likely ortholog of rat
+           methyltransferase Cyt19; S-adenosylmethionine:arsenic
+           (III) methyltransferase [Homo sapiens]
+          Length = 375
+
+ Score = 30.8 bits (65), Expect = 0.18
+ Identities = 8/8 (100%), Positives = 8/8 (100%)
+
+Query: 1   LMFDANFT 8
+           LMFDANFT
+Sbjct: 286 LMFDANFT 293
+
+
+>dbj|NP_056277.2|DKFZP586L0724 (NM_015462) DKFZP586L0724 protein [Homo sapiens]
+ dbj|BAB14647.1| (AK023702) unnamed protein product [Homo sapiens]
+          Length = 719
+
+ Score = 21.4 bits (43), Expect =   113
+ Identities = 6/8 (75%), Positives = 7/8 (87%)
+
+Query: 1   LMFDANFT 8
+           L+ DANFT
+Sbjct: 652 LLLDANFT 659
+
+
+>prf||XP_064862.2 (XM_064862) similar to pheromone receptor [Homo sapiens]
+          Length = 370
+
+ Score = 21.0 bits (42), Expect =   152
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 2   MFDAN 6
+           MFDAN
+Sbjct: 348 MFDAN 352
+
+
+>pdb|BAB13968.1|1 (AK022138) unnamed protein product [Homo sapiens]
+          Length = 258
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1  LMFDA 5
+          LMFDA
+Sbjct: 45 LMFDA 49
+
+
+>sp|Q16478|GLK5_HUMAN Glutamate receptor, ionotropic kainate 5 precursor (Glutamate
+           receptor KA-2) (KA2) (Excitatory amino acid receptor 2)
+           (EAA2)
+ pir||I57936 glutamate receptor subunit - human
+ gb|AAB22591.1| (S40369) glutamate receptor subunit; EAA2; excitatory amino acid
+           receptor 2 [Homo sapiens]
+          Length = 980
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1   LMFDA 5
+           LMFDA
+Sbjct: 306 LMFDA 310
+
+
+>pat|US|NP_002079.2 (NM_002088) glutamate receptor, ionotropic, kainate 5 [Homo
+           sapiens]
+ emb|CAC80547.1| (AJ249209) kainate receptor subunit KA2a [Homo sapiens]
+          Length = 981
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1   LMFDA 5
+           LMFDA
+Sbjct: 306 LMFDA 310
+
+
+>bbs|NP_079463.2| (NM_025187) hypothetical protein FLJ12076 [Homo sapiens]
+ sp|Q9BSU1|U183_HUMAN UPF0183 protein
+ gb|AAH04556.1|AAH04556 (BC004556) Similar to CG7083 gene product [Homo sapiens]
+ gb|AAK96888.1| (AF176088) lin-10-like protein [Homo sapiens]
+          Length = 422
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1  LMFDA 5
+          LMFDA
+Sbjct: 67 LMFDA 71
+
+
+
+ Score = 16.8 bits (32), Expect =  2880
+ Identities = 4/5 (80%), Positives = 5/5 (100%)
+
+Query: 2   MFDAN 6
+           +FDAN
+Sbjct: 296 LFDAN 300
+
+
+>gnl|db1|NP_002444.1 (NM_002453) mitochondrial translational initiation factor 2
+           precursor; IF-2mt [Homo sapiens]
+ sp|P46199|IF2M_HUMAN Translation initiation factor IF-2, mitochondrial precursor
+           (IF-2Mt) (IF-2(Mt))
+ pir||A55628 translation initiation factor IF-2 precursor, mitochondrial - human
+ gb|AAA67038.1| (L34600) initiation factor 2 [Homo sapiens]
+ gb|AAM14617.1|AF494407_1 (AF494407) mitochondrial translation-initiation factor 2 [Homo
+           sapiens]
+ gb|AAM70196.1| (AF495546) translation initiation factor 2 [Homo sapiens]
+          Length = 727
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+
+Query: 1   LMFDAN 6
+           LMFD N
+Sbjct: 398 LMFDEN 403
+
+
+>ref|XP_051877.1| (XM_051877) similar to NS1-binding protein-like protein [Homo
+          sapiens]
+ dbj|BAA74873.1| (AB020657) KIAA0850 protein [Homo sapiens]
+ emb|CAB72329.1| (AL078644) bG279B7.1.1 (NS1-binding protein (KIAA0850, BTB/POZ
+          domain and Kelch motifs containing protein)) [Homo
+          sapiens]
+ gb|AAG43485.1| (AF205218) NS1-binding protein-like protein [Homo sapiens]
+          Length = 642
+
+ Score = 20.2 bits (40), Expect =   274
+ Identities = 6/8 (75%), Positives = 6/8 (75%), Gaps = 1/8 (12%)
+
+Query: 1  LMF-DANF 7
+          LMF D NF
+Sbjct: 7  LMFEDENF 14
+
+
+>lcl|AAH16829.1| (BC016829) Unknown (protein for IMAGE:4132295) [Homo sapiens]
+          Length = 391
+
+ Score = 20.2 bits (40), Expect =   274
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+
+Query: 3  FDANFT 8
+          FD NFT
+Sbjct: 32 FDTNFT 37
+
+>gi|1|gb|NP_065733.1|CYT19 (NM_020682) Cyt19 protein; likely ortholog of rat methyltransferase
+           Cyt19; S-adenosylmethionine:arsenic (III)
+           methyltransferase [Homo sapiens]
+ gb|AAG09731.1|AF226730_1 (AF226730) Cyt19 [Homo sapiens]
+          Length = 338
+
+ Score = 30.8 bits (65), Expect = 0.18
+ Identities = 8/8 (100%), Positives = 8/8 (100%)
+
+Query: 1   LMFDANFT 8
+           LMFDANFT
+Sbjct: 286 LMFDANFT 293
+
+
+>gi|2|emb|XP_053690.4|Cyt19 (XM_053690) similar to Cyt19 protein; likely ortholog of rat
+           methyltransferase Cyt19; S-adenosylmethionine:arsenic
+           (III) methyltransferase [Homo sapiens]
+          Length = 375
+
+ Score = 30.8 bits (65), Expect = 0.18
+ Identities = 8/8 (100%), Positives = 8/8 (100%)
+
+Query: 1   LMFDANFT 8
+           LMFDANFT
+Sbjct: 286 LMFDANFT 293
+
+
+>gi|3|dbj|NP_056277.2|DKFZP586L0724 (NM_015462) DKFZP586L0724 protein [Homo sapiens]
+ dbj|BAB14647.1| (AK023702) unnamed protein product [Homo sapiens]
+          Length = 719
+
+ Score = 21.4 bits (43), Expect =   113
+ Identities = 6/8 (75%), Positives = 7/8 (87%)
+
+Query: 1   LMFDANFT 8
+           L+ DANFT
+Sbjct: 652 LLLDANFT 659
+
+
+>gi|4|pir||T14789 hypothetical protein DKFZp586L0724.1 - human
+ emb|CAB53709.1| (AL110271) hypothetical protein [Homo sapiens]
+ gb|AAH01726.1|AAH01726 (BC001726) Similar to DKFZP586L0724 protein [Homo sapiens]
+          Length = 314
+
+ Score = 21.4 bits (43), Expect =   113
+ Identities = 6/8 (75%), Positives = 7/8 (87%)
+
+Query: 1   LMFDANFT 8
+           L+ DANFT
+Sbjct: 247 LLLDANFT 254
+
+
+>gi|5|prf||XP_064862.2 (XM_064862) similar to pheromone receptor [Homo sapiens]
+          Length = 370
+
+ Score = 21.0 bits (42), Expect =   152
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 2   MFDAN 6
+           MFDAN
+Sbjct: 348 MFDAN 352
+
+
+>gi|6|pdb|BAB13968.1|1 (AK022138) unnamed protein product [Homo sapiens]
+          Length = 258
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1  LMFDA 5
+          LMFDA
+Sbjct: 45 LMFDA 49
+
+
+>gi|7|sp|Q16478|GLK5_HUMAN Glutamate receptor, ionotropic kainate 5 precursor (Glutamate
+           receptor KA-2) (KA2) (Excitatory amino acid receptor 2)
+           (EAA2)
+ pir||I57936 glutamate receptor subunit - human
+ gb|AAB22591.1| (S40369) glutamate receptor subunit; EAA2; excitatory amino acid
+           receptor 2 [Homo sapiens]
+          Length = 980
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1   LMFDA 5
+           LMFDA
+Sbjct: 306 LMFDA 310
+
+
+>gi|8|pat|US|NP_002079.2 (NM_002088) glutamate receptor, ionotropic, kainate 5 [Homo
+           sapiens]
+ emb|CAC80547.1| (AJ249209) kainate receptor subunit KA2a [Homo sapiens]
+          Length = 981
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1   LMFDA 5
+           LMFDA
+Sbjct: 306 LMFDA 310
+
+
+>gi|9|bbs|NP_079463.2| (NM_025187) hypothetical protein FLJ12076 [Homo sapiens]
+ sp|Q9BSU1|U183_HUMAN UPF0183 protein
+ gb|AAH04556.1|AAH04556 (BC004556) Similar to CG7083 gene product [Homo sapiens]
+ gb|AAK96888.1| (AF176088) lin-10-like protein [Homo sapiens]
+          Length = 422
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/5 (100%), Positives = 5/5 (100%)
+
+Query: 1  LMFDA 5
+          LMFDA
+Sbjct: 67 LMFDA 71
+
+
+
+ Score = 16.8 bits (32), Expect =  2880
+ Identities = 4/5 (80%), Positives = 5/5 (100%)
+
+Query: 2   MFDAN 6
+           +FDAN
+Sbjct: 296 LFDAN 300
+
+
+>gi|10|gnl|db1|NP_002444.1 (NM_002453) mitochondrial translational initiation factor 2
+           precursor; IF-2mt [Homo sapiens]
+ sp|P46199|IF2M_HUMAN Translation initiation factor IF-2, mitochondrial precursor
+           (IF-2Mt) (IF-2(Mt))
+ pir||A55628 translation initiation factor IF-2 precursor, mitochondrial - human
+ gb|AAA67038.1| (L34600) initiation factor 2 [Homo sapiens]
+ gb|AAM14617.1|AF494407_1 (AF494407) mitochondrial translation-initiation factor 2 [Homo
+           sapiens]
+ gb|AAM70196.1| (AF495546) translation initiation factor 2 [Homo sapiens]
+          Length = 727
+
+ Score = 20.6 bits (41), Expect =   204
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+
+Query: 1   LMFDAN 6
+           LMFD N
+Sbjct: 398 LMFDEN 403
+
+
+>gi|11|ref|XP_051877.1| (XM_051877) similar to NS1-binding protein-like protein [Homo
+          sapiens]
+ dbj|BAA74873.1| (AB020657) KIAA0850 protein [Homo sapiens]
+ emb|CAB72329.1| (AL078644) bG279B7.1.1 (NS1-binding protein (KIAA0850, BTB/POZ
+          domain and Kelch motifs containing protein)) [Homo
+          sapiens]
+ gb|AAG43485.1| (AF205218) NS1-binding protein-like protein [Homo sapiens]
+          Length = 642
+
+ Score = 20.2 bits (40), Expect =   274
+ Identities = 6/8 (75%), Positives = 6/8 (75%), Gaps = 1/8 (12%)
+
+Query: 1  LMF-DANF 7
+          LMF D NF
+Sbjct: 7  LMFEDENF 14
+
+
+>gi|12|lcl|AAH16829.1| (BC016829) Unknown (protein for IMAGE:4132295) [Homo sapiens]
+          Length = 391
+
+ Score = 20.2 bits (40), Expect =   274
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+
+Query: 3  FDANFT 8
+          FD NFT
+Sbjct: 32 FDTNFT 37
+
+>MY_test_ID (BC016829) Unknown (protein for IMAGE:4132295) [Homo sapiens]
+          Length = 391
+
+ Score = 20.2 bits (40), Expect =   274
+ Identities = 5/6 (83%), Positives = 5/6 (83%)
+
+Query: 3  FDANFT 8
+          FD NFT
+Sbjct: 32 FDTNFT 37
+
+
+  Database: All non-redundant GenBank CDS
+  translations+PDB+SwissProt+PIR+PRF
+    Posted date:  Oct 31, 2002  3:52 AM
+  Number of letters in database: 388,937,031
+  Number of sequences in database:  1,220,597
+  
+Lambda     K      H
+   0.328    0.274     1.77 
+
+Gapped
+Lambda     K      H
+   0.294    0.110    0.610 
+
+
+Matrix: PAM30
+Gap Penalties: Existence: 9, Extension: 1
+Number of Hits to DB: 660,045
+Number of Sequences: 1220597
+Number of extensions: 3731
+Number of successful extensions: 3028
+Number of sequences better than 200000.0: 2850
+Number of HSP's better than 200000.0 without gapping: 2850
+Number of HSP's successfully gapped in prelim test: 0
+Number of HSP's that attempted gapping in prelim test: 0
+Number of HSP's gapped (non-prelim): 3028
+length of query: 8
+length of database: 35,444,647
+effective HSP length: 0
+effective length of query: 9
+effective length of database: 35,444,647
+effective search space: 319001823
+effective search space used: 319001823
+T: 11
+A: 40
+X1: 15 ( 7.1 bits)
+X2: 35 (14.8 bits)
+X3: 58 (24.6 bits)
+S1: 18 (10.4 bits)
+S2: 18 (10.8 bits)
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testfile.erpin
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testfile.erpin	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testfile.erpin	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+
+Training set:	"/home/Administrator/pyrR.epn":
+		40 sequences of length 43
+Cutoff:		1.00  
+
+Database:	"AE016879.fna"
+		5227266 nucleotides to be processed in 1 sequence
+		ATGC ratios: 0.322  0.324  0.176  0.178
+E-value at cutoff 1.0 for 5.2Mb double strand data: 4.59e-02
+
+>gi|30260185|gb|AE016879.1| Bacillus anthracis str. Ames, complete genome
+FW   1 5181155..5181183  30.36  1.68e-05
+CTTT.aacc--.CAACC.CCGTGA.GGTTG.a.GAAG
+>gi|30260185|gb|AE016879.1| Bacillus anthracis str. Ames, complete genome
+RC   1 3709092..3709121  28.97  5.61e-05
+CTTT.taatt-.CAGTC.CTGTGA.GACCG.g.AAAG
+RC   2 3710524..3710553  27.97  1.31e-04
+TTTT.aaatg-.TAGTC.CTGTGA.GGCTG.c.CAAA
+RC   3 3711223..3711251  31.64  4.44e-06
+CTTT.aaca--.CAGCC.CCGTGA.GGTTG.a.GAAG
+
+-------- at level 1 --------
+10454532 bases processed
+cutoff: 1.00
+6 config. per site
+10 hits
+4 independent hits
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/testfuzzy.genbank
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/testfuzzy.genbank	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/testfuzzy.genbank	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,78 @@
+LOCUS       DDU63596      310 bp    DNA             INV       14-MAY-1999
+DEFINITION  Dictyostelium discoideum Tdd-4 transposable element flanking
+            sequence, clone p427/428 right end.
+ACCESSION   U63596
+NID         g2393749
+KEYWORDS    .
+SOURCE      Dictyostelium discoideum.
+  ORGANISM  Dictyostelium discoideum
+            Eukaryota; Dictyosteliida; Dictyostelium.
+REFERENCE   1  (bases 1 to 310)
+  AUTHORS   Wells,D.J.
+  TITLE     Tdd-4, a DNA transposon of Dictyostelium that encodes proteins
+            similar to LTR retroelement integrases
+  JOURNAL   Nucleic Acids Res. 27 (11), 2408-2415 (1999)
+REFERENCE   2  (bases 1 to 310)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Dictyostelium discoideum Tdd-4 transposable element, right end
+            flanking sequence from clone p427/428
+  JOURNAL   Unpublished
+REFERENCE   3  (bases 1 to 310)
+  AUTHORS   Wells,D.J. and Welker,D.L.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (11-JUL-1996) Biology, Utah State Univ., Logan, UT
+            84322-5305, USA
+FEATURES             Location/Qualifiers
+     source          1..310
+                     /organism="Dictyostelium discoideum"
+                     /strain="AX4"
+                     /db_xref="taxon:44689"
+                     /clone="p427/428"
+     misc_feature    5.12
+                     /note="Fuzzy location"
+     misc_feature    <5..12
+                     /note="Fuzzy location2"
+     misc_feature    5>..12
+                     /note="Fuzzy location3"
+     misc_feature    5..<12
+                     /note="Fuzzy location4"
+     misc_feature    5..12>
+                     /note="Fuzzy location5"
+     misc_feature    <5..12>
+                     /note="Fuzzy location6"
+     misc_feature    5.12>
+                     /note="Fuzzy location7"
+     misc_feature    <5.12>
+                     /note="Fuzzy location8"
+     misc_feature    5>.12>
+                     /note="Fuzzy location9"
+     misc_feature    5.<12
+                     /note="Fuzzy location10"
+     misc_feature    5.60..<120
+                     /note="Fuzzy location11"
+     misc_feature    5^9..>200
+                     /note="Fuzzy location12"
+     misc_feature    5.10..12.15
+                     /note="Fuzzy location13"
+     misc_feature    5.10
+                     /note="Fuzzy location14"
+     misc_feature    join(J00194:(100..202),1..245,300..422)
+                     /note="Location partly in another entry"
+     misc_feature    complement(join(1..30,30.40,4^8,40.30..90))
+	             /note="complement of join"
+     misc_feature    order(1..30, 31..40)
+		     /note="location uses order"
+     misc_feature    order(AF183172.1:1..30, 31..40)
+		     /note="location uses order in another sequence"
+     misc_feature    order(AF183172.1:<1..30, 31..>342)
+		     /note="uses order and fuzzy locs in another sequence"
+     mRNA            join(complement(83202..83329),84248..84996)
+BASE COUNT      118 a     46 c     67 g     79 t
+ORIGIN      
+        1 gtgacagttg gctgtcagac atacaatgat tgtttagaag aggagaagat tgatccggag
+       61 taccgtgata gtattttaaa aactatgaaa gcgggaatac ttaatggtaa actagttaga
+      121 ttatgtgacg tgccaagggg tgtagatgta gaaattgaaa caactggtct aaccgattca
+      181 gaaggagaaa gtgaatcaaa agaagaagag tgatgatgaa tagccaccat tactgcatac
+      241 tgtagccctt acccttgtcg caccattagc cattaataaa aataaaaaat tatataaaaa
+      301 ttacacccat 
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tmhmm.out
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tmhmm.out	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tmhmm.out	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,12 @@
+# my_sequence_id Length: 178
+# my_sequence_id Number of predicted TMHs:  3
+# my_sequence_id Exp number of AAs in TMHs: 69.58306
+# my_sequence_id Exp number, first 60 AAs:  8.42188
+# my_sequence_id Total prob of N-in:        0.07306
+my_sequence_id	TMHMM2.0	outside	     1    53
+my_sequence_id	TMHMM2.0	TMhelix	    54    76
+my_sequence_id	TMHMM2.0	inside	    77   115
+my_sequence_id	TMHMM2.0	TMhelix	   116   138
+my_sequence_id	TMHMM2.0	outside	   139   150
+my_sequence_id	TMHMM2.0	TMhelix	   151   173
+my_sequence_id	TMHMM2.0	inside	   174   178    

Added: trunk/packages/bioperl/branches/upstream/current/t/data/transfac.dat
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/transfac.dat	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/transfac.dat	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,83 @@
+VV  TRANSFAC MATRIX TABLE, Release 6.4 - licensed - 2002-12-02, (C) Biobase GmbH
+XX
+//
+AC  M00001
+XX
+ID  V$MYOD_01
+XX
+DT  19.10.1992 (created); ewi.
+DT  22.10.1997 (updated); dbo.
+CO  Copyright (C), Biobase GmbH.
+XX
+NA  MyoD
+XX
+DE  myoblast determination gene product
+XX
+BF  T00526; MyoD; Species: mouse, Mus musculus.
+XX
+P0      A      C      G      T
+01      1      2      2      0      S
+02      2      1      2      0      R
+03      3      0      1      1      A
+04      0      5      0      0      C
+05      5      0      0      0      A
+06      0      0      4      1      G
+07      0      1      4      0      G
+08      0      0      0      5      T
+09      0      0      5      0      G
+10      0      1      2      2      K
+11      0      2      0      3      Y
+12      1      0      3      1      G
+XX
+BA  5 functional elements in 3 genes
+XX
+CC  no comment
+XX
+//
+AC  M00002
+XX
+ID  V$E47_01
+XX
+DT  19.10.1992 (created); ewi.
+DT  18.07.2000 (updated); ewi.
+CO  Copyright (C), Biobase GmbH.
+XX
+NA  E47
+XX
+DE  E47
+XX
+BF  T00207; E47; Species: human, Homo sapiens.
+XX
+P0      A      C      G      T
+01      4      4      3      0      V
+02      2      5      4      0      S
+03      3      2      4      2      N
+04      2      0      9      0      G
+05      0     11      0      0      C
+06     11      0      0      0      A
+07      0      0     11      0      G
+08      1      2      8      0      G
+09      0      0      0     11      T
+10      0      0     11      0      G
+11      0      0      4      7      K
+12      1      4      3      3      N
+13      1      6      2      2      C
+14      1      4      4      2      N
+15      1      4      2      3      N
+XX
+BA  11 selected strong binding sites for E47, E47-MyoD, E12+MyoD and (weak) for
+BA  E12
+XX
+BS  R05108; Start: 3; Length: 15; Gaps: 17; Orientation: p.
+BS  R05109; Start: 1; Length: 15; Gaps:; Orientation: p.
+BS  R05110; Start: 5; Length: 15; Gaps:; Orientation: p.
+BS  R05111; Start: 5; Length: 15; Gaps:; Orientation: p.
+BS  R05112; Start: 8; Length: 15; Gaps:; Orientation: p.
+BS  R05113; Start: 9; Length: 15; Gaps:; Orientation: p.
+BS  R05114; Start: 8; Length: 15; Gaps:; Orientation: p.
+BS  R05115; Start: 7; Length: 15; Gaps:; Orientation: p.
+BS  R05116; Start: 11; Length: 15; Gaps:; Orientation: p.
+BS  R05117; Start: 5; Length: 15; Gaps:; Orientation: p.
+BS  R05118; Start: 5; Length: 15; Gaps:; Orientation: p.
+XX
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tree_nonewline.nexus
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tree_nonewline.nexus	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tree_nonewline.nexus	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,9 @@
+#NEXUS 
+
+Begin trees;  [Treefile saved Wed Jul 26 19:40:41 2000]
+
+[output from your data run]
+
+Translate 1 TRXEcoli, 2 TRXHomo, 3 TRXSacch, 4 erCaelA, 5 erCaelB, 6 erCaelC, 7 erHomoA, 8 erHomoB, 9 erHomoC, 10 erpCaelC ; 
+
+tree PAUP_1 = [&U] (1,((2,3),((((4,10),(5,8)),(6,9)),7))); End; 

Added: trunk/packages/bioperl/branches/upstream/current/t/data/tricky.wublast
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/tricky.wublast	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/tricky.wublast	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,103 @@
+Reference:  Gish, W. (1996-2004) http://blast.wustl.edu
+
+Query=  AT1G70100.2 | Symbol: None | expressed protein | chr1:26406799-26409760
+    FORWARD | Aliases: None
+        (483 letters; record 9)
+
+Database:  AraRicMaiMedVir
+           151,652 sequences; 63,865,049 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+                                                                     Smallest
+                                                                       Sum
+                                                              High  Probability
+Sequences producing High-scoring Segment Pairs:              Score  P(N)      N
+
+AT5G55660.1 | Symbol: None | expressed protein, similar t...   104  0.019     1
+
+
+
+
+>AT5G55660.1 | Symbol: None | expressed protein, similar to unknown protein
+            (pir::T08929) | chr5:22556601-22560700 FORWARD | Aliases: MDF20.10,
+            MDF20_10
+        Length = 779
+
+ Score = 104 (41.7 bits), Expect = 0.019, P = 0.019
+ Identities = 58/240 (24%), Positives = 106/240 (44%)
+
+Query: 115 - 350
+Sbjct: 159 - 380
+
+ Score = 93 (37.8 bits), Expect = 0.29, P = 0.26
+ Identities = 45/211 (21%), Positives = 82/211 (38%)
+
+Query: 182 - 389
+Sbjct: 527 - 730
+
+ Score = 88 (36.0 bits), Expect = 1.0, P = 0.64
+ Identities = 54/229 (23%), Positives = 91/229 (39%)
+
+Query: 175 - 398
+Sbjct: 444 - 642
+
+ Score = 83 (34.3 bits), Expect = 3.6, P = 0.97
+ Identities = 85/386 (22%), Positives = 150/386 (38%)
+
+Query: 115 - 473
+Sbjct: 159 - 534
+
+ Score = 78 (32.5 bits), Expect = 0.43, Sum P(2) = 0.35
+ Identities = 50/253 (19%), Positives = 112/253 (44%)
+
+Query: 36 - 280
+Sbjct: 24 - 261
+
+ Score = 59 (25.8 bits), Expect = 0.43, Sum P(2) = 0.35
+ Identities = 45/225 (20%), Positives = 83/225 (36%)
+
+Query: 267 - 478
+Sbjct: 445 - 662
+
+ Score = 57 (25.1 bits), Expect = 0.68, Sum P(2) = 0.50
+ Identities = 43/212 (20%), Positives = 89/212 (41%)
+
+Query: 276 - 481
+Sbjct: 479 - 678
+
+
+
+Parameters:
+  gi
+  noseqs
+  stats
+  wordmask=seg+xnu
+  qrecmin=1
+  qrecmax=10110
+  E=10
+  V=10000
+  B=10000
+  postsw
+
+  ctxfactor=1.00
+
+  Query                        -----  As Used  -----    -----  Computed  ----
+  Frame  MatID Matrix name     Lambda    K       H      Lambda    K       H
+   +0      0   BLOSUM62        0.306   0.122   0.320    same    same    same
+               Q=9,R=2         0.244   0.0300  0.180     n/a     n/a     n/a
+
+  Query
+  Frame  MatID  Length  Eff.Length     E    S W   T  X   E2     S2
+   +0      0      483       483       10.  78 3  10 23  0.19    35
+                                                    35  0.22    37
+
+
+Statistics:
+  Query          Expected          Observed           HSPs
+  Frame  MatID  High Score        High Score       Reportable
+   +0      0    71 (31.4 bits)  2415 (1066.8 bits)   1089
+
+  Query         Neighborhd  Word      Excluded    Failed   Successful  Overlaps
+  Frame  MatID   Words      Hits        Hits    Extensions Extensions  Excluded
+   +0      0     14620    91959369    23132921    68703395   123053      3666
+

Added: trunk/packages/bioperl/branches/upstream/current/t/data/trna.strict.rnamotif
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/trna.strict.rnamotif	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/trna.strict.rnamotif	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,200 @@
+#RM scored
+#RM descr h5 ss h5 ss h3 ss h5 ss h3 ss h5 ss h3 h3 ss
+#RM dfile trna.strict.descr
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       1   75 gacctcg tg gcg caacggtag cgc g tctga ctccaga tcaga aggctgc gtgt tcgaatc acgt cggggtc acca
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       1   75 gacctcg tg gcg caacggtag cgc g tctga ctccaga tcaga aggct gcgtg ttcgaat cacgt cggggtc acca
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       1   75 gacctcg tg gcgc aacggta gcgc g tctga ctccaga tcaga aggctgc gtgt tcgaatc acgt cggggtc acca
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       1   75 gacctcg tg gcgc aacggta gcgc g tctga ctccaga tcaga aggct gcgtg ttcgaat cacgt cggggtc acca
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       2   73 acctcg tg gcg caacggtag cgc g tctga ctccaga tcaga aggctgc gtgt tcgaatc acgt cggggt cacc
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       2   73 acctcg tg gcg caacggtag cgc g tctga ctccaga tcaga aggct gcgtg ttcgaat cacgt cggggt cacc
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       2   73 acctcg tg gcgc aacggta gcgc g tctga ctccaga tcaga aggctgc gtgt tcgaatc acgt cggggt cacc
+>gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA
+gi|173683|gb|M10671|ACSTRW    0.000 0       2   73 acctcg tg gcgc aacggta gcgc g tctga ctccaga tcaga aggct gcgtg ttcgaat cacgt cggggt cacc
+>gi|173684|gb|M29552|ACYRR16S A.pyogenes 16S ribosomal RNA
+gi|173684|gb|M29552|ACYRR16S    0.000 0     510   76 cgcggta at acg tagggcnctag cgt t gtccg gaattat tgggc gtaaag agct cgtaggc ggtt tgttgcg cctg
+>gi|173684|gb|M29552|ACYRR16S A.pyogenes 16S ribosomal RNA
+gi|173684|gb|M29552|ACYRR16S    0.000 0     510   76 cgcggta at acgt agggcncta gcgt t gtccg gaattat tgggc gtaaag agct cgtaggc ggtt tgttgcg cctg
+>gi|173684|gb|M29552|ACYRR16S A.pyogenes 16S ribosomal RNA
+gi|173684|gb|M29552|ACYRR16S    0.000 0     511   74 gcggta at acg tagggcnctag cgt t gtccg gaattat tgggc gtaaag agct cgtaggc ggtt tgttgc gcct
+>gi|173684|gb|M29552|ACYRR16S A.pyogenes 16S ribosomal RNA
+gi|173684|gb|M29552|ACYRR16S    0.000 0     511   74 gcggta at acgt agggcncta gcgt t gtccg gaattat tgggc gtaaag agct cgtaggc ggtt tgttgc gcct
+>gi|173689|gb|M33910|ACYRRNAO A.odontolyticus 16S ribosomal RNA
+gi|173689|gb|M33910|ACYRRNAO    0.000 0      92   77 gggttg gt gga aaggttttt tct g gtggg ggatggg ctcgc ggcctatcagc ttgt tggtggg gtga tggcct acca
+>gi|173689|gb|M33910|ACYRRNAO A.odontolyticus 16S ribosomal RNA
+gi|173689|gb|M33910|ACYRRNAO    0.000 0      92   77 gggttg gt ggaa aggtttt ttct g gtggg ggatggg ctcgc ggcctatcagc ttgt tggtggg gtga tggcct acca
+>gi|173725|gb|K00230|ANITRLCAA Anacystis nidulans Leu-tRNA-CAA
+gi|173725|gb|K00230|ANITRLCAA    0.000 0       1   87 gggcaag tg gcg gaattggtaga cgc a gcaga ctcaaaa tctgc cgctagcgatagtgt gtggg ttcgagt cccac cttgccc acca
+>gi|173725|gb|K00230|ANITRLCAA Anacystis nidulans Leu-tRNA-CAA
+gi|173725|gb|K00230|ANITRLCAA    0.000 0       2   85 ggcaag tg gcg gaattggtaga cgc a gcaga ctcaaaa tctgc cgctagcgatagtgt gtggg ttcgagt cccac cttgcc cacc
+>gi|173726|gb|K00231|ANITRLCAG Anacystis nidulans Leu-tRNA-CAG
+gi|173726|gb|K00231|ANITRLCAG    0.000 0       1   87 gcggaac tg gcg aaattggtaga cgc g ctaga ttcaggt tctag tggtttcacgactgt ccggg ttcaagt cccgg gttccgc acca
+>gi|173726|gb|K00231|ANITRLCAG Anacystis nidulans Leu-tRNA-CAG
+gi|173726|gb|K00231|ANITRLCAG    0.000 0       2   85 cggaac tg gcg aaattggtaga cgc g ctaga ttcaggt tctag tggtttcacgactgt ccggg ttcaagt cccgg gttccg cacc
+>gi|173727|gb|K00311|ANITRMF Anacystis nidulans initiator Met-tRNA-f
+gi|173727|gb|K00311|ANITRMF    0.000 0       2   75 gcgggg ta gag cagcctggtag ctc g tcggg ctcataa cccga aggtc agagg ttcaaat cctct ccccgc cacc
+>gi|173727|gb|K00311|ANITRMF Anacystis nidulans initiator Met-tRNA-f
+gi|173727|gb|K00311|ANITRMF    0.000 0       2   75 gcgggg ta gagc agcctggta gctc g tcggg ctcataa cccga aggtc agagg ttcaaat cctct ccccgc cacc
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO    0.000 0     464   85 cgcggt aa gacg tagggcgcgag cgtt g tccg gatttat tggg cgtaaagagctcgtagg cggc ttgttgc gtcg gctgtg aaaa
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO    0.000 0     464   76 cgcggt aa gacg tagggcgcgag cgtt g tccg gatttat tggg cgtaaa gagct cgtaggc ggctt gttgcg tcgg
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO    0.000 0     464   76 cgcggta ag acg tagggcgcgag cgt t gtccg gatttat tgggc gtaaag agct cgtaggc ggct tgttgcg tcgg
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO    0.000 0     464   76 cgcggta ag acgt agggcgcga gcgt t gtccg gatttat tgggc gtaaag agct cgtaggc ggct tgttgcg tcgg
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO    0.000 0     465   74 gcggta ag acg tagggcgcgag cgt t gtccg gatttat tgggc gtaaag agct cgtaggc ggct tgttgc gtcg
+>gi|1236163|gb|L41047|ANNRRO Actinoplanes sp. ribosomal RNA (rRNA)
+gi|1236163|gb|L41047|ANNRRO    0.000 0     465   74 gcggta ag acgt agggcgcga gcgt t gtccg gatttat tgggc gtaaag agct cgtaggc ggct tgttgc gtcg
+>gi|173750|gb|M37693|ARGRRNASS Arthobacter simplex small subunit ribosomal RNA
+gi|173750|gb|M37693|ARGRRNASS    0.000 0     502   76 cgcggta at acg tagggtccnag cgt t gtccg gaattat tgggc gtaaag ggct cgtaggc ggtt tgtcgcg tcgg
+>gi|173750|gb|M37693|ARGRRNASS Arthobacter simplex small subunit ribosomal RNA
+gi|173750|gb|M37693|ARGRRNASS    0.000 0     502   76 cgcggta at acgt agggtccna gcgt t gtccg gaattat tgggc gtaaag ggct cgtaggc ggtt tgtcgcg tcgg
+>gi|173750|gb|M37693|ARGRRNASS Arthobacter simplex small subunit ribosomal RNA
+gi|173750|gb|M37693|ARGRRNASS    0.000 0     503   74 gcggta at acg tagggtccnag cgt t gtccg gaattat tgggc gtaaag ggct cgtaggc ggtt tgtcgc gtcg
+>gi|173750|gb|M37693|ARGRRNASS Arthobacter simplex small subunit ribosomal RNA
+gi|173750|gb|M37693|ARGRRNASS    0.000 0     503   74 gcggta at acgt agggtccna gcgt t gtccg gaattat tgggc gtaaag ggct cgtaggc ggtt tgtcgc gtcg
+>gi|173764|gb|K00326|ASRTRMI Asterias amurensis initiator Met-tRNA-i
+gi|173764|gb|K00326|ASRTRMI    0.000 0       1   75 agcagag tg gcg cagtggaag cgt g ctggg cccataa cccag aggtc cgagg atcgaaa cctcg ctctgct acca
+>gi|173764|gb|K00326|ASRTRMI Asterias amurensis initiator Met-tRNA-i
+gi|173764|gb|K00326|ASRTRMI    0.000 0       1   75 agcagag tg gcgc agtggaa gcgt g ctggg cccataa cccag aggtc cgagg atcgaaa cctcg ctctgct acca
+>gi|173764|gb|K00326|ASRTRMI Asterias amurensis initiator Met-tRNA-i
+gi|173764|gb|K00326|ASRTRMI    0.000 0       2   73 gcagag tg gcg cagtggaag cgt g ctggg cccataa cccag aggtc cgagg atcgaaa cctcg ctctgc tacc
+>gi|173764|gb|K00326|ASRTRMI Asterias amurensis initiator Met-tRNA-i
+gi|173764|gb|K00326|ASRTRMI    0.000 0       2   73 gcagag tg gcgc agtggaa gcgt g ctggg cccataa cccag aggtc cgagg atcgaaa cctcg ctctgc tacc
+>gi|173764|gb|K00326|ASRTRMI Asterias amurensis initiator Met-tRNA-i
+gi|173764|gb|K00326|ASRTRMI    0.000 0       2   73 gcagagt gg cgc agtggaagc gtg c tggg cccataa ccca gaggtcc gagg atcgaaa cctc gctctgc tacc
+>gi|173764|gb|K00326|ASRTRMI Asterias amurensis initiator Met-tRNA-i
+gi|173764|gb|K00326|ASRTRMI    0.000 0       3   71 cagagt gg cgc agtggaagc gtg c tggg cccataa ccca gaggtcc gagg atcgaaa cctc gctctg ctac
+>gi|173793|gb|K00141|BACTRA B.subtilis Ala-tRNA
+gi|173793|gb|K00141|BACTRA    0.000 0       1   76 ggagcct ta gct cagctgggag agc g cctgc tttgcac gcagg aggtc agcgg ttcgatc ccgct aggctcc acca
+>gi|173793|gb|K00141|BACTRA B.subtilis Ala-tRNA
+gi|173793|gb|K00141|BACTRA    0.000 0       1   76 ggagcct ta gctc agctggga gagc g cctgc tttgcac gcagg aggtc agcgg ttcgatc ccgct aggctcc acca
+>gi|173793|gb|K00141|BACTRA B.subtilis Ala-tRNA
+gi|173793|gb|K00141|BACTRA    0.000 0       2   74 gagcct ta gct cagctgggag agc g cctgc tttgcac gcagg aggtc agcgg ttcgatc ccgct aggctc cacc
+>gi|173793|gb|K00141|BACTRA B.subtilis Ala-tRNA
+gi|173793|gb|K00141|BACTRA    0.000 0       2   74 gagcct ta gctc agctggga gagc g cctgc tttgcac gcagg aggtc agcgg ttcgatc ccgct aggctc cacc
+>gi|304176|gb|L18939|BACTRCYSA Bacillus subtilis transfer RNA-Cys
+gi|304176|gb|L18939|BACTRCYSA    0.000 0       1   74 ggcggca ta gcc aagtggtaa ggc a gaggt cttcaaa acctt tatc cccgg ttcgaat ccggg tgccgcc tcca
+>gi|304176|gb|L18939|BACTRCYSA Bacillus subtilis transfer RNA-Cys
+gi|304176|gb|L18939|BACTRCYSA    0.000 0       2   72 gcggca ta gcc aagtggtaa ggc a gaggt cttcaaa acctt tatc cccgg ttcgaat ccggg tgccgc ctcc
+>gi|173794|gb|K00332|BACTRF B.stearothermophilus Phe-tRNA
+gi|173794|gb|K00332|BACTRF    0.000 0       1   76 ggctcgg ta gct cagtcggtag agc a aagga ctgaaaa tcctt gtgtc ggcgg ttcgatt ccgtc ccgagcc acca
+>gi|173794|gb|K00332|BACTRF B.stearothermophilus Phe-tRNA
+gi|173794|gb|K00332|BACTRF    0.000 0       1   76 ggctcgg ta gctc agtcggta gagc a aagga ctgaaaa tcctt gtgtc ggcgg ttcgatt ccgtc ccgagcc acca
+>gi|173794|gb|K00332|BACTRF B.stearothermophilus Phe-tRNA
+gi|173794|gb|K00332|BACTRF    0.000 0       2   74 gctcgg ta gct cagtcggtag agc a aagga ctgaaaa tcctt gtgtc ggcgg ttcgatt ccgtc ccgagc cacc
+>gi|173794|gb|K00332|BACTRF B.stearothermophilus Phe-tRNA
+gi|173794|gb|K00332|BACTRF    0.000 0       2   74 gctcgg ta gctc agtcggta gagc a aagga ctgaaaa tcctt gtgtc ggcgg ttcgatt ccgtc ccgagc cacc
+>gi|173795|gb|K00333|BACTRFA B.subtilis Phe-tRNA
+gi|173795|gb|K00333|BACTRFA    0.000 0       1   76 ggctcgg ta gct cagttggtag agc a acgga ctgaaaa tccgt gtgtc ggcgg ttcgatt ccgtc ccgagcc acca
+>gi|173795|gb|K00333|BACTRFA B.subtilis Phe-tRNA
+gi|173795|gb|K00333|BACTRFA    0.000 0       1   76 ggctcgg ta gctc agttggta gagc a acgga ctgaaaa tccgt gtgtc ggcgg ttcgatt ccgtc ccgagcc acca
+>gi|173795|gb|K00333|BACTRFA B.subtilis Phe-tRNA
+gi|173795|gb|K00333|BACTRFA    0.000 0       2   74 gctcgg ta gct cagttggtag agc a acgga ctgaaaa tccgt gtgtc ggcgg ttcgatt ccgtc ccgagc cacc
+>gi|173795|gb|K00333|BACTRFA B.subtilis Phe-tRNA
+gi|173795|gb|K00333|BACTRFA    0.000 0       2   74 gctcgg ta gctc agttggta gagc a acgga ctgaaaa tccgt gtgtc ggcgg ttcgatt ccgtc ccgagc cacc
+>gi|173796|gb|M24863|BACTRFAA B.stearothermophilus Phe-tRNA
+gi|173796|gb|M24863|BACTRFAA    0.000 0       1   76 gctcgtg ta gct cagtcggtag agc a aagga ctgaaya tcctt gtgtc ggcgg ttcgatt ccgtc cgcgagc acca
+>gi|173796|gb|M24863|BACTRFAA B.stearothermophilus Phe-tRNA
+gi|173796|gb|M24863|BACTRFAA    0.000 0       1   76 gctcgtg ta gctc agtcggta gagc a aagga ctgaaya tcctt gtgtc ggcgg ttcgatt ccgtc cgcgagc acca
+>gi|173796|gb|M24863|BACTRFAA B.stearothermophilus Phe-tRNA
+gi|173796|gb|M24863|BACTRFAA    0.000 0       2   74 ctcgtg ta gct cagtcggtag agc a aagga ctgaaya tcctt gtgtc ggcgg ttcgatt ccgtc cgcgag cacc
+>gi|173796|gb|M24863|BACTRFAA B.stearothermophilus Phe-tRNA
+gi|173796|gb|M24863|BACTRFAA    0.000 0       2   74 ctcgtg ta gctc agtcggta gagc a aagga ctgaaya tcctt gtgtc ggcgg ttcgatt ccgtc cgcgag cacc
+>gi|173797|gb|K00201|BACTRG1 B.subtilis Gly-tRNA-1
+gi|173797|gb|K00201|BACTRG1    0.000 0       1   74 gcgggtg ta gtt tagtggtaa aac c tcagc cttccaa gctga tgtc gtgag ttcgatt ctcat cacccgc tcca
+>gi|173797|gb|K00201|BACTRG1 B.subtilis Gly-tRNA-1
+gi|173797|gb|K00201|BACTRG1    0.000 0       1   74 gcgggtg ta gttt agtggta aaac c tcagc cttccaa gctga tgtc gtgag ttcgatt ctcat cacccgc tcca
+>gi|173797|gb|K00201|BACTRG1 B.subtilis Gly-tRNA-1
+gi|173797|gb|K00201|BACTRG1    0.000 0       2   72 cgggtg ta gtt tagtggtaa aac c tcagc cttccaa gctga tgtc gtgag ttcgatt ctcat cacccg ctcc
+>gi|173797|gb|K00201|BACTRG1 B.subtilis Gly-tRNA-1
+gi|173797|gb|K00201|BACTRG1    0.000 0       2   72 cgggtg ta gttt agtggta aaac c tcagc cttccaa gctga tgtc gtgag ttcgatt ctcat cacccg ctcc
+>gi|173800|gb|M26875|BACTRLC B.stearothermophilus Leu-tRNA-CAA
+gi|173800|gb|M26875|BACTRLC    0.000 0       1   86 gccgatg tg gcg gaattggcaga cgc g cacga ctcaaaa tcgtg tgggctttgcccgt gtggg ttcgact cccac catcggc acca
+>gi|173800|gb|M26875|BACTRLC B.stearothermophilus Leu-tRNA-CAA
+gi|173800|gb|M26875|BACTRLC    0.000 0       2   84 ccgatg tg gcg gaattggcaga cgc g cacga ctcaaaa tcgtg tgggctttgcccgt gtggg ttcgact cccac catcgg cacc
+>gi|173801|gb|K00310|BACTRMF B.subtilis initiator Met-tRNA-f
+gi|173801|gb|K00310|BACTRMF    0.000 0       2   75 gcgggg tg gag cagttcggtag ctc g tcggg ctcataa cccga aggtc gcagg ttcaaat cctgc ccccgc aacc
+>gi|173801|gb|K00310|BACTRMF B.subtilis initiator Met-tRNA-f
+gi|173801|gb|K00310|BACTRMF    0.000 0       2   75 gcgggg tg gagc agttcggta gctc g tcggg ctcataa cccga aggtc gcagg ttcaaat cctgc ccccgc aacc
+>gi|173802|gb|K00297|BACTRMM Bacillus subtilis Met-tRNA-m
+gi|173802|gb|K00297|BACTRMM    0.000 0       1   76 ggcggtg ta gct cagcggctag agc g tacgg ttcatac ccgtg aggtc ggggg ttcgatc ccctc cgccgct acca
+>gi|173802|gb|K00297|BACTRMM Bacillus subtilis Met-tRNA-m
+gi|173802|gb|K00297|BACTRMM    0.000 0       1   76 ggcggtg ta gctc agcggcta gagc g tacgg ttcatac ccgtg aggtc ggggg ttcgatc ccctc cgccgct acca
+>gi|173802|gb|K00297|BACTRMM Bacillus subtilis Met-tRNA-m
+gi|173802|gb|K00297|BACTRMM    0.000 0       2   74 gcggtg ta gct cagcggctag agc g tacgg ttcatac ccgtg aggtc ggggg ttcgatc ccctc cgccgc tacc
+>gi|173802|gb|K00297|BACTRMM Bacillus subtilis Met-tRNA-m
+gi|173802|gb|K00297|BACTRMM    0.000 0       2   74 gcggtg ta gctc agcggcta gagc g tacgg ttcatac ccgtg aggtc ggggg ttcgatc ccctc cgccgc tacc
+>gi|173803|gb|M27310|BACTRPA B.subtilis Pro-tRNA
+gi|173803|gb|M27310|BACTRPA    0.000 0       1   77 cgggaag ta gct cagcttggtag agc a catgg tttggga ccatg gggtc gcagg ttcgaat cctgt cttcccg acca
+>gi|173803|gb|M27310|BACTRPA B.subtilis Pro-tRNA
+gi|173803|gb|M27310|BACTRPA    0.000 0       1   77 cgggaag ta gctc agcttggta gagc a catgg tttggga ccatg gggtc gcagg ttcgaat cctgt cttcccg acca
+>gi|173803|gb|M27310|BACTRPA B.subtilis Pro-tRNA
+gi|173803|gb|M27310|BACTRPA    0.000 0       2   75 gggaag ta gct cagcttggtag agc a catgg tttggga ccatg gggtc gcagg ttcgaat cctgt cttccc gacc
+>gi|173803|gb|M27310|BACTRPA B.subtilis Pro-tRNA
+gi|173803|gb|M27310|BACTRPA    0.000 0       2   75 gggaag ta gctc agcttggta gagc a catgg tttggga ccatg gggtc gcagg ttcgaat cctgt cttccc gacc
+>gi|173804|gb|K00156|BACTRR B.subtilis Arg-tRNA
+gi|173804|gb|K00156|BACTRR    0.000 0       1   76 gcgcccg ta gct caatggatag agc g tttga ctgcgga tcaaa aggtt agggg ttcgact cccct cgggcgc gcca
+>gi|173804|gb|K00156|BACTRR B.subtilis Arg-tRNA
+gi|173804|gb|K00156|BACTRR    0.000 0       1   76 gcgcccg ta gctc aatggata gagc g tttga ctgcgga tcaaa aggtt agggg ttcgact cccct cgggcgc gcca
+>gi|173804|gb|K00156|BACTRR B.subtilis Arg-tRNA
+gi|173804|gb|K00156|BACTRR    0.000 0       2   74 cgcccg ta gct caatggatag agc g tttga ctgcgga tcaaa aggtt agggg ttcgact cccct cgggcg cgcc
+>gi|173804|gb|K00156|BACTRR B.subtilis Arg-tRNA
+gi|173804|gb|K00156|BACTRR    0.000 0       2   74 cgcccg ta gctc aatggata gagc g tttga ctgcgga tcaaa aggtt agggg ttcgact cccct cgggcg cgcc
+>gi|173805|gb|K00276|BACTRT B.subtilis Thr-tRNA
+gi|173805|gb|K00276|BACTRT    0.000 0       1   76 gccggtg ta gct caattggtag agc a actga cttgtaa tcagt aggtt ggggg ttcaagt cctct tgccggc acca
+>gi|173805|gb|K00276|BACTRT B.subtilis Thr-tRNA
+gi|173805|gb|K00276|BACTRT    0.000 0       1   76 gccggtg ta gctc aattggta gagc a actga cttgtaa tcagt aggtt ggggg ttcaagt cctct tgccggc acca
+>gi|173805|gb|K00276|BACTRT B.subtilis Thr-tRNA
+gi|173805|gb|K00276|BACTRT    0.000 0       2   74 ccggtg ta gct caattggtag agc a actga cttgtaa tcagt aggtt ggggg ttcaagt cctct tgccgg cacc
+>gi|173805|gb|K00276|BACTRT B.subtilis Thr-tRNA
+gi|173805|gb|K00276|BACTRT    0.000 0       2   74 ccggtg ta gctc aattggta gagc a actga cttgtaa tcagt aggtt ggggg ttcaagt cctct tgccgg cacc
+>gi|173806|gb|K00246|BACTRV1 B.subtilis Val-tRNA-1
+gi|173806|gb|K00246|BACTRV1    0.000 0       1   76 ggaggat ta gct cagctgggag agc a tctgc cttacaa gcaga gggtc ggcgg ttcgagc ccgtc atcctcc acca
+>gi|173806|gb|K00246|BACTRV1 B.subtilis Val-tRNA-1
+gi|173806|gb|K00246|BACTRV1    0.000 0       1   76 ggaggat ta gctc agctggga gagc a tctgc cttacaa gcaga gggtc ggcgg ttcgagc ccgtc atcctcc acca
+>gi|173806|gb|K00246|BACTRV1 B.subtilis Val-tRNA-1
+gi|173806|gb|K00246|BACTRV1    0.000 0       2   74 gaggat ta gct cagctgggag agc a tctgc cttacaa gcaga gggtc ggcgg ttcgagc ccgtc atcctc cacc
+>gi|173806|gb|K00246|BACTRV1 B.subtilis Val-tRNA-1
+gi|173806|gb|K00246|BACTRV1    0.000 0       2   74 gaggat ta gctc agctggga gagc a tctgc cttacaa gcaga gggtc ggcgg ttcgagc ccgtc atcctc cacc
+>gi|173807|gb|K01065|BACTRV2 B.stearothermophilus Val-tRNA-2
+gi|173807|gb|K01065|BACTRV2    0.000 0       1   76 gattccg ta gct cagctgggag agc g ccacc ttgacag ggtgg aggtc gctgg ttcgagc ccagt cggaatc acca
+>gi|173807|gb|K01065|BACTRV2 B.stearothermophilus Val-tRNA-2
+gi|173807|gb|K01065|BACTRV2    0.000 0       1   76 gattccg ta gctc agctggga gagc g ccacc ttgacag ggtgg aggtc gctgg ttcgagc ccagt cggaatc acca
+>gi|173807|gb|K01065|BACTRV2 B.stearothermophilus Val-tRNA-2
+gi|173807|gb|K01065|BACTRV2    0.000 0       2   74 attccg ta gct cagctgggag agc g ccacc ttgacag ggtgg aggtc gctgg ttcgagc ccagt cggaat cacc
+>gi|173807|gb|K01065|BACTRV2 B.stearothermophilus Val-tRNA-2
+gi|173807|gb|K01065|BACTRV2    0.000 0       2   74 attccg ta gctc agctggga gagc g ccacc ttgacag ggtgg aggtc gctgg ttcgagc ccagt cggaat cacc
+>gi|173809|gb|K00270|BACTRY B.stearothermophilus Tyr-tRNA
+gi|173809|gb|K00270|BACTRY    0.000 0       1   85 ggagggg ta gcg aagtggctaaa cgc g gcgga ctgtaaa tccgc tccctttgggttc ggcgg ttcgaat ccgtc cccctcc acca
+>gi|173809|gb|K00270|BACTRY B.stearothermophilus Tyr-tRNA
+gi|173809|gb|K00270|BACTRY    0.000 0       2   83 gagggg ta gcg aagtggctaaa cgc g gcgga ctgtaaa tccgc tccctttgggttc ggcgg ttcgaat ccgtc cccctc cacc
+>gi|173811|gb|K00269|BACTRYI B.subtilis Tyr-tRNA, species I and II
+gi|173811|gb|K00269|BACTRYI    0.000 0       1   85 ggagggg ta gcg aagtggctaaa cgc g gcgga ctgtaaa tccgc tccctcagggttc ggcag ttcgaat ctgcc cccctcc acca
+>gi|173811|gb|K00269|BACTRYI B.subtilis Tyr-tRNA, species I and II
+gi|173811|gb|K00269|BACTRYI    0.000 0       2   83 gagggg ta gcg aagtggctaaa cgc g gcgga ctgtaaa tccgc tccctcagggttc ggcag ttcgaat ctgcc cccctc cacc
+>gi|173880|gb|M38018|BIFRRDQ B.bifidum small subunit ribosomal RNA gene
+gi|173880|gb|M38018|BIFRRDQ    0.000 0     538   82 cggatt ta ttg ggcg taa a gggct cgtaggc ggctc gtcgcgtccggtgtgaaagtc catc gcttaac ggtg gatctg cgcc
+>gi|459692|gb|L29265|BLHRRNA Blastocrithidia culicis ribosomal RNA
+gi|459692|gb|L29265|BLHRRNA    0.000 0    1657   77 gggtctg ta gct cagctggttag agc a ccgtc ttgataa ggcgg gggtc gttgg ttcaagt ccaac tagaccc acca
+>gi|459692|gb|L29265|BLHRRNA Blastocrithidia culicis ribosomal RNA
+gi|459692|gb|L29265|BLHRRNA    0.000 0    1657   77 gggtctg ta gctc agctggtta gagc a ccgtc ttgataa ggcgg gggtc gttgg ttcaagt ccaac tagaccc acca
+>gi|459692|gb|L29265|BLHRRNA Blastocrithidia culicis ribosomal RNA
+gi|459692|gb|L29265|BLHRRNA    0.000 0    1658   75 ggtctg ta gct cagctggttag agc a ccgtc ttgataa ggcgg gggtc gttgg ttcaagt ccaac tagacc cacc
+>gi|459692|gb|L29265|BLHRRNA Blastocrithidia culicis ribosomal RNA
+gi|459692|gb|L29265|BLHRRNA    0.000 0    1658   75 ggtctg ta gctc agctggtta gagc a ccgtc ttgataa ggcgg gggtc gttgg ttcaagt ccaac tagacc cacc
+>gi|173898|gb|M22136|BLYCPTREA Hordeum vulgare chloroplast Glu-tRNA
+gi|173898|gb|M22136|BLYCPTREA    0.000 0       1   76 tccgtcg ta gtc taggdggdtag gat a ctcgg ctttcac ccgag agac ccggg ttcaagt cccgg cgacgga acca
+>gi|173898|gb|M22136|BLYCPTREA Hordeum vulgare chloroplast Glu-tRNA
+gi|173898|gb|M22136|BLYCPTREA    0.000 0       1   76 tccgtcg ta gtct aggdggdta ggat a ctcgg ctttcac ccgag agac ccggg ttcaagt cccgg cgacgga acca
+>gi|173898|gb|M22136|BLYCPTREA Hordeum vulgare chloroplast Glu-tRNA
+gi|173898|gb|M22136|BLYCPTREA    0.000 0       2   74 ccgtcg ta gtc taggdggdtag gat a ctcgg ctttcac ccgag agac ccggg ttcaagt cccgg cgacgg aacc
+>gi|173898|gb|M22136|BLYCPTREA Hordeum vulgare chloroplast Glu-tRNA

Added: trunk/packages/bioperl/branches/upstream/current/t/data/unigene.data
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/unigene.data	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/unigene.data	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,110 @@
+ID          Hs.2
+TITLE       N-acetyltransferase 2 (arylamine N-acetyltransferase)
+GENE        NAT2
+CYTOBAND    8p22
+LOCUSLINK   10
+HOMOL       YES
+EXPRESS     liver ; hepatocellular carcinoma ; adenocarcinoma ; corresponding non cancerous liver tissue ; Liver ; colon ; Cell lines 
+RESTR_EXPR   liver
+GNM_TERMINUS      S
+CHROMOSOME  8
+STS         ACC=G59899 UNISTS=137181
+STS         ACC=GDB:386004 UNISTS= 157141
+STS         ACC=WIAF-2120 UNISTS= 44576
+STS         ACC=G06461 UNISTS= 17088
+STS         ACC=GDB:310612 UNISTS= 156422
+STS         ACC=PMC310725P3 UNISTS= 272646
+STS         ACC=GDB:310613 UNISTS= 156423
+STS         ACC=GDB:187676 UNISTS= 155563
+PROTSIM     ORG=Escherischia coli; PROTGI=16129422; PROTID=ref:NP_415980.1; PCT=24.81; ALN=255
+PROTSIM     ORG=Homo sapiens; PROTGI=105377; PROTID=pir:B34585; PCT=100.00; ALN=290
+PROTSIM     ORG=Mus musculus; PROTGI=1703436; PROTID=sp:P50295; PCT=74.83; ALN=290
+PROTSIM     ORG=Rattus norvegicus; PROTGI=16758720; PROTID=ref:NP_446306.1; PCT=73.79; ALN=290
+SCOUNT      26
+SEQUENCE    ACC=BX095770.1; NID=g27827877; CLONE=IMAGp998I184581_,_IMAGE:1870937; LID=1079; SEQTYPE=EST; PERIPHERAL=1
+SEQUENCE    ACC=AI262683.1; NID=g3870886; CLONE=IMAGE:1870937; END=3'; LID=1079; SEQTYPE=EST
+SEQUENCE    ACC=CB161982.1; NID=g28148108; CLONE=L17N670205n1-15-F12; END=5'; LID=12542; SEQTYPE=EST; PERIPHERAL=1
+SEQUENCE    ACC=CB161860.1; NID=g28147986; CLONE=L17N670205n1-41-A04; END=5'; LID=12542; SEQTYPE=EST
+SEQUENCE    ACC=AI460128.1; NID=g4313009; CLONE=IMAGE:2151449; END=3'; LID=1556; SEQTYPE=EST
+SEQUENCE    ACC=AI733799.1; NID=g5054912; CLONE=IMAGE:1870937; END=3'; LID=1079; SEQTYPE=EST
+SEQUENCE    ACC=AI792606.1; NID=g5340322; CLONE=IMAGE:1870937; END=5'; LID=1079; SEQTYPE=EST
+SEQUENCE    ACC=NM_000015.1; NID=g4557782; PID=g4557783; SEQTYPE=mRNA
+SEQUENCE    ACC=BC067218.1; NID=g45501306; PID=g45501307; SEQTYPE=mRNA
+SEQUENCE    ACC=CR407631.1; NID=g47115198; PID=g47115199; SEQTYPE=mRNA
+SEQUENCE    ACC=AV658623.1; NID=g9879637; CLONE=GLCFOD10; END=3'; LID=5601; SEQTYPE=EST
+SEQUENCE    ACC=AV658656.1; NID=g9879670; CLONE=GLCFOG07; END=3'; LID=5601; SEQTYPE=EST
+SEQUENCE    ACC=AV684197.1; NID=g10286060; CLONE=GKCFZH06; END=5'; LID=6533; SEQTYPE=EST
+SEQUENCE    ACC=D90040.1; NID=g219411; PID=g219412; SEQTYPE=mRNA
+SEQUENCE    ACC=AU099534.1; NID=g13550663; CLONE=HSI08034; LID=8800; SEQTYPE=EST
+SEQUENCE    ACC=BG533459.1; NID=g13524999; CLONE=IMAGE:4072143; END=5'; LID=6989; MGC=4557782; SEQTYPE=EST; TRACE=44404609
+SEQUENCE    ACC=BG563731.1; NID=g13571383; CLONE=IMAGE:4712210; END=5'; LID=6989; MGC=4557782; SEQTYPE=EST; TRACE=44153506
+SEQUENCE    ACC=BG568400.1; NID=g13576053; CLONE=IMAGE:4716802; END=5'; LID=6989; MGC=4557782; SEQTYPE=EST; TRACE=44156561
+SEQUENCE    ACC=BG569272.1; NID=g13576925; CLONE=IMAGE:4722638; END=5'; LID=6989; SEQTYPE=EST; TRACE=44157191
+SEQUENCE    ACC=BG569293.1; NID=g13576946; CLONE=IMAGE:4722596; END=5'; LID=6989; MGC=4557782; SEQTYPE=EST; TRACE=44157214
+SEQUENCE    ACC=BG617259.1; NID=g13668630; CLONE=IMAGE:4734378; END=5'; LID=6989; SEQTYPE=EST; TRACE=44229423
+SEQUENCE    ACC=BG618195.1; NID=g13669566; CLONE=IMAGE:4767316; END=5'; LID=6989; MGC=4557782; SEQTYPE=EST; TRACE=45338366
+SEQUENCE    ACC=BG204539.1; NID=g13726226; LID=8655; SEQTYPE=EST
+SEQUENCE    ACC=BC015878.1; NID=g16198419; PID=g16198420; SEQTYPE=mRNA
+SEQUENCE    ACC=D90042.1; NID=g219415; PID=g219416; SEQTYPE=mRNA
+SEQUENCE    ACC=BU624903.1; NID=g23291118; CLONE=UI-H-FG1-bgl-g-02-0-UI; END=3'; LID=11914; SEQTYPE=EST; TRACE=159705553
+//
+ID          Rn.1
+TITLE       Transcribed sequences
+EXPRESS     Mixed tissues 
+STS         ACC=RH128068 UNISTS=211376
+SCOUNT      9
+SEQUENCE    ACC=AA859577.1; NID=g4230123; CLONE=UI-R-E0-bv-c-09-0-UI; END=3'; LID=1127; SEQTYPE=EST; TRACE=154346471
+SEQUENCE    ACC=AW251121.1; NID=g6594732; CLONE=UI-R-BJ0-adi-f-04-0-UI; END=3'; LID=2759; SEQTYPE=EST; TRACE=154371414
+SEQUENCE    ACC=AW252428.1; NID=g6596019; CLONE=UI-R-BJ0-adx-d-05-0-UI; END=3'; LID=2759; SEQTYPE=EST; TRACE=154373036
+SEQUENCE    ACC=BQ194853.1; NID=g20370404; CLONE=UI-R-CN1-cmb-f-16-0-UI; END=3'; LID=10150; SEQTYPE=EST; TRACE=154334854
+SEQUENCE    ACC=BU758764.1; NID=g23721624; CLONE=UI-R-FF0-cow-c-07-0-UI; END=3'; LID=11044; SEQTYPE=EST; TRACE=154339445
+SEQUENCE    ACC=CA510294.1; NID=g25001248; CLONE=UI-R-FS0-cqr-l-23-0-UI; END=3'; LID=12129; SEQTYPE=EST; TRACE=159666883
+SEQUENCE    ACC=CB613849.1; NID=g29573737; CLONE=urrg1-00170-g2; END=5'; LID=12874; SEQTYPE=EST
+SEQUENCE    ACC=CB763094.1; NID=g29851485; CLONE=urrg1-00038-c12; END=5'; LID=12874; SEQTYPE=EST
+SEQUENCE    ACC=CK838684.1; NID=g45188969; CLONE=UI-R-AC1-xo-a-03-0-UI; END=3'; LID=1719; SEQTYPE=EST
+//
+ID          Mm.340763
+TITLE       Transcribed locus, strongly similar to NP_003008.1 splicing factor, arginine/serine-rich 3; splicing factor, arginine//serine-rich, 20-kD [Homo sapiens]
+HOMOL       YES
+EXPRESS     other ; brain ; liver ; eye ; kidney ; testis ; pituitary gland ; whole body ; colon ; muscle ; thymus 
+CHROMOSOME  11
+STS         ACC=- UNISTS=3193
+PROTSIM     ORG=Arabidopsis thaliana; PROTGI=15236000; PROTID=ref:NP_194886.1; PCT=37.25; ALN=95
+PROTSIM     ORG=Caenorhabditis elegans; PROTGI=7496951; PROTID=pir:T34145; PCT=50.33; ALN=131
+PROTSIM     ORG=Drosophila melanogaster; PROTGI=384217; PROTID=prf:1905314A; PCT=67.29; ALN=106
+PROTSIM     ORG=Homo sapiens; PROTGI=139781; PROTID=sp:P23152; PCT=99.26; ALN=136
+PROTSIM     ORG=Mus musculus; PROTGI=111257; PROTID=pir:S14016; PCT=99.26; ALN=136
+PROTSIM     ORG=Rattus norvegicus; PROTGI=1168968; PROTID=sp:Q09167; PCT=48.84; ALN=83
+SCOUNT      31
+SEQUENCE    ACC=BY050916.1; NID=g26156364; CLONE=I730062L23; END=5'; LID=12235; SEQTYPE=EST
+SEQUENCE    ACC=BY066798.1; NID=g26170395; CLONE=I920050M24; END=5'; LID=12249; SEQTYPE=EST
+SEQUENCE    ACC=BY297944.1; NID=g26488281; CLONE=K530357B20; END=5'; LID=12286; SEQTYPE=EST
+SEQUENCE    ACC=BY403069.1; NID=g26632637; CLONE=I730043J07; END=3'; LID=12253; SEQTYPE=EST
+SEQUENCE    ACC=BY417621.1; NID=g26684533; CLONE=I920028M23; END=3'; LID=4141; SEQTYPE=EST
+SEQUENCE    ACC=BY426591.1; NID=g26701831; CLONE=I920085O07; END=3'; LID=12250; SEQTYPE=EST
+SEQUENCE    ACC=BY448670.1; NID=g26740448; CLONE=K630031D11; END=3'; LID=2601; SEQTYPE=EST
+SEQUENCE    ACC=BY677654.1; NID=g27066665; CLONE=K920015D15; END=3'; LID=12304; SEQTYPE=EST
+SEQUENCE    ACC=CB947876.1; NID=g30199635; CLONE=IMAGE:30309766; END=5'; LID=12883; SEQTYPE=EST
+SEQUENCE    ACC=CD775546.1; NID=g32434048; CLONE=UI-M-AQ0-ciz-j-19-0-UI; END=3'; LID=1947; SEQTYPE=EST
+SEQUENCE    ACC=BX630904.1; NID=g33610776; CLONE=LIONp462B02438; END=3'; LID=14219; SEQTYPE=EST
+SEQUENCE    ACC=CF748683.1; NID=g37645028; CLONE=IMAGE:30629174; END=5'; LID=14478; SEQTYPE=EST
+SEQUENCE    ACC=CF750995.1; NID=g37647341; CLONE=IMAGE:30623151; END=5'; LID=14479; SEQTYPE=EST; TRACE=302787286
+SEQUENCE    ACC=CA587449.1; NID=g40792709; LID=12138; SEQTYPE=EST
+SEQUENCE    ACC=CO044451.1; NID=g48584991; CLONE=IMAGE:30657728; END=3'; LID=15581; SEQTYPE=EST
+SEQUENCE    ACC=D28619.1; NID=g618936; CLONE=86F09; LID=240; SEQTYPE=EST
+SEQUENCE    ACC=AA474597.1; NID=g2202752; CLONE=IMAGE:805477; END=5'; LID=850; SEQTYPE=EST
+SEQUENCE    ACC=AA498526.1; NID=g2233549; CLONE=IMAGE:889465; END=5'; LID=609; SEQTYPE=EST
+SEQUENCE    ACC=AA638070.1; NID=g2561658; CLONE=IMAGE:1121421; END=5'; LID=916; SEQTYPE=EST; TRACE=216759053
+SEQUENCE    ACC=AV101859.1; NID=g5249407; CLONE=2410079M03; LID=1882; SEQTYPE=EST
+SEQUENCE    ACC=AV113236.1; NID=g5267316; CLONE=2610020C14; LID=1884; SEQTYPE=EST
+SEQUENCE    ACC=AI840089.1; NID=g5474302; CLONE=UI-M-AL0-abt-e-09-0-UI; END=3'; LID=1937; SEQTYPE=EST; TRACE=158440126
+SEQUENCE    ACC=AI843632.1; NID=g5477845; CLONE=UI-M-AO1-aen-f-09-0-UI; END=3'; LID=1944; SEQTYPE=EST; TRACE=158502493
+SEQUENCE    ACC=AV259282.1; NID=g6246741; CLONE=4930404D15; END=3'; LID=2547; SEQTYPE=EST
+SEQUENCE    ACC=AV290384.1; NID=g6304415; CLONE=5133400P08; END=3'; LID=2560; SEQTYPE=EST
+SEQUENCE    ACC=AW111788.1; NID=g6824501; CLONE=MT1455; END=3'; LID=2483; SEQTYPE=EST
+SEQUENCE    ACC=BE333113.1; NID=g9206889; CLONE=IMAGE:3326433; END=5'; LID=544; SEQTYPE=EST; TRACE=114452409
+SEQUENCE    ACC=BE951610.1; NID=g10591137; CLONE=UI-M-CC0-ayc-f-08-0-UI; END=3'; LID=6769; SEQTYPE=EST; TRACE=158466712
+SEQUENCE    ACC=BE984495.1; NID=g10656785; CLONE=UI-M-CG0p-bgf-c-02-0-UI; END=3'; LID=6780; SEQTYPE=EST; TRACE=158541808
+SEQUENCE    ACC=BU525661.1; NID=g22836102; CLONE=IMAGE:6534145; END=5'; LID=11268; SEQTYPE=EST
+SEQUENCE    ACC=BU554412.1; NID=g22904684; CLONE=IMAGE:6581716; END=5'; LID=11140; SEQTYPE=EST
+//

Added: trunk/packages/bioperl/branches/upstream/current/t/data/urease.tre.nexus
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/urease.tre.nexus	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/urease.tre.nexus	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+#NEXUS 
+
+Begin trees;  [Treefile saved Thu Jun 24 10:15:07 2004]
+[!
+>Data file = /home/jes12/possel/urease/urease.cdna.nex
+>Heuristic search settings:
+>  Optimality criterion = distance (minimum evolution)
+>    Negative branch lengths allowed, but set to zero for tree-score calculation
+>    Distance measure = uncorrected ("p")
+>  Starting tree(s) obtained via neighbor-joining
+>  Branch-swapping algorithm: tree-bisection-reconnection (TBR)
+>  Steepest descent option not in effect
+>  Initial 'MaxTrees' setting = 100
+>  Zero-length branches not collapsed
+>  'MulTrees' option in effect
+>  Topological constraints not enforced
+>  Trees are unrooted
+>
+>Heuristic search completed
+>   Total number of rearrangements tried = 34
+>   Score of best tree(s) found = 1.13334
+>   Number of trees retained = 1
+>   Time used = <1 sec (CPU time = 0.00 sec)
+]
+	Translate
+		1 Anidulans,
+		2 CneoA,
+		3 Cimmitis,
+		4 Mgrisea,
+		5 Ncrassa,
+		6 Spombe
+		;
+tree PAUP_1 = [&U] (((1:0.188380,(4:0.147551,5:0.134318):0.027742):0.012293,3:0.175935):0.012799,2:0.212917,6:0.221404);
+End;

Added: trunk/packages/bioperl/branches/upstream/current/t/data/version2.scf
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/version2.scf
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/version3.scf
===================================================================
(Binary files differ)


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/data/version3.scf
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: trunk/packages/bioperl/branches/upstream/current/t/data/worm_fam_2785.cdna
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/worm_fam_2785.cdna	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/worm_fam_2785.cdna	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,76 @@
+>CBG10100
+ATGGCGAGTGATATGAGGCACACATCAAGCAACACAAGAAGGCCAAGTATGGGTTCAAAC
+TCCAAATTACTAGCTGGAAAATCCTTTCTCCTAGATATTGGAAATCGACAATGGCGGACA
+AAGGTAGCTGAACGAATTCTGAATTACGGAGCGAACATTATGGATTCGTTCGGAGAAGTA
+GACCCGCATGCTGTTGTCTCTGATAATCCGATGGCATTGAAGTTCGAGAAAAAGGATGGC
+AGTCAACATTTTAATGAAAAAATCAGAACTCCCCAGAGTTTCCTAAAACAAATGGATGCC
+TTCGCCAATAAAAAAGGGAATACCACATCGGCAACAACTTCACTCAGTACCATTCGACCA
+CGGACTGATCTCAAAGCCACTAAAGATACCCTGACAAGAACTGAGACACCCGTTGTGCCT
+CAACGATCCTCAAAGTCCCGTCAATCTTCTGTTCCGAGAACTGCTAAAGTATTTGTGCGA
+GTCGAAGCTCCTACTAAACGACCAGAGATTCGGTGTATACCCAGAACAGCATATGATACT
+CTGTACGGCGGTAGTGATACGGGTCATTCAGTATTCAGAATCGCAGAAACGGCATTGAAA
+GAGAGAAGAGTTCGAGAGTATGATCTATTCGTGAAGGGTCGATACGAGCCACATAAGAAA
+CCTTTCAAAATGGATGAGAAGGATAAATTTTGTCAGTTTTGCGAGAAAGAATACAGTGGA
+GAGAGGAAAGATCACGAAAGAACCGACGAGCATCGAGCCAAAGCCAGAACTCGAGGCCTC
+CTTCCAGCACTTGAAAGAGGAATTTTGAGCGCTCGACTGAAATTGAAGAATAGCCAAAAT
+CAGGTGATGATGGTGGCGAAACGAAAACTGGATGACAACGCCTTTCAAGAACAATCGAAA
+AGAGCACGCGTCGAGTTTGAGTACGGTGAAAATATGGTGAAGGCAGATTGGCAAAGTTTG
+AAAGAGAACAATTTAACAACAGTGATTCCCGAGAAAATTCTGAAAGTGTCGCCCGGAAAA
+GGCTCTGTCCTCTCGCCACGACGTCGTCAGCAACGAATTCAACCTCGCACAAGCCAAGGA
+GACTTGATTTGA
+>F22B7.13
+atggacgtctcttattacgatggtcccaaggatgaagtcgccgaagcaatgctgaaaagc
+gcggtgacggccatgagattgggacaatacgaggatggaaaaggacgcttagaggagata
+atggagttcggaacctcaaattttcaactacttggtacaatctacatgtattacggaaga
+gtgtgcaggcatttgaaccatgatgccaaggccttggagtttttcgaacatgagttgaac
+atgttcaaattgatcttcaactacccagaagcatgtgattccacacgtcgcatcgtcgag
+caggcactcaaaatgggaaagttccccaaggctcgacggtttgctgaggatctcattgat
+tacaccagcaataagaagaacggagagaagtatatcggtcaagctcgaattttgttcgct
+tccgtgtgcctcgaaggatgtgaaagagacgtcgagagtaatcaagatgagaagaagaag
+cttttgtcaatatgtgctgaacagattgcagccgtgaaattgttcaacgagaataatacg
+gaaggagctgtgtctgagaccaaaatcatgttacttgaggcgaaatgcttgtcactagac
+gaaaaatacgaggaatcgcgtcgcaagtatcaagaatgcatcgattttgccatcaaaaca
+gaccagtttgaagcagttcacatcgcctattacgacaaggctctatatgctgagacagat
+cttcttttctttattatcagagatctcagaagtgctctcttctacgccacgaaattcgga
+aaagagcgagatgtagtcaaatataagtcgaagctatccgaagagatgctgagaaatggc
+gaattccacgaagcatatctctacggattggaagcgcttgtatcgattcggaagcttgga
+ttgaacgaatacattggagatgtgttgcttacaatcgcaaagtgcctcattgcacttgga
+aaaagacgccaagctgcttattttatcatcttggggagtgttctgaccatcaaccaaaac
+agtttcaaactgttctacgagcagatcgacgtggcgatgaatcaagagagaagcgaaacg
+gcaactgatcaagatgtatgcctcgcaattgattcgtctcctgatccgacatcctcgaat
+gacatgattaataagttcgtcgtcgaactggagcacgcaacaaatgtggaaacctgggaa
+atgattgtcaacggaatcattgacgaccagaagaaaccagtggcgatcgaaaagaaagag
+aacgaggaacccgtagacatgatggatctcattttcagtatgagctcacgtatggatgat
+caaagaactgaactgcctgctgccagattcattccgcctcgtccagtgtcatcggcatcg
+aaaaagactacaaagagtcacagaatcctccctggactccgtgccaattggacaaaagtg
+cagtcgatgaagttcgatggtcacacaatgaataggatcctgaagaggtcgaagaaaagc
+aaatcgtcattggattctacaaattcgatgcagggcgatgatactcgaagcgatgatgtg
+acaatgacgtccaaatag
+>C38C10.4
+atggacgtctcttattatgatggccctaaggatgaagtcatcgaagcaatgctgaaaagc
+gcggtgacggccatgaaattgggacaatacgaggatggaaaaggacgcttagaggacaca
+atggagttcggaacctccaattttcaactgcttggtacaatctacatgtattacggaaga
+gtgtgcaggcatttgaaccacgacgccaaggccttggagtttttcgaacatgagttgaac
+atgttcaaattgatcttcaactacccagaagcatgtgattccacacgtcgcatcgtccag
+caggcactcaaaatggaaaagttctccaaggctcgacggtttgctgaggatctcattgat
+tacaccagcaataagaagaacggagagaagtatatcggtcaagctcgaattttgttcgct
+tccgtgtgcctcgaaggatgtgaaagagacgtcgagagtaatcaagatgagaagaagaag
+cttttgtcaatatgtgctgaacagattgcagccgtgaaattgttcaacgagaataatacg
+gaaggagctgtgtctgagaccaaaattatgctaattgaggcgaaatgcttgtcactcgac
+gaaaaatacgaggaatcgcgtcgcaagtatcaagaatgcatcgattttgccatcaaaaca
+gaccagtttgaagcagttcacatcgcctattacgacaaggctctctatgctgagacatat
+cttcttttctttattatcagagatctcagaagtgctctcttctacgccacgaaattcgga
+aaagagcgagatgtagtcaaatataagtcgaagctatccgaagagatgctgagaaatggc
+gaattccacgaagcatatctctacggattggaagcgcttgtatcgattcggaagcttgga
+ttgaacgaacacattggagatgtgttgcttacaatcgcaaagtgtctcattgcacttgga
+aaaagacgccaagctgcttattttatcatcttggggagtgttctgaccatcaaccaaagc
+agtttcaaactgttctacgagcagatcgacgtggcgatgaatcaagagagaagcgaaacg
+gcaactgatcaagatgcatgcctcgcaattgattcgtctcctgatccgacatcctcgaat
+gacatgattaataagttcgtcgtcaaactggagcacgcaacaaatgttgaaacctgggaa
+atgattgtcaacggaatcattgaagaccagaagaaaccagtagcgatcgaaaagaaagag
+aacgaggaacccgtagacatgatggatctcattttcagtatgagctcgcgtatggatgat
+cagagaactgaactgtctgctgccagattcattccgcctcgtccagtgtcatcggcatcg
+aaaaagactacaaagagccacagaattctccctggactccgtgccaattggacaaaagtg
+cagtcgatgaagttcgatggtcacacaatgaataggatcctgaagaggtcgaagaaaagc
+aaatcgtcattggattctacaaattcgatacagggcgatgatactcgaagcgatgatgtg
+acaatgacgtccaaatag

Added: trunk/packages/bioperl/branches/upstream/current/t/data/yeast.tRNAscanSE
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/yeast.tRNAscanSE	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/yeast.tRNAscanSE	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,298 @@
+Sequence		tRNA 	Bounds	tRNA	   	Intron Bounds	Cove
+Name    	tRNA #	Begin	End  	Type	Codon	Begin	End	Score
+--------	------	---- 	------	----	-----	-----	----	------
+I       	1	139154	139256	Pro	CCA	139190	139220	60.49
+I       	2	166267	166339	Ala	GCA	0	0	76.57
+I       	3	181135	181248	Leu	TTG	181173	181204	53.68
+I       	4	182597	182516	Ser	TCT	0	0	85.00
+II      	1	9583 	9666 	Leu	TTA	0	0	69.31
+II      	2	36390	36480	Phe	TTC	36427	36444	68.89
+II      	3	197456	197529	Ile	ATT	0	0	71.40
+II      	4	227037	227118	Ser	TCT	0	0	85.00
+II      	5	266337	266409	Thr	ACT	0	0	78.63
+II      	6	347562	347645	Leu	TTA	0	0	69.31
+II      	7	350786	350857	Gln	CAA	0	0	65.72
+II      	8	405836	405907	Arg	AGA	0	0	66.88
+II      	9	405918	405989	Asp	GAC	0	0	58.36
+II      	10	645124	645195	Glu	GAA	0	0	62.99
+II      	11	643035	642964	Cys	TGC	0	0	75.04
+II      	12	326825	326752	Val	GTA	0	0	74.06
+II      	13	197661	197591	Gly	GGC	0	0	62.73
+III     	1	90858	90971	Leu	TTG	90896	90927	54.18
+III     	2	127710	127783	Asn	AAC	0	0	77.73
+III     	3	149914	149985	Met	ATG	0	0	74.96
+III     	4	227934	228034	Ser	TCG	227971	227989	75.88
+III     	5	295477	295549	Thr	ACT	0	0	78.63
+III     	6	168364	168293	Gln	CAA	0	0	68.39
+III     	7	151350	151278	Lys	AAG	0	0	77.79
+III     	8	142765	142695	Gly	GGC	0	0	62.73
+III     	9	123642	123571	Pro	CCT	0	0	66.99
+III     	10	82532	82461	Glu	GAA	0	0	62.99
+IV      	1	83549	83619	Gly	GGC	0	0	62.73
+IV      	2	359576	359671	Lys	AAA	359613	359635	69.00
+IV      	3	410375	410447	Ala	GCT	0	0	74.48
+IV      	4	437768	437849	Ser	TCT	0	0	85.00
+IV      	5	519737	519820	Leu	TTA	0	0	69.31
+IV      	6	520966	521037	Gln	CAA	0	0	68.39
+IV      	7	568877	568948	Arg	AGA	0	0	66.88
+IV      	8	568959	569030	Asp	GAC	0	0	58.36
+IV      	9	884353	884485	Ile	ATA	884390	884449	61.82
+IV      	10	946303	946391	Tyr	TAC	946342	946355	68.81
+IV      	11	992823	992893	Gly	GGC	0	0	62.73
+IV      	12	1175818	1175890	Met	ATG	0	0	70.03
+IV      	13	1201738	1201810	Lys	AAG	0	0	77.79
+IV      	14	1305618	1305700	Ser	TCT	0	0	84.84
+IV      	15	1461815	1461701	Leu	TTG	1461777	1461745	54.09
+IV      	16	1352526	1352454	Lys	AAG	0	0	77.79
+IV      	17	1257067	1256996	Gly	GGG	0	0	75.60
+IV      	18	1150930	1150831	Undet	???	0	0	57.54
+IV      	19	1095453	1095362	Phe	TTC	1095416	1095398	71.83
+IV      	20	1075535	1075463	Val	GTG	0	0	75.97
+IV      	21	1017269	1017198	Glu	GAG	0	0	59.79
+IV      	22	981046	980965	Ser	TCT	0	0	85.00
+IV      	23	802795	802724	Gln	CAA	0	0	68.39
+IV      	24	668073	668000	Ile	ATT	0	0	71.40
+IV      	25	645217	645146	Gln	CAA	0	0	68.39
+IV      	26	620034	619962	Arg	CGT	0	0	71.46
+IV      	27	488865	488792	Val	GTA	0	0	74.06
+IV      	28	434332	434260	Thr	ACT	0	0	78.63
+V       	1	86604	86685	Ser	TCT	0	0	85.00
+V       	2	100133	100204	Met	ATG	0	0	74.96
+V       	3	138666	138737	Arg	AGA	0	0	66.88
+V       	4	177098	177169	Glu	GAA	0	0	62.99
+V       	5	207356	207427	His	CAC	0	0	66.37
+V       	6	250285	250356	Gln	CAA	0	0	68.39
+V       	7	354930	355001	Glu	GAA	0	0	62.99
+V       	8	438696	438769	Val	GTT	0	0	68.42
+V       	9	469452	469525	Val	GTT	0	0	68.42
+V       	10	551353	551280	Ile	ATT	0	0	71.40
+V       	11	492419	492347	Arg	CGT	0	0	71.46
+V       	12	487397	487326	Glu	GAA	0	0	62.99
+V       	13	443271	443198	Ile	ATT	0	0	71.40
+V       	14	435820	435748	Lys	AAG	0	0	77.79
+V       	15	434608	434537	His	CAC	0	0	66.37
+V       	16	312091	312019	Ala	GCA	0	0	76.57
+V       	17	288522	288441	Ser	TCA	0	0	84.16
+V       	18	135497	135425	Lys	AAG	0	0	77.79
+V       	19	131153	131082	Gln	CAA	0	0	68.78
+V       	20	61960	61890	Gly	GGC	0	0	62.73
+VI      	1	101370	101472	Pro	CCA	101406	101436	60.63
+VI      	2	162221	162291	Gly	GGC	0	0	62.73
+VI      	3	167429	167517	Tyr	TAC	167468	167481	68.18
+VI      	4	226746	226675	Lys	AAG	0	0	70.32
+VI      	5	210694	210606	Tyr	TAC	210655	210642	68.18
+VI      	6	204983	204911	Ala	GCT	0	0	74.48
+VI      	7	191600	191500	Ser	AGC	191563	191545	68.67
+VI      	8	181035	180965	Gly	GGC	0	0	62.73
+VI      	9	158000	157909	Phe	TTC	157963	157945	69.79
+VI      	10	137553	137480	Asn	AAC	0	0	77.73
+VII     	1	115490	115585	Lys	AAA	115527	115549	69.00
+VII     	2	122271	122343	Lys	AAG	0	0	77.79
+VII     	3	205525	205638	Leu	TTG	205563	205594	53.40
+VII     	4	319782	319853	His	CAC	0	0	66.37
+VII     	5	328584	328655	Glu	GAA	0	0	62.99
+VII     	6	405471	405542	Arg	AGA	0	0	66.88
+VII     	7	423096	423209	Leu	TTG	423134	423165	54.18
+VII     	8	531611	531682	Asp	GAC	0	0	58.36
+VII     	9	541851	541922	Glu	GAA	0	0	62.99
+VII     	10	661750	661821	Thr	ACA	0	0	77.84
+VII     	11	700676	700757	Leu	CTC	0	0	64.51
+VII     	12	731139	731212	Asn	AAC	0	0	77.73
+VII     	13	736342	736413	Arg	AGA	0	0	66.88
+VII     	14	739124	739197	Ile	ATT	0	0	71.40
+VII     	15	774351	774423	Ala	GCT	0	0	74.48
+VII     	16	779618	779689	Gly	GGA	0	0	66.72
+VII     	17	794419	794491	Ala	GCA	0	0	76.57
+VII     	18	823484	823557	Val	GTT	0	0	68.42
+VII     	19	845651	845721	Gly	GGC	0	0	62.73
+VII     	20	876396	876468	Lys	AAG	0	0	77.79
+VII     	21	1004220	1004291	Thr	ACA	0	0	77.84
+VII     	22	931023	930953	Gly	GGC	0	0	62.73
+VII     	23	878817	878712	Trp	TGG	878781	878748	67.27
+VII     	24	857493	857380	Leu	TTG	857455	857424	54.18
+VII     	25	828796	828725	Arg	AGA	0	0	66.88
+VII     	26	707180	707109	Cys	TGC	0	0	75.04
+VII     	27	701049	700954	Lys	AAA	701012	700990	69.64
+VII     	28	561744	561663	Ser	TCT	0	0	85.00
+VII     	29	544649	544578	Asp	GAC	0	0	58.36
+VII     	30	440810	440719	Phe	TTC	440773	440755	69.79
+VII     	31	412368	412295	Val	GTT	0	0	68.42
+VII     	32	401599	401528	Glu	GAA	0	0	62.99
+VII     	33	287459	287354	Trp	TGG	287423	287390	67.27
+VII     	34	185790	185718	Lys	AAG	0	0	77.79
+VII     	35	110698	110627	His	CAC	0	0	66.37
+VII     	36	73900	73827	Val	GTT	0	0	68.42
+VIII    	1	62752	62823	His	CAC	0	0	66.37
+VIII    	2	134312	134383	Gln	CAA	0	0	68.39
+VIII    	3	466986	467057	Thr	ACA	0	0	79.33
+VIII    	4	475774	475702	Val	GTG	0	0	75.97
+VIII    	5	388996	388894	Pro	CCA	388960	388930	59.08
+VIII    	6	358571	358480	Phe	TTC	358534	358516	69.79
+VIII    	7	237940	237849	Phe	TTC	237903	237885	69.79
+VIII    	8	146305	146233	Ala	GCT	0	0	74.48
+VIII    	9	133098	133017	Ser	TCT	0	0	85.00
+VIII    	10	116172	116100	Thr	ACT	0	0	78.63
+VIII    	11	85366	85293	Val	GTT	0	0	68.42
+IX      	1	175028	175100	Thr	ACT	0	0	78.63
+IX      	2	197589	197660	Glu	GAG	0	0	59.79
+IX      	3	210662	210735	Ile	ATT	0	0	71.40
+IX      	4	248847	248928	Ser	TCA	0	0	84.16
+IX      	5	325746	325818	Thr	ACT	0	0	78.63
+IX      	6	370414	370485	Glu	GAA	0	0	62.99
+IX      	7	336417	336346	Asp	GAC	0	0	58.36
+IX      	8	324372	324301	Asp	GAC	0	0	58.36
+IX      	9	300298	300227	Lys	AAG	0	0	70.32
+IX      	10	183510	183437	Ile	ATT	0	0	71.40
+X       	1	204432	204503	Asp	GAC	0	0	58.36
+X       	2	233636	233708	Arg	CGT	0	0	70.24
+X       	3	353940	354028	Tyr	TAC	353979	353992	68.18
+X       	4	355070	355141	Arg	AGA	0	0	66.88
+X       	5	355152	355223	Asp	GAC	0	0	58.36
+X       	6	378056	378129	Val	GTT	0	0	68.42
+X       	7	414659	414731	Lys	AAG	0	0	77.79
+X       	8	422630	422702	Met	ATG	0	0	70.03
+X       	9	531521	531591	Gly	GGC	0	0	62.73
+X       	10	538248	538319	Arg	AGG	0	0	71.23
+X       	11	541201	541272	Asp	GAC	0	0	58.36
+X       	12	617612	617712	Leu	CTA	617650	617668	61.03
+X       	13	542737	542649	Tyr	TAC	542698	542685	68.81
+X       	14	523786	523705	Ser	TCT	0	0	85.00
+X       	15	517577	517506	Met	ATG	0	0	74.96
+X       	16	424208	424125	Leu	TTA	0	0	69.31
+X       	17	415729	415624	Trp	TGG	415693	415660	67.27
+X       	18	396492	396422	Gly	GGC	0	0	62.73
+X       	19	390811	390739	Met	ATG	0	0	70.03
+X       	20	374273	374202	Arg	AGA	0	0	66.88
+X       	21	374191	374120	Asp	GAC	0	0	58.36
+X       	22	197084	197012	Ala	GCT	0	0	74.48
+X       	23	116004	115933	Glu	GAA	0	0	62.99
+X       	24	59172	59100	Thr	ACT	0	0	78.63
+XI      	1	74625	74698	Asn	AAC	0	0	77.73
+XI      	2	84209	84292	Leu	TTA	0	0	69.31
+XI      	3	202641	202713	Lys	AAG	0	0	77.79
+XI      	4	219537	219609	Ala	GCT	0	0	74.48
+XI      	5	302559	302664	Trp	TGG	302595	302628	67.27
+XI      	6	379318	379391	Val	GTT	0	0	68.42
+XI      	7	458195	458308	Leu	TTG	458233	458264	54.18
+XI      	8	490606	490678	Arg	CGT	0	0	71.46
+XI      	9	517625	517697	Ala	GCT	0	0	74.48
+XI      	10	578601	578696	Lys	AAA	578638	578660	69.00
+XI      	11	513040	512969	Asp	GAC	0	0	58.36
+XI      	12	313113	313042	His	CAC	0	0	66.37
+XI      	13	307858	307785	Val	GTT	0	0	68.42
+XI      	14	162559	162488	Arg	AGA	0	0	66.88
+XI      	15	141090	141019	Glu	GAA	0	0	62.99
+XI      	16	46807	46736	Thr	ACG	0	0	75.96
+XII     	1	92547	92649	Pro	CCA	92583	92613	59.86
+XII     	2	374356	374428	Arg	CGT	0	0	71.46
+XII     	3	427133	427204	Asp	GAC	0	0	58.36
+XII     	4	628383	628497	Leu	TTG	628421	628453	55.72
+XII     	5	732090	732190	Leu	CTA	732128	732146	61.03
+XII     	6	734802	734875	Ile	ATT	0	0	71.40
+XII     	7	784354	784453	Undet	???	0	0	57.54
+XII     	8	793918	793989	Asp	GAC	0	0	58.36
+XII     	9	797178	797249	Glu	GAA	0	0	62.99
+XII     	10	818609	818680	Arg	CGG	0	0	57.65
+XII     	11	875375	875470	Lys	AAA	875412	875434	69.00
+XII     	12	1052140	1052067	Ile	ATT	0	0	71.40
+XII     	13	976051	975978	Asn	AAC	0	0	77.73
+XII     	14	963050	962967	Leu	TTA	0	0	69.31
+XII     	15	687932	687859	Val	GTT	0	0	68.42
+XII     	16	657006	656934	Ala	GCT	0	0	74.48
+XII     	17	605432	605300	Ile	ATA	605395	605336	61.58
+XII     	18	592619	592519	Leu	CTA	592581	592563	60.39
+XII     	19	448722	448651	Gln	CAA	0	0	68.39
+XII     	20	214956	214884	Ala	GCA	0	0	76.57
+XII     	21	168024	167943	Ser	TCT	0	0	85.00
+XIII    	1	168795	168883	Tyr	TAC	168834	168847	68.18
+XIII    	2	290800	290871	Glu	GAA	0	0	57.99
+XIII    	3	352279	352369	Phe	TTC	352316	352333	69.38
+XIII    	4	363063	363134	His	CAC	0	0	66.37
+XIII    	5	463553	463624	Asp	GAC	0	0	58.36
+XIII    	6	504894	505007	Leu	TTG	504932	504963	54.18
+XIII    	7	837928	838016	Tyr	TAC	837967	837980	68.18
+XIII    	8	808317	808246	Gln	CAG	0	0	64.54
+XIII    	9	768440	768368	Ala	GCT	0	0	74.48
+XIII    	10	747962	747891	Arg	AGA	0	0	66.88
+XIII    	11	586708	586635	Val	GTT	0	0	68.42
+XIII    	12	572954	572882	Met	ATG	0	0	70.03
+XIII    	13	480692	480620	Lys	AAG	0	0	77.79
+XIII    	14	420660	420587	Val	GTT	0	0	68.42
+XIII    	15	379407	379302	Trp	TGG	379371	379338	67.27
+XIII    	16	372517	372444	Val	GTT	0	0	68.42
+XIII    	17	321218	321146	Ala	GCT	0	0	74.48
+XIII    	18	259239	259158	Ser	TCT	0	0	85.00
+XIII    	19	196170	196068	Pro	CCA	196134	196104	60.49
+XIII    	20	183968	183898	Gly	GGC	0	0	62.73
+XIII    	21	131896	131825	Arg	AGA	0	0	66.88
+XIV     	1	102714	102787	Asn	AAC	0	0	77.73
+XIV     	2	104803	104875	Thr	ACT	0	0	78.63
+XIV     	3	374866	374956	Phe	TTC	374903	374920	70.15
+XIV     	4	443003	443116	Leu	TTG	443041	443072	54.18
+XIV     	5	547090	547192	Pro	CCA	547126	547156	59.08
+XIV     	6	568111	568213	Pro	CCA	568147	568177	59.08
+XIV     	7	602308	602381	Ile	ATT	0	0	71.40
+XIV     	8	632596	632669	Asn	AAC	0	0	77.73
+XIV     	9	726213	726130	Leu	TTA	0	0	69.31
+XIV     	10	631914	631843	Pro	CCT	0	0	66.99
+XIV     	11	569936	569863	Ile	ATT	0	0	71.40
+XIV     	12	560761	560689	Thr	ACT	0	0	78.63
+XIV     	13	519165	519095	Asp	GAC	0	0	31.75
+XIV     	14	96310	96239	Gly	GGA	0	0	66.72
+XV      	1	113801	113873	Thr	ACT	0	0	78.63
+XV      	2	228329	228402	Asn	AAC	0	0	77.73
+XV      	3	274673	274773	Ser	AGC	274710	274728	68.67
+XV      	4	282164	282234	Gly	GGC	0	0	62.73
+XV      	5	288192	288280	Tyr	TAC	288231	288244	68.81
+XV      	6	301097	301198	Pro	CCA	301133	301162	58.94
+XV      	7	438644	438739	Lys	AAA	438681	438703	69.00
+XV      	8	464451	464552	Pro	CCA	464487	464516	59.59
+XV      	9	487440	487513	Asn	AAC	0	0	77.73
+XV      	10	571958	572029	Asp	GAC	0	0	58.36
+XV      	11	710201	710272	Met	ATG	0	0	74.96
+XV      	12	976414	976486	Met	ATG	0	0	70.03
+XV      	13	980676	980780	Pro	CCA	980712	980744	55.42
+XV      	14	854256	854184	Ala	GCA	0	0	76.57
+XV      	15	663885	663812	Val	GTT	0	0	67.95
+XV      	16	594425	594354	Gly	GGG	0	0	80.95
+XV      	17	354112	354040	Thr	ACT	0	0	78.63
+XV      	18	340371	340299	Arg	CGT	0	0	71.46
+XV      	19	226679	226609	Gly	GGC	0	0	62.73
+XV      	20	111032	110961	Gly	GGA	0	0	66.72
+XVI     	1	572264	572334	Gly	GGC	0	0	62.73
+XVI     	2	582057	582129	Lys	AAG	0	0	77.79
+XVI     	3	689560	689641	Ser	TCA	0	0	84.16
+XVI     	4	744281	744352	Thr	ACA	0	0	77.84
+XVI     	5	810671	810744	Asn	AAC	0	0	77.73
+XVI     	6	819524	819597	Ile	ATT	0	0	71.40
+XVI     	7	856897	856969	Ala	GCT	0	0	74.48
+XVI     	8	860374	860444	Gly	GGC	0	0	62.73
+XVI     	9	880364	880291	Ile	ATT	0	0	71.40
+XVI     	10	775833	775762	Cys	TGC	0	0	75.04
+XVI     	11	769299	769204	Lys	AAA	769262	769240	69.00
+XVI     	12	622626	622535	Phe	TTC	622589	622571	69.79
+XVI     	13	560284	560193	Phe	TTC	560247	560229	69.79
+XVI     	14	435959	435888	Cys	TGC	0	0	75.04
+XVI     	15	338918	338847	Met	ATG	0	0	74.96
+XVI     	16	210262	210191	Glu	GAA	0	0	62.99
+XVI     	17	56274	56169	Trp	TGG	56238	56205	67.27
+XVII    	1	9374 	9444 	SeC	TGA	0	0	51.26
+XVII    	2	35373	35444	Glu	GAA	0	0	38.30
+XVII    	3	48201	48287	Ser	TCA	0	0	51.45
+XVII    	4	63862	63934	Thr	ACA	0	0	65.36
+XVII    	5	64597	64667	His	CAC	0	0	48.69
+XVII    	6	66095	66176	Pseudo	TTA	0	0	37.52
+XVII    	7	66210	66282	Pseudo	CAA	0	0	29.02
+XVII    	8	67061	67132	Lys	AAA	0	0	34.97
+XVII    	9	67309	67381	Arg	AGA	0	0	58.95
+XVII    	10	67468	67539	Pseudo	GGA	0	0	46.63
+XVII    	11	68322	68393	Pseudo	GAC	0	0	28.24
+XVII    	12	69289	69359	Arg	CGT	0	0	42.27
+XVII    	13	69846	69918	Ala	GCA	0	0	34.42
+XVII    	14	70162	70234	Ile	ATC	0	0	43.88
+XVII    	15	71433	71504	Asn	AAC	0	0	51.73
+XVII    	16	72632	72705	Met	ATG	0	0	50.11
+XVII    	17	77431	77502	Phe	TTC	0	0	46.41
+XVII    	18	78533	78605	Val	GTA	0	0	63.19
+XVII    	19	85035	85107	Pseudo	ATG	0	0	38.75
+XVII    	20	78162	78091	Pseudo	TAA	0	0	39.47

Added: trunk/packages/bioperl/branches/upstream/current/t/data/yn00.mlc
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/data/yn00.mlc	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/data/yn00.mlc	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,106 @@
+YN00 examples/abglobin.nuc
+
+ns =   5	ls = 285
+
+Codon position x base (3x4) table for each sequence.
+
+human               
+position  1:    T:0.12982    C:0.25965    A:0.21404    G:0.39649
+position  2:    T:0.29474    C:0.26667    A:0.30526    G:0.13333
+position  3:    T:0.18947    C:0.41404    A:0.03509    G:0.36140
+
+goat-cow            
+position  1:    T:0.13684    C:0.23509    A:0.22807    G:0.40000
+position  2:    T:0.30526    C:0.24211    A:0.31228    G:0.14035
+position  3:    T:0.21754    C:0.39649    A:0.03509    G:0.35088
+
+rabbit              
+position  1:    T:0.14386    C:0.24912    A:0.24561    G:0.36140
+position  2:    T:0.29474    C:0.23860    A:0.32982    G:0.13684
+position  3:    T:0.19298    C:0.40351    A:0.04912    G:0.35439
+
+rat                 
+position  1:    T:0.14737    C:0.22807    A:0.24561    G:0.37895
+position  2:    T:0.28070    C:0.23860    A:0.32632    G:0.15439
+position  3:    T:0.25965    C:0.38596    A:0.07018    G:0.28421
+
+marsupial           
+position  1:    T:0.17895    C:0.22105    A:0.24561    G:0.35439
+position  2:    T:0.28772    C:0.27018    A:0.30877    G:0.13333
+position  3:    T:0.25965    C:0.38596    A:0.06316    G:0.29123
+
+Average
+position  1:    T:0.14737    C:0.23860    A:0.23579    G:0.37825
+position  2:    T:0.29263    C:0.25123    A:0.31649    G:0.13965
+position  3:    T:0.22386    C:0.39719    A:0.05053    G:0.32842
+
+Codon usage for each species
+--------------------------------------------------------------------------------------------------
+Phe TTT  5  8  3  3  6 | Ser TCT  4  2  6  7  6 | Tyr TAT  3  2  3  1  1 | Cys TGT  2  1  1  2  2
+    TTC 10  9 13 11  8 |     TCC  6  7  7  3  8 |     TAC  3  3  3  5  5 |     TGC  1  1  1  3  3
+Leu TTA  0  0  0  0  0 |     TCA  0  0  0  0  1 | *** TAA  0  0  0  0  0 | *** TGA  0  0  0  0  0
+    TTG  0  2  1  4  5 |     TCG  0  1  0  0  2 |     TAG  0  0  0  0  0 | Trp TGG  3  3  3  3  4
+--------------------------------------------------------------------------------------------------
+Leu CTT  1  1  0  1  3 | Pro CCT  7  2  4  7  3 | His CAT  2  4  4  5  6 | Arg CGT  1  2  1  2  1
+    CTC  5  4  4  4  7 |     CCC  3  6  5  4  7 |     CAC 16 11 15 14 12 |     CGC  0  1  0  0  0
+    CTA  1  2  0  2  1 |     CCA  2  0  1  0  0 | Gln CAA  0  0  0  0  1 |     CGA  0  0  0  0  0
+    CTG 29 28 30 21 15 |     CCG  2  2  1  0  0 |     CAG  4  4  5  5  7 |     CGG  1  0  1  0  0
+--------------------------------------------------------------------------------------------------
+Ile ATT  0  0  1  4  1 | Thr ACT  3  4  4  3 10 | Asn AAT  1  5  5  3  2 | Ser AGT  2  3  5  2  1
+    ATC  0  0  3  2  7 |     ACC 12 11 12  9  9 |     AAC  9  7  7  8  4 |     AGC  4  4  3  5  3
+    ATA  0  0  0  1  0 |     ACA  1  0  0  1  0 | Lys AAA  4  3  5  5  3 | Arg AGA  0  1  0  0  2
+Met ATG  3  3  2  4  5 |     ACG  0  0  0  0  0 |     AAG 18 21 19 19 21 |     AGG  4  3  4  4  2
+--------------------------------------------------------------------------------------------------
+Val GTT  5  5  4  4  6 | Ala GCT  8 11  8 13 10 | Asp GAT  5  8  1 11  6 | Gly GGT  5  4  5  6 10
+    GTC  4  6  2  4  3 |     GCC 21 18 16 18 19 |     GAC 10 10 10  8 10 |     GGC 14 15 14 12  5
+    GTA  0  0  0  1  1 |     GCA  0  1  1  3  2 | Glu GAA  2  2  7  4  4 |     GGA  0  1  0  3  3
+    GTG 21 19 21 14 14 |     GCG  7  4  3  0  0 |     GAG 10  9 10  5  6 |     GGG  1  1  1  2  2
+--------------------------------------------------------------------------------------------------
+
+
+Sums
+--------------------------------------------------
+Phe TTT 25 | Ser TCT 25 | Tyr TAT 10 | Cys TGT  8
+    TTC 51 |     TCC 31 |     TAC 19 |     TGC  9
+Leu TTA  0 |     TCA  1 | *** TAA  0 | *** TGA  0
+    TTG 12 |     TCG  3 |     TAG  0 | Trp TGG 16
+--------------------------------------------------
+Leu CTT  6 | Pro CCT 23 | His CAT 21 | Arg CGT  7
+    CTC 24 |     CCC 25 |     CAC 68 |     CGC  1
+    CTA  6 |     CCA  3 | Gln CAA  1 |     CGA  0
+    CTG 123 |     CCG  5 |     CAG 25 |     CGG  2
+--------------------------------------------------
+Ile ATT  6 | Thr ACT 24 | Asn AAT 16 | Ser AGT 13
+    ATC 12 |     ACC 53 |     AAC 35 |     AGC 19
+    ATA  1 |     ACA  2 | Lys AAA 20 | Arg AGA  3
+Met ATG 17 |     ACG  0 |     AAG 98 |     AGG 17
+--------------------------------------------------
+Val GTT 24 | Ala GCT 50 | Asp GAT 31 | Gly GGT 30
+    GTC 19 |     GCC 92 |     GAC 48 |     GGC 60
+    GTA  2 |     GCA  7 | Glu GAA 19 |     GGA  7
+    GTG 89 |     GCG 14 |     GAG 40 |     GGG  7
+--------------------------------------------------
+
+
+Nei & Gojobori 1986. dN/dS (dN, dS)
+human          
+goat-cow         0.251(0.0863 0.3443)
+rabbit           0.263(0.0867 0.3301)  0.294(0.1054 0.3581)
+rat              0.204(0.1261 0.6164)  0.246(0.1493 0.6065)  0.218(0.1348 0.6187)
+marsupial        0.190(0.1931 1.0148)  0.189(0.1910 1.0099)  0.218(0.2111 0.9668)  0.272(0.2404 0.8852)
+
+Estimation by the method of Yang & Nielsen (2000):
+(equal weighting of pathways)
+
+seq. seq.     S       N        t   kappa   omega     dN +- SE    dS +- SE
+
+   2    1   183.7   671.3   0.5169  1.5804  0.1625 0.0818 +- 0.0115  0.5031 +- 0.0930
+   3    1   177.5   677.5   0.5033  1.5804  0.1642 0.0815 +- 0.0114  0.4967 +- 0.0927
+   3    2   180.1   674.9   0.5627  1.5804  0.1929 0.0997 +- 0.0128  0.5169 +- 0.0925
+   4    1   191.3   663.7   0.9075  1.5804  0.1308 0.1216 +- 0.0144  0.9301 +- 0.1987
+   4    2   192.4   662.6   0.9642  1.5804  0.1557 0.1447 +- 0.0159  0.9297 +- 0.2091
+   4    3   187.0   668.0   0.9790  1.5804  0.1262 0.1298 +- 0.0149  1.0286 +- 0.2614
+   5    1   189.3   665.7   2.1638  1.5804  0.0711 0.1853 +- 0.0184  2.6055 +- 3.9344
+   5    2   190.4   664.6   1.2984  1.5804  0.1416 0.1842 +- 0.0184  1.3008 +- 0.1995
+   5    3   185.2   669.8   1.3214  1.5804  0.1554 0.2023 +- 0.0194  1.3020 +- 0.2026
+   5    4   194.5   660.5   1.5554  1.5804  0.1591 0.2354 +- 0.0215  1.4797 +- 0.5173

Added: trunk/packages/bioperl/branches/upstream/current/t/ePCR.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ePCR.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ePCR.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+# Checks that Bio::Tools::EPCR turns t/data/genomic-seq.epcr into usable sequence features.
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';   # NB: `use` is compile-time, so t/ is added whether or not the require failed
+    }
+    use Test;
+    use vars qw($NUMTESTS $DEBUG);
+    $NUMTESTS = 25;
+    $DEBUG    = $ENV{'BIOPERLDEBUG'} || 0;   # was hard-coded to 1; dump only when asked to
+    plan tests => $NUMTESTS;
+}
+
+use Bio::Tools::EPCR;
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+my $seqio = Bio::SeqIO->new('-format' => 'fasta', '-file' => Bio::Root::IO->catfile("t","data","genomic-seq.fasta"));
+
+my $seq = $seqio->next_seq;
+ok($seq);
+my $epcr = Bio::Tools::EPCR->new( '-file' => Bio::Root::IO->catfile("t","data","genomic-seq.epcr"));
+ok ($epcr);
+my %strand;   # number of features seen per strand value
+while( defined(my $feature = $epcr->next_feature) ) {
+    ok($feature);
+    ok($feature->start);
+    ok($feature->end);
+    $seq->add_SeqFeature($feature);
+    $strand{$feature->strand} ++;
+}
+ok ($strand{1},  3 , 'expected 3 forward strand ePCR hits');
+ok ($strand{-1}, 3 , 'expected 3 reverse strand ePCR hits');
+
+# Optional debug aid: write the sequence, with the features attached above, in GenBank format.
+if( $DEBUG ) {
+    $seqio = Bio::SeqIO->new('-format' => 'genbank' );
+    $seqio->write_seq($seq);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/embl.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/embl.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/embl.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,188 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: embl.t,v 1.5.2.1 2006/10/31 15:48:58 cjfields Exp $
+# Tests reading and writing of EMBL-format entries through Bio::SeqIO.
+use strict;
+
+BEGIN {
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';	# NB: `use` is compile-time, so t/ is added whether or not the require failed
+	}
+	use Test;
+	plan tests => 56;
+}
+
+use Bio::Seq;
+use Bio::SeqIO;
+use Bio::Annotation::Collection;
+
+ok(1);	# only reached when the modules above compiled
+
+# Parser verbosity: BIOPERLDEBUG when set, otherwise 0. (Set to -1 for release version, so warnings aren't printed.)
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+# roa1.dat: here the expected display id (HSHNCPA1) differs from the accession number.
+my $ast = Bio::SeqIO->new( -format => 'embl',
+			   -verbose => $verbose,
+			   -file => Bio::Root::IO->catfile
+			   ("t","data","roa1.dat"));
+$ast->verbose($verbose);
+my $as = $ast->next_seq();
+ok defined $as->seq;
+ok($as->display_id, 'HSHNCPA1');
+ok($as->accession_number, 'X79536');
+ok($as->seq_version, 1);
+ok($as->version, 1);
+ok($as->desc, 'H.sapiens mRNA for hnRNPcore protein A1');
+ok($as->molecule, 'RNA');
+ok($as->alphabet, 'rna');
+ok(scalar $as->all_SeqFeatures(), 4);
+ok($as->length, 1198);
+ok($as->species->binomial(), 'Homo sapiens');
+
+# EMBL Release 87 changes (8-17-06)
+# roa1_v2.dat: same checks as above; only display_id and molecule are expected to differ.
+$ast = Bio::SeqIO->new( -format => 'embl',
+			   -verbose => $verbose,
+			   -file => Bio::Root::IO->catfile
+			   ("t","data","roa1_v2.dat"));
+$ast->verbose($verbose);
+$as = $ast->next_seq();
+ok defined $as->seq;
+# accession # same as display name now
+ok($as->display_id, 'X79536'); 
+ok($as->accession_number, 'X79536');
+ok($as->seq_version, 1);
+ok($as->version, 1);
+ok($as->desc, 'H.sapiens mRNA for hnRNPcore protein A1');
+# mRNA instead of RNA
+ok($as->molecule, 'mRNA');
+ok($as->alphabet, 'rna');
+ok(scalar $as->all_SeqFeatures(), 4);
+ok($as->length, 1198);
+ok($as->species->binomial(), 'Homo sapiens');
+
+# Round trip: read the first entry of t/data/test.embl, then write it back out as EMBL.
+my $ent = Bio::SeqIO->new( -file   => Bio::Root::IO->catfile("t","data","test.embl"),
+                           -format => 'embl');
+my $seq = $ent->next_seq();
+ok(defined $seq->seq(), 1,
+   'failure to read Embl with ^ location and badly split double quotes');
+ok(scalar $seq->annotation->get_Annotations('reference'), 3);
+
+my $out = Bio::SeqIO->new(-file   => ">embl.out",
+                          -format => 'embl');
+ok($out->write_seq($seq),1,
+   'failure to write Embl format with ^ < and > locations');
+$out->close();   # release embl.out first: a file that is still open cannot be unlinked on every platform
+unlink("embl.out");
+
+# embl with no FT
+# NOTE(review): despite the label this reopens test.embl, the file already read above - confirm the intended fixture.
+my $test_embl_path = Bio::Root::IO->catfile("t","data","test.embl");
+$ent = Bio::SeqIO->new(-file => $test_embl_path, -format => 'embl');
+
+# embl with no FH
+my $no_fh_path = Bio::Root::IO->catfile("t","data","no_FH.embl");
+my $noFH = Bio::SeqIO->new(-file => $no_fh_path, -format => 'embl');
+my $no_fh_entry = $noFH->next_seq;
+ok(scalar($no_fh_entry->get_SeqFeatures), 4);
+
+# Back to the stream opened under "no FT": its first entry must parse,
+# begin with the expected ten bases and report a non-zero length.
+$seq = $ent->next_seq();
+ok($seq);
+ok(lc($seq->subseq(1,10)),'gatcagtaga');
+ok($seq->length);
+
+# bug 1571: the first entry of test.embl2sq must report a length of 4877
+$ent = Bio::SeqIO->new(-format => 'embl',
+                       -file   => Bio::Root::IO->catfile(qw(t data test.embl2sq)));
+ok($ent->next_seq->length,4877);
+
+# embl repbase: the display id must come back as 'BEL16-LTR_AG'
+$ent = Bio::SeqIO->new(-format => 'embl',
+                       -file   => Bio::Root::IO->catfile("t","data","BEL16-LTR_AG.embl"));
+$seq = $ent->next_seq;
+ok($seq->display_id,'BEL16-LTR_AG');
+
+# test secondary accessions in EMBL (bug #1332)
+# Class->new is used instead of the indirect-object "new Class" form, which Perl can mis-parse.
+my $seqio = Bio::SeqIO->new(-format => 'embl',
+                            -file   => Bio::Root::IO->catfile(qw(t data ECAPAH02.embl)));
+$seq = $seqio->next_seq;
+ok($seq->accession_number, 'D10483');
+ok($seq->seq_version, 2);
+# check the first and the last secondary accession returned
+my @accs = $seq->get_secondary_accessions();
+ok($accs[0], 'J01597');
+ok($accs[-1], 'X56742');
+
+### TPA TESTS - Thanks to Richard Adams ###
+# test Third Party Annotation entries in EMBL/Gb format
+# to ensure compatibility with parsers.
+my $str = Bio::SeqIO->new(-format => 'embl',
+                          -file   => Bio::Root::IO->catfile(qw(t data BN000066-tpa.embl)));
+$seq = $str->next_seq;
+ok(defined $seq);
+ok($seq->accession_number, 'BN000066');
+ok($seq->alphabet, 'dna');
+ok($seq->display_id, 'AGA000066');
+ok($seq->length, 5195);
+ok($seq->division, 'INV');
+ok($seq->get_dates, 2);   # presumably compared as a count of dates - TODO confirm what get_dates returns in scalar context
+ok($seq->keywords, 'acetylcholinesterase; achE1 gene; Third Party Annotation; TPA.');
+ok($seq->seq_version, 1);
+ok($seq->feature_count, 15);
+
+my $spec_obj = $seq->species;
+ok ($spec_obj->common_name, 'African malaria mosquito');
+ok ($spec_obj->species, 'gambiae');
+ok ($spec_obj->genus, 'Anopheles');
+ok ($spec_obj->binomial, 'Anopheles gambiae');
+
+# inspect the second 'reference' annotation and the first 'comment' annotation
+my $ac = $seq->annotation;
+my $reference =  ($ac->get_Annotations('reference') )[1];
+ok ($reference->title,'"A novel acetylcholinesterase gene in mosquitoes codes for the insecticide target and is non-homologous to the ace gene in Drosophila"');
+ok ($reference->authors,'Weill M., Fort P., Berthomi eu A., Dubois M.P., Pasteur N., Raymond M.');
+my $cmmnt =  ($ac->get_Annotations('comment') )[0];
+ok($cmmnt->text, 'see also AJ488492 for achE-1 from Kisumu strain Third Party Annotation Database: This TPA record uses Anopheles gambiae trace archive data (http://trace.ensembl.org) ');
+
+# Regression check: element 3 of the classification list must not equal the genus.
+my $genus_check_path = Bio::Root::IO->catfile("t","data","test.embl");
+$ent = Bio::SeqIO->new(-file => $genus_check_path, -format => 'embl');
+$ent->verbose($verbose);
+$seq = $ent->next_seq();
+my $species = $seq->species();
+my @cl      = $species->classification();
+my $genus_is_distinct = $cl[3] ne $species->genus();
+ok( $genus_is_distinct, 1, 'genus duplicated in EMBL parsing');
+$ent->close();
+
+#
+## read-write - test embl writing of a PrimarySeq
+#
+my $primaryseq = Bio::PrimarySeq->new( -seq              => 'AGAGAGAGATA',
+                                       -id               => 'myid',
+                                       -desc             => 'mydescr',
+                                       -alphabet         => 'DNA',
+                                       -accession_number => 'myaccession');
+
+# silence warnings unless we are debugging
+$verbose = -1 unless $ENV{'BIOPERLDEBUG'};
+
+my $embl = Bio::SeqIO->new(-format  => 'embl',
+                           -verbose => $verbose,
+                           -file    => ">primaryseq.embl");
+
+ok($embl->write_seq($primaryseq));
+
+# A plain string is not a sequence object: write_seq must die and leave $@ set.
+my $scalar = "test";
+eval {
+	$embl->write_seq($scalar);
+};
+ok ($@);
+
+$embl->close();   # release primaryseq.embl before deleting it; an open file cannot be unlinked on every platform
+unlink("primaryseq.embl");

Added: trunk/packages/bioperl/branches/upstream/current/t/entrezgene.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/entrezgene.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/entrezgene.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,455 @@
+#!/usr/bin/perl
+
+use strict;
+use Bio::Root::IO;
+use Data::Dumper;
+use vars qw($DEBUG $NUMTESTS $ASNOK);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	eval {
+		require Bio::ASN1::EntrezGene;
+		$ASNOK=1;
+	};
+	if ($@) {
+		$ASNOK = 0;
+		warn "Bio::ASN1::EntrezGene not installed, skipping tests\n";
+	}
+    plan tests => ($NUMTESTS = 1003);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot complete entrezgene tests',1);
+	}
+}
+
+exit(0) unless $ASNOK;
+
+use Bio::SeqIO; 
+
+my @species=('Homo sapiens','Mus musculus', 'Caenorhabditis elegans');
+my @pubmed=qw(15461460
+15221005
+14702039
+12477932
+8889549
+3610142
+3458201
+2591067);
+
+my %pmed=(1=>8, 
+            2=>55,
+            3=>1,
+            4=>0,
+            5=>0,
+            6=>0,
+            7=>0,
+            8=>1,
+            9=>32,
+            10=>58,
+            11=>1,
+            12=>76,
+            13=>7,
+            14=>5,
+            15=>13,
+            9996=>0,
+            11286=>0,
+            11287=>5,
+            11288=>0,
+            11289=>0,
+            11293=>0,
+            11294=>0,
+            11295=>0,
+            11296=>0,
+            11297=>0,
+            11298=>3,
+            11299=>0,
+            11300=>0,
+            11301=>0,
+            11302=>9,
+            11303=>54,
+            11304=>11,
+            11305=>3,
+            11306=>9,
+            171590=>0,
+            171591=>0,
+            171592=>0,
+            171593=>0,
+            171594=>0);
+            
+my %asym=(1=>['A1B', 'ABG', 'GAB', 'HYST2477', 'DKFZp686F0970'],
+            2=>['FWP007','S863-7','DKFZp779B086'], 4=>['A12M1'], 5=>['A12M2'],6=>['A12M3'],7=>['A12M4'],
+            9=>['AAC1'],10=>['AAC2'],11=>['NATP'],
+            12=>['ACT','AACT','MGC88254'],13=>['DAC'],15=>['SNAT','AA-NAT'],
+            14=>[''],
+            11287=>['A1m','A2m','MAM'],
+            11298=>['Nat4','SNAT','Nat-2'],
+            11302=>['AATYK','mKIAA0641'],11303=>['Abc1'],
+            11304=>['RmP','Abcr','Abc10','D430003I15Rik'],
+            11305=>['Abc2','mKIAA1062','D2H0S1474E'],
+            11306=>['Abc7'],
+            171590=>['Y74C9A.3','CELK05052'],
+            171591=>['Y74C9A.2','CELK01753'],
+            171592=>['Y74C9A.4a','Y74C9A.4b','CELK08126'],
+            171593=>['Y74C9A.5','CELK09643'],
+            171594=>['Y48G1C.4','CELK05819']);
+            
+my @ids=qw(1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+9996
+11286
+11287
+11288
+11289
+11293
+11294
+11295
+11296
+11297
+11298
+11299
+11300
+11301
+11302
+11303
+11304
+11305
+11306
+171590
+171591
+171592
+171593
+171594);
+ok(1);
+
+my $fs='!';
+my @revkeys=('Entrez Gene Status','RefSeq status','Official Full Name','chromosome','cyto','Reference','dblink',
+'ALIAS_SYMBOL','OntologyTerm','Index terms','Official Symbol','cM','Property');
+
+
+my $eio=new Bio::SeqIO(-file=>Bio::Root::IO->catfile("t","data",
+							 "entrezgene.dat"),-format=>'entrezgene', -debug=>'on',-service_record=>'yes');
+ok $eio;
+my ($seq,$struct,$uncapt);
+while (1) {
+my $seq;
+($seq,$struct,$uncapt)=$eio->next_seq;
+last unless ($seq);
+
+#T0: GENERAL TESTS
+ok $seq;
+ok ref($struct),'Bio::Cluster::SequenceFamily';
+my $acc=$seq->accession_number;
+
+#T1: ORGANISM
+my $org=$seq->species->binomial;
+ok grep(/\b$org\b/,@species),1,$org;
+
+#T2: SUMMARY test
+ok $seq->desc if ($acc eq '1')||($acc eq '2')||($acc eq '11304');
+ok !defined $seq->desc if ($acc eq '171592')||($acc eq '11306');
+
+#Are we supposed to have this in our test?
+ok grep(/\b$acc\b/,@ids),1;
+
+my $ann=$seq->annotation();
+my $tcount;
+
+#T3: ENTREZGENE STATUS TESTS
+my @egstatus=$ann->get_Annotations('Entrez Gene Status');
+foreach my $status (@egstatus) {
+ STATUS: {
+		if ($acc==1) {ok $status->value,'live'; last STATUS;}
+		if ($acc==2) {ok $status->value,'live'; last STATUS;}
+		if ($acc==4) {ok $status->value,'discontinued'; last STATUS;}
+		if ($acc==6) {ok $status->value,'discontinued'; last STATUS;}
+		if ($acc==11288) {ok $status->value,'secondary'; last STATUS;}
+		if ($acc==11293) {ok $status->value,'secondary'; last STATUS;} 
+		if ($acc==171594) {ok $status->value,'live'; last STATUS;} 
+	}
+}
+
+#T4: REFSEQ STATUS TESTS
+my @refstatus=$ann->get_Annotations('RefSeq status');
+foreach my $status (@refstatus) {
+ STATUS: {
+		if ($acc==1) {ok $status->value,'REVIEWED'; last STATUS;}
+		if ($acc==2) {ok $status->value,'REVIEWED'; last STATUS;}
+		if ($acc==3) {ok $status->value,'PROVISIONAL'; last STATUS;}
+		if ($acc==4) {ok $status->value,'WITHDRAWN'; last STATUS;}
+		if ($acc==9) {ok $status->value,'VALIDATED'; last STATUS;}
+		if ($acc==11300) {ok $status->value,''; last STATUS;}
+		if ($acc==11306) {ok $status->value,'MODEL'; last STATUS;}
+		if ($acc==11293) {ok $status->value,'secondary'; last STATUS;} 
+		if ($acc==171594) {ok $status->value,'Reviewed'; last STATUS;} 
+	}
+}
+
+#T5: GENE NAME TESTS
+my @ofname=$ann->get_Annotations('Official Full Name');
+foreach my $name (@ofname) {
+ STATUS: {
+		if ($acc==10) {ok $name->value,'N-acetyltransferase 2 (arylamine N-acetyltransferase)'; last STATUS;}
+		if ($acc==13) {ok $name->value,'arylacetamide deacetylase (esterase)'; last STATUS;}
+		if ($acc==14) {ok $name->value,'angio-associated, migratory cell protein'; last STATUS;}
+		if ($acc==11287) {ok $name->value,'pregnancy zone protein'; last STATUS;}
+            if ($acc==11298) {ok $name->value,'arylalkylamine N-acetyltransferase'; last STATUS;}
+		if ($acc==11304) {ok $name->value,'ATP-binding cassette, sub-family A (ABC1), member 4'; last STATUS;}
+		if ($acc==11306) {ok $name->value,'ATP-binding cassette, sub-family B (MDR/TAP), member 7'; last STATUS;} 
+	}
+}
+
+#T6: CHROMOSOME TESTS
+my @chr=$ann->get_Annotations('chromosome');
+foreach my $chr (@chr) {
+ STATUS: {
+		if ($acc==5) {ok $chr->value,1; last STATUS;}
+		if ($acc==6) {ok $chr->value,1; last STATUS;}
+		if ($acc==7) {ok $chr->value,17; last STATUS;}
+		if ($acc==11306) {ok $chr->value,'X'; last STATUS;}
+		if ($acc==11304) {ok $chr->value,3; last STATUS;}
+		if ($acc==171590) {ok $chr->value,'I'; last STATUS;}
+		if ($acc==171592) {ok $chr->value,'I'; last STATUS;} 
+	}
+}
+
+#T7: GENE SYMBOL ALIAS TESTS
+my @sym=$ann->get_Annotations('ALIAS_SYMBOL');
+foreach my $sym (@sym) {
+    next if (($sym eq '')||!defined($sym));
+    ok grep(/\b$sym\b/,@{$asym{$acc}}),1;
+}
+
+#T8: CYTO LOCATION TESTS
+my @map=$ann->get_Annotations('cyto');
+foreach my $map (@map) {
+
+  STATUS: {
+		 if ($acc==10) {ok $map->value,'8p22'; last STATUS;}
+		 if ($acc==11) {ok $map->value,'8p22'; last STATUS;}
+		 if ($acc==13) {ok $map->value,'3q21.3-q25.2'; last STATUS;}
+		 if ($acc==11306) {ok $map->value,'X C-D'; last STATUS;}
+		 if ($acc==11305) {ok $map->value,'2 A2-B'; last STATUS;}
+		 if ($acc==11304) {ok $map->value,'3 G1'; last STATUS;}
+		 if ($acc==11303) {ok $map->value,'4 A5-B3'; last STATUS;} 
+	 }
+ }
+
+#T9: REFERENCE NUMBER TEST
+my @refs=$ann->get_Annotations('Reference');
+my $refs=$#refs+1||0;
+ok $pmed{$acc},$refs;
+
+
+my @dblinks=$ann->get_Annotations('dblink');
+my @keys=$ann->get_all_annotation_keys;
+
+#T10: GENERIF AND OTHER DBLINK TESTS
+my @url=qw(HGMD Ensembl KEGG Homologene);#Only validate the URL
+foreach my $dblink (@dblinks) {
+my $dbname=$dblink->database||'';
+DB: {
+    if ( $dbname eq 'generif') {#Should have ID and text
+        ok $dblink->primary_id;
+        ok $dblink->comment->text;
+        last DB;
+    }
+    if ($acc==2) {
+        if (($dbname eq 'MIM')&&($dblink->authority)&&($dblink->authority eq 'phenotype')) {
+            ok $dblink->optional_id;
+            last DB;
+        }
+        if ($dbname eq 'Evidence viewer') {
+            ok $dblink->url; #We may even validate the urls?
+            ok $dblink->primary_id,2;
+            last DB;
+        }
+        if ($dbname eq 'Model maker') {
+            ok $dblink->url; #We may even validate the urls?
+            ok $dblink->primary_id,2;
+            last DB;
+        }
+        if ($dbname eq 'AceView') {
+            ok $dblink->url; #We may even validate the urls?
+            ok $dblink->primary_id,2;
+            last DB; 
+        }
+        if (grep(/$dbname/,@url)) {
+            ok $dblink->url; #We may even validate the urls?
+            last DB;
+        }
+        if ($dbname eq 'GDB') {
+            ok $dblink->primary_id,'GDB:119639'; #We may even validate the urls?
+            last DB;
+        }
+        if ($dbname eq 'UniGene') {
+            ok $dblink->url; #We may even validate the urls?
+            ok $dblink->primary_id,'Hs.212838';
+            last DB;
+        }
+        if ($dbname eq 'PharmGKB') {
+            ok $dblink->primary_id,'PA24357';
+            last DB;
+        }
+        if ($dbname eq 'MGC') {
+            ok $dblink->url; #We may even validate the urls?
+            ok $dblink->primary_id,'BC040071';
+            last DB;
+        }
+    }
+}
+}
+
+#T11: SOME EXTERNAL DATABASE IDS TESTS
+foreach my $key (@keys) {
+	next if grep(/\b$key\b/, @revkeys);
+	my @all=$ann->get_Annotations($key);
+	#Checking xref to some databases- OMIM, Wormbase and HGNC, others later
+	foreach my $pid (@all) {
+	 DBID: {
+			if (($acc==8)&&($key eq 'MIM')) {ok $pid->value,'108985'; last DBID;}
+			if (($acc==9)&&($key eq 'HGNC')) {ok $pid->value,'7645'; last DBID;}
+			if (($acc==11298)&&($key eq 'MGI')) {ok $pid->value,'1328365'; last DBID;}
+			if (($acc==171593)&&($key eq 'AceView/WormGenes')) {ok $pid->value,'1A502'; last DBID;} 
+			if (($acc==171594)&&($key eq 'WormBase')) {ok $pid->value,'Y48G1C.4'; last DBID;} 
+		}
+	}
+}
+
+#T12: REFERENCE RECORD TEST
+if ($acc==1) {
+    foreach my $ref (@refs) {
+        my $pmed=$ref->medline;
+        ok grep(/\b$pmed\b/,@pubmed),1;
+    }
+}
+
+#T13/14: STS Markers and Gene Ontology
+my @syn=('MGI:707739','MPC786');
+my @evid=qw(IEA TAS ISS);
+my (%pmeds,%go);
+ $go{11305}=['5524', '16887', '5215', '8203', '6810', '16021' ,'5765'];
+ $go{11298}=['8080', '8415', '4060', '16740'];
+ $pmeds{11305}=['12466851']; 
+my @types=qw(Function Component Process);
+if (($acc==11305)||($acc==11298)) { #Let's check just these two...
+	foreach my $ot ($ann->get_Annotations('OntologyTerm')) {
+		if (($ot->term->authority)&&($ot->term->authority eq 'STS marker')) {
+			if ($acc==11305) {
+				ok $ot->name,'AI413825';
+				ok $ot->term->namespace,'UniSTS';
+				ok $ot->identifier,158928;
+			}
+			else {
+				ok $ot->name,'D11Mit102';
+				ok $ot->term->namespace,'UniSTS';
+				ok $ot->identifier,126289;
+				foreach my $syn ($ot->get_synonyms) {
+					ok grep(/\b$syn\b/,@syn),1;
+				}
+			}
+			next;
+		}
+		my $evid=$ot->comment;
+		$evid=~s/evidence: //i;
+		my $type=$ot->ontology->name;
+		my @ref=$ot->term->get_references;
+		my $id=$ot->identifier;
+		my $thispmed=$ref[0]->medline if (@ref);
+		ok grep(/\b$type\b/,@types),1;
+		ok grep(/\b$id\b/,@{$go{$acc}}),1;
+		ok grep(/\b$thispmed\b/,@{$pmeds{$acc}}),1 if ($thispmed);
+		ok $ot->name;
+	}
+}
+
+#T15/16/17: GENOMIC LOCATION TESTS/SEQUENCE TYPES TESTS/CONSERVED DOMAINS TESTS
+my @gffs=('SEQ	entrezgene	gene location	63548355	63556668	.	+	.',
+			 'SEQ	entrezgene	genestructure	63548355	63556668	.	+	.',
+			 'SEQ	entrezgene	gene location	31124733	31133046	.	+	.',
+			 'SEQ	entrezgene	genestructure	31124733	31133046	.	+	.',
+			 'SEQ	entrezgene	gene location	8163589	8172398	.	+	.',
+			 'SEQ	entrezgene	genestructure	8163589	8172398	.	+	.');
+my @contigs=$struct->get_members;
+my @auth=('mrna','genomic','product','mrna sequence','protein');#Known types....
+foreach my $contig (@contigs) {
+	my $stype=$contig->authority;
+	ok grep(/^$stype$/i,@auth),1;
+	if ($acc==1) {#Do just 1?
+		if (($contig->authority eq 'genomic')||($contig->authority eq 'Genomic')) {
+			foreach my $sf ($contig->get_SeqFeatures) {
+				$sf->source_tag('entrezgene');
+				my $gff=$sf->gff_string;
+				$gff=~s/[\t\s]+$//g;
+				foreach my $gffstr (@gffs) {
+					if ($gffstr eq $gff) {
+						ok(1);
+						last;
+					}
+				}
+			}
+		}
+		if ($contig->authority eq 'Product') {
+			ok $contig->id,'NP_570602';
+			ok $contig->accession_number,21071030;
+			foreach my $sf ($contig->get_SeqFeatures) {
+            foreach my $dblink ($sf->annotation->get_Annotations('dblink')) {
+					my $key=$dblink->{_anchor}?$dblink->{_anchor}:$dblink->optional_id;
+					my $db=$dblink->database;
+					next unless (($db =~/cdd/i)||($sf->primary_tag=~ /conserved/i));
+					my $desc;
+					if ($key =~ /:/) {
+						($key,$desc)=split(/:/,$key);
+					}
+					$desc=~s/^\s+//;#THIS SHOULD GO IN entrezgene.pm!!!
+					ok $desc,'IGc2; Immunoglobulin C-2 Type';
+					ok $key,'smart00408';
+					ok $sf->score,103;
+					ok $db,'CDD';
+					ok $sf->start,223;
+					ok $sf->end,282;
+            }
+			}
+		}
+	}
+}
+}
+
+
+#, -locuslink=>'convert');
+#See if we can convert to locuslink
+#T18: BACKCOMPATIBILITY TESTS
+my @llsp =('OFFICIAL_GENE_NAME','CHR','MAP','OFFICIAL_SYMBOL');
+my $eio_b=new Bio::SeqIO(-file=>Bio::Root::IO->catfile("t","data",
+							 "entrezgene.dat"),-format=>'entrezgene', -debug=>'on',-service_record=>'yes',-locuslink=>'convert');
+
+while (my $seq=$eio_b->next_seq) {
+    ok $seq;
+    my $acc=$seq->accession_number;
+    ok grep(/\b$acc\b/,@ids),1;
+    my $ann=$seq->annotation;
+    last if ($acc==4);#3 is enough? and 4 does not have gene name, so....
+    foreach my $key (@llsp) {
+        my @vals=$ann->get_Annotations($key);
+        ok @vals;
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/est2genome.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/est2genome.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/est2genome.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,97 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# $Id: est2genome.t,v 1.3 2003/05/12 13:58:17 shawnh Exp $
+
+my $error;
+use strict;
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    $error = 0; 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    use vars qw($TESTCOUNT);
+    $TESTCOUNT = 60;
+    plan tests => $TESTCOUNT;
+}
+
+use Bio::Tools::Est2Genome;
+my $verbose = 0;
+
+my $parser = new Bio::Tools::Est2Genome(-file   => Bio::Root::IO->catfile
+					('t','data', 'hs_est.est2genome'));
+
+ok($parser);
+my $feature_set = $parser->parse_next_gene;
+ok(ref($feature_set), qr/ARRAY/i );
+
+ok(scalar @$feature_set, 7);
+my @exons = grep { $_->primary_tag eq 'Exon' } @$feature_set;
+my @introns = grep { $_->primary_tag eq 'Intron' } @$feature_set;
+
+my @expected_exons = ( [695,813,1,1,119,1],
+		       [1377,1493,1,120,236,1],
+		       [1789,1935,1,237,382,1],
+		       [2084,2180,1,383,479,1]);
+my @expected_introns = ( [814,1376,1],
+			 [1494,1788,1],
+			 [1936,2083,1] );
+
+foreach my $e ( @exons ) {
+    my $test_e = shift @expected_exons;
+    my $i = 0;
+    ok($e->query->start, $test_e->[$i++]);
+    ok($e->query->end, $test_e->[$i++]);
+    ok($e->query->strand, $test_e->[$i++]);
+
+    ok($e->hit->start, $test_e->[$i++]);
+    ok($e->hit->end, $test_e->[$i++]);
+    ok($e->hit->strand, $test_e->[$i++]);
+}
+ok(! @expected_exons);
+foreach my $intron ( @introns ) {
+    my $test_i = shift @expected_introns;
+    my $i = 0;
+    ok($intron->start, $test_i->[$i++]);
+    ok($intron->end, $test_i->[$i++]);
+    ok($intron->strand, $test_i->[$i++]);
+}
+ok(! @expected_introns);
+
+$parser = new Bio::Tools::Est2Genome(-file   => Bio::Root::IO->catfile
+					('t','data', 'hs_est.est2genome'));
+
+ok($parser);
+my $gene = $parser->parse_next_gene(1);
+@expected_exons = ( [695,813,1,1,119,1],
+		       [1377,1493,1,120,236,1],
+		       [1789,1935,1,237,382,1],
+		       [2084,2180,1,383,479,1]);
+@expected_introns = ( [814,1376,1],
+			 [1494,1788,1],
+			 [1936,2083,1] );
+
+foreach my $trans($gene->transcripts){
+   my @exons = $trans->exons;
+   foreach my $e(@exons){
+    my $test_e = shift @expected_exons;
+    my $i = 0;
+    ok($e->start, $test_e->[$i++]);
+    ok($e->end, $test_e->[$i++]);
+    ok($e->strand, $test_e->[$i++]);
+  }
+  my @introns = $trans->introns;
+  foreach my $intron ( @introns ) {
+    my $test_i = shift @expected_introns;
+    my $i = 0;
+    ok($intron->start, $test_i->[$i++]);
+    ok($intron->end, $test_i->[$i++]);
+    ok($intron->strand, $test_i->[$i++]);
+  }
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/exp.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/exp.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/exp.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,52 @@
+# -*-Perl-*-
+# $Id: exp.t,v 1.3 2005/09/17 02:11:21 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS);
+BEGIN {
+	$NUMTESTS = 3;
+	$error = 0;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	# SeqIO modules abi.pm, ctf.pm, exp.pm, pln.pm, ztr.pm
+	# all require Bio::SeqIO::staden::read, part of bioperl-ext
+	eval {
+		require Bio::SeqIO::staden::read;
+	};
+	if ( $@ ) {
+		$error = 1;
+		warn "Bio::SeqIO::staden::read of bioperl-ext is not installed or is installed incorrectly - skipping exp.t tests\n";
+   }
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+
+END { 
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Unable to run all of the exp tests',1);
+   }
+}
+
+exit(0) if ( $error == 1 );
+
+use Bio::SeqIO::exp;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => 'exp',
+			 -verbose => $verbose,
+			 -file => Bio::Root::IO->catfile
+			 (qw(t data readtest.exp) ));
+ok(my $seq = $io->next_seq);
+ok($seq->seq, "GATGATTCCGGCTTCGGACGACTCTAGAGGATCCCCATTTTTATAGTTTTTATCTTGTAATAGATGTTTAGATTTTTCGTTGTAATTATTTTCTTTATTGTTGAAATTAGTATCTCTGGGTAATTTATCATATTCTCTGGAAAATGATTTACTATCACTAGATACTTCATAAGATTTATAATCTTTATTATGAAAATCATCTCTATTTTTCAAATTATTATTATATCTATCAAAGTTTCTGTCTTCATTATATCTATTAGCATATCTATCTTTATCTTTATCCCTATCACTATATCTATCATATGGTTCATCTTGTTCAACCGATCAGACTCGATTCGCCATCGCCTCTAACGGATGGCCGCTCCCCCTCTCATACCTCGCTCCCCTCGACATCCCCCGTCTCGCCACCCTATCCGCCCCCTTCATCACCCCCCCTTATCCACACCCTCACCCCCCGCATCGCGCACCCACGACCACCCGAAGAACCGCCCTTACTCCCAAGTACGCCCCGACCTCCATCACCCTATGCGGTACCACTCCCACCACACCCAGTCCTACTTTCGCCCGCACATCGGCCCCGCTTCAGACAGCTCCCAACTACGCAACCCACGCTTGTTCTTGTTCACACTCGAATACTCGAATCTCTCATTACTCCGCGGACTCCGCCGCACCTGTGCACCATTAACTGTGTAGCGCCTGAACCGGCACCTCTGATTACCACTTCCTCCACCAGCACAGTCCTATTACCGCATGTCGCTCTGCTAAGACAGTGCAAGACTCTGCGGTCGCTCTGACCCGCATCCGCCAGGGCACCTCTCACCCTCGCTGGCCACCCCGCCCCCCTCTCCCTGCCCCTTCATTCCCCCAAACCGCTTTCAACGGGACACACCCCTCCGCGGCGGACCACAACTCGCCGTCGGCCACCACTCACACCTTCCCTCCTCCTTCCCCCACATCACGCCAACCCCGTGGGACGGCTCTCCCGCGGCTACGACGCGCAACCCCCCCTCGCCGCTTCCCCCCCAACTTCCCACGGGCTCCCCTCCGCCCCTTACCCGCGAGGAGCTTCACCCGCGAACCACCTCCCCCCTTTCCCAACAGCACCG");
+

Added: trunk/packages/bioperl/branches/upstream/current/t/fasta.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/fasta.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/fasta.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+# -*-Perl-*-
+# $Id: fasta.t,v 1.1 2005/09/07 00:43:17 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+BEGIN {
+	$NUMTESTS = 6;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	#
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+use Bio::SeqIO::fasta;
+use Bio::Root::IO;
+
+# There are many other tests of fasta I/O in t/, this
+# is a dedicated script that could be further customized
+ 
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => '',
+								 -verbose => $verbose,
+								 -file => Bio::Root::IO->catfile
+								 (qw(t data test.fasta) ));
+
+ok(my $seq = $io->next_seq);
+ok($seq->length, 358);
+ok($seq->display_id,'roa1_drome');
+ok($seq->desc,'Rea guano receptor type III >> 0.1');
+ok($seq->alphabet,'protein');

Added: trunk/packages/bioperl/branches/upstream/current/t/flat.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/flat.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/flat.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,161 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: flat.t,v 1.11 2003/07/09 01:12:11 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+
+my $error;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $error = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 16;
+    plan tests => $NUMTESTS;
+    eval { 
+	require DB_File; 
+	require Bio::DB::Flat; 
+	require Bio::Root::IO; 
+	1;
+    };
+    if( $@ ) {
+	print STDERR "DB_File not loaded. This means flat.t test cannot be executed. Skipping\n";
+	foreach ( $Test::ntest..$NUMTESTS ) {
+	    skip('DB_File not installed',1);
+	}
+	$error = 1;
+    }
+}
+
+if( $error ==  1 ) {
+    exit(0);
+}
+my $testnum;
+my $verbose = 0;
+
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run. 
+
+
+#First of all we need to create a flat db
+use Bio::Root::IO;
+use Cwd;
+my $cd = cwd();
+my $tmpdir = Bio::Root::IO->catfile($cd,qw(t tmp));
+&maketmpdir();
+my $db = Bio::DB::Flat->new(-directory  => $tmpdir,
+                            -index      => 'bdb',
+			    -dbname     => 'mydb',
+			    -format     => 'fasta',
+			    -verbose    => $verbose,
+                 	    -write_flag => 1
+                            );
+ok($db);
+my $dir = Bio::Root::IO->catfile($cd,qw(t data AAC12660.fa));
+my $result = $db->build_index(glob($dir));
+ok($result);
+
+#Now let's get the sequence out again
+my $seq = $db->get_Seq_by_id('AAC12660');
+ok($seq);
+ok($seq->length,504);
+undef $db;
+&cleanup();
+&maketmpdir();
+$db = Bio::DB::Flat->new(-directory  => $tmpdir,
+                         -index      => 'bdb',
+                         -format     => 'embl',
+			 -dbname     => 'myembl',
+                         -verbose    => $verbose,
+                         -write_flag => 1
+			 );
+
+$dir= Bio::Root::IO->catfile($cd,qw(t data factor7.embl));
+
+$result = $db->build_index(glob($dir));
+ok($result);
+$seq = $db->get_Seq_by_id('HSCFVII');
+ok($seq);
+ok($seq->length,12850);
+
+# deal with wantarray conditions
+$seq = $db->get_Seq_by_acc('J02933');
+ok($seq && ref($seq));
+ok($seq->length,12850);
+
+
+undef $db;
+
+&cleanup();
+&maketmpdir();
+
+
+$db = Bio::DB::Flat->new(-directory  => $tmpdir,
+			 -index      => 'binarysearch',
+			 -format     => 'fasta',
+			 -dbname     => 'mybinfa',
+			 -verbose    => $verbose,
+			 -write_flag => 1
+			 );
+
+$dir= Bio::Root::IO->catfile($cd,qw(t data dbfa 1.fa));
+$result = $db->build_index($dir);
+ok($result);
+$seq = $db->get_Seq_by_id('AW057119');
+ok($seq);
+ok($seq->length,808);
+undef $db;
+
+&cleanup();
+&maketmpdir();
+$db = Bio::DB::Flat->new(-directory  => $tmpdir,
+			 -index      => 'binarysearch',
+			 -format     => 'swiss',
+			 -dbname     => 'mybinswiss',
+			 -verbose    => $verbose,
+			 -write_flag => 1
+			 );
+$dir= Bio::Root::IO->catfile($cd,qw(t data swiss.dat));
+$result = $db->build_index($dir);
+
+ok($result);
+$seq = $db->get_Seq_by_id('ACON_CAEEL');
+ok($seq);
+ok($seq->length,788);
+
+$seq = $db->get_Seq_by_id('ACON_CAEEL');
+ok($seq && ref($seq));
+
+undef $db;
+
+
+&cleanup();
+
+sub maketmpdir {
+    mkdir ($tmpdir,0777);
+}
+
+sub cleanup {
+    eval { 
+	 Bio::Root::IO->rmtree($tmpdir) if( defined $tmpdir && -d $tmpdir);
+    };
+} 
+
+END {
+    &cleanup();
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/game.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/game.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/game.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: game.t,v 1.21 2004/02/27 20:20:53 smckay Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error;
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'};
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    use vars qw($TESTCOUNT);
+    $TESTCOUNT = 23;
+    plan tests => $TESTCOUNT;
+    
+    $error  = 0;
+    eval { require XML::Parser::PerlSAX; };
+    if( $@ ) {
+	print STDERR "XML::Parser::PerlSAX not loaded. This means game test cannot be executed. Skipping\n";
+	foreach ( $Test::ntest..$TESTCOUNT ) {
+	    skip('XML::Parser::PerlSAX installed',1);
+	}
+	$error = 1;
+    } 
+    # make sure we can load it, assuming that the prerequisites are really met
+
+    if( $error == 0 ) {
+	eval { require Bio::SeqIO::game; };
+	if( $@ ) {
+	    print STDERR "game.pm not loaded. This means game test cannot be executed. Skipping\n";
+	    foreach ( $Test::ntest..$TESTCOUNT ) {
+		skip('game.pm not loaded because XML::Writer not loaded',1);
+	    }
+	    $error = 1;
+	} 
+    }
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+END{ 
+    unlink('testgameout.game')
+}
+use Bio::SeqIO;
+use Bio::Root::IO;
+my $verbose = $DEBUG ? 1 : -1;
+my $str = Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile("t","data","test.game"), 
+			  '-format' => 'game',
+			  '-verbose' => $verbose);
+ok ($str);
+my $seq = $str->next_seq();
+ok($seq);
+
+# exercise game parsing
+$str = new Bio::SeqIO(
+    -format =>'game',
+    -file => Bio::Root::IO->catfile ( qw(t data test.game))
+		      );
+$seq = $str->next_seq;
+ok(defined $seq);
+ok(defined $seq->seq);
+ok($seq->alphabet, 'dna');
+ok($seq->display_id, 'L16622');
+ok($seq->length, 28735);
+ok($seq->species->binomial, 'Caenorhabditis elegans');
+my @feats = $seq->get_SeqFeatures;
+ok(scalar(@feats), 7);
+my $source = grep { $_->primary_tag eq 'source' } @feats;
+ok($source);
+my @genes = grep { $_->primary_tag eq 'gene' } @feats;
+ok(scalar(@genes), 3);
+ok($genes[0]->has_tag('gene'));
+my $gname;
+if ( $genes[0]->has_tag('gene') ) {
+    ($gname) = $genes[0]->get_tag_values('gene');
+}
+ok($gname, 'C02D5.3');
+ok($genes[0]->strand, 1);
+my $cds   = grep { $_->primary_tag eq 'CDS' } @feats;
+ok($cds, 3);
+
+# make sure we can read what we write
+# test XML-writing
+my $testfile = "testgameout.game";
+# map argument is required to write a <map_position> element
+my $out = new Bio::SeqIO(-format => 'game', -file => ">$testfile", -map => 1);
+$out->write_seq($seq);
+$out->close();
+
+$str = new Bio::SeqIO(-format =>'game', -file => $testfile);
+$seq = $str->next_seq;
+ok(defined $seq);
+ok(defined $seq->seq);
+ok($seq->alphabet, 'dna');
+ok($seq->display_id, 'L16622');
+ok($seq->length, 28735);
+ok($seq->species->binomial, 'Caenorhabditis elegans');
+
+my $genes = grep { $_->primary_tag eq 'gene' } @feats;
+$cds   = grep { $_->primary_tag eq 'CDS' } @feats;
+ok($genes, 3);
+ok($cds, 3);
+unlink $testfile;
+

Added: trunk/packages/bioperl/branches/upstream/current/t/gcg.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/gcg.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/gcg.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,42 @@
+# -*-Perl-*-
+# Bioperl Test Harness Script for Modules
+# $Id: gcg.t,v 1.1 2005/08/28 03:41:56 bosborne Exp $
+#
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+
+	use Test;
+	plan tests => 4;
+}
+
+if( $error == 1 ) {
+	exit(0);
+}
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+# test DOS linefeeds in gcg parser
+my $str = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+								  ("t","data","test_badlf.gcg"),
+								  -verbose => $verbose,
+								  -format => 'GCG');
+ok($str);
+ok ( my $seq = $str->next_seq());
+ok(length($seq->seq) > 0 );
+print "Sequence 1 of 1 from GCG stream:\n", $seq->seq, "\n" if( $verbose);

Added: trunk/packages/bioperl/branches/upstream/current/t/genbank.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/genbank.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/genbank.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,502 @@
+	# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: genbank.t,v 1.11.2.5 2006/11/23 21:07:08 sendu Exp $
+
+use strict;
+
+BEGIN {
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => 239;
+}
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+ok(1);
+
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+END {
+	unlink "tmp_revcomp_mrna.gb" if -e "tmp_revcomp_mrna.gb";
+	unlink 'testsource.gb';
+}
+
+my $ast = Bio::SeqIO->new(-format => 'GenBank' ,
+								  -verbose => $verbose,
+								  -file => Bio::Root::IO->catfile
+								  ("t","data","roa1.genbank"));
+$ast->verbose($verbose);
+my $as = $ast->next_seq();
+ok $as->molecule, 'mRNA';
+ok $as->alphabet, 'dna';
+ok($as->primary_id, 3598416);
+my @class = $as->species->classification;
+ok $class[$#class],'Eukaryota';
+
+$ast = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile
+							  ("t","data","NT_021877.gbk"));
+$ast->verbose($verbose);
+$as = $ast->next_seq();
+ok $as->molecule, 'DNA';
+ok $as->alphabet, 'dna';
+ok($as->primary_id, 37539616);
+ok($as->accession_number, 'NT_021877');
+
+my ($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures();
+ok(($cds->get_tag_values('transl_except'))[1],
+   '(pos:complement(4224..4226),aa:OTHER)');
+
+# test for a DBSOURCE line
+$ast = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile("t","data","BAB68554.gb"));
+$ast->verbose($verbose);
+$as = $ast->next_seq();
+ok $as->molecule, 'linear';
+ok $as->alphabet, 'protein';
+# Though older GenBank releases indicate SOURCE contains only the common name,
+# this is no longer true.  In general, this line will contain an abbreviated
+# form of the full organism name (but may contain the full length name),
+# as well as the optional common name and organelle.  There is no get/set
+# for the abbreviated name but it is accessible via name()
+ok defined($as->species->name('abbreviated')->[0]);
+ok $as->species->name('abbreviated')->[0], 'Aldabra giant tortoise';
+ok($as->primary_id, 15824047);
+my $ac = $as->annotation;
+ok defined $ac;
+my @dblinks = $ac->get_Annotations('dblink');
+ok(scalar @dblinks,1);
+ok($dblinks[0]->database, 'GenBank');
+ok($dblinks[0]->primary_id, 'AB072353');
+ok($dblinks[0]->version, '1');
+ok("$dblinks[0]", 'GenBank:AB072353.1');
+
+# test for multi-line SOURCE
+$ast = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile("t","data",
+                                                       "NC_006346.gb"));
+$as = $ast->next_seq;
+ok $as->species->binomial('FULL'), 'Bolitoglossa n. sp. RLM-2004';
+@class = $as->species->classification;
+ok($class[$#class],'Eukaryota');
+ok($as->species->common_name,'mushroomtongue salamander');
+
+$ast = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile("t","data",
+                                                       "U71225.gb"));
+$as = $ast->next_seq;
+@class = $as->species->classification;
+ok($class[$#class],'Eukaryota');
+ok $as->species->common_name,'black-bellied salamander';
+
+# test for unusual common name
+$ast = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile("t","data",
+                                                       "AB077698.gb"));
+$as = $ast->next_seq;
+# again, this is not a common name but is in name('abbreviated')
+ok defined($as->species->name('abbreviated')->[0]);
+ok $as->species->name('abbreviated')->[0],'Homo sapiens cDNA to mRNA';
+
+# test for common name with parentheses
+$ast = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile("t","data",
+                                                       "DQ018368.gb"));
+$as = $ast->next_seq;
+ok $as->species->scientific_name,'(Populus tomentosa x P. bolleana) x P. tomentosa var. truncata';
+
+# test secondary accessions
+my $seqio = new Bio::SeqIO(-format => 'genbank',
+									-verbose => $verbose,
+									-file => Bio::Root::IO->catfile
+									(qw(t data D10483.gbk)));
+my $seq = $seqio->next_seq;
+my @kw =  $seq->get_keywords;
+ok(scalar @kw, 118);
+ok($kw[-1], 'yabO');
+my @sec_acc = $seq->get_secondary_accessions();
+ok(scalar @sec_acc,14);
+ok($sec_acc[-1], 'X56742');
+
+# bug #1487
+my $str = new Bio::SeqIO(-verbose => $verbose,
+								 -file    => Bio::Root::IO->catfile
+								 (qw(t data D12555.gbk)));
+eval {
+	$seq = $str->next_seq;
+};
+
+ok(! $@ );
+
+# bug 1647 rpt_unit sub-feature with multiple parens
+$str = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile
+							  (qw(t data mini-AE001405.gb) ));
+ok($seq = $str->next_seq);
+my @rpts = grep { $_->primary_tag eq 'repeat_region' }
+  $seq->get_SeqFeatures;
+ok $#rpts, 2;
+my @rpt_units = map {$_->get_tag_values('rpt_unit')} @rpts;
+ok $#rpt_units, 0;
+ok $rpt_units[0],'(TG)10;A;(TG)7';
+
+# test bug #1673 , RDB-II genbank files
+$str = Bio::SeqIO->new(-format => 'genbank',
+							  -verbose => $verbose,
+                       -file => Bio::Root::IO->catfile
+							  (qw(t data Mcjanrna_rdbII.gbk) )
+		      );
+ok($seq = $str->next_seq);
+my @refs = $seq->annotation->get_Annotations('reference');
+ok(@refs, 1);
+ok($seq->display_id,'Mc.janrrnA');
+ok($seq->molecule ,'RNA');
+
+$str  = new Bio::SeqIO(-format => 'genbank',
+							  -file   => Bio::Root::IO->catfile
+							  ("t","data","AF165282.gb"),
+							  -verbose => $verbose);
+$seq = $str->next_seq;
+my @features = $seq->all_SeqFeatures();
+ok(@features, 5);
+ok($features[0]->start, 1);
+ok($features[0]->end, 226);
+my $location = $features[1]->location;
+ok($location->isa('Bio::Location::SplitLocationI'));
+my @sublocs = $location->sub_Location();
+ok(@sublocs, 29);
+
+# version and primary ID - believe it or not, this wasn't working
+ok ($seq->version, 1);
+ok ($seq->seq_version, 1);
+ok ($seq->primary_id, "5734104");
+
+# streaming and Bio::RichSeq creation
+my $stream = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+									  ("t","data","test.genbank"),
+									  -verbose => $verbose,
+                             -format => 'GenBank');
+$stream->verbose($verbose);
+my $seqnum = 0;
+my $species;
+my @cl;
+my $lasts;
+my @ids = qw(DDU63596 DDU63595 HUMBDNF);
+my @tids = (44689, 44689, 9606);
+my @tnames = ("Dictyostelium discoideum","Dictyostelium discoideum",
+				  "Homo sapiens");
+while($seq = $stream->next_seq()) {
+	if($seqnum < 3) {
+		ok $seq->display_id(), $ids[$seqnum];
+		$species = $seq->species();
+		@cl = $species->classification();
+		ok( $species->binomial(), $tnames[$seqnum],
+			 'species parsing incorrect for genbank');
+		ok( $cl[3] ne $species->genus(), 1,
+			 'genus duplicated in genbank parsing');
+		ok( $species->ncbi_taxid, $tids[$seqnum] );
+	}
+	$seqnum++;
+	$lasts = $seq;
+}
+ok $lasts->display_id(), "HUMBETGLOA";
+my ($ref) = $lasts->annotation->get_Annotations('reference');
+ok($ref->medline, 94173918);
+$stream->close();
+
+$stream = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+								  ("t","data","test.genbank.noseq"),
+								  -verbose => $verbose,
+								  -format => 'GenBank' );
+$seqnum = 0;
+while($seq = $stream->next_seq()) {
+	if($seqnum < 3) {
+		ok $seq->display_id(), $ids[$seqnum];
+	} elsif( $seq->display_id eq 'M37762') {
+		ok( ($seq->get_keywords())[0], 'neurotrophic factor');
+	}
+	$seqnum++;
+}
+ok $seqnum, 5, "Total number of sequences in test file";
+
+# fuzzy
+$seq = Bio::SeqIO->new( -format => 'GenBank',
+								-verbose => $verbose,
+                        -file =>Bio::Root::IO->catfile
+								("t","data","testfuzzy.genbank"));
+$seq->verbose($verbose);
+ok(defined($as = $seq->next_seq()));
+
+@features = $as->all_SeqFeatures();
+ok(@features,21);
+my $lastfeature = pop @features;
+# this is a split location; the root doesn't have strand
+ok($lastfeature->strand, undef);
+$location = $lastfeature->location;
+$location->verbose(-1); # silence the warning of undef seq_id()
+# see above; splitlocs roots do not have a strand really
+ok($location->strand, undef);
+ok($location->start, 83202);
+ok($location->end, 84996);
+
+@sublocs = $location->sub_Location();
+
+ok(@sublocs, 2);
+my $loc = shift @sublocs;
+ok($loc->start, 83202);
+ok($loc->end, 83329);
+ok($loc->strand, -1);
+
+$loc = shift @sublocs;
+ok($loc->start, 84248);
+ok($loc->end, 84996);
+ok($loc->strand,1);
+
+$seq = Bio::SeqIO->new(-format => 'GenBank',
+							  -verbose => $verbose,
+                       -file=> ">" . Bio::Root::IO->catfile
+							  ("t","data","genbank.fuzzyout"));
+$seq->verbose($verbose);
+ok($seq->write_seq($as));
+unlink(Bio::Root::IO->catfile("t","data","genbank.fuzzyout"));
+
+## now genbank ##
+$str = new Bio::SeqIO(-format =>'genbank',
+							 -verbose => $verbose,
+							 -file => Bio::Root::IO->catfile
+							 ( qw(t data BK000016-tpa.gbk)));
+$seq = $str->next_seq;
+ok(defined $seq);
+ok(defined $seq->seq);
+ok($seq->accession_number, 'BK000016');
+ok($seq->alphabet, 'dna');
+ok($seq->display_id, 'BK000016');
+ok($seq->length, 1162);
+ok($seq->division, 'ROD');
+ok($seq->get_dates, 1);
+ok($seq->keywords, 'Third Party Annotation; TPA');
+ok($seq->desc, 'TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.');
+ok($seq->seq_version, 1);
+ok($seq->feature_count, 2);
+my $spec_obj = $seq->species;
+ok ($spec_obj->common_name, 'house mouse');
+ok ($spec_obj->species, 'musculus');
+ok ($spec_obj->genus, 'Mus');
+ok ($spec_obj->binomial, 'Mus musculus');
+$ac = $seq->annotation;
+my $reference =  ($ac->get_Annotations('reference') )[0];
+ok ($reference->pubmed, '11479594');
+ok ($reference->medline, '21372465');
+
+# validate that what is written is what is read
+my $testfile = "testtpa.gbk";
+my $out = new Bio::SeqIO(-file => ">$testfile",
+							 -format => 'genbank');
+$out->write_seq($seq);
+$out->close();
+
+$str = new Bio::SeqIO(-format =>'genbank',
+							 -file => $testfile);
+$seq = $str->next_seq;
+ok(defined $seq);
+ok(defined $seq->seq);
+ok($seq->accession_number, 'BK000016');
+ok($seq->alphabet, 'dna');
+ok($seq->display_id, 'BK000016');
+ok($seq->length, 1162);
+ok($seq->division, 'ROD');
+ok($seq->get_dates, 1);
+ok($seq->keywords, 'Third Party Annotation; TPA');
+ok($seq->desc, 'TPA: Mus musculus pantothenate kinase 4 mRNA, partial cds.');
+ok($seq->seq_version, 1);
+ok($seq->feature_count, 2);
+$spec_obj = $seq->species;
+ok ($spec_obj->common_name, 'house mouse');
+ok ($spec_obj->species, 'musculus');
+ok ($spec_obj->genus, 'Mus');
+ok ($spec_obj->binomial, 'Mus musculus');
+$ac = $seq->annotation;
+$reference =  ($ac->get_Annotations('reference') )[0];
+ok ($reference->pubmed, '11479594');
+ok ($reference->medline, '21372465');
+
+unlink($testfile);
+
+# write revcomp split location
+my $gb = new Bio::SeqIO(-format => 'genbank',
+                        -file   => Bio::Root::IO->catfile
+                        (qw(t data revcomp_mrna.gb)));
+$seq = $gb->next_seq();
+
+$gb = new Bio::SeqIO(-format => 'genbank',
+                     -file   => ">tmp_revcomp_mrna.gb");
+
+$gb->write_seq($seq);
+undef $gb;
+ok(! -z "tmp_revcomp_mrna.gb");
+# INSERT DIFFING CODE HERE
+
+# bug 1925, continuation of long ORGANISM line ends up in @classification:
+# ORGANISM  Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC
+#           9150
+#           Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
+#           Enterobacteriaceae; Salmonella.
+$gb = new Bio::SeqIO(-format => 'genbank',
+							-file   => Bio::Root::IO->catfile
+							(qw(t data NC_006511-short.gbk)));
+$seq = $gb->next_seq;
+ok $seq->species->common_name, undef;
+ok $seq->species->scientific_name, "Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150";
+@class = $seq->species->classification;
+ok $class[$#class], "Bacteria";
+
+# WGS tests
+$gb = new Bio::SeqIO(-format => 'genbank',
+							-file   => Bio::Root::IO->catfile
+							(qw(t data O_sat.wgs)));
+$seq = $gb->next_seq;
+
+my @tests = ('WGS'        => 'AAAA02000001-AAAA02050231',
+				  'WGS_SCAFLD' => 'CM000126-CM000137',
+				  'WGS_SCAFLD' => 'CH398081-CH401163');
+
+foreach my $wgs
+(map {$seq->annotation->get_Annotations($_)} qw(WGS WGS_SCAFLD)) {
+    my ($tagname, $value) = (shift @tests, shift @tests);
+	ok($wgs->tagname, $tagname);
+	ok($wgs->value, $value);
+}
+
+# make sure we can retrieve a feature with a primary tag of 'misc_difference'
+$gb = new Bio::SeqIO(-format => 'genbank',
+							-file   => Bio::Root::IO->catfile
+							(qw(t data BC000007.gbk)));
+$seq = $gb->next_seq;
+($cds) = grep { $_->primary_tag eq 'misc_difference' } $seq->get_SeqFeatures;
+my @vals = $cds->get_tag_values('gene');
+ok $vals[0], 'PX19';
+
+# Check that the source,organism section is identical between input and output.
+# - test an easy one where organism is species, then two different formats of
+# subspecies, then a species with a format that used to be mistaken for
+# subspecies, then a bacteria with no genus, and finally a virus with a genus.
+
+# These tests are now somewhat out-of-date since we are moving to a Bio::Taxon-
+# based system for verifying taxonomic information.  Right now they just verify
+# changes so are really useless; I will change them to verify common name,
+# organelle, scientific name, etc.
+
+my $outfile = 'testsource.gb';
+
+# output always adds a period (GenBank std), but two of these files do not use them.
+
+foreach my $in ('BK000016-tpa.gbk', 'ay116458.gb', 'ay149291.gb', 'NC_006346.gb', 'ay007676.gb', 'dq519393.gb') {
+    my $infile =  Bio::Root::IO->catfile("t","data",$in);
+    
+	$str = new Bio::SeqIO(-format =>'genbank',
+						  -verbose => $verbose,
+						  -file => $infile);
+	$seq = $str->next_seq;
+	
+	$out = new Bio::SeqIO(-file => ">$outfile", -format => 'genbank');
+	$out->write_seq($seq);
+	$out->close();
+	
+	open (IN, $infile);
+	my @in = <IN>;
+	close(IN);
+	open (RESULT, $outfile);
+	my $line = 0;
+	my $check = 0;
+	my $ok = 1;
+    
+    FILECHECK:
+	while (my $result = <RESULT>) {
+		if ($result =~ /^KEYWORDS/) {
+			$check = 1;
+			next;
+		}
+
+		if ($result =~ /^REFERENCE/) {
+			last FILECHECK;
+		}
+
+		if ($check) {
+            
+            # end periods don't count (not all input files have them)
+            $result =~ s{\.$}{};
+            $in[$line] =~ s{\.$}{};
+            
+			if ($result ne $in[$line]) {
+				$ok = 0;
+				last;
+			}
+		}
+		
+
+	} continue { $line++ }
+	close(RESULT);
+	
+	ok $ok;
+	
+	unlink($outfile);
+}
+
+# NB: there should probably be full testing on all lines to ensure that output
+# matches input.
+
+# 20061117: problem with *double* colon in some annotation-dblink values
+
+foreach my $in ('P35527.gb') {
+        my $infile =  Bio::Root::IO->catfile("t","data",$in);
+       $str = new Bio::SeqIO(-format =>'genbank',
+                             -verbose => $verbose,
+                             -file => $infile);
+       $seq = $str->next_seq;
+        my $ac      = $seq->annotation();      # Bio::AnnotationCollection
+        foreach my $key ($ac->get_all_annotation_keys() ) {
+                my @values = $ac->get_Annotations( $key);
+                foreach my $value (@values) {
+                        if ($key eq 'dblink') {
+
+                                ok (index($value,'::') < 0);   # this should never be true
+
+                                ok ($value );   # check value is not empty
+
+                             #  print "  ann/", sprintf('%12s  ',$key), '>>>', $value , '<<<', "\n";
+                             #  print "        index double colon: ",index($value   ,'::'), "\n";
+
+                                #  check db name:
+                                my @parts = split(/:/,$value);
+                                if ( $parts[0] =~ /^(?:
+                                        #  not an exhaustive list of databases;
+                                        #  just the db's referenced in P35527.gb:
+                                        swissprot | GenBank | GenPept  | HSSP| IntAct | Ensembl | KEGG | HGNC | MIM | ArrayExpress
+                                                  | GO      | InterPro | Pfam| PRINTS | PROSITE
+                                                     )$/x )
+                                {
+                                      ok 1;
+                                }
+                                else {
+                                      ok 0;
+                                }
+
+                                ok ( $parts[1] );
+
+                        }
+                        # elsif ($key eq 'reference') { }
+                }
+        }
+
+
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/hmmer.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/hmmer.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/hmmer.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,287 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# $Id: hmmer.t,v 1.18 2004/11/11 18:29:10 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan test => 136;
+}
+
+use Bio::SearchIO;
+use Bio::Tools::HMMER::Domain;
+use Bio::Tools::HMMER::Set;
+use Bio::Tools::HMMER::Results;
+use Bio::Root::IO;
+
+
+my $searchio = new Bio::SearchIO(-format => 'hmmer',
+				 -file   => Bio::Root::IO->catfile
+				 ("t","data","hmmpfam.out"));
+
+while( my $result = $searchio->next_result ) {
+    ok(ref($result),'Bio::Search::Result::HMMERResult');
+    ok($result->algorithm, 'HMMPFAM');
+    ok($result->algorithm_version, '2.1.1');
+    ok($result->hmm_name, 'pfam');
+    ok($result->sequence_file, '/home/birney/src/wise2/example/road.pep');
+    ok($result->query_name, 'roa1_drome');
+    ok($result->query_description, '');
+    ok($result->num_hits(), 2);
+    my ($hsp,$hit);
+    if( $hit = $result->next_model ) {
+	ok($hit->name, 'SEED');
+	ok($hit->raw_score, '146.1');
+	ok($hit->significance, '6.3e-40');
+	ok(ref($hit), 'Bio::Search::Hit::HMMERHit');
+	ok($hit->num_hsps, 1);
+
+	if( defined( $hsp = $hit->next_domain ) ) {
+	    ok($hsp->hit->start, 1);
+	    ok($hsp->hit->end, 77);
+	    ok($hsp->query->start, 33);
+	    ok($hsp->query->end, 103);
+	    ok($hsp->score, 71.2);
+	    ok($hsp->evalue, '2.2e-17');
+	    ok($hsp->query_string, 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP');
+	    ok($hsp->gaps('query'), 7);
+	    ok($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv');
+	    ok($hsp->homology_string, 'lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ');
+	    ok(	length($hsp->homology_string), length($hsp->hit_string));
+	    ok( length($hsp->query_string), length($hsp->homology_string));
+	}
+    }
+    if( defined ($hit = $result->next_model) ) {
+	if( defined($hsp = $hit->next_domain) ) {
+	    ok($hsp->hit->start, 1);
+	    ok($hsp->hit->end, 77);
+	    ok($hsp->query->start, 124);
+	    ok($hsp->query->end, 194);
+	    ok($hsp->score, 75.5);
+	    ok($hsp->evalue, '1.1e-18');
+	    ok($hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV');
+	    ok($hsp->gaps('query'), 6);
+	    ok($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv');	 
+	    ok($hsp->homology_string, 'lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v');
+	    ok(	length($hsp->homology_string), length($hsp->hit_string));
+	    ok( length($hsp->query_string), length($hsp->homology_string));
+	}
+	last;
+    }
+}
+$searchio = new Bio::SearchIO(-format => 'hmmer',
+			      -file   => Bio::Root::IO->catfile
+			      ("t","data","hmmsearch.out"));
+while( my $result = $searchio->next_result ) {
+    ok(ref($result),'Bio::Search::Result::HMMERResult');
+    ok($result->algorithm, 'HMMSEARCH');
+    ok($result->algorithm_version, '2.0');
+    ok($result->hmm_name, 'HMM [SEED]');
+    ok($result->sequence_file, 'HMM.dbtemp.29591');
+    ok($result->database_name, 'HMM.dbtemp.29591');
+    ok($result->query_name, 'SEED');
+    ok($result->query_description, '');
+    ok($result->num_hits(), 1215);
+    my $hit = $result->next_model;
+    ok($hit->name, 'Q91581');
+    ok($hit->description,'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN');
+    ok($hit->significance, '2e-31');
+    ok($hit->raw_score, 119.7);
+    my $hsp = $hit->next_domain;
+    ok($hsp->score,119.7);
+    ok($hsp->evalue, '2e-31');
+    ok($hsp->query->start, 18);
+    ok($hsp->query->end, 89);
+    ok($hsp->hit->start, 1);
+    ok($hsp->hit->end, 77);
+    ok($hsp->query->seq_id(), 'SEED');
+    ok($hsp->hit->seq_id(), 'Q91581');   
+}
+
+$searchio = new Bio::SearchIO(-format => 'hmmer',
+			      -file   => Bio::Root::IO->catfile("t","data",
+								"L77119.hmmer"));
+
+while( my $result = $searchio->next_result ) {
+    ok(ref($result),'Bio::Search::Result::HMMERResult');
+    ok($result->algorithm, 'HMMPFAM');
+    ok($result->algorithm_version, '2.2g');
+    ok($result->hmm_name, 'Pfam');
+    ok($result->sequence_file, 'L77119.faa');
+    ok($result->query_name, 'gi|1522636|gb|AAC37060.1|');
+    ok($result->query_description, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
+    ok($result->num_hits(), 1);
+    my $hit = $result->next_hit;
+    ok($hit->name, 'Methylase_M');
+    ok($hit->description,'Type I restriction modification system, M');
+    ok($hit->significance, '0.0022');
+    ok($hit->raw_score, -105.2);
+    my $hsp = $hit->next_hsp;
+    ok($hsp->score,-105.2);
+    ok($hsp->evalue, '0.0022');
+    ok($hsp->query->start, 280);
+    ok($hsp->query->end, 481);
+    ok($hsp->hit->start, 1);
+    ok($hsp->hit->end, 279);
+    ok($hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|');
+    ok($hsp->hit->seq_id(), 'Methylase_M');
+    ok($hsp->hit_string, 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn');
+    ok($hsp->query_string, 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST');
+    ok($hsp->homology_string, ' ++EL+++  av+   R              L+F K++ dk      +i+         p +   + +++y   ++   ++ ++y ++      + lF++++   e ++  ++++ + +    ++      + +       Glf ++++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++                  +++ ++    k+n+i +    s+');
+    
+}
+
+
+$searchio = new Bio::SearchIO(-format => 'hmmer',
+			      -file   => Bio::Root::IO->catfile("t","data",
+								"cysprot1b.hmmsearch"));
+
+
+while( my $result = $searchio->next_result ) {
+    ok(ref($result),'Bio::Search::Result::HMMERResult');
+    ok($result->algorithm, 'HMMSEARCH');
+    ok($result->algorithm_version, '2.2g');
+    ok($result->hmm_name, 'Peptidase_C1.hmm [Peptidase_C1]');
+    ok($result->database_name, 'cysprot1b.fa');
+    ok($result->sequence_file, 'cysprot1b.fa');
+    ok($result->query_name, 'Peptidase_C1');
+    ok($result->query_accession, 'PF00112');
+    ok($result->query_description, 'Papain family cysteine protease');
+    ok($result->num_hits(), 4);
+    my $hit = $result->next_hit;
+    ok($hit->name, 'CATL_RAT');
+    ok($hit->description,'');
+    ok($hit->significance, '2e-135');
+    ok($hit->raw_score, 449.4);
+    my $hsp = $hit->next_hsp;
+    ok($hsp->score,449.4);
+    ok($hsp->evalue, '2e-135');
+    ok($hsp->query->start, 1);
+    ok($hsp->query->end, 337);
+    ok($hsp->hit->start, 114);
+    ok($hsp->hit->end, 332);
+    ok($hsp->query->seq_id(), 'Peptidase_C1');
+    ok($hsp->hit->seq_id(), 'CATL_RAT');
+    ok($hsp->hit_string, 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI');
+    ok($hsp->homology_string, '+P+++DWRe kg  VtpVK+QG qCGSCWAFSa g lEg+ ++kt      gkl+sLSEQ+LvDC++ d gn+      GCnG Glmd Af+Yik+     NgGl++E++Y     PY+    +kd                   g+Cky+  + ++     a+++g++d+p++     E+al+ka+a++GP+sVa+das+ s    q+Y+sG       +Y+++    C+++   +LdH+Vl+VGYG e+                                      ++++ +YW+VKNSWG++WG++GY++ia+++n    n+CG+a+ asypi');
+    ok($hsp->query_string, 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi');
+    $hit = $result->next_hit;
+    ok($hit->name, 'CATL_HUMAN');
+    ok($hit->description,'');
+    ok($hit->significance, '6.1e-134');
+    ok($hit->raw_score, 444.5);
+}
+			      
+my ($domain,$set,$homol,$rev,$res,$dom,@doms);
+    $domain = Bio::Tools::HMMER::Domain->new(-verbose=>1);
+
+ok ref($domain), 'Bio::Tools::HMMER::Domain';
+
+$domain->start(50);
+$domain->end(200);
+$domain->hstart(10);
+$domain->hend(100);
+$domain->seqbits(50);
+$domain->bits(20);
+$domain->evalue(0.0001);
+$domain->seq_id('silly');
+
+
+# test that we can get out forward and reverse homol_SeqFeatures
+$homol = $domain->feature2();
+ok $homol->start(), 10;
+
+$rev = $domain;
+
+ok $rev->start(), 50;
+
+$set = Bio::Tools::HMMER::Set->new();
+$set->add_Domain($domain);
+
+@doms = $set->each_Domain();
+$dom = shift @doms;
+
+ok $dom->start(), 50;
+
+$set->bits(300);
+$set->evalue(0.0001);
+$set->name('sillyname');
+$set->desc('a desc');
+$set->accession('fakeaccesssion');
+ok $set->bits(), 300;
+ok $set->evalue(), 0.0001;
+ok $set->name(), 'sillyname';
+ok $set->desc, 'a desc';
+ok $set->accession, 'fakeaccesssion';
+
+$res = Bio::Tools::HMMER::Results->new( -file => Bio::Root::IO->catfile("t","data","hmmsearch.out") , -type => 'hmmsearch');
+my $seen =0;
+ok $res->hmmfile, "HMM";
+ok $res->seqfile, "HMM.dbtemp.29591";
+
+my $first = 0;
+foreach $set ( $res->each_Set) {
+    foreach $domain ( $set->each_Domain ) {
+    #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n";
+    # do nothing for the moment
+      $seen = 1;
+  }
+}
+ok $seen, 1;
+
+ok $res->number, 1215, "\nBad number of domains. Expecting 1215. Got" . $res->number;
+
+$res = Bio::Tools::HMMER::Results->new( -file => 
+				      Bio::Root::IO->catfile("t","data",
+							     "hmmpfam.out") , 
+					-type => 'hmmpfam');
+
+ok ($res->number, 2);
+
+# parse HMM 2.2 files
+
+$res = Bio::Tools::HMMER::Results->new( -file => 
+				      Bio::Root::IO->catfile("t","data",
+							     "L77119.hmmer"),
+					-type => 'hmmpfam');
+$seen =0;
+ok $res->hmmfile, 'Pfam';
+ok $res->seqfile, 'L77119.faa';
+foreach $set ( $res->each_Set) {
+    # only one set anyways
+
+    ok($set->name, 'gi|1522636|gb|AAC37060.1|');
+    ok($set->desc, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
+    ok($set->accession, '[none]');
+    foreach $domain ( $set->each_Domain ) {
+	#print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n";
+    # do nothing for the moment
+	ok($domain->start, 280);
+	ok($domain->end, 481);
+	ok($domain->bits, -105.2);
+	ok($domain->evalue, 0.0022 );
+    }
+}
+ok ($res->number, 1);
+
+# test for bugs #(1189,1034,1172)
+$res = Bio::Tools::HMMER::Results->new( -file => Bio::Root::IO->catfile
+					("t","data","hmmsearch.out") , 
+					-type => 'hmmsearch');
+my $res2 = $res->filter_on_cutoff(100,50);
+ok($res2);
+ok($res2->number, 604);
+
+
+# now let's test the new Bio::SearchIO::hmmer

Added: trunk/packages/bioperl/branches/upstream/current/t/hmmer_pull.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/hmmer_pull.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/hmmer_pull.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,208 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+##
+# $Id: hmmer_pull.t,v 1.1.2.1 2006/08/20 22:03:30 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan test => 287;
+}
+
+use Bio::Root::IO;
+use Bio::SearchIO;
+
+ok(1);
+
+my $searchio = new Bio::SearchIO(-format => 'hmmer_pull', -file => Bio::Root::IO->catfile("t","data","hmmpfam_fake.out"), -verbose => -1);
+my @data = ([qw(roa1_drome roa2_drome)], [2, 1], [1, 2], [2, 1]);
+while (my $result = $searchio->next_result) {
+    ok ref($result), 'Bio::Search::Result::HmmpfamResult';
+    ok $result->algorithm, 'HMMPFAM';
+    ok $result->algorithm_version, '2.1.1';
+    ok $result->hmm_name, 'pfam';
+    ok $result->hmm_file, $result->hmm_name;
+    ok $result->database_name, $result->hmm_name;
+    ok $result->sequence_file, '/home/birney/src/wise2/example/road.pep';
+    ok $result->sequence_database, $result->sequence_file;
+    ok $result->query_name, shift @{$data[0]};
+    ok $result->num_hits(), shift @{$data[1]};
+    ok $result->no_hits_found, 0;
+    
+	ok $result->query_accession, '';
+    ok $result->query_description, '';
+    ok ! $result->query_length;
+    ok ! $result->database_letters;
+    ok ! $result->database_entries;
+    ok $result->algorithm_reference, '';
+    ok $result->get_parameter('test'), undef;
+    ok $result->available_parameters, undef;
+    ok $result->get_statistic('test'), undef;
+    ok $result->available_statistics, undef;
+    
+    my @orig_order = $result->hits;
+    ok @orig_order, shift @{$data[3]};
+	if (@orig_order > 1) {
+		ok $orig_order[0]->name ne $orig_order[1]->name;
+		$result->sort_hits(sub{$Bio::Search::Result::HmmpfamResult::a->[2]
+													<=> 
+							   $Bio::Search::Result::HmmpfamResult::b->[2]});
+		my @hits = $result->hits;
+		ok @hits, @orig_order;
+		ok $hits[0]->name, $orig_order[1]->name;
+		$result->sort_hits(sub{$Bio::Search::Result::HmmpfamResult::b->[4]
+													<=> 
+							   $Bio::Search::Result::HmmpfamResult::a->[4]});
+	}
+    
+    my @hit_data = ([qw(SEED TEST)], [146.1, "5.0"], [6.3e-40, 7.2], [2, 1], [77, undef], [2, 0], [1, 2],
+                    ["33 34 36 38 43 45 47 48 51 53 55 57 58 65 68 71 73 74 76 88 98 99 124 125 126 127 129 132 135 140 142 145 146 148 149 151 153 154 156 157 158 159 160 161 164 165 166 167 168 169 170 178 187 189 194", ''],
+                    ["1 2 3 4 6 9 11 12 13 15 16 17 19 21 22 23 25 26 28 30 31 33 39 40 41 42 43 44 46 47 48 49 50 51 52 60 61 70 72 73 77", ''],
+                    ["1-6 8-13 15-23 25-33 39-56 58-63 67-77", '']);
+    while (defined(my $hit = $result->next_model)) {
+        ok ref($hit), 'Bio::Search::Hit::HmmpfamHit';
+        ok $hit->name, shift @{$hit_data[0]};
+        ok $hit->raw_score, shift @{$hit_data[1]};
+        ok $hit->score, $hit->raw_score;
+        ok $hit->significance, shift @{$hit_data[2]};
+        ok $hit->p, $hit->significance;
+        ok $hit->num_hsps, shift @{$hit_data[3]};
+        ok $hit->n, $hit->num_hsps;
+        ok $hit->algorithm, $result->algorithm;
+        ok $hit->overlap, 0;
+        ok $hit->rank, shift @{$hit_data[6]};
+        ok $hit->tiled_hsps, 0;
+        ok $hit->strand('query'), 1;
+        ok $hit->strand('hit'), 1;
+        my @strands = $hit->strand;
+        ok "@strands", "1 1";
+        
+        ok $hit->description, undef;
+        ok $hit->accession, undef;
+        ok ! $hit->locus;
+        ok ! $hit->bits;
+        ok ! $result->logical_length('query');
+        ok ! $result->frame;
+        ok $hit->each_accession_number, undef;
+        
+        ok $hit->length, shift @{$hit_data[4]};
+        ok $hit->logical_length('hit'), $hit->length;
+		
+		if ($result->query_name eq 'roa1_drome') {
+			my @inds = $hit->seq_inds('query', 'identical');
+			ok "@inds", shift @{$hit_data[7]};
+			@inds = $hit->seq_inds('hit', 'identical');
+			ok "@inds", shift @{$hit_data[8]};
+			@inds = $hit->seq_inds('hit', 'conserved', 1);
+			ok "@inds", shift @{$hit_data[9]};
+		}
+		
+		if ($hit->name eq 'SEED') {
+			my $best = $hit->hsp('best');
+			ok $best->evalue, 1.1e-18;
+			my $worst = $hit->hsp('worst');
+			ok $worst->evalue, 2.2e-17;
+			ok $hit->start('query'), 33;
+			ok $hit->start('hit'), 1;
+			ok $hit->end('query'), 194;
+			ok $hit->end('hit'), 77;
+			my @range = $hit->range('query');
+			ok "@range", '33 194';
+			@range = $hit->range('hit');
+			ok "@range", '1 77';
+			
+			if ($hit->query_name eq 'roa1_drome') {
+				ok $hit->length_aln('query'),142;
+				ok $hit->length_aln('hit'), 77;
+				ok $hit->gaps('total'), 14;
+				ok $hit->gaps('query'), 13;
+				ok $hit->gaps('hit'), 1;
+				ok $hit->matches('id'), 41;
+				ok $hit->matches('cons'), 24;
+				ok $hit->frac_identical, 0.387;
+				ok $hit->frac_conserved, 0.169;
+				ok ! $hit->frac_aligned_query;
+				ok $hit->frac_aligned_hit, '1.00';
+				ok $hit->num_unaligned_hit, 1;
+				ok $hit->num_unaligned_query, 13;
+			}
+		}
+        
+        my @hsps = $hit->hsps;
+        ok @hsps, shift @{$hit_data[5]};
+        
+        my @hsp_data = ([1, 1], [77, 77], [33, 124], [103, 194], [71.2, 75.5], [2.2e-17, 1.1e-18],
+                        ['LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP',
+                         'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV'],
+						[7, 6],
+						['lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv',
+                         'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv'],
+                        ['lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ',
+                         'lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v'],
+						[1, 0], [8, 6], [1, 2], ['33 103', '124 194'], [78, 77], [22, 33], [33, 23],
+						['0.310', '0.465'], ['0.286', '0.429'], ['0.282', '0.429']);
+        
+        while (defined(my $hsp = $hit->next_domain)) {
+            ok ref($hsp), 'Bio::Search::HSP::HmmpfamHSP';
+            ok $hsp->hit->start, shift @{$hsp_data[0]};
+            ok $hsp->hit->end, shift @{$hsp_data[1]};
+            ok $hsp->query->start, shift @{$hsp_data[2]};
+            ok $hsp->query->end, shift @{$hsp_data[3]};
+			ok $hsp->start('hit'), $hsp->hit->start;
+			ok $hsp->end('hit'),$hsp->hit->end;
+			ok $hsp->start('query'), $hsp->query->start;
+			ok $hsp->end('query'), $hsp->query->end;
+			ok $hsp->strand('hit'), 1;
+			ok $hsp->strand('query'), 1;
+            ok $hsp->score, shift @{$hsp_data[4]};
+			ok ! $hsp->bits;
+            ok $hsp->evalue, shift @{$hsp_data[5]};
+			ok ! $hsp->pvalue;
+			ok $hsp->significance, $hsp->evalue;
+			ok $hsp->algorithm, $result->algorithm;
+			ok $hsp->rank, shift @{$hsp_data[12]};
+			my @range = $hsp->range;
+			ok "@range", shift @{$hsp_data[13]};
+			ok $hsp->n, $hit->num_hsps;
+			ok $hsp->length('query'), 71;
+			ok $hsp->length('hit'), 77;
+			my $locseq = $hsp->seq('hit');
+			
+			if ($result->query_name eq 'roa1_drome') {
+				ok ref($locseq), 'Bio::LocatableSeq';
+				my $aln = $hsp->get_aln('hit');
+				ok ref($aln), 'Bio::SimpleAlign';
+				ok $hsp->query_string, shift @{$hsp_data[6]};
+				ok $hsp->gaps('query'), shift @{$hsp_data[7]};
+				ok $hsp->gaps('hit'), shift @{$hsp_data[10]};
+				ok $hsp->gaps('total'), shift @{$hsp_data[11]};
+				ok $hsp->hit_string, shift @{$hsp_data[8]};
+				ok $hsp->homology_string, shift @{$hsp_data[9]};
+				ok $hsp->seq_str('hit'), $hsp->hit_string;
+				ok $hsp->seq_str('query'), $hsp->query_string;
+				ok $hsp->seq_str('homology'), $hsp->homology_string;
+				ok length($hsp->homology_string), length($hsp->hit_string);
+				ok length($hsp->query_string), length($hsp->homology_string);
+				ok $hsp->length('total'), shift @{$hsp_data[14]};
+				ok $hsp->hsp_length, $hsp->length('total');
+				ok $hsp->num_identical, shift @{$hsp_data[15]};
+				ok $hsp->num_conserved, shift @{$hsp_data[16]};
+				ok $hsp->frac_identical('query'), shift @{$hsp_data[17]};
+				ok $hsp->frac_identical('hit'), shift @{$hsp_data[18]};
+				ok $hsp->frac_identical('total'), shift @{$hsp_data[19]};
+			}
+        }
+    }
+}
+
+ok $searchio->result_count, 2;


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/hmmer_pull.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/interpro.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/interpro.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/interpro.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,85 @@
+# -*-Perl-*-
+# Bioperl Test Harness Script for Modules
+# $Id: interpro.t,v 1.3 2005/11/22 19:34:48 bosborne Exp $
+#
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {
+  use vars qw($error $NUMTESTS);
+  $error = 0;
+  # to handle systems with no installed Test module
+  # we include the t dir (where a copy of Test.pm is located)
+  # as a fallback
+  eval { require Test; };
+  if ( $@ ) {
+    use lib 't';
+  }
+  # interpro uses XML::DOM::XPath
+  eval {require XML::DOM::XPath};
+  if ( $@ ) {
+    $error = 1;
+    warn "XML::DOM::XPath not found - skipping interpro tests\n";
+  }
+  $NUMTESTS = 17;
+  use Test;
+  plan tests => $NUMTESTS;
+}
+
+END { 
+  foreach ( $Test::ntest..$NUMTESTS) {
+    skip('Unable to run interpro tests because XML::DOM::XPath is not installed',1);
+  }
+}
+
+if ( $error == 1 ) {
+	exit(0);
+}
+
+use Bio::SeqIO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $t_file = Bio::Root::IO->catfile("t","data","test.interpro");
+my $a_in = Bio::SeqIO->new( -file => $t_file,
+									 -verbose => $verbose,
+									 -format => 'interpro');
+
+my $seq = $a_in->next_seq();
+ok($seq);
+ok($seq->isa('Bio::Seq::RichSeq'));
+ok(scalar( $seq->get_SeqFeatures() ) == 6);
+
+my($feat) = $seq->get_SeqFeatures();
+ok($feat->isa('Bio::SeqFeature::Generic'));
+
+ok($feat->display_name eq 'Retinoblastoma-associated protein, B-box');
+
+ok($seq = $a_in->next_seq());
+ok(scalar( $seq->get_SeqFeatures() ) == 40);
+
+ok(!($seq = $a_in->next_seq()));
+
+# Bug 1908 (enhancement)
+$t_file = Bio::Root::IO->catfile("t","data","interpro_ebi.xml");
+my $b_in = Bio::SeqIO->new( -file => $t_file,
+									 -verbose => $verbose,
+									 -format => 'interpro');
+$seq = $b_in->next_seq();
+ok($seq);
+
+my @features = $seq->get_SeqFeatures;
+ok scalar @features,2;
+ok $features[0]->primary_tag, 'region';
+ok $features[0]->display_name,'Protein of unknown function DUF1021';
+ok $features[0]->location->end,78;
+
+my @dblinks = $features[0]->annotation->get_Annotations('dblink');
+ok (scalar @dblinks,3);
+ok $dblinks[1]->primary_id,'IPR009366';
+ok $dblinks[2]->primary_id,'PF06257.1';
+
+__END__
+

Added: trunk/packages/bioperl/branches/upstream/current/t/kegg.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/kegg.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/kegg.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,49 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: kegg.t,v 1.2 2005/11/12 23:15:49 bosborne Exp $
+
+use strict;
+
+BEGIN {
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => 13;
+}
+
+use Bio::SeqIO;
+
+ok(1);
+
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $io = Bio::SeqIO->new(-format => 'kegg',
+								 -verbose => $verbose,
+								 -file => Bio::Root::IO->catfile
+								 ("t","data","AHCYL1.kegg"));
+ok($io);
+my $kegg = $io->next_seq();
+ok($kegg);
+ok($kegg->accession, '10768');
+ok($kegg->display_id, 'AHCYL1');
+ok($kegg->alphabet, 'dna');
+ok($kegg->seq,'atgtcgatgcctgacgcgatgccgctgcccggggtcggggaggagctgaagcaggccaaggagatcgaggacgccgagaagtactccttcatggccaccgtcaccaaggcgcccaagaagcaaatccagtttgctgatgacatgcaggagttcaccaaattccccaccaaaactggccgaagatctttgtctcgctcgatctcacagtcctccactgacagctacagttcagctgcatcctacacagatagctctgatgatgaggtttctccccgagagaagcagcaaaccaactccaagggcagcagcaatttctgtgtgaagaacatcaagcaggcagaatttggacgccgggagattgagattgcagagcaagacatgtctgctctgatttcactcaggaaacgtgctcagggggagaagcccttggctggtgctaaaatagtgggctgtacacacatcacagcccagacagcggtgttgattgagacactctgtgccctgggggctcagtgccgctggtctgcttgtaacatctactcaactcagaatgaagtagctgcagcactggctgaggctggagttgcagtgttcgcttggaagggcgagtcagaagatgacttctggtggtgtattgaccgctgtgtgaacatggatgggtggcaggccaacatgatcctggatgatgggggagacttaacccactgggtttataagaagtatccaaacgtgtttaagaagatccgaggcattgtggaagagagcgtgactggtgttcacaggctgtatcagctctccaaagctgggaagctctgtgttccggccatgaacgtcaatgattctgttaccaaacagaagtttgataacttgtactgctgccgagaatccattttggatggcctgaagaggaccacagatgtgatgtttggtgggaaacaagtggtggtgtgtggctatggtgaggtaggcaagggctgctgtgctgctctcaaagctcttggagcaattgtctacattaccgaaatcgaccccatctgtgctctgcaggcctgcatggatgggttcagggtggtaaagctaaatgaagtcatccggcaagtcgatgtcgtaataacttgcacaggaaataagaatgtagtgacacgggagcacttggatcgcatgaaaaacagttgtatcgtatgcaatatgggccactccaacacagaaatcgatgtgaccagcctccgcactccggagctgacgtgggagcgagtacgttctcaggtggaccatgtcatctggccagatggcaaacgagttgtcctcctggcagagggtcgtctactcaatttgagctgctccacagttcccacctttgttctgtccatcacagccacaacacaggctttggcactgatagaactctataatgcacccgaggggcgatacaagcaggatgtgtacttgcttcctaagaaaatggatgaatacgttgccagcttgcatctgccatcatttgatgcccaccttacagagctgacagatgaccaagcaaaatatctgggactcaacaaaaatgggccattcaaacctaattattacagatactaa');
+ok($kegg->translate->seq);
+
+ok(($kegg->annotation->get_Annotations('description'))[0]->text,
+   'S-adenosylhomocysteine hydrolase-like 1 [EC:3.3.1.1]');
+
+ok(($kegg->annotation->get_Annotations('pathway'))[0]->text,
+   'Metabolism; Amino Acid Metabolism; Methionine metabolism');
+
+ok( (grep {$_->database eq 'KO'}
+     $kegg->annotation->get_Annotations('dblink'))[0]->comment, 
+    'adenosylhomocysteinase' );
+
+ok( (grep {$_->database eq 'PATH'} 
+     $kegg->annotation->get_Annotations('dblink'))[0]->primary_id,
+    'hsa00271' );
+
+ok( ($kegg->annotation->get_Annotations('aa_seq'))[0]->text,
+'MSMPDAMPLPGVGEELKQAKEIEDAEKYSFMATVTKAPKKQIQFADDMQEFTKFPTKTGRRSLSRSISQSSTDSYSSAASYTDSSDDEVSPREKQQTNSKGSSNFCVKNIKQAEFGRREIEIAEQDMSALISLRKRAQGEKPLAGAKIVGCTHITAQTAVLIETLCALGAQCRWSACNIYSTQNEVAAALAEAGVAVFAWKGESEDDFWWCIDRCVNMDGWQANMILDDGGDLTHWVYKKYPNVFKKIRGIVEESVTGVHRLYQLSKAGKLCVPAMNVNDSVTKQKFDNLYCCRESILDGLKRTTDVMFGGKQVVVCGYGEVGKGCCAALKALGAIVYITEIDPICALQACMDGFRVVKLNEVIRQVDVVITCTGNKNVVTREHLDRMKNSCIVCNMGHSNTEIDVTSLRTPELTWERVRSQVDHVIWPDGKRVVLLAEGRLLNLSCSTVPTFVLSITATTQALALIELYNAPEGRYKQDVYLLPKKMDEYVASLHLPSFDAHLTELTDDQAKYLGLNKNGPFKPNYYRY');

Added: trunk/packages/bioperl/branches/upstream/current/t/largefasta.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/largefasta.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/largefasta.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,56 @@
+
+
+use strict;
+BEGIN { 
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;    
+    plan tests => 15;
+}
+
+use Bio::SeqIO;
+use vars qw($tmpfile);
+use Bio::Root::IO;
+END { unlink $tmpfile; }
+
+$tmpfile = Bio::Root::IO->catfile("t","data","largefastatest.out");
+my $seqio = new Bio::SeqIO('-format'=>'largefasta',
+			   '-file'  =>Bio::Root::IO->catfile("t","data","genomic-seq.fasta"));
+ok defined $seqio, 1, 'cannot instantiate Bio::SeqIO::largefasta';
+
+my $pseq = $seqio->next_seq();
+$pseq->alphabet('dna');
+$pseq->desc('this is my description');;
+my $plength = $pseq->length();
+my $last_3 = $pseq->subseq($plength-3,$plength);
+
+ok defined $pseq, 1, 'could not call next_seq';
+ok $plength > 0, 1, "could not call length, seq was empty";
+ok length($pseq->subseq(100, 299)), 200, 'error in subseq'; 
+ok $pseq->trunc(100,199)->length(), 100, 'error in trunc'; 
+ok $pseq->alphabet(), 'dna', 'alphabet was ' . $pseq->alphabet();
+ok $pseq->display_id(), 'HSBA536C5',"no display id";
+ok $pseq->accession_number(), 'unknown', "no accession";
+ok $pseq->desc, 'this is my description', 'no description';
+
+ok open(OUT, ">$tmpfile"), 1,'could not open output file';
+
+my $seqout = new Bio::SeqIO('-format' => 'largefasta',
+			    '-fh'     => \*OUT );
+ok defined $seqout, 1,'could not open seq with outputstream';
+
+ok $seqout->write_seq($pseq), 1,'could not write seq';
+$seqout->close();
+close(OUT);
+my $seqin = new Bio::SeqIO('-format' => 'largefasta',
+			'-file'   => $tmpfile);
+my $pseq2 = $seqin->next_seq;
+ok ($plength, $pseq2->length(), 
+    "written file was not same length as expected");
+ok ($pseq->display_id(), $pseq2->display_id(), 
+    "display ids were not identical as expected");
+ok ($pseq->desc(), $pseq2->desc() , 
+    "description was not identical (" . $pseq->desc() . 
+    "," . $pseq2->desc() . ")");

Added: trunk/packages/bioperl/branches/upstream/current/t/largepseq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/largepseq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/largepseq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,91 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: largepseq.t,v 1.9 2001/11/02 18:48:09 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => 22;
+}
+use Bio::Seq::LargePrimarySeq;
+use Bio::Seq::LargeSeq;
+use Bio::Location::Simple;
+use Bio::Location::Fuzzy;
+use Bio::Location::Split;
+
+my $pseq = Bio::Seq::LargePrimarySeq->new();
+ok $pseq;
+$pseq->add_sequence_as_string('ATGGGGTGGGGTGAAACCCTTTGGGGGTGGGGTAAAT');
+$pseq->add_sequence_as_string('GTTTGGGGTTAAACCCCTTTGGGGGGT');
+
+ok $pseq->display_id('hello'), 'hello';
+
+ok $pseq->seq, 'ATGGGGTGGGGTGAAACCCTTTGGGGGTGGGGTAAATGTTTGGGGTTAAACCCCTTTGGGGGGT' , "Sequence is " . $pseq->seq;
+
+ok $pseq->subseq(3,7), 'GGGGT', "Subseq is ".$pseq->subseq(3,7);
+my $location = new Bio::Location::Simple(-start => 4, -end => 8,
+					 -strand => 1);
+ok($pseq->subseq($location), 'GGGTG');
+
+my $splitlocation = new Bio::Location::Split;
+
+$splitlocation->add_sub_Location( new Bio::Location::Simple('-start' => 1,
+							    '-end'   => 15,
+							    '-strand' => 1));
+
+$splitlocation->add_sub_Location( new Bio::Location::Simple('-start' => 21,
+							    '-end'   => 27,
+							    '-strand' => -1));
+
+ok( $pseq->subseq($splitlocation), 'ATGGGGTGGGGTGAACCCCCAA');
+
+my $fuzzy = new Bio::Location::Fuzzy(-start => '<10',
+				     -end   => '18',
+				     -strand => 1);
+
+ok( $pseq->subseq($fuzzy), 'GGTGAAACC');
+
+
+ok($pseq->trunc(8,15)->seq, 'GGGGTGAA', 
+    'trunc seq was ' . $pseq->trunc(8,15)->seq);
+
+
+ok $pseq->alphabet('dna'), 'dna'; # so translate will not complain
+ok $pseq->translate()->seq, 'MGWGETLWGWGKCLGLNPFGG';
+
+
+my $seq = new Bio::Seq::LargeSeq(-primaryseq => $pseq );
+
+ok $seq->display_id('hello'), 'hello';
+
+ok $seq->seq, 'ATGGGGTGGGGTGAAACCCTTTGGGGGTGGGGTAAATGTTTGGGGTTAAACCCCTTTGGGGGGT' , "Sequence is " . $seq->seq;
+
+ok $seq->subseq(3,7), 'GGGGT', "Subseq is ".$seq->subseq(3,7);
+ok ($seq->trunc(8,15)->seq, 'GGGGTGAA', 
+    'trunc seq was ' . $seq->trunc(8,15)->seq);
+
+ok $seq->alphabet('dna'), 'dna'; # so translate will not complain
+ok $seq->translate()->seq, 'MGWGETLWGWGKCLGLNPFGG';
+
+$seq = new Bio::Seq::LargeSeq( -display_id => 'hello');
+$seq->seq('ATGGGGTGGGGT');
+ok $seq->display_id, 'hello';
+
+ok $seq->seq, 'ATGGGGTGGGGT' , "Sequence is " . $seq->seq;
+
+ok $seq->subseq(3,7), 'GGGGT', "Subseq is ".$seq->subseq(3,7);
+ok ($seq->trunc(8,12)->seq, 'GGGGT', 
+    'trunc seq was ' . $seq->trunc(8,12)->seq);
+
+ok $seq->alphabet('dna'), 'dna'; # so translate will not complain
+ok $seq->translate()->seq, 'MGWG';

Added: trunk/packages/bioperl/branches/upstream/current/t/lasergene.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lasergene.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lasergene.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,67 @@
+# -*-Perl-*-
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+BEGIN {
+	$NUMTESTS = 11;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+use Bio::SeqIO::raw;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+
+#
+# Positive tests
+#
+
+my $io = Bio::SeqIO->new(
+  -format => 'lasergene',
+  -verbose => $verbose,
+  -file => Bio::Root::IO->catfile(qw(t data test.lasergene))
+);
+
+ok($io);
+
+my $seq;
+
+ok($seq = $io->next_seq);
+ok($seq->length, 12*3);
+ok($seq->subseq(1,12) eq 'ATCGATCGATCG');
+
+ok($seq = $io->next_seq);
+ok($seq->length, 200);
+
+ok($seq = $io->next_seq);
+ok($seq->length, 70*5+12);
+
+ok(not defined $io->next_seq);
+
+#
+# Negative tests
+#
+
+$io = Bio::SeqIO->new(
+  -format => 'lasergene',
+  -verbose => $verbose,
+  -file => Bio::Root::IO->catfile(qw(t data test.fasta)) # not lasergene!
+);
+
+ok($io);
+
+eval { 
+  $io->next_seq;
+};
+ok($@ =~ m/unexpected end of file/i);
+

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Error.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Error.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Error.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,741 @@
+# Error.pm
+#
+# Copyright (c) 1997-8 Graham Barr <gbarr at ti.com>. All rights reserved.
+# This program is free software; you can redistribute it and/or
+# modify it under the same terms as Perl itself.
+#
+# Based on my original Error.pm, and Exceptions.pm by Peter Seibel
+# <peter at weblogic.com> and adapted by Jesse Glick <jglick at sig.bsh.com>.
+#
+# but modified ***significantly***
+
+package Error;
+
+use strict;
+use 5.004;
+
+use overload (
+	'""'	   =>	'stringify',
+	'0+'	   =>	'value',
+	'bool'     =>	sub { return 1; },
+	'fallback' =>	1
+);
+
+$Error::Depth = 0;	# Depth to pass to caller()
+$Error::Debug = 0;	# Generate verbose stack traces
+@Error::STACK = ();	# Clause stack for try
+$Error::THROWN = undef;	# last error thrown, a workaround until die $ref works
+
+my $LAST;		# Last error created
+my %ERROR;		# Last error associated with package
+
+# Exported subs are defined in Error::subs
+
+# Import hook: discards the invocant and hands the requested symbol list to
+# Error::subs->import, with $Exporter::ExportLevel raised by one for the
+# duration of that call.
+sub import {
+    my ($class, @wanted) = @_;
+    local $Exporter::ExportLevel = 1 + $Exporter::ExportLevel;
+    Error::subs->import(@wanted);
+}
+
+# I really want to use "last" for the name of this method, but it is a keyword,
+# which prevents the syntax  last Error
+
+# prior([PKG_OR_OBJ])
+#
+# With no argument, returns the last error created ($LAST).
+# With a plain string, returns the error stored for that package name in
+# %ERROR, or undef when there is none.
+# With an object, returns the error stored under its '__Error__' key (hash
+# based objects, or the hash slot of glob based objects), or undef.
+sub prior {
+    my $invocant = shift;    # not used
+
+    return $LAST unless @_;
+
+    my $target = shift;
+    unless (ref $target) {
+        return exists $ERROR{$target} ? $ERROR{$target} : undef;
+    }
+
+    my $found = undef;
+    if ($target->isa('HASH')) {
+        if (exists $target->{'__Error__'}) {
+            $found = $target->{'__Error__'};
+        }
+    }
+    elsif ($target->isa('GLOB')) {
+        if (exists ${*$target}{'__Error__'}) {
+            $found = ${*$target}{'__Error__'};
+        }
+    }
+
+    return $found;
+}
+
+# Return as much information as possible about where the error
+# happened. The -stacktrace element only exists if $Error::Debug
+# was set when the error was created
+
+# Returns the stored '-stacktrace' when there is one. Otherwise returns the
+# '-text' message ("Died" when no '-text' key exists), with
+# " at FILE line N.\n" appended unless the message already ends in a newline.
+sub stacktrace {
+    my ($self) = @_;
+
+    if (exists $self->{'-stacktrace'}) {
+        return $self->{'-stacktrace'};
+    }
+
+    my $message = "Died";
+    $message = $self->{'-text'} if exists $self->{'-text'};
+
+    if ($message !~ /\n$/s) {
+        $message .= sprintf(" at %s line %d.\n", $self->file, $self->line);
+    }
+
+    return $message;
+}
+
+# Allow error propagation, ie
+#
+# $ber->encode(...) or
+#    return Error->prior($ber)->associate($ldap);
+
+# associate(ERR, OBJ)
+#
+# Attaches the error ERR to the object OBJ so that prior(OBJ) can find it:
+#   * hash based objects get it under the '__Error__' key,
+#   * glob based objects get it in the '__Error__' key of the glob's hash,
+#   * in every case it is also recorded in %ERROR under OBJ's class name,
+#     which is the key prior() uses when given a package name.
+# Does nothing when OBJ is not a reference. Always returns empty/undef.
+sub associate {
+    my $err = shift;
+    my $obj = shift;
+
+    return unless ref($obj);
+
+    if($obj->isa('HASH')) {
+        $obj->{'__Error__'} = $err;
+    }
+    elsif($obj->isa('GLOB')) {
+        ${*$obj}{'__Error__'} = $err;
+    }
+
+    # Key by the class name. The previous code first overwrote $obj with
+    # ref($obj) and then applied ref() again, which always yields '' for a
+    # plain string, so every association landed in $ERROR{''}.
+    $ERROR{ ref($obj) } = $err;
+
+    return;
+}
+
+# Constructor. Records the package, file and line reported by
+# caller($Error::Depth), lets the caller's key/value arguments override them,
+# and blesses the result into the invocant. If an '-object' key is present
+# the new error is associated with that object. When $Error::Debug is true a
+# Carp::longmess trace (with the try/run_clauses frames removed) is stored
+# under '-stacktrace'. The new error becomes $@, $LAST and the per-package
+# entry in %ERROR, and is returned.
+sub new {
+    my $class = shift;
+    my ($pkg, $file, $line) = caller($Error::Depth);
+
+    my $err = bless {
+        '-package' => $pkg,
+        '-file'    => $file,
+        '-line'    => $line,
+        @_
+    }, $class;
+
+    if (exists $err->{'-object'}) {
+        $err->associate($err->{'-object'});
+    }
+
+    # Building a stack trace for every error would be very inefficient, so
+    # it is only done on request.
+    if ($Error::Debug) {
+        require Carp;
+        local $Carp::CarpLevel = $Error::Depth;
+
+        my $text = "Error";
+        $text = $err->{'-text'} if defined $err->{'-text'};
+
+        my $trace = Carp::longmess($text);
+        # Remove try calls from the trace
+        $trace =~ s/(\n\s+\S+__ANON__[^\n]+)?\n\s+eval[^\n]+\n\s+Error::subs::try[^\n]+(?=\n)//sog;
+        $trace =~ s/(\n\s+\S+__ANON__[^\n]+)?\n\s+eval[^\n]+\n\s+Error::subs::run_clauses[^\n]+\n\s+Error::subs::try[^\n]+(?=\n)//sog;
+        $err->{'-stacktrace'} = $trace;
+    }
+
+    $ERROR{$pkg} = $err;
+    $LAST        = $err;
+    $@           = $err;
+    return $err;
+}
+
+# Throw an error. this contains some very gory code.
+
+# Raises an error with die. Called on a class name, it first constructs the
+# error from the remaining arguments; called on an existing object, it
+# rethrows that object unchanged. The thrown object is also left in
+# $Error::THROWN. $Error::Depth is raised by one while this sub runs.
+sub throw {
+    my ($thrower, @ctor_args) = @_;
+    local $Error::Depth = 1 + $Error::Depth;
+
+    # only build a new object when we are not rethrowing
+    my $exception = ref($thrower) ? $thrower : $thrower->new(@ctor_args);
+
+    $Error::THROWN = $exception;
+    die $exception;
+}
+
+# syntactic sugar for
+#
+#    die with Error( ... );
+
+# Builds and returns a new error of the invocant's class from the remaining
+# arguments, with $Error::Depth raised by one during construction.
+sub with {
+    my ($class, @ctor_args) = @_;
+    local $Error::Depth = 1 + $Error::Depth;
+    return $class->new(@ctor_args);
+}
+
+# syntactic sugar for
+#
+#    record Error( ... ) and return;
+
+# Builds and returns a new error of the invocant's class from the remaining
+# arguments without throwing it; $Error::Depth is raised by one during
+# construction.
+sub record {
+    my ($class, @ctor_args) = @_;
+    local $Error::Depth = 1 + $Error::Depth;
+    return $class->new(@ctor_args);
+}
+
+# catch clause for
+#
+# try { ... } catch CLASS with { ... }
+
+# catch(CLASS, HANDLER [, CLAUSES])
+#
+# Puts the (CLASS, HANDLER) pair at the front of the 'catch' list of the
+# clause hash (creating the hash and the list when missing) and returns the
+# clause hash.
+sub catch {
+    my ($class, $handler, $clauses) = @_;
+
+    $clauses ||= {};
+    $clauses->{'catch'} ||= [];
+
+    unshift @{ $clauses->{'catch'} }, $class, $handler;
+
+    return $clauses;
+}
+
+# Object query methods
+
+# Accessor: the value stored under '-object', or undef when the key is absent.
+sub object {
+    my ($self) = @_;
+    return undef unless exists $self->{'-object'};
+    return $self->{'-object'};
+}
+
+# Accessor: the value stored under '-file', or undef when the key is absent.
+sub file {
+    my ($self) = @_;
+    return undef unless exists $self->{'-file'};
+    return $self->{'-file'};
+}
+
+# Accessor: the value stored under '-line', or undef when the key is absent.
+sub line {
+    my ($self) = @_;
+    return undef unless exists $self->{'-line'};
+    return $self->{'-line'};
+}
+
+# Accessor: the value stored under '-text', or undef when the key is absent.
+sub text {
+    my ($self) = @_;
+    return undef unless exists $self->{'-text'};
+    return $self->{'-text'};
+}
+
+# overload methods
+
+# String overload handler: the '-text' value when it is defined, otherwise
+# the literal "Died".
+sub stringify {
+    my ($self) = @_;
+    return "Died" unless defined $self->{'-text'};
+    return $self->{'-text'};
+}
+
+# Numeric overload handler: the value stored under '-value', or undef when
+# the key is absent.
+sub value {
+    my ($self) = @_;
+    return undef unless exists $self->{'-value'};
+    return $self->{'-value'};
+}
+
+package Error::Simple;
+
+@Error::Simple::ISA = qw(Error);
+
+# new(TEXT [, VALUE])
+#
+# Builds an error from a plain message. A trailing " at FILE line N" (with
+# an optional ".\n") is cut from the message and passed on as '-file' and
+# '-line'. A defined VALUE is passed on, forced to a number, as '-value'.
+# Construction is delegated to the parent class with $Error::Depth raised
+# by one.
+sub new {
+    my ($class, $raw_text, $value) = @_;
+    my $text = "" . $raw_text;
+
+    local $Error::Depth = 1 + $Error::Depth;
+
+    my @extra = ();
+    if ($text =~ s/ at (\S+) line (\d+)(\.\n)?$//s) {
+        @extra = ('-file' => $1, '-line' => $2);
+    }
+    if (defined $value) {
+        push @extra, '-value', 0 + $value;
+    }
+
+    return $class->SUPER::new('-text' => $text, @extra);
+}
+
+# String overload handler: the parent class's string form, with
+# " at FILE line N.\n" appended unless it already ends in a newline.
+sub stringify {
+    my ($self) = @_;
+    my $message = $self->SUPER::stringify;
+    return $message if $message =~ /\n$/s;
+    return $message . sprintf(" at %s line %d.\n", $self->file, $self->line);
+}
+
+##########################################################################
+##########################################################################
+
+# Inspired by code from Jesse Glick <jglick at sig.bsh.com> and
+# Peter Seibel <peter at weblogic.com>
+
+package Error::subs;
+
+use Exporter ();
+use vars qw(@EXPORT_OK @ISA %EXPORT_TAGS);
+
+@EXPORT_OK   = qw(try with finally except otherwise);
+%EXPORT_TAGS = (try => \@EXPORT_OK);
+
+@ISA = qw(Exporter);
+
+sub run_clauses ($$$\@) {  # offer $err to the catch/otherwise handlers in $clauses; returns the error still outstanding (undef if handled)
+    my($clauses,$err,$wantarray,$result) = @_;  # $result is an array ref (prototype \@) that receives the handler's return value
+    my $code = undef;
+
+    $err = new Error::Simple($err) unless ref($err);  # wrap a plain (non-reference) error in an Error::Simple object
+
+    CATCH: {  # bare block, so that 'last CATCH' also skips the otherwise clause below
+
+	# catch: the list holds (class, handler) pairs; a pair whose class is undef was added by except
+	my $catch;
+	if(defined($catch = $clauses->{'catch'})) {
+	    my $i = 0;
+
+	    CATCHLOOP:
+	    for( ; $i < @$catch ; $i += 2) {  # walk the pairs two slots at a time
+		my $pkg = $catch->[$i];
+		unless(defined $pkg) {
+		    #except: replace the placeholder pair with whatever its sub returns (it is called with no arguments)
+		    splice(@$catch,$i,2,$catch->[$i+1]->());
+		    $i -= 2;  # cancels the loop's += 2 so index $i is examined again
+		    next CATCHLOOP;
+		}
+		elsif($err->isa($pkg)) {  # first clause whose class matches the error
+		    $code = $catch->[$i+1];
+		    while(1) {  # body runs once: every path leaves via 'next CATCHLOOP' or 'last CATCH'
+			my $more = 0;
+			local($Error::THROWN);  # undefined for the duration of the handler call
+			my $ok = eval {  # call the handler in the caller's context; $ok stays false if it dies
+			    if($wantarray) {
+				@{$result} = $code->($err,\$more);
+			    }
+			    elsif(defined($wantarray)) {
+			        @{$result} = ();
+				$result->[0] = $code->($err,\$more);
+			    }
+			    else {
+				$code->($err,\$more);
+			    }
+			    1;
+			};
+			if( $ok ) {
+			    next CATCHLOOP if $more;  # handler set $$more: keep looking at later clauses
+			    undef $err;  # handled
+			}
+			else {
+			    $err = defined($Error::THROWN)  # the handler itself died: its error replaces the original
+				    ? $Error::THROWN : $@;
+			    $err = new Error::Simple($err)
+				    unless ref($err);
+			}
+			last CATCH;  # a catch handler ran, so otherwise is not consulted
+		    };
+		}
+	    }
+	}
+
+	# otherwise: reached only when no catch clause took the error (none matched, or each one set $$more)
+	my $owise;
+	if(defined($owise = $clauses->{'otherwise'})) {
+	    my $code = $clauses->{'otherwise'};
+	    my $more = 0;  # passed to the handler but not examined afterwards
+	    my $ok = eval {
+		if($wantarray) {
+		    @{$result} = $code->($err,\$more);
+		}
+		elsif(defined($wantarray)) {
+		    @{$result} = ();
+		    $result->[0] = $code->($err,\$more);
+		}
+		else {
+		    $code->($err,\$more);
+		}
+		1;
+	    };
+	    if( $ok ) {
+		undef $err;
+	    }
+	    else {
+		$err = defined($Error::THROWN)  # NOTE: $Error::THROWN is not localized on this path, unlike the catch path
+			? $Error::THROWN : $@;
+		$err = new Error::Simple($err)
+			unless ref($err);
+	    }
+	}
+    }
+    $err;  # undef when handled, otherwise the error that is still outstanding
+}
+
+# try BLOCK [CLAUSES]
+#
+# Runs BLOCK inside an eval, in the same context (list, scalar or void) in
+# which try itself was called. CLAUSES is the hash built by the
+# catch/except/otherwise/finally helpers; an empty hash is used when none
+# is given.
+#
+# If BLOCK dies, the error ($Error::THROWN when set, otherwise $@) is handed
+# to run_clauses. The 'finally' handler, when present, is then called, and
+# any error that is still outstanding is rethrown. Otherwise the value(s)
+# produced by BLOCK, or by the handler that ran, are returned.
+sub try (&;$) {
+    my $try = shift;
+    my $clauses = @_ ? shift : {};
+    my $ok = 0;
+    my $err = undef;
+    my @result = ();
+
+    # The clause hash is on @Error::STACK only while BLOCK is running.
+    unshift @Error::STACK, $clauses;
+
+    my $wantarray = wantarray();
+
+    do {
+        local $Error::THROWN = undef;
+
+        $ok = eval {
+            if($wantarray) {
+                @result = $try->();
+            }
+            elsif(defined $wantarray) {
+                $result[0] = $try->();
+            }
+            else {
+                $try->();
+            }
+            1;
+        };
+
+        $err = defined($Error::THROWN) ? $Error::THROWN : $@
+            unless $ok;
+    };
+
+    shift @Error::STACK;
+
+    # run_clauses is prototyped ($$$\@): the last argument has to be written
+    # as the array itself (@result) so that it is passed by reference and
+    # the handlers can store their return value in it.
+    $err = run_clauses($clauses,$err,wantarray,@result)
+        unless($ok);
+
+    $clauses->{'finally'}->()
+        if(defined($clauses->{'finally'}));
+
+    throw $err if defined($err);
+
+    wantarray ? @result : $result[0];
+}
+
+# Each clause adds a sub to the list of clauses. The finally clause is
+# always the last, and the otherwise clause is always added just before
+# the finally clause.
+#
+# All clauses, except the finally clause, add a sub which takes one argument
+# this argument will be the error being thrown. The sub will return a code ref
+# if that clause can handle that error, otherwise undef is returned.
+#
+# The otherwise clause adds a sub which unconditionally returns the user's
+# code reference, this is why it is forced to be last.
+#
+# The catch clause is defined in Error.pm, as the syntax causes it to
+# be called as a method
+
+# Passes its arguments (a code block and an optional clause hash) straight
+# back to the caller.
+sub with (&;$) {
+    return @_;
+}
+
+# Starts a clause hash holding the given block under the 'finally' key and
+# returns it.
+sub finally (&) {
+    my ($handler) = @_;
+    return { 'finally' => $handler };
+}
+
+# The except clause is a block which returns a hashref or a list of
+# key-value pairs, where the keys are the classes and the values are subs.
+
+# Register an except block.  The block is wrapped in a sub that calls it
+# with the error and flattens whatever it returns (a hash ref, an array
+# ref, or a plain list) into a list of class => handler pairs.  The pair
+# (undef, wrapper) is put at the front of the 'catch' list.
+sub except (&;$) {
+    my ($block, $clauses) = @_;
+    $clauses ||= {};
+    my $handlers = $clauses->{'catch'} ||= [];
+
+    my $expand = sub {
+        my @returned = $block->($_[0]);
+        my $list = \@returned;
+
+        # A lone reference stands for the whole list; a hash is unrolled
+        # into its key/value pairs.
+        if (@returned == 1 && ref($returned[0])) {
+            $list = $returned[0];
+            $list = [ %$list ] if UNIVERSAL::isa($list, 'HASH');
+        }
+
+        return @$list;
+    };
+
+    unshift @$handlers, undef, $expand;
+
+    return $clauses;
+}
+
+# Store the catch-all handler under the 'otherwise' key of the clauses
+# hash, creating the hash if none was passed in.  Croaks if an otherwise
+# handler is already present.
+sub otherwise (&;$) {
+    my ($handler, $clauses) = @_;
+    $clauses ||= {};
+
+    if (exists $clauses->{'otherwise'}) {
+        require Carp;
+        Carp::croak("Multiple otherwise clauses");
+    }
+
+    $clauses->{'otherwise'} = $handler;
+
+    return $clauses;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Error - Error/exception handling in an OO-ish way
+
+=head1 SYNOPSIS
+
+    use Error qw(:try);
+
+    throw Error::Simple( "A simple error");
+
+    sub xyz {
+        ...
+	record Error::Simple("A simple error")
+	    and return;
+    }
+
+    unlink($file) or throw Error::Simple("$file: $!",$!);
+
+    try {
+	do_some_stuff();
+	die "error!" if $condition;
+	throw Error::Simple -text => "Oops!" if $other_condition;
+    }
+    catch Error::IO with {
+	my $E = shift;
+	print STDERR "File ", $E->{'-file'}, " had a problem\n";
+    }
+    except {
+	my $E = shift;
+	my $general_handler=sub {send_message $E->{-description}};
+	return {
+	    UserException1 => $general_handler,
+	    UserException2 => $general_handler
+	};
+    }
+    otherwise {
+	print STDERR "Well I don't know what to say\n";
+    }
+    finally {
+	close_the_garage_door_already(); # Should be reliable
+    }; # Don't forget the trailing ; or you might be surprised
+
+=head1 DESCRIPTION
+
+The C<Error> package provides two interfaces. Firstly C<Error> provides
+a procedural interface to exception handling. Secondly C<Error> is a
+base class for errors/exceptions that can either be thrown, for
+subsequent catch, or can simply be recorded.
+
+Errors in the class C<Error> should not be thrown directly, but the
+user should throw errors from a sub-class of C<Error>.
+
+=head1 PROCEDURAL INTERFACE
+
+C<Error> exports subroutines to perform exception handling. These will
+be exported if the C<:try> tag is used in the C<use> line.
+
+=over 4
+
+=item try BLOCK CLAUSES
+
+C<try> is the main subroutine called by the user. All other subroutines
+exported are clauses to the try subroutine.
+
+The BLOCK will be evaluated and, if no error is thrown, try will return
+the result of the block.
+
+C<CLAUSES> are the subroutines below, which describe what to do in the
+event of an error being thrown within BLOCK.
+
+=item catch CLASS with BLOCK
+
+This clause will cause all errors that satisfy C<$err-E<gt>isa(CLASS)>
+to be caught and handled by evaluating C<BLOCK>.
+
+C<BLOCK> will be passed two arguments. The first will be the error
+being thrown. The second is a reference to a scalar variable. If this
+variable is set by the catch block then, on return from the catch
+block, try will continue processing as if the catch block was never
+found.
+
+To propagate the error the catch block may call C<$err-E<gt>throw>
+
+If the scalar referenced by the second argument is not set, and the
+error is not thrown, then the current try block will return with the
+result from the catch block.
+
+=item except BLOCK
+
+When C<try> is looking for a handler, if an except clause is found
+C<BLOCK> is evaluated. The return value from this block should be a
+HASHREF or a list of key-value pairs, where the keys are class names
+and the values are CODE references for the handler of errors of that
+type.
+
+=item otherwise BLOCK
+
+Catch any error by executing the code in C<BLOCK>
+
+When evaluated C<BLOCK> will be passed one argument, which will be the
+error being processed.
+
+Only one otherwise block may be specified per try block
+
+=item finally BLOCK
+
+Execute the code in C<BLOCK> either after the code in the try block has
+successfully completed, or if the try block throws an error then
+C<BLOCK> will be executed after the handler has completed.
+
+If the handler throws an error then the error will be caught, the
+finally block will be executed and the error will be re-thrown.
+
+Only one finally block may be specified per try block
+
+=back
+
+=head1 CLASS INTERFACE
+
+=head2 CONSTRUCTORS
+
+The C<Error> object is implemented as a HASH. This HASH is initialized
+with the arguments that are passed to its constructor. The elements
+that are used by, or are retrievable by the C<Error> class are listed
+below, other classes may add to these.
+
+	-file
+	-line
+	-text
+	-value
+	-object
+
+If C<-file> or C<-line> are not specified in the constructor arguments
+then these will be initialized with the file name and line number where
+the constructor was called from.
+
+If the error is associated with an object then the object should be
+passed as the C<-object> argument. This will allow the C<Error> package
+to associate the error with the object.
+
+The C<Error> package remembers the last error created, and also the
+last error associated with a package. This could either be the last
+error created by a sub in that package, or the last error which passed
+an object blessed into that package as the C<-object> argument.
+
+=over 4
+
+=item throw ( [ ARGS ] )
+
+Create a new C<Error> object and throw an error, which will be caught
+by a surrounding C<try> block, if there is one. Otherwise it will cause
+the program to exit.
+
+C<throw> may also be called on an existing error to re-throw it.
+
+=item with ( [ ARGS ] )
+
+Create a new C<Error> object and returns it. This is defined for
+syntactic sugar, eg
+
+    die with Some::Error ( ... );
+
+=item record ( [ ARGS ] )
+
+Create a new C<Error> object and returns it. This is defined for
+syntactic sugar, eg
+
+    record Some::Error ( ... )
+	and return;
+
+=back
+
+=head2 STATIC METHODS
+
+=over 4
+
+=item prior ( [ PACKAGE ] )
+
+Return the last error created, or the last error associated with
+C<PACKAGE>
+
+=back
+
+=head2 OBJECT METHODS
+
+=over 4
+
+=item stacktrace
+
+If the variable C<$Error::Debug> was non-zero when the error was
+created, then C<stacktrace> returns a string created by calling
+C<Carp::longmess>. If the variable was zero the C<stacktrace> returns
+the text of the error appended with the filename and line number of
+where the error was created, providing the text does not end with a
+newline.
+
+=item object
+
+The object this error was associated with
+
+=item file
+
+The file where the constructor of this error was called from
+
+=item line
+
+The line where the constructor of this error was called from
+
+=item text
+
+The text of the error
+
+=back
+
+=head2 OVERLOAD METHODS
+
+=over 4
+
+=item stringify
+
+A method that converts the object into a string. This method may simply
+return the same as the C<text> method, or it may append more
+information. For example the file name and line number.
+
+By default this method returns the C<-text> argument that was passed to
+the constructor, or the string C<"Died"> if none was given.
+
+=item value
+
+A method that will return a value that can be associated with the
+error. For example if an error was created due to the failure of a
+system call, then this may return the numeric value of C<$!> at the
+time.
+
+By default this method returns the C<-value> argument that was passed
+to the constructor.
+
+=back
+
+=head1 PRE-DEFINED ERROR CLASSES
+
+=over 4
+
+=item Error::Simple
+
+This class can be used to hold simple error strings and values. Its
+constructor takes two arguments. The first is a text value, the second
+is a numeric value. These values are what will be returned by the
+overload methods.
+
+If the text value ends with C<at file line 1> as $@ strings do, then
+this information will be used to set the C<-file> and C<-line> arguments
+of the error object.
+
+This class is used internally if an eval'd block die's with an error
+that is a plain string.
+
+=back
+
+=head1 KNOWN BUGS
+
+None, but that does not mean there are not any.
+
+=head1 AUTHORS
+
+Graham Barr, gbarr at pobox.com
+
+The code that inspired me to write this was originally written by
+Peter Seibel E<lt>peter at weblogic.comE<gt> and adapted by Jesse Glick
+E<lt>jglick at sig.bsh.comE<gt>.
+
+=head1 MAINTAINER
+
+Arun Kumar U, u_arunkumar at yahoo.com
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Module.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Module.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Module.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,182 @@
+package Test::Builder::Module;
+
+use Test::Builder;
+
+require Exporter;
+use base qw(Exporter);
+
+$VERSION = '0.03';
+
+use strict;
+
+# 5.004's Exporter doesn't have export_to_level, so this private code ref
+# stands in for it: it exports the requested symbols from $pkg into the
+# package found $level frames up the call stack.
+my $_export_to_level = sub {
+      my($pkg, $level) = splice @_, 0, 2;
+      shift @_;                         # redundant arg
+      my $callpkg = caller($level);
+      return $pkg->export($callpkg, @_);
+};
+
+
+=head1 NAME
+
+Test::Builder::Module - Base class for test modules
+
+=head1 SYNOPSIS
+
+  # Emulates Test::Simple
+  package Your::Module;
+
+  my $CLASS = __PACKAGE__;
+
+  use base 'Test::Builder::Module';
+  @EXPORT = qw(ok);
+
+  sub ok ($;$) {
+      my $tb = $CLASS->builder;
+      return $tb->ok(@_);
+  }
+  
+  1;
+
+
+=head1 DESCRIPTION
+
+This is a superclass for Test::Builder-based modules.  It provides a
+handful of common functionality and a method of getting at the underlying
+Test::Builder object.
+
+
+=head2 Importing
+
+Test::Builder::Module is a subclass of Exporter which means your
+module is also a subclass of Exporter.  @EXPORT, @EXPORT_OK, etc...
+all act normally.
+
+A few methods are provided to do the C<< use Your::Module tests => 23 >> part
+for you.
+
+=head3 import
+
+Test::Builder::Module provides an import() method which acts in the
+same basic way as Test::More's, setting the plan and controlling
+exporting of functions and variables.  This allows your module to set
+the plan independent of Test::More.
+
+All arguments passed to import() are passed onto
+C<< Your::Module->builder->plan() >> with the exception of
+C<< import => [qw(things to import)] >>.
+
+    use Your::Module import => [qw(this that)], tests => 23;
+
+says to import the functions this() and that() as well as set the plan
+to be 23 tests.
+
+import() also sets the exported_to() attribute of your builder to be
+the caller of the import() function.
+
+Additional behaviors can be added to your import() method by overriding
+import_extra().
+
+=cut
+
+# Called by "use Your::Module ...".  Records the calling package on the
+# builder, lets import_extra() and _strip_imports() edit the argument
+# list, hands what is left to plan(), then exports the requested symbols
+# into the calling package.
+sub import {
+    my($class) = shift;
+
+    my $test = $class->builder;
+
+    my $caller = caller;
+
+    $test->exported_to($caller);
+
+    # Both helpers are given a reference to @_ so that they can remove
+    # arguments in place before plan() sees the list.
+    $class->import_extra(\@_);
+    my(@imports) = $class->_strip_imports(\@_);
+
+    $test->plan(@_);
+
+    # Level 1 as seen from inside the export helper is our own caller.
+    $class->$_export_to_level(1, $class, @imports);
+}
+
+
+# Pull every "import => [LIST]" pair out of the argument list referenced
+# by $list.  The list is rewritten in place without those pairs, and the
+# contents of all the LISTs are returned, in order.
+sub _strip_imports {
+    my($class, $list) = @_;
+
+    my(@imports, @other);
+
+    for( my $idx = 0; $idx <= $#{$list}; $idx++ ) {
+        my $item = $list->[$idx];
+
+        unless( defined $item and $item eq 'import' ) {
+            push @other, $item;
+            next;
+        }
+
+        # The element after 'import' holds the symbols; step over it.
+        push @imports, @{$list->[$idx+1]};
+        $idx++;
+    }
+
+    @$list = @other;
+
+    return @imports;
+}
+
+
+=head3 import_extra
+
+    Your::Module->import_extra(\@import_args);
+
+import_extra() is called by import().  It provides an opportunity for you
+to add behaviors to your module based on its import list.
+
+Any extra arguments which shouldn't be passed on to plan() should be 
+stripped off by this method.
+
+See Test::More for an example of its use.
+
+B<NOTE> This mechanism is I<VERY ALPHA AND LIKELY TO CHANGE> as it
+feels like a bit of an ugly hack in its current form.
+
+=cut
+
+# Default hook: does nothing.  Subclasses override it.
+sub import_extra {
+    return;
+}
+
+
+=head2 Builder
+
+Test::Builder::Module provides some methods of getting at the underlying
+Test::Builder object.
+
+=head3 builder
+
+  my $builder = Your::Class->builder;
+
+This method returns the Test::Builder object associated with Your::Class.
+It is not a constructor so you can call it as often as you like.
+
+This is the preferred way to get the Test::Builder object.  You should
+I<not> get it via C<< Test::Builder->new >> as was previously
+recommended.
+
+The object returned by builder() may change at runtime so you should
+call builder() inside each function rather than store it in a global.
+
+  sub ok {
+      my $builder = Your::Class->builder;
+
+      return $builder->ok(@_);
+  }
+
+
+=cut
+
+# Returns whatever Test::Builder->new returns; the class argument is not
+# used here.
+sub builder {
+    return Test::Builder->new;
+}
+
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester/Color.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester/Color.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester/Color.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,50 @@
+package Test::Builder::Tester::Color;
+
+use strict;
+
+require Test::Builder::Tester;
+
+=head1 NAME
+
+Test::Builder::Tester::Color - turn on colour in Test::Builder::Tester
+
+=head1 SYNOPSIS
+
+   When running a test script
+
+     perl -MTest::Builder::Tester::Color test.t
+
+=head1 DESCRIPTION
+
+Importing this module causes the subroutine color in Test::Builder::Tester
+to be called with a true value causing colour highlighting to be turned
+on in debug output.
+
+The sole purpose of this module is to enable colour highlighting
+from the command line.
+
+=cut
+
+# Loading this module with "use" or -M calls import(), which switches
+# colour on by calling Test::Builder::Tester::color with a true value.
+# Any import arguments are ignored.
+sub import
+{
+    Test::Builder::Tester::color(1);
+}
+
+=head1 AUTHOR
+
+Copyright Mark Fowler E<lt>mark at twoshortplanks.comE<gt> 2002.
+
+This program is free software; you can redistribute it
+and/or modify it under the same terms as Perl itself.
+
+=head1 BUGS
+
+This module will have no effect unless Term::ANSIColor is installed.
+
+=head1 SEE ALSO
+
+L<Test::Builder::Tester>, L<Term::ANSIColor>
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder/Tester.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,639 @@
+package Test::Builder::Tester;
+
+use strict;
+use vars qw(@EXPORT $VERSION);
+$VERSION = "1.04";
+
+use Test::Builder;
+use Symbol;
+use Carp;
+
+=head1 NAME
+
+Test::Builder::Tester - test testsuites that have been built with
+Test::Builder
+
+=head1 SYNOPSIS
+
+    use Test::Builder::Tester tests => 1;
+    use Test::More;
+
+    test_out("not ok 1 - foo");
+    test_fail(+1);
+    fail("foo");
+    test_test("fail works");
+
+=head1 DESCRIPTION
+
+A module that helps you test testing modules that are built with
+B<Test::Builder>.
+
+The testing system is designed to be used by performing a three step
+process for each test you wish to test.  This process starts with using
+C<test_out> and C<test_err> in advance to declare what the testsuite you
+are testing will output with B<Test::Builder> to stdout and stderr.
+
+You then can run the test(s) from your test suite that call
+B<Test::Builder>.  At this point the output of B<Test::Builder> is
+safely captured by B<Test::Builder::Tester> rather than being
+interpreted as real test output.
+
+The final stage is to call C<test_test> that will simply compare what you
+predeclared to what B<Test::Builder> actually outputted, and report the
+results back with a "ok" or "not ok" (with debugging) to the normal
+output.
+
+=cut
+
+####
+# set up testing
+####
+
+my $t = Test::Builder->new;
+
+###
+# make us an exporter
+###
+
+use base qw(Exporter);
+
+# the functions exported by default
+@EXPORT = qw(test_out test_err test_fail test_diag test_test line_num);
+
+# _export_to_level and import stolen directly from Test::More.  I am
+# the king of cargo cult programming ;-)
+
+# 5.004's Exporter doesn't have export_to_level.
+# Export the named symbols from $pkg into the package found $level frames
+# up the call stack.  The third argument is accepted and ignored.
+sub _export_to_level
+{
+      my($pkg, $level) = splice @_, 0, 2;
+      shift @_;                         # XXX redundant arg
+      my $callpkg = caller($level);
+      return $pkg->export($callpkg, @_);
+}
+
+# Called by "use Test::Builder::Tester ...".  The whole argument list is
+# given to plan(); the array ref following the first 'import' argument,
+# if there is one, names the functions to export to the caller.
+sub import {
+    my($class, @plan) = @_;
+
+    my $caller = caller;
+
+    $t->exported_to($caller);
+    $t->plan(@plan);
+
+    # only the first 'import' entry is honoured
+    my @imports = ();
+    my $idx = 0;
+    while( $idx <= $#plan ) {
+        if( $plan[$idx] eq 'import' ) {
+            @imports = @{$plan[$idx+1]};
+            last;
+        }
+        $idx++;
+    }
+
+    __PACKAGE__->_export_to_level(1, __PACKAGE__, @imports);
+}
+
+###
+# set up file handles
+###
+
+# create two anonymous globs to serve as private file handles
+my $output_handle = gensym;
+my $error_handle  = gensym;
+
+# and tie them to the Test::Builder::Tester::Tie helper class defined at
+# the end of this file; $out and $err are the objects behind the handles
+my $out = tie *$output_handle, "Test::Builder::Tester::Tie", "STDOUT";
+my $err = tie *$error_handle,  "Test::Builder::Tester::Tie", "STDERR";
+
+####
+# exported functions
+####
+
+# for remembering that we're testing and where we're testing at
+# ($testing_num holds the test number in force when capturing began)
+my $testing = 0;
+my $testing_num;
+
+# remembering where the file handles were originally connected
+my $original_output_handle;
+my $original_failure_handle;
+my $original_todo_handle;
+
+# NOTE: the next two are declared but never used in this file
+my $original_test_number;
+my $original_harness_state;
+
+# value of $ENV{HARNESS_ACTIVE} saved while output is being captured
+my $original_harness_env;
+
+# function that starts testing and redirects the filehandles for now.
+# It saves the builder's three output handles, HARNESS_ACTIVE and the
+# current test number so that test_test can restore them, points the
+# builder at the tied capture handles and clears anything captured or
+# expected so far.
+sub _start_testing
+{
+    # even if we're running under Test::Harness pretend we're not
+    # for now.  This is needed so Test::Builder doesn't add extra spaces
+    $original_harness_env = $ENV{HARNESS_ACTIVE} || 0;
+    $ENV{HARNESS_ACTIVE} = 0;
+
+    # remember what the handles were set to
+    $original_output_handle  = $t->output();
+    $original_failure_handle = $t->failure_output();
+    $original_todo_handle    = $t->todo_output();
+
+    # switch out to our own handles
+    # (failure and todo output both go to the error capture)
+    $t->output($output_handle);
+    $t->failure_output($error_handle);
+    $t->todo_output($error_handle);
+
+    # clear the expected list
+    $out->reset();
+    $err->reset();
+
+    # remember that we're testing, and restart numbering from zero while
+    # output is being captured
+    $testing = 1;
+    $testing_num = $t->current_test;
+    $t->current_test(0);
+
+    # look, we shouldn't do the ending stuff
+    $t->no_ending(1);
+}
+
+=head2 Functions
+
+These are the six methods that are exported as default.
+
+=over 4
+
+=item test_out
+
+=item test_err
+
+Procedures for predeclaring the output that your test suite is
+expected to produce until C<test_test> is called.  These procedures
+automatically assume that each line terminates with "\n".  So
+
+   test_out("ok 1","ok 2");
+
+is the same as
+
+   test_out("ok 1\nok 2");
+
+which is even the same as
+
+   test_out("ok 1");
+   test_out("ok 2");
+
+Once C<test_out> or C<test_err> (or C<test_fail> or C<test_diag>) have
+been called once all further output from B<Test::Builder> will be
+captured by B<Test::Builder::Tester>.  This means that you will not
+be able to perform further tests to the normal output in the normal way
+until you call C<test_test> (well, unless you manually meddle with the
+output filehandles)
+
+=cut
+
+# Add the given lines to the output expected on the builder's normal
+# output handle, starting capture first if it is not already running.
+sub test_out(@)
+{
+    if (!$testing) {
+        _start_testing();
+    }
+
+    return $out->expect(@_);
+}
+
+# Add the given lines to the output expected on the builder's failure
+# and todo handles, starting capture first if it is not already running.
+sub test_err(@)
+{
+    if (!$testing) {
+        _start_testing();
+    }
+
+    return $err->expect(@_);
+}
+
+=item test_fail
+
+Because the standard failure message that B<Test::Builder> produces
+whenever a test fails will be a common occurrence in your test error
+output, and because it has changed between Test::Builder versions, rather
+than forcing you to call C<test_err> with the string all the time like
+so
+
+    test_err("# Failed test ($0 at line ".line_num(+1).")");
+
+C<test_fail> exists as a convenience function that can be called
+instead.  It takes one argument, the offset from the current line that
+the line that causes the fail is on.
+
+    test_fail(+1);
+
+This means that the example in the synopsis could be rewritten
+more simply as:
+
+   test_out("not ok 1 - foo");
+   test_fail(+1);
+   fail("foo");
+   test_test("fail works");
+
+=cut
+
+# Expect the standard "Failed test" diagnostic on the error stream for a
+# failure reported at the caller's line plus the given offset (default 0).
+sub test_fail
+{
+    my ($offset) = @_;
+
+    # start capturing if nothing has done so yet
+    if (!$testing) {
+        _start_testing();
+    }
+
+    # the line the failing test is expected to be on
+    my $line = (caller)[2] + ($offset || 0); # prevent warnings
+
+    # expect that on stderr
+    return $err->expect("#     Failed test ($0 at line $line)");
+}
+
+=item test_diag
+
+As most of the remaining expected output to the error stream will be
+created by Test::Builder's C<diag> function, B<Test::Builder::Tester>
+provides a convenience function C<test_diag> that you can use instead of
+C<test_err>.
+
+The C<test_diag> function prepends comment hashes and spacing to the
+start and newlines to the end of the expected output passed to it and
+adds it to the list of expected error output.  So, instead of writing
+
+   test_err("# Couldn't open file");
+
+you can write
+
+   test_diag("Couldn't open file");
+
+Remember that B<Test::Builder>'s diag function will not add newlines to
+the end of output and test_diag will. So to check
+
+   Test::Builder->new->diag("foo\n","bar\n");
+
+You would do
+
+  test_diag("foo","bar")
+
+without the newlines.
+
+=cut
+
+# Expect each argument on the error stream as a diagnostic line, that is,
+# with "# " put in front of it.
+sub test_diag
+{
+    # start capturing if nothing has done so yet
+    if (!$testing) {
+        _start_testing();
+    }
+
+    # expect the same thing, but prepended with "# "
+    local $_;
+    return $err->expect(map "# $_", @_);
+}
+
+=item test_test
+
+Actually performs the output check testing the tests, comparing the
+data (with C<eq>) that we have captured from B<Test::Builder> against
+that that was declared with C<test_out> and C<test_err>.
+
+This takes name/value pairs that affect how the test is run.
+
+=over
+
+=item title (synonym 'name', 'label')
+
+The name of the test that will be displayed after the C<ok> or C<not
+ok>.
+
+=item skip_out
+
+Setting this to a true value will cause the test to ignore if the
+output sent by the test to the output stream does not match that
+declared with C<test_out>.
+
+=item skip_err
+
+Setting this to a true value will cause the test to ignore if the
+output sent by the test to the error stream does not match that
+declared with C<test_err>.
+
+=back
+
+As a convenience, if only one argument is passed then this argument
+is assumed to be the name of the test (as in the above examples.)
+
+Once C<test_test> has been run test output will be redirected back to
+the original filehandles that B<Test::Builder> was connected to
+(probably STDOUT and STDERR,) meaning any further tests you run
+will function normally and cause success/errors for B<Test::Harness>.
+
+=cut
+
+# Stop capturing, put the builder back the way _start_testing found it,
+# and report one real test: it passes when the captured output and error
+# text both match what was declared (either comparison can be skipped
+# with skip_out / skip_err).  On failure the mismatching streams are
+# reported with diag().
+sub test_test
+{
+   # decode the arguments as described in the pod
+   my $mess;
+   my %args;
+   if (@_ == 1)
+     { $mess = shift }
+   else
+   {
+     # later keys win: label overrides title, which overrides name
+     %args = @_;
+     $mess = $args{name} if exists($args{name});
+     $mess = $args{title} if exists($args{title});
+     $mess = $args{label} if exists($args{label});
+   }
+
+    # er, are we testing?
+    croak "Not testing.  You must declare output with a test function first."
+	unless $testing;
+
+    # okay, reconnect the test suite back to the saved handles
+    $t->output($original_output_handle);
+    $t->failure_output($original_failure_handle);
+    $t->todo_output($original_todo_handle);
+
+    # restore the test no, etc, back to the original point
+    $t->current_test($testing_num);
+    $testing = 0;
+
+    # re-enable the original setting of the harness
+    $ENV{HARNESS_ACTIVE} = $original_harness_env;
+
+    # check the output we've stashed
+    unless ($t->ok(    ($args{skip_out} || $out->check)
+                    && ($args{skip_err} || $err->check),
+                   $mess))
+    {
+      # print out the diagnostic information about why this
+      # test failed
+
+      local $_;
+
+      # check() works on copies of the stored data, so asking again here
+      # gives the same answer as above
+      $t->diag(map {"$_\n"} $out->complaint)
+	unless $args{skip_out} || $out->check;
+
+      $t->diag(map {"$_\n"} $err->complaint)
+	unless $args{skip_err} || $err->check;
+    }
+}
+
+=item line_num
+
+A utility function that returns the line number that the function was
+called on.  You can pass it an offset which will be added to the
+result.  This is very useful for working out the correct text of
+diagnostic functions that contain line numbers.
+
+Essentially this is the same as the C<__LINE__> macro, but the
+C<line_num(+3)> idiom is arguably nicer.
+
+=cut
+
+# Return the line number of the call site plus an optional offset.
+sub line_num
+{
+    my ($offset) = @_;
+    my $line = (caller)[2];
+    return $line + ($offset || 0); # prevent warnings
+}
+
+=back
+
+In addition to the six exported functions there exists one
+function that can only be accessed with a fully qualified function
+call.
+
+=over 4
+
+=item color
+
+When C<test_test> is called and the output that your tests generate
+does not match that which you declared, C<test_test> will print out
+debug information showing the two conflicting versions.  As this
+output itself is debug information it can be confusing which part of
+the output is from C<test_test> and which was the original output from
+your original tests.  Also, it may be hard to spot things like
+extraneous whitespace at the end of lines that may cause your test to
+fail even though the output looks similar.
+
+To assist you, if you have the B<Term::ANSIColor> module installed
+(which you should do by default from perl 5.005 onwards), C<test_test>
+can colour the background of the debug information to disambiguate the
+different types of output. The debug output will have its background
+coloured green and red.  The green part represents the text which is
+the same between the expected and actual output, the red shows which
+part differs.
+
+The C<color> function determines if colouring should occur or not.
+Passing it a true or false value will enable or disable colouring
+respectively, and the function called with no argument will return the
+current setting.
+
+To enable colouring from the command line, you can use the
+B<Test::Builder::Tester::Color> module like so:
+
+   perl -MTest::Builder::Tester::Color test.t
+
+Or by including the B<Test::Builder::Tester::Color> module directly in
+the PERL5LIB.
+
+=cut
+
+# Combined getter/setter for the colour flag: stores the first argument
+# when one is given, and always returns the current value.
+my $color;
+sub color
+{
+  if (@_) {
+    ($color) = @_;
+  }
+  return $color;
+}
+
+=back
+
+=head1 BUGS
+
+Calls C<< Test::Builder->no_ending >> turning off the ending tests.
+This is needed as otherwise it will trip out because we've run more
+tests than we strictly should have and it'll register any failures we
+had that we were testing for as real failures.
+
+The color function doesn't work unless B<Term::ANSIColor> is installed
+and is compatible with your terminal.
+
+Bugs (and requests for new features) can be reported to the author
+though the CPAN RT system:
+L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Test-Builder-Tester>
+
+=head1 AUTHOR
+
+Copyright Mark Fowler E<lt>mark at twoshortplanks.comE<gt> 2002, 2004.
+
+Some code taken from B<Test::More> and B<Test::Catch>, written by
+Michael G Schwern E<lt>schwern at pobox.comE<gt>.  Hence, those parts
+Copyright Michael G Schwern 2001.  Used and distributed with
+permission.
+
+This program is free software; you can redistribute it
+and/or modify it under the same terms as Perl itself.
+
+=head1 NOTES
+
+This code has been tested explicitly on the following versions
+of perl: 5.7.3, 5.6.1, 5.6.0, 5.005_03, 5.004_05 and 5.004.
+
+Thanks to Richard Clamp E<lt>richardc at unixbeard.netE<gt> for letting
+me use his testing system to try this module out on.
+
+=head1 SEE ALSO
+
+L<Test::Builder>, L<Test::Builder::Tester::Color>, L<Test::More>.
+
+=cut
+
+1;
+
+####################################################################
+# Helper class that is used to remember expected and received data
+
+package Test::Builder::Tester::Tie;
+
+##
+# add line(s) to be expected.  Plain strings get a trailing newline;
+# anything _translate_Failed_check returns as a reference is stored
+# unchanged.
+
+sub expect
+{
+    my ($self, @lines) = @_;
+
+    for my $line (@lines) {
+        my $check = $self->_translate_Failed_check($line);
+        $check = "$check\n" unless ref $check;
+        push @{$self->[2]}, $check;
+    }
+}
+
+
+# Expected lines of the form "...#     Failed test (FILE at line N)" are
+# turned into a looser regex that allows other wording and a line break
+# between "Failed test" and "FILE at line N".  Any other line is returned
+# unchanged.
+sub _translate_Failed_check
+{
+    my($self, $check) = @_;
+
+    my($lead, $label, $file, $line) =
+        $check =~ /\A(.*)#     (Failed .*test) \((.*?) at line (\d+)\)\z/
+            or return $check;
+
+    return qr/\Q$lead\E#\s+\Q$label\E.*?\n?.*?\Q$file\E at line \Q$line\E.*\n?/;
+}
+
+
+##
+# return true iff the expected data matches the got data: each expected
+# entry, in order, must match at the front of what is left of the
+# captured text, and nothing may be left over at the end
+
+sub check
+{
+    my ($self) = @_;
+
+    # turn off warnings as these might be undef
+    local $^W = 0;
+
+    my $remaining = $self->[1];
+    for my $expected (@{$self->[2]}) {
+        # plain strings are matched literally, regex refs are used as is
+        my $pattern = ref $expected ? $expected : qr/^\Q$expected\E/;
+        $remaining =~ s/^$pattern// or return 0;
+    }
+
+    return length($remaining) == 0;
+}
+
+##
+# a complaint message about the inputs not matching (to be
+# used for debugging messages).  The text reads "<type> is:", the
+# captured output, "not:", the expected entries joined with newlines,
+# and "as expected".
+#
+# When Test::Builder::Tester::color() is true and Term::ANSIColor can be
+# loaded, the part both texts have in common is given a green background
+# and the parts that differ a red one.
+
+sub complaint
+{
+    my $self = shift;
+    my $type   = $self->type;
+    my $got    = $self->got;
+    my $wanted = join "\n", @{$self->wanted};
+
+    # are we running in colour mode, and can we get colour?
+    if (Test::Builder::Tester::color() && eval { require Term::ANSIColor; 1 })
+    {
+        # colours
+        my $green = Term::ANSIColor::color("black").
+                    Term::ANSIColor::color("on_green");
+        my $red   = Term::ANSIColor::color("black").
+                    Term::ANSIColor::color("on_red");
+        my $reset = Term::ANSIColor::color("reset");
+
+        # work out where the two strings start to differ.  The scan is
+        # limited to the shorter string: reading past the end of both
+        # would compare undef with undef forever when they are identical.
+        my $limit = length($got) < length($wanted) ? length($got)
+                                                   : length($wanted);
+        my $char = 0;
+        $char++ while $char < $limit
+                   && substr($got, $char, 1) eq substr($wanted, $char, 1);
+
+        # get the start string and the two end strings
+        my $start     = $green . substr($wanted, 0,   $char);
+        my $gotend    = $red   . substr($got   , $char) . $reset;
+        my $wantedend = $red   . substr($wanted, $char) . $reset;
+
+        # make the start turn green on and off
+        $start =~ s/\n/$reset\n$green/g;
+
+        # make the ends turn red on and off
+        $gotend    =~ s/\n/$reset\n$red/g;
+        $wantedend =~ s/\n/$reset\n$red/g;
+
+        # rebuild the strings
+        $got    = $start . $gotend;
+        $wanted = $start . $wantedend;
+    }
+
+    return "$type is:\n" .
+           "$got\nnot:\n$wanted\nas expected"
+}
+
+##
+# forget all expected and got data, keeping only the stream type held in
+# slot 0
+
+sub reset
+{
+    my ($self) = @_;
+    my $type = $self->[0];
+    @$self = ($type, '', []);
+}
+
+
+# the text captured so far (slot 1)
+sub got
+{
+    my ($self) = @_;
+    return $self->[1];
+}
+
+# the array ref of expected entries (slot 2)
+sub wanted
+{
+    my ($self) = @_;
+    return $self->[2];
+}
+
+# the stream label given to tie, e.g. "STDOUT" (slot 0)
+sub type
+{
+    my ($self) = @_;
+    return $self->[0];
+}
+
+###
+# tie interface
+###
+
+# print to the tied handle: append everything printed to the captured
+# text
+sub PRINT  {
+    my ($self, @chunks) = @_;
+    $self->[1] .= join '', @chunks;
+}
+
+# constructor used by tie: the object is an array holding the stream
+# label, the captured text and the list of expected entries
+sub TIEHANDLE {
+    my $class = shift;
+    my $type  = shift;
+
+    my $self = [$type];
+    bless $self, $class;
+    $self->reset;
+
+    return $self;
+}
+
+# the remaining tie methods are empty stubs: the handles are only ever
+# written to
+sub READ {}
+sub READLINE {}
+sub GETC {}
+sub FILENO {}
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Builder.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1749 @@
+package Test::Builder;
+
+use 5.004;
+
+# $^C was only introduced in 5.005-ish.  We do this to prevent
+# use of uninitialized value warnings in older perls.
+$^C ||= 0;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = '0.33';
+$VERSION = eval $VERSION;    # make the alpha version come out as a number
+
+# Make Test::Builder thread-safe for ithreads.
+BEGIN {
+    use Config;
+    # Load threads::shared when threads are turned on
+    if( $] >= 5.008 && $Config{useithreads} && $INC{'threads.pm'}) {
+        require threads::shared;
+
+        # Hack around YET ANOTHER threads::shared bug.  It would 
+        # occassionally forget the contents of the variable when sharing it.
+        # So we first copy the data, then share, then put our copy back.
+        *share = sub (\[$@%]) {
+            my $type = ref $_[0];
+            my $data;
+
+            if( $type eq 'HASH' ) {
+                %$data = %{$_[0]};
+            }
+            elsif( $type eq 'ARRAY' ) {
+                @$data = @{$_[0]};
+            }
+            elsif( $type eq 'SCALAR' ) {
+                $$data = ${$_[0]};
+            }
+            else {
+                die "Unknown type: ".$type;
+            }
+
+            $_[0] = &threads::shared::share($_[0]);
+
+            if( $type eq 'HASH' ) {
+                %{$_[0]} = %$data;
+            }
+            elsif( $type eq 'ARRAY' ) {
+                @{$_[0]} = @$data;
+            }
+            elsif( $type eq 'SCALAR' ) {
+                ${$_[0]} = $$data;
+            }
+            else {
+                die "Unknown type: ".$type;
+            }
+
+            return $_[0];
+        };
+    }
+    # 5.8.0's threads::shared is busted when threads are off.
+    # We emulate it here.
+    else {
+        *share = sub { return $_[0] };
+        *lock  = sub { 0 };
+    }
+}
+
+
+=head1 NAME
+
+Test::Builder - Backend for building test libraries
+
+=head1 SYNOPSIS
+
+  package My::Test::Module;
+  use Test::Builder;
+  require Exporter;
+  @ISA = qw(Exporter);
+  @EXPORT = qw(ok);
+
+  my $Test = Test::Builder->new;
+  $Test->output('my_logfile');
+
+  sub import {
+      my($self) = shift;
+      my $pack = caller;
+
+      $Test->exported_to($pack);
+      $Test->plan(@_);
+
+      $self->export_to_level(1, $self, 'ok');
+  }
+
+  sub ok {
+      my($test, $name) = @_;
+
+      $Test->ok($test, $name);
+  }
+
+
+=head1 DESCRIPTION
+
+Test::Simple and Test::More have proven to be popular testing modules,
+but they're not always flexible enough.  Test::Builder provides a
+building block upon which to write your own test libraries I<which can
+work together>.
+
+=head2 Construction
+
+=over 4
+
+=item B<new>
+
+  my $Test = Test::Builder->new;
+
+Returns a Test::Builder object representing the current state of the
+test.
+
+Since you only run one test per program C<new> always returns the same
+Test::Builder object.  No matter how many times you call new(), you're
+getting the same object.  This is called a singleton.  This is done so that
+multiple modules share such global information as the test counter and
+where test output is going.
+
+If you want a completely new Test::Builder object different from the
+singleton, use C<create>.
+
+=cut
+
+# The singleton.  It is created by the first call to new() and handed
+# back by every later call.
+my $Test = Test::Builder->new;
+sub new {
+    my $class = shift;
+
+    $Test = $class->create unless $Test;
+    return $Test;
+}
+
+
+=item B<create>
+
+  my $Test = Test::Builder->create;
+
+Ok, so there can be more than one Test::Builder object and this is how
+you get it.  You might use this instead of C<new()> if you're testing
+a Test::Builder based module, but otherwise you probably want C<new>.
+
+B<NOTE>: the implementation is not complete.  C<level>, for example, is
+still shared amongst B<all> Test::Builder objects, even ones created using
+this method.  Also, the method name may change in the future.
+
+=cut
+
+sub create {
+    my($class) = @_;
+
+    # Start from an empty hash and let reset() fill in the initial state.
+    my $builder = bless {}, $class;
+    $builder->reset;
+
+    return $builder;
+}
+
+=item B<reset>
+
+  $Test->reset;
+
+Reinitializes the Test::Builder singleton to its original state.
+Mostly useful for tests run in persistent environments where the same
+test might be run multiple times in the same process.
+
+=cut
+
+use vars qw($Level);
+
+sub reset {
+    my $self = shift;
+
+    # We leave this a global because it has to be localized and localizing
+    # hash keys is just asking for pain.  Also, it was documented.
+    $Level = 1;
+
+    # Plain per-object state, listed as key/value pairs.
+    my @initial_state = (
+        Test_Died      => 0,
+        Have_Plan      => 0,
+        No_Plan        => 0,
+        Original_Pid   => $$,
+        Exported_To    => undef,
+        Expected_Tests => 0,
+        Skip_All       => 0,
+        Use_Nums       => 1,
+        No_Header      => 0,
+        No_Ending      => 0,
+    );
+    while( my($key, $value) = splice @initial_state, 0, 2 ) {
+        $self->{$key} = $value;
+    }
+
+    # The test counter and the results list go through share() so they
+    # can be used from several threads when threads are enabled.
+    share($self->{Curr_Test});
+    $self->{Curr_Test}    = 0;
+    $self->{Test_Results} = &share([]);
+
+    # Skip the handle setup when only compiling (perl -c).
+    $self->_dup_stdhandles unless $^C;
+
+    return undef;
+}
+
+=back
+
+=head2 Setting up tests
+
+These methods are for setting up tests and declaring how many there
+are.  You usually only want to call one of these methods.
+
+=over 4
+
+=item B<exported_to>
+
+  my $pack = $Test->exported_to;
+  $Test->exported_to($pack);
+
+Tells Test::Builder what package you exported your functions to.
+This is important for getting TODO tests right.
+
+=cut
+
+sub exported_to {
+    my $self = shift;
+    my $pack = shift;
+
+    # Acts as a setter only when a defined package name is passed in.
+    $self->{Exported_To} = $pack if defined $pack;
+
+    return $self->{Exported_To};
+}
+
+=item B<plan>
+
+  $Test->plan('no_plan');
+  $Test->plan( skip_all => $reason );
+  $Test->plan( tests => $num_tests );
+
+A convenient way to set up your tests.  Call this and Test::Builder
+will print the appropriate headers and take the appropriate actions.
+
+If you call plan(), don't call any of the other methods below.
+
+=cut
+
+sub plan {
+    my($self, $cmd, $arg) = @_;
+
+    # No command, nothing to plan.
+    return unless $cmd;
+
+    # A plan may only be declared once.
+    if( $self->{Have_Plan} ) {
+        my($file, $line) = ($self->caller)[1,2];
+        die sprintf "You tried to plan twice!  Second plan at %s line %d\n",
+          $file, $line;
+    }
+
+    return $self->skip_all($arg) if $cmd eq 'skip_all';
+
+    if( $cmd eq 'tests' ) {
+        return $self->expected_tests($arg) if $arg;
+
+        # From here on $arg is false: either undefined or a defined
+        # false value such as 0.
+        die "Got an undefined number of tests.  Looks like you tried to ".
+            "say how many tests you plan to run but made a mistake.\n"
+          unless defined $arg;
+
+        die "You said to run 0 tests!  You've got to run something.\n";
+    }
+
+    if( $cmd eq 'no_plan' ) {
+        $self->no_plan;
+    }
+    else {
+        require Carp;
+        my @args = grep { defined } ($cmd, $arg);
+        Carp::croak("plan() doesn't understand @args");
+    }
+
+    return 1;
+}
+
+=item B<expected_tests>
+
+    my $max = $Test->expected_tests;
+    $Test->expected_tests($max);
+
+Gets/sets the # of tests we expect this test to run and prints out
+the appropriate headers.
+
+=cut
+
+sub expected_tests {
+    my($self, @args) = @_;
+    my $max = $args[0];
+
+    # With an argument this is a setter: validate, record the plan and
+    # print the "1..N" header unless headers are turned off.
+    if( @args ) {
+        unless( $max =~ /^\+?\d+$/ and $max > 0 ) {
+            die "Number of tests must be a postive integer.  You gave it '$max'.\n";
+        }
+
+        $self->{Expected_Tests} = $max;
+        $self->{Have_Plan}      = 1;
+
+        $self->_print("1..$max\n") unless $self->no_header;
+    }
+
+    return $self->{Expected_Tests};
+}
+
+
+=item B<no_plan>
+
+  $Test->no_plan;
+
+Declares that this test will run an indeterminate # of tests.
+
+=cut
+
+sub no_plan {
+    my($self) = @_;
+
+    # Running without a fixed count still counts as having a plan.
+    $self->{No_Plan} = $self->{Have_Plan} = 1;
+}
+
+=item B<has_plan>
+
+  $plan = $Test->has_plan
+
+Find out whether a plan has been defined. $plan is either C<undef> (no plan has been set), C<no_plan> (indeterminate # of tests) or an integer (the number of expected tests).
+
+=cut
+
+sub has_plan {
+    my($self) = @_;
+
+    # A non-zero expected count wins over the no_plan flag.
+    my $expected = $self->{Expected_Tests};
+    return $expected if $expected;
+
+    return $self->{No_Plan} ? 'no_plan' : undef;
+};
+
+
+=item B<skip_all>
+
+  $Test->skip_all;
+  $Test->skip_all($reason);
+
+Skips all the tests, using the given $reason.  Exits immediately with 0.
+
+=cut
+
+sub skip_all {
+    my $self   = shift;
+    my $reason = shift;
+
+    $self->{Skip_All} = 1;
+
+    # "1..0" is the header printed for a fully skipped test; a true
+    # reason is appended as a Skip comment.
+    my $header = $reason ? "1..0 # Skip $reason\n" : "1..0\n";
+    $self->_print($header) unless $self->no_header;
+
+    exit(0);
+}
+
+=back
+
+=head2 Running tests
+
+These actually run the tests, analogous to the functions in
+Test::More.
+
+$name is always optional.
+
+=over 4
+
+=item B<ok>
+
+  $Test->ok($test, $name);
+
+Your basic test.  Pass if $test is true, fail if $test is false.  Just
+like Test::Simple's ok().
+
+=cut
+
+# Record and report the outcome of a single test.
+#
+# $test is reduced to 1 or 0, the test counter is incremented, a result
+# record is stored in Test_Results and an "ok"/"not ok" line is printed.
+# On failure, diagnostics naming the file and line are sent through diag().
+# Croaks if no plan has been set.  Returns 1 on a pass and 0 on a failure.
+sub ok {
+    my($self, $test, $name) = @_;
+
+    # $test might contain an object which we don't want to accidentally
+    # store, so we turn it into a boolean.
+    $test = $test ? 1 : 0;
+
+    unless( $self->{Have_Plan} ) {
+        require Carp;
+        Carp::croak("You tried to run a test without a plan!  Gotta have a plan.");
+    }
+
+    lock $self->{Curr_Test};
+    $self->{Curr_Test}++;
+
+    # In case $name is a string overloaded object, force it to stringify.
+    $self->_unoverload_str(\$name);
+
+    # Warn about names made up only of digits and whitespace.
+    $self->diag(<<ERR) if defined $name and $name =~ /^[\d\s]+$/;
+    You named your test '$name'.  You shouldn't use numbers for your test names.
+    Very confusing.
+ERR
+
+    my($pack, $file, $line) = $self->caller;
+
+    # A true $todo marks this test as TODO for the rest of the sub.
+    my $todo = $self->todo($pack);
+    $self->_unoverload_str(\$todo);
+
+    my $out;
+    my $result = &share({});
+
+    # 'ok' is the logical result (a failing TODO test still counts as ok);
+    # 'actual_ok' is the literal result.
+    unless( $test ) {
+        $out .= "not ";
+        @$result{ 'ok', 'actual_ok' } = ( ( $todo ? 1 : 0 ), 0 );
+    }
+    else {
+        @$result{ 'ok', 'actual_ok' } = ( 1, $test );
+    }
+
+    $out .= "ok";
+    $out .= " $self->{Curr_Test}" if $self->use_numbers;
+
+    if( defined $name ) {
+        $name =~ s|#|\\#|g;     # # in a name can confuse Test::Harness.
+        $out   .= " - $name";
+        $result->{name} = $name;
+    }
+    else {
+        $result->{name} = '';
+    }
+
+    if( $todo ) {
+        $out   .= " # TODO $todo";
+        $result->{reason} = $todo;
+        $result->{type}   = 'todo';
+    }
+    else {
+        $result->{reason} = '';
+        $result->{type}   = '';
+    }
+
+    # Test numbers start at 1, the results array at 0.
+    $self->{Test_Results}[$self->{Curr_Test}-1] = $result;
+    $out .= "\n";
+
+    $self->_print($out);
+
+    # Failure diagnostics: which test failed and where it was called from.
+    unless( $test ) {
+        my $msg = $todo ? "Failed (TODO)" : "Failed";
+        $self->_print_diag("\n") if $ENV{HARNESS_ACTIVE};
+
+	if( defined $name ) {
+	    $self->diag(qq[  $msg test '$name'\n]);
+	    $self->diag(qq[  in $file at line $line.\n]);
+	}
+	else {
+	    $self->diag(qq[  $msg test in $file at line $line.\n]);
+	}
+    } 
+
+    return $test ? 1 : 0;
+}
+
+
+# Replace overloaded objects with the result of their overload method.
+#
+# $type is the overload key to look up (the callers in this file pass
+# q[""] and '0+').  The remaining arguments are references to the values
+# to convert; each referenced value is replaced in place.  Returns early,
+# converting nothing, if the overload module cannot be loaded.
+sub _unoverload {
+    my $self  = shift;
+    my $type  = shift;
+
+    # Keep the evals below from disturbing the caller's $@ and $!.
+    local($@,$!);
+
+    eval { require overload } || return;
+
+    foreach my $thing (@_) {
+        # Any error raised while converting one value is trapped by this
+        # eval and ignored.
+        eval { 
+            if( _is_object($$thing) ) {
+                if( my $string_meth = overload::Method($$thing, $type) ) {
+                    $$thing = $$thing->$string_meth();
+                }
+            }
+        };
+    }
+}
+
+
+sub _is_object {
+    my($thing) = @_;
+
+    # The isa() call dies on an unblessed reference; the eval turns that
+    # into a false result.
+    my $is_object = eval { ref $thing && $thing->isa('UNIVERSAL') };
+
+    return $is_object ? 1 : 0;
+}
+
+
+sub _unoverload_str {
+    my($self, @value_refs) = @_;
+
+    # '""' is the overload key for stringification.
+    return $self->_unoverload('""', @value_refs);
+}    
+
+sub _unoverload_num {
+    my($self, @value_refs) = @_;
+
+    # '0+' is the overload key for numification.
+    $self->_unoverload('0+', @value_refs);
+
+    # Dualvars are reduced to their numeric half.
+    foreach my $ref (@value_refs) {
+        $$ref = $$ref + 0 if $self->_is_dualvar($$ref);
+    }
+}
+
+
+# This is a hack to detect a dualvar such as $!
+sub _is_dualvar {
+    my $self = shift;
+    my $val  = shift;
+
+    # Numifying an arbitrary value may warn; silence that here.
+    local $^W = 0;
+    my $as_number = $val + 0;
+
+    # A non-zero number that reads differently as a string is a dualvar.
+    return 1 if $as_number != 0 and $as_number ne $val;
+}
+
+
+
+=item B<is_eq>
+
+  $Test->is_eq($got, $expected, $name);
+
+Like Test::More's is().  Checks if $got eq $expected.  This is the
+string version.
+
+=item B<is_num>
+
+  $Test->is_num($got, $expected, $name);
+
+Like Test::More's is().  Checks if $got == $expected.  This is the
+numeric version.
+
+=cut
+
+sub is_eq {
+    my($self, $got, $expect, $name) = @_;
+    local $Level = $Level + 1;
+
+    $self->_unoverload_str(\$got, \$expect);
+
+    # Two defined values are compared with a plain string comparison.
+    return $self->cmp_ok($got, 'eq', $expect, $name)
+      if defined $got && defined $expect;
+
+    # undef only matches undef and nothing else
+    my $both_undef = !defined $got && !defined $expect;
+
+    $self->ok($both_undef, $name);
+    $self->_is_diag($got, 'eq', $expect) unless $both_undef;
+    return $both_undef;
+}
+
+sub is_num {
+    my($self, $got, $expect, $name) = @_;
+    local $Level = $Level + 1;
+
+    $self->_unoverload_num(\$got, \$expect);
+
+    # Two defined values are compared with a plain numeric comparison.
+    return $self->cmp_ok($got, '==', $expect, $name)
+      if defined $got && defined $expect;
+
+    # undef only matches undef and nothing else
+    my $both_undef = !defined $got && !defined $expect;
+
+    $self->ok($both_undef, $name);
+    $self->_is_diag($got, '==', $expect) unless $both_undef;
+    return $both_undef;
+}
+
+sub _is_diag {
+    my($self, $got, $type, $expect) = @_;
+
+    # Make both values printable: undef is spelled out, strings are
+    # quoted and everything else is forced to a number.
+    foreach my $val (\$got, \$expect) {
+        unless( defined $$val ) {
+            $$val = 'undef';
+            next;
+        }
+
+        if( $type eq 'eq' ) {
+            $$val = "'$$val'";
+        }
+        else {
+            $self->_unoverload_num($val);
+        }
+    }
+
+    return $self->diag(sprintf <<DIAGNOSTIC, $got, $expect);
+         got: %s
+    expected: %s
+DIAGNOSTIC
+
+}    
+
+=item B<isnt_eq>
+
+  $Test->isnt_eq($got, $dont_expect, $name);
+
+Like Test::More's isnt().  Checks if $got ne $dont_expect.  This is
+the string version.
+
+=item B<isnt_num>
+
+  $Test->isnt_num($got, $dont_expect, $name);
+
+Like Test::More's isnt().  Checks if $got != $dont_expect.  This is
+the numeric version.
+
+=cut
+
+sub isnt_eq {
+    my($self, $got, $dont_expect, $name) = @_;
+    local $Level = $Level + 1;
+
+    # Two defined values are compared with a plain string comparison.
+    return $self->cmp_ok($got, 'ne', $dont_expect, $name)
+      if defined $got && defined $dont_expect;
+
+    # undef only matches undef and nothing else
+    my $differ = defined $got || defined $dont_expect;
+
+    $self->ok($differ, $name);
+    $self->_cmp_diag($got, 'ne', $dont_expect) unless $differ;
+    return $differ;
+}
+
+sub isnt_num {
+    my($self, $got, $dont_expect, $name) = @_;
+    local $Level = $Level + 1;
+
+    # Two defined values are compared with a plain numeric comparison.
+    return $self->cmp_ok($got, '!=', $dont_expect, $name)
+      if defined $got && defined $dont_expect;
+
+    # undef only matches undef and nothing else
+    my $differ = defined $got || defined $dont_expect;
+
+    $self->ok($differ, $name);
+    $self->_cmp_diag($got, '!=', $dont_expect) unless $differ;
+    return $differ;
+}
+
+
+=item B<like>
+
+  $Test->like($this, qr/$regex/, $name);
+  $Test->like($this, '/$regex/', $name);
+
+Like Test::More's like().  Checks if $this matches the given $regex.
+
+You'll want to avoid qr// if you want your tests to work before 5.005.
+
+=item B<unlike>
+
+  $Test->unlike($this, qr/$regex/, $name);
+  $Test->unlike($this, '/$regex/', $name);
+
+Like Test::More's unlike().  Checks if $this B<does not match> the
+given $regex.
+
+=cut
+
+sub like {
+    my $self = shift;
+    my($this, $regex, $name) = @_;
+
+    # One more frame between the test script and ok().
+    local $Level = $Level + 1;
+    return $self->_regex_ok($this, $regex, '=~', $name);
+}
+
+sub unlike {
+    my $self = shift;
+    my($this, $regex, $name) = @_;
+
+    # One more frame between the test script and ok().
+    local $Level = $Level + 1;
+    return $self->_regex_ok($this, $regex, '!~', $name);
+}
+
+=item B<maybe_regex>
+
+  $Test->maybe_regex(qr/$regex/);
+  $Test->maybe_regex('/$regex/');
+
+Convenience method for building testing functions that take regular
+expressions as arguments, but need to work before perl 5.005.
+
+Takes a quoted regular expression produced by qr//, or a string
+representing a regular expression.
+
+Returns a Perl value which may be used instead of the corresponding
+regular expression, or undef if its argument is not recognised.
+
+For example, a version of like(), sans the useful diagnostic messages,
+could be written as:
+
+  sub laconic_like {
+      my ($self, $this, $regex, $name) = @_;
+      my $usable_regex = $self->maybe_regex($regex);
+      die "expecting regex, found '$regex'\n"
+          unless $usable_regex;
+      $self->ok($this =~ m/$usable_regex/, $name);
+  }
+
+=cut
+
+
+sub maybe_regex {
+    my($self, $regex) = @_;
+    my $usable_regex = undef;
+
+    # An undefined argument can never be a regex.
+    return $usable_regex unless defined $regex;
+
+    # A qr/foo/ object is usable as it stands.
+    return $regex if ref $regex eq 'Regexp';
+
+    # Otherwise look for the string forms '/foo/' or 'm,foo,', each with
+    # optional trailing flags.
+    my($re, $opts);
+    my $matched   = (($re, $opts)        = $regex =~ m{^ /(.*)/ (\w*) $ }sx);
+    $matched    ||= ((undef, $re, $opts) = $regex =~ m,^ m([^\w\s]) (.+) \1 (\w*) $,sx);
+
+    if( $matched ) {
+        # Flags, when present, are folded into the pattern as (?flags).
+        $usable_regex = length $opts ? "(?$opts)$re" : $re;
+    }
+
+    return $usable_regex;
+};
+
+# Shared implementation of like() and unlike().
+#
+# $cmp is '=~' or '!~'.  The regex is first normalised by maybe_regex();
+# if that fails the test is reported as failed with a diagnostic.
+# Otherwise $this is matched against the regex, the result is inverted
+# for '!~', and the outcome is reported through ok().  Returns what ok()
+# returned.
+sub _regex_ok {
+    my($self, $this, $regex, $cmp, $name) = @_;
+
+    my $ok = 0;
+    my $usable_regex = $self->maybe_regex($regex);
+    unless (defined $usable_regex) {
+        $ok = $self->ok( 0, $name );
+        $self->diag("    '$regex' doesn't look much like a regex to me.");
+        return $ok;
+    }
+
+    {
+        my $test;
+        # $code is either empty or a "#line" directive built from the
+        # caller's file and line.
+        my $code = $self->_caller_context;
+
+        # Keep the string eval from disturbing the caller's $@ and $!.
+        local($@, $!);
+
+        # Yes, it has to look like this or 5.4.5 won't see the #line directive.
+        # Don't ask me, man, I just work here.
+        $test = eval "
+$code" . q{$test = $this =~ /$usable_regex/ ? 1 : 0};
+
+        $test = !$test if $cmp eq '!~';
+
+        local $Level = $Level + 1;
+        $ok = $self->ok( $test, $name );
+    }
+
+    # On failure, show the value and the regex it did or did not match.
+    unless( $ok ) {
+        $this = defined $this ? "'$this'" : 'undef';
+        my $match = $cmp eq '=~' ? "doesn't match" : "matches";
+        $self->diag(sprintf <<DIAGNOSTIC, $this, $match, $regex);
+                  %s
+    %13s '%s'
+DIAGNOSTIC
+
+    }
+
+    return $ok;
+}
+
+=item B<cmp_ok>
+
+  $Test->cmp_ok($this, $type, $that, $name);
+
+Works just like Test::More's cmp_ok().
+
+    $Test->cmp_ok($big_num, '!=', $other_big_num);
+
+=cut
+
+
+# Operators that cmp_ok() treats as numeric comparisons.
+my %numeric_cmps = map { ($_, 1) } 
+                       ("<",  "<=", ">",  ">=", "==", "!=", "<=>");
+
+# Compare $got and $expect with the operator named by $type and report
+# the outcome through ok().  Returns what ok() returned.
+sub cmp_ok {
+    my($self, $got, $type, $expect, $name) = @_;
+
+    # Treat overloaded objects as numbers if we're asked to do a
+    # numeric comparison.
+    my $unoverload = $numeric_cmps{$type} ? '_unoverload_num'
+                                          : '_unoverload_str';
+
+    $self->$unoverload(\$got, \$expect);
+
+
+    my $test;
+    {
+        local($@,$!);   # don't interfere with $@
+                        # eval() sometimes resets $!
+
+        # $code is either empty or a "#line" directive built from the
+        # caller's file and line.
+        my $code = $self->_caller_context;
+
+        # Yes, it has to look like this or 5.4.5 won't see the #line directive.
+        # Don't ask me, man, I just work here.
+        # NOTE(review): $type is interpolated into the evaluated string
+        # as given, so it must come from trusted code.
+        $test = eval "
+$code" . "\$got $type \$expect;";
+
+    }
+    local $Level = $Level + 1;
+    my $ok = $self->ok($test, $name);
+
+    # Equality failures get the got/expected layout; every other
+    # operator gets the three-line "value, operator, value" layout.
+    unless( $ok ) {
+        if( $type =~ /^(eq|==)$/ ) {
+            $self->_is_diag($got, $type, $expect);
+        }
+        else {
+            $self->_cmp_diag($got, $type, $expect);
+        }
+    }
+    return $ok;
+}
+
+sub _cmp_diag {
+    my($self, $got, $type, $expect) = @_;
+
+    # Quote defined values and spell out undef.
+    foreach my $side ($got, $expect) {
+        $side = defined $side ? "'$side'" : 'undef';
+    }
+
+    return $self->diag(sprintf <<DIAGNOSTIC, $got, $type, $expect);
+    %s
+        %s
+    %s
+DIAGNOSTIC
+}
+
+
+sub _caller_context {
+    my($self) = @_;
+
+    my($pack, $file, $line) = $self->caller(1);
+
+    # Without both a file and a line there is no directive to build.
+    return '' unless defined $file and defined $line;
+
+    return "#line $line $file\n";
+}
+
+
+=item B<BAIL_OUT>
+
+    $Test->BAIL_OUT($reason);
+
+Indicates to the Test::Harness that things are going so badly all
+testing should terminate.  This includes running any additional test
+scripts.
+
+It will exit with 255.
+
+=cut
+
+sub BAIL_OUT {
+    my $self   = shift;
+    my $reason = shift;
+
+    # Flag the bail-out on the object before printing the message.
+    $self->{Bailed_Out} = 1;
+    $self->_print("Bail out!  $reason");
+
+    exit 255;
+}
+
+=for deprecated
+BAIL_OUT() used to be BAILOUT()
+
+=cut
+
+*BAILOUT = \&BAIL_OUT;
+
+
+=item B<skip>
+
+    $Test->skip;
+    $Test->skip($why);
+
+Skips the current test, reporting $why.
+
+=cut
+
+sub skip {
+    my($self, $why) = @_;
+    $why ||= '';
+
+    # In case $why is a string overloaded object, force it to stringify.
+    $self->_unoverload_str(\$why);
+
+    unless( $self->{Have_Plan} ) {
+        require Carp;
+        Carp::croak("You tried to run tests without a plan!  Gotta have a plan.");
+    }
+
+    lock($self->{Curr_Test});
+    $self->{Curr_Test}++;
+
+    # A skipped test is recorded as a pass of type 'skip'.
+    $self->{Test_Results}[$self->{Curr_Test}-1] = &share({
+        'ok'      => 1,
+        actual_ok => 1,
+        name      => '',
+        type      => 'skip',
+        reason    => $why,
+    });
+
+    # Build the output line from its space-separated pieces.
+    my @fields = ('ok');
+    push @fields, $self->{Curr_Test} if $self->use_numbers;
+    push @fields, '# skip';
+    push @fields, $why               if length $why;
+
+    $self->_print(join(' ', @fields) . "\n");
+
+    return 1;
+}
+
+
+=item B<todo_skip>
+
+  $Test->todo_skip;
+  $Test->todo_skip($why);
+
+Like skip(), only it will declare the test as failing and TODO.  Similar
+to
+
+    print "not ok $tnum # TODO $why\n";
+
+=cut
+
+# Record the current test as a failing TODO test that was skipped.
+#
+# $why is the optional reason; it defaults to ''.  Croaks if no plan has
+# been set.  Prints a "not ok ... # TODO & SKIP" line and returns 1.
+sub todo_skip {
+    my($self, $why) = @_;
+    $why ||= '';
+
+    # In case $why is a string overloaded object, force it to stringify,
+    # as skip() does.  Otherwise the object itself would be stored in the
+    # results record below.
+    $self->_unoverload_str(\$why);
+
+    unless( $self->{Have_Plan} ) {
+        require Carp;
+        Carp::croak("You tried to run tests without a plan!  Gotta have a plan.");
+    }
+
+    lock($self->{Curr_Test});
+    $self->{Curr_Test}++;
+
+    # Logically a pass ('ok' => 1) even though the literal result is a
+    # failure (actual_ok => 0).
+    $self->{Test_Results}[$self->{Curr_Test}-1] = &share({
+        'ok'      => 1,
+        actual_ok => 0,
+        name      => '',
+        type      => 'todo_skip',
+        reason    => $why,
+    });
+
+    my $out = "not ok";
+    $out   .= " $self->{Curr_Test}" if $self->use_numbers;
+    $out   .= " # TODO & SKIP $why\n";
+
+    $self->_print($out);
+
+    return 1;
+}
+
+
+=begin _unimplemented
+
+=item B<skip_rest>
+
+  $Test->skip_rest;
+  $Test->skip_rest($reason);
+
+Like skip(), only it skips all the rest of the tests you plan to run
+and terminates the test.
+
+If you're running under no_plan, it skips once and terminates the
+test.
+
+=end _unimplemented
+
+=back
+
+
+=head2 Test style
+
+=over 4
+
+=item B<level>
+
+    $Test->level($how_high);
+
+How far up the call stack should $Test look when reporting where the
+test failed.
+
+Defaults to 1.
+
+Setting $Test::Builder::Level overrides.  This is typically useful
+localized:
+
+    {
+        local $Test::Builder::Level = 2;
+        $Test->ok($test);
+    }
+
+=cut
+
+sub level {
+    my $self  = shift;
+    my $level = shift;
+
+    # The level lives in the package global $Level, not on the object.
+    $Level = $level if defined $level;
+
+    return $Level;
+}
+
+
+=item B<use_numbers>
+
+    $Test->use_numbers($on_or_off);
+
+Whether or not the test should output numbers.  That is, this if true:
+
+  ok 1
+  ok 2
+  ok 3
+
+or this if false
+
+  ok
+  ok
+  ok
+
+Most useful when you can't depend on the test output order, such as
+when threads or forking is involved.
+
+Test::Harness will accept either, but avoid mixing the two styles.
+
+Defaults to on.
+
+=cut
+
+sub use_numbers {
+    my $self     = shift;
+    my $use_nums = shift;
+
+    # Acts as a setter only when a defined value is passed in.
+    $self->{Use_Nums} = $use_nums if defined $use_nums;
+
+    return $self->{Use_Nums};
+}
+
+
+=item B<no_diag>
+
+    $Test->no_diag($no_diag);
+
+If set true no diagnostics will be printed.  This includes calls to
+diag().
+
+=item B<no_ending>
+
+    $Test->no_ending($no_ending);
+
+Normally, Test::Builder does some extra diagnostics when the test
+ends.  It also changes the exit code as described below.
+
+If this is true, none of that will be done.
+
+=item B<no_header>
+
+    $Test->no_header($no_header);
+
+If set to true, no "1..N" header will be printed.
+
+=cut
+
+# Generate the no_header(), no_ending() and no_diag() accessors.  Each
+# one is a combined getter/setter for the object key of the same name
+# (No_Header, No_Ending, No_Diag); the method name is the key lowercased.
+foreach my $attribute (qw(No_Header No_Ending No_Diag)) {
+    my $method = lc $attribute;
+
+    # The closure captures $attribute, so each accessor uses its own key.
+    my $code = sub {
+        my($self, $no) = @_;
+
+        if( defined $no ) {
+            $self->{$attribute} = $no;
+        }
+        return $self->{$attribute};
+    };
+
+    # Installing into the symbol table by name needs symbolic references.
+    no strict 'refs';
+    *{__PACKAGE__.'::'.$method} = $code;
+}
+
+
+=back
+
+=head2 Output
+
+Controlling where the test output goes.
+
+It's ok for your test to change where STDOUT and STDERR point to,
+Test::Builder's default output settings will not be affected.
+
+=over 4
+
+=item B<diag>
+
+    $Test->diag(@msgs);
+
+Prints out the given @msgs.  Like C<print>, arguments are simply
+appended together.
+
+Normally, it uses the failure_output() handle, but if this is for a
+TODO test, the todo_output() handle is used.
+
+Output will be indented and marked with a # so as not to interfere
+with test output.  A newline will be put on the end if there isn't one
+already.
+
+We encourage using this rather than calling print directly.
+
+Returns false.  Why?  Because diag() is often used in conjunction with
+a failing test (C<ok() || diag()>) it "passes through" the failure.
+
+    return ok(...) || diag(...);
+
+=for blame transfer
+Mark Fowler <mark at twoshortplanks.com>
+
+=cut
+
+sub diag {
+    my $self = shift;
+    my @msgs = @_;
+
+    # Nothing to do when diagnostics are off or no message was given.
+    return if $self->no_diag;
+    return unless @msgs;
+
+    # Prevent printing headers when compiling (i.e. -c)
+    return if $^C;
+
+    # Smash args together like print does, spelling out undef so it is
+    # readable.
+    my @pieces;
+    foreach my $piece (@msgs) {
+        push @pieces, defined($piece) ? $piece : 'undef';
+    }
+    my $text = join '', @pieces;
+
+    # Prefix every line with "# " and make sure the text ends in a newline.
+    $text =~ s/^/# /gm;
+    $text .= "\n" unless $text =~ /\n\Z/;
+
+    local $Level = $Level + 1;
+    $self->_print_diag($text);
+
+    return 0;
+}
+
+=begin _private
+
+=item B<_print>
+
+    $Test->_print(@msgs);
+
+Prints to the output() filehandle.
+
+=end _private
+
+=cut
+
+sub _print {
+    my $self = shift;
+
+    # Prevent printing headers when only compiling.  Mostly for when
+    # tests are deparsed with B::Deparse
+    return if $^C;
+
+    my $text = join '', @_;
+
+    # Shield the print below from the caller's output separators.
+    local($\, $", $,) = (undef, ' ', '');
+    my $out_fh = $self->output;
+
+    # Every line after the first is turned into a "# " comment so it
+    # doesn't confuse Test::Harness, and the text always ends in a newline.
+    $text =~ s/\n(.)/\n# $1/sg;
+    $text .= "\n" unless $text =~ /\n\Z/;
+
+    print $out_fh $text;
+}
+
+
+=item B<_print_diag>
+
+    $Test->_print_diag(@msg);
+
+Like _print, but prints to the current diagnostic filehandle.
+
+=cut
+
+sub _print_diag {
+    my($self, @text) = @_;
+
+    # Shield the print below from the caller's output separators.
+    local($\, $", $,) = (undef, ' ', '');
+
+    # Diagnostics for TODO tests go to the todo handle, everything else
+    # to the failure handle.
+    my $diag_fh = $self->todo ? $self->todo_output : $self->failure_output;
+    print $diag_fh @text;
+}    
+
+=item B<output>
+
+    $Test->output($fh);
+    $Test->output($file);
+
+Where normal "ok/not ok" test output should go.
+
+Defaults to STDOUT.
+
+=item B<failure_output>
+
+    $Test->failure_output($fh);
+    $Test->failure_output($file);
+
+Where diagnostic output on test failures and diag() should go.
+
+Defaults to STDERR.
+
+=item B<todo_output>
+
+    $Test->todo_output($fh);
+    $Test->todo_output($file);
+
+Where diagnostics about todo test failures and diag() should go.
+
+Defaults to STDOUT.
+
+=cut
+
+sub output {
+    my $self = shift;
+    my $fh   = shift;
+
+    # A defined argument (handle or file name) replaces the current handle.
+    $self->{Out_FH} = _new_fh($fh) if defined $fh;
+
+    return $self->{Out_FH};
+}
+
+sub failure_output {
+    my $self = shift;
+    my $fh   = shift;
+
+    # A defined argument (handle or file name) replaces the current handle.
+    $self->{Fail_FH} = _new_fh($fh) if defined $fh;
+
+    return $self->{Fail_FH};
+}
+
+sub todo_output {
+    my $self = shift;
+    my $fh   = shift;
+
+    # A defined argument (handle or file name) replaces the current handle.
+    $self->{Todo_FH} = _new_fh($fh) if defined $fh;
+
+    return $self->{Todo_FH};
+}
+
+
+sub _new_fh {
+    my($file_or_fh) = @_;
+
+    # Something that already is a filehandle is passed straight through.
+    return $file_or_fh if _is_fh($file_or_fh);
+
+    # Anything else is taken as a file name, opened for writing and
+    # set to autoflush.
+    # NOTE(review): two-argument open interprets special characters in
+    # the name; this file declares "use 5.004", so confirm the minimum
+    # supported perl before switching to three-argument open.
+    my $fh = do { local *FH };
+    open $fh, ">$file_or_fh" or 
+        die "Can't open test output log $file_or_fh: $!";
+    _autoflush($fh);
+
+    return $fh;
+}
+
+
+sub _is_fh {
+    my($candidate) = @_;
+
+    return 0 unless defined $candidate;
+
+    # A bare glob (as opposed to a reference to one).
+    return 1 if ref \$candidate eq 'GLOB';
+
+    # 5.5.4's tied() and can() doesn't like getting undef
+    my $tie_target = tied($candidate) || '';
+
+    # Otherwise: a glob reference, an IO::Handle, or a tied handle.
+    return UNIVERSAL::isa($candidate, 'GLOB')       ||
+           UNIVERSAL::isa($candidate, 'IO::Handle') ||
+           UNIVERSAL::can($tie_target, 'TIEHANDLE');
+}
+
+
+sub _autoflush {
+    my $fh = shift;
+
+    # $| applies to the currently selected handle, so select ours, set
+    # it, then put the previous selection back.
+    my $previous = select $fh;
+    $| = 1;
+    select $previous;
+}
+
+
+sub _dup_stdhandles {
+    my($self) = @_;
+
+    $self->_open_testhandles;
+
+    # Set everything to unbuffered else plain prints to STDOUT will
+    # come out in the wrong order from our own prints.
+    foreach my $handle (\*TESTOUT, \*STDOUT, \*TESTERR, \*STDERR) {
+        _autoflush($handle);
+    }
+
+    # Test output and TODO diagnostics go to the STDOUT dup, failure
+    # diagnostics to the STDERR dup.
+    $self->output(\*TESTOUT);
+    $self->failure_output(\*TESTERR);
+    $self->todo_output(\*TESTOUT);
+}
+
+
+# Set once TESTOUT and TESTERR have been opened, so the dups below are
+# made only once per process.
+my $Opened_Testhandles = 0;
+
+# Open TESTOUT and TESTERR as duplicates of STDOUT and STDERR.  Dies if
+# either dup fails; does nothing on later calls.
+sub _open_testhandles {
+    return if $Opened_Testhandles;
+    # We dup STDOUT and STDERR so people can change them in their
+    # test suites while still getting normal test output.
+    open(TESTOUT, ">&STDOUT") or die "Can't dup STDOUT:  $!";
+    open(TESTERR, ">&STDERR") or die "Can't dup STDERR:  $!";
+    $Opened_Testhandles = 1;
+}
+
+
+=back
+
+
+=head2 Test Status and Info
+
+=over 4
+
+=item B<current_test>
+
+    my $curr_test = $Test->current_test;
+    $Test->current_test($num);
+
+Gets/sets the current test number we're on.  You usually shouldn't
+have to set this.
+
+If set forward, the details of the missing tests are filled in as 'unknown'.
+If set backward, the details of the intervening tests are deleted.  You
+can erase history if you really want to.
+
+=cut
+
+sub current_test {
+    my($self, $num) = @_;
+
+    lock($self->{Curr_Test});
+
+    # Without an argument this is a plain getter.
+    return $self->{Curr_Test} unless defined $num;
+
+    unless( $self->{Have_Plan} ) {
+        require Carp;
+        Carp::croak("Can't change the current test number without a plan!");
+    }
+
+    $self->{Curr_Test} = $num;
+
+    my $results  = $self->{Test_Results};
+    my $recorded = @$results;
+
+    if( $num > $recorded ) {
+        # The counter moved forward: fill in a placeholder record for
+        # every test number that was jumped over.
+        foreach my $index ($recorded .. $num - 1) {
+            $results->[$index] = &share({
+                'ok'      => 1, 
+                actual_ok => undef, 
+                reason    => 'incrementing test number', 
+                type      => 'unknown', 
+                name      => undef 
+            });
+        }
+    }
+    elsif( $num < $recorded ) {
+        # The counter moved backward: wipe history.  It's their funeral.
+        $#{$results} = $num - 1;
+    }
+
+    return $self->{Curr_Test};
+}
+
+
+=item B<summary>
+
+    my @tests = $Test->summary;
+
+A simple summary of the tests so far.  True for pass, false for fail.
+This is a logical pass/fail, so todos are passes.
+
+Of course, test #1 is $tests[0], etc...
+
+=cut
+
+sub summary {
+    my $self = shift;
+
+    # Collect the logical 'ok' flag of every recorded result, in order.
+    my @passed;
+    foreach my $result (@{ $self->{Test_Results} }) {
+        push @passed, $result->{'ok'};
+    }
+
+    return @passed;
+}
+
+=item B<details>
+
+    my @tests = $Test->details;
+
+Like summary(), but with a lot more detail.
+
+    $tests[$test_num - 1] = 
+            { 'ok'       => is the test considered a pass?
+              actual_ok  => did it literally say 'ok'?
+              name       => name of the test (if any)
+              type       => type of test (if any, see below).
+              reason     => reason for the above (if any)
+            };
+
+'ok' is true if Test::Harness will consider the test to be a pass.
+
+'actual_ok' is a reflection of whether or not the test literally
+printed 'ok' or 'not ok'.  This is for examining the result of 'todo'
+tests.  
+
+'name' is the name of the test.
+
+'type' indicates if it was a special test.  Normal tests have a type
+of ''.  Type can be one of the following:
+
+    skip        see skip()
+    todo        see todo()
+    todo_skip   see todo_skip()
+    unknown     see below
+
+Sometimes the Test::Builder test counter is incremented without it
+printing any test output, for example, when current_test() is changed.
+In these cases, Test::Builder doesn't know the result of the test, so
+its type is 'unknown'.  The details for these tests are filled in.
+They are considered ok, but the name and actual_ok is left undef.
+
+For example "not ok 23 - hole count # TODO insufficient donuts" would
+result in this structure:
+
+    $tests[22] =    # 23 - 1, since arrays start from 0.
+      { ok        => 1,   # logically, the test passed since it's todo
+        actual_ok => 0,   # in absolute terms, it failed
+        name      => 'hole count',
+        type      => 'todo',
+        reason    => 'insufficient donuts'
+      };
+
+=cut
+
+# Returns the recorded per-test result hashes, in test order.
+sub details {
+    return @{ $_[0]->{Test_Results} };
+}
+
+=item B<todo>
+
+    my $todo_reason = $Test->todo;
+    my $todo_reason = $Test->todo($pack);
+
+todo() looks for a $TODO variable in your tests.  If set, all tests
+will be considered 'todo' (see Test::More and Test::Harness for
+details).  Returns the reason (ie. the value of $TODO) if running as
+todo tests, false otherwise.
+
+todo() is about finding the right package to look for $TODO in.  It
+uses the exported_to() package to find it.  If that's not set, it's
+pretty good at guessing the right package to look at based on $Level.
+
+Sometimes there is some confusion about where todo() should be looking
+for the $TODO variable.  If you want to be sure, tell it explicitly
+what $pack to use.
+
+=cut
+
+# Returns the reason held in the relevant package's $TODO, or 0 if unset.
+# The package is the explicit $pack, else exported_to(), else the caller's.
+sub todo {
+    my($self, $pack) = @_;
+    $pack ||= $self->exported_to || $self->caller($Level);
+    return 0 unless $pack;
+    no strict 'refs';
+    my $reason = ${ $pack . '::TODO' };
+    return defined $reason ? $reason : 0;
+}
+
+=item B<caller>
+
+    my $package = $Test->caller;
+    my($pack, $file, $line) = $Test->caller;
+    my($pack, $file, $line) = $Test->caller($height);
+
+Like the normal caller(), except it reports according to your level().
+
+=cut
+
+# caller() adjusted by level(): whole frame in list context, package otherwise.
+sub caller {
+    my($self, $height) = @_;
+    my $depth = $self->level + ($height || 0) + 1;
+    my @frame = CORE::caller($depth);
+    return wantarray ? @frame : $frame[0];
+}
+
+=back
+
+=cut
+
+=begin _private
+
+=over 4
+
+=item B<_sanity_check>
+
+  $self->_sanity_check();
+
+Runs a bunch of end of test sanity checks to make sure reality came
+through ok.  If anything is wrong it will die with a fairly friendly
+error message.
+
+=cut
+
+#'#
+# End-of-run consistency checks; dies via _whoa() if the bookkeeping is off.
+sub _sanity_check {
+    my $self = shift;
+    _whoa($self->{Curr_Test} < 0,  'Says here you ran a negative number of tests!');
+    _whoa(!$self->{Have_Plan} && $self->{Curr_Test},  # '&&' binds tighter than ','
+          'Somehow your tests ran without a plan!');
+    _whoa($self->{Curr_Test} != @{ $self->{Test_Results} },
+          'Somehow you got a different number of results than tests ran!');
+}
+
+=item B<_whoa>
+
+  _whoa($check, $description);
+
+A sanity check, similar to assert().  If the $check is true, something
+has gone horribly wrong.  It will die with the given $description and
+a note to contact the author.
+
+=cut
+
+# Assertion helper: dies with $desc (plus a "contact the author" note) when
+# $check is true; does nothing otherwise.
+sub _whoa {
+    my($check, $desc) = @_;
+    die <<WHOA if $check;
+WHOA!  $desc
+This should never happen!  Please contact the author immediately!
+WHOA
+}
+
+=item B<_my_exit>
+
+  _my_exit($exit_num);
+
+Perl seems to have some trouble with exiting inside an END block.  5.005_03
+and 5.6.1 both seem to do odd things.  Instead, this function edits $?
+directly.  It should ONLY be called from inside an END block.  It
+doesn't actually exit, that's your job.
+
+=cut
+
+# Stores the given exit status in $? without exiting; always returns 1.
+sub _my_exit {
+    ($?) = @_;
+    return 1;
+}
+
+
+=back
+
+=end _private
+
+=cut
+
+$SIG{__DIE__} = sub {
+    # Flag the run as having died, unless the die happened inside an eval.
+    # $^S isn't totally reliable for that (5.005_03 and 5.6.1 both get it
+    # wrong), so walk the call stack looking for an "(eval)" frame instead.
+    # This also means it runs under 5.004!
+    my $in_eval = 0;
+    for( my $stack = 1;  my $sub = (CORE::caller($stack))[3];  $stack++ ) {
+        $in_eval = 1 if $sub =~ /^\(eval\)/;
+    }
+    $Test->{Test_Died} = 1 unless $in_eval;
+};
+
+sub _ending {
+    my $self = shift;
+    # End-of-run handler: checks the bookkeeping, prints the closing
+    # diagnostics and records the exit status in $? through _my_exit().
+    $self->_sanity_check();
+    # Don't bother with an ending if this is a forked copy.  Only the parent
+    # should do the ending.
+    # Exit if plan() was never called.  This is so "require Test::Simple" 
+    # doesn't puke.
+    # Don't do an ending if we bailed out.
+    if( ($self->{Original_Pid} != $$) 			or
+	(!$self->{Have_Plan} && !$self->{Test_Died}) 	or
+	$self->{Bailed_Out}
+      )
+    {
+	_my_exit($?);
+	return;
+    }
+
+    # Figure out if we passed or failed and print helpful messages.
+    my $test_results = $self->{Test_Results};
+    if( @$test_results ) {
+        # No plan was declared: print the plan line now, sized to what ran.
+        if( $self->{No_Plan} ) {
+            $self->_print("1..$self->{Curr_Test}\n") unless $self->no_header;
+            $self->{Expected_Tests} = $self->{Curr_Test};
+        }
+
+        # Auto-extended arrays and elements which aren't explicitly
+        # filled in with a shared reference will puke under 5.8.0
+        # ithreads.  So we have to fill them in by hand. :(
+        my $empty_result = &share({});
+        for my $idx ( 0..$self->{Expected_Tests}-1 ) {
+            $test_results->[$idx] = $empty_result
+              unless defined $test_results->[$idx];
+        }
+        # Count the failures among the tests that actually ran.
+        my $num_failed = grep !$_->{'ok'}, 
+                              @{$test_results}[0..$self->{Curr_Test}-1];
+        # Positive when more tests ran than planned, negative when fewer.
+        my $num_extra = $self->{Curr_Test} - $self->{Expected_Tests};
+
+        if( $num_extra < 0 ) {
+            my $s = $self->{Expected_Tests} == 1 ? '' : 's';
+            $self->diag(<<"FAIL");
+Looks like you planned $self->{Expected_Tests} test$s but only ran $self->{Curr_Test}.
+FAIL
+        }
+        elsif( $num_extra > 0 ) {
+            my $s = $self->{Expected_Tests} == 1 ? '' : 's';
+            $self->diag(<<"FAIL");
+Looks like you planned $self->{Expected_Tests} test$s but ran $num_extra extra.
+FAIL
+        }
+
+        if ( $num_failed ) {
+            my $num_tests = $self->{Curr_Test};
+            my $s = $num_failed == 1 ? '' : 's';
+
+            my $qualifier = $num_extra == 0 ? '' : ' run';
+
+            $self->diag(<<"FAIL");
+Looks like you failed $num_failed test$s of $num_tests$qualifier.
+FAIL
+        }
+
+        if( $self->{Test_Died} ) {
+            $self->diag(<<"FAIL");
+Looks like your test died just after $self->{Curr_Test}.
+FAIL
+
+            _my_exit( 255 ) && return;
+        }
+        # Exit with the failure count (at most 254), 255 for a wrong test count, else 0.
+        my $exit_code;
+        if( $num_failed ) {
+            $exit_code = $num_failed <= 254 ? $num_failed : 254;
+        }
+        elsif( $num_extra != 0 ) {
+            $exit_code = 255;
+        }
+        else {
+            $exit_code = 0;
+        }
+
+        _my_exit( $exit_code ) && return;
+    }
+    elsif ( $self->{Skip_All} ) {
+        _my_exit( 0 ) && return;
+    }
+    elsif ( $self->{Test_Died} ) {
+        $self->diag(<<'FAIL');
+Looks like your test died before it could output anything.
+FAIL
+        _my_exit( 255 ) && return;
+    }
+    else {
+        $self->diag("No tests run!\n");
+        _my_exit( 255 ) && return;
+    }
+}
+
+END {   # at exit: run the ending unless it is disabled or no builder exists
+    $Test->_ending if defined($Test) && !$Test->no_ending;
+}
+
+=head1 EXIT CODES
+
+If all your tests passed, Test::Builder will exit with zero (which is
+normal).  If anything failed it will exit with how many failed.  If
+you run fewer (or more) tests than you planned, the missing (or extras)
+will be considered failures.  If no tests were ever run Test::Builder
+will throw a warning and exit with 255.  If the test died, even after
+having successfully completed all its tests, it will still be
+considered a failure and will exit with 255.
+
+So the exit codes are...
+
+    0                   all tests successful
+    255                 test died or all passed but wrong # of tests run
+    any other number    how many failed (including missing or extras)
+
+If you fail more than 254 tests, it will be reported as 254.
+
+
+=head1 THREADS
+
+In perl 5.8.0 and later, Test::Builder is thread-safe.  The test
+number is shared amongst all threads.  This means if one thread sets
+the test number using current_test() they will all be affected.
+
+Test::Builder is only thread-aware if threads.pm is loaded I<before>
+Test::Builder.
+
+=head1 EXAMPLES
+
+CPAN can provide the best examples.  Test::Simple, Test::More,
+Test::Exception and Test::Differences all use Test::Builder.
+
+=head1 SEE ALSO
+
+Test::Simple, Test::More, Test::Harness
+
+=head1 AUTHORS
+
+Original code by chromatic, maintained by Michael G Schwern
+E<lt>schwern at pobox.comE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2002, 2004 by chromatic E<lt>chromatic at wgz.orgE<gt> and
+                        Michael G Schwern E<lt>schwern at pobox.comE<gt>.
+
+This program is free software; you can redistribute it and/or 
+modify it under the same terms as Perl itself.
+
+See F<http://www.perl.com/perl/misc/Artistic.html>
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/More.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/More.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/More.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,1547 @@
+package Test::More;
+
+use 5.004;
+
+use strict;
+
+
+# Minimal stand-in for Carp::carp(): warns with the file and line from which
+# the sub that called _carp() was itself called.  Carp is deliberately not
+# loaded, since that could let use_ok() pass for a module that forgot to use it.
+sub _carp {
+    my @frame = caller(1);
+    warn @_, " at $frame[1] line $frame[2]\n";
+}
+
+
+
+use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS $TODO);
+$VERSION = '0.64';
+$VERSION = eval $VERSION;    # make the alpha version come out as a number
+
+use Test::Builder::Module;
+# Subclass Test::Builder::Module and list the symbols offered for export.
+@ISA    = qw(Test::Builder::Module);
+@EXPORT = qw(ok use_ok require_ok
+             is isnt like unlike is_deeply
+             cmp_ok
+             skip todo todo_skip
+             pass fail
+             eq_array eq_hash eq_set
+             $TODO
+             plan
+             can_ok  isa_ok
+             diag BAIL_OUT
+            );
+
+
+=head1 NAME
+
+Test::More - yet another framework for writing test scripts
+
+=head1 SYNOPSIS
+
+  use Test::More tests => $Num_Tests;
+  # or
+  use Test::More qw(no_plan);
+  # or
+  use Test::More skip_all => $reason;
+
+  BEGIN { use_ok( 'Some::Module' ); }
+  require_ok( 'Some::Module' );
+
+  # Various ways to say "ok"
+  ok($this eq $that, $test_name);
+
+  is  ($this, $that,    $test_name);
+  isnt($this, $that,    $test_name);
+
+  # Rather than print STDERR "# here's what went wrong\n"
+  diag("here's what went wrong");
+
+  like  ($this, qr/that/, $test_name);
+  unlike($this, qr/that/, $test_name);
+
+  cmp_ok($this, '==', $that, $test_name);
+
+  is_deeply($complex_structure1, $complex_structure2, $test_name);
+
+  SKIP: {
+      skip $why, $how_many unless $have_some_feature;
+
+      ok( foo(),       $test_name );
+      is( foo(42), 23, $test_name );
+  };
+
+  TODO: {
+      local $TODO = $why;
+
+      ok( foo(),       $test_name );
+      is( foo(42), 23, $test_name );
+  };
+
+  can_ok($module, @methods);
+  isa_ok($object, $class);
+
+  pass($test_name);
+  fail($test_name);
+
+  BAIL_OUT($why);
+
+  # UNIMPLEMENTED!!!
+  my @status = Test::More::status;
+
+
+=head1 DESCRIPTION
+
+B<STOP!> If you're just getting started writing tests, have a look at
+Test::Simple first.  This is a drop in replacement for Test::Simple
+which you can switch to once you get the hang of basic testing.
+
+The purpose of this module is to provide a wide range of testing
+utilities.  Various ways to say "ok" with better diagnostics,
+facilities to skip tests, test future features and compare complicated
+data structures.  While you can do almost anything with a simple
+C<ok()> function, it doesn't provide good diagnostic output.
+
+
+=head2 I love it when a plan comes together
+
+Before anything else, you need a testing plan.  This basically declares
+how many tests your script is going to run to protect against premature
+failure.
+
+The preferred way to do this is to declare a plan when you C<use Test::More>.
+
+  use Test::More tests => $Num_Tests;
+
+There are rare cases when you will not know beforehand how many tests
+your script is going to run.  In this case, you can declare that you
+have no plan.  (Try to avoid using this as it weakens your test.)
+
+  use Test::More qw(no_plan);
+
+B<NOTE>: using no_plan requires a Test::Harness upgrade else it will
+think everything has failed.  See L<CAVEATS and NOTES>.
+
+In some cases, you'll want to completely skip an entire testing script.
+
+  use Test::More skip_all => $skip_reason;
+
+Your script will declare a skip with the reason why you skipped and
+exit immediately with a zero (success).  See L<Test::Harness> for
+details.
+
+If you want to control what functions Test::More will export, you
+have to use the 'import' option.  For example, to import everything
+but 'fail', you'd do:
+
+  use Test::More tests => 23, import => ['!fail'];
+
+Alternatively, you can use the plan() function.  Useful for when you
+have to calculate the number of tests.
+
+  use Test::More;
+  plan tests => keys %Stuff * 3;
+
+or for deciding between running the tests at all:
+
+  use Test::More;
+  if( $^O eq 'MacOS' ) {
+      plan skip_all => 'Test irrelevant on MacOS';
+  }
+  else {
+      plan tests => 42;
+  }
+
+=cut
+
+# plan(@args): passes the plan arguments straight through to the builder.
+sub plan {
+    my $builder = Test::More->builder;
+    return $builder->plan(@_);
+}
+
+
+# Implements the deprecated "use Test::More 'no_diag'" switch.
+#
+#   $class - class whose builder() supplies the builder object
+#   $list  - array ref of import arguments; rewritten in place
+#
+# Each 'no_diag' entry calls no_diag(1) on the builder and is dropped
+# from @$list; every other entry (undef included) is kept, in order.
+sub import_extra {
+    my($class, $list) = @_;
+
+    my @kept;
+    foreach my $item (@$list) {
+        if( defined $item and $item eq 'no_diag' ) {
+            $class->builder->no_diag(1);
+            next;
+        }
+
+        push @kept, $item;
+    }
+
+    # Replace the caller's list with the filtered copy.
+    @$list = @kept;
+}
+
+
+=head2 Test names
+
+By convention, each test is assigned a number in order.  This is
+largely done automatically for you.  However, it's often very useful to
+assign a name to each test.  Which would you rather see:
+
+  ok 4
+  not ok 5
+  ok 6
+
+or
+
+  ok 4 - basic multi-variable
+  not ok 5 - simple exponential
+  ok 6 - force == mass * acceleration
+
+The latter gives you some idea of what failed.  It also makes it easier
+to find the test in your script, simply search for "simple
+exponential".
+
+All test functions take a name argument.  It's optional, but highly
+suggested that you use it.
+
+
+=head2 I'm ok, you're not ok.
+
+The basic purpose of this module is to print out either "ok #" or "not
+ok #" depending on if a given test succeeded or failed.  Everything
+else is just gravy.
+
+All of the following print "ok" or "not ok" depending on if the test
+succeeded or failed.  They all also return true or false,
+respectively.
+
+=over 4
+
+=item B<ok>
+
+  ok($this eq $that, $test_name);
+
+This simply evaluates any expression (C<$this eq $that> is just a
+simple example) and uses that to determine if the test succeeded or
+failed.  A true expression passes, a false one fails.  Very simple.
+
+For example:
+
+    ok( $exp{9} == 81,                   'simple exponential' );
+    ok( Film->can('db_Main'),            'set_db()' );
+    ok( $p->tests == 4,                  'saw tests' );
+    ok( !grep !defined $_, @items,       'items populated' );
+
+(Mnemonic:  "This is ok.")
+
+$test_name is a very short description of the test that will be printed
+out.  It makes it very easy to find a test in your script when it fails
+and gives others an idea of your intentions.  $test_name is optional,
+but we B<very> strongly encourage its use.
+
+Should an ok() fail, it will produce some diagnostics:
+
+    not ok 18 - sufficient mucus
+    #   Failed test 'sufficient mucus'
+    #   in foo.t at line 42.
+
+This is actually Test::Simple's ok() routine.
+
+=cut
+
+# ok($test, $name): a true $test passes, a false one fails.  The work is
+# done by the builder's ok(), whose result is returned.
+sub ok ($;$) {
+    my($test, $name) = @_;
+    return Test::More->builder->ok($test, $name);
+}
+
+=item B<is>
+
+=item B<isnt>
+
+  is  ( $this, $that, $test_name );
+  isnt( $this, $that, $test_name );
+
+Similar to ok(), is() and isnt() compare their two arguments
+with C<eq> and C<ne> respectively and use the result of that to
+determine if the test succeeded or failed.  So these:
+
+    # Is the ultimate answer 42?
+    is( ultimate_answer(), 42,          "Meaning of Life" );
+
+    # $foo isn't empty
+    isnt( $foo, '',     "Got some foo" );
+
+are similar to these:
+
+    ok( ultimate_answer() eq 42,        "Meaning of Life" );
+    ok( $foo ne '',     "Got some foo" );
+
+(Mnemonic:  "This is that."  "This isn't that.")
+
+So why use these?  They produce better diagnostics on failure.  ok()
+cannot know what you are testing for (beyond the name), but is() and
+isnt() know what the test was and why it failed.  For example this
+test:
+
+    my $foo = 'waffle';  my $bar = 'yarblokos';
+    is( $foo, $bar,   'Is foo the same as bar?' );
+
+Will produce something like this:
+
+    not ok 17 - Is foo the same as bar?
+    #   Failed test 'Is foo the same as bar?'
+    #   in foo.t at line 139.
+    #          got: 'waffle'
+    #     expected: 'yarblokos'
+
+So you can figure out what went wrong without rerunning the test.
+
+You are encouraged to use is() and isnt() over ok() where possible,
+however do not be tempted to use them to find out if something is
+true or false!
+
+  # XXX BAD!
+  is( exists $brooklyn{tree}, 1, 'A tree grows in Brooklyn' );
+
+This does not check if C<exists $brooklyn{tree}> is true, it checks if
+it returns 1.  Very different.  Similar caveats exist for false and 0.
+In these cases, use ok().
+
+  ok( exists $brooklyn{tree},    'A tree grows in Brooklyn' );
+
+For those grammatical pedants out there, there's an C<isn't()>
+function which is an alias of isnt().
+
+=cut
+
+# is($this, $that, $name): passes when the two are equal under 'eq';
+# handled by the builder's is_eq(), whose result is returned.
+sub is ($$;$) {
+    return Test::More->builder->is_eq(@_);
+}
+
+# isnt($this, $that, $name): passes when the two differ under 'ne';
+# handled by the builder's isnt_eq(), whose result is returned.
+sub isnt ($$;$) {
+    return Test::More->builder->isnt_eq(@_);
+}
+# isn't() is an alias of isnt().
+*isn't = \&isnt;
+
+
+=item B<like>
+
+  like( $this, qr/that/, $test_name );
+
+Similar to ok(), like() matches $this against the regex C<qr/that/>.
+
+So this:
+
+    like($this, qr/that/, 'this is like that');
+
+is similar to:
+
+    ok( $this =~ /that/, 'this is like that');
+
+(Mnemonic "This is like that".)
+
+The second argument is a regular expression.  It may be given as a
+regex reference (i.e. C<qr//>) or (for better compatibility with older
+perls) as a string that looks like a regex (alternative delimiters are
+currently not supported):
+
+    like( $this, '/that/', 'this is like that' );
+
+Regex options may be placed on the end (C<'/that/i'>).
+
+Its advantages over ok() are similar to that of is() and isnt().  Better
+diagnostics on failure.
+
+=cut
+
+# like($this, qr/that/, $name): passes when $this matches the pattern;
+# handled by the builder's like(), whose result is returned.
+sub like ($$;$) {
+    return Test::More->builder->like(@_);
+}
+
+
+=item B<unlike>
+
+  unlike( $this, qr/that/, $test_name );
+
+Works exactly as like(), only it checks if $this B<does not> match the
+given pattern.
+
+=cut
+
+# unlike($this, qr/that/, $name): passes when $this does not match the
+# pattern; handled by the builder's unlike(), whose result is returned.
+sub unlike ($$;$) {
+    return Test::More->builder->unlike(@_);
+}
+
+
+=item B<cmp_ok>
+
+  cmp_ok( $this, $op, $that, $test_name );
+
+Halfway between ok() and is() lies cmp_ok().  This allows you to
+compare two arguments using any binary perl operator.
+
+    # ok( $this eq $that );
+    cmp_ok( $this, 'eq', $that, 'this eq that' );
+
+    # ok( $this == $that );
+    cmp_ok( $this, '==', $that, 'this == that' );
+
+    # ok( $this && $that );
+    cmp_ok( $this, '&&', $that, 'this && that' );
+    ...etc...
+
+Its advantage over ok() is when the test fails you'll know what $this
+and $that were:
+
+    not ok 1
+    #   Failed test in foo.t at line 12.
+    #     '23'
+    #         &&
+    #     undef
+
+It's also useful in those cases where you are comparing numbers and
+is()'s use of C<eq> will interfere:
+
+    cmp_ok( $big_hairy_number, '==', $another_big_hairy_number );
+
+=cut
+
+# cmp_ok($this, $op, $that, $name): compares $this and $that with the binary
+# operator named by $op; handled by the builder's cmp_ok().
+sub cmp_ok($$$;$) {
+    return Test::More->builder->cmp_ok(@_);
+}
+
+
+=item B<can_ok>
+
+  can_ok($module, @methods);
+  can_ok($object, @methods);
+
+Checks to make sure the $module or $object can do these @methods
+(works with functions, too).
+
+    can_ok('Foo', qw(this that whatever));
+
+is almost exactly like saying:
+
+    ok( Foo->can('this') && 
+        Foo->can('that') && 
+        Foo->can('whatever') 
+      );
+
+only without all the typing and with a better interface.  Handy for
+quickly testing an interface.
+
+No matter how many @methods you check, a single can_ok() call counts
+as one test.  If you desire otherwise, use:
+
+    foreach my $meth (@methods) {
+        can_ok('Foo', $meth);
+    }
+
+=cut
+
+# can_ok($proto, @methods): a single test that passes only if $proto (a
+# class name or an object) can() every listed method.  An empty $proto or
+# an empty method list is reported as a failure.  Returns the ok() result.
+sub can_ok ($@) {
+    my($proto, @methods) = @_;
+    my $class = ref $proto || $proto;
+    my $tb    = Test::More->builder;
+
+    if( !$class ) {
+        my $ok = $tb->ok( 0, "->can(...)" );
+        $tb->diag('    can_ok() called with empty class or reference');
+        return $ok;
+    }
+    if( !@methods ) {
+        my $ok = $tb->ok( 0, "$class->can(...)" );
+        $tb->diag('    can_ok() called with no methods');
+        return $ok;
+    }
+
+    # Gather the methods that are missing; a can() that dies counts as missing.
+    my @missing;
+    foreach my $method (@methods) {
+        local($!, $@);  # don't interfere with caller's $@
+                        # eval sometimes resets $!
+        push @missing, $method unless eval { $proto->can($method) };
+    }
+
+    my $name = @methods == 1 ? "$class->can('$methods[0]')"
+                             : "$class->can(...)";
+    my $ok = $tb->ok( !@missing, $name );
+
+    $tb->diag(map "    $class->can('$_') failed\n", @missing);
+    return $ok;
+}
+
+=item B<isa_ok>
+
+  isa_ok($object, $class, $object_name);
+  isa_ok($ref,    $type,  $ref_name);
+
+Checks to see if the given C<< $object->isa($class) >>.  Also checks to make
+sure the object was defined in the first place.  Handy for this sort
+of thing:
+
+    my $obj = Some::Module->new;
+    isa_ok( $obj, 'Some::Module' );
+
+where you'd otherwise have to write
+
+    my $obj = Some::Module->new;
+    ok( defined $obj && $obj->isa('Some::Module') );
+
+to safeguard against your test script blowing up.
+
+It works on references, too:
+
+    isa_ok( $array_ref, 'ARRAY' );
+
+The diagnostics of this test normally just refer to 'the object'.  If
+you'd like them to be more specific, you can supply an $object_name
+(for example 'Test customer').
+
+=cut
+
+# isa_ok($object, $class, $obj_name): one test that passes when $object is
+# a defined reference for which isa($class) holds.  The object's own isa()
+# is called so overrides are honored; unblessed references fall back to
+# UNIVERSAL::isa().  $obj_name (default 'The object') is used in the test
+# name and diagnostics.  Returns the result of the underlying ok().
+sub isa_ok ($$;$) {
+    my($object, $class, $obj_name) = @_;
+    my $tb = Test::More->builder;
+
+    $obj_name = 'The object' unless defined $obj_name;
+    my $name = "$obj_name isa $class";
+
+    my $diag;
+    if( !defined $object ) {
+        $diag = "$obj_name isn't defined";
+    }
+    elsif( !ref $object ) {
+        $diag = "$obj_name isn't a reference";
+    }
+    else {
+        # We can't use UNIVERSAL::isa directly because we want to honor isa() overrides
+        local($@, $!);  # eval sometimes resets $!
+        my $rslt = eval { $object->isa($class) };
+
+        my $is_a;
+        if( !$@ ) {
+            $is_a = $rslt;
+        }
+        elsif( $@ =~ /^Can't call method "isa" on unblessed reference/ ) {
+            $is_a = UNIVERSAL::isa($object, $class);
+        }
+        else {
+            die <<WHOA;
+WHOA! I tried to call ->isa on your object and got some weird error.
+This should never happen.  Please contact the author immediately.
+Here's the error.
+$@
+WHOA
+        }
+
+        unless( $is_a ) {
+            my $ref = ref $object;
+            $diag = "$obj_name isn't a '$class' it's a '$ref'";
+        }
+    }
+
+    # Report the verdict; the diagnostic is printed only for a failure.
+    my $ok = $tb->ok( ($diag ? 0 : 1), $name );
+    $tb->diag("    $diag\n") if $diag;
+    return $ok;
+}
+
+
+=item B<pass>
+
+=item B<fail>
+
+  pass($test_name);
+  fail($test_name);
+
+Sometimes you just want to say that the tests have passed.  Usually
+the case is you've got some complicated condition that is difficult to
+wedge into an ok().  In this case, you can simply use pass() (to
+declare the test ok) or fail (for not ok).  They are synonyms for
+ok(1) and ok(0).
+
+Use these very, very, very sparingly.
+
+=cut
+
+# pass($name): records a passing test; a synonym for ok(1, $name).
+sub pass (;$) {
+    return Test::More->builder->ok(1, @_);
+}
+
+# fail($name): records a failing test; a synonym for ok(0, $name).
+sub fail (;$) {
+    return Test::More->builder->ok(0, @_);
+}
+
+=back
+
+
+=head2 Module tests
+
+You usually want to test if the module you're testing loads ok, rather
+than just vomiting if its load fails.  For such purposes we have
+C<use_ok> and C<require_ok>.
+
+=over 4
+
+=item B<use_ok>
+
+   BEGIN { use_ok($module); }
+   BEGIN { use_ok($module, @imports); }
+
+These simply use the given $module and test to make sure the load
+happened ok.  It's recommended that you run use_ok() inside a BEGIN
+block so its functions are exported at compile-time and prototypes are
+properly honored.
+
+If @imports are given, they are passed through to the use.  So this:
+
+   BEGIN { use_ok('Some::Module', qw(foo bar)) }
+
+is like doing this:
+
+   use Some::Module qw(foo bar);
+
+Version numbers can be checked like so:
+
+   # Just like "use Some::Module 1.02"
+   BEGIN { use_ok('Some::Module', 1.02) }
+
+Don't try to do this:
+
+   BEGIN {
+       use_ok('Some::Module');
+
+       ...some code that depends on the use...
+       ...happening at compile time...
+   }
+
+because the notion of "compile-time" is relative.  Instead, you want:
+
+  BEGIN { use_ok('Some::Module') }
+  BEGIN { ...some code that depends on the use... }
+
+
+=cut
+
+# use_ok($module, @imports): one test that passes if "use $module @imports"
+# compiles and runs without error inside the caller's package.  A single
+# number-like import is treated as a version check.  On failure the error
+# text is printed as a diagnostic.  Returns the result of the underlying
+# ok().
+sub use_ok ($;@) {
+    my($module, @imports) = @_;
+    my $tb = Test::More->builder;
+
+    my($pack, $filename, $line) = caller;
+
+    local($@,$!);   # eval sometimes interferes with $!
+
+    # A lone number is probably a version check.  Perl needs to see the bare
+    # number for it to work with non-Exporter based modules.  Anything else
+    # is passed as the @imports array itself, which the eval'd code can see.
+    my $use_args = ( @imports == 1 and $imports[0] =~ /^\d+(?:\.\d+)?$/ )
+                 ? $imports[0]
+                 : '@imports';
+
+    eval <<USE;
+package $pack;
+use $module $use_args;
+USE
+
+    my $ok = $tb->ok( !$@, "use $module;" );
+    return $ok if $ok;
+
+    # Report the error, pointing the "BEGIN failed" line at our caller.
+    chomp $@;
+    $@ =~ s{^BEGIN failed--compilation aborted at .*$}
+            {BEGIN failed--compilation aborted at $filename line $line.}m;
+    $tb->diag(<<DIAGNOSTIC);
+    Tried to use '$module'.
+    Error:  $@
+DIAGNOSTIC
+
+    return $ok;
+}
+
+=item B<require_ok>
+
+   require_ok($module);
+   require_ok($file);
+
+Like use_ok(), except it requires the $module or $file.
+
+=cut
+
+# require_ok($module_or_file): one test that passes if the module or file
+# can be require()d from the caller's package.  On failure the error is
+# printed as a diagnostic.  Returns the result of the underlying ok().
+sub require_ok ($) {
+    my($module) = @_;
+    my $tb   = Test::More->builder;
+    my $pack = caller;
+
+    # Try to determine if we've been given a module name or file.
+    # Module names must be barewords, files not.
+    unless( _is_module_name($module) ) {
+        $module = qq['$module'];
+    }
+
+    local($!, $@); # eval sometimes interferes with $!
+    eval <<REQUIRE;
+package $pack;
+require $module;
+REQUIRE
+
+    my $ok = $tb->ok( !$@, "require $module;" );
+    return $ok if $ok;
+    chomp $@;
+    $tb->diag(<<DIAGNOSTIC);
+    Tried to require '$module'.
+    Error:  $@
+DIAGNOSTIC
+    return $ok;
+}
+
+
+# True if $module looks like a bareword module name: once every "::" that
+# sits between word characters is removed, what is left must start with
+# an ASCII letter and contain only word characters (\w).
+sub _is_module_name {
+    my($module) = @_;
+
+    (my $flat = $module) =~ s/\b::\b//g;
+    return $flat =~ /^[a-zA-Z]\w*$/;
+}
+
+=back
+
+
+=head2 Complex data structures
+
+Not everything is a simple eq check or regex.  There are times you
+need to see if two data structures are equivalent.  For these
+instances Test::More provides a handful of useful functions.
+
+B<NOTE> I'm not quite sure what will happen with filehandles.
+
+=over 4
+
+=item B<is_deeply>
+
+  is_deeply( $this, $that, $test_name );
+
+Similar to is(), except that if $this and $that are references, it
+does a deep comparison walking each data structure to see if they are
+equivalent.  If the two structures are different, it will display the
+place where they start differing.
+
+is_deeply() compares the dereferenced values of references, the
+references themselves (except for their type) are ignored.  This means
+aspects such as blessing and ties are not considered "different".
+
+is_deeply() currently has very limited handling of function references
+and globs.  It merely checks if they have the same referent.  This may
+improve in the future.
+
+Test::Differences and Test::Deep provide more in-depth functionality
+along these lines.
+
+=cut
+
+use vars qw(@Data_Stack %Refs_Seen);
+my $DNE = bless [], 'Does::Not::Exist';
+# is_deeply($this, $that, $name): one test comparing two values.  Plain
+# scalars are compared with is_eq(); a reference against a non-reference
+# fails; two references are walked with _deep_check(), and on a mismatch
+# the point where they start to differ is printed as a diagnostic.
+# Called with the wrong number of arguments it warns and fails.
+sub is_deeply {
+    my $tb = Test::More->builder;
+
+    if( @_ != 2 and @_ != 3 ) {
+        my $msg = <<WARNING;
+is_deeply() takes two or three args, you gave %d.
+This usually means you passed an array or hash instead 
+of a reference to it
+WARNING
+        chop $msg;   # clip off newline so carp() will put in line/file
+
+        _carp( sprintf $msg, scalar @_ );
+
+        return $tb->ok(0);
+    }
+
+    my($this, $that, $name) = @_;
+    $tb->_unoverload_str(\$that, \$this);
+
+    # Neither is a reference: an ordinary string comparison.
+    return scalar( $tb->is_eq($this, $that, $name) ) if !ref $this and !ref $that;
+
+    # One's a reference, one isn't: they cannot match.
+    if( !ref $this xor !ref $that ) {
+        my $ok = $tb->ok(0, $name);
+        $tb->diag( _format_stack({ vals => [ $this, $that ] }) );
+        return $ok;
+    }
+
+    # Both are references: compare the structures element by element.
+    local @Data_Stack = ();
+    my $same = _deep_check($this, $that);
+    my $ok   = $tb->ok( ($same ? 1 : 0), $name );
+    $tb->diag(_format_stack(@Data_Stack)) unless $same;
+    return $ok;
+}
+
+# Renders the "Structures begin differing at:" diagnostic.  Each entry of
+# @Stack is a hash with a 'type' (HASH, ARRAY or REF), an 'idx' and 'vals';
+# the entries spell out the access path, and the last entry's first two
+# 'vals' are shown as the got/expected values at that path.
+sub _format_stack {
+    my(@Stack) = @_;
+
+    # Build the access path on a '$FOO' placeholder.
+    my $path = '$FOO';
+    my $did_arrow = 0;
+    foreach my $entry (@Stack) {
+        my $type = $entry->{type} || '';
+        my $idx  = $entry->{'idx'};
+        if( $type eq 'REF' ) {
+            $path = "\${$path}";
+            next;
+        }
+        next unless $type eq 'HASH' or $type eq 'ARRAY';
+        $path .= "->" unless $did_arrow++;
+        $path .= $type eq 'HASH' ? "{$idx}" : "[$idx]";
+    }
+
+    my($got_var, $expected_var) = ($path, $path);
+    $got_var      =~ s/\$FOO/     \$got/;
+    $expected_var =~ s/\$FOO/\$expected/;
+
+    # Describe the two values found at that path.
+    my @vals  = @{$Stack[-1]{vals}}[0,1];
+    my @shown = map {
+        !defined $_ ? 'undef'          :
+        $_ eq $DNE  ? "Does not exist" :
+        ref $_      ? "$_"             :
+                      "'$_'";
+    } @vals;
+
+    my $out = "Structures begin differing at:\n"
+            . "$got_var = $shown[0]\n"
+            . "$expected_var = $shown[1]\n";
+    $out =~ s/^/    /msg;
+    return $out;
+}
+
+
+# Classifies a reference as the first of ARRAY, HASH, REF, SCALAR, GLOB,
+# CODE or Regexp that UNIVERSAL::isa() accepts; '' for anything else.
+sub _type {
+    my($thing) = @_;
+    if( ref $thing ) {
+        foreach my $kind (qw(ARRAY HASH REF SCALAR GLOB CODE Regexp)) {
+            return $kind if UNIVERSAL::isa($thing, $kind);
+        }
+    }
+    return '';
+}
+
+=back
+
+
+=head2 Diagnostics
+
+If you pick the right test function, you'll usually get a good idea of
+what went wrong when it failed.  But sometimes it doesn't work out
+that way.  So here we have ways for you to write your own diagnostic
+messages which are safer than just C<print STDERR>.
+
+=over 4
+
+=item B<diag>
+
+  diag(@diagnostic_message);
+
+Prints a diagnostic message which is guaranteed not to interfere with
+test output.  Like C<print>, @diagnostic_message is simply concatenated
+together.
+
+Handy for this sort of thing:
+
+    ok( grep(/foo/, @users), "There's a foo user" ) or
+        diag("Since there's no foo, check that /etc/bar is set up right");
+
+which would produce:
+
+    not ok 42 - There's a foo user
+    #   Failed test 'There's a foo user'
+    #   in foo.t at line 52.
+    # Since there's no foo, check that /etc/bar is set up right
+
+You might remember C<ok() or diag()> with the mnemonic C<open() or
+die()>.
+
+B<NOTE> The exact formatting of the diagnostic output is still
+changing, but it is guaranteed that whatever you throw at it it won't
+interfere with the test.
+
+=cut
+
+sub diag {
+    my $tb = Test::More->builder;
+
+    $tb->diag(@_);
+}
+
+
+=back
+
+
+=head2 Conditional tests
+
+Sometimes running a test under certain conditions will cause the
+test script to die.  A certain function or method isn't implemented
+(such as fork() on MacOS), some resource isn't available (like a 
+net connection) or a module isn't available.  In these cases it's
+necessary to skip tests, or declare that they are supposed to fail
+but will work in the future (a todo test).
+
+For more details on the mechanics of skip and todo tests see
+L<Test::Harness>.
+
+The way Test::More handles this is with a named block.  Basically, a
+block of tests which can be skipped over or made todo.  It's best if I
+just show you...
+
+=over 4
+
+=item B<SKIP: BLOCK>
+
+  SKIP: {
+      skip $why, $how_many if $condition;
+
+      ...normal testing code goes here...
+  }
+
+This declares a block of tests that might be skipped, $how_many tests
+there are, $why and under what $condition to skip them.  An example is
+the easiest way to illustrate:
+
+    SKIP: {
+        eval { require HTML::Lint };
+
+        skip "HTML::Lint not installed", 2 if $@;
+
+        my $lint = new HTML::Lint;
+        isa_ok( $lint, "HTML::Lint" );
+
+        $lint->parse( $html );
+        is( $lint->errors, 0, "No errors found in HTML" );
+    }
+
+If the user does not have HTML::Lint installed, the whole block of
+code I<won't be run at all>.  Test::More will output special ok's
+which Test::Harness interprets as skipped, but passing, tests.
+
+It's important that $how_many accurately reflects the number of tests
+in the SKIP block so the # of tests run will match up with your plan.
+If your plan is C<no_plan> $how_many is optional and will default to 1.
+
+It's perfectly safe to nest SKIP blocks.  Each SKIP block must have
+the label C<SKIP>, or Test::More can't work its magic.
+
+You don't skip tests which are failing because there's a bug in your
+program, or for which you don't yet have code written.  For that you
+use TODO.  Read on.
+
+=cut
+
+#'#
+sub skip {
+    my($why, $how_many) = @_;
+    my $tb = Test::More->builder;
+
+    unless( defined $how_many ) {
+        # $how_many can only be avoided when no_plan is in use.
+        _carp "skip() needs to know \$how_many tests are in the block"
+          unless $tb->has_plan eq 'no_plan';
+        $how_many = 1;
+    }
+
+    if( defined $how_many and $how_many =~ /\D/ ) {
+        _carp "skip() was passed a non-numeric number of tests.  Did you get the arguments backwards?";
+        $how_many = 1;
+    }
+
+    for( 1..$how_many ) {
+        $tb->skip($why);
+    }
+
+    local $^W = 0;
+    last SKIP;
+}
+
+
+=item B<TODO: BLOCK>
+
+    TODO: {
+        local $TODO = $why if $condition;
+
+        ...normal testing code goes here...
+    }
+
+Declares a block of tests you expect to fail and $why.  Perhaps it's
+because you haven't fixed a bug or haven't finished a new feature:
+
+    TODO: {
+        local $TODO = "URI::Geller not finished";
+
+        my $card = "Eight of clubs";
+        is( URI::Geller->your_card, $card, 'Is THIS your card?' );
+
+        my $spoon;
+        URI::Geller->bend_spoon;
+        is( $spoon, 'bent',    "Spoon bending, that's original" );
+    }
+
+With a todo block, the tests inside are expected to fail.  Test::More
+will run the tests normally, but print out special flags indicating
+they are "todo".  Test::Harness will interpret failures as being ok.
+Should anything succeed, it will report it as an unexpected success.
+You then know the thing you had todo is done and can remove the
+TODO flag.
+
+The nice part about todo tests, as opposed to simply commenting out a
+block of tests, is it's like having a programmatic todo list.  You know
+how much work is left to be done, you're aware of what bugs there are,
+and you'll know immediately when they're fixed.
+
+Once a todo test starts succeeding, simply move it outside the block.
+When the block is empty, delete it.
+
+B<NOTE>: TODO tests require a Test::Harness upgrade else it will
+treat it as a normal failure.  See L<CAVEATS and NOTES>.
+
+
+=item B<todo_skip>
+
+    TODO: {
+        todo_skip $why, $how_many if $condition;
+
+        ...normal testing code...
+    }
+
+With todo tests, it's best to have the tests actually run.  That way
+you'll know when they start passing.  Sometimes this isn't possible.
+Often a failing test will cause the whole program to die or hang, even
+inside an C<eval BLOCK> and using C<alarm>.  In these extreme
+cases you have no choice but to skip over the broken tests entirely.
+
+The syntax and behavior is similar to a C<SKIP: BLOCK> except the
+tests will be marked as failing but todo.  Test::Harness will
+interpret them as passing.
+
+=cut
+
+sub todo_skip {
+    my($why, $how_many) = @_;
+    my $tb = Test::More->builder;
+
+    unless( defined $how_many ) {
+        # $how_many can only be avoided when no_plan is in use.
+        _carp "todo_skip() needs to know \$how_many tests are in the block"
+          unless $tb->has_plan eq 'no_plan';
+        $how_many = 1;
+    }
+
+    for( 1..$how_many ) {
+        $tb->todo_skip($why);
+    }
+
+    local $^W = 0;
+    last TODO;
+}
+
+=item When do I use SKIP vs. TODO?
+
+B<If it's something the user might not be able to do>, use SKIP.
+This includes optional modules that aren't installed, running under
+an OS that doesn't have some feature (like fork() or symlinks), or maybe
+you need an Internet connection and one isn't available.
+
+B<If it's something the programmer hasn't done yet>, use TODO.  This
+is for any code you haven't written yet, or bugs you have yet to fix,
+but want to put tests in your testing script (always a good idea).
+
+
+=back
+
+
+=head2 Test control
+
+=over 4
+
+=item B<BAIL_OUT>
+
+    BAIL_OUT($reason);
+
+Indicates to the harness that things are going so badly all testing
+should terminate.  This includes the running of any additional test scripts.
+
+This is typically used when testing cannot continue such as a critical
+module failing to compile or a necessary external utility not being
+available such as a database connection failing.
+
+The test will exit with 255.
+
+=cut
+
+sub BAIL_OUT {
+    my $reason = shift;
+    my $tb = Test::More->builder;
+
+    $tb->BAIL_OUT($reason);
+}
+
+=back
+
+
+=head2 Discouraged comparison functions
+
+The use of the following functions is discouraged as they are not
+actually testing functions and produce no diagnostics to help figure
+out what went wrong.  They were written before is_deeply() existed
+because I couldn't figure out how to display a useful diff of two
+arbitrary data structures.
+
+These functions are usually used inside an ok().
+
+    ok( eq_array(\@this, \@that) );
+
+C<is_deeply()> can do that better and with diagnostics.  
+
+    is_deeply( \@this, \@that );
+
+They may be deprecated in future versions.
+
+=over 4
+
+=item B<eq_array>
+
+  my $is_eq = eq_array(\@this, \@that);
+
+Checks if two arrays are equivalent.  This is a deep check, so
+multi-level structures are handled correctly.
+
+=cut
+
+#'#
+sub eq_array {
+    local @Data_Stack;
+    _deep_check(@_);
+}
+
+sub _eq_array  {
+    my($a1, $a2) = @_;
+
+    if( grep !_type($_) eq 'ARRAY', $a1, $a2 ) {
+        warn "eq_array passed a non-array ref";
+        return 0;
+    }
+
+    return 1 if $a1 eq $a2;
+
+    my $ok = 1;
+    my $max = $#$a1 > $#$a2 ? $#$a1 : $#$a2;
+    for (0..$max) {
+        my $e1 = $_ > $#$a1 ? $DNE : $a1->[$_];
+        my $e2 = $_ > $#$a2 ? $DNE : $a2->[$_];
+
+        push @Data_Stack, { type => 'ARRAY', idx => $_, vals => [$e1, $e2] };
+        $ok = _deep_check($e1,$e2);
+        pop @Data_Stack if $ok;
+
+        last unless $ok;
+    }
+
+    return $ok;
+}
+
+sub _deep_check {
+    my($e1, $e2) = @_;
+    my $tb = Test::More->builder;
+
+    my $ok = 0;
+
+    # Effectively turn %Refs_Seen into a stack.  This avoids picking up
+    # the same reference used twice (such as [\$a, \$a]) to be considered
+    # circular.
+    local %Refs_Seen = %Refs_Seen;
+
+    {
+        # Quiet uninitialized value warnings when comparing undefs.
+        local $^W = 0; 
+
+        $tb->_unoverload_str(\$e1, \$e2);
+
+        # Either they're both references or both not.
+        my $same_ref = !(!ref $e1 xor !ref $e2);
+	my $not_ref  = (!ref $e1 and !ref $e2);
+
+        if( defined $e1 xor defined $e2 ) {
+            $ok = 0;
+        }
+        elsif ( $e1 == $DNE xor $e2 == $DNE ) {
+            $ok = 0;
+        }
+        elsif ( $same_ref and ($e1 eq $e2) ) {
+            $ok = 1;
+        }
+	elsif ( $not_ref ) {
+	    push @Data_Stack, { type => '', vals => [$e1, $e2] };
+	    $ok = 0;
+	}
+        else {
+            if( $Refs_Seen{$e1} ) {
+                return $Refs_Seen{$e1} eq $e2;
+            }
+            else {
+                $Refs_Seen{$e1} = "$e2";
+            }
+
+            my $type = _type($e1);
+            $type = 'DIFFERENT' unless _type($e2) eq $type;
+
+            if( $type eq 'DIFFERENT' ) {
+                push @Data_Stack, { type => $type, vals => [$e1, $e2] };
+                $ok = 0;
+            }
+            elsif( $type eq 'ARRAY' ) {
+                $ok = _eq_array($e1, $e2);
+            }
+            elsif( $type eq 'HASH' ) {
+                $ok = _eq_hash($e1, $e2);
+            }
+            elsif( $type eq 'REF' ) {
+                push @Data_Stack, { type => $type, vals => [$e1, $e2] };
+                $ok = _deep_check($$e1, $$e2);
+                pop @Data_Stack if $ok;
+            }
+            elsif( $type eq 'SCALAR' ) {
+                push @Data_Stack, { type => 'REF', vals => [$e1, $e2] };
+                $ok = _deep_check($$e1, $$e2);
+                pop @Data_Stack if $ok;
+            }
+            elsif( $type ) {
+                push @Data_Stack, { type => $type, vals => [$e1, $e2] };
+                $ok = 0;
+            }
+	    else {
+		_whoa(1, "No type in _deep_check");
+	    }
+        }
+    }
+
+    return $ok;
+}
+
+
+sub _whoa {
+    my($check, $desc) = @_;
+    if( $check ) {
+        die <<WHOA;
+WHOA!  $desc
+This should never happen!  Please contact the author immediately!
+WHOA
+    }
+}
+
+
+=item B<eq_hash>
+
+  my $is_eq = eq_hash(\%this, \%that);
+
+Determines if the two hashes contain the same keys and values.  This
+is a deep check.
+
+=cut
+
+sub eq_hash {
+    local @Data_Stack;
+    return _deep_check(@_);
+}
+
+sub _eq_hash {
+    my($a1, $a2) = @_;
+
+    if( grep !_type($_) eq 'HASH', $a1, $a2 ) {
+        warn "eq_hash passed a non-hash ref";
+        return 0;
+    }
+
+    return 1 if $a1 eq $a2;
+
+    my $ok = 1;
+    my $bigger = keys %$a1 > keys %$a2 ? $a1 : $a2;
+    foreach my $k (keys %$bigger) {
+        my $e1 = exists $a1->{$k} ? $a1->{$k} : $DNE;
+        my $e2 = exists $a2->{$k} ? $a2->{$k} : $DNE;
+
+        push @Data_Stack, { type => 'HASH', idx => $k, vals => [$e1, $e2] };
+        $ok = _deep_check($e1, $e2);
+        pop @Data_Stack if $ok;
+
+        last unless $ok;
+    }
+
+    return $ok;
+}
+
+=item B<eq_set>
+
+  my $is_eq = eq_set(\@this, \@that);
+
+Similar to eq_array(), except the order of the elements is B<not>
+important.  This is a deep check, but the irrelevancy of order only
+applies to the top level.
+
+    ok( eq_set(\@this, \@that) );
+
+Is better written:
+
+    is_deeply( [sort @this], [sort @that] );
+
+B<NOTE> By historical accident, this is not a true set comparison.
+While the order of elements does not matter, duplicate elements do.
+
+B<NOTE> eq_set() does not know how to deal with references at the top
+level.  The following is an example of a comparison which might not work:
+
+    eq_set([\1, \2], [\2, \1]);
+
+Test::Deep contains much better set comparison functions.
+
+=cut
+
+sub eq_set  {
+    my($a1, $a2) = @_;
+    return 0 unless @$a1 == @$a2;
+
+    # There's faster ways to do this, but this is easiest.
+    local $^W = 0;
+
+    # It really doesn't matter how we sort them, as long as both arrays are 
+    # sorted with the same algorithm.
+    #
+    # Ensure that references are not accidentally treated the same as a
+    # string containing the reference.
+    #
+    # Have to inline the sort routine due to a threading/sort bug.
+    # See [rt.cpan.org 6782]
+    #
+    # I don't know how references would be sorted so we just don't sort
+    # them.  This means eq_set doesn't really work with refs.
+    return eq_array(
+           [grep(ref, @$a1), sort( grep(!ref, @$a1) )],
+           [grep(ref, @$a2), sort( grep(!ref, @$a2) )],
+    );
+}
+
+=back
+
+
+=head2 Extending and Embedding Test::More
+
+Sometimes the Test::More interface isn't quite enough.  Fortunately,
+Test::More is built on top of Test::Builder which provides a single,
+unified backend for any test library to use.  This means two test
+libraries which both use Test::Builder B<can be used together in the
+same program>.
+
+If you simply want to do a little tweaking of how the tests behave,
+you can access the underlying Test::Builder object like so:
+
+=over 4
+
+=item B<builder>
+
+    my $test_builder = Test::More->builder;
+
+Returns the Test::Builder object underlying Test::More for you to play
+with.
+
+
+=back
+
+
+=head1 EXIT CODES
+
+If all your tests passed, Test::Builder will exit with zero (which is
+normal).  If anything failed it will exit with how many failed.  If
+you run less (or more) tests than you planned, the missing (or extras)
+will be considered failures.  If no tests were ever run Test::Builder
+will throw a warning and exit with 255.  If the test died, even after
+having successfully completed all its tests, it will still be
+considered a failure and will exit with 255.
+
+So the exit codes are...
+
+    0                   all tests successful
+    255                 test died or all passed but wrong # of tests run
+    any other number    how many failed (including missing or extras)
+
+If you fail more than 254 tests, it will be reported as 254.
+
+B<NOTE>  This behavior may go away in future versions.
+
+
+=head1 CAVEATS and NOTES
+
+=over 4
+
+=item Backwards compatibility
+
+Test::More works with Perls as old as 5.004_05.
+
+
+=item Overloaded objects
+
+String overloaded objects are compared B<as strings> (or in cmp_ok()'s
+case, strings or numbers as appropriate to the comparison op).  This
+prevents Test::More from piercing an object's interface allowing
+better blackbox testing.  So if a function starts returning overloaded
+objects instead of bare strings your tests won't notice the
+difference.  This is good.
+
+However, it does mean that functions like is_deeply() cannot be used to
+test the internals of string overloaded objects.  In this case I would
+suggest Test::Deep which contains more flexible testing functions for
+complex data structures.
+
+
+=item Threads
+
+Test::More will only be aware of threads if "use threads" has been done
+I<before> Test::More is loaded.  This is ok:
+
+    use threads;
+    use Test::More;
+
+This may cause problems:
+
+    use Test::More;
+    use threads;
+
+
+=item Test::Harness upgrade
+
+no_plan and todo depend on new Test::Harness features and fixes.  If
+you're going to distribute tests that use no_plan or todo your
+end-users will have to upgrade Test::Harness to the latest one on
+CPAN.  If you avoid no_plan and TODO tests, the stock Test::Harness
+will work fine.
+
+Installing Test::More should also upgrade Test::Harness.
+
+=back
+
+
+=head1 HISTORY
+
+This is a case of convergent evolution with Joshua Pritikin's Test
+module.  I was largely unaware of its existence when I'd first
+written my own ok() routines.  This module exists because I can't
+figure out how to easily wedge test names into Test's interface (along
+with a few other problems).
+
+The goal here is to have a testing utility that's simple to learn,
+quick to use and difficult to trip yourself up with while still
+providing more flexibility than the existing Test.pm.  As such, the
+names of the most common routines are kept tiny, special cases and
+magic side-effects are kept to a minimum.  WYSIWYG.
+
+
+=head1 SEE ALSO
+
+L<Test::Simple> if all this confuses you and you just want to write
+some tests.  You can upgrade to Test::More later (it's forward
+compatible).
+
+L<Test> is the old testing module.  Its main benefit is that it has
+been distributed with Perl since 5.004_05.
+
+L<Test::Harness> for details on how your test results are interpreted
+by Perl.
+
+L<Test::Differences> for more ways to test complex data structures.
+And it plays well with Test::More.
+
+L<Test::Class> is like XUnit but more perlish.
+
+L<Test::Deep> gives you more powerful complex data structure testing.
+
+L<Test::Unit> is XUnit style testing.
+
+L<Test::Inline> shows the idea of embedded testing.
+
+L<Bundle::Test> installs a whole bunch of useful test modules.
+
+
+=head1 AUTHORS
+
+Michael G Schwern E<lt>schwern at pobox.comE<gt> with much inspiration
+from Joshua Pritikin's Test module and lots of help from Barrie
+Slaymaker, Tony Bowden, blackstar.co.uk, chromatic, Fergal Daly and
+the perl-qa gang.
+
+
+=head1 BUGS
+
+See F<http://rt.cpan.org> to report and view bugs.
+
+
+=head1 COPYRIGHT
+
+Copyright 2001, 2002, 2004 by Michael G Schwern E<lt>schwern at pobox.comE<gt>.
+
+This program is free software; you can redistribute it and/or 
+modify it under the same terms as Perl itself.
+
+See F<http://www.perl.com/perl/misc/Artistic.html>
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Simple.pm
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Simple.pm	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Simple.pm	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,229 @@
+package Test::Simple;
+
+use 5.004;
+
+use strict 'vars';
+use vars qw($VERSION @EXPORT);
+$VERSION = '0.64';
+$VERSION = eval $VERSION;    # make the alpha version come out as a number
+
+use base qw(Test::Builder::Module);
+@EXPORT = qw(ok);
+
+my $CLASS = __PACKAGE__;
+
+
+=head1 NAME
+
+Test::Simple - Basic utilities for writing tests.
+
+=head1 SYNOPSIS
+
+  use Test::Simple tests => 1;
+
+  ok( $foo eq $bar, 'foo is bar' );
+
+
+=head1 DESCRIPTION
+
+** If you are unfamiliar with testing B<read Test::Tutorial> first! **
+
+This is an extremely simple, extremely basic module for writing tests
+suitable for CPAN modules and other pursuits.  If you wish to do more
+complicated testing, use the Test::More module (a drop-in replacement
+for this one).
+
+The basic unit of Perl testing is the ok.  For each thing you want to
+test your program will print out an "ok" or "not ok" to indicate pass
+or fail.  You do this with the ok() function (see below).
+
+The only other constraint is you must pre-declare how many tests you
+plan to run.  This is in case something goes horribly wrong during the
+test and your test program aborts, or skips a test or whatever.  You
+do this like so:
+
+    use Test::Simple tests => 23;
+
+You must have a plan.
+
+
+=over 4
+
+=item B<ok>
+
+  ok( $foo eq $bar, $name );
+  ok( $foo eq $bar );
+
+ok() is given an expression (in this case C<$foo eq $bar>).  If it's
+true, the test passed.  If it's false, it didn't.  That's about it.
+
+ok() prints out either "ok" or "not ok" along with a test number (it
+keeps track of that for you).
+
+  # This produces "ok 1 - Hell not yet frozen over" (or not ok)
+  ok( get_temperature($hell) > 0, 'Hell not yet frozen over' );
+
+If you provide a $name, that will be printed along with the "ok/not
+ok" to make it easier to find your test when it fails (just search for
+the name).  It also makes it easier for the next guy to understand
+what your test is for.  It's highly recommended you use test names.
+
+All tests are run in scalar context.  So this:
+
+    ok( @stuff, 'I have some stuff' );
+
+will do what you mean (fail if stuff is empty)
+
+=cut
+
+sub ok ($;$) {
+    $CLASS->builder->ok(@_);
+}
+
+
+=back
+
+Test::Simple will start by printing the number of tests run in the form
+"1..M" (so "1..5" means you're going to run 5 tests).  This strange
+format lets Test::Harness know how many tests you plan on running in
+case something goes horribly wrong.
+
+If all your tests passed, Test::Simple will exit with zero (which is
+normal).  If anything failed it will exit with how many failed.  If
+you run less (or more) tests than you planned, the missing (or extras)
+will be considered failures.  If no tests were ever run Test::Simple
+will throw a warning and exit with 255.  If the test died, even after
+having successfully completed all its tests, it will still be
+considered a failure and will exit with 255.
+
+So the exit codes are...
+
+    0                   all tests successful
+    255                 test died or all passed but wrong # of tests run
+    any other number    how many failed (including missing or extras)
+
+If you fail more than 254 tests, it will be reported as 254.
+
+This module is by no means trying to be a complete testing system.
+It's just to get you started.  Once you're off the ground it's
+recommended you look at L<Test::More>.
+
+
+=head1 EXAMPLE
+
+Here's an example of a simple .t file for the fictional Film module.
+
+    use Test::Simple tests => 5;
+
+    use Film;  # What you're testing.
+
+    my $btaste = Film->new({ Title    => 'Bad Taste',
+                             Director => 'Peter Jackson',
+                             Rating   => 'R',
+                             NumExplodingSheep => 1
+                           });
+    ok( defined($btaste) && ref $btaste eq 'Film',    'new() works' );
+
+    ok( $btaste->Title      eq 'Bad Taste',     'Title() get'    );
+    ok( $btaste->Director   eq 'Peter Jackson', 'Director() get' );
+    ok( $btaste->Rating     eq 'R',             'Rating() get'   );
+    ok( $btaste->NumExplodingSheep == 1,        'NumExplodingSheep() get' );
+
+It will produce output like this:
+
+    1..5
+    ok 1 - new() works
+    ok 2 - Title() get
+    ok 3 - Director() get
+    not ok 4 - Rating() get
+    #   Failed test 'Rating() get'
+    #   in t/film.t at line 14.
+    ok 5 - NumExplodingSheep() get
+    # Looks like you failed 1 tests of 5
+
+Indicating the Film::Rating() method is broken.
+
+
+=head1 CAVEATS
+
+Test::Simple will only report a maximum of 254 failures in its exit
+code.  If this is a problem, you probably have a huge test script.
+Split it into multiple files.  (Otherwise blame the Unix folks for
+using an unsigned short integer as the exit status).
+
+Because VMS's exit codes are much, much different than the rest of the
+universe, and perl does horrible mangling to them that gets in my way,
+it works like this on VMS.
+
+    0     SS$_NORMAL        all tests successful
+    4     SS$_ABORT         something went wrong
+
+Unfortunately, I can't differentiate any further.
+
+
+=head1 NOTES
+
+Test::Simple is B<explicitly> tested all the way back to perl 5.004.
+
+Test::Simple is thread-safe in perl 5.8.0 and up.
+
+=head1 HISTORY
+
+This module was conceived while talking with Tony Bowden in his
+kitchen one night about the problems I was having writing some really
+complicated feature into the new Testing module.  He observed that the
+main problem is not dealing with these edge cases but that people hate
+to write tests B<at all>.  What was needed was a dead simple module
+that took all the hard work out of testing and was really, really easy
+to learn.  Paul Johnson simultaneously had this idea (unfortunately,
+he wasn't in Tony's kitchen).  This is it.
+
+
+=head1 SEE ALSO
+
+=over 4
+
+=item L<Test::More>
+
+More testing functions!  Once you outgrow Test::Simple, look at
+Test::More.  Test::Simple is 100% forward compatible with Test::More
+(i.e. you can just use Test::More instead of Test::Simple in your
+programs and things will still work).
+
+=item L<Test>
+
+The original Perl testing module.
+
+=item L<Test::Unit>
+
+Elaborate unit testing.
+
+=item L<Test::Inline>, L<SelfTest>
+
+Embed tests in your code!
+
+=item L<Test::Harness>
+
+Interprets the output of your test program.
+
+=back
+
+
+=head1 AUTHORS
+
+Idea by Tony Bowden and Paul Johnson, code by Michael G Schwern
+E<lt>schwern at pobox.comE<gt>, wardrobe by Calvin Klein.
+
+
+=head1 COPYRIGHT
+
+Copyright 2001, 2002, 2004 by Michael G Schwern E<lt>schwern at pobox.comE<gt>.
+
+This program is free software; you can redistribute it and/or 
+modify it under the same terms as Perl itself.
+
+See F<http://www.perl.com/perl/misc/Artistic.html>
+
+=cut
+
+1;

Added: trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Tutorial.pod
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Tutorial.pod	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lib/Test/Tutorial.pod	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,603 @@
+=head1 NAME
+
+Test::Tutorial - A tutorial about writing really basic tests
+
+=head1 DESCRIPTION
+
+
+I<AHHHHHHH!!!!  NOT TESTING!  Anything but testing!  
+Beat me, whip me, send me to Detroit, but don't make 
+me write tests!>
+
+I<*sob*>
+
+I<Besides, I don't know how to write the damned things.>
+
+
+Is this you?  Is writing tests right up there with writing
+documentation and having your fingernails pulled out?  Did you open up
+a test and read 
+
+    ######## We start with some black magic
+
+and decide that's quite enough for you?
+
+It's ok.  That's all gone now.  We've done all the black magic for
+you.  And here are the tricks...
+
+
+=head2 Nuts and bolts of testing.
+
+Here's the most basic test program.
+
+    #!/usr/bin/perl -w
+
+    print "1..1\n";
+
+    print 1 + 1 == 2 ? "ok 1\n" : "not ok 1\n";
+
+since 1 + 1 is 2, it prints:
+
+    1..1
+    ok 1
+
+What this says is: C<1..1> "I'm going to run one test." [1] C<ok 1>
+"The first test passed".  And that's about all the magic there is to
+testing.  Your basic unit of testing is the I<ok>.  For each thing you
+test, an C<ok> is printed.  Simple.  B<Test::Harness> interprets your test
+results to determine if you succeeded or failed (more on that later).
+
+Writing all these print statements rapidly gets tedious.  Fortunately,
+there's B<Test::Simple>.  It has one function, C<ok()>.
+
+    #!/usr/bin/perl -w
+
+    use Test::Simple tests => 1;
+
+    ok( 1 + 1 == 2 );
+
+and that does the same thing as the code above.  C<ok()> is the backbone
+of Perl testing, and we'll be using it instead of roll-your-own from
+here on.  If C<ok()> gets a true value, the test passes.  False, it
+fails.
+
+    #!/usr/bin/perl -w
+
+    use Test::Simple tests => 2;
+    ok( 1 + 1 == 2 );
+    ok( 2 + 2 == 5 );
+
+from that comes
+
+    1..2
+    ok 1
+    not ok 2
+    #     Failed test (test.pl at line 5)
+    # Looks like you failed 1 tests of 2.
+
+C<1..2> "I'm going to run two tests."  This number is used to ensure
+your test program ran all the way through and didn't die or skip some
+tests.  C<ok 1> "The first test passed."  C<not ok 2> "The second test
+failed".  Test::Simple helpfully prints out some extra commentary about
+your tests.
+
+It's not scary.  Come, hold my hand.  We're going to give an example
+of testing a module.  For our example, we'll be testing a date
+library, B<Date::ICal>.  It's on CPAN, so download a copy and follow
+along. [2]
+
+
+=head2 Where to start?
+
+This is the hardest part of testing, where do you start?  People often
+get overwhelmed at the apparent enormity of the task of testing a
+whole module.  Best place to start is at the beginning.  Date::ICal is
+an object-oriented module, and that means you start by making an
+object.  So we test C<new()>.
+
+    #!/usr/bin/perl -w
+
+    use Test::Simple tests => 2;
+
+    use Date::ICal;
+
+    my $ical = Date::ICal->new;         # create an object
+    ok( defined $ical );                # check that we got something
+    ok( $ical->isa('Date::ICal') );     # and it's the right class
+
+run that and you should get:
+
+    1..2
+    ok 1
+    ok 2
+
+congratulations, you've written your first useful test.
+
+
+=head2 Names
+
+That output isn't terribly descriptive, is it?  When you have two
+tests you can figure out which one is #2, but what if you have 102?
+
+Each test can be given a little descriptive name as the second
+argument to C<ok()>.
+
+    use Test::Simple tests => 2;
+
+    ok( defined $ical,              'new() returned something' );
+    ok( $ical->isa('Date::ICal'),   "  and it's the right class" );
+
+So now you'd see...
+
+    1..2
+    ok 1 - new() returned something
+    ok 2 -   and it's the right class
+
+
+=head2 Test the manual
+
+Simplest way to build up a decent testing suite is to just test what
+the manual says it does. [3] Let's pull something out of the 
+L<Date::ICal/SYNOPSIS> and test that all its bits work.
+
+    #!/usr/bin/perl -w
+
+    use Test::Simple tests => 8;
+
+    use Date::ICal;
+
+    $ical = Date::ICal->new( year => 1964, month => 10, day => 16, 
+                             hour => 16, min => 12, sec => 47, 
+                             tz => '0530' );
+
+    ok( defined $ical,            'new() returned something' );
+    ok( $ical->isa('Date::ICal'), "  and it's the right class" );
+    ok( $ical->sec   == 47,       '  sec()'   );
+    ok( $ical->min   == 12,       '  min()'   );    
+    ok( $ical->hour  == 16,       '  hour()'  );
+    ok( $ical->day   == 17,       '  day()'   );
+    ok( $ical->month == 10,       '  month()' );
+    ok( $ical->year  == 1964,     '  year()'  );
+
+run that and you get:
+
+    1..8
+    ok 1 - new() returned something
+    ok 2 -   and it's the right class
+    ok 3 -   sec()
+    ok 4 -   min()
+    ok 5 -   hour()
+    not ok 6 -   day()
+    #     Failed test (- at line 16)
+    ok 7 -   month()
+    ok 8 -   year()
+    # Looks like you failed 1 tests of 8.
+
+Whoops, a failure! [4] Test::Simple helpfully lets us know on what line
+the failure occurred, but not much else.  We were supposed to get 17,
+but we didn't.  What did we get??  Dunno.  We'll have to re-run the
+test in the debugger or throw in some print statements to find out.
+
+Instead, we'll switch from B<Test::Simple> to B<Test::More>.  B<Test::More>
+does everything B<Test::Simple> does, and more!  In fact, Test::More does
+things I<exactly> the way Test::Simple does.  You can literally swap
+Test::Simple out and put Test::More in its place.  That's just what
+we're going to do.
+
+Test::More does more than Test::Simple.  The most important difference
+at this point is it provides more informative ways to say "ok".
+Although you can write almost any test with a generic C<ok()>, it
+can't tell you what went wrong.  Instead, we'll use the C<is()>
+function, which lets us declare that something is supposed to be the
+same as something else:
+
+    #!/usr/bin/perl -w
+
+    use Test::More tests => 8;
+
+    use Date::ICal;
+
+    $ical = Date::ICal->new( year => 1964, month => 10, day => 16, 
+                             hour => 16, min => 12, sec => 47, 
+                             tz => '0530' );
+
+    ok( defined $ical,            'new() returned something' );
+    ok( $ical->isa('Date::ICal'), "  and it's the right class" );
+    is( $ical->sec,     47,       '  sec()'   );
+    is( $ical->min,     12,       '  min()'   );    
+    is( $ical->hour,    16,       '  hour()'  );
+    is( $ical->day,     17,       '  day()'   );
+    is( $ical->month,   10,       '  month()' );
+    is( $ical->year,    1964,     '  year()'  );
+
+"Is C<$ical-E<gt>sec> 47?"  "Is C<$ical-E<gt>min> 12?"  With C<is()> in place,
+you get some more information:
+
+    1..8
+    ok 1 - new() returned something
+    ok 2 -   and it's the right class
+    ok 3 -   sec()
+    ok 4 -   min()
+    ok 5 -   hour()
+    not ok 6 -   day()
+    #     Failed test (- at line 16)
+    #          got: '16'
+    #     expected: '17'
+    ok 7 -   month()
+    ok 8 -   year()
+    # Looks like you failed 1 tests of 8.
+
+letting us know that C<$ical-E<gt>day> returned 16, but we expected 17.  A
+quick check shows that the code is working fine, we made a mistake
+when writing up the tests.  Just change it to:
+
+    is( $ical->day,     16,       '  day()'   );
+
+and everything works.
+
+So any time you're doing a "this equals that" sort of test, use C<is()>.
+It even works on arrays.  The test is always in scalar context, so you
+can test how many elements are in a list this way. [5]
+
+    is( @foo, 5, 'foo has 5 elements' );
+
+
+=head2 Sometimes the tests are wrong
+
+Which brings us to a very important lesson.  Code has bugs.  Tests are
+code.  Ergo, tests have bugs.  A failing test could mean a bug in the
+code, but don't discount the possibility that the test is wrong.
+
+On the flip side, don't be tempted to prematurely declare a test
+incorrect just because you're having trouble finding the bug.
+Invalidating a test isn't something to be taken lightly, and don't use
+it as a cop out to avoid work.
+
+
+=head2 Testing lots of values
+
+We're going to be wanting to test a lot of dates here, trying to trick
+the code with lots of different edge cases.  Does it work before 1970?
+After 2038?  Before 1904?  Do years after 10,000 give it trouble?
+Does it get leap years right?  We could keep repeating the code above,
+or we could set up a little try/expect loop.
+
+    use Test::More tests => 32;
+    use Date::ICal;
+
+    my %ICal_Dates = (
+            # An ICal string     And the year, month, date
+            #                    hour, minute and second we expect.
+            '19971024T120000' =>    # from the docs.
+                                [ 1997, 10, 24, 12,  0,  0 ],
+            '20390123T232832' =>    # after the Unix epoch
+                                [ 2039,  1, 23, 23, 28, 32 ],
+            '19671225T000000' =>    # before the Unix epoch
+                                [ 1967, 12, 25,  0,  0,  0 ],
+            '18990505T232323' =>    # before the MacOS epoch
+                                [ 1899,  5,  5, 23, 23, 23 ],
+    );
+
+
+    while( my($ical_str, $expect) = each %ICal_Dates ) {
+        my $ical = Date::ICal->new( ical => $ical_str );
+
+        ok( defined $ical,            "new(ical => '$ical_str')" );
+        ok( $ical->isa('Date::ICal'), "  and it's the right class" );
+
+        is( $ical->year,    $expect->[0],     '  year()'  );
+        is( $ical->month,   $expect->[1],     '  month()' );
+        is( $ical->day,     $expect->[2],     '  day()'   );
+        is( $ical->hour,    $expect->[3],     '  hour()'  );
+        is( $ical->min,     $expect->[4],     '  min()'   );    
+        is( $ical->sec,     $expect->[5],     '  sec()'   );
+    }
+
+So now we can test bunches of dates by just adding them to
+C<%ICal_Dates>.  Now that it's less work to test with more dates, you'll
+be inclined to just throw more in as you think of them.
+Only problem is, every time we add to that we have to keep adjusting
+the C<use Test::More tests =E<gt> ##> line.  That can rapidly get
+annoying.  There are two ways to make this work better.
+
+First, we can calculate the plan dynamically using the C<plan()>
+function.
+
+    use Test::More;
+    use Date::ICal;
+
+    my %ICal_Dates = (
+        ...same as before...
+    );
+
+    # For each key in the hash we're running 8 tests.
+    plan tests => keys(%ICal_Dates) * 8;
+
+Or to be even more flexible, we use C<no_plan>.  This means we're just
+running some tests, don't know how many. [6]
+
+    use Test::More 'no_plan';   # instead of tests => 32
+
+now we can just add tests and not have to do all sorts of math to
+figure out how many we're running.
+
+
+=head2 Informative names
+
+Take a look at this line here
+
+    ok( defined $ical,            "new(ical => '$ical_str')" );
+
+we've added more detail about what we're testing and the ICal string
+itself we're trying out to the name.  So you get results like:
+
+    ok 25 - new(ical => '19971024T120000')
+    ok 26 -   and it's the right class
+    ok 27 -   year()
+    ok 28 -   month()
+    ok 29 -   day()
+    ok 30 -   hour()
+    ok 31 -   min()
+    ok 32 -   sec()
+
+if something in there fails, you'll know which one it was and that
+will make tracking down the problem easier.  So try to put a bit of
+debugging information into the test names.
+
+Describe what the tests test, to make debugging a failed test easier
+for you or for the next person who runs your test.
+
+
+=head2 Skipping tests
+
+Poking around in the existing Date::ICal tests, I found this in
+F<t/01sanity.t> [7]
+
+    #!/usr/bin/perl -w
+
+    use Test::More tests => 7;
+    use Date::ICal;
+
+    # Make sure epoch time is being handled sanely.
+    my $t1 = Date::ICal->new( epoch => 0 );
+    is( $t1->epoch, 0,          "Epoch time of 0" );
+
+    # XXX This will only work on unix systems.
+    is( $t1->ical, '19700101Z', "  epoch to ical" );
+
+    is( $t1->year,  1970,       "  year()"  );
+    is( $t1->month, 1,          "  month()" );
+    is( $t1->day,   1,          "  day()"   );
+
+    # like the tests above, but starting with ical instead of epoch
+    my $t2 = Date::ICal->new( ical => '19700101Z' );
+    is( $t2->ical, '19700101Z', "Start of epoch in ICal notation" );
+
+    is( $t2->epoch, 0,          "  and back to ICal" );
+
+The beginning of the epoch is different on most non-Unix operating
+systems [8].  Even though Perl smooths out the differences for the most
+part, certain ports do it differently.  MacPerl is one off the top of
+my head. [9] We I<know> this will never work on MacOS.  So rather than
+just putting a comment in the test, we can explicitly say it's never
+going to work and skip the test.
+
+    use Test::More tests => 7;
+    use Date::ICal;
+
+    # Make sure epoch time is being handled sanely.
+    my $t1 = Date::ICal->new( epoch => 0 );
+    is( $t1->epoch, 0,          "Epoch time of 0" );
+
+    SKIP: {
+        skip('epoch to ICal not working on MacOS', 6) 
+            if $^O eq 'MacOS';
+
+        is( $t1->ical, '19700101Z', "  epoch to ical" );
+
+        is( $t1->year,  1970,       "  year()"  );
+        is( $t1->month, 1,          "  month()" );
+        is( $t1->day,   1,          "  day()"   );
+
+        # like the tests above, but starting with ical instead of epoch
+        my $t2 = Date::ICal->new( ical => '19700101Z' );
+        is( $t2->ical, '19700101Z', "Start of epoch in ICal notation" );
+
+        is( $t2->epoch, 0,          "  and back to ICal" );
+    }
+
+A little bit of magic happens here.  When running on anything but
+MacOS, all the tests run normally.  But when on MacOS, C<skip()> causes
+the entire contents of the SKIP block to be jumped over.  It's never
+run.  Instead, it prints special output that tells Test::Harness that
+the tests have been skipped.
+
+    1..7
+    ok 1 - Epoch time of 0
+    ok 2 # skip epoch to ICal not working on MacOS
+    ok 3 # skip epoch to ICal not working on MacOS
+    ok 4 # skip epoch to ICal not working on MacOS
+    ok 5 # skip epoch to ICal not working on MacOS
+    ok 6 # skip epoch to ICal not working on MacOS
+    ok 7 # skip epoch to ICal not working on MacOS
+
+This means your tests won't fail on MacOS.  This means fewer emails
+from MacPerl users telling you about failing tests that you know will
+never work.  You've got to be careful with skip tests.  These are for
+tests which don't work and I<never will>.  It is not for skipping
+genuine bugs (we'll get to that in a moment).
+
+The tests are wholly and completely skipped. [10]  This will work.
+
+    SKIP: {
+        skip("I don't wanna die!");
+
+        die, die, die, die, die;
+    }
+
+
+=head2 Todo tests
+
+Thumbing through the Date::ICal man page, I came across this:
+
+   ical
+
+       $ical_string = $ical->ical;
+
+   Retrieves, or sets, the date on the object, using any
+   valid ICal date/time string.
+
+"Retrieves or sets".  Hmmm, didn't see a test for using C<ical()> to set
+the date in the Date::ICal test suite.  So I'll write one.
+
+    use Test::More tests => 1;
+    use Date::ICal;
+
+    my $ical = Date::ICal->new;
+    $ical->ical('20201231Z');
+    is( $ical->ical, '20201231Z',   'Setting via ical()' );
+
+run that and I get
+
+    1..1
+    not ok 1 - Setting via ical()
+    #     Failed test (- at line 6)
+    #          got: '20010814T233649Z'
+    #     expected: '20201231Z'
+    # Looks like you failed 1 tests of 1.
+
+Whoops!  Looks like it's unimplemented.  Let's assume we don't have
+the time to fix this. [11] Normally, you'd just comment out the test
+and put a note in a todo list somewhere.  Instead, we're going to
+explicitly state "this test will fail" by wrapping it in a C<TODO> block.
+
+    use Test::More tests => 1;
+
+    TODO: {
+        local $TODO = 'ical($ical) not yet implemented';
+
+        my $ical = Date::ICal->new;
+        $ical->ical('20201231Z');
+
+        is( $ical->ical, '20201231Z',   'Setting via ical()' );
+    }
+
+Now when you run, it's a little different:
+
+    1..1
+    not ok 1 - Setting via ical() # TODO ical($ical) not yet implemented
+    #          got: '20010822T201551Z'
+    #     expected: '20201231Z'
+
+Test::More doesn't say "Looks like you failed 1 tests of 1".  That '#
+TODO' tells Test::Harness "this is supposed to fail" and it treats a
+failure as a successful test.  So you can write tests even before
+you've fixed the underlying code.
+
+If a TODO test passes, Test::Harness will report it "UNEXPECTEDLY
+SUCCEEDED".  When that happens, you simply remove the TODO block with
+C<local $TODO> and turn it into a real test.
+
+
+=head2 Testing with taint mode.
+
+Taint mode is a funny thing.  It's the globalest of all global
+features.  Once you turn it on, it affects I<all> code in your program
+and I<all> modules used (and all the modules they use).  If a single
+piece of code isn't taint clean, the whole thing explodes.  With that
+in mind, it's very important to ensure your module works under taint
+mode.
+
+It's very simple to have your tests run under taint mode.  Just throw
+a C<-T> into the C<#!> line.  Test::Harness will read the switches
+in C<#!> and use them to run your tests.
+
+    #!/usr/bin/perl -Tw
+
+    ...test normally here...
+
+So when you say C<make test> it will be run with taint mode and
+warnings on.
+
+
+=head1 FOOTNOTES
+
+=over 4
+
+=item 1
+
+The first number doesn't really mean anything, but it has to be 1.
+It's the second number that's important.
+
+=item 2
+
+For those following along at home, I'm using version 1.31.  It has
+some bugs, which is good -- we'll uncover them with our tests.
+
+=item 3
+
+You can actually take this one step further and test the manual
+itself.  Have a look at B<Test::Inline> (formerly B<Pod::Tests>).
+
+=item 4
+
+Yes, there's a mistake in the test suite.  What!  Me, contrived?
+
+=item 5
+
+We'll get to testing the contents of lists later.
+
+=item 6
+
+But what happens if your test program dies halfway through?!  Since we
+didn't say how many tests we're going to run, how can we know it
+failed?  No problem, Test::More employs some magic to catch that death
+and turn the test into a failure, even if every test passed up to that
+point.
+
+=item 7
+
+I cleaned it up a little.
+
+=item 8
+
+Most Operating Systems record time as the number of seconds since a
+certain date.  This date is the beginning of the epoch.  Unix's starts
+at midnight January 1st, 1970 GMT.
+
+=item 9
+
+MacOS's epoch is midnight January 1st, 1904.  VMS's is midnight,
+November 17th, 1858, but vmsperl emulates the Unix epoch so it's not a
+problem.
+
+=item 10
+
+As long as the code inside the SKIP block at least compiles.  Please
+don't ask how.  No, it's not a filter.
+
+=item 11
+
+Do NOT be tempted to use TODO tests as a way to avoid fixing simple
+bugs!
+
+=back
+
+=head1 AUTHORS
+
+Michael G Schwern E<lt>schwern at pobox.comE<gt> and the perl-qa dancers!
+
+=head1 COPYRIGHT
+
+Copyright 2001 by Michael G Schwern E<lt>schwern at pobox.comE<gt>.
+
+This documentation is free; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+Irrespective of its distribution, all code examples in these files
+are hereby placed into the public domain.  You are permitted and
+encouraged to use this code in your own programs for fun
+or for profit as you see fit.  A simple comment in the code giving
+credit would be courteous but is not required.
+
+=cut

Added: trunk/packages/bioperl/branches/upstream/current/t/lucy.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/lucy.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/lucy.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,55 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: lucy.t,v 1.2 2002/03/07 04:08:04 jason Exp $
+
+use strict;
+BEGIN {
+    # Use the installed Test.pm, falling back to the copy bundled in t/.
+    # `use` runs at compile time even inside `if`, so require/import here.
+    if ( !eval { require Test; 1 } ) {
+        require lib; lib->import('t'); require Test;
+    }
+    Test->import;
+    plan( tests => 22 );
+}
+
+
+use Bio::Tools::Lucy;
+use Bio::Root::IO;
+
+ok(1);
+
+# Bio::Tools::Lucy will find .qual, .info, and .stderr files in this folder
+my $lucy = Bio::Tools::Lucy->new(
+    adv_stderr => 1,
+    seqfile    => Bio::Root::IO->catfile( 't', 'data', 'lucy.seq' ),
+    rev_desig  => 'R',
+);
+ok( $lucy->isa('Bio::Tools::Lucy') );
+ok( $lucy->seqfile() );
+$lucy->adv_stderr(1);
+ok( scalar $lucy->adv_stderr() );
+# Everything below inspects the first read name the parser reports.
+my $read_names = $lucy->get_sequence_names();
+ok($read_names);
+my $read = shift @{$read_names};
+ok( $read, 'TaLr1010B10R' );
+ok( $lucy->length_raw("$read"),   1060 );
+ok( $lucy->length_clear("$read"), 420 );
+ok( $lucy->start_clear("$read"),  86 );
+ok( $lucy->end_clear("$read"),    505 );
+ok( $lucy->avg_quality("$read") );
+ok( $lucy->full_length("$read") );
+ok( $lucy->polyA("$read") );
+ok( $lucy->direction("$read"), 'R' );
+ok( $lucy->per_GC("$read") );
+ok( $lucy->sequence("$read") );
+ok( $lucy->quality("$read") );
+ok( scalar $lucy->get_Seq_Obj("$read") );
+ok( scalar $lucy->get_Seq_Objs() );
+
+my $rejected = $lucy->get_rejects();
+ok($rejected);
+my ($first_reject) = sort keys %{$rejected};
+ok( $first_reject, 'TaLr1011A07R' );
+ok( $rejected->{$first_reject}, 'Q' );


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/lucy.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/masta.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/masta.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/masta.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,62 @@
+# -*-Perl-*-
+#Some simple tests for meme and transfac parsers
+
+use strict;
+
+BEGIN {
+    # To handle systems with no installed Test module we fall back to the
+    # t dir (where a copy of Test.pm is located).  `use lib` inside `if`
+    # would still run at compile time, hence the run-time require/import.
+    if ( !eval { require Test; 1 } ) {
+        require lib;
+        lib->import('t');
+        require Test;
+    }
+    Test->import;
+    plan( tests => 16 );
+}
+
+use Bio::Matrix::PSM::IO;
+use Bio::Root::IO;
+
+ok(1);
+# Scratch file produced by the writer below; removed when the script exits.
+END { unlink( Bio::Root::IO->catfile(qw(t data masta_w.dat)) ); }
+# Let's try masta formats here: one reader and one writer.
+my $reader = Bio::Matrix::PSM::IO->new(
+    -format => 'masta',
+    -file   => Bio::Root::IO->catfile(qw(t data masta.dat)) );
+my $writer = Bio::Matrix::PSM::IO->new(
+    -format => 'masta',
+    -file   => ">" . Bio::Root::IO->catfile(qw(t data masta_w.dat)) );
+$writer->_flush_on_write(1);
+ok($reader);
+ok($writer);
+
+# Collect each matrix's consensus; keep the 'm1logs' matrix for rewriting.
+my ( @consensus, $kept );
+while ( my $matrix = $reader->next_matrix ) {
+    ok($matrix);
+    push @consensus, $matrix->consensus;
+    $kept = $matrix if $matrix->id eq 'm1logs';
+}
+ok( $consensus[0], 'CAGAAAAATNGAATNCCCACCCCCC' );
+ok( $consensus[1], 'CAGAAAAATAGAATCCCCACCCCCC' );
+ok( $consensus[2], 'CAGAAAAATNNAATNCCCACCNCCC' );
+
+# Write the kept matrix three times, under the ids read back below.
+$writer->write_psm( $kept, 'PWM' );
+for my $variant ( [ m1freq => 'PFM' ], [ m1seq => 'SEQ' ] ) {
+    $kept->id( $variant->[0] );
+    $writer->write_psm( $kept, $variant->[1] );
+}
+$writer->DESTROY;
+my $check = Bio::Matrix::PSM::IO->new(
+    -format => 'masta',
+    -file   => Bio::Root::IO->catfile(qw(t data masta_w.dat)) );
+ok($check);
+for my $expected_id (qw(m1logs m1freq m1seq)) {
+    my $matrix = $check->next_matrix;
+    ok( $matrix->id,        $expected_id );
+    ok( $matrix->consensus, 'CAGAAAAATAGAATCCCCACCCCCC' );
+}


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/masta.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/metafasta.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/metafasta.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/metafasta.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,34 @@
+# -*-Perl-*-
+# $Id: metafasta.t,v 1.1 2005/09/07 02:04:21 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+BEGIN {
+    $NUMTESTS = 4;
+    # To handle systems with no installed Test module we fall back to the
+    # t dir (where a copy of Test.pm is located).  `use lib` inside `if`
+    # would still run at compile time, hence the run-time require/import.
+    if ( !eval { require Test; 1 } ) {
+        require lib; lib->import('t');
+        require Test;
+    }
+    Test->import;
+    plan( tests => $NUMTESTS );
+}
+
+use Bio::SeqIO::metafasta;
+use Bio::Root::IO;
+
+my $debug = $ENV{BIOPERLDEBUG};    # forwarded to the parser as -verbose
+ok(1);
+my $parser = Bio::SeqIO->new(
+    -format  => 'metafasta',
+    -verbose => $debug,
+    -file    => Bio::Root::IO->catfile(qw(t data test.metafasta)),
+);
+# The first record must exist and carry the expected residues and id.
+ok( my $record = $parser->next_seq );
+ok( $record->seq,        'ABCDEFHIJKLMNOPQRSTUVWXYZ' );
+ok( $record->display_id, 'test' );

Added: trunk/packages/bioperl/branches/upstream/current/t/multiple_fasta.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/multiple_fasta.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/multiple_fasta.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,25 @@
+
+
+
+use strict;
+BEGIN {
+    # Use the installed Test.pm, falling back to the copy bundled in t/.
+    # `use` runs at compile time even inside `if`, so require/import here.
+    eval { require Test; 1 }
+      or do { require lib; lib->import('t'); require Test; };
+    Test->import;
+    plan( tests => 8 );
+}
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+# Read t/data/multifa.seq as FASTA: one ok() per sequence, then the count.
+my $fasta = Bio::SeqIO->new(
+    -file   => '<' . Bio::Root::IO->catfile( 't', 'data', 'multifa.seq' ),
+    -format => 'Fasta',
+);
+ok($fasta);
+my $seen = 0;
+while ( my $entry = $fasta->next_seq() ) { ok($entry); $seen += 1; }
+ok( $seen, 6, " missing sequences in the file" );

Added: trunk/packages/bioperl/branches/upstream/current/t/obo_parser.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/obo_parser.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/obo_parser.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,148 @@
+# -*-Perl-*-
+# $Id: obo_parser.t,v 1.1.4.1 2006/10/02 23:10:40 sendu Exp $
+
+use strict;
+BEGIN {
+    # To handle systems with no installed Test module we fall back to the
+    # t dir (where a copy of Test.pm is located).  `use lib` inside `if`
+    # would still run at compile time, hence the run-time require/import.
+    if ( !eval { require Test; 1 } ) {
+        require lib;
+        lib->import('t');
+        require Test;
+    }
+    Test->import;
+    # Without Graph.pm, report a single passing test and stop right here.
+    eval { require 'Graph.pm' };
+    if( $@ ) {
+        print STDERR "\nGraph.pm doesn't seem to be installed on this system -- the OBO Parser needs it...\n\n";
+        plan( tests => 1 );
+        ok( 1 );
+        exit( 0 );
+    }
+    plan( tests => 43 );
+}
+
+
+use Bio::OntologyIO;
+use Bio::Ontology::RelationshipType;
+use Bio::Root::IO;
+
+my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+# Parse the ontology stored in t/data/so.obo.
+my $io = Bio::Root::IO->new(); # less typing from now on
+my $parser = Bio::OntologyIO->new(
+                      -format    => "obo",
+                      -file      => $io->catfile("t", "data",
+                                                 "so.obo"));
+
+my $ont = $parser->next_ontology();
+ok ($ont);
+ok ($ont->name(), "sequence");
+# exactly one root term is expected
+my @roots = $ont->get_root_terms();
+ok (scalar(@roots), 1);
+ok ($roots[0]->name(), "Sequence_Ontology");
+ok ($roots[0]->identifier(), "SO:0000000");
+# the root's direct children, looked up by name
+my @terms = $ont->get_child_terms($roots[0]);
+ok (scalar(@terms), 5);
+my ($term) = grep { $_->name() eq "variation_operation"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "sequence_attribute"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "consequences_of_mutation"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "chromosome_variation"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "located_sequence_feature"; } @terms;
+ok $term;
+# children of the first child term returned above
+@terms = $ont->get_child_terms($terms[0]);
+ok (scalar(@terms), 5);
+($term) = grep { $_->name() eq "translocate"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "delete"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "insert"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "substitute"; } @terms;
+ok $term;
+($term) = grep { $_->name() eq "invert"; } @terms;
+ok $term;
+
+my $featterm = $terms[0];
+@terms = $ont->get_child_terms($featterm);
+ok (scalar(@terms), 2);
+
+# substitution has two parents, see whether this is handled
+@terms = $ont->find_terms(-name => "substitution");
+$term =  $terms[0];
+ok ($term->name(), "substitution");
+
+# search using obo terms
+@terms = $ont->find_identically_named_terms($term);
+ok scalar @terms, 1;
+@terms = $ont->find_identical_terms($term);
+ok scalar @terms, 1;
+@terms = $ont->find_similar_terms($term);
+ok scalar @terms, 7;
+
+@terms = $ont->get_ancestor_terms($term);
+ok (scalar(@terms), 6);
+ok (scalar(grep { $_->name() eq "region"; } @terms), 1);
+ok (scalar(grep { $_->name() eq "sequence_variant"; } @terms), 1);
+
+# processed_transcript has part-of and is-a children
+
+@terms = $ont->find_terms(-name => "processed_transcript");
+$term = $terms[0];
+
+@terms = $ont->get_child_terms($term);
+ok (scalar(@terms), 5);
+@terms = $ont->get_child_terms($term, $PART_OF);
+ok (scalar(@terms), 2);
+@terms = $ont->get_child_terms($term, $IS_A);
+ok (scalar(@terms), 3);
+@terms = $ont->get_child_terms($term, $PART_OF, $IS_A);
+ok (scalar(@terms), 5);
+
+# TF_binding_site has 2 parents and different relationships in the two
+# paths up (although the relationships to its two parents are of the
+# same type, namely is-a)
+@terms = $ont->find_terms(-name => "TF_binding_site");
+$term = $terms[0];
+
+@terms = $ont->get_parent_terms($term);
+ok (scalar(@terms), 2);
+my ($pterm) = grep { $_->name eq "regulatory_region"; } @terms;
+ok $pterm;
+@terms = $ont->get_parent_terms($term, $PART_OF);
+ok (scalar(@terms), 0);
+@terms = $ont->get_parent_terms($term, $IS_A);
+ok (scalar(@terms), 2);
+@terms = $ont->get_parent_terms($term, $PART_OF, $IS_A);
+ok (scalar(@terms), 2);
+
+
+# pull out all relationships
+my @rels = $ont->get_relationships();
+my @relset = grep { $_->object_term->name eq "Sequence_Ontology"; } @rels;
+ok (scalar(@relset), 5);
+@relset = grep { $_->subject_term->name eq "Sequence_Ontology"; } @rels;
+ok (scalar(@relset), 0);
+
+# relationships for a specific term only
+($term) = $ont->find_terms(-identifier => "SO:0000082");
+ok ($term);
+ok ($term->identifier, "SO:0000082");
+ok ($term->name, "processed_transcript_attribute");
+@rels = $ont->get_relationships($term);
+ok (scalar(@rels), 5);
+@relset = grep { $_->predicate_term->name eq "IS_A"; } @rels;
+ok (scalar(@relset), 5);
+@relset = grep { $_->object_term->identifier eq "SO:0000082"; } @rels;
+ok (scalar(@relset), 4);
+@relset = grep { $_->subject_term->identifier eq "SO:0000082"; } @rels;
+ok (scalar(@relset), 1);

Added: trunk/packages/bioperl/branches/upstream/current/t/pICalculator.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/pICalculator.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/pICalculator.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,28 @@
+# -*-Perl-*-
+# Bioperl Test Harness Script for pICalculator.pm
+
+use strict;
+BEGIN {
+    # Use the installed Test.pm, falling back to the copy bundled in t/.
+    # `use` runs at compile time even inside `if`, so require/import here.
+    eval { require Test; 1 }
+      or do { require lib; lib->import('t'); require Test; };
+    Test->import;
+    plan( tests => 36 );
+}
+use Bio::Seq;
+use Bio::Tools::pICalculator;
+
+# Expected charge_at_pH() results for pH 0, 0.5, 1, ... 14 (29 entries).
+my @results = (12.999052267583,12.99700393539,12.9905348815881,12.9701609055248,12.9065486239062,12.7131376670492,12.1681721433832,10.8960154975975,8.82939162036317,6.81329734996812,5.58311842185452,4.87361913724596,4.11053952923425,3.00644711484741,1.91237900622079,1.19755236429121,0.669596284738213,0.0571988207175853,-0.584285455699191,-1.14218959353989,-1.79865831607402,-2.74360327055112,-3.87361697725167,-4.91494976791445,-6.01005299841696,-7.43711791135299,-8.77859455006782,-9.53905973773058,-9.84470802408586);
+my $protein = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI";
+ok( my $seq = Bio::Seq->new( -seq => $protein ) );
+ok( $seq->seq eq $protein );
+ok( scalar $seq->seq );
+ok( my $calc = Bio::Tools::pICalculator->new( -places => 2 ) );
+ok( $calc->seq($seq) );
+ok( scalar $calc->iep );
+for my $half_step ( 0 .. 28 ) {    # pH = $half_step / 2
+    ok( $calc->charge_at_pH( $half_step / 2 ), $results[$half_step] );
+}
+ok( $calc->iep, 8.54 );


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/pICalculator.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/phd.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/phd.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/phd.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,98 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: phd.t,v 1.8.4.2 2006/11/08 17:25:55 sendu Exp $
+#
+
+
+use strict;
+use vars qw($DEBUG);
+use Bio::Root::IO;
+$DEBUG = $ENV{'BIOPERLDEBUG'};
+
+BEGIN {
+    # To handle systems with no installed Test::More we fall back to t/lib
+    # (presumably where a bundled copy lives).  `use lib` inside `if` would
+    # still run at compile time, hence the run-time require/import.
+    if ( !eval { require Test::More; 1 } ) {
+        require lib; lib->import('t/lib');
+        require Test::More;
+    }
+    Test::More->import;
+    plan( tests => 9 );
+}
+END  {
+    unlink qw(write_phd.phd);    # scratch file created by the write_seq() call below
+}
+# Progress messages below are printed only when $DEBUG is true.
+print("Checking if the Bio::SeqIO::phd module could be used, even though it shouldn't be directly used...\n") if ( $DEBUG);
+        # test 1
+use_ok('Bio::SeqIO::phd');
+
+print("Checking to see if SeqWithQuality objects can be created from a file...\n") if ($DEBUG);
+my $in_phd  = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data",
+                                                                "phredfile.phd"),
+                              '-format'  => 'phd',
+                              '-verbose' => $DEBUG || 0);
+isa_ok($in_phd,'Bio::SeqIO::phd');
+
+# Read the first record and check its QUALITY_LEVELS comment and class.
+print("I saw these in qualfile.qual:\n") if($DEBUG);
+my $phd = $in_phd->next_seq();
+print("Did you get the 'QUALITY_LEVELS' comment?\n") if ($DEBUG);
+is($phd->{comments}->{'QUALITY_LEVELS'}, 99);
+print("Checking to see if this is the right reference...\n") if( $DEBUG);
+isa_ok($phd,"Bio::Seq::Quality");
+
+my $position = 6;
+
+if( $DEBUG ) {
+    print("What is the base at position $position (using subseq)?\n");
+    print($phd->subseq($position,$position)."\n");
+    print("What is the base at position $position (using baseat)?\n");
+    print($phd->baseat($position)."\n");
+    print("What is the quality at $position? (using subqual)\n");
+}
+my @qualsretr = @{$phd->subqual($position,$position)};
+if( $DEBUG ) {
+    print($qualsretr[0]."\n");
+    print("What is the quality at $position? (using qualat)\n");
+    print($phd->qualat($position)."\n");
+}
+
+print("OK. Now testing write_phd...\n") if($DEBUG);
+
+my $out_phd = Bio::SeqIO->new(-file => ">write_phd.phd",
+                              '-format' => 'phd');
+print("Did it return the right reference?\n") if($DEBUG);
+isa_ok($out_phd,"Bio::SeqIO::phd");
+
+$out_phd->write_seq( -SeqWithQuality        => $phd,
+                     -CHROMAT_FILE          => $phd->id(),
+                     -ABI_THUMBPRINT        => "",
+                     -PHRED_VERSION         => "",
+                     -CALL_METHOD           => "",
+                     -QUALITY_LEVELS        => "",
+                     -TIME                  => "",
+                     -TRACE_ARRAY_MIN_INDEX => "",
+                     -TRACE_ARRAY_MAX_INDEX => "",
+                     -CHEM                  => "",
+                     -DYE                   => ""
+                     );
+ok( -e "write_phd.phd");
+
+# Bug 2120
+# NOTE(review): q(), not qw() -- each array holds one space-joined string.
+my @qual = q(9 9 12 12 8 8 9 8 8 8 9);
+my @trace = q(113 121 130 145 153 169 177 203 210 218 234);
+
+$in_phd  = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data",
+                                                             "bug2120.phd"),
+                           '-format'  => 'phd',
+                           '-verbose' => $DEBUG || 0);
+
+my $seq = $in_phd->next_seq;
+is($seq->subseq(10,20),'gggggccttat','$seq->subseq()');
+my @seq_qual =$seq->subqual_text(10,20);
+is_deeply(\@seq_qual,\@qual,'$seq->subqual_text()');
+my @seq_trace = $seq->subtrace_text(10,20);
+is_deeply(\@seq_trace,\@trace,'$seq->subtrace_text()');

Added: trunk/packages/bioperl/branches/upstream/current/t/pir.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/pir.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/pir.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,33 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: pir.t,v 1.1 2005/08/28 03:41:56 bosborne Exp $
+
+use strict;
+
+BEGIN {
+    # Use the installed Test.pm, falling back to the copy bundled in t/.
+    # `use` runs at compile time even inside `if`, so require/import here.
+    eval { require Test; 1 }
+      or do { require lib; lib->import('t'); require Test; };
+    Test->import;
+    plan( tests => 8 );
+}
+
+use Bio::SeqIO;
+
+ok(1);
+
+# BIOPERLDEBUG > 0 additionally echoes every record to STDOUT in pir format.
+my $debug = $ENV{'BIOPERLDEBUG'} || 0;
+my $reader = Bio::SeqIO->new(
+    -file    => Bio::Root::IO->catfile( 't', 'data', 'seqfile.pir' ),
+    -verbose => $debug,
+    -format  => 'pir',
+);
+ok($reader);
+my $echo = Bio::SeqIO->new( -format => 'pir', -fh => \*STDOUT );
+
+while ( my $record = $reader->next_seq() ) {
+    # ok($record->id, qr /^[PF]1/ );    # id check left disabled
+    ok( $record->length > 1 );
+    if ( $debug > 0 ) { $echo->write_seq($record); }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/pln.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/pln.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/pln.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,52 @@
+# -*-Perl-*-
+# $Id: pln.t,v 1.3 2005/09/17 02:11:21 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS);
+BEGIN {
+	$NUMTESTS = 3;
+	$error = 0;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	# SeqIO modules abi.pm, ctf.pm, exp.pm, pln.pm, ztr.pm
+	# all require Bio::SeqIO::staden::read, part of bioperl-ext
+	eval {
+		require Bio::SeqIO::staden::read;
+	};
+	if ( $@ ) {
+		$error = 1;
+		warn "Bio::SeqIO::staden::read of bioperl-ext is not installed or is installed incorrectly - skipping pln.t tests\n";
+	}
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+# Report every test that was not reached as skipped so the plan still adds up.
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Unable to run all of the pln tests',1);
+	}
+}
+
+exit(0) if ( $error == 1 );
+# Load at run time, after the guard above: 'use' would execute at compile
+# time, i.e. before the exit, even when the staden dependency is missing.
+require Bio::SeqIO::pln;
+require Bio::Root::IO;
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => 'pln',
+			 -verbose => $verbose,
+			 -file => Bio::Root::IO->catfile
+			 (qw(t data readtest.pln) ));
+ok(my $seq = $io->next_seq);
+ok($seq->seq, "GATGATTCCGGCTTCGGACGACTCTAGAGGATCCCCATTTTTATAGTTTTTATCTTGTAATAGATGTTTAGATTTTTCGTTGTAATTATTTTCTTTATTGTTGAAATTAGTATCTCTGGGTAATTTATCATATTCTCTGGAAAATGATTTACTATCACTAGATACTTCATAAGATTTATAATCTTTATTATGAAAATCATCTCTATTTTTCAAATTATTATTATATCTATCAAAGTTTCTGTCTTCATTATATCTATTAGCATATCTATCTTTATCTTTATCCCTATCACTATATCTATCATATGGTTCATCTTGTTCAACCGATCAGACTCGATTCGCCATCGCCTCTAACGGATGGCCGCTCCCCCTCTCATACCTCGCTCCCCTCGACATCCCCCGTCTCGCCACCCTATCCGCCCCCTTCATCACCCCCCCTTATCCACACCCTCACCCCCCGCATCGCGCACCCACGACCACCCGAAGAACCGCCCTTACTCCCAAGTACGCCCCGACCTCCATCACCCTATGCGGTACCACTCCCACCACACCCAGTCCTACTTTCGCCCGCACATCGGCCCCGCTTCAGACAGCTCCCAACTACGCAACCCACGCTTGTTCTTGTTCACACTCGAATACTCGAATCTCTCATTACTCCGCGGACTCCGCCGCACCTGTGCACCATTAACTGTGTAGCGCCTGAACCGGCACCTCTGATTACCACTTCCTCCACCAGCACAGTCCTATTACCGCATGTCGCTCTGCTAAGACAGTGCAAGACTCTGCGGTCGCTCTGACCCGCATCCGCCAGGGCACCTCTCACCCTCGCTGGCCACCCCGCCCCCCTCTCCCTGCCCCTTCATTCCCCCAAACCGCTTTCAACGGGACACACCCCTCCGCGGCGGACCACAACTCGCCGTCGGCCACCACTCACACCTTCCCTCCTCCTTCCCCCACATCACGCCAACCCCGTGGGACGGCTCTCCCGCGGCTACGACGCGCAACCCCCCCTCGCCGCTTCCCCCCCAACTTCCCACGGGCTCCCCTCCGCCCCTTACCCGCGAGGAGCTTCACCCGCGAACCACCTCCCCCCTTTCCCAACAGCACCG");
+

Added: trunk/packages/bioperl/branches/upstream/current/t/primaryqual.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/primaryqual.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/primaryqual.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,182 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: primaryqual.t,v 1.15 2005/07/13 12:29:17 heikki Exp $
+#
+# modeled after the t/Allele.t test script
+
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'};
+my $verbose = $DEBUG ? undef : -1; # ternary, not 'my ... unless': perlsyn leaves a conditional 'my' undefined
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 32;
+}
+
+END {
+	unlink qw(batch_write_qual.qual write_qual.qual);
+}
+# redirect STDERR to STDOUT (a failure is reported on STDOUT as a '#' comment line)
+open (STDERR, ">&STDOUT") or print "# cannot redirect STDERR to STDOUT: $!\n";
+use Bio::Root::IO;
+use Bio::SeqIO;
+use Bio::Seq::Quality;
+use Bio::Seq::PrimaryQual;
+
+my $string_quals = "10 20 30 40 50 40 30 20 10";
+print("Quals are $string_quals\n") if($DEBUG);
+my $qualobj = Bio::Seq::PrimaryQual->new(
+					  '-qual' => $string_quals,
+					  '-id'  => 'QualityFragment-12',
+					  '-accession_number' => 'X78121',
+					  );
+ok($qualobj);
+ok($qualobj->display_id, 'QualityFragment-12');
+ok($qualobj->accession_number, 'X78121');
+
+my @q2 = split/ /,$string_quals;
+$qualobj = Bio::Seq::PrimaryQual->new
+    ( '-qual'             => \@q2,
+      '-primary_id'	     => 'chads primary_id',
+      '-desc'		        => 'chads desc',
+      '-accession_number' => 'chads accession_number',
+      '-id'		           => 'chads id',
+		'-header'           => 'chads header'
+      );
+
+ok($qualobj->primary_id, 'chads primary_id');
+my $rqual = $qualobj->qual();
+ok(ref($rqual) eq "ARRAY");
+
+my $newqualstring = "50 90 1000 20 12 0 0";
+
+$qualobj->qual($newqualstring);
+my $retrieved_quality = $qualobj->qual();
+my $retrieved_quality_string = join(' ', @$retrieved_quality);
+ok($retrieved_quality_string,$newqualstring);
+
+my @newqualarray = split/ /,$newqualstring;
+$qualobj->qual(\@newqualarray);
+$retrieved_quality = $qualobj->qual();
+$retrieved_quality_string = join(' ',@$retrieved_quality);
+ok($retrieved_quality_string,$newqualstring);
+
+eval {
+    $qualobj->qual("chad");
+};
+ok($@ =~ /not look healthy/);
+
+eval { $qualobj->qual(""); };
+ok(!$@);
+
+eval { $qualobj->qual(" 4"); };
+ok(!$@);
+
+$qualobj->qual("4 10");
+
+ok($qualobj->length(),2 );
+
+# subqual(start,end) is 1-based and inclusive: positions 3..6 hold 30 40 50 40.
+$qualobj->qual("10 20 30 40 50 40 30 20 10");
+my @subquals = @{ $qualobj->subqual(3,6) };
+ok(@subquals, 4);
+     # chad, note to self, evaluate border conditions
+ok ("30 20 10" eq join(' ',@{$qualobj->subqual(7,9)}));
+
+my @false_comparator = qw(30 40 70 40);
+my @true_comparator = qw(30 40 50 40);
+ok(!compare_arrays(\@subquals,\@true_comparator)); # 0 from compare_arrays() means "identical"
+
+eval { $qualobj->subqual(-1,6); };
+ok($@ =~ /EX/ );
+eval { $qualobj->subqual(1,6); };
+ok(!$@);
+eval { $qualobj->subqual(1,9); };
+ok(!$@);
+eval { $qualobj->subqual(9,1); };
+ok($@ =~ /EX/ );
+
+
+ok($qualobj->display_id() eq "chads id");
+$qualobj->display_id("chads new display_id");
+ok($qualobj->display_id() eq "chads new display_id");
+
+ok($qualobj->accession_number(), "chads accession_number");
+$qualobj->accession_number("chads new accession_number");
+ok($qualobj->accession_number(), "chads new accession_number");
+ok($qualobj->primary_id(), "chads primary_id");
+$qualobj->primary_id("chads new primary_id");
+ok($qualobj->primary_id(), "chads new primary_id");
+
+ok($qualobj->desc(), "chads desc");
+$qualobj->desc("chads new desc");
+ok($qualobj->desc(), "chads new desc");
+ok($qualobj->display_id(), "chads new display_id");
+$qualobj->display_id("chads new id");
+ok($qualobj->display_id(), "chads new id");
+
+ok($qualobj->header(), "chads header");
+
+my $in_qual  = Bio::SeqIO->new(-file => "<" . 
+					Bio::Root::IO->catfile("t","data","qualfile.qual") ,
+			       '-format' => 'qual',
+			       '-verbose' => $verbose);
+ok($in_qual);
+my $pq = $in_qual->next_seq();
+ok($pq->qual()->[99], '39'); # spot check boundary
+ok($pq->qual()->[100], '39'); # spot check boundary
+
+my $out_qual = Bio::SeqIO->new('-file'    => ">write_qual.qual",
+                               '-format'  => 'qual',
+                               '-verbose' => $verbose);
+$out_qual->write_seq(-source	=>	$pq);
+
+my $swq545 = Bio::Seq::Quality->new (	-seq	=>	"ATA",
+                                        -qual	=>	$pq
+                                    );
+$out_qual->write_seq(-source	=>	$swq545);
+
+$in_qual = Bio::SeqIO->new('-file' => 
+			  Bio::Root::IO->catfile("t","data","qualfile.qual") , 
+			   '-format' => 'qual',
+			   '-verbose' => $verbose);
+
+my $out_qual2 = Bio::SeqIO->new('-file' => ">batch_write_qual.qual",
+				'-format'  => 'qual',
+				'-verbose' => $verbose);
+
+while ( my $batch_qual = $in_qual->next_seq() ) {
+	$out_qual2->write_seq(-source	=>	$batch_qual);
+}
+
+# Debug helper: when $DEBUG is set, read records from $in_qual until it is
+# exhausted and print each display id with its number of quality values.
+sub display {
+    if ($DEBUG) {
+	print("I saw these in qualfile.qual:\n") ;
+	while ( my $record = $in_qual->next_seq() ) {
+	    print($record->display_id()."\n");
+	    my $count = scalar @{ $record->qual() };
+	    print("(".$count.") quality values.\n");
+	}
+    }
+}
+
+# dumpValue($qualobj);
+
+# Returns 0 if both array refs hold the same strings in the same order, else 1.
+sub compare_arrays {
+	my ($a1,$a2) = @_;
+	return 1 if @{$a1} != @{$a2};    # different lengths can never match
+	for my $idx (0 .. $#{$a1}) {
+		return 1 if $a1->[$idx] ne $a2->[$idx];
+	}
+	return 0;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/primedseq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/primedseq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/primedseq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,40 @@
+## $Id: primedseq.t,v 1.4.8.2 2006/11/30 09:24:00 sendu Exp $
+
+# test for Bio::Seq::PrimedSeq
+# written by Rob Edwards
+
+use strict;
+use constant NUMTESTS => 9;
+
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    plan tests => NUMTESTS;
+}
+
+use File::Spec;                 # catfile() is called below; load it explicitly
+use Bio::SeqIO;
+use Bio::SeqFeature::Primer;    # constructed directly below; load it explicitly
+use Bio::Seq::PrimedSeq;
+ok(1);
+
+my ($seqio, $seq, $left, $right, $primed_seq, $left_test, $annseq, $amplicon, $returnedseq);
+# Template sequence from t/data/primedseq.fa plus a left and a right primer.
+$seqio=Bio::SeqIO->new(-file=>File::Spec->catfile(qw(t data primedseq.fa)));
+$seq=$seqio->next_seq;
+$left=Bio::SeqFeature::Primer->new(-seq=>'CTTTTCATTCTGACTGCAACG');
+$right=Bio::SeqFeature::Primer->new(-seq=>'GGTGGTGCTAATGCGT');
+
+
+ok $primed_seq = Bio::Seq::PrimedSeq->new(-seq=>$seq, -left_primer=>$left, -right_primer=>$right);
+ok $left_test = $primed_seq->get_primer('left');
+ok $left_test eq $left;
+ok $annseq = $primed_seq->annotated_sequence; # should I check that this is what I think it is, or just be happy?
+ok $amplicon=$primed_seq->amplicon->seq;
+ok uc($amplicon) eq uc('cttttcattctgactgcaacgGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAacgcattagcaccacc');
+ok $returnedseq=$primed_seq->seq;
+ok $returnedseq->seq eq $seq->seq;
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/primedseq.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/primer3.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/primer3.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/primer3.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,71 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+## $Id: primer3.t,v 1.11.4.2 2006/11/30 09:24:00 sendu Exp $
+
+# test for Bio::Tools::Primer3.pm
+# written by Rob Edwards
+# and Chad Matsalla
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR $XML_ERROR);
+
+
+BEGIN {
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    # Must equal the number of ok() calls in this script (12); a larger value
+    # makes the END block report the surplus as "Missing dependencies" skips.
+    $NUMTESTS  = 12;
+    plan tests => $NUMTESTS;
+    eval {  require Clone; };
+    if ( $@ ) {
+	warn("Clone not installed. This means that the module is not usable. Skipping tests\n");
+	$ERROR = 1;
+    }
+}
+
+END {
+        foreach ( $Test::ntest..$NUMTESTS) {
+	skip("Missing dependencies. Skipping tests",1);
+    }
+}
+
+exit 0 if $ERROR;
+
+require Bio::Tools::Primer3;
+ok(1);
+
+my ($p3, $num, $primer);
+
+ok $p3 = Bio::Tools::Primer3->new(-file => File::Spec->catfile(qw(t data primer3_output.txt)));
+ok $num = $p3->number_of_results;
+ok $num, 5, "Got $num";
+ok $num = $p3->all_results;
+ok defined $num, 1, "Can't get all results";
+ok $num = $p3->primer_results(1);
+ok defined $num, 1, "Can't get results for 1";
+ok $primer = $p3->next_primer;
+ok ref($primer) eq "Bio::Seq::PrimedSeq", 1, 
+  "reference for primer stream is not right";
+
+# get the left primer
+my $left_primer = $primer->get_primer('left');
+
+# get the sequence for that primer. This is a test to verify behavior 
+# on the bioperl list in or about 050315
+my $seqobj = $left_primer->seq();
+
+my $seq = $seqobj->seq();
+
+my $other_left_primer = $primer->get_primer();
+
+# a different way to access the primers in the stream
+my $alt = $p3->primer_results(0,'PRIMER_LEFT_INPUT');
+
+# next one
+ok $primer = $p3->next_primer;
+# get the left primer
+my $left_primer_seq = $primer->get_primer('left')->seq;
+ok $left_primer_seq->seq, "GAGGGTAACACGCTGGTCAT";


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/primer3.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/protgraph.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/protgraph.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/protgraph.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,282 @@
+# This is -*-Perl-*- code#
+# Bioperl Test Harness Script for Modules#
+# $Id: protgraph.t,v 1.1 2004/03/13 23:45:32 radams Exp
+
+use vars qw($NUMTESTS $DEBUG $ERROR $XML_ERROR);
+use strict;
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+use Bio::Root::IO;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test;};
+    $ERROR = $XML_ERROR = 0;
+    if ( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    $NUMTESTS  = 66;
+    plan tests => $NUMTESTS;
+    eval {	require Class::AutoClass;
+         	require Clone; };
+    if ( $@ ) {
+	warn("Class::AutoClass or Clone not installed. This means that the module is not usable. Skipping tests\n");
+	$ERROR = 1;
+    }
+
+    eval {
+	require XML::Twig;
+    };
+    if ($@) {
+	warn "XML::Twig needed for XML format parsing, skipping these tests\n";
+	$XML_ERROR = 1;
+    }
+}
+
+END {
+    unlink Bio::Root::IO->catfile("t","data","out.mif");
+    foreach ( $Test::ntest..$NUMTESTS) {
+	skip("Missing dependencies. Skipping tests",1);
+    }
+}
+exit 0 if $ERROR ==  1;
+
+require Bio::Graph::ProteinGraph;
+require Bio::Graph::IO;
+require Bio::Graph::Edge;
+
+my $verbose = 0;
+$verbose    = 1 if $DEBUG;
+ok 1;
+################1st of all let's test the io.....
+
+###############  test dip tab delimited  format  ###################
+## test read...
+my %ids;
+my $gr;
+ok my $io = Bio::Graph::IO->new(
+  -format => 'dip',
+  -file   => Bio::Root::IO->catfile("t","data","tab1part.mif"),
+  -threshold => 0.6);
+
+ok  $gr = $io->next_network();
+
+ok my $node   = $gr->nodes_by_id('A64696');
+ok $node->accession_number, 'A64696';
+
+##test write. to filehandle...
+
+ok my $out =  Bio::Graph::IO->new(
+  -format => 'dip',
+  -file   =>">". Bio::Root::IO->catfile("t","data","out.mif"));
+ok $out->write_network($gr);
+
+## get articulation_points. 
+my @nodes = $gr->articulation_points();
+# articulation_points() is dereferenced as an array reference further down
+##now remove 2 nodes: this removes 4 edges and  3087 should be a new artic point
+ok $gr->edge_count, 72;
+$gr->remove_nodes($gr->nodes_by_id('3082N'), $gr->nodes_by_id('3083N'));
+ok $gr->edge_count, 68;
+ my $nodes = $gr->articulation_points();
+ok grep {$_->object_id eq 'H64521'} @$nodes;
+ok scalar @$nodes, 13;
+@nodes = @{$gr->articulation_points()};
+# <NOTE>
+# these were failing, I don't understand the module enough to know if 
+# this is a bug. Richard needs to look at it
+#ok grep {$_->object_id eq 'B64701'} @nodes;
+#ok scalar @nodes, 14;
+
+ok grep {$_->object_id eq 'B64528'} @nodes;
+ok scalar @nodes, 13;
+# </NOTE>
+
+ok $node   = $gr->nodes_by_id('A64696');
+ok $node->accession_number, 'A64696';
+
+
+## can we round trip, is out format same as original format?
+ok my $io2 = Bio::Graph::IO->new(
+  -format    => 'dip',
+  -file     => Bio::Root::IO->catfile("t","data","out.mif"));
+ok	my $g2     = $io2->next_network();
+ok  $node      = $g2->nodes_by_id('A64696');
+ok $node->accession_number, 'A64696';
+
+##### now lets test some graph properties.....##
+## basic properties from SImpleGraph.
+
+ok sprintf("%.3f",$g2->density), "0.027";
+ok $g2->is_connected, '';
+ok $g2->is_forest, undef;
+ok $g2->is_tree, '';
+ok $g2->is_empty, '';
+ok $g2->is_cyclic, 1;
+
+## get connected subgraphs
+my @components = $g2->components();
+ok scalar @components, 5;
+
+## get nodes connected to parameter
+my $t       = $g2->traversal($g2->nodes_by_id('3079N'));
+my @dfnodes = $t->get_all;
+##
+
+##before deleting 3048N,  3047N has 2 neighbours
+my @n1 = $g2->neighbors($g2->nodes_by_id('3047N'));
+ok scalar @n1,2;
+
+ok $g2->remove_nodes($g2->nodes_by_id('3048N'));
+
+## after deleting there is only 1 interactor
+@n1 = $g2->neighbors($g2->nodes_by_id('3047N'));
+ok scalar @n1,1;
+
+##check no undefs left after node removal ##
+
+ok map {$_->object_id}$g2->edges;
+
+## get an edge by its id
+
+ok my $edge = $g2->edge_by_id('4368E');
+ok $edge->object_id, '4368E';
+
+## count all edges
+my $count = 0;
+ok $g2->edge_count, 71;
+
+my @n = $g2->neighbors($g2->nodes_by_id('3075N'));
+ok scalar @n, 13;
+
+ok $g2->remove_nodes($g2->nodes_by_id('3075N'));
+
+## should be 13  less interactions in graph.  
+ok scalar $g2->edge_count, 58;
+
+## many more subgraphs now
+@components = $g2->components();
+#there were 5 subgraphs, now there are 10 unconnected nodes, total 15
+ok scalar @components, 15;
+
+## how many unconnected nodes?
+my @ucnodes = $g2->unconnected_nodes;
+ok scalar  @ucnodes, 10;
+
+##get CC using protein object..
+ok  sprintf("%.3f", $g2->clustering_coefficient($g2->nodes_by_id('B64525'))), 0.022;
+
+#.. and using id string (same as previous, for convenience	)
+ok  sprintf("%.3f", $g2->clustering_coefficient('B64525')), 0.022;
+
+## test has_node() method
+ok $g2->has_node('B64525'), 1;
+ok $g2->has_node('B64'), 0;
+
+## remove a single duplicate edge
+ok $g2->remove_dup_edges($g2->nodes_by_id('3103N'));
+
+## remove  all duplicate edges
+ok $g2->remove_dup_edges();
+
+## should now be no duplicates
+my @dupids = map{$_->object_id()} $g2->dup_edges();
+ok $dupids[0], undef;
+
+########### now we test the 'union()' method to see it conforms to 
+## the rules described in its documentation:
+
+$io = Bio::Graph::IO->new(
+   -format => 'dip',
+   -file   => Bio::Root::IO->catfile("t","data","tab1part.mif"));
+$gr = $io->next_network();
+$io2 = Bio::Graph::IO->new(
+   -format => 'dip',
+   -file   => Bio::Root::IO->catfile("t","data","tab1part.mif"));
+
+$g2 = $io2->next_network();
+
+# First of all we put the same graph into both variables. After a union
+# graph 1 should be unaffected. Because all edge ids are the same, 
+# all duplicates will be redundant. 
+# there are 3 duplicates in dataset. 
+my @dups = $gr->dup_edges();
+ok scalar @dups, 3;
+$gr->union($g2);
+@dups = $gr->dup_edges();
+ok scalar @dups, 3;
+my @redundant = $gr->redundant_edge();
+ok scalar @redundant, 72; 
+
+## now lets do a union with a graph that has some new edges, 
+## using existing nodes
+
+##read in graph data
+$gr = undef;
+$g2 = undef;
+$io = Bio::Graph::IO->new(
+   -format => 'dip',
+   -file   => Bio::Root::IO->catfile("t","data","tab1part.mif"));
+$gr = $io->next_network();
+$io2 = Bio::Graph::IO->new(
+    -format => 'dip',
+    -file   => Bio::Root::IO->catfile("t","data","tab2part.mif"));
+$g2 = $io2->next_network();
+ok $gr->edge_count, 72;
+ok $gr->node_count, 74;
+$gr->union($g2);
+#there should be 1 more edge in the graph $gr now, with no new nodes. 
+#$g2 is unaffected.  
+ok $gr->edge_count, 73;
+ok $gr->node_count, 74;
+
+## now lets test a union that has new nodes in $g2 
+$gr = undef;
+$g2 = undef;
+$io = Bio::Graph::IO->new
+    (-format => 'dip',
+     -file   => Bio::Root::IO->catfile("t","data","tab1part.mif"));
+$gr = $io->next_network();
+$io2 = Bio::Graph::IO->new
+    (-format => 'dip',
+     -file   => Bio::Root::IO->catfile("t","data","tab3part.mif"));
+
+$g2 = $io2->next_network();
+ok $gr->edge_count, 72;
+ok $gr->node_count, 74;
+$gr->union($g2);
+# there should be 2 more edge in the graph $gr now and 2 more nodes. 
+# $g2 is unaffected.  
+ok $gr->edge_count, 74;
+ok $gr->node_count, 76;
+
+# test IO/psi_xml if the required modules are present
+unless( $XML_ERROR ) {
+	# PSI XML from DIP
+	ok $io = Bio::Graph::IO->new
+	  (-format => 'psi_xml',
+		-file   => Bio::Root::IO->catfile("t", "data", "psi_xml.dat"));
+	ok my $g = $io->next_network();
+	ok $g->edge_count, 3;
+	ok $g->node_count, 4;
+	#my @rts =$g->articulation_points();
+	my $n = $g->nodes_by_id(207153);
+	ok $n->species->node_name,"Helicobacter pylori 26695";
+	ok $n->primary_seq->desc,"bogus-binding membrane protein (lepA) HP0355";
+
+	# PSI XML from IntAct
+	ok my $io2 = Bio::Graph::IO->new
+	  (-format => 'psi_xml',
+		-file   => Bio::Root::IO->catfile("t", "data", "sv40_small.xml"));
+	ok my $g3 = $io2->next_network();
+	ok $g3->edge_count, 3;
+	ok $g3->node_count, 5;
+
+	# my @rts =$g->articulation_points();
+	# my $n = $g->nodes_by_id(207153);
+	# ok $n->species->binomial,"Helicobacter pylori 26695";
+	# ok $n->primary_seq->desc,"bogus-binding membrane protein (lepA) HP0355";
+} 
+

Added: trunk/packages/bioperl/branches/upstream/current/t/psm.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/psm.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/psm.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,208 @@
+# -*-Perl-*-
+#Some simple tests for meme and transfac parsers
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    plan tests => 63;
+}
+
+use Bio::Matrix::PSM::IO;
+
+
+my $mmt= "chr04q	170164	170208	strong	-	0	Motif 3 occurrance in chr04q
+chr04q	215755	215799	strong	+	0	Motif 4 occurrance in chr04q
+chr04q	532530	532574	strong	+	2	Motif 2 occurrance in chr04q
+chr04q	539492	539536	strong	-	1	Motif 1 occurrance in chr04q
+chr04q	586113	586157	strong	+	2	Motif 2 occurrance in chr04q
+chr04q	698245	698289	strong	-	0	Motif 4 occurrance in chr04q
+chr04q	804412	804456	strong	-	0	Motif 3 occurrance in chr04q
+chr04q	858870	858914	strong	-	2	Motif 3 occurrance in chr04q
+chr04q	861561	861605	strong	-	2	Motif 3 occurrance in chr04q
+chr04q	916898	916942	strong	-	1	Motif 1 occurrance in chr04q
+chr04q	1146916	1146960	strong	-	0	Motif 1 occurrance in chr04q
+chr04q	1315772	1315816	strong	+	1	Motif 1 occurrance in chr04q
+chr04q	1636119	1636163	strong	+	2	Motif 3 occurrance in chr04q
+chr04q	1636200	1636244	strong	+	2	Motif 1 occurrance in chr04q
+chr04q	1636437	1636481	strong	+	2	Motif 4 occurrance in chr04q
+chr04q	1637361	1637405	strong	+	2	Motif 2 occurrance in chr04q
+chr04q	1652447	1652491	strong	+	1	Motif 4 occurrance in chr04q";
+my @mmt=split(/\n/,$mmt);
+
+ok(1);
+
+#Let's try meme here
+my $psmIO =  new Bio::Matrix::PSM::IO(-format=>'meme', 
+	     -file=>Bio::Root::IO->catfile(qw(t data meme.dat)));
+ok $psmIO;
+
+my @inputfile=grep(/datafile/i,$psmIO->unstructured);
+ok @inputfile;
+
+my $release=$psmIO->release;
+ok $release;
+
+my @ids=$psmIO->hid;
+ok @ids,4;
+
+my %weights=$psmIO->weight;
+ok %weights;
+
+my %seq = $psmIO->seq;
+ok %seq,'0';#Meme doesn't have seq
+
+ok $psmIO->version,'3.0';
+
+my $psm = $psmIO->next_psm;
+ok $psm;
+
+#Lets try to compress and uncompress the log odds and the frequencies, see if there is no
+#considerable loss of data.
+my $fA=$psm->get_compressed_freq('A');
+my @check=Bio::Matrix::PSM::SiteMatrix::_uncompress_string($fA,1,1);
+my @A=$psm->get_array('A');
+my ($var,$max) = (0,0);
+for (my $i = 0; $i<@check;$i++) {
+  my $diff=abs(abs($check[$i])-abs($A[$i]));
+  $var += $diff;
+  $max=$diff if ($diff>$max);
+}
+my $avg=$var/@check;
+ok $avg<0.01; #Loss of data under 1 percent
+#print $avg,"\n";
+ok $psm->sequence_match_weight('CAGAAAAATAAAATGGCCACCACCC'),2015;
+
+my $lA=$psm->get_compressed_logs('A');
+@check=Bio::Matrix::PSM::SiteMatrix::_uncompress_string($lA,1000,2);
+@A=$psm->get_logs_array('A');
+($var,$max) = (0,0);
+for (my $i = 0;$i<@check;$i++) {
+  my $diff=abs(abs($check[$i])-abs($A[$i]));
+  $var += $diff;
+  $max=$diff if ($diff>$max);
+}
+$avg=$var/@check;
+ok $avg<10; #Loss of data under 1 percent
+
+my $matrix=$psm->matrix;
+ok $matrix;
+my $psm2=$psm;
+$psm2->matrix($matrix);
+ok $psm,$psm2;
+
+my %psm_header=$psm->header;
+ok $psm_header{IC},38.1;
+ok $psm_header{sites},4;
+ok $psm_header{width},25;
+ok $psm_header{e_val},'1.2e-002';
+
+
+#Quick check if returned object works
+my $IUPAC=$psm->IUPAC;
+ok $IUPAC,'CMKWMAAAKWVAWTYCMCASCHCCM';
+ok $IUPAC,$psm2->IUPAC;
+ok $IUPAC,$matrix->IUPAC;
+
+my $instances=$psm->instances;
+ok $instances;
+
+foreach my $instance (@{$instances}) {
+  my $id=$instance->primary_id;
+  ok $instance->strand,1;
+  last if (ok $id);
+}
+
+ok $psm->header('e_val');
+#Meme parser should be OK if tests passed
+
+
+#Now we are going to try transfac
+
+$psmIO =  new Bio::Matrix::PSM::IO(-format=>'transfac', 
+	  -file=> Bio::Root::IO->catfile(qw(t data transfac.dat)));
+ok $psmIO;
+
+my $version=$psmIO->version;
+ok !$version;
+
+ok $psmIO->release, '6.4--2002-12-02';
+
+$psm     = $psmIO->next_psm;
+ok $psm;
+
+# Lets try to compress and uncompress the frequencies, see if
+# there is no considerable loss of data.
+$fA=$psm->get_compressed_freq('A');
+@check=Bio::Matrix::PSM::SiteMatrix::_uncompress_string($fA,1,1);
+@A=$psm->get_array('A');
+($var,$max) = (0,0);
+for (my $i = 0; $i<@check;$i++) {
+  my $diff=abs(abs($check[$i])-abs($A[$i]));
+  $var += $diff;
+  $max=$diff if ($diff>$max);
+}
+$avg=$var/@check;
+ok $avg<0.01; #Loss of data under 1 percent
+
+%weights = $psmIO->weight;
+ok !$weights{''};
+
+%seq     = $psmIO->seq;
+ok scalar keys %seq, 0;
+
+#Quick check if returned object works
+$IUPAC   = $psm->IUPAC;
+ok $IUPAC,'VVDCAKSTGBYD';
+
+#Now we are going to try mast
+$psmIO =  Bio::Matrix::PSM::IO->new(-format=>'mast', 
+	  -file=>Bio::Root::IO->catfile(qw(t data mast.dat)));
+ok $psmIO;
+
+@inputfile = grep(/datafile/i,$psmIO->unstructured);
+ok !@inputfile;
+
+ok( $psmIO->release, '2002/04/02 0:11:59');
+
+@ids     = $psmIO->hid;
+ok @ids,4;
+
+%weights = $psmIO->weight;
+ok !%weights; #Mast doesn't have weights
+
+ok %seq    = $psmIO->seq;
+
+foreach my $id ($psmIO->hid) {
+    ok $seq{$id};
+}
+ok $psm=$psmIO->next_psm;
+
+my %instances=$psmIO->instances;
+ok %instances;
+
+ok $psmIO->version, '3.0';
+
+my $mmastIO=new Bio::Matrix::PSM::IO(-format=>'mast',-file=>Bio::Root::IO->catfile(qw(t data mixedmast.dat)));
+
+$psm = $mmastIO->next_psm; 
+my $lastinstances = $psm->instances();
+my $i=0;
+foreach my $hit (@$lastinstances) {
+    $hit -> end ( $hit-> start () + length ($hit->seq) - 1 ) ; # fix an old bug in InstanceSite.pm
+    my $d=join("\t",$hit->{accession_number},$hit -> start () , $hit-> end (),$hit -> score (),
+    $hit -> strand == 1 ? '+' : '-' , $hit -> frame,  $hit -> desc ( ));
+    ok $d,$mmt[$i];
+    $i++;
+    last if ($hit -> start == 1652447);
+}
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/qual.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/qual.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/qual.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,77 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: qual.t,v 1.6 2004/10/28 02:27:27 jason Exp $
+#
+
+
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'};
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 12;
+}
+
+END {
+    unlink qw(write_qual.qual );
+}
+
+
+warn("Checking if the Bio::SeqIO::Qual module could be used, even though it shouldn't be directly use'd...\n") if ( $DEBUG );
+    # test 1
+use Bio::SeqIO::qual;
+ok(1);
+
+warn("Checking to see if PrimaryQual.pm can be used...\n") if ( $DEBUG );
+use Bio::Seq::PrimaryQual;
+ok(1);
+
+warn("Checking to see if PrimaryQual objects can be created from a file...\n") if ( $DEBUG );
+my $in_qual  = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile("t","data",
+								 "qualfile.qual"),
+			       '-format' => 'qual');
+ok($in_qual);
+
+my @quals;
+warn("I saw these in qualfile.qual:\n") if $DEBUG;
+my $first = 1;
+while ( my $qual = $in_qual->next_seq() ) {
+		# ::dumpValue($qual);
+
+    ok(1);
+    @quals = @{$qual->qual()};
+    if( $DEBUG ) {
+	warn($qual->id()."\n");
+	
+	warn("(".scalar(@quals).") quality values.\n");
+    }
+    if( $first ) { 
+	ok(@quals, 484);
+    }
+    $first = 0;
+}
+
+# in October 2004, Carlos Mauricio La Rota posted a problem with descriptions
+# this routine is to test that
+
+@quals = 10..20;
+# this one has a forced header
+my $seq = Bio::Seq::PrimaryQual->new(
+                    -qual =>   \@quals,
+                    -header   =>   "Hank is a good cat. I gave him a bath yesterday.");
+my $out = Bio::SeqIO->new(     -fh  =>   \*STDOUT,
+                         -format   =>   'qual');
+# yes, that works
+# $out->write_seq($seq);
+$seq->header('');
+$seq->id('Hank1');
+# yes, that works
+# $out->write_seq($seq);
+

Added: trunk/packages/bioperl/branches/upstream/current/t/raw.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/raw.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/raw.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,35 @@
+# -*-Perl-*-
+# $Id: raw.t,v 1.1 2005/09/07 02:15:50 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+BEGIN {
+	# Systems without an installed Test module fall back to the copy of
+	# Test.pm that lives in the t directory.
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	$NUMTESTS = 5;
+	plan tests => $NUMTESTS;
+}
+
+use Bio::Root::IO;
+use Bio::SeqIO::raw;
+
+ok(1);
+my $verbose = $ENV{'BIOPERLDEBUG'};
+
+# Open t/data/test.raw with the 'raw' format driver.
+my $path = Bio::Root::IO->catfile(qw(t data test.raw));
+my $io = Bio::SeqIO->new(-format => 'raw', -verbose => $verbose, -file => $path);
+
+# The first two records are expected to be 358 and 158 long, in that order.
+my $seq = $io->next_seq;
+ok($seq);
+ok($seq->length, 358);
+ok($seq = $io->next_seq);
+ok($seq->length, 158);

Added: trunk/packages/bioperl/branches/upstream/current/t/rnamotif.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/rnamotif.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/rnamotif.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,147 @@
+# -*-Perl-*-
+# Bioperl Test Script for RNA Motif Modules
+# $Id: rnamotif.t,v 1.2.4.2 2006/11/30 09:24:00 sendu Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if( $@ ) {
+    use lib 't/lib';
+    }
+
+    use Test::More;
+	
+	eval {
+		require Bio::Tools::RNAMotif;
+	};
+	if ($@) {
+		plan skip_all => 'Bio::Tools::RNAMotif failed to load, DB_File probably not installed. This means that the module is not usable. Skipping tests';
+	}
+	else {
+		plan tests => 72;
+	}
+}
+
+use Bio::Tools::ERPIN;
+use Bio::Root::IO;
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+### RNAMotif.pm tests ###
+
+my $parser = new Bio::Tools::RNAMotif(
+		-verbose => $verbose,
+        -file => Bio::Root::IO->catfile('t','data','trna.strict.rnamotif'),
+        -motiftag => 'tRNA_gene',
+        -desctag => 'tRNA');
+
+
+my @genes;
+while( my $gene = $parser->next_prediction ) {
+    push @genes, $gene;
+}
+#tests 1-12 
+is($genes[1]->display_name, 'tRNA','RNAMotif::display_name()');
+is($genes[12]->seq_id, 'M33910','RNAMotif::seq_id()');
+is($genes[6]->primary_tag, 'tRNA_gene','RNAMotif::primary_tag()');
+is($genes[22]->start, 464,'RNAMotif::start()');
+is($genes[8]->end, 585,'RNAMotif::end()');
+is($genes[9]->strand, 1,'RNAMotif::strand()');
+is($genes[90]->get_Annotations('sequence'),
+   'cggatt ta ttg ggcg taa a gggct cgtaggc ggctc'.
+   ' gtcgcgtccggtgtgaaagtc catc gcttaac ggtg gatctg cgcc',
+   "RNAMotif::get_Annotations('sequence')");
+is($genes[84]->get_Annotations('descfile'), 'trna.strict.descr',
+   "RNAMotif::get_Annotations('descfile')");
+is($genes[4]->get_Annotations('descline'),
+   'gi|173683|gb|M10671|ACSTRW Avian oncornavirus Trp-tRNA',
+   "RNAMotif::get_Annotations('descline')");
+is($genes[26]->get_Annotations('secstructure'),
+   'h5 ss h5 ss h3 ss h5 ss h3 ss h5 ss h3 h3 ss',
+   "RNAMotif::get_Annotations('secstructure')");
+is($genes[4]->score, '0.000','RNAMotif::score()');
+is($genes[4]->source_tag, 'RNAMotif','RNAMotif::source_tag()');
+
+@genes=();
+
+$parser = Bio::Tools::RNAMotif->new(
+			-verbose => $verbose,
+            -file => Bio::Root::IO->catfile('t','data','sprintf.rnamotif'),
+            -motiftag => 'term',
+            -desctag => 'stem_loop');
+
+while( my $gene = $parser->next_prediction ) {
+    push @genes, $gene;
+}
+
+#tests 13-24
+is($genes[1]->display_name, 'stem_loop','RNAMotif::display_name()');
+is($genes[12]->seq_id, 'M82700','RNAMotif::seq_id()');
+is($genes[6]->primary_tag, 'term','RNAMotif::primary_tag()');
+is($genes[22]->start, 141,'RNAMotif::start()');
+is($genes[8]->end, 154,'RNAMotif::end()');
+is($genes[9]->strand, -1,'RNAMotif::strand()');
+is($genes[90]->get_Annotations('sequence'), 'ggggaag cttg cttcccc',
+   "RNAMotif::get_Annotations('sequence')");
+is($genes[84]->get_Annotations('descfile'), 'sprintf.descr',
+   "RNAMotif::get_Annotations('descfile')");
+is($genes[4]->get_Annotations('descline'),
+   'gi|173741|gb|M83548|AQF16SRRN Aquifex pyrophilus 16S ribosomal RNA (16S rRNA)',
+   "RNAMotif::get_Annotations('descline')");
+is($genes[26]->get_Annotations('secstructure'), 'h5 ss h3',
+   "RNAMotif::get_Annotations('secstructure')");
+is($genes[4]->score, '-12.100,5,gaaa','RNAMotif::score()');
+is($genes[4]->source_tag, 'RNAMotif','RNAMotif::source_tag()');
+
+### ERPIN.pm tests ###
+
+@genes = ();
+
+my @erpinstats = (
+['30260185','5181155','5181183',1,'CTTT.aacc--.CAACC.CCGTGA.GGTTG.a.GAAG',0,
+ 'gi|30260185|gb|AE016879.1| Bacillus anthracis str. Ames, complete genome',
+ 0,'1.68e-05'],
+['30260185','3709092','3709121',-1,'CTTT.taatt-.CAGTC.CTGTGA.GACCG.g.AAAG',0,
+ 'gi|30260185|gb|AE016879.1| Bacillus anthracis str. Ames, complete genome',
+ 0,'5.61e-05'],
+['30260185','3710524','3710553',-1,'TTTT.aaatg-.TAGTC.CTGTGA.GGCTG.c.CAAA',0,
+ 'gi|30260185|gb|AE016879.1| Bacillus anthracis str. Ames, complete genome',
+ 0,'1.31e-04'],
+['30260185','3711223','3711251',-1,'CTTT.aaca--.CAGCC.CCGTGA.GGTTG.a.GAAG',0,
+ 'gi|30260185|gb|AE016879.1| Bacillus anthracis str. Ames, complete genome',
+ 0,'4.44e-06']
+);
+
+$parser = Bio::Tools::ERPIN->new(
+			-verbose => $verbose,
+            -file => Bio::Root::IO->catfile('t','data','testfile.erpin'),
+            -motiftag => 'protein_bind',
+			-desctag =>  'pyrR_BL');
+
+while( my $gene = $parser->next_prediction ) {
+	my @stats = @{ shift @erpinstats };
+	is($gene->display_name, 'pyrR_BL','ERPIN::display_name()');
+	is($gene->seq_id, shift @stats,'ERPIN::seq_id()');
+	is($gene->primary_tag, 'protein_bind','ERPIN::primary_tag()');
+	is($gene->start, shift @stats,'ERPIN::start()');
+	is($gene->end, shift @stats,'ERPIN::end()');
+	is($gene->strand, shift @stats,'ERPIN::strand()');
+	is($gene->get_Annotations('sequence'), shift @stats,
+	   "ERPIN::get_Annotations('sequence')");
+	is($gene->get_Annotations('descfile'), shift @stats,
+	   "ERPIN::get_Annotations('descfile')");
+	is($gene->get_Annotations('descline'), shift @stats,
+	   "ERPIN::get_Annotations('descline')");
+	is($gene->get_Annotations('secstructure'), shift @stats,
+	   "ERPIN::get_Annotations('secstructure')");
+	is($gene->score, shift @stats,'ERPIN::score()');
+	is($gene->source_tag, 'ERPIN','ERPIN::source_tag()');
+}
+
+### Infernal.pm tests ###
+### FASTR.pm tests ###
\ No newline at end of file

Added: trunk/packages/bioperl/branches/upstream/current/t/scf.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/scf.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/scf.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,171 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: scf.t,v 1.10.6.1 2006/11/08 17:25:55 sendu Exp $
+#
+
+use strict;
+
+BEGIN {
+    use vars qw($DEBUG $verbose);
+    $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+    $verbose = $DEBUG ? 0 : -1;
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 17;
+}
+
+END {
+	unlink qw(
+				 write_scf.scf
+				 write_scf_synthetic_traces.scf 
+				 write_scf_subtrace.scf
+				 write_scf_version2.scf
+				);
+}
+
+use Dumpvalue();
+
+my $dumper = new Dumpvalue();
+$dumper->veryCompact(1) if $DEBUG;
+
+use Bio::SeqIO::scf;
+use Bio::Seq::SequenceTrace;
+
+my $in_scf = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+									  ("t","data","chad100.scf"),
+									  -format => 'scf',
+									  -verbose => $verbose);
+ok($in_scf);
+
+my $swq = $in_scf->next_seq();
+
+ok (ref($swq) eq "Bio::Seq::SequenceTrace");
+
+ok (length($swq->seq())>10);
+my $qualities = join(' ',@{$swq->qual()});
+
+ok (length($qualities)>10);
+my $id = $swq->id();
+ok ($swq->id() eq "ML4942R");
+
+my $a_channel = $swq->trace("a");
+ok (scalar(@$a_channel) > 10);
+my $c_channel = $swq->trace("c");
+ok (length($c_channel) > 10);
+my $g_channel = $swq->trace("g");
+ok (length($g_channel) > 10);
+my $t_channel = $swq->trace("t");
+ok (length($t_channel) > 10);
+
+my $ref = $swq->peak_indices();
+my @indices = @$ref;
+ok (scalar(@indices), 761);
+
+warn("Now checking version3...\n") if $DEBUG;
+my $in_scf_v3 = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+										  ("t","data","version3.scf"),
+										  -format => 'scf',
+										  -verbose => $verbose);
+
+my $v3 = $in_scf_v3->next_seq();
+my $ind = $v3->peak_indices();
+my @ff = @$ind;
+
+@indices = @{$v3->peak_indices()};
+ok (scalar(@indices) == 1106);
+
+my %header = %{$in_scf_v3->get_header()};
+ok $header{bases}, 1106;
+ok $header{samples},  14107;
+
+my $ac = $in_scf_v3->get_comments();
+
+ok (ref($ac) eq "Bio::Annotation::Collection");
+
+my @name_comments = grep {$_->tagname() eq 'NAME'} 
+  $ac->get_Annotations('comment');
+
+ok $name_comments[0]->as_text(), 'Comment: IIABP1D4373';
+
+my @conv_comments = grep {$_->tagname() eq 'CONV'} 
+  $ac->get_Annotations('comment');
+
+ok $conv_comments[0]->as_text(), 'Comment: phred version=0.990722.h';
+
+# is the SequenceTrace object annotated?
+my $st_ac = $swq->annotation();
+
+ok (ref($st_ac) eq "Bio::Annotation::Collection");
+
+warn("Now testing the _writing_ of scfs\n") if $DEBUG;
+
+my $out_scf = Bio::SeqIO->new(-file => ">write_scf.scf",
+										-format => 'scf',
+										-verbose => $verbose);
+exit;	# the new way
+
+$out_scf->write_seq(-target	=>	$v3,
+						  -MACH		=>	'CSM sequence-o-matic 5000',
+						  -TPSW		=>	'trace processing software',
+						  -BCSW		=>	'basecalling software',
+						  -DATF		=>	'AM_Version=2.00',
+						  -DATN		=>	'a22c.alf',
+						  -CONV		=>	'Bioperl-scf.pm');
+
+ok( -e "write_scf.scf" && ! -z "write_scf.scf" );
+
+$out_scf = Bio::SeqIO->new(-verbose => 1,
+									-file => ">write_scf_synthetic_traces.scf",
+									-format => 'scf');
+
+$swq = Bio::Seq::Quality->new(-seq =>'ATCGATCGAA',
+										-qual =>"10 20 30 40 50 20 10 30 40 50",
+										-alphabet =>'dna');
+
+my $trace = Bio::Seq::SequenceTrace->new(-swq => $swq);
+
+$out_scf->write_seq(	-target	=>	$trace,
+							-MACH		=>	'CSM sequence-o-matic 5000',
+							-TPSW		=>	'trace processing software',
+							-BCSW		=>	'basecalling software',
+							-DATF		=>	'AM_Version=2.00',
+							-DATN		=>	'a22c.alf',
+							-CONV		=>	'Bioperl-scf.pm' );
+
+warn("Trying to write an scf with a subset of a real scf...\n") if $DEBUG;
+$out_scf = Bio::SeqIO->new(-verbose => 1,
+									-file => ">write_scf_subtrace.scf",
+									-format => 'scf');
+
+$in_scf_v3 = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+									  ("t","data","version3.scf"),
+									  -format => 'scf',
+									  -verbose => $verbose);
+$v3 = $in_scf_v3->next_seq();
+
+my $sub_v3 = $v3->sub_trace_object(5,50);
+
+warn("The subtrace object is this:\n") if $DEBUG;
+$dumper->dumpValue($sub_v3) if $DEBUG;
+
+$out_scf->write_seq(-target => $sub_v3 );
+
+my $in_scf_v2 = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+										  ("t","data","version2.scf"),
+										  -format => 'scf',
+										  -verbose => $verbose);
+$v3 = $in_scf_v2->next_seq();
+ok($v3);
+
+$out_scf = Bio::SeqIO->new(-file   => ">write_scf_version2.scf",
+                           -format => "scf");
+$out_scf->write_seq( -target  => $v3,
+                     -version => 2 );
+
+# now some version 2 things.


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/scf.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/seq_quality.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/seq_quality.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/seq_quality.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,263 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: seq_quality.t,v 1.1 2005/07/11 14:22:39 heikki Exp $
+
+use strict;
+use Data::Dumper;
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 48;
+}
+
+my $DEBUG = $ENV{'BIOPERLDEBUG'};
+
+# redirect STDERR to STDOUT
+open (STDERR, ">&STDOUT");
+
+print("Checking if the Bio::Seq::Quality module could be used...\n") if $DEBUG;
+        # test 1
+use Bio::Seq::Quality;
+ok(1);
+
+
+# create some random sequence object with no id
+my $seqobj_broken = Bio::Seq::Quality->new( -seq => "ATCGATCGA",
+                                          );
+#print Dumper $seqobj_broken;
+
+my $seqobj = Bio::Seq::Quality->new( -seq => "ATCGATCGA",
+                                     -id  => 'QualityFragment-12',
+                                     -accession_number => 'X78121',
+                                   );
+ok(!$@);
+
+
+
+# create some random quality object with the same number of qualities and the same identifiers
+my $string_quals = "10 20 30 40 50 40 30 20 10";
+my $qualobj;
+eval {
+ok $qualobj = Bio::Seq::Quality->new( -qual => $string_quals,
+                                      -id  => 'QualityFragment-12',
+                                      -accession_number => 'X78121',
+                                        );
+};
+#print Dumper $qualobj;
+
+# check to see what happens when you construct the Quality object
+ok my $swq1 = Bio::Seq::Quality->new( -seq => "ATCGATCGA",
+                                      -id  => 'QualityFragment-12',
+                                      -accession_number => 'X78121',
+                                      -qual	=>	$string_quals);
+
+
+
+
+print("Testing various weird constructors...\n") if $DEBUG;
+print("\ta) No ids, Sequence object, no quality...\n") if $DEBUG;
+	# w for weird
+my $wswq1;
+eval {
+	$wswq1 = Bio::Seq::Quality->new( -seq  =>  "ATCGATCGA",
+                                         -qual	=>	"");
+};
+ok(!$@);
+print $@;
+
+
+print("\tb) No ids, no sequence, quality object...\n") if $DEBUG;
+	# note that you must provide a alphabet for this one.
+$wswq1 = Bio::Seq::Quality->new( -seq => "",
+					-qual => $string_quals,
+					-alphabet => 'dna'
+);
+print("\tc) Absolutely nothing. (HAHAHAHA)...\n") if $DEBUG;
+eval {
+	$wswq1 = Bio::Seq::Quality->new( -seq => "",
+						-qual => "",
+						-alphabet => 'dna'
+	);
+};
+ok(!$@);
+
+
+print("\td) Absolutely nothing but an ID\n") if $DEBUG;
+eval {
+    $wswq1 = Bio::Seq::Quality->new( -seq => "",
+                                            -qual => "",
+                                            -alphabet => 'dna',
+                                            -id => 'an object with no sequence and no quality but with an id'
+	);
+};
+ok(!$@);
+
+print("\td) No sequence, No quality, No ID...\n") if $DEBUG;
+
+ok $wswq1 = Bio::Seq::Quality->new( -seq  =>	"",
+                                    -qual =>	"",
+                                    -verbose => -1 # silence the warning about inability to  guess alphabet
+);
+
+
+
+
+
+
+print("Testing various methods and behaviors...\n") if $DEBUG;
+
+print("1. Testing the seq() method...\n") if $DEBUG;
+	print("\t1a) get\n") if $DEBUG;
+	my $original_seq = $swq1->seq();
+	ok ($original_seq eq "ATCGATCGA");
+	print("\t1b) set\n") if $DEBUG;
+	ok ($swq1->seq("AAAAAAAAAAAA"));
+	print("\t1c) get (again, to make sure the set was done.)\n") if $DEBUG;
+	ok($swq1->seq() eq "AAAAAAAAAAAA");
+	print("\tSetting the sequence back to the original value...\n") if $DEBUG;
+	$swq1->seq($original_seq);
+
+
+print("2. Testing the qual() method...\n") if $DEBUG;
+	print("\t2a) get\n") if $DEBUG;
+	my @qual = @{$swq1->qual()};
+	my $str_qual = join(' ',@qual);
+	ok $str_qual eq "10 20 30 40 50 40 30 20 10";
+	print("\t2b) set\n") if $DEBUG;
+	ok $swq1->qual("10 10 10 10 10");
+	print("\t2c) get (again, to make sure the set was done.)\n") if $DEBUG;
+	my @qual2 = @{$swq1->qual()};
+	my $str_qual2 = join(' ',@qual2);
+	ok($str_qual2 eq "10 10 10 10 10 0 0 0 0"); ###!
+	print("\tSetting the quality back to the original value...\n") if $DEBUG;
+	$swq1->qual($str_qual);
+
+print("3. Testing the length() method...\n") if $DEBUG;
+	print("\t3a) When lengths are equal...\n") if $DEBUG;
+	ok($swq1->length() == 9);	
+	print("\t3b) When lengths are different\n") if $DEBUG;
+	$swq1->qual("10 10 10 10 10");
+	# why is this test failing?
+	# dumpValue($swq1);
+ok(not $swq1->length() eq "DIFFERENT");
+
+
+print("6. Testing the subqual() method...\n") if $DEBUG;
+     my $t_subqual = "10 20 30 40 50 60 70 80 90";
+     $swq1->qual($t_subqual);
+     print("\t6d) Testing the subqual at the start (border condition)\n") if $DEBUG;
+          # ok ('1 2 3' eq join(' ',@{$swq1->subqual(1,3)}));
+     print("\t6d) Testing the subqual at the end (border condition)\n") if $DEBUG;
+          # ok ('7 8 9' eq join(' ',@{$swq1->subqual(7,9)}));
+     print("\t6d) Testing the subqual in the middle\n") if $DEBUG;
+          # ok ('4 5 6' eq join(' ',@{$swq1->subqual(4,6)}));
+
+print("7. Testing cases where quality is zero...\n") if $DEBUG;
+$swq1 = Bio::Seq::Quality->new(-seq =>  'G',
+                               -qual => '0',
+                                     );
+my $swq2 = Bio::Seq::Quality->new(-seq =>  'G',
+                                  -qual => '65',
+                                     );
+ok  $swq1->length, $swq2->length;
+
+$swq1 = Bio::Seq::Quality->new(-seq =>  'GC',
+                               -qual => '0 0',
+                                     );
+$swq2 = Bio::Seq::Quality->new(-seq =>  'GT',
+                               -qual => '65 0',
+                                     );
+ok  $swq1->length, $swq2->length;
+
+
+#
+# end of test inherited from seqwithquality.t 
+#
+#################################################################
+#
+# testing new functionality
+#
+
+my $qual = '0 1 2 3 4 5 6 7 8 9 11 12';
+my $trace = '0 5 10 15 20 25 30 35 40 45 50 55';
+
+ok my $seq = Bio::Seq::Quality->new
+    ( -qual => $qual,
+      -trace_indices => $trace,
+      -seq =>  'atcgatcgatcg',
+      -id  => 'human_id',
+      -accession_number => 'S000012',
+      -verbose => -1   # to silence deprecated methods
+);
+
+no warnings;
+ok @{$seq->qual}, scalar split / /, $qual;
+ok @{$seq->trace}, scalar split / /, $trace;
+ok @{$seq->trace_indices}, scalar split / /, $trace; #deprecated
+use warnings;
+
+ok $seq->qual_text, $qual;
+ok $seq->trace_text, $trace;
+
+ok join (' ', @{$seq->subqual(2, 3)}), '1 2';
+ok $seq->subqual_text(2, 3), '1 2';
+ok join (' ', @{$seq->subqual(2, 3, "9 9")}), '9 9';
+ok $seq->subqual_text(2, 3, "8 8"), '8 8';
+
+ok join (' ', @{$seq->subtrace(2, 3)}), '5 10';
+ok $seq->subtrace_text(2, 3), '5 10';
+ok join (' ', @{$seq->subtrace(2, 3, "9 9")}), '9 9';
+ok $seq->subtrace_text(2, 3, "8 8"), '8 8';
+
+
+ok $seq->trace_index_at(5), 20;
+ok join(' ', @{$seq->sub_trace_index(5,6)}), "20 25";
+
+ok $seq->baseat(2), 't';
+#print Dumper $seq;
+
+
+#############################################
+#
+# same tests using Seq::Meta::Array methods follow ...
+#
+
+my $meta = '0 1 2 3 4 5 6 7 8 9 11 12';
+$trace = '0 5 10 15 20 25 30 35 40 45 50 55';
+my @trace_array = qw(0 5 10 15 20 25 30 35 40 45 50 55);
+
+ok $seq = Bio::Seq::Quality->new
+    ( -meta => $meta,
+      -seq =>  'atcgatcgatcg',
+      -id  => 'human_id',
+      -accession_number => 'S000012',
+      -verbose => -1   # to silence deprecated methods
+);
+
+$seq->named_meta('trace', \@trace_array);
+
+no warnings;
+ok @{$seq->meta}, scalar split / /, $meta;
+ok @{$seq->named_meta('trace')}, scalar split / /, $trace;
+use warnings;
+
+ok $seq->meta_text, $meta;
+ok $seq->named_meta_text('trace'), $trace;
+
+ok join (' ', @{$seq->submeta(2, 3)}), '1 2';
+ok $seq->submeta_text(2, 3), '1 2';
+ok join (' ', @{$seq->submeta(2, 3, "9 9")}), '9 9';
+ok $seq->submeta_text(2, 3, "8 8"), '8 8';
+
+ok join (' ', @{$seq->named_submeta('trace', 2, 3)}), '5 10';
+ok $seq->named_submeta_text('trace', 2, 3), '5 10';
+ok join (' ', @{$seq->named_submeta('trace', 2, 3, "9 9")}), '9 9';
+ok $seq->named_submeta_text('trace', 2, 3, "8 8"), '8 8';
+

Added: trunk/packages/bioperl/branches/upstream/current/t/seqfeaturePrimer.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/seqfeaturePrimer.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/seqfeaturePrimer.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,71 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: seqfeaturePrimer.t,v 1.3 2002/10/30 14:21:59 heikki Exp $
+#
+# modeled after the t/Allele.t test script
+
+use strict;
+#use Dumpvalue qw(dumpValue);
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 8;
+}
+
+my $DEBUG = $ENV{'BIOPERLDEBUG'};
+#my $dumper = new Dumpvalue();
+
+print("Checking to see if Bio::SeqFeature::Primer is available.\n") if $DEBUG;
+use Bio::SeqFeature::Primer;
+ok(1);
+print("Checking to see if a BSFP object can be created:\n") if $DEBUG;
+     # yes sure, but first scope a few variables
+my $seqsequence = "gcatcgatctagctagcta";
+my $primersequence = "aaaaaacgatcgatcgtagctagct";
+
+my $seqname = "chads_nifty_sequence";
+my $primername = "chads_nifty_primer";
+     # ok, and what about variables governing where the feature is located?
+     # check the primer3docs, luke...
+# TARGET=513,26
+# PRIMER_FIRST_BASE_INDEX=1
+# PRIMER_LEFT=484,20
+
+
+print("Checking to see if the BSFP object can be constructed with a bio::seq object\n") if $DEBUG;
+my $seq = new Bio::Seq( -seq => $seqsequence, -id =>$seqname);
+my $bsfp_seq = new Bio::SeqFeature::Primer( -sequence => $seq,
+                                             -TARGET => '5,3' );
+ok(ref($bsfp_seq) eq "Bio::SeqFeature::Primer");
+
+print("Checking to see if the BSFP object can be constructed with scalars\n") if $DEBUG;
+
+my $bsfp_scalar = new Bio::SeqFeature::Primer( -sequence => $primersequence,
+                                        -id => $primername,
+                                             -TARGET => '5,3' );
+ok(ref($bsfp_scalar) eq "Bio::SeqFeature::Primer");
+
+print("Checking to see that seq() returns a Bio::Seq object and that the object is the right one.\n") if $DEBUG;
+ok(ref($bsfp_scalar->seq()) eq "Bio::Seq");
+print("First for the scalar-ily created one.\n") if $DEBUG;
+print("id ok?\n") if $DEBUG;
+ok($bsfp_scalar->seq()->id() eq $primername);
+print("sequence ok?\n") if $DEBUG;
+ok($bsfp_scalar->seq()->seq() eq $primersequence);
+print("Now for the seq-ily created one\n") if $DEBUG;
+print("id ok?\n") if $DEBUG;
+ok($bsfp_seq->seq()->display_id() eq $seqname);
+print("sequence ok?\n") if $DEBUG;
+ok($bsfp_seq->seq()->seq() eq $seqsequence);
+
+print("Here is the structure of the BSFP_scalar object:\n") if $DEBUG;
+# $dumper->dumpValue($bsfp_scalar);
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/seqfeaturePrimer.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/seqread_fail.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/seqread_fail.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/seqread_fail.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,87 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+# $Id: seqread_fail.t,v 1.5 2005/09/17 02:11:21 bosborne Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+#use lib '..','.','./blib/lib';
+use vars qw($NUMTESTS $DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $error;
+
+BEGIN { 
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	$error = 0;
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+
+	$NUMTESTS = 13;
+	plan tests => $NUMTESTS;
+
+	eval { require IO::String; 
+			 require LWP::UserAgent;
+			 require HTTP::Request::Common;
+       };
+	if( $@ ) {
+		print STDERR "IO::String or LWP::UserAgent or HTTP::Request not installed. This means the Bio::DB::* modules are not usable. Skipping tests.\n";
+		for( 1..$NUMTESTS ) {
+			skip("IO::String, LWP::UserAgent,or HTTP::Request not installed",1);
+		}
+		$error = 1; 
+	}
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Unable to run all of the DB tests',1);
+	}
+}
+
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+
+require Bio::DB::GenBank;
+require Bio::DB::GenPept;
+require Bio::DB::SwissProt;
+require Bio::DB::RefSeq;
+require Bio::DB::EMBL;
+require Bio::DB::BioFetch;
+
+my $verbose = -1;
+$verbose = 0 if $DEBUG;
+
+sub fetch {
+    my ($id, $class) = @_;
+    print "###################### $class  ####################################\n" if $DEBUG;
+    my $seq;
+    ok defined ( my $gb = new $class('-verbose'=>$verbose,'-delay'=>0) );
+    eval { $seq = $gb->get_Seq_by_id($id) };
+    if( $@ or not defined$seq ) {
+	ok 1;
+	return;
+    }
+    ok 0;
+}
+
+my @classes = qw( Bio::DB::BioFetch Bio::DB::GenBank Bio::DB::GenPept
+                  Bio::DB::SwissProt Bio::DB::RefSeq Bio::DB::EMBL  );
+
+my $id = 'XXX111';  # nonsense id
+
+for (@classes) {
+    fetch ($id, $_);
+}
+
+ok 1;

Added: trunk/packages/bioperl/branches/upstream/current/t/sequencetrace.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/sequencetrace.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/sequencetrace.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,119 @@
+# -*-Perl-*-
+
+use strict;
+use Dumpvalue;
+use Bio::SeqIO;
+use Bio::PrimarySeq;
+use Bio::Seq::PrimaryQual;
+use Bio::Seq::Quality;
+
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 5;
+}
+
+my $dumper = new Dumpvalue();
+$dumper->veryCompact(1);
+# $dumper->compactDump(1);
+my $DEBUG = $ENV{'BIOPERLDEBUG'};
+
+        # redirect STDERR to STDOUT
+open (STDERR, ">&STDOUT");
+
+# print("Checking if the Bio::Seq::SequenceTrace module could be used...\n") if $DEBUG;
+        # test 1
+use Bio::Seq::SequenceTrace;
+     # test 1
+ok(1);
+
+# print("Reading an scf...\n");
+
+my $in_scf_v3 = Bio::SeqIO->new('-file' => Bio::Root::IO->catfile
+                    ("t","data",
+                     "version3.scf"),
+                    '-format' => 'scf',
+                    );
+
+my $trace = $in_scf_v3->next_seq();
+
+# print("Testing those values...\n");
+# print("Length: ".$trace->length()."\n");
+     # at the very end
+my $start = $trace->length()-19;
+my $end = $trace->length();
+
+# print("Testing subseq from the end...".$trace->subseq($start,$end)."\n");
+     # test 2
+ok ($trace->subseq($trace->length()-19,$trace->length()) eq "CCCCTTTCCCAACAGCACCG");
+# print("Testing the qualities for those bases...".join(' ',@{$trace->subqual($start,$end)})."\n");
+my $qualstring = join(' ',@{$trace->subqual($start,$end)});
+     # test 3
+ok ($qualstring eq "12 10 7 7 9 7 7 9 13 9 9 9 6 6 6 8 8 8 6 6");
+# print("Testing getting sub qual indices\n");
+
+# print("Testing the trace indices for those bases ($start->$end):...\n");
+my $ref = $trace->sub_peak_index($start,$end);
+my @temp = @{$ref};
+
+my $indices_at_end = join(' ',@{$trace->sub_peak_index($start,$end)});
+     # test 4
+ok($indices_at_end eq "13863 13874 13883 13898 13905 13922 13934 13952 13966 13975 13982 14003 14013 14026 14037 14056 14061 14084 14093 14099");
+# print("Getting all of the trace values for that range\n");
+my $trace_end = $trace->trace_length();
+my $trace_start = $trace_end - 19;
+my $subtrace_a = join(' ',@{$trace->sub_trace('a',$trace_start,$trace_end)});
+     # test 5
+(ok $subtrace_a eq "63 61 68 82 101 120 135 145 148 143 131 111 85 59 37 18 4 0 3 6");
+     # print("scf_dump ing...\n");
+     # $trace->scf_dump();
+     # print("The traces are:\n");
+     # $trace->_dump_traces();
+
+
+
+# whew! now given a subset of bases, get their traces....
+my $traces2 = $trace->sub_trace_object(1,5);
+$traces2->verbose(-1);
+
+
+# print("Attempting to synthesize traces for this object:\n");
+# print("The sequence is : ".$traces2->seq()."\n");
+# print("The qualities are : ".join(' ',@{$traces2->qual()})."\n");
+# print("The length is : ".$traces2->length()."\n");
+# $dumper->dumpValue($traces2);
+# can you synthesize false traces?
+
+$traces2->_synthesize_traces();
+
+
+$traces2->set_accuracies();
+print("This is an scf dump:\n");
+$traces2->scf_dump();
+
+sub the_old_way {
+        my $start2 = 1;
+        my $end2 = 5;
+        my $subtraces;
+        $subtraces->{base_start} = $start2;
+        $subtraces->{base_end} = $end2;
+        $subtraces->{sequence} = $trace->subseq($start2,$end2);
+        $subtraces->{qualities} = join(' ',$trace->subqual($start2,$end2));
+        $subtraces->{indices} = $trace->sub_trace_index($start2,$end2);
+        my @temp = @{$subtraces->{indices}};
+        $subtraces->{trace_start} = $temp[0];
+        $subtraces->{trace_end} = $temp[$#temp];
+        foreach (qw(a t g c)) {
+             $subtraces->{traces}->{$_} = $trace->sub_trace($_,$subtraces->{trace_start},$subtraces->{trace_end});
+        }
+
+        $dumper->dumpValue($subtraces);
+}
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/sequencetrace.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/seqwithquality.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/seqwithquality.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/seqwithquality.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,205 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: seqwithquality.t,v 1.9 2005/07/11 14:40:49 heikki Exp $
+
+use strict;
+use Dumpvalue;
+
+BEGIN {
+	# Fallback for systems with no installed Test module: the t dir holds a
+	# copy of Test.pm. NOTE: `use lib` and `use Test` execute at compile
+	# time, so 't' is added to @INC whether or not the require below failed.
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 20;
+}
+
+
+my $dumper = new Dumpvalue();
+my $DEBUG = $ENV{'BIOPERLDEBUG'};
+
+        # redirect STDERR to STDOUT
+open (STDERR, ">&STDOUT");
+
+my $verbosity = -1;
+
+print("Checking if the Bio::Seq::SeqWithQuality module could be used...\n") if $DEBUG;
+        # test 1
+use Bio::Seq::SeqWithQuality;
+ok(1);
+
+use Bio::PrimarySeq;
+use Bio::Seq::PrimaryQual;
+
+# create some random sequence object with no id
+my $seqobj_broken = Bio::PrimarySeq->new( -seq => "ATCGATCGA",
+                            );
+	# dumpValue($seqobj_broken);
+
+my $seqobj = Bio::PrimarySeq->new( -seq => "ATCGATCGA",
+                            -id  => 'QualityFragment-12',
+                            -accession_number => 'X78121',
+                            -verbose => $verbosity
+                            );
+ok(!$@);
+
+
+# create some random quality object with the same number of qualities and the same identifiers
+my $string_quals = "10 20 30 40 50 40 30 20 10";
+my $indices = "5 10 15 20 25 30 35 40 45";
+my $qualobj;
+eval {
+$qualobj = Bio::Seq::PrimaryQual->new( -qual => $string_quals,
+                            -id  => 'QualityFragment-12',
+                            -accession_number => 'X78121',
+                            -verbose => $verbosity
+                            );
+};
+ok(!$@);
+
+
+     # check to see what happens when you construct the SeqWithQuality object
+my $swq1 = Bio::Seq::SeqWithQuality->new( -seq	=>	$seqobj,
+                                         -verbose => $verbosity,
+					-qual		=>	$qualobj);
+ok(!$@);
+no warnings;
+
+print("Testing various weird constructors...\n") if $DEBUG;
+print("\ta) No ids, Sequence object, no quality...\n") if $DEBUG;
+	# w for weird
+my $wswq1;
+eval {
+	$wswq1 = Bio::Seq::SeqWithQuality->new( -seq  =>	$seqobj,
+                                                -verbose => $verbosity,
+						-qual	=>	"");
+};
+ok(!$@);
+
+print("\tb) No ids, no sequence, quality object...\n") if $DEBUG;
+	# note that you must provide a alphabet for this one.
+$wswq1 = Bio::Seq::SeqWithQuality->new( -seq => "",
+                                        -verbose => $verbosity,
+					-qual => $qualobj,
+					-alphabet => 'dna'
+);
+print("\tc) Absolutely nothing. (HAHAHAHA)...\n") if $DEBUG;
+eval {
+	$wswq1 = Bio::Seq::SeqWithQuality->new( -seq => "",
+                                                -verbose => $verbosity,
+						-qual => "",
+						-alphabet => 'dna'
+	);
+};
+ok(!$@);
+print("\td) Absolutely nothing but an ID\n") if $DEBUG;
+eval {
+	$wswq1 = Bio::Seq::SeqWithQuality->new( -seq => "",
+                                                -verbose => $verbosity,
+						-qual => "",
+						-alphabet => 'dna',
+						-id => 'an object with no sequence and no quality but with an id'
+	);
+};
+ok(!$@);
+
+print("\td) No sequence, No quality, No ID...\n") if $DEBUG;
+
+eval {
+	$wswq1 = Bio::Seq::SeqWithQuality->new( -seq  =>	"",
+                                                -verbose => $verbosity,
+							-qual	=>	"");
+};
+	# this should fail without a alphabet
+ok($@);
+	# dumpValue($wswq1);
+
+
+
+
+
+print("Testing various methods and behaviors...\n") if $DEBUG;
+
+print("1. Testing the seq() method...\n") if $DEBUG;
+	print("\t1a) get\n") if $DEBUG;
+	my $original_seq = $swq1->seq();
+	ok ($original_seq eq "ATCGATCGA");
+	print("\t1b) set\n") if $DEBUG;
+	ok ($swq1->seq("AAAAAAAAAAAA"));
+	print("\t1c) get (again, to make sure the set was done.)\n") if $DEBUG;
+	ok($swq1->seq() eq "AAAAAAAAAAAA");
+	print("\tSetting the sequence back to the original value...\n") if $DEBUG;
+	$swq1->seq($original_seq);
+
+print("2. Testing the qual() method...\n") if $DEBUG;
+	print("\t2a) get\n") if $DEBUG;
+	my @qual = @{$swq1->qual()};	# qual() hands back an array reference
+	my $str_qual = join(' ',@qual);	# same string form the setter accepts
+	ok ($str_qual eq "10 20 30 40 50 40 30 20 10");
+	print("\t2b) set\n") if $DEBUG;
+	ok ($swq1->qual("10 10 10 10 10"));
+	print("\t2c) get (again, to make sure the set was done.)\n") if $DEBUG;
+	my @qual2 = @{$swq1->qual()};
+	my $str_qual2 = join(' ',@qual2);
+	ok($str_qual2 eq "10 10 10 10 10");
+	print("\tSetting the quality back to the original value...\n") if $DEBUG;
+	$swq1->qual($str_qual);	# restore: the length() test below expects 9 values
+
+print("3. Testing the length() method...\n") if $DEBUG;
+	print("\t3a) When lengths are equal...\n") if $DEBUG;
+	ok($swq1->length() == 9);	
+	print("\t3b) When lengths are different\n") if $DEBUG;
+	$swq1->qual("10 10 10 10 10");
+	# why is this test failing?
+	# dumpValue($swq1);
+ok($swq1->length() eq "DIFFERENT");
+
+
+print("4. Testing the qual_obj() method...\n") if $DEBUG;
+	print("\t4a) Testing qual_obj()...\n") if $DEBUG;
+		my $retr_qual_obj = $swq1->qual_obj();
+		ok (ref($retr_qual_obj) eq "Bio::Seq::PrimaryQual");
+	print("\t4b) Testing qual_obj(\$ref)...\n") if $DEBUG;
+		$swq1->qual_obj($qualobj);
+
+print("5. Testing the seq_obj() method...\n") if $DEBUG;
+	print("\t5a) Testing seq_qual_obj()...\n") if $DEBUG;
+		my $retr_seq_obj = $swq1->seq_obj();
+		ok (ref($retr_seq_obj) eq "Bio::PrimarySeq");
+	print("\t5b) Testing seq_obj(\$ref)...\n") if $DEBUG;
+		$swq1->seq_obj($seqobj);
+
+print("6. Testing the subqual() method...\n") if $DEBUG;
+     my $t_subqual = "10 20 30 40 50 60 70 80 90";
+     $swq1->qual($t_subqual);
+     print("\t6d) Testing the subqual at the start (border condition)\n") if $DEBUG;
+          # ok ('1 2 3' eq join(' ',@{$swq1->subqual(1,3)}));
+     print("\t6d) Testing the subqual at the end (border condition)\n") if $DEBUG;
+          # ok ('7 8 9' eq join(' ',@{$swq1->subqual(7,9)}));
+     print("\t6d) Testing the subqual in the middle\n") if $DEBUG;
+          # ok ('4 5 6' eq join(' ',@{$swq1->subqual(4,6)}));
+
+
+print("7. Testing cases where quality is zero...\n") if $DEBUG;
+$swq1 = Bio::Seq::SeqWithQuality->new(-seq =>  'G',
+                                      -qual => '0',
+                                      -verbose => $verbosity,
+                                     );
+my $swq2 = Bio::Seq::SeqWithQuality->new(-seq =>  'G',
+                                         -qual => '65',
+                                         -verbose => $verbosity,
+                                     );
+ok  $swq1->length, $swq2->length;
+
+$swq1 = Bio::Seq::SeqWithQuality->new(-seq =>  'GC',
+                                      -verbose => $verbosity,
+                                      -qual => '0 0',
+                                     );
+$swq2 = Bio::Seq::SeqWithQuality->new(-seq =>  'GT',
+                                      -verbose => $verbosity,
+                                      -qual => '65 0',
+                                     );
+ok  $swq1->length, $swq2->length;

Added: trunk/packages/bioperl/branches/upstream/current/t/simpleGOparser.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/simpleGOparser.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/simpleGOparser.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,271 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## # $Id: simpleGOparser.t,v 1.13 2004/07/02 08:07:05 allenday Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use Data::Dumper;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+
+    eval { require 'Graph.pm' };
+    if( $@ ) {
+	    print STDERR "\nGraph.pm doesn't seem to be installed on this system -- the GO Parser needs it...\n\n";
+	    plan tests => 1;
+	    ok( 1 );
+	    exit( 0 );
+    }
+
+    plan tests => 101;
+}
+
+
+use Bio::OntologyIO;
+use Bio::Root::IO;
+
+my $io = new Bio::Root::IO; # less typing from now on ...
+my $parser = Bio::OntologyIO->new(
+                      -format    => "go",
+		      -defs_file => $io->catfile( "t","data",
+                                                  "GO.defs.test" ),
+                      # test using -file
+		      -file      => $io->catfile( "t","data",
+						  "component.ontology.test" ));
+
+
+my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
+my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
+
+
+my @onts = ();
+while(my $ont = $parser->next_ontology()) {
+    push(@onts, $ont);
+}
+ok (scalar(@onts), 1);
+my $ont = $onts[0];
+ok ($ont->isa("Bio::Ontology::OntologyI"));
+ok ($ont->name(), "Gene Ontology");
+
+my $engine = $ont->engine();
+ok ($engine->isa("Bio::Ontology::OntologyEngineI"));
+
+my $term = $engine->get_terms( "GO:0018897" );
+
+my @dblinks = sort ( $term->get_dblinks() );
+my @synos = sort ( $term->get_synonyms() );
+
+ok( $dblinks[ 0 ], "MetaCyc:PWY-681" );
+ok( $dblinks[ 1 ], "UM-BBD_pathwayID:dbt" );
+ok( $synos[ 0 ], "murein sacculus" );
+ok( $synos[ 1 ], "peptidoglycan" );
+ok( $term->ontology()->name(), "Gene Ontology" );
+ok( $term->name(), "dibenzothiophene desulfurization" );
+
+
+# Second term: re-use $term, @dblinks and @synos; also check a secondary id.
+$term = $engine->get_terms( "GO:0004796" );
+@dblinks = sort ( $term->get_dblinks() );
+@synos = sort ( $term->get_synonyms() );
+my @sec = sort ( $term->get_secondary_GO_ids() );
+ok( $dblinks[ 0 ], "EC:5.3.99.5" );
+ok( $synos[ 0 ], "cytochrome P450 CYP5" );
+ok( $sec[ 0 ], "GO:0008400" );
+ok( $term->ontology()->name(), "Gene Ontology" );
+ok( $term->name(), "thromboxane-A synthase" );
+
+my @parents = sort goid ( $ont->get_parent_terms( $term ) ); # goid: numeric GO id order
+ok( @parents == 2 );
+ok( $parents[ 0 ]->GO_id(), "GO:0015034" );
+ok( $parents[ 1 ]->GO_id(), "GO:0018895" );
+
+# Same query, naming both relationship types explicitly.
+@parents = sort goid ( $ont->get_parent_terms( $term, $PART_OF, $IS_A) );
+
+ok( @parents == 2 );
+ok( $parents[ 0 ]->GO_id(), "GO:0015034" );
+ok( $parents[ 1 ]->GO_id(), "GO:0018895" );
+
+# Restricted to IS_A, addressing the term by id string instead of by object.
+@parents = sort goid ( $ont->get_parent_terms( "GO:0004796", $IS_A ) );
+ok( @parents == 2 );
+ok( $parents[ 0 ]->GO_id(), "GO:0015034" );
+ok( $parents[ 1 ]->GO_id(), "GO:0018895" );
+
+# Restricted to PART_OF: no parents expected. Then all ancestors of $term.
+@parents = sort goid ( $ont->get_parent_terms( "GO:0004796", $PART_OF ) );
+ok( scalar(@parents), 0 );
+my @anc = sort goid ( $ont->get_ancestor_terms( $term ) );
+ok( scalar(@anc), 3 );
+ok( $anc[ 0 ]->GO_id(), "GO:0003673" );
+ok( $anc[ 1 ]->GO_id(), "GO:0015034" );
+ok( $anc[ 2 ]->GO_id(), "GO:0018895" );
+
+# Ancestors again, restricted to IS_A and addressed by id string.
+@anc = sort goid ( $ont->get_ancestor_terms( "GO:0004796", $IS_A ) );
+ok( scalar(@anc), 3 );
+ok( $anc[ 0 ]->GO_id(), "GO:0003673" );
+ok( $anc[ 1 ]->GO_id(), "GO:0015034" );
+ok( $anc[ 2 ]->GO_id(), "GO:0018895" );
+
+# GO:0000666: ancestors over all types, then IS_A only, then PART_OF only.
+@anc = sort goid ( $ont->get_ancestor_terms( "GO:0000666" ) );
+ok( @anc == 12 );
+
+@anc = sort goid ( $ont->get_ancestor_terms( "GO:0000666", $IS_A ) );
+ok( @anc == 2 );
+ok( $anc[ 0 ]->GO_id(), "GO:0005811" );
+ok( $anc[ 1 ]->GO_id(), "GO:0030481" );
+
+@anc = sort goid ( $ont->get_ancestor_terms( "GO:0000666", $PART_OF ) );
+ok( @anc == 6 );
+ok( $anc[ 0 ]->GO_id(), "GO:0005623" );
+ok( $anc[ 1 ]->GO_id(), "GO:0005625" );
+ok( $anc[ 2 ]->GO_id(), "GO:0005933" );
+ok( $anc[ 3 ]->GO_id(), "GO:0005935" );
+ok( $anc[ 4 ]->GO_id(), "GO:0005937" );
+ok( $anc[ 5 ]->GO_id(), "GO:0005938" );
+
+
+my @childs = sort goid ( $ont->get_child_terms( "GO:0005625", $PART_OF ) );
+ok( @childs == 2 );
+ok( $childs[ 0 ]->GO_id(), "GO:0000666" );
+ok( $childs[ 0 ]->name(), "polarisomeX" );
+ok( $childs[ 1 ]->GO_id(), "GO:0000667" );
+ok( $childs[ 1 ]->name(), "polarisomeY" );
+ok( $childs[ 1 ]->ontology()->name(), "Gene Ontology" );
+
+# The queried parent term itself, fetched straight from the engine.
+ok( $engine->get_terms( "GO:0005625" )->name(), "soluble fraction" );
+
+# Descendants of GO:0005624 via IS_A, checked in numeric GO id order.
+@childs = sort goid ( $ont->get_descendant_terms( "GO:0005624", $IS_A ) );
+ok( @childs == 6 );
+ok( $childs[ 0 ]->GO_id(), "GO:0000299" );
+ok( $childs[ 0 ]->name(), "integral membrane protein of membrane fraction" );
+ok( $childs[ 1 ]->GO_id(), "GO:0000300" );
+ok( $childs[ 1 ]->name(), "peripheral membrane protein of membrane fraction" );
+ok( $childs[ 2 ]->GO_id(), "GO:0005792" );
+ok( $childs[ 2 ]->name(), "microsome" );
+ok( $childs[ 3 ]->GO_id(), "GO:0019717" );
+ok( $childs[ 3 ]->name(), "synaptosome" );
+ok( $childs[ 4 ]->GO_id(), "GO:0019718" );
+ok( $childs[ 4 ]->name(), "rough microsome" );
+ok( $childs[ 5 ]->GO_id(), "GO:0019719" );
+ok( $childs[ 5 ]->name(), "smooth microsome" );
+
+@childs = sort goid ( $ont->get_descendant_terms( "GO:0005625", $IS_A ) );
+ok( @childs == 0 );
+
+# GO:0005625 is expected to have descendants through PART_OF only.
+@childs = sort goid ( $ont->get_descendant_terms( "GO:0005625", $PART_OF ) );
+ok( @childs == 2 );
+
+my @rels = sort child_goid ( $ont->get_relationships( "GO:0005625" ) );
+ok( @rels == 3 );
+ok( $rels[ 0 ]->object_term()->GO_id(), "GO:0005625" );
+ok( $rels[ 0 ]->subject_term()->GO_id(), "GO:0000666" );
+ok( $rels[ 0 ]->predicate_term()->equals( $PART_OF ) );
+
+ok( $rels[ 1 ]->object_term()->GO_id(), "GO:0005625" );
+ok( $rels[ 1 ]->subject_term()->GO_id(), "GO:0000667" );
+ok( $rels[ 1 ]->predicate_term()->equals( $PART_OF ) );
+
+ok( $rels[ 2 ]->object_term()->GO_id(), "GO:0000267" );
+ok( $rels[ 2 ]->subject_term()->GO_id(), "GO:0005625" );
+ok( $rels[ 2 ]->predicate_term()->equals( $IS_A ) );
+
+# dbxrefs and synonyms are candidates for being falsely picked up by
+# overly promiscuous regular expressions as related terms, so we check
+# that looking them up as term identifiers returns nothing
+my @terms = $engine->get_terms( "EC:5.3.99.5" );
+ok (scalar(@terms), 0);
+@terms = $engine->get_terms("MetaCyc:PWY-681","MetaCyc:PWY");
+ok (scalar(@terms), 0);
+@terms = $engine->get_terms("UM-BBD_pathwayID:dbt","BBD_pathwayID:dbt",
+                            "UM-BBD_pathwayID:dbt2","BBD_pathwayID:dbt2");
+ok (scalar(@terms), 0);
+
+
+ok( $engine->graph() );
+
+ok( $ont->add_term( Bio::Ontology::GOterm->new(-identifier => "GO:0000000")));
+
+ok( $engine->has_term( "GO:0000300" ) );
+
+ok( scalar $ont->get_all_terms(), 44 );
+ok( scalar $ont->get_relationship_types(), 3 );
+
+ok( ! $ont->add_relationship( $rels[ 2 ] ) ); # this edge already exists, cannot add
+
+$rels[ 2 ]->subject_term()->GO_id( "GO:0005938" );
+ok( $ont->add_relationship( $rels[ 2 ] ) ); # now it's changed, can add
+ 
+
+my @roots = $ont->get_root_terms();
+ok( scalar(@roots), 10 );
+
+my @leafs = $ont->get_leaf_terms();
+ok( scalar(@leafs), 19 );
+
+
+
+$parser = Bio::OntologyIO->new(
+                      -format    => "go",
+		      -defs_file => $io->catfile("t", "data",
+						 "GO.defs.test2"),
+		      # test using -files
+		      -files     => $io->catfile("t", "data",
+						 "component.ontology.test2"));
+# Second ontology: a single root and four leaf terms are expected.
+$ont = $parser->next_ontology();
+ok ($ont);
+
+@roots = $ont->get_root_terms();
+ok( scalar(@roots), 1 );
+
+@leafs = $ont->get_leaf_terms();
+ok( scalar(@leafs), 4 );
+
+$parser = Bio::OntologyIO->new(
+                      -format    => "go",
+		      -file      => $io->catfile( "t","data",
+						  "mpath.ontology.test" ));
+# MPATH identifiers read through the same "go" format parser.
+ok($parser);
+$ont = $parser->next_ontology;
+ok($ont);
+$engine = $ont->engine;
+ok($engine);
+$term = $engine->get_terms( "MPATH:30" );
+ok($term->identifier,"MPATH:30");
+ok($term->name,"cystic medial necrosis");
+ok($term->definition,undef);
+ok((sort $term->get_synonyms)[0],"erdheim disease");
+ok($ont->get_parent_terms( $term )->name,"tissue specific degenerative process");
+ok(scalar($ont->get_root_terms()),2);
+@anc = $ont->get_ancestor_terms($term);
+ok(scalar(@anc),4);
+
+#################################################################
+# helper functions: comparators for `sort SUBNAME LIST` (they read $a/$b)
+#################################################################
+# goid: order terms numerically by the digits of their GO_id().
+sub goid { num ( $a->GO_id() ) <=> num ( $b->GO_id() ) }
+# child_goid: as goid, keyed on child_term(); NOTE(review): tests read subject_term() - confirm.
+sub child_goid { num ( $a->child_term()->GO_id() ) <=> num ( $b->child_term()->GO_id() ) }
+
+# Return the argument with every non-digit removed ("GO:0005625" -> "0005625").
+sub num {
+    (my $digits = $_[0]) =~ s/\D+//g;
+    return $digits;
+}
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/simpleGOparser.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/singlet.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/singlet.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/singlet.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,91 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: singlet.t,v 1.5.6.1 2006/10/16 17:08:15 sendu Exp $
+#
+# modeled after the t/Allele.t test script
+
+use strict;
+use vars qw($DEBUG $NUMTESTS $HAVE_DB_FILE);
+$DEBUG = $ENV{'BIOPERLDEBUG'};
+my $verbose = $DEBUG ? undef : -1;	# quiet unless debugging ("my ... unless" is undefined behaviour)
+BEGIN {
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	# bioperl takes far too long to compile.
+	unshift(@INC,'Bio');
+	eval { require Test; };
+	if( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	eval { require Bio::Assembly::Contig;
+			 require DB_File;
+			 $HAVE_DB_FILE = 1;
+		 };
+	if( $@ ) {
+		warn "No DB_File installed which is needed for Bio::Assembly::Contig\n";
+		$HAVE_DB_FILE=0;
+	}
+	plan tests => ($NUMTESTS = 3);
+}
+
+END {
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Cannot complete singlet tests',1);
+	}
+}
+
+exit(0) unless $HAVE_DB_FILE;
+# redirect STDERR to STDOUT
+open (STDERR, ">&STDOUT");
+use Bio::Assembly::IO;
+use Bio::Assembly::Singlet;
+use Bio::Seq::SeqWithQuality;
+use Bio::Seq::PrimaryQual;
+use Dumpvalue();
+my $dumper = new Dumpvalue();
+$dumper->veryCompact(1);
+
+
+my $aio = Bio::Assembly::IO->new(-file=>File::Spec->catfile(qw(t data consed_project edit_dir test_project.fasta.screen.ace.1)),
+                                -format=>'ace');
+
+my $assembly = $aio->next_assembly();
+my @contigs = $assembly->all_contigs();
+my @singlets = $assembly->all_singlets();
+
+# print("Testing to see if the first contig is a Contig.\n");
+ok(ref($contigs[0]) eq "Bio::Assembly::Contig");
+
+# print("Testing to see if the first singlet is a Singlet.\n");
+ok(ref($singlets[0]) eq "Bio::Assembly::Singlet");
+
+# print("Testing to see if the Singlet ISA Contig.\n");
+ok(UNIVERSAL::isa($singlets[0],'Bio::Assembly::Contig'));
+
+# Debugging aid (output disabled): treat contigs and singlets as one list.
+# print("There were this many contigs: (".scalar(@contigs).")\n");
+# print("There were this many singlets: (".scalar(@singlets).")\n");
+push @contigs,@singlets;
+# print("This is a list of the ".scalar(@contigs)." contigs:\n");
+foreach my $contig (@contigs) {
+     # print &contig_dump($contig);
+}
+
+
+
+# Return a text description of $contig: one line per member, or a "No Members" line.
+sub contig_dump {
+     my ($contig) = @_;
+     my $prefix = "Contig: name(".$contig->id().") ";
+     my @members = $contig->each_seq();
+     if (!@members) { return $prefix." No Members\n"; }
+     my $returner = '';	# accumulated here and returned; the caller decides whether to print
+     my $count = 1;
+     foreach my $member (@members) {
+          $returner .= "$prefix Member $count chromatfilename(".$member->{chromatfilename}.") phdfilenamename(".$member->{phdfilename}.") start(".$member->start().")\n";
+          $count++;
+     }
+     return $returner;
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/sirna.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/sirna.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/sirna.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,62 @@
+# This is -*-Perl-*- code
+## Bioperl Test Harness Script for Modules
+##
+## $Id: sirna.t,v 1.4.6.1 2006/10/16 17:08:15 sendu Exp $
+
+use strict;
+use vars qw($NUMTESTS $DEBUG $ERROR);
+$DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    $ERROR = 0;
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+
+    $NUMTESTS = 7;
+    plan tests => $NUMTESTS;
+}
+
+use Bio::Tools::SiRNA;
+use Bio::Seq;
+use Bio::SeqIO;
+
+
+# modules compile
+ok 1;
+
+my $input = Bio::SeqIO->new( -file 	=> File::Spec->catfile(qw(t data NM_002254.gb)),
+			     -format 	=> 'Genbank' );
+my $seq = $input->next_seq;
+
+#object creation
+ok my $sirna = Bio::Tools::SiRNA->new( -target 	=> $seq,
+                                     );
+
+# first test - cds only
+my @pairs = $sirna->design;
+ok scalar(@pairs), 65, "CDS only: got ". scalar(@pairs) ;
+
+
+# next test - include 3prime utr (first drop Target / SiRNA::Pair features)
+my @feats = $seq->remove_SeqFeatures;
+foreach my $feat (@feats) {
+    $seq->add_SeqFeature($feat) unless
+	($feat->primary_tag eq 'Target' or $feat->isa('Bio::SeqFeature::SiRNA::Pair'));
+}
+ok $sirna->include_3pr(1);
+@pairs = $sirna->design;
+print "With 3p UTR: got ",scalar(@pairs),"\n" if $DEBUG;
+ok  scalar(@pairs), 140;
+
+
+#third test - naked sequence
+my $newseq = Bio::Seq->new( -seq => $seq->seq);
+ok $sirna->target($newseq);
+@pairs = $sirna->design;
+print "Bare sequence: got ",scalar(@pairs),"\n" if $DEBUG;
+ok scalar(@pairs),  142;

Added: trunk/packages/bioperl/branches/upstream/current/t/splicedseq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/splicedseq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/splicedseq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,66 @@
+# -*-Perl-*-
+
+use strict;
+use vars qw($DEBUG $TESTCOUNT);
+my $error;
+
+BEGIN {     
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    $TESTCOUNT = 9;
+    plan tests => $TESTCOUNT;
+    $error = 0;
+};
+
+if( $error ==  1 ) {
+    exit(0);
+}
+
+
+use Bio::Seq;
+use Bio::SeqIO;
+
+
+my $str = Bio::SeqIO->new( '-file'=> Bio::Root::IO->catfile("t","data",
+							    "U58726.gb"), 
+			'-format' => 'GenBank');
+ok $str;
+my $seq;
+
+ok ( $seq = $str->next_seq() );
+
+# Here is a cute way to verify the sequence by seeing if the
+# the translation matches what is annotated in the file -js
+foreach my $ft ( grep { $_->primary_tag eq 'CDS'} 
+		 $seq->top_SeqFeatures ) {
+    if( $ft->has_tag('translation') ) {
+	my ($translation) = $ft->each_tag_value('translation');
+	my $t = $ft->spliced_seq(-nosort => 1);
+	my $pepseq = $t->translate()->seq();
+	chop($pepseq);# chop is to remove stop codon
+	ok($translation,$pepseq); 
+    }	
+}
+
+eval { require Bio::DB::GenBank };
+if( $@ ) {
+    print STDERR "Skipping remote location tests\n";
+    for( $Test::ntest..$TESTCOUNT ) {
+	skip("Not possible to test remote locations without DB access",1);
+    }
+    exit(0);
+} else { 
+	
+#my $db = Bio::DB::GenBank->new();
+#
+#foreach my $ft ( $seq->top_SeqFeatures ) {
+#	my $t = $ft->spliced_seq();
+#	print "Got ",$t->seq,"\n";
+#}
+
+}
+
+

Added: trunk/packages/bioperl/branches/upstream/current/t/swiss.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/swiss.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/swiss.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,383 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: swiss.t,v 1.3.4.4 2006/11/09 00:40:53 cjfields Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test::More; };
+    if( $@ ) {
+        use lib 't/lib';
+    }
+
+    use Test::More;
+    plan tests => 239;
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+END {
+   unlink(qw (swiss_unk.dat test.swiss));
+}
+
+use_ok('Bio::SeqIO');
+use_ok('Bio::Root::IO');
+my $verbose = $ENV{'BIOPERLDEBUG'};
+
+my $seqio = new Bio::SeqIO( -verbose => $verbose,
+                                     -format => 'swiss',
+                                     -file   => Bio::Root::IO->catfile('t','data', 
+                                                    'test.swiss'));
+
+isa_ok($seqio, 'Bio::SeqIO');
+my $seq = $seqio->next_seq;
+my @gns = $seq->annotation->get_Annotations('gene_name');
+
+$seqio = new Bio::SeqIO( -verbose => $verbose,
+                                 -format => 'swiss',
+                                 -file   => Bio::Root::IO->catfile
+                                 ('>test.swiss'));
+
+$seqio->write_seq($seq);
+
+# reads it in once again
+$seqio = new Bio::SeqIO( -verbose => $verbose,
+                                 -format => 'swiss',
+                                 -file => Bio::Root::IO->catfile('test.swiss'));
+    
+$seq = $seqio->next_seq;
+isa_ok($seq->species, 'Bio::Taxon');
+is($seq->species->ncbi_taxid, 6239);
+
+# version, seq_update, dates (5 tests)
+is($seq->version, 40);
+my ($ann) = $seq->get_Annotations('seq_update');
+is($ann, 35);
+my @dates = $seq->get_dates;
+my @date_check = qw(01-NOV-1997 01-NOV-1997 16-OCT-2001);
+for my $date (@dates) {
+    is($date, shift @date_check);
+}
+
+my @gns2 = $seq->annotation->get_Annotations('gene_name');
+# check gene name is preserved (was losing suffix in worm gene names)
+ok($#gns2 == 0 && $gns[0]->value eq $gns2[0]->value);
+
+# test swissprot multiple RP lines
+my $str = Bio::SeqIO->new(-file => Bio::Root::IO->catfile
+                                  (qw(t data P33897) ));
+$seq = $str->next_seq;
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+my @refs = $seq->annotation->get_Annotations('reference');
+is( @refs, 23);
+is($refs[20]->rp, 'VARIANTS X-ALD LEU-98; ASP-99; GLU-217; GLN-518; ASP-608; ILE-633 AND PRO-660, AND VARIANT THR-13.');
+
+# version, seq_update, dates (5 tests)
+is($seq->version, 44);
+($ann) = $seq->get_Annotations('seq_update');
+is($ann, 28);
+@dates = $seq->get_dates;
+@date_check = qw(01-FEB-1994 01-FEB-1994 15-JUN-2004);
+for my $date (@dates) {
+    is($date, shift @date_check);
+}
+
+my $ast = Bio::SeqIO->new(-verbose => $verbose,
+                                  -format => 'swiss' ,
+                                  -file => Bio::Root::IO->catfile("t","data","roa1.swiss"));
+my $as = $ast->next_seq();
+
+ok defined $as->seq;
+is($as->id, 'ROA1_HUMAN', "id is ".$as->id);
+like($as->primary_id, qr(Bio::PrimarySeq));
+is($as->length, 371);
+is($as->alphabet, 'protein');
+is($as->division, 'HUMAN');
+is(scalar $as->all_SeqFeatures(), 16);
+is(scalar $as->annotation->get_Annotations('reference'), 11);
+
+# version, seq_update, dates (5 tests)
+is($as->version, 35);
+($ann) = $as->get_Annotations('seq_update');
+is($ann, 15);
+@dates = $as->get_dates;
+@date_check = qw(01-MAR-1989 01-AUG-1990 01-NOV-1997);
+for my $date (@dates) {
+    is($date, shift @date_check);
+}
+
+my ($ent,$out) = undef;
+($as,$seq) = undef;
+
+$seqio = Bio::SeqIO->new(-format => 'swiss' ,
+                                 -verbose => $verbose,
+                                 -file => Bio::Root::IO->catfile
+                                 ("t","data","swiss.dat"));
+$seq = $seqio->next_seq;
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+
+# more tests to verify we are actually parsing correctly
+like($seq->primary_id, qr(Bio::PrimarySeq));
+is($seq->display_id, 'MA32_HUMAN');
+is($seq->length, 282);
+is($seq->division, 'HUMAN');
+is($seq->alphabet, 'protein');
+my @f = $seq->all_SeqFeatures();
+is(@f, 2);
+is($f[1]->primary_tag, 'CHAIN');
+is(($f[1]->get_tag_values('description'))[0], 'COMPLEMENT COMPONENT 1, Q SUBCOMPONENT BINDING PROTEIN');
+
+# version, seq_update, dates (5 tests)
+is($seq->version, 40);
+($ann) = $seq->get_Annotations('seq_update');
+is($ann, 31);
+@dates = $seq->get_dates;
+@date_check = qw(01-FEB-1995 01-FEB-1995 01-OCT-2000);
+for my $date (@dates) {
+    is($date, shift @date_check);
+}
+# Test::More's ok() takes (value, name), so expected values are compared with is().
+my @genenames = qw(GC1QBP HABP1 SF2P32 C1QBP);
+($ann) = $seq->annotation->get_Annotations('gene_name');
+foreach my $gn ( $ann->get_all_values() ) {
+    is ($gn, shift(@genenames));
+}
+is($ann->value(-joins => [" AND "," OR "]), "GC1QBP OR HABP1 OR SF2P32 OR C1QBP");
+
+# test for feature locations like ?..N
+$seq = $seqio->next_seq();
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+like($seq->primary_id, qr(Bio::PrimarySeq));
+is($seq->display_id, 'ACON_CAEEL');
+is($seq->length, 788);
+is($seq->division, 'CAEEL');
+is($seq->alphabet, 'protein');
+is(scalar $seq->all_SeqFeatures(), 5);
+# is(), not ok(): under Test::More ok() would treat 'F54H12.1' as the test name only.
+foreach my $gn ( $seq->annotation->get_Annotations('gene_name') ) {
+    is ($gn->value, 'F54H12.1');
+}
+
+# test species in swissprot -- this can be a n:n nightmare
+$seq = $seqio->next_seq();
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+like($seq->primary_id, qr(Bio::PrimarySeq));
+my @sec_acc = $seq->get_secondary_accessions();
+is($sec_acc[0], 'P29360');
+is($sec_acc[1], 'Q63631');
+is($seq->accession_number, 'P42655');
+my @kw = $seq->get_keywords;
+is( $kw[0], 'Brain');
+is( $kw[1], 'Neurone');
+is($kw[3], 'Multigene family');
+is($seq->display_id, '143E_HUMAN');
+is($seq->species->binomial, "Homo sapiens");
+is($seq->species->common_name, "Human");
+is($seq->species->ncbi_taxid, 9606);
+
+$seq = $seqio->next_seq();
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+like($seq->primary_id, qr(Bio::PrimarySeq));
+is($seq->species->binomial, "Bos taurus");
+is($seq->species->common_name, "Bovine");
+is($seq->species->ncbi_taxid, 9913);
+
+# multiple genes in swissprot
+$seq = $seqio->next_seq();
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+like($seq->primary_id, qr(Bio::PrimarySeq));
+
+($ann) = $seq->annotation->get_Annotations("gene_name");
+@genenames = qw(CALM1 CAM1 CALM CAM CALM2 CAM2 CAMB CALM3 CAM3 CAMC);
+my $flatnames = "(CALM1 OR CAM1 OR CALM OR CAM) AND (CALM2 OR CAM2 OR CAMB) AND (CALM3 OR CAM3 OR CAMC)";
+
+my @names = @genenames; # copy array
+my @ann_names = $ann->get_all_values();
+
+is(scalar(@ann_names), scalar(@names));
+foreach my $gn (@ann_names) {
+    is($gn, shift(@names));
+}
+is($ann->value(-joins => [" AND "," OR "]), $flatnames);
+
+# same entry as before, but with the new gene names format
+$seqio = Bio::SeqIO->new(-format => 'swiss',
+                                 -verbose => $verbose,
+                         -file => Bio::Root::IO->catfile
+                                 ("t","data","calm.swiss"));
+$seq = $seqio->next_seq();
+isa_ok($seq, 'Bio::Seq::RichSeqI');
+like($seq->primary_id, qr(Bio::PrimarySeq));
+($ann) = $seq->annotation->get_Annotations("gene_name");
+my @ann_names2 = $ann->get_all_values();
+@names = @genenames; # copy array
+is(scalar(@ann_names2), scalar(@names));
+foreach my $gn (@ann_names2) {
+    is($gn, shift(@names));
+}
+is($ann->value(-joins => [" AND "," OR "]), $flatnames);
+
+# test proper parsing of references
+my @litrefs = $seq->annotation->get_Annotations('reference');
+is(scalar(@litrefs), 17);
+
+my @titles = (
+    '"Complete amino acid sequence of human brain calmodulin."',
+    '"Multiple divergent mRNAs code for a single human calmodulin."',
+    '"Molecular analysis of human and rat calmodulin complementary DNA clones. Evidence for additional active genes in these species."',
+    '"Isolation and nucleotide sequence of a cDNA encoding human calmodulin."',
+    '"Structure of the human CALM1 calmodulin gene and identification of two CALM1-related pseudogenes CALM1P1 and CALM1P2."',
+    undef,
+    '"Characterization of the human CALM2 calmodulin gene and comparison of the transcriptional activity of CALM1, CALM2 and CALM3."',
+    '"Cloning of human full-length CDSs in BD Creator(TM) system donor vector."',
+    '"The DNA sequence and analysis of human chromosome 14."',
+    '"Generation and initial analysis of more than 15,000 full-length human and mouse cDNA sequences."',
+    '"Alpha-helix nucleation by a calcium-binding peptide loop."',
+    '"Solution structure of Ca(2+)-calmodulin reveals flexible hand-like properties of its domains."',
+    '"Calmodulin structure refined at 1.7 A resolution."',
+    '"Drug binding by calmodulin: crystal structure of a calmodulin-trifluoperazine complex."',
+    '"Structural basis for the activation of anthrax adenylyl cyclase exotoxin by calmodulin."',
+    '"Physiological calcium concentrations regulate calmodulin binding and catalysis of adenylyl cyclase exotoxins."',
+    '"Crystal structure of a MARCKS peptide containing the calmodulin-binding domain in complex with Ca2+-calmodulin."',
+);
+
+my @locs = (
+    "Biochemistry 21:2565-2569(1982).",
+    "J. Biol. Chem. 263:17055-17062(1988).",
+    "J. Biol. Chem. 262:16663-16670(1987).",
+    "Biochem. Int. 9:177-185(1984).",
+    "Eur. J. Biochem. 225:71-82(1994).",
+    "Submitted (FEB-1995) to the EMBL/GenBank/DDBJ databases.",
+    "Cell Calcium 23:323-338(1998).",
+    "Submitted (MAY-2003) to the EMBL/GenBank/DDBJ databases.",
+    "Nature 421:601-607(2003).",
+    "Proc. Natl. Acad. Sci. U.S.A. 99:16899-16903(2002).",
+    "Proc. Natl. Acad. Sci. U.S.A. 96:903-908(1999).",
+    "Nat. Struct. Biol. 8:990-997(2001).",
+    "J. Mol. Biol. 228:1177-1192(1992).",
+    "Biochemistry 33:15259-15265(1994).",
+    "Nature 415:396-402(2002).",
+    "EMBO J. 21:6721-6732(2002).",
+    "Nat. Struct. Biol. 10:226-231(2003).",
+);
+
+my @positions = (
+     undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    undef, undef,
+    94, 103,
+    1, 76,
+    undef, undef,
+    undef, undef,
+    5, 148,
+    1, 148,
+    undef, undef,
+);
+
+foreach my $litref (@litrefs) {
+    is($litref->title, shift(@titles));
+    is($litref->location, shift(@locs));
+    is($litref->start, shift(@positions));
+    is($litref->end, shift(@positions));
+}
+
+# format parsing changes (pre-rel 9.0)
+
+$seqio = new Bio::SeqIO( -verbose => $verbose,
+                         -format => 'swiss',
+                         -file   => Bio::Root::IO->catfile('t','data', 
+                                                       'pre_rel9.swiss'));
+
+ok($seqio);
+$seq = $seqio->next_seq;
+isa_ok($seq->species, 'Bio::Taxon');
+is($seq->species->ncbi_taxid, "6239");
+
+# version, seq_update, dates (5 tests)
+is($seq->version, 44);
+($ann) = $seq->get_Annotations('seq_update');
+is($ann, 1);
+@dates = $seq->get_dates;
+@date_check = qw(01-NOV-1997 01-NOV-1996 30-MAY-2006 );
+for my $date (@dates) {
+    is($date, shift @date_check);
+}
+
+my @idcheck = qw(Z66513 T22647 Cel.30446 Q06319 Q20772 F54D5.7 WBGene00010052
+		 F54D5.7 GO:0005515 IPR006089 IPR006091 IPR006090
+		 IPR006092 IPR009075 IPR009100 IPR013764 PF00441
+		 PF02770 PF02771 PS00072 PS00073);
+
+for my $dblink ( $seq->annotation->get_Annotations('dblink') ) {
+    is($dblink->primary_id, shift @idcheck);
+}
+
+$seqio = new Bio::SeqIO( -verbose => $verbose,
+                         -format => 'swiss',
+                         -file   => Bio::Root::IO->catfile('t','data', 
+                                                       'pre_rel9.swiss'));
+
+my @namespaces = qw(Swiss-Prot TrEMBL TrEMBL);
+
+while (my $seq = $seqio->next_seq) {
+    is($seq->namespace, shift @namespaces);
+}
+
+# format parsing changes (rel 9.0, Oct 2006)
+
+$seqio = new Bio::SeqIO( -verbose => $verbose,
+                         -format => 'swiss',
+                         -file   => Bio::Root::IO->catfile('t','data', 
+                                                       'rel9.swiss'));
+
+ok($seqio);
+$seq = $seqio->next_seq;
+isa_ok($seq->species, 'Bio::Taxon');
+is($seq->species->ncbi_taxid, 6239);
+
+is($seq->version, 47);
+($ann) = $seq->get_Annotations('seq_update');
+is($ann, 1);
+@dates = $seq->get_dates;
+@date_check = qw(01-NOV-1997 01-NOV-1996 31-OCT-2006 );
+for my $date (@dates) {
+    is($date, shift @date_check);
+}
+
+@idcheck = qw(Z66513 T22647 Cel.30446 Q06319 Q20772 F54D5.7 cel:F54D5.7
+         WBGene00010052 F54D5.7 GO:0005515 IPR006089 IPR006091 IPR006090
+         IPR006092 IPR009075 IPR013786 IPR009100 IPR013764 PF00441 PF02770
+         PF02771 PS00072 PS00073 );
+
+for my $dblink ( $seq->annotation->get_Annotations('dblink') ) {
+    is($dblink->primary_id, shift @idcheck);
+}
+
+$seqio = new Bio::SeqIO( -verbose => $verbose,
+                         -format => 'swiss',
+                         -file   => Bio::Root::IO->catfile('t','data', 
+                                                       'rel9.swiss'));
+
+@namespaces = qw(Swiss-Prot TrEMBL TrEMBL);
+
+while (my $seq = $seqio->next_seq) {
+    is($seq->namespace, shift @namespaces);
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/tRNAscanSE.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/tRNAscanSE.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/tRNAscanSE.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,56 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: tRNAscanSE.t,v 1.1 2004/05/13 15:09:22 jason Exp $
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error = 0;
+
+use strict;
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+
+    use Test;
+    plan tests => 12; 
+
+
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+use Bio::Tools::tRNAscanSE;
+use Bio::Root::IO;
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $parser = new Bio::Tools::tRNAscanSE(-verbose => $verbose,
+					 -file => Bio::Root::IO->catfile
+					 ('t','data', 
+					  'yeast.tRNAscanSE'));
+
+my @genes;
+while( my $gene = $parser->next_prediction ) {
+    push @genes, $gene;
+}
+
+ok(@genes, 287);
+ok($genes[2]->seq_id, 'I');
+my ($codon) = $genes[2]->get_tag_values('Codon');
+ok($codon, 'TTG');
+ok($genes[2]->start, 181135);
+ok($genes[2]->end, 181248);
+ok($genes[2]->strand, 1);
+ok(my @exons = $genes[2]->get_SeqFeatures,2);
+ok($exons[0]->end,181172); 
+ok($exons[0]->start,$genes[2]->start); 
+ok($exons[1]->start,181205); 
+ok($exons[1]->end,$genes[2]->end); 
+ok($exons[0]->seq_id, $genes[2]->seq_id);

Added: trunk/packages/bioperl/branches/upstream/current/t/tab.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/tab.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/tab.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,36 @@
+# -*-Perl-*-
+# $Id: tab.t,v 1.1 2005/09/07 01:36:37 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($NUMTESTS);
+BEGIN {
+	$NUMTESTS = 7;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => 'tab',
+								 -verbose => $verbose,
+								 -file => Bio::Root::IO->catfile
+								 (qw(t data test.tab) ));
+
+while (my $seq = $io->next_seq) {
+	ok defined $seq;
+	ok($seq->length, 358);
+	ok($seq->display_id =~ /^roa\d_drome$/);
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/table.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/table.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/table.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,194 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $GNF: projects/gi/symgene/src/perl/seqproc/t/table.t,v 1.3 2006/01/19 04:20:36 hlapp Exp $
+
+use strict;
+use vars qw($DEBUG $ERROR);
+use constant NUMTESTS => 449;
+use constant NONEXCELTESTS => 337;
+
+BEGIN {     
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) { 
+        use lib 't';
+    }
+    use Test;
+	
+	# we seem to need IO::Scalar for this
+	eval {
+		require IO::Scalar;
+	};
+	if ($@) {
+		$ERROR = 1;
+	}
+	
+    plan tests => NUMTESTS;
+}
+
+if ($ERROR) {
+	foreach (1..NUMTESTS) { 
+        skip ("IO::Scalar not installed, skipping all tests",1,1); 
+    }
+    exit(0);
+}
+
+use Bio::Tools::CodonTable;
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+ok(1); # if this fails already we're in trouble
+
+my @names = qw(A6
+               A6r
+               A6ps1
+               A6ps2
+               CaMK2d
+               CaMKK2
+               AMPKa1
+               AMPKa2
+               MARK3
+               MARK2);
+my @accs = qw(SK001
+              SK512
+              SK752
+              SK766
+              SK703
+              SK482
+              SK032
+              SK033
+              SK096
+              SK120);
+my @num_anns = (5, 5, 5, 5, 6, 7, 7, 7, 7, 7);
+my @psg = (0, 0, 1, 1, 0, 0, 0, 0, 0, 0);
+my @rs = (0, 0, 0, 0, 1, 1, 1, 1, 1, 1);
+
+my $io = "Bio::Root::IO";
+my $seqin = Bio::SeqIO->new(-file => $io->catfile("t","data","kinases.tsv"),
+			    -format  => 'table',
+                            -species => "Homo sapiens",
+                            -delim   => "\t",
+                            -header  => 1,
+                            -display_id => 1,
+                            -accession_number => 2,
+                            -seq => 7,
+                            -annotation => 1,
+                            -trim => 1);
+ok $seqin;
+run_tests([@names],[@accs],[@num_anns],[@psg],[@rs]);
+
+$seqin->close();
+
+$seqin = Bio::SeqIO->new(-file => $io->catfile("t","data","kinases.tsv"),
+                         -format  => 'table',
+                         -species => "Homo sapiens",
+                         -delim   => "\t",
+                         -header  => 1,
+                         -display_id => 1,
+                         -accession_number => 2,
+                         -seq => 7,
+                         -colnames => "[Family,Subfamily,Pseudogene?,Protein,Novelty]",
+                         -trim => 1);
+ok $seqin;
+run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);
+
+$seqin->close();
+
+$seqin = Bio::SeqIO->new(-file => $io->catfile("t","data","kinases.tsv"),
+                         -format  => 'table',
+                         -species => "Homo sapiens",
+                         -delim   => "\t",
+                         -header  => 1,
+                         -display_id => 1,
+                         -accession_number => 2,
+                         -seq => 7,
+                         -annotation => "[4,5,6,8,10]",
+                         -trim => 1);
+ok $seqin;
+run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);
+
+$seqin->close();
+
+# need Spreadsheet::ParseExcel installed for testing Excel format
+eval {
+    require Spreadsheet::ParseExcel;
+};
+if ($@) {
+    foreach ((NONEXCELTESTS+1)..NUMTESTS) { 
+        skip ("Skip Excel format test because Spreadsheet::ParseExcel not installed",1,1); 
+    }
+    exit(0);
+}
+
+$seqin = Bio::SeqIO->new(-file => $io->catfile("t","data","kinases.xls"),
+                         -format  => 'excel',
+                         -species => "Homo sapiens",
+                         -header  => 1,
+                         -display_id => 1,
+                         -accession_number => 2,
+                         -seq => 7,
+                         -annotation => 1,
+                         -trim => 1);
+ok $seqin;
+run_tests([@names],[@accs],[@num_anns],[@psg],[@rs]);
+
+$seqin->close();
+
+sub run_tests {
+    my ($names_,$accs_,$num_anns_,$psg_,$rs_) = @_;
+
+    my @names = @$names_;
+    my @accs = @$accs_;
+    my @num_anns = @$num_anns_;
+    my @psg = @$psg_;
+    my @rs = @$rs_;
+
+    my $n = 0;
+    my $translator = Bio::Tools::CodonTable->new(-id => 1);
+    while (my $seq = $seqin->next_seq()) {
+        $n++;
+        ok ($seq->display_id, shift(@names));
+        ok ($seq->accession_number, shift(@accs));
+        ok ($seq->species);
+        ok ($seq->species->binomial, "Homo sapiens");
+        my @anns = $seq->annotation->get_Annotations();
+        ok (scalar(@anns), shift(@num_anns));
+        @anns = grep { $_->value eq "Y"; 
+                     } $seq->annotation->get_Annotations("Pseudogene?");
+        ok (scalar(@anns), shift(@psg));
+        
+        # check sequences and that they translate to what we expect
+        if (($n >= 5) && ($seq->display_id ne "MARK3")) {
+            my $dna = $seq->seq;
+            my $protein = "";
+            my $frame = 0;
+            while ($frame <= 2) {
+                my $inframe = substr($dna,$frame);
+                # translate to protein
+                my $protseq = $translator->translate($inframe);
+                # chop off everything after the stop and before the first Met
+                while ($protseq =~ /(M[^\*]+)/g) {
+                    $protein = $1 if length($1) > length($protein);
+                }
+                $frame++;
+            }
+            # retrieve expected result from annotation and compare
+            my ($protann) = $seq->annotation->get_Annotations("Protein");
+            ok ($protann);
+            ok ($protein, $protann->value);
+        }
+        
+        @anns = grep { $_->value eq "Known - Refseq"; 
+                     } $seq->annotation->get_Annotations("Novelty");
+        ok (scalar(@anns), shift(@rs));
+        @anns = $seq->annotation->get_Annotations("Subfamily");
+        ok (scalar(@anns), ($n <= 5) ? 0 : 1);
+        @anns = $seq->annotation->get_Annotations("Family");
+        ok (scalar(@anns), 1);
+        ok (substr($anns[0]->value,0,4), ($n <= 4) ? "A6" : "CAMK");    
+    }
+    
+    ok ($n, 10);
+}
+

Added: trunk/packages/bioperl/branches/upstream/current/t/testformats.pl
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/testformats.pl	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/testformats.pl	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+#!/usr/local/bin/perl -w
+use strict;
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+use Algorithm::Diff qw(diff LCS);
+use IO::ScalarArray;
+use IO::String;
+
+my %files = ( 
+#'test.embl'      => 'embl',
+#	      'test.ace'       => 'ace',
+	      'test.fasta'     => 'fasta',
+#	      'test.game'      => 'game',
+	      'test.gcg'       => 'gcg',
+#	      'test.genbank'   => 'genbank',
+	      'test.raw'       => 'raw',
+#	      'test_badlf.gcg' => 'gcg'
+	      );
+
+while( my ($file, $type) = each %files ) {
+    my $filename = Bio::Root::IO->catfile('t','data',$file);
+    print "processing file $filename\n";
+    open(FILE, "< $filename") or die("cannot open $filename");
+    my @datain = <FILE>;
+    my $in = new IO::String(join('', @datain));
+    my $seqin = new Bio::SeqIO( -fh => $in,
+				-format => $type);
+    my $out = new IO::String;
+    my $seqout = new Bio::SeqIO( -fh => $out,
+				 -format => $type);
+    my $seq;
+    while( defined($seq = $seqin->next_seq) ) {	
+	$seqout->write_seq($seq);
+    }
+    $seqout->close();
+    $seqin->close();
+    my $strref = $out->string_ref;
+    my @dataout = map { $_."\n"} split(/\n/, $$strref );
+    my @diffs = &diff( \@datain, \@dataout);
+    foreach my $d ( @diffs ) {
+	foreach my $diff ( @$d ) {
+	    chomp($diff->[2]);
+	    print $diff->[0], $diff->[1], "\n>", $diff->[2], "\n";
+	}
+    }
+    if( @diffs ) {
+	print "in is \n", join('', @datain), "\n";
+	print "out is \n", join('',@dataout), "\n";	
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/tigrxml.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/tigrxml.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/tigrxml.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,116 @@
+# -*-Perl-*- mode (to keep my emacs happy)
+# $Id: tigrxml.t,v 1.4 2005/10/11 23:52:37 jason Exp $
+
+use strict;
+use vars qw($error $NUMTESTS $verbose);
+$verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+BEGIN {
+    $NUMTESTS = 48;
+    $error = 0;
+    eval { require Test; };
+    if ( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    plan tests => $NUMTESTS;
+    eval {
+	require XML::SAX;
+        require XML::SAX::Writer;
+    };
+    if( $@ ) {
+	$error = 1;
+	warn("No XML::SAX or XML::SAX::Writer installed cannot test Bio::SeqIO::tigrxml\n");
+    }
+}
+END { 
+   foreach ( $Test::ntest..$NUMTESTS) {
+      skip('Unable to run tigrxml tests no XML::SAX or XML::SAX::Writer is installed',1);
+   }
+}
+
+exit (0) if ($error);
+
+use Bio::SeqIO;
+use Bio::Root::IO;
+
+ok(1);
+
+my $verbose = $ENV{'BIOPERLDEBUG'} || 0;
+
+my $ast = Bio::SeqIO->new(-format => 'tigrxml' ,
+			  -verbose => $verbose,
+			  -file => Bio::Root::IO->catfile
+			  (qw(t data test.tigrxml)));
+$ast->verbose($verbose);
+my $as = $ast->next_seq();
+ok($as);
+ok($as->display_id, 'chr9');
+
+my $first = 1;
+for my $f ( sort { $a->start * $a->strand <=> $b->start * $b->strand } $as->get_SeqFeatures ) {
+    ok($f);
+
+    my ($name);
+    for my $tag ( qw(Parent ID) ) {
+	if( $f->has_tag($tag) ) {
+	    ($name) = $f->get_tag_values($tag);
+	    last;
+	}
+    }
+    if( $name eq '162.t00500' || $name eq '162.m02638' ) {
+	if( $f->primary_tag eq 'gene' ) {
+	    ok($f->start, 185408);
+	    ok($f->end, 187155);
+	    # warn($f->gff_string, "\n");
+	} elsif( $f->primary_tag eq 'mRNA' ) { 
+	    ok($f->start, 185408); # the values list for COORD are start/end of CDS not whole transcript
+	    ok($f->end, 187155);    
+	    ok($f->strand, 1);
+	} elsif( $f->primary_tag eq "five_prime_UTR" ) {
+	    my ($id) = $f->get_tag_values('ID');
+	    if( $id =~ /UTR1$/ ) {
+		ok($f->start, 185408);
+		ok($f->end,   185433);
+	    } elsif( $id =~ /UTR2$/ ) {
+		ok($f->start, 185487);
+		ok($f->end,   185793);
+	    } else {
+		ok(0, , 'expected only two UTRS');
+	    }	    
+	} elsif( $f->primary_tag eq "three_prime_UTR" ) {
+	    ok($f->start, 187042);
+	    ok($f->end, 187155);
+	} elsif( $f->primary_tag eq 'CDS' ) {
+	    ok($f->start, 185794);
+	    ok($f->end, 187041);
+	}
+    } elsif ( $name eq '162.t00448' || $name eq '162.m02967' ) {
+	if( $f->primary_tag eq 'gene' ) {
+	    ok($f->start, 59343);
+	    ok($f->end, 61061);
+	} elsif( $f->primary_tag eq 'mRNA' ) { 
+	    ok($f->start, 59343); # the values list for COORD are start/end of CDS not whole transcript
+	    ok($f->end, 61061);    
+	    ok($f->strand, -1);
+	} elsif( $f->primary_tag eq "five_prime_UTR" ) {
+	    my ($id) = $f->get_tag_values('ID');
+	    ok($f->start, 60834);
+	    ok($f->end, 61061);
+	    ok($f->strand, -1);
+	} elsif( $f->primary_tag eq "three_prime_UTR" ) {
+	    ok($f->start, 59343);
+	    ok($f->end,   59632);
+	    ok($f->strand, -1);
+	} elsif( $f->primary_tag eq 'CDS' ) {
+	    if( $first ) { 
+		ok($f->start, 60801);
+		ok($f->end,   60833);
+		ok($f->strand, -1);
+		$first = 0;
+	    }
+	}
+    } else { 
+	warn("name is $name\n");
+    }
+}

Added: trunk/packages/bioperl/branches/upstream/current/t/tinyseq.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/tinyseq.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/tinyseq.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,94 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+my $error;
+use strict;
+use vars qw($DEBUG);
+$DEBUG = $ENV{'BIOPERLDEBUG'};
+
+
+use strict;
+
+BEGIN { 
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+	use lib 't';
+    }
+    use Test;
+    use vars qw($TESTCOUNT);
+    $TESTCOUNT = 15;
+    plan tests => $TESTCOUNT;
+    
+    $error  = 0;
+    eval { require XML::Parser::PerlSAX; };
+    if( $@ ) {
+	print STDERR "XML::Parser::PerlSAX not loaded. This means game test cannot be executed. Skipping\n";
+	foreach ( $Test::ntest..$TESTCOUNT ) {
+	    skip('XML::Parser::PerlSAX installed',1);
+	}
+	$error = 1;
+    } 
+    # make sure we can load it, assuming that the prerequisites are really met
+
+    if( $error == 0 ) {
+	eval { require Bio::SeqIO::tinyseq; };
+	if( $@ ) {
+	    print STDERR "tinyseq.pm not loaded. This means tinyseq test cannot be executed. Skipping\n";
+	    foreach ( $Test::ntest..$TESTCOUNT ) {
+		skip('tinyseq.pm not loaded because XML::Writer not loaded',1);
+	    }
+	    $error = 1;
+	} 
+    }
+}
+
+if( $error == 1 ) {
+    exit(0);
+}
+
+use Bio::SeqIO;
+use Bio::Seq;
+
+my $file = File::Spec->catfile(qw(t data NM_002253.tseq));
+my $outfile = 'tinyseqout.xml';
+
+my $instream = Bio::SeqIO->new( -file 		=> $file,
+				-format		=> 'tinyseq' );
+
+my $outstream = Bio::SeqIO->new( -file		=> ">$outfile",
+				 -format	=> 'tinyseq' );
+
+my $seq = $instream->next_seq;
+ok(defined $seq);
+ok(defined $seq->seq);
+ok($seq->length, 5830);
+ok($seq->accession_number,'NM_002253');
+ok($seq->species);
+ok($seq->species->binomial, 'Homo sapiens');
+ok($seq->species->ncbi_taxid, 9606);   
+$outstream->write_seq($seq);
+undef $outstream;
+#$outstream->close_writer;
+ 
+ok(-e $outfile);
+
+my $reread = Bio::SeqIO->new( -file 		=> $outfile,
+			      -format		=> 'tinyseq' );
+
+my $seq2 = $reread->next_seq;
+
+ok($seq2);
+ok($seq2->seq);
+ok($seq2->length, 5830);
+ok($seq2->accession_number, 'NM_002253');
+ok($seq2->species);
+ok($seq2->species->binomial, 'Homo sapiens');
+ok($seq2->species->ncbi_taxid, 9606);   
+
+unlink $outfile;


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/tinyseq.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/trim.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/trim.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/trim.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,68 @@
+# -*-Perl-*-
+## Bioperl Test Harness Script for Modules
+## $Id: trim.t,v 1.4 2002/10/30 14:21:59 heikki Exp $
+#
+
+#####
+#
+# this test script tests working within the chromat_dir,phd_dir,edit_dir structure
+# it also tests the ability of Trim.pm to do its thing
+#
+#####
+
+
+use strict;
+#require 'dumpvar.pl';
+
+BEGIN {
+    # to handle systems with no installed Test module
+    # we include the t dir (where a copy of Test.pm is located)
+    # as a fallback
+    eval { require Test; };
+    if( $@ ) {
+        use lib 't';
+    }
+    use Test;
+    plan tests => 7;
+}
+my $DEBUG = $ENV{'BIOPERLDEBUG'};
+
+print("Checking if the Bio::Tools::Alignment::Consed module could be used...\n") if($DEBUG);
+use Bio::Tools::Alignment::Consed;
+use Bio::Root::IO;
+ok(1);
+
+print("Checking if the Bio::Tools::Alignment::Trim module could be used...\n") if $DEBUG;
+use Bio::Tools::Alignment::Trim;
+
+ok(1);
+
+	# scope some variables
+my($o_consed,@singlets,@singletons,@pairs,@doublets,@multiplets,$invoker);
+
+	# instantiate a new object
+
+$o_consed = Bio::Tools::Alignment::Consed->new(	-acefile	=>Bio::Root::IO->catfile("t","data","consed_project","edit_dir","test_project.fasta.screen.ace.1"));
+print("Checking if a new CSM::Consed object was created...\n") if( $DEBUG);
+ok defined $o_consed;
+
+	# set the verbosity to a valid value (0)
+my $verbosity = $o_consed->set_verbose(0);
+
+	#
+print("Checking if the new object is a reference to a Bio::Tools::Alignment::Consed object...\n") if($DEBUG);
+	# test 3
+ok ref($o_consed),'Bio::Tools::Alignment::Consed';
+
+print("Checking if singlets can be successfully set...\n") if ($DEBUG);
+	# test 4
+$invoker = $o_consed->set_singlets("verbosely");
+ok $invoker != 1;
+
+print("Checking if singlets quality can be set...\n") if ($DEBUG);
+ok !($o_consed->set_singlet_quality());
+
+print("Checking if singlet and singleton qualities can be set...\n") if( $DEBUG);
+ok !($o_consed->set_trim_points_singlets_and_singletons());
+
+


Property changes on: trunk/packages/bioperl/branches/upstream/current/t/trim.t
___________________________________________________________________
Name: svn:executable
   + 

Added: trunk/packages/bioperl/branches/upstream/current/t/ztr.t
===================================================================
--- trunk/packages/bioperl/branches/upstream/current/t/ztr.t	                        (rev 0)
+++ trunk/packages/bioperl/branches/upstream/current/t/ztr.t	2007-06-14 23:43:54 UTC (rev 309)
@@ -0,0 +1,51 @@
+# -*-Perl-*-
+# $Id: ztr.t,v 1.3 2005/09/17 02:11:21 bosborne Exp $
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.t'
+
+use strict;
+use vars qw($error $NUMTESTS);
+BEGIN {
+	$NUMTESTS = 3;
+	$error = 0;
+	# to handle systems with no installed Test module
+	# we include the t dir (where a copy of Test.pm is located)
+	# as a fallback
+	eval { require Test; };
+	if ( $@ ) {
+		use lib 't';
+	}
+	# SeqIO modules abi.pm, ctf.pm, exp.pm, pln.pm, ztr.pm
+	# all require Bio::SeqIO::staden::read, part of bioperl-ext
+	eval {
+		require Bio::SeqIO::staden::read;
+	};
+	if ( $@ ) {
+		$error = 1;
+		warn "Bio::SeqIO::staden::read of bioperl-ext is not installed or is installed incorrectly - skipping ztr.t tests\n";
+   }
+	use Test;
+	plan tests => $NUMTESTS;
+}
+
+END { 
+	foreach ( $Test::ntest..$NUMTESTS) {
+		skip('Unable to run all of the ztr tests',1);
+   }
+}
+
+exit(0) if ( $error == 1 );
+
+use Bio::SeqIO::ztr;
+use Bio::Root::IO;
+
+my $verbose = $ENV{'BIOPERLDEBUG'};
+ok(1);
+
+my $io = Bio::SeqIO->new(-format => 'ztr',
+			 -verbose => $verbose,
+			 -file => Bio::Root::IO->catfile
+			 (qw(t data readtest.ztr) ));
+ok(my $seq = $io->next_seq);
+ok($seq->seq, "GATGATTCCGGCTTCGGACGACTCTAGAGGATCCCCATTTTTATAGTTTTTATCTTGTAATAGATGTTTAGATTTTTCGTTGTAATTATTTTCTTTATTGTTGAAATTAGTATCTCTGGGTAATTTATCATATTCTCTGGAAAATGATTTACTATCACTAGATACTTCATAAGATTTATAATCTTTATTATGAAAATCATCTCTATTTTTCAAATTATTATTATATCTATCAAAGTTTCTGTCTTCATTATATCTATTAGCATATCTATCTTTATCTTTATCCCTATCACTATATCTATCATATGGTTCATCTTGTTCAACCGATCAGACTCGATTCGCCATCGCCTCTAACGGATGGCCGCTCCCCCTCTCATACCTCGCTCCCCTCGACATCCCCCGTCTCGCCACCCTATCCGCCCCCTTCATCACCCCCCCTTATCCACACCCTCACCCCCCGCATCGCGCACCCACGACCACCCGAAGAACCGCCCTTACTCCCAAGTACGCCCCGACCTCCATCACCCTATGCGGTACCACTCCCACCACACCCAGTCCTACTTTCGCCCGCACATCGGCCCCGCTTCAGACAGCTCCCAACTACGCAACCCACGCTTGTTCTTGTTCACACTCGAATACTCGAATCTCTCATTACTCCGCGGACTCCGCCGCACCTGTGCACCATTAACTGTGTAGCGCCTGAACCGGCACCTCTGATTACCACTTCCTCCACCAGCACAGTCCTATTACCGCATGTCGCTCTGCTAAGACAGTGCAAGACTCTGCGGTCGCTCTGACCCGCATCCGCCAGGGCACCTCTCACCCTCGCTGGCCACCCCGCCCCCCTCTCCCTGCCCCTTCATTCCCCCAAACCGCTTTCAACGGGACACACCCCTCCGCGGCGGACCACAACTCGCCGTCGGCCACCACTCACACCTTCCCTCCTCCTTCCCCCACATCACGCCAACCCCGTGGGACGGCTCTCCCGCGGCTACGACGCGCAACCCCCCCTCGCCGCTTCCCCCCCAACTTCCCACGGGCTCCCCTCCGCCCCTTACCCGCGAGGAGCTTCACCCGCGAACCACCTCCCCCCTTTCCCAACAGCACCG");
+




More information about the debian-med-commit mailing list